From 5ef33a9f2b9f4fb56553529f7b31f4f5f57ce014 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 19 Aug 2015 14:12:05 -0700 Subject: [PATCH 0001/2608] Don't bitshift by negative amounts. Don't bitshift by negative amounts when encoding/decoding run sizes in chunk header maps. This affected systems with page sizes greater than 8 KiB. Reported by Ingvar Hagelund . --- ChangeLog | 6 +++ include/jemalloc/internal/arena.h | 48 +++++++++++++++---- include/jemalloc/internal/private_symbols.txt | 2 + src/arena.c | 7 ++- 4 files changed, 50 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0cf887c2..c98179c6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,12 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.x.x (XXX) + + Bug fixes: + - Don't bitshift by negative amounts when encoding/decoding run sizes in chunk + header maps. This affected systems with page sizes greater than 8 KiB. + * 4.0.0 (August 17, 2015) This version contains many speed and space optimizations, both minor and diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index cb015eed..23472134 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -519,6 +519,7 @@ arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); @@ -530,6 +531,7 @@ size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); void 
arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); +size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, @@ -652,6 +654,21 @@ arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_decode(size_t mapbits) +{ + size_t size; + + if (CHUNK_MAP_SIZE_SHIFT > 0) + size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; + else if (CHUNK_MAP_SIZE_SHIFT == 0) + size = mapbits & CHUNK_MAP_SIZE_MASK; + else + size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; + + return (size); +} + JEMALLOC_ALWAYS_INLINE size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) { @@ -659,7 +676,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -670,7 +687,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_ALWAYS_INLINE size_t @@ -754,6 +771,22 @@ arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) *mapbitsp = mapbits; } +JEMALLOC_ALWAYS_INLINE size_t +arena_mapbits_size_encode(size_t size) +{ + size_t mapbits; + + if (CHUNK_MAP_SIZE_SHIFT > 0) + mapbits = size << CHUNK_MAP_SIZE_SHIFT; + else if (CHUNK_MAP_SIZE_SHIFT == 0) + mapbits = size; + else + mapbits = size >> 
-CHUNK_MAP_SIZE_SHIFT; + + assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); + return (mapbits); +} + JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) @@ -761,11 +794,10 @@ arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert(((size << CHUNK_MAP_SIZE_SHIFT) & ~CHUNK_MAP_SIZE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, (size << CHUNK_MAP_SIZE_SHIFT) | + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | CHUNK_MAP_BININD_INVALID | flags); } @@ -777,10 +809,9 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t mapbits = arena_mapbitsp_read(mapbitsp); assert((size & PAGE_MASK) == 0); - assert(((size << CHUNK_MAP_SIZE_SHIFT) & ~CHUNK_MAP_SIZE_MASK) == 0); assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, (size << CHUNK_MAP_SIZE_SHIFT) | (mapbits - & ~CHUNK_MAP_SIZE_MASK)); + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | + (mapbits & ~CHUNK_MAP_SIZE_MASK)); } JEMALLOC_ALWAYS_INLINE void @@ -799,11 +830,10 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); assert((size & PAGE_MASK) == 0); - assert(((size << CHUNK_MAP_SIZE_SHIFT) & ~CHUNK_MAP_SIZE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, (size << CHUNK_MAP_SIZE_SHIFT) | + arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED); } diff --git 
a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index dbf6aa7c..ed1f6c29 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -50,6 +50,8 @@ arena_mapbits_large_size_get arena_mapbitsp_get arena_mapbitsp_read arena_mapbitsp_write +arena_mapbits_size_decode +arena_mapbits_size_encode arena_mapbits_small_runind_get arena_mapbits_small_set arena_mapbits_unallocated_set diff --git a/src/arena.c b/src/arena.c index af48b39d..bd76e96b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -39,7 +39,7 @@ JEMALLOC_INLINE_C arena_chunk_map_misc_t * arena_miscelm_key_create(size_t size) { - return ((arena_chunk_map_misc_t *)((size << CHUNK_MAP_SIZE_SHIFT) | + return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | CHUNK_MAP_KEY)); } @@ -58,8 +58,7 @@ arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) assert(arena_miscelm_is_key(miscelm)); - return (((uintptr_t)miscelm & CHUNK_MAP_SIZE_MASK) >> - CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode((uintptr_t)miscelm)); } JEMALLOC_INLINE_C size_t @@ -73,7 +72,7 @@ arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); - return ((mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT); + return (arena_mapbits_size_decode(mapbits)); } JEMALLOC_INLINE_C int From d01fd19755bc0c2f5be3143349016dd0d7de7b36 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 19 Aug 2015 15:21:32 -0700 Subject: [PATCH 0002/2608] Rename index_t to szind_t to avoid an existing type on Solaris. This resolves #256. 
--- ChangeLog | 1 + include/jemalloc/internal/arena.h | 38 +++++++-------- .../jemalloc/internal/jemalloc_internal.h.in | 28 +++++------ include/jemalloc/internal/tcache.h | 18 ++++---- src/arena.c | 46 +++++++++---------- src/tcache.c | 8 ++-- test/unit/size_classes.c | 2 +- 7 files changed, 71 insertions(+), 70 deletions(-) diff --git a/ChangeLog b/ChangeLog index c98179c6..284d6d29 100644 --- a/ChangeLog +++ b/ChangeLog @@ -9,6 +9,7 @@ brevity. Much more detail can be found in the git revision history: Bug fixes: - Don't bitshift by negative amounts when encoding/decoding run sizes in chunk header maps. This affected systems with page sizes greater than 8 KiB. + - Rename index_t to szind_t to avoid an existing type on Solaris. * 4.0.0 (August 17, 2015) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 23472134..62a9a85c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -39,7 +39,7 @@ typedef struct arena_s arena_t; #ifdef JEMALLOC_ARENA_STRUCTS_A struct arena_run_s { /* Index of bin this run is associated with. */ - index_t binind; + szind_t binind; /* Number of free regions in run. 
*/ unsigned nfree; @@ -448,7 +448,7 @@ bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); void arena_maybe_purge(arena_t *arena); void arena_purge_all(arena_t *arena); void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, - index_t binind, uint64_t prof_accumbytes); + szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -524,7 +524,7 @@ size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -index_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); +szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); @@ -541,17 +541,17 @@ void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags); void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - index_t binind); + szind_t binind); void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, index_t binind, size_t flags); + size_t runind, szind_t binind, size_t flags); void arena_metadata_allocated_add(arena_t *arena, size_t size); void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); -index_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); -index_t arena_bin_index(arena_t 
*arena, arena_bin_t *bin); +szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); @@ -701,11 +701,11 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) { size_t mapbits; - index_t binind; + szind_t binind; mapbits = arena_mapbits_get(chunk, pageind); binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -840,7 +840,7 @@ arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - index_t binind) + szind_t binind) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); @@ -854,7 +854,7 @@ arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - index_t binind, size_t flags) + szind_t binind, size_t flags) { size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); @@ -931,10 +931,10 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) } } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { - index_t binind; + szind_t binind; binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; @@ -946,7 +946,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t rpages_ind; arena_run_t *run; arena_bin_t *bin; - index_t run_binind, actual_binind; + szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -980,10 +980,10 @@ 
arena_ptr_small_binind_get(const void *ptr, size_t mapbits) # endif /* JEMALLOC_ARENA_INLINE_A */ # ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE index_t +JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { - index_t binind = bin - arena->bins; + szind_t binind = bin - arena->bins; assert(binind < NBINS); return (binind); } @@ -1161,7 +1161,7 @@ arena_salloc(const void *ptr, bool demote) size_t ret; arena_chunk_t *chunk; size_t pageind; - index_t binind; + szind_t binind; assert(ptr != NULL); @@ -1220,7 +1220,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. */ if (likely(tcache != NULL)) { - index_t binind = arena_ptr_small_binind_get(ptr, + szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); tcache_dalloc_small(tsd, tcache, ptr, binind); } else { @@ -1272,7 +1272,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. */ if (likely(tcache != NULL)) { - index_t binind = size2index(size); + szind_t binind = size2index(size); tcache_dalloc_small(tsd, tcache, ptr, binind); } else { size_t pageind = ((uintptr_t)ptr - diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 7a137b62..f6e464e9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -184,7 +184,7 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" /* Size class index type. 
*/ -typedef unsigned index_t; +typedef unsigned szind_t; /* * Flags bits: @@ -511,12 +511,12 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE -index_t size2index_compute(size_t size); -index_t size2index_lookup(size_t size); -index_t size2index(size_t size); -size_t index2size_compute(index_t index); -size_t index2size_lookup(index_t index); -size_t index2size(index_t index); +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); @@ -527,7 +527,7 @@ arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE index_t +JEMALLOC_INLINE szind_t size2index_compute(size_t size) { @@ -558,7 +558,7 @@ size2index_compute(size_t size) } } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t size2index_lookup(size_t size) { @@ -571,7 +571,7 @@ size2index_lookup(size_t size) } } -JEMALLOC_ALWAYS_INLINE index_t +JEMALLOC_ALWAYS_INLINE szind_t size2index(size_t size) { @@ -582,7 +582,7 @@ size2index(size_t size) } JEMALLOC_INLINE size_t -index2size_compute(index_t index) +index2size_compute(szind_t index) { #if (NTBINS > 0) @@ -609,7 +609,7 @@ index2size_compute(index_t index) } JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(index_t index) +index2size_lookup(szind_t index) { size_t ret = (size_t)index2size_tab[index]; assert(ret == index2size_compute(index)); @@ -617,7 +617,7 @@ index2size_lookup(index_t index) } JEMALLOC_ALWAYS_INLINE size_t -index2size(index_t index) +index2size(szind_t index) { assert(index < NSIZES); @@ -976,7 +976,7 @@ u2rz(size_t usize) size_t ret; if (usize <= SMALL_MAXCLASS) { - index_t binind = size2index(usize); + szind_t 
binind = size2index(usize); ret = arena_bin_info[binind].redzone_size; } else ret = 0; diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 493f4575..5079cd26 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -77,7 +77,7 @@ struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ unsigned ev_cnt; /* Event count since incremental GC. */ - index_t next_gc_bin; /* Next bin to GC. */ + szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. */ /* * The pointer stacks associated with tbins follow as a contiguous @@ -126,10 +126,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, index_t binind); + tcache_bin_t *tbin, szind_t binind); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - index_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_associate(tcache_t *tcache, arena_t *arena); void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, @@ -161,7 +161,7 @@ void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool zero); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - index_t binind); + szind_t binind); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); @@ -267,7 +267,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool zero) { 
void *ret; - index_t binind; + szind_t binind; size_t usize; tcache_bin_t *tbin; @@ -312,7 +312,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool zero) { void *ret; - index_t binind; + szind_t binind; size_t usize; tcache_bin_t *tbin; @@ -360,7 +360,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -386,7 +386,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, index_t binind) JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) { - index_t binind; + szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; diff --git a/src/arena.c b/src/arena.c index bd76e96b..b1bb9db0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -314,7 +314,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - index_t binind = arena_ptr_small_binind_get(ptr, mapbits); + szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; unsigned regind = arena_run_regind(run, bin_info, ptr); @@ -507,7 +507,7 @@ arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) static bool arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - index_t binind) + szind_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -779,7 +779,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) static void arena_huge_malloc_stats_update(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = 
size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -792,7 +792,7 @@ arena_huge_malloc_stats_update(arena_t *arena, size_t usize) static void arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -805,7 +805,7 @@ arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) static void arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -818,7 +818,7 @@ arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) static void arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) { - index_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize) - nlclasses - NBINS; cassert(config_stats); @@ -1124,7 +1124,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) +arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) { arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { @@ -1135,7 +1135,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, index_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, index_t binind) +arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1888,7 +1888,7 @@ static arena_run_t * arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) { arena_run_t *run; - index_t binind; + szind_t binind; arena_bin_info_t *bin_info; /* Look for a usable run. 
*/ @@ -1939,7 +1939,7 @@ static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { void *ret; - index_t binind; + szind_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1985,7 +1985,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, index_t binind, +arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -2130,7 +2130,7 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = void arena_quarantine_junk_small(void *ptr, size_t usize) { - index_t binind; + szind_t binind; arena_bin_info_t *bin_info; cassert(config_fill); assert(opt_junk_free); @@ -2148,7 +2148,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) void *ret; arena_bin_t *bin; arena_run_t *run; - index_t binind; + szind_t binind; binind = size2index(size); assert(binind < NBINS); @@ -2232,7 +2232,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - index_t index = size2index(usize) - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2325,7 +2325,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, ret = arena_miscelm_to_rpages(miscelm); if (config_stats) { - index_t index = size2index(usize) - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2384,7 +2384,7 @@ arena_prof_promoted(const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; - index_t binind; + szind_t binind; cassert(config_prof); assert(ptr != NULL); @@ -2412,7 +2412,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - index_t binind = arena_bin_index(extent_node_arena_get( + szind_t binind = 
arena_bin_index(extent_node_arena_get( &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -2476,7 +2476,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_t *run; arena_bin_t *bin; arena_bin_info_t *bin_info; - index_t binind; + szind_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); @@ -2573,7 +2573,7 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, if (!junked) arena_dalloc_junk_large(ptr, usize); if (config_stats) { - index_t index = size2index(usize) - NBINS; + szind_t index = size2index(usize) - NBINS; arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; @@ -2620,8 +2620,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { - index_t oldindex = size2index(oldsize) - NBINS; - index_t index = size2index(size) - NBINS; + szind_t oldindex = size2index(oldsize) - NBINS; + szind_t index = size2index(size) - NBINS; arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; @@ -2699,8 +2699,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind+npages-1))); if (config_stats) { - index_t oldindex = size2index(oldsize) - NBINS; - index_t index = size2index(size) - NBINS; + szind_t oldindex = size2index(oldsize) - NBINS; + szind_t index = size2index(size) - NBINS; arena->stats.ndalloc_large++; arena->stats.allocated_large -= oldsize; diff --git a/src/tcache.c b/src/tcache.c index 3814365c..f1a30d50 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -32,7 +32,7 @@ size_t tcache_salloc(const void *ptr) void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { - index_t binind = tcache->next_gc_bin; + szind_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = 
&tcache_bin_info[binind]; @@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) void * tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, index_t binind) + tcache_bin_t *tbin, szind_t binind) { void *ret; @@ -87,7 +87,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - index_t binind, unsigned rem) + szind_t binind, unsigned rem) { arena_t *arena; void *ptr; @@ -166,7 +166,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, index_t binind, +tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { arena_t *arena; diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index d7918346..d3aaebd7 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,7 @@ get_max_size_class(void) TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; - index_t index, max_index; + szind_t index, max_index; max_size_class = get_max_size_class(); max_index = size2index(max_size_class); From b5c2a347d7cbf1154181ccb3adc599c8bd2094c9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 19 Aug 2015 23:28:34 -0700 Subject: [PATCH 0003/2608] Silence compiler warnings for unreachable code. Reported by Ingvar Hagelund. 
--- include/jemalloc/internal/arena.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 62a9a85c..f2954b32 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -659,12 +659,13 @@ arena_mapbits_size_decode(size_t mapbits) { size_t size; - if (CHUNK_MAP_SIZE_SHIFT > 0) - size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; - else if (CHUNK_MAP_SIZE_SHIFT == 0) - size = mapbits & CHUNK_MAP_SIZE_MASK; - else - size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; +#if CHUNK_MAP_SIZE_SHIFT > 0 + size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + size = mapbits & CHUNK_MAP_SIZE_MASK; +#else + size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; +#endif return (size); } @@ -776,12 +777,13 @@ arena_mapbits_size_encode(size_t size) { size_t mapbits; - if (CHUNK_MAP_SIZE_SHIFT > 0) - mapbits = size << CHUNK_MAP_SIZE_SHIFT; - else if (CHUNK_MAP_SIZE_SHIFT == 0) - mapbits = size; - else - mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; +#if CHUNK_MAP_SIZE_SHIFT > 0 + mapbits = size << CHUNK_MAP_SIZE_SHIFT; +#elif CHUNK_MAP_SIZE_SHIFT == 0 + mapbits = size; +#else + mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; +#endif assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); return (mapbits); From 45e9f66c280e1ba8bebf7bed387a43bc9e45536d Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Fri, 21 Aug 2015 12:23:06 -0700 Subject: [PATCH 0004/2608] Fix arenas_cache_cleanup(). Fix arenas_cache_cleanup() to handle allocation/deallocation within the application's thread-specific data cleanup functions even after arenas_cache is torn down. 
--- ChangeLog | 5 ++++- src/jemalloc.c | 6 +++++- test/unit/tsd.c | 6 ++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 284d6d29..9bcf2992 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,9 +4,12 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.x.x (XXX) +* 4.0.1 (XXX) Bug fixes: + - Fix arenas_cache_cleanup() to handle allocation/deallocation within the + application's thread-specific data cleanup functions even after + arenas_cache is torn down. - Don't bitshift by negative amounts when encoding/decoding run sizes in chunk header maps. This affected systems with page sizes greater than 8 KiB. - Rename index_t to szind_t to avoid an existing type on Solaris. diff --git a/src/jemalloc.c b/src/jemalloc.c index ed7863b9..03619130 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -649,8 +649,12 @@ arenas_cache_cleanup(tsd_t *tsd) arena_t **arenas_cache; arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas_cache != NULL) + if (arenas_cache != NULL) { + bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); + *arenas_cache_bypassp = true; + tsd_arenas_cache_set(tsd, NULL); a0dalloc(arenas_cache); + } } void diff --git a/test/unit/tsd.c b/test/unit/tsd.c index b031c484..8be787fd 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -56,9 +56,14 @@ static void * thd_start(void *arg) { data_t d = (data_t)(uintptr_t)arg; + void *p; + assert_x_eq(*data_tsd_get(), DATA_INIT, "Initial tsd get should return initialization value"); + p = malloc(1); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + data_tsd_set(&d); assert_x_eq(*data_tsd_get(), d, "After tsd set, tsd get should return value that was set"); @@ -67,6 +72,7 @@ thd_start(void *arg) assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); + free(p); return (NULL); } From 5d2e875ac9283cb99ff714c5cb56e1fc98a7f007 Mon Sep 17 00:00:00 2001 From: 
Jason Evans Date: Wed, 26 Aug 2015 13:47:20 -0700 Subject: [PATCH 0005/2608] Add JEMALLOC_CXX_THROW to the memalign() function prototype. Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to match glibc and avoid compilation errors when including both jemalloc/jemalloc.h and malloc.h in C++ code. This change was unintentionally omitted from ae93d6bf364e9db9f9ee69c3e5f9df110d8685a4 (Avoid function prototype incompatibilities.). --- ChangeLog | 3 +++ include/jemalloc/jemalloc_protos.h.in | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9bcf2992..97728b07 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,9 @@ brevity. Much more detail can be found in the git revision history: - Don't bitshift by negative amounts when encoding/decoding run sizes in chunk header maps. This affected systems with page sizes greater than 8 KiB. - Rename index_t to szind_t to avoid an existing type on Solaris. + - Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. * 4.0.0 (August 17, 2015) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 317ffdb9..a78414b1 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -56,7 +56,7 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size( #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@memalign(size_t alignment, size_t size) - JEMALLOC_ATTR(malloc); + JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif #ifdef JEMALLOC_OVERRIDE_VALLOC From 30949da601f7405c294a71d30bd67be29cfbc2a5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Aug 2015 16:13:59 -0700 Subject: [PATCH 0006/2608] Fix arenas_cache_cleanup() and arena_get_hard(). 
Fix arenas_cache_cleanup() and arena_get_hard() to handle allocation/deallocation within the application's thread-specific data cleanup functions even after arenas_cache is torn down. This is a more general fix that complements 45e9f66c280e1ba8bebf7bed387a43bc9e45536d (Fix arenas_cache_cleanup().). --- ChangeLog | 6 +++--- src/jemalloc.c | 11 +++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index 97728b07..17f6538c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,9 +7,9 @@ brevity. Much more detail can be found in the git revision history: * 4.0.1 (XXX) Bug fixes: - - Fix arenas_cache_cleanup() to handle allocation/deallocation within the - application's thread-specific data cleanup functions even after - arenas_cache is torn down. + - Fix arenas_cache_cleanup() and arena_get_hard() to handle + allocation/deallocation within the application's thread-specific data + cleanup functions even after arenas_cache is torn down. - Don't bitshift by negative amounts when encoding/decoding run sizes in chunk header maps. This affected systems with page sizes greater than 8 KiB. - Rename index_t to szind_t to avoid an existing type on Solaris. diff --git a/src/jemalloc.c b/src/jemalloc.c index 03619130..df962c69 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -510,17 +510,17 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) assert(ind < narenas_actual || !init_if_missing); narenas_cache = (ind < narenas_actual) ? 
narenas_actual : ind+1; - if (!*arenas_cache_bypassp) { + if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { *arenas_cache_bypassp = true; arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * narenas_cache); *arenas_cache_bypassp = false; - } else - arenas_cache = NULL; + } if (arenas_cache == NULL) { /* * This function must always tell the truth, even if - * it's slow, so don't let OOM or recursive allocation + * it's slow, so don't let OOM, thread cleanup (note + * tsd_nominal check), nor recursive allocation * avoidance (note arenas_cache_bypass check) get in the * way. */ @@ -531,6 +531,7 @@ arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing) malloc_mutex_unlock(&arenas_lock); return (arena); } + assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); tsd_arenas_cache_set(tsd, arenas_cache); tsd_narenas_cache_set(tsd, narenas_cache); } @@ -650,8 +651,6 @@ arenas_cache_cleanup(tsd_t *tsd) arenas_cache = tsd_arenas_cache_get(tsd); if (arenas_cache != NULL) { - bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); - *arenas_cache_bypassp = true; tsd_arenas_cache_set(tsd, NULL); a0dalloc(arenas_cache); } From 6d8075f1e6e72ee274832f1164c164f713788e34 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 27 Aug 2015 20:30:15 -0700 Subject: [PATCH 0007/2608] Fix chunk purge hook calls for in-place huge shrinking reallocation. Fix chunk purge hook calls for in-place huge shrinking reallocation to specify the old chunk size rather than the new chunk size. This bug caused no correctness issues for the default chunk purge function, but was visible to custom functions set via the "arena..chunk_hooks" mallctl. This resolves #264. --- ChangeLog | 4 ++++ src/huge.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 17f6538c..53d9caf8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -16,6 +16,10 @@ brevity. 
Much more detail can be found in the git revision history: - Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to match glibc and avoid compilation errors when including both jemalloc/jemalloc.h and malloc.h in C++ code. + - Fix chunk purge hook calls for in-place huge shrinking reallocation to + specify the old chunk size rather than the new chunk size. This bug caused + no correctness issues for the default chunk purge function, but was + visible to custom functions set via the "arena..chunk_hooks" mallctl. * 4.0.0 (August 17, 2015) diff --git a/src/huge.c b/src/huge.c index 54c2114c..f49a9376 100644 --- a/src/huge.c +++ b/src/huge.c @@ -149,7 +149,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, if (oldsize > usize) { size_t sdiff = oldsize - usize; zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr, - CHUNK_CEILING(usize), usize, sdiff); + CHUNK_CEILING(oldsize), usize, sdiff); if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); zeroed = false; @@ -204,7 +204,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) size_t sdiff = oldsize - usize; zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_CEILING(usize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + + CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk((void *)((uintptr_t)ptr + usize), From 4a2a3c9a6e5f8fda3536f48095d68ddaa13cd977 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 28 Aug 2015 13:45:51 +0900 Subject: [PATCH 0008/2608] Don't purge junk filled chunks when shrinking huge allocations When junk filling is enabled, shrinking an allocation fills the bytes that were previously allocated but now aren't. Purging the chunk before doing that is just a waste of time. This resolves #260. 
--- src/huge.c | 14 ++++++++------ test/integration/chunk.c | 4 ++++ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/huge.c b/src/huge.c index f49a9376..4d5887c4 100644 --- a/src/huge.c +++ b/src/huge.c @@ -148,11 +148,12 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, /* Fill if necessary (shrinking). */ if (oldsize > usize) { size_t sdiff = oldsize - usize; - zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr, - CHUNK_CEILING(oldsize), usize, sdiff); if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); zeroed = false; + } else { + zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr, + CHUNK_CEILING(oldsize), usize, sdiff); } } else zeroed = true; @@ -202,14 +203,15 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) if (oldsize > usize) { size_t sdiff = oldsize - usize; - zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + - usize), sdiff); if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); zeroed = false; + } else { + zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + CHUNK_ADDR2BASE((uintptr_t)ptr + usize), + CHUNK_CEILING(oldsize), + CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else zeroed = true; diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 7eb1b6d2..af1c9a53 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + static chunk_hooks_t orig_hooks; static chunk_hooks_t old_hooks; From c0f43b65500a78c87ef16579a33661103ec0a4fb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Sep 2015 12:46:35 -0700 Subject: [PATCH 0009/2608] Fix TLS configuration. 
Fix TLS configuration such that it is enabled by default for platforms on which it works correctly. This regression was introduced by ac5db02034c01357a4ce90504886046a58117921 (Make --enable-tls and --enable-lazy-lock take precedence over configure.ac-hardcoded defaults). --- ChangeLog | 2 ++ configure.ac | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/ChangeLog b/ChangeLog index 53d9caf8..dba05ebc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -20,6 +20,8 @@ brevity. Much more detail can be found in the git revision history: specify the old chunk size rather than the new chunk size. This bug caused no correctness issues for the default chunk purge function, but was visible to custom functions set via the "arena..chunk_hooks" mallctl. + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. * 4.0.0 (August 17, 2015) diff --git a/configure.ac b/configure.ac index f7c7f3ce..5e77b680 100644 --- a/configure.ac +++ b/configure.ac @@ -1272,13 +1272,16 @@ fi , enable_tls="" ) -if test "x${enable_tls}" = "x" -a "x${force_tls}" = "x1" ; then - AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) - enable_tls="1" -fi -if test "x${enable_tls}" = "x" -a "x${force_tls}" = "x0" ; then - AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues]) - enable_tls="0" +if test "x${enable_tls}" = "x" ; then + if test "x${force_tls}" = "x1" ; then + AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) + enable_tls="1" + elif test "x${force_tls}" = "x0" ; then + AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues]) + enable_tls="0" + else + enable_tls="1" + fi fi if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) @@ -1298,9 +1301,12 @@ else fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x1" ; then + if test "x${force_tls}" = "x0" ; then + AC_MSG_WARN([TLS enabled despite being marked unusable on 
this platform]) + fi AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) elif test "x${force_tls}" = "x1" ; then - AC_MSG_ERROR([Failed to configure TLS, which is mandatory for correct function]) + AC_MSG_WARN([TLS disabled despite being marked critical on this platform]) fi dnl ============================================================================ From 594c759f37c301d0245dc2accf4d4aaf9d202819 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Sep 2015 14:52:24 -0700 Subject: [PATCH 0010/2608] Optimize arena_prof_tctx_set(). Optimize arena_prof_tctx_set() to avoid reading run metadata when deciding whether it's actually necessary to write. --- include/jemalloc/internal/arena.h | 23 +++++++++++---- include/jemalloc/internal/prof.h | 10 +++---- src/prof.c | 2 +- test/unit/prof_reset.c | 49 ++++++++++++++++++++----------- 4 files changed, 56 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f2954b32..76c5b936 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -555,7 +555,7 @@ szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); -void arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); @@ -1092,7 +1092,7 @@ arena_prof_tctx_get(const void *ptr) } JEMALLOC_INLINE void -arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1102,12 +1102,25 @@ arena_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { size_t pageind = ((uintptr_t)ptr - 
(uintptr_t)chunk) >> LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (unlikely(arena_mapbits_large_get(chunk, pageind) != 0)) { - arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, - pageind); + if (unlikely(usize > SMALL_MAXCLASS || tctx > + (prof_tctx_t *)(uintptr_t)1U)) { + arena_chunk_map_misc_t *elm; + + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, tctx); + } else { + /* + * tctx must always be initialized for large runs. + * Assert that the surrounding conditional logic is + * equivalent to checking whether ptr refers to a large + * run. + */ + assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else huge_prof_tctx_set(ptr, tctx); diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 2e227116..fe89828b 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -332,7 +332,7 @@ bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update); prof_tctx_t *prof_tctx_get(const void *ptr); -void prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -402,13 +402,13 @@ prof_tctx_get(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(ptr, tctx); + arena_prof_tctx_set(ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -473,7 +473,7 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(ptr, usize, tctx); else - 
prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -503,7 +503,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(ptr, usize, tctx); else - prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void diff --git a/src/prof.c b/src/prof.c index a05792fd..b79eba64 100644 --- a/src/prof.c +++ b/src/prof.c @@ -219,7 +219,7 @@ void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(ptr, tctx); + prof_tctx_set(ptr, usize, tctx); malloc_mutex_lock(tctx->tdata->lock); tctx->cnts.curobjs++; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 3af19642..da34d702 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -16,6 +16,27 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) return (fd); } +static size_t +get_lg_prof_sample(void) +{ + size_t lg_prof_sample; + size_t sz = sizeof(size_t); + + assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, + "Unexpected mallctl failure while reading profiling sample rate"); + return (lg_prof_sample); +} + +static void +do_prof_reset(size_t lg_prof_sample) +{ + assert_d_eq(mallctl("prof.reset", NULL, NULL, + &lg_prof_sample, sizeof(size_t)), 0, + "Unexpected mallctl failure while resetting profile data"); + assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), + "Expected profile sample rate change"); +} + TEST_BEGIN(test_prof_reset_basic) { size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next; @@ -30,9 +51,7 @@ TEST_BEGIN(test_prof_reset_basic) "Unexpected mallctl failure while reading profiling sample rate"); assert_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); - sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", 
&lg_prof_sample, &sz, NULL, 0), 0, - "Unexpected mallctl failure while reading profiling sample rate"); + lg_prof_sample = get_lg_prof_sample(); assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); @@ -41,10 +60,7 @@ TEST_BEGIN(test_prof_reset_basic) for (i = 0; i < 2; i++) { assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure while resetting profile data"); - sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, - NULL, 0), 0, "Unexpected mallctl failure while reading " - "profiling sample rate"); + lg_prof_sample = get_lg_prof_sample(); assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected profile sample rate change"); } @@ -52,22 +68,15 @@ TEST_BEGIN(test_prof_reset_basic) /* Test resets with prof.lg_sample changes. */ lg_prof_sample_next = 1; for (i = 0; i < 2; i++) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, - &lg_prof_sample_next, sizeof(size_t)), 0, - "Unexpected mallctl failure while resetting profile data"); - sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, - NULL, 0), 0, "Unexpected mallctl failure while reading " - "profiling sample rate"); + do_prof_reset(lg_prof_sample_next); + lg_prof_sample = get_lg_prof_sample(); assert_zu_eq(lg_prof_sample, lg_prof_sample_next, "Expected profile sample rate change"); lg_prof_sample_next = lg_prof_sample_orig; } /* Make sure the test code restored prof.lg_sample. 
*/ - sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, - "Unexpected mallctl failure while reading profiling sample rate"); + lg_prof_sample = get_lg_prof_sample(); assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); @@ -182,6 +191,7 @@ thd_start(void *varg) TEST_BEGIN(test_prof_reset) { + size_t lg_prof_sample_orig; bool active; thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; @@ -195,6 +205,9 @@ TEST_BEGIN(test_prof_reset) "Unexpected pre-existing tdata structures"); tdata_count = prof_tdata_count(); + lg_prof_sample_orig = get_lg_prof_sample(); + do_prof_reset(5); + active = true; assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); @@ -214,6 +227,8 @@ TEST_BEGIN(test_prof_reset) active = false; assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), 0, "Unexpected mallctl failure while deactivating profiling"); + + do_prof_reset(lg_prof_sample_orig); } TEST_END #undef NTHREADS From b4330b02a8a909aed71c46d2c661d69545628fb4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Sep 2015 10:31:41 -0700 Subject: [PATCH 0011/2608] Fix pointer comparision with undefined behavior. This didn't cause bad code generation in the one case spot-checked (gcc 4.8.1), but had the potential to to so. This bug was introduced by 594c759f37c301d0245dc2accf4d4aaf9d202819 (Optimize arena_prof_tctx_set().). 
--- include/jemalloc/internal/arena.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 76c5b936..4c1a471a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1105,8 +1105,8 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (unlikely(usize > SMALL_MAXCLASS || tctx > - (prof_tctx_t *)(uintptr_t)1U)) { + if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > + (uintptr_t)1U)) { arena_chunk_map_misc_t *elm; assert(arena_mapbits_large_get(chunk, pageind) != 0); From 0a116faf95ba8541ce75448bb9b6fba0efdde69a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 3 Sep 2015 15:48:48 +0900 Subject: [PATCH 0012/2608] Force initialization of the init_lock in malloc_init_hard on Windows XP This resolves #269. --- src/jemalloc.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index df962c69..7cf1487a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -179,13 +179,24 @@ static bool malloc_initializer = NO_INITIALIZER; static malloc_mutex_t init_lock = SRWLOCK_INIT; #else static malloc_mutex_t init_lock; +static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI _init_init_lock(void) { - malloc_mutex_init(&init_lock); + /* If another constructor in the same binary is using mallctl to + * e.g. setup chunk hooks, it may end up running before this one, + * and malloc_init_hard will crash trying to lock the uninitialized + * lock. So we force an initialization of the lock in + * malloc_init_hard as well. We don't try to care about atomicity + * of the accessed to the init_lock_initialized boolean, since it + * really only matters early in the process creation, before any + * separate thread normally starts doing anything. 
*/ + if (!init_lock_initialized) + malloc_mutex_init(&init_lock); + init_lock_initialized = true; } #ifdef _MSC_VER @@ -1300,6 +1311,9 @@ static bool malloc_init_hard(void) { +#if defined(_WIN32) && _WIN32_WINNT < 0x0600 + _init_init_lock(); +#endif malloc_mutex_lock(&init_lock); if (!malloc_init_hard_needed()) { malloc_mutex_unlock(&init_lock); From a306a60651db0bd835d4009271e0be236b450fb3 Mon Sep 17 00:00:00 2001 From: Dmitry-Me Date: Fri, 4 Sep 2015 13:15:28 +0300 Subject: [PATCH 0013/2608] Reduce variables scope --- src/arena.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/arena.c b/src/arena.c index b1bb9db0..949fc5bf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -425,7 +425,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; - size_t flag_dirty, flag_decommitted, run_ind, need_pages, i; + size_t flag_dirty, flag_decommitted, run_ind, need_pages; size_t flag_unzeroed_mask; chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -459,6 +459,7 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, * The run is clean, so some pages may be zeroed (i.e. * never before touched). */ + size_t i; for (i = 0; i < need_pages; i++) { if (arena_mapbits_unzeroed_get(chunk, run_ind+i) != 0) @@ -1938,7 +1939,6 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) static void * arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) { - void *ret; szind_t binind; arena_bin_info_t *bin_info; arena_run_t *run; @@ -1952,6 +1952,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * Another thread updated runcur while this one ran without the * bin lock in arena_bin_nonfull_run_get(). 
*/ + void *ret; assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { @@ -1990,8 +1991,6 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, { unsigned i, nfill; arena_bin_t *bin; - arena_run_t *run; - void *ptr; assert(tbin->ncached == 0); @@ -2001,6 +2000,8 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, malloc_mutex_lock(&bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { + arena_run_t *run; + void *ptr; if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else @@ -2075,12 +2076,13 @@ arena_redzone_corruption_t *arena_redzone_corruption = static void arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) { - size_t size = bin_info->reg_size; - size_t redzone_size = bin_info->redzone_size; - size_t i; bool error = false; if (opt_junk_alloc) { + size_t size = bin_info->reg_size; + size_t redzone_size = bin_info->redzone_size; + size_t i; + for (i = 1; i <= redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); if (*byte != 0xa5) { @@ -3240,7 +3242,6 @@ small_run_size_init(void) bool arena_boot(void) { - size_t header_size; unsigned i; arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); @@ -3259,7 +3260,7 @@ arena_boot(void) */ map_bias = 0; for (i = 0; i < 3; i++) { - header_size = offsetof(arena_chunk_t, map_bits) + + size_t header_size = offsetof(arena_chunk_t, map_bits) + ((sizeof(arena_chunk_map_bits_t) + sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias)); map_bias = (header_size + PAGE_MASK) >> LG_PAGE; From a00b10735a80f7070714b278c8acdad4473bea69 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 Sep 2015 23:16:10 -0700 Subject: [PATCH 0014/2608] Fix "prof.reset" mallctl-related corruption. 
Fix heap profiling to distinguish among otherwise identical sample sites with interposed resets (triggered via the "prof.reset" mallctl). This bug could cause data structure corruption that would most likely result in a segfault. --- ChangeLog | 4 ++ include/jemalloc/internal/prof.h | 5 +- src/prof.c | 14 ++++-- test/unit/prof_reset.c | 81 ++++++++++++++++++++++++++------ 4 files changed, 84 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index dba05ebc..63c9d56a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,6 +22,10 @@ brevity. Much more detail can be found in the git revision history: visible to custom functions set via the "arena..chunk_hooks" mallctl. - Fix TLS configuration such that it is enabled by default for platforms on which it works correctly. + - Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. * 4.0.0 (August 17, 2015) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index fe89828b..eca8aa8a 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -90,10 +90,11 @@ struct prof_tctx_s { prof_tdata_t *tdata; /* - * Copy of tdata->thr_uid, necessary because tdata may be defunct during - * teardown. + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. */ uint64_t thr_uid; + uint64_t thr_discrim; /* Profiling counters, protected by tdata->lock. 
*/ prof_cnt_t cnts; diff --git a/src/prof.c b/src/prof.c index b79eba64..7427bf54 100644 --- a/src/prof.c +++ b/src/prof.c @@ -139,9 +139,16 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) uint64_t b_thr_uid = b->thr_uid; int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); if (ret == 0) { - uint64_t a_tctx_uid = a->tctx_uid; - uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < b_tctx_uid); + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } } return (ret); } @@ -791,6 +798,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) } ret.p->tdata = tdata; ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; ret.p->tctx_uid = tdata->tctx_uid_next++; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index da34d702..69983e5e 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -16,6 +16,14 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) return (fd); } +static void +set_prof_active(bool active) +{ + + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), + 0, "Unexpected mallctl failure"); +} + static size_t get_lg_prof_sample(void) { @@ -97,15 +105,12 @@ prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all) TEST_BEGIN(test_prof_reset_cleanup) { - bool active; void *p; prof_dump_header_t *prof_dump_header_orig; test_skip_if(!config_prof); - active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + set_prof_active(true); assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); p = mallocx(1, 
0); @@ -133,9 +138,7 @@ TEST_BEGIN(test_prof_reset_cleanup) dallocx(p, 0); assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); - active = false; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while deactivating profiling"); + set_prof_active(false); } TEST_END @@ -192,7 +195,6 @@ thd_start(void *varg) TEST_BEGIN(test_prof_reset) { size_t lg_prof_sample_orig; - bool active; thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -208,9 +210,7 @@ TEST_BEGIN(test_prof_reset) lg_prof_sample_orig = get_lg_prof_sample(); do_prof_reset(5); - active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + set_prof_active(true); for (i = 0; i < NTHREADS; i++) { thd_args[i] = i; @@ -224,9 +224,7 @@ TEST_BEGIN(test_prof_reset) assert_zu_eq(prof_tdata_count(), tdata_count, "Unexpected remaining tdata structures"); - active = false; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while deactivating profiling"); + set_prof_active(false); do_prof_reset(lg_prof_sample_orig); } @@ -237,6 +235,58 @@ TEST_END #undef RESET_INTERVAL #undef DUMP_INTERVAL +/* Test sampling at the same allocation site across resets. */ +#define NITER 10 +TEST_BEGIN(test_xallocx) +{ + size_t lg_prof_sample_orig; + unsigned i; + void *ptrs[NITER]; + + test_skip_if(!config_prof); + + lg_prof_sample_orig = get_lg_prof_sample(); + set_prof_active(true); + + /* Reset profiling. */ + do_prof_reset(0); + + for (i = 0; i < NITER; i++) { + void *p; + size_t sz, nsz; + + /* Reset profiling. */ + do_prof_reset(0); + + /* Allocate small object (which will be promoted). */ + p = ptrs[i] = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + /* Reset profiling. */ + do_prof_reset(0); + + /* Perform successful xallocx(). 
*/ + sz = sallocx(p, 0); + assert_zu_eq(xallocx(p, sz, 0, 0), sz, + "Unexpected xallocx() failure"); + + /* Perform unsuccessful xallocx(). */ + nsz = nallocx(sz+1, 0); + assert_zu_eq(xallocx(p, nsz, 0, 0), sz, + "Unexpected xallocx() success"); + } + + for (i = 0; i < NITER; i++) { + /* dallocx. */ + dallocx(ptrs[i], 0); + } + + set_prof_active(false); + do_prof_reset(lg_prof_sample_orig); +} +TEST_END +#undef NITER + int main(void) { @@ -247,5 +297,6 @@ main(void) return (test( test_prof_reset_basic, test_prof_reset_cleanup, - test_prof_reset)); + test_prof_reset, + test_xallocx)); } From 560a4e1e01d3733c2f107cdb3cc3580f3ed84442 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Sep 2015 16:18:53 -0700 Subject: [PATCH 0015/2608] Fix xallocx() bugs. Fix xallocx() bugs related to the 'extra' parameter when specified as non-zero. --- ChangeLog | 2 + include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/huge.h | 7 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/size_classes.sh | 5 + src/arena.c | 210 +++++++-------- src/huge.c | 111 ++++---- test/integration/rallocx.c | 2 +- test/integration/xallocx.c | 242 +++++++++++++++++- 9 files changed, 399 insertions(+), 184 deletions(-) diff --git a/ChangeLog b/ChangeLog index 63c9d56a..18d72ebd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -26,6 +26,8 @@ brevity. Much more detail can be found in the git revision history: with interposed resets (triggered via the "prof.reset" mallctl). This bug could cause data structure corruption that would most likely result in a segfault. + - Fix xallocx() bugs related to the 'extra' parameter when specified as + non-zero. 
* 4.0.0 (August 17, 2015) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4c1a471a..f77f2574 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -488,7 +488,7 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache); + size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 8b6c6cec..328eeed7 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -13,11 +13,10 @@ void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); +bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, - tcache_t *tcache); + size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f6e464e9..a341b253 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1096,7 +1096,7 @@ iralloct(tsd_t *tsd, 
void *ptr, size_t oldsize, size_t size, size_t alignment, zero, tcache, arena)); } - return (arena_ralloc(tsd, arena, ptr, oldsize, size, 0, alignment, zero, + return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, tcache)); } diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 1c2d6816..fc82036d 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -167,6 +167,8 @@ size_classes() { lg_large_minclass=$((${lg_grp} + 2)) fi fi + # Final written value is correct: + huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) done @@ -185,6 +187,7 @@ size_classes() { # - lookup_maxclass # - small_maxclass # - lg_large_minclass + # - huge_maxclass } cat <> LG_PAGE; size_t npages = (oldsize + large_pad) >> LG_PAGE; size_t followsize; - size_t usize_min = s2u(size); assert(oldsize == arena_mapbits_large_size_get(chunk, pageind) - large_pad); /* Try to extend the run. */ - assert(usize_min > oldsize); malloc_mutex_lock(&arena->lock); - if (pageind+npages < chunk_npages && - arena_mapbits_allocated_get(chunk, pageind+npages) == 0 && - (followsize = arena_mapbits_unallocated_size_get(chunk, - pageind+npages)) >= usize_min - oldsize) { + if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, + pageind+npages) != 0) + goto label_fail; + followsize = arena_mapbits_unallocated_size_get(chunk, pageind+npages); + if (oldsize + followsize >= usize_min) { /* * The next run is available and sufficiently large. Split the * following run, then merge the first part with the existing * allocation. 
*/ arena_run_t *run; - size_t flag_dirty, flag_unzeroed_mask, splitsize, usize; + size_t usize, splitsize, size, flag_dirty, flag_unzeroed_mask; - usize = s2u(size + extra); + usize = usize_max; while (oldsize + followsize < usize) usize = index2size(size2index(usize)-1); assert(usize >= usize_min); + assert(usize >= oldsize); splitsize = usize - oldsize; + if (splitsize == 0) + goto label_fail; run = &arena_miscelm_get(chunk, pageind+npages)->run; - if (arena_run_split_large(arena, run, splitsize, zero)) { - malloc_mutex_unlock(&arena->lock); - return (true); - } + if (arena_run_split_large(arena, run, splitsize, zero)) + goto label_fail; size = oldsize + splitsize; npages = (size + large_pad) >> LG_PAGE; @@ -2719,8 +2719,8 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, malloc_mutex_unlock(&arena->lock); return (false); } +label_fail: malloc_mutex_unlock(&arena->lock); - return (true); } @@ -2749,98 +2749,114 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t usize; + arena_chunk_t *chunk; + arena_t *arena; - /* Make sure extra can't cause size_t overflow. */ - if (unlikely(extra >= arena_maxclass)) - return (true); - - usize = s2u(size + extra); - if (usize == oldsize) { - /* Same size class. */ + if (oldsize == usize_max) { + /* Current size class is compatible and maximal. */ return (false); - } else { - arena_chunk_t *chunk; - arena_t *arena; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_node_arena_get(&chunk->node); - - if (usize < oldsize) { - /* Fill before shrinking in order avoid a race. 
*/ - arena_ralloc_junk_large(ptr, oldsize, usize); - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, - usize); - return (false); - } else { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, - oldsize, size, extra, zero); - if (config_fill && !ret && !zero) { - if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + - oldsize), 0xa5, isalloc(ptr, - config_prof) - oldsize); - } else if (unlikely(opt_zero)) { - memset((void *)((uintptr_t)ptr + - oldsize), 0, isalloc(ptr, - config_prof) - oldsize); - } - } - return (ret); - } } + + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena = extent_node_arena_get(&chunk->node); + + if (oldsize < usize_max) { + bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, + usize_min, usize_max, zero); + if (config_fill && !ret && !zero) { + if (unlikely(opt_junk_alloc)) { + memset((void *)((uintptr_t)ptr + oldsize), 0xa5, + isalloc(ptr, config_prof) - oldsize); + } else if (unlikely(opt_zero)) { + memset((void *)((uintptr_t)ptr + oldsize), 0, + isalloc(ptr, config_prof) - oldsize); + } + } + return (ret); + } + + assert(oldsize > usize_max); + /* Fill before shrinking in order avoid a race. */ + arena_ralloc_junk_large(ptr, oldsize, usize_max); + arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); + return (false); } bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { + size_t usize_min, usize_max; - if (likely(size <= arena_maxclass)) { + /* Check for size overflow. */ + if (unlikely(size > HUGE_MAXCLASS)) + return (true); + usize_min = s2u(size); + /* Clamp extra if necessary to avoid (size + extra) overflow. */ + if (unlikely(size + extra > HUGE_MAXCLASS)) + extra = HUGE_MAXCLASS - size; + usize_max = s2u(size + extra); + + if (likely(oldsize <= arena_maxclass && usize_min <= arena_maxclass)) { /* * Avoid moving the allocation if the size class can be left the * same. 
*/ - if (likely(oldsize <= arena_maxclass)) { - if (oldsize <= SMALL_MAXCLASS) { - assert( - arena_bin_info[size2index(oldsize)].reg_size - == oldsize); - if ((size + extra <= SMALL_MAXCLASS && - size2index(size + extra) == - size2index(oldsize)) || (size <= oldsize && - size + extra >= oldsize)) + if (oldsize <= SMALL_MAXCLASS) { + assert(arena_bin_info[size2index(oldsize)].reg_size == + oldsize); + if ((usize_max <= SMALL_MAXCLASS && + size2index(usize_max) == size2index(oldsize)) || + (size <= oldsize && usize_max >= oldsize)) + return (false); + } else { + if (usize_max > SMALL_MAXCLASS) { + if (!arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) return (false); - } else { - assert(size <= arena_maxclass); - if (size + extra > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, - size, extra, zero)) - return (false); - } } } /* Reallocation would require a move. */ return (true); - } else - return (huge_ralloc_no_move(ptr, oldsize, size, extra, zero)); + } else { + return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, + zero)); + } +} + +static void * +arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) +{ + + if (alignment == 0) + return (arena_malloc(tsd, arena, usize, zero, tcache)); + usize = sa2u(usize, alignment); + if (usize == 0) + return (NULL); + return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } void * arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache) + size_t alignment, bool zero, tcache_t *tcache) { void *ret; + size_t usize; - if (likely(size <= arena_maxclass)) { + usize = s2u(size); + if (usize == 0) + return (NULL); + + if (likely(usize <= arena_maxclass)) { size_t copysize; /* Try to avoid moving the allocation. 
*/ - if (!arena_ralloc_no_move(ptr, oldsize, size, extra, zero)) + if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -2848,53 +2864,23 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. */ - if (alignment != 0) { - size_t usize = sa2u(size + extra, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, tcache, - arena); - } else { - ret = arena_malloc(tsd, arena, size + extra, zero, - tcache); - } - - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. */ - if (alignment != 0) { - size_t usize = sa2u(size, alignment); - if (usize == 0) - return (NULL); - ret = ipalloct(tsd, usize, alignment, zero, - tcache, arena); - } else { - ret = arena_malloc(tsd, arena, size, zero, - tcache); - } - - if (ret == NULL) - return (NULL); - } + ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, + zero, tcache); + if (ret == NULL) + return (NULL); /* * Junk/zero-filling were already done by * ipalloc()/arena_malloc(). */ - /* - * Copy at most size bytes (not size+extra), since the caller - * has no expectation that the extra bytes will be reliably - * preserved. - */ - copysize = (size < oldsize) ? size : oldsize; + copysize = (usize < oldsize) ? 
usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); isqalloc(tsd, ptr, oldsize, tcache); } else { - ret = huge_ralloc(tsd, arena, ptr, oldsize, size, extra, - alignment, zero, tcache); + ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, + zero, tcache); } return (ret); } diff --git a/src/huge.c b/src/huge.c index 4d5887c4..187bdaa9 100644 --- a/src/huge.c +++ b/src/huge.c @@ -126,18 +126,19 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize, - size_t size, size_t extra, bool zero) +huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t usize_next; + size_t usize, usize_next; extent_node_t *node; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool zeroed; /* Increase usize to incorporate extra. */ - while (usize < s2u(size+extra) && (usize_next = s2u(usize+1)) < oldsize) - usize = usize_next; + for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) + <= oldsize; usize = usize_next) + ; /* Do nothing. */ if (oldsize == usize) return; @@ -195,6 +196,8 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) arena = extent_node_arena_get(node); chunk_hooks = chunk_hooks_get(arena); + assert(oldsize > usize); + /* Split excess chunks. */ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); if (cdiff != 0 && chunk_hooks.split(ptr, CHUNK_CEILING(oldsize), @@ -230,18 +233,11 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) } static bool -huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { - size_t usize; +huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. 
*/ - return (true); - } - node = huge_node_get(ptr); arena = extent_node_arena_get(node); malloc_mutex_lock(&arena->huge_mtx); @@ -282,89 +278,76 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t size, bool zero) { } bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, + size_t usize_max, bool zero) { - size_t usize; - - /* Both allocations must be huge to avoid a move. */ - if (oldsize < chunksize) - return (true); assert(s2u(oldsize) == oldsize); - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ + + /* Both allocations must be huge to avoid a move. */ + if (oldsize < chunksize || usize_max < chunksize) return (true); + + if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { + /* Attempt to expand the allocation in-place. */ + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + return (false); + /* Try again, this time with usize_min. */ + if (usize_min < usize_max && CHUNK_CEILING(usize_min) > + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, + oldsize, usize_min, zero)) + return (false); } /* * Avoid moving the allocation if the existing chunk size accommodates * the new size. */ - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(s2u(size+extra))) { - huge_ralloc_no_move_similar(ptr, oldsize, usize, size, extra, + if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) + && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { + huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, zero); return (false); } /* Attempt to shrink the allocation in-place. 
*/ - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize)) - return (huge_ralloc_no_move_shrink(ptr, oldsize, usize)); + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) + return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + return (true); +} - /* Attempt to expand the allocation in-place. */ - if (huge_ralloc_no_move_expand(ptr, oldsize, size + extra, zero)) { - if (extra == 0) - return (true); +static void * +huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) +{ - /* Try again, this time without extra. */ - return (huge_ralloc_no_move_expand(ptr, oldsize, size, zero)); - } - return (false); + if (alignment <= chunksize) + return (huge_malloc(tsd, arena, usize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(ptr, oldsize, size, extra, zero)) + if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) return (ptr); /* - * size and oldsize are different enough that we need to use a + * usize and oldsize are different enough that we need to use a * different size class. In that case, fall back to allocating new * space and copying. */ - if (alignment > chunksize) { - ret = huge_palloc(tsd, arena, size + extra, alignment, zero, - tcache); - } else - ret = huge_malloc(tsd, arena, size + extra, zero, tcache); + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, + tcache); + if (ret == NULL) + return (NULL); - if (ret == NULL) { - if (extra == 0) - return (NULL); - /* Try again, this time without extra. 
*/ - if (alignment > chunksize) { - ret = huge_palloc(tsd, arena, size, alignment, zero, - tcache); - } else - ret = huge_malloc(tsd, arena, size, zero, tcache); - - if (ret == NULL) - return (NULL); - } - - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? size : oldsize; + copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); isqalloc(tsd, ptr, oldsize, tcache); return (ret); diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 8b6cde31..be1b27b7 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -22,7 +22,7 @@ TEST_BEGIN(test_grow_and_shrink) szs[j-1], szs[j-1]+1); szs[j] = sallocx(q, 0); assert_zu_ne(szs[j], szs[j-1]+1, - "Expected size to at least: %zu", szs[j-1]+1); + "Expected size to be at least: %zu", szs[j-1]+1); p = q; } diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index ab4cf945..8f0de630 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -48,6 +48,243 @@ TEST_BEGIN(test_no_move_fail) } TEST_END +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nsmall(void) +{ + + return (get_nsizes_impl("arenas.nbins")); +} + +static unsigned +get_nlarge(void) +{ + + return (get_nsizes_impl("arenas.nlruns")); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + + return (ret); +} + +static size_t +get_small_size(size_t ind) +{ + + return (get_size_impl("arenas.bin.0.size", ind)); +} + +static size_t +get_large_size(size_t ind) +{ + + return (get_size_impl("arenas.lrun.0.size", ind)); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + +TEST_BEGIN(test_extra_small) +{ + size_t small0, small1, hugemax; + void *p; + + /* Get size classes. */ + small0 = get_small_size(0); + small1 = get_small_size(1); + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(small0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + assert_zu_eq(xallocx(p, small1, 0, 0), small0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, small1, 0, 0), small0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, + "Unexpected xallocx() behavior"); + + /* Test size+extra overflow. */ + assert_zu_eq(xallocx(p, small0, hugemax - small0 + 1, 0), small0, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0, + "Unexpected xallocx() behavior"); + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_extra_large) +{ + size_t smallmax, large0, large1, large2, huge0, hugemax; + void *p; + + /* Get size classes. */ + smallmax = get_small_size(get_nsmall()-1); + large0 = get_large_size(0); + large1 = get_large_size(1); + large2 = get_large_size(2); + huge0 = get_huge_size(0); + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(large2, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + assert_zu_eq(xallocx(p, large2, 0, 0), large2, + "Unexpected xallocx() behavior"); + /* Test size decrease with zero extra. 
*/ + assert_zu_eq(xallocx(p, large0, 0, 0), large0, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, smallmax, 0, 0), large0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, large2, 0, 0), large2, + "Unexpected xallocx() behavior"); + /* Test size decrease with non-zero extra. */ + assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, large1, large2 - large1, 0), large2, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, large0, large1 - large0, 0), large1, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, 0), large0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, large0, 0, 0), large0, + "Unexpected xallocx() behavior"); + /* Test size increase with zero extra. */ + assert_zu_eq(xallocx(p, large2, 0, 0), large2, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, huge0, 0, 0), large2, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, large0, 0, 0), large0, + "Unexpected xallocx() behavior"); + /* Test size increase with non-zero extra. */ + assert_zu_lt(xallocx(p, large0, huge0 - large0, 0), huge0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, large0, 0, 0), large0, + "Unexpected xallocx() behavior"); + /* Test size increase with non-zero extra. */ + assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, large2, 0, 0), large2, + "Unexpected xallocx() behavior"); + /* Test size+extra overflow. */ + assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, 0), huge0, + "Unexpected xallocx() behavior"); + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_extra_huge) +{ + size_t largemax, huge0, huge1, huge2, hugemax; + void *p; + + /* Get size classes. 
*/ + largemax = get_large_size(get_nlarge()-1); + huge0 = get_huge_size(0); + huge1 = get_huge_size(1); + huge2 = get_huge_size(2); + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(huge2, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + "Unexpected xallocx() behavior"); + /* Test size decrease with zero extra. */ + assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, largemax, 0, 0), huge0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + "Unexpected xallocx() behavior"); + /* Test size decrease with non-zero extra. */ + assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, huge1, huge2 - huge1, 0), huge2, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, largemax, huge0 - largemax, 0), huge0, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + "Unexpected xallocx() behavior"); + /* Test size increase with zero extra. */ + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, hugemax+1, 0, 0), huge2, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + "Unexpected xallocx() behavior"); + /* Test size increase with non-zero extra. */ + assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + "Unexpected xallocx() behavior"); + /* Test size increase with non-zero extra. */ + assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2, + "Unexpected xallocx() behavior"); + + assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + "Unexpected xallocx() behavior"); + /* Test size+extra overflow. 
*/ + assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, 0), hugemax, + "Unexpected xallocx() behavior"); + + dallocx(p, 0); +} +TEST_END + int main(void) { @@ -55,5 +292,8 @@ main(void) return (test( test_same_size, test_extra_no_move, - test_no_move_fail)); + test_no_move_fail, + test_extra_small, + test_extra_large, + test_extra_huge)); } From 676df88e48ae5ab77b05d78cb511cfa2e57d277f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Sep 2015 20:50:20 -0700 Subject: [PATCH 0016/2608] Rename arena_maxclass to large_maxclass. arena_maxclass is no longer an appropriate name, because arenas also manage huge allocations. --- include/jemalloc/internal/arena.h | 4 ++-- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/private_symbols.txt | 2 +- src/arena.c | 20 +++++++++---------- src/tcache.c | 6 +++--- test/unit/junk.c | 12 +++++------ test/unit/stats.c | 6 +++--- test/unit/zero.c | 4 ++-- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f77f2574..9712c1c9 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -424,7 +424,7 @@ extern arena_bin_info_t arena_bin_info[NBINS]; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ -extern size_t arena_maxclass; /* Max size class for arenas. */ +extern size_t large_maxclass; /* Max large size class. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ @@ -1143,7 +1143,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, zero)); } else return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= arena_maxclass)) { + } else if (likely(size <= large_maxclass)) { /* * Initialize tcache after checking size in order to avoid * infinite recursion during tcache initialization. diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a341b253..e2959f18 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -705,7 +705,7 @@ sa2u(size_t size, size_t alignment) } /* Try for a large size class. */ - if (likely(size <= arena_maxclass) && likely(alignment < chunksize)) { + if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { /* * We can't achieve subpage alignment, so round up alignment * to the minimum that can actually be supported. diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ed1f6c29..9d21a807 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -58,7 +58,6 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get -arena_maxclass arena_maxrun arena_maybe_purge arena_metadata_allocated_add @@ -285,6 +284,7 @@ ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork +large_maxclass lg_floor malloc_cprintf malloc_mutex_init diff --git a/src/arena.c b/src/arena.c index b41f0ce8..a119d268 100644 --- a/src/arena.c +++ b/src/arena.c @@ -11,7 +11,7 @@ arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ -size_t arena_maxclass; /* Max size class for arenas. */ +size_t large_maxclass; /* Max large size class. 
*/ static size_t small_maxrun; /* Max run size used for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ unsigned nlclasses; /* Number of large size classes. */ @@ -2357,7 +2357,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ ret = arena_malloc(tsd, arena, usize, zero, tcache); - } else if (usize <= arena_maxclass && alignment <= PAGE) { + } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. * However, the cached pointer may be at a random offset from @@ -2368,7 +2368,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { - if (likely(usize <= arena_maxclass)) { + if (likely(usize <= large_maxclass)) { ret = arena_palloc_large(tsd, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) @@ -2800,7 +2800,7 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, extra = HUGE_MAXCLASS - size; usize_max = s2u(size + extra); - if (likely(oldsize <= arena_maxclass && usize_min <= arena_maxclass)) { + if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { /* * Avoid moving the allocation if the size class can be left the * same. @@ -2852,7 +2852,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, if (usize == 0) return (NULL); - if (likely(usize <= arena_maxclass)) { + if (likely(usize <= large_maxclass)) { size_t copysize; /* Try to avoid moving the allocation. 
*/ @@ -3258,17 +3258,17 @@ arena_boot(void) arena_maxrun = chunksize - (map_bias << LG_PAGE); assert(arena_maxrun > 0); - arena_maxclass = index2size(size2index(chunksize)-1); - if (arena_maxclass > arena_maxrun) { + large_maxclass = index2size(size2index(chunksize)-1); + if (large_maxclass > arena_maxrun) { /* * For small chunk sizes it's possible for there to be fewer * non-header pages available than are necessary to serve the * size classes just below chunksize. */ - arena_maxclass = arena_maxrun; + large_maxclass = arena_maxrun; } - assert(arena_maxclass > 0); - nlclasses = size2index(arena_maxclass) - size2index(SMALL_MAXCLASS); + assert(large_maxclass > 0); + nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); diff --git a/src/tcache.c b/src/tcache.c index f1a30d50..fdafd0c6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -496,13 +496,13 @@ tcache_boot(void) unsigned i; /* - * If necessary, clamp opt_lg_tcache_max, now that arena_maxclass is + * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. 
*/ if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > arena_maxclass) - tcache_maxclass = arena_maxclass; + else if ((1U << opt_lg_tcache_max) > large_maxclass) + tcache_maxclass = large_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max); diff --git a/test/unit/junk.c b/test/unit/junk.c index 01d314b3..b23dd1e9 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -140,7 +140,7 @@ TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); - test_junk(SMALL_MAXCLASS+1, arena_maxclass); + test_junk(SMALL_MAXCLASS+1, large_maxclass); } TEST_END @@ -148,7 +148,7 @@ TEST_BEGIN(test_junk_huge) { test_skip_if(!config_fill); - test_junk(arena_maxclass+1, chunksize*2); + test_junk(large_maxclass+1, chunksize*2); } TEST_END @@ -172,8 +172,8 @@ arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) { arena_ralloc_junk_large_orig(ptr, old_usize, usize); - assert_zu_eq(old_usize, arena_maxclass, "Unexpected old_usize"); - assert_zu_eq(usize, shrink_size(arena_maxclass), "Unexpected usize"); + assert_zu_eq(old_usize, large_maxclass, "Unexpected old_usize"); + assert_zu_eq(usize, shrink_size(large_maxclass), "Unexpected usize"); most_recently_trimmed = ptr; } @@ -181,13 +181,13 @@ TEST_BEGIN(test_junk_large_ralloc_shrink) { void *p1, *p2; - p1 = mallocx(arena_maxclass, 0); + p1 = mallocx(large_maxclass, 0); assert_ptr_not_null(p1, "Unexpected mallocx() failure"); arena_ralloc_junk_large_orig = arena_ralloc_junk_large; arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; - p2 = rallocx(p1, shrink_size(arena_maxclass), 0); + p2 = rallocx(p1, shrink_size(large_maxclass), 0); assert_ptr_eq(p1, p2, "Unexpected move during shrink"); arena_ralloc_junk_large = arena_ralloc_junk_large_orig; diff --git a/test/unit/stats.c b/test/unit/stats.c index 81ef0b72..8e4bc631 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -42,7 +42,7 @@ 
TEST_BEGIN(test_stats_huge) size_t sz; int expected = config_stats ? 0 : ENOENT; - p = mallocx(arena_maxclass+1, 0); + p = mallocx(large_maxclass+1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, @@ -88,7 +88,7 @@ TEST_BEGIN(test_stats_arenas_summary) little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx(arena_maxclass, 0); + large = mallocx(large_maxclass, 0); assert_ptr_not_null(large, "Unexpected mallocx() failure"); huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); @@ -200,7 +200,7 @@ TEST_BEGIN(test_stats_arenas_large) assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), 0, "Unexpected mallctl() failure"); - p = mallocx(arena_maxclass, 0); + p = mallocx(large_maxclass, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, diff --git a/test/unit/zero.c b/test/unit/zero.c index 65a8f0c9..93afc2b8 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -55,7 +55,7 @@ TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); - test_zero(SMALL_MAXCLASS+1, arena_maxclass); + test_zero(SMALL_MAXCLASS+1, large_maxclass); } TEST_END @@ -63,7 +63,7 @@ TEST_BEGIN(test_zero_huge) { test_skip_if(!config_fill); - test_zero(arena_maxclass+1, chunksize*2); + test_zero(large_maxclass+1, chunksize*2); } TEST_END From 8f57e3f1aeb86021b3d078b825bc8c42b2a9af6f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Sep 2015 20:59:00 -0700 Subject: [PATCH 0017/2608] Remove check_stress from check target's dependencies. Prior to this change the debug build/test command needed to look like: make all tests && make check_unit && make check_integration && \ make check_integration_prof This is now simply: make check Rename the check_stress target to stress. 
--- Makefile.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile.in b/Makefile.in index 5084b1a4..9b2d0a1b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -343,9 +343,9 @@ check_unit_dir: @mkdir -p $(objroot)test/unit check_integration_dir: @mkdir -p $(objroot)test/integration -check_stress_dir: +stress_dir: @mkdir -p $(objroot)test/stress -check_dir: check_unit_dir check_integration_dir check_stress_dir +check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) @@ -355,7 +355,7 @@ ifeq ($(enable_prof), 1) endif check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) -check_stress: tests_stress check_stress_dir +stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) check: tests check_dir check_integration_prof $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) @@ -372,7 +372,7 @@ coverage_integration: check_integration $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) -coverage_stress: check_stress +coverage_stress: stress $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) From 4acb6c7ff3411ddc4d180b0cbdba4fd2c3651ef0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 22:31:32 -0700 Subject: [PATCH 0018/2608] Fix ixallocx_prof() size+extra overflow. Fix ixallocx_prof() to clamp the extra parameter if size+extra would overflow HUGE_MAXCLASS. 
--- src/jemalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7cf1487a..6ed3d4e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2275,6 +2275,9 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(ptr); + /* Clamp extra if necessary to avoid (size + extra) overflow. */ + if (unlikely(size + extra > HUGE_MAXCLASS)) + extra = HUGE_MAXCLASS - size; /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in From 46ff0491280635e51c9771d56a2d64dc0c4d6bd2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 22:40:42 -0700 Subject: [PATCH 0019/2608] Optimize irallocx_prof() to optimistically update the sampler state. --- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 6ed3d4e2..74fab0fa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2124,7 +2124,7 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(tsd, *usize, false); + tctx = prof_alloc_prep(tsd, *usize, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, oldptr, old_usize, size, alignment, *usize, zero, tcache, arena, tctx); @@ -2133,7 +2133,7 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, tcache, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, false); + prof_alloc_rollback(tsd, tctx, true); return (NULL); } @@ -2148,7 +2148,7 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, */ *usize = isalloc(p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, false, old_usize, old_tctx); + prof_realloc(tsd, p, *usize, tctx, true, old_usize, old_tctx); return (p); } From ef363de7010b5e13f4e1c0d7b3a109362bda7aa7 Mon Sep 
17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 22:45:31 -0700 Subject: [PATCH 0020/2608] Fix irealloc_prof() to prof_alloc_rollback() on OOM. --- ChangeLog | 1 + src/jemalloc.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 18d72ebd..1625776d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,6 +28,7 @@ brevity. Much more detail can be found in the git revision history: segfault. - Fix xallocx() bugs related to the 'extra' parameter when specified as non-zero. + - Fix irealloc_prof() to prof_alloc_rollback() on OOM. * 4.0.0 (August 17, 2015) diff --git a/src/jemalloc.c b/src/jemalloc.c index 74fab0fa..68017025 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1712,8 +1712,10 @@ irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) p = irealloc_prof_sample(tsd, oldptr, old_usize, usize, tctx); else p = iralloc(tsd, oldptr, old_usize, usize, 0, false); - if (p == NULL) + if (unlikely(p == NULL)) { + prof_alloc_rollback(tsd, tctx, true); return (NULL); + } prof_realloc(tsd, p, usize, tctx, true, old_usize, old_tctx); return (p); From cec0d63d8bc46205d38456024176a0ece590253e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 23:17:25 -0700 Subject: [PATCH 0021/2608] Make one call to prof_active_get_unlocked() per allocation event. Make one call to prof_active_get_unlocked() per allocation event, and use the result throughout the relevant functions that handle an allocation event. Also add a missing check in prof_realloc(). These fixes protect allocation events against concurrent prof_active changes. --- ChangeLog | 4 ++++ include/jemalloc/internal/prof.h | 18 ++++++++++-------- src/jemalloc.c | 29 +++++++++++++++++++---------- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/ChangeLog b/ChangeLog index 1625776d..21edac0f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -29,6 +29,10 @@ brevity. 
Much more detail can be found in the git revision history: - Fix xallocx() bugs related to the 'extra' parameter when specified as non-zero. - Fix irealloc_prof() to prof_alloc_rollback() on OOM. + - Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. * 4.0.0 (August 17, 2015) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index eca8aa8a..c66611ca 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -331,14 +331,16 @@ bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool update); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); prof_tctx_t *prof_tctx_get(const void *ptr); void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool updated, size_t old_usize, prof_tctx_t *old_tctx); + prof_tctx_t *tctx, bool prof_active, bool updated, + size_t old_usize, prof_tctx_t *old_tctx); void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif @@ -443,7 +445,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool update) +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { prof_tctx_t *ret; prof_tdata_t *tdata; @@ -451,8 +453,8 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool 
update) assert(usize == s2u(usize)); - if (!prof_active_get_unlocked() || likely(prof_sample_accum_update(tsd, - usize, update, &tdata))) + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) ret = (prof_tctx_t *)(uintptr_t)1U; else { bt_init(&bt, tdata->vec); @@ -479,17 +481,17 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool updated, size_t old_usize, prof_tctx_t *old_tctx) + bool prof_active, bool updated, size_t old_usize, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (!updated && ptr != NULL) { + if (prof_active && !updated && ptr != NULL) { assert(usize == isalloc(ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* - * Don't sample. The usize passed to PROF_ALLOC_PREP() + * Don't sample. The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a * backtrace was captured for this allocation, even * though its actual usize was insufficient to cross the diff --git a/src/jemalloc.c b/src/jemalloc.c index 68017025..5a32baf4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1378,7 +1378,7 @@ imalloc_prof(tsd_t *tsd, size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = imalloc_prof_sample(tsd, usize, tctx); else @@ -1468,7 +1468,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) void *p; prof_tctx_t *tctx; - tctx = prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = imemalign_prof_sample(tsd, alignment, usize, tctx); else @@ -1599,7 +1599,7 @@ icalloc_prof(tsd_t *tsd, size_t usize) void *p; prof_tctx_t *tctx; - tctx = 
prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = icalloc_prof_sample(tsd, usize, tctx); else @@ -1704,10 +1704,12 @@ JEMALLOC_ALWAYS_INLINE_C void * irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) { void *p; + bool prof_active; prof_tctx_t *old_tctx, *tctx; + prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(tsd, usize, true); + tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, oldptr, old_usize, usize, tctx); else @@ -1716,7 +1718,8 @@ irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_realloc(tsd, p, usize, tctx, true, old_usize, old_tctx); + prof_realloc(tsd, p, usize, tctx, prof_active, true, old_usize, + old_tctx); return (p); } @@ -2014,7 +2017,7 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, &zero, &tcache, &arena))) return (NULL); - tctx = prof_alloc_prep(tsd, *usize, true); + tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) { p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, zero, tcache, arena); @@ -2123,10 +2126,12 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, arena_t *arena) { void *p; + bool prof_active; prof_tctx_t *old_tctx, *tctx; + prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(oldptr); - tctx = prof_alloc_prep(tsd, *usize, true); + tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, oldptr, old_usize, size, alignment, *usize, zero, tcache, arena, tctx); @@ -2150,7 +2155,8 @@ irallocx_prof(tsd_t *tsd, 
void *oldptr, size_t old_usize, size_t size, */ *usize = isalloc(p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, true, old_usize, old_tctx); + prof_realloc(tsd, p, *usize, tctx, prof_active, true, + old_usize, old_tctx); return (p); } @@ -2274,8 +2280,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t max_usize, usize; + bool prof_active; prof_tctx_t *old_tctx, *tctx; + prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(ptr); /* Clamp extra if necessary to avoid (size + extra) overflow. */ if (unlikely(size + extra > HUGE_MAXCLASS)) @@ -2288,7 +2296,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, */ max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); - tctx = prof_alloc_prep(tsd, max_usize, false); + tctx = prof_alloc_prep(tsd, max_usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, alignment, zero, max_usize, tctx); @@ -2300,7 +2308,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx, false); return (usize); } - prof_realloc(tsd, ptr, usize, tctx, false, old_usize, old_tctx); + prof_realloc(tsd, ptr, usize, tctx, prof_active, false, old_usize, + old_tctx); return (usize); } From d9704042ee436cdb329a0777ad350b9fef78891d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 23:28:32 -0700 Subject: [PATCH 0022/2608] s/oldptr/old_ptr/g --- src/jemalloc.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5a32baf4..0c66f4bf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1682,7 +1682,7 @@ label_return: } static void * -irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize, +irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, prof_tctx_t 
*tctx) { void *p; @@ -1690,30 +1690,30 @@ irealloc_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize, if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, oldptr, old_usize, LARGE_MINCLASS, 0, false); + p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = iralloc(tsd, oldptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t usize) +irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(oldptr); + old_tctx = prof_tctx_get(old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = irealloc_prof_sample(tsd, oldptr, old_usize, usize, tctx); + p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); else - p = iralloc(tsd, oldptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -2098,7 +2098,7 @@ label_oom: } static void * -irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, +irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { @@ -2107,13 +2107,13 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsd, oldptr, old_usize, LARGE_MINCLASS, alignment, + p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else { - p = 
iralloct(tsd, oldptr, old_usize, size, alignment, zero, + p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, tcache, arena); } @@ -2121,7 +2121,7 @@ irallocx_prof_sample(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, +irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena) { @@ -2130,13 +2130,13 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(oldptr); + old_tctx = prof_tctx_get(old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd, oldptr, old_usize, size, + p = irallocx_prof_sample(tsd, old_ptr, old_usize, size, alignment, *usize, zero, tcache, arena, tctx); } else { - p = iralloct(tsd, oldptr, old_usize, size, alignment, zero, + p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, tcache, arena); } if (unlikely(p == NULL)) { @@ -2144,7 +2144,7 @@ irallocx_prof(tsd_t *tsd, void *oldptr, size_t old_usize, size_t size, return (NULL); } - if (p == oldptr && alignment != 0) { + if (p == old_ptr && alignment != 0) { /* * The allocation did not move, so it is possible that the size * class is smaller than would guarantee the requested From ce9a4e34795a22838b97a5f10cd8090ab21f22fd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 23:31:02 -0700 Subject: [PATCH 0023/2608] s/max_usize/usize_max/g --- src/jemalloc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0c66f4bf..a7c7a03d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2251,7 +2251,7 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, static size_t 
ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t max_usize, bool zero, prof_tctx_t *tctx) + size_t alignment, size_t usize_max, bool zero, prof_tctx_t *tctx) { size_t usize; @@ -2265,7 +2265,7 @@ ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, (SMALL_MAXCLASS+1), alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); - if (max_usize < LARGE_MINCLASS) + if (usize_max < LARGE_MINCLASS) arena_prof_promoted(ptr, usize); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, @@ -2279,7 +2279,7 @@ JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { - size_t max_usize, usize; + size_t usize_max, usize; bool prof_active; prof_tctx_t *old_tctx, *tctx; @@ -2294,12 +2294,12 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - max_usize = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, + usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); - tctx = prof_alloc_prep(tsd, max_usize, prof_active, false); + tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, max_usize, tctx); + alignment, zero, usize_max, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); From 23f6e103c871b4db5b315caf1a8d5c46d9675691 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 23:32:26 -0700 Subject: [PATCH 0024/2608] Fix ixallocx_prof_sample() argument order reversal. Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() in the correct order. 
--- ChangeLog | 2 ++ src/jemalloc.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 21edac0f..95a4ac52 100644 --- a/ChangeLog +++ b/ChangeLog @@ -33,6 +33,8 @@ brevity. Much more detail can be found in the git revision history: the result throughout the relevant functions that handle an allocation event. Also add a missing check in prof_realloc(). These fixes protect allocation events against concurrent prof_active changes. + - Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() in + the correct order. * 4.0.0 (August 17, 2015) diff --git a/src/jemalloc.c b/src/jemalloc.c index a7c7a03d..2566f9b6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2299,7 +2299,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, zero, usize_max, tctx); + alignment, usize_max, zero, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); From ea8d97b8978a0c0423f0ed64332463a25b787c3d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 23:44:37 -0700 Subject: [PATCH 0025/2608] Fix prof_{malloc,free}_sample_object() call order in prof_realloc(). Fix prof_realloc() to call prof_free_sampled_object() after calling prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were the same, the tctx could have been prematurely destroyed. --- ChangeLog | 3 +++ include/jemalloc/internal/prof.h | 11 ++++++++--- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 95a4ac52..269d0898 100644 --- a/ChangeLog +++ b/ChangeLog @@ -35,6 +35,9 @@ brevity. Much more detail can be found in the git revision history: allocation events against concurrent prof_active changes. 
- Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() in the correct order. + - Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. * 4.0.0 (August 17, 2015) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index c66611ca..5eb5926b 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -483,6 +483,7 @@ JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, size_t old_usize, prof_tctx_t *old_tctx) { + bool sampled, old_sampled; cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); @@ -501,12 +502,16 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } } - if (unlikely((uintptr_t)old_tctx > (uintptr_t)1U)) - prof_free_sampled_object(tsd, old_usize, old_tctx); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + + if (unlikely(sampled)) prof_malloc_sample_object(ptr, usize, tctx); else prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + + if (unlikely(old_sampled)) + prof_free_sampled_object(tsd, old_usize, old_tctx); } JEMALLOC_ALWAYS_INLINE void From 708ed79834fc3b8e5b14dbb0128a0ebfce63a38f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Sep 2015 23:48:11 -0700 Subject: [PATCH 0026/2608] Resolve an unsupported special case in arena_prof_tctx_set(). Add arena_prof_tctx_reset() and use it instead of arena_prof_tctx_set() when resetting the tctx pointer during reallocation, which happens whenever an originally sampled reallocated object is not sampled during reallocation. This regression was introduced by 594c759f37c301d0245dc2accf4d4aaf9d202819 (Optimize arena_prof_tctx_set().) 
--- include/jemalloc/internal/arena.h | 31 +++++++++++++++++++ include/jemalloc/internal/huge.h | 1 + include/jemalloc/internal/private_symbols.txt | 3 ++ include/jemalloc/internal/prof.h | 20 ++++++++++-- src/huge.c | 7 +++++ src/jemalloc.c | 6 ++-- 6 files changed, 62 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9712c1c9..12c61797 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -556,6 +556,8 @@ unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); arena_t *arena_aalloc(const void *ptr); @@ -1126,6 +1128,35 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) huge_prof_tctx_set(ptr, tctx); } +JEMALLOC_INLINE void +arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && + (uintptr_t)old_tctx > (uintptr_t)1U))) { + arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + if (likely(chunk != ptr)) { + size_t pageind; + arena_chunk_map_misc_t *elm; + + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> + LG_PAGE; + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); + assert(arena_mapbits_large_get(chunk, pageind) != 0); + + elm = arena_miscelm_get(chunk, pageind); + atomic_write_p(&elm->prof_tctx_pun, + (prof_tctx_t *)(uintptr_t)1U); + } else + huge_prof_tctx_reset(ptr); + } +} + JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache) diff --git 
a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 328eeed7..ece7af98 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -26,6 +26,7 @@ arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(const void *ptr); prof_tctx_t *huge_prof_tctx_get(const void *ptr); void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 9d21a807..a90021aa 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -80,6 +80,7 @@ arena_prof_accum_impl arena_prof_accum_locked arena_prof_promoted arena_prof_tctx_get +arena_prof_tctx_reset arena_prof_tctx_set arena_ptr_small_binind_get arena_purge_all @@ -250,6 +251,7 @@ huge_dalloc_junk huge_malloc huge_palloc huge_prof_tctx_get +huge_prof_tctx_reset huge_prof_tctx_set huge_ralloc huge_ralloc_no_move @@ -379,6 +381,7 @@ prof_reset prof_sample_accum_update prof_sample_threshold_update prof_tctx_get +prof_tctx_reset prof_tctx_set prof_tdata_cleanup prof_tdata_get diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 5eb5926b..e5198c3e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -335,11 +335,13 @@ prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); prof_tctx_t *prof_tctx_get(const void *ptr); void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *tctx); void prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void 
*ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif @@ -414,6 +416,17 @@ prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) arena_prof_tctx_set(ptr, usize, tctx); } +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, + prof_tctx_t *old_tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); +} + JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) @@ -481,7 +494,8 @@ prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, bool updated, size_t old_usize, prof_tctx_t *old_tctx) + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { bool sampled, old_sampled; @@ -508,7 +522,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely(sampled)) prof_malloc_sample_object(ptr, usize, tctx); else - prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_reset(ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); diff --git a/src/huge.c b/src/huge.c index 187bdaa9..f8778db2 100644 --- a/src/huge.c +++ b/src/huge.c @@ -424,3 +424,10 @@ huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) extent_node_prof_tctx_set(node, tctx); malloc_mutex_unlock(&arena->huge_mtx); } + +void +huge_prof_tctx_reset(const void *ptr) +{ + + huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 2566f9b6..a29e6139 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1718,7 +1718,7 @@ 
irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_realloc(tsd, p, usize, tctx, prof_active, true, old_usize, + prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); return (p); @@ -2155,7 +2155,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, */ *usize = isalloc(p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, prof_active, true, + prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); return (p); @@ -2308,7 +2308,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx, false); return (usize); } - prof_realloc(tsd, ptr, usize, tctx, prof_active, false, old_usize, + prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, old_tctx); return (usize); From 0108b1fd0411db444d4074582013879f757fa1df Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 00:08:05 -0700 Subject: [PATCH 0027/2608] Don't run stress tests as part of check target. This change was intended as part of 8f57e3f1aeb86021b3d078b825bc8c42b2a9af6f (Remove check_stress from check target's dependencies.). 
--- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 9b2d0a1b..a836e749 100644 --- a/Makefile.in +++ b/Makefile.in @@ -358,7 +358,7 @@ check_integration: tests_integration check_integration_dir stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) check: tests check_dir check_integration_prof - $(SHELL) $(objroot)test/test.sh $(TESTS:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit From 8c485b02a61ab96d4d248e234302edf57577b77d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 00:49:09 -0700 Subject: [PATCH 0028/2608] Fix ixallocx_prof() to check for size greater than HUGE_MAXCLASS. --- ChangeLog | 3 +-- src/jemalloc.c | 6 +++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 269d0898..e4da6384 100644 --- a/ChangeLog +++ b/ChangeLog @@ -26,8 +26,7 @@ brevity. Much more detail can be found in the git revision history: with interposed resets (triggered via the "prof.reset" mallctl). This bug could cause data structure corruption that would most likely result in a segfault. - - Fix xallocx() bugs related to the 'extra' parameter when specified as - non-zero. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. - Fix irealloc_prof() to prof_alloc_rollback() on OOM. 
- Make one call to prof_active_get_unlocked() per allocation event, and use the result throughout the relevant functions that handle an allocation diff --git a/src/jemalloc.c b/src/jemalloc.c index a29e6139..f403306b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2286,8 +2286,12 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(ptr); /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) + if (unlikely(size + extra > HUGE_MAXCLASS)) { + /* Check for size overflow. */ + if (size > HUGE_MAXCLASS) + return (old_usize); extra = HUGE_MAXCLASS - size; + } /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in From 6d91929e52ba87c222f4f92bd1c9ddc9e7c2c083 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 10:42:36 -0700 Subject: [PATCH 0029/2608] Address portability issues on Solaris. Don't assume Bourne shell is in /bin/sh when running size_classes.sh . Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. This resolves #275. 
--- configure.ac | 3 ++- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 5e77b680..2308b990 100644 --- a/configure.ac +++ b/configure.ac @@ -1621,8 +1621,9 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" + "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" ], [ + SHELL="${SHELL}" srcdir="${srcdir}" objroot="${objroot}" LG_QUANTA="${LG_QUANTA}" diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e2959f18..8536a3ed 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -232,7 +232,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# ifdef __sparc64__ +# if (defined(__sparc64__) || defined(__sparcv9)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) From 78ae1ac486ffd7953536786c9a5f9dc2bda78858 Mon Sep 17 00:00:00 2001 From: Dmitry-Me Date: Tue, 8 Sep 2015 15:09:20 +0300 Subject: [PATCH 0030/2608] Reduce variable scope. This resolves #274. 
--- src/chunk_dss.c | 8 +++----- src/chunk_mmap.c | 6 ++++-- src/prof.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/chunk_dss.c b/src/chunk_dss.c index de0546d0..61fc9169 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -69,8 +69,6 @@ void * chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); assert(alignment > 0 && (alignment & chunksize_mask) == 0); @@ -84,9 +82,6 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, malloc_mutex_lock(&dss_mtx); if (dss_prev != (void *)-1) { - size_t gap_size, cpad_size; - void *cpad, *dss_next; - intptr_t incr; /* * The loop is necessary to recover from races with other @@ -94,6 +89,9 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, * malloc. */ do { + void *ret, *cpad, *dss_next; + size_t gap_size, cpad_size; + intptr_t incr; /* Avoid an unnecessary system call. */ if (new_addr != NULL && dss_max != new_addr) break; diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 36eb0754..b9ba7419 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -6,14 +6,16 @@ static void * chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret, *pages; - size_t alloc_size, leadsize; + void *ret; + size_t alloc_size; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); do { + void *pages; + size_t leadsize; pages = pages_map(NULL, alloc_size); if (pages == NULL) return (NULL); diff --git a/src/prof.c b/src/prof.c index 7427bf54..d68478fd 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1577,7 +1577,6 @@ prof_idump(void) { tsd_t *tsd; prof_tdata_t *tdata; - char filename[PATH_MAX + 1]; cassert(config_prof); @@ -1593,6 +1592,7 @@ prof_idump(void) } if (opt_prof_prefix[0] != '\0') { + char filename[PATH_MAX + 1]; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; @@ -1631,7 +1631,6 @@ prof_gdump(void) { tsd_t *tsd; prof_tdata_t *tdata; - char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); @@ -1647,6 +1646,7 @@ prof_gdump(void) } if (opt_prof_prefix[0] != '\0') { + char filename[DUMP_FILENAME_BUFSIZE]; malloc_mutex_lock(&prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; From aca490f004bffa619319aec718fc74e9855b45ae Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 14:39:29 -0700 Subject: [PATCH 0031/2608] Add more xallocx() overflow tests. --- test/integration/xallocx.c | 64 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 8f0de630..76c91e94 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -122,6 +122,68 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +TEST_BEGIN(test_size) +{ + size_t small0, hugemax; + void *p; + + /* Get size classes. */ + small0 = get_small_size(0); + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(small0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + /* Test smallest supported size. */ + assert_zu_eq(xallocx(p, 1, 0, 0), small0, + "Unexpected xallocx() behavior"); + + /* Test largest supported size. 
*/ + assert_zu_le(xallocx(p, hugemax, 0, 0), hugemax, + "Unexpected xallocx() behavior"); + + /* Test size overflow. */ + assert_zu_le(xallocx(p, hugemax+1, 0, 0), hugemax, + "Unexpected xallocx() behavior"); + assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), hugemax, + "Unexpected xallocx() behavior"); + + dallocx(p, 0); +} +TEST_END + +TEST_BEGIN(test_size_extra_overflow) +{ + size_t small0, hugemax; + void *p; + + /* Get size classes. */ + small0 = get_small_size(0); + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(small0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + + /* Test overflows that can be resolved by clamping extra. */ + assert_zu_le(xallocx(p, hugemax-1, 2, 0), hugemax, + "Unexpected xallocx() behavior"); + assert_zu_le(xallocx(p, hugemax, 1, 0), hugemax, + "Unexpected xallocx() behavior"); + + /* Test overflow such that hugemax-size underflows. */ + assert_zu_le(xallocx(p, hugemax+1, 2, 0), hugemax, + "Unexpected xallocx() behavior"); + assert_zu_le(xallocx(p, hugemax+2, 3, 0), hugemax, + "Unexpected xallocx() behavior"); + assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), hugemax, + "Unexpected xallocx() behavior"); + assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), hugemax, + "Unexpected xallocx() behavior"); + + dallocx(p, 0); +} +TEST_END + TEST_BEGIN(test_extra_small) { size_t small0, small1, hugemax; @@ -293,6 +355,8 @@ main(void) test_same_size, test_extra_no_move, test_no_move_fail, + test_size, + test_size_extra_overflow, test_extra_small, test_extra_large, test_extra_huge)); From 9a505b768cd50bffbfaa3a993df9117e7454134e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 14:39:58 -0700 Subject: [PATCH 0032/2608] Centralize xallocx() size[+extra] overflow checks. 
--- src/arena.c | 7 ------- src/jemalloc.c | 18 +++++++++++------- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/arena.c b/src/arena.c index a119d268..2e888eaa 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2791,15 +2791,8 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, { size_t usize_min, usize_max; - /* Check for size overflow. */ - if (unlikely(size > HUGE_MAXCLASS)) - return (true); usize_min = s2u(size); - /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) - extra = HUGE_MAXCLASS - size; usize_max = s2u(size + extra); - if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { /* * Avoid moving the allocation if the size class can be left the diff --git a/src/jemalloc.c b/src/jemalloc.c index f403306b..ab7cf024 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2285,13 +2285,6 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(ptr); - /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) { - /* Check for size overflow. */ - if (size > HUGE_MAXCLASS) - return (old_usize); - extra = HUGE_MAXCLASS - size; - } /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in @@ -2335,6 +2328,17 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) tsd = tsd_fetch(); old_usize = isalloc(ptr, config_prof); + + /* Clamp extra if necessary to avoid (size + extra) overflow. */ + if (unlikely(size + extra > HUGE_MAXCLASS)) { + /* Check for size overflow. 
*/ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; + } + extra = HUGE_MAXCLASS - size; + } + if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); From 345c1b0eeeac333c3da8baa46e9b96c3b2aca443 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 14:59:56 -0700 Subject: [PATCH 0033/2608] Link test to librt if it contains clock_gettime(2). This resolves #257. --- Makefile.in | 7 ++++--- configure.ac | 9 +++++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index a836e749..01285afb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -28,6 +28,7 @@ CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ +TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -265,15 +266,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p 
$(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 2308b990..7a1290e0 100644 --- a/configure.ac +++ b/configure.ac @@ -1190,6 +1190,14 @@ fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" +dnl Check whether clock_gettime(2) is in libc or librt. This function is only +dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS. +SAVED_LIBS="${LIBS}" +LIBS= +AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"]) +AC_SUBST([TESTLIBS]) +LIBS="${SAVED_LIBS}" + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], @@ -1694,6 +1702,7 @@ AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) +AC_MSG_RESULT([TESTLIBS : ${TESTLIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) From 65b940a3c5b7467d79f757e98aa5ba1810129c3e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 15:48:42 -0700 Subject: [PATCH 0034/2608] Loosen expected xallocx() results. Systems that do not support chunk split/merge cannot shrink/grow huge allocations in place. 
--- test/integration/xallocx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 76c91e94..058e27c5 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -300,9 +300,9 @@ TEST_BEGIN(test_extra_huge) assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, largemax, 0, 0), huge0, + assert_zu_ge(xallocx(p, largemax, 0, 0), huge0, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, @@ -314,27 +314,27 @@ TEST_BEGIN(test_extra_huge) "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, largemax, huge0 - largemax, 0), huge0, + assert_zu_ge(xallocx(p, largemax, huge0 - largemax, 0), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_le(xallocx(p, huge2, 0, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, hugemax+1, 0, 0), huge2, + assert_zu_le(xallocx(p, hugemax+1, 0, 0), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2, + assert_zu_le(xallocx(p, huge0, huge2 - huge0, 0), huge2, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, From 1d7540c9d71ee8a85ea97c9459698e090ee04719 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Sep 2015 15:26:23 -0700 Subject: [PATCH 0035/2608] Update ChangeLog for 4.0.1. --- ChangeLog | 71 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index e4da6384..4498683e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,39 +4,62 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.0.1 (XXX) +* 4.0.1 (September 15, 2015) + + This is a bugfix release that is somewhat high risk due to the amount of + refactoring required to address deep xallocx() problems. As a side effect of + these fixes, xallocx() now tries harder to partially fulfill requests for + optional extra space. Note that a couple of minor heap profiling + optimizations are included, but these are better thought of as performance + fixes that were integral to disovering most of the other bugs. + + Optimizations: + - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the + fast path when heap profiling is enabled. Additionally, split a special + case out into arena_prof_tctx_reset(), which also avoids chunk metadata + reads. + - Optimize irallocx_prof() to optimistically update the sampler state. The + prior implementation appears to have been a holdover from when + rallocx()/xallocx() functionality was combined as rallocm(). Bug fixes: + - Fix TLS configuration such that it is enabled by default for platforms on + which it works correctly. - Fix arenas_cache_cleanup() and arena_get_hard() to handle allocation/deallocation within the application's thread-specific data cleanup functions even after arenas_cache is torn down. 
- - Don't bitshift by negative amounts when encoding/decoding run sizes in chunk - header maps. This affected systems with page sizes greater than 8 KiB. - - Rename index_t to szind_t to avoid an existing type on Solaris. - - Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to - match glibc and avoid compilation errors when including both - jemalloc/jemalloc.h and malloc.h in C++ code. + - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. - Fix chunk purge hook calls for in-place huge shrinking reallocation to specify the old chunk size rather than the new chunk size. This bug caused no correctness issues for the default chunk purge function, but was visible to custom functions set via the "arena..chunk_hooks" mallctl. - - Fix TLS configuration such that it is enabled by default for platforms on - which it works correctly. - - Fix heap profiling to distinguish among otherwise identical sample sites - with interposed resets (triggered via the "prof.reset" mallctl). This bug - could cause data structure corruption that would most likely result in a - segfault. - - Fix xallocx() bugs related to size+extra exceeding HUGE_MAXCLASS. - - Fix irealloc_prof() to prof_alloc_rollback() on OOM. - - Make one call to prof_active_get_unlocked() per allocation event, and use - the result throughout the relevant functions that handle an allocation - event. Also add a missing check in prof_realloc(). These fixes protect - allocation events against concurrent prof_active changes. - - Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() in - the correct order. - - Fix prof_realloc() to call prof_free_sampled_object() after calling - prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were - the same, the tctx could have been prematurely destroyed. 
+ - Fix heap profiling bugs: + + Fix heap profiling to distinguish among otherwise identical sample sites + with interposed resets (triggered via the "prof.reset" mallctl). This bug + could cause data structure corruption that would most likely result in a + segfault. + + Fix irealloc_prof() to prof_alloc_rollback() on OOM. + + Make one call to prof_active_get_unlocked() per allocation event, and use + the result throughout the relevant functions that handle an allocation + event. Also add a missing check in prof_realloc(). These fixes protect + allocation events against concurrent prof_active changes. + + Fix ixallocx_prof() to pass usize_max and zero to ixallocx_prof_sample() + in the correct order. + + Fix prof_realloc() to call prof_free_sampled_object() after calling + prof_malloc_sample_object(). Prior to this fix, if tctx and old_tctx were + the same, the tctx could have been prematurely destroyed. + - Fix portability bugs: + + Don't bitshift by negative amounts when encoding/decoding run sizes in + chunk header maps. This affected systems with page sizes greater than 8 + KiB. + + Rename index_t to szind_t to avoid an existing type on Solaris. + + Add JEMALLOC_CXX_THROW to the memalign() function prototype, in order to + match glibc and avoid compilation errors when including both + jemalloc/jemalloc.h and malloc.h in C++ code. + + Don't assume that /bin/sh is appropriate when running size_classes.sh + during configuration. + + Consider __sparcv9 a synonym for __sparc64__ when defining LG_QUANTUM. + + Link tests to librt if it contains clock_gettime(2). * 4.0.0 (August 17, 2015) From 38e2c8fa9c4a2a0613609b8b88a355670a2f9770 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Sep 2015 10:05:56 -0700 Subject: [PATCH 0036/2608] Fix ixallocx_prof_sample(). Fix ixallocx_prof_sample() to never modify nor create sampled small allocations. xallocx() is in general incapable of moving small allocations, so this fix removes buggy code without loss of generality. 
--- ChangeLog | 7 +++++++ src/jemalloc.c | 21 ++++----------------- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/ChangeLog b/ChangeLog index 4498683e..619c522b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,13 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.2 (XXX) + + Bug fixes: + - Fix ixallocx_prof_sample() to never modify nor create sampled small + allocations. xallocx() is in general incapable of moving small allocations, + so this fix removes buggy code without loss of generality. + * 4.0.1 (September 15, 2015) This is a bugfix release that is somewhat high risk due to the amount of diff --git a/src/jemalloc.c b/src/jemalloc.c index ab7cf024..ad904eb5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2251,26 +2251,13 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, static size_t ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, size_t usize_max, bool zero, prof_tctx_t *tctx) + size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - /* Use minimum usize to determine whether promotion may happen. */ - if (((alignment == 0) ? s2u(size) : sa2u(size, alignment)) <= - SMALL_MAXCLASS) { - if (ixalloc(ptr, old_usize, SMALL_MAXCLASS+1, - (SMALL_MAXCLASS+1 >= size+extra) ? 
0 : size+extra - - (SMALL_MAXCLASS+1), alignment, zero)) - return (old_usize); - usize = isalloc(ptr, config_prof); - if (usize_max < LARGE_MINCLASS) - arena_prof_promoted(ptr, usize); - } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); - } + usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); return (usize); } @@ -2296,12 +2283,12 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, - alignment, usize_max, zero, tctx); + alignment, zero, tctx); } else { usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, zero); } - if (unlikely(usize == old_usize)) { + if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return (usize); } From 4be9c79f881066f4d3424d45d7845c03e1032d3c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Sep 2015 10:17:55 -0700 Subject: [PATCH 0037/2608] Fix irallocx_prof_sample(). Fix irallocx_prof_sample() to always allocate large regions, even when alignment is non-zero. --- ChangeLog | 2 ++ src/jemalloc.c | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 619c522b..b5e10c49 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,8 @@ brevity. Much more detail can be found in the git revision history: - Fix ixallocx_prof_sample() to never modify nor create sampled small allocations. xallocx() is in general incapable of moving small allocations, so this fix removes buggy code without loss of generality. + - Fix irallocx_prof_sample() to always allocate large regions, even when + alignment is non-zero. 
* 4.0.1 (September 15, 2015) diff --git a/src/jemalloc.c b/src/jemalloc.c index ad904eb5..b58252fd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2098,8 +2098,8 @@ label_oom: } static void * -irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, - size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, +irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { void *p; @@ -2113,7 +2113,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return (NULL); arena_prof_promoted(p, usize); } else { - p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, + p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); } @@ -2133,8 +2133,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, old_tctx = prof_tctx_get(old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd, old_ptr, old_usize, size, - alignment, *usize, zero, tcache, arena, tctx); + p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, + alignment, zero, tcache, arena, tctx); } else { p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, tcache, arena); From 3263be6efb5232963c0820da65e235d1693e404d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Sep 2015 10:19:28 -0700 Subject: [PATCH 0038/2608] Simplify imallocx_prof_sample(). Simplify imallocx_prof_sample() to always operate on usize rather than sometimes using size. This avoids redundant usize computations and more closely fits the style adopted by i[rx]allocx_prof_sample() to fix sampling bugs. 
--- src/jemalloc.c | 39 +++++++++++++-------------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b58252fd..49c5f2ac 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1965,41 +1965,29 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - if (alignment != 0) + if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); - if (zero) + if (unlikely(zero)) return (icalloct(tsd, usize, tcache, arena)); return (imalloct(tsd, usize, tcache, arena)); } -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_maybe_flags(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) -{ - - if (likely(flags == 0)) - return (imalloc(tsd, size)); - return (imallocx_flags(tsd, usize, alignment, zero, tcache, arena)); -} - static void * -imallocx_prof_sample(tsd_t *tsd, size_t size, int flags, size_t usize, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) +imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { void *p; if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_maybe_flags(tsd, LARGE_MINCLASS, flags, - LARGE_MINCLASS, alignment, zero, tcache, arena); + p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, + arena); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); - } else { - p = imallocx_maybe_flags(tsd, size, flags, usize, alignment, - zero, tcache, arena); - } + } else + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); return (p); } @@ -2018,12 +2006,11 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - p = imallocx_maybe_flags(tsd, size, flags, *usize, alignment, - zero, tcache, arena); - } else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd, size, flags, *usize, alignment, - zero, tcache, arena); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, + arena); } else p = NULL; if (unlikely(p == NULL)) { From 3ca0cf6a68c9eab7668be14d2b07645277f8b833 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Sep 2015 14:47:39 -0700 Subject: [PATCH 0039/2608] Fix prof_alloc_rollback(). Fix prof_alloc_rollback() to read tdata from thread-specific data rather than dereferencing a potentially invalid tctx. --- ChangeLog | 2 ++ src/prof.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b5e10c49..fb376b26 100644 --- a/ChangeLog +++ b/ChangeLog @@ -12,6 +12,8 @@ brevity. Much more detail can be found in the git revision history: so this fix removes buggy code without loss of generality. - Fix irallocx_prof_sample() to always allocate large regions, even when alignment is non-zero. 
+ - Fix prof_alloc_rollback() to read tdata from thread-specific data rather + than dereferencing a potentially invalid tctx. * 4.0.1 (September 15, 2015) diff --git a/src/prof.c b/src/prof.c index d68478fd..0a08062c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -209,7 +209,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) */ tdata = prof_tdata_get(tsd, true); if (tdata != NULL) - prof_sample_threshold_update(tctx->tdata); + prof_sample_threshold_update(tdata); } if ((uintptr_t)tctx > (uintptr_t)1U) { From 4d0e162d2db0531624edee497613c7ecb1ef212d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Sep 2015 14:50:29 -0700 Subject: [PATCH 0040/2608] Expand check_integration_prof testing. Run integration tests with MALLOC_CONF="prof:true,prof_active:false" in addition to MALLOC_CONF="prof:true". --- Makefile.in | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile.in b/Makefile.in index 01285afb..1ac6f292 100644 --- a/Makefile.in +++ b/Makefile.in @@ -353,6 +353,7 @@ check_unit: tests_unit check_unit_dir check_integration_prof: tests_integration check_integration_dir ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) endif check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) From 21523297fcd72128c14b40ebefbf8ccf114fbede Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Sep 2015 15:27:28 -0700 Subject: [PATCH 0041/2608] Add mallocx() OOM tests. 
--- src/jemalloc.c | 2 ++ test/integration/mallocx.c | 70 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 49c5f2ac..5a2d3240 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1923,6 +1923,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } + assert(*usize != 0); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -2267,6 +2268,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, */ usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, alignment); + assert(usize_max != 0); tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(ptr, old_usize, size, extra, diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 4b0e33f0..3973938b 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -1,5 +1,74 @@ #include "test/jemalloc_test.h" +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); + + return (ret); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + +TEST_BEGIN(test_oom) +{ + size_t hugemax, size, alignment; + + hugemax = get_huge_size(get_nhuge()-1); + + /* In practice hugemax is too large to be allocated. */ + assert_ptr_null(mallocx(hugemax, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax); + +#if LG_SIZEOF_PTR == 3 + size = ZU(0x8000000000000000); + alignment = ZU(0x8000000000000000); +#else + size = ZU(0x80000000); + alignment = ZU(0x80000000); +#endif + assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)), + "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size, + alignment); +} +TEST_END + TEST_BEGIN(test_basic) { #define MAXSZ (((size_t)1) << 26) @@ -96,6 +165,7 @@ main(void) { return (test( + test_oom, test_basic, test_alignment_and_size)); } From e56b24e3a2db1edde23ede2477a94962ed006ae2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 20 Sep 2015 09:58:10 -0700 Subject: [PATCH 0042/2608] Make arena_dalloc_large_locked_impl() static. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 2e888eaa..7f4a6cae 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2560,7 +2560,7 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = JEMALLOC_N(arena_dalloc_junk_large_impl); #endif -void +static void arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool junked) { From 66814c1a52de5a4a51569f9a88bae6c9b8a4c873 Mon Sep 17 00:00:00 2001 From: Craig Rodrigues Date: Sun, 20 Sep 2015 21:57:32 -0700 Subject: [PATCH 0043/2608] Fix tsd_boot1() to use explicit 'void' parameter list. 
--- include/jemalloc/internal/tsd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 62a887e6..eed7aa01 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -190,7 +190,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ \ /* Do nothing. */ \ @@ -235,7 +235,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ \ /* Do nothing. */ \ @@ -345,7 +345,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ wrapper = (a_name##tsd_wrapper_t *) \ @@ -467,7 +467,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1() \ +a_name##tsd_boot1(void) \ { \ a_name##tsd_wrapper_t *wrapper; \ wrapper = (a_name##tsd_wrapper_t *) \ From b8e966f121e55ffa0c904f9ff7d419797b872aa8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Sep 2015 10:19:37 -0700 Subject: [PATCH 0044/2608] Update ChangeLog for 4.0.2. --- ChangeLog | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index fb376b26..58e4462b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,9 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.0.2 (XXX) +* 4.0.2 (September 21, 2015) + + This bugfix release addresses a few bugs specific to heap profiling. Bug fixes: - Fix ixallocx_prof_sample() to never modify nor create sampled small From fb64ec29ec05fbcba09898a3c93211966a6fa985 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Sep 2015 18:37:18 -0700 Subject: [PATCH 0045/2608] Fix prof_tctx_dump_iter() to filter. Fix prof_tctx_dump_iter() to filter out nodes that were created after heap profile dumping started. 
Prior to this fix, spurious entries with arbitrary object/byte counts could appear in heap profiles, which resulted in jeprof inaccuracies or failures. --- ChangeLog | 6 ++++++ src/prof.c | 22 +++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 58e4462b..c6bd5562 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,12 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.3 (XXX) + + Bug fixes: + - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap + profile dumping started. + * 4.0.2 (September 21, 2015) This bugfix release addresses a few bugs specific to heap profiling. diff --git a/src/prof.c b/src/prof.c index 0a08062c..5d2b9598 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1102,11 +1102,23 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { bool propagate_err = *(bool *)arg; - if (prof_dump_printf(propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, - tctx->dump_cnts.accumobjs, tctx->dump_cnts.accumbytes)) - return (tctx); + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_nominal: + /* Not captured by this dump. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + if (prof_dump_printf(propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " + "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) + return (tctx); + break; + default: + not_reached(); + } return (NULL); } From d260f442ce693de4351229027b37b3293fcbfd7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Sep 2015 16:38:45 -0700 Subject: [PATCH 0046/2608] Fix xallocx(..., MALLOCX_ZERO) bugs. 
Zero all trailing bytes of large allocations when --enable-cache-oblivious configure option is enabled. This regression was introduced by 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.). Zero trailing bytes of huge allocations when resizing from/to a size class that is not a multiple of the chunk size. --- ChangeLog | 4 ++ src/arena.c | 10 ++++ src/huge.c | 30 +++++----- test/integration/xallocx.c | 119 ++++++++++++++++++++++++++++++++++++- 4 files changed, 148 insertions(+), 15 deletions(-) diff --git a/ChangeLog b/ChangeLog index c6bd5562..a9929f82 100644 --- a/ChangeLog +++ b/ChangeLog @@ -7,6 +7,10 @@ brevity. Much more detail can be found in the git revision history: * 4.0.3 (XXX) Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large + allocations when --enable-cache-oblivious configure option is enabled. + - Fix xallocx(..., MALLOCX_ZERO) to zero trailing bytes of huge allocations + when resizing from/to a size class that is not a multiple of the chunk size. - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap profile dumping started. diff --git a/src/arena.c b/src/arena.c index 7f4a6cae..3081519c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2679,6 +2679,16 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (arena_run_split_large(arena, run, splitsize, zero)) goto label_fail; + if (config_cache_oblivious && zero) { + /* + * Zero the trailing bytes of the original allocation's + * last page, since they are in an indeterminate state. 
+ */ + assert(PAGE_CEILING(oldsize) == oldsize); + memset((void *)((uintptr_t)ptr + oldsize), 0, + PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr); + } + size = oldsize + splitsize; npages = (size + large_pad) >> LG_PAGE; diff --git a/src/huge.c b/src/huge.c index f8778db2..1e9a6651 100644 --- a/src/huge.c +++ b/src/huge.c @@ -133,7 +133,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, extent_node_t *node; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool zeroed; + bool pre_zeroed, post_zeroed; /* Increase usize to incorporate extra. */ for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) @@ -145,26 +145,27 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, node = huge_node_get(ptr); arena = extent_node_arena_get(node); + pre_zeroed = extent_node_zeroed_get(node); /* Fill if necessary (shrinking). */ if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); - zeroed = false; + post_zeroed = false; } else { - zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, ptr, - CHUNK_CEILING(oldsize), usize, sdiff); + post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + ptr, CHUNK_CEILING(oldsize), usize, sdiff); } } else - zeroed = true; + post_zeroed = pre_zeroed; malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); - /* Clear node's zeroed field if zeroing failed above. */ - extent_node_zeroed_set(node, extent_node_zeroed_get(node) && zeroed); + /* Update zeroed. */ + extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(&arena->huge_mtx); arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); @@ -172,7 +173,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, /* Fill if necessary (growing). 
*/ if (oldsize < usize) { if (zero || (config_fill && unlikely(opt_zero))) { - if (!zeroed) { + if (!pre_zeroed) { memset((void *)((uintptr_t)ptr + oldsize), 0, usize - oldsize); } @@ -190,10 +191,11 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) arena_t *arena; chunk_hooks_t chunk_hooks; size_t cdiff; - bool zeroed; + bool pre_zeroed, post_zeroed; node = huge_node_get(ptr); arena = extent_node_arena_get(node); + pre_zeroed = extent_node_zeroed_get(node); chunk_hooks = chunk_hooks_get(arena); assert(oldsize > usize); @@ -209,21 +211,21 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); - zeroed = false; + post_zeroed = false; } else { - zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, + post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + usize), CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else - zeroed = true; + post_zeroed = pre_zeroed; malloc_mutex_lock(&arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); - /* Clear node's zeroed field if zeroing failed above. */ - extent_node_zeroed_set(node, extent_node_zeroed_get(node) && zeroed); + /* Update zeroed. */ + extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(&arena->huge_mtx); /* Zap the excess chunks. 
*/ diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 058e27c5..f69d48d7 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -347,6 +347,121 @@ TEST_BEGIN(test_extra_huge) } TEST_END +static void +print_filled_extents(const void *p, uint8_t c, size_t len) +{ + const uint8_t *pc = (const uint8_t *)p; + size_t i, range0; + uint8_t c0; + + malloc_printf(" p=%p, c=%#x, len=%zu:", p, c, len); + range0 = 0; + c0 = pc[0]; + for (i = 0; i < len; i++) { + if (pc[i] != c0) { + malloc_printf(" %#x[%zu..%zu)", c0, range0, i); + range0 = i; + c0 = pc[i]; + } + } + malloc_printf(" %#x[%zu..%zu)\n", c0, range0, i); +} + +static bool +validate_fill(const void *p, uint8_t c, size_t offset, size_t len) +{ + const uint8_t *pc = (const uint8_t *)p; + bool err; + size_t i; + + for (i = offset, err = false; i < offset+len; i++) { + if (pc[i] != c) + err = true; + } + + if (err) + print_filled_extents(p, c, offset + len); + + return (err); +} + +static void +test_zero(size_t szmin, size_t szmax) +{ + size_t sz, nsz; + void *p; +#define FILL_BYTE 0x7aU + + sz = szmax; + p = mallocx(sz, MALLOCX_ZERO); + assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", + sz); + + /* + * Fill with non-zero so that non-debug builds are more likely to detect + * errors. + */ + memset(p, FILL_BYTE, sz); + assert_false(validate_fill(p, FILL_BYTE, 0, sz), + "Memory not filled: sz=%zu", sz); + + /* Shrink in place so that we can expect growing in place to succeed. 
*/ + sz = szmin; + assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz, + "Unexpected xallocx() error"); + assert_false(validate_fill(p, FILL_BYTE, 0, sz), + "Memory not filled: sz=%zu", sz); + + for (sz = szmin; sz < szmax; sz = nsz) { + nsz = nallocx(sz+1, MALLOCX_ZERO); + assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz, + "Unexpected xallocx() failure"); + assert_false(validate_fill(p, FILL_BYTE, 0, sz), + "Memory not filled: sz=%zu", sz); + assert_false(validate_fill(p, 0x00, sz, nsz-sz), + "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz); + memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz); + assert_false(validate_fill(p, FILL_BYTE, 0, nsz), + "Memory not filled: nsz=%zu", nsz); + } + + dallocx(p, 0); +} + +TEST_BEGIN(test_zero_large) +{ + size_t large0, largemax; + + /* Get size classes. */ + large0 = get_large_size(0); + largemax = get_large_size(get_nlarge()-1); + + test_zero(large0, largemax); +} +TEST_END + +TEST_BEGIN(test_zero_huge) +{ + size_t huge0, huge1; + static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE + true +#else + false +#endif + ; + + /* Get size classes. */ + huge0 = get_huge_size(0); + huge1 = get_huge_size(1); + + if (maps_coalesce) + test_zero(huge0, huge0 * 4); + test_zero(huge1, huge0 * 2); +} +TEST_END + int main(void) { @@ -359,5 +474,7 @@ main(void) test_size_extra_overflow, test_extra_small, test_extra_large, - test_extra_huge)); + test_extra_huge, + test_zero_large, + test_zero_huge)); } From 03eb37e8fd35587b944f8cbc85cd81a08b0ed17a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Sep 2015 16:44:16 -0700 Subject: [PATCH 0047/2608] Make mallocx() OOM test more robust. Make mallocx() OOM testing work correctly even on systems that can allocate the majority of virtual address space in a single contiguous region. 
--- test/integration/mallocx.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 3973938b..6253175d 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -52,9 +52,20 @@ TEST_BEGIN(test_oom) hugemax = get_huge_size(get_nhuge()-1); - /* In practice hugemax is too large to be allocated. */ - assert_ptr_null(mallocx(hugemax, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax); + /* + * It should be impossible to allocate two objects that each consume + * more than half the virtual address space. + */ + { + void *p; + + p = mallocx(hugemax, 0); + if (p != NULL) { + assert_ptr_null(mallocx(hugemax, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax); + dallocx(p, 0); + } + } #if LG_SIZEOF_PTR == 3 size = ZU(0x8000000000000000); From d36c7ebb004e73122c76276b854364f543458b8c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Sep 2015 16:53:18 -0700 Subject: [PATCH 0048/2608] Work around an NPTL-specific TSD issue. Work around a potentially bad thread-specific data initialization interaction with NPTL (glibc's pthreads implementation). This resolves #283. --- ChangeLog | 2 ++ src/tsd.c | 3 +++ 2 files changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index a9929f82..b7381a58 100644 --- a/ChangeLog +++ b/ChangeLog @@ -13,6 +13,8 @@ brevity. Much more detail can be found in the git revision history: when resizing from/to a size class that is not a multiple of the chunk size. - Fix prof_tctx_dump_iter() to filter out nodes that were created after heap profile dumping started. + - Work around a potentially bad thread-specific data initialization + interaction with NPTL (glibc's pthreads implementation). 
* 4.0.2 (September 21, 2015) diff --git a/src/tsd.c b/src/tsd.c index 2100833a..9ffe9afe 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -73,6 +73,9 @@ tsd_cleanup(void *arg) tsd_t *tsd = (tsd_t *)arg; switch (tsd->state) { + case tsd_state_uninitialized: + /* Do nothing. */ + break; case tsd_state_nominal: #define O(n, t) \ n##_cleanup(tsd); From 044047fae122d3e4a22d8d6748b598922b3c3ccc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Sep 2015 19:52:28 -0700 Subject: [PATCH 0049/2608] Remove fragile xallocx() test case. In addition to depending on map coalescing, the test depended on munmap() being disabled so that chunk recycling would always succeed. --- test/integration/xallocx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index f69d48d7..37362521 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -444,20 +444,11 @@ TEST_END TEST_BEGIN(test_zero_huge) { size_t huge0, huge1; - static const bool maps_coalesce = -#ifdef JEMALLOC_MAPS_COALESCE - true -#else - false -#endif - ; /* Get size classes. */ huge0 = get_huge_size(0); huge1 = get_huge_size(1); - if (maps_coalesce) - test_zero(huge0, huge0 * 4); test_zero(huge1, huge0 * 2); } TEST_END From 02709688e09325026be402b63400f88e587293d7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Sep 2015 20:05:26 -0700 Subject: [PATCH 0050/2608] Update ChangeLog for 4.0.3. --- ChangeLog | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index b7381a58..e3b0a519 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,9 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.0.3 (XXX) +* 4.0.3 (September 24, 2015) + + This bugfix release continues the trend of xallocx() and heap profiling fixes. 
Bug fixes: - Fix xallocx(..., MALLOCX_ZERO) to zero all trailing bytes of large From a784e411f21f4dc827c8c411b7afa7df949c2233 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Sep 2015 22:21:55 -0700 Subject: [PATCH 0051/2608] Fix a xallocx(..., MALLOCX_ZERO) bug. Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large allocations that have been randomly assigned an offset of 0 when --enable-cache-oblivious configure option is enabled. This addresses a special case missed in d260f442ce693de4351229027b37b3293fcbfd7d (Fix xallocx(..., MALLOCX_ZERO) bugs.). --- ChangeLog | 7 +++++++ include/jemalloc/internal/jemalloc_internal.h.in | 4 ++++ src/arena.c | 12 +++++++++--- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index e3b0a519..cd8f695b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,13 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.0.4 (XXX) + + Bug fixes: + - Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large + allocations that have been randomly assigned an offset of 0 when + --enable-cache-oblivious configure option is enabled. + * 4.0.3 (September 24, 2015) This bugfix release continues the trend of xallocx() and heap profiling fixes. diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8536a3ed..654cd088 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -317,6 +317,10 @@ typedef unsigned szind_t; #define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + /* Return the smallest pagesize multiple that is >= s. 
*/ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) diff --git a/src/arena.c b/src/arena.c index 3081519c..43733cc1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2683,10 +2683,16 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, /* * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. + * There will always be trailing bytes, because ptr's + * offset from the beginning of the run is a multiple of + * CACHELINE in [0 .. PAGE). */ - assert(PAGE_CEILING(oldsize) == oldsize); - memset((void *)((uintptr_t)ptr + oldsize), 0, - PAGE_CEILING((uintptr_t)ptr) - (uintptr_t)ptr); + void *zbase = (void *)((uintptr_t)ptr + oldsize); + void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + + PAGE)); + size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; + assert(nzero > 0); + memset(zbase, 0, nzero); } size = oldsize + splitsize; From fed1f9f367bec652f20ea0ccac4b21560a3b4089 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 1 Oct 2015 13:48:09 -0700 Subject: [PATCH 0052/2608] Fix intermittent xallocx() test failures. Modify xallocx() tests that expect to expand in place to use a separate arena. This avoids the potential for interposed internal allocations from e.g. heap profile sampling to disrupt the tests. This resolves #286. --- test/integration/xallocx.c | 108 ++++++++++++++++++++++--------------- 1 file changed, 65 insertions(+), 43 deletions(-) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 37362521..00451961 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -1,5 +1,24 @@ #include "test/jemalloc_test.h" +/* + * Use a separate arena for xallocx() extension/contraction tests so that + * internal allocation e.g. by heap profiling can't interpose allocations where + * xallocx() would ordinarily be able to extend. 
+ */ +static unsigned +arena_ind(void) +{ + static unsigned ind = 0; + + if (ind == 0) { + size_t sz = sizeof(ind); + assert_d_eq(mallctl("arenas.extend", &ind, &sz, NULL, 0), 0, + "Unexpected mallctl failure creating arena"); + } + + return (ind); +} + TEST_BEGIN(test_same_size) { void *p; @@ -218,6 +237,7 @@ TEST_END TEST_BEGIN(test_extra_large) { + int flags = MALLOCX_ARENA(arena_ind()); size_t smallmax, large0, large1, large2, huge0, hugemax; void *p; @@ -229,61 +249,62 @@ TEST_BEGIN(test_extra_large) huge0 = get_huge_size(0); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(large2, 0); + p = mallocx(large2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, 0, 0), large0, + assert_zu_eq(xallocx(p, smallmax, 0, flags), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, + assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large1, large2 - large1, 0), large2, + assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, large1 - large0, 0), large1, + assert_zu_eq(xallocx(p, large0, large1 - large0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, 0), large0, + assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, flags), large0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, 0), large2, + assert_zu_eq(xallocx(p, huge0, 0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_lt(xallocx(p, large0, huge0 - large0, 0), huge0, + assert_zu_lt(xallocx(p, large0, huge0 - large0, flags), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_eq(xallocx(p, large0, large2 - large0, 0), large2, + assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, 0, 0), large2, + assert_zu_eq(xallocx(p, large2, 0, flags), large2, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, 0), huge0, + assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, flags), huge0, "Unexpected xallocx() behavior"); - dallocx(p, 0); + dallocx(p, flags); } TEST_END TEST_BEGIN(test_extra_huge) { + int flags = MALLOCX_ARENA(arena_ind()); size_t largemax, huge0, huge1, huge2, hugemax; void *p; @@ -294,56 +315,56 @@ TEST_BEGIN(test_extra_huge) huge2 = get_huge_size(2); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(huge2, 0); + p = mallocx(huge2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, 0), huge0, + assert_zu_ge(xallocx(p, largemax, 0, flags), huge0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, huge0, huge2 - huge0, 0), huge2, + assert_zu_eq(xallocx(p, huge0, huge2 - huge0, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge1, huge2 - huge1, 0), huge2, + assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, huge1 - huge0, 0), huge1, + assert_zu_eq(xallocx(p, huge0, huge1 - huge0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge0 - largemax, 0), huge0, + assert_zu_ge(xallocx(p, largemax, huge0 - largemax, flags), huge0, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, huge2, 0, 0), huge2, + assert_zu_le(xallocx(p, huge2, 0, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, 0), huge2, + assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, 0), hugemax, + assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, flags), hugemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, 0), huge0, + assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, huge2 - huge0, 0), huge2, + assert_zu_le(xallocx(p, huge0, huge2 - huge0, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, 0), huge2, + assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, "Unexpected xallocx() behavior"); /* Test size+extra overflow. 
*/ - assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, 0), hugemax, + assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, flags), hugemax, "Unexpected xallocx() behavior"); - dallocx(p, 0); + dallocx(p, flags); } TEST_END @@ -388,12 +409,13 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) static void test_zero(size_t szmin, size_t szmax) { + int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; void *p; #define FILL_BYTE 0x7aU sz = szmax; - p = mallocx(sz, MALLOCX_ZERO); + p = mallocx(sz, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); @@ -408,14 +430,14 @@ test_zero(size_t szmin, size_t szmax) /* Shrink in place so that we can expect growing in place to succeed. */ sz = szmin; - assert_zu_eq(xallocx(p, sz, 0, MALLOCX_ZERO), sz, + assert_zu_eq(xallocx(p, sz, 0, flags), sz, "Unexpected xallocx() error"); assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { - nsz = nallocx(sz+1, MALLOCX_ZERO); - assert_zu_eq(xallocx(p, sz+1, 0, MALLOCX_ZERO), nsz, + nsz = nallocx(sz+1, flags); + assert_zu_eq(xallocx(p, sz+1, 0, flags), nsz, "Unexpected xallocx() failure"); assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); @@ -426,7 +448,7 @@ test_zero(size_t szmin, size_t szmax) "Memory not filled: nsz=%zu", nsz); } - dallocx(p, 0); + dallocx(p, flags); } TEST_BEGIN(test_zero_large) From ea7449ffada5323f87f91e40f2213a514e691584 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Oct 2015 16:56:05 -0400 Subject: [PATCH 0053/2608] Fix a manual editing error. 
--- doc/jemalloc.xml.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8fc774b1..26a5e142 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1418,8 +1418,8 @@ malloc_conf = "xmalloc:true";]]> can cause asynchronous string deallocation. Furthermore, each invocation of this interface can only read or write; simultaneous read/write is not supported due to string lifetime limitations. The - name string must nil-terminated and comprised only of characters in the - sets recognized + name string must be nil-terminated and comprised only of characters in + the sets recognized by isgraph 3 and isblank From be4134710669e8b09d6b928f72a4208e5a68187c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 24 Oct 2015 07:53:25 -0700 Subject: [PATCH 0054/2608] Update ChangeLog for 4.0.4. --- ChangeLog | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index cd8f695b..8ed42cbe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,12 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.0.4 (XXX) +* 4.0.4 (October 24, 2015) + + This bugfix release fixes another xallocx() regression. No other regressions + have come to light in over a month, so this is likely a good starting point + for people who prefer to wait for "dot one" releases with all the major issues + shaken out. Bug fixes: - Fix xallocx(..., MALLOCX_ZERO to zero the last full trailing page of large From 606ae49fa3b2610f41961d38e82828ddbbd89a69 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 9 Nov 2015 14:59:14 -0800 Subject: [PATCH 0055/2608] Integrate raw heap profile support into jeprof. 
--- bin/jeprof.in | 54 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index e7178078..d00ef5db 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -1160,8 +1160,21 @@ sub PrintSymbolizedProfile { } print '---', "\n"; - $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash - my $profile_marker = $&; + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + print '--- ', $profile_marker, "\n"; if (defined($main::collected_profile)) { # if used with remote fetch, simply dump the collected profile to output. @@ -1171,6 +1184,12 @@ sub PrintSymbolizedProfile { } close(SRC); } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; # dump a cpu-format profile to standard out PrintProfileData($profile); } @@ -3427,12 +3446,22 @@ sub FetchDynamicProfile { } $url .= sprintf("seconds=%d", $main::opt_seconds); $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. 
+ $main::profile_type = 'cpu'; } else { # For non-CPU profiles, we add a type-extension to # the target profile file name. my $suffix = $path; $suffix =~ s,/,.,g; $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. + if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } } my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); @@ -3730,6 +3759,8 @@ sub ReadProfile { my $symbol_marker = $&; $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; # Look at first line to see if it is a heap or a CPU profile. # CPU profile may start with no header at all, and just binary data @@ -3756,7 +3787,13 @@ sub ReadProfile { $header = ReadProfileHeader(*PROFILE) || ""; } + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. + $header = ReadProfileHeader(*PROFILE) || ""; + } + $main::profile_type = ''; + if ($header =~ m/^heap profile:.*$growth_marker/o) { $main::profile_type = 'growth'; $result = ReadHeapProfile($prog, *PROFILE, $header); @@ -3808,9 +3845,9 @@ sub ReadProfile { # independent implementation. sub FixCallerAddresses { my $stack = shift; - if ($main::use_symbolized_profile) { - return $stack; - } else { + # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() + # dumps unadjusted profiles. + { $stack =~ /(\s)/; my $delimiter = $1; my @addrs = split(' ', $stack); @@ -3878,12 +3915,7 @@ sub ReadCPUProfile { for (my $j = 0; $j < $d; $j++) { my $pc = $slots->get($i+$j); # Subtract one from caller pc so we map back to call instr. 
- # However, don't do this if we're reading a symbolized profile - # file, in which case the subtract-one was done when the file - # was written. - if ($j > 0 && !$main::use_symbolized_profile) { - $pc--; - } + $pc--; $pc = sprintf("%0*x", $address_length, $pc); $pcs->{$pc} = 1; push @k, $pc; From 566d4c02400700b94a952eddeed34313360211d3 Mon Sep 17 00:00:00 2001 From: Nathan Froyd Date: Thu, 5 Nov 2015 12:18:43 -0500 Subject: [PATCH 0056/2608] use correct macro definitions for clang-cl clang-cl, an MSVC-compatible frontend built on top of clang, defines _MSC_VER *and* supports __attribute__ syntax. The ordering of the checks in jemalloc_macros.h.in, however, does the wrong thing for clang-cl, as we want the Windows-specific macro definitions for clang-cl. To support this use case, we reorder the checks so that _MSC_VER is checked first (which includes clang-cl), and then JEMALLOC_HAVE_ATTR is checked. No functionality change intended. --- include/jemalloc/jemalloc_macros.h.in | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 7f64d9ff..698caa19 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -36,32 +36,7 @@ # define JEMALLOC_CXX_THROW #endif -#ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ATTR(s) __attribute__((s)) -# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) -# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE -# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) -# else -# define JEMALLOC_ALLOC_SIZE(s) -# define JEMALLOC_ALLOC_SIZE2(s1, s2) -# endif -# ifndef JEMALLOC_EXPORT -# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) -# endif -# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) -# elif 
defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) -# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) -# else -# define JEMALLOC_FORMAT_PRINTF(s, i) -# endif -# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) -# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) -# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) -# define JEMALLOC_RESTRICT_RETURN -# define JEMALLOC_ALLOCATOR -#elif _MSC_VER +#if _MSC_VER # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) @@ -87,6 +62,31 @@ # else # define JEMALLOC_ALLOCATOR # endif +#elif defined(JEMALLOC_HAVE_ATTR) +# define JEMALLOC_ATTR(s) __attribute__((s)) +# define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) +# ifdef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +# define JEMALLOC_ALLOC_SIZE(s) JEMALLOC_ATTR(alloc_size(s)) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) JEMALLOC_ATTR(alloc_size(s1, s2)) +# else +# define JEMALLOC_ALLOC_SIZE(s) +# define JEMALLOC_ALLOC_SIZE2(s1, s2) +# endif +# ifndef JEMALLOC_EXPORT +# define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) +# endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) +# elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) +# define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(printf, s, i)) +# else +# define JEMALLOC_FORMAT_PRINTF(s, i) +# endif +# define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) +# define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) +# define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) +# define JEMALLOC_RESTRICT_RETURN +# define JEMALLOC_ALLOCATOR #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) From f97298bfc1c6edbb4fd00820e9e028e8d213af73 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 3 Sep 2015 20:32:57 +0900 Subject: [PATCH 0057/2608] Remove arena_run_dalloc_decommit(). This resolves #284. 
--- src/arena.c | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/src/arena.c b/src/arena.c index 43733cc1..58797ded 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1660,18 +1660,6 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, return (size); } -static bool -arena_run_decommit(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - size_t run_ind = arena_miscelm_to_pageind(miscelm); - size_t offset = run_ind << LG_PAGE; - size_t length = arena_run_size_get(arena, chunk, run, run_ind); - - return (arena->chunk_hooks.decommit(chunk, chunksize, offset, length, - arena->ind)); -} - static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted) @@ -1749,15 +1737,6 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, arena_maybe_purge(arena); } -static void -arena_run_dalloc_decommit(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run) -{ - bool committed = arena_run_decommit(arena, chunk, run); - - arena_run_dalloc(arena, run, committed, false, !committed); -} - static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) @@ -2441,7 +2420,7 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, malloc_mutex_unlock(&bin->lock); /******************************/ malloc_mutex_lock(&arena->lock); - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(arena, run, true, false, false); malloc_mutex_unlock(&arena->lock); /****************************/ malloc_mutex_lock(&bin->lock); @@ -2584,7 +2563,7 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc_decommit(arena, chunk, run); + arena_run_dalloc(arena, run, true, false, false); } void From bd418ce11efe908d0edfbe66d5af17e78582c377 Mon Sep 17 00:00:00 2001 From: Steve Dougherty 
Date: Fri, 11 Sep 2015 17:56:28 -0400 Subject: [PATCH 0058/2608] Assert compact color bit is unused Signed-off-by: Joshua Kahn This resolves #280. --- include/jemalloc/internal/rb.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 2ca8e593..b460d74b 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -79,6 +79,15 @@ struct { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) + +/* Node initializer. */ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ + /* Bookkeeping bit cannot be used by node pointer. */ \ + assert(((uintptr_t)(a_node) & 0x1) == 0); \ + rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_red_set(a_type, a_field, (a_node)); \ +} while (0) #else /* Right accessors. */ #define rbtn_right_get(a_type, a_field, a_node) \ @@ -99,7 +108,6 @@ struct { \ #define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) -#endif /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ @@ -107,6 +115,7 @@ struct { \ rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) +#endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ From 13b401553172942c3cc1d89c70fd965be71c1540 Mon Sep 17 00:00:00 2001 From: Joshua Kahn Date: Fri, 18 Sep 2015 16:58:17 -0400 Subject: [PATCH 0059/2608] Allow const keys for lookup Signed-off-by: Steve Dougherty This resolves #281. 
--- include/jemalloc/internal/arena.h | 4 ++-- include/jemalloc/internal/rb.h | 18 +++++++++--------- src/arena.c | 7 ++++--- src/extent.c | 4 ++-- test/unit/rb.c | 2 +- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 12c61797..9e2375ce 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -512,7 +512,7 @@ arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, size_t pageind); arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm); +size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); @@ -590,7 +590,7 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index b460d74b..7ddc383b 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -178,11 +178,11 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node); \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key); \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key); \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key); \ +a_prefix##psearch(a_rbt_type 
*rbtree, const a_type *key); \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node); \ a_attr void \ @@ -263,7 +263,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * last/first. * * static ex_node_t * - * ex_search(ex_t *tree, ex_node_t *key); + * ex_search(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. * Args: * tree: Pointer to an initialized red-black tree object. @@ -271,9 +271,9 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * Ret: Node in tree that matches key, or NULL if no match. * * static ex_node_t * - * ex_nsearch(ex_t *tree, ex_node_t *key); + * ex_nsearch(ex_t *tree, const ex_node_t *key); * static ex_node_t * - * ex_psearch(ex_t *tree, ex_node_t *key); + * ex_psearch(ex_t *tree, const ex_node_t *key); * Description: Search for node that matches key. If no match is found, * return what would be key's successor/predecessor, were * key in tree. @@ -406,7 +406,7 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ return (ret); \ } \ a_attr a_type * \ -a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ @@ -424,7 +424,7 @@ a_prefix##search(a_rbt_type *rbtree, a_type *key) { \ return (ret); \ } \ a_attr a_type * \ -a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ ret = &rbtree->rbt_nil; \ @@ -446,7 +446,7 @@ a_prefix##nsearch(a_rbt_type *rbtree, a_type *key) { \ return (ret); \ } \ a_attr a_type * \ -a_prefix##psearch(a_rbt_type *rbtree, a_type *key) { \ +a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ ret = &rbtree->rbt_nil; \ diff --git a/src/arena.c b/src/arena.c index 58797ded..844d721c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -62,7 +62,7 @@ 
arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C size_t -arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) +arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -76,7 +76,7 @@ arena_miscelm_size_get(arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C int -arena_run_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -169,7 +169,8 @@ run_quantize_first(size_t size) } JEMALLOC_INLINE_C int -arena_avail_comp(arena_chunk_map_misc_t *a, arena_chunk_map_misc_t *b) +arena_avail_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { int ret; uintptr_t a_miscelm = (uintptr_t)a; diff --git a/src/extent.c b/src/extent.c index 13f94411..9f5146e5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,7 +15,7 @@ extent_quantize(size_t size) } JEMALLOC_INLINE_C int -extent_szad_comp(extent_node_t *a, extent_node_t *b) +extent_szad_comp(const extent_node_t *a, const extent_node_t *b) { int ret; size_t a_qsize = extent_quantize(extent_node_size_get(a)); @@ -41,7 +41,7 @@ rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, extent_szad_comp) JEMALLOC_INLINE_C int -extent_ad_comp(extent_node_t *a, extent_node_t *b) +extent_ad_comp(const extent_node_t *a, const extent_node_t *b) { uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); diff --git a/test/unit/rb.c b/test/unit/rb.c index b38eb0e3..0262037b 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -21,7 +21,7 @@ struct node_s { }; static int -node_cmp(node_t *a, node_t *b) { +node_cmp(const node_t *a, const node_t *b) { int ret; assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); From e8ab0ab9c0e395d3c09398fa981704a9be968838 Mon Sep 17 00:00:00 2001 From: Joshua Kahn Date: Fri, 
18 Sep 2015 15:58:39 -0400 Subject: [PATCH 0060/2608] Add function to destroy tree ex_destroy iterates over the tree using post-order traversal so nodes can be removed and processed by the callback function without paying the cost to rebalance the tree. The destruction process cannot be stopped once started. --- include/jemalloc/internal/rb.h | 41 +++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 7ddc383b..30ccab44 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -192,7 +192,10 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ a_rbt_type *, a_type *, void *), void *arg); \ a_attr a_type * \ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ - a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg); /* * The rb_gen() macro generates a type-specific red-black tree implementation, @@ -321,6 +324,20 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ * arg : Opaque pointer passed to cb(). * Ret: NULL if iteration completed, or the non-NULL callback return value * that caused termination of the iteration. + * + * static void + * ex_destroy(ex_t *tree, void (*cb)(ex_node_t *, void *), void *arg); + * Description: Iterate over the tree with post-order traversal, remove + * each node, and run the callback if non-null. This is + * used for destroying a tree without paying the cost to + * rebalance it. The tree must not be otherwise altered + * during traversal. + * Args: + * tree: Pointer to an initialized red-black tree object. + * cb : Callback function, which, if non-null, is called for each node + * during iteration. There is no way to stop iteration once it has + * begun. + * arg : Opaque pointer passed to cb(). 
*/ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ @@ -985,6 +1002,28 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = NULL; \ } \ return (ret); \ +} \ +a_attr void \ +a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ + a_type *, void *), void *arg) { \ + if (node == &rbtree->rbt_nil) { \ + return; \ + } \ + a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_left_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ + node), cb, arg); \ + rbtn_right_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + if (cb) { \ + cb(node, arg); \ + } \ +} \ +a_attr void \ +a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ + void *arg) { \ + a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ + rbtree->rbt_root = &rbtree->rbt_nil; \ } #endif /* RB_H_ */ From 710ca112e31e8621177d08162f60158c27dd2974 Mon Sep 17 00:00:00 2001 From: Joshua Kahn Date: Mon, 21 Sep 2015 17:14:55 -0400 Subject: [PATCH 0061/2608] Add test for tree destruction --- test/unit/rb.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/unit/rb.c b/test/unit/rb.c index 0262037b..14132c13 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -212,6 +212,15 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) return (ret); } +static void +destroy_cb(node_t *node, void *data) +{ + unsigned *nnodes = (unsigned *)data; + + assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); + (*nnodes)--; +} + TEST_BEGIN(test_rb_random) { #define NNODES 25 @@ -278,7 +287,7 @@ TEST_BEGIN(test_rb_random) } /* Remove nodes. 
*/ - switch (i % 4) { + switch (i % 5) { case 0: for (k = 0; k < j; k++) node_remove(&tree, &nodes[k], j - k); @@ -314,6 +323,12 @@ TEST_BEGIN(test_rb_random) assert_u_eq(nnodes, 0, "Removal terminated early"); break; + } case 4: { + unsigned nnodes = j; + tree_destroy(&tree, destroy_cb, &nnodes); + assert_u_eq(nnodes, 0, + "Destruction terminated early"); + break; } default: not_reached(); } From f4a0f32d340985de477bbe329ecdaecd69ed1055 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 27 Oct 2015 15:12:10 -0700 Subject: [PATCH 0062/2608] Fast-path improvement: reduce # of branches and unnecessary operations. - Combine multiple runtime branches into a single malloc_slow check. - Avoid calling arena_choose / size2index / index2size on fast path. - A few micro optimizations. --- include/jemalloc/internal/arena.h | 63 +++--- .../jemalloc/internal/jemalloc_internal.h.in | 62 +++--- include/jemalloc/internal/prof.h | 6 +- include/jemalloc/internal/tcache.h | 116 +++++++---- src/arena.c | 26 +-- src/ckh.c | 10 +- src/huge.c | 6 +- src/jemalloc.c | 192 +++++++++++++----- src/prof.c | 37 ++-- src/quarantine.c | 20 +- src/tcache.c | 33 +-- 11 files changed, 357 insertions(+), 214 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9e2375ce..9715ad93 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -461,8 +461,10 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, bool zero); +void *arena_malloc_small(arena_t *arena, size_t size, szind_t ind, + bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, szind_t ind, + bool zero); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool 
zero, tcache_t *tcache); void arena_prof_promoted(const void *ptr, size_t size); @@ -558,11 +560,11 @@ prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache); +void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif @@ -1158,34 +1160,34 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, - tcache_t *tcache) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path) { assert(size != 0); + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return (tcache_alloc_small(tsd, arena, tcache, size, + ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { + return (tcache_alloc_large(tsd, arena, tcache, size, + ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. 
*/ + assert(size > tcache_maxclass); + } + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); - if (likely(size <= SMALL_MAXCLASS)) { - if (likely(tcache != NULL)) { - return (tcache_alloc_small(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_small(arena, size, zero)); - } else if (likely(size <= large_maxclass)) { - /* - * Initialize tcache after checking size in order to avoid - * infinite recursion during tcache initialization. - */ - if (likely(tcache != NULL) && size <= tcache_maxclass) { - return (tcache_alloc_large(tsd, arena, tcache, size, - zero)); - } else - return (arena_malloc_large(arena, size, zero)); - } else - return (huge_malloc(tsd, arena, size, zero, tcache)); + if (likely(size <= SMALL_MAXCLASS)) + return (arena_malloc_small(arena, size, ind, zero)); + if (likely(size <= large_maxclass)) + return (arena_malloc_large(arena, size, ind, zero)); + return (huge_malloc(tsd, arena, size, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1251,7 +1253,7 @@ arena_salloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; size_t pageind, mapbits; @@ -1268,7 +1270,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind, + slow_path); } else { arena_dalloc_small(extent_node_arena_get( &chunk->node), chunk, ptr, pageind); @@ -1283,7 +1286,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad); + large_pad, slow_path); } else { arena_dalloc_large(extent_node_arena_get( &chunk->node), chunk, ptr); @@ -1319,7 +1322,7 @@ arena_sdalloc(tsd_t 
*tsd, void *ptr, size_t size, tcache_t *tcache) /* Small allocation. */ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind); + tcache_dalloc_small(tsd, tcache, ptr, binind, true); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1331,7 +1334,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) PAGE_MASK) == 0); if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size); + tcache_dalloc_large(tsd, tcache, ptr, size, true); else { arena_dalloc_large(extent_node_arena_get( &chunk->node), chunk, ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 654cd088..d31da4ca 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -437,7 +437,7 @@ extern unsigned ncpus; * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES]; +extern size_t const index2size_tab[NSIZES+1]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. 
In order to reduce cache footprint, the table is compressed, @@ -624,7 +624,7 @@ JEMALLOC_ALWAYS_INLINE size_t index2size(szind_t index) { - assert(index < NSIZES); + assert(index <= NSIZES); return (index2size_lookup(index)); } @@ -823,12 +823,14 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); size_t isalloc(const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena); -void *imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size); -void *icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size); +void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); +void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, + arena_t *arena); +void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path); +void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, + arena_t *arena); +void *icalloc(tsd_t *tsd, size_t size, szind_t ind); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, @@ -837,10 +839,11 @@ void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(const void *ptr, bool demote); size_t u2rz(size_t usize); size_t p2rz(const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata); +void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path); void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void 
isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, @@ -881,14 +884,14 @@ isalloc(const void *ptr, bool demote) } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata, - arena_t *arena) +iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_metadata, arena_t *arena, bool slow_path) { void *ret; assert(size != 0); - ret = arena_malloc(tsd, arena, size, zero, tcache); + ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, config_prof)); @@ -897,31 +900,33 @@ iallocztm(tsd_t *tsd, size_t size, bool zero, tcache_t *tcache, bool is_metadata } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, false, tcache, false, arena)); + return (iallocztm(tsd, size, ind, false, tcache, false, arena, true)); } JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size) +imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path) { - return (iallocztm(tsd, size, false, tcache_get(tsd, true), false, NULL)); + return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false, + NULL, slow_path)); } JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, tcache_t *tcache, arena_t *arena) +icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) { - return (iallocztm(tsd, size, true, tcache, false, arena)); + return (iallocztm(tsd, size, ind, true, tcache, false, arena, true)); } JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size) +icalloc(tsd_t *tsd, size_t size, szind_t ind) { - return (iallocztm(tsd, 
size, true, tcache_get(tsd, true), false, NULL)); + return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false, + NULL, true)); } JEMALLOC_ALWAYS_INLINE void * @@ -997,7 +1002,8 @@ p2rz(const void *ptr) } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) +idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, + bool slow_path) { assert(ptr != NULL); @@ -1006,31 +1012,31 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata) config_prof)); } - arena_dalloc(tsd, ptr, tcache); + arena_dalloc(tsd, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) { - idalloctm(tsd, ptr, tcache, false); + idalloctm(tsd, ptr, tcache, false, true); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, tcache_get(tsd, false), false); + idalloctm(tsd, ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, tcache, false); + idalloctm(tsd, ptr, tcache, false, slow_path); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index e5198c3e..a25502a9 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -436,16 +436,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); tdata = prof_tdata_get(tsd, true); - if ((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX) + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) tdata = NULL; if (tdata_out != NULL) *tdata_out = tdata; - if (tdata == NULL) + if (unlikely(tdata == NULL)) return (true); - if (tdata->bytes_until_sample >= usize) { + if 
(likely(tdata->bytes_until_sample >= usize)) { if (update) tdata->bytes_until_sample -= usize; return (true); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 5079cd26..c2921405 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -70,6 +70,13 @@ struct tcache_bin_s { int low_water; /* Min # cached since last GC. */ unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ unsigned ncached; /* # of cached objects. */ + /* + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... 1] are available items and the lowest item will + * be allocated first. + */ void **avail; /* Stack of available objects. */ }; @@ -126,7 +133,7 @@ extern tcaches_t *tcaches; size_t tcache_salloc(const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind); + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, @@ -155,15 +162,15 @@ void tcache_flush(void); bool tcache_enabled_get(void); tcache_t *tcache_get(tsd_t *tsd, bool create); void tcache_enabled_set(bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin); +void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, bool zero); + size_t size, szind_t ind, bool zero, bool slow_path); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, 
void *ptr, - szind_t binind); + szind_t binind, bool slow_path); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size); + size_t size, bool slow_path); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -247,44 +254,69 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin) +tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { void *ret; if (unlikely(tbin->ncached == 0)) { tbin->low_water = -1; + *tcache_success = false; return (NULL); } + /* + * tcache_success (instead of ret) should be checked upon the return of + * this function. We avoid checking (ret == NULL) because there is + * never a null stored on the avail stack (which is unknown to the + * compiler), and eagerly checking ret would cause pipeline stall + * (waiting for the cacheline). + */ + *tcache_success = true; + ret = *(tbin->avail - tbin->ncached); tbin->ncached--; + if (unlikely((int)tbin->ncached < tbin->low_water)) tbin->low_water = tbin->ncached; - ret = tbin->avail[tbin->ncached]; + return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); - binind = size2index(size); assert(binind < NBINS); tbin = &tcache->tbins[binind]; - usize = index2size(binind); - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind); - if (ret == NULL) + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { + bool tcache_hard_success; + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind, + &tcache_hard_success); + if 
(tcache_hard_success == false) return (NULL); } - assert(tcache_salloc(ret) == usize); + + assert(ret); + /* + * Only compute usize if required. The checks in the following if + * statement are all static. + */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(tcache_salloc(ret) == usize); + } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); @@ -292,7 +324,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, memset(ret, 0, usize); } } else { - if (config_fill && unlikely(opt_junk_alloc)) { + if (slow_path && config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } @@ -309,28 +341,38 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - bool zero) + szind_t binind, bool zero, bool slow_path) { void *ret; - szind_t binind; - size_t usize; tcache_bin_t *tbin; + bool tcache_success; + size_t usize JEMALLOC_CC_SILENCE_INIT(0); - binind = size2index(size); - usize = index2size(binind); - assert(usize <= tcache_maxclass); assert(binind < nhbins); tbin = &tcache->tbins[binind]; - ret = tcache_alloc_easy(tbin); - if (unlikely(ret == NULL)) { + ret = tcache_alloc_easy(tbin, &tcache_success); + assert(tcache_success == (ret != NULL)); + if (unlikely(!tcache_success)) { /* * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. 
*/ - ret = arena_malloc_large(arena, usize, zero); + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + usize = index2size(binind); + assert(usize <= tcache_maxclass); + ret = arena_malloc_large(arena, usize, binind, zero); if (ret == NULL) return (NULL); } else { + /* Only compute usize on demand */ + if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + usize = index2size(binind); + assert(usize <= tcache_maxclass); + } + if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret); @@ -340,7 +382,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, BININD_INVALID); } if (likely(!zero)) { - if (config_fill) { + if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) memset(ret, 0xa5, usize); else if (unlikely(opt_zero)) @@ -360,14 +402,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) +tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, + bool slow_path) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); tbin = &tcache->tbins[binind]; @@ -377,14 +420,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind) (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, + bool slow_path) { szind_t binind; tcache_bin_t *tbin; @@ 
-396,7 +440,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) binind = size2index(size); - if (config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_large(ptr, size); tbin = &tcache->tbins[binind]; @@ -406,8 +450,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size) (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); - tbin->avail[tbin->ncached] = ptr; tbin->ncached++; + *(tbin->avail - tbin->ncached) = ptr; tcache_event(tsd, tcache); } diff --git a/src/arena.c b/src/arena.c index 844d721c..143afb9a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1990,11 +1990,10 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must - * be moved to the base of tbin->avail before bailing - * out. + * be moved just before tbin->avail before bailing out. */ if (i > 0) { - memmove(tbin->avail, &tbin->avail[nfill - i], + memmove(tbin->avail - i, tbin->avail - nfill, i * sizeof(void *)); } break; @@ -2004,7 +2003,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, true); } /* Insert such that low regions get used first. 
*/ - tbin->avail[nfill - 1 - i] = ptr; + *(tbin->avail - nfill + i) = ptr; } if (config_stats) { bin->stats.nmalloc += i; @@ -2125,14 +2124,12 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } void * -arena_malloc_small(arena_t *arena, size_t size, bool zero) +arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; arena_run_t *run; - szind_t binind; - binind = size2index(size); assert(binind < NBINS); bin = &arena->bins[binind]; size = index2size(binind); @@ -2179,7 +2176,7 @@ arena_malloc_small(arena_t *arena, size_t size, bool zero) } void * -arena_malloc_large(arena_t *arena, size_t size, bool zero) +arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) { void *ret; size_t usize; @@ -2189,7 +2186,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) UNUSED bool idump; /* Large allocation. */ - usize = s2u(size); + usize = index2size(binind); malloc_mutex_lock(&arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2214,7 +2211,7 @@ arena_malloc_large(arena_t *arena, size_t size, bool zero) ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + random_offset); if (config_stats) { - szind_t index = size2index(usize) - NBINS; + szind_t index = binind - NBINS; arena->stats.nmalloc_large++; arena->stats.nrequests_large++; @@ -2336,7 +2333,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true); } else if (usize <= large_maxclass && alignment <= PAGE) { /* * Large; alignment doesn't require special run placement. 
@@ -2344,7 +2342,8 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. */ - ret = arena_malloc(tsd, arena, usize, zero, tcache); + ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { @@ -2823,7 +2822,8 @@ arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, { if (alignment == 0) - return (arena_malloc(tsd, arena, usize, zero, tcache)); + return (arena_malloc(tsd, arena, usize, size2index(usize), zero, + tcache, true)); usize = sa2u(usize, alignment); if (usize == 0) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index 53a1c1ef..e4328d22 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd, tab, tcache_get(tsd, false), true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true); + idalloctm(tsd, tab, tcache_get(tsd, false), true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true); + idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); if (config_debug) memset(ckh, 0x5a, sizeof(ckh_t)); } diff --git a/src/huge.c b/src/huge.c index 1e9a6651..c1fa3795 100644 --- a/src/huge.c +++ b/src/huge.c @@ -75,7 +75,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, size, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -83,7 +83,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (huge_node_set(ret, node)) { arena_chunk_dalloc_huge(arena, ret, size); - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -372,7 +372,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) extent_node_size_get(node)); arena_chunk_dalloc_huge(extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true); + idalloctm(tsd, node, tcache, true, true); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index 5a2d3240..eed6331d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,12 +70,29 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; +/* 0 should be the common case. Set to true to trigger initialization. */ +static bool malloc_slow = true; + +/* When malloc_slow != 0, set the corresponding bits for sanity check. 
*/ +enum { + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_quarantine = (1U << 2), + flag_opt_zero = (1U << 3), + flag_opt_utrace = (1U << 4), + flag_in_valgrind = (1U << 5), + flag_opt_xmalloc = (1U << 6) +}; +static uint8_t malloc_slow_flags; + +/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES] = { +const size_t index2size_tab[NSIZES+1] = { #define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ ((ZU(1)<: Error in malloc(): " + "out of memory\n"); + abort(); + } + set_errno(ENOMEM); + } + if (config_stats && likely(ret != NULL)) { + assert(usize == isalloc(ret, config_prof)); + *tsd_thread_allocatedp_get(tsd) += usize; + } } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1424,21 +1488,20 @@ je_malloc(size_t size) if (size == 0) size = 1; - ret = imalloc_body(size, &tsd, &usize); - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in malloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); + if (likely(!malloc_slow)) { + /* + * imalloc_body() is inlined so that fast and slow paths are + * generated separately with statically known slow_path. 
+ */ + ret = imalloc_body(size, &tsd, &usize, false); + imalloc_post_check(ret, tsd, usize, false); + } else { + ret = imalloc_body(size, &tsd, &usize, true); + imalloc_post_check(ret, tsd, usize, true); + UTRACE(0, size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + return (ret); } @@ -1576,34 +1639,35 @@ je_aligned_alloc(size_t alignment, size_t size) } static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) +icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = icalloc(tsd, LARGE_MINCLASS); + szind_t ind_large = size2index(LARGE_MINCLASS); + p = icalloc(tsd, LARGE_MINCLASS, ind_large); if (p == NULL) return (NULL); arena_prof_promoted(p, usize); } else - p = icalloc(tsd, usize); + p = icalloc(tsd, usize, ind); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize) +icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, tctx); + p = icalloc_prof_sample(tsd, usize, ind, tctx); else - p = icalloc(tsd, usize); + p = icalloc(tsd, usize, ind); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1621,6 +1685,7 @@ je_calloc(size_t num, size_t size) void *ret; tsd_t *tsd; size_t num_size; + szind_t ind; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(malloc_init())) { @@ -1650,17 +1715,18 @@ je_calloc(size_t num, size_t size) goto label_return; } + ind = size2index(num_size); if (config_prof && opt_prof) { - usize = s2u(num_size); + usize = 
index2size(ind); if (unlikely(usize == 0)) { ret = NULL; goto label_return; } - ret = icalloc_prof(tsd, usize); + ret = icalloc_prof(tsd, usize, ind); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = s2u(num_size); - ret = icalloc(tsd, num_size); + usize = index2size(ind); + ret = icalloc(tsd, num_size, ind); } label_return: @@ -1725,7 +1791,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1740,10 +1806,15 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache) usize = isalloc(ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); - iqalloc(tsd, ptr, tcache); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); + + if (likely(!slow_path)) + iqalloc(tsd, ptr, tcache, false); + else { + if (config_valgrind && unlikely(in_valgrind)) + rzsize = p2rz(ptr); + iqalloc(tsd, ptr, tcache, true); + JEMALLOC_VALGRIND_FREE(ptr, rzsize); + } } JEMALLOC_INLINE_C void @@ -1780,7 +1851,7 @@ je_realloc(void *ptr, size_t size) /* realloc(ptr, 0) is equivalent to free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + ifree(tsd, ptr, tcache_get(tsd, false), true); return (NULL); } size = 1; @@ -1807,7 +1878,10 @@ je_realloc(void *ptr, size_t size) } } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - ret = imalloc_body(size, &tsd, &usize); + if (likely(!malloc_slow)) + ret = imalloc_body(size, &tsd, &usize, false); + else + ret = imalloc_body(size, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1836,7 +1910,10 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false)); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache_get(tsd, false), false); + else + ifree(tsd, ptr, tcache_get(tsd, false), true); } } @@ -1965,12 +2042,14 @@ JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { + szind_t ind; + ind = size2index(usize); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (unlikely(zero)) - return (icalloct(tsd, usize, tcache, arena)); - return (imalloct(tsd, usize, tcache, arena)); + return (icalloct(tsd, usize, ind, tcache, arena)); + return (imalloct(tsd, usize, ind, tcache, arena)); } static void * @@ -2034,9 +2113,10 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) arena_t *arena; if (likely(flags == 0)) { + szind_t ind = size2index(size); if (config_stats || (config_valgrind && unlikely(in_valgrind))) - *usize = s2u(size); - return (imalloc(tsd, size)); + *usize = index2size(ind); + return (imalloc(tsd, size, ind, true)); } if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, @@ -2375,7 +2455,7 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache); + ifree(tsd_fetch(), ptr, tcache, true); } JEMALLOC_ALWAYS_INLINE_C size_t diff --git a/src/prof.c b/src/prof.c index 5d2b9598..199e63e4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -551,9 +551,9 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) /* * Create a single allocation that has space for vec of length bt->len. 
*/ - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, offsetof(prof_gctx_t, - vec) + (bt->len * sizeof(void *)), false, tcache_get(tsd, true), - true, NULL); + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, + size2index(size), false, tcache_get(tsd, true), true, NULL, true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -594,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true); + idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -701,7 +701,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true); + idalloctm(tsd, tctx, tcache_get(tsd, false), true, true); } static bool @@ -730,7 +730,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true); + idalloctm(tsd, gctx.v, tcache_get(tsd, false), true, + true); return (true); } new_gctx = true; @@ -789,8 +790,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. 
*/ tcache = tcache_get(tsd, true); - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), false, tcache, true, - NULL); + ret.v = iallocztm(tsd, sizeof(prof_tctx_t), + size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL, + true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,7 +812,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true); + idalloctm(tsd, ret.v, tcache, true, true); return (NULL); } malloc_mutex_lock(gctx->lock); @@ -1211,7 +1213,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) tctx_tree_remove(&gctx->tctxs, to_destroy); idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true); + tcache_get(tsd, false), true, true); } else next = NULL; } while (next != NULL); @@ -1714,8 +1716,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, /* Initialize an empty cache for this thread. */ tcache = tcache_get(tsd, true); - tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), false, - tcache, true, NULL); + tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), + size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true); if (tdata == NULL) return (NULL); @@ -1729,7 +1731,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd, tdata, tcache, true, true); return (NULL); } @@ -1784,9 +1786,9 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true); + idalloctm(tsd, tdata->thread_name, tcache, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true); + idalloctm(tsd, tdata, tcache, true, true); } static void @@ -1947,7 +1949,8 @@ prof_thread_name_alloc(tsd_t 
*tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, false, tcache_get(tsd, true), true, NULL); + ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd, + true), true, NULL, true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -1980,7 +1983,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) if (tdata->thread_name != NULL) { idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true); + true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 6c43dfca..ff8801cb 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -23,12 +23,14 @@ static quarantine_t * quarantine_init(tsd_t *tsd, size_t lg_maxobjs) { quarantine_t *quarantine; + size_t size; assert(tsd_nominal(tsd)); - quarantine = (quarantine_t *)iallocztm(tsd, offsetof(quarantine_t, objs) - + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)), false, - tcache_get(tsd, true), true, NULL); + size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * + sizeof(quarantine_obj_t)); + quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), + false, tcache_get(tsd, true), true, NULL, true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -55,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); } static quarantine_t * @@ -87,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -98,7 +100,7 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = 
&quarantine->objs[quarantine->first]; assert(obj->usize == isalloc(obj->ptr, config_prof)); - idalloctm(tsd, obj->ptr, NULL, false); + idalloctm(tsd, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -123,7 +125,7 @@ quarantine(tsd_t *tsd, void *ptr) assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd, ptr, NULL, false, true); return; } /* @@ -162,7 +164,7 @@ quarantine(tsd_t *tsd, void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd, ptr, NULL, false); + idalloctm(tsd, ptr, NULL, false, true); } } @@ -177,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true); + idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index fdafd0c6..78c62300 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) void * tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind) + tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; @@ -80,7 +80,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; - ret = tcache_alloc_easy(tbin); + ret = tcache_alloc_easy(tbin, tcache_success); return (ret); } @@ -102,7 +102,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *bin_arena = extent_node_arena_get(&chunk->node); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -122,7 +122,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == bin_arena) { @@ -139,7 +139,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } @@ -158,8 +158,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, malloc_mutex_unlock(&bin->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -182,7 +182,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. 
*/ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - tbin->avail[0]); + *(tbin->avail - 1)); arena_t *locked_arena = extent_node_arena_get(&chunk->node); UNUSED bool idump; @@ -206,7 +206,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } ndeferred = 0; for (i = 0; i < nflush; i++) { - ptr = tbin->avail[i]; + ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == @@ -220,7 +220,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * Stash the object, so that it can be handled * in a future pass. */ - tbin->avail[ndeferred] = ptr; + *(tbin->avail - 1 - ndeferred) = ptr; ndeferred++; } } @@ -241,8 +241,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, malloc_mutex_unlock(&arena->lock); } - memmove(tbin->avail, &tbin->avail[tbin->ncached - rem], - rem * sizeof(void *)); + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); tbin->ncached = rem; if ((int)tbin->ncached < tbin->low_water) tbin->low_water = tbin->ncached; @@ -333,9 +333,14 @@ tcache_create(tsd_t *tsd, arena_t *arena) assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + /* + * avail points past the available space. Allocations will + * access the slots toward higher addresses (for the benefit of + * prefetch). 
+ */ tcache->tbins[i].avail = (void **)((uintptr_t)tcache + (uintptr_t)stack_offset); - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); } return (tcache); @@ -379,7 +384,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(arena, tcache->prof_accumbytes)) prof_idump(); - idalloctm(tsd, tcache, false, true); + idalloctm(tsd, tcache, false, true, true); } void From ea59ebf4d3c2a5749e170cc45c294e04129e5b49 Mon Sep 17 00:00:00 2001 From: Dmitry-Me Date: Thu, 12 Nov 2015 14:59:29 +0300 Subject: [PATCH 0063/2608] Reuse previously computed value --- src/zone.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/zone.c b/src/zone.c index 12e1734a..6859b3fe 100644 --- a/src/zone.c +++ b/src/zone.c @@ -121,9 +121,11 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { + size_t alloc_size; - if (ivsalloc(ptr, config_prof) != 0) { - assert(ivsalloc(ptr, config_prof) == size); + alloc_size = ivsalloc(ptr, config_prof); + if (alloc_size != 0) { + assert(alloc_size == size); je_free(ptr); return; } From a6ec1c869e1abe3eb70616d19d3e553339449636 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Nov 2015 10:51:32 -0800 Subject: [PATCH 0064/2608] Fix a comment. --- include/jemalloc/internal/tcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c2921405..aa73060a 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -74,7 +74,7 @@ struct tcache_bin_s { * To make use of adjacent cacheline prefetch, the items in the avail * stack goes to higher address for newer allocations. avail points * just above the available space, which means that - * avail[-ncached, ... 1] are available items and the lowest item will + * avail[-ncached, ... 
-1] are available items and the lowest item will * be allocated first. */ void **avail; /* Stack of available objects. */ From f9e3459f751b08b3c2108fda7462827cf8a4f2af Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Nov 2015 11:06:41 -0800 Subject: [PATCH 0065/2608] Tweak code to allow compilation of concatenated src/*.c sources. This resolves #294. --- include/jemalloc/internal/assert.h | 45 ++++++++++++++++++++++++++++++ include/jemalloc/internal/util.h | 44 +---------------------------- src/ctl.c | 6 ++-- src/util.c | 13 +++++++++ 4 files changed, 62 insertions(+), 46 deletions(-) create mode 100644 include/jemalloc/internal/assert.h diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h new file mode 100644 index 00000000..6f8f7eb9 --- /dev/null +++ b/include/jemalloc/internal/assert.h @@ -0,0 +1,45 @@ +/* + * Define a custom assert() in order to reduce the chances of deadlock during + * assertion failure. + */ +#ifndef assert +#define assert(e) do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef not_reached +#define not_reached() do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ +} while (0) +#endif + +#ifndef not_implemented +#define not_implemented() do { \ + if (config_debug) { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ +} while (0) +#endif + +#ifndef assert_not_implemented +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) \ + not_implemented(); \ +} while (0) +#endif + + diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b2ea740f..0bccea24 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -81,49 +81,7 @@ # define 
unreachable() #endif -/* - * Define a custom assert() in order to reduce the chances of deadlock during - * assertion failure. - */ -#ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) -#endif - -#ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) -#endif - -#ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ - not_implemented(); \ -} while (0) -#endif +#include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #define cassert(c) do { \ diff --git a/src/ctl.c b/src/ctl.c index 3de8e602..db1ddcb5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -115,7 +115,7 @@ CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) CTL_PROTO(arena_i_purge) -static void arena_purge(unsigned arena_ind); +static void arena_i_purge(unsigned arena_ind); CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) CTL_PROTO(arena_i_chunk_hooks) @@ -1538,7 +1538,7 @@ label_return: /* ctl_mutex must be held during execution of this function. 
*/ static void -arena_purge(unsigned arena_ind) +arena_i_purge(unsigned arena_ind) { tsd_t *tsd; unsigned i; @@ -1576,7 +1576,7 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); malloc_mutex_lock(&ctl_mtx); - arena_purge(mib[1]); + arena_i_purge(mib[1]); malloc_mutex_unlock(&ctl_mtx); ret = 0; diff --git a/src/util.c b/src/util.c index 4cb0d6c1..1373ee15 100644 --- a/src/util.c +++ b/src/util.c @@ -1,3 +1,7 @@ +/* + * Define simple versions of assertion macros that won't recurse in case + * of assertion failures in malloc_*printf(). + */ #define assert(e) do { \ if (config_debug && !(e)) { \ malloc_write(": Failed assertion\n"); \ @@ -648,3 +652,12 @@ malloc_printf(const char *format, ...) malloc_vcprintf(NULL, NULL, format, ap); va_end(ap); } + +/* + * Restore normal assertion macros, in order to make it possible to compile all + * C files as a single concatenation. + */ +#undef assert +#undef not_reached +#undef not_implemented +#include "jemalloc/internal/assert.h" From 3a92319ddc5610b755f755cbbbd12791ca9d0c3d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 Nov 2015 11:23:39 -0800 Subject: [PATCH 0066/2608] Use AC_CONFIG_AUX_DIR([build-aux]). This resolves #293. 
--- config.guess => build-aux/config.guess | 0 config.sub => build-aux/config.sub | 0 install-sh => build-aux/install-sh | 0 configure.ac | 2 ++ 4 files changed, 2 insertions(+) rename config.guess => build-aux/config.guess (100%) rename config.sub => build-aux/config.sub (100%) rename install-sh => build-aux/install-sh (100%) diff --git a/config.guess b/build-aux/config.guess similarity index 100% rename from config.guess rename to build-aux/config.guess diff --git a/config.sub b/build-aux/config.sub similarity index 100% rename from config.sub rename to build-aux/config.sub diff --git a/install-sh b/build-aux/install-sh similarity index 100% rename from install-sh rename to build-aux/install-sh diff --git a/configure.ac b/configure.ac index 7a1290e0..5a3eba27 100644 --- a/configure.ac +++ b/configure.ac @@ -1,6 +1,8 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT([Makefile.in]) +AC_CONFIG_AUX_DIR([build-aux]) + dnl ============================================================================ dnl Custom macro definitions. From 43de1b3ebc928fa0884422ccd0a2e9cd233d1059 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Dec 2015 11:42:08 -0800 Subject: [PATCH 0067/2608] Implement --retain and --exclude in jeprof. These options make it possible to filter symbolized backtrace frames using regular expressions. 
--- bin/jeprof.in | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index d00ef5db..444041ec 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -223,12 +223,14 @@ Call-graph Options: --nodefraction= Hide nodes below *total [default=.005] --edgefraction= Hide edges below *total [default=.001] --maxdegree= Max incoming/outgoing edges per node [default=8] - --focus= Focus on nodes matching + --focus= Focus on backtraces with nodes matching --thread= Show profile for thread - --ignore= Ignore nodes matching + --ignore= Ignore backtraces with nodes matching --scale= Set GV scaling [default=0] --heapcheck Make nodes with non-0 object counts (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match Miscellaneous: --tools=[,...] \$PATH for object tool pathnames @@ -339,6 +341,8 @@ sub Init() { $main::opt_ignore = ''; $main::opt_scale = 0; $main::opt_heapcheck = 0; + $main::opt_retain = ''; + $main::opt_exclude = ''; $main::opt_seconds = 30; $main::opt_lib = ""; @@ -410,6 +414,8 @@ sub Init() { "ignore=s" => \$main::opt_ignore, "scale=i" => \$main::opt_scale, "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, "inuse_space!" => \$main::opt_inuse_space, "inuse_objects!" => \$main::opt_inuse_objects, "alloc_space!" 
=> \$main::opt_alloc_space, @@ -2840,6 +2846,43 @@ sub ExtractCalls { return $calls; } +sub FilterFrames { + my $symbols = shift; + my $profile = shift; + + if ($main::opt_retain eq '' && $main::opt_exclude eq '') { + return $profile; + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + my $sym; + if (exists($symbols->{$a})) { + $sym = $symbols->{$a}->[0]; + } else { + $sym = $a; + } + if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { + next; + } + if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { + next; + } + push(@path, $a); + } + if (scalar(@path) > 0) { + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + } + + return $result; +} + sub RemoveUninterestingFrames { my $symbols = shift; my $profile = shift; @@ -2984,6 +3027,9 @@ sub RemoveUninterestingFrames { my $reduced_path = join("\n", @path); AddEntry($result, $reduced_path, $count); } + + $result = FilterFrames($symbols, $result); + return $result; } From 9cb481a73f6d2b518f695a669c1f850e477fdd2c Mon Sep 17 00:00:00 2001 From: Cosmin Paraschiv Date: Mon, 11 Jan 2016 11:05:00 -0800 Subject: [PATCH 0068/2608] Call malloc_test_boot0() from malloc_init_hard_recursible(). When using LinuxThreads, malloc bootstrapping deadlocks, since malloc_tsd_boot0() ends up calling pthread_setspecific(), which causes recursive allocation. Fix it by moving the malloc_tsd_boot0() call to malloc_init_hard_recursible(). The deadlock was introduced by 8bb3198f72fc7587dc93527f9f19fb5be52fa553 (Refactor/fix arenas manipulation.), when tsd_boot() was split and the top half, tsd_boot0(), got an extra tsd_wrapper_set() call. 
--- src/jemalloc.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index eed6331d..fab0eb05 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1276,26 +1276,37 @@ malloc_init_hard_a0(void) * * init_lock must be held. */ -static void +static bool malloc_init_hard_recursible(void) { + bool ret = false; malloc_init_state = malloc_init_recursible; malloc_mutex_unlock(&init_lock); + /* LinuxThreads' pthread_setspecific() allocates. */ + if (malloc_tsd_boot0()) { + ret = true; + goto label_return; + } + ncpus = malloc_ncpus(); #if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ && !defined(_WIN32) && !defined(__native_client__)) - /* LinuxThreads's pthread_atfork() allocates. */ + /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { + ret = true; malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); } #endif + +label_return: malloc_mutex_lock(&init_lock); + return (ret); } /* init_lock must be held. */ @@ -1365,16 +1376,16 @@ malloc_init_hard(void) malloc_mutex_unlock(&init_lock); return (true); } - if (malloc_tsd_boot0()) { - malloc_mutex_unlock(&init_lock); - return (true); - } - if (config_prof && prof_boot2()) { + + if (malloc_init_hard_recursible()) { malloc_mutex_unlock(&init_lock); return (true); } - malloc_init_hard_recursible(); + if (config_prof && prof_boot2()) { + malloc_mutex_unlock(&init_lock); + return (true); + } if (malloc_init_hard_finish()) { malloc_mutex_unlock(&init_lock); From fdbb950495b1f3e000a816f921ed1d97ca4953cb Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 12 Jan 2016 14:47:00 -0800 Subject: [PATCH 0069/2608] Don't discard curl options if timeout is not defined. 
Merge of https://github.com/gperftools/gperftools/commit/5078abdb331e63d7a216994f186eb736861f8df7 --- bin/jeprof.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index 444041ec..a2402f40 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -3339,7 +3339,7 @@ sub ResolveRedirectionForCurl { # Add a timeout flat to URL_FETCHER. Returns a new list. sub AddFetchTimeout { my $timeout = shift; - my @fetcher = shift; + my @fetcher = @_; if (defined($timeout)) { if (join(" ", @fetcher) =~ m/\bcurl -s/) { push(@fetcher, "--max-time", sprintf("%d", $timeout)); From f459d5a2034e733eab74cc9b029dfec2ff13b196 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 12 Jan 2016 14:48:09 -0800 Subject: [PATCH 0070/2608] Detect failed profile fetches Summary: Currently an HTTP error response will still try to be parsed, resulting in these messages: substr outside of string at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3635, line 1. Use of uninitialized value in string eq at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3635, line 1. substr outside of string at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3637, line 1. Use of uninitialized value in string eq at /home/davejwatson/local/jemalloc-github/bin/jeprof line 3637, line 1. /home/davejwatson/jeprof/server.1452638936.localhost.pprof.heap: header size >= 2**16 After this fix, curl will return an error status code that will be correctly checked at line 3536, resulting in this error message: Failed to get profile: curl -s --fail 'http://localhost:4010/pprof/heap' > /home/davejwatson/jeprof/.tmp.server.1452639085.localhost.pprof.heap: No such file or directory Test Plan: Tested with MALLOC_CONF="prof:false". Also tested fetching symbols. 
Didn't test redirects, but this should only affect http error codes >= 400 --- bin/jeprof.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index a2402f40..dfd9195e 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -95,7 +95,7 @@ my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread my @KCACHEGRIND = ("kcachegrind"); my @PS2PDF = ("ps2pdf"); # These are used for dynamic profiles -my @URL_FETCHER = ("curl", "-s"); +my @URL_FETCHER = ("curl", "-s", "--fail"); # These are the web pages that servers need to support for dynamic profiles my $HEAP_PAGE = "/pprof/heap"; From d1acd1bea9bc2735b53ac68fa98891cab8c71d02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 29 Jan 2016 19:59:06 -0800 Subject: [PATCH 0071/2608] Pass retain and exclude parameters to /pprof/symbol. Pass the retain and exclude parameters to the /pprof/symbol pprof server endpoint so that the server has the opportunity to optimize which symbols it looks up and/or returns mappings for. --- bin/jeprof.in | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index dfd9195e..42087fce 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -3385,6 +3385,27 @@ sub ReadSymbols { return $map; } +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + # Fetches and processes symbols to prepare them for use in the profile output # code. If the optional 'symbol_map' arg is not given, fetches symbols from # $SYMBOL_PAGE for all PC values found in profile. 
Otherwise, the raw symbols @@ -3409,9 +3430,11 @@ sub FetchSymbols { my $command_line; if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", $url); } else { + $url = AppendSymbolFilterParams($url); $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) . " < " . ShellEscape($main::tmpfile_sym)); } From 109712b681cd146cb88b17d743fe53314c962144 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 12:08:14 -0800 Subject: [PATCH 0072/2608] Fix a documentation editing error. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 26a5e142..519ba3fa 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1467,7 +1467,7 @@ malloc_conf = "xmalloc:true";]]> Flush the specified thread-specific cache (tcache). The same considerations apply to this interface as to thread.tcache.flush, - except that the tcache will never be automatically be discarded. + except that the tcache will never be automatically discarded. From b24f74b8624fb43f9b562a067ca14563f45cb553 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 17 Feb 2016 06:40:33 -0800 Subject: [PATCH 0073/2608] Don't rely on unpurged chunks in xallocx() test. --- test/integration/xallocx.c | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 00451961..5c4998b6 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -305,63 +305,63 @@ TEST_END TEST_BEGIN(test_extra_huge) { int flags = MALLOCX_ARENA(arena_ind()); - size_t largemax, huge0, huge1, huge2, hugemax; + size_t largemax, huge1, huge2, huge3, hugemax; void *p; /* Get size classes. 
*/ largemax = get_large_size(get_nlarge()-1); - huge0 = get_huge_size(0); huge1 = get_huge_size(1); huge2 = get_huge_size(2); + huge3 = get_huge_size(3); hugemax = get_huge_size(get_nhuge()-1); - p = mallocx(huge2, flags); + p = mallocx(huge3, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, flags), huge0, + assert_zu_ge(xallocx(p, largemax, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. */ - assert_zu_eq(xallocx(p, huge0, huge2 - huge0, flags), huge2, + assert_zu_eq(xallocx(p, huge1, huge3 - huge1, flags), huge3, + "Unexpected xallocx() behavior"); + assert_zu_eq(xallocx(p, huge2, huge3 - huge2, flags), huge3, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, huge1 - huge0, flags), huge1, - "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge0 - largemax, flags), huge0, + assert_zu_ge(xallocx(p, largemax, huge1 - largemax, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. 
*/ - assert_zu_le(xallocx(p, huge2, 0, flags), huge2, + assert_zu_le(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge2, + assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, SIZE_T_MAX - huge0, flags), hugemax, + assert_zu_le(xallocx(p, huge1, SIZE_T_MAX - huge1, flags), hugemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge0, 0, flags), huge0, + assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge0, huge2 - huge0, flags), huge2, + assert_zu_le(xallocx(p, huge1, huge3 - huge1, flags), huge3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, 0, flags), huge2, + assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, "Unexpected xallocx() behavior"); /* Test size+extra overflow. */ - assert_zu_le(xallocx(p, huge2, hugemax - huge2 + 1, flags), hugemax, + assert_zu_le(xallocx(p, huge3, hugemax - huge3 + 1, flags), hugemax, "Unexpected xallocx() behavior"); dallocx(p, flags); From 49931bd8ffeffde72865990d74b7ff65d6e8c466 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 18:24:30 -0800 Subject: [PATCH 0074/2608] Fix test_stats_arenas_summary fragility. Fix test_stats_arenas_summary to deallocate before asserting that purging must have happened. 
--- test/unit/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 8e4bc631..6e803160 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -93,6 +93,10 @@ TEST_BEGIN(test_stats_arenas_summary) huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); + dallocx(little, 0); + dallocx(large, 0); + dallocx(huge, 0); + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -116,10 +120,6 @@ TEST_BEGIN(test_stats_arenas_summary) assert_u64_le(nmadvise, purged, "nmadvise should be no greater than purged"); } - - dallocx(little, 0); - dallocx(large, 0); - dallocx(huge, 0); } TEST_END From ef349f3f944b9b40bdeeff6cc322ef753f1ad4be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 18:29:43 -0800 Subject: [PATCH 0075/2608] Fix arena_sdalloc() line wrapping. --- include/jemalloc/internal/arena.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 9715ad93..b6824896 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1306,7 +1306,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); + assert(arena_mapbits_allocated_get(chunk, pageind) != + 0); if (arena_mapbits_large_get(chunk, pageind) != 0) { /* * Make sure to use promoted size, not request @@ -1322,7 +1323,8 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) /* Small allocation. 
*/ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind, true); + tcache_dalloc_small(tsd, tcache, ptr, binind, + true); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1333,9 +1335,10 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); - if (likely(tcache != NULL) && size <= tcache_maxclass) - tcache_dalloc_large(tsd, tcache, ptr, size, true); - else { + if (likely(tcache != NULL) && size <= tcache_maxclass) { + tcache_dalloc_large(tsd, tcache, ptr, size, + true); + } else { arena_dalloc_large(extent_node_arena_get( &chunk->node), chunk, ptr); } From f829009929bdce17bef8a963264a92e39271a166 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 7 Feb 2016 14:23:22 -0800 Subject: [PATCH 0076/2608] Add --with-malloc-conf. Add --with-malloc-conf, which makes it possible to embed a default options string during configuration. --- INSTALL | 8 ++++ configure.ac | 10 +++++ doc/jemalloc.xml.in | 28 ++++++++++---- .../jemalloc/internal/jemalloc_internal.h.in | 1 + .../internal/jemalloc_internal_defs.h.in | 3 ++ src/ctl.c | 37 ++++++++++--------- src/jemalloc.c | 9 +++-- src/stats.c | 2 + test/unit/mallctl.c | 33 +++++++++-------- 9 files changed, 87 insertions(+), 44 deletions(-) diff --git a/INSTALL b/INSTALL index 8d396874..5c25054a 100644 --- a/INSTALL +++ b/INSTALL @@ -84,6 +84,14 @@ any of the following arguments (not a definitive list) to 'configure': versions of jemalloc can coexist in the same installation directory. For example, libjemalloc.so.0 becomes libjemalloc.so.0. +--with-malloc-conf= + Embed as a run-time options string that is processed prior to + the malloc_conf global variable, the /etc/malloc.conf symlink, and the + MALLOC_CONF environment variable. 
For example, to change the default chunk + size to 256 KiB: + + --with-malloc-conf=lg_chunk:18 + --disable-cc-silence Disable code that silences non-useful compiler warnings. This is mainly useful during development when auditing the set of warnings that are being diff --git a/configure.ac b/configure.ac index 5a3eba27..5232c8f3 100644 --- a/configure.ac +++ b/configure.ac @@ -577,6 +577,15 @@ AC_ARG_WITH([install_suffix], install_suffix="$INSTALL_SUFFIX" AC_SUBST([install_suffix]) +dnl Specify default malloc_conf. +AC_ARG_WITH([malloc_conf], + [AS_HELP_STRING([--with-malloc-conf=], [config.malloc_conf options string])], + [JEMALLOC_CONFIG_MALLOC_CONF="$with_malloc_conf"], + [JEMALLOC_CONFIG_MALLOC_CONF=""] +) +config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" +AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"]) + dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of dnl jemalloc_protos_jet.h easy. je_="je_" @@ -1726,6 +1735,7 @@ AC_MSG_RESULT([JEMALLOC_PREFIX : ${JEMALLOC_PREFIX}]) AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) +AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 519ba3fa..48765b01 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -455,19 +455,20 @@ for (i = 0; i < nbins; i++) { routines, the allocator initializes its internals based in part on various options that can be specified at compile- or run-time. 
- The string pointed to by the global variable - malloc_conf, the “name” of the file - referenced by the symbolic link named /etc/malloc.conf, and the value of the + The string specified via , the + string pointed to by the global variable malloc_conf, the + “name” of the file referenced by the symbolic link named + /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before main is entered, so the declaration of malloc_conf should specify an initializer that contains - the final value to be read by jemalloc. malloc_conf is - a compile-time setting, whereas /etc/malloc.conf and MALLOC_CONF - can be safely set any time prior to program invocation. + the final value to be read by jemalloc. + and malloc_conf are compile-time mechanisms, whereas + /etc/malloc.conf and + MALLOC_CONF can be safely set any time prior to program + invocation. An options string is a comma-separated list of option:value pairs. There is one key corresponding to each + + + config.malloc_conf + (const char *) + r- + + Embedded configure-time-specified run-time options + string, empty unless was specified + during build configuration. 
+ + config.munmap diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d31da4ca..8c507f79 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -49,6 +49,7 @@ static const bool config_lazy_lock = false #endif ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; static const bool config_prof = #ifdef JEMALLOC_PROF true diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b0f8caaf..c84e27c9 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -259,4 +259,7 @@ */ #undef JEMALLOC_EXPORT +/* config.malloc_conf options string. */ +#undef JEMALLOC_CONFIG_MALLOC_CONF + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/ctl.c b/src/ctl.c index db1ddcb5..9618d632 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -77,6 +77,7 @@ CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) +CTL_PROTO(config_malloc_conf) CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) @@ -241,6 +242,7 @@ static const ctl_named_node_t config_node[] = { {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("malloc_conf"), CTL(config_malloc_conf)}, {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, @@ -1199,17 +1201,17 @@ label_return: \ return (ret); \ } -#define CTL_RO_BOOL_CONFIG_GEN(n) \ +#define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ void *newp, size_t newlen) \ { \ int ret; \ - bool oldval; \ + t oldval; \ \ READONLY(); \ oldval = n; \ - READ(oldval, bool); \ + READ(oldval, t); \ \ ret = 0; \ label_return: \ 
@@ -1241,20 +1243,21 @@ label_return: /******************************************************************************/ -CTL_RO_BOOL_CONFIG_GEN(config_cache_oblivious) -CTL_RO_BOOL_CONFIG_GEN(config_debug) -CTL_RO_BOOL_CONFIG_GEN(config_fill) -CTL_RO_BOOL_CONFIG_GEN(config_lazy_lock) -CTL_RO_BOOL_CONFIG_GEN(config_munmap) -CTL_RO_BOOL_CONFIG_GEN(config_prof) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libgcc) -CTL_RO_BOOL_CONFIG_GEN(config_prof_libunwind) -CTL_RO_BOOL_CONFIG_GEN(config_stats) -CTL_RO_BOOL_CONFIG_GEN(config_tcache) -CTL_RO_BOOL_CONFIG_GEN(config_tls) -CTL_RO_BOOL_CONFIG_GEN(config_utrace) -CTL_RO_BOOL_CONFIG_GEN(config_valgrind) -CTL_RO_BOOL_CONFIG_GEN(config_xmalloc) +CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) +CTL_RO_CONFIG_GEN(config_debug, bool) +CTL_RO_CONFIG_GEN(config_fill, bool) +CTL_RO_CONFIG_GEN(config_lazy_lock, bool) +CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) +CTL_RO_CONFIG_GEN(config_munmap, bool) +CTL_RO_CONFIG_GEN(config_prof, bool) +CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) +CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_stats, bool) +CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_tls, bool) +CTL_RO_CONFIG_GEN(config_utrace, bool) +CTL_RO_CONFIG_GEN(config_valgrind, bool) +CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index fab0eb05..8415c0e2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -902,10 +902,13 @@ malloc_conf_init(void) opt_tcache = false; } - for (i = 0; i < 3; i++) { + for (i = 0; i < 4; i++) { /* Get runtime configuration. 
*/ switch (i) { case 0: + opts = config_malloc_conf; + break; + case 1: if (je_malloc_conf != NULL) { /* * Use options that were compiled into the @@ -918,7 +921,7 @@ malloc_conf_init(void) opts = buf; } break; - case 1: { + case 2: { int linklen = 0; #ifndef _WIN32 int saved_errno = errno; @@ -945,7 +948,7 @@ malloc_conf_init(void) buf[linklen] = '\0'; opts = buf; break; - } case 2: { + } case 3: { const char *envname = #ifdef JEMALLOC_PREFIX JEMALLOC_CPREFIX"MALLOC_CONF" diff --git a/src/stats.c b/src/stats.c index 154c3e74..7d09c23c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -438,6 +438,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("config.debug", &bv, bool); malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", bv ? "enabled" : "disabled"); + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); #define OPT_WRITE_BOOL(n) \ if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 31e354ca..fde223f9 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -117,8 +117,8 @@ TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config) do { \ - bool oldval; \ +#define TEST_MALLCTL_CONFIG(config, t) do { \ + t oldval; \ size_t sz = sizeof(oldval); \ assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ 0, "Unexpected mallctl() failure"); \ @@ -126,20 +126,21 @@ TEST_BEGIN(test_mallctl_config) assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) - TEST_MALLCTL_CONFIG(cache_oblivious); - TEST_MALLCTL_CONFIG(debug); - TEST_MALLCTL_CONFIG(fill); - TEST_MALLCTL_CONFIG(lazy_lock); - TEST_MALLCTL_CONFIG(munmap); - TEST_MALLCTL_CONFIG(prof); - TEST_MALLCTL_CONFIG(prof_libgcc); - TEST_MALLCTL_CONFIG(prof_libunwind); - TEST_MALLCTL_CONFIG(stats); - TEST_MALLCTL_CONFIG(tcache); - TEST_MALLCTL_CONFIG(tls); - TEST_MALLCTL_CONFIG(utrace); - TEST_MALLCTL_CONFIG(valgrind); - 
TEST_MALLCTL_CONFIG(xmalloc); + TEST_MALLCTL_CONFIG(cache_oblivious, bool); + TEST_MALLCTL_CONFIG(debug, bool); + TEST_MALLCTL_CONFIG(fill, bool); + TEST_MALLCTL_CONFIG(lazy_lock, bool); + TEST_MALLCTL_CONFIG(malloc_conf, const char *); + TEST_MALLCTL_CONFIG(munmap, bool); + TEST_MALLCTL_CONFIG(prof, bool); + TEST_MALLCTL_CONFIG(prof_libgcc, bool); + TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(stats, bool); + TEST_MALLCTL_CONFIG(tcache, bool); + TEST_MALLCTL_CONFIG(tls, bool); + TEST_MALLCTL_CONFIG(utrace, bool); + TEST_MALLCTL_CONFIG(valgrind, bool); + TEST_MALLCTL_CONFIG(xmalloc, bool); #undef TEST_MALLCTL_CONFIG } From e5d5a4a51792258aed65e6b45191d1f56c1305ea Mon Sep 17 00:00:00 2001 From: Cameron Evans Date: Tue, 2 Feb 2016 21:52:08 -0800 Subject: [PATCH 0077/2608] Add time_update(). --- Makefile.in | 5 +-- .../jemalloc/internal/jemalloc_internal.h.in | 4 +++ .../internal/jemalloc_internal_decls.h | 8 +++++ include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/time.h | 22 ++++++++++++ src/time.c | 36 +++++++++++++++++++ test/unit/time.c | 23 ++++++++++++ 7 files changed, 97 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/time.h create mode 100644 src/time.c create mode 100644 test/unit/time.c diff --git a/Makefile.in b/Makefile.in index 1ac6f292..c4f8cf90 100644 --- a/Makefile.in +++ b/Makefile.in @@ -85,8 +85,8 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/util.c \ - $(srcroot)src/tsd.c + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/time.c \ + $(srcroot)src/tsd.c $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -143,6 +143,7 @@ TESTS_UNIT := 
$(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/time.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8c507f79..e7bc4c84 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -356,6 +356,7 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -384,6 +385,7 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -469,6 +471,7 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -497,6 +500,7 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES +#include "jemalloc/internal/time.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index a601d6eb..0f29e676 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -61,4 +61,12 @@ isblank(int c) #endif #include +#include +#ifdef _WIN32 
+struct timespec { + time_t tv_sec; + long tv_nsec; +}; +#endif + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a90021aa..8b1fd45c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,6 +460,7 @@ tcaches_get tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +time_update tsd_arena_get tsd_arena_set tsd_boot diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h new file mode 100644 index 00000000..e3e6c5f4 --- /dev/null +++ b/include/jemalloc/internal/time.h @@ -0,0 +1,22 @@ +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ + && _POSIX_MONOTONIC_CLOCK >= 0 + +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +bool time_update(struct timespec *time); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/src/time.c b/src/time.c new file mode 100644 index 00000000..2147c529 --- /dev/null +++ b/src/time.c @@ -0,0 +1,36 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +bool +time_update(struct timespec *time) +{ + struct timespec old_time; + + memcpy(&old_time, time, sizeof(struct timespec)); + +#ifdef _WIN32 + FILETIME ft; + uint64_t ticks; + GetSystemTimeAsFileTime(&ft); + ticks = (ft.dwHighDateTime << 32) | ft.dWLowDateTime; + time->tv_sec = ticks / 10000; + time->tv_nsec = ((ticks % 
10000) * 100); +#elif JEMALLOC_CLOCK_GETTIME + if (sysconf(_SC_MONOTONIC_CLOCK) > 0) + clock_gettime(CLOCK_MONOTONIC, time); + else + clock_gettime(CLOCK_REALTIME, time); +#else + struct timeval tv; + gettimeofday(&tv, NULL); + time->tv_sec = tv.tv_sec; + time->tv_nsec = tv.tv_usec * 1000; +#endif + + /* Handle non-monotonic clocks. */ + if (unlikely(old_time.tv_sec > time->tv_sec)) + return (true); + if (unlikely(old_time.tv_sec == time->tv_sec)) + return old_time.tv_nsec > time->tv_nsec; + + return (false); +} diff --git a/test/unit/time.c b/test/unit/time.c new file mode 100644 index 00000000..80460f98 --- /dev/null +++ b/test/unit/time.c @@ -0,0 +1,23 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_time_update) +{ + struct timespec ts; + + memset(&ts, 0, sizeof(struct timespec)); + + assert_false(time_update(&ts), "Basic time update failed."); + + /* Only Rip Van Winkle sleeps this long. */ + ts.tv_sec += 631152000; + assert_true(time_update(&ts), "Update should detect time roll-back."); +} +TEST_END + +int +main(void) +{ + + return (test( + test_time_update)); +} From 94451d184b09fdf57837d0a69ab05ec06317f5ca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 12:35:37 -0800 Subject: [PATCH 0078/2608] Flesh out time_*() API. 
--- Makefile.in | 2 +- include/jemalloc/internal/private_symbols.txt | 12 + include/jemalloc/internal/time.h | 18 +- src/time.c | 158 +++++++++++++- test/include/test/jemalloc_test.h.in | 1 + test/include/test/timer.h | 16 +- test/src/timer.c | 40 +--- test/unit/time.c | 206 +++++++++++++++++- 8 files changed, 397 insertions(+), 56 deletions(-) diff --git a/Makefile.in b/Makefile.in index c4f8cf90..e314a6f3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -116,7 +116,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/util.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/time.c $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8b1fd45c..4c40af61 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,6 +460,18 @@ tcaches_get tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +ticker_init +ticker_tick +time_add +time_compare +time_copy +time_divide +time_idivide +time_imultiply +time_init +time_nsec +time_sec +time_subtract time_update tsd_arena_get tsd_arena_set diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h index e3e6c5f4..a290f386 100644 --- a/include/jemalloc/internal/time.h +++ b/include/jemalloc/internal/time.h @@ -1,8 +1,11 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + #define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ && _POSIX_MONOTONIC_CLOCK >= 0 -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +/* 
Maximum supported number of seconds (~584 years). */ +#define TIME_SEC_MAX 18446744072 #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -12,6 +15,17 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +void time_init(struct timespec *time, time_t sec, long nsec); +time_t time_sec(const struct timespec *time); +long time_nsec(const struct timespec *time); +void time_copy(struct timespec *time, const struct timespec *source); +int time_compare(const struct timespec *a, const struct timespec *b); +void time_add(struct timespec *time, const struct timespec *addend); +void time_subtract(struct timespec *time, const struct timespec *subtrahend); +void time_imultiply(struct timespec *time, uint64_t multiplier); +void time_idivide(struct timespec *time, uint64_t divisor); +uint64_t time_divide(const struct timespec *time, + const struct timespec *divisor); bool time_update(struct timespec *time); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/time.c b/src/time.c index 2147c529..3f930385 100644 --- a/src/time.c +++ b/src/time.c @@ -1,11 +1,160 @@ #include "jemalloc/internal/jemalloc_internal.h" +#define BILLION 1000000000 + +UNUSED static bool +time_valid(const struct timespec *time) +{ + + if (time->tv_sec > TIME_SEC_MAX) + return (false); + if (time->tv_nsec >= BILLION) + return (false); + + return (true); +} + +void +time_init(struct timespec *time, time_t sec, long nsec) +{ + + time->tv_sec = sec; + time->tv_nsec = nsec; + + assert(time_valid(time)); +} + +time_t +time_sec(const struct timespec *time) +{ + + assert(time_valid(time)); + + return (time->tv_sec); +} + +long +time_nsec(const struct timespec *time) +{ + + assert(time_valid(time)); + + return (time->tv_nsec); +} + +void +time_copy(struct timespec *time, const struct timespec *source) +{ + + assert(time_valid(source)); + + *time = *source; +} + +int +time_compare(const struct timespec 
*a, const struct timespec *b) +{ + int ret; + + assert(time_valid(a)); + assert(time_valid(b)); + + ret = (a->tv_sec > b->tv_sec) - (a->tv_sec < b->tv_sec); + if (ret == 0) + ret = (a->tv_nsec > b->tv_nsec) - (a->tv_nsec < b->tv_nsec); + + return (ret); +} + +void +time_add(struct timespec *time, const struct timespec *addend) +{ + + assert(time_valid(time)); + assert(time_valid(addend)); + + time->tv_sec += addend->tv_sec; + time->tv_nsec += addend->tv_nsec; + if (time->tv_nsec >= BILLION) { + time->tv_sec++; + time->tv_nsec -= BILLION; + } + + assert(time_valid(time)); +} + +void +time_subtract(struct timespec *time, const struct timespec *subtrahend) +{ + + assert(time_valid(time)); + assert(time_valid(subtrahend)); + assert(time_compare(time, subtrahend) >= 0); + + time->tv_sec -= subtrahend->tv_sec; + if (time->tv_nsec < subtrahend->tv_nsec) { + time->tv_sec--; + time->tv_nsec += BILLION; + } + time->tv_nsec -= subtrahend->tv_nsec; +} + +void +time_imultiply(struct timespec *time, uint64_t multiplier) +{ + time_t sec; + uint64_t nsec; + + assert(time_valid(time)); + + sec = time->tv_sec * multiplier; + nsec = time->tv_nsec * multiplier; + sec += nsec / BILLION; + nsec %= BILLION; + time_init(time, sec, (long)nsec); + + assert(time_valid(time)); +} + +void +time_idivide(struct timespec *time, uint64_t divisor) +{ + time_t sec; + uint64_t nsec; + + assert(time_valid(time)); + + sec = time->tv_sec / divisor; + nsec = ((time->tv_sec % divisor) * BILLION + time->tv_nsec) / divisor; + sec += nsec / BILLION; + nsec %= BILLION; + time_init(time, sec, (long)nsec); + + assert(time_valid(time)); +} + +uint64_t +time_divide(const struct timespec *time, const struct timespec *divisor) +{ + uint64_t t, d; + + assert(time_valid(time)); + assert(time_valid(divisor)); + + t = time_sec(time) * BILLION + time_nsec(time); + d = time_sec(divisor) * BILLION + time_nsec(divisor); + assert(d != 0); + return (t / d); +} + bool time_update(struct timespec *time) { struct timespec 
old_time; - memcpy(&old_time, time, sizeof(struct timespec)); + assert(time_valid(time)); + + time_copy(&old_time, time); #ifdef _WIN32 FILETIME ft; @@ -27,10 +176,11 @@ time_update(struct timespec *time) #endif /* Handle non-monotonic clocks. */ - if (unlikely(old_time.tv_sec > time->tv_sec)) + if (unlikely(time_compare(&old_time, time) > 0)) { + time_copy(time, &old_time); return (true); - if (unlikely(old_time.tv_sec == time->tv_sec)) - return old_time.tv_nsec > time->tv_nsec; + } + assert(time_valid(time)); return (false); } diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 455569da..223162e1 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -94,6 +94,7 @@ # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES +# include "jemalloc/internal/time.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" diff --git a/test/include/test/timer.h b/test/include/test/timer.h index a7fefdfd..a791f9ce 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -3,21 +3,9 @@ #include #include -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct { -#ifdef _WIN32 - FILETIME ft0; - FILETIME ft1; -#elif JEMALLOC_CLOCK_GETTIME - struct timespec ts0; - struct timespec ts1; - int clock_id; -#else - struct timeval tv0; - struct timeval tv1; -#endif + struct timespec t0; + struct timespec t1; } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/test/src/timer.c b/test/src/timer.c index 0c93abaf..15306cfd 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -4,50 +4,26 @@ void timer_start(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - if (sysconf(_SC_MONOTONIC_CLOCK) <= 0) - timer->clock_id = CLOCK_REALTIME; - else - timer->clock_id = CLOCK_MONOTONIC; - 
clock_gettime(timer->clock_id, &timer->ts0); -#else - gettimeofday(&timer->tv0, NULL); -#endif + time_init(&timer->t0, 0, 0); + time_update(&timer->t0); } void timer_stop(timedelta_t *timer) { -#ifdef _WIN32 - GetSystemTimeAsFileTime(&timer->ft0); -#elif JEMALLOC_CLOCK_GETTIME - clock_gettime(timer->clock_id, &timer->ts1); -#else - gettimeofday(&timer->tv1, NULL); -#endif + time_copy(&timer->t1, &timer->t0); + time_update(&timer->t1); } uint64_t timer_usec(const timedelta_t *timer) { + struct timespec delta; -#ifdef _WIN32 - uint64_t t0, t1; - t0 = (((uint64_t)timer->ft0.dwHighDateTime) << 32) | - timer->ft0.dwLowDateTime; - t1 = (((uint64_t)timer->ft1.dwHighDateTime) << 32) | - timer->ft1.dwLowDateTime; - return ((t1 - t0) / 10); -#elif JEMALLOC_CLOCK_GETTIME - return (((timer->ts1.tv_sec - timer->ts0.tv_sec) * 1000000) + - (timer->ts1.tv_nsec - timer->ts0.tv_nsec) / 1000); -#else - return (((timer->tv1.tv_sec - timer->tv0.tv_sec) * 1000000) + - timer->tv1.tv_usec - timer->tv0.tv_usec); -#endif + time_copy(&delta, &timer->t1); + time_subtract(&delta, &timer->t0); + return (time_sec(&delta) * 1000000 + time_nsec(&delta) / 1000); } void diff --git a/test/unit/time.c b/test/unit/time.c index 80460f98..941e6f13 100644 --- a/test/unit/time.c +++ b/test/unit/time.c @@ -1,16 +1,206 @@ #include "test/jemalloc_test.h" +#define BILLION 1000000000 + +TEST_BEGIN(test_time_init) +{ + struct timespec ts; + + time_init(&ts, 42, 43); + assert_ld_eq(ts.tv_sec, 42, "tv_sec incorrectly initialized"); + assert_ld_eq(ts.tv_nsec, 43, "tv_nsec incorrectly initialized"); +} +TEST_END + +TEST_BEGIN(test_time_sec) +{ + struct timespec ts; + + time_init(&ts, 42, 43); + assert_ld_eq(time_sec(&ts), 42, "tv_sec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_time_nsec) +{ + struct timespec ts; + + time_init(&ts, 42, 43); + assert_ld_eq(time_nsec(&ts), 43, "tv_nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_time_copy) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + 
time_init(&tsb, 0, 0); + time_copy(&tsb, &tsa); + assert_ld_eq(time_sec(&tsb), 42, "tv_sec incorrectly copied"); + assert_ld_eq(time_nsec(&tsb), 43, "tv_nsec incorrectly copied"); +} +TEST_END + +TEST_BEGIN(test_time_compare) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Times should be equal"); + assert_d_eq(time_compare(&tsb, &tsa), 0, "Times should be equal"); + + time_init(&tsb, 42, 42); + assert_d_eq(time_compare(&tsa, &tsb), 1, + "tsa should be greater than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), -1, + "tsb should be less than tsa"); + + time_init(&tsb, 42, 44); + assert_d_eq(time_compare(&tsa, &tsb), -1, + "tsa should be less than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), 1, + "tsb should be greater than tsa"); + + time_init(&tsb, 41, BILLION - 1); + assert_d_eq(time_compare(&tsa, &tsb), 1, + "tsa should be greater than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), -1, + "tsb should be less than tsa"); + + time_init(&tsb, 43, 0); + assert_d_eq(time_compare(&tsa, &tsb), -1, + "tsa should be less than tsb"); + assert_d_eq(time_compare(&tsb, &tsa), 1, + "tsb should be greater than tsa"); +} +TEST_END + +TEST_BEGIN(test_time_add) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_add(&tsa, &tsb); + time_init(&tsb, 84, 86); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); + + time_init(&tsa, 42, BILLION - 1); + time_copy(&tsb, &tsa); + time_add(&tsa, &tsb); + time_init(&tsb, 85, BILLION - 2); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); +} +TEST_END + +TEST_BEGIN(test_time_subtract) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_subtract(&tsa, &tsb); + time_init(&tsb, 0, 0); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect subtraction result"); + + time_init(&tsa, 42, 43); + time_init(&tsb, 41, 44); + time_subtract(&tsa, 
&tsb); + time_init(&tsb, 0, BILLION - 1); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect subtraction result"); +} +TEST_END + +TEST_BEGIN(test_time_imultiply) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_imultiply(&tsa, 10); + time_init(&tsb, 420, 430); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect multiplication result"); + + time_init(&tsa, 42, 666666666); + time_imultiply(&tsa, 3); + time_init(&tsb, 127, 999999998); + assert_d_eq(time_compare(&tsa, &tsb), 0, + "Incorrect multiplication result"); +} +TEST_END + +TEST_BEGIN(test_time_idivide) +{ + struct timespec tsa, tsb; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + time_idivide(&tsa, 10); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); + + time_init(&tsa, 42, 666666666); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 3); + time_idivide(&tsa, 3); + assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_time_divide) +{ + struct timespec tsa, tsb, tsc; + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + assert_u64_eq(time_divide(&tsa, &tsb), 10, + "Incorrect division result"); + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + time_init(&tsc, 0, 1); + time_add(&tsa, &tsc); + assert_u64_eq(time_divide(&tsa, &tsb), 10, + "Incorrect division result"); + + time_init(&tsa, 42, 43); + time_copy(&tsb, &tsa); + time_imultiply(&tsa, 10); + time_init(&tsc, 0, 1); + time_subtract(&tsa, &tsc); + assert_u64_eq(time_divide(&tsa, &tsb), 9, "Incorrect division result"); +} +TEST_END + TEST_BEGIN(test_time_update) { struct timespec ts; - memset(&ts, 0, sizeof(struct timespec)); + time_init(&ts, 0, 0); assert_false(time_update(&ts), "Basic time update failed."); /* Only Rip Van Winkle sleeps this long. 
*/ - ts.tv_sec += 631152000; - assert_true(time_update(&ts), "Update should detect time roll-back."); + { + struct timespec addend; + time_init(&addend, 631152000, 0); + time_add(&ts, &addend); + } + { + struct timespec ts0; + time_copy(&ts0, &ts); + assert_true(time_update(&ts), + "Update should detect time roll-back."); + assert_d_eq(time_compare(&ts, &ts0), 0, + "Time should not have been modified"); + } + } TEST_END @@ -19,5 +209,15 @@ main(void) { return (test( + test_time_init, + test_time_sec, + test_time_nsec, + test_time_copy, + test_time_compare, + test_time_add, + test_time_subtract, + test_time_imultiply, + test_time_idivide, + test_time_divide, test_time_update)); } From 9998000b2b77205a37ae630d5fd1ec9ee4569476 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 Feb 2016 20:27:54 -0800 Subject: [PATCH 0079/2608] Implement ticker. Implement ticker, which provides a simple API for ticking off some number of events before indicating that the ticker has hit its limit. --- Makefile.in | 5 +- .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/ticker.h | 75 ++++++++++++++++++ src/ticker.c | 2 + test/unit/ticker.c | 76 +++++++++++++++++++ 6 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/ticker.h create mode 100644 src/ticker.c create mode 100644 test/unit/ticker.c diff --git a/Makefile.in b/Makefile.in index e314a6f3..f3c2e4bd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -85,8 +85,8 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/time.c \ - $(srcroot)src/tsd.c $(srcroot)src/util.c + $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/ticker.c \ + 
$(srcroot)src/time.c $(srcroot)src/tsd.c $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -143,6 +143,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/time.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e7bc4c84..12d51be2 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -361,6 +361,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -390,6 +391,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -476,6 +478,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -505,6 +508,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" diff --git a/include/jemalloc/internal/private_symbols.txt 
b/include/jemalloc/internal/private_symbols.txt index 4c40af61..216367e5 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,8 +460,11 @@ tcaches_get tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup +ticker_copy ticker_init +ticker_read ticker_tick +ticker_ticks time_add time_compare time_copy diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h new file mode 100644 index 00000000..4696e56d --- /dev/null +++ b/include/jemalloc/internal/ticker.h @@ -0,0 +1,75 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ticker_s { + int32_t tick; + int32_t nticks; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void ticker_init(ticker_t *ticker, int32_t nticks); +void ticker_copy(ticker_t *ticker, const ticker_t *other); +int32_t ticker_read(const ticker_t *ticker); +bool ticker_ticks(ticker_t *ticker, int32_t nticks); +bool ticker_tick(ticker_t *ticker); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) +JEMALLOC_INLINE void +ticker_init(ticker_t *ticker, int32_t nticks) +{ + + ticker->tick = nticks; + ticker->nticks = nticks; +} + +JEMALLOC_INLINE void +ticker_copy(ticker_t *ticker, const ticker_t *other) +{ + + *ticker = *other; +} + +JEMALLOC_INLINE int32_t +ticker_read(const ticker_t *ticker) +{ + + return (ticker->tick); +} + +JEMALLOC_INLINE bool +ticker_ticks(ticker_t *ticker, int32_t nticks) +{ + + if 
(unlikely(ticker->tick < nticks)) { + ticker->tick = ticker->nticks; + return (true); + } + ticker->tick -= nticks; + return(false); +} + +JEMALLOC_INLINE bool +ticker_tick(ticker_t *ticker) +{ + + return (ticker_ticks(ticker, 1)); +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/src/ticker.c b/src/ticker.c new file mode 100644 index 00000000..db090240 --- /dev/null +++ b/src/ticker.c @@ -0,0 +1,2 @@ +#define JEMALLOC_TICKER_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/unit/ticker.c b/test/unit/ticker.c new file mode 100644 index 00000000..e737020a --- /dev/null +++ b/test/unit/ticker.c @@ -0,0 +1,76 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_ticker_tick) +{ +#define NREPS 2 +#define NTICKS 3 + ticker_t ticker; + int32_t i, j; + + ticker_init(&ticker, NTICKS); + for (i = 0; i < NREPS; i++) { + for (j = 0; j < NTICKS; j++) { + assert_u_eq(ticker_read(&ticker), NTICKS - j, + "Unexpected ticker value (i=%d, j=%d)", i, j); + assert_false(ticker_tick(&ticker), + "Unexpected ticker fire (i=%d, j=%d)", i, j); + } + assert_u32_eq(ticker_read(&ticker), 0, + "Expected ticker depletion"); + assert_true(ticker_tick(&ticker), + "Expected ticker fire (i=%d)", i); + assert_u32_eq(ticker_read(&ticker), NTICKS, + "Expected ticker reset"); + } +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_ticks) +{ +#define NTICKS 3 + ticker_t ticker; + + ticker_init(&ticker, NTICKS); + + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); + assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); + assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + + assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected 
ticker value"); +#undef NTICKS +} +TEST_END + +TEST_BEGIN(test_ticker_copy) +{ +#define NTICKS 3 + ticker_t ta, tb; + + ticker_init(&ta, NTICKS); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + + ticker_tick(&ta); + ticker_copy(&tb, &ta); + assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); + assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); +#undef NTICKS +} +TEST_END + +int +main(void) +{ + + return (test( + test_ticker_tick, + test_ticker_ticks, + test_ticker_copy)); +} From c87ab25d189e0ae76fd568db4bf273e2788cf1a9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 Feb 2016 20:37:24 -0800 Subject: [PATCH 0080/2608] Use ticker for incremental tcache GC. --- include/jemalloc/internal/tcache.h | 6 ++---- src/tcache.c | 3 ++- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index aa73060a..c64f5d34 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -83,7 +83,7 @@ struct tcache_bin_s { struct tcache_s { ql_elm(tcache_t) link; /* Used for aggregating stats. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - unsigned ev_cnt; /* Event count since incremental GC. */ + ticker_t gc_ticker; /* Drives incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ tcache_bin_t tbins[1]; /* Dynamically sized. 
*/ /* @@ -247,9 +247,7 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) if (TCACHE_GC_INCR == 0) return; - tcache->ev_cnt++; - assert(tcache->ev_cnt <= TCACHE_GC_INCR); - if (unlikely(tcache->ev_cnt == TCACHE_GC_INCR)) + if (unlikely(ticker_tick(&tcache->gc_ticker))) tcache_event_hard(tsd, tcache); } diff --git a/src/tcache.c b/src/tcache.c index 78c62300..e8c3152d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -67,7 +67,6 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) tcache->next_gc_bin = 0; - tcache->ev_cnt = 0; } void * @@ -330,6 +329,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) tcache_arena_associate(tcache, arena); + ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); + assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); for (i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; From 34676d33690f6cc6885ff769e537ca940aacf886 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 9 Feb 2016 16:28:40 -0800 Subject: [PATCH 0081/2608] Refactor prng* from cpp macros into inline functions. Remove 32-bit variant, convert prng64() to prng_lg_range(), and add prng_range(). 
--- Makefile.in | 8 ++- include/jemalloc/internal/ckh.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- include/jemalloc/internal/private_symbols.txt | 4 +- include/jemalloc/internal/prng.h | 67 +++++++++++------- include/jemalloc/internal/util.h | 37 ++++++++-- src/arena.c | 4 +- src/ckh.c | 4 +- src/prng.c | 2 + src/prof.c | 3 +- test/unit/prng.c | 68 ++++++++++++++++++ test/unit/util.c | 69 ++++++++++++------- 12 files changed, 205 insertions(+), 69 deletions(-) create mode 100644 src/prng.c create mode 100644 test/unit/prng.c diff --git a/Makefile.in b/Makefile.in index f3c2e4bd..6b210fee 100644 --- a/Makefile.in +++ b/Makefile.in @@ -84,9 +84,10 @@ C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ - $(srcroot)src/prof.c $(srcroot)src/quarantine.c $(srcroot)src/rtree.c \ - $(srcroot)src/stats.c $(srcroot)src/tcache.c $(srcroot)src/ticker.c \ - $(srcroot)src/time.c $(srcroot)src/tsd.c $(srcroot)src/util.c + $(srcroot)src/prng.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c $(srcroot)src/time.c $(srcroot)src/tsd.c \ + $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -129,6 +130,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 75c1c979..45fb3455 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -40,9 +40,7 @@ struct ckh_s { #endif /* Used for pseudo-random number 
generation. */ -#define CKH_A 1103515241 -#define CKH_C 12347 - uint32_t prng_state; + uint64_t prng_state; /* Total number of items. */ size_t count; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 12d51be2..616eb9f3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -547,7 +547,7 @@ size2index_compute(size_t size) #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif @@ -644,7 +644,7 @@ s2u_compute(size_t size) #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil(size)); + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 216367e5..d910202d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -348,7 +348,9 @@ pages_map pages_purge pages_trim pages_unmap -pow2_ceil +pow2_ceil_u32 +pow2_ceil_u64 +pow2_ceil_zu prof_active_get prof_active_get_unlocked prof_active_set diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 216d0ef4..83c90906 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -18,31 +18,9 @@ * proportional to bit position. For example, the lowest bit has a cycle of 2, * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. - * - * Macro parameters: - * uint32_t r : Result. - * unsigned lg_range : (0..32], number of least significant bits to return. - * uint32_t state : Seed value. 
- * const uint32_t a, c : See above discussion. */ -#define prng32(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 32); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (32 - (lg_range)); \ -} while (false) - -/* Same as prng32(), but 64 bits of pseudo-randomness, using uint64_t. */ -#define prng64(r, lg_range, state, a, c) do { \ - assert((lg_range) > 0); \ - assert((lg_range) <= 64); \ - \ - r = (state * (a)) + (c); \ - state = r; \ - r >>= (64 - (lg_range)); \ -} while (false) +#define PRNG_A UINT64_C(6364136223846793005) +#define PRNG_C UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -56,5 +34,46 @@ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); +uint64_t prng_range(uint64_t *state, uint64_t range); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range(uint64_t *state, unsigned lg_range) +{ + uint64_t ret; + + assert(lg_range > 0); + assert(lg_range <= 64); + + ret = (*state * PRNG_A) + PRNG_C; + *state = ret; + ret >>= (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_range(uint64_t *state, uint64_t range) +{ + uint64_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = jemalloc_ffsl(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. 
*/ + do { + ret = prng_lg_range(state, lg_range); + } while (ret >= range); + + return (ret); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 0bccea24..dfe5c93c 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -123,7 +123,9 @@ void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); #ifndef JEMALLOC_ENABLE_INLINE int jemalloc_ffsl(long bitmap); int jemalloc_ffs(int bitmap); -size_t pow2_ceil(size_t x); +uint64_t pow2_ceil_u64(uint64_t x); +uint32_t pow2_ceil_u32(uint32_t x); +size_t pow2_ceil_zu(size_t x); size_t lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); @@ -150,9 +152,8 @@ jemalloc_ffs(int bitmap) return (JEMALLOC_INTERNAL_FFS(bitmap)); } -/* Compute the smallest power of 2 that is >= x. */ -JEMALLOC_INLINE size_t -pow2_ceil(size_t x) +JEMALLOC_INLINE uint64_t +pow2_ceil_u64(uint64_t x) { x--; @@ -161,13 +162,37 @@ pow2_ceil(size_t x) x |= x >> 4; x |= x >> 8; x |= x >> 16; -#if (LG_SIZEOF_PTR == 3) x |= x >> 32; -#endif x++; return (x); } +JEMALLOC_INLINE uint32_t +pow2_ceil_u32(uint32_t x) +{ + + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x++; + return (x); +} + +/* Compute the smallest power of 2 that is >= x. */ +JEMALLOC_INLINE size_t +pow2_ceil_zu(size_t x) +{ + +#if (LG_SIZEOF_PTR == 3) + return (pow2_ceil_u64(x)); +#else + return (pow2_ceil_u32(x)); +#endif +} + #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE size_t lg_floor(size_t x) diff --git a/src/arena.c b/src/arena.c index 143afb9a..aa787f99 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2196,9 +2196,7 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 
63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - prng64(r, LG_PAGE - LG_CACHELINE, arena->offset_state, - UINT64_C(6364136223846793009), - UINT64_C(1442695040888963409)); + r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; diff --git a/src/ckh.c b/src/ckh.c index e4328d22..08fc433d 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - prng32(offset, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + offset = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. 
*/ - prng32(i, LG_CKH_BUCKET_CELLS, ckh->prng_state, CKH_A, CKH_C); + i = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); diff --git a/src/prng.c b/src/prng.c new file mode 100644 index 00000000..76646a2a --- /dev/null +++ b/src/prng.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PRNG_C_ +#include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/prof.c b/src/prof.c index 199e63e4..31f5e601 100644 --- a/src/prof.c +++ b/src/prof.c @@ -871,8 +871,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - prng64(r, 53, tdata->prng_state, UINT64_C(6364136223846793005), - UINT64_C(1442695040888963407)); + r = prng_lg_range(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) diff --git a/test/unit/prng.c b/test/unit/prng.c new file mode 100644 index 00000000..b22bd2f5 --- /dev/null +++ b/test/unit/prng.c @@ -0,0 +1,68 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_prng_lg_range) +{ + uint64_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range(&sa, 64); + sa = 42; + rb = prng_lg_range(&sa, 64); + assert_u64_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range(&sb, 64); + assert_u64_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range(&sa, 64); + rb = prng_lg_range(&sa, 64); + assert_u64_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range(&sa, 64); + for (lg_range = 63; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range(&sb, lg_range); + assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u64_eq(rb, (ra >> (64 - lg_range)), + "Expected high order 
bits of full-width result, " + "lg_range=%u", lg_range); + } +} +TEST_END + +TEST_BEGIN(test_prng_range) +{ + uint64_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint64_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint64_t r = prng_range(&s, range); + + assert_u64_lt(r, range, "Out of range"); + } + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_prng_lg_range, + test_prng_range)); +} diff --git a/test/unit/util.c b/test/unit/util.c index 8ab39a45..2f65aad2 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -1,33 +1,54 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_pow2_ceil) +#define TEST_POW2_CEIL(t, suf, pri) do { \ + unsigned i, pow2; \ + t x; \ + \ + assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i, \ + "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i+1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ + x++) { \ + assert_zu_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ +} while (0) + +TEST_BEGIN(test_pow2_ceil_u64) { - unsigned i, pow2; - size_t x; - assert_zu_eq(pow2_ceil(0), 0, "Unexpected result"); + TEST_POW2_CEIL(uint64_t, u64, FMTu64); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil(ZU(1) << i), ZU(1) << i, - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_u32) +{ - for (i = 2; i < sizeof(size_t) * 8; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) - 1), ZU(1) << i, - "Unexpected 
result"); - } + TEST_POW2_CEIL(uint32_t, u32, FMTu32); +} +TEST_END - for (i = 0; i < sizeof(size_t) * 8 - 1; i++) { - assert_zu_eq(pow2_ceil((ZU(1) << i) + 1), ZU(1) << (i+1), - "Unexpected result"); - } +TEST_BEGIN(test_pow2_ceil_zu) +{ - for (pow2 = 1; pow2 < 25; pow2++) { - for (x = (ZU(1) << (pow2-1)) + 1; x <= ZU(1) << pow2; x++) { - assert_zu_eq(pow2_ceil(x), ZU(1) << pow2, - "Unexpected result, x=%zu", x); - } - } + TEST_POW2_CEIL(size_t, zu, "zu"); } TEST_END @@ -286,7 +307,9 @@ main(void) { return (test( - test_pow2_ceil, + test_pow2_ceil_u64, + test_pow2_ceil_u32, + test_pow2_ceil_zu, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated, From 578cd165812a11cd7250bfe5051cddc30ffec6e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 18:40:03 -0800 Subject: [PATCH 0082/2608] Refactor arena_malloc_hard() out of arena_malloc(). --- include/jemalloc/internal/arena.h | 22 ++++++------------- include/jemalloc/internal/private_symbols.txt | 2 +- src/arena.c | 18 ++++++++++++++- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b6824896..24c4c1d1 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -461,10 +461,10 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_small(arena_t *arena, size_t size, szind_t ind, - bool zero); -void *arena_malloc_large(arena_t *arena, size_t size, szind_t ind, - bool zero); +void *arena_malloc_large(arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void 
arena_prof_promoted(const void *ptr, size_t size); @@ -1160,8 +1160,8 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path) +arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) { assert(size != 0); @@ -1179,15 +1179,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, assert(size > tcache_maxclass); } - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) - return (NULL); - - if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(arena, size, ind, zero)); - if (likely(size <= large_maxclass)) - return (arena_malloc_large(arena, size, ind, zero)); - return (huge_malloc(tsd, arena, size, zero, tcache)); + return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache)); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d910202d..87b5a919 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -35,8 +35,8 @@ arena_lg_dirty_mult_default_set arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc +arena_malloc_hard arena_malloc_large -arena_malloc_small arena_mapbits_allocated_get arena_mapbits_binind_get arena_mapbits_decommitted_get diff --git a/src/arena.c b/src/arena.c index aa787f99..b452df62 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2123,7 +2123,7 @@ arena_quarantine_junk_small(void *ptr, size_t usize) arena_redzones_validate(ptr, bin_info, true); } -void * +static void * arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) { void *ret; @@ -2236,6 +2236,22 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) return (ret); } +void * +arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t 
size, szind_t ind, + bool zero, tcache_t *tcache) +{ + + arena = arena_choose(tsd, arena); + if (unlikely(arena == NULL)) + return (NULL); + + if (likely(size <= SMALL_MAXCLASS)) + return (arena_malloc_small(arena, size, ind, zero)); + if (likely(size <= large_maxclass)) + return (arena_malloc_large(arena, size, ind, zero)); + return (huge_malloc(tsd, arena, size, zero, tcache)); +} + /* Only handles large allocations that require more than page alignment. */ static void * arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, From 4985dc681e2e44f9d43c902647371790acac3ad4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 19:24:58 -0800 Subject: [PATCH 0083/2608] Refactor arena_ralloc_no_move(). Refactor early return logic in arena_ralloc_no_move() to return early on failure rather than on success. --- src/arena.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index b452df62..68220d7c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2810,20 +2810,19 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, if (oldsize <= SMALL_MAXCLASS) { assert(arena_bin_info[size2index(oldsize)].reg_size == oldsize); - if ((usize_max <= SMALL_MAXCLASS && - size2index(usize_max) == size2index(oldsize)) || - (size <= oldsize && usize_max >= oldsize)) - return (false); + if ((usize_max > SMALL_MAXCLASS || + size2index(usize_max) != size2index(oldsize)) && + (size > oldsize || usize_max < oldsize)) + return (true); } else { - if (usize_max > SMALL_MAXCLASS) { - if (!arena_ralloc_large(ptr, oldsize, usize_min, - usize_max, zero)) - return (false); - } + if (usize_max <= SMALL_MAXCLASS) + return (true); + if (arena_ralloc_large(ptr, oldsize, usize_min, + usize_max, zero)) + return (true); } - /* Reallocation would require a move. 
*/ - return (true); + return (false); } else { return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, zero)); From db927b672748994bef0df6b5f9e94fe6c1d40d02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 19:37:10 -0800 Subject: [PATCH 0084/2608] Refactor arenas_cache tsd. Refactor arenas_cache tsd into arenas_tdata, which is a structure of type arena_tdata_t. --- include/jemalloc/internal/arena.h | 6 + .../jemalloc/internal/jemalloc_internal.h.in | 61 +++++--- include/jemalloc/internal/private_symbols.txt | 8 +- include/jemalloc/internal/tsd.h | 6 +- src/jemalloc.c | 143 ++++++++++-------- src/tsd.c | 4 +- 6 files changed, 139 insertions(+), 89 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 24c4c1d1..2750c008 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -31,6 +31,7 @@ typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -403,6 +404,11 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; }; + +/* Used in conjunction with tsd for fast arena-related context lookup. 
*/ +struct arena_tdata_s { + arena_t *arena; +}; #endif /* JEMALLOC_ARENA_STRUCTS_B */ #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 616eb9f3..760dbdda 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -459,16 +459,18 @@ void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, + arena_tdata_t *tdata); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_cache_cleanup(tsd_t *tsd); -void narenas_cache_cleanup(tsd_t *tsd); -void arenas_cache_bypass_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void narenas_tdata_cleanup(tsd_t *tsd); +void arenas_tdata_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); @@ -535,6 +537,8 @@ size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, + bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing); #endif @@ -785,32 +789,45 @@ arena_choose(tsd_t *tsd, arena_t *arena) return (ret); } +JEMALLOC_INLINE arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) +{ + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = 
tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. */ + return (arena_tdata_get_hard(tsd, ind)); + } + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or tdata to be + * initialized. + */ + return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : + NULL); + } + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) + return (tdata); + return (arena_tdata_get_hard(tsd, ind)); +} + JEMALLOC_INLINE arena_t * arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); + arena_tdata_t *tdata; /* init_if_missing requires refresh_if_missing. */ assert(!init_if_missing || refresh_if_missing); - if (unlikely(arenas_cache == NULL)) { - /* arenas_cache hasn't been initialized yet. */ - return (arena_get_hard(tsd, ind, init_if_missing)); - } - if (unlikely(ind >= tsd_narenas_cache_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or arena to be - * initialized. - */ - return (refresh_if_missing ? 
arena_get_hard(tsd, ind, - init_if_missing) : NULL); - } - arena = arenas_cache[ind]; - if (likely(arena != NULL) || !refresh_if_missing) - return (arena); - return (arena_get_hard(tsd, ind, init_if_missing)); + tdata = arena_tdata_get(tsd, ind, refresh_if_missing); + if (unlikely(tdata == NULL || tdata->arena == NULL)) + return (arena_get_hard(tsd, ind, init_if_missing, tdata)); + + return (tdata->arena); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 87b5a919..a0e6d8ab 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -93,11 +93,13 @@ arena_redzone_corruption arena_run_regind arena_run_to_miscelm arena_salloc -arenas_cache_bypass_cleanup -arenas_cache_cleanup +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small +arena_tdata_get +arena_tdata_get_hard atomic_add_p atomic_add_u atomic_add_uint32 @@ -311,7 +313,7 @@ map_bias map_misc_offset mb_write mutex_boot -narenas_cache_cleanup +narenas_tdata_cleanup narenas_total_get ncpus nhbins diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index eed7aa01..16cc2f17 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -537,9 +537,9 @@ struct tsd_init_head_s { O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ O(arena, arena_t *) \ - O(arenas_cache, arena_t **) \ - O(narenas_cache, unsigned) \ - O(arenas_cache_bypass, bool) \ + O(arenas_tdata, arena_tdata_t *) \ + O(narenas_tdata, unsigned) \ + O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 8415c0e2..d2b2afce 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -516,74 +516,99 @@ arena_unbind(tsd_t *tsd, unsigned ind) tsd_arena_set(tsd, NULL); } -arena_t * -arena_get_hard(tsd_t *tsd, unsigned ind, bool 
init_if_missing) +arena_tdata_t * +arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { - arena_t *arena; - arena_t **arenas_cache = tsd_arenas_cache_get(tsd); - unsigned narenas_cache = tsd_narenas_cache_get(tsd); + arena_tdata_t *tdata, *arenas_tdata_old; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + unsigned narenas_tdata_old, i; + unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); unsigned narenas_actual = narenas_total_get(); - /* Deallocate old cache if it's too small. */ - if (arenas_cache != NULL && narenas_cache < narenas_actual) { - a0dalloc(arenas_cache); - arenas_cache = NULL; - narenas_cache = 0; - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + /* + * Dissociate old tdata array (and set up for deallocation upon return) + * if it's too small. + */ + if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { + arenas_tdata_old = arenas_tdata; + narenas_tdata_old = narenas_tdata; + arenas_tdata = NULL; + narenas_tdata = 0; + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); + } else { + arenas_tdata_old = NULL; + narenas_tdata_old = 0; } - /* Allocate cache if it's missing. */ - if (arenas_cache == NULL) { - bool *arenas_cache_bypassp = tsd_arenas_cache_bypassp_get(tsd); - assert(ind < narenas_actual || !init_if_missing); - narenas_cache = (ind < narenas_actual) ? narenas_actual : ind+1; + /* Allocate tdata array if it's missing. */ + if (arenas_tdata == NULL) { + bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); + narenas_tdata = (ind < narenas_actual) ? 
narenas_actual : ind+1; - if (tsd_nominal(tsd) && !*arenas_cache_bypassp) { - *arenas_cache_bypassp = true; - arenas_cache = (arena_t **)a0malloc(sizeof(arena_t *) * - narenas_cache); - *arenas_cache_bypassp = false; + if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { + *arenas_tdata_bypassp = true; + arenas_tdata = (arena_tdata_t *)a0malloc( + sizeof(arena_tdata_t) * narenas_tdata); + *arenas_tdata_bypassp = false; } - if (arenas_cache == NULL) { - /* - * This function must always tell the truth, even if - * it's slow, so don't let OOM, thread cleanup (note - * tsd_nominal check), nor recursive allocation - * avoidance (note arenas_cache_bypass check) get in the - * way. - */ - if (ind >= narenas_actual) - return (NULL); - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - return (arena); + if (arenas_tdata == NULL) { + tdata = NULL; + goto label_return; } - assert(tsd_nominal(tsd) && !*arenas_cache_bypassp); - tsd_arenas_cache_set(tsd, arenas_cache); - tsd_narenas_cache_set(tsd, narenas_cache); + assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); + tsd_arenas_tdata_set(tsd, arenas_tdata); + tsd_narenas_tdata_set(tsd, narenas_tdata); } /* - * Copy to cache. It's possible that the actual number of arenas has - * increased since narenas_total_get() was called above, but that causes - * no correctness issues unless two threads concurrently execute the - * arenas.extend mallctl, which we trust mallctl synchronization to + * Copy to tdata array. It's possible that the actual number of arenas + * has increased since narenas_total_get() was called above, but that + * causes no correctness issues unless two threads concurrently execute + * the arenas.extend mallctl, which we trust mallctl synchronization to * prevent. 
*/ malloc_mutex_lock(&arenas_lock); - memcpy(arenas_cache, arenas, sizeof(arena_t *) * narenas_actual); + for (i = 0; i < narenas_actual; i++) + arenas_tdata[i].arena = arenas[i]; malloc_mutex_unlock(&arenas_lock); - if (narenas_cache > narenas_actual) { - memset(&arenas_cache[narenas_actual], 0, sizeof(arena_t *) * - (narenas_cache - narenas_actual)); + if (narenas_tdata > narenas_actual) { + memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) + * (narenas_tdata - narenas_actual)); } - /* Read the refreshed cache, and init the arena if necessary. */ - arena = arenas_cache[ind]; - if (init_if_missing && arena == NULL) - arena = arenas_cache[ind] = arena_init(ind); + /* Read the refreshed tdata array. */ + tdata = &arenas_tdata[ind]; +label_return: + if (arenas_tdata_old != NULL) + a0dalloc(arenas_tdata_old); + return (tdata); +} + +arena_t * +arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, + arena_tdata_t *tdata) +{ + arena_t *arena; + unsigned narenas_actual; + + if (init_if_missing && tdata != NULL) { + tdata->arena = arena_init(ind); + if (tdata->arena != NULL) + return (tdata->arena); + } + + /* + * This function must always tell the truth, even if it's slow, so don't + * let OOM, thread cleanup (note tsd_nominal check), nor recursive + * allocation avoidance (note arenas_tdata_bypass check) get in the way. 
+ */ + narenas_actual = narenas_total_get(); + if (ind >= narenas_actual) + return (NULL); + malloc_mutex_lock(&arenas_lock); + arena = arenas[ind]; + malloc_mutex_unlock(&arenas_lock); return (arena); } @@ -674,26 +699,26 @@ arena_cleanup(tsd_t *tsd) } void -arenas_cache_cleanup(tsd_t *tsd) +arenas_tdata_cleanup(tsd_t *tsd) { - arena_t **arenas_cache; + arena_tdata_t *arenas_tdata; - arenas_cache = tsd_arenas_cache_get(tsd); - if (arenas_cache != NULL) { - tsd_arenas_cache_set(tsd, NULL); - a0dalloc(arenas_cache); + arenas_tdata = tsd_arenas_tdata_get(tsd); + if (arenas_tdata != NULL) { + tsd_arenas_tdata_set(tsd, NULL); + a0dalloc(arenas_tdata); } } void -narenas_cache_cleanup(tsd_t *tsd) +narenas_tdata_cleanup(tsd_t *tsd) { /* Do nothing. */ } void -arenas_cache_bypass_cleanup(tsd_t *tsd) +arenas_tdata_bypass_cleanup(tsd_t *tsd) { /* Do nothing. */ diff --git a/src/tsd.c b/src/tsd.c index 9ffe9afe..b85b8b9d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -113,7 +113,7 @@ malloc_tsd_boot0(void) ncleanups = 0; if (tsd_boot0()) return (true); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = true; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true; return (false); } @@ -122,7 +122,7 @@ malloc_tsd_boot1(void) { tsd_boot1(); - *tsd_arenas_cache_bypassp_get(tsd_fetch()) = false; + *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 From 1a4ad3c0fab470c9a720a40c4433532d98bd9adc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 19:51:23 -0800 Subject: [PATCH 0085/2608] Refactor out arena_compute_npurge(). Refactor out arena_compute_npurge() by integrating its logic into arena_stash_dirty() as an incremental computation. --- src/arena.c | 92 +++++++++++++++++++++++++---------------------------- 1 file changed, 43 insertions(+), 49 deletions(-) diff --git a/src/arena.c b/src/arena.c index 68220d7c..47b136b6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -23,7 +23,7 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. 
*/ -static void arena_purge(arena_t *arena, bool all); +static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, @@ -1205,16 +1205,14 @@ arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) return (false); } -void -arena_maybe_purge(arena_t *arena) +static void +arena_maybe_purge_ratio(arena_t *arena) { /* Don't purge if the option is disabled. */ if (arena->lg_dirty_mult < 0) return; - /* Don't recursively purge. */ - if (arena->purging) - return; + /* * Iterate, since preventing recursive purging could otherwise leave too * many dirty pages. @@ -1229,10 +1227,21 @@ arena_maybe_purge(arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge(arena, false); + arena_purge_to_limit(arena, threshold); } } +void +arena_maybe_purge(arena_t *arena) +{ + + /* Don't recursively purge. */ + if (arena->purging) + return; + + arena_maybe_purge_ratio(arena); +} + static size_t arena_dirty_count(arena_t *arena) { @@ -1268,35 +1277,15 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_compute_npurge(arena_t *arena, bool all) -{ - size_t npurge; - - /* - * Compute the minimum number of pages that this thread should try to - * purge. - */ - if (!all) { - size_t threshold = (arena->nactive >> arena->lg_dirty_mult); - threshold = threshold < chunk_npages ? 
chunk_npages : threshold; - - npurge = arena->ndirty - threshold; - } else - npurge = arena->ndirty; - - return (npurge); -} - -static size_t -arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, - size_t npurge, arena_runs_dirty_link_t *purge_runs_sentinel, +arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, + size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; extent_node_t *chunkselm; size_t nstashed = 0; - /* Stash at least npurge pages. */ + /* Stash runs/chunks according to ndirty_limit. */ for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); rdelm != &arena->runs_dirty; rdelm = rdelm_next) { @@ -1308,6 +1297,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, bool zero; UNUSED void *chunk; + npages = extent_node_size_get(chunkselm) >> LG_PAGE; + chunkselm_next = qr_next(chunkselm, cc_link); /* * Allocate. 
chunkselm remains valid due to the @@ -1322,7 +1313,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - npages = extent_node_size_get(chunkselm) >> LG_PAGE; + assert(npages == (extent_node_size_get(chunkselm) >> + LG_PAGE)); chunkselm = chunkselm_next; } else { arena_chunk_t *chunk = @@ -1360,7 +1352,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, bool all, } nstashed += npages; - if (!all && nstashed >= npurge) + if (arena->ndirty - nstashed <= ndirty_limit) break; } @@ -1501,10 +1493,10 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_purge(arena_t *arena, bool all) +arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) { chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); - size_t npurge, npurgeable, npurged; + size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1518,24 +1510,26 @@ arena_purge(arena_t *arena, bool all) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || all); + assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || + ndirty_limit == 0); + + qr_new(&purge_runs_sentinel, rd_link); + extent_node_dirty_linkage_init(&purge_chunks_sentinel); + + npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit, + &purge_runs_sentinel, &purge_chunks_sentinel); + if (npurge == 0) + goto label_return; + npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); + assert(npurged == npurge); + arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, + &purge_chunks_sentinel); if (config_stats) arena->stats.npurge++; - npurge = arena_compute_npurge(arena, all); - qr_new(&purge_runs_sentinel, rd_link); - 
extent_node_dirty_linkage_init(&purge_chunks_sentinel); - - npurgeable = arena_stash_dirty(arena, &chunk_hooks, all, npurge, - &purge_runs_sentinel, &purge_chunks_sentinel); - assert(npurgeable >= npurge); - npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - assert(npurged == npurgeable); - arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); - +label_return: arena->purging = false; } @@ -1544,7 +1538,7 @@ arena_purge_all(arena_t *arena) { malloc_mutex_lock(&arena->lock); - arena_purge(arena, true); + arena_purge_to_limit(arena, 0); malloc_mutex_unlock(&arena->lock); } From 8e82af1166242bebd29289d2b16ce447273b427a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 6 Feb 2016 00:46:19 -0800 Subject: [PATCH 0086/2608] Implement smoothstep table generation. Check in a generated smootherstep table as smoothstep.h rather than generating it at configure time, since not all systems (e.g. Windows) have dc. 
--- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/smoothstep.h | 246 ++++++++++++++++++ include/jemalloc/internal/smoothstep.sh | 115 ++++++++ test/unit/smoothstep.c | 106 ++++++++ 5 files changed, 472 insertions(+) create mode 100644 include/jemalloc/internal/smoothstep.h create mode 100755 include/jemalloc/internal/smoothstep.sh create mode 100644 test/unit/smoothstep.c diff --git a/Makefile.in b/Makefile.in index 6b210fee..9530aa8e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -144,6 +144,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/time.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 760dbdda..e84c4357 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -364,6 +364,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -394,6 +395,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" @@ -483,6 +485,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" 
#include "jemalloc/internal/mutex.h" @@ -513,6 +516,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h new file mode 100644 index 00000000..c5333cca --- /dev/null +++ b/include/jemalloc/internal/smoothstep.h @@ -0,0 +1,246 @@ +/* + * This file was generated by the following command: + * sh smoothstep.sh smoother 200 24 3 15 + */ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +/* + * This header defines a precomputed table based on the smoothstep family of + * sigmoidal curves (https://en.wikipedia.org/wiki/Smoothstep) that grow from 0 + * to 1 in 0 <= x <= 1. The table is stored as integer fixed point values so + * that floating point math can be avoided. 
+ * + * 3 2 + * smoothstep(x) = -2x + 3x + * + * 5 4 3 + * smootherstep(x) = 6x - 15x + 10x + * + * 7 6 5 4 + * smootheststep(x) = -20x + 70x - 84x + 35x + */ + +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP( 25, 
UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 
0.119964156118750) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP( 82, 
UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP( 110, 
UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP( 138, 
UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP( 166, 
UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP( 194, 
UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh new file mode 100755 index 00000000..8124693f --- /dev/null +++ b/include/jemalloc/internal/smoothstep.sh @@ -0,0 +1,115 @@ +#!/bin/sh +# +# Generate a discrete lookup table for a sigmoid function in the smoothstep +# family (https://en.wikipedia.org/wiki/Smoothstep), where the lookup table +# entries correspond to x in [1/nsteps, 2/nsteps, ..., nsteps/nsteps]. Encode +# the entries using a binary fixed point representation. +# +# Usage: smoothstep.sh +# +# is in {smooth, smoother, smoothest}. +# must be greater than zero. +# must be in [0..62]; reasonable values are roughly [10..30]. +# is x decimal precision. +# is y decimal precision. 
+ +#set -x + +cmd="sh smoothstep.sh $*" +variant=$1 +nsteps=$2 +bfp=$3 +xprec=$4 +yprec=$5 + +case "${variant}" in + smooth) + ;; + smoother) + ;; + smoothest) + ;; + *) + echo "Unsupported variant" + exit 1 + ;; +esac + +smooth() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _2 lx 3 ^ '*' 3 lx 2 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoother() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx 6 lx 5 ^ '*' _15 lx 4 ^ '*' + 10 lx 3 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +smoothest() { + step=$1 + y=`echo ${yprec} k ${step} ${nsteps} / sx _20 lx 7 ^ '*' 70 lx 6 ^ '*' + _84 lx 5 ^ '*' + 35 lx 4 ^ '*' + p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g'` + h=`echo ${yprec} k 2 ${bfp} ^ ${y} '*' p | dc | tr -d '\\\\\n' | sed -e 's#^\.#0.#g' | tr '.' ' ' | awk '{print $1}' ` +} + +cat <= SMOOTHSTEP_NSTEPS / 2; i--) { + uint64_t h = smoothstep_tab[i]; + uint64_t delta = prev_h - h; + assert_u64_ge(delta, prev_delta, + "Slope must monotonically decrease in 0.5 <= x <= 1.0, " + "i=%u", i); + prev_h = h; + prev_delta = delta; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_smoothstep_integral, + test_smoothstep_monotonic, + test_smoothstep_slope)); +} From 243f7a0508bb014c2a7bf592c466a923911db234 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 19 Feb 2016 20:09:31 -0800 Subject: [PATCH 0087/2608] Implement decay-based unused dirty page purging. This is an alternative to the existing ratio-based unused dirty page purging, and is intended to eventually become the sole purging mechanism. Add mallctls: - opt.purge - opt.decay_time - arena..decay - arena..decay_time - arenas.decay_time - stats.arenas..decay_time This resolves #325. 
--- Makefile.in | 11 +- doc/jemalloc.xml.in | 95 ++++- include/jemalloc/internal/arena.h | 119 +++++- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/huge.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 22 +- include/jemalloc/internal/private_symbols.txt | 12 +- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/internal/time.h | 5 + src/arena.c | 327 +++++++++++++++- src/ctl.c | 166 ++++++-- src/huge.c | 25 +- src/jemalloc.c | 53 ++- src/stats.c | 60 ++- src/tcache.c | 4 +- src/time.c | 9 + test/unit/decay.c | 370 ++++++++++++++++++ test/unit/mallctl.c | 95 +++++ 18 files changed, 1268 insertions(+), 112 deletions(-) create mode 100644 test/unit/decay.c diff --git a/Makefile.in b/Makefile.in index 9530aa8e..e5681926 100644 --- a/Makefile.in +++ b/Makefile.in @@ -121,6 +121,7 @@ C_UTIL_INTEGRATION_SRCS := $(srcroot)src/time.c $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ @@ -354,18 +355,22 @@ stress_dir: check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) check_integration_prof: tests_integration check_integration_dir ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) endif +check_integration_decay: tests_integration check_integration_dir + $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh 
$(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) -check: tests check_dir check_integration_prof - $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) +check: check_unit check_integration check_integration_decay check_integration_prof ifeq ($(enable_code_coverage), 1) coverage_unit: check_unit diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 48765b01..0ced0aaa 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -949,6 +949,20 @@ for (i = 0; i < nbins; i++) { number of CPUs, or one if there is a single CPU. + + + opt.purge + (const char *) + r- + + Purge mode is “ratio” (default) or + “decay”. See opt.lg_dirty_mult + for details of the ratio mode. See opt.decay_time for + details of the decay mode. + + opt.lg_dirty_mult @@ -971,6 +985,26 @@ for (i = 0; i < nbins; i++) { for related dynamic control options. + + + opt.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. The pages are incrementally purged according to a + sigmoidal decay curve that starts and ends with zero purge rate. A + decay time of 0 causes all unused dirty pages to be purged immediately + upon creation. A decay time of -1 disables purging. The default decay + time is 10 seconds. See arenas.decay_time + and arena.<i>.decay_time + for related dynamic control options. 
+ + + opt.stats_print @@ -1501,12 +1535,27 @@ malloc_conf = "xmalloc:true";]]> (void) -- - Purge unused dirty pages for arena <i>, or for + Purge all unused dirty pages for arena <i>, or for all arenas if <i> equals arenas.narenas. + + + arena.<i>.decay + (void) + -- + + Trigger decay-based purging of unused dirty pages for + arena <i>, or for all arenas if <i> equals arenas.narenas. + The proportion of unused dirty pages to be purged depends on the current + time; see opt.decay_time for + details. + + arena.<i>.dss @@ -1535,6 +1584,22 @@ malloc_conf = "xmalloc:true";]]> for additional information. + + + arena.<i>.decay_time + (ssize_t) + rw + + Current per-arena approximate time in seconds from the + creation of a set of unused dirty pages until an equivalent set of + unused dirty pages is purged and/or reused. Each time this interface is + set, all currently unused dirty pages are considered to have fully + decayed, which causes immediate purging of all unused dirty pages unless + the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for + additional information. + + arena.<i>.chunk_hooks @@ -1769,6 +1834,21 @@ typedef struct { for additional information. + + + arenas.decay_time + (ssize_t) + rw + + Current default per-arena approximate time in seconds + from the creation of a set of unused dirty pages until an equivalent set + of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time + during arena creation. See opt.decay_time for + additional information. + + arenas.quantum @@ -2113,6 +2193,19 @@ typedef struct { for details. + + + stats.arenas.<i>.decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused dirty pages until an equivalent set of unused dirty pages is + purged and/or reused. See opt.decay_time + for details. 
+ + stats.arenas.<i>.nthreads diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2750c008..76d3be19 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -23,6 +23,18 @@ */ #define LG_DIRTY_MULT_DEFAULT 3 +typedef enum { + purge_mode_ratio = 0, + purge_mode_decay = 1, + + purge_mode_limit = 2 +} purge_mode_t; +#define PURGE_DEFAULT purge_mode_ratio +/* Default decay time in seconds. */ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. */ +#define DECAY_NTICKS_PER_UPDATE 1000 + typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; @@ -325,7 +337,7 @@ struct arena_s { /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; - /* True if a thread is currently executing arena_purge(). */ + /* True if a thread is currently executing arena_purge_to_limit(). */ bool purging; /* Number of pages in active runs and huge regions. */ @@ -376,6 +388,53 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t decay_time; + /* decay_time / SMOOTHSTEP_NSTEPS. */ + struct timespec decay_interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + struct timespec decay_epoch; + /* decay_deadline randomness generator. */ + uint64_t decay_jitter_state; + /* + * Deadline for current epoch. 
This is the sum of decay_interval and + * per epoch jitter which is a uniform random variable in + * [0..decay_interval). Epochs always advance by precise multiples of + * decay_interval, but we randomize the deadline to reduce the + * likelihood of arenas purging in lockstep. + */ + struct timespec decay_deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between decay_ndirty and ndirty to + * determine how many dirty pages, if any, were generated, and record + * the result in decay_backlog. + */ + size_t decay_ndirty; + /* + * Memoized result of arena_decay_backlog_npages_limit() corresponding + * to the current contents of decay_backlog, i.e. the limit on how many + * pages are allowed to exist for the decay epochs. + */ + size_t decay_backlog_npages_limit; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to decay_epoch. + */ + size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Extant huge allocations. */ ql_head(extent_node_t) huge; /* Synchronizes all huge allocation/update/deallocation. */ @@ -408,6 +467,7 @@ struct arena_s { /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ struct arena_tdata_s { arena_t *arena; + ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -423,7 +483,10 @@ static const size_t large_pad = #endif ; +extern purge_mode_t opt_purge; +extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; +extern ssize_t opt_decay_time; extern arena_bin_info_t arena_bin_info[NBINS]; @@ -451,9 +514,11 @@ bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); ssize_t arena_lg_dirty_mult_get(arena_t *arena); bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(arena_t *arena); +bool arena_decay_time_set(arena_t *arena, ssize_t decay_time); void arena_maybe_purge(arena_t *arena); -void arena_purge_all(arena_t *arena); -void arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, +void arena_purge(arena_t *arena, bool all); +void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); @@ -467,7 +532,7 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(arena_t *arena, size_t size, +void *arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache); @@ -478,8 +543,8 @@ void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind); +void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, 
+ void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; @@ -488,12 +553,13 @@ void arena_dalloc_junk_large(void *ptr, size_t usize); #endif void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr); +void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, +bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); @@ -501,9 +567,11 @@ dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); void arena_stats_merge(arena_t *arena, const char **dss, - ssize_t *lg_dirty_mult, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); arena_t *arena_new(unsigned ind); bool arena_boot(void); @@ -566,6 +634,8 @@ prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); void arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); +void arena_decay_ticks(tsd_t 
*tsd, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsd_t *tsd, arena_t *arena); void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); @@ -1165,6 +1235,27 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, } } +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) +{ + ticker_t *decay_ticker; + + if (unlikely(tsd == NULL)) + return; + decay_ticker = decay_ticker_get(tsd, arena->ind); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsd_t *tsd, arena_t *arena) +{ + + arena_decay_ticks(tsd, arena, 1); +} + JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) @@ -1271,7 +1362,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) tcache_dalloc_small(tsd, tcache, ptr, binind, slow_path); } else { - arena_dalloc_small(extent_node_arena_get( + arena_dalloc_small(tsd, extent_node_arena_get( &chunk->node), chunk, ptr, pageind); } } else { @@ -1286,7 +1377,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) tcache_dalloc_large(tsd, tcache, ptr, size - large_pad, slow_path); } else { - arena_dalloc_large(extent_node_arena_get( + arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); } } @@ -1326,7 +1417,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(extent_node_arena_get( + arena_dalloc_small(tsd, extent_node_arena_get( &chunk->node), chunk, ptr, pageind); } } else { @@ -1337,7 +1428,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) tcache_dalloc_large(tsd, tcache, ptr, size, 
true); } else { - arena_dalloc_large(extent_node_arena_get( + arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); } } diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 751c14b5..9add3ed9 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -35,6 +35,7 @@ struct ctl_arena_stats_s { unsigned nthreads; const char *dss; ssize_t lg_dirty_mult; + ssize_t decay_time; size_t pactive; size_t pdirty; arena_stats_t astats; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index ece7af98..68d3789f 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -13,8 +13,8 @@ void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, tcache_t *tcache); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, bool zero, tcache_t *tcache); -bool huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero); +bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e84c4357..3b2f75d6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -545,6 +545,7 @@ arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, bool refresh_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -833,6 +834,17 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, return (tdata->arena); } + +JEMALLOC_INLINE ticker_t * 
+decay_ticker_get(tsd_t *tsd, unsigned ind) +{ + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) + return (NULL); + return (&tdata->decay_ticker); +} #endif #include "jemalloc/internal/bitmap.h" @@ -883,8 +895,8 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); +bool ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -1150,8 +1162,8 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, - bool zero) +ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { assert(ptr != NULL); @@ -1163,7 +1175,7 @@ ixalloc(void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, return (true); } - return (arena_ralloc_no_move(ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero)); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a0e6d8ab..95ddf0c8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -25,6 +25,12 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small +arena_decay_time_default_get +arena_decay_time_default_set +arena_decay_time_get +arena_decay_time_set +arena_decay_tick +arena_decay_ticks arena_dss_prec_get arena_dss_prec_set arena_get @@ -83,7 +89,7 @@ arena_prof_tctx_get arena_prof_tctx_reset 
arena_prof_tctx_set arena_ptr_small_binind_get -arena_purge_all +arena_purge arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large @@ -185,6 +191,7 @@ ctl_nametomib ctl_postfork_child ctl_postfork_parent ctl_prefork +decay_ticker_get dss_prec_names extent_node_achunk_get extent_node_achunk_set @@ -318,6 +325,7 @@ narenas_total_get ncpus nhbins opt_abort +opt_decay_time opt_dss opt_junk opt_junk_alloc @@ -336,6 +344,7 @@ opt_prof_gdump opt_prof_leak opt_prof_prefix opt_prof_thread_active_init +opt_purge opt_quarantine opt_redzone opt_stats_print @@ -397,6 +406,7 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +purge_mode_names quarantine quarantine_alloc_hook quarantine_alloc_hook_work diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index c64f5d34..09935c36 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -361,7 +361,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, usize = index2size(binind); assert(usize <= tcache_maxclass); - ret = arena_malloc_large(arena, usize, binind, zero); + ret = arena_malloc_large(tsd, arena, usize, binind, zero); if (ret == NULL) return (NULL); } else { diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h index a290f386..dd1dd5bd 100644 --- a/include/jemalloc/internal/time.h +++ b/include/jemalloc/internal/time.h @@ -26,7 +26,12 @@ void time_imultiply(struct timespec *time, uint64_t multiplier); void time_idivide(struct timespec *time, uint64_t divisor); uint64_t time_divide(const struct timespec *time, const struct timespec *divisor); +#ifdef JEMALLOC_JET +typedef bool (time_update_t)(struct timespec *); +extern time_update_t *time_update; +#else bool time_update(struct timespec *time); +#endif #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/arena.c 
b/src/arena.c index 47b136b6..b1078ae9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,8 +4,17 @@ /******************************************************************************/ /* Data. */ +purge_mode_t opt_purge = PURGE_DEFAULT; +const char *purge_mode_names[] = { + "ratio", + "decay", + "N/A" +}; ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; static ssize_t lg_dirty_mult_default; +ssize_t opt_decay_time = DECAY_TIME_DEFAULT; +static ssize_t decay_time_default; + arena_bin_info_t arena_bin_info[NBINS]; size_t map_bias; @@ -1205,10 +1214,193 @@ arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) return (false); } +static void +arena_decay_deadline_init(arena_t *arena) +{ + + assert(opt_purge == purge_mode_decay); + + /* + * Generate a new deadline that is uniformly random within the next + * epoch after the current one. + */ + time_copy(&arena->decay_deadline, &arena->decay_epoch); + time_add(&arena->decay_deadline, &arena->decay_interval); + if (arena->decay_time > 0) { + uint64_t decay_interval_ns, r; + struct timespec jitter; + + decay_interval_ns = time_sec(&arena->decay_interval) * + 1000000000 + time_nsec(&arena->decay_interval); + r = prng_range(&arena->decay_jitter_state, decay_interval_ns); + time_init(&jitter, r / 1000000000, r % 1000000000); + time_add(&arena->decay_deadline, &jitter); + } +} + +static bool +arena_decay_deadline_reached(const arena_t *arena, const struct timespec *time) +{ + + assert(opt_purge == purge_mode_decay); + + return (time_compare(&arena->decay_deadline, time) <= 0); +} + +static size_t +arena_decay_backlog_npages_limit(const arena_t *arena) +{ + static const uint64_t h_steps[] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP + }; + uint64_t sum; + size_t npages_limit_backlog; + unsigned i; + + assert(opt_purge == purge_mode_decay); + + /* + * For each element of decay_backlog, multiply by the corresponding + * fixed-point smoothstep decay factor. 
Sum the products, then divide + * to round down to the nearest whole number of pages. + */ + sum = 0; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + sum += arena->decay_backlog[i] * h_steps[i]; + npages_limit_backlog = (sum >> SMOOTHSTEP_BFP); + + return (npages_limit_backlog); +} + +static void +arena_decay_epoch_advance(arena_t *arena, const struct timespec *time) +{ + uint64_t nadvance; + struct timespec delta; + size_t ndirty_delta; + + assert(opt_purge == purge_mode_decay); + assert(arena_decay_deadline_reached(arena, time)); + + time_copy(&delta, time); + time_subtract(&delta, &arena->decay_epoch); + nadvance = time_divide(&delta, &arena->decay_interval); + assert(nadvance > 0); + + /* Add nadvance decay intervals to epoch. */ + time_copy(&delta, &arena->decay_interval); + time_imultiply(&delta, nadvance); + time_add(&arena->decay_epoch, &delta); + + /* Set a new deadline. */ + arena_decay_deadline_init(arena); + + /* Update the backlog. */ + if (nadvance >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + memmove(arena->decay_backlog, &arena->decay_backlog[nadvance], + (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t)); + if (nadvance > 1) { + memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + nadvance], 0, (nadvance-1) * sizeof(size_t)); + } + } + ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - + arena->decay_ndirty : 0; + arena->decay_ndirty = arena->ndirty; + arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + arena->decay_backlog_npages_limit = + arena_decay_backlog_npages_limit(arena); +} + +static size_t +arena_decay_npages_limit(arena_t *arena) +{ + size_t npages_limit; + + assert(opt_purge == purge_mode_decay); + + npages_limit = arena->decay_backlog_npages_limit; + + /* Add in any dirty pages created during the current epoch. 
*/ + if (arena->ndirty > arena->decay_ndirty) + npages_limit += arena->ndirty - arena->decay_ndirty; + + return (npages_limit); +} + +static void +arena_decay_init(arena_t *arena, ssize_t decay_time) +{ + + arena->decay_time = decay_time; + if (decay_time > 0) { + time_init(&arena->decay_interval, decay_time, 0); + time_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + } + + time_init(&arena->decay_epoch, 0, 0); + time_update(&arena->decay_epoch); + arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + arena_decay_deadline_init(arena); + arena->decay_ndirty = arena->ndirty; + arena->decay_backlog_npages_limit = 0; + memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); +} + +static bool +arena_decay_time_valid(ssize_t decay_time) +{ + + return (decay_time >= -1 && decay_time <= TIME_SEC_MAX); +} + +ssize_t +arena_decay_time_get(arena_t *arena) +{ + ssize_t decay_time; + + malloc_mutex_lock(&arena->lock); + decay_time = arena->decay_time; + malloc_mutex_unlock(&arena->lock); + + return (decay_time); +} + +bool +arena_decay_time_set(arena_t *arena, ssize_t decay_time) +{ + + if (!arena_decay_time_valid(decay_time)) + return (true); + + malloc_mutex_lock(&arena->lock); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_time changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. + */ + arena_decay_init(arena, decay_time); + arena_maybe_purge(arena); + malloc_mutex_unlock(&arena->lock); + + return (false); +} + static void arena_maybe_purge_ratio(arena_t *arena) { + assert(opt_purge == purge_mode_ratio); + /* Don't purge if the option is disabled. 
*/ if (arena->lg_dirty_mult < 0) return; @@ -1231,6 +1423,41 @@ arena_maybe_purge_ratio(arena_t *arena) } } +static void +arena_maybe_purge_decay(arena_t *arena) +{ + struct timespec time; + size_t ndirty_limit; + + assert(opt_purge == purge_mode_decay); + + /* Purge all or nothing if the option is disabled. */ + if (arena->decay_time <= 0) { + if (arena->decay_time == 0) + arena_purge_to_limit(arena, 0); + return; + } + + time_copy(&time, &arena->decay_epoch); + if (unlikely(time_update(&time))) { + /* Time went backwards. Force an epoch advance. */ + time_copy(&time, &arena->decay_deadline); + } + + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(arena, &time); + + ndirty_limit = arena_decay_npages_limit(arena); + + /* + * Don't try to purge unless the number of purgeable pages exceeds the + * current limit. + */ + if (arena->ndirty <= ndirty_limit) + return; + arena_purge_to_limit(arena, ndirty_limit); +} + void arena_maybe_purge(arena_t *arena) { @@ -1239,7 +1466,10 @@ arena_maybe_purge(arena_t *arena) if (arena->purging) return; - arena_maybe_purge_ratio(arena); + if (opt_purge == purge_mode_ratio) + arena_maybe_purge_ratio(arena); + else + arena_maybe_purge_decay(arena); } static size_t @@ -1298,6 +1528,9 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, UNUSED void *chunk; npages = extent_node_size_get(chunkselm) >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; chunkselm_next = qr_next(chunkselm, cc_link); /* @@ -1327,6 +1560,9 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_mapbits_unallocated_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - + (nstashed + npages) < ndirty_limit) + break; assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get(chunk, pageind) == @@ -1352,7 +1588,8 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, } 
nstashed += npages; - if (arena->ndirty - nstashed <= ndirty_limit) + if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= + ndirty_limit) break; } @@ -1492,6 +1729,15 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, } } +/* + * NB: ndirty_limit is interpreted differently depending on opt_purge: + * - purge_mode_ratio: Purge as few dirty run/chunks as possible to reach the + * desired state: + * (arena->ndirty <= ndirty_limit) + * - purge_mode_decay: Purge as many dirty runs/chunks as possible without + * violating the invariant: + * (arena->ndirty >= ndirty_limit) + */ static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) { @@ -1510,8 +1756,8 @@ arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) size_t ndirty = arena_dirty_count(arena); assert(ndirty == arena->ndirty); } - assert((arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || - ndirty_limit == 0); + assert(opt_purge != purge_mode_ratio || (arena->nactive >> + arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); qr_new(&purge_runs_sentinel, rd_link); extent_node_dirty_linkage_init(&purge_chunks_sentinel); @@ -1534,11 +1780,14 @@ label_return: } void -arena_purge_all(arena_t *arena) +arena_purge(arena_t *arena, bool all) { malloc_mutex_lock(&arena->lock); - arena_purge_to_limit(arena, 0); + if (all) + arena_purge_to_limit(arena, 0); + else + arena_maybe_purge(arena); malloc_mutex_unlock(&arena->lock); } @@ -1960,8 +2209,8 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, - uint64_t prof_accumbytes) +arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, + szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; @@ -2008,6 +2257,7 @@ arena_tcache_fill_small(arena_t *arena, tcache_bin_t *tbin, szind_t binind, } malloc_mutex_unlock(&bin->lock); tbin->ncached = i; + arena_decay_tick(tsd, arena); } void @@ -2118,7 
+2368,8 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } static void * -arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) +arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, + bool zero) { void *ret; arena_bin_t *bin; @@ -2166,11 +2417,13 @@ arena_malloc_small(arena_t *arena, size_t size, szind_t binind, bool zero) memset(ret, 0, size); } + arena_decay_tick(tsd, arena); return (ret); } void * -arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) +arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, + bool zero) { void *ret; size_t usize; @@ -2227,6 +2480,7 @@ arena_malloc_large(arena_t *arena, size_t size, szind_t binind, bool zero) } } + arena_decay_tick(tsd, arena); return (ret); } @@ -2240,9 +2494,9 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, return (NULL); if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(arena, size, ind, zero)); + return (arena_malloc_small(tsd, arena, size, ind, zero)); if (likely(size <= large_maxclass)) - return (arena_malloc_large(arena, size, ind, zero)); + return (arena_malloc_large(tsd, arena, size, ind, zero)); return (huge_malloc(tsd, arena, size, zero, tcache)); } @@ -2329,6 +2583,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, else if (unlikely(opt_zero)) memset(ret, 0, usize); } + arena_decay_tick(tsd, arena); return (ret); } @@ -2515,7 +2770,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, } void -arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2527,6 +2782,7 @@ arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr, } bitselm = arena_bitselm_get(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); + arena_decay_tick(tsd, arena); } #ifdef 
JEMALLOC_JET @@ -2583,12 +2839,13 @@ arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr) { malloc_mutex_lock(&arena->lock); arena_dalloc_large_locked_impl(arena, chunk, ptr, false); malloc_mutex_unlock(&arena->lock); + arena_decay_tick(tsd, arena); } static void @@ -2789,14 +3046,16 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, } bool -arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, - bool zero) +arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero) { size_t usize_min, usize_max; usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { + arena_chunk_t *chunk; + /* * Avoid moving the allocation if the size class can be left the * same. @@ -2816,10 +3075,12 @@ arena_ralloc_no_move(void *ptr, size_t oldsize, size_t size, size_t extra, return (true); } + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_decay_tick(tsd, extent_node_arena_get(&chunk->node)); return (false); } else { - return (huge_ralloc_no_move(ptr, oldsize, usize_min, usize_max, - zero)); + return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min, + usize_max, zero)); } } @@ -2852,7 +3113,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. 
*/ - if (!arena_ralloc_no_move(ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -2915,15 +3176,36 @@ bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) { + if (opt_purge != purge_mode_ratio) + return (true); if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); return (false); } +ssize_t +arena_decay_time_default_get(void) +{ + + return ((ssize_t)atomic_read_z((size_t *)&decay_time_default)); +} + +bool +arena_decay_time_default_set(ssize_t decay_time) +{ + + if (opt_purge != purge_mode_decay) + return (true); + if (!arena_decay_time_valid(decay_time)) + return (true); + atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); + return (false); +} + void arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, + ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) { @@ -2932,6 +3214,7 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, malloc_mutex_lock(&arena->lock); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; + *decay_time = arena->decay_time; *nactive += arena->nactive; *ndirty += arena->ndirty; @@ -3050,6 +3333,9 @@ arena_new(unsigned ind) qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); + if (opt_purge == purge_mode_decay) + arena_decay_init(arena, arena_decay_time_default_get()); + ql_new(&arena->huge); if (malloc_mutex_init(&arena->huge_mtx)) return (NULL); @@ -3227,6 +3513,7 @@ arena_boot(void) unsigned i; arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); + arena_decay_time_default_set(opt_decay_time); /* * Compute the header size such that it is large enough to contain the diff --git a/src/ctl.c b/src/ctl.c index 
9618d632..f003b415 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -92,7 +92,9 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) +CTL_PROTO(opt_purge) CTL_PROTO(opt_lg_dirty_mult) +CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -115,10 +117,12 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) +static void arena_i_purge(unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) -static void arena_i_purge(unsigned arena_ind); +CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) +CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -132,6 +136,7 @@ INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_lg_dirty_mult) +CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -182,6 +187,7 @@ INDEX_PROTO(stats_arenas_i_hchunks_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_lg_dirty_mult) +CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) @@ -260,7 +266,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, + {NAME("purge"), CTL(opt_purge)}, {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, + {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -290,8 +298,10 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, + {NAME("decay"), CTL(arena_i_decay)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, + 
{NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -341,6 +351,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, + {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -441,6 +452,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, + {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, @@ -523,6 +535,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; + astats->decay_time = -1; astats->pactive = 0; astats->pdirty = 0; if (config_stats) { @@ -545,8 +558,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) unsigned i; arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, - &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, - cstats->lstats, cstats->hstats); + &cstats->decay_time, &cstats->pactive, &cstats->pdirty, + &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].curregs * @@ -1265,7 +1278,9 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) 
+CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) @@ -1539,34 +1554,52 @@ label_return: /******************************************************************************/ -/* ctl_mutex must be held during execution of this function. */ static void -arena_i_purge(unsigned arena_ind) +arena_i_purge(unsigned arena_ind, bool all) { - tsd_t *tsd; - unsigned i; - bool refreshed; - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } + malloc_mutex_lock(&ctl_mtx); + { + tsd_t *tsd = tsd_fetch(); + unsigned narenas = ctl_stats.narenas; - if (arena_ind == ctl_stats.narenas) { - unsigned i; - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - arena_purge_all(tarenas[i]); + if (arena_ind == narenas) { + unsigned i; + bool refreshed; + VARIABLE_ARRAY(arena_t *, tarenas, narenas); + + for (i = 0, refreshed = false; i < narenas; i++) { + tarenas[i] = arena_get(tsd, i, false, false); + if (tarenas[i] == NULL && !refreshed) { + tarenas[i] = arena_get(tsd, i, false, + true); + refreshed = true; + } + } + + /* + * No further need to hold ctl_mtx, since narenas and + * tarenas contain everything needed below. + */ + malloc_mutex_unlock(&ctl_mtx); + + for (i = 0; i < narenas; i++) { + if (tarenas[i] != NULL) + arena_purge(tarenas[i], all); + } + } else { + arena_t *tarena; + + assert(arena_ind < narenas); + + tarena = arena_get(tsd, arena_ind, false, true); + + /* No further need to hold ctl_mtx. 
*/ + malloc_mutex_unlock(&ctl_mtx); + + if (tarena != NULL) + arena_purge(tarena, all); } - } else { - assert(arena_ind < ctl_stats.narenas); - if (tarenas[arena_ind] != NULL) - arena_purge_all(tarenas[arena_ind]); } } @@ -1578,9 +1611,22 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - malloc_mutex_lock(&ctl_mtx); - arena_i_purge(mib[1]); - malloc_mutex_unlock(&ctl_mtx); + arena_i_purge(mib[1], true); + + ret = 0; +label_return: + return (ret); +} + +static int +arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) +{ + int ret; + + READONLY(); + WRITEONLY(); + arena_i_purge(mib[1], false); ret = 0; label_return: @@ -1677,6 +1723,40 @@ label_return: return (ret); } +static int +arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind = mib[1]; + arena_t *arena; + + arena = arena_get(tsd_fetch(), arena_ind, false, true); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_get(arena); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + if (arena_decay_time_set(arena, *(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + static int arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1801,6 +1881,32 @@ label_return: return (ret); } +static int +arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = arena_decay_time_default_get(); + READ(oldval, ssize_t); + } + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto 
label_return; + } + if (arena_decay_time_default_set(*(ssize_t *)newp)) { + ret = EFAULT; + goto label_return; + } + } + + ret = 0; +label_return: + return (ret); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -2002,6 +2108,8 @@ CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, ssize_t) +CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, + ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) diff --git a/src/huge.c b/src/huge.c index c1fa3795..9f880484 100644 --- a/src/huge.c +++ b/src/huge.c @@ -99,6 +99,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, } else if (config_fill && unlikely(opt_junk_alloc)) memset(ret, 0xa5, size); + arena_decay_tick(tsd, arena); return (ret); } @@ -280,7 +281,7 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { } bool -huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { @@ -292,13 +293,18 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, zero)) + if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, + zero)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); + } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, - oldsize, usize_min, zero)) + oldsize, usize_min, zero)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); + } } /* @@ -309,12 +315,17 @@ huge_ralloc_no_move(void *ptr, size_t oldsize, size_t usize_min, && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, zero); + arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) - return (huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)); + if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { + if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) { + arena_decay_tick(tsd, huge_aalloc(ptr)); + return (false); + } + } return (true); } @@ -336,7 +347,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t copysize; /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) return (ptr); /* @@ -373,6 +384,8 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) arena_chunk_dalloc_huge(extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); idalloctm(tsd, node, tcache, true, true); + + arena_decay_tick(tsd, arena); } arena_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index d2b2afce..f69d951b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -577,6 +577,17 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) * (narenas_tdata - narenas_actual)); } + /* Copy/initialize tickers. 
*/ + for (i = 0; i < narenas_actual; i++) { + if (i < narenas_tdata_old) { + ticker_copy(&arenas_tdata[i].decay_ticker, + &arenas_tdata_old[i].decay_ticker); + } else { + ticker_init(&arenas_tdata[i].decay_ticker, + DECAY_NTICKS_PER_UPDATE); + } + } + /* Read the refreshed tdata array. */ tdata = &arenas_tdata[ind]; label_return: @@ -1120,8 +1131,27 @@ malloc_conf_init(void) } CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, SIZE_T_MAX, false) + if (strncmp("purge", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < purge_mode_limit; i++) { + if (strncmp(purge_mode_names[i], v, + vlen) == 0) { + opt_purge = (purge_mode_t)i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, + TIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { @@ -2344,12 +2374,12 @@ label_oom: } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero) +ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); usize = isalloc(ptr, config_prof); @@ -2357,14 +2387,15 @@ ixallocx_helper(void *ptr, size_t old_usize, size_t size, size_t extra, } static size_t -ixallocx_prof_sample(void *ptr, size_t old_usize, size_t size, size_t extra, - size_t alignment, bool zero, prof_tctx_t *tctx) +ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - usize = ixallocx_helper(ptr, old_usize, 
size, extra, alignment, zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment, + zero); return (usize); } @@ -2390,11 +2421,11 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, assert(usize_max != 0); tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(ptr, old_usize, size, extra, + usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, + alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2441,8 +2472,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, + alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; diff --git a/src/stats.c b/src/stats.c index 7d09c23c..8d5ed71e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -258,7 +258,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, { unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult; + ssize_t lg_dirty_mult, decay_time; size_t page, pactive, pdirty, mapped; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; @@ -278,13 +278,23 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); + 
if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } + } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", + decay_time); + } else + malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); } CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); @@ -292,9 +302,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); malloc_cprintf(write_cb, cbopaque, - "dirty pages: %zu:%zu active:dirty, %"FMTu64" sweep%s, %"FMTu64 - " madvise%s, %"FMTu64" purged\n", pactive, pdirty, npurge, npurge == - 1 ? "" : "s", nmadvise, nmadvise == 1 ? 
"" : "s", purged); + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " + "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); malloc_cprintf(write_cb, cbopaque, " allocated nmalloc ndalloc" @@ -486,7 +495,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) OPT_WRITE_SIZE_T(narenas) - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, arenas.lg_dirty_mult) + OPT_WRITE_CHAR_P(purge) + if (opt_purge == purge_mode_ratio) { + OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult) + } + if (opt_purge == purge_mode_decay) + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) OPT_WRITE_SIZE_T(quarantine) @@ -531,13 +546,22 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (ssv >= 0) { + if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (opt_purge == purge_mode_decay) { malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: %u:1\n", - (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: N/A\n"); + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); } if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/src/tcache.c b/src/tcache.c index e8c3152d..426bb1f7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -75,7 +75,7 @@ tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, { void *ret; - arena_tcache_fill_small(arena, tbin, binind, config_prof ? 
+ arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; @@ -143,6 +143,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } } malloc_mutex_unlock(&bin->lock); + arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -226,6 +227,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, malloc_mutex_unlock(&locked_arena->lock); if (config_prof && idump) prof_idump(); + arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* diff --git a/src/time.c b/src/time.c index 3f930385..2fe93e1e 100644 --- a/src/time.c +++ b/src/time.c @@ -147,6 +147,10 @@ time_divide(const struct timespec *time, const struct timespec *divisor) return (t / d); } +#ifdef JEMALLOC_JET +#undef time_update +#define time_update JEMALLOC_N(time_update_impl) +#endif bool time_update(struct timespec *time) { @@ -184,3 +188,8 @@ time_update(struct timespec *time) assert(time_valid(time)); return (false); } +#ifdef JEMALLOC_JET +#undef time_update +#define time_update JEMALLOC_N(time_update) +time_update_t *time_update = JEMALLOC_N(time_update_impl); +#endif diff --git a/test/unit/decay.c b/test/unit/decay.c new file mode 100644 index 00000000..324019dc --- /dev/null +++ b/test/unit/decay.c @@ -0,0 +1,370 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "purge:decay,decay_time:1"; + +static time_update_t *time_update_orig; + +static unsigned nupdates_mock; +static struct timespec time_mock; +static bool nonmonotonic_mock; + +static bool +time_update_mock(struct timespec *time) +{ + + nupdates_mock++; + if (!nonmonotonic_mock) + time_copy(time, &time_mock); + return (nonmonotonic_mock); +} + +TEST_BEGIN(test_decay_ticks) +{ + ticker_t *decay_ticker; + unsigned tick0, tick1; + size_t sz, huge0, large0; + void *p; + unsigned tcache_ind; + + test_skip_if(opt_purge != 
purge_mode_decay); + + decay_ticker = decay_ticker_get(tsd_fetch(), 0); + assert_ptr_not_null(decay_ticker, + "Unexpected failure getting decay ticker"); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + /* malloc(). */ + tick0 = ticker_read(decay_ticker); + p = malloc(huge0); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + /* free(). */ + tick0 = ticker_read(decay_ticker); + free(p); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + + /* calloc(). */ + tick0 = ticker_read(decay_ticker); + p = calloc(1, huge0); + assert_ptr_not_null(p, "Unexpected calloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + free(p); + + /* posix_memalign(). */ + tick0 = ticker_read(decay_ticker); + assert_d_eq(posix_memalign(&p, sizeof(size_t), huge0), 0, + "Unexpected posix_memalign() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during posix_memalign()"); + free(p); + + /* aligned_alloc(). */ + tick0 = ticker_read(decay_ticker); + p = aligned_alloc(sizeof(size_t), huge0); + assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during aligned_alloc()"); + free(p); + + /* realloc(). */ + /* Allocate. */ + tick0 = ticker_read(decay_ticker); + p = realloc(NULL, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Reallocate. 
*/ + tick0 = ticker_read(decay_ticker); + p = realloc(p, huge0); + assert_ptr_not_null(p, "Unexpected realloc() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + /* Deallocate. */ + tick0 = ticker_read(decay_ticker); + realloc(p, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + + /* Huge mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(huge0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge mallocx()"); + /* Huge rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, huge0, 0); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge rallocx()"); + /* Huge xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, huge0, 0, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge xallocx()"); + /* Huge dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge dallocx()"); + /* Huge sdallocx(). */ + p = mallocx(huge0, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, huge0, 0); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during huge sdallocx()"); + + /* Large mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(large0, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large mallocx()"); + /* Large rallocx(). 
*/ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, large0, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large rallocx()"); + /* Large xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, large0, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large xallocx()"); + /* Large dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large dallocx()"); + /* Large sdallocx(). */ + p = mallocx(large0, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, large0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during large sdallocx()"); + + /* Small mallocx(). */ + tick0 = ticker_read(decay_ticker); + p = mallocx(1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small mallocx()"); + /* Small rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, 1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small rallocx()"); + /* Small xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, 1, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small xallocx()"); + /* Small dallocx(). 
*/ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small dallocx()"); + /* Small sdallocx(). */ + p = mallocx(1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, 1, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during small sdallocx()"); + + /* tcache fill. */ + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + tick0 = ticker_read(decay_ticker); + p = mallocx(1, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill"); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tcache_ind, + sizeof(unsigned)), 0, "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush"); +} +TEST_END + +TEST_BEGIN(test_decay_ticker) +{ +#define NPS 1024 + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch, npurge0, npurge1; + size_t sz, tcache_max, large; + unsigned i, nupdates0; + struct timespec time, decay_time, deadline; + + test_skip_if(opt_purge != purge_mode_decay); + + /* + * Allocate a bunch of large objects, pause the clock, deallocate the + * objects, restore the clock, then [md]allocx() in a tight loop to + * verify the ticker triggers purging. 
+ */ + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + nupdates_mock = 0; + time_init(&time_mock, 0, 0); + time_update(&time_mock); + nonmonotonic_mock = false; + + time_update_orig = time_update; + time_update = time_update_mock; + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected time_update() to be called"); + } + + time_update = time_update_orig; + + time_init(&time, 0, 0); + time_update(&time); + time_init(&decay_time, opt_decay_time, 0); + time_copy(&deadline, &time); + time_add(&deadline, &decay_time); + do { + for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { + void *p = mallocx(1, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, flags); + } + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + time_update(&time); + } while (time_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); +#undef NPS +} +TEST_END + +TEST_BEGIN(test_decay_nonmonotonic) +{ +#define NPS (SMOOTHSTEP_NSTEPS + 1) + int flags = 
(MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; + uint64_t epoch, npurge0, npurge1; + size_t sz, large0; + unsigned i, nupdates0; + + test_skip_if(opt_purge != purge_mode_decay); + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + nupdates_mock = 0; + time_init(&time_mock, 0, 0); + time_update(&time_mock); + nonmonotonic_mock = true; + + time_update_orig = time_update; + time_update = time_update_mock; + + for (i = 0; i < NPS; i++) { + ps[i] = mallocx(large0, flags); + assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + } + + for (i = 0; i < NPS; i++) { + dallocx(ps[i], flags); + nupdates0 = nupdates_mock; + assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + "Unexpected arena.0.decay failure"); + assert_u_gt(nupdates_mock, nupdates0, + "Expected time_update() to be called"); + } + + assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, + "Unexpected mallctl failure"); + sz = sizeof(uint64_t); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + + time_update = time_update_orig; +#undef NPS +} +TEST_END + +int +main(void) +{ + + return (test( + test_decay_ticks, + test_decay_ticker, + test_decay_nonmonotonic)); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index fde223f9..b312fc64 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,7 +164,9 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, 
always); TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(const char *, purge, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); + TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(size_t, quarantine, fill); @@ -355,6 +357,8 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); + test_skip_if(opt_purge != purge_mode_ratio); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -382,6 +386,39 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) } TEST_END +TEST_BEGIN(test_arena_i_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arena.0.decay_time", &orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = TIME_SEC_MAX; + assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arena.0.decay_time", &old_decay_time, + &sz, &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arena.0.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arena_i_purge) { unsigned narenas; @@ -402,6 +439,26 @@ TEST_BEGIN(test_arena_i_purge) } TEST_END +TEST_BEGIN(test_arena_i_decay) +{ + unsigned narenas; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; + + assert_d_eq(mallctl("arena.0.decay", 
NULL, NULL, NULL, 0), 0, + "Unexpected mallctl() failure"); + + assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = narenas; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} +TEST_END + TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; @@ -466,6 +523,8 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; size_t sz = sizeof(ssize_t); + test_skip_if(opt_purge != purge_mode_ratio); + assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -493,6 +552,39 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) } TEST_END +TEST_BEGIN(test_arenas_decay_time) +{ + ssize_t decay_time, orig_decay_time, prev_decay_time; + size_t sz = sizeof(ssize_t); + + test_skip_if(opt_purge != purge_mode_decay); + + assert_d_eq(mallctl("arenas.decay_time", &orig_decay_time, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + + decay_time = -2; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + decay_time = TIME_SEC_MAX; + assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, + &decay_time, sizeof(ssize_t)), 0, + "Expected mallctl() failure"); + + for (prev_decay_time = decay_time, decay_time = -1; + decay_time < 20; prev_decay_time = decay_time, decay_time++) { + ssize_t old_decay_time; + + assert_d_eq(mallctl("arenas.decay_time", &old_decay_time, + &sz, &decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_decay_time, prev_decay_time, + "Unexpected old arenas.decay_time"); + } +} +TEST_END + TEST_BEGIN(test_arenas_constants) { @@ -621,10 +713,13 @@ main(void) test_tcache, test_thread_arena, test_arena_i_lg_dirty_mult, + 
test_arena_i_decay_time, test_arena_i_purge, + test_arena_i_decay, test_arena_i_dss, test_arenas_initialized, test_arenas_lg_dirty_mult, + test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, test_arenas_lrun_constants, From 9f24c944744e91d0cfe1864287ca7a52c16598fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 09:02:49 -0800 Subject: [PATCH 0088/2608] Increase test coverage in test_decay_ticks. --- test/unit/decay.c | 217 ++++++++++++++++++++-------------------------- 1 file changed, 96 insertions(+), 121 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 324019dc..20730de4 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -24,7 +24,6 @@ TEST_BEGIN(test_decay_ticks) unsigned tick0, tick1; size_t sz, huge0, large0; void *p; - unsigned tcache_ind; test_skip_if(opt_purge != purge_mode_decay); @@ -38,6 +37,12 @@ TEST_BEGIN(test_decay_ticks) assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + /* + * Test the standard APIs using a huge size class, since we can't + * control tcache interactions (except by completely disabling tcache + * for the entire test program). + */ + /* malloc(). */ tick0 = ticker_read(decay_ticker); p = malloc(huge0); @@ -95,129 +100,99 @@ TEST_BEGIN(test_decay_ticks) tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); - /* Huge mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(huge0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge mallocx()"); - /* Huge rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, huge0, 0); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge rallocx()"); - /* Huge xallocx(). 
*/ - tick0 = ticker_read(decay_ticker); - xallocx(p, huge0, 0, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge xallocx()"); - /* Huge dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge dallocx()"); - /* Huge sdallocx(). */ - p = mallocx(huge0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, huge0, 0); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during huge sdallocx()"); + /* + * Test the *allocx() APIs using huge, large, and small size classes, + * with tcache explicitly disabled. + */ + { + unsigned i; + size_t allocx_sizes[3]; + allocx_sizes[0] = huge0; + allocx_sizes[1] = large0; + allocx_sizes[2] = 1; - /* Large mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(large0, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large mallocx()"); - /* Large rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, large0, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large rallocx()"); - /* Large xallocx(). */ - tick0 = ticker_read(decay_ticker); - xallocx(p, large0, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large xallocx()"); - /* Large dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large dallocx()"); - /* Large sdallocx(). 
*/ - p = mallocx(large0, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, large0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during large sdallocx()"); + for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { + sz = allocx_sizes[i]; - /* Small mallocx(). */ - tick0 = ticker_read(decay_ticker); - p = mallocx(1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small mallocx()"); - /* Small rallocx(). */ - tick0 = ticker_read(decay_ticker); - p = rallocx(p, 1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small rallocx()"); - /* Small xallocx(). */ - tick0 = ticker_read(decay_ticker); - xallocx(p, 1, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small xallocx()"); - /* Small dallocx(). */ - tick0 = ticker_read(decay_ticker); - dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small dallocx()"); - /* Small sdallocx(). */ - p = mallocx(1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); - sdallocx(p, 1, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during small sdallocx()"); + /* mallocx(). 
*/ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during mallocx() (sz=%zu)", + sz); + /* rallocx(). */ + tick0 = ticker_read(decay_ticker); + p = rallocx(p, sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during rallocx() (sz=%zu)", + sz); + /* xallocx(). */ + tick0 = ticker_read(decay_ticker); + xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during xallocx() (sz=%zu)", + sz); + /* dallocx(). */ + tick0 = ticker_read(decay_ticker); + dallocx(p, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during dallocx() (sz=%zu)", + sz); + /* sdallocx(). */ + p = mallocx(sz, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick0 = ticker_read(decay_ticker); + sdallocx(p, sz, MALLOCX_TCACHE_NONE); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during sdallocx() " + "(sz=%zu)", sz); + } + } - /* tcache fill. */ - sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - tick0 = ticker_read(decay_ticker); - p = mallocx(1, MALLOCX_TCACHE(tcache_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache fill"); - /* tcache flush. 
*/ - dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tcache_ind, - sizeof(unsigned)), 0, "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache flush"); + /* + * Test tcache fill/flush interactions for large and small size classes, + * using an explicit tcache. + */ + { + unsigned tcache_ind, i; + size_t tcache_sizes[2]; + tcache_sizes[0] = large0; + tcache_sizes[1] = 1; + + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + + for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { + sz = tcache_sizes[i]; + + /* tcache fill. */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill " + "(sz=%zu)", sz); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, + &tcache_ind, sizeof(unsigned)), 0, + "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush " + "(sz=%zu)", sz); + } + } } TEST_END From a0aaad1afa8c1c4b30bf15c6b8744084ffc32055 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 10:23:48 -0800 Subject: [PATCH 0089/2608] Handle unaligned keys in hash(). Reported by Christopher Ferris . 
--- include/jemalloc/internal/hash.h | 18 +++++++++++++++++- test/unit/hash.c | 19 ++++++++++++++++--- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index bcead337..8b5fb037 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,6 +1,6 @@ /* * The following hash function is based on MurmurHash3, placed into the public - * domain by Austin Appleby. See http://code.google.com/p/smhasher/ for + * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. */ /******************************************************************************/ @@ -49,6 +49,14 @@ JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { + /* Handle unaligned read. */ + if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + uint32_t ret; + + memcpy(&ret, &p[i], sizeof(uint32_t)); + return (ret); + } + return (p[i]); } @@ -56,6 +64,14 @@ JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { + /* Handle unaligned read. 
*/ + if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + uint64_t ret; + + memcpy(&ret, &p[i], sizeof(uint64_t)); + return (ret); + } + return (p[i]); } diff --git a/test/unit/hash.c b/test/unit/hash.c index 77a8cede..ea73d701 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -59,17 +59,17 @@ hash_variant_string(hash_variant_t variant) } } +#define KEY_SIZE 256 static void -hash_variant_verify(hash_variant_t variant) +hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const size_t hashbytes = hash_variant_bits(variant) / 8; - uint8_t key[256]; VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; - memset(key, 0, sizeof(key)); + memset(key, 0, KEY_SIZE); memset(hashes, 0, sizeof(hashes)); memset(final, 0, sizeof(final)); @@ -139,6 +139,19 @@ hash_variant_verify(hash_variant_t variant) hash_variant_string(variant), expected, computed); } +static void +hash_variant_verify(hash_variant_t variant) +{ +#define MAX_ALIGN 16 + uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; + unsigned i; + + for (i = 0; i < MAX_ALIGN; i++) + hash_variant_verify_key(variant, &key[i]); +#undef MAX_ALIGN +} +#undef KEY_SIZE + TEST_BEGIN(test_hash_x86_32) { From effaf7d40fba191386162e907195b0198c75866a Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Sat, 20 Feb 2016 10:26:17 -0800 Subject: [PATCH 0090/2608] Fix a typo in the ckh_search() prototype. 
--- include/jemalloc/internal/ckh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 45fb3455..f75ad90b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -72,7 +72,7 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); -bool ckh_search(ckh_t *ckh, const void *seachkey, void **key, void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); bool ckh_string_keycomp(const void *k1, const void *k2); void ckh_pointer_hash(const void *key, size_t r_hash[2]); From 46e0b2301c0e0ee71f5714d0cdf320ba2d027271 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 13:37:26 +0100 Subject: [PATCH 0091/2608] Detect LG_SIZEOF_PTR depending on MSVC platform target --- configure.ac | 17 +++++++++++------ include/jemalloc/jemalloc_defs.h.in | 8 ++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 5232c8f3..9a489d98 100644 --- a/configure.ac +++ b/configure.ac @@ -166,13 +166,18 @@ if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" fi -AC_CHECK_SIZEOF([void *]) -if test "x${ac_cv_sizeof_void_p}" = "x8" ; then - LG_SIZEOF_PTR=3 -elif test "x${ac_cv_sizeof_void_p}" = "x4" ; then - LG_SIZEOF_PTR=2 +if test "x${je_cv_msvc}" = "xyes" ; then + LG_SIZEOF_PTR=LG_SIZEOF_PTR_WIN + AC_MSG_RESULT([Using a predefined value for sizeof(void *): 4 for 32-bit, 8 for 64-bit]) else - AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + AC_CHECK_SIZEOF([void *]) + if test "x${ac_cv_sizeof_void_p}" = "x8" ; then + LG_SIZEOF_PTR=3 + elif test 
"x${ac_cv_sizeof_void_p}" = "x4" ; then + LG_SIZEOF_PTR=2 + else + AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) + fi fi AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index ab13c375..6d89435c 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -33,5 +33,13 @@ */ #undef JEMALLOC_USE_CXX_THROW +#ifdef _MSC_VER +# ifdef _WIN64 +# define LG_SIZEOF_PTR_WIN 3 +# else +# define LG_SIZEOF_PTR_WIN 2 +# endif +#endif + /* sizeof(void *) == 2^LG_SIZEOF_PTR. */ #undef LG_SIZEOF_PTR From bc49863fb5c59d5a0ff63845464d901ef00c7845 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 13:38:33 +0100 Subject: [PATCH 0092/2608] Fix error "+ 2")syntax error: invalid arithmetic operator (error token is " in Cygwin x64 --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 9a489d98..3ae468ab 100644 --- a/configure.ac +++ b/configure.ac @@ -1116,7 +1116,7 @@ if test "x$LG_PAGE" = "xdetect"; then if (f == NULL) { return 1; } - fprintf(f, "%d\n", result); + fprintf(f, "%d", result); fclose(f); return 0; From 90c7269c0588bd6d49bf27ba05a261744ad97165 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 13:41:09 +0100 Subject: [PATCH 0093/2608] Add CPU "pause" intrinsic for MSVC --- configure.ac | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 3ae468ab..8adb6f7b 100644 --- a/configure.ac +++ b/configure.ac @@ -218,12 +218,22 @@ dnl CPU-specific settings. 
CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) - AC_CACHE_VAL([je_cv_pause], - [JE_COMPILABLE([pause instruction], [], - [[__asm__ volatile("pause"); return 0;]], - [je_cv_pause])]) - if test "x${je_cv_pause}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("pause")' + if test "x${je_cv_msvc}" = "xyes" ; then + AC_CACHE_VAL([je_cv_pause_msvc], + [JE_COMPILABLE([pause instruction MSVC], [], + [[_mm_pause(); return 0;]], + [je_cv_pause_msvc])]) + if test "x${je_cv_pause_msvc}" = "xyes" ; then + CPU_SPINWAIT='_mm_pause()' + fi + else + AC_CACHE_VAL([je_cv_pause], + [JE_COMPILABLE([pause instruction], [], + [[__asm__ volatile("pause"); return 0;]], + [je_cv_pause])]) + if test "x${je_cv_pause}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("pause")' + fi fi ;; powerpc) From 7f283980f00f0543e97f46567fbe5bdd4d732724 Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 30 Jan 2016 14:51:16 +0100 Subject: [PATCH 0094/2608] getpid() fix for Win32 --- include/msvc_compat/windows_extra.h | 2 ++ src/prof.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 0c5e323f..114f43b1 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -23,4 +23,6 @@ # define ERANGE ERROR_INVALID_DATA #endif +#define getpid() GetCurrentProcessId() + #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/src/prof.c b/src/prof.c index 31f5e601..3abb38e2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1384,6 +1384,8 @@ prof_dump_maps(bool propagate_err) cassert(config_prof); #ifdef __FreeBSD__ mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented #else { int pid = getpid(); From efbee8627873fbcee454319573cdf94816b25824 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 Feb 2016 11:27:18 +0100 Subject: [PATCH 0095/2608] Prevent MSVC from optimizing away tls_callback (resolves #318) --- src/tsd.c | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/src/tsd.c b/src/tsd.c index b85b8b9d..34c1573c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -148,13 +148,15 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) #ifdef _MSC_VER # ifdef _M_IX86 # pragma comment(linker, "/INCLUDE:__tls_used") +# pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") +# pragma comment(linker, "/INCLUDE:tls_callback") # endif # pragma section(".CRT$XLY",long,read) #endif JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -static BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, +BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif From 3c2c5a5071416b9be6a114ccbc7796443b24f1cd Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 Feb 2016 11:52:07 +0100 Subject: [PATCH 0096/2608] Fix warning in ipalloc --- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3b2f75d6..aa97d7c7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1001,8 +1001,8 @@ JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, - NULL), false, NULL)); + return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true), + false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t From 984c64f724bfeb73e32251801e6df6ab6df53d15 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 Feb 2016 11:52:41 +0100 Subject: [PATCH 0097/2608] Add MS Visual Studio 2015 support --- .gitignore | 16 + msvc/ReadMe.txt | 24 ++ msvc/jemalloc_vc2015.sln | 63 +++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 391 ++++++++++++++++++ .../vc2015/jemalloc/jemalloc.vcxproj.filters | 242 +++++++++++ 
.../vc2015/test_threads/test_threads.cpp | 100 +++++ .../vc2015/test_threads/test_threads.h | 3 + .../vc2015/test_threads/test_threads.vcxproj | 327 +++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ .../vc2015/test_threads/test_threads_main.cpp | 12 + 10 files changed, 1204 insertions(+) create mode 100644 msvc/ReadMe.txt create mode 100644 msvc/jemalloc_vc2015.sln create mode 100644 msvc/projects/vc2015/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2015/test_threads/test_threads.cpp create mode 100644 msvc/projects/vc2015/test_threads/test_threads.h create mode 100644 msvc/projects/vc2015/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters create mode 100644 msvc/projects/vc2015/test_threads/test_threads_main.cpp diff --git a/.gitignore b/.gitignore index d0e39361..08278d08 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,19 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.out /VERSION + +*.pdb +*.sdf +*.opendb +*.opensdf +*.cachefile +*.suo +*.user +*.sln.docstates +*.tmp +/msvc/Win32/ +/msvc/x64/ +/msvc/projects/*/*/Debug*/ +/msvc/projects/*/*/Release*/ +/msvc/projects/*/*/Win32/ +/msvc/projects/*/*/x64/ diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt new file mode 100644 index 00000000..02b97f74 --- /dev/null +++ b/msvc/ReadMe.txt @@ -0,0 +1,24 @@ + +How to build jemalloc for Windows +================================= + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. Install Visual Studio 2015 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "VS2015 x86 Native Tools Command Prompt" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + +6. 
Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2015.sln + diff --git a/msvc/jemalloc_vc2015.sln b/msvc/jemalloc_vc2015.sln new file mode 100644 index 00000000..aedd5e5e --- /dev/null +++ b/msvc/jemalloc_vc2015.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2015\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2015\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + 
{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + 
{09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj new file mode 100644 index 00000000..395837c3 --- /dev/null +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -0,0 +1,391 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + 8.1 + + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + DynamicLibrary + true + v140 + MultiByte + + + StaticLibrary + true + v140 + MultiByte + + + DynamicLibrary + false + v140 + true + MultiByte + + + StaticLibrary + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + 
$(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4244;4267;4334 + 
$(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4244;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..69f64169 --- /dev/null +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,242 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + {5697dfa3-16cf-4932-b428-6e0ec6e9f98e} + + + {0cbd2ca6-42a7-4f82-8517-d7e7a14fd986} + + + {0abe6f30-49b5-46dd-8aca-6e33363fa52c} + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + 
Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\internal + + + Header Files\msvc_compat + + + Header Files\msvc_compat + + + Header Files\msvc_compat\C99 + + + Header Files\msvc_compat\C99 + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp new file mode 100644 index 00000000..046843f3 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -0,0 +1,100 @@ +// jemalloc C++ threaded test +// Author: Rustam Abdullaev +// Public Domain + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using std::vector; +using std::thread; +using std::uniform_int_distribution; +using std::minstd_rand; + +#if NDEBUG && JEMALLOC_ISSUE_318_WORKAROUND +extern "C" JEMALLOC_EXPORT void _malloc_thread_cleanup(void); + +static thread_local struct JeMallocThreadHelper { + ~JeMallocThreadHelper() { + _malloc_thread_cleanup(); + } +} tls_jemallocThreadHelper; +#endif + +int test_threads() +{ + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", 
&narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j++) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j++) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %Id bytes\n", leaked); + bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 1 : 0; +} diff --git a/msvc/projects/vc2015/test_threads/test_threads.h b/msvc/projects/vc2015/test_threads/test_threads.h new file mode 100644 index 00000000..64d0cdb3 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.h @@ -0,0 +1,3 @@ +#pragma once + +int test_threads(); diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..b681e71e --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -0,0 +1,327 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + 8.1 + + + + Application + true + v140 + MultiByte + + + Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + true + v140 + MultiByte + + + 
Application + true + v140 + MultiByte + + + Application + false + v140 + true + MultiByte + + + Application + false + v140 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) 
+ + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + 
Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters new file mode 100644 index 00000000..4c233407 --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/msvc/projects/vc2015/test_threads/test_threads_main.cpp new file mode 100644 index 00000000..ffd96e6a --- /dev/null +++ b/msvc/projects/vc2015/test_threads/test_threads_main.cpp @@ -0,0 +1,12 @@ +#include "test_threads.h" +#include +#include +#include + +using namespace std::chrono_literals; + +int main(int argc, char** argv) +{ + int rc = test_threads(); + return rc; +} From aac93f414eaeea8b84e14bd9b6a2430828ba700d Mon Sep 17 00:00:00 2001 From: Jason Evans 
Date: Sat, 20 Feb 2016 11:25:30 -0800 Subject: [PATCH 0098/2608] Add symbol mangling for prng_[lg_]range(). --- include/jemalloc/internal/private_symbols.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 95ddf0c8..284410a9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -362,6 +362,8 @@ pages_unmap pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu +prng_lg_range +prng_range prof_active_get prof_active_get_unlocked prof_active_set From ecae12323d44cd739662051a2b9a5965cbe0e965 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:41:33 -0800 Subject: [PATCH 0099/2608] Fix overflow in prng_range(). Add jemalloc_ffs64() and use it instead of jemalloc_ffsl() in prng_range(), since long is not guaranteed to be a 64-bit type. --- configure.ac | 14 +++++++++++++- .../internal/jemalloc_internal_defs.h.in | 8 ++++++-- include/jemalloc/internal/private_symbols.txt | 3 +++ include/jemalloc/internal/prng.h | 2 +- include/jemalloc/internal/util.h | 19 +++++++++++++++++-- 5 files changed, 40 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 8adb6f7b..da4ee3ac 100644 --- a/configure.ac +++ b/configure.ac @@ -201,6 +201,16 @@ else fi AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_CHECK_SIZEOF([long long]) +if test "x${ac_cv_sizeof_long_long}" = "x8" ; then + LG_SIZEOF_LONG_LONG=3 +elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then + LG_SIZEOF_LONG_LONG=2 +else + AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) +fi +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) + AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then LG_SIZEOF_INTMAX_T=4 @@ -1040,7 +1050,7 @@ dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither 
are found. dnl One of those two functions should (theoretically) exist on all platforms dnl that jemalloc currently has a chance of functioning on without modification. -dnl We additionally assume ffs() or __builtin_ffs() are defined if +dnl We additionally assume ffs[ll]() or __builtin_ffs[ll]() are defined if dnl ffsl() or __builtin_ffsl() are defined, respectively. JE_COMPILABLE([a program using __builtin_ffsl], [ #include @@ -1053,6 +1063,7 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) else @@ -1067,6 +1078,7 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) else diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c84e27c9..4bcda716 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -189,9 +189,10 @@ #undef JEMALLOC_TLS /* - * ffs()/ffsl() functions to use for bitmapping. Don't use these directly; - * instead, use jemalloc_ffs() or jemalloc_ffsl() from util.h. + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use jemalloc_ffs*() from util.h. */ +#undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS @@ -241,6 +242,9 @@ /* sizeof(long) == 2^LG_SIZEOF_LONG. */ #undef LG_SIZEOF_LONG +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#undef LG_SIZEOF_LONG_LONG + /* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. 
*/ #undef LG_SIZEOF_INTMAX_T diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 284410a9..8428cf48 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -292,6 +292,9 @@ isqalloc isthreaded ivsalloc ixalloc +jemalloc_ffs +jemalloc_ffs64 +jemalloc_ffsl jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 83c90906..44d67c9a 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -64,7 +64,7 @@ prng_range(uint64_t *state, uint64_t range) assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = jemalloc_ffsl(pow2_ceil_u64(range)) - 1; + lg_range = jemalloc_ffs64(pow2_ceil_u64(range)) - 1; /* Generate a result in [0..range) via repeated trial. */ do { diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index dfe5c93c..39f70878 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -121,6 +121,7 @@ void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +int jemalloc_ffs64(uint64_t bitmap); int jemalloc_ffsl(long bitmap); int jemalloc_ffs(int bitmap); uint64_t pow2_ceil_u64(uint64_t x); @@ -134,10 +135,24 @@ int get_errno(void); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) /* Sanity check. 
*/ -#if !defined(JEMALLOC_INTERNAL_FFSL) || !defined(JEMALLOC_INTERNAL_FFS) -# error Both JEMALLOC_INTERNAL_FFSL && JEMALLOC_INTERNAL_FFS should have been defined by configure +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ + || !defined(JEMALLOC_INTERNAL_FFS) +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif +JEMALLOC_ALWAYS_INLINE int +jemalloc_ffs64(uint64_t bitmap) +{ + +#if LG_SIZEOF_LONG == 3 + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +#elif LG_SIZEOF_LONG_LONG == 3 + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +#else +#error No implementation for 64-bit ffs() +#endif +} + JEMALLOC_ALWAYS_INLINE int jemalloc_ffsl(long bitmap) { From 56139dc4035abc76744ad24844daaba77a721640 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:43:17 -0800 Subject: [PATCH 0100/2608] Remove _WIN32-specific struct timespec declaration. struct timespec is already defined by the system (at least on MinGW). --- include/jemalloc/internal/jemalloc_internal_decls.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 0f29e676..0bca63e5 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -62,11 +62,5 @@ isblank(int c) #include #include -#ifdef _WIN32 -struct timespec { - time_t tv_sec; - long tv_nsec; -}; -#endif #endif /* JEMALLOC_INTERNAL_H */ From fd9cd7a6cc575cab43e22f989c6709ffe0da451f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:45:22 -0800 Subject: [PATCH 0101/2608] Fix time_update() to compile and work on MinGW. 
--- src/time.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/time.c b/src/time.c index 2fe93e1e..8205c61b 100644 --- a/src/time.c +++ b/src/time.c @@ -161,12 +161,15 @@ time_update(struct timespec *time) time_copy(&old_time, time); #ifdef _WIN32 - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (ft.dwHighDateTime << 32) | ft.dWLowDateTime; - time->tv_sec = ticks / 10000; - time->tv_nsec = ((ticks % 10000) * 100); + { + FILETIME ft; + uint64_t ticks; + GetSystemTimeAsFileTime(&ft); + ticks = (((uint64_t)ft.dwHighDateTime) << 32) | + ft.dwLowDateTime; + time->tv_sec = ticks / 10000000; + time->tv_nsec = ((ticks % 10000000) * 100); + } #elif JEMALLOC_CLOCK_GETTIME if (sysconf(_SC_MONOTONIC_CLOCK) > 0) clock_gettime(CLOCK_MONOTONIC, time); From 788d29d397574396c4c93bf1f90da59dd7efc5cc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 20 Feb 2016 23:46:14 -0800 Subject: [PATCH 0102/2608] Fix Windows-specific prof-related compilation portability issues. --- include/msvc_compat/windows_extra.h | 2 -- src/prof.c | 19 ++++++++++++++++--- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 114f43b1..0c5e323f 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -23,6 +23,4 @@ # define ERANGE ERROR_INVALID_DATA #endif -#define getpid() GetCurrentProcessId() - #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ diff --git a/src/prof.c b/src/prof.c index 3abb38e2..173da69f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1359,6 +1359,7 @@ label_return: return (ret); } +#ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps(const char *format, ...) @@ -1374,6 +1375,18 @@ prof_open_maps(const char *format, ...) 
return (mfd); } +#endif + +static int +prof_getpid(void) +{ + +#ifdef _WIN32 + return (GetCurrentProcessId()); +#else + return (getpid()); +#endif +} static bool prof_dump_maps(bool propagate_err) @@ -1388,7 +1401,7 @@ prof_dump_maps(bool propagate_err) mfd = -1; // Not implemented #else { - int pid = getpid(); + int pid = prof_getpid(); mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) @@ -1557,12 +1570,12 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, (int)getpid(), prof_dump_seq, v); + opt_prof_prefix, prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } From 9bad07903962962de9f656d281b9b1e7e9501c87 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 21 Feb 2016 11:25:02 -0800 Subject: [PATCH 0103/2608] Refactor time_* into nstime_*. Use a single uint64_t in nstime_t to store nanoseconds rather than using struct timespec. This reduces fragility around conversions between long and uint64_t, especially missing casts that only cause problems on 32-bit platforms. 
--- Makefile.in | 38 ++- include/jemalloc/internal/arena.h | 6 +- .../jemalloc/internal/jemalloc_internal.h.in | 8 +- include/jemalloc/internal/nstime.h | 48 ++++ include/jemalloc/internal/private_symbols.txt | 24 +- include/jemalloc/internal/time.h | 41 ---- src/arena.c | 53 ++--- src/jemalloc.c | 2 +- src/nstime.c | 148 ++++++++++++ src/time.c | 198 ---------------- test/include/test/jemalloc_test.h.in | 2 +- test/include/test/timer.h | 4 +- test/src/timer.c | 16 +- test/unit/decay.c | 48 ++-- test/unit/mallctl.c | 4 +- test/unit/nstime.c | 220 +++++++++++++++++ test/unit/time.c | 223 ------------------ 17 files changed, 526 insertions(+), 557 deletions(-) create mode 100644 include/jemalloc/internal/nstime.h delete mode 100644 include/jemalloc/internal/time.h create mode 100644 src/nstime.c delete mode 100644 src/time.c create mode 100644 test/unit/nstime.c delete mode 100644 test/unit/time.c diff --git a/Makefile.in b/Makefile.in index e5681926..a4555c03 100644 --- a/Makefile.in +++ b/Makefile.in @@ -78,15 +78,31 @@ LIBJEMALLOC := $(LIBPREFIX)jemalloc$(install_suffix) # Lists of files. 
BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/jeprof C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h -C_SRCS := $(srcroot)src/jemalloc.c $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c $(srcroot)src/base.c $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c $(srcroot)src/ckh.c $(srcroot)src/ctl.c \ - $(srcroot)src/extent.c $(srcroot)src/hash.c $(srcroot)src/huge.c \ - $(srcroot)src/mb.c $(srcroot)src/mutex.c $(srcroot)src/pages.c \ - $(srcroot)src/prng.c $(srcroot)src/prof.c $(srcroot)src/quarantine.c \ - $(srcroot)src/rtree.c $(srcroot)src/stats.c $(srcroot)src/tcache.c \ - $(srcroot)src/ticker.c $(srcroot)src/time.c $(srcroot)src/tsd.c \ +C_SRCS := $(srcroot)src/jemalloc.c \ + $(srcroot)src/arena.c \ + $(srcroot)src/atomic.c \ + $(srcroot)src/base.c \ + $(srcroot)src/bitmap.c \ + $(srcroot)src/chunk.c \ + $(srcroot)src/chunk_dss.c \ + $(srcroot)src/chunk_mmap.c \ + $(srcroot)src/ckh.c \ + $(srcroot)src/ctl.c \ + $(srcroot)src/extent.c \ + $(srcroot)src/hash.c \ + $(srcroot)src/huge.c \ + $(srcroot)src/mb.c \ + $(srcroot)src/mutex.c \ + $(srcroot)src/nstime.c \ + $(srcroot)src/pages.c \ + $(srcroot)src/prng.c \ + $(srcroot)src/prof.c \ + $(srcroot)src/quarantine.c \ + $(srcroot)src/rtree.c \ + $(srcroot)src/stats.c \ + $(srcroot)src/tcache.c \ + $(srcroot)src/ticker.c \ + $(srcroot)src/tsd.c \ $(srcroot)src/util.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c @@ -117,7 +133,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/time.c $(srcroot)src/util.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ 
$(srcroot)test/unit/ckh.c \ @@ -148,7 +164,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/ticker.c \ - $(srcroot)test/unit/time.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/zero.c diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 76d3be19..65d4158b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -395,7 +395,7 @@ struct arena_s { */ ssize_t decay_time; /* decay_time / SMOOTHSTEP_NSTEPS. */ - struct timespec decay_interval; + nstime_t decay_interval; /* * Time at which the current decay interval logically started. We do * not actually advance to a new epoch until sometime after it starts @@ -403,7 +403,7 @@ struct arena_s { * to completely skip epochs. In all cases, during epoch advancement we * merge all relevant activity into the most recently recorded epoch. */ - struct timespec decay_epoch; + nstime_t decay_epoch; /* decay_deadline randomness generator. */ uint64_t decay_jitter_state; /* @@ -413,7 +413,7 @@ struct arena_s { * decay_interval, but we randomize the deadline to reduce the * likelihood of arenas purging in lockstep. */ - struct timespec decay_deadline; + nstime_t decay_deadline; /* * Number of dirty pages at beginning of current epoch. 
During epoch * advancement we use the delta between decay_ndirty and ndirty to diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index aa97d7c7..0260b9a8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -356,7 +356,7 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -387,7 +387,7 @@ typedef unsigned szind_t; /******************************************************************************/ #define JEMALLOC_H_STRUCTS -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -477,7 +477,7 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" @@ -508,7 +508,7 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #define JEMALLOC_H_INLINES -#include "jemalloc/internal/time.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h new file mode 100644 index 00000000..bd04f04b --- /dev/null +++ b/include/jemalloc/internal/nstime.h @@ -0,0 +1,48 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#define JEMALLOC_CLOCK_GETTIME 
defined(_POSIX_MONOTONIC_CLOCK) \ + && _POSIX_MONOTONIC_CLOCK >= 0 + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). */ +#define NSTIME_SEC_MAX 18446744072 + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +#ifdef JEMALLOC_JET +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *nstime_update; +#else +bool nstime_update(nstime_t *time); +#endif + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8428cf48..c12baadb 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -327,6 +327,19 @@ narenas_tdata_cleanup narenas_total_get ncpus nhbins +nstime_add +nstime_compare +nstime_copy +nstime_divide 
+nstime_idivide +nstime_imultiply +nstime_init +nstime_init2 +nstime_ns +nstime_nsec +nstime_sec +nstime_subtract +nstime_update opt_abort opt_decay_time opt_dss @@ -484,17 +497,6 @@ ticker_init ticker_read ticker_tick ticker_ticks -time_add -time_compare -time_copy -time_divide -time_idivide -time_imultiply -time_init -time_nsec -time_sec -time_subtract -time_update tsd_arena_get tsd_arena_set tsd_boot diff --git a/include/jemalloc/internal/time.h b/include/jemalloc/internal/time.h deleted file mode 100644 index dd1dd5bd..00000000 --- a/include/jemalloc/internal/time.h +++ /dev/null @@ -1,41 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - -/* Maximum supported number of seconds (~584 years). */ -#define TIME_SEC_MAX 18446744072 - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void time_init(struct timespec *time, time_t sec, long nsec); -time_t time_sec(const struct timespec *time); -long time_nsec(const struct timespec *time); -void time_copy(struct timespec *time, const struct timespec *source); -int time_compare(const struct timespec *a, const struct timespec *b); -void time_add(struct timespec *time, const struct timespec *addend); -void time_subtract(struct timespec *time, const struct timespec *subtrahend); -void time_imultiply(struct timespec *time, uint64_t multiplier); -void time_idivide(struct timespec *time, uint64_t divisor); -uint64_t time_divide(const struct timespec *time, - const struct timespec *divisor); -#ifdef JEMALLOC_JET -typedef bool (time_update_t)(struct timespec *); -extern time_update_t *time_update; -#else -bool time_update(struct 
timespec *time); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index b1078ae9..77c691a1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1224,27 +1224,24 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. */ - time_copy(&arena->decay_deadline, &arena->decay_epoch); - time_add(&arena->decay_deadline, &arena->decay_interval); + nstime_copy(&arena->decay_deadline, &arena->decay_epoch); + nstime_add(&arena->decay_deadline, &arena->decay_interval); if (arena->decay_time > 0) { - uint64_t decay_interval_ns, r; - struct timespec jitter; + nstime_t jitter; - decay_interval_ns = time_sec(&arena->decay_interval) * - 1000000000 + time_nsec(&arena->decay_interval); - r = prng_range(&arena->decay_jitter_state, decay_interval_ns); - time_init(&jitter, r / 1000000000, r % 1000000000); - time_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range(&arena->decay_jitter_state, + nstime_ns(&arena->decay_interval))); + nstime_add(&arena->decay_deadline, &jitter); } } static bool -arena_decay_deadline_reached(const arena_t *arena, const struct timespec *time) +arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) { assert(opt_purge == purge_mode_decay); - return (time_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay_deadline, time) <= 0); } static size_t @@ -1276,24 +1273,24 @@ arena_decay_backlog_npages_limit(const arena_t *arena) } static void -arena_decay_epoch_advance(arena_t *arena, const struct timespec *time) +arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) { uint64_t nadvance; - struct timespec delta; + nstime_t delta; size_t ndirty_delta; 
assert(opt_purge == purge_mode_decay); assert(arena_decay_deadline_reached(arena, time)); - time_copy(&delta, time); - time_subtract(&delta, &arena->decay_epoch); - nadvance = time_divide(&delta, &arena->decay_interval); + nstime_copy(&delta, time); + nstime_subtract(&delta, &arena->decay_epoch); + nadvance = nstime_divide(&delta, &arena->decay_interval); assert(nadvance > 0); /* Add nadvance decay intervals to epoch. */ - time_copy(&delta, &arena->decay_interval); - time_imultiply(&delta, nadvance); - time_add(&arena->decay_epoch, &delta); + nstime_copy(&delta, &arena->decay_interval); + nstime_imultiply(&delta, nadvance); + nstime_add(&arena->decay_epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); @@ -1340,12 +1337,12 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) arena->decay_time = decay_time; if (decay_time > 0) { - time_init(&arena->decay_interval, decay_time, 0); - time_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay_interval, decay_time, 0); + nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); } - time_init(&arena->decay_epoch, 0, 0); - time_update(&arena->decay_epoch); + nstime_init(&arena->decay_epoch, 0); + nstime_update(&arena->decay_epoch); arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); arena->decay_ndirty = arena->ndirty; @@ -1357,7 +1354,7 @@ static bool arena_decay_time_valid(ssize_t decay_time) { - return (decay_time >= -1 && decay_time <= TIME_SEC_MAX); + return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX); } ssize_t @@ -1426,7 +1423,7 @@ arena_maybe_purge_ratio(arena_t *arena) static void arena_maybe_purge_decay(arena_t *arena) { - struct timespec time; + nstime_t time; size_t ndirty_limit; assert(opt_purge == purge_mode_decay); @@ -1438,10 +1435,10 @@ arena_maybe_purge_decay(arena_t *arena) return; } - time_copy(&time, &arena->decay_epoch); - if (unlikely(time_update(&time))) { + nstime_copy(&time, &arena->decay_epoch); + 
if (unlikely(nstime_update(&time))) { /* Time went backwards. Force an epoch advance. */ - time_copy(&time, &arena->decay_deadline); + nstime_copy(&time, &arena->decay_deadline); } if (arena_decay_deadline_reached(arena, &time)) diff --git a/src/jemalloc.c b/src/jemalloc.c index f69d951b..76b4f154 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1151,7 +1151,7 @@ malloc_conf_init(void) CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, - TIME_SEC_MAX); + NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { diff --git a/src/nstime.c b/src/nstime.c new file mode 100644 index 00000000..4cf90b58 --- /dev/null +++ b/src/nstime.c @@ -0,0 +1,148 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +#define BILLION UINT64_C(1000000000) + +void +nstime_init(nstime_t *time, uint64_t ns) +{ + + time->ns = ns; +} + +void +nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) +{ + + time->ns = sec * BILLION + nsec; +} + +uint64_t +nstime_ns(const nstime_t *time) +{ + + return (time->ns); +} + +uint64_t +nstime_sec(const nstime_t *time) +{ + + return (time->ns / BILLION); +} + +uint64_t +nstime_nsec(const nstime_t *time) +{ + + return (time->ns % BILLION); +} + +void +nstime_copy(nstime_t *time, const nstime_t *source) +{ + + *time = *source; +} + +int +nstime_compare(const nstime_t *a, const nstime_t *b) +{ + + return ((a->ns > b->ns) - (a->ns < b->ns)); +} + +void +nstime_add(nstime_t *time, const nstime_t *addend) +{ + + assert(UINT64_MAX - time->ns >= addend->ns); + + time->ns += addend->ns; +} + +void +nstime_subtract(nstime_t *time, const nstime_t *subtrahend) +{ + + assert(nstime_compare(time, subtrahend) >= 0); + + time->ns -= subtrahend->ns; +} + +void +nstime_imultiply(nstime_t *time, uint64_t multiplier) +{ + + assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << + 2))) == 0) || ((time->ns 
* multiplier) / multiplier == time->ns)); + + time->ns *= multiplier; +} + +void +nstime_idivide(nstime_t *time, uint64_t divisor) +{ + + assert(divisor != 0); + + time->ns /= divisor; +} + +uint64_t +nstime_divide(const nstime_t *time, const nstime_t *divisor) +{ + + assert(divisor->ns != 0); + + return (time->ns / divisor->ns); +} + +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update_impl) +#endif +bool +nstime_update(nstime_t *time) +{ + nstime_t old_time; + + nstime_copy(&old_time, time); + +#ifdef _WIN32 + { + FILETIME ft; + uint64_t ticks; + GetSystemTimeAsFileTime(&ft); + ticks = (((uint64_t)ft.dwHighDateTime) << 32) | + ft.dwLowDateTime; + time->ns = ticks * 100; + } +#elif JEMALLOC_CLOCK_GETTIME + { + struct timespec ts; + + if (sysconf(_SC_MONOTONIC_CLOCK) > 0) + clock_gettime(CLOCK_MONOTONIC, &ts); + else + clock_gettime(CLOCK_REALTIME, &ts); + time->ns = ts.tv_sec * BILLION + ts.tv_nsec; + } +#else + struct timeval tv; + gettimeofday(&tv, NULL); + time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; +#endif + + /* Handle non-monotonic clocks. 
*/ + if (unlikely(nstime_compare(&old_time, time) > 0)) { + nstime_copy(time, &old_time); + return (true); + } + + return (false); +} +#ifdef JEMALLOC_JET +#undef nstime_update +#define nstime_update JEMALLOC_N(nstime_update) +nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl); +#endif diff --git a/src/time.c b/src/time.c deleted file mode 100644 index 8205c61b..00000000 --- a/src/time.c +++ /dev/null @@ -1,198 +0,0 @@ -#include "jemalloc/internal/jemalloc_internal.h" - -#define BILLION 1000000000 - -UNUSED static bool -time_valid(const struct timespec *time) -{ - - if (time->tv_sec > TIME_SEC_MAX) - return (false); - if (time->tv_nsec >= BILLION) - return (false); - - return (true); -} - -void -time_init(struct timespec *time, time_t sec, long nsec) -{ - - time->tv_sec = sec; - time->tv_nsec = nsec; - - assert(time_valid(time)); -} - -time_t -time_sec(const struct timespec *time) -{ - - assert(time_valid(time)); - - return (time->tv_sec); -} - -long -time_nsec(const struct timespec *time) -{ - - assert(time_valid(time)); - - return (time->tv_nsec); -} - -void -time_copy(struct timespec *time, const struct timespec *source) -{ - - assert(time_valid(source)); - - *time = *source; -} - -int -time_compare(const struct timespec *a, const struct timespec *b) -{ - int ret; - - assert(time_valid(a)); - assert(time_valid(b)); - - ret = (a->tv_sec > b->tv_sec) - (a->tv_sec < b->tv_sec); - if (ret == 0) - ret = (a->tv_nsec > b->tv_nsec) - (a->tv_nsec < b->tv_nsec); - - return (ret); -} - -void -time_add(struct timespec *time, const struct timespec *addend) -{ - - assert(time_valid(time)); - assert(time_valid(addend)); - - time->tv_sec += addend->tv_sec; - time->tv_nsec += addend->tv_nsec; - if (time->tv_nsec >= BILLION) { - time->tv_sec++; - time->tv_nsec -= BILLION; - } - - assert(time_valid(time)); -} - -void -time_subtract(struct timespec *time, const struct timespec *subtrahend) -{ - - assert(time_valid(time)); - assert(time_valid(subtrahend)); - 
assert(time_compare(time, subtrahend) >= 0); - - time->tv_sec -= subtrahend->tv_sec; - if (time->tv_nsec < subtrahend->tv_nsec) { - time->tv_sec--; - time->tv_nsec += BILLION; - } - time->tv_nsec -= subtrahend->tv_nsec; -} - -void -time_imultiply(struct timespec *time, uint64_t multiplier) -{ - time_t sec; - uint64_t nsec; - - assert(time_valid(time)); - - sec = time->tv_sec * multiplier; - nsec = time->tv_nsec * multiplier; - sec += nsec / BILLION; - nsec %= BILLION; - time_init(time, sec, (long)nsec); - - assert(time_valid(time)); -} - -void -time_idivide(struct timespec *time, uint64_t divisor) -{ - time_t sec; - uint64_t nsec; - - assert(time_valid(time)); - - sec = time->tv_sec / divisor; - nsec = ((time->tv_sec % divisor) * BILLION + time->tv_nsec) / divisor; - sec += nsec / BILLION; - nsec %= BILLION; - time_init(time, sec, (long)nsec); - - assert(time_valid(time)); -} - -uint64_t -time_divide(const struct timespec *time, const struct timespec *divisor) -{ - uint64_t t, d; - - assert(time_valid(time)); - assert(time_valid(divisor)); - - t = time_sec(time) * BILLION + time_nsec(time); - d = time_sec(divisor) * BILLION + time_nsec(divisor); - assert(d != 0); - return (t / d); -} - -#ifdef JEMALLOC_JET -#undef time_update -#define time_update JEMALLOC_N(time_update_impl) -#endif -bool -time_update(struct timespec *time) -{ - struct timespec old_time; - - assert(time_valid(time)); - - time_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->tv_sec = ticks / 10000000; - time->tv_nsec = ((ticks % 10000000) * 100); - } -#elif JEMALLOC_CLOCK_GETTIME - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, time); - else - clock_gettime(CLOCK_REALTIME, time); -#else - struct timeval tv; - gettimeofday(&tv, NULL); - time->tv_sec = tv.tv_sec; - time->tv_nsec = tv.tv_usec * 1000; -#endif - - /* Handle non-monotonic clocks. 
*/ - if (unlikely(time_compare(&old_time, time) > 0)) { - time_copy(time, &old_time); - return (true); - } - - assert(time_valid(time)); - return (false); -} -#ifdef JEMALLOC_JET -#undef time_update -#define time_update JEMALLOC_N(time_update) -time_update_t *time_update = JEMALLOC_N(time_update_impl); -#endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 223162e1..4aaaf952 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -94,7 +94,7 @@ # define JEMALLOC_H_STRUCTS # define JEMALLOC_H_EXTERNS # define JEMALLOC_H_INLINES -# include "jemalloc/internal/time.h" +# include "jemalloc/internal/nstime.h" # include "jemalloc/internal/util.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" diff --git a/test/include/test/timer.h b/test/include/test/timer.h index a791f9ce..0b27e019 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -4,8 +4,8 @@ #include typedef struct { - struct timespec t0; - struct timespec t1; + nstime_t t0; + nstime_t t1; } timedelta_t; void timer_start(timedelta_t *timer); diff --git a/test/src/timer.c b/test/src/timer.c index 15306cfd..e91b3cf2 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -4,26 +4,26 @@ void timer_start(timedelta_t *timer) { - time_init(&timer->t0, 0, 0); - time_update(&timer->t0); + nstime_init(&timer->t0, 0); + nstime_update(&timer->t0); } void timer_stop(timedelta_t *timer) { - time_copy(&timer->t1, &timer->t0); - time_update(&timer->t1); + nstime_copy(&timer->t1, &timer->t0); + nstime_update(&timer->t1); } uint64_t timer_usec(const timedelta_t *timer) { - struct timespec delta; + nstime_t delta; - time_copy(&delta, &timer->t1); - time_subtract(&delta, &timer->t0); - return (time_sec(&delta) * 1000000 + time_nsec(&delta) / 1000); + nstime_copy(&delta, &timer->t1); + nstime_subtract(&delta, &timer->t0); + return (nstime_ns(&delta) / 1000); } void diff --git a/test/unit/decay.c 
b/test/unit/decay.c index 20730de4..66d54dc8 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,19 +2,19 @@ const char *malloc_conf = "purge:decay,decay_time:1"; -static time_update_t *time_update_orig; +static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; -static struct timespec time_mock; +static nstime_t time_mock; static bool nonmonotonic_mock; static bool -time_update_mock(struct timespec *time) +nstime_update_mock(nstime_t *time) { nupdates_mock++; if (!nonmonotonic_mock) - time_copy(time, &time_mock); + nstime_copy(time, &time_mock); return (nonmonotonic_mock); } @@ -204,7 +204,7 @@ TEST_BEGIN(test_decay_ticker) uint64_t epoch, npurge0, npurge1; size_t sz, tcache_max, large; unsigned i, nupdates0; - struct timespec time, decay_time, deadline; + nstime_t time, decay_time, deadline; test_skip_if(opt_purge != purge_mode_decay); @@ -233,12 +233,12 @@ TEST_BEGIN(test_decay_ticker) } nupdates_mock = 0; - time_init(&time_mock, 0, 0); - time_update(&time_mock); + nstime_init(&time_mock, 0); + nstime_update(&time_mock); nonmonotonic_mock = false; - time_update_orig = time_update; - time_update = time_update_mock; + nstime_update_orig = nstime_update; + nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { dallocx(ps[i], flags); @@ -246,16 +246,16 @@ TEST_BEGIN(test_decay_ticker) assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected arena.0.decay failure"); assert_u_gt(nupdates_mock, nupdates0, - "Expected time_update() to be called"); + "Expected nstime_update() to be called"); } - time_update = time_update_orig; + nstime_update = nstime_update_orig; - time_init(&time, 0, 0); - time_update(&time); - time_init(&decay_time, opt_decay_time, 0); - time_copy(&deadline, &time); - time_add(&deadline, &decay_time); + nstime_init(&time, 0); + nstime_update(&time); + nstime_init2(&decay_time, opt_decay_time, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_time); do { for (i = 0; i < 
DECAY_NTICKS_PER_UPDATE / 2; i++) { void *p = mallocx(1, flags); @@ -268,8 +268,8 @@ TEST_BEGIN(test_decay_ticker) assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - time_update(&time); - } while (time_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + nstime_update(&time); + } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); #undef NPS @@ -300,12 +300,12 @@ TEST_BEGIN(test_decay_nonmonotonic) "Unexpected mallctl failure"); nupdates_mock = 0; - time_init(&time_mock, 0, 0); - time_update(&time_mock); + nstime_init(&time_mock, 0); + nstime_update(&time_mock); nonmonotonic_mock = true; - time_update_orig = time_update; - time_update = time_update_mock; + nstime_update_orig = nstime_update; + nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { ps[i] = mallocx(large0, flags); @@ -318,7 +318,7 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected arena.0.decay failure"); assert_u_gt(nupdates_mock, nupdates0, - "Expected time_update() to be called"); + "Expected nstime_update() to be called"); } assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, @@ -329,7 +329,7 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); - time_update = time_update_orig; + nstime_update = nstime_update_orig; #undef NPS } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index b312fc64..e8dc4926 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -401,7 +401,7 @@ TEST_BEGIN(test_arena_i_decay_time) &decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - decay_time = TIME_SEC_MAX; + decay_time = 0x7fffffff; assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, &decay_time, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); @@ -567,7 +567,7 @@ TEST_BEGIN(test_arenas_decay_time) 
&decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - decay_time = TIME_SEC_MAX; + decay_time = 0x7fffffff; assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, &decay_time, sizeof(ssize_t)), 0, "Expected mallctl() failure"); diff --git a/test/unit/nstime.c b/test/unit/nstime.c new file mode 100644 index 00000000..cd7d9a6d --- /dev/null +++ b/test/unit/nstime.c @@ -0,0 +1,220 @@ +#include "test/jemalloc_test.h" + +#define BILLION UINT64_C(1000000000) + +TEST_BEGIN(test_nstime_init) +{ + nstime_t nst; + + nstime_init(&nst, 42000000043); + assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_init2) +{ + nstime_t nst; + + nstime_init2(&nst, 42, 43); + assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); +} +TEST_END + +TEST_BEGIN(test_nstime_copy) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_init(&nstb, 0); + nstime_copy(&nstb, &nsta); + assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); + assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); +} +TEST_END + +TEST_BEGIN(test_nstime_compare) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); + assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); + + nstime_init2(&nstb, 42, 42); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 42, 44); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); + + nstime_init2(&nstb, 41, 
BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 1, + "nsta should be greater than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), -1, + "nstb should be less than nsta"); + + nstime_init2(&nstb, 43, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), -1, + "nsta should be less than nstb"); + assert_d_eq(nstime_compare(&nstb, &nsta), 1, + "nstb should be greater than nsta"); +} +TEST_END + +TEST_BEGIN(test_nstime_add) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 84, 86); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); + + nstime_init2(&nsta, 42, BILLION - 1); + nstime_copy(&nstb, &nsta); + nstime_add(&nsta, &nstb); + nstime_init2(&nstb, 85, BILLION - 2); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); +} +TEST_END + +TEST_BEGIN(test_nstime_subtract) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_subtract(&nsta, &nstb); + nstime_init(&nstb, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); + + nstime_init2(&nsta, 42, 43); + nstime_init2(&nstb, 41, 44); + nstime_subtract(&nsta, &nstb); + nstime_init2(&nstb, 0, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); +} +TEST_END + +TEST_BEGIN(test_nstime_imultiply) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_imultiply(&nsta, 10); + nstime_init2(&nstb, 420, 430); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_imultiply(&nsta, 3); + nstime_init2(&nstb, 127, 999999998); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect multiplication result"); +} +TEST_END + +TEST_BEGIN(test_nstime_idivide) +{ + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + 
nstime_idivide(&nsta, 10); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 666666666); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 3); + nstime_idivide(&nsta, 3); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_divide) +{ + nstime_t nsta, nstb, nstc; + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_add(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + "Incorrect division result"); + + nstime_init2(&nsta, 42, 43); + nstime_copy(&nstb, &nsta); + nstime_imultiply(&nsta, 10); + nstime_init(&nstc, 1); + nstime_subtract(&nsta, &nstc); + assert_u64_eq(nstime_divide(&nsta, &nstb), 9, + "Incorrect division result"); +} +TEST_END + +TEST_BEGIN(test_nstime_update) +{ + nstime_t nst; + + nstime_init(&nst, 0); + + assert_false(nstime_update(&nst), "Basic time update failed."); + + /* Only Rip Van Winkle sleeps this long. 
*/ + { + nstime_t addend; + nstime_init2(&addend, 631152000, 0); + nstime_add(&nst, &addend); + } + { + nstime_t nst0; + nstime_copy(&nst0, &nst); + assert_true(nstime_update(&nst), + "Update should detect time roll-back."); + assert_d_eq(nstime_compare(&nst, &nst0), 0, + "Time should not have been modified"); + } + +} +TEST_END + +int +main(void) +{ + + return (test( + test_nstime_init, + test_nstime_init2, + test_nstime_copy, + test_nstime_compare, + test_nstime_add, + test_nstime_subtract, + test_nstime_imultiply, + test_nstime_idivide, + test_nstime_divide, + test_nstime_update)); +} diff --git a/test/unit/time.c b/test/unit/time.c deleted file mode 100644 index 941e6f13..00000000 --- a/test/unit/time.c +++ /dev/null @@ -1,223 +0,0 @@ -#include "test/jemalloc_test.h" - -#define BILLION 1000000000 - -TEST_BEGIN(test_time_init) -{ - struct timespec ts; - - time_init(&ts, 42, 43); - assert_ld_eq(ts.tv_sec, 42, "tv_sec incorrectly initialized"); - assert_ld_eq(ts.tv_nsec, 43, "tv_nsec incorrectly initialized"); -} -TEST_END - -TEST_BEGIN(test_time_sec) -{ - struct timespec ts; - - time_init(&ts, 42, 43); - assert_ld_eq(time_sec(&ts), 42, "tv_sec incorrectly read"); -} -TEST_END - -TEST_BEGIN(test_time_nsec) -{ - struct timespec ts; - - time_init(&ts, 42, 43); - assert_ld_eq(time_nsec(&ts), 43, "tv_nsec incorrectly read"); -} -TEST_END - -TEST_BEGIN(test_time_copy) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_init(&tsb, 0, 0); - time_copy(&tsb, &tsa); - assert_ld_eq(time_sec(&tsb), 42, "tv_sec incorrectly copied"); - assert_ld_eq(time_nsec(&tsb), 43, "tv_nsec incorrectly copied"); -} -TEST_END - -TEST_BEGIN(test_time_compare) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Times should be equal"); - assert_d_eq(time_compare(&tsb, &tsa), 0, "Times should be equal"); - - time_init(&tsb, 42, 42); - assert_d_eq(time_compare(&tsa, &tsb), 1, - "tsa should be greater than 
tsb"); - assert_d_eq(time_compare(&tsb, &tsa), -1, - "tsb should be less than tsa"); - - time_init(&tsb, 42, 44); - assert_d_eq(time_compare(&tsa, &tsb), -1, - "tsa should be less than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), 1, - "tsb should be greater than tsa"); - - time_init(&tsb, 41, BILLION - 1); - assert_d_eq(time_compare(&tsa, &tsb), 1, - "tsa should be greater than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), -1, - "tsb should be less than tsa"); - - time_init(&tsb, 43, 0); - assert_d_eq(time_compare(&tsa, &tsb), -1, - "tsa should be less than tsb"); - assert_d_eq(time_compare(&tsb, &tsa), 1, - "tsb should be greater than tsa"); -} -TEST_END - -TEST_BEGIN(test_time_add) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_add(&tsa, &tsb); - time_init(&tsb, 84, 86); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); - - time_init(&tsa, 42, BILLION - 1); - time_copy(&tsb, &tsa); - time_add(&tsa, &tsb); - time_init(&tsb, 85, BILLION - 2); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect addition result"); -} -TEST_END - -TEST_BEGIN(test_time_subtract) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_subtract(&tsa, &tsb); - time_init(&tsb, 0, 0); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect subtraction result"); - - time_init(&tsa, 42, 43); - time_init(&tsb, 41, 44); - time_subtract(&tsa, &tsb); - time_init(&tsb, 0, BILLION - 1); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect subtraction result"); -} -TEST_END - -TEST_BEGIN(test_time_imultiply) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_imultiply(&tsa, 10); - time_init(&tsb, 420, 430); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect multiplication result"); - - time_init(&tsa, 42, 666666666); - time_imultiply(&tsa, 3); - time_init(&tsb, 127, 999999998); - assert_d_eq(time_compare(&tsa, &tsb), 0, - "Incorrect multiplication result"); -} 
-TEST_END - -TEST_BEGIN(test_time_idivide) -{ - struct timespec tsa, tsb; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - time_idivide(&tsa, 10); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); - - time_init(&tsa, 42, 666666666); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 3); - time_idivide(&tsa, 3); - assert_d_eq(time_compare(&tsa, &tsb), 0, "Incorrect division result"); -} -TEST_END - -TEST_BEGIN(test_time_divide) -{ - struct timespec tsa, tsb, tsc; - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - assert_u64_eq(time_divide(&tsa, &tsb), 10, - "Incorrect division result"); - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - time_init(&tsc, 0, 1); - time_add(&tsa, &tsc); - assert_u64_eq(time_divide(&tsa, &tsb), 10, - "Incorrect division result"); - - time_init(&tsa, 42, 43); - time_copy(&tsb, &tsa); - time_imultiply(&tsa, 10); - time_init(&tsc, 0, 1); - time_subtract(&tsa, &tsc); - assert_u64_eq(time_divide(&tsa, &tsb), 9, "Incorrect division result"); -} -TEST_END - -TEST_BEGIN(test_time_update) -{ - struct timespec ts; - - time_init(&ts, 0, 0); - - assert_false(time_update(&ts), "Basic time update failed."); - - /* Only Rip Van Winkle sleeps this long. 
*/ - { - struct timespec addend; - time_init(&addend, 631152000, 0); - time_add(&ts, &addend); - } - { - struct timespec ts0; - time_copy(&ts0, &ts); - assert_true(time_update(&ts), - "Update should detect time roll-back."); - assert_d_eq(time_compare(&ts, &ts0), 0, - "Time should not have been modified"); - } - -} -TEST_END - -int -main(void) -{ - - return (test( - test_time_init, - test_time_sec, - test_time_nsec, - test_time_copy, - test_time_compare, - test_time_add, - test_time_subtract, - test_time_imultiply, - test_time_idivide, - test_time_divide, - test_time_update)); -} From 817d9030a5811f98c43b10ac53b8f17180dbc44f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 10:44:58 -0800 Subject: [PATCH 0104/2608] Indentation style cleanup. --- include/jemalloc/internal/arena.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 65d4158b..561b5886 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -167,8 +167,8 @@ struct arena_chunk_map_misc_s { /* Profile counters, used for large object runs. */ union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; + void *prof_tctx_pun; + prof_tctx_t *prof_tctx; }; /* Small region run metadata. */ @@ -233,28 +233,28 @@ struct arena_chunk_s { */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Redzone size. */ - size_t redzone_size; + size_t redzone_size; /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; + size_t reg_interval; /* Total size of a run for this bin's size class. */ - size_t run_size; + size_t run_size; /* Total number of regions in a run for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* * Metadata used to manipulate bitmaps for runs associated with this * bin. 
*/ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; /* Offset of first region in a run for this bin's size class. */ - uint32_t reg0_offset; + uint32_t reg0_offset; }; struct arena_bin_s { @@ -264,13 +264,13 @@ struct arena_bin_s { * which may be acquired while holding one or more bin locks, but not * vise versa. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Current run being used to service allocations of this bin's size * class. */ - arena_run_t *runcur; + arena_run_t *runcur; /* * Tree of non-full runs. This tree is used when looking for an @@ -279,10 +279,10 @@ struct arena_bin_s { * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_tree_t runs; + arena_run_tree_t runs; /* Bin statistics. */ - malloc_bin_stats_t stats; + malloc_bin_stats_t stats; }; struct arena_s { From a9a46847925e38373e6a5da250c0cecb11a8277b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 14:58:05 -0800 Subject: [PATCH 0105/2608] Test run quantization. Also rename run_quantize_*() to improve clarity. These tests demonstrate that run_quantize_ceil() is flawed. 
--- Makefile.in | 1 + include/jemalloc/internal/arena.h | 6 + include/jemalloc/internal/private_symbols.txt | 2 + src/arena.c | 38 +++-- test/unit/run_quantize.c | 157 ++++++++++++++++++ 5 files changed, 194 insertions(+), 10 deletions(-) create mode 100644 test/unit/run_quantize.c diff --git a/Makefile.in b/Makefile.in index a4555c03..f60823f5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -159,6 +159,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/smoothstep.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 561b5886..f98aeb80 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -494,9 +494,15 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ +extern size_t small_maxrun; /* Max run size for small size classes. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ +#ifdef JEMALLOC_JET +typedef size_t (run_quantize_t)(size_t); +extern run_quantize_t *run_quantize_floor; +extern run_quantize_t *run_quantize_ceil; +#endif void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c12baadb..3e37a61a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -445,6 +445,8 @@ rtree_subtree_read_hard rtree_subtree_tryread rtree_val_read rtree_val_write +run_quantize_ceil +run_quantize_floor s2u s2u_compute s2u_lookup diff --git a/src/arena.c b/src/arena.c index 77c691a1..ff5b5fb8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,7 +21,7 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -static size_t small_maxrun; /* Max run size used for small size classes. */ +size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. 
*/ @@ -100,8 +100,12 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, rb_link, arena_run_comp) +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) +#endif static size_t -run_quantize(size_t size) +run_quantize_floor(size_t size) { size_t qsize; @@ -119,13 +123,18 @@ run_quantize(size_t size) */ qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize(size - large_pad)); + return (run_quantize_floor(size - large_pad)); assert(qsize <= size); return (qsize); } +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor) +run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +#endif static size_t -run_quantize_next(size_t size) +run_quantize_ceil_hard(size_t size) { size_t large_run_size_next; @@ -158,10 +167,14 @@ run_quantize_next(size_t size) } } +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#endif static size_t -run_quantize_first(size_t size) +run_quantize_ceil(size_t size) { - size_t qsize = run_quantize(size); + size_t qsize = run_quantize_floor(size); if (qsize < size) { /* @@ -172,10 +185,15 @@ run_quantize_first(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. 
*/ - qsize = run_quantize_next(size); + qsize = run_quantize_ceil_hard(size); } return (qsize); } +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) +run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); +#endif JEMALLOC_INLINE_C int arena_avail_comp(const arena_chunk_map_misc_t *a, @@ -183,9 +201,9 @@ arena_avail_comp(const arena_chunk_map_misc_t *a, { int ret; uintptr_t a_miscelm = (uintptr_t)a; - size_t a_qsize = run_quantize(arena_miscelm_is_key(a) ? + size_t a_qsize = run_quantize_floor(arena_miscelm_is_key(a) ? arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); - size_t b_qsize = run_quantize(arena_miscelm_size_get(b)); + size_t b_qsize = run_quantize_floor(arena_miscelm_size_get(b)); /* * Compare based on quantized size rather than size, in order to sort @@ -1081,7 +1099,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - size_t search_size = run_quantize_first(size); + size_t search_size = run_quantize_ceil(size); arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); arena_chunk_map_misc_t *miscelm = arena_avail_tree_nsearch(&arena->runs_avail, key); diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c new file mode 100644 index 00000000..aff4056b --- /dev/null +++ b/test/unit/run_quantize.c @@ -0,0 +1,157 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_small_run_size) +{ + unsigned nbins, i; + size_t sz, run_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all small size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. 
+ */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nbins; i++) { + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + assert_zu_eq(run_size, run_quantize_floor(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + assert_zu_eq(run_size, run_quantize_ceil(run_size), + "Small run quantization should be a no-op (run_size=%zu)", + run_size); + } +} +TEST_END + +TEST_BEGIN(test_large_run_size) +{ + bool cache_oblivious; + unsigned nlruns, i; + size_t sz, run_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all large size classes, get their run sizes, and verify + * that the quantized size is the same as the run size. + */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nlruns; i++) { + size_t lrun_size, run_size, floor, ceil; + + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &lrun_size, &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + run_size = cache_oblivious ? 
lrun_size + PAGE : lrun_size; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_eq(run_size, floor, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + assert_zu_eq(run_size, ceil, + "Large run quantization should be a no-op for precise " + "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); + + if (i > 0) { + assert_zu_eq(run_size_prev, run_quantize_floor(run_size + - PAGE), "Floor should be a precise size"); + if (run_size_prev < ceil_prev) { + assert_zu_eq(ceil_prev, run_size, + "Ceiling should be a precise size " + "(run_size_prev=%zu, ceil_prev=%zu, " + "run_size=%zu)", run_size_prev, ceil_prev, + run_size); + } + } + run_size_prev = floor; + ceil_prev = run_quantize_ceil(run_size + PAGE); + } +} +TEST_END + +TEST_BEGIN(test_monotonic) +{ + bool cache_oblivious; + unsigned nbins, nlruns, i; + size_t sz, max_run_size, floor_prev, ceil_prev; + + /* + * Iterate over all run sizes and verify that + * run_quantize_{floor,ceil}() are monotonic. + */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + max_run_size = (large_maxclass > small_maxrun) ? 
large_maxclass : + small_maxrun; + + floor_prev = 0; + ceil_prev = 0; + for (i = 1; i < max_run_size >> LG_PAGE; i++) { + size_t run_size, floor, ceil; + + run_size = i << LG_PAGE; + floor = run_quantize_floor(run_size); + ceil = run_quantize_ceil(run_size); + + assert_zu_le(floor, run_size, + "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + assert_zu_ge(ceil, run_size, + "Ceiling should be >= (floor=%zu, run_size=%zu, ceil=%zu)", + floor, run_size, ceil); + + assert_zu_le(floor_prev, floor, "Floor should be monotonic " + "(floor_prev=%zu, floor=%zu, run_size=%zu, ceil=%zu)", + floor_prev, floor, run_size, ceil); + assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + "(floor=%zu, run_size=%zu, ceil_prev=%zu, ceil=%zu)", + floor, run_size, ceil_prev, ceil); + + floor_prev = floor; + ceil_prev = ceil; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_small_run_size, + test_large_run_size, + test_monotonic)); +} From 08551eee586eefa8c98f33b97679f259af50afab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 15:01:37 -0800 Subject: [PATCH 0106/2608] Fix run_quantize_ceil(). In practice this bug had limited impact (and then only by increasing chunk fragmentation) because run_quantize_ceil() returned correct results except for inputs that could only arise from aligned allocation requests that required more than page alignment. This bug existed in the original run quantization implementation, which was introduced by 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.). --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index ff5b5fb8..97dea843 100644 --- a/src/arena.c +++ b/src/arena.c @@ -185,7 +185,7 @@ run_quantize_ceil(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. 
*/ - qsize = run_quantize_ceil_hard(size); + qsize = run_quantize_ceil_hard(qsize); } return (qsize); } From 0da8ce1e96bedff697f7133c8cfb328390b6d11d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 Feb 2016 16:20:56 -0800 Subject: [PATCH 0107/2608] Use table lookup for run_quantize_{floor,ceil}(). Reduce run quantization overhead by generating lookup tables during bootstrapping, and using the tables for all subsequent run quantization. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 107 ++++++++++++++---- test/unit/run_quantize.c | 12 +- 4 files changed, 90 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f98aeb80..8dc6852d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -494,7 +494,7 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t small_maxrun; /* Max run size for small size classes. */ +extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3e37a61a..761aa754 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -447,6 +447,7 @@ rtree_val_read rtree_val_write run_quantize_ceil run_quantize_floor +run_quantize_max s2u s2u_compute s2u_lookup diff --git a/src/arena.c b/src/arena.c index 97dea843..c4149461 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,8 +21,11 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. 
*/ size_t large_maxclass; /* Max large size class. */ -size_t small_maxrun; /* Max run size for small size classes. */ +size_t run_quantize_max; /* Max run_quantize_*() input. */ +static size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ +static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ +static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ @@ -100,12 +103,8 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, rb_link, arena_run_comp) -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) -#endif static size_t -run_quantize_floor(size_t size) +run_quantize_floor_compute(size_t size) { size_t qsize; @@ -123,18 +122,13 @@ run_quantize_floor(size_t size) */ qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor(size - large_pad)); + return (run_quantize_floor_compute(size - large_pad)); assert(qsize <= size); return (qsize); } -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor) -run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); -#endif static size_t -run_quantize_ceil_hard(size_t size) +run_quantize_ceil_compute_hard(size_t size) { size_t large_run_size_next; @@ -167,14 +161,10 @@ run_quantize_ceil_hard(size_t size) } } -#ifdef JEMALLOC_JET -#undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) -#endif static size_t -run_quantize_ceil(size_t size) +run_quantize_ceil_compute(size_t size) { - size_t qsize = run_quantize_floor(size); + size_t qsize = 
run_quantize_floor_compute(size); if (qsize < size) { /* @@ -185,10 +175,51 @@ run_quantize_ceil(size_t size) * search would potentially find sufficiently aligned available * memory somewhere lower. */ - qsize = run_quantize_ceil_hard(qsize); + qsize = run_quantize_ceil_compute_hard(qsize); } return (qsize); } + +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor_impl) +#endif +static size_t +run_quantize_floor(size_t size) +{ + size_t ret; + + assert(size > 0); + assert(size <= run_quantize_max); + assert((size & PAGE_MASK) == 0); + + ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; + assert(ret == run_quantize_floor_compute(size)); + return (ret); +} +#ifdef JEMALLOC_JET +#undef run_quantize_floor +#define run_quantize_floor JEMALLOC_N(run_quantize_floor) +run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +#endif + +#ifdef JEMALLOC_JET +#undef run_quantize_ceil +#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#endif +static size_t +run_quantize_ceil(size_t size) +{ + size_t ret; + + assert(size > 0); + assert(size <= run_quantize_max); + assert((size & PAGE_MASK) == 0); + + ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; + assert(ret == run_quantize_ceil_compute(size)); + return (ret); +} #ifdef JEMALLOC_JET #undef run_quantize_ceil #define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) @@ -3522,6 +3553,35 @@ small_run_size_init(void) return (false); } +static bool +run_quantize_init(void) +{ + unsigned i; + + run_quantize_max = chunksize + large_pad; + + run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) * + (run_quantize_max >> LG_PAGE)); + if (run_quantize_floor_tab == NULL) + return (true); + + run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) * + (run_quantize_max >> LG_PAGE)); + if (run_quantize_ceil_tab == NULL) + return (true); + + for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { + size_t run_size = i << LG_PAGE; + + 
run_quantize_floor_tab[i-1] = + run_quantize_floor_compute(run_size); + run_quantize_ceil_tab[i-1] = + run_quantize_ceil_compute(run_size); + } + + return (false); +} + bool arena_boot(void) { @@ -3570,7 +3630,12 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - return (small_run_size_init()); + if (small_run_size_init()) + return (true); + if (run_quantize_init()) + return (true); + + return (false); } void diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c index aff4056b..f6a2f74f 100644 --- a/test/unit/run_quantize.c +++ b/test/unit/run_quantize.c @@ -93,19 +93,14 @@ TEST_END TEST_BEGIN(test_monotonic) { - bool cache_oblivious; unsigned nbins, nlruns, i; - size_t sz, max_run_size, floor_prev, ceil_prev; + size_t sz, floor_prev, ceil_prev; /* * Iterate over all run sizes and verify that * run_quantize_{floor,ceil}() are monotonic. */ - sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); @@ -114,12 +109,9 @@ TEST_BEGIN(test_monotonic) assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - max_run_size = (large_maxclass > small_maxrun) ? large_maxclass : - small_maxrun; - floor_prev = 0; ceil_prev = 0; - for (i = 1; i < max_run_size >> LG_PAGE; i++) { + for (i = 1; i < run_quantize_max >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; From 2b1fc90b7b109c5efac7974b8f9abe269ecb6daf Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 17 Feb 2016 06:56:14 -0800 Subject: [PATCH 0108/2608] Remove rbt_nil Since this is an intrusive tree, rbt_nil is the whole size of the node and can be quite large. For example, miscelm is ~100 bytes. 
--- include/jemalloc/internal/rb.h | 154 ++++++++++++++------------------- test/unit/rb.c | 41 +++++---- 2 files changed, 86 insertions(+), 109 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 30ccab44..3770342f 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -42,7 +42,6 @@ struct { \ #define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ - a_type rbt_nil; \ } /* Left accessors. */ @@ -84,8 +83,8 @@ struct { \ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ /* Bookkeeping bit cannot be used by node pointer. */ \ assert(((uintptr_t)(a_node) & 0x1) == 0); \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) #else @@ -111,25 +110,23 @@ struct { \ /* Node initializer. */ #define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ - rbtn_left_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ - rbtn_right_set(a_type, a_field, (a_node), &(a_rbt)->rbt_nil); \ + rbtn_left_set(a_type, a_field, (a_node), NULL); \ + rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ } while (0) #endif /* Tree initializer. */ #define rb_new(a_type, a_field, a_rbt) do { \ - (a_rbt)->rbt_root = &(a_rbt)->rbt_nil; \ - rbt_node_new(a_type, a_field, a_rbt, &(a_rbt)->rbt_nil); \ - rbtn_black_set(a_type, a_field, &(a_rbt)->rbt_nil); \ + (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. 
*/ #define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ + if ((r_node) != NULL) { \ for (; \ - rbtn_left_get(a_type, a_field, (r_node)) != &(a_rbt)->rbt_nil;\ + rbtn_left_get(a_type, a_field, (r_node)) != NULL; \ (r_node) = rbtn_left_get(a_type, a_field, (r_node))) { \ } \ } \ @@ -137,10 +134,9 @@ struct { \ #define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ - if ((r_node) != &(a_rbt)->rbt_nil) { \ - for (; rbtn_right_get(a_type, a_field, (r_node)) != \ - &(a_rbt)->rbt_nil; (r_node) = rbtn_right_get(a_type, a_field, \ - (r_node))) { \ + if ((r_node) != NULL) { \ + for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ + (r_node) = rbtn_right_get(a_type, a_field, (r_node))) { \ } \ } \ } while (0) @@ -335,8 +331,8 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * Args: * tree: Pointer to an initialized red-black tree object. * cb : Callback function, which, if non-null, is called for each node - * during iteration. There is no way to stop iteration once it has - * begun. + * during iteration. There is no way to stop iteration once it + * has begun. * arg : Opaque pointer passed to cb(). 
*/ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ @@ -346,36 +342,30 @@ a_prefix##new(a_rbt_type *rbtree) { \ } \ a_attr bool \ a_prefix##empty(a_rbt_type *rbtree) { \ - return (rbtree->rbt_root == &rbtree->rbt_nil); \ + return (rbtree->rbt_root == NULL); \ } \ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_right_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_right_get(a_type, a_field, node) != NULL) { \ rbtn_first(a_type, a_field, rbtree, rbtn_right_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ @@ -386,24 +376,21 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ a_type *ret; \ - if (rbtn_left_get(a_type, a_field, node) != &rbtree->rbt_nil) { \ + if (rbtn_left_get(a_type, a_field, node) != NULL) { \ rbtn_last(a_type, a_field, rbtree, rbtn_left_get(a_type, \ a_field, node), ret); \ } else { \ a_type *tnode = rbtree->rbt_root; \ - assert(tnode != &rbtree->rbt_nil); \ - ret = &rbtree->rbt_nil; \ + assert(tnode != NULL); \ + ret = NULL; \ while (true) { \ int cmp = (a_cmp)(node, tnode); \ if (cmp < 0) { \ 
@@ -414,12 +401,9 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ } else { \ break; \ } \ - assert(tnode != &rbtree->rbt_nil); \ + assert(tnode != NULL); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ @@ -427,7 +411,7 @@ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ int cmp; \ ret = rbtree->rbt_root; \ - while (ret != &rbtree->rbt_nil \ + while (ret != NULL \ && (cmp = (a_cmp)(key, ret)) != 0) { \ if (cmp < 0) { \ ret = rbtn_left_get(a_type, a_field, ret); \ @@ -435,17 +419,14 @@ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ ret = tnode; \ @@ -457,17 +438,14 @@ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ a_type *ret; \ a_type *tnode = rbtree->rbt_root; \ - ret = &rbtree->rbt_nil; \ - while (tnode != &rbtree->rbt_nil) { \ + ret = NULL; \ + while (tnode != NULL) { \ int cmp = (a_cmp)(key, tnode); \ if (cmp < 0) { \ tnode = rbtn_left_get(a_type, a_field, tnode); \ @@ -479,9 +457,6 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = (NULL); \ - } \ return (ret); \ } \ a_attr void \ @@ -493,7 +468,7 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. 
*/ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ assert(cmp != 0); \ if (cmp < 0) { \ @@ -513,7 +488,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_left_set(a_type, a_field, cnode, left); \ if (rbtn_red_get(a_type, a_field, left)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* Fix up 4-node. */ \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ @@ -528,7 +504,8 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, cnode, right); \ if (rbtn_red_get(a_type, a_field, right)) { \ a_type *left = rbtn_left_get(a_type, a_field, cnode); \ - if (rbtn_red_get(a_type, a_field, left)) { \ + if (left != NULL && rbtn_red_get(a_type, a_field, \ + left)) { \ /* Split 4-node. */ \ rbtn_black_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, right); \ @@ -561,7 +538,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Wind. */ \ nodep = NULL; /* Silence compiler warning. */ \ path->node = rbtree->rbt_root; \ - for (pathp = path; pathp->node != &rbtree->rbt_nil; pathp++) { \ + for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ if (cmp < 0) { \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -573,7 +550,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. 
*/ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != &rbtree->rbt_nil; \ + for (pathp++; pathp->node != NULL; \ pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ @@ -616,7 +593,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *left = rbtn_left_get(a_type, a_field, node); \ - if (left != &rbtree->rbt_nil) { \ + if (left != NULL) { \ /* node has no successor, but it has a left child. */\ /* Splice node out, without losing the left child. */\ assert(!rbtn_red_get(a_type, a_field, node)); \ @@ -636,33 +613,32 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (pathp == path) { \ /* The tree only contained one node. */ \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ return; \ } \ } \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. */ \ assert(pathp[-1].cmp < 0); \ - rbtn_left_set(a_type, a_field, pathp[-1].node, \ - &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ /* restored. */\ - pathp->node = &rbtree->rbt_nil; \ + pathp->node = NULL; \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ assert(pathp->cmp != 0); \ if (pathp->cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp->node, \ pathp[1].node); \ - assert(!rbtn_red_get(a_type, a_field, pathp[1].node)); \ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *right = rbtn_right_get(a_type, a_field, \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ a_type *tnode; \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* In the following diagrams, ||, //, and \\ */\ /* indicate the path to the removed node. 
*/\ /* */\ @@ -705,7 +681,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp->node); \ a_type *rightleft = rbtn_left_get(a_type, a_field, \ right); \ - if (rbtn_red_get(a_type, a_field, rightleft)) { \ + if (rightleft != NULL && rbtn_red_get(a_type, a_field, \ + rightleft)) { \ /* || */\ /* pathp(b) */\ /* // \ */\ @@ -759,7 +736,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ left); \ a_type *leftrightleft = rbtn_left_get(a_type, a_field, \ leftright); \ - if (rbtn_red_get(a_type, a_field, leftrightleft)) { \ + if (leftrightleft != NULL && rbtn_red_get(a_type, \ + a_field, leftrightleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -785,7 +763,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* (b) */\ /* / */\ /* (b) */\ - assert(leftright != &rbtree->rbt_nil); \ + assert(leftright != NULL); \ rbtn_red_set(a_type, a_field, leftright); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ @@ -808,7 +786,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(r) */\ /* / \\ */\ @@ -846,7 +825,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } else { \ a_type *leftleft = rbtn_left_get(a_type, a_field, left);\ - if (rbtn_red_get(a_type, a_field, leftleft)) { \ + if (leftleft != NULL && rbtn_red_get(a_type, a_field, \ + leftleft)) { \ /* || */\ /* pathp(b) */\ /* / \\ */\ @@ -892,13 +872,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = 
a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ - a_field, node), cb, arg)) != &rbtree->rbt_nil \ - || (ret = cb(rbtree, node, arg)) != NULL) { \ + a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ + arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -912,8 +892,8 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if (cmp < 0) { \ a_type *ret; \ if ((ret = a_prefix##iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ @@ -940,21 +920,18 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ - return (&rbtree->rbt_nil); \ + if (node == NULL) { \ + return (NULL); \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -969,8 +946,8 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if (cmp > 0) { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)) != \ - &rbtree->rbt_nil || (ret = cb(rbtree, node, arg)) != NULL) { \ + 
rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ + (ret = cb(rbtree, node, arg)) != NULL) { \ return (ret); \ } \ return (a_prefix##reverse_iter_recurse(rbtree, \ @@ -998,23 +975,20 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - if (ret == &rbtree->rbt_nil) { \ - ret = NULL; \ - } \ return (ret); \ } \ a_attr void \ a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ a_type *, void *), void *arg) { \ - if (node == &rbtree->rbt_nil) { \ + if (node == NULL) { \ return; \ } \ a_prefix##destroy_recurse(rbtree, rbtn_left_get(a_type, a_field, \ node), cb, arg); \ - rbtn_left_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + rbtn_left_set(a_type, a_field, (node), NULL); \ a_prefix##destroy_recurse(rbtree, rbtn_right_get(a_type, a_field, \ node), cb, arg); \ - rbtn_right_set(a_type, a_field, (node), &rbtree->rbt_nil); \ + rbtn_right_set(a_type, a_field, (node), NULL); \ if (cb) { \ cb(node, arg); \ } \ @@ -1023,7 +997,7 @@ a_attr void \ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ void *arg) { \ a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ - rbtree->rbt_root = &rbtree->rbt_nil; \ + rbtree->rbt_root = NULL; \ } #endif /* RB_H_ */ diff --git a/test/unit/rb.c b/test/unit/rb.c index 14132c13..cf3d3a78 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -3,7 +3,7 @@ #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ - rbp_bh_t != &(a_rbt)->rbt_nil; \ + rbp_bh_t != NULL; \ rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ (r_height)++; \ @@ -68,38 +68,43 @@ TEST_BEGIN(test_rb_empty) TEST_END static unsigned -tree_recurse(node_t *node, unsigned black_height, unsigned black_depth, - node_t *nil) +tree_recurse(node_t *node, unsigned black_height, unsigned 
black_depth) { unsigned ret = 0; - node_t *left_node = rbtn_left_get(node_t, link, node); - node_t *right_node = rbtn_right_get(node_t, link, node); + node_t *left_node; + node_t *right_node; + + if (node == NULL) + return (ret); + + left_node = rbtn_left_get(node_t, link, node); + right_node = rbtn_right_get(node_t, link, node); if (!rbtn_red_get(node_t, link, node)) black_depth++; /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - assert_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); - assert_false(rbtn_red_get(node_t, link, right_node), - "Node should be black"); + if (left_node != NULL) + assert_false(rbtn_red_get(node_t, link, left_node), + "Node should be black"); + if (right_node != NULL) + assert_false(rbtn_red_get(node_t, link, right_node), + "Node should be black"); } - if (node == nil) - return (ret); /* Self. */ assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ - if (left_node != nil) - ret += tree_recurse(left_node, black_height, black_depth, nil); + if (left_node != NULL) + ret += tree_recurse(left_node, black_height, black_depth); else ret += (black_depth != black_height); /* Right subtree. */ - if (right_node != nil) - ret += tree_recurse(right_node, black_height, black_depth, nil); + if (right_node != NULL) + ret += tree_recurse(right_node, black_height, black_depth); else ret += (black_depth != black_height); @@ -181,8 +186,7 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) node->magic = 0; rbtn_black_height(node_t, link, tree, black_height); - imbalances = tree_recurse(tree->rbt_root, black_height, 0, - &(tree->rbt_nil)); + imbalances = tree_recurse(tree->rbt_root, black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); assert_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); @@ -253,7 +257,6 @@ TEST_BEGIN(test_rb_random) for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. 
*/ tree_new(&tree); - tree.rbt_nil.magic = 0; for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -266,7 +269,7 @@ TEST_BEGIN(test_rb_random) rbtn_black_height(node_t, link, &tree, black_height); imbalances = tree_recurse(tree.rbt_root, - black_height, 0, &(tree.rbt_nil)); + black_height, 0); assert_u_eq(imbalances, 0, "Tree is unbalanced"); From 3417a304ccde61ac1f68b436ec22c03f1d6824ec Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 23 Feb 2016 12:06:21 -0800 Subject: [PATCH 0109/2608] Separate arena_avail trees Separate run trees by index, replacing the previous quantize logic. Quantization by index is now performed only on insertion / removal from the tree, and not on node comparison, saving some cpu. This also means we don't have to dereference the miscelm* pointers, saving half of the memory loads from miscelms/mapbits that have fallen out of cache. A linear scan of the indicies appears to be fast enough. The only cost of this is an extra tree array in each arena. --- include/jemalloc/internal/arena.h | 12 +-- src/arena.c | 142 +++++++++++------------------- 2 files changed, 58 insertions(+), 96 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 8dc6852d..2548082b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -351,12 +351,6 @@ struct arena_s { */ size_t ndirty; - /* - * Size/address-ordered tree of this arena's available runs. The tree - * is used for first-best-fit run allocation. - */ - arena_avail_tree_t runs_avail; - /* * Unused dirty memory this arena manages. Dirty memory is conceptually * tracked as an arbitrarily interleaved LRU of dirty runs and cached @@ -462,6 +456,12 @@ struct arena_s { /* bins is used to store trees of free regions. */ arena_bin_t bins[NBINS]; + + /* + * Quantized address-ordered trees of this arena's available runs. The + * trees are used for first-best-fit run allocation. 
+ */ + arena_avail_tree_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. */ diff --git a/src/arena.c b/src/arena.c index c4149461..06422727 100644 --- a/src/arena.c +++ b/src/arena.c @@ -28,6 +28,8 @@ static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ +static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ +static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ /******************************************************************************/ /* @@ -45,42 +47,12 @@ static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, /******************************************************************************/ -#define CHUNK_MAP_KEY ((uintptr_t)0x1U) - -JEMALLOC_INLINE_C arena_chunk_map_misc_t * -arena_miscelm_key_create(size_t size) -{ - - return ((arena_chunk_map_misc_t *)(arena_mapbits_size_encode(size) | - CHUNK_MAP_KEY)); -} - -JEMALLOC_INLINE_C bool -arena_miscelm_is_key(const arena_chunk_map_misc_t *miscelm) -{ - - return (((uintptr_t)miscelm & CHUNK_MAP_KEY) != 0); -} - -#undef CHUNK_MAP_KEY - -JEMALLOC_INLINE_C size_t -arena_miscelm_key_size_get(const arena_chunk_map_misc_t *miscelm) -{ - - assert(arena_miscelm_is_key(miscelm)); - - return (arena_mapbits_size_decode((uintptr_t)miscelm)); -} - JEMALLOC_INLINE_C size_t arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk; size_t pageind, mapbits; - assert(!arena_miscelm_is_key(miscelm)); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); pageind = arena_miscelm_to_pageind(miscelm); mapbits = arena_mapbits_get(chunk, pageind); @@ -88,7 +60,8 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_INLINE_C int -arena_run_comp(const 
arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) +arena_run_addr_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; uintptr_t b_miscelm = (uintptr_t)b; @@ -101,7 +74,7 @@ arena_run_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) /* Generate red-black tree functions. */ rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, - rb_link, arena_run_comp) + rb_link, arena_run_addr_comp) static size_t run_quantize_floor_compute(size_t size) @@ -226,61 +199,42 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -JEMALLOC_INLINE_C int -arena_avail_comp(const arena_chunk_map_misc_t *a, - const arena_chunk_map_misc_t *b) -{ - int ret; - uintptr_t a_miscelm = (uintptr_t)a; - size_t a_qsize = run_quantize_floor(arena_miscelm_is_key(a) ? - arena_miscelm_key_size_get(a) : arena_miscelm_size_get(a)); - size_t b_qsize = run_quantize_floor(arena_miscelm_size_get(b)); - - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful runs only by address. - */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - if (!arena_miscelm_is_key(a)) { - uintptr_t b_miscelm = (uintptr_t)b; - - ret = (a_miscelm > b_miscelm) - (a_miscelm < b_miscelm); - } else { - /* - * Treat keys as if they are lower than anything else. - */ - ret = -1; - } - } - - return (ret); -} - /* Generate red-black tree functions. 
*/ rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_misc_t, rb_link, arena_avail_comp) + arena_chunk_map_misc_t, rb_link, arena_run_addr_comp) + +static arena_avail_tree_t * +arena_runs_avail_get(arena_t *arena, szind_t ind) +{ + + assert(ind >= runs_avail_bias); + assert(ind - runs_avail_bias < runs_avail_nclasses); + + return (&arena->runs_avail[ind - runs_avail_bias]); +} static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + arena_avail_tree_insert(arena_runs_avail_get(arena, ind), + arena_miscelm_get(chunk, pageind)); } static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - + szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(&arena->runs_avail, arena_miscelm_get(chunk, - pageind)); + arena_avail_tree_remove(arena_runs_avail_get(arena, ind), + arena_miscelm_get(chunk, pageind)); } static void @@ -770,7 +724,6 @@ arena_chunk_alloc(arena_t *arena) return (NULL); } - /* Insert the run into the runs_avail tree. */ arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); @@ -791,10 +744,7 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) assert(arena_mapbits_decommitted_get(chunk, map_bias) == arena_mapbits_decommitted_get(chunk, chunk_npages-1)); - /* - * Remove run from the runs_avail tree, so that the arena does not use - * it. - */ + /* Remove run from runs_avail, so that the arena does not use it. 
*/ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); if (arena->spare != NULL) { @@ -1124,19 +1074,23 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, /* * Do first-best-fit run selection, i.e. select the lowest run that best fits. - * Run sizes are quantized, so not all candidate runs are necessarily exactly - * the same size. + * Run sizes are indexed, so not all candidate runs are necessarily exactly the + * same size. */ static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - size_t search_size = run_quantize_ceil(size); - arena_chunk_map_misc_t *key = arena_miscelm_key_create(search_size); - arena_chunk_map_misc_t *miscelm = - arena_avail_tree_nsearch(&arena->runs_avail, key); - if (miscelm == NULL) - return (NULL); - return (&miscelm->run); + szind_t ind, i; + + ind = size2index(run_quantize_ceil(size)); + for (i = ind; i < runs_avail_nclasses; i++) { + arena_chunk_map_misc_t *miscelm = arena_avail_tree_first( + arena_runs_avail_get(arena, i)); + if (miscelm != NULL) + return (&miscelm->run); + } + + return (NULL); } static arena_run_t * @@ -3315,19 +3269,23 @@ arena_t * arena_new(unsigned ind) { arena_t *arena; + size_t arena_size; unsigned i; arena_bin_t *bin; + /* Compute arena size to incorporate sufficient runs_avail elements. */ + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_avail_tree_t) + * (runs_avail_nclasses - 1)); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { - arena = (arena_t *)base_alloc(CACHELINE_CEILING(sizeof(arena_t)) - + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + nhclasses) * sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(sizeof(arena_t)); + arena = (arena_t *)base_alloc(arena_size); if (arena == NULL) return (NULL); @@ -3339,11 +3297,11 @@ arena_new(unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t))); + + CACHELINE_CEILING(arena_size)); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t)) + + + CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3375,7 +3333,8 @@ arena_new(unsigned ind) arena->nactive = 0; arena->ndirty = 0; - arena_avail_tree_new(&arena->runs_avail); + for(i = 0; i < runs_avail_nclasses; i++) + arena_avail_tree_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3635,6 +3594,9 @@ arena_boot(void) if (run_quantize_init()) return (true); + runs_avail_bias = size2index(PAGE); + runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; + return (false); } From ae45142adc12d39793c45ecac4dafad5674a4591 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 23 Feb 2016 18:27:24 -0800 Subject: [PATCH 0110/2608] Collapse arena_avail_tree_* into arena_run_tree_*. These tree types converged to become identical, yet they still had independently generated red-black tree implementations. 
--- include/jemalloc/internal/arena.h | 3 +-- src/arena.c | 18 +++++++----------- 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2548082b..05800e4f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -175,7 +175,6 @@ struct arena_chunk_map_misc_s { arena_run_t run; }; }; -typedef rb_tree(arena_chunk_map_misc_t) arena_avail_tree_t; typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ @@ -461,7 +460,7 @@ struct arena_s { * Quantized address-ordered trees of this arena's available runs. The * trees are used for first-best-fit run allocation. */ - arena_avail_tree_t runs_avail[1]; /* Dynamically sized. */ + arena_run_tree_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. */ diff --git a/src/arena.c b/src/arena.c index 06422727..ec81336b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,11 +199,7 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -/* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_avail_tree_, arena_avail_tree_t, - arena_chunk_map_misc_t, rb_link, arena_run_addr_comp) - -static arena_avail_tree_t * +static arena_run_tree_t * arena_runs_avail_get(arena_t *arena, szind_t ind) { @@ -221,7 +217,7 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_insert(arena_runs_avail_get(arena, ind), + arena_run_tree_insert(arena_runs_avail_get(arena, ind), arena_miscelm_get(chunk, pageind)); } @@ -233,7 +229,7 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_avail_tree_remove(arena_runs_avail_get(arena, ind), + arena_run_tree_remove(arena_runs_avail_get(arena, ind), arena_miscelm_get(chunk, pageind)); } @@ -1084,7 +1080,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) ind = size2index(run_quantize_ceil(size)); for (i = ind; i < runs_avail_nclasses; i++) { - arena_chunk_map_misc_t *miscelm = arena_avail_tree_first( + arena_chunk_map_misc_t *miscelm = arena_run_tree_first( arena_runs_avail_get(arena, i)); if (miscelm != NULL) return (&miscelm->run); @@ -3274,8 +3270,8 @@ arena_new(unsigned ind) arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_avail_tree_t) - * (runs_avail_nclasses - 1)); + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * + (runs_avail_nclasses - 1)); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
@@ -3334,7 +3330,7 @@ arena_new(unsigned ind) arena->ndirty = 0; for(i = 0; i < runs_avail_nclasses; i++) - arena_avail_tree_new(&arena->runs_avail[i]); + arena_run_tree_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); From b41a07c31a53cb91729f69b4a23e3a8801ee9846 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 23 Feb 2016 11:39:02 -0800 Subject: [PATCH 0111/2608] Fix Windows build issues This resolves #333. --- .../internal/jemalloc_internal_decls.h | 3 +- include/msvc_compat/strings.h | 30 +++++++++++++++++++ test/include/test/jemalloc_test.h.in | 1 - test/include/test/timer.h | 3 -- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 0bca63e5..2b8ca5d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -18,6 +18,7 @@ # endif # include # include +# include #endif #include @@ -61,6 +62,4 @@ isblank(int c) #endif #include -#include - #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index f01ffdd1..a3ee2506 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -21,7 +21,37 @@ static __forceinline int ffs(int x) return (ffsl(x)); } +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif + +static __forceinline int ffsll(unsigned __int64 x) +{ + unsigned long i; +#ifdef _M_X64 + if (_BitScanForward64(&i, x)) + return (i + 1); + return (0); #else +// Fallback for 32-bit build where 64-bit version not available +// assuming little endian + union { + unsigned __int64 ll; + unsigned long l[2]; + } s; + + s.ll = x; + + if (_BitScanForward(&i, s.l[0])) + return (i + 1); + else if(_BitScanForward(&i, s.l[1])) + return (i + 33); + return (0); +#endif +} + +#else +# define ffsll(x) __builtin_ffsll(x) # define ffsl(x) 
__builtin_ffsl(x) # define ffs(x) __builtin_ffs(x) #endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 4aaaf952..0a3dbeac 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -11,7 +11,6 @@ #ifdef _WIN32 # include "msvc_compat/strings.h" #endif -#include #ifdef _WIN32 # include diff --git a/test/include/test/timer.h b/test/include/test/timer.h index 0b27e019..ace6191b 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -1,8 +1,5 @@ /* Simple timer, for use in benchmark reporting. */ -#include -#include - typedef struct { nstime_t t0; nstime_t t1; From 9f4ee6034c3ac6a8c8b5f9a0d76822fb2fd90c41 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 10:32:45 -0800 Subject: [PATCH 0112/2608] Refactor jemalloc_ffs*() into ffs_*(). Use appropriate versions to resolve 64-to-32-bit data loss warnings. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/bitmap.h | 4 +- .../internal/jemalloc_internal_defs.h.in | 2 +- include/jemalloc/internal/private_symbols.txt | 9 +- include/jemalloc/internal/prng.h | 2 +- include/jemalloc/internal/util.h | 90 ++++++++++++------- src/arena.c | 3 +- src/chunk.c | 2 +- 8 files changed, 72 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 05800e4f..165fb52d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1099,7 +1099,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) /* Rescale (factor powers of 2 out of the numerator and denominator). 
*/ interval = bin_info->reg_interval; - shift = jemalloc_ffs(interval) - 1; + shift = ffs_zu(interval) - 1; diff >>= shift; interval >>= shift; diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index fcc6005c..c14e7162 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -176,11 +176,11 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = jemalloc_ffsl(g) - 1; + bit = ffs_lu(g) - 1; while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (jemalloc_ffsl(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } bitmap_set(bitmap, binfo, bit); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 4bcda716..2c753719 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -190,7 +190,7 @@ /* * ffs*() functions to use for bitmapping. Don't use these directly; instead, - * use jemalloc_ffs*() from util.h. + * use ffs_*() from util.h. 
*/ #undef JEMALLOC_INTERNAL_FFSLL #undef JEMALLOC_INTERNAL_FFSL diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 761aa754..adab8a5c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -243,6 +243,12 @@ extent_tree_szad_reverse_iter extent_tree_szad_reverse_iter_recurse extent_tree_szad_reverse_iter_start extent_tree_szad_search +ffs_llu +ffs_lu +ffs_u +ffs_u32 +ffs_u64 +ffs_zu get_errno hash hash_fmix_32 @@ -292,9 +298,6 @@ isqalloc isthreaded ivsalloc ixalloc -jemalloc_ffs -jemalloc_ffs64 -jemalloc_ffsl jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 44d67c9a..5830f8b7 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -64,7 +64,7 @@ prng_range(uint64_t *state, uint64_t range) assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = jemalloc_ffs64(pow2_ceil_u64(range)) - 1; + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; /* Generate a result in [0..range) via repeated trial. */ do { diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 39f70878..46d47df3 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -121,9 +121,12 @@ void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -int jemalloc_ffs64(uint64_t bitmap); -int jemalloc_ffsl(long bitmap); -int jemalloc_ffs(int bitmap); +unsigned ffs_llu(unsigned long long bitmap); +unsigned ffs_lu(unsigned long bitmap); +unsigned ffs_u(unsigned bitmap); +unsigned ffs_zu(size_t bitmap); +unsigned ffs_u64(uint64_t bitmap); +unsigned ffs_u32(uint32_t bitmap); uint64_t pow2_ceil_u64(uint64_t x); uint32_t pow2_ceil_u32(uint32_t x); size_t pow2_ceil_zu(size_t x); @@ -140,31 +143,63 @@ int get_errno(void); # error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs64(uint64_t bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_llu(unsigned long long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSLL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_lu(unsigned long bitmap) +{ + + return (JEMALLOC_INTERNAL_FFSL(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u(unsigned bitmap) +{ + + return (JEMALLOC_INTERNAL_FFS(bitmap)); +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_zu(size_t bitmap) +{ + +#if LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_INT + return (ffs_u(bitmap)); +#else +#error No implementation for size_t ffs() +#endif +} + +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u64(uint64_t bitmap) { #if LG_SIZEOF_LONG == 3 - return (JEMALLOC_INTERNAL_FFSL(bitmap)); + return (ffs_lu(bitmap)); #elif LG_SIZEOF_LONG_LONG == 3 - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); + return (ffs_llu(bitmap)); #else #error No implementation for 64-bit ffs() #endif } -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffsl(long bitmap) +JEMALLOC_ALWAYS_INLINE unsigned +ffs_u32(uint32_t bitmap) { - return (JEMALLOC_INTERNAL_FFSL(bitmap)); -} - -JEMALLOC_ALWAYS_INLINE int -jemalloc_ffs(int bitmap) -{ - - return (JEMALLOC_INTERNAL_FFS(bitmap)); +#if LG_SIZEOF_INT == 2 + return (ffs_u(bitmap)); +#else +#error No implementation for 32-bit ffs() +#endif + 
return (ffs_u(bitmap)); } JEMALLOC_INLINE uint64_t @@ -235,7 +270,7 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == 2) _BitScanReverse(&ret, x); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif return (ret); } @@ -251,7 +286,7 @@ lg_floor(size_t x) #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); #else -# error "Unsupported type sizes for lg_floor()" +# error "Unsupported type size for lg_floor()" #endif } #else @@ -266,20 +301,13 @@ lg_floor(size_t x) x |= (x >> 4); x |= (x >> 8); x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3 && LG_SIZEOF_PTR == LG_SIZEOF_LONG) +#if (LG_SIZEOF_PTR == 3) x |= (x >> 32); - if (x == KZU(0xffffffffffffffff)) - return (63); - x++; - return (jemalloc_ffsl(x) - 2); -#elif (LG_SIZEOF_PTR == 2) - if (x == KZU(0xffffffff)) - return (31); - x++; - return (jemalloc_ffs(x) - 2); -#else -# error "Unsupported type sizes for lg_floor()" #endif + if (x == SIZE_T_MAX) + return ((8 << LG_SIZEOF_PTR) - 1); + x++; + return (ffs_zu(x) - 2); } #endif diff --git a/src/arena.c b/src/arena.c index ec81336b..7b065d60 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3391,8 +3391,7 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * be twice as large in order to maintain alignment. */ if (config_fill && unlikely(opt_redzone)) { - size_t align_min = ZU(1) << (jemalloc_ffs(bin_info->reg_size) - - 1); + size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1); if (align_min <= REDZONE_MINSIZE) { bin_info->redzone_size = REDZONE_MINSIZE; pad_size = 0; diff --git a/src/chunk.c b/src/chunk.c index 6ba1ca7a..3d32a404 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -716,7 +716,7 @@ chunk_boot(void) * so pages_map will always take fast path. 
*/ if (!opt_lg_chunk) { - opt_lg_chunk = jemalloc_ffs((int)info.dwAllocationGranularity) + opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity) - 1; } #else From 8dd5115edee9e778d3b45d0924530ee49a4e34e6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:00:40 -0800 Subject: [PATCH 0113/2608] Explicitly cast mib[] elements to unsigned where appropriate. --- src/ctl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f003b415..a7f1b675 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -24,7 +24,7 @@ ctl_named_node(const ctl_node_t *node) } JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, int index) +ctl_named_children(const ctl_named_node_t *node, size_t index) { const ctl_named_node_t *children = ctl_named_node(node->children); @@ -975,7 +975,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { /* Children are named. 
*/ - if (node->nchildren <= mib[i]) { + if (node->nchildren <= (unsigned)mib[i]) { ret = ENOENT; goto label_return; } @@ -1611,7 +1611,7 @@ arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - arena_i_purge(mib[1], true); + arena_i_purge((unsigned)mib[1], true); ret = 0; label_return: @@ -1626,7 +1626,7 @@ arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, READONLY(); WRITEONLY(); - arena_i_purge(mib[1], false); + arena_i_purge((unsigned)mib[1], false); ret = 0; label_return: @@ -1639,7 +1639,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, { int ret; const char *dss = NULL; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; @@ -1694,7 +1694,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; arena = arena_get(tsd_fetch(), arena_ind, false, true); @@ -1728,7 +1728,7 @@ arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; arena = arena_get(tsd_fetch(), arena_ind, false, true); @@ -1762,7 +1762,7 @@ arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = mib[1]; + unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; malloc_mutex_lock(&ctl_mtx); @@ -1841,7 +1841,7 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? (*oldlenp / sizeof(bool)) : ctl_stats.narenas; + ? 
(unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas; } else { ret = 0; nread = ctl_stats.narenas; From 603b3bd413d670909811ce49d28a8b0a4ae3ba6b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:02:14 -0800 Subject: [PATCH 0114/2608] Make nhbins unsigned rather than size_t. --- include/jemalloc/internal/tcache.h | 2 +- src/tcache.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 09935c36..25eaf142 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -115,7 +115,7 @@ extern tcache_bin_info_t *tcache_bin_info; * Number of tcache bins. There are NBINS small-object bins, plus 0 or more * large-object bins. */ -extern size_t nhbins; +extern unsigned nhbins; /* Maximum cached size class. */ extern size_t tcache_maxclass; diff --git a/src/tcache.c b/src/tcache.c index 426bb1f7..fb1f057f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -10,7 +10,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ -size_t nhbins; +unsigned nhbins; size_t tcache_maxclass; tcaches_t *tcaches; From 8f683b94a751c65af8f9fa25970ccf2917b96bb8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:03:40 -0800 Subject: [PATCH 0115/2608] Make opt_narenas unsigned rather than size_t. 
--- doc/jemalloc.xml.in | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- src/ctl.c | 2 +- src/jemalloc.c | 20 +++++++++++-------- src/stats.c | 10 ++++++++-- test/unit/mallctl.c | 2 +- 6 files changed, 24 insertions(+), 14 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0ced0aaa..28b5fb78 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -941,7 +941,7 @@ for (i = 0; i < nbins; i++) { opt.narenas - (size_t) + (unsigned) r- Maximum number of arenas to use for automatic diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0260b9a8..a61a13a9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -433,7 +433,7 @@ extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; -extern size_t opt_narenas; +extern unsigned opt_narenas; extern bool in_valgrind; diff --git a/src/ctl.c b/src/ctl.c index a7f1b675..e0044336 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1277,7 +1277,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) -CTL_RO_NL_GEN(opt_narenas, opt_narenas, size_t) +CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 76b4f154..3d356c30 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -40,7 +40,7 @@ bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; -size_t opt_narenas = 0; +unsigned opt_narenas = 0; /* Initialized to true if the process is running inside Valgrind. 
*/ bool in_valgrind; @@ -1031,7 +1031,7 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ +#define CONF_HANDLE_T_U(t, o, n, min, max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -1045,11 +1045,11 @@ malloc_conf_init(void) k, klen, v, vlen); \ } else if (clip) { \ if ((min) != 0 && um < (min)) \ - o = (min); \ + o = (t)(min); \ else if (um > (max)) \ - o = (max); \ + o = (t)(max); \ else \ - o = um; \ + o = (t)um; \ } else { \ if (((min) != 0 && um < (min)) \ || um > (max)) { \ @@ -1058,10 +1058,14 @@ malloc_conf_init(void) "conf value", \ k, klen, v, vlen); \ } else \ - o = um; \ + o = (t)um; \ } \ continue; \ } +#define CONF_HANDLE_UNSIGNED(o, n, min, max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1129,8 +1133,8 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_SIZE_T(opt_narenas, "narenas", 1, - SIZE_T_MAX, false) + CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, + UINT_MAX, false) if (strncmp("purge", k, klen) == 0) { int i; bool match = false; diff --git a/src/stats.c b/src/stats.c index 8d5ed71e..a7249479 100644 --- a/src/stats.c +++ b/src/stats.c @@ -435,9 +435,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool bv; unsigned uv; ssize_t ssv; - size_t sv, bsz, ssz, sssz, cpsz; + size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); + usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); @@ -464,6 +465,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, : "false", bv2 ? 
"true" : "false"); \ } \ } +#define OPT_WRITE_UNSIGNED(n) \ + if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } #define OPT_WRITE_SIZE_T(n) \ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -494,7 +500,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(abort) OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) - OPT_WRITE_SIZE_T(narenas) + OPT_WRITE_UNSIGNED(narenas) OPT_WRITE_CHAR_P(purge) if (opt_purge == purge_mode_ratio) { OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e8dc4926..01333514 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -163,7 +163,7 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); - TEST_MALLCTL_OPT(size_t, narenas, always); + TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, purge, always); TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); TEST_MALLCTL_OPT(ssize_t, decay_time, always); From 0931cecbfaeada8b10fed56ff0175c8ffb9e9233 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:04:08 -0800 Subject: [PATCH 0116/2608] Use ssize_t for readlink() rather than int. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3d356c30..1acea404 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -958,7 +958,7 @@ malloc_conf_init(void) } break; case 2: { - int linklen = 0; + ssize_t linklen = 0; #ifndef _WIN32 int saved_errno = errno; const char *linkname = From 1c42a04cc6d3cc5d92bec55432015785584a4b0d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 11:04:51 -0800 Subject: [PATCH 0117/2608] Change lg_floor() return type from size_t to unsigned. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 19 +++++++++---------- include/jemalloc/internal/util.h | 16 +++++++++------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a61a13a9..ffad04ba 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -555,27 +555,27 @@ size2index_compute(size_t size) #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? + szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) : lg_floor((size<<1)-1); - size_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - size_t grp = shift << LG_SIZE_CLASS_GROUP; + szind_t grp = shift << LG_SIZE_CLASS_GROUP; - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta_inverse_mask = ZI(-1) << lg_delta; - size_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - size_t index = NTBINS + grp + mod; + szind_t index = NTBINS + grp + mod; return (index); } } @@ -586,8 +586,7 @@ size2index_lookup(size_t size) assert(size <= LOOKUP_MAXCLASS); { - size_t ret = ((size_t)(size2index_tab[(size-1) >> - LG_TINY_MIN])); + szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); assert(ret == size2index_compute(size)); return (ret); } diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 46d47df3..031f8045 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -130,7 +130,7 @@ unsigned ffs_u32(uint32_t bitmap); uint64_t pow2_ceil_u64(uint64_t x); uint32_t pow2_ceil_u32(uint32_t x); size_t pow2_ceil_zu(size_t x); -size_t lg_floor(size_t x); +unsigned lg_floor(size_t x); void set_errno(int errnum); int get_errno(void); #endif @@ -244,7 +244,7 @@ pow2_ceil_zu(size_t x) } #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { size_t ret; @@ -255,10 +255,11 @@ lg_floor(size_t x) : "=r"(ret) // Outputs. : "r"(x) // Inputs. 
); - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(_MSC_VER)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { unsigned long ret; @@ -272,10 +273,11 @@ lg_floor(size_t x) #else # error "Unsupported type size for lg_floor()" #endif - return (ret); + assert(ret < UINT_MAX); + return ((unsigned)ret); } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { @@ -290,7 +292,7 @@ lg_floor(size_t x) #endif } #else -JEMALLOC_INLINE size_t +JEMALLOC_INLINE unsigned lg_floor(size_t x) { From 9e1810ca9dc4a5f5f0841b9a6c1abb4337753552 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 12:42:23 -0800 Subject: [PATCH 0118/2608] Silence miscellaneous 64-to-32-bit data loss warnings. --- include/jemalloc/internal/arena.h | 4 ++-- include/jemalloc/internal/bitmap.h | 2 +- include/jemalloc/internal/hash.h | 15 ++++++++++----- include/jemalloc/jemalloc_macros.h.in | 6 +++--- src/arena.c | 21 +++++++++++---------- src/chunk.c | 4 ++-- src/ckh.c | 12 ++++++------ src/ctl.c | 5 +++-- src/jemalloc.c | 2 +- src/tcache.c | 2 +- src/util.c | 3 ++- test/integration/rallocx.c | 10 +++++----- test/unit/hash.c | 4 ++-- 13 files changed, 49 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 165fb52d..59b480b5 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1053,7 +1053,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; - actual_binind = bin - arena->bins; + actual_binind = (szind_t)(bin - arena->bins); assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); @@ -1070,7 +1070,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, 
arena_bin_t *bin) { - szind_t binind = bin - arena->bins; + szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); return (binind); } diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index c14e7162..8452bfed 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -113,7 +113,7 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { - unsigned rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 8b5fb037..864fda81 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -337,13 +337,18 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { + + assert(len <= INT_MAX); /* Unfortunate implementation limitation. 
*/ + #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - hash_x64_128(key, len, seed, (uint64_t *)r_hash); + hash_x64_128(key, (int)len, seed, (uint64_t *)r_hash); #else - uint64_t hashes[2]; - hash_x86_128(key, len, seed, hashes); - r_hash[0] = (size_t)hashes[0]; - r_hash[1] = (size_t)hashes[1]; + { + uint64_t hashes[2]; + hash_x86_128(key, (int)len, seed, hashes); + r_hash[0] = (size_t)hashes[0]; + r_hash[1] = (size_t)hashes[1]; + } #endif } #endif diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 698caa19..d164edac 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -11,12 +11,12 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) (la) +# define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) (ffs(a)-1) +# define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) # else # define MALLOCX_ALIGN(a) \ - ((a < (size_t)INT_MAX) ? ffs(a)-1 : ffs(a>>32)+31) + ((int)((a < (size_t)INT_MAX) ? ffs((int)a)-1 : ffs((int)(a>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* diff --git a/src/arena.c b/src/arena.c index 7b065d60..987e2064 100644 --- a/src/arena.c +++ b/src/arena.c @@ -308,7 +308,7 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) assert(run->nfree > 0); assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); - regind = bitmap_sfu(run->bitmap, &bin_info->bitmap_info); + regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + @@ -3411,18 +3411,19 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) * size). 
*/ try_run_size = PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); do { perfect_run_size = try_run_size; perfect_nregs = try_nregs; try_run_size += PAGE; - try_nregs = try_run_size / bin_info->reg_size; + try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); } while (perfect_run_size != perfect_nregs * bin_info->reg_size); assert(perfect_nregs <= RUN_MAXREGS); actual_run_size = perfect_run_size; - actual_nregs = (actual_run_size - pad_size) / bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); /* * Redzones can require enough padding that not even a single region can @@ -3434,8 +3435,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) assert(config_fill && unlikely(opt_redzone)); actual_run_size += PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } /* @@ -3443,8 +3444,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) */ while (actual_run_size > arena_maxrun) { actual_run_size -= PAGE; - actual_nregs = (actual_run_size - pad_size) / - bin_info->reg_interval; + actual_nregs = (uint32_t)((actual_run_size - pad_size) / + bin_info->reg_interval); } assert(actual_nregs > 0); assert(actual_run_size == s2u(actual_run_size)); @@ -3452,8 +3453,8 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) /* Copy final settings. 
*/ bin_info->run_size = actual_run_size; bin_info->nregs = actual_nregs; - bin_info->reg0_offset = actual_run_size - (actual_nregs * - bin_info->reg_interval) - pad_size + bin_info->redzone_size; + bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * + bin_info->reg_interval) - pad_size + bin_info->redzone_size); if (actual_run_size > small_maxrun) small_maxrun = actual_run_size; diff --git a/src/chunk.c b/src/chunk.c index 3d32a404..9de36eb6 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -732,8 +732,8 @@ chunk_boot(void) if (have_dss && chunk_dss_boot()) return (true); - if (rtree_new(&chunks_rtree, (ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk, chunks_rtree_node_alloc, NULL)) + if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - + opt_lg_chunk), chunks_rtree_node_alloc, NULL)) return (true); return (false); diff --git a/src/ckh.c b/src/ckh.c index 08fc433d..d1cfd234 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - offset = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +141,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. 
*/ - i = prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + i = (unsigned)prng_lg_range(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); @@ -247,8 +248,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; - size_t lg_curcells; - unsigned lg_prevbuckets; + unsigned lg_prevbuckets, lg_curcells; #ifdef CKH_COUNT ckh->ngrows++; @@ -302,8 +302,8 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; - size_t lg_curcells, usize; - unsigned lg_prevbuckets; + size_t usize; + unsigned lg_prevbuckets, lg_curcells; /* * It is possible (though unlikely, given well behaved hashes) that the diff --git a/src/ctl.c b/src/ctl.c index e0044336..107bacd6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1925,7 +1925,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) -CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) { @@ -1936,7 +1936,8 @@ arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) } CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) -CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+mib[2]), size_t) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), + size_t) static const ctl_named_node_t * arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 1acea404..ced27b88 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1396,7 +1396,7 @@ malloc_init_hard_finish(void) * machinery will fail to allocate memory at far lower limits. 
*/ if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = chunksize / sizeof(arena_t *); + narenas_auto = (unsigned)(chunksize / sizeof(arena_t *)); malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } diff --git a/src/tcache.c b/src/tcache.c index fb1f057f..9f10a745 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -461,7 +461,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) elm = tcaches_avail; tcaches_avail = tcaches_avail->next; elm->tcache = tcache; - *r_ind = elm - tcaches; + *r_ind = (unsigned)(elm - tcaches); } else { elm = &tcaches[tcaches_past]; elm->tcache = tcache; diff --git a/src/util.c b/src/util.c index 1373ee15..d519818d 100644 --- a/src/util.c +++ b/src/util.c @@ -581,7 +581,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; - ret = i; + assert(i < INT_MAX); + ret = (int)i; #undef APPEND_C #undef APPEND_S diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index be1b27b7..022e0bf0 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -138,22 +138,22 @@ TEST_END TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; - size_t lg_align, sz; + unsigned lg_align; + size_t sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) - lg_align = ZU(0); + lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); assert_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%zu", lg_align); + "Unexpected rallocx() error for lg_align=%u", lg_align); assert_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), - "%p inadequately aligned for lg_align=%zu", - q, lg_align); + "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { assert_false(validate_fill(q, 0, 0, sz), diff --git 
a/test/unit/hash.c b/test/unit/hash.c index ea73d701..f50ba81b 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -35,7 +35,7 @@ typedef enum { hash_variant_x64_128 } hash_variant_t; -static size_t +static int hash_variant_bits(hash_variant_t variant) { @@ -63,7 +63,7 @@ hash_variant_string(hash_variant_t variant) static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { - const size_t hashbytes = hash_variant_bits(variant) / 8; + const int hashbytes = hash_variant_bits(variant) / 8; VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; From b3d0070b1495ddd36893d481c512b5da1ab8acef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 13:00:40 -0800 Subject: [PATCH 0119/2608] Compile with -Wshorten-64-to-32. This will prevent accidental creation of potential integer truncation bugs when developing on LP64 systems. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index da4ee3ac..eb387ed9 100644 --- a/configure.ac +++ b/configure.ac @@ -140,6 +140,7 @@ if test "x$CFLAGS" = "x" ; then fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) + JE_CFLAGS_APPEND([-Wshorten-64-to-32]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then From ca8fffb5c13b6a7c45fd034667a8910c61d09c3b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 13:16:51 -0800 Subject: [PATCH 0120/2608] Silence miscellaneous 64-to-32-bit data loss warnings. 
--- src/prof.c | 2 +- src/util.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/prof.c b/src/prof.c index 173da69f..93421abb 100644 --- a/src/prof.c +++ b/src/prof.c @@ -989,7 +989,7 @@ prof_dump_close(bool propagate_err) static bool prof_dump_write(bool propagate_err, const char *s) { - unsigned i, slen, n; + size_t i, slen, n; cassert(config_prof); diff --git a/src/util.c b/src/util.c index d519818d..9aaa8062 100644 --- a/src/util.c +++ b/src/util.c @@ -53,8 +53,12 @@ wrtmessage(void *cbopaque, const char *s) * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * unused result in the widest plausible type to avoid compiler + * warnings. */ - UNUSED int result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); + UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else UNUSED int result = write(STDERR_FILENO, s, strlen(s)); #endif From aa63d5d377b4508b83502e923690d1d7b67c8c88 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 14:01:47 -0800 Subject: [PATCH 0121/2608] Fix ffs_zu() compilation error on MinGW. This regression was caused by 9f4ee6034c3ac6a8c8b5f9a0d76822fb2fd90c41 (Refactor jemalloc_ffs*() into ffs_*().). 
--- include/jemalloc/internal/util.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 031f8045..b8885bfa 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -168,10 +168,12 @@ JEMALLOC_ALWAYS_INLINE unsigned ffs_zu(size_t bitmap) { -#if LG_SIZEOF_PTR == LG_SIZEOF_LONG - return (ffs_lu(bitmap)); -#elif LG_SIZEOF_PTR == LG_SIZEOF_INT +#if LG_SIZEOF_PTR == LG_SIZEOF_INT return (ffs_u(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return (ffs_lu(bitmap)); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return (ffs_llu(bitmap)); #else #error No implementation for size_t ffs() #endif From f591d2611a311e8d100273fccfeb462c92ae9ce7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 14:36:58 -0800 Subject: [PATCH 0122/2608] Update manual to reflect removal of global huge object tree. This resolves #323. --- doc/jemalloc.xml.in | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 28b5fb78..bbccabd7 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -518,23 +518,18 @@ for (i = 0; i < nbins; i++) { common case, but it increases memory usage and fragmentation, since a bounded number of objects can remain allocated in each thread cache. - Memory is conceptually broken into equal-sized chunks, where the - chunk size is a power of two that is greater than the page size. Chunks - are always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. - - User objects are broken into three categories according to size: - small, large, and huge. Small and large objects are managed entirely by - arenas; huge objects are additionally aggregated in a single data structure - that is shared by all threads. 
Huge objects are typically used by - applications infrequently enough that this single data structure is not a - scalability issue. - - Each chunk that is managed by an arena tracks its contents as runs of + Memory is conceptually broken into equal-sized chunks, where the chunk + size is a power of two that is greater than the page size. Chunks are + always aligned to multiples of the chunk size. This alignment makes it + possible to find metadata for user objects very quickly. User objects are + broken into three categories according to size: small, large, and huge. + Multiple small and large objects can reside within a single chunk, whereas + huge objects each have one or more chunks backing them. Each chunk that + contains small and/or large objects tracks its contents as runs of contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps - makes it possible to determine all metadata regarding small and large - allocations in constant time. + large object). The combination of chunk alignment and chunk page maps makes + it possible to determine all metadata regarding small and large allocations + in constant time. Small objects are managed in groups by page runs. Each run maintains a bitmap to track which regions are in use. Allocation requests that are no From 5ec703dd33b60924ec39534d3fbc234dfa01b15a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 15:35:24 -0800 Subject: [PATCH 0123/2608] Document the heap profile format. This resolves #258. --- doc/jemalloc.xml.in | 50 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bbccabd7..d7b33582 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1191,7 +1191,8 @@ malloc_conf = "xmalloc:true";]]> the jeprof command, which is based on the pprof that is developed as part of the gperftools - package. 
+ package. See HEAP PROFILE + FORMAT for heap profile format documentation. @@ -2623,6 +2624,53 @@ typedef struct { + + HEAP PROFILE FORMAT + Although the heap profiling functionality was originally designed to + be compatible with the + pprof command that is developed as part of the gperftools + package, the addition of per thread heap profiling functionality + required a different heap profile format. The jeprof + command is derived from pprof, with enhancements to + support the heap profile format described here. + + In the following hypothetical heap profile, [...] + indicates elision for the sake of compactness. The following matches the above heap profile, but most +tokens are replaced with <description> to indicate +descriptions of the corresponding fields. / + : : [: ] + [...] + : : [: ] + [...] + : : [: ] + [...] +@ [...] [...] + : : [: ] + : : [: ] + : : [: ] +[...] + +MAPPED_LIBRARIES: +/maps>]]> + + DEBUGGING MALLOC PROBLEMS When debugging, it is a good idea to configure/build jemalloc with From c7a9a6c86b483d4aebb51bd62d902f4022a7367b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 17:18:44 -0800 Subject: [PATCH 0124/2608] Attempt mmap-based in-place huge reallocation. Attempt mmap-based in-place huge reallocation by plumbing new_addr into chunk_alloc_mmap(). This can dramatically speed up incremental huge reallocation. This resolves #335. 
--- include/jemalloc/internal/chunk_mmap.h | 4 ++-- src/chunk.c | 11 ++++------- src/chunk_mmap.c | 10 ++++++---- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/chunk_mmap.h index 7d8014c5..6f2d0ac2 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/chunk_mmap.h @@ -9,8 +9,8 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, - bool *commit); +void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); bool chunk_dalloc_mmap(void *chunk, size_t size); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/src/chunk.c b/src/chunk.c index 9de36eb6..6a107e1d 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -350,12 +350,9 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); - /* - * mmap. Requesting an address is not implemented for - * chunk_alloc_mmap(), so only call it if (new_addr == NULL). - */ - if (new_addr == NULL && (ret = chunk_alloc_mmap(size, alignment, zero, - commit)) != NULL) + /* mmap. */ + if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != + NULL) return (ret); /* "secondary" dss. 
*/ if (have_dss && dss_prec == dss_prec_secondary && (ret = @@ -380,7 +377,7 @@ chunk_alloc_base(size_t size) */ zero = true; commit = true; - ret = chunk_alloc_mmap(size, chunksize, &zero, &commit); + ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); if (ret == NULL) return (NULL); if (config_valgrind) diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index b9ba7419..56b2ee42 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -32,7 +32,8 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) } void * -chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit) { void *ret; size_t offset; @@ -53,9 +54,10 @@ chunk_alloc_mmap(size_t size, size_t alignment, bool *zero, bool *commit) assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(NULL, size); - if (ret == NULL) - return (NULL); + ret = pages_map(new_addr, size); + if (ret == NULL || ret == new_addr) + return (ret); + assert(new_addr == NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); From cd86c1481ad7356a7bbcd14549e938769f474fd6 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 24 Feb 2016 11:02:49 -0800 Subject: [PATCH 0125/2608] Fix arena_run_first_best_fit Merge of 3417a304ccde61ac1f68b436ec22c03f1d6824ec looks like a small bug: first_best_fit doesn't scan through all the classes, since ind is offset from runs_avail_nclasses by run_avail_bias. 
--- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 987e2064..3b125b05 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1079,7 +1079,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) szind_t ind, i; ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses; i++) { + for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { arena_chunk_map_misc_t *miscelm = arena_run_tree_first( arena_runs_avail_get(arena, i)); if (miscelm != NULL) From 38127291670af8d12a21eb78ba49201f3a5af7d1 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 24 Feb 2016 20:10:02 -0800 Subject: [PATCH 0126/2608] Fix arena_size computation. Fix arena_size arena_new() computation to incorporate runs_avail_nclasses elements for runs_avail, rather than (runs_avail_nclasses - 1) elements. Since offsetof(arena_t, runs_avail) is used rather than sizeof(arena_t) for the first term of the computation, all of the runs_avail elements must be added into the second term. This bug was introduced (by Jason Evans) while merging pull request #330 as 3417a304ccde61ac1f68b436ec22c03f1d6824ec (Separate arena_avail trees). --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 3b125b05..ad675d13 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3271,7 +3271,7 @@ arena_new(unsigned ind) /* Compute arena size to incorporate sufficient runs_avail elements. */ arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * - (runs_avail_nclasses - 1)); + runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. From 767d85061a6fb88ec977bbcd9b429a43aff391e6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 Feb 2016 23:58:10 -0800 Subject: [PATCH 0127/2608] Refactor arenas array (fixes deadlock). 
Refactor the arenas array, which contains pointers to all extant arenas, such that it starts out as a sparse array of maximum size, and use double-checked atomics-based reads as the basis for fast and simple arena_get(). Additionally, reduce arenas_lock's role such that it only protects against arena initialization races. These changes remove the possibility for arena lookups to trigger locking, which resolves at least one known (fork-related) deadlock. This resolves #315. --- include/jemalloc/internal/arena.h | 8 +- include/jemalloc/internal/atomic.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 37 +-- include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 21 ++ src/chunk.c | 4 +- src/ctl.c | 43 +-- src/jemalloc.c | 247 +++++++----------- src/tcache.c | 5 +- 9 files changed, 162 insertions(+), 214 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 59b480b5..470eee65 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -290,14 +290,14 @@ struct arena_s { /* * Number of threads currently assigned to this arena. This field is - * protected by arenas_lock. + * synchronized via atomic operations. */ unsigned nthreads; /* * There are three classes of arena operations from a locking * perspective: - * 1) Thread assignment (modifies nthreads) is protected by arenas_lock. + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. * 2) Bin-related operations are protected by bin locks. * 3) Chunk- and run-related operations are protected by this mutex. */ @@ -465,7 +465,6 @@ struct arena_s { /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ struct arena_tdata_s { - arena_t *arena; ticker_t decay_ticker; }; #endif /* JEMALLOC_ARENA_STRUCTS_B */ @@ -578,6 +577,9 @@ void arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +unsigned arena_nthreads_get(arena_t *arena); +void arena_nthreads_inc(arena_t *arena); +void arena_nthreads_dec(arena_t *arena); arena_t *arena_new(unsigned ind); bool arena_boot(void); void arena_prefork(arena_t *arena); diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a9aad35d..3f15ea14 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -28,8 +28,8 @@ * callers. * * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p + x); } - * atomic_sub_( *p, x) { return (*p - x); } + * atomic_add_( *p, x) { return (*p += x); } + * atomic_sub_( *p, x) { return (*p -= x); } * bool atomic_cas_( *p, c, s) * { * if (*p != c) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ffad04ba..611ed36a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -438,7 +438,13 @@ extern unsigned opt_narenas; extern bool in_valgrind; /* Number of CPUs. */ -extern unsigned ncpus; +extern unsigned ncpus; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. 
+ */ +extern arena_t **arenas; /* * index2size_tab encodes the same information as could be computed (at @@ -452,21 +458,17 @@ extern size_t const index2size_tab[NSIZES+1]; */ extern uint8_t const size2index_tab[]; -arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); arena_t *arenas_extend(unsigned ind); -arena_t *arena_init(unsigned ind); unsigned narenas_total_get(void); +arena_t *arena_init(unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, - arena_tdata_t *tdata); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -unsigned arena_nbound(unsigned ind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); @@ -543,8 +545,7 @@ size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing); +arena_t *arena_get(unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif @@ -819,19 +820,19 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing, - bool refresh_if_missing) +arena_get(unsigned ind, bool init_if_missing) { - arena_tdata_t *tdata; + arena_t *ret; - /* init_if_missing requires refresh_if_missing. 
*/ - assert(!init_if_missing || refresh_if_missing); + assert(ind <= MALLOCX_ARENA_MAX); - tdata = arena_tdata_get(tsd, ind, refresh_if_missing); - if (unlikely(tdata == NULL || tdata->arena == NULL)) - return (arena_get_hard(tsd, ind, init_if_missing, tdata)); - - return (tdata->arena); + ret = arenas[ind]; + if (unlikely(ret == NULL)) { + ret = atomic_read_p((void *)&arenas[ind]); + if (init_if_missing && unlikely(ret == NULL)) + ret = arena_init(ind); + } + return (ret); } JEMALLOC_INLINE ticker_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index adab8a5c..d716b82d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,5 +1,4 @@ a0dalloc -a0get a0malloc arena_aalloc arena_alloc_junk_small @@ -34,7 +33,6 @@ arena_decay_ticks arena_dss_prec_get arena_dss_prec_set arena_get -arena_get_hard arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set @@ -73,10 +71,12 @@ arena_migrate arena_miscelm_get arena_miscelm_to_pageind arena_miscelm_to_rpages -arena_nbound arena_new arena_node_alloc arena_node_dalloc +arena_nthreads_dec +arena_nthreads_get +arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent @@ -106,6 +106,7 @@ arena_stats_merge arena_tcache_fill_small arena_tdata_get arena_tdata_get_hard +arenas atomic_add_p atomic_add_u atomic_add_uint32 diff --git a/src/arena.c b/src/arena.c index ad675d13..3f394681 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3261,6 +3261,27 @@ arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, } } +unsigned +arena_nthreads_get(arena_t *arena) +{ + + return (atomic_read_u(&arena->nthreads)); +} + +void +arena_nthreads_inc(arena_t *arena) +{ + + atomic_add_u(&arena->nthreads, 1); +} + +void +arena_nthreads_dec(arena_t *arena) +{ + + atomic_sub_u(&arena->nthreads, 1); +} + arena_t * arena_new(unsigned ind) { diff --git a/src/chunk.c b/src/chunk.c index 
6a107e1d..26622ced 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -415,9 +415,7 @@ chunk_arena_get(unsigned arena_ind) { arena_t *arena; - /* Dodge tsd for a0 in order to avoid bootstrapping issues. */ - arena = (arena_ind == 0) ? a0get() : arena_get(tsd_fetch(), arena_ind, - false, true); + arena = arena_get(arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. diff --git a/src/ctl.c b/src/ctl.c index 107bacd6..dbf57c36 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -694,9 +694,7 @@ ctl_grow(void) static void ctl_refresh(void) { - tsd_t *tsd; unsigned i; - bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* @@ -706,19 +704,14 @@ ctl_refresh(void) ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); - tsd = tsd_fetch(); - for (i = 0, refreshed = false; i < ctl_stats.narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, true); - refreshed = true; - } - } + for (i = 0; i < ctl_stats.narenas; i++) + tarenas[i] = arena_get(i, false); for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) - ctl_stats.arenas[i].nthreads = arena_nbound(i); - else + if (tarenas[i] != NULL) { + ctl_stats.arenas[i].nthreads = + arena_nthreads_get(arena_get(i, false)); + } else ctl_stats.arenas[i].nthreads = 0; } @@ -1332,7 +1325,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. 
*/ - newarena = arena_get(tsd, newind, true, true); + newarena = arena_get(newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1560,22 +1553,14 @@ arena_i_purge(unsigned arena_ind, bool all) malloc_mutex_lock(&ctl_mtx); { - tsd_t *tsd = tsd_fetch(); unsigned narenas = ctl_stats.narenas; if (arena_ind == narenas) { unsigned i; - bool refreshed; VARIABLE_ARRAY(arena_t *, tarenas, narenas); - for (i = 0, refreshed = false; i < narenas; i++) { - tarenas[i] = arena_get(tsd, i, false, false); - if (tarenas[i] == NULL && !refreshed) { - tarenas[i] = arena_get(tsd, i, false, - true); - refreshed = true; - } - } + for (i = 0; i < narenas; i++) + tarenas[i] = arena_get(i, false); /* * No further need to hold ctl_mtx, since narenas and @@ -1592,7 +1577,7 @@ arena_i_purge(unsigned arena_ind, bool all) assert(arena_ind < narenas); - tarena = arena_get(tsd, arena_ind, false, true); + tarena = arena_get(arena_ind, false); /* No further need to hold ctl_mtx. */ malloc_mutex_unlock(&ctl_mtx); @@ -1664,7 +1649,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena_t *arena = arena_get(arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; @@ -1697,7 +1682,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena = arena_get(arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -1731,7 +1716,7 @@ arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd_fetch(), arena_ind, false, true); + arena = arena_get(arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -1767,7 +1752,7 @@ 
arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(&ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_fetch(), arena_ind, false, true)) != NULL) { + arena_get(arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); diff --git a/src/jemalloc.c b/src/jemalloc.c index ced27b88..86032a40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -47,7 +47,7 @@ bool in_valgrind; unsigned ncpus; -/* Protects arenas initialization (arenas, narenas_total). */ +/* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the @@ -57,8 +57,8 @@ static malloc_mutex_t arenas_lock; * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. */ -static arena_t **arenas; -static unsigned narenas_total; +arena_t **arenas; +static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ static unsigned narenas_auto; /* Read-only after initialization. */ @@ -311,14 +311,6 @@ malloc_init(void) * cannot tolerate TLS variable access. 
*/ -arena_t * -a0get(void) -{ - - assert(a0 != NULL); - return (a0); -} - static void * a0ialloc(size_t size, bool zero, bool is_metadata) { @@ -327,7 +319,7 @@ a0ialloc(size_t size, bool zero, bool is_metadata) return (NULL); return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, a0get(), true)); + is_metadata, arena_get(0, false), true)); } static void @@ -391,47 +383,59 @@ bootstrap_free(void *ptr) a0idalloc(ptr, false); } +static void +arena_set(unsigned ind, arena_t *arena) +{ + + atomic_write_p((void **)&arenas[ind], arena); +} + +static void +narenas_total_set(unsigned narenas) +{ + + atomic_write_u(&narenas_total, narenas); +} + +static void +narenas_total_inc(void) +{ + + atomic_add_u(&narenas_total, 1); +} + +unsigned +narenas_total_get(void) +{ + + return (atomic_read_u(&narenas_total)); +} + /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * arena_init_locked(unsigned ind) { arena_t *arena; - /* Expand arenas if necessary. */ - assert(ind <= narenas_total); + assert(ind <= narenas_total_get()); if (ind > MALLOCX_ARENA_MAX) return (NULL); - if (ind == narenas_total) { - unsigned narenas_new = narenas_total + 1; - arena_t **arenas_new = - (arena_t **)a0malloc(CACHELINE_CEILING(narenas_new * - sizeof(arena_t *))); - if (arenas_new == NULL) - return (NULL); - memcpy(arenas_new, arenas, narenas_total * sizeof(arena_t *)); - arenas_new[ind] = NULL; - /* - * Deallocate only if arenas came from a0malloc() (not - * base_alloc()). - */ - if (narenas_total != narenas_auto) - a0dalloc(arenas); - arenas = arenas_new; - narenas_total = narenas_new; - } + if (ind == narenas_total_get()) + narenas_total_inc(); /* * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arenas[ind]; + arena = arena_get(ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. 
*/ - arena = arenas[ind] = arena_new(ind); + arena = arena_new(ind); + arena_set(ind, arena); return (arena); } @@ -446,37 +450,16 @@ arena_init(unsigned ind) return (arena); } -unsigned -narenas_total_get(void) -{ - unsigned narenas; - - malloc_mutex_lock(&arenas_lock); - narenas = narenas_total; - malloc_mutex_unlock(&arenas_lock); - - return (narenas); -} - -static void -arena_bind_locked(tsd_t *tsd, unsigned ind) -{ - arena_t *arena; - - arena = arenas[ind]; - arena->nthreads++; - - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, arena); -} - static void arena_bind(tsd_t *tsd, unsigned ind) { + arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena_bind_locked(tsd, ind); - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(ind, false); + arena_nthreads_inc(arena); + + if (tsd_nominal(tsd)) + tsd_arena_set(tsd, arena); } void @@ -484,35 +467,20 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - malloc_mutex_lock(&arenas_lock); - oldarena = arenas[oldind]; - newarena = arenas[newind]; - oldarena->nthreads--; - newarena->nthreads++; - malloc_mutex_unlock(&arenas_lock); + oldarena = arena_get(oldind, false); + newarena = arena_get(newind, false); + arena_nthreads_dec(oldarena); + arena_nthreads_inc(newarena); tsd_arena_set(tsd, newarena); } -unsigned -arena_nbound(unsigned ind) -{ - unsigned nthreads; - - malloc_mutex_lock(&arenas_lock); - nthreads = arenas[ind]->nthreads; - malloc_mutex_unlock(&arenas_lock); - return (nthreads); -} - static void arena_unbind(tsd_t *tsd, unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - arena->nthreads--; - malloc_mutex_unlock(&arenas_lock); + arena = arena_get(ind, false); + arena_nthreads_dec(arena); tsd_arena_set(tsd, NULL); } @@ -568,14 +536,6 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) * the arenas.extend mallctl, which we trust mallctl synchronization to * prevent. 
*/ - malloc_mutex_lock(&arenas_lock); - for (i = 0; i < narenas_actual; i++) - arenas_tdata[i].arena = arenas[i]; - malloc_mutex_unlock(&arenas_lock); - if (narenas_tdata > narenas_actual) { - memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) - * (narenas_tdata - narenas_actual)); - } /* Copy/initialize tickers. */ for (i = 0; i < narenas_actual; i++) { @@ -587,6 +547,10 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) DECAY_NTICKS_PER_UPDATE); } } + if (narenas_tdata > narenas_actual) { + memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) + * (narenas_tdata - narenas_actual)); + } /* Read the refreshed tdata array. */ tdata = &arenas_tdata[ind]; @@ -596,33 +560,6 @@ label_return: return (tdata); } -arena_t * -arena_get_hard(tsd_t *tsd, unsigned ind, bool init_if_missing, - arena_tdata_t *tdata) -{ - arena_t *arena; - unsigned narenas_actual; - - if (init_if_missing && tdata != NULL) { - tdata->arena = arena_init(ind); - if (tdata->arena != NULL) - return (tdata->arena); - } - - /* - * This function must always tell the truth, even if it's slow, so don't - * let OOM, thread cleanup (note tsd_nominal check), nor recursive - * allocation avoidance (note arenas_tdata_bypass check) get in the way. - */ - narenas_actual = narenas_total_get(); - if (ind >= narenas_actual) - return (NULL); - malloc_mutex_lock(&arenas_lock); - arena = arenas[ind]; - malloc_mutex_unlock(&arenas_lock); - return (arena); -} - /* Slow path, called only by arena_choose(). */ arena_t * arena_choose_hard(tsd_t *tsd) @@ -635,15 +572,16 @@ arena_choose_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; malloc_mutex_lock(&arenas_lock); - assert(a0get() != NULL); + assert(arena_get(0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arenas[i] != NULL) { + if (arena_get(i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. 
*/ - if (arenas[i]->nthreads < - arenas[choose]->nthreads) + if (arena_nthreads_get(arena_get(i, false)) < + arena_nthreads_get(arena_get(choose, + false))) choose = i; } else if (first_null == narenas_auto) { /* @@ -659,13 +597,13 @@ arena_choose_hard(tsd_t *tsd) } } - if (arenas[choose]->nthreads == 0 + if (arena_nthreads_get(arena_get(choose, false)) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. */ - ret = arenas[choose]; + ret = arena_get(choose, false); } else { /* Initialize a new arena. */ choose = first_null; @@ -675,10 +613,10 @@ arena_choose_hard(tsd_t *tsd) return (NULL); } } - arena_bind_locked(tsd, choose); + arena_bind(tsd, choose); malloc_mutex_unlock(&arenas_lock); } else { - ret = a0get(); + ret = arena_get(0, false); arena_bind(tsd, 0); } @@ -750,7 +688,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arenas[i]; + arena_t *arena = arena_get(i, false); if (arena != NULL) { tcache_t *tcache; @@ -1309,7 +1247,8 @@ malloc_init_hard_a0_locked(void) * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). */ - narenas_total = narenas_auto = 1; + narenas_auto = 1; + narenas_total_set(narenas_auto); arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* @@ -1391,28 +1330,22 @@ malloc_init_hard_finish(void) } narenas_auto = opt_narenas; /* - * Make sure that the arenas array can be allocated. In practice, this - * limit is enough to allow the allocator to function, but the ctl - * machinery will fail to allocate memory at far lower limits. + * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). 
*/ - if (narenas_auto > chunksize / sizeof(arena_t *)) { - narenas_auto = (unsigned)(chunksize / sizeof(arena_t *)); + if (narenas_auto > MALLOCX_ARENA_MAX) { + narenas_auto = MALLOCX_ARENA_MAX; malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } - narenas_total = narenas_auto; + narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas_total); + arenas = (arena_t **)base_alloc(sizeof(arena_t *) * + (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); - /* - * Zero the array. In practice, this should always be pre-zeroed, - * since it was just mmap()ed, but let's be sure. - */ - memset(arenas, 0, sizeof(arena_t *) * narenas_total); /* Copy the pointer to the one arena that was already initialized. */ - arenas[0] = a0; + arena_set(0, a0); malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); @@ -2084,7 +2017,7 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd, arena_ind, true, true); + *arena = arena_get(arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2325,7 +2258,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true, true); + arena = arena_get(arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2677,7 +2610,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - unsigned i; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2689,9 +2622,11 @@ _malloc_prefork(void) ctl_prefork(); prof_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_prefork(arenas[i]); + for (i = 0, narenas = 
narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_prefork(arena); } chunk_prefork(); base_prefork(); @@ -2705,7 +2640,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - unsigned i; + unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2716,9 +2651,11 @@ _malloc_postfork(void) /* Release all mutexes, now that fork() has completed. */ base_postfork_parent(); chunk_postfork_parent(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_parent(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_postfork_parent(arena); } malloc_mutex_postfork_parent(&arenas_lock); prof_postfork_parent(); @@ -2728,16 +2665,18 @@ _malloc_postfork(void) void jemalloc_postfork_child(void) { - unsigned i; + unsigned i, narenas; assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ base_postfork_child(); chunk_postfork_child(); - for (i = 0; i < narenas_total; i++) { - if (arenas[i] != NULL) - arena_postfork_child(arenas[i]); + for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { + arena_t *arena; + + if ((arena = arena_get(i, false)) != NULL) + arena_postfork_child(arena); } malloc_mutex_postfork_child(&arenas_lock); prof_postfork_child(); diff --git a/src/tcache.c b/src/tcache.c index 9f10a745..6e32f404 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -325,7 +325,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. 
*/ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, a0get()); + tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, + arena_get(0, false)); if (tcache == NULL) return (NULL); @@ -453,7 +454,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, a0get()); + tcache = tcache_create(tsd, arena_get(0, false)); if (tcache == NULL) return (true); From 0c516a00c4cb28cff55ce0995f756b5aae074c9e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 15:29:49 -0800 Subject: [PATCH 0128/2608] Make *allocx() size class overflow behavior defined. Limit supported size and alignment to HUGE_MAXCLASS, which in turn is now limited to be less than PTRDIFF_MAX. This resolves #278 and #295. --- doc/jemalloc.xml.in | 14 ++-- include/jemalloc/internal/arena.h | 3 +- include/jemalloc/internal/huge.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 15 ++-- include/jemalloc/internal/size_classes.sh | 4 +- include/jemalloc/internal/tcache.h | 10 +-- include/jemalloc/jemalloc_macros.h.in | 3 +- src/arena.c | 36 ++++---- src/ckh.c | 6 +- src/huge.c | 34 ++++---- src/jemalloc.c | 68 +++++++++------ test/integration/mallocx.c | 30 +++++++ test/integration/rallocx.c | 84 ++++++++++++++++++- test/unit/size_classes.c | 25 +++++- 14 files changed, 247 insertions(+), 89 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d7b33582..bc5dbd1d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -310,16 +310,14 @@ The mallocx function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. 
The rallocx function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if - size is 0, or if request size - overflows due to size class and/or alignment constraints. + size is 0. The xallocx function resizes the allocation at ptr in place to be at least @@ -354,10 +352,10 @@ memory, but it performs the same size computation as the mallocx function, and returns the real size of the allocation that would result from the equivalent - mallocx function call. Behavior is - undefined if size is 0, or if - request size overflows due to size class and/or alignment - constraints. + mallocx function call, or + 0 if the inputs exceed the maximum supported size + class and/or alignment. Behavior is undefined if + size is 0. The mallctl function provides a general interface for introspecting the memory allocator, as well as diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 470eee65..891b9d79 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -536,8 +536,7 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, - szind_t ind, bool zero); +void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 68d3789f..cb6f69e6 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,9 +9,9 @@ 
/******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t size, bool zero, +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, tcache_t *tcache); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 611ed36a..3f54391f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -642,7 +642,7 @@ JEMALLOC_ALWAYS_INLINE size_t index2size(szind_t index) { - assert(index <= NSIZES); + assert(index < NSIZES); return (index2size_lookup(index)); } @@ -745,17 +745,16 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Huge size class. Beware of size_t overflow. */ + /* Huge size class. Beware of overflow. */ + + if (unlikely(alignment > HUGE_MAXCLASS)) + return (0); /* * We can't achieve subchunk alignment, so round up alignment to the * minimum that can actually be supported. */ alignment = CHUNK_CEILING(alignment); - if (alignment == 0) { - /* size_t overflow. */ - return (0); - } /* Make sure result is a huge size class. */ if (size <= chunksize) @@ -1106,7 +1105,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) { @@ -1114,7 +1113,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, return (NULL); /* Try again, without extra this time. 
*/ usize = sa2u(size, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); p = ipalloct(tsd, usize, alignment, zero, tcache, arena); if (p == NULL) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index fc82036d..2b0ca29a 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -142,10 +142,10 @@ size_classes() { # All remaining groups. lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt ${ptr_bits} ] ; do + while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do sep_line ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 1)) ] ; then + if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then ndelta_limit=$((${g} - 1)) else ndelta_limit=${g} diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 25eaf142..8357820b 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -344,7 +344,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, void *ret; tcache_bin_t *tbin; bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < nhbins); tbin = &tcache->tbins[binind]; @@ -359,14 +358,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) return (NULL); - usize = index2size(binind); - assert(usize <= tcache_maxclass); - ret = arena_malloc_large(tsd, arena, usize, binind, zero); + ret = arena_malloc_large(tsd, arena, binind, zero); if (ret == NULL) return (NULL); } else { + size_t usize JEMALLOC_CC_SILENCE_INIT(0); + /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { + if (config_prof || (slow_path && config_fill) || + unlikely(zero)) { usize = index2size(binind); assert(usize <= tcache_maxclass); } diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index d164edac..9f356f98 
100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -16,7 +16,8 @@ # define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) # else # define MALLOCX_ALIGN(a) \ - ((int)((a < (size_t)INT_MAX) ? ffs((int)a)-1 : ffs((int)(a>>32))+31)) + ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)((a)>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* diff --git a/src/arena.c b/src/arena.c index 3f394681..1ceb59fd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2364,16 +2364,16 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } static void * -arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, - bool zero) +arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; + size_t usize; arena_run_t *run; assert(binind < NBINS); bin = &arena->bins[binind]; - size = index2size(binind); + usize = index2size(binind); malloc_mutex_lock(&bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) @@ -2392,7 +2392,7 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, bin->stats.curregs++; } malloc_mutex_unlock(&bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(arena, size)) + if (config_prof && !isthreaded && arena_prof_accum(arena, usize)) prof_idump(); if (!zero) { @@ -2401,16 +2401,16 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, arena_alloc_junk_small(ret, &arena_bin_info[binind], false); } else if (unlikely(opt_zero)) - memset(ret, 0, size); + memset(ret, 0, usize); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); } else { if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); - memset(ret, 0, size); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); + memset(ret, 0, usize); } arena_decay_tick(tsd, arena); @@ -2418,8 +2418,7 
@@ arena_malloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, } void * -arena_malloc_large(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind, - bool zero) +arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) { void *ret; size_t usize; @@ -2490,10 +2489,10 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, return (NULL); if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(tsd, arena, size, ind, zero)); + return (arena_malloc_small(tsd, arena, ind, zero)); if (likely(size <= large_maxclass)) - return (arena_malloc_large(tsd, arena, size, ind, zero)); - return (huge_malloc(tsd, arena, size, zero, tcache)); + return (arena_malloc_large(tsd, arena, ind, zero)); + return (huge_malloc(tsd, arena, index2size(ind), zero, tcache)); } /* Only handles large allocations that require more than page alignment. */ @@ -3047,6 +3046,13 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, { size_t usize_min, usize_max; + /* Calls with non-zero extra had to clamp extra. */ + assert(extra == 0 || size + extra <= HUGE_MAXCLASS); + + /* Prevent exceeding PTRDIFF_MAX. 
*/ + if (unlikely(size > HUGE_MAXCLASS)) + return (true); + usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { @@ -3089,7 +3095,7 @@ arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, return (arena_malloc(tsd, arena, usize, size2index(usize), zero, tcache, true)); usize = sa2u(usize, alignment); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); } @@ -3102,7 +3108,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t usize; usize = s2u(size); - if (usize == 0) + if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) return (NULL); if (likely(usize <= large_maxclass)) { diff --git a/src/ckh.c b/src/ckh.c index d1cfd234..3b423aa2 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -266,7 +266,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) lg_curcells++; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } @@ -312,7 +312,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (usize == 0) + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, NULL); @@ -387,7 +387,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (usize == 0) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { ret = true; goto label_return; } diff --git a/src/huge.c b/src/huge.c index 9f880484..5f7ceaf1 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,35 +31,30 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, 
size_t size, bool zero, +huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, tcache_t *tcache) { - size_t usize; - usize = s2u(size); - if (usize == 0) { - /* size_t overflow. */ - return (NULL); - } + assert(usize == s2u(usize)); return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, +huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - size_t usize; + size_t ausize; extent_node_t *node; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ - usize = sa2u(size, alignment); - if (unlikely(usize == 0)) + ausize = sa2u(usize, alignment); + if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); - assert(usize >= chunksize); + assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), @@ -74,15 +69,15 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, is_zeroed = zero; arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, - size, alignment, &is_zeroed)) == NULL) { + usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsd, node, tcache, true, true); return (NULL); } - extent_node_init(node, arena, ret, size, is_zeroed, true); + extent_node_init(node, arena, ret, usize, is_zeroed, true); if (huge_node_set(ret, node)) { - arena_chunk_dalloc_huge(arena, ret, size); + arena_chunk_dalloc_huge(arena, ret, usize); idalloctm(tsd, node, tcache, true, true); return (NULL); } @@ -95,9 +90,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t size, size_t alignment, if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) - memset(ret, 0, size); + memset(ret, 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, 0xa5, size); + memset(ret, 0xa5, usize); arena_decay_tick(tsd, arena); 
return (ret); @@ -286,6 +281,8 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, { assert(s2u(oldsize) == oldsize); + /* The following should have been caught by callers. */ + assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocations must be huge to avoid a move. */ if (oldsize < chunksize || usize_max < chunksize) @@ -346,6 +343,9 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, void *ret; size_t copysize; + /* The following should have been caught by callers. */ + assert(usize > 0 && usize <= HUGE_MAXCLASS); + /* Try to avoid moving the allocation. */ if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) return (ptr); diff --git a/src/jemalloc.c b/src/jemalloc.c index 86032a40..d9197e00 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1449,18 +1449,17 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) return (NULL); *tsd = tsd_fetch(); ind = size2index(size); + if (unlikely(ind >= NSIZES)) + return (NULL); - if (config_stats || - (config_prof && opt_prof) || - (slow_path && config_valgrind && unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } - if (config_prof && opt_prof) { - if (unlikely(*usize == 0)) - return (NULL); + if (config_prof && opt_prof) return (imalloc_prof(*tsd, *usize, ind, slow_path)); - } return (imalloc(*tsd, size, ind, slow_path)); } @@ -1584,7 +1583,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (unlikely(usize == 0)) { + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { result = NULL; goto label_oom; } @@ -1722,12 +1721,12 @@ je_calloc(size_t num, size_t size) } ind = size2index(num_size); + if (unlikely(ind >= NSIZES)) { + ret = NULL; + goto label_return; + } if (config_prof && opt_prof) { 
usize = index2size(ind); - if (unlikely(usize == 0)) { - ret = NULL; - goto label_return; - } ret = icalloc_prof(tsd, usize, ind); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -1874,8 +1873,8 @@ je_realloc(void *ptr, size_t size) if (config_prof && opt_prof) { usize = s2u(size); - ret = unlikely(usize == 0) ? NULL : irealloc_prof(tsd, - ptr, old_usize, usize); + ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? + NULL : irealloc_prof(tsd, ptr, old_usize, usize); } else { if (config_stats || (config_valgrind && unlikely(in_valgrind))) @@ -2006,7 +2005,8 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } - assert(*usize != 0); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) @@ -2032,7 +2032,6 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, if (likely(flags == 0)) { *usize = s2u(size); - assert(*usize != 0); *alignment = 0; *zero = false; *tcache = tcache_get(tsd, true); @@ -2051,6 +2050,8 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, szind_t ind; ind = size2index(usize); + if (unlikely(ind >= NSIZES)) + return (NULL); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); if (unlikely(zero)) @@ -2120,8 +2121,13 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) if (likely(flags == 0)) { szind_t ind = size2index(size); - if (config_stats || (config_valgrind && unlikely(in_valgrind))) + if (unlikely(ind >= NSIZES)) + return (NULL); + if (config_stats || (config_valgrind && + unlikely(in_valgrind))) { *usize = index2size(ind); + assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + } return (imalloc(tsd, size, ind, true)); } @@ -2278,7 +2284,8 @@ je_rallocx(void 
*ptr, size_t size, int flags) if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - assert(usize != 0); + if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + goto label_oom; p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); if (unlikely(p == NULL)) @@ -2392,14 +2399,23 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize = isalloc(ptr, config_prof); - /* Clamp extra if necessary to avoid (size + extra) overflow. */ - if (unlikely(size + extra > HUGE_MAXCLASS)) { - /* Check for size overflow. */ + if (unlikely(extra > 0)) { + /* + * The API explicitly absolves itself of protecting against + * (size + extra) numerical overflow, but we may need to clamp + * extra to avoid exceeding HUGE_MAXCLASS. + * + * Ordinarily, size limit checking is handled deeper down, but + * here we have to check as part of (size + extra) clamping, + * since we need the clamped value in the above helper + * functions. 
+ */ if (unlikely(size > HUGE_MAXCLASS)) { usize = old_usize; goto label_not_resized; } - extra = HUGE_MAXCLASS - size; + if (unlikely(HUGE_MAXCLASS - size < extra)) + extra = HUGE_MAXCLASS - size; } if (config_valgrind && unlikely(in_valgrind)) @@ -2474,7 +2490,6 @@ inallocx(size_t size, int flags) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - assert(usize != 0); return (usize); } @@ -2507,13 +2522,18 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { + size_t usize; assert(size != 0); if (unlikely(malloc_init())) return (0); - return (inallocx(size, flags)); + usize = inallocx(size, flags); + if (unlikely(usize > HUGE_MAXCLASS)) + return (0); + + return (usize); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 6253175d..35c559a4 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -46,6 +46,35 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +TEST_BEGIN(test_overflow) +{ + size_t hugemax, size; + + hugemax = get_huge_size(get_nhuge()-1); + + assert_ptr_null(mallocx(hugemax+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); + + assert_ptr_null(mallocx(PTRDIFF_MAX+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + + assert_ptr_null(mallocx(SIZE_T_MAX, 0), + "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); + +#if LG_SIZEOF_PTR == 3 + size = ZU(0x600000000000000); +#else + size = ZU(0x6000000); +#endif + assert_ptr_null(mallocx(size, 0), + "Expected OOM for mallocx(size=%#zx, 0", size); + + assert_ptr_null(mallocx(1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX+1)); +} +TEST_END + TEST_BEGIN(test_oom) { size_t hugemax, size, alignment; @@ -176,6 +205,7 @@ main(void) { return (test( + test_overflow, test_oom, test_basic, test_alignment_and_size)); diff --git 
a/test/integration/rallocx.c b/test/integration/rallocx.c index 022e0bf0..3b7d21cf 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -1,5 +1,51 @@ #include "test/jemalloc_test.h" +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + + return (ret); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + TEST_BEGIN(test_grow_and_shrink) { void *p, *q; @@ -173,6 +219,41 @@ TEST_BEGIN(test_lg_align_and_zero) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t hugemax, size; + void *p; + + hugemax = get_huge_size(get_nhuge()-1); + + p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + assert_ptr_null(rallocx(p, hugemax+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); + + assert_ptr_null(rallocx(p, PTRDIFF_MAX+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + + assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); + +#if LG_SIZEOF_PTR == 3 + size = ZU(0x600000000000000); +#else + size = ZU(0x6000000); +#endif + assert_ptr_null(rallocx(p, size, 0), + "Expected OOM for rallocx(p, size=%#zx, 0", size); + + assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + "Expected OOM for 
rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX+1)); + + dallocx(p, 0); +} +TEST_END + int main(void) { @@ -181,5 +262,6 @@ main(void) test_grow_and_shrink, test_zero, test_align, - test_lg_align_and_zero)); + test_lg_align_and_zero, + test_overflow)); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index d3aaebd7..3a2126fc 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -80,10 +80,33 @@ TEST_BEGIN(test_size_classes) } TEST_END +TEST_BEGIN(test_overflow) +{ + size_t max_size_class; + + max_size_class = get_max_size_class(); + + assert_u_ge(size2index(max_size_class+1), NSIZES, + "size2index() should return >= NSIZES on overflow"); + assert_u_ge(size2index(PTRDIFF_MAX+1), NSIZES, + "size2index() should return >= NSIZES on overflow"); + assert_u_ge(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return >= NSIZES on overflow"); + + assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, + "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_gt(s2u(PTRDIFF_MAX+1), HUGE_MAXCLASS, + "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_eq(s2u(SIZE_T_MAX), 0, + "s2u() should return 0 on overflow"); +} +TEST_END + int main(void) { return (test( - test_size_classes)); + test_size_classes, + test_overflow)); } From e3195fa4a54344cf707d30e510e91ed43f5a8b84 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 16:40:24 -0800 Subject: [PATCH 0129/2608] Cast PTRDIFF_MAX to size_t before adding 1. This fixes compilation warnings regarding integer overflow that were introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). 
--- test/integration/mallocx.c | 8 ++++---- test/integration/rallocx.c | 8 ++++---- test/unit/size_classes.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 35c559a4..6ecd636b 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -55,8 +55,8 @@ TEST_BEGIN(test_overflow) assert_ptr_null(mallocx(hugemax+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); - assert_ptr_null(mallocx(PTRDIFF_MAX+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); assert_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); @@ -69,9 +69,9 @@ TEST_BEGIN(test_overflow) assert_ptr_null(mallocx(size, 0), "Expected OOM for mallocx(size=%#zx, 0", size); - assert_ptr_null(mallocx(1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX+1)); + ZU(PTRDIFF_MAX)+1); } TEST_END diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 3b7d21cf..c3c22419 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -232,8 +232,8 @@ TEST_BEGIN(test_overflow) assert_ptr_null(rallocx(p, hugemax+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); - assert_ptr_null(rallocx(p, PTRDIFF_MAX+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX+1)); + assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); @@ -246,9 +246,9 @@ TEST_BEGIN(test_overflow) assert_ptr_null(rallocx(p, size, 0), "Expected OOM for rallocx(p, size=%#zx, 0", size); - assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(PTRDIFF_MAX+1)), + 
assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX+1)); + ZU(PTRDIFF_MAX)+1); dallocx(p, 0); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 3a2126fc..2e2caaf5 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -88,14 +88,14 @@ TEST_BEGIN(test_overflow) assert_u_ge(size2index(max_size_class+1), NSIZES, "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(PTRDIFF_MAX+1), NSIZES, + assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, "size2index() should return >= NSIZES on overflow"); assert_u_ge(size2index(SIZE_T_MAX), NSIZES, "size2index() should return >= NSIZES on overflow"); assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, "s2u() should return > HUGE_MAXCLASS for unsupported size"); - assert_zu_gt(s2u(PTRDIFF_MAX+1), HUGE_MAXCLASS, + assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, "s2u() should return > HUGE_MAXCLASS for unsupported size"); assert_zu_eq(s2u(SIZE_T_MAX), 0, "s2u() should return 0 on overflow"); From 9d2c10f2e83e9a357630bebd220e02a2ef95d6fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 16:42:15 -0800 Subject: [PATCH 0130/2608] Add more HUGE_MAXCLASS overflow checks. Add HUGE_MAXCLASS overflow checks that are specific to heap profiling code paths. This fixes test failures that were introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). 
--- src/jemalloc.c | 57 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 23 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d9197e00..c8841783 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2032,6 +2032,8 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, if (likely(flags == 0)) { *usize = s2u(size); + if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + return (true); *alignment = 0; *zero = false; *tcache = tcache_get(tsd, true); @@ -2049,11 +2051,10 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, { szind_t ind; - ind = size2index(usize); - if (unlikely(ind >= NSIZES)) - return (NULL); if (unlikely(alignment != 0)) return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + ind = size2index(usize); + assert(ind < NSIZES); if (unlikely(zero)) return (icalloct(tsd, usize, ind, tcache, arena)); return (imalloct(tsd, usize, ind, tcache, arena)); @@ -2360,10 +2361,23 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ - usize_max = (alignment == 0) ? s2u(size+extra) : sa2u(size+extra, - alignment); - assert(usize_max != 0); + if (alignment == 0) { + usize_max = s2u(size+extra); + assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS); + } else { + usize_max = sa2u(size+extra, alignment); + if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) { + /* + * usize_max is out of range, and chances are that + * allocation will fail, but use the maximum possible + * value and carry on with prof_alloc_prep(), just in + * case allocation succeeds. 
+ */ + usize_max = HUGE_MAXCLASS; + } + } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, alignment, zero, tctx); @@ -2399,24 +2413,21 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize = isalloc(ptr, config_prof); - if (unlikely(extra > 0)) { - /* - * The API explicitly absolves itself of protecting against - * (size + extra) numerical overflow, but we may need to clamp - * extra to avoid exceeding HUGE_MAXCLASS. - * - * Ordinarily, size limit checking is handled deeper down, but - * here we have to check as part of (size + extra) clamping, - * since we need the clamped value in the above helper - * functions. - */ - if (unlikely(size > HUGE_MAXCLASS)) { - usize = old_usize; - goto label_not_resized; - } - if (unlikely(HUGE_MAXCLASS - size < extra)) - extra = HUGE_MAXCLASS - size; + /* + * The API explicitly absolves itself of protecting against (size + + * extra) numerical overflow, but we may need to clamp extra to avoid + * exceeding HUGE_MAXCLASS. + * + * Ordinarily, size limit checking is handled deeper down, but here we + * have to check as part of (size + extra) clamping, since we need the + * clamped value in the above helper functions. + */ + if (unlikely(size > HUGE_MAXCLASS)) { + usize = old_usize; + goto label_not_resized; } + if (unlikely(HUGE_MAXCLASS - size < extra)) + extra = HUGE_MAXCLASS - size; if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); From 8282a2ad979a9e72ffb645321c8a0b58a09eb9d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 16:44:48 -0800 Subject: [PATCH 0131/2608] Remove a superfluous comment. 
--- src/arena.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 1ceb59fd..ec4315a7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3049,7 +3049,6 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= HUGE_MAXCLASS); - /* Prevent exceeding PTRDIFF_MAX. */ if (unlikely(size > HUGE_MAXCLASS)) return (true); From 42ce80e15a5aa2ab6f2ec7e5f7c18164803f3076 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 Feb 2016 20:51:00 -0800 Subject: [PATCH 0132/2608] Silence miscellaneous 64-to-32-bit data loss warnings. This resolves #341. --- include/jemalloc/internal/arena.h | 19 +++++++++---------- src/arena.c | 4 ++-- src/prof.c | 2 +- src/util.c | 4 ++-- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 891b9d79..c7c18748 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -634,7 +634,7 @@ bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -unsigned arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, +size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(const void *ptr); void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -1076,11 +1076,10 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) return (binind); } -JEMALLOC_INLINE unsigned +JEMALLOC_INLINE size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) { - unsigned shift, diff, regind; - size_t interval; + size_t diff, interval, shift, regind; arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); void *rpages 
= arena_miscelm_to_rpages(miscelm); @@ -1095,7 +1094,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (unsigned)((uintptr_t)ptr - (uintptr_t)rpages - + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - bin_info->reg0_offset); /* Rescale (factor powers of 2 out of the numerator and denominator). */ @@ -1122,9 +1121,9 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(unsigned) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1) - static const unsigned interval_invs[] = { +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) + static const size_t interval_invs[] = { SIZE_INV(3), SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), @@ -1135,8 +1134,8 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) }; - if (likely(interval <= ((sizeof(interval_invs) / - sizeof(unsigned)) + 2))) { + if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) + + 2))) { regind = (diff * interval_invs[interval - 3]) >> SIZE_INV_SHIFT; } else diff --git a/src/arena.c b/src/arena.c index ec4315a7..5fcecbaf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -301,7 +301,7 @@ JEMALLOC_INLINE_C void * arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) { void *ret; - unsigned regind; + size_t regind; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -325,7 +325,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, 
mapbits); arena_bin_info_t *bin_info = &arena_bin_info[binind]; - unsigned regind = arena_run_regind(run, bin_info, ptr); + size_t regind = arena_run_regind(run, bin_info, ptr); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. */ diff --git a/src/prof.c b/src/prof.c index 93421abb..b3872277 100644 --- a/src/prof.c +++ b/src/prof.c @@ -109,7 +109,7 @@ static char prof_dump_buf[ 1 #endif ]; -static unsigned prof_dump_buf_end; +static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ diff --git a/src/util.c b/src/util.c index 9aaa8062..02673c70 100644 --- a/src/util.c +++ b/src/util.c @@ -60,7 +60,7 @@ wrtmessage(void *cbopaque, const char *s) */ UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); #else - UNUSED int result = write(STDERR_FILENO, s, strlen(s)); + UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); #endif } @@ -90,7 +90,7 @@ buferror(int err, char *buf, size_t buflen) #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, buflen, NULL); + (LPSTR)buf, (DWORD)buflen, NULL); return (0); #elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); From ebd00e95b863c790d085c906a9aeddd22bd19d69 Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 26 Feb 2016 17:18:48 +0100 Subject: [PATCH 0133/2608] Fix MSVC project --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ 2 files changed, 4 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 395837c3..d8ad505b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -101,6 +101,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 
69f64169..89a51f76 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -238,5 +238,8 @@ Source Files + + Source Files + \ No newline at end of file From 4c4ee292e411b0b2381e7b5e8f7c34d480cda99a Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 26 Feb 2016 17:18:58 +0100 Subject: [PATCH 0134/2608] Improve test_threads performance --- msvc/projects/vc2015/test_threads/test_threads.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp index 046843f3..603bdce7 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -58,22 +58,22 @@ int test_threads() thread t([&]() { for (int i = 0; i < numIter2; ++i) { const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j++) { + for (int j = 0; j < numAllocs; j += 64) { const int x = sizeDist(rnd); const int sz = sizes[x]; ptrsz[j] = sz; ptrs[j] = (uint8_t*)je_malloc(sz); if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d", sz, tid, i, j, x); + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d\n", sz, tid, i, j, x); exit(1); } for (int k = 0; k < sz; k++) ptrs[j][k] = tid + k; } - for (int j = 0; j < numAllocs; j++) { + for (int j = 0; j < numAllocs; j += 64) { for (int k = 0, sz = ptrsz[j]; k < sz; k++) if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); exit(1); } je_free(ptrs[j]); From 01ecdf32d657f9e19f84ba9785c9954734666a9c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 13:59:41 -0800 Subject: [PATCH 0135/2608] Miscellaneous bitmap refactoring. --- include/jemalloc/internal/bitmap.h | 21 ++++++------ include/jemalloc/internal/private_symbols.txt | 1 - src/bitmap.c | 33 +++++++++---------- test/unit/bitmap.c | 22 ++++++++----- 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 8452bfed..a53ac799 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -93,9 +93,8 @@ struct bitmap_info_s { #ifdef JEMALLOC_H_EXTERNS void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -size_t bitmap_info_ngroups(const bitmap_info_t *binfo); -size_t bitmap_size(size_t nbits); void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -128,7 +127,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK)))); + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); } JEMALLOC_INLINE void @@ -143,8 +142,8 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, 
size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the tree. */ @@ -155,8 +154,8 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) goff = bit >> LG_BITMAP_GROUP_NBITS; gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; - assert(g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (g != 0) break; @@ -201,8 +200,8 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); /* Propagate group state transitions up the tree. 
*/ @@ -214,9 +213,9 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) gp = &bitmap[binfo->levels[i].group_offset + goff]; g = *gp; propagate = (g == 0); - assert((g & (1LU << (bit & BITMAP_GROUP_NBITS_MASK))) + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= 1LU << (bit & BITMAP_GROUP_NBITS_MASK); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; if (!propagate) break; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d716b82d..b57cfbcc 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -131,7 +131,6 @@ base_stats_get bitmap_full bitmap_get bitmap_info_init -bitmap_info_ngroups bitmap_init bitmap_set bitmap_sfu diff --git a/src/bitmap.c b/src/bitmap.c index c733372b..22c92fe4 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -32,22 +32,6 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } -size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ - - return (binfo->levels[binfo->nlevels].group_offset << LG_SIZEOF_BITMAP); -} - -size_t -bitmap_size(size_t nbits) -{ - bitmap_info_t binfo; - - bitmap_info_init(&binfo, nbits); - return (bitmap_info_ngroups(&binfo)); -} - void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { @@ -61,8 +45,7 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) * correspond to the first logical bit in the group, so extra bits * are the most significant bits of the last group. 
*/ - memset(bitmap, 0xffU, binfo->levels[binfo->nlevels].group_offset << - LG_SIZEOF_BITMAP); + memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; if (extra != 0) @@ -76,3 +59,17 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; } } + +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset); +} + +size_t +bitmap_size(const bitmap_info_t *binfo) +{ + + return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); +} diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 7da583d8..1ab0bb8e 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -6,7 +6,11 @@ TEST_BEGIN(test_bitmap_size) prev_size = 0; for (i = 1; i <= BITMAP_MAXBITS; i++) { - size_t size = bitmap_size(i); + bitmap_info_t binfo; + size_t size; + + bitmap_info_init(&binfo, i); + size = bitmap_size(&binfo); assert_true(size >= prev_size, "Bitmap size is smaller than expected"); prev_size = size; @@ -23,8 +27,8 @@ TEST_BEGIN(test_bitmap_init) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) { @@ -46,8 +50,8 @@ TEST_BEGIN(test_bitmap_set) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -69,8 +73,8 @@ TEST_BEGIN(test_bitmap_unset) bitmap_info_init(&binfo, i); { size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); for (j = 0; j < i; j++) @@ -98,8 
+102,8 @@ TEST_BEGIN(test_bitmap_sfu) bitmap_info_init(&binfo, i); { ssize_t j; - bitmap_t *bitmap = (bitmap_t *)malloc(sizeof(bitmap_t) * - bitmap_info_ngroups(&binfo)); + bitmap_t *bitmap = (bitmap_t *)malloc( + bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); /* Iteratively set bits starting at the beginning. */ From b8823ab02607d6f03febd32ac504bb6188c54047 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 24 Feb 2016 08:04:43 -0800 Subject: [PATCH 0136/2608] Use linear scan for small bitmaps For small bitmaps, a linear scan of the bitmap is slightly faster than a tree search - bitmap_t is more compact, and there are fewer writes since we don't have to propogate state transitions up the tree. On x86_64 with the current settings, I'm seeing ~.5%-1% CPU improvement in production canaries with this change. The old tree code is left since 32bit sizes are much larger (and ffsl smaller), and maybe the run sizes will change in the future. This resolves #339. --- include/jemalloc/internal/bitmap.h | 50 ++++++++++++++++++++++++++++-- src/bitmap.c | 41 +++++++++++++++++++++++- 2 files changed, 88 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index a53ac799..ee2e7e9d 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -15,6 +15,15 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffsl more than 2^3 times, use a + * tree instead. + */ +#if LG_RUN_MAXREGS - LG_BITMAP_GROUP_NBITS > 3 +# define USE_TREE +#endif + /* Number of groups required to store a given number of bits. 
*/ #define BITMAP_BITS2GROUPS(nbits) \ ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) @@ -48,6 +57,8 @@ typedef unsigned long bitmap_t; /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ +#ifdef USE_TREE + #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 @@ -65,6 +76,13 @@ typedef unsigned long bitmap_t; (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) +#else /* USE_TREE */ + +#define BITMAP_GROUPS_MAX \ + (ZU(1) << (LG_RUN_MAXREGS - LG_SIZEOF_BITMAP - LG_SIZEOF_BITMAP)) + +#endif /* USE_TREE */ + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -78,6 +96,7 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; +#ifdef USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -86,6 +105,10 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* USE_TREE */ + /* Number of groups necessary for nbits. */ + size_t ngroups; +#endif /* USE_TREE */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -112,10 +135,20 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { +#ifdef USE_TREE size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. 
*/ return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif } JEMALLOC_INLINE bool @@ -146,6 +179,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. */ if (g == 0) { unsigned i; @@ -161,6 +195,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) break; } } +#endif } /* sfu: set first unset. */ @@ -173,6 +208,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) assert(!bitmap_full(bitmap, binfo)); +#ifdef USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; bit = ffs_lu(g) - 1; @@ -181,7 +217,15 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) g = bitmap[binfo->levels[i].group_offset + bit]; bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); } - +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (bit - 1) + (i << 6); +#endif bitmap_set(bitmap, binfo, bit); return (bit); } @@ -192,7 +236,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) size_t goff; bitmap_t *gp; bitmap_t g; - bool propagate; + UNUSED bool propagate; assert(bit < binfo->nbits); assert(bitmap_get(bitmap, binfo, bit)); @@ -204,6 +248,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef USE_TREE /* Propagate group state transitions up the tree. 
*/ if (propagate) { unsigned i; @@ -221,6 +266,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) break; } } +#endif /* USE_TREE */ } #endif diff --git a/src/bitmap.c b/src/bitmap.c index 22c92fe4..b1e66271 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -3,6 +3,8 @@ /******************************************************************************/ +#ifdef USE_TREE + void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { @@ -32,6 +34,13 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) binfo->nbits = nbits; } +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) +{ + + return (binfo->levels[binfo->nlevels].group_offset); +} + void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { @@ -60,13 +69,43 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) } } +#else /* USE_TREE */ + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) +{ + size_t i; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + i = nbits >> LG_BITMAP_GROUP_NBITS; + if (nbits % BITMAP_GROUP_NBITS != 0) + i++; + binfo->ngroups = i; + binfo->nbits = nbits; +} + static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset); + return (binfo->ngroups); } +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t extra; + + memset(bitmap, 0xffU, bitmap_size(binfo)); + extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS)); + if (extra != 0) + bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra); +} + +#endif /* USE_TREE */ + size_t bitmap_size(const bitmap_info_t *binfo) { From 20fad3430c5fa999fd094199f55a6af962993b51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 14:43:39 -0800 Subject: [PATCH 0137/2608] Refactor some bitmap cpp logic. 
--- include/jemalloc/internal/bitmap.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index ee2e7e9d..2594e3a4 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -20,7 +20,7 @@ typedef unsigned long bitmap_t; * force linear search, if we would have to call ffsl more than 2^3 times, use a * tree instead. */ -#if LG_RUN_MAXREGS - LG_BITMAP_GROUP_NBITS > 3 +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 # define USE_TREE #endif @@ -78,8 +78,7 @@ typedef unsigned long bitmap_t; #else /* USE_TREE */ -#define BITMAP_GROUPS_MAX \ - (ZU(1) << (LG_RUN_MAXREGS - LG_SIZEOF_BITMAP - LG_SIZEOF_BITMAP)) +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) #endif /* USE_TREE */ From d412624b25eed2b5c52b7d94a71070d3aab03cb4 Mon Sep 17 00:00:00 2001 From: buchgr Date: Wed, 9 Dec 2015 18:00:57 +0100 Subject: [PATCH 0138/2608] Move retaining out of default chunk hooks This fixes chunk allocation to reuse retained memory even if an application-provided chunk allocation function is in use. This resolves #307. --- src/chunk.c | 36 +++++++++++++++++++++++++----------- 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 26622ced..b179d213 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -332,19 +332,12 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - /* Retained. */ - if ((ret = chunk_recycle(arena, &chunk_hooks, - &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true)) != NULL) - return (ret); - /* "primary" dss. 
*/ if (have_dss && dss_prec == dss_prec_primary && (ret = chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != @@ -442,6 +435,21 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, return (ret); } +static void * +chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) +{ + + assert(size != 0); + assert((size & chunksize_mask) == 0); + assert(alignment != 0); + assert((alignment & chunksize_mask) == 0); + + return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained, + &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, + commit, true)); +} + void * chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -449,10 +457,16 @@ chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, void *ret; chunk_hooks_assure_initialized(arena, chunk_hooks); - ret = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, - arena->ind); - if (ret == NULL) - return (NULL); + + ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size, + alignment, zero, commit); + if (ret == NULL) { + ret = chunk_hooks->alloc(new_addr, size, alignment, zero, + commit, arena->ind); + if (ret == NULL) + return (NULL); + } + if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); From a62e94cabb349982f3270a2057ab49b975e7cbb7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 16:27:52 -0800 Subject: [PATCH 0139/2608] Remove invalid tests. Remove invalid tests that were intended to be tests of (hugemax+1) OOM, for which tests already exist. 
--- test/integration/mallocx.c | 10 +--------- test/integration/rallocx.c | 10 +--------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 6ecd636b..42eee105 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -48,7 +48,7 @@ get_huge_size(size_t ind) TEST_BEGIN(test_overflow) { - size_t hugemax, size; + size_t hugemax; hugemax = get_huge_size(get_nhuge()-1); @@ -61,14 +61,6 @@ TEST_BEGIN(test_overflow) assert_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); -#if LG_SIZEOF_PTR == 3 - size = ZU(0x600000000000000); -#else - size = ZU(0x6000000); -#endif - assert_ptr_null(mallocx(size, 0), - "Expected OOM for mallocx(size=%#zx, 0", size); - assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index c3c22419..66ad8660 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -221,7 +221,7 @@ TEST_END TEST_BEGIN(test_overflow) { - size_t hugemax, size; + size_t hugemax; void *p; hugemax = get_huge_size(get_nhuge()-1); @@ -238,14 +238,6 @@ TEST_BEGIN(test_overflow) assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); -#if LG_SIZEOF_PTR == 3 - size = ZU(0x600000000000000); -#else - size = ZU(0x6000000); -#endif - assert_ptr_null(rallocx(p, size, 0), - "Expected OOM for rallocx(p, size=%#zx, 0", size); - assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); From 3763d3b5f92d855596e111a339c1fa9583c4602a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 17:29:35 -0800 Subject: [PATCH 0140/2608] Refactor arena_cactive_update() into arena_cactive_{add,sub}(). This removes an implicit conversion from size_t to ssize_t. 
For cactive decreases, the size_t value was intentionally underflowed to generate "negative" values (actually positive values above the positive range of ssize_t), and the conversion to ssize_t was undefined according to C language semantics. This regression was perpetuated by 1522937e9cbcfa24c881dc439cc454f9a34a7e88 (Fix the cactive statistic.) and first release in 4.0.0, which in retrospect only fixed one of two problems introduced by aa5113b1fdafd1129c22512837c6c3d66c295fc8 (Refactor overly large/complex functions) and first released in 3.5.0. --- src/arena.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/arena.c b/src/arena.c index 5fcecbaf..3163d56e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -373,15 +373,27 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_cactive_update(arena_t *arena, size_t add_pages, size_t sub_pages) +arena_cactive_add(arena_t *arena, size_t add_pages) { if (config_stats) { - ssize_t cactive_diff = CHUNK_CEILING((arena->nactive + add_pages - - sub_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << + size_t cactive_add = CHUNK_CEILING((arena->nactive + + add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << LG_PAGE); - if (cactive_diff != 0) - stats_cactive_add(cactive_diff); + if (cactive_add != 0) + stats_cactive_add(cactive_add); + } +} + +static void +arena_cactive_sub(arena_t *arena, size_t sub_pages) +{ + + if (config_stats) { + size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) - + CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE); + if (cactive_sub != 0) + stats_cactive_sub(cactive_sub); } } @@ -403,7 +415,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - arena_cactive_update(arena, need_pages, 0); + arena_cactive_add(arena, need_pages); 
arena->nactive += need_pages; /* Keep track of trailing unused pages for later use. */ @@ -1915,7 +1927,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); - arena_cactive_update(arena, 0, run_pages); + arena_cactive_sub(arena, run_pages); arena->nactive -= run_pages; /* From 14be4a7ccad0582ab0427e61273d81ff0a5822e7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 Feb 2016 21:00:02 -0800 Subject: [PATCH 0141/2608] Update ChangeLog in preparation for 4.1.0. --- ChangeLog | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/ChangeLog b/ChangeLog index 8ed42cbe..92d267eb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,76 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.1.0 (XXX) + + This release is primarily about optimizations, but it also incorporates a lot + of portability-motivated refactoring and enhancements. Many people worked on + this release, to an extent that even with the omission here of minor changes + (see git revision history), and of the people who reported and diagnosed + issues, so much of the work was contributed that starting with this release, + changes are annotated with author credits to help reflect the collaborative + effort involved. + + New features: + - Implement decay-based unused dirty page purging, a major optimization with + mallctl API impact. This is an alternative to the existing ratio-based + unused dirty page purging, and is intended to eventually become the sole + purging mechanism. New mallctls: + + opt.purge + + opt.decay_time + + arena..decay + + arena..decay_time + + arenas.decay_time + + stats.arenas..decay_time + (@jasone, @cevans87) + - Add --with-malloc-conf, which makes it possible to embed a default + options string during configuration. 
This was motivated by the desire to + specify --with-malloc-conf=purge:decay , since the default must remain + purge:ratio until the 5.0.0 release. (@jasone) + - Make *allocx() size class overflow behavior defined. The maximum + size class is now less than PTRDIFF_MAX to protect applications against + numerical overflow, and all allocation functions are guaranteed to indicate + errors rather than potentially crashing if the request size exceeds the + maximum size class. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) + - jeprof: + + Add raw heap profile support. (@jasone) + + Add --retain and --exclude for backtrace symbol filtering. (@jasone) + + Optimizations: + - Optimize the fast path to combine various bootstrapping and configuration + checks and execute more streamlined code in the common case. (@interwq) + - Use linear scan for small bitmaps (used for small object tracking). In + addition to speeding up bitmap operations on 64-bit systems, this reduces + allocator metadata overhead by approximately 0.2%. (@djwatson) + - Separate arena_avail trees, which substantially speeds up run tree + operations. (@djwatson) + - Use memoization (boot-time-computed table) for run quantization. Separate + arena_avail trees reduced the importance of this optimization. (@jasone) + - Attempt mmap-based in-place huge reallocation. This can dramatically speed + up incremental huge reallocation. (@jasone) + + Incompatible changes: + - Make opt.narenas unsigned rather than size_t. (@jasone) + + Bug fixes: + - Refactor arenas array. In addition to fixing a fork-related deadlock, this + makes arena lookups faster and simpler. (@jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. 
(@jasone) + - Move retained memory allocation out of the default chunk allocation + function, to a location that gets executed even if the application installs + a custom chunk allocation function. This resolves a virtual memory leak. + (@buchgr) + - Resolve undefined unsigned-to-signed conversion that could cause corruption + of the stats.cactive statistic. (@jasone) + - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) + - jeprof: + + Don't discard curl options if timeout is not defined. (@djwatson) + + Detect failed profile fetches. (@djwatson) + * 4.0.4 (October 24, 2015) This bugfix release fixes another xallocx() regression. No other regressions From 40ee9aa9577ea5eb6616c10b9e6b0fa7e6796821 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 12:34:50 -0800 Subject: [PATCH 0142/2608] Fix stats.cactive accounting regression. Fix stats.cactive accounting to always increase/decrease by multiples of the chunk size, even for huge size classes that are not multiples of the chunk size, e.g. {2.5, 3, 3.5, 5, 7} MiB with 2 MiB chunk size. This regression was introduced by 155bfa7da18cab0d21d87aa2dce4554166836f5d (Normalize size classes.) and first released in 4.0.0. This resolves #336. 
--- include/jemalloc/internal/stats.h | 14 +++++++-- src/arena.c | 48 +++++++++++-------------------- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index c91dba99..705903ad 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -167,15 +167,25 @@ stats_cactive_get(void) JEMALLOC_INLINE void stats_cactive_add(size_t size) { + UNUSED size_t cactive; - atomic_add_z(&stats_cactive, size); + assert(size > 0); + assert((size & chunksize_mask) == 0); + + cactive = atomic_add_z(&stats_cactive, size); + assert(cactive - size < cactive); } JEMALLOC_INLINE void stats_cactive_sub(size_t size) { + UNUSED size_t cactive; - atomic_sub_z(&stats_cactive, size); + assert(size > 0); + assert((size & chunksize_mask) == 0); + + cactive = atomic_sub_z(&stats_cactive, size); + assert(cactive + size > cactive); } #endif diff --git a/src/arena.c b/src/arena.c index 3163d56e..c579a582 100644 --- a/src/arena.c +++ b/src/arena.c @@ -373,7 +373,7 @@ arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) } static void -arena_cactive_add(arena_t *arena, size_t add_pages) +arena_nactive_add(arena_t *arena, size_t add_pages) { if (config_stats) { @@ -383,10 +383,11 @@ arena_cactive_add(arena_t *arena, size_t add_pages) if (cactive_add != 0) stats_cactive_add(cactive_add); } + arena->nactive += add_pages; } static void -arena_cactive_sub(arena_t *arena, size_t sub_pages) +arena_nactive_sub(arena_t *arena, size_t sub_pages) { if (config_stats) { @@ -395,6 +396,7 @@ arena_cactive_sub(arena_t *arena, size_t sub_pages) if (cactive_sub != 0) stats_cactive_sub(cactive_sub); } + arena->nactive -= sub_pages; } static void @@ -415,8 +417,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_avail_remove(arena, chunk, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - 
arena_cactive_add(arena, need_pages); - arena->nactive += need_pages; + arena_nactive_add(arena, need_pages); /* Keep track of trailing unused pages for later use. */ if (rem_pages > 0) { @@ -905,7 +906,7 @@ arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } @@ -927,7 +928,7 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, arena_huge_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } - arena->nactive += (usize >> LG_PAGE); + arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, zero, true); @@ -937,8 +938,6 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, alignment, zero, csize); } - if (config_stats && ret != NULL) - stats_cactive_add(usize); return (ret); } @@ -953,9 +952,8 @@ arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; - stats_cactive_sub(usize); } - arena->nactive -= (usize >> LG_PAGE); + arena_nactive_sub(arena, usize >> LG_PAGE); chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); malloc_mutex_unlock(&arena->lock); @@ -972,17 +970,10 @@ arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, malloc_mutex_lock(&arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (oldsize < usize) { - size_t udiff = usize - oldsize; - arena->nactive += udiff >> LG_PAGE; - if (config_stats) - stats_cactive_add(udiff); - } else { - size_t udiff = oldsize - usize; - arena->nactive -= udiff >> LG_PAGE; - if (config_stats) - stats_cactive_sub(udiff); - } + if (oldsize < usize) + arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); + else + 
arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } @@ -996,12 +987,10 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, malloc_mutex_lock(&arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (cdiff != 0) { + if (cdiff != 0) arena->stats.mapped -= cdiff; - stats_cactive_sub(udiff); - } } - arena->nactive -= udiff >> LG_PAGE; + arena_nactive_sub(arena, udiff >> LG_PAGE); if (cdiff != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -1031,7 +1020,7 @@ arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, usize); arena->stats.mapped -= cdiff; } - arena->nactive -= (udiff >> LG_PAGE); + arena_nactive_sub(arena, udiff >> LG_PAGE); malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { @@ -1059,7 +1048,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } - arena->nactive += (udiff >> LG_PAGE); + arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, chunksize, zero, true) == NULL); @@ -1075,8 +1064,6 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, err = true; } - if (config_stats && !err) - stats_cactive_add(udiff); return (err); } @@ -1927,8 +1914,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); run_pages = (size >> LG_PAGE); - arena_cactive_sub(arena, run_pages); - arena->nactive -= run_pages; + arena_nactive_sub(arena, run_pages); /* * The run is dirty if the caller claims to have dirtied it, as well as From 69acd25a64a570ec8987558d149a6730bcf9a83d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 14:38:03 -0800 
Subject: [PATCH 0143/2608] Add/alphabetize private symbols. --- include/jemalloc/internal/private_symbols.txt | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b57cfbcc..54d3807d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -24,12 +24,12 @@ arena_dalloc_junk_small arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small +arena_decay_tick +arena_decay_ticks arena_decay_time_default_get arena_decay_time_default_set arena_decay_time_get arena_decay_time_set -arena_decay_tick -arena_decay_ticks arena_dss_prec_get arena_dss_prec_set arena_get @@ -51,9 +51,6 @@ arena_mapbits_large_binind_set arena_mapbits_large_get arena_mapbits_large_set arena_mapbits_large_size_get -arena_mapbitsp_get -arena_mapbitsp_read -arena_mapbitsp_write arena_mapbits_size_decode arena_mapbits_size_encode arena_mapbits_small_runind_get @@ -62,6 +59,9 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get +arena_mapbitsp_get +arena_mapbitsp_read +arena_mapbitsp_write arena_maxrun arena_maybe_purge arena_metadata_allocated_add @@ -99,14 +99,14 @@ arena_redzone_corruption arena_run_regind arena_run_to_miscelm arena_salloc -arenas_tdata_bypass_cleanup -arenas_tdata_cleanup arena_sdalloc arena_stats_merge arena_tcache_fill_small arena_tdata_get arena_tdata_get_hard arenas +arenas_tdata_bypass_cleanup +arenas_tdata_cleanup atomic_add_p atomic_add_u atomic_add_uint32 @@ -170,9 +170,9 @@ chunk_prefork chunk_purge_arena chunk_purge_wrapper chunk_register +chunks_rtree chunksize chunksize_mask -chunks_rtree ckh_count ckh_delete ckh_insert @@ -280,11 +280,11 @@ idalloct idalloctm imalloc imalloct +in_valgrind index2size index2size_compute index2size_lookup index2size_tab -in_valgrind ipalloc ipalloct ipallocztm @@ 
-489,13 +489,13 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass -tcaches tcache_salloc +tcache_stats_merge +tcaches tcaches_create tcaches_destroy tcaches_flush tcaches_get -tcache_stats_merge thread_allocated_cleanup thread_deallocated_cleanup ticker_copy @@ -520,6 +520,8 @@ tsd_init_check_recursion tsd_init_finish tsd_init_head tsd_nominal +tsd_prof_tdata_get +tsd_prof_tdata_set tsd_quarantine_get tsd_quarantine_set tsd_set @@ -527,14 +529,12 @@ tsd_tcache_enabled_get tsd_tcache_enabled_set tsd_tcache_get tsd_tcache_set -tsd_tls -tsd_tsd -tsd_prof_tdata_get -tsd_prof_tdata_set tsd_thread_allocated_get tsd_thread_allocated_set tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_tls +tsd_tsd u2rz valgrind_freelike_block valgrind_make_mem_defined From fd4858225b84c12e071eeeaea1fa1bce8731e409 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 20:38:29 -0800 Subject: [PATCH 0144/2608] Fix decay tests for --disable-stats case. --- test/unit/decay.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 66d54dc8..1052f6fb 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -201,7 +201,9 @@ TEST_BEGIN(test_decay_ticker) #define NPS 1024 int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; - uint64_t epoch, npurge0, npurge1; + uint64_t epoch; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; size_t sz, tcache_max, large; unsigned i, nupdates0; nstime_t time, decay_time, deadline; @@ -224,8 +226,8 @@ TEST_BEGIN(test_decay_ticker) assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), + config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); for (i = 0; i < NPS; i++) { ps[i] = mallocx(large, flags); @@ -266,12 +268,14 @@ TEST_BEGIN(test_decay_ticker) sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + NULL, 0), config_stats ? 0 : ENOENT, + "Unexpected mallctl result"); nstime_update(&time); } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + if (config_stats) + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); #undef NPS } TEST_END @@ -281,7 +285,9 @@ TEST_BEGIN(test_decay_nonmonotonic) #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; - uint64_t epoch, npurge0, npurge1; + uint64_t epoch; + uint64_t npurge0 = 0; + uint64_t npurge1 = 0; size_t sz, large0; unsigned i, nupdates0; @@ -296,8 +302,8 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctl result"); nupdates_mock = 0; nstime_init(&time_mock, 0); @@ -324,10 +330,11 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), + config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + if (config_stats) + assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); nstime_update = nstime_update_orig; #undef NPS From 3c07f803aa282598451eb0664cc94717b769a5e6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 20:40:13 -0800 Subject: [PATCH 0145/2608] Fix stats.arenas..[...] for --disable-stats case. Add missing stats.arenas..{dss,lg_dirty_mult,decay_time} initialization. Fix stats.arenas..{pactive,pdirty} to read under the protection of the arena mutex. --- include/jemalloc/internal/arena.h | 5 +- include/jemalloc/internal/ctl.h | 3 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 39 ++++- src/ctl.c | 142 +++++++++--------- test/unit/mallctl.c | 4 +- 6 files changed, 114 insertions(+), 80 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c7c18748..3519873c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -572,7 +572,10 @@ ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_stats_merge(arena_t *arena, const char **dss, +void arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty); +void arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 9add3ed9..9c5e9328 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -38,6 +38,9 @@ struct 
ctl_arena_stats_s { ssize_t decay_time; size_t pactive; size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 54d3807d..5880996a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -2,6 +2,7 @@ a0dalloc a0malloc arena_aalloc arena_alloc_junk_small +arena_basic_stats_merge arena_bin_index arena_bin_info arena_bitselm_get diff --git a/src/arena.c b/src/arena.c index c579a582..99e20fde 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3202,20 +3202,45 @@ arena_decay_time_default_set(ssize_t decay_time) return (false); } -void -arena_stats_merge(arena_t *arena, const char **dss, ssize_t *lg_dirty_mult, - ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats) +static void +arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) { - unsigned i; - malloc_mutex_lock(&arena->lock); + *nthreads += arena_nthreads_get(arena); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; *decay_time = arena->decay_time; *nactive += arena->nactive; *ndirty += arena->ndirty; +} + +void +arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, + size_t *ndirty) +{ + + malloc_mutex_lock(&arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); + malloc_mutex_unlock(&arena->lock); +} + +void +arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, + ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t 
*nactive, + size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) +{ + unsigned i; + + cassert(config_stats); + + malloc_mutex_lock(&arena->lock); + arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, + decay_time, nactive, ndirty); astats->mapped += arena->stats.mapped; astats->npurge += arena->stats.npurge; diff --git a/src/ctl.c b/src/ctl.c index dbf57c36..17bd0719 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -533,6 +533,7 @@ static void ctl_arena_clear(ctl_arena_stats_t *astats) { + astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; astats->lg_dirty_mult = -1; astats->decay_time = -1; @@ -557,16 +558,23 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; - arena_stats_merge(arena, &cstats->dss, &cstats->lg_dirty_mult, - &cstats->decay_time, &cstats->pactive, &cstats->pdirty, - &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); + if (config_stats) { + arena_stats_merge(arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty, &cstats->astats, + cstats->bstats, cstats->lstats, cstats->hstats); - for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].curregs * - index2size(i); - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; + for (i = 0; i < NBINS; i++) { + cstats->allocated_small += cstats->bstats[i].curregs * + index2size(i); + cstats->nmalloc_small += cstats->bstats[i].nmalloc; + cstats->ndalloc_small += cstats->bstats[i].ndalloc; + cstats->nrequests_small += cstats->bstats[i].nrequests; + } + } else { + arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss, + &cstats->lg_dirty_mult, &cstats->decay_time, + &cstats->pactive, &cstats->pdirty); } } @@ -575,57 +583,68 @@ 
ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) { unsigned i; + sstats->nthreads += astats->nthreads; sstats->pactive += astats->pactive; sstats->pdirty += astats->pdirty; - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; + if (config_stats) { + sstats->astats.mapped += astats->astats.mapped; + sstats->astats.npurge += astats->astats.npurge; + sstats->astats.nmadvise += astats->astats.nmadvise; + sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata_mapped += astats->astats.metadata_mapped; - sstats->astats.metadata_allocated += astats->astats.metadata_allocated; + sstats->astats.metadata_mapped += + astats->astats.metadata_mapped; + sstats->astats.metadata_allocated += + astats->astats.metadata_allocated; - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; + sstats->allocated_small += astats->allocated_small; + sstats->nmalloc_small += astats->nmalloc_small; + sstats->ndalloc_small += astats->ndalloc_small; + sstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_large += astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += astats->astats.nrequests_large; + sstats->astats.allocated_large += + astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += + astats->astats.nrequests_large; - sstats->astats.allocated_huge += astats->astats.allocated_huge; - sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; - 
sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.allocated_huge += astats->astats.allocated_huge; + sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; + sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - for (i = 0; i < NBINS; i++) { - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += astats->bstats[i].nrequests; - sstats->bstats[i].curregs += astats->bstats[i].curregs; - if (config_tcache) { - sstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += - astats->bstats[i].nflushes; + for (i = 0; i < NBINS; i++) { + sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sstats->bstats[i].nrequests += + astats->bstats[i].nrequests; + sstats->bstats[i].curregs += astats->bstats[i].curregs; + if (config_tcache) { + sstats->bstats[i].nfills += + astats->bstats[i].nfills; + sstats->bstats[i].nflushes += + astats->bstats[i].nflushes; + } + sstats->bstats[i].nruns += astats->bstats[i].nruns; + sstats->bstats[i].reruns += astats->bstats[i].reruns; + sstats->bstats[i].curruns += astats->bstats[i].curruns; } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; - } - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } + for (i = 0; i < nlclasses; i++) { + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += + astats->lstats[i].nrequests; + sstats->lstats[i].curruns += astats->lstats[i].curruns; + } - for (i = 0; i < 
nhclasses; i++) { - sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; - sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; - sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; + for (i = 0; i < nhclasses; i++) { + sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; + sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].curhchunks += + astats->hstats[i].curhchunks; + } } } @@ -636,19 +655,9 @@ ctl_arena_refresh(arena_t *arena, unsigned i) ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - - sstats->nthreads += astats->nthreads; - if (config_stats) { - ctl_arena_stats_amerge(astats, arena); - /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); - } else { - astats->pactive += arena->nactive; - astats->pdirty += arena->ndirty; - /* Merge into sum stats as well. */ - sstats->pactive += arena->nactive; - sstats->pdirty += arena->ndirty; - } + ctl_arena_stats_amerge(astats, arena); + /* Merge into sum stats as well. */ + ctl_arena_stats_smerge(sstats, astats); } static bool @@ -701,20 +710,11 @@ ctl_refresh(void) * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
*/ - ctl_stats.arenas[ctl_stats.narenas].nthreads = 0; ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); for (i = 0; i < ctl_stats.narenas; i++) tarenas[i] = arena_get(i, false); - for (i = 0; i < ctl_stats.narenas; i++) { - if (tarenas[i] != NULL) { - ctl_stats.arenas[i].nthreads = - arena_nthreads_get(arena_get(i, false)); - } else - ctl_stats.arenas[i].nthreads = 0; - } - for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 01333514..69f8c20c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -687,8 +687,10 @@ TEST_BEGIN(test_stats_arenas) 0), 0, "Unexpected mallctl() failure"); \ } while (0) - TEST_STATS_ARENAS(const char *, dss); TEST_STATS_ARENAS(unsigned, nthreads); + TEST_STATS_ARENAS(const char *, dss); + TEST_STATS_ARENAS(ssize_t, lg_dirty_mult); + TEST_STATS_ARENAS(ssize_t, decay_time); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); From 39f58755a7c2c5c12c9b732c17fe472c9872ab4b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 21:18:15 -0800 Subject: [PATCH 0146/2608] Fix a potential tsd cleanup leak. Prior to 767d85061a6fb88ec977bbcd9b429a43aff391e6 (Refactor arenas array (fixes deadlock).), it was possible under some circumstances for arena_get() to trigger recreation of the arenas cache during tsd cleanup, and the arenas cache would then be leaked. In principle a similar issue could still occur as a side effect of decay-based purging, which calls arena_tdata_get(). Fix arenas_tdata_cleanup() by setting tsd->arenas_tdata_bypass to true, so that arena_tdata_get() will gracefully fail (an expected behavior) rather than recreating tsd->arena_tdata. Reported by Christopher Ferris . 
--- src/jemalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index c8841783..0735376e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -652,6 +652,9 @@ arenas_tdata_cleanup(tsd_t *tsd) { arena_tdata_t *arenas_tdata; + /* Prevent tsd->arenas_tdata from being (re)created. */ + *tsd_arenas_tdata_bypassp_get(tsd) = true; + arenas_tdata = tsd_arenas_tdata_get(tsd); if (arenas_tdata != NULL) { tsd_arenas_tdata_set(tsd, NULL); From 7d3055432d303f114d15f67c60bdebcbb4dbd39a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 27 Feb 2016 23:40:31 -0800 Subject: [PATCH 0147/2608] Fix decay tests for --disable-tcache case. --- test/unit/decay.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 1052f6fb..70a2e67a 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -160,7 +160,7 @@ TEST_BEGIN(test_decay_ticks) * Test tcache fill/flush interactions for large and small size classes, * using an explicit tcache. */ - { + if (config_tcache) { unsigned tcache_ind, i; size_t tcache_sizes[2]; tcache_sizes[0] = large0; @@ -204,7 +204,7 @@ TEST_BEGIN(test_decay_ticker) uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, tcache_max, large; + size_t sz, large; unsigned i, nupdates0; nstime_t time, decay_time, deadline; @@ -216,10 +216,18 @@ TEST_BEGIN(test_decay_ticker) * verify the ticker triggers purging. 
*/ - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); + if (config_tcache) { + size_t tcache_max; + + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); + } else { + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lrun.0.size", &large, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + } assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); From e025c5158b2dd524a20ffc8db9d096816f6641fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Feb 2016 00:01:13 -0800 Subject: [PATCH 0148/2608] Update ChangeLog. --- ChangeLog | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ChangeLog b/ChangeLog index 92d267eb..e35d74cc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -30,12 +30,12 @@ brevity. Much more detail can be found in the git revision history: options string during configuration. This was motivated by the desire to specify --with-malloc-conf=purge:decay , since the default must remain purge:ratio until the 5.0.0 release. (@jasone) + - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) - Make *allocx() size class overflow behavior defined. The maximum size class is now less than PTRDIFF_MAX to protect applications against numerical overflow, and all allocation functions are guaranteed to indicate errors rather than potentially crashing if the request size exceeds the maximum size class. (@jasone) - - Add MS Visual Studio 2015 support. (@rustyx, @yuslepukhin) - jeprof: + Add raw heap profile support. (@jasone) + Add --retain and --exclude for backtrace symbol filtering. (@jasone) @@ -57,22 +57,25 @@ brevity. Much more detail can be found in the git revision history: - Make opt.narenas unsigned rather than size_t. 
(@jasone) Bug fixes: + - Fix stats.cactive accounting regression. (@rustyx, @jasone) + - Handle unaligned keys in hash(). This caused problems for some ARM systems. + (@jasone, Christopher Ferris) - Refactor arenas array. In addition to fixing a fork-related deadlock, this makes arena lookups faster and simpler. (@jasone) - - Handle unaligned keys in hash(). This caused problems for some ARM systems. - (@jasone) - - Fix run quantization. In practice this bug had no impact unless - applications requested memory with alignment exceeding one page. (@jasone) - Move retained memory allocation out of the default chunk allocation function, to a location that gets executed even if the application installs a custom chunk allocation function. This resolves a virtual memory leak. (@buchgr) - - Resolve undefined unsigned-to-signed conversion that could cause corruption - of the stats.cactive statistic. (@jasone) + - Fix a potential tsd cleanup leak. (Christopher Ferris, @jasone) + - Fix run quantization. In practice this bug had no impact unless + applications requested memory with alignment exceeding one page. + (@jasone, @djwatson) - Fix LinuxThreads-specific bootstrapping deadlock. (Cosmin Paraschiv) - jeprof: + Don't discard curl options if timeout is not defined. (@djwatson) + Detect failed profile fetches. (@djwatson) + - Fix stats.arenas..{dss,lg_dirty_mult,decay_time,pactive,pdirty} for + --disable-stats case. 
(@jasone) * 4.0.4 (October 24, 2015) From e270a8f936d52766557a2ceca8b5e3ad315dc54d Mon Sep 17 00:00:00 2001 From: rustyx Date: Sat, 27 Feb 2016 18:29:31 +0100 Subject: [PATCH 0149/2608] Make test_threads more generic --- .../vc2015/test_threads/test_threads.cpp | 121 ++++++++---------- 1 file changed, 55 insertions(+), 66 deletions(-) diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp index 603bdce7..c8cb7d66 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -10,88 +10,77 @@ #include #include #include -#include using std::vector; using std::thread; using std::uniform_int_distribution; using std::minstd_rand; -#if NDEBUG && JEMALLOC_ISSUE_318_WORKAROUND -extern "C" JEMALLOC_EXPORT void _malloc_thread_cleanup(void); - -static thread_local struct JeMallocThreadHelper { - ~JeMallocThreadHelper() { - _malloc_thread_cleanup(); - } -} tls_jemallocThreadHelper; -#endif - int test_threads() { - je_malloc_conf = "narenas:3"; - int narenas = 0; - size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); - if (narenas != 3) { - printf("Error: unexpected number of arenas: %d\n", narenas); - return 1; - } - static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; - static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); - vector workers; - static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; - je_malloc_stats_print(NULL, NULL, NULL); + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 
917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); size_t allocated1; size_t sz1 = sizeof(allocated1); je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); printf("\nPress Enter to start threads...\n"); - getchar(); - printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); - for (int i = 0; i < numThreads; i++) { - workers.emplace_back([tid=i]() { - uniform_int_distribution sizeDist(0, numSizes - 1); - minstd_rand rnd(tid * 17); - uint8_t* ptrs[numAllocsMax]; - int ptrsz[numAllocsMax]; - for (int i = 0; i < numIter1; ++i) { - thread t([&]() { - for (int i = 0; i < numIter2; ++i) { - const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j += 64) { - const int x = sizeDist(rnd); - const int sz = sizes[x]; - ptrsz[j] = sz; - ptrs[j] = (uint8_t*)je_malloc(sz); - if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d\n", sz, tid, i, j, x); - exit(1); - } - for (int k = 0; k < sz; k++) - ptrs[j][k] = tid + k; - } - for (int j = 0; j < numAllocs; j += 64) { - for (int k = 0, sz = ptrsz[j]; k < sz; k++) - if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); - exit(1); - } - je_free(ptrs[j]); - } - } - }); - t.join(); - } - }); - } - for (thread& t : workers) { - t.join(); - } + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid=i]() { + uniform_int_distribution sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t* ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = numAllocsMax - sizeDist(rnd); + for (int j = 0; j < numAllocs; j += 64) { + const int x = sizeDist(rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t*)je_malloc(sz); + if (!ptrs[j]) { + printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", sz, tid, i, j, x); + exit(1); + } + for (int k = 0; k < sz; k++) + ptrs[j][k] = tid + k; + } + for (int j = 0; j < numAllocs; j += 64) { + for (int k = 0, sz = ptrsz[j]; k < sz; k++) + if (ptrs[j][k] != (uint8_t)(tid + k)) { + printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread& t : workers) { + t.join(); + } je_malloc_stats_print(NULL, NULL, NULL); size_t allocated2; je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); size_t leaked = allocated2 - allocated1; - printf("\nDone. Leaked: %Id bytes\n", leaked); + printf("\nDone. 
Leaked: %zd bytes\n", leaked); bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); printf("\nPress Enter to continue...\n"); From 3a342616ffc4992e19fdb57df6d6b85a952718be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Feb 2016 14:52:17 -0800 Subject: [PATCH 0150/2608] Update ChangeLog for 4.1.0. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index e35d74cc..9cbfbf96 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.1.0 (XXX) +* 4.1.0 (February 28, 2016) This release is primarily about optimizations, but it also incorporates a lot of portability-motivated refactoring and enhancements. Many people worked on From 994da4232621dd1210fcf39bdf0d6454cefda473 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 28 Feb 2016 15:20:40 -0800 Subject: [PATCH 0151/2608] Update copyright dates for 2016. --- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index 611968cd..104b1f8b 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2015 Jason Evans . +Copyright (C) 2002-2016 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2015 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From 86478b29989075cfe7dcf5f0c104bac3fa584a17 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Feb 2016 17:53:13 -0800 Subject: [PATCH 0152/2608] Remove errno overrides. --- include/msvc_compat/windows_extra.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 0c5e323f..3008faa3 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -1,26 +1,6 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H #define MSVC_COMPAT_WINDOWS_EXTRA_H -#ifndef ENOENT -# define ENOENT ERROR_PATH_NOT_FOUND -#endif -#ifndef EINVAL -# define EINVAL ERROR_BAD_ARGUMENTS -#endif -#ifndef EAGAIN -# define EAGAIN ERROR_OUTOFMEMORY -#endif -#ifndef EPERM -# define EPERM ERROR_WRITE_FAULT -#endif -#ifndef EFAULT -# define EFAULT ERROR_INVALID_ADDRESS -#endif -#ifndef ENOMEM -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -#endif -#ifndef ERANGE -# define ERANGE ERROR_INVALID_DATA -#endif +#include #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ From 0e1d5c25c677064ed81e3ec0f88b52f835557171 Mon Sep 17 00:00:00 2001 From: rustyx Date: Mon, 29 Feb 2016 21:04:29 +0100 Subject: [PATCH 0153/2608] Fix MSVC project and improve MSVC lib naming (v140 -> vc140) --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 8 ++++++-- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 18 +++++++++++++++--- .../vc2015/test_threads/test_threads.vcxproj | 4 ++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d8ad505b..f3f0260b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + @@ -69,6 +70,7 @@ + @@ -103,11 +105,13 @@ + + @@ -227,7 +231,7 @@ 
$(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) $(SolutionDir)$(Platform)\$(Configuration)\ @@ -236,7 +240,7 @@ $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 89a51f76..ce70632b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -101,6 +101,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -146,6 +149,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -214,9 +220,15 @@ Source Files + + Source Files + Source Files + + Source Files + Source Files @@ -232,14 +244,14 @@ Source Files + + Source Files + Source Files Source Files - - Source Files - \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj index b681e71e..f5e9898f 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -223,7 +223,7 @@ Console true - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(SolutionDir)$(Platform)\$(Configuration) @@ -306,7 +306,7 @@ true true 
$(SolutionDir)$(Platform)\$(Configuration) - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) From 33184bf69813087bf1885b0993685f9d03320c69 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 29 Feb 2016 14:30:19 -0800 Subject: [PATCH 0154/2608] Fix stack corruption and uninitialized var warning Stack corruption happens in x64 bit This resolves #347. --- src/arena.c | 2 +- test/unit/hash.c | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/arena.c b/src/arena.c index 99e20fde..965c0fe2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2423,7 +2423,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) uintptr_t random_offset; arena_run_t *run; arena_chunk_map_misc_t *miscelm; - UNUSED bool idump; + UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); /* Large allocation. 
*/ usize = index2size(binind); diff --git a/test/unit/hash.c b/test/unit/hash.c index f50ba81b..010c9d76 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -64,14 +64,15 @@ static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; - VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); + const int hashes_size = hashbytes * 256; + VARIABLE_ARRAY(uint8_t, hashes, hashes_size); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; memset(key, 0, KEY_SIZE); - memset(hashes, 0, sizeof(hashes)); - memset(final, 0, sizeof(final)); + memset(hashes, 0, hashes_size); + memset(final, 0, hashbytes); /* * Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the @@ -102,17 +103,17 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) /* Hash the result array. */ switch (variant) { case hash_variant_x86_32: { - uint32_t out = hash_x86_32(hashes, hashbytes*256, 0); + uint32_t out = hash_x86_32(hashes, hashes_size, 0); memcpy(final, &out, sizeof(out)); break; } case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(hashes, hashbytes*256, 0, out); + hash_x86_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } case hash_variant_x64_128: { uint64_t out[2]; - hash_x64_128(hashes, hashbytes*256, 0, out); + hash_x64_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } default: not_reached(); From 022f6891faf1fffa435f2bc613c25e8482a32702 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Mar 2016 22:41:32 -0800 Subject: [PATCH 0155/2608] Avoid a potential innocuous compiler warning. Add a cast to avoid comparing a ssize_t value to a uint64_t value that is always larger than a 32-bit ssize_t. This silences an innocuous compiler warning from e.g. gcc 4.2.1 about the comparison always having the same result. 
--- include/jemalloc/internal/nstime.h | 2 +- src/arena.c | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index bd04f04b..dcb4b47f 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -7,7 +7,7 @@ typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). */ -#define NSTIME_SEC_MAX 18446744072 +#define NSTIME_SEC_MAX KQU(18446744072) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index 965c0fe2..f436959e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1352,7 +1352,11 @@ static bool arena_decay_time_valid(ssize_t decay_time) { - return (decay_time >= -1 && decay_time <= NSTIME_SEC_MAX); + if (decay_time < -1) + return (false); + if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) + return (true); + return (false); } ssize_t From e3998c681dec35fe0de25f693a39de6fb881134e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Mar 2016 17:55:55 -0800 Subject: [PATCH 0156/2608] Replace contributor name with github account. --- ChangeLog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9cbfbf96..69f4dbb0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -59,14 +59,14 @@ brevity. Much more detail can be found in the git revision history: Bug fixes: - Fix stats.cactive accounting regression. (@rustyx, @jasone) - Handle unaligned keys in hash(). This caused problems for some ARM systems. - (@jasone, Christopher Ferris) + (@jasone, @cferris1000) - Refactor arenas array. In addition to fixing a fork-related deadlock, this makes arena lookups faster and simpler. (@jasone) - Move retained memory allocation out of the default chunk allocation function, to a location that gets executed even if the application installs a custom chunk allocation function. 
This resolves a virtual memory leak. (@buchgr) - - Fix a potential tsd cleanup leak. (Christopher Ferris, @jasone) + - Fix a potential tsd cleanup leak. (@cferris1000, @jasone) - Fix run quantization. In practice this bug had no impact unless applications requested memory with alignment exceeding one page. (@jasone, @djwatson) From 6bafa6678fc36483e638f1c3a0a9bf79fb89bfc9 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:22:52 -0800 Subject: [PATCH 0157/2608] Pairing heap Initial implementation of a twopass pairing heap with aux list. Research papers linked in comments. Where search/nsearch/last aren't needed, this gives much faster first(), delete(), and insert(). Insert is O(1), and first/delete don't have to walk the whole tree. Also tested rb_old with parent pointers - it was better than the current rb.h for memory loads, but still much worse than a pairing heap. An array-based heap would be much faster if everything fits in memory, but on a cold cache it has many more memory loads for most operations. 
--- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 + include/jemalloc/internal/ph.h | 255 ++++++++++++++++++ include/jemalloc/internal/private_symbols.txt | 8 + src/ph.c | 2 + 5 files changed, 270 insertions(+) create mode 100644 include/jemalloc/internal/ph.h create mode 100644 src/ph.c diff --git a/Makefile.in b/Makefile.in index f60823f5..3d725be1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,6 +95,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ + $(srcroot)src/ph.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/quarantine.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3f54391f..d3b94c00 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -371,6 +371,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" @@ -401,6 +402,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/ph.h" #define JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_A @@ -494,6 +496,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" @@ -525,6 +528,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/ph.h" 
#include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h new file mode 100644 index 00000000..aeca693e --- /dev/null +++ b/include/jemalloc/internal/ph.h @@ -0,0 +1,255 @@ +/* + * A Pairing Heap implementation. + * + * "The Pairing Heap: A New Form of Self-Adjusting Heap" + * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf + * + * With auxiliary list, described in a follow on paper + * + * "Pairing Heaps: Experiments and Analysis" + * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf + * + * Where search/nsearch/last are not needed, ph.h outperforms rb.h by ~7x fewer + * cpu cycles, and ~4x fewer memory references. + * + * Tagging parent/prev pointers on the next list was also described in the + * original paper, such that only two pointers are needed. This is not + * implemented here, as it substantially increases the memory references + * needed when ph_remove is called, almost overshadowing the other performance + * gains. 
+ * + ******************************************************************************* + */ +#ifdef JEMALLOC_H_TYPES + +typedef struct ph_node_s ph_node_t; +typedef struct ph_heap_s ph_heap_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct ph_node_s { + ph_node_t *subheaps; + ph_node_t *parent; + ph_node_t *next; + ph_node_t *prev; +}; + +struct ph_heap_s { + ph_node_t *root; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); +ph_node_t *ph_merge_pairs(ph_node_t *subheaps); +void ph_merge_aux_list(ph_heap_t *l); +void ph_new(ph_heap_t *n); +ph_node_t *ph_first(ph_heap_t *l); +void ph_insert(ph_heap_t *l, ph_node_t *n); +ph_node_t *ph_remove_first(ph_heap_t *l); +void ph_remove(ph_heap_t *l, ph_node_t *n); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PH_C_)) + +/* Helper routines ************************************************************/ + +JEMALLOC_INLINE ph_node_t * +ph_merge(ph_node_t *heap1, ph_node_t *heap2) +{ + + if (heap1 == NULL) + return (heap2); + if (heap2 == NULL) + return (heap1); + /* Optional: user-settable comparison function */ + if ((uintptr_t)heap1 < (uintptr_t)heap2) { + heap2->parent = heap1; + heap2->prev = NULL; + heap2->next = heap1->subheaps; + if (heap1->subheaps != NULL) + heap1->subheaps->prev = heap2; + heap1->subheaps = heap2; + return (heap1); + } else { + heap1->parent = heap2; + heap1->prev = NULL; + heap1->next = heap2->subheaps; + if (heap2->subheaps != NULL) + heap2->subheaps->prev = heap1; + heap2->subheaps = heap1; + return (heap2); + } +} + +JEMALLOC_INLINE ph_node_t * 
+ph_merge_pairs(ph_node_t *subheaps) +{ + + if (subheaps == NULL) + return (NULL); + if (subheaps->next == NULL) + return (subheaps); + { + ph_node_t *l0 = subheaps; + ph_node_t *l1 = l0->next; + ph_node_t *lrest = l1->next; + + if (lrest != NULL) + lrest->prev = NULL; + l1->next = NULL; + l1->prev = NULL; + l0->next = NULL; + l0->prev = NULL; + return (ph_merge(ph_merge(l0, l1), ph_merge_pairs(lrest))); + } +} + +/* + * Merge the aux list into the root node. + */ +JEMALLOC_INLINE void +ph_merge_aux_list(ph_heap_t *l) +{ + + if (l->root == NULL) + return; + if (l->root->next != NULL) { + ph_node_t *l0 = l->root->next; + ph_node_t *l1 = l0->next; + ph_node_t *lrest = NULL; + + /* Multipass merge. */ + while (l1 != NULL) { + lrest = l1->next; + if (lrest != NULL) + lrest->prev = NULL; + l1->next = NULL; + l1->prev = NULL; + l0->next = NULL; + l0->prev = NULL; + l0 = ph_merge(l0, l1); + l1 = lrest; + } + l->root->next = NULL; + l->root = ph_merge(l->root, l0); + } +} + +/* User API *******************************************************************/ + +JEMALLOC_INLINE void +ph_new(ph_heap_t *n) +{ + + memset(n, 0, sizeof(ph_heap_t)); +} + +JEMALLOC_INLINE ph_node_t * +ph_first(ph_heap_t *l) +{ + + /* + * For the cost of an extra pointer, a l->min could be stored instead of + * merging the aux list here. Current users always call ph_remove(l, + * ph_first(l)) though, and the aux list must always be merged for + * delete of the min node anyway. 
+ */ + ph_merge_aux_list(l); + return (l->root); +} + +JEMALLOC_INLINE void +ph_insert(ph_heap_t *l, ph_node_t *n) +{ + + memset(n, 0, sizeof(ph_node_t)); + + /* + * Non-aux list insert: + * + * l->root = ph_merge(l->root, n); + * + * Aux list insert: + */ + if (l->root == NULL) + l->root = n; + else { + n->next = l->root->next; + if (l->root->next != NULL) + l->root->next->prev = n; + n->prev = l->root; + l->root->next = n; + } +} + +JEMALLOC_INLINE ph_node_t * +ph_remove_first(ph_heap_t *l) +{ + ph_node_t *ret; + + ph_merge_aux_list(l); + if (l->root == NULL) + return (NULL); + + ret = l->root; + + l->root = ph_merge_pairs(l->root->subheaps); + + return (ret); +} + +JEMALLOC_INLINE void +ph_remove(ph_heap_t *l, ph_node_t *n) +{ + ph_node_t *replace; + + /* + * We can delete from aux list without merging it, but we need to merge + * if we are dealing with the root node. + */ + if (l->root == n) { + ph_merge_aux_list(l); + if (l->root == n) { + ph_remove_first(l); + return; + } + } + + /* Find a possible replacement node, and link to parent. */ + replace = ph_merge_pairs(n->subheaps); + if (n->parent != NULL && n->parent->subheaps == n) { + if (replace != NULL) + n->parent->subheaps = replace; + else + n->parent->subheaps = n->next; + } + /* Set next/prev for sibling linked list. 
*/ + if (replace != NULL) { + replace->parent = n->parent; + replace->prev = n->prev; + if (n->prev != NULL) + n->prev->next = replace; + replace->next = n->next; + if (n->next != NULL) + n->next->prev = replace; + } else { + if (n->prev != NULL) + n->prev->next = n->next; + if (n->next != NULL) + n->next->prev = n->prev; + } +} +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5880996a..2de1d5f3 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -379,6 +379,14 @@ pages_map pages_purge pages_trim pages_unmap +ph_first +ph_insert +ph_merge +ph_merge_aux_list +ph_merge_pairs +ph_new +ph_remove_first +ph_remove pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu diff --git a/src/ph.c b/src/ph.c new file mode 100644 index 00000000..051a20d7 --- /dev/null +++ b/src/ph.c @@ -0,0 +1,2 @@ +#define JEMALLOC_PH_C_ +#include "jemalloc/internal/jemalloc_internal.h" From 34dca5671fec8c592f1ca80ce11dc808cf6b83ed Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:30:34 -0800 Subject: [PATCH 0158/2608] Unittest for pairing heap --- Makefile.in | 1 + test/unit/ph.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) create mode 100644 test/unit/ph.c diff --git a/Makefile.in b/Makefile.in index 3d725be1..7f2d668a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -148,6 +148,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ diff --git a/test/unit/ph.c b/test/unit/ph.c new file mode 100644 index 00000000..b0e44028 --- /dev/null +++ b/test/unit/ph.c @@ -0,0 +1,92 @@ +#include 
"test/jemalloc_test.h" + +typedef struct node_s node_t; + +struct node_s { + ph_node_t link; +}; + +TEST_BEGIN(test_ph_empty) +{ + ph_heap_t heap; + + ph_new(&heap); + + assert_ptr_null(ph_first(&heap), "Unexpected node"); +} +TEST_END + +TEST_BEGIN(test_ph_random) +{ +#define NNODES 25 +#define NBAGS 250 +#define SEED 42 + sfmt_t *sfmt; + uint64_t bag[NNODES]; + ph_heap_t heap; + node_t nodes[NNODES]; + unsigned i, j, k; + + sfmt = init_gen_rand(SEED); + for (i = 0; i < NBAGS; i++) { + switch (i) { + case 0: + /* Insert in order. */ + for (j = 0; j < NNODES; j++) + bag[j] = j; + break; + case 1: + /* Insert in reverse order. */ + for (j = 0; j < NNODES; j++) + bag[j] = NNODES - j - 1; + break; + default: + for (j = 0; j < NNODES; j++) + bag[j] = gen_rand64_range(sfmt, NNODES); + } + + for (j = 1; j <= NNODES; j++) { + /* Initialize heap and nodes. */ + ph_new(&heap); + + /* Insert nodes. */ + for (k = 0; k < j; k++) { + ph_insert(&heap, &nodes[k].link); + + assert_ptr_not_null(ph_first(&heap), + "Heap should not be empty"); + } + + /* Remove nodes. */ + switch (i % 2) { + case 0: + for (k = 0; k < j; k++) + ph_remove(&heap, &nodes[k].link); + break; + case 1: + for (k = j; k > 0; k--) + ph_remove(&heap, &nodes[k-1].link); + break; + default: + not_reached(); + } + + assert_ptr_null(ph_first(&heap), + "Heap should not be empty"); + } + } + fini_gen_rand(sfmt); +#undef NNODES +#undef NBAGS +#undef SEED +} +TEST_END + +int +main(void) +{ + + return (test( + test_ph_empty, + test_ph_random)); +} From f8d80d62a8765c54aaa9433148fd112f7c794734 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 8 Mar 2016 13:43:47 -0800 Subject: [PATCH 0159/2608] Refactor ph_merge_ordered() out of ph_merge(). 
--- include/jemalloc/internal/ph.h | 39 +++++++++++-------- include/jemalloc/internal/private_symbols.txt | 1 + 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index aeca693e..519f0dda 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -49,6 +49,7 @@ struct ph_heap_s { #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +ph_node_t *ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2); ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); ph_node_t *ph_merge_pairs(ph_node_t *subheaps); void ph_merge_aux_list(ph_heap_t *l); @@ -63,6 +64,23 @@ void ph_remove(ph_heap_t *l, ph_node_t *n); /* Helper routines ************************************************************/ +JEMALLOC_INLINE ph_node_t * +ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2) +{ + + assert(heap1 != NULL); + assert(heap2 != NULL); + assert ((uintptr_t)heap1 <= (uintptr_t)heap2); + + heap2->parent = heap1; + heap2->prev = NULL; + heap2->next = heap1->subheaps; + if (heap1->subheaps != NULL) + heap1->subheaps->prev = heap2; + heap1->subheaps = heap2; + return (heap1); +} + JEMALLOC_INLINE ph_node_t * ph_merge(ph_node_t *heap1, ph_node_t *heap2) { @@ -72,23 +90,10 @@ ph_merge(ph_node_t *heap1, ph_node_t *heap2) if (heap2 == NULL) return (heap1); /* Optional: user-settable comparison function */ - if ((uintptr_t)heap1 < (uintptr_t)heap2) { - heap2->parent = heap1; - heap2->prev = NULL; - heap2->next = heap1->subheaps; - if (heap1->subheaps != NULL) - heap1->subheaps->prev = heap2; - heap1->subheaps = heap2; - return (heap1); - } else { - heap1->parent = heap2; - heap1->prev = NULL; - heap1->next = heap2->subheaps; - if (heap2->subheaps != NULL) - heap2->subheaps->prev = heap1; - heap2->subheaps = heap1; - return (heap2); - } + if ((uintptr_t)heap1 < (uintptr_t)heap2) + return (ph_merge_ordered(heap1, heap2)); + else + return (ph_merge_ordered(heap2, heap1)); } JEMALLOC_INLINE 
ph_node_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 2de1d5f3..aeb43b1d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -383,6 +383,7 @@ ph_first ph_insert ph_merge ph_merge_aux_list +ph_merge_ordered ph_merge_pairs ph_new ph_remove_first From 4a0dbb5ac844830ebd7f89af20203a574ce1b3da Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Feb 2016 11:54:42 -0800 Subject: [PATCH 0160/2608] Use pairing heap for arena->runs_avail Use pairing heap instead of red black tree in arena runs_avail. The extra links are unioned with the bitmap_t, so this change doesn't use any extra memory. Canaries show this change to be a 1% cpu win, and 2% latency win. In particular, large free()s, and small bin frees are now O(1) (barring coalescing). I also tested changing bin->runs to be a pairing heap, but saw a much smaller win, and it would mean increasing the size of arena_run_s by two pointers, so I left that as an rb-tree for now. 
--- include/jemalloc/internal/arena.h | 29 +++++++++++++++---- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 28 +++++++++--------- 3 files changed, 40 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3519873c..babd5129 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -36,6 +36,7 @@ typedef enum { #define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; +typedef struct arena_avail_links_s arena_avail_links_t; typedef struct arena_run_s arena_run_t; typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; @@ -146,6 +147,11 @@ struct arena_runs_dirty_link_s { qr(arena_runs_dirty_link_t) rd_link; }; +struct arena_avail_links_s { + arena_runs_dirty_link_t rd; + ph_node_t ph_link; +}; + /* * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just * like arena_chunk_map_bits_t. Two separate arrays are stored within each @@ -163,7 +169,7 @@ struct arena_chunk_map_misc_s { union { /* Linkage for list of dirty runs. */ - arena_runs_dirty_link_t rd; + arena_avail_links_t avail; /* Profile counters, used for large object runs. */ union { @@ -457,10 +463,10 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered trees of this arena's available runs. The - * trees are used for first-best-fit run allocation. + * Quantized address-ordered heaps of this arena's available runs. The + * heaps are used for first-best-fit run allocation. */ - arena_run_tree_t runs_avail[1]; /* Dynamically sized. */ + ph_heap_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ @@ -600,6 +606,7 @@ arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); +arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); size_t arena_mapbitsp_read(size_t *mapbitsp); @@ -702,7 +709,19 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, avail)); + + assert(arena_miscelm_to_pageind(miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + + return (miscelm); +} + +JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * +arena_ph_to_miscelm(ph_node_t *ph) +{ + arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *) + ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, avail.ph_link)); assert(arena_miscelm_to_pageind(miscelm) >= map_bias); assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index aeb43b1d..aed60cb1 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -79,6 +79,7 @@ arena_nthreads_dec arena_nthreads_get arena_nthreads_inc arena_palloc +arena_ph_to_miscelm arena_postfork_child arena_postfork_parent arena_prefork diff --git a/src/arena.c b/src/arena.c index f436959e..fc9852df 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,7 +199,7 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -static arena_run_tree_t * +static 
ph_heap_t * arena_runs_avail_get(arena_t *arena, szind_t ind) { @@ -217,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_tree_insert(arena_runs_avail_get(arena, ind), - arena_miscelm_get(chunk, pageind)); + ph_insert(arena_runs_avail_get(arena, ind), + &arena_miscelm_get(chunk, pageind)->avail.ph_link); } static void @@ -229,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_tree_remove(arena_runs_avail_get(arena, ind), - arena_miscelm_get(chunk, pageind)); + ph_remove(arena_runs_avail_get(arena, ind), + &arena_miscelm_get(chunk, pageind)->avail.ph_link); } static void @@ -245,8 +245,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_new(&miscelm->rd, rd_link); - qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); + qr_new(&miscelm->avail.rd, rd_link); + qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link); arena->ndirty += npages; } @@ -262,7 +262,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_remove(&miscelm->rd, rd_link); + qr_remove(&miscelm->avail.rd, rd_link); assert(arena->ndirty >= npages); arena->ndirty -= npages; } @@ -1079,10 +1079,12 @@ arena_run_first_best_fit(arena_t *arena, size_t size) ind = size2index(run_quantize_ceil(size)); for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { - arena_chunk_map_misc_t *miscelm = arena_run_tree_first( - arena_runs_avail_get(arena, i)); - if (miscelm != NULL) + ph_node_t *node = ph_first(arena_runs_avail_get(arena, i)); + if (node != NULL) { + 
arena_chunk_map_misc_t *miscelm = + arena_ph_to_miscelm(node); return (&miscelm->run); + } } return (NULL); @@ -3323,7 +3325,7 @@ arena_new(unsigned ind) arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_tree_t) * + arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) * runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly @@ -3383,7 +3385,7 @@ arena_new(unsigned ind) arena->ndirty = 0; for(i = 0; i < runs_avail_nclasses; i++) - arena_run_tree_new(&arena->runs_avail[i]); + ph_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); From 613cdc80f6b61f698b3b0c3f2d22442044473f9b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 8 Mar 2016 01:04:48 -0800 Subject: [PATCH 0161/2608] Convert arena_bin_t's runs from a tree to a heap. --- include/jemalloc/internal/arena.h | 22 +++++--------- src/arena.c | 50 ++++++++++--------------------- 2 files changed, 23 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index babd5129..c08a742f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -147,11 +147,6 @@ struct arena_runs_dirty_link_s { qr(arena_runs_dirty_link_t) rd_link; }; -struct arena_avail_links_s { - arena_runs_dirty_link_t rd; - ph_node_t ph_link; -}; - /* * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just * like arena_chunk_map_bits_t. Two separate arrays are stored within each @@ -159,17 +154,17 @@ struct arena_avail_links_s { */ struct arena_chunk_map_misc_s { /* - * Linkage for run trees. There are two disjoint uses: + * Linkage for run heaps. There are two disjoint uses: * - * 1) arena_t's runs_avail tree. + * 1) arena_t's runs_avail heaps. 
* 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. */ - rb_node(arena_chunk_map_misc_t) rb_link; + ph_node_t ph_link; union { /* Linkage for list of dirty runs. */ - arena_avail_links_t avail; + arena_runs_dirty_link_t rd; /* Profile counters, used for large object runs. */ union { @@ -181,7 +176,6 @@ struct arena_chunk_map_misc_s { arena_run_t run; }; }; -typedef rb_tree(arena_chunk_map_misc_t) arena_run_tree_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B @@ -278,13 +272,13 @@ struct arena_bin_s { arena_run_t *runcur; /* - * Tree of non-full runs. This tree is used when looking for an + * Heap of non-full runs. This heap is used when looking for an * existing run when runcur is no longer usable. We choose the * non-full run that is lowest in memory; this policy tends to keep * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - arena_run_tree_t runs; + ph_heap_t runs; /* Bin statistics. 
*/ malloc_bin_stats_t stats; @@ -709,7 +703,7 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, avail)); + *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); assert(arena_miscelm_to_pageind(miscelm) >= map_bias); assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); @@ -721,7 +715,7 @@ JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_ph_to_miscelm(ph_node_t *ph) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *) - ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, avail.ph_link)); + ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, ph_link)); assert(arena_miscelm_to_pageind(miscelm) >= map_bias); assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); diff --git a/src/arena.c b/src/arena.c index fc9852df..0d232ff8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -59,23 +59,6 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) return (arena_mapbits_size_decode(mapbits)); } -JEMALLOC_INLINE_C int -arena_run_addr_comp(const arena_chunk_map_misc_t *a, - const arena_chunk_map_misc_t *b) -{ - uintptr_t a_miscelm = (uintptr_t)a; - uintptr_t b_miscelm = (uintptr_t)b; - - assert(a != NULL); - assert(b != NULL); - - return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); -} - -/* Generate red-black tree functions. 
*/ -rb_gen(static UNUSED, arena_run_tree_, arena_run_tree_t, arena_chunk_map_misc_t, - rb_link, arena_run_addr_comp) - static size_t run_quantize_floor_compute(size_t size) { @@ -218,7 +201,7 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_insert(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->avail.ph_link); + &arena_miscelm_get(chunk, pageind)->ph_link); } static void @@ -230,7 +213,7 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_remove(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->avail.ph_link); + &arena_miscelm_get(chunk, pageind)->ph_link); } static void @@ -245,8 +228,8 @@ arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_new(&miscelm->avail.rd, rd_link); - qr_meld(&arena->runs_dirty, &miscelm->avail.rd, rd_link); + qr_new(&miscelm->rd, rd_link); + qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); arena->ndirty += npages; } @@ -262,7 +245,7 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == CHUNK_MAP_DIRTY); - qr_remove(&miscelm->avail.rd, rd_link); + qr_remove(&miscelm->rd, rd_link); assert(arena->ndirty >= npages); arena->ndirty -= npages; } @@ -2069,11 +2052,14 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, static arena_run_t * arena_bin_runs_first(arena_bin_t *bin) { - arena_chunk_map_misc_t *miscelm = arena_run_tree_first(&bin->runs); - if (miscelm != NULL) - return (&miscelm->run); + ph_node_t *node; + arena_chunk_map_misc_t *miscelm; - return (NULL); + node = ph_first(&bin->runs); + if (node == NULL) + return (NULL); + miscelm = 
arena_ph_to_miscelm(node); + return (&miscelm->run); } static void @@ -2081,9 +2067,7 @@ arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, miscelm) == NULL); - - arena_run_tree_insert(&bin->runs, miscelm); + ph_insert(&bin->runs, &miscelm->ph_link); } static void @@ -2091,9 +2075,7 @@ arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - assert(arena_run_tree_search(&bin->runs, miscelm) != NULL); - - arena_run_tree_remove(&bin->runs, miscelm); + ph_remove(&bin->runs, &miscelm->ph_link); } static arena_run_t * @@ -2676,8 +2658,6 @@ arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, { assert(run != bin->runcur); - assert(arena_run_tree_search(&bin->runs, arena_run_to_miscelm(run)) == - NULL); malloc_mutex_unlock(&bin->lock); /******************************/ @@ -3414,7 +3394,7 @@ arena_new(unsigned ind) if (malloc_mutex_init(&bin->lock)) return (NULL); bin->runcur = NULL; - arena_run_tree_new(&bin->runs); + ph_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } From ca18f2834e17f31551f871cf4ca487aa9249614e Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Thu, 10 Mar 2016 22:49:05 -0800 Subject: [PATCH 0162/2608] typecast address to pointer to byte to avoid unaligned memory access error --- include/jemalloc/internal/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 864fda81..1ff2d9a0 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -53,7 +53,7 @@ hash_get_block_32(const uint32_t *p, int i) if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { uint32_t ret; - memcpy(&ret, &p[i], sizeof(uint32_t)); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); return (ret); } @@ -68,7 +68,7 @@ 
hash_get_block_64(const uint64_t *p, int i) if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { uint64_t ret; - memcpy(&ret, &p[i], sizeof(uint64_t)); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); return (ret); } From 824b947be08e87e0c317f585c250731897c2aa2c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Mar 2016 10:11:56 -0800 Subject: [PATCH 0163/2608] Add (size_t) casts to MALLOCX_ALIGN(). Add (size_t) casts to MALLOCX_ALIGN() macros so that passing the integer constant 0x80000000 does not cause a compiler warning about invalid shift amount. This resolves #354. --- include/jemalloc/jemalloc_macros.h.in | 8 ++++---- test/integration/mallocx.c | 23 ++++++++++------------- 2 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 9f356f98..129240ed 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -13,11 +13,11 @@ # define MALLOCX_LG_ALIGN(la) ((int)(la)) # if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs(a)-1)) +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) # else # define MALLOCX_ALIGN(a) \ - ((int)(((a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)((a)>>32))+31)) + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) # endif # define MALLOCX_ZERO ((int)0x40) /* @@ -29,7 +29,7 @@ /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". 
*/ -# define MALLOCX_ARENA(a) ((int)(((a)+1) << 20)) +# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 42eee105..d82bf422 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -69,18 +69,14 @@ TEST_END TEST_BEGIN(test_oom) { - size_t hugemax, size, alignment; - - hugemax = get_huge_size(get_nhuge()-1); /* * It should be impossible to allocate two objects that each consume * more than half the virtual address space. */ { - void *p; - - p = mallocx(hugemax, 0); + size_t hugemax = get_huge_size(get_nhuge()-1); + void *p = mallocx(hugemax, 0); if (p != NULL) { assert_ptr_null(mallocx(hugemax, 0), "Expected OOM for mallocx(size=%#zx, 0)", hugemax); @@ -89,15 +85,16 @@ TEST_BEGIN(test_oom) } #if LG_SIZEOF_PTR == 3 - size = ZU(0x8000000000000000); - alignment = ZU(0x8000000000000000); + assert_ptr_null(mallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x8000000000000000ULL)), + "Expected OOM for mallocx()"); + assert_ptr_null(mallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x80000000)), + "Expected OOM for mallocx()"); #else - size = ZU(0x80000000); - alignment = ZU(0x80000000); + assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), + "Expected OOM for mallocx()"); #endif - assert_ptr_null(mallocx(size, MALLOCX_ALIGN(alignment)), - "Expected OOM for mallocx(size=%#zx, MALLOCX_ALIGN(%#zx)", size, - alignment); } TEST_END From 434ea64b267e5e9e16a66ab1cccf9fab34302ff5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Mar 2016 20:19:11 -0700 Subject: [PATCH 0164/2608] Add --with-version. Also avoid deleting the VERSION file while trying to (re)generate it. This resolves #305. 
--- INSTALL | 4 ++++ configure.ac | 49 +++++++++++++++++++++++++++++-------------------- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/INSTALL b/INSTALL index 5c25054a..68787165 100644 --- a/INSTALL +++ b/INSTALL @@ -35,6 +35,10 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. +--with-version=..--g + Use the specified version string rather than trying to generate one (if in + a git repository) or use existing the VERSION file (if present). + --with-rpath= Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. diff --git a/configure.ac b/configure.ac index eb387ed9..3082916b 100644 --- a/configure.ac +++ b/configure.ac @@ -1172,27 +1172,36 @@ dnl ============================================================================ dnl jemalloc configuration. dnl -dnl Set VERSION if source directory is inside a git repository. -if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then - dnl Pattern globs aren't powerful enough to match both single- and - dnl double-digit version numbers, so iterate over patterns to support up to - dnl version 99.99.99 without any accidental matches. - rm -f "${objroot}VERSION" - for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ - '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do - if test ! -e "${objroot}VERSION" ; then - (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null - if test $? 
-eq 0 ; then - mv "${objroot}VERSION.tmp" "${objroot}VERSION" - break - fi +AC_ARG_WITH([version], + [AS_HELP_STRING([--with-version=..--g], + [Version string])], + [ + echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null + if test $? -ne 0 ; then + AC_MSG_ERROR([${with_version} does not match ..--g]) fi - done -fi -rm -f "${objroot}VERSION.tmp" + echo "$with_version" > "${objroot}VERSION" + ], [ + dnl Set VERSION if source directory is inside a git repository. + if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then + dnl Pattern globs aren't powerful enough to match both single- and + dnl double-digit version numbers, so iterate over patterns to support up + dnl to version 99.99.99 without any accidental matches. + for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ + '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do + (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null + if test $? -eq 0 ; then + mv "${objroot}VERSION.tmp" "${objroot}VERSION" + break + fi + done + fi + rm -f "${objroot}VERSION.tmp" + ]) + if test ! -e "${objroot}VERSION" ; then if test ! -e "${srcroot}VERSION" ; then AC_MSG_RESULT( From 22af74e10615ce6b6898ae38a378af27757f9e16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Mar 2016 09:35:14 -0700 Subject: [PATCH 0165/2608] Refactor out signed/unsigned comparisons. 
--- include/jemalloc/internal/util.h | 4 ++-- src/util.c | 11 ++++------- test/src/timer.c | 5 ++--- test/unit/bitmap.c | 4 ++-- test/unit/util.c | 8 ++++---- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b8885bfa..6e214702 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -106,9 +106,9 @@ void malloc_write(const char *s); * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating * point math. */ -int malloc_vsnprintf(char *str, size_t size, const char *format, +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); -int malloc_snprintf(char *str, size_t size, const char *format, ...) +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); diff --git a/src/util.c b/src/util.c index 02673c70..982a2e31 100644 --- a/src/util.c +++ b/src/util.c @@ -314,10 +314,9 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) return (s); } -int +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { - int ret; size_t i; const char *f; @@ -585,21 +584,19 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) str[i] = '\0'; else str[size - 1] = '\0'; - assert(i < INT_MAX); - ret = (int)i; #undef APPEND_C #undef APPEND_S #undef APPEND_PADDED_S #undef GET_ARG_NUMERIC - return (ret); + return (i); } JEMALLOC_FORMAT_PRINTF(3, 4) -int +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) 
{ - int ret; + size_t ret; va_list ap; va_start(ap, format); diff --git a/test/src/timer.c b/test/src/timer.c index e91b3cf2..3c7e63a2 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -32,9 +32,8 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; - unsigned i = 0; - unsigned j; - int n; + size_t i = 0; + size_t j, n; /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 1ab0bb8e..a2dd5463 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -101,7 +101,7 @@ TEST_BEGIN(test_bitmap_sfu) bitmap_info_t binfo; bitmap_info_init(&binfo, i); { - ssize_t j; + size_t j; bitmap_t *bitmap = (bitmap_t *)malloc( bitmap_size(&binfo)); bitmap_init(bitmap, &binfo); @@ -119,7 +119,7 @@ TEST_BEGIN(test_bitmap_sfu) * Iteratively unset bits starting at the end, and * verify that bitmap_sfu() reaches the unset bits. */ - for (j = i - 1; j >= 0; j--) { + for (j = i - 1; j < i; j--) { /* (i..0] */ bitmap_unset(bitmap, &binfo, j); assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, "First unset bit should the bit previously " diff --git a/test/unit/util.c b/test/unit/util.c index 2f65aad2..d24c1c79 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -160,14 +160,14 @@ TEST_BEGIN(test_malloc_snprintf_truncated) { #define BUFLEN 15 char buf[BUFLEN]; - int result; + size_t result; size_t len; #define TEST(expected_str_untruncated, ...) 
do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ buf, expected_str_untruncated); \ - assert_d_eq(result, strlen(expected_str_untruncated), \ + assert_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) @@ -193,11 +193,11 @@ TEST_BEGIN(test_malloc_snprintf) { #define BUFLEN 128 char buf[BUFLEN]; - int result; + size_t result; #define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ assert_str_eq(buf, expected_str, "Unexpected output"); \ - assert_d_eq(result, strlen(expected_str), "Unexpected result"); \ + assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ } while (0) TEST("hello", "hello"); From 18903c592fdbf2384b59051bd251d234e84647af Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Mon, 14 Mar 2016 21:44:32 -0700 Subject: [PATCH 0166/2608] Enable -Wsign-compare warnings. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 3082916b..275576bd 100644 --- a/configure.ac +++ b/configure.ac @@ -141,6 +141,7 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) JE_CFLAGS_APPEND([-Wshorten-64-to-32]) + JE_CFLAGS_APPEND([-Wsign-compare]) JE_CFLAGS_APPEND([-pipe]) JE_CFLAGS_APPEND([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then From 6c460ad91bf349ebac3b23e58d97769a982110fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Mar 2016 17:54:35 -0700 Subject: [PATCH 0167/2608] Optimize rtree_get(). Specialize fast path to avoid code that cannot execute for dependent loads. Manually unroll. 
--- include/jemalloc/internal/rtree.h | 166 +++++++++++++++++++++++------- src/rtree.c | 2 + src/util.c | 1 + 3 files changed, 134 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 28ae9d1d..3f8db3ad 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -16,8 +16,34 @@ typedef struct rtree_s rtree_t; */ #define LG_RTREE_BITS_PER_LEVEL 4 #define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) -#define RTREE_HEIGHT_MAX \ - ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) +/* + * Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp + * conditionals. The following defininitions are precomputed equivalents to: + * + * #define RTREE_HEIGHT_MAX \ + * ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + */ +#if LG_RTREE_BITS_PER_LEVEL == 2 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 16 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 8 +# endif +#elif LG_RTREE_BITS_PER_LEVEL == 3 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 8 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 4 +# endif +#elif LG_RTREE_BITS_PER_LEVEL == 4 +# if LG_SIZEOF_PTR == 3 +# define RTREE_HEIGHT_MAX 4 +# elif LG_SIZEOF_PTR == 2 +# define RTREE_HEIGHT_MAX 2 +# endif +#else +# error Unsupported LG_RTREE_BITS_PER_LEVEL +#endif /* Used for two-stage lock-free node initialization. 
*/ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) @@ -111,15 +137,18 @@ unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm); +rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, + bool dependent); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, - unsigned level); + unsigned level, bool dependent); extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent); void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level); +rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, + bool dependent); +rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, + bool dependent); extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); @@ -157,25 +186,28 @@ rtree_node_valid(rtree_node_elm_t *node) } JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm) +rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *child; /* Double-checked read (first read may be stale. 
*/ child = elm->child; - if (!rtree_node_valid(child)) + if (!dependent && !rtree_node_valid(child)) child = atomic_read_p(&elm->pun); + assert(!dependent || child != NULL); return (child); } JEMALLOC_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, + bool dependent) { rtree_node_elm_t *child; - child = rtree_child_tryread(elm); - if (unlikely(!rtree_node_valid(child))) + child = rtree_child_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(child))) child = rtree_child_read_hard(rtree, elm, level); + assert(!dependent || child != NULL); return (child); } @@ -209,25 +241,27 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) } JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level) +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; /* Double-checked read (first read may be stale. 
*/ subtree = rtree->levels[level].subtree; - if (!rtree_node_valid(subtree)) + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + assert(!dependent || subtree != NULL); return (subtree); } JEMALLOC_INLINE rtree_node_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level) +rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; - subtree = rtree_subtree_tryread(rtree, level); - if (unlikely(!rtree_node_valid(subtree))) + subtree = rtree_subtree_tryread(rtree, level, dependent); + if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = rtree_subtree_read_hard(rtree, level); + assert(!dependent || subtree != NULL); return (subtree); } @@ -235,26 +269,88 @@ JEMALLOC_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; + unsigned start_level; + rtree_node_elm_t *node; start_level = rtree_start_level(rtree, key); - for (i = start_level, node = rtree_subtree_tryread(rtree, start_level); - /**/; i++, node = child) { - if (!dependent && unlikely(!rtree_node_valid(node))) - return (NULL); - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. 
- */ - return (rtree_val_read(rtree, &node[subkey], - dependent)); - } - assert(i < rtree->height - 1); - child = rtree_child_tryread(&node[subkey]); + node = rtree_subtree_tryread(rtree, start_level, dependent); +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) + switch (start_level + RTREE_GET_BIAS) { +#define RTREE_GET_SUBTREE(level) \ + case level: \ + assert(level < (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + node = rtree_child_tryread(&node[subkey], dependent); \ + /* Fall through. */ +#define RTREE_GET_LEAF(level) \ + case level: \ + assert(level == (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) \ + return (NULL); \ + subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + /* \ + * node is a leaf, so it contains values rather than \ + * child pointers. \ + */ \ + return (rtree_val_read(rtree, &node[subkey], \ + dependent)); +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) 
+#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); } not_reached(); } @@ -268,7 +364,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) start_level = rtree_start_level(rtree, key); - node = rtree_subtree_read(rtree, start_level); + node = rtree_subtree_read(rtree, start_level, false); if (node == NULL) return (true); for (i = start_level; /**/; i++, node = child) { @@ -282,7 +378,7 @@ rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) return (false); } assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i); + child = rtree_child_read(rtree, &node[subkey], i, false); if (child == NULL) return (true); } diff --git a/src/rtree.c b/src/rtree.c index af0d97e7..3166b45f 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -15,6 +15,8 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, { unsigned bits_in_leaf, height, i; + assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / + RTREE_BITS_PER_LEVEL)); assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? RTREE_BITS_PER_LEVEL diff --git a/src/util.c b/src/util.c index 982a2e31..581d540b 100644 --- a/src/util.c +++ b/src/util.c @@ -14,6 +14,7 @@ malloc_write(": Unreachable code reached\n"); \ abort(); \ } \ + unreachable(); \ } while (0) #define not_implemented() do { \ From 6a885198c2a27333f1fcfae5637dc2377189a3a3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 16:14:41 -0700 Subject: [PATCH 0168/2608] Always inline performance-critical rtree operations. 
--- include/jemalloc/internal/rtree.h | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 3f8db3ad..36aa002b 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -155,7 +155,7 @@ bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_INLINE unsigned +JEMALLOC_ALWAYS_INLINE unsigned rtree_start_level(rtree_t *rtree, uintptr_t key) { unsigned start_level; @@ -169,7 +169,7 @@ rtree_start_level(rtree_t *rtree, uintptr_t key) return (start_level); } -JEMALLOC_INLINE uintptr_t +JEMALLOC_ALWAYS_INLINE uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { @@ -178,14 +178,14 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) rtree->levels[level].bits) - 1)); } -JEMALLOC_INLINE bool +JEMALLOC_ALWAYS_INLINE bool rtree_node_valid(rtree_node_elm_t *node) { return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *child; @@ -198,7 +198,7 @@ rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) return (child); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, bool dependent) { @@ -211,7 +211,7 @@ rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, return (child); } -JEMALLOC_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_node_t * rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) { @@ -240,7 +240,7 @@ rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) atomic_write_p(&elm->pun, val); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * 
rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; @@ -253,7 +253,7 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { rtree_node_elm_t *subtree; @@ -265,7 +265,7 @@ rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_node_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; @@ -352,6 +352,7 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) #undef RTREE_GET_LEAF default: not_reached(); } +#undef RTREE_GET_BIAS not_reached(); } From f6bd2e5a178aed23398996f008feee5bf070a624 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 15:32:07 -0700 Subject: [PATCH 0169/2608] Code formatting fixes. --- src/tcache.c | 3 ++- test/stress/microbench.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 6e32f404..c12727a6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -23,7 +23,8 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ -size_t tcache_salloc(const void *ptr) +size_t +tcache_salloc(const void *ptr) { return (arena_salloc(ptr, false)); diff --git a/test/stress/microbench.c b/test/stress/microbench.c index ee39fea7..7dc45f89 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -1,7 +1,8 @@ #include "test/jemalloc_test.h" JEMALLOC_INLINE_C void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, + void (*func)(void)) { uint64_t i; From 61a6dfcd5fd89d21f04c99fabaf7269d05f61adf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 16:04:38 -0700 
Subject: [PATCH 0170/2608] Constify various internal arena APIs. --- include/jemalloc/internal/arena.h | 124 +++++++++++------- include/jemalloc/internal/private_symbols.txt | 9 +- src/arena.c | 51 +++---- src/tcache.c | 2 +- 4 files changed, 112 insertions(+), 74 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c08a742f..09ae6894 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -593,29 +593,38 @@ void arena_postfork_child(arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get(arena_chunk_t *chunk, +arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind); -arena_chunk_map_misc_t *arena_miscelm_get(arena_chunk_t *chunk, +const arena_chunk_map_bits_t *arena_bitselm_get_const( + const arena_chunk_t *chunk, size_t pageind); +arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind); +const arena_chunk_map_misc_t *arena_miscelm_get_const( + const arena_chunk_t *chunk, size_t pageind); size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); -size_t *arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbitsp_read(size_t *mapbitsp); -size_t arena_mapbits_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, +size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); +const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind); -size_t 
arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind); -szind_t arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbitsp_read(const size_t *mapbitsp); +size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_size_decode(size_t mapbits); +size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, + size_t pageind); +szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, + size_t pageind); +size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); +size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); size_t arena_mapbits_size_encode(size_t size); void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, @@ -657,7 +666,7 @@ void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) 
+arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -666,8 +675,15 @@ arena_bitselm_get(arena_chunk_t *chunk, size_t pageind) return (&chunk->map_bits[pageind-map_bias]); } +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * +arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) +arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) { assert(pageind >= map_bias); @@ -677,6 +693,13 @@ arena_miscelm_get(arena_chunk_t *chunk, size_t pageind) (uintptr_t)map_misc_offset) + pageind-map_bias); } +JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * +arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); +} + JEMALLOC_ALWAYS_INLINE size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) { @@ -691,7 +714,7 @@ arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm) { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -736,24 +759,31 @@ arena_run_to_miscelm(arena_run_t *run) } JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) { - return (&arena_bitselm_get(chunk, pageind)->bits); + return (&arena_bitselm_get_mutable(chunk, pageind)->bits); +} + +JEMALLOC_ALWAYS_INLINE const size_t * +arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) +{ + + return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(size_t 
*mapbitsp) +arena_mapbitsp_read(const size_t *mapbitsp) { return (*mapbitsp); } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) { - return (arena_mapbitsp_read(arena_mapbitsp_get(chunk, pageind))); + return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); } JEMALLOC_ALWAYS_INLINE size_t @@ -773,7 +803,7 @@ arena_mapbits_size_decode(size_t mapbits) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -783,7 +813,7 @@ arena_mapbits_unallocated_size_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -794,7 +824,7 @@ arena_mapbits_large_size_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -805,7 +835,7 @@ arena_mapbits_small_runind_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; szind_t binind; @@ -817,7 +847,7 @@ arena_mapbits_binind_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -828,7 +858,7 @@ arena_mapbits_dirty_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) 
+arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -839,7 +869,7 @@ arena_mapbits_unzeroed_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -850,7 +880,7 @@ arena_mapbits_decommitted_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -859,7 +889,7 @@ arena_mapbits_large_get(arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(arena_chunk_t *chunk, size_t pageind) +arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) { size_t mapbits; @@ -895,7 +925,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -909,7 +939,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, size_t size) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert((size & PAGE_MASK) == 0); @@ -921,7 +951,7 @@ arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, JEMALLOC_ALWAYS_INLINE void arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((flags & CHUNK_MAP_UNZEROED) == flags); 
arena_mapbitsp_write(mapbitsp, flags); @@ -931,7 +961,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert((size & PAGE_MASK) == 0); assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); @@ -946,7 +976,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, szind_t binind) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); size_t mapbits = arena_mapbitsp_read(mapbitsp); assert(binind <= BININD_INVALID); @@ -960,7 +990,7 @@ JEMALLOC_ALWAYS_INLINE void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, szind_t binind, size_t flags) { - size_t *mapbitsp = arena_mapbitsp_get(chunk, pageind); + size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); assert(binind < BININD_INVALID); assert(pageind - runind >= map_bias); @@ -1048,12 +1078,12 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) size_t pageind; size_t actual_mapbits; size_t rpages_ind; - arena_run_t *run; + const arena_run_t *run; arena_bin_t *bin; szind_t run_binind, actual_binind; arena_bin_info_t *bin_info; - arena_chunk_map_misc_t *miscelm; - void *rpages; + const arena_chunk_map_misc_t *miscelm; + const void *rpages; assert(binind != BININD_INVALID); assert(binind < NBINS); @@ -1066,7 +1096,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(arena_mapbits_allocated_get(chunk, pageind) != 0); rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - miscelm = arena_miscelm_get(chunk, rpages_ind); + miscelm = arena_miscelm_get_const(chunk, rpages_ind); run = &miscelm->run; run_binind = run->binind; bin = &arena->bins[run_binind]; @@ -1182,8 +1212,8 @@ arena_prof_tctx_get(const void *ptr) if (likely((mapbits & 
CHUNK_MAP_LARGE) == 0)) ret = (prof_tctx_t *)(uintptr_t)1U; else { - arena_chunk_map_misc_t *elm = arena_miscelm_get(chunk, - pageind); + arena_chunk_map_misc_t *elm = + arena_miscelm_get_mutable(chunk, pageind); ret = atomic_read_p(&elm->prof_tctx_pun); } } else @@ -1212,7 +1242,7 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get(chunk, pageind); + elm = arena_miscelm_get_mutable(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, tctx); } else { /* @@ -1248,7 +1278,7 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, 0); assert(arena_mapbits_large_get(chunk, pageind) != 0); - elm = arena_miscelm_get(chunk, pageind); + elm = arena_miscelm_get_mutable(chunk, pageind); atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index aed60cb1..26066695 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -5,7 +5,8 @@ arena_alloc_junk_small arena_basic_stats_merge arena_bin_index arena_bin_info -arena_bitselm_get +arena_bitselm_get_const +arena_bitselm_get_mutable arena_boot arena_choose arena_choose_hard @@ -60,7 +61,8 @@ arena_mapbits_unallocated_set arena_mapbits_unallocated_size_get arena_mapbits_unallocated_size_set arena_mapbits_unzeroed_get -arena_mapbitsp_get +arena_mapbitsp_get_const +arena_mapbitsp_get_mutable arena_mapbitsp_read arena_mapbitsp_write arena_maxrun @@ -69,7 +71,8 @@ arena_metadata_allocated_add arena_metadata_allocated_get arena_metadata_allocated_sub arena_migrate -arena_miscelm_get +arena_miscelm_get_const +arena_miscelm_get_mutable arena_miscelm_to_pageind arena_miscelm_to_rpages arena_new diff --git a/src/arena.c b/src/arena.c index 0d232ff8..8291ab2a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -197,11 +197,11 @@ 
arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get(chunk, pageind)))); + arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_insert(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->ph_link); + &arena_miscelm_get_mutable(chunk, pageind)->ph_link); } static void @@ -209,18 +209,19 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get(chunk, pageind)))); + arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); ph_remove(arena_runs_avail_get(arena, ind), - &arena_miscelm_get(chunk, pageind)->ph_link); + &arena_miscelm_get_mutable(chunk, pageind)->ph_link); } static void arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -237,7 +238,8 @@ static void arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); @@ -679,17 +681,18 @@ arena_chunk_init_hard(arena_t *arena) */ if (!zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_bitselm_get(chunk, map_bias+1), - (size_t)((uintptr_t) arena_bitselm_get(chunk, - chunk_npages-1) - (uintptr_t)arena_bitselm_get(chunk, - map_bias+1))); + (void 
*)arena_bitselm_get_const(chunk, map_bias+1), + (size_t)((uintptr_t)arena_bitselm_get_const(chunk, + chunk_npages-1) - + (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_internal_set(chunk, i, flag_unzeroed); } else { JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void - *)arena_bitselm_get(chunk, map_bias+1), (size_t)((uintptr_t) - arena_bitselm_get(chunk, chunk_npages-1) - - (uintptr_t)arena_bitselm_get(chunk, map_bias+1))); + *)arena_bitselm_get_const(chunk, map_bias+1), + (size_t)((uintptr_t)arena_bitselm_get_const(chunk, + chunk_npages-1) - + (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == @@ -1103,7 +1106,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = &arena_miscelm_get(chunk, map_bias)->run; + run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) run = NULL; return (run); @@ -1148,7 +1151,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) */ chunk = arena_chunk_alloc(arena); if (chunk != NULL) { - run = &arena_miscelm_get(chunk, map_bias)->run; + run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) run = NULL; return (run); @@ -2043,7 +2046,7 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - tail_miscelm = arena_miscelm_get(chunk, pageind + head_npages); + tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != 0)); @@ -2520,7 +2523,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, 
arena_chunk_map_misc_t *head_miscelm = miscelm; arena_run_t *head_run = run; - miscelm = arena_miscelm_get(chunk, + miscelm = arena_miscelm_get_mutable(chunk, arena_miscelm_to_pageind(head_miscelm) + (leadsize >> LG_PAGE)); run = &miscelm->run; @@ -2703,7 +2706,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get(chunk, rpages_ind)->run; + run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; binind = run->binind; bin = &arena->bins[binind]; bin_info = &arena_bin_info[binind]; @@ -2741,7 +2744,7 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t rpages_ind; rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get(chunk, rpages_ind)->run; + run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; malloc_mutex_lock(&bin->lock); arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); @@ -2759,7 +2762,7 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } - bitselm = arena_bitselm_get(chunk, pageind); + bitselm = arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); arena_decay_tick(tsd, arena); } @@ -2787,7 +2790,8 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); arena_run_t *run = &miscelm->run; if (config_fill || config_stats) { @@ -2832,7 +2836,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, 
size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get(chunk, pageind); + arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, + pageind); arena_run_t *run = &miscelm->run; assert(size < oldsize); @@ -2898,7 +2903,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, if (splitsize == 0) goto label_fail; - run = &arena_miscelm_get(chunk, pageind+npages)->run; + run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; if (arena_run_split_large(arena, run, splitsize, zero)) goto label_fail; diff --git a/src/tcache.c b/src/tcache.c index c12727a6..a8620c3d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -129,7 +129,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = - arena_bitselm_get(chunk, pageind); + arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin_junked_locked(bin_arena, chunk, ptr, bitselm); } else { From ff63dca363021faf5ccacc6dce2cb05df0268214 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 16:06:12 -0700 Subject: [PATCH 0171/2608] Avoid blindly enabling assertions for header code when testing. Restructure the test program master header to avoid blindly enabling assertions. Prior to this change, assertion code in e.g. arena.h was always enabled for tests, which could skew performance-related testing. 
--- test/include/test/jemalloc_test.h.in | 78 ++++++++++++++++------------ 1 file changed, 45 insertions(+), 33 deletions(-) diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 0a3dbeac..1f36e469 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -19,39 +19,6 @@ # include #endif -/******************************************************************************/ -/* - * Define always-enabled assertion macros, so that test assertions execute even - * if assertions are disabled in the library code. These definitions must - * exist prior to including "jemalloc/internal/util.h". - */ -#define assert(e) do { \ - if (!(e)) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) - -#define not_reached() do { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define not_implemented() do { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ -} while (0) - -#define assert_not_implemented(e) do { \ - if (!(e)) \ - not_implemented(); \ -} while (0) - #include "test/jemalloc_test_defs.h" #ifdef JEMALLOC_OSSPIN @@ -86,6 +53,14 @@ # include "jemalloc/internal/jemalloc_internal_defs.h" # include "jemalloc/internal/jemalloc_internal_macros.h" +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; + # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" @@ -149,3 +124,40 @@ #include "test/thd.h" #define MEXP 19937 #include "test/SFMT.h" + +/******************************************************************************/ +/* + * Define always-enabled assertion macros, so that test assertions execute even + * if assertions are disabled in the library code. 
+ */ +#undef assert +#undef not_reached +#undef not_implemented +#undef assert_not_implemented + +#define assert(e) do { \ + if (!(e)) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ +} while (0) + +#define not_reached() do { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define not_implemented() do { \ + malloc_printf(": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ +} while (0) + +#define assert_not_implemented(e) do { \ + if (!(e)) \ + not_implemented(); \ +} while (0) From 232b13d86298b9eafc36b0610d7965a95bda0679 Mon Sep 17 00:00:00 2001 From: rustyx Date: Wed, 23 Mar 2016 10:13:22 +0100 Subject: [PATCH 0172/2608] Fix MSVC project --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 ++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f3f0260b..0a6c4e61 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -56,6 +56,7 @@ + @@ -105,6 +106,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index ce70632b..412c24d6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -107,6 +107,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -226,6 +229,9 @@ Source Files + + Source Files + Source Files From af3184cac0e0c70045d8158b9c176696f2ca1090 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 24 Mar 2016 01:42:08 -0700 Subject: [PATCH 0173/2608] Use abort() for fallback implementations of unreachable(). 
--- include/jemalloc/internal/util.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 6e214702..228584a4 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -73,12 +73,12 @@ JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) # define unreachable() __builtin_unreachable() # else -# define unreachable() +# define unreachable() abort() # endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() +# define unreachable() abort() #endif #include "jemalloc/internal/assert.h" From f3060284c521cc74e333c5ab3a6c8fc0648defb5 Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Sat, 26 Mar 2016 00:30:11 -0700 Subject: [PATCH 0174/2608] Remove unused arenas_extend() function declaration. The arenas_extend() function was renamed to arenas_init() in commit 8bb3198f72fc7587dc93527f9f19fb5be52fa553, but its function declaration was not removed from jemalloc_internal.h.in. --- include/jemalloc/internal/jemalloc_internal.h.in | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d3b94c00..c1cccd64 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -465,7 +465,6 @@ void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); -arena_t *arenas_extend(unsigned ind); unsigned narenas_total_get(void); arena_t *arena_init(unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); From 0bc716ae27d1bd66faa8f165a2c4a4cf6bd8143f Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Sat, 26 Mar 2016 01:19:28 -0700 Subject: [PATCH 0175/2608] Fix -Wunreachable-code warning in malloc_vsnprintf(). 
Variables s and slen are declared inside a switch statement, but outside a case scope. clang reports these variable definitions as "unreachable", though this is not really meaningful in this case. This is the only -Wunreachable-code warning in jemalloc. src/util.c:501:5 [-Wunreachable-code] code will never be executed This resolves #364. --- src/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util.c b/src/util.c index 581d540b..a1c4a2a4 100644 --- a/src/util.c +++ b/src/util.c @@ -408,6 +408,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) int prec = -1; int width = -1; unsigned char len = '?'; + char *s; + size_t slen; f++; /* Flags. */ @@ -498,8 +500,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) } /* Conversion specifier. */ switch (*f) { - char *s; - size_t slen; case '%': /* %% */ APPEND_C(*f); From ce7c0f999bf7634078ec759f3d13290dbb34170c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Mar 2016 18:36:04 -0700 Subject: [PATCH 0176/2608] Fix potential chunk leaks. Move chunk_dalloc_arena()'s implementation into chunk_dalloc_wrapper(), so that if the dalloc hook fails, proper decommit/purge/retain cascading occurs. This fixes three potential chunk leaks on OOM paths, one during dss-based chunk allocation, one during chunk header commit (currently relevant only on Windows), and one during rtree write (e.g. if rtree node allocation fails). Merge chunk_purge_arena() into chunk_purge_default() (refactor, no change to functionality). 
--- include/jemalloc/internal/chunk.h | 6 +-- include/jemalloc/internal/private_symbols.txt | 2 - src/arena.c | 16 +++--- src/chunk.c | 51 ++++++------------- src/chunk_dss.c | 2 +- 5 files changed, 26 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 5d193835..d800478d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -62,12 +62,8 @@ void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); -void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, - size_t length); + void *chunk, size_t size, bool zeroed, bool committed); bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 26066695..969c73df 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -152,7 +152,6 @@ chunk_alloc_dss chunk_alloc_mmap chunk_alloc_wrapper chunk_boot -chunk_dalloc_arena chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper @@ -172,7 +171,6 @@ chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork -chunk_purge_arena chunk_purge_wrapper chunk_register chunks_rtree diff --git a/src/arena.c b/src/arena.c index 8291ab2a..45964787 100644 --- a/src/arena.c +++ b/src/arena.c @@ -602,8 +602,8 @@ arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t 
*chunk_hooks, /* Commit header. */ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, - (void *)chunk, chunksize, *commit); + chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunksize, *zero, *commit); chunk = NULL; } } @@ -614,7 +614,7 @@ arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, LG_PAGE, arena->ind); } chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, - chunksize, *commit); + chunksize, *zero, *commit); chunk = NULL; } @@ -1010,7 +1010,7 @@ arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero, + chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1036,8 +1036,8 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize, + zero, true) == NULL); malloc_mutex_unlock(&arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, @@ -1045,7 +1045,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero, + chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1699,7 +1699,7 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, extent_node_dirty_remove(chunkselm); arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_arena(arena, chunk_hooks, addr, size, + 
chunk_dalloc_wrapper(arena, chunk_hooks, addr, size, zeroed, committed); } else { arena_chunk_t *chunk = diff --git a/src/chunk.c b/src/chunk.c index b179d213..304d4e5a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -425,8 +425,8 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, arena_t *arena; arena = chunk_arena_get(arena_ind); - ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, - commit, arena->dss_prec); + ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, + arena->dss_prec); if (ret == NULL) return (NULL); if (config_valgrind) @@ -579,8 +579,18 @@ chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, arena_maybe_purge(arena); } +static bool +chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind) +{ + + if (!have_dss || !chunk_in_dss(chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + void -chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, +chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { @@ -604,27 +614,6 @@ chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); } -static bool -chunk_dalloc_default(void *chunk, size_t size, bool committed, - unsigned arena_ind) -{ - - if (!have_dss || !chunk_in_dss(chunk)) - return (chunk_dalloc_mmap(chunk, size)); - return (true); -} - -void -chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool committed) -{ - - chunk_hooks_assure_initialized(arena, chunk_hooks); - chunk_hooks->dalloc(chunk, size, committed, arena->ind); - if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); -} - static bool chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, unsigned arena_ind) @@ -643,8 
+632,9 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } -bool -chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) +static bool +chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) { assert(chunk != NULL); @@ -657,15 +647,6 @@ chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) length)); } -static bool -chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, - length)); -} - bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length) diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 61fc9169..943d0e98 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -136,7 +136,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(arena, &chunk_hooks, cpad, cpad_size, - true); + false, true); } if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( From f86bc081d6190be14c64aeaae9d02863b440bfb3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2016 11:19:46 -0700 Subject: [PATCH 0177/2608] Update a comment. --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 45964787..38a1ce34 100644 --- a/src/arena.c +++ b/src/arena.c @@ -668,8 +668,8 @@ arena_chunk_init_hard(arena_t *arena) /* * Initialize the map to contain one maximal free untouched run. Mark - * the pages as zeroed if chunk_alloc() returned a zeroed or decommitted - * chunk. + * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed + * or decommitted chunk. */ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; flag_decommitted = commit ? 
0 : CHUNK_MAP_DECOMMITTED; From a82070ef5fc3aa81fda43086cdcc22bfa826b894 Mon Sep 17 00:00:00 2001 From: Chris Peterson Date: Sun, 27 Mar 2016 23:28:39 -0700 Subject: [PATCH 0178/2608] Add JEMALLOC_ALLOC_JUNK and JEMALLOC_FREE_JUNK macros Replace hardcoded 0xa5 and 0x5a junk values with JEMALLOC_ALLOC_JUNK and JEMALLOC_FREE_JUNK macros, respectively. --- include/jemalloc/internal/tcache.h | 7 +++--- include/jemalloc/internal/util.h | 4 ++++ src/arena.c | 36 ++++++++++++++++-------------- src/ckh.c | 2 +- src/huge.c | 15 +++++++------ src/quarantine.c | 2 +- test/unit/junk.c | 6 ++--- 7 files changed, 40 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 8357820b..1edd39fd 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -381,9 +381,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (likely(!zero)) { if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); - else if (unlikely(opt_zero)) + if (unlikely(opt_junk_alloc)) { + memset(ret, JEMALLOC_ALLOC_JUNK, + usize); + } else if (unlikely(opt_zero)) memset(ret, 0, usize); } } else diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 228584a4..949a0e0a 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -40,6 +40,10 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 +/* Junk fill patterns. */ +#define JEMALLOC_ALLOC_JUNK 0xa5 +#define JEMALLOC_FREE_JUNK 0x5a + /* * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. 
diff --git a/src/arena.c b/src/arena.c index 38a1ce34..1d30de57 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2249,15 +2249,16 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) { + size_t redzone_size = bin_info->redzone_size; + if (zero) { - size_t redzone_size = bin_info->redzone_size; - memset((void *)((uintptr_t)ptr - redzone_size), 0xa5, - redzone_size); - memset((void *)((uintptr_t)ptr + bin_info->reg_size), 0xa5, - redzone_size); + memset((void *)((uintptr_t)ptr - redzone_size), + JEMALLOC_ALLOC_JUNK, redzone_size); + memset((void *)((uintptr_t)ptr + bin_info->reg_size), + JEMALLOC_ALLOC_JUNK, redzone_size); } else { - memset((void *)((uintptr_t)ptr - bin_info->redzone_size), 0xa5, - bin_info->reg_interval); + memset((void *)((uintptr_t)ptr - redzone_size), + JEMALLOC_ALLOC_JUNK, bin_info->reg_interval); } } @@ -2293,22 +2294,22 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) for (i = 1; i <= redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != 0xa5) { + if (*byte != JEMALLOC_ALLOC_JUNK) { error = true; arena_redzone_corruption(ptr, size, false, i, *byte); if (reset) - *byte = 0xa5; + *byte = JEMALLOC_ALLOC_JUNK; } } for (i = 0; i < redzone_size; i++) { uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != 0xa5) { + if (*byte != JEMALLOC_ALLOC_JUNK) { error = true; arena_redzone_corruption(ptr, size, true, i, *byte); if (reset) - *byte = 0xa5; + *byte = JEMALLOC_ALLOC_JUNK; } } } @@ -2327,7 +2328,7 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) size_t redzone_size = bin_info->redzone_size; arena_redzones_validate(ptr, bin_info, false); - memset((void *)((uintptr_t)ptr - redzone_size), 0x5a, + memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK, bin_info->reg_interval); } #ifdef JEMALLOC_JET @@ -2458,7 +2459,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) if (!zero) { if (config_fill) { 
if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } @@ -2563,7 +2564,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) - memset(ret, 0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); else if (unlikely(opt_zero)) memset(ret, 0, usize); } @@ -2776,7 +2777,7 @@ arena_dalloc_junk_large(void *ptr, size_t usize) { if (config_fill && unlikely(opt_junk_free)) - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large @@ -2977,7 +2978,7 @@ arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) { if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), 0x5a, + memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK, old_usize - usize); } } @@ -3012,7 +3013,8 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, + memset((void *)((uintptr_t)ptr + oldsize), + JEMALLOC_ALLOC_JUNK, isalloc(ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, diff --git a/src/ckh.c b/src/ckh.c index 3b423aa2..07b49dd2 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -423,7 +423,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); if (config_debug) - memset(ckh, 0x5a, sizeof(ckh_t)); + memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } size_t diff --git a/src/huge.c b/src/huge.c index 5f7ceaf1..a63c8258 100644 --- a/src/huge.c +++ b/src/huge.c @@ -92,7 +92,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (!is_zeroed) memset(ret, 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, 
0xa5, usize); + memset(ret, JEMALLOC_ALLOC_JUNK, usize); arena_decay_tick(tsd, arena); return (ret); @@ -112,7 +112,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * unmapped. */ if (!config_munmap || (have_dss && chunk_in_dss(ptr))) - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } } #ifdef JEMALLOC_JET @@ -147,7 +147,8 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), 0x5a, sdiff); + memset((void *)((uintptr_t)ptr + usize), + JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, @@ -174,8 +175,8 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, usize - oldsize); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), + JEMALLOC_ALLOC_JUNK, usize - oldsize); } } } @@ -268,8 +269,8 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { CHUNK_CEILING(oldsize)); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), 0xa5, usize - - oldsize); + memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, + usize - oldsize); } return (false); diff --git a/src/quarantine.c b/src/quarantine.c index ff8801cb..c024deab 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -160,7 +160,7 @@ quarantine(tsd_t *tsd, void *ptr) && usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else - memset(ptr, 0x5a, usize); + memset(ptr, JEMALLOC_FREE_JUNK, usize); } } else { assert(quarantine->curbytes == 0); diff --git a/test/unit/junk.c b/test/unit/junk.c index b23dd1e9..f4e62261 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -29,7 +29,7 @@ arena_dalloc_junk_small_intercept(void *ptr, 
arena_bin_info_t *bin_info) arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_c_eq(((char *)ptr)[i], 0x5a, + assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -44,7 +44,7 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) arena_dalloc_junk_large_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_c_eq(((char *)ptr)[i], 0x5a, + assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -98,7 +98,7 @@ test_junk(size_t sz_min, size_t sz_max) for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_c_eq(s[i], 0xa5, + assert_c_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } From b582d2ad9418630d65540ce8dfa9f96e69eb4df9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 31 Mar 2016 12:31:10 -0700 Subject: [PATCH 0179/2608] Update implementation details docs re: PTRDIFF_MAX. Document that the maximum size class is limited by PTRDIFF_MAX, rather than the full address space. This reflects changes that were part of 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). --- doc/jemalloc.xml.in | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bc5dbd1d..63088cd1 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -540,8 +540,8 @@ for (i = 0; i < nbins; i++) { are smaller than four times the page size, large size classes are smaller than the chunk size (see the opt.lg_chunk option), and - huge size classes extend from the chunk size up to one size class less than - the full address space size. + huge size classes extend from the chunk size up to the largest size class + that does not exceed PTRDIFF_MAX. 
Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not @@ -659,7 +659,7 @@ for (i = 0; i < nbins; i++) { [1280 KiB, 1536 KiB, 1792 KiB] - Huge + Huge 256 KiB [2 MiB] @@ -687,6 +687,14 @@ for (i = 0; i < nbins; i++) { ... ... + + 512 PiB + [2560 PiB, 3 EiB, 3584 PiB, 4 EiB] + + + 1 EiB + [5 EiB, 6 EiB, 7 EiB] + From a3c4193280b2fbd267b68f3bce091a53b5ea0b97 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 5 Apr 2016 16:32:32 -0700 Subject: [PATCH 0180/2608] Fix a compilation warning in the ph test code. --- test/unit/ph.c | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/test/unit/ph.c b/test/unit/ph.c index b0e44028..103475b4 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -19,32 +19,14 @@ TEST_END TEST_BEGIN(test_ph_random) { #define NNODES 25 -#define NBAGS 250 #define SEED 42 sfmt_t *sfmt; - uint64_t bag[NNODES]; ph_heap_t heap; node_t nodes[NNODES]; unsigned i, j, k; sfmt = init_gen_rand(SEED); - for (i = 0; i < NBAGS; i++) { - switch (i) { - case 0: - /* Insert in order. */ - for (j = 0; j < NNODES; j++) - bag[j] = j; - break; - case 1: - /* Insert in reverse order. */ - for (j = 0; j < NNODES; j++) - bag[j] = NNODES - j - 1; - break; - default: - for (j = 0; j < NNODES; j++) - bag[j] = gen_rand64_range(sfmt, NNODES); - } - + for (i = 0; i < 2; i++) { for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ ph_new(&heap); @@ -77,7 +59,6 @@ TEST_BEGIN(test_ph_random) } fini_gen_rand(sfmt); #undef NNODES -#undef NBAGS #undef SEED } TEST_END From 4a8abbb400afe695f145a487380c04a946500bc6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 10:32:06 -0700 Subject: [PATCH 0181/2608] Fix bitmap_sfu() regression. Fix bitmap_sfu() to shift by LG_BITMAP_GROUP_NBITS rather than hard-coded 6 when using linear (non-USE_TREE) bitmap search. In practice this affects only 64-bit systems for which sizeof(long) is not 8 (i.e. 
Windows), since USE_TREE is defined for 32-bit systems. This regression was caused by b8823ab02607d6f03febd32ac504bb6188c54047 (Use linear scan for small bitmaps). This resolves #368. --- include/jemalloc/internal/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 2594e3a4..0e0d2476 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -223,7 +223,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i++; g = bitmap[i]; } - bit = (bit - 1) + (i << 6); + bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS); #endif bitmap_set(bitmap, binfo, bit); return (bit); From 2ee2f1ec57d9094643db60210c28b989f2e7da83 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 10:38:47 -0700 Subject: [PATCH 0182/2608] Reduce differences between alternative bitmap implementations. --- include/jemalloc/internal/bitmap.h | 2 +- src/bitmap.c | 11 ++++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 0e0d2476..894695f4 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -223,7 +223,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i++; g = bitmap[i]; } - bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS); + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); #endif bitmap_set(bitmap, binfo, bit); return (bit); diff --git a/src/bitmap.c b/src/bitmap.c index b1e66271..ac0f3b38 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -74,15 +74,11 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { - size_t i; assert(nbits > 0); assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - i = nbits >> LG_BITMAP_GROUP_NBITS; - if (nbits % BITMAP_GROUP_NBITS != 0) - i++; - binfo->ngroups = i; + binfo->ngroups = BITMAP_BITS2GROUPS(nbits); binfo->nbits = nbits; 
} @@ -99,9 +95,10 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) size_t extra; memset(bitmap, 0xffU, bitmap_size(binfo)); - extra = (binfo->nbits % (binfo->ngroups * BITMAP_GROUP_NBITS)); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; if (extra != 0) - bitmap[binfo->ngroups - 1] >>= (BITMAP_GROUP_NBITS - extra); + bitmap[binfo->ngroups - 1] >>= extra; } #endif /* USE_TREE */ From c6a2c39404df9a3fb27735b93cf4cb3a76a2d4a7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 26 Mar 2016 17:30:37 -0700 Subject: [PATCH 0183/2608] Refactor/fix ph. Refactor ph to support configurable comparison functions. Use a cpp macro code generation form equivalent to the rb macros so that pairing heaps can be used for both run heaps and chunk heaps. Remove per node parent pointers, and instead use leftmost siblings' prev pointers to track parents. Fix multi-pass sibling merging to iterate over intermediate results using a FIFO, rather than a LIFO. Use this fixed sibling merging implementation for both merge phases of the auxiliary twopass algorithm (first merging the aux list, then replacing the root with its merged children). This fixes both degenerate merge behavior and the potential for deep recursion. This regression was introduced by 6bafa6678fc36483e638f1c3a0a9bf79fb89bfc9 (Pairing heap). This resolves #371. 
--- Makefile.in | 1 - include/jemalloc/internal/arena.h | 20 +- .../jemalloc/internal/jemalloc_internal.h.in | 5 +- include/jemalloc/internal/ph.h | 552 ++++++++++-------- include/jemalloc/internal/private_symbols.txt | 16 +- src/arena.c | 95 ++- src/ph.c | 2 - test/unit/ph.c | 257 +++++++- 8 files changed, 613 insertions(+), 335 deletions(-) delete mode 100644 src/ph.c diff --git a/Makefile.in b/Makefile.in index 7f2d668a..480ce1a1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ - $(srcroot)src/ph.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/quarantine.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 09ae6894..6f0fa76a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -160,7 +160,7 @@ struct arena_chunk_map_misc_s { * 2) arena_run_t conceptually uses this linkage for in-use non-full * runs, rather than directly embedding linkage. */ - ph_node_t ph_link; + phn(arena_chunk_map_misc_t) ph_link; union { /* Linkage for list of dirty runs. */ @@ -176,6 +176,7 @@ struct arena_chunk_map_misc_s { arena_run_t run; }; }; +typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B @@ -278,7 +279,7 @@ struct arena_bin_s { * objects packed well, and it can also help reduce the number of * almost-empty chunks. */ - ph_heap_t runs; + arena_run_heap_t runs; /* Bin statistics. */ malloc_bin_stats_t stats; @@ -460,7 +461,7 @@ struct arena_s { * Quantized address-ordered heaps of this arena's available runs. The * heaps are used for first-best-fit run allocation. */ - ph_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ }; /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ @@ -604,7 +605,6 @@ const arena_chunk_map_misc_t *arena_miscelm_get_const( size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); -arena_chunk_map_misc_t *arena_ph_to_miscelm(ph_node_t *ph); arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, @@ -734,18 +734,6 @@ arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) return (miscelm); } -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_ph_to_miscelm(ph_node_t *ph) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *) - ((uintptr_t)ph - offsetof(arena_chunk_map_misc_t, ph_link)); - - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); - - return (miscelm); -} - JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * arena_run_to_miscelm(arena_run_t *run) { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c1cccd64..55ca7140 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -161,6 +161,7 @@ static const bool config_cache_oblivious = #include #endif +#include "jemalloc/internal/ph.h" #define RB_COMPACT #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" @@ -371,7 +372,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/base.h" @@ -402,7 +402,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include 
"jemalloc/internal/bitmap.h" -#include "jemalloc/internal/ph.h" #define JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_A @@ -495,7 +494,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/ph.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" @@ -527,7 +525,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/ph.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 519f0dda..70b6e2cd 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -4,257 +4,341 @@ * "The Pairing Heap: A New Form of Self-Adjusting Heap" * https://www.cs.cmu.edu/~sleator/papers/pairing-heaps.pdf * - * With auxiliary list, described in a follow on paper + * With auxiliary twopass list, described in a follow on paper. * * "Pairing Heaps: Experiments and Analysis" * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf * - * Where search/nsearch/last are not needed, ph.h outperforms rb.h by ~7x fewer - * cpu cycles, and ~4x fewer memory references. - * - * Tagging parent/prev pointers on the next list was also described in the - * original paper, such that only two pointers are needed. This is not - * implemented here, as it substantially increases the memory references - * needed when ph_remove is called, almost overshadowing the other performance - * gains. 
- * ******************************************************************************* */ -#ifdef JEMALLOC_H_TYPES -typedef struct ph_node_s ph_node_t; -typedef struct ph_heap_s ph_heap_t; +#ifndef PH_H_ +#define PH_H_ -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ph_node_s { - ph_node_t *subheaps; - ph_node_t *parent; - ph_node_t *next; - ph_node_t *prev; -}; - -struct ph_heap_s { - ph_node_t *root; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -ph_node_t *ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2); -ph_node_t *ph_merge(ph_node_t *heap1, ph_node_t *heap2); -ph_node_t *ph_merge_pairs(ph_node_t *subheaps); -void ph_merge_aux_list(ph_heap_t *l); -void ph_new(ph_heap_t *n); -ph_node_t *ph_first(ph_heap_t *l); -void ph_insert(ph_heap_t *l, ph_node_t *n); -ph_node_t *ph_remove_first(ph_heap_t *l); -void ph_remove(ph_heap_t *l, ph_node_t *n); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PH_C_)) - -/* Helper routines ************************************************************/ - -JEMALLOC_INLINE ph_node_t * -ph_merge_ordered(ph_node_t *heap1, ph_node_t *heap2) -{ - - assert(heap1 != NULL); - assert(heap2 != NULL); - assert ((uintptr_t)heap1 <= (uintptr_t)heap2); - - heap2->parent = heap1; - heap2->prev = NULL; - heap2->next = heap1->subheaps; - if (heap1->subheaps != NULL) - heap1->subheaps->prev = heap2; - heap1->subheaps = heap2; - return (heap1); +/* Node structure. 
*/ +#define phn(a_type) \ +struct { \ + a_type *phn_prev; \ + a_type *phn_next; \ + a_type *phn_lchild; \ } -JEMALLOC_INLINE ph_node_t * -ph_merge(ph_node_t *heap1, ph_node_t *heap2) -{ - - if (heap1 == NULL) - return (heap2); - if (heap2 == NULL) - return (heap1); - /* Optional: user-settable comparison function */ - if ((uintptr_t)heap1 < (uintptr_t)heap2) - return (ph_merge_ordered(heap1, heap2)); - else - return (ph_merge_ordered(heap2, heap1)); +/* Root structure. */ +#define ph(a_type) \ +struct { \ + a_type *ph_root; \ } -JEMALLOC_INLINE ph_node_t * -ph_merge_pairs(ph_node_t *subheaps) -{ +/* Internal utility macros. */ +#define phn_lchild_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_lchild) +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ + a_phn->a_field.phn_lchild = a_lchild; \ +} while (0) - if (subheaps == NULL) - return (NULL); - if (subheaps->next == NULL) - return (subheaps); - { - ph_node_t *l0 = subheaps; - ph_node_t *l1 = l0->next; - ph_node_t *lrest = l1->next; +#define phn_next_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_next) +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ + a_phn->a_field.phn_prev = a_prev; \ +} while (0) - if (lrest != NULL) - lrest->prev = NULL; - l1->next = NULL; - l1->prev = NULL; - l0->next = NULL; - l0->prev = NULL; - return (ph_merge(ph_merge(l0, l1), ph_merge_pairs(lrest))); - } -} +#define phn_prev_get(a_type, a_field, a_phn) \ + (a_phn->a_field.phn_prev) +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ + a_phn->a_field.phn_next = a_next; \ +} while (0) + +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ + a_type *phn0child; \ + \ + assert(a_phn0 != NULL); \ + assert(a_phn1 != NULL); \ + assert(a_cmp(a_phn0, a_phn1) <= 0); \ + \ + phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ + phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ + phn_next_set(a_type, a_field, a_phn1, phn0child); \ + if (phn0child != NULL) \ + phn_prev_set(a_type, 
a_field, phn0child, a_phn1); \ + phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ +} while (0) + +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ + if (a_phn0 == NULL) \ + r_phn = a_phn1; \ + else if (a_phn1 == NULL) \ + r_phn = a_phn0; \ + else if (a_cmp(a_phn0, a_phn1) < 0) { \ + phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ + a_cmp); \ + r_phn = a_phn0; \ + } else { \ + phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ + a_cmp); \ + r_phn = a_phn1; \ + } \ +} while (0) + +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *head = NULL; \ + a_type *tail = NULL; \ + a_type *phn0 = a_phn; \ + a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ + \ + /* \ + * Multipass merge, wherein the first two elements of a FIFO \ + * are repeatedly merged, and each result is appended to the \ + * singly linked FIFO, until the FIFO contains only a single \ + * element. We start with a sibling list but no reference to \ + * its tail, so we do a single pass over the sibling list to \ + * populate the FIFO. 
\ + */ \ + if (phn1 != NULL) { \ + a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ + if (phnrest != NULL) \ + phn_prev_set(a_type, a_field, phnrest, NULL); \ + phn_prev_set(a_type, a_field, phn0, NULL); \ + phn_next_set(a_type, a_field, phn0, NULL); \ + phn_prev_set(a_type, a_field, phn1, NULL); \ + phn_next_set(a_type, a_field, phn1, NULL); \ + phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ + head = tail = phn0; \ + phn0 = phnrest; \ + while (phn0 != NULL) { \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + phnrest = phn_next_get(a_type, a_field, \ + phn1); \ + if (phnrest != NULL) { \ + phn_prev_set(a_type, a_field, \ + phnrest, NULL); \ + } \ + phn_prev_set(a_type, a_field, phn0, \ + NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + phn_prev_set(a_type, a_field, phn1, \ + NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = phnrest; \ + } else { \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = NULL; \ + } \ + } \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, phn0); \ + if (phn1 != NULL) { \ + while (true) { \ + head = phn_next_get(a_type, a_field, \ + phn1); \ + assert(phn_prev_get(a_type, a_field, \ + phn0) == NULL); \ + phn_next_set(a_type, a_field, phn0, \ + NULL); \ + assert(phn_prev_get(a_type, a_field, \ + phn1) == NULL); \ + phn_next_set(a_type, a_field, phn1, \ + NULL); \ + phn_merge(a_type, a_field, phn0, phn1, \ + a_cmp, phn0); \ + if (head == NULL) \ + break; \ + phn_next_set(a_type, a_field, tail, \ + phn0); \ + tail = phn0; \ + phn0 = head; \ + phn1 = phn_next_get(a_type, a_field, \ + phn0); \ + } \ + } \ + } \ + r_phn = phn0; \ +} while (0) + +#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ + a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ + if (phn != NULL) { \ + phn_prev_set(a_type, 
a_field, a_ph->ph_root, NULL); \ + phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ + phn_prev_set(a_type, a_field, phn, NULL); \ + ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ + assert(phn_next_get(a_type, a_field, phn) == NULL); \ + phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ + a_ph->ph_root); \ + } \ +} while (0) + +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ + a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ + if (lchild == NULL) \ + r_phn = NULL; \ + else { \ + ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ + r_phn); \ + } \ +} while (0) /* - * Merge the aux list into the root node. + * The ph_proto() macro generates function prototypes that correspond to the + * functions generated by an equivalently parameterized call to ph_gen(). */ -JEMALLOC_INLINE void -ph_merge_aux_list(ph_heap_t *l) -{ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +a_attr void a_prefix##new(a_ph_type *ph); \ +a_attr bool a_prefix##empty(a_ph_type *ph); \ +a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ +a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); - if (l->root == NULL) - return; - if (l->root->next != NULL) { - ph_node_t *l0 = l->root->next; - ph_node_t *l1 = l0->next; - ph_node_t *lrest = NULL; - - /* Multipass merge. */ - while (l1 != NULL) { - lrest = l1->next; - if (lrest != NULL) - lrest->prev = NULL; - l1->next = NULL; - l1->prev = NULL; - l0->next = NULL; - l0->prev = NULL; - l0 = ph_merge(l0, l1); - l1 = lrest; - } - l->root->next = NULL; - l->root = ph_merge(l->root, l0); - } +/* + * The ph_gen() macro generates a type-specific pairing heap implementation, + * based on the above cpp macros. 
+ */ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +a_attr void \ +a_prefix##new(a_ph_type *ph) \ +{ \ + \ + memset(ph, 0, sizeof(ph(a_type))); \ +} \ +a_attr bool \ +a_prefix##empty(a_ph_type *ph) { \ + \ + return (ph->ph_root == NULL); \ +} \ +a_attr a_type * \ +a_prefix##first(a_ph_type *ph) \ +{ \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + return (ph->ph_root); \ +} \ +a_attr void \ +a_prefix##insert(a_ph_type *ph, a_type *phn) \ +{ \ + \ + memset(&phn->a_field, 0, sizeof(phn(a_type))); \ + \ + /* \ + * Treat the root as an aux list during insertion, and lazily \ + * merge during a_prefix##remove_first(). For elements that \ + * are inserted, then removed via a_prefix##remove() before the \ + * aux list is ever processed, this makes insert/remove \ + * constant-time, whereas eager merging would make insert \ + * O(log n). \ + */ \ + if (ph->ph_root == NULL) \ + ph->ph_root = phn; \ + else { \ + phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ + a_field, ph->ph_root)); \ + if (phn_next_get(a_type, a_field, ph->ph_root) != \ + NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, ph->ph_root), \ + phn); \ + } \ + phn_prev_set(a_type, a_field, phn, ph->ph_root); \ + phn_next_set(a_type, a_field, ph->ph_root, phn); \ + } \ +} \ +a_attr a_type * \ +a_prefix##remove_first(a_ph_type *ph) \ +{ \ + a_type *ret; \ + \ + if (ph->ph_root == NULL) \ + return (NULL); \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + \ + ret = ph->ph_root; \ + \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + \ + return (ret); \ +} \ +a_attr void \ +a_prefix##remove(a_ph_type *ph, a_type *phn) \ +{ \ + a_type *replace, *parent; \ + \ + /* \ + * We can delete from aux list without merging it, but we need \ + * to merge if we are dealing with the root node. 
\ + */ \ + if (ph->ph_root == phn) { \ + ph_merge_aux(a_type, a_field, ph, a_cmp); \ + if (ph->ph_root == phn) { \ + ph_merge_children(a_type, a_field, ph->ph_root, \ + a_cmp, ph->ph_root); \ + return; \ + } \ + } \ + \ + /* Get parent (if phn is leftmost child) before mutating. */ \ + if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ + if (phn_lchild_get(a_type, a_field, parent) != phn) \ + parent = NULL; \ + } \ + /* Find a possible replacement node, and link to parent. */ \ + ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ + /* Set next/prev for sibling linked list. */ \ + if (replace != NULL) { \ + if (parent != NULL) { \ + phn_prev_set(a_type, a_field, replace, parent); \ + phn_lchild_set(a_type, a_field, parent, \ + replace); \ + } else { \ + phn_prev_set(a_type, a_field, replace, \ + phn_prev_get(a_type, a_field, phn)); \ + if (phn_prev_get(a_type, a_field, phn) != \ + NULL) { \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + replace); \ + } \ + } \ + phn_next_set(a_type, a_field, replace, \ + phn_next_get(a_type, a_field, phn)); \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + replace); \ + } \ + } else { \ + if (parent != NULL) { \ + a_type *next = phn_next_get(a_type, a_field, \ + phn); \ + phn_lchild_set(a_type, a_field, parent, next); \ + if (next != NULL) { \ + phn_prev_set(a_type, a_field, next, \ + parent); \ + } \ + } else { \ + assert(phn_prev_get(a_type, a_field, phn) != \ + NULL); \ + phn_next_set(a_type, a_field, \ + phn_prev_get(a_type, a_field, phn), \ + phn_next_get(a_type, a_field, phn)); \ + } \ + if (phn_next_get(a_type, a_field, phn) != NULL) { \ + phn_prev_set(a_type, a_field, \ + phn_next_get(a_type, a_field, phn), \ + phn_prev_get(a_type, a_field, phn)); \ + } \ + } \ } -/* User API *******************************************************************/ - -JEMALLOC_INLINE void -ph_new(ph_heap_t *n) -{ - - 
memset(n, 0, sizeof(ph_heap_t)); -} - -JEMALLOC_INLINE ph_node_t * -ph_first(ph_heap_t *l) -{ - - /* - * For the cost of an extra pointer, a l->min could be stored instead of - * merging the aux list here. Current users always call ph_remove(l, - * ph_first(l)) though, and the aux list must always be merged for - * delete of the min node anyway. - */ - ph_merge_aux_list(l); - return (l->root); -} - -JEMALLOC_INLINE void -ph_insert(ph_heap_t *l, ph_node_t *n) -{ - - memset(n, 0, sizeof(ph_node_t)); - - /* - * Non-aux list insert: - * - * l->root = ph_merge(l->root, n); - * - * Aux list insert: - */ - if (l->root == NULL) - l->root = n; - else { - n->next = l->root->next; - if (l->root->next != NULL) - l->root->next->prev = n; - n->prev = l->root; - l->root->next = n; - } -} - -JEMALLOC_INLINE ph_node_t * -ph_remove_first(ph_heap_t *l) -{ - ph_node_t *ret; - - ph_merge_aux_list(l); - if (l->root == NULL) - return (NULL); - - ret = l->root; - - l->root = ph_merge_pairs(l->root->subheaps); - - return (ret); -} - -JEMALLOC_INLINE void -ph_remove(ph_heap_t *l, ph_node_t *n) -{ - ph_node_t *replace; - - /* - * We can delete from aux list without merging it, but we need to merge - * if we are dealing with the root node. - */ - if (l->root == n) { - ph_merge_aux_list(l); - if (l->root == n) { - ph_remove_first(l); - return; - } - } - - /* Find a possible replacement node, and link to parent. */ - replace = ph_merge_pairs(n->subheaps); - if (n->parent != NULL && n->parent->subheaps == n) { - if (replace != NULL) - n->parent->subheaps = replace; - else - n->parent->subheaps = n->next; - } - /* Set next/prev for sibling linked list. 
*/ - if (replace != NULL) { - replace->parent = n->parent; - replace->prev = n->prev; - if (n->prev != NULL) - n->prev->next = replace; - replace->next = n->next; - if (n->next != NULL) - n->next->prev = replace; - } else { - if (n->prev != NULL) - n->prev->next = n->next; - if (n->next != NULL) - n->next->prev = n->prev; - } -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* PH_H_ */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 969c73df..551cb937 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -82,7 +82,6 @@ arena_nthreads_dec arena_nthreads_get arena_nthreads_inc arena_palloc -arena_ph_to_miscelm arena_postfork_child arena_postfork_parent arena_prefork @@ -101,6 +100,12 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption +arena_run_heap_empty +arena_run_heap_first +arena_run_heap_insert +arena_run_heap_new +arena_run_heap_remove_first +arena_run_heap_remove arena_run_regind arena_run_to_miscelm arena_salloc @@ -381,15 +386,6 @@ pages_map pages_purge pages_trim pages_unmap -ph_first -ph_insert -ph_merge -ph_merge_aux_list -ph_merge_ordered -ph_merge_pairs -ph_new -ph_remove_first -ph_remove pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu diff --git a/src/arena.c b/src/arena.c index 1d30de57..d884dc4c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -59,6 +59,23 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) return (arena_mapbits_size_decode(mapbits)); } +JEMALLOC_INLINE_C int +arena_run_addr_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) +{ + uintptr_t a_miscelm = (uintptr_t)a; + uintptr_t b_miscelm = (uintptr_t)b; + + assert(a != NULL); + assert(b != NULL); + + return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); +} + +/* Generate pairing heap functions. 
*/ +ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, + ph_link, arena_run_addr_comp) + static size_t run_quantize_floor_compute(size_t size) { @@ -182,7 +199,7 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); #endif -static ph_heap_t * +static arena_run_heap_t * arena_runs_avail_get(arena_t *arena, szind_t ind) { @@ -200,8 +217,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - ph_insert(arena_runs_avail_get(arena, ind), - &arena_miscelm_get_mutable(chunk, pageind)->ph_link); + arena_run_heap_insert(arena_runs_avail_get(arena, ind), + arena_miscelm_get_mutable(chunk, pageind)); } static void @@ -212,8 +229,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - ph_remove(arena_runs_avail_get(arena, ind), - &arena_miscelm_get_mutable(chunk, pageind)->ph_link); + arena_run_heap_remove(arena_runs_avail_get(arena, ind), + arena_miscelm_get_mutable(chunk, pageind)); } static void @@ -1065,12 +1082,10 @@ arena_run_first_best_fit(arena_t *arena, size_t size) ind = size2index(run_quantize_ceil(size)); for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { - ph_node_t *node = ph_first(arena_runs_avail_get(arena, i)); - if (node != NULL) { - arena_chunk_map_misc_t *miscelm = - arena_ph_to_miscelm(node); + arena_chunk_map_misc_t *miscelm = arena_run_heap_first( + arena_runs_avail_get(arena, i)); + if (miscelm != NULL) return (&miscelm->run); - } } return (NULL); @@ -2052,45 +2067,26 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, 0)); } -static arena_run_t * -arena_bin_runs_first(arena_bin_t *bin) -{ - ph_node_t *node; - arena_chunk_map_misc_t 
*miscelm; - - node = ph_first(&bin->runs); - if (node == NULL) - return (NULL); - miscelm = arena_ph_to_miscelm(node); - return (&miscelm->run); -} - static void arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - ph_insert(&bin->runs, &miscelm->ph_link); -} - -static void -arena_bin_runs_remove(arena_bin_t *bin, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - - ph_remove(&bin->runs, &miscelm->ph_link); + arena_run_heap_insert(&bin->runs, miscelm); } static arena_run_t * arena_bin_nonfull_run_tryget(arena_bin_t *bin) { - arena_run_t *run = arena_bin_runs_first(bin); - if (run != NULL) { - arena_bin_runs_remove(bin, run); - if (config_stats) - bin->stats.reruns++; - } - return (run); + arena_chunk_map_misc_t *miscelm; + + miscelm = arena_run_heap_remove_first(&bin->runs); + if (miscelm == NULL) + return (NULL); + if (config_stats) + bin->stats.reruns++; + + return (&miscelm->run); } static arena_run_t * @@ -2645,13 +2641,16 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, &chunk->node), bin); arena_bin_info_t *bin_info = &arena_bin_info[binind]; + /* + * The following block's conditional is necessary because if the + * run only contains one region, then it never gets inserted + * into the non-full runs tree. + */ if (bin_info->nregs != 1) { - /* - * This block's conditional is necessary because if the - * run only contains one region, then it never gets - * inserted into the non-full runs tree. - */ - arena_bin_runs_remove(bin, run); + arena_chunk_map_misc_t *miscelm = + arena_run_to_miscelm(run); + + arena_run_heap_remove(&bin->runs, miscelm); } } } @@ -3312,7 +3311,7 @@ arena_new(unsigned ind) arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. 
*/ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(ph_heap_t) * + arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly @@ -3372,7 +3371,7 @@ arena_new(unsigned ind) arena->ndirty = 0; for(i = 0; i < runs_avail_nclasses; i++) - ph_new(&arena->runs_avail[i]); + arena_run_heap_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3401,7 +3400,7 @@ arena_new(unsigned ind) if (malloc_mutex_init(&bin->lock)) return (NULL); bin->runcur = NULL; - ph_new(&bin->runs); + arena_run_heap_new(&bin->runs); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } diff --git a/src/ph.c b/src/ph.c deleted file mode 100644 index 051a20d7..00000000 --- a/src/ph.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_PH_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/unit/ph.c b/test/unit/ph.c index 103475b4..da442f07 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -3,58 +3,275 @@ typedef struct node_s node_t; struct node_s { - ph_node_t link; +#define NODE_MAGIC 0x9823af7e + uint32_t magic; + phn(node_t) link; + uint64_t key; }; +static int +node_cmp(const node_t *a, const node_t *b) +{ + int ret; + + ret = (a->key > b->key) - (a->key < b->key); + if (ret == 0) { + /* + * Duplicates are not allowed in the heap, so force an + * arbitrary ordering for non-identical items with equal keys. 
+ */ + ret = (((uintptr_t)a) > ((uintptr_t)b)) + - (((uintptr_t)a) < ((uintptr_t)b)); + } + return (ret); +} + +static int +node_cmp_magic(const node_t *a, const node_t *b) { + + assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + + return (node_cmp(a, b)); +} + +typedef ph(node_t) heap_t; +ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); + +static void +node_print(const node_t *node, unsigned depth) +{ + unsigned i; + node_t *leftmost_child, *sibling; + + for (i = 0; i < depth; i++) + malloc_printf("\t"); + malloc_printf("%2"FMTu64"\n", node->key); + + leftmost_child = phn_lchild_get(node_t, link, node); + if (leftmost_child == NULL) + return; + node_print(leftmost_child, depth + 1); + + for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != + NULL; sibling = phn_next_get(node_t, link, sibling)) { + node_print(sibling, depth + 1); + } +} + +static void +heap_print(const heap_t *heap) +{ + node_t *auxelm; + + malloc_printf("vvv heap %p vvv\n", heap); + if (heap->ph_root == NULL) + goto label_return; + + node_print(heap->ph_root, 0); + + for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; + auxelm = phn_next_get(node_t, link, auxelm)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, auxelm)), auxelm, + "auxelm's prev doesn't link to auxelm"); + node_print(auxelm, 0); + } + +label_return: + malloc_printf("^^^ heap %p ^^^\n", heap); +} + +static unsigned +node_validate(const node_t *node, const node_t *parent) +{ + unsigned nnodes = 1; + node_t *leftmost_child, *sibling; + + if (parent != NULL) { + assert_d_ge(node_cmp_magic(node, parent), 0, + "Child is less than parent"); + } + + leftmost_child = phn_lchild_get(node_t, link, node); + if (leftmost_child == NULL) + return (nnodes); + assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + (void *)node, "Leftmost child does not link to node"); + nnodes += node_validate(leftmost_child, 
node); + + for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != + NULL; sibling = phn_next_get(node_t, link, sibling)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, sibling)), sibling, + "sibling's prev doesn't link to sibling"); + nnodes += node_validate(sibling, node); + } + return (nnodes); +} + +static unsigned +heap_validate(const heap_t *heap) +{ + unsigned nnodes = 0; + node_t *auxelm; + + if (heap->ph_root == NULL) + goto label_return; + + nnodes += node_validate(heap->ph_root, NULL); + + for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; + auxelm = phn_next_get(node_t, link, auxelm)) { + assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + link, auxelm)), auxelm, + "auxelm's prev doesn't link to auxelm"); + nnodes += node_validate(auxelm, NULL); + } + +label_return: + if (false) + heap_print(heap); + return (nnodes); +} + TEST_BEGIN(test_ph_empty) { - ph_heap_t heap; + heap_t heap; - ph_new(&heap); - - assert_ptr_null(ph_first(&heap), "Unexpected node"); + heap_new(&heap); + assert_true(heap_empty(&heap), "Heap should be empty"); + assert_ptr_null(heap_first(&heap), "Unexpected node"); } TEST_END +static void +node_remove(heap_t *heap, node_t *node) +{ + + heap_remove(heap, node); + + node->magic = 0; +} + +static node_t * +node_remove_first(heap_t *heap) +{ + node_t *node = heap_remove_first(heap); + node->magic = 0; + return (node); +} + TEST_BEGIN(test_ph_random) { #define NNODES 25 +#define NBAGS 250 #define SEED 42 sfmt_t *sfmt; - ph_heap_t heap; + uint64_t bag[NNODES]; + heap_t heap; node_t nodes[NNODES]; unsigned i, j, k; sfmt = init_gen_rand(SEED); - for (i = 0; i < 2; i++) { + for (i = 0; i < NBAGS; i++) { + switch (i) { + case 0: + /* Insert in order. */ + for (j = 0; j < NNODES; j++) + bag[j] = j; + break; + case 1: + /* Insert in reverse order. 
*/ + for (j = 0; j < NNODES; j++) + bag[j] = NNODES - j - 1; + break; + default: + for (j = 0; j < NNODES; j++) + bag[j] = gen_rand64_range(sfmt, NNODES); + } + for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ - ph_new(&heap); + heap_new(&heap); + assert_u_eq(heap_validate(&heap), 0, + "Incorrect node count"); + for (k = 0; k < j; k++) { + nodes[k].magic = NODE_MAGIC; + nodes[k].key = bag[k]; + } /* Insert nodes. */ for (k = 0; k < j; k++) { - ph_insert(&heap, &nodes[k].link); - - assert_ptr_not_null(ph_first(&heap), - "Heap should not be empty"); + heap_insert(&heap, &nodes[k]); + if (i % 13 == 12) { + /* Trigger merging. */ + assert_ptr_not_null(heap_first(&heap), + "Heap should not be empty"); + } + assert_u_eq(heap_validate(&heap), k + 1, + "Incorrect node count"); } + assert_false(heap_empty(&heap), + "Heap should not be empty"); + /* Remove nodes. */ - switch (i % 2) { + switch (i % 4) { case 0: - for (k = 0; k < j; k++) - ph_remove(&heap, &nodes[k].link); + for (k = 0; k < j; k++) { + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + node_remove(&heap, &nodes[k]); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + } break; case 1: - for (k = j; k > 0; k--) - ph_remove(&heap, &nodes[k-1].link); + for (k = j; k > 0; k--) { + node_remove(&heap, &nodes[k-1]); + assert_u_eq(heap_validate(&heap), k - 1, + "Incorrect node count"); + } break; - default: + case 2: { + node_t *prev = NULL; + for (k = 0; k < j; k++) { + node_t *node = node_remove_first(&heap); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + if (prev != NULL) { + assert_d_ge(node_cmp(node, + prev), 0, + "Bad removal order"); + } + prev = node; + } + break; + } case 3: { + node_t *prev = NULL; + for (k = 0; k < j; k++) { + node_t *node = heap_first(&heap); + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + if (prev != NULL) { + assert_d_ge(node_cmp(node, + prev), 0, + "Bad removal order"); + } + 
node_remove(&heap, node); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + prev = node; + } + break; + } default: not_reached(); } - assert_ptr_null(ph_first(&heap), - "Heap should not be empty"); + assert_ptr_null(heap_first(&heap), + "Heap should be empty"); + assert_true(heap_empty(&heap), "Heap should be empty"); } } fini_gen_rand(sfmt); From 96aa67aca89725f0b1df3257421a3d0a48eb2700 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 4 Apr 2016 19:55:19 -0400 Subject: [PATCH 0184/2608] Clean up char vs. uint8_t in junk filling code. Consistently use uint8_t rather than char for junk filling code. --- include/jemalloc/internal/util.h | 4 ++-- test/unit/junk.c | 16 ++++++++-------- test/unit/zero.c | 16 +++++++++------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 949a0e0a..a0c2203d 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -41,8 +41,8 @@ #define MALLOC_PRINTF_BUFSIZE 4096 /* Junk fill patterns. 
*/ -#define JEMALLOC_ALLOC_JUNK 0xa5 -#define JEMALLOC_FREE_JUNK 0x5a +#define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) /* * Wrap a cpp argument that contains commas such that it isn't broken up into diff --git a/test/unit/junk.c b/test/unit/junk.c index f4e62261..fecf6fae 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -29,7 +29,7 @@ arena_dalloc_junk_small_intercept(void *ptr, arena_bin_info_t *bin_info) arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -44,7 +44,7 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) arena_dalloc_junk_large_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_c_eq(((char *)ptr)[i], JEMALLOC_FREE_JUNK, + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -69,7 +69,7 @@ huge_dalloc_junk_intercept(void *ptr, size_t usize) static void test_junk(size_t sz_min, size_t sz_max) { - char *s; + uint8_t *s; size_t sz_prev, sz, i; if (opt_junk_free) { @@ -82,23 +82,23 @@ test_junk(size_t sz_min, size_t sz_max) } sz_prev = 0; - s = (char *)mallocx(sz_min, 0); + s = (uint8_t *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_c_eq(s[0], 'a', + assert_u_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_c_eq(s[sz_prev-1], 'a', + assert_u_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_c_eq(s[i], JEMALLOC_ALLOC_JUNK, + assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " 
"junk-filled", i, sz); } @@ -107,7 +107,7 @@ test_junk(size_t sz_min, size_t sz_max) if (xallocx(s, sz+1, 0, 0) == sz) { watch_junking(s); - s = (char *)rallocx(s, sz+1, 0); + s = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); assert_true(!opt_junk_free || saw_junking, diff --git a/test/unit/zero.c b/test/unit/zero.c index 93afc2b8..30ebe37a 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -8,39 +8,41 @@ const char *malloc_conf = static void test_zero(size_t sz_min, size_t sz_max) { - char *s; + uint8_t *s; size_t sz_prev, sz, i; +#define MAGIC ((uint8_t)0x61) sz_prev = 0; - s = (char *)mallocx(sz_min, 0); + s = (uint8_t *)mallocx(sz_min, 0); assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_c_eq(s[0], 'a', + assert_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_c_eq(s[sz_prev-1], 'a', + assert_u_eq(s[sz_prev-1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { - assert_c_eq(s[i], 0x0, + assert_u_eq(s[i], 0x0, "Newly allocated byte %zu/%zu isn't zero-filled", i, sz); - s[i] = 'a'; + s[i] = MAGIC; } if (xallocx(s, sz+1, 0, 0) == sz) { - s = (char *)rallocx(s, sz+1, 0); + s = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)s, "Unexpected rallocx() failure"); } } dallocx(s, 0); +#undef MAGIC } TEST_BEGIN(test_zero_small) From 245ae6036c09cc11a72fab4335495d95cddd5beb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 11:54:44 -0700 Subject: [PATCH 0185/2608] Support --with-lg-page values larger than actual page size. During over-allocation in preparation for creating aligned mappings, allocate one more page than necessary if PAGE is the actual page size, so that trimming still succeeds even if the system returns a mapping that has less than PAGE alignment. 
This allows compiling with e.g. 64 KiB "pages" on systems that actually use 4 KiB pages. Note that for e.g. --with-lg-page=21, it is also necessary to increase the chunk size (e.g. --with-malloc-conf=lg_chunk:22) so that there are at least two "pages" per chunk. In practice this isn't a particularly compelling configuration because so much (unusable) virtual memory is dedicated to chunk headers. --- include/jemalloc/internal/bitmap.h | 4 ++-- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- src/arena.c | 2 +- src/chunk_mmap.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 894695f4..36f38b59 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -17,8 +17,8 @@ typedef unsigned long bitmap_t; /* * Do some analysis on how big the bitmap is before we use a tree. For a brute - * force linear search, if we would have to call ffsl more than 2^3 times, use a - * tree instead. + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. */ #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 # define USE_TREE diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 55ca7140..0b57b82a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -741,7 +741,7 @@ sa2u(size_t size, size_t alignment) * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. */ - if (usize + large_pad + alignment - PAGE <= arena_maxrun) + if (usize + large_pad + alignment <= arena_maxrun) return (usize); } @@ -771,7 +771,7 @@ sa2u(size_t size, size_t alignment) * Calculate the multi-chunk mapping that huge_palloc() would need in * order to guarantee the alignment. 
*/ - if (usize + alignment - PAGE < usize) { + if (usize + alignment < usize) { /* size_t overflow. */ return (0); } diff --git a/src/arena.c b/src/arena.c index d884dc4c..3373e1d8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2500,7 +2500,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, return (NULL); alignment = PAGE_CEILING(alignment); - alloc_size = usize + large_pad + alignment - PAGE; + alloc_size = usize + large_pad + alignment; malloc_mutex_lock(&arena->lock); run = arena_run_alloc_large(arena, alloc_size, false); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 56b2ee42..e2e66bc9 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -9,7 +9,7 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) void *ret; size_t alloc_size; - alloc_size = size + alignment - PAGE; + alloc_size = size + alignment; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); From 667eca2ac215153855e62a75263df7accf25cdbc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 13:05:21 -0700 Subject: [PATCH 0186/2608] Simplify RTREE_HEIGHT_MAX definition. Use 1U rather than ZU(1) in macro definitions, so that the preprocessor can evaluate the resulting expressions. --- include/jemalloc/internal/rtree.h | 33 ++++--------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 36aa002b..8d0c584d 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -15,35 +15,10 @@ typedef struct rtree_s rtree_t; * machine address width. */ #define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (ZU(1) << LG_RTREE_BITS_PER_LEVEL) -/* - * Avoid math in RTREE_HEIGHT_MAX definition so that it can be used in cpp - * conditionals. 
The following defininitions are precomputed equivalents to: - * - * #define RTREE_HEIGHT_MAX \ - * ((ZU(1) << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) - */ -#if LG_RTREE_BITS_PER_LEVEL == 2 -# if LG_SIZEOF_PTR == 3 -# define RTREE_HEIGHT_MAX 16 -# elif LG_SIZEOF_PTR == 2 -# define RTREE_HEIGHT_MAX 8 -# endif -#elif LG_RTREE_BITS_PER_LEVEL == 3 -# if LG_SIZEOF_PTR == 3 -# define RTREE_HEIGHT_MAX 8 -# elif LG_SIZEOF_PTR == 2 -# define RTREE_HEIGHT_MAX 4 -# endif -#elif LG_RTREE_BITS_PER_LEVEL == 4 -# if LG_SIZEOF_PTR == 3 -# define RTREE_HEIGHT_MAX 4 -# elif LG_SIZEOF_PTR == 2 -# define RTREE_HEIGHT_MAX 2 -# endif -#else -# error Unsupported LG_RTREE_BITS_PER_LEVEL -#endif +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +/* Maximum rtree height. */ +#define RTREE_HEIGHT_MAX \ + ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) From e7642715ac535cf88585d4e5ca191c8042cc2399 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 11 Apr 2016 18:47:18 -0700 Subject: [PATCH 0187/2608] Fix malloc_stats_print() to print correct opt.narenas value. This regression was caused by 8f683b94a751c65af8f9fa25970ccf2917b96bb8 (Make opt_narenas unsigned rather than size_t.). 
--- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index a7249479..87b09e58 100644 --- a/src/stats.c +++ b/src/stats.c @@ -468,7 +468,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_UNSIGNED(n) \ if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ + " opt."#n": %u\n", uv); \ } #define OPT_WRITE_SIZE_T(n) \ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ From bc26d7d99b3d3dc7633a28da622087ed3daa9a94 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 12 Apr 2016 09:50:10 +0200 Subject: [PATCH 0188/2608] Cleanup MSVC project, embed PDB data inside static .lib --- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 22 +++++++++---------- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 --- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 0a6c4e61..9315022d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -106,7 +106,6 @@ - @@ -252,7 +251,7 @@ Disabled _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -269,7 +268,7 @@ JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreadedDebug - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -285,7 +284,7 @@ Disabled _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 
$(OutputPath)$(TargetName).pdb @@ -302,8 +301,9 @@ JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreadedDebug - 4090;4146;4244;4267;4334 - $(OutputPath)$(TargetName).pdb + 4090;4146;4267;4334 + OldStyle + false Windows @@ -320,7 +320,7 @@ true _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -341,7 +341,7 @@ _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreaded - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -361,7 +361,7 @@ true ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) - 4090;4146;4244;4267;4334 + 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -382,8 +382,8 @@ _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreaded - 4090;4146;4244;4267;4334 - $(OutputPath)$(TargetName).pdb + 4090;4146;4267;4334 + OldStyle Windows diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 412c24d6..88c15efa 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -229,9 +229,6 @@ Source Files - - Source Files - Source Files From 00432331b83526e3bb82f7c2aba493bf254cb9c0 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 12 Apr 2016 09:50:54 +0200 Subject: [PATCH 0189/2608] Fix 64-to-32 conversion warnings in 32-bit mode --- src/arena.c | 26 +++++++++++++++----------- 1 file 
changed, 15 insertions(+), 11 deletions(-) diff --git a/src/arena.c b/src/arena.c index 3373e1d8..a9566af1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1268,7 +1268,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) sum += arena->decay_backlog[i] * h_steps[i]; - npages_limit_backlog = (sum >> SMOOTHSTEP_BFP); + npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); } @@ -1276,7 +1276,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) static void arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) { - uint64_t nadvance; + uint64_t nadvance_u64; nstime_t delta; size_t ndirty_delta; @@ -1285,27 +1285,31 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) nstime_copy(&delta, time); nstime_subtract(&delta, &arena->decay_epoch); - nadvance = nstime_divide(&delta, &arena->decay_interval); - assert(nadvance > 0); + nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); + assert(nadvance_u64 > 0); - /* Add nadvance decay intervals to epoch. */ + /* Add nadvance_u64 decay intervals to epoch. */ nstime_copy(&delta, &arena->decay_interval); - nstime_imultiply(&delta, nadvance); + nstime_imultiply(&delta, nadvance_u64); nstime_add(&arena->decay_epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); /* Update the backlog. 
*/ - if (nadvance >= SMOOTHSTEP_NSTEPS) { + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance], - (SMOOTHSTEP_NSTEPS - nadvance) * sizeof(size_t)); - if (nadvance > 1) { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - - nadvance], 0, (nadvance-1) * sizeof(size_t)); + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - From bab58ef401b0dec8230bd2d371e135009cd06924 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 12 Apr 2016 12:39:02 -0700 Subject: [PATCH 0190/2608] Fix more 64-to-32 conversion warnings. --- test/unit/stats.c | 10 +++++----- test/unit/util.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/test/unit/stats.c b/test/unit/stats.c index 6e803160..a9a3981f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -220,11 +220,11 @@ TEST_BEGIN(test_stats_arenas_large) if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_zu_gt(nmalloc, 0, + assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_zu_ge(nmalloc, ndalloc, + assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_zu_gt(nrequests, 0, + assert_u64_gt(nrequests, 0, "nrequests should be greater than zero"); } @@ -262,9 +262,9 @@ TEST_BEGIN(test_stats_arenas_huge) if (config_stats) { assert_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_zu_gt(nmalloc, 0, + assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_zu_ge(nmalloc, ndalloc, + assert_u64_ge(nmalloc, ndalloc, "nmalloc 
should be at least as large as ndalloc"); } diff --git a/test/unit/util.c b/test/unit/util.c index d24c1c79..c4333d53 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -4,27 +4,27 @@ unsigned i, pow2; \ t x; \ \ - assert_zu_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ \ for (i = 0; i < sizeof(t) * 8; i++) { \ - assert_zu_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) << i, \ - "Unexpected result"); \ + assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ + << i, "Unexpected result"); \ } \ \ for (i = 2; i < sizeof(t) * 8; i++) { \ - assert_zu_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ ((t)1) << i, "Unexpected result"); \ } \ \ for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - assert_zu_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ ((t)1) << (i+1), "Unexpected result"); \ } \ \ for (pow2 = 1; pow2 < 25; pow2++) { \ for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ x++) { \ - assert_zu_eq(pow2_ceil_##suf(x), \ + assert_##suf##_eq(pow2_ceil_##suf(x), \ ((t)1) << pow2, \ "Unexpected result, x=%"pri, x); \ } \ From 8413463f3a334f14c55589e57d3e82dd594ef479 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 12 Apr 2016 23:18:25 -0700 Subject: [PATCH 0191/2608] Fix a style nit. 
--- include/jemalloc/internal/ph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 70b6e2cd..4f91c333 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -211,7 +211,8 @@ a_prefix##new(a_ph_type *ph) \ memset(ph, 0, sizeof(ph(a_type))); \ } \ a_attr bool \ -a_prefix##empty(a_ph_type *ph) { \ +a_prefix##empty(a_ph_type *ph) \ +{ \ \ return (ph->ph_root == NULL); \ } \ From b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 13 Apr 2016 23:36:15 -0700 Subject: [PATCH 0192/2608] Add witness, a simple online locking validator. This resolves #358. --- Makefile.in | 4 +- include/jemalloc/internal/arena.h | 107 ++-- include/jemalloc/internal/base.h | 11 +- include/jemalloc/internal/chunk.h | 38 +- include/jemalloc/internal/chunk_dss.h | 16 +- include/jemalloc/internal/ctl.h | 24 +- include/jemalloc/internal/huge.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 36 +- include/jemalloc/internal/mb.h | 6 +- include/jemalloc/internal/mutex.h | 56 +- include/jemalloc/internal/private_symbols.txt | 15 +- include/jemalloc/internal/prof.h | 78 +-- include/jemalloc/internal/tcache.h | 22 +- include/jemalloc/internal/tsd.h | 6 +- include/jemalloc/internal/valgrind.h | 12 +- include/jemalloc/internal/witness.h | 103 ++++ src/arena.c | 568 +++++++++--------- src/base.c | 26 +- src/chunk.c | 186 +++--- src/chunk_dss.c | 46 +- src/ctl.c | 427 +++++++------ src/huge.c | 106 ++-- src/jemalloc.c | 385 +++++++----- src/mutex.c | 21 +- src/prof.c | 497 ++++++++------- src/quarantine.c | 4 +- src/tcache.c | 91 +-- src/tsd.c | 20 +- src/witness.c | 206 +++++++ src/zone.c | 8 +- test/unit/junk.c | 4 +- test/unit/prof_reset.c | 3 +- test/unit/witness.c | 278 +++++++++ 33 files changed, 2118 insertions(+), 1302 deletions(-) create mode 100644 include/jemalloc/internal/witness.h create mode 100644 src/witness.c create mode 
100644 test/unit/witness.c diff --git a/Makefile.in b/Makefile.in index 480ce1a1..a872eb5f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,7 +103,8 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ - $(srcroot)src/util.c + $(srcroot)src/util.c \ + $(srcroot)src/witness.c ifeq ($(enable_valgrind), 1) C_SRCS += $(srcroot)src/valgrind.c endif @@ -169,6 +170,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/util.c \ + $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 6f0fa76a..2130e9a0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -506,23 +506,25 @@ void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool cache); -extent_node_t *arena_node_alloc(arena_t *arena); -void arena_node_dalloc(arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero); -void arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize); -void arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, +extent_node_t *arena_node_alloc(tsd_t *tsd, arena_t *arena); +void arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool *zero); +void arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, + size_t usize); +void arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, +void 
arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, +bool arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(arena_t *arena); -bool arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult); -ssize_t arena_decay_time_get(arena_t *arena); -bool arena_decay_time_set(arena_t *arena, ssize_t decay_time); -void arena_maybe_purge(arena_t *arena); -void arena_purge(arena_t *arena, bool all); +ssize_t arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena); +bool arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, + ssize_t lg_dirty_mult); +ssize_t arena_decay_time_get(tsd_t *tsd, arena_t *arena); +bool arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time); +void arena_purge(tsd_t *tsd, arena_t *arena, bool all); +void arena_maybe_purge(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, @@ -542,11 +544,11 @@ void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_bits_t *bitselm); +void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); +void arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t 
*chunk, + void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET @@ -555,8 +557,8 @@ extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; #else void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr); +void arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, void *ptr); void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET @@ -567,27 +569,28 @@ bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(arena_t *arena); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(tsd_t *tsd, arena_t *arena); +bool arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, +void arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty); -void arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, - ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); +void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t 
*decay_time, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats); unsigned arena_nthreads_get(arena_t *arena); void arena_nthreads_inc(arena_t *arena); void arena_nthreads_dec(arena_t *arena); -arena_t *arena_new(unsigned ind); +arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); -void arena_prefork(arena_t *arena); -void arena_postfork_parent(arena_t *arena); -void arena_postfork_child(arena_t *arena); +void arena_prefork(tsd_t *tsd, arena_t *arena); +void arena_postfork_parent(tsd_t *tsd, arena_t *arena); +void arena_postfork_child(tsd_t *tsd, arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -644,21 +647,22 @@ void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(const void *ptr); -void arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(const void *ptr, size_t usize, +prof_tctx_t *arena_prof_tctx_get(tsd_t *tsd, const void *ptr); +void arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); void arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks); void arena_decay_tick(tsd_t *tsd, arena_t *arena); 
void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(const void *ptr, bool demote); +size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); #endif @@ -1035,7 +1039,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) } JEMALLOC_INLINE bool -arena_prof_accum(arena_t *arena, uint64_t accumbytes) +arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -1046,9 +1050,9 @@ arena_prof_accum(arena_t *arena, uint64_t accumbytes) { bool ret; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (ret); } } @@ -1184,7 +1188,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(const void *ptr) +arena_prof_tctx_get(tsd_t *tsd, const void *ptr) { prof_tctx_t *ret; arena_chunk_t *chunk; @@ -1205,13 +1209,14 @@ arena_prof_tctx_get(const void *ptr) ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(ptr); + ret = huge_prof_tctx_get(tsd, ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) +arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1242,12 +1247,12 @@ arena_prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(ptr, tctx); + huge_prof_tctx_set(tsd, ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t 
*old_tctx) +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); @@ -1270,7 +1275,7 @@ arena_prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(ptr); + huge_prof_tctx_reset(tsd, ptr); } } @@ -1285,7 +1290,7 @@ arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) if (unlikely(decay_ticker == NULL)) return; if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(arena, false); + arena_purge(tsd, arena, false); } JEMALLOC_ALWAYS_INLINE void @@ -1332,7 +1337,7 @@ arena_aalloc(const void *ptr) /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(const void *ptr, bool demote) +arena_salloc(tsd_t *tsd, const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1375,7 +1380,7 @@ arena_salloc(const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(ptr); + ret = huge_salloc(tsd, ptr); return (ret); } @@ -1445,7 +1450,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(ptr, false))); + assert(s2u(size) == s2u(arena_salloc(tsd, ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 39e46ee4..075a2a20 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -9,12 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(size_t size); -void base_stats_get(size_t *allocated, size_t *resident, size_t *mapped); +void *base_alloc(tsd_t *tsd, size_t size); +void base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, + size_t *mapped); bool base_boot(void); -void base_prefork(void); -void base_postfork_parent(void); -void base_postfork_child(void); +void base_prefork(tsd_t *tsd); +void base_postfork_parent(tsd_t *tsd); +void base_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index d800478d..6c3ad9bf 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -48,28 +48,32 @@ extern size_t chunk_npages; extern const chunk_hooks_t chunk_hooks_default; -chunk_hooks_t chunk_hooks_get(arena_t *arena); -chunk_hooks_t chunk_hooks_set(arena_t *arena, +chunk_hooks_t chunk_hooks_get(tsd_t *tsd, arena_t *arena); +chunk_hooks_t chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(const void *chunk, const extent_node_t *node); +bool chunk_register(tsd_t *tsd, const void *chunk, + const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, - bool dalloc_node); -void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -void 
chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); -bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length); +void *chunk_alloc_cache(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool dalloc_node); +void *chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +void chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); +void chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, + bool committed); +bool chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, + size_t length); bool chunk_boot(void); -void chunk_prefork(void); -void chunk_postfork_parent(void); -void chunk_postfork_child(void); +void chunk_prefork(tsd_t *tsd); +void chunk_postfork_parent(tsd_t *tsd); +void chunk_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 388f46be..7f3a09c7 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,15 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(void); -bool chunk_dss_prec_set(dss_prec_t dss_prec); -void *chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, - size_t alignment, 
bool *zero, bool *commit); -bool chunk_in_dss(void *chunk); +dss_prec_t chunk_dss_prec_get(tsd_t *tsd); +bool chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec); +void *chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool chunk_in_dss(tsd_t *tsd, void *chunk); bool chunk_dss_boot(void); -void chunk_dss_prefork(void); -void chunk_dss_postfork_parent(void); -void chunk_dss_postfork_child(void); +void chunk_dss_prefork(tsd_t *tsd); +void chunk_dss_postfork_parent(tsd_t *tsd); +void chunk_dss_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 9c5e9328..ec856996 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -21,13 +21,14 @@ struct ctl_named_node_s { /* If (nchildren == 0), this is a terminal node. */ unsigned nchildren; const ctl_node_t *children; - int (*ctl)(const size_t *, size_t, void *, size_t *, - void *, size_t); + int (*ctl)(tsd_t *, const size_t *, size_t, void *, + size_t *, void *, size_t); }; struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(const size_t *, size_t, size_t); + const ctl_named_node_t *(*index)(tsd_t *, const size_t *, size_t, + size_t); }; struct ctl_arena_stats_s { @@ -68,16 +69,17 @@ struct ctl_stats_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -int ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen); -int ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp); - -int ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, + size_t 
*miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); -void ctl_prefork(void); -void ctl_postfork_parent(void); -void ctl_postfork_child(void); +void ctl_prefork(tsd_t *tsd); +void ctl_postfork_parent(tsd_t *tsd); +void ctl_postfork_child(tsd_t *tsd); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index cb6f69e6..f19d3368 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -18,15 +18,15 @@ bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(void *, size_t); +typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(const void *ptr); -prof_tctx_t *huge_prof_tctx_get(const void *ptr); -void huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(const void *ptr); +size_t huge_salloc(tsd_t *tsd, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); +void huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsd_t *tsd, const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0b57b82a..ddceabca 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -368,6 +368,7 @@ typedef 
unsigned szind_t; #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" @@ -399,6 +400,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -465,7 +467,7 @@ void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); unsigned narenas_total_get(void); -arena_t *arena_init(unsigned ind); +arena_t *arena_init(tsd_t *tsd, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); @@ -490,6 +492,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" @@ -521,6 +524,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" @@ -545,7 +549,7 @@ size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(unsigned ind, bool init_if_missing); +arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); ticker_t 
*decay_ticker_get(tsd_t *tsd, unsigned ind); #endif @@ -819,7 +823,7 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } JEMALLOC_INLINE arena_t * -arena_get(unsigned ind, bool init_if_missing) +arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -829,7 +833,7 @@ arena_get(unsigned ind, bool init_if_missing) if (unlikely(ret == NULL)) { ret = atomic_read_p((void *)&arenas[ind]); if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(ind); + ret = arena_init(tsd, ind); } return (ret); } @@ -863,7 +867,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); -size_t isalloc(const void *ptr, bool demote); +size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, @@ -877,9 +881,9 @@ void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(const void *ptr, bool demote); +size_t ivsalloc(tsd_t *tsd, const void *ptr, bool demote); size_t u2rz(size_t usize); -size_t p2rz(const void *ptr); +size_t p2rz(tsd_t *tsd, const void *ptr); void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); @@ -914,14 +918,14 @@ iaalloc(const void *ptr) * size_t sz = isalloc(ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(const void *ptr, bool demote) +isalloc(tsd_t *tsd, const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. 
*/ assert(config_prof || !demote); - return (arena_salloc(ptr, demote)); + return (arena_salloc(tsd, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * @@ -934,7 +938,7 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, config_prof)); } return (ret); @@ -982,7 +986,7 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, config_prof)); } return (ret); @@ -1005,7 +1009,7 @@ ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(const void *ptr, bool demote) +ivsalloc(tsd_t *tsd, const void *ptr, bool demote) { extent_node_t *node; @@ -1017,7 +1021,7 @@ ivsalloc(const void *ptr, bool demote) assert(extent_node_addr_get(node) == ptr || extent_node_achunk_get(node)); - return (isalloc(ptr, demote)); + return (isalloc(tsd, ptr, demote)); } JEMALLOC_INLINE size_t @@ -1035,9 +1039,9 @@ u2rz(size_t usize) } JEMALLOC_INLINE size_t -p2rz(const void *ptr) +p2rz(tsd_t *tsd, const void *ptr) { - size_t usize = isalloc(ptr, false); + size_t usize = isalloc(tsd, ptr, false); return (u2rz(usize)); } @@ -1049,7 +1053,7 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, assert(ptr != NULL); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(ptr, + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, config_prof)); } diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 
3cfa7872..de54f508 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -104,9 +104,9 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx); - malloc_mutex_lock(&mtx); - malloc_mutex_unlock(&mtx); + malloc_mutex_init(&mtx, MALLOC_MUTEX_RANK_OMIT); + malloc_mutex_lock(NULL, &mtx); + malloc_mutex_unlock(NULL, &mtx); } #endif #endif diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index f051f291..7d19a0f4 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,17 +6,21 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0} +# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER, NULL} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #else # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER {PTHREAD_MUTEX_INITIALIZER} +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} # endif #endif @@ -39,6 +43,7 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif + witness_t witness; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -52,27 +57,31 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex); -void malloc_mutex_prefork(malloc_mutex_t *mutex); -void 
malloc_mutex_postfork_parent(malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(malloc_mutex_t *mutex); -bool mutex_boot(void); +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank); +void malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(malloc_mutex_t *mutex); +void malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(malloc_mutex_t *mutex) +malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { + witness_assert_not_owner(tsd, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -84,14 +93,19 @@ malloc_mutex_lock(malloc_mutex_t *mutex) #else pthread_mutex_lock(&mutex->lock); #endif + if (config_debug) + witness_lock(tsd, &mutex->witness); } } JEMALLOC_INLINE void -malloc_mutex_unlock(malloc_mutex_t *mutex) +malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { + witness_assert_owner(tsd, &mutex->witness); + if (config_debug) + witness_unlock(tsd, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -105,6 +119,22 @@ malloc_mutex_unlock(malloc_mutex_t *mutex) #endif } } + +JEMALLOC_INLINE void +malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex) +{ + + 
if (config_debug) + witness_assert_owner(tsd, &mutex->witness); +} + +JEMALLOC_INLINE void +malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex) +{ + + if (config_debug) + witness_assert_not_owner(tsd, &mutex->witness); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 551cb937..be5d30e7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -314,6 +314,9 @@ lg_floor malloc_cprintf malloc_mutex_init malloc_mutex_lock +malloc_mutex_assert_not_owner +malloc_mutex_assert_owner +malloc_mutex_boot malloc_mutex_postfork_child malloc_mutex_postfork_parent malloc_mutex_prefork @@ -333,7 +336,6 @@ malloc_write map_bias map_misc_offset mb_write -mutex_boot narenas_tdata_cleanup narenas_total_get ncpus @@ -548,3 +550,14 @@ valgrind_freelike_block valgrind_make_mem_defined valgrind_make_mem_noaccess valgrind_make_mem_undefined +witness_assert_lockless +witness_assert_not_owner +witness_assert_owner +witness_init +witness_lock +witness_lock_error +witness_lockless_error +witness_not_owner_error +witness_owner_error +witness_unlock +witnesses_cleanup diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a25502a9..047bd0b7 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -281,7 +281,7 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(const void *ptr, size_t usize, +void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); @@ -293,32 +293,32 @@ size_t prof_bt_count(void); const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern 
prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(bool, const prof_cnt_t *); +typedef bool (prof_dump_header_t)(tsd_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; #endif -void prof_idump(void); -bool prof_mdump(const char *filename); -void prof_gdump(void); +void prof_idump(tsd_t *tsd); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsd_t *tsd); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); -const char *prof_thread_name_get(void); -bool prof_active_get(void); -bool prof_active_set(bool active); +const char *prof_thread_name_get(tsd_t *tsd); +bool prof_active_get(tsd_t *tsd); +bool prof_active_set(tsd_t *tsd, bool active); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(void); -bool prof_thread_active_set(bool active); -bool prof_thread_active_init_get(void); -bool prof_thread_active_init_set(bool active_init); -bool prof_gdump_get(void); -bool prof_gdump_set(bool active); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsd_t *tsd); +bool prof_thread_active_init_set(tsd_t *tsd, bool active_init); +bool prof_gdump_get(tsd_t *tsd); +bool prof_gdump_set(tsd_t *tsd, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(void); -void prof_prefork(void); -void prof_postfork_parent(void); -void prof_postfork_child(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork(tsd_t *tsd); +void prof_postfork_parent(tsd_t *tsd); +void prof_postfork_child(tsd_t *tsd); void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ @@ -329,17 +329,17 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t 
*prof_tdata_get(tsd_t *tsd, bool create); +prof_tctx_t *prof_tctx_get(tsd_t *tsd, const void *ptr); +void prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, + const void *old_ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -prof_tctx_t *prof_tctx_get(const void *ptr); -void prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, +void prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_malloc_sample_object(const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); @@ -397,34 +397,34 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(const void *ptr) +prof_tctx_get(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(ptr)); + return (arena_prof_tctx_get(tsd, ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(ptr, usize, tctx); + arena_prof_tctx_set(tsd, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(const void *ptr, size_t usize, const void *old_ptr, +prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + 
arena_prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -479,17 +479,17 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); else - prof_tctx_set(ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsd, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -503,7 +503,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd, ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() @@ -520,9 +520,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); if (unlikely(sampled)) - prof_malloc_sample_object(ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); else - prof_tctx_reset(ptr, usize, old_ptr, old_tctx); + prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); @@ -531,10 +531,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(ptr); + prof_tctx_t *tctx = prof_tctx_get(tsd, ptr); cassert(config_prof); - assert(usize == isalloc(ptr, true)); + assert(usize == isalloc(tsd, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 1edd39fd..1aa64631 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -130,7 +130,7 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(const void *ptr); +size_t tcache_salloc(tsd_t *tsd, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); @@ -138,19 +138,19 @@ void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); -void tcache_arena_associate(tcache_t *tcache, arena_t *arena); -void tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, - arena_t *newarena); -void tcache_arena_dissociate(tcache_t *tcache, arena_t *arena); +void 
tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); +void tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, + arena_t *oldarena, arena_t *newarena); +void tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tcache_t *tcache, arena_t *arena); +void tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(void); +bool tcache_boot(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -310,7 +310,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, */ if (config_prof || (slow_path && config_fill) || unlikely(zero)) { usize = index2size(binind); - assert(tcache_salloc(ret) == usize); + assert(tcache_salloc(tsd, ret) == usize); } if (likely(!zero)) { @@ -407,7 +407,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert(tcache_salloc(ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd, ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); @@ -434,8 +434,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd, ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd, ptr) <= tcache_maxclass); binind = size2index(size); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 
16cc2f17..b23b3b4c 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -542,6 +542,7 @@ struct tsd_init_head_s { O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ + O(witnesses, witness_list_t) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -554,7 +555,8 @@ struct tsd_init_head_s { 0, \ false, \ tcache_enabled_default, \ - NULL \ + NULL, \ + ql_head_initializer(witnesses) \ } struct tsd_s { @@ -577,7 +579,7 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); -bool malloc_tsd_boot0(void); +tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index a3380df9..7c6a62fa 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -30,15 +30,17 @@ * calls must be embedded in macros rather than in functions so that when * Valgrind reports errors, there are no extra stack frames in the backtraces. 
*/ -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(ptr), zero); \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do { \ + if (unlikely(in_valgrind && cond)) { \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsd, ptr), \ + zero); \ + } \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do { \ if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(ptr); \ + size_t rzsize = p2rz(tsd, ptr); \ \ if (!maybe_moved || ptr == old_ptr) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h new file mode 100644 index 00000000..22f0b2c7 --- /dev/null +++ b/include/jemalloc/internal/witness.h @@ -0,0 +1,103 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, const witness_t *); + +/* + * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by + * the witness machinery. 
+ */ +#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_ARENAS 2U + +#define WITNESS_RANK_PROF_DUMP 3U +#define WITNESS_RANK_PROF_BT2GCTX 4U +#define WITNESS_RANK_PROF_TDATAS 5U +#define WITNESS_RANK_PROF_TDATA 6U +#define WITNESS_RANK_PROF_GCTX 7U + +#define WITNESS_RANK_ARENA 8U +#define WITNESS_RANK_ARENA_CHUNKS 9U +#define WITNESS_RANK_ARENA_NODE_CACHE 10 + +#define WITNESS_RANK_BASE 11U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + +#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}} + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct witness_s { + /* Name, used for printing lock order reversal messages. */ + const char *name; + + /* + * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses + * must be acquired in order of increasing rank. + */ + witness_rank_t rank; + + /* + * If two witnesses are of equal rank and they have the same comp + * function pointer, it is called as a last attempt to differentiate + * between witnesses of equal rank. + */ + witness_comp_t *comp; + + /* Linkage for thread's currently owned locks. 
*/ + ql_elm(witness_t) link; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp); +#ifdef JEMALLOC_JET +typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +extern witness_lock_error_t *witness_lock_error; +#endif +void witness_lock(tsd_t *tsd, witness_t *witness); +void witness_unlock(tsd_t *tsd, witness_t *witness); +#ifdef JEMALLOC_JET +typedef void (witness_owner_error_t)(const witness_t *); +extern witness_owner_error_t *witness_owner_error; +#endif +void witness_assert_owner(tsd_t *tsd, const witness_t *witness); +#ifdef JEMALLOC_JET +typedef void (witness_not_owner_error_t)(const witness_t *); +extern witness_not_owner_error_t *witness_not_owner_error; +#endif +void witness_assert_not_owner(tsd_t *tsd, const witness_t *witness); +#ifdef JEMALLOC_JET +typedef void (witness_lockless_error_t)(const witness_list_t *); +extern witness_lockless_error_t *witness_lockless_error; +#endif +void witness_assert_lockless(tsd_t *tsd); + +void witnesses_cleanup(tsd_t *tsd); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index a9566af1..cc648e31 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,11 +37,12 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ * definition. 
*/ -static void arena_purge_to_limit(arena_t *arena, size_t ndirty_limit); -static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned, bool decommitted); -static void arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); +static void arena_purge_to_limit(tsd_t *tsd, arena_t *arena, + size_t ndirty_limit); +static void arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, + bool dirty, bool cleaned, bool decommitted); +static void arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); @@ -591,7 +592,8 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) +arena_chunk_register(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + bool zero) { /* @@ -602,62 +604,62 @@ arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, bool zero) */ extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(chunk, &chunk->node)); + return (chunk_register(tsd, chunk, &chunk->node)); } static arena_chunk_t * -arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - bool *zero, bool *commit) +arena_chunk_alloc_internal_hard(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { arena_chunk_t *chunk; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(arena, chunk_hooks, NULL, - chunksize, chunksize, zero, commit); + chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsd, arena, chunk_hooks, + NULL, chunksize, chunksize, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. 
*/ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, - chunksize, *zero, *commit); + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, + (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(arena, chunk, *zero)) { + if (chunk != NULL && arena_chunk_register(tsd, arena, chunk, *zero)) { if (!*commit) { /* Undo commit of header. */ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); return (chunk); } static arena_chunk_t * -arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) +arena_chunk_alloc_internal(tsd_t *tsd, arena_t *arena, bool *zero, bool *commit) { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk = chunk_alloc_cache(arena, &chunk_hooks, NULL, chunksize, + chunk = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, chunksize, chunksize, zero, true); if (chunk != NULL) { - if (arena_chunk_register(arena, chunk, *zero)) { - chunk_dalloc_cache(arena, &chunk_hooks, chunk, + if (arena_chunk_register(tsd, arena, chunk, *zero)) { + chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } *commit = true; } if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(arena, &chunk_hooks, - zero, commit); + chunk = arena_chunk_alloc_internal_hard(tsd, arena, + &chunk_hooks, zero, commit); } if (config_stats && chunk != NULL) { @@ -669,7 +671,7 @@ arena_chunk_alloc_internal(arena_t *arena, bool *zero, bool *commit) } static arena_chunk_t * -arena_chunk_init_hard(arena_t *arena) +arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) { arena_chunk_t *chunk; bool zero, commit; @@ -679,7 +681,7 @@ 
arena_chunk_init_hard(arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(arena, &zero, &commit); + chunk = arena_chunk_alloc_internal(tsd, arena, &zero, &commit); if (chunk == NULL) return (NULL); @@ -724,14 +726,14 @@ arena_chunk_init_hard(arena_t *arena) } static arena_chunk_t * -arena_chunk_alloc(arena_t *arena) +arena_chunk_alloc(tsd_t *tsd, arena_t *arena) { arena_chunk_t *chunk; if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(arena); + chunk = arena_chunk_init_hard(tsd, arena); if (chunk == NULL) return (NULL); } @@ -742,7 +744,7 @@ arena_chunk_alloc(arena_t *arena) } static void -arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) { assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); @@ -782,12 +784,12 @@ arena_chunk_dalloc(arena_t *arena, arena_chunk_t *chunk) * potential for causing later access of decommitted * memory. 
*/ - chunk_hooks = chunk_hooks_get(arena); + chunk_hooks = chunk_hooks_get(tsd, arena); chunk_hooks.decommit(spare, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_cache(arena, &chunk_hooks, (void *)spare, + chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)spare, chunksize, committed); if (config_stats) { @@ -868,63 +870,64 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, } extent_node_t * -arena_node_alloc(arena_t *arena) +arena_node_alloc(tsd_t *tsd, arena_t *arena) { extent_node_t *node; - malloc_mutex_lock(&arena->node_cache_mtx); + malloc_mutex_lock(tsd, &arena->node_cache_mtx); node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { - malloc_mutex_unlock(&arena->node_cache_mtx); - return (base_alloc(sizeof(extent_node_t))); + malloc_mutex_unlock(tsd, &arena->node_cache_mtx); + return (base_alloc(tsd, sizeof(extent_node_t))); } ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(&arena->node_cache_mtx); + malloc_mutex_unlock(tsd, &arena->node_cache_mtx); return (node); } void -arena_node_dalloc(arena_t *arena, extent_node_t *node) +arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node) { - malloc_mutex_lock(&arena->node_cache_mtx); + malloc_mutex_lock(tsd, &arena->node_cache_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(&arena->node_cache_mtx); + malloc_mutex_unlock(tsd, &arena->node_cache_mtx); } static void * -arena_chunk_alloc_huge_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - size_t usize, size_t alignment, bool *zero, size_t csize) +arena_chunk_alloc_huge_hard(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, + size_t csize) { void *ret; bool commit = true; - ret = chunk_alloc_wrapper(arena, chunk_hooks, NULL, csize, alignment, - zero, &commit); + ret = chunk_alloc_wrapper(tsd, arena, chunk_hooks, NULL, csize, + alignment, zero, &commit); if (ret 
== NULL) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } return (ret); } void * -arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, - bool *zero) +arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, + size_t alignment, bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); /* Optimistically update stats. */ if (config_stats) { @@ -933,61 +936,61 @@ arena_chunk_alloc_huge(arena_t *arena, size_t usize, size_t alignment, } arena_nactive_add(arena, usize >> LG_PAGE); - ret = chunk_alloc_cache(arena, &chunk_hooks, NULL, csize, alignment, - zero, true); - malloc_mutex_unlock(&arena->lock); + ret = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, csize, + alignment, zero, true); + malloc_mutex_unlock(tsd, &arena->lock); if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(arena, &chunk_hooks, usize, - alignment, zero, csize); + ret = arena_chunk_alloc_huge_hard(tsd, arena, &chunk_hooks, + usize, alignment, zero, csize); } return (ret); } void -arena_chunk_dalloc_huge(arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, size_t usize) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; csize = CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(arena, &chunk_hooks, chunk, csize, true); - malloc_mutex_unlock(&arena->lock); + 
chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, csize, true); + malloc_mutex_unlock(tsd, &arena->lock); } void -arena_chunk_ralloc_huge_similar(arena_t *arena, void *chunk, size_t oldsize, - size_t usize) +arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, + size_t oldsize, size_t usize) { assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); if (oldsize < usize) arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); else arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } void -arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, - size_t usize) +arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, + size_t oldsize, size_t usize) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); if (cdiff != 0) @@ -1000,51 +1003,52 @@ arena_chunk_ralloc_huge_shrink(arena_t *arena, void *chunk, size_t oldsize, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(usize)); - chunk_dalloc_cache(arena, &chunk_hooks, nchunk, cdiff, true); + chunk_dalloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + true); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } static bool -arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, - size_t udiff, size_t cdiff) +arena_chunk_ralloc_huge_expand_hard(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, + bool *zero, void *nchunk, size_t udiff, size_t 
cdiff) { bool err; bool commit = true; - err = (chunk_alloc_wrapper(arena, chunk_hooks, nchunk, cdiff, chunksize, - zero, &commit) == NULL); + err = (chunk_alloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + chunksize, zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update_undo(arena, oldsize, usize); arena->stats.mapped -= cdiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero, - true); + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + *zero, true); err = true; } return (err); } bool -arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, - size_t usize, bool *zero) +arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, + size_t oldsize, size_t usize, bool *zero) { bool err; - chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { @@ -1053,17 +1057,17 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize, - zero, true) == NULL); - malloc_mutex_unlock(&arena->lock); + err = (chunk_alloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + chunksize, zero, true) == NULL); + malloc_mutex_unlock(tsd, &arena->lock); if (err) { - err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, - chunk, oldsize, usize, zero, nchunk, udiff, + err = arena_chunk_ralloc_huge_expand_hard(tsd, arena, + &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero, - true); + chunk_dalloc_wrapper(tsd, arena, &chunk_hooks, nchunk, cdiff, + *zero, true); err = true; } @@ -1103,7 +1107,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_large(arena_t *arena, size_t size, bool zero) +arena_run_alloc_large(tsd_t *tsd, arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1119,7 +1123,7 @@ arena_run_alloc_large(arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ - chunk = arena_chunk_alloc(arena); + chunk = arena_chunk_alloc(tsd, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) @@ -1147,7 +1151,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) } static arena_run_t * -arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1164,7 +1168,7 @@ arena_run_alloc_small(arena_t *arena, size_t size, szind_t binind) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(arena); + chunk = arena_chunk_alloc(tsd, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) @@ -1189,28 +1193,28 @@ arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) } ssize_t -arena_lg_dirty_mult_get(arena_t *arena) +arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena) { ssize_t lg_dirty_mult; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (lg_dirty_mult); } bool -arena_lg_dirty_mult_set(arena_t *arena, ssize_t lg_dirty_mult) +arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, ssize_t lg_dirty_mult) { if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsd, arena); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } @@ -1367,25 +1371,25 @@ arena_decay_time_valid(ssize_t decay_time) } ssize_t -arena_decay_time_get(arena_t *arena) +arena_decay_time_get(tsd_t *tsd, arena_t *arena) { ssize_t decay_time; - 
malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); decay_time = arena->decay_time; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (decay_time); } bool -arena_decay_time_set(arena_t *arena, ssize_t decay_time) +arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) return (true); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); /* * Restart decay backlog from scratch, which may cause many dirty pages * to be immediately purged. It would conceptually be possible to map @@ -1395,14 +1399,14 @@ arena_decay_time_set(arena_t *arena, ssize_t decay_time) * arbitrary change during initial arena configuration. */ arena_decay_init(arena, decay_time); - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsd, arena); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } static void -arena_maybe_purge_ratio(arena_t *arena) +arena_maybe_purge_ratio(tsd_t *tsd, arena_t *arena) { assert(opt_purge == purge_mode_ratio); @@ -1425,12 +1429,12 @@ arena_maybe_purge_ratio(arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge_to_limit(arena, threshold); + arena_purge_to_limit(tsd, arena, threshold); } } static void -arena_maybe_purge_decay(arena_t *arena) +arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) { nstime_t time; size_t ndirty_limit; @@ -1440,7 +1444,7 @@ arena_maybe_purge_decay(arena_t *arena) /* Purge all or nothing if the option is disabled. 
*/ if (arena->decay_time <= 0) { if (arena->decay_time == 0) - arena_purge_to_limit(arena, 0); + arena_purge_to_limit(tsd, arena, 0); return; } @@ -1461,11 +1465,11 @@ arena_maybe_purge_decay(arena_t *arena) */ if (arena->ndirty <= ndirty_limit) return; - arena_purge_to_limit(arena, ndirty_limit); + arena_purge_to_limit(tsd, arena, ndirty_limit); } void -arena_maybe_purge(arena_t *arena) +arena_maybe_purge(tsd_t *tsd, arena_t *arena) { /* Don't recursively purge. */ @@ -1473,9 +1477,9 @@ arena_maybe_purge(arena_t *arena) return; if (opt_purge == purge_mode_ratio) - arena_maybe_purge_ratio(arena); + arena_maybe_purge_ratio(tsd, arena); else - arena_maybe_purge_decay(arena); + arena_maybe_purge_decay(tsd, arena); } static size_t @@ -1513,7 +1517,7 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1544,7 +1548,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; - chunk = chunk_alloc_cache(arena, chunk_hooks, + chunk = chunk_alloc_cache(tsd, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, false); @@ -1579,7 +1583,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, * prior to allocation. */ if (chunk == arena->spare) - arena_chunk_alloc(arena); + arena_chunk_alloc(tsd, arena); /* Temporarily allocate the free dirty run. 
*/ arena_run_split_large(arena, run, run_size, false); @@ -1603,7 +1607,7 @@ arena_stash_dirty(arena_t *arena, chunk_hooks_t *chunk_hooks, } static size_t -arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1615,7 +1619,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, nmadvise = 0; npurged = 0; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { @@ -1654,7 +1658,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; } else { - flag_unzeroed = chunk_purge_wrapper(arena, + flag_unzeroed = chunk_purge_wrapper(tsd, arena, chunk_hooks, chunk, chunksize, pageind << LG_PAGE, run_size) ? 
CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; @@ -1685,7 +1689,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, if (config_stats) nmadvise++; } - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1696,7 +1700,7 @@ arena_purge_stashed(arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1716,10 +1720,10 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); - arena_node_dalloc(arena, chunkselm); + arena_node_dalloc(tsd, arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_wrapper(arena, chunk_hooks, addr, size, - zeroed, committed); + chunk_dalloc_wrapper(tsd, arena, chunk_hooks, addr, + size, zeroed, committed); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); @@ -1730,7 +1734,8 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(arena, run, false, true, decommitted); + arena_run_dalloc(tsd, arena, run, false, true, + decommitted); } } } @@ -1745,9 +1750,9 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, * (arena->ndirty >= ndirty_limit) */ static void -arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) +arena_purge_to_limit(tsd_t *tsd, arena_t *arena, size_t ndirty_limit) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t purge_chunks_sentinel; @@ -1768,14 +1773,14 
@@ arena_purge_to_limit(arena_t *arena, size_t ndirty_limit) qr_new(&purge_runs_sentinel, rd_link); extent_node_dirty_linkage_init(&purge_chunks_sentinel); - npurge = arena_stash_dirty(arena, &chunk_hooks, ndirty_limit, + npurge = arena_stash_dirty(tsd, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); if (npurge == 0) goto label_return; - npurged = arena_purge_stashed(arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); + npurged = arena_purge_stashed(tsd, arena, &chunk_hooks, + &purge_runs_sentinel, &purge_chunks_sentinel); assert(npurged == npurge); - arena_unstash_purged(arena, &chunk_hooks, &purge_runs_sentinel, + arena_unstash_purged(tsd, arena, &chunk_hooks, &purge_runs_sentinel, &purge_chunks_sentinel); if (config_stats) @@ -1786,15 +1791,15 @@ label_return: } void -arena_purge(arena_t *arena, bool all) +arena_purge(tsd_t *tsd, arena_t *arena, bool all) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (all) - arena_purge_to_limit(arena, 0); + arena_purge_to_limit(tsd, arena, 0); else - arena_maybe_purge(arena); - malloc_mutex_unlock(&arena->lock); + arena_maybe_purge(tsd, arena); + malloc_mutex_unlock(tsd, &arena->lock); } static void @@ -1911,8 +1916,8 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, - bool decommitted) +arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, + bool cleaned, bool decommitted) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -1972,7 +1977,7 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, if (size == arena_maxrun) { assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(arena, chunk); + arena_chunk_dalloc(tsd, arena, chunk); } /* @@ -1983,12 +1988,12 @@ arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty, bool 
cleaned, * chances of spuriously crossing the dirty page purging threshold. */ if (dirty) - arena_maybe_purge(arena); + arena_maybe_purge(tsd, arena); } static void -arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize) +arena_run_trim_head(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2023,12 +2028,13 @@ arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(arena, run, false, false, (flag_decommitted != 0)); + arena_run_dalloc(tsd, arena, run, false, false, (flag_decommitted != + 0)); } static void -arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t oldsize, size_t newsize, bool dirty) +arena_run_trim_tail(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2067,8 +2073,8 @@ arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; - arena_run_dalloc(arena, tail_run, dirty, false, (flag_decommitted != - 0)); + arena_run_dalloc(tsd, arena, tail_run, dirty, false, (flag_decommitted + != 0)); } static void @@ -2094,7 +2100,7 @@ arena_bin_nonfull_run_tryget(arena_bin_t *bin) } static arena_run_t * -arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) +arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; @@ -2110,19 +2116,19 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) bin_info = 
&arena_bin_info[binind]; /* Allocate a new run. */ - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); /******************************/ - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_small(arena, bin_info->run_size, binind); + malloc_mutex_lock(tsd, &arena->lock); + run = arena_run_alloc_small(tsd, arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. */ run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); /********************************/ - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if (run != NULL) { if (config_stats) { bin->stats.nruns++; @@ -2145,7 +2151,7 @@ arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin) /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ static void * -arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) { szind_t binind; arena_bin_info_t *bin_info; @@ -2154,7 +2160,7 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; bin->runcur = NULL; - run = arena_bin_nonfull_run_get(arena, bin); + run = arena_bin_nonfull_run_get(tsd, arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { /* * Another thread updated runcur while this one ran without the @@ -2175,9 +2181,10 @@ arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin) * were just deallocated from the run. 
*/ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); - if (run->nfree == bin_info->nregs) - arena_dalloc_bin_run(arena, chunk, run, bin); - else + if (run->nfree == bin_info->nregs) { + arena_dalloc_bin_run(tsd, arena, chunk, run, + bin); + } else arena_bin_lower_run(arena, chunk, run, bin); } return (ret); @@ -2202,10 +2209,10 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(arena, prof_accumbytes)) - prof_idump(); + if (config_prof && arena_prof_accum(tsd, arena, prof_accumbytes)) + prof_idump(tsd); bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; @@ -2213,7 +2220,7 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ptr = arena_bin_malloc_hard(arena, bin); + ptr = arena_bin_malloc_hard(tsd, arena, bin); if (ptr == NULL) { /* * OOM. 
tbin->avail isn't yet filled down to its first @@ -2240,7 +2247,7 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, bin->stats.nfills++; tbin->tstats.nrequests = 0; } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); tbin->ncached = i; arena_decay_tick(tsd, arena); } @@ -2365,14 +2372,14 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin = &arena->bins[binind]; usize = index2size(binind); - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(arena, bin); + ret = arena_bin_malloc_hard(tsd, arena, bin); if (ret == NULL) { - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); return (NULL); } @@ -2381,9 +2388,9 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(&bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(arena, usize)) - prof_idump(); + malloc_mutex_unlock(tsd, &bin->lock); + if (config_prof && !isthreaded && arena_prof_accum(tsd, arena, usize)) + prof_idump(tsd); if (!zero) { if (config_fill) { @@ -2419,7 +2426,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) /* Large allocation. 
*/ usize = index2size(binind); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2432,9 +2439,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; - run = arena_run_alloc_large(arena, usize + large_pad, zero); + run = arena_run_alloc_large(tsd, arena, usize + large_pad, zero); if (run == NULL) { - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (NULL); } miscelm = arena_run_to_miscelm(run); @@ -2452,9 +2459,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) } if (config_prof) idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); if (config_prof && idump) - prof_idump(); + prof_idump(tsd); if (!zero) { if (config_fill) { @@ -2506,10 +2513,10 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, alignment = PAGE_CEILING(alignment); alloc_size = usize + large_pad + alignment; - malloc_mutex_lock(&arena->lock); - run = arena_run_alloc_large(arena, alloc_size, false); + malloc_mutex_lock(tsd, &arena->lock); + run = arena_run_alloc_large(tsd, arena, alloc_size, false); if (run == NULL) { - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -2529,11 +2536,11 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, LG_PAGE)); run = &miscelm->run; - arena_run_trim_head(arena, chunk, head_run, alloc_size, + arena_run_trim_head(tsd, arena, chunk, head_run, alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(arena, chunk, run, usize + large_pad + + arena_run_trim_tail(tsd, arena, chunk, run, usize + large_pad + trailsize, usize + large_pad, false); } if (arena_run_init_large(arena, run, usize + 
large_pad, zero)) { @@ -2544,8 +2551,8 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. */ - arena_run_dalloc(arena, run, dirty, false, decommitted); - malloc_mutex_unlock(&arena->lock); + arena_run_dalloc(tsd, arena, run, dirty, false, decommitted); + malloc_mutex_unlock(tsd, &arena->lock); return (NULL); } ret = arena_miscelm_to_rpages(miscelm); @@ -2560,7 +2567,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) @@ -2609,7 +2616,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promoted(const void *ptr, size_t size) +arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2618,8 +2625,8 @@ arena_prof_promoted(const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(ptr, false) == LARGE_MINCLASS); - assert(isalloc(ptr, true) == LARGE_MINCLASS); + assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsd, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -2628,8 +2635,8 @@ arena_prof_promoted(const void *ptr, size_t size) assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(ptr, false) == LARGE_MINCLASS); - assert(isalloc(ptr, true) == size); + assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsd, ptr, true) == size); } static void @@ -2660,19 +2667,19 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) 
+arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + arena_run_t *run, arena_bin_t *bin) { assert(run != bin->runcur); - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); /******************************/ - malloc_mutex_lock(&arena->lock); - arena_run_dalloc(arena, run, true, false, false); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); + arena_run_dalloc(tsd, arena, run, true, false, false); + malloc_mutex_unlock(tsd, &arena->lock); /****************************/ - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if (config_stats) bin->stats.curruns--; } @@ -2699,8 +2706,8 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm, bool junked) +arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -2721,7 +2728,7 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(arena, chunk, run, bin); + arena_dalloc_bin_run(tsd, arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) arena_bin_lower_run(arena, chunk, run, bin); @@ -2732,15 +2739,15 @@ arena_dalloc_bin_locked_impl(arena_t *arena, arena_chunk_t *chunk, void *ptr, } void -arena_dalloc_bin_junked_locked(arena_t *arena, arena_chunk_t *chunk, void *ptr, - arena_chunk_map_bits_t *bitselm) +arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, true); } void 
-arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; @@ -2750,9 +2757,9 @@ arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr, rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; - malloc_mutex_lock(&bin->lock); - arena_dalloc_bin_locked_impl(arena, chunk, ptr, bitselm, false); - malloc_mutex_unlock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); + arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, false); + malloc_mutex_unlock(tsd, &bin->lock); } void @@ -2767,7 +2774,7 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind)) != BININD_INVALID); } bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(arena, chunk, ptr, pageind, bitselm); + arena_dalloc_bin(tsd, arena, chunk, ptr, pageind, bitselm); arena_decay_tick(tsd, arena); } @@ -2790,7 +2797,7 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = #endif static void -arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_large_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -2814,30 +2821,30 @@ arena_dalloc_large_locked_impl(arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc(arena, run, true, false, false); + arena_run_dalloc(tsd, arena, run, true, false, false); } void -arena_dalloc_large_junked_locked(arena_t *arena, arena_chunk_t *chunk, - void *ptr) +arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, + arena_chunk_t *chunk, void *ptr) { - arena_dalloc_large_locked_impl(arena, chunk, ptr, true); + arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, true); } void arena_dalloc_large(tsd_t *tsd, arena_t 
*arena, arena_chunk_t *chunk, void *ptr) { - malloc_mutex_lock(&arena->lock); - arena_dalloc_large_locked_impl(arena, chunk, ptr, false); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); + arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, false); + malloc_mutex_unlock(tsd, &arena->lock); arena_decay_tick(tsd, arena); } static void -arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t size) +arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, @@ -2850,8 +2857,8 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, * Shrink the run, and make trailing pages available for other * allocations. */ - malloc_mutex_lock(&arena->lock); - arena_run_trim_tail(arena, chunk, run, oldsize + large_pad, size + + malloc_mutex_lock(tsd, &arena->lock); + arena_run_trim_tail(tsd, arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; @@ -2869,12 +2876,12 @@ arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } static bool -arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t oldsize, size_t usize_min, size_t usize_max, bool zero) +arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t npages = (oldsize + large_pad) >> LG_PAGE; @@ -2884,7 +2891,7 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, large_pad); /* Try 
to extend the run. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, pageind+npages) != 0) goto label_fail; @@ -2964,11 +2971,11 @@ arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } label_fail: - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (true); } @@ -2997,7 +3004,7 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, +arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; @@ -3012,16 +3019,16 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, arena = extent_node_arena_get(&chunk->node); if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(arena, chunk, ptr, oldsize, - usize_min, usize_max, zero); + bool ret = arena_ralloc_large_grow(tsd, arena, chunk, ptr, + oldsize, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - isalloc(ptr, config_prof) - oldsize); + isalloc(tsd, ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(ptr, config_prof) - oldsize); + isalloc(tsd, ptr, config_prof) - oldsize); } } return (ret); @@ -3030,7 +3037,7 @@ arena_ralloc_large(void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. 
*/ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(tsd, arena, chunk, ptr, oldsize, usize_max); return (false); } @@ -3065,7 +3072,7 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, } else { if (usize_max <= SMALL_MAXCLASS) return (true); - if (arena_ralloc_large(ptr, oldsize, usize_min, + if (arena_ralloc_large(tsd, ptr, oldsize, usize_min, usize_max, zero)) return (true); } @@ -3138,25 +3145,25 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } dss_prec_t -arena_dss_prec_get(arena_t *arena) +arena_dss_prec_get(tsd_t *tsd, arena_t *arena) { dss_prec_t ret; - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ret = arena->dss_prec; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (ret); } bool -arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) +arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->dss_prec = dss_prec; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); return (false); } @@ -3213,28 +3220,29 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, } void -arena_basic_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, - ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, - size_t *ndirty) +arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } void 
-arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, - ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, - size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats) +arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, + size_t *nactive, size_t *ndirty, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, + malloc_huge_stats_t *hstats) { unsigned i; cassert(config_stats); - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); @@ -3264,12 +3272,12 @@ arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; @@ -3281,7 +3289,7 @@ arena_stats_merge(arena_t *arena, unsigned *nthreads, const char **dss, bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); } } @@ -3307,7 +3315,7 @@ arena_nthreads_dec(arena_t *arena) } arena_t * -arena_new(unsigned ind) +arena_new(tsd_t *tsd, unsigned ind) { arena_t *arena; size_t arena_size; @@ -3322,17 +3330,17 @@ arena_new(unsigned ind) * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { - arena = (arena_t *)base_alloc(CACHELINE_CEILING(arena_size) + - QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + + arena = (arena_t *)base_alloc(tsd, CACHELINE_CEILING(arena_size) + + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + nhclasses) * sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(arena_size); + arena = (arena_t *)base_alloc(tsd, arena_size); if (arena == NULL) return (NULL); arena->ind = ind; arena->nthreads = 0; - if (malloc_mutex_init(&arena->lock)) + if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); if (config_stats) { @@ -3365,7 +3373,7 @@ arena_new(unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(); + arena->dss_prec = chunk_dss_prec_get(tsd); arena->spare = NULL; @@ -3383,17 +3391,20 @@ arena_new(unsigned ind) arena_decay_init(arena, arena_decay_time_default_get()); ql_new(&arena->huge); - if (malloc_mutex_init(&arena->huge_mtx)) + if (malloc_mutex_init(&arena->huge_mtx, "arena_huge", + WITNESS_RANK_ARENA_HUGE)) return (NULL); extent_tree_szad_new(&arena->chunks_szad_cached); extent_tree_ad_new(&arena->chunks_ad_cached); extent_tree_szad_new(&arena->chunks_szad_retained); extent_tree_ad_new(&arena->chunks_ad_retained); - if (malloc_mutex_init(&arena->chunks_mtx)) + if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", + WITNESS_RANK_ARENA_CHUNKS)) return (NULL); ql_new(&arena->node_cache); - if (malloc_mutex_init(&arena->node_cache_mtx)) + if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache", + WITNESS_RANK_ARENA_NODE_CACHE)) return (NULL); arena->chunk_hooks = chunk_hooks_default; @@ -3401,7 +3412,8 @@ arena_new(unsigned ind) /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock)) + if (malloc_mutex_init(&bin->lock, "arena_bin", + WITNESS_RANK_ARENA_BIN)) return (NULL); bin->runcur = NULL; arena_run_heap_new(&bin->runs); @@ -3533,7 +3545,7 @@ small_run_size_init(void) assert(small_maxrun != 0); - small_run_tab = (bool *)base_alloc(sizeof(bool) * (small_maxrun >> + small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * (small_maxrun >> LG_PAGE)); if (small_run_tab == NULL) return (true); @@ -3560,12 +3572,12 @@ run_quantize_init(void) run_quantize_max = chunksize + large_pad; - run_quantize_floor_tab = (size_t *)base_alloc(sizeof(size_t) * + run_quantize_floor_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * (run_quantize_max >> LG_PAGE)); if (run_quantize_floor_tab == NULL) return (true); - run_quantize_ceil_tab = (size_t *)base_alloc(sizeof(size_t) * + run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * (run_quantize_max >> LG_PAGE)); if (run_quantize_ceil_tab == NULL) return (true); @@ -3642,40 +3654,40 @@ arena_boot(void) } void -arena_prefork(arena_t *arena) +arena_prefork(tsd_t *tsd, arena_t *arena) { unsigned i; - malloc_mutex_prefork(&arena->lock); - malloc_mutex_prefork(&arena->huge_mtx); - malloc_mutex_prefork(&arena->chunks_mtx); - malloc_mutex_prefork(&arena->node_cache_mtx); + malloc_mutex_prefork(tsd, &arena->lock); + malloc_mutex_prefork(tsd, &arena->huge_mtx); + malloc_mutex_prefork(tsd, &arena->chunks_mtx); + malloc_mutex_prefork(tsd, &arena->node_cache_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(&arena->bins[i].lock); + malloc_mutex_prefork(tsd, &arena->bins[i].lock); } void -arena_postfork_parent(arena_t *arena) +arena_postfork_parent(tsd_t *tsd, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(&arena->bins[i].lock); - malloc_mutex_postfork_parent(&arena->node_cache_mtx); - malloc_mutex_postfork_parent(&arena->chunks_mtx); - 
malloc_mutex_postfork_parent(&arena->huge_mtx); - malloc_mutex_postfork_parent(&arena->lock); + malloc_mutex_postfork_parent(tsd, &arena->bins[i].lock); + malloc_mutex_postfork_parent(tsd, &arena->node_cache_mtx); + malloc_mutex_postfork_parent(tsd, &arena->chunks_mtx); + malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); + malloc_mutex_postfork_parent(tsd, &arena->lock); } void -arena_postfork_child(arena_t *arena) +arena_postfork_child(tsd_t *tsd, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(&arena->bins[i].lock); - malloc_mutex_postfork_child(&arena->node_cache_mtx); - malloc_mutex_postfork_child(&arena->chunks_mtx); - malloc_mutex_postfork_child(&arena->huge_mtx); - malloc_mutex_postfork_child(&arena->lock); + malloc_mutex_postfork_child(tsd, &arena->bins[i].lock); + malloc_mutex_postfork_child(tsd, &arena->node_cache_mtx); + malloc_mutex_postfork_child(tsd, &arena->chunks_mtx); + malloc_mutex_postfork_child(tsd, &arena->huge_mtx); + malloc_mutex_postfork_child(tsd, &arena->lock); } diff --git a/src/base.c b/src/base.c index 7cdcfed8..87b376b8 100644 --- a/src/base.c +++ b/src/base.c @@ -76,7 +76,7 @@ base_chunk_alloc(size_t minsize) * physical memory usage. */ void * -base_alloc(size_t size) +base_alloc(tsd_t *tsd, size_t size) { void *ret; size_t csize, usize; @@ -91,7 +91,7 @@ base_alloc(size_t size) usize = s2u(csize); extent_node_init(&key, NULL, NULL, usize, false, false); - malloc_mutex_lock(&base_mtx); + malloc_mutex_lock(tsd, &base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { /* Use existing space. 
*/ @@ -123,28 +123,28 @@ base_alloc(size_t size) } JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: - malloc_mutex_unlock(&base_mtx); + malloc_mutex_unlock(tsd, &base_mtx); return (ret); } void -base_stats_get(size_t *allocated, size_t *resident, size_t *mapped) +base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, size_t *mapped) { - malloc_mutex_lock(&base_mtx); + malloc_mutex_lock(tsd, &base_mtx); assert(base_allocated <= base_resident); assert(base_resident <= base_mapped); *allocated = base_allocated; *resident = base_resident; *mapped = base_mapped; - malloc_mutex_unlock(&base_mtx); + malloc_mutex_unlock(tsd, &base_mtx); } bool base_boot(void) { - if (malloc_mutex_init(&base_mtx)) + if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); extent_tree_szad_new(&base_avail_szad); base_nodes = NULL; @@ -153,22 +153,22 @@ base_boot(void) } void -base_prefork(void) +base_prefork(tsd_t *tsd) { - malloc_mutex_prefork(&base_mtx); + malloc_mutex_prefork(tsd, &base_mtx); } void -base_postfork_parent(void) +base_postfork_parent(tsd_t *tsd) { - malloc_mutex_postfork_parent(&base_mtx); + malloc_mutex_postfork_parent(tsd, &base_mtx); } void -base_postfork_child(void) +base_postfork_child(tsd_t *tsd) { - malloc_mutex_postfork_child(&base_mtx); + malloc_mutex_postfork_child(tsd, &base_mtx); } diff --git a/src/chunk.c b/src/chunk.c index 304d4e5a..0ee2a1a7 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -49,9 +49,10 @@ const chunk_hooks_t chunk_hooks_default = { * definition. 
*/ -static void chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, bool zeroed, bool committed); +static void chunk_record(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, + extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, + bool committed); /******************************************************************************/ @@ -63,23 +64,23 @@ chunk_hooks_get_locked(arena_t *arena) } chunk_hooks_t -chunk_hooks_get(arena_t *arena) +chunk_hooks_get(tsd_t *tsd, arena_t *arena) { chunk_hooks_t chunk_hooks; - malloc_mutex_lock(&arena->chunks_mtx); + malloc_mutex_lock(tsd, &arena->chunks_mtx); chunk_hooks = chunk_hooks_get_locked(arena); - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (chunk_hooks); } chunk_hooks_t -chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) +chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks) { chunk_hooks_t old_chunk_hooks; - malloc_mutex_lock(&arena->chunks_mtx); + malloc_mutex_lock(tsd, &arena->chunks_mtx); old_chunk_hooks = arena->chunk_hooks; /* * Copy each field atomically so that it is impossible for readers to @@ -104,14 +105,14 @@ chunk_hooks_set(arena_t *arena, const chunk_hooks_t *chunk_hooks) ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (old_chunk_hooks); } static void -chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, - bool locked) +chunk_hooks_assure_initialized_impl(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks, bool locked) { static const chunk_hooks_t uninitialized_hooks = CHUNK_HOOKS_INITIALIZER; @@ -119,27 +120,28 @@ chunk_hooks_assure_initialized_impl(arena_t *arena, chunk_hooks_t *chunk_hooks, if 
(memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == 0) { *chunk_hooks = locked ? chunk_hooks_get_locked(arena) : - chunk_hooks_get(arena); + chunk_hooks_get(tsd, arena); } } static void -chunk_hooks_assure_initialized_locked(arena_t *arena, +chunk_hooks_assure_initialized_locked(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(arena, chunk_hooks, true); + chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, true); } static void -chunk_hooks_assure_initialized(arena_t *arena, chunk_hooks_t *chunk_hooks) +chunk_hooks_assure_initialized(tsd_t *tsd, arena_t *arena, + chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(arena, chunk_hooks, false); + chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, false); } bool -chunk_register(const void *chunk, const extent_node_t *node) +chunk_register(tsd_t *tsd, const void *chunk, const extent_node_t *node) { assert(extent_node_addr_get(node) == chunk); @@ -159,7 +161,7 @@ chunk_register(const void *chunk, const extent_node_t *node) high = atomic_read_z(&highchunks); } if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(); + prof_gdump(tsd); } return (false); @@ -197,7 +199,7 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, } static void * -chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, bool dalloc_node) @@ -219,8 +221,8 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); - malloc_mutex_lock(&arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + malloc_mutex_lock(tsd, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; extent_node_init(&key, arena, new_addr, alloc_size, false, @@ -232,7 +234,7 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), @@ -251,7 +253,7 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0 && chunk_hooks->split(extent_node_addr_get(node), extent_node_size_get(node), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); return (NULL); } /* Remove node from the tree. */ @@ -271,20 +273,21 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { if (dalloc_node && node != NULL) - arena_node_dalloc(arena, node); - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, - cache, ret, size + trailsize, zeroed, committed); + arena_node_dalloc(tsd, arena, node); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); + chunk_record(tsd, arena, chunk_hooks, chunks_szad, + chunks_ad, cache, ret, size + trailsize, zeroed, + committed); return (NULL); } /* Insert the trailing space as a smaller chunk. 
*/ if (node == NULL) { - node = arena_node_alloc(arena); + node = arena_node_alloc(tsd, arena); if (node == NULL) { - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, - chunks_ad, cache, ret, size + trailsize, - zeroed, committed); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); + chunk_record(tsd, arena, chunk_hooks, + chunks_szad, chunks_ad, cache, ret, size + + trailsize, zeroed, committed); return (NULL); } } @@ -296,16 +299,16 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { - malloc_mutex_unlock(&arena->chunks_mtx); - chunk_record(arena, chunk_hooks, chunks_szad, chunks_ad, cache, - ret, size, zeroed, committed); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); + chunk_record(tsd, arena, chunk_hooks, chunks_szad, chunks_ad, + cache, ret, size, zeroed, committed); return (NULL); } - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) - arena_node_dalloc(arena, node); + arena_node_dalloc(tsd, arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -328,8 +331,8 @@ chunk_recycle(arena_t *arena, chunk_hooks_t *chunk_hooks, * them if they are returned. */ static void * -chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, dss_prec_t dss_prec) +chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; @@ -340,8 +343,8 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != - NULL) + chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + commit)) != NULL) return (ret); /* mmap. 
*/ if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != @@ -349,8 +352,8 @@ chunk_alloc_core(arena_t *arena, void *new_addr, size_t size, size_t alignment, return (ret); /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(arena, new_addr, size, alignment, zero, commit)) != - NULL) + chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + commit)) != NULL) return (ret); /* All strategies for allocation failed. */ @@ -380,8 +383,8 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool dalloc_node) +chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) { void *ret; bool commit; @@ -392,7 +395,7 @@ chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, assert((alignment & chunksize_mask) == 0); commit = true; - ret = chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_cached, + ret = chunk_recycle(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, &commit, dalloc_node); if (ret == NULL) @@ -404,11 +407,11 @@ chunk_alloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, } static arena_t * -chunk_arena_get(unsigned arena_ind) +chunk_arena_get(tsd_t *tsd, unsigned arena_ind) { arena_t *arena; - arena = arena_get(arena_ind, false); + arena = arena_get(tsd, arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. 
@@ -422,11 +425,13 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; + tsd_t *tsd; arena_t *arena; - arena = chunk_arena_get(arena_ind); - ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, - arena->dss_prec); + tsd = tsd_fetch(); + arena = chunk_arena_get(tsd, arena_ind); + ret = chunk_alloc_core(tsd, arena, new_addr, size, alignment, zero, + commit, arena->dss_prec); if (ret == NULL) return (NULL); if (config_valgrind) @@ -436,8 +441,8 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, } static void * -chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { assert(size != 0); @@ -445,20 +450,20 @@ chunk_alloc_retained(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - return (chunk_recycle(arena, chunk_hooks, &arena->chunks_szad_retained, - &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, - commit, true)); + return (chunk_recycle(tsd, arena, chunk_hooks, + &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, + new_addr, size, alignment, zero, commit, true)); } void * -chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) +chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); - ret = chunk_alloc_retained(arena, chunk_hooks, new_addr, size, + ret = chunk_alloc_retained(tsd, arena, chunk_hooks, 
new_addr, size, alignment, zero, commit); if (ret == NULL) { ret = chunk_hooks->alloc(new_addr, size, alignment, zero, @@ -473,7 +478,7 @@ chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, } static void -chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, bool committed) { @@ -485,8 +490,8 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - malloc_mutex_lock(&arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(arena, chunk_hooks); + malloc_mutex_lock(tsd, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); @@ -511,7 +516,7 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(arena); + node = arena_node_alloc(tsd, arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly @@ -520,8 +525,8 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, * a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(arena, chunk_hooks, chunk, - size, 0, size); + chunk_purge_wrapper(tsd, arena, chunk_hooks, + chunk, size, 0, size); } goto label_return; } @@ -557,16 +562,16 @@ chunk_record(arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); - arena_node_dalloc(arena, prev); + arena_node_dalloc(tsd, arena, prev); } label_return: - malloc_mutex_unlock(&arena->chunks_mtx); + malloc_mutex_unlock(tsd, &arena->chunks_mtx); } void -chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool committed) +chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool committed) { assert(chunk != NULL); @@ -574,9 +579,9 @@ chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(arena, chunk_hooks, &arena->chunks_szad_cached, + chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, chunk, size, false, committed); - arena_maybe_purge(arena); + arena_maybe_purge(tsd, arena); } static bool @@ -584,14 +589,14 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - if (!have_dss || !chunk_in_dss(chunk)) + if (!have_dss || !chunk_in_dss(tsd_fetch(), chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } void -chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool zeroed, bool committed) +chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, bool zeroed, bool committed) { assert(chunk != NULL); @@ -599,7 +604,7 @@ chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_hooks_assure_initialized(arena, chunk_hooks); + 
chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); /* Try to deallocate. */ if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) return; @@ -610,7 +615,7 @@ chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(arena, chunk_hooks, &arena->chunks_szad_retained, + chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); } @@ -648,11 +653,11 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, size_t offset, size_t length) +chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length) { - chunk_hooks_assure_initialized(arena, chunk_hooks); + chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); } @@ -673,8 +678,11 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(chunk_a) != chunk_in_dss(chunk_b)) - return (true); + if (have_dss) { + tsd_t *tsd = tsd_fetch(); + if (chunk_in_dss(tsd, chunk_a) != chunk_in_dss(tsd, chunk_b)) + return (true); + } return (false); } @@ -683,7 +691,7 @@ static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(nelms * + return ((rtree_node_elm_t *)base_alloc(tsd_fetch(), nelms * sizeof(rtree_node_elm_t))); } @@ -730,22 +738,22 @@ chunk_boot(void) } void -chunk_prefork(void) +chunk_prefork(tsd_t *tsd) { - chunk_dss_prefork(); + chunk_dss_prefork(tsd); } void -chunk_postfork_parent(void) +chunk_postfork_parent(tsd_t *tsd) { - chunk_dss_postfork_parent(); + chunk_dss_postfork_parent(tsd); } void -chunk_postfork_child(void) 
+chunk_postfork_child(tsd_t *tsd) { - chunk_dss_postfork_child(); + chunk_dss_postfork_child(tsd); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 943d0e98..3b3f2433 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -41,33 +41,33 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(void) +chunk_dss_prec_get(tsd_t *tsd) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); ret = dss_prec_default; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (ret); } bool -chunk_dss_prec_set(dss_prec_t dss_prec) +chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); dss_prec_default = dss_prec; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (false); } void * -chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit) +chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit) { cassert(have_dss); assert(size > 0 && (size & chunksize_mask) == 0); @@ -80,7 +80,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); if (dss_prev != (void *)-1) { /* @@ -122,7 +122,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if ((uintptr_t)ret < (uintptr_t)dss_max || (uintptr_t)dss_next < (uintptr_t)dss_max) { /* Wrap-around. */ - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (NULL); } incr = gap_size + cpad_size + size; @@ -130,11 +130,11 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, if (dss_prev == dss_max) { /* Success. 
*/ dss_max = dss_next; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(arena, + chunk_dalloc_wrapper(tsd, arena, &chunk_hooks, cpad, cpad_size, false, true); } @@ -149,25 +149,25 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, } } while (dss_prev != (void *)-1); } - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (NULL); } bool -chunk_in_dss(void *chunk) +chunk_in_dss(tsd_t *tsd, void *chunk) { bool ret; cassert(have_dss); - malloc_mutex_lock(&dss_mtx); + malloc_mutex_lock(tsd, &dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) ret = true; else ret = false; - malloc_mutex_unlock(&dss_mtx); + malloc_mutex_unlock(tsd, &dss_mtx); return (ret); } @@ -178,7 +178,7 @@ chunk_dss_boot(void) cassert(have_dss); - if (malloc_mutex_init(&dss_mtx)) + if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) return (true); dss_base = chunk_dss_sbrk(0); dss_prev = dss_base; @@ -188,27 +188,27 @@ chunk_dss_boot(void) } void -chunk_dss_prefork(void) +chunk_dss_prefork(tsd_t *tsd) { if (have_dss) - malloc_mutex_prefork(&dss_mtx); + malloc_mutex_prefork(tsd, &dss_mtx); } void -chunk_dss_postfork_parent(void) +chunk_dss_postfork_parent(tsd_t *tsd) { if (have_dss) - malloc_mutex_postfork_parent(&dss_mtx); + malloc_mutex_postfork_parent(tsd, &dss_mtx); } void -chunk_dss_postfork_child(void) +chunk_dss_postfork_child(tsd_t *tsd) { if (have_dss) - malloc_mutex_postfork_child(&dss_mtx); + malloc_mutex_postfork_child(tsd, &dss_mtx); } /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index 17bd0719..50faee7b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -42,25 +42,25 @@ ctl_indexed_node(const ctl_node_t *node) /* Function prototypes for non-inline static functions. 
*/ #define CTL_PROTO(n) \ -static int n##_ctl(const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen); +static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(const size_t *mib, \ - size_t miblen, size_t i); +static const ctl_named_node_t *n##_index(tsd_t *tsd, \ + const size_t *mib, size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, +static void ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -static void ctl_arena_refresh(arena_t *arena, unsigned i); -static bool ctl_grow(void); -static void ctl_refresh(void); -static bool ctl_init(void); -static int ctl_lookup(const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp); +static void ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i); +static bool ctl_grow(tsd_t *tsd); +static void ctl_refresh(tsd_t *tsd); +static bool ctl_init(tsd_t *tsd); +static int ctl_lookup(tsd_t *tsd, const char *name, + ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); CTL_PROTO(version) CTL_PROTO(epoch) @@ -117,7 +117,7 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) -static void arena_i_purge(unsigned arena_ind, bool all); +static void arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_dss) @@ -554,12 +554,12 @@ ctl_arena_clear(ctl_arena_stats_t *astats) } static void -ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) +ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; if 
(config_stats) { - arena_stats_merge(arena, &cstats->nthreads, &cstats->dss, + arena_stats_merge(tsd, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); @@ -572,8 +572,8 @@ ctl_arena_stats_amerge(ctl_arena_stats_t *cstats, arena_t *arena) cstats->nrequests_small += cstats->bstats[i].nrequests; } } else { - arena_basic_stats_merge(arena, &cstats->nthreads, &cstats->dss, - &cstats->lg_dirty_mult, &cstats->decay_time, + arena_basic_stats_merge(tsd, arena, &cstats->nthreads, + &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty); } } @@ -649,24 +649,24 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } static void -ctl_arena_refresh(arena_t *arena, unsigned i) +ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - ctl_arena_stats_amerge(astats, arena); + ctl_arena_stats_amerge(tsd, astats, arena); /* Merge into sum stats as well. */ ctl_arena_stats_smerge(sstats, astats); } static bool -ctl_grow(void) +ctl_grow(tsd_t *tsd) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(ctl_stats.narenas) == NULL) + if (arena_init(tsd, ctl_stats.narenas) == NULL) return (true); /* Allocate extended arena stats. 
*/ @@ -701,7 +701,7 @@ ctl_grow(void) } static void -ctl_refresh(void) +ctl_refresh(tsd_t *tsd) { unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); @@ -713,19 +713,20 @@ ctl_refresh(void) ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(i, false); + tarenas[i] = arena_get(tsd, i, false); for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; if (initialized) - ctl_arena_refresh(tarenas[i], i); + ctl_arena_refresh(tsd, tarenas[i], i); } if (config_stats) { size_t base_allocated, base_resident, base_mapped; - base_stats_get(&base_allocated, &base_resident, &base_mapped); + base_stats_get(tsd, &base_allocated, &base_resident, + &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + @@ -748,11 +749,11 @@ ctl_refresh(void) } static bool -ctl_init(void) +ctl_init(tsd_t *tsd) { bool ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which @@ -794,19 +795,19 @@ ctl_init(void) ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; - ctl_refresh(); + ctl_refresh(tsd); ctl_initialized = true; } ret = false; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, - size_t *depthp) +ctl_lookup(tsd_t *tsd, const char *name, ctl_node_t const **nodesp, + size_t *mibp, size_t *depthp) { int ret; const char *elm, *tdot, *dot; @@ -858,7 +859,7 @@ ctl_lookup(const char *name, ctl_node_t const **nodesp, size_t *mibp, } inode = ctl_indexed_node(node->children); - node = inode->index(mibp, *depthp, (size_t)index); + node = inode->index(tsd, mibp, *depthp, (size_t)index); if (node == NULL) { ret 
= ENOENT; goto label_return; @@ -902,8 +903,8 @@ label_return: } int -ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) +ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; size_t depth; @@ -911,19 +912,19 @@ ctl_byname(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(name, nodes, mib, &depth); + ret = ctl_lookup(tsd, name, nodes, mib, &depth); if (ret != 0) goto label_return; node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) - ret = node->ctl(mib, depth, oldp, oldlenp, newp, newlen); + ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); else { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; @@ -934,29 +935,29 @@ label_return: } int -ctl_nametomib(const char *name, size_t *mibp, size_t *miblenp) +ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(name, NULL, mibp, miblenp); + ret = ctl_lookup(tsd, name, NULL, mibp, miblenp); label_return: return(ret); } int -ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init()) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } @@ -978,7 +979,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Indexed element. 
*/ inode = ctl_indexed_node(node->children); - node = inode->index(mib, miblen, mib[i]); + node = inode->index(tsd, mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -988,7 +989,7 @@ ctl_bymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, /* Call the ctl function. */ if (node && node->ctl) - ret = node->ctl(mib, miblen, oldp, oldlenp, newp, newlen); + ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen); else { /* Partial MIB. */ ret = ENOENT; @@ -1002,7 +1003,7 @@ bool ctl_boot(void) { - if (malloc_mutex_init(&ctl_mtx)) + if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) return (true); ctl_initialized = false; @@ -1011,24 +1012,24 @@ ctl_boot(void) } void -ctl_prefork(void) +ctl_prefork(tsd_t *tsd) { - malloc_mutex_prefork(&ctl_mtx); + malloc_mutex_prefork(tsd, &ctl_mtx); } void -ctl_postfork_parent(void) +ctl_postfork_parent(tsd_t *tsd) { - malloc_mutex_postfork_parent(&ctl_mtx); + malloc_mutex_postfork_parent(tsd, &ctl_mtx); } void -ctl_postfork_child(void) +ctl_postfork_child(tsd_t *tsd) { - malloc_mutex_postfork_child(&ctl_mtx); + malloc_mutex_postfork_child(tsd, &ctl_mtx); } /******************************************************************************/ @@ -1085,8 +1086,8 @@ ctl_postfork_child(void) */ #define CTL_RO_CLGEN(c, l, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1094,7 +1095,7 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ if (!(c)) \ return (ENOENT); \ if (l) \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd, &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1102,47 +1103,47 @@ n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ ret = 0; \ label_return: \ if (l) \ - 
malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd, &ctl_mtx); \ return (ret); \ } #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ if (!(c)) \ return (ENOENT); \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd, &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd, &ctl_mtx); \ return (ret); \ } #define CTL_RO_GEN(n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ \ - malloc_mutex_lock(&ctl_mtx); \ + malloc_mutex_lock(tsd, &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(&ctl_mtx); \ + malloc_mutex_unlock(tsd, &ctl_mtx); \ return (ret); \ } @@ -1152,8 +1153,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1171,8 +1172,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1188,17 +1189,15 @@ label_return: \ #define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ static int \ -n##_ctl(const size_t *mib, size_t 
miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ - tsd_t *tsd; \ \ if (!(c)) \ return (ENOENT); \ READONLY(); \ - tsd = tsd_fetch(); \ oldval = (m(tsd)); \ READ(oldval, t); \ \ @@ -1209,8 +1208,8 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, \ - void *newp, size_t newlen) \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ + size_t *oldlenp, void *newp, size_t newlen) \ { \ int ret; \ t oldval; \ @@ -1229,21 +1228,21 @@ label_return: \ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); WRITE(newval, uint64_t); if (newp != NULL) - ctl_refresh(); + ctl_refresh(tsd); READ(ctl_epoch, uint64_t); ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } @@ -1298,20 +1297,18 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) /******************************************************************************/ static int -thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; arena_t *oldarena; unsigned newind, oldind; - tsd = tsd_fetch(); oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); newind = oldind = oldarena->ind; 
WRITE(newind, unsigned); READ(oldind, unsigned); @@ -1325,7 +1322,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } /* Initialize arena if necessary. */ - newarena = arena_get(newind, true); + newarena = arena_get(tsd, newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1335,7 +1332,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tcache, oldarena, + tcache_arena_reassociate(tsd, tcache, oldarena, newarena); } } @@ -1343,7 +1340,7 @@ thread_arena_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } @@ -1357,8 +1354,8 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, tsd_thread_deallocatedp_get, uint64_t *) static int -thread_tcache_enabled_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -1382,8 +1379,8 @@ label_return: } static int -thread_tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1401,7 +1398,7 @@ label_return: } static int -thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1412,20 +1409,16 @@ thread_prof_name_ctl(const size_t *mib, size_t miblen, void *oldp, READ_XOR_WRITE(); if (newp != NULL) { - tsd_t *tsd; - if (newlen != sizeof(const char *)) { ret = EINVAL; goto 
label_return; } - tsd = tsd_fetch(); - if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != 0) goto label_return; } else { - const char *oldname = prof_thread_name_get(); + const char *oldname = prof_thread_name_get(tsd); READ(oldname, const char *); } @@ -1435,7 +1428,7 @@ label_return: } static int -thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1444,13 +1437,13 @@ thread_prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, if (!config_prof) return (ENOENT); - oldval = prof_thread_active_get(); + oldval = prof_thread_active_get(tsd); if (newp != NULL) { if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } - if (prof_thread_active_set(*(bool *)newp)) { + if (prof_thread_active_set(tsd, *(bool *)newp)) { ret = EAGAIN; goto label_return; } @@ -1465,19 +1458,16 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1487,23 +1477,20 @@ tcache_create_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -tcache_flush_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t 
newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1519,18 +1506,15 @@ label_return: } static int -tcache_destroy_ctl(const size_t *mib, size_t miblen, void *oldp, +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - tsd_t *tsd; unsigned tcache_ind; if (!config_tcache) return (ENOENT); - tsd = tsd_fetch(); - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1548,10 +1532,10 @@ label_return: /******************************************************************************/ static void -arena_i_purge(unsigned arena_ind, bool all) +arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all) { - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); { unsigned narenas = ctl_stats.narenas; @@ -1560,43 +1544,43 @@ arena_i_purge(unsigned arena_ind, bool all) VARIABLE_ARRAY(arena_t *, tarenas, narenas); for (i = 0; i < narenas; i++) - tarenas[i] = arena_get(i, false); + tarenas[i] = arena_get(tsd, i, false); /* * No further need to hold ctl_mtx, since narenas and * tarenas contain everything needed below. */ - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) - arena_purge(tarenas[i], all); + arena_purge(tsd, tarenas[i], all); } } else { arena_t *tarena; assert(arena_ind < narenas); - tarena = arena_get(arena_ind, false); + tarena = arena_get(tsd, arena_ind, false); /* No further need to hold ctl_mtx. 
*/ - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); if (tarena != NULL) - arena_purge(tarena, all); + arena_purge(tsd, tarena, all); } } } static int -arena_i_purge_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; READONLY(); WRITEONLY(); - arena_i_purge((unsigned)mib[1], true); + arena_i_purge(tsd, (unsigned)mib[1], true); ret = 0; label_return: @@ -1604,14 +1588,14 @@ label_return: } static int -arena_i_decay_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; READONLY(); WRITEONLY(); - arena_i_purge((unsigned)mib[1], false); + arena_i_purge(tsd, (unsigned)mib[1], false); ret = 0; label_return: @@ -1619,8 +1603,8 @@ label_return: } static int -arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *dss = NULL; @@ -1628,7 +1612,7 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); WRITE(dss, const char *); if (dss != NULL) { int i; @@ -1649,20 +1633,20 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(arena_ind, false); + arena_t *arena = arena_get(tsd, arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(arena, dss_prec))) { + arena_dss_prec_set(tsd, arena, dss_prec))) { ret = EFAULT; 
goto label_return; } - dss_prec_old = arena_dss_prec_get(arena); + dss_prec_old = arena_dss_prec_get(tsd, arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(dss_prec)) { + chunk_dss_prec_set(tsd, dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(); + dss_prec_old = chunk_dss_prec_get(tsd); } dss = dss_prec_names[dss_prec_old]; @@ -1670,26 +1654,26 @@ arena_i_dss_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(arena_ind, false); + arena = arena_get(tsd, arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_get(arena); + size_t oldval = arena_lg_dirty_mult_get(tsd, arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1697,7 +1681,7 @@ arena_i_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_lg_dirty_mult_set(arena, *(ssize_t *)newp)) { + if (arena_lg_dirty_mult_set(tsd, arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1709,21 +1693,21 @@ label_return: } static int -arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, +arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(arena_ind, false); + arena = arena_get(tsd, arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp 
!= NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(arena); + size_t oldval = arena_decay_time_get(tsd, arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1731,7 +1715,7 @@ arena_i_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_set(arena, *(ssize_t *)newp)) { + if (arena_decay_time_set(tsd, arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1743,24 +1727,25 @@ label_return: } static int -arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(arena_ind, false)) != NULL) { + arena_get(tsd, arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(arena, + old_chunk_hooks = chunk_hooks_set(tsd, arena, &new_chunk_hooks); READ(old_chunk_hooks, chunk_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = chunk_hooks_get(arena); + chunk_hooks_t old_chunk_hooks = chunk_hooks_get(tsd, + arena); READ(old_chunk_hooks, chunk_hooks_t); } } else { @@ -1769,16 +1754,16 @@ arena_i_chunk_hooks_ctl(const size_t *mib, size_t miblen, void *oldp, } ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static const ctl_named_node_t * -arena_i_index(const size_t *mib, size_t miblen, size_t i) +arena_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { - const ctl_named_node_t * ret; + const ctl_named_node_t *ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (i > ctl_stats.narenas) { ret = NULL; goto 
label_return; @@ -1786,20 +1771,20 @@ arena_i_index(const size_t *mib, size_t miblen, size_t i) ret = super_arena_i_node; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } /******************************************************************************/ static int -arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); if (*oldlenp != sizeof(unsigned)) { ret = EINVAL; @@ -1810,18 +1795,18 @@ arenas_narenas_ctl(const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned nread, i; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; @@ -1836,13 +1821,13 @@ arenas_initialized_ctl(const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } static int -arenas_lg_dirty_mult_ctl(const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) +arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1867,7 +1852,7 @@ label_return: } static int -arenas_decay_time_ctl(const size_t *mib, size_t miblen, void *oldp, +arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t 
newlen) { int ret; @@ -1901,7 +1886,7 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_bin_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) @@ -1912,7 +1897,7 @@ arenas_bin_i_index(const size_t *mib, size_t miblen, size_t i) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lrun_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_lrun_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { if (i > nlclasses) @@ -1924,7 +1909,7 @@ CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) +arenas_hchunk_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { if (i > nhclasses) @@ -1933,15 +1918,15 @@ arenas_hchunk_i_index(const size_t *mib, size_t miblen, size_t i) } static int -arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); READONLY(); - if (ctl_grow()) { + if (ctl_grow(tsd)) { ret = EAGAIN; goto label_return; } @@ -1950,14 +1935,39 @@ arenas_extend_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = 0; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } 
/******************************************************************************/ static int -prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + bool oldval; + + if (!config_prof) + return (ENOENT); + + if (newp != NULL) { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = prof_thread_active_init_set(tsd, *(bool *)newp); + } else + oldval = prof_thread_active_init_get(tsd); + READ(oldval, bool); + + ret = 0; +label_return: + return (ret); +} + +static int +prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1971,9 +1981,9 @@ prof_thread_active_init_ctl(const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(*(bool *)newp); + oldval = prof_active_set(tsd, *(bool *)newp); } else - oldval = prof_thread_active_init_get(); + oldval = prof_active_get(tsd); READ(oldval, bool); ret = 0; @@ -1982,33 +1992,8 @@ label_return: } static int -prof_active_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ - int ret; - bool oldval; - - if (!config_prof) - return (ENOENT); - - if (newp != NULL) { - if (newlen != sizeof(bool)) { - ret = EINVAL; - goto label_return; - } - oldval = prof_active_set(*(bool *)newp); - } else - oldval = prof_active_get(); - READ(oldval, bool); - - ret = 0; -label_return: - return (ret); -} - -static int -prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *filename = NULL; @@ -2019,7 +2004,7 @@ prof_dump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, 
WRITEONLY(); WRITE(filename, const char *); - if (prof_mdump(filename)) { + if (prof_mdump(tsd, filename)) { ret = EFAULT; goto label_return; } @@ -2030,8 +2015,8 @@ label_return: } static int -prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -2044,9 +2029,9 @@ prof_gdump_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, ret = EINVAL; goto label_return; } - oldval = prof_gdump_set(*(bool *)newp); + oldval = prof_gdump_set(tsd, *(bool *)newp); } else - oldval = prof_gdump_get(); + oldval = prof_gdump_get(tsd); READ(oldval, bool); ret = 0; @@ -2055,12 +2040,11 @@ label_return: } static int -prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; - tsd_t *tsd; if (!config_prof) return (ENOENT); @@ -2070,8 +2054,6 @@ prof_reset_ctl(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - tsd = tsd_fetch(); - prof_reset(tsd, lg_sample); ret = 0; @@ -2157,7 +2139,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_bins_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, + size_t j) { if (j > NBINS) @@ -2175,7 +2158,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(const size_t *mib, size_t miblen, size_t j) 
+stats_arenas_i_lruns_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, + size_t j) { if (j > nlclasses) @@ -2194,7 +2178,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) static const ctl_named_node_t * -stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) +stats_arenas_i_hchunks_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, + size_t j) { if (j > nhclasses) @@ -2203,11 +2188,11 @@ stats_arenas_i_hchunks_j_index(const size_t *mib, size_t miblen, size_t j) } static const ctl_named_node_t * -stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) +stats_arenas_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; - malloc_mutex_lock(&ctl_mtx); + malloc_mutex_lock(tsd, &ctl_mtx); if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; @@ -2215,6 +2200,6 @@ stats_arenas_i_index(const size_t *mib, size_t miblen, size_t i) ret = super_stats_arenas_i_node; label_return: - malloc_mutex_unlock(&ctl_mtx); + malloc_mutex_unlock(tsd, &ctl_mtx); return (ret); } diff --git a/src/huge.c b/src/huge.c index a63c8258..3a802dee 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,12 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(const void *ptr, extent_node_t *node) +huge_node_set(tsd_t *tsd, const void *ptr, extent_node_t *node) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(ptr, node)); + return (chunk_register(tsd, ptr, node)); } static void @@ -68,7 +68,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, */ is_zeroed = zero; arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(arena, + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsd, node, tcache, true, 
true); return (NULL); @@ -76,17 +76,17 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, extent_node_init(node, arena, ret, usize, is_zeroed, true); - if (huge_node_set(ret, node)) { - arena_chunk_dalloc_huge(arena, ret, usize); + if (huge_node_set(tsd, ret, node)) { + arena_chunk_dalloc_huge(tsd, arena, ret, usize); idalloctm(tsd, node, tcache, true, true); return (NULL); } /* Insert node into huge. */ - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->huge, node, ql_link); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) @@ -103,7 +103,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(void *ptr, size_t usize) +huge_dalloc_junk(tsd_t *tsd, void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -111,7 +111,7 @@ huge_dalloc_junk(void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. 
*/ - if (!config_munmap || (have_dss && chunk_in_dss(ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(tsd, ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -122,8 +122,8 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero) +huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; extent_node_t *node; @@ -151,21 +151,22 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - ptr, CHUNK_CEILING(oldsize), usize, sdiff); + post_zeroed = !chunk_purge_wrapper(tsd, arena, + &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, + sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); - arena_chunk_ralloc_huge_similar(arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(tsd, arena, ptr, oldsize, usize); /* Fill if necessary (growing). 
*/ if (oldsize < usize) { @@ -182,7 +183,7 @@ huge_ralloc_no_move_similar(void *ptr, size_t oldsize, size_t usize_min, } static bool -huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) +huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) { extent_node_t *node; arena_t *arena; @@ -193,7 +194,7 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) node = huge_node_get(ptr); arena = extent_node_arena_get(node); pre_zeroed = extent_node_zeroed_get(node); - chunk_hooks = chunk_hooks_get(arena); + chunk_hooks = chunk_hooks_get(tsd, arena); assert(oldsize > usize); @@ -206,42 +207,43 @@ huge_ralloc_no_move_shrink(void *ptr, size_t oldsize, size_t usize) if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk((void *)((uintptr_t)ptr + usize), + huge_dalloc_junk(tsd, (void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(arena, &chunk_hooks, - CHUNK_ADDR2BASE((uintptr_t)ptr + usize), - CHUNK_CEILING(oldsize), + post_zeroed = !chunk_purge_wrapper(tsd, arena, + &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + + usize), CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); /* Zap the excess chunks. 
*/ - arena_chunk_ralloc_huge_shrink(arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsd, arena, ptr, oldsize, usize); return (false); } static bool -huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { +huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, + bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); is_zeroed_subchunk = extent_node_zeroed_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); /* * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so @@ -249,14 +251,14 @@ huge_ralloc_no_move_expand(void *ptr, size_t oldsize, size_t usize, bool zero) { */ is_zeroed_chunk = zero; - if (arena_chunk_ralloc_huge_expand(arena, ptr, oldsize, usize, + if (arena_chunk_ralloc_huge_expand(tsd, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -291,15 +293,15 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(ptr, oldsize, usize_max, + if (!huge_ralloc_no_move_expand(tsd, ptr, oldsize, usize_max, zero)) { arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(ptr, - oldsize, usize_min, zero)) { + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsd, + ptr, oldsize, usize_min, zero)) { arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } @@ -311,15 +313,15 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(ptr, oldsize, usize_min, usize_max, - zero); + huge_ralloc_no_move_similar(tsd, ptr, oldsize, usize_min, + usize_max, zero); arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. */ if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(ptr, oldsize, usize_max)) { + if (!huge_ralloc_no_move_shrink(tsd, ptr, oldsize, usize_max)) { arena_decay_tick(tsd, huge_aalloc(ptr)); return (false); } @@ -376,13 +378,13 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) node = huge_node_get(ptr); arena = extent_node_arena_get(node); huge_node_unset(ptr, node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); - huge_dalloc_junk(extent_node_addr_get(node), + huge_dalloc_junk(tsd, extent_node_addr_get(node), extent_node_size_get(node)); - arena_chunk_dalloc_huge(extent_node_arena_get(node), + arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); idalloctm(tsd, node, tcache, true, true); @@ -397,7 +399,7 @@ huge_aalloc(const void *ptr) } size_t -huge_salloc(const void *ptr) +huge_salloc(tsd_t *tsd, const void *ptr) { size_t size; extent_node_t *node; @@ -405,15 +407,15 @@ huge_salloc(const void *ptr) node = huge_node_get(ptr); arena = 
extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); size = extent_node_size_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); return (size); } prof_tctx_t * -huge_prof_tctx_get(const void *ptr) +huge_prof_tctx_get(tsd_t *tsd, const void *ptr) { prof_tctx_t *tctx; extent_node_t *node; @@ -421,29 +423,29 @@ huge_prof_tctx_get(const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); tctx = extent_node_prof_tctx_get(node); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); return (tctx); } void -huge_prof_tctx_set(const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { extent_node_t *node; arena_t *arena; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(&arena->huge_mtx); + malloc_mutex_lock(tsd, &arena->huge_mtx); extent_node_prof_tctx_set(node, tctx); - malloc_mutex_unlock(&arena->huge_mtx); + malloc_mutex_unlock(tsd, &arena->huge_mtx); } void -huge_prof_tctx_reset(const void *ptr) +huge_prof_tctx_reset(tsd_t *tsd, const void *ptr) { - huge_prof_tctx_set(ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsd, ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0735376e..7543dff1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -212,7 +212,7 @@ _init_init_lock(void) * really only matters early in the process creation, before any * separate thread normally starts doing anything. */ if (!init_lock_initialized) - malloc_mutex_init(&init_lock); + malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT); init_lock_initialized = true; } @@ -254,7 +254,7 @@ typedef struct { * definition. 
*/ -static bool malloc_init_hard_a0(void); +static bool malloc_init_hard_a0(tsd_t *tsd); static bool malloc_init_hard(void); /******************************************************************************/ @@ -291,7 +291,7 @@ malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) - return (malloc_init_hard_a0()); + return (malloc_init_hard_a0(NULL)); return (false); } @@ -319,7 +319,7 @@ a0ialloc(size_t size, bool zero, bool is_metadata) return (NULL); return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, arena_get(0, false), true)); + is_metadata, arena_get(NULL, 0, false), true)); } static void @@ -413,7 +413,7 @@ narenas_total_get(void) /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(unsigned ind) +arena_init_locked(tsd_t *tsd, unsigned ind) { arena_t *arena; @@ -427,26 +427,26 @@ arena_init_locked(unsigned ind) * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arena_get(ind, false); + arena = arena_get(tsd, ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. 
*/ - arena = arena_new(ind); + arena = arena_new(tsd, ind); arena_set(ind, arena); return (arena); } arena_t * -arena_init(unsigned ind) +arena_init(tsd_t *tsd, unsigned ind) { arena_t *arena; - malloc_mutex_lock(&arenas_lock); - arena = arena_init_locked(ind); - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_lock(tsd, &arenas_lock); + arena = arena_init_locked(tsd, ind); + malloc_mutex_unlock(tsd, &arenas_lock); return (arena); } @@ -455,7 +455,7 @@ arena_bind(tsd_t *tsd, unsigned ind) { arena_t *arena; - arena = arena_get(ind, false); + arena = arena_get(tsd, ind, false); arena_nthreads_inc(arena); if (tsd_nominal(tsd)) @@ -467,8 +467,8 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - oldarena = arena_get(oldind, false); - newarena = arena_get(newind, false); + oldarena = arena_get(tsd, oldind, false); + newarena = arena_get(tsd, newind, false); arena_nthreads_dec(oldarena); arena_nthreads_inc(newarena); tsd_arena_set(tsd, newarena); @@ -479,7 +479,7 @@ arena_unbind(tsd_t *tsd, unsigned ind) { arena_t *arena; - arena = arena_get(ind, false); + arena = arena_get(tsd, ind, false); arena_nthreads_dec(arena); tsd_arena_set(tsd, NULL); } @@ -571,16 +571,16 @@ arena_choose_hard(tsd_t *tsd) choose = 0; first_null = narenas_auto; - malloc_mutex_lock(&arenas_lock); - assert(arena_get(0, false) != NULL); + malloc_mutex_lock(tsd, &arenas_lock); + assert(arena_get(tsd, 0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arena_get(i, false) != NULL) { + if (arena_get(tsd, i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. 
*/ - if (arena_nthreads_get(arena_get(i, false)) < - arena_nthreads_get(arena_get(choose, + if (arena_nthreads_get(arena_get(tsd, i, false)) + < arena_nthreads_get(arena_get(tsd, choose, false))) choose = i; } else if (first_null == narenas_auto) { @@ -597,26 +597,26 @@ arena_choose_hard(tsd_t *tsd) } } - if (arena_nthreads_get(arena_get(choose, false)) == 0 + if (arena_nthreads_get(arena_get(tsd, choose, false)) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded arena if * all arenas are already initialized. */ - ret = arena_get(choose, false); + ret = arena_get(tsd, choose, false); } else { /* Initialize a new arena. */ choose = first_null; - ret = arena_init_locked(choose); + ret = arena_init_locked(tsd, choose); if (ret == NULL) { - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_unlock(tsd, &arenas_lock); return (NULL); } } arena_bind(tsd, choose); - malloc_mutex_unlock(&arenas_lock); + malloc_mutex_unlock(tsd, &arenas_lock); } else { - ret = arena_get(0, false); + ret = arena_get(tsd, 0, false); arena_bind(tsd, 0); } @@ -681,8 +681,11 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { + tsd_t *tsd; unsigned narenas, i; + tsd = tsd_fetch(); + /* * Merge stats from extant threads. This is racy, since * individual threads do not lock when recording tcache stats @@ -691,7 +694,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arena_get(i, false); + arena_t *arena = arena_get(tsd, i, false); if (arena != NULL) { tcache_t *tcache; @@ -701,11 +704,11 @@ stats_print_atexit(void) * and bin locks in the opposite order, * deadlocks may result. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tcache, arena); + tcache_stats_merge(tsd, tcache, arena); } - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } } } @@ -1056,7 +1059,8 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(i)) { + if (chunk_dss_prec_set(NULL, + i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -1186,7 +1190,6 @@ malloc_conf_init(void) } } -/* init_lock must be held. */ static bool malloc_init_hard_needed(void) { @@ -1204,9 +1207,9 @@ malloc_init_hard_needed(void) if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ do { - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(NULL, &init_lock); CPU_SPINWAIT; - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(NULL, &init_lock); } while (!malloc_initialized()); return (false); } @@ -1214,9 +1217,8 @@ malloc_init_hard_needed(void) return (true); } -/* init_lock must be held. */ static bool -malloc_init_hard_a0_locked(void) +malloc_init_hard_a0_locked(tsd_t *tsd) { malloc_initializer = INITIALIZER; @@ -1242,9 +1244,9 @@ malloc_init_hard_a0_locked(void) prof_boot1(); if (arena_boot()) return (true); - if (config_tcache && tcache_boot()) + if (config_tcache && tcache_boot(tsd)) return (true); - if (malloc_mutex_init(&arenas_lock)) + if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); /* * Create enough scaffolding to allow recursive allocation in @@ -1258,38 +1260,35 @@ malloc_init_hard_a0_locked(void) * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). 
*/ - if (arena_init(0) == NULL) + if (arena_init(tsd, 0) == NULL) return (true); malloc_init_state = malloc_init_a0_initialized; return (false); } static bool -malloc_init_hard_a0(void) +malloc_init_hard_a0(tsd_t *tsd) { bool ret; - malloc_mutex_lock(&init_lock); - ret = malloc_init_hard_a0_locked(); - malloc_mutex_unlock(&init_lock); + malloc_mutex_lock(tsd, &init_lock); + ret = malloc_init_hard_a0_locked(tsd); + malloc_mutex_unlock(tsd, &init_lock); return (ret); } -/* - * Initialize data structures which may trigger recursive allocation. - * - * init_lock must be held. - */ +/* Initialize data structures which may trigger recursive allocation. */ static bool -malloc_init_hard_recursible(void) +malloc_init_hard_recursible(tsd_t **tsd) { - bool ret = false; + bool ret; malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(*tsd, &init_lock); /* LinuxThreads' pthread_setspecific() allocates. */ - if (malloc_tsd_boot0()) { + *tsd = malloc_tsd_boot0(); + if (*tsd == NULL) { ret = true; goto label_return; } @@ -1308,17 +1307,17 @@ malloc_init_hard_recursible(void) } #endif + ret = false; label_return: - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(*tsd, &init_lock); return (ret); } -/* init_lock must be held. */ static bool -malloc_init_hard_finish(void) +malloc_init_hard_finish(tsd_t *tsd) { - if (mutex_boot()) + if (malloc_mutex_boot()) return (true); if (opt_narenas == 0) { @@ -1343,7 +1342,7 @@ malloc_init_hard_finish(void) narenas_total_set(narenas_auto); /* Allocate and initialize arenas. 
*/ - arenas = (arena_t **)base_alloc(sizeof(arena_t *) * + arenas = (arena_t **)base_alloc(tsd, sizeof(arena_t *) * (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); @@ -1359,38 +1358,39 @@ malloc_init_hard_finish(void) static bool malloc_init_hard(void) { + tsd_t *tsd = NULL; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif - malloc_mutex_lock(&init_lock); + malloc_mutex_lock(tsd, &init_lock); if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(tsd, &init_lock); return (false); } if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked()) { - malloc_mutex_unlock(&init_lock); + malloc_init_hard_a0_locked(tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (malloc_init_hard_recursible()) { - malloc_mutex_unlock(&init_lock); + if (malloc_init_hard_recursible(&tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (config_prof && prof_boot2()) { - malloc_mutex_unlock(&init_lock); + if (config_prof && prof_boot2(tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (malloc_init_hard_finish()) { - malloc_mutex_unlock(&init_lock); + if (malloc_init_hard_finish(tsd)) { + malloc_mutex_unlock(tsd, &init_lock); return (true); } - malloc_mutex_unlock(&init_lock); + malloc_mutex_unlock(tsd, &init_lock); malloc_tsd_boot1(); return (false); } @@ -1416,7 +1416,7 @@ imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = imalloc(tsd, usize, ind, slow_path); @@ -1438,7 +1438,7 @@ imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd, p, usize, tctx); return (p); } @@ -1450,7 +1450,11 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) 
if (slow_path && unlikely(malloc_init())) return (NULL); + *tsd = tsd_fetch(); + + witness_assert_lockless(*tsd); + ind = size2index(size); if (unlikely(ind >= NSIZES)) return (NULL); @@ -1479,7 +1483,7 @@ imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } } @@ -1507,9 +1511,10 @@ je_malloc(size_t size) ret = imalloc_body(size, &tsd, &usize, true); imalloc_post_check(ret, tsd, usize, true); UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } + witness_assert_lockless(tsd); return (ret); } @@ -1526,7 +1531,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1548,7 +1553,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd, p, usize, tctx); return (p); } @@ -1565,10 +1570,12 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) assert(min_alignment != 0); if (unlikely(malloc_init())) { + tsd = NULL; result = NULL; goto label_oom; } tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (size == 0) size = 1; @@ -1603,10 +1610,12 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(result, config_prof)); + assert(usize == isalloc(tsd, result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); + JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd, 
result, usize, false); + witness_assert_lockless(tsd); return (ret); label_oom: assert(result == NULL); @@ -1616,6 +1625,7 @@ label_oom: abort(); } ret = ENOMEM; + witness_assert_lockless(tsd); goto label_return; } @@ -1623,9 +1633,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret = imemalign(memptr, alignment, size, sizeof(void *)); - JEMALLOC_VALGRIND_MALLOC(ret == 0, *memptr, isalloc(*memptr, - config_prof), false); + int ret; + + ret = imemalign(memptr, alignment, size, sizeof(void *)); + return (ret); } @@ -1641,8 +1652,7 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; set_errno(err); } - JEMALLOC_VALGRIND_MALLOC(err == 0, ret, isalloc(ret, config_prof), - false); + return (ret); } @@ -1658,7 +1668,7 @@ icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) p = icalloc(tsd, LARGE_MINCLASS, ind_large); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = icalloc(tsd, usize, ind); @@ -1680,7 +1690,7 @@ icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, usize, tctx); + prof_malloc(tsd, p, usize, tctx); return (p); } @@ -1697,11 +1707,13 @@ je_calloc(size_t num, size_t size) size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(malloc_init())) { + tsd = NULL; num_size = 0; ret = NULL; goto label_return; } tsd = tsd_fetch(); + witness_assert_lockless(tsd); num_size = num * size; if (unlikely(num_size == 0)) { @@ -1747,11 +1759,12 @@ label_return: set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, usize, true); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, true); + 
witness_assert_lockless(tsd); return (ret); } @@ -1767,7 +1780,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); @@ -1782,7 +1795,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(old_ptr); + old_tctx = prof_tctx_get(tsd, old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -1804,14 +1817,16 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + witness_assert_lockless(tsd); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd, ptr, config_prof); prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd, ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; @@ -1819,7 +1834,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) iqalloc(tsd, ptr, tcache, false); else { if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); + rzsize = p2rz(tsd, ptr); iqalloc(tsd, ptr, tcache, true); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1830,6 +1845,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); + witness_assert_lockless(tsd); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1838,7 +1855,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) if (config_stats) 
*tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(ptr); + rzsize = p2rz(tsd, ptr); isqalloc(tsd, ptr, usize, tcache); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1869,10 +1886,13 @@ je_realloc(void *ptr, size_t size) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd); - old_usize = isalloc(ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) - old_rzsize = config_prof ? p2rz(ptr) : u2rz(old_usize); + old_usize = isalloc(tsd, ptr, config_prof); + if (config_valgrind && unlikely(in_valgrind)) { + old_rzsize = config_prof ? p2rz(tsd, ptr) : + u2rz(old_usize); + } if (config_prof && opt_prof) { usize = s2u(size); @@ -1901,13 +1921,14 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(ret, config_prof)); + assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, ret, usize, true, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(true, tsd, ret, usize, true, ptr, old_usize, old_rzsize, true, false); + witness_assert_lockless(tsd); return (ret); } @@ -1922,6 +1943,7 @@ je_free(void *ptr) ifree(tsd, ptr, tcache_get(tsd, false), false); else ifree(tsd, ptr, tcache_get(tsd, false), true); + witness_assert_lockless(tsd); } } @@ -1942,7 +1964,6 @@ je_memalign(size_t alignment, size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) ret = NULL; - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -1956,7 +1977,6 @@ je_valloc(size_t size) void *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) ret = NULL; - JEMALLOC_VALGRIND_MALLOC(ret != NULL, ret, size, false); return (ret); } #endif @@ -2020,7 +2040,7 @@ 
imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(arena_ind, true); + *arena = arena_get(tsd, arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2076,7 +2096,7 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, arena); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + arena_prof_promoted(tsd, p, usize); } else p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); @@ -2108,7 +2128,7 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(p, *usize, tctx); + prof_malloc(tsd, p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); @@ -2154,9 +2174,12 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (unlikely(malloc_init())) + if (unlikely(malloc_init())) { + tsd = NULL; goto label_oom; + } tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (config_prof && opt_prof) p = imallocx_prof(tsd, size, flags, &usize); @@ -2166,11 +2189,12 @@ je_mallocx(size_t size, int flags) goto label_oom; if (config_stats) { - assert(usize == isalloc(p, config_prof)); + assert(usize == isalloc(tsd, p, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, p, usize, MALLOCX_ZERO_GET(flags)); + JEMALLOC_VALGRIND_MALLOC(true, tsd, p, usize, MALLOCX_ZERO_GET(flags)); + witness_assert_lockless(tsd); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2178,6 +2202,7 @@ label_oom: abort(); } UTRACE(0, size, 0); + witness_assert_lockless(tsd); return (NULL); } @@ -2195,7 +2220,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(p, usize); + 
arena_prof_promoted(tsd, p, usize); } else { p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2214,7 +2239,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(old_ptr); + old_tctx = prof_tctx_get(tsd, old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, @@ -2237,7 +2262,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ - *usize = isalloc(p, config_prof); + *usize = isalloc(tsd, p, config_prof); } prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); @@ -2265,10 +2290,11 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(arena_ind, true); + arena = arena_get(tsd, arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2282,7 +2308,7 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(tsd, ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2300,7 +2326,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = isalloc(p, config_prof); + usize = isalloc(tsd, p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2309,8 +2335,9 @@ je_rallocx(void *ptr, size_t size, int flags) 
*tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, p, usize, false, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(true, tsd, p, usize, false, ptr, old_usize, old_rzsize, false, zero); + witness_assert_lockless(tsd); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2318,6 +2345,7 @@ label_oom: abort(); } UTRACE(ptr, size, 0); + witness_assert_lockless(tsd); return (NULL); } @@ -2329,7 +2357,7 @@ ixallocx_helper(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(ptr, config_prof); + usize = isalloc(tsd, ptr, config_prof); return (usize); } @@ -2357,7 +2385,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(ptr); + old_tctx = prof_tctx_get(tsd, ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. 
* Therefore, compute its maximum possible value and use that in @@ -2413,8 +2441,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); + witness_assert_lockless(tsd); - old_usize = isalloc(ptr, config_prof); + old_usize = isalloc(tsd, ptr, config_prof); /* * The API explicitly absolves itself of protecting against (size + @@ -2449,10 +2478,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, ptr, usize, false, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(false, tsd, ptr, usize, false, ptr, old_usize, old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); + witness_assert_lockless(tsd); return (usize); } @@ -2461,15 +2491,20 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; + tsd_t *tsd; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) - usize = ivsalloc(ptr, config_prof); - else - usize = isalloc(ptr, config_prof); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + if (config_ivsalloc) + usize = ivsalloc(tsd, ptr, config_prof); + else + usize = isalloc(tsd, ptr, config_prof); + + witness_assert_lockless(tsd); return (usize); } @@ -2483,6 +2518,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2493,17 +2529,21 @@ je_dallocx(void *ptr, int flags) UTRACE(ptr, 0, 0); ifree(tsd_fetch(), ptr, tcache, true); + witness_assert_lockless(tsd); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(size_t size, int flags) +inallocx(tsd_t *tsd, size_t size, int flags) { size_t usize; + witness_assert_lockless(tsd); + if (likely((flags & 
MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + witness_assert_lockless(tsd); return (usize); } @@ -2516,10 +2556,11 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - usize = inallocx(size, flags); - assert(usize == isalloc(ptr, config_prof)); - tsd = tsd_fetch(); + usize = inallocx(tsd, size, flags); + assert(usize == isalloc(tsd, ptr, config_prof)); + + witness_assert_lockless(tsd); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2530,6 +2571,7 @@ je_sdallocx(void *ptr, size_t size, int flags) UTRACE(ptr, 0, 0); isfree(tsd, ptr, usize, tcache); + witness_assert_lockless(tsd); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2537,16 +2579,21 @@ JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { size_t usize; + tsd_t *tsd; assert(size != 0); if (unlikely(malloc_init())) return (0); - usize = inallocx(size, flags); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + + usize = inallocx(tsd, size, flags); if (unlikely(usize > HUGE_MAXCLASS)) return (0); + witness_assert_lockless(tsd); return (usize); } @@ -2554,55 +2601,82 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_byname(name, oldp, oldlenp, newp, newlen)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); + witness_assert_lockless(tsd); + return (ret); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_nametomib(name, mibp, miblenp)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + ret = ctl_nametomib(tsd, name, mibp, 
miblenp); + witness_assert_lockless(tsd); + return (ret); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + tsd_t *tsd; if (unlikely(malloc_init())) return (EAGAIN); - return (ctl_bymib(mib, miblen, oldp, oldlenp, newp, newlen)); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); + witness_assert_lockless(tsd); + return (ret); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { + tsd_t *tsd; + tsd = tsd_fetch(); + witness_assert_lockless(tsd); stats_print(write_cb, cbopaque, opts); + witness_assert_lockless(tsd); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; + tsd_t *tsd; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - if (config_ivsalloc) - ret = ivsalloc(ptr, config_prof); - else - ret = (ptr == NULL) ? 0 : isalloc(ptr, config_prof); + tsd = tsd_fetch(); + witness_assert_lockless(tsd); + if (config_ivsalloc) + ret = ivsalloc(tsd, ptr, config_prof); + else + ret = (ptr == NULL) ? 0 : isalloc(tsd, ptr, config_prof); + + witness_assert_lockless(tsd); return (ret); } @@ -2644,6 +2718,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { + tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB @@ -2652,18 +2727,20 @@ _malloc_prefork(void) #endif assert(malloc_initialized()); + tsd = tsd_fetch(); + /* Acquire all mutexes in a safe order. 
*/ - ctl_prefork(); - prof_prefork(); - malloc_mutex_prefork(&arenas_lock); + ctl_prefork(tsd); + prof_prefork(tsd); + malloc_mutex_prefork(tsd, &arenas_lock); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(i, false)) != NULL) - arena_prefork(arena); + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_prefork(tsd, arena); } - chunk_prefork(); - base_prefork(); + chunk_prefork(tsd); + base_prefork(tsd); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2674,6 +2751,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { + tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB @@ -2682,39 +2760,44 @@ _malloc_postfork(void) #endif assert(malloc_initialized()); + tsd = tsd_fetch(); + /* Release all mutexes, now that fork() has completed. */ - base_postfork_parent(); - chunk_postfork_parent(); + base_postfork_parent(tsd); + chunk_postfork_parent(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(i, false)) != NULL) - arena_postfork_parent(arena); + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_postfork_parent(tsd, arena); } - malloc_mutex_postfork_parent(&arenas_lock); - prof_postfork_parent(); - ctl_postfork_parent(); + malloc_mutex_postfork_parent(tsd, &arenas_lock); + prof_postfork_parent(tsd); + ctl_postfork_parent(tsd); } void jemalloc_postfork_child(void) { + tsd_t *tsd; unsigned i, narenas; assert(malloc_initialized()); + tsd = tsd_fetch(); + /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_child(); - chunk_postfork_child(); + base_postfork_child(tsd); + chunk_postfork_child(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(i, false)) != NULL) - arena_postfork_child(arena); + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_postfork_child(tsd, arena); } - malloc_mutex_postfork_child(&arenas_lock); - prof_postfork_child(); - ctl_postfork_child(); + malloc_mutex_postfork_child(tsd, &arenas_lock); + prof_postfork_child(tsd); + ctl_postfork_child(tsd); } /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 2d47af97..4174f42e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -69,7 +69,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, #endif bool -malloc_mutex_init(malloc_mutex_t *mutex) +malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { #ifdef _WIN32 @@ -103,31 +103,34 @@ malloc_mutex_init(malloc_mutex_t *mutex) } pthread_mutexattr_destroy(&attr); #endif + if (config_debug) + witness_init(&mutex->witness, name, rank, NULL); return (false); } void -malloc_mutex_prefork(malloc_mutex_t *mutex) +malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex) { - malloc_mutex_lock(mutex); + malloc_mutex_lock(tsd, mutex); } void -malloc_mutex_postfork_parent(malloc_mutex_t *mutex) +malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex) { - malloc_mutex_unlock(mutex); + malloc_mutex_unlock(tsd, mutex); } void -malloc_mutex_postfork_child(malloc_mutex_t *mutex) +malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(mutex); + malloc_mutex_unlock(tsd, mutex); #else - if (malloc_mutex_init(mutex)) { + if (malloc_mutex_init(mutex, mutex->witness.name, + mutex->witness.rank)) { malloc_printf(": Error re-initializing mutex in " "child\n"); if (opt_abort) @@ -137,7 +140,7 @@ 
malloc_mutex_postfork_child(malloc_mutex_t *mutex) } bool -mutex_boot(void) +malloc_mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB diff --git a/src/prof.c b/src/prof.c index b3872277..520bf90a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -121,9 +121,9 @@ static bool prof_booted = false; * definition. */ -static bool prof_tctx_should_destroy(prof_tctx_t *tctx); +static bool prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(prof_tdata_t *tdata, +static bool prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); @@ -213,22 +213,23 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } if ((uintptr_t)tctx > (uintptr_t)1U) { - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd, tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tctx)) + if (prof_tctx_should_destroy(tsd, tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd, tctx->tdata->lock); } } void -prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx) { - prof_tctx_set(ptr, usize, tctx); + prof_tctx_set(tsd, ptr, usize, tctx); - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -236,23 +237,23 @@ prof_malloc_sample_object(const void *ptr, size_t usize, prof_tctx_t *tctx) tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd, tctx->tdata->lock); } void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tctx->tdata->lock); + malloc_mutex_lock(tsd, 
tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tctx)) + if (prof_tctx_should_destroy(tsd, tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tctx->tdata->lock); + malloc_mutex_unlock(tsd, tctx->tdata->lock); } void @@ -277,7 +278,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq = true; } - malloc_mutex_lock(&bt2gctx_mtx); + malloc_mutex_lock(tsd, &bt2gctx_mtx); } JEMALLOC_INLINE_C void @@ -287,7 +288,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); - malloc_mutex_unlock(&bt2gctx_mtx); + malloc_mutex_unlock(tsd, &bt2gctx_mtx); if (tdata != NULL) { bool idump, gdump; @@ -300,9 +301,9 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq_gdump = false; if (idump) - prof_idump(); + prof_idump(tsd); if (gdump) - prof_gdump(); + prof_gdump(tsd); } } @@ -585,7 +586,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * into this function. */ prof_enter(tsd, tdata_self); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ @@ -593,7 +594,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); } else { /* @@ -601,16 +602,17 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * prof_lookup(). */ gctx->nlimbo--; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); prof_leave(tsd, tdata_self); } } -/* tctx->tdata->lock must be held. 
*/ static bool -prof_tctx_should_destroy(prof_tctx_t *tctx) +prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + if (opt_prof_accum) return (false); if (tctx->cnts.curobjs != 0) @@ -633,7 +635,6 @@ prof_gctx_should_destroy(prof_gctx_t *gctx) return (true); } -/* tctx->tdata->lock is held upon entry, and released before return. */ static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { @@ -641,6 +642,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; + malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); assert(!opt_prof_accum); @@ -648,10 +651,10 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumbytes == 0); ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tdata, false); - malloc_mutex_unlock(tdata->lock); + destroy_tdata = prof_tdata_should_destroy(tsd, tdata, false); + malloc_mutex_unlock(tsd, tdata->lock); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); @@ -691,12 +694,14 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_tctx = false; destroy_gctx = false; } - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, tdata); } + malloc_mutex_assert_not_owner(tsd, tctx->tdata->lock); + if (destroy_tdata) prof_tdata_destroy(tsd, tdata, false); @@ -740,9 +745,9 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). 
*/ - malloc_mutex_lock(gctx.p->lock); + malloc_mutex_lock(tsd, gctx.p->lock); gctx.p->nlimbo++; - malloc_mutex_unlock(gctx.p->lock); + malloc_mutex_unlock(tsd, gctx.p->lock); new_gctx = false; } prof_leave(tsd, tdata); @@ -769,11 +774,11 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (tdata == NULL) return (NULL); - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); if (!not_found) /* Note double negative! */ ret.p->prepared = true; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); if (not_found) { tcache_t *tcache; void *btkey; @@ -806,20 +811,20 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); idalloctm(tsd, ret.v, tcache, true, true); return (NULL); } - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); } return (ret.p); @@ -894,11 +899,13 @@ size_t prof_tdata_count(void) { size_t tdata_count = 0; + tsd_t *tsd; - malloc_mutex_lock(&tdatas_mtx); + tsd = tsd_fetch(); + malloc_mutex_lock(tsd, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); return (tdata_count); } @@ -917,9 +924,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - malloc_mutex_lock(&bt2gctx_mtx); + malloc_mutex_lock(tsd, &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(&bt2gctx_mtx); + 
malloc_mutex_unlock(tsd, &bt2gctx_mtx); return (bt_count); } @@ -1032,20 +1039,21 @@ prof_dump_printf(bool propagate_err, const char *format, ...) return (ret); } -/* tctx->tdata->lock is held. */ static void -prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) +prof_tctx_merge_tdata(tsd_t *tsd, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_lock(tctx->gctx->lock); + malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + + malloc_mutex_lock(tsd, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: - malloc_mutex_unlock(tctx->gctx->lock); + malloc_mutex_unlock(tsd, tctx->gctx->lock); return; case prof_tctx_state_nominal: tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tctx->gctx->lock); + malloc_mutex_unlock(tsd, tctx->gctx->lock); memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); @@ -1064,11 +1072,12 @@ prof_tctx_merge_tdata(prof_tctx_t *tctx, prof_tdata_t *tdata) } } -/* gctx->lock is held. */ static void -prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) +prof_tctx_merge_gctx(tsd_t *tsd, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsd, gctx->lock); + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; if (opt_prof_accum) { @@ -1077,10 +1086,12 @@ prof_tctx_merge_gctx(prof_tctx_t *tctx, prof_gctx_t *gctx) } } -/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsd_t *tsd = (tsd_t *)arg; + + malloc_mutex_assert_owner(tsd, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1088,7 +1099,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tctx, tctx->gctx); + prof_tctx_merge_gctx(tsd, tctx, tctx->gctx); break; default: not_reached(); @@ -1097,11 +1108,18 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -/* gctx->lock is held. */ +struct prof_tctx_dump_iter_arg_s { + tsd_t *tsd; + bool propagate_err; +}; + static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - bool propagate_err = *(bool *)arg; + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsd, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -1110,7 +1128,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - if (prof_dump_printf(propagate_err, + if (prof_dump_printf(arg->propagate_err, " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, @@ -1123,12 +1141,14 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) return (NULL); } -/* tctx->gctx is held. 
*/ static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsd_t *tsd = (tsd_t *)arg; prof_tctx_t *ret; + malloc_mutex_assert_owner(tsd, tctx->gctx->lock); + switch (tctx->state) { case prof_tctx_state_nominal: /* New since dumping started; ignore. */ @@ -1149,12 +1169,12 @@ label_return: } static void -prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) +prof_dump_gctx_prep(tsd_t *tsd, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); /* * Increment nlimbo so that gctx won't go away before dump. @@ -1166,19 +1186,26 @@ prof_dump_gctx_prep(prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); } -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) -{ - size_t *leak_ngctx = (size_t *)arg; +struct prof_gctx_merge_iter_arg_s { + tsd_t *tsd; + size_t leak_ngctx; +}; - malloc_mutex_lock(gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, NULL); +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) +{ + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsd, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsd); if (gctx->cnt_summed.curobjs != 0) - (*leak_ngctx)++; - malloc_mutex_unlock(gctx->lock); + arg->leak_ngctx++; + malloc_mutex_unlock(arg->tsd, gctx->lock); return (NULL); } @@ -1197,7 +1224,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) */ while ((gctx = gctx_tree_first(gctxs)) != NULL) { gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(tsd, gctx->lock); { prof_tctx_t *next; @@ -1205,7 +1232,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) 
do { prof_tctx_t *to_destroy = tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, NULL); + prof_tctx_finish_iter, (void *)tsd); if (to_destroy != NULL) { next = tctx_tree_next(&gctx->tctxs, to_destroy); @@ -1220,19 +1247,26 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo--; if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(tsd, gctx->lock); } } -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) -{ - prof_cnt_t *cnt_all = (prof_cnt_t *)arg; +struct prof_tdata_merge_iter_arg_s { + tsd_t *tsd; + prof_cnt_t cnt_all; +}; - malloc_mutex_lock(tdata->lock); +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) +{ + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsd, tdata->lock); if (!tdata->expired) { size_t tabind; union { @@ -1244,17 +1278,17 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) - prof_tctx_merge_tdata(tctx.p, tdata); + prof_tctx_merge_tdata(arg->tsd, tctx.p, tdata); - cnt_all->curobjs += tdata->cnt_summed.curobjs; - cnt_all->curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; if (opt_prof_accum) { - cnt_all->accumobjs += tdata->cnt_summed.accumobjs; - cnt_all->accumbytes += tdata->cnt_summed.accumbytes; + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; } } else tdata->dumping = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(arg->tsd, 
tdata->lock); return (NULL); } @@ -1283,7 +1317,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool -prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header(tsd_t *tsd, bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; @@ -1294,10 +1328,10 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) return (true); - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); return (ret); } #ifdef JEMALLOC_JET @@ -1306,15 +1340,16 @@ prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all) prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); #endif -/* gctx->lock is held. */ static bool -prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, - prof_gctx_tree_t *gctxs) +prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { bool ret; unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; cassert(config_prof); + malloc_mutex_assert_owner(tsd, gctx->lock); /* Avoid dumping such gctx's that have no useful data. 
*/ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -1348,8 +1383,10 @@ prof_dump_gctx(bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, goto label_return; } + prof_tctx_dump_iter_arg.tsd = tsd; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&propagate_err) != NULL) { + (void *)&prof_tctx_dump_iter_arg) != NULL) { ret = true; goto label_return; } @@ -1459,22 +1496,29 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, } } +struct prof_gctx_dump_iter_arg_s { + tsd_t *tsd; + bool propagate_err; +}; + static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *arg) +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_gctx_t *ret; - bool propagate_err = *(bool *)arg; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; - malloc_mutex_lock(gctx->lock); + malloc_mutex_lock(arg->tsd, gctx->lock); - if (prof_dump_gctx(propagate_err, gctx, &gctx->bt, gctxs)) { + if (prof_dump_gctx(arg->tsd, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { ret = gctx; goto label_return; } ret = NULL; label_return: - malloc_mutex_unlock(gctx->lock); + malloc_mutex_unlock(arg->tsd, gctx->lock); return (ret); } @@ -1482,13 +1526,14 @@ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { prof_tdata_t *tdata; - prof_cnt_t cnt_all; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; size_t tabind; union { prof_gctx_t *p; void *v; } gctx; - size_t leak_ngctx; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; prof_gctx_tree_t gctxs; cassert(config_prof); @@ -1497,7 +1542,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (tdata == NULL) return (true); - malloc_mutex_lock(&prof_dump_mtx); + malloc_mutex_lock(tsd, &prof_dump_mtx); 
prof_enter(tsd, tdata); /* @@ -1506,20 +1551,24 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) */ gctx_tree_new(&gctxs); for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(gctx.p, &gctxs); + prof_dump_gctx_prep(tsd, gctx.p, &gctxs); /* * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - memset(&cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(&tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&cnt_all); - malloc_mutex_unlock(&tdatas_mtx); + prof_tdata_merge_iter_arg.tsd = tsd; + memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)&prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd, &tdatas_mtx); /* Merge tctx stats into gctx's. */ - leak_ngctx = 0; - gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&leak_ngctx); + prof_gctx_merge_iter_arg.tsd = tsd; + prof_gctx_merge_iter_arg.leak_ngctx = 0; + gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, + (void *)&prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); @@ -1528,12 +1577,15 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; /* Dump profile header. */ - if (prof_dump_header(propagate_err, &cnt_all)) + if (prof_dump_header(tsd, propagate_err, + &prof_tdata_merge_iter_arg.cnt_all)) goto label_write_error; /* Dump per gctx profile stats. */ + prof_gctx_dump_iter_arg.tsd = tsd; + prof_gctx_dump_iter_arg.propagate_err = propagate_err; if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)&propagate_err) != NULL) + (void *)&prof_gctx_dump_iter_arg) != NULL) goto label_write_error; /* Dump /proc//maps if possible. 
*/ @@ -1544,17 +1596,18 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(&prof_dump_mtx); - - if (leakcheck) - prof_leakcheck(&cnt_all, leak_ngctx, filename); + malloc_mutex_unlock(tsd, &prof_dump_mtx); + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } return (false); label_write_error: prof_dump_close(propagate_err); label_open_close_error: prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(&prof_dump_mtx); + malloc_mutex_unlock(tsd, &prof_dump_mtx); return (true); } @@ -1594,23 +1647,21 @@ prof_fdump(void) return; tsd = tsd_fetch(); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } void -prof_idump(void) +prof_idump(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted) + if (!prof_booted || tsd == NULL) return; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1621,50 +1672,46 @@ prof_idump(void) if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } bool -prof_mdump(const char *filename) +prof_mdump(tsd_t *tsd, const char *filename) { - tsd_t *tsd; char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); if (!opt_prof || !prof_booted) return (true); - tsd = tsd_fetch(); if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ if (opt_prof_prefix[0] == '\0') return (true); - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(tsd, true, filename, false)); } void -prof_gdump(void) +prof_gdump(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted) + if (!prof_booted || tsd == NULL) return; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1675,10 +1722,10 @@ prof_gdump(void) if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(&prof_dump_seq_mtx); + malloc_mutex_lock(tsd, &prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(&prof_dump_seq_mtx); + malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1707,14 +1754,14 @@ prof_bt_keycomp(const void *k1, const void *k2) } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(void) +prof_thr_uid_alloc(tsd_t *tsd) { uint64_t thr_uid; - malloc_mutex_lock(&next_thr_uid_mtx); + malloc_mutex_lock(tsd, &next_thr_uid_mtx); thr_uid = next_thr_uid; next_thr_uid++; - malloc_mutex_unlock(&next_thr_uid_mtx); + malloc_mutex_unlock(tsd, &next_thr_uid_mtx); return (thr_uid); } @@ -1759,9 +1806,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); return (tdata); } @@ -1770,13 +1817,13 @@ prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(), 0, NULL, - prof_thread_active_init_get())); + return 
(prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd), 0, NULL, + prof_thread_active_init_get(tsd))); } -/* tdata->lock must be held. */ static bool -prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_should_destroy_unlocked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { if (tdata->attached && !even_if_attached) @@ -1786,18 +1833,32 @@ prof_tdata_should_destroy(prof_tdata_t *tdata, bool even_if_attached) return (true); } -/* tdatas_mtx must be held. */ +static bool +prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) +{ + + malloc_mutex_assert_owner(tsd, tdata->lock); + + return (prof_tdata_should_destroy_unlocked(tsd, tdata, + even_if_attached)); +} + static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { tcache_t *tcache; - assert(prof_tdata_should_destroy(tdata, even_if_attached)); + malloc_mutex_assert_owner(tsd, &tdatas_mtx); + assert(tsd_prof_tdata_get(tsd) != tdata); tdata_tree_remove(&tdatas, tdata); + assert(prof_tdata_should_destroy_unlocked(tsd, tdata, + even_if_attached)); + tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) idalloctm(tsd, tdata->thread_name, tcache, true, true); @@ -1809,9 +1870,9 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(&tdatas_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); } static void @@ -1819,9 +1880,9 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tdata, true); + destroy_tdata = prof_tdata_should_destroy(tsd, tdata, true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. 
@@ -1831,7 +1892,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); if (destroy_tdata) prof_tdata_destroy(tsd, tdata, true); } @@ -1851,18 +1912,18 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) } static bool -prof_tdata_expire(prof_tdata_t *tdata) +prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tdata->lock); + malloc_mutex_lock(tsd, tdata->lock); if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tdata, false); + prof_tdata_should_destroy(tsd, tdata, false); } else destroy_tdata = false; - malloc_mutex_unlock(tdata->lock); + malloc_mutex_unlock(tsd, tdata->lock); return (destroy_tdata); } @@ -1870,8 +1931,9 @@ prof_tdata_expire(prof_tdata_t *tdata) static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { + tsd_t *tsd = (tsd_t *)arg; - return (prof_tdata_expire(tdata) ? tdata : NULL); + return (prof_tdata_expire(tsd, tdata) ? 
tdata : NULL); } void @@ -1881,15 +1943,15 @@ prof_reset(tsd_t *tsd, size_t lg_sample) assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(&prof_dump_mtx); - malloc_mutex_lock(&tdatas_mtx); + malloc_mutex_lock(tsd, &prof_dump_mtx); + malloc_mutex_lock(tsd, &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, NULL); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); @@ -1897,8 +1959,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) next = NULL; } while (next != NULL); - malloc_mutex_unlock(&tdatas_mtx); - malloc_mutex_unlock(&prof_dump_mtx); + malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsd, &prof_dump_mtx); } void @@ -1915,35 +1977,33 @@ prof_tdata_cleanup(tsd_t *tsd) } bool -prof_active_get(void) +prof_active_get(tsd_t *tsd) { bool prof_active_current; - malloc_mutex_lock(&prof_active_mtx); + malloc_mutex_lock(tsd, &prof_active_mtx); prof_active_current = prof_active; - malloc_mutex_unlock(&prof_active_mtx); + malloc_mutex_unlock(tsd, &prof_active_mtx); return (prof_active_current); } bool -prof_active_set(bool active) +prof_active_set(tsd_t *tsd, bool active) { bool prof_active_old; - malloc_mutex_lock(&prof_active_mtx); + malloc_mutex_lock(tsd, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; - malloc_mutex_unlock(&prof_active_mtx); + malloc_mutex_unlock(tsd, &prof_active_mtx); return (prof_active_old); } const char * -prof_thread_name_get(void) +prof_thread_name_get(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (""); @@ -2006,12 +2066,10 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) } bool -prof_thread_active_get(void) +prof_thread_active_get(tsd_t *tsd) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = 
prof_tdata_get(tsd, true); if (tdata == NULL) return (false); @@ -2019,12 +2077,10 @@ prof_thread_active_get(void) } bool -prof_thread_active_set(bool active) +prof_thread_active_set(tsd_t *tsd, bool active) { - tsd_t *tsd; prof_tdata_t *tdata; - tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, true); if (tdata == NULL) return (true); @@ -2033,48 +2089,48 @@ prof_thread_active_set(bool active) } bool -prof_thread_active_init_get(void) +prof_thread_active_init_get(tsd_t *tsd) { bool active_init; - malloc_mutex_lock(&prof_thread_active_init_mtx); + malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); active_init = prof_thread_active_init; - malloc_mutex_unlock(&prof_thread_active_init_mtx); + malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); return (active_init); } bool -prof_thread_active_init_set(bool active_init) +prof_thread_active_init_set(tsd_t *tsd, bool active_init) { bool active_init_old; - malloc_mutex_lock(&prof_thread_active_init_mtx); + malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); active_init_old = prof_thread_active_init; prof_thread_active_init = active_init; - malloc_mutex_unlock(&prof_thread_active_init_mtx); + malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); return (active_init_old); } bool -prof_gdump_get(void) +prof_gdump_get(tsd_t *tsd) { bool prof_gdump_current; - malloc_mutex_lock(&prof_gdump_mtx); + malloc_mutex_lock(tsd, &prof_gdump_mtx); prof_gdump_current = prof_gdump_val; - malloc_mutex_unlock(&prof_gdump_mtx); + malloc_mutex_unlock(tsd, &prof_gdump_mtx); return (prof_gdump_current); } bool -prof_gdump_set(bool gdump) +prof_gdump_set(tsd_t *tsd, bool gdump) { bool prof_gdump_old; - malloc_mutex_lock(&prof_gdump_mtx); + malloc_mutex_lock(tsd, &prof_gdump_mtx); prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; - malloc_mutex_unlock(&prof_gdump_mtx); + malloc_mutex_unlock(tsd, &prof_gdump_mtx); return (prof_gdump_old); } @@ -2115,47 +2171,54 @@ prof_boot1(void) } bool -prof_boot2(void) +prof_boot2(tsd_t *tsd) { 
cassert(config_prof); if (opt_prof) { - tsd_t *tsd; unsigned i; lg_prof_sample = opt_lg_prof_sample; prof_active = opt_prof_active; - if (malloc_mutex_init(&prof_active_mtx)) + if (malloc_mutex_init(&prof_active_mtx, "prof_active", + WITNESS_RANK_PROF_ACTIVE)) return (true); prof_gdump_val = opt_prof_gdump; - if (malloc_mutex_init(&prof_gdump_mtx)) + if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", + WITNESS_RANK_PROF_GDUMP)) return (true); prof_thread_active_init = opt_prof_thread_active_init; - if (malloc_mutex_init(&prof_thread_active_init_mtx)) + if (malloc_mutex_init(&prof_thread_active_init_mtx, + "prof_thread_active_init", + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - tsd = tsd_fetch(); if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); - if (malloc_mutex_init(&bt2gctx_mtx)) + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", + WITNESS_RANK_PROF_BT2GCTX)) return (true); tdata_tree_new(&tdatas); - if (malloc_mutex_init(&tdatas_mtx)) + if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", + WITNESS_RANK_PROF_TDATAS)) return (true); next_thr_uid = 0; - if (malloc_mutex_init(&next_thr_uid_mtx)) + if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", + WITNESS_RANK_PROF_NEXT_THR_UID)) return (true); - if (malloc_mutex_init(&prof_dump_seq_mtx)) + if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", + WITNESS_RANK_PROF_DUMP_SEQ)) return (true); - if (malloc_mutex_init(&prof_dump_mtx)) + if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", + WITNESS_RANK_PROF_DUMP)) return (true); if (opt_prof_final && opt_prof_prefix[0] != '\0' && @@ -2165,21 +2228,23 @@ prof_boot2(void) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS * + gctx_locks = (malloc_mutex_t *)base_alloc(tsd, PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { - if (malloc_mutex_init(&gctx_locks[i])) + if (malloc_mutex_init(&gctx_locks[i], 
"prof_gctx", + WITNESS_RANK_PROF_GCTX)) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(PROF_NTDATA_LOCKS * - sizeof(malloc_mutex_t)); + tdata_locks = (malloc_mutex_t *)base_alloc(tsd, + PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { - if (malloc_mutex_init(&tdata_locks[i])) + if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", + WITNESS_RANK_PROF_TDATA)) return (true); } } @@ -2198,56 +2263,56 @@ prof_boot2(void) } void -prof_prefork(void) +prof_prefork(tsd_t *tsd) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(&tdatas_mtx); - malloc_mutex_prefork(&bt2gctx_mtx); - malloc_mutex_prefork(&next_thr_uid_mtx); - malloc_mutex_prefork(&prof_dump_seq_mtx); + malloc_mutex_prefork(tsd, &tdatas_mtx); + malloc_mutex_prefork(tsd, &bt2gctx_mtx); + malloc_mutex_prefork(tsd, &next_thr_uid_mtx); + malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(&gctx_locks[i]); + malloc_mutex_prefork(tsd, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_prefork(&tdata_locks[i]); + malloc_mutex_prefork(tsd, &tdata_locks[i]); } } void -prof_postfork_parent(void) +prof_postfork_parent(tsd_t *tsd) { if (opt_prof) { unsigned i; for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(&tdata_locks[i]); + malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(&gctx_locks[i]); - malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&next_thr_uid_mtx); - malloc_mutex_postfork_parent(&bt2gctx_mtx); - malloc_mutex_postfork_parent(&tdatas_mtx); + malloc_mutex_postfork_parent(tsd, &gctx_locks[i]); + malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsd, &tdatas_mtx); } } void 
-prof_postfork_child(void) +prof_postfork_child(tsd_t *tsd) { if (opt_prof) { unsigned i; for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(&tdata_locks[i]); + malloc_mutex_postfork_child(tsd, &tdata_locks[i]); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(&gctx_locks[i]); - malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&next_thr_uid_mtx); - malloc_mutex_postfork_child(&bt2gctx_mtx); - malloc_mutex_postfork_child(&tdatas_mtx); + malloc_mutex_postfork_child(tsd, &gctx_locks[i]); + malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsd, &tdatas_mtx); } } diff --git a/src/quarantine.c b/src/quarantine.c index c024deab..6cb74b37 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -99,7 +99,7 @@ static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(obj->ptr, config_prof)); + assert(obj->usize == isalloc(tsd, obj->ptr, config_prof)); idalloctm(tsd, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; @@ -119,7 +119,7 @@ void quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; - size_t usize = isalloc(ptr, config_prof); + size_t usize = isalloc(tsd, ptr, config_prof); cassert(config_fill); assert(opt_quarantine); diff --git a/src/tcache.c b/src/tcache.c index a8620c3d..a9539f64 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,10 +24,10 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ size_t -tcache_salloc(const void *ptr) +tcache_salloc(tsd_t *tsd, const void *ptr) { - return (arena_salloc(ptr, false)); + return (arena_salloc(tsd, ptr, false)); } void @@ -107,12 +107,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, 
tcache_bin_t *tbin, arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { - if (arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); + if (arena_prof_accum(tsd, arena, + tcache->prof_accumbytes)) + prof_idump(tsd); tcache->prof_accumbytes = 0; } - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -130,8 +131,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin_junked_locked(bin_arena, chunk, - ptr, bitselm); + arena_dalloc_bin_junked_locked(tsd, bin_arena, + chunk, ptr, bitselm); } else { /* * This object was allocated via a different @@ -143,7 +144,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ndeferred++; } } - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { @@ -152,11 +153,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. 
*/ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -189,7 +190,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, if (config_prof) idump = false; - malloc_mutex_lock(&locked_arena->lock); + malloc_mutex_lock(tsd, &locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -212,8 +213,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == locked_arena) { - arena_dalloc_large_junked_locked(locked_arena, - chunk, ptr); + arena_dalloc_large_junked_locked(tsd, + locked_arena, chunk, ptr); } else { /* * This object was allocated via a different @@ -225,9 +226,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, ndeferred++; } } - malloc_mutex_unlock(&locked_arena->lock); + malloc_mutex_unlock(tsd, &locked_arena->lock); if (config_prof && idump) - prof_idump(); + prof_idump(tsd); arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { @@ -235,12 +236,12 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -251,33 +252,34 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } void -tcache_arena_associate(tcache_t *tcache, arena_t *arena) +tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } } void -tcache_arena_reassociate(tcache_t *tcache, arena_t *oldarena, arena_t *newarena) +tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, arena_t *oldarena, + arena_t *newarena) { - tcache_arena_dissociate(tcache, oldarena); - tcache_arena_associate(tcache, newarena); + tcache_arena_dissociate(tsd, tcache, oldarena); + tcache_arena_associate(tsd, tcache, newarena); } void -tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) +tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. 
*/ - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -290,8 +292,8 @@ tcache_arena_dissociate(tcache_t *tcache, arena_t *arena) assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); - tcache_stats_merge(tcache, arena); - malloc_mutex_unlock(&arena->lock); + tcache_stats_merge(tsd, tcache, arena); + malloc_mutex_unlock(tsd, &arena->lock); } } @@ -327,11 +329,11 @@ tcache_create(tsd_t *tsd, arena_t *arena) size = sa2u(size, CACHELINE); tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, - arena_get(0, false)); + arena_get(tsd, 0, false)); if (tcache == NULL) return (NULL); - tcache_arena_associate(tcache, arena); + tcache_arena_associate(tsd, tcache, arena); ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); @@ -358,7 +360,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tcache, arena); + tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -366,9 +368,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); } } @@ -377,17 +379,17 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(&arena->lock); + malloc_mutex_lock(tsd, &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&arena->lock); + malloc_mutex_unlock(tsd, &arena->lock); } } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(arena, tcache->prof_accumbytes)) - prof_idump(); 
+ arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) + prof_idump(tsd); idalloctm(tsd, tcache, false, true, true); } @@ -413,21 +415,22 @@ tcache_enabled_cleanup(tsd_t *tsd) /* Do nothing. */ } -/* Caller must own arena->lock. */ void -tcache_stats_merge(tcache_t *tcache, arena_t *arena) +tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); + malloc_mutex_assert_owner(tsd, &arena->lock); + /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(&bin->lock); + malloc_mutex_lock(tsd, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(&bin->lock); + malloc_mutex_unlock(tsd, &bin->lock); tbin->tstats.nrequests = 0; } @@ -447,7 +450,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(sizeof(tcache_t *) * + tcaches = base_alloc(tsd, sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -455,7 +458,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, arena_get(0, false)); + tcache = tcache_create(tsd, arena_get(tsd, 0, false)); if (tcache == NULL) return (true); @@ -501,7 +504,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(void) +tcache_boot(tsd_t *tsd) { unsigned i; @@ -519,7 +522,7 @@ tcache_boot(void) nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(nhbins * + tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsd, nhbins * sizeof(tcache_bin_info_t)); if (tcache_bin_info == NULL) return (true); diff --git a/src/tsd.c b/src/tsd.c index 34c1573c..38d8bde4 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -106,15 +106,17 @@ MALLOC_TSD } } -bool +tsd_t * malloc_tsd_boot0(void) { + tsd_t *tsd; ncleanups = 0; if (tsd_boot0()) - return (true); - *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = true; - return (false); + return (NULL); + tsd = tsd_fetch(); + *tsd_arenas_tdata_bypassp_get(tsd) = true; + return (tsd); } void @@ -169,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. */ - malloc_mutex_lock(&head->lock); + malloc_mutex_lock(NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(NULL, &head->lock); return (iter->data); } } @@ -180,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(NULL, &head->lock); return (NULL); } @@ -188,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(&head->lock); + malloc_mutex_lock(NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(&head->lock); + malloc_mutex_unlock(NULL, &head->lock); } #endif diff --git a/src/witness.c b/src/witness.c new file mode 100644 index 00000000..b7b91aca --- /dev/null +++ b/src/witness.c @@ -0,0 +1,206 @@ +#define JEMALLOC_WITNESS_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +void +witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp) +{ + + witness->name = name; + witness->rank = rank; + 
witness->comp = comp; +} + +#ifdef JEMALLOC_JET +#undef witness_lock_error +#define witness_lock_error JEMALLOC_N(witness_lock_error_impl) +#endif +static void +witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) +{ + witness_t *w; + + malloc_printf(": Lock rank order reversal:"); + ql_foreach(w, witnesses, link) { + malloc_printf(" %s(%u)", w->name, w->rank); + } + malloc_printf(" %s(%u)\n", witness->name, witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_lock_error +#define witness_lock_error JEMALLOC_N(witness_lock_error) +witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); +#endif + +void +witness_lock(tsd_t *tsd, witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsd, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL && w->rank >= witness->rank && (w->comp == NULL || + w->comp != witness->comp || w->comp(w, witness) > 0)) + witness_lock_error(witnesses, witness); + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +void +witness_unlock(tsd_t *tsd, witness_t *witness) +{ + witness_list_t *witnesses; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_owner(tsd, witness); + + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); +} + +#ifdef JEMALLOC_JET +#undef witness_owner_error +#define witness_owner_error JEMALLOC_N(witness_owner_error_impl) +#endif +static void +witness_owner_error(const witness_t *witness) +{ + + malloc_printf(": Should own %s(%u)\n", witness->name, + witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_owner_error +#define witness_owner_error JEMALLOC_N(witness_owner_error) +witness_owner_error_t *witness_owner_error = + 
JEMALLOC_N(witness_owner_error_impl); +#endif + +void +witness_assert_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return; + } + witness_owner_error(witness); +} + +#ifdef JEMALLOC_JET +#undef witness_not_owner_error +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error_impl) +#endif +static void +witness_not_owner_error(const witness_t *witness) +{ + + malloc_printf(": Should not own %s(%u)\n", witness->name, + witness->rank); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_not_owner_error +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) +witness_not_owner_error_t *witness_not_owner_error = + JEMALLOC_N(witness_not_owner_error_impl); +#endif + +void +witness_assert_not_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +#ifdef JEMALLOC_JET +#undef witness_lockless_error +#define witness_lockless_error JEMALLOC_N(witness_lockless_error_impl) +#endif +static void +witness_lockless_error(const witness_list_t *witnesses) +{ + witness_t *w; + + malloc_printf(": Should not own any locks:"); + ql_foreach(w, witnesses, link) { + malloc_printf(" %s(%u)", w->name, w->rank); + } + malloc_printf("\n"); + abort(); +} +#ifdef JEMALLOC_JET +#undef witness_lockless_error +#define witness_lockless_error JEMALLOC_N(witness_lockless_error) +witness_lockless_error_t *witness_lockless_error = + JEMALLOC_N(witness_lockless_error_impl); +#endif + +void +witness_assert_lockless(tsd_t *tsd) +{ + witness_list_t 
*witnesses; + witness_t *w; + + cassert(config_debug); + + if (tsd == NULL) + return; + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) { + witness_lockless_error(witnesses); + } +} + +void +witnesses_cleanup(tsd_t *tsd) +{ + + witness_assert_lockless(tsd); + + /* Do nothing. */ +} diff --git a/src/zone.c b/src/zone.c index 6859b3fe..8f25051a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(ptr, config_prof)); + return (ivsalloc(tsd_fetch(), ptr, config_prof)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(ptr, config_prof) != 0) { + if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(ptr, config_prof) != 0) + if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -123,7 +123,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { size_t alloc_size; - alloc_size = ivsalloc(ptr, config_prof); + alloc_size = ivsalloc(tsd_fetch(), ptr, config_prof); if (alloc_size != 0) { assert(alloc_size == size); je_free(ptr); diff --git a/test/unit/junk.c b/test/unit/junk.c index fecf6fae..e251a124 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(void *ptr, size_t usize) +huge_dalloc_junk_intercept(tsd_t *tsd, void *ptr, size_t usize) { - huge_dalloc_junk_orig(ptr, usize); + huge_dalloc_junk_orig(tsd, ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in 
diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 69983e5e..83f51df8 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -94,7 +94,8 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static bool -prof_dump_header_intercept(bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header_intercept(tsd_t *tsd, bool propagate_err, + const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; diff --git a/test/unit/witness.c b/test/unit/witness.c new file mode 100644 index 00000000..430d8203 --- /dev/null +++ b/test/unit/witness.c @@ -0,0 +1,278 @@ +#include "test/jemalloc_test.h" + +static witness_lock_error_t *witness_lock_error_orig; +static witness_owner_error_t *witness_owner_error_orig; +static witness_not_owner_error_t *witness_not_owner_error_orig; +static witness_lockless_error_t *witness_lockless_error_orig; + +static bool saw_lock_error; +static bool saw_owner_error; +static bool saw_not_owner_error; +static bool saw_lockless_error; + +static void +witness_lock_error_intercept(const witness_list_t *witnesses, + const witness_t *witness) +{ + + saw_lock_error = true; +} + +static void +witness_owner_error_intercept(const witness_t *witness) +{ + + saw_owner_error = true; +} + +static void +witness_not_owner_error_intercept(const witness_t *witness) +{ + + saw_not_owner_error = true; +} + +static void +witness_lockless_error_intercept(const witness_list_t *witnesses) +{ + + saw_lockless_error = true; +} + +static int +witness_comp(const witness_t *a, const witness_t *b) +{ + + assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + + return (strcmp(a->name, b->name)); +} + +static int +witness_comp_reverse(const witness_t *a, const witness_t *b) +{ + + assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + + return (-strcmp(a->name, b->name)); +} + +TEST_BEGIN(test_witness) +{ + witness_t a, b; + tsd_t *tsd; + + test_skip_if(!config_debug); + + tsd = 
tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + witness_assert_not_owner(tsd, &a); + witness_lock(tsd, &a); + witness_assert_owner(tsd, &a); + + witness_init(&b, "b", 2, NULL); + witness_assert_not_owner(tsd, &b); + witness_lock(tsd, &b); + witness_assert_owner(tsd, &b); + + witness_unlock(tsd, &a); + witness_unlock(tsd, &b); + + witness_assert_lockless(tsd); +} +TEST_END + +TEST_BEGIN(test_witness_comp) +{ + witness_t a, b, c, d; + tsd_t *tsd; + + test_skip_if(!config_debug); + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, witness_comp); + witness_assert_not_owner(tsd, &a); + witness_lock(tsd, &a); + witness_assert_owner(tsd, &a); + + witness_init(&b, "b", 1, witness_comp); + witness_assert_not_owner(tsd, &b); + witness_lock(tsd, &b); + witness_assert_owner(tsd, &b); + witness_unlock(tsd, &b); + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + witness_init(&c, "c", 1, witness_comp_reverse); + witness_assert_not_owner(tsd, &c); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsd, &c); + assert_true(saw_lock_error, "Expected witness lock error"); + witness_unlock(tsd, &c); + + saw_lock_error = false; + + witness_init(&d, "d", 1, NULL); + witness_assert_not_owner(tsd, &d); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsd, &d); + assert_true(saw_lock_error, "Expected witness lock error"); + witness_unlock(tsd, &d); + + witness_unlock(tsd, &a); + + witness_assert_lockless(tsd); + + witness_lock_error = witness_lock_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_reversal) +{ + witness_t a, b; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, 
"a", 1, NULL); + witness_init(&b, "b", 2, NULL); + + witness_lock(tsd, &b); + assert_false(saw_lock_error, "Unexpected witness lock error"); + witness_lock(tsd, &a); + assert_true(saw_lock_error, "Expected witness lock error"); + + witness_unlock(tsd, &a); + witness_unlock(tsd, &b); + + witness_assert_lockless(tsd); + + witness_lock_error = witness_lock_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_recursive) +{ + witness_t a; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_not_owner_error_orig = witness_not_owner_error; + witness_not_owner_error = witness_not_owner_error_intercept; + saw_not_owner_error = false; + + witness_lock_error_orig = witness_lock_error; + witness_lock_error = witness_lock_error_intercept; + saw_lock_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + + witness_lock(tsd, &a); + assert_false(saw_lock_error, "Unexpected witness lock error"); + assert_false(saw_not_owner_error, "Unexpected witness not owner error"); + witness_lock(tsd, &a); + assert_true(saw_lock_error, "Expected witness lock error"); + assert_true(saw_not_owner_error, "Expected witness not owner error"); + + witness_unlock(tsd, &a); + + witness_assert_lockless(tsd); + + witness_owner_error = witness_owner_error_orig; + witness_lock_error = witness_lock_error_orig; + +} +TEST_END + +TEST_BEGIN(test_witness_unlock_not_owned) +{ + witness_t a; + tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_owner_error_orig = witness_owner_error; + witness_owner_error = witness_owner_error_intercept; + saw_owner_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + + assert_false(saw_owner_error, "Unexpected owner error"); + witness_unlock(tsd, &a); + assert_true(saw_owner_error, "Expected owner error"); + + witness_assert_lockless(tsd); + + witness_owner_error = witness_owner_error_orig; +} +TEST_END + +TEST_BEGIN(test_witness_lockful) +{ + witness_t a; + 
tsd_t *tsd; + + test_skip_if(!config_debug); + + witness_lockless_error_orig = witness_lockless_error; + witness_lockless_error = witness_lockless_error_intercept; + saw_lockless_error = false; + + tsd = tsd_fetch(); + + witness_assert_lockless(tsd); + + witness_init(&a, "a", 1, NULL); + + assert_false(saw_lockless_error, "Unexpected lockless error"); + witness_assert_lockless(tsd); + + witness_lock(tsd, &a); + witness_assert_lockless(tsd); + assert_true(saw_lockless_error, "Expected lockless error"); + + witness_unlock(tsd, &a); + + witness_assert_lockless(tsd); + + witness_lockless_error = witness_lockless_error_orig; +} +TEST_END + +int +main(void) +{ + + return (test( + test_witness, + test_witness_comp, + test_witness_reversal, + test_witness_recursive, + test_witness_unlock_not_owned, + test_witness_lockful)); +} From a15841cc7d7c60b2c72c091d8048223789f5c4e0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 14 Apr 2016 02:12:33 -0700 Subject: [PATCH 0193/2608] Fix a compilation error. Fix a compilation error that occurs if Valgrind is not enabled. This regression was caused by b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 (Add witness, a simple online locking validator.). 
--- include/jemalloc/internal/valgrind.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index 7c6a62fa..2667bf5e 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -83,8 +83,8 @@ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, ptr, usize, \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do {} while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) From 22884243250a0f7d412ca745fbf7231d69abe771 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 14 Apr 2016 12:17:56 -0700 Subject: [PATCH 0194/2608] s/MALLOC_MUTEX_RANK_OMIT/WITNESS_RANK_OMIT/ This fixes a compilation error caused by b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 (Add witness, a simple online locking validator.). This resolves #375. 
--- include/jemalloc/internal/mb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index de54f508..81129d08 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -104,7 +104,7 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx, MALLOC_MUTEX_RANK_OMIT); + malloc_mutex_init(&mtx, WITNESS_RANK_OMIT); malloc_mutex_lock(NULL, &mtx); malloc_mutex_unlock(NULL, &mtx); } From d9394d0ca870fc20b48ee7c57c69d48effe51041 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 12:33:39 -0700 Subject: [PATCH 0195/2608] Convert base_mtx locking protocol comments to assertions. --- src/base.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/src/base.c b/src/base.c index 87b376b8..901553a1 100644 --- a/src/base.c +++ b/src/base.c @@ -13,12 +13,13 @@ static size_t base_mapped; /******************************************************************************/ -/* base_mtx must be held. */ static extent_node_t * -base_node_try_alloc(void) +base_node_try_alloc(tsd_t *tsd) { extent_node_t *node; + malloc_mutex_assert_owner(tsd, &base_mtx); + if (base_nodes == NULL) return (NULL); node = base_nodes; @@ -27,33 +28,34 @@ base_node_try_alloc(void) return (node); } -/* base_mtx must be held. */ static void -base_node_dalloc(extent_node_t *node) +base_node_dalloc(tsd_t *tsd, extent_node_t *node) { + malloc_mutex_assert_owner(tsd, &base_mtx); + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; base_nodes = node; } -/* base_mtx must be held. 
*/ static extent_node_t * -base_chunk_alloc(size_t minsize) +base_chunk_alloc(tsd_t *tsd, size_t minsize) { extent_node_t *node; size_t csize, nsize; void *addr; + malloc_mutex_assert_owner(tsd, &base_mtx); assert(minsize != 0); - node = base_node_try_alloc(); + node = base_node_try_alloc(tsd); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc(node); + base_node_dalloc(tsd, node); return (NULL); } base_mapped += csize; @@ -98,7 +100,7 @@ base_alloc(tsd_t *tsd, size_t size) extent_tree_szad_remove(&base_avail_szad, node); } else { /* Try to allocate more space. */ - node = base_chunk_alloc(csize); + node = base_chunk_alloc(tsd, csize); } if (node == NULL) { ret = NULL; @@ -111,7 +113,7 @@ base_alloc(tsd_t *tsd, size_t size) extent_node_size_set(node, extent_node_size_get(node) - csize); extent_tree_szad_insert(&base_avail_szad, node); } else - base_node_dalloc(node); + base_node_dalloc(tsd, node); if (config_stats) { base_allocated += csize; /* From 1b5830178fe73d4018233fea6858fff87d2b19df Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 12:53:48 -0700 Subject: [PATCH 0196/2608] Fix malloc_mutex_[un]lock() to conditionally check witness. Also remove tautological cassert(config_debug) calls. 
--- include/jemalloc/internal/mutex.h | 4 ++-- src/witness.c | 10 ---------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 7d19a0f4..581aaf57 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -81,7 +81,8 @@ malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { - witness_assert_not_owner(tsd, &mutex->witness); + if (config_debug) + witness_assert_not_owner(tsd, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -103,7 +104,6 @@ malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) { if (isthreaded) { - witness_assert_owner(tsd, &mutex->witness); if (config_debug) witness_unlock(tsd, &mutex->witness); #ifdef _WIN32 diff --git a/src/witness.c b/src/witness.c index b7b91aca..444d200f 100644 --- a/src/witness.c +++ b/src/witness.c @@ -39,8 +39,6 @@ witness_lock(tsd_t *tsd, witness_t *witness) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -63,8 +61,6 @@ witness_unlock(tsd_t *tsd, witness_t *witness) { witness_list_t *witnesses; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -101,8 +97,6 @@ witness_assert_owner(tsd_t *tsd, const witness_t *witness) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -141,8 +135,6 @@ witness_assert_not_owner(tsd_t *tsd, const witness_t *witness) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; if (witness->rank == WITNESS_RANK_OMIT) @@ -184,8 +176,6 @@ witness_assert_lockless(tsd_t *tsd) witness_list_t *witnesses; witness_t *w; - cassert(config_debug); - if (tsd == NULL) return; From 1423ee9016f1e7cb0cf3302207bcc488ce4374fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 
17 Apr 2016 10:30:25 -0700 Subject: [PATCH 0197/2608] Fix style nits. --- include/jemalloc/internal/nstime.h | 2 +- src/tsd.c | 2 +- test/unit/junk_alloc.c | 2 +- test/unit/junk_free.c | 2 +- test/unit/util.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index dcb4b47f..dc293b73 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,7 +1,7 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ +#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ && _POSIX_MONOTONIC_CLOCK >= 0 typedef struct nstime_s nstime_t; diff --git a/src/tsd.c b/src/tsd.c index 38d8bde4..aeaa5e18 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -77,7 +77,7 @@ tsd_cleanup(void *arg) /* Do nothing. */ break; case tsd_state_nominal: -#define O(n, t) \ +#define O(n, t) \ n##_cleanup(tsd); MALLOC_TSD #undef O diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c index 8db3331d..a5895b5c 100644 --- a/test/unit/junk_alloc.c +++ b/test/unit/junk_alloc.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" +#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_free.c b/test/unit/junk_free.c index 482a61d0..bb5183c9 100644 --- a/test/unit/junk_free.c +++ b/test/unit/junk_free.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" +#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/util.c b/test/unit/util.c index c4333d53..c958dc0f 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -162,11 +162,11 @@ TEST_BEGIN(test_malloc_snprintf_truncated) char buf[BUFLEN]; size_t result; size_t len; -#define TEST(expected_str_untruncated, ...) do { \ +#define TEST(expected_str_untruncated, ...) 
do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ - buf, expected_str_untruncated); \ + buf, expected_str_untruncated); \ assert_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) From a0c632c9d5d3fb31189ee85440b52579e37c85c1 Mon Sep 17 00:00:00 2001 From: Rajat Goel Date: Mon, 18 Apr 2016 11:54:09 -0700 Subject: [PATCH 0198/2608] Update private_symbols.txt Add 4 missing symbols --- include/jemalloc/internal/private_symbols.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index be5d30e7..8cd88d29 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -217,6 +217,8 @@ extent_node_size_get extent_node_size_set extent_node_zeroed_get extent_node_zeroed_set +extent_tree_ad_destroy +extent_tree_ad_destroy_recurse extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert @@ -234,6 +236,8 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search +extent_tree_szad_destroy +extent_tree_szad_destroy_recurse extent_tree_szad_empty extent_tree_szad_first extent_tree_szad_insert From ab0cfe01fa354597d28303952d3b0f87d932f6d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 18 Apr 2016 15:11:20 -0700 Subject: [PATCH 0199/2608] Update private_symbols.txt. Change test-related mangling to simplify symbol filtering. 
The following commands can be used to detect missing/obsolete symbol mangling, with the caveat that the full set of symbols is based on the union of symbols generated by all configurations, some of which are platform-specific: ./autogen.sh --enable-debug --enable-prof --enable-lazy-lock make all tests nm -a lib/libjemalloc.a src/*.jet.o \ |grep " [TDBCR] " \ |awk '{print $3}' \ |sed -e 's/^\(je_\|jet_\(n_\)\?\)\([a-zA-Z0-9_]*\)/\3/g' \ |LC_COLLATE=C sort -u \ |grep -v \ -e '^\(malloc\|calloc\|posix_memalign\|aligned_alloc\|realloc\|free\)$' \ -e '^\(m\|r\|x\|s\|d\|sd\|n\)allocx$' \ -e '^mallctl\(\|nametomib\|bymib\)$' \ -e '^malloc_\(stats_print\|usable_size\|message\)$' \ -e '^\(memalign\|valloc\)$' \ -e '^__\(malloc\|memalign\|realloc\|free\)_hook$' \ -e '^pthread_create$' \ > /tmp/private_symbols.txt --- include/jemalloc/internal/private_symbols.txt | 39 +++++++++++++++---- src/arena.c | 24 ++++++------ src/nstime.c | 4 +- 3 files changed, 45 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8cd88d29..c8799cba 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -100,12 +100,6 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption -arena_run_heap_empty -arena_run_heap_first -arena_run_heap_insert -arena_run_heap_new -arena_run_heap_remove_first -arena_run_heap_remove arena_run_regind arena_run_to_miscelm arena_salloc @@ -132,6 +126,11 @@ atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z +atomic_write_p +atomic_write_u +atomic_write_uint32 +atomic_write_uint64 +atomic_write_z base_alloc base_boot base_postfork_child @@ -207,6 +206,8 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_committed_get +extent_node_committed_set extent_node_dirty_insert extent_node_dirty_linkage_init extent_node_dirty_remove @@ 
-315,12 +316,13 @@ jemalloc_postfork_parent jemalloc_prefork large_maxclass lg_floor +lg_prof_sample malloc_cprintf -malloc_mutex_init -malloc_mutex_lock malloc_mutex_assert_not_owner malloc_mutex_assert_owner malloc_mutex_boot +malloc_mutex_init +malloc_mutex_lock malloc_mutex_postfork_child malloc_mutex_postfork_parent malloc_mutex_prefork @@ -344,6 +346,8 @@ narenas_tdata_cleanup narenas_total_get ncpus nhbins +nhclasses +nlclasses nstime_add nstime_compare nstime_copy @@ -397,6 +401,7 @@ pow2_ceil_u64 pow2_ceil_zu prng_lg_range prng_range +prof_active prof_active_get prof_active_get_unlocked prof_active_set @@ -406,6 +411,7 @@ prof_backtrace prof_boot0 prof_boot1 prof_boot2 +prof_bt_count prof_dump_header prof_dump_open prof_free @@ -432,6 +438,7 @@ prof_tctx_get prof_tctx_reset prof_tctx_set prof_tdata_cleanup +prof_tdata_count prof_tdata_get prof_tdata_init prof_tdata_reinit @@ -519,6 +526,13 @@ ticker_tick ticker_ticks tsd_arena_get tsd_arena_set +tsd_arenap_get +tsd_arenas_tdata_bypass_get +tsd_arenas_tdata_bypass_set +tsd_arenas_tdata_bypassp_get +tsd_arenas_tdata_get +tsd_arenas_tdata_set +tsd_arenas_tdatap_get tsd_boot tsd_boot0 tsd_boot1 @@ -536,19 +550,28 @@ tsd_init_head tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set +tsd_prof_tdatap_get tsd_quarantine_get tsd_quarantine_set +tsd_quarantinep_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set +tsd_tcache_enabledp_get tsd_tcache_get tsd_tcache_set +tsd_tcachep_get tsd_thread_allocated_get tsd_thread_allocated_set +tsd_thread_allocatedp_get tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_thread_deallocatedp_get tsd_tls tsd_tsd +tsd_witnesses_get +tsd_witnesses_set +tsd_witnessesp_get u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/src/arena.c b/src/arena.c index cc648e31..48fa93cb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -156,7 +156,7 @@ run_quantize_ceil_compute(size_t size) #ifdef JEMALLOC_JET #undef run_quantize_floor -#define run_quantize_floor 
JEMALLOC_N(run_quantize_floor_impl) +#define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) #endif static size_t run_quantize_floor(size_t size) @@ -174,12 +174,12 @@ run_quantize_floor(size_t size) #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(run_quantize_floor) -run_quantize_t *run_quantize_floor = JEMALLOC_N(run_quantize_floor_impl); +run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor); #endif #ifdef JEMALLOC_JET #undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil_impl) +#define run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil) #endif static size_t run_quantize_ceil(size_t size) @@ -197,7 +197,7 @@ run_quantize_ceil(size_t size) #ifdef JEMALLOC_JET #undef run_quantize_ceil #define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) -run_quantize_t *run_quantize_ceil = JEMALLOC_N(run_quantize_ceil_impl); +run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif static arena_run_heap_t * @@ -2271,7 +2271,7 @@ arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) #ifdef JEMALLOC_JET #undef arena_redzone_corruption -#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption_impl) +#define arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption) #endif static void arena_redzone_corruption(void *ptr, size_t usize, bool after, @@ -2286,7 +2286,7 @@ arena_redzone_corruption(void *ptr, size_t usize, bool after, #undef arena_redzone_corruption #define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) arena_redzone_corruption_t *arena_redzone_corruption = - JEMALLOC_N(arena_redzone_corruption_impl); + JEMALLOC_N(n_arena_redzone_corruption); #endif static void @@ -2327,7 +2327,7 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small_impl) +#define arena_dalloc_junk_small 
JEMALLOC_N(n_arena_dalloc_junk_small) #endif void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) @@ -2342,7 +2342,7 @@ arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) #undef arena_dalloc_junk_small #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) arena_dalloc_junk_small_t *arena_dalloc_junk_small = - JEMALLOC_N(arena_dalloc_junk_small_impl); + JEMALLOC_N(n_arena_dalloc_junk_small); #endif void @@ -2780,7 +2780,7 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, #ifdef JEMALLOC_JET #undef arena_dalloc_junk_large -#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large_impl) +#define arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large) #endif void arena_dalloc_junk_large(void *ptr, size_t usize) @@ -2793,7 +2793,7 @@ arena_dalloc_junk_large(void *ptr, size_t usize) #undef arena_dalloc_junk_large #define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) arena_dalloc_junk_large_t *arena_dalloc_junk_large = - JEMALLOC_N(arena_dalloc_junk_large_impl); + JEMALLOC_N(n_arena_dalloc_junk_large); #endif static void @@ -2981,7 +2981,7 @@ label_fail: #ifdef JEMALLOC_JET #undef arena_ralloc_junk_large -#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large_impl) +#define arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large) #endif static void arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) @@ -2996,7 +2996,7 @@ arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) #undef arena_ralloc_junk_large #define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) arena_ralloc_junk_large_t *arena_ralloc_junk_large = - JEMALLOC_N(arena_ralloc_junk_large_impl); + JEMALLOC_N(n_arena_ralloc_junk_large); #endif /* diff --git a/src/nstime.c b/src/nstime.c index 4cf90b58..26e49dc5 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -99,7 +99,7 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) #ifdef JEMALLOC_JET 
#undef nstime_update -#define nstime_update JEMALLOC_N(nstime_update_impl) +#define nstime_update JEMALLOC_N(n_nstime_update) #endif bool nstime_update(nstime_t *time) @@ -144,5 +144,5 @@ nstime_update(nstime_t *time) #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(nstime_update) -nstime_update_t *nstime_update = JEMALLOC_N(nstime_update_impl); +nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update); #endif From b6e07d2389d97c98c353fea4f04ec68a7d5a13a0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 18 Apr 2016 15:42:09 -0700 Subject: [PATCH 0200/2608] Fix malloc_mutex_assert_[not_]owner() for --enable-lazy-lock case. --- include/jemalloc/internal/mutex.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 581aaf57..5ddae11c 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -124,7 +124,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex) { - if (config_debug) + if (isthreaded && config_debug) witness_assert_owner(tsd, &mutex->witness); } @@ -132,7 +132,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex) { - if (config_debug) + if (isthreaded && config_debug) witness_assert_not_owner(tsd, &mutex->witness); } #endif From eb68842dafb6d86e8310b1db9ca0467d577091b1 Mon Sep 17 00:00:00 2001 From: hitstergtd Date: Mon, 18 Apr 2016 11:08:39 +0100 Subject: [PATCH 0201/2608] Doc typo fixes. 
--- doc/jemalloc.xml.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 63088cd1..9814c226 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1024,7 +1024,7 @@ for (i = 0; i < nbins; i++) { allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit, so this option is not - univerally usable (though the application can register its own + universally usable (though the application can register its own atexit function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application @@ -1328,7 +1328,7 @@ malloc_conf = "xmalloc:true";]]> option. Note that atexit may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit, so - this option is not univerally usable (though the application can + this option is not universally usable (though the application can register its own atexit function with equivalent functionality). This option is disabled by default. @@ -2070,7 +2070,7 @@ typedef struct { [] Average number of bytes allocated between - inverval-based profile dumps. See the + interval-based profile dumps. See the opt.lg_prof_interval option for additional information. From c9a4bf91702b351e73e2cd7cf9125afd076d59fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:36:48 -0700 Subject: [PATCH 0202/2608] Reduce a variable scope. --- src/arena.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 48fa93cb..15023cf9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3320,7 +3320,6 @@ arena_new(tsd_t *tsd, unsigned ind) arena_t *arena; size_t arena_size; unsigned i; - arena_bin_t *bin; /* Compute arena size to incorporate sufficient runs_avail elements. 
*/ arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * @@ -3411,7 +3410,7 @@ arena_new(tsd_t *tsd, unsigned ind) /* Initialize bins. */ for (i = 0; i < NBINS; i++) { - bin = &arena->bins[i]; + arena_bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_ARENA_BIN)) return (NULL); From 66cd953514a18477eb49732e40d5c2ab5f1b12c5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:34:14 -0700 Subject: [PATCH 0203/2608] Do not allocate metadata via non-auto arenas, nor tcaches. This assures that all internally allocated metadata come from the first opt_narenas arenas, i.e. the automatically multiplexed arenas. --- include/jemalloc/internal/arena.h | 28 +++-- include/jemalloc/internal/huge.h | 7 +- .../jemalloc/internal/jemalloc_internal.h.in | 21 +++- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/tcache.h | 10 +- include/jemalloc/internal/tsd.h | 2 + src/arena.c | 29 +++-- src/ckh.c | 16 +-- src/ctl.c | 2 +- src/huge.c | 28 ++--- src/jemalloc.c | 112 ++++++++++++------ src/prof.c | 42 +++---- src/quarantine.c | 8 +- src/tcache.c | 20 ++-- 14 files changed, 192 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 2130e9a0..103a4c91 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -290,10 +290,18 @@ struct arena_s { unsigned ind; /* - * Number of threads currently assigned to this arena. This field is - * synchronized via atomic operations. + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. 
Internal metadata must not be allocated from arenas + * created via the arenas.extend mallctl, because the arena..reset + * mallctl indiscriminately discards all allocations for the affected + * arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. */ - unsigned nthreads; + unsigned nthreads[2]; /* * There are three classes of arena operations from a locking @@ -541,7 +549,7 @@ void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache); + bool zero); void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); @@ -583,9 +591,9 @@ void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, malloc_huge_stats_t *hstats); -unsigned arena_nthreads_get(arena_t *arena); -void arena_nthreads_inc(arena_t *arena); -void arena_nthreads_dec(arena_t *arena); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); void arena_prefork(tsd_t *tsd, arena_t *arena); @@ -1320,7 +1328,7 @@ arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, assert(size > tcache_maxclass); } - return (arena_malloc_hard(tsd, arena, size, ind, zero, tcache)); + return (arena_malloc_hard(tsd, arena, size, ind, zero)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1426,7 +1434,7 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } } } else - huge_dalloc(tsd, ptr, 
tcache); + huge_dalloc(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void @@ -1477,7 +1485,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) } } } else - huge_dalloc(tsd, ptr, tcache); + huge_dalloc(tsd, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index f19d3368..9de2055d 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,10 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, - tcache_t *tcache); +void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero); void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache); + bool zero); bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, @@ -21,7 +20,7 @@ void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache); +void huge_dalloc(tsd_t *tsd, void *ptr); arena_t *huge_aalloc(const void *ptr); size_t huge_salloc(tsd_t *tsd, const void *ptr); prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ddceabca..fe58c1c6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -443,6 +443,9 @@ extern bool in_valgrind; /* Number of CPUs. */ extern unsigned ncpus; +/* Number of arenas used for automatic multiplexing of threads and arenas. 
*/ +extern unsigned narenas_auto; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -469,10 +472,11 @@ void bootstrap_free(void *ptr); unsigned narenas_total_get(void); arena_t *arena_init(tsd_t *tsd, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void thread_allocated_cleanup(tsd_t *tsd); void thread_deallocated_cleanup(tsd_t *tsd); +void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); void arenas_tdata_cleanup(tsd_t *tsd); void narenas_tdata_cleanup(tsd_t *tsd); @@ -546,7 +550,7 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena, bool internal); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); @@ -784,15 +788,16 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) +arena_choose(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; if (arena != NULL) return (arena); - if (unlikely((ret = tsd_arena_get(tsd)) == NULL)) - ret = arena_choose_hard(tsd); + ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) + ret = arena_choose_hard(tsd, internal); return (ret); } @@ -935,6 +940,8 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, void *ret; assert(size != 0); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { @@ -982,6 +989,8 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(usize != 0); assert(usize == sa2u(usize, alignment)); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -1052,6 +1061,8 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, { assert(ptr != NULL); + assert(!is_metadata || tcache == NULL); + assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, config_prof)); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c8799cba..eacc7c62 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -286,6 +286,7 @@ huge_ralloc_no_move huge_salloc iaalloc iallocztm +iarena_cleanup icalloc icalloct idalloc @@ -342,6 +343,7 @@ malloc_write map_bias map_misc_offset mb_write +narenas_auto narenas_tdata_cleanup narenas_total_get ncpus diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 1aa64631..82724304 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -293,7 +293,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(tcache_success == 
(ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -354,7 +354,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * Only allocate one large object at a time, because it's quite * expensive to create one and not use it. */ - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -459,8 +459,10 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; - if (unlikely(elm->tcache == NULL)) - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + if (unlikely(elm->tcache == NULL)) { + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL, + false)); + } return (elm->tcache); } #endif diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b23b3b4c..1a1b5c32 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -536,6 +536,7 @@ struct tsd_init_head_s { O(thread_allocated, uint64_t) \ O(thread_deallocated, uint64_t) \ O(prof_tdata, prof_tdata_t *) \ + O(iarena, arena_t *) \ O(arena, arena_t *) \ O(arenas_tdata, arena_tdata_t *) \ O(narenas_tdata, unsigned) \ @@ -552,6 +553,7 @@ struct tsd_init_head_s { NULL, \ NULL, \ NULL, \ + NULL, \ 0, \ false, \ tcache_enabled_default, \ diff --git a/src/arena.c b/src/arena.c index 15023cf9..0da832e2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2478,10 +2478,10 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) void * arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache) + bool zero) { - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -2489,7 +2489,7 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t 
ind, return (arena_malloc_small(tsd, arena, ind, zero)); if (likely(size <= large_maxclass)) return (arena_malloc_large(tsd, arena, ind, zero)); - return (huge_malloc(tsd, arena, index2size(ind), zero, tcache)); + return (huge_malloc(tsd, arena, index2size(ind), zero)); } /* Only handles large allocations that require more than page alignment. */ @@ -2506,7 +2506,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL)) return (NULL); @@ -2606,10 +2606,9 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, ret = arena_palloc_large(tsd, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero, tcache); + ret = huge_malloc(tsd, arena, usize, zero); else { - ret = huge_palloc(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_palloc(tsd, arena, usize, alignment, zero); } } return (ret); @@ -3211,7 +3210,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty) { - *nthreads += arena_nthreads_get(arena); + *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; *decay_time = arena->decay_time; @@ -3294,24 +3293,24 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, } unsigned -arena_nthreads_get(arena_t *arena) +arena_nthreads_get(arena_t *arena, bool internal) { - return (atomic_read_u(&arena->nthreads)); + return (atomic_read_u(&arena->nthreads[internal])); } void -arena_nthreads_inc(arena_t *arena) +arena_nthreads_inc(arena_t *arena, bool internal) { - atomic_add_u(&arena->nthreads, 1); + atomic_add_u(&arena->nthreads[internal], 1); } void -arena_nthreads_dec(arena_t *arena) +arena_nthreads_dec(arena_t *arena, bool internal) { - atomic_sub_u(&arena->nthreads, 1); + 
atomic_sub_u(&arena->nthreads[internal], 1); } arena_t * @@ -3338,7 +3337,7 @@ arena_new(tsd_t *tsd, unsigned ind) return (NULL); arena->ind = ind; - arena->nthreads = 0; + arena->nthreads[0] = arena->nthreads[1] = 0; if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index 07b49dd2..aa9803e8 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -271,7 +271,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) goto label_return; } tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, NULL); + true, arena_choose(tsd, NULL, true)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -315,7 +315,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + arena_choose(tsd, NULL, true)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, goto label_return; } ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - NULL); + arena_choose(tsd, NULL, true)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, tcache_get(tsd, false), true, true); + idalloctm(tsd, ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/ctl.c b/src/ctl.c index 50faee7b..fad2fdd7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1304,7 +1304,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *oldarena; unsigned newind, oldind; - oldarena = arena_choose(tsd, NULL); + oldarena = arena_choose(tsd, NULL, false); if (oldarena == NULL) return (EAGAIN); diff --git a/src/huge.c b/src/huge.c index 3a802dee..bac2425f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -31,18 +31,17 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero, - tcache_t *tcache) +huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (huge_palloc(tsd, arena, usize, chunksize, zero, tcache)); + return (huge_palloc(tsd, arena, usize, chunksize, zero)); } void * huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) + bool zero) { void *ret; size_t ausize; @@ -58,7 +57,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate an extent node with which to track the chunk. 
*/ node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, tcache, true, arena); + CACHELINE, false, NULL, true, arena_choose(tsd, NULL, true)); if (node == NULL) return (NULL); @@ -67,10 +66,10 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = arena_choose(tsd, arena); + arena = arena_choose(tsd, arena, false); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); return (NULL); } @@ -78,7 +77,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (huge_node_set(tsd, ret, node)) { arena_chunk_dalloc_huge(tsd, arena, ret, usize); - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); return (NULL); } @@ -331,12 +330,12 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, static void * huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) + size_t alignment, bool zero) { if (alignment <= chunksize) - return (huge_malloc(tsd, arena, usize, zero, tcache)); - return (huge_palloc(tsd, arena, usize, alignment, zero, tcache)); + return (huge_malloc(tsd, arena, usize, zero)); + return (huge_palloc(tsd, arena, usize, alignment, zero)); } void * @@ -358,8 +357,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. 
*/ - ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero, - tcache); + ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero); if (ret == NULL) return (NULL); @@ -370,7 +368,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, } void -huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) +huge_dalloc(tsd_t *tsd, void *ptr) { extent_node_t *node; arena_t *arena; @@ -386,7 +384,7 @@ huge_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache) extent_node_size_get(node)); arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, tcache, true, true); + idalloctm(tsd, node, NULL, true, true); arena_decay_tick(tsd, arena); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7543dff1..3bd39c3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -60,7 +60,7 @@ static malloc_mutex_t arenas_lock; arena_t **arenas; static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ -static unsigned narenas_auto; /* Read-only after initialization. */ +unsigned narenas_auto; /* Read-only after initialization. 
*/ typedef enum { malloc_init_uninitialized = 3, @@ -318,8 +318,8 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, false, - is_metadata, arena_get(NULL, 0, false), true)); + return (iallocztm(NULL, size, size2index(size), zero, NULL, + is_metadata, arena_get(NULL, 0, true), true)); } static void @@ -451,15 +451,19 @@ arena_init(tsd_t *tsd, unsigned ind) } static void -arena_bind(tsd_t *tsd, unsigned ind) +arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd, ind, false); - arena_nthreads_inc(arena); + arena_nthreads_inc(arena, internal); - if (tsd_nominal(tsd)) - tsd_arena_set(tsd, arena); + if (tsd_nominal(tsd)) { + if (internal) + tsd_iarena_set(tsd, arena); + else + tsd_arena_set(tsd, arena); + } } void @@ -469,19 +473,22 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) oldarena = arena_get(tsd, oldind, false); newarena = arena_get(tsd, newind, false); - arena_nthreads_dec(oldarena); - arena_nthreads_inc(newarena); + arena_nthreads_dec(oldarena, false); + arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); } static void -arena_unbind(tsd_t *tsd, unsigned ind) +arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd, ind, false); - arena_nthreads_dec(arena); - tsd_arena_set(tsd, NULL); + arena_nthreads_dec(arena, internal); + if (internal) + tsd_iarena_set(tsd, NULL); + else + tsd_arena_set(tsd, NULL); } arena_tdata_t * @@ -562,14 +569,24 @@ label_return: /* Slow path, called only by arena_choose(). */ arena_t * -arena_choose_hard(tsd_t *tsd) +arena_choose_hard(tsd_t *tsd, bool internal) { - arena_t *ret; + arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (narenas_auto > 1) { - unsigned i, choose, first_null; + unsigned i, j, choose[2], first_null; + + /* + * Determine binding for both non-internal and internal + * allocation. 
+ * + * choose[0]: For application allocation. + * choose[1]: For internal metadata allocation. + */ + + for (j = 0; j < 2; j++) + choose[j] = 0; - choose = 0; first_null = narenas_auto; malloc_mutex_lock(tsd, &arenas_lock); assert(arena_get(tsd, 0, false) != NULL); @@ -579,10 +596,13 @@ arena_choose_hard(tsd_t *tsd) * Choose the first arena that has the lowest * number of threads assigned to it. */ - if (arena_nthreads_get(arena_get(tsd, i, false)) - < arena_nthreads_get(arena_get(tsd, choose, - false))) - choose = i; + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd, i, + false), !!j) < + arena_nthreads_get(arena_get(tsd, + choose[j], false), !!j)) + choose[j] = i; + } } else if (first_null == narenas_auto) { /* * Record the index of the first uninitialized @@ -597,27 +617,35 @@ arena_choose_hard(tsd_t *tsd) } } - if (arena_nthreads_get(arena_get(tsd, choose, false)) == 0 - || first_null == narenas_auto) { - /* - * Use an unloaded arena, or the least loaded arena if - * all arenas are already initialized. - */ - ret = arena_get(tsd, choose, false); - } else { - /* Initialize a new arena. */ - choose = first_null; - ret = arena_init_locked(tsd, choose); - if (ret == NULL) { - malloc_mutex_unlock(tsd, &arenas_lock); - return (NULL); + for (j = 0; j < 2; j++) { + if (arena_nthreads_get(arena_get(tsd, choose[j], false), + !!j) == 0 || first_null != narenas_auto) { + /* + * Use an unloaded arena, or the least loaded + * arena if all arenas are already initialized. + */ + if (!!j == internal) + ret = arena_get(tsd, choose[j], false); + } else { + arena_t *arena; + + /* Initialize a new arena. 
*/ + choose[j] = first_null; + arena = arena_init_locked(tsd, choose[j]); + if (arena == NULL) { + malloc_mutex_unlock(tsd, &arenas_lock); + return (NULL); + } + if (!!j == internal) + ret = arena; } + arena_bind(tsd, choose[j], !!j); } - arena_bind(tsd, choose); malloc_mutex_unlock(tsd, &arenas_lock); } else { ret = arena_get(tsd, 0, false); - arena_bind(tsd, 0); + arena_bind(tsd, 0, false); + arena_bind(tsd, 0, true); } return (ret); @@ -637,6 +665,16 @@ thread_deallocated_cleanup(tsd_t *tsd) /* Do nothing. */ } +void +iarena_cleanup(tsd_t *tsd) +{ + arena_t *iarena; + + iarena = tsd_iarena_get(tsd); + if (iarena != NULL) + arena_unbind(tsd, iarena->ind, true); +} + void arena_cleanup(tsd_t *tsd) { @@ -644,7 +682,7 @@ arena_cleanup(tsd_t *tsd) arena = tsd_arena_get(tsd); if (arena != NULL) - arena_unbind(tsd, arena->ind); + arena_unbind(tsd, arena->ind, false); } void diff --git a/src/prof.c b/src/prof.c index 520bf90a..82604632 100644 --- a/src/prof.c +++ b/src/prof.c @@ -554,7 +554,8 @@ prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, - size2index(size), false, tcache_get(tsd, true), true, NULL, true); + size2index(size), false, NULL, true, arena_get(NULL, 0, true), + true); if (gctx == NULL) return (NULL); gctx->lock = prof_gctx_mutex_choose(); @@ -595,7 +596,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(tsd, gctx->lock); - idalloctm(tsd, gctx, tcache_get(tsd, false), true, true); + idalloctm(tsd, gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -706,7 +707,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, tcache_get(tsd, false), true, true); + idalloctm(tsd, tctx, NULL, true, true); } static bool @@ -735,8 +736,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, tcache_get(tsd, false), true, - true); + idalloctm(tsd, gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -780,7 +780,6 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; malloc_mutex_unlock(tsd, tdata->lock); if (not_found) { - tcache_t *tcache; void *btkey; prof_gctx_t *gctx; bool new_gctx, error; @@ -794,10 +793,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. 
*/ - tcache = tcache_get(tsd, true); ret.v = iallocztm(tsd, sizeof(prof_tctx_t), - size2index(sizeof(prof_tctx_t)), false, tcache, true, NULL, - true); + size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_choose(tsd, NULL, true), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -817,7 +815,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, tcache, true, true); + idalloctm(tsd, ret.v, NULL, true, true); return (NULL); } malloc_mutex_lock(tsd, gctx->lock); @@ -1238,8 +1236,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, - tcache_get(tsd, false), true, true); + idalloctm(tsd, to_destroy, NULL, true, + true); } else next = NULL; } while (next != NULL); @@ -1771,14 +1769,13 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; - tcache_t *tcache; cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tcache = tcache_get(tsd, true); tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, tcache, true, NULL, true); + size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(NULL, + 0, true), true); if (tdata == NULL) return (NULL); @@ -1792,7 +1789,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, tcache, true, true); + idalloctm(tsd, tdata, NULL, true, true); return (NULL); } @@ -1848,7 +1845,6 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - tcache_t *tcache; malloc_mutex_assert_owner(tsd, &tdatas_mtx); @@ -1859,11 +1855,10 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tsd, tdata, even_if_attached)); - tcache = tcache_get(tsd, false); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, tcache, true, true); + idalloctm(tsd, tdata->thread_name, NULL, true, true); ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, tcache, true, true); + idalloctm(tsd, tdata, NULL, true, true); } static void @@ -2023,8 +2018,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, size2index(size), false, tcache_get(tsd, - true), true, NULL, true); + ret = iallocztm(tsd, size, size2index(size), false, NULL, true, + arena_get(NULL, 0, true), true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -2056,8 +2051,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, tcache_get(tsd, false), - true, true); + idalloctm(tsd, tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/quarantine.c b/src/quarantine.c index 6cb74b37..ff1637ec 
100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -30,7 +30,7 @@ quarantine_init(tsd_t *tsd, size_t lg_maxobjs) size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)); quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), - false, tcache_get(tsd, true), true, NULL, true); + false, NULL, true, arena_get(NULL, 0, true), true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -57,7 +57,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); } static quarantine_t * @@ -89,7 +89,7 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); tsd_quarantine_set(tsd, ret); return (ret); @@ -179,7 +179,7 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, tcache_get(tsd, false), true, true); + idalloctm(tsd, quarantine, NULL, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index a9539f64..ca867c72 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(binind < NBINS); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. 
*/ @@ -179,7 +179,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ @@ -307,7 +307,7 @@ tcache_get_hard(tsd_t *tsd) tcache_enabled_set(false); /* Memoize. */ return (NULL); } - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); if (unlikely(arena == NULL)) return (NULL); return (tcache_create(tsd, arena)); @@ -328,8 +328,8 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, false, true, - arena_get(tsd, 0, false)); + tcache = ipallocztm(tsd, size, CACHELINE, true, NULL, true, + arena_get(NULL, 0, true)); if (tcache == NULL) return (NULL); @@ -359,7 +359,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_t *arena; unsigned i; - arena = arena_choose(tsd, NULL); + arena = arena_choose(tsd, NULL, false); tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { @@ -391,7 +391,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) prof_idump(tsd); - idalloctm(tsd, tcache, false, true, true); + idalloctm(tsd, tcache, NULL, true, true); } void @@ -446,6 +446,7 @@ tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) bool tcaches_create(tsd_t *tsd, unsigned *r_ind) { + arena_t *arena; tcache_t *tcache; tcaches_t *elm; @@ -458,7 +459,10 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - tcache = tcache_create(tsd, arena_get(tsd, 0, false)); + arena = arena_choose(tsd, NULL, true); + if (unlikely(arena == NULL)) + return (true); + tcache = tcache_create(tsd, arena); if (tcache == NULL) 
return (true); From 19ff2cefba48d1ddab8fb52e3d78f309ca2553cf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 14:37:17 -0700 Subject: [PATCH 0204/2608] Implement the arena..reset mallctl. This makes it possible to discard all of an arena's allocations in a single operation. This resolves #146. --- Makefile.in | 4 +- doc/jemalloc.xml.in | 17 ++ include/jemalloc/internal/arena.h | 5 + include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 225 +++++++++++++++--- src/ctl.c | 36 +++ test/unit/arena_reset.c | 160 +++++++++++++ 8 files changed, 411 insertions(+), 39 deletions(-) create mode 100644 test/unit/arena_reset.c diff --git a/Makefile.in b/Makefile.in index a872eb5f..ddc89157 100644 --- a/Makefile.in +++ b/Makefile.in @@ -135,7 +135,9 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c -TESTS_UNIT := $(srcroot)test/unit/atomic.c \ +TESTS_UNIT := \ + $(srcroot)test/unit/arena_reset.c \ + $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 9814c226..7b602a51 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1558,6 +1558,23 @@ malloc_conf = "xmalloc:true";]]> details. + + + arena.<i>.reset + (void) + -- + + Discard all of the arena's extant allocations. This + interface can only be used with arenas created via arenas.extend. None + of the arena's discarded/cached allocations may be accessed afterward. As + part of this requirement, all thread caches which were used to + allocate/deallocate in conjunction with the arena must be flushed + beforehand. This interface cannot be used if running inside Valgrind, + nor if the quarantine size is + non-zero. 
+ + arena.<i>.dss diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 103a4c91..f2685f6f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -330,6 +330,10 @@ struct arena_s { dss_prec_t dss_prec; + + /* Extant arena chunks. */ + ql_head(extent_node_t) achunks; + /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most @@ -533,6 +537,7 @@ ssize_t arena_decay_time_get(tsd_t *tsd, arena_t *arena); bool arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time); void arena_purge(tsd_t *tsd, arena_t *arena, bool all); void arena_maybe_purge(tsd_t *tsd, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 386d50ef..49d76a57 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -48,7 +48,7 @@ struct extent_node_s { /* Linkage for the size/address-ordered tree. */ rb_node(extent_node_t) szad_link; - /* Linkage for arena's huge and node_cache lists. */ + /* Linkage for arena's achunks, huge, and node_cache lists. 
*/ ql_elm(extent_node_t) ql_link; }; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index eacc7c62..c7ff8529 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -100,6 +100,7 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_redzone_corruption +arena_reset arena_run_regind arena_run_to_miscelm arena_salloc diff --git a/src/arena.c b/src/arena.c index 0da832e2..f752acad 100644 --- a/src/arena.c +++ b/src/arena.c @@ -738,14 +738,61 @@ arena_chunk_alloc(tsd_t *tsd, arena_t *arena) return (NULL); } + ql_elm_new(&chunk->node, ql_link); + ql_tail_insert(&arena->achunks, &chunk->node, ql_link); arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); } +static void +arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +{ + bool committed; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + + chunk_deregister(chunk, &chunk->node); + + committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); + if (!committed) { + /* + * Decommit the header. Mark the chunk as decommitted even if + * header decommit fails, since treating a partially committed + * chunk as committed has a high potential for causing later + * access of decommitted memory. 
+ */ + chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, + arena->ind); + } + + chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)chunk, chunksize, + committed); + + if (config_stats) { + arena->stats.mapped -= chunksize; + arena->stats.metadata_mapped -= (map_bias << LG_PAGE); + } +} + +static void +arena_spare_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *spare) +{ + + assert(arena->spare != spare); + + if (arena_mapbits_dirty_get(spare, map_bias) != 0) { + arena_run_dirty_remove(arena, spare, map_bias, + chunk_npages-map_bias); + } + + arena_chunk_discard(tsd, arena, spare); +} + static void arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) { + arena_chunk_t *spare; assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); @@ -761,43 +808,11 @@ arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - if (arena->spare != NULL) { - arena_chunk_t *spare = arena->spare; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool committed; - - arena->spare = chunk; - if (arena_mapbits_dirty_get(spare, map_bias) != 0) { - arena_run_dirty_remove(arena, spare, map_bias, - chunk_npages-map_bias); - } - - chunk_deregister(spare, &spare->node); - - committed = (arena_mapbits_decommitted_get(spare, map_bias) == - 0); - if (!committed) { - /* - * Decommit the header. Mark the chunk as decommitted - * even if header decommit fails, since treating a - * partially committed chunk as committed has a high - * potential for causing later access of decommitted - * memory. 
- */ - chunk_hooks = chunk_hooks_get(tsd, arena); - chunk_hooks.decommit(spare, chunksize, 0, map_bias << - LG_PAGE, arena->ind); - } - - chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)spare, - chunksize, committed); - - if (config_stats) { - arena->stats.mapped -= chunksize; - arena->stats.metadata_mapped -= (map_bias << LG_PAGE); - } - } else - arena->spare = chunk; + ql_remove(&arena->achunks, &chunk->node, ql_link); + spare = arena->spare; + arena->spare = chunk; + if (spare != NULL) + arena_spare_discard(tsd, arena, spare); } static void @@ -1802,6 +1817,140 @@ arena_purge(tsd_t *tsd, arena_t *arena, bool all) malloc_mutex_unlock(tsd, &arena->lock); } +static void +arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +{ + size_t pageind, npages; + + cassert(config_prof); + assert(opt_prof); + + /* + * Iterate over the allocated runs and remove profiled allocations from + * the sample set. + */ + for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { + if (arena_mapbits_allocated_get(chunk, pageind) != 0) { + if (arena_mapbits_large_get(chunk, pageind) != 0) { + void *ptr = (void *)((uintptr_t)chunk + (pageind + << LG_PAGE)); + size_t usize = isalloc(tsd, ptr, config_prof); + + prof_free(tsd, ptr, usize); + npages = arena_mapbits_large_size_get(chunk, + pageind) >> LG_PAGE; + } else { + /* Skip small run. */ + size_t binind = arena_mapbits_binind_get(chunk, + pageind); + arena_bin_info_t *bin_info = + &arena_bin_info[binind]; + npages = bin_info->run_size >> LG_PAGE; + } + } else { + /* Skip unallocated run. */ + npages = arena_mapbits_unallocated_size_get(chunk, + pageind) >> LG_PAGE; + } + assert(pageind + npages <= chunk_npages); + } +} + +void +arena_reset(tsd_t *tsd, arena_t *arena) +{ + unsigned i; + extent_node_t *node; + + /* + * Locking in this function is unintuitive. 
The caller guarantees that + * no concurrent operations are happening in this arena, but there are + * still reasons that some locking is necessary: + * + * - Some of the functions in the transitive closure of calls assume + * appropriate locks are held, and in some cases these locks are + * temporarily dropped to avoid lock order reversal or deadlock due to + * reentry. + * - mallctl("epoch", ...) may concurrently refresh stats. While + * strictly speaking this is a "concurrent operation", disallowing + * stats refreshes would impose an inconvenient burden. + */ + + /* Remove large allocations from prof sample set. */ + if (config_prof && opt_prof) { + ql_foreach(node, &arena->achunks, ql_link) { + arena_achunk_prof_reset(tsd, arena, + extent_node_addr_get(node)); + } + } + + /* Huge allocations. */ + malloc_mutex_lock(tsd, &arena->huge_mtx); + for (node = ql_last(&arena->huge, ql_link); node != NULL; node = + ql_last(&arena->huge, ql_link)) { + void *ptr = extent_node_addr_get(node); + + malloc_mutex_unlock(tsd, &arena->huge_mtx); + /* Remove huge allocation from prof sample set. */ + if (config_prof && opt_prof) { + size_t usize; + + usize = isalloc(tsd, ptr, config_prof); + prof_free(tsd, ptr, usize); + } + huge_dalloc(tsd, ptr); + malloc_mutex_lock(tsd, &arena->huge_mtx); + } + malloc_mutex_unlock(tsd, &arena->huge_mtx); + + malloc_mutex_lock(tsd, &arena->lock); + + /* Bins. */ + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + malloc_mutex_lock(tsd, &bin->lock); + bin->runcur = NULL; + arena_run_heap_new(&bin->runs); + if (config_stats) { + bin->stats.curregs = 0; + bin->stats.curruns = 0; + } + malloc_mutex_unlock(tsd, &bin->lock); + } + + /* + * Re-initialize runs_dirty such that the chunks_cache and runs_dirty + * chains directly correspond. 
+ */ + qr_new(&arena->runs_dirty, rd_link); + for (node = qr_next(&arena->chunks_cache, cc_link); + node != &arena->chunks_cache; node = qr_next(node, cc_link)) { + qr_new(&node->rd, rd_link); + qr_meld(&arena->runs_dirty, &node->rd, rd_link); + } + + /* Arena chunks. */ + for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = + ql_last(&arena->achunks, ql_link)) { + ql_remove(&arena->achunks, node, ql_link); + arena_chunk_discard(tsd, arena, extent_node_addr_get(node)); + } + + /* Spare. */ + if (arena->spare != NULL) { + arena_chunk_discard(tsd, arena, arena->spare); + arena->spare = NULL; + } + + assert(!arena->purging); + arena->nactive = 0; + + for(i = 0; i < runs_avail_nclasses; i++) + arena_run_heap_new(&arena->runs_avail[i]); + + malloc_mutex_unlock(tsd, &arena->lock); +} + static void arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty, @@ -3373,6 +3522,8 @@ arena_new(tsd_t *tsd, unsigned ind) arena->dss_prec = chunk_dss_prec_get(tsd); + ql_new(&arena->achunks); + arena->spare = NULL; arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); diff --git a/src/ctl.c b/src/ctl.c index fad2fdd7..2e811430 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -120,6 +120,7 @@ CTL_PROTO(tcache_destroy) static void arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) +CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) CTL_PROTO(arena_i_decay_time) @@ -299,6 +300,7 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, {NAME("decay"), CTL(arena_i_decay)}, + {NAME("reset"), CTL(arena_i_reset)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, {NAME("decay_time"), CTL(arena_i_decay_time)}, @@ -1602,6 +1604,40 @@ label_return: return (ret); } +static int +arena_i_reset_ctl(tsd_t *tsd, const 
size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind; + arena_t *arena; + + READONLY(); + WRITEONLY(); + + if ((config_valgrind && unlikely(in_valgrind)) || (config_fill && + unlikely(opt_quarantine))) { + ret = EFAULT; + goto label_return; + } + + arena_ind = (unsigned)mib[1]; + if (config_debug) { + malloc_mutex_lock(tsd, &ctl_mtx); + assert(arena_ind < ctl_stats.narenas); + malloc_mutex_unlock(tsd, &ctl_mtx); + } + assert(arena_ind >= opt_narenas); + + arena = arena_get(tsd, arena_ind, false); + + arena_reset(tsd, arena); + + ret = 0; +label_return: + return (ret); +} + static int arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c new file mode 100644 index 00000000..52170cc4 --- /dev/null +++ b/test/unit/arena_reset.c @@ -0,0 +1,160 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = "prof:true,lg_prof_sample:0"; +#endif + +static unsigned +get_nsizes_impl(const char *cmd) +{ + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) failure", cmd); + + return (ret); +} + +static unsigned +get_nsmall(void) +{ + + return (get_nsizes_impl("arenas.nbins")); +} + +static unsigned +get_nlarge(void) +{ + + return (get_nsizes_impl("arenas.nlruns")); +} + +static unsigned +get_nhuge(void) +{ + + return (get_nsizes_impl("arenas.nhchunks")); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) +{ + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); + + return (ret); +} + +static size_t +get_small_size(size_t ind) +{ + + return (get_size_impl("arenas.bin.0.size", ind)); +} + +static size_t +get_large_size(size_t ind) +{ + + return (get_size_impl("arenas.lrun.0.size", ind)); +} + +static size_t +get_huge_size(size_t ind) +{ + + return (get_size_impl("arenas.hchunk.0.size", ind)); +} + +TEST_BEGIN(test_arena_reset) +{ +#define NHUGE 4 + unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; + size_t sz, miblen; + void **ptrs; + size_t mib[3]; + tsd_t *tsd; + + test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill + && unlikely(opt_quarantine))); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + + nsmall = get_nsmall(); + nlarge = get_nlarge(); + nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge(); + nptrs = nsmall + nlarge + nhuge; + ptrs = (void **)malloc(nptrs * sizeof(void *)); + assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); + + /* Allocate objects with a wide range of sizes. */ + for (i = 0; i < nsmall; i++) { + sz = get_small_size(i); + ptrs[i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, + arena_ind); + } + for (i = 0; i < nlarge; i++) { + sz = get_large_size(i); + ptrs[nsmall + i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, + arena_ind); + } + for (i = 0; i < nhuge; i++) { + sz = get_huge_size(i); + ptrs[nsmall + nlarge + i] = mallocx(sz, + MALLOCX_ARENA(arena_ind)); + assert_ptr_not_null(ptrs[i], + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, + arena_ind); + } + + tsd = tsd_fetch(); + + /* Verify allocations. */ + for (i = 0; i < nptrs; i++) { + assert_zu_gt(ivsalloc(tsd, ptrs[i], false), 0, + "Allocation should have queryable size"); + } + + /* Reset. 
*/ + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + + /* Verify allocations no longer exist. */ + for (i = 0; i < nptrs; i++) { + assert_zu_eq(ivsalloc(tsd, ptrs[i], false), 0, + "Allocation should no longer exist"); + } + + free(ptrs); +} +TEST_END + +int +main(void) +{ + + return (test( + test_arena_reset)); +} From 71d94828a2fa807054ea8c01486667c4bd7649b1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 21:27:17 -0700 Subject: [PATCH 0205/2608] Fix degenerate mb_write() compilation error. This resolves #375. --- include/jemalloc/internal/mb.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 81129d08..437c86f7 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -42,7 +42,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -#else +# else /* * This is hopefully enough to keep the compiler from reordering * instructions around this one. @@ -52,7 +52,7 @@ mb_write(void) : /* Inputs. */ : "memory" /* Clobbers. */ ); -#endif +# endif } #elif (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE void @@ -104,7 +104,7 @@ mb_write(void) { malloc_mutex_t mtx; - malloc_mutex_init(&mtx, WITNESS_RANK_OMIT); + malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); malloc_mutex_lock(NULL, &mtx); malloc_mutex_unlock(NULL, &mtx); } From 259f8ebbfc025eec17695c3d14019f17e414791f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Apr 2016 22:21:31 -0700 Subject: [PATCH 0206/2608] Fix arena_choose_hard() regression. This regression was caused by 66cd953514a18477eb49732e40d5c2ab5f1b12c5 (Do not allocate metadata via non-auto arenas, nor tcaches.). 
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3bd39c3c..8b744e68 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -619,7 +619,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) for (j = 0; j < 2; j++) { if (arena_nthreads_get(arena_get(tsd, choose[j], false), - !!j) == 0 || first_null != narenas_auto) { + !!j) == 0 || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. From 2fe64d237cf65baa9f6056622e896949933355e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 12:51:17 -0700 Subject: [PATCH 0207/2608] Fix arena_reset() test to avoid tcache. --- test/unit/arena_reset.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 52170cc4..8e769de6 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -84,6 +84,7 @@ TEST_BEGIN(test_arena_reset) unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; size_t sz, miblen; void **ptrs; + int flags; size_t mib[3]; tsd_t *tsd; @@ -94,6 +95,8 @@ TEST_BEGIN(test_arena_reset) assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + nsmall = get_nsmall(); nlarge = get_nlarge(); nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge(); @@ -104,25 +107,21 @@ TEST_BEGIN(test_arena_reset) /* Allocate objects with a wide range of sizes. 
*/ for (i = 0; i < nsmall; i++) { sz = get_small_size(i); - ptrs[i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + ptrs[i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, - arena_ind); + "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nlarge; i++) { sz = get_large_size(i); - ptrs[nsmall + i] = mallocx(sz, MALLOCX_ARENA(arena_ind)); + ptrs[nsmall + i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, - arena_ind); + "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nhuge; i++) { sz = get_huge_size(i); - ptrs[nsmall + nlarge + i] = mallocx(sz, - MALLOCX_ARENA(arena_ind)); + ptrs[nsmall + nlarge + i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, MALLOCX_ARENA(%u)) failure", sz, - arena_ind); + "Unexpected mallocx(%zu, %#x) failure", sz, flags); } tsd = tsd_fetch(); From 7e6749595a570ed6686603a1bcfdf8cf49147f19 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 13:26:54 -0700 Subject: [PATCH 0208/2608] Fix arena reset effects on large/huge stats. Reset large curruns to 0 during arena reset. Do not increase huge ndalloc stats during arena reset. 
--- src/arena.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index f752acad..c6859e3b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -854,6 +854,17 @@ arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) arena->stats.hstats[index].curhchunks--; } +static void +arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) +{ + szind_t index = size2index(usize) - nlclasses - NBINS; + + cassert(config_stats); + + arena->stats.ndalloc_huge++; + arena->stats.hstats[index].ndalloc--; +} + static void arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) { @@ -1884,22 +1895,30 @@ arena_reset(tsd_t *tsd, arena_t *arena) } } + /* Reset curruns for large size classes. */ + if (config_stats) { + for (i = 0; i < nlclasses; i++) + arena->stats.lstats[i].curruns = 0; + } + /* Huge allocations. */ malloc_mutex_lock(tsd, &arena->huge_mtx); for (node = ql_last(&arena->huge, ql_link); node != NULL; node = ql_last(&arena->huge, ql_link)) { void *ptr = extent_node_addr_get(node); + size_t usize; malloc_mutex_unlock(tsd, &arena->huge_mtx); - /* Remove huge allocation from prof sample set. */ - if (config_prof && opt_prof) { - size_t usize; - + if (config_stats || (config_prof && opt_prof)) usize = isalloc(tsd, ptr, config_prof); + /* Remove huge allocation from prof sample set. */ + if (config_prof && opt_prof) prof_free(tsd, ptr, usize); - } huge_dalloc(tsd, ptr); malloc_mutex_lock(tsd, &arena->huge_mtx); + /* Cancel out unwanted effects on stats. */ + if (config_stats) + arena_huge_reset_stats_cancel(arena, usize); } malloc_mutex_unlock(tsd, &arena->huge_mtx); From 0d970a054e5477cd6cf3639366bcc0a1a4f61b11 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 20:26:03 -0700 Subject: [PATCH 0209/2608] Use separate arena for chunk tests. This assures that side effects of internal allocation don't impact tests. 
--- test/integration/chunk.c | 73 +++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/test/integration/chunk.c b/test/integration/chunk.c index af1c9a53..ff9bf967 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -121,6 +121,10 @@ TEST_BEGIN(test_chunk) { void *p; size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz; + unsigned arena_ind; + int flags; + size_t hooks_mib[3], purge_mib[3]; + size_t hooks_miblen, purge_miblen; chunk_hooks_t new_hooks = { chunk_alloc, chunk_dalloc, @@ -132,10 +136,19 @@ TEST_BEGIN(test_chunk) }; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + /* Install custom chunk hooks. */ + hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.chunk_hooks", hooks_mib, + &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(chunk_hooks_t); new_size = sizeof(chunk_hooks_t); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); @@ -165,45 +178,49 @@ TEST_BEGIN(test_chunk) "Unexpected arenas.hchunk.2.size failure"); /* Test dalloc/decommit/purge cascade. 
*/ + purge_miblen = sizeof(purge_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), + 0, "Unexpected mallctlnametomib() failure"); + purge_mib[1] = (size_t)arena_ind; do_dalloc = false; do_decommit = false; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; did_purge = false; did_split = false; - xallocx_success_a = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_a = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { assert_true(did_dalloc, "Expected dalloc"); assert_false(did_decommit, "Unexpected decommit"); assert_true(did_purge, "Expected purge"); } assert_true(did_split, "Expected split"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; /* Test decommit/commit and observe split/merge. 
*/ do_dalloc = false; do_decommit = true; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; did_split = false; did_merge = false; - xallocx_success_b = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_b = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) assert_true(did_split, "Expected split"); - xallocx_success_c = (xallocx(p, huge0 * 2, 0, 0) == huge0 * 2); + xallocx_success_c = (xallocx(p, huge0 * 2, 0, flags) == huge0 * 2); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; do_decommit = false; @@ -214,42 +231,42 @@ TEST_BEGIN(test_chunk) * successful xallocx() from size=huge2 to size=huge1 is * guaranteed to leave trailing purgeable memory. */ - p = mallocx(huge2, 0); + p = mallocx(huge2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_purge = false; - assert_zu_eq(xallocx(p, huge1, 0, 0), huge1, + assert_zu_eq(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() failure"); assert_true(did_purge, "Expected purge"); - dallocx(p, 0); + dallocx(p, flags); } /* Test decommit for large allocations. 
*/ do_decommit = true; - p = mallocx(large1, 0); + p = mallocx(large1, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_decommit = false; - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() failure"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_commit = false; - assert_zu_eq(xallocx(p, large1, 0, 0), large1, + assert_zu_eq(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() failure"); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); - dallocx(p, 0); + dallocx(p, flags); do_decommit = false; /* Make sure non-huge allocation succeeds. */ - p = mallocx(42, 0); + p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - dallocx(p, 0); + dallocx(p, flags); /* Restore chunk hooks. */ - assert_d_eq(mallctl("arena.0.chunk_hooks", NULL, NULL, &old_hooks, - new_size), 0, "Unexpected chunk_hooks error"); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + &old_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, NULL, 0), 0, "Unexpected chunk_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); From 174c0c3a9c63b3a0bfa32381148b537e9b9af96d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 23:14:40 -0700 Subject: [PATCH 0210/2608] Fix fork()-related lock rank ordering reversals. 
--- Makefile.in | 1 + include/jemalloc/internal/arena.h | 5 +- include/jemalloc/internal/private_symbols.txt | 11 +++- include/jemalloc/internal/prof.h | 3 +- include/jemalloc/internal/tsd.h | 4 +- include/jemalloc/internal/witness.h | 3 ++ src/arena.c | 32 +++++++++--- src/jemalloc.c | 45 +++++++++++----- src/prof.c | 52 +++++++++++++------ src/witness.c | 37 ++++++++++++- test/unit/fork.c | 39 ++++++++++++++ 11 files changed, 188 insertions(+), 44 deletions(-) create mode 100644 test/unit/fork.c diff --git a/Makefile.in b/Makefile.in index ddc89157..a98ebd62 100644 --- a/Makefile.in +++ b/Makefile.in @@ -141,6 +141,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f2685f6f..53e6b3ad 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -601,7 +601,10 @@ void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsd_t *tsd, unsigned ind); bool arena_boot(void); -void arena_prefork(tsd_t *tsd, arena_t *arena); +void arena_prefork0(tsd_t *tsd, arena_t *arena); +void arena_prefork1(tsd_t *tsd, arena_t *arena); +void arena_prefork2(tsd_t *tsd, arena_t *arena); +void arena_prefork3(tsd_t *tsd, arena_t *arena); void arena_postfork_parent(tsd_t *tsd, arena_t *arena); void arena_postfork_child(tsd_t *tsd, arena_t *arena); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c7ff8529..0eb7778c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -84,7 +84,10 @@ arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent -arena_prefork +arena_prefork0 +arena_prefork1 
+arena_prefork2 +arena_prefork3 arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked @@ -432,7 +435,8 @@ prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent -prof_prefork +prof_prefork0 +prof_prefork1 prof_realloc prof_reset prof_sample_accum_update @@ -583,11 +587,14 @@ valgrind_make_mem_undefined witness_assert_lockless witness_assert_not_owner witness_assert_owner +witness_fork_cleanup witness_init witness_lock witness_lock_error witness_lockless_error witness_not_owner_error witness_owner_error +witness_postfork +witness_prefork witness_unlock witnesses_cleanup diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 047bd0b7..4fe17875 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -316,7 +316,8 @@ bool prof_gdump_set(tsd_t *tsd, bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(tsd_t *tsd); -void prof_prefork(tsd_t *tsd); +void prof_prefork0(tsd_t *tsd); +void prof_prefork1(tsd_t *tsd); void prof_postfork_parent(tsd_t *tsd); void prof_postfork_child(tsd_t *tsd); void prof_sample_threshold_update(prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 1a1b5c32..4a99ee6e 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -544,6 +544,7 @@ struct tsd_init_head_s { O(tcache_enabled, tcache_enabled_t) \ O(quarantine, quarantine_t *) \ O(witnesses, witness_list_t) \ + O(witness_fork, bool) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -558,7 +559,8 @@ struct tsd_init_head_s { false, \ tcache_enabled_default, \ NULL, \ - ql_head_initializer(witnesses) \ + ql_head_initializer(witnesses), \ + false \ } struct tsd_s { diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 22f0b2c7..ecdc034a 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -94,6 +94,9 @@ extern 
witness_lockless_error_t *witness_lockless_error; void witness_assert_lockless(tsd_t *tsd); void witnesses_cleanup(tsd_t *tsd); +void witness_fork_cleanup(tsd_t *tsd); +void witness_prefork(tsd_t *tsd); +void witness_postfork(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index c6859e3b..969ad85d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3822,16 +3822,34 @@ arena_boot(void) } void -arena_prefork(tsd_t *tsd, arena_t *arena) +arena_prefork0(tsd_t *tsd, arena_t *arena) +{ + + malloc_mutex_prefork(tsd, &arena->lock); +} + +void +arena_prefork1(tsd_t *tsd, arena_t *arena) +{ + + malloc_mutex_prefork(tsd, &arena->chunks_mtx); +} + +void +arena_prefork2(tsd_t *tsd, arena_t *arena) +{ + + malloc_mutex_prefork(tsd, &arena->node_cache_mtx); +} + +void +arena_prefork3(tsd_t *tsd, arena_t *arena) { unsigned i; - malloc_mutex_prefork(tsd, &arena->lock); - malloc_mutex_prefork(tsd, &arena->huge_mtx); - malloc_mutex_prefork(tsd, &arena->chunks_mtx); - malloc_mutex_prefork(tsd, &arena->node_cache_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_prefork(tsd, &arena->bins[i].lock); + malloc_mutex_prefork(tsd, &arena->huge_mtx); } void @@ -3839,11 +3857,11 @@ arena_postfork_parent(tsd_t *tsd, arena_t *arena) { unsigned i; + malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(tsd, &arena->bins[i].lock); malloc_mutex_postfork_parent(tsd, &arena->node_cache_mtx); malloc_mutex_postfork_parent(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); malloc_mutex_postfork_parent(tsd, &arena->lock); } @@ -3852,10 +3870,10 @@ arena_postfork_child(tsd_t *tsd, arena_t *arena) { unsigned i; + malloc_mutex_postfork_child(tsd, &arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(tsd, &arena->bins[i].lock); malloc_mutex_postfork_child(tsd, &arena->node_cache_mtx); 
malloc_mutex_postfork_child(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_child(tsd, &arena->huge_mtx); malloc_mutex_postfork_child(tsd, &arena->lock); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 8b744e68..a7acf5f7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2757,7 +2757,8 @@ _malloc_prefork(void) #endif { tsd_t *tsd; - unsigned i, narenas; + unsigned i, j, narenas; + arena_t *arena; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2767,18 +2768,32 @@ _malloc_prefork(void) tsd = tsd_fetch(); - /* Acquire all mutexes in a safe order. */ - ctl_prefork(tsd); - prof_prefork(tsd); - malloc_mutex_prefork(tsd, &arenas_lock); - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena; + narenas = narenas_total_get(); - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_prefork(tsd, arena); + /* Acquire all mutexes in a safe order. */ + witness_prefork(tsd); + ctl_prefork(tsd); + malloc_mutex_prefork(tsd, &arenas_lock); + prof_prefork0(tsd); + for (i = 0; i < 3; i++) { + for (j = 0; j < narenas; j++) { + if ((arena = arena_get(tsd, j, false)) != NULL) { + switch (i) { + case 0: arena_prefork0(tsd, arena); break; + case 1: arena_prefork1(tsd, arena); break; + case 2: arena_prefork2(tsd, arena); break; + default: not_reached(); + } + } + } } - chunk_prefork(tsd); base_prefork(tsd); + chunk_prefork(tsd); + for (i = 0; i < narenas; i++) { + if ((arena = arena_get(tsd, i, false)) != NULL) + arena_prefork3(tsd, arena); + } + prof_prefork1(tsd); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2801,17 +2816,18 @@ _malloc_postfork(void) tsd = tsd_fetch(); /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_parent(tsd); chunk_postfork_parent(tsd); + base_postfork_parent(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(tsd, i, false)) != NULL) arena_postfork_parent(tsd, arena); } - malloc_mutex_postfork_parent(tsd, &arenas_lock); prof_postfork_parent(tsd); + malloc_mutex_postfork_parent(tsd, &arenas_lock); ctl_postfork_parent(tsd); + witness_postfork(tsd); } void @@ -2825,17 +2841,18 @@ jemalloc_postfork_child(void) tsd = tsd_fetch(); /* Release all mutexes, now that fork() has completed. */ - base_postfork_child(tsd); chunk_postfork_child(tsd); + base_postfork_child(tsd); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(tsd, i, false)) != NULL) arena_postfork_child(tsd, arena); } - malloc_mutex_postfork_child(tsd, &arenas_lock); prof_postfork_child(tsd); + malloc_mutex_postfork_child(tsd, &arenas_lock); ctl_postfork_child(tsd); + witness_postfork(tsd); } /******************************************************************************/ diff --git a/src/prof.c b/src/prof.c index 82604632..92edba84 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2257,20 +2257,32 @@ prof_boot2(tsd_t *tsd) } void -prof_prefork(tsd_t *tsd) +prof_prefork0(tsd_t *tsd) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(tsd, &tdatas_mtx); + malloc_mutex_prefork(tsd, &prof_dump_mtx); malloc_mutex_prefork(tsd, &bt2gctx_mtx); - malloc_mutex_prefork(tsd, &next_thr_uid_mtx); - malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(tsd, &gctx_locks[i]); + malloc_mutex_prefork(tsd, &tdatas_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) malloc_mutex_prefork(tsd, &tdata_locks[i]); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_prefork(tsd, &gctx_locks[i]); + } +} + +void +prof_prefork1(tsd_t *tsd) +{ + + if (opt_prof) { + malloc_mutex_prefork(tsd, &prof_active_mtx); + malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); + 
malloc_mutex_prefork(tsd, &prof_gdump_mtx); + malloc_mutex_prefork(tsd, &next_thr_uid_mtx); + malloc_mutex_prefork(tsd, &prof_thread_active_init_mtx); } } @@ -2281,14 +2293,18 @@ prof_postfork_parent(tsd_t *tsd) if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); + malloc_mutex_postfork_parent(tsd, &prof_thread_active_init_mtx); + malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsd, &prof_gdump_mtx); + malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsd, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_parent(tsd, &gctx_locks[i]); - malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); malloc_mutex_postfork_parent(tsd, &tdatas_mtx); + malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsd, &prof_dump_mtx); } } @@ -2299,14 +2315,18 @@ prof_postfork_child(tsd_t *tsd) if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(tsd, &tdata_locks[i]); + malloc_mutex_postfork_child(tsd, &prof_thread_active_init_mtx); + malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsd, &prof_gdump_mtx); + malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsd, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_child(tsd, &gctx_locks[i]); - malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_child(tsd, &tdata_locks[i]); malloc_mutex_postfork_child(tsd, &tdatas_mtx); + 
malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsd, &prof_dump_mtx); } } diff --git a/src/witness.c b/src/witness.c index 444d200f..b5384a29 100644 --- a/src/witness.c +++ b/src/witness.c @@ -48,9 +48,21 @@ witness_lock(tsd_t *tsd, witness_t *witness) witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); - if (w != NULL && w->rank >= witness->rank && (w->comp == NULL || - w->comp != witness->comp || w->comp(w, witness) > 0)) + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, witness) > 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } ql_elm_new(witness, link); ql_tail_insert(witnesses, witness, link); @@ -194,3 +206,24 @@ witnesses_cleanup(tsd_t *tsd) /* Do nothing. */ } + +void +witness_fork_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} + +void +witness_prefork(tsd_t *tsd) +{ + + tsd_witness_fork_set(tsd, true); +} + +void +witness_postfork(tsd_t *tsd) +{ + + tsd_witness_fork_set(tsd, false); +} diff --git a/test/unit/fork.c b/test/unit/fork.c new file mode 100644 index 00000000..890bc869 --- /dev/null +++ b/test/unit/fork.c @@ -0,0 +1,39 @@ +#include "test/jemalloc_test.h" + +#include + +TEST_BEGIN(test_fork) +{ + void *p; + pid_t pid; + + p = malloc(1); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + + pid = fork(); + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* Child. */ + exit(0); + } else { + int status; + + /* Parent. 
*/ + free(p); + do { + if (waitpid(pid, &status, 0) == -1) + test_fail("Unexpected waitpid() failure"); + } while (!WIFEXITED(status) && !WIFSIGNALED(status)); + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_fork)); +} From 108c4a11e96d57fd71751efa23ab986a236a0c7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 26 Apr 2016 10:47:22 -0700 Subject: [PATCH 0211/2608] Fix witness/fork() interactions. Fix witness to clear its list of owned mutexes in the child if platform-specific malloc_mutex code re-initializes mutexes rather than unlocking them. --- include/jemalloc/internal/private_symbols.txt | 3 ++- include/jemalloc/internal/witness.h | 3 ++- src/jemalloc.c | 6 ++--- src/witness.c | 14 ++++++++++- test/unit/fork.c | 25 ++++++++++++++++--- 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0eb7778c..de884fcf 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -594,7 +594,8 @@ witness_lock_error witness_lockless_error witness_not_owner_error witness_owner_error -witness_postfork +witness_postfork_child +witness_postfork_parent witness_prefork witness_unlock witnesses_cleanup diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index ecdc034a..b2e6e825 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -96,7 +96,8 @@ void witness_assert_lockless(tsd_t *tsd); void witnesses_cleanup(tsd_t *tsd); void witness_fork_cleanup(tsd_t *tsd); void witness_prefork(tsd_t *tsd); -void witness_postfork(tsd_t *tsd); +void witness_postfork_parent(tsd_t *tsd); +void witness_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index a7acf5f7..cd97ea16 100644 --- a/src/jemalloc.c +++ 
b/src/jemalloc.c @@ -2770,8 +2770,8 @@ _malloc_prefork(void) narenas = narenas_total_get(); - /* Acquire all mutexes in a safe order. */ witness_prefork(tsd); + /* Acquire all mutexes in a safe order. */ ctl_prefork(tsd); malloc_mutex_prefork(tsd, &arenas_lock); prof_prefork0(tsd); @@ -2815,6 +2815,7 @@ _malloc_postfork(void) tsd = tsd_fetch(); + witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ chunk_postfork_parent(tsd); base_postfork_parent(tsd); @@ -2827,7 +2828,6 @@ _malloc_postfork(void) prof_postfork_parent(tsd); malloc_mutex_postfork_parent(tsd, &arenas_lock); ctl_postfork_parent(tsd); - witness_postfork(tsd); } void @@ -2840,6 +2840,7 @@ jemalloc_postfork_child(void) tsd = tsd_fetch(); + witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ chunk_postfork_child(tsd); base_postfork_child(tsd); @@ -2852,7 +2853,6 @@ jemalloc_postfork_child(void) prof_postfork_child(tsd); malloc_mutex_postfork_child(tsd, &arenas_lock); ctl_postfork_child(tsd); - witness_postfork(tsd); } /******************************************************************************/ diff --git a/src/witness.c b/src/witness.c index b5384a29..31c36a24 100644 --- a/src/witness.c +++ b/src/witness.c @@ -222,8 +222,20 @@ witness_prefork(tsd_t *tsd) } void -witness_postfork(tsd_t *tsd) +witness_postfork_parent(tsd_t *tsd) { tsd_witness_fork_set(tsd, false); } + +void +witness_postfork_child(tsd_t *tsd) +{ +#ifndef JEMALLOC_MUTEX_INIT_CB + witness_list_t *witnesses; + + witnesses = tsd_witnessesp_get(tsd); + ql_new(witnesses); +#endif + tsd_witness_fork_set(tsd, false); +} diff --git a/test/unit/fork.c b/test/unit/fork.c index 890bc869..d64f2e09 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -11,6 +11,13 @@ TEST_BEGIN(test_fork) assert_ptr_not_null(p, "Unexpected malloc() failure"); pid = fork(); + + free(p); + + p = malloc(64); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + free(p); + if (pid == -1) { /* 
Error. */ test_fail("Unexpected fork() failure"); @@ -21,11 +28,23 @@ TEST_BEGIN(test_fork) int status; /* Parent. */ - free(p); - do { + while (true) { if (waitpid(pid, &status, 0) == -1) test_fail("Unexpected waitpid() failure"); - } while (!WIFEXITED(status) && !WIFSIGNALED(status)); + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail( + "Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } } } TEST_END From 9aa1543e9c1cdd8373985e16e4610fd84caafd85 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 09:37:54 -0700 Subject: [PATCH 0212/2608] Update mallocx() OOM test to deal with smaller hugemax. Depending on virtual memory resource limits, it is necessary to attempt allocating three maximally sized objects to trigger OOM rather than just two, since the maximum supported size is slightly less than half the total virtual memory address space. This fixes a test failure that was introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). This resolves #379. --- test/integration/mallocx.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index d82bf422..578c229a 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -69,19 +69,28 @@ TEST_END TEST_BEGIN(test_oom) { + size_t hugemax; + bool oom; + void *ptrs[3]; + unsigned i; /* - * It should be impossible to allocate two objects that each consume - * more than half the virtual address space. + * It should be impossible to allocate three objects that each consume + * nearly half the virtual address space. 
*/ - { - size_t hugemax = get_huge_size(get_nhuge()-1); - void *p = mallocx(hugemax, 0); - if (p != NULL) { - assert_ptr_null(mallocx(hugemax, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax); - dallocx(p, 0); - } + hugemax = get_huge_size(get_nhuge()-1); + oom = false; + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + ptrs[i] = mallocx(hugemax, 0); + if (ptrs[i] == NULL) + oom = true; + } + assert_true(oom, + "Expected OOM during series of calls to mallocx(size=%zu, 0)", + hugemax); + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + if (ptrs[i] != NULL) + dallocx(ptrs[i], 0); } #if LG_SIZEOF_PTR == 3 From 61111fe239348c6eabc5184d9a1217dc33a07662 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Feb 2016 17:53:13 -0800 Subject: [PATCH 0213/2608] Remove errno overrides. --- include/msvc_compat/windows_extra.h | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 0c5e323f..3008faa3 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -1,26 +1,6 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H #define MSVC_COMPAT_WINDOWS_EXTRA_H -#ifndef ENOENT -# define ENOENT ERROR_PATH_NOT_FOUND -#endif -#ifndef EINVAL -# define EINVAL ERROR_BAD_ARGUMENTS -#endif -#ifndef EAGAIN -# define EAGAIN ERROR_OUTOFMEMORY -#endif -#ifndef EPERM -# define EPERM ERROR_WRITE_FAULT -#endif -#ifndef EFAULT -# define EFAULT ERROR_INVALID_ADDRESS -#endif -#ifndef ENOMEM -# define ENOMEM ERROR_NOT_ENOUGH_MEMORY -#endif -#ifndef ERANGE -# define ERANGE ERROR_INVALID_DATA -#endif +#include #endif /* MSVC_COMPAT_WINDOWS_EXTRA_H */ From 7798c7ac1d0b165861433cc1fb045c6a9dc88f69 Mon Sep 17 00:00:00 2001 From: rustyx Date: Mon, 29 Feb 2016 21:04:29 +0100 Subject: [PATCH 0214/2608] Fix MSVC project and improve MSVC lib naming (v140 -> vc140) --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 8 ++++++-- 
.../vc2015/jemalloc/jemalloc.vcxproj.filters | 18 +++++++++++++++--- .../vc2015/test_threads/test_threads.vcxproj | 4 ++-- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d8ad505b..f3f0260b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + @@ -69,6 +70,7 @@ + @@ -103,11 +105,13 @@ + + @@ -227,7 +231,7 @@ $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) $(SolutionDir)$(Platform)\$(Configuration)\ @@ -236,7 +240,7 @@ $(SolutionDir)$(Platform)\$(Configuration)\ $(Platform)\$(Configuration)\ - $(ProjectName)-$(PlatformToolset)-$(Configuration) + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 89a51f76..ce70632b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -101,6 +101,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -146,6 +149,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -214,9 +220,15 @@ Source Files + + Source Files + Source Files + + Source Files + Source Files @@ -232,14 +244,14 @@ Source Files + + Source Files + Source Files Source Files - - Source Files - \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj index b681e71e..f5e9898f 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -223,7 +223,7 @@ Console true - 
jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) $(SolutionDir)$(Platform)\$(Configuration) @@ -306,7 +306,7 @@ true true $(SolutionDir)$(Platform)\$(Configuration) - jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) From c3ab90483fc78a744a07f9d5e77888454e4c5f99 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 29 Feb 2016 14:30:19 -0800 Subject: [PATCH 0215/2608] Fix stack corruption and uninitialized var warning Stack corruption happens in x64 bit This resolves #347. --- src/arena.c | 2 +- test/unit/hash.c | 13 +++++++------ 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/arena.c b/src/arena.c index 99e20fde..965c0fe2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2423,7 +2423,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) uintptr_t random_offset; arena_run_t *run; arena_chunk_map_misc_t *miscelm; - UNUSED bool idump; + UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); /* Large allocation. 
*/ usize = index2size(binind); diff --git a/test/unit/hash.c b/test/unit/hash.c index f50ba81b..010c9d76 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -64,14 +64,15 @@ static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; - VARIABLE_ARRAY(uint8_t, hashes, hashbytes * 256); + const int hashes_size = hashbytes * 256; + VARIABLE_ARRAY(uint8_t, hashes, hashes_size); VARIABLE_ARRAY(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; memset(key, 0, KEY_SIZE); - memset(hashes, 0, sizeof(hashes)); - memset(final, 0, sizeof(final)); + memset(hashes, 0, hashes_size); + memset(final, 0, hashbytes); /* * Hash keys of the form {0}, {0,1}, {0,1,2}, ..., {0,1,...,255} as the @@ -102,17 +103,17 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) /* Hash the result array. */ switch (variant) { case hash_variant_x86_32: { - uint32_t out = hash_x86_32(hashes, hashbytes*256, 0); + uint32_t out = hash_x86_32(hashes, hashes_size, 0); memcpy(final, &out, sizeof(out)); break; } case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(hashes, hashbytes*256, 0, out); + hash_x86_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } case hash_variant_x64_128: { uint64_t out[2]; - hash_x64_128(hashes, hashbytes*256, 0, out); + hash_x64_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; } default: not_reached(); From b40253a93ec4eb79c536403491f326bb56f72c02 Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Thu, 10 Mar 2016 22:49:05 -0800 Subject: [PATCH 0216/2608] typecast address to pointer to byte to avoid unaligned memory access error --- include/jemalloc/internal/hash.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 864fda81..1ff2d9a0 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -53,7 +53,7 
@@ hash_get_block_32(const uint32_t *p, int i) if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { uint32_t ret; - memcpy(&ret, &p[i], sizeof(uint32_t)); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); return (ret); } @@ -68,7 +68,7 @@ hash_get_block_64(const uint64_t *p, int i) if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { uint64_t ret; - memcpy(&ret, &p[i], sizeof(uint64_t)); + memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); return (ret); } From 8d8960f635c63b918ac54e0d1005854ed7a2692b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Mar 2016 18:36:04 -0700 Subject: [PATCH 0217/2608] Fix potential chunk leaks. Move chunk_dalloc_arena()'s implementation into chunk_dalloc_wrapper(), so that if the dalloc hook fails, proper decommit/purge/retain cascading occurs. This fixes three potential chunk leaks on OOM paths, one during dss-based chunk allocation, one during chunk header commit (currently relevant only on Windows), and one during rtree write (e.g. if rtree node allocation fails). Merge chunk_purge_arena() into chunk_purge_default() (refactor, no change to functionality). 
--- include/jemalloc/internal/chunk.h | 6 +-- include/jemalloc/internal/private_symbols.txt | 2 - src/arena.c | 16 +++--- src/chunk.c | 51 ++++++------------- src/chunk_dss.c | 2 +- 5 files changed, 26 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 5d193835..d800478d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -62,12 +62,8 @@ void *chunk_alloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); void chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); -void chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed); void chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed); -bool chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, - size_t length); + void *chunk, size_t size, bool zeroed, bool committed); bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5880996a..5fcc6692 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -148,7 +148,6 @@ chunk_alloc_dss chunk_alloc_mmap chunk_alloc_wrapper chunk_boot -chunk_dalloc_arena chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper @@ -168,7 +167,6 @@ chunk_npages chunk_postfork_child chunk_postfork_parent chunk_prefork -chunk_purge_arena chunk_purge_wrapper chunk_register chunks_rtree diff --git a/src/arena.c b/src/arena.c index 965c0fe2..b7645d8b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -617,8 +617,8 @@ arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t 
*chunk_hooks, /* Commit header. */ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(arena, chunk_hooks, - (void *)chunk, chunksize, *commit); + chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, + chunksize, *zero, *commit); chunk = NULL; } } @@ -629,7 +629,7 @@ arena_chunk_alloc_internal_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, LG_PAGE, arena->ind); } chunk_dalloc_wrapper(arena, chunk_hooks, (void *)chunk, - chunksize, *commit); + chunksize, *zero, *commit); chunk = NULL; } @@ -1024,7 +1024,7 @@ arena_chunk_ralloc_huge_expand_hard(arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_unlock(&arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, chunk_hooks, nchunk, cdiff, *zero, + chunk_dalloc_wrapper(arena, chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1050,8 +1050,8 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(arena, &arena->chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + err = (chunk_alloc_cache(arena, &chunk_hooks, nchunk, cdiff, chunksize, + zero, true) == NULL); malloc_mutex_unlock(&arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(arena, &chunk_hooks, @@ -1059,7 +1059,7 @@ arena_chunk_ralloc_huge_expand(arena_t *arena, void *chunk, size_t oldsize, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_arena(arena, &chunk_hooks, nchunk, cdiff, *zero, + chunk_dalloc_wrapper(arena, &chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1707,7 +1707,7 @@ arena_unstash_purged(arena_t *arena, chunk_hooks_t *chunk_hooks, extent_node_dirty_remove(chunkselm); arena_node_dalloc(arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_arena(arena, chunk_hooks, addr, size, + 
chunk_dalloc_wrapper(arena, chunk_hooks, addr, size, zeroed, committed); } else { arena_chunk_t *chunk = diff --git a/src/chunk.c b/src/chunk.c index b179d213..304d4e5a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -425,8 +425,8 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, arena_t *arena; arena = chunk_arena_get(arena_ind); - ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, - commit, arena->dss_prec); + ret = chunk_alloc_core(arena, new_addr, size, alignment, zero, commit, + arena->dss_prec); if (ret == NULL) return (NULL); if (config_valgrind) @@ -579,8 +579,18 @@ chunk_dalloc_cache(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, arena_maybe_purge(arena); } +static bool +chunk_dalloc_default(void *chunk, size_t size, bool committed, + unsigned arena_ind) +{ + + if (!have_dss || !chunk_in_dss(chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + void -chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, +chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { @@ -604,27 +614,6 @@ chunk_dalloc_arena(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); } -static bool -chunk_dalloc_default(void *chunk, size_t size, bool committed, - unsigned arena_ind) -{ - - if (!have_dss || !chunk_in_dss(chunk)) - return (chunk_dalloc_mmap(chunk, size)); - return (true); -} - -void -chunk_dalloc_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, - size_t size, bool committed) -{ - - chunk_hooks_assure_initialized(arena, chunk_hooks); - chunk_hooks->dalloc(chunk, size, committed, arena->ind); - if (config_valgrind && chunk_hooks->dalloc != chunk_dalloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); -} - static bool chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, unsigned arena_ind) @@ -643,8 
+632,9 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } -bool -chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) +static bool +chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, + unsigned arena_ind) { assert(chunk != NULL); @@ -657,15 +647,6 @@ chunk_purge_arena(arena_t *arena, void *chunk, size_t offset, size_t length) length)); } -static bool -chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - return (chunk_purge_arena(chunk_arena_get(arena_ind), chunk, offset, - length)); -} - bool chunk_purge_wrapper(arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length) diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 61fc9169..943d0e98 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -136,7 +136,7 @@ chunk_alloc_dss(arena_t *arena, void *new_addr, size_t size, size_t alignment, CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(arena, &chunk_hooks, cpad, cpad_size, - true); + false, true); } if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( From 8c83c021b0c2180d21c79a00c45c41ba8d7f4eee Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 10:32:06 -0700 Subject: [PATCH 0218/2608] Fix bitmap_sfu() regression. Fix bitmap_sfu() to shift by LG_BITMAP_GROUP_NBITS rather than hard-coded 6 when using linear (non-USE_TREE) bitmap search. In practice this affects only 64-bit systems for which sizeof(long) is not 8 (i.e. Windows), since USE_TREE is defined for 32-bit systems. This regression was caused by b8823ab02607d6f03febd32ac504bb6188c54047 (Use linear scan for small bitmaps). This resolves #368. 
--- include/jemalloc/internal/bitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 2594e3a4..0e0d2476 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -223,7 +223,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) i++; g = bitmap[i]; } - bit = (bit - 1) + (i << 6); + bit = (bit - 1) + (i << LG_BITMAP_GROUP_NBITS); #endif bitmap_set(bitmap, binfo, bit); return (bit); From d65db0e402374a53dcd181a09aff299aece35ff1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 11 Apr 2016 18:47:18 -0700 Subject: [PATCH 0219/2608] Fix malloc_stats_print() to print correct opt.narenas value. This regression was caused by 8f683b94a751c65af8f9fa25970ccf2917b96bb8 (Make opt_narenas unsigned rather than size_t.). --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index a7249479..87b09e58 100644 --- a/src/stats.c +++ b/src/stats.c @@ -468,7 +468,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_UNSIGNED(n) \ if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ + " opt."#n": %u\n", uv); \ } #define OPT_WRITE_SIZE_T(n) \ if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ From c3b008ec39bf585545db4f7ff96990b851957792 Mon Sep 17 00:00:00 2001 From: hitstergtd Date: Mon, 18 Apr 2016 11:08:39 +0100 Subject: [PATCH 0220/2608] Doc typo fixes. 
--- doc/jemalloc.xml.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bc5dbd1d..88b003a7 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1016,7 +1016,7 @@ for (i = 0; i < nbins; i++) { allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit, so this option is not - univerally usable (though the application can register its own + universally usable (though the application can register its own atexit function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application @@ -1320,7 +1320,7 @@ malloc_conf = "xmalloc:true";]]> option. Note that atexit may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit, so - this option is not univerally usable (though the application can + this option is not universally usable (though the application can register its own atexit function with equivalent functionality). This option is disabled by default. @@ -2062,7 +2062,7 @@ typedef struct { [] Average number of bytes allocated between - inverval-based profile dumps. See the + interval-based profile dumps. See the opt.lg_prof_interval option for additional information. From de35328a101f18adc474a7c2d476f963fa02764b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 20:26:03 -0700 Subject: [PATCH 0221/2608] Use separate arena for chunk tests. This assures that side effects of internal allocation don't impact tests. 
--- test/integration/chunk.c | 73 +++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/test/integration/chunk.c b/test/integration/chunk.c index af1c9a53..ff9bf967 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -121,6 +121,10 @@ TEST_BEGIN(test_chunk) { void *p; size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz; + unsigned arena_ind; + int flags; + size_t hooks_mib[3], purge_mib[3]; + size_t hooks_miblen, purge_miblen; chunk_hooks_t new_hooks = { chunk_alloc, chunk_dalloc, @@ -132,10 +136,19 @@ TEST_BEGIN(test_chunk) }; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + /* Install custom chunk hooks. */ + hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.chunk_hooks", hooks_mib, + &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(chunk_hooks_t); new_size = sizeof(chunk_hooks_t); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); @@ -165,45 +178,49 @@ TEST_BEGIN(test_chunk) "Unexpected arenas.hchunk.2.size failure"); /* Test dalloc/decommit/purge cascade. 
*/ + purge_miblen = sizeof(purge_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), + 0, "Unexpected mallctlnametomib() failure"); + purge_mib[1] = (size_t)arena_ind; do_dalloc = false; do_decommit = false; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; did_purge = false; did_split = false; - xallocx_success_a = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_a = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { assert_true(did_dalloc, "Expected dalloc"); assert_false(did_decommit, "Unexpected decommit"); assert_true(did_purge, "Expected purge"); } assert_true(did_split, "Expected split"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; /* Test decommit/commit and observe split/merge. 
*/ do_dalloc = false; do_decommit = true; - p = mallocx(huge0 * 2, 0); + p = mallocx(huge0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; did_split = false; did_merge = false; - xallocx_success_b = (xallocx(p, huge0, 0, 0) == huge0); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + xallocx_success_b = (xallocx(p, huge0, 0, flags) == huge0); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) assert_true(did_split, "Expected split"); - xallocx_success_c = (xallocx(p, huge0 * 2, 0, 0) == huge0 * 2); + xallocx_success_c = (xallocx(p, huge0 * 2, 0, flags) == huge0 * 2); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); - dallocx(p, 0); + dallocx(p, flags); do_dalloc = true; do_decommit = false; @@ -214,42 +231,42 @@ TEST_BEGIN(test_chunk) * successful xallocx() from size=huge2 to size=huge1 is * guaranteed to leave trailing purgeable memory. */ - p = mallocx(huge2, 0); + p = mallocx(huge2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_purge = false; - assert_zu_eq(xallocx(p, huge1, 0, 0), huge1, + assert_zu_eq(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() failure"); assert_true(did_purge, "Expected purge"); - dallocx(p, 0); + dallocx(p, flags); } /* Test decommit for large allocations. 
*/ do_decommit = true; - p = mallocx(large1, 0); + p = mallocx(large1, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_decommit = false; - assert_zu_eq(xallocx(p, large0, 0, 0), large0, + assert_zu_eq(xallocx(p, large0, 0, flags), large0, "Unexpected xallocx() failure"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.purge error"); + assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + 0, "Unexpected arena.%u.purge error", arena_ind); did_commit = false; - assert_zu_eq(xallocx(p, large1, 0, 0), large1, + assert_zu_eq(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() failure"); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); - dallocx(p, 0); + dallocx(p, flags); do_decommit = false; /* Make sure non-huge allocation succeeds. */ - p = mallocx(42, 0); + p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - dallocx(p, 0); + dallocx(p, flags); /* Restore chunk hooks. */ - assert_d_eq(mallctl("arena.0.chunk_hooks", NULL, NULL, &old_hooks, - new_size), 0, "Unexpected chunk_hooks error"); - assert_d_eq(mallctl("arena.0.chunk_hooks", &old_hooks, &old_size, + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + &old_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, NULL, 0), 0, "Unexpected chunk_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); From 2687a720870edeae4c8dae71a82c98a7509fcc8e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 25 Apr 2016 23:14:40 -0700 Subject: [PATCH 0222/2608] Fix fork()-related lock rank ordering reversals. 
--- Makefile.in | 1 + include/jemalloc/internal/arena.h | 5 +- include/jemalloc/internal/private_symbols.txt | 8 ++- include/jemalloc/internal/prof.h | 3 +- src/arena.c | 32 +++++++++--- src/jemalloc.c | 38 +++++++++----- src/prof.c | 52 +++++++++++++------ test/unit/fork.c | 39 ++++++++++++++ 8 files changed, 139 insertions(+), 39 deletions(-) create mode 100644 test/unit/fork.c diff --git a/Makefile.in b/Makefile.in index f60823f5..4b0e184f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -138,6 +138,7 @@ TESTS_UNIT := $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3519873c..42a78960 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -584,7 +584,10 @@ void arena_nthreads_inc(arena_t *arena); void arena_nthreads_dec(arena_t *arena); arena_t *arena_new(unsigned ind); bool arena_boot(void); -void arena_prefork(arena_t *arena); +void arena_prefork0(arena_t *arena); +void arena_prefork1(arena_t *arena); +void arena_prefork2(arena_t *arena); +void arena_prefork3(arena_t *arena); void arena_postfork_parent(arena_t *arena); void arena_postfork_child(arena_t *arena); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5fcc6692..fafee81f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -81,7 +81,10 @@ arena_nthreads_inc arena_palloc arena_postfork_child arena_postfork_parent -arena_prefork +arena_prefork0 +arena_prefork1 +arena_prefork2 +arena_prefork3 arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked @@ -408,7 +411,8 @@ prof_malloc_sample_object prof_mdump prof_postfork_child prof_postfork_parent -prof_prefork 
+prof_prefork0 +prof_prefork1 prof_realloc prof_reset prof_sample_accum_update diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index a25502a9..48dd6cc5 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -316,7 +316,8 @@ bool prof_gdump_set(bool active); void prof_boot0(void); void prof_boot1(void); bool prof_boot2(void); -void prof_prefork(void); +void prof_prefork0(void); +void prof_prefork1(void); void prof_postfork_parent(void); void prof_postfork_child(void); void prof_sample_threshold_update(prof_tdata_t *tdata); diff --git a/src/arena.c b/src/arena.c index b7645d8b..48e9b20b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3646,16 +3646,34 @@ arena_boot(void) } void -arena_prefork(arena_t *arena) +arena_prefork0(arena_t *arena) +{ + + malloc_mutex_prefork(&arena->lock); +} + +void +arena_prefork1(arena_t *arena) +{ + + malloc_mutex_prefork(&arena->chunks_mtx); +} + +void +arena_prefork2(arena_t *arena) +{ + + malloc_mutex_prefork(&arena->node_cache_mtx); +} + +void +arena_prefork3(arena_t *arena) { unsigned i; - malloc_mutex_prefork(&arena->lock); - malloc_mutex_prefork(&arena->huge_mtx); - malloc_mutex_prefork(&arena->chunks_mtx); - malloc_mutex_prefork(&arena->node_cache_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_prefork(&arena->bins[i].lock); + malloc_mutex_prefork(&arena->huge_mtx); } void @@ -3663,11 +3681,11 @@ arena_postfork_parent(arena_t *arena) { unsigned i; + malloc_mutex_postfork_parent(&arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(&arena->bins[i].lock); malloc_mutex_postfork_parent(&arena->node_cache_mtx); malloc_mutex_postfork_parent(&arena->chunks_mtx); - malloc_mutex_postfork_parent(&arena->huge_mtx); malloc_mutex_postfork_parent(&arena->lock); } @@ -3676,10 +3694,10 @@ arena_postfork_child(arena_t *arena) { unsigned i; + malloc_mutex_postfork_child(&arena->huge_mtx); for (i = 0; i < NBINS; i++) 
malloc_mutex_postfork_child(&arena->bins[i].lock); malloc_mutex_postfork_child(&arena->node_cache_mtx); malloc_mutex_postfork_child(&arena->chunks_mtx); - malloc_mutex_postfork_child(&arena->huge_mtx); malloc_mutex_postfork_child(&arena->lock); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0735376e..71207913 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2644,7 +2644,8 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - unsigned i, narenas; + unsigned i, j, narenas; + arena_t *arena; #ifdef JEMALLOC_MUTEX_INIT_CB if (!malloc_initialized()) @@ -2652,18 +2653,31 @@ _malloc_prefork(void) #endif assert(malloc_initialized()); + narenas = narenas_total_get(); + /* Acquire all mutexes in a safe order. */ ctl_prefork(); - prof_prefork(); malloc_mutex_prefork(&arenas_lock); - for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena; - - if ((arena = arena_get(i, false)) != NULL) - arena_prefork(arena); + prof_prefork0(); + for (i = 0; i < 3; i++) { + for (j = 0; j < narenas; j++) { + if ((arena = arena_get(j, false)) != NULL) { + switch (i) { + case 0: arena_prefork0(arena); break; + case 1: arena_prefork1(arena); break; + case 2: arena_prefork2(arena); break; + default: not_reached(); + } + } + } } - chunk_prefork(); base_prefork(); + chunk_prefork(); + for (i = 0; i < narenas; i++) { + if ((arena = arena_get(i, false)) != NULL) + arena_prefork3(arena); + } + prof_prefork1(); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2683,16 +2697,16 @@ _malloc_postfork(void) assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_parent(); chunk_postfork_parent(); + base_postfork_parent(); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(i, false)) != NULL) arena_postfork_parent(arena); } - malloc_mutex_postfork_parent(&arenas_lock); prof_postfork_parent(); + malloc_mutex_postfork_parent(&arenas_lock); ctl_postfork_parent(); } @@ -2704,16 +2718,16 @@ jemalloc_postfork_child(void) assert(malloc_initialized()); /* Release all mutexes, now that fork() has completed. */ - base_postfork_child(); chunk_postfork_child(); + base_postfork_child(); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; if ((arena = arena_get(i, false)) != NULL) arena_postfork_child(arena); } - malloc_mutex_postfork_child(&arenas_lock); prof_postfork_child(); + malloc_mutex_postfork_child(&arenas_lock); ctl_postfork_child(); } diff --git a/src/prof.c b/src/prof.c index b3872277..a92320d4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2198,20 +2198,32 @@ prof_boot2(void) } void -prof_prefork(void) +prof_prefork0(void) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(&tdatas_mtx); + malloc_mutex_prefork(&prof_dump_mtx); malloc_mutex_prefork(&bt2gctx_mtx); - malloc_mutex_prefork(&next_thr_uid_mtx); - malloc_mutex_prefork(&prof_dump_seq_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(&gctx_locks[i]); + malloc_mutex_prefork(&tdatas_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) malloc_mutex_prefork(&tdata_locks[i]); + for (i = 0; i < PROF_NCTX_LOCKS; i++) + malloc_mutex_prefork(&gctx_locks[i]); + } +} + +void +prof_prefork1(void) +{ + + if (opt_prof) { + malloc_mutex_prefork(&prof_active_mtx); + malloc_mutex_prefork(&prof_dump_seq_mtx); + malloc_mutex_prefork(&prof_gdump_mtx); + malloc_mutex_prefork(&next_thr_uid_mtx); + malloc_mutex_prefork(&prof_thread_active_init_mtx); } } @@ -2222,14 +2234,18 @@ prof_postfork_parent(void) if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - 
malloc_mutex_postfork_parent(&tdata_locks[i]); + malloc_mutex_postfork_parent(&prof_thread_active_init_mtx); + malloc_mutex_postfork_parent(&next_thr_uid_mtx); + malloc_mutex_postfork_parent(&prof_gdump_mtx); + malloc_mutex_postfork_parent(&prof_dump_seq_mtx); + malloc_mutex_postfork_parent(&prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_parent(&gctx_locks[i]); - malloc_mutex_postfork_parent(&prof_dump_seq_mtx); - malloc_mutex_postfork_parent(&next_thr_uid_mtx); - malloc_mutex_postfork_parent(&bt2gctx_mtx); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_parent(&tdata_locks[i]); malloc_mutex_postfork_parent(&tdatas_mtx); + malloc_mutex_postfork_parent(&bt2gctx_mtx); + malloc_mutex_postfork_parent(&prof_dump_mtx); } } @@ -2240,14 +2256,18 @@ prof_postfork_child(void) if (opt_prof) { unsigned i; - for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(&tdata_locks[i]); + malloc_mutex_postfork_child(&prof_thread_active_init_mtx); + malloc_mutex_postfork_child(&next_thr_uid_mtx); + malloc_mutex_postfork_child(&prof_gdump_mtx); + malloc_mutex_postfork_child(&prof_dump_seq_mtx); + malloc_mutex_postfork_child(&prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) malloc_mutex_postfork_child(&gctx_locks[i]); - malloc_mutex_postfork_child(&prof_dump_seq_mtx); - malloc_mutex_postfork_child(&next_thr_uid_mtx); - malloc_mutex_postfork_child(&bt2gctx_mtx); + for (i = 0; i < PROF_NTDATA_LOCKS; i++) + malloc_mutex_postfork_child(&tdata_locks[i]); malloc_mutex_postfork_child(&tdatas_mtx); + malloc_mutex_postfork_child(&bt2gctx_mtx); + malloc_mutex_postfork_child(&prof_dump_mtx); } } diff --git a/test/unit/fork.c b/test/unit/fork.c new file mode 100644 index 00000000..890bc869 --- /dev/null +++ b/test/unit/fork.c @@ -0,0 +1,39 @@ +#include "test/jemalloc_test.h" + +#include + +TEST_BEGIN(test_fork) +{ + void *p; + pid_t pid; + + p = malloc(1); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + + pid = fork(); 
+ if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* Child. */ + exit(0); + } else { + int status; + + /* Parent. */ + free(p); + do { + if (waitpid(pid, &status, 0) == -1) + test_fail("Unexpected waitpid() failure"); + } while (!WIFEXITED(status) && !WIFSIGNALED(status)); + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_fork)); +} From 1eb46ab6e7be3db65131c7098635fe079eb5f9ed Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 17:18:34 -0700 Subject: [PATCH 0223/2608] Don't test fork() on Windows. --- test/unit/fork.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/fork.c b/test/unit/fork.c index d64f2e09..46c815ef 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,9 +1,12 @@ #include "test/jemalloc_test.h" +#ifndef _WIN32 #include +#endif TEST_BEGIN(test_fork) { +#ifndef _WIN32 void *p; pid_t pid; @@ -46,6 +49,9 @@ TEST_BEGIN(test_fork) } } } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif } TEST_END From 21cda0dc42bdcb1b5b6ecdb82157a0af84c9f0c4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 12:11:36 -0700 Subject: [PATCH 0224/2608] Update ChangeLog for 4.1.1. --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 69f4dbb0..9b924cdf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,27 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.1.1 (May 3, 2016) + + This bugfix release resolves a variety of mostly minor issues, though the + bitmap fix is critical for 64-bit Windows. + + Bug fixes: + - Fix the linear scan version of bitmap_sfu() to shift by the proper amount + even when sizeof(long) is not the same as sizeof(void *), as on 64-bit + Windows. (@jasone) + - Fix hashing functions to avoid unaligned memory accesses (and resulting + crashes). This is relevant at least to some ARM-based platforms. 
+ (@rkmisra) + - Fix fork()-related lock rank ordering reversals. These reversals were + unlikely to cause deadlocks in practice except when heap profiling was + enabled and active. (@jasone) + - Fix various chunk leaks in OOM code paths. (@jasone) + - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) + - Fix MSVC-specific build/test issues. (@rustyx, yuslepukhin) + - Fix a variety of test failures that were due to test fragility rather than + core bugs. (@jasone) + * 4.1.0 (February 28, 2016) This release is primarily about optimizations, but it also incorporates a lot From 90827a3f3ef2099dcd480d542aacc9f44a0787e8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 15:00:42 -0700 Subject: [PATCH 0225/2608] Fix huge_palloc() regression. Split arena_choose() into arena_[i]choose() and use arena_ichoose() for arena lookup during internal allocation. This fixes huge_palloc() so that it always succeeds during extent node allocation. This regression was introduced by 66cd953514a18477eb49732e40d5c2ab5f1b12c5 (Do not allocate metadata via non-auto arenas, nor tcaches.). 
--- .../jemalloc/internal/jemalloc_internal.h.in | 24 +++++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 2 ++ include/jemalloc/internal/tcache.h | 7 +++--- src/arena.c | 4 ++-- src/ckh.c | 6 ++--- src/ctl.c | 2 +- src/huge.c | 5 ++-- src/prof.c | 2 +- src/tcache.c | 10 ++++---- 9 files changed, 42 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fe58c1c6..62d5da29 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -550,7 +550,9 @@ size_t s2u_compute(size_t size); size_t s2u_lookup(size_t size); size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); @@ -788,7 +790,7 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. 
*/ JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena, bool internal) +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; @@ -802,6 +804,24 @@ arena_choose(tsd_t *tsd, arena_t *arena, bool internal) return (ret); } +JEMALLOC_INLINE arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) +{ + + return (arena_choose_impl(tsd, arena, false)); +} + +JEMALLOC_INLINE arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) +{ + + assert(tsd != NULL || arena != NULL); + + if (tsd != NULL) + return (arena_choose_impl(tsd, NULL, true)); + return (arena); +} + JEMALLOC_INLINE arena_tdata_t * arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index de884fcf..7958a4ff 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -10,6 +10,7 @@ arena_bitselm_get_mutable arena_boot arena_choose arena_choose_hard +arena_choose_impl arena_chunk_alloc_huge arena_chunk_cache_maybe_insert arena_chunk_cache_maybe_remove @@ -35,6 +36,7 @@ arena_decay_time_set arena_dss_prec_get arena_dss_prec_set arena_get +arena_ichoose arena_init arena_lg_dirty_mult_default_get arena_lg_dirty_mult_default_set diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 82724304..59f60235 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -293,7 +293,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); @@ -354,7 +354,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * Only allocate one large object at a time, because it's quite * expensive to 
create one and not use it. */ - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); @@ -460,8 +460,7 @@ tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL, - false)); + elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); } return (elm->tcache); } diff --git a/src/arena.c b/src/arena.c index 969ad85d..45c53c18 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2649,7 +2649,7 @@ arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero) { - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); @@ -2674,7 +2674,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index aa9803e8..25185974 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -271,7 +271,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) goto label_return; } tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, arena_choose(tsd, NULL, true)); + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -315,7 +315,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_choose(tsd, NULL, true)); + arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, goto label_return; } ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_choose(tsd, NULL, true)); + arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = 
true; goto label_return; diff --git a/src/ctl.c b/src/ctl.c index 2e811430..e0392d0e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1306,7 +1306,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *oldarena; unsigned newind, oldind; - oldarena = arena_choose(tsd, NULL, false); + oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) return (EAGAIN); diff --git a/src/huge.c b/src/huge.c index bac2425f..0b3aed0d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -56,8 +56,9 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ + assert(tsd != NULL || arena != NULL); node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_choose(tsd, NULL, true)); + CACHELINE, false, NULL, true, arena_ichoose(tsd, arena)); if (node == NULL) return (NULL); @@ -66,7 +67,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; - arena = arena_choose(tsd, arena, false); + arena = arena_choose(tsd, arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsd, node, NULL, true, true); diff --git a/src/prof.c b/src/prof.c index 92edba84..b21cd6be 100644 --- a/src/prof.c +++ b/src/prof.c @@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. 
*/ ret.v = iallocztm(tsd, sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_choose(tsd, NULL, true), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); diff --git a/src/tcache.c b/src/tcache.c index ca867c72..88005f30 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -97,7 +97,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(binind < NBINS); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ @@ -179,7 +179,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ @@ -307,7 +307,7 @@ tcache_get_hard(tsd_t *tsd) tcache_enabled_set(false); /* Memoize. 
*/ return (NULL); } - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); return (tcache_create(tsd, arena)); @@ -359,7 +359,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_t *arena; unsigned i; - arena = arena_choose(tsd, NULL, false); + arena = arena_choose(tsd, NULL); tcache_arena_dissociate(tsd, tcache, arena); for (i = 0; i < NBINS; i++) { @@ -459,7 +459,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_choose(tsd, NULL, true); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); tcache = tcache_create(tsd, arena); From 21e33ed317cef444d7e9421998ae3e39b2c93b6d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 17:18:34 -0700 Subject: [PATCH 0226/2608] Don't test fork() on Windows. --- test/unit/fork.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/fork.c b/test/unit/fork.c index 890bc869..c0d5642d 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,9 +1,12 @@ #include "test/jemalloc_test.h" +#ifndef _WIN32 #include +#endif TEST_BEGIN(test_fork) { +#ifndef _WIN32 void *p; pid_t pid; @@ -27,6 +30,9 @@ TEST_BEGIN(test_fork) test_fail("Unexpected waitpid() failure"); } while (!WIFEXITED(status) && !WIFSIGNALED(status)); } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif } TEST_END From 44d12d435aba23c4a99af1dddf20e1777690b348 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 09:37:54 -0700 Subject: [PATCH 0227/2608] Update mallocx() OOM test to deal with smaller hugemax. Depending on virtual memory resource limits, it is necessary to attempt allocating three maximally sized objects to trigger OOM rather than just two, since the maximum supported size is slightly less than half the total virtual memory address space. 
This fixes a test failure that was introduced by 0c516a00c4cb28cff55ce0995f756b5aae074c9e (Make *allocx() size class overflow behavior defined.). This resolves #379. --- test/integration/mallocx.c | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 42eee105..c185cc63 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -70,22 +70,27 @@ TEST_END TEST_BEGIN(test_oom) { size_t hugemax, size, alignment; - - hugemax = get_huge_size(get_nhuge()-1); + bool oom; + void *ptrs[3]; + unsigned i; /* - * It should be impossible to allocate two objects that each consume - * more than half the virtual address space. + * It should be impossible to allocate three objects that each consume + * nearly half the virtual address space. */ - { - void *p; - - p = mallocx(hugemax, 0); - if (p != NULL) { - assert_ptr_null(mallocx(hugemax, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax); - dallocx(p, 0); - } + hugemax = get_huge_size(get_nhuge()-1); + oom = false; + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + ptrs[i] = mallocx(hugemax, 0); + if (ptrs[i] == NULL) + oom = true; + } + assert_true(oom, + "Expected OOM during series of calls to mallocx(size=%zu, 0)", + hugemax); + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + if (ptrs[i] != NULL) + dallocx(ptrs[i], 0); } #if LG_SIZEOF_PTR == 3 From 417c0c9ef16de156960f5dae684066e5d55381d1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 10:40:20 -0700 Subject: [PATCH 0228/2608] Add private symbols. 
--- include/jemalloc/internal/private_symbols.txt | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index fafee81f..30516b4b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -21,7 +21,9 @@ arena_dalloc arena_dalloc_bin arena_dalloc_bin_junked_locked arena_dalloc_junk_large +arena_dalloc_junk_large_impl arena_dalloc_junk_small +arena_dalloc_junk_small_impl arena_dalloc_large arena_dalloc_large_junked_locked arena_dalloc_small @@ -126,6 +128,11 @@ atomic_sub_u atomic_sub_uint32 atomic_sub_uint64 atomic_sub_z +atomic_write_p +atomic_write_u +atomic_write_uint32 +atomic_write_uint64 +atomic_write_z base_alloc base_boot base_postfork_child @@ -201,6 +208,8 @@ extent_node_addr_get extent_node_addr_set extent_node_arena_get extent_node_arena_set +extent_node_committed_get +extent_node_committed_set extent_node_dirty_insert extent_node_dirty_linkage_init extent_node_dirty_remove @@ -211,6 +220,8 @@ extent_node_size_get extent_node_size_set extent_node_zeroed_get extent_node_zeroed_set +extent_tree_ad_destroy +extent_tree_ad_destroy_recurse extent_tree_ad_empty extent_tree_ad_first extent_tree_ad_insert @@ -228,6 +239,8 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search +extent_tree_szad_destroy +extent_tree_szad_destroy_recurse extent_tree_szad_empty extent_tree_szad_first extent_tree_szad_insert @@ -305,6 +318,7 @@ jemalloc_postfork_parent jemalloc_prefork large_maxclass lg_floor +lg_prof_sample malloc_cprintf malloc_mutex_init malloc_mutex_lock @@ -332,6 +346,8 @@ narenas_tdata_cleanup narenas_total_get ncpus nhbins +nhclasses +nlclasses nstime_add nstime_compare nstime_copy @@ -345,6 +361,7 @@ nstime_nsec nstime_sec nstime_subtract nstime_update +nstime_update_impl opt_abort opt_decay_time opt_dss @@ -385,6 +402,7 @@ 
pow2_ceil_u64 pow2_ceil_zu prng_lg_range prng_range +prof_active prof_active_get prof_active_get_unlocked prof_active_set @@ -394,6 +412,7 @@ prof_backtrace prof_boot0 prof_boot1 prof_boot2 +prof_bt_count prof_dump_header prof_dump_open prof_free @@ -421,6 +440,7 @@ prof_tctx_get prof_tctx_reset prof_tctx_set prof_tdata_cleanup +prof_tdata_count prof_tdata_get prof_tdata_init prof_tdata_reinit @@ -508,6 +528,13 @@ ticker_tick ticker_ticks tsd_arena_get tsd_arena_set +tsd_arenap_get +tsd_arenas_tdata_bypass_get +tsd_arenas_tdata_bypass_set +tsd_arenas_tdata_bypassp_get +tsd_arenas_tdata_get +tsd_arenas_tdata_set +tsd_arenas_tdatap_get tsd_boot tsd_boot0 tsd_boot1 @@ -516,6 +543,9 @@ tsd_cleanup tsd_cleanup_wrapper tsd_fetch tsd_get +tsd_narenas_tdata_get +tsd_narenas_tdata_set +tsd_narenas_tdatap_get tsd_wrapper_get tsd_wrapper_set tsd_initialized @@ -525,17 +555,23 @@ tsd_init_head tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set +tsd_prof_tdatap_get tsd_quarantine_get tsd_quarantine_set +tsd_quarantinep_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set +tsd_tcache_enabledp_get tsd_tcache_get tsd_tcache_set +tsd_tcachep_get tsd_thread_allocated_get tsd_thread_allocated_set +tsd_thread_allocatedp_get tsd_thread_deallocated_get tsd_thread_deallocated_set +tsd_thread_deallocatedp_get tsd_tls tsd_tsd u2rz From 2e5eb21184cccabc829265b5f5237f3c13563be6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 12:11:36 -0700 Subject: [PATCH 0229/2608] Update ChangeLog for 4.1.1. --- ChangeLog | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/ChangeLog b/ChangeLog index 9cbfbf96..af78615b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,27 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.1.1 (May 3, 2016) + + This bugfix release resolves a variety of mostly minor issues, though the + bitmap fix is critical for 64-bit Windows. 
+ + Bug fixes: + - Fix the linear scan version of bitmap_sfu() to shift by the proper amount + even when sizeof(long) is not the same as sizeof(void *), as on 64-bit + Windows. (@jasone) + - Fix hashing functions to avoid unaligned memory accesses (and resulting + crashes). This is relevant at least to some ARM-based platforms. + (@rkmisra) + - Fix fork()-related lock rank ordering reversals. These reversals were + unlikely to cause deadlocks in practice except when heap profiling was + enabled and active. (@jasone) + - Fix various chunk leaks in OOM code paths. (@jasone) + - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) + - Fix MSVC-specific build/test issues. (@rustyx, yuslepukhin) + - Fix a variety of test failures that were due to test fragility rather than + core bugs. (@jasone) + * 4.1.0 (February 28, 2016) This release is primarily about optimizations, but it also incorporates a lot From 7ba6e742335117eca1268757add9adac808e4e38 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 17:46:07 -0700 Subject: [PATCH 0230/2608] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 9b924cdf..3cb32be9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -21,7 +21,7 @@ brevity. Much more detail can be found in the git revision history: enabled and active. (@jasone) - Fix various chunk leaks in OOM code paths. (@jasone) - Fix malloc_stats_print() to print opt.narenas correctly. (@jasone) - - Fix MSVC-specific build/test issues. (@rustyx, yuslepukhin) + - Fix MSVC-specific build/test issues. (@rustyx, @yuslepukhin) - Fix a variety of test failures that were due to test fragility rather than core bugs. (@jasone) From c1e9cf47f93713e9d9b7c28c13b53f90d19a2c9d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 21:28:20 -0700 Subject: [PATCH 0231/2608] Link against librt for clock_gettime(2) if glibc < 2.17. 
Link libjemalloc against librt if clock_gettime(2) is in librt rather than libc, as for versions of glibc prior to 2.17. This resolves #349. --- Makefile.in | 7 +++---- configure.ac | 10 ++-------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/Makefile.in b/Makefile.in index a98ebd62..1cf4bf0f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -28,7 +28,6 @@ CFLAGS := @CFLAGS@ LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ -TESTLIBS := @TESTLIBS@ RPATH_EXTRA := @RPATH_EXTRA@ SO := @so@ IMPORTLIB := @importlib@ @@ -295,15 +294,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(TESTLIBS) $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) 
$(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 275576bd..1a1c9704 100644 --- a/configure.ac +++ b/configure.ac @@ -1239,13 +1239,8 @@ fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" -dnl Check whether clock_gettime(2) is in libc or librt. This function is only -dnl used in test code, so save the result to TESTLIBS to avoid poluting LIBS. -SAVED_LIBS="${LIBS}" -LIBS= -AC_SEARCH_LIBS([clock_gettime], [rt], [TESTLIBS="${LIBS}"]) -AC_SUBST([TESTLIBS]) -LIBS="${SAVED_LIBS}" +dnl Check whether clock_gettime(2) is in libc or librt. +AC_SEARCH_LIBS([clock_gettime], [rt]) dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], @@ -1751,7 +1746,6 @@ AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) -AC_MSG_RESULT([TESTLIBS : ${TESTLIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) AC_MSG_RESULT([XSLTPROC : ${XSLTPROC}]) From 04c3c0f9a0c910589a75604d8d0405407f1f035d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 May 2016 22:11:35 -0700 Subject: [PATCH 0232/2608] Add the stats.retained and stats.arenas..retained statistics. This resolves #367. --- doc/jemalloc.xml.in | 31 +++++++++++++++++++++++++++++++ include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/stats.h | 8 ++++++++ src/arena.c | 1 + src/chunk.c | 13 +++++++++++-- src/ctl.c | 10 ++++++++++ src/stats.c | 12 ++++++++---- 7 files changed, 70 insertions(+), 6 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7b602a51..c4a44e3c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2186,6 +2186,25 @@ typedef struct { linkend="stats.resident">stats.resident. 
+ + + stats.retained + (size_t) + r- + [] + + Total number of bytes in virtual memory mappings that + were retained rather than being returned to the operating system via + e.g. munmap + 2. Retained virtual memory is + typically untouched, decommitted, or purged, so it has no strongly + associated physical memory (see chunk hooks for details). Retained + memory is excluded from mapped memory statistics, e.g. stats.mapped. + + + stats.arenas.<i>.dss @@ -2266,6 +2285,18 @@ typedef struct { Number of mapped bytes. + + + stats.arenas.<i>.retained + (size_t) + r- + [] + + Number of retained bytes. See stats.retained for + details. + + stats.arenas.<i>.metadata.mapped diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index ec856996..c84c0de9 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -61,6 +61,7 @@ struct ctl_stats_s { size_t metadata; size_t resident; size_t mapped; + size_t retained; unsigned narenas; ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ }; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 705903ad..b6218178 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -102,6 +102,14 @@ struct arena_stats_s { /* Number of bytes currently mapped. */ size_t mapped; + /* + * Number of bytes currently retained as a side effect of munmap() being + * disabled/bypassed. Retained bytes are technically mapped (though + * always decommitted or purged), but they are excluded from the mapped + * statistic (above). 
+ */ + size_t retained; + /* * Total number of purge sweeps, total number of madvise calls made, * and total pages purged in order to keep dirty unused memory under diff --git a/src/arena.c b/src/arena.c index 45c53c18..1172dc2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3414,6 +3414,7 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, decay_time, nactive, ndirty); astats->mapped += arena->stats.mapped; + astats->retained += arena->stats.retained; astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; diff --git a/src/chunk.c b/src/chunk.c index 0ee2a1a7..1f2afd9d 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -444,15 +444,21 @@ static void * chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { + void *ret; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - return (chunk_recycle(tsd, arena, chunk_hooks, + ret = chunk_recycle(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true)); + new_addr, size, alignment, zero, commit, true); + + if (config_stats && ret != NULL) + arena->stats.retained -= size; + + return (ret); } void * @@ -617,6 +623,9 @@ chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena->ind); chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); + + if (config_stats) + arena->stats.retained += size; } static bool diff --git a/src/ctl.c b/src/ctl.c index e0392d0e..fd5561a3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -192,6 +192,7 @@ CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_mapped) +CTL_PROTO(stats_arenas_i_retained) 
CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) @@ -204,6 +205,7 @@ CTL_PROTO(stats_active) CTL_PROTO(stats_metadata) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) +CTL_PROTO(stats_retained) /******************************************************************************/ /* mallctl tree. */ @@ -458,6 +460,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("retained"), CTL(stats_arenas_i_retained)}, {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, @@ -484,6 +487,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("metadata"), CTL(stats_metadata)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, + {NAME("retained"), CTL(stats_retained)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -591,6 +595,7 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) if (config_stats) { sstats->astats.mapped += astats->astats.mapped; + sstats->astats.retained += astats->astats.retained; sstats->astats.npurge += astats->astats.npurge; sstats->astats.nmadvise += astats->astats.nmadvise; sstats->astats.purged += astats->astats.purged; @@ -745,6 +750,8 @@ ctl_refresh(tsd_t *tsd) ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); ctl_stats.mapped = base_mapped + ctl_stats.arenas[ctl_stats.narenas].astats.mapped; + ctl_stats.retained = + ctl_stats.arenas[ctl_stats.narenas].astats.retained; } ctl_epoch++; @@ -2108,6 +2115,7 @@ CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) 
+CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, @@ -2119,6 +2127,8 @@ CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, ctl_stats.arenas[mib[2]].astats.mapped, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_retained, + ctl_stats.arenas[mib[2]].astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, diff --git a/src/stats.c b/src/stats.c index 87b09e58..073be4fe 100644 --- a/src/stats.c +++ b/src/stats.c @@ -259,7 +259,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned nthreads; const char *dss; ssize_t lg_dirty_mult, decay_time; - size_t page, pactive, pdirty, mapped; + size_t page, pactive, pdirty, mapped, retained; size_t metadata_mapped, metadata_allocated; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -349,6 +349,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); malloc_cprintf(write_cb, cbopaque, "mapped: %12zu\n", mapped); + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, @@ -597,7 +600,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_stats) { size_t *cactive; - size_t allocated, active, metadata, resident, mapped; + size_t allocated, active, metadata, resident, mapped, retained; CTL_GET("stats.cactive", &cactive, 
size_t *); CTL_GET("stats.allocated", &allocated, size_t); @@ -605,10 +608,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.metadata", &metadata, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); malloc_cprintf(write_cb, cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu\n", - allocated, active, metadata, resident, mapped); + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); malloc_cprintf(write_cb, cbopaque, "Current active ceiling: %zu\n", atomic_read_z(cactive)); From dc391adc6577b4ed0dac0ce3b1778473e67e4c17 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 May 2016 12:14:36 -0700 Subject: [PATCH 0233/2608] Scale leak report summary according to sampling probability. This makes the numbers reported in the leak report summary closely match those reported by jeprof. This resolves #356. --- src/prof.c | 56 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/src/prof.c b/src/prof.c index b21cd6be..df7f1f9b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -828,22 +828,22 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (ret.p); } +/* + * The bodies of this function and prof_leakcheck() are compiled out unless heap + * profiling is enabled, so that it is possible to compile jemalloc with + * floating point support completely disabled. Avoiding floating point code is + * important on memory-constrained systems, but it also enables a workaround for + * versions of glibc that don't properly save/restore floating point registers + * during dynamic lazy symbol loading (which internally calls into whatever + * malloc implementation happens to be integrated into the application). Note + * that some compilers (e.g. 
gcc 4.8) may use floating point registers for fast + * memory moves, so jemalloc must be compiled with such optimizations disabled + * (e.g. + * -mno-sse) in order for the workaround to be complete. + */ void prof_sample_threshold_update(prof_tdata_t *tdata) { - /* - * The body of this function is compiled out unless heap profiling is - * enabled, so that it is possible to compile jemalloc with floating - * point support completely disabled. Avoiding floating point code is - * important on memory-constrained systems, but it also enables a - * workaround for versions of glibc that don't properly save/restore - * floating point registers during dynamic lazy symbol loading (which - * internally calls into whatever malloc implementation happens to be - * integrated into the application). Note that some compilers (e.g. - * gcc 4.8) may use floating point registers for fast memory moves, so - * jemalloc must be compiled with such optimizations disabled (e.g. - * -mno-sse) in order for the workaround to be complete. - */ #ifdef JEMALLOC_PROF uint64_t r; double u; @@ -1477,21 +1477,41 @@ label_return: return (ret); } +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. + */ static void prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. + */ if (cnt_all->curbytes != 0) { - malloc_printf(": Leak summary: %"FMTu64" byte%s, %" - FMTu64" object%s, %zu context%s\n", - cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "", - cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "", - leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on \"%s\" for leak detail\n", filename); } +#endif } struct prof_gctx_dump_iter_arg_s { From c2f970c32b527660a33fa513a76d913c812dcf7c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 5 May 2016 17:45:02 -0700 Subject: [PATCH 0234/2608] Modify pages_map() to support mapping uncommitted virtual memory. If the OS overcommits: - Commit all mappings in pages_map() regardless of whether the caller requested committed memory. - Linux-specific: Specify MAP_NORESERVE to avoid unfortunate interactions with heuristic overcommit mode during fork(2). This resolves #193. 
--- configure.ac | 2 + .../internal/jemalloc_internal_defs.h.in | 9 ++ include/jemalloc/internal/pages.h | 5 +- include/jemalloc/internal/private_symbols.txt | 1 + src/chunk_mmap.c | 10 +- src/jemalloc.c | 1 + src/pages.c | 116 +++++++++++++++--- 7 files changed, 117 insertions(+), 27 deletions(-) diff --git a/configure.ac b/configure.ac index 1a1c9704..7f19715d 100644 --- a/configure.ac +++ b/configure.ac @@ -305,6 +305,7 @@ case "${host}" in *-*-freebsd*) CFLAGS="$CFLAGS" abi="elf" + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; @@ -329,6 +330,7 @@ case "${host}" in CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 2c753719..7de0cf7c 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -214,6 +214,15 @@ #undef JEMALLOC_ZONE #undef JEMALLOC_ZONE_VERSION +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +#undef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + /* * Methods for purging unused pages differ between operating systems. 
* diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index da7eb968..e21effd1 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -9,13 +9,14 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *pages_map(void *addr, size_t size); +void *pages_map(void *addr, size_t size, bool *commit); void pages_unmap(void *addr, size_t size); void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size); + size_t size, bool *commit); bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); +void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 7958a4ff..0f9b99e4 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -398,6 +398,7 @@ opt_utrace opt_xmalloc opt_zero p2rz +pages_boot pages_commit pages_decommit pages_map diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index e2e66bc9..f95ae756 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -16,18 +16,16 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) do { void *pages; size_t leadsize; - pages = pages_map(NULL, alloc_size); + pages = pages_map(NULL, alloc_size, commit); if (pages == NULL) return (NULL); leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; - ret = pages_trim(pages, alloc_size, leadsize, size); + ret = pages_trim(pages, alloc_size, leadsize, size, commit); } while (ret == NULL); assert(ret != NULL); *zero = true; - if (!*commit) - *commit = pages_decommit(ret, size); return (ret); } @@ -54,7 +52,7 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, 
assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = pages_map(new_addr, size); + ret = pages_map(new_addr, size, commit); if (ret == NULL || ret == new_addr) return (ret); assert(new_addr == NULL); @@ -66,8 +64,6 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, assert(ret != NULL); *zero = true; - if (!*commit) - *commit = pages_decommit(ret, size); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index cd97ea16..1a26a44f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1272,6 +1272,7 @@ malloc_init_hard_a0_locked(tsd_t *tsd) abort(); } } + pages_boot(); if (base_boot()) return (true); if (chunk_boot()) diff --git a/src/pages.c b/src/pages.c index 83a167f6..2a9b7e37 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,29 +1,49 @@ #define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_internal.h" +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT +#include +#endif + +/******************************************************************************/ +/* Data. */ + +#ifndef _WIN32 +# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) +# define PAGES_PROT_DECOMMIT (PROT_NONE) +static int mmap_flags; +#endif +static bool os_overcommits; + /******************************************************************************/ void * -pages_map(void *addr, size_t size) +pages_map(void *addr, size_t size, bool *commit) { void *ret; assert(size != 0); + if (os_overcommits) + *commit = true; + #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is * given, it fails and returns NULL. */ - ret = VirtualAlloc(addr, size, MEM_COMMIT | MEM_RESERVE, + ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0), PAGE_READWRITE); #else /* * We don't use MAP_FIXED here, because it can cause the *replacement* * of existing mappings, and we only want to create new mappings. */ - ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, - -1, 0); + { + int prot = *commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + + ret = mmap(addr, size, prot, mmap_flags, -1, 0); + } assert(ret != NULL); if (ret == MAP_FAILED) @@ -67,7 +87,8 @@ pages_unmap(void *addr, size_t size) } void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) +pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, + bool *commit) { void *ret = (void *)((uintptr_t)addr + leadsize); @@ -77,7 +98,7 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size) void *new_addr; pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size); + new_addr = pages_map(ret, size, commit); if (new_addr == ret) return (ret); if (new_addr) @@ -101,17 +122,17 @@ static bool pages_commit_impl(void *addr, size_t size, bool commit) { -#ifndef _WIN32 - /* - * The following decommit/commit implementation is functional, but - * always disabled because it doesn't add value beyong improved - * debugging (at the cost of extra system calls) on systems that - * overcommit. - */ - if (false) { - int prot = commit ? (PROT_READ | PROT_WRITE) : PROT_NONE; - void *result = mmap(addr, size, prot, MAP_PRIVATE | MAP_ANON | - MAP_FIXED, -1, 0); + if (os_overcommits) + return (true); + +#ifdef _WIN32 + return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, + PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); +#else + { + int prot = commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, + -1, 0); if (result == MAP_FAILED) return (true); if (result != addr) { @@ -125,7 +146,6 @@ pages_commit_impl(void *addr, size_t size, bool commit) return (false); } #endif - return (true); } bool @@ -171,3 +191,63 @@ pages_purge(void *addr, size_t size) return (unzeroed); } +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT +static bool +os_overcommits_sysctl(void) +{ + int vm_overcommit; + size_t sz; + + sz = sizeof(vm_overcommit); + if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) + return (false); /* Error. */ + + return ((vm_overcommit & 0x3) == 0); +} +#endif + +#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +static bool +os_overcommits_proc(void) +{ + int fd; + char buf[1]; + ssize_t nread; + + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd == -1) + return (false); /* Error. */ + + nread = read(fd, &buf, sizeof(buf)); + if (nread < 1) + return (false); /* Error. */ + /* + * /proc/sys/vm/overcommit_memory meanings: + * 0: Heuristic overcommit. + * 1: Always overcommit. + * 2: Never overcommit. + */ + return (buf[0] == '0' || buf[0] == '1'); +} +#endif + +void +pages_boot(void) +{ + +#ifndef _WIN32 + mmap_flags = MAP_PRIVATE | MAP_ANON; +#endif + +#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT + os_overcommits = os_overcommits_sysctl(); +#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) + os_overcommits = os_overcommits_proc(); +# ifdef MAP_NORESERVE + if (os_overcommits) + mmap_flags |= MAP_NORESERVE; +# endif +#else + os_overcommits = false; +#endif +} From 3ef51d7f733ac6432e80fa902a779ab5b98d74f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 12:16:00 -0700 Subject: [PATCH 0235/2608] Optimize the fast paths of calloc() and [m,d,sd]allocx(). 
This is a broader application of optimizations to malloc() and free() in f4a0f32d340985de477bbe329ecdaecd69ed1055 (Fast-path improvement: reduce # of branches and unnecessary operations.). This resolves #321. --- include/jemalloc/internal/arena.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 60 +--- include/jemalloc/internal/private_symbols.txt | 6 +- src/arena.c | 2 +- src/huge.c | 2 +- src/jemalloc.c | 300 +++++++----------- 6 files changed, 137 insertions(+), 243 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 53e6b3ad..debb43f3 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -680,7 +680,8 @@ void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, arena_t *arena_aalloc(const void *ptr); size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1446,7 +1447,8 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { arena_chunk_t *chunk; @@ -1473,7 +1475,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL)) { szind_t binind = size2index(size); tcache_dalloc_small(tsd, tcache, ptr, binind, - true); + slow_path); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1486,7 +1488,7 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) if (likely(tcache != NULL) && size <= tcache_maxclass) { tcache_dalloc_large(tsd, tcache, ptr, size, - 
true); + slow_path); } else { arena_dalloc_large(tsd, extent_node_arena_get( &chunk->node), chunk, ptr); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 62d5da29..fe504d8d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -895,12 +895,8 @@ arena_t *iaalloc(const void *ptr); size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); -void *imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, - arena_t *arena); -void *imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path); -void *icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, - arena_t *arena); -void *icalloc(tsd_t *tsd, size_t size, szind_t ind); +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, + bool slow_path); void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, @@ -911,11 +907,12 @@ size_t u2rz(size_t usize); size_t p2rz(tsd_t *tsd, const void *ptr); void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); -void idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache); void idalloc(tsd_t *tsd, void *ptr); void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache); +void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t 
*arena); @@ -972,35 +969,13 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE void * -imalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd, size, ind, false, tcache, false, arena, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -imalloc(tsd_t *tsd, size_t size, szind_t ind, bool slow_path) -{ - - return (iallocztm(tsd, size, ind, false, tcache_get(tsd, true), false, + return (iallocztm(tsd, size, ind, zero, tcache_get(tsd, true), false, NULL, slow_path)); } -JEMALLOC_ALWAYS_INLINE void * -icalloct(tsd_t *tsd, size_t size, szind_t ind, tcache_t *tcache, arena_t *arena) -{ - - return (iallocztm(tsd, size, ind, true, tcache, false, arena, true)); -} - -JEMALLOC_ALWAYS_INLINE void * -icalloc(tsd_t *tsd, size_t size, szind_t ind) -{ - - return (iallocztm(tsd, size, ind, true, tcache_get(tsd, true), false, - NULL, true)); -} - JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) @@ -1091,13 +1066,6 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, arena_dalloc(tsd, ptr, tcache, slow_path); } -JEMALLOC_ALWAYS_INLINE void -idalloct(tsd_t *tsd, void *ptr, tcache_t *tcache) -{ - - idalloctm(tsd, ptr, tcache, false, true); -} - JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { @@ -1116,20 +1084,20 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - arena_sdalloc(tsd, ptr, size, tcache); + arena_sdalloc(tsd, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache) +isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t 
*tcache, bool slow_path) { - if (config_fill && unlikely(opt_quarantine)) + if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, tcache); + isdalloct(tsd, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1160,7 +1128,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (p); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0f9b99e4..e47296ff 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -291,15 +291,11 @@ huge_ralloc huge_ralloc_no_move huge_salloc iaalloc +ialloc iallocztm iarena_cleanup -icalloc -icalloct idalloc -idalloct idalloctm -imalloc -imalloct in_valgrind index2size index2size_compute diff --git a/src/arena.c b/src/arena.c index 1172dc2c..992d96f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3303,7 +3303,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (usize < oldsize) ? usize : oldsize; JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); } else { ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, zero, tcache); diff --git a/src/huge.c b/src/huge.c index 0b3aed0d..71fb50c5 100644 --- a/src/huge.c +++ b/src/huge.c @@ -364,7 +364,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, copysize = (usize < oldsize) ? 
usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache); + isqalloc(tsd, ptr, oldsize, tcache, true); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 1a26a44f..259ab4f7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,10 +70,10 @@ typedef enum { } malloc_init_t; static malloc_init_t malloc_init_state = malloc_init_uninitialized; -/* 0 should be the common case. Set to true to trigger initialization. */ +/* False should be the common case. Set to true to trigger initialization. */ static bool malloc_slow = true; -/* When malloc_slow != 0, set the corresponding bits for sanity check. */ +/* When malloc_slow is true, set the corresponding bits for sanity check. */ enum { flag_opt_junk_alloc = (1U), flag_opt_junk_free = (1U << 1), @@ -1443,7 +1443,7 @@ malloc_init_hard(void) */ static void * -imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, +ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, prof_tctx_t *tctx, bool slow_path) { void *p; @@ -1452,27 +1452,27 @@ imalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, return (NULL); if (usize <= SMALL_MAXCLASS) { szind_t ind_large = size2index(LARGE_MINCLASS); - p = imalloc(tsd, LARGE_MINCLASS, ind_large, slow_path); + p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(tsd, p, usize); } else - p = imalloc(tsd, usize, ind, slow_path); + p = ialloc(tsd, usize, ind, zero, slow_path); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) +ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) { void *p; prof_tctx_t *tctx; tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imalloc_prof_sample(tsd, usize, ind, tctx, slow_path); + p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path); else - p = imalloc(tsd, usize, ind, 
slow_path); + p = ialloc(tsd, usize, ind, zero, slow_path); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); @@ -1482,16 +1482,21 @@ imalloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool slow_path) return (p); } +/* + * ialloc_body() is inlined so that fast and slow paths are generated separately + * with statically known slow_path. + */ JEMALLOC_ALWAYS_INLINE_C void * -imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) +ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) { szind_t ind; - if (slow_path && unlikely(malloc_init())) + if (slow_path && unlikely(malloc_init())) { + *tsd = NULL; return (NULL); + } *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); ind = size2index(size); @@ -1505,26 +1510,30 @@ imalloc_body(size_t size, tsd_t **tsd, size_t *usize, bool slow_path) } if (config_prof && opt_prof) - return (imalloc_prof(*tsd, *usize, ind, slow_path)); + return (ialloc_prof(*tsd, *usize, ind, zero, slow_path)); - return (imalloc(*tsd, size, ind, slow_path)); + return (ialloc(*tsd, size, ind, zero, slow_path)); } JEMALLOC_ALWAYS_INLINE_C void -imalloc_post_check(void *ret, tsd_t *tsd, size_t usize, bool slow_path) +ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, + bool update_errno, bool slow_path) { + if (unlikely(ret == NULL)) { if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in malloc(): " - "out of memory\n"); + malloc_printf(": Error in %s(): out of " + "memory\n", func); abort(); } - set_errno(ENOMEM); + if (update_errno) + set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { assert(usize == isalloc(tsd, ret, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } + witness_assert_lockless(tsd); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1540,20 +1549,15 @@ je_malloc(size_t size) size = 1; if (likely(!malloc_slow)) { - /* - * imalloc_body() is inlined so that fast and slow 
paths are - * generated separately with statically known slow_path. - */ - ret = imalloc_body(size, &tsd, &usize, false); - imalloc_post_check(ret, tsd, usize, false); + ret = ialloc_body(size, false, &tsd, &usize, false); + ialloc_post_check(ret, tsd, usize, "malloc", true, false); } else { - ret = imalloc_body(size, &tsd, &usize, true); - imalloc_post_check(ret, tsd, usize, true); + ret = ialloc_body(size, false, &tsd, &usize, true); + ialloc_post_check(ret, tsd, usize, "malloc", true, true); UTRACE(0, size, ret); JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } - witness_assert_lockless(tsd); return (ret); } @@ -1695,45 +1699,6 @@ je_aligned_alloc(size_t alignment, size_t size) return (ret); } -static void * -icalloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, prof_tctx_t *tctx) -{ - void *p; - - if (tctx == NULL) - return (NULL); - if (usize <= SMALL_MAXCLASS) { - szind_t ind_large = size2index(LARGE_MINCLASS); - p = icalloc(tsd, LARGE_MINCLASS, ind_large); - if (p == NULL) - return (NULL); - arena_prof_promoted(tsd, p, usize); - } else - p = icalloc(tsd, usize, ind); - - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -icalloc_prof(tsd_t *tsd, size_t usize, szind_t ind) -{ - void *p; - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = icalloc_prof_sample(tsd, usize, ind, tctx); - else - p = icalloc(tsd, usize, ind); - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(tsd, p, usize, tctx); - - return (p); -} - JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) @@ -1742,68 +1707,33 @@ je_calloc(size_t num, size_t size) void *ret; tsd_t *tsd; size_t num_size; - szind_t ind; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - if (unlikely(malloc_init())) { - tsd = NULL; - num_size = 0; - ret = NULL; - goto 
label_return; - } - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - num_size = num * size; if (unlikely(num_size == 0)) { if (num == 0 || size == 0) num_size = 1; - else { - ret = NULL; - goto label_return; - } + else + num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */ /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the * most significant half of the bits in a size_t. */ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << - 2))) && (num_size / size != num))) { - /* size_t overflow. */ - ret = NULL; - goto label_return; - } + 2))) && (num_size / size != num))) + num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */ - ind = size2index(num_size); - if (unlikely(ind >= NSIZES)) { - ret = NULL; - goto label_return; - } - if (config_prof && opt_prof) { - usize = index2size(ind); - ret = icalloc_prof(tsd, usize, ind); + if (likely(!malloc_slow)) { + ret = ialloc_body(num_size, true, &tsd, &usize, false); + ialloc_post_check(ret, tsd, usize, "calloc", true, false); } else { - if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = index2size(ind); - ret = icalloc(tsd, num_size, ind); + ret = ialloc_body(num_size, true, &tsd, &usize, true); + ialloc_post_check(ret, tsd, usize, "calloc", true, true); + UTRACE(0, num_size, ret); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); } -label_return: - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in calloc(): out of " - "memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, true); - witness_assert_lockless(tsd); return (ret); } @@ -1880,7 +1810,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) } 
JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); @@ -1895,7 +1825,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) rzsize = p2rz(tsd, ptr); - isqalloc(tsd, ptr, usize, tcache); + isqalloc(tsd, ptr, usize, tcache, slow_path); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1946,9 +1876,9 @@ je_realloc(void *ptr, size_t size) } else { /* realloc(NULL, size) is equivalent to malloc(size). */ if (likely(!malloc_slow)) - ret = imalloc_body(size, &tsd, &usize, false); + ret = ialloc_body(size, false, &tsd, &usize, false); else - ret = imalloc_body(size, &tsd, &usize, true); + ret = ialloc_body(size, false, &tsd, &usize, true); } if (unlikely(ret == NULL)) { @@ -1978,6 +1908,7 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); + witness_assert_lockless(tsd); if (likely(!malloc_slow)) ifree(tsd, ptr, tcache_get(tsd, false), false); else @@ -2056,7 +1987,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = */ JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, +imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { @@ -2087,29 +2018,9 @@ imallocx_flags_decode_hard(tsd_t *tsd, size_t size, int flags, size_t *usize, return (false); } -JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) -{ - - if (likely(flags == 0)) { - *usize = s2u(size); - if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) - return (true); - *alignment = 0; - *zero = false; - *tcache = tcache_get(tsd, true); - 
*arena = NULL; - return (false); - } else { - return (imallocx_flags_decode_hard(tsd, size, flags, usize, - alignment, zero, tcache, arena)); - } -} - JEMALLOC_ALWAYS_INLINE_C void * imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena, bool slow_path) { szind_t ind; @@ -2117,14 +2028,13 @@ imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); ind = size2index(usize); assert(ind < NSIZES); - if (unlikely(zero)) - return (icalloct(tsd, usize, ind, tcache, arena)); - return (imalloct(tsd, usize, ind, tcache, arena)); + return (iallocztm(tsd, usize, ind, zero, tcache, false, arena, + slow_path)); } static void * imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) + tcache_t *tcache, arena_t *arena, bool slow_path) { void *p; @@ -2132,18 +2042,20 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, - arena); + arena, slow_path); if (p == NULL) return (NULL); arena_prof_promoted(tsd, p, usize); - } else - p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena); + } else { + p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena, + slow_path); + } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) { void *p; size_t alignment; @@ -2157,10 +2069,11 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); if (likely((uintptr_t)tctx == (uintptr_t)1U)) - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, + slow_path); else if ((uintptr_t)tctx > (uintptr_t)1U) { p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, - arena); + arena, slow_path); } else p = NULL; if (unlikely(p == NULL)) { @@ -2174,7 +2087,8 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) +imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, + bool slow_path) { void *p; size_t alignment; @@ -2182,24 +2096,50 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize) tcache_t *tcache; arena_t *arena; + if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, + &zero, &tcache, &arena))) + return (NULL); + p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, + slow_path); + assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); + return (p); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, 
+ bool slow_path) +{ + + if (slow_path && unlikely(malloc_init())) { + *tsd = NULL; + return (NULL); + } + + *tsd = tsd_fetch(); + witness_assert_lockless(*tsd); + if (likely(flags == 0)) { szind_t ind = size2index(size); if (unlikely(ind >= NSIZES)) return (NULL); - if (config_stats || (config_valgrind && - unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof) || (slow_path && + config_valgrind && unlikely(in_valgrind))) { *usize = index2size(ind); assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } - return (imalloc(tsd, size, ind, true)); + + if (config_prof && opt_prof) { + return (ialloc_prof(*tsd, *usize, ind, false, + slow_path)); + } + + return (ialloc(*tsd, size, ind, false, slow_path)); } - if (unlikely(imallocx_flags_decode_hard(tsd, size, flags, usize, - &alignment, &zero, &tcache, &arena))) - return (NULL); - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena); - assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - return (p); + if (config_prof && opt_prof) + return (imallocx_prof(*tsd, size, flags, usize, slow_path)); + + return (imallocx_no_prof(*tsd, size, flags, usize, slow_path)); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -2213,36 +2153,18 @@ je_mallocx(size_t size, int flags) assert(size != 0); - if (unlikely(malloc_init())) { - tsd = NULL; - goto label_oom; + if (likely(!malloc_slow)) { + p = imallocx_body(size, flags, &tsd, &usize, false); + ialloc_post_check(p, tsd, usize, "mallocx", false, false); + } else { + p = imallocx_body(size, flags, &tsd, &usize, true); + ialloc_post_check(p, tsd, usize, "mallocx", false, true); + UTRACE(0, size, p); + JEMALLOC_VALGRIND_MALLOC(p != NULL, tsd, p, usize, + MALLOCX_ZERO_GET(flags)); } - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - if (config_prof && opt_prof) - p = imallocx_prof(tsd, size, flags, &usize); - else - p = imallocx_no_prof(tsd, size, flags, &usize); - if (unlikely(p == NULL)) - goto label_oom; - - if (config_stats) 
{ - assert(usize == isalloc(tsd, p, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(true, tsd, p, usize, MALLOCX_ZERO_GET(flags)); - witness_assert_lockless(tsd); return (p); -label_oom: - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in mallocx(): out of memory\n"); - abort(); - } - UTRACE(0, size, 0); - witness_assert_lockless(tsd); - return (NULL); } static void * @@ -2567,7 +2489,10 @@ je_dallocx(void *ptr, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - ifree(tsd_fetch(), ptr, tcache, true); + if (likely(!malloc_slow)) + ifree(tsd, ptr, tcache, false); + else + ifree(tsd, ptr, tcache, true); witness_assert_lockless(tsd); } @@ -2609,7 +2534,10 @@ je_sdallocx(void *ptr, size_t size, int flags) tcache = tcache_get(tsd, false); UTRACE(ptr, 0, 0); - isfree(tsd, ptr, usize, tcache); + if (likely(!malloc_slow)) + isfree(tsd, ptr, usize, tcache, false); + else + isfree(tsd, ptr, usize, tcache, true); witness_assert_lockless(tsd); } From 1326010cf4a0faef7a0e8fd3e0cf62adcf56a398 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 14:50:58 -0700 Subject: [PATCH 0236/2608] Update private_symbols.txt. 
--- include/jemalloc/internal/private_symbols.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e47296ff..28996206 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -547,12 +547,18 @@ tsd_cleanup tsd_cleanup_wrapper tsd_fetch tsd_get -tsd_wrapper_get -tsd_wrapper_set +tsd_iarena_get +tsd_iarena_set +tsd_iarenap_get tsd_initialized tsd_init_check_recursion tsd_init_finish tsd_init_head +tsd_narenas_tdata_get +tsd_narenas_tdata_set +tsd_narenas_tdatap_get +tsd_wrapper_get +tsd_wrapper_set tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set @@ -575,6 +581,9 @@ tsd_thread_deallocated_set tsd_thread_deallocatedp_get tsd_tls tsd_tsd +tsd_witness_fork_get +tsd_witness_fork_set +tsd_witness_forkp_get tsd_witnesses_get tsd_witnesses_set tsd_witnessesp_get From 62c217e6131b845a91fcbd6372151acb300d2193 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 15:22:32 -0700 Subject: [PATCH 0237/2608] Update ChangeLog. --- ChangeLog | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3cb32be9..68dedfa0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,28 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.2.0 (XXX) + + New features: + - Add the arena..reset mallctl, which makes it possible to discard all of + an arena's allocations in a single operation. (@jasone@) + - Add the stats.retained and stats.arenas..retained statistics. (@jasone) + - Add the --with-version configure option. (@jasone) + - Support --with-lg-page values larger than actual page size. (@jasone) + + Optimizations: + - Use pairing heaps rather than red-black trees for various hot data + structures. (@djwatson, @jasone) + - Streamline fast paths of rtree operations. 
(@jasone) + - Optimize the fast paths of calloc() and [m,d,sd]allocx(). (@jasone) + - Decommit unused virtual memory if the OS does not overcommit. (@jasone) + - Specify MAP_NORESERVE on Linux if [heuristic] overcommit is active, in order + to avoid unfortunate interactions during fork(2). (@jasone) + + Bug fixes: + - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) + - Scale leak report summary according to sampling probability. (@jasone) + * 4.1.1 (May 3, 2016) This bugfix release resolves a variety of mostly minor issues, though the From 919e4a0ea92fbbf9b97efdf9f31a3c800f77cd8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 May 2016 17:15:32 -0700 Subject: [PATCH 0238/2608] Add LG_QUANTUM definition for the RISC-V architecture. --- include/jemalloc/internal/jemalloc_internal.h.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fe504d8d..3ce36659 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -258,6 +258,9 @@ typedef unsigned szind_t; # ifdef __powerpc__ # define LG_QUANTUM 4 # endif +# ifdef __riscv__ +# define LG_QUANTUM 4 +# endif # ifdef __s390__ # define LG_QUANTUM 4 # endif From 0c12dcabc59ea9c95fc38197e7c4bc44663b0a26 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 7 May 2016 12:42:31 -0700 Subject: [PATCH 0239/2608] Fix tsd bootstrapping for a0malloc(). 
--- Makefile.in | 1 + src/jemalloc.c | 58 +++++++++++++++++++++------------------- test/include/test/test.h | 4 +++ test/src/test.c | 56 +++++++++++++++++++++++++++----------- test/unit/a0.c | 19 +++++++++++++ test/unit/junk.c | 1 - test/unit/tsd.c | 5 ++++ 7 files changed, 101 insertions(+), 43 deletions(-) create mode 100644 test/unit/a0.c diff --git a/Makefile.in b/Makefile.in index 1cf4bf0f..652f01f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -135,6 +135,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c TESTS_UNIT := \ + $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/bitmap.c \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 259ab4f7..b1d691ed 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -254,7 +254,7 @@ typedef struct { * definition. */ -static bool malloc_init_hard_a0(tsd_t *tsd); +static bool malloc_init_hard_a0(void); static bool malloc_init_hard(void); /******************************************************************************/ @@ -291,7 +291,7 @@ malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) - return (malloc_init_hard_a0(NULL)); + return (malloc_init_hard_a0()); return (false); } @@ -307,7 +307,7 @@ malloc_init(void) } /* - * The a0*() functions are used instead of i[mcd]alloc() in situations that + * The a0*() functions are used instead of i{d,}alloc() in situations that * cannot tolerate TLS variable access. 
*/ @@ -318,8 +318,8 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, NULL, - is_metadata, arena_get(NULL, 0, true), true)); + return (iallocztm(NULL, size, size2index(size), zero, NULL, is_metadata, + arena_get(NULL, 0, true), true)); } static void @@ -1256,7 +1256,7 @@ malloc_init_hard_needed(void) } static bool -malloc_init_hard_a0_locked(tsd_t *tsd) +malloc_init_hard_a0_locked(tsd_t **tsd) { malloc_initializer = INITIALIZER; @@ -1283,7 +1283,7 @@ malloc_init_hard_a0_locked(tsd_t *tsd) prof_boot1(); if (arena_boot()) return (true); - if (config_tcache && tcache_boot(tsd)) + if (config_tcache && tcache_boot(*tsd)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); @@ -1299,38 +1299,41 @@ malloc_init_hard_a0_locked(tsd_t *tsd) * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(tsd, 0) == NULL) + if (arena_init(*tsd, 0) == NULL) return (true); + + /* + * Initialize tsd, since some code paths cause chunk allocation, which + * in turn depends on tsd. + */ + *tsd = malloc_tsd_boot0(); + if (*tsd == NULL) + return (true); + malloc_init_state = malloc_init_a0_initialized; + return (false); } static bool -malloc_init_hard_a0(tsd_t *tsd) +malloc_init_hard_a0(void) { bool ret; + tsd_t *tsd = NULL; malloc_mutex_lock(tsd, &init_lock); - ret = malloc_init_hard_a0_locked(tsd); + ret = malloc_init_hard_a0_locked(&tsd); malloc_mutex_unlock(tsd, &init_lock); return (ret); } /* Initialize data structures which may trigger recursive allocation. */ static bool -malloc_init_hard_recursible(tsd_t **tsd) +malloc_init_hard_recursible(tsd_t *tsd) { - bool ret; malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(*tsd, &init_lock); - - /* LinuxThreads' pthread_setspecific() allocates. 
*/ - *tsd = malloc_tsd_boot0(); - if (*tsd == NULL) { - ret = true; - goto label_return; - } + malloc_mutex_unlock(tsd, &init_lock); ncpus = malloc_ncpus(); @@ -1339,17 +1342,16 @@ malloc_init_hard_recursible(tsd_t **tsd) /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { - ret = true; malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); + malloc_mutex_lock(tsd, &init_lock); + return (true); } #endif - ret = false; -label_return: - malloc_mutex_lock(*tsd, &init_lock); - return (ret); + malloc_mutex_lock(tsd, &init_lock); + return (false); } static bool @@ -1409,12 +1411,12 @@ malloc_init_hard(void) } if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked(tsd)) { + malloc_init_hard_a0_locked(&tsd)) { malloc_mutex_unlock(tsd, &init_lock); return (true); } - if (malloc_init_hard_recursible(&tsd)) { + if (malloc_init_hard_recursible(tsd)) { malloc_mutex_unlock(tsd, &init_lock); return (true); } @@ -2669,6 +2671,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) * to trigger the deadlock described above, but doing so would involve forking * via a library constructor that runs before jemalloc's runs. */ +#ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) @@ -2676,6 +2679,7 @@ jemalloc_constructor(void) malloc_init(); } +#endif #ifndef JEMALLOC_MUTEX_INIT_CB void diff --git a/test/include/test/test.h b/test/include/test/test.h index 3cf901fc..c8112eb8 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -311,6 +311,9 @@ label_test_end: \ #define test(...) \ p_test(__VA_ARGS__, NULL) +#define test_no_malloc_init(...) \ + p_test_no_malloc_init(__VA_ARGS__, NULL) + #define test_skip_if(e) do { \ if (e) { \ test_skip("%s:%s:%d: Test skipped: (%s)", \ @@ -324,6 +327,7 @@ void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. 
*/ test_status_t p_test(test_t *t, ...); +test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index 8173614c..d70cc750 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -60,32 +60,30 @@ p_test_fini(void) malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); } -test_status_t -p_test(test_t *t, ...) +static test_status_t +p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { test_status_t ret; - va_list ap; - /* - * Make sure initialization occurs prior to running tests. Tests are - * special because they may use internal facilities prior to triggering - * initialization as a side effect of calling into the public API. This - * is a final safety that works even if jemalloc_constructor() doesn't - * run, as for MSVC builds. - */ - if (nallocx(1, 0) == 0) { - malloc_printf("Initialization error"); - return (test_status_fail); + if (do_malloc_init) { + /* + * Make sure initialization occurs prior to running tests. + * Tests are special because they may use internal facilities + * prior to triggering initialization as a side effect of + * calling into the public API. + */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } } ret = test_status_pass; - va_start(ap, t); for (; t != NULL; t = va_arg(ap, test_t *)) { t(); if (test_status > ret) ret = test_status; } - va_end(ap); malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", test_status_string(test_status_pass), @@ -98,6 +96,34 @@ p_test(test_t *t, ...) return (ret); } +test_status_t +p_test(test_t *t, ...) +{ + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(true, t, ap); + va_end(ap); + + return (ret); +} + +test_status_t +p_test_no_malloc_init(test_t *t, ...) 
+{ + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(false, t, ap); + va_end(ap); + + return (ret); +} + void p_test_fail(const char *prefix, const char *message) { diff --git a/test/unit/a0.c b/test/unit/a0.c new file mode 100644 index 00000000..b9ba45a3 --- /dev/null +++ b/test/unit/a0.c @@ -0,0 +1,19 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_a0) +{ + void *p; + + p = a0malloc(1); + assert_ptr_not_null(p, "Unexpected a0malloc() error"); + a0dalloc(p); +} +TEST_END + +int +main(void) +{ + + return (test_no_malloc_init( + test_a0)); +} diff --git a/test/unit/junk.c b/test/unit/junk.c index e251a124..414874a0 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -244,7 +244,6 @@ int main(void) { - assert(!config_fill || opt_junk_alloc || opt_junk_free); return (test( test_junk_small, test_junk_large, diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 8be787fd..7dde4b77 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -99,6 +99,11 @@ int main(void) { + /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ + if (nallocx(1, 0) == 0) { + malloc_printf("Initialization error"); + return (test_status_fail); + } data_tsd_boot(); return (test( From c1e00ef2a6442d1d047950247c757821560db329 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 May 2016 22:21:10 -0700 Subject: [PATCH 0240/2608] Resolve bootstrapping issues when embedded in FreeBSD libc. b2c0d6322d2307458ae2b28545f8a5c9903d7ef5 (Add witness, a simple online locking validator.) caused a broad propagation of tsd throughout the internal API, but tsd_fetch() was designed to fail prior to tsd bootstrapping. Fix this by splitting tsd_t into non-nullable tsd_t and nullable tsdn_t, and modifying all internal APIs that do not critically rely on tsd to take nullable pointers. Furthermore, add the tsd_booted_get() function so that tsdn_fetch() can probe whether tsd bootstrapping is complete and return NULL if not. 
All dangerous conversions of nullable pointers are tsdn_tsd() calls that assert-fail on invalid conversion. --- include/jemalloc/internal/arena.h | 201 +++--- include/jemalloc/internal/base.h | 10 +- include/jemalloc/internal/chunk.h | 22 +- include/jemalloc/internal/chunk_dss.h | 14 +- include/jemalloc/internal/ckh.h | 8 +- include/jemalloc/internal/ctl.h | 10 +- include/jemalloc/internal/huge.h | 20 +- .../jemalloc/internal/jemalloc_internal.h.in | 103 +-- include/jemalloc/internal/mutex.h | 32 +- include/jemalloc/internal/private_symbols.txt | 7 +- include/jemalloc/internal/prof.h | 74 +-- include/jemalloc/internal/tcache.h | 33 +- include/jemalloc/internal/tsd.h | 76 +++ include/jemalloc/internal/valgrind.h | 12 +- include/jemalloc/internal/witness.h | 10 +- src/arena.c | 585 +++++++++--------- src/base.c | 45 +- src/chunk.c | 146 ++--- src/chunk_dss.c | 42 +- src/ckh.c | 42 +- src/ctl.c | 229 +++---- src/huge.c | 152 ++--- src/jemalloc.c | 518 ++++++++-------- src/mutex.c | 12 +- src/prof.c | 475 +++++++------- src/quarantine.c | 44 +- src/tcache.c | 123 ++-- src/witness.c | 36 +- src/zone.c | 8 +- test/unit/arena_reset.c | 8 +- test/unit/ckh.c | 46 +- test/unit/junk.c | 4 +- test/unit/prof_reset.c | 2 +- test/unit/witness.c | 116 ++-- 34 files changed, 1709 insertions(+), 1556 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index debb43f3..b1de2b61 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -518,28 +518,28 @@ void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool cache); -extent_node_t *arena_node_alloc(tsd_t *tsd, arena_t *arena); -void arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node); -void *arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, +extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); +void 
arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); +void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, +void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize); -void arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -void arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, - size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, - size_t oldsize, size_t usize, bool *zero); -ssize_t arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena); -bool arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, +void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize); +bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, + void *chunk, size_t oldsize, size_t usize, bool *zero); +ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); +bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult); -ssize_t arena_decay_time_get(tsd_t *tsd, arena_t *arena); -bool arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time); -void arena_purge(tsd_t *tsd, arena_t *arena, bool all); -void arena_maybe_purge(tsd_t *tsd, arena_t *arena); +ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); +void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes); +void 
arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET @@ -552,17 +552,18 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif void arena_quarantine_junk_small(void *ptr, size_t usize); -void *arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t ind, bool zero); -void *arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, +void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, bool zero); -void *arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size); -void arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, +void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); @@ -570,28 +571,28 @@ extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; #else void arena_dalloc_junk_large(void *ptr, size_t usize); #endif -void arena_dalloc_large_junked_locked(tsd_t *tsd, arena_t *arena, +void 
arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr); -void arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t size, size_t extra, bool zero); void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsd_t *tsd, arena_t *arena); -bool arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); +bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty); -void arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, + ssize_t *decay_time, size_t *nactive, size_t *ndirty); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, @@ -599,14 +600,14 @@ void arena_stats_merge(tsd_t *tsd, arena_t 
*arena, unsigned *nthreads, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); -arena_t *arena_new(tsd_t *tsd, unsigned ind); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind); bool arena_boot(void); -void arena_prefork0(tsd_t *tsd, arena_t *arena); -void arena_prefork1(tsd_t *tsd, arena_t *arena); -void arena_prefork2(tsd_t *tsd, arena_t *arena); -void arena_prefork3(tsd_t *tsd, arena_t *arena); -void arena_postfork_parent(tsd_t *tsd, arena_t *arena); -void arena_postfork_child(tsd_t *tsd, arena_t *arena); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -663,24 +664,24 @@ void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(tsd_t *tsd, const void *ptr); -void arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); 
-void arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, +void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); -void arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsd_t *tsd, arena_t *arena); -void *arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(tsd_t *tsd, const void *ptr, bool demote); -void arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, +size_t arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); #endif @@ -1056,7 +1057,7 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) } JEMALLOC_INLINE bool -arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes) +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -1067,9 +1068,9 @@ arena_prof_accum(tsd_t *tsd, arena_t *arena, uint64_t accumbytes) { bool ret; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (ret); } } @@ -1205,7 +1206,7 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsd_t *tsd, const void *ptr) +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *ret; 
arena_chunk_t *chunk; @@ -1226,13 +1227,13 @@ arena_prof_tctx_get(tsd_t *tsd, const void *ptr) ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(tsd, ptr); + ret = huge_prof_tctx_get(tsdn, ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { arena_chunk_t *chunk; @@ -1264,11 +1265,11 @@ arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(tsd, ptr, tctx); + huge_prof_tctx_set(tsdn, ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { @@ -1292,52 +1293,55 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(tsd, ptr); + huge_prof_tctx_reset(tsdn, ptr); } } JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsd_t *tsd, arena_t *arena, unsigned nticks) +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { + tsd_t *tsd; ticker_t *decay_ticker; - if (unlikely(tsd == NULL)) + if (unlikely(tsdn_null(tsdn))) return; + tsd = tsdn_tsd(tsdn); decay_ticker = decay_ticker_get(tsd, arena->ind); if (unlikely(decay_ticker == NULL)) return; if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(tsd, arena, false); + arena_purge(tsdn, arena, false); } JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsd_t *tsd, arena_t *arena) +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - arena_decay_ticks(tsd, arena, 1); + arena_decay_ticks(tsdn, arena, 1); } JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, bool zero, +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, 
tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); assert(size != 0); if (likely(tcache != NULL)) { if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsd, arena, tcache, size, - ind, zero, slow_path)); + return (tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); } if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsd, arena, tcache, size, - ind, zero, slow_path)); + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); } /* (size > tcache_maxclass) case falls through. */ assert(size > tcache_maxclass); } - return (arena_malloc_hard(tsd, arena, size, ind, zero)); + return (arena_malloc_hard(tsdn, arena, size, ind, zero)); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -1354,7 +1358,7 @@ arena_aalloc(const void *ptr) /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsd_t *tsd, const void *ptr, bool demote) +arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) { size_t ret; arena_chunk_t *chunk; @@ -1397,17 +1401,18 @@ arena_salloc(tsd_t *tsd, const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(tsd, ptr); + ret = huge_salloc(tsdn, ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; size_t pageind, mapbits; + assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -1420,11 +1425,12 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (likely(tcache != NULL)) { szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - tcache_dalloc_small(tsd, tcache, ptr, binind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); } else { - arena_dalloc_small(tsd, extent_node_arena_get( - 
&chunk->node), chunk, ptr, pageind); + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); } } else { size_t size = arena_mapbits_large_size_get(chunk, @@ -1435,23 +1441,26 @@ arena_dalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (likely(tcache != NULL) && size - large_pad <= tcache_maxclass) { - tcache_dalloc_large(tsd, tcache, ptr, size - - large_pad, slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size - large_pad, slow_path); } else { - arena_dalloc_large(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr); + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); } } } else - huge_dalloc(tsd, ptr); + huge_dalloc(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { arena_chunk_t *chunk; + assert(!tsdn_null(tsdn) || tcache == NULL); + chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) { if (config_prof && opt_prof) { @@ -1468,34 +1477,36 @@ arena_sdalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(tsd, ptr, false))); + assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. 
*/ if (likely(tcache != NULL)) { szind_t binind = size2index(size); - tcache_dalloc_small(tsd, tcache, ptr, binind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + binind, slow_path); } else { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr, pageind); + arena_dalloc_small(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr, pageind); } } else { assert(config_cache_oblivious || ((uintptr_t)ptr & PAGE_MASK) == 0); if (likely(tcache != NULL) && size <= tcache_maxclass) { - tcache_dalloc_large(tsd, tcache, ptr, size, - slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); } else { - arena_dalloc_large(tsd, extent_node_arena_get( - &chunk->node), chunk, ptr); + arena_dalloc_large(tsdn, + extent_node_arena_get(&chunk->node), chunk, + ptr); } } } else - huge_dalloc(tsd, ptr); + huge_dalloc(tsdn, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 075a2a20..d6b81e16 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -9,13 +9,13 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(tsd_t *tsd, size_t size); -void base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, +void *base_alloc(tsdn_t *tsdn, size_t size); +void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, size_t *mapped); bool base_boot(void); -void base_prefork(tsd_t *tsd); -void base_postfork_parent(tsd_t *tsd); -void base_postfork_child(tsd_t *tsd); +void base_prefork(tsdn_t *tsdn); +void base_postfork_parent(tsdn_t *tsdn); +void base_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk.h 
b/include/jemalloc/internal/chunk.h index 6c3ad9bf..c9fd4ecb 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -48,32 +48,32 @@ extern size_t chunk_npages; extern const chunk_hooks_t chunk_hooks_default; -chunk_hooks_t chunk_hooks_get(tsd_t *tsd, arena_t *arena); -chunk_hooks_t chunk_hooks_set(tsd_t *tsd, arena_t *arena, +chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); +chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsd_t *tsd, const void *chunk, +bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); -void *chunk_alloc_cache(tsd_t *tsd, arena_t *arena, +void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node); -void *chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, +void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -void chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, +void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); -void chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, +void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed); -bool chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, +bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsd_t *tsd); -void chunk_postfork_parent(tsd_t *tsd); -void chunk_postfork_child(tsd_t *tsd); +void chunk_prefork(tsdn_t *tsdn); +void chunk_postfork_parent(tsdn_t *tsdn); +void 
chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 7f3a09c7..724fa579 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,15 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsd_t *tsd); -bool chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec); -void *chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, +dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); +bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsd_t *tsd, void *chunk); +bool chunk_in_dss(tsdn_t *tsdn, void *chunk); bool chunk_dss_boot(void); -void chunk_dss_prefork(tsd_t *tsd); -void chunk_dss_postfork_parent(tsd_t *tsd); -void chunk_dss_postfork_child(tsd_t *tsd); +void chunk_dss_prefork(tsdn_t *tsdn); +void chunk_dss_postfork_parent(tsdn_t *tsdn); +void chunk_dss_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index f75ad90b..46e151cd 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); +void ckh_delete(tsdn_t *tsdn, ckh_t 
*ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index c84c0de9..af0f6d7c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -27,7 +27,7 @@ struct ctl_named_node_s { struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsd_t *, const size_t *, size_t, + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, size_t); }; @@ -72,15 +72,15 @@ struct ctl_stats_s { int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp); int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); -void ctl_prefork(tsd_t *tsd); -void ctl_postfork_parent(tsd_t *tsd); -void ctl_postfork_child(tsd_t *tsd); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 9de2055d..b5fa9e63 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -9,23 +9,23 @@ 
/******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero); -void *huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, - bool zero); -bool huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, +void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero); +bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsd_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsd_t *tsd, void *ptr); +void huge_dalloc(tsdn_t *tsdn, void *ptr); arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(tsd_t *tsd, const void *ptr); -prof_tctx_t *huge_prof_tctx_get(tsd_t *tsd, const void *ptr); -void huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsd_t *tsd, const void *ptr); +size_t huge_salloc(tsdn_t *tsdn, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3ce36659..69d94ec5 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -473,7 +473,7 @@ void *bootstrap_malloc(size_t size); 
void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); unsigned narenas_total_get(void); -arena_t *arena_init(tsd_t *tsd, unsigned ind); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); @@ -555,10 +555,10 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing); +arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif @@ -815,13 +815,13 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) +arena_ichoose(tsdn_t *tsdn, arena_t *arena) { - assert(tsd != NULL || arena != NULL); + assert(!tsdn_null(tsdn) || arena != NULL); - if (tsd != NULL) - return (arena_choose_impl(tsd, NULL, true)); + if (!tsdn_null(tsdn)) + return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); return (arena); } @@ -851,7 +851,7 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } JEMALLOC_INLINE arena_t * -arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing) +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -861,7 +861,7 @@ arena_get(tsd_t *tsd, unsigned ind, bool init_if_missing) if (unlikely(ret == NULL)) { ret = atomic_read_p((void *)&arenas[ind]); if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(tsd, ind); + ret = arena_init(tsdn, ind); } return (ret); } @@ -895,24 +895,24 @@ 
decay_ticker_get(tsd_t *tsd, unsigned ind) #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); -size_t isalloc(tsd_t *tsd, const void *ptr, bool demote); -void *iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, +size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); +void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path); -void *ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena); -void *ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsd_t *tsd, const void *ptr, bool demote); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); size_t u2rz(size_t usize); -size_t p2rz(tsd_t *tsd, const void *ptr); -void idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, +size_t p2rz(tsdn_t *tsdn, const void *ptr); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); -void isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path); @@ -923,7 +923,7 @@ void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); -bool ixalloc(tsd_t *tsd, void 
*ptr, size_t oldsize, size_t size, +bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero); #endif @@ -939,22 +939,23 @@ iaalloc(const void *ptr) /* * Typical usage: + * tsdn_t *tsdn = [...] * void *ptr = [...] - * size_t sz = isalloc(ptr, config_prof); + * size_t sz = isalloc(tsdn, ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsd_t *tsd, const void *ptr, bool demote) +isalloc(tsdn_t *tsdn, const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ assert(config_prof || !demote); - return (arena_salloc(tsd, ptr, demote)); + return (arena_salloc(tsdn, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path) { void *ret; @@ -963,10 +964,10 @@ iallocztm(tsd_t *tsd, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(!is_metadata || tcache == NULL); assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_malloc(tsd, arena, size, ind, zero, tcache, slow_path); + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, - config_prof)); + arena_metadata_allocated_add(iaalloc(ret), + isalloc(tsdn, ret, config_prof)); } return (ret); } @@ -975,12 +976,12 @@ JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd, size, ind, zero, tcache_get(tsd, true), false, - NULL, slow_path)); + return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), + false, NULL, slow_path)); } JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +ipallocztm(tsdn_t *tsdn, size_t usize, size_t 
alignment, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena) { void *ret; @@ -990,33 +991,33 @@ ipallocztm(tsd_t *tsd, size_t usize, size_t alignment, bool zero, assert(!is_metadata || tcache == NULL); assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); - ret = arena_palloc(tsd, arena, usize, alignment, zero, tcache); + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsd, ret, + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, config_prof)); } return (ret); } JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsd, usize, alignment, zero, tcache, false, arena)); + return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd, usize, alignment, zero, tcache_get(tsd, true), - false, NULL)); + return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsd_t *tsd, const void *ptr, bool demote) +ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) { extent_node_t *node; @@ -1028,7 +1029,7 @@ ivsalloc(tsd_t *tsd, const void *ptr, bool demote) assert(extent_node_addr_get(node) == ptr || extent_node_achunk_get(node)); - return (isalloc(tsd, ptr, demote)); + return (isalloc(tsdn, ptr, demote)); } JEMALLOC_INLINE size_t @@ -1046,15 +1047,15 @@ u2rz(size_t usize) } JEMALLOC_INLINE size_t -p2rz(tsd_t *tsd, const void *ptr) +p2rz(tsdn_t *tsdn, const void *ptr) { - size_t usize = isalloc(tsd, ptr, false); + size_t usize = isalloc(tsdn, ptr, false); return (u2rz(usize)); } 
JEMALLOC_ALWAYS_INLINE void -idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path) { @@ -1062,18 +1063,18 @@ idalloctm(tsd_t *tsd, void *ptr, tcache_t *tcache, bool is_metadata, assert(!is_metadata || tcache == NULL); assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsd, ptr, + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, config_prof)); } - arena_dalloc(tsd, ptr, tcache, slow_path); + arena_dalloc(tsdn, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd, ptr, tcache_get(tsd, false), false, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void @@ -1083,14 +1084,15 @@ iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - idalloctm(tsd, ptr, tcache, false, slow_path); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { - arena_sdalloc(tsd, ptr, size, tcache, slow_path); + arena_sdalloc(tsdn, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void @@ -1100,7 +1102,7 @@ isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) if (slow_path && config_fill && unlikely(opt_quarantine)) quarantine(tsd, ptr); else - isdalloct(tsd, ptr, size, tcache, slow_path); + isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1113,7 +1115,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size + extra, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return 
(NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -1121,7 +1123,8 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd, usize, alignment, zero, tcache, arena); + p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, + arena); if (p == NULL) return (NULL); } @@ -1167,7 +1170,7 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, } JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero) { @@ -1180,7 +1183,7 @@ ixalloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t extra, return (true); } - return (arena_ralloc_no_move(tsd, ptr, oldsize, size, extra, zero)); + return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); } #endif diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 5ddae11c..00f0b91c 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -59,9 +59,9 @@ extern bool isthreaded; bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank); -void malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); bool malloc_mutex_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -69,20 +69,20 @@ bool malloc_mutex_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef 
JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex); +void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (config_debug) - witness_assert_not_owner(tsd, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -95,17 +95,17 @@ malloc_mutex_lock(tsd_t *tsd, malloc_mutex_t *mutex) pthread_mutex_lock(&mutex->lock); #endif if (config_debug) - witness_lock(tsd, &mutex->witness); + witness_lock(tsdn, &mutex->witness); } } JEMALLOC_INLINE void -malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (config_debug) - witness_unlock(tsd, &mutex->witness); + witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -121,19 +121,19 @@ malloc_mutex_unlock(tsd_t *tsd, malloc_mutex_t *mutex) } JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded && config_debug) - witness_assert_owner(tsd, &mutex->witness); + witness_assert_owner(tsdn, &mutex->witness); } JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsd_t *tsd, malloc_mutex_t *mutex) 
+malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded && config_debug) - witness_assert_not_owner(tsd, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 28996206..f2b6a55d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -496,8 +496,6 @@ tcache_alloc_easy tcache_alloc_large tcache_alloc_small tcache_alloc_small_hard -tcache_arena_associate -tcache_arena_dissociate tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small @@ -543,6 +541,7 @@ tsd_boot tsd_boot0 tsd_boot1 tsd_booted +tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch @@ -581,12 +580,16 @@ tsd_thread_deallocated_set tsd_thread_deallocatedp_get tsd_tls tsd_tsd +tsd_tsdn tsd_witness_fork_get tsd_witness_fork_set tsd_witness_forkp_get tsd_witnesses_get tsd_witnesses_set tsd_witnessesp_get +tsdn_fetch +tsdn_null +tsdn_tsd u2rz valgrind_freelike_block valgrind_make_mem_defined diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 4fe17875..691e153d 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -281,7 +281,7 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); @@ -293,33 +293,33 @@ size_t prof_bt_count(void); const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(tsd_t *, bool, const prof_cnt_t *); +typedef 
bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; #endif -void prof_idump(tsd_t *tsd); +void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsd_t *tsd); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_reset(tsdn_t *tsdn, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); const char *prof_thread_name_get(tsd_t *tsd); -bool prof_active_get(tsd_t *tsd); -bool prof_active_set(tsd_t *tsd, bool active); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); bool prof_thread_active_get(tsd_t *tsd); bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsd_t *tsd); -bool prof_thread_active_init_set(tsd_t *tsd, bool active_init); -bool prof_gdump_get(tsd_t *tsd); -bool prof_gdump_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsd_t *tsd); -void prof_prefork1(tsd_t *tsd); -void prof_postfork_parent(tsd_t *tsd); -void prof_postfork_child(tsd_t *tsd); +bool prof_boot2(tsdn_t *tsdn); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_H_EXTERNS */ @@ -330,16 +330,16 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); 
prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsd_t *tsd, const void *ptr); -void prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, +void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -void prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, +void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, @@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); + tdata = prof_tdata_init(tsd_tsdn(tsd)); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { @@ -398,34 +398,34 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsd_t *tsd, const void *ptr) +prof_tctx_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(tsd, ptr)); + return (arena_prof_tctx_get(tsdn, ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsd, ptr, usize, tctx); + arena_prof_tctx_set(tsdn, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, size_t usize, const void *old_ptr, 
+prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -480,17 +480,17 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsd, ptr, true)); + assert(usize == isalloc(tsdn, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsdn, ptr, usize, tctx); else - prof_tctx_set(tsd, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_ALWAYS_INLINE void @@ -504,7 +504,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd, ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() @@ -521,9 +521,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); if (unlikely(sampled)) - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); else - prof_tctx_reset(tsd, ptr, usize, old_ptr, old_tctx); + prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); @@ -532,10 +532,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(tsd, ptr); + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); cassert(config_prof); - assert(usize == isalloc(tsd, ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 59f60235..70883b1a 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -130,27 +130,25 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsd_t *tsd, const void *ptr); +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); -void tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); -void 
tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, arena_t *newarena); -void tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsd_t *tsd, arena_t *arena); +tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsd_t *tsd); +bool tcache_boot(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -297,8 +295,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) return (NULL); - ret = tcache_alloc_small_hard(tsd, arena, tcache, tbin, binind, - &tcache_hard_success); + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, + tbin, binind, &tcache_hard_success); if (tcache_hard_success == false) return (NULL); } @@ -310,7 +308,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, */ if (config_prof || (slow_path && config_fill) || unlikely(zero)) { usize = index2size(binind); - assert(tcache_salloc(tsd, ret) == usize); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); } if (likely(!zero)) { @@ -358,7 +356,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) return (NULL); - ret = arena_malloc_large(tsd, arena, binind, zero); + ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); if (ret == NULL) return (NULL); } else { @@ -407,7 +405,7 
@@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert(tcache_salloc(tsd, ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); @@ -434,8 +432,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, tcache_bin_info_t *tbin_info; assert((size & PAGE_MASK) == 0); - assert(tcache_salloc(tsd, ptr) > SMALL_MAXCLASS); - assert(tcache_salloc(tsd, ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); binind = size2index(size); @@ -460,7 +458,8 @@ tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd, arena_choose(tsd, NULL)); + elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, + NULL)); } return (elm->tcache); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 4a99ee6e..bf113411 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -13,6 +13,9 @@ typedef struct tsd_init_head_s tsd_init_head_t; #endif typedef struct tsd_s tsd_t; +typedef struct tsdn_s tsdn_t; + +#define TSDN_NULL ((tsdn_t *)0) typedef enum { tsd_state_uninitialized, @@ -44,6 +47,7 @@ typedef enum { * The result is a set of generated functions, e.g.: * * bool example_tsd_boot(void) {...} + * bool example_tsd_booted_get(void) {...} * example_t *example_tsd_get() {...} * void example_tsd_set(example_t *val) {...} * @@ -98,6 +102,8 @@ a_attr void \ a_name##tsd_boot1(void); \ a_attr bool \ a_name##tsd_boot(void); \ +a_attr bool \ +a_name##tsd_booted_get(void); \ a_attr a_type * \ a_name##tsd_get(void); \ a_attr void \ @@ -201,6 +207,12 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ 
+a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -246,6 +258,12 @@ a_name##tsd_boot(void) \ \ return (a_name##tsd_boot0()); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -368,6 +386,12 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -490,6 +514,12 @@ a_name##tsd_boot(void) \ a_name##tsd_boot1(); \ return (false); \ } \ +a_attr bool \ +a_name##tsd_booted_get(void) \ +{ \ + \ + return (a_name##tsd_booted); \ +} \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(void) \ @@ -571,6 +601,15 @@ MALLOC_TSD #undef O }; +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. 
+ */ +struct tsdn_s { + tsd_t tsd; +}; + static const tsd_t tsd_initializer = TSD_INITIALIZER; malloc_tsd_types(, tsd_t) @@ -601,6 +640,7 @@ void tsd_cleanup(void *arg); malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); #define O(n, t) \ t *tsd_##n##p_get(tsd_t *tsd); \ @@ -608,6 +648,9 @@ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); MALLOC_TSD #undef O +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t *tsdn); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -634,6 +677,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) +{ + + return ((tsdn_t *)tsd); +} + JEMALLOC_INLINE bool tsd_nominal(tsd_t *tsd) { @@ -665,6 +715,32 @@ tsd_##n##_set(tsd_t *tsd, t n) \ } MALLOC_TSD #undef O + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) +{ + + if (!tsd_booted_get()) + return (NULL); + + return (tsd_tsdn(tsd_fetch())); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) +{ + + return (tsdn == NULL); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) +{ + + assert(!tsdn_null(tsdn)); + + return (&tsdn->tsd); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index 2667bf5e..1a868082 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -30,17 +30,17 @@ * calls must be embedded in macros rather than in functions so that when * Valgrind reports errors, there are no extra stack frames in the backtraces. 
*/ -#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do { \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ if (unlikely(in_valgrind && cond)) { \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsd, ptr), \ + VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ zero); \ } \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do { \ if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(tsd, ptr); \ + size_t rzsize = p2rz(tsdn, ptr); \ \ if (!maybe_moved || ptr == old_ptr) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ @@ -83,8 +83,8 @@ #define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) #define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, tsd, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsd, ptr, usize, \ +#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) +#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ zero) do {} while (0) #define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index b2e6e825..4d312eab 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -75,23 +75,23 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t *witness_lock_error; #endif -void witness_lock(tsd_t *tsd, witness_t *witness); -void witness_unlock(tsd_t *tsd, witness_t *witness); +void witness_lock(tsdn_t *tsdn, 
witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_owner_error_t)(const witness_t *); extern witness_owner_error_t *witness_owner_error; #endif -void witness_assert_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_not_owner_error_t)(const witness_t *); extern witness_not_owner_error_t *witness_not_owner_error; #endif -void witness_assert_not_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_lockless_error_t)(const witness_list_t *); extern witness_lockless_error_t *witness_lockless_error; #endif -void witness_assert_lockless(tsd_t *tsd); +void witness_assert_lockless(tsdn_t *tsdn); void witnesses_cleanup(tsd_t *tsd); void witness_fork_cleanup(tsd_t *tsd); diff --git a/src/arena.c b/src/arena.c index 992d96f5..c605bcd3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,11 +37,11 @@ static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ * definition. 
*/ -static void arena_purge_to_limit(tsd_t *tsd, arena_t *arena, +static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); -static void arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, +static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); -static void arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, +static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); @@ -592,7 +592,7 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, bool zero) { @@ -604,61 +604,62 @@ arena_chunk_register(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, */ extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(tsd, chunk, &chunk->node)); + return (chunk_register(tsdn, chunk, &chunk->node)); } static arena_chunk_t * -arena_chunk_alloc_internal_hard(tsd_t *tsd, arena_t *arena, +arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { arena_chunk_t *chunk; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsd, arena, chunk_hooks, + chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, chunksize, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. 
*/ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(tsd, arena, chunk, *zero)) { + if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) { if (!*commit) { /* Undo commit of header. */ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, (void *)chunk, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; } - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); return (chunk); } static arena_chunk_t * -arena_chunk_alloc_internal(tsd_t *tsd, arena_t *arena, bool *zero, bool *commit) +arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, + bool *commit) { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, chunksize, + chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, chunksize, zero, true); if (chunk != NULL) { - if (arena_chunk_register(tsd, arena, chunk, *zero)) { - chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, + if (arena_chunk_register(tsdn, arena, chunk, *zero)) { + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } *commit = true; } if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(tsd, arena, + chunk = arena_chunk_alloc_internal_hard(tsdn, arena, &chunk_hooks, zero, commit); } @@ -671,7 +672,7 @@ arena_chunk_alloc_internal(tsd_t *tsd, arena_t *arena, bool *zero, bool *commit) } static arena_chunk_t * -arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) +arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) { arena_chunk_t *chunk; bool zero, commit; @@ -681,7 +682,7 @@ arena_chunk_init_hard(tsd_t 
*tsd, arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(tsd, arena, &zero, &commit); + chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); if (chunk == NULL) return (NULL); @@ -726,14 +727,14 @@ arena_chunk_init_hard(tsd_t *tsd, arena_t *arena) } static arena_chunk_t * -arena_chunk_alloc(tsd_t *tsd, arena_t *arena) +arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) { arena_chunk_t *chunk; if (arena->spare != NULL) chunk = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(tsd, arena); + chunk = arena_chunk_init_hard(tsdn, arena); if (chunk == NULL) return (NULL); } @@ -746,7 +747,7 @@ arena_chunk_alloc(tsd_t *tsd, arena_t *arena) } static void -arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -761,12 +762,12 @@ arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) * chunk as committed has a high potential for causing later * access of decommitted memory. 
*/ - chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks = chunk_hooks_get(tsdn, arena); chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - chunk_dalloc_cache(tsd, arena, &chunk_hooks, (void *)chunk, chunksize, + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, committed); if (config_stats) { @@ -776,7 +777,7 @@ arena_chunk_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) } static void -arena_spare_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *spare) +arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare) { assert(arena->spare != spare); @@ -786,11 +787,11 @@ arena_spare_discard(tsd_t *tsd, arena_t *arena, arena_chunk_t *spare) chunk_npages-map_bias); } - arena_chunk_discard(tsd, arena, spare); + arena_chunk_discard(tsdn, arena, spare); } static void -arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { arena_chunk_t *spare; @@ -812,7 +813,7 @@ arena_chunk_dalloc(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) spare = arena->spare; arena->spare = chunk; if (spare != NULL) - arena_spare_discard(tsd, arena, spare); + arena_spare_discard(tsdn, arena, spare); } static void @@ -896,64 +897,64 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, } extent_node_t * -arena_node_alloc(tsd_t *tsd, arena_t *arena) +arena_node_alloc(tsdn_t *tsdn, arena_t *arena) { extent_node_t *node; - malloc_mutex_lock(tsd, &arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->node_cache_mtx); node = ql_last(&arena->node_cache, ql_link); if (node == NULL) { - malloc_mutex_unlock(tsd, &arena->node_cache_mtx); - return (base_alloc(tsd, sizeof(extent_node_t))); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); + return (base_alloc(tsdn, sizeof(extent_node_t))); } ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(tsd, &arena->node_cache_mtx); + 
malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); return (node); } void -arena_node_dalloc(tsd_t *tsd, arena_t *arena, extent_node_t *node) +arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node) { - malloc_mutex_lock(tsd, &arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->node_cache_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(tsd, &arena->node_cache_mtx); + malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); } static void * -arena_chunk_alloc_huge_hard(tsd_t *tsd, arena_t *arena, +arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, size_t csize) { void *ret; bool commit = true; - ret = chunk_alloc_wrapper(tsd, arena, chunk_hooks, NULL, csize, + ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, alignment, zero, &commit); if (ret == NULL) { /* Revert optimistic stats updates. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } return (ret); } void * -arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, +arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { @@ -962,11 +963,11 @@ arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, } arena_nactive_add(arena, usize >> LG_PAGE); - ret = chunk_alloc_cache(tsd, arena, &chunk_hooks, NULL, csize, + ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, alignment, zero, true); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(tsd, arena, &chunk_hooks, + ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, usize, alignment, zero, csize); } @@ -974,49 +975,49 @@ arena_chunk_alloc_huge(tsd_t *tsd, arena_t *arena, size_t usize, } void -arena_chunk_dalloc_huge(tsd_t *tsd, arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; csize = CHUNK_CEILING(usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_dalloc_stats_update(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(tsd, arena, &chunk_hooks, chunk, csize, true); - malloc_mutex_unlock(tsd, &arena->lock); + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, true); + malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_similar(tsd_t *tsd, arena_t *arena, void *chunk, +arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize) { assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) arena_huge_ralloc_stats_update(arena, oldsize, usize); if (oldsize < usize) arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); else arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); - malloc_mutex_unlock(tsd, &arena->lock); + 
malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, +arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update(arena, oldsize, usize); if (cdiff != 0) @@ -1029,35 +1030,35 @@ arena_chunk_ralloc_huge_shrink(tsd_t *tsd, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(usize)); - chunk_dalloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, true); } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } static bool -arena_chunk_ralloc_huge_expand_hard(tsd_t *tsd, arena_t *arena, +arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, bool *zero, void *nchunk, size_t udiff, size_t cdiff) { bool err; bool commit = true; - err = (chunk_alloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, chunksize, zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. 
*/ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena_huge_ralloc_stats_update_undo(arena, oldsize, usize); arena->stats.mapped -= cdiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, nchunk, cdiff, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1065,16 +1066,16 @@ arena_chunk_ralloc_huge_expand_hard(tsd_t *tsd, arena_t *arena, } bool -arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, +arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero) { bool err; - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { @@ -1083,16 +1084,16 @@ arena_chunk_ralloc_huge_expand(tsd_t *tsd, arena_t *arena, void *chunk, } arena_nactive_add(arena, udiff >> LG_PAGE); - err = (chunk_alloc_cache(tsd, arena, &chunk_hooks, nchunk, cdiff, + err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, chunksize, zero, true) == NULL); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (err) { - err = arena_chunk_ralloc_huge_expand_hard(tsd, arena, + err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsd, arena, &chunk_hooks, nchunk, cdiff, + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, *zero, true); err = true; } @@ -1133,7 +1134,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) } static arena_run_t * -arena_run_alloc_large(tsd_t *tsd, arena_t *arena, size_t size, bool zero) +arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) { arena_chunk_t *chunk; arena_run_t *run; @@ -1149,7 +1150,7 @@ arena_run_alloc_large(tsd_t *tsd, arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(tsd, arena); + chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_large(arena, run, size, zero)) @@ -1177,7 +1178,7 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) } static arena_run_t * -arena_run_alloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_run_t *run; @@ -1194,7 +1195,7 @@ arena_run_alloc_small(tsd_t *tsd, arena_t *arena, size_t size, szind_t binind) /* * No usable runs. 
Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(tsd, arena); + chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; if (arena_run_split_small(arena, run, size, binind)) @@ -1219,28 +1220,28 @@ arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) } ssize_t -arena_lg_dirty_mult_get(tsd_t *tsd, arena_t *arena) +arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena) { ssize_t lg_dirty_mult; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (lg_dirty_mult); } bool -arena_lg_dirty_mult_set(tsd_t *tsd, arena_t *arena, ssize_t lg_dirty_mult) +arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult) { if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) return (true); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(tsd, arena); - malloc_mutex_unlock(tsd, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } @@ -1397,25 +1398,25 @@ arena_decay_time_valid(ssize_t decay_time) } ssize_t -arena_decay_time_get(tsd_t *tsd, arena_t *arena) +arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) { ssize_t decay_time; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); decay_time = arena->decay_time; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); } bool -arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time) +arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) return (true); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); /* * Restart decay backlog from scratch, which may cause many 
dirty pages * to be immediately purged. It would conceptually be possible to map @@ -1425,14 +1426,14 @@ arena_decay_time_set(tsd_t *tsd, arena_t *arena, ssize_t decay_time) * arbitrary change during initial arena configuration. */ arena_decay_init(arena, decay_time); - arena_maybe_purge(tsd, arena); - malloc_mutex_unlock(tsd, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } static void -arena_maybe_purge_ratio(tsd_t *tsd, arena_t *arena) +arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena) { assert(opt_purge == purge_mode_ratio); @@ -1455,12 +1456,12 @@ arena_maybe_purge_ratio(tsd_t *tsd, arena_t *arena) */ if (arena->ndirty <= threshold) return; - arena_purge_to_limit(tsd, arena, threshold); + arena_purge_to_limit(tsdn, arena, threshold); } } static void -arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) +arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; size_t ndirty_limit; @@ -1470,7 +1471,7 @@ arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) /* Purge all or nothing if the option is disabled. */ if (arena->decay_time <= 0) { if (arena->decay_time == 0) - arena_purge_to_limit(tsd, arena, 0); + arena_purge_to_limit(tsdn, arena, 0); return; } @@ -1491,11 +1492,11 @@ arena_maybe_purge_decay(tsd_t *tsd, arena_t *arena) */ if (arena->ndirty <= ndirty_limit) return; - arena_purge_to_limit(tsd, arena, ndirty_limit); + arena_purge_to_limit(tsdn, arena, ndirty_limit); } void -arena_maybe_purge(tsd_t *tsd, arena_t *arena) +arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { /* Don't recursively purge. 
*/ @@ -1503,9 +1504,9 @@ arena_maybe_purge(tsd_t *tsd, arena_t *arena) return; if (opt_purge == purge_mode_ratio) - arena_maybe_purge_ratio(tsd, arena); + arena_maybe_purge_ratio(tsdn, arena); else - arena_maybe_purge_decay(tsd, arena); + arena_maybe_purge_decay(tsdn, arena); } static size_t @@ -1543,7 +1544,7 @@ arena_dirty_count(arena_t *arena) } static size_t -arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1574,7 +1575,7 @@ arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; - chunk = chunk_alloc_cache(tsd, arena, chunk_hooks, + chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, false); @@ -1609,7 +1610,7 @@ arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * prior to allocation. */ if (chunk == arena->spare) - arena_chunk_alloc(tsd, arena); + arena_chunk_alloc(tsdn, arena); /* Temporarily allocate the free dirty run. 
*/ arena_run_split_large(arena, run, run_size, false); @@ -1633,7 +1634,7 @@ arena_stash_dirty(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static size_t -arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1645,7 +1646,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, nmadvise = 0; npurged = 0; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); for (rdelm = qr_next(purge_runs_sentinel, rd_link), chunkselm = qr_next(purge_chunks_sentinel, cc_link); rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { @@ -1684,7 +1685,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; } else { - flag_unzeroed = chunk_purge_wrapper(tsd, arena, + flag_unzeroed = chunk_purge_wrapper(tsdn, arena, chunk_hooks, chunk, chunksize, pageind << LG_PAGE, run_size) ? 
CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; @@ -1715,7 +1716,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, if (config_stats) nmadvise++; } - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1726,7 +1727,7 @@ arena_purge_stashed(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, extent_node_t *purge_chunks_sentinel) { @@ -1746,9 +1747,9 @@ arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); - arena_node_dalloc(tsd, arena, chunkselm); + arena_node_dalloc(tsdn, arena, chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_wrapper(tsd, arena, chunk_hooks, addr, + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, size, zeroed, committed); } else { arena_chunk_t *chunk = @@ -1760,7 +1761,7 @@ arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(tsd, arena, run, false, true, + arena_run_dalloc(tsdn, arena, run, false, true, decommitted); } } @@ -1776,9 +1777,9 @@ arena_unstash_purged(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * (arena->ndirty >= ndirty_limit) */ static void -arena_purge_to_limit(tsd_t *tsd, arena_t *arena, size_t ndirty_limit) +arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; extent_node_t 
purge_chunks_sentinel; @@ -1799,14 +1800,14 @@ arena_purge_to_limit(tsd_t *tsd, arena_t *arena, size_t ndirty_limit) qr_new(&purge_runs_sentinel, rd_link); extent_node_dirty_linkage_init(&purge_chunks_sentinel); - npurge = arena_stash_dirty(tsd, arena, &chunk_hooks, ndirty_limit, + npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); if (npurge == 0) goto label_return; - npurged = arena_purge_stashed(tsd, arena, &chunk_hooks, + npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, &purge_chunks_sentinel); assert(npurged == npurge); - arena_unstash_purged(tsd, arena, &chunk_hooks, &purge_runs_sentinel, + arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, &purge_chunks_sentinel); if (config_stats) @@ -1817,15 +1818,15 @@ label_return: } void -arena_purge(tsd_t *tsd, arena_t *arena, bool all) +arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (all) - arena_purge_to_limit(tsd, arena, 0); + arena_purge_to_limit(tsdn, arena, 0); else - arena_maybe_purge(tsd, arena); - malloc_mutex_unlock(tsd, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); } static void @@ -1845,7 +1846,8 @@ arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) if (arena_mapbits_large_get(chunk, pageind) != 0) { void *ptr = (void *)((uintptr_t)chunk + (pageind << LG_PAGE)); - size_t usize = isalloc(tsd, ptr, config_prof); + size_t usize = isalloc(tsd_tsdn(tsd), ptr, + config_prof); prof_free(tsd, ptr, usize); npages = arena_mapbits_large_size_get(chunk, @@ -1902,39 +1904,39 @@ arena_reset(tsd_t *tsd, arena_t *arena) } /* Huge allocations. 
*/ - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); for (node = ql_last(&arena->huge, ql_link); node != NULL; node = ql_last(&arena->huge, ql_link)) { void *ptr = extent_node_addr_get(node); size_t usize; - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); if (config_stats || (config_prof && opt_prof)) - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); /* Remove huge allocation from prof sample set. */ if (config_prof && opt_prof) prof_free(tsd, ptr, usize); - huge_dalloc(tsd, ptr); - malloc_mutex_lock(tsd, &arena->huge_mtx); + huge_dalloc(tsd_tsdn(tsd), ptr); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); /* Cancel out unwanted effects on stats. */ if (config_stats) arena_huge_reset_stats_cancel(arena, usize); } - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); /* Bins. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->runcur = NULL; arena_run_heap_new(&bin->runs); if (config_stats) { bin->stats.curregs = 0; bin->stats.curruns = 0; } - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } /* @@ -1952,12 +1954,13 @@ arena_reset(tsd_t *tsd, arena_t *arena) for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = ql_last(&arena->achunks, ql_link)) { ql_remove(&arena->achunks, node, ql_link); - arena_chunk_discard(tsd, arena, extent_node_addr_get(node)); + arena_chunk_discard(tsd_tsdn(tsd), arena, + extent_node_addr_get(node)); } /* Spare. 
*/ if (arena->spare != NULL) { - arena_chunk_discard(tsd, arena, arena->spare); + arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare); arena->spare = NULL; } @@ -1967,7 +1970,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) for(i = 0; i < runs_avail_nclasses; i++) arena_run_heap_new(&arena->runs_avail[i]); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } static void @@ -2084,7 +2087,7 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, +arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted) { arena_chunk_t *chunk; @@ -2145,7 +2148,7 @@ arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, if (size == arena_maxrun) { assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(tsd, arena, chunk); + arena_chunk_dalloc(tsdn, arena, chunk); } /* @@ -2156,11 +2159,11 @@ arena_run_dalloc(tsd_t *tsd, arena_t *arena, arena_run_t *run, bool dirty, * chances of spuriously crossing the dirty page purging threshold. 
*/ if (dirty) - arena_maybe_purge(tsd, arena); + arena_maybe_purge(tsdn, arena); } static void -arena_run_trim_head(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); @@ -2196,12 +2199,12 @@ arena_run_trim_head(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(tsd, arena, run, false, false, (flag_decommitted != + arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted != 0)); } static void -arena_run_trim_tail(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); @@ -2241,7 +2244,7 @@ arena_run_trim_tail(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; - arena_run_dalloc(tsd, arena, tail_run, dirty, false, (flag_decommitted + arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted != 0)); } @@ -2268,7 +2271,7 @@ arena_bin_nonfull_run_tryget(arena_bin_t *bin) } static arena_run_t * -arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) +arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; @@ -2284,19 +2287,19 @@ arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) bin_info = &arena_bin_info[binind]; /* Allocate a new run. 
*/ - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(tsd, &arena->lock); - run = arena_run_alloc_small(tsd, arena, bin_info->run_size, binind); + malloc_mutex_lock(tsdn, &arena->lock); + run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind); if (run != NULL) { /* Initialize run internals. */ run->binind = binind; run->nfree = bin_info->nregs; bitmap_init(run->bitmap, &bin_info->bitmap_info); } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); /********************************/ - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (run != NULL) { if (config_stats) { bin->stats.nruns++; @@ -2319,7 +2322,7 @@ arena_bin_nonfull_run_get(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) /* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { szind_t binind; arena_bin_info_t *bin_info; @@ -2328,7 +2331,7 @@ arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; bin->runcur = NULL; - run = arena_bin_nonfull_run_get(tsd, arena, bin); + run = arena_bin_nonfull_run_get(tsdn, arena, bin); if (bin->runcur != NULL && bin->runcur->nfree > 0) { /* * Another thread updated runcur while this one ran without the @@ -2350,7 +2353,7 @@ arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) */ chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); if (run->nfree == bin_info->nregs) { - arena_dalloc_bin_run(tsd, arena, chunk, run, + arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else arena_bin_lower_run(arena, chunk, run, bin); @@ -2369,7 +2372,7 @@ arena_bin_malloc_hard(tsd_t *tsd, arena_t *arena, arena_bin_t *bin) } void -arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, 
tcache_bin_t *tbin, +arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; @@ -2377,10 +2380,10 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(tsd, arena, prof_accumbytes)) - prof_idump(tsd); + if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) + prof_idump(tsdn); bin = &arena->bins[binind]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; @@ -2388,7 +2391,7 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, if ((run = bin->runcur) != NULL && run->nfree > 0) ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ptr = arena_bin_malloc_hard(tsd, arena, bin); + ptr = arena_bin_malloc_hard(tsdn, arena, bin); if (ptr == NULL) { /* * OOM. 
tbin->avail isn't yet filled down to its first @@ -2415,9 +2418,9 @@ arena_tcache_fill_small(tsd_t *tsd, arena_t *arena, tcache_bin_t *tbin, bin->stats.nfills++; tbin->tstats.nrequests = 0; } - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); tbin->ncached = i; - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); } void @@ -2529,7 +2532,7 @@ arena_quarantine_junk_small(void *ptr, size_t usize) } static void * -arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) +arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; @@ -2540,14 +2543,14 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin = &arena->bins[binind]; usize = index2size(binind); - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(tsd, arena, bin); + ret = arena_bin_malloc_hard(tsdn, arena, bin); if (ret == NULL) { - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); return (NULL); } @@ -2556,9 +2559,9 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(tsd, &bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(tsd, arena, usize)) - prof_idump(tsd); + malloc_mutex_unlock(tsdn, &bin->lock); + if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize)) + prof_idump(tsdn); if (!zero) { if (config_fill) { @@ -2578,12 +2581,12 @@ arena_malloc_small(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) memset(ret, 0, usize); } - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) +arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool 
zero) { void *ret; size_t usize; @@ -2594,7 +2597,7 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) /* Large allocation. */ usize = index2size(binind); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2607,9 +2610,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; - run = arena_run_alloc_large(tsd, arena, usize + large_pad, zero); + run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero); if (run == NULL) { - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } miscelm = arena_run_to_miscelm(run); @@ -2627,9 +2630,9 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) } if (config_prof) idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (config_prof && idump) - prof_idump(tsd); + prof_idump(tsdn); if (!zero) { if (config_fill) { @@ -2640,29 +2643,32 @@ arena_malloc_large(tsd_t *tsd, arena_t *arena, szind_t binind, bool zero) } } - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_malloc_hard(tsd_t *tsd, arena_t *arena, size_t size, szind_t ind, +arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero) { - arena = arena_choose(tsd, arena); + assert(!tsdn_null(tsdn) || arena != NULL); + + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL)) return (NULL); if (likely(size <= SMALL_MAXCLASS)) - return (arena_malloc_small(tsd, arena, ind, zero)); + return (arena_malloc_small(tsdn, arena, ind, zero)); if (likely(size <= large_maxclass)) - return (arena_malloc_large(tsd, arena, ind, zero)); - return (huge_malloc(tsd, arena, index2size(ind), zero)); + return (arena_malloc_large(tsdn, 
arena, ind, zero)); + return (huge_malloc(tsdn, arena, index2size(ind), zero)); } /* Only handles large allocations that require more than page alignment. */ static void * -arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -2672,19 +2678,21 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena_chunk_map_misc_t *miscelm; void *rpages; + assert(!tsdn_null(tsdn) || arena != NULL); assert(usize == PAGE_CEILING(usize)); - arena = arena_choose(tsd, arena); + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL)) return (NULL); alignment = PAGE_CEILING(alignment); alloc_size = usize + large_pad + alignment; - malloc_mutex_lock(tsd, &arena->lock); - run = arena_run_alloc_large(tsd, arena, alloc_size, false); + malloc_mutex_lock(tsdn, &arena->lock); + run = arena_run_alloc_large(tsdn, arena, alloc_size, false); if (run == NULL) { - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); @@ -2704,11 +2712,11 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, LG_PAGE)); run = &miscelm->run; - arena_run_trim_head(tsd, arena, chunk, head_run, alloc_size, + arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(tsd, arena, chunk, run, usize + large_pad + + arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad + trailsize, usize + large_pad, false); } if (arena_run_init_large(arena, run, usize + large_pad, zero)) { @@ -2719,8 +2727,8 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. 
*/ - arena_run_dalloc(tsd, arena, run, dirty, false, decommitted); - malloc_mutex_unlock(tsd, &arena->lock); + arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted); + malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } ret = arena_miscelm_to_rpages(miscelm); @@ -2735,7 +2743,7 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); if (config_fill && !zero) { if (unlikely(opt_junk_alloc)) @@ -2743,12 +2751,12 @@ arena_palloc_large(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, else if (unlikely(opt_zero)) memset(ret, 0, usize); } - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } void * -arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; @@ -2756,7 +2764,7 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special run placement. */ - ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); } else if (usize <= large_maxclass && alignment <= PAGE) { /* @@ -2765,25 +2773,25 @@ arena_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * the base of the run, so do some bit manipulation to retrieve * the base. 
*/ - ret = arena_malloc(tsd, arena, usize, size2index(usize), zero, + ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); if (config_cache_oblivious) ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { if (likely(usize <= large_maxclass)) { - ret = arena_palloc_large(tsd, arena, usize, alignment, + ret = arena_palloc_large(tsdn, arena, usize, alignment, zero); } else if (likely(alignment <= chunksize)) - ret = huge_malloc(tsd, arena, usize, zero); + ret = huge_malloc(tsdn, arena, usize, zero); else { - ret = huge_palloc(tsd, arena, usize, alignment, zero); + ret = huge_palloc(tsdn, arena, usize, alignment, zero); } } return (ret); } void -arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) +arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2792,8 +2800,8 @@ arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsd, ptr, true) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); @@ -2802,8 +2810,8 @@ arena_prof_promoted(tsd_t *tsd, const void *ptr, size_t size) assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(tsd, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsd, ptr, true) == size); + assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr, true) == size); } static void @@ -2834,19 +2842,19 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_run(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin) { assert(run != bin->runcur); - 
malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(tsd, &arena->lock); - arena_run_dalloc(tsd, arena, run, true, false, false); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); + arena_run_dalloc(tsdn, arena, run, true, false, false); + malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) bin->stats.curruns--; } @@ -2873,7 +2881,7 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; @@ -2895,7 +2903,7 @@ arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, arena_run_reg_dalloc(run, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(tsd, arena, chunk, run, bin); + arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) arena_bin_lower_run(arena, chunk, run, bin); @@ -2906,15 +2914,15 @@ arena_dalloc_bin_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_bin_junked_locked(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm) +arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true); } void -arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, 
size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; @@ -2924,14 +2932,14 @@ arena_dalloc_bin(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; - malloc_mutex_lock(tsd, &bin->lock); - arena_dalloc_bin_locked_impl(tsd, arena, chunk, ptr, bitselm, false); - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false); + malloc_mutex_unlock(tsdn, &bin->lock); } void -arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind) +arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2941,8 +2949,8 @@ arena_dalloc_small(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr, pageind)) != BININD_INVALID); } bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(tsd, arena, chunk, ptr, pageind, bitselm); - arena_decay_tick(tsd, arena); + arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm); + arena_decay_tick(tsdn, arena); } #ifdef JEMALLOC_JET @@ -2964,8 +2972,8 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = #endif static void -arena_dalloc_large_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, - void *ptr, bool junked) +arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, @@ -2988,29 +2996,30 @@ arena_dalloc_large_locked_impl(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, } } - arena_run_dalloc(tsd, arena, run, true, false, false); + arena_run_dalloc(tsdn, arena, run, true, false, false); } void -arena_dalloc_large_junked_locked(tsd_t *tsd, 
arena_t *arena, +arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr) { - arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, true); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true); } void -arena_dalloc_large(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, void *ptr) +arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + void *ptr) { - malloc_mutex_lock(tsd, &arena->lock); - arena_dalloc_large_locked_impl(tsd, arena, chunk, ptr, false); - malloc_mutex_unlock(tsd, &arena->lock); - arena_decay_tick(tsd, arena); + malloc_mutex_lock(tsdn, &arena->lock); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false); + malloc_mutex_unlock(tsdn, &arena->lock); + arena_decay_tick(tsdn, arena); } static void -arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -3024,8 +3033,8 @@ arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, * Shrink the run, and make trailing pages available for other * allocations. 
*/ - malloc_mutex_lock(tsd, &arena->lock); - arena_run_trim_tail(tsd, arena, chunk, run, oldsize + large_pad, size + + malloc_mutex_lock(tsdn, &arena->lock); + arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; @@ -3043,11 +3052,11 @@ arena_ralloc_large_shrink(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } static bool -arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, +arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -3058,7 +3067,7 @@ arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, large_pad); /* Try to extend the run. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, pageind+npages) != 0) goto label_fail; @@ -3138,11 +3147,11 @@ arena_ralloc_large_grow(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk, arena->stats.lstats[index].nrequests++; arena->stats.lstats[index].curruns++; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } label_fail: - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (true); } @@ -3171,7 +3180,7 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. 
*/ static bool -arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, +arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; @@ -3186,16 +3195,16 @@ arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, arena = extent_node_arena_get(&chunk->node); if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(tsd, arena, chunk, ptr, + bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, oldsize, usize_min, usize_max, zero); if (config_fill && !ret && !zero) { if (unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - isalloc(tsd, ptr, config_prof) - oldsize); + isalloc(tsdn, ptr, config_prof) - oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(tsd, ptr, config_prof) - oldsize); + isalloc(tsdn, ptr, config_prof) - oldsize); } } return (ret); @@ -3204,12 +3213,12 @@ arena_ralloc_large(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. 
*/ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(tsd, arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max); return (false); } bool -arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, +arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) { size_t usize_min, usize_max; @@ -3239,32 +3248,32 @@ arena_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, } else { if (usize_max <= SMALL_MAXCLASS) return (true); - if (arena_ralloc_large(tsd, ptr, oldsize, usize_min, + if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min, usize_max, zero)) return (true); } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_decay_tick(tsd, extent_node_arena_get(&chunk->node)); + arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node)); return (false); } else { - return (huge_ralloc_no_move(tsd, ptr, oldsize, usize_min, + return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min, usize_max, zero)); } } static void * -arena_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, +arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { if (alignment == 0) - return (arena_malloc(tsd, arena, usize, size2index(usize), zero, - tcache, true)); + return (arena_malloc(tsdn, arena, usize, size2index(usize), + zero, tcache, true)); usize = sa2u(usize, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); } void * @@ -3282,7 +3291,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. 
*/ - if (!arena_ralloc_no_move(tsd, ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0, + zero)) return (ptr); /* @@ -3290,8 +3300,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. */ - ret = arena_ralloc_move_helper(tsd, arena, usize, alignment, - zero, tcache); + ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, + alignment, zero, tcache); if (ret == NULL) return (NULL); @@ -3312,25 +3322,25 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, } dss_prec_t -arena_dss_prec_get(tsd_t *tsd, arena_t *arena) +arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) { dss_prec_t ret; - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ret = arena->dss_prec; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (ret); } bool -arena_dss_prec_set(tsd_t *tsd, arena_t *arena, dss_prec_t dss_prec) +arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena->dss_prec = dss_prec; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); return (false); } @@ -3387,19 +3397,19 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, } void -arena_basic_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, +arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, 
&arena->lock); } void -arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, +arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, @@ -3409,7 +3419,7 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, cassert(config_stats); - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, decay_time, nactive, ndirty); @@ -3440,12 +3450,12 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; @@ -3457,7 +3467,7 @@ arena_stats_merge(tsd_t *tsd, arena_t *arena, unsigned *nthreads, bstats[i].nruns += bin->stats.nruns; bstats[i].reruns += bin->stats.reruns; bstats[i].curruns += bin->stats.curruns; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); } } @@ -3483,7 +3493,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) } arena_t * -arena_new(tsd_t *tsd, unsigned ind) +arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; size_t arena_size; @@ -3497,11 +3507,12 @@ arena_new(tsd_t *tsd, unsigned ind) * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { - arena = (arena_t *)base_alloc(tsd, CACHELINE_CEILING(arena_size) - + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t) + - nhclasses) * sizeof(malloc_huge_stats_t)); + arena = (arena_t *)base_alloc(tsdn, + CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * + sizeof(malloc_large_stats_t) + nhclasses) * + sizeof(malloc_huge_stats_t)); } else - arena = (arena_t *)base_alloc(tsd, arena_size); + arena = (arena_t *)base_alloc(tsdn, arena_size); if (arena == NULL) return (NULL); @@ -3540,7 +3551,7 @@ arena_new(tsd_t *tsd, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsd); + arena->dss_prec = chunk_dss_prec_get(tsdn); ql_new(&arena->achunks); @@ -3823,58 +3834,58 @@ arena_boot(void) } void -arena_prefork0(tsd_t *tsd, arena_t *arena) +arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsd, &arena->lock); + malloc_mutex_prefork(tsdn, &arena->lock); } void -arena_prefork1(tsd_t *tsd, arena_t *arena) +arena_prefork1(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsd, &arena->chunks_mtx); + malloc_mutex_prefork(tsdn, &arena->chunks_mtx); } void -arena_prefork2(tsd_t *tsd, arena_t *arena) +arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsd, &arena->node_cache_mtx); + malloc_mutex_prefork(tsdn, &arena->node_cache_mtx); } void -arena_prefork3(tsd_t *tsd, arena_t *arena) +arena_prefork3(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) - malloc_mutex_prefork(tsd, &arena->bins[i].lock); - malloc_mutex_prefork(tsd, &arena->huge_mtx); + malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + malloc_mutex_prefork(tsdn, &arena->huge_mtx); } void -arena_postfork_parent(tsd_t *tsd, arena_t *arena) +arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_parent(tsd, &arena->huge_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_parent(tsd, 
&arena->bins[i].lock); - malloc_mutex_postfork_parent(tsd, &arena->node_cache_mtx); - malloc_mutex_postfork_parent(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_parent(tsd, &arena->lock); + malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->lock); } void -arena_postfork_child(tsd_t *tsd, arena_t *arena) +arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_child(tsd, &arena->huge_mtx); + malloc_mutex_postfork_child(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) - malloc_mutex_postfork_child(tsd, &arena->bins[i].lock); - malloc_mutex_postfork_child(tsd, &arena->node_cache_mtx); - malloc_mutex_postfork_child(tsd, &arena->chunks_mtx); - malloc_mutex_postfork_child(tsd, &arena->lock); + malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_child(tsdn, &arena->lock); } diff --git a/src/base.c b/src/base.c index 901553a1..81b0801f 100644 --- a/src/base.c +++ b/src/base.c @@ -14,11 +14,11 @@ static size_t base_mapped; /******************************************************************************/ static extent_node_t * -base_node_try_alloc(tsd_t *tsd) +base_node_try_alloc(tsdn_t *tsdn) { extent_node_t *node; - malloc_mutex_assert_owner(tsd, &base_mtx); + malloc_mutex_assert_owner(tsdn, &base_mtx); if (base_nodes == NULL) return (NULL); @@ -29,10 +29,10 @@ base_node_try_alloc(tsd_t *tsd) } static void -base_node_dalloc(tsd_t *tsd, extent_node_t *node) +base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) { - malloc_mutex_assert_owner(tsd, &base_mtx); + malloc_mutex_assert_owner(tsdn, &base_mtx); JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; @@ -40,22 +40,22 
@@ base_node_dalloc(tsd_t *tsd, extent_node_t *node) } static extent_node_t * -base_chunk_alloc(tsd_t *tsd, size_t minsize) +base_chunk_alloc(tsdn_t *tsdn, size_t minsize) { extent_node_t *node; size_t csize, nsize; void *addr; - malloc_mutex_assert_owner(tsd, &base_mtx); + malloc_mutex_assert_owner(tsdn, &base_mtx); assert(minsize != 0); - node = base_node_try_alloc(tsd); + node = base_node_try_alloc(tsdn); /* Allocate enough space to also carve a node out if necessary. */ nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { if (node != NULL) - base_node_dalloc(tsd, node); + base_node_dalloc(tsdn, node); return (NULL); } base_mapped += csize; @@ -78,7 +78,7 @@ base_chunk_alloc(tsd_t *tsd, size_t minsize) * physical memory usage. */ void * -base_alloc(tsd_t *tsd, size_t size) +base_alloc(tsdn_t *tsdn, size_t size) { void *ret; size_t csize, usize; @@ -93,14 +93,14 @@ base_alloc(tsd_t *tsd, size_t size) usize = s2u(csize); extent_node_init(&key, NULL, NULL, usize, false, false); - malloc_mutex_lock(tsd, &base_mtx); + malloc_mutex_lock(tsdn, &base_mtx); node = extent_tree_szad_nsearch(&base_avail_szad, &key); if (node != NULL) { /* Use existing space. */ extent_tree_szad_remove(&base_avail_szad, node); } else { /* Try to allocate more space. 
*/ - node = base_chunk_alloc(tsd, csize); + node = base_chunk_alloc(tsdn, csize); } if (node == NULL) { ret = NULL; @@ -113,7 +113,7 @@ base_alloc(tsd_t *tsd, size_t size) extent_node_size_set(node, extent_node_size_get(node) - csize); extent_tree_szad_insert(&base_avail_szad, node); } else - base_node_dalloc(tsd, node); + base_node_dalloc(tsdn, node); if (config_stats) { base_allocated += csize; /* @@ -125,21 +125,22 @@ base_alloc(tsd_t *tsd, size_t size) } JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: - malloc_mutex_unlock(tsd, &base_mtx); + malloc_mutex_unlock(tsdn, &base_mtx); return (ret); } void -base_stats_get(tsd_t *tsd, size_t *allocated, size_t *resident, size_t *mapped) +base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, + size_t *mapped) { - malloc_mutex_lock(tsd, &base_mtx); + malloc_mutex_lock(tsdn, &base_mtx); assert(base_allocated <= base_resident); assert(base_resident <= base_mapped); *allocated = base_allocated; *resident = base_resident; *mapped = base_mapped; - malloc_mutex_unlock(tsd, &base_mtx); + malloc_mutex_unlock(tsdn, &base_mtx); } bool @@ -155,22 +156,22 @@ base_boot(void) } void -base_prefork(tsd_t *tsd) +base_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(tsd, &base_mtx); + malloc_mutex_prefork(tsdn, &base_mtx); } void -base_postfork_parent(tsd_t *tsd) +base_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(tsd, &base_mtx); + malloc_mutex_postfork_parent(tsdn, &base_mtx); } void -base_postfork_child(tsd_t *tsd) +base_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(tsd, &base_mtx); + malloc_mutex_postfork_child(tsdn, &base_mtx); } diff --git a/src/chunk.c b/src/chunk.c index 1f2afd9d..adc666ff 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -49,7 +49,7 @@ const chunk_hooks_t chunk_hooks_default = { * definition. 
*/ -static void chunk_record(tsd_t *tsd, arena_t *arena, +static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, bool committed); @@ -64,23 +64,23 @@ chunk_hooks_get_locked(arena_t *arena) } chunk_hooks_t -chunk_hooks_get(tsd_t *tsd, arena_t *arena) +chunk_hooks_get(tsdn_t *tsdn, arena_t *arena) { chunk_hooks_t chunk_hooks; - malloc_mutex_lock(tsd, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks = chunk_hooks_get_locked(arena); - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (chunk_hooks); } chunk_hooks_t -chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks) +chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) { chunk_hooks_t old_chunk_hooks; - malloc_mutex_lock(tsd, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); old_chunk_hooks = arena->chunk_hooks; /* * Copy each field atomically so that it is impossible for readers to @@ -105,13 +105,13 @@ chunk_hooks_set(tsd_t *tsd, arena_t *arena, const chunk_hooks_t *chunk_hooks) ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (old_chunk_hooks); } static void -chunk_hooks_assure_initialized_impl(tsd_t *tsd, arena_t *arena, +chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool locked) { static const chunk_hooks_t uninitialized_hooks = @@ -120,28 +120,28 @@ chunk_hooks_assure_initialized_impl(tsd_t *tsd, arena_t *arena, if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == 0) { *chunk_hooks = locked ? 
chunk_hooks_get_locked(arena) : - chunk_hooks_get(tsd, arena); + chunk_hooks_get(tsdn, arena); } } static void -chunk_hooks_assure_initialized_locked(tsd_t *tsd, arena_t *arena, +chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, true); + chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true); } static void -chunk_hooks_assure_initialized(tsd_t *tsd, arena_t *arena, +chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks) { - chunk_hooks_assure_initialized_impl(tsd, arena, chunk_hooks, false); + chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false); } bool -chunk_register(tsd_t *tsd, const void *chunk, const extent_node_t *node) +chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) { assert(extent_node_addr_get(node) == chunk); @@ -161,7 +161,7 @@ chunk_register(tsd_t *tsd, const void *chunk, const extent_node_t *node) high = atomic_read_z(&highchunks); } if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsd); + prof_gdump(tsdn); } return (false); @@ -199,7 +199,7 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, } static void * -chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, bool dalloc_node) @@ -221,8 +221,8 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Beware size_t wrap-around. 
*/ if (alloc_size < size) return (NULL); - malloc_mutex_lock(tsd, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; extent_node_init(&key, arena, new_addr, alloc_size, false, @@ -234,7 +234,7 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), @@ -253,7 +253,7 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0 && chunk_hooks->split(extent_node_addr_get(node), extent_node_size_get(node), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } /* Remove node from the tree. */ @@ -273,19 +273,19 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { if (dalloc_node && node != NULL) - arena_node_dalloc(tsd, arena, node); - malloc_mutex_unlock(tsd, &arena->chunks_mtx); - chunk_record(tsd, arena, chunk_hooks, chunks_szad, + arena_node_dalloc(tsdn, arena, node); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size + trailsize, zeroed, committed); return (NULL); } /* Insert the trailing space as a smaller chunk. 
*/ if (node == NULL) { - node = arena_node_alloc(tsd, arena); + node = arena_node_alloc(tsdn, arena); if (node == NULL) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); - chunk_record(tsd, arena, chunk_hooks, + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size + trailsize, zeroed, committed); return (NULL); @@ -299,16 +299,16 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { - malloc_mutex_unlock(tsd, &arena->chunks_mtx); - chunk_record(tsd, arena, chunk_hooks, chunks_szad, chunks_ad, + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size, zeroed, committed); return (NULL); } - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); assert(dalloc_node || node != NULL); if (dalloc_node && node != NULL) - arena_node_dalloc(tsd, arena, node); + arena_node_dalloc(tsdn, arena, node); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -331,7 +331,7 @@ chunk_recycle(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * them if they are returned. */ static void * -chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, +chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; @@ -343,7 +343,7 @@ chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); /* mmap. */ @@ -352,7 +352,7 @@ chunk_alloc_core(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, return (ret); /* "secondary" dss. 
*/ if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(tsd, arena, new_addr, size, alignment, zero, + chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) return (ret); @@ -383,7 +383,7 @@ chunk_alloc_base(size_t size) } void * -chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) { void *ret; @@ -395,9 +395,9 @@ chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); commit = true; - ret = chunk_recycle(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, - &arena->chunks_ad_cached, true, new_addr, size, alignment, zero, - &commit, dalloc_node); + ret = chunk_recycle(tsdn, arena, chunk_hooks, + &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, + new_addr, size, alignment, zero, &commit, dalloc_node); if (ret == NULL) return (NULL); assert(commit); @@ -407,11 +407,11 @@ chunk_alloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static arena_t * -chunk_arena_get(tsd_t *tsd, unsigned arena_ind) +chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) { arena_t *arena; - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsdn, arena_ind, false); /* * The arena we're allocating on behalf of must have been initialized * already. 
@@ -425,12 +425,12 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; arena_t *arena; - tsd = tsd_fetch(); - arena = chunk_arena_get(tsd, arena_ind); - ret = chunk_alloc_core(tsd, arena, new_addr, size, alignment, zero, + tsdn = tsdn_fetch(); + arena = chunk_arena_get(tsdn, arena_ind); + ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, arena->dss_prec); if (ret == NULL) return (NULL); @@ -441,7 +441,7 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, } static void * -chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; @@ -451,7 +451,7 @@ chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(tsd, arena, chunk_hooks, + ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, new_addr, size, alignment, zero, commit, true); @@ -462,14 +462,14 @@ chunk_alloc_retained(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } void * -chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - ret = chunk_alloc_retained(tsd, arena, chunk_hooks, new_addr, size, + ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, alignment, zero, commit); if (ret == NULL) { ret = chunk_hooks->alloc(new_addr, size, alignment, zero, @@ -484,7 +484,7 @@ 
chunk_alloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, bool committed) { @@ -496,8 +496,8 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, unzeroed = cache || !zeroed; JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); - malloc_mutex_lock(tsd, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsd, arena, chunk_hooks); + malloc_mutex_lock(tsdn, &arena->chunks_mtx); + chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); @@ -522,7 +522,7 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(tsd, arena); + node = arena_node_alloc(tsdn, arena); if (node == NULL) { /* * Node allocation failed, which is an exceedingly @@ -531,7 +531,7 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, * a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(tsd, arena, chunk_hooks, + chunk_purge_wrapper(tsdn, arena, chunk_hooks, chunk, size, 0, size); } goto label_return; @@ -568,15 +568,15 @@ chunk_record(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_szad_insert(chunks_szad, node); arena_chunk_cache_maybe_insert(arena, node, cache); - arena_node_dalloc(tsd, arena, prev); + arena_node_dalloc(tsdn, arena, prev); } label_return: - malloc_mutex_unlock(tsd, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); } void -chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed) { @@ -585,9 +585,9 @@ chunk_dalloc_cache(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_cached, + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, chunk, size, false, committed); - arena_maybe_purge(tsd, arena); + arena_maybe_purge(tsdn, arena); } static bool @@ -595,13 +595,13 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - if (!have_dss || !chunk_in_dss(tsd_fetch(), chunk)) + if (!have_dss || !chunk_in_dss(tsdn_fetch(), chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } void -chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { @@ -610,7 +610,7 @@ chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. 
*/ if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) return; @@ -621,7 +621,7 @@ chunk_dalloc_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(tsd, arena, chunk_hooks, &arena->chunks_szad_retained, + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); if (config_stats) @@ -662,11 +662,11 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_purge_wrapper(tsd_t *tsd, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length) { - chunk_hooks_assure_initialized(tsd, arena, chunk_hooks); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); } @@ -688,8 +688,8 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, if (!maps_coalesce) return (true); if (have_dss) { - tsd_t *tsd = tsd_fetch(); - if (chunk_in_dss(tsd, chunk_a) != chunk_in_dss(tsd, chunk_b)) + tsdn_t *tsdn = tsdn_fetch(); + if (chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, chunk_b)) return (true); } @@ -700,7 +700,7 @@ static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(tsd_fetch(), nelms * + return ((rtree_node_elm_t *)base_alloc(tsdn_fetch(), nelms * sizeof(rtree_node_elm_t))); } @@ -747,22 +747,22 @@ chunk_boot(void) } void -chunk_prefork(tsd_t *tsd) +chunk_prefork(tsdn_t *tsdn) { - chunk_dss_prefork(tsd); + chunk_dss_prefork(tsdn); } void -chunk_postfork_parent(tsd_t *tsd) +chunk_postfork_parent(tsdn_t *tsdn) { - chunk_dss_postfork_parent(tsd); + chunk_dss_postfork_parent(tsdn); } void -chunk_postfork_child(tsd_t *tsd) +chunk_postfork_child(tsdn_t *tsdn) { - 
chunk_dss_postfork_child(tsd); + chunk_dss_postfork_child(tsdn); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 3b3f2433..0b1f82bd 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -41,32 +41,32 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsd_t *tsd) +chunk_dss_prec_get(tsdn_t *tsdn) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); ret = dss_prec_default; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (ret); } bool -chunk_dss_prec_set(tsd_t *tsd, dss_prec_t dss_prec) +chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); dss_prec_default = dss_prec; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (false); } void * -chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, +chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { cassert(have_dss); @@ -80,7 +80,7 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); if (dss_prev != (void *)-1) { /* @@ -122,7 +122,7 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, if ((uintptr_t)ret < (uintptr_t)dss_max || (uintptr_t)dss_next < (uintptr_t)dss_max) { /* Wrap-around. */ - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (NULL); } incr = gap_size + cpad_size + size; @@ -130,11 +130,11 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, if (dss_prev == dss_max) { /* Success. 
*/ dss_max = dss_next; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(tsd, arena, + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, cpad, cpad_size, false, true); } @@ -149,25 +149,25 @@ chunk_alloc_dss(tsd_t *tsd, arena_t *arena, void *new_addr, size_t size, } } while (dss_prev != (void *)-1); } - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (NULL); } bool -chunk_in_dss(tsd_t *tsd, void *chunk) +chunk_in_dss(tsdn_t *tsdn, void *chunk) { bool ret; cassert(have_dss); - malloc_mutex_lock(tsd, &dss_mtx); + malloc_mutex_lock(tsdn, &dss_mtx); if ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < (uintptr_t)dss_max) ret = true; else ret = false; - malloc_mutex_unlock(tsd, &dss_mtx); + malloc_mutex_unlock(tsdn, &dss_mtx); return (ret); } @@ -188,27 +188,27 @@ chunk_dss_boot(void) } void -chunk_dss_prefork(tsd_t *tsd) +chunk_dss_prefork(tsdn_t *tsdn) { if (have_dss) - malloc_mutex_prefork(tsd, &dss_mtx); + malloc_mutex_prefork(tsdn, &dss_mtx); } void -chunk_dss_postfork_parent(tsd_t *tsd) +chunk_dss_postfork_parent(tsdn_t *tsdn) { if (have_dss) - malloc_mutex_postfork_parent(tsd, &dss_mtx); + malloc_mutex_postfork_parent(tsdn, &dss_mtx); } void -chunk_dss_postfork_child(tsd_t *tsd) +chunk_dss_postfork_child(tsdn_t *tsdn) { if (have_dss) - malloc_mutex_postfork_child(tsd, &dss_mtx); + malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/src/ckh.c b/src/ckh.c index 25185974..747c1c86 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); -static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); +static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); +static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); /******************************************************************************/ @@ -244,7 +244,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsd_t *tsd, ckh_t *ckh) +ckh_grow(tsdn_t *tsdn, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,8 +270,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsd, NULL)); + tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, + true, arena_ichoose(tsdn, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, NULL, true, true); + idalloctm(tsdn, tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, NULL, true, true); + idalloctm(tsdn, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -299,7 +299,7 @@ label_return: } static void -ckh_shrink(tsd_t *tsd, ckh_t *ckh) +ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -314,8 +314,8 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsd, NULL)); + tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, + arena_ichoose(tsdn, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd, tab, NULL, true, true); + idalloctm(tsdn, tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd, ckh->tab, NULL, true, true); + idalloctm(tsdn, ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -347,7 +347,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) } bool -ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -391,8 +391,8 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsd, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsd, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, + true, arena_ichoose(tsdn, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -404,7 +404,7 @@ label_return: } void -ckh_delete(tsd_t *tsd, ckh_t *ckh) +ckh_delete(tsdn_t *tsdn, ckh_t *ckh) { assert(ckh != NULL); @@ -421,7 +421,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd, ckh->tab, NULL, true, true); + idalloctm(tsdn, ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -456,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -468,7 +468,7 @@ ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsd, ckh)) { + if (ckh_grow(tsdn, ckh)) { ret = true; goto label_return; } @@ -480,7 +480,7 @@ label_return: } bool -ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; @@ -502,7 +502,7 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + LG_CKH_BUCKET_CELLS - 2)) && 
ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. */ - ckh_shrink(tsd, ckh); + ckh_shrink(tsdn, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index fd5561a3..dad80086 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -46,20 +46,20 @@ static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ void *oldp, size_t *oldlenp, void *newp, size_t newlen); #define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(tsd_t *tsd, \ +static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ const size_t *mib, size_t miblen, size_t i); static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, +static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena); static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); -static void ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i); -static bool ctl_grow(tsd_t *tsd); -static void ctl_refresh(tsd_t *tsd); -static bool ctl_init(tsd_t *tsd); -static int ctl_lookup(tsd_t *tsd, const char *name, +static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i); +static bool ctl_grow(tsdn_t *tsdn); +static void ctl_refresh(tsdn_t *tsdn); +static bool ctl_init(tsdn_t *tsdn); +static int ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); CTL_PROTO(version) @@ -117,7 +117,7 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) -static void arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all); +static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all); CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_reset) @@ -560,12 +560,12 @@ ctl_arena_clear(ctl_arena_stats_t *astats) } static void -ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena) 
+ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) { unsigned i; if (config_stats) { - arena_stats_merge(tsd, arena, &cstats->nthreads, &cstats->dss, + arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty, &cstats->astats, cstats->bstats, cstats->lstats, cstats->hstats); @@ -578,7 +578,7 @@ ctl_arena_stats_amerge(tsd_t *tsd, ctl_arena_stats_t *cstats, arena_t *arena) cstats->nrequests_small += cstats->bstats[i].nrequests; } } else { - arena_basic_stats_merge(tsd, arena, &cstats->nthreads, + arena_basic_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty); } @@ -656,24 +656,24 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } static void -ctl_arena_refresh(tsd_t *tsd, arena_t *arena, unsigned i) +ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) { ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; ctl_arena_clear(astats); - ctl_arena_stats_amerge(tsd, astats, arena); + ctl_arena_stats_amerge(tsdn, astats, arena); /* Merge into sum stats as well. */ ctl_arena_stats_smerge(sstats, astats); } static bool -ctl_grow(tsd_t *tsd) +ctl_grow(tsdn_t *tsdn) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(tsd, ctl_stats.narenas) == NULL) + if (arena_init(tsdn, ctl_stats.narenas) == NULL) return (true); /* Allocate extended arena stats. 
*/ @@ -708,7 +708,7 @@ ctl_grow(tsd_t *tsd) } static void -ctl_refresh(tsd_t *tsd) +ctl_refresh(tsdn_t *tsdn) { unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); @@ -720,19 +720,19 @@ ctl_refresh(tsd_t *tsd) ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); for (i = 0; i < ctl_stats.narenas; i++) - tarenas[i] = arena_get(tsd, i, false); + tarenas[i] = arena_get(tsdn, i, false); for (i = 0; i < ctl_stats.narenas; i++) { bool initialized = (tarenas[i] != NULL); ctl_stats.arenas[i].initialized = initialized; if (initialized) - ctl_arena_refresh(tsd, tarenas[i], i); + ctl_arena_refresh(tsdn, tarenas[i], i); } if (config_stats) { size_t base_allocated, base_resident, base_mapped; - base_stats_get(tsd, &base_allocated, &base_resident, + base_stats_get(tsdn, &base_allocated, &base_resident, &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + @@ -758,11 +758,11 @@ ctl_refresh(tsd_t *tsd) } static bool -ctl_init(tsd_t *tsd) +ctl_init(tsdn_t *tsdn) { bool ret; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { /* * Allocate space for one extra arena stats element, which @@ -804,18 +804,18 @@ ctl_init(tsd_t *tsd) ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; - ctl_refresh(tsd); + ctl_refresh(tsdn); ctl_initialized = true; } ret = false; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } static int -ctl_lookup(tsd_t *tsd, const char *name, ctl_node_t const **nodesp, +ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp) { int ret; @@ -868,7 +868,7 @@ ctl_lookup(tsd_t *tsd, const char *name, ctl_node_t const **nodesp, } inode = ctl_indexed_node(node->children); - node = inode->index(tsd, mibp, *depthp, (size_t)index); + node = inode->index(tsdn, mibp, *depthp, (size_t)index); if (node == NULL) { ret = ENOENT; goto label_return; @@ -921,13 +921,13 
@@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init(tsd)) { + if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd, name, nodes, mib, &depth); + ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); if (ret != 0) goto label_return; @@ -944,16 +944,16 @@ label_return: } int -ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) +ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init(tsd)) { + if (!ctl_initialized && ctl_init(tsdn)) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(tsd, name, NULL, mibp, miblenp); + ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp); label_return: return(ret); } @@ -966,7 +966,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init(tsd)) { + if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } @@ -988,7 +988,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Indexed element. 
*/ inode = ctl_indexed_node(node->children); - node = inode->index(tsd, mib, miblen, mib[i]); + node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; @@ -1021,24 +1021,24 @@ ctl_boot(void) } void -ctl_prefork(tsd_t *tsd) +ctl_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(tsd, &ctl_mtx); + malloc_mutex_prefork(tsdn, &ctl_mtx); } void -ctl_postfork_parent(tsd_t *tsd) +ctl_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(tsd, &ctl_mtx); + malloc_mutex_postfork_parent(tsdn, &ctl_mtx); } void -ctl_postfork_child(tsd_t *tsd) +ctl_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(tsd, &ctl_mtx); + malloc_mutex_postfork_child(tsdn, &ctl_mtx); } /******************************************************************************/ @@ -1104,7 +1104,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ if (!(c)) \ return (ENOENT); \ if (l) \ - malloc_mutex_lock(tsd, &ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1112,7 +1112,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ ret = 0; \ label_return: \ if (l) \ - malloc_mutex_unlock(tsd, &ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1126,14 +1126,14 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ \ if (!(c)) \ return (ENOENT); \ - malloc_mutex_lock(tsd, &ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - malloc_mutex_unlock(tsd, &ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1145,14 +1145,14 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ int ret; \ t oldval; \ \ - malloc_mutex_lock(tsd, &ctl_mtx); \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - 
malloc_mutex_unlock(tsd, &ctl_mtx); \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ return (ret); \ } @@ -1243,15 +1243,15 @@ epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; UNUSED uint64_t newval; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(newval, uint64_t); if (newp != NULL) - ctl_refresh(tsd); + ctl_refresh(tsd_tsdn(tsd)); READ(ctl_epoch, uint64_t); ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1317,7 +1317,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); @@ -1331,7 +1331,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } /* Initialize arena if necessary. */ - newarena = arena_get(tsd, newind, true); + newarena = arena_get(tsd_tsdn(tsd), newind, true); if (newarena == NULL) { ret = EAGAIN; goto label_return; @@ -1341,15 +1341,15 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (config_tcache) { tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tsd, tcache, oldarena, - newarena); + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + oldarena, newarena); } } } ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1476,9 +1476,9 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (!config_tcache) return (ENOENT); - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd, &tcache_ind)) { + if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -1486,7 +1486,7 @@ tcache_create_ctl(tsd_t 
*tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1541,10 +1541,10 @@ label_return: /******************************************************************************/ static void -arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all) +arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); { unsigned narenas = ctl_stats.narenas; @@ -1553,30 +1553,30 @@ arena_i_purge(tsd_t *tsd, unsigned arena_ind, bool all) VARIABLE_ARRAY(arena_t *, tarenas, narenas); for (i = 0; i < narenas; i++) - tarenas[i] = arena_get(tsd, i, false); + tarenas[i] = arena_get(tsdn, i, false); /* * No further need to hold ctl_mtx, since narenas and * tarenas contain everything needed below. */ - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) - arena_purge(tsd, tarenas[i], all); + arena_purge(tsdn, tarenas[i], all); } } else { arena_t *tarena; assert(arena_ind < narenas); - tarena = arena_get(tsd, arena_ind, false); + tarena = arena_get(tsdn, arena_ind, false); /* No further need to hold ctl_mtx. 
*/ - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); if (tarena != NULL) - arena_purge(tsd, tarena, all); + arena_purge(tsdn, tarena, all); } } } @@ -1589,7 +1589,7 @@ arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); - arena_i_purge(tsd, (unsigned)mib[1], true); + arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true); ret = 0; label_return: @@ -1604,7 +1604,7 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); - arena_i_purge(tsd, (unsigned)mib[1], false); + arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false); ret = 0; label_return: @@ -1630,13 +1630,13 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_ind = (unsigned)mib[1]; if (config_debug) { - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); assert(arena_ind < ctl_stats.narenas); - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); } assert(arena_ind >= opt_narenas); - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); arena_reset(tsd, arena); @@ -1655,7 +1655,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(dss, const char *); if (dss != NULL) { int i; @@ -1676,20 +1676,20 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (arena_ind < ctl_stats.narenas) { - arena_t *arena = arena_get(tsd, arena_ind, false); + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(tsd, arena, dss_prec))) { + arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) { ret = EFAULT; goto label_return; } - dss_prec_old = arena_dss_prec_get(tsd, arena); + 
dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd, dss_prec)) { + chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd); + dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); } dss = dss_prec_names[dss_prec_old]; @@ -1697,7 +1697,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1709,14 +1709,14 @@ arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_get(tsd, arena); + size_t oldval = arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1724,7 +1724,8 @@ arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (arena_lg_dirty_mult_set(tsd, arena, *(ssize_t *)newp)) { + if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1743,14 +1744,14 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - arena = arena_get(tsd, arena_ind, false); + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL) { ret = EFAULT; goto label_return; } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(tsd, arena); + size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1758,7 +1759,8 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void 
*oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_set(tsd, arena, *(ssize_t *)newp)) { + if (arena_decay_time_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1777,18 +1779,18 @@ arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned arena_ind = (unsigned)mib[1]; arena_t *arena; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd, arena_ind, false)) != NULL) { + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { chunk_hooks_t old_chunk_hooks, new_chunk_hooks; WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(tsd, arena, + old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena, &new_chunk_hooks); READ(old_chunk_hooks, chunk_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = chunk_hooks_get(tsd, - arena); + chunk_hooks_t old_chunk_hooks = + chunk_hooks_get(tsd_tsdn(tsd), arena); READ(old_chunk_hooks, chunk_hooks_t); } } else { @@ -1797,16 +1799,16 @@ arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } static const ctl_named_node_t * -arena_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (i > ctl_stats.narenas) { ret = NULL; goto label_return; @@ -1814,7 +1816,7 @@ arena_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) ret = super_arena_i_node; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } @@ -1827,7 +1829,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned narenas; - malloc_mutex_lock(tsd, 
&ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (*oldlenp != sizeof(unsigned)) { ret = EINVAL; @@ -1838,7 +1840,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1849,7 +1851,7 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned nread, i; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { ret = EINVAL; @@ -1864,7 +1866,7 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1929,7 +1931,7 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) @@ -1940,7 +1942,7 @@ arenas_bin_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lrun_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > nlclasses) @@ -1952,7 +1954,7 @@ CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), size_t) static const ctl_named_node_t * 
-arenas_hchunk_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > nhclasses) @@ -1967,9 +1969,9 @@ arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned narenas; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (ctl_grow(tsd)) { + if (ctl_grow(tsd_tsdn(tsd))) { ret = EAGAIN; goto label_return; } @@ -1978,7 +1980,7 @@ arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } @@ -1999,9 +2001,10 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(tsd, *(bool *)newp); + oldval = prof_thread_active_init_set(tsd_tsdn(tsd), + *(bool *)newp); } else - oldval = prof_thread_active_init_get(tsd); + oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -2024,9 +2027,9 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_active_set(tsd, *(bool *)newp); + oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp); } else - oldval = prof_active_get(tsd); + oldval = prof_active_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -2072,9 +2075,9 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - oldval = prof_gdump_set(tsd, *(bool *)newp); + oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); } else - oldval = prof_gdump_get(tsd); + oldval = prof_gdump_get(tsd_tsdn(tsd)); READ(oldval, bool); ret = 0; @@ -2097,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - 
prof_reset(tsd, lg_sample); + prof_reset(tsd_tsdn(tsd), lg_sample); ret = 0; label_return: @@ -2185,7 +2188,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, +stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { @@ -2204,7 +2207,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, +stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { @@ -2224,7 +2227,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) static const ctl_named_node_t * -stats_arenas_i_hchunks_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, +stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { @@ -2234,11 +2237,11 @@ stats_arenas_i_hchunks_j_index(tsd_t *tsd, const size_t *mib, size_t miblen, } static const ctl_named_node_t * -stats_arenas_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t * ret; - malloc_mutex_lock(tsd, &ctl_mtx); + malloc_mutex_lock(tsdn, &ctl_mtx); if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { ret = NULL; goto label_return; @@ -2246,6 +2249,6 @@ stats_arenas_i_index(tsd_t *tsd, const size_t *mib, size_t miblen, size_t i) ret = super_stats_arenas_i_node; label_return: - malloc_mutex_unlock(tsd, &ctl_mtx); + malloc_mutex_unlock(tsdn, &ctl_mtx); return (ret); } diff --git a/src/huge.c b/src/huge.c index 71fb50c5..0bf61622 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,12 +15,12 @@ huge_node_get(const void *ptr) } 
static bool -huge_node_set(tsd_t *tsd, const void *ptr, extent_node_t *node) +huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(tsd, ptr, node)); + return (chunk_register(tsdn, ptr, node)); } static void @@ -31,16 +31,16 @@ huge_node_unset(const void *ptr, const extent_node_t *node) } void * -huge_malloc(tsd_t *tsd, arena_t *arena, size_t usize, bool zero) +huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (huge_palloc(tsd, arena, usize, chunksize, zero)); + return (huge_palloc(tsdn, arena, usize, chunksize, zero)); } void * -huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, +huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { void *ret; @@ -50,15 +50,17 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ + assert(!tsdn_null(tsdn) || arena != NULL); + ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ - assert(tsd != NULL || arena != NULL); - node = ipallocztm(tsd, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsd, arena)); + assert(tsdn != NULL || arena != NULL); + node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), + CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); if (node == NULL) return (NULL); @@ -67,26 +69,26 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. 
*/ is_zeroed = zero; - arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsd, arena, - usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsd, node, NULL, true, true); + arena = arena_choose(tsdn_tsd(tsdn), arena); + if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, + arena, usize, alignment, &is_zeroed)) == NULL) { + idalloctm(tsdn, node, NULL, true, true); return (NULL); } extent_node_init(node, arena, ret, usize, is_zeroed, true); - if (huge_node_set(tsd, ret, node)) { - arena_chunk_dalloc_huge(tsd, arena, ret, usize); - idalloctm(tsd, node, NULL, true, true); + if (huge_node_set(tsdn, ret, node)) { + arena_chunk_dalloc_huge(tsdn, arena, ret, usize); + idalloctm(tsdn, node, NULL, true, true); return (NULL); } /* Insert node into huge. */ - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_elm_new(node, ql_link); ql_tail_insert(&arena->huge, node, ql_link); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) @@ -94,7 +96,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, } else if (config_fill && unlikely(opt_junk_alloc)) memset(ret, JEMALLOC_ALLOC_JUNK, usize); - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); return (ret); } @@ -103,7 +105,7 @@ huge_palloc(tsd_t *tsd, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsd_t *tsd, void *ptr, size_t usize) +huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -111,7 +113,7 @@ huge_dalloc_junk(tsd_t *tsd, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. 
*/ - if (!config_munmap || (have_dss && chunk_in_dss(tsd, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -122,7 +124,7 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, +huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; @@ -151,22 +153,22 @@ huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(tsd, arena, + post_zeroed = !chunk_purge_wrapper(tsdn, arena, &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, sdiff); } } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); - arena_chunk_ralloc_huge_similar(tsd, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); /* Fill if necessary (growing). 
*/ if (oldsize < usize) { @@ -183,7 +185,8 @@ huge_ralloc_no_move_similar(tsd_t *tsd, void *ptr, size_t oldsize, } static bool -huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) +huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize) { extent_node_t *node; arena_t *arena; @@ -194,7 +197,7 @@ huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) node = huge_node_get(ptr); arena = extent_node_arena_get(node); pre_zeroed = extent_node_zeroed_get(node); - chunk_hooks = chunk_hooks_get(tsd, arena); + chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); @@ -207,11 +210,11 @@ huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsd, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { - post_zeroed = !chunk_purge_wrapper(tsd, arena, + post_zeroed = !chunk_purge_wrapper(tsdn, arena, &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + usize), CHUNK_CEILING(oldsize), CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); @@ -219,31 +222,31 @@ huge_ralloc_no_move_shrink(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize) } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* Zap the excess chunks. 
*/ - arena_chunk_ralloc_huge_shrink(tsd, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize); return (false); } static bool -huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, - bool zero) { +huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, + size_t usize, bool zero) { extent_node_t *node; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); is_zeroed_subchunk = extent_node_zeroed_get(node); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so @@ -251,14 +254,14 @@ huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, */ is_zeroed_chunk = zero; - if (arena_chunk_ralloc_huge_expand(tsd, arena, ptr, oldsize, usize, + if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ extent_node_size_set(node, usize); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -279,7 +282,7 @@ huge_ralloc_no_move_expand(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize, } bool -huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, +huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { @@ -293,16 +296,16 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. 
*/ - if (!huge_ralloc_no_move_expand(tsd, ptr, oldsize, usize_max, + if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max, zero)) { - arena_decay_tick(tsd, huge_aalloc(ptr)); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } /* Try again, this time with usize_min. */ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsd, + CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_min, zero)) { - arena_decay_tick(tsd, huge_aalloc(ptr)); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } } @@ -313,16 +316,17 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(tsd, ptr, oldsize, usize_min, + huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min, usize_max, zero); - arena_decay_tick(tsd, huge_aalloc(ptr)); + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } /* Attempt to shrink the allocation in-place. 
*/ if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(tsd, ptr, oldsize, usize_max)) { - arena_decay_tick(tsd, huge_aalloc(ptr)); + if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize, + usize_max)) { + arena_decay_tick(tsdn, huge_aalloc(ptr)); return (false); } } @@ -330,18 +334,18 @@ huge_ralloc_no_move(tsd_t *tsd, void *ptr, size_t oldsize, size_t usize_min, } static void * -huge_ralloc_move_helper(tsd_t *tsd, arena_t *arena, size_t usize, +huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { if (alignment <= chunksize) - return (huge_malloc(tsd, arena, usize, zero)); - return (huge_palloc(tsd, arena, usize, alignment, zero)); + return (huge_malloc(tsdn, arena, usize, zero)); + return (huge_palloc(tsdn, arena, usize, alignment, zero)); } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -350,7 +354,8 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, assert(usize > 0 && usize <= HUGE_MAXCLASS); /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsd, ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize, + zero)) return (ptr); /* @@ -358,7 +363,8 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. 
*/ - ret = huge_ralloc_move_helper(tsd, arena, usize, alignment, zero); + ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment, + zero); if (ret == NULL) return (NULL); @@ -369,7 +375,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, } void -huge_dalloc(tsd_t *tsd, void *ptr) +huge_dalloc(tsdn_t *tsdn, void *ptr) { extent_node_t *node; arena_t *arena; @@ -377,17 +383,17 @@ huge_dalloc(tsd_t *tsd, void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); huge_node_unset(ptr, node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, node, ql_link); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsd, extent_node_addr_get(node), + huge_dalloc_junk(tsdn, extent_node_addr_get(node), extent_node_size_get(node)); - arena_chunk_dalloc_huge(tsd, extent_node_arena_get(node), + arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsd, node, NULL, true, true); + idalloctm(tsdn, node, NULL, true, true); - arena_decay_tick(tsd, arena); + arena_decay_tick(tsdn, arena); } arena_t * @@ -398,7 +404,7 @@ huge_aalloc(const void *ptr) } size_t -huge_salloc(tsd_t *tsd, const void *ptr) +huge_salloc(tsdn_t *tsdn, const void *ptr) { size_t size; extent_node_t *node; @@ -406,15 +412,15 @@ huge_salloc(tsd_t *tsd, const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); size = extent_node_size_get(node); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (size); } prof_tctx_t * -huge_prof_tctx_get(tsd_t *tsd, const void *ptr) +huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *tctx; extent_node_t *node; @@ -422,29 +428,29 @@ huge_prof_tctx_get(tsd_t 
*tsd, const void *ptr) node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); tctx = extent_node_prof_tctx_get(node); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (tctx); } void -huge_prof_tctx_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { extent_node_t *node; arena_t *arena; node = huge_node_get(ptr); arena = extent_node_arena_get(node); - malloc_mutex_lock(tsd, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_node_prof_tctx_set(node, tctx); - malloc_mutex_unlock(tsd, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); } void -huge_prof_tctx_reset(tsd_t *tsd, const void *ptr) +huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr) { - huge_prof_tctx_set(tsd, ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/jemalloc.c b/src/jemalloc.c index b1d691ed..40eb2eaa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -318,15 +318,15 @@ a0ialloc(size_t size, bool zero, bool is_metadata) if (unlikely(malloc_init_a0())) return (NULL); - return (iallocztm(NULL, size, size2index(size), zero, NULL, is_metadata, - arena_get(NULL, 0, true), true)); + return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL, + is_metadata, arena_get(TSDN_NULL, 0, true), true)); } static void a0idalloc(void *ptr, bool is_metadata) { - idalloctm(NULL, ptr, false, is_metadata, true); + idalloctm(TSDN_NULL, ptr, false, is_metadata, true); } void * @@ -413,7 +413,7 @@ narenas_total_get(void) /* Create a new arena and insert it into the arenas array at index ind. 
*/ static arena_t * -arena_init_locked(tsd_t *tsd, unsigned ind) +arena_init_locked(tsdn_t *tsdn, unsigned ind) { arena_t *arena; @@ -427,26 +427,26 @@ arena_init_locked(tsd_t *tsd, unsigned ind) * Another thread may have already initialized arenas[ind] if it's an * auto arena. */ - arena = arena_get(tsd, ind, false); + arena = arena_get(tsdn, ind, false); if (arena != NULL) { assert(ind < narenas_auto); return (arena); } /* Actually initialize the arena. */ - arena = arena_new(tsd, ind); + arena = arena_new(tsdn, ind); arena_set(ind, arena); return (arena); } arena_t * -arena_init(tsd_t *tsd, unsigned ind) +arena_init(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - malloc_mutex_lock(tsd, &arenas_lock); - arena = arena_init_locked(tsd, ind); - malloc_mutex_unlock(tsd, &arenas_lock); + malloc_mutex_lock(tsdn, &arenas_lock); + arena = arena_init_locked(tsdn, ind); + malloc_mutex_unlock(tsdn, &arenas_lock); return (arena); } @@ -455,7 +455,7 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; - arena = arena_get(tsd, ind, false); + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); if (tsd_nominal(tsd)) { @@ -471,8 +471,8 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; - oldarena = arena_get(tsd, oldind, false); - newarena = arena_get(tsd, newind, false); + oldarena = arena_get(tsd_tsdn(tsd), oldind, false); + newarena = arena_get(tsd_tsdn(tsd), newind, false); arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); @@ -483,7 +483,7 @@ arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; - arena = arena_get(tsd, ind, false); + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_dec(arena, internal); if (internal) tsd_iarena_set(tsd, NULL); @@ -588,19 +588,20 @@ arena_choose_hard(tsd_t *tsd, bool internal) choose[j] = 0; first_null = narenas_auto; - malloc_mutex_lock(tsd, &arenas_lock); - 
assert(arena_get(tsd, 0, false) != NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock); + assert(arena_get(tsd_tsdn(tsd), 0, false) != NULL); for (i = 1; i < narenas_auto; i++) { - if (arena_get(tsd, i, false) != NULL) { + if (arena_get(tsd_tsdn(tsd), i, false) != NULL) { /* * Choose the first arena that has the lowest * number of threads assigned to it. */ for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd, i, - false), !!j) < - arena_nthreads_get(arena_get(tsd, - choose[j], false), !!j)) + if (arena_nthreads_get(arena_get( + tsd_tsdn(tsd), i, false), !!j) < + arena_nthreads_get(arena_get( + tsd_tsdn(tsd), choose[j], false), + !!j)) choose[j] = i; } } else if (first_null == narenas_auto) { @@ -618,22 +619,27 @@ arena_choose_hard(tsd_t *tsd, bool internal) } for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd, choose[j], false), - !!j) == 0 || first_null == narenas_auto) { + if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), + choose[j], false), !!j) == 0 || first_null == + narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. */ - if (!!j == internal) - ret = arena_get(tsd, choose[j], false); + if (!!j == internal) { + ret = arena_get(tsd_tsdn(tsd), + choose[j], false); + } } else { arena_t *arena; /* Initialize a new arena. 
*/ choose[j] = first_null; - arena = arena_init_locked(tsd, choose[j]); + arena = arena_init_locked(tsd_tsdn(tsd), + choose[j]); if (arena == NULL) { - malloc_mutex_unlock(tsd, &arenas_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), + &arenas_lock); return (NULL); } if (!!j == internal) @@ -641,9 +647,9 @@ arena_choose_hard(tsd_t *tsd, bool internal) } arena_bind(tsd, choose[j], !!j); } - malloc_mutex_unlock(tsd, &arenas_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); } else { - ret = arena_get(tsd, 0, false); + ret = arena_get(tsd_tsdn(tsd), 0, false); arena_bind(tsd, 0, false); arena_bind(tsd, 0, true); } @@ -719,10 +725,10 @@ stats_print_atexit(void) { if (config_tcache && config_stats) { - tsd_t *tsd; + tsdn_t *tsdn; unsigned narenas, i; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); /* * Merge stats from extant threads. This is racy, since @@ -732,7 +738,7 @@ stats_print_atexit(void) * continue to allocate. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { - arena_t *arena = arena_get(tsd, i, false); + arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { tcache_t *tcache; @@ -742,11 +748,11 @@ stats_print_atexit(void) * and bin locks in the opposite order, * deadlocks may result. 
*/ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tsd, tcache, arena); + tcache_stats_merge(tsdn, tcache, arena); } - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } } } @@ -1256,7 +1262,7 @@ malloc_init_hard_needed(void) } static bool -malloc_init_hard_a0_locked(tsd_t **tsd) +malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; @@ -1283,7 +1289,7 @@ malloc_init_hard_a0_locked(tsd_t **tsd) prof_boot1(); if (arena_boot()) return (true); - if (config_tcache && tcache_boot(*tsd)) + if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) return (true); @@ -1299,15 +1305,7 @@ malloc_init_hard_a0_locked(tsd_t **tsd) * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(*tsd, 0) == NULL) - return (true); - - /* - * Initialize tsd, since some code paths cause chunk allocation, which - * in turn depends on tsd. - */ - *tsd = malloc_tsd_boot0(); - if (*tsd == NULL) + if (arena_init(TSDN_NULL, 0) == NULL) return (true); malloc_init_state = malloc_init_a0_initialized; @@ -1319,21 +1317,19 @@ static bool malloc_init_hard_a0(void) { bool ret; - tsd_t *tsd = NULL; - malloc_mutex_lock(tsd, &init_lock); - ret = malloc_init_hard_a0_locked(&tsd); - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); + ret = malloc_init_hard_a0_locked(); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (ret); } /* Initialize data structures which may trigger recursive allocation. 
*/ static bool -malloc_init_hard_recursible(tsd_t *tsd) +malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; - malloc_mutex_unlock(tsd, &init_lock); ncpus = malloc_ncpus(); @@ -1345,17 +1341,15 @@ malloc_init_hard_recursible(tsd_t *tsd) malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) abort(); - malloc_mutex_lock(tsd, &init_lock); return (true); } #endif - malloc_mutex_lock(tsd, &init_lock); return (false); } static bool -malloc_init_hard_finish(tsd_t *tsd) +malloc_init_hard_finish(tsdn_t *tsdn) { if (malloc_mutex_boot()) @@ -1383,7 +1377,7 @@ malloc_init_hard_finish(tsd_t *tsd) narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(tsd, sizeof(arena_t *) * + arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) * (MALLOCX_ARENA_MAX+1)); if (arenas == NULL) return (true); @@ -1399,39 +1393,43 @@ malloc_init_hard_finish(tsd_t *tsd) static bool malloc_init_hard(void) { - tsd_t *tsd = NULL; + tsd_t *tsd; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif - malloc_mutex_lock(tsd, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (false); } if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked(&tsd)) { - malloc_mutex_unlock(tsd, &init_lock); + malloc_init_hard_a0_locked()) { + malloc_mutex_unlock(TSDN_NULL, &init_lock); return (true); } - if (malloc_init_hard_recursible(tsd)) { - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); + /* Recursive allocation relies on functional tsd. 
*/ + tsd = malloc_tsd_boot0(); + if (tsd == NULL) + return (true); + if (malloc_init_hard_recursible()) + return (true); + malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); + + if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } - if (config_prof && prof_boot2(tsd)) { - malloc_mutex_unlock(tsd, &init_lock); + if (malloc_init_hard_finish(tsd_tsdn(tsd))) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } - if (malloc_init_hard_finish(tsd)) { - malloc_mutex_unlock(tsd, &init_lock); - return (true); - } - - malloc_mutex_unlock(tsd, &init_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_tsd_boot1(); return (false); } @@ -1457,7 +1455,7 @@ ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = ialloc(tsd, usize, ind, zero, slow_path); @@ -1479,7 +1477,7 @@ ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd, p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, usize, tctx); return (p); } @@ -1487,19 +1485,24 @@ ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) /* * ialloc_body() is inlined so that fast and slow paths are generated separately * with statically known slow_path. + * + * This function guarantees that *tsdn is non-NULL on success. 
*/ JEMALLOC_ALWAYS_INLINE_C void * -ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) +ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, + bool slow_path) { + tsd_t *tsd; szind_t ind; if (slow_path && unlikely(malloc_init())) { - *tsd = NULL; + *tsdn = NULL; return (NULL); } - *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); + tsd = tsd_fetch(); + *tsdn = tsd_tsdn(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); ind = size2index(size); if (unlikely(ind >= NSIZES)) @@ -1512,16 +1515,18 @@ ialloc_body(size_t size, bool zero, tsd_t **tsd, size_t *usize, bool slow_path) } if (config_prof && opt_prof) - return (ialloc_prof(*tsd, *usize, ind, zero, slow_path)); + return (ialloc_prof(tsd, *usize, ind, zero, slow_path)); - return (ialloc(*tsd, size, ind, zero, slow_path)); + return (ialloc(tsd, size, ind, zero, slow_path)); } JEMALLOC_ALWAYS_INLINE_C void -ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, +ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, bool update_errno, bool slow_path) { + assert(!tsdn_null(tsdn) || ret == NULL); + if (unlikely(ret == NULL)) { if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { malloc_printf(": Error in %s(): out of " @@ -1532,10 +1537,10 @@ ialloc_post_check(void *ret, tsd_t *tsd, size_t usize, const char *func, set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); - *tsd_thread_allocatedp_get(tsd) += usize; + assert(usize == isalloc(tsdn, ret, config_prof)); + *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1544,20 +1549,20 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; size_t usize JEMALLOC_CC_SILENCE_INIT(0); if (size == 0) size = 1; if (likely(!malloc_slow)) { - ret 
= ialloc_body(size, false, &tsd, &usize, false); - ialloc_post_check(ret, tsd, usize, "malloc", true, false); + ret = ialloc_body(size, false, &tsdn, &usize, false); + ialloc_post_check(ret, tsdn, usize, "malloc", true, false); } else { - ret = ialloc_body(size, false, &tsd, &usize, true); - ialloc_post_check(ret, tsd, usize, "malloc", true, true); + ret = ialloc_body(size, false, &tsdn, &usize, true); + ialloc_post_check(ret, tsdn, usize, "malloc", true, true); UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } return (ret); @@ -1576,7 +1581,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1598,7 +1603,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd, p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, usize, tctx); return (p); } @@ -1620,7 +1625,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) goto label_oom; } tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (size == 0) size = 1; @@ -1655,12 +1660,13 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(tsd, result, config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); - JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd, result, usize, false); - witness_assert_lockless(tsd); + JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, + false); + witness_assert_lockless(tsd_tsdn(tsd)); 
return (ret); label_oom: assert(result == NULL); @@ -1670,7 +1676,7 @@ label_oom: abort(); } ret = ENOMEM; - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); goto label_return; } @@ -1707,7 +1713,7 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void *ret; - tsd_t *tsd; + tsdn_t *tsdn; size_t num_size; size_t usize JEMALLOC_CC_SILENCE_INIT(0); @@ -1727,13 +1733,13 @@ je_calloc(size_t num, size_t size) num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */ if (likely(!malloc_slow)) { - ret = ialloc_body(num_size, true, &tsd, &usize, false); - ialloc_post_check(ret, tsd, usize, "calloc", true, false); + ret = ialloc_body(num_size, true, &tsdn, &usize, false); + ialloc_post_check(ret, tsdn, usize, "calloc", true, false); } else { - ret = ialloc_body(num_size, true, &tsd, &usize, true); - ialloc_post_check(ret, tsd, usize, "calloc", true, true); + ret = ialloc_body(num_size, true, &tsdn, &usize, true); + ialloc_post_check(ret, tsdn, usize, "calloc", true, true); UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsd, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } return (ret); @@ -1751,7 +1757,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); @@ -1766,7 +1772,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -1788,16 
+1794,16 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); prof_free(tsd, ptr, usize); } else if (config_stats || config_valgrind) - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; @@ -1805,7 +1811,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) iqalloc(tsd, ptr, tcache, false); else { if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd, ptr); + rzsize = p2rz(tsd_tsdn(tsd), ptr); iqalloc(tsd, ptr, tcache, true); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1816,7 +1822,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1826,7 +1832,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd, ptr); + rzsize = p2rz(tsd_tsdn(tsd), ptr); isqalloc(tsd, ptr, usize, tcache, slow_path); JEMALLOC_VALGRIND_FREE(ptr, rzsize); } @@ -1837,13 +1843,15 @@ JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { void *ret; - tsd_t *tsd JEMALLOC_CC_SILENCE_INIT(NULL); + tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(size == 0)) { if (ptr != NULL) { + tsd_t *tsd; + /* realloc(ptr, 0) is equivalent to 
free(ptr). */ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); @@ -1854,14 +1862,17 @@ je_realloc(void *ptr, size_t size) } if (likely(ptr != NULL)) { + tsd_t *tsd; + assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd); - old_usize = isalloc(tsd, ptr, config_prof); + witness_assert_lockless(tsd_tsdn(tsd)); + + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) { - old_rzsize = config_prof ? p2rz(tsd, ptr) : + old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) : u2rz(old_usize); } @@ -1875,12 +1886,14 @@ je_realloc(void *ptr, size_t size) usize = s2u(size); ret = iralloc(tsd, ptr, old_usize, size, 0, false); } + tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ if (likely(!malloc_slow)) - ret = ialloc_body(size, false, &tsd, &usize, false); + ret = ialloc_body(size, false, &tsdn, &usize, false); else - ret = ialloc_body(size, false, &tsd, &usize, true); + ret = ialloc_body(size, false, &tsdn, &usize, true); + assert(!tsdn_null(tsdn) || ret == NULL); } if (unlikely(ret == NULL)) { @@ -1892,14 +1905,17 @@ je_realloc(void *ptr, size_t size) set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsd, ret, config_prof)); + tsd_t *tsd; + + assert(usize == isalloc(tsdn, ret, config_prof)); + tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, tsd, ret, usize, true, ptr, old_usize, + JEMALLOC_VALGRIND_REALLOC(true, tsdn, ret, usize, true, ptr, old_usize, old_rzsize, true, false); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (ret); } @@ -1910,12 +1926,12 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (likely(!malloc_slow)) 
ifree(tsd, ptr, tcache_get(tsd, false), false); else ifree(tsd, ptr, tcache_get(tsd, false), true); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -2012,7 +2028,7 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, *tcache = tcache_get(tsd, true); if ((flags & MALLOCX_ARENA_MASK) != 0) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd, arena_ind, true); + *arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(*arena == NULL)) return (true); } else @@ -2021,21 +2037,21 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, } JEMALLOC_ALWAYS_INLINE_C void * -imallocx_flags(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, bool slow_path) { szind_t ind; if (unlikely(alignment != 0)) - return (ipalloct(tsd, usize, alignment, zero, tcache, arena)); + return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); ind = size2index(usize); assert(ind < NSIZES); - return (iallocztm(tsd, usize, ind, zero, tcache, false, arena, + return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena, slow_path)); } static void * -imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, +imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, bool slow_path) { void *p; @@ -2043,13 +2059,13 @@ imallocx_prof_sample(tsd_t *tsd, size_t usize, size_t alignment, bool zero, if (usize <= SMALL_MAXCLASS) { assert(((alignment == 0) ? 
s2u(LARGE_MINCLASS) : sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_flags(tsd, LARGE_MINCLASS, alignment, zero, tcache, - arena, slow_path); + p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero, + tcache, arena, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsdn, p, usize); } else { - p = imallocx_flags(tsd, usize, alignment, zero, tcache, arena, + p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, slow_path); } @@ -2070,19 +2086,19 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) &zero, &tcache, &arena))) return (NULL); tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, - slow_path); - else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd, *usize, alignment, zero, tcache, - arena, slow_path); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, + tcache, arena, slow_path); + } else if ((uintptr_t)tctx > (uintptr_t)1U) { + p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero, + tcache, arena, slow_path); } else p = NULL; if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd, p, *usize, tctx); + prof_malloc(tsd_tsdn(tsd), p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); @@ -2101,24 +2117,27 @@ imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, &zero, &tcache, &arena))) return (NULL); - p = imallocx_flags(tsd, *usize, alignment, zero, tcache, arena, - slow_path); + p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache, + arena, slow_path); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); } +/* 
This function guarantees that *tsdn is non-NULL on success. */ JEMALLOC_ALWAYS_INLINE_C void * -imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, +imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, bool slow_path) { + tsd_t *tsd; if (slow_path && unlikely(malloc_init())) { - *tsd = NULL; + *tsdn = NULL; return (NULL); } - *tsd = tsd_fetch(); - witness_assert_lockless(*tsd); + tsd = tsd_fetch(); + *tsdn = tsd_tsdn(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (likely(flags == 0)) { szind_t ind = size2index(size); @@ -2131,17 +2150,17 @@ imallocx_body(size_t size, int flags, tsd_t **tsd, size_t *usize, } if (config_prof && opt_prof) { - return (ialloc_prof(*tsd, *usize, ind, false, + return (ialloc_prof(tsd, *usize, ind, false, slow_path)); } - return (ialloc(*tsd, size, ind, false, slow_path)); + return (ialloc(tsd, size, ind, false, slow_path)); } if (config_prof && opt_prof) - return (imallocx_prof(*tsd, size, flags, usize, slow_path)); + return (imallocx_prof(tsd, size, flags, usize, slow_path)); - return (imallocx_no_prof(*tsd, size, flags, usize, slow_path)); + return (imallocx_no_prof(tsd, size, flags, usize, slow_path)); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -2149,20 +2168,20 @@ void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_mallocx(size_t size, int flags) { - tsd_t *tsd; + tsdn_t *tsdn; void *p; size_t usize; assert(size != 0); if (likely(!malloc_slow)) { - p = imallocx_body(size, flags, &tsd, &usize, false); - ialloc_post_check(p, tsd, usize, "mallocx", false, false); + p = imallocx_body(size, flags, &tsdn, &usize, false); + ialloc_post_check(p, tsdn, usize, "mallocx", false, false); } else { - p = imallocx_body(size, flags, &tsd, &usize, true); - ialloc_post_check(p, tsd, usize, "mallocx", false, true); + p = imallocx_body(size, flags, &tsdn, &usize, true); + ialloc_post_check(p, tsdn, usize, "mallocx", false, true); UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(p != 
NULL, tsd, p, usize, + JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize, MALLOCX_ZERO_GET(flags)); } @@ -2183,7 +2202,7 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsd, p, usize); + arena_prof_promoted(tsd_tsdn(tsd), p, usize); } else { p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2202,7 +2221,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, @@ -2225,7 +2244,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. 
*/ - *usize = isalloc(tsd, p, config_prof); + *usize = isalloc(tsd_tsdn(tsd), p, config_prof); } prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); @@ -2253,11 +2272,11 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd, arena_ind, true); + arena = arena_get(tsd_tsdn(tsd), arena_ind, true); if (unlikely(arena == NULL)) goto label_oom; } else @@ -2271,7 +2290,7 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(tsd, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) old_rzsize = u2rz(old_usize); @@ -2289,7 +2308,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats || (config_valgrind && unlikely(in_valgrind))) - usize = isalloc(tsd, p, config_prof); + usize = isalloc(tsd_tsdn(tsd), p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2298,9 +2317,9 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, tsd, p, usize, false, ptr, old_usize, - old_rzsize, false, zero); - witness_assert_lockless(tsd); + JEMALLOC_VALGRIND_REALLOC(true, tsd_tsdn(tsd), p, usize, false, ptr, + old_usize, old_rzsize, false, zero); + witness_assert_lockless(tsd_tsdn(tsd)); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2308,32 +2327,32 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (NULL); } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(tsd_t *tsd, void 
*ptr, size_t old_usize, size_t size, +ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(tsd, ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsdn, ptr, config_prof); return (usize); } static size_t -ixallocx_prof_sample(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, +ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, alignment, + usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, zero); return (usize); @@ -2348,7 +2367,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd, ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. 
* Therefore, compute its maximum possible value and use that in @@ -2373,11 +2392,11 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(tsd, ptr, old_usize, size, extra, - alignment, zero, tctx); + usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, + size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, - alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); @@ -2404,9 +2423,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(tsd, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); /* * The API explicitly absolves itself of protecting against (size + @@ -2431,8 +2450,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(tsd, ptr, old_usize, size, extra, - alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; @@ -2441,11 +2460,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, tsd, ptr, usize, false, ptr, old_usize, - old_rzsize, false, zero); + JEMALLOC_VALGRIND_REALLOC(false, tsd_tsdn(tsd), ptr, usize, false, ptr, + old_usize, old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lockless(tsd); + 
witness_assert_lockless(tsd_tsdn(tsd)); return (usize); } @@ -2454,20 +2473,20 @@ JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { size_t usize; - tsd_t *tsd; + tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); if (config_ivsalloc) - usize = ivsalloc(tsd, ptr, config_prof); + usize = ivsalloc(tsdn, ptr, config_prof); else - usize = isalloc(tsd, ptr, config_prof); + usize = isalloc(tsdn, ptr, config_prof); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (usize); } @@ -2481,7 +2500,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2495,21 +2514,21 @@ je_dallocx(void *ptr, int flags) ifree(tsd, ptr, tcache, false); else ifree(tsd, ptr, tcache, true); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(tsd_t *tsd, size_t size, int flags) +inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (usize); } @@ -2523,10 +2542,10 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - usize = inallocx(tsd, size, flags); - assert(usize == isalloc(tsd, ptr, config_prof)); + usize = inallocx(tsd_tsdn(tsd), size, flags); + assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); - witness_assert_lockless(tsd); + 
witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2540,7 +2559,7 @@ je_sdallocx(void *ptr, size_t size, int flags) isfree(tsd, ptr, usize, tcache, false); else isfree(tsd, ptr, usize, tcache, true); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2548,21 +2567,21 @@ JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { size_t usize; - tsd_t *tsd; + tsdn_t *tsdn; assert(size != 0); if (unlikely(malloc_init())) return (0); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); - usize = inallocx(tsd, size, flags); + usize = inallocx(tsdn, size, flags); if (unlikely(usize > HUGE_MAXCLASS)) return (0); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (usize); } @@ -2577,9 +2596,9 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); } @@ -2587,15 +2606,15 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - tsd_t *tsd; + tsdn_t *tsdn; if (unlikely(malloc_init())) return (EAGAIN); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); - ret = ctl_nametomib(tsd, name, mibp, miblenp); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); + ret = ctl_nametomib(tsdn, name, mibp, miblenp); + witness_assert_lockless(tsdn); return (ret); } @@ -2610,9 +2629,9 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lockless(tsd); + 
witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); } @@ -2620,32 +2639,32 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { - tsd_t *tsd; + tsdn_t *tsdn; - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); stats_print(write_cb, cbopaque, opts); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; - tsd_t *tsd; + tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); - tsd = tsd_fetch(); - witness_assert_lockless(tsd); + tsdn = tsdn_fetch(); + witness_assert_lockless(tsdn); if (config_ivsalloc) - ret = ivsalloc(tsd, ptr, config_prof); + ret = ivsalloc(tsdn, ptr, config_prof); else - ret = (ptr == NULL) ? 0 : isalloc(tsd, ptr, config_prof); + ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr, config_prof); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); return (ret); } @@ -2705,28 +2724,35 @@ _malloc_prefork(void) witness_prefork(tsd); /* Acquire all mutexes in a safe order. 
*/ - ctl_prefork(tsd); - malloc_mutex_prefork(tsd, &arenas_lock); - prof_prefork0(tsd); + ctl_prefork(tsd_tsdn(tsd)); + malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); + prof_prefork0(tsd_tsdn(tsd)); for (i = 0; i < 3; i++) { for (j = 0; j < narenas; j++) { - if ((arena = arena_get(tsd, j, false)) != NULL) { + if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != + NULL) { switch (i) { - case 0: arena_prefork0(tsd, arena); break; - case 1: arena_prefork1(tsd, arena); break; - case 2: arena_prefork2(tsd, arena); break; + case 0: + arena_prefork0(tsd_tsdn(tsd), arena); + break; + case 1: + arena_prefork1(tsd_tsdn(tsd), arena); + break; + case 2: + arena_prefork2(tsd_tsdn(tsd), arena); + break; default: not_reached(); } } } } - base_prefork(tsd); - chunk_prefork(tsd); + base_prefork(tsd_tsdn(tsd)); + chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_prefork3(tsd, arena); + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_prefork3(tsd_tsdn(tsd), arena); } - prof_prefork1(tsd); + prof_prefork1(tsd_tsdn(tsd)); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -2750,17 +2776,17 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. 
*/ - chunk_postfork_parent(tsd); - base_postfork_parent(tsd); + chunk_postfork_parent(tsd_tsdn(tsd)); + base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_postfork_parent(tsd, arena); + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_postfork_parent(tsd_tsdn(tsd), arena); } - prof_postfork_parent(tsd); - malloc_mutex_postfork_parent(tsd, &arenas_lock); - ctl_postfork_parent(tsd); + prof_postfork_parent(tsd_tsdn(tsd)); + malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); + ctl_postfork_parent(tsd_tsdn(tsd)); } void @@ -2775,17 +2801,17 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_child(tsd); - base_postfork_child(tsd); + chunk_postfork_child(tsd_tsdn(tsd)); + base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(tsd, i, false)) != NULL) - arena_postfork_child(tsd, arena); + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + arena_postfork_child(tsd_tsdn(tsd), arena); } - prof_postfork_child(tsd); - malloc_mutex_postfork_child(tsd, &arenas_lock); - ctl_postfork_child(tsd); + prof_postfork_child(tsd_tsdn(tsd)); + malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); + ctl_postfork_child(tsd_tsdn(tsd)); } /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 4174f42e..a1fac342 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -109,25 +109,25 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) } void -malloc_mutex_prefork(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_lock(tsd, mutex); + malloc_mutex_lock(tsdn, mutex); } void -malloc_mutex_postfork_parent(tsd_t *tsd, 
malloc_mutex_t *mutex) +malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_unlock(tsd, mutex); + malloc_mutex_unlock(tsdn, mutex); } void -malloc_mutex_postfork_child(tsd_t *tsd, malloc_mutex_t *mutex) +malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB - malloc_mutex_unlock(tsd, mutex); + malloc_mutex_unlock(tsdn, mutex); #else if (malloc_mutex_init(mutex, mutex->witness.name, mutex->witness.rank)) { diff --git a/src/prof.c b/src/prof.c index df7f1f9b..c1f58d46 100644 --- a/src/prof.c +++ b/src/prof.c @@ -121,13 +121,13 @@ static bool prof_booted = false; * definition. */ -static bool prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx); +static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, +static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); +static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); /******************************************************************************/ /* Red-black trees. 
*/ @@ -213,23 +213,23 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } if ((uintptr_t)tctx > (uintptr_t)1U) { - malloc_mutex_lock(tsd, tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tsd, tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tsd, tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } } void -prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, +prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsd, ptr, usize, tctx); + prof_tctx_set(tsdn, ptr, usize, tctx); - malloc_mutex_lock(tsd, tctx->tdata->lock); + malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -237,23 +237,23 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tsd, tctx->tdata->lock); + malloc_mutex_unlock(tsdn, tctx->tdata->lock); } void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tsd, tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tsd, tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) prof_tctx_destroy(tsd, tctx); else - malloc_mutex_unlock(tsd, tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } void @@ -278,7 +278,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq = true; } - malloc_mutex_lock(tsd, &bt2gctx_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); } JEMALLOC_INLINE_C void @@ -288,7 +288,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) cassert(config_prof); assert(tdata == 
prof_tdata_get(tsd, false)); - malloc_mutex_unlock(tsd, &bt2gctx_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); if (tdata != NULL) { bool idump, gdump; @@ -301,9 +301,9 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) tdata->enq_gdump = false; if (idump) - prof_idump(tsd); + prof_idump(tsd_tsdn(tsd)); if (gdump) - prof_gdump(tsd); + prof_gdump(tsd_tsdn(tsd)); } } @@ -547,14 +547,14 @@ prof_tdata_mutex_choose(uint64_t thr_uid) } static prof_gctx_t * -prof_gctx_create(tsd_t *tsd, prof_bt_t *bt) +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsd, size, - size2index(size), false, NULL, true, arena_get(NULL, 0, true), + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (gctx == NULL) return (NULL); @@ -587,32 +587,32 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, * into this function. */ prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ - malloc_mutex_unlock(tsd, gctx->lock); - idalloctm(tsd, gctx, NULL, true, true); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or * prof_lookup(). 
*/ gctx->nlimbo--; - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_leave(tsd, tdata_self); } } static bool -prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); if (opt_prof_accum) return (false); @@ -643,7 +643,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; - malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); @@ -651,11 +651,11 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd, tdata, false); - malloc_mutex_unlock(tsd, tdata->lock); + ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: tctx_tree_remove(&gctx->tctxs, tctx); @@ -695,19 +695,19 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) destroy_tctx = false; destroy_gctx = false; } - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, tdata); } - malloc_mutex_assert_not_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd, tdata, false); + prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); if (destroy_tctx) - idalloctm(tsd, tctx, 
NULL, true, true); + idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); } static bool @@ -727,16 +727,16 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_enter(tsd, tdata); if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - gctx.p = prof_gctx_create(tsd, bt); + gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); if (gctx.v == NULL) { prof_leave(tsd, tdata); return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd, gctx.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -745,9 +745,9 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). */ - malloc_mutex_lock(tsd, gctx.p->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); gctx.p->nlimbo++; - malloc_mutex_unlock(tsd, gctx.p->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); new_gctx = false; } prof_leave(tsd, tdata); @@ -774,11 +774,11 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (tdata == NULL) return (NULL); - malloc_mutex_lock(tsd, tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); if (!not_found) /* Note double negative! */ ret.p->prepared = true; - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (not_found) { void *btkey; prof_gctx_t *gctx; @@ -793,9 +793,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) return (NULL); /* Link a prof_tctx_t into gctx for this thread. 
*/ - ret.v = iallocztm(tsd, sizeof(prof_tctx_t), + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); + arena_ichoose(tsd_tsdn(tsd), NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -809,20 +809,21 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->tctx_uid = tdata->tctx_uid_next++; ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd, tdata->lock); - error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, + ret.v); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd, ret.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); return (NULL); } - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); ret.p->state = prof_tctx_state_nominal; tctx_tree_insert(&gctx->tctxs, ret.p); gctx->nlimbo--; - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } return (ret.p); @@ -897,13 +898,13 @@ size_t prof_tdata_count(void) { size_t tdata_count = 0; - tsd_t *tsd; + tsdn_t *tsdn; - tsd = tsd_fetch(); - malloc_mutex_lock(tsd, &tdatas_mtx); + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (tdata_count); } @@ -922,9 +923,9 @@ prof_bt_count(void) if (tdata == NULL) return (0); - malloc_mutex_lock(tsd, &bt2gctx_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd, &bt2gctx_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); return (bt_count); } 
@@ -1038,20 +1039,20 @@ prof_dump_printf(bool propagate_err, const char *format, ...) } static void -prof_tctx_merge_tdata(tsd_t *tsd, prof_tctx_t *tctx, prof_tdata_t *tdata) +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsd, tctx->tdata->lock); + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - malloc_mutex_lock(tsd, tctx->gctx->lock); + malloc_mutex_lock(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: - malloc_mutex_unlock(tsd, tctx->gctx->lock); + malloc_mutex_unlock(tsdn, tctx->gctx->lock); return; case prof_tctx_state_nominal: tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsd, tctx->gctx->lock); + malloc_mutex_unlock(tsdn, tctx->gctx->lock); memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); @@ -1071,10 +1072,10 @@ prof_tctx_merge_tdata(tsd_t *tsd, prof_tctx_t *tctx, prof_tdata_t *tdata) } static void -prof_tctx_merge_gctx(tsd_t *tsd, prof_tctx_t *tctx, prof_gctx_t *gctx) +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsd, gctx->lock); + malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; @@ -1087,9 +1088,9 @@ prof_tctx_merge_gctx(tsd_t *tsd, prof_tctx_t *tctx, prof_gctx_t *gctx) static prof_tctx_t * prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsd_t *tsd = (tsd_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; - malloc_mutex_assert_owner(tsd, tctx->gctx->lock); + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1097,7 +1098,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsd, tctx, tctx->gctx); + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); break; default: not_reached(); @@ 
-1107,7 +1108,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) } struct prof_tctx_dump_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; bool propagate_err; }; @@ -1117,7 +1118,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) struct prof_tctx_dump_iter_arg_s *arg = (struct prof_tctx_dump_iter_arg_s *)opaque; - malloc_mutex_assert_owner(arg->tsd, tctx->gctx->lock); + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -1142,10 +1143,10 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsd_t *tsd = (tsd_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; prof_tctx_t *ret; - malloc_mutex_assert_owner(tsd, tctx->gctx->lock); + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_nominal: @@ -1167,12 +1168,12 @@ label_return: } static void -prof_dump_gctx_prep(tsd_t *tsd, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsdn, gctx->lock); /* * Increment nlimbo so that gctx won't go away before dump. 
@@ -1184,11 +1185,11 @@ prof_dump_gctx_prep(tsd_t *tsd, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsdn, gctx->lock); } struct prof_gctx_merge_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; size_t leak_ngctx; }; @@ -1198,12 +1199,12 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) struct prof_gctx_merge_iter_arg_s *arg = (struct prof_gctx_merge_iter_arg_s *)opaque; - malloc_mutex_lock(arg->tsd, gctx->lock); + malloc_mutex_lock(arg->tsdn, gctx->lock); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsd); + (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) arg->leak_ngctx++; - malloc_mutex_unlock(arg->tsd, gctx->lock); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return (NULL); } @@ -1222,7 +1223,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) */ while ((gctx = gctx_tree_first(gctxs)) != NULL) { gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd, gctx->lock); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); { prof_tctx_t *next; @@ -1230,14 +1231,15 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) do { prof_tctx_t *to_destroy = tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, (void *)tsd); + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); if (to_destroy != NULL) { next = tctx_tree_next(&gctx->tctxs, to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd, to_destroy, NULL, true, - true); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, true, true); } else next = NULL; } while (next != NULL); @@ -1245,15 +1247,15 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo--; if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; - malloc_mutex_unlock(tsd, gctx->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } else - malloc_mutex_unlock(tsd, gctx->lock); + 
malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } } struct prof_tdata_merge_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; prof_cnt_t cnt_all; }; @@ -1264,7 +1266,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, struct prof_tdata_merge_iter_arg_s *arg = (struct prof_tdata_merge_iter_arg_s *)opaque; - malloc_mutex_lock(arg->tsd, tdata->lock); + malloc_mutex_lock(arg->tsdn, tdata->lock); if (!tdata->expired) { size_t tabind; union { @@ -1276,7 +1278,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) - prof_tctx_merge_tdata(arg->tsd, tctx.p, tdata); + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; @@ -1286,7 +1288,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } } else tdata->dumping = false; - malloc_mutex_unlock(arg->tsd, tdata->lock); + malloc_mutex_unlock(arg->tsdn, tdata->lock); return (NULL); } @@ -1315,7 +1317,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool -prof_dump_header(tsd_t *tsd, bool propagate_err, const prof_cnt_t *cnt_all) +prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; @@ -1326,10 +1328,10 @@ prof_dump_header(tsd_t *tsd, bool propagate_err, const prof_cnt_t *cnt_all) cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) return (true); - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (ret); } #ifdef JEMALLOC_JET @@ -1339,7 +1341,7 @@ prof_dump_header_t *prof_dump_header = 
JEMALLOC_N(prof_dump_header_impl); #endif static bool -prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { bool ret; @@ -1347,7 +1349,7 @@ prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; cassert(config_prof); - malloc_mutex_assert_owner(tsd, gctx->lock); + malloc_mutex_assert_owner(tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. */ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -1381,7 +1383,7 @@ prof_dump_gctx(tsd_t *tsd, bool propagate_err, prof_gctx_t *gctx, goto label_return; } - prof_tctx_dump_iter_arg.tsd = tsd; + prof_tctx_dump_iter_arg.tsdn = tsdn; prof_tctx_dump_iter_arg.propagate_err = propagate_err; if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, (void *)&prof_tctx_dump_iter_arg) != NULL) { @@ -1515,7 +1517,7 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, } struct prof_gctx_dump_iter_arg_s { - tsd_t *tsd; + tsdn_t *tsdn; bool propagate_err; }; @@ -1526,9 +1528,9 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) struct prof_gctx_dump_iter_arg_s *arg = (struct prof_gctx_dump_iter_arg_s *)opaque; - malloc_mutex_lock(arg->tsd, gctx->lock); + malloc_mutex_lock(arg->tsdn, gctx->lock); - if (prof_dump_gctx(arg->tsd, arg->propagate_err, gctx, &gctx->bt, + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, gctxs)) { ret = gctx; goto label_return; @@ -1536,7 +1538,7 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) ret = NULL; label_return: - malloc_mutex_unlock(arg->tsd, gctx->lock); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return (ret); } @@ -1560,7 +1562,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) if (tdata == NULL) return (true); - malloc_mutex_lock(tsd, 
&prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_enter(tsd, tdata); /* @@ -1569,21 +1571,21 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) */ gctx_tree_new(&gctxs); for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(tsd, gctx.p, &gctxs); + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs); /* * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. */ - prof_tdata_merge_iter_arg.tsd = tsd; + prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd); memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, (void *)&prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. */ - prof_gctx_merge_iter_arg.tsd = tsd; + prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd); prof_gctx_merge_iter_arg.leak_ngctx = 0; gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, (void *)&prof_gctx_merge_iter_arg); @@ -1595,12 +1597,12 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; /* Dump profile header. */ - if (prof_dump_header(tsd, propagate_err, + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, &prof_tdata_merge_iter_arg.cnt_all)) goto label_write_error; /* Dump per gctx profile stats. 
*/ - prof_gctx_dump_iter_arg.tsd = tsd; + prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd); prof_gctx_dump_iter_arg.propagate_err = propagate_err; if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, (void *)&prof_gctx_dump_iter_arg) != NULL) @@ -1614,7 +1616,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) goto label_open_close_error; prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, @@ -1625,7 +1627,7 @@ label_write_error: prof_dump_close(propagate_err); label_open_close_error: prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); return (true); } @@ -1665,21 +1667,23 @@ prof_fdump(void) return; tsd = tsd_fetch(); - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } void -prof_idump(tsd_t *tsd) +prof_idump(tsdn_t *tsdn) { + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsd == NULL) + if (!prof_booted || tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1690,10 +1694,10 @@ prof_idump(tsd_t *tsd) if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1712,24 +1716,26 @@ prof_mdump(tsd_t *tsd, const char *filename) /* No filename specified, so 
automatically generate one. */ if (opt_prof_prefix[0] == '\0') return (true); - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); filename = filename_buf; } return (prof_dump(tsd, true, filename, false)); } void -prof_gdump(tsd_t *tsd) +prof_gdump(tsdn_t *tsdn) { + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsd == NULL) + if (!prof_booted || tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) return; @@ -1740,10 +1746,10 @@ prof_gdump(tsd_t *tsd) if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsd, &prof_dump_seq_mtx); + malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(tsd, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); prof_dump(tsd, false, filename, false); } } @@ -1772,20 +1778,20 @@ prof_bt_keycomp(const void *k1, const void *k2) } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(tsd_t *tsd) +prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; - malloc_mutex_lock(tsd, &next_thr_uid_mtx); + malloc_mutex_lock(tsdn, &next_thr_uid_mtx); thr_uid = next_thr_uid; next_thr_uid++; - malloc_mutex_unlock(tsd, &next_thr_uid_mtx); + malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); return (thr_uid); } static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1793,9 +1799,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsd, sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(NULL, - 0, true), true); + tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) return (NULL); @@ -1807,9 +1813,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, + if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd, tdata, NULL, true, true); + idalloctm(tsdn, tdata, NULL, true, true); return (NULL); } @@ -1823,24 +1829,23 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsd_t *tsd) +prof_tdata_init(tsdn_t *tsdn) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd), 0, NULL, - prof_thread_active_init_get(tsd))); + return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, + prof_thread_active_init_get(tsdn))); } static bool -prof_tdata_should_destroy_unlocked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { if (tdata->attached && !even_if_attached) @@ -1851,43 +1856,41 @@ prof_tdata_should_destroy_unlocked(tsd_t *tsd, prof_tdata_t *tdata, } static bool -prof_tdata_should_destroy(tsd_t *tsd, prof_tdata_t *tdata, +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsd, tdata->lock); + malloc_mutex_assert_owner(tsdn, tdata->lock); - return 
(prof_tdata_should_destroy_unlocked(tsd, tdata, - even_if_attached)); + return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); } static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsd, &tdatas_mtx); + malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - assert(tsd_prof_tdata_get(tsd) != tdata); + assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); tdata_tree_remove(&tdatas, tdata); - assert(prof_tdata_should_destroy_unlocked(tsd, tdata, - even_if_attached)); + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) - idalloctm(tsd, tdata->thread_name, NULL, true, true); - ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd, tdata, NULL, true, true); + idalloctm(tsdn, tdata->thread_name, NULL, true, true); + ckh_delete(tsdn, &tdata->bt2tctx); + idalloctm(tsdn, tdata, NULL, true, true); } static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd, &tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); + prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); + malloc_mutex_unlock(tsdn, &tdatas_mtx); } static void @@ -1895,9 +1898,10 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tsd, tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd, tdata, true); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. 
@@ -1907,9 +1911,9 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) tsd_prof_tdata_set(tsd, NULL); } else destroy_tdata = false; - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd, tdata, true); + prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); } prof_tdata_t * @@ -1918,27 +1922,27 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; + prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active)); + return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, + thread_name, active)); } static bool -prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { bool destroy_tdata; - malloc_mutex_lock(tsd, tdata->lock); + malloc_mutex_lock(tsdn, tdata->lock); if (!tdata->expired) { tdata->expired = true; destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tsd, tdata, false); + prof_tdata_should_destroy(tsdn, tdata, false); } else destroy_tdata = false; - malloc_mutex_unlock(tsd, tdata->lock); + malloc_mutex_unlock(tsdn, tdata->lock); return (destroy_tdata); } @@ -1946,36 +1950,36 @@ prof_tdata_expire(tsd_t *tsd, prof_tdata_t *tdata) static prof_tdata_t * prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { - tsd_t *tsd = (tsd_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; - return (prof_tdata_expire(tsd, tdata) ? tdata : NULL); + return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); } void -prof_reset(tsd_t *tsd, size_t lg_sample) +prof_reset(tsdn_t *tsdn, size_t lg_sample) { prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsd, &prof_dump_mtx); - malloc_mutex_lock(tsd, &tdatas_mtx); + malloc_mutex_lock(tsdn, &prof_dump_mtx); + malloc_mutex_lock(tsdn, &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); + prof_tdata_reset_iter, (void *)tsdn); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsd, to_destroy, false); + prof_tdata_destroy_locked(tsdn, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsd, &tdatas_mtx); - malloc_mutex_unlock(tsd, &prof_dump_mtx); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_mtx); } void @@ -1992,25 +1996,25 @@ prof_tdata_cleanup(tsd_t *tsd) } bool -prof_active_get(tsd_t *tsd) +prof_active_get(tsdn_t *tsdn) { bool prof_active_current; - malloc_mutex_lock(tsd, &prof_active_mtx); + malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_current = prof_active; - malloc_mutex_unlock(tsd, &prof_active_mtx); + malloc_mutex_unlock(tsdn, &prof_active_mtx); return (prof_active_current); } bool -prof_active_set(tsd_t *tsd, bool active) +prof_active_set(tsdn_t *tsdn, bool active) { bool prof_active_old; - malloc_mutex_lock(tsd, &prof_active_mtx); + malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; - malloc_mutex_unlock(tsd, &prof_active_mtx); + malloc_mutex_unlock(tsdn, &prof_active_mtx); return (prof_active_old); } @@ -2026,7 +2030,7 @@ prof_thread_name_get(tsd_t *tsd) } static char * -prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { char *ret; size_t size; @@ -2038,8 +2042,8 @@ prof_thread_name_alloc(tsd_t *tsd, const char 
*thread_name) if (size == 1) return (""); - ret = iallocztm(tsd, size, size2index(size), false, NULL, true, - arena_get(NULL, 0, true), true); + ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) return (NULL); memcpy(ret, thread_name, size); @@ -2066,12 +2070,12 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EFAULT); } - s = prof_thread_name_alloc(tsd, thread_name); + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); if (s == NULL) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd, tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) @@ -2103,48 +2107,48 @@ prof_thread_active_set(tsd_t *tsd, bool active) } bool -prof_thread_active_init_get(tsd_t *tsd) +prof_thread_active_init_get(tsdn_t *tsdn) { bool active_init; - malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init = prof_thread_active_init; - malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); return (active_init); } bool -prof_thread_active_init_set(tsd_t *tsd, bool active_init) +prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) { bool active_init_old; - malloc_mutex_lock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init_old = prof_thread_active_init; prof_thread_active_init = active_init; - malloc_mutex_unlock(tsd, &prof_thread_active_init_mtx); + malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); return (active_init_old); } bool -prof_gdump_get(tsd_t *tsd) +prof_gdump_get(tsdn_t *tsdn) { bool prof_gdump_current; - malloc_mutex_lock(tsd, &prof_gdump_mtx); + malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_current = prof_gdump_val; - malloc_mutex_unlock(tsd, &prof_gdump_mtx); + 
malloc_mutex_unlock(tsdn, &prof_gdump_mtx); return (prof_gdump_current); } bool -prof_gdump_set(tsd_t *tsd, bool gdump) +prof_gdump_set(tsdn_t *tsdn, bool gdump) { bool prof_gdump_old; - malloc_mutex_lock(tsd, &prof_gdump_mtx); + malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; - malloc_mutex_unlock(tsd, &prof_gdump_mtx); + malloc_mutex_unlock(tsdn, &prof_gdump_mtx); return (prof_gdump_old); } @@ -2185,7 +2189,7 @@ prof_boot1(void) } bool -prof_boot2(tsd_t *tsd) +prof_boot2(tsdn_t *tsdn) { cassert(config_prof); @@ -2211,7 +2215,7 @@ prof_boot2(tsd_t *tsd) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2242,8 +2246,8 @@ prof_boot2(tsd_t *tsd) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsd, PROF_NCTX_LOCKS * - sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS + * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2252,7 +2256,7 @@ prof_boot2(tsd_t *tsd) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsd, + tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); @@ -2277,76 +2281,77 @@ prof_boot2(tsd_t *tsd) } void -prof_prefork0(tsd_t *tsd) +prof_prefork0(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_prefork(tsd, &prof_dump_mtx); - malloc_mutex_prefork(tsd, &bt2gctx_mtx); - malloc_mutex_prefork(tsd, &tdatas_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_mtx); + malloc_mutex_prefork(tsdn, &bt2gctx_mtx); + malloc_mutex_prefork(tsdn, &tdatas_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_prefork(tsd, &tdata_locks[i]); + malloc_mutex_prefork(tsdn, &tdata_locks[i]); for (i = 0; i < 
PROF_NCTX_LOCKS; i++) - malloc_mutex_prefork(tsd, &gctx_locks[i]); + malloc_mutex_prefork(tsdn, &gctx_locks[i]); } } void -prof_prefork1(tsd_t *tsd) +prof_prefork1(tsdn_t *tsdn) { if (opt_prof) { - malloc_mutex_prefork(tsd, &prof_active_mtx); - malloc_mutex_prefork(tsd, &prof_dump_seq_mtx); - malloc_mutex_prefork(tsd, &prof_gdump_mtx); - malloc_mutex_prefork(tsd, &next_thr_uid_mtx); - malloc_mutex_prefork(tsd, &prof_thread_active_init_mtx); + malloc_mutex_prefork(tsdn, &prof_active_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); + malloc_mutex_prefork(tsdn, &prof_gdump_mtx); + malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); + malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); } } void -prof_postfork_parent(tsd_t *tsd) +prof_postfork_parent(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsd, &prof_thread_active_init_mtx); - malloc_mutex_postfork_parent(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_parent(tsd, &prof_gdump_mtx); - malloc_mutex_postfork_parent(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_parent(tsd, &prof_active_mtx); + malloc_mutex_postfork_parent(tsdn, + &prof_thread_active_init_mtx); + malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_parent(tsd, &gctx_locks[i]); + malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_parent(tsd, &tdata_locks[i]); - malloc_mutex_postfork_parent(tsd, &tdatas_mtx); - malloc_mutex_postfork_parent(tsd, &bt2gctx_mtx); - malloc_mutex_postfork_parent(tsd, &prof_dump_mtx); + malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); + malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); + malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); } 
} void -prof_postfork_child(tsd_t *tsd) +prof_postfork_child(tsdn_t *tsdn) { if (opt_prof) { unsigned i; - malloc_mutex_postfork_child(tsd, &prof_thread_active_init_mtx); - malloc_mutex_postfork_child(tsd, &next_thr_uid_mtx); - malloc_mutex_postfork_child(tsd, &prof_gdump_mtx); - malloc_mutex_postfork_child(tsd, &prof_dump_seq_mtx); - malloc_mutex_postfork_child(tsd, &prof_active_mtx); + malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) - malloc_mutex_postfork_child(tsd, &gctx_locks[i]); + malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); for (i = 0; i < PROF_NTDATA_LOCKS; i++) - malloc_mutex_postfork_child(tsd, &tdata_locks[i]); - malloc_mutex_postfork_child(tsd, &tdatas_mtx); - malloc_mutex_postfork_child(tsd, &bt2gctx_mtx); - malloc_mutex_postfork_child(tsd, &prof_dump_mtx); + malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); + malloc_mutex_postfork_child(tsdn, &tdatas_mtx); + malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_mtx); } } diff --git a/src/quarantine.c b/src/quarantine.c index ff1637ec..18903fb5 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -13,24 +13,22 @@ /* Function prototypes for non-inline static functions. 
*/ static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, +static void quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine); +static void quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound); /******************************************************************************/ static quarantine_t * -quarantine_init(tsd_t *tsd, size_t lg_maxobjs) +quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs) { quarantine_t *quarantine; size_t size; - assert(tsd_nominal(tsd)); - size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * sizeof(quarantine_obj_t)); - quarantine = (quarantine_t *)iallocztm(tsd, size, size2index(size), - false, NULL, true, arena_get(NULL, 0, true), true); + quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (quarantine == NULL) return (NULL); quarantine->curbytes = 0; @@ -49,7 +47,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (!tsd_nominal(tsd)) return; - quarantine = quarantine_init(tsd, LG_MAXOBJS_INIT); + quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT); /* * Check again whether quarantine has been initialized, because * quarantine_init() may have triggered recursive initialization. 
@@ -57,7 +55,7 @@ quarantine_alloc_hook_work(tsd_t *tsd) if (tsd_quarantine_get(tsd) == NULL) tsd_quarantine_set(tsd, quarantine); else - idalloctm(tsd, quarantine, NULL, true, true); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); } static quarantine_t * @@ -65,9 +63,9 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) { quarantine_t *ret; - ret = quarantine_init(tsd, quarantine->lg_maxobjs + 1); + ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1); if (ret == NULL) { - quarantine_drain_one(tsd, quarantine); + quarantine_drain_one(tsd_tsdn(tsd), quarantine); return (quarantine); } @@ -89,18 +87,18 @@ quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * sizeof(quarantine_obj_t)); } - idalloctm(tsd, quarantine, NULL, true, true); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); tsd_quarantine_set(tsd, ret); return (ret); } static void -quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) +quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine) { quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(tsd, obj->ptr, config_prof)); - idalloctm(tsd, obj->ptr, NULL, false, true); + assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof)); + idalloctm(tsdn, obj->ptr, NULL, false, true); quarantine->curbytes -= obj->usize; quarantine->curobjs--; quarantine->first = (quarantine->first + 1) & ((ZU(1) << @@ -108,24 +106,24 @@ quarantine_drain_one(tsd_t *tsd, quarantine_t *quarantine) } static void -quarantine_drain(tsd_t *tsd, quarantine_t *quarantine, size_t upper_bound) +quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound) { while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(tsd, quarantine); + quarantine_drain_one(tsdn, quarantine); } void quarantine(tsd_t *tsd, void *ptr) { quarantine_t *quarantine; - size_t usize = isalloc(tsd, ptr, config_prof); + size_t usize 
= isalloc(tsd_tsdn(tsd), ptr, config_prof); cassert(config_fill); assert(opt_quarantine); if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd, ptr, NULL, false, true); + idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); return; } /* @@ -135,7 +133,7 @@ quarantine(tsd_t *tsd, void *ptr) if (quarantine->curbytes + usize > opt_quarantine) { size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - usize : 0; - quarantine_drain(tsd, quarantine, upper_bound); + quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound); } /* Grow the quarantine ring buffer if it's full. */ if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) @@ -164,7 +162,7 @@ quarantine(tsd_t *tsd, void *ptr) } } else { assert(quarantine->curbytes == 0); - idalloctm(tsd, ptr, NULL, false, true); + idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); } } @@ -178,8 +176,8 @@ quarantine_cleanup(tsd_t *tsd) quarantine = tsd_quarantine_get(tsd); if (quarantine != NULL) { - quarantine_drain(tsd, quarantine, 0); - idalloctm(tsd, quarantine, NULL, true, true); + quarantine_drain(tsd_tsdn(tsd), quarantine, 0); + idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); tsd_quarantine_set(tsd, NULL); } } diff --git a/src/tcache.c b/src/tcache.c index 88005f30..175759c7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,10 +24,10 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ size_t -tcache_salloc(tsd_t *tsd, const void *ptr) +tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsd, ptr, false)); + return (arena_salloc(tsdn, ptr, false)); } void @@ -71,12 +71,12 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) } void * -tcache_alloc_small_hard(tsd_t *tsd, arena_t *arena, tcache_t *tcache, +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; - arena_tcache_fill_small(tsd, arena, tbin, binind, config_prof ? 
+ arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? tcache->prof_accumbytes : 0); if (config_prof) tcache->prof_accumbytes = 0; @@ -107,13 +107,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { - if (arena_prof_accum(tsd, arena, + if (arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) - prof_idump(tsd); + prof_idump(tsd_tsdn(tsd)); tcache->prof_accumbytes = 0; } - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -131,8 +131,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin_junked_locked(tsd, bin_arena, - chunk, ptr, bitselm); + arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), + bin_arena, chunk, ptr, bitselm); } else { /* * This object was allocated via a different @@ -144,8 +144,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ndeferred++; } } - malloc_mutex_unlock(tsd, &bin->lock); - arena_decay_ticks(tsd, bin_arena, nflush - ndeferred); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); } if (config_stats && !merged_stats) { /* @@ -153,11 +153,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. 
*/ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -190,7 +190,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, if (config_prof) idump = false; - malloc_mutex_lock(tsd, &locked_arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -213,7 +213,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (extent_node_arena_get(&chunk->node) == locked_arena) { - arena_dalloc_large_junked_locked(tsd, + arena_dalloc_large_junked_locked(tsd_tsdn(tsd), locked_arena, chunk, ptr); } else { /* @@ -226,22 +226,23 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, ndeferred++; } } - malloc_mutex_unlock(tsd, &locked_arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); if (config_prof && idump) - prof_idump(tsd); - arena_decay_ticks(tsd, locked_arena, nflush - ndeferred); + prof_idump(tsd_tsdn(tsd)); + arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - + ndeferred); } if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -251,35 +252,26 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, tbin->low_water = tbin->ncached; } -void -tcache_arena_associate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) +static void +tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->lock); } } -void -tcache_arena_reassociate(tsd_t *tsd, tcache_t *tcache, arena_t *oldarena, - arena_t *newarena) -{ - - tcache_arena_dissociate(tsd, tcache, oldarena); - tcache_arena_associate(tsd, tcache, newarena); -} - -void -tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) +static void +tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. 
*/ - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsdn, &arena->lock); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -292,11 +284,20 @@ tcache_arena_dissociate(tsd_t *tsd, tcache_t *tcache, arena_t *arena) assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); - tcache_stats_merge(tsd, tcache, arena); - malloc_mutex_unlock(tsd, &arena->lock); + tcache_stats_merge(tsdn, tcache, arena); + malloc_mutex_unlock(tsdn, &arena->lock); } } +void +tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, + arena_t *newarena) +{ + + tcache_arena_dissociate(tsdn, tcache, oldarena); + tcache_arena_associate(tsdn, tcache, newarena); +} + tcache_t * tcache_get_hard(tsd_t *tsd) { @@ -310,11 +311,11 @@ tcache_get_hard(tsd_t *tsd) arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) return (NULL); - return (tcache_create(tsd, arena)); + return (tcache_create(tsd_tsdn(tsd), arena)); } tcache_t * -tcache_create(tsd_t *tsd, arena_t *arena) +tcache_create(tsdn_t *tsdn, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; @@ -328,12 +329,12 @@ tcache_create(tsd_t *tsd, arena_t *arena) /* Avoid false cacheline sharing. 
*/ size = sa2u(size, CACHELINE); - tcache = ipallocztm(tsd, size, CACHELINE, true, NULL, true, - arena_get(NULL, 0, true)); + tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) return (NULL); - tcache_arena_associate(tsd, tcache, arena); + tcache_arena_associate(tsdn, tcache, arena); ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); @@ -360,7 +361,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tsd, tcache, arena); + tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; @@ -368,9 +369,9 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) if (config_stats && tbin->tstats.nrequests != 0) { arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } } @@ -379,19 +380,19 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(tsd, &arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); arena->stats.nrequests_large += tbin->tstats.nrequests; arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd, &arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd, arena, tcache->prof_accumbytes)) - prof_idump(tsd); + arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) + prof_idump(tsd_tsdn(tsd)); - idalloctm(tsd, tcache, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); } void @@ -416,21 +417,21 @@ tcache_enabled_cleanup(tsd_t *tsd) } void -tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) 
+tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); - malloc_mutex_assert_owner(tsd, &arena->lock); + malloc_mutex_assert_owner(tsdn, &arena->lock); /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; tcache_bin_t *tbin = &tcache->tbins[i]; - malloc_mutex_lock(tsd, &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd, &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); tbin->tstats.nrequests = 0; } @@ -444,14 +445,14 @@ tcache_stats_merge(tsd_t *tsd, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) +tcaches_create(tsdn_t *tsdn, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsd, sizeof(tcache_t *) * + tcaches = base_alloc(tsdn, sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -459,10 +460,10 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsd, NULL); + arena = arena_ichoose(tsdn, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsd, arena); + tcache = tcache_create(tsdn, arena); if (tcache == NULL) return (true); @@ -508,7 +509,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(tsd_t *tsd) +tcache_boot(tsdn_t *tsdn) { unsigned i; @@ -526,7 +527,7 @@ tcache_boot(tsd_t *tsd) nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsd, nhbins * + tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins * sizeof(tcache_bin_info_t)); if (tcache_bin_info == NULL) return (true); diff --git a/src/witness.c b/src/witness.c index 31c36a24..f5176b6f 100644 --- a/src/witness.c +++ b/src/witness.c @@ -34,17 +34,19 @@ witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); #endif void -witness_lock(tsd_t *tsd, witness_t *witness) +witness_lock(tsdn_t *tsdn, witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_not_owner(tsd, witness); + witness_assert_not_owner(tsdn, witness); witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); @@ -69,16 +71,18 @@ witness_lock(tsd_t *tsd, witness_t *witness) } void -witness_unlock(tsd_t *tsd, witness_t *witness) +witness_unlock(tsdn_t *tsdn, witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_owner(tsd, witness); + witness_assert_owner(tsdn, witness); witnesses = tsd_witnessesp_get(tsd); ql_remove(witnesses, witness, link); @@ -104,13 +108,15 @@ witness_owner_error_t *witness_owner_error = #endif void -witness_assert_owner(tsd_t *tsd, const witness_t *witness) +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; @@ -142,13 +148,15 @@ witness_not_owner_error_t *witness_not_owner_error = #endif void -witness_assert_not_owner(tsd_t *tsd, const witness_t *witness) +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; 
- if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); if (witness->rank == WITNESS_RANK_OMIT) return; @@ -183,13 +191,15 @@ witness_lockless_error_t *witness_lockless_error = #endif void -witness_assert_lockless(tsd_t *tsd) +witness_assert_lockless(tsdn_t *tsdn) { + tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (tsd == NULL) + if (tsdn_null(tsdn)) return; + tsd = tsdn_tsd(tsdn); witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); @@ -202,7 +212,7 @@ void witnesses_cleanup(tsd_t *tsd) { - witness_assert_lockless(tsd); + witness_assert_lockless(tsd_tsdn(tsd)); /* Do nothing. */ } diff --git a/src/zone.c b/src/zone.c index 8f25051a..2c17123a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. */ - return (ivsalloc(tsd_fetch(), ptr, config_prof)); + return (ivsalloc(tsdn_fetch(), ptr, config_prof)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) { + if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(tsd_fetch(), ptr, config_prof) != 0) + if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -123,7 +123,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { size_t alloc_size; - alloc_size = ivsalloc(tsd_fetch(), ptr, config_prof); + alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof); if (alloc_size != 0) { assert(alloc_size == size); je_free(ptr); diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8e769de6..8ba36c21 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -86,7 +86,7 @@ 
TEST_BEGIN(test_arena_reset) void **ptrs; int flags; size_t mib[3]; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill && unlikely(opt_quarantine))); @@ -124,11 +124,11 @@ TEST_BEGIN(test_arena_reset) "Unexpected mallocx(%zu, %#x) failure", sz, flags); } - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); /* Verify allocations. */ for (i = 0; i < nptrs; i++) { - assert_zu_gt(ivsalloc(tsd, ptrs[i], false), 0, + assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0, "Allocation should have queryable size"); } @@ -142,7 +142,7 @@ TEST_BEGIN(test_arena_reset) /* Verify allocations no longer exist. */ for (i = 0; i < nptrs; i++) { - assert_zu_eq(ivsalloc(tsd, ptrs[i], false), 0, + assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0, "Allocation should no longer exist"); } diff --git a/test/unit/ckh.c b/test/unit/ckh.c index b1175959..961e2acb 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,24 +2,24 @@ TEST_BEGIN(test_new_delete) { - tsd_t *tsd; + tsdn_t *tsdn; ckh_t ckh; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); - ckh_delete(tsd, &ckh); + assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + ckh_string_keycomp), "Unexpected ckh_new() error"); + ckh_delete(tsdn, &ckh); - assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, + assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsd, &ckh); + ckh_delete(tsdn, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsd_t *tsd; + tsdn_t *tsdn; ckh_t ckh; const char *strs[] = { "a string", @@ -30,17 +30,17 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), - "Unexpected ckh_new() error"); + assert_false(ckh_new(tsdn, &ckh, 
2, ckh_string_hash, + ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. */ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(tsd, &ckh, strs[i], strs[i]); + ckh_insert(tsdn, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -85,7 +85,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; @@ -101,22 +101,22 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(tsd, &ckh); + ckh_delete(tsdn, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) - tsd_t *tsd; + tsdn_t *tsdn; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, + assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { @@ -128,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), + assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -143,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), + assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, 
"Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), + assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -184,13 +184,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), + assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), + assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -198,7 +198,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(tsd, &ckh); + ckh_delete(tsdn, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/junk.c b/test/unit/junk.c index 414874a0..acddc601 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(tsd_t *tsd, void *ptr, size_t usize) +huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsd, ptr, usize); + huge_dalloc_junk_orig(tsdn, ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 83f51df8..5ae45fd2 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -94,7 +94,7 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; 
static bool -prof_dump_header_intercept(tsd_t *tsd, bool propagate_err, +prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { diff --git a/test/unit/witness.c b/test/unit/witness.c index 430d8203..ed172753 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -60,76 +60,76 @@ witness_comp_reverse(const witness_t *a, const witness_t *b) TEST_BEGIN(test_witness) { witness_t a, b; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); - witness_assert_not_owner(tsd, &a); - witness_lock(tsd, &a); - witness_assert_owner(tsd, &a); + witness_assert_not_owner(tsdn, &a); + witness_lock(tsdn, &a); + witness_assert_owner(tsdn, &a); witness_init(&b, "b", 2, NULL); - witness_assert_not_owner(tsd, &b); - witness_lock(tsd, &b); - witness_assert_owner(tsd, &b); + witness_assert_not_owner(tsdn, &b); + witness_lock(tsdn, &b); + witness_assert_owner(tsdn, &b); - witness_unlock(tsd, &a); - witness_unlock(tsd, &b); + witness_unlock(tsdn, &a); + witness_unlock(tsdn, &b); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); } TEST_END TEST_BEGIN(test_witness_comp) { witness_t a, b, c, d; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, witness_comp); - witness_assert_not_owner(tsd, &a); - witness_lock(tsd, &a); - witness_assert_owner(tsd, &a); + witness_assert_not_owner(tsdn, &a); + witness_lock(tsdn, &a); + witness_assert_owner(tsdn, &a); witness_init(&b, "b", 1, witness_comp); - witness_assert_not_owner(tsd, &b); - witness_lock(tsd, &b); - witness_assert_owner(tsd, &b); - witness_unlock(tsd, &b); + witness_assert_not_owner(tsdn, &b); + witness_lock(tsdn, &b); + witness_assert_owner(tsdn, &b); + witness_unlock(tsdn, &b); witness_lock_error_orig = 
witness_lock_error; witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; witness_init(&c, "c", 1, witness_comp_reverse); - witness_assert_not_owner(tsd, &c); + witness_assert_not_owner(tsdn, &c); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsd, &c); + witness_lock(tsdn, &c); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsd, &c); + witness_unlock(tsdn, &c); saw_lock_error = false; witness_init(&d, "d", 1, NULL); - witness_assert_not_owner(tsd, &d); + witness_assert_not_owner(tsdn, &d); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsd, &d); + witness_lock(tsdn, &d); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsd, &d); + witness_unlock(tsdn, &d); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_lock_error = witness_lock_error_orig; } @@ -138,7 +138,7 @@ TEST_END TEST_BEGIN(test_witness_reversal) { witness_t a, b; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -146,22 +146,22 @@ TEST_BEGIN(test_witness_reversal) witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); witness_init(&b, "b", 2, NULL); - witness_lock(tsd, &b); + witness_lock(tsdn, &b); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsd, &a); + witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsd, &a); - witness_unlock(tsd, &b); + witness_unlock(tsdn, &a); + witness_unlock(tsdn, &b); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_lock_error = witness_lock_error_orig; } @@ -170,7 +170,7 @@ TEST_END TEST_BEGIN(test_witness_recursive) { witness_t a; - tsd_t *tsd; + tsdn_t *tsdn; 
test_skip_if(!config_debug); @@ -182,22 +182,22 @@ TEST_BEGIN(test_witness_recursive) witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); - witness_lock(tsd, &a); + witness_lock(tsdn, &a); assert_false(saw_lock_error, "Unexpected witness lock error"); assert_false(saw_not_owner_error, "Unexpected witness not owner error"); - witness_lock(tsd, &a); + witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); assert_true(saw_not_owner_error, "Expected witness not owner error"); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; @@ -208,7 +208,7 @@ TEST_END TEST_BEGIN(test_witness_unlock_not_owned) { witness_t a; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -216,17 +216,17 @@ TEST_BEGIN(test_witness_unlock_not_owned) witness_owner_error = witness_owner_error_intercept; saw_owner_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); assert_false(saw_owner_error, "Unexpected owner error"); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); assert_true(saw_owner_error, "Expected owner error"); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_owner_error = witness_owner_error_orig; } @@ -235,7 +235,7 @@ TEST_END TEST_BEGIN(test_witness_lockful) { witness_t a; - tsd_t *tsd; + tsdn_t *tsdn; test_skip_if(!config_debug); @@ -243,22 +243,22 @@ TEST_BEGIN(test_witness_lockful) witness_lockless_error = witness_lockless_error_intercept; saw_lockless_error = false; - tsd = tsd_fetch(); + tsdn = tsdn_fetch(); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_init(&a, 
"a", 1, NULL); assert_false(saw_lockless_error, "Unexpected lockless error"); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); - witness_lock(tsd, &a); - witness_assert_lockless(tsd); + witness_lock(tsdn, &a); + witness_assert_lockless(tsdn); assert_true(saw_lockless_error, "Expected lockless error"); - witness_unlock(tsd, &a); + witness_unlock(tsdn, &a); - witness_assert_lockless(tsd); + witness_assert_lockless(tsdn); witness_lockless_error = witness_lockless_error_orig; } From 3a9ec676267cf215ed2591a1060f870daced2472 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 00:52:16 -0700 Subject: [PATCH 0241/2608] Disable junk filling for tests that could otherwise easily OOM. --- test/integration/mallocx.c | 4 ++++ test/integration/xallocx.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 578c229a..55e1a090 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + static unsigned get_nsizes_impl(const char *cmd) { diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 5c4998b6..ad292bb5 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -1,5 +1,9 @@ #include "test/jemalloc_test.h" +#ifdef JEMALLOC_FILL +const char *malloc_conf = "junk:false"; +#endif + /* * Use a separate arena for xallocx() extension/contraction tests so that * internal allocation e.g. by heap profiling can't interpose allocations where From 7790a0ba403b02bcb8804534c8120d605b4dc5f4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 00:52:59 -0700 Subject: [PATCH 0242/2608] Fix chunk accounting related to triggering gdump profiles. Fix in place huge reallocation to update the chunk counters that are used for triggering gdump profiles. 
--- ChangeLog | 1 + src/huge.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/ChangeLog b/ChangeLog index 68dedfa0..a9390947 100644 --- a/ChangeLog +++ b/ChangeLog @@ -23,6 +23,7 @@ brevity. Much more detail can be found in the git revision history: to avoid unfortunate interactions during fork(2). (@jasone) Bug fixes: + - Fix chunk accounting related to triggering gdump profiles. (@jasone) - Link against librt for clock_gettime(2) if glibc < 2.17. (@jasone) - Scale leak report summary according to sampling probability. (@jasone) diff --git a/src/huge.c b/src/huge.c index 0bf61622..ba083684 100644 --- a/src/huge.c +++ b/src/huge.c @@ -23,6 +23,15 @@ huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) return (chunk_register(tsdn, ptr, node)); } +static void +huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +{ + bool err; + + err = huge_node_set(tsdn, ptr, node); + assert(!err); +} + static void huge_node_unset(const void *ptr, const extent_node_t *node) { @@ -162,8 +171,10 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -224,7 +235,9 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); /* Update zeroed. 
*/ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -260,7 +273,9 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ + huge_node_unset(ptr, node); extent_node_size_set(node, usize); + huge_node_reset(tsdn, ptr, node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { From 73d3d58dc234315214c0d73d6badd4fdc53cbeff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 15:33:28 -0700 Subject: [PATCH 0243/2608] Optimize witness fast path. Short-circuit commonly called witness functions so that they only execute in debug builds, and remove equivalent guards from mutex functions. This avoids pointless code execution in witness_assert_lockless(), which is typically called twice per allocation/deallocation function invocation. Inline commonly called witness functions so that optimized builds can completely remove calls as dead code. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/mutex.h | 13 +- include/jemalloc/internal/witness.h | 152 +++++++++++++++++- src/witness.c | 122 +------------- 4 files changed, 157 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 69d94ec5..51bf8974 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -531,9 +531,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" #include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 00f0b91c..52217991 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -81,8 +81,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { - if (config_debug) - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -94,8 +93,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) #else pthread_mutex_lock(&mutex->lock); #endif - if (config_debug) - witness_lock(tsdn, &mutex->witness); + witness_lock(tsdn, &mutex->witness); } } @@ -104,8 +102,7 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { - if (config_debug) - witness_unlock(tsdn, &mutex->witness); + witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -124,7 +121,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_owner(tsdn_t 
*tsdn, malloc_mutex_t *mutex) { - if (isthreaded && config_debug) + if (isthreaded) witness_assert_owner(tsdn, &mutex->witness); } @@ -132,7 +129,7 @@ JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded && config_debug) + if (isthreaded) witness_assert_not_owner(tsdn, &mutex->witness); } #endif diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 4d312eab..d78dca2d 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -74,24 +74,28 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, #ifdef JEMALLOC_JET typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t *witness_lock_error; +#else +void witness_lock_error(const witness_list_t *witnesses, + const witness_t *witness); #endif -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_owner_error_t)(const witness_t *); extern witness_owner_error_t *witness_owner_error; +#else +void witness_owner_error(const witness_t *witness); #endif -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_not_owner_error_t)(const witness_t *); extern witness_not_owner_error_t *witness_not_owner_error; +#else +void witness_not_owner_error(const witness_t *witness); #endif -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); #ifdef JEMALLOC_JET typedef void (witness_lockless_error_t)(const witness_list_t *); extern witness_lockless_error_t *witness_lockless_error; +#else +void witness_lockless_error(const witness_list_t *witnesses); #endif -void witness_assert_lockless(tsdn_t *tsdn); void witnesses_cleanup(tsd_t *tsd); void witness_fork_cleanup(tsd_t *tsd); @@ -103,5 +107,143 @@ void witness_postfork_child(tsd_t *tsd); 
/******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_lockless(tsdn_t *tsdn); +void witness_lock(tsdn_t *tsdn, witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return; + } + witness_owner_error(witness); +} + +JEMALLOC_INLINE void +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) + witness_lockless_error(witnesses); +} + +JEMALLOC_INLINE void +witness_lock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + 
return; + + witness_assert_not_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, witness) > 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +JEMALLOC_INLINE void +witness_unlock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/src/witness.c b/src/witness.c index f5176b6f..24312031 100644 --- a/src/witness.c +++ b/src/witness.c @@ -15,7 +15,7 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, #undef witness_lock_error #define witness_lock_error JEMALLOC_N(witness_lock_error_impl) #endif -static void +void witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { witness_t *w; @@ -33,66 +33,11 @@ witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); #endif -void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - 
witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_not_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w == NULL) { - /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { - /* Forking, and relaxed ranking satisfied. */ - } else if (w->rank > witness->rank) { - /* Not forking, rank order reversal. */ - witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, witness) > 0)) { - /* - * Missing/incompatible comparison function, or comparison - * function indicates rank order reversal. - */ - witness_lock_error(witnesses, witness); - } - - ql_elm_new(witness, link); - ql_tail_insert(witnesses, witness, link); -} - -void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); -} - #ifdef JEMALLOC_JET #undef witness_owner_error #define witness_owner_error JEMALLOC_N(witness_owner_error_impl) #endif -static void +void witness_owner_error(const witness_t *witness) { @@ -107,32 +52,11 @@ witness_owner_error_t *witness_owner_error = JEMALLOC_N(witness_owner_error_impl); #endif -void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } - witness_owner_error(witness); -} - #ifdef JEMALLOC_JET #undef witness_not_owner_error #define 
witness_not_owner_error JEMALLOC_N(witness_not_owner_error_impl) #endif -static void +void witness_not_owner_error(const witness_t *witness) { @@ -147,31 +71,11 @@ witness_not_owner_error_t *witness_not_owner_error = JEMALLOC_N(witness_not_owner_error_impl); #endif -void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - witness_not_owner_error(witness); - } -} - #ifdef JEMALLOC_JET #undef witness_lockless_error #define witness_lockless_error JEMALLOC_N(witness_lockless_error_impl) #endif -static void +void witness_lockless_error(const witness_list_t *witnesses) { witness_t *w; @@ -190,24 +94,6 @@ witness_lockless_error_t *witness_lockless_error = JEMALLOC_N(witness_lockless_error_impl); #endif -void -witness_assert_lockless(tsdn_t *tsdn) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w != NULL) { - witness_lockless_error(witnesses); - } -} - void witnesses_cleanup(tsd_t *tsd) { From 0fc1317fc6989e1090c5bcf1713b6a7482110ccc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 16:14:20 -0700 Subject: [PATCH 0244/2608] Mangle tested functions as n_witness_* rather than witness_*_impl. 
--- src/witness.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/witness.c b/src/witness.c index 24312031..23753f24 100644 --- a/src/witness.c +++ b/src/witness.c @@ -13,7 +13,7 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, #ifdef JEMALLOC_JET #undef witness_lock_error -#define witness_lock_error JEMALLOC_N(witness_lock_error_impl) +#define witness_lock_error JEMALLOC_N(n_witness_lock_error) #endif void witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) @@ -30,12 +30,12 @@ witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) #ifdef JEMALLOC_JET #undef witness_lock_error #define witness_lock_error JEMALLOC_N(witness_lock_error) -witness_lock_error_t *witness_lock_error = JEMALLOC_N(witness_lock_error_impl); +witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); #endif #ifdef JEMALLOC_JET #undef witness_owner_error -#define witness_owner_error JEMALLOC_N(witness_owner_error_impl) +#define witness_owner_error JEMALLOC_N(n_witness_owner_error) #endif void witness_owner_error(const witness_t *witness) @@ -48,13 +48,12 @@ witness_owner_error(const witness_t *witness) #ifdef JEMALLOC_JET #undef witness_owner_error #define witness_owner_error JEMALLOC_N(witness_owner_error) -witness_owner_error_t *witness_owner_error = - JEMALLOC_N(witness_owner_error_impl); +witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); #endif #ifdef JEMALLOC_JET #undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error_impl) +#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) #endif void witness_not_owner_error(const witness_t *witness) @@ -68,12 +67,12 @@ witness_not_owner_error(const witness_t *witness) #undef witness_not_owner_error #define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) witness_not_owner_error_t *witness_not_owner_error = - 
JEMALLOC_N(witness_not_owner_error_impl); + JEMALLOC_N(n_witness_not_owner_error); #endif #ifdef JEMALLOC_JET #undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error_impl) +#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) #endif void witness_lockless_error(const witness_list_t *witnesses) @@ -91,7 +90,7 @@ witness_lockless_error(const witness_list_t *witnesses) #undef witness_lockless_error #define witness_lockless_error JEMALLOC_N(witness_lockless_error) witness_lockless_error_t *witness_lockless_error = - JEMALLOC_N(witness_lockless_error_impl); + JEMALLOC_N(n_witness_lockless_error); #endif void From 1c35f63797d63a1d08507ea724ec5d8898e8d76d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 11 May 2016 16:52:58 -0700 Subject: [PATCH 0245/2608] Guard tsdn_tsd() call with tsdn_null() check. --- src/huge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/huge.c b/src/huge.c index ba083684..1aa02a0f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -67,7 +67,6 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ - assert(tsdn != NULL || arena != NULL); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); if (node == NULL) @@ -78,7 +77,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, * it is possible to make correct junk/zero fill decisions below. 
*/ is_zeroed = zero; - arena = arena_choose(tsdn_tsd(tsdn), arena); + if (likely(!tsdn_null(tsdn))) + arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { idalloctm(tsdn, node, NULL, true, true); From 09f8585ce8a57baa387cc0327e51c0baffbdce6f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 14:23:50 -0700 Subject: [PATCH 0246/2608] Update ChangeLog for 4.2.0. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index a9390947..926209e5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.2.0 (XXX) +* 4.2.0 (May 12, 2016) New features: - Add the arena..reset mallctl, which makes it possible to discard all of From dc7ff6306d7a15b53479e2fb8e5546404b82e6fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 15:06:50 -0700 Subject: [PATCH 0247/2608] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 926209e5..c9ce7c4d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,7 +8,7 @@ brevity. Much more detail can be found in the git revision history: New features: - Add the arena..reset mallctl, which makes it possible to discard all of - an arena's allocations in a single operation. (@jasone@) + an arena's allocations in a single operation. (@jasone) - Add the stats.retained and stats.arenas..retained statistics. (@jasone) - Add the --with-version configure option. (@jasone) - Support --with-lg-page values larger than actual page size. (@jasone) From a397045323d743a787c7efff17c0619dcf25f0b4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 21:07:08 -0700 Subject: [PATCH 0248/2608] Use TSDN_NULL rather than NULL as appropriate. 
--- include/jemalloc/internal/mb.h | 4 ++-- src/jemalloc.c | 4 ++-- src/tsd.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 437c86f7..5384728f 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -105,8 +105,8 @@ mb_write(void) malloc_mutex_t mtx; malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(NULL, &mtx); - malloc_mutex_unlock(NULL, &mtx); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); } #endif #endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 40eb2eaa..941c1c85 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1251,9 +1251,9 @@ malloc_init_hard_needed(void) if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. */ do { - malloc_mutex_unlock(NULL, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); CPU_SPINWAIT; - malloc_mutex_lock(NULL, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); } diff --git a/src/tsd.c b/src/tsd.c index aeaa5e18..ec69a51c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -171,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. 
*/ - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (iter->data); } } @@ -182,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (NULL); } @@ -190,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif From 9a8add1510456464bc496320990ec234798bd381 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 5 Apr 2016 16:25:44 -0700 Subject: [PATCH 0249/2608] Remove Valgrind support. 
--- INSTALL | 3 - Makefile.in | 4 - README | 12 +- configure.ac | 30 ----- doc/jemalloc.xml.in | 41 ++----- .../jemalloc/internal/jemalloc_internal.h.in | 13 -- .../internal/jemalloc_internal_defs.h.in | 3 - include/jemalloc/internal/private_symbols.txt | 5 - include/jemalloc/internal/quarantine.h | 3 - include/jemalloc/internal/valgrind.h | 114 ------------------ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- src/arena.c | 41 +------ src/base.c | 3 - src/chunk.c | 10 -- src/chunk_dss.c | 5 +- src/ctl.c | 6 +- src/jemalloc.c | 95 ++------------- src/quarantine.c | 7 +- src/stats.c | 1 - src/valgrind.c | 34 ------ test/unit/arena_reset.c | 3 +- test/unit/mallctl.c | 1 - 23 files changed, 33 insertions(+), 409 deletions(-) delete mode 100644 include/jemalloc/internal/valgrind.h delete mode 100644 src/valgrind.c diff --git a/INSTALL b/INSTALL index 68787165..36306fec 100644 --- a/INSTALL +++ b/INSTALL @@ -169,9 +169,6 @@ any of the following arguments (not a definitive list) to 'configure': See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option documentation for usage details. ---disable-valgrind - Disable support for Valgrind. - --disable-zone-allocator Disable zone allocator for Darwin. This means jemalloc won't be hooked as the default allocator on OSX/iOS. 
diff --git a/Makefile.in b/Makefile.in index 652f01f2..34facf43 100644 --- a/Makefile.in +++ b/Makefile.in @@ -49,7 +49,6 @@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_code_coverage := @enable_code_coverage@ enable_prof := @enable_prof@ -enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF DSO_LDFLAGS = @DSO_LDFLAGS@ @@ -104,9 +103,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tsd.c \ $(srcroot)src/util.c \ $(srcroot)src/witness.c -ifeq ($(enable_valgrind), 1) -C_SRCS += $(srcroot)src/valgrind.c -endif ifeq ($(enable_zone_allocator), 1) C_SRCS += $(srcroot)src/zone.c endif diff --git a/README b/README index 9b268f42..67cbf6da 100644 --- a/README +++ b/README @@ -3,12 +3,12 @@ fragmentation avoidance and scalable concurrency support. jemalloc first came into use as the FreeBSD libc allocator in 2005, and since then it has found its way into numerous applications that rely on its predictable behavior. In 2010 jemalloc development efforts broadened to include developer support features -such as heap profiling, Valgrind integration, and extensive monitoring/tuning -hooks. Modern jemalloc releases continue to be integrated back into FreeBSD, -and therefore versatility remains critical. Ongoing development efforts trend -toward making jemalloc among the best allocators for a broad range of demanding -applications, and eliminating/mitigating weaknesses that have practical -repercussions for real world applications. +such as heap profiling and extensive monitoring/tuning hooks. Modern jemalloc +releases continue to be integrated back into FreeBSD, and therefore versatility +remains critical. Ongoing development efforts trend toward making jemalloc +among the best allocators for a broad range of demanding applications, and +eliminating/mitigating weaknesses that have practical repercussions for real +world applications. 
The COPYING file contains copyright and licensing information. diff --git a/configure.ac b/configure.ac index 7f19715d..df5cf25a 100644 --- a/configure.ac +++ b/configure.ac @@ -988,35 +988,6 @@ if test "x$enable_utrace" = "x1" ; then fi AC_SUBST([enable_utrace]) -dnl Support Valgrind by default. -AC_ARG_ENABLE([valgrind], - [AS_HELP_STRING([--disable-valgrind], [Disable support for Valgrind])], -[if test "x$enable_valgrind" = "xno" ; then - enable_valgrind="0" -else - enable_valgrind="1" -fi -], -[enable_valgrind="1"] -) -if test "x$enable_valgrind" = "x1" ; then - JE_COMPILABLE([valgrind], [ -#include -#include - -#if !defined(VALGRIND_RESIZEINPLACE_BLOCK) -# error "Incompatible Valgrind version" -#endif -], [], [je_cv_valgrind]) - if test "x${je_cv_valgrind}" = "xno" ; then - enable_valgrind="0" - fi - if test "x$enable_valgrind" = "x1" ; then - AC_DEFINE([JEMALLOC_VALGRIND], [ ]) - fi -fi -AC_SUBST([enable_valgrind]) - dnl Do not support the xmalloc option by default. AC_ARG_ENABLE([xmalloc], [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], @@ -1782,7 +1753,6 @@ AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) -AC_MSG_RESULT([valgrind : ${enable_valgrind}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index c4a44e3c..2f8f150a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -869,16 +869,6 @@ for (i = 0; i < nbins; i++) { build configuration. - - - config.valgrind - (bool) - r- - - was specified during - build configuration. - - config.xmalloc @@ -1046,9 +1036,8 @@ for (i = 0; i < nbins; i++) { "false", junk filling be disabled entirely. This is intended for debugging and will impact performance negatively. 
This option is "false" by default unless is specified - during configuration, in which case it is "true" by default unless - running inside Valgrind. + during configuration, in which case it is "true" by + default. @@ -1063,13 +1052,9 @@ for (i = 0; i < nbins; i++) { specified number of bytes of memory. The quarantined memory is not freed until it is released from quarantine, though it is immediately junk-filled if the opt.junk option is - enabled. This feature is of particular use in combination with Valgrind, which can detect attempts - to access quarantined objects. This is intended for debugging and will - impact performance negatively. The default quarantine size is 0 unless - running inside Valgrind, in which case the default is 16 - MiB. + linkend="opt.junk">opt.junk option is enabled. + This is intended for debugging and will impact performance negatively. + The default quarantine size is 0. @@ -1083,12 +1068,8 @@ for (i = 0; i < nbins; i++) { allocations have redzones before and after them. Furthermore, if the opt.junk option is enabled, the redzones are checked for corruption during deallocation. - However, the primary intended purpose of this feature is to be used in - combination with Valgrind, - which needs redzones in order to do effective buffer overflow/underflow - detection. This option is intended for debugging and will impact - performance negatively. This option is disabled by - default unless running inside Valgrind. + This option is intended for debugging and will impact performance + negatively. This option is disabled by default. @@ -1155,9 +1136,7 @@ malloc_conf = "xmalloc:true";]]> increased memory use. See the opt.lg_tcache_max option for related tuning information. This option is enabled by - default unless running inside Valgrind, in which case it is - forcefully disabled. + default. 
@@ -2746,9 +2725,7 @@ MAPPED_LIBRARIES: This implementation does not provide much detail about the problems it detects, because the performance impact for storing such information - would be prohibitive. However, jemalloc does integrate with the most - excellent Valgrind tool if the - configuration option is enabled. + would be prohibitive. DIAGNOSTIC MESSAGES diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 51bf8974..4c845e30 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -113,13 +113,6 @@ static const bool config_utrace = false #endif ; -static const bool config_valgrind = -#ifdef JEMALLOC_VALGRIND - true -#else - false -#endif - ; static const bool config_xmalloc = #ifdef JEMALLOC_XMALLOC true @@ -361,7 +354,6 @@ typedef unsigned szind_t; #endif #include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -393,7 +385,6 @@ typedef unsigned szind_t; #define JEMALLOC_H_STRUCTS #include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -441,8 +432,6 @@ extern bool opt_xmalloc; extern bool opt_zero; extern unsigned opt_narenas; -extern bool in_valgrind; - /* Number of CPUs. 
*/ extern unsigned ncpus; @@ -489,7 +478,6 @@ void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); #include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" @@ -521,7 +509,6 @@ void jemalloc_postfork_child(void); #define JEMALLOC_H_INLINES #include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/prng.h" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7de0cf7c..c9aa5fd5 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -148,9 +148,6 @@ /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE -/* Support Valgrind. */ -#undef JEMALLOC_VALGRIND - /* Support optional abort() on OOM. 
*/ #undef JEMALLOC_XMALLOC diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f2b6a55d..15b8ceec 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -296,7 +296,6 @@ iallocztm iarena_cleanup idalloc idalloctm -in_valgrind index2size index2size_compute index2size_lookup @@ -591,10 +590,6 @@ tsdn_fetch tsdn_null tsdn_tsd u2rz -valgrind_freelike_block -valgrind_make_mem_defined -valgrind_make_mem_noaccess -valgrind_make_mem_undefined witness_assert_lockless witness_assert_not_owner witness_assert_owner diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h index ae607399..1ab4345e 100644 --- a/include/jemalloc/internal/quarantine.h +++ b/include/jemalloc/internal/quarantine.h @@ -4,9 +4,6 @@ typedef struct quarantine_obj_s quarantine_obj_t; typedef struct quarantine_s quarantine_t; -/* Default per thread quarantine size if valgrind is enabled. */ -#define JEMALLOC_VALGRIND_QUARANTINE_DEFAULT (ZU(1) << 24) - #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h deleted file mode 100644 index 1a868082..00000000 --- a/include/jemalloc/internal/valgrind.h +++ /dev/null @@ -1,114 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#ifdef JEMALLOC_VALGRIND -#include - -/* - * The size that is reported to Valgrind must be consistent through a chain of - * malloc..realloc..realloc calls. Request size isn't recorded anywhere in - * jemalloc, so it is critical that all callers of these macros provide usize - * rather than request size. 
As a result, buffer overflow detection is - * technically weakened for the standard API, though it is generally accepted - * practice to consider any extra bytes reported by malloc_usable_size() as - * usable space. - */ -#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_noaccess(ptr, usize); \ -} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_undefined(ptr, usize); \ -} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_make_mem_defined(ptr, usize); \ -} while (0) -/* - * The VALGRIND_MALLOCLIKE_BLOCK() and VALGRIND_RESIZEINPLACE_BLOCK() macro - * calls must be embedded in macros rather than in functions so that when - * Valgrind reports errors, there are no extra stack frames in the backtraces. - */ -#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do { \ - if (unlikely(in_valgrind && cond)) { \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, p2rz(tsdn, ptr), \ - zero); \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do { \ - if (unlikely(in_valgrind)) { \ - size_t rzsize = p2rz(tsdn, ptr); \ - \ - if (!maybe_moved || ptr == old_ptr) { \ - VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ - usize, rzsize); \ - if (zero && old_usize < usize) { \ - valgrind_make_mem_defined( \ - (void *)((uintptr_t)ptr + \ - old_usize), usize - old_usize); \ - } \ - } else { \ - if (!old_ptr_maybe_null || old_ptr != NULL) { \ - valgrind_freelike_block(old_ptr, \ - old_rzsize); \ - } \ - if (!ptr_maybe_null || ptr != NULL) { \ - size_t copy_size = (old_usize < usize) \ - ? 
old_usize : usize; \ - size_t tail_size = usize - copy_size; \ - VALGRIND_MALLOCLIKE_BLOCK(ptr, usize, \ - rzsize, false); \ - if (copy_size > 0) { \ - valgrind_make_mem_defined(ptr, \ - copy_size); \ - } \ - if (zero && tail_size > 0) { \ - valgrind_make_mem_defined( \ - (void *)((uintptr_t)ptr + \ - copy_size), tail_size); \ - } \ - } \ - } \ - } \ -} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do { \ - if (unlikely(in_valgrind)) \ - valgrind_freelike_block(ptr, rzsize); \ -} while (0) -#else -#define RUNNING_ON_VALGRIND ((unsigned)0) -#define JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ptr, usize) do {} while (0) -#define JEMALLOC_VALGRIND_MALLOC(cond, tsdn, ptr, usize, zero) do {} while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do {} while (0) -#define JEMALLOC_VALGRIND_FREE(ptr, rzsize) do {} while (0) -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_VALGRIND -void valgrind_make_mem_noaccess(void *ptr, size_t usize); -void valgrind_make_mem_undefined(void *ptr, size_t usize); -void valgrind_make_mem_defined(void *ptr, size_t usize); -void valgrind_freelike_block(void *ptr, size_t usize); -#endif - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9315022d..432d1f24 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - @@ -395,4 +394,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 88c15efa..c0e568ec 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -161,9 +161,6 @@ Header Files\internal - - Header Files\internal - Header Files\msvc_compat @@ -257,4 +254,4 @@ Source Files - \ No newline at end of file + diff --git a/src/arena.c b/src/arena.c index c605bcd3..4e6d3d60 100644 --- a/src/arena.c +++ b/src/arena.c @@ -350,27 +350,16 @@ JEMALLOC_INLINE_C void arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) { - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + - (run_ind << LG_PAGE)), (npages << LG_PAGE)); memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, (npages << LG_PAGE)); } -JEMALLOC_INLINE_C void -arena_run_page_mark_zeroed(arena_chunk_t *chunk, size_t run_ind) -{ - - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void *)((uintptr_t)chunk + (run_ind - << LG_PAGE)), PAGE); -} - JEMALLOC_INLINE_C void arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) { size_t i; UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); - arena_run_page_mark_zeroed(chunk, run_ind); for (i = 0; i < PAGE / sizeof(size_t); i++) assert(p[i] == 0); } @@ -471,12 +460,9 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, } if (zero) { - if (flag_decommitted != 0) { - /* The run is untouched, and therefore zeroed. 
*/ - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void - *)((uintptr_t)chunk + (run_ind << LG_PAGE)), - (need_pages << LG_PAGE)); - } else if (flag_dirty != 0) { + if (flag_decommitted != 0) + ; /* The run is untouched, and therefore zeroed. */ + else if (flag_dirty != 0) { /* The run is dirty, so all pages must be zeroed. */ arena_run_zero(chunk, run_ind, need_pages); } else { @@ -492,15 +478,9 @@ arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, else if (config_debug) { arena_run_page_validate_zeroed(chunk, run_ind+i); - } else { - arena_run_page_mark_zeroed(chunk, - run_ind+i); } } } - } else { - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + - (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); } /* @@ -564,8 +544,6 @@ arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, if (config_debug && flag_dirty == 0 && flag_unzeroed == 0) arena_run_page_validate_zeroed(chunk, run_ind+i); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED((void *)((uintptr_t)chunk + - (run_ind << LG_PAGE)), (need_pages << LG_PAGE)); return (false); } @@ -700,19 +678,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) * the chunk is not zeroed. 
*/ if (!zero) { - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - (void *)arena_bitselm_get_const(chunk, map_bias+1), - (size_t)((uintptr_t)arena_bitselm_get_const(chunk, - chunk_npages-1) - - (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); for (i = map_bias+1; i < chunk_npages-1; i++) arena_mapbits_internal_set(chunk, i, flag_unzeroed); } else { - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED((void - *)arena_bitselm_get_const(chunk, map_bias+1), - (size_t)((uintptr_t)arena_bitselm_get_const(chunk, - chunk_npages-1) - - (uintptr_t)arena_bitselm_get_const(chunk, map_bias+1))); if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get(chunk, i) == @@ -2571,13 +2539,11 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) } else if (unlikely(opt_zero)) memset(ret, 0, usize); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); } else { if (config_fill && unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], true); } - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, usize); memset(ret, 0, usize); } @@ -3311,7 +3277,6 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, */ copysize = (usize < oldsize) ? 
usize : oldsize; - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, copysize); memcpy(ret, ptr, copysize); isqalloc(tsd, ptr, oldsize, tcache, true); } else { diff --git a/src/base.c b/src/base.c index 81b0801f..1b0bf697 100644 --- a/src/base.c +++ b/src/base.c @@ -24,7 +24,6 @@ base_node_try_alloc(tsdn_t *tsdn) return (NULL); node = base_nodes; base_nodes = *(extent_node_t **)node; - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); return (node); } @@ -34,7 +33,6 @@ base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) malloc_mutex_assert_owner(tsdn, &base_mtx); - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(node, sizeof(extent_node_t)); *(extent_node_t **)node = base_nodes; base_nodes = node; } @@ -123,7 +121,6 @@ base_alloc(tsdn_t *tsdn, size_t size) base_resident += PAGE_CEILING((uintptr_t)ret + csize) - PAGE_CEILING((uintptr_t)ret); } - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, csize); label_return: malloc_mutex_unlock(tsdn, &base_mtx); return (ret); diff --git a/src/chunk.c b/src/chunk.c index adc666ff..7af7bb91 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -316,7 +316,6 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } @@ -376,8 +375,6 @@ chunk_alloc_base(size_t size) ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); if (ret == NULL) return (NULL); - if (config_valgrind) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } @@ -401,8 +398,6 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (ret == NULL) return (NULL); assert(commit); - if (config_valgrind) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } @@ -434,8 +429,6 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, commit, arena->dss_prec); if (ret == NULL) return (NULL); - if (config_valgrind) - 
JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); } @@ -478,8 +471,6 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (NULL); } - if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); } @@ -494,7 +485,6 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(!cache || !zeroed); unzeroed = cache || !zeroed; - JEMALLOC_VALGRIND_MAKE_MEM_NOACCESS(chunk, size); malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 0b1f82bd..d42aeb0b 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -138,11 +138,8 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, &chunk_hooks, cpad, cpad_size, false, true); } - if (*zero) { - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( - ret, size); + if (*zero) memset(ret, 0, size); - } if (!*commit) *commit = pages_decommit(ret, size); return (ret); diff --git a/src/ctl.c b/src/ctl.c index dad80086..d2e94269 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -86,7 +86,6 @@ CTL_PROTO(config_stats) CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) -CTL_PROTO(config_valgrind) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) @@ -260,7 +259,6 @@ static const ctl_named_node_t config_node[] = { {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, - {NAME("valgrind"), CTL(config_valgrind)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -1270,7 +1268,6 @@ CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_tcache, bool) CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) -CTL_RO_CONFIG_GEN(config_valgrind, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ @@ -1622,8 +1619,7 @@ 
arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); - if ((config_valgrind && unlikely(in_valgrind)) || (config_fill && - unlikely(opt_quarantine))) { + if (config_fill && unlikely(opt_quarantine)) { ret = EFAULT; goto label_return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 941c1c85..cfe6ed32 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -42,9 +42,6 @@ bool opt_xmalloc = false; bool opt_zero = false; unsigned opt_narenas = 0; -/* Initialized to true if the process is running inside Valgrind. */ -bool in_valgrind; - unsigned ncpus; /* Protects arenas initialization. */ @@ -80,8 +77,7 @@ enum { flag_opt_quarantine = (1U << 2), flag_opt_zero = (1U << 3), flag_opt_utrace = (1U << 4), - flag_in_valgrind = (1U << 5), - flag_opt_xmalloc = (1U << 6) + flag_opt_xmalloc = (1U << 5) }; static uint8_t malloc_slow_flags; @@ -894,9 +890,6 @@ malloc_slow_flag_init(void) | (opt_utrace ? flag_opt_utrace : 0) | (opt_xmalloc ? flag_opt_xmalloc : 0); - if (config_valgrind) - malloc_slow_flags |= (in_valgrind ? flag_in_valgrind : 0); - malloc_slow = (malloc_slow_flags != 0); } @@ -908,24 +901,6 @@ malloc_conf_init(void) const char *opts, *k, *v; size_t klen, vlen; - /* - * Automatically configure valgrind before processing options. The - * valgrind option remains in jemalloc 3.x for compatibility reasons. - */ - if (config_valgrind) { - in_valgrind = (RUNNING_ON_VALGRIND != 0) ? true : false; - if (config_fill && unlikely(in_valgrind)) { - opt_junk = "false"; - opt_junk_alloc = false; - opt_junk_free = false; - assert(!opt_zero); - opt_quarantine = JEMALLOC_VALGRIND_QUARANTINE_DEFAULT; - opt_redzone = true; - } - if (config_tcache && unlikely(in_valgrind)) - opt_tcache = false; - } - for (i = 0; i < 4; i++) { /* Get runtime configuration. 
*/ switch (i) { @@ -1183,19 +1158,7 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true) } if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, "tcache", - !config_valgrind || !in_valgrind) - if (CONF_MATCH("tcache")) { - assert(config_valgrind && in_valgrind); - if (opt_tcache) { - opt_tcache = false; - malloc_conf_error( - "tcache cannot be enabled " - "while running inside Valgrind", - k, klen, v, vlen); - } - continue; - } + CONF_HANDLE_BOOL(opt_tcache, "tcache", true) CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) @@ -1508,8 +1471,7 @@ ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, if (unlikely(ind >= NSIZES)) return (NULL); - if (config_stats || (config_prof && opt_prof) || (slow_path && - config_valgrind && unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof)) { *usize = index2size(ind); assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } @@ -1562,7 +1524,6 @@ je_malloc(size_t size) ret = ialloc_body(size, false, &tsdn, &usize, true); ialloc_post_check(ret, tsdn, usize, "malloc", true, true); UTRACE(0, size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } return (ret); @@ -1664,8 +1625,6 @@ label_return: *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); - JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, - false); witness_assert_lockless(tsd_tsdn(tsd)); return (ret); label_oom: @@ -1684,11 +1643,8 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret; - ret = imemalign(memptr, alignment, size, sizeof(void *)); - - return (ret); + return (imemalign(memptr, alignment, size, sizeof(void *))); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1703,7 +1659,6 @@ je_aligned_alloc(size_t alignment, size_t size) ret = NULL; set_errno(err); } - return (ret); } @@ -1739,7 +1694,6 @@ je_calloc(size_t num, 
size_t size) ret = ialloc_body(num_size, true, &tsdn, &usize, true); ialloc_post_check(ret, tsdn, usize, "calloc", true, true); UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); } return (ret); @@ -1792,7 +1746,6 @@ JEMALLOC_INLINE_C void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; - UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); witness_assert_lockless(tsd_tsdn(tsd)); @@ -1802,25 +1755,20 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (config_prof && opt_prof) { usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); prof_free(tsd, ptr, usize); - } else if (config_stats || config_valgrind) + } else if (config_stats) usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) iqalloc(tsd, ptr, tcache, false); - else { - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd_tsdn(tsd), ptr); + else iqalloc(tsd, ptr, tcache, true); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); - } } JEMALLOC_INLINE_C void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { - UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); witness_assert_lockless(tsd_tsdn(tsd)); @@ -1831,10 +1779,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) prof_free(tsd, ptr, usize); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - if (config_valgrind && unlikely(in_valgrind)) - rzsize = p2rz(tsd_tsdn(tsd), ptr); isqalloc(tsd, ptr, usize, tcache, slow_path); - JEMALLOC_VALGRIND_FREE(ptr, rzsize); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1846,7 +1791,6 @@ je_realloc(void *ptr, size_t size) tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; - UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); if (unlikely(size == 0)) { if (ptr != NULL) { @@ -1871,18 +1815,13 @@ je_realloc(void 
*ptr, size_t size) witness_assert_lockless(tsd_tsdn(tsd)); old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) { - old_rzsize = config_prof ? p2rz(tsd_tsdn(tsd), ptr) : - u2rz(old_usize); - } if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? NULL : irealloc_prof(tsd, ptr, old_usize, usize); } else { - if (config_stats || (config_valgrind && - unlikely(in_valgrind))) + if (config_stats) usize = s2u(size); ret = iralloc(tsd, ptr, old_usize, size, 0, false); } @@ -1913,8 +1852,6 @@ je_realloc(void *ptr, size_t size) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, tsdn, ret, usize, true, ptr, old_usize, - old_rzsize, true, false); witness_assert_lockless(tsdn); return (ret); } @@ -2143,8 +2080,7 @@ imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, szind_t ind = size2index(size); if (unlikely(ind >= NSIZES)) return (NULL); - if (config_stats || (config_prof && opt_prof) || (slow_path && - config_valgrind && unlikely(in_valgrind))) { + if (config_stats || (config_prof && opt_prof)) { *usize = index2size(ind); assert(*usize > 0 && *usize <= HUGE_MAXCLASS); } @@ -2181,8 +2117,6 @@ je_mallocx(size_t size, int flags) p = imallocx_body(size, flags, &tsdn, &usize, true); ialloc_post_check(p, tsdn, usize, "mallocx", false, true); UTRACE(0, size, p); - JEMALLOC_VALGRIND_MALLOC(p != NULL, tsdn, p, usize, - MALLOCX_ZERO_GET(flags)); } return (p); @@ -2261,7 +2195,6 @@ je_rallocx(void *ptr, size_t size, int flags) tsd_t *tsd; size_t usize; size_t old_usize; - UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; arena_t *arena; @@ -2291,8 +2224,6 @@ je_rallocx(void *ptr, size_t size, int flags) tcache = tcache_get(tsd, true); old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - if (config_valgrind && unlikely(in_valgrind)) 
- old_rzsize = u2rz(old_usize); if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); @@ -2307,7 +2238,7 @@ je_rallocx(void *ptr, size_t size, int flags) tcache, arena); if (unlikely(p == NULL)) goto label_oom; - if (config_stats || (config_valgrind && unlikely(in_valgrind))) + if (config_stats) usize = isalloc(tsd_tsdn(tsd), p, config_prof); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2317,8 +2248,6 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, tsd_tsdn(tsd), p, usize, false, ptr, - old_usize, old_rzsize, false, zero); witness_assert_lockless(tsd_tsdn(tsd)); return (p); label_oom: @@ -2413,7 +2342,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; - UNUSED size_t old_rzsize JEMALLOC_CC_SILENCE_INIT(0); size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; @@ -2443,9 +2371,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) if (unlikely(HUGE_MAXCLASS - size < extra)) extra = HUGE_MAXCLASS - size; - if (config_valgrind && unlikely(in_valgrind)) - old_rzsize = u2rz(old_usize); - if (config_prof && opt_prof) { usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); @@ -2460,8 +2385,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, tsd_tsdn(tsd), ptr, usize, false, ptr, - old_usize, old_rzsize, false, zero); label_not_resized: UTRACE(ptr, size, ptr); witness_assert_lockless(tsd_tsdn(tsd)); diff --git a/src/quarantine.c b/src/quarantine.c index 18903fb5..9658ffad 100644 --- a/src/quarantine.c +++ b/src/quarantine.c @@ -150,12 +150,7 @@ quarantine(tsd_t *tsd, void *ptr) quarantine->curbytes += usize; quarantine->curobjs++; if (config_fill && 
unlikely(opt_junk_free)) { - /* - * Only do redzone validation if Valgrind isn't in - * operation. - */ - if ((!config_valgrind || likely(!in_valgrind)) - && usize <= SMALL_MAXCLASS) + if (usize <= SMALL_MAXCLASS) arena_quarantine_junk_small(ptr, usize); else memset(ptr, JEMALLOC_FREE_JUNK, usize); diff --git a/src/stats.c b/src/stats.c index 073be4fe..97f901f6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -517,7 +517,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) OPT_WRITE_BOOL(xmalloc) OPT_WRITE_BOOL(tcache) OPT_WRITE_SSIZE_T(lg_tcache_max) diff --git a/src/valgrind.c b/src/valgrind.c deleted file mode 100644 index 8e7ef3a2..00000000 --- a/src/valgrind.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "jemalloc/internal/jemalloc_internal.h" -#ifndef JEMALLOC_VALGRIND -# error "This source file is for Valgrind integration." -#endif - -#include - -void -valgrind_make_mem_noaccess(void *ptr, size_t usize) -{ - - VALGRIND_MAKE_MEM_NOACCESS(ptr, usize); -} - -void -valgrind_make_mem_undefined(void *ptr, size_t usize) -{ - - VALGRIND_MAKE_MEM_UNDEFINED(ptr, usize); -} - -void -valgrind_make_mem_defined(void *ptr, size_t usize) -{ - - VALGRIND_MAKE_MEM_DEFINED(ptr, usize); -} - -void -valgrind_freelike_block(void *ptr, size_t usize) -{ - - VALGRIND_FREELIKE_BLOCK(ptr, usize); -} diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8ba36c21..c602f0ff 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -88,8 +88,7 @@ TEST_BEGIN(test_arena_reset) size_t mib[3]; tsdn_t *tsdn; - test_skip_if((config_valgrind && unlikely(in_valgrind)) || (config_fill - && unlikely(opt_quarantine))); + test_skip_if(config_fill && unlikely(opt_quarantine)); sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 69f8c20c..641138ac 100644 --- 
a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -139,7 +139,6 @@ TEST_BEGIN(test_mallctl_config) TEST_MALLCTL_CONFIG(tcache, bool); TEST_MALLCTL_CONFIG(tls, bool); TEST_MALLCTL_CONFIG(utrace, bool); - TEST_MALLCTL_CONFIG(valgrind, bool); TEST_MALLCTL_CONFIG(xmalloc, bool); #undef TEST_MALLCTL_CONFIG From ba5c7095175d490b1d3d008e40efa74a66de9eab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 5 Apr 2016 16:52:36 -0700 Subject: [PATCH 0250/2608] Remove quarantine support. --- INSTALL | 6 +- Makefile.in | 2 - configure.ac | 2 +- doc/jemalloc.xml.in | 17 -- include/jemalloc/internal/arena.h | 3 +- include/jemalloc/internal/huge.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 49 +---- .../internal/jemalloc_internal_defs.h.in | 2 +- include/jemalloc/internal/private_symbols.txt | 11 -- include/jemalloc/internal/quarantine.h | 57 ------ include/jemalloc/internal/tsd.h | 2 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 2 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 6 - src/arena.c | 30 +-- src/ctl.c | 8 - src/huge.c | 10 +- src/jemalloc.c | 67 +++---- src/quarantine.c | 178 ------------------ src/stats.c | 1 - test/unit/arena_reset.c | 2 - test/unit/junk.c | 2 +- test/unit/mallctl.c | 1 - test/unit/quarantine.c | 108 ----------- test/unit/zero.c | 2 +- 24 files changed, 51 insertions(+), 519 deletions(-) delete mode 100644 include/jemalloc/internal/quarantine.h delete mode 100644 src/quarantine.c delete mode 100644 test/unit/quarantine.c diff --git a/INSTALL b/INSTALL index 36306fec..4f57b365 100644 --- a/INSTALL +++ b/INSTALL @@ -165,9 +165,9 @@ any of the following arguments (not a definitive list) to 'configure': normal jemalloc operation. --disable-fill - Disable support for junk/zero filling of memory, quarantine, and redzones. - See the "opt.junk", "opt.zero", "opt.quarantine", and "opt.redzone" option - documentation for usage details. + Disable support for junk/zero filling of memory and redzones. 
See the + "opt.junk", "opt.zero", and "opt.redzone" option documentation for usage + details. --disable-zone-allocator Disable zone allocator for Darwin. This means jemalloc won't be hooked as diff --git a/Makefile.in b/Makefile.in index 34facf43..8cd6af98 100644 --- a/Makefile.in +++ b/Makefile.in @@ -95,7 +95,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ - $(srcroot)src/quarantine.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ $(srcroot)src/tcache.c \ @@ -157,7 +156,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ - $(srcroot)test/unit/quarantine.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/run_quantize.c \ diff --git a/configure.ac b/configure.ac index df5cf25a..92192d4d 100644 --- a/configure.ac +++ b/configure.ac @@ -946,7 +946,7 @@ fi dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], [AS_HELP_STRING([--disable-fill], - [Disable support for junk/zero filling, quarantine, and redzones])], + [Disable support for junk/zero filling and redzones])], [if test "x$enable_fill" = "xno" ; then enable_fill="0" else diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2f8f150a..7ed03330 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1040,23 +1040,6 @@ for (i = 0; i < nbins; i++) { default. - - - opt.quarantine - (size_t) - r- - [] - - Per thread quarantine size in bytes. If non-zero, each - thread maintains a FIFO object quarantine that stores up to the - specified number of bytes of memory. The quarantined memory is not - freed until it is released from quarantine, though it is immediately - junk-filled if the opt.junk option is enabled. - This is intended for debugging and will impact performance negatively. - The default quarantine size is 0. 
- - opt.redzone diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b1de2b61..1c63620d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -551,7 +551,6 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; #else void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); #endif -void arena_quarantine_junk_small(void *ptr, size_t usize); void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, bool zero); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, @@ -581,7 +580,7 @@ extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index b5fa9e63..8b501e5a 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -14,7 +14,7 @@ void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero); -void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, +void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 4c845e30..c6aa5743 100644 --- 
a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -377,7 +377,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/huge.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #undef JEMALLOC_H_TYPES @@ -412,7 +411,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/huge.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #include "jemalloc/internal/tsd.h" @@ -425,7 +423,6 @@ extern bool opt_abort; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; -extern size_t opt_quarantine; extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; @@ -500,7 +497,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" #include "jemalloc/internal/prof.h" #include "jemalloc/internal/tsd.h" @@ -878,7 +874,6 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/hash.h" -#include "jemalloc/internal/quarantine.h" #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(const void *ptr); @@ -898,15 +893,12 @@ size_t p2rz(tsdn_t *tsdn, const void *ptr); void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); -void iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path); void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); -void isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); -void *iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, +void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t 
oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, +void *iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero); @@ -1064,16 +1056,6 @@ idalloc(tsd_t *tsd, void *ptr) idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); } -JEMALLOC_ALWAYS_INLINE void -iqalloc(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) -{ - - if (slow_path && config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, slow_path); -} - JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path) @@ -1082,18 +1064,8 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, arena_sdalloc(tsdn, ptr, size, tcache, slow_path); } -JEMALLOC_ALWAYS_INLINE void -isqalloc(tsd_t *tsd, void *ptr, size_t size, tcache_t *tcache, bool slow_path) -{ - - if (slow_path && config_fill && unlikely(opt_quarantine)) - quarantine(tsd, ptr); - else - isdalloct(tsd_tsdn(tsd), ptr, size, tcache, slow_path); -} - JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, +iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { void *p; @@ -1102,7 +1074,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, usize = sa2u(size + extra, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, arena); + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) return (NULL); @@ -1110,8 +1082,7 @@ iralloct_realign(tsd_t *tsd, void *ptr, 
size_t oldsize, size_t size, usize = sa2u(size, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return (NULL); - p = ipalloct(tsd_tsdn(tsd), usize, alignment, zero, tcache, - arena); + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) return (NULL); } @@ -1121,12 +1092,12 @@ iralloct_realign(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, true); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { @@ -1139,11 +1110,11 @@ iralloct(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, * Existing object alignment is inadequate; allocate new space * and copy. */ - return (iralloct_realign(tsd, ptr, oldsize, size, 0, alignment, + return (iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, zero, tcache, arena)); } - return (arena_ralloc(tsd, arena, ptr, oldsize, size, alignment, zero, + return (arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, tcache)); } @@ -1152,7 +1123,7 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero) { - return (iralloct(tsd, ptr, oldsize, size, alignment, zero, + return (iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, tcache_get(tsd, true), NULL)); } diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c9aa5fd5..5e5b0a78 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -142,7 +142,7 @@ */ #undef JEMALLOC_DSS -/* Support memory filling (junk/zero/quarantine/redzone). 
*/ +/* Support memory filling (junk/zero/redzone). */ #undef JEMALLOC_FILL /* Support utrace(2)-based tracing. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 15b8ceec..02377809 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -99,7 +99,6 @@ arena_prof_tctx_reset arena_prof_tctx_set arena_ptr_small_binind_get arena_purge -arena_quarantine_junk_small arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move @@ -303,13 +302,11 @@ index2size_tab ipalloc ipalloct ipallocztm -iqalloc iralloc iralloct iralloct_realign isalloc isdalloct -isqalloc isthreaded ivsalloc ixalloc @@ -385,7 +382,6 @@ opt_prof_leak opt_prof_prefix opt_prof_thread_active_init opt_purge -opt_quarantine opt_redzone opt_stats_print opt_tcache @@ -454,10 +450,6 @@ prof_thread_active_set prof_thread_name_get prof_thread_name_set purge_mode_names -quarantine -quarantine_alloc_hook -quarantine_alloc_hook_work -quarantine_cleanup register_zone rtree_child_read rtree_child_read_hard @@ -561,9 +553,6 @@ tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set tsd_prof_tdatap_get -tsd_quarantine_get -tsd_quarantine_set -tsd_quarantinep_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set diff --git a/include/jemalloc/internal/quarantine.h b/include/jemalloc/internal/quarantine.h deleted file mode 100644 index 1ab4345e..00000000 --- a/include/jemalloc/internal/quarantine.h +++ /dev/null @@ -1,57 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct quarantine_obj_s quarantine_obj_t; -typedef struct quarantine_s quarantine_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct quarantine_obj_s { - void *ptr; - size_t usize; -}; - -struct quarantine_s { - size_t curbytes; - size_t curobjs; - size_t first; 
-#define LG_MAXOBJS_INIT 10 - size_t lg_maxobjs; - quarantine_obj_t objs[1]; /* Dynamically sized ring buffer. */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void quarantine_alloc_hook_work(tsd_t *tsd); -void quarantine(tsd_t *tsd, void *ptr); -void quarantine_cleanup(tsd_t *tsd); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void quarantine_alloc_hook(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_QUARANTINE_C_)) -JEMALLOC_ALWAYS_INLINE void -quarantine_alloc_hook(void) -{ - tsd_t *tsd; - - assert(config_fill && opt_quarantine); - - tsd = tsd_fetch(); - if (tsd_quarantine_get(tsd) == NULL) - quarantine_alloc_hook_work(tsd); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index bf113411..f4ff8d76 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -572,7 +572,6 @@ struct tsd_init_head_s { O(narenas_tdata, unsigned) \ O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ - O(quarantine, quarantine_t *) \ O(witnesses, witness_list_t) \ O(witness_fork, bool) \ @@ -588,7 +587,6 @@ struct tsd_init_head_s { 0, \ false, \ tcache_enabled_default, \ - NULL, \ ql_head_initializer(witnesses), \ false \ } diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 432d1f24..537cb6ab 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -65,7 +65,6 @@ - @@ -107,7 +106,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c0e568ec..d2b5595f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -134,9 +134,6 @@ Header Files\internal - - Header Files\internal - Header Files\internal @@ -232,9 +229,6 @@ Source Files - - Source Files - Source Files diff --git a/src/arena.c b/src/arena.c index 4e6d3d60..607679d7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2484,21 +2484,6 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = JEMALLOC_N(n_arena_dalloc_junk_small); #endif -void -arena_quarantine_junk_small(void *ptr, size_t usize) -{ - szind_t binind; - arena_bin_info_t *bin_info; - cassert(config_fill); - assert(opt_junk_free); - assert(opt_quarantine); - assert(usize <= SMALL_MAXCLASS); - - binind = size2index(usize); - bin_info = &arena_bin_info[binind]; - arena_redzones_validate(ptr, bin_info, true); -} - static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { @@ -3243,8 +3228,8 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache) +arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t usize; @@ -3257,8 +3242,7 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, 0, - zero)) + if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) return (ptr); /* @@ -3266,8 +3250,8 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, * the object. In that case, fall back to allocating new space * and copying. 
*/ - ret = arena_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, - alignment, zero, tcache); + ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, + zero, tcache); if (ret == NULL) return (NULL); @@ -3278,9 +3262,9 @@ arena_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t size, copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, true); } else { - ret = huge_ralloc(tsd, arena, ptr, oldsize, usize, alignment, + ret = huge_ralloc(tsdn, arena, ptr, oldsize, usize, alignment, zero, tcache); } return (ret); diff --git a/src/ctl.c b/src/ctl.c index d2e94269..f4c775db 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -97,7 +97,6 @@ CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) -CTL_PROTO(opt_quarantine) CTL_PROTO(opt_redzone) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) @@ -273,7 +272,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, - {NAME("quarantine"), CTL(opt_quarantine)}, {NAME("redzone"), CTL(opt_redzone)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, @@ -1281,7 +1279,6 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) -CTL_RO_NL_CGEN(config_fill, opt_quarantine, opt_quarantine, size_t) CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) @@ -1619,11 +1616,6 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); - if (config_fill && unlikely(opt_quarantine)) { - ret = EFAULT; - goto label_return; - } - arena_ind 
= (unsigned)mib[1]; if (config_debug) { malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); diff --git a/src/huge.c b/src/huge.c index 1aa02a0f..b1ff918a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -359,7 +359,7 @@ huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, +huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; @@ -369,8 +369,7 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, assert(usize > 0 && usize <= HUGE_MAXCLASS); /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsd_tsdn(tsd), ptr, oldsize, usize, usize, - zero)) + if (!huge_ralloc_no_move(tsdn, ptr, oldsize, usize, usize, zero)) return (ptr); /* @@ -378,14 +377,13 @@ huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, * different size class. In that case, fall back to allocating new * space and copying. */ - ret = huge_ralloc_move_helper(tsd_tsdn(tsd), arena, usize, alignment, - zero); + ret = huge_ralloc_move_helper(tsdn, arena, usize, alignment, zero); if (ret == NULL) return (NULL); copysize = (usize < oldsize) ? 
usize : oldsize; memcpy(ret, ptr, copysize); - isqalloc(tsd, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, true); return (ret); } diff --git a/src/jemalloc.c b/src/jemalloc.c index cfe6ed32..4dd77e68 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -35,7 +35,6 @@ bool opt_junk_free = #endif ; -size_t opt_quarantine = ZU(0); bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; @@ -74,10 +73,9 @@ static bool malloc_slow = true; enum { flag_opt_junk_alloc = (1U), flag_opt_junk_free = (1U << 1), - flag_opt_quarantine = (1U << 2), - flag_opt_zero = (1U << 3), - flag_opt_utrace = (1U << 4), - flag_opt_xmalloc = (1U << 5) + flag_opt_zero = (1U << 2), + flag_opt_utrace = (1U << 3), + flag_opt_xmalloc = (1U << 4) }; static uint8_t malloc_slow_flags; @@ -265,23 +263,6 @@ malloc_initialized(void) return (malloc_init_state == malloc_init_initialized); } -JEMALLOC_ALWAYS_INLINE_C void -malloc_thread_init(void) -{ - - /* - * TSD initialization can't be safely done as a side effect of - * deallocation, because it is possible for a thread to do nothing but - * deallocate its TLS data via free(), in which case writing to TLS - * would cause write-after-free memory corruption. The quarantine - * facility *only* gets used as a side effect of deallocation, so make - * a best effort attempt at initializing its TSD by hooking all - * allocation events. - */ - if (config_fill && unlikely(opt_quarantine)) - quarantine_alloc_hook(); -} - JEMALLOC_ALWAYS_INLINE_C bool malloc_init_a0(void) { @@ -297,8 +278,6 @@ malloc_init(void) if (unlikely(!malloc_initialized()) && malloc_init_hard()) return (true); - malloc_thread_init(); - return (false); } @@ -885,7 +864,6 @@ malloc_slow_flag_init(void) */ malloc_slow_flags |= (opt_junk_alloc ? flag_opt_junk_alloc : 0) | (opt_junk_free ? flag_opt_junk_free : 0) - | (opt_quarantine ? flag_opt_quarantine : 0) | (opt_zero ? flag_opt_zero : 0) | (opt_utrace ? flag_opt_utrace : 0) | (opt_xmalloc ? 
flag_opt_xmalloc : 0); @@ -1146,8 +1124,6 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX, false) CONF_HANDLE_BOOL(opt_redzone, "redzone", true) CONF_HANDLE_BOOL(opt_zero, "zero", true) } @@ -1761,9 +1737,9 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) - iqalloc(tsd, ptr, tcache, false); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, false); else - iqalloc(tsd, ptr, tcache, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, true); } JEMALLOC_INLINE_C void @@ -1779,7 +1755,11 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) prof_free(tsd, ptr, usize); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; - isqalloc(tsd, ptr, usize, tcache, slow_path); + + if (likely(!slow_path)) + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, false); + else + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, true); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1809,7 +1789,6 @@ je_realloc(void *ptr, size_t size) tsd_t *tsd; assert(malloc_initialized() || IS_INITIALIZER); - malloc_thread_init(); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); @@ -2123,7 +2102,7 @@ je_mallocx(size_t size, int flags) } static void * -irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, +irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { @@ -2132,13 +2111,13 @@ irallocx_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsd, old_ptr, old_usize, LARGE_MINCLASS, alignment, - zero, tcache, arena); + p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, + alignment, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + 
arena_prof_promoted(tsdn, p, usize); } else { - p = iralloct(tsd, old_ptr, old_usize, usize, alignment, zero, + p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, tcache, arena); } @@ -2158,11 +2137,11 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, - alignment, zero, tcache, arena, tctx); + p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, + *usize, alignment, zero, tcache, arena, tctx); } else { - p = iralloct(tsd, old_ptr, old_usize, size, alignment, zero, - tcache, arena); + p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, + zero, tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); @@ -2203,7 +2182,6 @@ je_rallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); - malloc_thread_init(); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); @@ -2234,8 +2212,8 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; } else { - p = iralloct(tsd, ptr, old_usize, size, alignment, zero, - tcache, arena); + p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, + zero, tcache, arena); if (unlikely(p == NULL)) goto label_oom; if (config_stats) @@ -2349,7 +2327,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(size != 0); assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized() || IS_INITIALIZER); - malloc_thread_init(); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); @@ -2399,7 +2376,6 @@ je_sallocx(const void *ptr, int flags) tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); - malloc_thread_init(); tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); @@ -2577,7 +2553,6 @@ 
je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); - malloc_thread_init(); tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); diff --git a/src/quarantine.c b/src/quarantine.c deleted file mode 100644 index 9658ffad..00000000 --- a/src/quarantine.c +++ /dev/null @@ -1,178 +0,0 @@ -#define JEMALLOC_QUARANTINE_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/* - * Quarantine pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define QUARANTINE_STATE_REINCARNATED ((quarantine_t *)(uintptr_t)1) -#define QUARANTINE_STATE_PURGATORY ((quarantine_t *)(uintptr_t)2) -#define QUARANTINE_STATE_MAX QUARANTINE_STATE_PURGATORY - -/******************************************************************************/ -/* Function prototypes for non-inline static functions. */ - -static quarantine_t *quarantine_grow(tsd_t *tsd, quarantine_t *quarantine); -static void quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine); -static void quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, - size_t upper_bound); - -/******************************************************************************/ - -static quarantine_t * -quarantine_init(tsdn_t *tsdn, size_t lg_maxobjs) -{ - quarantine_t *quarantine; - size_t size; - - size = offsetof(quarantine_t, objs) + ((ZU(1) << lg_maxobjs) * - sizeof(quarantine_obj_t)); - quarantine = (quarantine_t *)iallocztm(tsdn, size, size2index(size), - false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - if (quarantine == NULL) - return (NULL); - quarantine->curbytes = 0; - quarantine->curobjs = 0; - quarantine->first = 0; - quarantine->lg_maxobjs = lg_maxobjs; - - return (quarantine); -} - -void -quarantine_alloc_hook_work(tsd_t *tsd) -{ - quarantine_t *quarantine; - - if (!tsd_nominal(tsd)) - return; - - quarantine = quarantine_init(tsd_tsdn(tsd), LG_MAXOBJS_INIT); - /* - * Check again whether 
quarantine has been initialized, because - * quarantine_init() may have triggered recursive initialization. - */ - if (tsd_quarantine_get(tsd) == NULL) - tsd_quarantine_set(tsd, quarantine); - else - idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); -} - -static quarantine_t * -quarantine_grow(tsd_t *tsd, quarantine_t *quarantine) -{ - quarantine_t *ret; - - ret = quarantine_init(tsd_tsdn(tsd), quarantine->lg_maxobjs + 1); - if (ret == NULL) { - quarantine_drain_one(tsd_tsdn(tsd), quarantine); - return (quarantine); - } - - ret->curbytes = quarantine->curbytes; - ret->curobjs = quarantine->curobjs; - if (quarantine->first + quarantine->curobjs <= (ZU(1) << - quarantine->lg_maxobjs)) { - /* objs ring buffer data are contiguous. */ - memcpy(ret->objs, &quarantine->objs[quarantine->first], - quarantine->curobjs * sizeof(quarantine_obj_t)); - } else { - /* objs ring buffer data wrap around. */ - size_t ncopy_a = (ZU(1) << quarantine->lg_maxobjs) - - quarantine->first; - size_t ncopy_b = quarantine->curobjs - ncopy_a; - - memcpy(ret->objs, &quarantine->objs[quarantine->first], ncopy_a - * sizeof(quarantine_obj_t)); - memcpy(&ret->objs[ncopy_a], quarantine->objs, ncopy_b * - sizeof(quarantine_obj_t)); - } - idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); - - tsd_quarantine_set(tsd, ret); - return (ret); -} - -static void -quarantine_drain_one(tsdn_t *tsdn, quarantine_t *quarantine) -{ - quarantine_obj_t *obj = &quarantine->objs[quarantine->first]; - assert(obj->usize == isalloc(tsdn, obj->ptr, config_prof)); - idalloctm(tsdn, obj->ptr, NULL, false, true); - quarantine->curbytes -= obj->usize; - quarantine->curobjs--; - quarantine->first = (quarantine->first + 1) & ((ZU(1) << - quarantine->lg_maxobjs) - 1); -} - -static void -quarantine_drain(tsdn_t *tsdn, quarantine_t *quarantine, size_t upper_bound) -{ - - while (quarantine->curbytes > upper_bound && quarantine->curobjs > 0) - quarantine_drain_one(tsdn, quarantine); -} - -void -quarantine(tsd_t *tsd, void 
*ptr) -{ - quarantine_t *quarantine; - size_t usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - - cassert(config_fill); - assert(opt_quarantine); - - if ((quarantine = tsd_quarantine_get(tsd)) == NULL) { - idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); - return; - } - /* - * Drain one or more objects if the quarantine size limit would be - * exceeded by appending ptr. - */ - if (quarantine->curbytes + usize > opt_quarantine) { - size_t upper_bound = (opt_quarantine >= usize) ? opt_quarantine - - usize : 0; - quarantine_drain(tsd_tsdn(tsd), quarantine, upper_bound); - } - /* Grow the quarantine ring buffer if it's full. */ - if (quarantine->curobjs == (ZU(1) << quarantine->lg_maxobjs)) - quarantine = quarantine_grow(tsd, quarantine); - /* quarantine_grow() must free a slot if it fails to grow. */ - assert(quarantine->curobjs < (ZU(1) << quarantine->lg_maxobjs)); - /* Append ptr if its size doesn't exceed the quarantine size. */ - if (quarantine->curbytes + usize <= opt_quarantine) { - size_t offset = (quarantine->first + quarantine->curobjs) & - ((ZU(1) << quarantine->lg_maxobjs) - 1); - quarantine_obj_t *obj = &quarantine->objs[offset]; - obj->ptr = ptr; - obj->usize = usize; - quarantine->curbytes += usize; - quarantine->curobjs++; - if (config_fill && unlikely(opt_junk_free)) { - if (usize <= SMALL_MAXCLASS) - arena_quarantine_junk_small(ptr, usize); - else - memset(ptr, JEMALLOC_FREE_JUNK, usize); - } - } else { - assert(quarantine->curbytes == 0); - idalloctm(tsd_tsdn(tsd), ptr, NULL, false, true); - } -} - -void -quarantine_cleanup(tsd_t *tsd) -{ - quarantine_t *quarantine; - - if (!config_fill) - return; - - quarantine = tsd_quarantine_get(tsd); - if (quarantine != NULL) { - quarantine_drain(tsd_tsdn(tsd), quarantine, 0); - idalloctm(tsd_tsdn(tsd), quarantine, NULL, true, true); - tsd_quarantine_set(tsd, NULL); - } -} diff --git a/src/stats.c b/src/stats.c index 97f901f6..16e5b1a3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -513,7 +513,6 @@ 
stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) - OPT_WRITE_SIZE_T(quarantine) OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(utrace) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index c602f0ff..d7a02e0f 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -88,8 +88,6 @@ TEST_BEGIN(test_arena_reset) size_t mib[3]; tsdn_t *tsdn; - test_skip_if(config_fill && unlikely(opt_quarantine)); - sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); diff --git a/test/unit/junk.c b/test/unit/junk.c index acddc601..cb262ec1 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -5,7 +5,7 @@ # define JEMALLOC_TEST_JUNK_OPT "junk:true" # endif const char *malloc_conf = - "abort:false,zero:false,redzone:true,quarantine:0," JEMALLOC_TEST_JUNK_OPT; + "abort:false,zero:false,redzone:true," JEMALLOC_TEST_JUNK_OPT; #endif static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 641138ac..151e7ad0 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -168,7 +168,6 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); - TEST_MALLCTL_OPT(size_t, quarantine, fill); TEST_MALLCTL_OPT(bool, redzone, fill); TEST_MALLCTL_OPT(bool, zero, fill); TEST_MALLCTL_OPT(bool, utrace, utrace); diff --git a/test/unit/quarantine.c b/test/unit/quarantine.c deleted file mode 100644 index bbd48a51..00000000 --- a/test/unit/quarantine.c +++ /dev/null @@ -1,108 +0,0 @@ -#include "test/jemalloc_test.h" - -#define QUARANTINE_SIZE 8192 -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - -#ifdef JEMALLOC_FILL -const char *malloc_conf = 
"abort:false,junk:true,redzone:true,quarantine:" - STRINGIFY(QUARANTINE_SIZE); -#endif - -void -quarantine_clear(void) -{ - void *p; - - p = mallocx(QUARANTINE_SIZE*2, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - dallocx(p, 0); -} - -TEST_BEGIN(test_quarantine) -{ -#define SZ ZU(256) -#define NQUARANTINED (QUARANTINE_SIZE/SZ) - void *quarantined[NQUARANTINED+1]; - size_t i, j; - - test_skip_if(!config_fill); - - assert_zu_eq(nallocx(SZ, 0), SZ, - "SZ=%zu does not precisely equal a size class", SZ); - - quarantine_clear(); - - /* - * Allocate enough regions to completely fill the quarantine, plus one - * more. The last iteration occurs with a completely full quarantine, - * but no regions should be drained from the quarantine until the last - * deallocation occurs. Therefore no region recycling should occur - * until after this loop completes. - */ - for (i = 0; i < NQUARANTINED+1; i++) { - void *p = mallocx(SZ, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - quarantined[i] = p; - dallocx(p, 0); - for (j = 0; j < i; j++) { - assert_ptr_ne(p, quarantined[j], - "Quarantined region recycled too early; " - "i=%zu, j=%zu", i, j); - } - } -#undef NQUARANTINED -#undef SZ -} -TEST_END - -static bool detected_redzone_corruption; - -static void -arena_redzone_corruption_replacement(void *ptr, size_t usize, bool after, - size_t offset, uint8_t byte) -{ - - detected_redzone_corruption = true; -} - -TEST_BEGIN(test_quarantine_redzone) -{ - char *s; - arena_redzone_corruption_t *arena_redzone_corruption_orig; - - test_skip_if(!config_fill); - - arena_redzone_corruption_orig = arena_redzone_corruption; - arena_redzone_corruption = arena_redzone_corruption_replacement; - - /* Test underflow. 
*/ - detected_redzone_corruption = false; - s = (char *)mallocx(1, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); - s[-1] = 0xbb; - dallocx(s, 0); - assert_true(detected_redzone_corruption, - "Did not detect redzone corruption"); - - /* Test overflow. */ - detected_redzone_corruption = false; - s = (char *)mallocx(1, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); - s[sallocx(s, 0)] = 0xbb; - dallocx(s, 0); - assert_true(detected_redzone_corruption, - "Did not detect redzone corruption"); - - arena_redzone_corruption = arena_redzone_corruption_orig; -} -TEST_END - -int -main(void) -{ - - return (test( - test_quarantine, - test_quarantine_redzone)); -} diff --git a/test/unit/zero.c b/test/unit/zero.c index 30ebe37a..80b8fe13 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -2,7 +2,7 @@ #ifdef JEMALLOC_FILL const char *malloc_conf = - "abort:false,junk:false,zero:true,redzone:false,quarantine:0"; + "abort:false,junk:false,zero:true,redzone:false"; #endif static void From 17c021c1775c2b5f5f73e3c0f0d19e9b3e9c23b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 5 Apr 2016 18:18:15 -0700 Subject: [PATCH 0251/2608] Remove redzone support. This resolves #369. --- INSTALL | 5 +- configure.ac | 3 +- doc/jemalloc.xml.in | 15 -- include/jemalloc/internal/arena.h | 65 ++------ .../jemalloc/internal/jemalloc_internal.h.in | 25 --- .../internal/jemalloc_internal_defs.h.in | 2 +- include/jemalloc/internal/private_symbols.txt | 4 - src/arena.c | 153 ++---------------- src/ctl.c | 3 - src/jemalloc.c | 15 +- src/stats.c | 1 - test/unit/junk.c | 48 +----- test/unit/mallctl.c | 1 - test/unit/zero.c | 2 +- 14 files changed, 41 insertions(+), 301 deletions(-) diff --git a/INSTALL b/INSTALL index 4f57b365..e4f7bbd5 100644 --- a/INSTALL +++ b/INSTALL @@ -165,9 +165,8 @@ any of the following arguments (not a definitive list) to 'configure': normal jemalloc operation. 
--disable-fill - Disable support for junk/zero filling of memory and redzones. See the - "opt.junk", "opt.zero", and "opt.redzone" option documentation for usage - details. + Disable support for junk/zero filling of memory. See the "opt.junk" and + "opt.zero" option documentation for usage details. --disable-zone-allocator Disable zone allocator for Darwin. This means jemalloc won't be hooked as diff --git a/configure.ac b/configure.ac index 92192d4d..538e53f4 100644 --- a/configure.ac +++ b/configure.ac @@ -945,8 +945,7 @@ fi dnl Support the junk/zero filling option by default. AC_ARG_ENABLE([fill], - [AS_HELP_STRING([--disable-fill], - [Disable support for junk/zero filling and redzones])], + [AS_HELP_STRING([--disable-fill], [Disable support for junk/zero filling])], [if test "x$enable_fill" = "xno" ; then enable_fill="0" else diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7ed03330..eddc88c1 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1040,21 +1040,6 @@ for (i = 0; i < nbins; i++) { default. - - - opt.redzone - (bool) - r- - [] - - Redzones enabled/disabled. If enabled, small - allocations have redzones before and after them. Furthermore, if the - opt.junk option is - enabled, the redzones are checked for corruption during deallocation. - This option is intended for debugging and will impact performance - negatively. This option is disabled by default. - - opt.zero diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1c63620d..6e71b5f6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -7,12 +7,6 @@ #define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) #define RUN_MAXREGS (1U << LG_RUN_MAXREGS) -/* - * Minimum redzone size. Redzones may be larger than this if necessary to - * preserve region alignment. 
- */ -#define REDZONE_MINSIZE 16 - /* * The minimum ratio of active:dirty pages per arena is computed as: * @@ -205,42 +199,22 @@ struct arena_chunk_s { * * Each run has the following layout: * - * /--------------------\ - * | pad? | - * |--------------------| - * | redzone | - * reg0_offset | region 0 | - * | redzone | - * |--------------------| \ - * | redzone | | - * | region 1 | > reg_interval - * | redzone | / - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | redzone | - * | region nregs-1 | - * | redzone | - * |--------------------| - * | alignment pad? | - * \--------------------/ - * - * reg_interval has at least the same minimum alignment as reg_size; this - * preserves the alignment constraint that sa2u() depends on. Alignment pad is - * either 0 or redzone_size; it is present only if needed to align reg0_offset. + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ */ struct arena_bin_info_s { /* Size of regions in a run for this bin's size class. */ size_t reg_size; - /* Redzone size. */ - size_t redzone_size; - - /* Interval between regions (reg_size + (redzone_size << 1)). */ - size_t reg_interval; - /* Total size of a run for this bin's size class. */ size_t run_size; @@ -252,9 +226,6 @@ struct arena_bin_info_s { * bin. */ bitmap_info_t bitmap_info; - - /* Offset of first region in a run for this bin's size class. 
*/ - uint32_t reg0_offset; }; struct arena_bin_s { @@ -543,9 +514,6 @@ void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET -typedef void (arena_redzone_corruption_t)(void *, size_t, bool, size_t, - uint8_t); -extern arena_redzone_corruption_t *arena_redzone_corruption; typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; #else @@ -1113,8 +1081,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; rpages = arena_miscelm_to_rpages(miscelm); - assert(((uintptr_t)ptr - ((uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset)) % bin_info->reg_interval + assert(((uintptr_t)ptr - (uintptr_t)rpages) % bin_info->reg_size == 0); } @@ -1142,18 +1109,16 @@ arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) * Freeing a pointer lower than region zero can cause assertion * failure. */ - assert((uintptr_t)ptr >= (uintptr_t)rpages + - (uintptr_t)bin_info->reg0_offset); + assert((uintptr_t)ptr >= (uintptr_t)rpages); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! */ - diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages - - bin_info->reg0_offset); + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages); /* Rescale (factor powers of 2 out of the numerator and denominator). 
*/ - interval = bin_info->reg_interval; + interval = bin_info->reg_size; shift = ffs_zu(interval) - 1; diff >>= shift; interval >>= shift; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c6aa5743..a8c476d9 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -423,7 +423,6 @@ extern bool opt_abort; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; -extern bool opt_redzone; extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; @@ -888,8 +887,6 @@ void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); -size_t u2rz(size_t usize); -size_t p2rz(tsdn_t *tsdn, const void *ptr); void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); @@ -1011,28 +1008,6 @@ ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) return (isalloc(tsdn, ptr, demote)); } -JEMALLOC_INLINE size_t -u2rz(size_t usize) -{ - size_t ret; - - if (usize <= SMALL_MAXCLASS) { - szind_t binind = size2index(usize); - ret = arena_bin_info[binind].redzone_size; - } else - ret = 0; - - return (ret); -} - -JEMALLOC_INLINE size_t -p2rz(tsdn_t *tsdn, const void *ptr) -{ - size_t usize = isalloc(tsdn, ptr, false); - - return (u2rz(usize)); -} - JEMALLOC_ALWAYS_INLINE void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5e5b0a78..7a38c91d 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -142,7 +142,7 @@ */ #undef JEMALLOC_DSS -/* Support 
memory filling (junk/zero/redzone). */ +/* Support memory filling (junk/zero). */ #undef JEMALLOC_FILL /* Support utrace(2)-based tracing. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 02377809..89933426 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -103,7 +103,6 @@ arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm -arena_redzone_corruption arena_reset arena_run_regind arena_run_to_miscelm @@ -382,13 +381,11 @@ opt_prof_leak opt_prof_prefix opt_prof_thread_active_init opt_purge -opt_redzone opt_stats_print opt_tcache opt_utrace opt_xmalloc opt_zero -p2rz pages_boot pages_commit pages_decommit @@ -578,7 +575,6 @@ tsd_witnessesp_get tsdn_fetch tsdn_null tsdn_tsd -u2rz witness_assert_lockless witness_assert_not_owner witness_assert_owner diff --git a/src/arena.c b/src/arena.c index 607679d7..9b458abf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -314,8 +314,8 @@ arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); - ret = (void *)((uintptr_t)rpages + (uintptr_t)bin_info->reg0_offset + - (uintptr_t)(bin_info->reg_interval * regind)); + ret = (void *)((uintptr_t)rpages + (uintptr_t)(bin_info->reg_size * + regind)); run->nfree--; return (ret); } @@ -333,12 +333,10 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. 
*/ assert(((uintptr_t)ptr - - ((uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) + - (uintptr_t)bin_info->reg0_offset)) % - (uintptr_t)bin_info->reg_interval == 0); + (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run))) % + (uintptr_t)bin_info->reg_size == 0); assert((uintptr_t)ptr >= - (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run)) + - (uintptr_t)bin_info->reg0_offset); + (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run))); /* Freeing an unallocated pointer can cause assertion failure. */ assert(bitmap_get(run->bitmap, &bin_info->bitmap_info, regind)); @@ -2395,73 +2393,8 @@ void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) { - size_t redzone_size = bin_info->redzone_size; - - if (zero) { - memset((void *)((uintptr_t)ptr - redzone_size), - JEMALLOC_ALLOC_JUNK, redzone_size); - memset((void *)((uintptr_t)ptr + bin_info->reg_size), - JEMALLOC_ALLOC_JUNK, redzone_size); - } else { - memset((void *)((uintptr_t)ptr - redzone_size), - JEMALLOC_ALLOC_JUNK, bin_info->reg_interval); - } -} - -#ifdef JEMALLOC_JET -#undef arena_redzone_corruption -#define arena_redzone_corruption JEMALLOC_N(n_arena_redzone_corruption) -#endif -static void -arena_redzone_corruption(void *ptr, size_t usize, bool after, - size_t offset, uint8_t byte) -{ - - malloc_printf(": Corrupt redzone %zu byte%s %s %p " - "(size %zu), byte=%#x\n", offset, (offset == 1) ? "" : "s", - after ? 
"after" : "before", ptr, usize, byte); -} -#ifdef JEMALLOC_JET -#undef arena_redzone_corruption -#define arena_redzone_corruption JEMALLOC_N(arena_redzone_corruption) -arena_redzone_corruption_t *arena_redzone_corruption = - JEMALLOC_N(n_arena_redzone_corruption); -#endif - -static void -arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) -{ - bool error = false; - - if (opt_junk_alloc) { - size_t size = bin_info->reg_size; - size_t redzone_size = bin_info->redzone_size; - size_t i; - - for (i = 1; i <= redzone_size; i++) { - uint8_t *byte = (uint8_t *)((uintptr_t)ptr - i); - if (*byte != JEMALLOC_ALLOC_JUNK) { - error = true; - arena_redzone_corruption(ptr, size, false, i, - *byte); - if (reset) - *byte = JEMALLOC_ALLOC_JUNK; - } - } - for (i = 0; i < redzone_size; i++) { - uint8_t *byte = (uint8_t *)((uintptr_t)ptr + size + i); - if (*byte != JEMALLOC_ALLOC_JUNK) { - error = true; - arena_redzone_corruption(ptr, size, true, i, - *byte); - if (reset) - *byte = JEMALLOC_ALLOC_JUNK; - } - } - } - - if (opt_abort && error) - abort(); + if (!zero) + memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } #ifdef JEMALLOC_JET @@ -2471,11 +2404,8 @@ arena_redzones_validate(void *ptr, arena_bin_info_t *bin_info, bool reset) void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) { - size_t redzone_size = bin_info->redzone_size; - arena_redzones_validate(ptr, bin_info, false); - memset((void *)((uintptr_t)ptr - redzone_size), JEMALLOC_FREE_JUNK, - bin_info->reg_interval); + memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } #ifdef JEMALLOC_JET #undef arena_dalloc_junk_small @@ -3559,43 +3489,16 @@ arena_new(tsdn_t *tsdn, unsigned ind) * *) bin_info->run_size <= arena_maxrun * *) bin_info->nregs <= RUN_MAXREGS * - * bin_info->nregs and bin_info->reg0_offset are also calculated here, since - * these settings are all interdependent. + * bin_info->nregs is also calculated here, since these settings are all + * interdependent. 
*/ static void bin_info_run_size_calc(arena_bin_info_t *bin_info) { - size_t pad_size; size_t try_run_size, perfect_run_size, actual_run_size; uint32_t try_nregs, perfect_nregs, actual_nregs; - /* - * Determine redzone size based on minimum alignment and minimum - * redzone size. Add padding to the end of the run if it is needed to - * align the regions. The padding allows each redzone to be half the - * minimum alignment; without the padding, each redzone would have to - * be twice as large in order to maintain alignment. - */ - if (config_fill && unlikely(opt_redzone)) { - size_t align_min = ZU(1) << (ffs_zu(bin_info->reg_size) - 1); - if (align_min <= REDZONE_MINSIZE) { - bin_info->redzone_size = REDZONE_MINSIZE; - pad_size = 0; - } else { - bin_info->redzone_size = align_min >> 1; - pad_size = bin_info->redzone_size; - } - } else { - bin_info->redzone_size = 0; - pad_size = 0; - } - bin_info->reg_interval = bin_info->reg_size + - (bin_info->redzone_size << 1); - - /* - * Compute run size under ideal conditions (no redzones, no limit on run - * size). - */ + /* Compute smallest run size that is an integer multiple of reg_size. */ try_run_size = PAGE; try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); do { @@ -3605,48 +3508,18 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) try_run_size += PAGE; try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); } while (perfect_run_size != perfect_nregs * bin_info->reg_size); + assert(perfect_run_size <= arena_maxrun); assert(perfect_nregs <= RUN_MAXREGS); actual_run_size = perfect_run_size; - actual_nregs = (uint32_t)((actual_run_size - pad_size) / - bin_info->reg_interval); - - /* - * Redzones can require enough padding that not even a single region can - * fit within the number of pages that would normally be dedicated to a - * run for this size class. Increase the run size until at least one - * region fits. 
- */ - while (actual_nregs == 0) { - assert(config_fill && unlikely(opt_redzone)); - - actual_run_size += PAGE; - actual_nregs = (uint32_t)((actual_run_size - pad_size) / - bin_info->reg_interval); - } - - /* - * Make sure that the run will fit within an arena chunk. - */ - while (actual_run_size > arena_maxrun) { - actual_run_size -= PAGE; - actual_nregs = (uint32_t)((actual_run_size - pad_size) / - bin_info->reg_interval); - } - assert(actual_nregs > 0); - assert(actual_run_size == s2u(actual_run_size)); + actual_nregs = (uint32_t)((actual_run_size) / bin_info->reg_size); /* Copy final settings. */ bin_info->run_size = actual_run_size; bin_info->nregs = actual_nregs; - bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * - bin_info->reg_interval) - pad_size + bin_info->redzone_size); if (actual_run_size > small_maxrun) small_maxrun = actual_run_size; - - assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs - * bin_info->reg_interval) + pad_size == bin_info->run_size); } static void diff --git a/src/ctl.c b/src/ctl.c index f4c775db..908a2850 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -97,7 +97,6 @@ CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) -CTL_PROTO(opt_redzone) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) @@ -272,7 +271,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, - {NAME("redzone"), CTL(opt_redzone)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, @@ -1279,7 +1277,6 @@ CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) -CTL_RO_NL_CGEN(config_fill, opt_redzone, opt_redzone, bool) CTL_RO_NL_CGEN(config_fill, 
opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4dd77e68..5be5961a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -35,7 +35,6 @@ bool opt_junk_free = #endif ; -bool opt_redzone = false; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; @@ -1040,16 +1039,15 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, "abort", true) /* - * Chunks always require at least one header page, - * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and - * possibly an additional page in the presence of - * redzones. In order to simplify options processing, - * use a conservative bound that accommodates all these + * Chunks always require at least one header page and as + * many as 2^(LG_SIZE_CLASS_GROUP+1) data pages. In + * order to simplify options processing, use a + * conservative bound that accommodates all these * constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - LG_SIZE_CLASS_GROUP + (config_fill ? 
2 : 1), - (sizeof(size_t) << 3) - 1, true) + LG_SIZE_CLASS_GROUP + 1, (sizeof(size_t) << 3) - 1, + true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1124,7 +1122,6 @@ malloc_conf_init(void) } continue; } - CONF_HANDLE_BOOL(opt_redzone, "redzone", true) CONF_HANDLE_BOOL(opt_zero, "zero", true) } if (config_utrace) { diff --git a/src/stats.c b/src/stats.c index 16e5b1a3..0e1442ed 100644 --- a/src/stats.c +++ b/src/stats.c @@ -513,7 +513,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) - OPT_WRITE_BOOL(redzone) OPT_WRITE_BOOL(zero) OPT_WRITE_BOOL(utrace) OPT_WRITE_BOOL(xmalloc) diff --git a/test/unit/junk.c b/test/unit/junk.c index cb262ec1..f74e33f6 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -5,7 +5,7 @@ # define JEMALLOC_TEST_JUNK_OPT "junk:true" # endif const char *malloc_conf = - "abort:false,zero:false,redzone:true," JEMALLOC_TEST_JUNK_OPT; + "abort:false,zero:false," JEMALLOC_TEST_JUNK_OPT; #endif static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; @@ -197,49 +197,6 @@ TEST_BEGIN(test_junk_large_ralloc_shrink) } TEST_END -static bool detected_redzone_corruption; - -static void -arena_redzone_corruption_replacement(void *ptr, size_t usize, bool after, - size_t offset, uint8_t byte) -{ - - detected_redzone_corruption = true; -} - -TEST_BEGIN(test_junk_redzone) -{ - char *s; - arena_redzone_corruption_t *arena_redzone_corruption_orig; - - test_skip_if(!config_fill); - test_skip_if(!opt_junk_alloc || !opt_junk_free); - - arena_redzone_corruption_orig = arena_redzone_corruption; - arena_redzone_corruption = arena_redzone_corruption_replacement; - - /* Test underflow. 
*/ - detected_redzone_corruption = false; - s = (char *)mallocx(1, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); - s[-1] = 0xbb; - dallocx(s, 0); - assert_true(detected_redzone_corruption, - "Did not detect redzone corruption"); - - /* Test overflow. */ - detected_redzone_corruption = false; - s = (char *)mallocx(1, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); - s[sallocx(s, 0)] = 0xbb; - dallocx(s, 0); - assert_true(detected_redzone_corruption, - "Did not detect redzone corruption"); - - arena_redzone_corruption = arena_redzone_corruption_orig; -} -TEST_END - int main(void) { @@ -248,6 +205,5 @@ main(void) test_junk_small, test_junk_large, test_junk_huge, - test_junk_large_ralloc_shrink, - test_junk_redzone)); + test_junk_large_ralloc_shrink)); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 151e7ad0..79c5147c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -168,7 +168,6 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); - TEST_MALLCTL_OPT(bool, redzone, fill); TEST_MALLCTL_OPT(bool, zero, fill); TEST_MALLCTL_OPT(bool, utrace, utrace); TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); diff --git a/test/unit/zero.c b/test/unit/zero.c index 80b8fe13..123f0e03 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -2,7 +2,7 @@ #ifdef JEMALLOC_FILL const char *malloc_conf = - "abort:false,junk:false,zero:true,redzone:false"; + "abort:false,junk:false,zero:true"; #endif static void From b683734b437209fb8a3a4520b04a649ab7aa56ea Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 6 Apr 2016 18:30:02 -0700 Subject: [PATCH 0252/2608] Implement BITMAP_INFO_INITIALIZER(nbits). This allows static initialization of bitmap_info_t structures. 
--- include/jemalloc/internal/bitmap.h | 86 ++++-- src/bitmap.c | 6 +- test/unit/bitmap.c | 407 +++++++++++++++++++++-------- 3 files changed, 367 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 36f38b59..0d456e2d 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -12,7 +12,7 @@ typedef unsigned long bitmap_t; /* Number of bits per group. */ #define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (ZU(1) << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) /* @@ -21,12 +21,12 @@ typedef unsigned long bitmap_t; * use a tree instead. */ #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define USE_TREE +# define BITMAP_USE_TREE #endif /* Number of groups required to store a given number of bits. */ #define BITMAP_BITS2GROUPS(nbits) \ - ((nbits + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) /* * Number of groups required at a particular level for a given number of bits. @@ -40,6 +40,9 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUPS_L3(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) /* * Assuming the number of levels, number of groups required for a given number @@ -53,11 +56,13 @@ typedef unsigned long bitmap_t; (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) #define BITMAP_GROUPS_4_LEVEL(nbits) \ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
*/ -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) @@ -67,20 +72,63 @@ typedef unsigned long bitmap_t; # define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 # define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) #else # error "Unsupported bitmap size" #endif -/* Maximum number of levels possible. */ -#define BITMAP_MAX_LEVELS \ - (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 -#else /* USE_TREE */ +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. 
*/ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) +#else /* BITMAP_USE_TREE */ -#endif /* USE_TREE */ +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ +} + +#endif /* BITMAP_USE_TREE */ #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -95,7 +143,7 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE /* Number of levels necessary for nbits. */ unsigned nlevels; @@ -104,10 +152,10 @@ struct bitmap_info_s { * bottom to top (e.g. the bottom level is stored in levels[0]). */ bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* USE_TREE */ +#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; -#endif /* USE_TREE */ +#endif /* BITMAP_USE_TREE */ }; #endif /* JEMALLOC_H_STRUCTS */ @@ -134,7 +182,7 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. 
*/ @@ -178,7 +226,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ if (g == 0) { unsigned i; @@ -207,7 +255,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) assert(!bitmap_full(bitmap, binfo)); -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; bit = ffs_lu(g) - 1; @@ -247,7 +295,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE /* Propagate group state transitions up the tree. */ if (propagate) { unsigned i; @@ -265,7 +313,7 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) break; } } -#endif /* USE_TREE */ +#endif /* BITMAP_USE_TREE */ } #endif diff --git a/src/bitmap.c b/src/bitmap.c index ac0f3b38..66554451 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -3,7 +3,7 @@ /******************************************************************************/ -#ifdef USE_TREE +#ifdef BITMAP_USE_TREE void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) @@ -69,7 +69,7 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) } } -#else /* USE_TREE */ +#else /* BITMAP_USE_TREE */ void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) @@ -101,7 +101,7 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) bitmap[binfo->ngroups - 1] >>= extra; } -#endif /* USE_TREE */ +#endif /* BITMAP_USE_TREE */ size_t bitmap_size(const bitmap_info_t *binfo) diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index a2dd5463..10d47c76 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -1,152 +1,338 @@ #include "test/jemalloc_test.h" +#define NBITS_TAB \ + NB( 1) \ + NB( 2) \ + NB( 3) \ + NB( 4) \ + NB( 
5) \ + NB( 6) \ + NB( 7) \ + NB( 8) \ + NB( 9) \ + NB(10) \ + NB(11) \ + NB(12) \ + NB(13) \ + NB(14) \ + NB(15) \ + NB(16) \ + NB(17) \ + NB(18) \ + NB(19) \ + NB(20) \ + NB(21) \ + NB(22) \ + NB(23) \ + NB(24) \ + NB(25) \ + NB(26) \ + NB(27) \ + NB(28) \ + NB(29) \ + NB(30) \ + NB(31) \ + NB(32) \ + \ + NB(33) \ + NB(34) \ + NB(35) \ + NB(36) \ + NB(37) \ + NB(38) \ + NB(39) \ + NB(40) \ + NB(41) \ + NB(42) \ + NB(43) \ + NB(44) \ + NB(45) \ + NB(46) \ + NB(47) \ + NB(48) \ + NB(49) \ + NB(50) \ + NB(51) \ + NB(52) \ + NB(53) \ + NB(54) \ + NB(55) \ + NB(56) \ + NB(57) \ + NB(58) \ + NB(59) \ + NB(60) \ + NB(61) \ + NB(62) \ + NB(63) \ + NB(64) \ + NB(65) \ + \ + NB(126) \ + NB(127) \ + NB(128) \ + NB(129) \ + NB(130) \ + \ + NB(254) \ + NB(255) \ + NB(256) \ + NB(257) \ + NB(258) \ + \ + NB(510) \ + NB(511) \ + NB(512) \ + NB(513) \ + NB(514) \ + \ + NB(1024) \ + NB(2048) \ + NB(4096) \ + NB(8192) \ + NB(16384) \ + +static void +test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) +{ + bitmap_info_t binfo_dyn; + bitmap_info_init(&binfo_dyn, nbits); + + assert_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), + "Unexpected difference between static and dynamic initialization, " + "nbits=%zu", nbits); + assert_zu_eq(binfo->nbits, binfo_dyn.nbits, + "Unexpected difference between static and dynamic initialization, " + "nbits=%zu", nbits); +#ifdef BITMAP_USE_TREE + assert_u_eq(binfo->nlevels, binfo_dyn.nlevels, + "Unexpected difference between static and dynamic initialization, " + "nbits=%zu", nbits); + { + unsigned i; + + for (i = 0; i < binfo->nlevels; i++) { + assert_zu_eq(binfo->levels[i].group_offset, + binfo_dyn.levels[i].group_offset, + "Unexpected difference between static and dynamic " + "initialization, nbits=%zu, level=%u", nbits, i); + } + } +#else + assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups, + "Unexpected difference between static and dynamic initialization"); +#endif +} + +TEST_BEGIN(test_bitmap_initializer) +{ + +#define 
NB(nbits) { \ + if (nbits <= BITMAP_MAXBITS) { \ + bitmap_info_t binfo = \ + BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_initializer_body(&binfo, nbits); \ + } \ + } + NBITS_TAB +#undef NB +} +TEST_END + +static size_t +test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, + size_t prev_size) +{ + size_t size = bitmap_size(binfo); + assert_zu_ge(size, (nbits >> 3), + "Bitmap size is smaller than expected"); + assert_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); + return (size); +} + TEST_BEGIN(test_bitmap_size) { - size_t i, prev_size; + size_t nbits, prev_size; prev_size = 0; - for (i = 1; i <= BITMAP_MAXBITS; i++) { + for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { bitmap_info_t binfo; - size_t size; - - bitmap_info_init(&binfo, i); - size = bitmap_size(&binfo); - assert_true(size >= prev_size, - "Bitmap size is smaller than expected"); - prev_size = size; + bitmap_info_init(&binfo, nbits); + prev_size = test_bitmap_size_body(&binfo, nbits, prev_size); } +#define NB(nbits) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + prev_size = test_bitmap_size_body(&binfo, nbits, \ + prev_size); \ + } + prev_size = 0; + NBITS_TAB +#undef NB } TEST_END +static void +test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) +{ + size_t i; + bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); + assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + bitmap_init(bitmap, binfo); + + for (i = 0; i < nbits; i++) { + assert_false(bitmap_get(bitmap, binfo, i), + "Bit should be unset"); + } + free(bitmap); +} + TEST_BEGIN(test_bitmap_init) { - size_t i; + size_t nbits; - for (i = 1; i <= BITMAP_MAXBITS; i++) { + for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) { - assert_false(bitmap_get(bitmap, &binfo, j), - "Bit should be 
unset"); - } - free(bitmap); - } + bitmap_info_init(&binfo, nbits); + test_bitmap_init_body(&binfo, nbits); } +#define NB(nbits) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_init_body(&binfo, nbits); \ + } + NBITS_TAB +#undef NB } TEST_END +static void +test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) +{ + size_t i; + bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); + assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + bitmap_init(bitmap, binfo); + + for (i = 0; i < nbits; i++) + bitmap_set(bitmap, binfo, i); + assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + free(bitmap); +} + TEST_BEGIN(test_bitmap_set) { - size_t i; + size_t nbits; - for (i = 1; i <= BITMAP_MAXBITS; i++) { + for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert_true(bitmap_full(bitmap, &binfo), - "All bits should be set"); - free(bitmap); - } + bitmap_info_init(&binfo, nbits); + test_bitmap_set_body(&binfo, nbits); } +#define NB(nbits) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_set_body(&binfo, nbits); \ + } + NBITS_TAB +#undef NB } TEST_END +static void +test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) +{ + size_t i; + bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); + assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + bitmap_init(bitmap, binfo); + + for (i = 0; i < nbits; i++) + bitmap_set(bitmap, binfo, i); + assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + for (i = 0; i < nbits; i++) + bitmap_unset(bitmap, binfo, i); + for (i = 0; i < nbits; i++) + bitmap_set(bitmap, binfo, i); + assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + free(bitmap); +} + 
TEST_BEGIN(test_bitmap_unset) { - size_t i; + size_t nbits; - for (i = 1; i <= BITMAP_MAXBITS; i++) { + for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); - bitmap_init(bitmap, &binfo); - - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert_true(bitmap_full(bitmap, &binfo), - "All bits should be set"); - for (j = 0; j < i; j++) - bitmap_unset(bitmap, &binfo, j); - for (j = 0; j < i; j++) - bitmap_set(bitmap, &binfo, j); - assert_true(bitmap_full(bitmap, &binfo), - "All bits should be set"); - free(bitmap); - } + bitmap_info_init(&binfo, nbits); + test_bitmap_unset_body(&binfo, nbits); } +#define NB(nbits) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_unset_body(&binfo, nbits); \ + } + NBITS_TAB +#undef NB } TEST_END -TEST_BEGIN(test_bitmap_sfu) +static void +test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; + bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); + assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + bitmap_init(bitmap, binfo); - for (i = 1; i <= BITMAP_MAXBITS; i++) { - bitmap_info_t binfo; - bitmap_info_init(&binfo, i); - { - size_t j; - bitmap_t *bitmap = (bitmap_t *)malloc( - bitmap_size(&binfo)); - bitmap_init(bitmap, &binfo); - - /* Iteratively set bits starting at the beginning. */ - for (j = 0; j < i; j++) { - assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, - "First unset bit should be just after " - "previous first unset bit"); - } - assert_true(bitmap_full(bitmap, &binfo), - "All bits should be set"); - - /* - * Iteratively unset bits starting at the end, and - * verify that bitmap_sfu() reaches the unset bits. 
- */ - for (j = i - 1; j < i; j--) { /* (i..0] */ - bitmap_unset(bitmap, &binfo, j); - assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, - "First unset bit should the bit previously " - "unset"); - bitmap_unset(bitmap, &binfo, j); - } - assert_false(bitmap_get(bitmap, &binfo, 0), - "Bit should be unset"); - - /* - * Iteratively set bits starting at the beginning, and - * verify that bitmap_sfu() looks past them. - */ - for (j = 1; j < i; j++) { - bitmap_set(bitmap, &binfo, j - 1); - assert_zd_eq(bitmap_sfu(bitmap, &binfo), j, - "First unset bit should be just after the " - "bit previously set"); - bitmap_unset(bitmap, &binfo, j); - } - assert_zd_eq(bitmap_sfu(bitmap, &binfo), i - 1, - "First unset bit should be the last bit"); - assert_true(bitmap_full(bitmap, &binfo), - "All bits should be set"); - free(bitmap); - } + /* Iteratively set bits starting at the beginning. */ + for (i = 0; i < nbits; i++) { + assert_zd_eq(bitmap_sfu(bitmap, binfo), i, + "First unset bit should be just after previous first unset " + "bit"); } + assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + + /* + * Iteratively unset bits starting at the end, and verify that + * bitmap_sfu() reaches the unset bits. + */ + for (i = nbits - 1; i < nbits; i--) { /* (nbits..0] */ + bitmap_unset(bitmap, binfo, i); + assert_zd_eq(bitmap_sfu(bitmap, binfo), i, + "First unset bit should the bit previously unset"); + bitmap_unset(bitmap, binfo, i); + } + assert_false(bitmap_get(bitmap, binfo, 0), "Bit should be unset"); + + /* + * Iteratively set bits starting at the beginning, and verify that + * bitmap_sfu() looks past them. 
+ */ + for (i = 1; i < nbits; i++) { + bitmap_set(bitmap, binfo, i - 1); + assert_zd_eq(bitmap_sfu(bitmap, binfo), i, + "First unset bit should be just after the bit previously " + "set"); + bitmap_unset(bitmap, binfo, i); + } + assert_zd_eq(bitmap_sfu(bitmap, binfo), nbits - 1, + "First unset bit should be the last bit"); + assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + free(bitmap); +} + +TEST_BEGIN(test_bitmap_sfu) +{ + size_t nbits; + + for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { + bitmap_info_t binfo; + bitmap_info_init(&binfo, nbits); + test_bitmap_sfu_body(&binfo, nbits); + } +#define NB(nbits) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_sfu_body(&binfo, nbits); \ + } + NBITS_TAB +#undef NB } TEST_END @@ -155,6 +341,7 @@ main(void) { return (test( + test_bitmap_initializer, test_bitmap_size, test_bitmap_init, test_bitmap_set, From 627372b459479bf8908470ba25e832c4a9a420db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 7 Apr 2016 08:04:12 -0400 Subject: [PATCH 0253/2608] Initialize arena_bin_info at compile time rather than at boot time. This resolves #370. --- include/jemalloc/internal/arena.h | 15 +-- include/jemalloc/internal/size_classes.sh | 63 ++++++++++-- src/arena.c | 112 +++++++--------------- src/jemalloc.c | 4 +- test/unit/junk.c | 2 +- 5 files changed, 100 insertions(+), 96 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 6e71b5f6..866b12fe 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -470,7 +470,7 @@ extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; extern ssize_t opt_decay_time; -extern arena_bin_info_t arena_bin_info[NBINS]; +extern const arena_bin_info_t arena_bin_info[NBINS]; extern size_t map_bias; /* Number of arena chunk header pages. 
*/ extern size_t map_misc_offset; @@ -511,13 +511,13 @@ void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, +void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); #ifdef JEMALLOC_JET -typedef void (arena_dalloc_junk_small_t)(void *, arena_bin_info_t *); +typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; #else -void arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info); +void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); #endif void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, bool zero); @@ -634,7 +634,7 @@ bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -size_t arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, +size_t arena_run_regind(arena_run_t *run, const arena_bin_info_t *bin_info, const void *ptr); prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, @@ -1058,7 +1058,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) const arena_run_t *run; arena_bin_t *bin; szind_t run_binind, actual_binind; - arena_bin_info_t *bin_info; + const arena_bin_info_t *bin_info; const arena_chunk_map_misc_t *miscelm; const void *rpages; @@ -1099,7 +1099,8 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) } JEMALLOC_INLINE size_t -arena_run_regind(arena_run_t *run, arena_bin_info_t *bin_info, const void *ptr) +arena_run_regind(arena_run_t *run, const 
arena_bin_info_t *bin_info, + const void *ptr) { size_t diff, interval, shift, regind; arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 2b0ca29a..c9b84718 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -40,6 +40,36 @@ lg() { done } +run_size() { + lg_p=$1 + lg_grp=$2 + lg_delta=$3 + ndelta=$4 + + pow2 ${lg_p}; p=${pow2_result} + + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + reg_size=$((${grp} + ${delta}*${ndelta})) + + # Compute smallest run size that is an integer multiple of reg_size. + try_run_size=${p} + try_nregs=$((${try_run_size} / ${reg_size})) + perfect=0 + while [ ${perfect} -eq 0 ] ; do + perfect_run_size=${try_run_size} + perfect_nregs=${try_nregs} + + try_run_size=$((${try_run_size} + ${p})) + try_nregs=$((${try_run_size} / ${reg_size})) + if [ ${perfect_run_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then + perfect=1 + fi + done + + run_size_pgs=$((${perfect_run_size} / ${p})) +} + size_class() { index=$1 lg_grp=$2 @@ -65,8 +95,10 @@ size_class() { if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" + run_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${run_size_pgs} else bin="no" + pgs=0 fi if [ ${lg_size} -lt ${lg_kmax} \ -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then @@ -74,14 +106,15 @@ size_class() { else lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${pgs} ${lg_delta_lookup} # Defined upon return: - # - lg_delta_lookup (${lg_delta} or "no") # - bin ("yes" or "no") + # - pgs + # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -95,12 +128,13 @@ size_classes() { pow2 
${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, bin, pgs, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + slab_maxpgs=0 # Tiny size classes. ndelta=0 @@ -114,6 +148,9 @@ size_classes() { fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) + if [ ${pgs} -gt ${slab_maxpgs} ] ; then + slab_maxpgs=${pgs} + fi fi ntbins=$((${ntbins} + 1)) lg_tiny_maxclass=${lg_grp} # Final written value is correct. @@ -133,11 +170,17 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${pgs} -gt ${slab_maxpgs} ] ; then + slab_maxpgs=${pgs} + fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${pgs} -gt ${slab_maxpgs} ] ; then + slab_maxpgs=${pgs} + fi done # All remaining groups. @@ -161,6 +204,9 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + if [ ${pgs} -gt ${slab_maxpgs} ] ; then + slab_maxpgs=${pgs} + fi if [ ${lg_g} -gt 0 ] ; then lg_large_minclass=$((${lg_grp} + 1)) else @@ -186,6 +232,7 @@ size_classes() { # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass + # - slab_maxpgs # - lg_large_minclass # - huge_maxclass } @@ -200,14 +247,14 @@ cat <> LG_PAGE]) + if (size <= (ZU(SLAB_MAXPGS) << LG_PAGE) && small_run_tab[size >> + LG_PAGE]) return (size); /* @@ -121,12 +133,12 @@ run_quantize_ceil_compute_hard(size_t size) large_pad) + 1) + large_pad); } else large_run_size_next = SIZE_T_MAX; - if (size >= small_maxrun) + if ((size >> LG_PAGE) >= ZU(SLAB_MAXPGS)) return (large_run_size_next); while (true) { size += PAGE; - assert(size <= small_maxrun); + assert(size <= (ZU(SLAB_MAXPGS) << LG_PAGE)); if (small_run_tab[size >> LG_PAGE]) { if 
(large_run_size_next < size) return (large_run_size_next); @@ -301,7 +313,7 @@ arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty) } JEMALLOC_INLINE_C void * -arena_run_reg_alloc(arena_run_t *run, arena_bin_info_t *bin_info) +arena_run_reg_alloc(arena_run_t *run, const arena_bin_info_t *bin_info) { void *ret; size_t regind; @@ -327,7 +339,7 @@ arena_run_reg_dalloc(arena_run_t *run, void *ptr) size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const arena_bin_info_t *bin_info = &arena_bin_info[binind]; size_t regind = arena_run_regind(run, bin_info, ptr); assert(run->nfree < bin_info->nregs); @@ -1822,7 +1834,7 @@ arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) /* Skip small run. */ size_t binind = arena_mapbits_binind_get(chunk, pageind); - arena_bin_info_t *bin_info = + const arena_bin_info_t *bin_info = &arena_bin_info[binind]; npages = bin_info->run_size >> LG_PAGE; } @@ -2045,7 +2057,7 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, assert(size == PAGE || arena_mapbits_large_size_get(chunk, run_ind+(size>>LG_PAGE)-1) == 0); } else { - arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; + const arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; size = bin_info->run_size; } @@ -2241,7 +2253,7 @@ arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { arena_run_t *run; szind_t binind; - arena_bin_info_t *bin_info; + const arena_bin_info_t *bin_info; /* Look for a usable run. 
*/ run = arena_bin_nonfull_run_tryget(bin); @@ -2291,7 +2303,7 @@ static void * arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) { szind_t binind; - arena_bin_info_t *bin_info; + const arena_bin_info_t *bin_info; arena_run_t *run; binind = arena_bin_index(arena, bin); @@ -2390,7 +2402,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, } void -arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) +arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { if (!zero) @@ -2402,7 +2414,7 @@ arena_alloc_junk_small(void *ptr, arena_bin_info_t *bin_info, bool zero) #define arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small) #endif void -arena_dalloc_junk_small(void *ptr, arena_bin_info_t *bin_info) +arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); @@ -2706,7 +2718,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, else { szind_t binind = arena_bin_index(extent_node_arena_get( &chunk->node), bin); - arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const arena_bin_info_t *bin_info = &arena_bin_info[binind]; /* * The following block's conditional is necessary because if the @@ -2768,7 +2780,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, size_t pageind, rpages_ind; arena_run_t *run; arena_bin_t *bin; - arena_bin_info_t *bin_info; + const arena_bin_info_t *bin_info; szind_t binind; pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -3483,81 +3495,24 @@ arena_new(tsdn_t *tsdn, unsigned ind) return (arena); } -/* - * Calculate bin_info->run_size such that it meets the following constraints: - * - * *) bin_info->run_size <= arena_maxrun - * *) bin_info->nregs <= RUN_MAXREGS - * - * bin_info->nregs is also calculated here, since these settings are all - * interdependent. 
- */ -static void -bin_info_run_size_calc(arena_bin_info_t *bin_info) -{ - size_t try_run_size, perfect_run_size, actual_run_size; - uint32_t try_nregs, perfect_nregs, actual_nregs; - - /* Compute smallest run size that is an integer multiple of reg_size. */ - try_run_size = PAGE; - try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); - do { - perfect_run_size = try_run_size; - perfect_nregs = try_nregs; - - try_run_size += PAGE; - try_nregs = (uint32_t)(try_run_size / bin_info->reg_size); - } while (perfect_run_size != perfect_nregs * bin_info->reg_size); - assert(perfect_run_size <= arena_maxrun); - assert(perfect_nregs <= RUN_MAXREGS); - - actual_run_size = perfect_run_size; - actual_nregs = (uint32_t)((actual_run_size) / bin_info->reg_size); - - /* Copy final settings. */ - bin_info->run_size = actual_run_size; - bin_info->nregs = actual_nregs; - - if (actual_run_size > small_maxrun) - small_maxrun = actual_run_size; -} - -static void -bin_info_init(void) -{ - arena_bin_info_t *bin_info; - -#define BIN_INFO_INIT_bin_yes(index, size) \ - bin_info = &arena_bin_info[index]; \ - bin_info->reg_size = size; \ - bin_info_run_size_calc(bin_info); \ - bitmap_info_init(&bin_info->bitmap_info, bin_info->nregs); -#define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ - BIN_INFO_INIT_bin_##bin(index, (ZU(1)<> - LG_PAGE)); + small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * SLAB_MAXPGS); if (small_run_tab == NULL) return (true); #define TAB_INIT_bin_yes(index, size) { \ - arena_bin_info_t *bin_info = &arena_bin_info[index]; \ + const arena_bin_info_t *bin_info = \ + &arena_bin_info[index]; \ small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ } #define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, bin, run_size, \ + lg_delta_lookup) \ TAB_INIT_bin_##bin(index, (ZU(1)< Date: Sun, 17 Apr 2016 16:16:11 -0700 
Subject: [PATCH 0254/2608] Implement pz2ind(), pind2sz(), and psz2u(). These compute size classes and indices similarly to size2index(), index2size() and s2u(), respectively, but using the subset of size classes that are multiples of the page size. Note that pszind_t and szind_t are not interchangeable. --- .../jemalloc/internal/jemalloc_internal.h.in | 80 ++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/size_classes.sh | 43 ++++++++- src/arena.c | 5 +- src/jemalloc.c | 4 +- test/unit/size_classes.c | 92 +++++++++++++++++-- 6 files changed, 202 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a8c476d9..224ceddc 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -178,6 +178,9 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Page size index type. */ +typedef unsigned pszind_t; + /* Size class index type. */ typedef unsigned szind_t; @@ -525,6 +528,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -545,10 +551,74 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + JEMALLOC_INLINE szind_t size2index_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -557,9 +627,7 @@ size2index_compute(size_t size) } #endif { - szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + szind_t x = lg_floor((size<<1)-1); szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); szind_t grp = shift << LG_SIZE_CLASS_GROUP; @@ -645,6 +713,8 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -654,9 +724,7 @@ s2u_compute(size_t size) } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 89933426..cbafc2b1 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -393,6 +393,7 @@ pages_map pages_purge pages_trim pages_unmap +pind2sz pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -446,6 +447,8 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +psz2ind +psz2u purge_mode_names register_zone rtree_child_read diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index c9b84718..ecee1a0a 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -78,6 +78,21 @@ size_class() { lg_p=$5 lg_kmax=$6 + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -106,15 +121,16 @@ size_class() { else 
lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${pgs} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup} # Defined upon return: + # - psz ("yes" or "no") # - bin ("yes" or "no") # - pgs # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -128,12 +144,13 @@ size_classes() { pow2 ${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, pgs, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + npsizes=0 slab_maxpgs=0 # Tiny size classes. @@ -146,6 +163,9 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) if [ ${pgs} -gt ${slab_maxpgs} ] ; then @@ -170,6 +190,9 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${pgs} -gt ${slab_maxpgs} ] ; then slab_maxpgs=${pgs} fi @@ -178,6 +201,9 @@ size_classes() { size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${pgs} -gt ${slab_maxpgs} ] ; then slab_maxpgs=${pgs} fi @@ -200,6 +226,9 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -229,6 +258,7 @@ size_classes() { # - nlbins # - 
nbins # - nsizes + # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass @@ -247,12 +277,13 @@ cat <run_size >> LG_PAGE] = true; \ } #define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, run_size, \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, run_size, \ lg_delta_lookup) \ TAB_INIT_bin_##bin(index, (ZU(1)<" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), + "pind2sz() does not reverse psz2ind(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + + assert_u_eq(pind+1, psz2ind(size_class+1), + "Next size_class does not round up properly"); + + assert_zu_eq(size_class, (pind > 0) ? + psz2u(pind2sz(pind-1)+1) : psz2u(1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); + assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), + "psz2u() does not round up to next size class"); + } + + assert_u_eq(pind, psz2ind(pind2sz(pind)), + "psz2ind() does not reverse pind2sz()"); + assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)), + "pind2sz() does not reverse psz2ind()"); + + assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); +} +TEST_END + TEST_BEGIN(test_overflow) { size_t max_size_class; max_size_class = get_max_size_class(); - assert_u_ge(size2index(max_size_class+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), 
NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(SIZE_T_MAX), NSIZES, - "size2index() should return >= NSIZES on overflow"); + assert_u_eq(size2index(max_size_class+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return NSIZES on overflow"); - assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); - assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_eq(s2u(max_size_class+1), 0, + "s2u() should return 0 for unsupported size"); + assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, + "s2u() should return 0 for unsupported size"); assert_zu_eq(s2u(SIZE_T_MAX), 0, "s2u() should return 0 on overflow"); + + assert_u_eq(psz2ind(max_size_class+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + + assert_zu_eq(psz2u(max_size_class+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(SIZE_T_MAX), 0, + "psz2u() should return 0 on overflow"); } TEST_END @@ -108,5 +179,6 @@ main(void) return (test( test_size_classes, + test_psize_classes, test_overflow)); } From 7bb00ae9d656b3d3ea9a01777cf1a13ab97f2430 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 8 Apr 2016 14:17:57 -0700 Subject: [PATCH 0255/2608] Refactor runs_avail. Use pszind_t size classes rather than szind_t size classes, and always reserve space for NPSIZES elements. 
This removes unused heaps that are not multiples of the page size, and adds (currently) unused heaps for all huge size classes, with the immediate benefit that the size of arena_t allocations is constant (no longer dependent on chunk size). --- include/jemalloc/internal/arena.h | 9 +-- .../jemalloc/internal/jemalloc_internal.h.in | 27 +++++++- include/jemalloc/internal/private_symbols.txt | 3 +- include/jemalloc/internal/size_classes.sh | 15 +++-- src/arena.c | 61 +++++++------------ src/jemalloc.c | 17 +++++- test/unit/run_quantize.c | 2 +- 7 files changed, 81 insertions(+), 53 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 866b12fe..bb65c7a9 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -441,10 +441,12 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered heaps of this arena's available runs. The - * heaps are used for first-best-fit run allocation. + * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. */ - arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[NPSIZES]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -476,7 +478,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 224ceddc..eabb9ce3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -443,11 +443,16 @@ extern unsigned narenas_auto; */ extern arena_t **arenas; +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES+1]; +extern size_t const index2size_tab[NSIZES]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, @@ -529,6 +534,8 @@ void jemalloc_postfork_child(void); #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); size_t pind2sz(pszind_t pind); size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); @@ -576,7 +583,7 @@ psz2ind(size_t psz) } JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) +pind2sz_compute(pszind_t pind) { { @@ -596,6 +603,22 @@ pind2sz(pszind_t pind) } } +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + JEMALLOC_INLINE size_t psz2u(size_t psz) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index cbafc2b1..e046c3b1 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -394,6 +394,8 @@ pages_purge pages_trim pages_unmap pind2sz +pind2sz_compute +pind2sz_lookup 
pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -468,7 +470,6 @@ rtree_val_read rtree_val_write run_quantize_ceil run_quantize_floor -run_quantize_max s2u s2u_compute s2u_lookup diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index ecee1a0a..d1b1db1e 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -40,6 +40,16 @@ lg() { done } +reg_size_compute() { + lg_grp=$1 + lg_delta=$2 + ndelta=$3 + + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + reg_size=$((${grp} + ${delta}*${ndelta})) +} + run_size() { lg_p=$1 lg_grp=$2 @@ -47,10 +57,7 @@ run_size() { ndelta=$4 pow2 ${lg_p}; p=${pow2_result} - - pow2 ${lg_grp}; grp=${pow2_result} - pow2 ${lg_delta}; delta=${pow2_result} - reg_size=$((${grp} + ${delta}*${ndelta})) + reg_size_compute ${lg_grp} ${lg_delta} ${ndelta} # Compute smallest run size that is an integer multiple of reg_size. try_run_size=${p} diff --git a/src/arena.c b/src/arena.c index ff119ba6..a0fd2ce6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -34,14 +34,11 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -size_t run_quantize_max; /* Max run_quantize_*() input. */ static bool *small_run_tab; /* Valid small run page multiples. */ static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ -static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ -static szind_t runs_avail_nclasses; /* Number of runs_avail trees. 
*/ /******************************************************************************/ /* @@ -177,7 +174,7 @@ run_quantize_floor(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; @@ -200,7 +197,7 @@ run_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; @@ -213,25 +210,15 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif -static arena_run_heap_t * -arena_runs_avail_get(arena_t *arena, szind_t ind) -{ - - assert(ind >= runs_avail_bias); - assert(ind - runs_avail_bias < runs_avail_nclasses); - - return (&arena->runs_avail[ind - runs_avail_bias]); -} - static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_insert(arena_runs_avail_get(arena, ind), + arena_run_heap_insert(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -239,11 +226,11 @@ static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_remove(arena_runs_avail_get(arena, ind), + arena_run_heap_remove(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -1088,12 
+1075,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - szind_t ind, i; + pszind_t pind, i; - ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { + pind = psz2ind(run_quantize_ceil(size)); + + for (i = pind; pind2sz(i) <= large_maxclass; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - arena_runs_avail_get(arena, i)); + &arena->runs_avail[i]); if (miscelm != NULL) return (&miscelm->run); } @@ -1946,7 +1934,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) assert(!arena->purging); arena->nactive = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -3388,23 +3377,19 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - size_t arena_size; unsigned i; - /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * - runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * - sizeof(malloc_large_stats_t) + nhclasses) * - sizeof(malloc_huge_stats_t)); + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) + + (nhclasses * sizeof(malloc_huge_stats_t)))); } else - arena = (arena_t *)base_alloc(tsdn, arena_size); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); @@ -3416,11 +3401,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size)); + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size) + + + CACHELINE_CEILING(sizeof(arena_t)) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3454,8 +3439,10 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); + qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3526,6 +3513,7 @@ small_run_size_init(void) static bool run_quantize_init(void) { + size_t run_quantize_max; unsigned i; run_quantize_max = chunksize + large_pad; @@ -3604,9 +3592,6 @@ arena_boot(void) if (run_quantize_init()) return (true); - runs_avail_bias = size2index(PAGE); - runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; - return (false); } diff --git a/src/jemalloc.c b/src/jemalloc.c index b907d9e5..849e9418 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -78,14 +78,25 @@ enum { }; static uint8_t 
malloc_slow_flags; -/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES+1] = { +const size_t pind2sz_tab[NPSIZES] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)<> LG_PAGE; i++) { + for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; From 3aea827f5e7d07ce156476bba8a843640969de51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 8 Apr 2016 14:16:19 -0700 Subject: [PATCH 0256/2608] Simplify run quantization. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/size_classes.sh | 16 -- src/arena.c | 179 ++++------------------ src/jemalloc.c | 3 +- 4 files changed, 31 insertions(+), 169 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index bb65c7a9..11863fc7 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -569,7 +569,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -bool arena_boot(void); +void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index d1b1db1e..440953ad 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -158,7 +158,6 @@ size_classes() { lg_tiny_maxclass='"NA"' nbins=0 npsizes=0 - slab_maxpgs=0 # Tiny size classes. ndelta=0 @@ -175,9 +174,6 @@ size_classes() { fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) - if [ ${pgs} -gt ${slab_maxpgs} ] ; then - slab_maxpgs=${pgs} - fi fi ntbins=$((${ntbins} + 1)) lg_tiny_maxclass=${lg_grp} # Final written value is correct. 
@@ -200,9 +196,6 @@ size_classes() { if [ ${psz} = "yes" ] ; then npsizes=$((${npsizes} + 1)) fi - if [ ${pgs} -gt ${slab_maxpgs} ] ; then - slab_maxpgs=${pgs} - fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} @@ -211,9 +204,6 @@ size_classes() { if [ ${psz} = "yes" ] ; then npsizes=$((${npsizes} + 1)) fi - if [ ${pgs} -gt ${slab_maxpgs} ] ; then - slab_maxpgs=${pgs} - fi done # All remaining groups. @@ -240,9 +230,6 @@ size_classes() { nbins=$((${index} + 1)) # Final written value is correct: small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" - if [ ${pgs} -gt ${slab_maxpgs} ] ; then - slab_maxpgs=${pgs} - fi if [ ${lg_g} -gt 0 ] ; then lg_large_minclass=$((${lg_grp} + 1)) else @@ -269,7 +256,6 @@ size_classes() { # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass - # - slab_maxpgs # - lg_large_minclass # - huge_maxclass } @@ -303,7 +289,6 @@ cat <> - LG_PAGE]) - return (size); - - /* - * Round down to the nearest run size that can actually be requested - * during normal large allocation. Add large_pad so that cache index - * randomization can offset the allocation from the page boundary. - */ - qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; - if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor_compute(size - large_pad)); - assert(qsize <= size); - return (qsize); -} - -static size_t -run_quantize_ceil_compute_hard(size_t size) -{ - size_t large_run_size_next; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* - * Return the next quantized size greater than the input size. - * Quantized sizes comprise the union of run sizes that back small - * region runs, and run sizes that back large regions with no explicit - * alignment constraints. 
- */ - - if (size > SMALL_MAXCLASS) { - large_run_size_next = PAGE_CEILING(index2size(size2index(size - - large_pad) + 1) + large_pad); - } else - large_run_size_next = SIZE_T_MAX; - if ((size >> LG_PAGE) >= ZU(SLAB_MAXPGS)) - return (large_run_size_next); - - while (true) { - size += PAGE; - assert(size <= (ZU(SLAB_MAXPGS) << LG_PAGE)); - if (small_run_tab[size >> LG_PAGE]) { - if (large_run_size_next < size) - return (large_run_size_next); - return (size); - } - } -} - -static size_t -run_quantize_ceil_compute(size_t size) -{ - size_t qsize = run_quantize_floor_compute(size); - - if (qsize < size) { - /* - * Skip a quantization that may have an adequately large run, - * because under-sized runs may be mixed in. This only happens - * when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - qsize = run_quantize_ceil_compute_hard(qsize); - } - return (qsize); -} - #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) @@ -172,13 +91,27 @@ static size_t run_quantize_floor(size_t size) { size_t ret; + pszind_t pind; assert(size > 0); assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_floor_compute(size)); + assert(size != 0); + assert(size == PAGE_CEILING(size)); + + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. 
+ */ + return (size); + } + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); return (ret); } #ifdef JEMALLOC_JET @@ -200,8 +133,18 @@ run_quantize_ceil(size_t size) assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_ceil_compute(size)); + ret = run_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large run, + * because under-sized runs may be mixed in. This only happens + * when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. + */ + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + } return (ret); } #ifdef JEMALLOC_JET @@ -3483,64 +3426,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) return (arena); } -static bool -small_run_size_init(void) -{ - - assert(SLAB_MAXPGS != 0); - - small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * SLAB_MAXPGS); - if (small_run_tab == NULL) - return (true); - -#define TAB_INIT_bin_yes(index, size) { \ - const arena_bin_info_t *bin_info = \ - &arena_bin_info[index]; \ - small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ - } -#define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, run_size, \ - lg_delta_lookup) \ - TAB_INIT_bin_##bin(index, (ZU(1)<> LG_PAGE)); - if (run_quantize_floor_tab == NULL) - return (true); - - run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * - (run_quantize_max >> LG_PAGE)); - if (run_quantize_ceil_tab == NULL) - return (true); - - for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { - size_t run_size = i << LG_PAGE; - - run_quantize_floor_tab[i-1] = - run_quantize_floor_compute(run_size); - run_quantize_ceil_tab[i-1] = - run_quantize_ceil_compute(run_size); - } - - return (false); -} - -bool +void arena_boot(void) { unsigned i; @@ -3586,13 
+3472,6 @@ arena_boot(void) assert(large_maxclass > 0); nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); nhclasses = NSIZES - nlclasses - NBINS; - - if (small_run_size_init()) - return (true); - if (run_quantize_init()) - return (true); - - return (false); } void diff --git a/src/jemalloc.c b/src/jemalloc.c index 849e9418..929f3b87 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1234,8 +1234,7 @@ malloc_init_hard_a0_locked() return (true); if (config_prof) prof_boot1(); - if (arena_boot()) - return (true); + arena_boot(); if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) From a7a6f5bc96500d4821d72cdfafe731d564460890 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 21:09:28 -0700 Subject: [PATCH 0257/2608] Rename extent_node_t to extent_t. --- include/jemalloc/internal/arena.h | 80 ++++---- include/jemalloc/internal/chunk.h | 11 +- include/jemalloc/internal/extent.h | 169 ++++++++-------- .../jemalloc/internal/jemalloc_internal.h.in | 18 +- include/jemalloc/internal/private_symbols.txt | 41 ++-- include/jemalloc/internal/rtree.h | 18 +- include/jemalloc/internal/witness.h | 2 +- src/arena.c | 175 ++++++++-------- src/base.c | 74 +++---- src/chunk.c | 186 +++++++++--------- src/extent.c | 20 +- src/huge.c | 147 +++++++------- src/tcache.c | 9 +- test/unit/rtree.c | 25 +-- 14 files changed, 490 insertions(+), 485 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 11863fc7..93d0a327 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -177,11 +177,11 @@ typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; /* Arena chunk header. */ struct arena_chunk_s { /* - * A pointer to the arena that owns the chunk is stored within the node. - * This field as a whole is used by chunks_rtree to support both - * ivsalloc() and core-based debugging. 
+ * A pointer to the arena that owns the chunk is stored within the + * extent structure. This field as a whole is used by chunks_rtree to + * support both ivsalloc() and core-based debugging. */ - extent_node_t node; + extent_t extent; /* * Map of pages within chunk that keeps track of free/large/small. The @@ -303,7 +303,7 @@ struct arena_s { /* Extant arena chunks. */ - ql_head(extent_node_t) achunks; + ql_head(extent_t) achunks; /* * In order to avoid rapid chunk allocation/deallocation when an arena @@ -345,25 +345,25 @@ struct arena_s { * /-- arena ---\ * | | * | | - * |------------| /- chunk -\ - * ...->|chunks_cache|<--------------------------->| /----\ |<--... - * |------------| | |node| | - * | | | | | | - * | | /- run -\ /- run -\ | | | | - * | | | | | | | | | | - * | | | | | | | | | | - * |------------| |-------| |-------| | |----| | - * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... - * |------------| |-------| |-------| | |----| | - * | | | | | | | | | | - * | | | | | | | \----/ | - * | | \-------/ \-------/ | | - * | | | | - * | | | | - * \------------/ \---------/ + * |------------| /-- chunk --\ + * ...->|chunks_cache|<--------------------------->| /------\ |<--... + * |------------| | |extent| | + * | | | | | | + * | | /- run -\ /- run -\ | | | | + * | | | | | | | | | | + * | | | | | | | | | | + * |------------| |-------| |-------| | |------| | + * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... + * |------------| |-------| |-------| | |------| | + * | | | | | | | | | | + * | | | | | | | \------/ | + * | | \-------/ \-------/ | | + * | | | | + * | | | | + * \------------/ \-----------/ */ arena_runs_dirty_link_t runs_dirty; - extent_node_t chunks_cache; + extent_t chunks_cache; /* * Approximate time in seconds from the creation of a set of unused @@ -413,16 +413,16 @@ struct arena_s { size_t decay_backlog[SMOOTHSTEP_NSTEPS]; /* Extant huge allocations. 
*/ - ql_head(extent_node_t) huge; + ql_head(extent_t) huge; /* Synchronizes all huge allocation/update/deallocation. */ malloc_mutex_t huge_mtx; /* * Trees of chunks that were previously allocated (trees differ only in - * node ordering). These are used when allocating chunks, in an attempt - * to re-use address space. Depending on function, different tree - * orderings are needed, which is why there are two trees with the same - * contents. + * extent ordering). These are used when allocating chunks, in an + * attempt to re-use address space. Depending on function, different + * tree orderings are needed, which is why there are two trees with the + * same contents. */ extent_tree_t chunks_szad_cached; extent_tree_t chunks_ad_cached; @@ -430,9 +430,9 @@ struct arena_s { extent_tree_t chunks_ad_retained; malloc_mutex_t chunks_mtx; - /* Cache of nodes that were allocated via base_alloc(). */ - ql_head(extent_node_t) node_cache; - malloc_mutex_t node_cache_mtx; + /* Cache of extent structures that were allocated via base_alloc(). */ + ql_head(extent_t) extent_cache; + malloc_mutex_t extent_cache_mtx; /* User-configurable chunk hook functions. 
*/ chunk_hooks_t chunk_hooks; @@ -486,12 +486,12 @@ typedef size_t (run_quantize_t)(size_t); extern run_quantize_t *run_quantize_floor; extern run_quantize_t *run_quantize_ceil; #endif -void arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, +void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); -void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, +void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool cache); -extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); -void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); +extent_t *arena_extent_alloc(tsdn_t *tsdn, arena_t *arena); +void arena_extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, @@ -1066,7 +1066,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(binind != BININD_INVALID); assert(binind < NBINS); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_node_arena_get(&chunk->node); + arena = extent_arena_get(&chunk->extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); @@ -1317,7 +1317,7 @@ arena_aalloc(const void *ptr) chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); if (likely(chunk != ptr)) - return (extent_node_arena_get(&chunk->node)); + return (extent_arena_get(&chunk->extent)); else return (huge_aalloc(ptr)); } @@ -1395,7 +1395,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) binind, slow_path); } else { arena_dalloc_small(tsdn, - extent_node_arena_get(&chunk->node), chunk, + extent_arena_get(&chunk->extent), chunk, ptr, pageind); } } else { @@ -1411,7 +1411,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) size - 
large_pad, slow_path); } else { arena_dalloc_large(tsdn, - extent_node_arena_get(&chunk->node), chunk, + extent_arena_get(&chunk->extent), chunk, ptr); } } @@ -1455,7 +1455,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_dalloc_small(tsdn, - extent_node_arena_get(&chunk->node), chunk, + extent_arena_get(&chunk->extent), chunk, ptr, pageind); } } else { @@ -1467,7 +1467,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, size, slow_path); } else { arena_dalloc_large(tsdn, - extent_node_arena_get(&chunk->node), chunk, + extent_arena_get(&chunk->extent), chunk, ptr); } } diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c9fd4ecb..4666a649 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -52,13 +52,12 @@ chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsdn_t *tsdn, const void *chunk, - const extent_node_t *node); -void chunk_deregister(const void *chunk, const extent_node_t *node); +bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent); +void chunk_deregister(const void *chunk, const extent_t *extent); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool dalloc_node); + bool *zero, bool dalloc_extent); void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); @@ -80,11 +79,11 @@ void chunk_postfork_child(tsdn_t *tsdn); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -extent_node_t *chunk_lookup(const void *chunk, bool dependent); +extent_t *chunk_lookup(const void *chunk, bool 
dependent); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) -JEMALLOC_INLINE extent_node_t * +JEMALLOC_INLINE extent_t * chunk_lookup(const void *ptr, bool dependent) { diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 49d76a57..acc67f00 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -1,237 +1,236 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -typedef struct extent_node_s extent_node_t; +typedef struct extent_s extent_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -/* Tree of extents. Use accessor functions for en_* fields. */ -struct extent_node_s { +/* Extent (span of pages). Use accessor functions for e_* fields. */ +struct extent_s { /* Arena from which this extent came, if any. */ - arena_t *en_arena; + arena_t *e_arena; - /* Pointer to the extent that this tree node is responsible for. */ - void *en_addr; + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; /* Total region size. */ - size_t en_size; + size_t e_size; /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. */ - bool en_zeroed; + bool e_zeroed; /* * True if physical memory is committed to the extent, whether * explicitly or implicitly as on a system that overcommits and * satisfies physical memory needs on demand via soft page faults. */ - bool en_committed; + bool e_committed; /* * The achunk flag is used to validate that huge allocation lookups * don't return arena chunks. */ - bool en_achunk; + bool e_achunk; /* Profile counters, used for huge objects. */ - prof_tctx_t *en_prof_tctx; + prof_tctx_t *e_prof_tctx; /* Linkage for arena's runs_dirty and chunks_cache rings. 
*/ arena_runs_dirty_link_t rd; - qr(extent_node_t) cc_link; + qr(extent_t) cc_link; union { /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) szad_link; + rb_node(extent_t) szad_link; /* Linkage for arena's achunks, huge, and node_cache lists. */ - ql_elm(extent_node_t) ql_link; + ql_elm(extent_t) ql_link; }; /* Linkage for the address-ordered tree. */ - rb_node(extent_node_t) ad_link; + rb_node(extent_t) ad_link; }; -typedef rb_tree(extent_node_t) extent_tree_t; +typedef rb_tree(extent_t) extent_tree_t; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) +rb_proto(, extent_tree_szad_, extent_tree_t, extent_t) -rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) +rb_proto(, extent_tree_ad_, extent_tree_t, extent_t) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_t *extent_node_arena_get(const extent_node_t *node); -void *extent_node_addr_get(const extent_node_t *node); -size_t extent_node_size_get(const extent_node_t *node); -bool extent_node_zeroed_get(const extent_node_t *node); -bool extent_node_committed_get(const extent_node_t *node); -bool extent_node_achunk_get(const extent_node_t *node); -prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); -void extent_node_arena_set(extent_node_t *node, arena_t *arena); -void extent_node_addr_set(extent_node_t *node, void *addr); -void extent_node_size_set(extent_node_t *node, size_t size); -void extent_node_zeroed_set(extent_node_t *node, bool zeroed); -void extent_node_committed_set(extent_node_t *node, bool committed); -void extent_node_achunk_set(extent_node_t *node, bool achunk); -void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); -void extent_node_init(extent_node_t *node, arena_t 
*arena, void *addr, +arena_t *extent_arena_get(const extent_t *extent); +void *extent_addr_get(const extent_t *extent); +size_t extent_size_get(const extent_t *extent); +bool extent_zeroed_get(const extent_t *extent); +bool extent_committed_get(const extent_t *extent); +bool extent_achunk_get(const extent_t *extent); +prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); +void extent_arena_set(extent_t *extent, arena_t *arena); +void extent_addr_set(extent_t *extent, void *addr); +void extent_size_set(extent_t *extent, size_t size); +void extent_zeroed_set(extent_t *extent, bool zeroed); +void extent_committed_set(extent_t *extent, bool committed); +void extent_achunk_set(extent_t *extent, bool achunk); +void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); +void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool zeroed, bool committed); -void extent_node_dirty_linkage_init(extent_node_t *node); -void extent_node_dirty_insert(extent_node_t *node, - arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); -void extent_node_dirty_remove(extent_node_t *node); +void extent_dirty_linkage_init(extent_t *extent); +void extent_dirty_insert(extent_t *extent, + arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty); +void extent_dirty_remove(extent_t *extent); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) JEMALLOC_INLINE arena_t * -extent_node_arena_get(const extent_node_t *node) +extent_arena_get(const extent_t *extent) { - return (node->en_arena); + return (extent->e_arena); } JEMALLOC_INLINE void * -extent_node_addr_get(const extent_node_t *node) +extent_addr_get(const extent_t *extent) { - return (node->en_addr); + return (extent->e_addr); } JEMALLOC_INLINE size_t -extent_node_size_get(const extent_node_t *node) +extent_size_get(const extent_t *extent) { - return (node->en_size); + return (extent->e_size); } JEMALLOC_INLINE bool -extent_node_zeroed_get(const extent_node_t *node) 
+extent_zeroed_get(const extent_t *extent) { - return (node->en_zeroed); + return (extent->e_zeroed); } JEMALLOC_INLINE bool -extent_node_committed_get(const extent_node_t *node) +extent_committed_get(const extent_t *extent) { - assert(!node->en_achunk); - return (node->en_committed); + assert(!extent->e_achunk); + return (extent->e_committed); } JEMALLOC_INLINE bool -extent_node_achunk_get(const extent_node_t *node) +extent_achunk_get(const extent_t *extent) { - return (node->en_achunk); + return (extent->e_achunk); } JEMALLOC_INLINE prof_tctx_t * -extent_node_prof_tctx_get(const extent_node_t *node) +extent_prof_tctx_get(const extent_t *extent) { - return (node->en_prof_tctx); + return (extent->e_prof_tctx); } JEMALLOC_INLINE void -extent_node_arena_set(extent_node_t *node, arena_t *arena) +extent_arena_set(extent_t *extent, arena_t *arena) { - node->en_arena = arena; + extent->e_arena = arena; } JEMALLOC_INLINE void -extent_node_addr_set(extent_node_t *node, void *addr) +extent_addr_set(extent_t *extent, void *addr) { - node->en_addr = addr; + extent->e_addr = addr; } JEMALLOC_INLINE void -extent_node_size_set(extent_node_t *node, size_t size) +extent_size_set(extent_t *extent, size_t size) { - node->en_size = size; + extent->e_size = size; } JEMALLOC_INLINE void -extent_node_zeroed_set(extent_node_t *node, bool zeroed) +extent_zeroed_set(extent_t *extent, bool zeroed) { - node->en_zeroed = zeroed; + extent->e_zeroed = zeroed; } JEMALLOC_INLINE void -extent_node_committed_set(extent_node_t *node, bool committed) +extent_committed_set(extent_t *extent, bool committed) { - node->en_committed = committed; + extent->e_committed = committed; } JEMALLOC_INLINE void -extent_node_achunk_set(extent_node_t *node, bool achunk) +extent_achunk_set(extent_t *extent, bool achunk) { - node->en_achunk = achunk; + extent->e_achunk = achunk; } JEMALLOC_INLINE void -extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) +extent_prof_tctx_set(extent_t *extent, prof_tctx_t 
*tctx) { - node->en_prof_tctx = tctx; + extent->e_prof_tctx = tctx; } JEMALLOC_INLINE void -extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, +extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool zeroed, bool committed) { - extent_node_arena_set(node, arena); - extent_node_addr_set(node, addr); - extent_node_size_set(node, size); - extent_node_zeroed_set(node, zeroed); - extent_node_committed_set(node, committed); - extent_node_achunk_set(node, false); + extent_arena_set(extent, arena); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + extent_achunk_set(extent, false); if (config_prof) - extent_node_prof_tctx_set(node, NULL); + extent_prof_tctx_set(extent, NULL); } JEMALLOC_INLINE void -extent_node_dirty_linkage_init(extent_node_t *node) +extent_dirty_linkage_init(extent_t *extent) { - qr_new(&node->rd, rd_link); - qr_new(node, cc_link); + qr_new(&extent->rd, rd_link); + qr_new(extent, cc_link); } JEMALLOC_INLINE void -extent_node_dirty_insert(extent_node_t *node, - arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty) +extent_dirty_insert(extent_t *extent, + arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty) { - qr_meld(runs_dirty, &node->rd, rd_link); - qr_meld(chunks_dirty, node, cc_link); + qr_meld(runs_dirty, &extent->rd, rd_link); + qr_meld(chunks_dirty, extent, cc_link); } JEMALLOC_INLINE void -extent_node_dirty_remove(extent_node_t *node) +extent_dirty_remove(extent_t *extent) { - qr_remove(&node->rd, rd_link); - qr_remove(node, cc_link); + qr_remove(&extent->rd, rd_link); + qr_remove(extent, cc_link); } - #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index eabb9ce3..e487db14 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ 
b/include/jemalloc/internal/jemalloc_internal.h.in @@ -966,6 +966,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #include "jemalloc/internal/hash.h" #ifndef JEMALLOC_ENABLE_INLINE +extent_t *iealloc(const void *ptr); arena_t *iaalloc(const void *ptr); size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, @@ -995,6 +996,13 @@ bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE extent_t * +iealloc(const void *ptr) +{ + + return (chunk_lookup(ptr, true)); +} + JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(const void *ptr) { @@ -1086,15 +1094,15 @@ ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) JEMALLOC_ALWAYS_INLINE size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) { - extent_node_t *node; + extent_t *extent; /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - node = chunk_lookup(ptr, false); - if (node == NULL) + extent = chunk_lookup(ptr, false); + if (extent == NULL) return (0); /* Only arena chunks should be looked up via interior pointers. 
*/ - assert(extent_node_addr_get(node) == ptr || - extent_node_achunk_get(node)); + assert(extent_addr_get(extent) == ptr || + extent_achunk_get(extent)); return (isalloc(tsdn, ptr, demote)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e046c3b1..61b29b9d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -35,6 +35,8 @@ arena_decay_time_get arena_decay_time_set arena_dss_prec_get arena_dss_prec_set +arena_extent_alloc +arena_extent_dalloc arena_get arena_ichoose arena_init @@ -78,8 +80,6 @@ arena_miscelm_get_mutable arena_miscelm_to_pageind arena_miscelm_to_rpages arena_new -arena_node_alloc -arena_node_dalloc arena_nthreads_dec arena_nthreads_get arena_nthreads_inc @@ -204,24 +204,22 @@ ctl_postfork_parent ctl_prefork decay_ticker_get dss_prec_names -extent_node_achunk_get -extent_node_achunk_set -extent_node_addr_get -extent_node_addr_set -extent_node_arena_get -extent_node_arena_set -extent_node_committed_get -extent_node_committed_set -extent_node_dirty_insert -extent_node_dirty_linkage_init -extent_node_dirty_remove -extent_node_init -extent_node_prof_tctx_get -extent_node_prof_tctx_set -extent_node_size_get -extent_node_size_set -extent_node_zeroed_get -extent_node_zeroed_set +extent_achunk_get +extent_achunk_set +extent_addr_get +extent_addr_set +extent_arena_get +extent_arena_set +extent_committed_get +extent_committed_set +extent_dirty_insert +extent_dirty_linkage_init +extent_dirty_remove +extent_init +extent_prof_tctx_get +extent_prof_tctx_set +extent_size_get +extent_size_set extent_tree_ad_destroy extent_tree_ad_destroy_recurse extent_tree_ad_empty @@ -260,6 +258,8 @@ extent_tree_szad_reverse_iter extent_tree_szad_reverse_iter_recurse extent_tree_szad_reverse_iter_start extent_tree_szad_search +extent_zeroed_get +extent_zeroed_set ffs_llu ffs_lu ffs_u @@ -294,6 +294,7 @@ iallocztm iarena_cleanup idalloc idalloctm +iealloc 
index2size index2size_compute index2size_lookup diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 8d0c584d..45e49b74 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -39,7 +39,7 @@ struct rtree_node_elm_s { union { void *pun; rtree_node_elm_t *child; - extent_node_t *val; + extent_t *val; }; }; @@ -116,17 +116,17 @@ rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, bool dependent); rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, bool dependent); -extent_node_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, +extent_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent); void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, - const extent_node_t *val); + const extent_t *val); rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent); rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent); -extent_node_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); -bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val); +extent_t *rtree_get(rtree_t *rtree, uintptr_t key, bool dependent); +bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_t *val); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) @@ -186,7 +186,7 @@ rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, return (child); } -JEMALLOC_ALWAYS_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_t * rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) { @@ -209,7 +209,7 @@ rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) } JEMALLOC_INLINE void -rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_node_t *val) +rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_t *val) { atomic_write_p(&elm->pun, val); @@ -240,7 +240,7 @@ rtree_subtree_read(rtree_t 
*rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_ALWAYS_INLINE extent_node_t * +JEMALLOC_ALWAYS_INLINE extent_t * rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) { uintptr_t subkey; @@ -332,7 +332,7 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) } JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, const extent_node_t *val) +rtree_set(rtree_t *rtree, uintptr_t key, const extent_t *val) { uintptr_t subkey; unsigned i, start_level; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index d78dca2d..c68c9694 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -24,7 +24,7 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); #define WITNESS_RANK_ARENA 8U #define WITNESS_RANK_ARENA_CHUNKS 9U -#define WITNESS_RANK_ARENA_NODE_CACHE 10 +#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 #define WITNESS_RANK_BASE 11U diff --git a/src/arena.c b/src/arena.c index 06a69856..b59f7f1b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -214,32 +214,32 @@ arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, } static size_t -arena_chunk_dirty_npages(const extent_node_t *node) +arena_chunk_dirty_npages(const extent_t *extent) { - return (extent_node_size_get(node) >> LG_PAGE); + return (extent_size_get(extent) >> LG_PAGE); } void -arena_chunk_cache_maybe_insert(arena_t *arena, extent_node_t *node, bool cache) +arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) { if (cache) { - extent_node_dirty_linkage_init(node); - extent_node_dirty_insert(node, &arena->runs_dirty, + extent_dirty_linkage_init(extent); + extent_dirty_insert(extent, &arena->runs_dirty, &arena->chunks_cache); - arena->ndirty += arena_chunk_dirty_npages(node); + arena->ndirty += arena_chunk_dirty_npages(extent); } } void -arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, bool dirty) 
+arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool dirty) { if (dirty) { - extent_node_dirty_remove(node); - assert(arena->ndirty >= arena_chunk_dirty_npages(node)); - arena->ndirty -= arena_chunk_dirty_npages(node); + extent_dirty_remove(extent); + assert(arena->ndirty >= arena_chunk_dirty_npages(extent)); + arena->ndirty -= arena_chunk_dirty_npages(extent); } } @@ -516,14 +516,14 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, { /* - * The extent node notion of "committed" doesn't directly apply to - * arena chunks. Arbitrarily mark them as committed. The commit state - * of runs is tracked individually, and upon chunk deallocation the - * entire chunk is in a consistent commit state. + * The extent notion of "committed" doesn't directly apply to arena + * chunks. Arbitrarily mark them as committed. The commit state of + * runs is tracked individually, and upon chunk deallocation the entire + * chunk is in a consistent commit state. */ - extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); - extent_node_achunk_set(&chunk->node, true); - return (chunk_register(tsdn, chunk, &chunk->node)); + extent_init(&chunk->extent, arena, chunk, chunksize, zero, true); + extent_achunk_set(&chunk->extent, true); + return (chunk_register(tsdn, chunk, &chunk->extent)); } static arena_chunk_t * @@ -648,8 +648,8 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) return (NULL); } - ql_elm_new(&chunk->node, ql_link); - ql_tail_insert(&arena->achunks, &chunk->node, ql_link); + ql_elm_new(&chunk->extent, ql_link); + ql_tail_insert(&arena->achunks, &chunk->extent, ql_link); arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); return (chunk); @@ -661,7 +661,7 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_deregister(chunk, &chunk->node); + chunk_deregister(chunk, &chunk->extent); committed = 
(arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { @@ -718,7 +718,7 @@ arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - ql_remove(&arena->achunks, &chunk->node, ql_link); + ql_remove(&arena->achunks, &chunk->extent, ql_link); spare = arena->spare; arena->spare = chunk; if (spare != NULL) @@ -805,30 +805,30 @@ arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, arena_huge_malloc_stats_update_undo(arena, usize); } -extent_node_t * -arena_node_alloc(tsdn_t *tsdn, arena_t *arena) +extent_t * +arena_extent_alloc(tsdn_t *tsdn, arena_t *arena) { - extent_node_t *node; + extent_t *extent; - malloc_mutex_lock(tsdn, &arena->node_cache_mtx); - node = ql_last(&arena->node_cache, ql_link); - if (node == NULL) { - malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); - return (base_alloc(tsdn, sizeof(extent_node_t))); + malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); + extent = ql_last(&arena->extent_cache, ql_link); + if (extent == NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); + return (base_alloc(tsdn, sizeof(extent_t))); } - ql_tail_remove(&arena->node_cache, extent_node_t, ql_link); - malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); - return (node); + ql_tail_remove(&arena->extent_cache, extent_t, ql_link); + malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); + return (extent); } void -arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node) +arena_extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->node_cache_mtx); - ql_elm_new(node, ql_link); - ql_tail_insert(&arena->node_cache, node, ql_link); - malloc_mutex_unlock(tsdn, &arena->node_cache_mtx); + malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); + ql_elm_new(extent, ql_link); + ql_tail_insert(&arena->extent_cache, extent, ql_link); + 
malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); } static void * @@ -1424,7 +1424,7 @@ arena_dirty_count(arena_t *arena) { size_t ndirty = 0; arena_runs_dirty_link_t *rdelm; - extent_node_t *chunkselm; + extent_t *chunkselm; for (rdelm = qr_next(&arena->runs_dirty, rd_link), chunkselm = qr_next(&arena->chunks_cache, cc_link); @@ -1432,7 +1432,7 @@ arena_dirty_count(arena_t *arena) size_t npages; if (rdelm == &chunkselm->rd) { - npages = extent_node_size_get(chunkselm) >> LG_PAGE; + npages = extent_size_get(chunkselm) >> LG_PAGE; chunkselm = qr_next(chunkselm, cc_link); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( @@ -1456,10 +1456,10 @@ arena_dirty_count(arena_t *arena) static size_t arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, - extent_node_t *purge_chunks_sentinel) + extent_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; - extent_node_t *chunkselm; + extent_t *chunkselm; size_t nstashed = 0; /* Stash runs/chunks according to ndirty_limit. */ @@ -1470,11 +1470,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, rdelm_next = qr_next(rdelm, rd_link); if (rdelm == &chunkselm->rd) { - extent_node_t *chunkselm_next; + extent_t *chunkselm_next; bool zero; UNUSED void *chunk; - npages = extent_node_size_get(chunkselm) >> LG_PAGE; + npages = extent_size_get(chunkselm) >> LG_PAGE; if (opt_purge == purge_mode_decay && arena->ndirty - (nstashed + npages) < ndirty_limit) break; @@ -1482,18 +1482,18 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunkselm_next = qr_next(chunkselm, cc_link); /* * Allocate. chunkselm remains valid due to the - * dalloc_node=false argument to chunk_alloc_cache(). + * dalloc_extent=false argument to chunk_alloc_cache(). 
*/ zero = false; chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, - extent_node_addr_get(chunkselm), - extent_node_size_get(chunkselm), chunksize, &zero, + extent_addr_get(chunkselm), + extent_size_get(chunkselm), chunksize, &zero, false); - assert(chunk == extent_node_addr_get(chunkselm)); - assert(zero == extent_node_zeroed_get(chunkselm)); - extent_node_dirty_insert(chunkselm, purge_runs_sentinel, + assert(chunk == extent_addr_get(chunkselm)); + assert(zero == extent_zeroed_get(chunkselm)); + extent_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); - assert(npages == (extent_node_size_get(chunkselm) >> + assert(npages == (extent_size_get(chunkselm) >> LG_PAGE)); chunkselm = chunkselm_next; } else { @@ -1546,11 +1546,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static size_t arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, - extent_node_t *purge_chunks_sentinel) + extent_t *purge_chunks_sentinel) { size_t npurged, nmadvise; arena_runs_dirty_link_t *rdelm; - extent_node_t *chunkselm; + extent_t *chunkselm; if (config_stats) nmadvise = 0; @@ -1571,7 +1571,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * decommitted, or purged, depending on chunk * deallocation policy. */ - size_t size = extent_node_size_get(chunkselm); + size_t size = extent_size_get(chunkselm); npages = size >> LG_PAGE; chunkselm = qr_next(chunkselm, cc_link); } else { @@ -1639,10 +1639,10 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static void arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_runs_dirty_link_t *purge_runs_sentinel, - extent_node_t *purge_chunks_sentinel) + extent_t *purge_chunks_sentinel) { arena_runs_dirty_link_t *rdelm, *rdelm_next; - extent_node_t *chunkselm; + extent_t *chunkselm; /* Deallocate chunks/runs. 
*/ for (rdelm = qr_next(purge_runs_sentinel, rd_link), @@ -1650,14 +1650,13 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, rdelm != purge_runs_sentinel; rdelm = rdelm_next) { rdelm_next = qr_next(rdelm, rd_link); if (rdelm == &chunkselm->rd) { - extent_node_t *chunkselm_next = qr_next(chunkselm, - cc_link); - void *addr = extent_node_addr_get(chunkselm); - size_t size = extent_node_size_get(chunkselm); - bool zeroed = extent_node_zeroed_get(chunkselm); - bool committed = extent_node_committed_get(chunkselm); - extent_node_dirty_remove(chunkselm); - arena_node_dalloc(tsdn, arena, chunkselm); + extent_t *chunkselm_next = qr_next(chunkselm, cc_link); + void *addr = extent_addr_get(chunkselm); + size_t size = extent_size_get(chunkselm); + bool zeroed = extent_zeroed_get(chunkselm); + bool committed = extent_committed_get(chunkselm); + extent_dirty_remove(chunkselm); + arena_extent_dalloc(tsdn, arena, chunkselm); chunkselm = chunkselm_next; chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, size, zeroed, committed); @@ -1692,7 +1691,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); size_t npurge, npurged; arena_runs_dirty_link_t purge_runs_sentinel; - extent_node_t purge_chunks_sentinel; + extent_t purge_chunks_sentinel; arena->purging = true; @@ -1708,7 +1707,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); qr_new(&purge_runs_sentinel, rd_link); - extent_node_dirty_linkage_init(&purge_chunks_sentinel); + extent_dirty_linkage_init(&purge_chunks_sentinel); npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); @@ -1783,7 +1782,7 @@ void arena_reset(tsd_t *tsd, arena_t *arena) { unsigned i; - extent_node_t *node; + extent_t *extent; /* * Locking in this function is unintuitive. 
The caller guarantees that @@ -1801,9 +1800,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) /* Remove large allocations from prof sample set. */ if (config_prof && opt_prof) { - ql_foreach(node, &arena->achunks, ql_link) { + ql_foreach(extent, &arena->achunks, ql_link) { arena_achunk_prof_reset(tsd, arena, - extent_node_addr_get(node)); + extent_addr_get(extent)); } } @@ -1815,9 +1814,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) /* Huge allocations. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); - for (node = ql_last(&arena->huge, ql_link); node != NULL; node = + for (extent = ql_last(&arena->huge, ql_link); extent != NULL; extent = ql_last(&arena->huge, ql_link)) { - void *ptr = extent_node_addr_get(node); + void *ptr = extent_addr_get(extent); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); @@ -1854,18 +1853,18 @@ arena_reset(tsd_t *tsd, arena_t *arena) * chains directly correspond. */ qr_new(&arena->runs_dirty, rd_link); - for (node = qr_next(&arena->chunks_cache, cc_link); - node != &arena->chunks_cache; node = qr_next(node, cc_link)) { - qr_new(&node->rd, rd_link); - qr_meld(&arena->runs_dirty, &node->rd, rd_link); + for (extent = qr_next(&arena->chunks_cache, cc_link); + extent != &arena->chunks_cache; extent = qr_next(extent, cc_link)) { + qr_new(&extent->rd, rd_link); + qr_meld(&arena->runs_dirty, &extent->rd, rd_link); } /* Arena chunks. */ - for (node = ql_last(&arena->achunks, ql_link); node != NULL; node = - ql_last(&arena->achunks, ql_link)) { - ql_remove(&arena->achunks, node, ql_link); + for (extent = ql_last(&arena->achunks, ql_link); extent != NULL; extent + = ql_last(&arena->achunks, ql_link)) { + ql_remove(&arena->achunks, extent, ql_link); arena_chunk_discard(tsd_tsdn(tsd), arena, - extent_node_addr_get(node)); + extent_addr_get(extent)); } /* Spare. 
*/ @@ -2649,8 +2648,8 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, if (run == bin->runcur) bin->runcur = NULL; else { - szind_t binind = arena_bin_index(extent_node_arena_get( - &chunk->node), bin); + szind_t binind = arena_bin_index(extent_arena_get( + &chunk->extent), bin); const arena_bin_info_t *bin_info = &arena_bin_info[binind]; /* @@ -3018,7 +3017,7 @@ arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_node_arena_get(&chunk->node); + arena = extent_arena_get(&chunk->extent); if (oldsize < usize_max) { bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, @@ -3080,7 +3079,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, } chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_decay_tick(tsdn, extent_node_arena_get(&chunk->node)); + arena_decay_tick(tsdn, extent_arena_get(&chunk->extent)); return (false); } else { return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min, @@ -3404,9 +3403,9 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", WITNESS_RANK_ARENA_CHUNKS)) return (NULL); - ql_new(&arena->node_cache); - if (malloc_mutex_init(&arena->node_cache_mtx, "arena_node_cache", - WITNESS_RANK_ARENA_NODE_CACHE)) + ql_new(&arena->extent_cache); + if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", + WITNESS_RANK_ARENA_EXTENT_CACHE)) return (NULL); arena->chunk_hooks = chunk_hooks_default; @@ -3492,7 +3491,7 @@ void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->node_cache_mtx); + malloc_mutex_prefork(tsdn, &arena->extent_cache_mtx); } void @@ -3513,7 +3512,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); - malloc_mutex_postfork_parent(tsdn, &arena->node_cache_mtx); 
+ malloc_mutex_postfork_parent(tsdn, &arena->extent_cache_mtx); malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx); malloc_mutex_postfork_parent(tsdn, &arena->lock); } @@ -3526,7 +3525,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) malloc_mutex_postfork_child(tsdn, &arena->huge_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); - malloc_mutex_postfork_child(tsdn, &arena->node_cache_mtx); + malloc_mutex_postfork_child(tsdn, &arena->extent_cache_mtx); malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx); malloc_mutex_postfork_child(tsdn, &arena->lock); } diff --git a/src/base.c b/src/base.c index 1b0bf697..a9ab279e 100644 --- a/src/base.c +++ b/src/base.c @@ -6,59 +6,59 @@ static malloc_mutex_t base_mtx; static extent_tree_t base_avail_szad; -static extent_node_t *base_nodes; +static extent_t *base_extents; static size_t base_allocated; static size_t base_resident; static size_t base_mapped; /******************************************************************************/ -static extent_node_t * -base_node_try_alloc(tsdn_t *tsdn) +static extent_t * +base_extent_try_alloc(tsdn_t *tsdn) { - extent_node_t *node; + extent_t *extent; malloc_mutex_assert_owner(tsdn, &base_mtx); - if (base_nodes == NULL) + if (base_extents == NULL) return (NULL); - node = base_nodes; - base_nodes = *(extent_node_t **)node; - return (node); + extent = base_extents; + base_extents = *(extent_t **)extent; + return (extent); } static void -base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) +base_extent_dalloc(tsdn_t *tsdn, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &base_mtx); - *(extent_node_t **)node = base_nodes; - base_nodes = node; + *(extent_t **)extent = base_extents; + base_extents = extent; } -static extent_node_t * +static extent_t * base_chunk_alloc(tsdn_t *tsdn, size_t minsize) { - extent_node_t *node; + extent_t *extent; size_t csize, nsize; void *addr; malloc_mutex_assert_owner(tsdn, &base_mtx); assert(minsize != 0); - 
node = base_node_try_alloc(tsdn); - /* Allocate enough space to also carve a node out if necessary. */ - nsize = (node == NULL) ? CACHELINE_CEILING(sizeof(extent_node_t)) : 0; + extent = base_extent_try_alloc(tsdn); + /* Allocate enough space to also carve an extent out if necessary. */ + nsize = (extent == NULL) ? CACHELINE_CEILING(sizeof(extent_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); addr = chunk_alloc_base(csize); if (addr == NULL) { - if (node != NULL) - base_node_dalloc(tsdn, node); + if (extent != NULL) + base_extent_dalloc(tsdn, extent); return (NULL); } base_mapped += csize; - if (node == NULL) { - node = (extent_node_t *)addr; + if (extent == NULL) { + extent = (extent_t *)addr; addr = (void *)((uintptr_t)addr + nsize); csize -= nsize; if (config_stats) { @@ -66,8 +66,8 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_node_init(node, NULL, addr, csize, true, true); - return (node); + extent_init(extent, NULL, addr, csize, true, true); + return (extent); } /* @@ -80,8 +80,8 @@ base_alloc(tsdn_t *tsdn, size_t size) { void *ret; size_t csize, usize; - extent_node_t *node; - extent_node_t key; + extent_t *extent; + extent_t key; /* * Round size up to nearest multiple of the cacheline size, so that @@ -90,28 +90,28 @@ base_alloc(tsdn_t *tsdn, size_t size) csize = CACHELINE_CEILING(size); usize = s2u(csize); - extent_node_init(&key, NULL, NULL, usize, false, false); + extent_init(&key, NULL, NULL, usize, false, false); malloc_mutex_lock(tsdn, &base_mtx); - node = extent_tree_szad_nsearch(&base_avail_szad, &key); - if (node != NULL) { + extent = extent_tree_szad_nsearch(&base_avail_szad, &key); + if (extent != NULL) { /* Use existing space. */ - extent_tree_szad_remove(&base_avail_szad, node); + extent_tree_szad_remove(&base_avail_szad, extent); } else { /* Try to allocate more space. 
*/ - node = base_chunk_alloc(tsdn, csize); + extent = base_chunk_alloc(tsdn, csize); } - if (node == NULL) { + if (extent == NULL) { ret = NULL; goto label_return; } - ret = extent_node_addr_get(node); - if (extent_node_size_get(node) > csize) { - extent_node_addr_set(node, (void *)((uintptr_t)ret + csize)); - extent_node_size_set(node, extent_node_size_get(node) - csize); - extent_tree_szad_insert(&base_avail_szad, node); + ret = extent_addr_get(extent); + if (extent_size_get(extent) > csize) { + extent_addr_set(extent, (void *)((uintptr_t)ret + csize)); + extent_size_set(extent, extent_size_get(extent) - csize); + extent_tree_szad_insert(&base_avail_szad, extent); } else - base_node_dalloc(tsdn, node); + base_extent_dalloc(tsdn, extent); if (config_stats) { base_allocated += csize; /* @@ -147,7 +147,7 @@ base_boot(void) if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); extent_tree_szad_new(&base_avail_szad); - base_nodes = NULL; + base_extents = NULL; return (false); } diff --git a/src/chunk.c b/src/chunk.c index 7af7bb91..d3a600a5 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -141,15 +141,15 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, } bool -chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) +chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) { - assert(extent_node_addr_get(node) == chunk); + assert(extent_addr_get(extent) == chunk); - if (rtree_set(&chunks_rtree, (uintptr_t)chunk, node)) + if (rtree_set(&chunks_rtree, (uintptr_t)chunk, extent)) return (true); if (config_prof && opt_prof) { - size_t size = extent_node_size_get(node); + size_t size = extent_size_get(extent); size_t nadd = (size == 0) ? 
1 : size / chunksize; size_t cur = atomic_add_z(&curchunks, nadd); size_t high = atomic_read_z(&highchunks); @@ -168,14 +168,14 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) } void -chunk_deregister(const void *chunk, const extent_node_t *node) +chunk_deregister(const void *chunk, const extent_t *extent) { bool err; err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); assert(!err); if (config_prof && opt_prof) { - size_t size = extent_node_size_get(node); + size_t size = extent_size_get(extent); size_t nsub = (size == 0) ? 1 : size / chunksize; assert(atomic_read_z(&curchunks) >= nsub); atomic_sub_z(&curchunks, nsub); @@ -186,15 +186,15 @@ chunk_deregister(const void *chunk, const extent_node_t *node) * Do first-best-fit chunk selection, i.e. select the lowest chunk that best * fits. */ -static extent_node_t * +static extent_t * chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, size_t size) { - extent_node_t key; + extent_t key; assert(size == CHUNK_CEILING(size)); - extent_node_init(&key, arena, NULL, size, false, false); + extent_init(&key, arena, NULL, size, false, false); return (extent_tree_szad_nsearch(chunks_szad, &key)); } @@ -202,20 +202,20 @@ static void * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool dalloc_node) + bool dalloc_extent) { void *ret; - extent_node_t *node; + extent_t *extent; size_t alloc_size, leadsize, trailsize; bool zeroed, committed; assert(new_addr == NULL || alignment == chunksize); /* - * Cached chunks use the node linkage embedded in their headers, in - * which case dalloc_node is true, and new_addr is non-NULL because + * Cached chunks use the extent linkage embedded in their headers, in + * which case dalloc_extent is true, and new_addr is non-NULL because * we're operating on a 
specific chunk. */ - assert(dalloc_node || new_addr != NULL); + assert(dalloc_extent || new_addr != NULL); alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); /* Beware size_t wrap-around. */ @@ -224,56 +224,55 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { - extent_node_t key; - extent_node_init(&key, arena, new_addr, alloc_size, false, - false); - node = extent_tree_ad_search(chunks_ad, &key); + extent_t key; + extent_init(&key, arena, new_addr, alloc_size, false, false); + extent = extent_tree_ad_search(chunks_ad, &key); } else { - node = chunk_first_best_fit(arena, chunks_szad, chunks_ad, + extent = chunk_first_best_fit(arena, chunks_szad, chunks_ad, alloc_size); } - if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < + if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < size)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } - leadsize = ALIGNMENT_CEILING((uintptr_t)extent_node_addr_get(node), - alignment) - (uintptr_t)extent_node_addr_get(node); + leadsize = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), + alignment) - (uintptr_t)extent_addr_get(extent); assert(new_addr == NULL || leadsize == 0); - assert(extent_node_size_get(node) >= leadsize + size); - trailsize = extent_node_size_get(node) - leadsize - size; - ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize); - zeroed = extent_node_zeroed_get(node); + assert(extent_size_get(extent) >= leadsize + size); + trailsize = extent_size_get(extent) - leadsize - size; + ret = (void *)((uintptr_t)extent_addr_get(extent) + leadsize); + zeroed = extent_zeroed_get(extent); if (zeroed) *zero = true; - committed = extent_node_committed_get(node); + committed = extent_committed_get(extent); if (committed) *commit = true; /* Split the lead. 
*/ if (leadsize != 0 && - chunk_hooks->split(extent_node_addr_get(node), - extent_node_size_get(node), leadsize, size, false, arena->ind)) { + chunk_hooks->split(extent_addr_get(extent), + extent_size_get(extent), leadsize, size, false, arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } - /* Remove node from the tree. */ - extent_tree_szad_remove(chunks_szad, node); - extent_tree_ad_remove(chunks_ad, node); - arena_chunk_cache_maybe_remove(arena, node, cache); + /* Remove extent from the tree. */ + extent_tree_szad_remove(chunks_szad, extent); + extent_tree_ad_remove(chunks_ad, extent); + arena_chunk_cache_maybe_remove(arena, extent, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ - extent_node_size_set(node, leadsize); - extent_tree_szad_insert(chunks_szad, node); - extent_tree_ad_insert(chunks_ad, node); - arena_chunk_cache_maybe_insert(arena, node, cache); - node = NULL; + extent_size_set(extent, leadsize); + extent_tree_szad_insert(chunks_szad, extent); + extent_tree_ad_insert(chunks_ad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); + extent = NULL; } if (trailsize != 0) { /* Split the trail. */ if (chunk_hooks->split(ret, size + trailsize, size, trailsize, false, arena->ind)) { - if (dalloc_node && node != NULL) - arena_node_dalloc(tsdn, arena, node); + if (dalloc_extent && extent != NULL) + arena_extent_dalloc(tsdn, arena, extent); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size + trailsize, zeroed, @@ -281,9 +280,9 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (NULL); } /* Insert the trailing space as a smaller chunk. 
*/ - if (node == NULL) { - node = arena_node_alloc(tsdn, arena); - if (node == NULL) { + if (extent == NULL) { + extent = arena_extent_alloc(tsdn, arena); + if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, cache, ret, size + @@ -291,12 +290,12 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (NULL); } } - extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size), + extent_init(extent, arena, (void *)((uintptr_t)(ret) + size), trailsize, zeroed, committed); - extent_tree_szad_insert(chunks_szad, node); - extent_tree_ad_insert(chunks_ad, node); - arena_chunk_cache_maybe_insert(arena, node, cache); - node = NULL; + extent_tree_szad_insert(chunks_szad, extent); + extent_tree_ad_insert(chunks_ad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); + extent = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -306,9 +305,9 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - assert(dalloc_node || node != NULL); - if (dalloc_node && node != NULL) - arena_node_dalloc(tsdn, arena, node); + assert(dalloc_extent || extent != NULL); + if (dalloc_extent && extent != NULL) + arena_extent_dalloc(tsdn, arena, extent); if (*zero) { if (!zeroed) memset(ret, 0, size); @@ -381,7 +380,8 @@ chunk_alloc_base(size_t size) void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) + void *new_addr, size_t size, size_t alignment, bool *zero, + bool dalloc_extent) { void *ret; bool commit; @@ -394,7 +394,7 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, commit = true; ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - 
new_addr, size, alignment, zero, &commit, dalloc_node); + new_addr, size, alignment, zero, &commit, dalloc_extent); if (ret == NULL) return (NULL); assert(commit); @@ -480,40 +480,39 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { bool unzeroed; - extent_node_t *node, *prev; - extent_node_t key; + extent_t *extent, *prev; + extent_t key; assert(!cache || !zeroed); unzeroed = cache || !zeroed; malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); - extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, - false, false); - node = extent_tree_ad_nsearch(chunks_ad, &key); + extent_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, + false); + extent = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ - if (node != NULL && extent_node_addr_get(node) == - extent_node_addr_get(&key) && extent_node_committed_get(node) == - committed && !chunk_hooks->merge(chunk, size, - extent_node_addr_get(node), extent_node_size_get(node), false, - arena->ind)) { + if (extent != NULL && extent_addr_get(extent) == extent_addr_get(&key) + && extent_committed_get(extent) == committed && + !chunk_hooks->merge(chunk, size, extent_addr_get(extent), + extent_size_get(extent), false, arena->ind)) { /* * Coalesce chunk with the following address range. This does * not change the position within chunks_ad, so only * remove/insert from/into chunks_szad. 
*/ - extent_tree_szad_remove(chunks_szad, node); - arena_chunk_cache_maybe_remove(arena, node, cache); - extent_node_addr_set(node, chunk); - extent_node_size_set(node, size + extent_node_size_get(node)); - extent_node_zeroed_set(node, extent_node_zeroed_get(node) && + extent_tree_szad_remove(chunks_szad, extent); + arena_chunk_cache_maybe_remove(arena, extent, cache); + extent_addr_set(extent, chunk); + extent_size_set(extent, size + extent_size_get(extent)); + extent_zeroed_set(extent, extent_zeroed_get(extent) && !unzeroed); - extent_tree_szad_insert(chunks_szad, node); - arena_chunk_cache_maybe_insert(arena, node, cache); + extent_tree_szad_insert(chunks_szad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); } else { - /* Coalescing forward failed, so insert a new node. */ - node = arena_node_alloc(tsdn, arena); - if (node == NULL) { + /* Coalescing forward failed, so insert a new extent. */ + extent = arena_extent_alloc(tsdn, arena); + if (extent == NULL) { /* * Node allocation failed, which is an exceedingly * unlikely failure. Leak chunk after making sure its @@ -526,39 +525,38 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } goto label_return; } - extent_node_init(node, arena, chunk, size, !unzeroed, + extent_init(extent, arena, chunk, size, !unzeroed, committed); - extent_tree_ad_insert(chunks_ad, node); - extent_tree_szad_insert(chunks_szad, node); - arena_chunk_cache_maybe_insert(arena, node, cache); + extent_tree_ad_insert(chunks_ad, extent); + extent_tree_szad_insert(chunks_szad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); } /* Try to coalesce backward. 
*/ - prev = extent_tree_ad_prev(chunks_ad, node); - if (prev != NULL && (void *)((uintptr_t)extent_node_addr_get(prev) + - extent_node_size_get(prev)) == chunk && - extent_node_committed_get(prev) == committed && - !chunk_hooks->merge(extent_node_addr_get(prev), - extent_node_size_get(prev), chunk, size, false, arena->ind)) { + prev = extent_tree_ad_prev(chunks_ad, extent); + if (prev != NULL && (void *)((uintptr_t)extent_addr_get(prev) + + extent_size_get(prev)) == chunk && extent_committed_get(prev) == + committed && !chunk_hooks->merge(extent_addr_get(prev), + extent_size_get(prev), chunk, size, false, arena->ind)) { /* * Coalesce chunk with the previous address range. This does * not change the position within chunks_ad, so only - * remove/insert node from/into chunks_szad. + * remove/insert extent from/into chunks_szad. */ extent_tree_szad_remove(chunks_szad, prev); extent_tree_ad_remove(chunks_ad, prev); arena_chunk_cache_maybe_remove(arena, prev, cache); - extent_tree_szad_remove(chunks_szad, node); - arena_chunk_cache_maybe_remove(arena, node, cache); - extent_node_addr_set(node, extent_node_addr_get(prev)); - extent_node_size_set(node, extent_node_size_get(prev) + - extent_node_size_get(node)); - extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && - extent_node_zeroed_get(node)); - extent_tree_szad_insert(chunks_szad, node); - arena_chunk_cache_maybe_insert(arena, node, cache); + extent_tree_szad_remove(chunks_szad, extent); + arena_chunk_cache_maybe_remove(arena, extent, cache); + extent_addr_set(extent, extent_addr_get(prev)); + extent_size_set(extent, extent_size_get(prev) + + extent_size_get(extent)); + extent_zeroed_set(extent, extent_zeroed_get(prev) && + extent_zeroed_get(extent)); + extent_tree_szad_insert(chunks_szad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); - arena_node_dalloc(tsdn, arena, prev); + arena_extent_dalloc(tsdn, arena, prev); } label_return: diff --git a/src/extent.c b/src/extent.c index 
9f5146e5..8d24d6d6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,11 +15,11 @@ extent_quantize(size_t size) } JEMALLOC_INLINE_C int -extent_szad_comp(const extent_node_t *a, const extent_node_t *b) +extent_szad_comp(const extent_t *a, const extent_t *b) { int ret; - size_t a_qsize = extent_quantize(extent_node_size_get(a)); - size_t b_qsize = extent_quantize(extent_node_size_get(b)); + size_t a_qsize = extent_quantize(extent_size_get(a)); + size_t b_qsize = extent_quantize(extent_size_get(b)); /* * Compare based on quantized size rather than size, in order to sort @@ -27,8 +27,8 @@ extent_szad_comp(const extent_node_t *a, const extent_node_t *b) */ ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); if (ret == 0) { - uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); ret = (a_addr > b_addr) - (a_addr < b_addr); } @@ -37,17 +37,17 @@ extent_szad_comp(const extent_node_t *a, const extent_node_t *b) } /* Generate red-black tree functions. */ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, +rb_gen(, extent_tree_szad_, extent_tree_t, extent_t, szad_link, extent_szad_comp) JEMALLOC_INLINE_C int -extent_ad_comp(const extent_node_t *a, const extent_node_t *b) +extent_ad_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); return ((a_addr > b_addr) - (a_addr < b_addr)); } /* Generate red-black tree functions. 
*/ -rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp) +rb_gen(, extent_tree_ad_, extent_tree_t, extent_t, ad_link, extent_ad_comp) diff --git a/src/huge.c b/src/huge.c index b1ff918a..c30e78de 100644 --- a/src/huge.c +++ b/src/huge.c @@ -3,40 +3,40 @@ /******************************************************************************/ -static extent_node_t * -huge_node_get(const void *ptr) +static extent_t * +huge_extent_get(const void *ptr) { - extent_node_t *node; + extent_t *extent; - node = chunk_lookup(ptr, true); - assert(!extent_node_achunk_get(node)); + extent = chunk_lookup(ptr, true); + assert(!extent_achunk_get(extent)); - return (node); + return (extent); } static bool -huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_extent_set(tsdn_t *tsdn, const void *ptr, extent_t *extent) { - assert(extent_node_addr_get(node) == ptr); - assert(!extent_node_achunk_get(node)); - return (chunk_register(tsdn, ptr, node)); + assert(extent_addr_get(extent) == ptr); + assert(!extent_achunk_get(extent)); + return (chunk_register(tsdn, ptr, extent)); } static void -huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_extent_reset(tsdn_t *tsdn, const void *ptr, extent_t *extent) { bool err; - err = huge_node_set(tsdn, ptr, node); + err = huge_extent_set(tsdn, ptr, extent); assert(!err); } static void -huge_node_unset(const void *ptr, const extent_node_t *node) +huge_extent_unset(const void *ptr, const extent_t *extent) { - chunk_deregister(ptr, node); + chunk_deregister(ptr, extent); } void * @@ -54,7 +54,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, { void *ret; size_t ausize; - extent_node_t *node; + extent_t *extent; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. 
*/ @@ -66,10 +66,10 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); assert(ausize >= chunksize); - /* Allocate an extent node with which to track the chunk. */ - node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), + /* Allocate an extent with which to track the chunk. */ + extent = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_t)), CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); - if (node == NULL) + if (extent == NULL) return (NULL); /* @@ -81,22 +81,22 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsdn, node, NULL, true, true); + idalloctm(tsdn, extent, NULL, true, true); return (NULL); } - extent_node_init(node, arena, ret, usize, is_zeroed, true); + extent_init(extent, arena, ret, usize, is_zeroed, true); - if (huge_node_set(tsdn, ret, node)) { + if (huge_extent_set(tsdn, ret, extent)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize); - idalloctm(tsdn, node, NULL, true, true); + idalloctm(tsdn, extent, NULL, true, true); return (NULL); } - /* Insert node into huge. */ + /* Insert extent into huge. 
*/ malloc_mutex_lock(tsdn, &arena->huge_mtx); - ql_elm_new(node, ql_link); - ql_tail_insert(&arena->huge, node, ql_link); + ql_elm_new(extent, ql_link); + ql_tail_insert(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { @@ -137,7 +137,7 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; - extent_node_t *node; + extent_t *extent; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool pre_zeroed, post_zeroed; @@ -150,9 +150,9 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize == usize) return; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); - pre_zeroed = extent_node_zeroed_get(node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); + pre_zeroed = extent_zeroed_get(extent); /* Fill if necessary (shrinking). */ if (oldsize > usize) { @@ -171,12 +171,12 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_node_unset(ptr, node); - assert(extent_node_size_get(node) != usize); - extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + assert(extent_size_get(extent) != usize); + huge_extent_unset(ptr, extent); + extent_size_set(extent, usize); + huge_extent_reset(tsdn, ptr, extent); /* Update zeroed. 
*/ - extent_node_zeroed_set(node, post_zeroed); + extent_zeroed_set(extent, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); @@ -199,15 +199,15 @@ static bool huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize) { - extent_node_t *node; + extent_t *extent; arena_t *arena; chunk_hooks_t chunk_hooks; size_t cdiff; bool pre_zeroed, post_zeroed; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); - pre_zeroed = extent_node_zeroed_get(node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); + pre_zeroed = extent_zeroed_get(extent); chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); @@ -235,11 +235,11 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_node_unset(ptr, node); - extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + huge_extent_unset(ptr, extent); + extent_size_set(extent, usize); + huge_extent_reset(tsdn, ptr, extent); /* Update zeroed. */ - extent_node_zeroed_set(node, post_zeroed); + extent_zeroed_set(extent, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* Zap the excess chunks. 
*/ @@ -250,15 +250,16 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, static bool huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize, bool zero) { - extent_node_t *node; + size_t usize, bool zero) +{ + extent_t *extent; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); - is_zeroed_subchunk = extent_node_zeroed_get(node); + is_zeroed_subchunk = extent_zeroed_get(extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* @@ -273,9 +274,9 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_node_unset(ptr, node); - extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + huge_extent_unset(ptr, extent); + extent_size_set(extent, usize); + huge_extent_reset(tsdn, ptr, extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { @@ -390,21 +391,21 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, void huge_dalloc(tsdn_t *tsdn, void *ptr) { - extent_node_t *node; + extent_t *extent; arena_t *arena; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); - huge_node_unset(ptr, node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); + huge_extent_unset(ptr, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); - ql_remove(&arena->huge, node, ql_link); + ql_remove(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_node_addr_get(node), - extent_node_size_get(node)); - arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), - extent_node_addr_get(node), extent_node_size_get(node)); - idalloctm(tsdn, node, NULL, true, true); + huge_dalloc_junk(tsdn, 
extent_addr_get(extent), + extent_size_get(extent)); + arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), + extent_addr_get(extent), extent_size_get(extent)); + idalloctm(tsdn, extent, NULL, true, true); arena_decay_tick(tsdn, arena); } @@ -413,20 +414,20 @@ arena_t * huge_aalloc(const void *ptr) { - return (extent_node_arena_get(huge_node_get(ptr))); + return (extent_arena_get(huge_extent_get(ptr))); } size_t huge_salloc(tsdn_t *tsdn, const void *ptr) { size_t size; - extent_node_t *node; + extent_t *extent; arena_t *arena; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); - size = extent_node_size_get(node); + size = extent_size_get(extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (size); @@ -436,13 +437,13 @@ prof_tctx_t * huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { prof_tctx_t *tctx; - extent_node_t *node; + extent_t *extent; arena_t *arena; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); - tctx = extent_node_prof_tctx_get(node); + tctx = extent_prof_tctx_get(extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); return (tctx); @@ -451,13 +452,13 @@ huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { - extent_node_t *node; + extent_t *extent; arena_t *arena; - node = huge_node_get(ptr); - arena = extent_node_arena_get(node); + extent = huge_extent_get(ptr); + arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); - extent_node_prof_tctx_set(node, tctx); + extent_prof_tctx_set(extent, tctx); malloc_mutex_unlock(tsdn, &arena->huge_mtx); } diff --git a/src/tcache.c b/src/tcache.c index 175759c7..c4a99006 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -103,7 +103,7 @@ 
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, /* Lock the arena bin associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( *(tbin->avail - 1)); - arena_t *bin_arena = extent_node_arena_get(&chunk->node); + arena_t *bin_arena = extent_arena_get(&chunk->extent); arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { @@ -126,7 +126,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (extent_node_arena_get(&chunk->node) == bin_arena) { + if (extent_arena_get(&chunk->extent) == bin_arena) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = @@ -185,7 +185,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, /* Lock the arena associated with the first object. */ arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( *(tbin->avail - 1)); - arena_t *locked_arena = extent_node_arena_get(&chunk->node); + arena_t *locked_arena = extent_arena_get(&chunk->extent); UNUSED bool idump; if (config_prof) @@ -211,8 +211,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (extent_node_arena_get(&chunk->node) == - locked_arena) { + if (extent_arena_get(&chunk->extent) == locked_arena) { arena_dalloc_large_junked_locked(tsd_tsdn(tsd), locked_arena, chunk, ptr); } else { diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b54b3e86..30b1c541 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -32,21 +32,22 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { unsigned i; - extent_node_t node_a, node_b; + extent_t extent_a, extent_b; for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { rtree_t rtree; assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), "Unexpected 
rtree_new() failure"); - assert_false(rtree_set(&rtree, 0, &node_a), + assert_false(rtree_set(&rtree, 0, &extent_a), "Unexpected rtree_set() failure"); - assert_ptr_eq(rtree_get(&rtree, 0, true), &node_a, + assert_ptr_eq(rtree_get(&rtree, 0, true), &extent_a, "rtree_get() should return previously set value"); - assert_false(rtree_set(&rtree, ~((uintptr_t)0), &node_b), + assert_false(rtree_set(&rtree, ~((uintptr_t)0), &extent_b), "Unexpected rtree_set() failure"); - assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0), true), &node_b, + assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0), true), + &extent_b, "rtree_get() should return previously set value"); rtree_delete(&rtree); @@ -61,18 +62,18 @@ TEST_BEGIN(test_rtree_bits) for (i = 1; i < (sizeof(uintptr_t) << 3); i++) { uintptr_t keys[] = {0, 1, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; - extent_node_t node; + extent_t extent; rtree_t rtree; assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), "Unexpected rtree_new() failure"); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_false(rtree_set(&rtree, keys[j], &node), + assert_false(rtree_set(&rtree, keys[j], &extent), "Unexpected rtree_set() failure"); for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { assert_ptr_eq(rtree_get(&rtree, keys[k], true), - &node, "rtree_get() should return " + &extent, "rtree_get() should return " "previously set value and ignore " "insignificant key bits; i=%u, j=%u, k=%u, " "set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, @@ -101,7 +102,7 @@ TEST_BEGIN(test_rtree_random) sfmt = init_gen_rand(SEED); for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { uintptr_t keys[NSET]; - extent_node_t node; + extent_t extent; unsigned j; rtree_t rtree; @@ -110,13 +111,13 @@ TEST_BEGIN(test_rtree_random) for (j = 0; j < NSET; j++) { keys[j] = (uintptr_t)gen_rand64(sfmt); - assert_false(rtree_set(&rtree, keys[j], &node), + assert_false(rtree_set(&rtree, keys[j], &extent), "Unexpected rtree_set() failure"); - 
assert_ptr_eq(rtree_get(&rtree, keys[j], true), &node, + assert_ptr_eq(rtree_get(&rtree, keys[j], true), &extent, "rtree_get() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_get(&rtree, keys[j], true), &node, + assert_ptr_eq(rtree_get(&rtree, keys[j], true), &extent, "rtree_get() should return previously set value"); } From f4a58847d3de70b359e57b57b59f4825afdb58c6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 30 May 2016 10:45:38 -0700 Subject: [PATCH 0258/2608] Remove obsolete reference to Valgrind and quarantine. --- doc/jemalloc.xml.in | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index eddc88c1..e3c97bd8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1517,9 +1517,7 @@ malloc_conf = "xmalloc:true";]]> of the arena's discarded/cached allocations may accessed afterward. As part of this requirement, all thread caches which were used to allocate/deallocate in conjunction with the arena must be flushed - beforehand. This interface cannot be used if running inside Valgrind, - nor if the quarantine size is - non-zero. + beforehand. From 2d2b4e98c947f9fcaf4a9fd2215b685057e89212 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 28 Mar 2016 03:06:35 -0700 Subject: [PATCH 0259/2608] Add element acquire/release capabilities to rtree. This makes it possible to acquire short-term "ownership" of rtree elements so that it is possible to read an extent pointer *and* read the extent's contents with a guarantee that the element will not be modified until the ownership is released. This is intended as a mechanism for resolving rtree read/write races rather than as a way to lock extents. 
--- include/jemalloc/internal/chunk.h | 2 +- include/jemalloc/internal/private_symbols.txt | 14 +- include/jemalloc/internal/rtree.h | 233 ++++++++++++------ src/chunk.c | 12 +- src/rtree.c | 23 +- test/unit/rtree.c | 153 +++++++++--- 6 files changed, 302 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 4666a649..9e5502ac 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -87,7 +87,7 @@ JEMALLOC_INLINE extent_t * chunk_lookup(const void *ptr, bool dependent) { - return (rtree_get(&chunks_rtree, (uintptr_t)ptr, dependent)); + return (rtree_read(&chunks_rtree, (uintptr_t)ptr, dependent)); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 61b29b9d..478bc2ab 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -457,18 +457,24 @@ register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread +rtree_clear rtree_delete -rtree_get rtree_new rtree_node_valid -rtree_set +rtree_elm_acquire +rtree_elm_lookup +rtree_elm_read +rtree_elm_read_acquired +rtree_elm_release +rtree_elm_write +rtree_elm_write_acquired +rtree_read rtree_start_level rtree_subkey rtree_subtree_read rtree_subtree_read_hard rtree_subtree_tryread -rtree_val_read -rtree_val_write +rtree_write run_quantize_ceil run_quantize_floor s2u diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 45e49b74..59a7ab3c 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -6,7 +6,7 @@ */ #ifdef JEMALLOC_H_TYPES -typedef struct rtree_node_elm_s rtree_node_elm_t; +typedef struct rtree_elm_s rtree_elm_t; typedef struct rtree_level_s rtree_level_t; typedef struct rtree_s rtree_t; @@ -21,25 +21,24 @@ typedef struct rtree_s rtree_t; ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) /* Used for two-stage 
lock-free node initialization. */ -#define RTREE_NODE_INITIALIZING ((rtree_node_elm_t *)0x1) +#define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1) /* * The node allocation callback function's argument is the number of contiguous - * rtree_node_elm_t structures to allocate, and the resulting memory must be - * zeroed. + * rtree_elm_t structures to allocate, and the resulting memory must be zeroed. */ -typedef rtree_node_elm_t *(rtree_node_alloc_t)(size_t); -typedef void (rtree_node_dalloc_t)(rtree_node_elm_t *); +typedef rtree_elm_t *(rtree_node_alloc_t)(size_t); +typedef void (rtree_node_dalloc_t)(rtree_elm_t *); #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS -struct rtree_node_elm_s { +struct rtree_elm_s { union { - void *pun; - rtree_node_elm_t *child; - extent_t *val; + void *pun; + rtree_elm_t *child; + extent_t *extent; }; }; @@ -60,15 +59,15 @@ struct rtree_level_s { * * levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ] * - * levels[2] : [val(0x000000000000) | val(0x000000000001) | ...] + * levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...] * * This has practical implications on x64, which currently uses only the * lower 47 bits of virtual address space in userland, thus leaving * subtrees[0] unused and avoiding a level of tree traversal. */ union { - void *subtree_pun; - rtree_node_elm_t *subtree; + void *subtree_pun; + rtree_elm_t *subtree; }; /* Number of key bits distinguished by this level. 
*/ unsigned bits; @@ -98,10 +97,9 @@ struct rtree_s { bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, rtree_node_dalloc_t *dalloc); void rtree_delete(rtree_t *rtree); -rtree_node_elm_t *rtree_subtree_read_hard(rtree_t *rtree, +rtree_elm_t *rtree_subtree_read_hard(rtree_t *rtree, unsigned level); +rtree_elm_t *rtree_child_read_hard(rtree_t *rtree, rtree_elm_t *elm, unsigned level); -rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, - rtree_node_elm_t *elm, unsigned level); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -111,22 +109,27 @@ rtree_node_elm_t *rtree_child_read_hard(rtree_t *rtree, unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); -bool rtree_node_valid(rtree_node_elm_t *node); -rtree_node_elm_t *rtree_child_tryread(rtree_node_elm_t *elm, - bool dependent); -rtree_node_elm_t *rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, +bool rtree_node_valid(rtree_elm_t *node); +rtree_elm_t *rtree_child_tryread(rtree_elm_t *elm, bool dependent); +rtree_elm_t *rtree_child_read(rtree_t *rtree, rtree_elm_t *elm, unsigned level, bool dependent); -extent_t *rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, +extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); +void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); +rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent); -void rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, - const extent_t *val); -rtree_node_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, - bool dependent); -rtree_node_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, +rtree_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent); +rtree_elm_t *rtree_elm_lookup(rtree_t *rtree, uintptr_t key, + bool dependent, bool init_missing); -extent_t *rtree_get(rtree_t *rtree, uintptr_t key, 
bool dependent); -bool rtree_set(rtree_t *rtree, uintptr_t key, const extent_t *val); +bool rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent); +extent_t *rtree_read(rtree_t *rtree, uintptr_t key, bool dependent); +rtree_elm_t *rtree_elm_acquire(rtree_t *rtree, uintptr_t key, + bool dependent, bool init_missing); +extent_t *rtree_elm_read_acquired(rtree_elm_t *elm); +void rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent); +void rtree_elm_release(rtree_elm_t *elm); +void rtree_clear(rtree_t *rtree, uintptr_t key); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) @@ -154,18 +157,18 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) } JEMALLOC_ALWAYS_INLINE bool -rtree_node_valid(rtree_node_elm_t *node) +rtree_node_valid(rtree_elm_t *node) { return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_child_tryread(rtree_elm_t *elm, bool dependent) { - rtree_node_elm_t *child; + rtree_elm_t *child; - /* Double-checked read (first read may be stale. */ + /* Double-checked read (first read may be stale). 
*/ child = elm->child; if (!dependent && !rtree_node_valid(child)) child = atomic_read_p(&elm->pun); @@ -173,11 +176,11 @@ rtree_child_tryread(rtree_node_elm_t *elm, bool dependent) return (child); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * -rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_child_read(rtree_t *rtree, rtree_elm_t *elm, unsigned level, bool dependent) { - rtree_node_elm_t *child; + rtree_elm_t *child; child = rtree_child_tryread(elm, dependent); if (!dependent && unlikely(!rtree_node_valid(child))) @@ -187,40 +190,46 @@ rtree_child_read(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level, } JEMALLOC_ALWAYS_INLINE extent_t * -rtree_val_read(rtree_t *rtree, rtree_node_elm_t *elm, bool dependent) +rtree_elm_read(rtree_elm_t *elm, bool dependent) { + extent_t *extent; if (dependent) { /* - * Reading a val on behalf of a pointer to a valid allocation is - * guaranteed to be a clean read even without synchronization, - * because the rtree update became visible in memory before the - * pointer came into existence. + * Reading a value on behalf of a pointer to a valid allocation + * is guaranteed to be a clean read even without + * synchronization, because the rtree update became visible in + * memory before the pointer came into existence. */ - return (elm->val); + extent = elm->extent; } else { /* * An arbitrary read, e.g. on behalf of ivsalloc(), may not be * dependent on a previous rtree write, which means a stale read * could result if synchronization were omitted here. */ - return (atomic_read_p(&elm->pun)); + extent = (extent_t *)atomic_read_p(&elm->pun); } + + /* Mask the lock bit. 
*/ + extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); + + return (extent); } JEMALLOC_INLINE void -rtree_val_write(rtree_t *rtree, rtree_node_elm_t *elm, const extent_t *val) +rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { - atomic_write_p(&elm->pun, val); + atomic_write_p(&elm->pun, extent); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { - rtree_node_elm_t *subtree; + rtree_elm_t *subtree; - /* Double-checked read (first read may be stale. */ + /* Double-checked read (first read may be stale). */ subtree = rtree->levels[level].subtree; if (!dependent && unlikely(!rtree_node_valid(subtree))) subtree = atomic_read_p(&rtree->levels[level].subtree_pun); @@ -228,10 +237,10 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_ALWAYS_INLINE rtree_node_elm_t * +JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) { - rtree_node_elm_t *subtree; + rtree_elm_t *subtree; subtree = rtree_subtree_tryread(rtree, level, dependent); if (!dependent && unlikely(!rtree_node_valid(subtree))) @@ -240,16 +249,20 @@ rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) return (subtree); } -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) +JEMALLOC_ALWAYS_INLINE rtree_elm_t * +rtree_elm_lookup(rtree_t *rtree, uintptr_t key, bool dependent, + bool init_missing) { uintptr_t subkey; unsigned start_level; - rtree_node_elm_t *node; + rtree_elm_t *node; + + assert(!dependent || !init_missing); start_level = rtree_start_level(rtree, key); - node = rtree_subtree_tryread(rtree, start_level, dependent); + node = init_missing ? 
rtree_subtree_read(rtree, start_level, dependent) + : rtree_subtree_tryread(rtree, start_level, dependent); #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) switch (start_level + RTREE_GET_BIAS) { #define RTREE_GET_SUBTREE(level) \ @@ -259,7 +272,9 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) return (NULL); \ subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ - node = rtree_child_tryread(&node[subkey], dependent); \ + node = init_missing ? rtree_child_read(rtree, \ + &node[subkey], level - RTREE_GET_BIAS, dependent) : \ + rtree_child_tryread(&node[subkey], dependent); \ /* Fall through. */ #define RTREE_GET_LEAF(level) \ case level: \ @@ -272,8 +287,7 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) * node is a leaf, so it contains values rather than \ * child pointers. \ */ \ - return (rtree_val_read(rtree, &node[subkey], \ - dependent)); + return (&node[subkey]); #if RTREE_HEIGHT_MAX > 1 RTREE_GET_SUBTREE(0) #endif @@ -332,33 +346,94 @@ rtree_get(rtree_t *rtree, uintptr_t key, bool dependent) } JEMALLOC_INLINE bool -rtree_set(rtree_t *rtree, uintptr_t key, const extent_t *val) +rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent) { - uintptr_t subkey; - unsigned i, start_level; - rtree_node_elm_t *node, *child; + rtree_elm_t *elm; - start_level = rtree_start_level(rtree, key); + assert(extent != NULL); /* Use rtree_clear() for this case. */ + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - node = rtree_subtree_read(rtree, start_level, false); - if (node == NULL) + elm = rtree_elm_lookup(rtree, key, false, true); + if (elm == NULL) return (true); - for (i = start_level; /**/; i++, node = child) { - subkey = rtree_subkey(rtree, key, i); - if (i == rtree->height - 1) { - /* - * node is a leaf, so it contains values rather than - * child pointers. 
- */ - rtree_val_write(rtree, &node[subkey], val); - return (false); - } - assert(i + 1 < rtree->height); - child = rtree_child_read(rtree, &node[subkey], i, false); - if (child == NULL) - return (true); + assert(rtree_elm_read(elm, false) == NULL); + rtree_elm_write(elm, extent); + + return (false); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_read(rtree_t *rtree, uintptr_t key, bool dependent) +{ + rtree_elm_t *elm; + + elm = rtree_elm_lookup(rtree, key, dependent, false); + if (elm == NULL) + return (NULL); + + return (rtree_elm_read(elm, dependent)); +} + +JEMALLOC_INLINE rtree_elm_t * +rtree_elm_acquire(rtree_t *rtree, uintptr_t key, bool dependent, + bool init_missing) +{ + rtree_elm_t *elm; + + elm = rtree_elm_lookup(rtree, key, dependent, init_missing); + if (!dependent && elm == NULL) + return (NULL); + { + extent_t *extent; + void *s; + + do { + extent = rtree_elm_read(elm, false); + /* The least significant bit serves as a lock. */ + s = (void *)((uintptr_t)extent | (uintptr_t)0x1); + } while (atomic_cas_p(&elm->pun, (void *)extent, s)); } - not_reached(); + + return (elm); +} + +JEMALLOC_INLINE extent_t * +rtree_elm_read_acquired(rtree_elm_t *elm) +{ + extent_t *extent; + + assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); + extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1)); + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + + return (extent); +} + +JEMALLOC_INLINE void +rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent) +{ + + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); + elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1); + assert(rtree_elm_read_acquired(elm) == extent); +} + +JEMALLOC_INLINE void +rtree_elm_release(rtree_elm_t *elm) +{ + + rtree_elm_write(elm, rtree_elm_read_acquired(elm)); +} + +JEMALLOC_INLINE void +rtree_clear(rtree_t *rtree, uintptr_t key) +{ + rtree_elm_t *elm; + + elm = 
rtree_elm_acquire(rtree, key, true, false); + rtree_elm_write_acquired(elm, NULL); + rtree_elm_release(elm); } #endif diff --git a/src/chunk.c b/src/chunk.c index d3a600a5..31b86456 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -146,7 +146,7 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) assert(extent_addr_get(extent) == chunk); - if (rtree_set(&chunks_rtree, (uintptr_t)chunk, extent)) + if (rtree_write(&chunks_rtree, (uintptr_t)chunk, extent)) return (true); if (config_prof && opt_prof) { size_t size = extent_size_get(extent); @@ -170,10 +170,8 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) void chunk_deregister(const void *chunk, const extent_t *extent) { - bool err; - err = rtree_set(&chunks_rtree, (uintptr_t)chunk, NULL); - assert(!err); + rtree_clear(&chunks_rtree, (uintptr_t)chunk); if (config_prof && opt_prof) { size_t size = extent_size_get(extent); size_t nsub = (size == 0) ? 1 : size / chunksize; @@ -684,12 +682,12 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, return (false); } -static rtree_node_elm_t * +static rtree_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(tsdn_fetch(), nelms * - sizeof(rtree_node_elm_t))); + return ((rtree_elm_t *)base_alloc(tsdn_fetch(), nelms * + sizeof(rtree_elm_t))); } bool diff --git a/src/rtree.c b/src/rtree.c index 3166b45f..71c69c41 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -8,7 +8,10 @@ hmin(unsigned ha, unsigned hb) return (ha < hb ? ha : hb); } -/* Only the most significant bits of keys passed to rtree_[gs]et() are used. */ +/* + * Only the most significant bits of keys passed to rtree_{read,write}() are + * used. 
+ */ bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, rtree_node_dalloc_t *dalloc) @@ -62,7 +65,7 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, } static void -rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level) +rtree_delete_subtree(rtree_t *rtree, rtree_elm_t *node, unsigned level) { if (level + 1 < rtree->height) { @@ -70,7 +73,7 @@ rtree_delete_subtree(rtree_t *rtree, rtree_node_elm_t *node, unsigned level) nchildren = ZU(1) << rtree->levels[level].bits; for (i = 0; i < nchildren; i++) { - rtree_node_elm_t *child = node[i].child; + rtree_elm_t *child = node[i].child; if (child != NULL) rtree_delete_subtree(rtree, child, level + 1); } @@ -84,16 +87,16 @@ rtree_delete(rtree_t *rtree) unsigned i; for (i = 0; i < rtree->height; i++) { - rtree_node_elm_t *subtree = rtree->levels[i].subtree; + rtree_elm_t *subtree = rtree->levels[i].subtree; if (subtree != NULL) rtree_delete_subtree(rtree, subtree, i); } } -static rtree_node_elm_t * -rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) +static rtree_elm_t * +rtree_node_init(rtree_t *rtree, unsigned level, rtree_elm_t **elmp) { - rtree_node_elm_t *node; + rtree_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { /* @@ -114,15 +117,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) return (node); } -rtree_node_elm_t * +rtree_elm_t * rtree_subtree_read_hard(rtree_t *rtree, unsigned level) { return (rtree_node_init(rtree, level, &rtree->levels[level].subtree)); } -rtree_node_elm_t * -rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) +rtree_elm_t * +rtree_child_read_hard(rtree_t *rtree, rtree_elm_t *elm, unsigned level) { return (rtree_node_init(rtree, level, &elm->child)); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 30b1c541..671e2c8a 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -1,20 +1,24 @@ #include 
"test/jemalloc_test.h" -static rtree_node_elm_t * +static rtree_elm_t * node_alloc(size_t nelms) { + rtree_elm_t *node; - return ((rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t))); + node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t)); + assert_ptr_not_null(node, "Unexpected calloc() failure"); + + return (node); } static void -node_dalloc(rtree_node_elm_t *node) +node_dalloc(rtree_elm_t *node) { free(node); } -TEST_BEGIN(test_rtree_get_empty) +TEST_BEGIN(test_rtree_read_empty) { unsigned i; @@ -22,13 +26,89 @@ TEST_BEGIN(test_rtree_get_empty) rtree_t rtree; assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_get(&rtree, 0, false), - "rtree_get() should return NULL for empty tree"); + assert_ptr_null(rtree_read(&rtree, 0, false), + "rtree_read() should return NULL for empty tree"); rtree_delete(&rtree); } } TEST_END +#define NTHREADS 8 +#define MAX_NBITS 18 +#define NITERS 1000 +#define SEED 42 + +typedef struct { + unsigned nbits; + rtree_t rtree; + uint32_t seed; +} thd_start_arg_t; + +static void * +thd_start(void *varg) +{ + thd_start_arg_t *arg = (thd_start_arg_t *)varg; + sfmt_t *sfmt; + extent_t *extent; + unsigned i; + + sfmt = init_gen_rand(arg->seed); + extent = (extent_t *)malloc(sizeof(extent)); + assert_ptr_not_null(extent, "Unexpected malloc() failure"); + + for (i = 0; i < NITERS; i++) { + uintptr_t key = (uintptr_t)gen_rand64(sfmt); + if (i % 2 == 0) { + rtree_elm_t *elm; + + elm = rtree_elm_acquire(&arg->rtree, key, false, true); + assert_ptr_not_null(elm, + "Unexpected rtree_elm_acquire() failure"); + rtree_elm_write_acquired(elm, extent); + rtree_elm_release(elm); + + elm = rtree_elm_acquire(&arg->rtree, key, true, false); + assert_ptr_not_null(elm, + "Unexpected rtree_elm_acquire() failure"); + rtree_elm_read_acquired(elm); + rtree_elm_release(elm); + } else + rtree_read(&arg->rtree, key, false); + } + + free(extent); + fini_gen_rand(sfmt); + return (NULL); +} + 
+TEST_BEGIN(test_rtree_concurrent) +{ + thd_start_arg_t arg; + thd_t thds[NTHREADS]; + sfmt_t *sfmt; + unsigned i, j; + + sfmt = init_gen_rand(SEED); + for (i = 1; i < MAX_NBITS; i++) { + arg.nbits = i; + assert_false(rtree_new(&arg.rtree, arg.nbits, node_alloc, + node_dalloc), "Unexpected rtree_new() failure"); + arg.seed = gen_rand32(sfmt); + for (j = 0; j < NTHREADS; j++) + thd_create(&thds[j], thd_start, (void *)&arg); + for (j = 0; j < NTHREADS; j++) + thd_join(thds[j], NULL); + rtree_delete(&arg.rtree); + } + fini_gen_rand(sfmt); +} +TEST_END + +#undef NTHREADS +#undef MAX_NBITS +#undef NITERS +#undef SEED + TEST_BEGIN(test_rtree_extrema) { unsigned i; @@ -39,16 +119,16 @@ TEST_BEGIN(test_rtree_extrema) assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), "Unexpected rtree_new() failure"); - assert_false(rtree_set(&rtree, 0, &extent_a), - "Unexpected rtree_set() failure"); - assert_ptr_eq(rtree_get(&rtree, 0, true), &extent_a, - "rtree_get() should return previously set value"); + assert_false(rtree_write(&rtree, 0, &extent_a), + "Unexpected rtree_write() failure, i=%u", i); + assert_ptr_eq(rtree_read(&rtree, 0, true), &extent_a, + "rtree_read() should return previously set value, i=%u", i); - assert_false(rtree_set(&rtree, ~((uintptr_t)0), &extent_b), - "Unexpected rtree_set() failure"); - assert_ptr_eq(rtree_get(&rtree, ~((uintptr_t)0), true), + assert_false(rtree_write(&rtree, ~((uintptr_t)0), &extent_b), + "Unexpected rtree_write() failure, i=%u", i); + assert_ptr_eq(rtree_read(&rtree, ~((uintptr_t)0), true), &extent_b, - "rtree_get() should return previously set value"); + "rtree_read() should return previously set value, i=%u", i); rtree_delete(&rtree); } @@ -69,22 +149,21 @@ TEST_BEGIN(test_rtree_bits) "Unexpected rtree_new() failure"); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_false(rtree_set(&rtree, keys[j], &extent), - "Unexpected rtree_set() failure"); + assert_false(rtree_write(&rtree, keys[j], &extent), + "Unexpected 
rtree_write() failure"); for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { - assert_ptr_eq(rtree_get(&rtree, keys[k], true), - &extent, "rtree_get() should return " + assert_ptr_eq(rtree_read(&rtree, keys[k], true), + &extent, "rtree_read() should return " "previously set value and ignore " "insignificant key bits; i=%u, j=%u, k=%u, " "set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, k, keys[j], keys[k]); } - assert_ptr_null(rtree_get(&rtree, + assert_ptr_null(rtree_read(&rtree, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)), false), "Only leftmost rtree leaf should be set; " "i=%u, j=%u", i, j); - assert_false(rtree_set(&rtree, keys[j], NULL), - "Unexpected rtree_set() failure"); + rtree_clear(&rtree, keys[j]); } rtree_delete(&rtree); @@ -105,31 +184,36 @@ TEST_BEGIN(test_rtree_random) extent_t extent; unsigned j; rtree_t rtree; + rtree_elm_t *elm; assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), "Unexpected rtree_new() failure"); for (j = 0; j < NSET; j++) { keys[j] = (uintptr_t)gen_rand64(sfmt); - assert_false(rtree_set(&rtree, keys[j], &extent), - "Unexpected rtree_set() failure"); - assert_ptr_eq(rtree_get(&rtree, keys[j], true), &extent, - "rtree_get() should return previously set value"); + elm = rtree_elm_acquire(&rtree, keys[j], false, true); + assert_ptr_not_null(elm, + "Unexpected rtree_elm_acquire() failure"); + rtree_elm_write_acquired(elm, &extent); + rtree_elm_release(elm); + assert_ptr_eq(rtree_read(&rtree, keys[j], true), + &extent, + "rtree_read() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_get(&rtree, keys[j], true), &extent, - "rtree_get() should return previously set value"); + assert_ptr_eq(rtree_read(&rtree, keys[j], true), + &extent, "rtree_read() should return previously " + "set value, j=%u", j); } for (j = 0; j < NSET; j++) { - assert_false(rtree_set(&rtree, keys[j], NULL), - "Unexpected rtree_set() failure"); - assert_ptr_null(rtree_get(&rtree, keys[j], true), - "rtree_get() 
should return previously set value"); + rtree_clear(&rtree, keys[j]); + assert_ptr_null(rtree_read(&rtree, keys[j], true), + "rtree_read() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_null(rtree_get(&rtree, keys[j], true), - "rtree_get() should return previously set value"); + assert_ptr_null(rtree_read(&rtree, keys[j], true), + "rtree_read() should return previously set value"); } rtree_delete(&rtree); @@ -145,7 +229,8 @@ main(void) { return (test( - test_rtree_get_empty, + test_rtree_read_empty, + test_rtree_concurrent, test_rtree_extrema, test_rtree_bits, test_rtree_random)); From db72272bef91fa1b4709e89168aede0f01206d55 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 23 Mar 2016 20:29:33 -0700 Subject: [PATCH 0260/2608] Use rtree-based chunk lookups rather than pointer bit twiddling. Look up chunk metadata via the radix tree, rather than using CHUNK_ADDR2BASE(). Propagate pointer's containing extent. Minimize extent lookups by doing a single lookup (e.g. in free()) and propagating the pointer's extent into nearly all the functions that may need it. 
--- include/jemalloc/internal/arena.h | 133 +++++----- include/jemalloc/internal/chunk.h | 2 + include/jemalloc/internal/huge.h | 22 +- .../jemalloc/internal/jemalloc_internal.h.in | 124 +++++---- include/jemalloc/internal/private_symbols.txt | 2 +- include/jemalloc/internal/prof.h | 81 +++--- include/jemalloc/internal/tcache.h | 2 +- src/arena.c | 245 ++++++++++-------- src/chunk.c | 9 + src/ckh.c | 10 +- src/huge.c | 149 ++++------- src/jemalloc.c | 210 ++++++++------- src/prof.c | 32 ++- src/tcache.c | 31 ++- 14 files changed, 548 insertions(+), 504 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 93d0a327..d441aaf5 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -526,13 +526,13 @@ void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size); +void arena_prof_promoted(tsdn_t *tsdn, const extent_t *extent, + const void *ptr, size_t size); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm); -void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm); + arena_chunk_t *chunk, extent_t *extent, void *ptr, + arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind); + extent_t *extent, void *ptr, size_t pageind); #ifdef JEMALLOC_JET typedef void (arena_dalloc_junk_large_t)(void *, size_t); extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; @@ -540,17 +540,17 @@ extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; void arena_dalloc_junk_large(void *ptr, size_t usize); #endif void 
arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr); + arena_chunk_t *chunk, extent_t *extent, void *ptr); void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr); + extent_t *extent, void *ptr); #ifdef JEMALLOC_JET typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; #endif -bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t size, size_t extra, bool zero); -void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache); +bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t extra, bool zero); +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); ssize_t arena_lg_dirty_mult_default_get(void); @@ -637,20 +637,23 @@ szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_run_regind(arena_run_t *run, const arena_bin_info_t *bin_info, const void *ptr); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx); +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, const void *old_ptr, prof_tctx_t 
*old_tctx); void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote); -void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); +size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, + bool demote); +void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -1042,7 +1045,9 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) return (ret); } } +# endif /* JEMALLOC_ARENA_INLINE_A */ +# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_ALWAYS_INLINE szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits) { @@ -1051,6 +1056,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; if (config_debug) { + const extent_t *extent; arena_chunk_t *chunk; arena_t *arena; size_t pageind; @@ -1065,8 +1071,9 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(binind != BININD_INVALID); assert(binind < NBINS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_arena_get(&chunk->extent); + extent = iealloc(ptr); + chunk = (arena_chunk_t *)extent_addr_get(extent); + arena = extent_arena_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); assert(mapbits == actual_mapbits); @@ -1088,9 +1095,7 @@ arena_ptr_small_binind_get(const void *ptr, 
size_t mapbits) return (binind); } -# endif /* JEMALLOC_ARENA_INLINE_A */ -# ifdef JEMALLOC_ARENA_INLINE_B JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -1172,16 +1177,15 @@ arena_run_regind(arena_run_t *run, const arena_bin_info_t *bin_info, } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) +arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { prof_tctx_t *ret; - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { + if (likely(extent_achunk_get(extent))) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); @@ -1193,22 +1197,21 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = huge_prof_tctx_get(tsdn, ptr); + ret = huge_prof_tctx_get(tsdn, extent, ptr); return (ret); } JEMALLOC_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx) +arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx) { - arena_chunk_t *chunk; cassert(config_prof); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { + if (likely(extent_achunk_get(extent))) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); @@ -1231,12 +1234,12 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(tsdn, ptr, tctx); + huge_prof_tctx_set(tsdn, extent, ptr, tctx); } JEMALLOC_INLINE void -arena_prof_tctx_reset(tsdn_t *tsdn, 
const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx) +arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); @@ -1244,7 +1247,7 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && (uintptr_t)old_tctx > (uintptr_t)1U))) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); if (likely(chunk != ptr)) { size_t pageind; arena_chunk_map_misc_t *elm; @@ -1259,7 +1262,7 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(tsdn, ptr); + huge_prof_tctx_reset(tsdn, extent, ptr); } } @@ -1313,28 +1316,24 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(const void *ptr) { - arena_chunk_t *chunk; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) - return (extent_arena_get(&chunk->extent)); - else - return (huge_aalloc(ptr)); + return (extent_arena_get(iealloc(ptr))); } /* Return the size of the allocation pointed to by ptr. 
*/ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) +arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) { size_t ret; - arena_chunk_t *chunk; size_t pageind; szind_t binind; assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { + if (likely(extent_achunk_get(extent))) { + const arena_chunk_t *chunk = + (const arena_chunk_t *)extent_addr_get(extent); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); binind = arena_mapbits_binind_get(chunk, pageind); @@ -1367,22 +1366,23 @@ arena_salloc(tsdn_t *tsdn, const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(tsdn, ptr); + ret = huge_salloc(tsdn, extent, ptr); return (ret); } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) +arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, + bool slow_path) { - arena_chunk_t *chunk; size_t pageind, mapbits; assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { + if (likely(extent_achunk_get(extent))) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = arena_mapbits_get(chunk, pageind); assert(arena_mapbits_allocated_get(chunk, pageind) != 0); @@ -1395,7 +1395,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) binind, slow_path); } else { arena_dalloc_small(tsdn, - extent_arena_get(&chunk->extent), chunk, + extent_arena_get(extent), chunk, extent, ptr, pageind); } } else { @@ -1411,24 +1411,24 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) size - large_pad, slow_path); } else { arena_dalloc_large(tsdn, - extent_arena_get(&chunk->extent), chunk, + extent_arena_get(extent), chunk, 
extent, ptr); } } } else - huge_dalloc(tsdn, ptr); + huge_dalloc(tsdn, extent, ptr); } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) +arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path) { - arena_chunk_t *chunk; assert(!tsdn_null(tsdn) || tcache == NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (likely(chunk != ptr)) { + if (likely(extent_achunk_get(extent))) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1443,7 +1443,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, pageind) - large_pad; } } - assert(s2u(size) == s2u(arena_salloc(tsdn, ptr, false))); + assert(s2u(size) == s2u(arena_salloc(tsdn, extent, ptr, + false))); if (likely(size <= SMALL_MAXCLASS)) { /* Small allocation. */ @@ -1455,7 +1456,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_dalloc_small(tsdn, - extent_arena_get(&chunk->extent), chunk, + extent_arena_get(extent), chunk, extent, ptr, pageind); } } else { @@ -1467,12 +1468,12 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, size, slow_path); } else { arena_dalloc_large(tsdn, - extent_arena_get(&chunk->extent), chunk, + extent_arena_get(extent), chunk, extent, ptr); } } } else - huge_dalloc(tsdn, ptr); + huge_dalloc(tsdn, extent, ptr); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 9e5502ac..c13f2171 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -54,6 +54,8 @@ chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent); void 
chunk_deregister(const void *chunk, const extent_t *extent); +void chunk_reregister(tsdn_t *tsdn, const void *chunk, + const extent_t *extent); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index 8b501e5a..a385a202 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -12,20 +12,22 @@ void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); -bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize_min, size_t usize_max, bool zero); -void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t usize, size_t alignment, bool zero, tcache_t *tcache); +bool huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t usize_min, size_t usize_max, bool zero); +void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, + size_t oldsize, size_t usize, size_t alignment, bool zero, + tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsdn_t *tsdn, void *ptr); -arena_t *huge_aalloc(const void *ptr); -size_t huge_salloc(tsdn_t *tsdn, const void *ptr); -prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr); +void huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr); +size_t huge_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); +prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const 
void *ptr, + prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e487db14..1fc9d3d7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -959,6 +959,20 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #define JEMALLOC_ARENA_INLINE_A #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_INLINE_A + +#ifndef JEMALLOC_ENABLE_INLINE +extent_t *iealloc(const void *ptr); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE extent_t * +iealloc(const void *ptr) +{ + + return (chunk_lookup(ptr, true)); +} +#endif + #include "jemalloc/internal/tcache.h" #define JEMALLOC_ARENA_INLINE_B #include "jemalloc/internal/arena.h" @@ -968,7 +982,8 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #ifndef JEMALLOC_ENABLE_INLINE extent_t *iealloc(const void *ptr); arena_t *iaalloc(const void *ptr); -size_t isalloc(tsdn_t *tsdn, const void *ptr, bool demote); +size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, + bool demote); void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, @@ -979,30 +994,23 @@ void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, - bool slow_path); -void idalloc(tsd_t *tsd, void *ptr); -void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool 
slow_path); -void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena); -void *iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero); -bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero); +void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, + bool is_metadata, bool slow_path); +void idalloc(tsd_t *tsd, extent_t *extent, void *ptr); +void isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path); +void *iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena); +void *iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero); +bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, + size_t size, size_t extra, size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE extent_t * -iealloc(const void *ptr) -{ - - return (chunk_lookup(ptr, true)); -} - JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(const void *ptr) { @@ -1016,17 +1024,18 @@ iaalloc(const void *ptr) * Typical usage: * tsdn_t *tsdn = [...] * void *ptr = [...] 
- * size_t sz = isalloc(tsdn, ptr, config_prof); + * extent_t *extent = iealloc(ptr); + * size_t sz = isalloc(tsdn, extent, ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const void *ptr, bool demote) +isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) { assert(ptr != NULL); /* Demotion only makes sense if config_prof is true. */ assert(config_prof || !demote); - return (arena_salloc(tsdn, ptr, demote)); + return (arena_salloc(tsdn, extent, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void * @@ -1041,8 +1050,8 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), - isalloc(tsdn, ret, config_prof)); + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, + iealloc(ret), ret, config_prof)); } return (ret); } @@ -1069,8 +1078,8 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, ret, - config_prof)); + arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, + iealloc(ret), ret, config_prof)); } return (ret); } @@ -1104,43 +1113,45 @@ ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) assert(extent_addr_get(extent) == ptr || extent_achunk_get(extent)); - return (isalloc(tsdn, ptr, demote)); + return (isalloc(tsdn, extent, ptr, demote)); } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_metadata, - bool slow_path) +idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, + bool is_metadata, bool slow_path) { assert(ptr != NULL); assert(!is_metadata || tcache == NULL); assert(!is_metadata || iaalloc(ptr)->ind < 
narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, ptr, - config_prof)); + arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, extent, + ptr, config_prof)); } - arena_dalloc(tsdn, ptr, tcache, slow_path); + arena_dalloc(tsdn, extent, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, void *ptr) +idalloc(tsd_t *tsd, extent_t *extent, void *ptr) { - idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); + idalloctm(tsd_tsdn(tsd), extent, ptr, tcache_get(tsd, false), false, + true); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) +isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path) { - arena_sdalloc(tsdn, ptr, size, tcache, slow_path); + arena_sdalloc(tsdn, extent, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) +iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, + size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena) { void *p; size_t usize, copysize; @@ -1166,13 +1177,13 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? 
size : oldsize; memcpy(p, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, true); + isdalloct(tsdn, extent, ptr, oldsize, tcache, true); return (p); } JEMALLOC_ALWAYS_INLINE void * -iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena) +iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); @@ -1184,26 +1195,26 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, * Existing object alignment is inadequate; allocate new space * and copy. */ - return (iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, - zero, tcache, arena)); + return (iralloct_realign(tsdn, extent, ptr, oldsize, size, 0, + alignment, zero, tcache, arena)); } - return (arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache)); + return (arena_ralloc(tsdn, arena, extent, ptr, oldsize, size, alignment, + zero, tcache)); } JEMALLOC_ALWAYS_INLINE void * -iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero) +iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero) { - return (iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd, true), NULL)); + return (iralloct(tsd_tsdn(tsd), extent, ptr, oldsize, size, alignment, + zero, tcache_get(tsd, true), NULL)); } JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) +ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero) { assert(ptr != NULL); @@ -1215,7 +1226,8 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, return (true); } - return (arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero)); + return 
(arena_ralloc_no_move(tsdn, extent, ptr, oldsize, size, extra, + zero)); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 478bc2ab..5f4a4b0b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -181,6 +181,7 @@ chunk_postfork_parent chunk_prefork chunk_purge_wrapper chunk_register +chunk_reregister chunks_rtree chunksize chunksize_mask @@ -277,7 +278,6 @@ hash_rotl_64 hash_x64_128 hash_x86_128 hash_x86_32 -huge_aalloc huge_dalloc huge_dalloc_junk huge_malloc diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 691e153d..81f02d11 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -281,8 +281,8 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); +void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, + const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); @@ -330,21 +330,23 @@ void prof_sample_threshold_update(prof_tdata_t *tdata); bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *tctx); +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(tsdn_t *tsdn, 
extent_t *extent, const void *ptr, + size_t usize, const void *old_ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, - size_t old_usize, prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const void *ptr, size_t usize); +void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, + const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, + size_t usize); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) @@ -398,34 +400,35 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr) +prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(tsdn, ptr)); + return (arena_prof_tctx_get(tsdn, extent, ptr)); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsdn, ptr, usize, tctx); + arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, const void *ptr, size_t usize, const void *old_ptr, - prof_tctx_t *old_tctx) +prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + const 
void *old_ptr, prof_tctx_t *old_tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsdn, ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(tsdn, extent, ptr, usize, old_ptr, old_tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -480,23 +483,26 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) +prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsdn, ptr, true)); + assert(usize == isalloc(tsdn, extent, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsdn, ptr, usize, tctx); - else - prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); + else { + prof_tctx_set(tsdn, extent, ptr, usize, + (prof_tctx_t *)(uintptr_t)1U); + } } JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, bool updated, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx) +prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, + size_t old_usize, prof_tctx_t *old_tctx) { bool sampled, old_sampled; @@ -504,7 +510,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr, true)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() @@ -520,22 +526,25 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, sampled = ((uintptr_t)tctx > (uintptr_t)1U); old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); - if (unlikely(sampled)) - prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); - else - prof_tctx_reset(tsd_tsdn(tsd), ptr, usize, old_ptr, old_tctx); + if (unlikely(sampled)) { + prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, + tctx); + } else { + prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, usize, old_ptr, + old_tctx); + } if (unlikely(old_sampled)) prof_free_sampled_object(tsd, old_usize, old_tctx); } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize) +prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr, true)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 70883b1a..d6d27506 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -371,7 +371,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (config_prof && usize == LARGE_MINCLASS) { arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(ret); + (arena_chunk_t *)extent_addr_get(iealloc(ret)); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> LG_PAGE); arena_mapbits_large_binind_set(chunk, pageind, diff --git a/src/arena.c b/src/arena.c index b59f7f1b..3abbc623 100644 --- a/src/arena.c +++ b/src/arena.c @@ -45,10 +45,10 @@ unsigned nhclasses; /* Number of huge size classes. 
*/ static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); -static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, - bool dirty, bool cleaned, bool decommitted); +static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, bool dirty, bool cleaned, bool decommitted); static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); + arena_chunk_t *chunk, extent_t *extent, arena_run_t *run, arena_bin_t *bin); static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); @@ -264,9 +264,9 @@ arena_run_reg_alloc(arena_run_t *run, const arena_bin_info_t *bin_info) } JEMALLOC_INLINE_C void -arena_run_reg_dalloc(arena_run_t *run, void *ptr) +arena_run_reg_dalloc(arena_run_t *run, extent_t *extent, void *ptr) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); @@ -375,15 +375,15 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, } static bool -arena_run_split_large_helper(arena_t *arena, arena_run_t *run, size_t size, - bool remove, bool zero) +arena_run_split_large_helper(arena_t *arena, extent_t *extent, arena_run_t *run, + size_t size, bool remove, bool zero) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; size_t flag_dirty, flag_decommitted, run_ind, need_pages; size_t flag_unzeroed_mask; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + chunk = (arena_chunk_t *)extent_addr_get(extent); miscelm = arena_run_to_miscelm(run); run_ind = arena_miscelm_to_pageind(miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); @@ -439,22 +439,26 @@ arena_run_split_large_helper(arena_t *arena, 
arena_run_t *run, size_t size, } static bool -arena_run_split_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) +arena_run_split_large(arena_t *arena, extent_t *extent, arena_run_t *run, + size_t size, bool zero) { - return (arena_run_split_large_helper(arena, run, size, true, zero)); + return (arena_run_split_large_helper(arena, extent, run, size, true, + zero)); } static bool -arena_run_init_large(arena_t *arena, arena_run_t *run, size_t size, bool zero) +arena_run_init_large(arena_t *arena, extent_t *extent, arena_run_t *run, + size_t size, bool zero) { - return (arena_run_split_large_helper(arena, run, size, false, zero)); + return (arena_run_split_large_helper(arena, extent, run, size, false, + zero)); } static bool -arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, - szind_t binind) +arena_run_split_small(arena_t *arena, extent_t *extent, arena_run_t *run, + size_t size, szind_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -462,7 +466,7 @@ arena_run_split_small(arena_t *arena, arena_run_t *run, size_t size, assert(binind != BININD_INVALID); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + chunk = (arena_chunk_t *)extent_addr_get(extent); miscelm = arena_run_to_miscelm(run); run_ind = arena_miscelm_to_pageind(miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); @@ -1037,7 +1041,7 @@ arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); if (run != NULL) { - if (arena_run_split_large(arena, run, size, zero)) + if (arena_run_split_large(arena, iealloc(run), run, size, zero)) run = NULL; } return (run); @@ -1063,7 +1067,7 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; - if (arena_run_split_large(arena, run, size, zero)) + if (arena_run_split_large(arena, iealloc(run), 
run, size, zero)) run = NULL; return (run); } @@ -1081,7 +1085,8 @@ arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) { arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { - if (arena_run_split_small(arena, run, size, binind)) + if (arena_run_split_small(arena, iealloc(run), run, size, + binind)) run = NULL; } return (run); @@ -1108,7 +1113,8 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; - if (arena_run_split_small(arena, run, size, binind)) + if (arena_run_split_small(arena, iealloc(run), run, size, + binind)) run = NULL; return (run); } @@ -1435,8 +1441,9 @@ arena_dirty_count(arena_t *arena) npages = extent_size_get(chunkselm) >> LG_PAGE; chunkselm = qr_next(chunkselm, cc_link); } else { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - rdelm); + extent_t *extent = iealloc(rdelm); + arena_chunk_t *chunk = + (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(rdelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -1497,8 +1504,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, LG_PAGE)); chunkselm = chunkselm_next; } else { + extent_t *extent = iealloc(rdelm); arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(rdelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -1523,7 +1531,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_alloc(tsdn, arena); /* Temporarily allocate the free dirty run. */ - arena_run_split_large(arena, run, run_size, false); + arena_run_split_large(arena, extent, run, run_size, + false); /* Stash. */ if (false) qr_new(rdelm, rd_link); /* Redundant. 
*/ @@ -1577,8 +1586,9 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } else { size_t pageind, run_size, flag_unzeroed, flags, i; bool decommitted; + extent_t *extent = iealloc(rdelm); arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(rdelm); pageind = arena_miscelm_to_pageind(miscelm); @@ -1661,8 +1671,9 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, size, zeroed, committed); } else { + extent_t *extent = iealloc(rdelm); arena_chunk_t *chunk = - (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); + (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(rdelm); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -1670,7 +1681,7 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, pageind) != 0); arena_run_t *run = &miscelm->run; qr_remove(rdelm, rd_link); - arena_run_dalloc(tsdn, arena, run, false, true, + arena_run_dalloc(tsdn, arena, extent, run, false, true, decommitted); } } @@ -1755,10 +1766,10 @@ arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) if (arena_mapbits_large_get(chunk, pageind) != 0) { void *ptr = (void *)((uintptr_t)chunk + (pageind << LG_PAGE)); - size_t usize = isalloc(tsd_tsdn(tsd), ptr, - config_prof); + size_t usize = isalloc(tsd_tsdn(tsd), + &chunk->extent, ptr, config_prof); - prof_free(tsd, ptr, usize); + prof_free(tsd, &chunk->extent, ptr, usize); npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; } else { @@ -1820,12 +1831,14 @@ arena_reset(tsd_t *tsd, arena_t *arena) size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); - if (config_stats || (config_prof && opt_prof)) - usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + if (config_stats || (config_prof && opt_prof)) { + usize = 
isalloc(tsd_tsdn(tsd), extent, ptr, + config_prof); + } /* Remove huge allocation from prof sample set. */ if (config_prof && opt_prof) - prof_free(tsd, ptr, usize); - huge_dalloc(tsd_tsdn(tsd), ptr); + prof_free(tsd, extent, ptr, usize); + huge_dalloc(tsd_tsdn(tsd), extent, ptr); malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); /* Cancel out unwanted effects on stats. */ if (config_stats) @@ -1997,14 +2010,14 @@ arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, } static void -arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, - bool cleaned, bool decommitted) +arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, bool dirty, bool cleaned, bool decommitted) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; size_t size, run_ind, run_pages, flag_dirty, flag_decommitted; - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + chunk = (arena_chunk_t *)extent_addr_get(extent); miscelm = arena_run_to_miscelm(run); run_ind = arena_miscelm_to_pageind(miscelm); assert(run_ind >= map_bias); @@ -2074,7 +2087,7 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, static void arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize) + extent_t *extent, arena_run_t *run, size_t oldsize, size_t newsize) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2109,13 +2122,14 @@ arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind+head_npages))); - arena_run_dalloc(tsdn, arena, run, false, false, (flag_decommitted != - 0)); + arena_run_dalloc(tsdn, arena, extent, run, false, false, + (flag_decommitted != 0)); } static void arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, size_t oldsize, size_t newsize, 
bool dirty) + extent_t *extent, arena_run_t *run, size_t oldsize, size_t newsize, + bool dirty) { arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); size_t pageind = arena_miscelm_to_pageind(miscelm); @@ -2154,8 +2168,8 @@ arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); tail_run = &tail_miscelm->run; - arena_run_dalloc(tsdn, arena, tail_run, dirty, false, (flag_decommitted - != 0)); + arena_run_dalloc(tsdn, arena, extent, tail_run, dirty, false, + (flag_decommitted != 0)); } static void @@ -2251,6 +2265,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) assert(bin->runcur->nfree > 0); ret = arena_run_reg_alloc(bin->runcur, bin_info); if (run != NULL) { + extent_t *extent; arena_chunk_t *chunk; /* @@ -2261,10 +2276,11 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) * arena_bin_lower_run() must be called, as if a region * were just deallocated from the run. 
*/ - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + extent = iealloc(run); + chunk = (arena_chunk_t *)extent_addr_get(extent); if (run->nfree == bin_info->nregs) { - arena_dalloc_bin_run(tsdn, arena, chunk, run, - bin); + arena_dalloc_bin_run(tsdn, arena, chunk, extent, + run, bin); } else arena_bin_lower_run(arena, chunk, run, bin); } @@ -2499,6 +2515,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, void *ret; size_t alloc_size, leadsize, trailsize; arena_run_t *run; + extent_t *extent; arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -2520,7 +2537,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run); + extent = iealloc(run); + chunk = (arena_chunk_t *)extent_addr_get(extent); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); @@ -2531,20 +2549,22 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (leadsize != 0) { arena_chunk_map_misc_t *head_miscelm = miscelm; arena_run_t *head_run = run; + extent_t *head_extent = extent; miscelm = arena_miscelm_get_mutable(chunk, arena_miscelm_to_pageind(head_miscelm) + (leadsize >> LG_PAGE)); run = &miscelm->run; + extent = iealloc(run); - arena_run_trim_head(tsdn, arena, chunk, head_run, alloc_size, - alloc_size - leadsize); + arena_run_trim_head(tsdn, arena, chunk, head_extent, head_run, + alloc_size, alloc_size - leadsize); } if (trailsize != 0) { - arena_run_trim_tail(tsdn, arena, chunk, run, usize + large_pad + - trailsize, usize + large_pad, false); + arena_run_trim_tail(tsdn, arena, chunk, extent, run, usize + + large_pad + trailsize, usize + large_pad, false); } - if (arena_run_init_large(arena, run, usize + large_pad, zero)) { + if (arena_run_init_large(arena, extent, run, usize + large_pad, zero)) { size_t run_ind = 
arena_miscelm_to_pageind(arena_run_to_miscelm(run)); bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0); @@ -2552,7 +2572,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, run_ind) != 0); assert(decommitted); /* Cause of OOM. */ - arena_run_dalloc(tsdn, arena, run, dirty, false, decommitted); + arena_run_dalloc(tsdn, arena, extent, run, dirty, false, + decommitted); malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } @@ -2616,7 +2637,8 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) +arena_prof_promoted(tsdn_t *tsdn, const extent_t *extent, const void *ptr, + size_t size) { arena_chunk_t *chunk; size_t pageind; @@ -2624,32 +2646,30 @@ arena_prof_promoted(tsdn_t *tsdn, const void *ptr, size_t size) cassert(config_prof); assert(ptr != NULL); - assert(CHUNK_ADDR2BASE(ptr) != ptr); - assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsdn, ptr, true) == LARGE_MINCLASS); + assert(extent_addr_get(extent) != ptr); + assert(isalloc(tsdn, extent, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, extent, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); + chunk = (arena_chunk_t *)extent_addr_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = size2index(size); assert(binind < NBINS); arena_mapbits_large_binind_set(chunk, pageind, binind); - assert(isalloc(tsdn, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsdn, ptr, true) == size); + assert(isalloc(tsdn, extent, ptr, false) == LARGE_MINCLASS); + assert(isalloc(tsdn, extent, ptr, true) == size); } static void -arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) +arena_dissociate_bin_run(extent_t *extent, arena_run_t *run, arena_bin_t *bin) { /* Dissociate run from bin. 
*/ if (run == bin->runcur) bin->runcur = NULL; else { - szind_t binind = arena_bin_index(extent_arena_get( - &chunk->extent), bin); + szind_t binind = arena_bin_index(extent_arena_get(extent), bin); const arena_bin_info_t *bin_info = &arena_bin_info[binind]; /* @@ -2668,7 +2688,7 @@ arena_dissociate_bin_run(arena_chunk_t *chunk, arena_run_t *run, static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin) + extent_t *extent, arena_run_t *run, arena_bin_t *bin) { assert(run != bin->runcur); @@ -2676,7 +2696,7 @@ arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ malloc_mutex_lock(tsdn, &arena->lock); - arena_run_dalloc(tsdn, arena, run, true, false, false); + arena_run_dalloc(tsdn, arena, extent, run, true, false, false); malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ malloc_mutex_lock(tsdn, &bin->lock); @@ -2707,7 +2727,7 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) + extent_t *extent, void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) { size_t pageind, rpages_ind; arena_run_t *run; @@ -2725,10 +2745,10 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, if (!junked && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, bin_info); - arena_run_reg_dalloc(run, ptr); + arena_run_reg_dalloc(run, extent, ptr); if (run->nfree == bin_info->nregs) { - arena_dissociate_bin_run(chunk, run, bin); - arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); + arena_dissociate_bin_run(extent, run, bin); + arena_dalloc_bin_run(tsdn, arena, chunk, extent, run, bin); } else if (run->nfree == 1 && run != bin->runcur) arena_bin_lower_run(arena, chunk, run, bin); @@ -2740,15 
+2760,17 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, arena_chunk_map_bits_t *bitselm) + arena_chunk_t *chunk, extent_t *extent, void *ptr, + arena_chunk_map_bits_t *bitselm) { - arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, true); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, extent, ptr, bitselm, + true); } -void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, - size_t pageind, arena_chunk_map_bits_t *bitselm) +static void +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, + extent_t *extent, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) { arena_run_t *run; arena_bin_t *bin; @@ -2758,13 +2780,14 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, void *ptr, run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; bin = &arena->bins[run->binind]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, chunk, ptr, bitselm, false); + arena_dalloc_bin_locked_impl(tsdn, arena, chunk, extent, ptr, bitselm, + false); malloc_mutex_unlock(tsdn, &bin->lock); } void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t pageind) + extent_t *extent, void *ptr, size_t pageind) { arena_chunk_map_bits_t *bitselm; @@ -2774,7 +2797,7 @@ arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, pageind)) != BININD_INVALID); } bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(tsdn, arena, chunk, ptr, pageind, bitselm); + arena_dalloc_bin(tsdn, arena, chunk, extent, ptr, pageind, bitselm); arena_decay_tick(tsdn, arena); } @@ -2798,7 +2821,7 @@ arena_dalloc_junk_large_t *arena_dalloc_junk_large = static void arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr, bool junked) + arena_chunk_t *chunk, 
extent_t *extent, void *ptr, bool junked) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, @@ -2821,31 +2844,31 @@ arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, } } - arena_run_dalloc(tsdn, arena, run, true, false, false); + arena_run_dalloc(tsdn, arena, extent, run, true, false, false); } void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, void *ptr) + arena_chunk_t *chunk, extent_t *extent, void *ptr) { - arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, true); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, extent, ptr, true); } void arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr) + extent_t *extent, void *ptr) { malloc_mutex_lock(tsdn, &arena->lock); - arena_dalloc_large_locked_impl(tsdn, arena, chunk, ptr, false); + arena_dalloc_large_locked_impl(tsdn, arena, chunk, extent, ptr, false); malloc_mutex_unlock(tsdn, &arena->lock); arena_decay_tick(tsdn, arena); } static void arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t size) + extent_t *extent, void *ptr, size_t oldsize, size_t size) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, @@ -2859,8 +2882,8 @@ arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, * allocations. 
*/ malloc_mutex_lock(tsdn, &arena->lock); - arena_run_trim_tail(tsdn, arena, chunk, run, oldsize + large_pad, size + - large_pad, true); + arena_run_trim_tail(tsdn, arena, chunk, extent, run, oldsize + + large_pad, size + large_pad, true); if (config_stats) { szind_t oldindex = size2index(oldsize) - NBINS; szind_t index = size2index(size) - NBINS; @@ -2916,7 +2939,8 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, goto label_fail; run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; - if (arena_run_split_large(arena, run, splitsize, zero)) + if (arena_run_split_large(arena, iealloc(run), run, splitsize, + zero)) goto label_fail; if (config_cache_oblivious && zero) { @@ -3005,8 +3029,8 @@ arena_ralloc_junk_large_t *arena_ralloc_junk_large = * always fail if growing an object, and the following run is already in use. */ static bool -arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero) +arena_ralloc_large(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero) { arena_chunk_t *chunk; arena_t *arena; @@ -3016,8 +3040,8 @@ arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, return (false); } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena = extent_arena_get(&chunk->extent); + chunk = (arena_chunk_t *)extent_addr_get(extent); + arena = extent_arena_get(extent); if (oldsize < usize_max) { bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, @@ -3026,10 +3050,12 @@ arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, if (unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - isalloc(tsdn, ptr, config_prof) - oldsize); + isalloc(tsdn, extent, ptr, config_prof) - + oldsize); } else if (unlikely(opt_zero)) { memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(tsdn, ptr, config_prof) - oldsize); + isalloc(tsdn, extent, ptr, 
config_prof) - + oldsize); } } return (ret); @@ -3038,13 +3064,14 @@ arena_ralloc_large(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, assert(oldsize > usize_max); /* Fill before shrinking in order avoid a race. */ arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(tsdn, arena, chunk, ptr, oldsize, usize_max); + arena_ralloc_large_shrink(tsdn, arena, chunk, extent, ptr, oldsize, + usize_max); return (false); } bool -arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero) +arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, + size_t size, size_t extra, bool zero) { size_t usize_min, usize_max; @@ -3057,8 +3084,6 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, usize_min = s2u(size); usize_max = s2u(size + extra); if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { - arena_chunk_t *chunk; - /* * Avoid moving the allocation if the size class can be left the * same. 
@@ -3073,17 +3098,16 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, } else { if (usize_max <= SMALL_MAXCLASS) return (true); - if (arena_ralloc_large(tsdn, ptr, oldsize, usize_min, - usize_max, zero)) + if (arena_ralloc_large(tsdn, extent, ptr, oldsize, + usize_min, usize_max, zero)) return (true); } - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - arena_decay_tick(tsdn, extent_arena_get(&chunk->extent)); + arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } else { - return (huge_ralloc_no_move(tsdn, ptr, oldsize, usize_min, - usize_max, zero)); + return (huge_ralloc_no_move(tsdn, extent, ptr, oldsize, + usize_min, usize_max, zero)); } } @@ -3102,8 +3126,8 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache) +arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t usize; @@ -3116,7 +3140,8 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t copysize; /* Try to avoid moving the allocation. */ - if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) + if (!arena_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, 0, + zero)) return (ptr); /* @@ -3136,10 +3161,10 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, copysize = (usize < oldsize) ? 
usize : oldsize; memcpy(ret, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, true); + isdalloct(tsdn, extent, ptr, oldsize, tcache, true); } else { - ret = huge_ralloc(tsdn, arena, ptr, oldsize, usize, alignment, - zero, tcache); + ret = huge_ralloc(tsdn, arena, extent, ptr, oldsize, usize, + alignment, zero, tcache); } return (ret); } diff --git a/src/chunk.c b/src/chunk.c index 31b86456..e35bb30a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -180,6 +180,15 @@ chunk_deregister(const void *chunk, const extent_t *extent) } } +void +chunk_reregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent) +{ + bool err; + + err = chunk_register(tsdn, chunk, extent); + assert(!err); +} + /* * Do first-best-fit chunk selection, i.e. select the lowest chunk that best * fits. diff --git a/src/ckh.c b/src/ckh.c index 747c1c86..3135ee74 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -283,12 +283,12 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsdn, iealloc(tab), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsdn, iealloc(ckh->tab), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -330,7 +330,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsdn, iealloc(tab), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsdn, iealloc(ckh->tab), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -421,7 +421,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsdn, iealloc(ckh->tab), ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/huge.c b/src/huge.c index c30e78de..e42ea9c1 100644 --- a/src/huge.c +++ b/src/huge.c @@ -3,42 +3,6 @@ /******************************************************************************/ -static extent_t * -huge_extent_get(const void *ptr) -{ - extent_t *extent; - - extent = chunk_lookup(ptr, true); - assert(!extent_achunk_get(extent)); - - return (extent); -} - -static bool -huge_extent_set(tsdn_t *tsdn, const void *ptr, extent_t *extent) -{ - - assert(extent_addr_get(extent) == ptr); - assert(!extent_achunk_get(extent)); - return (chunk_register(tsdn, ptr, extent)); -} - -static void -huge_extent_reset(tsdn_t *tsdn, const void *ptr, extent_t *extent) -{ - bool err; - - err = huge_extent_set(tsdn, ptr, extent); - assert(!err); -} - -static void -huge_extent_unset(const void *ptr, const extent_t *extent) -{ - - chunk_deregister(ptr, extent); -} - void * huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { @@ -81,15 +45,15 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsdn, extent, NULL, true, true); + idalloctm(tsdn, iealloc(extent), extent, NULL, true, true); return (NULL); } extent_init(extent, arena, ret, usize, is_zeroed, true); - if (huge_extent_set(tsdn, ret, extent)) { + if (chunk_register(tsdn, ret, extent)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize); - 
idalloctm(tsdn, extent, NULL, true, true); + idalloctm(tsdn, iealloc(extent), extent, NULL, true, true); return (NULL); } @@ -133,11 +97,10 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize_min, size_t usize_max, bool zero) +huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t usize_min, size_t usize_max, bool zero) { size_t usize, usize_next; - extent_t *extent; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool pre_zeroed, post_zeroed; @@ -150,7 +113,6 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize == usize) return; - extent = huge_extent_get(ptr); arena = extent_arena_get(extent); pre_zeroed = extent_zeroed_get(extent); @@ -169,15 +131,15 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ assert(extent_size_get(extent) != usize); - huge_extent_unset(ptr, extent); + chunk_deregister(tsdn, ptr, extent); + malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); - huge_extent_reset(tsdn, ptr, extent); + malloc_mutex_unlock(tsdn, &arena->huge_mtx); + chunk_reregister(tsdn, ptr, extent); /* Update zeroed. 
*/ extent_zeroed_set(extent, post_zeroed); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); @@ -196,16 +158,14 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, } static bool -huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize) +huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t usize) { - extent_t *extent; arena_t *arena; chunk_hooks_t chunk_hooks; size_t cdiff; bool pre_zeroed, post_zeroed; - extent = huge_extent_get(ptr); arena = extent_arena_get(extent); pre_zeroed = extent_zeroed_get(extent); chunk_hooks = chunk_hooks_get(tsdn, arena); @@ -233,14 +193,14 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, } else post_zeroed = pre_zeroed; - malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_extent_unset(ptr, extent); + chunk_deregister(ptr, extent); + malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); - huge_extent_reset(tsdn, ptr, extent); /* Update zeroed. */ extent_zeroed_set(extent, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + chunk_reregister(tsdn, ptr, extent); /* Zap the excess chunks. 
*/ arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize); @@ -249,14 +209,12 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, } static bool -huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, - size_t usize, bool zero) +huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t usize, bool zero) { - extent_t *extent; arena_t *arena; bool is_zeroed_subchunk, is_zeroed_chunk; - extent = huge_extent_get(ptr); arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); is_zeroed_subchunk = extent_zeroed_get(extent); @@ -272,12 +230,12 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, &is_zeroed_chunk)) return (true); - malloc_mutex_lock(tsdn, &arena->huge_mtx); /* Update the size of the huge allocation. */ - huge_extent_unset(ptr, extent); + chunk_deregister(ptr, extent); + malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); - huge_extent_reset(tsdn, ptr, extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + chunk_reregister(tsdn, ptr, extent); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -298,8 +256,8 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, } bool -huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, - size_t usize_max, bool zero) +huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, + size_t usize_min, size_t usize_max, bool zero) { assert(s2u(oldsize) == oldsize); @@ -312,16 +270,16 @@ huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { /* Attempt to expand the allocation in-place. 
*/ - if (!huge_ralloc_no_move_expand(tsdn, ptr, oldsize, usize_max, - zero)) { - arena_decay_tick(tsdn, huge_aalloc(ptr)); + if (!huge_ralloc_no_move_expand(tsdn, extent, ptr, oldsize, + usize_max, zero)) { + arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Try again, this time with usize_min. */ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn, - ptr, oldsize, usize_min, zero)) { - arena_decay_tick(tsdn, huge_aalloc(ptr)); + extent, ptr, oldsize, usize_min, zero)) { + arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } } @@ -332,17 +290,17 @@ huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize_min, */ if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(tsdn, ptr, oldsize, usize_min, - usize_max, zero); - arena_decay_tick(tsdn, huge_aalloc(ptr)); + huge_ralloc_no_move_similar(tsdn, extent, ptr, oldsize, + usize_min, usize_max, zero); + arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Attempt to shrink the allocation in-place. 
*/ if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(tsdn, ptr, oldsize, + if (!huge_ralloc_no_move_shrink(tsdn, extent, ptr, oldsize, usize_max)) { - arena_decay_tick(tsdn, huge_aalloc(ptr)); + arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } } @@ -360,8 +318,8 @@ huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t usize, size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, + size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -370,7 +328,8 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, assert(usize > 0 && usize <= HUGE_MAXCLASS); /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsdn, ptr, oldsize, usize, usize, zero)) + if (!huge_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, usize, + zero)) return (ptr); /* @@ -384,19 +343,17 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, copysize = (usize < oldsize) ? 
usize : oldsize; memcpy(ret, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, true); + isdalloct(tsdn, extent, ptr, oldsize, tcache, true); return (ret); } void -huge_dalloc(tsdn_t *tsdn, void *ptr) +huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr) { - extent_t *extent; arena_t *arena; - extent = huge_extent_get(ptr); arena = extent_arena_get(extent); - huge_extent_unset(ptr, extent); + chunk_deregister(ptr, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -405,26 +362,17 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) extent_size_get(extent)); arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), extent_addr_get(extent), extent_size_get(extent)); - idalloctm(tsdn, extent, NULL, true, true); + idalloctm(tsdn, iealloc(extent), extent, NULL, true, true); arena_decay_tick(tsdn, arena); } -arena_t * -huge_aalloc(const void *ptr) -{ - - return (extent_arena_get(huge_extent_get(ptr))); -} - size_t -huge_salloc(tsdn_t *tsdn, const void *ptr) +huge_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { size_t size; - extent_t *extent; arena_t *arena; - extent = huge_extent_get(ptr); arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); size = extent_size_get(extent); @@ -434,13 +382,13 @@ huge_salloc(tsdn_t *tsdn, const void *ptr) } prof_tctx_t * -huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) +huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { prof_tctx_t *tctx; - extent_t *extent; arena_t *arena; - extent = huge_extent_get(ptr); + assert(extent == iealloc(ptr)); + arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); tctx = extent_prof_tctx_get(extent); @@ -450,12 +398,13 @@ huge_prof_tctx_get(tsdn_t *tsdn, const void *ptr) } void -huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) +huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx) { - 
extent_t *extent; arena_t *arena; - extent = huge_extent_get(ptr); + assert(extent == iealloc(ptr)); + arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_prof_tctx_set(extent, tctx); @@ -463,8 +412,8 @@ huge_prof_tctx_set(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) } void -huge_prof_tctx_reset(tsdn_t *tsdn, const void *ptr) +huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr) { - huge_prof_tctx_set(tsdn, ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsdn, extent, ptr, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 929f3b87..67a3b564 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -308,10 +308,10 @@ a0ialloc(size_t size, bool zero, bool is_metadata) } static void -a0idalloc(void *ptr, bool is_metadata) +a0idalloc(extent_t *extent, void *ptr, bool is_metadata) { - idalloctm(TSDN_NULL, ptr, false, is_metadata, true); + idalloctm(TSDN_NULL, extent, ptr, false, is_metadata, true); } void * @@ -325,7 +325,7 @@ void a0dalloc(void *ptr) { - a0idalloc(ptr, true); + a0idalloc(iealloc(ptr), ptr, true); } /* @@ -365,7 +365,7 @@ bootstrap_free(void *ptr) if (unlikely(ptr == NULL)) return; - a0idalloc(ptr, false); + a0idalloc(iealloc(ptr), ptr, false); } static void @@ -1401,7 +1401,7 @@ ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(tsd_tsdn(tsd), iealloc(p), p, usize); } else p = ialloc(tsd, usize, ind, zero, slow_path); @@ -1423,7 +1423,7 @@ ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), iealloc(p), p, usize, tctx); return (p); } @@ -1482,7 +1482,7 @@ ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, 
set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsdn, ret, config_prof)); + assert(usize == isalloc(tsdn, iealloc(ret), ret, config_prof)); *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } witness_assert_lockless(tsdn); @@ -1525,7 +1525,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(tsd_tsdn(tsd), iealloc(p), p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1547,7 +1547,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), iealloc(p), p, usize, tctx); return (p); } @@ -1604,7 +1604,8 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(tsd_tsdn(tsd), result, config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), iealloc(result), result, + config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); @@ -1683,44 +1684,49 @@ je_calloc(size_t num, size_t size) } static void * -irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - prof_tctx_t *tctx) +irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, + size_t old_usize, size_t usize, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); + p = iralloc(tsd, extent, old_ptr, old_usize, LARGE_MINCLASS, 0, + false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), p, usize); + arena_prof_promoted(tsd_tsdn(tsd), iealloc(p), p, usize); } else - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + p = iralloc(tsd, extent, old_ptr, old_usize, 
usize, 0, false); return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) +irealloc_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, + size_t usize) { void *p; + extent_t *e; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), extent, old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); - else - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + p = irealloc_prof_sample(tsd, extent, old_ptr, old_usize, usize, + tctx); + } else + p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, - old_tctx); + e = (p == old_ptr) ? 
extent : iealloc(p); + prof_realloc(tsd, e, p, usize, tctx, prof_active, true, + old_ptr, old_usize, old_tctx); return (p); } @@ -1728,6 +1734,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) JEMALLOC_INLINE_C void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { + extent_t *extent; size_t usize; witness_assert_lockless(tsd_tsdn(tsd)); @@ -1735,22 +1742,24 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + extent = iealloc(ptr); if (config_prof && opt_prof) { - usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); - prof_free(tsd, ptr, usize); + usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); + prof_free(tsd, extent, ptr, usize); } else if (config_stats) - usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, false); + idalloctm(tsd_tsdn(tsd), extent, ptr, tcache, false, false); else - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, true); + idalloctm(tsd_tsdn(tsd), extent, ptr, tcache, false, true); } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) +isfree(tsd_t *tsd, extent_t *extent, void *ptr, size_t usize, tcache_t *tcache, + bool slow_path) { witness_assert_lockless(tsd_tsdn(tsd)); @@ -1759,14 +1768,14 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) - prof_free(tsd, ptr, usize); + prof_free(tsd, extent, ptr, usize); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, false); + isdalloct(tsd_tsdn(tsd), extent, ptr, usize, tcache, false); else - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, true); + 
isdalloct(tsd_tsdn(tsd), extent, ptr, usize, tcache, true); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1794,22 +1803,26 @@ je_realloc(void *ptr, size_t size) if (likely(ptr != NULL)) { tsd_t *tsd; + extent_t *extent; assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + extent = iealloc(ptr); + old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? - NULL : irealloc_prof(tsd, ptr, old_usize, usize); + NULL : irealloc_prof(tsd, extent, ptr, old_usize, + usize); } else { if (config_stats) usize = s2u(size); - ret = iralloc(tsd, ptr, old_usize, size, 0, false); + ret = iralloc(tsd, extent, ptr, old_usize, size, 0, + false); } tsdn = tsd_tsdn(tsd); } else { @@ -1832,7 +1845,7 @@ je_realloc(void *ptr, size_t size) if (config_stats && likely(ret != NULL)) { tsd_t *tsd; - assert(usize == isalloc(tsdn, ret, config_prof)); + assert(usize == isalloc(tsdn, iealloc(ret), ret, config_prof)); tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; @@ -1986,11 +1999,10 @@ imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache, arena, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, p, usize); - } else { + arena_prof_promoted(tsdn, iealloc(p), p, usize); + } else p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, slow_path); - } return (p); } @@ -2021,7 +2033,7 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), p, *usize, tctx); + prof_malloc(tsd_tsdn(tsd), iealloc(p), p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); @@ -2109,46 +2121,47 @@ je_mallocx(size_t 
size, int flags) } static void * -irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, - size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, - prof_tctx_t *tctx) +irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, + size_t old_usize, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { void *p; if (tctx == NULL) return (NULL); if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, + p = iralloct(tsdn, extent, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, p, usize); + arena_prof_promoted(tsdn, iealloc(p), p, usize); } else { - p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, - tcache, arena); + p = iralloct(tsdn, extent, old_ptr, old_usize, usize, alignment, + zero, tcache, arena); } return (p); } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, - size_t alignment, size_t *usize, bool zero, tcache_t *tcache, +irallocx_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, + size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena) { void *p; + extent_t *e; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), extent, old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, - *usize, alignment, zero, tcache, arena, tctx); + p = irallocx_prof_sample(tsd_tsdn(tsd), extent, old_ptr, + old_usize, *usize, alignment, zero, tcache, arena, tctx); } else { - p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, - zero, tcache, arena); + p = iralloct(tsd_tsdn(tsd), extent, old_ptr, old_usize, 
size, + alignment, zero, tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); @@ -2164,9 +2177,11 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ - *usize = isalloc(tsd_tsdn(tsd), p, config_prof); - } - prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, + e = extent; + *usize = isalloc(tsd_tsdn(tsd), e, p, config_prof); + } else + e = iealloc(p); + prof_realloc(tsd, e, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); return (p); @@ -2179,6 +2194,7 @@ je_rallocx(void *ptr, size_t size, int flags) { void *p; tsd_t *tsd; + extent_t *extent; size_t usize; size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); @@ -2191,6 +2207,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); + extent = iealloc(ptr); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2208,23 +2225,25 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); if (config_prof && opt_prof) { usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) goto label_oom; - p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, - zero, tcache, arena); + p = irallocx_prof(tsd, extent, ptr, old_usize, size, alignment, + &usize, zero, tcache, arena); if (unlikely(p == NULL)) goto label_oom; } else { - p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, - zero, tcache, arena); + p = iralloct(tsd_tsdn(tsd), extent, ptr, old_usize, size, + alignment, zero, tcache, arena); if (unlikely(p == NULL)) goto label_oom; - if (config_stats) - usize = isalloc(tsd_tsdn(tsd), p, config_prof); + if (config_stats) { + usize = isalloc(tsd_tsdn(tsd), iealloc(p), p, + config_prof); + } } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2246,42 +2265,43 @@ label_oom: } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero) +ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, + size_t size, size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsdn, extent, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(tsdn, ptr, config_prof); + usize = isalloc(tsdn, extent, ptr, config_prof); return (usize); } static size_t -ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) +ixallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, + prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) return (old_usize); - usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, - zero); + usize = ixallocx_helper(tsdn, extent, ptr, old_usize, size, extra, + alignment, zero); return (usize); } JEMALLOC_ALWAYS_INLINE_C size_t 
-ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero) +ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, + size_t size, size_t extra, size_t alignment, bool zero) { size_t usize_max, usize; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in @@ -2306,18 +2326,18 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, - size, extra, alignment, zero, tctx); + usize = ixallocx_prof_sample(tsd_tsdn(tsd), extent, ptr, + old_usize, size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, - extra, alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), extent, ptr, old_usize, + size, extra, alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return (usize); } - prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, - old_tctx); + prof_realloc(tsd, extent, ptr, usize, tctx, prof_active, false, ptr, + old_usize, old_tctx); return (usize); } @@ -2326,6 +2346,7 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; + extent_t *extent; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; @@ -2336,8 +2357,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); + extent = iealloc(ptr); - old_usize = 
isalloc(tsd_tsdn(tsd), ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); /* * The API explicitly absolves itself of protecting against (size + @@ -2356,11 +2378,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) extra = HUGE_MAXCLASS - size; if (config_prof && opt_prof) { - usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, + usize = ixallocx_prof(tsd, extent, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, - extra, alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), extent, ptr, old_usize, + size, extra, alignment, zero); } if (unlikely(usize == old_usize)) goto label_not_resized; @@ -2390,7 +2412,7 @@ je_sallocx(const void *ptr, int flags) if (config_ivsalloc) usize = ivsalloc(tsdn, ptr, config_prof); else - usize = isalloc(tsdn, ptr, config_prof); + usize = isalloc(tsdn, iealloc(ptr), ptr, config_prof); witness_assert_lockless(tsdn); return (usize); @@ -2442,14 +2464,16 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags) { tsd_t *tsd; - tcache_t *tcache; + extent_t *extent; size_t usize; + tcache_t *tcache; assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); + extent = iealloc(ptr); usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr, config_prof)); witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2462,9 +2486,9 @@ je_sdallocx(void *ptr, size_t size, int flags) UTRACE(ptr, 0, 0); if (likely(!malloc_slow)) - isfree(tsd, ptr, usize, tcache, false); + isfree(tsd, extent, ptr, usize, tcache, false); else - isfree(tsd, ptr, usize, tcache, true); + isfree(tsd, extent, ptr, usize, tcache, true); witness_assert_lockless(tsd_tsdn(tsd)); } @@ -2566,8 +2590,10 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) 
if (config_ivsalloc) ret = ivsalloc(tsdn, ptr, config_prof); - else - ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr, config_prof); + else { + ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(ptr), ptr, + config_prof); + } witness_assert_lockless(tsdn); return (ret); diff --git a/src/prof.c b/src/prof.c index c1f58d46..121dcd91 100644 --- a/src/prof.c +++ b/src/prof.c @@ -223,11 +223,11 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) } void -prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx) +prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, ptr, usize, tctx); + prof_tctx_set(tsdn, extent, ptr, usize, tctx); malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; @@ -596,7 +596,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(gctx), gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -707,7 +707,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); if (destroy_tctx) - idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tctx), tctx, NULL, true, true); } static bool @@ -736,7 +736,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { /* OOM. 
*/ prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(gctx.v), gctx.v, NULL, + true, true); return (true); } new_gctx = true; @@ -816,7 +817,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(ret.v), ret.v, NULL, + true, true); return (NULL); } malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); @@ -1238,7 +1240,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd_tsdn(tsd), to_destroy, + idalloctm(tsd_tsdn(tsd), + iealloc(to_destroy), to_destroy, NULL, true, true); } else next = NULL; @@ -1815,7 +1818,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, tdata, NULL, true, true); + idalloctm(tsdn, iealloc(tdata), tdata, NULL, true, true); return (NULL); } @@ -1878,10 +1881,12 @@ prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - if (tdata->thread_name != NULL) - idalloctm(tsdn, tdata->thread_name, NULL, true, true); + if (tdata->thread_name != NULL) { + idalloctm(tsdn, iealloc(tdata->thread_name), tdata->thread_name, + NULL, true, true); + } ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, tdata, NULL, true, true); + idalloctm(tsdn, iealloc(tdata), tdata, NULL, true, true); } static void @@ -2075,7 +2080,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tdata->thread_name), + tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/tcache.c b/src/tcache.c 
index c4a99006..c02f0f0c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -27,7 +27,7 @@ size_t tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsdn, ptr, false)); + return (arena_salloc(tsdn, iealloc(ptr), ptr, false)); } void @@ -101,9 +101,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - *(tbin->avail - 1)); - arena_t *bin_arena = extent_arena_get(&chunk->extent); + extent_t *extent = iealloc(*(tbin->avail - 1)); + arena_t *bin_arena = extent_arena_get(extent); arena_bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { @@ -125,14 +124,17 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, for (i = 0; i < nflush; i++) { ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (extent_arena_get(&chunk->extent) == bin_arena) { + + extent = iealloc(ptr); + if (extent_arena_get(extent) == bin_arena) { + arena_chunk_t *chunk = + (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; arena_chunk_map_bits_t *bitselm = arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, chunk, ptr, bitselm); + bin_arena, chunk, extent, ptr, bitselm); } else { /* * This object was allocated via a different @@ -183,9 +185,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. 
*/ - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE( - *(tbin->avail - 1)); - arena_t *locked_arena = extent_arena_get(&chunk->extent); + extent_t *extent = iealloc(*(tbin->avail - 1)); + arena_t *locked_arena = extent_arena_get(extent); UNUSED bool idump; if (config_prof) @@ -210,10 +211,12 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (i = 0; i < nflush; i++) { ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr); - if (extent_arena_get(&chunk->extent) == locked_arena) { + extent = iealloc(ptr); + if (extent_arena_get(extent) == locked_arena) { + arena_chunk_t *chunk = + (arena_chunk_t *)extent_addr_get(extent); arena_dalloc_large_junked_locked(tsd_tsdn(tsd), - locked_arena, chunk, ptr); + locked_arena, chunk, extent, ptr); } else { /* * This object was allocated via a different @@ -391,7 +394,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) prof_idump(tsd_tsdn(tsd)); - idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tcache), tcache, NULL, true, true); } void From 8c9be3e83732883e852d43bca2cf7724c465f93e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 16 Apr 2016 00:36:11 -0700 Subject: [PATCH 0261/2608] Refactor rtree to always use base_alloc() for node allocation. 
--- include/jemalloc/internal/arena.h | 18 +-- include/jemalloc/internal/chunk.h | 9 +- .../jemalloc/internal/jemalloc_internal.h.in | 31 +++-- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/rtree.h | 75 ++++++----- include/jemalloc/internal/tcache.h | 4 +- src/arena.c | 59 +++++---- src/chunk.c | 17 +-- src/ckh.c | 12 +- src/huge.c | 18 +-- src/jemalloc.c | 56 +++++---- src/prof.c | 30 ++--- src/rtree.c | 71 ++++++++--- src/tcache.c | 13 +- test/unit/rtree.c | 117 ++++++++++++------ 15 files changed, 315 insertions(+), 217 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d441aaf5..ff3e01d8 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -633,7 +633,8 @@ size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); -szind_t arena_ptr_small_binind_get(const void *ptr, size_t mapbits); +szind_t arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, + size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); size_t arena_run_regind(arena_run_t *run, const arena_bin_info_t *bin_info, const void *ptr); @@ -647,7 +648,7 @@ void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); -arena_t *arena_aalloc(const void *ptr); +arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote); void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, @@ -1049,7 +1050,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) # ifdef JEMALLOC_ARENA_INLINE_B 
JEMALLOC_ALWAYS_INLINE szind_t -arena_ptr_small_binind_get(const void *ptr, size_t mapbits) +arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, size_t mapbits) { szind_t binind; @@ -1071,7 +1072,7 @@ arena_ptr_small_binind_get(const void *ptr, size_t mapbits) assert(binind != BININD_INVALID); assert(binind < NBINS); - extent = iealloc(ptr); + extent = iealloc(tsdn, ptr); chunk = (arena_chunk_t *)extent_addr_get(extent); arena = extent_arena_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1314,10 +1315,10 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, } JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(const void *ptr) +arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (extent_arena_get(iealloc(ptr))); + return (extent_arena_get(iealloc(tsdn, ptr))); } /* Return the size of the allocation pointed to by ptr. */ @@ -1361,7 +1362,7 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) * object). */ assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(ptr, + arena_ptr_small_binind_get(tsdn, ptr, arena_mapbits_get(chunk, pageind)) == binind); ret = index2size(binind); } @@ -1389,7 +1390,8 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { /* Small allocation. 
*/ if (likely(tcache != NULL)) { - szind_t binind = arena_ptr_small_binind_get(ptr, + szind_t binind = + arena_ptr_small_binind_get(tsdn, ptr, mapbits); tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, slow_path); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c13f2171..be56c2bd 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -53,7 +53,8 @@ chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent); -void chunk_deregister(const void *chunk, const extent_t *extent); +void chunk_deregister(tsdn_t *tsdn, const void *chunk, + const extent_t *extent); void chunk_reregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent); void *chunk_alloc_base(size_t size); @@ -81,15 +82,15 @@ void chunk_postfork_child(tsdn_t *tsdn); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -extent_t *chunk_lookup(const void *chunk, bool dependent); +extent_t *chunk_lookup(tsdn_t *tsdn, const void *chunk, bool dependent); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) JEMALLOC_INLINE extent_t * -chunk_lookup(const void *ptr, bool dependent) +chunk_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) { - return (rtree_read(&chunks_rtree, (uintptr_t)ptr, dependent)); + return (rtree_read(tsdn, &chunks_rtree, (uintptr_t)ptr, dependent)); } #endif diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 1fc9d3d7..d1306e17 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -961,15 +961,15 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) #undef JEMALLOC_ARENA_INLINE_A #ifndef JEMALLOC_ENABLE_INLINE -extent_t *iealloc(const void *ptr); +extent_t *iealloc(tsdn_t *tsdn, const void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) 
|| defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE extent_t * -iealloc(const void *ptr) +iealloc(tsdn_t *tsdn, const void *ptr) { - return (chunk_lookup(ptr, true)); + return (chunk_lookup(tsdn, ptr, true)); } #endif @@ -980,8 +980,7 @@ iealloc(const void *ptr) #include "jemalloc/internal/hash.h" #ifndef JEMALLOC_ENABLE_INLINE -extent_t *iealloc(const void *ptr); -arena_t *iaalloc(const void *ptr); +arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote); void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, @@ -1012,19 +1011,19 @@ bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE arena_t * -iaalloc(const void *ptr) +iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); - return (arena_aalloc(ptr)); + return (arena_aalloc(tsdn, ptr)); } /* * Typical usage: * tsdn_t *tsdn = [...] * void *ptr = [...] 
- * extent_t *extent = iealloc(ptr); + * extent_t *extent = iealloc(tsdn, ptr); * size_t sz = isalloc(tsdn, extent, ptr, config_prof); */ JEMALLOC_ALWAYS_INLINE size_t @@ -1050,8 +1049,8 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, - iealloc(ret), ret, config_prof)); + arena_metadata_allocated_add(iaalloc(tsdn, ret), isalloc(tsdn, + iealloc(tsdn, ret), ret, config_prof)); } return (ret); } @@ -1078,8 +1077,8 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(ret), isalloc(tsdn, - iealloc(ret), ret, config_prof)); + arena_metadata_allocated_add(iaalloc(tsdn, ret), isalloc(tsdn, + iealloc(tsdn, ret), ret, config_prof)); } return (ret); } @@ -1106,7 +1105,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) extent_t *extent; /* Return 0 if ptr is not within a chunk managed by jemalloc. */ - extent = chunk_lookup(ptr, false); + extent = chunk_lookup(tsdn, ptr, false); if (extent == NULL) return (0); /* Only arena chunks should be looked up via interior pointers. 
*/ @@ -1123,10 +1122,10 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(ptr != NULL); assert(!is_metadata || tcache == NULL); - assert(!is_metadata || iaalloc(ptr)->ind < narenas_auto); + assert(!is_metadata || iaalloc(tsdn, ptr)->ind < narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(ptr), isalloc(tsdn, extent, - ptr, config_prof)); + arena_metadata_allocated_sub(iaalloc(tsdn, ptr), isalloc(tsdn, + extent, ptr, config_prof)); } arena_dalloc(tsdn, extent, ptr, tcache, slow_path); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5f4a4b0b..42c730c6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -460,6 +460,8 @@ rtree_child_tryread rtree_clear rtree_delete rtree_new +rtree_node_alloc +rtree_node_dalloc rtree_node_valid rtree_elm_acquire rtree_elm_lookup diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 59a7ab3c..dbea434c 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -23,13 +23,6 @@ typedef struct rtree_s rtree_t; /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1) -/* - * The node allocation callback function's argument is the number of contiguous - * rtree_elm_t structures to allocate, and the resulting memory must be zeroed. 
- */ -typedef rtree_elm_t *(rtree_node_alloc_t)(size_t); -typedef void (rtree_node_dalloc_t)(rtree_elm_t *); - #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -79,8 +72,6 @@ struct rtree_level_s { }; struct rtree_s { - rtree_node_alloc_t *alloc; - rtree_node_dalloc_t *dalloc; unsigned height; /* * Precomputed table used to convert from the number of leading 0 key @@ -94,12 +85,18 @@ struct rtree_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, - rtree_node_dalloc_t *dalloc); -void rtree_delete(rtree_t *rtree); -rtree_elm_t *rtree_subtree_read_hard(rtree_t *rtree, unsigned level); -rtree_elm_t *rtree_child_read_hard(rtree_t *rtree, rtree_elm_t *elm, +bool rtree_new(rtree_t *rtree, unsigned bits); +#ifdef JEMALLOC_JET +typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *rtree_node_alloc; +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); +extern rtree_node_dalloc_t *rtree_node_dalloc; +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level); +rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_elm_t *elm, unsigned level); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -111,25 +108,27 @@ uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_elm_t *node); rtree_elm_t *rtree_child_tryread(rtree_elm_t *elm, bool dependent); -rtree_elm_t *rtree_child_read(rtree_t *rtree, rtree_elm_t *elm, +rtree_elm_t *rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, bool dependent); extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); void 
rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent); -rtree_elm_t *rtree_subtree_read(rtree_t *rtree, unsigned level, - bool dependent); -rtree_elm_t *rtree_elm_lookup(rtree_t *rtree, uintptr_t key, +rtree_elm_t *rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, + unsigned level, bool dependent); +rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, bool init_missing); -bool rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent); -extent_t *rtree_read(rtree_t *rtree, uintptr_t key, bool dependent); -rtree_elm_t *rtree_elm_acquire(rtree_t *rtree, uintptr_t key, +bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, + const extent_t *extent); +extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, + bool dependent); +rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, bool init_missing); extent_t *rtree_elm_read_acquired(rtree_elm_t *elm); void rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent); void rtree_elm_release(rtree_elm_t *elm); -void rtree_clear(rtree_t *rtree, uintptr_t key); +void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) @@ -177,14 +176,14 @@ rtree_child_tryread(rtree_elm_t *elm, bool dependent) } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_child_read(rtree_t *rtree, rtree_elm_t *elm, unsigned level, +rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, bool dependent) { rtree_elm_t *child; child = rtree_child_tryread(elm, dependent); if (!dependent && unlikely(!rtree_node_valid(child))) - child = rtree_child_read_hard(rtree, elm, level); + child = rtree_child_read_hard(tsdn, rtree, elm, level); assert(!dependent || child != NULL); return (child); } @@ -238,19 +237,19 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool 
dependent) } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_read(rtree_t *rtree, unsigned level, bool dependent) +rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, bool dependent) { rtree_elm_t *subtree; subtree = rtree_subtree_tryread(rtree, level, dependent); if (!dependent && unlikely(!rtree_node_valid(subtree))) - subtree = rtree_subtree_read_hard(rtree, level); + subtree = rtree_subtree_read_hard(tsdn, rtree, level); assert(!dependent || subtree != NULL); return (subtree); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_elm_lookup(rtree_t *rtree, uintptr_t key, bool dependent, +rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, bool init_missing) { uintptr_t subkey; @@ -261,8 +260,8 @@ rtree_elm_lookup(rtree_t *rtree, uintptr_t key, bool dependent, start_level = rtree_start_level(rtree, key); - node = init_missing ? rtree_subtree_read(rtree, start_level, dependent) - : rtree_subtree_tryread(rtree, start_level, dependent); + node = init_missing ? rtree_subtree_read(tsdn, rtree, start_level, + dependent) : rtree_subtree_tryread(rtree, start_level, dependent); #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) switch (start_level + RTREE_GET_BIAS) { #define RTREE_GET_SUBTREE(level) \ @@ -272,7 +271,7 @@ rtree_elm_lookup(rtree_t *rtree, uintptr_t key, bool dependent, return (NULL); \ subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ - node = init_missing ? rtree_child_read(rtree, \ + node = init_missing ? rtree_child_read(tsdn, rtree, \ &node[subkey], level - RTREE_GET_BIAS, dependent) : \ rtree_child_tryread(&node[subkey], dependent); \ /* Fall through. */ @@ -346,14 +345,14 @@ rtree_elm_lookup(rtree_t *rtree, uintptr_t key, bool dependent, } JEMALLOC_INLINE bool -rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent) +rtree_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, const extent_t *extent) { rtree_elm_t *elm; assert(extent != NULL); /* Use rtree_clear() for this case. 
*/ assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - elm = rtree_elm_lookup(rtree, key, false, true); + elm = rtree_elm_lookup(tsdn, rtree, key, false, true); if (elm == NULL) return (true); assert(rtree_elm_read(elm, false) == NULL); @@ -363,11 +362,11 @@ rtree_write(rtree_t *rtree, uintptr_t key, const extent_t *extent) } JEMALLOC_ALWAYS_INLINE extent_t * -rtree_read(rtree_t *rtree, uintptr_t key, bool dependent) +rtree_read(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent) { rtree_elm_t *elm; - elm = rtree_elm_lookup(rtree, key, dependent, false); + elm = rtree_elm_lookup(tsdn, rtree, key, dependent, false); if (elm == NULL) return (NULL); @@ -375,12 +374,12 @@ rtree_read(rtree_t *rtree, uintptr_t key, bool dependent) } JEMALLOC_INLINE rtree_elm_t * -rtree_elm_acquire(rtree_t *rtree, uintptr_t key, bool dependent, +rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, bool init_missing) { rtree_elm_t *elm; - elm = rtree_elm_lookup(rtree, key, dependent, init_missing); + elm = rtree_elm_lookup(tsdn, rtree, key, dependent, init_missing); if (!dependent && elm == NULL) return (NULL); { @@ -427,11 +426,11 @@ rtree_elm_release(rtree_elm_t *elm) } JEMALLOC_INLINE void -rtree_clear(rtree_t *rtree, uintptr_t key) +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key) { rtree_elm_t *elm; - elm = rtree_elm_acquire(rtree, key, true, false); + elm = rtree_elm_acquire(tsdn, rtree, key, true, false); rtree_elm_write_acquired(elm, NULL); rtree_elm_release(elm); } diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index d6d27506..ee63a652 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -370,8 +370,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_prof && usize == LARGE_MINCLASS) { - arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(iealloc(ret)); + arena_chunk_t *chunk =(arena_chunk_t 
*)extent_addr_get( + iealloc(tsd_tsdn(tsd), ret)); size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> LG_PAGE); arena_mapbits_large_binind_set(chunk, pageind, diff --git a/src/arena.c b/src/arena.c index 3abbc623..8a93fca0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -264,12 +264,13 @@ arena_run_reg_alloc(arena_run_t *run, const arena_bin_info_t *bin_info) } JEMALLOC_INLINE_C void -arena_run_reg_dalloc(arena_run_t *run, extent_t *extent, void *ptr) +arena_run_reg_dalloc(tsdn_t *tsdn, arena_run_t *run, extent_t *extent, + void *ptr) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); - szind_t binind = arena_ptr_small_binind_get(ptr, mapbits); + szind_t binind = arena_ptr_small_binind_get(tsdn, ptr, mapbits); const arena_bin_info_t *bin_info = &arena_bin_info[binind]; size_t regind = arena_run_regind(run, bin_info, ptr); @@ -665,7 +666,7 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_deregister(chunk, &chunk->extent); + chunk_deregister(tsdn, chunk, &chunk->extent); committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { @@ -1037,11 +1038,13 @@ arena_run_first_best_fit(arena_t *arena, size_t size) } static arena_run_t * -arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) +arena_run_alloc_large_helper(tsdn_t *tsdn, arena_t *arena, size_t size, + bool zero) { arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); if (run != NULL) { - if (arena_run_split_large(arena, iealloc(run), run, size, zero)) + if (arena_run_split_large(arena, iealloc(tsdn, run), run, size, + zero)) run = NULL; } return (run); @@ -1057,7 +1060,7 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) assert(size == PAGE_CEILING(size)); /* Search the arena's chunks for the lowest 
best fit. */ - run = arena_run_alloc_large_helper(arena, size, zero); + run = arena_run_alloc_large_helper(tsdn, arena, size, zero); if (run != NULL) return (run); @@ -1067,7 +1070,8 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; - if (arena_run_split_large(arena, iealloc(run), run, size, zero)) + if (arena_run_split_large(arena, iealloc(tsdn, run), run, size, + zero)) run = NULL; return (run); } @@ -1077,15 +1081,16 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) * sufficient memory available while this one dropped arena->lock in * arena_chunk_alloc(), so search one more time. */ - return (arena_run_alloc_large_helper(arena, size, zero)); + return (arena_run_alloc_large_helper(tsdn, arena, size, zero)); } static arena_run_t * -arena_run_alloc_small_helper(arena_t *arena, size_t size, szind_t binind) +arena_run_alloc_small_helper(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t binind) { arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { - if (arena_run_split_small(arena, iealloc(run), run, size, + if (arena_run_split_small(arena, iealloc(tsdn, run), run, size, binind)) run = NULL; } @@ -1103,7 +1108,7 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) assert(binind != BININD_INVALID); /* Search the arena's chunks for the lowest best fit. 
*/ - run = arena_run_alloc_small_helper(arena, size, binind); + run = arena_run_alloc_small_helper(tsdn, arena, size, binind); if (run != NULL) return (run); @@ -1113,7 +1118,7 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; - if (arena_run_split_small(arena, iealloc(run), run, size, + if (arena_run_split_small(arena, iealloc(tsdn, run), run, size, binind)) run = NULL; return (run); @@ -1124,7 +1129,7 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) * sufficient memory available while this one dropped arena->lock in * arena_chunk_alloc(), so search one more time. */ - return (arena_run_alloc_small_helper(arena, size, binind)); + return (arena_run_alloc_small_helper(tsdn, arena, size, binind)); } static bool @@ -1426,7 +1431,7 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) } static size_t -arena_dirty_count(arena_t *arena) +arena_dirty_count(tsdn_t *tsdn, arena_t *arena) { size_t ndirty = 0; arena_runs_dirty_link_t *rdelm; @@ -1441,7 +1446,7 @@ arena_dirty_count(arena_t *arena) npages = extent_size_get(chunkselm) >> LG_PAGE; chunkselm = qr_next(chunkselm, cc_link); } else { - extent_t *extent = iealloc(rdelm); + extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = @@ -1504,7 +1509,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, LG_PAGE)); chunkselm = chunkselm_next; } else { - extent_t *extent = iealloc(rdelm); + extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = @@ -1586,7 +1591,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } else { size_t pageind, run_size, flag_unzeroed, flags, i; bool decommitted; - extent_t *extent = 
iealloc(rdelm); + extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = @@ -1671,7 +1676,7 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, size, zeroed, committed); } else { - extent_t *extent = iealloc(rdelm); + extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = @@ -1711,7 +1716,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) * because overhead grows nonlinearly as memory usage increases. */ if (false && config_debug) { - size_t ndirty = arena_dirty_count(arena); + size_t ndirty = arena_dirty_count(tsdn, arena); assert(ndirty == arena->ndirty); } assert(opt_purge != purge_mode_ratio || (arena->nactive >> @@ -2276,7 +2281,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) * arena_bin_lower_run() must be called, as if a region * were just deallocated from the run. 
*/ - extent = iealloc(run); + extent = iealloc(tsdn, run); chunk = (arena_chunk_t *)extent_addr_get(extent); if (run->nfree == bin_info->nregs) { arena_dalloc_bin_run(tsdn, arena, chunk, extent, @@ -2537,7 +2542,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } - extent = iealloc(run); + extent = iealloc(tsdn, run); chunk = (arena_chunk_t *)extent_addr_get(extent); miscelm = arena_run_to_miscelm(run); rpages = arena_miscelm_to_rpages(miscelm); @@ -2555,7 +2560,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena_miscelm_to_pageind(head_miscelm) + (leadsize >> LG_PAGE)); run = &miscelm->run; - extent = iealloc(run); + extent = iealloc(tsdn, run); arena_run_trim_head(tsdn, arena, chunk, head_extent, head_run, alloc_size, alloc_size - leadsize); @@ -2745,7 +2750,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, if (!junked && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, bin_info); - arena_run_reg_dalloc(run, extent, ptr); + arena_run_reg_dalloc(tsdn, run, extent, ptr); if (run->nfree == bin_info->nregs) { arena_dissociate_bin_run(extent, run, bin); arena_dalloc_bin_run(tsdn, arena, chunk, extent, run, bin); @@ -2793,8 +2798,8 @@ arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, if (config_debug) { /* arena_ptr_small_binind_get() does extra sanity checking. 
*/ - assert(arena_ptr_small_binind_get(ptr, arena_mapbits_get(chunk, - pageind)) != BININD_INVALID); + assert(arena_ptr_small_binind_get(tsdn, ptr, + arena_mapbits_get(chunk, pageind)) != BININD_INVALID); } bitselm = arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin(tsdn, arena, chunk, extent, ptr, pageind, bitselm); @@ -2939,8 +2944,8 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, goto label_fail; run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; - if (arena_run_split_large(arena, iealloc(run), run, splitsize, - zero)) + if (arena_run_split_large(arena, iealloc(tsdn, run), run, + splitsize, zero)) goto label_fail; if (config_cache_oblivious && zero) { diff --git a/src/chunk.c b/src/chunk.c index e35bb30a..4443368a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -146,8 +146,9 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) assert(extent_addr_get(extent) == chunk); - if (rtree_write(&chunks_rtree, (uintptr_t)chunk, extent)) + if (rtree_write(tsdn, &chunks_rtree, (uintptr_t)chunk, extent)) return (true); + if (config_prof && opt_prof) { size_t size = extent_size_get(extent); size_t nadd = (size == 0) ? 1 : size / chunksize; @@ -168,10 +169,10 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) } void -chunk_deregister(const void *chunk, const extent_t *extent) +chunk_deregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent) { - rtree_clear(&chunks_rtree, (uintptr_t)chunk); + rtree_clear(tsdn, &chunks_rtree, (uintptr_t)chunk); if (config_prof && opt_prof) { size_t size = extent_size_get(extent); size_t nsub = (size == 0) ? 
1 : size / chunksize; @@ -691,14 +692,6 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, return (false); } -static rtree_elm_t * -chunks_rtree_node_alloc(size_t nelms) -{ - - return ((rtree_elm_t *)base_alloc(tsdn_fetch(), nelms * - sizeof(rtree_elm_t))); -} - bool chunk_boot(void) { @@ -735,7 +728,7 @@ chunk_boot(void) if (have_dss && chunk_dss_boot()) return (true); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk), chunks_rtree_node_alloc, NULL)) + opt_lg_chunk))) return (true); return (false); diff --git a/src/ckh.c b/src/ckh.c index 3135ee74..2c120ac8 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -283,12 +283,14 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, iealloc(tab), tab, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, tab), tab, NULL, true, + true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsdn, iealloc(ckh->tab), ckh->tab, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, ckh->tab), ckh->tab, NULL, true, + true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -330,7 +332,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, iealloc(tab), tab, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, tab), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +340,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, iealloc(ckh->tab), ckh->tab, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, ckh->tab), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -421,7 +423,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, iealloc(ckh->tab), ckh->tab, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, ckh->tab), ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/huge.c b/src/huge.c index e42ea9c1..0b91c369 100644 --- a/src/huge.c +++ b/src/huge.c @@ -45,7 +45,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsdn, iealloc(extent), extent, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, + true); return (NULL); } @@ -53,7 +54,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (chunk_register(tsdn, ret, extent)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize); - idalloctm(tsdn, iealloc(extent), extent, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, + true); return (NULL); } @@ -194,7 +196,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, void *ptr, post_zeroed = pre_zeroed; /* Update the size of the huge allocation. */ - chunk_deregister(ptr, extent); + chunk_deregister(tsdn, ptr, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); /* Update zeroed. */ @@ -231,7 +233,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, return (true); /* Update the size of the huge allocation. 
*/ - chunk_deregister(ptr, extent); + chunk_deregister(tsdn, ptr, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -353,7 +355,7 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr) arena_t *arena; arena = extent_arena_get(extent); - chunk_deregister(ptr, extent); + chunk_deregister(tsdn, ptr, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); @@ -362,7 +364,7 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr) extent_size_get(extent)); arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), extent_addr_get(extent), extent_size_get(extent)); - idalloctm(tsdn, iealloc(extent), extent, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, true); arena_decay_tick(tsdn, arena); } @@ -387,7 +389,7 @@ huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) prof_tctx_t *tctx; arena_t *arena; - assert(extent == iealloc(ptr)); + assert(extent == iealloc(tsdn, ptr)); arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); @@ -403,7 +405,7 @@ huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, { arena_t *arena; - assert(extent == iealloc(ptr)); + assert(extent == iealloc(tsdn, ptr)); arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); diff --git a/src/jemalloc.c b/src/jemalloc.c index 67a3b564..479d8319 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -325,7 +325,7 @@ void a0dalloc(void *ptr) { - a0idalloc(iealloc(ptr), ptr, true); + a0idalloc(iealloc(NULL, ptr), ptr, true); } /* @@ -365,7 +365,7 @@ bootstrap_free(void *ptr) if (unlikely(ptr == NULL)) return; - a0idalloc(iealloc(ptr), ptr, false); + a0idalloc(iealloc(NULL, ptr), ptr, false); } static void @@ -1401,7 +1401,8 @@ ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, p = ialloc(tsd, LARGE_MINCLASS, ind_large, 
zero, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), iealloc(p), p, usize); + arena_prof_promoted(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, + usize); } else p = ialloc(tsd, usize, ind, zero, slow_path); @@ -1423,7 +1424,7 @@ ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), iealloc(p), p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize, tctx); return (p); } @@ -1482,7 +1483,8 @@ ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsdn, iealloc(ret), ret, config_prof)); + assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret, + config_prof)); *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } witness_assert_lockless(tsdn); @@ -1525,7 +1527,8 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), iealloc(p), p, usize); + arena_prof_promoted(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, + usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1547,7 +1550,7 @@ imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), iealloc(p), p, usize, tctx); + prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize, tctx); return (p); } @@ -1604,8 +1607,8 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) ret = 0; label_return: if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(tsd_tsdn(tsd), iealloc(result), result, - config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + result), result, config_prof)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); @@ -1696,7 
+1699,8 @@ irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), iealloc(p), p, usize); + arena_prof_promoted(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, + usize); } else p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); @@ -1724,7 +1728,7 @@ irealloc_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, prof_alloc_rollback(tsd, tctx, true); return (NULL); } - e = (p == old_ptr) ? extent : iealloc(p); + e = (p == old_ptr) ? extent : iealloc(tsd_tsdn(tsd), p); prof_realloc(tsd, e, p, usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); @@ -1742,7 +1746,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); if (config_prof && opt_prof) { usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); prof_free(tsd, extent, ptr, usize); @@ -1810,9 +1814,8 @@ je_realloc(void *ptr, size_t size) witness_assert_lockless(tsd_tsdn(tsd)); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); - if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? 
@@ -1845,7 +1848,8 @@ je_realloc(void *ptr, size_t size) if (config_stats && likely(ret != NULL)) { tsd_t *tsd; - assert(usize == isalloc(tsdn, iealloc(ret), ret, config_prof)); + assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret, + config_prof)); tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; @@ -1999,7 +2003,7 @@ imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache, arena, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, iealloc(p), p, usize); + arena_prof_promoted(tsdn, iealloc(tsdn, p), p, usize); } else p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, slow_path); @@ -2033,7 +2037,7 @@ imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) prof_alloc_rollback(tsd, tctx, true); return (NULL); } - prof_malloc(tsd_tsdn(tsd), iealloc(p), p, *usize, tctx); + prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, *usize, tctx); assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); return (p); @@ -2134,7 +2138,7 @@ irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, alignment, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, iealloc(p), p, usize); + arena_prof_promoted(tsdn, iealloc(tsdn, p), p, usize); } else { p = iralloct(tsdn, extent, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2180,7 +2184,7 @@ irallocx_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, e = extent; *usize = isalloc(tsd_tsdn(tsd), e, p, config_prof); } else - e = iealloc(p); + e = iealloc(tsd_tsdn(tsd), p); prof_realloc(tsd, e, p, *usize, tctx, prof_active, true, old_ptr, old_usize, old_tctx); @@ -2207,7 +2211,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); if 
(unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2241,8 +2245,8 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely(p == NULL)) goto label_oom; if (config_stats) { - usize = isalloc(tsd_tsdn(tsd), iealloc(p), p, - config_prof); + usize = isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + p), p, config_prof); } } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2357,7 +2361,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); @@ -2412,7 +2416,7 @@ je_sallocx(const void *ptr, int flags) if (config_ivsalloc) usize = ivsalloc(tsdn, ptr, config_prof); else - usize = isalloc(tsdn, iealloc(ptr), ptr, config_prof); + usize = isalloc(tsdn, iealloc(tsdn, ptr), ptr, config_prof); witness_assert_lockless(tsdn); return (usize); @@ -2471,7 +2475,7 @@ je_sdallocx(void *ptr, size_t size, int flags) assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr, config_prof)); @@ -2591,7 +2595,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) if (config_ivsalloc) ret = ivsalloc(tsdn, ptr, config_prof); else { - ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(ptr), ptr, + ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(tsdn, ptr), ptr, config_prof); } diff --git a/src/prof.c b/src/prof.c index 121dcd91..03979ca3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -596,7 +596,8 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), iealloc(gctx), gctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), gctx), gctx, + NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -707,7 +708,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); if (destroy_tctx) - idalloctm(tsd_tsdn(tsd), iealloc(tctx), tctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tctx), tctx, + NULL, true, true); } static bool @@ -736,8 +738,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), iealloc(gctx.v), gctx.v, NULL, - true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), gctx.v), + gctx.v, NULL, true, true); return (true); } new_gctx = true; @@ -817,8 +819,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) if (error) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - idalloctm(tsd_tsdn(tsd), iealloc(ret.v), ret.v, NULL, - true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ret.v), + ret.v, NULL, true, true); return (NULL); } malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); @@ -1241,8 +1243,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) tctx_tree_remove(&gctx->tctxs, to_destroy); idalloctm(tsd_tsdn(tsd), - iealloc(to_destroy), to_destroy, - NULL, true, true); + iealloc(tsd_tsdn(tsd), to_destroy), + to_destroy, NULL, true, true); } else next = NULL; } while (next != NULL); @@ -1818,7 +1820,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, iealloc(tdata), tdata, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, tdata), tdata, NULL, true, true); return (NULL); } @@ -1882,11 +1884,11 @@ prof_tdata_destroy_locked(tsdn_t 
*tsdn, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) { - idalloctm(tsdn, iealloc(tdata->thread_name), tdata->thread_name, - NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, tdata->thread_name), + tdata->thread_name, NULL, true, true); } ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, iealloc(tdata), tdata, NULL, true, true); + idalloctm(tsdn, iealloc(tsdn, tdata), tdata, NULL, true, true); } static void @@ -2080,8 +2082,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) return (EAGAIN); if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), iealloc(tdata->thread_name), - tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + tdata->thread_name), tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } if (strlen(s) > 0) diff --git a/src/rtree.c b/src/rtree.c index 71c69c41..c6b64cf4 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -13,8 +13,7 @@ hmin(unsigned ha, unsigned hb) * used. */ bool -rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, - rtree_node_dalloc_t *dalloc) +rtree_new(rtree_t *rtree, unsigned bits) { unsigned bits_in_leaf, height, i; @@ -32,8 +31,6 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, height = 1; assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits); - rtree->alloc = alloc; - rtree->dalloc = dalloc; rtree->height = height; /* Root level. 
*/ @@ -64,8 +61,43 @@ rtree_new(rtree_t *rtree, unsigned bits, rtree_node_alloc_t *alloc, return (false); } +#ifdef JEMALLOC_JET +#undef rtree_node_alloc +#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc_impl) +#endif +static rtree_elm_t * +rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) +{ + + return ((rtree_elm_t *)base_alloc(tsdn, nelms * sizeof(rtree_elm_t))); +} +#ifdef JEMALLOC_JET +#undef rtree_node_alloc +#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc) +rtree_node_alloc_t *rtree_node_alloc = JEMALLOC_N(rtree_node_alloc_impl); +#endif + +#ifdef JEMALLOC_JET +#undef rtree_node_dalloc +#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc_impl) +#endif +UNUSED static void +rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) +{ + + /* Nodes are never deleted during normal operation. */ + not_reached(); +} +#ifdef JEMALLOC_JET +#undef rtree_node_dalloc +#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc) +rtree_node_dalloc_t *rtree_node_dalloc = JEMALLOC_N(rtree_node_dalloc_impl); +#endif + +#ifdef JEMALLOC_JET static void -rtree_delete_subtree(rtree_t *rtree, rtree_elm_t *node, unsigned level) +rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, + unsigned level) { if (level + 1 < rtree->height) { @@ -74,27 +106,31 @@ rtree_delete_subtree(rtree_t *rtree, rtree_elm_t *node, unsigned level) nchildren = ZU(1) << rtree->levels[level].bits; for (i = 0; i < nchildren; i++) { rtree_elm_t *child = node[i].child; - if (child != NULL) - rtree_delete_subtree(rtree, child, level + 1); + if (child != NULL) { + rtree_delete_subtree(tsdn, rtree, child, level + + 1); + } } } - rtree->dalloc(node); + rtree_node_dalloc(tsdn, rtree, node); } void -rtree_delete(rtree_t *rtree) +rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { unsigned i; for (i = 0; i < rtree->height; i++) { rtree_elm_t *subtree = rtree->levels[i].subtree; if (subtree != NULL) - rtree_delete_subtree(rtree, subtree, i); + rtree_delete_subtree(tsdn, rtree, 
subtree, i); } } +#endif static rtree_elm_t * -rtree_node_init(rtree_t *rtree, unsigned level, rtree_elm_t **elmp) +rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, + rtree_elm_t **elmp) { rtree_elm_t *node; @@ -108,7 +144,8 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_elm_t **elmp) node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { - node = rtree->alloc(ZU(1) << rtree->levels[level].bits); + node = rtree_node_alloc(tsdn, rtree, ZU(1) << + rtree->levels[level].bits); if (node == NULL) return (NULL); atomic_write_p((void **)elmp, node); @@ -118,15 +155,17 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_elm_t **elmp) } rtree_elm_t * -rtree_subtree_read_hard(rtree_t *rtree, unsigned level) +rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level) { - return (rtree_node_init(rtree, level, &rtree->levels[level].subtree)); + return (rtree_node_init(tsdn, rtree, level, + &rtree->levels[level].subtree)); } rtree_elm_t * -rtree_child_read_hard(rtree_t *rtree, rtree_elm_t *elm, unsigned level) +rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, + unsigned level) { - return (rtree_node_init(rtree, level, &elm->child)); + return (rtree_node_init(tsdn, rtree, level, &elm->child)); } diff --git a/src/tcache.c b/src/tcache.c index c02f0f0c..8bd8df01 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -27,7 +27,7 @@ size_t tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsdn, iealloc(ptr), ptr, false)); + return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr, false)); } void @@ -101,7 +101,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. 
*/ - extent_t *extent = iealloc(*(tbin->avail - 1)); + extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1)); arena_t *bin_arena = extent_arena_get(extent); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -125,7 +125,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == bin_arena) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); @@ -185,7 +185,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena associated with the first object. */ - extent_t *extent = iealloc(*(tbin->avail - 1)); + extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1)); arena_t *locked_arena = extent_arena_get(extent); UNUSED bool idump; @@ -211,7 +211,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (i = 0; i < nflush; i++) { ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); - extent = iealloc(ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == locked_arena) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); @@ -394,7 +394,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) prof_idump(tsd_tsdn(tsd)); - idalloctm(tsd_tsdn(tsd), iealloc(tcache), tcache, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tcache), tcache, NULL, + true, true); } void diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 671e2c8a..9c992e11 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -1,10 +1,18 @@ #include "test/jemalloc_test.h" +rtree_node_alloc_t *rtree_node_alloc_orig; +rtree_node_dalloc_t *rtree_node_dalloc_orig; + +rtree_t *test_rtree; + static rtree_elm_t * -node_alloc(size_t nelms) 
+rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { rtree_elm_t *node; + if (rtree != test_rtree) + return rtree_node_alloc_orig(tsdn, rtree, nelms); + node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t)); assert_ptr_not_null(node, "Unexpected calloc() failure"); @@ -12,23 +20,33 @@ node_alloc(size_t nelms) } static void -node_dalloc(rtree_elm_t *node) +rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { + if (rtree != test_rtree) { + rtree_node_dalloc_orig(tsdn, rtree, node); + return; + } + free(node); } TEST_BEGIN(test_rtree_read_empty) { + tsdn_t *tsdn; unsigned i; + tsdn = tsdn_fetch(); + for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { rtree_t rtree; - assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + test_rtree = &rtree; + assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_read(&rtree, 0, false), + assert_ptr_null(rtree_read(tsdn, &rtree, 0, false), "rtree_read() should return NULL for empty tree"); - rtree_delete(&rtree); + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } } TEST_END @@ -50,30 +68,34 @@ thd_start(void *varg) thd_start_arg_t *arg = (thd_start_arg_t *)varg; sfmt_t *sfmt; extent_t *extent; + tsdn_t *tsdn; unsigned i; sfmt = init_gen_rand(arg->seed); extent = (extent_t *)malloc(sizeof(extent)); assert_ptr_not_null(extent, "Unexpected malloc() failure"); + tsdn = tsdn_fetch(); for (i = 0; i < NITERS; i++) { uintptr_t key = (uintptr_t)gen_rand64(sfmt); if (i % 2 == 0) { rtree_elm_t *elm; - elm = rtree_elm_acquire(&arg->rtree, key, false, true); + elm = rtree_elm_acquire(tsdn, &arg->rtree, key, false, + true); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); rtree_elm_write_acquired(elm, extent); rtree_elm_release(elm); - elm = rtree_elm_acquire(&arg->rtree, key, true, false); + elm = rtree_elm_acquire(tsdn, &arg->rtree, key, true, + false); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); 
rtree_elm_read_acquired(elm); rtree_elm_release(elm); } else - rtree_read(&arg->rtree, key, false); + rtree_read(tsdn, &arg->rtree, key, false); } free(extent); @@ -86,19 +108,23 @@ TEST_BEGIN(test_rtree_concurrent) thd_start_arg_t arg; thd_t thds[NTHREADS]; sfmt_t *sfmt; + tsdn_t *tsdn; unsigned i, j; sfmt = init_gen_rand(SEED); + tsdn = tsdn_fetch(); for (i = 1; i < MAX_NBITS; i++) { arg.nbits = i; - assert_false(rtree_new(&arg.rtree, arg.nbits, node_alloc, - node_dalloc), "Unexpected rtree_new() failure"); + test_rtree = &arg.rtree; + assert_false(rtree_new(&arg.rtree, arg.nbits), + "Unexpected rtree_new() failure"); arg.seed = gen_rand32(sfmt); for (j = 0; j < NTHREADS; j++) thd_create(&thds[j], thd_start, (void *)&arg); for (j = 0; j < NTHREADS; j++) thd_join(thds[j], NULL); - rtree_delete(&arg.rtree); + rtree_delete(tsdn, &arg.rtree); + test_rtree = NULL; } fini_gen_rand(sfmt); } @@ -113,60 +139,70 @@ TEST_BEGIN(test_rtree_extrema) { unsigned i; extent_t extent_a, extent_b; + tsdn_t *tsdn; + + tsdn = tsdn_fetch(); for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { rtree_t rtree; - assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + test_rtree = &rtree; + assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); - assert_false(rtree_write(&rtree, 0, &extent_a), + assert_false(rtree_write(tsdn, &rtree, 0, &extent_a), "Unexpected rtree_write() failure, i=%u", i); - assert_ptr_eq(rtree_read(&rtree, 0, true), &extent_a, + assert_ptr_eq(rtree_read(tsdn, &rtree, 0, true), &extent_a, "rtree_read() should return previously set value, i=%u", i); - assert_false(rtree_write(&rtree, ~((uintptr_t)0), &extent_b), - "Unexpected rtree_write() failure, i=%u", i); - assert_ptr_eq(rtree_read(&rtree, ~((uintptr_t)0), true), + assert_false(rtree_write(tsdn, &rtree, ~((uintptr_t)0), + &extent_b), "Unexpected rtree_write() failure, i=%u", i); + assert_ptr_eq(rtree_read(tsdn, &rtree, ~((uintptr_t)0), true), &extent_b, "rtree_read() should return previously set 
value, i=%u", i); - rtree_delete(&rtree); + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } } TEST_END TEST_BEGIN(test_rtree_bits) { + tsdn_t *tsdn; unsigned i, j, k; + tsdn = tsdn_fetch(); + for (i = 1; i < (sizeof(uintptr_t) << 3); i++) { uintptr_t keys[] = {0, 1, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; extent_t extent; rtree_t rtree; - assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + test_rtree = &rtree; + assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_false(rtree_write(&rtree, keys[j], &extent), - "Unexpected rtree_write() failure"); + assert_false(rtree_write(tsdn, &rtree, keys[j], + &extent), "Unexpected rtree_write() failure"); for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { - assert_ptr_eq(rtree_read(&rtree, keys[k], true), - &extent, "rtree_read() should return " - "previously set value and ignore " + assert_ptr_eq(rtree_read(tsdn, &rtree, keys[k], + true), &extent, "rtree_read() should " + "return previously set value and ignore " "insignificant key bits; i=%u, j=%u, k=%u, " "set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, k, keys[j], keys[k]); } - assert_ptr_null(rtree_read(&rtree, + assert_ptr_null(rtree_read(tsdn, &rtree, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)), false), "Only leftmost rtree leaf should be set; " "i=%u, j=%u", i, j); - rtree_clear(&rtree, keys[j]); + rtree_clear(tsdn, &rtree, keys[j]); } - rtree_delete(&rtree); + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } } TEST_END @@ -175,10 +211,12 @@ TEST_BEGIN(test_rtree_random) { unsigned i; sfmt_t *sfmt; + tsdn_t *tsdn; #define NSET 16 #define SEED 42 sfmt = init_gen_rand(SEED); + tsdn = tsdn_fetch(); for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { uintptr_t keys[NSET]; extent_t extent; @@ -186,37 +224,40 @@ TEST_BEGIN(test_rtree_random) rtree_t rtree; rtree_elm_t *elm; - assert_false(rtree_new(&rtree, i, node_alloc, node_dalloc), + test_rtree = &rtree; + 
assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); for (j = 0; j < NSET; j++) { keys[j] = (uintptr_t)gen_rand64(sfmt); - elm = rtree_elm_acquire(&rtree, keys[j], false, true); + elm = rtree_elm_acquire(tsdn, &rtree, keys[j], false, + true); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); rtree_elm_write_acquired(elm, &extent); rtree_elm_release(elm); - assert_ptr_eq(rtree_read(&rtree, keys[j], true), + assert_ptr_eq(rtree_read(tsdn, &rtree, keys[j], true), &extent, "rtree_read() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_read(&rtree, keys[j], true), + assert_ptr_eq(rtree_read(tsdn, &rtree, keys[j], true), &extent, "rtree_read() should return previously " "set value, j=%u", j); } for (j = 0; j < NSET; j++) { - rtree_clear(&rtree, keys[j]); - assert_ptr_null(rtree_read(&rtree, keys[j], true), + rtree_clear(tsdn, &rtree, keys[j]); + assert_ptr_null(rtree_read(tsdn, &rtree, keys[j], true), "rtree_read() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_null(rtree_read(&rtree, keys[j], true), + assert_ptr_null(rtree_read(tsdn, &rtree, keys[j], true), "rtree_read() should return previously set value"); } - rtree_delete(&rtree); + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } fini_gen_rand(sfmt); #undef NSET @@ -228,6 +269,12 @@ int main(void) { + rtree_node_alloc_orig = rtree_node_alloc; + rtree_node_alloc = rtree_node_alloc_intercept; + rtree_node_dalloc_orig = rtree_node_dalloc; + rtree_node_dalloc = rtree_node_dalloc_intercept; + test_rtree = NULL; + return (test( test_rtree_read_empty, test_rtree_concurrent, From e75e9be130910a7344f553e5e6c664047a0d0464 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 12:55:10 -0700 Subject: [PATCH 0262/2608] Add rtree element witnesses. 
--- include/jemalloc/internal/mutex.h | 11 +- include/jemalloc/internal/private_symbols.txt | 4 + include/jemalloc/internal/rtree.h | 76 +++++++++-- include/jemalloc/internal/tsd.h | 2 + include/jemalloc/internal/witness.h | 16 ++- src/mutex.c | 2 +- src/rtree.c | 123 ++++++++++++++++++ src/witness.c | 3 +- test/unit/rtree.c | 12 +- test/unit/witness.c | 32 +++-- 10 files changed, 241 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 52217991..b4e01ff8 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,21 +6,24 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} +# define MALLOC_MUTEX_INITIALIZER \ + {0, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, NULL, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} + {PTHREAD_MUTEX_INITIALIZER, NULL, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #else # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER \ {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ - WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} + {PTHREAD_MUTEX_INITIALIZER, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 42c730c6..102f01c0 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ 
b/include/jemalloc/internal/private_symbols.txt @@ -468,6 +468,10 @@ rtree_elm_lookup rtree_elm_read rtree_elm_read_acquired rtree_elm_release +rtree_elm_witness_access +rtree_elm_witness_acquire +rtree_elm_witness_release +rtree_elm_witnesses_cleanup rtree_elm_write rtree_elm_write_acquired rtree_read diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index dbea434c..e62ab6b9 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -7,6 +7,8 @@ #ifdef JEMALLOC_H_TYPES typedef struct rtree_elm_s rtree_elm_t; +typedef struct rtree_elm_witness_s rtree_elm_witness_t; +typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; typedef struct rtree_level_s rtree_level_t; typedef struct rtree_s rtree_t; @@ -23,6 +25,29 @@ typedef struct rtree_s rtree_t; /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1) +/* + * Maximum number of concurrently acquired elements per thread. This controls + * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would + * have a witness_t directly embedded, but that would dramatically bloat the + * tree. This must contain enough entries to e.g. coalesce two extents. + */ +#define RTREE_ELM_ACQUIRE_MAX 4 + +/* Initializers for rtree_elm_witness_tsd_t. 
*/ +#define RTREE_ELM_WITNESS_INITIALIZER { \ + NULL, \ + WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ +} + +#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ + { \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER \ + } \ +} + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -35,6 +60,15 @@ struct rtree_elm_s { }; }; +struct rtree_elm_witness_s { + const rtree_elm_t *elm; + witness_t witness; +}; + +struct rtree_elm_witness_tsd_s { + rtree_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; +}; + struct rtree_level_s { /* * A non-NULL subtree points to a subtree rooted along the hypothetical @@ -97,6 +131,13 @@ rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level); rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level); +void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, + uintptr_t key, const rtree_elm_t *elm); +void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm); +void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm); +void rtree_elm_witnesses_cleanup(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -125,9 +166,11 @@ extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent); rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, bool init_missing); -extent_t *rtree_elm_read_acquired(rtree_elm_t *elm); -void rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent); -void rtree_elm_release(rtree_elm_t *elm); +extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_elm_t *elm); +void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, + 
rtree_elm_t *elm, const extent_t *extent); +void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key); #endif @@ -393,11 +436,14 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, } while (atomic_cas_p(&elm->pun, (void *)extent, s)); } + if (config_debug) + rtree_elm_witness_acquire(tsdn, rtree, key, elm); + return (elm); } JEMALLOC_INLINE extent_t * -rtree_elm_read_acquired(rtree_elm_t *elm) +rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { extent_t *extent; @@ -405,24 +451,34 @@ rtree_elm_read_acquired(rtree_elm_t *elm) extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1)); assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + if (config_debug) + rtree_elm_witness_access(tsdn, rtree, elm); + return (extent); } JEMALLOC_INLINE void -rtree_elm_write_acquired(rtree_elm_t *elm, const extent_t *extent) +rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, + const extent_t *extent) { assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); + + if (config_debug) + rtree_elm_witness_access(tsdn, rtree, elm); + elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1); - assert(rtree_elm_read_acquired(elm) == extent); + assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent); } JEMALLOC_INLINE void -rtree_elm_release(rtree_elm_t *elm) +rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { - rtree_elm_write(elm, rtree_elm_read_acquired(elm)); + rtree_elm_write(elm, rtree_elm_read_acquired(tsdn, rtree, elm)); + if (config_debug) + rtree_elm_witness_release(tsdn, rtree, elm); } JEMALLOC_INLINE void @@ -431,8 +487,8 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key) rtree_elm_t *elm; elm = rtree_elm_acquire(tsdn, rtree, key, true, false); - rtree_elm_write_acquired(elm, NULL); - 
rtree_elm_release(elm); + rtree_elm_write_acquired(tsdn, rtree, elm, NULL); + rtree_elm_release(tsdn, rtree, elm); } #endif diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index f4ff8d76..ca8915ea 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -573,6 +573,7 @@ struct tsd_init_head_s { O(arenas_tdata_bypass, bool) \ O(tcache_enabled, tcache_enabled_t) \ O(witnesses, witness_list_t) \ + O(rtree_elm_witnesses, rtree_elm_witness_tsd_t) \ O(witness_fork, bool) \ #define TSD_INITIALIZER { \ @@ -588,6 +589,7 @@ struct tsd_init_head_s { false, \ tcache_enabled_default, \ ql_head_initializer(witnesses), \ + RTREE_ELM_WITNESS_TSD_INITIALIZER, \ false \ } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index c68c9694..f15665bc 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -4,7 +4,8 @@ typedef struct witness_s witness_t; typedef unsigned witness_rank_t; typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, const witness_t *); +typedef int witness_comp_t (const witness_t *, void *, const witness_t *, + void *); /* * Lock ranks. 
Witnesses with rank WITNESS_RANK_OMIT are completely ignored by @@ -26,7 +27,8 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); #define WITNESS_RANK_ARENA_CHUNKS 9U #define WITNESS_RANK_ARENA_EXTENT_CACHE 10 -#define WITNESS_RANK_BASE 11U +#define WITNESS_RANK_RTREE_ELM 11U +#define WITNESS_RANK_BASE 12U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF @@ -38,7 +40,7 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF -#define WITNESS_INITIALIZER(rank) {"initializer", rank, NULL, {NULL, NULL}} +#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -61,6 +63,9 @@ struct witness_s { */ witness_comp_t *comp; + /* Opaque data, passed to comp(). */ + void *opaque; + /* Linkage for thread's currently owned locks. */ ql_elm(witness_t) link; }; @@ -70,7 +75,7 @@ struct witness_s { #ifdef JEMALLOC_H_EXTERNS void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp); + witness_comp_t *comp, void *opaque); #ifdef JEMALLOC_JET typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t *witness_lock_error; @@ -211,7 +216,8 @@ witness_lock(tsdn_t *tsdn, witness_t *witness) /* Not forking, rank order reversal. */ witness_lock_error(witnesses, witness); } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, witness) > 0)) { + witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > + 0)) { /* * Missing/incompatible comparison function, or comparison * function indicates rank order reversal. 
diff --git a/src/mutex.c b/src/mutex.c index a1fac342..119b8e35 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -104,7 +104,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) pthread_mutexattr_destroy(&attr); #endif if (config_debug) - witness_init(&mutex->witness, name, rank, NULL); + witness_init(&mutex->witness, name, rank, NULL, NULL); return (false); } diff --git a/src/rtree.c b/src/rtree.c index c6b64cf4..504f9f2e 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -169,3 +169,126 @@ rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, return (rtree_node_init(tsdn, rtree, level, &elm->child)); } + +static int +rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, + void *ob) +{ + uintptr_t ka = (uintptr_t)oa; + uintptr_t kb = (uintptr_t)ob; + + assert(ka != 0); + assert(kb != 0); + + return ((ka > kb) - (ka < kb)); +} + +static witness_t * +rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) +{ + witness_t *witness; + size_t i; + rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); + + /* Iterate over entire array to detect double allocation attempts. 
*/ + witness = NULL; + for (i = 0; i < sizeof(rtree_elm_witness_tsd_t) / sizeof(witness_t); + i++) { + rtree_elm_witness_t *rew = &witnesses->witnesses[i]; + + assert(rew->elm != elm); + if (rew->elm == NULL && witness == NULL) { + rew->elm = elm; + witness = &rew->witness; + witness_init(witness, "rtree_elm", + WITNESS_RANK_RTREE_ELM, rtree_elm_witness_comp, + (void *)key); + } + } + assert(witness != NULL); + return (witness); +} + +static witness_t * +rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) +{ + size_t i; + rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); + + for (i = 0; i < sizeof(rtree_elm_witness_tsd_t) / sizeof(witness_t); + i++) { + rtree_elm_witness_t *rew = &witnesses->witnesses[i]; + + if (rew->elm == elm) + return (&rew->witness); + } + not_reached(); +} + +static void +rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, const rtree_elm_t *elm) +{ + size_t i; + rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); + + for (i = 0; i < sizeof(rtree_elm_witness_tsd_t) / sizeof(witness_t); + i++) { + rtree_elm_witness_t *rew = &witnesses->witnesses[i]; + + if (rew->elm == elm) { + rew->elm = NULL; + witness_init(&rew->witness, "rtree_elm", + WITNESS_RANK_RTREE_ELM, rtree_elm_witness_comp, + NULL); + return; + } + } + not_reached(); +} + +void +rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, uintptr_t key, + const rtree_elm_t *elm) +{ + witness_t *witness; + + if (tsdn_null(tsdn)) + return; + + witness = rtree_elm_witness_alloc(tsdn_tsd(tsdn), key, elm); + witness_lock(tsdn, witness); +} + +void +rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm) +{ + witness_t *witness; + + if (tsdn_null(tsdn)) + return; + + witness = rtree_elm_witness_find(tsdn_tsd(tsdn), elm); + witness_assert_owner(tsdn, witness); +} + +void +rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm) +{ + witness_t *witness; + + if 
(tsdn_null(tsdn)) + return; + + witness = rtree_elm_witness_find(tsdn_tsd(tsdn), elm); + witness_unlock(tsdn, witness); + rtree_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); +} + +void +rtree_elm_witnesses_cleanup(tsd_t *tsd) +{ + + /* Do nothing. */ +} diff --git a/src/witness.c b/src/witness.c index 23753f24..8efff56d 100644 --- a/src/witness.c +++ b/src/witness.c @@ -3,12 +3,13 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp) + witness_comp_t *comp, void *opaque) { witness->name = name; witness->rank = rank; witness->comp = comp; + witness->opaque = opaque; } #ifdef JEMALLOC_JET diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 9c992e11..786cc351 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -85,15 +85,15 @@ thd_start(void *varg) true); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); - rtree_elm_write_acquired(elm, extent); - rtree_elm_release(elm); + rtree_elm_write_acquired(tsdn, &arg->rtree, elm, extent); + rtree_elm_release(tsdn, &arg->rtree, elm); elm = rtree_elm_acquire(tsdn, &arg->rtree, key, true, false); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); - rtree_elm_read_acquired(elm); - rtree_elm_release(elm); + rtree_elm_read_acquired(tsdn, &arg->rtree, elm); + rtree_elm_release(tsdn, &arg->rtree, elm); } else rtree_read(tsdn, &arg->rtree, key, false); } @@ -234,8 +234,8 @@ TEST_BEGIN(test_rtree_random) true); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); - rtree_elm_write_acquired(elm, &extent); - rtree_elm_release(elm); + rtree_elm_write_acquired(tsdn, &rtree, elm, &extent); + rtree_elm_release(tsdn, &rtree, elm); assert_ptr_eq(rtree_read(tsdn, &rtree, keys[j], true), &extent, "rtree_read() should return previously set value"); diff --git a/test/unit/witness.c b/test/unit/witness.c index ed172753..2b012034 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -40,20 +40,26 @@ 
witness_lockless_error_intercept(const witness_list_t *witnesses) } static int -witness_comp(const witness_t *a, const witness_t *b) +witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + assert(oa == (void *)a); + assert(ob == (void *)b); + return (strcmp(a->name, b->name)); } static int -witness_comp_reverse(const witness_t *a, const witness_t *b) +witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, void *ob) { assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + assert(oa == (void *)a); + assert(ob == (void *)b); + return (-strcmp(a->name, b->name)); } @@ -68,12 +74,12 @@ TEST_BEGIN(test_witness) witness_assert_lockless(tsdn); - witness_init(&a, "a", 1, NULL); + witness_init(&a, "a", 1, NULL, NULL); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); - witness_init(&b, "b", 2, NULL); + witness_init(&b, "b", 2, NULL, NULL); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); @@ -96,12 +102,12 @@ TEST_BEGIN(test_witness_comp) witness_assert_lockless(tsdn); - witness_init(&a, "a", 1, witness_comp); + witness_init(&a, "a", 1, witness_comp, &a); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); - witness_init(&b, "b", 1, witness_comp); + witness_init(&b, "b", 1, witness_comp, &b); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); @@ -111,7 +117,7 @@ TEST_BEGIN(test_witness_comp) witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - witness_init(&c, "c", 1, witness_comp_reverse); + witness_init(&c, "c", 1, witness_comp_reverse, &c); witness_assert_not_owner(tsdn, &c); assert_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(tsdn, &c); @@ -120,7 +126,7 @@ TEST_BEGIN(test_witness_comp) saw_lock_error = false; - witness_init(&d, "d", 1, NULL); + 
witness_init(&d, "d", 1, NULL, NULL); witness_assert_not_owner(tsdn, &d); assert_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(tsdn, &d); @@ -150,8 +156,8 @@ TEST_BEGIN(test_witness_reversal) witness_assert_lockless(tsdn); - witness_init(&a, "a", 1, NULL); - witness_init(&b, "b", 2, NULL); + witness_init(&a, "a", 1, NULL, NULL); + witness_init(&b, "b", 2, NULL, NULL); witness_lock(tsdn, &b); assert_false(saw_lock_error, "Unexpected witness lock error"); @@ -186,7 +192,7 @@ TEST_BEGIN(test_witness_recursive) witness_assert_lockless(tsdn); - witness_init(&a, "a", 1, NULL); + witness_init(&a, "a", 1, NULL, NULL); witness_lock(tsdn, &a); assert_false(saw_lock_error, "Unexpected witness lock error"); @@ -220,7 +226,7 @@ TEST_BEGIN(test_witness_unlock_not_owned) witness_assert_lockless(tsdn); - witness_init(&a, "a", 1, NULL); + witness_init(&a, "a", 1, NULL, NULL); assert_false(saw_owner_error, "Unexpected owner error"); witness_unlock(tsdn, &a); @@ -247,7 +253,7 @@ TEST_BEGIN(test_witness_lockful) witness_assert_lockless(tsdn); - witness_init(&a, "a", 1, NULL); + witness_init(&a, "a", 1, NULL, NULL); assert_false(saw_lockless_error, "Unexpected lockless error"); witness_assert_lockless(tsdn); From b2a9fae88652f39b80dc1d25fa842dae8166263d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 27 Mar 2016 18:51:12 -0700 Subject: [PATCH 0263/2608] Set/unset rtree node for last chunk of extents. Set/unset rtree node for last chunk of extents, so that the rtree can be used for chunk coalescing. 
--- src/chunk.c | 45 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 4443368a..beef41fe 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -143,14 +143,35 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) { + size_t size; + rtree_elm_t *elm_a; assert(extent_addr_get(extent) == chunk); - if (rtree_write(tsdn, &chunks_rtree, (uintptr_t)chunk, extent)) + size = extent_size_get(extent); + + if ((elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)chunk, + false, true)) == NULL) return (true); + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, extent); + if (size > chunksize) { + uintptr_t last = ((uintptr_t)chunk + + (uintptr_t)(CHUNK_CEILING(size - chunksize))); + rtree_elm_t *elm_b; + + if ((elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, last, false, + true)) == NULL) { + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, + NULL); + rtree_elm_release(tsdn, &chunks_rtree, elm_a); + return (true); + } + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_b, extent); + rtree_elm_release(tsdn, &chunks_rtree, elm_b); + } + rtree_elm_release(tsdn, &chunks_rtree, elm_a); if (config_prof && opt_prof) { - size_t size = extent_size_get(extent); size_t nadd = (size == 0) ? 
1 : size / chunksize; size_t cur = atomic_add_z(&curchunks, nadd); size_t high = atomic_read_z(&highchunks); @@ -171,10 +192,26 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) void chunk_deregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent) { + size_t size; + rtree_elm_t *elm_a; + + size = extent_size_get(extent); + + elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)chunk, true, + false); + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, NULL); + if (size > chunksize) { + uintptr_t last = ((uintptr_t)chunk + + (uintptr_t)(CHUNK_CEILING(size - chunksize))); + rtree_elm_t *elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, + last, true, false); + + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_b, NULL); + rtree_elm_release(tsdn, &chunks_rtree, elm_b); + } + rtree_elm_release(tsdn, &chunks_rtree, elm_a); - rtree_clear(tsdn, &chunks_rtree, (uintptr_t)chunk); if (config_prof && opt_prof) { - size_t size = extent_size_get(extent); size_t nsub = (size == 0) ? 1 : size / chunksize; assert(atomic_read_z(&curchunks) >= nsub); atomic_sub_z(&curchunks, nsub); From 6f718446596c83cadc3fa53625953291655bfcdf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 7 Apr 2016 10:14:37 -0400 Subject: [PATCH 0264/2608] Move *PAGE* definitions to pages.h. --- include/jemalloc/internal/jemalloc_internal.h.in | 15 --------------- include/jemalloc/internal/pages.h | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d1306e17..c7d97371 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -311,21 +311,6 @@ typedef unsigned szind_t; #define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) -/* Page size. LG_PAGE is determined by the configure script. 
*/ -#ifdef PAGE_MASK -# undef PAGE_MASK -#endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) - -/* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~PAGE_MASK)) - -/* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) - /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ ((void *)((uintptr_t)(a) & (-(alignment)))) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index e21effd1..16c657a0 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,6 +1,21 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +/* Page size. LG_PAGE is determined by the configure script. */ +#ifdef PAGE_MASK +# undef PAGE_MASK +#endif +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) + +/* Return the page base address for the page containing address a. */ +#define PAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + +/* Return the smallest pagesize multiple that is >= s. */ +#define PAGE_CEILING(s) \ + (((s) + PAGE_MASK) & ~PAGE_MASK) + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS From fae83440989e06c52acea0d06e70a4c27b9739f5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 28 Mar 2016 03:17:10 -0700 Subject: [PATCH 0265/2608] Add extent_active_[gs]et(). Always initialize extents' runs_dirty and chunks_cache linkage. 
--- include/jemalloc/internal/extent.h | 31 +++++++++++++------ .../jemalloc/internal/jemalloc_internal.h.in | 1 + include/jemalloc/internal/private_symbols.txt | 3 +- src/arena.c | 6 ++-- src/base.c | 4 +-- src/chunk.c | 11 ++++--- src/huge.c | 2 +- 7 files changed, 37 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index acc67f00..33f5932c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -18,6 +18,9 @@ struct extent_s { /* Total region size. */ size_t e_size; + /* True if extent is active (in use). */ + bool e_active; + /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. @@ -73,6 +76,7 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_t) arena_t *extent_arena_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); +bool extent_active_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); bool extent_achunk_get(const extent_t *extent); @@ -80,13 +84,13 @@ prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); void extent_size_set(extent_t *extent, size_t size); +void extent_active_set(extent_t *extent, bool active); void extent_zeroed_set(extent_t *extent, bool zeroed); void extent_committed_set(extent_t *extent, bool committed); void extent_achunk_set(extent_t *extent, bool achunk); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, bool zeroed, bool committed); -void extent_dirty_linkage_init(extent_t *extent); + size_t size, bool active, bool zeroed, bool committed); void extent_dirty_insert(extent_t *extent, arena_runs_dirty_link_t *runs_dirty, extent_t 
*chunks_dirty); void extent_dirty_remove(extent_t *extent); @@ -114,6 +118,13 @@ extent_size_get(const extent_t *extent) return (extent->e_size); } +JEMALLOC_INLINE bool +extent_active_get(const extent_t *extent) +{ + + return (extent->e_active); +} + JEMALLOC_INLINE bool extent_zeroed_get(const extent_t *extent) { @@ -164,6 +175,13 @@ extent_size_set(extent_t *extent, size_t size) extent->e_size = size; } +JEMALLOC_INLINE void +extent_active_set(extent_t *extent, bool active) +{ + + extent->e_active = active; +} + JEMALLOC_INLINE void extent_zeroed_set(extent_t *extent, bool zeroed) { @@ -194,23 +212,18 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - bool zeroed, bool committed) + bool active, bool zeroed, bool committed) { extent_arena_set(extent, arena); extent_addr_set(extent, addr); extent_size_set(extent, size); + extent_active_set(extent, active); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); extent_achunk_set(extent, false); if (config_prof) extent_prof_tctx_set(extent, NULL); -} - -JEMALLOC_INLINE void -extent_dirty_linkage_init(extent_t *extent) -{ - qr_new(&extent->rd, rd_link); qr_new(extent, cc_link); } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c7d97371..a7f07818 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1093,6 +1093,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) extent = chunk_lookup(tsdn, ptr, false); if (extent == NULL) return (0); + assert(extent_active_get(extent)); /* Only arena chunks should be looked up via interior pointers. 
*/ assert(extent_addr_get(extent) == ptr || extent_achunk_get(extent)); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 102f01c0..19402462 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -207,6 +207,8 @@ decay_ticker_get dss_prec_names extent_achunk_get extent_achunk_set +extent_active_get +extent_active_set extent_addr_get extent_addr_set extent_arena_get @@ -214,7 +216,6 @@ extent_arena_set extent_committed_get extent_committed_set extent_dirty_insert -extent_dirty_linkage_init extent_dirty_remove extent_init extent_prof_tctx_get diff --git a/src/arena.c b/src/arena.c index 8a93fca0..5a4605d0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -225,7 +225,6 @@ arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) { if (cache) { - extent_dirty_linkage_init(extent); extent_dirty_insert(extent, &arena->runs_dirty, &arena->chunks_cache); arena->ndirty += arena_chunk_dirty_npages(extent); @@ -526,7 +525,7 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, * runs is tracked individually, and upon chunk deallocation the entire * chunk is in a consistent commit state. 
*/ - extent_init(&chunk->extent, arena, chunk, chunksize, zero, true); + extent_init(&chunk->extent, arena, chunk, chunksize, true, zero, true); extent_achunk_set(&chunk->extent, true); return (chunk_register(tsdn, chunk, &chunk->extent)); } @@ -1723,7 +1722,8 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); qr_new(&purge_runs_sentinel, rd_link); - extent_dirty_linkage_init(&purge_chunks_sentinel); + extent_init(&purge_chunks_sentinel, arena, NULL, 0, false, false, + false); npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); diff --git a/src/base.c b/src/base.c index a9ab279e..0176fb80 100644 --- a/src/base.c +++ b/src/base.c @@ -66,7 +66,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, csize, true, true); + extent_init(extent, NULL, addr, csize, true, true, true); return (extent); } @@ -90,7 +90,7 @@ base_alloc(tsdn_t *tsdn, size_t size) csize = CACHELINE_CEILING(size); usize = s2u(csize); - extent_init(&key, NULL, NULL, usize, false, false); + extent_init(&key, NULL, NULL, usize, false, false, false); malloc_mutex_lock(tsdn, &base_mtx); extent = extent_tree_szad_nsearch(&base_avail_szad, &key); if (extent != NULL) { diff --git a/src/chunk.c b/src/chunk.c index beef41fe..055b4fcb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -239,7 +239,7 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, assert(size == CHUNK_CEILING(size)); - extent_init(&key, arena, NULL, size, false, false); + extent_init(&key, arena, NULL, size, false, false, false); return (extent_tree_szad_nsearch(chunks_szad, &key)); } @@ -270,7 +270,8 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { extent_t key; - extent_init(&key, arena, new_addr, 
alloc_size, false, false); + extent_init(&key, arena, new_addr, alloc_size, false, false, + false); extent = extent_tree_ad_search(chunks_ad, &key); } else { extent = chunk_first_best_fit(arena, chunks_szad, chunks_ad, @@ -336,7 +337,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } } extent_init(extent, arena, (void *)((uintptr_t)(ret) + size), - trailsize, zeroed, committed); + trailsize, false, zeroed, committed); extent_tree_szad_insert(chunks_szad, extent); extent_tree_ad_insert(chunks_ad, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); @@ -534,7 +535,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); extent_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, - false); + false, false); extent = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ if (extent != NULL && extent_addr_get(extent) == extent_addr_get(&key) @@ -570,7 +571,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } goto label_return; } - extent_init(extent, arena, chunk, size, !unzeroed, + extent_init(extent, arena, chunk, size, false, !unzeroed, committed); extent_tree_ad_insert(chunks_ad, extent); extent_tree_szad_insert(chunks_szad, extent); diff --git a/src/huge.c b/src/huge.c index 0b91c369..b6ad4ba6 100644 --- a/src/huge.c +++ b/src/huge.c @@ -50,7 +50,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); } - extent_init(extent, arena, ret, usize, is_zeroed, true); + extent_init(extent, arena, ret, usize, true, is_zeroed, true); if (chunk_register(tsdn, ret, extent)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize); From d78846c98978ea439c2514638de69be6d3d86a11 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 7 Apr 2016 10:24:14 -0400 Subject: [PATCH 0266/2608] Replace extent_achunk_[gs]et() with 
extent_slab_[gs]et(). --- include/jemalloc/internal/arena.h | 10 +++---- include/jemalloc/internal/extent.h | 27 ++++++++++--------- .../jemalloc/internal/jemalloc_internal.h.in | 3 +-- include/jemalloc/internal/private_symbols.txt | 4 +-- src/arena.c | 6 ++--- src/base.c | 4 +-- src/chunk.c | 10 +++---- src/huge.c | 2 +- 8 files changed, 33 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index ff3e01d8..52e7197a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -1185,7 +1185,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) cassert(config_prof); assert(ptr != NULL); - if (likely(extent_achunk_get(extent))) { + if (likely(extent_slab_get(extent))) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); @@ -1211,7 +1211,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, cassert(config_prof); assert(ptr != NULL); - if (likely(extent_achunk_get(extent))) { + if (likely(extent_slab_get(extent))) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1331,7 +1331,7 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) assert(ptr != NULL); - if (likely(extent_achunk_get(extent))) { + if (likely(extent_slab_get(extent))) { const arena_chunk_t *chunk = (const arena_chunk_t *)extent_addr_get(extent); @@ -1381,7 +1381,7 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - if (likely(extent_achunk_get(extent))) { + if (likely(extent_slab_get(extent))) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; @@ -1428,7 +1428,7 @@ 
arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, assert(!tsdn_null(tsdn) || tcache == NULL); - if (likely(extent_achunk_get(extent))) { + if (likely(extent_slab_get(extent))) { arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); if (config_prof && opt_prof) { diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 33f5932c..d7db49d4 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -35,10 +35,11 @@ struct extent_s { bool e_committed; /* - * The achunk flag is used to validate that huge allocation lookups - * don't return arena chunks. + * The slab flag indicates whether the extent is used for a slab of + * small regions. This helps differentiate small size classes, and it + * indicates whether interior pointers can be looked up via iealloc(). */ - bool e_achunk; + bool e_slab; /* Profile counters, used for huge objects. */ prof_tctx_t *e_prof_tctx; @@ -79,7 +80,7 @@ size_t extent_size_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); -bool extent_achunk_get(const extent_t *extent); +bool extent_slab_get(const extent_t *extent); prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); @@ -87,10 +88,10 @@ void extent_size_set(extent_t *extent, size_t size); void extent_active_set(extent_t *extent, bool active); void extent_zeroed_set(extent_t *extent, bool zeroed); void extent_committed_set(extent_t *extent, bool committed); -void extent_achunk_set(extent_t *extent, bool achunk); +void extent_slab_set(extent_t *extent, bool slab); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, bool active, bool zeroed, bool committed); + size_t 
size, bool active, bool zeroed, bool committed, bool slab); void extent_dirty_insert(extent_t *extent, arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty); void extent_dirty_remove(extent_t *extent); @@ -136,15 +137,15 @@ JEMALLOC_INLINE bool extent_committed_get(const extent_t *extent) { - assert(!extent->e_achunk); + assert(!extent->e_slab); return (extent->e_committed); } JEMALLOC_INLINE bool -extent_achunk_get(const extent_t *extent) +extent_slab_get(const extent_t *extent) { - return (extent->e_achunk); + return (extent->e_slab); } JEMALLOC_INLINE prof_tctx_t * @@ -197,10 +198,10 @@ extent_committed_set(extent_t *extent, bool committed) } JEMALLOC_INLINE void -extent_achunk_set(extent_t *extent, bool achunk) +extent_slab_set(extent_t *extent, bool slab) { - extent->e_achunk = achunk; + extent->e_slab = slab; } JEMALLOC_INLINE void @@ -212,7 +213,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - bool active, bool zeroed, bool committed) + bool active, bool zeroed, bool committed, bool slab) { extent_arena_set(extent, arena); @@ -221,7 +222,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_active_set(extent, active); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); - extent_achunk_set(extent, false); + extent_slab_set(extent, slab); if (config_prof) extent_prof_tctx_set(extent, NULL); qr_new(&extent->rd, rd_link); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a7f07818..7afe5694 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1095,8 +1095,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) return (0); assert(extent_active_get(extent)); /* Only arena chunks should be looked up via interior pointers. 
*/ - assert(extent_addr_get(extent) == ptr || - extent_achunk_get(extent)); + assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); return (isalloc(tsdn, extent, ptr, demote)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 19402462..23c206bc 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -205,8 +205,6 @@ ctl_postfork_parent ctl_prefork decay_ticker_get dss_prec_names -extent_achunk_get -extent_achunk_set extent_active_get extent_active_set extent_addr_get @@ -222,6 +220,8 @@ extent_prof_tctx_get extent_prof_tctx_set extent_size_get extent_size_set +extent_slab_get +extent_slab_set extent_tree_ad_destroy extent_tree_ad_destroy_recurse extent_tree_ad_empty diff --git a/src/arena.c b/src/arena.c index 5a4605d0..011320ed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -525,8 +525,8 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, * runs is tracked individually, and upon chunk deallocation the entire * chunk is in a consistent commit state. 
*/ - extent_init(&chunk->extent, arena, chunk, chunksize, true, zero, true); - extent_achunk_set(&chunk->extent, true); + extent_init(&chunk->extent, arena, chunk, chunksize, true, zero, true, + true); return (chunk_register(tsdn, chunk, &chunk->extent)); } @@ -1723,7 +1723,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) qr_new(&purge_runs_sentinel, rd_link); extent_init(&purge_chunks_sentinel, arena, NULL, 0, false, false, - false); + false, false); npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); diff --git a/src/base.c b/src/base.c index 0176fb80..2a6df4dd 100644 --- a/src/base.c +++ b/src/base.c @@ -66,7 +66,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, csize, true, true, true); + extent_init(extent, NULL, addr, csize, true, true, true, false); return (extent); } @@ -90,7 +90,7 @@ base_alloc(tsdn_t *tsdn, size_t size) csize = CACHELINE_CEILING(size); usize = s2u(csize); - extent_init(&key, NULL, NULL, usize, false, false, false); + extent_init(&key, NULL, NULL, usize, false, false, false, false); malloc_mutex_lock(tsdn, &base_mtx); extent = extent_tree_szad_nsearch(&base_avail_szad, &key); if (extent != NULL) { diff --git a/src/chunk.c b/src/chunk.c index 055b4fcb..b691286b 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -239,7 +239,7 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, assert(size == CHUNK_CEILING(size)); - extent_init(&key, arena, NULL, size, false, false, false); + extent_init(&key, arena, NULL, size, false, false, false, false); return (extent_tree_szad_nsearch(chunks_szad, &key)); } @@ -271,7 +271,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (new_addr != NULL) { extent_t key; extent_init(&key, arena, new_addr, alloc_size, false, false, - false); + false, false); extent = extent_tree_ad_search(chunks_ad, &key); } else { 
extent = chunk_first_best_fit(arena, chunks_szad, chunks_ad, @@ -337,7 +337,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } } extent_init(extent, arena, (void *)((uintptr_t)(ret) + size), - trailsize, false, zeroed, committed); + trailsize, false, zeroed, committed, false); extent_tree_szad_insert(chunks_szad, extent); extent_tree_ad_insert(chunks_ad, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); @@ -535,7 +535,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); extent_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, - false, false); + false, false, false); extent = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ if (extent != NULL && extent_addr_get(extent) == extent_addr_get(&key) @@ -572,7 +572,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, goto label_return; } extent_init(extent, arena, chunk, size, false, !unzeroed, - committed); + committed, false); extent_tree_ad_insert(chunks_ad, extent); extent_tree_szad_insert(chunks_szad, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); diff --git a/src/huge.c b/src/huge.c index b6ad4ba6..03eea139 100644 --- a/src/huge.c +++ b/src/huge.c @@ -50,7 +50,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); } - extent_init(extent, arena, ret, usize, true, is_zeroed, true); + extent_init(extent, arena, ret, usize, true, is_zeroed, true, false); if (chunk_register(tsdn, ret, extent)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize); From 9aea58d9a242ac6d1623aa1c1fcd737823e5e51b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 7 Apr 2016 10:34:26 -0400 Subject: [PATCH 0267/2608] Add extent_past_get(). 
--- include/jemalloc/internal/extent.h | 8 ++++++++ include/jemalloc/internal/private_symbols.txt | 1 + 2 files changed, 9 insertions(+) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index d7db49d4..d6376d5d 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -77,6 +77,7 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_t) arena_t *extent_arena_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); +void *extent_past_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); @@ -119,6 +120,13 @@ extent_size_get(const extent_t *extent) return (extent->e_size); } +JEMALLOC_INLINE void * +extent_past_get(const extent_t *extent) +{ + + return ((void *)(uintptr_t)extent->e_addr + extent->e_size); +} + JEMALLOC_INLINE bool extent_active_get(const extent_t *extent) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 23c206bc..f81cecc3 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -216,6 +216,7 @@ extent_committed_set extent_dirty_insert extent_dirty_remove extent_init +extent_past_get extent_prof_tctx_get extent_prof_tctx_set extent_size_get From f442254bdf4e15ce9f35fa667c5f0c8604c6910d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 May 2016 17:12:13 -0700 Subject: [PATCH 0268/2608] Fix opt_zero-triggered in-place huge reallocation zeroing. Fix huge_ralloc_no_move_expand() to update the extent's zeroed attribute based on the intersection of the previous value and that of the newly merged trailing extent. 
--- src/huge.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/huge.c b/src/huge.c index 03eea139..ea43236d 100644 --- a/src/huge.c +++ b/src/huge.c @@ -223,11 +223,10 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* - * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so - * that it is possible to make correct junk/zero fill decisions below. + * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, + * update extent's zeroed field, and zero as necessary. */ - is_zeroed_chunk = zero; - + is_zeroed_chunk = false; if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); @@ -236,6 +235,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, chunk_deregister(tsdn, ptr, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); + extent_zeroed_set(extent, extent_zeroed_get(extent) && is_zeroed_chunk); malloc_mutex_unlock(tsdn, &arena->huge_mtx); chunk_reregister(tsdn, ptr, extent); From 93e79c5c3fcb4987f7070cf92b5807ee615553b1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 May 2016 13:37:41 -0700 Subject: [PATCH 0269/2608] Remove redundant chunk argument from chunk_{,de,re}register(). 
--- include/jemalloc/internal/chunk.h | 8 +++----- src/arena.c | 4 ++-- src/chunk.c | 22 ++++++++++++---------- src/huge.c | 16 ++++++++-------- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index be56c2bd..05cf3d05 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -52,11 +52,9 @@ chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent); -void chunk_deregister(tsdn_t *tsdn, const void *chunk, - const extent_t *extent); -void chunk_reregister(tsdn_t *tsdn, const void *chunk, - const extent_t *extent); +bool chunk_register(tsdn_t *tsdn, const extent_t *extent); +void chunk_deregister(tsdn_t *tsdn, const extent_t *extent); +void chunk_reregister(tsdn_t *tsdn, const extent_t *extent); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, diff --git a/src/arena.c b/src/arena.c index 011320ed..d5ac7a10 100644 --- a/src/arena.c +++ b/src/arena.c @@ -527,7 +527,7 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, */ extent_init(&chunk->extent, arena, chunk, chunksize, true, zero, true, true); - return (chunk_register(tsdn, chunk, &chunk->extent)); + return (chunk_register(tsdn, &chunk->extent)); } static arena_chunk_t * @@ -665,7 +665,7 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_deregister(tsdn, chunk, &chunk->extent); + chunk_deregister(tsdn, &chunk->extent); committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { diff --git a/src/chunk.c b/src/chunk.c index b691286b..5a7980f1 100644 --- a/src/chunk.c 
+++ b/src/chunk.c @@ -141,21 +141,21 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, } bool -chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) +chunk_register(tsdn_t *tsdn, const extent_t *extent) { + const void *addr; size_t size; rtree_elm_t *elm_a; - assert(extent_addr_get(extent) == chunk); - + addr = extent_addr_get(extent); size = extent_size_get(extent); - if ((elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)chunk, + if ((elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)addr, false, true)) == NULL) return (true); rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, extent); if (size > chunksize) { - uintptr_t last = ((uintptr_t)chunk + + uintptr_t last = ((uintptr_t)addr + (uintptr_t)(CHUNK_CEILING(size - chunksize))); rtree_elm_t *elm_b; @@ -190,18 +190,20 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_t *extent) } void -chunk_deregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent) +chunk_deregister(tsdn_t *tsdn, const extent_t *extent) { + const void *addr; size_t size; rtree_elm_t *elm_a; + addr = extent_addr_get(extent); size = extent_size_get(extent); - elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)chunk, true, + elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)addr, true, false); rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, NULL); if (size > chunksize) { - uintptr_t last = ((uintptr_t)chunk + + uintptr_t last = ((uintptr_t)addr + (uintptr_t)(CHUNK_CEILING(size - chunksize))); rtree_elm_t *elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, last, true, false); @@ -219,11 +221,11 @@ chunk_deregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent) } void -chunk_reregister(tsdn_t *tsdn, const void *chunk, const extent_t *extent) +chunk_reregister(tsdn_t *tsdn, const extent_t *extent) { bool err; - err = chunk_register(tsdn, chunk, extent); + err = chunk_register(tsdn, extent); assert(!err); } diff --git a/src/huge.c b/src/huge.c index 
ea43236d..48b191ad 100644 --- a/src/huge.c +++ b/src/huge.c @@ -52,7 +52,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_init(extent, arena, ret, usize, true, is_zeroed, true, false); - if (chunk_register(tsdn, ret, extent)) { + if (chunk_register(tsdn, extent)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize); idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, true); @@ -135,11 +135,11 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, void *ptr, /* Update the size of the huge allocation. */ assert(extent_size_get(extent) != usize); - chunk_deregister(tsdn, ptr, extent); + chunk_deregister(tsdn, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - chunk_reregister(tsdn, ptr, extent); + chunk_reregister(tsdn, extent); /* Update zeroed. */ extent_zeroed_set(extent, post_zeroed); @@ -196,13 +196,13 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, void *ptr, post_zeroed = pre_zeroed; /* Update the size of the huge allocation. */ - chunk_deregister(tsdn, ptr, extent); + chunk_deregister(tsdn, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); /* Update zeroed. */ extent_zeroed_set(extent, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - chunk_reregister(tsdn, ptr, extent); + chunk_reregister(tsdn, extent); /* Zap the excess chunks. */ arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize); @@ -232,12 +232,12 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, return (true); /* Update the size of the huge allocation. 
*/ - chunk_deregister(tsdn, ptr, extent); + chunk_deregister(tsdn, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); extent_zeroed_set(extent, extent_zeroed_get(extent) && is_zeroed_chunk); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - chunk_reregister(tsdn, ptr, extent); + chunk_reregister(tsdn, extent); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { @@ -355,7 +355,7 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr) arena_t *arena; arena = extent_arena_get(extent); - chunk_deregister(tsdn, ptr, extent); + chunk_deregister(tsdn, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); From 25845db7c9fcc26a3478afd715a4fcd0798cb642 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 May 2016 10:36:17 -0700 Subject: [PATCH 0270/2608] Dodge ivsalloc() assertion in test code. --- test/unit/arena_reset.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index d7a02e0f..fa2c5cd5 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,6 +78,21 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +/* Like ivsalloc(), but safe to call on discarded allocations. */ +static size_t +vsalloc(tsdn_t *tsdn, const void *ptr) +{ + extent_t *extent; + + extent = chunk_lookup(tsdn, ptr, false); + if (extent == NULL) + return (0); + if (!extent_active_get(extent)) + return (0); + + return (isalloc(tsdn, extent, ptr, false)); +} + TEST_BEGIN(test_arena_reset) { #define NHUGE 4 @@ -139,7 +154,7 @@ TEST_BEGIN(test_arena_reset) /* Verify allocations no longer exist. 
*/ for (i = 0; i < nptrs; i++) { - assert_zu_eq(ivsalloc(tsdn, ptrs[i], false), 0, + assert_zu_eq(vsalloc(tsdn, ptrs[i]), 0, "Allocation should no longer exist"); } From ffa45a53314d3ff4376c753c5609689d0f65f0e8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 May 2016 13:25:18 -0700 Subject: [PATCH 0271/2608] Use rtree rather than [sz]ad trees for chunk split/coalesce operations. --- include/jemalloc/internal/arena.h | 9 +- include/jemalloc/internal/extent.h | 14 +- include/jemalloc/internal/private_symbols.txt | 20 +- src/arena.c | 2 - src/chunk.c | 375 +++++++++++------- src/extent.c | 12 - 6 files changed, 234 insertions(+), 198 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 52e7197a..97b5329e 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -418,16 +418,11 @@ struct arena_s { malloc_mutex_t huge_mtx; /* - * Trees of chunks that were previously allocated (trees differ only in - * extent ordering). These are used when allocating chunks, in an - * attempt to re-use address space. Depending on function, different - * tree orderings are needed, which is why there are two trees with the - * same contents. + * Trees of chunks that were previously allocated. These are used when + * allocating chunks, in an attempt to re-use address space. */ extent_tree_t chunks_szad_cached; - extent_tree_t chunks_ad_cached; extent_tree_t chunks_szad_retained; - extent_tree_t chunks_ad_retained; malloc_mutex_t chunks_mtx; /* Cache of extent structures that were allocated via base_alloc(). */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index d6376d5d..f067a296 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -55,9 +55,6 @@ struct extent_s { /* Linkage for arena's achunks, huge, and node_cache lists. */ ql_elm(extent_t) ql_link; }; - - /* Linkage for the address-ordered tree. 
*/ - rb_node(extent_t) ad_link; }; typedef rb_tree(extent_t) extent_tree_t; @@ -67,8 +64,6 @@ typedef rb_tree(extent_t) extent_tree_t; rb_proto(, extent_tree_szad_, extent_tree_t, extent_t) -rb_proto(, extent_tree_ad_, extent_tree_t, extent_t) - #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES @@ -79,6 +74,7 @@ void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); +bool extent_retained_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); bool extent_slab_get(const extent_t *extent); @@ -134,6 +130,14 @@ extent_active_get(const extent_t *extent) return (extent->e_active); } +JEMALLOC_INLINE bool +extent_retained_get(const extent_t *extent) +{ + + assert(!extent->e_slab); + return (qr_next(&extent->rd, rd_link) == &extent->rd); +} + JEMALLOC_INLINE bool extent_zeroed_get(const extent_t *extent) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f81cecc3..247b8733 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -219,29 +219,11 @@ extent_init extent_past_get extent_prof_tctx_get extent_prof_tctx_set +extent_retained_get extent_size_get extent_size_set extent_slab_get extent_slab_set -extent_tree_ad_destroy -extent_tree_ad_destroy_recurse -extent_tree_ad_empty -extent_tree_ad_first -extent_tree_ad_insert -extent_tree_ad_iter -extent_tree_ad_iter_recurse -extent_tree_ad_iter_start -extent_tree_ad_last -extent_tree_ad_new -extent_tree_ad_next -extent_tree_ad_nsearch -extent_tree_ad_prev -extent_tree_ad_psearch -extent_tree_ad_remove -extent_tree_ad_reverse_iter -extent_tree_ad_reverse_iter_recurse -extent_tree_ad_reverse_iter_start 
-extent_tree_ad_search extent_tree_szad_destroy extent_tree_szad_destroy_recurse extent_tree_szad_empty diff --git a/src/arena.c b/src/arena.c index d5ac7a10..faf23495 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3427,9 +3427,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) return (NULL); extent_tree_szad_new(&arena->chunks_szad_cached); - extent_tree_ad_new(&arena->chunks_ad_cached); extent_tree_szad_new(&arena->chunks_szad_retained); - extent_tree_ad_new(&arena->chunks_ad_retained); if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", WITNESS_RANK_ARENA_CHUNKS)) return (NULL); diff --git a/src/chunk.c b/src/chunk.c index 5a7980f1..9a9b08e3 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -50,9 +50,8 @@ const chunk_hooks_t chunk_hooks_default = { */ static void chunk_record(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, - bool committed); + chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, bool cache, + void *chunk, size_t size, bool zeroed, bool committed); /******************************************************************************/ @@ -140,39 +139,65 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false); } +static bool +extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, + bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) +{ + + *r_elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, + (uintptr_t)extent_addr_get(extent), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) + return (true); + assert(*r_elm_a != NULL); + + if (extent_size_get(extent) > chunksize) { + uintptr_t last = + (CHUNK_CEILING((uintptr_t)extent_past_get(extent) - + chunksize)); + + *r_elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, last, + dependent, init_missing); + if (!dependent && *r_elm_b == NULL) + return (true); + assert(*r_elm_b != NULL); 
+ } else + *r_elm_b = NULL; + + return (false); +} + +static void +extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, + rtree_elm_t *elm_b, const extent_t *extent) +{ + + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, extent); + if (elm_b != NULL) + rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_b, extent); +} + +static void +extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) +{ + + rtree_elm_release(tsdn, &chunks_rtree, elm_a); + if (elm_b != NULL) + rtree_elm_release(tsdn, &chunks_rtree, elm_b); +} + bool chunk_register(tsdn_t *tsdn, const extent_t *extent) { - const void *addr; - size_t size; - rtree_elm_t *elm_a; + rtree_elm_t *elm_a, *elm_b; - addr = extent_addr_get(extent); - size = extent_size_get(extent); - - if ((elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)addr, - false, true)) == NULL) + if (extent_rtree_acquire(tsdn, extent, false, true, &elm_a, &elm_b)) return (true); - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, extent); - if (size > chunksize) { - uintptr_t last = ((uintptr_t)addr + - (uintptr_t)(CHUNK_CEILING(size - chunksize))); - rtree_elm_t *elm_b; - - if ((elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, last, false, - true)) == NULL) { - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, - NULL); - rtree_elm_release(tsdn, &chunks_rtree, elm_a); - return (true); - } - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_b, extent); - rtree_elm_release(tsdn, &chunks_rtree, elm_b); - } - rtree_elm_release(tsdn, &chunks_rtree, elm_a); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); + extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof) { - size_t nadd = (size == 0) ? 1 : size / chunksize; + size_t nadd = (extent_size_get(extent) == 0) ? 
1 : + extent_size_get(extent) / chunksize; size_t cur = atomic_add_z(&curchunks, nadd); size_t high = atomic_read_z(&highchunks); while (cur > high && atomic_cas_z(&highchunks, high, cur)) { @@ -192,29 +217,15 @@ chunk_register(tsdn_t *tsdn, const extent_t *extent) void chunk_deregister(tsdn_t *tsdn, const extent_t *extent) { - const void *addr; - size_t size; - rtree_elm_t *elm_a; + rtree_elm_t *elm_a, *elm_b; - addr = extent_addr_get(extent); - size = extent_size_get(extent); - - elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, (uintptr_t)addr, true, - false); - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, NULL); - if (size > chunksize) { - uintptr_t last = ((uintptr_t)addr + - (uintptr_t)(CHUNK_CEILING(size - chunksize))); - rtree_elm_t *elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, - last, true, false); - - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_b, NULL); - rtree_elm_release(tsdn, &chunks_rtree, elm_b); - } - rtree_elm_release(tsdn, &chunks_rtree, elm_a); + extent_rtree_acquire(tsdn, extent, true, false, &elm_a, &elm_b); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); + extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof) { - size_t nsub = (size == 0) ? 1 : size / chunksize; + size_t nsub = (extent_size_get(extent) == 0) ? 1 : + extent_size_get(extent) / chunksize; assert(atomic_read_z(&curchunks) >= nsub); atomic_sub_z(&curchunks, nsub); } @@ -234,8 +245,7 @@ chunk_reregister(tsdn_t *tsdn, const extent_t *extent) * fits. 
*/ static extent_t * -chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, size_t size) +chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, size_t size) { extent_t key; @@ -245,11 +255,25 @@ chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, return (extent_tree_szad_nsearch(chunks_szad, &key)); } +static void +chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, + void *addr, size_t size) +{ + + /* + * Leak chunk after making sure its pages have already been purged, so + * that this is only a virtual memory leak. + */ + if (cache) { + chunk_purge_wrapper(tsdn, arena, chunk_hooks, addr, size, 0, + size); + } +} + static void * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool dalloc_extent) + extent_tree_t *chunks_szad, bool cache, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, bool dalloc_extent) { void *ret; extent_t *extent; @@ -271,14 +295,21 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { - extent_t key; - extent_init(&key, arena, new_addr, alloc_size, false, false, - false, false); - extent = extent_tree_ad_search(chunks_ad, &key); - } else { - extent = chunk_first_best_fit(arena, chunks_szad, chunks_ad, - alloc_size); - } + rtree_elm_t *elm; + + elm = rtree_elm_acquire(tsdn, &chunks_rtree, + (uintptr_t)new_addr, false, false); + if (elm != NULL) { + extent = rtree_elm_read_acquired(tsdn, &chunks_rtree, + elm); + if (extent != NULL && (extent_active_get(extent) || + extent_retained_get(extent) == cache)) + extent = NULL; + rtree_elm_release(tsdn, &chunks_rtree, elm); + } else + extent = NULL; + } else + extent = 
chunk_first_best_fit(arena, chunks_szad, alloc_size); if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < size)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -304,15 +335,20 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (NULL); } /* Remove extent from the tree. */ + chunk_deregister(tsdn, extent); extent_tree_szad_remove(chunks_szad, extent); - extent_tree_ad_remove(chunks_ad, extent); arena_chunk_cache_maybe_remove(arena, extent, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ extent_size_set(extent, leadsize); - extent_tree_szad_insert(chunks_szad, extent); - extent_tree_ad_insert(chunks_ad, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); + if (chunk_register(tsdn, extent)) { + chunk_leak(tsdn, arena, chunk_hooks, cache, + extent_addr_get(extent), extent_size_get(extent)); + arena_extent_dalloc(tsdn, arena, extent); + } else { + extent_tree_szad_insert(chunks_szad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); + } extent = NULL; } if (trailsize != 0) { @@ -323,8 +359,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_extent_dalloc(tsdn, arena, extent); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, chunks_szad, - chunks_ad, cache, ret, size + trailsize, zeroed, - committed); + cache, ret, size + trailsize, zeroed, committed); return (NULL); } /* Insert the trailing space as a smaller chunk. 
*/ @@ -333,22 +368,27 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, - chunks_szad, chunks_ad, cache, ret, size + - trailsize, zeroed, committed); + chunks_szad, cache, ret, size + trailsize, + zeroed, committed); return (NULL); } } extent_init(extent, arena, (void *)((uintptr_t)(ret) + size), trailsize, false, zeroed, committed, false); - extent_tree_szad_insert(chunks_szad, extent); - extent_tree_ad_insert(chunks_ad, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); + if (chunk_register(tsdn, extent)) { + chunk_leak(tsdn, arena, chunk_hooks, cache, + extent_addr_get(extent), extent_size_get(extent)); + arena_extent_dalloc(tsdn, arena, extent); + } else { + extent_tree_szad_insert(chunks_szad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); + } extent = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, - cache, ret, size, zeroed, committed); + chunk_record(tsdn, arena, chunk_hooks, chunks_szad, cache, ret, + size, zeroed, committed); return (NULL); } malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -441,8 +481,8 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, commit = true; ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, zero, &commit, dalloc_extent); + &arena->chunks_szad_cached, true, new_addr, size, alignment, zero, + &commit, dalloc_extent); if (ret == NULL) return (NULL); assert(commit); @@ -493,8 +533,8 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szad_retained, &arena->chunks_ad_retained, 
false, - new_addr, size, alignment, zero, commit, true); + &arena->chunks_szad_retained, false, new_addr, size, alignment, + zero, commit, true); if (config_stats && ret != NULL) arena->stats.retained -= size; @@ -522,89 +562,118 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (ret); } +static bool +chunk_can_coalesce(const extent_t *a, const extent_t *b) +{ + + assert((void *)CHUNK_CEILING((uintptr_t)extent_past_get(a)) == + extent_addr_get(b)); + + if (extent_arena_get(a) != extent_arena_get(b)) + return (false); + if (extent_active_get(a) != extent_active_get(b)) + return (false); + if (extent_committed_get(a) != extent_committed_get(b)) + return (false); + if (extent_retained_get(a) != extent_retained_get(b)) + return (false); + + return (true); +} + +static void +chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_t *a, extent_t *b, extent_tree_t *chunks_szad, bool cache) +{ + rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + + if (!chunk_can_coalesce(a, b)) + return; + + if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), + extent_addr_get(b), extent_size_get(b), extent_committed_get(a), + arena->ind)) + return; + + /* + * The rtree writes must happen while all the relevant elements are + * owned, so the following code uses decomposed helper functions rather + * than chunk_{,de}register() to do things in the right order. 
+ */ + extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b); + extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); + + if (a_elm_b != NULL) { + rtree_elm_write_acquired(tsdn, &chunks_rtree, a_elm_b, NULL); + rtree_elm_release(tsdn, &chunks_rtree, a_elm_b); + } + if (b_elm_b != NULL) { + rtree_elm_write_acquired(tsdn, &chunks_rtree, b_elm_a, NULL); + rtree_elm_release(tsdn, &chunks_rtree, b_elm_a); + } else + b_elm_b = b_elm_a; + + extent_tree_szad_remove(chunks_szad, a); + extent_tree_szad_remove(chunks_szad, b); + + arena_chunk_cache_maybe_remove(extent_arena_get(a), a, cache); + arena_chunk_cache_maybe_remove(extent_arena_get(b), b, cache); + + extent_size_set(a, extent_size_get(a) + extent_size_get(b)); + extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + + extent_tree_szad_insert(chunks_szad, a); + + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); + extent_rtree_release(tsdn, a_elm_a, b_elm_b); + + arena_chunk_cache_maybe_insert(extent_arena_get(a), a, cache); + + arena_extent_dalloc(tsdn, extent_arena_get(b), b); +} + static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, bool zeroed, bool committed) + extent_tree_t *chunks_szad, bool cache, void *chunk, size_t size, + bool zeroed, bool committed) { - bool unzeroed; - extent_t *extent, *prev; - extent_t key; + extent_t *extent, *prev, *next; assert(!cache || !zeroed); - unzeroed = cache || !zeroed; malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); - extent_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, false, - false, false, false); - extent = extent_tree_ad_nsearch(chunks_ad, &key); + + /* Create/initialize/insert extent. 
*/ + extent = arena_extent_alloc(tsdn, arena); + if (extent == NULL) { + chunk_leak(tsdn, arena, chunk_hooks, cache, chunk, size); + goto label_return; + } + extent_init(extent, arena, chunk, size, false, !cache && zeroed, + committed, false); + if (chunk_register(tsdn, extent)) { + arena_extent_dalloc(tsdn, arena, extent); + chunk_leak(tsdn, arena, chunk_hooks, cache, chunk, size); + goto label_return; + } + extent_tree_szad_insert(chunks_szad, extent); + arena_chunk_cache_maybe_insert(arena, extent, cache); + /* Try to coalesce forward. */ - if (extent != NULL && extent_addr_get(extent) == extent_addr_get(&key) - && extent_committed_get(extent) == committed && - !chunk_hooks->merge(chunk, size, extent_addr_get(extent), - extent_size_get(extent), false, arena->ind)) { - /* - * Coalesce chunk with the following address range. This does - * not change the position within chunks_ad, so only - * remove/insert from/into chunks_szad. - */ - extent_tree_szad_remove(chunks_szad, extent); - arena_chunk_cache_maybe_remove(arena, extent, cache); - extent_addr_set(extent, chunk); - extent_size_set(extent, size + extent_size_get(extent)); - extent_zeroed_set(extent, extent_zeroed_get(extent) && - !unzeroed); - extent_tree_szad_insert(chunks_szad, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); - } else { - /* Coalescing forward failed, so insert a new extent. */ - extent = arena_extent_alloc(tsdn, arena); - if (extent == NULL) { - /* - * Node allocation failed, which is an exceedingly - * unlikely failure. Leak chunk after making sure its - * pages have already been purged, so that this is only - * a virtual memory leak. 
- */ - if (cache) { - chunk_purge_wrapper(tsdn, arena, chunk_hooks, - chunk, size, 0, size); - } - goto label_return; - } - extent_init(extent, arena, chunk, size, false, !unzeroed, - committed, false); - extent_tree_ad_insert(chunks_ad, extent); - extent_tree_szad_insert(chunks_szad, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); + next = rtree_read(tsdn, &chunks_rtree, + CHUNK_CEILING((uintptr_t)extent_past_get(extent)), false); + if (next != NULL) { + chunk_try_coalesce(tsdn, arena, chunk_hooks, extent, next, + chunks_szad, cache); } /* Try to coalesce backward. */ - prev = extent_tree_ad_prev(chunks_ad, extent); - if (prev != NULL && (void *)((uintptr_t)extent_addr_get(prev) + - extent_size_get(prev)) == chunk && extent_committed_get(prev) == - committed && !chunk_hooks->merge(extent_addr_get(prev), - extent_size_get(prev), chunk, size, false, arena->ind)) { - /* - * Coalesce chunk with the previous address range. This does - * not change the position within chunks_ad, so only - * remove/insert extent from/into chunks_szad. 
- */ - extent_tree_szad_remove(chunks_szad, prev); - extent_tree_ad_remove(chunks_ad, prev); - arena_chunk_cache_maybe_remove(arena, prev, cache); - extent_tree_szad_remove(chunks_szad, extent); - arena_chunk_cache_maybe_remove(arena, extent, cache); - extent_addr_set(extent, extent_addr_get(prev)); - extent_size_set(extent, extent_size_get(prev) + - extent_size_get(extent)); - extent_zeroed_set(extent, extent_zeroed_get(prev) && - extent_zeroed_get(extent)); - extent_tree_szad_insert(chunks_szad, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); - - arena_extent_dalloc(tsdn, arena, prev); + prev = rtree_read(tsdn, &chunks_rtree, + (uintptr_t)extent_addr_get(extent) - chunksize, false); + if (prev != NULL) { + chunk_try_coalesce(tsdn, arena, chunk_hooks, prev, extent, + chunks_szad, cache); } label_return: @@ -621,8 +690,8 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, - &arena->chunks_ad_cached, true, chunk, size, false, committed); + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, true, + chunk, size, false, committed); arena_maybe_purge(tsdn, arena); } @@ -658,7 +727,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, - &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); + false, chunk, size, zeroed, committed); if (config_stats) arena->stats.retained += size; diff --git a/src/extent.c b/src/extent.c index 8d24d6d6..c550e6cb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -39,15 +39,3 @@ extent_szad_comp(const extent_t *a, const extent_t *b) /* Generate red-black tree functions. 
*/ rb_gen(, extent_tree_szad_, extent_tree_t, extent_t, szad_link, extent_szad_comp) - -JEMALLOC_INLINE_C int -extent_ad_comp(const extent_t *a, const extent_t *b) -{ - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); - - return ((a_addr > b_addr) - (a_addr < b_addr)); -} - -/* Generate red-black tree functions. */ -rb_gen(, extent_tree_ad_, extent_tree_t, extent_t, ad_link, extent_ad_comp) From fc0372a15e6486d69e6f5f4c2b656aafbb013850 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 May 2016 14:58:56 -0700 Subject: [PATCH 0272/2608] Replace extent_tree_szad_* with extent_heap_*. --- Makefile.in | 1 + include/jemalloc/internal/arena.h | 6 +- include/jemalloc/internal/extent.h | 17 +- include/jemalloc/internal/private_symbols.txt | 21 +-- src/arena.c | 14 +- src/base.c | 35 ++-- src/chunk.c | 85 ++++++---- src/extent.c | 103 ++++++++---- test/unit/extent_quantize.c | 155 ++++++++++++++++++ 9 files changed, 333 insertions(+), 104 deletions(-) create mode 100644 test/unit/extent_quantize.c diff --git a/Makefile.in b/Makefile.in index 8cd6af98..7d73155a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -136,6 +136,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 97b5329e..b6bfb25c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -418,11 +418,11 @@ struct arena_s { malloc_mutex_t huge_mtx; /* - * Trees of chunks that were previously allocated. These are used when + * Heaps of chunks that were previously allocated. These are used when * allocating chunks, in an attempt to re-use address space. 
*/ - extent_tree_t chunks_szad_cached; - extent_tree_t chunks_szad_retained; + extent_heap_t chunks_cached[NPSIZES]; + extent_heap_t chunks_retained[NPSIZES]; malloc_mutex_t chunks_mtx; /* Cache of extent structures that were allocated via base_alloc(). */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f067a296..82da8004 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -49,20 +49,29 @@ struct extent_s { qr(extent_t) cc_link; union { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_t) szad_link; + /* Linkage for per size class address-ordered heaps. */ + phn(extent_t) ph_link; /* Linkage for arena's achunks, huge, and node_cache lists. */ ql_elm(extent_t) ql_link; }; }; -typedef rb_tree(extent_t) extent_tree_t; +typedef ph(extent_t) extent_heap_t; #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rb_proto(, extent_tree_szad_, extent_tree_t, extent_t) +#ifdef JEMALLOC_JET +typedef size_t (extent_size_quantize_t)(size_t); +extern extent_size_quantize_t *extent_size_quantize_floor; +extern extent_size_quantize_t *extent_size_quantize_ceil; +#else +size_t extent_size_quantize_floor(size_t size); +size_t extent_size_quantize_ceil(size_t size); +#endif + +ph_proto(, extent_heap_, extent_heap_t, extent_t) #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 247b8733..b3d18600 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -222,27 +222,10 @@ extent_prof_tctx_set extent_retained_get extent_size_get extent_size_set +extent_size_quantize_ceil +extent_size_quantize_floor extent_slab_get extent_slab_set -extent_tree_szad_destroy 
-extent_tree_szad_destroy_recurse -extent_tree_szad_empty -extent_tree_szad_first -extent_tree_szad_insert -extent_tree_szad_iter -extent_tree_szad_iter_recurse -extent_tree_szad_iter_start -extent_tree_szad_last -extent_tree_szad_new -extent_tree_szad_next -extent_tree_szad_nsearch -extent_tree_szad_prev -extent_tree_szad_psearch -extent_tree_szad_remove -extent_tree_szad_reverse_iter -extent_tree_szad_reverse_iter_recurse -extent_tree_szad_reverse_iter_start -extent_tree_szad_search extent_zeroed_get extent_zeroed_set ffs_llu diff --git a/src/arena.c b/src/arena.c index faf23495..720219d3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3411,10 +3411,6 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); - i++) - arena_run_heap_new(&arena->runs_avail[i]); - qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3426,8 +3422,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) WITNESS_RANK_ARENA_HUGE)) return (NULL); - extent_tree_szad_new(&arena->chunks_szad_cached); - extent_tree_szad_new(&arena->chunks_szad_retained); + for (i = 0; i < NPSIZES; i++) { + extent_heap_new(&arena->chunks_cached[i]); + extent_heap_new(&arena->chunks_retained[i]); + } + if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", WITNESS_RANK_ARENA_CHUNKS)) return (NULL); @@ -3450,6 +3449,9 @@ arena_new(tsdn_t *tsdn, unsigned ind) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } + for (i = 0; i < NPSIZES; i++) + arena_run_heap_new(&arena->runs_avail[i]); + return (arena); } diff --git a/src/base.c b/src/base.c index 2a6df4dd..8816738c 100644 --- a/src/base.c +++ b/src/base.c @@ -5,7 +5,7 @@ /* Data. 
*/ static malloc_mutex_t base_mtx; -static extent_tree_t base_avail_szad; +static extent_heap_t base_avail[NSIZES]; static extent_t *base_extents; static size_t base_allocated; static size_t base_resident; @@ -79,9 +79,9 @@ void * base_alloc(tsdn_t *tsdn, size_t size) { void *ret; - size_t csize, usize; + size_t csize; + szind_t i; extent_t *extent; - extent_t key; /* * Round size up to nearest multiple of the cacheline size, so that @@ -89,14 +89,16 @@ base_alloc(tsdn_t *tsdn, size_t size) */ csize = CACHELINE_CEILING(size); - usize = s2u(csize); - extent_init(&key, NULL, NULL, usize, false, false, false, false); + extent = NULL; malloc_mutex_lock(tsdn, &base_mtx); - extent = extent_tree_szad_nsearch(&base_avail_szad, &key); - if (extent != NULL) { - /* Use existing space. */ - extent_tree_szad_remove(&base_avail_szad, extent); - } else { + for (i = size2index(csize); i < NSIZES; i++) { + extent = extent_heap_remove_first(&base_avail[i]); + if (extent != NULL) { + /* Use existing space. */ + break; + } + } + if (extent == NULL) { /* Try to allocate more space. */ extent = base_chunk_alloc(tsdn, csize); } @@ -107,9 +109,16 @@ base_alloc(tsdn_t *tsdn, size_t size) ret = extent_addr_get(extent); if (extent_size_get(extent) > csize) { + szind_t index_floor; + extent_addr_set(extent, (void *)((uintptr_t)ret + csize)); extent_size_set(extent, extent_size_get(extent) - csize); - extent_tree_szad_insert(&base_avail_szad, extent); + /* + * Compute the index for the largest size class that does not + * exceed extent's size. 
+ */ + index_floor = size2index(extent_size_get(extent) + 1) - 1; + extent_heap_insert(&base_avail[index_floor], extent); } else base_extent_dalloc(tsdn, extent); if (config_stats) { @@ -143,10 +152,12 @@ base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, bool base_boot(void) { + szind_t i; if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); - extent_tree_szad_new(&base_avail_szad); + for (i = 0; i < NSIZES; i++) + extent_heap_new(&base_avail[i]); base_extents = NULL; return (false); diff --git a/src/chunk.c b/src/chunk.c index 9a9b08e3..2463028b 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -50,11 +50,27 @@ const chunk_hooks_t chunk_hooks_default = { */ static void chunk_record(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, bool cache, + chunk_hooks_t *chunk_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, void *chunk, size_t size, bool zeroed, bool committed); /******************************************************************************/ +static void +extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) +{ + size_t psz = extent_size_quantize_floor(extent_size_get(extent)); + pszind_t pind = psz2ind(psz); + extent_heap_insert(&extent_heaps[pind], extent); +} + +static void +extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) +{ + size_t psz = extent_size_quantize_floor(extent_size_get(extent)); + pszind_t pind = psz2ind(psz); + extent_heap_remove(&extent_heaps[pind], extent); +} + static chunk_hooks_t chunk_hooks_get_locked(arena_t *arena) { @@ -245,14 +261,21 @@ chunk_reregister(tsdn_t *tsdn, const extent_t *extent) * fits. 
*/ static extent_t * -chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, size_t size) +chunk_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], + size_t size) { - extent_t key; + pszind_t pind, i; assert(size == CHUNK_CEILING(size)); - extent_init(&key, arena, NULL, size, false, false, false, false); - return (extent_tree_szad_nsearch(chunks_szad, &key)); + pind = psz2ind(extent_size_quantize_ceil(size)); + for (i = pind; i < NPSIZES; i++) { + extent_t *extent = extent_heap_first(&extent_heaps[i]); + if (extent != NULL) + return (extent); + } + + return (NULL); } static void @@ -272,8 +295,8 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, static void * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, bool cache, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, bool dalloc_extent) + extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, bool dalloc_extent) { void *ret; extent_t *extent; @@ -309,7 +332,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } else extent = NULL; } else - extent = chunk_first_best_fit(arena, chunks_szad, alloc_size); + extent = chunk_first_best_fit(arena, extent_heaps, alloc_size); if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < size)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -334,9 +357,9 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } - /* Remove extent from the tree. */ + /* Remove extent from the heap. */ chunk_deregister(tsdn, extent); - extent_tree_szad_remove(chunks_szad, extent); + extent_heaps_remove(extent_heaps, extent); arena_chunk_cache_maybe_remove(arena, extent, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. 
*/ @@ -346,7 +369,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_addr_get(extent), extent_size_get(extent)); arena_extent_dalloc(tsdn, arena, extent); } else { - extent_tree_szad_insert(chunks_szad, extent); + extent_heaps_insert(extent_heaps, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); } extent = NULL; @@ -358,7 +381,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (dalloc_extent && extent != NULL) arena_extent_dalloc(tsdn, arena, extent); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szad, + chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, ret, size + trailsize, zeroed, committed); return (NULL); } @@ -368,7 +391,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, - chunks_szad, cache, ret, size + trailsize, + extent_heaps, cache, ret, size + trailsize, zeroed, committed); return (NULL); } @@ -380,14 +403,14 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_addr_get(extent), extent_size_get(extent)); arena_extent_dalloc(tsdn, arena, extent); } else { - extent_tree_szad_insert(chunks_szad, extent); + extent_heaps_insert(extent_heaps, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); } extent = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szad, cache, ret, + chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, ret, size, zeroed, committed); return (NULL); } @@ -480,9 +503,8 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); commit = true; - ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szad_cached, true, new_addr, size, 
alignment, zero, - &commit, dalloc_extent); + ret = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, + true, new_addr, size, alignment, zero, &commit, dalloc_extent); if (ret == NULL) return (NULL); assert(commit); @@ -532,9 +554,8 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szad_retained, false, new_addr, size, alignment, - zero, commit, true); + ret = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, + false, new_addr, size, alignment, zero, commit, true); if (config_stats && ret != NULL) arena->stats.retained -= size; @@ -583,7 +604,7 @@ chunk_can_coalesce(const extent_t *a, const extent_t *b) static void chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_t *a, extent_t *b, extent_tree_t *chunks_szad, bool cache) + extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES], bool cache) { rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; @@ -613,8 +634,8 @@ chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } else b_elm_b = b_elm_a; - extent_tree_szad_remove(chunks_szad, a); - extent_tree_szad_remove(chunks_szad, b); + extent_heaps_remove(extent_heaps, a); + extent_heaps_remove(extent_heaps, b); arena_chunk_cache_maybe_remove(extent_arena_get(a), a, cache); arena_chunk_cache_maybe_remove(extent_arena_get(b), b, cache); @@ -622,7 +643,7 @@ chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_size_set(a, extent_size_get(a) + extent_size_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - extent_tree_szad_insert(chunks_szad, a); + extent_heaps_insert(extent_heaps, a); extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); extent_rtree_release(tsdn, a_elm_a, b_elm_b); @@ -634,7 +655,7 @@ chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t 
*chunk_hooks, static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, bool cache, void *chunk, size_t size, + extent_heap_t extent_heaps[NPSIZES], bool cache, void *chunk, size_t size, bool zeroed, bool committed) { extent_t *extent, *prev, *next; @@ -657,7 +678,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_leak(tsdn, arena, chunk_hooks, cache, chunk, size); goto label_return; } - extent_tree_szad_insert(chunks_szad, extent); + extent_heaps_insert(extent_heaps, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); /* Try to coalesce forward. */ @@ -665,7 +686,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, CHUNK_CEILING((uintptr_t)extent_past_get(extent)), false); if (next != NULL) { chunk_try_coalesce(tsdn, arena, chunk_hooks, extent, next, - chunks_szad, cache); + extent_heaps, cache); } /* Try to coalesce backward. */ @@ -673,7 +694,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, (uintptr_t)extent_addr_get(extent) - chunksize, false); if (prev != NULL) { chunk_try_coalesce(tsdn, arena, chunk_hooks, prev, extent, - chunks_szad, cache); + extent_heaps, cache); } label_return: @@ -690,7 +711,7 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, true, + chunk_record(tsdn, arena, chunk_hooks, arena->chunks_cached, true, chunk, size, false, committed); arena_maybe_purge(tsdn, arena); } @@ -726,8 +747,8 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, - false, chunk, size, zeroed, committed); + chunk_record(tsdn, arena, chunk_hooks, arena->chunks_retained, false, + chunk, size, 
zeroed, committed); if (config_stats) arena->stats.retained += size; diff --git a/src/extent.c b/src/extent.c index c550e6cb..4757f750 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,39 +3,86 @@ /******************************************************************************/ -JEMALLOC_INLINE_C size_t -extent_quantize(size_t size) +#ifdef JEMALLOC_JET +#undef extent_size_quantize_floor +#define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) +#endif +size_t +extent_size_quantize_floor(size_t size) { + size_t ret; + pszind_t pind; - /* - * Round down to the nearest chunk size that can actually be requested - * during normal huge allocation. - */ - return (index2size(size2index(size + 1) - 1)); -} + assert(size > 0); + assert(size <= HUGE_MAXCLASS); + assert((size & PAGE_MASK) == 0); -JEMALLOC_INLINE_C int -extent_szad_comp(const extent_t *a, const extent_t *b) -{ - int ret; - size_t a_qsize = extent_quantize(extent_size_get(a)); - size_t b_qsize = extent_quantize(extent_size_get(b)); + assert(size != 0); + assert(size == PAGE_CEILING(size)); - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful extents only by address. - */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); - - ret = (a_addr > b_addr) - (a_addr < b_addr); + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. 
+ */ + return (size); } - + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); return (ret); } +#ifdef JEMALLOC_JET +#undef extent_size_quantize_floor +#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor) +extent_size_quantize_t *extent_size_quantize_floor = + JEMALLOC_N(n_extent_size_quantize_floor); +#endif -/* Generate red-black tree functions. */ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_t, szad_link, - extent_szad_comp) +#ifdef JEMALLOC_JET +#undef extent_size_quantize_ceil +#define extent_size_quantize_ceil JEMALLOC_N(n_extent_size_quantize_ceil) +#endif +size_t +extent_size_quantize_ceil(size_t size) +{ + size_t ret; + + assert(size > 0); + assert(size <= HUGE_MAXCLASS); + assert((size & PAGE_MASK) == 0); + + ret = extent_size_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large extent, + * because under-sized extents may be mixed in. This only + * happens when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. + */ + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + } + return (ret); +} +#ifdef JEMALLOC_JET +#undef extent_size_quantize_ceil +#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil) +extent_size_quantize_t *extent_size_quantize_ceil = + JEMALLOC_N(n_extent_size_quantize_ceil); +#endif + +JEMALLOC_INLINE_C int +extent_ad_comp(const extent_t *a, const extent_t *b) +{ + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return ((a_addr > b_addr) - (a_addr < b_addr)); +} + +/* Generate pairing heap functions. 
*/ +ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_ad_comp) diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c new file mode 100644 index 00000000..d8846db4 --- /dev/null +++ b/test/unit/extent_quantize.c @@ -0,0 +1,155 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_small_extent_size) +{ + unsigned nbins, i; + size_t sz, extent_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all small size classes, get their extent sizes, and + * verify that the quantized size is the same as the extent size. + */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nbins; i++) { + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &extent_size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + assert_zu_eq(extent_size, + extent_size_quantize_floor(extent_size), + "Small extent quantization should be a no-op " + "(extent_size=%zu)", extent_size); + assert_zu_eq(extent_size, + extent_size_quantize_ceil(extent_size), + "Small extent quantization should be a no-op " + "(extent_size=%zu)", extent_size); + } +} +TEST_END + +TEST_BEGIN(test_large_extent_size) +{ + bool cache_oblivious; + unsigned nlruns, i; + size_t sz, extent_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all large size classes, get their extent sizes, and + * verify that the quantized size is the same as the extent size. 
+ */ + + sz = sizeof(bool); + assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nlruns; i++) { + size_t lextent_size, extent_size, floor, ceil; + + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, &lextent_size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + extent_size = cache_oblivious ? lextent_size + PAGE : + lextent_size; + floor = extent_size_quantize_floor(extent_size); + ceil = extent_size_quantize_ceil(extent_size); + + assert_zu_eq(extent_size, floor, + "Large run quantization should be a no-op for precise " + "size (lextent_size=%zu, extent_size=%zu)", lextent_size, + extent_size); + assert_zu_eq(extent_size, ceil, + "Large run quantization should be a no-op for precise " + "size (lextent_size=%zu, extent_size=%zu)", lextent_size, + extent_size); + + if (i > 0) { + assert_zu_eq(extent_size_prev, + extent_size_quantize_floor(extent_size - PAGE), + "Floor should be a precise size"); + if (extent_size_prev < ceil_prev) { + assert_zu_eq(ceil_prev, extent_size, + "Ceiling should be a precise size " + "(extent_size_prev=%zu, ceil_prev=%zu, " + "extent_size=%zu)", extent_size_prev, + ceil_prev, extent_size); + } + } + extent_size_prev = floor; + ceil_prev = extent_size_quantize_ceil(extent_size + PAGE); + } +} +TEST_END + +TEST_BEGIN(test_monotonic) +{ + unsigned nbins, nlruns, i; + size_t sz, floor_prev, ceil_prev; + + /* + * Iterate over all extent sizes and verify that + * extent_size_quantize_{floor,ceil}() are monotonic. 
+ */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + floor_prev = 0; + ceil_prev = 0; + for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { + size_t extent_size, floor, ceil; + + extent_size = i << LG_PAGE; + floor = extent_size_quantize_floor(extent_size); + ceil = extent_size_quantize_ceil(extent_size); + + assert_zu_le(floor, extent_size, + "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)", + floor, extent_size, ceil); + assert_zu_ge(ceil, extent_size, + "Ceiling should be >= (floor=%zu, extent_size=%zu, " + "ceil=%zu)", floor, extent_size, ceil); + + assert_zu_le(floor_prev, floor, "Floor should be monotonic " + "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)", + floor_prev, floor, extent_size, ceil); + assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)", + floor, extent_size, ceil_prev, ceil); + + floor_prev = floor; + ceil_prev = ceil; + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_small_extent_size, + test_large_extent_size, + test_monotonic)); +} From 4d2d9cec5a82c80e0cabb1c4fc0473aca0cc5a09 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 May 2016 17:43:30 -0700 Subject: [PATCH 0273/2608] Merge chunk_alloc_base() into its only caller. 
--- include/jemalloc/internal/chunk.h | 1 - include/jemalloc/internal/private_symbols.txt | 1 - src/base.c | 10 +++++++++- src/chunk.c | 20 ------------------- 4 files changed, 9 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 05cf3d05..ab102d2c 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -55,7 +55,6 @@ chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, bool chunk_register(tsdn_t *tsdn, const extent_t *extent); void chunk_deregister(tsdn_t *tsdn, const extent_t *extent); void chunk_reregister(tsdn_t *tsdn, const extent_t *extent); -void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_extent); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b3d18600..c237ab33 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -154,7 +154,6 @@ bootstrap_free bootstrap_malloc bt_init buferror -chunk_alloc_base chunk_alloc_cache chunk_alloc_dss chunk_alloc_mmap diff --git a/src/base.c b/src/base.c index 8816738c..518f966c 100644 --- a/src/base.c +++ b/src/base.c @@ -50,7 +50,15 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) /* Allocate enough space to also carve an extent out if necessary. */ nsize = (extent == NULL) ? CACHELINE_CEILING(sizeof(extent_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); - addr = chunk_alloc_base(csize); + /* + * Directly call chunk_alloc_mmap() because it's critical to allocate + * untouched demand-zeroed virtual memory. 
+ */ + { + bool zero = true; + bool commit = true; + addr = chunk_alloc_mmap(NULL, csize, chunksize, &zero, &commit); + } if (addr == NULL) { if (extent != NULL) base_extent_dalloc(tsdn, extent); diff --git a/src/chunk.c b/src/chunk.c index 2463028b..a32eede9 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -469,26 +469,6 @@ chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return (NULL); } -void * -chunk_alloc_base(size_t size) -{ - void *ret; - bool zero, commit; - - /* - * Directly call chunk_alloc_mmap() rather than chunk_alloc_core() - * because it's critical that chunk_alloc_base() return untouched - * demand-zeroed virtual memory. - */ - zero = true; - commit = true; - ret = chunk_alloc_mmap(NULL, size, chunksize, &zero, &commit); - if (ret == NULL) - return (NULL); - - return (ret); -} - void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, From 56e0031d7d0c69c54de4bae1ca3a2fd2823f69ff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 May 2016 18:17:04 -0700 Subject: [PATCH 0274/2608] Add/use chunk_decommit_wrapper(). 
--- include/jemalloc/internal/chunk.h | 3 +++ include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 14 +++++++------- src/chunk.c | 9 +++++++++ 4 files changed, 20 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index ab102d2c..8cd992d1 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -66,6 +66,9 @@ void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed); +bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, + size_t length); bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c237ab33..5261e02c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -162,6 +162,7 @@ chunk_boot chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper +chunk_decommit_wrapper chunk_deregister chunk_dss_boot chunk_dss_postfork_child diff --git a/src/arena.c b/src/arena.c index 720219d3..ee651947 100644 --- a/src/arena.c +++ b/src/arena.c @@ -552,8 +552,8 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) { if (!*commit) { /* Undo commit of header. 
*/ - chunk_hooks->decommit(chunk, chunksize, 0, map_bias << - LG_PAGE, arena->ind); + chunk_decommit_wrapper(tsdn, arena, chunk_hooks, + chunk, chunksize, 0, map_bias << LG_PAGE); } chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); @@ -675,9 +675,8 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) * chunk as committed has a high potential for causing later * access of decommitted memory. */ - chunk_hooks = chunk_hooks_get(tsdn, arena); - chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, - arena->ind); + chunk_decommit_wrapper(tsdn, arena, &chunk_hooks, chunk, + chunksize, 0, map_bias << LG_PAGE); } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, @@ -1603,8 +1602,9 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(!arena_mapbits_decommitted_get(chunk, pageind)); assert(!arena_mapbits_decommitted_get(chunk, pageind+npages-1)); - decommitted = !chunk_hooks->decommit(chunk, chunksize, - pageind << LG_PAGE, npages << LG_PAGE, arena->ind); + decommitted = !chunk_decommit_wrapper(tsdn, arena, + chunk_hooks, chunk, chunksize, pageind << LG_PAGE, + npages << LG_PAGE); if (decommitted) { flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; diff --git a/src/chunk.c b/src/chunk.c index a32eede9..0d942d66 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -752,6 +752,15 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } +bool +chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length) +{ + + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + return (chunk_hooks->decommit(chunk, size, offset, length, arena->ind)); +} + static bool chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, unsigned arena_ind) From 384e88f4518512bd2e727cfd6b73395635bbce50 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: 
Tue, 17 May 2016 18:29:08 -0700 Subject: [PATCH 0275/2608] Add/use chunk_commit_wrapper(). --- include/jemalloc/internal/chunk.h | 3 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 61 ++++++++++--------- src/chunk.c | 9 +++ 4 files changed, 44 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 8cd992d1..52a6d565 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -66,6 +66,9 @@ void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed); +bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, + size_t length); bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5261e02c..bd18e76b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -159,6 +159,7 @@ chunk_alloc_dss chunk_alloc_mmap chunk_alloc_wrapper chunk_boot +chunk_commit_wrapper chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper diff --git a/src/arena.c b/src/arena.c index ee651947..5ea3fc85 100644 --- a/src/arena.c +++ b/src/arena.c @@ -375,8 +375,8 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, } static bool -arena_run_split_large_helper(arena_t *arena, extent_t *extent, arena_run_t *run, - size_t size, bool remove, bool zero) +arena_run_split_large_helper(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, size_t size, bool remove, bool zero) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -391,8 +391,8 @@ arena_run_split_large_helper(arena_t *arena, 
extent_t *extent, arena_run_t *run, need_pages = (size >> LG_PAGE); assert(need_pages > 0); - if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize, - run_ind << LG_PAGE, size, arena->ind)) + if (flag_decommitted != 0 && chunk_commit_wrapper(tsdn, arena, + &arena->chunk_hooks, chunk, chunksize, run_ind << LG_PAGE, size)) return (true); if (remove) { @@ -439,26 +439,26 @@ arena_run_split_large_helper(arena_t *arena, extent_t *extent, arena_run_t *run, } static bool -arena_run_split_large(arena_t *arena, extent_t *extent, arena_run_t *run, - size_t size, bool zero) +arena_run_split_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, size_t size, bool zero) { - return (arena_run_split_large_helper(arena, extent, run, size, true, - zero)); + return (arena_run_split_large_helper(tsdn, arena, extent, run, size, + true, zero)); } static bool -arena_run_init_large(arena_t *arena, extent_t *extent, arena_run_t *run, - size_t size, bool zero) +arena_run_init_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, size_t size, bool zero) { - return (arena_run_split_large_helper(arena, extent, run, size, false, - zero)); + return (arena_run_split_large_helper(tsdn, arena, extent, run, size, + false, zero)); } static bool -arena_run_split_small(arena_t *arena, extent_t *extent, arena_run_t *run, - size_t size, szind_t binind) +arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, size_t size, szind_t binind) { arena_chunk_t *chunk; arena_chunk_map_misc_t *miscelm; @@ -474,8 +474,8 @@ arena_run_split_small(arena_t *arena, extent_t *extent, arena_run_t *run, need_pages = (size >> LG_PAGE); assert(need_pages > 0); - if (flag_decommitted != 0 && arena->chunk_hooks.commit(chunk, chunksize, - run_ind << LG_PAGE, size, arena->ind)) + if (flag_decommitted != 0 && chunk_commit_wrapper(tsdn, arena, + &arena->chunk_hooks, chunk, chunksize, run_ind << LG_PAGE, size)) return (true); 
arena_run_split_remove(arena, chunk, run_ind, flag_dirty, @@ -542,8 +542,8 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, NULL, chunksize, chunksize, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. */ - if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << - LG_PAGE, arena->ind)) { + if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, chunk, + chunksize, 0, map_bias << LG_PAGE)) { chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, chunksize, *zero, *commit); chunk = NULL; @@ -1041,8 +1041,8 @@ arena_run_alloc_large_helper(tsdn_t *tsdn, arena_t *arena, size_t size, { arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); if (run != NULL) { - if (arena_run_split_large(arena, iealloc(tsdn, run), run, size, - zero)) + if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, + size, zero)) run = NULL; } return (run); @@ -1068,8 +1068,8 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; - if (arena_run_split_large(arena, iealloc(tsdn, run), run, size, - zero)) + if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, + size, zero)) run = NULL; return (run); } @@ -1088,8 +1088,8 @@ arena_run_alloc_small_helper(tsdn_t *tsdn, arena_t *arena, size_t size, { arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { - if (arena_run_split_small(arena, iealloc(tsdn, run), run, size, - binind)) + if (arena_run_split_small(tsdn, arena, iealloc(tsdn, run), run, + size, binind)) run = NULL; } return (run); @@ -1116,8 +1116,8 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) chunk = arena_chunk_alloc(tsdn, arena); if (chunk != NULL) { run = &arena_miscelm_get_mutable(chunk, map_bias)->run; - if (arena_run_split_small(arena, iealloc(tsdn, run), run, size, - binind)) + if (arena_run_split_small(tsdn, arena, iealloc(tsdn, 
run), run, + size, binind)) run = NULL; return (run); } @@ -1534,8 +1534,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_alloc(tsdn, arena); /* Temporarily allocate the free dirty run. */ - arena_run_split_large(arena, extent, run, run_size, - false); + arena_run_split_large(tsdn, arena, extent, run, + run_size, false); /* Stash. */ if (false) qr_new(rdelm, rd_link); /* Redundant. */ @@ -2569,7 +2569,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena_run_trim_tail(tsdn, arena, chunk, extent, run, usize + large_pad + trailsize, usize + large_pad, false); } - if (arena_run_init_large(arena, extent, run, usize + large_pad, zero)) { + if (arena_run_init_large(tsdn, arena, extent, run, usize + large_pad, + zero)) { size_t run_ind = arena_miscelm_to_pageind(arena_run_to_miscelm(run)); bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0); @@ -2944,7 +2945,7 @@ arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, goto label_fail; run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; - if (arena_run_split_large(arena, iealloc(tsdn, run), run, + if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, splitsize, zero)) goto label_fail; diff --git a/src/chunk.c b/src/chunk.c index 0d942d66..f8d9e634 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -743,6 +743,15 @@ chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, length)); } +bool +chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + void *chunk, size_t size, size_t offset, size_t length) +{ + + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + return (chunk_hooks->commit(chunk, size, offset, length, arena->ind)); +} + static bool chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, unsigned arena_ind) From 1ad060584f8ae4e0b9bc30c89ad7c1860ac3d89d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 May 
2016 10:32:05 -0700 Subject: [PATCH 0276/2608] Add/use chunk_merge_wrapper(). --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/chunk.h | 2 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 92 ++++++++++--------- src/chunk.c | 79 +++++++++------- src/huge.c | 20 +--- 6 files changed, 102 insertions(+), 94 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b6bfb25c..68d1015b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -496,7 +496,7 @@ void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize); bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize, bool *zero); + extent_t *extent, size_t usize); ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 52a6d565..9634975f 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -75,6 +75,8 @@ bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); +bool chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, extent_t *a, extent_t *b); bool chunk_boot(void); void chunk_prefork(tsdn_t *tsdn); void chunk_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index bd18e76b..9b507b1a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -176,6 +176,7 @@ chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup 
+chunk_merge_wrapper chunk_npages chunk_postfork_child chunk_postfork_parent diff --git a/src/arena.c b/src/arena.c index 5ea3fc85..2b92733b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -948,69 +948,71 @@ arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, malloc_mutex_unlock(tsdn, &arena->lock); } -static bool -arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, - bool *zero, void *nchunk, size_t udiff, size_t cdiff) -{ - bool err; - bool commit = true; - - err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, - chunksize, zero, &commit) == NULL); - if (err) { - /* Revert optimistic stats updates. */ - malloc_mutex_lock(tsdn, &arena->lock); - if (config_stats) { - arena_huge_ralloc_stats_update_undo(arena, oldsize, - usize); - arena->stats.mapped -= cdiff; - } - arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); - } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, - cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, - *zero, true); - err = true; - } - return (err); -} - bool -arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, size_t usize, bool *zero) +arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t usize) { bool err; + bool zero = false; chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); - size_t udiff = usize - oldsize; - size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + void *nchunk = + (void *)CHUNK_CEILING((uintptr_t)extent_past_get(extent)); + size_t udiff = usize - extent_size_get(extent); + size_t cdiff = CHUNK_CEILING(usize) - + CHUNK_CEILING(extent_size_get(extent)); + extent_t *trail; malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. 
*/ if (config_stats) { - arena_huge_ralloc_stats_update(arena, oldsize, usize); + arena_huge_ralloc_stats_update(arena, extent_size_get(extent), + usize); arena->stats.mapped += cdiff; } arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + chunksize, &zero, true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); + if (err) { - err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, - &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff, - cdiff); - } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, - cdiff, true, arena->ind)) { - chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, - *zero, true); - err = true; + bool commit = true; + + if (chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, + cdiff, chunksize, &zero, &commit) == NULL) + goto label_revert; } - return (err); + trail = arena_extent_alloc(tsdn, arena); + if (trail == NULL) { + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, + zero, true); + goto label_revert; + } + extent_init(trail, arena, nchunk, cdiff, true, zero, true, false); + if (chunk_merge_wrapper(tsdn, arena, &chunk_hooks, extent, trail)) { + arena_extent_dalloc(tsdn, arena, trail); + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, + zero, true); + goto label_revert; + } + + if (usize < extent_size_get(extent)) + extent_size_set(extent, usize); + + return (false); +label_revert: + /* Revert optimistic stats updates. 
*/ + malloc_mutex_lock(tsdn, &arena->lock); + if (config_stats) { + arena_huge_ralloc_stats_update_undo(arena, + extent_size_get(extent), usize); + arena->stats.mapped -= cdiff; + } + arena_nactive_sub(arena, udiff >> LG_PAGE); + malloc_mutex_unlock(tsdn, &arena->lock); + return (true); } /* diff --git a/src/chunk.c b/src/chunk.c index f8d9e634..59ebd291 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -586,51 +586,26 @@ static void chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES], bool cache) { - rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; if (!chunk_can_coalesce(a, b)) return; - if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), - extent_addr_get(b), extent_size_get(b), extent_committed_get(a), - arena->ind)) - return; - - /* - * The rtree writes must happen while all the relevant elements are - * owned, so the following code uses decomposed helper functions rather - * than chunk_{,de}register() to do things in the right order. 
- */ - extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b); - extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); - - if (a_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &chunks_rtree, a_elm_b, NULL); - rtree_elm_release(tsdn, &chunks_rtree, a_elm_b); - } - if (b_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &chunks_rtree, b_elm_a, NULL); - rtree_elm_release(tsdn, &chunks_rtree, b_elm_a); - } else - b_elm_b = b_elm_a; - extent_heaps_remove(extent_heaps, a); extent_heaps_remove(extent_heaps, b); arena_chunk_cache_maybe_remove(extent_arena_get(a), a, cache); arena_chunk_cache_maybe_remove(extent_arena_get(b), b, cache); - extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + if (chunk_merge_wrapper(tsdn, arena, chunk_hooks, a, b)) { + extent_heaps_insert(extent_heaps, a); + extent_heaps_insert(extent_heaps, b); + arena_chunk_cache_maybe_insert(extent_arena_get(a), a, cache); + arena_chunk_cache_maybe_insert(extent_arena_get(b), b, cache); + return; + } extent_heaps_insert(extent_heaps, a); - - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); - extent_rtree_release(tsdn, a_elm_a, b_elm_b); - arena_chunk_cache_maybe_insert(extent_arena_get(a), a, cache); - - arena_extent_dalloc(tsdn, extent_arena_get(b), b); } static void @@ -820,6 +795,46 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, return (false); } +bool +chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_t *a, extent_t *b) +{ + rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + + if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), + extent_addr_get(b), extent_size_get(b), extent_committed_get(a), + arena->ind)) + return (true); + + /* + * The rtree writes must happen while all the relevant elements are + * owned, so the following code uses decomposed helper functions rather + * than chunk_{,de}register() to do 
things in the right order. + */ + extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b); + extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); + + if (a_elm_b != NULL) { + rtree_elm_write_acquired(tsdn, &chunks_rtree, a_elm_b, NULL); + rtree_elm_release(tsdn, &chunks_rtree, a_elm_b); + } + if (b_elm_b != NULL) { + rtree_elm_write_acquired(tsdn, &chunks_rtree, b_elm_a, NULL); + rtree_elm_release(tsdn, &chunks_rtree, b_elm_a); + } else + b_elm_b = b_elm_a; + + extent_size_set(a, extent_size_get(a) + extent_size_get(b)); + extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); + extent_rtree_release(tsdn, a_elm_a, b_elm_b); + + arena_extent_dalloc(tsdn, extent_arena_get(b), b); + + return (false); +} + bool chunk_boot(void) { diff --git a/src/huge.c b/src/huge.c index 48b191ad..dc0d680e 100644 --- a/src/huge.c +++ b/src/huge.c @@ -215,31 +215,19 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t usize, bool zero) { arena_t *arena; - bool is_zeroed_subchunk, is_zeroed_chunk; + bool is_zeroed_subchunk; arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); is_zeroed_subchunk = extent_zeroed_get(extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - /* - * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, - * update extent's zeroed field, and zero as necessary. - */ - is_zeroed_chunk = false; - if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, - &is_zeroed_chunk)) + if (arena_chunk_ralloc_huge_expand(tsdn, arena, extent, usize)) return (true); - /* Update the size of the huge allocation. 
*/ - chunk_deregister(tsdn, extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); - extent_size_set(extent, usize); - extent_zeroed_set(extent, extent_zeroed_get(extent) && is_zeroed_chunk); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); - chunk_reregister(tsdn, extent); - if (zero || (config_fill && unlikely(opt_zero))) { + bool is_zeroed_chunk = extent_zeroed_get(extent); + if (!is_zeroed_subchunk) { memset((void *)((uintptr_t)ptr + oldsize), 0, CHUNK_CEILING(oldsize) - oldsize); From de0305a7f3e443d48e012272e1b91c44d2b129d2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 May 2016 21:02:46 -0700 Subject: [PATCH 0277/2608] Add/use chunk_split_wrapper(). Remove redundant ptr/oldsize args from huge_*(). Refactor huge/chunk/arena code boundaries. --- include/jemalloc/internal/arena.h | 38 +- include/jemalloc/internal/chunk.h | 8 +- include/jemalloc/internal/huge.h | 21 +- include/jemalloc/internal/private_symbols.txt | 3 + src/arena.c | 515 +++++++++--------- src/chunk.c | 241 ++++---- src/huge.c | 258 ++++----- 7 files changed, 564 insertions(+), 520 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 68d1015b..187b6256 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -176,13 +176,6 @@ typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; #ifdef JEMALLOC_ARENA_STRUCTS_B /* Arena chunk header. */ struct arena_chunk_s { - /* - * A pointer to the arena that owns the chunk is stored within the - * extent structure. This field as a whole is used by chunks_rtree to - * support both ivsalloc() and core-based debugging. - */ - extent_t extent; - /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not @@ -315,7 +308,7 @@ struct arena_s { * order to avoid interactions between multiple threads that could make * a single spare inadequate. 
*/ - arena_chunk_t *spare; + extent_t *spare; /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; @@ -481,22 +474,27 @@ typedef size_t (run_quantize_t)(size_t); extern run_quantize_t *run_quantize_floor; extern run_quantize_t *run_quantize_ceil; #endif +extent_t *arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero); +void arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool cache); extent_t *arena_extent_alloc(tsdn_t *tsdn, arena_t *arena); void arena_extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero); +extent_t *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, + size_t usize, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize); void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize); + extent_t *extent, size_t oldsize); void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize); -bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t usize); + extent_t *extent, size_t oldsize); +void arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult); @@ -1193,7 +1191,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) ret = atomic_read_p(&elm->prof_tctx_pun); } } else - ret = 
huge_prof_tctx_get(tsdn, extent, ptr); + ret = huge_prof_tctx_get(tsdn, extent); return (ret); } @@ -1230,7 +1228,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, assert(arena_mapbits_large_get(chunk, pageind) == 0); } } else - huge_prof_tctx_set(tsdn, extent, ptr, tctx); + huge_prof_tctx_set(tsdn, extent, tctx); } JEMALLOC_INLINE void @@ -1258,7 +1256,7 @@ arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, atomic_write_p(&elm->prof_tctx_pun, (prof_tctx_t *)(uintptr_t)1U); } else - huge_prof_tctx_reset(tsdn, extent, ptr); + huge_prof_tctx_reset(tsdn, extent); } } @@ -1362,7 +1360,7 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) ret = index2size(binind); } } else - ret = huge_salloc(tsdn, extent, ptr); + ret = huge_salloc(tsdn, extent); return (ret); } @@ -1413,7 +1411,7 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, } } } else - huge_dalloc(tsdn, extent, ptr); + huge_dalloc(tsdn, extent); } JEMALLOC_ALWAYS_INLINE void @@ -1470,7 +1468,7 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, } } } else - huge_dalloc(tsdn, extent, ptr); + huge_dalloc(tsdn, extent); } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 9634975f..78cc4c2d 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -55,10 +55,10 @@ chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, bool chunk_register(tsdn_t *tsdn, const extent_t *extent); void chunk_deregister(tsdn_t *tsdn, const extent_t *extent); void chunk_reregister(tsdn_t *tsdn, const extent_t *extent); -void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, +extent_t *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool dalloc_extent); -void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + 
bool *zero); +extent_t *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, @@ -75,6 +75,8 @@ bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); +extent_t *chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, extent_t *extent, size_t size_a, size_t size_b); bool chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *a, extent_t *b); bool chunk_boot(void); diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index a385a202..bdc8f847 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -12,22 +12,19 @@ void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); -bool huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t usize_min, size_t usize_max, bool zero); -void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t usize, size_t alignment, bool zero, - tcache_t *tcache); +bool huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero); +void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif -void huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr); -size_t huge_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); -prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void 
huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr); +void huge_dalloc(tsdn_t *tsdn, extent_t *extent); +size_t huge_salloc(tsdn_t *tsdn, const extent_t *extent); +prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); +void huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 9b507b1a..34a6816f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -12,6 +12,8 @@ arena_choose arena_choose_hard arena_choose_impl arena_chunk_alloc_huge +arena_chunk_cache_alloc +arena_chunk_cache_dalloc arena_chunk_cache_maybe_insert arena_chunk_cache_maybe_remove arena_chunk_dalloc_huge @@ -184,6 +186,7 @@ chunk_prefork chunk_purge_wrapper chunk_register chunk_reregister +chunk_split_wrapper chunks_rtree chunksize chunksize_mask diff --git a/src/arena.c b/src/arena.c index 2b92733b..a610ec15 100644 --- a/src/arena.c +++ b/src/arena.c @@ -220,6 +220,55 @@ arena_chunk_dirty_npages(const extent_t *extent) return (extent_size_get(extent) >> LG_PAGE); } +static extent_t * +arena_chunk_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero) +{ + + malloc_mutex_assert_owner(tsdn, &arena->lock); + + return (chunk_alloc_cache(tsdn, arena, chunk_hooks, new_addr, size, + alignment, zero)); +} + +extent_t * +arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero) +{ + extent_t *extent; + + malloc_mutex_lock(tsdn, &arena->lock); + extent = 
arena_chunk_cache_alloc_locked(tsdn, arena, chunk_hooks, + new_addr, size, alignment, zero); + malloc_mutex_unlock(tsdn, &arena->lock); + + return (extent); +} + +static void +arena_chunk_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed) +{ + + malloc_mutex_assert_owner(tsdn, &arena->lock); + + chunk_dalloc_cache(tsdn, arena, chunk_hooks, chunk, size, committed); + arena_maybe_purge(tsdn, arena); +} + +void +arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed) +{ + + malloc_mutex_lock(tsdn, &arena->lock); + arena_chunk_cache_dalloc_locked(tsdn, arena, chunk_hooks, chunk, size, + committed); + malloc_mutex_unlock(tsdn, &arena->lock); +} + void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) { @@ -492,112 +541,119 @@ arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, return (false); } -static arena_chunk_t * +static extent_t * arena_chunk_init_spare(arena_t *arena) { - arena_chunk_t *chunk; + extent_t *extent; assert(arena->spare != NULL); - chunk = arena->spare; + extent = arena->spare; arena->spare = NULL; - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxrun); - assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxrun); - assert(arena_mapbits_dirty_get(chunk, map_bias) == - arena_mapbits_dirty_get(chunk, chunk_npages-1)); + assert(arena_mapbits_allocated_get((arena_chunk_t *) + extent_addr_get(extent), map_bias) == 0); + assert(arena_mapbits_allocated_get((arena_chunk_t *) + extent_addr_get(extent), chunk_npages-1) == 0); + assert(arena_mapbits_unallocated_size_get((arena_chunk_t *) + extent_addr_get(extent), map_bias) == arena_maxrun); + assert(arena_mapbits_unallocated_size_get((arena_chunk_t *) 
+ extent_addr_get(extent), chunk_npages-1) == arena_maxrun); + assert(arena_mapbits_dirty_get((arena_chunk_t *) + extent_addr_get(extent), map_bias) == + arena_mapbits_dirty_get((arena_chunk_t *)extent_addr_get(extent), + chunk_npages-1)); - return (chunk); + return (extent); } -static bool -arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - bool zero) -{ - - /* - * The extent notion of "committed" doesn't directly apply to arena - * chunks. Arbitrarily mark them as committed. The commit state of - * runs is tracked individually, and upon chunk deallocation the entire - * chunk is in a consistent commit state. - */ - extent_init(&chunk->extent, arena, chunk, chunksize, true, zero, true, - true); - return (chunk_register(tsdn, &chunk->extent)); -} - -static arena_chunk_t * +static extent_t * arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { - arena_chunk_t *chunk; + extent_t *extent; malloc_mutex_unlock(tsdn, &arena->lock); - chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, - NULL, chunksize, chunksize, zero, commit); - if (chunk != NULL && !*commit) { + extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, + chunksize, zero, commit); + if (extent != NULL && !*commit) { /* Commit header. */ - if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, chunk, - chunksize, 0, map_bias << LG_PAGE)) { + if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, + extent_addr_get(extent), extent_size_get(extent), 0, + map_bias << LG_PAGE)) { chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, - (void *)chunk, chunksize, *zero, *commit); - chunk = NULL; + extent_addr_get(extent), extent_size_get(extent), + extent_zeroed_get(extent), + extent_committed_get(extent)); + extent = NULL; } } - if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) { - if (!*commit) { - /* Undo commit of header. 
*/ - chunk_decommit_wrapper(tsdn, arena, chunk_hooks, - chunk, chunksize, 0, map_bias << LG_PAGE); + + if (extent != NULL) { + extent_slab_set(extent, true); + + if (chunk_register(tsdn, extent)) { + if (!*commit) { + /* Undo commit of header. */ + chunk_decommit_wrapper(tsdn, arena, chunk_hooks, + extent_addr_get(extent), + extent_size_get(extent), 0, map_bias << + LG_PAGE); + } + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, + extent_addr_get(extent), extent_size_get(extent), + extent_zeroed_get(extent), + extent_committed_get(extent)); + extent = NULL; } - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, - chunksize, *zero, *commit); - chunk = NULL; } malloc_mutex_lock(tsdn, &arena->lock); - return (chunk); + + return (extent); } -static arena_chunk_t * +static extent_t * arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, bool *commit) { - arena_chunk_t *chunk; + extent_t *extent; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, true); - if (chunk != NULL) { - if (arena_chunk_register(tsdn, arena, chunk, *zero)) { - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, - chunksize, true); + extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, + chunksize, chunksize, zero); + if (extent != NULL) { + extent_slab_set(extent, true); + + if (chunk_register(tsdn, extent)) { + arena_chunk_cache_dalloc_locked(tsdn, arena, + &chunk_hooks, extent_addr_get(extent), + extent_size_get(extent), true); return (NULL); } *commit = true; } - if (chunk == NULL) { - chunk = arena_chunk_alloc_internal_hard(tsdn, arena, + if (extent == NULL) { + extent = arena_chunk_alloc_internal_hard(tsdn, arena, &chunk_hooks, zero, commit); + if (extent == NULL) + return (NULL); } + assert(extent_slab_get(extent)); - if (config_stats && chunk != NULL) { - arena->stats.mapped += chunksize; + if (config_stats) { + arena->stats.mapped += 
extent_size_get(extent); arena->stats.metadata_mapped += (map_bias << LG_PAGE); } - return (chunk); + return (extent); } -static arena_chunk_t * +static extent_t * arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) { - arena_chunk_t *chunk; + extent_t *extent; bool zero, commit; size_t flag_unzeroed, flag_decommitted, i; @@ -605,8 +661,8 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) zero = false; commit = false; - chunk = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); - if (chunk == NULL) + extent = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); + if (extent == NULL) return (NULL); /* @@ -616,58 +672,63 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) */ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED; - arena_mapbits_unallocated_set(chunk, map_bias, arena_maxrun, - flag_unzeroed | flag_decommitted); + arena_mapbits_unallocated_set((arena_chunk_t *)extent_addr_get(extent), + map_bias, arena_maxrun, flag_unzeroed | flag_decommitted); /* * There is no need to initialize the internal page map entries unless * the chunk is not zeroed. 
*/ if (!zero) { - for (i = map_bias+1; i < chunk_npages-1; i++) - arena_mapbits_internal_set(chunk, i, flag_unzeroed); + for (i = map_bias+1; i < chunk_npages-1; i++) { + arena_mapbits_internal_set((arena_chunk_t *) + extent_addr_get(extent), i, flag_unzeroed); + } } else { if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { - assert(arena_mapbits_unzeroed_get(chunk, i) == - flag_unzeroed); + assert(arena_mapbits_unzeroed_get( + (arena_chunk_t *)extent_addr_get(extent), i) + == flag_unzeroed); } } } - arena_mapbits_unallocated_set(chunk, chunk_npages-1, arena_maxrun, - flag_unzeroed); + arena_mapbits_unallocated_set((arena_chunk_t *)extent_addr_get(extent), + chunk_npages-1, arena_maxrun, flag_unzeroed); - return (chunk); + return (extent); } -static arena_chunk_t * +static extent_t * arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) { - arena_chunk_t *chunk; + extent_t *extent; if (arena->spare != NULL) - chunk = arena_chunk_init_spare(arena); + extent = arena_chunk_init_spare(arena); else { - chunk = arena_chunk_init_hard(tsdn, arena); - if (chunk == NULL) + extent = arena_chunk_init_hard(tsdn, arena); + if (extent == NULL) return (NULL); } - ql_elm_new(&chunk->extent, ql_link); - ql_tail_insert(&arena->achunks, &chunk->extent, ql_link); - arena_avail_insert(arena, chunk, map_bias, chunk_npages-map_bias); + ql_elm_new(extent, ql_link); + ql_tail_insert(&arena->achunks, extent, ql_link); + arena_avail_insert(arena, (arena_chunk_t *)extent_addr_get(extent), + map_bias, chunk_npages-map_bias); - return (chunk); + return (extent); } static void -arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_deregister(tsdn, &chunk->extent); + chunk_deregister(tsdn, extent); - committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); + committed = 
(arena_mapbits_decommitted_get((arena_chunk_t *) + extent_addr_get(extent), map_bias) == 0); if (!committed) { /* * Decommit the header. Mark the chunk as decommitted even if @@ -675,37 +736,42 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) * chunk as committed has a high potential for causing later * access of decommitted memory. */ - chunk_decommit_wrapper(tsdn, arena, &chunk_hooks, chunk, - chunksize, 0, map_bias << LG_PAGE); + chunk_decommit_wrapper(tsdn, arena, &chunk_hooks, + extent_addr_get(extent), extent_size_get(extent), 0, + map_bias << LG_PAGE); } - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, - committed); + arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, + extent_addr_get(extent), extent_size_get(extent), committed); if (config_stats) { - arena->stats.mapped -= chunksize; + arena->stats.mapped -= extent_size_get(extent); arena->stats.metadata_mapped -= (map_bias << LG_PAGE); } + + arena_extent_dalloc(tsdn, arena, extent); } static void -arena_spare_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *spare) +arena_spare_discard(tsdn_t *tsdn, arena_t *arena, extent_t *spare) { assert(arena->spare != spare); - if (arena_mapbits_dirty_get(spare, map_bias) != 0) { - arena_run_dirty_remove(arena, spare, map_bias, - chunk_npages-map_bias); + if (arena_mapbits_dirty_get((arena_chunk_t *)extent_addr_get(spare), + map_bias) != 0) { + arena_run_dirty_remove(arena, (arena_chunk_t *) + extent_addr_get(spare), map_bias, chunk_npages-map_bias); } arena_chunk_discard(tsdn, arena, spare); } static void -arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) +arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - arena_chunk_t *spare; + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + extent_t *spare; assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); @@ -721,9 +787,9 @@ 
arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) /* Remove run from runs_avail, so that the arena does not use it. */ arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); - ql_remove(&arena->achunks, &chunk->extent, ql_link); + ql_remove(&arena->achunks, extent, ql_link); spare = arena->spare; - arena->spare = chunk; + arena->spare = extent; if (spare != NULL) arena_spare_discard(tsdn, arena, spare); } @@ -778,19 +844,6 @@ arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) arena->stats.hstats[index].ndalloc--; } -static void -arena_huge_dalloc_stats_update_undo(arena_t *arena, size_t usize) -{ - szind_t index = size2index(usize) - nlclasses - NBINS; - - cassert(config_stats); - - arena->stats.ndalloc_huge--; - arena->stats.allocated_huge += usize; - arena->stats.hstats[index].ndalloc--; - arena->stats.hstats[index].curhchunks++; -} - static void arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) { @@ -799,15 +852,6 @@ arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) arena_huge_malloc_stats_update(arena, usize); } -static void -arena_huge_ralloc_stats_update_undo(arena_t *arena, size_t oldsize, - size_t usize) -{ - - arena_huge_dalloc_stats_update_undo(arena, oldsize); - arena_huge_malloc_stats_update_undo(arena, usize); -} - extent_t * arena_extent_alloc(tsdn_t *tsdn, arena_t *arena) { @@ -834,17 +878,17 @@ arena_extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); } -static void * +static extent_t * arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, size_t csize) { - void *ret; + extent_t *extent; bool commit = true; - ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, + extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, alignment, zero, &commit); - if (ret == NULL) { + if (extent == NULL) { /* 
Revert optimistic stats updates. */ malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { @@ -855,14 +899,14 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, malloc_mutex_unlock(tsdn, &arena->lock); } - return (ret); + return (extent); } -void * +extent_t * arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - void *ret; + extent_t *extent; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); @@ -875,15 +919,15 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, } arena_nactive_add(arena, usize >> LG_PAGE); - ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, zero, true); + extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, + csize, alignment, zero); malloc_mutex_unlock(tsdn, &arena->lock); - if (ret == NULL) { - ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, + if (extent == NULL) { + extent = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, usize, alignment, zero, csize); } - return (ret); + return (extent); } void @@ -900,14 +944,16 @@ arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize) } arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, true); + arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, chunk, csize, + true); malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, size_t usize) +arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t oldsize) { + size_t usize = extent_size_get(extent); assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); assert(oldsize != usize); @@ -923,9 +969,10 @@ arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, } void -arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, 
size_t usize) +arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t oldsize) { + size_t usize = extent_size_get(extent); size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); @@ -936,83 +983,24 @@ arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, arena->stats.mapped -= cdiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); - - if (cdiff != 0) { - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - void *nchunk = (void *)((uintptr_t)chunk + - CHUNK_CEILING(usize)); - - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - true); - } malloc_mutex_unlock(tsdn, &arena->lock); } -bool +void arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t usize) + size_t oldsize) { - bool err; - bool zero = false; - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - void *nchunk = - (void *)CHUNK_CEILING((uintptr_t)extent_past_get(extent)); - size_t udiff = usize - extent_size_get(extent); - size_t cdiff = CHUNK_CEILING(usize) - - CHUNK_CEILING(extent_size_get(extent)); - extent_t *trail; + size_t usize = extent_size_get(extent); + size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + size_t udiff = usize - oldsize; malloc_mutex_lock(tsdn, &arena->lock); - - /* Optimistically update stats. 
*/ if (config_stats) { - arena_huge_ralloc_stats_update(arena, extent_size_get(extent), - usize); + arena_huge_ralloc_stats_update(arena, oldsize, usize); arena->stats.mapped += cdiff; } arena_nactive_add(arena, udiff >> LG_PAGE); - - err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, &zero, true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); - - if (err) { - bool commit = true; - - if (chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, - cdiff, chunksize, &zero, &commit) == NULL) - goto label_revert; - } - - trail = arena_extent_alloc(tsdn, arena); - if (trail == NULL) { - chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, - zero, true); - goto label_revert; - } - extent_init(trail, arena, nchunk, cdiff, true, zero, true, false); - if (chunk_merge_wrapper(tsdn, arena, &chunk_hooks, extent, trail)) { - arena_extent_dalloc(tsdn, arena, trail); - chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, nchunk, cdiff, - zero, true); - goto label_revert; - } - - if (usize < extent_size_get(extent)) - extent_size_set(extent, usize); - - return (false); -label_revert: - /* Revert optimistic stats updates. */ - malloc_mutex_lock(tsdn, &arena->lock); - if (config_stats) { - arena_huge_ralloc_stats_update_undo(arena, - extent_size_get(extent), usize); - arena->stats.mapped -= cdiff; - } - arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); - return (true); } /* @@ -1053,8 +1041,8 @@ arena_run_alloc_large_helper(tsdn_t *tsdn, arena_t *arena, size_t size, static arena_run_t * arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) { - arena_chunk_t *chunk; arena_run_t *run; + extent_t *extent; assert(size <= arena_maxrun); assert(size == PAGE_CEILING(size)); @@ -1067,9 +1055,10 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) /* * No usable runs. Create a new chunk from which to allocate the run. 
*/ - chunk = arena_chunk_alloc(tsdn, arena); - if (chunk != NULL) { - run = &arena_miscelm_get_mutable(chunk, map_bias)->run; + extent = arena_chunk_alloc(tsdn, arena); + if (extent != NULL) { + run = &arena_miscelm_get_mutable((arena_chunk_t *) + extent_addr_get(extent), map_bias)->run; if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, size, zero)) run = NULL; @@ -1100,8 +1089,8 @@ arena_run_alloc_small_helper(tsdn_t *tsdn, arena_t *arena, size_t size, static arena_run_t * arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) { - arena_chunk_t *chunk; arena_run_t *run; + extent_t *extent; assert(size <= arena_maxrun); assert(size == PAGE_CEILING(size)); @@ -1115,9 +1104,10 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) /* * No usable runs. Create a new chunk from which to allocate the run. */ - chunk = arena_chunk_alloc(tsdn, arena); - if (chunk != NULL) { - run = &arena_miscelm_get_mutable(chunk, map_bias)->run; + extent = arena_chunk_alloc(tsdn, arena); + if (extent != NULL) { + run = &arena_miscelm_get_mutable( + (arena_chunk_t *)extent_addr_get(extent), map_bias)->run; if (arena_run_split_small(tsdn, arena, iealloc(tsdn, run), run, size, binind)) run = NULL; @@ -1420,6 +1410,8 @@ void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_assert_owner(tsdn, &arena->lock); + /* Don't recursively purge. */ if (arena->purging) return; @@ -1484,7 +1476,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_t *chunkselm_next; bool zero; - UNUSED void *chunk; + UNUSED extent_t *extent; npages = extent_size_get(chunkselm) >> LG_PAGE; if (opt_purge == purge_mode_decay && arena->ndirty - @@ -1492,16 +1484,12 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, break; chunkselm_next = qr_next(chunkselm, cc_link); - /* - * Allocate. 
chunkselm remains valid due to the - * dalloc_extent=false argument to chunk_alloc_cache(). - */ + /* Allocate. */ zero = false; - chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, - extent_addr_get(chunkselm), - extent_size_get(chunkselm), chunksize, &zero, - false); - assert(chunk == extent_addr_get(chunkselm)); + extent = arena_chunk_cache_alloc_locked(tsdn, arena, + chunk_hooks, extent_addr_get(chunkselm), + extent_size_get(chunkselm), chunksize, &zero); + assert(extent == chunkselm); assert(zero == extent_zeroed_get(chunkselm)); extent_dirty_insert(chunkselm, purge_runs_sentinel, purge_chunks_sentinel); @@ -1510,14 +1498,13 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunkselm = chunkselm_next; } else { extent_t *extent = iealloc(tsdn, rdelm); - arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(rdelm); size_t pageind = arena_miscelm_to_pageind(miscelm); arena_run_t *run = &miscelm->run; size_t run_size = - arena_mapbits_unallocated_size_get(chunk, pageind); + arena_mapbits_unallocated_size_get((arena_chunk_t *) + extent_addr_get(extent), pageind); npages = run_size >> LG_PAGE; if (opt_purge == purge_mode_decay && arena->ndirty - @@ -1525,14 +1512,16 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, break; assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, pageind+npages-1)); + assert(arena_mapbits_dirty_get((arena_chunk_t *) + extent_addr_get(extent), pageind) == + arena_mapbits_dirty_get((arena_chunk_t *) + extent_addr_get(extent), pageind+npages-1)); /* * If purging the spare chunk's run, make it available * prior to allocation. */ - if (chunk == arena->spare) + if (extent == arena->spare) arena_chunk_alloc(tsdn, arena); /* Temporarily allocate the free dirty run. 
*/ @@ -1757,8 +1746,9 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) } static void -arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) +arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, extent_t *extent) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind, npages; cassert(config_prof); @@ -1773,10 +1763,10 @@ arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, arena_chunk_t *chunk) if (arena_mapbits_large_get(chunk, pageind) != 0) { void *ptr = (void *)((uintptr_t)chunk + (pageind << LG_PAGE)); - size_t usize = isalloc(tsd_tsdn(tsd), - &chunk->extent, ptr, config_prof); + size_t usize = isalloc(tsd_tsdn(tsd), extent, + ptr, config_prof); - prof_free(tsd, &chunk->extent, ptr, usize); + prof_free(tsd, extent, ptr, usize); npages = arena_mapbits_large_size_get(chunk, pageind) >> LG_PAGE; } else { @@ -1819,8 +1809,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) /* Remove large allocations from prof sample set. */ if (config_prof && opt_prof) { ql_foreach(extent, &arena->achunks, ql_link) { - arena_achunk_prof_reset(tsd, arena, - extent_addr_get(extent)); + arena_achunk_prof_reset(tsd, arena, extent); } } @@ -1845,7 +1834,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) /* Remove huge allocation from prof sample set. */ if (config_prof && opt_prof) prof_free(tsd, extent, ptr, usize); - huge_dalloc(tsd_tsdn(tsd), extent, ptr); + huge_dalloc(tsd_tsdn(tsd), extent); malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); /* Cancel out unwanted effects on stats. */ if (config_stats) @@ -1883,8 +1872,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) for (extent = ql_last(&arena->achunks, ql_link); extent != NULL; extent = ql_last(&arena->achunks, ql_link)) { ql_remove(&arena->achunks, extent, ql_link); - arena_chunk_discard(tsd_tsdn(tsd), arena, - extent_addr_get(extent)); + arena_chunk_discard(tsd_tsdn(tsd), arena, extent); } /* Spare. 
*/ @@ -2078,7 +2066,7 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (size == arena_maxrun) { assert(run_ind == map_bias); assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(tsdn, arena, chunk); + arena_chunk_dalloc(tsdn, arena, extent); } /* @@ -3113,10 +3101,12 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); - } else { - return (huge_ralloc_no_move(tsdn, extent, ptr, oldsize, - usize_min, usize_max, zero)); + } else if (oldsize >= chunksize && usize_max >= chunksize) { + return (huge_ralloc_no_move(tsdn, extent, usize_min, usize_max, + zero)); } + + return (true); } static void * @@ -3138,42 +3128,41 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - size_t usize; + size_t usize, copysize; usize = s2u(size); if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) return (NULL); if (likely(usize <= large_maxclass)) { - size_t copysize; - /* Try to avoid moving the allocation. */ if (!arena_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, 0, zero)) return (ptr); - - /* - * size and oldsize are different enough that we need to move - * the object. In that case, fall back to allocating new space - * and copying. - */ - ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, - zero, tcache); - if (ret == NULL) - return (NULL); - - /* - * Junk/zero-filling were already done by - * ipalloc()/arena_malloc(). - */ - - copysize = (usize < oldsize) ? 
usize : oldsize; - memcpy(ret, ptr, copysize); - isdalloct(tsdn, extent, ptr, oldsize, tcache, true); - } else { - ret = huge_ralloc(tsdn, arena, extent, ptr, oldsize, usize, - alignment, zero, tcache); } + + if (oldsize >= chunksize && usize >= chunksize) { + return (huge_ralloc(tsdn, arena, extent, usize, alignment, zero, + tcache)); + } + + /* + * size and oldsize are different enough that we need to move the + * object. In that case, fall back to allocating new space and copying. + */ + ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, zero, + tcache); + if (ret == NULL) + return (NULL); + + /* + * Junk/zero-filling were already done by + * ipalloc()/arena_malloc(). + */ + + copysize = (usize < oldsize) ? usize : oldsize; + memcpy(ret, ptr, copysize); + isdalloct(tsdn, extent, ptr, oldsize, tcache, true); return (ret); } diff --git a/src/chunk.c b/src/chunk.c index 59ebd291..4efba4a7 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -58,7 +58,8 @@ static void chunk_record(tsdn_t *tsdn, arena_t *arena, static void extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) { - size_t psz = extent_size_quantize_floor(extent_size_get(extent)); + size_t psz = + extent_size_quantize_floor(CHUNK_CEILING(extent_size_get(extent))); pszind_t pind = psz2ind(psz); extent_heap_insert(&extent_heaps[pind], extent); } @@ -66,7 +67,8 @@ extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) static void extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) { - size_t psz = extent_size_quantize_floor(extent_size_get(extent)); + size_t psz = + extent_size_quantize_floor(CHUNK_CEILING(extent_size_get(extent))); pszind_t pind = psz2ind(psz); extent_heap_remove(&extent_heaps[pind], extent); } @@ -211,7 +213,7 @@ chunk_register(tsdn_t *tsdn, const extent_t *extent) extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof && opt_prof) { + if (config_prof && 
opt_prof && extent_active_get(extent)) { size_t nadd = (extent_size_get(extent) == 0) ? 1 : extent_size_get(extent) / chunksize; size_t cur = atomic_add_z(&curchunks, nadd); @@ -239,7 +241,7 @@ chunk_deregister(tsdn_t *tsdn, const extent_t *extent) extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof && opt_prof) { + if (config_prof && opt_prof && extent_active_get(extent)) { size_t nsub = (extent_size_get(extent) == 0) ? 1 : extent_size_get(extent) / chunksize; assert(atomic_read_z(&curchunks) >= nsub); @@ -293,23 +295,15 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, } } -static void * +static extent_t * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, bool dalloc_extent) + size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; extent_t *extent; size_t alloc_size, leadsize, trailsize; - bool zeroed, committed; assert(new_addr == NULL || alignment == chunksize); - /* - * Cached chunks use the extent linkage embedded in their headers, in - * which case dalloc_extent is true, and new_addr is non-NULL because - * we're operating on a specific chunk. - */ - assert(dalloc_extent || new_addr != NULL); alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); /* Beware size_t wrap-around. 
*/ @@ -338,99 +332,79 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } + extent_heaps_remove(extent_heaps, extent); + arena_chunk_cache_maybe_remove(arena, extent, cache); + leadsize = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), alignment) - (uintptr_t)extent_addr_get(extent); assert(new_addr == NULL || leadsize == 0); assert(extent_size_get(extent) >= leadsize + size); trailsize = extent_size_get(extent) - leadsize - size; - ret = (void *)((uintptr_t)extent_addr_get(extent) + leadsize); - zeroed = extent_zeroed_get(extent); - if (zeroed) + if (extent_zeroed_get(extent)) *zero = true; - committed = extent_committed_get(extent); - if (committed) + if (extent_committed_get(extent)) *commit = true; + /* Split the lead. */ - if (leadsize != 0 && - chunk_hooks->split(extent_addr_get(extent), - extent_size_get(extent), leadsize, size, false, arena->ind)) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - return (NULL); - } - /* Remove extent from the heap. */ - chunk_deregister(tsdn, extent); - extent_heaps_remove(extent_heaps, extent); - arena_chunk_cache_maybe_remove(arena, extent, cache); if (leadsize != 0) { - /* Insert the leading space as a smaller chunk. */ - extent_size_set(extent, leadsize); - if (chunk_register(tsdn, extent)) { + extent_t *lead = extent; + extent = chunk_split_wrapper(tsdn, arena, chunk_hooks, lead, + leadsize, size + trailsize); + if (extent == NULL) { chunk_leak(tsdn, arena, chunk_hooks, cache, - extent_addr_get(extent), extent_size_get(extent)); - arena_extent_dalloc(tsdn, arena, extent); - } else { - extent_heaps_insert(extent_heaps, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); - } - extent = NULL; - } - if (trailsize != 0) { - /* Split the trail. 
*/ - if (chunk_hooks->split(ret, size + trailsize, size, - trailsize, false, arena->ind)) { - if (dalloc_extent && extent != NULL) - arena_extent_dalloc(tsdn, arena, extent); + extent_addr_get(lead), extent_size_get(lead)); + arena_extent_dalloc(tsdn, arena, lead); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, extent_heaps, - cache, ret, size + trailsize, zeroed, committed); return (NULL); } - /* Insert the trailing space as a smaller chunk. */ - if (extent == NULL) { - extent = arena_extent_alloc(tsdn, arena); - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, - extent_heaps, cache, ret, size + trailsize, - zeroed, committed); - return (NULL); - } - } - extent_init(extent, arena, (void *)((uintptr_t)(ret) + size), - trailsize, false, zeroed, committed, false); - if (chunk_register(tsdn, extent)) { + extent_heaps_insert(extent_heaps, lead); + arena_chunk_cache_maybe_insert(arena, lead, cache); + } + + /* Split the trail. 
*/ + if (trailsize != 0) { + extent_t *trail = chunk_split_wrapper(tsdn, arena, chunk_hooks, + extent, size, trailsize); + if (trail == NULL) { chunk_leak(tsdn, arena, chunk_hooks, cache, extent_addr_get(extent), extent_size_get(extent)); arena_extent_dalloc(tsdn, arena, extent); - } else { - extent_heaps_insert(extent_heaps, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + return (NULL); } - extent = NULL; + extent_heaps_insert(extent_heaps, trail); + arena_chunk_cache_maybe_insert(arena, trail, cache); } - if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { + + if (!extent_committed_get(extent) && + chunk_hooks->commit(extent_addr_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, ret, - size, zeroed, committed); + chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, + extent_addr_get(extent), extent_size_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent)); + arena_extent_dalloc(tsdn, arena, extent); return (NULL); } + + extent_active_set(extent, true); + malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - assert(dalloc_extent || extent != NULL); - if (dalloc_extent && extent != NULL) - arena_extent_dalloc(tsdn, arena, extent); if (*zero) { - if (!zeroed) - memset(ret, 0, size); - else if (config_debug) { + if (!extent_zeroed_get(extent)) { + memset(extent_addr_get(extent), 0, + extent_size_get(extent)); + } else if (config_debug) { size_t i; - size_t *p = (size_t *)(uintptr_t)ret; + size_t *p = (size_t *)(uintptr_t) + extent_addr_get(extent); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } } - return (ret); + return (extent); } /* @@ -469,12 +443,11 @@ chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return (NULL); } -void * +extent_t * chunk_alloc_cache(tsdn_t *tsdn, 
arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, - bool dalloc_extent) + void *new_addr, size_t size, size_t alignment, bool *zero) { - void *ret; + extent_t *extent; bool commit; assert(size != 0); @@ -483,12 +456,12 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); commit = true; - ret = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, - true, new_addr, size, alignment, zero, &commit, dalloc_extent); - if (ret == NULL) + extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, + true, new_addr, size, alignment, zero, &commit); + if (extent == NULL) return (NULL); assert(commit); - return (ret); + return (extent); } static arena_t * @@ -523,44 +496,51 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, return (ret); } -static void * +static extent_t * chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; + extent_t *extent; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - ret = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, - false, new_addr, size, alignment, zero, commit, true); + extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, + false, new_addr, size, alignment, zero, commit); - if (config_stats && ret != NULL) + if (config_stats && extent != NULL) arena->stats.retained -= size; - return (ret); + return (extent); } -void * +extent_t * chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; + extent_t *extent; chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, + extent 
= chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, alignment, zero, commit); - if (ret == NULL) { - ret = chunk_hooks->alloc(new_addr, size, alignment, zero, - commit, arena->ind); - if (ret == NULL) + if (extent == NULL) { + void *chunk; + + extent = arena_extent_alloc(tsdn, arena); + if (extent == NULL) return (NULL); + chunk = chunk_hooks->alloc(new_addr, size, alignment, + zero, commit, arena->ind); + if (chunk == NULL) + return (NULL); + extent_init(extent, arena, chunk, size, true, zero, commit, + false); } - return (ret); + return (extent); } static bool @@ -668,7 +648,6 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_record(tsdn, arena, chunk_hooks, arena->chunks_cached, true, chunk, size, false, committed); - arena_maybe_purge(tsdn, arena); } static bool @@ -779,6 +758,67 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, return (false); } +extent_t * +chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + extent_t *extent, size_t size_a, size_t size_b) +{ + extent_t *trail; + rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; + + assert(CHUNK_CEILING(size_a) == size_a); + assert(CHUNK_CEILING(extent_size_get(extent)) == size_a + + CHUNK_CEILING(size_b)); + + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + + trail = arena_extent_alloc(tsdn, arena); + if (trail == NULL) + goto label_error_a; + + { + extent_t lead; + + extent_init(&lead, arena, extent_addr_get(extent), size_a, + extent_active_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent), extent_slab_get(extent)); + + if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, + &lead_elm_b)) + goto label_error_b; + } + + extent_init(trail, arena, (void *)((uintptr_t)extent_addr_get(extent) + + size_a), CHUNK_CEILING(size_b), extent_active_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_slab_get(extent)); + if 
(extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, + &trail_elm_b)) + goto label_error_c; + + if (chunk_hooks->split(extent_addr_get(extent), size_a + + CHUNK_CEILING(size_b), size_a, CHUNK_CEILING(size_b), + extent_committed_get(extent), arena->ind)) + goto label_error_d; + + extent_size_set(extent, size_a); + + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent); + extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail); + + extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); + extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); + + return (trail); +label_error_d: + extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); +label_error_c: + extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); +label_error_b: + arena_extent_dalloc(tsdn, arena, trail); +label_error_a: + return (NULL); +} + static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) @@ -801,6 +841,7 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), extent_addr_get(b), extent_size_get(b), extent_committed_get(a), arena->ind)) diff --git a/src/huge.c b/src/huge.c index dc0d680e..fe4c6e00 100644 --- a/src/huge.c +++ b/src/huge.c @@ -16,7 +16,6 @@ void * huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { - void *ret; size_t ausize; extent_t *extent; bool is_zeroed; @@ -30,12 +29,6 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); assert(ausize >= chunksize); - /* Allocate an extent with which to track the chunk. 
*/ - extent = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); - if (extent == NULL) - return (NULL); - /* * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that * it is possible to make correct junk/zero fill decisions below. @@ -43,19 +36,17 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, is_zeroed = zero; if (likely(!tsdn_null(tsdn))) arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, - arena, usize, alignment, &is_zeroed)) == NULL) { - idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, - true); + if (unlikely(arena == NULL) || (extent = arena_chunk_alloc_huge(tsdn, + arena, usize, alignment, &is_zeroed)) == NULL) return (NULL); - } - extent_init(extent, arena, ret, usize, true, is_zeroed, true, false); + if (usize < extent_size_get(extent)) + extent_size_set(extent, usize); if (chunk_register(tsdn, extent)) { - arena_chunk_dalloc_huge(tsdn, arena, ret, usize); - idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, - true); + arena_chunk_dalloc_huge(tsdn, arena, extent_addr_get(extent), + usize); + arena_extent_dalloc(tsdn, arena, extent); return (NULL); } @@ -67,12 +58,12 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) - memset(ret, 0, usize); + memset(extent_addr_get(extent), 0, usize); } else if (config_fill && unlikely(opt_junk_alloc)) - memset(ret, JEMALLOC_ALLOC_JUNK, usize); + memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, usize); arena_decay_tick(tsdn, arena); - return (ret); + return (extent_addr_get(extent)); } #ifdef JEMALLOC_JET @@ -99,11 +90,12 @@ huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif static void -huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t usize_min, size_t usize_max, 
bool zero) +huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero) { size_t usize, usize_next; - arena_t *arena; + arena_t *arena = extent_arena_get(extent); + size_t oldsize = extent_size_get(extent); chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool pre_zeroed, post_zeroed; @@ -115,20 +107,19 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, void *ptr, if (oldsize == usize) return; - arena = extent_arena_get(extent); pre_zeroed = extent_zeroed_get(extent); /* Fill if necessary (shrinking). */ if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), - JEMALLOC_FREE_JUNK, sdiff); + memset((void *)((uintptr_t)extent_addr_get(extent) + + usize), JEMALLOC_FREE_JUNK, sdiff); post_zeroed = false; } else { post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, ptr, CHUNK_CEILING(oldsize), usize, - sdiff); + &chunk_hooks, extent_addr_get(extent), + CHUNK_CEILING(oldsize), usize, sdiff); } } else post_zeroed = pre_zeroed; @@ -143,132 +134,157 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, void *ptr, /* Update zeroed. */ extent_zeroed_set(extent, post_zeroed); - arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_similar(tsdn, arena, extent, oldsize); /* Fill if necessary (growing). 
*/ if (oldsize < usize) { if (zero || (config_fill && unlikely(opt_zero))) { if (!pre_zeroed) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - usize - oldsize); + memset((void *) + ((uintptr_t)extent_addr_get(extent) + + oldsize), 0, usize - oldsize); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), - JEMALLOC_ALLOC_JUNK, usize - oldsize); + memset((void *)((uintptr_t)extent_addr_get(extent) + + oldsize), JEMALLOC_ALLOC_JUNK, usize - oldsize); } } } static bool -huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t usize) +huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena; - chunk_hooks_t chunk_hooks; - size_t cdiff; - bool pre_zeroed, post_zeroed; - - arena = extent_arena_get(extent); - pre_zeroed = extent_zeroed_get(extent); - chunk_hooks = chunk_hooks_get(tsdn, arena); + arena_t *arena = extent_arena_get(extent); + size_t oldsize = extent_size_get(extent); + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); + size_t sdiff = CHUNK_CEILING(usize) - usize; assert(oldsize > usize); /* Split excess chunks. 
*/ - cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - if (cdiff != 0 && chunk_hooks.split(ptr, CHUNK_CEILING(oldsize), - CHUNK_CEILING(usize), cdiff, true, arena->ind)) - return (true); + if (cdiff != 0) { + extent_t *trail = chunk_split_wrapper(tsdn, arena, &chunk_hooks, + extent, CHUNK_CEILING(usize), cdiff); + if (trail == NULL) + return (true); - if (oldsize > usize) { - size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), - sdiff); + huge_dalloc_junk(tsdn, extent_addr_get(trail), + extent_size_get(trail)); + } + + arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, + extent_addr_get(trail), extent_size_get(trail), + extent_committed_get(trail)); + + arena_extent_dalloc(tsdn, arena, trail); + } + + /* Optionally fill trailing subchunk. */ + if (sdiff != 0) { + bool post_zeroed; + + if (config_fill && unlikely(opt_junk_free)) { + huge_dalloc_junk(tsdn, + (void *)((uintptr_t)extent_addr_get(extent) + + usize), sdiff); post_zeroed = false; } else { post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, CHUNK_ADDR2BASE((uintptr_t)ptr + - usize), CHUNK_CEILING(oldsize), - CHUNK_ADDR2OFFSET((uintptr_t)ptr + usize), sdiff); + &chunk_hooks, extent_addr_get(extent), + CHUNK_CEILING(usize), usize, sdiff); + + if (config_fill && unlikely(opt_zero) && !post_zeroed) { + memset((void *) + ((uintptr_t)extent_addr_get(extent) + + usize), 0, sdiff); + } } - } else - post_zeroed = pre_zeroed; + extent_zeroed_set(extent, post_zeroed); + } - /* Update the size of the huge allocation. */ - chunk_deregister(tsdn, extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); - extent_size_set(extent, usize); - /* Update zeroed. */ - extent_zeroed_set(extent, post_zeroed); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); - chunk_reregister(tsdn, extent); - - /* Zap the excess chunks. 
*/ - arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsdn, arena, extent, oldsize); return (false); } static bool -huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t usize, bool zero) +huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, + bool zero) { - arena_t *arena; - bool is_zeroed_subchunk; + arena_t *arena = extent_arena_get(extent); + size_t oldsize = extent_size_get(extent); + bool is_zeroed_subchunk = extent_zeroed_get(extent); + bool is_zeroed_chunk = false; + chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + void *nchunk = + (void *)CHUNK_CEILING((uintptr_t)extent_past_get(extent)); + extent_t *trail; - arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); - is_zeroed_subchunk = extent_zeroed_get(extent); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + if ((trail = arena_chunk_cache_alloc(tsdn, arena, &chunk_hooks, nchunk, + cdiff, chunksize, &is_zeroed_chunk)) == NULL) { + bool commit = true; + if ((trail = chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, + nchunk, cdiff, chunksize, &is_zeroed_chunk, &commit)) == + NULL) + return (true); + } - if (arena_chunk_ralloc_huge_expand(tsdn, arena, extent, usize)) + if (chunk_merge_wrapper(tsdn, arena, &chunk_hooks, extent, trail)) { + arena_extent_dalloc(tsdn, arena, trail); + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, + extent_addr_get(trail), extent_size_get(trail), + extent_zeroed_get(trail), extent_committed_get(trail)); return (true); + } if (zero || (config_fill && unlikely(opt_zero))) { - bool is_zeroed_chunk = extent_zeroed_get(extent); - if (!is_zeroed_subchunk) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - CHUNK_CEILING(oldsize) - oldsize); + memset((void *)((uintptr_t)extent_addr_get(extent) + + oldsize), 0, CHUNK_CEILING(oldsize) - oldsize); } if (!is_zeroed_chunk) { - 
memset((void *)((uintptr_t)ptr + + memset((void *)((uintptr_t)extent_addr_get(extent) + CHUNK_CEILING(oldsize)), 0, usize - CHUNK_CEILING(oldsize)); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), JEMALLOC_ALLOC_JUNK, - usize - oldsize); + memset((void *)((uintptr_t)extent_addr_get(extent) + oldsize), + JEMALLOC_ALLOC_JUNK, usize - oldsize); } + if (usize < extent_size_get(extent)) + extent_size_set(extent, usize); + + arena_chunk_ralloc_huge_expand(tsdn, arena, extent, oldsize); + return (false); } bool -huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t usize_min, size_t usize_max, bool zero) +huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero) { - assert(s2u(oldsize) == oldsize); + assert(s2u(extent_size_get(extent)) == extent_size_get(extent)); /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); + /* Both allocation sizes must be huge to avoid a move. */ + assert(extent_size_get(extent) >= chunksize && usize_max >= chunksize); - /* Both allocations must be huge to avoid a move. */ - if (oldsize < chunksize || usize_max < chunksize) - return (true); - - if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(oldsize)) { + if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(extent_size_get(extent))) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(tsdn, extent, ptr, oldsize, - usize_max, zero)) { + if (!huge_ralloc_no_move_expand(tsdn, extent, usize_max, + zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(oldsize) && huge_ralloc_no_move_expand(tsdn, - extent, ptr, oldsize, usize_min, zero)) { + CHUNK_CEILING(extent_size_get(extent)) && + huge_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } @@ -278,18 +294,18 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, * Avoid moving the allocation if the existing chunk size accommodates * the new size. */ - if (CHUNK_CEILING(oldsize) >= CHUNK_CEILING(usize_min) - && CHUNK_CEILING(oldsize) <= CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(tsdn, extent, ptr, oldsize, - usize_min, usize_max, zero); + if (CHUNK_CEILING(extent_size_get(extent)) >= CHUNK_CEILING(usize_min) + && CHUNK_CEILING(extent_size_get(extent)) <= + CHUNK_CEILING(usize_max)) { + huge_ralloc_no_move_similar(tsdn, extent, usize_min, usize_max, + zero); arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(oldsize) > CHUNK_CEILING(usize_max)) { - if (!huge_ralloc_no_move_shrink(tsdn, extent, ptr, oldsize, - usize_max)) { + if (CHUNK_CEILING(extent_size_get(extent)) > CHUNK_CEILING(usize_max)) { + if (!huge_ralloc_no_move_shrink(tsdn, extent, usize_max)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } @@ -308,22 +324,23 @@ huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache) +huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, + size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; /* The following should have been caught by callers. */ assert(usize > 0 && usize <= HUGE_MAXCLASS); + /* Both allocation sizes must be huge to avoid a move. 
*/ + assert(extent_size_get(extent) >= chunksize && usize >= chunksize); /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, usize, - zero)) - return (ptr); + if (!huge_ralloc_no_move(tsdn, extent, usize, usize, zero)) + return (extent_addr_get(extent)); /* - * usize and oldsize are different enough that we need to use a + * usize and old size are different enough that we need to use a * different size class. In that case, fall back to allocating new * space and copying. */ @@ -331,14 +348,16 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, if (ret == NULL) return (NULL); - copysize = (usize < oldsize) ? usize : oldsize; - memcpy(ret, ptr, copysize); - isdalloct(tsdn, extent, ptr, oldsize, tcache, true); + copysize = (usize < extent_size_get(extent)) ? usize : + extent_size_get(extent); + memcpy(ret, extent_addr_get(extent), copysize); + isdalloct(tsdn, extent, extent_addr_get(extent), + extent_size_get(extent), tcache, true); return (ret); } void -huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr) +huge_dalloc(tsdn_t *tsdn, extent_t *extent) { arena_t *arena; @@ -352,13 +371,13 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr) extent_size_get(extent)); arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), extent_addr_get(extent), extent_size_get(extent)); - idalloctm(tsdn, iealloc(tsdn, extent), extent, NULL, true, true); + arena_extent_dalloc(tsdn, arena, extent); arena_decay_tick(tsdn, arena); } size_t -huge_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +huge_salloc(tsdn_t *tsdn, const extent_t *extent) { size_t size; arena_t *arena; @@ -372,13 +391,11 @@ huge_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) } prof_tctx_t * -huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { prof_tctx_t *tctx; arena_t *arena; - assert(extent == iealloc(tsdn, ptr)); - arena = 
extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); tctx = extent_prof_tctx_get(extent); @@ -388,13 +405,10 @@ huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) } void -huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) +huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { arena_t *arena; - assert(extent == iealloc(tsdn, ptr)); - arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_prof_tctx_set(extent, tctx); @@ -402,8 +416,8 @@ huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, } void -huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr) +huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { - huge_prof_tctx_set(tsdn, extent, ptr, (prof_tctx_t *)(uintptr_t)1U); + huge_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } From 6c944708222b3f1843ad224c88ffdafa02da1bb8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 May 2016 14:56:35 -0700 Subject: [PATCH 0278/2608] Refactor chunk_dalloc_{cache,wrapper}() to take extent arguments. Rename arena_extent_[d]alloc() to extent_[d]alloc(). Move all chunk [de]registration responsibility into chunk.c. 
--- include/jemalloc/internal/arena.h | 7 +- include/jemalloc/internal/chunk.h | 8 +- include/jemalloc/internal/extent.h | 3 + include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 106 +++----------- src/chunk.c | 133 +++++++++--------- src/chunk_dss.c | 19 ++- src/chunk_mmap.c | 1 - src/extent.c | 26 ++++ src/huge.c | 37 ++--- 10 files changed, 148 insertions(+), 199 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 187b6256..4e5e3029 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -478,17 +478,14 @@ extent_t *arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero); void arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); + chunk_hooks_t *chunk_hooks, extent_t *extent); void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool cache); -extent_t *arena_extent_alloc(tsdn_t *tsdn, arena_t *arena); -void arena_extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); extent_t *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t usize); +void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 78cc4c2d..cef9fe03 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -52,9 +52,6 @@ chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); chunk_hooks_t 
chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsdn_t *tsdn, const extent_t *extent); -void chunk_deregister(tsdn_t *tsdn, const extent_t *extent); -void chunk_reregister(tsdn_t *tsdn, const extent_t *extent); extent_t *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero); @@ -62,10 +59,9 @@ extent_t *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); + chunk_hooks_t *chunk_hooks, extent_t *extent); void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, - bool committed); + chunk_hooks_t *chunk_hooks, extent_t *extent); bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 82da8004..cfc908d8 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -62,6 +62,9 @@ typedef ph(extent_t) extent_heap_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); + #ifdef JEMALLOC_JET typedef size_t (extent_size_quantize_t)(size_t); extern extent_size_quantize_t *extent_size_quantize_floor; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 34a6816f..8998aed9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -37,8 +37,6 @@ arena_decay_time_get 
arena_decay_time_set arena_dss_prec_get arena_dss_prec_set -arena_extent_alloc -arena_extent_dalloc arena_get arena_ichoose arena_init @@ -166,7 +164,6 @@ chunk_dalloc_cache chunk_dalloc_mmap chunk_dalloc_wrapper chunk_decommit_wrapper -chunk_deregister chunk_dss_boot chunk_dss_postfork_child chunk_dss_postfork_parent @@ -184,8 +181,6 @@ chunk_postfork_child chunk_postfork_parent chunk_prefork chunk_purge_wrapper -chunk_register -chunk_reregister chunk_split_wrapper chunks_rtree chunksize @@ -214,10 +209,12 @@ extent_active_get extent_active_set extent_addr_get extent_addr_set +extent_alloc extent_arena_get extent_arena_set extent_committed_get extent_committed_set +extent_dalloc extent_dirty_insert extent_dirty_remove extent_init diff --git a/src/arena.c b/src/arena.c index a610ec15..9eb08979 100644 --- a/src/arena.c +++ b/src/arena.c @@ -249,23 +249,22 @@ arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, static void arena_chunk_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed) + chunk_hooks_t *chunk_hooks, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &arena->lock); - chunk_dalloc_cache(tsdn, arena, chunk_hooks, chunk, size, committed); + chunk_dalloc_cache(tsdn, arena, chunk_hooks, extent); arena_maybe_purge(tsdn, arena); } void arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed) + chunk_hooks_t *chunk_hooks, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->lock); - arena_chunk_cache_dalloc_locked(tsdn, arena, chunk_hooks, chunk, size, - committed); + arena_chunk_cache_dalloc_locked(tsdn, arena, chunk_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); } @@ -582,33 +581,14 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, extent_addr_get(extent), extent_size_get(extent), 0, map_bias << LG_PAGE)) { - chunk_dalloc_wrapper(tsdn, arena, 
chunk_hooks, - extent_addr_get(extent), extent_size_get(extent), - extent_zeroed_get(extent), - extent_committed_get(extent)); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, extent); extent = NULL; } } - if (extent != NULL) { + if (extent != NULL) extent_slab_set(extent, true); - if (chunk_register(tsdn, extent)) { - if (!*commit) { - /* Undo commit of header. */ - chunk_decommit_wrapper(tsdn, arena, chunk_hooks, - extent_addr_get(extent), - extent_size_get(extent), 0, map_bias << - LG_PAGE); - } - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, - extent_addr_get(extent), extent_size_get(extent), - extent_zeroed_get(extent), - extent_committed_get(extent)); - extent = NULL; - } - } - malloc_mutex_lock(tsdn, &arena->lock); return (extent); @@ -625,13 +605,6 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunksize, chunksize, zero); if (extent != NULL) { extent_slab_set(extent, true); - - if (chunk_register(tsdn, extent)) { - arena_chunk_cache_dalloc_locked(tsdn, arena, - &chunk_hooks, extent_addr_get(extent), - extent_size_get(extent), true); - return (NULL); - } *commit = true; } if (extent == NULL) { @@ -722,14 +695,13 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - chunk_deregister(tsdn, extent); - - committed = (arena_mapbits_decommitted_get((arena_chunk_t *) - extent_addr_get(extent), map_bias) == 0); - if (!committed) { + extent_committed_set(extent, + (arena_mapbits_decommitted_get((arena_chunk_t *) + extent_addr_get(extent), map_bias) == 0)); + extent_slab_set(extent, false); + if (!extent_committed_get(extent)) { /* * Decommit the header. 
Mark the chunk as decommitted even if * header decommit fails, since treating a partially committed @@ -741,15 +713,12 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) map_bias << LG_PAGE); } - arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, - extent_addr_get(extent), extent_size_get(extent), committed); - if (config_stats) { arena->stats.mapped -= extent_size_get(extent); arena->stats.metadata_mapped -= (map_bias << LG_PAGE); } - arena_extent_dalloc(tsdn, arena, extent); + arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, extent); } static void @@ -852,32 +821,6 @@ arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) arena_huge_malloc_stats_update(arena, usize); } -extent_t * -arena_extent_alloc(tsdn_t *tsdn, arena_t *arena) -{ - extent_t *extent; - - malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); - extent = ql_last(&arena->extent_cache, ql_link); - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); - return (base_alloc(tsdn, sizeof(extent_t))); - } - ql_tail_remove(&arena->extent_cache, extent_t, ql_link); - malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); - return (extent); -} - -void -arena_extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) -{ - - malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); - ql_elm_new(extent, ql_link); - ql_tail_insert(&arena->extent_cache, extent, ql_link); - malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); -} - static extent_t * arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, @@ -931,21 +874,21 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, } void -arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - size_t csize; - csize = CHUNK_CEILING(usize); 
malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_dalloc_stats_update(arena, usize); - arena->stats.mapped -= usize; + arena_huge_dalloc_stats_update(arena, extent_size_get(extent)); + arena->stats.mapped -= extent_size_get(extent); } - arena_nactive_sub(arena, usize >> LG_PAGE); + arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); - arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, chunk, csize, - true); + if ((extent_size_get(extent) & chunksize_mask) != 0) + extent_size_set(extent, CHUNK_CEILING(extent_size_get(extent))); + + arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); } @@ -1656,15 +1599,10 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, rdelm_next = qr_next(rdelm, rd_link); if (rdelm == &chunkselm->rd) { extent_t *chunkselm_next = qr_next(chunkselm, cc_link); - void *addr = extent_addr_get(chunkselm); - size_t size = extent_size_get(chunkselm); - bool zeroed = extent_zeroed_get(chunkselm); - bool committed = extent_committed_get(chunkselm); extent_dirty_remove(chunkselm); - arena_extent_dalloc(tsdn, arena, chunkselm); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, + chunkselm); chunkselm = chunkselm_next; - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, - size, zeroed, committed); } else { extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = diff --git a/src/chunk.c b/src/chunk.c index 4efba4a7..d0763212 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -51,7 +51,7 @@ const chunk_hooks_t chunk_hooks_default = { static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, - void *chunk, size_t size, bool zeroed, bool committed); + extent_t *extent); /******************************************************************************/ @@ -203,7 +203,7 @@ extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) 
rtree_elm_release(tsdn, &chunks_rtree, elm_b); } -bool +static bool chunk_register(tsdn_t *tsdn, const extent_t *extent) { rtree_elm_t *elm_a, *elm_b; @@ -232,7 +232,7 @@ chunk_register(tsdn_t *tsdn, const extent_t *extent) return (false); } -void +static void chunk_deregister(tsdn_t *tsdn, const extent_t *extent) { rtree_elm_t *elm_a, *elm_b; @@ -249,15 +249,6 @@ chunk_deregister(tsdn_t *tsdn, const extent_t *extent) } } -void -chunk_reregister(tsdn_t *tsdn, const extent_t *extent) -{ - bool err; - - err = chunk_register(tsdn, extent); - assert(!err); -} - /* * Do first-best-fit chunk selection, i.e. select the lowest chunk that best * fits. @@ -282,7 +273,7 @@ chunk_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], static void chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, - void *addr, size_t size) + extent_t *extent) { /* @@ -290,9 +281,11 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, * that this is only a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(tsdn, arena, chunk_hooks, addr, size, 0, - size); + chunk_purge_wrapper(tsdn, arena, chunk_hooks, + extent_addr_get(extent), extent_size_get(extent), 0, + extent_size_get(extent)); } + extent_dalloc(tsdn, arena, extent); } static extent_t * @@ -351,9 +344,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent = chunk_split_wrapper(tsdn, arena, chunk_hooks, lead, leadsize, size + trailsize); if (extent == NULL) { - chunk_leak(tsdn, arena, chunk_hooks, cache, - extent_addr_get(lead), extent_size_get(lead)); - arena_extent_dalloc(tsdn, arena, lead); + chunk_leak(tsdn, arena, chunk_hooks, cache, lead); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } @@ -366,9 +357,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *trail = chunk_split_wrapper(tsdn, arena, chunk_hooks, extent, size, trailsize); if (trail == NULL) { - chunk_leak(tsdn, arena, chunk_hooks, cache, - extent_addr_get(extent), extent_size_get(extent)); - arena_extent_dalloc(tsdn, arena, extent); + chunk_leak(tsdn, arena, chunk_hooks, cache, extent); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } @@ -381,9 +370,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, - extent_addr_get(extent), extent_size_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent)); - arena_extent_dalloc(tsdn, arena, extent); + extent); return (NULL); } @@ -529,7 +516,7 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (extent == NULL) { void *chunk; - extent = arena_extent_alloc(tsdn, arena); + extent = extent_alloc(tsdn, arena); if (extent == NULL) return (NULL); chunk = chunk_hooks->alloc(new_addr, size, alignment, @@ -540,6 +527,11 @@ 
chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, false); } + if (chunk_register(tsdn, extent)) { + chunk_leak(tsdn, arena, chunk_hooks, false, extent); + return (NULL); + } + return (extent); } @@ -590,29 +582,21 @@ chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_heap_t extent_heaps[NPSIZES], bool cache, void *chunk, size_t size, - bool zeroed, bool committed) + extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) { - extent_t *extent, *prev, *next; + extent_t *prev, *next; - assert(!cache || !zeroed); + assert(!cache || !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); - /* Create/initialize/insert extent. */ - extent = arena_extent_alloc(tsdn, arena); - if (extent == NULL) { - chunk_leak(tsdn, arena, chunk_hooks, cache, chunk, size); - goto label_return; - } - extent_init(extent, arena, chunk, size, false, !cache && zeroed, - committed, false); - if (chunk_register(tsdn, extent)) { - arena_extent_dalloc(tsdn, arena, extent); - chunk_leak(tsdn, arena, chunk_hooks, cache, chunk, size); - goto label_return; - } + assert((extent_size_get(extent) & chunksize_mask) == 0); + extent_active_set(extent, false); + extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); + extent_slab_set(extent, false); + + assert(chunk_lookup(tsdn, extent_addr_get(extent), true) == extent); extent_heaps_insert(extent_heaps, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); @@ -632,22 +616,24 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_heaps, cache); } -label_return: malloc_mutex_unlock(tsdn, &arena->chunks_mtx); } void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed) + extent_t *extent) { - assert(chunk != NULL); - 
assert(CHUNK_ADDR2BASE(chunk) == chunk); - assert(size != 0); - assert((size & chunksize_mask) == 0); + assert(extent_addr_get(extent) != NULL); + assert(CHUNK_ADDR2BASE(extent_addr_get(extent)) == + extent_addr_get(extent)); + assert(extent_size_get(extent) != 0); + assert((extent_size_get(extent) & chunksize_mask) == 0); + + extent_zeroed_set(extent, false); chunk_record(tsdn, arena, chunk_hooks, arena->chunks_cached, true, - chunk, size, false, committed); + extent); } static bool @@ -662,30 +648,40 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed) + extent_t *extent) { - assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); - assert(size != 0); - assert((size & chunksize_mask) == 0); + assert(extent_addr_get(extent) != NULL); + assert(CHUNK_ADDR2BASE(extent_addr_get(extent)) == + extent_addr_get(extent)); + assert(extent_size_get(extent) != 0); + assert((extent_size_get(extent) & chunksize_mask) == 0); chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. */ - if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) + if (!chunk_hooks->dalloc(extent_addr_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena->ind)) { + chunk_deregister(tsdn, extent); + extent_dalloc(tsdn, arena, extent); return; - /* Try to decommit; purge if that fails. */ - if (committed) { - committed = chunk_hooks->decommit(chunk, size, 0, size, - arena->ind); } - zeroed = !committed || !chunk_hooks->purge(chunk, size, 0, size, - arena->ind); - chunk_record(tsdn, arena, chunk_hooks, arena->chunks_retained, false, - chunk, size, zeroed, committed); + /* Try to decommit; purge if that fails. 
*/ + if (extent_committed_get(extent)) { + extent_committed_set(extent, + chunk_hooks->decommit(extent_addr_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena->ind)); + } + extent_zeroed_set(extent, !extent_committed_get(extent) || + !chunk_hooks->purge(extent_addr_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); if (config_stats) - arena->stats.retained += size; + arena->stats.retained += extent_size_get(extent); + + chunk_record(tsdn, arena, chunk_hooks, arena->chunks_retained, false, + extent); } static bool @@ -771,7 +767,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - trail = arena_extent_alloc(tsdn, arena); + trail = extent_alloc(tsdn, arena); if (trail == NULL) goto label_error_a; @@ -814,7 +810,7 @@ label_error_d: label_error_c: extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); label_error_b: - arena_extent_dalloc(tsdn, arena, trail); + extent_dalloc(tsdn, arena, trail); label_error_a: return (NULL); } @@ -841,6 +837,9 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + assert((extent_size_get(a) & chunksize_mask) == 0); + assert((extent_size_get(b) & chunksize_mask) == 0); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), extent_addr_get(b), extent_size_get(b), extent_committed_get(a), @@ -871,7 +870,7 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); extent_rtree_release(tsdn, a_elm_a, b_elm_b); - arena_extent_dalloc(tsdn, extent_arena_get(b), b); + extent_dalloc(tsdn, extent_arena_get(b), b); return (false); } diff --git a/src/chunk_dss.c b/src/chunk_dss.c index d42aeb0b..251f17b8 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -89,7 +89,8 @@ 
chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * malloc. */ do { - void *ret, *cpad, *dss_next; + void *ret, *cpad_addr, *dss_next; + extent_t *cpad; size_t gap_size, cpad_size; intptr_t incr; /* Avoid an unnecessary system call. */ @@ -114,10 +115,19 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * necessary to satisfy alignment. This space can be * recycled for later use. */ - cpad = (void *)((uintptr_t)dss_max + gap_size); + cpad_addr = (void *)((uintptr_t)dss_max + gap_size); ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, alignment); - cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + cpad_size = (uintptr_t)ret - (uintptr_t)cpad_addr; + if (cpad_size != 0) { + cpad = extent_alloc(tsdn, arena); + if (cpad == NULL) { + malloc_mutex_unlock(tsdn, &dss_mtx); + return (NULL); + } + extent_init(cpad, arena, cpad_addr, cpad_size, + false, false, true, false); + } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || (uintptr_t)dss_next < (uintptr_t)dss_max) { @@ -135,8 +145,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, - &chunk_hooks, cpad, cpad_size, - false, true); + &chunk_hooks, cpad); } if (*zero) memset(ret, 0, size); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index f95ae756..13708027 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -73,6 +73,5 @@ chunk_dalloc_mmap(void *chunk, size_t size) if (config_munmap) pages_unmap(chunk, size); - return (!config_munmap); } diff --git a/src/extent.c b/src/extent.c index 4757f750..d7f3b6cc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,6 +3,32 @@ /******************************************************************************/ +extent_t * +extent_alloc(tsdn_t *tsdn, arena_t *arena) +{ + extent_t *extent; + + malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); + extent = 
ql_last(&arena->extent_cache, ql_link); + if (extent == NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); + return (base_alloc(tsdn, sizeof(extent_t))); + } + ql_tail_remove(&arena->extent_cache, extent_t, ql_link); + malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); + return (extent); +} + +void +extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) +{ + + malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); + ql_elm_new(extent, ql_link); + ql_tail_insert(&arena->extent_cache, extent, ql_link); + malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); +} + #ifdef JEMALLOC_JET #undef extent_size_quantize_floor #define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) diff --git a/src/huge.c b/src/huge.c index fe4c6e00..880e4333 100644 --- a/src/huge.c +++ b/src/huge.c @@ -43,13 +43,6 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (usize < extent_size_get(extent)) extent_size_set(extent, usize); - if (chunk_register(tsdn, extent)) { - arena_chunk_dalloc_huge(tsdn, arena, extent_addr_get(extent), - usize); - arena_extent_dalloc(tsdn, arena, extent); - return (NULL); - } - /* Insert extent into huge. 
*/ malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_elm_new(extent, ql_link); @@ -57,10 +50,14 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, malloc_mutex_unlock(tsdn, &arena->huge_mtx); if (zero || (config_fill && unlikely(opt_zero))) { - if (!is_zeroed) - memset(extent_addr_get(extent), 0, usize); - } else if (config_fill && unlikely(opt_junk_alloc)) - memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, usize); + if (!is_zeroed) { + memset(extent_addr_get(extent), 0, + extent_size_get(extent)); + } + } else if (config_fill && unlikely(opt_junk_alloc)) { + memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, + extent_size_get(extent)); + } arena_decay_tick(tsdn, arena); return (extent_addr_get(extent)); @@ -126,11 +123,9 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* Update the size of the huge allocation. */ assert(extent_size_get(extent) != usize); - chunk_deregister(tsdn, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); extent_size_set(extent, usize); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - chunk_reregister(tsdn, extent); /* Update zeroed. */ extent_zeroed_set(extent, post_zeroed); @@ -174,11 +169,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) extent_size_get(trail)); } - arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, - extent_addr_get(trail), extent_size_get(trail), - extent_committed_get(trail)); - - arena_extent_dalloc(tsdn, arena, trail); + arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, trail); } /* Optionally fill trailing subchunk. 
*/ @@ -233,10 +224,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } if (chunk_merge_wrapper(tsdn, arena, &chunk_hooks, extent, trail)) { - arena_extent_dalloc(tsdn, arena, trail); - chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, - extent_addr_get(trail), extent_size_get(trail), - extent_zeroed_get(trail), extent_committed_get(trail)); + chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, trail); return (true); } @@ -362,16 +350,13 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent) arena_t *arena; arena = extent_arena_get(extent); - chunk_deregister(tsdn, extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); huge_dalloc_junk(tsdn, extent_addr_get(extent), extent_size_get(extent)); - arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), - extent_addr_get(extent), extent_size_get(extent)); - arena_extent_dalloc(tsdn, arena, extent); + arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), extent); arena_decay_tick(tsdn, arena); } From 0eb6f08959700428f3ae0df1d8ab1cd7bd4c82bc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 May 2016 15:04:40 -0700 Subject: [PATCH 0279/2608] Refactor chunk_[de]commit_wrapper() to take extent arguments. 
--- include/jemalloc/internal/chunk.h | 6 ++---- src/arena.c | 14 ++++++-------- src/chunk.c | 10 ++++++---- 3 files changed, 14 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index cef9fe03..085b43c0 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -63,11 +63,9 @@ void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent); bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, - size_t length); + chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, - size_t length); + chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); diff --git a/src/arena.c b/src/arena.c index 9eb08979..24793549 100644 --- a/src/arena.c +++ b/src/arena.c @@ -440,7 +440,7 @@ arena_run_split_large_helper(tsdn_t *tsdn, arena_t *arena, extent_t *extent, assert(need_pages > 0); if (flag_decommitted != 0 && chunk_commit_wrapper(tsdn, arena, - &arena->chunk_hooks, chunk, chunksize, run_ind << LG_PAGE, size)) + &arena->chunk_hooks, extent, run_ind << LG_PAGE, size)) return (true); if (remove) { @@ -523,7 +523,7 @@ arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, assert(need_pages > 0); if (flag_decommitted != 0 && chunk_commit_wrapper(tsdn, arena, - &arena->chunk_hooks, chunk, chunksize, run_ind << LG_PAGE, size)) + &arena->chunk_hooks, extent, run_ind << LG_PAGE, size)) return (true); arena_run_split_remove(arena, chunk, run_ind, flag_dirty, @@ -578,8 +578,7 @@ 
arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunksize, zero, commit); if (extent != NULL && !*commit) { /* Commit header. */ - if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, - extent_addr_get(extent), extent_size_get(extent), 0, + if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, extent, 0, map_bias << LG_PAGE)) { chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, extent); extent = NULL; @@ -708,8 +707,7 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) * chunk as committed has a high potential for causing later * access of decommitted memory. */ - chunk_decommit_wrapper(tsdn, arena, &chunk_hooks, - extent_addr_get(extent), extent_size_get(extent), 0, + chunk_decommit_wrapper(tsdn, arena, &chunk_hooks, extent, 0, map_bias << LG_PAGE); } @@ -1537,8 +1535,8 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(!arena_mapbits_decommitted_get(chunk, pageind+npages-1)); decommitted = !chunk_decommit_wrapper(tsdn, arena, - chunk_hooks, chunk, chunksize, pageind << LG_PAGE, - npages << LG_PAGE); + chunk_hooks, extent, pageind << LG_PAGE, npages << + LG_PAGE); if (decommitted) { flag_unzeroed = 0; flags = CHUNK_MAP_DECOMMITTED; diff --git a/src/chunk.c b/src/chunk.c index d0763212..363ce8f7 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -695,11 +695,12 @@ chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length) + extent_t *extent, size_t offset, size_t length) { chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->commit(chunk, size, offset, length, arena->ind)); + return (chunk_hooks->commit(extent_addr_get(extent), + extent_size_get(extent), offset, length, arena->ind)); } static bool @@ -713,11 +714,12 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, bool 
chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length) + extent_t *extent, size_t offset, size_t length) { chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->decommit(chunk, size, offset, length, arena->ind)); + return (chunk_hooks->decommit(extent_addr_get(extent), + extent_size_get(extent), offset, length, arena->ind)); } static bool From 5c6be2bdd33d1eb9d544f46f128ba93d05a01492 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 23 May 2016 15:10:25 -0700 Subject: [PATCH 0280/2608] Refactor chunk_purge_wrapper() to take extent argument. --- include/jemalloc/internal/chunk.h | 3 +-- src/arena.c | 4 ++-- src/chunk.c | 9 +++++---- src/huge.c | 6 ++---- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 085b43c0..5f5629eb 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -67,8 +67,7 @@ bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, - size_t length); + chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); extent_t *chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent, size_t size_a, size_t size_b); bool chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/src/arena.c b/src/arena.c index 24793549..de3024da 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1542,8 +1542,8 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, flags = CHUNK_MAP_DECOMMITTED; } else { flag_unzeroed = chunk_purge_wrapper(tsdn, arena, - chunk_hooks, chunk, chunksize, pageind << - LG_PAGE, 
run_size) ? CHUNK_MAP_UNZEROED : 0; + chunk_hooks, extent, pageind << LG_PAGE, + run_size) ? CHUNK_MAP_UNZEROED : 0; flags = flag_unzeroed; } arena_mapbits_large_set(chunk, pageind+npages-1, 0, diff --git a/src/chunk.c b/src/chunk.c index 363ce8f7..d20c2dfb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -281,8 +281,7 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, * that this is only a virtual memory leak. */ if (cache) { - chunk_purge_wrapper(tsdn, arena, chunk_hooks, - extent_addr_get(extent), extent_size_get(extent), 0, + chunk_purge_wrapper(tsdn, arena, chunk_hooks, extent, 0, extent_size_get(extent)); } extent_dalloc(tsdn, arena, extent); @@ -739,11 +738,13 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, size_t offset, size_t length) + extent_t *extent, size_t offset, size_t length) { chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->purge(chunk, size, offset, length, arena->ind)); + return (chunk_hooks->purge(extent_addr_get(extent), + CHUNK_CEILING(extent_size_get(extent)), offset, length, + arena->ind)); } static bool diff --git a/src/huge.c b/src/huge.c index 880e4333..70383144 100644 --- a/src/huge.c +++ b/src/huge.c @@ -115,8 +115,7 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, size_t usize_min, post_zeroed = false; } else { post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, extent_addr_get(extent), - CHUNK_CEILING(oldsize), usize, sdiff); + &chunk_hooks, extent, usize, sdiff); } } else post_zeroed = pre_zeroed; @@ -183,8 +182,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) post_zeroed = false; } else { post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, extent_addr_get(extent), - CHUNK_CEILING(usize), usize, sdiff); + &chunk_hooks, extent, usize, sdiff); if (config_fill && 
unlikely(opt_zero) && !post_zeroed) { memset((void *) From 47613afc34750016fdc95aaaee37c3be11f272b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 24 May 2016 18:22:10 -0700 Subject: [PATCH 0281/2608] Convert rtree from per chunk to per page. Refactor [de]registration to maintain interior rtree entries for slabs. --- include/jemalloc/internal/chunk.h | 4 +- include/jemalloc/internal/extent.h | 2 - src/arena.c | 28 +++----- src/chunk.c | 108 +++++++++++++++++++++-------- src/huge.c | 4 +- 5 files changed, 94 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 5f5629eb..d1137387 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -54,10 +54,10 @@ chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, extent_t *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero); + bool *zero, bool slab); extent_t *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); + bool *zero, bool *commit, bool slab); void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent); void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index cfc908d8..775f89bb 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -146,7 +146,6 @@ JEMALLOC_INLINE bool extent_retained_get(const extent_t *extent) { - assert(!extent->e_slab); return (qr_next(&extent->rd, rd_link) == &extent->rd); } @@ -161,7 +160,6 @@ JEMALLOC_INLINE bool extent_committed_get(const extent_t *extent) { - assert(!extent->e_slab); return (extent->e_committed); } diff --git a/src/arena.c b/src/arena.c index de3024da..17cf973a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -223,13 
+223,13 @@ arena_chunk_dirty_npages(const extent_t *extent) static extent_t * arena_chunk_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero) + bool *zero, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->lock); return (chunk_alloc_cache(tsdn, arena, chunk_hooks, new_addr, size, - alignment, zero)); + alignment, zero, slab)); } extent_t * @@ -241,7 +241,7 @@ arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, malloc_mutex_lock(tsdn, &arena->lock); extent = arena_chunk_cache_alloc_locked(tsdn, arena, chunk_hooks, - new_addr, size, alignment, zero); + new_addr, size, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); return (extent); @@ -575,7 +575,7 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, malloc_mutex_unlock(tsdn, &arena->lock); extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, - chunksize, zero, commit); + chunksize, zero, commit, true); if (extent != NULL && !*commit) { /* Commit header. 
*/ if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, extent, 0, @@ -585,9 +585,6 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, } } - if (extent != NULL) - extent_slab_set(extent, true); - malloc_mutex_lock(tsdn, &arena->lock); return (extent); @@ -601,11 +598,9 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - chunksize, chunksize, zero); - if (extent != NULL) { - extent_slab_set(extent, true); + chunksize, chunksize, zero, true); + if (extent != NULL) *commit = true; - } if (extent == NULL) { extent = arena_chunk_alloc_internal_hard(tsdn, arena, &chunk_hooks, zero, commit); @@ -699,7 +694,6 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) extent_committed_set(extent, (arena_mapbits_decommitted_get((arena_chunk_t *) extent_addr_get(extent), map_bias) == 0)); - extent_slab_set(extent, false); if (!extent_committed_get(extent)) { /* * Decommit the header. Mark the chunk as decommitted even if @@ -828,7 +822,7 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, bool commit = true; extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, - alignment, zero, &commit); + alignment, zero, &commit, false); if (extent == NULL) { /* Revert optimistic stats updates. 
*/ malloc_mutex_lock(tsdn, &arena->lock); @@ -861,7 +855,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - csize, alignment, zero); + csize, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); if (extent == NULL) { extent = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, @@ -1429,7 +1423,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, zero = false; extent = arena_chunk_cache_alloc_locked(tsdn, arena, chunk_hooks, extent_addr_get(chunkselm), - extent_size_get(chunkselm), chunksize, &zero); + extent_size_get(chunkselm), chunksize, &zero, + false); assert(extent == chunkselm); assert(zero == extent_zeroed_get(chunkselm)); extent_dirty_insert(chunkselm, purge_runs_sentinel, @@ -2561,9 +2556,8 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, zero); } else if (likely(alignment <= chunksize)) ret = huge_malloc(tsdn, arena, usize, zero); - else { + else ret = huge_palloc(tsdn, arena, usize, alignment, zero); - } } return (ret); } diff --git a/src/chunk.c b/src/chunk.c index d20c2dfb..2ec12b4b 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -168,10 +168,9 @@ extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, return (true); assert(*r_elm_a != NULL); - if (extent_size_get(extent) > chunksize) { + if (extent_size_get(extent) > PAGE) { uintptr_t last = - (CHUNK_CEILING((uintptr_t)extent_past_get(extent) - - chunksize)); + (CHUNK_CEILING((uintptr_t)extent_past_get(extent)) - PAGE); *r_elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, last, dependent, init_missing); @@ -203,6 +202,20 @@ extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) rtree_elm_release(tsdn, &chunks_rtree, elm_b); } +static void +chunk_interior_register(tsdn_t *tsdn, const extent_t *extent) +{ + size_t i; + + assert(extent_slab_get(extent)); + + for 
(i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &chunks_rtree, + (uintptr_t)extent_addr_get(extent) + (uintptr_t)(i << + LG_PAGE), extent); + } +} + static bool chunk_register(tsdn_t *tsdn, const extent_t *extent) { @@ -211,6 +224,8 @@ chunk_register(tsdn_t *tsdn, const extent_t *extent) if (extent_rtree_acquire(tsdn, extent, false, true, &elm_a, &elm_b)) return (true); extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); + if (extent_slab_get(extent)) + chunk_interior_register(tsdn, extent); extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { @@ -232,6 +247,20 @@ chunk_register(tsdn_t *tsdn, const extent_t *extent) return (false); } +static void +chunk_interior_deregister(tsdn_t *tsdn, const extent_t *extent) +{ + size_t i; + + assert(extent_slab_get(extent)); + + for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &chunks_rtree, + (uintptr_t)extent_addr_get(extent) + (uintptr_t)(i << + LG_PAGE)); + } +} + static void chunk_deregister(tsdn_t *tsdn, const extent_t *extent) { @@ -239,6 +268,8 @@ chunk_deregister(tsdn_t *tsdn, const extent_t *extent) extent_rtree_acquire(tsdn, extent, true, false, &elm_a, &elm_b); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); + if (extent_slab_get(extent)) + chunk_interior_deregister(tsdn, extent); extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { @@ -290,7 +321,7 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, static extent_t * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) + size_t size, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; size_t alloc_size, leadsize, trailsize; @@ -374,6 +405,10 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, 
chunk_hooks_t *chunk_hooks, } extent_active_set(extent, true); + if (slab) { + extent_slab_set(extent, slab); + chunk_interior_register(tsdn, extent); + } malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -431,7 +466,7 @@ chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_t * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero) + void *new_addr, size_t size, size_t alignment, bool *zero, bool slab) { extent_t *extent; bool commit; @@ -443,7 +478,7 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, commit = true; extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, - true, new_addr, size, alignment, zero, &commit); + true, new_addr, size, alignment, zero, &commit, slab); if (extent == NULL) return (NULL); assert(commit); @@ -484,7 +519,8 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, static extent_t * chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool slab) { extent_t *extent; @@ -494,7 +530,7 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, - false, new_addr, size, alignment, zero, commit); + false, new_addr, size, alignment, zero, commit, slab); if (config_stats && extent != NULL) arena->stats.retained -= size; @@ -502,33 +538,44 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (extent); } +static extent_t * +chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit, bool slab) +{ + extent_t *extent; + 
void *addr; + + extent = extent_alloc(tsdn, arena); + if (extent == NULL) + return (NULL); + addr = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, + arena->ind); + if (addr == NULL) + return (NULL); + extent_init(extent, arena, addr, size, true, zero, commit, slab); + if (chunk_register(tsdn, extent)) { + chunk_leak(tsdn, arena, chunk_hooks, false, extent); + return (NULL); + } + + return (extent); +} + extent_t * chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool slab) { extent_t *extent; chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); extent = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, - alignment, zero, commit); + alignment, zero, commit, slab); if (extent == NULL) { - void *chunk; - - extent = extent_alloc(tsdn, arena); - if (extent == NULL) - return (NULL); - chunk = chunk_hooks->alloc(new_addr, size, alignment, - zero, commit, arena->ind); - if (chunk == NULL) - return (NULL); - extent_init(extent, arena, chunk, size, true, zero, commit, - false); - } - - if (chunk_register(tsdn, extent)) { - chunk_leak(tsdn, arena, chunk_hooks, false, extent); - return (NULL); + extent = chunk_alloc_wrapper_hard(tsdn, arena, chunk_hooks, + new_addr, size, alignment, zero, commit, slab); } return (extent); @@ -593,7 +640,10 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((extent_size_get(extent) & chunksize_mask) == 0); extent_active_set(extent, false); extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); - extent_slab_set(extent, false); + if (extent_slab_get(extent)) { + chunk_interior_deregister(tsdn, extent); + extent_slab_set(extent, false); + } assert(chunk_lookup(tsdn, extent_addr_get(extent), true) == extent); extent_heaps_insert(extent_heaps, extent); @@ -609,7 +659,7 @@ chunk_record(tsdn_t *tsdn, 
arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to coalesce backward. */ prev = rtree_read(tsdn, &chunks_rtree, - (uintptr_t)extent_addr_get(extent) - chunksize, false); + (uintptr_t)extent_addr_get(extent) - PAGE, false); if (prev != NULL) { chunk_try_coalesce(tsdn, arena, chunk_hooks, prev, extent, extent_heaps, cache); @@ -914,7 +964,7 @@ chunk_boot(void) if (have_dss && chunk_dss_boot()) return (true); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - - opt_lg_chunk))) + LG_PAGE))) return (true); return (false); diff --git a/src/huge.c b/src/huge.c index 70383144..31d3bcae 100644 --- a/src/huge.c +++ b/src/huge.c @@ -216,8 +216,8 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, cdiff, chunksize, &is_zeroed_chunk)) == NULL) { bool commit = true; if ((trail = chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, - nchunk, cdiff, chunksize, &is_zeroed_chunk, &commit)) == - NULL) + nchunk, cdiff, chunksize, &is_zeroed_chunk, &commit, false)) + == NULL) return (true); } From 760bf11b23b96a9c26e48ca51df9644bb382892f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 24 May 2016 20:56:46 -0700 Subject: [PATCH 0282/2608] Add extent_dirty_[gs]et(). --- include/jemalloc/internal/extent.h | 25 +++++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 2 ++ src/arena.c | 2 +- src/base.c | 2 +- src/chunk.c | 11 ++++---- src/chunk_dss.c | 2 +- 6 files changed, 34 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 775f89bb..a286fa9a 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,6 +21,9 @@ struct extent_s { /* True if extent is active (in use). */ bool e_active; + /* True if extent is dirty (touched). */ + bool e_dirty; + /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. 
@@ -86,6 +89,7 @@ void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); +bool extent_dirty_get(const extent_t *extent); bool extent_retained_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); @@ -95,12 +99,14 @@ void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); void extent_size_set(extent_t *extent, size_t size); void extent_active_set(extent_t *extent, bool active); +void extent_dirty_set(extent_t *extent, bool dirty); void extent_zeroed_set(extent_t *extent, bool zeroed); void extent_committed_set(extent_t *extent, bool committed); void extent_slab_set(extent_t *extent, bool slab); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, bool active, bool zeroed, bool committed, bool slab); + size_t size, bool active, bool dirty, bool zeroed, bool committed, + bool slab); void extent_dirty_insert(extent_t *extent, arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty); void extent_dirty_remove(extent_t *extent); @@ -142,6 +148,13 @@ extent_active_get(const extent_t *extent) return (extent->e_active); } +JEMALLOC_INLINE bool +extent_dirty_get(const extent_t *extent) +{ + + return (extent->e_dirty); +} + JEMALLOC_INLINE bool extent_retained_get(const extent_t *extent) { @@ -205,6 +218,13 @@ extent_active_set(extent_t *extent, bool active) extent->e_active = active; } +JEMALLOC_INLINE void +extent_dirty_set(extent_t *extent, bool dirty) +{ + + extent->e_dirty = dirty; +} + JEMALLOC_INLINE void extent_zeroed_set(extent_t *extent, bool zeroed) { @@ -235,13 +255,14 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t 
size, - bool active, bool zeroed, bool committed, bool slab) + bool active, bool dirty, bool zeroed, bool committed, bool slab) { extent_arena_set(extent, arena); extent_addr_set(extent, addr); extent_size_set(extent, size); extent_active_set(extent, active); + extent_dirty_set(extent, dirty); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); extent_slab_set(extent, slab); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8998aed9..02bef63e 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -215,8 +215,10 @@ extent_arena_set extent_committed_get extent_committed_set extent_dalloc +extent_dirty_get extent_dirty_insert extent_dirty_remove +extent_dirty_set extent_init extent_past_get extent_prof_tctx_get diff --git a/src/arena.c b/src/arena.c index 17cf973a..a08a3245 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1644,7 +1644,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); qr_new(&purge_runs_sentinel, rd_link); - extent_init(&purge_chunks_sentinel, arena, NULL, 0, false, false, + extent_init(&purge_chunks_sentinel, arena, NULL, 0, false, false, false, false, false); npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, diff --git a/src/base.c b/src/base.c index 518f966c..225f522b 100644 --- a/src/base.c +++ b/src/base.c @@ -74,7 +74,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, csize, true, true, true, false); + extent_init(extent, NULL, addr, csize, true, false, true, true, false); return (extent); } diff --git a/src/chunk.c b/src/chunk.c index 2ec12b4b..c1094ff4 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -553,7 +553,7 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, arena->ind); if (addr == NULL) return (NULL); - 
extent_init(extent, arena, addr, size, true, zero, commit, slab); + extent_init(extent, arena, addr, size, true, false, zero, commit, slab); if (chunk_register(tsdn, extent)) { chunk_leak(tsdn, arena, chunk_hooks, false, extent); return (NULL); @@ -828,8 +828,9 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t lead; extent_init(&lead, arena, extent_addr_get(extent), size_a, - extent_active_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_slab_get(extent)); + extent_active_get(extent), extent_dirty_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, &lead_elm_b)) @@ -838,8 +839,8 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_init(trail, arena, (void *)((uintptr_t)extent_addr_get(extent) + size_a), CHUNK_CEILING(size_b), extent_active_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_slab_get(extent)); + extent_dirty_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, &trail_elm_b)) goto label_error_c; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 251f17b8..6b90c53e 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -126,7 +126,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return (NULL); } extent_init(cpad, arena, cpad_addr, cpad_size, - false, false, true, false); + false, true, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || From 23c52c895f96c1dc2492855438fde04d9a10869e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 24 May 2016 21:13:36 -0700 Subject: [PATCH 0283/2608] Make extent_prof_tctx_[gs]et() atomic. 
--- include/jemalloc/internal/extent.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index a286fa9a..c3bdacb4 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -45,7 +45,10 @@ struct extent_s { bool e_slab; /* Profile counters, used for huge objects. */ - prof_tctx_t *e_prof_tctx; + union { + void *e_prof_tctx_pun; + prof_tctx_t *e_prof_tctx; + }; /* Linkage for arena's runs_dirty and chunks_cache rings. */ arena_runs_dirty_link_t rd; @@ -187,7 +190,8 @@ JEMALLOC_INLINE prof_tctx_t * extent_prof_tctx_get(const extent_t *extent) { - return (extent->e_prof_tctx); + return ((prof_tctx_t *)atomic_read_p( + &((extent_t *)extent)->e_prof_tctx_pun)); } JEMALLOC_INLINE void @@ -250,7 +254,7 @@ JEMALLOC_INLINE void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - extent->e_prof_tctx = tctx; + atomic_write_p(&extent->e_prof_tctx_pun, tctx); } JEMALLOC_INLINE void From 741967e79d10c94005b0f4065586c1b488a21fde Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 25 May 2016 16:21:37 -0700 Subject: [PATCH 0284/2608] Remove CHUNK_ADDR2BASE() and CHUNK_ADDR2OFFSET(). 
--- include/jemalloc/internal/arena.h | 112 ++------ include/jemalloc/internal/chunk.h | 8 - include/jemalloc/internal/private_symbols.txt | 1 - src/arena.c | 243 ++++++++++++------ src/chunk.c | 5 - src/chunk_dss.c | 4 +- 6 files changed, 190 insertions(+), 183 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4e5e3029..fac6fd34 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -580,10 +580,14 @@ arena_chunk_map_misc_t *arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind); const arena_chunk_map_misc_t *arena_miscelm_get_const( const arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm); -arena_chunk_map_misc_t *arena_rd_to_miscelm(arena_runs_dirty_link_t *rd); -arena_chunk_map_misc_t *arena_run_to_miscelm(arena_run_t *run); +size_t arena_miscelm_to_pageind(const extent_t *extent, + const arena_chunk_map_misc_t *miscelm); +void *arena_miscelm_to_rpages(const extent_t *extent, + const arena_chunk_map_misc_t *miscelm); +arena_chunk_map_misc_t *arena_rd_to_miscelm(const extent_t *extent, + arena_runs_dirty_link_t *rd); +arena_chunk_map_misc_t *arena_run_to_miscelm(const extent_t *extent, + arena_run_t *run); size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind); @@ -626,8 +630,6 @@ bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); szind_t arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -size_t arena_run_regind(arena_run_t *run, const arena_bin_info_t *bin_info, - const void *ptr); prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr); void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const 
void *ptr, @@ -685,9 +687,10 @@ arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) } JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_pageind(const extent_t *extent, + const arena_chunk_map_misc_t *miscelm) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; @@ -698,34 +701,35 @@ arena_miscelm_to_pageind(const arena_chunk_map_misc_t *miscelm) } JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(const arena_chunk_map_misc_t *miscelm) +arena_miscelm_to_rpages(const extent_t *extent, + const arena_chunk_map_misc_t *miscelm) { - arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + size_t pageind = arena_miscelm_to_pageind(extent, miscelm); return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); } JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_rd_to_miscelm(arena_runs_dirty_link_t *rd) +arena_rd_to_miscelm(const extent_t *extent, arena_runs_dirty_link_t *rd) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); - assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + assert(arena_miscelm_to_pageind(extent, miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(extent, miscelm) < chunk_npages); return (miscelm); } JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_run_to_miscelm(arena_run_t *run) +arena_run_to_miscelm(const extent_t *extent, arena_run_t *run) { arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); - 
assert(arena_miscelm_to_pageind(miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(miscelm) < chunk_npages); + assert(arena_miscelm_to_pageind(extent, miscelm) >= map_bias); + assert(arena_miscelm_to_pageind(extent, miscelm) < chunk_npages); return (miscelm); } @@ -1079,7 +1083,7 @@ arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, size_t mapbits) actual_binind = (szind_t)(bin - arena->bins); assert(run_binind == actual_binind); bin_info = &arena_bin_info[actual_binind]; - rpages = arena_miscelm_to_rpages(miscelm); + rpages = arena_miscelm_to_rpages(extent, miscelm); assert(((uintptr_t)ptr - (uintptr_t)rpages) % bin_info->reg_size == 0); } @@ -1095,78 +1099,6 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) return (binind); } -JEMALLOC_INLINE size_t -arena_run_regind(arena_run_t *run, const arena_bin_info_t *bin_info, - const void *ptr) -{ - size_t diff, interval, shift, regind; - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - void *rpages = arena_miscelm_to_rpages(miscelm); - - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ - assert((uintptr_t)ptr >= (uintptr_t)rpages); - - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ - diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - interval = bin_info->reg_size; - shift = ffs_zu(interval) - 1; - diff >>= shift; - interval >>= shift; - - if (interval == 1) { - /* The divisor was a power of 2. */ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. 
- */ -#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) -#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) - static const size_t interval_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) - + 2))) { - regind = (diff * interval_invs[interval - 3]) >> - SIZE_INV_SHIFT; - } else - regind = diff / interval; -#undef SIZE_INV -#undef SIZE_INV_SHIFT - } - assert(diff == regind * interval); - assert(regind < bin_info->nregs); - - return (regind); -} - JEMALLOC_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index d1137387..0c0084b0 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -7,14 +7,6 @@ */ #define LG_CHUNK_DEFAULT 21 -/* Return the chunk address for allocation address a. */ -#define CHUNK_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~chunksize_mask)) - -/* Return the chunk offset of address a. */ -#define CHUNK_ADDR2OFFSET(a) \ - ((size_t)((uintptr_t)(a) & chunksize_mask)) - /* Return the smallest chunk multiple that is >= s. 
*/ #define CHUNK_CEILING(s) \ (((s) + chunksize_mask) & ~chunksize_mask) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 02bef63e..c4466013 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -104,7 +104,6 @@ arena_ralloc_junk_large arena_ralloc_no_move arena_rd_to_miscelm arena_reset -arena_run_regind arena_run_to_miscelm arena_salloc arena_sdalloc diff --git a/src/arena.c b/src/arena.c index a08a3245..39764c3f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -49,20 +49,17 @@ static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, extent_t *extent, arena_run_t *run, arena_bin_t *bin); -static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, arena_run_t *run, arena_bin_t *bin); /******************************************************************************/ JEMALLOC_INLINE_C size_t -arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) +arena_miscelm_size_get(extent_t *extent, const arena_chunk_map_misc_t *miscelm) { - arena_chunk_t *chunk; - size_t pageind, mapbits; - - chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); - pageind = arena_miscelm_to_pageind(miscelm); - mapbits = arena_mapbits_get(chunk, pageind); + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + size_t pageind = arena_miscelm_to_pageind(extent, miscelm); + size_t mapbits = arena_mapbits_get(chunk, pageind); return (arena_mapbits_size_decode(mapbits)); } @@ -154,11 +151,12 @@ run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif static void -arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, 
+arena_avail_insert(arena_t *arena, extent_t *extent, size_t pageind, size_t npages) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get_const(chunk, pageind)))); + extent, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); arena_run_heap_insert(&arena->runs_avail[pind], @@ -166,11 +164,12 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, } static void -arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, +arena_avail_remove(arena_t *arena, extent_t *extent, size_t pageind, size_t npages) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( - arena_miscelm_get_const(chunk, pageind)))); + extent, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); arena_run_heap_remove(&arena->runs_avail[pind], @@ -291,9 +290,11 @@ arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool dirty) } JEMALLOC_INLINE_C void * -arena_run_reg_alloc(arena_run_t *run, const arena_bin_info_t *bin_info) +arena_run_reg_alloc(tsdn_t *tsdn, arena_run_t *run, + const arena_bin_info_t *bin_info) { void *ret; + extent_t *extent; size_t regind; arena_chunk_map_misc_t *miscelm; void *rpages; @@ -301,15 +302,88 @@ arena_run_reg_alloc(arena_run_t *run, const arena_bin_info_t *bin_info) assert(run->nfree > 0); assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); + extent = iealloc(tsdn, run); regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); - miscelm = arena_run_to_miscelm(run); - rpages = arena_miscelm_to_rpages(miscelm); + miscelm = arena_run_to_miscelm(extent, run); + rpages = arena_miscelm_to_rpages(extent, miscelm); ret = (void *)((uintptr_t)rpages + (uintptr_t)(bin_info->reg_size * 
regind)); run->nfree--; return (ret); } +JEMALLOC_INLINE_C size_t +arena_run_regind(extent_t *extent, arena_run_t *run, + const arena_bin_info_t *bin_info, const void *ptr) +{ + size_t diff, interval, shift, regind; + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); + void *rpages = arena_miscelm_to_rpages(extent, miscelm); + + /* + * Freeing a pointer lower than region zero can cause assertion + * failure. + */ + assert((uintptr_t)ptr >= (uintptr_t)rpages); + + /* + * Avoid doing division with a variable divisor if possible. Using + * actual division here can reduce allocator throughput by over 20%! + */ + diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages); + + /* Rescale (factor powers of 2 out of the numerator and denominator). */ + interval = bin_info->reg_size; + shift = ffs_zu(interval) - 1; + diff >>= shift; + interval >>= shift; + + if (interval == 1) { + /* The divisor was a power of 2. */ + regind = diff; + } else { + /* + * To divide by a number D that is not a power of two we + * multiply by (2^21 / D) and then right shift by 21 positions. + * + * X / D + * + * becomes + * + * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT + * + * We can omit the first three elements, because we never + * divide by 0, and 1 and 2 are both powers of two, which are + * handled above. 
+ */ +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) + static const size_t interval_invs[] = { + SIZE_INV(3), + SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), + SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), + SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), + SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), + SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), + SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), + SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) + }; + + if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) + + 2))) { + regind = (diff * interval_invs[interval - 3]) >> + SIZE_INV_SHIFT; + } else + regind = diff / interval; +#undef SIZE_INV +#undef SIZE_INV_SHIFT + } + assert(diff == regind * interval); + assert(regind < bin_info->nregs); + + return (regind); +} + JEMALLOC_INLINE_C void arena_run_reg_dalloc(tsdn_t *tsdn, arena_run_t *run, extent_t *extent, void *ptr) @@ -319,15 +393,17 @@ arena_run_reg_dalloc(tsdn_t *tsdn, arena_run_t *run, extent_t *extent, size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(tsdn, ptr, mapbits); const arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size_t regind = arena_run_regind(run, bin_info, ptr); + size_t regind = arena_run_regind(extent, run, bin_info, ptr); assert(run->nfree < bin_info->nregs); /* Freeing an interior pointer can cause assertion failure. 
*/ assert(((uintptr_t)ptr - - (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run))) % - (uintptr_t)bin_info->reg_size == 0); + (uintptr_t)arena_miscelm_to_rpages(extent, + arena_run_to_miscelm(extent, run))) % (uintptr_t)bin_info->reg_size + == 0); assert((uintptr_t)ptr >= - (uintptr_t)arena_miscelm_to_rpages(arena_run_to_miscelm(run))); + (uintptr_t)arena_miscelm_to_rpages(extent, + arena_run_to_miscelm(extent, run))); /* Freeing an unallocated pointer can cause assertion failure. */ assert(bitmap_get(run->bitmap, &bin_info->bitmap_info, regind)); @@ -381,9 +457,10 @@ arena_nactive_sub(arena_t *arena, size_t sub_pages) } static void -arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, +arena_run_split_remove(arena_t *arena, extent_t *extent, size_t run_ind, size_t flag_dirty, size_t flag_decommitted, size_t need_pages) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t total_pages, rem_pages; assert(flag_dirty == 0 || flag_decommitted == 0); @@ -395,7 +472,7 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, assert(need_pages <= total_pages); rem_pages = total_pages - need_pages; - arena_avail_remove(arena, chunk, run_ind, total_pages); + arena_avail_remove(arena, extent, run_ind, total_pages); if (flag_dirty != 0) arena_run_dirty_remove(arena, chunk, run_ind, total_pages); arena_nactive_add(arena, need_pages); @@ -418,7 +495,8 @@ arena_run_split_remove(arena_t *arena, arena_chunk_t *chunk, size_t run_ind, arena_run_dirty_insert(arena, chunk, run_ind+need_pages, rem_pages); } - arena_avail_insert(arena, chunk, run_ind+need_pages, rem_pages); + arena_avail_insert(arena, extent, run_ind+need_pages, + rem_pages); } } @@ -432,8 +510,8 @@ arena_run_split_large_helper(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t flag_unzeroed_mask; chunk = (arena_chunk_t *)extent_addr_get(extent); - miscelm = arena_run_to_miscelm(run); - run_ind = arena_miscelm_to_pageind(miscelm); + 
miscelm = arena_run_to_miscelm(extent, run); + run_ind = arena_miscelm_to_pageind(extent, miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind); need_pages = (size >> LG_PAGE); @@ -444,7 +522,7 @@ arena_run_split_large_helper(tsdn_t *tsdn, arena_t *arena, extent_t *extent, return (true); if (remove) { - arena_run_split_remove(arena, chunk, run_ind, flag_dirty, + arena_run_split_remove(arena, extent, run_ind, flag_dirty, flag_decommitted, need_pages); } @@ -515,8 +593,8 @@ arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, assert(binind != BININD_INVALID); chunk = (arena_chunk_t *)extent_addr_get(extent); - miscelm = arena_run_to_miscelm(run); - run_ind = arena_miscelm_to_pageind(miscelm); + miscelm = arena_run_to_miscelm(extent, run); + run_ind = arena_miscelm_to_pageind(extent, miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind); need_pages = (size >> LG_PAGE); @@ -526,7 +604,7 @@ arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, &arena->chunk_hooks, extent, run_ind << LG_PAGE, size)) return (true); - arena_run_split_remove(arena, chunk, run_ind, flag_dirty, + arena_run_split_remove(arena, extent, run_ind, flag_dirty, flag_decommitted, need_pages); for (i = 0; i < need_pages; i++) { @@ -680,8 +758,7 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) ql_elm_new(extent, ql_link); ql_tail_insert(&arena->achunks, extent, ql_link); - arena_avail_insert(arena, (arena_chunk_t *)extent_addr_get(extent), - map_bias, chunk_npages-map_bias); + arena_avail_insert(arena, extent, map_bias, chunk_npages-map_bias); return (extent); } @@ -746,7 +823,7 @@ arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) arena_mapbits_decommitted_get(chunk, chunk_npages-1)); /* Remove run from runs_avail, so that the arena does not use it. 
*/ - arena_avail_remove(arena, chunk, map_bias, chunk_npages-map_bias); + arena_avail_remove(arena, extent, map_bias, chunk_npages-map_bias); ql_remove(&arena->achunks, extent, ql_link); spare = arena->spare; @@ -1377,8 +1454,9 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(rdelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_rd_to_miscelm(extent, rdelm); + size_t pageind = arena_miscelm_to_pageind(extent, + miscelm); assert(arena_mapbits_allocated_get(chunk, pageind) == 0); assert(arena_mapbits_large_get(chunk, pageind) == 0); @@ -1435,8 +1513,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } else { extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(rdelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_rd_to_miscelm(extent, rdelm); + size_t pageind = arena_miscelm_to_pageind(extent, + miscelm); arena_run_t *run = &miscelm->run; size_t run_size = arena_mapbits_unallocated_size_get((arena_chunk_t *) @@ -1520,8 +1599,8 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(rdelm); - pageind = arena_miscelm_to_pageind(miscelm); + arena_rd_to_miscelm(extent, rdelm); + pageind = arena_miscelm_to_pageind(extent, miscelm); run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; @@ -1601,8 +1680,9 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(rdelm); - size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_rd_to_miscelm(extent, rdelm); + size_t pageind = arena_miscelm_to_pageind(extent, + miscelm); bool 
decommitted = (arena_mapbits_decommitted_get(chunk, pageind) != 0); arena_run_t *run = &miscelm->run; @@ -1823,10 +1903,11 @@ arena_reset(tsd_t *tsd, arena_t *arena) } static void -arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, +arena_run_coalesce(arena_t *arena, extent_t *extent, size_t *p_size, size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty, size_t flag_decommitted) { + arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); size_t size = *p_size; size_t run_ind = *p_run_ind; size_t run_pages = *p_run_pages; @@ -1851,7 +1932,8 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, run_ind+run_pages+nrun_pages-1) == flag_dirty); assert(arena_mapbits_decommitted_get(chunk, run_ind+run_pages+nrun_pages-1) == flag_decommitted); - arena_avail_remove(arena, chunk, run_ind+run_pages, nrun_pages); + arena_avail_remove(arena, extent, run_ind+run_pages, + nrun_pages); /* * If the successor is dirty, remove it from the set of dirty @@ -1890,7 +1972,7 @@ arena_run_coalesce(arena_t *arena, arena_chunk_t *chunk, size_t *p_size, assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); assert(arena_mapbits_decommitted_get(chunk, run_ind) == flag_decommitted); - arena_avail_remove(arena, chunk, run_ind, prun_pages); + arena_avail_remove(arena, extent, run_ind, prun_pages); /* * If the predecessor is dirty, remove it from the set of dirty @@ -1944,8 +2026,8 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t size, run_ind, run_pages, flag_dirty, flag_decommitted; chunk = (arena_chunk_t *)extent_addr_get(extent); - miscelm = arena_run_to_miscelm(run); - run_ind = arena_miscelm_to_pageind(miscelm); + miscelm = arena_run_to_miscelm(extent, run); + run_ind = arena_miscelm_to_pageind(extent, miscelm); assert(run_ind >= map_bias); assert(run_ind < chunk_npages); size = arena_run_size_get(arena, chunk, run, run_ind); @@ -1978,7 +2060,7 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t 
*extent, arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); } - arena_run_coalesce(arena, chunk, &size, &run_ind, &run_pages, + arena_run_coalesce(arena, extent, &size, &run_ind, &run_pages, flag_dirty, flag_decommitted); /* Insert into runs_avail, now that coalescing is complete. */ @@ -1988,7 +2070,7 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); assert(arena_mapbits_decommitted_get(chunk, run_ind) == arena_mapbits_decommitted_get(chunk, run_ind+run_pages-1)); - arena_avail_insert(arena, chunk, run_ind, run_pages); + arena_avail_insert(arena, extent, run_ind, run_pages); if (dirty) arena_run_dirty_insert(arena, chunk, run_ind, run_pages); @@ -2015,8 +2097,8 @@ static void arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, extent_t *extent, arena_run_t *run, size_t oldsize, size_t newsize) { - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); + size_t pageind = arena_miscelm_to_pageind(extent, miscelm); size_t head_npages = (oldsize - newsize) >> LG_PAGE; size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind); @@ -2057,8 +2139,8 @@ arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, extent_t *extent, arena_run_t *run, size_t oldsize, size_t newsize, bool dirty) { - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); - size_t pageind = arena_miscelm_to_pageind(miscelm); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); + size_t pageind = arena_miscelm_to_pageind(extent, miscelm); size_t head_npages = newsize >> LG_PAGE; size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind); @@ -2099,9 +2181,9 @@ arena_run_trim_tail(tsdn_t *tsdn, 
arena_t *arena, arena_chunk_t *chunk, } static void -arena_bin_runs_insert(arena_bin_t *bin, arena_run_t *run) +arena_bin_runs_insert(arena_bin_t *bin, extent_t *extent, arena_run_t *run) { - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(run); + arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); arena_run_heap_insert(&bin->runs, miscelm); } @@ -2189,7 +2271,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) */ void *ret; assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(bin->runcur, bin_info); + ret = arena_run_reg_alloc(tsdn, bin->runcur, bin_info); if (run != NULL) { extent_t *extent; arena_chunk_t *chunk; @@ -2207,8 +2289,10 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) if (run->nfree == bin_info->nregs) { arena_dalloc_bin_run(tsdn, arena, chunk, extent, run, bin); - } else - arena_bin_lower_run(arena, chunk, run, bin); + } else { + arena_bin_lower_run(tsdn, arena, extent, run, + bin); + } } return (ret); } @@ -2220,7 +2304,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) assert(bin->runcur->nfree > 0); - return (arena_run_reg_alloc(bin->runcur, bin_info)); + return (arena_run_reg_alloc(tsdn, bin->runcur, bin_info)); } void @@ -2240,9 +2324,10 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, tbin->lg_fill_div); i < nfill; i++) { arena_run_t *run; void *ptr; - if ((run = bin->runcur) != NULL && run->nfree > 0) - ptr = arena_run_reg_alloc(run, &arena_bin_info[binind]); - else + if ((run = bin->runcur) != NULL && run->nfree > 0) { + ptr = arena_run_reg_alloc(tsdn, run, + &arena_bin_info[binind]); + } else ptr = arena_bin_malloc_hard(tsdn, arena, bin); if (ptr == NULL) { /* @@ -2314,7 +2399,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) malloc_mutex_lock(tsdn, &bin->lock); if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(run, &arena_bin_info[binind]); + ret = 
arena_run_reg_alloc(tsdn, run, &arena_bin_info[binind]); else ret = arena_bin_malloc_hard(tsdn, arena, bin); @@ -2359,6 +2444,7 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) size_t usize; uintptr_t random_offset; arena_run_t *run; + extent_t *extent; arena_chunk_map_misc_t *miscelm; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); @@ -2382,8 +2468,9 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } - miscelm = arena_run_to_miscelm(run); - ret = (void *)((uintptr_t)arena_miscelm_to_rpages(miscelm) + + extent = iealloc(tsdn, run); + miscelm = arena_run_to_miscelm(extent, run); + ret = (void *)((uintptr_t)arena_miscelm_to_rpages(extent, miscelm) + random_offset); if (config_stats) { szind_t index = binind - NBINS; @@ -2465,8 +2552,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } extent = iealloc(tsdn, run); chunk = (arena_chunk_t *)extent_addr_get(extent); - miscelm = arena_run_to_miscelm(run); - rpages = arena_miscelm_to_rpages(miscelm); + miscelm = arena_run_to_miscelm(extent, run); + rpages = arena_miscelm_to_rpages(extent, miscelm); leadsize = ALIGNMENT_CEILING((uintptr_t)rpages, alignment) - (uintptr_t)rpages; @@ -2478,8 +2565,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_t *head_extent = extent; miscelm = arena_miscelm_get_mutable(chunk, - arena_miscelm_to_pageind(head_miscelm) + (leadsize >> - LG_PAGE)); + arena_miscelm_to_pageind(head_extent, head_miscelm) + + (leadsize >> LG_PAGE)); run = &miscelm->run; extent = iealloc(tsdn, run); @@ -2492,8 +2579,8 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } if (arena_run_init_large(tsdn, arena, extent, run, usize + large_pad, zero)) { - size_t run_ind = - arena_miscelm_to_pageind(arena_run_to_miscelm(run)); + size_t run_ind = arena_miscelm_to_pageind(extent, + 
arena_run_to_miscelm(extent, run)); bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0); bool decommitted = (arena_mapbits_decommitted_get(chunk, run_ind) != 0); @@ -2504,7 +2591,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, malloc_mutex_unlock(tsdn, &arena->lock); return (NULL); } - ret = arena_miscelm_to_rpages(miscelm); + ret = arena_miscelm_to_rpages(extent, miscelm); if (config_stats) { szind_t index = size2index(usize) - NBINS; @@ -2605,7 +2692,7 @@ arena_dissociate_bin_run(extent_t *extent, arena_run_t *run, arena_bin_t *bin) */ if (bin_info->nregs != 1) { arena_chunk_map_misc_t *miscelm = - arena_run_to_miscelm(run); + arena_run_to_miscelm(extent, run); arena_run_heap_remove(&bin->runs, miscelm); } @@ -2631,8 +2718,8 @@ arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, } static void -arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) +arena_bin_lower_run(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + arena_run_t *run, arena_bin_t *bin) { /* @@ -2642,13 +2729,15 @@ arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, */ if ((uintptr_t)run < (uintptr_t)bin->runcur) { /* Switch runcur. 
*/ - if (bin->runcur->nfree > 0) - arena_bin_runs_insert(bin, bin->runcur); + if (bin->runcur->nfree > 0) { + arena_bin_runs_insert(bin, iealloc(tsdn, bin->runcur), + bin->runcur); + } bin->runcur = run; if (config_stats) bin->stats.reruns++; } else - arena_bin_runs_insert(bin, run); + arena_bin_runs_insert(bin, extent, run); } static void @@ -2676,7 +2765,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_dissociate_bin_run(extent, run, bin); arena_dalloc_bin_run(tsdn, arena, chunk, extent, run, bin); } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(arena, chunk, run, bin); + arena_bin_lower_run(tsdn, arena, extent, run, bin); if (config_stats) { bin->stats.ndalloc++; diff --git a/src/chunk.c b/src/chunk.c index c1094ff4..ba9084e3 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -674,8 +674,6 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { assert(extent_addr_get(extent) != NULL); - assert(CHUNK_ADDR2BASE(extent_addr_get(extent)) == - extent_addr_get(extent)); assert(extent_size_get(extent) != 0); assert((extent_size_get(extent) & chunksize_mask) == 0); @@ -701,8 +699,6 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { assert(extent_addr_get(extent) != NULL); - assert(CHUNK_ADDR2BASE(extent_addr_get(extent)) == - extent_addr_get(extent)); assert(extent_size_get(extent) != 0); assert((extent_size_get(extent) & chunksize_mask) == 0); @@ -777,7 +773,6 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, { assert(chunk != NULL); - assert(CHUNK_ADDR2BASE(chunk) == chunk); assert((offset & PAGE_MASK) == 0); assert(length != 0); assert((length & PAGE_MASK) == 0); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 6b90c53e..d0fae7bc 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -108,8 +108,8 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * Calculate how much padding is necessary 
to * chunk-align the end of the DSS. */ - gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & - chunksize_mask; + gap_size = (chunksize - ALIGNMENT_ADDR2OFFSET(dss_max, + chunksize)) & chunksize_mask; /* * Compute how much chunk-aligned pad space (if any) is * necessary to satisfy alignment. This space can be From 4731cd47f7168e70809d83c8569f501891ecffa1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 26 May 2016 22:12:38 -0700 Subject: [PATCH 0285/2608] Allow chunks to not be naturally aligned. Precisely size extents for huge size classes that aren't multiples of chunksize. --- include/jemalloc/internal/arena.h | 2 - include/jemalloc/internal/extent.h | 16 ++ .../jemalloc/internal/jemalloc_internal.h.in | 22 +-- include/jemalloc/internal/private_symbols.txt | 3 +- src/arena.c | 41 +---- src/base.c | 2 +- src/chunk.c | 57 ++----- src/chunk_dss.c | 68 ++++---- src/chunk_mmap.c | 1 - src/huge.c | 145 +++--------------- test/integration/chunk.c | 16 -- 11 files changed, 105 insertions(+), 268 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index fac6fd34..c298df92 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -486,8 +486,6 @@ void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, extent_t *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index c3bdacb4..919b74f6 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -90,6 +90,8 @@ ph_proto(, 
extent_heap_, extent_heap_t, extent_t) arena_t *extent_arena_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); +void *extent_before_get(const extent_t *extent); +void *extent_last_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); bool extent_dirty_get(const extent_t *extent); @@ -137,6 +139,20 @@ extent_size_get(const extent_t *extent) return (extent->e_size); } +JEMALLOC_INLINE void * +extent_before_get(const extent_t *extent) +{ + + return ((void *)(uintptr_t)extent->e_addr - PAGE); +} + +JEMALLOC_INLINE void * +extent_last_get(const extent_t *extent) +{ + + return ((void *)(uintptr_t)extent->e_addr + extent->e_size - PAGE); +} + JEMALLOC_INLINE void * extent_past_get(const extent_t *extent) { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 7afe5694..ef4e0522 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -797,14 +797,14 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Try for a large size class. */ - if (likely(size <= large_maxclass) && likely(alignment < chunksize)) { - /* - * We can't achieve subpage alignment, so round up alignment - * to the minimum that can actually be supported. - */ - alignment = PAGE_CEILING(alignment); + /* + * We can't achieve subpage alignment, so round up alignment to the + * minimum that can actually be supported. + */ + alignment = PAGE_CEILING(alignment); + /* Try for a large size class. */ + if (likely(size <= large_maxclass) && likely(alignment == PAGE)) { /* Make sure result is a large size class. */ usize = (size <= LARGE_MINCLASS) ? 
LARGE_MINCLASS : s2u(size); @@ -821,12 +821,6 @@ sa2u(size_t size, size_t alignment) if (unlikely(alignment > HUGE_MAXCLASS)) return (0); - /* - * We can't achieve subchunk alignment, so round up alignment to the - * minimum that can actually be supported. - */ - alignment = CHUNK_CEILING(alignment); - /* Make sure result is a huge size class. */ if (size <= chunksize) usize = chunksize; @@ -839,7 +833,7 @@ sa2u(size_t size, size_t alignment) } /* - * Calculate the multi-chunk mapping that huge_palloc() would need in + * Calculate the multi-page mapping that huge_palloc() would need in * order to guarantee the alignment. */ if (usize + alignment < usize) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c4466013..44a2cd31 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -19,7 +19,6 @@ arena_chunk_cache_maybe_remove arena_chunk_dalloc_huge arena_chunk_ralloc_huge_expand arena_chunk_ralloc_huge_shrink -arena_chunk_ralloc_huge_similar arena_cleanup arena_dalloc arena_dalloc_bin @@ -211,6 +210,7 @@ extent_addr_set extent_alloc extent_arena_get extent_arena_set +extent_before_get extent_committed_get extent_committed_set extent_dalloc @@ -219,6 +219,7 @@ extent_dirty_insert extent_dirty_remove extent_dirty_set extent_init +extent_last_get extent_past_get extent_prof_tctx_get extent_prof_tctx_set diff --git a/src/arena.c b/src/arena.c index 39764c3f..aff11961 100644 --- a/src/arena.c +++ b/src/arena.c @@ -653,7 +653,7 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, malloc_mutex_unlock(tsdn, &arena->lock); extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, - chunksize, zero, commit, true); + PAGE, zero, commit, true); if (extent != NULL && !*commit) { /* Commit header. 
*/ if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, extent, 0, @@ -676,7 +676,7 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - chunksize, chunksize, zero, true); + chunksize, PAGE, zero, true); if (extent != NULL) *commit = true; if (extent == NULL) { @@ -892,13 +892,12 @@ arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) static extent_t * arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, - size_t csize) + chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero) { extent_t *extent; bool commit = true; - extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, + extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, usize, alignment, zero, &commit, false); if (extent == NULL) { /* Revert optimistic stats updates. 
*/ @@ -920,7 +919,6 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, { extent_t *extent; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - size_t csize = CHUNK_CEILING(usize); malloc_mutex_lock(tsdn, &arena->lock); @@ -932,11 +930,11 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - csize, alignment, zero, false); + usize, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); if (extent == NULL) { extent = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, - usize, alignment, zero, csize); + usize, alignment, zero); } return (extent); @@ -954,32 +952,10 @@ arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent) } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); - if ((extent_size_get(extent) & chunksize_mask) != 0) - extent_size_set(extent, CHUNK_CEILING(extent_size_get(extent))); - arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); } -void -arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t oldsize) -{ - size_t usize = extent_size_get(extent); - - assert(CHUNK_CEILING(oldsize) == CHUNK_CEILING(usize)); - assert(oldsize != usize); - - malloc_mutex_lock(tsdn, &arena->lock); - if (config_stats) - arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (oldsize < usize) - arena_nactive_add(arena, (usize - oldsize) >> LG_PAGE); - else - arena_nactive_sub(arena, (oldsize - usize) >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); -} - void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize) @@ -1501,8 +1477,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, zero = false; extent = arena_chunk_cache_alloc_locked(tsdn, arena, chunk_hooks, extent_addr_get(chunkselm), - 
extent_size_get(chunkselm), chunksize, &zero, - false); + extent_size_get(chunkselm), PAGE, &zero, false); assert(extent == chunkselm); assert(zero == extent_zeroed_get(chunkselm)); extent_dirty_insert(chunkselm, purge_runs_sentinel, @@ -2641,7 +2616,7 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(usize <= large_maxclass)) { ret = arena_palloc_large(tsdn, arena, usize, alignment, zero); - } else if (likely(alignment <= chunksize)) + } else if (likely(alignment <= PAGE)) ret = huge_malloc(tsdn, arena, usize, zero); else ret = huge_palloc(tsdn, arena, usize, alignment, zero); diff --git a/src/base.c b/src/base.c index 225f522b..1e32d955 100644 --- a/src/base.c +++ b/src/base.c @@ -57,7 +57,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) { bool zero = true; bool commit = true; - addr = chunk_alloc_mmap(NULL, csize, chunksize, &zero, &commit); + addr = chunk_alloc_mmap(NULL, csize, PAGE, &zero, &commit); } if (addr == NULL) { if (extent != NULL) diff --git a/src/chunk.c b/src/chunk.c index ba9084e3..2b599610 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -58,8 +58,7 @@ static void chunk_record(tsdn_t *tsdn, arena_t *arena, static void extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) { - size_t psz = - extent_size_quantize_floor(CHUNK_CEILING(extent_size_get(extent))); + size_t psz = extent_size_quantize_floor(extent_size_get(extent)); pszind_t pind = psz2ind(psz); extent_heap_insert(&extent_heaps[pind], extent); } @@ -67,8 +66,7 @@ extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) static void extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) { - size_t psz = - extent_size_quantize_floor(CHUNK_CEILING(extent_size_get(extent))); + size_t psz = extent_size_quantize_floor(extent_size_get(extent)); pszind_t pind = psz2ind(psz); extent_heap_remove(&extent_heaps[pind], extent); } @@ -169,11 +167,9 @@ extent_rtree_acquire(tsdn_t *tsdn, const extent_t 
*extent, bool dependent, assert(*r_elm_a != NULL); if (extent_size_get(extent) > PAGE) { - uintptr_t last = - (CHUNK_CEILING((uintptr_t)extent_past_get(extent)) - PAGE); - - *r_elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, last, - dependent, init_missing); + *r_elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, + (uintptr_t)extent_last_get(extent), dependent, + init_missing); if (!dependent && *r_elm_b == NULL) return (true); assert(*r_elm_b != NULL); @@ -290,8 +286,6 @@ chunk_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], { pszind_t pind, i; - assert(size == CHUNK_CEILING(size)); - pind = psz2ind(extent_size_quantize_ceil(size)); for (i = pind; i < NPSIZES; i++) { extent_t *extent = extent_heap_first(&extent_heaps[i]); @@ -326,9 +320,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent; size_t alloc_size, leadsize, trailsize; - assert(new_addr == NULL || alignment == chunksize); - - alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); + alloc_size = s2u(size + alignment - PAGE); /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); @@ -441,9 +433,7 @@ chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, void *ret; assert(size != 0); - assert((size & chunksize_mask) == 0); assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); /* "primary" dss. 
*/ if (have_dss && dss_prec == dss_prec_primary && (ret = @@ -472,9 +462,7 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool commit; assert(size != 0); - assert((size & chunksize_mask) == 0); assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); commit = true; extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, @@ -525,9 +513,7 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent; assert(size != 0); - assert((size & chunksize_mask) == 0); assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, false, new_addr, size, alignment, zero, commit, slab); @@ -551,8 +537,10 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return (NULL); addr = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, arena->ind); - if (addr == NULL) + if (addr == NULL) { + extent_dalloc(tsdn, arena, extent); return (NULL); + } extent_init(extent, arena, addr, size, true, false, zero, commit, slab); if (chunk_register(tsdn, extent)) { chunk_leak(tsdn, arena, chunk_hooks, false, extent); @@ -585,9 +573,6 @@ static bool chunk_can_coalesce(const extent_t *a, const extent_t *b) { - assert((void *)CHUNK_CEILING((uintptr_t)extent_past_get(a)) == - extent_addr_get(b)); - if (extent_arena_get(a) != extent_arena_get(b)) return (false); if (extent_active_get(a) != extent_active_get(b)) @@ -637,7 +622,6 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); - assert((extent_size_get(extent) & chunksize_mask) == 0); extent_active_set(extent, false); extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); if (extent_slab_get(extent)) { @@ -651,7 +635,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to coalesce forward. 
*/ next = rtree_read(tsdn, &chunks_rtree, - CHUNK_CEILING((uintptr_t)extent_past_get(extent)), false); + (uintptr_t)extent_past_get(extent), false); if (next != NULL) { chunk_try_coalesce(tsdn, arena, chunk_hooks, extent, next, extent_heaps, cache); @@ -659,7 +643,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to coalesce backward. */ prev = rtree_read(tsdn, &chunks_rtree, - (uintptr_t)extent_addr_get(extent) - PAGE, false); + (uintptr_t)extent_before_get(extent), false); if (prev != NULL) { chunk_try_coalesce(tsdn, arena, chunk_hooks, prev, extent, extent_heaps, cache); @@ -675,7 +659,6 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(extent_addr_get(extent) != NULL); assert(extent_size_get(extent) != 0); - assert((extent_size_get(extent) & chunksize_mask) == 0); extent_zeroed_set(extent, false); @@ -700,7 +683,6 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(extent_addr_get(extent) != NULL); assert(extent_size_get(extent) != 0); - assert((extent_size_get(extent) & chunksize_mask) == 0); chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. 
*/ @@ -788,8 +770,7 @@ chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); return (chunk_hooks->purge(extent_addr_get(extent), - CHUNK_CEILING(extent_size_get(extent)), offset, length, - arena->ind)); + extent_size_get(extent), offset, length, arena->ind)); } static bool @@ -809,9 +790,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *trail; rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; - assert(CHUNK_CEILING(size_a) == size_a); - assert(CHUNK_CEILING(extent_size_get(extent)) == size_a + - CHUNK_CEILING(size_b)); + assert(extent_size_get(extent) == size_a + size_b); chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); @@ -833,16 +812,15 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } extent_init(trail, arena, (void *)((uintptr_t)extent_addr_get(extent) + - size_a), CHUNK_CEILING(size_b), extent_active_get(extent), + size_a), size_b, extent_active_get(extent), extent_dirty_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, &trail_elm_b)) goto label_error_c; - if (chunk_hooks->split(extent_addr_get(extent), size_a + - CHUNK_CEILING(size_b), size_a, CHUNK_CEILING(size_b), - extent_committed_get(extent), arena->ind)) + if (chunk_hooks->split(extent_addr_get(extent), size_a + size_b, size_a, + size_b, extent_committed_get(extent), arena->ind)) goto label_error_d; extent_size_set(extent, size_a); @@ -886,9 +864,6 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - assert((extent_size_get(a) & chunksize_mask) == 0); - assert((extent_size_get(b) & chunksize_mask) == 0); - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), 
extent_addr_get(b), extent_size_get(b), extent_committed_get(a), diff --git a/src/chunk_dss.c b/src/chunk_dss.c index d0fae7bc..0119c12b 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -69,9 +69,12 @@ void * chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { + void *ret; + extent_t *pad; + cassert(have_dss); - assert(size > 0 && (size & chunksize_mask) == 0); - assert(alignment > 0 && (alignment & chunksize_mask) == 0); + assert(size > 0); + assert(alignment > 0); /* * sbrk() uses a signed increment argument, so take care not to @@ -80,19 +83,22 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); + pad = extent_alloc(tsdn, arena); + if (pad == NULL) + return (NULL); + malloc_mutex_lock(tsdn, &dss_mtx); if (dss_prev != (void *)-1) { - /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad_addr, *dss_next; - extent_t *cpad; - size_t gap_size, cpad_size; + while (true) { + void *pad_addr, *dss_next; + size_t pad_size; intptr_t incr; + /* Avoid an unnecessary system call. */ if (new_addr != NULL && dss_max != new_addr) break; @@ -105,58 +111,48 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, break; /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. + * Compute how much pad space (if any) is necessary to + * satisfy alignment. This space can be recycled for + * later use. */ - gap_size = (chunksize - ALIGNMENT_ADDR2OFFSET(dss_max, - chunksize)) & chunksize_mask; - /* - * Compute how much chunk-aligned pad space (if any) is - * necessary to satisfy alignment. This space can be - * recycled for later use. 
- */ - cpad_addr = (void *)((uintptr_t)dss_max + gap_size); + pad_addr = (void *)((uintptr_t)dss_max); ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, alignment); - cpad_size = (uintptr_t)ret - (uintptr_t)cpad_addr; - if (cpad_size != 0) { - cpad = extent_alloc(tsdn, arena); - if (cpad == NULL) { - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } - extent_init(cpad, arena, cpad_addr, cpad_size, + pad_size = (uintptr_t)ret - (uintptr_t)pad_addr; + if (pad_size != 0) { + extent_init(pad, arena, pad_addr, pad_size, false, true, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } - incr = gap_size + cpad_size + size; + (uintptr_t)dss_next < (uintptr_t)dss_max) + break; /* Wrap-around. */ + incr = pad_size + size; dss_prev = chunk_dss_sbrk(incr); + if (dss_prev == (void *)-1) + break; if (dss_prev == dss_max) { /* Success. */ dss_max = dss_next; malloc_mutex_unlock(tsdn, &dss_mtx); - if (cpad_size != 0) { + if (pad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, - &chunk_hooks, cpad); - } + &chunk_hooks, pad); + } else + extent_dalloc(tsdn, arena, pad); if (*zero) memset(ret, 0, size); if (!*commit) *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); + } } + /* OOM. 
*/ malloc_mutex_unlock(tsdn, &dss_mtx); - + extent_dalloc(tsdn, arena, pad); return (NULL); } diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index 13708027..e1ee26f4 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -50,7 +50,6 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, */ assert(alignment != 0); - assert((alignment & chunksize_mask) == 0); ret = pages_map(new_addr, size, commit); if (ret == NULL || ret == new_addr) diff --git a/src/huge.c b/src/huge.c index 31d3bcae..69cf034a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -9,7 +9,7 @@ huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) assert(usize == s2u(usize)); - return (huge_palloc(tsdn, arena, usize, chunksize, zero)); + return (huge_palloc(tsdn, arena, usize, PAGE, zero)); } void * @@ -20,14 +20,11 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_t *extent; bool is_zeroed; - /* Allocate one or more contiguous chunks for this request. */ - assert(!tsdn_null(tsdn) || arena != NULL); ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) return (NULL); - assert(ausize >= chunksize); /* * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that @@ -40,9 +37,6 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena, usize, alignment, &is_zeroed)) == NULL) return (NULL); - if (usize < extent_size_get(extent)) - extent_size_set(extent, usize); - /* Insert extent into huge. 
*/ malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_elm_new(extent, ql_link); @@ -86,80 +80,20 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); #endif -static void -huge_ralloc_no_move_similar(tsdn_t *tsdn, extent_t *extent, size_t usize_min, - size_t usize_max, bool zero) -{ - size_t usize, usize_next; - arena_t *arena = extent_arena_get(extent); - size_t oldsize = extent_size_get(extent); - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool pre_zeroed, post_zeroed; - - /* Increase usize to incorporate extra. */ - for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) - <= oldsize; usize = usize_next) - ; /* Do nothing. */ - - if (oldsize == usize) - return; - - pre_zeroed = extent_zeroed_get(extent); - - /* Fill if necessary (shrinking). */ - if (oldsize > usize) { - size_t sdiff = oldsize - usize; - if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)extent_addr_get(extent) + - usize), JEMALLOC_FREE_JUNK, sdiff); - post_zeroed = false; - } else { - post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, extent, usize, sdiff); - } - } else - post_zeroed = pre_zeroed; - - /* Update the size of the huge allocation. */ - assert(extent_size_get(extent) != usize); - malloc_mutex_lock(tsdn, &arena->huge_mtx); - extent_size_set(extent, usize); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); - /* Update zeroed. */ - extent_zeroed_set(extent, post_zeroed); - - arena_chunk_ralloc_huge_similar(tsdn, arena, extent, oldsize); - - /* Fill if necessary (growing). 
*/ - if (oldsize < usize) { - if (zero || (config_fill && unlikely(opt_zero))) { - if (!pre_zeroed) { - memset((void *) - ((uintptr_t)extent_addr_get(extent) + - oldsize), 0, usize - oldsize); - } - } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)extent_addr_get(extent) + - oldsize), JEMALLOC_ALLOC_JUNK, usize - oldsize); - } - } -} - static bool huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); size_t oldsize = extent_size_get(extent); chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); - size_t sdiff = CHUNK_CEILING(usize) - usize; + size_t diff = oldsize - usize; assert(oldsize > usize); - /* Split excess chunks. */ - if (cdiff != 0) { + /* Split excess pages. */ + if (diff != 0) { extent_t *trail = chunk_split_wrapper(tsdn, arena, &chunk_hooks, - extent, CHUNK_CEILING(usize), cdiff); + extent, usize, diff); if (trail == NULL) return (true); @@ -171,28 +105,6 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, trail); } - /* Optionally fill trailing subchunk. 
*/ - if (sdiff != 0) { - bool post_zeroed; - - if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, - (void *)((uintptr_t)extent_addr_get(extent) + - usize), sdiff); - post_zeroed = false; - } else { - post_zeroed = !chunk_purge_wrapper(tsdn, arena, - &chunk_hooks, extent, usize, sdiff); - - if (config_fill && unlikely(opt_zero) && !post_zeroed) { - memset((void *) - ((uintptr_t)extent_addr_get(extent) + - usize), 0, sdiff); - } - } - extent_zeroed_set(extent, post_zeroed); - } - arena_chunk_ralloc_huge_shrink(tsdn, arena, extent, oldsize); return (false); @@ -204,20 +116,18 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, { arena_t *arena = extent_arena_get(extent); size_t oldsize = extent_size_get(extent); - bool is_zeroed_subchunk = extent_zeroed_get(extent); - bool is_zeroed_chunk = false; + bool is_zeroed_trail = false; chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - void *nchunk = - (void *)CHUNK_CEILING((uintptr_t)extent_past_get(extent)); + size_t trailsize = usize - oldsize; extent_t *trail; - if ((trail = arena_chunk_cache_alloc(tsdn, arena, &chunk_hooks, nchunk, - cdiff, chunksize, &is_zeroed_chunk)) == NULL) { + if ((trail = arena_chunk_cache_alloc(tsdn, arena, &chunk_hooks, + extent_past_get(extent), trailsize, PAGE, &is_zeroed_trail)) == + NULL) { bool commit = true; if ((trail = chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, - nchunk, cdiff, chunksize, &is_zeroed_chunk, &commit, false)) - == NULL) + extent_past_get(extent), trailsize, PAGE, &is_zeroed_trail, + &commit, false)) == NULL) return (true); } @@ -227,23 +137,15 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } if (zero || (config_fill && unlikely(opt_zero))) { - if (!is_zeroed_subchunk) { + if (!is_zeroed_trail) { memset((void *)((uintptr_t)extent_addr_get(extent) + - oldsize), 0, CHUNK_CEILING(oldsize) - oldsize); - } - if (!is_zeroed_chunk) { 
- memset((void *)((uintptr_t)extent_addr_get(extent) + - CHUNK_CEILING(oldsize)), 0, usize - - CHUNK_CEILING(oldsize)); + oldsize), 0, usize - oldsize); } } else if (config_fill && unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)extent_addr_get(extent) + oldsize), JEMALLOC_ALLOC_JUNK, usize - oldsize); } - if (usize < extent_size_get(extent)) - extent_size_set(extent, usize); - arena_chunk_ralloc_huge_expand(tsdn, arena, extent, oldsize); return (false); @@ -260,7 +162,7 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* Both allocation sizes must be huge to avoid a move. */ assert(extent_size_get(extent) >= chunksize && usize_max >= chunksize); - if (CHUNK_CEILING(usize_max) > CHUNK_CEILING(extent_size_get(extent))) { + if (usize_max > extent_size_get(extent)) { /* Attempt to expand the allocation in-place. */ if (!huge_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { @@ -268,9 +170,9 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, return (false); } /* Try again, this time with usize_min. */ - if (usize_min < usize_max && CHUNK_CEILING(usize_min) > - CHUNK_CEILING(extent_size_get(extent)) && - huge_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { + if (usize_min < usize_max && usize_min > extent_size_get(extent) + && huge_ralloc_no_move_expand(tsdn, extent, usize_min, + zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } @@ -280,17 +182,14 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * Avoid moving the allocation if the existing chunk size accommodates * the new size. 
*/ - if (CHUNK_CEILING(extent_size_get(extent)) >= CHUNK_CEILING(usize_min) - && CHUNK_CEILING(extent_size_get(extent)) <= - CHUNK_CEILING(usize_max)) { - huge_ralloc_no_move_similar(tsdn, extent, usize_min, usize_max, - zero); + if (extent_size_get(extent) >= usize_min && extent_size_get(extent) <= + usize_max) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (CHUNK_CEILING(extent_size_get(extent)) > CHUNK_CEILING(usize_max)) { + if (extent_size_get(extent) > usize_max) { if (!huge_ralloc_no_move_shrink(tsdn, extent, usize_max)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); @@ -304,7 +203,7 @@ huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { - if (alignment <= chunksize) + if (alignment <= PAGE) return (huge_malloc(tsdn, arena, usize, zero)); return (huge_palloc(tsdn, arena, usize, alignment, zero)); } diff --git a/test/integration/chunk.c b/test/integration/chunk.c index ff9bf967..092472c6 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -224,22 +224,6 @@ TEST_BEGIN(test_chunk) do_dalloc = true; do_decommit = false; - /* Test purge for partial-chunk huge allocations. */ - if (huge0 * 2 > huge2) { - /* - * There are at least four size classes per doubling, so a - * successful xallocx() from size=huge2 to size=huge1 is - * guaranteed to leave trailing purgeable memory. - */ - p = mallocx(huge2, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - did_purge = false; - assert_zu_eq(xallocx(p, huge1, 0, flags), huge1, - "Unexpected xallocx() failure"); - assert_true(did_purge, "Expected purge"); - dallocx(p, flags); - } - /* Test decommit for large allocations. 
*/ do_decommit = true; p = mallocx(large1, flags); From b46261d58b449cc4c099ed2384451a2499688f0e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 27 May 2016 18:57:15 -0700 Subject: [PATCH 0286/2608] Implement cache-oblivious support for huge size classes. --- include/jemalloc/internal/arena.h | 21 ++-- include/jemalloc/internal/chunk.h | 8 +- include/jemalloc/internal/extent.h | 42 +++++++ include/jemalloc/internal/private_symbols.txt | 4 + include/jemalloc/internal/prng.h | 35 ++++-- src/arena.c | 107 +++++++++--------- src/chunk.c | 98 +++++++++------- src/ckh.c | 5 +- src/huge.c | 88 ++++++++------ src/prof.c | 2 +- src/tcache.c | 4 +- test/unit/prng.c | 54 +++++++-- 12 files changed, 298 insertions(+), 170 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index c298df92..4d2b25a0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -688,7 +688,7 @@ JEMALLOC_ALWAYS_INLINE size_t arena_miscelm_to_pageind(const extent_t *extent, const arena_chunk_map_misc_t *miscelm) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; @@ -702,7 +702,7 @@ JEMALLOC_ALWAYS_INLINE void * arena_miscelm_to_rpages(const extent_t *extent, const arena_chunk_map_misc_t *miscelm) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind = arena_miscelm_to_pageind(extent, miscelm); return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); @@ -1065,7 +1065,7 @@ arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, size_t mapbits) assert(binind != BININD_INVALID); assert(binind < NBINS); extent = iealloc(tsdn, ptr); - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t 
*)extent_base_get(extent); arena = extent_arena_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; actual_mapbits = arena_mapbits_get(chunk, pageind); @@ -1106,7 +1106,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) assert(ptr != NULL); if (likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); @@ -1132,7 +1132,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, assert(ptr != NULL); if (likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); @@ -1168,8 +1168,9 @@ arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && (uintptr_t)old_tctx > (uintptr_t)1U))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); - if (likely(chunk != ptr)) { + if (likely(extent_slab_get(extent))) { + arena_chunk_t *chunk = + (arena_chunk_t *)extent_base_get(extent); size_t pageind; arena_chunk_map_misc_t *elm; @@ -1253,7 +1254,7 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) if (likely(extent_slab_get(extent))) { const arena_chunk_t *chunk = - (const arena_chunk_t *)extent_addr_get(extent); + (const arena_chunk_t *)extent_base_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); @@ -1302,7 +1303,7 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(ptr != NULL); if 
(likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; mapbits = arena_mapbits_get(chunk, pageind); @@ -1349,7 +1350,7 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, assert(!tsdn_null(tsdn) || tcache == NULL); if (likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); if (config_prof && opt_prof) { size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 0c0084b0..624073d9 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -45,11 +45,11 @@ chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); extent_t *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool slab); + chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab); extent_t *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, bool slab); + chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent); void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 919b74f6..4023f82d 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -88,8 +88,10 @@ ph_proto(, extent_heap_, extent_heap_t, extent_t) #ifndef 
JEMALLOC_ENABLE_INLINE arena_t *extent_arena_get(const extent_t *extent); +void *extent_base_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); +size_t extent_usize_get(const extent_t *extent); void *extent_before_get(const extent_t *extent); void *extent_last_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); @@ -102,6 +104,7 @@ bool extent_slab_get(const extent_t *extent); prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); +void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); void extent_active_set(extent_t *extent, bool active); void extent_dirty_set(extent_t *extent, bool dirty); @@ -125,10 +128,21 @@ extent_arena_get(const extent_t *extent) return (extent->e_arena); } +JEMALLOC_INLINE void * +extent_base_get(const extent_t *extent) +{ + + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent->e_slab); + return (PAGE_ADDR2BASE(extent->e_addr)); +} + JEMALLOC_INLINE void * extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent->e_slab); return (extent->e_addr); } @@ -139,6 +153,14 @@ extent_size_get(const extent_t *extent) return (extent->e_size); } +JEMALLOC_INLINE size_t +extent_usize_get(const extent_t *extent) +{ + + assert(!extent->e_slab); + return (extent->e_size - large_pad); +} + JEMALLOC_INLINE void * extent_before_get(const extent_t *extent) { @@ -224,6 +246,24 @@ extent_addr_set(extent_t *extent, void *addr) extent->e_addr = addr; } +JEMALLOC_INLINE void +extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) +{ + + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); 
+ uint64_t r = + prng_lg_range(&extent_arena_get(extent)->offset_state, + lg_range, true); + uintptr_t random_offset = ((uintptr_t)r) << lg_range; + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + } +} + JEMALLOC_INLINE void extent_size_set(extent_t *extent, size_t size) { @@ -278,6 +318,8 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool active, bool dirty, bool zeroed, bool committed, bool slab) { + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + extent_arena_set(extent, arena); extent_addr_set(extent, addr); extent_size_set(extent, size); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 44a2cd31..75a1dace 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -206,10 +206,12 @@ dss_prec_names extent_active_get extent_active_set extent_addr_get +extent_addr_randomize extent_addr_set extent_alloc extent_arena_get extent_arena_set +extent_base_get extent_before_get extent_committed_get extent_committed_set @@ -230,6 +232,7 @@ extent_size_quantize_ceil extent_size_quantize_floor extent_slab_get extent_slab_set +extent_usize_get extent_zeroed_get extent_zeroed_set ffs_llu @@ -373,6 +376,7 @@ pow2_ceil_u64 pow2_ceil_zu prng_lg_range prng_range +prng_state_next prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 5830f8b7..ebe916f8 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -35,28 +35,45 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); -uint64_t prng_range(uint64_t *state, uint64_t range); +uint64_t prng_state_next(uint64_t state); +uint64_t prng_lg_range(uint64_t *state, unsigned lg_range, bool atomic); +uint64_t prng_range(uint64_t *state, uint64_t range, bool atomic); #endif #if 
(defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range(uint64_t *state, unsigned lg_range) +prng_state_next(uint64_t state) { - uint64_t ret; + + return ((state * PRNG_A) + PRNG_C); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range(uint64_t *state, unsigned lg_range, bool atomic) +{ + uint64_t ret, state1; assert(lg_range > 0); assert(lg_range <= 64); - ret = (*state * PRNG_A) + PRNG_C; - *state = ret; - ret >>= (64 - lg_range); + if (atomic) { + uint64_t state0; + + do { + state0 = atomic_read_uint64(state); + state1 = prng_state_next(state0); + } while (atomic_cas_uint64(state, state0, state1)); + } else { + state1 = prng_state_next(*state); + *state = state1; + } + ret = state1 >> (64 - lg_range); return (ret); } JEMALLOC_ALWAYS_INLINE uint64_t -prng_range(uint64_t *state, uint64_t range) +prng_range(uint64_t *state, uint64_t range, bool atomic) { uint64_t ret; unsigned lg_range; @@ -68,7 +85,7 @@ prng_range(uint64_t *state, uint64_t range) /* Generate a result in [0..range) via repeated trial. 
*/ do { - ret = prng_lg_range(state, lg_range); + ret = prng_lg_range(state, lg_range, atomic); } while (ret >= range); return (ret); diff --git a/src/arena.c b/src/arena.c index aff11961..4ce55577 100644 --- a/src/arena.c +++ b/src/arena.c @@ -57,7 +57,7 @@ static void arena_bin_lower_run(tsdn_t *tsdn, arena_t *arena, JEMALLOC_INLINE_C size_t arena_miscelm_size_get(extent_t *extent, const arena_chunk_map_misc_t *miscelm) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind = arena_miscelm_to_pageind(extent, miscelm); size_t mapbits = arena_mapbits_get(chunk, pageind); return (arena_mapbits_size_decode(mapbits)); @@ -154,7 +154,7 @@ static void arena_avail_insert(arena_t *arena, extent_t *extent, size_t pageind, size_t npages) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( extent, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> @@ -167,7 +167,7 @@ static void arena_avail_remove(arena_t *arena, extent_t *extent, size_t pageind, size_t npages) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( extent, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> @@ -221,14 +221,14 @@ arena_chunk_dirty_npages(const extent_t *extent) static extent_t * arena_chunk_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool slab) + chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab) { 
malloc_mutex_assert_owner(tsdn, &arena->lock); - return (chunk_alloc_cache(tsdn, arena, chunk_hooks, new_addr, size, - alignment, zero, slab)); + return (chunk_alloc_cache(tsdn, arena, chunk_hooks, new_addr, usize, + pad, alignment, zero, slab)); } extent_t * @@ -240,7 +240,7 @@ arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, malloc_mutex_lock(tsdn, &arena->lock); extent = arena_chunk_cache_alloc_locked(tsdn, arena, chunk_hooks, - new_addr, size, alignment, zero, false); + new_addr, size, 0, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); return (extent); @@ -388,7 +388,7 @@ JEMALLOC_INLINE_C void arena_run_reg_dalloc(tsdn_t *tsdn, arena_run_t *run, extent_t *extent, void *ptr) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; size_t mapbits = arena_mapbits_get(chunk, pageind); szind_t binind = arena_ptr_small_binind_get(tsdn, ptr, mapbits); @@ -460,7 +460,7 @@ static void arena_run_split_remove(arena_t *arena, extent_t *extent, size_t run_ind, size_t flag_dirty, size_t flag_decommitted, size_t need_pages) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t total_pages, rem_pages; assert(flag_dirty == 0 || flag_decommitted == 0); @@ -509,7 +509,7 @@ arena_run_split_large_helper(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t flag_dirty, flag_decommitted, run_ind, need_pages; size_t flag_unzeroed_mask; - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); miscelm = arena_run_to_miscelm(extent, run); run_ind = arena_miscelm_to_pageind(extent, miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); @@ -592,7 +592,7 @@ arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, assert(binind != BININD_INVALID); - chunk = 
(arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); miscelm = arena_run_to_miscelm(extent, run); run_ind = arena_miscelm_to_pageind(extent, miscelm); flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); @@ -629,16 +629,16 @@ arena_chunk_init_spare(arena_t *arena) arena->spare = NULL; assert(arena_mapbits_allocated_get((arena_chunk_t *) - extent_addr_get(extent), map_bias) == 0); + extent_base_get(extent), map_bias) == 0); assert(arena_mapbits_allocated_get((arena_chunk_t *) - extent_addr_get(extent), chunk_npages-1) == 0); + extent_base_get(extent), chunk_npages-1) == 0); assert(arena_mapbits_unallocated_size_get((arena_chunk_t *) - extent_addr_get(extent), map_bias) == arena_maxrun); + extent_base_get(extent), map_bias) == arena_maxrun); assert(arena_mapbits_unallocated_size_get((arena_chunk_t *) - extent_addr_get(extent), chunk_npages-1) == arena_maxrun); + extent_base_get(extent), chunk_npages-1) == arena_maxrun); assert(arena_mapbits_dirty_get((arena_chunk_t *) - extent_addr_get(extent), map_bias) == - arena_mapbits_dirty_get((arena_chunk_t *)extent_addr_get(extent), + extent_base_get(extent), map_bias) == + arena_mapbits_dirty_get((arena_chunk_t *)extent_base_get(extent), chunk_npages-1)); return (extent); @@ -653,7 +653,7 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, malloc_mutex_unlock(tsdn, &arena->lock); extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, - PAGE, zero, commit, true); + 0, CACHELINE, zero, commit, true); if (extent != NULL && !*commit) { /* Commit header. 
*/ if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, extent, 0, @@ -676,7 +676,7 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - chunksize, PAGE, zero, true); + chunksize, 0, CACHELINE, zero, true); if (extent != NULL) *commit = true; if (extent == NULL) { @@ -717,7 +717,7 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) */ flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED; - arena_mapbits_unallocated_set((arena_chunk_t *)extent_addr_get(extent), + arena_mapbits_unallocated_set((arena_chunk_t *)extent_base_get(extent), map_bias, arena_maxrun, flag_unzeroed | flag_decommitted); /* * There is no need to initialize the internal page map entries unless @@ -726,18 +726,18 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) if (!zero) { for (i = map_bias+1; i < chunk_npages-1; i++) { arena_mapbits_internal_set((arena_chunk_t *) - extent_addr_get(extent), i, flag_unzeroed); + extent_base_get(extent), i, flag_unzeroed); } } else { if (config_debug) { for (i = map_bias+1; i < chunk_npages-1; i++) { assert(arena_mapbits_unzeroed_get( - (arena_chunk_t *)extent_addr_get(extent), i) + (arena_chunk_t *)extent_base_get(extent), i) == flag_unzeroed); } } } - arena_mapbits_unallocated_set((arena_chunk_t *)extent_addr_get(extent), + arena_mapbits_unallocated_set((arena_chunk_t *)extent_base_get(extent), chunk_npages-1, arena_maxrun, flag_unzeroed); return (extent); @@ -770,7 +770,7 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) extent_committed_set(extent, (arena_mapbits_decommitted_get((arena_chunk_t *) - extent_addr_get(extent), map_bias) == 0)); + extent_base_get(extent), map_bias) == 0)); if (!extent_committed_get(extent)) { /* * Decommit the header. 
Mark the chunk as decommitted even if @@ -796,10 +796,10 @@ arena_spare_discard(tsdn_t *tsdn, arena_t *arena, extent_t *spare) assert(arena->spare != spare); - if (arena_mapbits_dirty_get((arena_chunk_t *)extent_addr_get(spare), + if (arena_mapbits_dirty_get((arena_chunk_t *)extent_base_get(spare), map_bias) != 0) { arena_run_dirty_remove(arena, (arena_chunk_t *) - extent_addr_get(spare), map_bias, chunk_npages-map_bias); + extent_base_get(spare), map_bias, chunk_npages-map_bias); } arena_chunk_discard(tsdn, arena, spare); @@ -808,7 +808,7 @@ arena_spare_discard(tsdn_t *tsdn, arena_t *arena, extent_t *spare) static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); extent_t *spare; assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); @@ -898,7 +898,7 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, bool commit = true; extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, usize, - alignment, zero, &commit, false); + large_pad, alignment, zero, &commit, false); if (extent == NULL) { /* Revert optimistic stats updates. 
*/ malloc_mutex_lock(tsdn, &arena->lock); @@ -930,7 +930,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - usize, alignment, zero, false); + usize, large_pad, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); if (extent == NULL) { extent = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, @@ -1046,7 +1046,7 @@ arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) extent = arena_chunk_alloc(tsdn, arena); if (extent != NULL) { run = &arena_miscelm_get_mutable((arena_chunk_t *) - extent_addr_get(extent), map_bias)->run; + extent_base_get(extent), map_bias)->run; if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, size, zero)) run = NULL; @@ -1095,7 +1095,7 @@ arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) extent = arena_chunk_alloc(tsdn, arena); if (extent != NULL) { run = &arena_miscelm_get_mutable( - (arena_chunk_t *)extent_addr_get(extent), map_bias)->run; + (arena_chunk_t *)extent_base_get(extent), map_bias)->run; if (arena_run_split_small(tsdn, arena, iealloc(tsdn, run), run, size, binind)) run = NULL; @@ -1161,7 +1161,7 @@ arena_decay_deadline_init(arena_t *arena) nstime_t jitter; nstime_init(&jitter, prng_range(&arena->decay_jitter_state, - nstime_ns(&arena->decay_interval))); + nstime_ns(&arena->decay_interval), false)); nstime_add(&arena->decay_deadline, &jitter); } } @@ -1428,7 +1428,7 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) } else { extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(extent); + (arena_chunk_t *)extent_base_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(extent, rdelm); size_t pageind = arena_miscelm_to_pageind(extent, @@ -1476,8 +1476,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Allocate. 
*/ zero = false; extent = arena_chunk_cache_alloc_locked(tsdn, arena, - chunk_hooks, extent_addr_get(chunkselm), - extent_size_get(chunkselm), PAGE, &zero, false); + chunk_hooks, extent_base_get(chunkselm), + extent_size_get(chunkselm), 0, CACHELINE, &zero, + false); assert(extent == chunkselm); assert(zero == extent_zeroed_get(chunkselm)); extent_dirty_insert(chunkselm, purge_runs_sentinel, @@ -1494,7 +1495,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_run_t *run = &miscelm->run; size_t run_size = arena_mapbits_unallocated_size_get((arena_chunk_t *) - extent_addr_get(extent), pageind); + extent_base_get(extent), pageind); npages = run_size >> LG_PAGE; if (opt_purge == purge_mode_decay && arena->ndirty - @@ -1503,9 +1504,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(pageind + npages <= chunk_npages); assert(arena_mapbits_dirty_get((arena_chunk_t *) - extent_addr_get(extent), pageind) == + extent_base_get(extent), pageind) == arena_mapbits_dirty_get((arena_chunk_t *) - extent_addr_get(extent), pageind+npages-1)); + extent_base_get(extent), pageind+npages-1)); /* * If purging the spare chunk's run, make it available @@ -1572,7 +1573,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool decommitted; extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(extent); + (arena_chunk_t *)extent_base_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(extent, rdelm); pageind = arena_miscelm_to_pageind(extent, miscelm); @@ -1653,7 +1654,7 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } else { extent_t *extent = iealloc(tsdn, rdelm); arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(extent); + (arena_chunk_t *)extent_base_get(extent); arena_chunk_map_misc_t *miscelm = arena_rd_to_miscelm(extent, rdelm); size_t pageind = arena_miscelm_to_pageind(extent, @@ 
-1734,7 +1735,7 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) static void arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, extent_t *extent) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t pageind, npages; cassert(config_prof); @@ -1809,7 +1810,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); for (extent = ql_last(&arena->huge, ql_link); extent != NULL; extent = ql_last(&arena->huge, ql_link)) { - void *ptr = extent_addr_get(extent); + void *ptr = extent_base_get(extent); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); @@ -1882,7 +1883,7 @@ arena_run_coalesce(arena_t *arena, extent_t *extent, size_t *p_size, size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty, size_t flag_decommitted) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_addr_get(extent); + arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); size_t size = *p_size; size_t run_ind = *p_run_ind; size_t run_pages = *p_run_pages; @@ -2000,7 +2001,7 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_chunk_map_misc_t *miscelm; size_t size, run_ind, run_pages, flag_dirty, flag_decommitted; - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); miscelm = arena_run_to_miscelm(extent, run); run_ind = arena_miscelm_to_pageind(extent, miscelm); assert(run_ind >= map_bias); @@ -2260,7 +2261,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) * were just deallocated from the run. */ extent = iealloc(tsdn, run); - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); if (run->nfree == bin_info->nregs) { arena_dalloc_bin_run(tsdn, arena, chunk, extent, run, bin); @@ -2425,7 +2426,6 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) /* Large allocation. 
*/ usize = index2size(binind); - malloc_mutex_lock(tsdn, &arena->lock); if (config_cache_oblivious) { uint64_t r; @@ -2434,10 +2434,12 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); + r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE, + true); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; + malloc_mutex_lock(tsdn, &arena->lock); run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero); if (run == NULL) { malloc_mutex_unlock(tsdn, &arena->lock); @@ -2526,7 +2528,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); } extent = iealloc(tsdn, run); - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); miscelm = arena_run_to_miscelm(extent, run); rpages = arena_miscelm_to_rpages(extent, miscelm); @@ -2616,7 +2618,7 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(usize <= large_maxclass)) { ret = arena_palloc_large(tsdn, arena, usize, alignment, zero); - } else if (likely(alignment <= PAGE)) + } else if (likely(alignment <= CACHELINE)) ret = huge_malloc(tsdn, arena, usize, zero); else ret = huge_palloc(tsdn, arena, usize, alignment, zero); @@ -2634,12 +2636,11 @@ arena_prof_promoted(tsdn_t *tsdn, const extent_t *extent, const void *ptr, cassert(config_prof); assert(ptr != NULL); - assert(extent_addr_get(extent) != ptr); assert(isalloc(tsdn, extent, ptr, false) == LARGE_MINCLASS); assert(isalloc(tsdn, extent, ptr, true) == LARGE_MINCLASS); assert(size <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; binind = size2index(size); assert(binind < NBINS); @@ 
-3030,7 +3031,7 @@ arena_ralloc_large(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, return (false); } - chunk = (arena_chunk_t *)extent_addr_get(extent); + chunk = (arena_chunk_t *)extent_base_get(extent); arena = extent_arena_get(extent); if (oldsize < usize_max) { diff --git a/src/chunk.c b/src/chunk.c index 2b599610..4b213a90 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -161,7 +161,7 @@ extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, { *r_elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, - (uintptr_t)extent_addr_get(extent), dependent, init_missing); + (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) return (true); assert(*r_elm_a != NULL); @@ -207,7 +207,7 @@ chunk_interior_register(tsdn_t *tsdn, const extent_t *extent) for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { rtree_write(tsdn, &chunks_rtree, - (uintptr_t)extent_addr_get(extent) + (uintptr_t)(i << + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << LG_PAGE), extent); } } @@ -252,7 +252,7 @@ chunk_interior_deregister(tsdn_t *tsdn, const extent_t *extent) for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { rtree_clear(tsdn, &chunks_rtree, - (uintptr_t)extent_addr_get(extent) + (uintptr_t)(i << + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << LG_PAGE)); } } @@ -315,14 +315,19 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, static extent_t * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, bool slab) + size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, + bool slab) { extent_t *extent; - size_t alloc_size, leadsize, trailsize; + size_t size, alloc_size, leadsize, trailsize; - alloc_size = s2u(size + alignment - PAGE); + assert(new_addr == NULL || !slab); + assert(pad == 0 || !slab); + 
+ size = usize + pad; + alloc_size = s2u(size + PAGE_CEILING(alignment) - PAGE); /* Beware size_t wrap-around. */ - if (alloc_size < size) + if (alloc_size < usize) return (NULL); malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); @@ -350,8 +355,8 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_heaps_remove(extent_heaps, extent); arena_chunk_cache_maybe_remove(arena, extent, cache); - leadsize = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), - alignment) - (uintptr_t)extent_addr_get(extent); + leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); assert(new_addr == NULL || leadsize == 0); assert(extent_size_get(extent) >= leadsize + size); trailsize = extent_size_get(extent) - leadsize - size; @@ -388,7 +393,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } if (!extent_committed_get(extent) && - chunk_hooks->commit(extent_addr_get(extent), + chunk_hooks->commit(extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, @@ -396,6 +401,8 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (NULL); } + if (pad != 0) + extent_addr_randomize(tsdn, extent, alignment); extent_active_set(extent, true); if (slab) { extent_slab_set(extent, slab); @@ -407,13 +414,13 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (*zero) { if (!extent_zeroed_get(extent)) { memset(extent_addr_get(extent), 0, - extent_size_get(extent)); + extent_usize_get(extent)); } else if (config_debug) { size_t i; size_t *p = (size_t *)(uintptr_t) extent_addr_get(extent); - for (i = 0; i < size / sizeof(size_t); i++) + for (i = 0; i < usize / sizeof(size_t); i++) assert(p[i] == 0); } } @@ -456,17 +463,18 @@ 
chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_t * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool slab) + void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, + bool slab) { extent_t *extent; bool commit; - assert(size != 0); + assert(usize + pad != 0); assert(alignment != 0); commit = true; extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, - true, new_addr, size, alignment, zero, &commit, slab); + true, new_addr, usize, pad, alignment, zero, &commit, slab); if (extent == NULL) return (NULL); assert(commit); @@ -507,31 +515,34 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, static extent_t * chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool slab) + void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, + bool *commit, bool slab) { extent_t *extent; - assert(size != 0); + assert(usize != 0); assert(alignment != 0); extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, - false, new_addr, size, alignment, zero, commit, slab); - - if (config_stats && extent != NULL) + false, new_addr, usize, pad, alignment, zero, commit, slab); + if (extent != NULL && config_stats) { + size_t size = usize + pad; arena->stats.retained -= size; + } return (extent); } static extent_t * chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, bool slab) + chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; + size_t size; void *addr; + size = usize + pad; extent = extent_alloc(tsdn, arena); if (extent == NULL) return (NULL); @@ -542,6 +553,8 @@ 
chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return (NULL); } extent_init(extent, arena, addr, size, true, false, zero, commit, slab); + if (pad != 0) + extent_addr_randomize(tsdn, extent, alignment); if (chunk_register(tsdn, extent)) { chunk_leak(tsdn, arena, chunk_hooks, false, extent); return (NULL); @@ -552,18 +565,18 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_t * chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool slab) + void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, + bool *commit, bool slab) { extent_t *extent; chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - extent = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, - alignment, zero, commit, slab); + extent = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, usize, + pad, alignment, zero, commit, slab); if (extent == NULL) { extent = chunk_alloc_wrapper_hard(tsdn, arena, chunk_hooks, - new_addr, size, alignment, zero, commit, slab); + new_addr, usize, pad, alignment, zero, commit, slab); } return (extent); @@ -629,7 +642,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_slab_set(extent, false); } - assert(chunk_lookup(tsdn, extent_addr_get(extent), true) == extent); + assert(chunk_lookup(tsdn, extent_base_get(extent), true) == extent); extent_heaps_insert(extent_heaps, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); @@ -657,9 +670,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent) { - assert(extent_addr_get(extent) != NULL); + assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); chunk_record(tsdn, arena, chunk_hooks, arena->chunks_cached, true, @@ -681,12 +695,14 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, 
arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent) { - assert(extent_addr_get(extent) != NULL); + assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + extent_addr_set(extent, extent_base_get(extent)); + chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. */ - if (!chunk_hooks->dalloc(extent_addr_get(extent), + if (!chunk_hooks->dalloc(extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena->ind)) { chunk_deregister(tsdn, extent); @@ -696,12 +712,12 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to decommit; purge if that fails. */ if (extent_committed_get(extent)) { extent_committed_set(extent, - chunk_hooks->decommit(extent_addr_get(extent), + chunk_hooks->decommit(extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); } extent_zeroed_set(extent, !extent_committed_get(extent) || - !chunk_hooks->purge(extent_addr_get(extent), + !chunk_hooks->purge(extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); if (config_stats) @@ -726,7 +742,7 @@ chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->commit(extent_addr_get(extent), + return (chunk_hooks->commit(extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } @@ -745,7 +761,7 @@ chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->decommit(extent_addr_get(extent), + return (chunk_hooks->decommit(extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } @@ -769,7 +785,7 @@ chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, { chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return 
(chunk_hooks->purge(extent_addr_get(extent), + return (chunk_hooks->purge(extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } @@ -811,7 +827,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, goto label_error_b; } - extent_init(trail, arena, (void *)((uintptr_t)extent_addr_get(extent) + + extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, extent_active_get(extent), extent_dirty_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); @@ -819,7 +835,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, &trail_elm_b)) goto label_error_c; - if (chunk_hooks->split(extent_addr_get(extent), size_a + size_b, size_a, + if (chunk_hooks->split(extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena->ind)) goto label_error_d; @@ -865,8 +881,8 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - if (chunk_hooks->merge(extent_addr_get(a), extent_size_get(a), - extent_addr_get(b), extent_size_get(b), extent_committed_get(a), + if (chunk_hooks->merge(extent_base_get(a), extent_size_get(a), + extent_base_get(b), extent_size_get(b), extent_committed_get(a), arena->ind)) return (true); diff --git a/src/ckh.c b/src/ckh.c index 2c120ac8..5ec0f60a 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. 
*/ - offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS, + false); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -142,7 +143,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * bucket. */ i = (unsigned)prng_lg_range(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + LG_CKH_BUCKET_CELLS, false); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); diff --git a/src/huge.c b/src/huge.c index 69cf034a..b00be904 100644 --- a/src/huge.c +++ b/src/huge.c @@ -9,7 +9,7 @@ huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) assert(usize == s2u(usize)); - return (huge_palloc(tsdn, arena, usize, PAGE, zero)); + return (huge_palloc(tsdn, arena, usize, CACHELINE, zero)); } void * @@ -46,11 +46,11 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) { memset(extent_addr_get(extent), 0, - extent_size_get(extent)); + extent_usize_get(extent)); } } else if (config_fill && unlikely(opt_junk_alloc)) { memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, - extent_size_get(extent)); + extent_usize_get(extent)); } arena_decay_tick(tsdn, arena); @@ -84,28 +84,28 @@ static bool huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); - size_t oldsize = extent_size_get(extent); + size_t oldusize = extent_usize_get(extent); chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - size_t diff = oldsize - usize; + size_t diff = extent_size_get(extent) - (usize + large_pad); - assert(oldsize > usize); + assert(oldusize > usize); /* Split excess pages. 
*/ if (diff != 0) { extent_t *trail = chunk_split_wrapper(tsdn, arena, &chunk_hooks, - extent, usize, diff); + extent, usize + large_pad, diff); if (trail == NULL) return (true); if (config_fill && unlikely(opt_junk_free)) { huge_dalloc_junk(tsdn, extent_addr_get(trail), - extent_size_get(trail)); + extent_usize_get(trail)); } arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, trail); } - arena_chunk_ralloc_huge_shrink(tsdn, arena, extent, oldsize); + arena_chunk_ralloc_huge_shrink(tsdn, arena, extent, oldusize); return (false); } @@ -115,19 +115,19 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = extent_arena_get(extent); - size_t oldsize = extent_size_get(extent); + size_t oldusize = extent_usize_get(extent); bool is_zeroed_trail = false; chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); - size_t trailsize = usize - oldsize; + size_t trailsize = usize - extent_usize_get(extent); extent_t *trail; if ((trail = arena_chunk_cache_alloc(tsdn, arena, &chunk_hooks, - extent_past_get(extent), trailsize, PAGE, &is_zeroed_trail)) == - NULL) { + extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) + == NULL) { bool commit = true; if ((trail = chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, - extent_past_get(extent), trailsize, PAGE, &is_zeroed_trail, - &commit, false)) == NULL) + extent_past_get(extent), trailsize, 0, CACHELINE, + &is_zeroed_trail, &commit, false)) == NULL) return (true); } @@ -137,16 +137,32 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } if (zero || (config_fill && unlikely(opt_zero))) { + if (config_cache_oblivious) { + /* + * Zero the trailing bytes of the original allocation's + * last page, since they are in an indeterminate state. + * There will always be trailing bytes, because ptr's + * offset from the beginning of the run is a multiple of + * CACHELINE in [0 .. PAGE). 
+ */ + void *zbase = (void *) + ((uintptr_t)extent_addr_get(extent) + oldusize); + void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + + PAGE)); + size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; + assert(nzero > 0); + memset(zbase, 0, nzero); + } if (!is_zeroed_trail) { memset((void *)((uintptr_t)extent_addr_get(extent) + - oldsize), 0, usize - oldsize); + oldusize), 0, usize - oldusize); } } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)extent_addr_get(extent) + oldsize), - JEMALLOC_ALLOC_JUNK, usize - oldsize); + memset((void *)((uintptr_t)extent_addr_get(extent) + oldusize), + JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_chunk_ralloc_huge_expand(tsdn, arena, extent, oldsize); + arena_chunk_ralloc_huge_expand(tsdn, arena, extent, oldusize); return (false); } @@ -156,13 +172,13 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero) { - assert(s2u(extent_size_get(extent)) == extent_size_get(extent)); + assert(s2u(extent_usize_get(extent)) == extent_usize_get(extent)); /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocation sizes must be huge to avoid a move. */ - assert(extent_size_get(extent) >= chunksize && usize_max >= chunksize); + assert(extent_usize_get(extent) >= chunksize && usize_max >= chunksize); - if (usize_max > extent_size_get(extent)) { + if (usize_max > extent_usize_get(extent)) { /* Attempt to expand the allocation in-place. */ if (!huge_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { @@ -170,9 +186,9 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, return (false); } /* Try again, this time with usize_min. 
*/ - if (usize_min < usize_max && usize_min > extent_size_get(extent) - && huge_ralloc_no_move_expand(tsdn, extent, usize_min, - zero)) { + if (usize_min < usize_max && usize_min > + extent_usize_get(extent) && huge_ralloc_no_move_expand(tsdn, + extent, usize_min, zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } @@ -182,14 +198,14 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * Avoid moving the allocation if the existing chunk size accommodates * the new size. */ - if (extent_size_get(extent) >= usize_min && extent_size_get(extent) <= + if (extent_usize_get(extent) >= usize_min && extent_usize_get(extent) <= usize_max) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Attempt to shrink the allocation in-place. */ - if (extent_size_get(extent) > usize_max) { + if (extent_usize_get(extent) > usize_max) { if (!huge_ralloc_no_move_shrink(tsdn, extent, usize_max)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); @@ -203,7 +219,7 @@ huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { - if (alignment <= PAGE) + if (alignment <= CACHELINE) return (huge_malloc(tsdn, arena, usize, zero)); return (huge_palloc(tsdn, arena, usize, alignment, zero)); } @@ -218,7 +234,7 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, /* The following should have been caught by callers. */ assert(usize > 0 && usize <= HUGE_MAXCLASS); /* Both allocation sizes must be huge to avoid a move. */ - assert(extent_size_get(extent) >= chunksize && usize >= chunksize); + assert(extent_usize_get(extent) >= chunksize && usize >= chunksize); /* Try to avoid moving the allocation. */ if (!huge_ralloc_no_move(tsdn, extent, usize, usize, zero)) @@ -233,11 +249,11 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, if (ret == NULL) return (NULL); - copysize = (usize < extent_size_get(extent)) ? 
usize : - extent_size_get(extent); + copysize = (usize < extent_usize_get(extent)) ? usize : + extent_usize_get(extent); memcpy(ret, extent_addr_get(extent), copysize); isdalloct(tsdn, extent, extent_addr_get(extent), - extent_size_get(extent), tcache, true); + extent_usize_get(extent), tcache, true); return (ret); } @@ -252,7 +268,7 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent) malloc_mutex_unlock(tsdn, &arena->huge_mtx); huge_dalloc_junk(tsdn, extent_addr_get(extent), - extent_size_get(extent)); + extent_usize_get(extent)); arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), extent); arena_decay_tick(tsdn, arena); @@ -261,15 +277,15 @@ huge_dalloc(tsdn_t *tsdn, extent_t *extent) size_t huge_salloc(tsdn_t *tsdn, const extent_t *extent) { - size_t size; + size_t usize; arena_t *arena; arena = extent_arena_get(extent); malloc_mutex_lock(tsdn, &arena->huge_mtx); - size = extent_size_get(extent); + usize = extent_usize_get(extent); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - return (size); + return (usize); } prof_tctx_t * diff --git a/src/prof.c b/src/prof.c index 03979ca3..5eb9a3d1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -879,7 +879,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range(&tdata->prng_state, 53); + r = prng_lg_range(&tdata->prng_state, 53, false); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) diff --git a/src/tcache.c b/src/tcache.c index 8bd8df01..d3ef9992 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -128,7 +128,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == bin_arena) { arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(extent); + (arena_chunk_t *)extent_base_get(extent); size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; 
arena_chunk_map_bits_t *bitselm = @@ -214,7 +214,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == locked_arena) { arena_chunk_t *chunk = - (arena_chunk_t *)extent_addr_get(extent); + (arena_chunk_t *)extent_base_get(extent); arena_dalloc_large_junked_locked(tsd_tsdn(tsd), locked_arena, chunk, extent, ptr); } else { diff --git a/test/unit/prng.c b/test/unit/prng.c index b22bd2f5..f3234455 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,33 +1,34 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_prng_lg_range) +static void +test_prng_lg_range(bool atomic) { uint64_t sa, sb, ra, rb; unsigned lg_range; sa = 42; - ra = prng_lg_range(&sa, 64); + ra = prng_lg_range(&sa, 64, atomic); sa = 42; - rb = prng_lg_range(&sa, 64); + rb = prng_lg_range(&sa, 64, atomic); assert_u64_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 42; - rb = prng_lg_range(&sb, 64); + rb = prng_lg_range(&sb, 64, atomic); assert_u64_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; - ra = prng_lg_range(&sa, 64); - rb = prng_lg_range(&sa, 64); + ra = prng_lg_range(&sa, 64, atomic); + rb = prng_lg_range(&sa, 64, atomic); assert_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; - ra = prng_lg_range(&sa, 64); + ra = prng_lg_range(&sa, 64, atomic); for (lg_range = 63; lg_range > 0; lg_range--) { sb = 42; - rb = prng_lg_range(&sb, lg_range); + rb = prng_lg_range(&sb, lg_range, atomic); assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); assert_u64_eq(rb, (ra >> (64 - lg_range)), @@ -35,9 +36,23 @@ TEST_BEGIN(test_prng_lg_range) "lg_range=%u", lg_range); } } + +TEST_BEGIN(test_prng_lg_range_nonatomic) +{ + + test_prng_lg_range(false); +} TEST_END -TEST_BEGIN(test_prng_range) +TEST_BEGIN(test_prng_lg_range_atomic) +{ + + test_prng_lg_range(true); +} 
+TEST_END + +static void +test_prng_range(bool atomic) { uint64_t range; #define MAX_RANGE 10000000 @@ -50,12 +65,25 @@ TEST_BEGIN(test_prng_range) s = range; for (rep = 0; rep < NREPS; rep++) { - uint64_t r = prng_range(&s, range); + uint64_t r = prng_range(&s, range, atomic); assert_u64_lt(r, range, "Out of range"); } } } + +TEST_BEGIN(test_prng_range_nonatomic) +{ + + test_prng_range(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_atomic) +{ + + test_prng_range(true); +} TEST_END int @@ -63,6 +91,8 @@ main(void) { return (test( - test_prng_lg_range, - test_prng_range)); + test_prng_lg_range_nonatomic, + test_prng_lg_range_atomic, + test_prng_range_nonatomic, + test_prng_range_atomic)); } From ed2c2427a7684bc8f41da54319c5dff00e177f76 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 May 2016 00:17:28 -0700 Subject: [PATCH 0287/2608] Use huge size class infrastructure for large size classes. --- Makefile.in | 1 - doc/jemalloc.xml.in | 88 +- include/jemalloc/internal/arena.h | 205 ++--- include/jemalloc/internal/chunk.h | 3 +- include/jemalloc/internal/ctl.h | 3 +- include/jemalloc/internal/extent.h | 36 +- include/jemalloc/internal/huge.h | 5 +- .../jemalloc/internal/jemalloc_internal.h.in | 50 +- include/jemalloc/internal/private_symbols.txt | 11 +- include/jemalloc/internal/prof.h | 6 +- include/jemalloc/internal/stats.h | 34 +- include/jemalloc/internal/tcache.h | 33 +- src/arena.c | 870 ++---------------- src/base.c | 3 +- src/chunk.c | 27 +- src/chunk_dss.c | 2 +- src/ctl.c | 175 +--- src/extent.c | 4 +- src/huge.c | 67 +- src/jemalloc.c | 46 +- src/stats.c | 86 +- src/tcache.c | 41 +- src/zone.c | 8 +- test/integration/chunk.c | 28 +- test/integration/xallocx.c | 103 +-- test/unit/arena_reset.c | 33 +- test/unit/decay.c | 41 +- test/unit/extent_quantize.c | 41 +- test/unit/junk.c | 91 +- test/unit/mallctl.c | 23 +- test/unit/prof_idump.c | 13 +- test/unit/run_quantize.c | 149 --- test/unit/stats.c | 105 +-- test/unit/zero.c | 11 +- 34 files 
changed, 463 insertions(+), 1979 deletions(-) delete mode 100644 test/unit/run_quantize.c diff --git a/Makefile.in b/Makefile.in index 7d73155a..2e9bbbc2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -159,7 +159,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/rtree.c \ - $(srcroot)test/unit/run_quantize.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/smoothstep.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e3c97bd8..efb4bfe4 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -417,22 +417,21 @@ for (i = 0; i < nbins; i++) { write_cb, or malloc_message if write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character - within the opts string. Note that + function can be called repeatedly. General information that never changes + during execution can be omitted by specifying "g" as a character within + the opts string. Note that malloc_message uses the mallctl* functions internally, so inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b”, “l”, and “h” can be specified to - omit per size class statistics for bins, large objects, and huge objects, - respectively. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. - + functions simultaneously. If is specified + during configuration, “m” and “a” can be specified + to omit merged arena and per arena statistics, respectively; + “b” and “l” can be specified to omit per size + class statistics for bins and large objects, respectively. 
Unrecognized + characters are silently ignored. Note that thread caching may prevent + some statistics from being completely up to date, since extra locking + would be required to merge counters that track thread cache + operations. The malloc_usable_size function returns the usable size of the allocation pointed to by @@ -1888,25 +1887,6 @@ typedef struct { Number of bytes per page run. - - - arenas.nlruns - (unsigned) - r- - - Total number of large size classes. - - - - - arenas.lrun.<i>.size - (size_t) - r- - - Maximum size supported by this large size - class. - - arenas.nhchunks @@ -2534,50 +2514,6 @@ typedef struct { Current number of runs. - - - stats.arenas.<i>.lruns.<j>.nmalloc - (uint64_t) - r- - [] - - Cumulative number of allocation requests for this size - class served directly by the arena. - - - - - stats.arenas.<i>.lruns.<j>.ndalloc - (uint64_t) - r- - [] - - Cumulative number of deallocation requests for this - size class served directly by the arena. - - - - - stats.arenas.<i>.lruns.<j>.nrequests - (uint64_t) - r- - [] - - Cumulative number of allocation requests for this size - class. - - - - - stats.arenas.<i>.lruns.<j>.curruns - (size_t) - r- - [] - - Current number of runs for this size class. - - - stats.arenas.<i>.hchunks.<j>.nmalloc diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4d2b25a0..bf16e8e9 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -294,7 +294,6 @@ struct arena_s { dss_prec_t dss_prec; - /* Extant arena chunks. */ ql_head(extent_t) achunks; @@ -465,9 +464,6 @@ extern const arena_bin_info_t arena_bin_info[NBINS]; extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ -extern size_t large_maxclass; /* Max large size class. */ -extern unsigned nlclasses; /* Number of large size classes. */ -extern unsigned nhclasses; /* Number of huge size classes. 
*/ #ifdef JEMALLOC_JET typedef size_t (run_quantize_t)(size_t); @@ -485,7 +481,8 @@ void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool cache); extent_t *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + bool locked); void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, @@ -508,33 +505,19 @@ extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; #else void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); #endif -void *arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t ind, - bool zero); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promoted(tsdn_t *tsdn, const extent_t *extent, - const void *ptr, size_t size); +void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, extent_t *extent, void *ptr, arena_chunk_map_bits_t *bitselm); void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, extent_t *extent, void *ptr, size_t pageind); -#ifdef JEMALLOC_JET -typedef void (arena_dalloc_junk_large_t)(void *, size_t); -extern arena_dalloc_junk_large_t *arena_dalloc_junk_large; -#else -void arena_dalloc_junk_large(void *ptr, size_t usize); -#endif -void arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, extent_t *extent, void *ptr); -void 
arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr); -#ifdef JEMALLOC_JET -typedef void (arena_ralloc_junk_large_t)(void *, size_t, size_t); -extern arena_ralloc_junk_large_t *arena_ralloc_junk_large; -#endif bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, @@ -551,8 +534,7 @@ void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats); + malloc_bin_stats_t *bstats, malloc_huge_stats_t *hstats); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); @@ -639,8 +621,7 @@ void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, - bool demote); +size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, @@ -1225,7 +1206,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache, size, ind, zero, slow_path)); } if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + return (tcache_alloc_huge(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path)); } /* (size > tcache_maxclass) 
case falls through. */ @@ -1244,49 +1225,25 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) /* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) +arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { size_t ret; - size_t pageind; - szind_t binind; assert(ptr != NULL); if (likely(extent_slab_get(extent))) { const arena_chunk_t *chunk = (const arena_chunk_t *)extent_base_get(extent); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + szind_t binind; - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; assert(arena_mapbits_allocated_get(chunk, pageind) != 0); binind = arena_mapbits_binind_get(chunk, pageind); - if (unlikely(binind == BININD_INVALID || (config_prof && !demote - && arena_mapbits_large_get(chunk, pageind) != 0))) { - /* - * Large allocation. In the common case (demote), and - * as this is an inline function, most callers will only - * end up looking at binind to determine that ptr is a - * small allocation. - */ - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - ret = arena_mapbits_large_size_get(chunk, pageind) - - large_pad; - assert(ret != 0); - assert(pageind + ((ret+large_pad)>>LG_PAGE) <= - chunk_npages); - assert(arena_mapbits_dirty_get(chunk, pageind) == - arena_mapbits_dirty_get(chunk, - pageind+((ret+large_pad)>>LG_PAGE)-1)); - } else { - /* - * Small allocation (possibly promoted to a large - * object). - */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(tsdn, ptr, - arena_mapbits_get(chunk, pageind)) == binind); - ret = index2size(binind); - } + /* Small allocation. 
*/ + assert(arena_mapbits_large_get(chunk, pageind) != 0 || + arena_ptr_small_binind_get(tsdn, ptr, + arena_mapbits_get(chunk, pageind)) == binind); + ret = index2size(binind); } else ret = huge_salloc(tsdn, extent); @@ -1297,49 +1254,40 @@ JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool slow_path) { - size_t pageind, mapbits; assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); if (likely(extent_slab_get(extent))) { + /* Small allocation. */ arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - mapbits = arena_mapbits_get(chunk, pageind); + size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; + size_t mapbits = arena_mapbits_get(chunk, pageind); assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = - arena_ptr_small_binind_get(tsdn, ptr, - mapbits); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - binind, slow_path); - } else { - arena_dalloc_small(tsdn, - extent_arena_get(extent), chunk, extent, - ptr, pageind); - } + assert((mapbits & CHUNK_MAP_LARGE) == 0); + if (likely(tcache != NULL)) { + szind_t binind = arena_ptr_small_binind_get(tsdn, ptr, + mapbits); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + slow_path); } else { - size_t size = arena_mapbits_large_size_get(chunk, - pageind); - - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size - large_pad <= - tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size - large_pad, slow_path); - } else { - arena_dalloc_large(tsdn, - extent_arena_get(extent), chunk, extent, - ptr); - } + arena_dalloc_small(tsdn, extent_arena_get(extent), + chunk, extent, ptr, pageind); } - } else - huge_dalloc(tsdn, extent); + } else { + size_t usize = 
extent_usize_get(extent); + + if (likely(tcache != NULL) && usize <= tcache_maxclass) { + if (config_prof && unlikely(usize <= SMALL_MAXCLASS)) { + arena_dalloc_promoted(tsdn, extent, ptr, + tcache, slow_path); + } else { + tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, ptr, + usize, slow_path); + } + } else + huge_dalloc(tsdn, extent); + } } JEMALLOC_ALWAYS_INLINE void @@ -1348,55 +1296,34 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, { assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); if (likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - - if (config_prof && opt_prof) { - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != - 0); - if (arena_mapbits_large_get(chunk, pageind) != 0) { - /* - * Make sure to use promoted size, not request - * size. - */ - size = arena_mapbits_large_size_get(chunk, - pageind) - large_pad; - } - } - assert(s2u(size) == s2u(arena_salloc(tsdn, extent, ptr, - false))); - - if (likely(size <= SMALL_MAXCLASS)) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = size2index(size); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - binind, slow_path); - } else { - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> LG_PAGE; - arena_dalloc_small(tsdn, - extent_arena_get(extent), chunk, extent, - ptr, pageind); - } + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + szind_t binind = size2index(size); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + slow_path); } else { - assert(config_cache_oblivious || ((uintptr_t)ptr & - PAGE_MASK) == 0); - - if (likely(tcache != NULL) && size <= tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size, slow_path); - } else { - arena_dalloc_large(tsdn, - extent_arena_get(extent), chunk, extent, - ptr); - } + arena_chunk_t *chunk = + (arena_chunk_t *)extent_base_get(extent); + size_t pageind = ((uintptr_t)ptr - + (uintptr_t)chunk) >> LG_PAGE; + arena_dalloc_small(tsdn, extent_arena_get(extent), + chunk, extent, ptr, pageind); } - } else - huge_dalloc(tsdn, extent); + } else { + if (likely(tcache != NULL) && size <= tcache_maxclass) { + if (config_prof && unlikely(size <= SMALL_MAXCLASS)) { + arena_dalloc_promoted(tsdn, extent, ptr, + tcache, slow_path); + } else { + tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); + } + } else + huge_dalloc(tsdn, extent); + } } # endif /* JEMALLOC_ARENA_INLINE_B */ #endif diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 624073d9..6f50302e 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -61,7 +61,8 @@ bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); extent_t *chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent, size_t size_a, size_t size_b); + chunk_hooks_t *chunk_hooks, extent_t *extent, size_t size_a, size_t usize_a, + size_t size_b, size_t usize_b); bool chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t *a, extent_t *b); bool chunk_boot(void); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index af0f6d7c..00deeb8a 100644 --- 
a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -51,8 +51,7 @@ struct ctl_arena_stats_s { uint64_t nrequests_small; malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t *lstats; /* nlclasses elements. */ - malloc_huge_stats_t *hstats; /* nhclasses elements. */ + malloc_huge_stats_t hstats[NSIZES - NBINS]; }; struct ctl_stats_s { diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 4023f82d..4e1e97ea 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -15,9 +15,15 @@ struct extent_s { /* Pointer to the extent that this structure is responsible for. */ void *e_addr; - /* Total region size. */ + /* Extent size. */ size_t e_size; + /* + * Usable size, typically smaller than extent size due to large_pad or + * promotion of sampled small regions. + */ + size_t e_usize; + /* True if extent is active (in use). */ bool e_active; @@ -106,6 +112,7 @@ void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); +void extent_usize_set(extent_t *extent, size_t usize); void extent_active_set(extent_t *extent, bool active); void extent_dirty_set(extent_t *extent, bool dirty); void extent_zeroed_set(extent_t *extent, bool zeroed); @@ -113,8 +120,8 @@ void extent_committed_set(extent_t *extent, bool committed); void extent_slab_set(extent_t *extent, bool slab); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, bool active, bool dirty, bool zeroed, bool committed, - bool slab); + size_t size, size_t usize, bool active, bool dirty, bool zeroed, + bool committed, bool slab); void extent_dirty_insert(extent_t *extent, arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty); void extent_dirty_remove(extent_t 
*extent); @@ -158,7 +165,7 @@ extent_usize_get(const extent_t *extent) { assert(!extent->e_slab); - return (extent->e_size - large_pad); + return (extent->e_usize); } JEMALLOC_INLINE void * @@ -172,14 +179,15 @@ JEMALLOC_INLINE void * extent_last_get(const extent_t *extent) { - return ((void *)(uintptr_t)extent->e_addr + extent->e_size - PAGE); + return ((void *)(uintptr_t)extent->e_addr + extent_size_get(extent) - + PAGE); } JEMALLOC_INLINE void * extent_past_get(const extent_t *extent) { - return ((void *)(uintptr_t)extent->e_addr + extent->e_size); + return ((void *)(uintptr_t)extent->e_addr + extent_size_get(extent)); } JEMALLOC_INLINE bool @@ -258,9 +266,12 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) uint64_t r = prng_lg_range(&extent_arena_get(extent)->offset_state, lg_range, true); - uintptr_t random_offset = ((uintptr_t)r) << lg_range; + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); extent->e_addr = (void *)((uintptr_t)extent->e_addr + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); } } @@ -271,6 +282,13 @@ extent_size_set(extent_t *extent, size_t size) extent->e_size = size; } +JEMALLOC_INLINE void +extent_usize_set(extent_t *extent, size_t usize) +{ + + extent->e_usize = usize; +} + JEMALLOC_INLINE void extent_active_set(extent_t *extent, bool active) { @@ -315,7 +333,8 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - bool active, bool dirty, bool zeroed, bool committed, bool slab) + size_t usize, bool active, bool dirty, bool zeroed, bool committed, + bool slab) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); @@ -323,6 +342,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_arena_set(extent, arena); extent_addr_set(extent, addr); extent_size_set(extent, size); + extent_usize_set(extent, usize); extent_active_set(extent, 
active); extent_dirty_set(extent, dirty); extent_zeroed_set(extent, zeroed); diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index bdc8f847..836f1b50 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,9 +17,12 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; +#else +void huge_dalloc_junk(void *ptr, size_t usize); #endif +void huge_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); void huge_dalloc(tsdn_t *tsdn, extent_t *extent); size_t huge_salloc(tsdn_t *tsdn, const extent_t *extent); prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ef4e0522..f4d26beb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -797,33 +797,14 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* - * We can't achieve subpage alignment, so round up alignment to the - * minimum that can actually be supported. - */ - alignment = PAGE_CEILING(alignment); - - /* Try for a large size class. */ - if (likely(size <= large_maxclass) && likely(alignment == PAGE)) { - /* Make sure result is a large size class. */ - usize = (size <= LARGE_MINCLASS) ? LARGE_MINCLASS : s2u(size); - - /* - * Calculate the size of the over-size run that arena_palloc() - * would need to allocate in order to guarantee the alignment. - */ - if (usize + large_pad + alignment <= arena_maxrun) - return (usize); - } - /* Huge size class. Beware of overflow. 
*/ if (unlikely(alignment > HUGE_MAXCLASS)) return (0); - /* Make sure result is a huge size class. */ - if (size <= chunksize) - usize = chunksize; + /* Make sure result is a large size class. */ + if (size <= LARGE_MINCLASS) + usize = LARGE_MINCLASS; else { usize = s2u(size); if (usize < size) { @@ -836,7 +817,7 @@ sa2u(size_t size, size_t alignment) * Calculate the multi-page mapping that huge_palloc() would need in * order to guarantee the alignment. */ - if (usize + alignment < usize) { + if (usize + large_pad + PAGE_CEILING(alignment) < usize) { /* size_t overflow. */ return (0); } @@ -960,8 +941,7 @@ iealloc(tsdn_t *tsdn, const void *ptr) #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); -size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, - bool demote); +size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, @@ -971,7 +951,7 @@ void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr); void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool is_metadata, bool slow_path); void idalloc(tsd_t *tsd, extent_t *extent, void *ptr); @@ -1003,17 +983,15 @@ iaalloc(tsdn_t *tsdn, const void *ptr) * tsdn_t *tsdn = [...] * void *ptr = [...] 
* extent_t *extent = iealloc(tsdn, ptr); - * size_t sz = isalloc(tsdn, extent, ptr, config_prof); + * size_t sz = isalloc(tsdn, extent, ptr); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr, bool demote) +isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { assert(ptr != NULL); - /* Demotion only makes sense if config_prof is true. */ - assert(config_prof || !demote); - return (arena_salloc(tsdn, extent, ptr, demote)); + return (arena_salloc(tsdn, extent, ptr)); } JEMALLOC_ALWAYS_INLINE void * @@ -1029,7 +1007,7 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(tsdn, ret), isalloc(tsdn, - iealloc(tsdn, ret), ret, config_prof)); + iealloc(tsdn, ret), ret)); } return (ret); } @@ -1057,7 +1035,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { arena_metadata_allocated_add(iaalloc(tsdn, ret), isalloc(tsdn, - iealloc(tsdn, ret), ret, config_prof)); + iealloc(tsdn, ret), ret)); } return (ret); } @@ -1079,7 +1057,7 @@ ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) +ivsalloc(tsdn_t *tsdn, const void *ptr) { extent_t *extent; @@ -1091,7 +1069,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr, bool demote) /* Only arena chunks should be looked up via interior pointers. 
*/ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); - return (isalloc(tsdn, extent, ptr, demote)); + return (isalloc(tsdn, extent, ptr)); } JEMALLOC_ALWAYS_INLINE void @@ -1104,7 +1082,7 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(!is_metadata || iaalloc(tsdn, ptr)->ind < narenas_auto); if (config_stats && is_metadata) { arena_metadata_allocated_sub(iaalloc(tsdn, ptr), isalloc(tsdn, - extent, ptr, config_prof)); + extent, ptr)); } arena_dalloc(tsdn, extent, ptr, tcache, slow_path); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 75a1dace..5f94d2c2 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -23,10 +23,8 @@ arena_cleanup arena_dalloc arena_dalloc_bin arena_dalloc_bin_junked_locked -arena_dalloc_junk_large arena_dalloc_junk_small -arena_dalloc_large -arena_dalloc_large_junked_locked +arena_dalloc_promoted arena_dalloc_small arena_decay_tick arena_decay_ticks @@ -45,7 +43,6 @@ arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc arena_malloc_hard -arena_malloc_large arena_mapbits_allocated_get arena_mapbits_binind_get arena_mapbits_decommitted_get @@ -92,7 +89,7 @@ arena_prefork3 arena_prof_accum arena_prof_accum_impl arena_prof_accum_locked -arena_prof_promoted +arena_prof_promote arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set @@ -254,6 +251,7 @@ hash_x86_128 hash_x86_32 huge_dalloc huge_dalloc_junk +huge_dalloc_junked_locked huge_malloc huge_palloc huge_prof_tctx_get @@ -287,7 +285,6 @@ ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork -large_maxclass lg_floor lg_prof_sample malloc_cprintf @@ -320,8 +317,6 @@ narenas_tdata_cleanup narenas_total_get ncpus nhbins -nhclasses -nlclasses nstime_add nstime_compare nstime_copy diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 81f02d11..7da20ad0 100644 
--- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -489,7 +489,7 @@ prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsdn, extent, ptr, true)); + assert(usize == isalloc(tsdn, extent, ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); @@ -510,7 +510,7 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. The usize passed to prof_alloc_prep() @@ -544,7 +544,7 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr, true)); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) prof_free_sampled_object(tsd, usize, tctx); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index b6218178..c9a716d7 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -3,7 +3,6 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct malloc_huge_stats_s malloc_huge_stats_t; typedef struct arena_stats_s arena_stats_t; typedef struct chunk_stats_s chunk_stats_t; @@ -62,12 +61,10 @@ struct malloc_bin_stats_s { size_t curruns; }; -struct malloc_large_stats_s { +struct malloc_huge_stats_s { /* * Total number of allocation/deallocation requests served directly by - * the arena. 
Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. + * the arena. */ uint64_t nmalloc; uint64_t ndalloc; @@ -79,21 +76,6 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* - * Current number of runs of this size class, including runs currently - * cached by tcache. - */ - size_t curruns; -}; - -struct malloc_huge_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. - */ - uint64_t nmalloc; - uint64_t ndalloc; - /* Current number of (multi-)chunk allocations of this size class. */ size_t curhchunks; }; @@ -126,21 +108,13 @@ struct arena_stats_s { size_t metadata_mapped; size_t metadata_allocated; /* Protected via atomic_*_z(). */ - /* Per-size-category statistics. */ - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; - size_t allocated_huge; uint64_t nmalloc_huge; uint64_t ndalloc_huge; - - /* One element for each large size class. */ - malloc_large_stats_t *lstats; + uint64_t nrequests_huge; /* One element for each huge size class. */ - malloc_huge_stats_t *hstats; + malloc_huge_stats_t hstats[NSIZES - NBINS]; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index ee63a652..186adf28 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -30,8 +30,8 @@ typedef struct tcaches_s tcaches_t; */ #define TCACHE_NSLOTS_SMALL_MAX 200 -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 +/* Number of cache slots for huge size classes. */ +#define TCACHE_NSLOTS_HUGE 20 /* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 @@ -113,7 +113,7 @@ extern tcache_bin_info_t *tcache_bin_info; /* * Number of tcache bins. 
There are NBINS small-object bins, plus 0 or more - * large-object bins. + * huge-object bins. */ extern unsigned nhbins; @@ -136,7 +136,7 @@ void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +void tcache_bin_flush_huge(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, arena_t *newarena); @@ -163,11 +163,11 @@ void tcache_enabled_set(bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t ind, bool zero, bool slow_path); -void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, +void *tcache_alloc_huge(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t ind, bool zero, bool slow_path); void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path); -void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, +void tcache_dalloc_huge(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, bool slow_path); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -336,7 +336,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, +tcache_alloc_huge(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; @@ -349,14 +349,14 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* - * Only allocate one large object 
at a time, because it's quite + * Only allocate one huge object at a time, because it's quite * expensive to create one and not use it. */ arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) return (NULL); - ret = arena_malloc_large(tsd_tsdn(tsd), arena, binind, zero); + ret = huge_malloc(tsd_tsdn(tsd), arena, s2u(size), zero); if (ret == NULL) return (NULL); } else { @@ -369,14 +369,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(usize <= tcache_maxclass); } - if (config_prof && usize == LARGE_MINCLASS) { - arena_chunk_t *chunk =(arena_chunk_t *)extent_addr_get( - iealloc(tsd_tsdn(tsd), ret)); - size_t pageind = (((uintptr_t)ret - (uintptr_t)chunk) >> - LG_PAGE); - arena_mapbits_large_binind_set(chunk, pageind, - BININD_INVALID); - } if (likely(!zero)) { if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { @@ -424,26 +416,25 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, +tcache_dalloc_huge(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, bool slow_path) { szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; - assert((size & PAGE_MASK) == 0); assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); binind = size2index(size); if (slow_path && config_fill && unlikely(opt_junk_free)) - arena_dalloc_junk_large(ptr, size); + huge_dalloc_junk(ptr, size); tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, + tcache_bin_flush_huge(tsd, tbin, binind, (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); diff --git a/src/arena.c b/src/arena.c index 4ce55577..d9882a45 100644 --- a/src/arena.c +++ b/src/arena.c @@ -33,9 +33,6 @@ const 
arena_bin_info_t arena_bin_info[NBINS] = { size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ -size_t large_maxclass; /* Max large size class. */ -unsigned nlclasses; /* Number of large size classes. */ -unsigned nhclasses; /* Number of huge size classes. */ /******************************************************************************/ /* @@ -447,6 +444,7 @@ static void arena_nactive_sub(arena_t *arena, size_t sub_pages) { + assert(arena->nactive >= sub_pages); if (config_stats) { size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) - CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE); @@ -573,15 +571,6 @@ arena_run_split_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, true, zero)); } -static bool -arena_run_init_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - arena_run_t *run, size_t size, bool zero) -{ - - return (arena_run_split_large_helper(tsdn, arena, extent, run, size, - false, zero)); -} - static bool arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_run_t *run, size_t size, szind_t binind) @@ -835,58 +824,64 @@ arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) static void arena_huge_malloc_stats_update(arena_t *arena, size_t usize) { - szind_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize); + szind_t hindex = (index >= NBINS) ? index - NBINS : 0; cassert(config_stats); arena->stats.nmalloc_huge++; arena->stats.allocated_huge += usize; - arena->stats.hstats[index].nmalloc++; - arena->stats.hstats[index].curhchunks++; + arena->stats.hstats[hindex].nmalloc++; + arena->stats.hstats[hindex].nrequests++; + arena->stats.hstats[hindex].curhchunks++; } static void arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) { - szind_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize); + szind_t hindex = (index >= NBINS) ? 
index - NBINS : 0; cassert(config_stats); arena->stats.nmalloc_huge--; arena->stats.allocated_huge -= usize; - arena->stats.hstats[index].nmalloc--; - arena->stats.hstats[index].curhchunks--; + arena->stats.hstats[hindex].nmalloc--; + arena->stats.hstats[hindex].nrequests--; + arena->stats.hstats[hindex].curhchunks--; } static void arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) { - szind_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize); + szind_t hindex = (index >= NBINS) ? index - NBINS : 0; cassert(config_stats); arena->stats.ndalloc_huge++; arena->stats.allocated_huge -= usize; - arena->stats.hstats[index].ndalloc++; - arena->stats.hstats[index].curhchunks--; + arena->stats.hstats[hindex].ndalloc++; + arena->stats.hstats[hindex].curhchunks--; } static void arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) { - szind_t index = size2index(usize) - nlclasses - NBINS; + szind_t index = size2index(usize); + szind_t hindex = (index >= NBINS) ? 
index - NBINS : 0; cassert(config_stats); arena->stats.ndalloc_huge++; - arena->stats.hstats[index].ndalloc--; + arena->stats.hstats[hindex].ndalloc--; } static void -arena_huge_ralloc_stats_update(arena_t *arena, size_t oldsize, size_t usize) +arena_huge_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) { - arena_huge_dalloc_stats_update(arena, oldsize); + arena_huge_dalloc_stats_update(arena, oldusize); arena_huge_malloc_stats_update(arena, usize); } @@ -906,7 +901,7 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, arena_huge_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } - arena_nactive_sub(arena, usize >> LG_PAGE); + arena_nactive_sub(arena, (usize + large_pad) >> LG_PAGE); malloc_mutex_unlock(tsdn, &arena->lock); } @@ -927,7 +922,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_huge_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } - arena_nactive_add(arena, usize >> LG_PAGE); + arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, usize, large_pad, alignment, zero, false); @@ -941,34 +936,35 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, } void -arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent) +arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + bool locked) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - malloc_mutex_lock(tsdn, &arena->lock); + if (!locked) + malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_dalloc_stats_update(arena, extent_size_get(extent)); + arena_huge_dalloc_stats_update(arena, extent_usize_get(extent)); arena->stats.mapped -= extent_size_get(extent); } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, extent); - malloc_mutex_unlock(tsdn, &arena->lock); + if (!locked) + malloc_mutex_unlock(tsdn, 
&arena->lock); } void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t oldsize) + size_t oldusize) { - size_t usize = extent_size_get(extent); - size_t udiff = oldsize - usize; - size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); + size_t usize = extent_usize_get(extent); + size_t udiff = oldusize - usize; malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_ralloc_stats_update(arena, oldsize, usize); - if (cdiff != 0) - arena->stats.mapped -= cdiff; + arena_huge_ralloc_stats_update(arena, oldusize, usize); + arena->stats.mapped -= udiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); malloc_mutex_unlock(tsdn, &arena->lock); @@ -976,16 +972,15 @@ arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t oldsize) + size_t oldusize) { - size_t usize = extent_size_get(extent); - size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); - size_t udiff = usize - oldsize; + size_t usize = extent_usize_get(extent); + size_t udiff = usize - oldusize; malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_ralloc_stats_update(arena, oldsize, usize); - arena->stats.mapped += cdiff; + arena_huge_ralloc_stats_update(arena, oldusize, usize); + arena->stats.mapped += udiff; } arena_nactive_add(arena, udiff >> LG_PAGE); malloc_mutex_unlock(tsdn, &arena->lock); @@ -1003,7 +998,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) pind = psz2ind(run_quantize_ceil(size)); - for (i = pind; pind2sz(i) <= large_maxclass; i++) { + for (i = pind; pind2sz(i) <= arena_maxrun; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( &arena->runs_avail[i]); if (miscelm != NULL) @@ -1013,54 +1008,6 @@ arena_run_first_best_fit(arena_t *arena, size_t size) return (NULL); } -static arena_run_t * -arena_run_alloc_large_helper(tsdn_t *tsdn, arena_t *arena, size_t size, - bool zero) -{ - 
arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); - if (run != NULL) { - if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, - size, zero)) - run = NULL; - } - return (run); -} - -static arena_run_t * -arena_run_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t size, bool zero) -{ - arena_run_t *run; - extent_t *extent; - - assert(size <= arena_maxrun); - assert(size == PAGE_CEILING(size)); - - /* Search the arena's chunks for the lowest best fit. */ - run = arena_run_alloc_large_helper(tsdn, arena, size, zero); - if (run != NULL) - return (run); - - /* - * No usable runs. Create a new chunk from which to allocate the run. - */ - extent = arena_chunk_alloc(tsdn, arena); - if (extent != NULL) { - run = &arena_miscelm_get_mutable((arena_chunk_t *) - extent_base_get(extent), map_bias)->run; - if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, - size, zero)) - run = NULL; - return (run); - } - - /* - * arena_chunk_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped arena->lock in - * arena_chunk_alloc(), so search one more time. 
- */ - return (arena_run_alloc_large_helper(tsdn, arena, size, zero)); -} - static arena_run_t * arena_run_alloc_small_helper(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) @@ -1700,8 +1647,8 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); qr_new(&purge_runs_sentinel, rd_link); - extent_init(&purge_chunks_sentinel, arena, NULL, 0, false, false, false, - false, false); + extent_init(&purge_chunks_sentinel, arena, NULL, 0, 0, false, false, + false, false, false); npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, &purge_runs_sentinel, &purge_chunks_sentinel); @@ -1732,47 +1679,6 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) malloc_mutex_unlock(tsdn, &arena->lock); } -static void -arena_achunk_prof_reset(tsd_t *tsd, arena_t *arena, extent_t *extent) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind, npages; - - cassert(config_prof); - assert(opt_prof); - - /* - * Iterate over the allocated runs and remove profiled allocations from - * the sample set. - */ - for (pageind = map_bias; pageind < chunk_npages; pageind += npages) { - if (arena_mapbits_allocated_get(chunk, pageind) != 0) { - if (arena_mapbits_large_get(chunk, pageind) != 0) { - void *ptr = (void *)((uintptr_t)chunk + (pageind - << LG_PAGE)); - size_t usize = isalloc(tsd_tsdn(tsd), extent, - ptr, config_prof); - - prof_free(tsd, extent, ptr, usize); - npages = arena_mapbits_large_size_get(chunk, - pageind) >> LG_PAGE; - } else { - /* Skip small run. */ - size_t binind = arena_mapbits_binind_get(chunk, - pageind); - const arena_bin_info_t *bin_info = - &arena_bin_info[binind]; - npages = bin_info->run_size >> LG_PAGE; - } - } else { - /* Skip unallocated run. 
*/ - npages = arena_mapbits_unallocated_size_get(chunk, - pageind) >> LG_PAGE; - } - assert(pageind + npages <= chunk_npages); - } -} - void arena_reset(tsd_t *tsd, arena_t *arena) { @@ -1793,19 +1699,6 @@ arena_reset(tsd_t *tsd, arena_t *arena) * stats refreshes would impose an inconvenient burden. */ - /* Remove large allocations from prof sample set. */ - if (config_prof && opt_prof) { - ql_foreach(extent, &arena->achunks, ql_link) { - arena_achunk_prof_reset(tsd, arena, extent); - } - } - - /* Reset curruns for large size classes. */ - if (config_stats) { - for (i = 0; i < nlclasses; i++) - arena->stats.lstats[i].curruns = 0; - } - /* Huge allocations. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); for (extent = ql_last(&arena->huge, ql_link); extent != NULL; extent = @@ -1814,10 +1707,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); - if (config_stats || (config_prof && opt_prof)) { - usize = isalloc(tsd_tsdn(tsd), extent, ptr, - config_prof); - } + if (config_stats || (config_prof && opt_prof)) + usize = isalloc(tsd_tsdn(tsd), extent, ptr); /* Remove huge allocation from prof sample set. */ if (config_prof && opt_prof) prof_free(tsd, extent, ptr, usize); @@ -2069,93 +1960,6 @@ arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_maybe_purge(tsdn, arena); } -static void -arena_run_trim_head(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, arena_run_t *run, size_t oldsize, size_t newsize) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); - size_t pageind = arena_miscelm_to_pageind(extent, miscelm); - size_t head_npages = (oldsize - newsize) >> LG_PAGE; - size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); - size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind); - size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ? 
- CHUNK_MAP_UNZEROED : 0; - - assert(oldsize > newsize); - - /* - * Update the chunk map so that arena_run_dalloc() can treat the - * leading run as separately allocated. Set the last element of each - * run first, in case of single-page runs. - */ - assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, - pageind+head_npages-1))); - arena_mapbits_large_set(chunk, pageind, oldsize-newsize, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind))); - - if (config_debug) { - UNUSED size_t tail_npages = newsize >> LG_PAGE; - assert(arena_mapbits_large_size_get(chunk, - pageind+head_npages+tail_npages-1) == 0); - assert(arena_mapbits_dirty_get(chunk, - pageind+head_npages+tail_npages-1) == flag_dirty); - } - arena_mapbits_large_set(chunk, pageind+head_npages, newsize, - flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, - pageind+head_npages))); - - arena_run_dalloc(tsdn, arena, extent, run, false, false, - (flag_decommitted != 0)); -} - -static void -arena_run_trim_tail(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, arena_run_t *run, size_t oldsize, size_t newsize, - bool dirty) -{ - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); - size_t pageind = arena_miscelm_to_pageind(extent, miscelm); - size_t head_npages = newsize >> LG_PAGE; - size_t flag_dirty = arena_mapbits_dirty_get(chunk, pageind); - size_t flag_decommitted = arena_mapbits_decommitted_get(chunk, pageind); - size_t flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ? - CHUNK_MAP_UNZEROED : 0; - arena_chunk_map_misc_t *tail_miscelm; - arena_run_t *tail_run; - - assert(oldsize > newsize); - - /* - * Update the chunk map so that arena_run_dalloc() can treat the - * trailing run as separately allocated. Set the last element of each - * run first, in case of single-page runs. 
- */ - assert(arena_mapbits_large_size_get(chunk, pageind) == oldsize); - arena_mapbits_large_set(chunk, pageind+head_npages-1, 0, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, - pageind+head_npages-1))); - arena_mapbits_large_set(chunk, pageind, newsize, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, pageind))); - - if (config_debug) { - UNUSED size_t tail_npages = (oldsize - newsize) >> LG_PAGE; - assert(arena_mapbits_large_size_get(chunk, - pageind+head_npages+tail_npages-1) == 0); - assert(arena_mapbits_dirty_get(chunk, - pageind+head_npages+tail_npages-1) == flag_dirty); - } - arena_mapbits_large_set(chunk, pageind+head_npages, oldsize-newsize, - flag_dirty | (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, - pageind+head_npages))); - - tail_miscelm = arena_miscelm_get_mutable(chunk, pageind + head_npages); - tail_run = &tail_miscelm->run; - arena_run_dalloc(tsdn, arena, extent, tail_run, dirty, false, - (flag_decommitted != 0)); -} - static void arena_bin_runs_insert(arena_bin_t *bin, extent_t *extent, arena_run_t *run) { @@ -2390,7 +2194,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) bin->stats.curregs++; } malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && !isthreaded && arena_prof_accum(tsdn, arena, usize)) + if (config_prof && arena_prof_accum(tsdn, arena, usize)) prof_idump(tsdn); if (!zero) { @@ -2413,71 +2217,6 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) return (ret); } -void * -arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) -{ - void *ret; - size_t usize; - uintptr_t random_offset; - arena_run_t *run; - extent_t *extent; - arena_chunk_map_misc_t *miscelm; - UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); - - /* Large allocation. 
*/ - usize = index2size(binind); - if (config_cache_oblivious) { - uint64_t r; - - /* - * Compute a uniformly distributed offset within the first page - * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 - * for 4 KiB pages and 64-byte cachelines. - */ - r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE, - true); - random_offset = ((uintptr_t)r) << LG_CACHELINE; - } else - random_offset = 0; - malloc_mutex_lock(tsdn, &arena->lock); - run = arena_run_alloc_large(tsdn, arena, usize + large_pad, zero); - if (run == NULL) { - malloc_mutex_unlock(tsdn, &arena->lock); - return (NULL); - } - extent = iealloc(tsdn, run); - miscelm = arena_run_to_miscelm(extent, run); - ret = (void *)((uintptr_t)arena_miscelm_to_rpages(extent, miscelm) + - random_offset); - if (config_stats) { - szind_t index = binind - NBINS; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += usize; - arena->stats.lstats[index].nmalloc++; - arena->stats.lstats[index].nrequests++; - arena->stats.lstats[index].curruns++; - } - if (config_prof) - idump = arena_prof_accum_locked(arena, usize); - malloc_mutex_unlock(tsdn, &arena->lock); - if (config_prof && idump) - prof_idump(tsdn); - - if (!zero) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) - memset(ret, JEMALLOC_ALLOC_JUNK, usize); - else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - } - - arena_decay_tick(tsdn, arena); - return (ret); -} - void * arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero) @@ -2492,106 +2231,9 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, if (likely(size <= SMALL_MAXCLASS)) return (arena_malloc_small(tsdn, arena, ind, zero)); - if (likely(size <= large_maxclass)) - return (arena_malloc_large(tsdn, arena, ind, zero)); return (huge_malloc(tsdn, arena, index2size(ind), zero)); } -/* Only handles large allocations that require more than page alignment. 
*/ -static void * -arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero) -{ - void *ret; - size_t alloc_size, leadsize, trailsize; - arena_run_t *run; - extent_t *extent; - arena_chunk_t *chunk; - arena_chunk_map_misc_t *miscelm; - void *rpages; - - assert(!tsdn_null(tsdn) || arena != NULL); - assert(usize == PAGE_CEILING(usize)); - - if (likely(!tsdn_null(tsdn))) - arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL)) - return (NULL); - - alignment = PAGE_CEILING(alignment); - alloc_size = usize + large_pad + alignment; - - malloc_mutex_lock(tsdn, &arena->lock); - run = arena_run_alloc_large(tsdn, arena, alloc_size, false); - if (run == NULL) { - malloc_mutex_unlock(tsdn, &arena->lock); - return (NULL); - } - extent = iealloc(tsdn, run); - chunk = (arena_chunk_t *)extent_base_get(extent); - miscelm = arena_run_to_miscelm(extent, run); - rpages = arena_miscelm_to_rpages(extent, miscelm); - - leadsize = ALIGNMENT_CEILING((uintptr_t)rpages, alignment) - - (uintptr_t)rpages; - assert(alloc_size >= leadsize + usize); - trailsize = alloc_size - leadsize - usize - large_pad; - if (leadsize != 0) { - arena_chunk_map_misc_t *head_miscelm = miscelm; - arena_run_t *head_run = run; - extent_t *head_extent = extent; - - miscelm = arena_miscelm_get_mutable(chunk, - arena_miscelm_to_pageind(head_extent, head_miscelm) + - (leadsize >> LG_PAGE)); - run = &miscelm->run; - extent = iealloc(tsdn, run); - - arena_run_trim_head(tsdn, arena, chunk, head_extent, head_run, - alloc_size, alloc_size - leadsize); - } - if (trailsize != 0) { - arena_run_trim_tail(tsdn, arena, chunk, extent, run, usize + - large_pad + trailsize, usize + large_pad, false); - } - if (arena_run_init_large(tsdn, arena, extent, run, usize + large_pad, - zero)) { - size_t run_ind = arena_miscelm_to_pageind(extent, - arena_run_to_miscelm(extent, run)); - bool dirty = (arena_mapbits_dirty_get(chunk, run_ind) != 0); - bool decommitted = 
(arena_mapbits_decommitted_get(chunk, - run_ind) != 0); - - assert(decommitted); /* Cause of OOM. */ - arena_run_dalloc(tsdn, arena, extent, run, dirty, false, - decommitted); - malloc_mutex_unlock(tsdn, &arena->lock); - return (NULL); - } - ret = arena_miscelm_to_rpages(extent, miscelm); - - if (config_stats) { - szind_t index = size2index(usize) - NBINS; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += usize; - arena->stats.lstats[index].nmalloc++; - arena->stats.lstats[index].nrequests++; - arena->stats.lstats[index].curruns++; - } - malloc_mutex_unlock(tsdn, &arena->lock); - - if (config_fill && !zero) { - if (unlikely(opt_junk_alloc)) - memset(ret, JEMALLOC_ALLOC_JUNK, usize); - else if (unlikely(opt_zero)) - memset(ret, 0, usize); - } - arena_decay_tick(tsdn, arena); - return (ret); -} - void * arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) @@ -2603,22 +2245,8 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, /* Small; alignment doesn't require special run placement. */ ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); - } else if (usize <= large_maxclass && alignment <= PAGE) { - /* - * Large; alignment doesn't require special run placement. - * However, the cached pointer may be at a random offset from - * the base of the run, so do some bit manipulation to retrieve - * the base. 
- */ - ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, - tcache, true); - if (config_cache_oblivious) - ret = (void *)((uintptr_t)ret & ~PAGE_MASK); } else { - if (likely(usize <= large_maxclass)) { - ret = arena_palloc_large(tsdn, arena, usize, alignment, - zero); - } else if (likely(alignment <= CACHELINE)) + if (likely(alignment <= CACHELINE)) ret = huge_malloc(tsdn, arena, usize, zero); else ret = huge_palloc(tsdn, arena, usize, alignment, zero); @@ -2627,27 +2255,49 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promoted(tsdn_t *tsdn, const extent_t *extent, const void *ptr, - size_t size) +arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize) { - arena_chunk_t *chunk; - size_t pageind; - szind_t binind; cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, extent, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsdn, extent, ptr, true) == LARGE_MINCLASS); - assert(size <= SMALL_MAXCLASS); + assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); + assert(usize <= SMALL_MAXCLASS); - chunk = (arena_chunk_t *)extent_base_get(extent); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - binind = size2index(size); - assert(binind < NBINS); - arena_mapbits_large_binind_set(chunk, pageind, binind); + extent_usize_set(extent, usize); - assert(isalloc(tsdn, extent, ptr, false) == LARGE_MINCLASS); - assert(isalloc(tsdn, extent, ptr, true) == size); + assert(isalloc(tsdn, extent, ptr) == usize); +} + +static size_t +arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) +{ + + cassert(config_prof); + assert(ptr != NULL); + + extent_usize_set(extent, LARGE_MINCLASS); + + assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); + + return (LARGE_MINCLASS); +} + +void +arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path) +{ + size_t usize; + + cassert(config_prof); + assert(opt_prof); + + usize = 
arena_prof_demote(tsdn, extent, ptr); + if (usize <= tcache_maxclass) { + tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, ptr, usize, + slow_path); + } else + huge_dalloc(tsdn, extent); } static void @@ -2792,274 +2442,6 @@ arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_decay_tick(tsdn, arena); } -#ifdef JEMALLOC_JET -#undef arena_dalloc_junk_large -#define arena_dalloc_junk_large JEMALLOC_N(n_arena_dalloc_junk_large) -#endif -void -arena_dalloc_junk_large(void *ptr, size_t usize) -{ - - if (config_fill && unlikely(opt_junk_free)) - memset(ptr, JEMALLOC_FREE_JUNK, usize); -} -#ifdef JEMALLOC_JET -#undef arena_dalloc_junk_large -#define arena_dalloc_junk_large JEMALLOC_N(arena_dalloc_junk_large) -arena_dalloc_junk_large_t *arena_dalloc_junk_large = - JEMALLOC_N(n_arena_dalloc_junk_large); -#endif - -static void -arena_dalloc_large_locked_impl(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, extent_t *extent, void *ptr, bool junked) -{ - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); - arena_run_t *run = &miscelm->run; - - if (config_fill || config_stats) { - size_t usize = arena_mapbits_large_size_get(chunk, pageind) - - large_pad; - - if (!junked) - arena_dalloc_junk_large(ptr, usize); - if (config_stats) { - szind_t index = size2index(usize) - NBINS; - - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= usize; - arena->stats.lstats[index].ndalloc++; - arena->stats.lstats[index].curruns--; - } - } - - arena_run_dalloc(tsdn, arena, extent, run, true, false, false); -} - -void -arena_dalloc_large_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, extent_t *extent, void *ptr) -{ - - arena_dalloc_large_locked_impl(tsdn, arena, chunk, extent, ptr, true); -} - -void -arena_dalloc_large(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr) -{ - - malloc_mutex_lock(tsdn, &arena->lock); - 
arena_dalloc_large_locked_impl(tsdn, arena, chunk, extent, ptr, false); - malloc_mutex_unlock(tsdn, &arena->lock); - arena_decay_tick(tsdn, arena); -} - -static void -arena_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr, size_t oldsize, size_t size) -{ - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); - arena_run_t *run = &miscelm->run; - - assert(size < oldsize); - - /* - * Shrink the run, and make trailing pages available for other - * allocations. - */ - malloc_mutex_lock(tsdn, &arena->lock); - arena_run_trim_tail(tsdn, arena, chunk, extent, run, oldsize + - large_pad, size + large_pad, true); - if (config_stats) { - szind_t oldindex = size2index(oldsize) - NBINS; - szind_t index = size2index(size) - NBINS; - - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[oldindex].ndalloc++; - arena->stats.lstats[oldindex].curruns--; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[index].nmalloc++; - arena->stats.lstats[index].nrequests++; - arena->stats.lstats[index].curruns++; - } - malloc_mutex_unlock(tsdn, &arena->lock); -} - -static bool -arena_ralloc_large_grow(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - void *ptr, size_t oldsize, size_t usize_min, size_t usize_max, bool zero) -{ - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t npages = (oldsize + large_pad) >> LG_PAGE; - size_t followsize; - - assert(oldsize == arena_mapbits_large_size_get(chunk, pageind) - - large_pad); - - /* Try to extend the run. 
*/ - malloc_mutex_lock(tsdn, &arena->lock); - if (pageind+npages >= chunk_npages || arena_mapbits_allocated_get(chunk, - pageind+npages) != 0) - goto label_fail; - followsize = arena_mapbits_unallocated_size_get(chunk, pageind+npages); - if (oldsize + followsize >= usize_min) { - /* - * The next run is available and sufficiently large. Split the - * following run, then merge the first part with the existing - * allocation. - */ - arena_run_t *run; - size_t usize, splitsize, size, flag_dirty, flag_unzeroed_mask; - - usize = usize_max; - while (oldsize + followsize < usize) - usize = index2size(size2index(usize)-1); - assert(usize >= usize_min); - assert(usize >= oldsize); - splitsize = usize - oldsize; - if (splitsize == 0) - goto label_fail; - - run = &arena_miscelm_get_mutable(chunk, pageind+npages)->run; - if (arena_run_split_large(tsdn, arena, iealloc(tsdn, run), run, - splitsize, zero)) - goto label_fail; - - if (config_cache_oblivious && zero) { - /* - * Zero the trailing bytes of the original allocation's - * last page, since they are in an indeterminate state. - * There will always be trailing bytes, because ptr's - * offset from the beginning of the run is a multiple of - * CACHELINE in [0 .. PAGE). - */ - void *zbase = (void *)((uintptr_t)ptr + oldsize); - void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + - PAGE)); - size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; - assert(nzero > 0); - memset(zbase, 0, nzero); - } - - size = oldsize + splitsize; - npages = (size + large_pad) >> LG_PAGE; - - /* - * Mark the extended run as dirty if either portion of the run - * was dirty before allocation. This is rather pedantic, - * because there's not actually any sequence of events that - * could cause the resulting run to be passed to - * arena_run_dalloc() with the dirty argument set to false - * (which is when dirty flag consistency would really matter). 
- */ - flag_dirty = arena_mapbits_dirty_get(chunk, pageind) | - arena_mapbits_dirty_get(chunk, pageind+npages-1); - flag_unzeroed_mask = flag_dirty == 0 ? CHUNK_MAP_UNZEROED : 0; - arena_mapbits_large_set(chunk, pageind, size + large_pad, - flag_dirty | (flag_unzeroed_mask & - arena_mapbits_unzeroed_get(chunk, pageind))); - arena_mapbits_large_set(chunk, pageind+npages-1, 0, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, - pageind+npages-1))); - - if (config_stats) { - szind_t oldindex = size2index(oldsize) - NBINS; - szind_t index = size2index(size) - NBINS; - - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= oldsize; - arena->stats.lstats[oldindex].ndalloc++; - arena->stats.lstats[oldindex].curruns--; - - arena->stats.nmalloc_large++; - arena->stats.nrequests_large++; - arena->stats.allocated_large += size; - arena->stats.lstats[index].nmalloc++; - arena->stats.lstats[index].nrequests++; - arena->stats.lstats[index].curruns++; - } - malloc_mutex_unlock(tsdn, &arena->lock); - return (false); - } -label_fail: - malloc_mutex_unlock(tsdn, &arena->lock); - return (true); -} - -#ifdef JEMALLOC_JET -#undef arena_ralloc_junk_large -#define arena_ralloc_junk_large JEMALLOC_N(n_arena_ralloc_junk_large) -#endif -static void -arena_ralloc_junk_large(void *ptr, size_t old_usize, size_t usize) -{ - - if (config_fill && unlikely(opt_junk_free)) { - memset((void *)((uintptr_t)ptr + usize), JEMALLOC_FREE_JUNK, - old_usize - usize); - } -} -#ifdef JEMALLOC_JET -#undef arena_ralloc_junk_large -#define arena_ralloc_junk_large JEMALLOC_N(arena_ralloc_junk_large) -arena_ralloc_junk_large_t *arena_ralloc_junk_large = - JEMALLOC_N(n_arena_ralloc_junk_large); -#endif - -/* - * Try to resize a large allocation, in order to avoid copying. This will - * always fail if growing an object, and the following run is already in use. 
- */ -static bool -arena_ralloc_large(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t usize_min, size_t usize_max, bool zero) -{ - arena_chunk_t *chunk; - arena_t *arena; - - if (oldsize == usize_max) { - /* Current size class is compatible and maximal. */ - return (false); - } - - chunk = (arena_chunk_t *)extent_base_get(extent); - arena = extent_arena_get(extent); - - if (oldsize < usize_max) { - bool ret = arena_ralloc_large_grow(tsdn, arena, chunk, ptr, - oldsize, usize_min, usize_max, zero); - if (config_fill && !ret && !zero) { - if (unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)ptr + oldsize), - JEMALLOC_ALLOC_JUNK, - isalloc(tsdn, extent, ptr, config_prof) - - oldsize); - } else if (unlikely(opt_zero)) { - memset((void *)((uintptr_t)ptr + oldsize), 0, - isalloc(tsdn, extent, ptr, config_prof) - - oldsize); - } - } - return (ret); - } - - assert(oldsize > usize_max); - /* Fill before shrinking in order avoid a race. */ - arena_ralloc_junk_large(ptr, oldsize, usize_max); - arena_ralloc_large_shrink(tsdn, arena, chunk, extent, ptr, oldsize, - usize_max); - return (false); -} - bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero) @@ -3074,29 +2456,21 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, usize_min = s2u(size); usize_max = s2u(size + extra); - if (likely(oldsize <= large_maxclass && usize_min <= large_maxclass)) { + if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the * same. 
*/ - if (oldsize <= SMALL_MAXCLASS) { - assert(arena_bin_info[size2index(oldsize)].reg_size == - oldsize); - if ((usize_max > SMALL_MAXCLASS || - size2index(usize_max) != size2index(oldsize)) && - (size > oldsize || usize_max < oldsize)) - return (true); - } else { - if (usize_max <= SMALL_MAXCLASS) - return (true); - if (arena_ralloc_large(tsdn, extent, ptr, oldsize, - usize_min, usize_max, zero)) - return (true); - } + assert(arena_bin_info[size2index(oldsize)].reg_size == + oldsize); + if ((usize_max > SMALL_MAXCLASS || size2index(usize_max) != + size2index(oldsize)) && (size > oldsize || usize_max < + oldsize)) + return (true); arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); - } else if (oldsize >= chunksize && usize_max >= chunksize) { + } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) { return (huge_ralloc_no_move(tsdn, extent, usize_min, usize_max, zero)); } @@ -3129,14 +2503,14 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) return (NULL); - if (likely(usize <= large_maxclass)) { + if (likely(usize <= SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. 
*/ if (!arena_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, 0, zero)) return (ptr); } - if (oldsize >= chunksize && usize >= chunksize) { + if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { return (huge_ralloc(tsdn, arena, extent, usize, alignment, zero, tcache)); } @@ -3252,8 +2626,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats, - malloc_huge_stats_t *hstats) + malloc_bin_stats_t *bstats, malloc_huge_stats_t *hstats) { unsigned i; @@ -3270,24 +2643,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->purged += arena->stats.purged; astats->metadata_mapped += arena->stats.metadata_mapped; astats->metadata_allocated += arena_metadata_allocated_get(arena); - astats->allocated_large += arena->stats.allocated_large; - astats->nmalloc_large += arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; astats->allocated_huge += arena->stats.allocated_huge; astats->nmalloc_huge += arena->stats.nmalloc_huge; astats->ndalloc_huge += arena->stats.ndalloc_huge; + astats->nrequests_huge += arena->stats.nrequests_huge; - for (i = 0; i < nlclasses; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].curruns += arena->stats.lstats[i].curruns; - } - - for (i = 0; i < nhclasses; i++) { + for (i = 0; i < NSIZES - NBINS; i++) { hstats[i].nmalloc += arena->stats.hstats[i].nmalloc; hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; + hstats[i].nrequests += arena->stats.hstats[i].nrequests; hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; } malloc_mutex_unlock(tsdn, &arena->lock); @@ -3338,17 +2702,7 @@ 
arena_new(tsdn_t *tsdn, unsigned ind) arena_t *arena; unsigned i; - /* - * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly - * because there is no way to clean up if base_alloc() OOMs. - */ - if (config_stats) { - arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(sizeof(arena_t)) + - QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) + - (nhclasses * sizeof(malloc_huge_stats_t)))); - } else - arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); @@ -3357,20 +2711,8 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) return (NULL); - if (config_stats) { - memset(&arena->stats, 0, sizeof(arena_stats_t)); - arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t))); - memset(arena->stats.lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); - arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(sizeof(arena_t)) + - QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); - memset(arena->stats.hstats, 0, nhclasses * - sizeof(malloc_huge_stats_t)); - if (config_tcache) - ql_new(&arena->tcache_ql); - } + if (config_stats && config_tcache) + ql_new(&arena->tcache_ql); if (config_prof) arena->prof_accumbytes = 0; @@ -3476,18 +2818,6 @@ arena_boot(void) arena_maxrun = chunksize - (map_bias << LG_PAGE); assert(arena_maxrun > 0); - large_maxclass = index2size(size2index(chunksize)-1); - if (large_maxclass > arena_maxrun) { - /* - * For small chunk sizes it's possible for there to be fewer - * non-header pages available than are necessary to serve the - * size classes just below chunksize. 
- */ - large_maxclass = arena_maxrun; - } - assert(large_maxclass > 0); - nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); - nhclasses = NSIZES - nlclasses - NBINS; } void diff --git a/src/base.c b/src/base.c index 1e32d955..134018a8 100644 --- a/src/base.c +++ b/src/base.c @@ -74,7 +74,8 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, csize, true, false, true, true, false); + extent_init(extent, NULL, addr, csize, 0, true, false, true, true, + false); return (extent); } diff --git a/src/chunk.c b/src/chunk.c index 4b213a90..8c4f741f 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -369,7 +369,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (leadsize != 0) { extent_t *lead = extent; extent = chunk_split_wrapper(tsdn, arena, chunk_hooks, lead, - leadsize, size + trailsize); + leadsize, leadsize, size + trailsize, usize + trailsize); if (extent == NULL) { chunk_leak(tsdn, arena, chunk_hooks, cache, lead); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -382,7 +382,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Split the trail. */ if (trailsize != 0) { extent_t *trail = chunk_split_wrapper(tsdn, arena, chunk_hooks, - extent, size, trailsize); + extent, size, usize, trailsize, trailsize); if (trail == NULL) { chunk_leak(tsdn, arena, chunk_hooks, cache, extent); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -390,6 +390,12 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } extent_heaps_insert(extent_heaps, trail); arena_chunk_cache_maybe_insert(arena, trail, cache); + } else if (leadsize == 0) { + /* + * Splitting causes usize to be set as a side effect, but no + * splitting occurred. 
+ */ + extent_usize_set(extent, usize); } if (!extent_committed_get(extent) && @@ -552,7 +558,8 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, arena, extent); return (NULL); } - extent_init(extent, arena, addr, size, true, false, zero, commit, slab); + extent_init(extent, arena, addr, size, usize, true, false, zero, commit, + slab); if (pad != 0) extent_addr_randomize(tsdn, extent, alignment); if (chunk_register(tsdn, extent)) { @@ -635,6 +642,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); + extent_usize_set(extent, 0); extent_active_set(extent, false); extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); if (extent_slab_get(extent)) { @@ -801,7 +809,8 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, extent_t * chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_t *extent, size_t size_a, size_t size_b) + extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, + size_t usize_b) { extent_t *trail; rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; @@ -818,9 +827,9 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_t lead; extent_init(&lead, arena, extent_addr_get(extent), size_a, - extent_active_get(extent), extent_dirty_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_slab_get(extent)); + usize_a, extent_active_get(extent), + extent_dirty_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, &lead_elm_b)) @@ -828,7 +837,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, extent_active_get(extent), + 
size_a), size_b, usize_b, extent_active_get(extent), extent_dirty_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, @@ -840,6 +849,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, goto label_error_d; extent_size_set(extent, size_a); + extent_usize_set(extent, usize_a); extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent); extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail); @@ -905,6 +915,7 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, b_elm_b = b_elm_a; extent_size_set(a, extent_size_get(a) + extent_size_get(b)); + extent_usize_set(a, extent_usize_get(a) + extent_usize_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 0119c12b..e92fda72 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -121,7 +121,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, pad_size = (uintptr_t)ret - (uintptr_t)pad_addr; if (pad_size != 0) { extent_init(pad, arena, pad_addr, pad_size, - false, true, false, true, false); + pad_size, false, true, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || diff --git a/src/ctl.c b/src/ctl.c index 908a2850..26bc1750 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -49,7 +49,6 @@ static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ const size_t *mib, size_t miblen, size_t i); -static bool ctl_arena_init(ctl_arena_stats_t *astats); static void ctl_arena_clear(ctl_arena_stats_t *astats); static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena); @@ -127,8 +126,6 @@ CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) 
CTL_PROTO(arenas_bin_i_run_size) INDEX_PROTO(arenas_bin_i) -CTL_PROTO(arenas_lrun_i_size) -INDEX_PROTO(arenas_lrun_i) CTL_PROTO(arenas_hchunk_i_size) INDEX_PROTO(arenas_hchunk_i) CTL_PROTO(arenas_narenas) @@ -140,7 +137,6 @@ CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) -CTL_PROTO(arenas_nlruns) CTL_PROTO(arenas_nhchunks) CTL_PROTO(arenas_extend) CTL_PROTO(prof_thread_active_init) @@ -154,10 +150,6 @@ CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) CTL_PROTO(stats_arenas_i_small_nrequests) -CTL_PROTO(stats_arenas_i_large_allocated) -CTL_PROTO(stats_arenas_i_large_nmalloc) -CTL_PROTO(stats_arenas_i_large_ndalloc) -CTL_PROTO(stats_arenas_i_large_nrequests) CTL_PROTO(stats_arenas_i_huge_allocated) CTL_PROTO(stats_arenas_i_huge_nmalloc) CTL_PROTO(stats_arenas_i_huge_ndalloc) @@ -172,11 +164,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nruns) CTL_PROTO(stats_arenas_i_bins_j_nreruns) CTL_PROTO(stats_arenas_i_bins_j_curruns) INDEX_PROTO(stats_arenas_i_bins_j) -CTL_PROTO(stats_arenas_i_lruns_j_nmalloc) -CTL_PROTO(stats_arenas_i_lruns_j_ndalloc) -CTL_PROTO(stats_arenas_i_lruns_j_nrequests) -CTL_PROTO(stats_arenas_i_lruns_j_curruns) -INDEX_PROTO(stats_arenas_i_lruns_j) CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc) CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc) CTL_PROTO(stats_arenas_i_hchunks_j_nrequests) @@ -323,17 +310,6 @@ static const ctl_indexed_node_t arenas_bin_node[] = { {INDEX(arenas_bin_i)} }; -static const ctl_named_node_t arenas_lrun_i_node[] = { - {NAME("size"), CTL(arenas_lrun_i_size)} -}; -static const ctl_named_node_t super_arenas_lrun_i_node[] = { - {NAME(""), CHILD(named, arenas_lrun_i)} -}; - -static const ctl_indexed_node_t arenas_lrun_node[] = { - {INDEX(arenas_lrun_i)} -}; - static const ctl_named_node_t arenas_hchunk_i_node[] = { {NAME("size"), CTL(arenas_hchunk_i_size)} }; @@ -356,8 +332,6 @@ static const ctl_named_node_t arenas_node[] = 
{ {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nlruns"), CTL(arenas_nlruns)}, - {NAME("lrun"), CHILD(indexed, arenas_lrun)}, {NAME("nhchunks"), CTL(arenas_nhchunks)}, {NAME("hchunk"), CHILD(indexed, arenas_hchunk)}, {NAME("extend"), CTL(arenas_extend)} @@ -385,13 +359,6 @@ static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; -static const ctl_named_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} -}; - static const ctl_named_node_t stats_arenas_i_huge_node[] = { {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)}, @@ -418,20 +385,6 @@ static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { {INDEX(stats_arenas_i_bins_j)} }; -static const ctl_named_node_t stats_arenas_i_lruns_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lruns_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lruns_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lruns_j_nrequests)}, - {NAME("curruns"), CTL(stats_arenas_i_lruns_j_curruns)} -}; -static const ctl_named_node_t super_stats_arenas_i_lruns_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_lruns_j)} -}; - -static const ctl_indexed_node_t stats_arenas_i_lruns_node[] = { - {INDEX(stats_arenas_i_lruns_j)} -}; - static const ctl_named_node_t stats_arenas_i_hchunks_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_hchunks_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_hchunks_j_ndalloc)}, @@ -460,10 +413,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("purged"), CTL(stats_arenas_i_purged)}, {NAME("metadata"), CHILD(named, stats_arenas_i_metadata)}, {NAME("small"), CHILD(named, 
stats_arenas_i_small)}, - {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lruns"), CHILD(indexed, stats_arenas_i_lruns)}, {NAME("hchunks"), CHILD(indexed, stats_arenas_i_hchunks)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { @@ -508,27 +459,6 @@ static const ctl_named_node_t super_root_node[] = { /******************************************************************************/ -static bool -ctl_arena_init(ctl_arena_stats_t *astats) -{ - - if (astats->lstats == NULL) { - astats->lstats = (malloc_large_stats_t *)a0malloc(nlclasses * - sizeof(malloc_large_stats_t)); - if (astats->lstats == NULL) - return (true); - } - - if (astats->hstats == NULL) { - astats->hstats = (malloc_huge_stats_t *)a0malloc(nhclasses * - sizeof(malloc_huge_stats_t)); - if (astats->hstats == NULL) - return (true); - } - - return (false); -} - static void ctl_arena_clear(ctl_arena_stats_t *astats) { @@ -546,9 +476,7 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->ndalloc_small = 0; astats->nrequests_small = 0; memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); - memset(astats->lstats, 0, nlclasses * - sizeof(malloc_large_stats_t)); - memset(astats->hstats, 0, nhclasses * + memset(astats->hstats, 0, (NSIZES - NBINS) * sizeof(malloc_huge_stats_t)); } } @@ -562,7 +490,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty, &cstats->astats, - cstats->bstats, cstats->lstats, cstats->hstats); + cstats->bstats, cstats->hstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].curregs * @@ -604,16 +532,10 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->ndalloc_small += astats->ndalloc_small; sstats->nrequests_small 
+= astats->nrequests_small; - sstats->astats.allocated_large += - astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += - astats->astats.nrequests_large; - sstats->astats.allocated_huge += astats->astats.allocated_huge; sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; + sstats->astats.nrequests_huge += astats->astats.nrequests_huge; for (i = 0; i < NBINS; i++) { sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; @@ -632,17 +554,11 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->bstats[i].curruns += astats->bstats[i].curruns; } - for (i = 0; i < nlclasses; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += - astats->lstats[i].nrequests; - sstats->lstats[i].curruns += astats->lstats[i].curruns; - } - - for (i = 0; i < nhclasses; i++) { + for (i = 0; i < NSIZES - NBINS; i++) { sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; + sstats->hstats[i].nrequests += + astats->hstats[i].nrequests; sstats->hstats[i].curhchunks += astats->hstats[i].curhchunks; } @@ -680,10 +596,6 @@ ctl_grow(tsdn_t *tsdn) memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); - if (ctl_arena_init(&astats[ctl_stats.narenas + 1])) { - a0dalloc(astats); - return (true); - } /* Swap merged stats to their new location. 
*/ { ctl_arena_stats_t tstats; @@ -730,7 +642,6 @@ ctl_refresh(tsdn_t *tsdn) &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + - ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; ctl_stats.active = (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); @@ -771,30 +682,6 @@ ctl_init(tsdn_t *tsdn) } memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); - - /* - * Initialize all stats structures, regardless of whether they - * ever get used. Lazy initialization would allow errors to - * cause inconsistent state to be viewable by the application. - */ - if (config_stats) { - unsigned i; - for (i = 0; i <= ctl_stats.narenas; i++) { - if (ctl_arena_init(&ctl_stats.arenas[i])) { - unsigned j; - for (j = 0; j < i; j++) { - a0dalloc( - ctl_stats.arenas[j].lstats); - a0dalloc( - ctl_stats.arenas[j].hstats); - } - a0dalloc(ctl_stats.arenas); - ctl_stats.arenas = NULL; - ret = true; - goto label_return; - } - } - } ctl_stats.arenas[ctl_stats.narenas].initialized = true; ctl_epoch = 0; @@ -1924,25 +1811,13 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_NL_GEN(arenas_nlruns, nlclasses, unsigned) -CTL_RO_NL_GEN(arenas_lrun_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) -static const ctl_named_node_t * -arenas_lrun_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) -{ - - if (i > nlclasses) - return (NULL); - return (super_arenas_lrun_i_node); -} - -CTL_RO_NL_GEN(arenas_nhchunks, nhclasses, unsigned) -CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+nlclasses+(szind_t)mib[2]), - size_t) +CTL_RO_NL_GEN(arenas_nhchunks, NSIZES - NBINS, unsigned) +CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > 
nhclasses) + if (i > NSIZES - NBINS) return (NULL); return (super_arenas_hchunk_i_node); } @@ -2136,14 +2011,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - ctl_stats.arenas[mib[2]].astats.nrequests_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_huge_allocated, ctl_stats.arenas[mib[2]].astats.allocated_huge, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nmalloc, @@ -2182,32 +2049,12 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, return (super_stats_arenas_i_bins_j_node); } -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nmalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_ndalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_nrequests, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_lruns_j_curruns, - ctl_stats.arenas[mib[2]].lstats[mib[4]].curruns, size_t) - -static const ctl_named_node_t * -stats_arenas_i_lruns_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) -{ - - if (j > nlclasses) - return (NULL); - return (super_stats_arenas_i_lruns_j_node); -} - CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nmalloc, ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_ndalloc, 
ctl_stats.arenas[mib[2]].hstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nrequests, - ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, /* Intentional. */ - uint64_t) + ctl_stats.arenas[mib[2]].hstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) @@ -2216,7 +2063,7 @@ stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > nhclasses) + if (j > NSIZES - NBINS) return (NULL); return (super_stats_arenas_i_hchunks_j_node); } diff --git a/src/extent.c b/src/extent.c index d7f3b6cc..757a6e21 100644 --- a/src/extent.c +++ b/src/extent.c @@ -40,7 +40,7 @@ extent_size_quantize_floor(size_t size) pszind_t pind; assert(size > 0); - assert(size <= HUGE_MAXCLASS); + assert(size - large_pad <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); assert(size != 0); @@ -77,7 +77,7 @@ extent_size_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size <= HUGE_MAXCLASS); + assert(size - large_pad <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = extent_size_quantize_floor(size); diff --git a/src/huge.c b/src/huge.c index b00be904..5375b59f 100644 --- a/src/huge.c +++ b/src/huge.c @@ -19,6 +19,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, size_t ausize; extent_t *extent; bool is_zeroed; + UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); assert(!tsdn_null(tsdn) || arena != NULL); @@ -42,6 +43,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, ql_elm_new(extent, ql_link); ql_tail_insert(&arena->huge, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + if (config_prof && arena_prof_accum(tsdn, arena, usize)) + prof_idump(tsdn); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) { @@ -61,8 +64,20 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #undef huge_dalloc_junk 
#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif +void +huge_dalloc_junk(void *ptr, size_t usize) +{ + + memset(ptr, JEMALLOC_FREE_JUNK, usize); +} +#ifdef JEMALLOC_JET +#undef huge_dalloc_junk +#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) +huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); +#endif + static void -huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -71,14 +86,10 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) * unmapped. */ if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + huge_dalloc_junk(ptr, usize); memset(ptr, JEMALLOC_FREE_JUNK, usize); } } -#ifdef JEMALLOC_JET -#undef huge_dalloc_junk -#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) -huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); -#endif static bool huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) @@ -93,12 +104,12 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) /* Split excess pages. */ if (diff != 0) { extent_t *trail = chunk_split_wrapper(tsdn, arena, &chunk_hooks, - extent, usize + large_pad, diff); + extent, usize + large_pad, usize, diff, diff); if (trail == NULL) return (true); if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, extent_addr_get(trail), + huge_dalloc_maybe_junk(tsdn, extent_addr_get(trail), extent_usize_get(trail)); } @@ -176,7 +187,8 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); /* Both allocation sizes must be huge to avoid a move. 
*/ - assert(extent_usize_get(extent) >= chunksize && usize_max >= chunksize); + assert(extent_usize_get(extent) >= LARGE_MINCLASS && usize_max >= + LARGE_MINCLASS); if (usize_max > extent_usize_get(extent)) { /* Attempt to expand the allocation in-place. */ @@ -234,7 +246,8 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, /* The following should have been caught by callers. */ assert(usize > 0 && usize <= HUGE_MAXCLASS); /* Both allocation sizes must be huge to avoid a move. */ - assert(extent_usize_get(extent) >= chunksize && usize >= chunksize); + assert(extent_usize_get(extent) >= LARGE_MINCLASS && usize >= + LARGE_MINCLASS); /* Try to avoid moving the allocation. */ if (!huge_ralloc_no_move(tsdn, extent, usize, usize, zero)) @@ -257,21 +270,39 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, return (ret); } -void -huge_dalloc(tsdn_t *tsdn, extent_t *extent) +static void +huge_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) { arena_t *arena; arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + if (!junked_locked) + malloc_mutex_lock(tsdn, &arena->huge_mtx); ql_remove(&arena->huge, extent, ql_link); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + if (!junked_locked) { + malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_addr_get(extent), - extent_usize_get(extent)); - arena_chunk_dalloc_huge(tsdn, extent_arena_get(extent), extent); + huge_dalloc_maybe_junk(tsdn, extent_addr_get(extent), + extent_usize_get(extent)); + } + arena_chunk_dalloc_huge(tsdn, arena, extent, junked_locked); - arena_decay_tick(tsdn, arena); + if (!junked_locked) + arena_decay_tick(tsdn, arena); +} + +void +huge_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) +{ + + huge_dalloc_impl(tsdn, extent, true); +} + +void +huge_dalloc(tsdn_t *tsdn, extent_t *extent) +{ + + huge_dalloc_impl(tsdn, extent, false); } size_t diff --git a/src/jemalloc.c b/src/jemalloc.c index 
479d8319..9f8bd01e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1401,7 +1401,7 @@ ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, + arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize); } else p = ialloc(tsd, usize, ind, zero, slow_path); @@ -1483,8 +1483,7 @@ ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, set_errno(ENOMEM); } if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret, - config_prof)); + assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret)); *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } witness_assert_lockless(tsdn); @@ -1527,7 +1526,7 @@ imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, + arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize); } else p = ipalloc(tsd, usize, alignment, false); @@ -1608,7 +1607,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) label_return: if (config_stats && likely(result != NULL)) { assert(usize == isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - result), result, config_prof)); + result), result)); *tsd_thread_allocatedp_get(tsd) += usize; } UTRACE(0, size, result); @@ -1699,7 +1698,7 @@ irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, false); if (p == NULL) return (NULL); - arena_prof_promoted(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, + arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize); } else p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); @@ -1748,10 +1747,10 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) extent = iealloc(tsd_tsdn(tsd), ptr); if 
(config_prof && opt_prof) { - usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), extent, ptr); prof_free(tsd, extent, ptr, usize); } else if (config_stats) - usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); + usize = isalloc(tsd_tsdn(tsd), extent, ptr); if (config_stats) *tsd_thread_deallocatedp_get(tsd) += usize; @@ -1815,7 +1814,7 @@ je_realloc(void *ptr, size_t size) witness_assert_lockless(tsd_tsdn(tsd)); extent = iealloc(tsd_tsdn(tsd), ptr); - old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? @@ -1848,8 +1847,7 @@ je_realloc(void *ptr, size_t size) if (config_stats && likely(ret != NULL)) { tsd_t *tsd; - assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret, - config_prof)); + assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret)); tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; @@ -2003,7 +2001,7 @@ imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache, arena, slow_path); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, iealloc(tsdn, p), p, usize); + arena_prof_promote(tsdn, iealloc(tsdn, p), p, usize); } else p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, slow_path); @@ -2138,7 +2136,7 @@ irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, alignment, zero, tcache, arena); if (p == NULL) return (NULL); - arena_prof_promoted(tsdn, iealloc(tsdn, p), p, usize); + arena_prof_promote(tsdn, iealloc(tsdn, p), p, usize); } else { p = iralloct(tsdn, extent, old_ptr, old_usize, usize, alignment, zero, tcache, arena); @@ -2182,7 +2180,7 @@ irallocx_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, * reallocation. Therefore, query the actual value of usize. 
*/ e = extent; - *usize = isalloc(tsd_tsdn(tsd), e, p, config_prof); + *usize = isalloc(tsd_tsdn(tsd), e, p); } else e = iealloc(tsd_tsdn(tsd), p); prof_realloc(tsd, e, p, *usize, tctx, prof_active, true, old_ptr, @@ -2229,7 +2227,7 @@ je_rallocx(void *ptr, size_t size, int flags) } else tcache = tcache_get(tsd, true); - old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); @@ -2246,7 +2244,7 @@ je_rallocx(void *ptr, size_t size, int flags) goto label_oom; if (config_stats) { usize = isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - p), p, config_prof); + p), p); } } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2276,7 +2274,7 @@ ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, if (ixalloc(tsdn, extent, ptr, old_usize, size, extra, alignment, zero)) return (old_usize); - usize = isalloc(tsdn, extent, ptr, config_prof); + usize = isalloc(tsdn, extent, ptr); return (usize); } @@ -2363,7 +2361,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) witness_assert_lockless(tsd_tsdn(tsd)); extent = iealloc(tsd_tsdn(tsd), ptr); - old_usize = isalloc(tsd_tsdn(tsd), extent, ptr, config_prof); + old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); /* * The API explicitly absolves itself of protecting against (size + @@ -2414,9 +2412,9 @@ je_sallocx(const void *ptr, int flags) witness_assert_lockless(tsdn); if (config_ivsalloc) - usize = ivsalloc(tsdn, ptr, config_prof); + usize = ivsalloc(tsdn, ptr); else - usize = isalloc(tsdn, iealloc(tsdn, ptr), ptr, config_prof); + usize = isalloc(tsdn, iealloc(tsdn, ptr), ptr); witness_assert_lockless(tsdn); return (usize); @@ -2477,7 +2475,7 @@ je_sdallocx(void *ptr, size_t size, int flags) tsd = tsd_fetch(); extent = iealloc(tsd_tsdn(tsd), ptr); usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == 
isalloc(tsd_tsdn(tsd), extent, ptr, config_prof)); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2593,10 +2591,10 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) witness_assert_lockless(tsdn); if (config_ivsalloc) - ret = ivsalloc(tsdn, ptr, config_prof); + ret = ivsalloc(tsdn, ptr); else { - ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(tsdn, ptr), ptr, - config_prof); + ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(tsdn, ptr), + ptr); } witness_assert_lockless(tsdn); diff --git a/src/stats.c b/src/stats.c index 0e1442ed..4dc48d5b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -37,12 +37,10 @@ size_t stats_cactive = 0; static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); static void stats_arena_hchunks_print( void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large, bool huge); + void *cbopaque, unsigned i, bool bins, bool huge); /******************************************************************************/ @@ -157,64 +155,17 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } } -static void -stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) -{ - unsigned nbins, nlruns, j; - bool in_gap; - - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curruns\n"); - CTL_GET("arenas.nbins", &nbins, unsigned); - CTL_GET("arenas.nlruns", &nlruns, unsigned); - for (j = 0, in_gap = false; j < nlruns; j++) { - uint64_t nmalloc, ndalloc, nrequests; - size_t run_size, curruns; - - CTL_M2_M4_GET("stats.arenas.0.lruns.0.nmalloc", i, j, &nmalloc, - 
uint64_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.ndalloc", i, j, &ndalloc, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, - &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, - &curruns, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64" %12zu\n", - run_size, nbins + j, curruns * run_size, nmalloc, - ndalloc, nrequests, curruns); - } - } - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } -} - static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - unsigned nbins, nlruns, nhchunks, j; + unsigned nbins, nhchunks, j; bool in_gap; malloc_cprintf(write_cb, cbopaque, "huge: size ind allocated nmalloc ndalloc" " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); - CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; @@ -241,7 +192,7 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", - hchunk_size, nbins + nlruns + j, + hchunk_size, nbins + j, curhchunks * hchunk_size, nmalloc, ndalloc, nrequests, curhchunks); } @@ -254,7 +205,7 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large, bool huge) + unsigned i, bool bins, bool huge) { unsigned nthreads; const char *dss; @@ -264,8 +215,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t npurge, nmadvise, purged; size_t 
small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; - size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; size_t huge_allocated; uint64_t huge_nmalloc, huge_ndalloc, huge_nrequests; @@ -318,16 +267,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "small: %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64"\n", small_allocated, small_nmalloc, small_ndalloc, small_nrequests); - CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, - uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); @@ -340,10 +279,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "total: %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); + small_allocated + huge_allocated, small_nmalloc + huge_nmalloc, + small_ndalloc + huge_ndalloc, small_nrequests + huge_nrequests); malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); @@ -362,8 +299,6 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, if (bins) stats_arena_bins_print(write_cb, cbopaque, i); - if (large) - stats_arena_lruns_print(write_cb, cbopaque, 
i); if (huge) stats_arena_hchunks_print(write_cb, cbopaque, i); } @@ -379,7 +314,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool merged = true; bool unmerged = true; bool bins = true; - bool large = true; bool huge = true; /* @@ -421,9 +355,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bins = false; break; case 'l': - large = false; - break; - case 'h': huge = false; break; default:; @@ -636,7 +567,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); stats_arena_print(write_cb, cbopaque, - narenas, bins, large, huge); + narenas, bins, huge); } } } @@ -662,8 +593,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque, "\narenas[%u]:\n", i); stats_arena_print(write_cb, - cbopaque, i, bins, large, - huge); + cbopaque, i, bins, huge); } } } diff --git a/src/tcache.c b/src/tcache.c index d3ef9992..41074d34 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -27,7 +27,7 @@ size_t tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr, false)); + return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr)); } void @@ -46,7 +46,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tbin->ncached - tbin->low_water + (tbin->low_water >> 2)); } else { - tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached + tcache_bin_flush_huge(tsd, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); } /* @@ -170,7 +170,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_huge(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { arena_t *arena; @@ -200,9 +200,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } if (config_stats) { merged_stats = true; - arena->stats.nrequests_large += + 
arena->stats.nrequests_huge += tbin->tstats.nrequests; - arena->stats.lstats[binind - NBINS].nrequests += + arena->stats.hstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } @@ -213,10 +213,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(ptr != NULL); extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == locked_arena) { - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - arena_dalloc_large_junked_locked(tsd_tsdn(tsd), - locked_arena, chunk, extent, ptr); + huge_dalloc_junked_locked(tsd_tsdn(tsd), + extent); } else { /* * This object was allocated via a different @@ -240,8 +238,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * arena, so the stats didn't get merged. Manually do so now. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - NBINS].nrequests += + arena->stats.nrequests_huge += tbin->tstats.nrequests; + arena->stats.hstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -379,12 +377,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_large(tsd, tbin, i, 0, tcache); + tcache_bin_flush_huge(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - NBINS].nrequests += + arena->stats.nrequests_huge += tbin->tstats.nrequests; + arena->stats.hstats[i - NBINS].nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } @@ -439,10 +437,10 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; + 
malloc_huge_stats_t *hstats = &arena->stats.hstats[i - NBINS]; tcache_bin_t *tbin = &tcache->tbins[i]; - arena->stats.nrequests_large += tbin->tstats.nrequests; - lstats->nrequests += tbin->tstats.nrequests; + arena->stats.nrequests_huge += tbin->tstats.nrequests; + hstats->nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } } @@ -516,14 +514,9 @@ tcache_boot(tsdn_t *tsdn) { unsigned i; - /* - * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is - * known. - */ + /* If necessary, clamp opt_lg_tcache_max. */ if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > large_maxclass) - tcache_maxclass = large_maxclass; else tcache_maxclass = (1U << opt_lg_tcache_max); @@ -550,7 +543,7 @@ tcache_boot(tsdn_t *tsdn) stack_nelms += tcache_bin_info[i].ncached_max; } for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_HUGE; stack_nelms += tcache_bin_info[i].ncached_max; } diff --git a/src/zone.c b/src/zone.c index 2c17123a..4609503a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -56,7 +56,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * not work in practice, we must check all pointers to assure that they * reside within a mapped chunk before determining size. 
*/ - return (ivsalloc(tsdn_fetch(), ptr, config_prof)); + return (ivsalloc(tsdn_fetch(), ptr)); } static void * @@ -87,7 +87,7 @@ static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) { + if (ivsalloc(tsdn_fetch(), ptr) != 0) { je_free(ptr); return; } @@ -99,7 +99,7 @@ static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(tsdn_fetch(), ptr, config_prof) != 0) + if (ivsalloc(tsdn_fetch(), ptr) != 0) return (je_realloc(ptr, size)); return (realloc(ptr, size)); @@ -123,7 +123,7 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { size_t alloc_size; - alloc_size = ivsalloc(tsdn_fetch(), ptr, config_prof); + alloc_size = ivsalloc(tsdn_fetch(), ptr); if (alloc_size != 0) { assert(alloc_size == size); je_free(ptr); diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 092472c6..3aad7a8a 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -120,7 +120,7 @@ chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, TEST_BEGIN(test_chunk) { void *p; - size_t old_size, new_size, large0, large1, huge0, huge1, huge2, sz; + size_t old_size, new_size, huge0, huge1, huge2, sz; unsigned arena_ind; int flags; size_t hooks_mib[3], purge_mib[3]; @@ -162,14 +162,8 @@ TEST_BEGIN(test_chunk) assert_ptr_ne(old_hooks.split, chunk_split, "Unexpected split error"); assert_ptr_ne(old_hooks.merge, chunk_merge, "Unexpected merge error"); - /* Get large size classes. */ - sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected arenas.lrun.0.size failure"); - assert_d_eq(mallctl("arenas.lrun.1.size", &large1, &sz, NULL, 0), 0, - "Unexpected arenas.lrun.1.size failure"); - /* Get huge size classes. 
*/ + sz = sizeof(size_t); assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, "Unexpected arenas.hchunk.0.size failure"); assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0, @@ -224,24 +218,6 @@ TEST_BEGIN(test_chunk) do_dalloc = true; do_decommit = false; - /* Test decommit for large allocations. */ - do_decommit = true; - p = mallocx(large1, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), - 0, "Unexpected arena.%u.purge error", arena_ind); - did_decommit = false; - assert_zu_eq(xallocx(p, large0, 0, flags), large0, - "Unexpected xallocx() failure"); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), - 0, "Unexpected arena.%u.purge error", arena_ind); - did_commit = false; - assert_zu_eq(xallocx(p, large1, 0, flags), large1, - "Unexpected xallocx() failure"); - assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); - dallocx(p, flags); - do_decommit = false; - /* Make sure non-huge allocation succeeds. 
*/ p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index ad292bb5..7af1b194 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -91,13 +91,6 @@ get_nsmall(void) return (get_nsizes_impl("arenas.nbins")); } -static unsigned -get_nlarge(void) -{ - - return (get_nsizes_impl("arenas.nlruns")); -} - static unsigned get_nhuge(void) { @@ -131,13 +124,6 @@ get_small_size(size_t ind) return (get_size_impl("arenas.bin.0.size", ind)); } -static size_t -get_large_size(size_t ind) -{ - - return (get_size_impl("arenas.lrun.0.size", ind)); -} - static size_t get_huge_size(size_t ind) { @@ -239,81 +225,14 @@ TEST_BEGIN(test_extra_small) } TEST_END -TEST_BEGIN(test_extra_large) +TEST_BEGIN(test_extra_huge) { int flags = MALLOCX_ARENA(arena_ind()); - size_t smallmax, large0, large1, large2, huge0, hugemax; + size_t smallmax, huge1, huge2, huge3, hugemax; void *p; /* Get size classes. */ smallmax = get_small_size(get_nsmall()-1); - large0 = get_large_size(0); - large1 = get_large_size(1); - large2 = get_large_size(2); - huge0 = get_huge_size(0); - hugemax = get_huge_size(get_nhuge()-1); - - p = mallocx(large2, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - - assert_zu_eq(xallocx(p, large2, 0, flags), large2, - "Unexpected xallocx() behavior"); - /* Test size decrease with zero extra. */ - assert_zu_eq(xallocx(p, large0, 0, flags), large0, - "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, 0, flags), large0, - "Unexpected xallocx() behavior"); - - assert_zu_eq(xallocx(p, large2, 0, flags), large2, - "Unexpected xallocx() behavior"); - /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, - "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2, - "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large0, large1 - large0, flags), large1, - "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, smallmax, large0 - smallmax, flags), large0, - "Unexpected xallocx() behavior"); - - assert_zu_eq(xallocx(p, large0, 0, flags), large0, - "Unexpected xallocx() behavior"); - /* Test size increase with zero extra. */ - assert_zu_eq(xallocx(p, large2, 0, flags), large2, - "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge0, 0, flags), large2, - "Unexpected xallocx() behavior"); - - assert_zu_eq(xallocx(p, large0, 0, flags), large0, - "Unexpected xallocx() behavior"); - /* Test size increase with non-zero extra. */ - assert_zu_lt(xallocx(p, large0, huge0 - large0, flags), huge0, - "Unexpected xallocx() behavior"); - - assert_zu_eq(xallocx(p, large0, 0, flags), large0, - "Unexpected xallocx() behavior"); - /* Test size increase with non-zero extra. */ - assert_zu_eq(xallocx(p, large0, large2 - large0, flags), large2, - "Unexpected xallocx() behavior"); - - assert_zu_eq(xallocx(p, large2, 0, flags), large2, - "Unexpected xallocx() behavior"); - /* Test size+extra overflow. */ - assert_zu_lt(xallocx(p, large2, hugemax - large2 + 1, flags), huge0, - "Unexpected xallocx() behavior"); - - dallocx(p, flags); -} -TEST_END - -TEST_BEGIN(test_extra_huge) -{ - int flags = MALLOCX_ARENA(arena_ind()); - size_t largemax, huge1, huge2, huge3, hugemax; - void *p; - - /* Get size classes. */ - largemax = get_large_size(get_nlarge()-1); huge1 = get_huge_size(1); huge2 = get_huge_size(2); huge3 = get_huge_size(3); @@ -327,7 +246,7 @@ TEST_BEGIN(test_extra_huge) /* Test size decrease with zero extra. 
*/ assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, 0, flags), huge1, + assert_zu_ge(xallocx(p, smallmax, 0, flags), huge1, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, @@ -339,7 +258,7 @@ TEST_BEGIN(test_extra_huge) "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, largemax, huge1 - largemax, flags), huge1, + assert_zu_ge(xallocx(p, smallmax, huge1 - smallmax, flags), huge1, "Unexpected xallocx() behavior"); assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, @@ -455,18 +374,6 @@ test_zero(size_t szmin, size_t szmax) dallocx(p, flags); } -TEST_BEGIN(test_zero_large) -{ - size_t large0, largemax; - - /* Get size classes. */ - large0 = get_large_size(0); - largemax = get_large_size(get_nlarge()-1); - - test_zero(large0, largemax); -} -TEST_END - TEST_BEGIN(test_zero_huge) { size_t huge0, huge1; @@ -490,8 +397,6 @@ main(void) test_size, test_size_extra_overflow, test_extra_small, - test_extra_large, test_extra_huge, - test_zero_large, test_zero_huge)); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index fa2c5cd5..546d3cc8 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -24,13 +24,6 @@ get_nsmall(void) return (get_nsizes_impl("arenas.nbins")); } -static unsigned -get_nlarge(void) -{ - - return (get_nsizes_impl("arenas.nlruns")); -} - static unsigned get_nhuge(void) { @@ -64,13 +57,6 @@ get_small_size(size_t ind) return (get_size_impl("arenas.bin.0.size", ind)); } -static size_t -get_large_size(size_t ind) -{ - - return (get_size_impl("arenas.lrun.0.size", ind)); -} - static size_t get_huge_size(size_t ind) { @@ -90,13 +76,13 @@ vsalloc(tsdn_t *tsdn, const void *ptr) if (!extent_active_get(extent)) return (0); - return (isalloc(tsdn, extent, ptr, false)); + return (isalloc(tsdn, extent, ptr)); } 
TEST_BEGIN(test_arena_reset) { -#define NHUGE 4 - unsigned arena_ind, nsmall, nlarge, nhuge, nptrs, i; +#define NHUGE 32 + unsigned arena_ind, nsmall, nhuge, nptrs, i; size_t sz, miblen; void **ptrs; int flags; @@ -110,9 +96,8 @@ TEST_BEGIN(test_arena_reset) flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; nsmall = get_nsmall(); - nlarge = get_nlarge(); nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge(); - nptrs = nsmall + nlarge + nhuge; + nptrs = nsmall + nhuge; ptrs = (void **)malloc(nptrs * sizeof(void *)); assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); @@ -123,15 +108,9 @@ TEST_BEGIN(test_arena_reset) assert_ptr_not_null(ptrs[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } - for (i = 0; i < nlarge; i++) { - sz = get_large_size(i); - ptrs[nsmall + i] = mallocx(sz, flags); - assert_ptr_not_null(ptrs[i], - "Unexpected mallocx(%zu, %#x) failure", sz, flags); - } for (i = 0; i < nhuge; i++) { sz = get_huge_size(i); - ptrs[nsmall + nlarge + i] = mallocx(sz, flags); + ptrs[nsmall + i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } @@ -140,7 +119,7 @@ TEST_BEGIN(test_arena_reset) /* Verify allocations. 
*/ for (i = 0; i < nptrs; i++) { - assert_zu_gt(ivsalloc(tsdn, ptrs[i], false), 0, + assert_zu_gt(ivsalloc(tsdn, ptrs[i]), 0, "Allocation should have queryable size"); } diff --git a/test/unit/decay.c b/test/unit/decay.c index 70a2e67a..786cc934 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "purge:decay,decay_time:1"; +const char *malloc_conf = "purge:decay,decay_time:1,lg_tcache_max:0"; static nstime_update_t *nstime_update_orig; @@ -22,7 +22,7 @@ TEST_BEGIN(test_decay_ticks) { ticker_t *decay_ticker; unsigned tick0, tick1; - size_t sz, huge0, large0; + size_t sz, huge0; void *p; test_skip_if(opt_purge != purge_mode_decay); @@ -34,13 +34,11 @@ TEST_BEGIN(test_decay_ticks) sz = sizeof(size_t); assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); /* * Test the standard APIs using a huge size class, since we can't - * control tcache interactions (except by completely disabling tcache - * for the entire test program). + * control tcache interactions for small size classes (except by + * completely disabling tcache for the entire test program). */ /* malloc(). */ @@ -101,15 +99,14 @@ TEST_BEGIN(test_decay_ticks) assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* - * Test the *allocx() APIs using huge, large, and small size classes, - * with tcache explicitly disabled. + * Test the *allocx() APIs using huge and small size classes, with + * tcache explicitly disabled. 
*/ { unsigned i; - size_t allocx_sizes[3]; + size_t allocx_sizes[2]; allocx_sizes[0] = huge0; - allocx_sizes[1] = large0; - allocx_sizes[2] = 1; + allocx_sizes[1] = 1; for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { sz = allocx_sizes[i]; @@ -157,13 +154,13 @@ TEST_BEGIN(test_decay_ticks) } /* - * Test tcache fill/flush interactions for large and small size classes, + * Test tcache fill/flush interactions for huge and small size classes, * using an explicit tcache. */ if (config_tcache) { unsigned tcache_ind, i; size_t tcache_sizes[2]; - tcache_sizes[0] = large0; + tcache_sizes[0] = huge0; tcache_sizes[1] = 1; sz = sizeof(unsigned); @@ -204,14 +201,14 @@ TEST_BEGIN(test_decay_ticker) uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, large; + size_t sz, huge; unsigned i, nupdates0; nstime_t time, decay_time, deadline; test_skip_if(opt_purge != purge_mode_decay); /* - * Allocate a bunch of large objects, pause the clock, deallocate the + * Allocate a bunch of huge objects, pause the clock, deallocate the * objects, restore the clock, then [md]allocx() in a tight loop to * verify the ticker triggers purging. */ @@ -222,11 +219,11 @@ TEST_BEGIN(test_decay_ticker) sz = sizeof(size_t); assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); + huge = nallocx(tcache_max + 1, flags); } else { sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large, &sz, NULL, 0), - 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); } assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, @@ -238,7 +235,7 @@ TEST_BEGIN(test_decay_ticker) config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); for (i = 0; i < NPS; i++) { - ps[i] = mallocx(large, flags); + ps[i] = mallocx(huge, flags); assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); } @@ -296,13 +293,13 @@ TEST_BEGIN(test_decay_nonmonotonic) uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, large0; + size_t sz, huge0; unsigned i, nupdates0; test_skip_if(opt_purge != purge_mode_decay); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, @@ -322,7 +319,7 @@ TEST_BEGIN(test_decay_nonmonotonic) nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { - ps[i] = mallocx(large0, flags); + ps[i] = mallocx(huge0, flags); assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); } diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index d8846db4..98c9fde4 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -35,16 +35,16 @@ TEST_BEGIN(test_small_extent_size) } TEST_END -TEST_BEGIN(test_large_extent_size) +TEST_BEGIN(test_huge_extent_size) { bool cache_oblivious; - unsigned nlruns, i; + unsigned nhchunks, i; size_t sz, extent_size_prev, ceil_prev; size_t mib[4]; size_t miblen = sizeof(mib) / sizeof(size_t); /* - * Iterate over all large size classes, get their extent sizes, and + * Iterate over all huge size classes, get their extent sizes, and * verify that the quantized size is the same as the extent size. 
*/ @@ -53,12 +53,12 @@ TEST_BEGIN(test_large_extent_size) NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, + assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); - for (i = 0; i < nlruns; i++) { + for (i = 0; i < nhchunks; i++) { size_t lextent_size, extent_size, floor, ceil; mib[2] = i; @@ -91,33 +91,24 @@ TEST_BEGIN(test_large_extent_size) ceil_prev, extent_size); } } - extent_size_prev = floor; - ceil_prev = extent_size_quantize_ceil(extent_size + PAGE); + if (i + 1 < nhchunks) { + extent_size_prev = floor; + ceil_prev = extent_size_quantize_ceil(extent_size + + PAGE); + } } } TEST_END TEST_BEGIN(test_monotonic) { - unsigned nbins, nlruns, i; - size_t sz, floor_prev, ceil_prev; - - /* - * Iterate over all extent sizes and verify that - * extent_size_quantize_{floor,ceil}() are monotonic. 
- */ - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); +#define SZ_MAX ZU(4 * 1024 * 1024) + unsigned i; + size_t floor_prev, ceil_prev; floor_prev = 0; ceil_prev = 0; - for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { + for (i = 1; i <= SZ_MAX >> LG_PAGE; i++) { size_t extent_size, floor, ceil; extent_size = i << LG_PAGE; @@ -150,6 +141,6 @@ main(void) return (test( test_small_extent_size, - test_large_extent_size, + test_huge_extent_size, test_monotonic)); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 82eddf4c..cdf8fb3c 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -9,7 +9,6 @@ const char *malloc_conf = #endif static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; -static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig; static huge_dalloc_junk_t *huge_dalloc_junk_orig; static void *watch_for_junking; static bool saw_junking; @@ -38,25 +37,10 @@ arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) } static void -arena_dalloc_junk_large_intercept(void *ptr, size_t usize) -{ - size_t i; - - arena_dalloc_junk_large_orig(ptr, usize); - for (i = 0; i < usize; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, - "Missing junk fill for byte %zu/%zu of deallocated region", - i, usize); - } - if (ptr == watch_for_junking) - saw_junking = true; -} - -static void -huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk_intercept(void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsdn, ptr, usize); + huge_dalloc_junk_orig(ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in @@ -75,8 +59,6 @@ test_junk(size_t sz_min, size_t sz_max) if (opt_junk_free) { 
arena_dalloc_junk_small_orig = arena_dalloc_junk_small; arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; - arena_dalloc_junk_large_orig = arena_dalloc_junk_large; - arena_dalloc_junk_large = arena_dalloc_junk_large_intercept; huge_dalloc_junk_orig = huge_dalloc_junk; huge_dalloc_junk = huge_dalloc_junk_intercept; } @@ -106,13 +88,18 @@ test_junk(size_t sz_min, size_t sz_max) } if (xallocx(s, sz+1, 0, 0) == sz) { + uint8_t *t; watch_junking(s); - s = (uint8_t *)rallocx(s, sz+1, 0); - assert_ptr_not_null((void *)s, + t = (uint8_t *)rallocx(s, sz+1, 0); + assert_ptr_not_null((void *)t, "Unexpected rallocx() failure"); + assert_ptr_ne(s, t, "Unexpected in-place rallocx()"); + assert_zu_ge(sallocx(t, 0), sz+1, + "Unexpectedly small rallocx() result"); assert_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be junk-filled", sz); + s = t; } } @@ -123,7 +110,6 @@ test_junk(size_t sz_min, size_t sz_max) if (opt_junk_free) { arena_dalloc_junk_small = arena_dalloc_junk_small_orig; - arena_dalloc_junk_large = arena_dalloc_junk_large_orig; huge_dalloc_junk = huge_dalloc_junk_orig; } } @@ -136,64 +122,11 @@ TEST_BEGIN(test_junk_small) } TEST_END -TEST_BEGIN(test_junk_large) -{ - - test_skip_if(!config_fill); - test_junk(SMALL_MAXCLASS+1, large_maxclass); -} -TEST_END - TEST_BEGIN(test_junk_huge) { test_skip_if(!config_fill); - test_junk(large_maxclass+1, chunksize*2); -} -TEST_END - -arena_ralloc_junk_large_t *arena_ralloc_junk_large_orig; -static void *most_recently_trimmed; - -static size_t -shrink_size(size_t size) -{ - size_t shrink_size; - - for (shrink_size = size - 1; nallocx(shrink_size, 0) == size; - shrink_size--) - ; /* Do nothing. 
*/ - - return (shrink_size); -} - -static void -arena_ralloc_junk_large_intercept(void *ptr, size_t old_usize, size_t usize) -{ - - arena_ralloc_junk_large_orig(ptr, old_usize, usize); - assert_zu_eq(old_usize, large_maxclass, "Unexpected old_usize"); - assert_zu_eq(usize, shrink_size(large_maxclass), "Unexpected usize"); - most_recently_trimmed = ptr; -} - -TEST_BEGIN(test_junk_large_ralloc_shrink) -{ - void *p1, *p2; - - p1 = mallocx(large_maxclass, 0); - assert_ptr_not_null(p1, "Unexpected mallocx() failure"); - - arena_ralloc_junk_large_orig = arena_ralloc_junk_large; - arena_ralloc_junk_large = arena_ralloc_junk_large_intercept; - - p2 = rallocx(p1, shrink_size(large_maxclass), 0); - assert_ptr_eq(p1, p2, "Unexpected move during shrink"); - - arena_ralloc_junk_large = arena_ralloc_junk_large_orig; - - assert_ptr_eq(most_recently_trimmed, p1, - "Expected trimmed portion of region to be junk-filled"); + test_junk(SMALL_MAXCLASS+1, chunksize*2); } TEST_END @@ -203,7 +136,5 @@ main(void) return (test( test_junk_small, - test_junk_large, - test_junk_huge, - test_junk_large_ralloc_shrink)); + test_junk_huge)); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 79c5147c..9ba730a6 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -596,8 +596,7 @@ TEST_BEGIN(test_arenas_constants) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS); - TEST_ARENAS_CONSTANT(unsigned, nlruns, nlclasses); - TEST_ARENAS_CONSTANT(unsigned, nhchunks, nhclasses); + TEST_ARENAS_CONSTANT(unsigned, nhchunks, NSIZES - NBINS); #undef TEST_ARENAS_CONSTANT } @@ -622,23 +621,6 @@ TEST_BEGIN(test_arenas_bin_constants) } TEST_END -TEST_BEGIN(test_arenas_lrun_constants) -{ - -#define TEST_ARENAS_LRUN_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lrun.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ - 
assert_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) - - TEST_ARENAS_LRUN_CONSTANT(size_t, size, LARGE_MINCLASS); - -#undef TEST_ARENAS_LRUN_CONSTANT -} -TEST_END - TEST_BEGIN(test_arenas_hchunk_constants) { @@ -650,7 +632,7 @@ TEST_BEGIN(test_arenas_hchunk_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_HCHUNK_CONSTANT(size_t, size, chunksize); + TEST_ARENAS_HCHUNK_CONSTANT(size_t, size, LARGE_MINCLASS); #undef TEST_ARENAS_HCHUNK_CONSTANT } @@ -721,7 +703,6 @@ main(void) test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, - test_arenas_lrun_constants, test_arenas_hchunk_constants, test_arenas_extend, test_stats_arenas)); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index bdea53ec..2b0639d8 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,10 +1,17 @@ #include "test/jemalloc_test.h" +const char *malloc_conf = "" #ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0," - "lg_prof_interval:0"; + "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0" + ",lg_prof_interval:0" +# ifdef JEMALLOC_TCACHE + "," +# endif #endif +#ifdef JEMALLOC_TCACHE + "tcache:false" +#endif + ; static bool did_prof_dump_open; diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c deleted file mode 100644 index 45f32018..00000000 --- a/test/unit/run_quantize.c +++ /dev/null @@ -1,149 +0,0 @@ -#include "test/jemalloc_test.h" - -TEST_BEGIN(test_small_run_size) -{ - unsigned nbins, i; - size_t sz, run_size; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); - - /* - * Iterate over all small size classes, get their run sizes, and verify - * that the quantized size is the same as the run size. 
- */ - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, - "Unexpected mallctlnametomib failure"); - for (i = 0; i < nbins; i++) { - mib[2] = i; - sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, 0), - 0, "Unexpected mallctlbymib failure"); - assert_zu_eq(run_size, run_quantize_floor(run_size), - "Small run quantization should be a no-op (run_size=%zu)", - run_size); - assert_zu_eq(run_size, run_quantize_ceil(run_size), - "Small run quantization should be a no-op (run_size=%zu)", - run_size); - } -} -TEST_END - -TEST_BEGIN(test_large_run_size) -{ - bool cache_oblivious; - unsigned nlruns, i; - size_t sz, run_size_prev, ceil_prev; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); - - /* - * Iterate over all large size classes, get their run sizes, and verify - * that the quantized size is the same as the run size. - */ - - sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, - "Unexpected mallctlnametomib failure"); - for (i = 0; i < nlruns; i++) { - size_t lrun_size, run_size, floor, ceil; - - mib[2] = i; - sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &lrun_size, &sz, NULL, 0), - 0, "Unexpected mallctlbymib failure"); - run_size = cache_oblivious ? 
lrun_size + PAGE : lrun_size; - floor = run_quantize_floor(run_size); - ceil = run_quantize_ceil(run_size); - - assert_zu_eq(run_size, floor, - "Large run quantization should be a no-op for precise " - "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); - assert_zu_eq(run_size, ceil, - "Large run quantization should be a no-op for precise " - "size (lrun_size=%zu, run_size=%zu)", lrun_size, run_size); - - if (i > 0) { - assert_zu_eq(run_size_prev, run_quantize_floor(run_size - - PAGE), "Floor should be a precise size"); - if (run_size_prev < ceil_prev) { - assert_zu_eq(ceil_prev, run_size, - "Ceiling should be a precise size " - "(run_size_prev=%zu, ceil_prev=%zu, " - "run_size=%zu)", run_size_prev, ceil_prev, - run_size); - } - } - run_size_prev = floor; - ceil_prev = run_quantize_ceil(run_size + PAGE); - } -} -TEST_END - -TEST_BEGIN(test_monotonic) -{ - unsigned nbins, nlruns, i; - size_t sz, floor_prev, ceil_prev; - - /* - * Iterate over all run sizes and verify that - * run_quantize_{floor,ceil}() are monotonic. 
- */ - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - - floor_prev = 0; - ceil_prev = 0; - for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { - size_t run_size, floor, ceil; - - run_size = i << LG_PAGE; - floor = run_quantize_floor(run_size); - ceil = run_quantize_ceil(run_size); - - assert_zu_le(floor, run_size, - "Floor should be <= (floor=%zu, run_size=%zu, ceil=%zu)", - floor, run_size, ceil); - assert_zu_ge(ceil, run_size, - "Ceiling should be >= (floor=%zu, run_size=%zu, ceil=%zu)", - floor, run_size, ceil); - - assert_zu_le(floor_prev, floor, "Floor should be monotonic " - "(floor_prev=%zu, floor=%zu, run_size=%zu, ceil=%zu)", - floor_prev, floor, run_size, ceil); - assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " - "(floor=%zu, run_size=%zu, ceil_prev=%zu, ceil=%zu)", - floor, run_size, ceil_prev, ceil); - - floor_prev = floor; - ceil_prev = ceil; - } -} -TEST_END - -int -main(void) -{ - - return (test( - test_small_run_size, - test_large_run_size, - test_monotonic)); -} diff --git a/test/unit/stats.c b/test/unit/stats.c index a9a3981f..b0e318a5 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -42,7 +42,7 @@ TEST_BEGIN(test_stats_huge) size_t sz; int expected = config_stats ? 0 : ENOENT; - p = mallocx(large_maxclass+1, 0); + p = mallocx(SMALL_MAXCLASS+1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, @@ -75,7 +75,7 @@ TEST_END TEST_BEGIN(test_stats_arenas_summary) { unsigned arena; - void *little, *large, *huge; + void *little, *huge; uint64_t epoch; size_t sz; int expected = config_stats ? 
0 : ENOENT; @@ -88,13 +88,10 @@ TEST_BEGIN(test_stats_arenas_summary) little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx(large_maxclass, 0); - assert_ptr_not_null(large, "Unexpected mallocx() failure"); huge = mallocx(chunksize, 0); assert_ptr_not_null(huge, "Unexpected mallocx() failure"); dallocx(little, 0); - dallocx(large, 0); dallocx(huge, 0); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, @@ -188,50 +185,6 @@ TEST_BEGIN(test_stats_arenas_small) } TEST_END -TEST_BEGIN(test_stats_arenas_large) -{ - unsigned arena; - void *p; - size_t sz, allocated; - uint64_t epoch, nmalloc, ndalloc, nrequests; - int expected = config_stats ? 0 : ENOENT; - - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); - - p = mallocx(large_maxclass, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); - - sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - - if (config_stats) { - assert_zu_gt(allocated, 0, - "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, - "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(nrequests, 0, - "nrequests should be greater than zero"); - } - - dallocx(p, 0); -} -TEST_END - 
TEST_BEGIN(test_stats_arenas_huge) { unsigned arena; @@ -346,63 +299,23 @@ TEST_BEGIN(test_stats_arenas_bins) } TEST_END -TEST_BEGIN(test_stats_arenas_lruns) -{ - unsigned arena; - void *p; - uint64_t epoch, nmalloc, ndalloc, nrequests; - size_t curruns, sz; - int expected = config_stats ? 0 : ENOENT; - - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); - - p = mallocx(LARGE_MINCLASS, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); - - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - - if (config_stats) { - assert_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, - "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(nrequests, 0, - "nrequests should be greater than zero"); - assert_u64_gt(curruns, 0, - "At least one run should be currently allocated"); - } - - dallocx(p, 0); -} -TEST_END - TEST_BEGIN(test_stats_arenas_hchunks) { unsigned arena; void *p; uint64_t epoch, nmalloc, ndalloc; - size_t curhchunks, sz; + size_t curhchunks, sz, hsize; int expected = config_stats ? 
0 : ENOENT; arena = 0; assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), 0, "Unexpected mallctl() failure"); - p = mallocx(chunksize, 0); + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.hchunk.0.size", &hsize, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + + p = mallocx(hsize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, @@ -439,9 +352,7 @@ main(void) test_stats_huge, test_stats_arenas_summary, test_stats_arenas_small, - test_stats_arenas_large, test_stats_arenas_huge, test_stats_arenas_bins, - test_stats_arenas_lruns, test_stats_arenas_hchunks)); } diff --git a/test/unit/zero.c b/test/unit/zero.c index 123f0e03..2da288ac 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -53,19 +53,11 @@ TEST_BEGIN(test_zero_small) } TEST_END -TEST_BEGIN(test_zero_large) -{ - - test_skip_if(!config_fill); - test_zero(SMALL_MAXCLASS+1, large_maxclass); -} -TEST_END - TEST_BEGIN(test_zero_huge) { test_skip_if(!config_fill); - test_zero(large_maxclass+1, chunksize*2); + test_zero(SMALL_MAXCLASS+1, chunksize*2); } TEST_END @@ -75,6 +67,5 @@ main(void) return (test( test_zero_small, - test_zero_large, test_zero_huge)); } From d28e5a6696fd59a45c156b5c4dc183bb9ed21596 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 28 May 2016 17:29:03 -0700 Subject: [PATCH 0288/2608] Improve interval-based profile dump triggering. When an allocation is large enough to trigger multiple dumps, use modular math rather than subtraction to reset the interval counter. Prior to this change, it was possible for a single allocation to cause many subsequent allocations to all trigger profile dumps. When updating usable size for a sampled object, try to cancel out the difference between LARGE_MINCLASS and usable size from the interval counter. 
--- include/jemalloc/internal/arena.h | 2 +- src/arena.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index bf16e8e9..b0c4b5f3 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -984,7 +984,7 @@ arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) arena->prof_accumbytes += accumbytes; if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes -= prof_interval; + arena->prof_accumbytes %= prof_interval; return (true); } return (false); diff --git a/src/arena.c b/src/arena.c index d9882a45..0b98ec5d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2258,6 +2258,7 @@ void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize) { + arena_t *arena = extent_arena_get(extent); cassert(config_prof); assert(ptr != NULL); @@ -2266,6 +2267,19 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, extent_usize_set(extent, usize); + /* + * Cancel out as much of the excessive prof_accumbytes increase as + * possible without underflowing. Interval-triggered dumps occur + * slightly more often than intended as a result of incomplete + * canceling. + */ + malloc_mutex_lock(tsdn, &arena->lock); + if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) + arena->prof_accumbytes -= LARGE_MINCLASS - usize; + else + arena->prof_accumbytes = 0; + malloc_mutex_unlock(tsdn, &arena->lock); + assert(isalloc(tsdn, extent, ptr) == usize); } From 498856f44a30b31fe713a18eb2fc7c6ecf3a9f63 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 29 May 2016 18:34:50 -0700 Subject: [PATCH 0289/2608] Move slabs out of chunks. 
--- doc/jemalloc.xml.in | 98 +- include/jemalloc/internal/arena.h | 795 +------- include/jemalloc/internal/bitmap.h | 2 +- include/jemalloc/internal/extent.h | 86 +- include/jemalloc/internal/private_symbols.txt | 45 +- include/jemalloc/internal/prof.h | 44 +- include/jemalloc/internal/size_classes.sh | 22 +- include/jemalloc/internal/stats.h | 14 +- include/jemalloc/internal/tcache.h | 2 +- src/arena.c | 1633 ++++------------- src/base.c | 3 +- src/chunk.c | 11 +- src/chunk_dss.c | 2 +- src/ctl.c | 37 +- src/huge.c | 4 +- src/jemalloc.c | 46 +- src/stats.c | 45 +- src/tcache.c | 8 +- test/unit/extent_quantize.c | 10 +- test/unit/mallctl.c | 3 +- test/unit/stats.c | 18 +- 21 files changed, 596 insertions(+), 2332 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index efb4bfe4..923097d4 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -509,26 +509,20 @@ for (i = 0; i < nbins; i++) { In addition to multiple arenas, unless is specified during configuration, this - allocator supports thread-specific caching for small and large objects, in - order to make it possible to completely avoid synchronization for most - allocation requests. Such caching allows very fast allocation in the - common case, but it increases memory usage and fragmentation, since a - bounded number of objects can remain allocated in each thread cache. + allocator supports thread-specific caching, in order to make it possible to + completely avoid synchronization for most allocation requests. Such caching + allows very fast allocation in the common case, but it increases memory + usage and fragmentation, since a bounded number of objects can remain + allocated in each thread cache. - Memory is conceptually broken into equal-sized chunks, where the chunk - size is a power of two that is greater than the page size. Chunks are - always aligned to multiples of the chunk size. This alignment makes it - possible to find metadata for user objects very quickly. 
User objects are - broken into three categories according to size: small, large, and huge. - Multiple small and large objects can reside within a single chunk, whereas - huge objects each have one or more chunks backing them. Each chunk that - contains small and/or large objects tracks its contents as runs of - contiguous pages (unused, backing a set of small objects, or backing one - large object). The combination of chunk alignment and chunk page maps makes - it possible to determine all metadata regarding small and large allocations - in constant time. + Memory is conceptually broken into extents. Extents are always + aligned to multiples of the page size. This alignment makes it possible to + find metadata for user objects quickly. User objects are broken into two + categories according to size: small and large. Contiguous small objects + comprise a slab, which resides within a single extent, whereas large objects + each have their own extents backing them. - Small objects are managed in groups by page runs. Each run maintains + Small objects are managed in groups by slabs. Each slab maintains a bitmap to track which regions are in use. Allocation requests that are no more than half the quantum (8 or 16, depending on architecture) are rounded up to the nearest power of two that is at least opt.lg_chunk option), and - huge size classes extend from the chunk size up to the largest size class - that does not exceed PTRDIFF_MAX. + are smaller than four times the page size, and large size classes extend + from four times the page size up to the largest size class that does not + exceed PTRDIFF_MAX. Allocations are packed tightly together, which can be an issue for multi-threaded applications. If you need to assure that allocations do not @@ -560,18 +552,16 @@ for (i = 0; i < nbins; i++) { trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. 
No other API guarantees are made regarding in-place resizing, but the current implementation also tries to resize large - and huge allocations in place, as long as the pre-size and post-size are - both large or both huge. In such cases shrinkage always succeeds for large - size classes, but for huge size classes the chunk allocator must support - splitting (see arena.<i>.chunk_hooks). - Growth only succeeds if the trailing memory is currently available, and - additionally for huge size classes the chunk allocator must support - merging. + Growth only succeeds if the trailing memory is currently available, and the + extent allocator supports merging. - Assuming 2 MiB chunks, 4 KiB pages, and a 16-byte quantum on a - 64-bit system, the size classes in each category are as shown in . + Assuming 4 KiB pages and a 16-byte quantum on a 64-bit system, the + size classes in each category are as shown in . Size classes @@ -625,7 +615,7 @@ for (i = 0; i < nbins; i++) { [10 KiB, 12 KiB, 14 KiB] - Large + Large 2 KiB [16 KiB] @@ -655,12 +645,7 @@ for (i = 0; i < nbins; i++) { 256 KiB - [1280 KiB, 1536 KiB, 1792 KiB] - - - Huge - 256 KiB - [2 MiB] + [1280 KiB, 1536 KiB, 1792 KiB, 2 MiB] 512 KiB @@ -1875,16 +1860,16 @@ typedef struct { (uint32_t) r- - Number of regions per page run. + Number of regions per slab. - + - arenas.bin.<i>.run_size + arenas.bin.<i>.slab_size (size_t) r- - Number of bytes per page run. + Number of bytes per slab. @@ -2185,7 +2170,7 @@ typedef struct { (size_t) r- - Number of pages in active runs. + Number of pages in active extents. @@ -2194,8 +2179,9 @@ typedef struct { (size_t) r- - Number of pages within unused runs that are potentially - dirty, and for which madvise... + Number of pages within unused extents that are + potentially dirty, and for which + madvise... MADV_DONTNEED or similar has not been called. @@ -2483,35 +2469,35 @@ typedef struct { Cumulative number of tcache flushes. 
- + - stats.arenas.<i>.bins.<j>.nruns + stats.arenas.<i>.bins.<j>.nslabs (uint64_t) r- [] - Cumulative number of runs created. + Cumulative number of slabs created. - + - stats.arenas.<i>.bins.<j>.nreruns + stats.arenas.<i>.bins.<j>.nreslabs (uint64_t) r- [] - Cumulative number of times the current run from which + Cumulative number of times the current slab from which to allocate changed. - + - stats.arenas.<i>.bins.<j>.curruns + stats.arenas.<i>.bins.<j>.curslabs (size_t) r- [] - Current number of runs. + Current number of slabs. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b0c4b5f3..d66548f2 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -3,9 +3,9 @@ #define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) -/* Maximum number of regions in one run. */ -#define LG_RUN_MAXREGS (LG_PAGE - LG_TINY_MIN) -#define RUN_MAXREGS (1U << LG_RUN_MAXREGS) +/* Maximum number of regions in one slab. */ +#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) /* * The minimum ratio of active:dirty pages per arena is computed as: @@ -29,12 +29,7 @@ typedef enum { /* Number of event ticks between time checks. */ #define DECAY_NTICKS_PER_UPDATE 1000 -typedef struct arena_runs_dirty_link_s arena_runs_dirty_link_t; -typedef struct arena_avail_links_s arena_avail_links_t; -typedef struct arena_run_s arena_run_t; -typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; -typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; -typedef struct arena_chunk_s arena_chunk_t; +typedef struct arena_slab_data_s arena_slab_data_t; typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; @@ -45,152 +40,25 @@ typedef struct arena_tdata_s arena_tdata_t; #ifdef JEMALLOC_H_STRUCTS #ifdef JEMALLOC_ARENA_STRUCTS_A -struct arena_run_s { - /* Index of bin this run is associated with. 
*/ +struct arena_slab_data_s { + /* Index of bin this slab is associated with. */ szind_t binind; - /* Number of free regions in run. */ + /* Number of free regions in slab. */ unsigned nfree; /* Per region allocated/deallocated bitmap. */ bitmap_t bitmap[BITMAP_GROUPS_MAX]; }; - -/* Each element of the chunk map corresponds to one page within the chunk. */ -struct arena_chunk_map_bits_s { - /* - * Run address (or size) and various flags are stored together. The bit - * layout looks like (assuming 32-bit system): - * - * ???????? ???????? ???nnnnn nnndumla - * - * ? : Unallocated: Run address for first/last pages, unset for internal - * pages. - * Small: Run page offset. - * Large: Run page count for first page, unset for trailing pages. - * n : binind for small size class, BININD_INVALID for large size class. - * d : dirty? - * u : unzeroed? - * m : decommitted? - * l : large? - * a : allocated? - * - * Following are example bit patterns for the three types of runs. - * - * p : run page offset - * s : run size - * n : binind for size class; large objects set these to BININD_INVALID - * x : don't care - * - : 0 - * + : 1 - * [DUMLA] : bit set - * [dumla] : bit unset - * - * Unallocated (clean): - * ssssssss ssssssss sss+++++ +++dum-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxx-Uxxx - * ssssssss ssssssss sss+++++ +++dUm-a - * - * Unallocated (dirty): - * ssssssss ssssssss sss+++++ +++D-m-a - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * ssssssss ssssssss sss+++++ +++D-m-a - * - * Small: - * pppppppp pppppppp pppnnnnn nnnd---A - * pppppppp pppppppp pppnnnnn nnn----A - * pppppppp pppppppp pppnnnnn nnnd---A - * - * Large: - * ssssssss ssssssss sss+++++ +++D--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - * - * Large (sampled, size <= LARGE_MINCLASS): - * ssssssss ssssssss sssnnnnn nnnD--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - * - * Large (not sampled, size == LARGE_MINCLASS): - * ssssssss ssssssss 
sss+++++ +++D--LA - * xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx - * -------- -------- ---+++++ +++D--LA - */ - size_t bits; -#define CHUNK_MAP_ALLOCATED ((size_t)0x01U) -#define CHUNK_MAP_LARGE ((size_t)0x02U) -#define CHUNK_MAP_STATE_MASK ((size_t)0x3U) - -#define CHUNK_MAP_DECOMMITTED ((size_t)0x04U) -#define CHUNK_MAP_UNZEROED ((size_t)0x08U) -#define CHUNK_MAP_DIRTY ((size_t)0x10U) -#define CHUNK_MAP_FLAGS_MASK ((size_t)0x1cU) - -#define CHUNK_MAP_BININD_SHIFT 5 -#define BININD_INVALID ((size_t)0xffU) -#define CHUNK_MAP_BININD_MASK (BININD_INVALID << CHUNK_MAP_BININD_SHIFT) -#define CHUNK_MAP_BININD_INVALID CHUNK_MAP_BININD_MASK - -#define CHUNK_MAP_RUNIND_SHIFT (CHUNK_MAP_BININD_SHIFT + 8) -#define CHUNK_MAP_SIZE_SHIFT (CHUNK_MAP_RUNIND_SHIFT - LG_PAGE) -#define CHUNK_MAP_SIZE_MASK \ - (~(CHUNK_MAP_BININD_MASK | CHUNK_MAP_FLAGS_MASK | CHUNK_MAP_STATE_MASK)) -}; - -struct arena_runs_dirty_link_s { - qr(arena_runs_dirty_link_t) rd_link; -}; - -/* - * Each arena_chunk_map_misc_t corresponds to one page within the chunk, just - * like arena_chunk_map_bits_t. Two separate arrays are stored within each - * chunk header in order to improve cache locality. - */ -struct arena_chunk_map_misc_s { - /* - * Linkage for run heaps. There are two disjoint uses: - * - * 1) arena_t's runs_avail heaps. - * 2) arena_run_t conceptually uses this linkage for in-use non-full - * runs, rather than directly embedding linkage. - */ - phn(arena_chunk_map_misc_t) ph_link; - - union { - /* Linkage for list of dirty runs. */ - arena_runs_dirty_link_t rd; - - /* Profile counters, used for large object runs. */ - union { - void *prof_tctx_pun; - prof_tctx_t *prof_tctx; - }; - - /* Small region run metadata. */ - arena_run_t run; - }; -}; -typedef ph(arena_chunk_map_misc_t) arena_run_heap_t; #endif /* JEMALLOC_ARENA_STRUCTS_A */ #ifdef JEMALLOC_ARENA_STRUCTS_B -/* Arena chunk header. */ -struct arena_chunk_s { - /* - * Map of pages within chunk that keeps track of free/large/small. 
The - * first map_bias entries are omitted, since the chunk header does not - * need to be tracked in the map. This omission saves a header page - * for common chunk sizes (e.g. 4 MiB). - */ - arena_chunk_map_bits_t map_bits[1]; /* Dynamically sized. */ -}; - /* * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather * than one per arena), but mainly to avoid false cacheline sharing. * - * Each run has the following layout: + * Each slab has the following layout: * * /--------------------\ * | region 0 | @@ -205,45 +73,42 @@ struct arena_chunk_s { * \--------------------/ */ struct arena_bin_info_s { - /* Size of regions in a run for this bin's size class. */ + /* Size of regions in a slab for this bin's size class. */ size_t reg_size; - /* Total size of a run for this bin's size class. */ - size_t run_size; + /* Total size of a slab for this bin's size class. */ + size_t slab_size; - /* Total number of regions in a run for this bin's size class. */ + /* Total number of regions in a slab for this bin's size class. */ uint32_t nregs; /* - * Metadata used to manipulate bitmaps for runs associated with this + * Metadata used to manipulate bitmaps for slabs associated with this * bin. */ bitmap_info_t bitmap_info; }; struct arena_bin_s { - /* - * All operations on runcur, runs, and stats require that lock be - * locked. Run allocation/deallocation are protected by the arena lock, - * which may be acquired while holding one or more bin locks, but not - * vise versa. - */ + /* All operations on arena_bin_t fields require lock ownership. */ malloc_mutex_t lock; /* - * Current run being used to service allocations of this bin's size - * class. + * Current slab being used to service allocations of this bin's size + * class. 
slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. */ - arena_run_t *runcur; + extent_t *slabcur; /* - * Heap of non-full runs. This heap is used when looking for an - * existing run when runcur is no longer usable. We choose the - * non-full run that is lowest in memory; this policy tends to keep - * objects packed well, and it can also help reduce the number of - * almost-empty chunks. + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is lowest in memory. */ - arena_run_heap_t runs; + extent_heap_t slabs_nonfull; + + /* Ring sentinel used to track full slabs. */ + extent_t slabs_full; /* Bin statistics. */ malloc_bin_stats_t stats; @@ -272,7 +137,7 @@ struct arena_s { * perspective: * 1) Thread assignment (modifies nthreads) is synchronized via atomics. * 2) Bin-related operations are protected by bin locks. - * 3) Chunk- and run-related operations are protected by this mutex. + * 3) Chunk-related operations are protected by this mutex. */ malloc_mutex_t lock; @@ -294,32 +159,17 @@ struct arena_s { dss_prec_t dss_prec; - /* Extant arena chunks. */ - ql_head(extent_t) achunks; - - /* - * In order to avoid rapid chunk allocation/deallocation when an arena - * oscillates right on the cusp of needing a new chunk, cache the most - * recently freed chunk. The spare is left in the arena's chunk trees - * until it is deleted. - * - * There is one spare chunk per arena, rather than one spare total, in - * order to avoid interactions between multiple threads that could make - * a single spare inadequate. - */ - extent_t *spare; - /* Minimum ratio (log base 2) of nactive:ndirty. */ ssize_t lg_dirty_mult; /* True if a thread is currently executing arena_purge_to_limit(). */ bool purging; - /* Number of pages in active runs and huge regions. */ + /* Number of pages in active extents. 
*/ size_t nactive; /* - * Current count of pages within unused runs that are potentially + * Current count of pages within unused extents that are potentially * dirty, and for which madvise(... MADV_DONTNEED) has not been called. * By tracking this, we can institute a limit on how much dirty unused * memory is mapped for each arena. @@ -327,35 +177,10 @@ struct arena_s { size_t ndirty; /* - * Unused dirty memory this arena manages. Dirty memory is conceptually - * tracked as an arbitrarily interleaved LRU of dirty runs and cached - * chunks, but the list linkage is actually semi-duplicated in order to - * avoid extra arena_chunk_map_misc_t space overhead. - * - * LRU-----------------------------------------------------------MRU - * - * /-- arena ---\ - * | | - * | | - * |------------| /-- chunk --\ - * ...->|chunks_cache|<--------------------------->| /------\ |<--... - * |------------| | |extent| | - * | | | | | | - * | | /- run -\ /- run -\ | | | | - * | | | | | | | | | | - * | | | | | | | | | | - * |------------| |-------| |-------| | |------| | - * ...->|runs_dirty |<-->|rd |<-->|rd |<---->|rd |<----... - * |------------| |-------| |-------| | |------| | - * | | | | | | | | | | - * | | | | | | | \------/ | - * | | \-------/ \-------/ | | - * | | | | - * | | | | - * \------------/ \-----------/ + * Ring sentinel used to track unused dirty memory. Dirty memory is + * managed as an LRU of cached extents. */ - arena_runs_dirty_link_t runs_dirty; - extent_t chunks_cache; + extent_t extents_dirty; /* * Approximate time in seconds from the creation of a set of unused @@ -424,16 +249,8 @@ struct arena_s { /* User-configurable chunk hook functions. */ chunk_hooks_t chunk_hooks; - /* bins is used to store trees of free regions. */ + /* bins is used to store heaps of free regions. */ arena_bin_t bins[NBINS]; - - /* - * Size-segregated address-ordered heaps of this arena's available runs, - * used for first-best-fit run allocation. Runs are quantized, i.e. 
- * they reside in the last heap which corresponds to a size class less - * than or equal to the run size. - */ - arena_run_heap_t runs_avail[NPSIZES]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -461,15 +278,6 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; -extern size_t map_bias; /* Number of arena chunk header pages. */ -extern size_t map_misc_offset; -extern size_t arena_maxrun; /* Max run size for arenas. */ - -#ifdef JEMALLOC_JET -typedef size_t (run_quantize_t)(size_t); -extern run_quantize_t *run_quantize_floor; -extern run_quantize_t *run_quantize_ceil; -#endif extent_t *arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero); @@ -514,10 +322,9 @@ void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, extent_t *extent, void *ptr, - arena_chunk_map_bits_t *bitselm); -void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr, size_t pageind); + extent_t *extent, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, @@ -552,70 +359,19 @@ void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -arena_chunk_map_bits_t *arena_bitselm_get_mutable(arena_chunk_t *chunk, - size_t pageind); -const arena_chunk_map_bits_t *arena_bitselm_get_const( - const arena_chunk_t *chunk, size_t pageind); -arena_chunk_map_misc_t 
*arena_miscelm_get_mutable(arena_chunk_t *chunk, - size_t pageind); -const arena_chunk_map_misc_t *arena_miscelm_get_const( - const arena_chunk_t *chunk, size_t pageind); -size_t arena_miscelm_to_pageind(const extent_t *extent, - const arena_chunk_map_misc_t *miscelm); -void *arena_miscelm_to_rpages(const extent_t *extent, - const arena_chunk_map_misc_t *miscelm); -arena_chunk_map_misc_t *arena_rd_to_miscelm(const extent_t *extent, - arena_runs_dirty_link_t *rd); -arena_chunk_map_misc_t *arena_run_to_miscelm(const extent_t *extent, - arena_run_t *run); -size_t *arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind); -const size_t *arena_mapbitsp_get_const(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbitsp_read(const size_t *mapbitsp); -size_t arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_size_decode(size_t mapbits); -size_t arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_size_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_small_runind_get(const arena_chunk_t *chunk, - size_t pageind); -szind_t arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_decommitted_get(const arena_chunk_t *chunk, - size_t pageind); -size_t arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind); -size_t arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind); -void arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits); -size_t arena_mapbits_size_encode(size_t size); -void arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size); -void arena_mapbits_internal_set(arena_chunk_t *chunk, 
size_t pageind, - size_t flags); -void arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, - size_t size, size_t flags); -void arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - szind_t binind); -void arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, - size_t runind, szind_t binind, size_t flags); void arena_metadata_allocated_add(arena_t *arena, size_t size); void arena_metadata_allocated_sub(arena_t *arena, size_t size); size_t arena_metadata_allocated_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); -szind_t arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, - size_t mapbits); szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr); void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx); void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, const void *old_ptr, prof_tctx_t *old_tctx); + prof_tctx_t *tctx); void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, @@ -630,330 +386,6 @@ void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_ALWAYS_INLINE arena_chunk_map_bits_t * -arena_bitselm_get_mutable(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (&chunk->map_bits[pageind-map_bias]); -} - -JEMALLOC_ALWAYS_INLINE const arena_chunk_map_bits_t * -arena_bitselm_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - 
return (arena_bitselm_get_mutable((arena_chunk_t *)chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_miscelm_get_mutable(arena_chunk_t *chunk, size_t pageind) -{ - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return ((arena_chunk_map_misc_t *)((uintptr_t)chunk + - (uintptr_t)map_misc_offset) + pageind-map_bias); -} - -JEMALLOC_ALWAYS_INLINE const arena_chunk_map_misc_t * -arena_miscelm_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_miscelm_get_mutable((arena_chunk_t *)chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_miscelm_to_pageind(const extent_t *extent, - const arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)miscelm - ((uintptr_t)chunk + - map_misc_offset)) / sizeof(arena_chunk_map_misc_t) + map_bias; - - assert(pageind >= map_bias); - assert(pageind < chunk_npages); - - return (pageind); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_miscelm_to_rpages(const extent_t *extent, - const arena_chunk_map_misc_t *miscelm) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind = arena_miscelm_to_pageind(extent, miscelm); - - return ((void *)((uintptr_t)chunk + (pageind << LG_PAGE))); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_rd_to_miscelm(const extent_t *extent, arena_runs_dirty_link_t *rd) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)rd - offsetof(arena_chunk_map_misc_t, rd)); - - assert(arena_miscelm_to_pageind(extent, miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(extent, miscelm) < chunk_npages); - - return (miscelm); -} - -JEMALLOC_ALWAYS_INLINE arena_chunk_map_misc_t * -arena_run_to_miscelm(const extent_t *extent, arena_run_t *run) -{ - arena_chunk_map_misc_t *miscelm = (arena_chunk_map_misc_t - *)((uintptr_t)run - offsetof(arena_chunk_map_misc_t, run)); - - 
assert(arena_miscelm_to_pageind(extent, miscelm) >= map_bias); - assert(arena_miscelm_to_pageind(extent, miscelm) < chunk_npages); - - return (miscelm); -} - -JEMALLOC_ALWAYS_INLINE size_t * -arena_mapbitsp_get_mutable(arena_chunk_t *chunk, size_t pageind) -{ - - return (&arena_bitselm_get_mutable(chunk, pageind)->bits); -} - -JEMALLOC_ALWAYS_INLINE const size_t * -arena_mapbitsp_get_const(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_mapbitsp_get_mutable((arena_chunk_t *)chunk, pageind)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbitsp_read(const size_t *mapbitsp) -{ - - return (*mapbitsp); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_get(const arena_chunk_t *chunk, size_t pageind) -{ - - return (arena_mapbitsp_read(arena_mapbitsp_get_const(chunk, pageind))); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_size_decode(size_t mapbits) -{ - size_t size; - -#if CHUNK_MAP_SIZE_SHIFT > 0 - size = (mapbits & CHUNK_MAP_SIZE_MASK) >> CHUNK_MAP_SIZE_SHIFT; -#elif CHUNK_MAP_SIZE_SHIFT == 0 - size = mapbits & CHUNK_MAP_SIZE_MASK; -#else - size = (mapbits & CHUNK_MAP_SIZE_MASK) << -CHUNK_MAP_SIZE_SHIFT; -#endif - - return (size); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unallocated_size_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - return (arena_mapbits_size_decode(mapbits)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_size_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)); - return (arena_mapbits_size_decode(mapbits)); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_small_runind_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits 
& (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == - CHUNK_MAP_ALLOCATED); - return (mapbits >> CHUNK_MAP_RUNIND_SHIFT); -} - -JEMALLOC_ALWAYS_INLINE szind_t -arena_mapbits_binind_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - szind_t binind; - - mapbits = arena_mapbits_get(chunk, pageind); - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - assert(binind < NBINS || binind == BININD_INVALID); - return (binind); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_dirty_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_DIRTY); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_unzeroed_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_UNZEROED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_decommitted_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_DECOMMITTED) == 0 || (mapbits & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - return (mapbits & CHUNK_MAP_DECOMMITTED); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_large_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_LARGE); -} - -JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_allocated_get(const arena_chunk_t *chunk, size_t pageind) -{ - size_t mapbits; - - mapbits = arena_mapbits_get(chunk, pageind); - return (mapbits & CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbitsp_write(size_t *mapbitsp, size_t mapbits) -{ - - *mapbitsp = mapbits; -} - 
-JEMALLOC_ALWAYS_INLINE size_t -arena_mapbits_size_encode(size_t size) -{ - size_t mapbits; - -#if CHUNK_MAP_SIZE_SHIFT > 0 - mapbits = size << CHUNK_MAP_SIZE_SHIFT; -#elif CHUNK_MAP_SIZE_SHIFT == 0 - mapbits = size; -#else - mapbits = size >> -CHUNK_MAP_SIZE_SHIFT; -#endif - - assert((mapbits & ~CHUNK_MAP_SIZE_MASK) == 0); - return (mapbits); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); - assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - CHUNK_MAP_BININD_INVALID | flags); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_unallocated_size_set(arena_chunk_t *chunk, size_t pageind, - size_t size) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - - assert((size & PAGE_MASK) == 0); - assert((mapbits & (CHUNK_MAP_LARGE|CHUNK_MAP_ALLOCATED)) == 0); - arena_mapbitsp_write(mapbitsp, arena_mapbits_size_encode(size) | - (mapbits & ~CHUNK_MAP_SIZE_MASK)); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_internal_set(arena_chunk_t *chunk, size_t pageind, size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert((flags & CHUNK_MAP_UNZEROED) == flags); - arena_mapbitsp_write(mapbitsp, flags); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_set(arena_chunk_t *chunk, size_t pageind, size_t size, - size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert((size & PAGE_MASK) == 0); - assert((flags & CHUNK_MAP_FLAGS_MASK) == flags); - assert((flags & CHUNK_MAP_DECOMMITTED) == 0 || (flags & - (CHUNK_MAP_DIRTY|CHUNK_MAP_UNZEROED)) == 0); - arena_mapbitsp_write(mapbitsp, 
arena_mapbits_size_encode(size) | - CHUNK_MAP_BININD_INVALID | flags | CHUNK_MAP_LARGE | - CHUNK_MAP_ALLOCATED); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_large_binind_set(arena_chunk_t *chunk, size_t pageind, - szind_t binind) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - size_t mapbits = arena_mapbitsp_read(mapbitsp); - - assert(binind <= BININD_INVALID); - assert(arena_mapbits_large_size_get(chunk, pageind) == LARGE_MINCLASS + - large_pad); - arena_mapbitsp_write(mapbitsp, (mapbits & ~CHUNK_MAP_BININD_MASK) | - (binind << CHUNK_MAP_BININD_SHIFT)); -} - -JEMALLOC_ALWAYS_INLINE void -arena_mapbits_small_set(arena_chunk_t *chunk, size_t pageind, size_t runind, - szind_t binind, size_t flags) -{ - size_t *mapbitsp = arena_mapbitsp_get_mutable(chunk, pageind); - - assert(binind < BININD_INVALID); - assert(pageind - runind >= map_bias); - assert((flags & CHUNK_MAP_UNZEROED) == flags); - arena_mapbitsp_write(mapbitsp, (runind << CHUNK_MAP_RUNIND_SHIFT) | - (binind << CHUNK_MAP_BININD_SHIFT) | flags | CHUNK_MAP_ALLOCATED); -} - JEMALLOC_INLINE void arena_metadata_allocated_add(arena_t *arena, size_t size) { @@ -1022,54 +454,6 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) # endif /* JEMALLOC_ARENA_INLINE_A */ # ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_ALWAYS_INLINE szind_t -arena_ptr_small_binind_get(tsdn_t *tsdn, const void *ptr, size_t mapbits) -{ - szind_t binind; - - binind = (mapbits & CHUNK_MAP_BININD_MASK) >> CHUNK_MAP_BININD_SHIFT; - - if (config_debug) { - const extent_t *extent; - arena_chunk_t *chunk; - arena_t *arena; - size_t pageind; - size_t actual_mapbits; - size_t rpages_ind; - const arena_run_t *run; - arena_bin_t *bin; - szind_t run_binind, actual_binind; - const arena_bin_info_t *bin_info; - const arena_chunk_map_misc_t *miscelm; - const void *rpages; - - assert(binind != BININD_INVALID); - assert(binind < NBINS); - extent = iealloc(tsdn, ptr); - chunk = (arena_chunk_t *)extent_base_get(extent); - 
arena = extent_arena_get(extent); - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - actual_mapbits = arena_mapbits_get(chunk, pageind); - assert(mapbits == actual_mapbits); - assert(arena_mapbits_large_get(chunk, pageind) == 0); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, - pageind); - miscelm = arena_miscelm_get_const(chunk, rpages_ind); - run = &miscelm->run; - run_binind = run->binind; - bin = &arena->bins[run_binind]; - actual_binind = (szind_t)(bin - arena->bins); - assert(run_binind == actual_binind); - bin_info = &arena_bin_info[actual_binind]; - rpages = arena_miscelm_to_rpages(extent, miscelm); - assert(((uintptr_t)ptr - (uintptr_t)rpages) % bin_info->reg_size - == 0); - } - - return (binind); -} - JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { @@ -1081,27 +465,13 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) JEMALLOC_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { - prof_tctx_t *ret; cassert(config_prof); assert(ptr != NULL); - if (likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t mapbits = arena_mapbits_get(chunk, pageind); - assert((mapbits & CHUNK_MAP_ALLOCATED) != 0); - if (likely((mapbits & CHUNK_MAP_LARGE) == 0)) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - arena_chunk_map_misc_t *elm = - arena_miscelm_get_mutable(chunk, pageind); - ret = atomic_read_p(&elm->prof_tctx_pun); - } - } else - ret = huge_prof_tctx_get(tsdn, extent); - - return (ret); + if (unlikely(!extent_slab_get(extent))) + return (huge_prof_tctx_get(tsdn, extent)); + return ((prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void @@ -1112,61 +482,20 @@ arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, cassert(config_prof); assert(ptr != NULL); - if 
(likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - - if (unlikely(usize > SMALL_MAXCLASS || (uintptr_t)tctx > - (uintptr_t)1U)) { - arena_chunk_map_misc_t *elm; - - assert(arena_mapbits_large_get(chunk, pageind) != 0); - - elm = arena_miscelm_get_mutable(chunk, pageind); - atomic_write_p(&elm->prof_tctx_pun, tctx); - } else { - /* - * tctx must always be initialized for large runs. - * Assert that the surrounding conditional logic is - * equivalent to checking whether ptr refers to a large - * run. - */ - assert(arena_mapbits_large_get(chunk, pageind) == 0); - } - } else + if (unlikely(!extent_slab_get(extent))) huge_prof_tctx_set(tsdn, extent, tctx); } JEMALLOC_INLINE void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, const void *old_ptr, prof_tctx_t *old_tctx) + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); + assert(!extent_slab_get(extent)); - if (unlikely(usize > SMALL_MAXCLASS || (ptr == old_ptr && - (uintptr_t)old_tctx > (uintptr_t)1U))) { - if (likely(extent_slab_get(extent))) { - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - size_t pageind; - arena_chunk_map_misc_t *elm; - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> - LG_PAGE; - assert(arena_mapbits_allocated_get(chunk, pageind) != - 0); - assert(arena_mapbits_large_get(chunk, pageind) != 0); - - elm = arena_miscelm_get_mutable(chunk, pageind); - atomic_write_p(&elm->prof_tctx_pun, - (prof_tctx_t *)(uintptr_t)1U); - } else - huge_prof_tctx_reset(tsdn, extent); - } + huge_prof_tctx_reset(tsdn, extent); } JEMALLOC_ALWAYS_INLINE void @@ -1231,20 +560,9 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) assert(ptr != NULL); - if (likely(extent_slab_get(extent))) { - const arena_chunk_t *chunk = - (const arena_chunk_t 
*)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - szind_t binind; - - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - binind = arena_mapbits_binind_get(chunk, pageind); - /* Small allocation. */ - assert(arena_mapbits_large_get(chunk, pageind) != 0 || - arena_ptr_small_binind_get(tsdn, ptr, - arena_mapbits_get(chunk, pageind)) == binind); - ret = index2size(binind); - } else + if (likely(extent_slab_get(extent))) + ret = index2size(extent_slab_data_get_const(extent)->binind); + else ret = huge_salloc(tsdn, extent); return (ret); @@ -1260,19 +578,13 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, if (likely(extent_slab_get(extent))) { /* Small allocation. */ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t mapbits = arena_mapbits_get(chunk, pageind); - assert(arena_mapbits_allocated_get(chunk, pageind) != 0); - assert((mapbits & CHUNK_MAP_LARGE) == 0); if (likely(tcache != NULL)) { - szind_t binind = arena_ptr_small_binind_get(tsdn, ptr, - mapbits); + szind_t binind = extent_slab_data_get(extent)->binind; tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, slow_path); } else { arena_dalloc_small(tsdn, extent_arena_get(extent), - chunk, extent, ptr, pageind); + extent, ptr); } } else { size_t usize = extent_usize_get(extent); @@ -1282,8 +594,8 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { - tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, ptr, - usize, slow_path); + tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, + ptr, usize, slow_path); } } else huge_dalloc(tsdn, extent); @@ -1302,15 +614,12 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, /* Small allocation. 
*/ if (likely(tcache != NULL)) { szind_t binind = size2index(size); + assert(binind == extent_slab_data_get(extent)->binind); tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, slow_path); } else { - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> LG_PAGE; arena_dalloc_small(tsdn, extent_arena_get(extent), - chunk, extent, ptr, pageind); + extent, ptr); } } else { if (likely(tcache != NULL) && size <= tcache_maxclass) { diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 0d456e2d..c2e34554 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -2,7 +2,7 @@ #ifdef JEMALLOC_H_TYPES /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_RUN_MAXREGS +#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS #define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) typedef struct bitmap_level_s bitmap_level_t; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 4e1e97ea..bfe61811 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -27,9 +27,6 @@ struct extent_s { /* True if extent is active (in use). */ bool e_active; - /* True if extent is dirty (touched). */ - bool e_dirty; - /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. @@ -50,21 +47,27 @@ struct extent_s { */ bool e_slab; - /* Profile counters, used for huge objects. */ union { - void *e_prof_tctx_pun; - prof_tctx_t *e_prof_tctx; + /* Small region slab metadata. */ + arena_slab_data_t e_slab_data; + + /* Profile counters, used for huge objects. */ + union { + void *e_prof_tctx_pun; + prof_tctx_t *e_prof_tctx; + }; }; - /* Linkage for arena's runs_dirty and chunks_cache rings. */ - arena_runs_dirty_link_t rd; - qr(extent_t) cc_link; + /* + * Linkage for arena's extents_dirty and arena_bin_t's slabs_full rings. 
+ */ + qr(extent_t) qr_link; union { /* Linkage for per size class address-ordered heaps. */ phn(extent_t) ph_link; - /* Linkage for arena's achunks, huge, and node_cache lists. */ + /* Linkage for arena's huge and extent_cache lists. */ ql_elm(extent_t) ql_link; }; }; @@ -102,11 +105,12 @@ void *extent_before_get(const extent_t *extent); void *extent_last_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); -bool extent_dirty_get(const extent_t *extent); bool extent_retained_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); bool extent_slab_get(const extent_t *extent); +arena_slab_data_t *extent_slab_data_get(extent_t *extent); +const arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); @@ -114,17 +118,15 @@ void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); void extent_usize_set(extent_t *extent, size_t usize); void extent_active_set(extent_t *extent, bool active); -void extent_dirty_set(extent_t *extent, bool dirty); void extent_zeroed_set(extent_t *extent, bool zeroed); void extent_committed_set(extent_t *extent, bool committed); void extent_slab_set(extent_t *extent, bool slab); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, size_t usize, bool active, bool dirty, bool zeroed, - bool committed, bool slab); -void extent_dirty_insert(extent_t *extent, - arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty); -void extent_dirty_remove(extent_t *extent); + size_t size, size_t usize, bool active, bool zeroed, bool committed, + bool slab); +void 
extent_ring_insert(extent_t *sentinel, extent_t *extent); +void extent_ring_remove(extent_t *extent); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -197,18 +199,11 @@ extent_active_get(const extent_t *extent) return (extent->e_active); } -JEMALLOC_INLINE bool -extent_dirty_get(const extent_t *extent) -{ - - return (extent->e_dirty); -} - JEMALLOC_INLINE bool extent_retained_get(const extent_t *extent) { - return (qr_next(&extent->rd, rd_link) == &extent->rd); + return (qr_next(extent, qr_link) == extent); } JEMALLOC_INLINE bool @@ -232,6 +227,22 @@ extent_slab_get(const extent_t *extent) return (extent->e_slab); } +JEMALLOC_INLINE arena_slab_data_t * +extent_slab_data_get(extent_t *extent) +{ + + assert(extent->e_slab); + return (&extent->e_slab_data); +} + +JEMALLOC_INLINE const arena_slab_data_t * +extent_slab_data_get_const(const extent_t *extent) +{ + + assert(extent->e_slab); + return (&extent->e_slab_data); +} + JEMALLOC_INLINE prof_tctx_t * extent_prof_tctx_get(const extent_t *extent) { @@ -296,13 +307,6 @@ extent_active_set(extent_t *extent, bool active) extent->e_active = active; } -JEMALLOC_INLINE void -extent_dirty_set(extent_t *extent, bool dirty) -{ - - extent->e_dirty = dirty; -} - JEMALLOC_INLINE void extent_zeroed_set(extent_t *extent, bool zeroed) { @@ -333,8 +337,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - size_t usize, bool active, bool dirty, bool zeroed, bool committed, - bool slab) + size_t usize, bool active, bool zeroed, bool committed, bool slab) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); @@ -344,31 +347,26 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_size_set(extent, size); extent_usize_set(extent, usize); extent_active_set(extent, active); - extent_dirty_set(extent, dirty); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); 
extent_slab_set(extent, slab); if (config_prof) extent_prof_tctx_set(extent, NULL); - qr_new(&extent->rd, rd_link); - qr_new(extent, cc_link); + qr_new(extent, qr_link); } JEMALLOC_INLINE void -extent_dirty_insert(extent_t *extent, - arena_runs_dirty_link_t *runs_dirty, extent_t *chunks_dirty) +extent_ring_insert(extent_t *sentinel, extent_t *extent) { - qr_meld(runs_dirty, &extent->rd, rd_link); - qr_meld(chunks_dirty, extent, cc_link); + qr_meld(sentinel, extent, qr_link); } JEMALLOC_INLINE void -extent_dirty_remove(extent_t *extent) +extent_ring_remove(extent_t *extent) { - qr_remove(&extent->rd, rd_link); - qr_remove(extent, cc_link); + qr_remove(extent, qr_link); } #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5f94d2c2..676c2431 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -5,8 +5,6 @@ arena_alloc_junk_small arena_basic_stats_merge arena_bin_index arena_bin_info -arena_bitselm_get_const -arena_bitselm_get_mutable arena_boot arena_choose arena_choose_hard @@ -43,38 +41,11 @@ arena_lg_dirty_mult_get arena_lg_dirty_mult_set arena_malloc arena_malloc_hard -arena_mapbits_allocated_get -arena_mapbits_binind_get -arena_mapbits_decommitted_get -arena_mapbits_dirty_get -arena_mapbits_get -arena_mapbits_internal_set -arena_mapbits_large_binind_set -arena_mapbits_large_get -arena_mapbits_large_set -arena_mapbits_large_size_get -arena_mapbits_size_decode -arena_mapbits_size_encode -arena_mapbits_small_runind_get -arena_mapbits_small_set -arena_mapbits_unallocated_set -arena_mapbits_unallocated_size_get -arena_mapbits_unallocated_size_set -arena_mapbits_unzeroed_get -arena_mapbitsp_get_const -arena_mapbitsp_get_mutable -arena_mapbitsp_read -arena_mapbitsp_write -arena_maxrun arena_maybe_purge arena_metadata_allocated_add arena_metadata_allocated_get arena_metadata_allocated_sub arena_migrate -arena_miscelm_get_const 
-arena_miscelm_get_mutable -arena_miscelm_to_pageind -arena_miscelm_to_rpages arena_new arena_nthreads_dec arena_nthreads_get @@ -93,14 +64,11 @@ arena_prof_promote arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set -arena_ptr_small_binind_get arena_purge arena_ralloc arena_ralloc_junk_large arena_ralloc_no_move -arena_rd_to_miscelm arena_reset -arena_run_to_miscelm arena_salloc arena_sdalloc arena_stats_merge @@ -213,22 +181,23 @@ extent_before_get extent_committed_get extent_committed_set extent_dalloc -extent_dirty_get -extent_dirty_insert -extent_dirty_remove -extent_dirty_set extent_init extent_last_get extent_past_get extent_prof_tctx_get extent_prof_tctx_set extent_retained_get +extent_ring_insert +extent_ring_remove extent_size_get extent_size_set extent_size_quantize_ceil extent_size_quantize_floor +extent_slab_data_get +extent_slab_data_get_const extent_slab_get extent_slab_set +extent_slab_data_get extent_usize_get extent_zeroed_get extent_zeroed_set @@ -309,8 +278,6 @@ malloc_tsd_no_cleanup malloc_vcprintf malloc_vsnprintf malloc_write -map_bias -map_misc_offset mb_write narenas_auto narenas_tdata_cleanup @@ -451,8 +418,6 @@ rtree_subtree_read rtree_subtree_read_hard rtree_subtree_tryread rtree_write -run_quantize_ceil -run_quantize_floor s2u s2u_compute s2u_lookup diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 7da20ad0..8fdc27f6 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -335,8 +335,8 @@ prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, const void *old_ptr, prof_tctx_t *tctx); -bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool commit, + prof_tctx_t *tctx); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t 
**tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); @@ -344,7 +344,8 @@ void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, - const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); + extent_t *old_extent, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx); void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize); #endif @@ -421,14 +422,14 @@ prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - const void *old_ptr, prof_tctx_t *old_tctx) +prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsdn, extent, ptr, usize, old_ptr, old_tctx); + arena_prof_tctx_reset(tsdn, extent, ptr, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -501,10 +502,10 @@ prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, - size_t old_usize, prof_tctx_t *old_tctx) + prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent, + const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) { - bool sampled, old_sampled; + bool sampled, old_sampled, moved; cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); @@ -523,19 +524,30 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, } } + /* + * The following code must differentiate among eight possible cases, + * based on three boolean conditions. 
+ */ sampled = ((uintptr_t)tctx > (uintptr_t)1U); old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + moved = (ptr != old_ptr); + + /* + * The following block must only execute if this is a non-moving + * reallocation, because for moving reallocation the old allocation will + * be deallocated via a separate call. + */ + if (unlikely(old_sampled) && !moved) + prof_free_sampled_object(tsd, old_usize, old_tctx); if (unlikely(sampled)) { prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, tctx); - } else { - prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, usize, old_ptr, - old_tctx); - } - - if (unlikely(old_sampled)) - prof_free_sampled_object(tsd, old_usize, old_tctx); + } else if (moved) { + prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, + (prof_tctx_t *)(uintptr_t)1U); + } else if (unlikely(old_sampled)) + prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 440953ad..b73064d1 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -50,7 +50,7 @@ reg_size_compute() { reg_size=$((${grp} + ${delta}*${ndelta})) } -run_size() { +slab_size() { lg_p=$1 lg_grp=$2 lg_delta=$3 @@ -59,22 +59,22 @@ run_size() { pow2 ${lg_p}; p=${pow2_result} reg_size_compute ${lg_grp} ${lg_delta} ${ndelta} - # Compute smallest run size that is an integer multiple of reg_size. - try_run_size=${p} - try_nregs=$((${try_run_size} / ${reg_size})) + # Compute smallest slab size that is an integer multiple of reg_size. 
+ try_slab_size=${p} + try_nregs=$((${try_slab_size} / ${reg_size})) perfect=0 while [ ${perfect} -eq 0 ] ; do - perfect_run_size=${try_run_size} + perfect_slab_size=${try_slab_size} perfect_nregs=${try_nregs} - try_run_size=$((${try_run_size} + ${p})) - try_nregs=$((${try_run_size} / ${reg_size})) - if [ ${perfect_run_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then + try_slab_size=$((${try_slab_size} + ${p})) + try_nregs=$((${try_slab_size} / ${reg_size})) + if [ ${perfect_slab_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then perfect=1 fi done - run_size_pgs=$((${perfect_run_size} / ${p})) + slab_size_pgs=$((${perfect_slab_size} / ${p})) } size_class() { @@ -117,7 +117,7 @@ size_class() { if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then bin="yes" - run_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${run_size_pgs} + slab_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${slab_size_pgs} else bin="no" pgs=0 @@ -278,7 +278,7 @@ cat < b_miscelm) - (a_miscelm < b_miscelm)); -} - -/* Generate pairing heap functions. */ -ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, - ph_link, arena_run_addr_comp) - -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) -#endif -static size_t -run_quantize_floor(size_t size) -{ - size_t ret; - pszind_t pind; - - assert(size > 0); - assert(size <= HUGE_MAXCLASS); - assert((size & PAGE_MASK) == 0); - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - pind = psz2ind(size - large_pad + 1); - if (pind == 0) { - /* - * Avoid underflow. This short-circuit would also do the right - * thing for all sizes in the range for which there are - * PAGE-spaced size classes, but it's simplest to just handle - * the one case that would cause erroneous results. 
- */ - return (size); - } - ret = pind2sz(pind - 1) + large_pad; - assert(ret <= size); - return (ret); -} -#ifdef JEMALLOC_JET -#undef run_quantize_floor -#define run_quantize_floor JEMALLOC_N(run_quantize_floor) -run_quantize_t *run_quantize_floor = JEMALLOC_N(n_run_quantize_floor); -#endif - -#ifdef JEMALLOC_JET -#undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(n_run_quantize_ceil) -#endif -static size_t -run_quantize_ceil(size_t size) -{ - size_t ret; - - assert(size > 0); - assert(size <= HUGE_MAXCLASS); - assert((size & PAGE_MASK) == 0); - - ret = run_quantize_floor(size); - if (ret < size) { - /* - * Skip a quantization that may have an adequately large run, - * because under-sized runs may be mixed in. This only happens - * when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; - } - return (ret); -} -#ifdef JEMALLOC_JET -#undef run_quantize_ceil -#define run_quantize_ceil JEMALLOC_N(run_quantize_ceil) -run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); -#endif - -static void -arena_avail_insert(arena_t *arena, extent_t *extent, size_t pageind, - size_t npages) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( - extent, arena_miscelm_get_const(chunk, pageind)))); - assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE)); - arena_run_heap_insert(&arena->runs_avail[pind], - arena_miscelm_get_mutable(chunk, pageind)); -} - -static void -arena_avail_remove(arena_t *arena, extent_t *extent, size_t pageind, - size_t npages) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( - extent, arena_miscelm_get_const(chunk, 
pageind)))); - assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE)); - arena_run_heap_remove(&arena->runs_avail[pind], - arena_miscelm_get_mutable(chunk, pageind)); -} - -static void -arena_run_dirty_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages) -{ - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); - - assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE)); - assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); - assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == - CHUNK_MAP_DIRTY); - - qr_new(&miscelm->rd, rd_link); - qr_meld(&arena->runs_dirty, &miscelm->rd, rd_link); - arena->ndirty += npages; -} - -static void -arena_run_dirty_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, - size_t npages) -{ - arena_chunk_map_misc_t *miscelm = arena_miscelm_get_mutable(chunk, - pageind); - - assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> - LG_PAGE)); - assert(arena_mapbits_dirty_get(chunk, pageind) == CHUNK_MAP_DIRTY); - assert(arena_mapbits_dirty_get(chunk, pageind+npages-1) == - CHUNK_MAP_DIRTY); - - qr_remove(&miscelm->rd, rd_link); - assert(arena->ndirty >= npages); - arena->ndirty -= npages; -} - static size_t arena_chunk_dirty_npages(const extent_t *extent) { @@ -269,8 +105,7 @@ arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) { if (cache) { - extent_dirty_insert(extent, &arena->runs_dirty, - &arena->chunks_cache); + extent_ring_insert(&arena->extents_dirty, extent); arena->ndirty += arena_chunk_dirty_npages(extent); } } @@ -280,54 +115,49 @@ arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool dirty) { if (dirty) { - extent_dirty_remove(extent); + extent_ring_remove(extent); assert(arena->ndirty >= arena_chunk_dirty_npages(extent)); arena->ndirty -= arena_chunk_dirty_npages(extent); } } JEMALLOC_INLINE_C void * -arena_run_reg_alloc(tsdn_t *tsdn, 
arena_run_t *run, +arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, const arena_bin_info_t *bin_info) { void *ret; - extent_t *extent; + arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; - arena_chunk_map_misc_t *miscelm; - void *rpages; - assert(run->nfree > 0); - assert(!bitmap_full(run->bitmap, &bin_info->bitmap_info)); + assert(slab_data->nfree > 0); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - extent = iealloc(tsdn, run); - regind = (unsigned)bitmap_sfu(run->bitmap, &bin_info->bitmap_info); - miscelm = arena_run_to_miscelm(extent, run); - rpages = arena_miscelm_to_rpages(extent, miscelm); - ret = (void *)((uintptr_t)rpages + (uintptr_t)(bin_info->reg_size * - regind)); - run->nfree--; + regind = (unsigned)bitmap_sfu(slab_data->bitmap, + &bin_info->bitmap_info); + ret = (void *)((uintptr_t)extent_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + slab_data->nfree--; return (ret); } JEMALLOC_INLINE_C size_t -arena_run_regind(extent_t *extent, arena_run_t *run, - const arena_bin_info_t *bin_info, const void *ptr) +arena_slab_regind(extent_t *slab, const arena_bin_info_t *bin_info, + const void *ptr) { size_t diff, interval, shift, regind; - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); - void *rpages = arena_miscelm_to_rpages(extent, miscelm); - /* - * Freeing a pointer lower than region zero can cause assertion - * failure. - */ - assert((uintptr_t)ptr >= (uintptr_t)rpages); + /* Freeing a pointer outside the slab can cause assertion failure. */ + assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % + (uintptr_t)bin_info->reg_size == 0); /* * Avoid doing division with a variable divisor if possible. Using * actual division here can reduce allocator throughput by over 20%! 
*/ - diff = (size_t)((uintptr_t)ptr - (uintptr_t)rpages); + diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); /* Rescale (factor powers of 2 out of the numerator and denominator). */ interval = bin_info->reg_size; @@ -353,7 +183,7 @@ arena_run_regind(extent_t *extent, arena_run_t *run, * divide by 0, and 1 and 2 are both powers of two, which are * handled above. */ -#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_RUN_MAXREGS) +#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_SLAB_MAXREGS) #define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) static const size_t interval_invs[] = { SIZE_INV(3), @@ -382,48 +212,19 @@ arena_run_regind(extent_t *extent, arena_run_t *run, } JEMALLOC_INLINE_C void -arena_run_reg_dalloc(tsdn_t *tsdn, arena_run_t *run, extent_t *extent, - void *ptr) +arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, + arena_slab_data_t *slab_data, void *ptr) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - size_t mapbits = arena_mapbits_get(chunk, pageind); - szind_t binind = arena_ptr_small_binind_get(tsdn, ptr, mapbits); + szind_t binind = slab_data->binind; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size_t regind = arena_run_regind(extent, run, bin_info, ptr); + size_t regind = arena_slab_regind(slab, bin_info, ptr); - assert(run->nfree < bin_info->nregs); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - - (uintptr_t)arena_miscelm_to_rpages(extent, - arena_run_to_miscelm(extent, run))) % (uintptr_t)bin_info->reg_size - == 0); - assert((uintptr_t)ptr >= - (uintptr_t)arena_miscelm_to_rpages(extent, - arena_run_to_miscelm(extent, run))); + assert(slab_data->nfree < bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. 
*/ - assert(bitmap_get(run->bitmap, &bin_info->bitmap_info, regind)); + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - bitmap_unset(run->bitmap, &bin_info->bitmap_info, regind); - run->nfree++; -} - -JEMALLOC_INLINE_C void -arena_run_zero(arena_chunk_t *chunk, size_t run_ind, size_t npages) -{ - - memset((void *)((uintptr_t)chunk + (run_ind << LG_PAGE)), 0, - (npages << LG_PAGE)); -} - -JEMALLOC_INLINE_C void -arena_run_page_validate_zeroed(arena_chunk_t *chunk, size_t run_ind) -{ - size_t i; - UNUSED size_t *p = (size_t *)((uintptr_t)chunk + (run_ind << LG_PAGE)); - - for (i = 0; i < PAGE / sizeof(size_t); i++) - assert(p[i] == 0); + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + slab_data->nfree++; } static void @@ -454,373 +255,6 @@ arena_nactive_sub(arena_t *arena, size_t sub_pages) arena->nactive -= sub_pages; } -static void -arena_run_split_remove(arena_t *arena, extent_t *extent, size_t run_ind, - size_t flag_dirty, size_t flag_decommitted, size_t need_pages) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t total_pages, rem_pages; - - assert(flag_dirty == 0 || flag_decommitted == 0); - - total_pages = arena_mapbits_unallocated_size_get(chunk, run_ind) >> - LG_PAGE; - assert(arena_mapbits_dirty_get(chunk, run_ind+total_pages-1) == - flag_dirty); - assert(need_pages <= total_pages); - rem_pages = total_pages - need_pages; - - arena_avail_remove(arena, extent, run_ind, total_pages); - if (flag_dirty != 0) - arena_run_dirty_remove(arena, chunk, run_ind, total_pages); - arena_nactive_add(arena, need_pages); - - /* Keep track of trailing unused pages for later use. */ - if (rem_pages > 0) { - size_t flags = flag_dirty | flag_decommitted; - size_t flag_unzeroed_mask = (flags == 0) ? 
CHUNK_MAP_UNZEROED : - 0; - - arena_mapbits_unallocated_set(chunk, run_ind+need_pages, - (rem_pages << LG_PAGE), flags | - (arena_mapbits_unzeroed_get(chunk, run_ind+need_pages) & - flag_unzeroed_mask)); - arena_mapbits_unallocated_set(chunk, run_ind+total_pages-1, - (rem_pages << LG_PAGE), flags | - (arena_mapbits_unzeroed_get(chunk, run_ind+total_pages-1) & - flag_unzeroed_mask)); - if (flag_dirty != 0) { - arena_run_dirty_insert(arena, chunk, run_ind+need_pages, - rem_pages); - } - arena_avail_insert(arena, extent, run_ind+need_pages, - rem_pages); - } -} - -static bool -arena_run_split_large_helper(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - arena_run_t *run, size_t size, bool remove, bool zero) -{ - arena_chunk_t *chunk; - arena_chunk_map_misc_t *miscelm; - size_t flag_dirty, flag_decommitted, run_ind, need_pages; - size_t flag_unzeroed_mask; - - chunk = (arena_chunk_t *)extent_base_get(extent); - miscelm = arena_run_to_miscelm(extent, run); - run_ind = arena_miscelm_to_pageind(extent, miscelm); - flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind); - need_pages = (size >> LG_PAGE); - assert(need_pages > 0); - - if (flag_decommitted != 0 && chunk_commit_wrapper(tsdn, arena, - &arena->chunk_hooks, extent, run_ind << LG_PAGE, size)) - return (true); - - if (remove) { - arena_run_split_remove(arena, extent, run_ind, flag_dirty, - flag_decommitted, need_pages); - } - - if (zero) { - if (flag_decommitted != 0) - ; /* The run is untouched, and therefore zeroed. */ - else if (flag_dirty != 0) { - /* The run is dirty, so all pages must be zeroed. */ - arena_run_zero(chunk, run_ind, need_pages); - } else { - /* - * The run is clean, so some pages may be zeroed (i.e. - * never before touched). 
- */ - size_t i; - for (i = 0; i < need_pages; i++) { - if (arena_mapbits_unzeroed_get(chunk, run_ind+i) - != 0) - arena_run_zero(chunk, run_ind+i, 1); - else if (config_debug) { - arena_run_page_validate_zeroed(chunk, - run_ind+i); - } - } - } - } - - /* - * Set the last element first, in case the run only contains one page - * (i.e. both statements set the same element). - */ - flag_unzeroed_mask = (flag_dirty | flag_decommitted) == 0 ? - CHUNK_MAP_UNZEROED : 0; - arena_mapbits_large_set(chunk, run_ind+need_pages-1, 0, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, - run_ind+need_pages-1))); - arena_mapbits_large_set(chunk, run_ind, size, flag_dirty | - (flag_unzeroed_mask & arena_mapbits_unzeroed_get(chunk, run_ind))); - return (false); -} - -static bool -arena_run_split_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - arena_run_t *run, size_t size, bool zero) -{ - - return (arena_run_split_large_helper(tsdn, arena, extent, run, size, - true, zero)); -} - -static bool -arena_run_split_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - arena_run_t *run, size_t size, szind_t binind) -{ - arena_chunk_t *chunk; - arena_chunk_map_misc_t *miscelm; - size_t flag_dirty, flag_decommitted, run_ind, need_pages, i; - - assert(binind != BININD_INVALID); - - chunk = (arena_chunk_t *)extent_base_get(extent); - miscelm = arena_run_to_miscelm(extent, run); - run_ind = arena_miscelm_to_pageind(extent, miscelm); - flag_dirty = arena_mapbits_dirty_get(chunk, run_ind); - flag_decommitted = arena_mapbits_decommitted_get(chunk, run_ind); - need_pages = (size >> LG_PAGE); - assert(need_pages > 0); - - if (flag_decommitted != 0 && chunk_commit_wrapper(tsdn, arena, - &arena->chunk_hooks, extent, run_ind << LG_PAGE, size)) - return (true); - - arena_run_split_remove(arena, extent, run_ind, flag_dirty, - flag_decommitted, need_pages); - - for (i = 0; i < need_pages; i++) { - size_t flag_unzeroed = arena_mapbits_unzeroed_get(chunk, - run_ind+i); - 
arena_mapbits_small_set(chunk, run_ind+i, i, binind, - flag_unzeroed); - if (config_debug && flag_dirty == 0 && flag_unzeroed == 0) - arena_run_page_validate_zeroed(chunk, run_ind+i); - } - return (false); -} - -static extent_t * -arena_chunk_init_spare(arena_t *arena) -{ - extent_t *extent; - - assert(arena->spare != NULL); - - extent = arena->spare; - arena->spare = NULL; - - assert(arena_mapbits_allocated_get((arena_chunk_t *) - extent_base_get(extent), map_bias) == 0); - assert(arena_mapbits_allocated_get((arena_chunk_t *) - extent_base_get(extent), chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get((arena_chunk_t *) - extent_base_get(extent), map_bias) == arena_maxrun); - assert(arena_mapbits_unallocated_size_get((arena_chunk_t *) - extent_base_get(extent), chunk_npages-1) == arena_maxrun); - assert(arena_mapbits_dirty_get((arena_chunk_t *) - extent_base_get(extent), map_bias) == - arena_mapbits_dirty_get((arena_chunk_t *)extent_base_get(extent), - chunk_npages-1)); - - return (extent); -} - -static extent_t * -arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) -{ - extent_t *extent; - - malloc_mutex_unlock(tsdn, &arena->lock); - - extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, - 0, CACHELINE, zero, commit, true); - if (extent != NULL && !*commit) { - /* Commit header. 
*/ - if (chunk_commit_wrapper(tsdn, arena, chunk_hooks, extent, 0, - map_bias << LG_PAGE)) { - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, extent); - extent = NULL; - } - } - - malloc_mutex_lock(tsdn, &arena->lock); - - return (extent); -} - -static extent_t * -arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, - bool *commit) -{ - extent_t *extent; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - - extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - chunksize, 0, CACHELINE, zero, true); - if (extent != NULL) - *commit = true; - if (extent == NULL) { - extent = arena_chunk_alloc_internal_hard(tsdn, arena, - &chunk_hooks, zero, commit); - if (extent == NULL) - return (NULL); - } - assert(extent_slab_get(extent)); - - if (config_stats) { - arena->stats.mapped += extent_size_get(extent); - arena->stats.metadata_mapped += (map_bias << LG_PAGE); - } - - return (extent); -} - -static extent_t * -arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) -{ - extent_t *extent; - bool zero, commit; - size_t flag_unzeroed, flag_decommitted, i; - - assert(arena->spare == NULL); - - zero = false; - commit = false; - extent = arena_chunk_alloc_internal(tsdn, arena, &zero, &commit); - if (extent == NULL) - return (NULL); - - /* - * Initialize the map to contain one maximal free untouched run. Mark - * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed - * or decommitted chunk. - */ - flag_unzeroed = (zero || !commit) ? 0 : CHUNK_MAP_UNZEROED; - flag_decommitted = commit ? 0 : CHUNK_MAP_DECOMMITTED; - arena_mapbits_unallocated_set((arena_chunk_t *)extent_base_get(extent), - map_bias, arena_maxrun, flag_unzeroed | flag_decommitted); - /* - * There is no need to initialize the internal page map entries unless - * the chunk is not zeroed. 
- */ - if (!zero) { - for (i = map_bias+1; i < chunk_npages-1; i++) { - arena_mapbits_internal_set((arena_chunk_t *) - extent_base_get(extent), i, flag_unzeroed); - } - } else { - if (config_debug) { - for (i = map_bias+1; i < chunk_npages-1; i++) { - assert(arena_mapbits_unzeroed_get( - (arena_chunk_t *)extent_base_get(extent), i) - == flag_unzeroed); - } - } - } - arena_mapbits_unallocated_set((arena_chunk_t *)extent_base_get(extent), - chunk_npages-1, arena_maxrun, flag_unzeroed); - - return (extent); -} - -static extent_t * -arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) -{ - extent_t *extent; - - if (arena->spare != NULL) - extent = arena_chunk_init_spare(arena); - else { - extent = arena_chunk_init_hard(tsdn, arena); - if (extent == NULL) - return (NULL); - } - - ql_elm_new(extent, ql_link); - ql_tail_insert(&arena->achunks, extent, ql_link); - arena_avail_insert(arena, extent, map_bias, chunk_npages-map_bias); - - return (extent); -} - -static void -arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, extent_t *extent) -{ - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - - extent_committed_set(extent, - (arena_mapbits_decommitted_get((arena_chunk_t *) - extent_base_get(extent), map_bias) == 0)); - if (!extent_committed_get(extent)) { - /* - * Decommit the header. Mark the chunk as decommitted even if - * header decommit fails, since treating a partially committed - * chunk as committed has a high potential for causing later - * access of decommitted memory. 
- */ - chunk_decommit_wrapper(tsdn, arena, &chunk_hooks, extent, 0, - map_bias << LG_PAGE); - } - - if (config_stats) { - arena->stats.mapped -= extent_size_get(extent); - arena->stats.metadata_mapped -= (map_bias << LG_PAGE); - } - - arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, extent); -} - -static void -arena_spare_discard(tsdn_t *tsdn, arena_t *arena, extent_t *spare) -{ - - assert(arena->spare != spare); - - if (arena_mapbits_dirty_get((arena_chunk_t *)extent_base_get(spare), - map_bias) != 0) { - arena_run_dirty_remove(arena, (arena_chunk_t *) - extent_base_get(spare), map_bias, chunk_npages-map_bias); - } - - arena_chunk_discard(tsdn, arena, spare); -} - -static void -arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) -{ - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - extent_t *spare; - - assert(arena_mapbits_allocated_get(chunk, map_bias) == 0); - assert(arena_mapbits_allocated_get(chunk, chunk_npages-1) == 0); - assert(arena_mapbits_unallocated_size_get(chunk, map_bias) == - arena_maxrun); - assert(arena_mapbits_unallocated_size_get(chunk, chunk_npages-1) == - arena_maxrun); - assert(arena_mapbits_dirty_get(chunk, map_bias) == - arena_mapbits_dirty_get(chunk, chunk_npages-1)); - assert(arena_mapbits_decommitted_get(chunk, map_bias) == - arena_mapbits_decommitted_get(chunk, chunk_npages-1)); - - /* Remove run from runs_avail, so that the arena does not use it. */ - arena_avail_remove(arena, extent, map_bias, chunk_npages-map_bias); - - ql_remove(&arena->achunks, extent, ql_link); - spare = arena->spare; - arena->spare = extent; - if (spare != NULL) - arena_spare_discard(tsdn, arena, spare); -} - static void arena_huge_malloc_stats_update(arena_t *arena, size_t usize) { @@ -986,77 +420,6 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, malloc_mutex_unlock(tsdn, &arena->lock); } -/* - * Do first-best-fit run selection, i.e. select the lowest run that best fits. 
- * Run sizes are indexed, so not all candidate runs are necessarily exactly the - * same size. - */ -static arena_run_t * -arena_run_first_best_fit(arena_t *arena, size_t size) -{ - pszind_t pind, i; - - pind = psz2ind(run_quantize_ceil(size)); - - for (i = pind; pind2sz(i) <= arena_maxrun; i++) { - arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - &arena->runs_avail[i]); - if (miscelm != NULL) - return (&miscelm->run); - } - - return (NULL); -} - -static arena_run_t * -arena_run_alloc_small_helper(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t binind) -{ - arena_run_t *run = arena_run_first_best_fit(arena, size); - if (run != NULL) { - if (arena_run_split_small(tsdn, arena, iealloc(tsdn, run), run, - size, binind)) - run = NULL; - } - return (run); -} - -static arena_run_t * -arena_run_alloc_small(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t binind) -{ - arena_run_t *run; - extent_t *extent; - - assert(size <= arena_maxrun); - assert(size == PAGE_CEILING(size)); - assert(binind != BININD_INVALID); - - /* Search the arena's chunks for the lowest best fit. */ - run = arena_run_alloc_small_helper(tsdn, arena, size, binind); - if (run != NULL) - return (run); - - /* - * No usable runs. Create a new chunk from which to allocate the run. - */ - extent = arena_chunk_alloc(tsdn, arena); - if (extent != NULL) { - run = &arena_miscelm_get_mutable( - (arena_chunk_t *)extent_base_get(extent), map_bias)->run; - if (arena_run_split_small(tsdn, arena, iealloc(tsdn, run), run, - size, binind)) - run = NULL; - return (run); - } - - /* - * arena_chunk_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped arena->lock in - * arena_chunk_alloc(), so search one more time. 
- */ - return (arena_run_alloc_small_helper(tsdn, arena, size, binind)); -} - static bool arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) { @@ -1360,120 +723,45 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) static size_t arena_dirty_count(tsdn_t *tsdn, arena_t *arena) { + extent_t *extent; size_t ndirty = 0; - arena_runs_dirty_link_t *rdelm; - extent_t *chunkselm; - for (rdelm = qr_next(&arena->runs_dirty, rd_link), - chunkselm = qr_next(&arena->chunks_cache, cc_link); - rdelm != &arena->runs_dirty; rdelm = qr_next(rdelm, rd_link)) { - size_t npages; - - if (rdelm == &chunkselm->rd) { - npages = extent_size_get(chunkselm) >> LG_PAGE; - chunkselm = qr_next(chunkselm, cc_link); - } else { - extent_t *extent = iealloc(tsdn, rdelm); - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(extent, rdelm); - size_t pageind = arena_miscelm_to_pageind(extent, - miscelm); - assert(arena_mapbits_allocated_get(chunk, pageind) == - 0); - assert(arena_mapbits_large_get(chunk, pageind) == 0); - assert(arena_mapbits_dirty_get(chunk, pageind) != 0); - npages = arena_mapbits_unallocated_size_get(chunk, - pageind) >> LG_PAGE; - } - ndirty += npages; - } + for (extent = qr_next(&arena->extents_dirty, qr_link); extent != + &arena->extents_dirty; extent = qr_next(extent, qr_link)) + ndirty += extent_size_get(extent) >> LG_PAGE; return (ndirty); } static size_t arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - size_t ndirty_limit, arena_runs_dirty_link_t *purge_runs_sentinel, - extent_t *purge_chunks_sentinel) + size_t ndirty_limit, extent_t *purge_extents_sentinel) { - arena_runs_dirty_link_t *rdelm, *rdelm_next; - extent_t *chunkselm; + extent_t *extent, *next; size_t nstashed = 0; - /* Stash runs/chunks according to ndirty_limit. 
*/ - for (rdelm = qr_next(&arena->runs_dirty, rd_link), - chunkselm = qr_next(&arena->chunks_cache, cc_link); - rdelm != &arena->runs_dirty; rdelm = rdelm_next) { + /* Stash extents according to ndirty_limit. */ + for (extent = qr_next(&arena->extents_dirty, qr_link); extent != + &arena->extents_dirty; extent = next) { size_t npages; - rdelm_next = qr_next(rdelm, rd_link); + bool zero; + UNUSED extent_t *textent; - if (rdelm == &chunkselm->rd) { - extent_t *chunkselm_next; - bool zero; - UNUSED extent_t *extent; + npages = extent_size_get(extent) >> LG_PAGE; + if (opt_purge == purge_mode_decay && arena->ndirty - (nstashed + + npages) < ndirty_limit) + break; - npages = extent_size_get(chunkselm) >> LG_PAGE; - if (opt_purge == purge_mode_decay && arena->ndirty - - (nstashed + npages) < ndirty_limit) - break; - - chunkselm_next = qr_next(chunkselm, cc_link); - /* Allocate. */ - zero = false; - extent = arena_chunk_cache_alloc_locked(tsdn, arena, - chunk_hooks, extent_base_get(chunkselm), - extent_size_get(chunkselm), 0, CACHELINE, &zero, - false); - assert(extent == chunkselm); - assert(zero == extent_zeroed_get(chunkselm)); - extent_dirty_insert(chunkselm, purge_runs_sentinel, - purge_chunks_sentinel); - assert(npages == (extent_size_get(chunkselm) >> - LG_PAGE)); - chunkselm = chunkselm_next; - } else { - extent_t *extent = iealloc(tsdn, rdelm); - arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(extent, rdelm); - size_t pageind = arena_miscelm_to_pageind(extent, - miscelm); - arena_run_t *run = &miscelm->run; - size_t run_size = - arena_mapbits_unallocated_size_get((arena_chunk_t *) - extent_base_get(extent), pageind); - - npages = run_size >> LG_PAGE; - if (opt_purge == purge_mode_decay && arena->ndirty - - (nstashed + npages) < ndirty_limit) - break; - - assert(pageind + npages <= chunk_npages); - assert(arena_mapbits_dirty_get((arena_chunk_t *) - extent_base_get(extent), pageind) == - arena_mapbits_dirty_get((arena_chunk_t *) - extent_base_get(extent), 
pageind+npages-1)); - - /* - * If purging the spare chunk's run, make it available - * prior to allocation. - */ - if (extent == arena->spare) - arena_chunk_alloc(tsdn, arena); - - /* Temporarily allocate the free dirty run. */ - arena_run_split_large(tsdn, arena, extent, run, - run_size, false); - /* Stash. */ - if (false) - qr_new(rdelm, rd_link); /* Redundant. */ - else { - assert(qr_next(rdelm, rd_link) == rdelm); - assert(qr_prev(rdelm, rd_link) == rdelm); - } - qr_meld(purge_runs_sentinel, rdelm, rd_link); - } + next = qr_next(extent, qr_link); + /* Allocate. */ + zero = false; + textent = arena_chunk_cache_alloc_locked(tsdn, arena, + chunk_hooks, extent_base_get(extent), + extent_size_get(extent), 0, CACHELINE, &zero, false); + assert(textent == extent); + assert(zero == extent_zeroed_get(extent)); + extent_ring_remove(extent); + extent_ring_insert(purge_extents_sentinel, extent); nstashed += npages; if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= @@ -1486,90 +774,26 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static size_t arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - arena_runs_dirty_link_t *purge_runs_sentinel, - extent_t *purge_chunks_sentinel) + extent_t *purge_extents_sentinel) { - size_t npurged, nmadvise; - arena_runs_dirty_link_t *rdelm; - extent_t *chunkselm; + UNUSED size_t nmadvise; + size_t npurged; + extent_t *extent, *next; if (config_stats) nmadvise = 0; npurged = 0; - malloc_mutex_unlock(tsdn, &arena->lock); - for (rdelm = qr_next(purge_runs_sentinel, rd_link), - chunkselm = qr_next(purge_chunks_sentinel, cc_link); - rdelm != purge_runs_sentinel; rdelm = qr_next(rdelm, rd_link)) { - size_t npages; - - if (rdelm == &chunkselm->rd) { - /* - * Don't actually purge the chunk here because 1) - * chunkselm is embedded in the chunk and must remain - * valid, and 2) we deallocate the chunk in - * arena_unstash_purged(), where it is destroyed, - * decommitted, or 
purged, depending on chunk - * deallocation policy. - */ - size_t size = extent_size_get(chunkselm); - npages = size >> LG_PAGE; - chunkselm = qr_next(chunkselm, cc_link); - } else { - size_t pageind, run_size, flag_unzeroed, flags, i; - bool decommitted; - extent_t *extent = iealloc(tsdn, rdelm); - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(extent, rdelm); - pageind = arena_miscelm_to_pageind(extent, miscelm); - run_size = arena_mapbits_large_size_get(chunk, pageind); - npages = run_size >> LG_PAGE; - - assert(pageind + npages <= chunk_npages); - assert(!arena_mapbits_decommitted_get(chunk, pageind)); - assert(!arena_mapbits_decommitted_get(chunk, - pageind+npages-1)); - decommitted = !chunk_decommit_wrapper(tsdn, arena, - chunk_hooks, extent, pageind << LG_PAGE, npages << - LG_PAGE); - if (decommitted) { - flag_unzeroed = 0; - flags = CHUNK_MAP_DECOMMITTED; - } else { - flag_unzeroed = chunk_purge_wrapper(tsdn, arena, - chunk_hooks, extent, pageind << LG_PAGE, - run_size) ? CHUNK_MAP_UNZEROED : 0; - flags = flag_unzeroed; - } - arena_mapbits_large_set(chunk, pageind+npages-1, 0, - flags); - arena_mapbits_large_set(chunk, pageind, run_size, - flags); - - /* - * Set the unzeroed flag for internal pages, now that - * chunk_purge_wrapper() has returned whether the pages - * were zeroed as a side effect of purging. This chunk - * map modification is safe even though the arena mutex - * isn't currently owned by this thread, because the run - * is marked as allocated, thus protecting it from being - * modified by any other thread. As long as these - * writes don't perturb the first and last elements' - * CHUNK_MAP_ALLOCATED bits, behavior is well defined. 
- */ - for (i = 1; i < npages-1; i++) { - arena_mapbits_internal_set(chunk, pageind+i, - flag_unzeroed); - } - } - - npurged += npages; + for (extent = qr_next(purge_extents_sentinel, qr_link); extent != + purge_extents_sentinel; extent = next) { if (config_stats) nmadvise++; + npurged += extent_size_get(extent) >> LG_PAGE; + + next = qr_next(extent, qr_link); + extent_ring_remove(extent); + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, extent); } - malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { arena->stats.nmadvise += nmadvise; @@ -1579,49 +803,12 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (npurged); } -static void -arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - arena_runs_dirty_link_t *purge_runs_sentinel, - extent_t *purge_chunks_sentinel) -{ - arena_runs_dirty_link_t *rdelm, *rdelm_next; - extent_t *chunkselm; - - /* Deallocate chunks/runs. */ - for (rdelm = qr_next(purge_runs_sentinel, rd_link), - chunkselm = qr_next(purge_chunks_sentinel, cc_link); - rdelm != purge_runs_sentinel; rdelm = rdelm_next) { - rdelm_next = qr_next(rdelm, rd_link); - if (rdelm == &chunkselm->rd) { - extent_t *chunkselm_next = qr_next(chunkselm, cc_link); - extent_dirty_remove(chunkselm); - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, - chunkselm); - chunkselm = chunkselm_next; - } else { - extent_t *extent = iealloc(tsdn, rdelm); - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - arena_chunk_map_misc_t *miscelm = - arena_rd_to_miscelm(extent, rdelm); - size_t pageind = arena_miscelm_to_pageind(extent, - miscelm); - bool decommitted = (arena_mapbits_decommitted_get(chunk, - pageind) != 0); - arena_run_t *run = &miscelm->run; - qr_remove(rdelm, rd_link); - arena_run_dalloc(tsdn, arena, extent, run, false, true, - decommitted); - } - } -} - /* * NB: ndirty_limit is interpreted differently depending on opt_purge: - * - purge_mode_ratio: Purge as few dirty run/chunks as 
possible to reach the + * - purge_mode_ratio: Purge as few dirty extents as possible to reach the * desired state: * (arena->ndirty <= ndirty_limit) - * - purge_mode_decay: Purge as many dirty runs/chunks as possible without + * - purge_mode_decay: Purge as many dirty extents as possible without * violating the invariant: * (arena->ndirty >= ndirty_limit) */ @@ -1630,8 +817,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); size_t npurge, npurged; - arena_runs_dirty_link_t purge_runs_sentinel; - extent_t purge_chunks_sentinel; + extent_t purge_extents_sentinel; arena->purging = true; @@ -1646,19 +832,16 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) assert(opt_purge != purge_mode_ratio || (arena->nactive >> arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); - qr_new(&purge_runs_sentinel, rd_link); - extent_init(&purge_chunks_sentinel, arena, NULL, 0, 0, false, false, - false, false, false); + extent_init(&purge_extents_sentinel, arena, NULL, 0, 0, false, false, + false, false); npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, - &purge_runs_sentinel, &purge_chunks_sentinel); + &purge_extents_sentinel); if (npurge == 0) goto label_return; npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks, - &purge_runs_sentinel, &purge_chunks_sentinel); + &purge_extents_sentinel); assert(npurged == npurge); - arena_unstash_purged(tsdn, arena, &chunk_hooks, &purge_runs_sentinel, - &purge_chunks_sentinel); if (config_stats) arena->stats.npurge++; @@ -1679,6 +862,15 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) malloc_mutex_unlock(tsdn, &arena->lock); } +static void +arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) +{ + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + + arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); + arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, slab); +} + void 
arena_reset(tsd_t *tsd, arena_t *arena) { @@ -1724,367 +916,225 @@ arena_reset(tsd_t *tsd, arena_t *arena) /* Bins. */ for (i = 0; i < NBINS; i++) { + extent_t *slab, *next; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - bin->runcur = NULL; - arena_run_heap_new(&bin->runs); + if (bin->slabcur != NULL) { + arena_slab_dalloc(tsd_tsdn(tsd), arena, bin->slabcur); + bin->slabcur = NULL; + } + while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != + NULL) + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + for (slab = qr_next(&bin->slabs_full, qr_link); slab != + &bin->slabs_full; slab = next) { + next = qr_next(slab, qr_link); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + } if (config_stats) { bin->stats.curregs = 0; - bin->stats.curruns = 0; + bin->stats.curslabs = 0; } malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - /* - * Re-initialize runs_dirty such that the chunks_cache and runs_dirty - * chains directly correspond. - */ - qr_new(&arena->runs_dirty, rd_link); - for (extent = qr_next(&arena->chunks_cache, cc_link); - extent != &arena->chunks_cache; extent = qr_next(extent, cc_link)) { - qr_new(&extent->rd, rd_link); - qr_meld(&arena->runs_dirty, &extent->rd, rd_link); - } - - /* Arena chunks. */ - for (extent = ql_last(&arena->achunks, ql_link); extent != NULL; extent - = ql_last(&arena->achunks, ql_link)) { - ql_remove(&arena->achunks, extent, ql_link); - arena_chunk_discard(tsd_tsdn(tsd), arena, extent); - } - - /* Spare. 
*/ - if (arena->spare != NULL) { - arena_chunk_discard(tsd_tsdn(tsd), arena, arena->spare); - arena->spare = NULL; - } - assert(!arena->purging); arena->nactive = 0; - for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); - i++) - arena_run_heap_new(&arena->runs_avail[i]); - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } static void -arena_run_coalesce(arena_t *arena, extent_t *extent, size_t *p_size, - size_t *p_run_ind, size_t *p_run_pages, size_t flag_dirty, - size_t flag_decommitted) +arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { - arena_chunk_t *chunk = (arena_chunk_t *)extent_base_get(extent); - size_t size = *p_size; - size_t run_ind = *p_run_ind; - size_t run_pages = *p_run_pages; - /* Try to coalesce forward. */ - if (run_ind + run_pages < chunk_npages && - arena_mapbits_allocated_get(chunk, run_ind+run_pages) == 0 && - arena_mapbits_dirty_get(chunk, run_ind+run_pages) == flag_dirty && - arena_mapbits_decommitted_get(chunk, run_ind+run_pages) == - flag_decommitted) { - size_t nrun_size = arena_mapbits_unallocated_size_get(chunk, - run_ind+run_pages); - size_t nrun_pages = nrun_size >> LG_PAGE; - - /* - * Remove successor from runs_avail; the coalesced run is - * inserted later. - */ - assert(arena_mapbits_unallocated_size_get(chunk, - run_ind+run_pages+nrun_pages-1) == nrun_size); - assert(arena_mapbits_dirty_get(chunk, - run_ind+run_pages+nrun_pages-1) == flag_dirty); - assert(arena_mapbits_decommitted_get(chunk, - run_ind+run_pages+nrun_pages-1) == flag_decommitted); - arena_avail_remove(arena, extent, run_ind+run_pages, - nrun_pages); - - /* - * If the successor is dirty, remove it from the set of dirty - * pages. 
- */ - if (flag_dirty != 0) { - arena_run_dirty_remove(arena, chunk, run_ind+run_pages, - nrun_pages); - } - - size += nrun_size; - run_pages += nrun_pages; - - arena_mapbits_unallocated_size_set(chunk, run_ind, size); - arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, - size); - } - - /* Try to coalesce backward. */ - if (run_ind > map_bias && arena_mapbits_allocated_get(chunk, - run_ind-1) == 0 && arena_mapbits_dirty_get(chunk, run_ind-1) == - flag_dirty && arena_mapbits_decommitted_get(chunk, run_ind-1) == - flag_decommitted) { - size_t prun_size = arena_mapbits_unallocated_size_get(chunk, - run_ind-1); - size_t prun_pages = prun_size >> LG_PAGE; - - run_ind -= prun_pages; - - /* - * Remove predecessor from runs_avail; the coalesced run is - * inserted later. - */ - assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == - prun_size); - assert(arena_mapbits_dirty_get(chunk, run_ind) == flag_dirty); - assert(arena_mapbits_decommitted_get(chunk, run_ind) == - flag_decommitted); - arena_avail_remove(arena, extent, run_ind, prun_pages); - - /* - * If the predecessor is dirty, remove it from the set of dirty - * pages. 
- */ - if (flag_dirty != 0) { - arena_run_dirty_remove(arena, chunk, run_ind, - prun_pages); - } - - size += prun_size; - run_pages += prun_pages; - - arena_mapbits_unallocated_size_set(chunk, run_ind, size); - arena_mapbits_unallocated_size_set(chunk, run_ind+run_pages-1, - size); - } - - *p_size = size; - *p_run_ind = run_ind; - *p_run_pages = run_pages; -} - -static size_t -arena_run_size_get(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - size_t run_ind) -{ - size_t size; - - assert(run_ind >= map_bias); - assert(run_ind < chunk_npages); - - if (arena_mapbits_large_get(chunk, run_ind) != 0) { - size = arena_mapbits_large_size_get(chunk, run_ind); - assert(size == PAGE || arena_mapbits_large_size_get(chunk, - run_ind+(size>>LG_PAGE)-1) == 0); - } else { - const arena_bin_info_t *bin_info = &arena_bin_info[run->binind]; - size = bin_info->run_size; - } - - return (size); + assert(extent_slab_data_get(slab)->nfree > 0); + extent_heap_insert(&bin->slabs_nonfull, slab); } static void -arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - arena_run_t *run, bool dirty, bool cleaned, bool decommitted) +arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { - arena_chunk_t *chunk; - arena_chunk_map_misc_t *miscelm; - size_t size, run_ind, run_pages, flag_dirty, flag_decommitted; - chunk = (arena_chunk_t *)extent_base_get(extent); - miscelm = arena_run_to_miscelm(extent, run); - run_ind = arena_miscelm_to_pageind(extent, miscelm); - assert(run_ind >= map_bias); - assert(run_ind < chunk_npages); - size = arena_run_size_get(arena, chunk, run, run_ind); - run_pages = (size >> LG_PAGE); - arena_nactive_sub(arena, run_pages); - - /* - * The run is dirty if the caller claims to have dirtied it, as well as - * if it was already dirty before being allocated and the caller - * doesn't claim to have cleaned it. 
- */ - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - if (!cleaned && !decommitted && arena_mapbits_dirty_get(chunk, run_ind) - != 0) - dirty = true; - flag_dirty = dirty ? CHUNK_MAP_DIRTY : 0; - flag_decommitted = decommitted ? CHUNK_MAP_DECOMMITTED : 0; - - /* Mark pages as unallocated in the chunk map. */ - if (dirty || decommitted) { - size_t flags = flag_dirty | flag_decommitted; - arena_mapbits_unallocated_set(chunk, run_ind, size, flags); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - flags); - } else { - arena_mapbits_unallocated_set(chunk, run_ind, size, - arena_mapbits_unzeroed_get(chunk, run_ind)); - arena_mapbits_unallocated_set(chunk, run_ind+run_pages-1, size, - arena_mapbits_unzeroed_get(chunk, run_ind+run_pages-1)); - } - - arena_run_coalesce(arena, extent, &size, &run_ind, &run_pages, - flag_dirty, flag_decommitted); - - /* Insert into runs_avail, now that coalescing is complete. */ - assert(arena_mapbits_unallocated_size_get(chunk, run_ind) == - arena_mapbits_unallocated_size_get(chunk, run_ind+run_pages-1)); - assert(arena_mapbits_dirty_get(chunk, run_ind) == - arena_mapbits_dirty_get(chunk, run_ind+run_pages-1)); - assert(arena_mapbits_decommitted_get(chunk, run_ind) == - arena_mapbits_decommitted_get(chunk, run_ind+run_pages-1)); - arena_avail_insert(arena, extent, run_ind, run_pages); - - if (dirty) - arena_run_dirty_insert(arena, chunk, run_ind, run_pages); - - /* Deallocate chunk if it is now completely unused. */ - if (size == arena_maxrun) { - assert(run_ind == map_bias); - assert(run_pages == (arena_maxrun >> LG_PAGE)); - arena_chunk_dalloc(tsdn, arena, extent); - } - - /* - * It is okay to do dirty page processing here even if the chunk was - * deallocated above, since in that case it is the spare. 
Waiting - * until after possible chunk deallocation to do dirty processing - * allows for an old spare to be fully deallocated, thus decreasing the - * chances of spuriously crossing the dirty page purging threshold. - */ - if (dirty) - arena_maybe_purge(tsdn, arena); + extent_heap_remove(&bin->slabs_nonfull, slab); } -static void -arena_bin_runs_insert(arena_bin_t *bin, extent_t *extent, arena_run_t *run) +static extent_t * +arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { - arena_chunk_map_misc_t *miscelm = arena_run_to_miscelm(extent, run); - - arena_run_heap_insert(&bin->runs, miscelm); -} - -static arena_run_t * -arena_bin_nonfull_run_tryget(arena_bin_t *bin) -{ - arena_chunk_map_misc_t *miscelm; - - miscelm = arena_run_heap_remove_first(&bin->runs); - if (miscelm == NULL) + extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); + if (slab == NULL) return (NULL); if (config_stats) - bin->stats.reruns++; - - return (&miscelm->run); + bin->stats.reslabs++; + return (slab); } -static arena_run_t * -arena_bin_nonfull_run_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) +static void +arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) { - arena_run_t *run; - szind_t binind; + + assert(extent_slab_data_get(slab)->nfree == 0); + extent_ring_insert(&bin->slabs_full, slab); +} + +static void +arena_bin_slabs_full_remove(extent_t *slab) +{ + + extent_ring_remove(slab); +} + +static extent_t * +arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, + const arena_bin_info_t *bin_info) +{ + extent_t *slab; + bool zero, commit; + + zero = false; + commit = true; + malloc_mutex_unlock(tsdn, &arena->lock); + slab = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, + bin_info->slab_size, 0, PAGE, &zero, &commit, true); + malloc_mutex_lock(tsdn, &arena->lock); + + return (slab); +} + +static extent_t * +arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, + const arena_bin_info_t *bin_info) +{ + extent_t *slab; + 
arena_slab_data_t *slab_data; + chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + bool zero; + + zero = false; + slab = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, + bin_info->slab_size, 0, PAGE, &zero, true); + if (slab == NULL) { + slab = arena_slab_alloc_hard(tsdn, arena, &chunk_hooks, + bin_info); + if (slab == NULL) + return (NULL); + } + assert(extent_slab_get(slab)); + + arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); + + /* Initialize slab internals. */ + slab_data = extent_slab_data_get(slab); + slab_data->binind = binind; + slab_data->nfree = bin_info->nregs; + bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); + + if (config_stats) + arena->stats.mapped += extent_size_get(slab); + + return (slab); +} + +static extent_t * +arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, + szind_t binind) +{ + extent_t *slab; const arena_bin_info_t *bin_info; - /* Look for a usable run. */ - run = arena_bin_nonfull_run_tryget(bin); - if (run != NULL) - return (run); - /* No existing runs have any space available. */ + /* Look for a usable slab. */ + slab = arena_bin_slabs_nonfull_tryget(bin); + if (slab != NULL) + return (slab); + /* No existing slabs have any space available. */ - binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; - /* Allocate a new run. */ + /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ malloc_mutex_lock(tsdn, &arena->lock); - run = arena_run_alloc_small(tsdn, arena, bin_info->run_size, binind); - if (run != NULL) { - /* Initialize run internals. 
*/ - run->binind = binind; - run->nfree = bin_info->nregs; - bitmap_init(run->bitmap, &bin_info->bitmap_info); - } + slab = arena_slab_alloc(tsdn, arena, binind, bin_info); malloc_mutex_unlock(tsdn, &arena->lock); /********************************/ malloc_mutex_lock(tsdn, &bin->lock); - if (run != NULL) { + if (slab != NULL) { if (config_stats) { - bin->stats.nruns++; - bin->stats.curruns++; + bin->stats.nslabs++; + bin->stats.curslabs++; } - return (run); + return (slab); } /* - * arena_run_alloc_small() failed, but another thread may have made + * arena_slab_alloc() failed, but another thread may have made * sufficient memory available while this one dropped bin->lock above, * so search one more time. */ - run = arena_bin_nonfull_run_tryget(bin); - if (run != NULL) - return (run); + slab = arena_bin_slabs_nonfull_tryget(bin); + if (slab != NULL) + return (slab); return (NULL); } -/* Re-fill bin->runcur, then call arena_run_reg_alloc(). */ +/* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, + szind_t binind) { - szind_t binind; const arena_bin_info_t *bin_info; - arena_run_t *run; + extent_t *slab; + - binind = arena_bin_index(arena, bin); bin_info = &arena_bin_info[binind]; - bin->runcur = NULL; - run = arena_bin_nonfull_run_get(tsdn, arena, bin); - if (bin->runcur != NULL && bin->runcur->nfree > 0) { + if (bin->slabcur != NULL) { + arena_bin_slabs_full_insert(bin, bin->slabcur); + bin->slabcur = NULL; + } + slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind); + if (bin->slabcur != NULL) { /* - * Another thread updated runcur while this one ran without the - * bin lock in arena_bin_nonfull_run_get(). + * Another thread updated slabcur while this one ran without the + * bin lock in arena_bin_nonfull_slab_get(). 
*/ - void *ret; - assert(bin->runcur->nfree > 0); - ret = arena_run_reg_alloc(tsdn, bin->runcur, bin_info); - if (run != NULL) { - extent_t *extent; - arena_chunk_t *chunk; - - /* - * arena_run_alloc_small() may have allocated run, or - * it may have pulled run from the bin's run tree. - * Therefore it is unsafe to make any assumptions about - * how run has previously been used, and - * arena_bin_lower_run() must be called, as if a region - * were just deallocated from the run. - */ - extent = iealloc(tsdn, run); - chunk = (arena_chunk_t *)extent_base_get(extent); - if (run->nfree == bin_info->nregs) { - arena_dalloc_bin_run(tsdn, arena, chunk, extent, - run, bin); - } else { - arena_bin_lower_run(tsdn, arena, extent, run, - bin); + if (extent_slab_data_get(bin->slabcur)->nfree > 0) { + void *ret = arena_slab_reg_alloc(tsdn, bin->slabcur, + bin_info); + if (slab != NULL) { + /* + * arena_slab_alloc() may have allocated slab, + * or it may have been pulled from + * slabs_nonfull. Therefore it is unsafe to + * make any assumptions about how slab has + * previously been used, and + * arena_bin_lower_slab() must be called, as if + * a region were just deallocated from the slab. 
+ */ + if (extent_slab_data_get(slab)->nfree == + bin_info->nregs) { + arena_dalloc_bin_slab(tsdn, arena, slab, + bin); + } else { + arena_bin_lower_slab(tsdn, arena, slab, + bin); + } } + return (ret); } - return (ret); + + arena_bin_slabs_full_insert(bin, bin->slabcur); + bin->slabcur = NULL; } - if (run == NULL) + if (slab == NULL) return (NULL); + bin->slabcur = slab; - bin->runcur = run; + assert(extent_slab_data_get(bin->slabcur)->nfree > 0); - assert(bin->runcur->nfree > 0); - - return (arena_run_reg_alloc(tsdn, bin->runcur, bin_info)); + return (arena_slab_reg_alloc(tsdn, slab, bin_info)); } void @@ -2102,13 +1152,14 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tbin->lg_fill_div); i < nfill; i++) { - arena_run_t *run; + extent_t *slab; void *ptr; - if ((run = bin->runcur) != NULL && run->nfree > 0) { - ptr = arena_run_reg_alloc(tsdn, run, + if ((slab = bin->slabcur) != NULL && + extent_slab_data_get(slab)->nfree > 0) { + ptr = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); } else - ptr = arena_bin_malloc_hard(tsdn, arena, bin); + ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); if (ptr == NULL) { /* * OOM. 
tbin->avail isn't yet filled down to its first @@ -2171,17 +1222,18 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) void *ret; arena_bin_t *bin; size_t usize; - arena_run_t *run; + extent_t *slab; assert(binind < NBINS); bin = &arena->bins[binind]; usize = index2size(binind); malloc_mutex_lock(tsdn, &bin->lock); - if ((run = bin->runcur) != NULL && run->nfree > 0) - ret = arena_run_reg_alloc(tsdn, run, &arena_bin_info[binind]); + if ((slab = bin->slabcur) != NULL && extent_slab_data_get(slab)->nfree > + 0) + ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); else - ret = arena_bin_malloc_hard(tsdn, arena, bin); + ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); @@ -2242,7 +1294,7 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { - /* Small; alignment doesn't require special run placement. */ + /* Small; alignment doesn't require special slab placement. */ ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); } else { @@ -2315,97 +1367,92 @@ arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, } static void -arena_dissociate_bin_run(extent_t *extent, arena_run_t *run, arena_bin_t *bin) +arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { - /* Dissociate run from bin. */ - if (run == bin->runcur) - bin->runcur = NULL; + /* Dissociate slab from bin. */ + if (slab == bin->slabcur) + bin->slabcur = NULL; else { - szind_t binind = arena_bin_index(extent_arena_get(extent), bin); + szind_t binind = extent_slab_data_get(slab)->binind; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; /* * The following block's conditional is necessary because if the - * run only contains one region, then it never gets inserted - * into the non-full runs tree. 
+ * slab only contains one region, then it never gets inserted + * into the non-full slabs heap. */ - if (bin_info->nregs != 1) { - arena_chunk_map_misc_t *miscelm = - arena_run_to_miscelm(extent, run); - - arena_run_heap_remove(&bin->runs, miscelm); - } + if (bin_info->nregs == 1) + arena_bin_slabs_full_remove(slab); + else + arena_bin_slabs_nonfull_remove(bin, slab); } } static void -arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, arena_run_t *run, arena_bin_t *bin) +arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, + arena_bin_t *bin) { - assert(run != bin->runcur); + assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ malloc_mutex_lock(tsdn, &arena->lock); - arena_run_dalloc(tsdn, arena, extent, run, true, false, false); + arena_slab_dalloc(tsdn, arena, slab); malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) - bin->stats.curruns--; + bin->stats.curslabs--; } static void -arena_bin_lower_run(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - arena_run_t *run, arena_bin_t *bin) +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, + arena_bin_t *bin) { + assert(extent_slab_data_get(slab)->nfree > 0); + /* - * Make sure that if bin->runcur is non-NULL, it refers to the lowest - * non-full run. It is okay to NULL runcur out rather than proactively - * keeping it pointing at the lowest non-full run. + * Make sure that if bin->slabcur is non-NULL, it refers to the lowest + * non-full slab. It is okay to NULL slabcur out rather than + * proactively keeping it pointing at the lowest non-full slab. */ - if ((uintptr_t)run < (uintptr_t)bin->runcur) { - /* Switch runcur. 
*/ - if (bin->runcur->nfree > 0) { - arena_bin_runs_insert(bin, iealloc(tsdn, bin->runcur), - bin->runcur); - } - bin->runcur = run; + if (bin->slabcur != NULL && (uintptr_t)extent_addr_get(slab) < + (uintptr_t)extent_addr_get(bin->slabcur)) { + /* Switch slabcur. */ + if (extent_slab_data_get(bin->slabcur)->nfree > 0) + arena_bin_slabs_nonfull_insert(bin, bin->slabcur); + else + arena_bin_slabs_full_insert(bin, bin->slabcur); + bin->slabcur = slab; if (config_stats) - bin->stats.reruns++; + bin->stats.reslabs++; } else - arena_bin_runs_insert(bin, extent, run); + arena_bin_slabs_nonfull_insert(bin, slab); } static void -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr, arena_chunk_map_bits_t *bitselm, bool junked) +arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, + void *ptr, bool junked) { - size_t pageind, rpages_ind; - arena_run_t *run; - arena_bin_t *bin; - const arena_bin_info_t *bin_info; - szind_t binind; - - pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> LG_PAGE; - rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; - binind = run->binind; - bin = &arena->bins[binind]; - bin_info = &arena_bin_info[binind]; + arena_slab_data_t *slab_data = extent_slab_data_get(slab); + szind_t binind = slab_data->binind; + arena_bin_t *bin = &arena->bins[binind]; + const arena_bin_info_t *bin_info = &arena_bin_info[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) arena_dalloc_junk_small(ptr, bin_info); - arena_run_reg_dalloc(tsdn, run, extent, ptr); - if (run->nfree == bin_info->nregs) { - arena_dissociate_bin_run(extent, run, bin); - arena_dalloc_bin_run(tsdn, arena, chunk, extent, run, bin); - } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(tsdn, arena, extent, run, bin); + arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); + if (slab_data->nfree == bin_info->nregs) { 
+ arena_dissociate_bin_slab(slab, bin); + arena_dalloc_bin_slab(tsdn, arena, slab, bin); + } else if (slab_data->nfree == 1 && slab != bin->slabcur) { + arena_bin_slabs_full_remove(slab); + arena_bin_lower_slab(tsdn, arena, slab, bin); + } if (config_stats) { bin->stats.ndalloc++; @@ -2414,45 +1461,28 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, } void -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - arena_chunk_t *chunk, extent_t *extent, void *ptr, - arena_chunk_map_bits_t *bitselm) +arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, chunk, extent, ptr, bitselm, - true); + arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true); } static void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr, size_t pageind, arena_chunk_map_bits_t *bitselm) +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - arena_run_t *run; - arena_bin_t *bin; - size_t rpages_ind; + arena_bin_t *bin = &arena->bins[extent_slab_data_get(extent)->binind]; - rpages_ind = pageind - arena_mapbits_small_runind_get(chunk, pageind); - run = &arena_miscelm_get_mutable(chunk, rpages_ind)->run; - bin = &arena->bins[run->binind]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, chunk, extent, ptr, bitselm, - false); + arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); malloc_mutex_unlock(tsdn, &bin->lock); } void -arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - extent_t *extent, void *ptr, size_t pageind) +arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - arena_chunk_map_bits_t *bitselm; - if (config_debug) { - /* arena_ptr_small_binind_get() does extra sanity checking. 
*/ - assert(arena_ptr_small_binind_get(tsdn, ptr, - arena_mapbits_get(chunk, pageind)) != BININD_INVALID); - } - bitselm = arena_bitselm_get_mutable(chunk, pageind); - arena_dalloc_bin(tsdn, arena, chunk, extent, ptr, pageind, bitselm); + arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); } @@ -2682,9 +1712,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bstats[i].nfills += bin->stats.nfills; bstats[i].nflushes += bin->stats.nflushes; } - bstats[i].nruns += bin->stats.nruns; - bstats[i].reruns += bin->stats.reruns; - bstats[i].curruns += bin->stats.curruns; + bstats[i].nslabs += bin->stats.nslabs; + bstats[i].reslabs += bin->stats.reslabs; + bstats[i].curslabs += bin->stats.curslabs; malloc_mutex_unlock(tsdn, &bin->lock); } } @@ -2745,17 +1775,13 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->dss_prec = chunk_dss_prec_get(tsdn); - ql_new(&arena->achunks); - - arena->spare = NULL; - arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); arena->purging = false; arena->nactive = 0; arena->ndirty = 0; - qr_new(&arena->runs_dirty, rd_link); - qr_new(&arena->chunks_cache, cc_link); + extent_init(&arena->extents_dirty, arena, NULL, 0, 0, false, false, + false, false); if (opt_purge == purge_mode_decay) arena_decay_init(arena, arena_decay_time_default_get()); @@ -2786,52 +1812,23 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_ARENA_BIN)) return (NULL); - bin->runcur = NULL; - arena_run_heap_new(&bin->runs); + bin->slabcur = NULL; + extent_heap_new(&bin->slabs_nonfull); + extent_init(&bin->slabs_full, arena, NULL, 0, 0, false, false, + false, false); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } - for (i = 0; i < NPSIZES; i++) - arena_run_heap_new(&arena->runs_avail[i]); - return (arena); } void arena_boot(void) { - unsigned i; arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); arena_decay_time_default_set(opt_decay_time); - - /* - * Compute 
the header size such that it is large enough to contain the - * page map. The page map is biased to omit entries for the header - * itself, so some iteration is necessary to compute the map bias. - * - * 1) Compute safe header_size and map_bias values that include enough - * space for an unbiased page map. - * 2) Refine map_bias based on (1) to omit the header pages in the page - * map. The resulting map_bias may be one too small. - * 3) Refine map_bias based on (2). The result will be >= the result - * from (2), and will always be correct. - */ - map_bias = 0; - for (i = 0; i < 3; i++) { - size_t header_size = offsetof(arena_chunk_t, map_bits) + - ((sizeof(arena_chunk_map_bits_t) + - sizeof(arena_chunk_map_misc_t)) * (chunk_npages-map_bias)); - map_bias = (header_size + PAGE_MASK) >> LG_PAGE; - } - assert(map_bias > 0); - - map_misc_offset = offsetof(arena_chunk_t, map_bits) + - sizeof(arena_chunk_map_bits_t) * (chunk_npages-map_bias); - - arena_maxrun = chunksize - (map_bias << LG_PAGE); - assert(arena_maxrun > 0); } void diff --git a/src/base.c b/src/base.c index 134018a8..3807422c 100644 --- a/src/base.c +++ b/src/base.c @@ -74,8 +74,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, csize, 0, true, false, true, true, - false); + extent_init(extent, NULL, addr, csize, 0, true, true, true, false); return (extent); } diff --git a/src/chunk.c b/src/chunk.c index 8c4f741f..e2e9de03 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -558,8 +558,7 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, arena, extent); return (NULL); } - extent_init(extent, arena, addr, size, usize, true, false, zero, commit, - slab); + extent_init(extent, arena, addr, size, usize, true, zero, commit, slab); if (pad != 0) extent_addr_randomize(tsdn, extent, alignment); if (chunk_register(tsdn, extent)) { @@ -828,8 +827,8 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t 
*chunk_hooks, extent_init(&lead, arena, extent_addr_get(extent), size_a, usize_a, extent_active_get(extent), - extent_dirty_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_slab_get(extent)); + extent_zeroed_get(extent), extent_committed_get(extent), + extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, &lead_elm_b)) @@ -838,8 +837,8 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, usize_b, extent_active_get(extent), - extent_dirty_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_slab_get(extent)); + extent_zeroed_get(extent), extent_committed_get(extent), + extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, &trail_elm_b)) goto label_error_c; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index e92fda72..f890a5cd 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -121,7 +121,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, pad_size = (uintptr_t)ret - (uintptr_t)pad_addr; if (pad_size != 0) { extent_init(pad, arena, pad_addr, pad_size, - pad_size, false, true, false, true, false); + pad_size, false, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || diff --git a/src/ctl.c b/src/ctl.c index 26bc1750..34c7e1bd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -124,7 +124,7 @@ CTL_PROTO(arena_i_chunk_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) -CTL_PROTO(arenas_bin_i_run_size) +CTL_PROTO(arenas_bin_i_slab_size) INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_hchunk_i_size) INDEX_PROTO(arenas_hchunk_i) @@ -160,9 +160,9 @@ CTL_PROTO(stats_arenas_i_bins_j_nrequests) CTL_PROTO(stats_arenas_i_bins_j_curregs) CTL_PROTO(stats_arenas_i_bins_j_nfills) CTL_PROTO(stats_arenas_i_bins_j_nflushes) 
-CTL_PROTO(stats_arenas_i_bins_j_nruns) -CTL_PROTO(stats_arenas_i_bins_j_nreruns) -CTL_PROTO(stats_arenas_i_bins_j_curruns) +CTL_PROTO(stats_arenas_i_bins_j_nslabs) +CTL_PROTO(stats_arenas_i_bins_j_nreslabs) +CTL_PROTO(stats_arenas_i_bins_j_curslabs) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc) CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc) @@ -300,7 +300,7 @@ static const ctl_indexed_node_t arena_node[] = { static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("run_size"), CTL(arenas_bin_i_run_size)} + {NAME("slab_size"), CTL(arenas_bin_i_slab_size)} }; static const ctl_named_node_t super_arenas_bin_i_node[] = { {NAME(""), CHILD(named, arenas_bin_i)} @@ -373,9 +373,9 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, - {NAME("nruns"), CTL(stats_arenas_i_bins_j_nruns)}, - {NAME("nreruns"), CTL(stats_arenas_i_bins_j_nreruns)}, - {NAME("curruns"), CTL(stats_arenas_i_bins_j_curruns)} + {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, + {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, + {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { {NAME(""), CHILD(named, stats_arenas_i_bins_j)} @@ -549,9 +549,10 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->bstats[i].nflushes += astats->bstats[i].nflushes; } - sstats->bstats[i].nruns += astats->bstats[i].nruns; - sstats->bstats[i].reruns += astats->bstats[i].reruns; - sstats->bstats[i].curruns += astats->bstats[i].curruns; + sstats->bstats[i].nslabs += astats->bstats[i].nslabs; + sstats->bstats[i].reslabs += astats->bstats[i].reslabs; + sstats->bstats[i].curslabs += + astats->bstats[i].curslabs; } for (i = 
0; i < NSIZES - NBINS; i++) { @@ -1801,7 +1802,7 @@ CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_run_size, arena_bin_info[mib[2]].run_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { @@ -2032,12 +2033,12 @@ CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nruns, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].reruns, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curruns, - ctl_stats.arenas[mib[2]].bstats[mib[4]].curruns, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs, + ctl_stats.arenas[mib[2]].bstats[mib[4]].nslabs, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, + ctl_stats.arenas[mib[2]].bstats[mib[4]].reslabs, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, + ctl_stats.arenas[mib[2]].bstats[mib[4]].curslabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, diff --git a/src/huge.c b/src/huge.c index 5375b59f..5f758140 100644 --- a/src/huge.c +++ b/src/huge.c @@ -153,8 +153,8 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. 
* There will always be trailing bytes, because ptr's - * offset from the beginning of the run is a multiple of - * CACHELINE in [0 .. PAGE). + * offset from the beginning of the extent is a multiple + * of CACHELINE in [0 .. PAGE). */ void *zbase = (void *) ((uintptr_t)extent_addr_get(extent) + oldusize); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9f8bd01e..429667f6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1707,28 +1707,30 @@ irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, +irealloc_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, size_t usize) { void *p; - extent_t *e; + extent_t *extent; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), extent, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_extent, old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irealloc_prof_sample(tsd, extent, old_ptr, old_usize, usize, - tctx); - } else - p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); + p = irealloc_prof_sample(tsd, old_extent, old_ptr, old_usize, + usize, tctx); + } else { + p = iralloc(tsd, old_extent, old_ptr, old_usize, usize, 0, + false); + } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return (NULL); } - e = (p == old_ptr) ? extent : iealloc(tsd_tsdn(tsd), p); - prof_realloc(tsd, e, p, usize, tctx, prof_active, true, + extent = (p == old_ptr) ? 
old_extent : iealloc(tsd_tsdn(tsd), p); + prof_realloc(tsd, extent, p, usize, tctx, prof_active, true, old_extent, old_ptr, old_usize, old_tctx); return (p); @@ -2146,24 +2148,24 @@ irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, +irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena) { void *p; - extent_t *e; + extent_t *extent; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), extent, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_extent, old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irallocx_prof_sample(tsd_tsdn(tsd), extent, old_ptr, + p = irallocx_prof_sample(tsd_tsdn(tsd), old_extent, old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx); } else { - p = iralloct(tsd_tsdn(tsd), extent, old_ptr, old_usize, size, - alignment, zero, tcache, arena); + p = iralloct(tsd_tsdn(tsd), old_extent, old_ptr, old_usize, + size, alignment, zero, tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); @@ -2179,12 +2181,12 @@ irallocx_prof(tsd_t *tsd, extent_t *extent, void *old_ptr, size_t old_usize, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. 
*/ - e = extent; - *usize = isalloc(tsd_tsdn(tsd), e, p); + extent = old_extent; + *usize = isalloc(tsd_tsdn(tsd), extent, p); } else - e = iealloc(tsd_tsdn(tsd), p); - prof_realloc(tsd, e, p, *usize, tctx, prof_active, true, old_ptr, - old_usize, old_tctx); + extent = iealloc(tsd_tsdn(tsd), p); + prof_realloc(tsd, extent, p, *usize, tctx, prof_active, true, + old_extent, old_ptr, old_usize, old_tctx); return (p); } @@ -2338,8 +2340,8 @@ ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, prof_alloc_rollback(tsd, tctx, false); return (usize); } - prof_realloc(tsd, extent, ptr, usize, tctx, prof_active, false, ptr, - old_usize, old_tctx); + prof_realloc(tsd, extent, ptr, usize, tctx, prof_active, false, extent, + ptr, old_usize, old_tctx); return (usize); } diff --git a/src/stats.c b/src/stats.c index 4dc48d5b..599e377d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -58,29 +58,29 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util nfills nflushes newruns" - " reruns\n"); + " ndalloc nrequests curregs curslabs regs" + " pgs util nfills nflushes newslabs" + " reslabs\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util newruns reruns\n"); + " ndalloc nrequests curregs curslabs regs" + " pgs util newslabs reslabs\n"); } CTL_GET("arenas.nbins", &nbins, unsigned); for (j = 0, in_gap = false; j < nbins; j++) { - uint64_t nruns; + uint64_t nslabs; - CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, + CTL_M2_M4_GET("stats.arenas.0.bins.0.nslabs", i, j, &nslabs, uint64_t); - if (nruns == 0) + if (nslabs == 0) in_gap = true; else { - size_t reg_size, run_size, curregs, availregs, milli; - size_t curruns; + size_t reg_size, slab_size, curregs, availregs, milli; + size_t curslabs; 
uint32_t nregs; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; + uint64_t reslabs; char util[6]; /* "x.yyy". */ if (in_gap) { @@ -90,7 +90,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, + CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, uint64_t); @@ -106,12 +106,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, &nflushes, uint64_t); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, - &reruns, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, - &curruns, size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, + &reslabs, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, + &curslabs, size_t); - availregs = nregs * curruns; + availregs = nregs * curslabs; milli = (availregs != 0) ? 
(1000 * curregs) / availregs : 1000; assert(milli <= 1000); @@ -134,9 +134,9 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12zu %4u %3zu %-5s %12"FMTu64 " %12"FMTu64" %12"FMTu64" %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, - ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nfills, nflushes, - nruns, reruns); + ndalloc, nrequests, curregs, curslabs, + nregs, slab_size / page, util, nfills, + nflushes, nslabs, reslabs); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -144,8 +144,9 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12zu %4u %3zu %-5s %12"FMTu64 " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, - ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, reruns); + ndalloc, nrequests, curregs, curslabs, + nregs, slab_size / page, util, nslabs, + reslabs); } } } diff --git a/src/tcache.c b/src/tcache.c index 41074d34..02015227 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -127,14 +127,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == bin_arena) { - arena_chunk_t *chunk = - (arena_chunk_t *)extent_base_get(extent); - size_t pageind = ((uintptr_t)ptr - - (uintptr_t)chunk) >> LG_PAGE; - arena_chunk_map_bits_t *bitselm = - arena_bitselm_get_mutable(chunk, pageind); arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, chunk, extent, ptr, bitselm); + bin_arena, extent, ptr); } else { /* * This object was allocated via a different diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 98c9fde4..a165aece 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -16,7 +16,7 @@ TEST_BEGIN(test_small_extent_size) assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", 
mib, &miblen), 0, + assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); for (i = 0; i < nbins; i++) { mib[2] = i; @@ -71,12 +71,12 @@ TEST_BEGIN(test_huge_extent_size) ceil = extent_size_quantize_ceil(extent_size); assert_zu_eq(extent_size, floor, - "Large run quantization should be a no-op for precise " - "size (lextent_size=%zu, extent_size=%zu)", lextent_size, + "Extent quantization should be a no-op for precise size " + "(lextent_size=%zu, extent_size=%zu)", lextent_size, extent_size); assert_zu_eq(extent_size, ceil, - "Large run quantization should be a no-op for precise " - "size (lextent_size=%zu, extent_size=%zu)", lextent_size, + "Extent quantization should be a no-op for precise size " + "(lextent_size=%zu, extent_size=%zu)", lextent_size, extent_size); if (i > 0) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 9ba730a6..872aeaa0 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -615,7 +615,8 @@ TEST_BEGIN(test_arenas_bin_constants) TEST_ARENAS_BIN_CONSTANT(size_t, size, arena_bin_info[0].reg_size); TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, arena_bin_info[0].nregs); - TEST_ARENAS_BIN_CONSTANT(size_t, run_size, arena_bin_info[0].run_size); + TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, + arena_bin_info[0].slab_size); #undef TEST_ARENAS_BIN_CONSTANT } diff --git a/test/unit/stats.c b/test/unit/stats.c index b0e318a5..f524c005 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -229,9 +229,9 @@ TEST_BEGIN(test_stats_arenas_bins) { unsigned arena; void *p; - size_t sz, curruns, curregs; + size_t sz, curslabs, curregs; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t nruns, nreruns; + uint64_t nslabs, nreslabs; int expected = config_stats ? 0 : ENOENT; arena = 0; @@ -266,12 +266,12 @@ TEST_BEGIN(test_stats_arenas_bins) NULL, 0), config_tcache ? 
expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", &nruns, &sz, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nslabs", &nslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nreslabs", &nreslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz, + assert_d_eq(mallctl("stats.arenas.0.bins.0.curslabs", &curslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { @@ -289,10 +289,10 @@ TEST_BEGIN(test_stats_arenas_bins) assert_u64_gt(nflushes, 0, "At least one flush should have occurred"); } - assert_u64_gt(nruns, 0, - "At least one run should have been allocated"); - assert_zu_gt(curruns, 0, - "At least one run should be currently allocated"); + assert_u64_gt(nslabs, 0, + "At least one slab should have been allocated"); + assert_zu_gt(curslabs, 0, + "At least one slab should be currently allocated"); } dallocx(p, 0); From 714d1640f30726a21898b34ac64b8f2fddb502c7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 31 May 2016 14:14:03 -0700 Subject: [PATCH 0290/2608] Update private symbols. 
--- include/jemalloc/internal/private_symbols.txt | 30 ++++++++++++------- src/huge.c | 4 +-- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 676c2431..b8ed4341 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -19,7 +19,6 @@ arena_chunk_ralloc_huge_expand arena_chunk_ralloc_huge_shrink arena_cleanup arena_dalloc -arena_dalloc_bin arena_dalloc_bin_junked_locked arena_dalloc_junk_small arena_dalloc_promoted @@ -66,7 +65,6 @@ arena_prof_tctx_reset arena_prof_tctx_set arena_purge arena_ralloc -arena_ralloc_junk_large arena_ralloc_no_move arena_reset arena_salloc @@ -181,6 +179,12 @@ extent_before_get extent_committed_get extent_committed_set extent_dalloc +extent_heap_empty +extent_heap_first +extent_heap_insert +extent_heap_new +extent_heap_remove +extent_heap_remove_first extent_init extent_last_get extent_past_get @@ -190,15 +194,15 @@ extent_retained_get extent_ring_insert extent_ring_remove extent_size_get -extent_size_set extent_size_quantize_ceil extent_size_quantize_floor +extent_size_set extent_slab_data_get extent_slab_data_get_const extent_slab_get extent_slab_set -extent_slab_data_get extent_usize_get +extent_usize_set extent_zeroed_get extent_zeroed_set ffs_llu @@ -333,6 +337,7 @@ pages_unmap pind2sz pind2sz_compute pind2sz_lookup +pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -396,10 +401,6 @@ rtree_child_read_hard rtree_child_tryread rtree_clear rtree_delete -rtree_new -rtree_node_alloc -rtree_node_dalloc -rtree_node_valid rtree_elm_acquire rtree_elm_lookup rtree_elm_read @@ -411,6 +412,10 @@ rtree_elm_witness_release rtree_elm_witnesses_cleanup rtree_elm_write rtree_elm_write_acquired +rtree_new +rtree_node_alloc +rtree_node_dalloc +rtree_node_valid rtree_read rtree_start_level rtree_subkey @@ -433,17 +438,17 @@ stats_cactive_get stats_cactive_sub stats_print 
tcache_alloc_easy -tcache_alloc_large +tcache_alloc_huge tcache_alloc_small tcache_alloc_small_hard tcache_arena_reassociate -tcache_bin_flush_large +tcache_bin_flush_huge tcache_bin_flush_small tcache_bin_info tcache_boot tcache_cleanup tcache_create -tcache_dalloc_large +tcache_dalloc_huge tcache_dalloc_small tcache_enabled_cleanup tcache_enabled_get @@ -502,6 +507,9 @@ tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set tsd_prof_tdatap_get +tsd_rtree_elm_witnesses_get +tsd_rtree_elm_witnesses_set +tsd_rtree_elm_witnessesp_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set diff --git a/src/huge.c b/src/huge.c index 5f758140..8aa3dfd2 100644 --- a/src/huge.c +++ b/src/huge.c @@ -62,7 +62,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #ifdef JEMALLOC_JET #undef huge_dalloc_junk -#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) +#define huge_dalloc_junk JEMALLOC_N(n_huge_dalloc_junk) #endif void huge_dalloc_junk(void *ptr, size_t usize) @@ -73,7 +73,7 @@ huge_dalloc_junk(void *ptr, size_t usize) #ifdef JEMALLOC_JET #undef huge_dalloc_junk #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) -huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(huge_dalloc_junk_impl); +huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(n_huge_dalloc_junk); #endif static void From 7d63fed0fd0bb10bd250e40c35558f67f26469cd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 31 May 2016 14:50:21 -0700 Subject: [PATCH 0291/2608] Rename huge to large. 
--- Makefile.in | 2 +- doc/jemalloc.xml.in | 74 ++------- include/jemalloc/internal/arena.h | 36 ++--- include/jemalloc/internal/ctl.h | 2 +- include/jemalloc/internal/extent.h | 4 +- include/jemalloc/internal/huge.h | 37 ----- .../jemalloc/internal/jemalloc_internal.h.in | 26 ++-- include/jemalloc/internal/large.h | 37 +++++ include/jemalloc/internal/private_symbols.txt | 36 ++--- include/jemalloc/internal/size_classes.sh | 8 +- include/jemalloc/internal/stats.h | 18 +-- include/jemalloc/internal/tcache.h | 24 +-- include/jemalloc/internal/witness.h | 2 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 4 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 10 +- src/arena.c | 143 ++++++++--------- src/chunk_dss.c | 2 +- src/ckh.c | 6 +- src/ctl.c | 146 +++++++++--------- src/extent.c | 4 +- src/jemalloc.c | 32 ++-- src/{huge.c => large.c} | 119 +++++++------- src/stats.c | 71 ++++----- src/tcache.c | 28 ++-- test/integration/chunk.c | 28 ++-- test/integration/mallocx.c | 24 +-- test/integration/overflow.c | 8 +- test/integration/rallocx.c | 16 +- test/integration/xallocx.c | 104 ++++++------- test/unit/arena_reset.c | 20 +-- test/unit/decay.c | 42 ++--- test/unit/extent_quantize.c | 16 +- test/unit/junk.c | 16 +- test/unit/mallctl.c | 8 +- test/unit/size_classes.c | 8 +- test/unit/stats.c | 48 +++--- test/unit/zero.c | 4 +- 37 files changed, 587 insertions(+), 626 deletions(-) delete mode 100644 include/jemalloc/internal/huge.h create mode 100644 include/jemalloc/internal/large.h rename src/{huge.c => large.c} (66%) diff --git a/Makefile.in b/Makefile.in index 2e9bbbc2..f90e2a4f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -88,7 +88,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ctl.c \ $(srcroot)src/extent.c \ $(srcroot)src/hash.c \ - $(srcroot)src/huge.c \ + $(srcroot)src/large.c \ $(srcroot)src/mb.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 923097d4..7613c24c 100644 --- 
a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1872,22 +1872,22 @@ typedef struct { Number of bytes per slab. - + - arenas.nhchunks + arenas.nlextents (unsigned) r- - Total number of huge size classes. + Total number of large size classes. - + - arenas.hchunk.<i>.size + arenas.lextent.<i>.size (size_t) r- - Maximum size supported by this huge size + Maximum size supported by this large size class. @@ -2361,50 +2361,6 @@ typedef struct { - - - stats.arenas.<i>.huge.allocated - (size_t) - r- - [] - - Number of bytes currently allocated by huge objects. - - - - - - stats.arenas.<i>.huge.nmalloc - (uint64_t) - r- - [] - - Cumulative number of huge allocation requests served - directly by the arena. - - - - - stats.arenas.<i>.huge.ndalloc - (uint64_t) - r- - [] - - Cumulative number of huge deallocation requests served - directly by the arena. - - - - - stats.arenas.<i>.huge.nrequests - (uint64_t) - r- - [] - - Cumulative number of huge allocation requests. - - - stats.arenas.<i>.bins.<j>.nmalloc @@ -2500,9 +2456,9 @@ typedef struct { Current number of slabs. - + - stats.arenas.<i>.hchunks.<j>.nmalloc + stats.arenas.<i>.lextents.<j>.nmalloc (uint64_t) r- [] @@ -2511,9 +2467,9 @@ typedef struct { class served directly by the arena. - + - stats.arenas.<i>.hchunks.<j>.ndalloc + stats.arenas.<i>.lextents.<j>.ndalloc (uint64_t) r- [] @@ -2522,9 +2478,9 @@ typedef struct { size class served directly by the arena. - + - stats.arenas.<i>.hchunks.<j>.nrequests + stats.arenas.<i>.lextents.<j>.nrequests (uint64_t) r- [] @@ -2533,14 +2489,14 @@ typedef struct { class. - + - stats.arenas.<i>.hchunks.<j>.curhchunks + stats.arenas.<i>.lextents.<j>.curlextents (size_t) r- [] - Current number of huge allocations for this size class. + Current number of large allocations for this size class. 
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d66548f2..56f78571 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -229,10 +229,10 @@ struct arena_s { */ size_t decay_backlog[SMOOTHSTEP_NSTEPS]; - /* Extant huge allocations. */ - ql_head(extent_t) huge; - /* Synchronizes all huge allocation/update/deallocation. */ - malloc_mutex_t huge_mtx; + /* Extant large allocations. */ + ql_head(extent_t) large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; /* * Heaps of chunks that were previously allocated. These are used when @@ -287,13 +287,13 @@ void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool cache); -extent_t *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, +extent_t *arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +void arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked); -void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, +void arena_chunk_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -void arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, +void arena_chunk_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, @@ -341,7 +341,7 @@ void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_huge_stats_t *hstats); + malloc_bin_stats_t 
*bstats, malloc_large_stats_t *lstats); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); @@ -470,7 +470,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) assert(ptr != NULL); if (unlikely(!extent_slab_get(extent))) - return (huge_prof_tctx_get(tsdn, extent)); + return (large_prof_tctx_get(tsdn, extent)); return ((prof_tctx_t *)(uintptr_t)1U); } @@ -483,7 +483,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, assert(ptr != NULL); if (unlikely(!extent_slab_get(extent))) - huge_prof_tctx_set(tsdn, extent, tctx); + large_prof_tctx_set(tsdn, extent, tctx); } JEMALLOC_INLINE void @@ -495,7 +495,7 @@ arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, assert(ptr != NULL); assert(!extent_slab_get(extent)); - huge_prof_tctx_reset(tsdn, extent); + large_prof_tctx_reset(tsdn, extent); } JEMALLOC_ALWAYS_INLINE void @@ -535,7 +535,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache, size, ind, zero, slow_path)); } if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_huge(tsdn_tsd(tsdn), arena, + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path)); } /* (size > tcache_maxclass) case falls through. 
*/ @@ -563,7 +563,7 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) if (likely(extent_slab_get(extent))) ret = index2size(extent_slab_data_get_const(extent)->binind); else - ret = huge_salloc(tsdn, extent); + ret = large_salloc(tsdn, extent); return (ret); } @@ -594,11 +594,11 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { - tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, usize, slow_path); } } else - huge_dalloc(tsdn, extent); + large_dalloc(tsdn, extent); } } @@ -627,11 +627,11 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { - tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, ptr, + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, size, slow_path); } } else - huge_dalloc(tsdn, extent); + large_dalloc(tsdn, extent); } } # endif /* JEMALLOC_ARENA_INLINE_B */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 00deeb8a..3fbac205 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -51,7 +51,7 @@ struct ctl_arena_stats_s { uint64_t nrequests_small; malloc_bin_stats_t bstats[NBINS]; - malloc_huge_stats_t hstats[NSIZES - NBINS]; + malloc_large_stats_t lstats[NSIZES - NBINS]; }; struct ctl_stats_s { diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index bfe61811..cf717d9e 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -51,7 +51,7 @@ struct extent_s { /* Small region slab metadata. */ arena_slab_data_t e_slab_data; - /* Profile counters, used for huge objects. */ + /* Profile counters, used for large objects. */ union { void *e_prof_tctx_pun; prof_tctx_t *e_prof_tctx; @@ -67,7 +67,7 @@ struct extent_s { /* Linkage for per size class address-ordered heaps. 
*/ phn(extent_t) ph_link; - /* Linkage for arena's huge and extent_cache lists. */ + /* Linkage for arena's large and extent_cache lists. */ ql_elm(extent_t) ql_link; }; }; diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h deleted file mode 100644 index 836f1b50..00000000 --- a/include/jemalloc/internal/huge.h +++ /dev/null @@ -1,37 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); -void *huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero); -bool huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, - size_t usize_max, bool zero); -void *huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t usize, size_t alignment, bool zero, tcache_t *tcache); -#ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(void *, size_t); -extern huge_dalloc_junk_t *huge_dalloc_junk; -#else -void huge_dalloc_junk(void *ptr, size_t usize); -#endif -void huge_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); -void huge_dalloc(tsdn_t *tsdn, extent_t *extent); -size_t huge_salloc(tsdn_t *tsdn, const extent_t *extent); -prof_tctx_t *huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); -void huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); -void huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ 
-/******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f4d26beb..58a18ae5 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -362,7 +362,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" @@ -396,7 +396,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" @@ -486,7 +486,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/prof.h" @@ -515,7 +515,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" -#include "jemalloc/internal/huge.h" +#include "jemalloc/internal/large.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); @@ -547,7 +547,7 @@ JEMALLOC_INLINE pszind_t psz2ind(size_t psz) { - if (unlikely(psz > HUGE_MAXCLASS)) + if (unlikely(psz > LARGE_MAXCLASS)) return (NPSIZES); { pszind_t x = lg_floor((psz<<1)-1); @@ -608,7 +608,7 @@ JEMALLOC_INLINE size_t psz2u(size_t psz) { - if (unlikely(psz > HUGE_MAXCLASS)) + if (unlikely(psz > 
LARGE_MAXCLASS)) return (0); { size_t x = lg_floor((psz<<1)-1); @@ -625,7 +625,7 @@ JEMALLOC_INLINE szind_t size2index_compute(size_t size) { - if (unlikely(size > HUGE_MAXCLASS)) + if (unlikely(size > LARGE_MAXCLASS)) return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { @@ -721,7 +721,7 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { - if (unlikely(size > HUGE_MAXCLASS)) + if (unlikely(size > LARGE_MAXCLASS)) return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { @@ -797,9 +797,9 @@ sa2u(size_t size, size_t alignment) return (usize); } - /* Huge size class. Beware of overflow. */ + /* Large size class. Beware of overflow. */ - if (unlikely(alignment > HUGE_MAXCLASS)) + if (unlikely(alignment > LARGE_MAXCLASS)) return (0); /* Make sure result is a large size class. */ @@ -814,7 +814,7 @@ sa2u(size_t size, size_t alignment) } /* - * Calculate the multi-page mapping that huge_palloc() would need in + * Calculate the multi-page mapping that large_palloc() would need in * order to guarantee the alignment. */ if (usize + large_pad + PAGE_CEILING(alignment) < usize) { @@ -1113,7 +1113,7 @@ iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) return (NULL); p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) { @@ -1121,7 +1121,7 @@ iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, return (NULL); /* Try again, without extra this time. 
*/ usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) return (NULL); p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h new file mode 100644 index 00000000..afaa6c3c --- /dev/null +++ b/include/jemalloc/internal/large.h @@ -0,0 +1,37 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, + size_t usize_max, bool zero); +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t usize, size_t alignment, bool zero, tcache_t *tcache); +#ifdef JEMALLOC_JET +typedef void (large_dalloc_junk_t)(void *, size_t); +extern large_dalloc_junk_t *large_dalloc_junk; +#else +void large_dalloc_junk(void *ptr, size_t usize); +#endif +void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc(tsdn_t *tsdn, extent_t *extent); +size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); +prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#endif /* JEMALLOC_H_INLINES */ 
+/******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b8ed4341..cab0fc54 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -9,14 +9,14 @@ arena_boot arena_choose arena_choose_hard arena_choose_impl -arena_chunk_alloc_huge +arena_chunk_alloc_large arena_chunk_cache_alloc arena_chunk_cache_dalloc arena_chunk_cache_maybe_insert arena_chunk_cache_maybe_remove -arena_chunk_dalloc_huge -arena_chunk_ralloc_huge_expand -arena_chunk_ralloc_huge_shrink +arena_chunk_dalloc_large +arena_chunk_ralloc_large_expand +arena_chunk_ralloc_large_shrink arena_cleanup arena_dalloc arena_dalloc_bin_junked_locked @@ -222,17 +222,6 @@ hash_rotl_64 hash_x64_128 hash_x86_128 hash_x86_32 -huge_dalloc -huge_dalloc_junk -huge_dalloc_junked_locked -huge_malloc -huge_palloc -huge_prof_tctx_get -huge_prof_tctx_reset -huge_prof_tctx_set -huge_ralloc -huge_ralloc_no_move -huge_salloc iaalloc ialloc iallocztm @@ -258,6 +247,17 @@ ixalloc jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork +large_dalloc +large_dalloc_junk +large_dalloc_junked_locked +large_malloc +large_palloc +large_prof_tctx_get +large_prof_tctx_reset +large_prof_tctx_set +large_ralloc +large_ralloc_no_move +large_salloc lg_floor lg_prof_sample malloc_cprintf @@ -438,17 +438,17 @@ stats_cactive_get stats_cactive_sub stats_print tcache_alloc_easy -tcache_alloc_huge +tcache_alloc_large tcache_alloc_small tcache_alloc_small_hard tcache_arena_reassociate -tcache_bin_flush_huge +tcache_bin_flush_large tcache_bin_flush_small tcache_bin_info tcache_boot tcache_cleanup tcache_create -tcache_dalloc_huge +tcache_dalloc_large tcache_dalloc_small tcache_enabled_cleanup tcache_enabled_get diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index b73064d1..38fe4902 100755 --- 
a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -237,7 +237,7 @@ size_classes() { fi fi # Final written value is correct: - huge_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" + large_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) done @@ -257,7 +257,7 @@ size_classes() { # - lookup_maxclass # - small_maxclass # - lg_large_minclass - # - huge_maxclass + # - large_maxclass } cat <tbins[binind]; tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_huge(tsd, tbin, binind, + tcache_bin_flush_large(tsd, tbin, binind, (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index f15665bc..8c56c21a 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -32,7 +32,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_HUGE WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 537cb6ab..91c949aa 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -47,11 +47,11 @@ - + @@ -98,8 +98,8 @@ - + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d2b5595f..09d4cb20 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -80,9 +80,6 @@ Header Files\internal - - Header Files\internal - Header Files\internal @@ -95,6 +92,9 @@ Header Files\internal + + Header Files\internal + Header Files\internal @@ -205,10 +205,10 @@ Source Files - + Source Files - + Source Files diff --git a/src/arena.c b/src/arena.c index ffde2e31..8194ced7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -256,71 +256,71 @@ arena_nactive_sub(arena_t *arena, size_t sub_pages) } static void -arena_huge_malloc_stats_update(arena_t *arena, size_t usize) +arena_large_malloc_stats_update(arena_t *arena, size_t usize) { szind_t index = size2index(usize); szind_t hindex = (index >= NBINS) ? index - NBINS : 0; cassert(config_stats); - arena->stats.nmalloc_huge++; - arena->stats.allocated_huge += usize; - arena->stats.hstats[hindex].nmalloc++; - arena->stats.hstats[hindex].nrequests++; - arena->stats.hstats[hindex].curhchunks++; + arena->stats.nmalloc_large++; + arena->stats.allocated_large += usize; + arena->stats.lstats[hindex].nmalloc++; + arena->stats.lstats[hindex].nrequests++; + arena->stats.lstats[hindex].curlextents++; } static void -arena_huge_malloc_stats_update_undo(arena_t *arena, size_t usize) +arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) { szind_t index = size2index(usize); szind_t hindex = (index >= NBINS) ? 
index - NBINS : 0; cassert(config_stats); - arena->stats.nmalloc_huge--; - arena->stats.allocated_huge -= usize; - arena->stats.hstats[hindex].nmalloc--; - arena->stats.hstats[hindex].nrequests--; - arena->stats.hstats[hindex].curhchunks--; + arena->stats.nmalloc_large--; + arena->stats.allocated_large -= usize; + arena->stats.lstats[hindex].nmalloc--; + arena->stats.lstats[hindex].nrequests--; + arena->stats.lstats[hindex].curlextents--; } static void -arena_huge_dalloc_stats_update(arena_t *arena, size_t usize) +arena_large_dalloc_stats_update(arena_t *arena, size_t usize) { szind_t index = size2index(usize); szind_t hindex = (index >= NBINS) ? index - NBINS : 0; cassert(config_stats); - arena->stats.ndalloc_huge++; - arena->stats.allocated_huge -= usize; - arena->stats.hstats[hindex].ndalloc++; - arena->stats.hstats[hindex].curhchunks--; + arena->stats.ndalloc_large++; + arena->stats.allocated_large -= usize; + arena->stats.lstats[hindex].ndalloc++; + arena->stats.lstats[hindex].curlextents--; } static void -arena_huge_reset_stats_cancel(arena_t *arena, size_t usize) +arena_large_reset_stats_cancel(arena_t *arena, size_t usize) { szind_t index = size2index(usize); szind_t hindex = (index >= NBINS) ? 
index - NBINS : 0; cassert(config_stats); - arena->stats.ndalloc_huge++; - arena->stats.hstats[hindex].ndalloc--; + arena->stats.ndalloc_large++; + arena->stats.lstats[hindex].ndalloc--; } static void -arena_huge_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) +arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) { - arena_huge_dalloc_stats_update(arena, oldusize); - arena_huge_malloc_stats_update(arena, usize); + arena_large_dalloc_stats_update(arena, oldusize); + arena_large_malloc_stats_update(arena, usize); } static extent_t * -arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, +arena_chunk_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero) { extent_t *extent; @@ -332,7 +332,7 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, /* Revert optimistic stats updates. */ malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_malloc_stats_update_undo(arena, usize); + arena_large_malloc_stats_update_undo(arena, usize); arena->stats.mapped -= usize; } arena_nactive_sub(arena, (usize + large_pad) >> LG_PAGE); @@ -343,7 +343,7 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, } extent_t * -arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, +arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { extent_t *extent; @@ -353,7 +353,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, /* Optimistically update stats. 
*/ if (config_stats) { - arena_huge_malloc_stats_update(arena, usize); + arena_large_malloc_stats_update(arena, usize); arena->stats.mapped += usize; } arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); @@ -362,7 +362,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, usize, large_pad, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); if (extent == NULL) { - extent = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, + extent = arena_chunk_alloc_large_hard(tsdn, arena, &chunk_hooks, usize, alignment, zero); } @@ -370,7 +370,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, } void -arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -378,7 +378,8 @@ arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (!locked) malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_dalloc_stats_update(arena, extent_usize_get(extent)); + arena_large_dalloc_stats_update(arena, + extent_usize_get(extent)); arena->stats.mapped -= extent_size_get(extent); } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); @@ -389,7 +390,7 @@ arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } void -arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_chunk_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldusize) { size_t usize = extent_usize_get(extent); @@ -397,7 +398,7 @@ arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_ralloc_stats_update(arena, oldusize, usize); + arena_large_ralloc_stats_update(arena, oldusize, usize); arena->stats.mapped -= udiff; } arena_nactive_sub(arena, udiff >> LG_PAGE); @@ -405,7 +406,7 @@ 
arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } void -arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_chunk_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldusize) { size_t usize = extent_usize_get(extent); @@ -413,7 +414,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_huge_ralloc_stats_update(arena, oldusize, usize); + arena_large_ralloc_stats_update(arena, oldusize, usize); arena->stats.mapped += udiff; } arena_nactive_add(arena, udiff >> LG_PAGE); @@ -891,26 +892,26 @@ arena_reset(tsd_t *tsd, arena_t *arena) * stats refreshes would impose an inconvenient burden. */ - /* Huge allocations. */ - malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); - for (extent = ql_last(&arena->huge, ql_link); extent != NULL; extent = - ql_last(&arena->huge, ql_link)) { + /* Large allocations. */ + malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); + for (extent = ql_last(&arena->large, ql_link); extent != NULL; extent = + ql_last(&arena->large, ql_link)) { void *ptr = extent_base_get(extent); size_t usize; - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); if (config_stats || (config_prof && opt_prof)) usize = isalloc(tsd_tsdn(tsd), extent, ptr); - /* Remove huge allocation from prof sample set. */ + /* Remove large allocation from prof sample set. */ if (config_prof && opt_prof) prof_free(tsd, extent, ptr, usize); - huge_dalloc(tsd_tsdn(tsd), extent); - malloc_mutex_lock(tsd_tsdn(tsd), &arena->huge_mtx); + large_dalloc(tsd_tsdn(tsd), extent); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); /* Cancel out unwanted effects on stats. 
*/ if (config_stats) - arena_huge_reset_stats_cancel(arena, usize); + arena_large_reset_stats_cancel(arena, usize); } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->huge_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); @@ -1283,7 +1284,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, if (likely(size <= SMALL_MAXCLASS)) return (arena_malloc_small(tsdn, arena, ind, zero)); - return (huge_malloc(tsdn, arena, index2size(ind), zero)); + return (large_malloc(tsdn, arena, index2size(ind), zero)); } void * @@ -1299,9 +1300,9 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, tcache, true); } else { if (likely(alignment <= CACHELINE)) - ret = huge_malloc(tsdn, arena, usize, zero); + ret = large_malloc(tsdn, arena, usize, zero); else - ret = huge_palloc(tsdn, arena, usize, alignment, zero); + ret = large_palloc(tsdn, arena, usize, alignment, zero); } return (ret); } @@ -1360,10 +1361,10 @@ arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, usize = arena_prof_demote(tsdn, extent, ptr); if (usize <= tcache_maxclass) { - tcache_dalloc_huge(tsdn_tsd(tsdn), tcache, ptr, usize, + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, usize, slow_path); } else - huge_dalloc(tsdn, extent); + large_dalloc(tsdn, extent); } static void @@ -1493,9 +1494,9 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t usize_min, usize_max; /* Calls with non-zero extra had to clamp extra. 
*/ - assert(extra == 0 || size + extra <= HUGE_MAXCLASS); + assert(extra == 0 || size + extra <= LARGE_MAXCLASS); - if (unlikely(size > HUGE_MAXCLASS)) + if (unlikely(size > LARGE_MAXCLASS)) return (true); usize_min = s2u(size); @@ -1515,7 +1516,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) { - return (huge_ralloc_no_move(tsdn, extent, usize_min, usize_max, + return (large_ralloc_no_move(tsdn, extent, usize_min, usize_max, zero)); } @@ -1531,7 +1532,7 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, return (arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true)); usize = sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) return (NULL); return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); } @@ -1544,7 +1545,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, size_t usize, copysize; usize = s2u(size); - if (unlikely(usize == 0 || size > HUGE_MAXCLASS)) + if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) return (NULL); if (likely(usize <= SMALL_MAXCLASS)) { @@ -1555,8 +1556,8 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, } if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { - return (huge_ralloc(tsdn, arena, extent, usize, alignment, zero, - tcache)); + return (large_ralloc(tsdn, arena, extent, usize, alignment, + zero, tcache)); } /* @@ -1670,7 +1671,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_huge_stats_t *hstats) + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { unsigned i; @@ -1687,16 +1688,16 @@ 
arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->purged += arena->stats.purged; astats->metadata_mapped += arena->stats.metadata_mapped; astats->metadata_allocated += arena_metadata_allocated_get(arena); - astats->allocated_huge += arena->stats.allocated_huge; - astats->nmalloc_huge += arena->stats.nmalloc_huge; - astats->ndalloc_huge += arena->stats.ndalloc_huge; - astats->nrequests_huge += arena->stats.nrequests_huge; + astats->allocated_large += arena->stats.allocated_large; + astats->nmalloc_large += arena->stats.nmalloc_large; + astats->ndalloc_large += arena->stats.ndalloc_large; + astats->nrequests_large += arena->stats.nrequests_large; for (i = 0; i < NSIZES - NBINS; i++) { - hstats[i].nmalloc += arena->stats.hstats[i].nmalloc; - hstats[i].ndalloc += arena->stats.hstats[i].ndalloc; - hstats[i].nrequests += arena->stats.hstats[i].nrequests; - hstats[i].curhchunks += arena->stats.hstats[i].curhchunks; + lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; + lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; + lstats[i].nrequests += arena->stats.lstats[i].nrequests; + lstats[i].curlextents += arena->stats.lstats[i].curlextents; } malloc_mutex_unlock(tsdn, &arena->lock); @@ -1786,9 +1787,9 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (opt_purge == purge_mode_decay) arena_decay_init(arena, arena_decay_time_default_get()); - ql_new(&arena->huge); - if (malloc_mutex_init(&arena->huge_mtx, "arena_huge", - WITNESS_RANK_ARENA_HUGE)) + ql_new(&arena->large); + if (malloc_mutex_init(&arena->large_mtx, "arena_large", + WITNESS_RANK_ARENA_LARGE)) return (NULL); for (i = 0; i < NPSIZES; i++) { @@ -1859,7 +1860,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_prefork(tsdn, &arena->bins[i].lock); - malloc_mutex_prefork(tsdn, &arena->huge_mtx); + malloc_mutex_prefork(tsdn, &arena->large_mtx); } void @@ -1867,7 +1868,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; - 
malloc_mutex_postfork_parent(tsdn, &arena->huge_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); malloc_mutex_postfork_parent(tsdn, &arena->extent_cache_mtx); @@ -1880,7 +1881,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_child(tsdn, &arena->huge_mtx); + malloc_mutex_postfork_child(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); malloc_mutex_postfork_child(tsdn, &arena->extent_cache_mtx); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index f890a5cd..f8c968b3 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -78,7 +78,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, /* * sbrk() uses a signed increment argument, so take care not to - * interpret a huge allocation request as a negative increment. + * interpret a large allocation request as a negative increment. 
*/ if ((intptr_t)size < 0) return (NULL); diff --git a/src/ckh.c b/src/ckh.c index 5ec0f60a..90a81155 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -267,7 +267,7 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) lg_curcells++; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { ret = true; goto label_return; } @@ -315,7 +315,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) return; tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, arena_ichoose(tsdn, NULL)); @@ -390,7 +390,7 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { ret = true; goto label_return; } diff --git a/src/ctl.c b/src/ctl.c index 34c7e1bd..85ca2e86 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -126,8 +126,8 @@ CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_slab_size) INDEX_PROTO(arenas_bin_i) -CTL_PROTO(arenas_hchunk_i_size) -INDEX_PROTO(arenas_hchunk_i) +CTL_PROTO(arenas_lextent_i_size) +INDEX_PROTO(arenas_lextent_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_lg_dirty_mult) @@ -137,7 +137,7 @@ CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) -CTL_PROTO(arenas_nhchunks) +CTL_PROTO(arenas_nlextents) CTL_PROTO(arenas_extend) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) @@ -150,10 +150,10 @@ CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) 
CTL_PROTO(stats_arenas_i_small_ndalloc) CTL_PROTO(stats_arenas_i_small_nrequests) -CTL_PROTO(stats_arenas_i_huge_allocated) -CTL_PROTO(stats_arenas_i_huge_nmalloc) -CTL_PROTO(stats_arenas_i_huge_ndalloc) -CTL_PROTO(stats_arenas_i_huge_nrequests) +CTL_PROTO(stats_arenas_i_large_allocated) +CTL_PROTO(stats_arenas_i_large_nmalloc) +CTL_PROTO(stats_arenas_i_large_ndalloc) +CTL_PROTO(stats_arenas_i_large_nrequests) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) @@ -164,11 +164,11 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) INDEX_PROTO(stats_arenas_i_bins_j) -CTL_PROTO(stats_arenas_i_hchunks_j_nmalloc) -CTL_PROTO(stats_arenas_i_hchunks_j_ndalloc) -CTL_PROTO(stats_arenas_i_hchunks_j_nrequests) -CTL_PROTO(stats_arenas_i_hchunks_j_curhchunks) -INDEX_PROTO(stats_arenas_i_hchunks_j) +CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) +CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) +CTL_PROTO(stats_arenas_i_lextents_j_nrequests) +CTL_PROTO(stats_arenas_i_lextents_j_curlextents) +INDEX_PROTO(stats_arenas_i_lextents_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_lg_dirty_mult) @@ -310,15 +310,15 @@ static const ctl_indexed_node_t arenas_bin_node[] = { {INDEX(arenas_bin_i)} }; -static const ctl_named_node_t arenas_hchunk_i_node[] = { - {NAME("size"), CTL(arenas_hchunk_i_size)} +static const ctl_named_node_t arenas_lextent_i_node[] = { + {NAME("size"), CTL(arenas_lextent_i_size)} }; -static const ctl_named_node_t super_arenas_hchunk_i_node[] = { - {NAME(""), CHILD(named, arenas_hchunk_i)} +static const ctl_named_node_t super_arenas_lextent_i_node[] = { + {NAME(""), CHILD(named, arenas_lextent_i)} }; -static const ctl_indexed_node_t arenas_hchunk_node[] = { - {INDEX(arenas_hchunk_i)} +static const ctl_indexed_node_t arenas_lextent_node[] = { + {INDEX(arenas_lextent_i)} }; static const 
ctl_named_node_t arenas_node[] = { @@ -332,8 +332,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nhchunks"), CTL(arenas_nhchunks)}, - {NAME("hchunk"), CHILD(indexed, arenas_hchunk)}, + {NAME("nlextents"), CTL(arenas_nlextents)}, + {NAME("lextent"), CHILD(indexed, arenas_lextent)}, {NAME("extend"), CTL(arenas_extend)} }; @@ -359,11 +359,11 @@ static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} }; -static const ctl_named_node_t stats_arenas_i_huge_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_huge_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_huge_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_huge_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_huge_nrequests)} +static const ctl_named_node_t stats_arenas_i_large_node[] = { + {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { @@ -385,18 +385,18 @@ static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { {INDEX(stats_arenas_i_bins_j)} }; -static const ctl_named_node_t stats_arenas_i_hchunks_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_hchunks_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_hchunks_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_hchunks_j_nrequests)}, - {NAME("curhchunks"), CTL(stats_arenas_i_hchunks_j_curhchunks)} +static const ctl_named_node_t stats_arenas_i_lextents_j_node[] = { + {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)}, + {NAME("curlextents"), 
CTL(stats_arenas_i_lextents_j_curlextents)} }; -static const ctl_named_node_t super_stats_arenas_i_hchunks_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_hchunks_j)} +static const ctl_named_node_t super_stats_arenas_i_lextents_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_lextents_j)} }; -static const ctl_indexed_node_t stats_arenas_i_hchunks_node[] = { - {INDEX(stats_arenas_i_hchunks_j)} +static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { + {INDEX(stats_arenas_i_lextents_j)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -413,9 +413,9 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("purged"), CTL(stats_arenas_i_purged)}, {NAME("metadata"), CHILD(named, stats_arenas_i_metadata)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, - {NAME("huge"), CHILD(named, stats_arenas_i_huge)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("hchunks"), CHILD(indexed, stats_arenas_i_hchunks)} + {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} @@ -476,8 +476,8 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->ndalloc_small = 0; astats->nrequests_small = 0; memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); - memset(astats->hstats, 0, (NSIZES - NBINS) * - sizeof(malloc_huge_stats_t)); + memset(astats->lstats, 0, (NSIZES - NBINS) * + sizeof(malloc_large_stats_t)); } } @@ -490,7 +490,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, &cstats->pactive, &cstats->pdirty, &cstats->astats, - cstats->bstats, cstats->hstats); + cstats->bstats, cstats->lstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].curregs * @@ -532,10 +532,12 @@ 
ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->ndalloc_small += astats->ndalloc_small; sstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_huge += astats->astats.allocated_huge; - sstats->astats.nmalloc_huge += astats->astats.nmalloc_huge; - sstats->astats.ndalloc_huge += astats->astats.ndalloc_huge; - sstats->astats.nrequests_huge += astats->astats.nrequests_huge; + sstats->astats.allocated_large += + astats->astats.allocated_large; + sstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sstats->astats.nrequests_large += + astats->astats.nrequests_large; for (i = 0; i < NBINS; i++) { sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; @@ -556,12 +558,12 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) } for (i = 0; i < NSIZES - NBINS; i++) { - sstats->hstats[i].nmalloc += astats->hstats[i].nmalloc; - sstats->hstats[i].ndalloc += astats->hstats[i].ndalloc; - sstats->hstats[i].nrequests += - astats->hstats[i].nrequests; - sstats->hstats[i].curhchunks += - astats->hstats[i].curhchunks; + sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sstats->lstats[i].nrequests += + astats->lstats[i].nrequests; + sstats->lstats[i].curlextents += + astats->lstats[i].curlextents; } } } @@ -643,7 +645,7 @@ ctl_refresh(tsdn_t *tsdn) &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + - ctl_stats.arenas[ctl_stats.narenas].astats.allocated_huge; + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large; ctl_stats.active = (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); ctl_stats.metadata = base_allocated + @@ -1812,15 +1814,15 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) return (super_arenas_bin_i_node); } -CTL_RO_NL_GEN(arenas_nhchunks, NSIZES - NBINS, unsigned) 
-CTL_RO_NL_GEN(arenas_hchunk_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) +CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) +CTL_RO_NL_GEN(arenas_lextent_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_hchunk_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) +arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NSIZES - NBINS) return (NULL); - return (super_arenas_hchunk_i_node); + return (super_arenas_lextent_i_node); } static int @@ -2012,14 +2014,14 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_huge_allocated, - ctl_stats.arenas[mib[2]].astats.allocated_huge, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nmalloc, - ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_huge_ndalloc, - ctl_stats.arenas[mib[2]].astats.ndalloc_huge, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_huge_nrequests, - ctl_stats.arenas[mib[2]].astats.nmalloc_huge, uint64_t) /* Intentional. */ +CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, + ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, + ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, + ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, + ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) /* Intentional. 
*/ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) @@ -2050,23 +2052,23 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, return (super_stats_arenas_i_bins_j_node); } -CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nmalloc, - ctl_stats.arenas[mib[2]].hstats[mib[4]].nmalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_ndalloc, - ctl_stats.arenas[mib[2]].hstats[mib[4]].ndalloc, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_nrequests, - ctl_stats.arenas[mib[2]].hstats[mib[4]].nrequests, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_hchunks_j_curhchunks, - ctl_stats.arenas[mib[2]].hstats[mib[4]].curhchunks, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, + ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, + ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, + ctl_stats.arenas[mib[2]].lstats[mib[4]].curlextents, size_t) static const ctl_named_node_t * -stats_arenas_i_hchunks_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, +stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > NSIZES - NBINS) return (NULL); - return (super_stats_arenas_i_hchunks_j_node); + return (super_stats_arenas_i_lextents_j_node); } static const ctl_named_node_t * diff --git a/src/extent.c b/src/extent.c index 757a6e21..2f929a83 100644 --- a/src/extent.c +++ b/src/extent.c @@ -40,7 +40,7 @@ extent_size_quantize_floor(size_t size) pszind_t pind; assert(size > 0); - assert(size - large_pad <= HUGE_MAXCLASS); + assert(size - large_pad <= LARGE_MAXCLASS); assert((size & PAGE_MASK) == 0); assert(size != 0); @@ -77,7 +77,7 @@ 
extent_size_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size - large_pad <= HUGE_MAXCLASS); + assert(size - large_pad <= LARGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = extent_size_quantize_floor(size); diff --git a/src/jemalloc.c b/src/jemalloc.c index 429667f6..85a592e9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1457,7 +1457,7 @@ ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, if (config_stats || (config_prof && opt_prof)) { *usize = index2size(ind); - assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + assert(*usize > 0 && *usize <= LARGE_MAXCLASS); } if (config_prof && opt_prof) @@ -1589,7 +1589,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) } usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { result = NULL; goto label_oom; } @@ -1663,7 +1663,7 @@ je_calloc(size_t num, size_t size) if (num == 0 || size == 0) num_size = 1; else - num_size = HUGE_MAXCLASS + 1; /* Trigger OOM. */ + num_size = LARGE_MAXCLASS + 1; /* Trigger OOM. */ /* * Try to avoid division here. We know that it isn't possible to * overflow during multiplication if neither operand uses any of the @@ -1671,7 +1671,7 @@ je_calloc(size_t num, size_t size) */ } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2))) && (num_size / size != num))) - num_size = HUGE_MAXCLASS + 1; /* size_t overflow. */ + num_size = LARGE_MAXCLASS + 1; /* size_t overflow. */ if (likely(!malloc_slow)) { ret = ialloc_body(num_size, true, &tsdn, &usize, false); @@ -1819,7 +1819,7 @@ je_realloc(void *ptr, size_t size) old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); if (config_prof && opt_prof) { usize = s2u(size); - ret = unlikely(usize == 0 || usize > HUGE_MAXCLASS) ? + ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? 
NULL : irealloc_prof(tsd, extent, ptr, old_usize, usize); } else { @@ -1956,7 +1956,7 @@ imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); *usize = sa2u(size, *alignment); } - if (unlikely(*usize == 0 || *usize > HUGE_MAXCLASS)) + if (unlikely(*usize == 0 || *usize > LARGE_MAXCLASS)) return (true); *zero = MALLOCX_ZERO_GET(flags); if ((flags & MALLOCX_TCACHE_MASK) != 0) { @@ -2084,7 +2084,7 @@ imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, return (NULL); if (config_stats || (config_prof && opt_prof)) { *usize = index2size(ind); - assert(*usize > 0 && *usize <= HUGE_MAXCLASS); + assert(*usize > 0 && *usize <= LARGE_MAXCLASS); } if (config_prof && opt_prof) { @@ -2233,7 +2233,7 @@ je_rallocx(void *ptr, size_t size, int flags) if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) goto label_oom; p = irallocx_prof(tsd, extent, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); @@ -2314,17 +2314,17 @@ ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, */ if (alignment == 0) { usize_max = s2u(size+extra); - assert(usize_max > 0 && usize_max <= HUGE_MAXCLASS); + assert(usize_max > 0 && usize_max <= LARGE_MAXCLASS); } else { usize_max = sa2u(size+extra, alignment); - if (unlikely(usize_max == 0 || usize_max > HUGE_MAXCLASS)) { + if (unlikely(usize_max == 0 || usize_max > LARGE_MAXCLASS)) { /* * usize_max is out of range, and chances are that * allocation will fail, but use the maximum possible * value and carry on with prof_alloc_prep(), just in * case allocation succeeds. 
*/ - usize_max = HUGE_MAXCLASS; + usize_max = LARGE_MAXCLASS; } } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); @@ -2368,18 +2368,18 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) /* * The API explicitly absolves itself of protecting against (size + * extra) numerical overflow, but we may need to clamp extra to avoid - * exceeding HUGE_MAXCLASS. + * exceeding LARGE_MAXCLASS. * * Ordinarily, size limit checking is handled deeper down, but here we * have to check as part of (size + extra) clamping, since we need the * clamped value in the above helper functions. */ - if (unlikely(size > HUGE_MAXCLASS)) { + if (unlikely(size > LARGE_MAXCLASS)) { usize = old_usize; goto label_not_resized; } - if (unlikely(HUGE_MAXCLASS - size < extra)) - extra = HUGE_MAXCLASS - size; + if (unlikely(LARGE_MAXCLASS - size < extra)) + extra = LARGE_MAXCLASS - size; if (config_prof && opt_prof) { usize = ixallocx_prof(tsd, extent, ptr, old_usize, size, extra, @@ -2512,7 +2512,7 @@ je_nallocx(size_t size, int flags) witness_assert_lockless(tsdn); usize = inallocx(tsdn, size, flags); - if (unlikely(usize > HUGE_MAXCLASS)) + if (unlikely(usize > LARGE_MAXCLASS)) return (0); witness_assert_lockless(tsdn); diff --git a/src/huge.c b/src/large.c similarity index 66% rename from src/huge.c rename to src/large.c index 8aa3dfd2..43bfb284 100644 --- a/src/huge.c +++ b/src/large.c @@ -1,19 +1,19 @@ -#define JEMALLOC_HUGE_C_ +#define JEMALLOC_LARGE_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ void * -huge_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) +large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (huge_palloc(tsdn, arena, usize, CACHELINE, zero)); + return (large_palloc(tsdn, arena, usize, CACHELINE, zero)); } void * -huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, 
+large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { size_t ausize; @@ -24,7 +24,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(!tsdn_null(tsdn) || arena != NULL); ausize = sa2u(usize, alignment); - if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) + if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) return (NULL); /* @@ -34,15 +34,15 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, is_zeroed = zero; if (likely(!tsdn_null(tsdn))) arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL) || (extent = arena_chunk_alloc_huge(tsdn, + if (unlikely(arena == NULL) || (extent = arena_chunk_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) return (NULL); - /* Insert extent into huge. */ - malloc_mutex_lock(tsdn, &arena->huge_mtx); + /* Insert extent into large. */ + malloc_mutex_lock(tsdn, &arena->large_mtx); ql_elm_new(extent, ql_link); - ql_tail_insert(&arena->huge, extent, ql_link); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + ql_tail_insert(&arena->large, extent, ql_link); + malloc_mutex_unlock(tsdn, &arena->large_mtx); if (config_prof && arena_prof_accum(tsdn, arena, usize)) prof_idump(tsdn); @@ -61,23 +61,23 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } #ifdef JEMALLOC_JET -#undef huge_dalloc_junk -#define huge_dalloc_junk JEMALLOC_N(n_huge_dalloc_junk) +#undef large_dalloc_junk +#define large_dalloc_junk JEMALLOC_N(n_large_dalloc_junk) #endif void -huge_dalloc_junk(void *ptr, size_t usize) +large_dalloc_junk(void *ptr, size_t usize) { memset(ptr, JEMALLOC_FREE_JUNK, usize); } #ifdef JEMALLOC_JET -#undef huge_dalloc_junk -#define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk) -huge_dalloc_junk_t *huge_dalloc_junk = JEMALLOC_N(n_huge_dalloc_junk); +#undef large_dalloc_junk +#define large_dalloc_junk JEMALLOC_N(large_dalloc_junk) +large_dalloc_junk_t *large_dalloc_junk = 
JEMALLOC_N(n_large_dalloc_junk); #endif static void -huge_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) +large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -86,13 +86,13 @@ huge_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) * unmapped. */ if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) - huge_dalloc_junk(ptr, usize); + large_dalloc_junk(ptr, usize); memset(ptr, JEMALLOC_FREE_JUNK, usize); } } static bool -huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) +large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); @@ -109,20 +109,20 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) return (true); if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_maybe_junk(tsdn, extent_addr_get(trail), + large_dalloc_maybe_junk(tsdn, extent_addr_get(trail), extent_usize_get(trail)); } arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, trail); } - arena_chunk_ralloc_huge_shrink(tsdn, arena, extent, oldusize); + arena_chunk_ralloc_large_shrink(tsdn, arena, extent, oldusize); return (false); } static bool -huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, +large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = extent_arena_get(extent); @@ -173,34 +173,35 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_chunk_ralloc_huge_expand(tsdn, arena, extent, oldusize); + arena_chunk_ralloc_large_expand(tsdn, arena, extent, oldusize); return (false); } bool -huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, +large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero) { assert(s2u(extent_usize_get(extent)) == 
extent_usize_get(extent)); /* The following should have been caught by callers. */ - assert(usize_min > 0 && usize_max <= HUGE_MAXCLASS); - /* Both allocation sizes must be huge to avoid a move. */ + assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS); + /* Both allocation sizes must be large to avoid a move. */ assert(extent_usize_get(extent) >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS); if (usize_max > extent_usize_get(extent)) { /* Attempt to expand the allocation in-place. */ - if (!huge_ralloc_no_move_expand(tsdn, extent, usize_max, + if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > - extent_usize_get(extent) && huge_ralloc_no_move_expand(tsdn, - extent, usize_min, zero)) { + extent_usize_get(extent) && + large_ralloc_no_move_expand(tsdn, extent, usize_min, + zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } @@ -218,7 +219,7 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* Attempt to shrink the allocation in-place. 
*/ if (extent_usize_get(extent) > usize_max) { - if (!huge_ralloc_no_move_shrink(tsdn, extent, usize_max)) { + if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); } @@ -227,30 +228,30 @@ huge_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, } static void * -huge_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, +large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { if (alignment <= CACHELINE) - return (huge_malloc(tsdn, arena, usize, zero)); - return (huge_palloc(tsdn, arena, usize, alignment, zero)); + return (large_malloc(tsdn, arena, usize, zero)); + return (large_palloc(tsdn, arena, usize, alignment, zero)); } void * -huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, +large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; /* The following should have been caught by callers. */ - assert(usize > 0 && usize <= HUGE_MAXCLASS); - /* Both allocation sizes must be huge to avoid a move. */ + assert(usize > 0 && usize <= LARGE_MAXCLASS); + /* Both allocation sizes must be large to avoid a move. */ assert(extent_usize_get(extent) >= LARGE_MINCLASS && usize >= LARGE_MINCLASS); /* Try to avoid moving the allocation. */ - if (!huge_ralloc_no_move(tsdn, extent, usize, usize, zero)) + if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) return (extent_addr_get(extent)); /* @@ -258,7 +259,7 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. 
*/ - ret = huge_ralloc_move_helper(tsdn, arena, usize, alignment, zero); + ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, zero); if (ret == NULL) return (NULL); @@ -271,82 +272,82 @@ huge_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, } static void -huge_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) +large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) { arena_t *arena; arena = extent_arena_get(extent); if (!junked_locked) - malloc_mutex_lock(tsdn, &arena->huge_mtx); - ql_remove(&arena->huge, extent, ql_link); + malloc_mutex_lock(tsdn, &arena->large_mtx); + ql_remove(&arena->large, extent, ql_link); if (!junked_locked) { - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->large_mtx); - huge_dalloc_maybe_junk(tsdn, extent_addr_get(extent), + large_dalloc_maybe_junk(tsdn, extent_addr_get(extent), extent_usize_get(extent)); } - arena_chunk_dalloc_huge(tsdn, arena, extent, junked_locked); + arena_chunk_dalloc_large(tsdn, arena, extent, junked_locked); if (!junked_locked) arena_decay_tick(tsdn, arena); } void -huge_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) +large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) { - huge_dalloc_impl(tsdn, extent, true); + large_dalloc_impl(tsdn, extent, true); } void -huge_dalloc(tsdn_t *tsdn, extent_t *extent) +large_dalloc(tsdn_t *tsdn, extent_t *extent) { - huge_dalloc_impl(tsdn, extent, false); + large_dalloc_impl(tsdn, extent, false); } size_t -huge_salloc(tsdn_t *tsdn, const extent_t *extent) +large_salloc(tsdn_t *tsdn, const extent_t *extent) { size_t usize; arena_t *arena; arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->large_mtx); usize = extent_usize_get(extent); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->large_mtx); return (usize); } prof_tctx_t * -huge_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) 
+large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { prof_tctx_t *tctx; arena_t *arena; arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->large_mtx); tctx = extent_prof_tctx_get(extent); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->large_mtx); return (tctx); } void -huge_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) +large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { arena_t *arena; arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->huge_mtx); + malloc_mutex_lock(tsdn, &arena->large_mtx); extent_prof_tctx_set(extent, tctx); - malloc_mutex_unlock(tsdn, &arena->huge_mtx); + malloc_mutex_unlock(tsdn, &arena->large_mtx); } void -huge_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) +large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { - huge_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); + large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/stats.c b/src/stats.c index 599e377d..493e409a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -37,10 +37,10 @@ size_t stats_cactive = 0; static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_hchunks_print( +static void stats_arena_lextents_print( void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool huge); + void *cbopaque, unsigned i, bool bins, bool large); /******************************************************************************/ @@ -157,34 +157,34 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } static void -stats_arena_hchunks_print(void (*write_cb)(void *, const char *), +stats_arena_lextents_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i) { - 
unsigned nbins, nhchunks, j; + unsigned nbins, nlextents, j; bool in_gap; malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc ndalloc" - " nrequests curhchunks\n"); + "large: size ind allocated nmalloc ndalloc" + " nrequests curlextents\n"); CTL_GET("arenas.nbins", &nbins, unsigned); - CTL_GET("arenas.nhchunks", &nhchunks, unsigned); - for (j = 0, in_gap = false; j < nhchunks; j++) { + CTL_GET("arenas.nlextents", &nlextents, unsigned); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; - size_t hchunk_size, curhchunks; + size_t lextent_size, curlextents; - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nmalloc", i, j, + CTL_M2_M4_GET("stats.arenas.0.lextents.0.nmalloc", i, j, &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.ndalloc", i, j, + CTL_M2_M4_GET("stats.arenas.0.lextents.0.ndalloc", i, j, &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, + CTL_M2_M4_GET("stats.arenas.0.lextents.0.nrequests", i, j, &nrequests, uint64_t); if (nrequests == 0) in_gap = true; else { - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, + CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, - j, &curhchunks, size_t); + CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", + i, j, &curlextents, size_t); if (in_gap) { malloc_cprintf(write_cb, cbopaque, " ---\n"); @@ -193,9 +193,9 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", - hchunk_size, nbins + j, - curhchunks * hchunk_size, nmalloc, ndalloc, - nrequests, curhchunks); + lextent_size, nbins + j, + curlextents * lextent_size, nmalloc, ndalloc, + nrequests, curlextents); } } if (in_gap) { @@ -206,7 +206,7 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), static void stats_arena_print(void (*write_cb)(void *, const char *), void 
*cbopaque, - unsigned i, bool bins, bool huge) + unsigned i, bool bins, bool large) { unsigned nthreads; const char *dss; @@ -216,8 +216,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; - size_t huge_allocated; - uint64_t huge_nmalloc, huge_ndalloc, huge_nrequests; + size_t large_allocated; + uint64_t large_nmalloc, large_ndalloc, large_nrequests; CTL_GET("arenas.page", &page, size_t); @@ -268,20 +268,21 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "small: %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64"\n", small_allocated, small_nmalloc, small_ndalloc, small_nrequests); - CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); - CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, + size_t); + CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); + CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 + "large: %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64"\n", - huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + large_allocated, large_nmalloc, large_ndalloc, large_nrequests); malloc_cprintf(write_cb, cbopaque, "total: %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64"\n", - small_allocated + huge_allocated, small_nmalloc + huge_nmalloc, - small_ndalloc + huge_ndalloc, small_nrequests + huge_nrequests); + small_allocated + large_allocated, small_nmalloc + large_nmalloc, + small_ndalloc + large_ndalloc, small_nrequests + large_nrequests); malloc_cprintf(write_cb, cbopaque, 
"active: %12zu\n", pactive * page); CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); @@ -300,8 +301,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, if (bins) stats_arena_bins_print(write_cb, cbopaque, i); - if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, i); + if (large) + stats_arena_lextents_print(write_cb, cbopaque, i); } void @@ -315,7 +316,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool merged = true; bool unmerged = true; bool bins = true; - bool huge = true; + bool large = true; /* * Refresh stats, in case mallctl() was called by the application. @@ -356,7 +357,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bins = false; break; case 'l': - huge = false; + large = false; break; default:; } @@ -568,7 +569,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\nMerged arenas stats:\n"); stats_arena_print(write_cb, cbopaque, - narenas, bins, huge); + narenas, bins, large); } } } @@ -594,7 +595,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque, "\narenas[%u]:\n", i); stats_arena_print(write_cb, - cbopaque, i, bins, huge); + cbopaque, i, bins, large); } } } diff --git a/src/tcache.c b/src/tcache.c index 02015227..69444fac 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -46,7 +46,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) tbin->ncached - tbin->low_water + (tbin->low_water >> 2)); } else { - tcache_bin_flush_huge(tsd, tbin, binind, tbin->ncached + tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); } /* @@ -164,7 +164,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } void -tcache_bin_flush_huge(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { arena_t *arena; @@ -194,9 +194,9 @@ 
tcache_bin_flush_huge(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } if (config_stats) { merged_stats = true; - arena->stats.nrequests_huge += + arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.hstats[binind - NBINS].nrequests += + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } @@ -207,7 +207,7 @@ tcache_bin_flush_huge(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(ptr != NULL); extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == locked_arena) { - huge_dalloc_junked_locked(tsd_tsdn(tsd), + large_dalloc_junked_locked(tsd_tsdn(tsd), extent); } else { /* @@ -232,8 +232,8 @@ tcache_bin_flush_huge(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * arena, so the stats didn't get merged. Manually do so now. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - arena->stats.nrequests_huge += tbin->tstats.nrequests; - arena->stats.hstats[binind - NBINS].nrequests += + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[binind - NBINS].nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -371,12 +371,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; - tcache_bin_flush_huge(tsd, tbin, i, 0, tcache); + tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats && tbin->tstats.nrequests != 0) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - arena->stats.nrequests_huge += tbin->tstats.nrequests; - arena->stats.hstats[i - NBINS].nrequests += + arena->stats.nrequests_large += tbin->tstats.nrequests; + arena->stats.lstats[i - NBINS].nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } @@ -431,10 +431,10 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } for (; i < nhbins; i++) { - malloc_huge_stats_t *hstats = &arena->stats.hstats[i - NBINS]; + 
malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; tcache_bin_t *tbin = &tcache->tbins[i]; - arena->stats.nrequests_huge += tbin->tstats.nrequests; - hstats->nrequests += tbin->tstats.nrequests; + arena->stats.nrequests_large += tbin->tstats.nrequests; + lstats->nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } } @@ -537,7 +537,7 @@ tcache_boot(tsdn_t *tsdn) stack_nelms += tcache_bin_info[i].ncached_max; } for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_HUGE; + tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; stack_nelms += tcache_bin_info[i].ncached_max; } diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 3aad7a8a..ca87e80f 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -120,7 +120,7 @@ chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, TEST_BEGIN(test_chunk) { void *p; - size_t old_size, new_size, huge0, huge1, huge2, sz; + size_t old_size, new_size, large0, large1, large2, sz; unsigned arena_ind; int flags; size_t hooks_mib[3], purge_mib[3]; @@ -162,14 +162,14 @@ TEST_BEGIN(test_chunk) assert_ptr_ne(old_hooks.split, chunk_split, "Unexpected split error"); assert_ptr_ne(old_hooks.merge, chunk_merge, "Unexpected merge error"); - /* Get huge size classes. */ + /* Get large size classes. 
*/ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.0.size failure"); - assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.1.size failure"); - assert_d_eq(mallctl("arenas.hchunk.2.size", &huge2, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.2.size failure"); + assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.0.size failure"); + assert_d_eq(mallctl("arenas.lextent.1.size", &large1, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.1.size failure"); + assert_d_eq(mallctl("arenas.lextent.2.size", &large2, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.2.size failure"); /* Test dalloc/decommit/purge cascade. */ purge_miblen = sizeof(purge_mib)/sizeof(size_t); @@ -178,13 +178,13 @@ TEST_BEGIN(test_chunk) purge_mib[1] = (size_t)arena_ind; do_dalloc = false; do_decommit = false; - p = mallocx(huge0 * 2, flags); + p = mallocx(large0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; did_purge = false; did_split = false; - xallocx_success_a = (xallocx(p, huge0, 0, flags) == huge0); + xallocx_success_a = (xallocx(p, large0, 0, flags) == large0); assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { @@ -199,18 +199,18 @@ TEST_BEGIN(test_chunk) /* Test decommit/commit and observe split/merge. 
*/ do_dalloc = false; do_decommit = true; - p = mallocx(huge0 * 2, flags); + p = mallocx(large0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; did_split = false; did_merge = false; - xallocx_success_b = (xallocx(p, huge0, 0, flags) == huge0); + xallocx_success_b = (xallocx(p, large0, 0, flags) == large0); assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) assert_true(did_split, "Expected split"); - xallocx_success_c = (xallocx(p, huge0 * 2, 0, flags) == huge0 * 2); + xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2); assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); @@ -218,7 +218,7 @@ TEST_BEGIN(test_chunk) do_dalloc = true; do_decommit = false; - /* Make sure non-huge allocation succeeds. */ + /* Make sure non-large allocation succeeds. 
*/ p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, flags); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 55e1a090..9d623eb7 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -18,10 +18,10 @@ get_nsizes_impl(const char *cmd) } static unsigned -get_nhuge(void) +get_nlarge(void) { - return (get_nsizes_impl("arenas.nhchunks")); + return (get_nsizes_impl("arenas.nlextents")); } static size_t @@ -44,20 +44,20 @@ get_size_impl(const char *cmd, size_t ind) } static size_t -get_huge_size(size_t ind) +get_large_size(size_t ind) { - return (get_size_impl("arenas.hchunk.0.size", ind)); + return (get_size_impl("arenas.lextent.0.size", ind)); } TEST_BEGIN(test_overflow) { - size_t hugemax; + size_t largemax; - hugemax = get_huge_size(get_nhuge()-1); + largemax = get_large_size(get_nlarge()-1); - assert_ptr_null(mallocx(hugemax+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", hugemax+1); + assert_ptr_null(mallocx(largemax+1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", largemax+1); assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); @@ -73,7 +73,7 @@ TEST_END TEST_BEGIN(test_oom) { - size_t hugemax; + size_t largemax; bool oom; void *ptrs[3]; unsigned i; @@ -82,16 +82,16 @@ TEST_BEGIN(test_oom) * It should be impossible to allocate three objects that each consume * nearly half the virtual address space. 
*/ - hugemax = get_huge_size(get_nhuge()-1); + largemax = get_large_size(get_nlarge()-1); oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { - ptrs[i] = mallocx(hugemax, 0); + ptrs[i] = mallocx(largemax, 0); if (ptrs[i] == NULL) oom = true; } assert_true(oom, "Expected OOM during series of calls to mallocx(size=%zu, 0)", - hugemax); + largemax); for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { if (ptrs[i] != NULL) dallocx(ptrs[i], 0); diff --git a/test/integration/overflow.c b/test/integration/overflow.c index 303d9b2d..8dea1c95 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -2,19 +2,19 @@ TEST_BEGIN(test_overflow) { - unsigned nhchunks; + unsigned nlextents; size_t mib[4]; size_t sz, miblen, max_size_class; void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nlextents", &nlextents, &sz, NULL, 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, + assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); - mib[2] = nhchunks - 1; + mib[2] = nlextents - 1; sz = sizeof(size_t); assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 66ad8660..6278a490 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -14,10 +14,10 @@ get_nsizes_impl(const char *cmd) } static unsigned -get_nhuge(void) +get_nlarge(void) { - return (get_nsizes_impl("arenas.nhchunks")); + return (get_nsizes_impl("arenas.nlextents")); } static size_t @@ -40,10 +40,10 @@ get_size_impl(const char *cmd, size_t ind) } static size_t -get_huge_size(size_t ind) +get_large_size(size_t ind) { - return (get_size_impl("arenas.hchunk.0.size", ind)); + return (get_size_impl("arenas.lextent.0.size", ind)); } 
TEST_BEGIN(test_grow_and_shrink) @@ -221,16 +221,16 @@ TEST_END TEST_BEGIN(test_overflow) { - size_t hugemax; + size_t largemax; void *p; - hugemax = get_huge_size(get_nhuge()-1); + largemax = get_large_size(get_nlarge()-1); p = mallocx(1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_ptr_null(rallocx(p, hugemax+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", hugemax+1); + assert_ptr_null(rallocx(p, largemax+1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1); assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 7af1b194..4ff099f8 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -92,10 +92,10 @@ get_nsmall(void) } static unsigned -get_nhuge(void) +get_nlarge(void) { - return (get_nsizes_impl("arenas.nhchunks")); + return (get_nsizes_impl("arenas.nlextents")); } static size_t @@ -125,20 +125,20 @@ get_small_size(size_t ind) } static size_t -get_huge_size(size_t ind) +get_large_size(size_t ind) { - return (get_size_impl("arenas.hchunk.0.size", ind)); + return (get_size_impl("arenas.lextent.0.size", ind)); } TEST_BEGIN(test_size) { - size_t small0, hugemax; + size_t small0, largemax; void *p; /* Get size classes. */ small0 = get_small_size(0); - hugemax = get_huge_size(get_nhuge()-1); + largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); @@ -148,13 +148,13 @@ TEST_BEGIN(test_size) "Unexpected xallocx() behavior"); /* Test largest supported size. */ - assert_zu_le(xallocx(p, hugemax, 0, 0), hugemax, + assert_zu_le(xallocx(p, largemax, 0, 0), largemax, "Unexpected xallocx() behavior"); /* Test size overflow. 
*/ - assert_zu_le(xallocx(p, hugemax+1, 0, 0), hugemax, + assert_zu_le(xallocx(p, largemax+1, 0, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), hugemax, + assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -163,30 +163,30 @@ TEST_END TEST_BEGIN(test_size_extra_overflow) { - size_t small0, hugemax; + size_t small0, largemax; void *p; /* Get size classes. */ small0 = get_small_size(0); - hugemax = get_huge_size(get_nhuge()-1); + largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); /* Test overflows that can be resolved by clamping extra. */ - assert_zu_le(xallocx(p, hugemax-1, 2, 0), hugemax, + assert_zu_le(xallocx(p, largemax-1, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax, 1, 0), hugemax, + assert_zu_le(xallocx(p, largemax, 1, 0), largemax, "Unexpected xallocx() behavior"); - /* Test overflow such that hugemax-size underflows. */ - assert_zu_le(xallocx(p, hugemax+1, 2, 0), hugemax, + /* Test overflow such that largemax-size underflows. */ + assert_zu_le(xallocx(p, largemax+1, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+2, 3, 0), hugemax, + assert_zu_le(xallocx(p, largemax+2, 3, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), hugemax, + assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), hugemax, + assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -195,13 +195,13 @@ TEST_END TEST_BEGIN(test_extra_small) { - size_t small0, small1, hugemax; + size_t small0, small1, largemax; void *p; /* Get size classes. 
*/ small0 = get_small_size(0); small1 = get_small_size(1); - hugemax = get_huge_size(get_nhuge()-1); + largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); @@ -216,7 +216,7 @@ TEST_BEGIN(test_extra_small) "Unexpected xallocx() behavior"); /* Test size+extra overflow. */ - assert_zu_eq(xallocx(p, small0, hugemax - small0 + 1, 0), small0, + assert_zu_eq(xallocx(p, small0, largemax - small0 + 1, 0), small0, "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0, "Unexpected xallocx() behavior"); @@ -225,66 +225,66 @@ TEST_BEGIN(test_extra_small) } TEST_END -TEST_BEGIN(test_extra_huge) +TEST_BEGIN(test_extra_large) { int flags = MALLOCX_ARENA(arena_ind()); - size_t smallmax, huge1, huge2, huge3, hugemax; + size_t smallmax, large1, large2, large3, largemax; void *p; /* Get size classes. */ smallmax = get_small_size(get_nsmall()-1); - huge1 = get_huge_size(1); - huge2 = get_huge_size(2); - huge3 = get_huge_size(3); - hugemax = get_huge_size(get_nhuge()-1); + large1 = get_large_size(1); + large2 = get_large_size(2); + large3 = get_large_size(3); + largemax = get_large_size(get_nlarge()-1); - p = mallocx(huge3, flags); + p = mallocx(large3, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, + assert_zu_eq(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, smallmax, 0, flags), huge1, + assert_zu_ge(xallocx(p, smallmax, 0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, + assert_zu_eq(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, huge1, huge3 - huge1, flags), huge3, + assert_zu_eq(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge2, huge3 - huge2, flags), huge3, + assert_zu_eq(xallocx(p, large2, large3 - large2, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge1, huge2 - huge1, flags), huge2, + assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, smallmax, huge1 - smallmax, flags), huge1, + assert_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, huge3, 0, flags), huge3, + assert_zu_le(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, hugemax+1, 0, flags), huge3, + assert_zu_le(xallocx(p, largemax+1, 0, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, huge1, SIZE_T_MAX - huge1, flags), hugemax, + assert_zu_le(xallocx(p, large1, SIZE_T_MAX - large1, flags), largemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, huge1, 0, flags), huge1, + assert_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. 
*/ - assert_zu_le(xallocx(p, huge1, huge3 - huge1, flags), huge3, + assert_zu_le(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, huge3, 0, flags), huge3, + assert_zu_eq(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); /* Test size+extra overflow. */ - assert_zu_le(xallocx(p, huge3, hugemax - huge3 + 1, flags), hugemax, + assert_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax, "Unexpected xallocx() behavior"); dallocx(p, flags); @@ -374,15 +374,15 @@ test_zero(size_t szmin, size_t szmax) dallocx(p, flags); } -TEST_BEGIN(test_zero_huge) +TEST_BEGIN(test_zero_large) { - size_t huge0, huge1; + size_t large0, large1; /* Get size classes. */ - huge0 = get_huge_size(0); - huge1 = get_huge_size(1); + large0 = get_large_size(0); + large1 = get_large_size(1); - test_zero(huge1, huge0 * 2); + test_zero(large1, large0 * 2); } TEST_END @@ -397,6 +397,6 @@ main(void) test_size, test_size_extra_overflow, test_extra_small, - test_extra_huge, - test_zero_huge)); + test_extra_large, + test_zero_large)); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 546d3cc8..a9476b89 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -25,10 +25,10 @@ get_nsmall(void) } static unsigned -get_nhuge(void) +get_nlarge(void) { - return (get_nsizes_impl("arenas.nhchunks")); + return (get_nsizes_impl("arenas.nlextents")); } static size_t @@ -58,10 +58,10 @@ get_small_size(size_t ind) } static size_t -get_huge_size(size_t ind) +get_large_size(size_t ind) { - return (get_size_impl("arenas.hchunk.0.size", ind)); + return (get_size_impl("arenas.lextent.0.size", ind)); } /* Like ivsalloc(), but safe to call on discarded allocations. 
*/ @@ -81,8 +81,8 @@ vsalloc(tsdn_t *tsdn, const void *ptr) TEST_BEGIN(test_arena_reset) { -#define NHUGE 32 - unsigned arena_ind, nsmall, nhuge, nptrs, i; +#define NLARGE 32 + unsigned arena_ind, nsmall, nlarge, nptrs, i; size_t sz, miblen; void **ptrs; int flags; @@ -96,8 +96,8 @@ TEST_BEGIN(test_arena_reset) flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; nsmall = get_nsmall(); - nhuge = get_nhuge() > NHUGE ? NHUGE : get_nhuge(); - nptrs = nsmall + nhuge; + nlarge = get_nlarge() > NLARGE ? NLARGE : get_nlarge(); + nptrs = nsmall + nlarge; ptrs = (void **)malloc(nptrs * sizeof(void *)); assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); @@ -108,8 +108,8 @@ TEST_BEGIN(test_arena_reset) assert_ptr_not_null(ptrs[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } - for (i = 0; i < nhuge; i++) { - sz = get_huge_size(i); + for (i = 0; i < nlarge; i++) { + sz = get_large_size(i); ptrs[nsmall + i] = mallocx(sz, flags); assert_ptr_not_null(ptrs[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); diff --git a/test/unit/decay.c b/test/unit/decay.c index 786cc934..592935d3 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -22,7 +22,7 @@ TEST_BEGIN(test_decay_ticks) { ticker_t *decay_ticker; unsigned tick0, tick1; - size_t sz, huge0; + size_t sz, large0; void *p; test_skip_if(opt_purge != purge_mode_decay); @@ -32,18 +32,18 @@ TEST_BEGIN(test_decay_ticks) "Unexpected failure getting decay ticker"); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); /* - * Test the standard APIs using a huge size class, since we can't + * Test the standard APIs using a large size class, since we can't * control tcache interactions for small size classes (except by * completely disabling tcache for the entire test program). */ /* malloc(). 
*/ tick0 = ticker_read(decay_ticker); - p = malloc(huge0); + p = malloc(large0); assert_ptr_not_null(p, "Unexpected malloc() failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); @@ -55,7 +55,7 @@ TEST_BEGIN(test_decay_ticks) /* calloc(). */ tick0 = ticker_read(decay_ticker); - p = calloc(1, huge0); + p = calloc(1, large0); assert_ptr_not_null(p, "Unexpected calloc() failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); @@ -63,7 +63,7 @@ TEST_BEGIN(test_decay_ticks) /* posix_memalign(). */ tick0 = ticker_read(decay_ticker); - assert_d_eq(posix_memalign(&p, sizeof(size_t), huge0), 0, + assert_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, @@ -72,7 +72,7 @@ TEST_BEGIN(test_decay_ticks) /* aligned_alloc(). */ tick0 = ticker_read(decay_ticker); - p = aligned_alloc(sizeof(size_t), huge0); + p = aligned_alloc(sizeof(size_t), large0); assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, @@ -82,13 +82,13 @@ TEST_BEGIN(test_decay_ticks) /* realloc(). */ /* Allocate. */ tick0 = ticker_read(decay_ticker); - p = realloc(NULL, huge0); + p = realloc(NULL, large0); assert_ptr_not_null(p, "Unexpected realloc() failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Reallocate. 
*/ tick0 = ticker_read(decay_ticker); - p = realloc(p, huge0); + p = realloc(p, large0); assert_ptr_not_null(p, "Unexpected realloc() failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); @@ -99,13 +99,13 @@ TEST_BEGIN(test_decay_ticks) assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* - * Test the *allocx() APIs using huge and small size classes, with + * Test the *allocx() APIs using large and small size classes, with * tcache explicitly disabled. */ { unsigned i; size_t allocx_sizes[2]; - allocx_sizes[0] = huge0; + allocx_sizes[0] = large0; allocx_sizes[1] = 1; for (i = 0; i < sizeof(allocx_sizes) / sizeof(size_t); i++) { @@ -154,13 +154,13 @@ TEST_BEGIN(test_decay_ticks) } /* - * Test tcache fill/flush interactions for huge and small size classes, + * Test tcache fill/flush interactions for large and small size classes, * using an explicit tcache. */ if (config_tcache) { unsigned tcache_ind, i; size_t tcache_sizes[2]; - tcache_sizes[0] = huge0; + tcache_sizes[0] = large0; tcache_sizes[1] = 1; sz = sizeof(unsigned); @@ -201,14 +201,14 @@ TEST_BEGIN(test_decay_ticker) uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, huge; + size_t sz, large; unsigned i, nupdates0; nstime_t time, decay_time, deadline; test_skip_if(opt_purge != purge_mode_decay); /* - * Allocate a bunch of huge objects, pause the clock, deallocate the + * Allocate a bunch of large objects, pause the clock, deallocate the * objects, restore the clock, then [md]allocx() in a tight loop to * verify the ticker triggers purging. 
*/ @@ -219,10 +219,10 @@ TEST_BEGIN(test_decay_ticker) sz = sizeof(size_t); assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - huge = nallocx(tcache_max + 1, flags); + large = nallocx(tcache_max + 1, flags); } else { sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge, &sz, NULL, + assert_d_eq(mallctl("arenas.lextent.0.size", &large, &sz, NULL, 0), 0, "Unexpected mallctl failure"); } @@ -235,7 +235,7 @@ TEST_BEGIN(test_decay_ticker) config_stats ? 0 : ENOENT, "Unexpected mallctl result"); for (i = 0; i < NPS; i++) { - ps[i] = mallocx(huge, flags); + ps[i] = mallocx(large, flags); assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); } @@ -293,13 +293,13 @@ TEST_BEGIN(test_decay_nonmonotonic) uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, huge0; + size_t sz, large0; unsigned i, nupdates0; test_skip_if(opt_purge != purge_mode_decay); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, @@ -319,7 +319,7 @@ TEST_BEGIN(test_decay_nonmonotonic) nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { - ps[i] = mallocx(huge0, flags); + ps[i] = mallocx(large0, flags); assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); } diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index a165aece..d8928da0 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -35,16 +35,16 @@ TEST_BEGIN(test_small_extent_size) } TEST_END -TEST_BEGIN(test_huge_extent_size) +TEST_BEGIN(test_large_extent_size) { bool cache_oblivious; - unsigned nhchunks, i; + unsigned nlextents, i; size_t sz, extent_size_prev, ceil_prev; size_t mib[4]; size_t miblen = sizeof(mib) / sizeof(size_t); /* - * Iterate over all huge size classes, get their 
extent sizes, and + * Iterate over all large size classes, get their extent sizes, and * verify that the quantized size is the same as the extent size. */ @@ -53,12 +53,12 @@ TEST_BEGIN(test_huge_extent_size) NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nlextents", &nlextents, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, + assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); - for (i = 0; i < nhchunks; i++) { + for (i = 0; i < nlextents; i++) { size_t lextent_size, extent_size, floor, ceil; mib[2] = i; @@ -91,7 +91,7 @@ TEST_BEGIN(test_huge_extent_size) ceil_prev, extent_size); } } - if (i + 1 < nhchunks) { + if (i + 1 < nlextents) { extent_size_prev = floor; ceil_prev = extent_size_quantize_ceil(extent_size + PAGE); @@ -141,6 +141,6 @@ main(void) return (test( test_small_extent_size, - test_huge_extent_size, + test_large_extent_size, test_monotonic)); } diff --git a/test/unit/junk.c b/test/unit/junk.c index cdf8fb3c..7a923509 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -9,7 +9,7 @@ const char *malloc_conf = #endif static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; -static huge_dalloc_junk_t *huge_dalloc_junk_orig; +static large_dalloc_junk_t *large_dalloc_junk_orig; static void *watch_for_junking; static bool saw_junking; @@ -37,10 +37,10 @@ arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) } static void -huge_dalloc_junk_intercept(void *ptr, size_t usize) +large_dalloc_junk_intercept(void *ptr, size_t usize) { - huge_dalloc_junk_orig(ptr, usize); + large_dalloc_junk_orig(ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in @@ -59,8 +59,8 @@ 
test_junk(size_t sz_min, size_t sz_max) if (opt_junk_free) { arena_dalloc_junk_small_orig = arena_dalloc_junk_small; arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; - huge_dalloc_junk_orig = huge_dalloc_junk; - huge_dalloc_junk = huge_dalloc_junk_intercept; + large_dalloc_junk_orig = large_dalloc_junk; + large_dalloc_junk = large_dalloc_junk_intercept; } sz_prev = 0; @@ -110,7 +110,7 @@ test_junk(size_t sz_min, size_t sz_max) if (opt_junk_free) { arena_dalloc_junk_small = arena_dalloc_junk_small_orig; - huge_dalloc_junk = huge_dalloc_junk_orig; + large_dalloc_junk = large_dalloc_junk_orig; } } @@ -122,7 +122,7 @@ TEST_BEGIN(test_junk_small) } TEST_END -TEST_BEGIN(test_junk_huge) +TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); @@ -136,5 +136,5 @@ main(void) return (test( test_junk_small, - test_junk_huge)); + test_junk_large)); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 872aeaa0..8eb5a60c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -596,7 +596,7 @@ TEST_BEGIN(test_arenas_constants) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS); - TEST_ARENAS_CONSTANT(unsigned, nhchunks, NSIZES - NBINS); + TEST_ARENAS_CONSTANT(unsigned, nlextents, NSIZES - NBINS); #undef TEST_ARENAS_CONSTANT } @@ -622,13 +622,13 @@ TEST_BEGIN(test_arenas_bin_constants) } TEST_END -TEST_BEGIN(test_arenas_hchunk_constants) +TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.hchunk.0."#name, &name, &sz, NULL, \ + assert_d_eq(mallctl("arenas.lextent.0."#name, &name, &sz, NULL, \ 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -704,7 +704,7 @@ main(void) test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, - test_arenas_hchunk_constants, + 
test_arenas_lextent_constants, test_arenas_extend, test_stats_arenas)); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 4e1e0ce4..f5a5873d 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -3,18 +3,18 @@ static size_t get_max_size_class(void) { - unsigned nhchunks; + unsigned nlextents; size_t mib[4]; size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nlextents", &nlextents, &sz, NULL, 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, + assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); - mib[2] = nhchunks - 1; + mib[2] = nlextents - 1; sz = sizeof(size_t); assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, diff --git a/test/unit/stats.c b/test/unit/stats.c index f524c005..9fa9cead 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -33,7 +33,7 @@ TEST_BEGIN(test_stats_summary) } TEST_END -TEST_BEGIN(test_stats_huge) +TEST_BEGIN(test_stats_large) { void *p; uint64_t epoch; @@ -49,14 +49,14 @@ TEST_BEGIN(test_stats_huge) "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, + assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, NULL, + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, NULL, + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", 
&nrequests, &sz, + assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { @@ -75,7 +75,7 @@ TEST_END TEST_BEGIN(test_stats_arenas_summary) { unsigned arena; - void *little, *huge; + void *little, *large; uint64_t epoch; size_t sz; int expected = config_stats ? 0 : ENOENT; @@ -88,11 +88,11 @@ TEST_BEGIN(test_stats_arenas_summary) little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - huge = mallocx(chunksize, 0); - assert_ptr_not_null(huge, "Unexpected mallocx() failure"); + large = mallocx(chunksize, 0); + assert_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); - dallocx(huge, 0); + dallocx(large, 0); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -185,7 +185,7 @@ TEST_BEGIN(test_stats_arenas_small) } TEST_END -TEST_BEGIN(test_stats_arenas_huge) +TEST_BEGIN(test_stats_arenas_large) { unsigned arena; void *p; @@ -204,12 +204,12 @@ TEST_BEGIN(test_stats_arenas_huge) "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, + assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { @@ -299,12 +299,12 @@ TEST_BEGIN(test_stats_arenas_bins) } TEST_END -TEST_BEGIN(test_stats_arenas_hchunks) +TEST_BEGIN(test_stats_arenas_lextents) { unsigned arena; void *p; uint64_t epoch, nmalloc, ndalloc; - size_t curhchunks, sz, hsize; + 
size_t curlextents, sz, hsize; int expected = config_stats ? 0 : ENOENT; arena = 0; @@ -312,7 +312,7 @@ TEST_BEGIN(test_stats_arenas_hchunks) 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", &hsize, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.lextent.0.size", &hsize, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); p = mallocx(hsize, 0); @@ -322,20 +322,20 @@ TEST_BEGIN(test_stats_arenas_hchunks) "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, &sz, + assert_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", &nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, &sz, + assert_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", &ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", &curhchunks, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", + &curlextents, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(curhchunks, 0, + assert_u64_gt(curlextents, 0, "At least one chunk should be currently allocated"); } @@ -349,10 +349,10 @@ main(void) return (test( test_stats_summary, - test_stats_huge, + test_stats_large, test_stats_arenas_summary, test_stats_arenas_small, - test_stats_arenas_huge, + test_stats_arenas_large, test_stats_arenas_bins, - test_stats_arenas_hchunks)); + test_stats_arenas_lextents)); } diff --git a/test/unit/zero.c b/test/unit/zero.c index 2da288ac..3c35f4bd 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -53,7 +53,7 @@ TEST_BEGIN(test_zero_small) } TEST_END 
-TEST_BEGIN(test_zero_huge) +TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); @@ -67,5 +67,5 @@ main(void) return (test( test_zero_small, - test_zero_huge)); + test_zero_large)); } From 9c305c9e5c485c09100a17106c6562f8352a760d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 31 May 2016 15:03:51 -0700 Subject: [PATCH 0292/2608] s/chunk_hook/extent_hook/g --- doc/jemalloc.xml.in | 18 +- include/jemalloc/internal/arena.h | 8 +- include/jemalloc/internal/chunk.h | 31 ++-- include/jemalloc/internal/private_symbols.txt | 6 +- include/jemalloc/jemalloc_typedefs.h.in | 2 +- src/arena.c | 66 +++---- src/chunk.c | 175 +++++++++--------- src/chunk_dss.c | 4 +- src/ctl.c | 22 +-- src/jemalloc.c | 18 +- src/large.c | 19 +- test/integration/chunk.c | 22 +-- 12 files changed, 200 insertions(+), 191 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7613c24c..801fd497 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -555,7 +555,7 @@ for (i = 0; i < nbins; i++) { allocations in place, as long as the pre-size and post-size are both large. For shrinkage to succeed, the extent allocator must support splitting (see arena.<i>.chunk_hooks). + linkend="arena.i.extent_hooks">arena.<i>.extent_hooks). Growth only succeeds if the trailing memory is currently available, and the extent allocator supports merging. @@ -1548,10 +1548,10 @@ malloc_conf = "xmalloc:true";]]> additional information. 
- + - arena.<i>.chunk_hooks - (chunk_hooks_t) + arena.<i>.extent_hooks + (extent_hooks_t) rw Get or set the chunk management hook functions for arena @@ -1561,7 +1561,7 @@ malloc_conf = "xmalloc:true";]]> control allocation for arenas created via arenas.extend such that all chunks originate from an application-supplied chunk allocator - (by setting custom chunk hook functions just after arena creation), but + (by setting custom extent hook functions just after arena creation), but the automatically created arenas may have already created chunks prior to the application having an opportunity to take over chunk allocation. @@ -1575,8 +1575,8 @@ typedef struct { chunk_purge_t *purge; chunk_split_t *split; chunk_merge_t *merge; -} chunk_hooks_t;]]> - The chunk_hooks_t structure comprises function +} extent_hooks_t;]]> + The extent_hooks_t structure comprises function pointers which are described individually below. jemalloc uses these functions to manage chunk lifetime, which starts off with allocation of mapped committed memory, in the simplest case followed by deallocation. @@ -2109,8 +2109,8 @@ typedef struct { 2. Retained virtual memory is typically untouched, decommitted, or purged, so it has no strongly associated physical memory (see chunk hooks for details). Retained - memory is excluded from mapped memory statistics, e.g. extent hooks for details). + Retained memory is excluded from mapped memory statistics, e.g. stats.mapped. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 56f78571..f60b9d60 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -246,8 +246,8 @@ struct arena_s { ql_head(extent_t) extent_cache; malloc_mutex_t extent_cache_mtx; - /* User-configurable chunk hook functions. */ - chunk_hooks_t chunk_hooks; + /* User-configurable extent hook functions. */ + extent_hooks_t extent_hooks; /* bins is used to store heaps of free regions. 
*/ arena_bin_t bins[NBINS]; @@ -279,10 +279,10 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; extent_t *arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero); void arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 6f50302e..10f2ae72 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -38,33 +38,36 @@ extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern const chunk_hooks_t chunk_hooks_default; +extern const extent_hooks_t extent_hooks_default; -chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); -chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, - const chunk_hooks_t *chunk_hooks); +extent_hooks_t extent_hooks_get(tsdn_t *tsdn, arena_t *arena); +extent_hooks_t extent_hooks_set(tsdn_t *tsdn, arena_t *arena, + const extent_hooks_t *extent_hooks); extent_t *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab); extent_t *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab); void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - 
chunk_hooks_t *chunk_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length); bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length); bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent, size_t offset, size_t length); + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length); extent_t *chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent, size_t size_a, size_t usize_a, - size_t size_b, size_t usize_b); + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, + size_t usize_a, size_t size_b, size_t usize_b); bool chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *a, extent_t *b); + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); bool chunk_boot(void); void chunk_prefork(tsdn_t *tsdn); void chunk_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index cab0fc54..b5fd4c0c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -131,9 +131,6 @@ chunk_dss_postfork_parent chunk_dss_prec_get chunk_dss_prec_set chunk_dss_prefork -chunk_hooks_default -chunk_hooks_get -chunk_hooks_set chunk_in_dss chunk_lookup chunk_merge_wrapper @@ -185,6 +182,9 @@ extent_heap_insert extent_heap_new extent_heap_remove 
extent_heap_remove_first +extent_hooks_default +extent_hooks_get +extent_hooks_set extent_init extent_last_get extent_past_get diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index fa7b350a..2b07e362 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -54,4 +54,4 @@ typedef struct { chunk_purge_t *purge; chunk_split_t *split; chunk_merge_t *merge; -} chunk_hooks_t; +} extent_hooks_t; diff --git a/src/arena.c b/src/arena.c index 8194ced7..9a8c2e26 100644 --- a/src/arena.c +++ b/src/arena.c @@ -54,25 +54,25 @@ arena_chunk_dirty_npages(const extent_t *extent) static extent_t * arena_chunk_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->lock); - return (chunk_alloc_cache(tsdn, arena, chunk_hooks, new_addr, usize, + return (chunk_alloc_cache(tsdn, arena, extent_hooks, new_addr, usize, pad, alignment, zero, slab)); } extent_t * arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero) { extent_t *extent; malloc_mutex_lock(tsdn, &arena->lock); - extent = arena_chunk_cache_alloc_locked(tsdn, arena, chunk_hooks, + extent = arena_chunk_cache_alloc_locked(tsdn, arena, extent_hooks, new_addr, size, 0, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); @@ -81,22 +81,22 @@ arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, static void arena_chunk_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent) + extent_hooks_t *extent_hooks, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &arena->lock); - chunk_dalloc_cache(tsdn, arena, 
chunk_hooks, extent); + chunk_dalloc_cache(tsdn, arena, extent_hooks, extent); arena_maybe_purge(tsdn, arena); } void arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_t *extent) + extent_hooks_t *extent_hooks, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->lock); - arena_chunk_cache_dalloc_locked(tsdn, arena, chunk_hooks, extent); + arena_chunk_cache_dalloc_locked(tsdn, arena, extent_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); } @@ -321,12 +321,12 @@ arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) static extent_t * arena_chunk_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero) + extent_hooks_t *extent_hooks, size_t usize, size_t alignment, bool *zero) { extent_t *extent; bool commit = true; - extent = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, usize, + extent = chunk_alloc_wrapper(tsdn, arena, extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); if (extent == NULL) { /* Revert optimistic stats updates. 
*/ @@ -347,7 +347,7 @@ arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { extent_t *extent; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; malloc_mutex_lock(tsdn, &arena->lock); @@ -358,12 +358,12 @@ arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); - extent = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, - usize, large_pad, alignment, zero, false); + extent = arena_chunk_cache_alloc_locked(tsdn, arena, &extent_hooks, + NULL, usize, large_pad, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); if (extent == NULL) { - extent = arena_chunk_alloc_large_hard(tsdn, arena, &chunk_hooks, - usize, alignment, zero); + extent = arena_chunk_alloc_large_hard(tsdn, arena, + &extent_hooks, usize, alignment, zero); } return (extent); @@ -373,7 +373,7 @@ void arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked) { - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; if (!locked) malloc_mutex_lock(tsdn, &arena->lock); @@ -384,7 +384,7 @@ arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); - arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, extent); + arena_chunk_cache_dalloc_locked(tsdn, arena, &extent_hooks, extent); if (!locked) malloc_mutex_unlock(tsdn, &arena->lock); } @@ -735,7 +735,7 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) } static size_t -arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, size_t ndirty_limit, extent_t *purge_extents_sentinel) { extent_t *extent, *next; @@ -757,7 +757,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t 
*chunk_hooks, /* Allocate. */ zero = false; textent = arena_chunk_cache_alloc_locked(tsdn, arena, - chunk_hooks, extent_base_get(extent), + extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, CACHELINE, &zero, false); assert(textent == extent); assert(zero == extent_zeroed_get(extent)); @@ -774,7 +774,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static size_t -arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *purge_extents_sentinel) { UNUSED size_t nmadvise; @@ -793,7 +793,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, next = qr_next(extent, qr_link); extent_ring_remove(extent); - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, extent); + chunk_dalloc_wrapper(tsdn, arena, extent_hooks, extent); } if (config_stats) { @@ -816,7 +816,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + extent_hooks_t extent_hooks = extent_hooks_get(tsdn, arena); size_t npurge, npurged; extent_t purge_extents_sentinel; @@ -836,11 +836,11 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) extent_init(&purge_extents_sentinel, arena, NULL, 0, 0, false, false, false, false); - npurge = arena_stash_dirty(tsdn, arena, &chunk_hooks, ndirty_limit, + npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, ndirty_limit, &purge_extents_sentinel); if (npurge == 0) goto label_return; - npurged = arena_purge_stashed(tsdn, arena, &chunk_hooks, + npurged = arena_purge_stashed(tsdn, arena, &extent_hooks, &purge_extents_sentinel); assert(npurged == npurge); @@ -866,10 +866,10 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t 
*slab) { - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - arena_chunk_cache_dalloc_locked(tsdn, arena, &chunk_hooks, slab); + arena_chunk_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); } void @@ -987,8 +987,8 @@ arena_bin_slabs_full_remove(extent_t *slab) } static extent_t * -arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - const arena_bin_info_t *bin_info) +arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, const arena_bin_info_t *bin_info) { extent_t *slab; bool zero, commit; @@ -996,7 +996,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, zero = false; commit = true; malloc_mutex_unlock(tsdn, &arena->lock); - slab = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, + slab = chunk_alloc_wrapper(tsdn, arena, extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, &commit, true); malloc_mutex_lock(tsdn, &arena->lock); @@ -1009,14 +1009,14 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, { extent_t *slab; arena_slab_data_t *slab_data; - chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; bool zero; zero = false; - slab = arena_chunk_cache_alloc_locked(tsdn, arena, &chunk_hooks, NULL, + slab = arena_chunk_cache_alloc_locked(tsdn, arena, &extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, true); if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, &chunk_hooks, + slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, bin_info); if (slab == NULL) return (NULL); @@ -1805,7 +1805,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) WITNESS_RANK_ARENA_EXTENT_CACHE)) return (NULL); - arena->chunk_hooks = chunk_hooks_default; + arena->extent_hooks = extent_hooks_default; /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { diff --git a/src/chunk.c b/src/chunk.c index e2e9de03..6ca40572 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -33,7 +33,7 @@ static bool chunk_split_default(void *chunk, size_t size, size_t size_a, static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind); -const chunk_hooks_t chunk_hooks_default = { +const extent_hooks_t extent_hooks_default = { chunk_alloc_default, chunk_dalloc_default, chunk_commit_default, @@ -50,8 +50,8 @@ const chunk_hooks_t chunk_hooks_default = { */ static void chunk_record(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, - extent_t *extent); + extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], + bool cache, extent_t *extent); /******************************************************************************/ @@ -71,37 +71,38 @@ extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) extent_heap_remove(&extent_heaps[pind], extent); } -static chunk_hooks_t -chunk_hooks_get_locked(arena_t *arena) +static extent_hooks_t +extent_hooks_get_locked(arena_t *arena) { - return (arena->chunk_hooks); + return (arena->extent_hooks); } -chunk_hooks_t -chunk_hooks_get(tsdn_t *tsdn, arena_t *arena) +extent_hooks_t +extent_hooks_get(tsdn_t *tsdn, arena_t *arena) { - chunk_hooks_t chunk_hooks; + extent_hooks_t extent_hooks; malloc_mutex_lock(tsdn, &arena->chunks_mtx); - chunk_hooks = chunk_hooks_get_locked(arena); + extent_hooks = extent_hooks_get_locked(arena); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - return (chunk_hooks); + return (extent_hooks); } -chunk_hooks_t -chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) +extent_hooks_t +extent_hooks_set(tsdn_t *tsdn, arena_t *arena, + const extent_hooks_t *extent_hooks) { - chunk_hooks_t old_chunk_hooks; + extent_hooks_t old_extent_hooks; malloc_mutex_lock(tsdn, &arena->chunks_mtx); 
- old_chunk_hooks = arena->chunk_hooks; + old_extent_hooks = arena->extent_hooks; /* * Copy each field atomically so that it is impossible for readers to * see partially updated pointers. There are places where readers only * need one hook function pointer (therefore no need to copy the - * entirety of arena->chunk_hooks), and stale reads do not affect + * entirety of arena->extent_hooks), and stale reads do not affect * correctness, so they perform unlocked reads. */ #define ATOMIC_COPY_HOOK(n) do { \ @@ -109,8 +110,8 @@ chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) chunk_##n##_t **n; \ void **v; \ } u; \ - u.n = &arena->chunk_hooks.n; \ - atomic_write_p(u.v, chunk_hooks->n); \ + u.n = &arena->extent_hooks.n; \ + atomic_write_p(u.v, extent_hooks->n); \ } while (0) ATOMIC_COPY_HOOK(alloc); ATOMIC_COPY_HOOK(dalloc); @@ -122,37 +123,37 @@ chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks) #undef ATOMIC_COPY_HOOK malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - return (old_chunk_hooks); + return (old_extent_hooks); } static void -chunk_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, bool locked) +extent_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, bool locked) { - static const chunk_hooks_t uninitialized_hooks = + static const extent_hooks_t uninitialized_hooks = CHUNK_HOOKS_INITIALIZER; - if (memcmp(chunk_hooks, &uninitialized_hooks, sizeof(chunk_hooks_t)) == - 0) { - *chunk_hooks = locked ? chunk_hooks_get_locked(arena) : - chunk_hooks_get(tsdn, arena); + if (memcmp(extent_hooks, &uninitialized_hooks, sizeof(extent_hooks_t)) + == 0) { + *extent_hooks = locked ? 
extent_hooks_get_locked(arena) : + extent_hooks_get(tsdn, arena); } } static void -chunk_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks) +extent_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks) { - chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, true); + extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, true); } static void -chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks) +extent_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks) { - chunk_hooks_assure_initialized_impl(tsdn, arena, chunk_hooks, false); + extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, false); } static bool @@ -297,8 +298,8 @@ chunk_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], } static void -chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, - extent_t *extent) +chunk_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + bool cache, extent_t *extent) { /* @@ -306,14 +307,14 @@ chunk_leak(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool cache, * that this is only a virtual memory leak. 
*/ if (cache) { - chunk_purge_wrapper(tsdn, arena, chunk_hooks, extent, 0, + chunk_purge_wrapper(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent)); } extent_dalloc(tsdn, arena, extent); } static extent_t * -chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) @@ -330,7 +331,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (alloc_size < usize) return (NULL); malloc_mutex_lock(tsdn, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); + extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); if (new_addr != NULL) { rtree_elm_t *elm; @@ -368,10 +369,10 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Split the lead. */ if (leadsize != 0) { extent_t *lead = extent; - extent = chunk_split_wrapper(tsdn, arena, chunk_hooks, lead, + extent = chunk_split_wrapper(tsdn, arena, extent_hooks, lead, leadsize, leadsize, size + trailsize, usize + trailsize); if (extent == NULL) { - chunk_leak(tsdn, arena, chunk_hooks, cache, lead); + chunk_leak(tsdn, arena, extent_hooks, cache, lead); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } @@ -381,10 +382,10 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Split the trail. 
*/ if (trailsize != 0) { - extent_t *trail = chunk_split_wrapper(tsdn, arena, chunk_hooks, + extent_t *trail = chunk_split_wrapper(tsdn, arena, extent_hooks, extent, size, usize, trailsize, trailsize); if (trail == NULL) { - chunk_leak(tsdn, arena, chunk_hooks, cache, extent); + chunk_leak(tsdn, arena, extent_hooks, cache, extent); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); return (NULL); } @@ -399,10 +400,10 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } if (!extent_committed_get(extent) && - chunk_hooks->commit(extent_base_get(extent), + extent_hooks->commit(extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, extent_heaps, cache, + chunk_record(tsdn, arena, extent_hooks, extent_heaps, cache, extent); return (NULL); } @@ -468,7 +469,7 @@ chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } extent_t * -chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab) { @@ -479,7 +480,7 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(alignment != 0); commit = true; - extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_cached, + extent = chunk_recycle(tsdn, arena, extent_hooks, arena->chunks_cached, true, new_addr, usize, pad, alignment, zero, &commit, slab); if (extent == NULL) return (NULL); @@ -520,7 +521,7 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, } static extent_t * -chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { 
@@ -529,8 +530,9 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(usize != 0); assert(alignment != 0); - extent = chunk_recycle(tsdn, arena, chunk_hooks, arena->chunks_retained, - false, new_addr, usize, pad, alignment, zero, commit, slab); + extent = chunk_recycle(tsdn, arena, extent_hooks, + arena->chunks_retained, false, new_addr, usize, pad, alignment, + zero, commit, slab); if (extent != NULL && config_stats) { size_t size = usize + pad; arena->stats.retained -= size; @@ -541,7 +543,7 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static extent_t * chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; @@ -552,7 +554,7 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent = extent_alloc(tsdn, arena); if (extent == NULL) return (NULL); - addr = chunk_hooks->alloc(new_addr, size, alignment, zero, commit, + addr = extent_hooks->alloc(new_addr, size, alignment, zero, commit, arena->ind); if (addr == NULL) { extent_dalloc(tsdn, arena, extent); @@ -562,7 +564,7 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, if (pad != 0) extent_addr_randomize(tsdn, extent, alignment); if (chunk_register(tsdn, extent)) { - chunk_leak(tsdn, arena, chunk_hooks, false, extent); + chunk_leak(tsdn, arena, extent_hooks, false, extent); return (NULL); } @@ -570,18 +572,18 @@ chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } extent_t * -chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + 
extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - extent = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, usize, - pad, alignment, zero, commit, slab); + extent = chunk_alloc_retained(tsdn, arena, extent_hooks, new_addr, + usize, pad, alignment, zero, commit, slab); if (extent == NULL) { - extent = chunk_alloc_wrapper_hard(tsdn, arena, chunk_hooks, + extent = chunk_alloc_wrapper_hard(tsdn, arena, extent_hooks, new_addr, usize, pad, alignment, zero, commit, slab); } @@ -605,7 +607,7 @@ chunk_can_coalesce(const extent_t *a, const extent_t *b) } static void -chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES], bool cache) { @@ -618,7 +620,7 @@ chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_chunk_cache_maybe_remove(extent_arena_get(a), a, cache); arena_chunk_cache_maybe_remove(extent_arena_get(b), b, cache); - if (chunk_merge_wrapper(tsdn, arena, chunk_hooks, a, b)) { + if (chunk_merge_wrapper(tsdn, arena, extent_hooks, a, b)) { extent_heaps_insert(extent_heaps, a); extent_heaps_insert(extent_heaps, b); arena_chunk_cache_maybe_insert(extent_arena_get(a), a, cache); @@ -631,7 +633,7 @@ chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static void -chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) { extent_t *prev, *next; @@ -639,7 +641,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(!cache || !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &arena->chunks_mtx); - chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); + extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); extent_usize_set(extent, 0); 
extent_active_set(extent, false); @@ -657,7 +659,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, next = rtree_read(tsdn, &chunks_rtree, (uintptr_t)extent_past_get(extent), false); if (next != NULL) { - chunk_try_coalesce(tsdn, arena, chunk_hooks, extent, next, + chunk_try_coalesce(tsdn, arena, extent_hooks, extent, next, extent_heaps, cache); } @@ -665,7 +667,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, prev = rtree_read(tsdn, &chunks_rtree, (uintptr_t)extent_before_get(extent), false); if (prev != NULL) { - chunk_try_coalesce(tsdn, arena, chunk_hooks, prev, extent, + chunk_try_coalesce(tsdn, arena, extent_hooks, prev, extent, extent_heaps, cache); } @@ -673,7 +675,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } void -chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent) { @@ -683,7 +685,7 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - chunk_record(tsdn, arena, chunk_hooks, arena->chunks_cached, true, + chunk_record(tsdn, arena, extent_hooks, arena->chunks_cached, true, extent); } @@ -698,7 +700,7 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, } void -chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent) { @@ -707,9 +709,9 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, extent_addr_set(extent, extent_base_get(extent)); - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); /* Try to deallocate. 
*/ - if (!chunk_hooks->dalloc(extent_base_get(extent), + if (!extent_hooks->dalloc(extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena->ind)) { chunk_deregister(tsdn, extent); @@ -719,18 +721,18 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to decommit; purge if that fails. */ if (extent_committed_get(extent)) { extent_committed_set(extent, - chunk_hooks->decommit(extent_base_get(extent), + extent_hooks->decommit(extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); } extent_zeroed_set(extent, !extent_committed_get(extent) || - !chunk_hooks->purge(extent_base_get(extent), + !extent_hooks->purge(extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); if (config_stats) arena->stats.retained += extent_size_get(extent); - chunk_record(tsdn, arena, chunk_hooks, arena->chunks_retained, false, + chunk_record(tsdn, arena, extent_hooks, arena->chunks_retained, false, extent); } @@ -744,12 +746,12 @@ chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->commit(extent_base_get(extent), + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + return (extent_hooks->commit(extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } @@ -763,12 +765,13 @@ chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_t *extent, size_t offset, size_t length) +chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, 
+ size_t length) { - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->decommit(extent_base_get(extent), + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + return (extent_hooks->decommit(extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } @@ -787,12 +790,12 @@ chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, } bool -chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - return (chunk_hooks->purge(extent_base_get(extent), + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + return (extent_hooks->purge(extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } @@ -807,7 +810,7 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } extent_t * -chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, size_t usize_b) { @@ -816,7 +819,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(extent_size_get(extent) == size_a + size_b); - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); trail = extent_alloc(tsdn, arena); if (trail == NULL) @@ -843,7 +846,7 @@ chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, &trail_elm_b)) goto label_error_c; - if (chunk_hooks->split(extent_base_get(extent), size_a + size_b, size_a, + if (extent_hooks->split(extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena->ind)) goto label_error_d; @@ -884,13 +887,13 @@ chunk_merge_default(void 
*chunk_a, size_t size_a, void *chunk_b, size_t size_b, } bool -chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, +chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *a, extent_t *b) { rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); - if (chunk_hooks->merge(extent_base_get(a), extent_size_get(a), + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + if (extent_hooks->merge(extent_base_get(a), extent_size_get(a), extent_base_get(b), extent_size_get(b), extent_committed_get(a), arena->ind)) return (true); diff --git a/src/chunk_dss.c b/src/chunk_dss.c index f8c968b3..c5323dea 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -136,10 +136,10 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, dss_max = dss_next; malloc_mutex_unlock(tsdn, &dss_mtx); if (pad_size != 0) { - chunk_hooks_t chunk_hooks = + extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, - &chunk_hooks, pad); + &extent_hooks, pad); } else extent_dalloc(tsdn, arena, pad); if (*zero) diff --git a/src/ctl.c b/src/ctl.c index 85ca2e86..5ff2a42d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -120,7 +120,7 @@ CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_lg_dirty_mult) CTL_PROTO(arena_i_decay_time) -CTL_PROTO(arena_i_chunk_hooks) +CTL_PROTO(arena_i_extent_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -287,7 +287,7 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("dss"), CTL(arena_i_dss)}, {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, {NAME("decay_time"), CTL(arena_i_decay_time)}, - {NAME("chunk_hooks"), CTL(arena_i_chunk_hooks)} + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -1647,7 +1647,7 @@ label_return: } static int 
-arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; @@ -1658,15 +1658,15 @@ arena_i_chunk_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { - chunk_hooks_t old_chunk_hooks, new_chunk_hooks; - WRITE(new_chunk_hooks, chunk_hooks_t); - old_chunk_hooks = chunk_hooks_set(tsd_tsdn(tsd), arena, - &new_chunk_hooks); - READ(old_chunk_hooks, chunk_hooks_t); + extent_hooks_t old_extent_hooks, new_extent_hooks; + WRITE(new_extent_hooks, extent_hooks_t); + old_extent_hooks = extent_hooks_set(tsd_tsdn(tsd), + arena, &new_extent_hooks); + READ(old_extent_hooks, extent_hooks_t); } else { - chunk_hooks_t old_chunk_hooks = - chunk_hooks_get(tsd_tsdn(tsd), arena); - READ(old_chunk_hooks, chunk_hooks_t); + extent_hooks_t old_extent_hooks = + extent_hooks_get(tsd_tsdn(tsd), arena); + READ(old_extent_hooks, extent_hooks_t); } } else { ret = EFAULT; diff --git a/src/jemalloc.c b/src/jemalloc.c index 85a592e9..a9bba12b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -207,14 +207,16 @@ static void WINAPI _init_init_lock(void) { - /* If another constructor in the same binary is using mallctl to - * e.g. setup chunk hooks, it may end up running before this one, - * and malloc_init_hard will crash trying to lock the uninitialized - * lock. So we force an initialization of the lock in - * malloc_init_hard as well. We don't try to care about atomicity - * of the accessed to the init_lock_initialized boolean, since it - * really only matters early in the process creation, before any - * separate thread normally starts doing anything. */ + /* + * If another constructor in the same binary is using mallctl to e.g. 
+ * set up extent hooks, it may end up running before this one, and + * malloc_init_hard will crash trying to lock the uninitialized lock. So + * we force an initialization of the lock in malloc_init_hard as well. + * We don't try to care about atomicity of the accessed to the + * init_lock_initialized boolean, since it really only matters early in + * the process creation, before any separate thread normally starts + * doing anything. + */ if (!init_lock_initialized) malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT); init_lock_initialized = true; diff --git a/src/large.c b/src/large.c index 43bfb284..ce8d32fb 100644 --- a/src/large.c +++ b/src/large.c @@ -96,15 +96,16 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + extent_hooks_t extent_hooks = extent_hooks_get(tsdn, arena); size_t diff = extent_size_get(extent) - (usize + large_pad); assert(oldusize > usize); /* Split excess pages. 
*/ if (diff != 0) { - extent_t *trail = chunk_split_wrapper(tsdn, arena, &chunk_hooks, - extent, usize + large_pad, usize, diff, diff); + extent_t *trail = chunk_split_wrapper(tsdn, arena, + &extent_hooks, extent, usize + large_pad, usize, diff, + diff); if (trail == NULL) return (true); @@ -113,7 +114,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) extent_usize_get(trail)); } - arena_chunk_cache_dalloc(tsdn, arena, &chunk_hooks, trail); + arena_chunk_cache_dalloc(tsdn, arena, &extent_hooks, trail); } arena_chunk_ralloc_large_shrink(tsdn, arena, extent, oldusize); @@ -128,22 +129,22 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); bool is_zeroed_trail = false; - chunk_hooks_t chunk_hooks = chunk_hooks_get(tsdn, arena); + extent_hooks_t extent_hooks = extent_hooks_get(tsdn, arena); size_t trailsize = usize - extent_usize_get(extent); extent_t *trail; - if ((trail = arena_chunk_cache_alloc(tsdn, arena, &chunk_hooks, + if ((trail = arena_chunk_cache_alloc(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) == NULL) { bool commit = true; - if ((trail = chunk_alloc_wrapper(tsdn, arena, &chunk_hooks, + if ((trail = chunk_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, &commit, false)) == NULL) return (true); } - if (chunk_merge_wrapper(tsdn, arena, &chunk_hooks, extent, trail)) { - chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, trail); + if (chunk_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { + chunk_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); return (true); } diff --git a/test/integration/chunk.c b/test/integration/chunk.c index ca87e80f..10c4ba77 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -4,8 +4,8 @@ const char *malloc_conf = "junk:false"; #endif -static chunk_hooks_t 
orig_hooks; -static chunk_hooks_t old_hooks; +static extent_hooks_t orig_hooks; +static extent_hooks_t old_hooks; static bool do_dalloc = true; static bool do_decommit; @@ -125,7 +125,7 @@ TEST_BEGIN(test_chunk) int flags; size_t hooks_mib[3], purge_mib[3]; size_t hooks_miblen, purge_miblen; - chunk_hooks_t new_hooks = { + extent_hooks_t new_hooks = { chunk_alloc, chunk_dalloc, chunk_commit, @@ -141,15 +141,15 @@ TEST_BEGIN(test_chunk) "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - /* Install custom chunk hooks. */ + /* Install custom extent hooks. */ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.chunk_hooks", hooks_mib, + assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; - old_size = sizeof(chunk_hooks_t); - new_size = sizeof(chunk_hooks_t); + old_size = sizeof(extent_hooks_t); + new_size = sizeof(extent_hooks_t); assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, - &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); + &new_hooks, new_size), 0, "Unexpected extent_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); assert_ptr_ne(old_hooks.dalloc, chunk_dalloc, @@ -223,11 +223,11 @@ TEST_BEGIN(test_chunk) assert_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, flags); - /* Restore chunk hooks. */ + /* Restore extent hooks. 
*/ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, - &old_hooks, new_size), 0, "Unexpected chunk_hooks error"); + &old_hooks, new_size), 0, "Unexpected extent_hooks error"); assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, - NULL, 0), 0, "Unexpected chunk_hooks error"); + NULL, 0), 0, "Unexpected extent_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); assert_ptr_eq(old_hooks.dalloc, orig_hooks.dalloc, From 127026ad989c06feda12371e584b4af4dffaf2db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 11:23:50 -0700 Subject: [PATCH 0293/2608] Rename chunk_*_t hooks to extent_*_t. --- doc/jemalloc.xml.in | 154 ++++++++++++------------ include/jemalloc/jemalloc_typedefs.h.in | 43 +++---- src/chunk.c | 58 ++++----- 3 files changed, 129 insertions(+), 126 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 801fd497..ab90e30f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1554,45 +1554,45 @@ malloc_conf = "xmalloc:true";]]> (extent_hooks_t) rw - Get or set the chunk management hook functions for arena - <i>. The functions must be capable of operating on all extant - chunks associated with arena <i>, usually by passing unknown - chunks to the replaced functions. In practice, it is feasible to - control allocation for arenas created via Get or set the extent management hook functions for + arena <i>. The functions must be capable of operating on all + extant extents associated with arena <i>, usually by passing + unknown extents to the replaced functions. 
In practice, it is feasible + to control allocation for arenas created via arenas.extend such - that all chunks originate from an application-supplied chunk allocator + that all extents originate from an application-supplied extent allocator (by setting custom extent hook functions just after arena creation), but - the automatically created arenas may have already created chunks prior - to the application having an opportunity to take over chunk + the automatically created arenas may have already created extents prior + to the application having an opportunity to take over extent allocation. The extent_hooks_t structure comprises function pointers which are described individually below. jemalloc uses these - functions to manage chunk lifetime, which starts off with allocation of + functions to manage extent lifetime, which starts off with allocation of mapped committed memory, in the simplest case followed by deallocation. - However, there are performance and platform reasons to retain chunks for - later reuse. Cleanup attempts cascade from deallocation to decommit to - purging, which gives the chunk management functions opportunities to + However, there are performance and platform reasons to retain extents + for later reuse. Cleanup attempts cascade from deallocation to decommit + to purging, which gives the extent management functions opportunities to reject the most permanent cleanup operations in favor of less permanent - (and often less costly) operations. The chunk splitting and merging + (and often less costly) operations. The extent splitting and merging operations can also be opted out of, but this is mainly intended to support platforms on which virtual memory mappings provided by the operating system kernel do not automatically coalesce and split, e.g. Windows. 
- typedef void *(chunk_alloc_t) - void *chunk + typedef void *(extent_alloc_t) + void *new_addr size_t size size_t alignment bool *zero @@ -1600,62 +1600,62 @@ typedef struct { unsigned arena_ind - A chunk allocation function conforms to the - chunk_alloc_t type and upon success returns a pointer to + An extent allocation function conforms to the + extent_alloc_t type and upon success returns a pointer to size bytes of mapped memory on behalf of arena - arena_ind such that the chunk's base address is a - multiple of alignment, as well as setting - *zero to indicate whether the chunk is zeroed and - *commit to indicate whether the chunk is + arena_ind such that the extent's base address is + a multiple of alignment, as well as setting + *zero to indicate whether the extent is zeroed + and *commit to indicate whether the extent is committed. Upon error the function returns NULL and leaves *zero and *commit unmodified. The - size parameter is always a multiple of the chunk + size parameter is always a multiple of the page size. The alignment parameter is always a power - of two at least as large as the chunk size. Zeroing is mandatory if + of two at least as large as the page size. Zeroing is mandatory if *zero is true upon function entry. Committing is mandatory if *commit is true upon function entry. - If chunk is not NULL, the - returned pointer must be chunk on success or + If new_addr is not NULL, the + returned pointer must be new_addr on success or NULL on error. Committed memory may be committed in absolute terms as on a system that does not overcommit, or in implicit terms as on a system that overcommits and satisfies physical memory needs on demand via soft page faults. Note that replacing the - default chunk allocation function makes the arena's arena.<i>.dss setting irrelevant. 
- typedef bool (chunk_dalloc_t) - void *chunk + typedef bool (extent_dalloc_t) + void *addr size_t size bool committed unsigned arena_ind - A chunk deallocation function conforms to the - chunk_dalloc_t type and deallocates a - chunk of given size with + An extent deallocation function conforms to the + extent_dalloc_t type and deallocates an extent at given + addr and size with committed/decommited memory as indicated, on behalf of arena arena_ind, returning false upon success. If the function returns true, this indicates opt-out from - deallocation; the virtual memory mapping associated with the chunk + deallocation; the virtual memory mapping associated with the extent remains mapped, in the same commit state, and available for future use, in which case it will be automatically retained for later reuse. - typedef bool (chunk_commit_t) - void *chunk + typedef bool (extent_commit_t) + void *addr size_t size size_t offset size_t length unsigned arena_ind - A chunk commit function conforms to the - chunk_commit_t type and commits zeroed physical memory to - back pages within a chunk of given + An extent commit function conforms to the + extent_commit_t type and commits zeroed physical memory to + back pages within an extent at given addr and size at offset bytes, extending for length on behalf of arena arena_ind, returning false upon success. @@ -1666,46 +1666,48 @@ typedef struct { physical memory to satisfy the request. 
- typedef bool (chunk_decommit_t) - void *chunk + typedef bool (extent_decommit_t) + void *addr size_t size size_t offset size_t length unsigned arena_ind - A chunk decommit function conforms to the - chunk_decommit_t type and decommits any physical memory - that is backing pages within a chunk of given - size at offset bytes, - extending for length on behalf of arena + An extent decommit function conforms to the + extent_decommit_t type and decommits any physical memory + that is backing pages within an extent at given + addr and size at + offset bytes, extending for + length on behalf of arena arena_ind, returning false upon success, in which - case the pages will be committed via the chunk commit function before + case the pages will be committed via the extent commit function before being reused. If the function returns true, this indicates opt-out from decommit; the memory remains committed and available for future use, in which case it will be automatically retained for later reuse. - typedef bool (chunk_purge_t) - void *chunk + typedef bool (extent_purge_t) + void *addr size_tsize size_t offset size_t length unsigned arena_ind - A chunk purge function conforms to the chunk_purge_t - type and optionally discards physical pages within the virtual memory - mapping associated with chunk of given - size at offset bytes, - extending for length on behalf of arena + An extent purge function conforms to the + extent_purge_t type and optionally discards physical pages + within the virtual memory mapping associated with an extent at given + addr and size at + offset bytes, extending for + length on behalf of arena arena_ind, returning false if pages within the purged virtual memory range will be zero-filled the next time they are accessed. 
- typedef bool (chunk_split_t) - void *chunk + typedef bool (extent_split_t) + void *addr size_t size size_t size_a size_t size_b @@ -1713,35 +1715,35 @@ typedef struct { unsigned arena_ind - A chunk split function conforms to the chunk_split_t - type and optionally splits chunk of given - size into two adjacent chunks, the first of - size_a bytes, and the second of - size_b bytes, operating on + An extent split function conforms to the + extent_split_t type and optionally splits an extent at + given addr and size into + two adjacent extents, the first of size_a bytes, + and the second of size_b bytes, operating on committed/decommitted memory as indicated, on behalf of arena arena_ind, returning false upon - success. If the function returns true, this indicates that the chunk + success. If the function returns true, this indicates that the extent remains unsplit and therefore should continue to be operated on as a whole. - typedef bool (chunk_merge_t) - void *chunk_a + typedef bool (extent_merge_t) + void *addr_a size_t size_a - void *chunk_b + void *addr_b size_t size_b bool committed unsigned arena_ind - A chunk merge function conforms to the chunk_merge_t - type and optionally merges adjacent chunks, - chunk_a of given size_a - and chunk_b of given - size_b into one contiguous chunk, operating on + An extent merge function conforms to the + extent_merge_t type and optionally merges adjacent extents, + at given addr_a and size_a + with given addr_b and + size_b into one contiguous extent, operating on committed/decommitted memory as indicated, on behalf of arena arena_ind, returning false upon - success. If the function returns true, this indicates that the chunks + success. If the function returns true, this indicates that the extents remain distinct mappings and therefore should continue to be operated on independently. 
diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index 2b07e362..99f07ab2 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -1,57 +1,58 @@ /* * void * - * chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, + * extent_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, * bool *commit, unsigned arena_ind); */ -typedef void *(chunk_alloc_t)(void *, size_t, size_t, bool *, bool *, unsigned); +typedef void *(extent_alloc_t)(void *, size_t, size_t, bool *, bool *, + unsigned); /* * bool - * chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind); + * extent_dalloc(void *addr, size_t size, bool committed, unsigned arena_ind); */ -typedef bool (chunk_dalloc_t)(void *, size_t, bool, unsigned); +typedef bool (extent_dalloc_t)(void *, size_t, bool, unsigned); /* * bool - * chunk_commit(void *chunk, size_t size, size_t offset, size_t length, + * extent_commit(void *addr, size_t size, size_t offset, size_t length, * unsigned arena_ind); */ -typedef bool (chunk_commit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_commit_t)(void *, size_t, size_t, size_t, unsigned); /* * bool - * chunk_decommit(void *chunk, size_t size, size_t offset, size_t length, + * extent_decommit(void *addr, size_t size, size_t offset, size_t length, * unsigned arena_ind); */ -typedef bool (chunk_decommit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_decommit_t)(void *, size_t, size_t, size_t, unsigned); /* * bool - * chunk_purge(void *chunk, size_t size, size_t offset, size_t length, + * extent_purge(void *addr, size_t size, size_t offset, size_t length, * unsigned arena_ind); */ -typedef bool (chunk_purge_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_purge_t)(void *, size_t, size_t, size_t, unsigned); /* * bool - * chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b, + * 
extent_split(void *addr, size_t size, size_t size_a, size_t size_b, * bool committed, unsigned arena_ind); */ -typedef bool (chunk_split_t)(void *, size_t, size_t, size_t, bool, unsigned); +typedef bool (extent_split_t)(void *, size_t, size_t, size_t, bool, unsigned); /* * bool - * chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, + * extent_merge(void *addr_a, size_t size_a, void *addr_b, size_t size_b, * bool committed, unsigned arena_ind); */ -typedef bool (chunk_merge_t)(void *, size_t, void *, size_t, bool, unsigned); +typedef bool (extent_merge_t)(void *, size_t, void *, size_t, bool, unsigned); typedef struct { - chunk_alloc_t *alloc; - chunk_dalloc_t *dalloc; - chunk_commit_t *commit; - chunk_decommit_t *decommit; - chunk_purge_t *purge; - chunk_split_t *split; - chunk_merge_t *merge; + extent_alloc_t *alloc; + extent_dalloc_t *dalloc; + extent_commit_t *commit; + extent_decommit_t *decommit; + extent_purge_t *purge; + extent_split_t *split; + extent_merge_t *merge; } extent_hooks_t; diff --git a/src/chunk.c b/src/chunk.c index 6ca40572..78f08d49 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -18,29 +18,29 @@ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). 
*/ size_t chunk_npages; -static void *chunk_alloc_default(void *new_addr, size_t size, +static void *extent_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, +static bool extent_dalloc_default(void *addr, size_t size, bool committed, unsigned arena_ind); -static bool chunk_commit_default(void *chunk, size_t size, size_t offset, +static bool extent_commit_default(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool chunk_decommit_default(void *chunk, size_t size, size_t offset, +static bool extent_decommit_default(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool chunk_purge_default(void *chunk, size_t size, size_t offset, +static bool extent_purge_default(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool chunk_split_default(void *chunk, size_t size, size_t size_a, +static bool extent_split_default(void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); -static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, +static bool extent_merge_default(void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); const extent_hooks_t extent_hooks_default = { - chunk_alloc_default, - chunk_dalloc_default, - chunk_commit_default, - chunk_decommit_default, - chunk_purge_default, - chunk_split_default, - chunk_merge_default + extent_alloc_default, + extent_dalloc_default, + extent_commit_default, + extent_decommit_default, + extent_purge_default, + extent_split_default, + extent_merge_default }; /******************************************************************************/ @@ -107,7 +107,7 @@ extent_hooks_set(tsdn_t *tsdn, arena_t *arena, */ #define ATOMIC_COPY_HOOK(n) do { \ union { \ - chunk_##n##_t **n; \ + extent_##n##_t **n; \ void **v; \ } 
u; \ u.n = &arena->extent_hooks.n; \ @@ -503,7 +503,7 @@ chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) } static void * -chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, +extent_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; @@ -690,12 +690,12 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static bool -chunk_dalloc_default(void *chunk, size_t size, bool committed, +extent_dalloc_default(void *addr, size_t size, bool committed, unsigned arena_ind) { - if (!have_dss || !chunk_in_dss(tsdn_fetch(), chunk)) - return (chunk_dalloc_mmap(chunk, size)); + if (!have_dss || !chunk_in_dss(tsdn_fetch(), addr)) + return (chunk_dalloc_mmap(addr, size)); return (true); } @@ -737,11 +737,11 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static bool -chunk_commit_default(void *chunk, size_t size, size_t offset, size_t length, +extent_commit_default(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - return (pages_commit((void *)((uintptr_t)chunk + (uintptr_t)offset), + return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), length)); } @@ -756,11 +756,11 @@ chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static bool -chunk_decommit_default(void *chunk, size_t size, size_t offset, size_t length, +extent_decommit_default(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - return (pages_decommit((void *)((uintptr_t)chunk + (uintptr_t)offset), + return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), length)); } @@ -776,16 +776,16 @@ chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, } static bool -chunk_purge_default(void *chunk, size_t size, size_t offset, size_t length, +extent_purge_default(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(chunk != 
NULL); + assert(addr != NULL); assert((offset & PAGE_MASK) == 0); assert(length != 0); assert((length & PAGE_MASK) == 0); - return (pages_purge((void *)((uintptr_t)chunk + (uintptr_t)offset), + return (pages_purge((void *)((uintptr_t)addr + (uintptr_t)offset), length)); } @@ -800,7 +800,7 @@ chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static bool -chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, +extent_split_default(void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { @@ -871,7 +871,7 @@ label_error_a: } static bool -chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, +extent_merge_default(void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { @@ -879,7 +879,7 @@ chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, return (true); if (have_dss) { tsdn_t *tsdn = tsdn_fetch(); - if (chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, chunk_b)) + if (chunk_in_dss(tsdn, addr_a) != chunk_in_dss(tsdn, addr_b)) return (true); } From c9a76481d8e411e52240a4e4313dbbfa99801073 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 11:35:30 -0700 Subject: [PATCH 0294/2608] Rename chunks_{cached,retained,mtx} to extents_{cached,retained,mtx}. --- include/jemalloc/internal/arena.h | 16 +++++++-------- include/jemalloc/internal/witness.h | 2 +- src/arena.c | 19 +++++++++-------- src/chunk.c | 32 ++++++++++++++--------------- 4 files changed, 35 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f60b9d60..0707b863 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -235,20 +235,20 @@ struct arena_s { malloc_mutex_t large_mtx; /* - * Heaps of chunks that were previously allocated. These are used when - * allocating chunks, in an attempt to re-use address space. 
+ * Heaps of extents that were previously allocated. These are used when + * allocating extents, in an attempt to re-use address space. */ - extent_heap_t chunks_cached[NPSIZES]; - extent_heap_t chunks_retained[NPSIZES]; + extent_heap_t extents_cached[NPSIZES]; + extent_heap_t extents_retained[NPSIZES]; + /* User-configurable extent hook functions. */ + extent_hooks_t extent_hooks; + /* Protects extents_cached, extents_retained, and extent_hooks. */ + malloc_mutex_t extents_mtx; - malloc_mutex_t chunks_mtx; /* Cache of extent structures that were allocated via base_alloc(). */ ql_head(extent_t) extent_cache; malloc_mutex_t extent_cache_mtx; - /* User-configurable extent hook functions. */ - extent_hooks_t extent_hooks; - /* bins is used to store heaps of free regions. */ arena_bin_t bins[NBINS]; }; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 8c56c21a..e2f85634 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -24,7 +24,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_PROF_GCTX 7U #define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_CHUNKS 9U +#define WITNESS_RANK_ARENA_EXTENTS 9U #define WITNESS_RANK_ARENA_EXTENT_CACHE 10 #define WITNESS_RANK_RTREE_ELM 11U diff --git a/src/arena.c b/src/arena.c index 9a8c2e26..de6605a0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1793,20 +1793,21 @@ arena_new(tsdn_t *tsdn, unsigned ind) return (NULL); for (i = 0; i < NPSIZES; i++) { - extent_heap_new(&arena->chunks_cached[i]); - extent_heap_new(&arena->chunks_retained[i]); + extent_heap_new(&arena->extents_cached[i]); + extent_heap_new(&arena->extents_retained[i]); } - if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", - WITNESS_RANK_ARENA_CHUNKS)) + arena->extent_hooks = extent_hooks_default; + + if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", + WITNESS_RANK_ARENA_EXTENTS)) return (NULL); + 
ql_new(&arena->extent_cache); if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", WITNESS_RANK_ARENA_EXTENT_CACHE)) return (NULL); - arena->extent_hooks = extent_hooks_default; - /* Initialize bins. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; @@ -1843,7 +1844,7 @@ void arena_prefork1(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->chunks_mtx); + malloc_mutex_prefork(tsdn, &arena->extents_mtx); } void @@ -1872,7 +1873,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); malloc_mutex_postfork_parent(tsdn, &arena->extent_cache_mtx); - malloc_mutex_postfork_parent(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->extents_mtx); malloc_mutex_postfork_parent(tsdn, &arena->lock); } @@ -1885,6 +1886,6 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); malloc_mutex_postfork_child(tsdn, &arena->extent_cache_mtx); - malloc_mutex_postfork_child(tsdn, &arena->chunks_mtx); + malloc_mutex_postfork_child(tsdn, &arena->extents_mtx); malloc_mutex_postfork_child(tsdn, &arena->lock); } diff --git a/src/chunk.c b/src/chunk.c index 78f08d49..2ac44b0a 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -83,9 +83,9 @@ extent_hooks_get(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t extent_hooks; - malloc_mutex_lock(tsdn, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->extents_mtx); extent_hooks = extent_hooks_get_locked(arena); - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (extent_hooks); } @@ -96,7 +96,7 @@ extent_hooks_set(tsdn_t *tsdn, arena_t *arena, { extent_hooks_t old_extent_hooks; - malloc_mutex_lock(tsdn, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->extents_mtx); old_extent_hooks = arena->extent_hooks; /* * Copy each field atomically so that it is 
impossible for readers to @@ -121,7 +121,7 @@ extent_hooks_set(tsdn_t *tsdn, arena_t *arena, ATOMIC_COPY_HOOK(split); ATOMIC_COPY_HOOK(merge); #undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (old_extent_hooks); } @@ -330,7 +330,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, /* Beware size_t wrap-around. */ if (alloc_size < usize) return (NULL); - malloc_mutex_lock(tsdn, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->extents_mtx); extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); if (new_addr != NULL) { rtree_elm_t *elm; @@ -350,7 +350,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent = chunk_first_best_fit(arena, extent_heaps, alloc_size); if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < size)) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } extent_heaps_remove(extent_heaps, extent); @@ -373,7 +373,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, leadsize, leadsize, size + trailsize, usize + trailsize); if (extent == NULL) { chunk_leak(tsdn, arena, extent_hooks, cache, lead); - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } extent_heaps_insert(extent_heaps, lead); @@ -386,7 +386,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent, size, usize, trailsize, trailsize); if (trail == NULL) { chunk_leak(tsdn, arena, extent_hooks, cache, extent); - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } extent_heaps_insert(extent_heaps, trail); @@ -402,7 +402,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, if (!extent_committed_get(extent) && extent_hooks->commit(extent_base_get(extent), 
extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); chunk_record(tsdn, arena, extent_hooks, extent_heaps, cache, extent); return (NULL); @@ -416,7 +416,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, chunk_interior_register(tsdn, extent); } - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); if (*zero) { if (!extent_zeroed_get(extent)) { @@ -480,7 +480,7 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, assert(alignment != 0); commit = true; - extent = chunk_recycle(tsdn, arena, extent_hooks, arena->chunks_cached, + extent = chunk_recycle(tsdn, arena, extent_hooks, arena->extents_cached, true, new_addr, usize, pad, alignment, zero, &commit, slab); if (extent == NULL) return (NULL); @@ -531,7 +531,7 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, assert(alignment != 0); extent = chunk_recycle(tsdn, arena, extent_hooks, - arena->chunks_retained, false, new_addr, usize, pad, alignment, + arena->extents_retained, false, new_addr, usize, pad, alignment, zero, commit, slab); if (extent != NULL && config_stats) { size_t size = usize + pad; @@ -640,7 +640,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, assert(!cache || !extent_zeroed_get(extent)); - malloc_mutex_lock(tsdn, &arena->chunks_mtx); + malloc_mutex_lock(tsdn, &arena->extents_mtx); extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); extent_usize_set(extent, 0); @@ -671,7 +671,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_heaps, cache); } - malloc_mutex_unlock(tsdn, &arena->chunks_mtx); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); } void @@ -685,7 +685,7 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_addr_set(extent, 
extent_base_get(extent)); extent_zeroed_set(extent, false); - chunk_record(tsdn, arena, extent_hooks, arena->chunks_cached, true, + chunk_record(tsdn, arena, extent_hooks, arena->extents_cached, true, extent); } @@ -732,7 +732,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, if (config_stats) arena->stats.retained += extent_size_get(extent); - chunk_record(tsdn, arena, extent_hooks, arena->chunks_retained, false, + chunk_record(tsdn, arena, extent_hooks, arena->extents_retained, false, extent); } From 4a55daa363e9622a1c98a129bbda1b7086773fa6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 11:56:45 -0700 Subject: [PATCH 0295/2608] s/CHUNK_HOOKS_INITIALIZER/EXTENT_HOOKS_INITIALIZER/g --- include/jemalloc/internal/chunk.h | 10 ---------- include/jemalloc/internal/extent.h | 10 ++++++++++ src/arena.c | 8 ++++---- src/chunk.c | 2 +- src/chunk_dss.c | 2 +- 5 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 10f2ae72..a6f6f7c3 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -11,16 +11,6 @@ #define CHUNK_CEILING(s) \ (((s) + chunksize_mask) & ~chunksize_mask) -#define CHUNK_HOOKS_INITIALIZER { \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL \ -} - #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index cf717d9e..b590ae0f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -3,6 +3,16 @@ typedef struct extent_s extent_t; +#define EXTENT_HOOKS_INITIALIZER { \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + NULL \ +} + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS 
diff --git a/src/arena.c b/src/arena.c index de6605a0..075082b1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -347,7 +347,7 @@ arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { extent_t *extent; - extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; malloc_mutex_lock(tsdn, &arena->lock); @@ -373,7 +373,7 @@ void arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked) { - extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; if (!locked) malloc_mutex_lock(tsdn, &arena->lock); @@ -866,7 +866,7 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { - extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); arena_chunk_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); @@ -1009,7 +1009,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, { extent_t *slab; arena_slab_data_t *slab_data; - extent_hooks_t extent_hooks = CHUNK_HOOKS_INITIALIZER; + extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; bool zero; zero = false; diff --git a/src/chunk.c b/src/chunk.c index 2ac44b0a..7f80bb42 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -131,7 +131,7 @@ extent_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, bool locked) { static const extent_hooks_t uninitialized_hooks = - CHUNK_HOOKS_INITIALIZER; + EXTENT_HOOKS_INITIALIZER; if (memcmp(extent_hooks, &uninitialized_hooks, sizeof(extent_hooks_t)) == 0) { diff --git a/src/chunk_dss.c b/src/chunk_dss.c index c5323dea..9fa4ad81 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -137,7 +137,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, 
malloc_mutex_unlock(tsdn, &dss_mtx); if (pad_size != 0) { extent_hooks_t extent_hooks = - CHUNK_HOOKS_INITIALIZER; + EXTENT_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, &extent_hooks, pad); } else From 0c4932eb1e522211297ae40435ab6d3bd74242bc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 12:10:39 -0700 Subject: [PATCH 0296/2608] s/chunk_lookup/extent_lookup/g, s/chunks_rtree/extents_rtree/g --- include/jemalloc/internal/chunk.h | 15 ------- include/jemalloc/internal/extent.h | 12 ++++++ .../jemalloc/internal/jemalloc_internal.h.in | 6 +-- include/jemalloc/internal/private_symbols.txt | 5 ++- src/chunk.c | 41 ++++++++----------- src/extent.c | 16 ++++++++ src/jemalloc.c | 2 + test/unit/arena_reset.c | 2 +- 8 files changed, 55 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index a6f6f7c3..ddfa0046 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -22,8 +22,6 @@ extern size_t opt_lg_chunk; extern const char *opt_dss; -extern rtree_t chunks_rtree; - extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). 
*/ extern size_t chunk_npages; @@ -67,19 +65,6 @@ void chunk_postfork_child(tsdn_t *tsdn); /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#ifndef JEMALLOC_ENABLE_INLINE -extent_t *chunk_lookup(tsdn_t *tsdn, const void *chunk, bool dependent); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_CHUNK_C_)) -JEMALLOC_INLINE extent_t * -chunk_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) -{ - - return (rtree_read(tsdn, &chunks_rtree, (uintptr_t)ptr, dependent)); -} -#endif - #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index b590ae0f..8552f701 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -87,6 +87,8 @@ typedef ph(extent_t) extent_heap_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +extern rtree_t extents_rtree; + extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); @@ -101,11 +103,14 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) +bool extent_boot(void); + #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +extent_t *extent_lookup(tsdn_t *tsdn, const void *chunk, bool dependent); arena_t *extent_arena_get(const extent_t *extent); void *extent_base_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); @@ -140,6 +145,13 @@ void extent_ring_remove(extent_t *extent); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) +JEMALLOC_INLINE extent_t * +extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) +{ + + return (rtree_read(tsdn, 
&extents_rtree, (uintptr_t)ptr, dependent)); +} + JEMALLOC_INLINE arena_t * extent_arena_get(const extent_t *extent) { diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 58a18ae5..fb3991bc 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -510,9 +510,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mb.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/chunk.h" #include "jemalloc/internal/large.h" @@ -929,7 +929,7 @@ JEMALLOC_ALWAYS_INLINE extent_t * iealloc(tsdn_t *tsdn, const void *ptr) { - return (chunk_lookup(tsdn, ptr, true)); + return (extent_lookup(tsdn, ptr, true)); } #endif @@ -1062,7 +1062,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) extent_t *extent; /* Return 0 if ptr is not within a chunk managed by jemalloc. 
*/ - extent = chunk_lookup(tsdn, ptr, false); + extent = extent_lookup(tsdn, ptr, false); if (extent == NULL) return (0); assert(extent_active_get(extent)); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b5fd4c0c..92f91e44 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -132,7 +132,6 @@ chunk_dss_prec_get chunk_dss_prec_set chunk_dss_prefork chunk_in_dss -chunk_lookup chunk_merge_wrapper chunk_npages chunk_postfork_child @@ -140,7 +139,6 @@ chunk_postfork_parent chunk_prefork chunk_purge_wrapper chunk_split_wrapper -chunks_rtree chunksize chunksize_mask ckh_count @@ -173,6 +171,7 @@ extent_arena_get extent_arena_set extent_base_get extent_before_get +extent_boot extent_committed_get extent_committed_set extent_dalloc @@ -187,6 +186,7 @@ extent_hooks_get extent_hooks_set extent_init extent_last_get +extent_lookup extent_past_get extent_prof_tctx_get extent_prof_tctx_set @@ -205,6 +205,7 @@ extent_usize_get extent_usize_set extent_zeroed_get extent_zeroed_set +extents_rtree ffs_llu ffs_lu ffs_u diff --git a/src/chunk.c b/src/chunk.c index 7f80bb42..3ce6e015 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -11,8 +11,6 @@ size_t opt_lg_chunk = 0; static size_t curchunks; static size_t highchunks; -rtree_t chunks_rtree; - /* Various chunk-related settings. */ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). 
*/ @@ -161,14 +159,14 @@ extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) { - *r_elm_a = rtree_elm_acquire(tsdn, &chunks_rtree, + *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) return (true); assert(*r_elm_a != NULL); if (extent_size_get(extent) > PAGE) { - *r_elm_b = rtree_elm_acquire(tsdn, &chunks_rtree, + *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, (uintptr_t)extent_last_get(extent), dependent, init_missing); if (!dependent && *r_elm_b == NULL) @@ -185,18 +183,18 @@ extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b, const extent_t *extent) { - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_a, extent); + rtree_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); if (elm_b != NULL) - rtree_elm_write_acquired(tsdn, &chunks_rtree, elm_b, extent); + rtree_elm_write_acquired(tsdn, &extents_rtree, elm_b, extent); } static void extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) { - rtree_elm_release(tsdn, &chunks_rtree, elm_a); + rtree_elm_release(tsdn, &extents_rtree, elm_a); if (elm_b != NULL) - rtree_elm_release(tsdn, &chunks_rtree, elm_b); + rtree_elm_release(tsdn, &extents_rtree, elm_b); } static void @@ -207,7 +205,7 @@ chunk_interior_register(tsdn_t *tsdn, const extent_t *extent) assert(extent_slab_get(extent)); for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &chunks_rtree, + rtree_write(tsdn, &extents_rtree, (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << LG_PAGE), extent); } @@ -252,7 +250,7 @@ chunk_interior_deregister(tsdn_t *tsdn, const extent_t *extent) assert(extent_slab_get(extent)); for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &chunks_rtree, + rtree_clear(tsdn, &extents_rtree, (uintptr_t)extent_base_get(extent) + 
(uintptr_t)(i << LG_PAGE)); } @@ -335,15 +333,15 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, if (new_addr != NULL) { rtree_elm_t *elm; - elm = rtree_elm_acquire(tsdn, &chunks_rtree, + elm = rtree_elm_acquire(tsdn, &extents_rtree, (uintptr_t)new_addr, false, false); if (elm != NULL) { - extent = rtree_elm_read_acquired(tsdn, &chunks_rtree, + extent = rtree_elm_read_acquired(tsdn, &extents_rtree, elm); if (extent != NULL && (extent_active_get(extent) || extent_retained_get(extent) == cache)) extent = NULL; - rtree_elm_release(tsdn, &chunks_rtree, elm); + rtree_elm_release(tsdn, &extents_rtree, elm); } else extent = NULL; } else @@ -651,12 +649,12 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_slab_set(extent, false); } - assert(chunk_lookup(tsdn, extent_base_get(extent), true) == extent); + assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); extent_heaps_insert(extent_heaps, extent); arena_chunk_cache_maybe_insert(arena, extent, cache); /* Try to coalesce forward. */ - next = rtree_read(tsdn, &chunks_rtree, + next = rtree_read(tsdn, &extents_rtree, (uintptr_t)extent_past_get(extent), false); if (next != NULL) { chunk_try_coalesce(tsdn, arena, extent_hooks, extent, next, @@ -664,7 +662,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } /* Try to coalesce backward. 
*/ - prev = rtree_read(tsdn, &chunks_rtree, + prev = rtree_read(tsdn, &extents_rtree, (uintptr_t)extent_before_get(extent), false); if (prev != NULL) { chunk_try_coalesce(tsdn, arena, extent_hooks, prev, extent, @@ -907,12 +905,12 @@ chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); if (a_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &chunks_rtree, a_elm_b, NULL); - rtree_elm_release(tsdn, &chunks_rtree, a_elm_b); + rtree_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, NULL); + rtree_elm_release(tsdn, &extents_rtree, a_elm_b); } if (b_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &chunks_rtree, b_elm_a, NULL); - rtree_elm_release(tsdn, &chunks_rtree, b_elm_a); + rtree_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, NULL); + rtree_elm_release(tsdn, &extents_rtree, b_elm_a); } else b_elm_b = b_elm_a; @@ -963,9 +961,6 @@ chunk_boot(void) if (have_dss && chunk_dss_boot()) return (true); - if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - - LG_PAGE))) - return (true); return (false); } diff --git a/src/extent.c b/src/extent.c index 2f929a83..3e62e3bc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,6 +1,11 @@ #define JEMALLOC_EXTENT_C_ #include "jemalloc/internal/jemalloc_internal.h" +/******************************************************************************/ +/* Data. */ + +rtree_t extents_rtree; + /******************************************************************************/ extent_t * @@ -112,3 +117,14 @@ extent_ad_comp(const extent_t *a, const extent_t *b) /* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_ad_comp) + +bool +extent_boot(void) +{ + + if (rtree_new(&extents_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - + LG_PAGE))) + return (true); + + return (false); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index a9bba12b..03e61df6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1232,6 +1232,8 @@ malloc_init_hard_a0_locked() return (true); if (chunk_boot()) return (true); + if (extent_boot()) + return (true); if (ctl_boot()) return (true); if (config_prof) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a9476b89..61caf3c5 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -70,7 +70,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { extent_t *extent; - extent = chunk_lookup(tsdn, ptr, false); + extent = extent_lookup(tsdn, ptr, false); if (extent == NULL) return (0); if (!extent_active_get(extent)) From 22588dda6e09f63246064e2e692dc0dded2e8e35 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 12:59:02 -0700 Subject: [PATCH 0297/2608] Rename most remaining *chunk* APIs to *extent*. 
--- Makefile.in | 10 +- include/jemalloc/internal/arena.h | 18 +- include/jemalloc/internal/chunk.h | 36 - include/jemalloc/internal/extent.h | 41 +- .../internal/{chunk_dss.h => extent_dss.h} | 16 +- .../internal/{chunk_mmap.h => extent_mmap.h} | 4 +- include/jemalloc/internal/private_symbols.txt | 60 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 8 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 24 +- src/arena.c | 58 +- src/base.c | 8 +- src/chunk.c | 936 ------------------ src/ctl.c | 4 +- src/extent.c | 929 +++++++++++++++++ src/{chunk_dss.c => extent_dss.c} | 32 +- src/{chunk_mmap.c => extent_mmap.c} | 12 +- src/jemalloc.c | 8 +- src/large.c | 28 +- test/integration/{chunk.c => extent.c} | 82 +- 19 files changed, 1153 insertions(+), 1161 deletions(-) rename include/jemalloc/internal/{chunk_dss.h => extent_dss.h} (71%) rename include/jemalloc/internal/{chunk_mmap.h => extent_mmap.h} (84%) rename src/{chunk_dss.c => extent_dss.c} (86%) rename src/{chunk_mmap.c => extent_mmap.c} (83%) rename test/integration/{chunk.c => extent.c} (72%) diff --git a/Makefile.in b/Makefile.in index f90e2a4f..a24fde95 100644 --- a/Makefile.in +++ b/Makefile.in @@ -82,11 +82,11 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/base.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/chunk.c \ - $(srcroot)src/chunk_dss.c \ - $(srcroot)src/chunk_mmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/extent.c \ + $(srcroot)src/extent_dss.c \ + $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ $(srcroot)src/large.c \ $(srcroot)src/mb.c \ @@ -171,16 +171,16 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero.c TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ - $(srcroot)test/integration/sdallocx.c \ + $(srcroot)test/integration/extent.c \ $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/MALLOCX_ARENA.c \ $(srcroot)test/integration/overflow.c \ $(srcroot)test/integration/posix_memalign.c \ 
$(srcroot)test/integration/rallocx.c \ + $(srcroot)test/integration/sdallocx.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ - $(srcroot)test/integration/xallocx.c \ - $(srcroot)test/integration/chunk.c + $(srcroot)test/integration/xallocx.c TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 0707b863..bc169756 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -278,22 +278,22 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; -extent_t *arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, +extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero); -void arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, +void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent); -void arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, +void arena_extent_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); -void arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, +void arena_extent_cache_maybe_remove(arena_t *arena, extent_t *extent, bool cache); -extent_t *arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool locked); -void arena_chunk_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, +void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool locked); +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -void arena_chunk_ralloc_large_expand(tsdn_t 
*tsdn, arena_t *arena, +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index ddfa0046..7a5ebbca 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -26,40 +26,7 @@ extern size_t chunksize; extern size_t chunksize_mask; /* (chunksize - 1). */ extern size_t chunk_npages; -extern const extent_hooks_t extent_hooks_default; - -extent_hooks_t extent_hooks_get(tsdn_t *tsdn, arena_t *arena); -extent_hooks_t extent_hooks_set(tsdn_t *tsdn, arena_t *arena, - const extent_hooks_t *extent_hooks); - -extent_t *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab); -extent_t *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); -void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); -bool chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -extent_t *chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - size_t usize_a, size_t size_b, size_t usize_b); -bool 
chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); bool chunk_boot(void); -void chunk_prefork(tsdn_t *tsdn); -void chunk_postfork_parent(tsdn_t *tsdn); -void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ @@ -67,6 +34,3 @@ void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ - -#include "jemalloc/internal/chunk_dss.h" -#include "jemalloc/internal/chunk_mmap.h" diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 8552f701..a41a15ff 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -38,7 +38,7 @@ struct extent_s { bool e_active; /* - * The zeroed flag is used by chunk recycling code to track whether + * The zeroed flag is used by extent recycling code to track whether * memory is zero-filled. 
*/ bool e_zeroed; @@ -87,11 +87,16 @@ typedef ph(extent_t) extent_heap_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -extern rtree_t extents_rtree; +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +extent_hooks_t extent_hooks_get(tsdn_t *tsdn, arena_t *arena); +extent_hooks_t extent_hooks_set(tsdn_t *tsdn, arena_t *arena, + const extent_hooks_t *extent_hooks); + #ifdef JEMALLOC_JET typedef size_t (extent_size_quantize_t)(size_t); extern extent_size_quantize_t *extent_size_quantize_floor; @@ -103,6 +108,34 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) +extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, + size_t usize_a, size_t 
size_b, size_t usize_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); +void extent_prefork(tsdn_t *tsdn); +void extent_postfork_parent(tsdn_t *tsdn); +void extent_postfork_child(tsdn_t *tsdn); + bool extent_boot(void); #endif /* JEMALLOC_H_EXTERNS */ @@ -110,7 +143,7 @@ bool extent_boot(void); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -extent_t *extent_lookup(tsdn_t *tsdn, const void *chunk, bool dependent); +extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); arena_t *extent_arena_get(const extent_t *extent); void *extent_base_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); @@ -395,3 +428,5 @@ extent_ring_remove(extent_t *extent) #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/extent_dss.h similarity index 71% rename from include/jemalloc/internal/chunk_dss.h rename to include/jemalloc/internal/extent_dss.h index 724fa579..43573775 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -21,15 +21,15 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); -bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); -void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, +dss_prec_t extent_dss_prec_get(tsdn_t *tsdn); +bool extent_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsdn_t *tsdn, void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(tsdn_t *tsdn); 
-void chunk_dss_postfork_parent(tsdn_t *tsdn); -void chunk_dss_postfork_child(tsdn_t *tsdn); +bool extent_in_dss(tsdn_t *tsdn, void *addr); +bool extent_dss_boot(void); +void extent_dss_prefork(tsdn_t *tsdn); +void extent_dss_postfork_parent(tsdn_t *tsdn); +void extent_dss_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_mmap.h b/include/jemalloc/internal/extent_mmap.h similarity index 84% rename from include/jemalloc/internal/chunk_mmap.h rename to include/jemalloc/internal/extent_mmap.h index 6f2d0ac2..3c1a7884 100644 --- a/include/jemalloc/internal/chunk_mmap.h +++ b/include/jemalloc/internal/extent_mmap.h @@ -9,9 +9,9 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, +void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_dalloc_mmap(void *chunk, size_t size); +bool extent_dalloc_mmap(void *addr, size_t size); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 92f91e44..a2f093ee 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -9,14 +9,6 @@ arena_boot arena_choose arena_choose_hard arena_choose_impl -arena_chunk_alloc_large -arena_chunk_cache_alloc -arena_chunk_cache_dalloc -arena_chunk_cache_maybe_insert -arena_chunk_cache_maybe_remove -arena_chunk_dalloc_large -arena_chunk_ralloc_large_expand -arena_chunk_ralloc_large_shrink arena_cleanup arena_dalloc arena_dalloc_bin_junked_locked @@ -31,6 +23,14 @@ arena_decay_time_get arena_decay_time_set arena_dss_prec_get arena_dss_prec_set +arena_extent_alloc_large 
+arena_extent_cache_alloc +arena_extent_cache_dalloc +arena_extent_cache_maybe_insert +arena_extent_cache_maybe_remove +arena_extent_dalloc_large +arena_extent_ralloc_large_expand +arena_extent_ralloc_large_shrink arena_get arena_ichoose arena_init @@ -115,30 +115,8 @@ bootstrap_free bootstrap_malloc bt_init buferror -chunk_alloc_cache -chunk_alloc_dss -chunk_alloc_mmap -chunk_alloc_wrapper chunk_boot -chunk_commit_wrapper -chunk_dalloc_cache -chunk_dalloc_mmap -chunk_dalloc_wrapper -chunk_decommit_wrapper -chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent -chunk_dss_prec_get -chunk_dss_prec_set -chunk_dss_prefork -chunk_in_dss -chunk_merge_wrapper chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork -chunk_purge_wrapper -chunk_split_wrapper chunksize chunksize_mask ckh_count @@ -167,14 +145,29 @@ extent_addr_get extent_addr_randomize extent_addr_set extent_alloc +extent_alloc_cache +extent_alloc_dss +extent_alloc_mmap +extent_alloc_wrapper extent_arena_get extent_arena_set extent_base_get extent_before_get extent_boot +extent_commit_wrapper extent_committed_get extent_committed_set extent_dalloc +extent_dalloc_cache +extent_dalloc_mmap +extent_dalloc_wrapper +extent_decommit_wrapper +extent_dss_boot +extent_dss_postfork_child +extent_dss_postfork_parent +extent_dss_prec_get +extent_dss_prec_set +extent_dss_prefork extent_heap_empty extent_heap_first extent_heap_insert @@ -184,12 +177,18 @@ extent_heap_remove_first extent_hooks_default extent_hooks_get extent_hooks_set +extent_in_dss extent_init extent_last_get extent_lookup +extent_merge_wrapper extent_past_get +extent_postfork_child +extent_postfork_parent +extent_prefork extent_prof_tctx_get extent_prof_tctx_set +extent_purge_wrapper extent_retained_get extent_ring_insert extent_ring_remove @@ -201,6 +200,7 @@ extent_slab_data_get extent_slab_data_get_const extent_slab_get extent_slab_set +extent_split_wrapper extent_usize_get extent_usize_set extent_zeroed_get diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 91c949aa..59f52f96 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -41,11 +41,11 @@ - - + + @@ -92,11 +92,11 @@ - - + + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 09d4cb20..159b2e72 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -62,12 +62,6 @@ Header Files\internal - - Header Files\internal - - - Header Files\internal - Header Files\internal @@ -77,6 +71,12 @@ Header Files\internal + + Header Files\internal + + + Header Files\internal + Header Files\internal @@ -187,12 +187,6 @@ Source Files - - Source Files - - - Source Files - Source Files @@ -202,6 +196,12 @@ Source Files + + Source Files + + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index 075082b1..990e0e89 100644 --- a/src/arena.c +++ b/src/arena.c @@ -46,33 +46,33 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ static size_t -arena_chunk_dirty_npages(const extent_t *extent) +arena_extent_dirty_npages(const extent_t *extent) { return (extent_size_get(extent) >> LG_PAGE); } static extent_t * -arena_chunk_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, +arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->lock); - return (chunk_alloc_cache(tsdn, arena, extent_hooks, new_addr, usize, + return (extent_alloc_cache(tsdn, arena, extent_hooks, new_addr, usize, pad, alignment, zero, slab)); } extent_t * -arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, +arena_extent_cache_alloc(tsdn_t 
*tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero) { extent_t *extent; malloc_mutex_lock(tsdn, &arena->lock); - extent = arena_chunk_cache_alloc_locked(tsdn, arena, extent_hooks, + extent = arena_extent_cache_alloc_locked(tsdn, arena, extent_hooks, new_addr, size, 0, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); @@ -80,44 +80,44 @@ arena_chunk_cache_alloc(tsdn_t *tsdn, arena_t *arena, } static void -arena_chunk_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, +arena_extent_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &arena->lock); - chunk_dalloc_cache(tsdn, arena, extent_hooks, extent); + extent_dalloc_cache(tsdn, arena, extent_hooks, extent); arena_maybe_purge(tsdn, arena); } void -arena_chunk_cache_dalloc(tsdn_t *tsdn, arena_t *arena, +arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->lock); - arena_chunk_cache_dalloc_locked(tsdn, arena, extent_hooks, extent); + arena_extent_cache_dalloc_locked(tsdn, arena, extent_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) +arena_extent_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) { if (cache) { extent_ring_insert(&arena->extents_dirty, extent); - arena->ndirty += arena_chunk_dirty_npages(extent); + arena->ndirty += arena_extent_dirty_npages(extent); } } void -arena_chunk_cache_maybe_remove(arena_t *arena, extent_t *extent, bool dirty) +arena_extent_cache_maybe_remove(arena_t *arena, extent_t *extent, bool dirty) { if (dirty) { extent_ring_remove(extent); - assert(arena->ndirty >= arena_chunk_dirty_npages(extent)); - arena->ndirty -= arena_chunk_dirty_npages(extent); + assert(arena->ndirty >= arena_extent_dirty_npages(extent)); + arena->ndirty -= 
arena_extent_dirty_npages(extent); } } @@ -320,13 +320,13 @@ arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) } static extent_t * -arena_chunk_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, +arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, size_t usize, size_t alignment, bool *zero) { extent_t *extent; bool commit = true; - extent = chunk_alloc_wrapper(tsdn, arena, extent_hooks, NULL, usize, + extent = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); if (extent == NULL) { /* Revert optimistic stats updates. */ @@ -343,7 +343,7 @@ arena_chunk_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, } extent_t * -arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, +arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { extent_t *extent; @@ -358,11 +358,11 @@ arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); - extent = arena_chunk_cache_alloc_locked(tsdn, arena, &extent_hooks, + extent = arena_extent_cache_alloc_locked(tsdn, arena, &extent_hooks, NULL, usize, large_pad, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); if (extent == NULL) { - extent = arena_chunk_alloc_large_hard(tsdn, arena, + extent = arena_extent_alloc_large_hard(tsdn, arena, &extent_hooks, usize, alignment, zero); } @@ -370,7 +370,7 @@ arena_chunk_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } void -arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked) { extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; @@ -384,13 +384,13 @@ arena_chunk_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); - arena_chunk_cache_dalloc_locked(tsdn, arena, 
&extent_hooks, extent); + arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, extent); if (!locked) malloc_mutex_unlock(tsdn, &arena->lock); } void -arena_chunk_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldusize) { size_t usize = extent_usize_get(extent); @@ -406,7 +406,7 @@ arena_chunk_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } void -arena_chunk_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldusize) { size_t usize = extent_usize_get(extent); @@ -756,7 +756,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, next = qr_next(extent, qr_link); /* Allocate. */ zero = false; - textent = arena_chunk_cache_alloc_locked(tsdn, arena, + textent = arena_extent_cache_alloc_locked(tsdn, arena, extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, CACHELINE, &zero, false); assert(textent == extent); @@ -793,7 +793,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, next = qr_next(extent, qr_link); extent_ring_remove(extent); - chunk_dalloc_wrapper(tsdn, arena, extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); } if (config_stats) { @@ -869,7 +869,7 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - arena_chunk_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); + arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); } void @@ -996,7 +996,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; malloc_mutex_unlock(tsdn, &arena->lock); - slab = chunk_alloc_wrapper(tsdn, arena, extent_hooks, NULL, + slab = extent_alloc_wrapper(tsdn, arena, 
extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, &commit, true); malloc_mutex_lock(tsdn, &arena->lock); @@ -1013,7 +1013,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero; zero = false; - slab = arena_chunk_cache_alloc_locked(tsdn, arena, &extent_hooks, NULL, + slab = arena_extent_cache_alloc_locked(tsdn, arena, &extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, true); if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, @@ -1774,7 +1774,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsdn); + arena->dss_prec = extent_dss_prec_get(tsdn); arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); arena->purging = false; diff --git a/src/base.c b/src/base.c index 3807422c..667786e1 100644 --- a/src/base.c +++ b/src/base.c @@ -38,7 +38,7 @@ base_extent_dalloc(tsdn_t *tsdn, extent_t *extent) } static extent_t * -base_chunk_alloc(tsdn_t *tsdn, size_t minsize) +base_extent_alloc(tsdn_t *tsdn, size_t minsize) { extent_t *extent; size_t csize, nsize; @@ -51,13 +51,13 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) nsize = (extent == NULL) ? CACHELINE_CEILING(sizeof(extent_t)) : 0; csize = CHUNK_CEILING(minsize + nsize); /* - * Directly call chunk_alloc_mmap() because it's critical to allocate + * Directly call extent_alloc_mmap() because it's critical to allocate * untouched demand-zeroed virtual memory. */ { bool zero = true; bool commit = true; - addr = chunk_alloc_mmap(NULL, csize, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, csize, PAGE, &zero, &commit); } if (addr == NULL) { if (extent != NULL) @@ -108,7 +108,7 @@ base_alloc(tsdn_t *tsdn, size_t size) } if (extent == NULL) { /* Try to allocate more space. 
*/ - extent = base_chunk_alloc(tsdn, csize); + extent = base_extent_alloc(tsdn, csize); } if (extent == NULL) { ret = NULL; diff --git a/src/chunk.c b/src/chunk.c index 3ce6e015..d750f715 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -7,924 +7,12 @@ const char *opt_dss = DSS_DEFAULT; size_t opt_lg_chunk = 0; -/* Used exclusively for gdump triggering. */ -static size_t curchunks; -static size_t highchunks; - /* Various chunk-related settings. */ size_t chunksize; size_t chunksize_mask; /* (chunksize - 1). */ size_t chunk_npages; -static void *extent_alloc_default(void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -static bool extent_dalloc_default(void *addr, size_t size, bool committed, - unsigned arena_ind); -static bool extent_commit_default(void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind); -static bool extent_decommit_default(void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind); -static bool extent_purge_default(void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind); -static bool extent_split_default(void *addr, size_t size, size_t size_a, - size_t size_b, bool committed, unsigned arena_ind); -static bool extent_merge_default(void *addr_a, size_t size_a, void *addr_b, - size_t size_b, bool committed, unsigned arena_ind); - -const extent_hooks_t extent_hooks_default = { - extent_alloc_default, - extent_dalloc_default, - extent_commit_default, - extent_decommit_default, - extent_purge_default, - extent_split_default, - extent_merge_default -}; - /******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. 
- */ - -static void chunk_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], - bool cache, extent_t *extent); - -/******************************************************************************/ - -static void -extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) -{ - size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind(psz); - extent_heap_insert(&extent_heaps[pind], extent); -} - -static void -extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) -{ - size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind(psz); - extent_heap_remove(&extent_heaps[pind], extent); -} - -static extent_hooks_t -extent_hooks_get_locked(arena_t *arena) -{ - - return (arena->extent_hooks); -} - -extent_hooks_t -extent_hooks_get(tsdn_t *tsdn, arena_t *arena) -{ - extent_hooks_t extent_hooks; - - malloc_mutex_lock(tsdn, &arena->extents_mtx); - extent_hooks = extent_hooks_get_locked(arena); - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - - return (extent_hooks); -} - -extent_hooks_t -extent_hooks_set(tsdn_t *tsdn, arena_t *arena, - const extent_hooks_t *extent_hooks) -{ - extent_hooks_t old_extent_hooks; - - malloc_mutex_lock(tsdn, &arena->extents_mtx); - old_extent_hooks = arena->extent_hooks; - /* - * Copy each field atomically so that it is impossible for readers to - * see partially updated pointers. There are places where readers only - * need one hook function pointer (therefore no need to copy the - * entirety of arena->extent_hooks), and stale reads do not affect - * correctness, so they perform unlocked reads. 
- */ -#define ATOMIC_COPY_HOOK(n) do { \ - union { \ - extent_##n##_t **n; \ - void **v; \ - } u; \ - u.n = &arena->extent_hooks.n; \ - atomic_write_p(u.v, extent_hooks->n); \ -} while (0) - ATOMIC_COPY_HOOK(alloc); - ATOMIC_COPY_HOOK(dalloc); - ATOMIC_COPY_HOOK(commit); - ATOMIC_COPY_HOOK(decommit); - ATOMIC_COPY_HOOK(purge); - ATOMIC_COPY_HOOK(split); - ATOMIC_COPY_HOOK(merge); -#undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - - return (old_extent_hooks); -} - -static void -extent_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, bool locked) -{ - static const extent_hooks_t uninitialized_hooks = - EXTENT_HOOKS_INITIALIZER; - - if (memcmp(extent_hooks, &uninitialized_hooks, sizeof(extent_hooks_t)) - == 0) { - *extent_hooks = locked ? extent_hooks_get_locked(arena) : - extent_hooks_get(tsdn, arena); - } -} - -static void -extent_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks) -{ - - extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, true); -} - -static void -extent_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks) -{ - - extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, false); -} - -static bool -extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, - bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) -{ - - *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, - (uintptr_t)extent_base_get(extent), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) - return (true); - assert(*r_elm_a != NULL); - - if (extent_size_get(extent) > PAGE) { - *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, - (uintptr_t)extent_last_get(extent), dependent, - init_missing); - if (!dependent && *r_elm_b == NULL) - return (true); - assert(*r_elm_b != NULL); - } else - *r_elm_b = NULL; - - return (false); -} - -static void -extent_rtree_write_acquired(tsdn_t *tsdn, 
rtree_elm_t *elm_a, - rtree_elm_t *elm_b, const extent_t *extent) -{ - - rtree_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); - if (elm_b != NULL) - rtree_elm_write_acquired(tsdn, &extents_rtree, elm_b, extent); -} - -static void -extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) -{ - - rtree_elm_release(tsdn, &extents_rtree, elm_a); - if (elm_b != NULL) - rtree_elm_release(tsdn, &extents_rtree, elm_b); -} - -static void -chunk_interior_register(tsdn_t *tsdn, const extent_t *extent) -{ - size_t i; - - assert(extent_slab_get(extent)); - - for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE), extent); - } -} - -static bool -chunk_register(tsdn_t *tsdn, const extent_t *extent) -{ - rtree_elm_t *elm_a, *elm_b; - - if (extent_rtree_acquire(tsdn, extent, false, true, &elm_a, &elm_b)) - return (true); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); - if (extent_slab_get(extent)) - chunk_interior_register(tsdn, extent); - extent_rtree_release(tsdn, elm_a, elm_b); - - if (config_prof && opt_prof && extent_active_get(extent)) { - size_t nadd = (extent_size_get(extent) == 0) ? 1 : - extent_size_get(extent) / chunksize; - size_t cur = atomic_add_z(&curchunks, nadd); - size_t high = atomic_read_z(&highchunks); - while (cur > high && atomic_cas_z(&highchunks, high, cur)) { - /* - * Don't refresh cur, because it may have decreased - * since this thread lost the highchunks update race. 
- */ - high = atomic_read_z(&highchunks); - } - if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsdn); - } - - return (false); -} - -static void -chunk_interior_deregister(tsdn_t *tsdn, const extent_t *extent) -{ - size_t i; - - assert(extent_slab_get(extent)); - - for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE)); - } -} - -static void -chunk_deregister(tsdn_t *tsdn, const extent_t *extent) -{ - rtree_elm_t *elm_a, *elm_b; - - extent_rtree_acquire(tsdn, extent, true, false, &elm_a, &elm_b); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); - if (extent_slab_get(extent)) - chunk_interior_deregister(tsdn, extent); - extent_rtree_release(tsdn, elm_a, elm_b); - - if (config_prof && opt_prof && extent_active_get(extent)) { - size_t nsub = (extent_size_get(extent) == 0) ? 1 : - extent_size_get(extent) / chunksize; - assert(atomic_read_z(&curchunks) >= nsub); - atomic_sub_z(&curchunks, nsub); - } -} - -/* - * Do first-best-fit chunk selection, i.e. select the lowest chunk that best - * fits. - */ -static extent_t * -chunk_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], - size_t size) -{ - pszind_t pind, i; - - pind = psz2ind(extent_size_quantize_ceil(size)); - for (i = pind; i < NPSIZES; i++) { - extent_t *extent = extent_heap_first(&extent_heaps[i]); - if (extent != NULL) - return (extent); - } - - return (NULL); -} - -static void -chunk_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - bool cache, extent_t *extent) -{ - - /* - * Leak chunk after making sure its pages have already been purged, so - * that this is only a virtual memory leak. 
- */ - if (cache) { - chunk_purge_wrapper(tsdn, arena, extent_hooks, extent, 0, - extent_size_get(extent)); - } - extent_dalloc(tsdn, arena, extent); -} - -static extent_t * -chunk_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, - size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, - bool slab) -{ - extent_t *extent; - size_t size, alloc_size, leadsize, trailsize; - - assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); - - size = usize + pad; - alloc_size = s2u(size + PAGE_CEILING(alignment) - PAGE); - /* Beware size_t wrap-around. */ - if (alloc_size < usize) - return (NULL); - malloc_mutex_lock(tsdn, &arena->extents_mtx); - extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); - if (new_addr != NULL) { - rtree_elm_t *elm; - - elm = rtree_elm_acquire(tsdn, &extents_rtree, - (uintptr_t)new_addr, false, false); - if (elm != NULL) { - extent = rtree_elm_read_acquired(tsdn, &extents_rtree, - elm); - if (extent != NULL && (extent_active_get(extent) || - extent_retained_get(extent) == cache)) - extent = NULL; - rtree_elm_release(tsdn, &extents_rtree, elm); - } else - extent = NULL; - } else - extent = chunk_first_best_fit(arena, extent_heaps, alloc_size); - if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < - size)) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - return (NULL); - } - extent_heaps_remove(extent_heaps, extent); - arena_chunk_cache_maybe_remove(arena, extent, cache); - - leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); - assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + size); - trailsize = extent_size_get(extent) - leadsize - size; - if (extent_zeroed_get(extent)) - *zero = true; - if (extent_committed_get(extent)) - *commit = true; - - /* Split the lead. 
*/ - if (leadsize != 0) { - extent_t *lead = extent; - extent = chunk_split_wrapper(tsdn, arena, extent_hooks, lead, - leadsize, leadsize, size + trailsize, usize + trailsize); - if (extent == NULL) { - chunk_leak(tsdn, arena, extent_hooks, cache, lead); - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - return (NULL); - } - extent_heaps_insert(extent_heaps, lead); - arena_chunk_cache_maybe_insert(arena, lead, cache); - } - - /* Split the trail. */ - if (trailsize != 0) { - extent_t *trail = chunk_split_wrapper(tsdn, arena, extent_hooks, - extent, size, usize, trailsize, trailsize); - if (trail == NULL) { - chunk_leak(tsdn, arena, extent_hooks, cache, extent); - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - return (NULL); - } - extent_heaps_insert(extent_heaps, trail); - arena_chunk_cache_maybe_insert(arena, trail, cache); - } else if (leadsize == 0) { - /* - * Splitting causes usize to be set as a side effect, but no - * splitting occurred. - */ - extent_usize_set(extent, usize); - } - - if (!extent_committed_get(extent) && - extent_hooks->commit(extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - chunk_record(tsdn, arena, extent_hooks, extent_heaps, cache, - extent); - return (NULL); - } - - if (pad != 0) - extent_addr_randomize(tsdn, extent, alignment); - extent_active_set(extent, true); - if (slab) { - extent_slab_set(extent, slab); - chunk_interior_register(tsdn, extent); - } - - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - - if (*zero) { - if (!extent_zeroed_get(extent)) { - memset(extent_addr_get(extent), 0, - extent_usize_get(extent)); - } else if (config_debug) { - size_t i; - size_t *p = (size_t *)(uintptr_t) - extent_addr_get(extent); - - for (i = 0; i < usize / sizeof(size_t); i++) - assert(p[i] == 0); - } - } - return (extent); -} - -/* - * If the caller specifies (!*zero), it is still possible to receive zeroed - * memory, in which case *zero is 
toggled to true. arena_chunk_alloc() takes - * advantage of this to avoid demanding zeroed chunks, but taking advantage of - * them if they are returned. - */ -static void * -chunk_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) -{ - void *ret; - - assert(size != 0); - assert(alignment != 0); - - /* "primary" dss. */ - if (have_dss && dss_prec == dss_prec_primary && (ret = - chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) - return (ret); - /* mmap. */ - if ((ret = chunk_alloc_mmap(new_addr, size, alignment, zero, commit)) != - NULL) - return (ret); - /* "secondary" dss. */ - if (have_dss && dss_prec == dss_prec_secondary && (ret = - chunk_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) - return (ret); - - /* All strategies for allocation failed. */ - return (NULL); -} - -extent_t * -chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool slab) -{ - extent_t *extent; - bool commit; - - assert(usize + pad != 0); - assert(alignment != 0); - - commit = true; - extent = chunk_recycle(tsdn, arena, extent_hooks, arena->extents_cached, - true, new_addr, usize, pad, alignment, zero, &commit, slab); - if (extent == NULL) - return (NULL); - assert(commit); - return (extent); -} - -static arena_t * -chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) -{ - arena_t *arena; - - arena = arena_get(tsdn, arena_ind, false); - /* - * The arena we're allocating on behalf of must have been initialized - * already. 
- */ - assert(arena != NULL); - return (arena); -} - -static void * -extent_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit, unsigned arena_ind) -{ - void *ret; - tsdn_t *tsdn; - arena_t *arena; - - tsdn = tsdn_fetch(); - arena = chunk_arena_get(tsdn, arena_ind); - ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, arena->dss_prec); - if (ret == NULL) - return (NULL); - - return (ret); -} - -static extent_t * -chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool *commit, bool slab) -{ - extent_t *extent; - - assert(usize != 0); - assert(alignment != 0); - - extent = chunk_recycle(tsdn, arena, extent_hooks, - arena->extents_retained, false, new_addr, usize, pad, alignment, - zero, commit, slab); - if (extent != NULL && config_stats) { - size_t size = usize + pad; - arena->stats.retained -= size; - } - - return (extent); -} - -static extent_t * -chunk_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ - extent_t *extent; - size_t size; - void *addr; - - size = usize + pad; - extent = extent_alloc(tsdn, arena); - if (extent == NULL) - return (NULL); - addr = extent_hooks->alloc(new_addr, size, alignment, zero, commit, - arena->ind); - if (addr == NULL) { - extent_dalloc(tsdn, arena, extent); - return (NULL); - } - extent_init(extent, arena, addr, size, usize, true, zero, commit, slab); - if (pad != 0) - extent_addr_randomize(tsdn, extent, alignment); - if (chunk_register(tsdn, extent)) { - chunk_leak(tsdn, arena, extent_hooks, false, extent); - return (NULL); - } - - return (extent); -} - -extent_t * -chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool 
*commit, bool slab) -{ - extent_t *extent; - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - - extent = chunk_alloc_retained(tsdn, arena, extent_hooks, new_addr, - usize, pad, alignment, zero, commit, slab); - if (extent == NULL) { - extent = chunk_alloc_wrapper_hard(tsdn, arena, extent_hooks, - new_addr, usize, pad, alignment, zero, commit, slab); - } - - return (extent); -} - -static bool -chunk_can_coalesce(const extent_t *a, const extent_t *b) -{ - - if (extent_arena_get(a) != extent_arena_get(b)) - return (false); - if (extent_active_get(a) != extent_active_get(b)) - return (false); - if (extent_committed_get(a) != extent_committed_get(b)) - return (false); - if (extent_retained_get(a) != extent_retained_get(b)) - return (false); - - return (true); -} - -static void -chunk_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES], bool cache) -{ - - if (!chunk_can_coalesce(a, b)) - return; - - extent_heaps_remove(extent_heaps, a); - extent_heaps_remove(extent_heaps, b); - - arena_chunk_cache_maybe_remove(extent_arena_get(a), a, cache); - arena_chunk_cache_maybe_remove(extent_arena_get(b), b, cache); - - if (chunk_merge_wrapper(tsdn, arena, extent_hooks, a, b)) { - extent_heaps_insert(extent_heaps, a); - extent_heaps_insert(extent_heaps, b); - arena_chunk_cache_maybe_insert(extent_arena_get(a), a, cache); - arena_chunk_cache_maybe_insert(extent_arena_get(b), b, cache); - return; - } - - extent_heaps_insert(extent_heaps, a); - arena_chunk_cache_maybe_insert(extent_arena_get(a), a, cache); -} - -static void -chunk_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) -{ - extent_t *prev, *next; - - assert(!cache || !extent_zeroed_get(extent)); - - malloc_mutex_lock(tsdn, &arena->extents_mtx); - extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); - - extent_usize_set(extent, 
0); - extent_active_set(extent, false); - extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); - if (extent_slab_get(extent)) { - chunk_interior_deregister(tsdn, extent); - extent_slab_set(extent, false); - } - - assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); - extent_heaps_insert(extent_heaps, extent); - arena_chunk_cache_maybe_insert(arena, extent, cache); - - /* Try to coalesce forward. */ - next = rtree_read(tsdn, &extents_rtree, - (uintptr_t)extent_past_get(extent), false); - if (next != NULL) { - chunk_try_coalesce(tsdn, arena, extent_hooks, extent, next, - extent_heaps, cache); - } - - /* Try to coalesce backward. */ - prev = rtree_read(tsdn, &extents_rtree, - (uintptr_t)extent_before_get(extent), false); - if (prev != NULL) { - chunk_try_coalesce(tsdn, arena, extent_hooks, prev, extent, - extent_heaps, cache); - } - - malloc_mutex_unlock(tsdn, &arena->extents_mtx); -} - -void -chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent) -{ - - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - - extent_addr_set(extent, extent_base_get(extent)); - extent_zeroed_set(extent, false); - - chunk_record(tsdn, arena, extent_hooks, arena->extents_cached, true, - extent); -} - -static bool -extent_dalloc_default(void *addr, size_t size, bool committed, - unsigned arena_ind) -{ - - if (!have_dss || !chunk_in_dss(tsdn_fetch(), addr)) - return (chunk_dalloc_mmap(addr, size)); - return (true); -} - -void -chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent) -{ - - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - - extent_addr_set(extent, extent_base_get(extent)); - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - /* Try to deallocate. 
*/ - if (!extent_hooks->dalloc(extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena->ind)) { - chunk_deregister(tsdn, extent); - extent_dalloc(tsdn, arena, extent); - return; - } - /* Try to decommit; purge if that fails. */ - if (extent_committed_get(extent)) { - extent_committed_set(extent, - extent_hooks->decommit(extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), - arena->ind)); - } - extent_zeroed_set(extent, !extent_committed_get(extent) || - !extent_hooks->purge(extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); - - if (config_stats) - arena->stats.retained += extent_size_get(extent); - - chunk_record(tsdn, arena, extent_hooks, arena->extents_retained, false, - extent); -} - -static bool -extent_commit_default(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), - length)); -} - -bool -chunk_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent, size_t offset, size_t length) -{ - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - return (extent_hooks->commit(extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); -} - -static bool -extent_decommit_default(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), - length)); -} - -bool -chunk_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) -{ - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - return (extent_hooks->decommit(extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); -} - -static bool -extent_purge_default(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) -{ - - assert(addr != 
NULL); - assert((offset & PAGE_MASK) == 0); - assert(length != 0); - assert((length & PAGE_MASK) == 0); - - return (pages_purge((void *)((uintptr_t)addr + (uintptr_t)offset), - length)); -} - -bool -chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent, size_t offset, size_t length) -{ - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - return (extent_hooks->purge(extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); -} - -static bool -extent_split_default(void *addr, size_t size, size_t size_a, size_t size_b, - bool committed, unsigned arena_ind) -{ - - if (!maps_coalesce) - return (true); - return (false); -} - -extent_t * -chunk_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, - size_t usize_b) -{ - extent_t *trail; - rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; - - assert(extent_size_get(extent) == size_a + size_b); - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - - trail = extent_alloc(tsdn, arena); - if (trail == NULL) - goto label_error_a; - - { - extent_t lead; - - extent_init(&lead, arena, extent_addr_get(extent), size_a, - usize_a, extent_active_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_slab_get(extent)); - - if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, - &lead_elm_b)) - goto label_error_b; - } - - extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, usize_b, extent_active_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_slab_get(extent)); - if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, - &trail_elm_b)) - goto label_error_c; - - if (extent_hooks->split(extent_base_get(extent), size_a + size_b, size_a, - size_b, extent_committed_get(extent), arena->ind)) - goto label_error_d; - - 
extent_size_set(extent, size_a); - extent_usize_set(extent, usize_a); - - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail); - - extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); - extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); - - return (trail); -label_error_d: - extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); -label_error_c: - extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); -label_error_b: - extent_dalloc(tsdn, arena, trail); -label_error_a: - return (NULL); -} - -static bool -extent_merge_default(void *addr_a, size_t size_a, void *addr_b, size_t size_b, - bool committed, unsigned arena_ind) -{ - - if (!maps_coalesce) - return (true); - if (have_dss) { - tsdn_t *tsdn = tsdn_fetch(); - if (chunk_in_dss(tsdn, addr_a) != chunk_in_dss(tsdn, addr_b)) - return (true); - } - - return (false); -} - -bool -chunk_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *a, extent_t *b) -{ - rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - if (extent_hooks->merge(extent_base_get(a), extent_size_get(a), - extent_base_get(b), extent_size_get(b), extent_committed_get(a), - arena->ind)) - return (true); - - /* - * The rtree writes must happen while all the relevant elements are - * owned, so the following code uses decomposed helper functions rather - * than chunk_{,de}register() to do things in the right order. 
- */ - extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b); - extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); - - if (a_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, NULL); - rtree_elm_release(tsdn, &extents_rtree, a_elm_b); - } - if (b_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, NULL); - rtree_elm_release(tsdn, &extents_rtree, b_elm_a); - } else - b_elm_b = b_elm_a; - - extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_usize_set(a, extent_usize_get(a) + extent_usize_get(b)); - extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); - extent_rtree_release(tsdn, a_elm_a, b_elm_b); - - extent_dalloc(tsdn, extent_arena_get(b), b); - - return (false); -} bool chunk_boot(void) @@ -959,29 +47,5 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); - return (false); } - -void -chunk_prefork(tsdn_t *tsdn) -{ - - chunk_dss_prefork(tsdn); -} - -void -chunk_postfork_parent(tsdn_t *tsdn) -{ - - chunk_dss_postfork_parent(tsdn); -} - -void -chunk_postfork_child(tsdn_t *tsdn) -{ - - chunk_dss_postfork_child(tsdn); -} diff --git a/src/ctl.c b/src/ctl.c index 5ff2a42d..61f3aa1c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1560,11 +1560,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + extent_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = extent_dss_prec_get(tsd_tsdn(tsd)); } dss = dss_prec_names[dss_prec_old]; diff --git a/src/extent.c b/src/extent.c index 3e62e3bc..9f3ddd95 100644 --- a/src/extent.c +++ b/src/extent.c @@ -6,6 +6,45 @@ rtree_t 
extents_rtree; +static void *extent_alloc_default(void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind); +static bool extent_dalloc_default(void *addr, size_t size, bool committed, + unsigned arena_ind); +static bool extent_commit_default(void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind); +static bool extent_decommit_default(void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind); +static bool extent_purge_default(void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind); +static bool extent_split_default(void *addr, size_t size, size_t size_a, + size_t size_b, bool committed, unsigned arena_ind); +static bool extent_merge_default(void *addr_a, size_t size_a, void *addr_b, + size_t size_b, bool committed, unsigned arena_ind); + +const extent_hooks_t extent_hooks_default = { + extent_alloc_default, + extent_dalloc_default, + extent_commit_default, + extent_decommit_default, + extent_purge_default, + extent_split_default, + extent_merge_default +}; + +/* Used exclusively for gdump triggering. */ +static size_t curchunks; +static size_t highchunks; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ + +static void extent_record(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], + bool cache, extent_t *extent); + /******************************************************************************/ extent_t * @@ -34,6 +73,91 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); } +static extent_hooks_t +extent_hooks_get_locked(arena_t *arena) +{ + + return (arena->extent_hooks); +} + +extent_hooks_t +extent_hooks_get(tsdn_t *tsdn, arena_t *arena) +{ + extent_hooks_t extent_hooks; + + malloc_mutex_lock(tsdn, &arena->extents_mtx); + extent_hooks = extent_hooks_get_locked(arena); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + + return (extent_hooks); +} + +extent_hooks_t +extent_hooks_set(tsdn_t *tsdn, arena_t *arena, + const extent_hooks_t *extent_hooks) +{ + extent_hooks_t old_extent_hooks; + + malloc_mutex_lock(tsdn, &arena->extents_mtx); + old_extent_hooks = arena->extent_hooks; + /* + * Copy each field atomically so that it is impossible for readers to + * see partially updated pointers. There are places where readers only + * need one hook function pointer (therefore no need to copy the + * entirety of arena->extent_hooks), and stale reads do not affect + * correctness, so they perform unlocked reads. 
+ */ +#define ATOMIC_COPY_HOOK(n) do { \ + union { \ + extent_##n##_t **n; \ + void **v; \ + } u; \ + u.n = &arena->extent_hooks.n; \ + atomic_write_p(u.v, extent_hooks->n); \ +} while (0) + ATOMIC_COPY_HOOK(alloc); + ATOMIC_COPY_HOOK(dalloc); + ATOMIC_COPY_HOOK(commit); + ATOMIC_COPY_HOOK(decommit); + ATOMIC_COPY_HOOK(purge); + ATOMIC_COPY_HOOK(split); + ATOMIC_COPY_HOOK(merge); +#undef ATOMIC_COPY_HOOK + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + + return (old_extent_hooks); +} + +static void +extent_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, bool locked) +{ + static const extent_hooks_t uninitialized_hooks = + EXTENT_HOOKS_INITIALIZER; + + if (memcmp(extent_hooks, &uninitialized_hooks, sizeof(extent_hooks_t)) + == 0) { + *extent_hooks = locked ? extent_hooks_get_locked(arena) : + extent_hooks_get(tsdn, arena); + } +} + +static void +extent_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks) +{ + + extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, true); +} + +static void +extent_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks) +{ + + extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, false); +} + #ifdef JEMALLOC_JET #undef extent_size_quantize_floor #define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) @@ -118,6 +242,787 @@ extent_ad_comp(const extent_t *a, const extent_t *b) /* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_ad_comp) +static void +extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) +{ + size_t psz = extent_size_quantize_floor(extent_size_get(extent)); + pszind_t pind = psz2ind(psz); + extent_heap_insert(&extent_heaps[pind], extent); +} + +static void +extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) +{ + size_t psz = extent_size_quantize_floor(extent_size_get(extent)); + pszind_t pind = psz2ind(psz); + extent_heap_remove(&extent_heaps[pind], extent); +} + +static bool +extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, + bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) +{ + + *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, + (uintptr_t)extent_base_get(extent), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) + return (true); + assert(*r_elm_a != NULL); + + if (extent_size_get(extent) > PAGE) { + *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, + (uintptr_t)extent_last_get(extent), dependent, + init_missing); + if (!dependent && *r_elm_b == NULL) + return (true); + assert(*r_elm_b != NULL); + } else + *r_elm_b = NULL; + + return (false); +} + +static void +extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, + rtree_elm_t *elm_b, const extent_t *extent) +{ + + rtree_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); + if (elm_b != NULL) + rtree_elm_write_acquired(tsdn, &extents_rtree, elm_b, extent); +} + +static void +extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) +{ + + rtree_elm_release(tsdn, &extents_rtree, elm_a); + if (elm_b != NULL) + rtree_elm_release(tsdn, &extents_rtree, elm_b); +} + +static void +extent_interior_register(tsdn_t *tsdn, const extent_t *extent) +{ + size_t i; + + assert(extent_slab_get(extent)); + + for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &extents_rtree, + 
(uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + LG_PAGE), extent); + } +} + +static bool +extent_register(tsdn_t *tsdn, const extent_t *extent) +{ + rtree_elm_t *elm_a, *elm_b; + + if (extent_rtree_acquire(tsdn, extent, false, true, &elm_a, &elm_b)) + return (true); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); + if (extent_slab_get(extent)) + extent_interior_register(tsdn, extent); + extent_rtree_release(tsdn, elm_a, elm_b); + + if (config_prof && opt_prof && extent_active_get(extent)) { + size_t nadd = (extent_size_get(extent) == 0) ? 1 : + extent_size_get(extent) / chunksize; + size_t cur = atomic_add_z(&curchunks, nadd); + size_t high = atomic_read_z(&highchunks); + while (cur > high && atomic_cas_z(&highchunks, high, cur)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highchunks update race. + */ + high = atomic_read_z(&highchunks); + } + if (cur > high && prof_gdump_get_unlocked()) + prof_gdump(tsdn); + } + + return (false); +} + +static void +extent_interior_deregister(tsdn_t *tsdn, const extent_t *extent) +{ + size_t i; + + assert(extent_slab_get(extent)); + + for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &extents_rtree, + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + LG_PAGE)); + } +} + +static void +extent_deregister(tsdn_t *tsdn, const extent_t *extent) +{ + rtree_elm_t *elm_a, *elm_b; + + extent_rtree_acquire(tsdn, extent, true, false, &elm_a, &elm_b); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); + if (extent_slab_get(extent)) + extent_interior_deregister(tsdn, extent); + extent_rtree_release(tsdn, elm_a, elm_b); + + if (config_prof && opt_prof && extent_active_get(extent)) { + size_t nsub = (extent_size_get(extent) == 0) ? 1 : + extent_size_get(extent) / chunksize; + assert(atomic_read_z(&curchunks) >= nsub); + atomic_sub_z(&curchunks, nsub); + } +} + +/* + * Do first-best-fit extent selection, i.e. 
select the lowest extent that best + * fits. + */ +static extent_t * +extent_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], + size_t size) +{ + pszind_t pind, i; + + pind = psz2ind(extent_size_quantize_ceil(size)); + for (i = pind; i < NPSIZES; i++) { + extent_t *extent = extent_heap_first(&extent_heaps[i]); + if (extent != NULL) + return (extent); + } + + return (NULL); +} + +static void +extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + bool cache, extent_t *extent) +{ + + /* + * Leak extent after making sure its pages have already been purged, so + * that this is only a virtual memory leak. + */ + if (cache) { + extent_purge_wrapper(tsdn, arena, extent_hooks, extent, 0, + extent_size_get(extent)); + } + extent_dalloc(tsdn, arena, extent); +} + +static extent_t * +extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, + size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, + bool slab) +{ + extent_t *extent; + size_t size, alloc_size, leadsize, trailsize; + + assert(new_addr == NULL || !slab); + assert(pad == 0 || !slab); + + size = usize + pad; + alloc_size = s2u(size + PAGE_CEILING(alignment) - PAGE); + /* Beware size_t wrap-around. 
*/ + if (alloc_size < usize) + return (NULL); + malloc_mutex_lock(tsdn, &arena->extents_mtx); + extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); + if (new_addr != NULL) { + rtree_elm_t *elm; + + elm = rtree_elm_acquire(tsdn, &extents_rtree, + (uintptr_t)new_addr, false, false); + if (elm != NULL) { + extent = rtree_elm_read_acquired(tsdn, &extents_rtree, + elm); + if (extent != NULL && (extent_active_get(extent) || + extent_retained_get(extent) == cache)) + extent = NULL; + rtree_elm_release(tsdn, &extents_rtree, elm); + } else + extent = NULL; + } else + extent = extent_first_best_fit(arena, extent_heaps, alloc_size); + if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < + size)) { + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + return (NULL); + } + extent_heaps_remove(extent_heaps, extent); + arena_extent_cache_maybe_remove(arena, extent, cache); + + leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); + assert(new_addr == NULL || leadsize == 0); + assert(extent_size_get(extent) >= leadsize + size); + trailsize = extent_size_get(extent) - leadsize - size; + if (extent_zeroed_get(extent)) + *zero = true; + if (extent_committed_get(extent)) + *commit = true; + + /* Split the lead. */ + if (leadsize != 0) { + extent_t *lead = extent; + extent = extent_split_wrapper(tsdn, arena, extent_hooks, lead, + leadsize, leadsize, size + trailsize, usize + trailsize); + if (extent == NULL) { + extent_leak(tsdn, arena, extent_hooks, cache, lead); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + return (NULL); + } + extent_heaps_insert(extent_heaps, lead); + arena_extent_cache_maybe_insert(arena, lead, cache); + } + + /* Split the trail. 
*/ + if (trailsize != 0) { + extent_t *trail = extent_split_wrapper(tsdn, arena, + extent_hooks, extent, size, usize, trailsize, trailsize); + if (trail == NULL) { + extent_leak(tsdn, arena, extent_hooks, cache, extent); + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + return (NULL); + } + extent_heaps_insert(extent_heaps, trail); + arena_extent_cache_maybe_insert(arena, trail, cache); + } else if (leadsize == 0) { + /* + * Splitting causes usize to be set as a side effect, but no + * splitting occurred. + */ + extent_usize_set(extent, usize); + } + + if (!extent_committed_get(extent) && + extent_hooks->commit(extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + extent_record(tsdn, arena, extent_hooks, extent_heaps, cache, + extent); + return (NULL); + } + + if (pad != 0) + extent_addr_randomize(tsdn, extent, alignment); + extent_active_set(extent, true); + if (slab) { + extent_slab_set(extent, slab); + extent_interior_register(tsdn, extent); + } + + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + + if (*zero) { + if (!extent_zeroed_get(extent)) { + memset(extent_addr_get(extent), 0, + extent_usize_get(extent)); + } else if (config_debug) { + size_t i; + size_t *p = (size_t *)(uintptr_t) + extent_addr_get(extent); + + for (i = 0; i < usize / sizeof(size_t); i++) + assert(p[i] == 0); + } + } + return (extent); +} + +/* + * If the caller specifies (!*zero), it is still possible to receive zeroed + * memory, in which case *zero is toggled to true. arena_extent_alloc() takes + * advantage of this to avoid demanding zeroed extents, but taking advantage of + * them if they are returned. + */ +static void * +extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) +{ + void *ret; + + assert(size != 0); + assert(alignment != 0); + + /* "primary" dss. 
*/ + if (have_dss && dss_prec == dss_prec_primary && (ret = + extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) + return (ret); + /* mmap. */ + if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) + != NULL) + return (ret); + /* "secondary" dss. */ + if (have_dss && dss_prec == dss_prec_secondary && (ret = + extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) + return (ret); + + /* All strategies for allocation failed. */ + return (NULL); +} + +extent_t * +extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, + bool slab) +{ + extent_t *extent; + bool commit; + + assert(usize + pad != 0); + assert(alignment != 0); + + commit = true; + extent = extent_recycle(tsdn, arena, extent_hooks, + arena->extents_cached, true, new_addr, usize, pad, alignment, zero, + &commit, slab); + if (extent == NULL) + return (NULL); + assert(commit); + return (extent); +} + +static void * +extent_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit, unsigned arena_ind) +{ + void *ret; + tsdn_t *tsdn; + arena_t *arena; + + tsdn = tsdn_fetch(); + arena = arena_get(tsdn, arena_ind, false); + /* + * The arena we're allocating on behalf of must have been initialized + * already. 
+ */ + assert(arena != NULL); + ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + commit, arena->dss_prec); + if (ret == NULL) + return (NULL); + + return (ret); +} + +static extent_t * +extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) +{ + extent_t *extent; + + assert(usize != 0); + assert(alignment != 0); + + extent = extent_recycle(tsdn, arena, extent_hooks, + arena->extents_retained, false, new_addr, usize, pad, alignment, + zero, commit, slab); + if (extent != NULL && config_stats) { + size_t size = usize + pad; + arena->stats.retained -= size; + } + + return (extent); +} + +static extent_t * +extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) +{ + extent_t *extent; + size_t size; + void *addr; + + size = usize + pad; + extent = extent_alloc(tsdn, arena); + if (extent == NULL) + return (NULL); + addr = extent_hooks->alloc(new_addr, size, alignment, zero, commit, + arena->ind); + if (addr == NULL) { + extent_dalloc(tsdn, arena, extent); + return (NULL); + } + extent_init(extent, arena, addr, size, usize, true, zero, commit, slab); + if (pad != 0) + extent_addr_randomize(tsdn, extent, alignment); + if (extent_register(tsdn, extent)) { + extent_leak(tsdn, arena, extent_hooks, false, extent); + return (NULL); + } + + return (extent); +} + +extent_t * +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, + bool *commit, bool slab) +{ + extent_t *extent; + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + + extent = extent_alloc_retained(tsdn, arena, extent_hooks, new_addr, + usize, pad, alignment, zero, commit, slab); + if (extent == NULL) { + extent = 
extent_alloc_wrapper_hard(tsdn, arena, extent_hooks, + new_addr, usize, pad, alignment, zero, commit, slab); + } + + return (extent); +} + +static bool +extent_can_coalesce(const extent_t *a, const extent_t *b) +{ + + if (extent_arena_get(a) != extent_arena_get(b)) + return (false); + if (extent_active_get(a) != extent_active_get(b)) + return (false); + if (extent_committed_get(a) != extent_committed_get(b)) + return (false); + if (extent_retained_get(a) != extent_retained_get(b)) + return (false); + + return (true); +} + +static void +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES], bool cache) +{ + + if (!extent_can_coalesce(a, b)) + return; + + extent_heaps_remove(extent_heaps, a); + extent_heaps_remove(extent_heaps, b); + + arena_extent_cache_maybe_remove(extent_arena_get(a), a, cache); + arena_extent_cache_maybe_remove(extent_arena_get(b), b, cache); + + if (extent_merge_wrapper(tsdn, arena, extent_hooks, a, b)) { + extent_heaps_insert(extent_heaps, a); + extent_heaps_insert(extent_heaps, b); + arena_extent_cache_maybe_insert(extent_arena_get(a), a, cache); + arena_extent_cache_maybe_insert(extent_arena_get(b), b, cache); + return; + } + + extent_heaps_insert(extent_heaps, a); + arena_extent_cache_maybe_insert(extent_arena_get(a), a, cache); +} + +static void +extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) +{ + extent_t *prev, *next; + + assert(!cache || !extent_zeroed_get(extent)); + + malloc_mutex_lock(tsdn, &arena->extents_mtx); + extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); + + extent_usize_set(extent, 0); + extent_active_set(extent, false); + extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); + if (extent_slab_get(extent)) { + extent_interior_deregister(tsdn, extent); + extent_slab_set(extent, false); + } + + 
assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); + extent_heaps_insert(extent_heaps, extent); + arena_extent_cache_maybe_insert(arena, extent, cache); + + /* Try to coalesce forward. */ + next = rtree_read(tsdn, &extents_rtree, + (uintptr_t)extent_past_get(extent), false); + if (next != NULL) { + extent_try_coalesce(tsdn, arena, extent_hooks, extent, next, + extent_heaps, cache); + } + + /* Try to coalesce backward. */ + prev = rtree_read(tsdn, &extents_rtree, + (uintptr_t)extent_before_get(extent), false); + if (prev != NULL) { + extent_try_coalesce(tsdn, arena, extent_hooks, prev, extent, + extent_heaps, cache); + } + + malloc_mutex_unlock(tsdn, &arena->extents_mtx); +} + +void +extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + extent_t *extent) +{ + + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + + extent_addr_set(extent, extent_base_get(extent)); + extent_zeroed_set(extent, false); + + extent_record(tsdn, arena, extent_hooks, arena->extents_cached, true, + extent); +} + +static bool +extent_dalloc_default(void *addr, size_t size, bool committed, + unsigned arena_ind) +{ + + if (!have_dss || !extent_in_dss(tsdn_fetch(), addr)) + return (extent_dalloc_mmap(addr, size)); + return (true); +} + +void +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent) +{ + + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + + extent_addr_set(extent, extent_base_get(extent)); + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + /* Try to deallocate. */ + if (!extent_hooks->dalloc(extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena->ind)) { + extent_deregister(tsdn, extent); + extent_dalloc(tsdn, arena, extent); + return; + } + /* Try to decommit; purge if that fails. 
*/ + if (extent_committed_get(extent)) { + extent_committed_set(extent, + extent_hooks->decommit(extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena->ind)); + } + extent_zeroed_set(extent, !extent_committed_get(extent) || + !extent_hooks->purge(extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); + + if (config_stats) + arena->stats.retained += extent_size_get(extent); + + extent_record(tsdn, arena, extent_hooks, arena->extents_retained, false, + extent); +} + +static bool +extent_commit_default(void *addr, size_t size, size_t offset, size_t length, + unsigned arena_ind) +{ + + return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), + length)); +} + +bool +extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length) +{ + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + return (extent_hooks->commit(extent_base_get(extent), + extent_size_get(extent), offset, length, arena->ind)); +} + +static bool +extent_decommit_default(void *addr, size_t size, size_t offset, size_t length, + unsigned arena_ind) +{ + + return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), + length)); +} + +bool +extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + size_t length) +{ + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + return (extent_hooks->decommit(extent_base_get(extent), + extent_size_get(extent), offset, length, arena->ind)); +} + +static bool +extent_purge_default(void *addr, size_t size, size_t offset, size_t length, + unsigned arena_ind) +{ + + assert(addr != NULL); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + + return (pages_purge((void *)((uintptr_t)addr + (uintptr_t)offset), + length)); +} + +bool +extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, 
extent_hooks_t *extent_hooks, + extent_t *extent, size_t offset, size_t length) +{ + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + return (extent_hooks->purge(extent_base_get(extent), + extent_size_get(extent), offset, length, arena->ind)); +} + +static bool +extent_split_default(void *addr, size_t size, size_t size_a, size_t size_b, + bool committed, unsigned arena_ind) +{ + + if (!maps_coalesce) + return (true); + return (false); +} + +extent_t * +extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, + size_t usize_b) +{ + extent_t *trail; + rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; + + assert(extent_size_get(extent) == size_a + size_b); + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + + trail = extent_alloc(tsdn, arena); + if (trail == NULL) + goto label_error_a; + + { + extent_t lead; + + extent_init(&lead, arena, extent_addr_get(extent), size_a, + usize_a, extent_active_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_slab_get(extent)); + + if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, + &lead_elm_b)) + goto label_error_b; + } + + extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + + size_a), size_b, usize_b, extent_active_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_slab_get(extent)); + if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, + &trail_elm_b)) + goto label_error_c; + + if (extent_hooks->split(extent_base_get(extent), size_a + size_b, + size_a, size_b, extent_committed_get(extent), arena->ind)) + goto label_error_d; + + extent_size_set(extent, size_a); + extent_usize_set(extent, usize_a); + + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent); + extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail); + + extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); + 
extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); + + return (trail); +label_error_d: + extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); +label_error_c: + extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); +label_error_b: + extent_dalloc(tsdn, arena, trail); +label_error_a: + return (NULL); +} + +static bool +extent_merge_default(void *addr_a, size_t size_a, void *addr_b, size_t size_b, + bool committed, unsigned arena_ind) +{ + + if (!maps_coalesce) + return (true); + if (have_dss) { + tsdn_t *tsdn = tsdn_fetch(); + if (extent_in_dss(tsdn, addr_a) != extent_in_dss(tsdn, addr_b)) + return (true); + } + + return (false); +} + +bool +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, + extent_t *a, extent_t *b) +{ + rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + + extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + if (extent_hooks->merge(extent_base_get(a), extent_size_get(a), + extent_base_get(b), extent_size_get(b), extent_committed_get(a), + arena->ind)) + return (true); + + /* + * The rtree writes must happen while all the relevant elements are + * owned, so the following code uses decomposed helper functions rather + * than extent_{,de}register() to do things in the right order. 
+ */ + extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b); + extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); + + if (a_elm_b != NULL) { + rtree_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, NULL); + rtree_elm_release(tsdn, &extents_rtree, a_elm_b); + } + if (b_elm_b != NULL) { + rtree_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, NULL); + rtree_elm_release(tsdn, &extents_rtree, b_elm_a); + } else + b_elm_b = b_elm_a; + + extent_size_set(a, extent_size_get(a) + extent_size_get(b)); + extent_usize_set(a, extent_usize_get(a) + extent_usize_get(b)); + extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); + extent_rtree_release(tsdn, a_elm_a, b_elm_b); + + extent_dalloc(tsdn, extent_arena_get(b), b); + + return (false); +} + bool extent_boot(void) { @@ -126,5 +1031,29 @@ extent_boot(void) LG_PAGE))) return (true); + if (have_dss && extent_dss_boot()) + return (true); + return (false); } + +void +extent_prefork(tsdn_t *tsdn) +{ + + extent_dss_prefork(tsdn); +} + +void +extent_postfork_parent(tsdn_t *tsdn) +{ + + extent_dss_postfork_parent(tsdn); +} + +void +extent_postfork_child(tsdn_t *tsdn) +{ + + extent_dss_postfork_child(tsdn); +} diff --git a/src/chunk_dss.c b/src/extent_dss.c similarity index 86% rename from src/chunk_dss.c rename to src/extent_dss.c index 9fa4ad81..0e34a440 100644 --- a/src/chunk_dss.c +++ b/src/extent_dss.c @@ -1,4 +1,4 @@ -#define JEMALLOC_CHUNK_DSS_C_ +#define JEMALLOC_EXTENT_DSS_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ /* Data. 
*/ @@ -29,7 +29,7 @@ static void *dss_max; /******************************************************************************/ static void * -chunk_dss_sbrk(intptr_t increment) +extent_dss_sbrk(intptr_t increment) { #ifdef JEMALLOC_DSS @@ -41,7 +41,7 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsdn_t *tsdn) +extent_dss_prec_get(tsdn_t *tsdn) { dss_prec_t ret; @@ -54,7 +54,7 @@ chunk_dss_prec_get(tsdn_t *tsdn) } bool -chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +extent_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) { if (!have_dss) @@ -66,7 +66,7 @@ chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) } void * -chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, +extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; @@ -104,7 +104,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, break; /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); + dss_max = extent_dss_sbrk(0); /* Make sure the earlier condition still holds. */ if (new_addr != NULL && dss_max != new_addr) @@ -128,7 +128,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, (uintptr_t)dss_next < (uintptr_t)dss_max) break; /* Wrap-around. 
*/ incr = pad_size + size; - dss_prev = chunk_dss_sbrk(incr); + dss_prev = extent_dss_sbrk(incr); if (dss_prev == (void *)-1) break; if (dss_prev == dss_max) { @@ -138,7 +138,7 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (pad_size != 0) { extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; - chunk_dalloc_wrapper(tsdn, arena, + extent_dalloc_wrapper(tsdn, arena, &extent_hooks, pad); } else extent_dalloc(tsdn, arena, pad); @@ -157,15 +157,15 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } bool -chunk_in_dss(tsdn_t *tsdn, void *chunk) +extent_in_dss(tsdn_t *tsdn, void *addr) { bool ret; cassert(have_dss); malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) + if ((uintptr_t)addr >= (uintptr_t)dss_base + && (uintptr_t)addr < (uintptr_t)dss_max) ret = true; else ret = false; @@ -175,14 +175,14 @@ chunk_in_dss(tsdn_t *tsdn, void *chunk) } bool -chunk_dss_boot(void) +extent_dss_boot(void) { cassert(have_dss); if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) return (true); - dss_base = chunk_dss_sbrk(0); + dss_base = extent_dss_sbrk(0); dss_prev = dss_base; dss_max = dss_base; @@ -190,7 +190,7 @@ chunk_dss_boot(void) } void -chunk_dss_prefork(tsdn_t *tsdn) +extent_dss_prefork(tsdn_t *tsdn) { if (have_dss) @@ -198,7 +198,7 @@ chunk_dss_prefork(tsdn_t *tsdn) } void -chunk_dss_postfork_parent(tsdn_t *tsdn) +extent_dss_postfork_parent(tsdn_t *tsdn) { if (have_dss) @@ -206,7 +206,7 @@ chunk_dss_postfork_parent(tsdn_t *tsdn) } void -chunk_dss_postfork_child(tsdn_t *tsdn) +extent_dss_postfork_child(tsdn_t *tsdn) { if (have_dss) diff --git a/src/chunk_mmap.c b/src/extent_mmap.c similarity index 83% rename from src/chunk_mmap.c rename to src/extent_mmap.c index e1ee26f4..0dd3247e 100644 --- a/src/chunk_mmap.c +++ b/src/extent_mmap.c @@ -1,10 +1,10 @@ -#define JEMALLOC_CHUNK_MMAP_C_ +#define JEMALLOC_EXTENT_MMAP_C_ #include 
"jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ static void * -chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) +extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; size_t alloc_size; @@ -30,7 +30,7 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) } void * -chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, +extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; @@ -58,7 +58,7 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); - return (chunk_alloc_mmap_slow(size, alignment, zero, commit)); + return (extent_alloc_mmap_slow(size, alignment, zero, commit)); } assert(ret != NULL); @@ -67,10 +67,10 @@ chunk_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, } bool -chunk_dalloc_mmap(void *chunk, size_t size) +extent_dalloc_mmap(void *addr, size_t size) { if (config_munmap) - pages_unmap(chunk, size); + pages_unmap(addr, size); return (!config_munmap); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 03e61df6..82d2e6b3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1067,7 +1067,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(NULL, + if (extent_dss_prec_set(NULL, i)) { malloc_conf_error( "Error setting dss", @@ -2686,7 +2686,7 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - chunk_prefork(tsd_tsdn(tsd)); + extent_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2715,7 +2715,7 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has 
completed. */ - chunk_postfork_parent(tsd_tsdn(tsd)); + extent_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2740,7 +2740,7 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_child(tsd_tsdn(tsd)); + extent_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/large.c b/src/large.c index ce8d32fb..60a0745e 100644 --- a/src/large.c +++ b/src/large.c @@ -28,13 +28,13 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); /* - * Copy zero into is_zeroed and pass the copy to chunk_alloc(), so that + * Copy zero into is_zeroed and pass the copy to extent_alloc(), so that * it is possible to make correct junk/zero fill decisions below. */ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL) || (extent = arena_chunk_alloc_large(tsdn, + if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) return (NULL); @@ -82,10 +82,10 @@ large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) if (config_fill && have_dss && unlikely(opt_junk_free)) { /* - * Only bother junk filling if the chunk isn't about to be + * Only bother junk filling if the extent isn't about to be * unmapped. */ - if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && extent_in_dss(tsdn, ptr))) large_dalloc_junk(ptr, usize); memset(ptr, JEMALLOC_FREE_JUNK, usize); } @@ -103,7 +103,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) /* Split excess pages. 
*/ if (diff != 0) { - extent_t *trail = chunk_split_wrapper(tsdn, arena, + extent_t *trail = extent_split_wrapper(tsdn, arena, &extent_hooks, extent, usize + large_pad, usize, diff, diff); if (trail == NULL) @@ -114,10 +114,10 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) extent_usize_get(trail)); } - arena_chunk_cache_dalloc(tsdn, arena, &extent_hooks, trail); + arena_extent_cache_dalloc(tsdn, arena, &extent_hooks, trail); } - arena_chunk_ralloc_large_shrink(tsdn, arena, extent, oldusize); + arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); return (false); } @@ -133,18 +133,18 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, size_t trailsize = usize - extent_usize_get(extent); extent_t *trail; - if ((trail = arena_chunk_cache_alloc(tsdn, arena, &extent_hooks, + if ((trail = arena_extent_cache_alloc(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) == NULL) { bool commit = true; - if ((trail = chunk_alloc_wrapper(tsdn, arena, &extent_hooks, + if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, &commit, false)) == NULL) return (true); } - if (chunk_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { - chunk_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); + if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { + extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); return (true); } @@ -174,7 +174,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_chunk_ralloc_large_expand(tsdn, arena, extent, oldusize); + arena_extent_ralloc_large_expand(tsdn, arena, extent, oldusize); return (false); } @@ -209,7 +209,7 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, } /* - * Avoid moving the allocation if the existing chunk size accommodates + * Avoid 
moving the allocation if the existing extent size accommodates * the new size. */ if (extent_usize_get(extent) >= usize_min && extent_usize_get(extent) <= @@ -287,7 +287,7 @@ large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) large_dalloc_maybe_junk(tsdn, extent_addr_get(extent), extent_usize_get(extent)); } - arena_chunk_dalloc_large(tsdn, arena, extent, junked_locked); + arena_extent_dalloc_large(tsdn, arena, extent, junked_locked); if (!junked_locked) arena_decay_tick(tsdn, arena); diff --git a/test/integration/chunk.c b/test/integration/extent.c similarity index 72% rename from test/integration/chunk.c rename to test/integration/extent.c index 10c4ba77..15b96a00 100644 --- a/test/integration/chunk.c +++ b/test/integration/extent.c @@ -25,7 +25,7 @@ static bool did_merge; #endif void * -chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, +extent_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { @@ -38,86 +38,86 @@ chunk_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, } bool -chunk_dalloc(void *chunk, size_t size, bool committed, unsigned arena_ind) +extent_dalloc(void *addr, size_t size, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(chunk=%p, size=%zu, committed=%s, arena_ind=%u)\n", - __func__, chunk, size, committed ? "true" : "false", arena_ind); + TRACE_HOOK("%s(addr=%p, size=%zu, committed=%s, arena_ind=%u)\n", + __func__, addr, size, committed ? 
"true" : "false", arena_ind); did_dalloc = true; if (!do_dalloc) return (true); - return (old_hooks.dalloc(chunk, size, committed, arena_ind)); + return (old_hooks.dalloc(addr, size, committed, arena_ind)); } bool -chunk_commit(void *chunk, size_t size, size_t offset, size_t length, +extent_commit(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(chunk=%p, size=%zu, offset=%zu, length=%zu, " - "arena_ind=%u)\n", __func__, chunk, size, offset, length, + TRACE_HOOK("%s(addr=%p, size=%zu, offset=%zu, length=%zu, " + "arena_ind=%u)\n", __func__, addr, size, offset, length, arena_ind); - err = old_hooks.commit(chunk, size, offset, length, arena_ind); + err = old_hooks.commit(addr, size, offset, length, arena_ind); did_commit = !err; return (err); } bool -chunk_decommit(void *chunk, size_t size, size_t offset, size_t length, +extent_decommit(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(chunk=%p, size=%zu, offset=%zu, length=%zu, " - "arena_ind=%u)\n", __func__, chunk, size, offset, length, + TRACE_HOOK("%s(addr=%p, size=%zu, offset=%zu, length=%zu, " + "arena_ind=%u)\n", __func__, addr, size, offset, length, arena_ind); if (!do_decommit) return (true); - err = old_hooks.decommit(chunk, size, offset, length, arena_ind); + err = old_hooks.decommit(addr, size, offset, length, arena_ind); did_decommit = !err; return (err); } bool -chunk_purge(void *chunk, size_t size, size_t offset, size_t length, +extent_purge(void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - TRACE_HOOK("%s(chunk=%p, size=%zu, offset=%zu, length=%zu " - "arena_ind=%u)\n", __func__, chunk, size, offset, length, + TRACE_HOOK("%s(addr=%p, size=%zu, offset=%zu, length=%zu " + "arena_ind=%u)\n", __func__, addr, size, offset, length, arena_ind); did_purge = true; - return (old_hooks.purge(chunk, size, offset, length, arena_ind)); + return (old_hooks.purge(addr, size, 
offset, length, arena_ind)); } bool -chunk_split(void *chunk, size_t size, size_t size_a, size_t size_b, +extent_split(void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(chunk=%p, size=%zu, size_a=%zu, size_b=%zu, " - "committed=%s, arena_ind=%u)\n", __func__, chunk, size, size_a, + TRACE_HOOK("%s(addr=%p, size=%zu, size_a=%zu, size_b=%zu, " + "committed=%s, arena_ind=%u)\n", __func__, addr, size, size_a, size_b, committed ? "true" : "false", arena_ind); did_split = true; - return (old_hooks.split(chunk, size, size_a, size_b, committed, + return (old_hooks.split(addr, size, size_a, size_b, committed, arena_ind)); } bool -chunk_merge(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, +extent_merge(void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(chunk_a=%p, size_a=%zu, chunk_b=%p size_b=%zu, " - "committed=%s, arena_ind=%u)\n", __func__, chunk_a, size_a, chunk_b, + TRACE_HOOK("%s(addr_a=%p, size_a=%zu, addr_b=%p size_b=%zu, " + "committed=%s, arena_ind=%u)\n", __func__, addr_a, size_a, addr_b, size_b, committed ? 
"true" : "false", arena_ind); did_merge = true; - return (old_hooks.merge(chunk_a, size_a, chunk_b, size_b, + return (old_hooks.merge(addr_a, size_a, addr_b, size_b, committed, arena_ind)); } -TEST_BEGIN(test_chunk) +TEST_BEGIN(test_extent) { void *p; size_t old_size, new_size, large0, large1, large2, sz; @@ -126,13 +126,13 @@ TEST_BEGIN(test_chunk) size_t hooks_mib[3], purge_mib[3]; size_t hooks_miblen, purge_miblen; extent_hooks_t new_hooks = { - chunk_alloc, - chunk_dalloc, - chunk_commit, - chunk_decommit, - chunk_purge, - chunk_split, - chunk_merge + extent_alloc, + extent_dalloc, + extent_commit, + extent_decommit, + extent_purge, + extent_split, + extent_merge }; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; @@ -151,16 +151,16 @@ TEST_BEGIN(test_chunk) assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, &new_hooks, new_size), 0, "Unexpected extent_hooks error"); orig_hooks = old_hooks; - assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); - assert_ptr_ne(old_hooks.dalloc, chunk_dalloc, + assert_ptr_ne(old_hooks.alloc, extent_alloc, "Unexpected alloc error"); + assert_ptr_ne(old_hooks.dalloc, extent_dalloc, "Unexpected dalloc error"); - assert_ptr_ne(old_hooks.commit, chunk_commit, + assert_ptr_ne(old_hooks.commit, extent_commit, "Unexpected commit error"); - assert_ptr_ne(old_hooks.decommit, chunk_decommit, + assert_ptr_ne(old_hooks.decommit, extent_decommit, "Unexpected decommit error"); - assert_ptr_ne(old_hooks.purge, chunk_purge, "Unexpected purge error"); - assert_ptr_ne(old_hooks.split, chunk_split, "Unexpected split error"); - assert_ptr_ne(old_hooks.merge, chunk_merge, "Unexpected merge error"); + assert_ptr_ne(old_hooks.purge, extent_purge, "Unexpected purge error"); + assert_ptr_ne(old_hooks.split, extent_split, "Unexpected split error"); + assert_ptr_ne(old_hooks.merge, extent_merge, "Unexpected merge error"); /* Get large size classes. 
*/ sz = sizeof(size_t); @@ -249,5 +249,5 @@ int main(void) { - return (test(test_chunk)); + return (test(test_extent)); } From 03eea4fb8b464ff399ef3118207feb6b376ceded Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 13:14:18 -0700 Subject: [PATCH 0298/2608] Better document --enable-ivsalloc. --- INSTALL | 7 ++++--- include/jemalloc/internal/jemalloc_internal.h.in | 11 +++++++++-- include/jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/INSTALL b/INSTALL index e4f7bbd5..00c428b1 100644 --- a/INSTALL +++ b/INSTALL @@ -124,9 +124,10 @@ any of the following arguments (not a definitive list) to 'configure': option documentation for usage details. --enable-ivsalloc - Enable validation code, which verifies that pointers reside within - jemalloc-owned chunks before dereferencing them. This incurs a minor - performance hit. + Enable validation code for malloc_usable_size() and sallocx(), which + verifies that pointers reside within jemalloc-owned extents before + dereferencing metadata. This incurs a minor performance hit, and causes + the functions to return 0 for failed lookups. --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fb3991bc..243aae6c 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1061,12 +1061,19 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { extent_t *extent; - /* Return 0 if ptr is not within a chunk managed by jemalloc. */ + /* + * Return 0 if ptr is not within an extent managed by jemalloc. This + * function has two extra costs relative to isalloc(): + * - The extent_lookup() call cannot claim to be a dependent lookup, + * which induces rtree lookup load dependencies. + * - The lookup may fail, so there is an extra branch to check for + * failure. 
+ * */ extent = extent_lookup(tsdn, ptr, false); if (extent == NULL) return (0); assert(extent_active_get(extent)); - /* Only arena chunks should be looked up via interior pointers. */ + /* Only slab members should be looked up via interior pointers. */ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); return (isalloc(tsdn, extent, ptr)); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7a38c91d..6721bc85 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -195,7 +195,7 @@ /* * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned chunks before dereferencing them. + * within jemalloc-owned extents before dereferencing them. */ #undef JEMALLOC_IVSALLOC From 751f2c332d91209c5ae3234fa444a20850067960 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 13:40:48 -0700 Subject: [PATCH 0299/2608] Remove obsolete stats.arenas..metadata.mapped mallctl. Rename stats.arenas..metadata.allocated mallctl to stats.arenas..metadata . 
--- doc/jemalloc.xml.in | 25 ++++-------------- include/jemalloc/internal/arena.h | 18 ++++++------- .../jemalloc/internal/jemalloc_internal.h.in | 8 +++--- include/jemalloc/internal/private_symbols.txt | 6 ++--- include/jemalloc/internal/stats.h | 8 ++---- src/arena.c | 3 +-- src/ctl.c | 26 +++++-------------- src/stats.c | 13 +++------- 8 files changed, 34 insertions(+), 73 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ab90e30f..185f955a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2059,10 +2059,8 @@ typedef struct { Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap-sensitive internal - allocator data structures, arena chunk headers (see stats.arenas.<i>.metadata.mapped), - and internal allocations (see stats.arenas.<i>.metadata.allocated). + allocator data structures and internal allocations (see stats.arenas.<i>.metadata). @@ -2210,20 +2208,9 @@ typedef struct { details. - + - stats.arenas.<i>.metadata.mapped - (size_t) - r- - [] - - Number of mapped bytes in arena chunk headers, which - track the states of the non-metadata pages. - - - - - stats.arenas.<i>.metadata.allocated + stats.arenas.<i>.metadata (size_t) r- [] @@ -2232,9 +2219,7 @@ typedef struct { Internal allocations differ from application-originated allocations in that they are for internal use, and that they are omitted from heap profiles. This statistic is reported separately from stats.metadata and - stats.arenas.<i>.metadata.mapped + linkend="stats.metadata">stats.metadata because it overlaps with e.g. 
the stats.allocated and stats.active diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index bc169756..fc0a755f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -359,9 +359,9 @@ void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void arena_metadata_allocated_add(arena_t *arena, size_t size); -void arena_metadata_allocated_sub(arena_t *arena, size_t size); -size_t arena_metadata_allocated_get(arena_t *arena); +void arena_metadata_add(arena_t *arena, size_t size); +void arena_metadata_sub(arena_t *arena, size_t size); +size_t arena_metadata_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); @@ -387,24 +387,24 @@ void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A JEMALLOC_INLINE void -arena_metadata_allocated_add(arena_t *arena, size_t size) +arena_metadata_add(arena_t *arena, size_t size) { - atomic_add_z(&arena->stats.metadata_allocated, size); + atomic_add_z(&arena->stats.metadata, size); } JEMALLOC_INLINE void -arena_metadata_allocated_sub(arena_t *arena, size_t size) +arena_metadata_sub(arena_t *arena, size_t size) { - atomic_sub_z(&arena->stats.metadata_allocated, size); + atomic_sub_z(&arena->stats.metadata, size); } JEMALLOC_INLINE size_t -arena_metadata_allocated_get(arena_t *arena) +arena_metadata_get(arena_t *arena) { - return (atomic_read_z(&arena->stats.metadata_allocated)); + return (atomic_read_z(&arena->stats.metadata)); } JEMALLOC_INLINE bool diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 243aae6c..176487ef 100644 --- 
a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1006,7 +1006,7 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(tsdn, ret), isalloc(tsdn, + arena_metadata_add(iaalloc(tsdn, ret), isalloc(tsdn, iealloc(tsdn, ret), ret)); } return (ret); @@ -1034,7 +1034,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_allocated_add(iaalloc(tsdn, ret), isalloc(tsdn, + arena_metadata_add(iaalloc(tsdn, ret), isalloc(tsdn, iealloc(tsdn, ret), ret)); } return (ret); @@ -1088,8 +1088,8 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(!is_metadata || tcache == NULL); assert(!is_metadata || iaalloc(tsdn, ptr)->ind < narenas_auto); if (config_stats && is_metadata) { - arena_metadata_allocated_sub(iaalloc(tsdn, ptr), isalloc(tsdn, - extent, ptr)); + arena_metadata_sub(iaalloc(tsdn, ptr), isalloc(tsdn, extent, + ptr)); } arena_dalloc(tsdn, extent, ptr, tcache, slow_path); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a2f093ee..be81d746 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -41,9 +41,9 @@ arena_lg_dirty_mult_set arena_malloc arena_malloc_hard arena_maybe_purge -arena_metadata_allocated_add -arena_metadata_allocated_get -arena_metadata_allocated_sub +arena_metadata_add +arena_metadata_get +arena_metadata_sub arena_migrate arena_new arena_nthreads_dec diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 7bba57a7..d5eea8e7 
100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -101,12 +101,8 @@ struct arena_stats_s { uint64_t nmadvise; uint64_t purged; - /* - * Number of bytes currently mapped purely for metadata purposes, and - * number of bytes currently allocated for internal metadata. - */ - size_t metadata_mapped; - size_t metadata_allocated; /* Protected via atomic_*_z(). */ + /* Number of bytes currently allocated for internal metadata. */ + size_t metadata; /* Protected via atomic_*_z(). */ size_t allocated_large; uint64_t nmalloc_large; diff --git a/src/arena.c b/src/arena.c index 990e0e89..c77db5d0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1686,8 +1686,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; - astats->metadata_mapped += arena->stats.metadata_mapped; - astats->metadata_allocated += arena_metadata_allocated_get(arena); + astats->metadata += arena_metadata_get(arena); astats->allocated_large += arena->stats.allocated_large; astats->nmalloc_large += arena->stats.nmalloc_large; astats->ndalloc_large += arena->stats.ndalloc_large; diff --git a/src/ctl.c b/src/ctl.c index 61f3aa1c..b91ea135 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -180,8 +180,7 @@ CTL_PROTO(stats_arenas_i_retained) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) -CTL_PROTO(stats_arenas_i_metadata_mapped) -CTL_PROTO(stats_arenas_i_metadata_allocated) +CTL_PROTO(stats_arenas_i_metadata) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) @@ -347,11 +346,6 @@ static const ctl_named_node_t prof_node[] = { {NAME("lg_sample"), CTL(lg_prof_sample)} }; -static const ctl_named_node_t stats_arenas_i_metadata_node[] = { - {NAME("mapped"), CTL(stats_arenas_i_metadata_mapped)}, - {NAME("allocated"), CTL(stats_arenas_i_metadata_allocated)} -}; - static 
const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, @@ -411,7 +405,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("metadata"), CHILD(named, stats_arenas_i_metadata)}, + {NAME("metadata"), CTL(stats_arenas_i_metadata)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -522,10 +516,7 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->astats.nmadvise += astats->astats.nmadvise; sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata_mapped += - astats->astats.metadata_mapped; - sstats->astats.metadata_allocated += - astats->astats.metadata_allocated; + sstats->astats.metadata += astats->astats.metadata; sstats->allocated_small += astats->allocated_small; sstats->nmalloc_small += astats->nmalloc_small; @@ -649,11 +640,8 @@ ctl_refresh(tsdn_t *tsdn) ctl_stats.active = (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); ctl_stats.metadata = base_allocated + - ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + - ctl_stats.arenas[ctl_stats.narenas].astats - .metadata_allocated; + ctl_stats.arenas[ctl_stats.narenas].astats.metadata; ctl_stats.resident = base_resident + - ctl_stats.arenas[ctl_stats.narenas].astats.metadata_mapped + ((ctl_stats.arenas[ctl_stats.narenas].pactive + ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); ctl_stats.mapped = base_mapped + @@ -2001,10 +1989,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -CTL_RO_CGEN(config_stats, 
stats_arenas_i_metadata_mapped, - ctl_stats.arenas[mib[2]].astats.metadata_mapped, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_allocated, - ctl_stats.arenas[mib[2]].astats.metadata_allocated, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata, + ctl_stats.arenas[mib[2]].astats.metadata, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) diff --git a/src/stats.c b/src/stats.c index 493e409a..d8815855 100644 --- a/src/stats.c +++ b/src/stats.c @@ -211,8 +211,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned nthreads; const char *dss; ssize_t lg_dirty_mult, decay_time; - size_t page, pactive, pdirty, mapped, retained; - size_t metadata_mapped, metadata_allocated; + size_t page, pactive, pdirty, mapped, retained, metadata; uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; @@ -291,13 +290,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); malloc_cprintf(write_cb, cbopaque, "retained: %12zu\n", retained); - CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, - size_t); - CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, - size_t); - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); + CTL_M2_GET("stats.arenas.0.metadata", i, &metadata, size_t); + malloc_cprintf(write_cb, cbopaque, "metadata: %12zu\n", + metadata); if (bins) stats_arena_bins_print(write_cb, cbopaque, i); From a83a31c1c5976d960bf9ef98ed7d9066d3d5b0f2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 13:52:30 -0700 Subject: [PATCH 0300/2608] Relax opt_lg_chunk clamping constraints. 
--- src/jemalloc.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 82d2e6b3..c5fd4419 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1051,16 +1051,8 @@ malloc_conf_init(void) } CONF_HANDLE_BOOL(opt_abort, "abort", true) - /* - * Chunks always require at least one header page and as - * many as 2^(LG_SIZE_CLASS_GROUP+1) data pages. In - * order to simplify options processing, use a - * conservative bound that accommodates all these - * constraints. - */ - CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - LG_SIZE_CLASS_GROUP + 1, (sizeof(size_t) << 3) - 1, - true) + CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE, + (sizeof(size_t) << 3) - 1, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; From a43db1c6088914d1a488abb93315e858c018419b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 13:53:05 -0700 Subject: [PATCH 0301/2608] Relax NBINS constraint (max 255 --> max 256). --- include/jemalloc/internal/size_classes.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 38fe4902..5a57f87d 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -331,11 +331,9 @@ cat < 255) +#if (NBINS > 256) # error "Too many small size classes" #endif From c8c3cbdf47f97c2661395efc572b12ff2a7de05f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 13:53:56 -0700 Subject: [PATCH 0302/2608] Miscellaneous s/chunk/extent/ updates. 
--- doc/jemalloc.xml.in | 17 +++++++++-------- .../internal/jemalloc_internal_defs.h.in | 4 ++-- include/jemalloc/internal/rtree.h | 2 +- include/jemalloc/internal/stats.h | 3 +-- src/zone.c | 2 +- test/integration/aligned_alloc.c | 1 - test/integration/posix_memalign.c | 1 - test/unit/mallctl.c | 6 +++--- 8 files changed, 17 insertions(+), 19 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 185f955a..74daf6a8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2087,12 +2087,12 @@ typedef struct { r- [] - Total number of bytes in active chunks mapped by the - allocator. This is a multiple of the chunk size, and is larger than - stats.active. - This does not include inactive chunks, even those that contain unused - dirty pages, which means that there is no strict ordering between this - and Total number of bytes in active extents mapped by the + allocator. This is larger than stats.active. This + does not include inactive extents, even those that contain unused dirty + pages, which means that there is no strict ordering between this and + stats.resident. @@ -2737,9 +2737,10 @@ MAPPED_LIBRARIES: To dump core whenever a problem occurs: ln -s 'abort:true' /etc/malloc.conf - To specify in the source a chunk size that is 16 MiB: + To specify in the source that only one arena should be automatically + created: +malloc_conf = "narenas:1";]]> SEE ALSO diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6721bc85..49e2cf06 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -137,7 +137,7 @@ #undef JEMALLOC_TCACHE /* - * JEMALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage * segment (DSS). 
*/ #undef JEMALLOC_DSS @@ -176,7 +176,7 @@ #undef JEMALLOC_MAPS_COALESCE /* - * If defined, use munmap() to unmap freed chunks, rather than storing them for + * If defined, use munmap() to unmap freed extents, rather than storing them for * later reuse. This is disabled by default on Linux because common sequences * of mmap()/munmap() calls will cause virtual memory map holes. */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index e62ab6b9..af52f9ff 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -1,6 +1,6 @@ /* * This radix tree implementation is tailored to the singular purpose of - * associating metadata with chunks that are currently owned by jemalloc. + * associating metadata with extents that are currently owned by jemalloc. * ******************************************************************************* */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index d5eea8e7..da019605 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -5,7 +5,6 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; typedef struct arena_stats_s arena_stats_t; -typedef struct chunk_stats_s chunk_stats_t; #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -76,7 +75,7 @@ struct malloc_large_stats_s { */ uint64_t nrequests; - /* Current number of (multi-)chunk allocations of this size class. */ + /* Current number of allocations of this size class. */ size_t curlextents; }; diff --git a/src/zone.c b/src/zone.c index 4609503a..ca235da4 100644 --- a/src/zone.c +++ b/src/zone.c @@ -54,7 +54,7 @@ zone_size(malloc_zone_t *zone, void *ptr) * our zone into two parts, and use one as the default allocator and * the other as the default deallocator/reallocator. 
Since that will * not work in practice, we must check all pointers to assure that they - * reside within a mapped chunk before determining size. + * reside within a mapped extent before determining size. */ return (ivsalloc(tsdn_fetch(), ptr)); } diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 60900148..ec2f5a7b 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,6 +1,5 @@ #include "test/jemalloc_test.h" -#define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ #define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 19741c6c..d5e39b63 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,6 +1,5 @@ #include "test/jemalloc_test.h" -#define CHUNK 0x400000 /* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ #define MAXALIGN ((size_t)0x2000000LU) #define NITER 4 diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8eb5a60c..1954bfc5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -625,7 +625,7 @@ TEST_END TEST_BEGIN(test_arenas_lextent_constants) { -#define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \ +#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ assert_d_eq(mallctl("arenas.lextent.0."#name, &name, &sz, NULL, \ @@ -633,9 +633,9 @@ TEST_BEGIN(test_arenas_lextent_constants) assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_HCHUNK_CONSTANT(size_t, size, LARGE_MINCLASS); + TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, LARGE_MINCLASS); -#undef TEST_ARENAS_HCHUNK_CONSTANT +#undef TEST_ARENAS_LEXTENT_CONSTANT } TEST_END From b14fdaaca0f3a8affac1bef468c3e446c56abbe2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 16:17:31 -0700 Subject: [PATCH 0303/2608] Add a missing prof_alloc_rollback() call. 
In the case where prof_alloc_prep() is called with an over-estimate of allocation size, and sampling doesn't end up being triggered, the tctx must be discarded. --- include/jemalloc/internal/prof.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 8fdc27f6..0fdee08c 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -520,6 +520,7 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, * though its actual usize was insufficient to cross the * sample threshold. */ + prof_alloc_rollback(tsd, tctx, true); tctx = (prof_tctx_t *)(uintptr_t)1U; } } From 819417580e60f063d2a0dc83044a1b4d8994e50a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 16:19:22 -0700 Subject: [PATCH 0304/2608] Fix rallocx() sampling code to not eagerly commit sampler update. rallocx() for an alignment-constrained request may end up with a smaller-than-worst-case size if in-place reallocation succeeds due to serendipitous alignment. In such cases, sampling may not happen. 
--- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c5fd4419..2d33464c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2155,7 +2155,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_extent, old_ptr); - tctx = prof_alloc_prep(tsd, *usize, prof_active, true); + tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_extent, old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx); @@ -2164,7 +2164,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, size, alignment, zero, tcache, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); + prof_alloc_rollback(tsd, tctx, false); return (NULL); } @@ -2181,7 +2181,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, *usize = isalloc(tsd_tsdn(tsd), extent, p); } else extent = iealloc(tsd_tsdn(tsd), p); - prof_realloc(tsd, extent, p, *usize, tctx, prof_active, true, + prof_realloc(tsd, extent, p, *usize, tctx, prof_active, false, old_extent, old_ptr, old_usize, old_tctx); return (p); From 37f0e346064a201f9938650ecf7e359cb260d079 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 16:23:31 -0700 Subject: [PATCH 0305/2608] Reduce NSZS, since NSIZES (was nsizes) can not be so large. 
--- test/integration/rallocx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 6278a490..030fb479 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -52,7 +52,7 @@ TEST_BEGIN(test_grow_and_shrink) size_t tsz; #define NCYCLES 3 unsigned i, j; -#define NSZS 2500 +#define NSZS 1024 size_t szs[NSZS]; #define MAXSZ ZU(12 * 1024 * 1024) From e28b43a739b841c0ca7b0c0e3faf46e40bfc4616 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 16:24:49 -0700 Subject: [PATCH 0306/2608] Remove some unnecessary locking. --- src/large.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-) diff --git a/src/large.c b/src/large.c index 60a0745e..952d4644 100644 --- a/src/large.c +++ b/src/large.c @@ -310,40 +310,22 @@ large_dalloc(tsdn_t *tsdn, extent_t *extent) size_t large_salloc(tsdn_t *tsdn, const extent_t *extent) { - size_t usize; - arena_t *arena; - arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->large_mtx); - usize = extent_usize_get(extent); - malloc_mutex_unlock(tsdn, &arena->large_mtx); - - return (usize); + return (extent_usize_get(extent)); } prof_tctx_t * large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { - prof_tctx_t *tctx; - arena_t *arena; - arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->large_mtx); - tctx = extent_prof_tctx_get(extent); - malloc_mutex_unlock(tsdn, &arena->large_mtx); - - return (tctx); + return (extent_prof_tctx_get(extent)); } void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { - arena_t *arena; - arena = extent_arena_get(extent); - malloc_mutex_lock(tsdn, &arena->large_mtx); extent_prof_tctx_set(extent, tctx); - malloc_mutex_unlock(tsdn, &arena->large_mtx); } void From 7be2ebc23f0f145e095e7230d7d8a202b8dcc55e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Jun 2016 11:11:35 -0700 Subject: [PATCH 0307/2608] Make tsd cleanup 
functions optional, remove noop cleanup functions. --- .../jemalloc/internal/jemalloc_internal.h.in | 4 --- include/jemalloc/internal/private_symbols.txt | 7 ---- include/jemalloc/internal/rtree.h | 1 - include/jemalloc/internal/tcache.h | 1 - include/jemalloc/internal/tsd.h | 34 +++++++++---------- include/jemalloc/internal/witness.h | 1 - src/jemalloc.c | 28 --------------- src/rtree.c | 7 ---- src/tcache.c | 7 ---- src/tsd.c | 7 +++- src/witness.c | 7 ---- 11 files changed, 23 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 176487ef..5b809bfa 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -455,13 +455,9 @@ arena_t *arena_init(tsdn_t *tsdn, unsigned ind); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -void thread_allocated_cleanup(tsd_t *tsd); -void thread_deallocated_cleanup(tsd_t *tsd); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); void arenas_tdata_cleanup(tsd_t *tsd); -void narenas_tdata_cleanup(tsd_t *tsd); -void arenas_tdata_bypass_cleanup(tsd_t *tsd); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index be81d746..d4e5525f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -74,7 +74,6 @@ arena_tcache_fill_small arena_tdata_get arena_tdata_get_hard arenas -arenas_tdata_bypass_cleanup arenas_tdata_cleanup atomic_add_p atomic_add_u @@ -285,7 +284,6 @@ malloc_vsnprintf malloc_write mb_write narenas_auto -narenas_tdata_cleanup narenas_total_get ncpus nhbins @@ -410,7 +408,6 @@ rtree_elm_release 
rtree_elm_witness_access rtree_elm_witness_acquire rtree_elm_witness_release -rtree_elm_witnesses_cleanup rtree_elm_write rtree_elm_write_acquired rtree_new @@ -451,7 +448,6 @@ tcache_cleanup tcache_create tcache_dalloc_large tcache_dalloc_small -tcache_enabled_cleanup tcache_enabled_get tcache_enabled_set tcache_event @@ -467,8 +463,6 @@ tcaches_create tcaches_destroy tcaches_flush tcaches_get -thread_allocated_cleanup -thread_deallocated_cleanup ticker_copy ticker_init ticker_read @@ -539,7 +533,6 @@ tsdn_tsd witness_assert_lockless witness_assert_not_owner witness_assert_owner -witness_fork_cleanup witness_init witness_lock witness_lock_error diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index af52f9ff..a47a79ea 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -137,7 +137,6 @@ void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, const rtree_elm_t *elm); void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, const rtree_elm_t *elm); -void rtree_elm_witnesses_cleanup(tsd_t *tsd); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index e7606d6e..933255ce 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -143,7 +143,6 @@ void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); -void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index ca8915ea..988edf56 100644 --- a/include/jemalloc/internal/tsd.h +++ 
b/include/jemalloc/internal/tsd.h @@ -561,20 +561,20 @@ struct tsd_init_head_s { #endif #define MALLOC_TSD \ -/* O(name, type) */ \ - O(tcache, tcache_t *) \ - O(thread_allocated, uint64_t) \ - O(thread_deallocated, uint64_t) \ - O(prof_tdata, prof_tdata_t *) \ - O(iarena, arena_t *) \ - O(arena, arena_t *) \ - O(arenas_tdata, arena_tdata_t *) \ - O(narenas_tdata, unsigned) \ - O(arenas_tdata_bypass, bool) \ - O(tcache_enabled, tcache_enabled_t) \ - O(witnesses, witness_list_t) \ - O(rtree_elm_witnesses, rtree_elm_witness_tsd_t) \ - O(witness_fork, bool) \ +/* O(name, type, cleanup) */ \ + O(tcache, tcache_t *, yes) \ + O(thread_allocated, uint64_t, no) \ + O(thread_deallocated, uint64_t, no) \ + O(prof_tdata, prof_tdata_t *, yes) \ + O(iarena, arena_t *, yes) \ + O(arena, arena_t *, yes) \ + O(arenas_tdata, arena_tdata_t *, yes) \ + O(narenas_tdata, unsigned, no) \ + O(arenas_tdata_bypass, bool, no) \ + O(tcache_enabled, tcache_enabled_t, no) \ + O(witnesses, witness_list_t, yes) \ + O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ + O(witness_fork, bool, no) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -595,7 +595,7 @@ struct tsd_init_head_s { struct tsd_s { tsd_state_t state; -#define O(n, t) \ +#define O(n, t, c) \ t n; MALLOC_TSD #undef O @@ -642,7 +642,7 @@ malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); -#define O(n, t) \ +#define O(n, t, c) \ t *tsd_##n##p_get(tsd_t *tsd); \ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); @@ -691,7 +691,7 @@ tsd_nominal(tsd_t *tsd) return (tsd->state == tsd_state_nominal); } -#define O(n, t) \ +#define O(n, t, c) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) \ { \ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index e2f85634..9a2a6760 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -103,7 +103,6 @@ void 
witness_lockless_error(const witness_list_t *witnesses); #endif void witnesses_cleanup(tsd_t *tsd); -void witness_fork_cleanup(tsd_t *tsd); void witness_prefork(tsd_t *tsd); void witness_postfork_parent(tsd_t *tsd); void witness_postfork_child(tsd_t *tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 2d33464c..10074013 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -644,20 +644,6 @@ arena_choose_hard(tsd_t *tsd, bool internal) return (ret); } -void -thread_allocated_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - -void -thread_deallocated_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - void iarena_cleanup(tsd_t *tsd) { @@ -693,20 +679,6 @@ arenas_tdata_cleanup(tsd_t *tsd) } } -void -narenas_tdata_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - -void -arenas_tdata_bypass_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - static void stats_print_atexit(void) { diff --git a/src/rtree.c b/src/rtree.c index 504f9f2e..b6027303 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -285,10 +285,3 @@ rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, witness_unlock(tsdn, witness); rtree_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); } - -void -rtree_elm_witnesses_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} diff --git a/src/tcache.c b/src/tcache.c index 69444fac..96e54e1a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -404,13 +404,6 @@ tcache_cleanup(tsd_t *tsd) } } -void -tcache_enabled_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { diff --git a/src/tsd.c b/src/tsd.c index ec69a51c..5d9fc9f9 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -77,9 +77,14 @@ tsd_cleanup(void *arg) /* Do nothing. 
*/ break; case tsd_state_nominal: -#define O(n, t) \ +#define MALLOC_TSD_cleanup_yes(n, t) \ n##_cleanup(tsd); +#define MALLOC_TSD_cleanup_no(n, t) +#define O(n, t, c) \ + MALLOC_TSD_cleanup_##c(n, t) MALLOC_TSD +#undef MALLOC_TSD_cleanup_yes +#undef MALLOC_TSD_cleanup_no #undef O tsd->state = tsd_state_purgatory; tsd_set(tsd); diff --git a/src/witness.c b/src/witness.c index 8efff56d..0f5c0d73 100644 --- a/src/witness.c +++ b/src/witness.c @@ -103,13 +103,6 @@ witnesses_cleanup(tsd_t *tsd) /* Do nothing. */ } -void -witness_fork_cleanup(tsd_t *tsd) -{ - - /* Do nothing. */ -} - void witness_prefork(tsd_t *tsd) { From 6f29a8392403f70bfa1080964a65540b6f3699fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Jun 2016 18:43:10 -0700 Subject: [PATCH 0308/2608] Add rtree lookup path caching. rtree-based extent lookups remain more expensive than chunk-based run lookups, but with this optimization the fast path slowdown is ~3 CPU cycles per metadata lookup (on Intel Core i7-4980HQ), versus ~11 cycles prior. The path caching speedup tends to degrade gracefully unless allocated memory is spread far apart (as is the case when using a mixture of sbrk() and mmap()). 
--- include/jemalloc/internal/extent.h | 5 +- include/jemalloc/internal/private_symbols.txt | 5 + include/jemalloc/internal/rtree.h | 181 ++++++++++++++---- include/jemalloc/internal/tsd.h | 19 ++ src/extent.c | 65 ++++--- src/rtree.c | 3 +- test/unit/rtree.c | 83 ++++---- 7 files changed, 267 insertions(+), 94 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index a41a15ff..8b8dbe80 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -181,8 +181,11 @@ void extent_ring_remove(extent_t *extent); JEMALLOC_INLINE extent_t * extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return (rtree_read(tsdn, &extents_rtree, (uintptr_t)ptr, dependent)); + return (rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + dependent)); } JEMALLOC_INLINE arena_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d4e5525f..07e7f287 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -399,6 +399,7 @@ rtree_child_read rtree_child_read_hard rtree_child_tryread rtree_clear +rtree_ctx_start_level rtree_delete rtree_elm_acquire rtree_elm_lookup @@ -502,6 +503,9 @@ tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set tsd_prof_tdatap_get +tsd_rtree_ctx_get +tsd_rtree_ctx_set +tsd_rtree_ctxp_get tsd_rtree_elm_witnesses_get tsd_rtree_elm_witnesses_set tsd_rtree_elm_witnessesp_get @@ -529,6 +533,7 @@ tsd_witnesses_set tsd_witnessesp_get tsdn_fetch tsdn_null +tsdn_rtree_ctx tsdn_tsd witness_assert_lockless witness_assert_not_owner diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index a47a79ea..fc88dfec 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -10,6 +10,7 @@ typedef struct rtree_elm_s 
rtree_elm_t; typedef struct rtree_elm_witness_s rtree_elm_witness_t; typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; typedef struct rtree_level_s rtree_level_t; +typedef struct rtree_ctx_s rtree_ctx_t; typedef struct rtree_s rtree_t; /* @@ -25,6 +26,13 @@ typedef struct rtree_s rtree_t; /* Used for two-stage lock-free node initialization. */ #define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1) +#define RTREE_CTX_INITIALIZER { \ + false, \ + 0, \ + 0, \ + {NULL /* C initializes all trailing elements to NULL. */} \ +} + /* * Maximum number of concurrently acquired elements per thread. This controls * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would @@ -78,9 +86,9 @@ struct rtree_level_s { * * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. * This results in a 3-level tree, and the leftmost leaf can be directly - * accessed via subtrees[2], the subtree prefixed by 0x0000 (excluding - * 0x00000000) can be accessed via subtrees[1], and the remainder of the - * tree can be accessed via subtrees[0]. + * accessed via levels[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via levels[1], and the remainder of the + * tree can be accessed via levels[0]. * * levels[0] : [ | 0x0001******** | 0x0002******** | ...] * @@ -90,7 +98,7 @@ struct rtree_level_s { * * This has practical implications on x64, which currently uses only the * lower 47 bits of virtual address space in userland, thus leaving - * subtrees[0] unused and avoiding a level of tree traversal. + * levels[0] unused and avoiding a level of tree traversal. */ union { void *subtree_pun; @@ -105,13 +113,31 @@ struct rtree_level_s { unsigned cumbits; }; +struct rtree_ctx_s { + /* If false, key/elms have not yet been initialized by a lookup. */ + bool valid; + /* Key that corresponds to the tree path recorded in elms. */ + uintptr_t key; + /* Memoized rtree_start_level(key). 
*/ + unsigned start_level; + /* + * A path through rtree, driven by key. Only elements that could + * actually be used for subsequent lookups are initialized, i.e. if + * start_level = rtree_start_level(key) is non-zero, the first + * start_level elements are uninitialized. The last element contains a + * pointer to the leaf node element that corresponds to key, so that + * exact matches require no tree node offset computation. + */ + rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1]; +}; + struct rtree_s { unsigned height; /* * Precomputed table used to convert from the number of leading 0 key * bits to which subtree level to start at. */ - unsigned start_level[RTREE_HEIGHT_MAX]; + unsigned start_level[RTREE_HEIGHT_MAX + 1]; rtree_level_t levels[RTREE_HEIGHT_MAX]; }; @@ -143,7 +169,9 @@ void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -unsigned rtree_start_level(rtree_t *rtree, uintptr_t key); +unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key); +unsigned rtree_ctx_start_level(const rtree_t *rtree, + const rtree_ctx_t *rtree_ctx, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_elm_t *node); @@ -156,33 +184,55 @@ rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent); rtree_elm_t *rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, bool dependent); -rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, - bool dependent, bool init_missing); +rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, - const extent_t *extent); -extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, - bool dependent); -rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, - bool dependent, bool init_missing); 
+bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, const extent_t *extent); +extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); +rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, const extent_t *extent); void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); -void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key); +void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) JEMALLOC_ALWAYS_INLINE unsigned -rtree_start_level(rtree_t *rtree, uintptr_t key) +rtree_start_level(const rtree_t *rtree, uintptr_t key) { unsigned start_level; if (unlikely(key == 0)) return (rtree->height - 1); - start_level = rtree->start_level[lg_floor(key) >> + start_level = rtree->start_level[(lg_floor(key) + 1) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return (start_level); +} + +JEMALLOC_ALWAYS_INLINE unsigned +rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, + uintptr_t key) +{ + unsigned start_level; + uintptr_t key_diff; + + /* Compute the difference between old and new lookup keys. */ + key_diff = key ^ rtree_ctx->key; + assert(key_diff != 0); /* Handled in rtree_elm_lookup(). */ + + /* + * Compute the last traversal path element at which the keys' paths + * are the same. 
+ */ + start_level = rtree->start_level[(lg_floor(key_diff) + 1) >> LG_RTREE_BITS_PER_LEVEL]; assert(start_level < rtree->height); return (start_level); @@ -291,8 +341,8 @@ rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, bool dependent) } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, - bool init_missing) +rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) { uintptr_t subkey; unsigned start_level; @@ -300,35 +350,95 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, assert(!dependent || !init_missing); - start_level = rtree_start_level(rtree, key); + if (dependent || init_missing) { + if (likely(rtree_ctx->valid)) { + if (key == rtree_ctx->key) + return (rtree_ctx->elms[rtree->height]); + else { + unsigned no_ctx_start_level = + rtree_start_level(rtree, key); + unsigned ctx_start_level; + + if (likely(no_ctx_start_level <= + rtree_ctx->start_level && (ctx_start_level = + rtree_ctx_start_level(rtree, rtree_ctx, + key)) >= rtree_ctx->start_level)) { + start_level = ctx_start_level; + node = rtree_ctx->elms[ctx_start_level]; + } else { + start_level = no_ctx_start_level; + node = init_missing ? + rtree_subtree_read(tsdn, rtree, + no_ctx_start_level, dependent) : + rtree_subtree_tryread(rtree, + no_ctx_start_level, dependent); + rtree_ctx->start_level = + no_ctx_start_level; + rtree_ctx->elms[no_ctx_start_level] = + node; + } + } + } else { + unsigned no_ctx_start_level = rtree_start_level(rtree, + key); + + start_level = no_ctx_start_level; + node = init_missing ? 
rtree_subtree_read(tsdn, rtree, + no_ctx_start_level, dependent) : + rtree_subtree_tryread(rtree, no_ctx_start_level, + dependent); + rtree_ctx->valid = true; + rtree_ctx->start_level = no_ctx_start_level; + rtree_ctx->elms[no_ctx_start_level] = node; + } + rtree_ctx->key = key; + } else { + start_level = rtree_start_level(rtree, key); + node = init_missing ? rtree_subtree_read(tsdn, rtree, + start_level, dependent) : rtree_subtree_tryread(rtree, + start_level, dependent); + } - node = init_missing ? rtree_subtree_read(tsdn, rtree, start_level, - dependent) : rtree_subtree_tryread(rtree, start_level, dependent); #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) switch (start_level + RTREE_GET_BIAS) { #define RTREE_GET_SUBTREE(level) \ case level: \ assert(level < (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) \ + if (!dependent && unlikely(!rtree_node_valid(node))) { \ + if (init_missing) \ + rtree_ctx->valid = false; \ return (NULL); \ + } \ subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ node = init_missing ? rtree_child_read(tsdn, rtree, \ &node[subkey], level - RTREE_GET_BIAS, dependent) : \ rtree_child_tryread(&node[subkey], dependent); \ + if (dependent || init_missing) { \ + rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ + node; \ + } \ /* Fall through. */ #define RTREE_GET_LEAF(level) \ case level: \ assert(level == (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) \ + if (!dependent && unlikely(!rtree_node_valid(node))) { \ + if (init_missing) \ + rtree_ctx->valid = false; \ return (NULL); \ + } \ subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ /* \ * node is a leaf, so it contains values rather than \ * child pointers. 
\ */ \ - return (&node[subkey]); + node = &node[subkey]; \ + if (dependent || init_missing) { \ + rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ + node; \ + } \ + return (node); #if RTREE_HEIGHT_MAX > 1 RTREE_GET_SUBTREE(0) #endif @@ -387,14 +497,15 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, } JEMALLOC_INLINE bool -rtree_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, const extent_t *extent) +rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + const extent_t *extent) { rtree_elm_t *elm; assert(extent != NULL); /* Use rtree_clear() for this case. */ assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - elm = rtree_elm_lookup(tsdn, rtree, key, false, true); + elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); if (elm == NULL) return (true); assert(rtree_elm_read(elm, false) == NULL); @@ -404,11 +515,12 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, const extent_t *extent) } JEMALLOC_ALWAYS_INLINE extent_t * -rtree_read(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent) +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, + bool dependent) { rtree_elm_t *elm; - elm = rtree_elm_lookup(tsdn, rtree, key, dependent, false); + elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); if (elm == NULL) return (NULL); @@ -416,12 +528,13 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent) } JEMALLOC_INLINE rtree_elm_t * -rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key, bool dependent, - bool init_missing) +rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) { rtree_elm_t *elm; - elm = rtree_elm_lookup(tsdn, rtree, key, dependent, init_missing); + elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, + init_missing); if (!dependent && elm == NULL) return (NULL); { @@ -481,11 +594,11 @@ 
rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) } JEMALLOC_INLINE void -rtree_clear(tsdn_t *tsdn, rtree_t *rtree, uintptr_t key) +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_elm_t *elm; - elm = rtree_elm_acquire(tsdn, rtree, key, true, false); + elm = rtree_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); rtree_elm_write_acquired(tsdn, rtree, elm, NULL); rtree_elm_release(tsdn, rtree, elm); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 988edf56..2355f9c6 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -572,6 +572,7 @@ struct tsd_init_head_s { O(narenas_tdata, unsigned, no) \ O(arenas_tdata_bypass, bool, no) \ O(tcache_enabled, tcache_enabled_t, no) \ + O(rtree_ctx, rtree_ctx_t, no) \ O(witnesses, witness_list_t, yes) \ O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ O(witness_fork, bool, no) \ @@ -588,6 +589,7 @@ struct tsd_init_head_s { 0, \ false, \ tcache_enabled_default, \ + RTREE_CTX_INITIALIZER, \ ql_head_initializer(witnesses), \ RTREE_ELM_WITNESS_TSD_INITIALIZER, \ false \ @@ -651,6 +653,7 @@ MALLOC_TSD tsdn_t *tsdn_fetch(void); bool tsdn_null(const tsdn_t *tsdn); tsd_t *tsdn_tsd(tsdn_t *tsdn); +rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -741,6 +744,22 @@ tsdn_tsd(tsdn_t *tsdn) return (&tsdn->tsd); } + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) +{ + + /* + * If tsd cannot be accessed, initialize the fallback rtree_ctx and + * return a pointer to it. 
+ */ + if (unlikely(tsdn_null(tsdn))) { + static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); + return (fallback); + } + return (tsd_rtree_ctxp_get(tsdn_tsd(tsdn))); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/src/extent.c b/src/extent.c index 9f3ddd95..0c41c066 100644 --- a/src/extent.c +++ b/src/extent.c @@ -259,18 +259,19 @@ extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) } static bool -extent_rtree_acquire(tsdn_t *tsdn, const extent_t *extent, bool dependent, - bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) +extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + const extent_t *extent, bool dependent, bool init_missing, + rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) { - *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, + *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) return (true); assert(*r_elm_a != NULL); if (extent_size_get(extent) > PAGE) { - *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, + *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_last_get(extent), dependent, init_missing); if (!dependent && *r_elm_b == NULL) @@ -302,14 +303,15 @@ extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) } static void -extent_interior_register(tsdn_t *tsdn, const extent_t *extent) +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + const extent_t *extent) { size_t i; assert(extent_slab_get(extent)); for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, + rtree_write(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << LG_PAGE), extent); } @@ -318,13 +320,16 @@ extent_interior_register(tsdn_t *tsdn, const extent_t *extent) static bool extent_register(tsdn_t *tsdn, const extent_t *extent) { 
+ rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *elm_a, *elm_b; - if (extent_rtree_acquire(tsdn, extent, false, true, &elm_a, &elm_b)) + if (extent_rtree_acquire(tsdn, rtree_ctx, extent, false, true, &elm_a, + &elm_b)) return (true); extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); if (extent_slab_get(extent)) - extent_interior_register(tsdn, extent); + extent_interior_register(tsdn, rtree_ctx, extent); extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { @@ -347,14 +352,15 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) } static void -extent_interior_deregister(tsdn_t *tsdn, const extent_t *extent) +extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + const extent_t *extent) { size_t i; assert(extent_slab_get(extent)); for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, + rtree_clear(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << LG_PAGE)); } @@ -363,12 +369,15 @@ extent_interior_deregister(tsdn_t *tsdn, const extent_t *extent) static void extent_deregister(tsdn_t *tsdn, const extent_t *extent) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *elm_a, *elm_b; - extent_rtree_acquire(tsdn, extent, true, false, &elm_a, &elm_b); + extent_rtree_acquire(tsdn, rtree_ctx, extent, true, false, &elm_a, + &elm_b); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); if (extent_slab_get(extent)) - extent_interior_deregister(tsdn, extent); + extent_interior_deregister(tsdn, rtree_ctx, extent); extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { @@ -422,6 +431,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, bool slab) { extent_t *extent; + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx 
= tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); size_t size, alloc_size, leadsize, trailsize; assert(new_addr == NULL || !slab); @@ -437,7 +448,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, if (new_addr != NULL) { rtree_elm_t *elm; - elm = rtree_elm_acquire(tsdn, &extents_rtree, + elm = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)new_addr, false, false); if (elm != NULL) { extent = rtree_elm_read_acquired(tsdn, &extents_rtree, @@ -515,7 +526,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_active_set(extent, true); if (slab) { extent_slab_set(extent, slab); - extent_interior_register(tsdn, extent); + extent_interior_register(tsdn, rtree_ctx, extent); } malloc_mutex_unlock(tsdn, &arena->extents_mtx); @@ -731,6 +742,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) { extent_t *prev, *next; + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); assert(!cache || !extent_zeroed_get(extent)); @@ -741,7 +754,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_active_set(extent, false); extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, extent); + extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); } @@ -750,7 +763,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, arena_extent_cache_maybe_insert(arena, extent, cache); /* Try to coalesce forward. 
*/ - next = rtree_read(tsdn, &extents_rtree, + next = rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_past_get(extent), false); if (next != NULL) { extent_try_coalesce(tsdn, arena, extent_hooks, extent, next, @@ -758,7 +771,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } /* Try to coalesce backward. */ - prev = rtree_read(tsdn, &extents_rtree, + prev = rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_before_get(extent), false); if (prev != NULL) { extent_try_coalesce(tsdn, arena, extent_hooks, prev, extent, @@ -910,6 +923,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, size_t usize_b) { extent_t *trail; + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; assert(extent_size_get(extent) == size_a + size_b); @@ -928,8 +943,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); - if (extent_rtree_acquire(tsdn, &lead, false, true, &lead_elm_a, - &lead_elm_b)) + if (extent_rtree_acquire(tsdn, rtree_ctx, &lead, false, true, + &lead_elm_a, &lead_elm_b)) goto label_error_b; } @@ -937,8 +952,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, size_a), size_b, usize_b, extent_active_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); - if (extent_rtree_acquire(tsdn, trail, false, true, &trail_elm_a, - &trail_elm_b)) + if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, + &trail_elm_a, &trail_elm_b)) goto label_error_c; if (extent_hooks->split(extent_base_get(extent), size_a + size_b, @@ -985,6 +1000,8 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_t *a, extent_t *b) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t 
*rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; extent_hooks_assure_initialized(tsdn, arena, extent_hooks); @@ -998,8 +1015,10 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, * owned, so the following code uses decomposed helper functions rather * than extent_{,de}register() to do things in the right order. */ - extent_rtree_acquire(tsdn, a, true, false, &a_elm_a, &a_elm_b); - extent_rtree_acquire(tsdn, b, true, false, &b_elm_a, &b_elm_b); + extent_rtree_acquire(tsdn, rtree_ctx, a, true, false, &a_elm_a, + &a_elm_b); + extent_rtree_acquire(tsdn, rtree_ctx, b, true, false, &b_elm_a, + &b_elm_b); if (a_elm_b != NULL) { rtree_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, NULL); diff --git a/src/rtree.c b/src/rtree.c index b6027303..421de3e8 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -52,11 +52,12 @@ rtree_new(rtree_t *rtree, unsigned bits) rtree->levels[height-1].cumbits = bits; } - /* Compute lookup table to be used by rtree_start_level(). */ + /* Compute lookup table to be used by rtree_[ctx_]start_level(). 
*/ for (i = 0; i < RTREE_HEIGHT_MAX; i++) { rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height - 1); } + rtree->start_level[RTREE_HEIGHT_MAX] = 0; return (false); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 786cc351..a05834fa 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -40,10 +40,11 @@ TEST_BEGIN(test_rtree_read_empty) for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; test_rtree = &rtree; assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_read(tsdn, &rtree, 0, false), + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, 0, false), "rtree_read() should return NULL for empty tree"); rtree_delete(tsdn, &rtree); test_rtree = NULL; @@ -66,7 +67,8 @@ static void * thd_start(void *varg) { thd_start_arg_t *arg = (thd_start_arg_t *)varg; - sfmt_t *sfmt; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + sfmt_t *sfmt; extent_t *extent; tsdn_t *tsdn; unsigned i; @@ -81,21 +83,22 @@ thd_start(void *varg) if (i % 2 == 0) { rtree_elm_t *elm; - elm = rtree_elm_acquire(tsdn, &arg->rtree, key, false, - true); + elm = rtree_elm_acquire(tsdn, &arg->rtree, &rtree_ctx, + key, false, true); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); - rtree_elm_write_acquired(tsdn, &arg->rtree, elm, extent); + rtree_elm_write_acquired(tsdn, &arg->rtree, elm, + extent); rtree_elm_release(tsdn, &arg->rtree, elm); - elm = rtree_elm_acquire(tsdn, &arg->rtree, key, true, - false); + elm = rtree_elm_acquire(tsdn, &arg->rtree, &rtree_ctx, + key, true, false); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); rtree_elm_read_acquired(tsdn, &arg->rtree, elm); rtree_elm_release(tsdn, &arg->rtree, elm); } else - rtree_read(tsdn, &arg->rtree, key, false); + rtree_read(tsdn, &arg->rtree, &rtree_ctx, key, false); } free(extent); @@ -145,19 +148,22 @@ TEST_BEGIN(test_rtree_extrema) for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { 
rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; test_rtree = &rtree; assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, &rtree, 0, &extent_a), - "Unexpected rtree_write() failure, i=%u", i); - assert_ptr_eq(rtree_read(tsdn, &rtree, 0, true), &extent_a, + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, 0, + &extent_a), "Unexpected rtree_write() failure, i=%u", i); + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, 0, true), + &extent_a, "rtree_read() should return previously set value, i=%u", i); - assert_false(rtree_write(tsdn, &rtree, ~((uintptr_t)0), - &extent_b), "Unexpected rtree_write() failure, i=%u", i); - assert_ptr_eq(rtree_read(tsdn, &rtree, ~((uintptr_t)0), true), - &extent_b, + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, + ~((uintptr_t)0), &extent_b), + "Unexpected rtree_write() failure, i=%u", i); + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, + ~((uintptr_t)0), true), &extent_b, "rtree_read() should return previously set value, i=%u", i); rtree_delete(tsdn, &rtree); @@ -178,27 +184,30 @@ TEST_BEGIN(test_rtree_bits) (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; extent_t extent; rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; test_rtree = &rtree; assert_false(rtree_new(&rtree, i), "Unexpected rtree_new() failure"); for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_false(rtree_write(tsdn, &rtree, keys[j], - &extent), "Unexpected rtree_write() failure"); + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, + keys[j], &extent), + "Unexpected rtree_write() failure"); for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { - assert_ptr_eq(rtree_read(tsdn, &rtree, keys[k], - true), &extent, "rtree_read() should " - "return previously set value and ignore " - "insignificant key bits; i=%u, j=%u, k=%u, " - "set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, - j, k, keys[j], keys[k]); + assert_ptr_eq(rtree_read(tsdn, &rtree, + &rtree_ctx, keys[k], 
true), &extent, + "rtree_read() should return previously set " + "value and ignore insignificant key bits; " + "i=%u, j=%u, k=%u, set key=%#"FMTxPTR", " + "get key=%#"FMTxPTR, i, j, k, keys[j], + keys[k]); } - assert_ptr_null(rtree_read(tsdn, &rtree, + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)), false), "Only leftmost rtree leaf should be set; " "i=%u, j=%u", i, j); - rtree_clear(tsdn, &rtree, keys[j]); + rtree_clear(tsdn, &rtree, &rtree_ctx, keys[j]); } rtree_delete(tsdn, &rtree); @@ -222,6 +231,7 @@ TEST_BEGIN(test_rtree_random) extent_t extent; unsigned j; rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; rtree_elm_t *elm; test_rtree = &rtree; @@ -230,29 +240,32 @@ TEST_BEGIN(test_rtree_random) for (j = 0; j < NSET; j++) { keys[j] = (uintptr_t)gen_rand64(sfmt); - elm = rtree_elm_acquire(tsdn, &rtree, keys[j], false, - true); + elm = rtree_elm_acquire(tsdn, &rtree, &rtree_ctx, + keys[j], false, true); assert_ptr_not_null(elm, "Unexpected rtree_elm_acquire() failure"); rtree_elm_write_acquired(tsdn, &rtree, elm, &extent); rtree_elm_release(tsdn, &rtree, elm); - assert_ptr_eq(rtree_read(tsdn, &rtree, keys[j], true), - &extent, + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, + keys[j], true), &extent, "rtree_read() should return previously set value"); } for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_read(tsdn, &rtree, keys[j], true), - &extent, "rtree_read() should return previously " - "set value, j=%u", j); + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, + keys[j], true), &extent, + "rtree_read() should return previously set value, " + "j=%u", j); } for (j = 0; j < NSET; j++) { - rtree_clear(tsdn, &rtree, keys[j]); - assert_ptr_null(rtree_read(tsdn, &rtree, keys[j], true), + rtree_clear(tsdn, &rtree, &rtree_ctx, keys[j]); + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, + keys[j], true), "rtree_read() should return previously set value"); } for (j = 0; j < NSET; j++) { - 
assert_ptr_null(rtree_read(tsdn, &rtree, keys[j], true), + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, + keys[j], true), "rtree_read() should return previously set value"); } From f8f0542194e2a7fb0eff8a20143a26fe4a6ea6a5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Jun 2016 12:05:53 -0700 Subject: [PATCH 0309/2608] Modify extent hook functions to take an (extent_t *) argument. This facilitates the application accessing its own extent allocator metadata during hook invocations. This resolves #259. --- doc/jemalloc.xml.in | 14 +- include/jemalloc/internal/arena.h | 16 +- include/jemalloc/internal/extent.h | 33 +-- include/jemalloc/jemalloc_typedefs.h.in | 28 ++- src/arena.c | 48 ++-- src/ctl.c | 16 +- src/extent.c | 307 +++++++++++------------- src/extent_dss.c | 2 +- src/large.c | 4 +- test/integration/extent.c | 159 ++++++------ 10 files changed, 312 insertions(+), 315 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 74daf6a8..bfc0073b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1551,7 +1551,7 @@ malloc_conf = "xmalloc:true";]]> arena.<i>.extent_hooks - (extent_hooks_t) + (extent_hooks_t *) rw Get or set the extent management hook functions for @@ -1567,7 +1567,8 @@ malloc_conf = "xmalloc:true";]]> allocation. +};]]> The extent_hooks_t structure comprises function pointers which are described individually below. 
jemalloc uses these functions to manage extent lifetime, which starts off with allocation of @@ -1592,6 +1593,7 @@ typedef struct { typedef void *(extent_alloc_t) + extent_hooks_t *extent_hooks void *new_addr size_t size size_t alignment @@ -1627,6 +1629,7 @@ typedef struct { typedef bool (extent_dalloc_t) + extent_hooks_t *extent_hooks void *addr size_t size bool committed @@ -1646,6 +1649,7 @@ typedef struct { typedef bool (extent_commit_t) + extent_hooks_t *extent_hooks void *addr size_t size size_t offset @@ -1667,6 +1671,7 @@ typedef struct { typedef bool (extent_decommit_t) + extent_hooks_t *extent_hooks void *addr size_t size size_t offset @@ -1688,6 +1693,7 @@ typedef struct { typedef bool (extent_purge_t) + extent_hooks_t *extent_hooks void *addr size_tsize size_t offset @@ -1707,6 +1713,7 @@ typedef struct { typedef bool (extent_split_t) + extent_hooks_t *extent_hooks void *addr size_t size size_t size_a @@ -1728,6 +1735,7 @@ typedef struct { typedef bool (extent_merge_t) + extent_hooks_t *extent_hooks void *addr_a size_t size_a void *addr_b diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index fc0a755f..3c931c34 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -240,11 +240,15 @@ struct arena_s { */ extent_heap_t extents_cached[NPSIZES]; extent_heap_t extents_retained[NPSIZES]; - /* User-configurable extent hook functions. */ - extent_hooks_t extent_hooks; - /* Protects extents_cached, extents_retained, and extent_hooks. */ + /* Protects extents_cached and extents_retained. */ malloc_mutex_t extents_mtx; + /* User-configurable extent hook functions. */ + union { + extent_hooks_t *extent_hooks; + void *extent_hooks_pun; + }; + /* Cache of extent structures that were allocated via base_alloc(). 
*/ ql_head(extent_t) extent_cache; malloc_mutex_t extent_cache_mtx; @@ -279,10 +283,10 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero); + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero); void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); + extent_hooks_t **r_extent_hooks, extent_t *extent); void arena_extent_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache); void arena_extent_cache_maybe_remove(arena_t *arena, extent_t *extent, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 8b8dbe80..6e155206 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -3,15 +3,7 @@ typedef struct extent_s extent_t; -#define EXTENT_HOOKS_INITIALIZER { \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - NULL \ -} +#define EXTENT_HOOKS_INITIALIZER NULL #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -93,9 +85,8 @@ extern const extent_hooks_t extent_hooks_default; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -extent_hooks_t extent_hooks_get(tsdn_t *tsdn, arena_t *arena); -extent_hooks_t extent_hooks_set(tsdn_t *tsdn, arena_t *arena, - const extent_hooks_t *extent_hooks); +extent_hooks_t *extent_hooks_get(arena_t *arena); +extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); #ifdef JEMALLOC_JET typedef size_t (extent_size_quantize_t)(size_t); @@ -109,29 +100,29 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, - 
extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab); void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); + extent_hooks_t **r_extent_hooks, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); + extent_hooks_t **r_extent_hooks, extent_t *extent); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, size_t usize_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); void extent_prefork(tsdn_t *tsdn); void extent_postfork_parent(tsdn_t *tsdn); void extent_postfork_child(tsdn_t *tsdn); diff --git a/include/jemalloc/jemalloc_typedefs.h.in 
b/include/jemalloc/jemalloc_typedefs.h.in index 99f07ab2..e5ba7166 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -1,53 +1,61 @@ +typedef struct extent_hooks_s extent_hooks_t; + /* * void * * extent_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, * bool *commit, unsigned arena_ind); */ -typedef void *(extent_alloc_t)(void *, size_t, size_t, bool *, bool *, - unsigned); +typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, + bool *, unsigned); /* * bool * extent_dalloc(void *addr, size_t size, bool committed, unsigned arena_ind); */ -typedef bool (extent_dalloc_t)(void *, size_t, bool, unsigned); +typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); /* * bool * extent_commit(void *addr, size_t size, size_t offset, size_t length, * unsigned arena_ind); */ -typedef bool (extent_commit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); /* * bool * extent_decommit(void *addr, size_t size, size_t offset, size_t length, * unsigned arena_ind); */ -typedef bool (extent_decommit_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, + size_t, unsigned); /* * bool * extent_purge(void *addr, size_t size, size_t offset, size_t length, * unsigned arena_ind); */ -typedef bool (extent_purge_t)(void *, size_t, size_t, size_t, unsigned); +typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + unsigned); /* * bool * extent_split(void *addr, size_t size, size_t size_a, size_t size_b, * bool committed, unsigned arena_ind); */ -typedef bool (extent_split_t)(void *, size_t, size_t, size_t, bool, unsigned); +typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, + bool, unsigned); /* * bool * extent_merge(void *addr_a, size_t size_a, void *addr_b, size_t 
size_b, * bool committed, unsigned arena_ind); */ -typedef bool (extent_merge_t)(void *, size_t, void *, size_t, bool, unsigned); +typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, + bool, unsigned); -typedef struct { +struct extent_hooks_s { extent_alloc_t *alloc; extent_dalloc_t *dalloc; extent_commit_t *commit; @@ -55,4 +63,4 @@ typedef struct { extent_purge_t *purge; extent_split_t *split; extent_merge_t *merge; -} extent_hooks_t; +}; diff --git a/src/arena.c b/src/arena.c index c77db5d0..32e1915c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -54,25 +54,25 @@ arena_extent_dirty_npages(const extent_t *extent) static extent_t * arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->lock); - return (extent_alloc_cache(tsdn, arena, extent_hooks, new_addr, usize, + return (extent_alloc_cache(tsdn, arena, r_extent_hooks, new_addr, usize, pad, alignment, zero, slab)); } extent_t * arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero) + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero) { extent_t *extent; malloc_mutex_lock(tsdn, &arena->lock); - extent = arena_extent_cache_alloc_locked(tsdn, arena, extent_hooks, + extent = arena_extent_cache_alloc_locked(tsdn, arena, r_extent_hooks, new_addr, size, 0, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); @@ -81,22 +81,22 @@ arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, static void arena_extent_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) + extent_hooks_t **r_extent_hooks, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &arena->lock); - 
extent_dalloc_cache(tsdn, arena, extent_hooks, extent); + extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); arena_maybe_purge(tsdn, arena); } void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) + extent_hooks_t **r_extent_hooks, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->lock); - arena_extent_cache_dalloc_locked(tsdn, arena, extent_hooks, extent); + arena_extent_cache_dalloc_locked(tsdn, arena, r_extent_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); } @@ -321,12 +321,12 @@ arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) static extent_t * arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, size_t usize, size_t alignment, bool *zero) + extent_hooks_t **r_extent_hooks, size_t usize, size_t alignment, bool *zero) { extent_t *extent; bool commit = true; - extent = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, usize, + extent = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); if (extent == NULL) { /* Revert optimistic stats updates. 
*/ @@ -347,7 +347,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { extent_t *extent; - extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; malloc_mutex_lock(tsdn, &arena->lock); @@ -373,7 +373,7 @@ void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked) { - extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; if (!locked) malloc_mutex_lock(tsdn, &arena->lock); @@ -735,7 +735,7 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) } static size_t -arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, +arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, size_t ndirty_limit, extent_t *purge_extents_sentinel) { extent_t *extent, *next; @@ -757,7 +757,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, /* Allocate. 
*/ zero = false; textent = arena_extent_cache_alloc_locked(tsdn, arena, - extent_hooks, extent_base_get(extent), + r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, CACHELINE, &zero, false); assert(textent == extent); assert(zero == extent_zeroed_get(extent)); @@ -774,8 +774,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static size_t -arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *purge_extents_sentinel) +arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *purge_extents_sentinel) { UNUSED size_t nmadvise; size_t npurged; @@ -793,7 +793,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, next = qr_next(extent, qr_link); extent_ring_remove(extent); - extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, extent); } if (config_stats) { @@ -816,7 +816,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { - extent_hooks_t extent_hooks = extent_hooks_get(tsdn, arena); + extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t npurge, npurged; extent_t purge_extents_sentinel; @@ -866,7 +866,7 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { - extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); @@ -988,7 +988,7 @@ arena_bin_slabs_full_remove(extent_t *slab) static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, const arena_bin_info_t *bin_info) + extent_hooks_t **r_extent_hooks, const arena_bin_info_t 
*bin_info) { extent_t *slab; bool zero, commit; @@ -996,7 +996,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; malloc_mutex_unlock(tsdn, &arena->lock); - slab = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, + slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, &commit, true); malloc_mutex_lock(tsdn, &arena->lock); @@ -1009,7 +1009,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, { extent_t *slab; arena_slab_data_t *slab_data; - extent_hooks_t extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; bool zero; zero = false; @@ -1796,7 +1796,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) extent_heap_new(&arena->extents_retained[i]); } - arena->extent_hooks = extent_hooks_default; + arena->extent_hooks = (extent_hooks_t *)&extent_hooks_default; if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", WITNESS_RANK_ARENA_EXTENTS)) diff --git a/src/ctl.c b/src/ctl.c index b91ea135..813d5fab 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1646,15 +1646,15 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { - extent_hooks_t old_extent_hooks, new_extent_hooks; - WRITE(new_extent_hooks, extent_hooks_t); - old_extent_hooks = extent_hooks_set(tsd_tsdn(tsd), - arena, &new_extent_hooks); - READ(old_extent_hooks, extent_hooks_t); + extent_hooks_t *old_extent_hooks, *new_extent_hooks; + WRITE(new_extent_hooks, extent_hooks_t *); + old_extent_hooks = extent_hooks_set(arena, + new_extent_hooks); + READ(old_extent_hooks, extent_hooks_t *); } else { - extent_hooks_t old_extent_hooks = - extent_hooks_get(tsd_tsdn(tsd), arena); - READ(old_extent_hooks, extent_hooks_t); + extent_hooks_t *old_extent_hooks = + extent_hooks_get(arena); + READ(old_extent_hooks, extent_hooks_t *); } } else { ret = 
EFAULT; diff --git a/src/extent.c b/src/extent.c index 0c41c066..0c66e31a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -6,20 +6,23 @@ rtree_t extents_rtree; -static void *extent_alloc_default(void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -static bool extent_dalloc_default(void *addr, size_t size, bool committed, +static void *extent_alloc_default(extent_hooks_t *extent_hooks, + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind); +static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_decommit_default(extent_hooks_t *extent_hooks, + void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t size_a, size_t size_b, bool committed, + unsigned arena_ind); +static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -static bool extent_commit_default(void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind); -static bool extent_decommit_default(void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind); -static bool extent_purge_default(void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind); -static bool extent_split_default(void *addr, size_t size, size_t size_a, - size_t size_b, bool committed, unsigned arena_ind); -static bool extent_merge_default(void *addr_a, size_t size_a, void *addr_b, - size_t size_b, bool committed, unsigned 
arena_ind); const extent_hooks_t extent_hooks_default = { extent_alloc_default, @@ -42,7 +45,7 @@ static size_t highchunks; */ static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_heap_t extent_heaps[NPSIZES], + extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent); /******************************************************************************/ @@ -73,89 +76,34 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); } -static extent_hooks_t -extent_hooks_get_locked(arena_t *arena) +extent_hooks_t * +extent_hooks_get(arena_t *arena) { - return (arena->extent_hooks); + return ((extent_hooks_t *)atomic_read_p(&arena->extent_hooks_pun)); } -extent_hooks_t -extent_hooks_get(tsdn_t *tsdn, arena_t *arena) +extent_hooks_t * +extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) { - extent_hooks_t extent_hooks; + extent_hooks_t *old_extent_hooks = extent_hooks_get(arena); + union { + extent_hooks_t **h; + void **v; + } u; - malloc_mutex_lock(tsdn, &arena->extents_mtx); - extent_hooks = extent_hooks_get_locked(arena); - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - - return (extent_hooks); -} - -extent_hooks_t -extent_hooks_set(tsdn_t *tsdn, arena_t *arena, - const extent_hooks_t *extent_hooks) -{ - extent_hooks_t old_extent_hooks; - - malloc_mutex_lock(tsdn, &arena->extents_mtx); - old_extent_hooks = arena->extent_hooks; - /* - * Copy each field atomically so that it is impossible for readers to - * see partially updated pointers. There are places where readers only - * need one hook function pointer (therefore no need to copy the - * entirety of arena->extent_hooks), and stale reads do not affect - * correctness, so they perform unlocked reads. 
- */ -#define ATOMIC_COPY_HOOK(n) do { \ - union { \ - extent_##n##_t **n; \ - void **v; \ - } u; \ - u.n = &arena->extent_hooks.n; \ - atomic_write_p(u.v, extent_hooks->n); \ -} while (0) - ATOMIC_COPY_HOOK(alloc); - ATOMIC_COPY_HOOK(dalloc); - ATOMIC_COPY_HOOK(commit); - ATOMIC_COPY_HOOK(decommit); - ATOMIC_COPY_HOOK(purge); - ATOMIC_COPY_HOOK(split); - ATOMIC_COPY_HOOK(merge); -#undef ATOMIC_COPY_HOOK - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + u.h = &arena->extent_hooks; + atomic_write_p(u.v, extent_hooks); return (old_extent_hooks); } static void -extent_hooks_assure_initialized_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, bool locked) -{ - static const extent_hooks_t uninitialized_hooks = - EXTENT_HOOKS_INITIALIZER; - - if (memcmp(extent_hooks, &uninitialized_hooks, sizeof(extent_hooks_t)) - == 0) { - *extent_hooks = locked ? extent_hooks_get_locked(arena) : - extent_hooks_get(tsdn, arena); - } -} - -static void -extent_hooks_assure_initialized_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks) +extent_hooks_assure_initialized(arena_t *arena, extent_hooks_t **r_extent_hooks) { - extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, true); -} - -static void -extent_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks) -{ - - extent_hooks_assure_initialized_impl(tsdn, arena, extent_hooks, false); + if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) + *r_extent_hooks = extent_hooks_get(arena); } #ifdef JEMALLOC_JET @@ -409,7 +357,7 @@ extent_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], } static void -extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, +extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool cache, extent_t *extent) { @@ -418,14 +366,14 @@ extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, * that this is only a virtual memory leak. 
*/ if (cache) { - extent_purge_wrapper(tsdn, arena, extent_hooks, extent, 0, + extent_purge_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent)); } extent_dalloc(tsdn, arena, extent); } static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, +extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) @@ -444,7 +392,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, if (alloc_size < usize) return (NULL); malloc_mutex_lock(tsdn, &arena->extents_mtx); - extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); + extent_hooks_assure_initialized(arena, r_extent_hooks); if (new_addr != NULL) { rtree_elm_t *elm; @@ -482,10 +430,11 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, /* Split the lead. */ if (leadsize != 0) { extent_t *lead = extent; - extent = extent_split_wrapper(tsdn, arena, extent_hooks, lead, - leadsize, leadsize, size + trailsize, usize + trailsize); + extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, + lead, leadsize, leadsize, size + trailsize, usize + + trailsize); if (extent == NULL) { - extent_leak(tsdn, arena, extent_hooks, cache, lead); + extent_leak(tsdn, arena, r_extent_hooks, cache, lead); malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } @@ -496,9 +445,10 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, /* Split the trail. 
*/ if (trailsize != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - extent_hooks, extent, size, usize, trailsize, trailsize); + r_extent_hooks, extent, size, usize, trailsize, trailsize); if (trail == NULL) { - extent_leak(tsdn, arena, extent_hooks, cache, extent); + extent_leak(tsdn, arena, r_extent_hooks, cache, + extent); malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } @@ -513,10 +463,10 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } if (!extent_committed_get(extent) && - extent_hooks->commit(extent_base_get(extent), + (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); - extent_record(tsdn, arena, extent_hooks, extent_heaps, cache, + extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, extent); return (NULL); } @@ -582,9 +532,9 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } extent_t * -extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool slab) +extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab) { extent_t *extent; bool commit; @@ -593,7 +543,7 @@ extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, assert(alignment != 0); commit = true; - extent = extent_recycle(tsdn, arena, extent_hooks, + extent = extent_recycle(tsdn, arena, r_extent_hooks, arena->extents_cached, true, new_addr, usize, pad, alignment, zero, &commit, slab); if (extent == NULL) @@ -603,13 +553,15 @@ extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static void * -extent_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit, unsigned arena_ind) 
+extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; tsdn_t *tsdn; arena_t *arena; + assert(extent_hooks == &extent_hooks_default); + tsdn = tsdn_fetch(); arena = arena_get(tsdn, arena_ind, false); /* @@ -627,7 +579,7 @@ extent_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; @@ -635,7 +587,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, assert(usize != 0); assert(alignment != 0); - extent = extent_recycle(tsdn, arena, extent_hooks, + extent = extent_recycle(tsdn, arena, r_extent_hooks, arena->extents_retained, false, new_addr, usize, pad, alignment, zero, commit, slab); if (extent != NULL && config_stats) { @@ -648,7 +600,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t usize, size_t pad, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; @@ -659,8 +611,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent = extent_alloc(tsdn, arena); if (extent == NULL) return (NULL); - addr = extent_hooks->alloc(new_addr, size, alignment, zero, commit, - arena->ind); + addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, size, + alignment, zero, commit, arena->ind); if (addr == NULL) { extent_dalloc(tsdn, arena, extent); return (NULL); @@ -669,7 +621,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, if (pad != 0) extent_addr_randomize(tsdn, extent, alignment); if (extent_register(tsdn, 
extent)) { - extent_leak(tsdn, arena, extent_hooks, false, extent); + extent_leak(tsdn, arena, r_extent_hooks, false, extent); return (NULL); } @@ -677,18 +629,18 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } extent_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool *commit, bool slab) +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + extent_hooks_assure_initialized(arena, r_extent_hooks); - extent = extent_alloc_retained(tsdn, arena, extent_hooks, new_addr, + extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, usize, pad, alignment, zero, commit, slab); if (extent == NULL) { - extent = extent_alloc_wrapper_hard(tsdn, arena, extent_hooks, + extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, usize, pad, alignment, zero, commit, slab); } @@ -712,8 +664,9 @@ extent_can_coalesce(const extent_t *a, const extent_t *b) } static void -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES], bool cache) +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + extent_heap_t extent_heaps[NPSIZES], bool cache) { if (!extent_can_coalesce(a, b)) @@ -725,7 +678,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, arena_extent_cache_maybe_remove(extent_arena_get(a), a, cache); arena_extent_cache_maybe_remove(extent_arena_get(b), b, cache); - if (extent_merge_wrapper(tsdn, arena, extent_hooks, a, b)) { + if (extent_merge_wrapper(tsdn, arena, r_extent_hooks, a, b)) { extent_heaps_insert(extent_heaps, a); 
extent_heaps_insert(extent_heaps, b); arena_extent_cache_maybe_insert(extent_arena_get(a), a, cache); @@ -738,7 +691,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static void -extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, +extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) { extent_t *prev, *next; @@ -748,7 +701,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, assert(!cache || !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &arena->extents_mtx); - extent_hooks_assure_initialized_locked(tsdn, arena, extent_hooks); + extent_hooks_assure_initialized(arena, r_extent_hooks); extent_usize_set(extent, 0); extent_active_set(extent, false); @@ -766,7 +719,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, next = rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_past_get(extent), false); if (next != NULL) { - extent_try_coalesce(tsdn, arena, extent_hooks, extent, next, + extent_try_coalesce(tsdn, arena, r_extent_hooks, extent, next, extent_heaps, cache); } @@ -774,7 +727,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, prev = rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_before_get(extent), false); if (prev != NULL) { - extent_try_coalesce(tsdn, arena, extent_hooks, prev, extent, + extent_try_coalesce(tsdn, arena, r_extent_hooks, prev, extent, extent_heaps, cache); } @@ -782,8 +735,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } void -extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent) +extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); @@ -792,15 +745,17 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t 
*extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, extent_hooks, arena->extents_cached, true, + extent_record(tsdn, arena, r_extent_hooks, arena->extents_cached, true, extent); } static bool -extent_dalloc_default(void *addr, size_t size, bool committed, - unsigned arena_ind) +extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); + if (!have_dss || !extent_in_dss(tsdn_fetch(), addr)) return (extent_dalloc_mmap(addr, size)); return (true); @@ -808,7 +763,7 @@ extent_dalloc_default(void *addr, size_t size, bool committed, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) + extent_hooks_t **r_extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); @@ -816,9 +771,9 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + extent_hooks_assure_initialized(arena, r_extent_hooks); /* Try to deallocate. */ - if (!extent_hooks->dalloc(extent_base_get(extent), + if (!(*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena->ind)) { extent_deregister(tsdn, extent); @@ -828,66 +783,73 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, /* Try to decommit; purge if that fails. 
*/ if (extent_committed_get(extent)) { extent_committed_set(extent, - extent_hooks->decommit(extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), - arena->ind)); + (*r_extent_hooks)->decommit(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), 0, + extent_size_get(extent), arena->ind)); } extent_zeroed_set(extent, !extent_committed_get(extent) || - !extent_hooks->purge(extent_base_get(extent), + !(*r_extent_hooks)->purge(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); if (config_stats) arena->stats.retained += extent_size_get(extent); - extent_record(tsdn, arena, extent_hooks, arena->extents_retained, false, - extent); + extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, + false, extent); } static bool -extent_commit_default(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) +extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); + return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), length)); } bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - return (extent_hooks->commit(extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); + extent_hooks_assure_initialized(arena, r_extent_hooks); + return ((*r_extent_hooks)->commit(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), offset, length, + arena->ind)); } static bool -extent_decommit_default(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) +extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t 
length, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); + return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), length)); } bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - return (extent_hooks->decommit(extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); + extent_hooks_assure_initialized(arena, r_extent_hooks); + return ((*r_extent_hooks)->decommit(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), offset, length, + arena->ind)); } static bool -extent_purge_default(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) +extent_purge_default(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); assert(addr != NULL); assert((offset & PAGE_MASK) == 0); assert(length != 0); @@ -898,29 +860,33 @@ extent_purge_default(void *addr, size_t size, size_t offset, size_t length, } bool -extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent, size_t offset, size_t length) +extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length) { - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - return (extent_hooks->purge(extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); + extent_hooks_assure_initialized(arena, r_extent_hooks); + return ((*r_extent_hooks)->purge(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), offset, length, + arena->ind)); } static bool -extent_split_default(void *addr, size_t size, size_t size_a, size_t size_b, - bool committed, unsigned arena_ind) 
+extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); + if (!maps_coalesce) return (true); return (false); } extent_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, - size_t usize_b) +extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + size_t usize_a, size_t size_b, size_t usize_b) { extent_t *trail; rtree_ctx_t rtree_ctx_fallback; @@ -929,7 +895,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, assert(extent_size_get(extent) == size_a + size_b); - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); + extent_hooks_assure_initialized(arena, r_extent_hooks); trail = extent_alloc(tsdn, arena); if (trail == NULL) @@ -956,8 +922,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, &trail_elm_a, &trail_elm_b)) goto label_error_c; - if (extent_hooks->split(extent_base_get(extent), size_a + size_b, - size_a, size_b, extent_committed_get(extent), arena->ind)) + if ((*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), + size_a + size_b, size_a, size_b, extent_committed_get(extent), + arena->ind)) goto label_error_d; extent_size_set(extent, size_a); @@ -981,10 +948,12 @@ label_error_a: } static bool -extent_merge_default(void *addr_a, size_t size_a, void *addr_b, size_t size_b, - bool committed, unsigned arena_ind) +extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); + if (!maps_coalesce) return (true); if (have_dss) { @@ -997,17 +966,17 @@ extent_merge_default(void *addr_a, size_t size_a, void *addr_b, size_t size_b, } bool 
-extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - extent_t *a, extent_t *b) +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_hooks_assure_initialized(tsdn, arena, extent_hooks); - if (extent_hooks->merge(extent_base_get(a), extent_size_get(a), - extent_base_get(b), extent_size_get(b), extent_committed_get(a), - arena->ind)) + extent_hooks_assure_initialized(arena, r_extent_hooks); + if ((*r_extent_hooks)->merge(*r_extent_hooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena->ind)) return (true); /* diff --git a/src/extent_dss.c b/src/extent_dss.c index 0e34a440..9cc1d8f7 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -136,7 +136,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, dss_max = dss_next; malloc_mutex_unlock(tsdn, &dss_mtx); if (pad_size != 0) { - extent_hooks_t extent_hooks = + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; extent_dalloc_wrapper(tsdn, arena, &extent_hooks, pad); diff --git a/src/large.c b/src/large.c index 952d4644..5c7b4453 100644 --- a/src/large.c +++ b/src/large.c @@ -96,7 +96,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t extent_hooks = extent_hooks_get(tsdn, arena); + extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t diff = extent_size_get(extent) - (usize + large_pad); assert(oldusize > usize); @@ -129,7 +129,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); bool is_zeroed_trail = false; - 
extent_hooks_t extent_hooks = extent_hooks_get(tsdn, arena); + extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t trailsize = usize - extent_usize_get(extent); extent_t *trail; diff --git a/test/integration/extent.c b/test/integration/extent.c index 15b96a00..78176711 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -4,8 +4,8 @@ const char *malloc_conf = "junk:false"; #endif -static extent_hooks_t orig_hooks; -static extent_hooks_t old_hooks; +static extent_hooks_t *orig_hooks; +static extent_hooks_t *old_hooks; static bool do_dalloc = true; static bool do_decommit; @@ -24,96 +24,111 @@ static bool did_merge; # define TRACE_HOOK(fmt, ...) #endif -void * -extent_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit, unsigned arena_ind) +static void * +extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { - TRACE_HOOK("%s(new_addr=%p, size=%zu, alignment=%zu, *zero=%s, " - "*commit=%s, arena_ind=%u)\n", __func__, new_addr, size, alignment, - *zero ? "true" : "false", *commit ? "true" : "false", arena_ind); + TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " + "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, + new_addr, size, alignment, *zero ? "true" : "false", *commit ? + "true" : "false", arena_ind); + assert(extent_hooks->alloc == extent_alloc); did_alloc = true; - return (old_hooks.alloc(new_addr, size, alignment, zero, commit, - arena_ind)); + return (old_hooks->alloc(old_hooks, new_addr, size, alignment, zero, + commit, arena_ind)); } -bool -extent_dalloc(void *addr, size_t size, bool committed, unsigned arena_ind) +static bool +extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(addr=%p, size=%zu, committed=%s, arena_ind=%u)\n", - __func__, addr, size, committed ? 
"true" : "false", arena_ind); + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? + "true" : "false", arena_ind); + assert(extent_hooks->dalloc == extent_dalloc); did_dalloc = true; if (!do_dalloc) return (true); - return (old_hooks.dalloc(addr, size, committed, arena_ind)); + return (old_hooks->dalloc(old_hooks, addr, size, committed, arena_ind)); } -bool -extent_commit(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) +static bool +extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(addr=%p, size=%zu, offset=%zu, length=%zu, " - "arena_ind=%u)\n", __func__, addr, size, offset, length, + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert(extent_hooks->commit == extent_commit); + err = old_hooks->commit(old_hooks, addr, size, offset, length, arena_ind); - err = old_hooks.commit(addr, size, offset, length, arena_ind); did_commit = !err; return (err); } -bool -extent_decommit(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) +static bool +extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(addr=%p, size=%zu, offset=%zu, length=%zu, " - "arena_ind=%u)\n", __func__, addr, size, offset, length, - arena_ind); + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert(extent_hooks->decommit == extent_decommit); if (!do_decommit) return (true); - err = old_hooks.decommit(addr, size, offset, length, arena_ind); + err = old_hooks->decommit(old_hooks, addr, size, offset, length, + arena_ind); did_decommit = 
!err; return (err); } -bool -extent_purge(void *addr, size_t size, size_t offset, size_t length, - unsigned arena_ind) +static bool +extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { - TRACE_HOOK("%s(addr=%p, size=%zu, offset=%zu, length=%zu " - "arena_ind=%u)\n", __func__, addr, size, offset, length, - arena_ind); + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert(extent_hooks->purge == extent_purge); did_purge = true; - return (old_hooks.purge(addr, size, offset, length, arena_ind)); -} - -bool -extent_split(void *addr, size_t size, size_t size_a, size_t size_b, - bool committed, unsigned arena_ind) -{ - - TRACE_HOOK("%s(addr=%p, size=%zu, size_a=%zu, size_b=%zu, " - "committed=%s, arena_ind=%u)\n", __func__, addr, size, size_a, - size_b, committed ? "true" : "false", arena_ind); - did_split = true; - return (old_hooks.split(addr, size, size_a, size_b, committed, + return (old_hooks->purge(old_hooks, addr, size, offset, length, arena_ind)); } -bool -extent_merge(void *addr_a, size_t size_a, void *addr_b, size_t size_b, - bool committed, unsigned arena_ind) +static bool +extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(addr_a=%p, size_a=%zu, addr_b=%p size_b=%zu, " - "committed=%s, arena_ind=%u)\n", __func__, addr_a, size_a, addr_b, - size_b, committed ? "true" : "false", arena_ind); + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " + "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, + addr, size, size_a, size_b, committed ? 
"true" : "false", + arena_ind); + assert(extent_hooks->split == extent_split); + did_split = true; + return (old_hooks->split(old_hooks, addr, size, size_a, size_b, + committed, arena_ind)); +} + +static bool +extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " + "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, + addr_a, size_a, addr_b, size_b, committed ? "true" : "false", + arena_ind); + assert(extent_hooks->merge == extent_merge); did_merge = true; - return (old_hooks.merge(addr_a, size_a, addr_b, size_b, + return (old_hooks->merge(old_hooks, addr_a, size_a, addr_b, size_b, committed, arena_ind)); } @@ -125,7 +140,7 @@ TEST_BEGIN(test_extent) int flags; size_t hooks_mib[3], purge_mib[3]; size_t hooks_miblen, purge_miblen; - extent_hooks_t new_hooks = { + extent_hooks_t hooks = { extent_alloc, extent_dalloc, extent_commit, @@ -134,6 +149,7 @@ TEST_BEGIN(test_extent) extent_split, extent_merge }; + extent_hooks_t *new_hooks = &hooks; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; sz = sizeof(unsigned); @@ -146,21 +162,21 @@ TEST_BEGIN(test_extent) assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; - old_size = sizeof(extent_hooks_t); - new_size = sizeof(extent_hooks_t); + old_size = sizeof(extent_hooks_t *); + new_size = sizeof(extent_hooks_t *); assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, &new_hooks, new_size), 0, "Unexpected extent_hooks error"); orig_hooks = old_hooks; - assert_ptr_ne(old_hooks.alloc, extent_alloc, "Unexpected alloc error"); - assert_ptr_ne(old_hooks.dalloc, extent_dalloc, + assert_ptr_ne(old_hooks->alloc, extent_alloc, "Unexpected alloc error"); + assert_ptr_ne(old_hooks->dalloc, extent_dalloc, "Unexpected dalloc 
error"); - assert_ptr_ne(old_hooks.commit, extent_commit, + assert_ptr_ne(old_hooks->commit, extent_commit, "Unexpected commit error"); - assert_ptr_ne(old_hooks.decommit, extent_decommit, + assert_ptr_ne(old_hooks->decommit, extent_decommit, "Unexpected decommit error"); - assert_ptr_ne(old_hooks.purge, extent_purge, "Unexpected purge error"); - assert_ptr_ne(old_hooks.split, extent_split, "Unexpected split error"); - assert_ptr_ne(old_hooks.merge, extent_merge, "Unexpected merge error"); + assert_ptr_ne(old_hooks->purge, extent_purge, "Unexpected purge error"); + assert_ptr_ne(old_hooks->split, extent_split, "Unexpected split error"); + assert_ptr_ne(old_hooks->merge, extent_merge, "Unexpected merge error"); /* Get large size classes. */ sz = sizeof(size_t); @@ -228,19 +244,20 @@ TEST_BEGIN(test_extent) &old_hooks, new_size), 0, "Unexpected extent_hooks error"); assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, + assert_ptr_eq(old_hooks, orig_hooks, "Unexpected hooks error"); + assert_ptr_eq(old_hooks->alloc, orig_hooks->alloc, "Unexpected alloc error"); - assert_ptr_eq(old_hooks.dalloc, orig_hooks.dalloc, + assert_ptr_eq(old_hooks->dalloc, orig_hooks->dalloc, "Unexpected dalloc error"); - assert_ptr_eq(old_hooks.commit, orig_hooks.commit, + assert_ptr_eq(old_hooks->commit, orig_hooks->commit, "Unexpected commit error"); - assert_ptr_eq(old_hooks.decommit, orig_hooks.decommit, + assert_ptr_eq(old_hooks->decommit, orig_hooks->decommit, "Unexpected decommit error"); - assert_ptr_eq(old_hooks.purge, orig_hooks.purge, + assert_ptr_eq(old_hooks->purge, orig_hooks->purge, "Unexpected purge error"); - assert_ptr_eq(old_hooks.split, orig_hooks.split, + assert_ptr_eq(old_hooks->split, orig_hooks->split, "Unexpected split error"); - assert_ptr_eq(old_hooks.merge, orig_hooks.merge, + assert_ptr_eq(old_hooks->merge, orig_hooks->merge, "Unexpected merge 
error"); } TEST_END From 8835cf3bed1888fc0110b0c59dbf2ce1288a7a8c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Jun 2016 19:25:13 -0700 Subject: [PATCH 0310/2608] Fix locking order reversal in arena_reset(). --- src/arena.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/arena.c b/src/arena.c index 32e1915c..7dcf12d5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -917,20 +917,28 @@ arena_reset(tsd_t *tsd, arena_t *arena) /* Bins. */ for (i = 0; i < NBINS; i++) { - extent_t *slab, *next; + extent_t *slab; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { - arena_slab_dalloc(tsd_tsdn(tsd), arena, bin->slabcur); + slab = bin->slabcur; bin->slabcur = NULL; + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != - NULL) + NULL) { + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } for (slab = qr_next(&bin->slabs_full, qr_link); slab != - &bin->slabs_full; slab = next) { - next = qr_next(slab, qr_link); + &bin->slabs_full; slab = qr_next(&bin->slabs_full, + qr_link)) { + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } if (config_stats) { bin->stats.curregs = 0; From f02fec8839856fad3106f429f9316e844557e99f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Jun 2016 19:39:14 -0700 Subject: [PATCH 0311/2608] Silence a bogus compiler warning. 
--- src/ctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index 813d5fab..535f1eab 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1646,7 +1646,9 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { - extent_hooks_t *old_extent_hooks, *new_extent_hooks; + extent_hooks_t *old_extent_hooks; + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); old_extent_hooks = extent_hooks_set(arena, new_extent_hooks); From 04942c3d9068647b2d4600b72ab6aaeb4ebf920c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Jun 2016 20:04:30 -0700 Subject: [PATCH 0312/2608] Remove a stray memset(), and fix a junk filling test regression. --- include/jemalloc/internal/large.h | 3 +++ include/jemalloc/internal/private_symbols.txt | 1 + src/large.c | 13 ++++++++-- test/unit/junk.c | 24 +++++++++++++++---- 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h index afaa6c3c..8345f89e 100644 --- a/include/jemalloc/internal/large.h +++ b/include/jemalloc/internal/large.h @@ -19,8 +19,11 @@ void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, #ifdef JEMALLOC_JET typedef void (large_dalloc_junk_t)(void *, size_t); extern large_dalloc_junk_t *large_dalloc_junk; +typedef void (large_dalloc_maybe_junk_t)(tsdn_t *, void *, size_t); +extern large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; #else void large_dalloc_junk(void *ptr, size_t usize); +void large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize); #endif void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 07e7f287..d2c882d3 
100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -250,6 +250,7 @@ jemalloc_prefork large_dalloc large_dalloc_junk large_dalloc_junked_locked +large_dalloc_maybe_junk large_malloc large_palloc large_prof_tctx_get diff --git a/src/large.c b/src/large.c index 5c7b4453..325b5f10 100644 --- a/src/large.c +++ b/src/large.c @@ -76,7 +76,11 @@ large_dalloc_junk(void *ptr, size_t usize) large_dalloc_junk_t *large_dalloc_junk = JEMALLOC_N(n_large_dalloc_junk); #endif -static void +#ifdef JEMALLOC_JET +#undef large_dalloc_maybe_junk +#define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) +#endif +void large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) { @@ -87,9 +91,14 @@ large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) */ if (!config_munmap || (have_dss && extent_in_dss(tsdn, ptr))) large_dalloc_junk(ptr, usize); - memset(ptr, JEMALLOC_FREE_JUNK, usize); } } +#ifdef JEMALLOC_JET +#undef large_dalloc_maybe_junk +#define large_dalloc_maybe_junk JEMALLOC_N(large_dalloc_maybe_junk) +large_dalloc_maybe_junk_t *large_dalloc_maybe_junk = + JEMALLOC_N(n_large_dalloc_maybe_junk); +#endif static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) diff --git a/test/unit/junk.c b/test/unit/junk.c index 7a923509..dea0f615 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -10,6 +10,7 @@ const char *malloc_conf = static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; static large_dalloc_junk_t *large_dalloc_junk_orig; +static large_dalloc_maybe_junk_t *large_dalloc_maybe_junk_orig; static void *watch_for_junking; static bool saw_junking; @@ -39,13 +40,23 @@ arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) static void large_dalloc_junk_intercept(void *ptr, size_t usize) { + size_t i; large_dalloc_junk_orig(ptr, usize); - /* - * The conditions under which junk filling actually occurs are nuanced - * enough that it 
doesn't make sense to duplicate the decision logic in - * test code, so don't actually check that the region is junk-filled. - */ + for (i = 0; i < usize; i++) { + assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + "Missing junk fill for byte %zu/%zu of deallocated region", + i, usize); + } + if (ptr == watch_for_junking) + saw_junking = true; +} + +static void +large_dalloc_maybe_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +{ + + large_dalloc_maybe_junk_orig(tsdn, ptr, usize); if (ptr == watch_for_junking) saw_junking = true; } @@ -61,6 +72,8 @@ test_junk(size_t sz_min, size_t sz_max) arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; large_dalloc_junk_orig = large_dalloc_junk; large_dalloc_junk = large_dalloc_junk_intercept; + large_dalloc_maybe_junk_orig = large_dalloc_maybe_junk; + large_dalloc_maybe_junk = large_dalloc_maybe_junk_intercept; } sz_prev = 0; @@ -111,6 +124,7 @@ test_junk(size_t sz_min, size_t sz_max) if (opt_junk_free) { arena_dalloc_junk_small = arena_dalloc_junk_small_orig; large_dalloc_junk = large_dalloc_junk_orig; + large_dalloc_maybe_junk = large_dalloc_maybe_junk_orig; } } From 42faa9e3e0b4a9347c46153356163bd921c6e90c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 14:01:32 -0700 Subject: [PATCH 0313/2608] Work around legitimate xallocx() failures during testing. With the removal of subchunk size class infrastructure, there are no large size classes that are guaranteed to be re-expandable in place unless munmap() is disabled. Work around these legitimate failures with rallocx() fallback calls. If there were no test configuration for which the xallocx() calls succeeded, it would be important to override the extent hooks for testing purposes, but by default these tests don't use the rallocx() fallbacks on Linux, so test coverage is still sufficient. 
--- test/integration/xallocx.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 4ff099f8..72db818a 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -249,8 +249,10 @@ TEST_BEGIN(test_extra_large) assert_zu_ge(xallocx(p, smallmax, 0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large3, 0, flags), large3, - "Unexpected xallocx() behavior"); + if (xallocx(p, large3, 0, flags) != large3) { + p = rallocx(p, large3, flags); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + } /* Test size decrease with non-zero extra. */ assert_zu_eq(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); @@ -281,8 +283,10 @@ TEST_BEGIN(test_extra_large) assert_zu_le(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large3, 0, flags), large3, - "Unexpected xallocx() behavior"); + if (xallocx(p, large3, 0, flags) != large3) { + p = rallocx(p, large3, flags); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + } /* Test size+extra overflow. */ assert_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax, "Unexpected xallocx() behavior"); @@ -360,8 +364,10 @@ test_zero(size_t szmin, size_t szmax) for (sz = szmin; sz < szmax; sz = nsz) { nsz = nallocx(sz+1, flags); - assert_zu_eq(xallocx(p, sz+1, 0, flags), nsz, - "Unexpected xallocx() failure"); + if (xallocx(p, sz+1, 0, flags) != nsz) { + p = rallocx(p, sz+1, flags); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + } assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); assert_false(validate_fill(p, 0x00, sz, nsz-sz), From c4bb17f891768cb57d4559d9ffb53c304448dcdc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 14:43:20 -0700 Subject: [PATCH 0314/2608] Fix gdump triggering regression. 
Now that extents are not multiples of chunksize, it's necessary to track pages rather than chunks. --- src/extent.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/src/extent.c b/src/extent.c index 0c66e31a..0ea10fb9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -35,8 +35,8 @@ const extent_hooks_t extent_hooks_default = { }; /* Used exclusively for gdump triggering. */ -static size_t curchunks; -static size_t highchunks; +static size_t curpages; +static size_t highpages; /******************************************************************************/ /* @@ -281,16 +281,15 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { - size_t nadd = (extent_size_get(extent) == 0) ? 1 : - extent_size_get(extent) / chunksize; - size_t cur = atomic_add_z(&curchunks, nadd); - size_t high = atomic_read_z(&highchunks); - while (cur > high && atomic_cas_z(&highchunks, high, cur)) { + size_t nadd = extent_size_get(extent) >> LG_PAGE; + size_t cur = atomic_add_z(&curpages, nadd); + size_t high = atomic_read_z(&highpages); + while (cur > high && atomic_cas_z(&highpages, high, cur)) { /* * Don't refresh cur, because it may have decreased - * since this thread lost the highchunks update race. + * since this thread lost the highpages update race. */ - high = atomic_read_z(&highchunks); + high = atomic_read_z(&highpages); } if (cur > high && prof_gdump_get_unlocked()) prof_gdump(tsdn); @@ -329,10 +328,9 @@ extent_deregister(tsdn_t *tsdn, const extent_t *extent) extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { - size_t nsub = (extent_size_get(extent) == 0) ? 
1 : - extent_size_get(extent) / chunksize; - assert(atomic_read_z(&curchunks) >= nsub); - atomic_sub_z(&curchunks, nsub); + size_t nsub = extent_size_get(extent) >> LG_PAGE; + assert(atomic_read_z(&curpages) >= nsub); + atomic_sub_z(&curpages, nsub); } } From 4e910fc958a9df0b05ff91666c6a8cf6c76b0e76 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 15:27:20 -0700 Subject: [PATCH 0315/2608] Fix extent_alloc_dss() regressions. Page-align the gap, if any, and add/use extent_dalloc_gap(), which registers the gap extent before deallocation. --- include/jemalloc/internal/extent.h | 1 + include/jemalloc/internal/private_symbols.txt | 1 + src/extent.c | 12 ++++++ src/extent_dss.c | 41 +++++++++---------- 4 files changed, 33 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 6e155206..d7944c1c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -105,6 +105,7 @@ extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d2c882d3..a489e14a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -158,6 +158,7 @@ extent_committed_get extent_committed_set extent_dalloc extent_dalloc_cache +extent_dalloc_gap extent_dalloc_mmap extent_dalloc_wrapper extent_decommit_wrapper diff --git a/src/extent.c b/src/extent.c index 0ea10fb9..838cb73b 100644 --- a/src/extent.c +++ 
b/src/extent.c @@ -732,6 +732,18 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_unlock(tsdn, &arena->extents_mtx); } +void +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) +{ + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + + if (extent_register(tsdn, extent)) { + extent_leak(tsdn, arena, &extent_hooks, false, extent); + return; + } + extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); +} + void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) diff --git a/src/extent_dss.c b/src/extent_dss.c index 9cc1d8f7..9c5cd25a 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -70,7 +70,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - extent_t *pad; + extent_t *gap; cassert(have_dss); assert(size > 0); @@ -83,8 +83,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - pad = extent_alloc(tsdn, arena); - if (pad == NULL) + gap = extent_alloc(tsdn, arena); + if (gap == NULL) return (NULL); malloc_mutex_lock(tsdn, &dss_mtx); @@ -95,8 +95,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * malloc. */ while (true) { - void *pad_addr, *dss_next; - size_t pad_size; + void *gap_addr, *dss_next; + size_t gap_size; intptr_t incr; /* Avoid an unnecessary system call. */ @@ -111,23 +111,23 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, break; /* - * Compute how much pad space (if any) is necessary to + * Compute how much gap space (if any) is necessary to * satisfy alignment. This space can be recycled for * later use. 
*/ - pad_addr = (void *)((uintptr_t)dss_max); - ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, - alignment); - pad_size = (uintptr_t)ret - (uintptr_t)pad_addr; - if (pad_size != 0) { - extent_init(pad, arena, pad_addr, pad_size, - pad_size, false, false, true, false); + gap_addr = (void *)(PAGE_CEILING((uintptr_t)dss_max)); + ret = (void *)ALIGNMENT_CEILING((uintptr_t)gap_addr, + PAGE_CEILING(alignment)); + gap_size = (uintptr_t)ret - (uintptr_t)gap_addr; + if (gap_size != 0) { + extent_init(gap, arena, gap_addr, gap_size, + gap_size, false, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || (uintptr_t)dss_next < (uintptr_t)dss_max) break; /* Wrap-around. */ - incr = pad_size + size; + incr = gap_size + size; dss_prev = extent_dss_sbrk(incr); if (dss_prev == (void *)-1) break; @@ -135,13 +135,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, /* Success. */ dss_max = dss_next; malloc_mutex_unlock(tsdn, &dss_mtx); - if (pad_size != 0) { - extent_hooks_t *extent_hooks = - EXTENT_HOOKS_INITIALIZER; - extent_dalloc_wrapper(tsdn, arena, - &extent_hooks, pad); - } else - extent_dalloc(tsdn, arena, pad); + if (gap_size != 0) + extent_dalloc_gap(tsdn, arena, gap); + else + extent_dalloc(tsdn, arena, gap); if (*zero) memset(ret, 0, size); if (!*commit) @@ -152,7 +149,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } /* OOM. */ malloc_mutex_unlock(tsdn, &dss_mtx); - extent_dalloc(tsdn, arena, pad); + extent_dalloc(tsdn, arena, gap); return (NULL); } From 9a645c612f1d80e234915de4211816504f4d0869 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 20:39:25 -0700 Subject: [PATCH 0316/2608] Fix an extent [de]allocation/[de]registration race. Deregister extents before deallocation, so that subsequent reallocation/registration doesn't race with deregistration. 
--- src/extent.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index 838cb73b..824b08d4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -298,6 +298,13 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) return (false); } +static void +extent_reregister(tsdn_t *tsdn, const extent_t *extent) +{ + bool err = extent_register(tsdn, extent); + assert(!err); +} + static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, const extent_t *extent) @@ -314,7 +321,7 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } static void -extent_deregister(tsdn_t *tsdn, const extent_t *extent) +extent_deregister(tsdn_t *tsdn, extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -323,8 +330,10 @@ extent_deregister(tsdn_t *tsdn, const extent_t *extent) extent_rtree_acquire(tsdn, rtree_ctx, extent, true, false, &elm_a, &elm_b); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); - if (extent_slab_get(extent)) + if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); + extent_slab_set(extent, false); + } extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof && opt_prof && extent_active_get(extent)) { @@ -782,14 +791,18 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); extent_hooks_assure_initialized(arena, r_extent_hooks); - /* Try to deallocate. */ + /* + * Try to deallocate. Deregister first to avoid a race with other + * allocating threads, and reregister if deallocation fails. + */ + extent_deregister(tsdn, extent); if (!(*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena->ind)) { - extent_deregister(tsdn, extent); extent_dalloc(tsdn, arena, extent); return; } + extent_reregister(tsdn, extent); /* Try to decommit; purge if that fails. 
*/ if (extent_committed_get(extent)) { extent_committed_set(extent, From 487093d999c7d45bee8047604fda1b5ba0f3f382 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 22:08:20 -0700 Subject: [PATCH 0317/2608] Fix regressions related extent splitting failures. Fix a fundamental extent_split_wrapper() bug in an error path. Fix extent_recycle() to deregister unsplittable extents before leaking them. Relax xallocx() test assertions so that unsplittable extents don't cause test failures. --- src/extent.c | 4 +++- test/integration/xallocx.c | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index 824b08d4..841afa42 100644 --- a/src/extent.c +++ b/src/extent.c @@ -441,6 +441,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, lead, leadsize, leadsize, size + trailsize, usize + trailsize); if (extent == NULL) { + extent_deregister(tsdn, lead); extent_leak(tsdn, arena, r_extent_hooks, cache, lead); malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); @@ -454,6 +455,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *trail = extent_split_wrapper(tsdn, arena, r_extent_hooks, extent, size, usize, trailsize, trailsize); if (trail == NULL) { + extent_deregister(tsdn, extent); extent_leak(tsdn, arena, r_extent_hooks, cache, extent); malloc_mutex_unlock(tsdn, &arena->extents_mtx); @@ -961,7 +963,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, return (trail); label_error_d: - extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); + extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); label_error_c: extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); label_error_b: diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 72db818a..4dcf08da 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -258,7 +258,7 @@ TEST_BEGIN(test_extra_large) "Unexpected xallocx() behavior"); assert_zu_eq(xallocx(p, 
large2, large3 - large2, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large1, large2 - large1, flags), large2, + assert_zu_ge(xallocx(p, large1, large2 - large1, flags), large2, "Unexpected xallocx() behavior"); assert_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1, "Unexpected xallocx() behavior"); @@ -357,8 +357,10 @@ test_zero(size_t szmin, size_t szmax) /* Shrink in place so that we can expect growing in place to succeed. */ sz = szmin; - assert_zu_eq(xallocx(p, sz, 0, flags), sz, - "Unexpected xallocx() error"); + if (xallocx(p, sz, 0, flags) != sz) { + p = rallocx(p, sz, flags); + assert_ptr_not_null(p, "Unexpected rallocx() failure"); + } assert_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); From 0c5cec833fb2b075222ee19a41c03f32178d3be1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 22:30:31 -0700 Subject: [PATCH 0318/2608] Relax extent hook tests to work with unsplittable extents. --- test/integration/extent.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/test/integration/extent.c b/test/integration/extent.c index 78176711..10de8bbf 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -15,6 +15,7 @@ static bool did_dalloc; static bool did_commit; static bool did_decommit; static bool did_purge; +static bool tried_split; static bool did_split; static bool did_merge; @@ -106,30 +107,35 @@ static bool extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr, size, size_a, size_b, committed ? 
"true" : "false", arena_ind); assert(extent_hooks->split == extent_split); - did_split = true; - return (old_hooks->split(old_hooks, addr, size, size_a, size_b, - committed, arena_ind)); + tried_split = true; + err = old_hooks->split(old_hooks, addr, size, size_a, size_b, committed, + arena_ind); + did_split = !err; + return (err); } static bool extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + bool err; TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr_a, size_a, addr_b, size_b, committed ? "true" : "false", arena_ind); assert(extent_hooks->merge == extent_merge); - did_merge = true; - return (old_hooks->merge(old_hooks, addr_a, size_a, addr_b, size_b, - committed, arena_ind)); + err = old_hooks->merge(old_hooks, addr_a, size_a, addr_b, size_b, + committed, arena_ind); + did_merge = !err; + return (err); } TEST_BEGIN(test_extent) @@ -199,6 +205,7 @@ TEST_BEGIN(test_extent) did_dalloc = false; did_decommit = false; did_purge = false; + tried_split = false; did_split = false; xallocx_success_a = (xallocx(p, large0, 0, flags) == large0); assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), @@ -208,7 +215,7 @@ TEST_BEGIN(test_extent) assert_false(did_decommit, "Unexpected decommit"); assert_true(did_purge, "Expected purge"); } - assert_true(did_split, "Expected split"); + assert_true(tried_split, "Expected split"); dallocx(p, flags); do_dalloc = true; @@ -219,6 +226,7 @@ TEST_BEGIN(test_extent) assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; + tried_split = false; did_split = false; did_merge = false; xallocx_success_b = (xallocx(p, large0, 0, flags) == large0); @@ -227,7 +235,10 @@ TEST_BEGIN(test_extent) if (xallocx_success_b) assert_true(did_split, "Expected split"); xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) 
== large0 * 2); - assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); + if (did_split) { + assert_b_eq(did_decommit, did_commit, + "Expected decommit/commit match"); + } if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); dallocx(p, flags); From 10b9087b14d96e6d7f2d14788668bdc346c383ad Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Jun 2016 23:24:52 -0700 Subject: [PATCH 0319/2608] Set 'committed' in extent_[de]commit_wrapper(). --- src/extent.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/extent.c b/src/extent.c index 841afa42..51a47644 100644 --- a/src/extent.c +++ b/src/extent.c @@ -807,10 +807,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_reregister(tsdn, extent); /* Try to decommit; purge if that fails. */ if (extent_committed_get(extent)) { - extent_committed_set(extent, - (*r_extent_hooks)->decommit(*r_extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena->ind)); + extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, + 0, extent_size_get(extent)); } extent_zeroed_set(extent, !extent_committed_get(extent) || !(*r_extent_hooks)->purge(*r_extent_hooks, extent_base_get(extent), @@ -839,11 +837,14 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { + bool err; extent_hooks_assure_initialized(arena, r_extent_hooks); - return ((*r_extent_hooks)->commit(*r_extent_hooks, + err = (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena->ind)); + arena->ind); + extent_committed_set(extent, extent_committed_get(extent) || !err); + return (err); } static bool @@ -862,11 +863,15 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { + bool err; 
extent_hooks_assure_initialized(arena, r_extent_hooks); - return ((*r_extent_hooks)->decommit(*r_extent_hooks, + + err = (*r_extent_hooks)->decommit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena->ind)); + arena->ind); + extent_committed_set(extent, extent_committed_get(extent) && err); + return (err); } static bool From 02a475d89aad1a7f94b3a102923a6527e05ca055 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 6 Jun 2016 15:32:01 -0700 Subject: [PATCH 0320/2608] Use extent_commit_wrapper() rather than directly calling commit hook. As a side effect this causes the extent's 'committed' flag to be updated. --- src/extent.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index 51a47644..32f34887 100644 --- a/src/extent.c +++ b/src/extent.c @@ -471,9 +471,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_usize_set(extent, usize); } - if (!extent_committed_get(extent) && - (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), arena->ind)) { + if (!extent_committed_get(extent) && extent_commit_wrapper(tsdn, arena, + r_extent_hooks, extent, 0, extent_size_get(extent))) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, extent); From cc289f40b6e7b119566abf8a1c09c97e08597a3d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jun 2016 13:37:22 -0700 Subject: [PATCH 0321/2608] Propagate tsdn to default extent hooks. This avoids bootstrapping issues for configurations that require allocation during tsd initialization. This resolves #390. 
--- src/extent.c | 103 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 78 insertions(+), 25 deletions(-) diff --git a/src/extent.c b/src/extent.c index 32f34887..cb67a27c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -560,11 +560,24 @@ extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, return (extent); } +static void * +extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) +{ + void *ret; + + ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + commit, arena->dss_prec); + if (ret == NULL) + return (NULL); + + return (ret); +} + static void * extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { - void *ret; tsdn_t *tsdn; arena_t *arena; @@ -577,12 +590,9 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, * already. */ assert(arena != NULL); - ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, arena->dss_prec); - if (ret == NULL) - return (NULL); - return (ret); + return (extent_alloc_default_impl(tsdn, arena, new_addr, size, + alignment, zero, commit)); } static extent_t * @@ -619,8 +629,14 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent = extent_alloc(tsdn, arena); if (extent == NULL) return (NULL); - addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, size, - alignment, zero, commit, arena->ind); + if (*r_extent_hooks == &extent_hooks_default) { + /* Call directly to propagate tsdn. 
*/ + addr = extent_alloc_default_impl(tsdn, arena, new_addr, size, + alignment, zero, commit); + } else { + addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, size, + alignment, zero, commit, arena->ind); + } if (addr == NULL) { extent_dalloc(tsdn, arena, extent); return (NULL); @@ -769,22 +785,34 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent); } +static bool +extent_dalloc_default_impl(tsdn_t *tsdn, void *addr, size_t size) +{ + + if (!have_dss || !extent_in_dss(tsdn, addr)) + return (extent_dalloc_mmap(addr, size)); + return (true); +} + + static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { + tsdn_t *tsdn; assert(extent_hooks == &extent_hooks_default); - if (!have_dss || !extent_in_dss(tsdn_fetch(), addr)) - return (extent_dalloc_mmap(addr, size)); - return (true); + tsdn = tsdn_fetch(); + + return (extent_dalloc_default_impl(tsdn, addr, size)); } void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { + bool err; assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); @@ -797,9 +825,17 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, * allocating threads, and reregister if deallocation fails. */ extent_deregister(tsdn, extent); - if (!(*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena->ind)) { + if (*r_extent_hooks == &extent_hooks_default) { + /* Call directly to propagate tsdn. 
*/ + err = extent_dalloc_default_impl(tsdn, extent_base_get(extent), + extent_size_get(extent)); + } else { + err = (*r_extent_hooks)->dalloc(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), + extent_committed_get(extent), arena->ind); + } + + if (!err) { extent_dalloc(tsdn, arena, extent); return; } @@ -977,35 +1013,52 @@ label_error_a: } static bool -extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) +extent_merge_default_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) { - assert(extent_hooks == &extent_hooks_default); - if (!maps_coalesce) return (true); - if (have_dss) { - tsdn_t *tsdn = tsdn_fetch(); - if (extent_in_dss(tsdn, addr_a) != extent_in_dss(tsdn, addr_b)) + if (have_dss && extent_in_dss(tsdn, addr_a) != extent_in_dss(tsdn, + addr_b)) return (true); - } return (false); } +static bool +extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) +{ + tsdn_t *tsdn; + + assert(extent_hooks == &extent_hooks_default); + + tsdn = tsdn_fetch(); + + return (extent_merge_default_impl(tsdn, addr_a, addr_b)); +} + bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { + bool err; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; extent_hooks_assure_initialized(arena, r_extent_hooks); - if ((*r_extent_hooks)->merge(*r_extent_hooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena->ind)) + if (*r_extent_hooks == &extent_hooks_default) { + /* Call directly to propagate tsdn. 
*/ + err = extent_merge_default_impl(tsdn, extent_base_get(a), + extent_base_get(b)); + } else { + err = (*r_extent_hooks)->merge(*r_extent_hooks, + extent_base_get(a), extent_size_get(a), extent_base_get(b), + extent_size_get(b), extent_committed_get(a), arena->ind); + } + + if (err) return (true); /* From 09d7bdb314ec4d6a8618d6958809b540f8d3cff6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jun 2016 14:00:58 -0700 Subject: [PATCH 0322/2608] Propagate tsdn to default chunk hooks. This avoids bootstrapping issues for configurations that require allocation during tsd initialization. This resolves #390. --- src/chunk.c | 82 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 20 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index adc666ff..bff59088 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -421,15 +421,11 @@ chunk_arena_get(tsdn_t *tsdn, unsigned arena_ind) } static void * -chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit, unsigned arena_ind) +chunk_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; - tsdn_t *tsdn; - arena_t *arena; - tsdn = tsdn_fetch(); - arena = chunk_arena_get(tsdn, arena_ind); ret = chunk_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, arena->dss_prec); if (ret == NULL) @@ -440,6 +436,20 @@ chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, return (ret); } +static void * +chunk_alloc_default(void *new_addr, size_t size, size_t alignment, bool *zero, + bool *commit, unsigned arena_ind) +{ + tsdn_t *tsdn; + arena_t *arena; + + tsdn = tsdn_fetch(); + arena = chunk_arena_get(tsdn, arena_ind); + + return (chunk_alloc_default_impl(tsdn, arena, new_addr, size, alignment, + zero, commit)); +} + static void * chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t 
alignment, bool *zero, bool *commit) @@ -472,8 +482,15 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, alignment, zero, commit); if (ret == NULL) { - ret = chunk_hooks->alloc(new_addr, size, alignment, zero, - commit, arena->ind); + if (chunk_hooks->alloc == chunk_alloc_default) { + /* Call directly to propagate tsdn. */ + ret = chunk_alloc_default_impl(tsdn, arena, new_addr, + size, alignment, zero, commit); + } else { + ret = chunk_hooks->alloc(new_addr, size, alignment, + zero, commit, arena->ind); + } + if (ret == NULL) return (NULL); } @@ -590,20 +607,31 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, arena_maybe_purge(tsdn, arena); } +static bool +chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size) +{ + + if (!have_dss || !chunk_in_dss(tsdn, chunk)) + return (chunk_dalloc_mmap(chunk, size)); + return (true); +} + static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { + tsdn_t *tsdn; - if (!have_dss || !chunk_in_dss(tsdn_fetch(), chunk)) - return (chunk_dalloc_mmap(chunk, size)); - return (true); + tsdn = tsdn_fetch(); + + return (chunk_dalloc_default_impl(tsdn, chunk, size)); } void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, bool committed) { + bool err; assert(chunk != NULL); assert(CHUNK_ADDR2BASE(chunk) == chunk); @@ -612,7 +640,13 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); /* Try to deallocate. */ - if (!chunk_hooks->dalloc(chunk, size, committed, arena->ind)) + if (chunk_hooks->dalloc == chunk_dalloc_default) { + /* Call directly to propagate tsdn. 
*/ + err = chunk_dalloc_default_impl(tsdn, chunk, size); + } else + err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); + + if (!err) return; /* Try to decommit; purge if that fails. */ if (committed) { @@ -681,26 +715,34 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, - bool committed, unsigned arena_ind) +chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss) { - tsdn_t *tsdn = tsdn_fetch(); - if (chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, chunk_b)) - return (true); - } + if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, + chunk_b)) + return (true); return (false); } +static bool +chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, + bool committed, unsigned arena_ind) +{ + tsdn_t *tsdn; + + tsdn = tsdn_fetch(); + + return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b)); +} + static rtree_node_elm_t * chunks_rtree_node_alloc(size_t nelms) { - return ((rtree_node_elm_t *)base_alloc(tsdn_fetch(), nelms * + return ((rtree_node_elm_t *)base_alloc(TSDN_NULL, nelms * sizeof(rtree_node_elm_t))); } From de23f6fce7fa49b3ac9c1a6e36131464ecce2f01 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 27 May 2016 16:20:23 -0700 Subject: [PATCH 0323/2608] Fix mixed decl in nstime.c Fix mixed decl in the gettimeofday() branch of nstime_update() --- src/nstime.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nstime.c b/src/nstime.c index 26e49dc5..aad2c260 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -128,9 +128,11 @@ nstime_update(nstime_t *time) time->ns = ts.tv_sec * BILLION + ts.tv_nsec; } #else - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; + { + struct timeval tv; + gettimeofday(&tv, NULL); + time->ns = tv.tv_sec * BILLION + 
tv.tv_usec * 1000; + } #endif /* Handle non-monotonic clocks. */ From 48384dc2d841bac9ab1e1ef24addbccabed43c20 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 27 May 2016 16:20:23 -0700 Subject: [PATCH 0324/2608] Fix mixed decl in nstime.c Fix mixed decl in the gettimeofday() branch of nstime_update() --- src/nstime.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/nstime.c b/src/nstime.c index 26e49dc5..aad2c260 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -128,9 +128,11 @@ nstime_update(nstime_t *time) time->ns = ts.tv_sec * BILLION + ts.tv_nsec; } #else - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; + { + struct timeval tv; + gettimeofday(&tv, NULL); + time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; + } #endif /* Handle non-monotonic clocks. */ From dd752c1ffd92b9d7573aa5ce49bcc5e74588c3dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jun 2016 14:15:49 -0700 Subject: [PATCH 0325/2608] Fix potential VM map fragmentation regression. Revert 245ae6036c09cc11a72fab4335495d95cddd5beb (Support --with-lg-page values larger than actual page size.), because it could cause VM map fragmentation if the kernel grows mmap()ed memory downward. This resolves #391. --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- src/extent_mmap.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 5b809bfa..8ba4a19a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -813,7 +813,7 @@ sa2u(size_t size, size_t alignment) * Calculate the multi-page mapping that large_palloc() would need in * order to guarantee the alignment. */ - if (usize + large_pad + PAGE_CEILING(alignment) < usize) { + if (usize + large_pad + PAGE_CEILING(alignment) - PAGE < usize) { /* size_t overflow. 
*/ return (0); } diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 0dd3247e..23dd4f88 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -9,7 +9,7 @@ extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) void *ret; size_t alloc_size; - alloc_size = size + alignment; + alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); From 05a9e4ac651eb0c728e83fd883425c4894a2ae2b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jun 2016 14:19:50 -0700 Subject: [PATCH 0326/2608] Fix potential VM map fragmentation regression. Revert 245ae6036c09cc11a72fab4335495d95cddd5beb (Support --with-lg-page values larger than actual page size.), because it could cause VM map fragmentation if the kernel grows mmap()ed memory downward. This resolves #391. --- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- src/arena.c | 2 +- src/chunk_mmap.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 51bf8974..8f82edd4 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -754,7 +754,7 @@ sa2u(size_t size, size_t alignment) * Calculate the size of the over-size run that arena_palloc() * would need to allocate in order to guarantee the alignment. */ - if (usize + large_pad + alignment <= arena_maxrun) + if (usize + large_pad + alignment - PAGE <= arena_maxrun) return (usize); } @@ -784,7 +784,7 @@ sa2u(size_t size, size_t alignment) * Calculate the multi-chunk mapping that huge_palloc() would need in * order to guarantee the alignment. */ - if (usize + alignment < usize) { + if (usize + alignment - PAGE < usize) { /* size_t overflow. 
*/ return (0); } diff --git a/src/arena.c b/src/arena.c index c605bcd3..ce62590b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2687,7 +2687,7 @@ arena_palloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return (NULL); alignment = PAGE_CEILING(alignment); - alloc_size = usize + large_pad + alignment; + alloc_size = usize + large_pad + alignment - PAGE; malloc_mutex_lock(tsdn, &arena->lock); run = arena_run_alloc_large(tsdn, arena, alloc_size, false); diff --git a/src/chunk_mmap.c b/src/chunk_mmap.c index f95ae756..73fc497a 100644 --- a/src/chunk_mmap.c +++ b/src/chunk_mmap.c @@ -9,7 +9,7 @@ chunk_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) void *ret; size_t alloc_size; - alloc_size = size + alignment; + alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); From 9de0094e6e085f186afd608ad874402a589ed576 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 7 Jun 2016 14:27:24 -0700 Subject: [PATCH 0327/2608] Fix a Valgrind regression in calloc(). This regression was caused by 3ef51d7f733ac6432e80fa902a779ab5b98d74f6 (Optimize the fast paths of calloc() and [m,d,sd]allocx().). --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 40eb2eaa..4d7c481f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1739,7 +1739,7 @@ je_calloc(size_t num, size_t size) ret = ialloc_body(num_size, true, &tsdn, &usize, true); ialloc_post_check(ret, tsdn, usize, "calloc", true, true); UTRACE(0, num_size, ret); - JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, false); + JEMALLOC_VALGRIND_MALLOC(ret != NULL, tsdn, ret, usize, true); } return (ret); From c7d52980275b22f64c672ebaede28dc95d4f1820 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 7 Jun 2016 14:30:39 -0700 Subject: [PATCH 0328/2608] Fix a Valgrind regression in chunk_alloc_wrapper(). 
This regression was caused by d412624b25eed2b5c52b7d94a71070d3aab03cb4 (Move retaining out of default chunk hooks). --- src/chunk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index bff59088..f292c980 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -493,10 +493,12 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (ret == NULL) return (NULL); + + if (config_valgrind && chunk_hooks->alloc != + chunk_alloc_default) + JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); } - if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) - JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); return (ret); } From a7fdcc8b09d197d64d1772d1e8025add44f48f0a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 May 2016 17:12:13 -0700 Subject: [PATCH 0329/2608] Fix opt_zero-triggered in-place huge reallocation zeroing. Fix huge_ralloc_no_move_expand() to update the extent's zeroed attribute based on the intersection of the previous value and that of the newly merged trailing extent. --- src/huge.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/huge.c b/src/huge.c index 1aa02a0f..3a2877ca 100644 --- a/src/huge.c +++ b/src/huge.c @@ -262,19 +262,19 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* - * Copy zero into is_zeroed_chunk and pass the copy to chunk_alloc(), so - * that it is possible to make correct junk/zero fill decisions below. + * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, + * update extent's zeroed field, and zero as necessary. */ - is_zeroed_chunk = zero; - + is_zeroed_chunk = false; if (arena_chunk_ralloc_huge_expand(tsdn, arena, ptr, oldsize, usize, &is_zeroed_chunk)) return (true); malloc_mutex_lock(tsdn, &arena->huge_mtx); - /* Update the size of the huge allocation. 
*/ huge_node_unset(ptr, node); extent_node_size_set(node, usize); + extent_node_zeroed_set(node, extent_node_zeroed_get(node) && + is_zeroed_chunk); huge_node_reset(tsdn, ptr, node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); From 20cd2de5ef622c3af8b3e4aba897aff7ddd451a7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 16:17:31 -0700 Subject: [PATCH 0330/2608] Add a missing prof_alloc_rollback() call. In the case where prof_alloc_prep() is called with an over-estimate of allocation size, and sampling doesn't end up being triggered, the tctx must be discarded. --- include/jemalloc/internal/prof.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 691e153d..21dff5fb 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -513,6 +513,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * though its actual usize was insufficient to cross the * sample threshold. */ + prof_alloc_rollback(tsd, tctx, true); tctx = (prof_tctx_t *)(uintptr_t)1U; } } From fa09fe798aa4f9f4f503059c9146227df2b8c18d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Jun 2016 16:19:22 -0700 Subject: [PATCH 0331/2608] Fix rallocx() sampling code to not eagerly commit sampler update. rallocx() for an alignment-constrained request may end up with a smaller-than-worst-case size if in-place reallocation succeeds due to serendipitous alignment. In such cases, sampling may not happen. 
--- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4d7c481f..5d1f4937 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2222,7 +2222,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_active = prof_active_get_unlocked(); old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); - tctx = prof_alloc_prep(tsd, *usize, prof_active, true); + tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd, old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx); @@ -2231,7 +2231,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, tcache, arena); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); + prof_alloc_rollback(tsd, tctx, false); return (NULL); } @@ -2246,7 +2246,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, */ *usize = isalloc(tsd_tsdn(tsd), p, config_prof); } - prof_realloc(tsd, p, *usize, tctx, prof_active, true, old_ptr, + prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, old_usize, old_tctx); return (p); From b9b35562893afd258d0dc94787acfe53153e5c65 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jun 2016 14:40:43 -0700 Subject: [PATCH 0332/2608] Update ChangeLog for 4.2.1. --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index c9ce7c4d..532255d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,17 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.2.1 (June 8, 2016) + + Bug fixes: + - Fix bootstrapping issues for configurations that require allocation during + tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone) + - Fix gettimeofday() version of nstime_update(). (@ronawho) + - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). 
(@ronawho) + - Fix potential VM map fragmentation regression. (@jasone) + - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) + - Fix heap profiling context leaks in reallocation edge cases. (@jasone) + * 4.2.0 (May 12, 2016) New features: From 5271b673b2e93dd655fde6f2ee1ea849da5cb3d2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Jun 2016 14:40:43 -0700 Subject: [PATCH 0333/2608] Update ChangeLog for 4.2.1. --- ChangeLog | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ChangeLog b/ChangeLog index 926209e5..ed62e0e7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,17 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.2.1 (June 8, 2016) + + Bug fixes: + - Fix bootstrapping issues for configurations that require allocation during + tsd initialization (e.g. --disable-tls). (@cferris1000, @jasone) + - Fix gettimeofday() version of nstime_update(). (@ronawho) + - Fix Valgrind regressions in calloc() and chunk_alloc_wrapper(). (@ronawho) + - Fix potential VM map fragmentation regression. (@jasone) + - Fix opt_zero-triggered in-place huge reallocation zeroing. (@jasone) + - Fix heap profiling context leaks in reallocation edge cases. (@jasone) + * 4.2.0 (May 12, 2016) New features: From fbd7956d45eda7e2717e263b983b6fd9d51b83a6 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 8 Jun 2016 14:20:32 -0700 Subject: [PATCH 0334/2608] Work around a weird pgi bug in test/unit/math.c pgi fails to compile math.c, reporting that `-INFINITY` in `pt_norm_expected[]` is a "Non-constant" expression. A simplified version of this failure is: ```c #include static double inf1, inf2 = INFINITY; // no complaints static double inf3 = INFINITY; // suddenly INFINITY is "Non-constant" int main() { } ``` ```sh PGC-S-0074-Non-constant expression in initializer (t.c: 4) ``` pgi errors on the declaration of inf3, and will compile fine if that line is removed. 
I've reported this bug to pgi, but in the meantime I just switched to using (DBL_MAX + DBL_MAX) to work around this bug. --- test/unit/math.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/math.c b/test/unit/math.c index ebec77a6..adb72bed 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -5,6 +5,10 @@ #include +#ifdef __PGI +#undef INFINITY +#endif + #ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) #endif From 8a1a794b0c79268ed9570719579b18b55e0bfd2d Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 8 Jun 2016 14:48:55 -0700 Subject: [PATCH 0335/2608] Don't use compact red-black trees with the pgi compiler Some bug (either in the red-black tree code, or in the pgi compiler) seems to cause red-black trees to become unbalanced. This issue seems to go away if we don't use compact red-black trees. Since red-black trees don't seem to be used much anymore, I opted for what seems to be an easy fix here instead of digging in and trying to find the root cause of the bug. Some context in case it's helpful: I experienced a ton of segfaults while using pgi as Chapel's target compiler with jemalloc 4.0.4. The little bit of debugging I did pointed me somewhere deep in red-black tree manipulation, but I didn't get a chance to investigate further. It looks like 4.2.0 replaced most uses of red-black trees with pairing-heaps, which seems to avoid whatever bug I was hitting. However, `make check_unit` was still failing on the rb test, so I figured the core issue was just being masked. 
Here's the `make check_unit` failure: ```sh === test/unit/rb === test_rb_empty: pass tree_recurse:test/unit/rb.c:90: Failed assertion: (((_Bool) (((uintptr_t) (left_node)->link.rbn_right_red) & ((size_t)1)))) == (false) --> true != false: Node should be black test_rb_random:test/unit/rb.c:274: Failed assertion: (imbalances) == (0) --> 1 != 0: Tree is unbalanced tree_recurse:test/unit/rb.c:90: Failed assertion: (((_Bool) (((uintptr_t) (left_node)->link.rbn_right_red) & ((size_t)1)))) == (false) --> true != false: Node should be black test_rb_random:test/unit/rb.c:274: Failed assertion: (imbalances) == (0) --> 1 != 0: Tree is unbalanced node_remove:test/unit/rb.c:190: Failed assertion: (imbalances) == (0) --> 2 != 0: Tree is unbalanced : test/unit/rb.c:43: Failed assertion: "pathp[-1].cmp < 0" test/test.sh: line 22: 12926 Aborted Test harness error ``` While starting to debug I saw the RB_COMPACT option and decided to check if turning that off resolved the bug. It seems to have fixed it (`make check_unit` passes and the segfaults under Chapel are gone) so it seems like on okay work-around. I'd imagine this has performance implications for red-black trees under pgi, but if they're not going to be used much anymore it's probably not a big deal. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8ba4a19a..c35280fa 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -155,7 +155,9 @@ static const bool config_cache_oblivious = #endif #include "jemalloc/internal/ph.h" +#ifndef __PGI #define RB_COMPACT +#endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" From 91278fbddfb5a0adf265b1b9907a1509b4e1e18c Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 17:10:16 +0900 Subject: [PATCH 0336/2608] Add an AppVeyor config This builds jemalloc and runs all checks with: - MSVC 2015 64-bits - MSVC 2015 32-bits - MINGW64 (from msys2) - MINGW32 (from msys2) Normally, AppVeyor configs are named appveyor.yml, but it is possible to configure the .yml file name in the AppVeyor project settings such that the file stays "hidden", like typical travis configs. 
--- .appveyor.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 00000000..ddd5c571 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,28 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + - MSYSTEM: MINGW64 + CPU: x86_64 + - MSYSTEM: MINGW32 + CPU: i686 + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Suy mingw-w64-%CPU%-make + +build_script: + - bash -c "autoconf" + - bash -c "./configure" + - mingw32-make -j3 + - file lib/jemalloc.dll + - mingw32-make -j3 tests + - mingw32-make -k check From 0dad5b77196903ba2d6bc64a0d9fdda8e2e9ad78 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 22:00:18 +0900 Subject: [PATCH 0337/2608] Fix extent_*_get to build with MSVC --- include/jemalloc/internal/extent.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index d7944c1c..dbdc8051 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -224,22 +224,22 @@ JEMALLOC_INLINE void * extent_before_get(const extent_t *extent) { - return ((void *)(uintptr_t)extent->e_addr - PAGE); + return ((void *)((uintptr_t)extent->e_addr - PAGE)); } JEMALLOC_INLINE void * extent_last_get(const extent_t *extent) { - return ((void *)(uintptr_t)extent->e_addr + extent_size_get(extent) - - PAGE); + return ((void *)((uintptr_t)extent->e_addr + extent_size_get(extent) - + PAGE)); } JEMALLOC_INLINE void * extent_past_get(const extent_t *extent) { - return ((void *)(uintptr_t)extent->e_addr + extent_size_get(extent)); + return ((void 
*)((uintptr_t)extent->e_addr + extent_size_get(extent))); } JEMALLOC_INLINE bool From c2942e2c0e097e7c75a3addd0b9c87758f91692e Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 23:17:39 +0900 Subject: [PATCH 0338/2608] Define 64-bits atomics unconditionally They are used on all platforms in prng.h. --- include/jemalloc/internal/atomic.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3f15ea14..3936f68b 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,8 +66,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) +#if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -125,7 +124,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) : "memory" /* Clobbers. 
*/ ); } -# elif (defined(JEMALLOC_C11ATOMICS)) +#elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -153,7 +152,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } -# elif (defined(JEMALLOC_ATOMIC9)) +#elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -193,7 +192,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) atomic_store_rel_long(p, x); } -# elif (defined(JEMALLOC_OSATOMIC)) +#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -225,7 +224,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) o = atomic_read_uint64(p); } while (atomic_cas_uint64(p, o, x)); } -# elif (defined(_MSC_VER)) +#elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -255,7 +254,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) InterlockedExchange64(p, x); } -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -284,9 +283,8 @@ atomic_write_uint64(uint64_t *p, uint64_t x) __sync_lock_test_and_set(p, x); } -# else -# error "Missing implementation for 64-bit atomic operations" -# endif +#else +# error "Missing implementation for 64-bit atomic operations" #endif /******************************************************************************/ From 2ea7742e6ffa7fb20538c0e2dba6ceec80cbe8d9 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 10 Jun 2016 00:17:19 +0900 Subject: [PATCH 0339/2608] Add Travis-CI configuration --- .travis.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..1fed4f8e --- 
/dev/null +++ b/.travis.yml @@ -0,0 +1,29 @@ +language: c + +matrix: + include: + - os: linux + compiler: gcc + - os: linux + compiler: gcc + env: + - EXTRA_FLAGS=-m32 + addons: + apt: + packages: + - gcc-multilib + - os: osx + compiler: clang + - os: osx + compiler: clang + env: + - EXTRA_FLAGS=-m32 + +before_script: + - autoconf + - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - make -j3 + - make -j3 tests + +script: + - make check From ccd64160736c6e94f84a8bf045ecbbc6a4228604 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 13:18:08 -0700 Subject: [PATCH 0340/2608] Add -dynamic for integration and stress tests with Cray compiler wrappers Cray systems come with compiler wrappers to simplify building parallel applications. CC is the C++ wrapper, and cc is the C wrapper. The wrappers call the base {Cray, Intel, PGI, or GNU} compiler with vendor specific flags. The "Programming Environment" (prgenv) that's currently loaded determines the base compiler. e.g. compiling with gnu looks something like: module load PrgEnv-gnu cc hello.c On most systems the wrappers defaults to `-static` mode, which causes them to only look for static libraries, and not for any dynamic ones (even if the dynamic version was explicitly listed.) The integration and stress tests expect to be using the .so, so we have to run the with -dynamic so that wrapper will find/use the .so. 
--- Makefile.in | 5 +++-- configure.ac | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index a24fde95..4347706c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -56,6 +56,7 @@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ @@ -293,11 +294,11 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index 538e53f4..ede0993e 100644 --- a/configure.ac +++ b/configure.ac @@ -131,6 +131,18 @@ if test "x$GCC" != "xyes" ; then [je_cv_msvc=no])]) fi +dnl check if a cray prgenv wrapper compiler is being used +je_cv_cray_prgenv_wrapper="" +if test "x${PE_ENV}" != "x" ; then + case "${CC}" in + CC|cc) + 
je_cv_cray_prgenv_wrapper="yes" + ;; + *) + ;; + esac +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -269,11 +281,16 @@ SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' +TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + TEST_LD_MODE='-dynamic' +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) @@ -432,6 +449,7 @@ AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) +AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) From ae3314785bf9726e5a97e5c98f70dcb12e6a7a90 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 14:20:28 -0700 Subject: [PATCH 0341/2608] Fix librt detection when using a Cray compiler wrapper The Cray compiler wrappers will often add `-lrt` to the base compiler with `-static` linking (the default at most sites.) However, `-lrt` isn't automatically added with `-dynamic`. This means that if jemalloc was built with `-static`, but then used in a program with `-dynamic` jemalloc won't have detected that librt is a dependency. The integration and stress tests use -dynamic, which is causing undefined references to clock_gettime(). This just adds an extra check for librt (ignoring the autoconf cache) with `-dynamic` thrown. It also stops filtering librt from the integration tests. With this `make check` passes for: - PrgEnv-gnu - PrgEnv-intel - PrgEnv-pgi PrgEnv-cray still needs more work (will be in a separate patch.) 
--- Makefile.in | 2 +- configure.ac | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 4347706c..b78e1500 100644 --- a/Makefile.in +++ b/Makefile.in @@ -294,7 +294,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) diff --git a/configure.ac b/configure.ac index ede0993e..e5164ba8 100644 --- a/configure.ac +++ b/configure.ac @@ -1232,6 +1232,20 @@ CPPFLAGS="$CPPFLAGS -D_REENTRANT" dnl Check whether clock_gettime(2) is in libc or librt. AC_SEARCH_LIBS([clock_gettime], [rt]) +dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with +dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + if test "$ac_cv_search_clock_gettime" != "-lrt"; then + SAVED_CFLAGS="${CFLAGS}" + + unset ac_cv_search_clock_gettime + JE_CFLAGS_APPEND([-dynamic]) + AC_SEARCH_LIBS([clock_gettime], [rt]) + + CFLAGS="${SAVED_CFLAGS}" + fi +fi + dnl Check if the GNU-specific secure_getenv function exists. 
AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], From 1167e9eff342d3c0f39bb7e8aabc40a34ac0b2fe Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 17 Jun 2016 13:28:39 -0700 Subject: [PATCH 0342/2608] Check for __builtin_unreachable at configure time Add a configure check for __builtin_unreachable instead of basing its availability on the __GNUC__ version. On OS X using gcc (a real gcc, not the bundled version that's just a gcc front-end) leads to a linker assertion: https://github.com/jemalloc/jemalloc/issues/266 It turns out that this is caused by a gcc bug resulting from the use of __builtin_unreachable(): https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57438 To work around this bug, check that __builtin_unreachable() actually works at configure time, and if it doesn't use abort() instead. The check is based on https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57438#c21. With this `make check` passes with a homebrew installed gcc-5 and gcc-6. --- configure.ac | 17 ++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 +++++ include/jemalloc/internal/util.h | 22 +++++-------------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/configure.ac b/configure.ac index e5164ba8..ad7ace52 100644 --- a/configure.ac +++ b/configure.ac @@ -1038,6 +1038,23 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) + + +JE_COMPILABLE([a program using __builtin_unreachable], [ +void foo (void) { + __builtin_unreachable(); +} +], [ + { + foo(); + } +], [je_cv_gcc_builtin_unreachable]) +if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) +else + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) +fi + dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. 
dnl One of those two functions should (theoretically) exist on all platforms diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 49e2cf06..cebd6a53 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -185,6 +185,12 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#undef JEMALLOC_INTERNAL_UNREACHABLE + /* * ffs*() functions to use for bitmapping. Don't use these directly; instead, * use ffs_*() from util.h. diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a0c2203d..aee00d6d 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -61,30 +61,20 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) -#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif -#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) - #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() abort() -# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() abort() #endif +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + #include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, 
e.g., cassert(config_debug). */ From e37720cb4a4661fd3b8ece01b1b1d83020ced99f Mon Sep 17 00:00:00 2001 From: rustyx Date: Wed, 22 Jun 2016 22:28:20 +0200 Subject: [PATCH 0343/2608] Fix MSVC project --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 5 ++++- .../projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 11 ++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 59f52f96..da75a968 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -68,11 +68,13 @@ + + @@ -112,6 +114,7 @@ + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} @@ -392,4 +395,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 159b2e72..57395e70 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -170,6 +170,12 @@ Header Files\msvc_compat\C99 + + Header Files\internal + + + Header Files\internal + @@ -247,5 +253,8 @@ Source Files + + Source Files + - + \ No newline at end of file From aec07531bcfbdceaad020aee0048581d72a8c26f Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:07 -0700 Subject: [PATCH 0344/2608] Add initial support for building with the cray compiler Get jemalloc building and passing `make check_unit` with cray 8.4. An inlining bug in 8.4 results in internal errors while trying to build jemalloc. This has already been reported and fixed for the 8.5 release. In order to work around the inlining bug, disable gnu compatibility and limit ipa optimizations. I copied the msvc compiler check for cray, but note that we perform the test even if we think we're using gcc because cray pretends to be gcc if `-hgnu` (which is enabled by default) is used. 
I couldn't come up with a principled way to check for the inlining bug, so instead I just checked compiler versions. The build had lots of warnings I need to address and cray doesn't support -MM or -MT for dependency tracking, so I had to do `make CC_MM=`. --- configure.ac | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/configure.ac b/configure.ac index ad7ace52..1f0cc1f3 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,7 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. CFLAGS=$CFLAGS AC_PROG_CC + if test "x$GCC" != "xyes" ; then AC_CACHE_CHECK([whether compiler is MSVC], [je_cv_msvc], @@ -143,6 +144,30 @@ if test "x${PE_ENV}" != "x" ; then esac fi +AC_CACHE_CHECK([whether compiler is cray], + [je_cv_cray], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _CRAYC + int fail[-1]; +#endif +])], + [je_cv_cray=yes], + [je_cv_cray=no])]) + +if test "x${je_cv_cray}" = "xyes" ; then + AC_CACHE_CHECK([whether cray compiler version is 8.4], + [je_cv_cray_84], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) + int fail[-1]; +#endif +])], + [je_cv_cray_84=yes], + [je_cv_cray_84=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -164,6 +189,13 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi + if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = "xyes" ; then + JE_CFLAGS_APPEND([-hipa2]) + JE_CFLAGS_APPEND([-hnognu]) + fi + fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. 
if test "x$EXTRA_CFLAGS" != "x" ; then From 3ef67930e025cbc68735d0ebd2de7690c816658c Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:29 -0700 Subject: [PATCH 0345/2608] Disable automatic dependency generation for the Cray compiler Cray only supports `-M` for generating dependency files. It does not support `-MM` or `-MT`, so don't try to use them. I just reused the existing mechanism for turning auto-dependency generation off (`CC_MM=`), but it might be more principled to add a configure test to check if the compiler supports `-MM` and `-MT`, instead of manually tracking which compilers don't support those flags. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 1f0cc1f3..db616e0e 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,10 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then TEST_LD_MODE='-dynamic' fi +if test "x${je_cv_cray}" = "xyes" ; then + CC_MM= +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) From 3dee73faf2c9c1008a4e3281cd7dd0123d16a8d3 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:53 -0700 Subject: [PATCH 0346/2608] Add Cray compiler's equivalent of -Werror before __attribute__ checks Cray uses -herror_on_warning instead of -Werror. Use it everywhere -Werror is currently used for __attribute__ checks so configure actually detects they're not supported. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index db616e0e..13ce6860 100644 --- a/configure.ac +++ b/configure.ac @@ -504,6 +504,7 @@ fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). 
SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; @@ -519,6 +520,7 @@ fi dnl Check for alloc_size attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) @@ -529,6 +531,7 @@ fi dnl Check for format(gnu_printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) @@ -539,6 +542,7 @@ fi dnl Check for format(printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) From 47b34dd39850b8b157c67887d9b6bf7bd3095796 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Thu, 7 Jul 2016 15:06:01 -0700 Subject: [PATCH 0347/2608] Disable irrelevant Cray compiler warnings if cc-silence is enabled Cray is pretty warning-happy, so disable ones that aren't helpful. Each warning has a numeric value instead of having named flags to disable specific warnings. Disable warnings 128 and 1357. 128: Ignore unreachable code warning. Cray warns about `not_reached()` not being reachable in a couple of places because it detects that some loops will never terminate. 
1357: Ignore warning about redefinition of malloc and friends With this patch, Cray 8.4.0 and 8.5.1 build cleanly and pass `make check` --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index 13ce6860..e1639d51 100644 --- a/configure.ac +++ b/configure.ac @@ -195,6 +195,12 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-hipa2]) JE_CFLAGS_APPEND([-hnognu]) fi + if test "x$enable_cc_silence" != "xno" ; then + dnl ignore unreachable code warning + JE_CFLAGS_APPEND([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_APPEND([-hnomessage=1357]) + fi fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. From 4abaee5d13a54c677cd132c481dbf7621f785fec Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 8 Jul 2016 13:28:16 +0900 Subject: [PATCH 0348/2608] Avoid getting the same default zone twice in a row. 847ff22 added a call to malloc_default_zone() before the main loop in register_zone, effectively making malloc_default_zone() called twice without any different outcome expected in the returned result. It is also called once at the beginning, and a second time at the end of the loop block. Instead, call it only once per iteration. --- src/zone.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zone.c b/src/zone.c index ca235da4..9432f45a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -246,7 +246,6 @@ register_zone(void) malloc_zone_register(&zone); do { - default_zone = malloc_default_zone(); /* * Unregister and reregister the default zone. 
On OSX >= 10.6, * unregistering takes the last registered zone and places it @@ -272,5 +271,7 @@ register_zone(void) malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - } while (malloc_default_zone() != &zone); + + default_zone = malloc_default_zone(); + } while (default_zone != &zone); } From 19c9a3e828ed46f1576521c264640e60bd0cb01f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 8 Jul 2016 13:35:35 +0900 Subject: [PATCH 0349/2608] Change how the default zone is found On OSX 10.12, malloc_default_zone returns a special zone that is not present in the list of registered zones. That zone uses a "lite zone" if one is present (apparently enabled when malloc stack logging is enabled), or the first registered zone otherwise. In practice this means unless malloc stack logging is enabled, the first registered zone is the default. So get the list of zones to get the first one, instead of relying on malloc_default_zone. --- src/zone.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/zone.c b/src/zone.c index 9432f45a..92381614 100644 --- a/src/zone.c +++ b/src/zone.c @@ -168,6 +168,33 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } +static malloc_zone_t *get_default_zone() +{ + malloc_zone_t **zones = NULL; + unsigned int num_zones = 0; + + /* + * On OSX 10.12, malloc_default_zone returns a special zone that is not + * present in the list of registered zones. That zone uses a "lite zone" + * if one is present (apparently enabled when malloc stack logging is + * enabled), or the first registered zone otherwise. In practice this + * means unless malloc stack logging is enabled, the first registered + * zone is the default. + * So get the list of zones to get the first one, instead of relying on + * malloc_default_zone. 
+ */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, + &num_zones)) { + /* Reset the value in case the failure happened after it was set. */ + num_zones = 0; + } + + if (num_zones) + return zones[0]; + + return malloc_default_zone(); +} + JEMALLOC_ATTR(constructor) void register_zone(void) @@ -177,7 +204,7 @@ register_zone(void) * If something else replaced the system default zone allocator, don't * register jemalloc's. */ - malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_t *default_zone = get_default_zone(); malloc_zone_t *purgeable_zone = NULL; if (!default_zone->zone_name || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { @@ -272,6 +299,6 @@ register_zone(void) malloc_zone_register(purgeable_zone); } - default_zone = malloc_default_zone(); + default_zone = get_default_zone(); } while (default_zone != &zone); } From c716c1e5318c19569581637fd33220999c0d6a4b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Sep 2016 11:56:24 -0700 Subject: [PATCH 0350/2608] Update project URL. --- README | 2 +- doc/jemalloc.xml.in | 2 +- jemalloc.pc.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 67cbf6da..3a6e0d27 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://www.canonware.com/jemalloc/ +URL: http://jemalloc.net/ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bfc0073b..006e9e06 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -52,7 +52,7 @@ LIBRARY This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://jemalloc.net/">jemalloc website. 
SYNOPSIS diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 1a3ad9b3..a318e8dd 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. -URL: http://www.canonware.com/jemalloc +URL: http://jemalloc.net/ Version: @jemalloc_version@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} From d4ce47e7fb6af53fd0460052100382c728b58566 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Sep 2016 16:12:15 -0700 Subject: [PATCH 0351/2608] Change html manual encoding to UTF-8. This works around GitHub's broken automatic reformatting from ISO-8859-1 to UTF-8 when serving static html. Remove from e.g. malloc, add a custom template that does not append parentheses, and manually specify them, e.g. malloc(). This works around apparently broken XSL formatting that causes to be emitted in html (rather than , or better yet, nothing). --- doc/html.xsl.in | 1 + doc/jemalloc.xml.in | 182 ++++++++++++++++++++++---------------------- doc/stylesheet.xsl | 5 +- 3 files changed, 96 insertions(+), 92 deletions(-) diff --git a/doc/html.xsl.in b/doc/html.xsl.in index a91d9746..ec4fa655 100644 --- a/doc/html.xsl.in +++ b/doc/html.xsl.in @@ -1,4 +1,5 @@ + diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 006e9e06..8000461f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -180,20 +180,20 @@ Standard API - The malloc function allocates + The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. - The calloc function allocates + The calloc() function allocates space for number objects, each size bytes in length. 
The result is identical to - calling malloc with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero bytes. - The posix_memalign function + The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value @@ -201,7 +201,7 @@ alignment must be a power of 2 at least as large as sizeof(void *). - The aligned_alloc function + The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested @@ -209,7 +209,7 @@ undefined if size is not an integral multiple of alignment. - The realloc function changes the + The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -217,26 +217,26 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. Note that - realloc may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. + realloc() function behaves identically to + malloc() for the specified size. - The free function causes the + The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. Non-standard API - The mallocx, - rallocx, - xallocx, - sallocx, - dallocx, - sdallocx, and - nallocx functions all have a + The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. 
Use bitwise or (|) operations to @@ -307,19 +307,19 @@ - The mallocx function allocates at + The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if size is 0. - The rallocx function resizes the + The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if size is 0. - The xallocx function resizes the + The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -332,32 +332,32 @@ language="C">(size + extra > SIZE_T_MAX). - The sallocx function returns the + The sallocx() function returns the real size of the allocation at ptr. - The dallocx function causes the + The dallocx() function causes the memory referenced by ptr to be made available for future allocations. - The sdallocx function is an - extension of dallocx with a + The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx or - sallocx. + nallocx() or + sallocx(). - The nallocx function allocates no + The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx function call, or + mallocx() function call, or 0 if the inputs exceed the maximum supported size class and/or alignment. Behavior is undefined if size is 0. 
- The mallctl function provides a + The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -372,12 +372,12 @@ newlen; otherwise pass NULL and 0. - The mallctlnametomib function + The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. Upon - successful return from mallctlnametomib, + repeatedly to mallctlbymib(). Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -410,18 +410,18 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> - The malloc_stats_print function + The malloc_stats_print() function writes human-readable summary statistics via the write_cb callback function pointer and cbopaque data passed to write_cb, or - malloc_message if + malloc_message() if write_cb is NULL. This function can be called repeatedly. General information that never changes during execution can be omitted by specifying "g" as a character within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so + malloc_message() uses the + mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously. If is specified during configuration, “m” and “a” can be specified @@ -433,15 +433,15 @@ for (i = 0; i < nbins; i++) { would be required to merge counters that track thread cache operations. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. 
The - malloc_usable_size function is not a - mechanism for in-place realloc; rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. @@ -459,7 +459,7 @@ for (i = 0; i < nbins; i++) { environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before - main is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains the final value to be read by jemalloc. and malloc_conf are compile-time mechanisms, whereas @@ -540,14 +540,14 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. - The realloc, - rallocx, and - xallocx functions may resize allocations + The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -686,7 +686,7 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl* functions. Value types are + mallctl*() functions. 
Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if @@ -717,7 +717,7 @@ for (i = 0; i < nbins; i++) { rw If a value is passed in, refresh the data from which - the mallctl* functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -987,19 +987,19 @@ for (i = 0; i < nbins; i++) { r- Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print + enabled, the malloc_stats_print() function is called at program exit via an atexit 3 function. If is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit may + functions. Furthermore, atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit, so this option is not + atexit(), so this option is not universally usable (though the application can register its own - atexit function with equivalent + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -1034,8 +1034,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc and - rallocx calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. 
@@ -1256,11 +1256,11 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit may allocate + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit, so + when jemalloc in turn calls atexit(), so this option is not universally usable (though the application can - register its own atexit function with + register its own atexit() function with equivalent functionality). This option is disabled by default. @@ -1319,7 +1319,7 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -1346,7 +1346,7 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -2189,8 +2189,8 @@ struct extent_hooks_s { Number of pages within unused extents that are potentially dirty, and for which - madvise... - MADV_DONTNEED or + madvise(... + MADV_DONTNEED) or similar has not been called. @@ -2253,8 +2253,8 @@ struct extent_hooks_s { r- [] - Number of madvise... - MADV_DONTNEED or + Number of madvise(... + MADV_DONTNEED) or similar calls made to purge dirty pages. @@ -2579,10 +2579,10 @@ MAPPED_LIBRARIES: to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock. 
@@ -2593,15 +2593,15 @@ MAPPED_LIBRARIES: RETURN VALUES Standard API - The malloc and - calloc functions return a pointer to the + The malloc() and + calloc() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to ENOMEM. - The posix_memalign function + The posix_memalign() function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail + The posix_memalign() function will fail if: @@ -2620,11 +2620,11 @@ MAPPED_LIBRARIES: - The aligned_alloc function returns + The aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. The - aligned_alloc function will fail if: + aligned_alloc() function will fail if: EINVAL @@ -2641,44 +2641,44 @@ MAPPED_LIBRARIES: - The realloc function returns a + The realloc() function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc + allocation failure. The realloc() function always leaves the original buffer intact when an error occurs. - The free function returns no + The free() function returns no value. Non-standard API - The mallocx and - rallocx functions return a pointer to + The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was available to service the allocation request. - The xallocx function returns the + The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately grown in place. - The sallocx function returns the + The sallocx() function returns the real size of the allocation pointed to by ptr. 
- The nallocx returns the real size + The nallocx() returns the real size that would result from a successful equivalent - mallocx function call, or zero if + mallocx() function call, or zero if insufficient memory is available to perform the size computation. - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on + The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if: @@ -2714,13 +2714,13 @@ MAPPED_LIBRARIES: EFAULT An interface with side effects failed in some way - not directly related to mallctl* + not directly related to mallctl*() read/write processing. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. @@ -2769,13 +2769,13 @@ malloc_conf = "narenas:1";]]> STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC + The malloc(), + calloc(), + realloc(), and + free() functions conform to ISO/IEC 9899:1990 (“ISO C90”). - The posix_memalign function conforms + The posix_memalign() function conforms to IEEE Std 1003.1-2001 (“POSIX.1”). diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index 4e334a86..bc8bc2a9 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -1,6 +1,9 @@ ansi - + + + + "" From 9ebbfca93fd98e31d2dbc2bddffcc4624df516dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Sep 2016 16:44:33 -0700 Subject: [PATCH 0352/2608] Change html manual encoding to UTF-8. This works around GitHub's broken automatic reformatting from ISO-8859-1 to UTF-8 when serving static html. Remove from e.g. malloc, add a custom template that does not append parentheses, and manually specify them, e.g. malloc(). This works around apparently broken XSL formatting that causes to be emitted in html (rather than , or better yet, nothing). 
--- doc/html.xsl.in | 1 + doc/jemalloc.xml.in | 174 ++++++++++++++++++++++---------------------- doc/stylesheet.xsl | 5 +- 3 files changed, 92 insertions(+), 88 deletions(-) diff --git a/doc/html.xsl.in b/doc/html.xsl.in index a91d9746..ec4fa655 100644 --- a/doc/html.xsl.in +++ b/doc/html.xsl.in @@ -1,4 +1,5 @@ + diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index c4a44e3c..88172296 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -180,20 +180,20 @@ Standard API - The malloc function allocates + The malloc() function allocates size bytes of uninitialized memory. The allocated space is suitably aligned (after possible pointer coercion) for storage of any type of object. - The calloc function allocates + The calloc() function allocates space for number objects, each size bytes in length. The result is identical to - calling malloc with an argument of + calling malloc() with an argument of number * size, with the exception that the allocated memory is explicitly initialized to zero bytes. - The posix_memalign function + The posix_memalign() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment, and returns the allocation in the value @@ -201,7 +201,7 @@ alignment must be a power of 2 at least as large as sizeof(void *). - The aligned_alloc function + The aligned_alloc() function allocates size bytes of memory such that the allocation's base address is a multiple of alignment. The requested @@ -209,7 +209,7 @@ undefined if size is not an integral multiple of alignment. - The realloc function changes the + The realloc() function changes the size of the previously allocated memory referenced by ptr to size bytes. The contents of the memory are unchanged up to the lesser of the new and old @@ -217,26 +217,26 @@ portion of the memory are undefined. Upon success, the memory referenced by ptr is freed and a pointer to the newly allocated memory is returned. 
Note that - realloc may move the memory allocation, + realloc() may move the memory allocation, resulting in a different return value than ptr. If ptr is NULL, the - realloc function behaves identically to - malloc for the specified size. + realloc() function behaves identically to + malloc() for the specified size. - The free function causes the + The free() function causes the allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. Non-standard API - The mallocx, - rallocx, - xallocx, - sallocx, - dallocx, - sdallocx, and - nallocx functions all have a + The mallocx(), + rallocx(), + xallocx(), + sallocx(), + dallocx(), + sdallocx(), and + nallocx() functions all have a flags argument that can be used to specify options. The functions only check the options that are contextually relevant. Use bitwise or (|) operations to @@ -307,19 +307,19 @@ - The mallocx function allocates at + The mallocx() function allocates at least size bytes of memory, and returns a pointer to the base address of the allocation. Behavior is undefined if size is 0. - The rallocx function resizes the + The rallocx() function resizes the allocation at ptr to be at least size bytes, and returns a pointer to the base address of the resulting allocation, which may or may not have moved from its original location. Behavior is undefined if size is 0. - The xallocx function resizes the + The xallocx() function resizes the allocation at ptr in place to be at least size bytes, and returns the real size of the allocation. If extra is non-zero, an attempt is @@ -332,32 +332,32 @@ language="C">(size + extra > SIZE_T_MAX). - The sallocx function returns the + The sallocx() function returns the real size of the allocation at ptr. - The dallocx function causes the + The dallocx() function causes the memory referenced by ptr to be made available for future allocations. 
- The sdallocx function is an - extension of dallocx with a + The sdallocx() function is an + extension of dallocx() with a size parameter to allow the caller to pass in the allocation size as an optimization. The minimum valid input size is the original requested size of the allocation, and the maximum valid input size is the corresponding value returned by - nallocx or - sallocx. + nallocx() or + sallocx(). - The nallocx function allocates no + The nallocx() function allocates no memory, but it performs the same size computation as the - mallocx function, and returns the real + mallocx() function, and returns the real size of the allocation that would result from the equivalent - mallocx function call, or + mallocx() function call, or 0 if the inputs exceed the maximum supported size class and/or alignment. Behavior is undefined if size is 0. - The mallctl function provides a + The mallctl() function provides a general interface for introspecting the memory allocator, as well as setting modifiable parameters and triggering actions. The period-separated name argument specifies a @@ -372,12 +372,12 @@ newlen; otherwise pass NULL and 0. - The mallctlnametomib function + The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name to a “Management Information Base” (MIB) that can be passed - repeatedly to mallctlbymib. Upon - successful return from mallctlnametomib, + repeatedly to mallctlbymib(). Upon + successful return from mallctlnametomib(), mibp contains an array of *miblenp integers, where *miblenp is the lesser of the number of components @@ -410,18 +410,18 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... 
*/ }]]> - The malloc_stats_print function + The malloc_stats_print() function writes human-readable summary statistics via the write_cb callback function pointer and cbopaque data passed to write_cb, or - malloc_message if + malloc_message() if write_cb is NULL. This function can be called repeatedly. General information that never changes during execution can be omitted by specifying "g" as a character within the opts string. Note that - malloc_message uses the - mallctl* functions internally, so + malloc_message() uses the + mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously. If is specified during configuration, “m” and “a” can @@ -434,15 +434,15 @@ for (i = 0; i < nbins; i++) { thread cache operations. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. The return value may be larger than the size that was requested during allocation. The - malloc_usable_size function is not a - mechanism for in-place realloc; rather + malloc_usable_size() function is not a + mechanism for in-place realloc(); rather it is provided solely as a tool for introspection purposes. Any discrepancy between the requested allocation size and the size reported - by malloc_usable_size should not be + by malloc_usable_size() should not be depended on, since such behavior is entirely implementation-dependent. @@ -460,7 +460,7 @@ for (i = 0; i < nbins; i++) { environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that malloc_conf may be read before - main is entered, so the declaration of + main() is entered, so the declaration of malloc_conf should specify an initializer that contains the final value to be read by jemalloc. 
and malloc_conf are compile-time mechanisms, whereas @@ -549,14 +549,14 @@ for (i = 0; i < nbins; i++) { nearest multiple of the cacheline size, or specify cacheline alignment when allocating. - The realloc, - rallocx, and - xallocx functions may resize allocations + The realloc(), + rallocx(), and + xallocx() functions may resize allocations without moving them under limited circumstances. Unlike the - *allocx API, the standard API does not + *allocx() API, the standard API does not officially round up the usable size of an allocation to the nearest size class, so technically it is necessary to call - realloc to grow e.g. a 9-byte allocation to + realloc() to grow e.g. a 9-byte allocation to 16 bytes, or shrink a 16-byte allocation to 9 bytes. Growth and shrinkage trivially succeeds in place as long as the pre-size and post-size both round up to the same size class. No other API guarantees are made regarding @@ -702,7 +702,7 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl* functions. Value types are + mallctl*() functions. Value types are specified in parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if @@ -733,7 +733,7 @@ for (i = 0; i < nbins; i++) { rw If a value is passed in, refresh the data from which - the mallctl* functions report values, + the mallctl*() functions report values, and increment the epoch. Return the current epoch. This is useful for detecting whether another thread caused a refresh. @@ -1013,19 +1013,19 @@ for (i = 0; i < nbins; i++) { r- Enable/disable statistics printing at exit. If - enabled, the malloc_stats_print + enabled, the malloc_stats_print() function is called at program exit via an atexit 3 function. 
If is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation - functions. Furthermore, atexit may + functions. Furthermore, atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls - atexit, so this option is not + atexit(), so this option is not universally usable (though the application can register its own - atexit function with equivalent + atexit() function with equivalent functionality). Therefore, this option should only be used with care; it is primarily intended as a performance tuning aid during application development. This option is disabled by default. @@ -1101,8 +1101,8 @@ for (i = 0; i < nbins; i++) { Zero filling enabled/disabled. If enabled, each byte of uninitialized allocated memory will be initialized to 0. Note that this initialization only happens once for each byte, so - realloc and - rallocx calls do not zero memory that + realloc() and + rallocx() calls do not zero memory that was previously allocated. This is intended for debugging and will impact performance negatively. This option is disabled by default. @@ -1325,11 +1325,11 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit may allocate + option. Note that atexit() may allocate memory during application initialization and then deadlock internally - when jemalloc in turn calls atexit, so + when jemalloc in turn calls atexit(), so this option is not universally usable (though the application can - register its own atexit function with + register its own atexit() function with equivalent functionality). This option is disabled by default. @@ -1388,7 +1388,7 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. 
@@ -1415,7 +1415,7 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl* calls. + mallctl*() calls. @@ -2763,10 +2763,10 @@ MAPPED_LIBRARIES: to override the function which emits the text strings forming the errors and warnings if for some reason the STDERR_FILENO file descriptor is not suitable for this. - malloc_message takes the + malloc_message() takes the cbopaque pointer argument that is NULL unless overridden by the arguments in a call to - malloc_stats_print, followed by a string + malloc_stats_print(), followed by a string pointer. Please note that doing anything which tries to allocate memory in this function is likely to result in a crash or deadlock. @@ -2777,15 +2777,15 @@ MAPPED_LIBRARIES: RETURN VALUES Standard API - The malloc and - calloc functions return a pointer to the + The malloc() and + calloc() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set to ENOMEM. - The posix_memalign function + The posix_memalign() function returns the value 0 if successful; otherwise it returns an error value. - The posix_memalign function will fail + The posix_memalign() function will fail if: @@ -2804,11 +2804,11 @@ MAPPED_LIBRARIES: - The aligned_alloc function returns + The aligned_alloc() function returns a pointer to the allocated memory if successful; otherwise a NULL pointer is returned and errno is set. The - aligned_alloc function will fail if: + aligned_alloc() function will fail if: EINVAL @@ -2825,44 +2825,44 @@ MAPPED_LIBRARIES: - The realloc function returns a + The realloc() function returns a pointer, possibly identical to ptr, to the allocated memory if successful; otherwise a NULL pointer is returned, and errno is set to ENOMEM if the error was the result of an - allocation failure. The realloc + allocation failure. The realloc() function always leaves the original buffer intact when an error occurs. 
- The free function returns no + The free() function returns no value. Non-standard API - The mallocx and - rallocx functions return a pointer to + The mallocx() and + rallocx() functions return a pointer to the allocated memory if successful; otherwise a NULL pointer is returned to indicate insufficient contiguous memory was available to service the allocation request. - The xallocx function returns the + The xallocx() function returns the real size of the resulting resized allocation pointed to by ptr, which is a value less than size if the allocation could not be adequately grown in place. - The sallocx function returns the + The sallocx() function returns the real size of the allocation pointed to by ptr. - The nallocx returns the real size + The nallocx() returns the real size that would result from a successful equivalent - mallocx function call, or zero if + mallocx() function call, or zero if insufficient memory is available to perform the size computation. - The mallctl, - mallctlnametomib, and - mallctlbymib functions return 0 on + The mallctl(), + mallctlnametomib(), and + mallctlbymib() functions return 0 on success; otherwise they return an error value. The functions will fail if: @@ -2898,13 +2898,13 @@ MAPPED_LIBRARIES: EFAULT An interface with side effects failed in some way - not directly related to mallctl* + not directly related to mallctl*() read/write processing. - The malloc_usable_size function + The malloc_usable_size() function returns the usable size of the allocation pointed to by ptr. @@ -2952,13 +2952,13 @@ malloc_conf = "lg_chunk:24";]]> STANDARDS - The malloc, - calloc, - realloc, and - free functions conform to ISO/IEC + The malloc(), + calloc(), + realloc(), and + free() functions conform to ISO/IEC 9899:1990 (“ISO C90”). - The posix_memalign function conforms + The posix_memalign() function conforms to IEEE Std 1003.1-2001 (“POSIX.1”). 
diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index 4e334a86..bc8bc2a9 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -1,6 +1,9 @@ ansi - + + + + "" From 17c4b8de5f4ef2732dfa83cbc86e2cf112f48635 Mon Sep 17 00:00:00 2001 From: Josh Gao Date: Thu, 15 Sep 2016 14:33:28 -0700 Subject: [PATCH 0353/2608] Fix -Wundef in _MSC_VER check. --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 129240ed..673ffd9b 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -37,7 +37,7 @@ # define JEMALLOC_CXX_THROW #endif -#if _MSC_VER +#if defined(_MSC_VER) # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) __declspec(align(s)) # define JEMALLOC_ALLOC_SIZE(s) From 1cb399b630db16892069cb37b6b0853ca318bb77 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Sep 2016 09:13:45 -0700 Subject: [PATCH 0354/2608] Fix arena_bind(). When tsd is not in nominal state (e.g. during thread termination), we should not increment nthreads. --- src/jemalloc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 10074013..24158552 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -442,15 +442,16 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; + if (!tsd_nominal(tsd)) + return; + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); - if (tsd_nominal(tsd)) { - if (internal) - tsd_iarena_set(tsd, arena); - else - tsd_arena_set(tsd, arena); - } + if (internal) + tsd_iarena_set(tsd, arena); + else + tsd_arena_set(tsd, arena); } void From bc49157d21e6ec14a41c7b852370d2e6d9509da2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 22 Sep 2016 11:53:19 -0700 Subject: [PATCH 0355/2608] Fix extent_recycle() to exclude other arenas' extents. 
When attempting to recycle an extent at a specified address, check that the extent belongs to the correct arena. --- src/extent.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index cb67a27c..29c9d2be 100644 --- a/src/extent.c +++ b/src/extent.c @@ -408,7 +408,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (elm != NULL) { extent = rtree_elm_read_acquired(tsdn, &extents_rtree, elm); - if (extent != NULL && (extent_active_get(extent) || + if (extent != NULL && (extent_arena_get(extent) != arena + || extent_active_get(extent) || extent_retained_get(extent) == cache)) extent = NULL; rtree_elm_release(tsdn, &extents_rtree, elm); From f6d01ff4b7322eeed56c61a11e3e3397765d3f22 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 22 Sep 2016 11:57:28 -0700 Subject: [PATCH 0356/2608] Protect extents_dirty access with extents_mtx. This fixes race conditions during purging. --- include/jemalloc/internal/arena.h | 21 ++-- include/jemalloc/internal/extent.h | 3 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 29 +++-- src/extent.c | 116 ++++++++++++------ 5 files changed, 112 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3c931c34..cee90b50 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -176,12 +176,6 @@ struct arena_s { */ size_t ndirty; - /* - * Ring sentinel used to track unused dirty memory. Dirty memory is - * managed as an LRU of cached extents. - */ - extent_t extents_dirty; - /* * Approximate time in seconds from the creation of a set of unused * dirty pages until an equivalent set of unused dirty pages is purged @@ -240,7 +234,12 @@ struct arena_s { */ extent_heap_t extents_cached[NPSIZES]; extent_heap_t extents_retained[NPSIZES]; - /* Protects extents_cached and extents_retained. */ + /* + * Ring sentinel used to track unused dirty memory. 
Dirty memory is + * managed as an LRU of cached extents. + */ + extent_t extents_dirty; + /* Protects extents_{cached,retained,dirty}. */ malloc_mutex_t extents_mtx; /* User-configurable extent hook functions. */ @@ -287,10 +286,10 @@ extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, size_t alignment, bool *zero); void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); -void arena_extent_cache_maybe_insert(arena_t *arena, extent_t *extent, - bool cache); -void arena_extent_cache_maybe_remove(arena_t *arena, extent_t *extent, - bool cache); +void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool cache); +void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool cache); extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index dbdc8051..eeebdf0f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -99,6 +99,9 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) +extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab); extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a489e14a..ae60f6c4 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -145,6 +145,7 @@ extent_addr_randomize extent_addr_set extent_alloc 
extent_alloc_cache +extent_alloc_cache_locked extent_alloc_dss extent_alloc_mmap extent_alloc_wrapper diff --git a/src/arena.c b/src/arena.c index 7dcf12d5..da9e9859 100644 --- a/src/arena.c +++ b/src/arena.c @@ -101,9 +101,12 @@ arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, } void -arena_extent_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) +arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + bool cache) { + malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + if (cache) { extent_ring_insert(&arena->extents_dirty, extent); arena->ndirty += arena_extent_dirty_npages(extent); @@ -111,9 +114,12 @@ arena_extent_cache_maybe_insert(arena_t *arena, extent_t *extent, bool cache) } void -arena_extent_cache_maybe_remove(arena_t *arena, extent_t *extent, bool dirty) +arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + bool dirty) { + malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + if (dirty) { extent_ring_remove(extent); assert(arena->ndirty >= arena_extent_dirty_npages(extent)); @@ -727,6 +733,8 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) extent_t *extent; size_t ndirty = 0; + malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + for (extent = qr_next(&arena->extents_dirty, qr_link); extent != &arena->extents_dirty; extent = qr_next(extent, qr_link)) ndirty += extent_size_get(extent) >> LG_PAGE; @@ -741,6 +749,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, *next; size_t nstashed = 0; + malloc_mutex_lock(tsdn, &arena->extents_mtx); + /* Stash extents according to ndirty_limit. */ for (extent = qr_next(&arena->extents_dirty, qr_link); extent != &arena->extents_dirty; extent = next) { @@ -756,9 +766,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, next = qr_next(extent, qr_link); /* Allocate. 
*/ zero = false; - textent = arena_extent_cache_alloc_locked(tsdn, arena, - r_extent_hooks, extent_base_get(extent), - extent_size_get(extent), 0, CACHELINE, &zero, false); + textent = extent_alloc_cache_locked(tsdn, arena, r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), 0, + CACHELINE, &zero, false); assert(textent == extent); assert(zero == extent_zeroed_get(extent)); extent_ring_remove(extent); @@ -770,6 +780,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, break; } + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (nstashed); } @@ -1788,9 +1799,6 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - extent_init(&arena->extents_dirty, arena, NULL, 0, 0, false, false, - false, false); - if (opt_purge == purge_mode_decay) arena_decay_init(arena, arena_decay_time_default_get()); @@ -1804,12 +1812,15 @@ arena_new(tsdn_t *tsdn, unsigned ind) extent_heap_new(&arena->extents_retained[i]); } - arena->extent_hooks = (extent_hooks_t *)&extent_hooks_default; + extent_init(&arena->extents_dirty, arena, NULL, 0, 0, false, false, + false, false); if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", WITNESS_RANK_ARENA_EXTENTS)) return (NULL); + arena->extent_hooks = (extent_hooks_t *)&extent_hooks_default; + ql_new(&arena->extent_cache); if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", WITNESS_RANK_ARENA_EXTENT_CACHE)) diff --git a/src/extent.c b/src/extent.c index 29c9d2be..522cbb9b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -191,18 +191,26 @@ extent_ad_comp(const extent_t *a, const extent_t *b) ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_ad_comp) static void -extent_heaps_insert(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) +extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], + extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); pszind_t pind = psz2ind(psz); + + 
malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); + extent_heap_insert(&extent_heaps[pind], extent); } static void -extent_heaps_remove(extent_heap_t extent_heaps[NPSIZES], extent_t *extent) +extent_heaps_remove(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], + extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); pszind_t pind = psz2ind(psz); + + malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); + extent_heap_remove(&extent_heaps[pind], extent); } @@ -381,9 +389,9 @@ extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_heap_t extent_heaps[NPSIZES], bool cache, void *new_addr, - size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, - bool slab) + extent_heap_t extent_heaps[NPSIZES], bool locked, bool cache, + void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, + bool *commit, bool slab) { extent_t *extent; rtree_ctx_t rtree_ctx_fallback; @@ -398,7 +406,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, /* Beware size_t wrap-around. 
*/ if (alloc_size < usize) return (NULL); - malloc_mutex_lock(tsdn, &arena->extents_mtx); + if (!locked) + malloc_mutex_lock(tsdn, &arena->extents_mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); if (new_addr != NULL) { rtree_elm_t *elm; @@ -419,11 +428,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = extent_first_best_fit(arena, extent_heaps, alloc_size); if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < size)) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + if (!locked) + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } - extent_heaps_remove(extent_heaps, extent); - arena_extent_cache_maybe_remove(arena, extent, cache); + extent_heaps_remove(tsdn, extent_heaps, extent); + arena_extent_cache_maybe_remove(tsdn, arena, extent, cache); leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); @@ -444,11 +454,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (extent == NULL) { extent_deregister(tsdn, lead); extent_leak(tsdn, arena, r_extent_hooks, cache, lead); - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + if (!locked) + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } - extent_heaps_insert(extent_heaps, lead); - arena_extent_cache_maybe_insert(arena, lead, cache); + extent_heaps_insert(tsdn, extent_heaps, lead); + arena_extent_cache_maybe_insert(tsdn, arena, lead, cache); } /* Split the trail. 
*/ @@ -459,11 +470,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_deregister(tsdn, extent); extent_leak(tsdn, arena, r_extent_hooks, cache, extent); - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + if (!locked) + malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); } - extent_heaps_insert(extent_heaps, trail); - arena_extent_cache_maybe_insert(arena, trail, cache); + extent_heaps_insert(tsdn, extent_heaps, trail); + arena_extent_cache_maybe_insert(tsdn, arena, trail, cache); } else if (leadsize == 0) { /* * Splitting causes usize to be set as a side effect, but no @@ -474,7 +486,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extent_committed_get(extent) && extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + if (!locked) + malloc_mutex_unlock(tsdn, &arena->extents_mtx); extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, extent); return (NULL); @@ -488,7 +501,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_interior_register(tsdn, rtree_ctx, extent); } - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + if (!locked) + malloc_mutex_unlock(tsdn, &arena->extents_mtx); if (*zero) { if (!extent_zeroed_get(extent)) { @@ -540,27 +554,51 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return (NULL); } -extent_t * -extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab) +static extent_t * +extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, bool locked, void *new_addr, size_t usize, + size_t pad, size_t alignment, bool *zero, bool slab) { extent_t *extent; bool commit; assert(usize + pad != 0); assert(alignment != 0); + if (locked) + 
malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); commit = true; extent = extent_recycle(tsdn, arena, r_extent_hooks, - arena->extents_cached, true, new_addr, usize, pad, alignment, zero, - &commit, slab); + arena->extents_cached, locked, true, new_addr, usize, pad, + alignment, zero, &commit, slab); if (extent == NULL) return (NULL); assert(commit); return (extent); } +extent_t * +extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab) +{ + + malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + + return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, + new_addr, usize, pad, alignment, zero, slab)); +} + +extent_t * +extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool slab) +{ + + return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, + new_addr, usize, pad, alignment, zero, slab)); +} + static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -607,8 +645,8 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, assert(alignment != 0); extent = extent_recycle(tsdn, arena, r_extent_hooks, - arena->extents_retained, false, new_addr, usize, pad, alignment, - zero, commit, slab); + arena->extents_retained, false, false, new_addr, usize, pad, + alignment, zero, commit, slab); if (extent != NULL && config_stats) { size_t size = usize + pad; arena->stats.retained -= size; @@ -697,22 +735,24 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, if (!extent_can_coalesce(a, b)) return; - extent_heaps_remove(extent_heaps, a); - extent_heaps_remove(extent_heaps, b); + extent_heaps_remove(tsdn, extent_heaps, a); + extent_heaps_remove(tsdn, extent_heaps, b); - arena_extent_cache_maybe_remove(extent_arena_get(a), a, cache); - 
arena_extent_cache_maybe_remove(extent_arena_get(b), b, cache); + arena_extent_cache_maybe_remove(tsdn, extent_arena_get(a), a, cache); + arena_extent_cache_maybe_remove(tsdn, extent_arena_get(b), b, cache); if (extent_merge_wrapper(tsdn, arena, r_extent_hooks, a, b)) { - extent_heaps_insert(extent_heaps, a); - extent_heaps_insert(extent_heaps, b); - arena_extent_cache_maybe_insert(extent_arena_get(a), a, cache); - arena_extent_cache_maybe_insert(extent_arena_get(b), b, cache); + extent_heaps_insert(tsdn, extent_heaps, a); + extent_heaps_insert(tsdn, extent_heaps, b); + arena_extent_cache_maybe_insert(tsdn, extent_arena_get(a), a, + cache); + arena_extent_cache_maybe_insert(tsdn, extent_arena_get(b), b, + cache); return; } - extent_heaps_insert(extent_heaps, a); - arena_extent_cache_maybe_insert(extent_arena_get(a), a, cache); + extent_heaps_insert(tsdn, extent_heaps, a); + arena_extent_cache_maybe_insert(tsdn, extent_arena_get(a), a, cache); } static void @@ -737,8 +777,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); - extent_heaps_insert(extent_heaps, extent); - arena_extent_cache_maybe_insert(arena, extent, cache); + extent_heaps_insert(tsdn, extent_heaps, extent); + arena_extent_cache_maybe_insert(tsdn, arena, extent, cache); /* Try to coalesce forward. */ next = rtree_read(tsdn, &extents_rtree, rtree_ctx, @@ -1021,7 +1061,7 @@ extent_merge_default_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) return (true); if (have_dss && extent_in_dss(tsdn, addr_a) != extent_in_dss(tsdn, addr_b)) - return (true); + return (true); return (false); } From fd96974040b54538a43951f630e9fea461408384 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Sep 2016 12:11:01 -0700 Subject: [PATCH 0357/2608] Add new_addr validation in extent_recycle(). 
--- src/extent.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/extent.c b/src/extent.c index 522cbb9b..3ab48ca0 100644 --- a/src/extent.c +++ b/src/extent.c @@ -400,6 +400,25 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); + if (config_debug && new_addr != NULL) { + extent_t *prev; + + /* + * Non-NULL new_addr has two use cases: + * + * 1) Recycle a known-extant extent, e.g. during purging. + * 2) Perform in-place expanding reallocation. + * + * Regardless of use case, new_addr must either refer to a + * non-existing extent, or to the base of an extant extent, + * since only active slabs support interior lookups (which of + * course cannot be recycled). + */ + assert(PAGE_ADDR2BASE(new_addr) == new_addr); + prev = extent_lookup(tsdn, (void *)((uintptr_t)new_addr - PAGE), + false); + assert(prev == NULL || extent_past_get(prev) == new_addr); + } size = usize + pad; alloc_size = s2u(size + PAGE_CEILING(alignment) - PAGE); @@ -417,17 +436,20 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (elm != NULL) { extent = rtree_elm_read_acquired(tsdn, &extents_rtree, elm); - if (extent != NULL && (extent_arena_get(extent) != arena - || extent_active_get(extent) || - extent_retained_get(extent) == cache)) - extent = NULL; + if (extent != NULL) { + assert(extent_base_get(extent) == new_addr); + if (extent_arena_get(extent) != arena || + extent_size_get(extent) < size || + extent_active_get(extent) || + extent_retained_get(extent) == cache) + extent = NULL; + } rtree_elm_release(tsdn, &extents_rtree, elm); } else extent = NULL; } else extent = extent_first_best_fit(arena, extent_heaps, alloc_size); - if (extent == NULL || (new_addr != NULL && extent_size_get(extent) < - size)) { + if (extent == NULL) { if (!locked) malloc_mutex_unlock(tsdn, &arena->extents_mtx); return (NULL); From 
e3187ec6b6a349b3add5c27c470ff7f7f040c1d5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Sep 2016 12:16:55 -0700 Subject: [PATCH 0358/2608] Fix large_dalloc_impl() to always lock large_mtx. --- src/large.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/large.c b/src/large.c index 325b5f10..34b3bdb5 100644 --- a/src/large.c +++ b/src/large.c @@ -281,18 +281,21 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, return (ret); } +/* + * junked_locked indicates whether the extent's data have been junk-filled, and + * whether the arena's lock is currently held. The arena's large_mtx is + * independent of these considerations. + */ static void large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) { arena_t *arena; arena = extent_arena_get(extent); - if (!junked_locked) - malloc_mutex_lock(tsdn, &arena->large_mtx); + malloc_mutex_lock(tsdn, &arena->large_mtx); ql_remove(&arena->large, extent, ql_link); + malloc_mutex_unlock(tsdn, &arena->large_mtx); if (!junked_locked) { - malloc_mutex_unlock(tsdn, &arena->large_mtx); - large_dalloc_maybe_junk(tsdn, extent_addr_get(extent), extent_usize_get(extent)); } From 73868b60f22d40404572d124aa7e08de1d70724f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Sep 2016 12:17:42 -0700 Subject: [PATCH 0359/2608] Fix extent_{before,last,past}() to return page-aligned results. 
--- include/jemalloc/internal/extent.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index eeebdf0f..528759b0 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -227,22 +227,23 @@ JEMALLOC_INLINE void * extent_before_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent->e_addr - PAGE)); + return ((void *)((uintptr_t)extent_base_get(extent) - PAGE)); } JEMALLOC_INLINE void * extent_last_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent->e_addr + extent_size_get(extent) - - PAGE)); + return ((void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE)); } JEMALLOC_INLINE void * extent_past_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent->e_addr + extent_size_get(extent))); + return ((void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent))); } JEMALLOC_INLINE bool From 0222fb41d1fc8e882f7872999ddaa09193d58912 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Sep 2016 12:18:36 -0700 Subject: [PATCH 0360/2608] Add various mutex ownership assertions. 
--- include/jemalloc/internal/arena.h | 2 ++ src/arena.c | 2 ++ src/extent.c | 16 ++++++++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index cee90b50..1758dd02 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -521,6 +521,8 @@ JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_assert_not_owner(tsdn, &arena->lock); + arena_decay_ticks(tsdn, arena, 1); } diff --git a/src/arena.c b/src/arena.c index da9e9859..42cd3b09 100644 --- a/src/arena.c +++ b/src/arena.c @@ -383,6 +383,8 @@ arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (!locked) malloc_mutex_lock(tsdn, &arena->lock); + else + malloc_mutex_assert_owner(tsdn, &arena->lock); if (config_stats) { arena_large_dalloc_stats_update(arena, extent_usize_get(extent)); diff --git a/src/extent.c b/src/extent.c index 3ab48ca0..f88c4240 100644 --- a/src/extent.c +++ b/src/extent.c @@ -356,11 +356,13 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) * fits. 
*/ static extent_t * -extent_first_best_fit(arena_t *arena, extent_heap_t extent_heaps[NPSIZES], - size_t size) +extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, + extent_heap_t extent_heaps[NPSIZES], size_t size) { pszind_t pind, i; + malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + pind = psz2ind(extent_size_quantize_ceil(size)); for (i = pind; i < NPSIZES; i++) { extent_t *extent = extent_heap_first(&extent_heaps[i]); @@ -398,6 +400,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); size_t size, alloc_size, leadsize, trailsize; + if (locked) + malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); if (config_debug && new_addr != NULL) { @@ -447,8 +451,10 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_elm_release(tsdn, &extents_rtree, elm); } else extent = NULL; - } else - extent = extent_first_best_fit(arena, extent_heaps, alloc_size); + } else { + extent = extent_first_best_fit(tsdn, arena, extent_heaps, + alloc_size); + } if (extent == NULL) { if (!locked) malloc_mutex_unlock(tsdn, &arena->extents_mtx); @@ -586,8 +592,6 @@ extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, assert(usize + pad != 0); assert(alignment != 0); - if (locked) - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); commit = true; extent = extent_recycle(tsdn, arena, r_extent_hooks, From 61f467e16a2b925a0e77241b87b5d1f1fbcb96d0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Sep 2016 12:18:57 -0700 Subject: [PATCH 0361/2608] Avoid self assignment in tsd_set(). 
--- include/jemalloc/internal/tsd.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 2355f9c6..5df5f673 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -226,7 +226,8 @@ a_name##tsd_set(a_type *val) \ { \ \ assert(a_name##tsd_booted); \ - a_name##tsd_tls = (*val); \ + if (likely(&a_name##tsd_tls != val)) \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ a_name##tsd_initialized = true; \ } @@ -277,7 +278,8 @@ a_name##tsd_set(a_type *val) \ { \ \ assert(a_name##tsd_booted); \ - a_name##tsd_tls = (*val); \ + if (likely(&a_name##tsd_tls != val)) \ + a_name##tsd_tls = (*val); \ if (a_cleanup != malloc_tsd_no_cleanup) { \ if (pthread_setspecific(a_name##tsd_tsd, \ (void *)(&a_name##tsd_tls))) { \ @@ -409,7 +411,8 @@ a_name##tsd_set(a_type *val) \ \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(); \ - wrapper->val = *(val); \ + if (likely(&wrapper->val != val)) \ + wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ } @@ -537,7 +540,8 @@ a_name##tsd_set(a_type *val) \ \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(); \ - wrapper->val = *(val); \ + if (likely(&wrapper->val != val)) \ + wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ } From 57ed894f8ac154d30faf9449a76d2792cdad3850 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Sep 2016 09:13:45 -0700 Subject: [PATCH 0362/2608] Fix arena_bind(). When tsd is not in nominal state (e.g. during thread termination), we should not increment nthreads. 
--- src/jemalloc.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d1f4937..c1ecc73c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -455,15 +455,16 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; + if (!tsd_nominal(tsd)) + return; + arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); - if (tsd_nominal(tsd)) { - if (internal) - tsd_iarena_set(tsd, arena); - else - tsd_arena_set(tsd, arena); - } + if (internal) + tsd_iarena_set(tsd, arena); + else + tsd_arena_set(tsd, arena); } void From a6a8e40f7d24ee9f4f6abf1c03e1e0d7e3763084 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 10 Jun 2016 16:28:35 -0700 Subject: [PATCH 0363/2608] Fix a valgrind regression in chunk_recycle() Fix a latent valgrind bug exposed by d412624b25eed2b5c52b7d94a71070d3aab03cb4 (Move retaining out of default chunk hooks). --- src/chunk.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/chunk.c b/src/chunk.c index f292c980..dff537f5 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -316,10 +316,11 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t i; size_t *p = (size_t *)(uintptr_t)ret; - JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); for (i = 0; i < size / sizeof(size_t); i++) assert(p[i] == 0); } + if (config_valgrind) + JEMALLOC_VALGRIND_MAKE_MEM_DEFINED(ret, size); } return (ret); } From c096ccfe111ca3c849a0104517a2a1f123985307 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 12 Jul 2016 15:52:18 -0700 Subject: [PATCH 0364/2608] Fix a bug in __builtin_unreachable configure check In 1167e9e, I accidentally tested je_cv_gcc_builtin_ffsl instead of je_cv_gcc_builtin_unreachable (copy-paste error), which meant that JEMALLOC_INTERNAL_UNREACHABLE was always getting defined as abort even if __builtin_unreachable support was detected. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e1639d51..1e85101b 100644 --- a/configure.ac +++ b/configure.ac @@ -1095,7 +1095,7 @@ void foo (void) { foo(); } ], [je_cv_gcc_builtin_unreachable]) -if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then +if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) else AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) From d1207f0d371eade218f7572743b5eddedc7fff94 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Fri, 17 Jun 2016 13:28:39 -0700 Subject: [PATCH 0365/2608] Check for __builtin_unreachable at configure time Add a configure check for __builtin_unreachable instead of basing its availability on the __GNUC__ version. On OS X using gcc (a real gcc, not the bundled version that's just a gcc front-end) leads to a linker assertion: https://github.com/jemalloc/jemalloc/issues/266 It turns out that this is caused by a gcc bug resulting from the use of __builtin_unreachable(): https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57438 To work around this bug, check that __builtin_unreachable() actually works at configure time, and if it doesn't use abort() instead. The check is based on https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57438#c21. With this `make check` passes with a homebrew installed gcc-5 and gcc-6. 
--- configure.ac | 17 ++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 +++++ include/jemalloc/internal/util.h | 22 +++++-------------- 3 files changed, 29 insertions(+), 16 deletions(-) diff --git a/configure.ac b/configure.ac index 7f19715d..6fe366c6 100644 --- a/configure.ac +++ b/configure.ac @@ -1050,6 +1050,23 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) + + +JE_COMPILABLE([a program using __builtin_unreachable], [ +void foo (void) { + __builtin_unreachable(); +} +], [ + { + foo(); + } +], [je_cv_gcc_builtin_unreachable]) +if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) +else + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) +fi + dnl ============================================================================ dnl Check for __builtin_ffsl(), then ffsl(3), and fail if neither are found. dnl One of those two functions should (theoretically) exist on all platforms diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7de0cf7c..22396b75 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -188,6 +188,12 @@ /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#undef JEMALLOC_INTERNAL_UNREACHABLE + /* * ffs*() functions to use for bitmapping. Don't use these directly; instead, * use ffs_*() from util.h. 
diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index a0c2203d..aee00d6d 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -61,30 +61,20 @@ # define JEMALLOC_CC_SILENCE_INIT(v) #endif -#define JEMALLOC_GNUC_PREREQ(major, minor) \ - (!defined(__clang__) && \ - (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))) -#ifndef __has_builtin -# define __has_builtin(builtin) (0) -#endif -#define JEMALLOC_CLANG_HAS_BUILTIN(builtin) \ - (defined(__clang__) && __has_builtin(builtin)) - #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) # define unlikely(x) __builtin_expect(!!(x), 0) -# if JEMALLOC_GNUC_PREREQ(4, 6) || \ - JEMALLOC_CLANG_HAS_BUILTIN(__builtin_unreachable) -# define unreachable() __builtin_unreachable() -# else -# define unreachable() abort() -# endif #else # define likely(x) !!(x) # define unlikely(x) !!(x) -# define unreachable() abort() #endif +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + #include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). */ From 38a96f07ac3b351aeb9f3d685eaf0d4f9f01195a Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 12 Jul 2016 15:52:18 -0700 Subject: [PATCH 0366/2608] Fix a bug in __builtin_unreachable configure check In 1167e9e, I accidentally tested je_cv_gcc_builtin_ffsl instead of je_cv_gcc_builtin_unreachable (copy-paste error), which meant that JEMALLOC_INTERNAL_UNREACHABLE was always getting defined as abort even if __builtin_unreachable support was detected. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 6fe366c6..a58eb5e9 100644 --- a/configure.ac +++ b/configure.ac @@ -1061,7 +1061,7 @@ void foo (void) { foo(); } ], [je_cv_gcc_builtin_unreachable]) -if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then +if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) else AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) From 5ff18391331a0d56b01ddce3e37cd1c7a2b439a2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Sep 2016 11:00:32 -0700 Subject: [PATCH 0367/2608] Formatting fixes. --- src/zone.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/zone.c b/src/zone.c index 92381614..52d07f30 100644 --- a/src/zone.c +++ b/src/zone.c @@ -168,7 +168,8 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } -static malloc_zone_t *get_default_zone() +static malloc_zone_t * +get_default_zone(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -179,20 +180,22 @@ static malloc_zone_t *get_default_zone() * if one is present (apparently enabled when malloc stack logging is * enabled), or the first registered zone otherwise. In practice this * means unless malloc stack logging is enabled, the first registered - * zone is the default. - * So get the list of zones to get the first one, instead of relying on - * malloc_default_zone. + * zone is the default. So get the list of zones to get the first one, + * instead of relying on malloc_default_zone. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, - &num_zones)) { - /* Reset the value in case the failure happened after it was set. */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + (vm_address_t**)&zones, &num_zones)) { + /* + * Reset the value in case the failure happened after it was + * set. 
+ */ num_zones = 0; } if (num_zones) - return zones[0]; + return (zones[0]); - return malloc_default_zone(); + return (malloc_default_zone()); } JEMALLOC_ATTR(constructor) From 11b5da7533f703f6274eae98d767dd1381fced15 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 8 Jul 2016 13:35:35 +0900 Subject: [PATCH 0368/2608] Change how the default zone is found On OSX 10.12, malloc_default_zone returns a special zone that is not present in the list of registered zones. That zone uses a "lite zone" if one is present (apparently enabled when malloc stack logging is enabled), or the first registered zone otherwise. In practice this means unless malloc stack logging is enabled, the first registered zone is the default. So get the list of zones to get the first one, instead of relying on malloc_default_zone. --- src/zone.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/zone.c b/src/zone.c index 2c17123a..bf5c9301 100644 --- a/src/zone.c +++ b/src/zone.c @@ -168,6 +168,33 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } +static malloc_zone_t *get_default_zone() +{ + malloc_zone_t **zones = NULL; + unsigned int num_zones = 0; + + /* + * On OSX 10.12, malloc_default_zone returns a special zone that is not + * present in the list of registered zones. That zone uses a "lite zone" + * if one is present (apparently enabled when malloc stack logging is + * enabled), or the first registered zone otherwise. In practice this + * means unless malloc stack logging is enabled, the first registered + * zone is the default. + * So get the list of zones to get the first one, instead of relying on + * malloc_default_zone. + */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, + &num_zones)) { + /* Reset the value in case the failure happened after it was set. 
*/ + num_zones = 0; + } + + if (num_zones) + return zones[0]; + + return malloc_default_zone(); +} + JEMALLOC_ATTR(constructor) void register_zone(void) @@ -177,7 +204,7 @@ register_zone(void) * If something else replaced the system default zone allocator, don't * register jemalloc's. */ - malloc_zone_t *default_zone = malloc_default_zone(); + malloc_zone_t *default_zone = get_default_zone(); malloc_zone_t *purgeable_zone = NULL; if (!default_zone->zone_name || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { @@ -272,5 +299,7 @@ register_zone(void) malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - } while (malloc_default_zone() != &zone); + + default_zone = get_default_zone(); + } while (default_zone != &zone); } From 57cddffca6a9481973fad4e09577f8c64b7950d6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Sep 2016 11:00:32 -0700 Subject: [PATCH 0369/2608] Formatting fixes. --- src/zone.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/zone.c b/src/zone.c index bf5c9301..89a3062c 100644 --- a/src/zone.c +++ b/src/zone.c @@ -168,7 +168,8 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_parent(); } -static malloc_zone_t *get_default_zone() +static malloc_zone_t * +get_default_zone(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -179,20 +180,22 @@ static malloc_zone_t *get_default_zone() * if one is present (apparently enabled when malloc stack logging is * enabled), or the first registered zone otherwise. In practice this * means unless malloc stack logging is enabled, the first registered - * zone is the default. - * So get the list of zones to get the first one, instead of relying on - * malloc_default_zone. + * zone is the default. So get the list of zones to get the first one, + * instead of relying on malloc_default_zone. 
*/ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**) &zones, - &num_zones)) { - /* Reset the value in case the failure happened after it was set. */ + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + (vm_address_t**)&zones, &num_zones)) { + /* + * Reset the value in case the failure happened after it was + * set. + */ num_zones = 0; } if (num_zones) - return zones[0]; + return (zones[0]); - return malloc_default_zone(); + return (malloc_default_zone()); } JEMALLOC_ATTR(constructor) From 50a865e15adbc2c28dc92ac4e9c63b63835ac77b Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 8 Jun 2016 14:20:32 -0700 Subject: [PATCH 0370/2608] Work around a weird pgi bug in test/unit/math.c pgi fails to compile math.c, reporting that `-INFINITY` in `pt_norm_expected[]` is a "Non-constant" expression. A simplified version of this failure is: ```c #include static double inf1, inf2 = INFINITY; // no complaints static double inf3 = INFINITY; // suddenly INFINITY is "Non-constant" int main() { } ``` ```sh PGC-S-0074-Non-constant expression in initializer (t.c: 4) ``` pgi errors on the declaration of inf3, and will compile fine if that line is removed. I've reported this bug to pgi, but in the meantime I just switched to using (DBL_MAX + DBL_MAX) to work around this bug. --- test/unit/math.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/math.c b/test/unit/math.c index ebec77a6..adb72bed 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -5,6 +5,10 @@ #include +#ifdef __PGI +#undef INFINITY +#endif + #ifndef INFINITY #define INFINITY (DBL_MAX + DBL_MAX) #endif From 5acef864f22384064b5bfb4ad3b50114b9ef214b Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Wed, 8 Jun 2016 14:48:55 -0700 Subject: [PATCH 0371/2608] Don't use compact red-black trees with the pgi compiler Some bug (either in the red-black tree code, or in the pgi compiler) seems to cause red-black trees to become unbalanced. 
This issue seems to go away if we don't use compact red-black trees. Since red-black trees don't seem to be used much anymore, I opted for what seems to be an easy fix here instead of digging in and trying to find the root cause of the bug. Some context in case it's helpful: I experienced a ton of segfaults while using pgi as Chapel's target compiler with jemalloc 4.0.4. The little bit of debugging I did pointed me somewhere deep in red-black tree manipulation, but I didn't get a chance to investigate further. It looks like 4.2.0 replaced most uses of red-black trees with pairing-heaps, which seems to avoid whatever bug I was hitting. However, `make check_unit` was still failing on the rb test, so I figured the core issue was just being masked. Here's the `make check_unit` failure: ```sh === test/unit/rb === test_rb_empty: pass tree_recurse:test/unit/rb.c:90: Failed assertion: (((_Bool) (((uintptr_t) (left_node)->link.rbn_right_red) & ((size_t)1)))) == (false) --> true != false: Node should be black test_rb_random:test/unit/rb.c:274: Failed assertion: (imbalances) == (0) --> 1 != 0: Tree is unbalanced tree_recurse:test/unit/rb.c:90: Failed assertion: (((_Bool) (((uintptr_t) (left_node)->link.rbn_right_red) & ((size_t)1)))) == (false) --> true != false: Node should be black test_rb_random:test/unit/rb.c:274: Failed assertion: (imbalances) == (0) --> 1 != 0: Tree is unbalanced node_remove:test/unit/rb.c:190: Failed assertion: (imbalances) == (0) --> 2 != 0: Tree is unbalanced : test/unit/rb.c:43: Failed assertion: "pathp[-1].cmp < 0" test/test.sh: line 22: 12926 Aborted Test harness error ``` While starting to debug I saw the RB_COMPACT option and decided to check if turning that off resolved the bug. It seems to have fixed it (`make check_unit` passes and the segfaults under Chapel are gone) so it seems like on okay work-around. 
I'd imagine this has performance implications for red-black trees under pgi, but if they're not going to be used much anymore it's probably not a big deal. --- include/jemalloc/internal/jemalloc_internal.h.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8f82edd4..1e45bc92 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -162,7 +162,9 @@ static const bool config_cache_oblivious = #endif #include "jemalloc/internal/ph.h" +#ifndef __PGI #define RB_COMPACT +#endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" From 3573fb93ceb309c51484753d5f1a95df09f97185 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 13:18:08 -0700 Subject: [PATCH 0372/2608] Add -dynamic for integration and stress tests with Cray compiler wrappers Cray systems come with compiler wrappers to simplify building parallel applications. CC is the C++ wrapper, and cc is the C wrapper. The wrappers call the base {Cray, Intel, PGI, or GNU} compiler with vendor specific flags. The "Programming Environment" (prgenv) that's currently loaded determines the base compiler. e.g. compiling with gnu looks something like: module load PrgEnv-gnu cc hello.c On most systems the wrappers defaults to `-static` mode, which causes them to only look for static libraries, and not for any dynamic ones (even if the dynamic version was explicitly listed.) The integration and stress tests expect to be using the .so, so we have to run the with -dynamic so that wrapper will find/use the .so. 
--- Makefile.in | 5 +++-- configure.ac | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 652f01f2..54b56c2e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -57,6 +57,7 @@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ CTARGET = @CTARGET@ LDTARGET = @LDTARGET@ +TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ @@ -299,11 +300,11 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index a58eb5e9..44af36ac 100644 --- a/configure.ac +++ b/configure.ac @@ -131,6 +131,18 @@ if test "x$GCC" != "xyes" ; then [je_cv_msvc=no])]) fi +dnl check if a cray prgenv wrapper compiler is being used +je_cv_cray_prgenv_wrapper="" +if test "x${PE_ENV}" != "x" ; then + case "${CC}" in + CC|cc) + 
je_cv_cray_prgenv_wrapper="yes" + ;; + *) + ;; + esac +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -269,11 +281,16 @@ SOREV="${so}.${rev}" PIC_CFLAGS='-fPIC -DPIC' CTARGET='-o $@' LDTARGET='-o $@' +TEST_LD_MODE= EXTRA_LDFLAGS= ARFLAGS='crus' AROUT=' $@' CC_MM=1 +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + TEST_LD_MODE='-dynamic' +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) @@ -432,6 +449,7 @@ AC_SUBST([SOREV]) AC_SUBST([PIC_CFLAGS]) AC_SUBST([CTARGET]) AC_SUBST([LDTARGET]) +AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) From b770d2da1d94962860ea396b6e6dc83153ebdb55 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 14:20:28 -0700 Subject: [PATCH 0373/2608] Fix librt detection when using a Cray compiler wrapper The Cray compiler wrappers will often add `-lrt` to the base compiler with `-static` linking (the default at most sites.) However, `-lrt` isn't automatically added with `-dynamic`. This means that if jemalloc was built with `-static`, but then used in a program with `-dynamic` jemalloc won't have detected that librt is a dependency. The integration and stress tests use -dynamic, which is causing undefined references to clock_gettime(). This just adds an extra check for librt (ignoring the autoconf cache) with `-dynamic` thrown. It also stops filtering librt from the integration tests. With this `make check` passes for: - PrgEnv-gnu - PrgEnv-intel - PrgEnv-pgi PrgEnv-cray still needs more work (will be in a separate patch.) 
--- Makefile.in | 2 +- configure.ac | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 54b56c2e..964ad870 100644 --- a/Makefile.in +++ b/Makefile.in @@ -300,7 +300,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) diff --git a/configure.ac b/configure.ac index 44af36ac..2c6009a1 100644 --- a/configure.ac +++ b/configure.ac @@ -1279,6 +1279,20 @@ CPPFLAGS="$CPPFLAGS -D_REENTRANT" dnl Check whether clock_gettime(2) is in libc or librt. AC_SEARCH_LIBS([clock_gettime], [rt]) +dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with +dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc +if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then + if test "$ac_cv_search_clock_gettime" != "-lrt"; then + SAVED_CFLAGS="${CFLAGS}" + + unset ac_cv_search_clock_gettime + JE_CFLAGS_APPEND([-dynamic]) + AC_SEARCH_LIBS([clock_gettime], [rt]) + + CFLAGS="${SAVED_CFLAGS}" + fi +fi + dnl Check if the GNU-specific secure_getenv function exists. 
AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], From 8701bc70791e3717d155fb39da9868a0dbf09be6 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:07 -0700 Subject: [PATCH 0374/2608] Add initial support for building with the cray compiler Get jemalloc building and passing `make check_unit` with cray 8.4. An inlining bug in 8.4 results in internal errors while trying to build jemalloc. This has already been reported and fixed for the 8.5 release. In order to work around the inlining bug, disable gnu compatibility and limit ipa optimizations. I copied the msvc compiler check for cray, but note that we perform the test even if we think we're using gcc because cray pretends to be gcc if `-hgnu` (which is enabled by default) is used. I couldn't come up with a principled way to check for the inlining bug, so instead I just checked compiler versions. The build had lots of warnings I need to address and cray doesn't support -MM or -MT for dependency tracking, so I had to do `make CC_MM=`. --- configure.ac | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/configure.ac b/configure.ac index 2c6009a1..77c1b72e 100644 --- a/configure.ac +++ b/configure.ac @@ -118,6 +118,7 @@ dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, dnl just prevent autoconf from molesting CFLAGS. 
CFLAGS=$CFLAGS AC_PROG_CC + if test "x$GCC" != "xyes" ; then AC_CACHE_CHECK([whether compiler is MSVC], [je_cv_msvc], @@ -143,6 +144,30 @@ if test "x${PE_ENV}" != "x" ; then esac fi +AC_CACHE_CHECK([whether compiler is cray], + [je_cv_cray], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#ifndef _CRAYC + int fail[-1]; +#endif +])], + [je_cv_cray=yes], + [je_cv_cray=no])]) + +if test "x${je_cv_cray}" = "xyes" ; then + AC_CACHE_CHECK([whether cray compiler version is 8.4], + [je_cv_cray_84], + [AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], + [ +#if !(_RELEASE_MAJOR == 8 && _RELEASE_MINOR == 4) + int fail[-1]; +#endif +])], + [je_cv_cray_84=yes], + [je_cv_cray_84=no])]) +fi + if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then @@ -164,6 +189,13 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-FS]) CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi + if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = "xyes" ; then + JE_CFLAGS_APPEND([-hipa2]) + JE_CFLAGS_APPEND([-hnognu]) + fi + fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. if test "x$EXTRA_CFLAGS" != "x" ; then From 1d42a99027991b6abc2fb8b3e99f136b7ad36751 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:29 -0700 Subject: [PATCH 0375/2608] Disable automatic dependency generation for the Cray compiler Cray only supports `-M` for generating dependency files. It does not support `-MM` or `-MT`, so don't try to use them. I just reused the existing mechanism for turning auto-dependency generation off (`CC_MM=`), but it might be more principled to add a configure test to check if the compiler supports `-MM` and `-MT`, instead of manually tracking which compilers don't support those flags. 
--- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 77c1b72e..a864ce65 100644 --- a/configure.ac +++ b/configure.ac @@ -323,6 +323,10 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then TEST_LD_MODE='-dynamic' fi +if test "x${je_cv_cray}" = "xyes" ; then + CC_MM= +fi + AN_MAKEVAR([AR], [AC_PROG_AR]) AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) From 4b525183988bc88f2c49b59ed50d1610656e0fd6 Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Tue, 14 Jun 2016 15:26:53 -0700 Subject: [PATCH 0376/2608] Add Cray compiler's equivalent of -Werror before __attribute__ checks Cray uses -herror_on_warning instead of -Werror. Use it everywhere -Werror is currently used for __attribute__ checks so configure actually detects they're not supported. --- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index a864ce65..f1a34c3b 100644 --- a/configure.ac +++ b/configure.ac @@ -504,6 +504,7 @@ fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; @@ -519,6 +520,7 @@ fi dnl Check for alloc_size attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) @@ -529,6 +531,7 @@ fi dnl Check for format(gnu_printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) 
__attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) @@ -539,6 +542,7 @@ fi dnl Check for format(printf, ...) attribute support. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) +JE_CFLAGS_APPEND([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) From c128167bca9b652dd7cd90fd724ff0d02e66289f Mon Sep 17 00:00:00 2001 From: Elliot Ronaghan Date: Thu, 7 Jul 2016 15:06:01 -0700 Subject: [PATCH 0377/2608] Disable irrelevant Cray compiler warnings if cc-silence is enabled Cray is pretty warning-happy, so disable ones that aren't helpful. Each warning has a numeric value instead of having named flags to disable specific warnings. Disable warnings 128 and 1357. 128: Ignore unreachable code warning. Cray warns about `not_reached()` not being reachable in a couple of places because it detects that some loops will never terminate. 1357: Ignore warning about redefinition of malloc and friends With this patch, Cray 8.4.0 and 8.5.1 build cleanly and pass `make check` --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index f1a34c3b..f1fc4935 100644 --- a/configure.ac +++ b/configure.ac @@ -195,6 +195,12 @@ if test "x$CFLAGS" = "x" ; then JE_CFLAGS_APPEND([-hipa2]) JE_CFLAGS_APPEND([-hnognu]) fi + if test "x$enable_cc_silence" != "xno" ; then + dnl ignore unreachable code warning + JE_CFLAGS_APPEND([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_APPEND([-hnomessage=1357]) + fi fi fi dnl Append EXTRA_CFLAGS to CFLAGS, if defined. From df0d273a07b0ca5ea4a9d8e140e1fa6425430e4a Mon Sep 17 00:00:00 2001 From: Eric Le Bihan Date: Thu, 14 Jul 2016 22:44:01 +0200 Subject: [PATCH 0378/2608] Fix LG_QUANTUM definition for sparc64 GCC 4.9.3 cross-compiled for sparc64 defines __sparc_v9__, not __sparc64__ nor __sparcv9. 
This prevents LG_QUANTUM from being defined properly. Adding this new value to the check solves the issue. --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c35280fa..086726d3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -232,7 +232,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9)) +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) From b54c0c2925fd5acd63fd3957aa9e177c3fd8d27f Mon Sep 17 00:00:00 2001 From: Eric Le Bihan Date: Thu, 14 Jul 2016 22:44:01 +0200 Subject: [PATCH 0379/2608] Fix LG_QUANTUM definition for sparc64 GCC 4.9.3 cross-compiled for sparc64 defines __sparc_v9__, not __sparc64__ nor __sparcv9. This prevents LG_QUANTUM from being defined properly. Adding this new value to the check solves the issue. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 1e45bc92..6a0aa005 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -236,7 +236,7 @@ typedef unsigned szind_t; # ifdef __alpha__ # define LG_QUANTUM 4 # endif -# if (defined(__sparc64__) || defined(__sparcv9)) +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) # define LG_QUANTUM 4 # endif # if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) From 020c32859d19873e8555d848785f0b584d4249f9 Mon Sep 17 00:00:00 2001 From: Bai Date: Sun, 28 Aug 2016 13:51:57 +0800 Subject: [PATCH 0380/2608] Readme.txt error for building in the Windows The command sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" doesn't work as written. Change the position of the closing quotation mark so that the autogen command works. --- msvc/ReadMe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 02b97f74..b1c2fc5c 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + sh -c "./autogen.sh" CC=cl --enable-lazy-lock=no 6. Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln From 15da5f5d9d0e44cceb66bb13fad969d51145a616 Mon Sep 17 00:00:00 2001 From: Bai Date: Sun, 28 Aug 2016 13:51:57 +0800 Subject: [PATCH 0381/2608] Readme.txt error for building in the Windows The command sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" doesn't work as written. Change the position of the closing quotation mark so that the autogen command works. 
--- msvc/ReadMe.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 02b97f74..b1c2fc5c 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh CC=cl --enable-lazy-lock=no" + sh -c "./autogen.sh" CC=cl --enable-lazy-lock=no 6. Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln From ea68cd25b6455b2c408200c3947c0715f6a7e4a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6ckerbauer?= Date: Wed, 7 Sep 2016 08:34:17 +0200 Subject: [PATCH 0382/2608] use install command determined by configure --- Makefile.in | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/Makefile.in b/Makefile.in index b78e1500..ec863079 100644 --- a/Makefile.in +++ b/Makefile.in @@ -61,6 +61,7 @@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -305,54 +306,54 @@ build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - install -d $(BINDIR) + $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: - install -d $(INCLUDEDIR)/jemalloc + $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - install -d $(LIBDIR) - install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -d $(LIBDIR) + $(INSTALL) -m 755 
$(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - install -d $(LIBDIR) + $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "install -m 755 $$l $(LIBDIR)"; \ - install -m 755 $$l $(LIBDIR); \ + echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) - install -d $(LIBDIR)/pkgconfig + $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ - install -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - install -d $(DATADIR)/doc/jemalloc$(install_suffix) + $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - install -d $(MANDIR)/man3 + $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man From 92009b19d623748fdf8b65aea732c27ec4dd257b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20K=C3=B6ckerbauer?= Date: Wed, 7 Sep 2016 08:34:17 +0200 Subject: [PATCH 0383/2608] use install command determined by configure --- Makefile.in | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/Makefile.in b/Makefile.in index 964ad870..8789c451 100644 --- a/Makefile.in +++ 
b/Makefile.in @@ -62,6 +62,7 @@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +INSTALL = @INSTALL@ ifeq (macho, $(ABI)) TEST_LIBRARY_PATH := DYLD_FALLBACK_LIBRARY_PATH="$(objroot)lib" @@ -311,54 +312,54 @@ build_lib_static: $(STATIC_LIBS) build_lib: build_lib_shared build_lib_static install_bin: - install -d $(BINDIR) + $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "install -m 755 $$b $(BINDIR)"; \ - install -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: - install -d $(INCLUDEDIR)/jemalloc + $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "install -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - install -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) - install -d $(LIBDIR) - install -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -d $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif install_lib_static: $(STATIC_LIBS) - install -d $(LIBDIR) + $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "install -m 755 $$l $(LIBDIR)"; \ - install -m 755 $$l $(LIBDIR); \ + echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) - install -d $(LIBDIR)/pkgconfig + $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "install -m 644 $$l $(LIBDIR)/pkgconfig"; \ - install -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done install_lib: install_lib_shared install_lib_static install_lib_pc install_doc_html: - install -d $(DATADIR)/doc/jemalloc$(install_suffix) + $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo 
"install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - install -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: - install -d $(MANDIR)/man3 + $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "install -m 644 $$d $(MANDIR)/man3"; \ - install -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man From 43d4d7c37376e2ac5faf9c0adf30647d7c288106 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Fri, 10 Jun 2016 00:17:19 +0900 Subject: [PATCH 0384/2608] Add Travis-CI configuration --- .travis.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 00000000..1fed4f8e --- /dev/null +++ b/.travis.yml @@ -0,0 +1,29 @@ +language: c + +matrix: + include: + - os: linux + compiler: gcc + - os: linux + compiler: gcc + env: + - EXTRA_FLAGS=-m32 + addons: + apt: + packages: + - gcc-multilib + - os: osx + compiler: clang + - os: osx + compiler: clang + env: + - EXTRA_FLAGS=-m32 + +before_script: + - autoconf + - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - make -j3 + - make -j3 tests + +script: + - make check From 3bb044c80766e1cbe14e246ec2232650859a0dc9 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 17:10:16 +0900 Subject: [PATCH 0385/2608] Add an AppVeyor config This builds jemalloc and runs all checks with: - MSVC 2015 64-bits - MSVC 2015 32-bits - MINGW64 (from msys2) - MINGW32 (from msys2) Normally, AppVeyor configs are named appveyor.yml, but it is possible to configure the .yml file name in the AppVeyor project settings such that the file stays "hidden", like typical travis configs. 
--- .appveyor.yml | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 .appveyor.yml diff --git a/.appveyor.yml b/.appveyor.yml new file mode 100644 index 00000000..ddd5c571 --- /dev/null +++ b/.appveyor.yml @@ -0,0 +1,28 @@ +version: '{build}' + +environment: + matrix: + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + - MSYSTEM: MINGW64 + CPU: x86_64 + - MSYSTEM: MINGW32 + CPU: i686 + +install: + - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% + - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Suy mingw-w64-%CPU%-make + +build_script: + - bash -c "autoconf" + - bash -c "./configure" + - mingw32-make -j3 + - file lib/jemalloc.dll + - mingw32-make -j3 tests + - mingw32-make -k check From 3c8c3e9e9b59b6e34a222816a05f0a01a68919b3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Sep 2016 15:55:40 -0700 Subject: [PATCH 0386/2608] Close file descriptor after reading "/proc/sys/vm/overcommit_memory". This bug was introduced by c2f970c32b527660a33fa513a76d913c812dcf7c (Modify pages_map() to support mapping uncommitted virtual memory.). This resolves #399. --- src/pages.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pages.c b/src/pages.c index 2a9b7e37..05b0d690 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,6 +219,7 @@ os_overcommits_proc(void) return (false); /* Error. */ nread = read(fd, &buf, sizeof(buf)); + close(fd); if (nread < 1) return (false); /* Error. */ /* From 79647fe4650ddd5325ce2cedf0fced772eed3f71 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 26 Sep 2016 15:55:40 -0700 Subject: [PATCH 0387/2608] Close file descriptor after reading "/proc/sys/vm/overcommit_memory". This bug was introduced by c2f970c32b527660a33fa513a76d913c812dcf7c (Modify pages_map() to support mapping uncommitted virtual memory.). 
This resolves #399. --- src/pages.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pages.c b/src/pages.c index 2a9b7e37..05b0d690 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,6 +219,7 @@ os_overcommits_proc(void) return (false); /* Error. */ nread = read(fd, &buf, sizeof(buf)); + close(fd); if (nread < 1) return (false); /* Error. */ /* From 42e79c58a0fb2c9ba87771523f3fc00e44876d20 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Sep 2016 09:49:19 -0700 Subject: [PATCH 0388/2608] Update extent hook function prototype comments. --- include/jemalloc/jemalloc_typedefs.h.in | 27 +++++++++++++------------ 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index e5ba7166..1049d7c7 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -2,55 +2,56 @@ typedef struct extent_hooks_s extent_hooks_t; /* * void * - * extent_alloc(void *new_addr, size_t size, size_t alignment, bool *zero, - * bool *commit, unsigned arena_ind); + * extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + * size_t alignment, bool *zero, bool *commit, unsigned arena_ind); */ typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, bool *, unsigned); /* * bool - * extent_dalloc(void *addr, size_t size, bool committed, unsigned arena_ind); + * extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + * bool committed, unsigned arena_ind); */ typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, unsigned); /* * bool - * extent_commit(void *addr, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ typedef bool (extent_commit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool - * extent_decommit(void *addr, 
size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ typedef bool (extent_decommit_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool - * extent_purge(void *addr, size_t size, size_t offset, size_t length, - * unsigned arena_ind); + * extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t offset, size_t length, unsigned arena_ind); */ typedef bool (extent_purge_t)(extent_hooks_t *, void *, size_t, size_t, size_t, unsigned); /* * bool - * extent_split(void *addr, size_t size, size_t size_a, size_t size_b, - * bool committed, unsigned arena_ind); + * extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + * size_t size_a, size_t size_b, bool committed, unsigned arena_ind); */ typedef bool (extent_split_t)(extent_hooks_t *, void *, size_t, size_t, size_t, bool, unsigned); /* * bool - * extent_merge(void *addr_a, size_t size_a, void *addr_b, size_t size_b, - * bool committed, unsigned arena_ind); + * extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + * void *addr_b, size_t size_b, bool committed, unsigned arena_ind); */ typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, bool, unsigned); From d51139c33c180a59dcee0c3880b8261f075139b3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Sep 2016 09:50:35 -0700 Subject: [PATCH 0389/2608] Verify extent hook functions receive correct extent_hooks pointer. 
--- test/integration/extent.c | 69 +++++++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/test/integration/extent.c b/test/integration/extent.c index 10de8bbf..8acdad82 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -4,6 +4,34 @@ const char *malloc_conf = "junk:false"; #endif +static void *extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind); +static bool extent_dalloc(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static bool extent_commit(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_decommit(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_split(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t size_a, size_t size_b, bool committed, + unsigned arena_ind); +static bool extent_merge(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, + unsigned arena_ind); + +static extent_hooks_t hooks = { + extent_alloc, + extent_dalloc, + extent_commit, + extent_decommit, + extent_purge, + extent_split, + extent_merge +}; +static extent_hooks_t *new_hooks = &hooks; static extent_hooks_t *orig_hooks; static extent_hooks_t *old_hooks; @@ -34,7 +62,9 @@ extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, new_addr, size, alignment, *zero ? "true" : "false", *commit ? 
"true" : "false", arena_ind); - assert(extent_hooks->alloc == extent_alloc); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->alloc, extent_alloc, "Wrong hook function"); did_alloc = true; return (old_hooks->alloc(old_hooks, new_addr, size, alignment, zero, commit, arena_ind)); @@ -48,7 +78,10 @@ extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? "true" : "false", arena_ind); - assert(extent_hooks->dalloc == extent_dalloc); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->dalloc, extent_dalloc, + "Wrong hook function"); did_dalloc = true; if (!do_dalloc) return (true); @@ -64,7 +97,10 @@ extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert(extent_hooks->commit == extent_commit); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->commit, extent_commit, + "Wrong hook function"); err = old_hooks->commit(old_hooks, addr, size, offset, length, arena_ind); did_commit = !err; @@ -80,7 +116,10 @@ extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert(extent_hooks->decommit == extent_decommit); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->decommit, extent_decommit, + "Wrong hook function"); if (!do_decommit) return (true); err 
= old_hooks->decommit(old_hooks, addr, size, offset, length, @@ -97,7 +136,9 @@ extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert(extent_hooks->purge == extent_purge); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->purge, extent_purge, "Wrong hook function"); did_purge = true; return (old_hooks->purge(old_hooks, addr, size, offset, length, arena_ind)); @@ -113,7 +154,9 @@ extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr, size, size_a, size_b, committed ? "true" : "false", arena_ind); - assert(extent_hooks->split == extent_split); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->split, extent_split, "Wrong hook function"); tried_split = true; err = old_hooks->split(old_hooks, addr, size, size_a, size_b, committed, arena_ind); @@ -131,7 +174,9 @@ extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr_a, size_a, addr_b, size_b, committed ? 
"true" : "false", arena_ind); - assert(extent_hooks->merge == extent_merge); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->merge, extent_merge, "Wrong hook function"); err = old_hooks->merge(old_hooks, addr_a, size_a, addr_b, size_b, committed, arena_ind); did_merge = !err; @@ -146,16 +191,6 @@ TEST_BEGIN(test_extent) int flags; size_t hooks_mib[3], purge_mib[3]; size_t hooks_miblen, purge_miblen; - extent_hooks_t hooks = { - extent_alloc, - extent_dalloc, - extent_commit, - extent_decommit, - extent_purge, - extent_split, - extent_merge - }; - extent_hooks_t *new_hooks = &hooks; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; sz = sizeof(unsigned); From 871a9498e13572f99451ed88db36cda6c9fecf8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 3 Oct 2016 14:18:55 -0700 Subject: [PATCH 0390/2608] Fix size class overflow bugs. Avoid calling s2u() on raw extent sizes in extent_recycle(). Clamp psz2ind() (implemented as psz2ind_clamp()) when inserting/removing into/from size-segregated extent heaps. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 22 ++++++++++++++++--- include/jemalloc/internal/private_symbols.txt | 2 ++ src/arena.c | 4 ++-- src/extent.c | 10 ++++++--- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 086726d3..ba8a9296 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -516,7 +516,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/large.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind_impl(size_t psz, bool clamp); pszind_t psz2ind(size_t psz); +pszind_t psz2ind_clamp(size_t psz); size_t pind2sz_compute(pszind_t pind); size_t pind2sz_lookup(pszind_t pind); size_t pind2sz(pszind_t pind); @@ -541,12 +543,12 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_INLINE pszind_t -psz2ind(size_t psz) +JEMALLOC_ALWAYS_INLINE pszind_t +psz2ind_impl(size_t psz, bool clamp) { if (unlikely(psz > LARGE_MAXCLASS)) - return (NPSIZES); + return (clamp ? NPSIZES-1 : NPSIZES); { pszind_t x = lg_floor((psz<<1)-1); pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 
0 : x - @@ -565,6 +567,20 @@ psz2ind(size_t psz) } } +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + return (psz2ind_impl(psz, false)); +} + +JEMALLOC_INLINE pszind_t +psz2ind_clamp(size_t psz) +{ + + return (psz2ind_impl(psz, true)); +} + JEMALLOC_INLINE size_t pind2sz_compute(pszind_t pind) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ae60f6c4..d633272a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -395,6 +395,8 @@ prof_thread_active_set prof_thread_name_get prof_thread_name_set psz2ind +psz2ind_clamp +psz2ind_impl psz2u purge_mode_names register_zone diff --git a/src/arena.c b/src/arena.c index 42cd3b09..2e23c0b7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -769,8 +769,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, /* Allocate. */ zero = false; textent = extent_alloc_cache_locked(tsdn, arena, r_extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - CACHELINE, &zero, false); + extent_base_get(extent), extent_size_get(extent), 0, PAGE, + &zero, false); assert(textent == extent); assert(zero == extent_zeroed_get(extent)); extent_ring_remove(extent); diff --git a/src/extent.c b/src/extent.c index f88c4240..63516c68 100644 --- a/src/extent.c +++ b/src/extent.c @@ -195,7 +195,7 @@ extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind(psz); + pszind_t pind = psz2ind_clamp(psz); malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); @@ -207,7 +207,7 @@ extent_heaps_remove(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind(psz); + pszind_t pind = psz2ind_clamp(psz); malloc_mutex_assert_owner(tsdn, 
&extent_arena_get(extent)->extents_mtx); @@ -364,6 +364,7 @@ extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); pind = psz2ind(extent_size_quantize_ceil(size)); + assert(pind < NPSIZES); for (i = pind; i < NPSIZES; i++) { extent_t *extent = extent_heap_first(&extent_heaps[i]); if (extent != NULL) @@ -419,13 +420,16 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * course cannot be recycled). */ assert(PAGE_ADDR2BASE(new_addr) == new_addr); + assert(pad == 0); + assert(alignment <= PAGE); prev = extent_lookup(tsdn, (void *)((uintptr_t)new_addr - PAGE), false); assert(prev == NULL || extent_past_get(prev) == new_addr); } size = usize + pad; - alloc_size = s2u(size + PAGE_CEILING(alignment) - PAGE); + alloc_size = (new_addr != NULL) ? size : s2u(size + + PAGE_CEILING(alignment) - PAGE); /* Beware size_t wrap-around. */ if (alloc_size < usize) return (NULL); From a5a8d7ae8dd638cf538cae3b340afe6d1d787954 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 3 Oct 2016 14:45:27 -0700 Subject: [PATCH 0391/2608] Remove a size class assertion from extent_size_quantize_floor(). Extent coalescence can result in legitimate calls to extent_size_quantize_floor() with size larger than LARGE_MAXCLASS. --- src/extent.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index 63516c68..e4d3ccdb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -117,7 +117,6 @@ extent_size_quantize_floor(size_t size) pszind_t pind; assert(size > 0); - assert(size - large_pad <= LARGE_MAXCLASS); assert((size & PAGE_MASK) == 0); assert(size != 0); From b6c0867142ced63a21003b2f449b20b248e1cc4a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 3 Oct 2016 10:37:12 -0700 Subject: [PATCH 0392/2608] Reduce "thread.arena" mallctl contention. This resolves #460. 
--- src/ctl.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 535f1eab..87fd8c75 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1186,14 +1186,13 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (oldarena == NULL) return (EAGAIN); - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); newind = oldind = oldarena->ind; WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { arena_t *newarena; - if (newind >= ctl_stats.narenas) { + if (newind >= narenas_total_get()) { /* New arena index is out of range. */ ret = EFAULT; goto label_return; @@ -1218,7 +1217,6 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return (ret); } From af33e9a59735a2ee72132d3dd6e23fae6d296e34 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Thu, 9 Jun 2016 23:17:39 +0900 Subject: [PATCH 0393/2608] Define 64-bits atomics unconditionally They are used on all platforms in prng.h. --- include/jemalloc/internal/atomic.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3f15ea14..3936f68b 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,8 +66,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# if (defined(__amd64__) || defined(__x86_64__)) +#if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -125,7 +124,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) : "memory" /* Clobbers. 
*/ ); } -# elif (defined(JEMALLOC_C11ATOMICS)) +#elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -153,7 +152,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } -# elif (defined(JEMALLOC_ATOMIC9)) +#elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -193,7 +192,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) atomic_store_rel_long(p, x); } -# elif (defined(JEMALLOC_OSATOMIC)) +#elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -225,7 +224,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) o = atomic_read_uint64(p); } while (atomic_cas_uint64(p, o, x)); } -# elif (defined(_MSC_VER)) +#elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -255,7 +254,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) InterlockedExchange64(p, x); } -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ +#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -284,9 +283,8 @@ atomic_write_uint64(uint64_t *p, uint64_t x) __sync_lock_test_and_set(p, x); } -# else -# error "Missing implementation for 64-bit atomic operations" -# endif +#else +# error "Missing implementation for 64-bit atomic operations" #endif /******************************************************************************/ From c19b48fe73b18eb39182e15cb31f4695a7e10b7f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 15:06:50 -0700 Subject: [PATCH 0394/2608] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index ed62e0e7..532255d1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -19,7 +19,7 @@ brevity. 
Much more detail can be found in the git revision history: New features: - Add the arena..reset mallctl, which makes it possible to discard all of - an arena's allocations in a single operation. (@jasone@) + an arena's allocations in a single operation. (@jasone) - Add the stats.retained and stats.arenas..retained statistics. (@jasone) - Add the --with-version configure option. (@jasone) - Support --with-lg-page values larger than actual page size. (@jasone) From bcd5424b1c5361534b3c535326d0b87f223a42e1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 12 May 2016 21:07:08 -0700 Subject: [PATCH 0395/2608] Use TSDN_NULL rather than NULL as appropriate. --- include/jemalloc/internal/mb.h | 4 ++-- src/jemalloc.c | 4 ++-- src/tsd.c | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 437c86f7..5384728f 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -105,8 +105,8 @@ mb_write(void) malloc_mutex_t mtx; malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(NULL, &mtx); - malloc_mutex_unlock(NULL, &mtx); + malloc_mutex_lock(TSDN_NULL, &mtx); + malloc_mutex_unlock(TSDN_NULL, &mtx); } #endif #endif diff --git a/src/jemalloc.c b/src/jemalloc.c index c1ecc73c..0d776b69 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1252,9 +1252,9 @@ malloc_init_hard_needed(void) if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { /* Busy-wait until the initializing thread completes. 
*/ do { - malloc_mutex_unlock(NULL, &init_lock); + malloc_mutex_unlock(TSDN_NULL, &init_lock); CPU_SPINWAIT; - malloc_mutex_lock(NULL, &init_lock); + malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); } diff --git a/src/tsd.c b/src/tsd.c index aeaa5e18..ec69a51c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -171,10 +171,10 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. */ - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (iter->data); } } @@ -182,7 +182,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) ql_elm_new(block, link); block->thread = self; ql_tail_insert(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); return (NULL); } @@ -190,8 +190,8 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(NULL, &head->lock); + malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); - malloc_mutex_unlock(NULL, &head->lock); + malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif From 1abb49f09d98e265ad92a831a056ccdfb4cf6041 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 17 Apr 2016 16:16:11 -0700 Subject: [PATCH 0396/2608] Implement pz2ind(), pind2sz(), and psz2u(). These compute size classes and indices similarly to size2index(), index2size() and s2u(), respectively, but using the subset of size classes that are multiples of the page size. Note that pszind_t and szind_t are not interchangeable. 
--- .../jemalloc/internal/jemalloc_internal.h.in | 80 ++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/size_classes.sh | 46 ++++++++-- src/arena.c | 4 +- src/jemalloc.c | 4 +- test/unit/size_classes.c | 92 +++++++++++++++++-- 6 files changed, 202 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6a0aa005..76dff3fe 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -187,6 +187,9 @@ static const bool config_cache_oblivious = #include "jemalloc/internal/jemalloc_internal_macros.h" +/* Page size index type. */ +typedef unsigned pszind_t; + /* Size class index type. */ typedef unsigned szind_t; @@ -545,6 +548,9 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/huge.h" #ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); szind_t size2index_lookup(size_t size); szind_t size2index(size_t size); @@ -565,10 +571,74 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_INLINE pszind_t +psz2ind(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (NPSIZES); + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZI(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return (ind); + } +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return (sz); + } +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) +{ + + if (unlikely(psz > HUGE_MAXCLASS)) + return (0); + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return (usize); + } +} + JEMALLOC_INLINE szind_t size2index_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (NSIZES); #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -577,9 +647,7 @@ size2index_compute(size_t size) } #endif { - szind_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + szind_t x = lg_floor((size<<1)-1); szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); szind_t grp = shift << LG_SIZE_CLASS_GROUP; @@ -665,6 +733,8 @@ JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { + if (unlikely(size > HUGE_MAXCLASS)) + return (0); #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -674,9 +744,7 @@ s2u_compute(size_t size) } #endif { - size_t x = unlikely(ZI(size) < 0) ? ((size<<1) ? - (ZU(1)<<(LG_SIZEOF_PTR+3)) : ((ZU(1)<<(LG_SIZEOF_PTR+3))-1)) - : lg_floor((size<<1)-1); + size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; size_t delta = ZU(1) << lg_delta; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f2b6a55d..a9d69739 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -401,6 +401,7 @@ pages_map pages_purge pages_trim pages_unmap +pind2sz pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -454,6 +455,8 @@ prof_thread_active_init_set prof_thread_active_set prof_thread_name_get prof_thread_name_set +psz2ind +psz2u purge_mode_names quarantine quarantine_alloc_hook diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 2b0ca29a..f6fbce4e 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -48,6 +48,21 @@ size_class() { lg_p=$5 lg_kmax=$6 + if [ ${lg_delta} -ge ${lg_p} ] ; then + psz="yes" + else + pow2 ${lg_p}; p=${pow2_result} + pow2 ${lg_grp}; grp=${pow2_result} + pow2 ${lg_delta}; delta=${pow2_result} + sz=$((${grp} + ${delta} * ${ndelta})) + npgs=$((${sz} / ${p})) + if [ ${sz} -eq $((${npgs} * ${p})) ] ; then + psz="yes" + else + psz="no" + fi + fi + lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} if [ ${pow2_result} -lt ${ndelta} ] ; then rem="yes" @@ -74,14 +89,15 @@ size_class() { else 
lg_delta_lookup="no" fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${bin} ${lg_delta_lookup} + printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${lg_delta_lookup} # Defined upon return: - # - lg_delta_lookup (${lg_delta} or "no") + # - psz ("yes" or "no") # - bin ("yes" or "no") + # - lg_delta_lookup (${lg_delta} or "no") } sep_line() { - echo " \\" + echo " \\" } size_classes() { @@ -95,12 +111,13 @@ size_classes() { pow2 ${lg_g}; g=${pow2_result} echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup */ \\" + echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup */ \\" ntbins=0 nlbins=0 lg_tiny_maxclass='"NA"' nbins=0 + npsizes=0 # Tiny size classes. ndelta=0 @@ -112,6 +129,9 @@ size_classes() { if [ ${lg_delta_lookup} != "no" ] ; then nlbins=$((${index} + 1)) fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) fi @@ -133,11 +153,17 @@ size_classes() { index=$((${index} + 1)) lg_grp=$((${lg_grp} + 1)) lg_delta=$((${lg_delta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi fi while [ ${ndelta} -lt ${g} ] ; do size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} index=$((${index} + 1)) ndelta=$((${ndelta} + 1)) + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi done # All remaining groups. 
@@ -157,6 +183,9 @@ size_classes() { # Final written value is correct: lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" fi + if [ ${psz} = "yes" ] ; then + npsizes=$((${npsizes} + 1)) + fi if [ ${bin} != "no" ] ; then nbins=$((${index} + 1)) # Final written value is correct: @@ -183,6 +212,7 @@ size_classes() { # - nlbins # - nbins # - nsizes + # - npsizes # - lg_tiny_maxclass # - lookup_maxclass # - small_maxclass @@ -200,13 +230,13 @@ cat <bitmap_info, bin_info->nregs); #define BIN_INFO_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ BIN_INFO_INIT_bin_##bin(index, (ZU(1)<run_size >> LG_PAGE] = true; \ } #define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, bin, lg_delta_lookup) \ +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ TAB_INIT_bin_##bin(index, (ZU(1)<" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), + "pind2sz() does not reverse psz2ind(): pind=%u -->" + " size_class=%zu --> pind=%u --> size_class=%zu", pind, + size_class, psz2ind(size_class), + pind2sz(psz2ind(size_class))); + + assert_u_eq(pind+1, psz2ind(size_class+1), + "Next size_class does not round up properly"); + + assert_zu_eq(size_class, (pind > 0) ? 
+ psz2u(pind2sz(pind-1)+1) : psz2u(1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); + assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), + "psz2u() does not round up to next size class"); + } + + assert_u_eq(pind, psz2ind(pind2sz(pind)), + "psz2ind() does not reverse pind2sz()"); + assert_zu_eq(max_size_class, pind2sz(psz2ind(max_size_class)), + "pind2sz() does not reverse psz2ind()"); + + assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class-1), + "psz2u() does not round up to size class"); + assert_zu_eq(size_class, psz2u(size_class), + "psz2u() does not compute same size class"); +} +TEST_END + TEST_BEGIN(test_overflow) { size_t max_size_class; max_size_class = get_max_size_class(); - assert_u_ge(size2index(max_size_class+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, - "size2index() should return >= NSIZES on overflow"); - assert_u_ge(size2index(SIZE_T_MAX), NSIZES, - "size2index() should return >= NSIZES on overflow"); + assert_u_eq(size2index(max_size_class+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "size2index() should return NSIZES on overflow"); + assert_u_eq(size2index(SIZE_T_MAX), NSIZES, + "size2index() should return NSIZES on overflow"); - assert_zu_gt(s2u(max_size_class+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); - assert_zu_gt(s2u(ZU(PTRDIFF_MAX)+1), HUGE_MAXCLASS, - "s2u() should return > HUGE_MAXCLASS for unsupported size"); + assert_zu_eq(s2u(max_size_class+1), 0, + "s2u() should return 0 for unsupported size"); + assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, + "s2u() should return 0 for unsupported 
size"); assert_zu_eq(s2u(SIZE_T_MAX), 0, "s2u() should return 0 on overflow"); + + assert_u_eq(psz2ind(max_size_class+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, + "psz2ind() should return NPSIZES on overflow"); + + assert_zu_eq(psz2u(max_size_class+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), 0, + "psz2u() should return 0 for unsupported size"); + assert_zu_eq(psz2u(SIZE_T_MAX), 0, + "psz2u() should return 0 on overflow"); } TEST_END @@ -108,5 +179,6 @@ main(void) return (test( test_size_classes, + test_psize_classes, test_overflow)); } From f193fd80cf1f99bce2bc9f5f4a8b149219965da2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 8 Apr 2016 14:17:57 -0700 Subject: [PATCH 0397/2608] Refactor runs_avail. Use pszind_t size classes rather than szind_t size classes, and always reserve space for NPSIZES elements. This removes unused heaps that are not multiples of the page size, and adds (currently) unused heaps for all huge size classes, with the immediate benefit that the size of arena_t allocations is constant (no longer dependent on chunk size). --- include/jemalloc/internal/arena.h | 9 +-- .../jemalloc/internal/jemalloc_internal.h.in | 27 +++++++- include/jemalloc/internal/private_symbols.txt | 3 +- src/arena.c | 61 +++++++------------ src/jemalloc.c | 17 +++++- test/unit/run_quantize.c | 2 +- 6 files changed, 70 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index b1de2b61..06007705 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -470,10 +470,12 @@ struct arena_s { arena_bin_t bins[NBINS]; /* - * Quantized address-ordered heaps of this arena's available runs. The - * heaps are used for first-best-fit run allocation. 
+ * Size-segregated address-ordered heaps of this arena's available runs, + * used for first-best-fit run allocation. Runs are quantized, i.e. + * they reside in the last heap which corresponds to a size class less + * than or equal to the run size. */ - arena_run_heap_t runs_avail[1]; /* Dynamically sized. */ + arena_run_heap_t runs_avail[NPSIZES]; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ @@ -505,7 +507,6 @@ extern size_t map_bias; /* Number of arena chunk header pages. */ extern size_t map_misc_offset; extern size_t arena_maxrun; /* Max run size for arenas. */ extern size_t large_maxclass; /* Max large size class. */ -extern size_t run_quantize_max; /* Max run_quantize_*() input. */ extern unsigned nlclasses; /* Number of large size classes. */ extern unsigned nhclasses; /* Number of huge size classes. */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 76dff3fe..9708df99 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -460,11 +460,16 @@ extern unsigned narenas_auto; */ extern arena_t **arenas; +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES+1]; +extern size_t const index2size_tab[NSIZES]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. 
In order to reduce cache footprint, the table is compressed, @@ -549,6 +554,8 @@ void jemalloc_postfork_child(void); #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); size_t pind2sz(pszind_t pind); size_t psz2u(size_t psz); szind_t size2index_compute(size_t size); @@ -596,7 +603,7 @@ psz2ind(size_t psz) } JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) +pind2sz_compute(pszind_t pind) { { @@ -616,6 +623,22 @@ pind2sz(pszind_t pind) } } +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) +{ + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return (ret); +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) +{ + + assert(pind < NPSIZES); + return (pind2sz_lookup(pind)); +} + JEMALLOC_INLINE size_t psz2u(size_t psz) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a9d69739..c59f82be 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -402,6 +402,8 @@ pages_purge pages_trim pages_unmap pind2sz +pind2sz_compute +pind2sz_lookup pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -480,7 +482,6 @@ rtree_val_read rtree_val_write run_quantize_ceil run_quantize_floor -run_quantize_max s2u s2u_compute s2u_lookup diff --git a/src/arena.c b/src/arena.c index a28c0772..990d9faf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,15 +21,12 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -size_t run_quantize_max; /* Max run_quantize_*() input. */ static size_t small_maxrun; /* Max run size for small size classes. */ static bool *small_run_tab; /* Valid small run page multiples. */ static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. 
*/ unsigned nlclasses; /* Number of large size classes. */ unsigned nhclasses; /* Number of huge size classes. */ -static szind_t runs_avail_bias; /* Size index for first runs_avail tree. */ -static szind_t runs_avail_nclasses; /* Number of runs_avail trees. */ /******************************************************************************/ /* @@ -164,7 +161,7 @@ run_quantize_floor(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; @@ -187,7 +184,7 @@ run_quantize_ceil(size_t size) size_t ret; assert(size > 0); - assert(size <= run_quantize_max); + assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; @@ -200,25 +197,15 @@ run_quantize_ceil(size_t size) run_quantize_t *run_quantize_ceil = JEMALLOC_N(n_run_quantize_ceil); #endif -static arena_run_heap_t * -arena_runs_avail_get(arena_t *arena, szind_t ind) -{ - - assert(ind >= runs_avail_bias); - assert(ind - runs_avail_bias < runs_avail_nclasses); - - return (&arena->runs_avail[ind - runs_avail_bias]); -} - static void arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_insert(arena_runs_avail_get(arena, ind), + arena_run_heap_insert(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -226,11 +213,11 @@ static void arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, size_t npages) { - szind_t ind = size2index(run_quantize_floor(arena_miscelm_size_get( + pszind_t pind = psz2ind(run_quantize_floor(arena_miscelm_size_get( arena_miscelm_get_const(chunk, 
pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); - arena_run_heap_remove(arena_runs_avail_get(arena, ind), + arena_run_heap_remove(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -1109,12 +1096,13 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, static arena_run_t * arena_run_first_best_fit(arena_t *arena, size_t size) { - szind_t ind, i; + pszind_t pind, i; - ind = size2index(run_quantize_ceil(size)); - for (i = ind; i < runs_avail_nclasses + runs_avail_bias; i++) { + pind = psz2ind(run_quantize_ceil(size)); + + for (i = pind; pind2sz(i) <= large_maxclass; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( - arena_runs_avail_get(arena, i)); + &arena->runs_avail[i]); if (miscelm != NULL) return (&miscelm->run); } @@ -1967,7 +1955,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) assert(!arena->purging); arena->nactive = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -3496,23 +3485,19 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { arena_t *arena; - size_t arena_size; unsigned i; - /* Compute arena size to incorporate sufficient runs_avail elements. */ - arena_size = offsetof(arena_t, runs_avail) + (sizeof(arena_run_heap_t) * - runs_avail_nclasses); /* * Allocate arena, arena->lstats, and arena->hstats contiguously, mainly * because there is no way to clean up if base_alloc() OOMs. 
*/ if (config_stats) { arena = (arena_t *)base_alloc(tsdn, - CACHELINE_CEILING(arena_size) + QUANTUM_CEILING(nlclasses * - sizeof(malloc_large_stats_t) + nhclasses) * - sizeof(malloc_huge_stats_t)); + CACHELINE_CEILING(sizeof(arena_t)) + + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) + + (nhclasses * sizeof(malloc_huge_stats_t)))); } else - arena = (arena_t *)base_alloc(tsdn, arena_size); + arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) return (NULL); @@ -3524,11 +3509,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (config_stats) { memset(&arena->stats, 0, sizeof(arena_stats_t)); arena->stats.lstats = (malloc_large_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size)); + + CACHELINE_CEILING(sizeof(arena_t))); memset(arena->stats.lstats, 0, nlclasses * sizeof(malloc_large_stats_t)); arena->stats.hstats = (malloc_huge_stats_t *)((uintptr_t)arena - + CACHELINE_CEILING(arena_size) + + + CACHELINE_CEILING(sizeof(arena_t)) + QUANTUM_CEILING(nlclasses * sizeof(malloc_large_stats_t))); memset(arena->stats.hstats, 0, nhclasses * sizeof(malloc_huge_stats_t)); @@ -3562,8 +3547,10 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for(i = 0; i < runs_avail_nclasses; i++) + for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); + i++) arena_run_heap_new(&arena->runs_avail[i]); + qr_new(&arena->runs_dirty, rd_link); qr_new(&arena->chunks_cache, cc_link); @@ -3748,6 +3735,7 @@ small_run_size_init(void) static bool run_quantize_init(void) { + size_t run_quantize_max; unsigned i; run_quantize_max = chunksize + large_pad; @@ -3827,9 +3815,6 @@ arena_boot(void) if (run_quantize_init()) return (true); - runs_avail_bias = size2index(PAGE); - runs_avail_nclasses = size2index(run_quantize_max)+1 - runs_avail_bias; - return (false); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 46457834..b9ff65f0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -85,14 +85,25 @@ enum { }; static uint8_t 
malloc_slow_flags; -/* Last entry for overflow detection only. */ JEMALLOC_ALIGNED(CACHELINE) -const size_t index2size_tab[NSIZES+1] = { +const size_t pind2sz_tab[NPSIZES] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)<> LG_PAGE; i++) { + for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; From 5d8db15db91c85d47b343cfc07fc6ea736f0de48 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 8 Apr 2016 14:16:19 -0700 Subject: [PATCH 0398/2608] Simplify run quantization. --- include/jemalloc/internal/arena.h | 2 +- src/arena.c | 180 +++++------------------------- src/jemalloc.c | 3 +- 3 files changed, 31 insertions(+), 154 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 06007705..fe20ab68 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -602,7 +602,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); -bool arena_boot(void); +void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 990d9faf..522483b3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,10 +21,6 @@ size_t map_bias; size_t map_misc_offset; size_t arena_maxrun; /* Max run size for arenas. */ size_t large_maxclass; /* Max large size class. */ -static size_t small_maxrun; /* Max run size for small size classes. */ -static bool *small_run_tab; /* Valid small run page multiples. */ -static size_t *run_quantize_floor_tab; /* run_quantize_floor() memoization. */ -static size_t *run_quantize_ceil_tab; /* run_quantize_ceil() memoization. */ unsigned nlclasses; /* Number of large size classes. 
*/ unsigned nhclasses; /* Number of huge size classes. */ @@ -74,83 +70,6 @@ arena_run_addr_comp(const arena_chunk_map_misc_t *a, ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, ph_link, arena_run_addr_comp) -static size_t -run_quantize_floor_compute(size_t size) -{ - size_t qsize; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* Don't change sizes that are valid small run sizes. */ - if (size <= small_maxrun && small_run_tab[size >> LG_PAGE]) - return (size); - - /* - * Round down to the nearest run size that can actually be requested - * during normal large allocation. Add large_pad so that cache index - * randomization can offset the allocation from the page boundary. - */ - qsize = index2size(size2index(size - large_pad + 1) - 1) + large_pad; - if (qsize <= SMALL_MAXCLASS + large_pad) - return (run_quantize_floor_compute(size - large_pad)); - assert(qsize <= size); - return (qsize); -} - -static size_t -run_quantize_ceil_compute_hard(size_t size) -{ - size_t large_run_size_next; - - assert(size != 0); - assert(size == PAGE_CEILING(size)); - - /* - * Return the next quantized size greater than the input size. - * Quantized sizes comprise the union of run sizes that back small - * region runs, and run sizes that back large regions with no explicit - * alignment constraints. 
- */ - - if (size > SMALL_MAXCLASS) { - large_run_size_next = PAGE_CEILING(index2size(size2index(size - - large_pad) + 1) + large_pad); - } else - large_run_size_next = SIZE_T_MAX; - if (size >= small_maxrun) - return (large_run_size_next); - - while (true) { - size += PAGE; - assert(size <= small_maxrun); - if (small_run_tab[size >> LG_PAGE]) { - if (large_run_size_next < size) - return (large_run_size_next); - return (size); - } - } -} - -static size_t -run_quantize_ceil_compute(size_t size) -{ - size_t qsize = run_quantize_floor_compute(size); - - if (qsize < size) { - /* - * Skip a quantization that may have an adequately large run, - * because under-sized runs may be mixed in. This only happens - * when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - qsize = run_quantize_ceil_compute_hard(qsize); - } - return (qsize); -} - #ifdef JEMALLOC_JET #undef run_quantize_floor #define run_quantize_floor JEMALLOC_N(n_run_quantize_floor) @@ -159,13 +78,27 @@ static size_t run_quantize_floor(size_t size) { size_t ret; + pszind_t pind; assert(size > 0); assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_floor_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_floor_compute(size)); + assert(size != 0); + assert(size == PAGE_CEILING(size)); + + pind = psz2ind(size - large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. 
+ */ + return (size); + } + ret = pind2sz(pind - 1) + large_pad; + assert(ret <= size); return (ret); } #ifdef JEMALLOC_JET @@ -187,8 +120,18 @@ run_quantize_ceil(size_t size) assert(size <= HUGE_MAXCLASS); assert((size & PAGE_MASK) == 0); - ret = run_quantize_ceil_tab[(size >> LG_PAGE) - 1]; - assert(ret == run_quantize_ceil_compute(size)); + ret = run_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large run, + * because under-sized runs may be mixed in. This only happens + * when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. + */ + ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + } return (ret); } #ifdef JEMALLOC_JET @@ -3680,9 +3623,6 @@ bin_info_run_size_calc(arena_bin_info_t *bin_info) bin_info->reg0_offset = (uint32_t)(actual_run_size - (actual_nregs * bin_info->reg_interval) - pad_size + bin_info->redzone_size); - if (actual_run_size > small_maxrun) - small_maxrun = actual_run_size; - assert(bin_info->reg0_offset - bin_info->redzone_size + (bin_info->nregs * bin_info->reg_interval) + pad_size == bin_info->run_size); } @@ -3706,63 +3646,7 @@ bin_info_init(void) #undef SC } -static bool -small_run_size_init(void) -{ - - assert(small_maxrun != 0); - - small_run_tab = (bool *)base_alloc(NULL, sizeof(bool) * (small_maxrun >> - LG_PAGE)); - if (small_run_tab == NULL) - return (true); - -#define TAB_INIT_bin_yes(index, size) { \ - arena_bin_info_t *bin_info = &arena_bin_info[index]; \ - small_run_tab[bin_info->run_size >> LG_PAGE] = true; \ - } -#define TAB_INIT_bin_no(index, size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, lg_delta_lookup) \ - TAB_INIT_bin_##bin(index, (ZU(1)<> LG_PAGE)); - if (run_quantize_floor_tab == NULL) - return (true); - - run_quantize_ceil_tab = (size_t *)base_alloc(NULL, sizeof(size_t) * - (run_quantize_max >> 
LG_PAGE)); - if (run_quantize_ceil_tab == NULL) - return (true); - - for (i = 1; i <= run_quantize_max >> LG_PAGE; i++) { - size_t run_size = i << LG_PAGE; - - run_quantize_floor_tab[i-1] = - run_quantize_floor_compute(run_size); - run_quantize_ceil_tab[i-1] = - run_quantize_ceil_compute(run_size); - } - - return (false); -} - -bool +void arena_boot(void) { unsigned i; @@ -3810,12 +3694,6 @@ arena_boot(void) nhclasses = NSIZES - nlclasses - NBINS; bin_info_init(); - if (small_run_size_init()) - return (true); - if (run_quantize_init()) - return (true); - - return (false); } void diff --git a/src/jemalloc.c b/src/jemalloc.c index b9ff65f0..d3bb596d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1299,8 +1299,7 @@ malloc_init_hard_a0_locked() return (true); if (config_prof) prof_boot1(); - if (arena_boot()) - return (true); + arena_boot(); if (config_tcache && tcache_boot(TSDN_NULL)) return (true); if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) From e0164bc63c25d6f7b02ef69c3e4f307ce395cf71 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 7 Oct 2016 08:47:16 -0700 Subject: [PATCH 0399/2608] Refine nstime_update(). Add missing #include . The critical time facilities appear to have been transitively included via unistd.h and sys/time.h, but in principle this omission was capable of having caused clock_gettime(CLOCK_MONOTONIC, ...) to have been overlooked in favor of gettimeofday(), which in turn could cause spurious non-monotonic time updates. Refactor nstime_get() out of nstime_update() and add configure tests for all variants. Add CLOCK_MONOTONIC_RAW support (Linux-specific) and mach_absolute_time() support (OS X-specific). Do not fall back to clock_gettime(CLOCK_REALTIME, ...). This was a fragile Linux-specific workaround, which we're unlikely to use at all now that clock_gettime(CLOCK_MONOTONIC_RAW, ...) is supported, and if we have no choice besides non-monotonic clocks, gettimeofday() is only incrementally worse. 
--- configure.ac | 49 ++++++++++-- .../internal/jemalloc_internal_decls.h | 4 + .../internal/jemalloc_internal_defs.h.in | 15 ++++ include/jemalloc/internal/nstime.h | 3 - src/nstime.c | 76 ++++++++++++------- 5 files changed, 109 insertions(+), 38 deletions(-) diff --git a/configure.ac b/configure.ac index 1e85101b..0ec710a9 100644 --- a/configure.ac +++ b/configure.ac @@ -345,11 +345,11 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. +CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) - CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" @@ -362,30 +362,26 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - CFLAGS="$CFLAGS" + dnl secure_getenv() is exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -404,13 +400,12 @@ case "${host}" in #error aout #endif ]])], - [CFLAGS="$CFLAGS"; abi="elf"], + [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' @@ -1309,6 +1304,44 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then fi fi +dnl check for CLOCK_MONOTONIC_RAW (Linux-specific). 
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_RAW, ...)], [ +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); +], [je_cv_clock_monotonic_raw]) +if test "x${je_cv_clock_monotonic_raw}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW]) +fi + +dnl check for CLOCK_MONOTONIC. +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ +#include +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); +#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 +# error _POSIX_MONOTONIC_CLOCK missing/invalid +#endif +], [je_cv_clock_monotonic]) +if test "x${je_cv_clock_monotonic}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) +fi + +dnl Check for mach_absolute_time(). +JE_COMPILABLE([mach_absolute_time()], [ +#include +], [ + mach_absolute_time(); +], [je_cv_mach_absolute_time]) +if test "x${je_cv_mach_absolute_time}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 2b8ca5d0..910b2fc6 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -19,6 +19,10 @@ # include # include # include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index cebd6a53..70b32871 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -76,6 +76,21 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_RAW, ...) is available. 
+ */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index dc293b73..c892bac8 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,9 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). */ diff --git a/src/nstime.c b/src/nstime.c index aad2c260..cfb1c8e1 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -97,6 +97,54 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) return (time->ns / divisor->ns); } +#ifdef _WIN32 +static void +nstime_get(nstime_t *time) +{ + FILETIME ft; + uint64_t ticks_100ns; + + GetSystemTimeAsFileTime(&ft); + ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + nstime_init(time, ticks_100ns * 100); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +static void +nstime_get(nstime_t *time) +{ + + nstime_init(time, mach_absolute_time()); +} +#else +static void +nstime_get(nstime_t *time) +{ + struct timeval tv; + + 
gettimeofday(&tv, NULL); + nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); +} +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) @@ -107,33 +155,7 @@ nstime_update(nstime_t *time) nstime_t old_time; nstime_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->ns = ticks * 100; - } -#elif JEMALLOC_CLOCK_GETTIME - { - struct timespec ts; - - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, &ts); - else - clock_gettime(CLOCK_REALTIME, &ts); - time->ns = ts.tv_sec * BILLION + ts.tv_nsec; - } -#else - { - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; - } -#endif + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { From b732c395b7755622dd469fb27a463adcfd3b3152 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 7 Oct 2016 08:47:16 -0700 Subject: [PATCH 0400/2608] Refine nstime_update(). Add missing #include . The critical time facilities appear to have been transitively included via unistd.h and sys/time.h, but in principle this omission was capable of having caused clock_gettime(CLOCK_MONOTONIC, ...) to have been overlooked in favor of gettimeofday(), which in turn could cause spurious non-monotonic time updates. Refactor nstime_get() out of nstime_update() and add configure tests for all variants. Add CLOCK_MONOTONIC_RAW support (Linux-specific) and mach_absolute_time() support (OS X-specific). Do not fall back to clock_gettime(CLOCK_REALTIME, ...). This was a fragile Linux-specific workaround, which we're unlikely to use at all now that clock_gettime(CLOCK_MONOTONIC_RAW, ...) is supported, and if we have no choice besides non-monotonic clocks, gettimeofday() is only incrementally worse. 
--- configure.ac | 49 ++++++++++-- .../internal/jemalloc_internal_decls.h | 4 + .../internal/jemalloc_internal_defs.h.in | 15 ++++ include/jemalloc/internal/nstime.h | 3 - src/nstime.c | 76 ++++++++++++------- 5 files changed, 109 insertions(+), 38 deletions(-) diff --git a/configure.ac b/configure.ac index f1fc4935..a7da9ffe 100644 --- a/configure.ac +++ b/configure.ac @@ -345,11 +345,11 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. +CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) - CFLAGS="$CFLAGS" abi="macho" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" @@ -362,30 +362,26 @@ case "${host}" in sbrk_deprecated="1" ;; *-*-freebsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - CFLAGS="$CFLAGS" + dnl secure_getenv() is exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -404,13 +400,12 @@ case "${host}" in #error aout #endif ]])], - [CFLAGS="$CFLAGS"; abi="elf"], + [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) - CFLAGS="$CFLAGS" abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' @@ -1339,6 +1334,44 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then fi fi +dnl check for CLOCK_MONOTONIC_RAW (Linux-specific). 
+JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_RAW, ...)], [ +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); +], [je_cv_clock_monotonic_raw]) +if test "x${je_cv_clock_monotonic_raw}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW]) +fi + +dnl check for CLOCK_MONOTONIC. +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ +#include +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); +#if !defined(_POSIX_MONOTONIC_CLOCK) || _POSIX_MONOTONIC_CLOCK < 0 +# error _POSIX_MONOTONIC_CLOCK missing/invalid +#endif +], [je_cv_clock_monotonic]) +if test "x${je_cv_clock_monotonic}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) +fi + +dnl Check for mach_absolute_time(). +JE_COMPILABLE([mach_absolute_time()], [ +#include +], [ + mach_absolute_time(); +], [je_cv_mach_absolute_time]) +if test "x${je_cv_mach_absolute_time}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 2b8ca5d0..910b2fc6 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -19,6 +19,10 @@ # include # include # include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 22396b75..d10c8a4f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -76,6 +76,21 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_RAW, ...) is available. 
+ */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +#undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index dc293b73..c892bac8 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,9 +1,6 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES -#define JEMALLOC_CLOCK_GETTIME defined(_POSIX_MONOTONIC_CLOCK) \ - && _POSIX_MONOTONIC_CLOCK >= 0 - typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). */ diff --git a/src/nstime.c b/src/nstime.c index aad2c260..cfb1c8e1 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -97,6 +97,54 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) return (time->ns / divisor->ns); } +#ifdef _WIN32 +static void +nstime_get(nstime_t *time) +{ + FILETIME ft; + uint64_t ticks_100ns; + + GetSystemTimeAsFileTime(&ft); + ticks_100ns = (((uint64_t)ft.dwHighDateTime) << 32) | ft.dwLowDateTime; + + nstime_init(time, ticks_100ns * 100); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +static void +nstime_get(nstime_t *time) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +} +#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +static void +nstime_get(nstime_t *time) +{ + + nstime_init(time, mach_absolute_time()); +} +#else +static void +nstime_get(nstime_t *time) +{ + struct timeval tv; + + 
gettimeofday(&tv, NULL); + nstime_init2(time, tv.tv_sec, tv.tv_usec * 1000); +} +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) @@ -107,33 +155,7 @@ nstime_update(nstime_t *time) nstime_t old_time; nstime_copy(&old_time, time); - -#ifdef _WIN32 - { - FILETIME ft; - uint64_t ticks; - GetSystemTimeAsFileTime(&ft); - ticks = (((uint64_t)ft.dwHighDateTime) << 32) | - ft.dwLowDateTime; - time->ns = ticks * 100; - } -#elif JEMALLOC_CLOCK_GETTIME - { - struct timespec ts; - - if (sysconf(_SC_MONOTONIC_CLOCK) > 0) - clock_gettime(CLOCK_MONOTONIC, &ts); - else - clock_gettime(CLOCK_REALTIME, &ts); - time->ns = ts.tv_sec * BILLION + ts.tv_nsec; - } -#else - { - struct timeval tv; - gettimeofday(&tv, NULL); - time->ns = tv.tv_sec * BILLION + tv.tv_usec * 1000; - } -#endif + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { From ee0c74b77a24dc4fdaad2c950bcf621b6fa54095 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 10 Oct 2016 20:32:19 -0700 Subject: [PATCH 0401/2608] Refactor arena->decay_* into arena->decay.* (arena_decay_t). 
--- include/jemalloc/internal/arena.h | 99 +++++++++++++++++-------------- src/arena.c | 76 ++++++++++++------------ 2 files changed, 91 insertions(+), 84 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1758dd02..3830e548 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -31,6 +31,7 @@ typedef enum { typedef struct arena_slab_data_s arena_slab_data_t; typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; @@ -89,6 +90,56 @@ struct arena_bin_info_s { bitmap_info_t bitmap_info; }; +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* decay_time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* decay_deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of decay_interval and + * per epoch jitter which is a uniform random variable in + * [0..decay_interval). Epochs always advance by precise multiples of + * decay_interval, but we randomize the deadline to reduce the + * likelihood of arenas purging in lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. 
During epoch + * advancement we use the delta between decay_ndirty and ndirty to + * determine how many dirty pages, if any, were generated, and record + * the result in decay_backlog. + */ + size_t ndirty; + /* + * Memoized result of arena_decay_backlog_npages_limit() corresponding + * to the current contents of decay_backlog, i.e. the limit on how many + * pages are allowed to exist for the decay epochs. + */ + size_t backlog_npages_limit; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to decay_epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + +}; + struct arena_bin_s { /* All operations on arena_bin_t fields require lock ownership. */ malloc_mutex_t lock; @@ -176,52 +227,8 @@ struct arena_s { */ size_t ndirty; - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t decay_time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ - nstime_t decay_interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t decay_epoch; - /* decay_deadline randomness generator. */ - uint64_t decay_jitter_state; - /* - * Deadline for current epoch. This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. 
- */ - nstime_t decay_deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. - */ - size_t decay_ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t decay_backlog_npages_limit; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. - */ - size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Decay-based purging state. */ + arena_decay_t decay; /* Extant large allocations. */ ql_head(extent_t) large; diff --git a/src/arena.c b/src/arena.c index 2e23c0b7..9750208d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -474,14 +474,14 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. 
*/ - nstime_copy(&arena->decay_deadline, &arena->decay_epoch); - nstime_add(&arena->decay_deadline, &arena->decay_interval); - if (arena->decay_time > 0) { + nstime_copy(&arena->decay.deadline, &arena->decay.epoch); + nstime_add(&arena->decay.deadline, &arena->decay.interval); + if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay_jitter_state, - nstime_ns(&arena->decay_interval), false)); - nstime_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval), false)); + nstime_add(&arena->decay.deadline, &jitter); } } @@ -491,7 +491,7 @@ arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) assert(opt_purge == purge_mode_decay); - return (nstime_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay.deadline, time) <= 0); } static size_t @@ -516,7 +516,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) */ sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += arena->decay_backlog[i] * h_steps[i]; + sum += arena->decay.backlog[i] * h_steps[i]; npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); @@ -533,39 +533,39 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) assert(arena_decay_deadline_reached(arena, time)); nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay_epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); + nstime_subtract(&delta, &arena->decay.epoch); + nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); assert(nadvance_u64 > 0); /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &arena->decay_interval); + nstime_copy(&delta, &arena->decay.interval); nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay_epoch, &delta); + nstime_add(&arena->decay.epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); /* Update the backlog. 
*/ if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; assert((uint64_t)nadvance_z == nadvance_u64); - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } - ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - - arena->decay_ndirty : 0; - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay_backlog_npages_limit = + ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? arena->ndirty - + arena->decay.ndirty : 0; + arena->decay.ndirty = arena->ndirty; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + arena->decay.backlog_npages_limit = arena_decay_backlog_npages_limit(arena); } @@ -576,11 +576,11 @@ arena_decay_npages_limit(arena_t *arena) assert(opt_purge == purge_mode_decay); - npages_limit = arena->decay_backlog_npages_limit; + npages_limit = arena->decay.backlog_npages_limit; /* Add in any dirty pages created during the current epoch. 
*/ - if (arena->ndirty > arena->decay_ndirty) - npages_limit += arena->ndirty - arena->decay_ndirty; + if (arena->ndirty > arena->decay.ndirty) + npages_limit += arena->ndirty - arena->decay.ndirty; return (npages_limit); } @@ -589,19 +589,19 @@ static void arena_decay_init(arena_t *arena, ssize_t decay_time) { - arena->decay_time = decay_time; + arena->decay.time = decay_time; if (decay_time > 0) { - nstime_init2(&arena->decay_interval, decay_time, 0); - nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay.interval, decay_time, 0); + nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&arena->decay_epoch, 0); - nstime_update(&arena->decay_epoch); - arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + nstime_init(&arena->decay.epoch, 0); + nstime_update(&arena->decay.epoch); + arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog_npages_limit = 0; - memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); + arena->decay.ndirty = arena->ndirty; + arena->decay.backlog_npages_limit = 0; + memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } static bool @@ -621,7 +621,7 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) ssize_t decay_time; malloc_mutex_lock(tsdn, &arena->lock); - decay_time = arena->decay_time; + decay_time = arena->decay.time; malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); @@ -687,16 +687,16 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) assert(opt_purge == purge_mode_decay); /* Purge all or nothing if the option is disabled. 
*/ - if (arena->decay_time <= 0) { - if (arena->decay_time == 0) + if (arena->decay.time <= 0) { + if (arena->decay.time == 0) arena_purge_to_limit(tsdn, arena, 0); return; } - nstime_copy(&time, &arena->decay_epoch); + nstime_copy(&time, &arena->decay.epoch); if (unlikely(nstime_update(&time))) { /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay_deadline); + nstime_copy(&time, &arena->decay.deadline); } if (arena_decay_deadline_reached(arena, &time)) @@ -1671,7 +1671,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; - *decay_time = arena->decay_time; + *decay_time = arena->decay.time; *nactive += arena->nactive; *ndirty += arena->ndirty; } From 5f11fb7d43795e9e2f5d72c8a43a042baaee9b63 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 10 Oct 2016 22:15:10 -0700 Subject: [PATCH 0402/2608] Do not advance decay epoch when time goes backwards. Instead, move the epoch backward in time. Additionally, add nstime_monotonic() and use it in debug builds to assert that time only goes backward if nstime_update() is using a non-monotonic time source. 
--- include/jemalloc/internal/nstime.h | 3 +++ include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 21 ++++++++++++++---- src/nstime.c | 22 +++++++++++++++++++ test/unit/decay.c | 13 ++++++++++- test/unit/nstime.c | 9 +++++++- 6 files changed, 63 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index c892bac8..93b27dc8 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -31,9 +31,12 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); #ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *nstime_update; #else +bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d633272a..f9d6e9a7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -298,6 +298,7 @@ nstime_idivide nstime_imultiply nstime_init nstime_init2 +nstime_monotonic nstime_ns nstime_nsec nstime_sec diff --git a/src/arena.c b/src/arena.c index 9750208d..f53a4643 100644 --- a/src/arena.c +++ b/src/arena.c @@ -693,10 +693,23 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) return; } - nstime_copy(&time, &arena->decay.epoch); - if (unlikely(nstime_update(&time))) { - /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay.deadline); + nstime_init(&time, 0); + nstime_update(&time); + if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, + &time) > 0)) { + /* + * Time went backwards. 
Move the epoch back in time, with the + * expectation that time typically flows forward for long enough + * periods of time that epochs complete. Unfortunately, + * this strategy is susceptible to clock jitter triggering + * premature epoch advances, but clock jitter estimation and + * compensation isn't feasible here because calls into this code + * are event-driven. + */ + nstime_copy(&arena->decay.epoch, &time); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } if (arena_decay_deadline_reached(arena, &time)) diff --git a/src/nstime.c b/src/nstime.c index cfb1c8e1..c420c88d 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -98,6 +98,7 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) } #ifdef _WIN32 +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -110,6 +111,7 @@ nstime_get(nstime_t *time) nstime_init(time, ticks_100ns * 100); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -119,6 +121,7 @@ nstime_get(nstime_t *time) nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -128,6 +131,7 @@ nstime_get(nstime_t *time) nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -135,6 +139,7 @@ nstime_get(nstime_t *time) nstime_init(time, mach_absolute_time()); } #else +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { @@ -145,6 +150,23 @@ nstime_get(nstime_t *time) } #endif +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#endif +bool +nstime_monotonic(void) +{ + + return (NSTIME_MONOTONIC); +#undef NSTIME_MONOTONIC +} +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic 
JEMALLOC_N(nstime_monotonic) +nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) diff --git a/test/unit/decay.c b/test/unit/decay.c index 592935d3..b465a5a2 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,12 +2,20 @@ const char *malloc_conf = "purge:decay,decay_time:1,lg_tcache_max:0"; +static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; static bool nonmonotonic_mock; +static bool +nstime_monotonic_mock(void) +{ + + return (false); +} + static bool nstime_update_mock(nstime_t *time) { @@ -315,7 +323,9 @@ TEST_BEGIN(test_decay_nonmonotonic) nstime_update(&time_mock); nonmonotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -339,8 +349,9 @@ TEST_BEGIN(test_decay_nonmonotonic) config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); if (config_stats) - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; #undef NPS } diff --git a/test/unit/nstime.c b/test/unit/nstime.c index cd7d9a6d..0368bc26 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -176,6 +176,13 @@ TEST_BEGIN(test_nstime_divide) } TEST_END +TEST_BEGIN(test_nstime_monotonic) +{ + + nstime_monotonic(); +} +TEST_END + TEST_BEGIN(test_nstime_update) { nstime_t nst; @@ -198,7 +205,6 @@ TEST_BEGIN(test_nstime_update) assert_d_eq(nstime_compare(&nst, &nst0), 0, "Time should not have been modified"); } - } TEST_END @@ -216,5 +222,6 @@ main(void) test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, + test_nstime_monotonic, test_nstime_update)); } From 94e7ffa9794792d2ec70269a0ab9c282a32aa2ec Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 10 Oct 2016 20:32:19 -0700 Subject: [PATCH 0403/2608] Refactor arena->decay_* into arena->decay.* (arena_decay_t). 
--- include/jemalloc/internal/arena.h | 99 +++++++++++++++++-------------- src/arena.c | 76 ++++++++++++------------ 2 files changed, 91 insertions(+), 84 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index fe20ab68..048e203c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -42,6 +42,7 @@ typedef struct arena_chunk_map_bits_s arena_chunk_map_bits_t; typedef struct arena_chunk_map_misc_s arena_chunk_map_misc_t; typedef struct arena_chunk_s arena_chunk_t; typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; @@ -257,6 +258,56 @@ struct arena_bin_info_s { uint32_t reg0_offset; }; +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* decay_time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* decay_deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of decay_interval and + * per epoch jitter which is a uniform random variable in + * [0..decay_interval). Epochs always advance by precise multiples of + * decay_interval, but we randomize the deadline to reduce the + * likelihood of arenas purging in lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. 
During epoch + * advancement we use the delta between decay_ndirty and ndirty to + * determine how many dirty pages, if any, were generated, and record + * the result in decay_backlog. + */ + size_t ndirty; + /* + * Memoized result of arena_decay_backlog_npages_limit() corresponding + * to the current contents of decay_backlog, i.e. the limit on how many + * pages are allowed to exist for the decay epochs. + */ + size_t backlog_npages_limit; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to decay_epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + +}; + struct arena_bin_s { /* * All operations on runcur, runs, and stats require that lock be @@ -394,52 +445,8 @@ struct arena_s { arena_runs_dirty_link_t runs_dirty; extent_node_t chunks_cache; - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t decay_time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ - nstime_t decay_interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t decay_epoch; - /* decay_deadline randomness generator. */ - uint64_t decay_jitter_state; - /* - * Deadline for current epoch. This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. 
- */ - nstime_t decay_deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. - */ - size_t decay_ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t decay_backlog_npages_limit; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. - */ - size_t decay_backlog[SMOOTHSTEP_NSTEPS]; + /* Decay-based purging state. */ + arena_decay_t decay; /* Extant huge allocations. */ ql_head(extent_node_t) huge; diff --git a/src/arena.c b/src/arena.c index 522483b3..8f2e5d84 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1187,14 +1187,14 @@ arena_decay_deadline_init(arena_t *arena) * Generate a new deadline that is uniformly random within the next * epoch after the current one. 
*/ - nstime_copy(&arena->decay_deadline, &arena->decay_epoch); - nstime_add(&arena->decay_deadline, &arena->decay_interval); - if (arena->decay_time > 0) { + nstime_copy(&arena->decay.deadline, &arena->decay.epoch); + nstime_add(&arena->decay.deadline, &arena->decay.interval); + if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay_jitter_state, - nstime_ns(&arena->decay_interval))); - nstime_add(&arena->decay_deadline, &jitter); + nstime_init(&jitter, prng_range(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval))); + nstime_add(&arena->decay.deadline, &jitter); } } @@ -1204,7 +1204,7 @@ arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) assert(opt_purge == purge_mode_decay); - return (nstime_compare(&arena->decay_deadline, time) <= 0); + return (nstime_compare(&arena->decay.deadline, time) <= 0); } static size_t @@ -1229,7 +1229,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) */ sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) - sum += arena->decay_backlog[i] * h_steps[i]; + sum += arena->decay.backlog[i] * h_steps[i]; npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); @@ -1246,39 +1246,39 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) assert(arena_decay_deadline_reached(arena, time)); nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay_epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay_interval); + nstime_subtract(&delta, &arena->decay.epoch); + nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); assert(nadvance_u64 > 0); /* Add nadvance_u64 decay intervals to epoch. */ - nstime_copy(&delta, &arena->decay_interval); + nstime_copy(&delta, &arena->decay.interval); nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay_epoch, &delta); + nstime_add(&arena->decay.epoch, &delta); /* Set a new deadline. */ arena_decay_deadline_init(arena); /* Update the backlog. 
*/ if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay_backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; assert((uint64_t)nadvance_z == nadvance_u64); - memmove(arena->decay_backlog, &arena->decay_backlog[nadvance_z], + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&arena->decay_backlog[SMOOTHSTEP_NSTEPS - + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } - ndirty_delta = (arena->ndirty > arena->decay_ndirty) ? arena->ndirty - - arena->decay_ndirty : 0; - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay_backlog_npages_limit = + ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? arena->ndirty - + arena->decay.ndirty : 0; + arena->decay.ndirty = arena->ndirty; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + arena->decay.backlog_npages_limit = arena_decay_backlog_npages_limit(arena); } @@ -1289,11 +1289,11 @@ arena_decay_npages_limit(arena_t *arena) assert(opt_purge == purge_mode_decay); - npages_limit = arena->decay_backlog_npages_limit; + npages_limit = arena->decay.backlog_npages_limit; /* Add in any dirty pages created during the current epoch. 
*/ - if (arena->ndirty > arena->decay_ndirty) - npages_limit += arena->ndirty - arena->decay_ndirty; + if (arena->ndirty > arena->decay.ndirty) + npages_limit += arena->ndirty - arena->decay.ndirty; return (npages_limit); } @@ -1302,19 +1302,19 @@ static void arena_decay_init(arena_t *arena, ssize_t decay_time) { - arena->decay_time = decay_time; + arena->decay.time = decay_time; if (decay_time > 0) { - nstime_init2(&arena->decay_interval, decay_time, 0); - nstime_idivide(&arena->decay_interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&arena->decay.interval, decay_time, 0); + nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&arena->decay_epoch, 0); - nstime_update(&arena->decay_epoch); - arena->decay_jitter_state = (uint64_t)(uintptr_t)arena; + nstime_init(&arena->decay.epoch, 0); + nstime_update(&arena->decay.epoch); + arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay_ndirty = arena->ndirty; - arena->decay_backlog_npages_limit = 0; - memset(arena->decay_backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); + arena->decay.ndirty = arena->ndirty; + arena->decay.backlog_npages_limit = 0; + memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } static bool @@ -1334,7 +1334,7 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) ssize_t decay_time; malloc_mutex_lock(tsdn, &arena->lock); - decay_time = arena->decay_time; + decay_time = arena->decay.time; malloc_mutex_unlock(tsdn, &arena->lock); return (decay_time); @@ -1400,16 +1400,16 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) assert(opt_purge == purge_mode_decay); /* Purge all or nothing if the option is disabled. 
*/ - if (arena->decay_time <= 0) { - if (arena->decay_time == 0) + if (arena->decay.time <= 0) { + if (arena->decay.time == 0) arena_purge_to_limit(tsdn, arena, 0); return; } - nstime_copy(&time, &arena->decay_epoch); + nstime_copy(&time, &arena->decay.epoch); if (unlikely(nstime_update(&time))) { /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay_deadline); + nstime_copy(&time, &arena->decay.deadline); } if (arena_decay_deadline_reached(arena, &time)) @@ -3323,7 +3323,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *lg_dirty_mult = arena->lg_dirty_mult; - *decay_time = arena->decay_time; + *decay_time = arena->decay.time; *nactive += arena->nactive; *ndirty += arena->ndirty; } From 45a5bf677299eb152c3c47836bd5d946234ce40e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 10 Oct 2016 22:15:10 -0700 Subject: [PATCH 0404/2608] Do not advance decay epoch when time goes backwards. Instead, move the epoch backward in time. Additionally, add nstime_monotonic() and use it in debug builds to assert that time only goes backward if nstime_update() is using a non-monotonic time source. 
--- include/jemalloc/internal/nstime.h | 3 +++ include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 21 ++++++++++++++---- src/nstime.c | 22 +++++++++++++++++++ test/unit/decay.c | 13 ++++++++++- test/unit/nstime.c | 9 +++++++- 6 files changed, 63 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index c892bac8..93b27dc8 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -31,9 +31,12 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); #ifdef JEMALLOC_JET +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *nstime_update; #else +bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c59f82be..cd6681c8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -360,6 +360,7 @@ nstime_idivide nstime_imultiply nstime_init nstime_init2 +nstime_monotonic nstime_ns nstime_nsec nstime_sec diff --git a/src/arena.c b/src/arena.c index 8f2e5d84..dc04acc4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1406,10 +1406,23 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) return; } - nstime_copy(&time, &arena->decay.epoch); - if (unlikely(nstime_update(&time))) { - /* Time went backwards. Force an epoch advance. */ - nstime_copy(&time, &arena->decay.deadline); + nstime_init(&time, 0); + nstime_update(&time); + if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, + &time) > 0)) { + /* + * Time went backwards. 
Move the epoch back in time, with the + * expectation that time typically flows forward for long enough + * periods of time that epochs complete. Unfortunately, + * this strategy is susceptible to clock jitter triggering + * premature epoch advances, but clock jitter estimation and + * compensation isn't feasible here because calls into this code + * are event-driven. + */ + nstime_copy(&arena->decay.epoch, &time); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } if (arena_decay_deadline_reached(arena, &time)) diff --git a/src/nstime.c b/src/nstime.c index cfb1c8e1..c420c88d 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -98,6 +98,7 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) } #ifdef _WIN32 +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -110,6 +111,7 @@ nstime_get(nstime_t *time) nstime_init(time, ticks_100ns * 100); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -119,6 +121,7 @@ nstime_get(nstime_t *time) nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -128,6 +131,7 @@ nstime_get(nstime_t *time) nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -135,6 +139,7 @@ nstime_get(nstime_t *time) nstime_init(time, mach_absolute_time()); } #else +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { @@ -145,6 +150,23 @@ nstime_get(nstime_t *time) } #endif +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#endif +bool +nstime_monotonic(void) +{ + + return (NSTIME_MONOTONIC); +#undef NSTIME_MONOTONIC +} +#ifdef JEMALLOC_JET +#undef nstime_monotonic +#define nstime_monotonic 
JEMALLOC_N(nstime_monotonic) +nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); +#endif + #ifdef JEMALLOC_JET #undef nstime_update #define nstime_update JEMALLOC_N(n_nstime_update) diff --git a/test/unit/decay.c b/test/unit/decay.c index 70a2e67a..b7e4e25e 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,12 +2,20 @@ const char *malloc_conf = "purge:decay,decay_time:1"; +static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; static bool nonmonotonic_mock; +static bool +nstime_monotonic_mock(void) +{ + + return (false); +} + static bool nstime_update_mock(nstime_t *time) { @@ -318,7 +326,9 @@ TEST_BEGIN(test_decay_nonmonotonic) nstime_update(&time_mock); nonmonotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -342,8 +352,9 @@ TEST_BEGIN(test_decay_nonmonotonic) config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); if (config_stats) - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; #undef NPS } diff --git a/test/unit/nstime.c b/test/unit/nstime.c index cd7d9a6d..0368bc26 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -176,6 +176,13 @@ TEST_BEGIN(test_nstime_divide) } TEST_END +TEST_BEGIN(test_nstime_monotonic) +{ + + nstime_monotonic(); +} +TEST_END + TEST_BEGIN(test_nstime_update) { nstime_t nst; @@ -198,7 +205,6 @@ TEST_BEGIN(test_nstime_update) assert_d_eq(nstime_compare(&nst, &nst0), 0, "Time should not have been modified"); } - } TEST_END @@ -216,5 +222,6 @@ main(void) test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, + test_nstime_monotonic, test_nstime_update)); } From 48993ed5368506013fa1dcbc72b299409b7f5716 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 11 Oct 2016 15:28:43 -0700 Subject: [PATCH 0405/2608] Fix decay tests to all adapt to nstime_monotonic(). 
--- test/unit/decay.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index b465a5a2..333a722c 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -7,13 +7,13 @@ static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; -static bool nonmonotonic_mock; +static bool monotonic_mock; static bool nstime_monotonic_mock(void) { - return (false); + return (monotonic_mock); } static bool @@ -21,9 +21,9 @@ nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (!nonmonotonic_mock) + if (monotonic_mock) nstime_copy(time, &time_mock); - return (nonmonotonic_mock); + return (!monotonic_mock); } TEST_BEGIN(test_decay_ticks) @@ -250,9 +250,11 @@ TEST_BEGIN(test_decay_ticker) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = false; + monotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -264,6 +266,7 @@ TEST_BEGIN(test_decay_ticker) "Expected nstime_update() to be called"); } + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; nstime_init(&time, 0); @@ -321,7 +324,7 @@ TEST_BEGIN(test_decay_nonmonotonic) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = true; + monotonic_mock = false; nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; From b4b4a77848f1c726134ace82509b6adb9f8e7055 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 11 Oct 2016 15:30:01 -0700 Subject: [PATCH 0406/2608] Fix and simplify decay-based purging. Simplify decay-based purging attempts to only be triggered when the epoch is advanced, rather than every time purgeable memory increases. 
In a correctly functioning system (not previously the case; see below), this only causes a behavior difference if during subsequent purge attempts the least recently used (LRU) purgeable memory extent is initially too large to be purged, but that memory is reused between attempts and one or more of the next LRU purgeable memory extents are small enough to be purged. In practice this is an arbitrary behavior change that is within the set of acceptable behaviors. As for the purging fix, assure that arena->decay.ndirty is recorded *after* the epoch advance and associated purging occurs. Prior to this fix, it was possible for purging during epoch advance to cause a substantially underrepresentative (arena->ndirty - arena->decay.ndirty), i.e. the number of dirty pages attributed to the current epoch was too low, and a series of unintended purges could result. This fix is also relevant in the context of the simplification described above, but the bug's impact would be limited to over-purging at epoch advances. --- include/jemalloc/internal/arena.h | 29 +++----- src/arena.c | 111 ++++++++++++++++-------------- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3830e548..3bb1f19c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -97,7 +97,7 @@ struct arena_decay_s { * and/or reused. */ ssize_t time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ + /* time / SMOOTHSTEP_NSTEPS. */ nstime_t interval; /* * Time at which the current decay interval logically started. We do @@ -107,37 +107,30 @@ struct arena_decay_s { * merge all relevant activity into the most recently recorded epoch. */ nstime_t epoch; - /* decay_deadline randomness generator. */ + /* Deadline randomness generator. */ uint64_t jitter_state; /* - * Deadline for current epoch. 
This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. */ nstime_t deadline; /* * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. */ size_t ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t backlog_npages_limit; /* * Trailing log of how many unused dirty pages were generated during * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. + * relative to epoch. */ size_t backlog[SMOOTHSTEP_NSTEPS]; - }; struct arena_bin_s { diff --git a/src/arena.c b/src/arena.c index f53a4643..2f0291e5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -523,11 +523,41 @@ arena_decay_backlog_npages_limit(const arena_t *arena) } static void -arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) +arena_decay_backlog_update_last(arena_t *arena) +{ + size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
+ arena->ndirty - arena->decay.ndirty : 0; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +} + +static void +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) +{ + + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + } + } + + arena_decay_backlog_update_last(arena); +} + +static void +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { uint64_t nadvance_u64; nstime_t delta; - size_t ndirty_delta; assert(opt_purge == purge_mode_decay); assert(arena_decay_deadline_reached(arena, time)); @@ -546,43 +576,25 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) arena_decay_deadline_init(arena); /* Update the backlog. */ - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; - - assert((uint64_t)nadvance_z == nadvance_u64); - - memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
arena->ndirty - - arena->decay.ndirty : 0; - arena->decay.ndirty = arena->ndirty; - arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay.backlog_npages_limit = - arena_decay_backlog_npages_limit(arena); + arena_decay_backlog_update(arena, nadvance_u64); } -static size_t -arena_decay_npages_limit(arena_t *arena) +static void +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) { - size_t npages_limit; + size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); - assert(opt_purge == purge_mode_decay); + if (arena->ndirty > ndirty_limit) + arena_purge_to_limit(tsdn, arena, ndirty_limit); + arena->decay.ndirty = arena->ndirty; +} - npages_limit = arena->decay.backlog_npages_limit; +static void +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) +{ - /* Add in any dirty pages created during the current epoch. */ - if (arena->ndirty > arena->decay.ndirty) - npages_limit += arena->ndirty - arena->decay.ndirty; - - return (npages_limit); + arena_decay_epoch_advance_helper(arena, time); + arena_decay_epoch_advance_purge(tsdn, arena); } static void @@ -600,7 +612,6 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); arena->decay.ndirty = arena->ndirty; - arena->decay.backlog_npages_limit = 0; memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } @@ -682,7 +693,6 @@ static void arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; - size_t ndirty_limit; assert(opt_purge == purge_mode_decay); @@ -698,32 +708,29 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, &time) > 0)) { /* - * Time went backwards. Move the epoch back in time, with the - * expectation that time typically flows forward for long enough - * periods of time that epochs complete. 
Unfortunately, - * this strategy is susceptible to clock jitter triggering - * premature epoch advances, but clock jitter estimation and - * compensation isn't feasible here because calls into this code - * are event-driven. + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. */ nstime_copy(&arena->decay.epoch, &time); + arena_decay_deadline_init(arena); } else { /* Verify that time does not go backwards. */ assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } - if (arena_decay_deadline_reached(arena, &time)) - arena_decay_epoch_advance(arena, &time); - - ndirty_limit = arena_decay_npages_limit(arena); - /* - * Don't try to purge unless the number of purgeable pages exceeds the - * current limit. + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances. */ - if (arena->ndirty <= ndirty_limit) - return; - arena_purge_to_limit(tsdn, arena, ndirty_limit); + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(tsdn, arena, &time); } void From a14712b4b87df5aa95446f91832ea4846a8f1475 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 11 Oct 2016 15:28:43 -0700 Subject: [PATCH 0407/2608] Fix decay tests to all adapt to nstime_monotonic(). 
--- test/unit/decay.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index b7e4e25e..e169ae24 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -7,13 +7,13 @@ static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; -static bool nonmonotonic_mock; +static bool monotonic_mock; static bool nstime_monotonic_mock(void) { - return (false); + return (monotonic_mock); } static bool @@ -21,9 +21,9 @@ nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (!nonmonotonic_mock) + if (monotonic_mock) nstime_copy(time, &time_mock); - return (nonmonotonic_mock); + return (!monotonic_mock); } TEST_BEGIN(test_decay_ticks) @@ -253,9 +253,11 @@ TEST_BEGIN(test_decay_ticker) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = false; + monotonic_mock = true; + nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; + nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; for (i = 0; i < NPS; i++) { @@ -267,6 +269,7 @@ TEST_BEGIN(test_decay_ticker) "Expected nstime_update() to be called"); } + nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; nstime_init(&time, 0); @@ -324,7 +327,7 @@ TEST_BEGIN(test_decay_nonmonotonic) nupdates_mock = 0; nstime_init(&time_mock, 0); nstime_update(&time_mock); - nonmonotonic_mock = true; + monotonic_mock = false; nstime_monotonic_orig = nstime_monotonic; nstime_update_orig = nstime_update; From d419bb09ef6700dde95c74e1f1752f81e5d15d92 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 11 Oct 2016 15:30:01 -0700 Subject: [PATCH 0408/2608] Fix and simplify decay-based purging. Simplify decay-based purging attempts to only be triggered when the epoch is advanced, rather than every time purgeable memory increases. 
In a correctly functioning system (not previously the case; see below), this only causes a behavior difference if during subsequent purge attempts the least recently used (LRU) purgeable memory extent is initially too large to be purged, but that memory is reused between attempts and one or more of the next LRU purgeable memory extents are small enough to be purged. In practice this is an arbitrary behavior change that is within the set of acceptable behaviors. As for the purging fix, assure that arena->decay.ndirty is recorded *after* the epoch advance and associated purging occurs. Prior to this fix, it was possible for purging during epoch advance to cause a substantially underrepresentative (arena->ndirty - arena->decay.ndirty), i.e. the number of dirty pages attributed to the current epoch was too low, and a series of unintended purges could result. This fix is also relevant in the context of the simplification described above, but the bug's impact would be limited to over-purging at epoch advances. --- include/jemalloc/internal/arena.h | 29 +++----- src/arena.c | 111 ++++++++++++++++-------------- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 048e203c..1277d080 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -265,7 +265,7 @@ struct arena_decay_s { * and/or reused. */ ssize_t time; - /* decay_time / SMOOTHSTEP_NSTEPS. */ + /* time / SMOOTHSTEP_NSTEPS. */ nstime_t interval; /* * Time at which the current decay interval logically started. We do @@ -275,37 +275,30 @@ struct arena_decay_s { * merge all relevant activity into the most recently recorded epoch. */ nstime_t epoch; - /* decay_deadline randomness generator. */ + /* Deadline randomness generator. */ uint64_t jitter_state; /* - * Deadline for current epoch. 
This is the sum of decay_interval and - * per epoch jitter which is a uniform random variable in - * [0..decay_interval). Epochs always advance by precise multiples of - * decay_interval, but we randomize the deadline to reduce the - * likelihood of arenas purging in lockstep. + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. */ nstime_t deadline; /* * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between decay_ndirty and ndirty to - * determine how many dirty pages, if any, were generated, and record - * the result in decay_backlog. + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. */ size_t ndirty; - /* - * Memoized result of arena_decay_backlog_npages_limit() corresponding - * to the current contents of decay_backlog, i.e. the limit on how many - * pages are allowed to exist for the decay epochs. - */ - size_t backlog_npages_limit; /* * Trailing log of how many unused dirty pages were generated during * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last * element is the most recent epoch. Corresponding epoch times are - * relative to decay_epoch. + * relative to epoch. */ size_t backlog[SMOOTHSTEP_NSTEPS]; - }; struct arena_bin_s { diff --git a/src/arena.c b/src/arena.c index dc04acc4..90b9d822 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1236,11 +1236,41 @@ arena_decay_backlog_npages_limit(const arena_t *arena) } static void -arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) +arena_decay_backlog_update_last(arena_t *arena) +{ + size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
+ arena->ndirty - arena->decay.ndirty : 0; + arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +} + +static void +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) +{ + + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { + memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { + memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + } + } + + arena_decay_backlog_update_last(arena); +} + +static void +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { uint64_t nadvance_u64; nstime_t delta; - size_t ndirty_delta; assert(opt_purge == purge_mode_decay); assert(arena_decay_deadline_reached(arena, time)); @@ -1259,43 +1289,25 @@ arena_decay_epoch_advance(arena_t *arena, const nstime_t *time) arena_decay_deadline_init(arena); /* Update the backlog. */ - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; - - assert((uint64_t)nadvance_z == nadvance_u64); - - memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? 
arena->ndirty - - arena->decay.ndirty : 0; - arena->decay.ndirty = arena->ndirty; - arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; - arena->decay.backlog_npages_limit = - arena_decay_backlog_npages_limit(arena); + arena_decay_backlog_update(arena, nadvance_u64); } -static size_t -arena_decay_npages_limit(arena_t *arena) +static void +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) { - size_t npages_limit; + size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); - assert(opt_purge == purge_mode_decay); + if (arena->ndirty > ndirty_limit) + arena_purge_to_limit(tsdn, arena, ndirty_limit); + arena->decay.ndirty = arena->ndirty; +} - npages_limit = arena->decay.backlog_npages_limit; +static void +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) +{ - /* Add in any dirty pages created during the current epoch. */ - if (arena->ndirty > arena->decay.ndirty) - npages_limit += arena->ndirty - arena->decay.ndirty; - - return (npages_limit); + arena_decay_epoch_advance_helper(arena, time); + arena_decay_epoch_advance_purge(tsdn, arena); } static void @@ -1313,7 +1325,6 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); arena->decay.ndirty = arena->ndirty; - arena->decay.backlog_npages_limit = 0; memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } @@ -1395,7 +1406,6 @@ static void arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) { nstime_t time; - size_t ndirty_limit; assert(opt_purge == purge_mode_decay); @@ -1411,32 +1421,29 @@ arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, &time) > 0)) { /* - * Time went backwards. Move the epoch back in time, with the - * expectation that time typically flows forward for long enough - * periods of time that epochs complete. 
Unfortunately, - * this strategy is susceptible to clock jitter triggering - * premature epoch advances, but clock jitter estimation and - * compensation isn't feasible here because calls into this code - * are event-driven. + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. */ nstime_copy(&arena->decay.epoch, &time); + arena_decay_deadline_init(arena); } else { /* Verify that time does not go backwards. */ assert(nstime_compare(&arena->decay.epoch, &time) <= 0); } - if (arena_decay_deadline_reached(arena, &time)) - arena_decay_epoch_advance(arena, &time); - - ndirty_limit = arena_decay_npages_limit(arena); - /* - * Don't try to purge unless the number of purgeable pages exceeds the - * current limit. + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances. */ - if (arena->ndirty <= ndirty_limit) - return; - arena_purge_to_limit(tsdn, arena, ndirty_limit); + if (arena_decay_deadline_reached(arena, &time)) + arena_decay_epoch_advance(tsdn, arena, &time); } void From 63b5657aa566ceab270ff6e9d4f366233d2d0b79 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 12 Oct 2016 10:40:27 -0700 Subject: [PATCH 0409/2608] Remove ratio-based purging. Make decay-based purging the default (and only) mode. Remove associated mallctls: - opt.purge - opt.lg_dirty_mult - arena..lg_dirty_mult - arenas.lg_dirty_mult - stats.arenas..lg_dirty_mult This resolves #385. 
--- Makefile.in | 8 +- doc/jemalloc.xml.in | 76 --------- include/jemalloc/internal/arena.h | 38 +---- include/jemalloc/internal/ctl.h | 1 - include/jemalloc/internal/private_symbols.txt | 7 - src/arena.c | 152 ++---------------- src/ctl.c | 85 +--------- src/jemalloc.c | 19 --- src/stats.c | 52 ++---- test/unit/decay.c | 8 +- test/unit/mallctl.c | 77 --------- 11 files changed, 38 insertions(+), 485 deletions(-) diff --git a/Makefile.in b/Makefile.in index ec863079..5feb71d1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -374,17 +374,15 @@ stress_dir: check_dir: check_unit_dir check_integration_dir check_unit: tests_unit check_unit_dir - $(MALLOC_CONF)="purge:ratio" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%) check_integration_prof: tests_integration check_integration_dir ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) endif check_integration_decay: tests_integration check_integration_dir - $(MALLOC_CONF)="purge:decay,decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay,decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) - $(MALLOC_CONF)="purge:decay" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) + $(MALLOC_CONF)="decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh 
$(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) stress: tests_stress stress_dir diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8000461f..f5a72473 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -924,42 +924,6 @@ for (i = 0; i < nbins; i++) { number of CPUs, or one if there is a single CPU. - - - opt.purge - (const char *) - r- - - Purge mode is “ratio” (default) or - “decay”. See opt.lg_dirty_mult - for details of the ratio mode. See opt.decay_time for - details of the decay mode. - - - - - opt.lg_dirty_mult - (ssize_t) - r- - - Per-arena minimum ratio (log base 2) of active to dirty - pages. Some dirty unused pages may be allowed to accumulate, within - the limit set by the ratio (or one chunk worth of dirty pages, - whichever is greater), before informing the kernel about some of those - pages via madvise - 2 or a similar system call. This - provides the kernel with sufficient information to recycle dirty pages - if physical memory becomes scarce and the pages remain unused. The - default minimum ratio is 8:1 (2^3:1); an option value of -1 will - disable dirty page purging. See arenas.lg_dirty_mult - and arena.<i>.lg_dirty_mult - for related dynamic control options. - - opt.decay_time @@ -1518,20 +1482,6 @@ malloc_conf = "xmalloc:true";]]> settings. - - - arena.<i>.lg_dirty_mult - (ssize_t) - rw - - Current per-arena minimum ratio (log base 2) of active - to dirty pages for arena <i>. Each time this interface is set and - the ratio is increased, pages are synchronously purged as necessary to - impose the new ratio. See opt.lg_dirty_mult - for additional information. - - arena.<i>.decay_time @@ -1778,20 +1728,6 @@ struct extent_hooks_s { initialized. - - - arenas.lg_dirty_mult - (ssize_t) - rw - - Current default per-arena minimum ratio (log base 2) of - active to dirty pages, used to initialize arena.<i>.lg_dirty_mult - during arena creation. See opt.lg_dirty_mult - for additional information. 
- - arenas.decay_time @@ -2137,18 +2073,6 @@ struct extent_hooks_s { - - - stats.arenas.<i>.lg_dirty_mult - (ssize_t) - r- - - Minimum ratio (log base 2) of active to dirty pages. - See opt.lg_dirty_mult - for details. - - stats.arenas.<i>.decay_time diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 3bb1f19c..4e20af48 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -7,23 +7,6 @@ #define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) #define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) -/* - * The minimum ratio of active:dirty pages per arena is computed as: - * - * (nactive >> lg_dirty_mult) >= ndirty - * - * So, supposing that lg_dirty_mult is 3, there can be no less than 8 times as - * many active pages as dirty pages. - */ -#define LG_DIRTY_MULT_DEFAULT 3 - -typedef enum { - purge_mode_ratio = 0, - purge_mode_decay = 1, - - purge_mode_limit = 2 -} purge_mode_t; -#define PURGE_DEFAULT purge_mode_ratio /* Default decay time in seconds. */ #define DECAY_TIME_DEFAULT 10 /* Number of event ticks between time checks. */ @@ -203,9 +186,6 @@ struct arena_s { dss_prec_t dss_prec; - /* Minimum ratio (log base 2) of nactive:ndirty. */ - ssize_t lg_dirty_mult; - /* True if a thread is currently executing arena_purge_to_limit(). 
*/ bool purging; @@ -274,9 +254,6 @@ static const size_t large_pad = #endif ; -extern purge_mode_t opt_purge; -extern const char *purge_mode_names[]; -extern ssize_t opt_lg_dirty_mult; extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; @@ -298,9 +275,6 @@ void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); -bool arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, - ssize_t lg_dirty_mult); ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); @@ -334,17 +308,15 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_lg_dirty_mult_default_get(void); -bool arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *lg_dirty_mult, - ssize_t *decay_time, size_t *nactive, size_t *ndirty); + unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, + size_t *ndirty); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, 
malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 3fbac205..4d4f3043 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -35,7 +35,6 @@ struct ctl_arena_stats_s { bool initialized; unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult; ssize_t decay_time; size_t pactive; size_t pdirty; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f9d6e9a7..e52e7fed 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -34,10 +34,6 @@ arena_extent_ralloc_large_shrink arena_get arena_ichoose arena_init -arena_lg_dirty_mult_default_get -arena_lg_dirty_mult_default_set -arena_lg_dirty_mult_get -arena_lg_dirty_mult_set arena_malloc arena_malloc_hard arena_maybe_purge @@ -311,7 +307,6 @@ opt_junk opt_junk_alloc opt_junk_free opt_lg_chunk -opt_lg_dirty_mult opt_lg_prof_interval opt_lg_prof_sample opt_lg_tcache_max @@ -324,7 +319,6 @@ opt_prof_gdump opt_prof_leak opt_prof_prefix opt_prof_thread_active_init -opt_purge opt_stats_print opt_tcache opt_utrace @@ -399,7 +393,6 @@ psz2ind psz2ind_clamp psz2ind_impl psz2u -purge_mode_names register_zone rtree_child_read rtree_child_read_hard diff --git a/src/arena.c b/src/arena.c index 2f0291e5..3de02373 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,14 +4,6 @@ /******************************************************************************/ /* Data. 
*/ -purge_mode_t opt_purge = PURGE_DEFAULT; -const char *purge_mode_names[] = { - "ratio", - "decay", - "N/A" -}; -ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT; -static ssize_t lg_dirty_mult_default; ssize_t opt_decay_time = DECAY_TIME_DEFAULT; static ssize_t decay_time_default; @@ -429,47 +421,10 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, malloc_mutex_unlock(tsdn, &arena->lock); } -static bool -arena_lg_dirty_mult_valid(ssize_t lg_dirty_mult) -{ - - return (lg_dirty_mult >= -1 && lg_dirty_mult < (ssize_t)(sizeof(size_t) - << 3)); -} - -ssize_t -arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena) -{ - ssize_t lg_dirty_mult; - - malloc_mutex_lock(tsdn, &arena->lock); - lg_dirty_mult = arena->lg_dirty_mult; - malloc_mutex_unlock(tsdn, &arena->lock); - - return (lg_dirty_mult); -} - -bool -arena_lg_dirty_mult_set(tsdn_t *tsdn, arena_t *arena, ssize_t lg_dirty_mult) -{ - - if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) - return (true); - - malloc_mutex_lock(tsdn, &arena->lock); - arena->lg_dirty_mult = lg_dirty_mult; - arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->lock); - - return (false); -} - static void arena_decay_deadline_init(arena_t *arena) { - assert(opt_purge == purge_mode_decay); - /* * Generate a new deadline that is uniformly random within the next * epoch after the current one. @@ -489,8 +444,6 @@ static bool arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) { - assert(opt_purge == purge_mode_decay); - return (nstime_compare(&arena->decay.deadline, time) <= 0); } @@ -507,8 +460,6 @@ arena_decay_backlog_npages_limit(const arena_t *arena) size_t npages_limit_backlog; unsigned i; - assert(opt_purge == purge_mode_decay); - /* * For each element of decay_backlog, multiply by the corresponding * fixed-point smoothstep decay factor. 
Sum the products, then divide @@ -559,7 +510,6 @@ arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) uint64_t nadvance_u64; nstime_t delta; - assert(opt_purge == purge_mode_decay); assert(arena_decay_deadline_reached(arena, time)); nstime_copy(&delta, time); @@ -662,40 +612,10 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) } static void -arena_maybe_purge_ratio(tsdn_t *tsdn, arena_t *arena) -{ - - assert(opt_purge == purge_mode_ratio); - - /* Don't purge if the option is disabled. */ - if (arena->lg_dirty_mult < 0) - return; - - /* - * Iterate, since preventing recursive purging could otherwise leave too - * many dirty pages. - */ - while (true) { - size_t threshold = (arena->nactive >> arena->lg_dirty_mult); - if (threshold < chunk_npages) - threshold = chunk_npages; - /* - * Don't purge unless the number of purgeable pages exceeds the - * threshold. - */ - if (arena->ndirty <= threshold) - return; - arena_purge_to_limit(tsdn, arena, threshold); - } -} - -static void -arena_maybe_purge_decay(tsdn_t *tsdn, arena_t *arena) +arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) { nstime_t time; - assert(opt_purge == purge_mode_decay); - /* Purge all or nothing if the option is disabled. 
*/ if (arena->decay.time <= 0) { if (arena->decay.time == 0) @@ -743,10 +663,7 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) if (arena->purging) return; - if (opt_purge == purge_mode_ratio) - arena_maybe_purge_ratio(tsdn, arena); - else - arena_maybe_purge_decay(tsdn, arena); + arena_maybe_purge_helper(tsdn, arena); } static size_t @@ -781,8 +698,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, UNUSED extent_t *textent; npages = extent_size_get(extent) >> LG_PAGE; - if (opt_purge == purge_mode_decay && arena->ndirty - (nstashed + - npages) < ndirty_limit) + if (arena->ndirty - (nstashed + npages) < ndirty_limit) break; next = qr_next(extent, qr_link); @@ -797,9 +713,6 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_ring_insert(purge_extents_sentinel, extent); nstashed += npages; - if (opt_purge == purge_mode_ratio && arena->ndirty - nstashed <= - ndirty_limit) - break; } malloc_mutex_unlock(tsdn, &arena->extents_mtx); @@ -838,13 +751,8 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * NB: ndirty_limit is interpreted differently depending on opt_purge: - * - purge_mode_ratio: Purge as few dirty extents as possible to reach the - * desired state: - * (arena->ndirty <= ndirty_limit) - * - purge_mode_decay: Purge as many dirty extents as possible without - * violating the invariant: - * (arena->ndirty >= ndirty_limit) + * ndirty_limit: Purge as many dirty extents as possible without violating the + * invariant: (arena->ndirty >= ndirty_limit) */ static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) @@ -863,9 +771,6 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) size_t ndirty = arena_dirty_count(tsdn, arena); assert(ndirty == arena->ndirty); } - assert(opt_purge != purge_mode_ratio || (arena->nactive >> - arena->lg_dirty_mult) < arena->ndirty || ndirty_limit == 0); - extent_init(&purge_extents_sentinel, arena, NULL, 
0, 0, false, false, false, false); @@ -1644,25 +1549,6 @@ arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) return (false); } -ssize_t -arena_lg_dirty_mult_default_get(void) -{ - - return ((ssize_t)atomic_read_z((size_t *)&lg_dirty_mult_default)); -} - -bool -arena_lg_dirty_mult_default_set(ssize_t lg_dirty_mult) -{ - - if (opt_purge != purge_mode_ratio) - return (true); - if (!arena_lg_dirty_mult_valid(lg_dirty_mult)) - return (true); - atomic_write_z((size_t *)&lg_dirty_mult_default, (size_t)lg_dirty_mult); - return (false); -} - ssize_t arena_decay_time_default_get(void) { @@ -1674,8 +1560,6 @@ bool arena_decay_time_default_set(ssize_t decay_time) { - if (opt_purge != purge_mode_decay) - return (true); if (!arena_decay_time_valid(decay_time)) return (true); atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); @@ -1684,13 +1568,11 @@ arena_decay_time_default_set(ssize_t decay_time) static void arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty) + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; - *lg_dirty_mult = arena->lg_dirty_mult; *decay_time = arena->decay.time; *nactive += arena->nactive; *ndirty += arena->ndirty; @@ -1698,29 +1580,28 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty) + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { malloc_mutex_lock(tsdn, &arena->lock); - arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, - decay_time, nactive, ndirty); + arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, + nactive, ndirty); 
malloc_mutex_unlock(tsdn, &arena->lock); } void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *lg_dirty_mult, ssize_t *decay_time, - size_t *nactive, size_t *ndirty, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) { unsigned i; cassert(config_stats); malloc_mutex_lock(tsdn, &arena->lock); - arena_basic_stats_merge_locked(arena, nthreads, dss, lg_dirty_mult, - decay_time, nactive, ndirty); + arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, + nactive, ndirty); astats->mapped += arena->stats.mapped; astats->retained += arena->stats.retained; @@ -1816,13 +1697,11 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->dss_prec = extent_dss_prec_get(tsdn); - arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); arena->purging = false; arena->nactive = 0; arena->ndirty = 0; - if (opt_purge == purge_mode_decay) - arena_decay_init(arena, arena_decay_time_default_get()); + arena_decay_init(arena, arena_decay_time_default_get()); ql_new(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", @@ -1869,7 +1748,6 @@ void arena_boot(void) { - arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); arena_decay_time_default_set(opt_decay_time); } diff --git a/src/ctl.c b/src/ctl.c index 87fd8c75..b00991a6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,8 +90,6 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) -CTL_PROTO(opt_purge) -CTL_PROTO(opt_lg_dirty_mult) CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) @@ -118,7 +116,6 @@ CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_dss) -CTL_PROTO(arena_i_lg_dirty_mult) CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_extent_hooks) INDEX_PROTO(arena_i) @@ -130,7 +127,6 
@@ CTL_PROTO(arenas_lextent_i_size) INDEX_PROTO(arenas_lextent_i) CTL_PROTO(arenas_narenas) CTL_PROTO(arenas_initialized) -CTL_PROTO(arenas_lg_dirty_mult) CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) @@ -171,7 +167,6 @@ CTL_PROTO(stats_arenas_i_lextents_j_curlextents) INDEX_PROTO(stats_arenas_i_lextents_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) -CTL_PROTO(stats_arenas_i_lg_dirty_mult) CTL_PROTO(stats_arenas_i_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) @@ -251,8 +246,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, - {NAME("purge"), CTL(opt_purge)}, - {NAME("lg_dirty_mult"), CTL(opt_lg_dirty_mult)}, {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, @@ -284,7 +277,6 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("decay"), CTL(arena_i_decay)}, {NAME("reset"), CTL(arena_i_reset)}, {NAME("dss"), CTL(arena_i_dss)}, - {NAME("lg_dirty_mult"), CTL(arena_i_lg_dirty_mult)}, {NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} }; @@ -323,7 +315,6 @@ static const ctl_indexed_node_t arenas_lextent_node[] = { static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, {NAME("initialized"), CTL(arenas_initialized)}, - {NAME("lg_dirty_mult"), CTL(arenas_lg_dirty_mult)}, {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, @@ -396,7 +387,6 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("lg_dirty_mult"), CTL(stats_arenas_i_lg_dirty_mult)}, {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, 
{NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, @@ -459,7 +449,6 @@ ctl_arena_clear(ctl_arena_stats_t *astats) astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; - astats->lg_dirty_mult = -1; astats->decay_time = -1; astats->pactive = 0; astats->pdirty = 0; @@ -482,9 +471,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) if (config_stats) { arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, - &cstats->lg_dirty_mult, &cstats->decay_time, - &cstats->pactive, &cstats->pdirty, &cstats->astats, - cstats->bstats, cstats->lstats); + &cstats->decay_time, &cstats->pactive, &cstats->pdirty, + &cstats->astats, cstats->bstats, cstats->lstats); for (i = 0; i < NBINS; i++) { cstats->allocated_small += cstats->bstats[i].curregs * @@ -495,8 +483,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) } } else { arena_basic_stats_merge(tsdn, arena, &cstats->nthreads, - &cstats->dss, &cstats->lg_dirty_mult, &cstats->decay_time, - &cstats->pactive, &cstats->pdirty); + &cstats->dss, &cstats->decay_time, &cstats->pactive, + &cstats->pdirty); } } @@ -1150,8 +1138,6 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) -CTL_RO_NL_GEN(opt_purge, purge_mode_names[opt_purge], const char *) -CTL_RO_NL_GEN(opt_lg_dirty_mult, opt_lg_dirty_mult, ssize_t) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) @@ -1562,41 +1548,6 @@ label_return: return (ret); } -static int -arena_i_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned arena_ind = (unsigned)mib[1]; - arena_t *arena; - - arena = arena_get(tsd_tsdn(tsd), 
arena_ind, false); - if (arena == NULL) { - ret = EFAULT; - goto label_return; - } - - if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_get(tsd_tsdn(tsd), arena); - READ(oldval, ssize_t); - } - if (newp != NULL) { - if (newlen != sizeof(ssize_t)) { - ret = EINVAL; - goto label_return; - } - if (arena_lg_dirty_mult_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp)) { - ret = EFAULT; - goto label_return; - } - } - - ret = 0; -label_return: - return (ret); -} - static int arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1733,32 +1684,6 @@ label_return: return (ret); } -static int -arenas_lg_dirty_mult_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - - if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_lg_dirty_mult_default_get(); - READ(oldval, ssize_t); - } - if (newp != NULL) { - if (newlen != sizeof(ssize_t)) { - ret = EINVAL; - goto label_return; - } - if (arena_lg_dirty_mult_default_set(*(ssize_t *)newp)) { - ret = EFAULT; - goto label_return; - } - } - - ret = 0; -label_return: - return (ret); -} - static int arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1972,8 +1897,6 @@ CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) -CTL_RO_GEN(stats_arenas_i_lg_dirty_mult, ctl_stats.arenas[mib[2]].lg_dirty_mult, - ssize_t) CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 24158552..580b23f9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1053,25 +1053,6 @@ malloc_conf_init(void) 
} CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, UINT_MAX, false) - if (strncmp("purge", k, klen) == 0) { - int i; - bool match = false; - for (i = 0; i < purge_mode_limit; i++) { - if (strncmp(purge_mode_names[i], v, - vlen) == 0) { - opt_purge = (purge_mode_t)i; - match = true; - break; - } - } - if (!match) { - malloc_conf_error("Invalid conf value", - k, klen, v, vlen); - } - continue; - } - CONF_HANDLE_SSIZE_T(opt_lg_dirty_mult, "lg_dirty_mult", - -1, (sizeof(size_t) << 3) - 1) CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) diff --git a/src/stats.c b/src/stats.c index d8815855..185ccac6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -210,7 +210,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, { unsigned nthreads; const char *dss; - ssize_t lg_dirty_mult, decay_time; + ssize_t decay_time; size_t page, pactive, pdirty, mapped, retained, metadata; uint64_t npurge, nmadvise, purged; size_t small_allocated; @@ -226,25 +226,12 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", dss); - CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: N/A\n"); - } - } CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (opt_purge == purge_mode_decay) { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); - } + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", + decay_time); + } else + 
malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); @@ -433,12 +420,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_CHAR_P(dss) OPT_WRITE_UNSIGNED(narenas) OPT_WRITE_CHAR_P(purge) - if (opt_purge == purge_mode_ratio) { - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, - arenas.lg_dirty_mult) - } - if (opt_purge == purge_mode_decay) - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) OPT_WRITE_BOOL(stats_print) OPT_WRITE_CHAR_P(junk) OPT_WRITE_BOOL(zero) @@ -479,24 +461,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "%u:1\n", (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "N/A\n"); - } - } CTL_GET("arenas.decay_time", &ssv, ssize_t); - if (opt_purge == purge_mode_decay) { - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", - ssv, (ssv < 0) ? " (no decay)" : ""); - } + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", ssv, (ssv < 0) ? 
+ " (no decay)" : ""); if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { malloc_cprintf(write_cb, cbopaque, "Maximum thread-cached size class: %zu\n", sv); diff --git a/test/unit/decay.c b/test/unit/decay.c index 333a722c..058a58cb 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "purge:decay,decay_time:1,lg_tcache_max:0"; +const char *malloc_conf = "decay_time:1,lg_tcache_max:0"; static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; @@ -33,8 +33,6 @@ TEST_BEGIN(test_decay_ticks) size_t sz, large0; void *p; - test_skip_if(opt_purge != purge_mode_decay); - decay_ticker = decay_ticker_get(tsd_fetch(), 0); assert_ptr_not_null(decay_ticker, "Unexpected failure getting decay ticker"); @@ -213,8 +211,6 @@ TEST_BEGIN(test_decay_ticker) unsigned i, nupdates0; nstime_t time, decay_time, deadline; - test_skip_if(opt_purge != purge_mode_decay); - /* * Allocate a bunch of large objects, pause the clock, deallocate the * objects, restore the clock, then [md]allocx() in a tight loop to @@ -307,8 +303,6 @@ TEST_BEGIN(test_decay_nonmonotonic) size_t sz, large0; unsigned i, nupdates0; - test_skip_if(opt_purge != purge_mode_decay); - sz = sizeof(size_t); assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 1954bfc5..ee57dd5c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -163,8 +163,6 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); - TEST_MALLCTL_OPT(const char *, purge, always); - TEST_MALLCTL_OPT(ssize_t, lg_dirty_mult, always); TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); @@ -349,47 +347,11 @@ 
TEST_BEGIN(test_thread_arena) } TEST_END -TEST_BEGIN(test_arena_i_lg_dirty_mult) -{ - ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; - size_t sz = sizeof(ssize_t); - - test_skip_if(opt_purge != purge_mode_ratio); - - assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - - lg_dirty_mult = -2; - assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - lg_dirty_mult = (sizeof(size_t) << 3); - assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; - lg_dirty_mult < (ssize_t)(sizeof(size_t) << 3); prev_lg_dirty_mult - = lg_dirty_mult, lg_dirty_mult++) { - ssize_t old_lg_dirty_mult; - - assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult, - &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, - "Unexpected old arena.0.lg_dirty_mult"); - } -} -TEST_END - TEST_BEGIN(test_arena_i_decay_time) { ssize_t decay_time, orig_decay_time, prev_decay_time; size_t sz = sizeof(ssize_t); - test_skip_if(opt_purge != purge_mode_decay); - assert_d_eq(mallctl("arena.0.decay_time", &orig_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -515,47 +477,11 @@ TEST_BEGIN(test_arenas_initialized) } TEST_END -TEST_BEGIN(test_arenas_lg_dirty_mult) -{ - ssize_t lg_dirty_mult, orig_lg_dirty_mult, prev_lg_dirty_mult; - size_t sz = sizeof(ssize_t); - - test_skip_if(opt_purge != purge_mode_ratio); - - assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - - lg_dirty_mult = -2; - assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - 
lg_dirty_mult = (sizeof(size_t) << 3); - assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; - lg_dirty_mult < (ssize_t)(sizeof(size_t) << 3); prev_lg_dirty_mult = - lg_dirty_mult, lg_dirty_mult++) { - ssize_t old_lg_dirty_mult; - - assert_d_eq(mallctl("arenas.lg_dirty_mult", &old_lg_dirty_mult, - &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, - "Unexpected old arenas.lg_dirty_mult"); - } -} -TEST_END - TEST_BEGIN(test_arenas_decay_time) { ssize_t decay_time, orig_decay_time, prev_decay_time; size_t sz = sizeof(ssize_t); - test_skip_if(opt_purge != purge_mode_decay); - assert_d_eq(mallctl("arenas.decay_time", &orig_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -669,7 +595,6 @@ TEST_BEGIN(test_stats_arenas) TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(const char *, dss); - TEST_STATS_ARENAS(ssize_t, lg_dirty_mult); TEST_STATS_ARENAS(ssize_t, decay_time); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); @@ -694,13 +619,11 @@ main(void) test_tcache_none, test_tcache, test_thread_arena, - test_arena_i_lg_dirty_mult, test_arena_i_decay_time, test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, test_arenas_initialized, - test_arenas_lg_dirty_mult, test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, From 9acd5cf178eca9bc8a7f36a8c392b799a120bcbf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 12 Oct 2016 11:49:19 -0700 Subject: [PATCH 0410/2608] Remove all vestiges of chunks. Remove mallctls: - opt.lg_chunk - stats.cactive This resolves #464. 
--- INSTALL | 6 +-- Makefile.in | 2 - doc/jemalloc.xml.in | 31 ----------- include/jemalloc/internal/chunk.h | 36 ------------- include/jemalloc/internal/extent_dss.h | 2 + .../jemalloc/internal/jemalloc_internal.h.in | 4 -- include/jemalloc/internal/private_symbols.txt | 9 ---- include/jemalloc/internal/stats.h | 41 --------------- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 4 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 8 +-- src/arena.c | 13 ----- src/base.c | 12 ++--- src/chunk.c | 51 ------------------- src/ctl.c | 6 --- src/extent_dss.c | 2 + src/jemalloc.c | 4 -- src/stats.c | 11 ---- test/unit/junk.c | 2 +- test/unit/lg_chunk.c | 26 ---------- test/unit/mallctl.c | 1 - test/unit/prof_gdump.c | 8 +-- test/unit/stats.c | 15 ++---- test/unit/zero.c | 2 +- 23 files changed, 26 insertions(+), 270 deletions(-) delete mode 100644 include/jemalloc/internal/chunk.h delete mode 100644 src/chunk.c delete mode 100644 test/unit/lg_chunk.c diff --git a/INSTALL b/INSTALL index 00c428b1..a31871b0 100644 --- a/INSTALL +++ b/INSTALL @@ -91,10 +91,10 @@ any of the following arguments (not a definitive list) to 'configure': --with-malloc-conf= Embed as a run-time options string that is processed prior to the malloc_conf global variable, the /etc/malloc.conf symlink, and the - MALLOC_CONF environment variable. For example, to change the default chunk - size to 256 KiB: + MALLOC_CONF environment variable. For example, to change the default decay + time to 30 seconds: - --with-malloc-conf=lg_chunk:18 + --with-malloc-conf=decay_time:30 --disable-cc-silence Disable code that silences non-useful compiler warnings. 
This is mainly diff --git a/Makefile.in b/Makefile.in index 5feb71d1..f6f06211 100644 --- a/Makefile.in +++ b/Makefile.in @@ -83,7 +83,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/atomic.c \ $(srcroot)src/base.c \ $(srcroot)src/bitmap.c \ - $(srcroot)src/chunk.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/extent.c \ @@ -144,7 +143,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ - $(srcroot)test/unit/lg_chunk.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f5a72473..5ba44d23 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -900,19 +900,6 @@ for (i = 0; i < nbins; i++) { - - - opt.lg_chunk - (size_t) - r- - - Virtual memory chunk size (log base 2). If a chunk - size outside the supported size range is specified, the size is - silently clipped to the minimum/maximum supported size. The default - chunk size is 2 MiB (2^21). - - - opt.narenas @@ -1949,24 +1936,6 @@ struct extent_hooks_s { option for additional information. - - - stats.cactive - (size_t *) - r- - [] - - Pointer to a counter that contains an approximate count - of the current number of bytes in active pages. The estimate may be - high, but never low, because each arena rounds up when computing its - contribution to the counter. Note that the epoch mallctl has no bearing - on this counter. Furthermore, counter consistency is maintained via - atomic operations, so it is necessary to use an atomic operation in - order to guarantee a consistent read when dereferencing the pointer. 
- - - stats.allocated diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h deleted file mode 100644 index 7a5ebbca..00000000 --- a/include/jemalloc/internal/chunk.h +++ /dev/null @@ -1,36 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Size and alignment of memory chunks that are allocated by the OS's virtual - * memory system. - */ -#define LG_CHUNK_DEFAULT 21 - -/* Return the smallest chunk multiple that is >= s. */ -#define CHUNK_CEILING(s) \ - (((s) + chunksize_mask) & ~chunksize_mask) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern size_t opt_lg_chunk; -extern const char *opt_dss; - -extern size_t chunksize; -extern size_t chunksize_mask; /* (chunksize - 1). 
*/ -extern size_t chunk_npages; - -bool chunk_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index 43573775..0aabc2ec 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -21,6 +21,8 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +extern const char *opt_dss; + dss_prec_t extent_dss_prec_get(tsdn_t *tsdn); bool extent_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ba8a9296..b69ddb18 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -363,7 +363,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" @@ -397,7 +396,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" #include "jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" @@ -483,7 +481,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" #include 
"jemalloc/internal/large.h" #include "jemalloc/internal/tcache.h" #include "jemalloc/internal/hash.h" @@ -512,7 +509,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/chunk.h" #include "jemalloc/internal/large.h" #ifndef JEMALLOC_ENABLE_INLINE diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e52e7fed..d1f39cff 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -110,10 +110,6 @@ bootstrap_free bootstrap_malloc bt_init buferror -chunk_boot -chunk_npages -chunksize -chunksize_mask ckh_count ckh_delete ckh_insert @@ -306,7 +302,6 @@ opt_dss opt_junk opt_junk_alloc opt_junk_free -opt_lg_chunk opt_lg_prof_interval opt_lg_prof_sample opt_lg_tcache_max @@ -430,10 +425,6 @@ size2index size2index_compute size2index_lookup size2index_tab -stats_cactive -stats_cactive_add -stats_cactive_get -stats_cactive_sub stats_print tcache_alloc_easy tcache_alloc_large diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index da019605..52279f56 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -118,8 +118,6 @@ struct arena_stats_s { extern bool opt_stats_print; -extern size_t stats_cactive; - void stats_print(void (*write)(void *, const char *), void *cbopaque, const char *opts); @@ -127,44 +125,5 @@ void stats_print(void (*write)(void *, const char *), void *cbopaque, /******************************************************************************/ #ifdef JEMALLOC_H_INLINES -#ifndef JEMALLOC_ENABLE_INLINE -size_t stats_cactive_get(void); -void stats_cactive_add(size_t size); -void stats_cactive_sub(size_t size); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_STATS_C_)) -JEMALLOC_INLINE size_t -stats_cactive_get(void) -{ - - return 
(atomic_read_z(&stats_cactive)); -} - -JEMALLOC_INLINE void -stats_cactive_add(size_t size) -{ - UNUSED size_t cactive; - - assert(size > 0); - assert((size & chunksize_mask) == 0); - - cactive = atomic_add_z(&stats_cactive, size); - assert(cactive - size < cactive); -} - -JEMALLOC_INLINE void -stats_cactive_sub(size_t size) -{ - UNUSED size_t cactive; - - assert(size > 0); - assert((size & chunksize_mask) == 0); - - cactive = atomic_sub_z(&stats_cactive, size); - assert(cactive + size > cactive); -} -#endif - #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index da75a968..e5ecb351 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -40,7 +40,6 @@ - @@ -93,7 +92,6 @@ - @@ -395,4 +393,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 57395e70..74b45112 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -59,9 +59,6 @@ Header Files\internal - - Header Files\internal - Header Files\internal @@ -190,9 +187,6 @@ Source Files - - Source Files - Source Files @@ -257,4 +251,4 @@ Source Files - \ No newline at end of file + diff --git a/src/arena.c b/src/arena.c index 3de02373..2b8aead7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -229,13 +229,6 @@ static void arena_nactive_add(arena_t *arena, size_t add_pages) { - if (config_stats) { - size_t cactive_add = CHUNK_CEILING((arena->nactive + - add_pages) << LG_PAGE) - CHUNK_CEILING(arena->nactive << - LG_PAGE); - if (cactive_add != 0) - stats_cactive_add(cactive_add); - } arena->nactive += add_pages; } @@ -244,12 +237,6 @@ arena_nactive_sub(arena_t *arena, size_t sub_pages) { assert(arena->nactive 
>= sub_pages); - if (config_stats) { - size_t cactive_sub = CHUNK_CEILING(arena->nactive << LG_PAGE) - - CHUNK_CEILING((arena->nactive - sub_pages) << LG_PAGE); - if (cactive_sub != 0) - stats_cactive_sub(cactive_sub); - } arena->nactive -= sub_pages; } diff --git a/src/base.c b/src/base.c index 667786e1..9c3f36cd 100644 --- a/src/base.c +++ b/src/base.c @@ -41,7 +41,7 @@ static extent_t * base_extent_alloc(tsdn_t *tsdn, size_t minsize) { extent_t *extent; - size_t csize, nsize; + size_t esize, nsize; void *addr; malloc_mutex_assert_owner(tsdn, &base_mtx); @@ -49,7 +49,7 @@ base_extent_alloc(tsdn_t *tsdn, size_t minsize) extent = base_extent_try_alloc(tsdn); /* Allocate enough space to also carve an extent out if necessary. */ nsize = (extent == NULL) ? CACHELINE_CEILING(sizeof(extent_t)) : 0; - csize = CHUNK_CEILING(minsize + nsize); + esize = PAGE_CEILING(minsize + nsize); /* * Directly call extent_alloc_mmap() because it's critical to allocate * untouched demand-zeroed virtual memory. 
@@ -57,24 +57,24 @@ base_extent_alloc(tsdn_t *tsdn, size_t minsize) { bool zero = true; bool commit = true; - addr = extent_alloc_mmap(NULL, csize, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, esize, PAGE, &zero, &commit); } if (addr == NULL) { if (extent != NULL) base_extent_dalloc(tsdn, extent); return (NULL); } - base_mapped += csize; + base_mapped += esize; if (extent == NULL) { extent = (extent_t *)addr; addr = (void *)((uintptr_t)addr + nsize); - csize -= nsize; + esize -= nsize; if (config_stats) { base_allocated += nsize; base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, csize, 0, true, true, true, false); + extent_init(extent, NULL, addr, esize, 0, true, true, true, false); return (extent); } diff --git a/src/chunk.c b/src/chunk.c deleted file mode 100644 index d750f715..00000000 --- a/src/chunk.c +++ /dev/null @@ -1,51 +0,0 @@ -#define JEMALLOC_CHUNK_C_ -#include "jemalloc/internal/jemalloc_internal.h" - -/******************************************************************************/ -/* Data. */ - -const char *opt_dss = DSS_DEFAULT; -size_t opt_lg_chunk = 0; - -/* Various chunk-related settings. */ -size_t chunksize; -size_t chunksize_mask; /* (chunksize - 1). */ -size_t chunk_npages; - -/******************************************************************************/ - -bool -chunk_boot(void) -{ -#ifdef _WIN32 - SYSTEM_INFO info; - GetSystemInfo(&info); - - /* - * Verify actual page size is equal to or an integral multiple of - * configured page size. - */ - if (info.dwPageSize & ((1U << LG_PAGE) - 1)) - return (true); - - /* - * Configure chunksize (if not set) to match granularity (usually 64K), - * so pages_map will always take fast path. - */ - if (!opt_lg_chunk) { - opt_lg_chunk = ffs_u((unsigned)info.dwAllocationGranularity) - - 1; - } -#else - if (!opt_lg_chunk) - opt_lg_chunk = LG_CHUNK_DEFAULT; -#endif - - /* Set variables according to the value of opt_lg_chunk. 
*/ - chunksize = (ZU(1) << opt_lg_chunk); - assert(chunksize >= PAGE); - chunksize_mask = chunksize - 1; - chunk_npages = (chunksize >> LG_PAGE); - - return (false); -} diff --git a/src/ctl.c b/src/ctl.c index b00991a6..b4e2208c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -88,7 +88,6 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) -CTL_PROTO(opt_lg_chunk) CTL_PROTO(opt_narenas) CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) @@ -177,7 +176,6 @@ CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) CTL_PROTO(stats_arenas_i_metadata) INDEX_PROTO(stats_arenas_i) -CTL_PROTO(stats_cactive) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) CTL_PROTO(stats_metadata) @@ -244,7 +242,6 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("dss"), CTL(opt_dss)}, - {NAME("lg_chunk"), CTL(opt_lg_chunk)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -410,7 +407,6 @@ static const ctl_indexed_node_t stats_arenas_node[] = { }; static const ctl_named_node_t stats_node[] = { - {NAME("cactive"), CTL(stats_cactive)}, {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, @@ -1136,7 +1132,6 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) -CTL_RO_NL_GEN(opt_lg_chunk, opt_lg_chunk, size_t) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) @@ -1888,7 +1883,6 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) /******************************************************************************/ -CTL_RO_CGEN(config_stats, stats_cactive, &stats_cactive, size_t *) CTL_RO_CGEN(config_stats, 
stats_allocated, ctl_stats.allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) diff --git a/src/extent_dss.c b/src/extent_dss.c index 9c5cd25a..e0e6635d 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -3,6 +3,8 @@ /******************************************************************************/ /* Data. */ +const char *opt_dss = DSS_DEFAULT; + const char *dss_prec_names[] = { "disabled", "primary", diff --git a/src/jemalloc.c b/src/jemalloc.c index 580b23f9..95cd0545 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1024,8 +1024,6 @@ malloc_conf_init(void) } CONF_HANDLE_BOOL(opt_abort, "abort", true) - CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE, - (sizeof(size_t) << 3) - 1, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1176,8 +1174,6 @@ malloc_init_hard_a0_locked() pages_boot(); if (base_boot()) return (true); - if (chunk_boot()) - return (true); if (extent_boot()) return (true); if (ctl_boot()) diff --git a/src/stats.c b/src/stats.c index 185ccac6..ca716d5e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -30,8 +30,6 @@ bool opt_stats_print = false; -size_t stats_cactive = 0; - /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -416,7 +414,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Run-time option settings:\n"); OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) OPT_WRITE_CHAR_P(dss) OPT_WRITE_UNSIGNED(narenas) OPT_WRITE_CHAR_P(purge) @@ -486,16 +483,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Average profile dump interval: N/A\n"); } } - CTL_GET("opt.lg_chunk", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); } if (config_stats) { - size_t *cactive; size_t allocated, active, metadata, resident, mapped, retained; - CTL_GET("stats.cactive", &cactive, size_t *); CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); @@ -506,9 +498,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, resident, mapped, retained); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", - atomic_read_z(cactive)); if (merged) { unsigned narenas; diff --git a/test/unit/junk.c b/test/unit/junk.c index dea0f615..fe453b6c 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -140,7 +140,7 @@ TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); - test_junk(SMALL_MAXCLASS+1, chunksize*2); + test_junk(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); } TEST_END diff --git a/test/unit/lg_chunk.c b/test/unit/lg_chunk.c deleted file mode 100644 index 7e5df381..00000000 --- a/test/unit/lg_chunk.c +++ /dev/null @@ -1,26 +0,0 @@ -#include "test/jemalloc_test.h" - -/* - * Make sure that opt.lg_chunk clamping is sufficient. In practice, this test - * program will fail a debug assertion during initialization and abort (rather - * than the test soft-failing) if clamping is insufficient. 
- */ -const char *malloc_conf = "lg_chunk:0"; - -TEST_BEGIN(test_lg_chunk_clamp) -{ - void *p; - - p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - dallocx(p, 0); -} -TEST_END - -int -main(void) -{ - - return (test( - test_lg_chunk_clamp)); -} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ee57dd5c..0e979a11 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -160,7 +160,6 @@ TEST_BEGIN(test_mallctl_opt) } while (0) TEST_MALLCTL_OPT(bool, abort, always); - TEST_MALLCTL_OPT(size_t, lg_chunk, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(ssize_t, decay_time, always); diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index a0e6ee92..ca93f300 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -34,12 +34,12 @@ TEST_BEGIN(test_gdump) prof_dump_open = prof_dump_open_intercept; did_prof_dump_open = false; - p = mallocx(chunksize, 0); + p = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); did_prof_dump_open = false; - q = mallocx(chunksize, 0); + q = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); @@ -50,7 +50,7 @@ TEST_BEGIN(test_gdump) "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; - r = mallocx(chunksize, 0); + r = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_false(did_prof_dump_open, "Unexpected profile dump"); @@ -61,7 +61,7 @@ TEST_BEGIN(test_gdump) "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; - s = mallocx(chunksize, 0); + s = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); 
assert_true(did_prof_dump_open, "Expected a profile dump"); diff --git a/test/unit/stats.c b/test/unit/stats.c index 9fa9cead..ed0d3fe9 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -2,14 +2,9 @@ TEST_BEGIN(test_stats_summary) { - size_t *cactive; size_t sz, allocated, active, resident, mapped; int expected = config_stats ? 0 : ENOENT; - sz = sizeof(cactive); - assert_d_eq(mallctl("stats.cactive", &cactive, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - sz = sizeof(size_t); assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); @@ -21,8 +16,6 @@ TEST_BEGIN(test_stats_summary) "Unexpected mallctl() result"); if (config_stats) { - assert_zu_le(active, *cactive, - "active should be no larger than cactive"); assert_zu_le(allocated, active, "allocated should be no larger than active"); assert_zu_lt(active, resident, @@ -88,12 +81,14 @@ TEST_BEGIN(test_stats_arenas_summary) little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx(chunksize, 0); + large = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); dallocx(large, 0); + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -197,7 +192,7 @@ TEST_BEGIN(test_stats_arenas_large) assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), 0, "Unexpected mallctl() failure"); - p = mallocx(chunksize, 0); + p = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, @@ -336,7 +331,7 @@ TEST_BEGIN(test_stats_arenas_lextents) assert_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); assert_u64_gt(curlextents, 0, - "At least one chunk should be currently allocated"); + "At least one extent should be currently allocated"); } dallocx(p, 0); diff --git a/test/unit/zero.c b/test/unit/zero.c index 3c35f4bd..c025c831 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -57,7 +57,7 @@ TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); - test_zero(SMALL_MAXCLASS+1, chunksize*2); + test_zero(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); } TEST_END From a2539fab95008bee7fc1e4651d24b6a0427b88ce Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 12 Oct 2016 22:58:40 -0700 Subject: [PATCH 0411/2608] Disallow 0x5a junk filling when running in Valgrind. Explicitly disallow junk:true and junk:free runtime settings when running in Valgrind, since deallocation-time junk filling and redzone validation cause false positive Valgrind reports. This resolves #470. 
--- src/jemalloc.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d3bb596d..8731934f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1161,9 +1161,20 @@ malloc_conf_init(void) if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "true"; + opt_junk_alloc = true; + opt_junk_free = true; + } } else if (CONF_MATCH_VALUE("false")) { opt_junk = "false"; opt_junk_alloc = opt_junk_free = @@ -1173,9 +1184,20 @@ malloc_conf_init(void) opt_junk_alloc = true; opt_junk_free = false; } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; + if (config_valgrind && + unlikely(in_valgrind)) { + malloc_conf_error( + "Deallocation-time " + "junk filling cannot " + "be enabled while " + "running inside " + "Valgrind", k, klen, v, + vlen); + } else { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } } else { malloc_conf_error( "Invalid conf value", k, From e5effef428b5bf941e1697f6000c97f1ce734756 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 14:47:50 -0700 Subject: [PATCH 0412/2608] Add/use adaptive spinning. Add spin_t and spin_{init,adaptive}(), which provide a simple abstraction for adaptive spinning. Adaptively spin during busy waits in bootstrapping and rtree node initialization. 
--- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 ++ include/jemalloc/internal/spin.h | 51 +++++++++++++++++++ src/jemalloc.c | 5 +- src/rtree.c | 5 +- src/spin.c | 2 + 6 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/spin.h create mode 100644 src/spin.c diff --git a/Makefile.in b/Makefile.in index f6f06211..9e063095 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,6 +98,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ + $(srcroot)src/spin.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b69ddb18..1d02c20e 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -346,6 +346,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -375,6 +376,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -465,6 +467,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -494,6 +497,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" 
+#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h new file mode 100644 index 00000000..9ef5ceb9 --- /dev/null +++ b/include/jemalloc/internal/spin.h @@ -0,0 +1,51 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct spin_s spin_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct spin_s { + unsigned iteration; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void spin_init(spin_t *spin); +void spin_adaptive(spin_t *spin); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) +JEMALLOC_INLINE void +spin_init(spin_t *spin) +{ + + spin->iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/src/jemalloc.c b/src/jemalloc.c index 95cd0545..0348b8ac 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1142,10 +1142,13 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { + spin_t spinner; + /* Busy-wait until the initializing thread completes. 
*/ + spin_init(&spinner); do { malloc_mutex_unlock(TSDN_NULL, &init_lock); - CPU_SPINWAIT; + spin_adaptive(&spinner); malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); diff --git a/src/rtree.c b/src/rtree.c index 421de3e8..d4a705ae 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -136,12 +136,15 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, rtree_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + spin_t spinner; + /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. */ + spin_init(&spinner); do { - CPU_SPINWAIT; + spin_adaptive(&spinner); node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { diff --git a/src/spin.c b/src/spin.c new file mode 100644 index 00000000..5242d95a --- /dev/null +++ b/src/spin.c @@ -0,0 +1,2 @@ +#define JEMALLOC_SPIN_C_ +#include "jemalloc/internal/jemalloc_internal.h" From 9737685943fedf5796ff157306ca70aaa25750c7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 14:47:50 -0700 Subject: [PATCH 0413/2608] Add/use adaptive spinning. Add spin_t and spin_{init,adaptive}(), which provide a simple abstraction for adaptive spinning. Adaptively spin during busy waits in bootstrapping and rtree node initialization. 
--- Makefile.in | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 4 ++ include/jemalloc/internal/spin.h | 51 +++++++++++++++++++ src/jemalloc.c | 5 +- src/rtree.c | 5 +- src/spin.c | 2 + 6 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 include/jemalloc/internal/spin.h create mode 100644 src/spin.c diff --git a/Makefile.in b/Makefile.in index 8789c451..a2d5594b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,6 +101,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/quarantine.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ + $(srcroot)src/spin.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 9708df99..d644cea3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -369,6 +369,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -401,6 +402,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -502,6 +504,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" @@ -534,6 +537,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/valgrind.h" #include "jemalloc/internal/util.h" #include 
"jemalloc/internal/atomic.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/ckh.h" diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h new file mode 100644 index 00000000..9ef5ceb9 --- /dev/null +++ b/include/jemalloc/internal/spin.h @@ -0,0 +1,51 @@ +/******************************************************************************/ +#ifdef JEMALLOC_H_TYPES + +typedef struct spin_s spin_t; + +#endif /* JEMALLOC_H_TYPES */ +/******************************************************************************/ +#ifdef JEMALLOC_H_STRUCTS + +struct spin_s { + unsigned iteration; +}; + +#endif /* JEMALLOC_H_STRUCTS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_EXTERNS + +#endif /* JEMALLOC_H_EXTERNS */ +/******************************************************************************/ +#ifdef JEMALLOC_H_INLINES + +#ifndef JEMALLOC_ENABLE_INLINE +void spin_init(spin_t *spin); +void spin_adaptive(spin_t *spin); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) +JEMALLOC_INLINE void +spin_init(spin_t *spin) +{ + + spin->iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_H_INLINES */ +/******************************************************************************/ + diff --git a/src/jemalloc.c b/src/jemalloc.c index 8731934f..aec2a5eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1283,10 +1283,13 @@ malloc_init_hard_needed(void) } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { + spin_t spinner; + /* Busy-wait until the initializing thread completes. 
*/ + spin_init(&spinner); do { malloc_mutex_unlock(TSDN_NULL, &init_lock); - CPU_SPINWAIT; + spin_adaptive(&spinner); malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); return (false); diff --git a/src/rtree.c b/src/rtree.c index 3166b45f..5590034b 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -96,12 +96,15 @@ rtree_node_init(rtree_t *rtree, unsigned level, rtree_node_elm_t **elmp) rtree_node_elm_t *node; if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { + spin_t spinner; + /* * Another thread is already in the process of initializing. * Spin-wait until initialization is complete. */ + spin_init(&spinner); do { - CPU_SPINWAIT; + spin_adaptive(&spinner); node = atomic_read_p((void **)elmp); } while (node == RTREE_NODE_INITIALIZING); } else { diff --git a/src/spin.c b/src/spin.c new file mode 100644 index 00000000..5242d95a --- /dev/null +++ b/src/spin.c @@ -0,0 +1,2 @@ +#define JEMALLOC_SPIN_C_ +#include "jemalloc/internal/jemalloc_internal.h" From e2bcf037d445a84a71c7997670819ebd0a893b4a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 12:18:38 -0700 Subject: [PATCH 0414/2608] Make dss operations lockless. Rather than protecting dss operations with a mutex, use atomic operations. This has negligible impact on synchronization overhead during typical dss allocation, but is a substantial improvement for chunk_in_dss() and the newly added chunk_dss_mergeable(), which can be called multiple times during chunk deallocations. This change also has the advantage of avoiding tsd in deallocation paths associated with purging, which resolves potential deadlocks during thread exit due to attempted tsd resurrection. This resolves #425. 
--- include/jemalloc/internal/chunk.h | 3 - include/jemalloc/internal/chunk_dss.h | 12 +- include/jemalloc/internal/huge.h | 2 +- include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 2 +- src/chunk.c | 46 +---- src/chunk_dss.c | 193 ++++++++++-------- src/ctl.c | 4 +- src/huge.c | 8 +- src/jemalloc.c | 6 +- test/unit/junk.c | 4 +- 11 files changed, 134 insertions(+), 153 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index c9fd4ecb..e199a037 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -71,9 +71,6 @@ bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); bool chunk_boot(void); -void chunk_prefork(tsdn_t *tsdn); -void chunk_postfork_parent(tsdn_t *tsdn); -void chunk_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/chunk_dss.h b/include/jemalloc/internal/chunk_dss.h index 724fa579..da8511ba 100644 --- a/include/jemalloc/internal/chunk_dss.h +++ b/include/jemalloc/internal/chunk_dss.h @@ -21,15 +21,13 @@ extern const char *dss_prec_names[]; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -dss_prec_t chunk_dss_prec_get(tsdn_t *tsdn); -bool chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +dss_prec_t chunk_dss_prec_get(void); +bool chunk_dss_prec_set(dss_prec_t dss_prec); void *chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool chunk_in_dss(tsdn_t *tsdn, void *chunk); -bool chunk_dss_boot(void); -void chunk_dss_prefork(tsdn_t *tsdn); -void chunk_dss_postfork_parent(tsdn_t *tsdn); -void chunk_dss_postfork_child(tsdn_t *tsdn); +bool chunk_in_dss(void *chunk); +bool chunk_dss_mergeable(void *chunk_a, void *chunk_b); 
+void chunk_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/huge.h b/include/jemalloc/internal/huge.h index b5fa9e63..22184d9b 100644 --- a/include/jemalloc/internal/huge.h +++ b/include/jemalloc/internal/huge.h @@ -17,7 +17,7 @@ bool huge_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, void *huge_ralloc(tsd_t *tsd, arena_t *arena, void *ptr, size_t oldsize, size_t usize, size_t alignment, bool zero, tcache_t *tcache); #ifdef JEMALLOC_JET -typedef void (huge_dalloc_junk_t)(tsdn_t *, void *, size_t); +typedef void (huge_dalloc_junk_t)(void *, size_t); extern huge_dalloc_junk_t *huge_dalloc_junk; #endif void huge_dalloc(tsdn_t *tsdn, void *ptr); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index cd6681c8..642c3de7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -167,20 +167,15 @@ chunk_dalloc_mmap chunk_dalloc_wrapper chunk_deregister chunk_dss_boot -chunk_dss_postfork_child -chunk_dss_postfork_parent +chunk_dss_mergeable chunk_dss_prec_get chunk_dss_prec_set -chunk_dss_prefork chunk_hooks_default chunk_hooks_get chunk_hooks_set chunk_in_dss chunk_lookup chunk_npages -chunk_postfork_child -chunk_postfork_parent -chunk_prefork chunk_purge_wrapper chunk_register chunks_rtree diff --git a/src/arena.c b/src/arena.c index 90b9d822..76514955 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3499,7 +3499,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = chunk_dss_prec_get(tsdn); + arena->dss_prec = chunk_dss_prec_get(); ql_new(&arena->achunks); diff --git a/src/chunk.c b/src/chunk.c index dff537f5..302b98cb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -611,10 +611,10 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } static bool 
-chunk_dalloc_default_impl(tsdn_t *tsdn, void *chunk, size_t size) +chunk_dalloc_default_impl(void *chunk, size_t size) { - if (!have_dss || !chunk_in_dss(tsdn, chunk)) + if (!have_dss || !chunk_in_dss(chunk)) return (chunk_dalloc_mmap(chunk, size)); return (true); } @@ -623,11 +623,8 @@ static bool chunk_dalloc_default(void *chunk, size_t size, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_dalloc_default_impl(tsdn, chunk, size)); + return (chunk_dalloc_default_impl(chunk, size)); } void @@ -645,7 +642,7 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* Try to deallocate. */ if (chunk_hooks->dalloc == chunk_dalloc_default) { /* Call directly to propagate tsdn. */ - err = chunk_dalloc_default_impl(tsdn, chunk, size); + err = chunk_dalloc_default_impl(chunk, size); } else err = chunk_hooks->dalloc(chunk, size, committed, arena->ind); @@ -718,13 +715,12 @@ chunk_split_default(void *chunk, size_t size, size_t size_a, size_t size_b, } static bool -chunk_merge_default_impl(tsdn_t *tsdn, void *chunk_a, void *chunk_b) +chunk_merge_default_impl(void *chunk_a, void *chunk_b) { if (!maps_coalesce) return (true); - if (have_dss && chunk_in_dss(tsdn, chunk_a) != chunk_in_dss(tsdn, - chunk_b)) + if (have_dss && !chunk_dss_mergeable(chunk_a, chunk_b)) return (true); return (false); @@ -734,11 +730,8 @@ static bool chunk_merge_default(void *chunk_a, size_t size_a, void *chunk_b, size_t size_b, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; - tsdn = tsdn_fetch(); - - return (chunk_merge_default_impl(tsdn, chunk_a, chunk_b)); + return (chunk_merge_default_impl(chunk_a, chunk_b)); } static rtree_node_elm_t * @@ -782,32 +775,11 @@ chunk_boot(void) chunksize_mask = chunksize - 1; chunk_npages = (chunksize >> LG_PAGE); - if (have_dss && chunk_dss_boot()) - return (true); + if (have_dss) + chunk_dss_boot(); if (rtree_new(&chunks_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - opt_lg_chunk), 
chunks_rtree_node_alloc, NULL)) return (true); return (false); } - -void -chunk_prefork(tsdn_t *tsdn) -{ - - chunk_dss_prefork(tsdn); -} - -void -chunk_postfork_parent(tsdn_t *tsdn) -{ - - chunk_dss_postfork_parent(tsdn); -} - -void -chunk_postfork_child(tsdn_t *tsdn) -{ - - chunk_dss_postfork_child(tsdn); -} diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 0b1f82bd..85a13548 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -10,20 +10,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. */ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. 
*/ static void *dss_max; /******************************************************************************/ @@ -41,30 +40,59 @@ chunk_dss_sbrk(intptr_t increment) } dss_prec_t -chunk_dss_prec_get(tsdn_t *tsdn) +chunk_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(tsdn, &dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -chunk_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +chunk_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(tsdn, &dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +chunk_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and assure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = chunk_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* Fixed new_addr can only be supported if it is at the edge of DSS. 
*/ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) @@ -80,28 +108,20 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if ((intptr_t)size < 0) return (NULL); - malloc_mutex_lock(tsdn, &dss_mtx); - if (dss_prev != (void *)-1) { - + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ - do { - void *ret, *cpad, *dss_next; + while (true) { + void *ret, *cpad, *max_cur, *dss_next, *dss_prev; size_t gap_size, cpad_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - /* Get the current end of the DSS. */ - dss_max = chunk_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = chunk_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Calculate how much padding is necessary to @@ -120,17 +140,23 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cpad_size = (uintptr_t)ret - (uintptr_t)cpad; dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) { - /* Wrap-around. */ - malloc_mutex_unlock(tsdn, &dss_mtx); - return (NULL); - } + (uintptr_t)dss_next < (uintptr_t)dss_max) + goto label_oom; /* Wrap-around. */ incr = gap_size + cpad_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. 
*/ dss_prev = chunk_dss_sbrk(incr); - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. */ - dss_max = dss_next; - malloc_mutex_unlock(tsdn, &dss_mtx); if (cpad_size != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -147,68 +173,65 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } - } while (dss_prev != (void *)-1); - } - malloc_mutex_unlock(tsdn, &dss_mtx); + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. */ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } + } + } +label_oom: return (NULL); } -bool -chunk_in_dss(tsdn_t *tsdn, void *chunk) +static bool +chunk_in_dss_helper(void *chunk, void *max) { - bool ret; - cassert(have_dss); - - malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)chunk >= (uintptr_t)dss_base - && (uintptr_t)chunk < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(tsdn, &dss_mtx); - - return (ret); + return ((uintptr_t)chunk >= (uintptr_t)dss_base && (uintptr_t)chunk < + (uintptr_t)max); } bool +chunk_in_dss(void *chunk) +{ + + cassert(have_dss); + + return (chunk_in_dss_helper(chunk, atomic_read_p(&dss_max))); +} + +bool +chunk_dss_mergeable(void *chunk_a, void *chunk_b) +{ + void *max; + + cassert(have_dss); + + max = atomic_read_p(&dss_max); + return (chunk_in_dss_helper(chunk_a, max) == + chunk_in_dss_helper(chunk_b, max)); +} + +void chunk_dss_boot(void) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) - return (true); dss_base = chunk_dss_sbrk(0); - dss_prev = dss_base; + dss_exhausted = 
(unsigned)(dss_base == (void *)-1); dss_max = dss_base; - - return (false); -} - -void -chunk_dss_prefork(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_prefork(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_parent(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_parent(tsdn, &dss_mtx); -} - -void -chunk_dss_postfork_child(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index dad80086..5d2c8db4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1685,11 +1685,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - chunk_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + chunk_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = chunk_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = chunk_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; diff --git a/src/huge.c b/src/huge.c index 3a2877ca..19ca3f03 100644 --- a/src/huge.c +++ b/src/huge.c @@ -114,7 +114,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #define huge_dalloc_junk JEMALLOC_N(huge_dalloc_junk_impl) #endif static void -huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -122,7 +122,7 @@ huge_dalloc_junk(tsdn_t *tsdn, void *ptr, size_t usize) * Only bother junk filling if the chunk isn't about to be * unmapped. 
*/ - if (!config_munmap || (have_dss && chunk_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && chunk_in_dss(ptr))) memset(ptr, JEMALLOC_FREE_JUNK, usize); } } @@ -221,7 +221,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, if (oldsize > usize) { size_t sdiff = oldsize - usize; if (config_fill && unlikely(opt_junk_free)) { - huge_dalloc_junk(tsdn, (void *)((uintptr_t)ptr + usize), + huge_dalloc_junk((void *)((uintptr_t)ptr + usize), sdiff); post_zeroed = false; } else { @@ -402,7 +402,7 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) ql_remove(&arena->huge, node, ql_link); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - huge_dalloc_junk(tsdn, extent_node_addr_get(node), + huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), extent_node_addr_get(node), extent_node_size_get(node)); diff --git a/src/jemalloc.c b/src/jemalloc.c index aec2a5eb..b0ebf810 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1115,8 +1115,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if (chunk_dss_prec_set(NULL, - i)) { + if (chunk_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -2783,7 +2782,6 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - chunk_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2812,7 +2810,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - chunk_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2837,7 +2834,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
*/ - chunk_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/test/unit/junk.c b/test/unit/junk.c index acddc601..460bd524 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ arena_dalloc_junk_large_intercept(void *ptr, size_t usize) } static void -huge_dalloc_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +huge_dalloc_junk_intercept(void *ptr, size_t usize) { - huge_dalloc_junk_orig(tsdn, ptr, usize); + huge_dalloc_junk_orig(ptr, usize); /* * The conditions under which junk filling actually occurs are nuanced * enough that it doesn't make sense to duplicate the decision logic in From 577d4572b0821a15e5370f9bf566d884b7cf707c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 13 Oct 2016 12:18:38 -0700 Subject: [PATCH 0415/2608] Make dss operations lockless. Rather than protecting dss operations with a mutex, use atomic operations. This has negligible impact on synchronization overhead during typical dss allocation, but is a substantial improvement for extent_in_dss() and the newly added extent_dss_mergeable(), which can be called multiple times during extent deallocations. This change also has the advantage of avoiding tsd in deallocation paths associated with purging, which resolves potential deadlocks during thread exit due to attempted tsd resurrection. This resolves #425. 
--- include/jemalloc/internal/extent.h | 3 - include/jemalloc/internal/extent_dss.h | 12 +- include/jemalloc/internal/large.h | 4 +- include/jemalloc/internal/private_symbols.txt | 7 +- src/arena.c | 2 +- src/ctl.c | 4 +- src/extent.c | 48 +---- src/extent_dss.c | 196 ++++++++++-------- src/jemalloc.c | 6 +- src/large.c | 8 +- test/unit/junk.c | 4 +- 11 files changed, 139 insertions(+), 155 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 528759b0..08d30365 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -127,9 +127,6 @@ extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, size_t usize_a, size_t size_b, size_t usize_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); -void extent_prefork(tsdn_t *tsdn); -void extent_postfork_parent(tsdn_t *tsdn); -void extent_postfork_child(tsdn_t *tsdn); bool extent_boot(void); diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index 0aabc2ec..f2dac52e 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -23,15 +23,13 @@ extern const char *dss_prec_names[]; extern const char *opt_dss; -dss_prec_t extent_dss_prec_get(tsdn_t *tsdn); -bool extent_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec); +dss_prec_t extent_dss_prec_get(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_in_dss(tsdn_t *tsdn, void *addr); -bool extent_dss_boot(void); -void extent_dss_prefork(tsdn_t *tsdn); -void extent_dss_postfork_parent(tsdn_t *tsdn); -void extent_dss_postfork_child(tsdn_t *tsdn); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); #endif /* JEMALLOC_H_EXTERNS */ 
/******************************************************************************/ diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large.h index 8345f89e..f3d382b5 100644 --- a/include/jemalloc/internal/large.h +++ b/include/jemalloc/internal/large.h @@ -19,11 +19,11 @@ void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, #ifdef JEMALLOC_JET typedef void (large_dalloc_junk_t)(void *, size_t); extern large_dalloc_junk_t *large_dalloc_junk; -typedef void (large_dalloc_maybe_junk_t)(tsdn_t *, void *, size_t); +typedef void (large_dalloc_maybe_junk_t)(void *, size_t); extern large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; #else void large_dalloc_junk(void *ptr, size_t usize); -void large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize); +void large_dalloc_maybe_junk(void *ptr, size_t usize); #endif void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d1f39cff..8d573b76 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -156,11 +156,9 @@ extent_dalloc_mmap extent_dalloc_wrapper extent_decommit_wrapper extent_dss_boot -extent_dss_postfork_child -extent_dss_postfork_parent +extent_dss_mergeable extent_dss_prec_get extent_dss_prec_set -extent_dss_prefork extent_heap_empty extent_heap_first extent_heap_insert @@ -176,9 +174,6 @@ extent_last_get extent_lookup extent_merge_wrapper extent_past_get -extent_postfork_child -extent_postfork_parent -extent_prefork extent_prof_tctx_get extent_prof_tctx_set extent_purge_wrapper diff --git a/src/arena.c b/src/arena.c index 2b8aead7..ce289594 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1682,7 +1682,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) (uint64_t)(uintptr_t)arena; } - arena->dss_prec = extent_dss_prec_get(tsdn); + arena->dss_prec = 
extent_dss_prec_get(); arena->purging = false; arena->nactive = 0; diff --git a/src/ctl.c b/src/ctl.c index b4e2208c..067b6772 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1527,11 +1527,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); } else { if (dss_prec != dss_prec_limit && - extent_dss_prec_set(tsd_tsdn(tsd), dss_prec)) { + extent_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } - dss_prec_old = extent_dss_prec_get(tsd_tsdn(tsd)); + dss_prec_old = extent_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; diff --git a/src/extent.c b/src/extent.c index e4d3ccdb..e4ceb8fd 100644 --- a/src/extent.c +++ b/src/extent.c @@ -856,10 +856,10 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, } static bool -extent_dalloc_default_impl(tsdn_t *tsdn, void *addr, size_t size) +extent_dalloc_default_impl(void *addr, size_t size) { - if (!have_dss || !extent_in_dss(tsdn, addr)) + if (!have_dss || !extent_in_dss(addr)) return (extent_dalloc_mmap(addr, size)); return (true); } @@ -869,13 +869,10 @@ static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; assert(extent_hooks == &extent_hooks_default); - tsdn = tsdn_fetch(); - - return (extent_dalloc_default_impl(tsdn, addr, size)); + return (extent_dalloc_default_impl(addr, size)); } void @@ -897,7 +894,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_deregister(tsdn, extent); if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ - err = extent_dalloc_default_impl(tsdn, extent_base_get(extent), + err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { err = (*r_extent_hooks)->dalloc(*r_extent_hooks, @@ -1083,13 +1080,12 @@ label_error_a: } static bool -extent_merge_default_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) +extent_merge_default_impl(void *addr_a, void *addr_b) { if (!maps_coalesce) return (true); - if (have_dss && extent_in_dss(tsdn, addr_a) != extent_in_dss(tsdn, - addr_b)) + if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) return (true); return (false); @@ -1099,13 +1095,10 @@ static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - tsdn_t *tsdn; assert(extent_hooks == &extent_hooks_default); - tsdn = tsdn_fetch(); - - return (extent_merge_default_impl(tsdn, addr_a, addr_b)); + return (extent_merge_default_impl(addr_a, addr_b)); } bool @@ -1120,7 +1113,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ - err = extent_merge_default_impl(tsdn, extent_base_get(a), + err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { err = (*r_extent_hooks)->merge(*r_extent_hooks, @@ -1171,29 +1164,8 @@ extent_boot(void) LG_PAGE))) return (true); - if (have_dss && extent_dss_boot()) - return (true); + if (have_dss) + extent_dss_boot(); return (false); } - -void -extent_prefork(tsdn_t *tsdn) -{ - - extent_dss_prefork(tsdn); -} - -void -extent_postfork_parent(tsdn_t *tsdn) -{ - - extent_dss_postfork_parent(tsdn); -} - -void -extent_postfork_child(tsdn_t *tsdn) -{ - - extent_dss_postfork_child(tsdn); -} diff --git a/src/extent_dss.c b/src/extent_dss.c index e0e6635d..31fe8fe2 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -12,20 +12,19 @@ const char *dss_prec_names[] = { "N/A" }; -/* Current dss precedence default, used when creating new arenas. */ -static dss_prec_t dss_prec_default = DSS_PREC_DEFAULT; - /* - * Protects sbrk() calls. This avoids malloc races among threads, though it - * does not protect against races with threads that call sbrk() directly. + * Current dss precedence default, used when creating new arenas. NB: This is + * stored as unsigned rather than dss_prec_t because in principle there's no + * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use + * atomic operations to synchronize the setting. */ -static malloc_mutex_t dss_mtx; +static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; /* Base address of the DSS. */ static void *dss_base; -/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */ -static void *dss_prev; -/* Current upper limit on DSS addresses. */ +/* Atomic boolean indicating whether the DSS is exhausted. */ +static unsigned dss_exhausted; +/* Atomic current upper limit on DSS addresses. 
*/ static void *dss_max; /******************************************************************************/ @@ -43,35 +42,63 @@ extent_dss_sbrk(intptr_t increment) } dss_prec_t -extent_dss_prec_get(tsdn_t *tsdn) +extent_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) return (dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - ret = dss_prec_default; - malloc_mutex_unlock(tsdn, &dss_mtx); + ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -extent_dss_prec_set(tsdn_t *tsdn, dss_prec_t dss_prec) +extent_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) return (dss_prec != dss_prec_disabled); - malloc_mutex_lock(tsdn, &dss_mtx); - dss_prec_default = dss_prec; - malloc_mutex_unlock(tsdn, &dss_mtx); + atomic_write_u(&dss_prec_default, (unsigned)dss_prec); return (false); } +static void * +extent_dss_max_update(void *new_addr) +{ + void *max_cur; + spin_t spinner; + + /* + * Get the current end of the DSS as max_cur and assure that dss_max is + * up to date. + */ + spin_init(&spinner); + while (true) { + void *max_prev = atomic_read_p(&dss_max); + + max_cur = extent_dss_sbrk(0); + if ((uintptr_t)max_prev > (uintptr_t)max_cur) { + /* + * Another thread optimistically updated dss_max. Wait + * for it to finish. + */ + spin_adaptive(&spinner); + continue; + } + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + break; + } + /* Fixed new_addr can only be supported if it is at the edge of DSS. 
*/ + if (new_addr != NULL && max_cur != new_addr) + return (NULL); + + return (max_cur); +} + void * extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; extent_t *gap; cassert(have_dss); @@ -89,35 +116,27 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap == NULL) return (NULL); - malloc_mutex_lock(tsdn, &dss_mtx); - if (dss_prev != (void *)-1) { + if (!atomic_read_u(&dss_exhausted)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than * malloc. */ while (true) { - void *gap_addr, *dss_next; + void *ret, *max_cur, *gap_addr, *dss_next, *dss_prev; size_t gap_size; intptr_t incr; - /* Avoid an unnecessary system call. */ - if (new_addr != NULL && dss_max != new_addr) - break; - - /* Get the current end of the DSS. */ - dss_max = extent_dss_sbrk(0); - - /* Make sure the earlier condition still holds. */ - if (new_addr != NULL && dss_max != new_addr) - break; + max_cur = extent_dss_max_update(new_addr); + if (max_cur == NULL) + goto label_oom; /* * Compute how much gap space (if any) is necessary to * satisfy alignment. This space can be recycled for * later use. */ - gap_addr = (void *)(PAGE_CEILING((uintptr_t)dss_max)); + gap_addr = (void *)(PAGE_CEILING((uintptr_t)max_cur)); ret = (void *)ALIGNMENT_CEILING((uintptr_t)gap_addr, PAGE_CEILING(alignment)); gap_size = (uintptr_t)ret - (uintptr_t)gap_addr; @@ -126,17 +145,24 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, gap_size, false, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); - if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) - break; /* Wrap-around. */ + if ((uintptr_t)ret < (uintptr_t)max_cur || + (uintptr_t)dss_next < (uintptr_t)max_cur) + goto label_oom; /* Wrap-around. 
*/ incr = gap_size + size; + + /* + * Optimistically update dss_max, and roll back below if + * sbrk() fails. No other thread will try to extend the + * DSS while dss_max is greater than the current DSS + * max reported by sbrk(0). + */ + if (atomic_cas_p(&dss_max, max_cur, dss_next)) + continue; + + /* Try to allocate. */ dss_prev = extent_dss_sbrk(incr); - if (dss_prev == (void *)-1) - break; - if (dss_prev == dss_max) { + if (dss_prev == max_cur) { /* Success. */ - dss_max = dss_next; - malloc_mutex_unlock(tsdn, &dss_mtx); if (gap_size != 0) extent_dalloc_gap(tsdn, arena, gap); else @@ -147,69 +173,69 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); return (ret); } + /* + * Failure, whether due to OOM or a race with a raw + * sbrk() call from outside the allocator. Try to roll + * back optimistic dss_max update; if rollback fails, + * it's due to another caller of this function having + * succeeded since this invocation started, in which + * case rollback is not necessary. + */ + atomic_cas_p(&dss_max, dss_next, max_cur); + if (dss_prev == (void *)-1) { + /* OOM. */ + atomic_write_u(&dss_exhausted, (unsigned)true); + goto label_oom; + } } } - /* OOM. 
*/ - malloc_mutex_unlock(tsdn, &dss_mtx); +label_oom: extent_dalloc(tsdn, arena, gap); return (NULL); } -bool -extent_in_dss(tsdn_t *tsdn, void *addr) +static bool +extent_in_dss_helper(void *addr, void *max) { - bool ret; - cassert(have_dss); - - malloc_mutex_lock(tsdn, &dss_mtx); - if ((uintptr_t)addr >= (uintptr_t)dss_base - && (uintptr_t)addr < (uintptr_t)dss_max) - ret = true; - else - ret = false; - malloc_mutex_unlock(tsdn, &dss_mtx); - - return (ret); + return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr < + (uintptr_t)max); } bool +extent_in_dss(void *addr) +{ + + cassert(have_dss); + + return (extent_in_dss_helper(addr, atomic_read_p(&dss_max))); +} + +bool +extent_dss_mergeable(void *addr_a, void *addr_b) +{ + void *max; + + cassert(have_dss); + + if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b < + (uintptr_t)dss_base) + return (true); + + max = atomic_read_p(&dss_max); + return (extent_in_dss_helper(addr_a, max) == + extent_in_dss_helper(addr_b, max)); +} + +void extent_dss_boot(void) { cassert(have_dss); - if (malloc_mutex_init(&dss_mtx, "dss", WITNESS_RANK_DSS)) - return (true); dss_base = extent_dss_sbrk(0); - dss_prev = dss_base; + dss_exhausted = (unsigned)(dss_base == (void *)-1); dss_max = dss_base; - - return (false); -} - -void -extent_dss_prefork(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_prefork(tsdn, &dss_mtx); -} - -void -extent_dss_postfork_parent(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_parent(tsdn, &dss_mtx); -} - -void -extent_dss_postfork_child(tsdn_t *tsdn) -{ - - if (have_dss) - malloc_mutex_postfork_child(tsdn, &dss_mtx); } /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 0348b8ac..5108d15f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1030,8 +1030,7 @@ malloc_conf_init(void) for (i = 0; i < dss_prec_limit; i++) { if (strncmp(dss_prec_names[i], v, vlen) == 0) { - if 
(extent_dss_prec_set(NULL, - i)) { + if (extent_dss_prec_set(i)) { malloc_conf_error( "Error setting dss", k, klen, v, vlen); @@ -2631,7 +2630,6 @@ _malloc_prefork(void) } } base_prefork(tsd_tsdn(tsd)); - extent_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2660,7 +2658,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - extent_postfork_parent(tsd_tsdn(tsd)); base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2685,7 +2682,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. */ - extent_postfork_child(tsd_tsdn(tsd)); base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/large.c b/src/large.c index 34b3bdb5..23af1830 100644 --- a/src/large.c +++ b/src/large.c @@ -81,7 +81,7 @@ large_dalloc_junk_t *large_dalloc_junk = JEMALLOC_N(n_large_dalloc_junk); #define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) #endif void -large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) +large_dalloc_maybe_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { @@ -89,7 +89,7 @@ large_dalloc_maybe_junk(tsdn_t *tsdn, void *ptr, size_t usize) * Only bother junk filling if the extent isn't about to be * unmapped. 
*/ - if (!config_munmap || (have_dss && extent_in_dss(tsdn, ptr))) + if (!config_munmap || (have_dss && extent_in_dss(ptr))) large_dalloc_junk(ptr, usize); } } @@ -119,7 +119,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) return (true); if (config_fill && unlikely(opt_junk_free)) { - large_dalloc_maybe_junk(tsdn, extent_addr_get(trail), + large_dalloc_maybe_junk(extent_addr_get(trail), extent_usize_get(trail)); } @@ -296,7 +296,7 @@ large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) ql_remove(&arena->large, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->large_mtx); if (!junked_locked) { - large_dalloc_maybe_junk(tsdn, extent_addr_get(extent), + large_dalloc_maybe_junk(extent_addr_get(extent), extent_usize_get(extent)); } arena_extent_dalloc_large(tsdn, arena, extent, junked_locked); diff --git a/test/unit/junk.c b/test/unit/junk.c index fe453b6c..680f0d21 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -53,10 +53,10 @@ large_dalloc_junk_intercept(void *ptr, size_t usize) } static void -large_dalloc_maybe_junk_intercept(tsdn_t *tsdn, void *ptr, size_t usize) +large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) { - large_dalloc_maybe_junk_orig(tsdn, ptr, usize); + large_dalloc_maybe_junk_orig(ptr, usize); if (ptr == watch_for_junking) saw_junking = true; } From b54d160dc4507eab5fb64e2e293019c5d3afd18c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 20 Oct 2016 23:59:12 -0700 Subject: [PATCH 0416/2608] Do not (recursively) allocate within tsd_fetch(). Refactor tsd so that tsdn_fetch() does not trigger allocation, since allocation could cause infinite recursion. This resolves #458. 
--- include/jemalloc/internal/ckh.h | 8 +- .../jemalloc/internal/jemalloc_internal.h.in | 10 +-- include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/prof.h | 8 +- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/internal/tsd.h | 74 +++++++++++---- src/ckh.c | 49 +++++----- src/ctl.c | 4 +- src/jemalloc.c | 2 +- src/prof.c | 89 +++++++++---------- src/tcache.c | 8 +- test/unit/ckh.c | 42 ++++----- test/unit/tsd.c | 6 +- 13 files changed, 172 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 46e151cd..f75ad90b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsdn_t *tsdn, ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 1d02c20e..fac0ea39 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -535,7 +535,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t 
alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); @@ -862,14 +862,10 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsdn_t *tsdn, arena_t *arena) +arena_ichoose(tsd_t *tsd, arena_t *arena) { - assert(!tsdn_null(tsdn) || arena != NULL); - - if (!tsdn_null(tsdn)) - return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); - return (arena); + return (arena_choose_impl(tsd, arena, true)); } JEMALLOC_INLINE arena_tdata_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8d573b76..1bf79ca8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -471,7 +471,9 @@ tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch +tsd_fetch_impl tsd_get +tsd_get_allocates tsd_iarena_get tsd_iarena_set tsd_iarenap_get diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 0fdee08c..2d1791b9 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -299,9 +299,9 @@ extern prof_dump_header_t *prof_dump_header; void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsdn_t *tsdn, size_t lg_sample); +void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -315,7 +315,7 @@ bool 
prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsdn_t *tsdn); +bool prof_boot2(tsd_t *tsd); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); @@ -387,7 +387,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd_tsdn(tsd)); + tdata = prof_tdata_init(tsd); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 933255ce..25a1ad02 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -144,7 +144,7 @@ tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5df5f673..b33de703 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -48,7 +48,7 @@ typedef enum { * * bool example_tsd_boot(void) {...} * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get() {...} + * example_t *example_tsd_get(bool init) {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -105,7 +105,7 @@ a_name##tsd_boot(void); \ a_attr bool \ a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(void); \ +a_name##tsd_get(bool init); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -213,9 +213,15 @@ a_name##tsd_booted_get(void) \ \ return 
(a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -265,9 +271,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -327,14 +339,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -394,14 +406,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -410,7 +430,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ if (likely(&wrapper->val != val)) \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ @@ -455,12 +475,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -523,14 +543,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -539,7 +567,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ if (likely(&wrapper->val != val)) \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ @@ -645,6 +673,7 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); @@ -665,9 +694,13 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) +tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(); + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -684,6 +717,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { @@ -730,7 +770,7 @@ tsdn_fetch(void) if (!tsd_booted_get()) return (NULL); - return (tsd_tsdn(tsd_fetch())); + return (tsd_tsdn(tsd_fetch_impl(false))); } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/ckh.c b/src/ckh.c index 90a81155..75376017 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static 
functions. */ -static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); -static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -245,7 +245,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsdn_t *tsdn, ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -271,8 +271,8 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, + true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -284,14 +284,14 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, iealloc(tsdn, tab), tab, NULL, true, - true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tab), + tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, iealloc(tsdn, ckh->tab), ckh->tab, NULL, true, - true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), + ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -302,7 +302,7 @@ label_return: } static void -ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -317,8 +317,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -333,7 +333,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, iealloc(tsdn, tab), tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tab), tab, NULL, + true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -341,7 +342,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, iealloc(tsdn, ckh->tab), ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), ckh->tab, + NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -350,7 +352,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } bool -ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -394,8 +396,8 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, + NULL, true, arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -407,7 +409,7 @@ label_return: } void -ckh_delete(tsdn_t *tsdn, ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); @@ -424,7 +426,8 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, iealloc(tsdn, ckh->tab), ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), ckh->tab, + NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -459,7 +462,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -471,7 +474,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsdn, ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -483,7 +486,7 @@ label_return: } bool -ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void 
**key, void **data) { size_t cell; @@ -505,7 +508,7 @@ ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. */ - ckh_shrink(tsdn, ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index 067b6772..47b4768b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1326,7 +1326,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { + if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -1871,7 +1871,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(tsd_tsdn(tsd), lg_sample); + prof_reset(tsd, lg_sample); ret = 0; label_return: diff --git a/src/jemalloc.c b/src/jemalloc.c index 5108d15f..1f951e2f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1313,7 +1313,7 @@ malloc_init_hard(void) return (true); malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } diff --git a/src/prof.c b/src/prof.c index 5eb9a3d1..4bafb39a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -125,7 +125,7 @@ static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); @@ -591,7 +591,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, 
prof_gctx_t *gctx, assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. */ @@ -652,7 +652,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); @@ -705,7 +705,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); + prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tctx), tctx, @@ -735,7 +735,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), gctx.v), @@ -798,7 +798,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. 
*/ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd_tsdn(tsd), NULL), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -813,8 +813,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, - ret.v); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) @@ -1796,7 +1795,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) } static prof_tdata_t * -prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1804,7 +1803,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) @@ -1818,9 +1817,10 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, iealloc(tsdn, tdata), tdata, NULL, true, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tdata), tdata, + NULL, true, true); return (NULL); } @@ -1834,19 +1834,19 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsdn_t *tsdn) +prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, - prof_thread_active_init_get(tsdn))); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } static bool @@ -1871,33 +1871,32 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, } static void -prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - - assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if 
(tdata->thread_name != NULL) { - idalloctm(tsdn, iealloc(tsdn, tdata->thread_name), - tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + tdata->thread_name), tdata->thread_name, NULL, true, true); } - ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, iealloc(tsdn, tdata), tdata, NULL, true, true); + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tdata), tdata, NULL, + true, true); } static void -prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsdn, &tdatas_mtx); - prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void @@ -1920,7 +1919,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) destroy_tdata = false; malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); + prof_tdata_destroy(tsd, tdata, true); } prof_tdata_t * @@ -1933,8 +1932,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, - thread_name, active)); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); } static bool @@ -1963,30 +1962,30 @@ prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) } void -prof_reset(tsdn_t *tsdn, size_t lg_sample) +prof_reset(tsd_t *tsd, size_t lg_sample) { prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsdn, &prof_dump_mtx); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); 
lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsdn); + prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsdn, to_destroy, false); + prof_tdata_destroy_locked(tsd, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - malloc_mutex_unlock(tsdn, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } void @@ -2197,7 +2196,7 @@ prof_boot1(void) } bool -prof_boot2(tsdn_t *tsdn) +prof_boot2(tsd_t *tsd) { cassert(config_prof); @@ -2223,7 +2222,7 @@ prof_boot2(tsdn_t *tsdn) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2254,8 +2253,8 @@ prof_boot2(tsdn_t *tsdn) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS - * sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2264,7 +2263,7 @@ prof_boot2(tsdn_t *tsdn) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); diff --git a/src/tcache.c b/src/tcache.c index 96e54e1a..98c18a04 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -433,14 +433,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsdn_t *tsdn, unsigned *r_ind) +tcaches_create(tsd_t *tsd, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == 
NULL) { - tcaches = base_alloc(tsdn, sizeof(tcache_t *) * + tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -448,10 +448,10 @@ tcaches_create(tsdn_t *tsdn, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsdn, NULL); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsdn, arena); + tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) return (true); diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 961e2acb..2cbc2268 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,24 +2,24 @@ TEST_BEGIN(test_new_delete) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); - assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; const char *strs[] = { "a string", @@ -30,9 +30,9 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), @@ -40,7 +40,7 @@ TEST_BEGIN(test_count_insert_search_remove) /* Insert. 
*/ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(tsdn, &ckh, strs[i], strs[i]); + ckh_insert(tsd, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -85,7 +85,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; @@ -101,22 +101,22 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { @@ -128,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]), + assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -143,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_true(ckh_remove(tsd, &ckh, 
p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -184,13 +184,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -198,7 +198,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 7dde4b77..4e2622a3 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -58,18 +58,18 @@ thd_start(void *arg) data_t d = (data_t)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(), DATA_INIT, + assert_x_eq(*data_tsd_get(true), DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); data_tsd_set(&d); - assert_x_eq(*data_tsd_get(), d, + assert_x_eq(*data_tsd_get(true), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, + assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); free(p); From 962a2979e353f876f3725417179f201e671d9dbb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 20 Oct 2016 23:59:12 -0700 Subject: [PATCH 0417/2608] Do not (recursively) allocate within tsd_fetch(). 
Refactor tsd so that tsdn_fetch() does not trigger allocation, since allocation could cause infinite recursion. This resolves #458. --- include/jemalloc/internal/ckh.h | 8 +- .../jemalloc/internal/jemalloc_internal.h.in | 11 +-- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/prof.h | 8 +- include/jemalloc/internal/tcache.h | 2 +- include/jemalloc/internal/tsd.h | 74 ++++++++++++---- src/ckh.c | 42 ++++----- src/ctl.c | 4 +- src/huge.c | 4 +- src/jemalloc.c | 9 +- src/prof.c | 85 +++++++++---------- src/tcache.c | 8 +- test/unit/ckh.c | 42 ++++----- test/unit/tsd.c | 6 +- 14 files changed, 176 insertions(+), 130 deletions(-) diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 46e151cd..f75ad90b 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -64,13 +64,13 @@ struct ckh_s { /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -bool ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp); -void ckh_delete(tsdn_t *tsdn, ckh_t *ckh); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); size_t ckh_count(ckh_t *ckh); bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); void ckh_string_hash(const void *key, size_t r_hash[2]); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index d644cea3..fdc8fef9 100644 --- 
a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -479,6 +479,7 @@ extern size_t const index2size_tab[NSIZES]; */ extern uint8_t const size2index_tab[]; +arena_t *a0get(void); void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); @@ -574,7 +575,7 @@ size_t s2u(size_t size); size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsdn_t *tsdn, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); @@ -912,14 +913,10 @@ arena_choose(tsd_t *tsd, arena_t *arena) } JEMALLOC_INLINE arena_t * -arena_ichoose(tsdn_t *tsdn, arena_t *arena) +arena_ichoose(tsd_t *tsd, arena_t *arena) { - assert(!tsdn_null(tsdn) || arena != NULL); - - if (!tsdn_null(tsdn)) - return (arena_choose_impl(tsdn_tsd(tsdn), NULL, true)); - return (arena); + return (arena_choose_impl(tsd, arena, true)); } JEMALLOC_INLINE arena_tdata_t * diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 642c3de7..62211790 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -1,4 +1,5 @@ a0dalloc +a0get a0malloc arena_aalloc arena_alloc_junk_small @@ -545,7 +546,9 @@ tsd_booted_get tsd_cleanup tsd_cleanup_wrapper tsd_fetch +tsd_fetch_impl tsd_get +tsd_get_allocates tsd_iarena_get tsd_iarena_set tsd_iarenap_get diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h index 21dff5fb..8293b71e 100644 --- a/include/jemalloc/internal/prof.h +++ b/include/jemalloc/internal/prof.h @@ -299,9 +299,9 @@ extern prof_dump_header_t *prof_dump_header; void prof_idump(tsdn_t *tsdn); bool 
prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsdn_t *tsdn, size_t lg_sample); +void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -315,7 +315,7 @@ bool prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsdn_t *tsdn); +bool prof_boot2(tsd_t *tsd); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); @@ -384,7 +384,7 @@ prof_tdata_get(tsd_t *tsd, bool create) if (create) { if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd_tsdn(tsd)); + tdata = prof_tdata_init(tsd); tsd_prof_tdata_set(tsd, tdata); } } else if (unlikely(tdata->expired)) { diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 70883b1a..01ba062d 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -145,7 +145,7 @@ tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); void tcache_enabled_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsdn_t *tsdn, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index bf113411..9055acaf 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -48,7 +48,7 @@ typedef enum { * * bool example_tsd_boot(void) {...} * bool example_tsd_booted_get(void) {...} 
- * example_t *example_tsd_get() {...} + * example_t *example_tsd_get(bool init) {...} * void example_tsd_set(example_t *val) {...} * * Note that all of the functions deal in terms of (a_type *) rather than @@ -105,7 +105,7 @@ a_name##tsd_boot(void); \ a_attr bool \ a_name##tsd_booted_get(void); \ a_attr a_type * \ -a_name##tsd_get(void); \ +a_name##tsd_get(bool init); \ a_attr void \ a_name##tsd_set(a_type *val); @@ -213,9 +213,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -264,9 +270,15 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (false); \ +} \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ \ assert(a_name##tsd_booted); \ @@ -325,14 +337,14 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ if (wrapper == NULL) { \ @@ -392,14 +404,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -408,7 +428,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -452,12 +472,12 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(void) \ +a_name##tsd_wrapper_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ - if (unlikely(wrapper == NULL)) { \ + if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ wrapper = tsd_init_check_recursion( \ &a_name##tsd_init_head, &block); \ @@ -520,14 +540,22 @@ a_name##tsd_booted_get(void) \ \ return (a_name##tsd_booted); \ } \ +a_attr bool \ +a_name##tsd_get_allocates(void) \ +{ \ + \ + return (true); \ +} \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(void) \ +a_name##tsd_get(bool init) \ { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(init); \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + return (NULL); \ return (&wrapper->val); \ } \ a_attr void \ @@ -536,7 +564,7 @@ a_name##tsd_set(a_type *val) \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(); \ + wrapper = a_name##tsd_wrapper_get(true); \ wrapper->val = *(val); \ if (a_cleanup != malloc_tsd_no_cleanup) \ wrapper->initialized = true; \ @@ -639,6 +667,7 @@ void tsd_cleanup(void *arg); #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) +tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); @@ -658,9 +687,13 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) +tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(); + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { if (tsd->state == tsd_state_uninitialized) { @@ -677,6 +710,13 @@ tsd_fetch(void) return (tsd); } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { @@ -723,7 +763,7 @@ tsdn_fetch(void) if (!tsd_booted_get()) return (NULL); - return (tsd_tsdn(tsd_fetch())); + return (tsd_tsdn(tsd_fetch_impl(false))); } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/ckh.c b/src/ckh.c index 747c1c86..3be671c3 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,8 +40,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static bool ckh_grow(tsdn_t *tsdn, ckh_t *ckh); -static void ckh_shrink(tsdn_t *tsdn, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -244,7 +244,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsdn_t *tsdn, ckh_t *ckh) +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; @@ -270,8 +270,8 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ret = true; goto label_return; } - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, + true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { ret = true; goto label_return; @@ -283,12 +283,12 @@ ckh_grow(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -299,7 +299,7 @@ label_return: } static void -ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; @@ -314,8 +314,8 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > HUGE_MAXCLASS)) return; - tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, true, - arena_ichoose(tsdn, NULL)); + tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, + true, arena_ichoose(tsd, NULL)); if (tab == NULL) { /* * An OOM error isn't worth propagating, since it doesn't @@ -330,7 +330,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsdn, tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -338,7 +338,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -347,7 +347,7 @@ ckh_shrink(tsdn_t *tsdn, ckh_t *ckh) } bool -ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh_keycomp_t *keycomp) { bool ret; @@ -391,8 +391,8 @@ ckh_new(tsdn_t *tsdn, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = true; goto label_return; } - ckh->tab = (ckhc_t *)ipallocztm(tsdn, usize, CACHELINE, true, NULL, - true, arena_ichoose(tsdn, NULL)); + ckh->tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, + NULL, true, arena_ichoose(tsd, NULL)); if (ckh->tab == NULL) { ret = true; goto label_return; @@ -404,7 +404,7 @@ label_return: } void -ckh_delete(tsdn_t *tsdn, ckh_t *ckh) +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); @@ -421,7 +421,7 @@ ckh_delete(tsdn_t *tsdn, ckh_t *ckh) (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsdn, ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); if (config_debug) memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } @@ -456,7 +456,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; @@ -468,7 +468,7 @@ ckh_insert(tsdn_t *tsdn, ckh_t *ckh, const void *key, const void *data) #endif while (ckh_try_insert(ckh, &key, &data)) { - if (ckh_grow(tsdn, ckh)) { + if (ckh_grow(tsd, ckh)) { ret = true; goto label_return; } @@ -480,7 +480,7 @@ label_return: } bool -ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void **key, +ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; @@ -502,7 +502,7 @@ ckh_remove(tsdn_t *tsdn, ckh_t *ckh, const void *searchkey, void 
**key, + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. */ - ckh_shrink(tsdn, ckh); + ckh_shrink(tsd, ckh); } return (false); diff --git a/src/ctl.c b/src/ctl.c index 5d2c8db4..bc78b205 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1478,7 +1478,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (tcaches_create(tsd_tsdn(tsd), &tcache_ind)) { + if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; goto label_return; } @@ -2100,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (lg_sample >= (sizeof(uint64_t) << 3)) lg_sample = (sizeof(uint64_t) << 3) - 1; - prof_reset(tsd_tsdn(tsd), lg_sample); + prof_reset(tsd, lg_sample); ret = 0; label_return: diff --git a/src/huge.c b/src/huge.c index 19ca3f03..62e6932b 100644 --- a/src/huge.c +++ b/src/huge.c @@ -54,6 +54,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, { void *ret; size_t ausize; + arena_t *iarena; extent_node_t *node; bool is_zeroed; @@ -67,8 +68,9 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ + iarena = (!tsdn_null(tsdn)) ? 
arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get(); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), - CACHELINE, false, NULL, true, arena_ichoose(tsdn, arena)); + CACHELINE, false, NULL, true, iarena); if (node == NULL) return (NULL); diff --git a/src/jemalloc.c b/src/jemalloc.c index b0ebf810..53fcae34 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -340,6 +340,13 @@ a0idalloc(void *ptr, bool is_metadata) idalloctm(TSDN_NULL, ptr, false, is_metadata, true); } +arena_t * +a0get(void) +{ + + return (a0); +} + void * a0malloc(size_t size) { @@ -1454,7 +1461,7 @@ malloc_init_hard(void) return (true); malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - if (config_prof && prof_boot2(tsd_tsdn(tsd))) { + if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return (true); } diff --git a/src/prof.c b/src/prof.c index c1f58d46..140d5b22 100644 --- a/src/prof.c +++ b/src/prof.c @@ -125,7 +125,7 @@ static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached); -static void prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); @@ -591,7 +591,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd_tsdn(tsd), &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) not_reached(); prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ @@ -651,7 +651,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd_tsdn(tsd), &tdata->bt2tctx, &gctx->bt, NULL, NULL); + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); @@ -704,7 +704,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, false); + prof_tdata_destroy(tsd, tdata, false); if (destroy_tctx) idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); @@ -733,7 +733,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return (true); } btkey.p = &gctx.p->bt; - if (ckh_insert(tsd_tsdn(tsd), &bt2gctx, btkey.v, gctx.v)) { + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, true); @@ -795,7 +795,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) /* Link a prof_tctx_t into gctx for this thread. 
*/ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd_tsdn(tsd), NULL), true); + arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) prof_gctx_try_destroy(tsd, tdata, gctx, tdata); @@ -810,8 +810,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) ret.p->prepared = true; ret.p->state = prof_tctx_state_initializing; malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd_tsdn(tsd), &tdata->bt2tctx, btkey, - ret.v); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) @@ -1791,7 +1790,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) } static prof_tdata_t * -prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { prof_tdata_t *tdata; @@ -1799,7 +1798,7 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsdn, sizeof(prof_tdata_t), + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) @@ -1813,9 +1812,9 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->expired = false; tdata->tctx_uid_next = 0; - if (ckh_new(tsdn, &tdata->bt2tctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsdn, tdata, NULL, true, true); + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); return (NULL); } @@ -1829,19 +1828,19 @@ prof_tdata_init_impl(tsdn_t *tsdn, uint64_t thr_uid, uint64_t thr_discrim, tdata->dumping = false; tdata->active = active; - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); return (tdata); } prof_tdata_t * -prof_tdata_init(tsdn_t *tsdn) +prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsdn, prof_thr_uid_alloc(tsdn), 0, NULL, - prof_thread_active_init_get(tsdn))); + return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } static bool @@ -1866,31 +1865,29 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, } static void -prof_tdata_destroy_locked(tsdn_t *tsdn, prof_tdata_t *tdata, +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, &tdatas_mtx); - - assert(tsdn_null(tsdn) || tsd_prof_tdata_get(tsdn_tsd(tsdn)) != tdata); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) - idalloctm(tsdn, 
tdata->thread_name, NULL, true, true); - ckh_delete(tsdn, &tdata->bt2tctx); - idalloctm(tsdn, tdata, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); } static void -prof_tdata_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsdn, &tdatas_mtx); - prof_tdata_destroy_locked(tsdn, tdata, even_if_attached); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void @@ -1913,7 +1910,7 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) destroy_tdata = false; malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) - prof_tdata_destroy(tsd_tsdn(tsd), tdata, true); + prof_tdata_destroy(tsd, tdata, true); } prof_tdata_t * @@ -1926,8 +1923,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd_tsdn(tsd), thr_uid, thr_discrim, - thread_name, active)); + return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active)); } static bool @@ -1956,30 +1953,30 @@ prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) } void -prof_reset(tsdn_t *tsdn, size_t lg_sample) +prof_reset(tsd_t *tsd, size_t lg_sample) { prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); - malloc_mutex_lock(tsdn, &prof_dump_mtx); - malloc_mutex_lock(tsdn, &tdatas_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; next = NULL; do { prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsdn); + prof_tdata_reset_iter, (void *)tsd); if 
(to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsdn, to_destroy, false); + prof_tdata_destroy_locked(tsd, to_destroy, false); } else next = NULL; } while (next != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - malloc_mutex_unlock(tsdn, &prof_dump_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } void @@ -2189,7 +2186,7 @@ prof_boot1(void) } bool -prof_boot2(tsdn_t *tsdn) +prof_boot2(tsd_t *tsd) { cassert(config_prof); @@ -2215,7 +2212,7 @@ prof_boot2(tsdn_t *tsdn) WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) return (true); - if (ckh_new(tsdn, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) return (true); if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", @@ -2246,8 +2243,8 @@ prof_boot2(tsdn_t *tsdn) abort(); } - gctx_locks = (malloc_mutex_t *)base_alloc(tsdn, PROF_NCTX_LOCKS - * sizeof(malloc_mutex_t)); + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2256,7 +2253,7 @@ prof_boot2(tsdn_t *tsdn) return (true); } - tdata_locks = (malloc_mutex_t *)base_alloc(tsdn, + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); if (tdata_locks == NULL) return (true); diff --git a/src/tcache.c b/src/tcache.c index 175759c7..f97aa420 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -445,14 +445,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsdn_t *tsdn, unsigned *r_ind) +tcaches_create(tsd_t *tsd, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsdn, sizeof(tcache_t *) * + tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); if (tcaches == NULL) return (true); @@ -460,10 
+460,10 @@ tcaches_create(tsdn_t *tsdn, unsigned *r_ind) if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) return (true); - arena = arena_ichoose(tsdn, NULL); + arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) return (true); - tcache = tcache_create(tsdn, arena); + tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) return (true); diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 961e2acb..2cbc2268 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,24 +2,24 @@ TEST_BEGIN(test_new_delete) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); - assert_false(ckh_new(tsdn, &ckh, 3, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; const char *strs[] = { "a string", @@ -30,9 +30,9 @@ TEST_BEGIN(test_count_insert_search_remove) const char *missing = "A string not in the hash table."; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_string_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), @@ -40,7 +40,7 @@ TEST_BEGIN(test_count_insert_search_remove) /* Insert. 
*/ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { - ckh_insert(tsdn, &ckh, strs[i], strs[i]); + ckh_insert(tsd, &ckh, strs[i], strs[i]); assert_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); @@ -85,7 +85,7 @@ TEST_BEGIN(test_count_insert_search_remove) vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsdn, &ckh, strs[i], kp, vp), + assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; @@ -101,22 +101,22 @@ TEST_BEGIN(test_count_insert_search_remove) ckh_count(&ckh)); } - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) - tsdn_t *tsdn; + tsd_t *tsd; ckh_t ckh; void **p[NITEMS]; void *q, *r; size_t i; - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); - assert_false(ckh_new(tsdn, &ckh, 2, ckh_pointer_hash, + assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { @@ -128,7 +128,7 @@ TEST_BEGIN(test_insert_iter_remove) size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsdn, &ckh, p[j], p[j]), + assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); assert_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); @@ -143,13 +143,13 @@ TEST_BEGIN(test_insert_iter_remove) for (j = i + 1; j < NITEMS; j++) { assert_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[j], q, "Key pointer mismatch"); assert_ptr_eq(p[j], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[j], &q, &r), + assert_true(ckh_remove(tsd, &ckh, 
p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -184,13 +184,13 @@ TEST_BEGIN(test_insert_iter_remove) for (i = 0; i < NITEMS; i++) { assert_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); assert_ptr_eq(p[i], q, "Key pointer mismatch"); assert_ptr_eq(p[i], r, "Value pointer mismatch"); assert_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsdn, &ckh, p[i], &q, &r), + assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } @@ -198,7 +198,7 @@ TEST_BEGIN(test_insert_iter_remove) assert_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); - ckh_delete(tsdn, &ckh); + ckh_delete(tsd, &ckh); #undef NITEMS } TEST_END diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 7dde4b77..4e2622a3 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -58,18 +58,18 @@ thd_start(void *arg) data_t d = (data_t)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(), DATA_INIT, + assert_x_eq(*data_tsd_get(true), DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); data_tsd_set(&d); - assert_x_eq(*data_tsd_get(), d, + assert_x_eq(*data_tsd_get(true), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(), (data_t)(uintptr_t)arg, + assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); free(p); From 5569b4a42c85f951f783b315bd49668a6fa764e3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Oct 2016 21:52:36 -0700 Subject: [PATCH 0418/2608] Use --whole-archive when linking integration tests on MinGW. 
Prior to this change, the malloc_conf weak symbol provided by the jemalloc dynamic library is always used, even if the application provides a malloc_conf symbol. Use the --whole-archive linker option to allow the weak symbol to be overridden. --- Makefile.in | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index a2d5594b..de282186 100644 --- a/Makefile.in +++ b/Makefile.in @@ -125,6 +125,11 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +ifeq (pecoff, $(ABI)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) +endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml @@ -136,7 +141,11 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +ifeq (pecoff, $(ABI)) +C_UTIL_INTEGRATION_SRCS := +else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ @@ -302,7 +311,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt 
-lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) From 7b24c6e5570062495243f1e55131b395adb31e33 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 25 Oct 2016 21:52:36 -0700 Subject: [PATCH 0419/2608] Use --whole-archive when linking integration tests on MinGW. Prior to this change, the malloc_conf weak symbol provided by the jemalloc dynamic library is always used, even if the application provides a malloc_conf symbol. Use the --whole-archive linker option to allow the weak symbol to be overridden. --- Makefile.in | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 9e063095..d509d551 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,11 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif +ifeq (pecoff, $(ABI)) +LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive +else +LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) +endif PC := $(objroot)jemalloc.pc MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml @@ -130,7 +135,11 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c +ifeq (pecoff, $(ABI)) +C_UTIL_INTEGRATION_SRCS := +else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ @@ -294,7 +303,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) 
$(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) From 583c32c3056bff7606570e7836d33bf8fd18d299 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 15:41:43 -0700 Subject: [PATCH 0420/2608] Do not force lazy lock on Windows. This reverts 13473c7c66a81a4dc1cf11a97e9c8b1dbb785b64, which was intended to work around bootstrapping issues when linking statically. However, this actually causes problems in various other configurations, so this reversion may force a future fix for the underlying problem, if it still exists. --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 0ec710a9..9ece7860 100644 --- a/configure.ac +++ b/configure.ac @@ -426,7 +426,6 @@ case "${host}" in *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" - force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" From c44fa92db5cccf557d1ced431da6aa5ded58ed16 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 17:10:56 -0700 Subject: [PATCH 0421/2608] Only use --whole-archive with gcc. Conditionalize use of --whole-archive on the platform plus compiler, rather than on the ABI. This fixes a regression caused by 7b24c6e5570062495243f1e55131b395adb31e33 (Use --whole-archive when linking integration tests on MinGW.). 
--- Makefile.in | 5 +++-- configure.ac | 3 +++ msvc/ReadMe.txt | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index d509d551..eb77d9f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -51,6 +51,7 @@ enable_code_coverage := @enable_code_coverage@ enable_prof := @enable_prof@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -119,7 +120,7 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif -ifeq (pecoff, $(ABI)) +ifeq (1, $(link_whole_archive)) LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive else LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @@ -135,7 +136,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -ifeq (pecoff, $(ABI)) +ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c diff --git a/configure.ac b/configure.ac index 9ece7860..f27c61f1 100644 --- a/configure.ac +++ b/configure.ac @@ -313,6 +313,7 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" +link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" @@ -442,6 +443,7 @@ case "${host}" in else importlib="${so}" DSO_LDFLAGS="-shared" + link_whole_archive="1" fi a="lib" libprefix="" @@ -479,6 +481,7 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) +AC_SUBST([link_whole_archive]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index b1c2fc5c..77d567da 100644 --- a/msvc/ReadMe.txt 
+++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh" CC=cl --enable-lazy-lock=no + sh -c "CC=cl ./autogen.sh" 6. Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln From d76cfec319760c71bf3d30b9960c9e666785c461 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:23:48 -0700 Subject: [PATCH 0422/2608] Only link with libm (-lm) if necessary. This fixes warnings when building with MSVC. --- Makefile.in | 7 ++++--- configure.ac | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Makefile.in b/Makefile.in index eb77d9f2..e00dbebc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -62,6 +62,7 @@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +LM := @LM@ INSTALL = @INSTALL@ ifeq (macho, $(ABI)) @@ -300,15 +301,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) 
$(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index f27c61f1..a9d75111 100644 --- a/configure.ac +++ b/configure.ac @@ -494,6 +494,15 @@ AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) +dnl Determine whether libm must be linked to use e.g. log(3). +AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) +if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" +else + LM= +fi +AC_SUBST(LM) + JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -940,9 +949,9 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. - LIBS="$LIBS -lm" + dnl Heap profiling uses the log(3) function. + if test "x$LM" != "x" ; then + LIBS="$LIBS $LM" fi AC_DEFINE([JEMALLOC_PROF], [ ]) From 48d4adfbeb32fcc7f455547d89641cd1fc459361 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:26:33 -0700 Subject: [PATCH 0423/2608] Avoid negation of unsigned numbers. Rather than relying on two's complement negation for alignment mask generation, use bitwise not and addition. This dodges warnings from MSVC, and should be strength-reduced by compiler optimization anyway. 
--- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fac0ea39..0e4ffd91 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -315,7 +315,7 @@ typedef unsigned szind_t; /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & (-(alignment)))) + ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ @@ -323,7 +323,7 @@ typedef unsigned szind_t; /* Return the smallest alignment multiple that is >= s. */ #define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & (-(alignment))) + (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L From 17aa187f6b3c5c320653167acf1e85d3d8645e62 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:29:00 -0700 Subject: [PATCH 0424/2608] Add cast to silence (harmless) conversion warning. --- test/unit/tsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 4e2622a3..d5f96ac3 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -79,7 +79,7 @@ thd_start(void *arg) TEST_BEGIN(test_tsd_main_thread) { - thd_start((void *) 0xa5f3e329); + thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END From 44df4a45cf587db8adee7edff4acfb96bfd3d670 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:29:59 -0700 Subject: [PATCH 0425/2608] Explicitly cast negative constants meant for use as unsigned. 
--- test/unit/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/util.c b/test/unit/util.c index c958dc0f..b1f9abd9 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -75,6 +75,7 @@ TEST_BEGIN(test_malloc_strtoumax) }; #define ERR(e) e, #e #define KUMAX(x) ((uintmax_t)x##ULL) +#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) struct test_s tests[] = { {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, @@ -87,13 +88,13 @@ TEST_BEGIN(test_malloc_strtoumax) {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)}, - {"-42", "", 0, ERR(0), KUMAX(-42)}, + {"-42", "", 0, ERR(0), KSMAX(-42)}, {"042", "", 0, ERR(0), KUMAX(042)}, {"+042", "", 0, ERR(0), KUMAX(042)}, - {"-042", "", 0, ERR(0), KUMAX(-042)}, + {"-042", "", 0, ERR(0), KSMAX(-042)}, {"0x42", "", 0, ERR(0), KUMAX(0x42)}, {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"-0x42", "", 0, ERR(0), KUMAX(-0x42)}, + {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)}, @@ -130,6 +131,7 @@ TEST_BEGIN(test_malloc_strtoumax) }; #undef ERR #undef KUMAX +#undef KSMAX unsigned i; for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { From 977103c897225a4ab0380f09adc67b4c43143521 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:31:25 -0700 Subject: [PATCH 0426/2608] Uniformly cast mallctl[bymib]() oldp/newp arguments to (void *). This avoids warnings in some cases, and is otherwise generally good hygiene. 
--- .../vc2015/test_threads/test_threads.cpp | 6 +- src/stats.c | 44 +++-- src/tcache.c | 4 +- src/util.c | 2 +- test/integration/MALLOCX_ARENA.c | 4 +- test/integration/allocated.c | 17 +- test/integration/extent.c | 27 +-- test/integration/mallocx.c | 4 +- test/integration/overflow.c | 8 +- test/integration/rallocx.c | 4 +- test/integration/thread_arena.c | 10 +- test/integration/thread_tcache_enabled.c | 39 ++-- test/integration/xallocx.c | 8 +- test/unit/arena_reset.c | 8 +- test/unit/decay.c | 48 ++--- test/unit/extent_quantize.c | 18 +- test/unit/mallctl.c | 178 ++++++++++-------- test/unit/prof_accum.c | 5 +- test/unit/prof_active.c | 5 +- test/unit/prof_gdump.c | 13 +- test/unit/prof_idump.c | 5 +- test/unit/prof_reset.c | 13 +- test/unit/prof_thread_name.c | 22 ++- test/unit/size_classes.c | 8 +- test/unit/stats.c | 175 +++++++++-------- 25 files changed, 358 insertions(+), 317 deletions(-) diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp index c8cb7d66..a3d1a792 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -21,7 +21,7 @@ int test_threads() je_malloc_conf = "narenas:3"; int narenas = 0; size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); + je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); if (narenas != 3) { printf("Error: unexpected number of arenas: %d\n", narenas); return 1; @@ -33,7 +33,7 @@ int test_threads() je_malloc_stats_print(NULL, NULL, NULL); size_t allocated1; size_t sz1 = sizeof(allocated1); - je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); + je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); printf("\nPress Enter to start threads...\n"); getchar(); printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); @@ -78,7 +78,7 @@ int test_threads() } je_malloc_stats_print(NULL, NULL, NULL); size_t allocated2; - 
je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); + je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); size_t leaked = allocated2 - allocated1; printf("\nDone. Leaked: %zd bytes\n", leaked); bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) diff --git a/src/stats.c b/src/stats.c index ca716d5e..689299fa 100644 --- a/src/stats.c +++ b/src/stats.c @@ -3,7 +3,7 @@ #define CTL_GET(n, v, t) do { \ size_t sz = sizeof(t); \ - xmallctl(n, v, &sz, NULL, 0); \ + xmallctl(n, (void *)v, &sz, NULL, 0); \ } while (0) #define CTL_M2_GET(n, i, v, t) do { \ @@ -12,7 +12,7 @@ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) #define CTL_M2_M4_GET(n, i, j, v, t) do { \ @@ -22,7 +22,7 @@ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ mib[4] = (j); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) /******************************************************************************/ @@ -368,45 +368,51 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "config.malloc_conf: \"%s\"\n", config_malloc_conf); #define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == \ + 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s\n", bv ? "true" : "false"); \ } #define OPT_WRITE_BOOL_MUTABLE(n, m) { \ bool bv2; \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == \ + 0 && je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %s ("#m": %s)\n", bv ? "true" \ : "false", bv2 ? 
"true" : "false"); \ } \ } #define OPT_WRITE_UNSIGNED(n) \ - if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == \ + 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %u\n", uv); \ } #define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == \ + 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zu\n", sv); \ } #define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) \ + == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd\n", ssv); \ } #define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ ssize_t ssv2; \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) \ + == 0 && je_mallctl(#m, &ssv2, &sssz, NULL, 0) == \ + 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": %zd ("#m": %zd)\n", \ ssv, ssv2); \ } \ } #define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) \ + == 0) { \ malloc_cprintf(write_cb, cbopaque, \ " opt."#n": \"%s\"\n", cpv); \ } @@ -462,11 +468,13 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Unused dirty page decay time: %zd%s\n", ssv, (ssv < 0) ? 
" (no decay)" : ""); - if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) + == 0) { malloc_cprintf(write_cb, cbopaque, "Maximum thread-cached size class: %zu\n", sv); } - if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { + if (je_mallctl("opt.prof", (void *)&bv, &bsz, NULL, 0) == 0 && + bv) { CTL_GET("prof.lg_sample", &sv, size_t); malloc_cprintf(write_cb, cbopaque, "Average profile sample interval: %"FMTu64 @@ -509,8 +517,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i, ninitialized; isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); + xmallctl("arenas.initialized", + (void *)initialized, &isz, NULL, 0); for (i = ninitialized = 0; i < narenas; i++) { if (initialized[i]) ninitialized++; @@ -538,8 +546,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned i; isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); + xmallctl("arenas.initialized", + (void *)initialized, &isz, NULL, 0); for (i = 0; i < narenas; i++) { if (initialized[i]) { diff --git a/src/tcache.c b/src/tcache.c index 98c18a04..7f5b291c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -502,10 +502,10 @@ tcache_boot(tsdn_t *tsdn) unsigned i; /* If necessary, clamp opt_lg_tcache_max. 
*/ - if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; else - tcache_maxclass = (1U << opt_lg_tcache_max); + tcache_maxclass = (ZU(1) << opt_lg_tcache_max); nhbins = size2index(tcache_maxclass) + 1; diff --git a/src/util.c b/src/util.c index a1c4a2a4..881a7fd1 100644 --- a/src/util.c +++ b/src/util.c @@ -200,7 +200,7 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) p++; } if (neg) - ret = -ret; + ret = (uintmax_t)(-((intmax_t)ret)); if (p == ns) { /* No conversion performed. */ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 30c203ae..910a096f 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -19,8 +19,8 @@ thd_start(void *arg) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.extend"); if (thread_ind % 4 != 3) { size_t mib[3]; diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 3630e80c..6ce145b3 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -18,14 +18,14 @@ thd_start(void *arg) size_t sz, usize; sz = sizeof(a0); - if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); - if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, @@ -36,14 +36,15 @@ thd_start(void *arg) "storage"); sz = sizeof(d0); - if ((err = 
mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocated", (void *)&d0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, + 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, @@ -57,9 +58,9 @@ thd_start(void *arg) assert_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); - mallctl("thread.allocated", &a1, &sz, NULL, 0); + mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0); sz = sizeof(ap1); - mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); + mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0); assert_u64_eq(*ap1, a1, "Dereferenced \"thread.allocatedp\" value should equal " "\"thread.allocated\" value"); @@ -74,9 +75,9 @@ thd_start(void *arg) free(p); sz = sizeof(d1); - mallctl("thread.deallocated", &d1, &sz, NULL, 0); + mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0); sz = sizeof(dp1); - mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); + mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0); assert_u64_eq(*dp1, d1, "Dereferenced \"thread.deallocatedp\" value should equal " "\"thread.deallocated\" value"); diff --git a/test/integration/extent.c b/test/integration/extent.c index 8acdad82..2af20ce2 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -194,8 +194,8 @@ TEST_BEGIN(test_extent) bool xallocx_success_a, xallocx_success_b, xallocx_success_c; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; /* Install custom extent hooks. 
*/ @@ -205,8 +205,9 @@ TEST_BEGIN(test_extent) hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(extent_hooks_t *); new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, - &new_hooks, new_size), 0, "Unexpected extent_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, (void *)&new_hooks, new_size), 0, + "Unexpected extent_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks->alloc, extent_alloc, "Unexpected alloc error"); assert_ptr_ne(old_hooks->dalloc, extent_dalloc, @@ -221,12 +222,12 @@ TEST_BEGIN(test_extent) /* Get large size classes. */ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected arenas.lextent.0.size failure"); - assert_d_eq(mallctl("arenas.lextent.1.size", &large1, &sz, NULL, 0), 0, - "Unexpected arenas.lextent.1.size failure"); - assert_d_eq(mallctl("arenas.lextent.2.size", &large2, &sz, NULL, 0), 0, - "Unexpected arenas.lextent.2.size failure"); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected arenas.lextent.0.size failure"); + assert_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, + 0), 0, "Unexpected arenas.lextent.1.size failure"); + assert_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, + 0), 0, "Unexpected arenas.lextent.2.size failure"); /* Test dalloc/decommit/purge cascade. */ purge_miblen = sizeof(purge_mib)/sizeof(size_t); @@ -287,9 +288,9 @@ TEST_BEGIN(test_extent) /* Restore extent hooks. 
*/ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, - &old_hooks, new_size), 0, "Unexpected extent_hooks error"); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, - NULL, 0), 0, "Unexpected extent_hooks error"); + (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); assert_ptr_eq(old_hooks, orig_hooks, "Unexpected hooks error"); assert_ptr_eq(old_hooks->alloc, orig_hooks->alloc, "Unexpected alloc error"); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 9d623eb7..4fd290c0 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -11,7 +11,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -37,7 +37,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return (ret); diff --git a/test/integration/overflow.c b/test/integration/overflow.c index 8dea1c95..3e1e15f9 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -8,8 +8,8 @@ TEST_BEGIN(test_overflow) void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", &nlextents, &sz, NULL, 0), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ TEST_BEGIN(test_overflow) mib[2] = nlextents - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + NULL, 0), 0, "Unexpected mallctlbymib() error"); assert_ptr_null(malloc(max_size_class + 1), "Expected OOM due to over-sized allocation request"); diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 030fb479..dd89e8cb 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -7,7 +7,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -33,7 +33,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return (ret); diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 67be5351..7a35a635 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -16,8 +16,8 @@ thd_start(void *arg) free(p); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, - sizeof(main_arena_ind)))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, + (void *)&main_arena_ind, sizeof(main_arena_ind)))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -25,7 +25,8 @@ thd_start(void *arg) } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, + 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -50,7 +51,8 @@ TEST_BEGIN(test_thread_arena) assert_ptr_not_null(p, "Error in malloc()"); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, + 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index f4e89c68..2c2825e1 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -16,7 +16,8 @@ thd_start(void *arg) bool e0, e1; sz = sizeof(bool); - if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { + if ((err = mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, + 0))) { if (err == ENOENT) { assert_false(config_tcache, "ENOENT should only be returned if tcache is " @@ -27,53 +28,53 @@ thd_start(void *arg) if (e0) { e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), - 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); 
assert_true(e0, "tcache should be enabled"); } e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() 
error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 4dcf08da..f6083728 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -16,8 +16,8 @@ arena_ind(void) if (ind == 0) { size_t sz = sizeof(ind); - assert_d_eq(mallctl("arenas.extend", &ind, &sz, NULL, 0), 0, - "Unexpected mallctl failure creating arena"); + assert_d_eq(mallctl("arenas.extend", (void *)&ind, &sz, NULL, + 0), 0, "Unexpected mallctl failure creating arena"); } return (ind); @@ -78,7 +78,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -111,7 +111,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 61caf3c5..6c944b2e 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -11,7 +11,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -44,7 +44,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); @@ -90,8 +90,8 @@ TEST_BEGIN(test_arena_reset) tsdn_t *tsdn; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; diff --git a/test/unit/decay.c b/test/unit/decay.c index 058a58cb..7efecf0f 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -38,8 +38,8 @@ TEST_BEGIN(test_decay_ticks) "Unexpected failure getting decay ticker"); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); /* * Test the standard APIs using a large size class, since we can't @@ -170,8 +170,8 @@ TEST_BEGIN(test_decay_ticks) tcache_sizes[1] = 1; sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), - 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { sz = tcache_sizes[i]; @@ -188,7 +188,7 @@ TEST_BEGIN(test_decay_ticks) dallocx(p, MALLOCX_TCACHE(tcache_ind)); tick0 = ticker_read(decay_ticker); assert_d_eq(mallctl("tcache.flush", NULL, NULL, - &tcache_ind, sizeof(unsigned)), 0, + (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, @@ -221,8 +221,8 @@ TEST_BEGIN(test_decay_ticker) size_t tcache_max; sz = sizeof(size_t); - 
assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); large = nallocx(tcache_max + 1, flags); } else { sz = sizeof(size_t); @@ -232,11 +232,11 @@ TEST_BEGIN(test_decay_ticker) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, + NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); for (i = 0; i < NPS; i++) { ps[i] = mallocx(large, flags); @@ -276,11 +276,11 @@ TEST_BEGIN(test_decay_ticker) assert_ptr_not_null(p, "Unexpected mallocx() failure"); dallocx(p, flags); } - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, - NULL, 0), config_stats ? 0 : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, + &sz, NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); nstime_update(&time); @@ -304,16 +304,16 @@ TEST_BEGIN(test_decay_nonmonotonic) unsigned i, nupdates0; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, + NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); nupdates_mock = 0; nstime_init(&time_mock, 0); @@ -339,11 +339,11 @@ TEST_BEGIN(test_decay_nonmonotonic) "Expected nstime_update() to be called"); } - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, + NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); if (config_stats) assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index d8928da0..43fa3604 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -13,7 +13,7 @@ TEST_BEGIN(test_small_extent_size) */ sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0, @@ -21,8 +21,8 @@ TEST_BEGIN(test_small_extent_size) for (i = 0; i < nbins; i++) { mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &extent_size, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, + NULL, 0), 0, "Unexpected mallctlbymib failure"); assert_zu_eq(extent_size, extent_size_quantize_floor(extent_size), "Small extent quantization should be a no-op " @@ -49,12 +49,12 @@ TEST_BEGIN(test_large_extent_size) */ sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", &nlextents, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); @@ -63,8 +63,8 @@ TEST_BEGIN(test_large_extent_size) mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &lextent_size, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void 
*)&lextent_size, + &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); extent_size = cache_oblivious ? lextent_size + PAGE : lextent_size; floor = extent_size_quantize_floor(extent_size); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 0e979a11..5073c7b1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -12,16 +12,18 @@ TEST_BEGIN(test_mallctl_errors) EPERM, "mallctl() should return EPERM on attempt to write " "read-only value"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)-1), - EINVAL, "mallctl() should return EINVAL for input size mismatch"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)+1), - EINVAL, "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)-1), EINVAL, + "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)+1), EINVAL, + "mallctl() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } TEST_END @@ -56,18 +58,20 @@ TEST_BEGIN(test_mallctlbymib_errors) assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)+1), EINVAL, 
"mallctlbymib() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); } TEST_END @@ -83,18 +87,19 @@ TEST_BEGIN(test_mallctl_read_write) assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read. */ - assert_d_eq(mallctl("epoch", &old_epoch, &sz, NULL, 0), 0, + assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - assert_d_eq(mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, + sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ - assert_d_eq(mallctl("epoch", &old_epoch, &sz, &new_epoch, - sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, + (void *)&new_epoch, sizeof(new_epoch)), 0, + "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -120,8 +125,8 @@ TEST_BEGIN(test_mallctl_config) #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ - assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ - 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_b_eq(oldval, config_##config, "Incorrect config value"); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) @@ -153,7 +158,8 @@ TEST_BEGIN(test_mallctl_opt) t oldval; \ size_t sz = sizeof(oldval); \ int expected = config_##config ? 0 : ENOENT; \ - int result = mallctl("opt."#opt, &oldval, &sz, NULL, 0); \ + int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ + 0); \ assert_d_eq(result, expected, \ "Unexpected mallctl() result for opt."#opt); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ @@ -191,7 +197,7 @@ TEST_BEGIN(test_manpage_example) size_t len, miblen; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; @@ -202,8 +208,8 @@ TEST_BEGIN(test_manpage_example) mib[2] = i; len = sizeof(bin_size); - assert_d_eq(mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0), - 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, + NULL, 0), 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... */ } } @@ -252,25 +258,25 @@ TEST_BEGIN(test_tcache) /* Create tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + (void *)&tis[i], sizeof(unsigned)), 0, + "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -315,16 +321,16 @@ TEST_BEGIN(test_tcache) /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + (void *)&tis[i], sizeof(unsigned)), 0, + "Unexpected mallctl() failure, i=%u", i); } } TEST_END @@ -334,15 +340,17 @@ TEST_BEGIN(test_thread_arena) unsigned arena_old, arena_new, narenas; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); arena_new = narenas - 1; - assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, - sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, + (void *)&arena_new, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); arena_new = 0; - assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, - sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, + (void *)&arena_new, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); } TEST_END @@ -351,25 +359,25 @@ TEST_BEGIN(test_arena_i_decay_time) ssize_t decay_time, orig_decay_time, prev_decay_time; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.decay_time", &orig_decay_time, &sz, + assert_d_eq(mallctl("arena.0.decay_time", (void *)&orig_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); decay_time = -2; assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), EFAULT, + (void *)&decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); decay_time = 0x7fffffff; assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), 0, + (void *)&decay_time, sizeof(ssize_t)), 0, "Unexpected 
mallctl() failure"); for (prev_decay_time = decay_time, decay_time = -1; decay_time < 20; prev_decay_time = decay_time, decay_time++) { ssize_t old_decay_time; - assert_d_eq(mallctl("arena.0.decay_time", &old_decay_time, - &sz, &decay_time, sizeof(ssize_t)), 0, + assert_d_eq(mallctl("arena.0.decay_time", (void *)&old_decay_time, + &sz, (void *)&decay_time, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_decay_time, prev_decay_time, "Unexpected old arena.0.decay_time"); @@ -387,8 +395,8 @@ TEST_BEGIN(test_arena_i_purge) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; @@ -407,8 +415,8 @@ TEST_BEGIN(test_arena_i_decay) assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; @@ -429,31 +437,35 @@ TEST_BEGIN(test_arena_i_dss) "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, 
&dss_prec_new, &sz, &dss_prec_old, - sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); } @@ -464,14 +476,14 @@ TEST_BEGIN(test_arenas_initialized) unsigned narenas; size_t sz = sizeof(narenas); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); { VARIABLE_ARRAY(bool, initialized, narenas); sz = narenas * sizeof(bool); - assert_d_eq(mallctl("arenas.initialized", initialized, &sz, - NULL, 0), 0, "Unexpected 
mallctl() failure"); + assert_d_eq(mallctl("arenas.initialized", (void *)initialized, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); } } TEST_END @@ -481,26 +493,26 @@ TEST_BEGIN(test_arenas_decay_time) ssize_t decay_time, orig_decay_time, prev_decay_time; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.decay_time", &orig_decay_time, &sz, + assert_d_eq(mallctl("arenas.decay_time", (void *)&orig_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); decay_time = -2; assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), EFAULT, + (void *)&decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); decay_time = 0x7fffffff; assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), 0, + (void *)&decay_time, sizeof(ssize_t)), 0, "Expected mallctl() failure"); for (prev_decay_time = decay_time, decay_time = -1; decay_time < 20; prev_decay_time = decay_time, decay_time++) { ssize_t old_decay_time; - assert_d_eq(mallctl("arenas.decay_time", &old_decay_time, - &sz, &decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.decay_time", + (void *)&old_decay_time, &sz, (void *)&decay_time, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_decay_time, prev_decay_time, "Unexpected old arenas.decay_time"); } @@ -513,8 +525,8 @@ TEST_BEGIN(test_arenas_constants) #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas."#name, &name, &sz, NULL, 0), 0, \ - "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -533,8 +545,8 @@ TEST_BEGIN(test_arenas_bin_constants) #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.bin.0."#name, 
&name, &sz, NULL, 0), \ - 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -553,8 +565,8 @@ TEST_BEGIN(test_arenas_lextent_constants) #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lextent.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -569,12 +581,12 @@ TEST_BEGIN(test_arenas_extend) unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", &narenas_before, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.extend", &arena, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas_after, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas_before+1, narenas_after, "Unexpected number of arenas before versus after extension"); @@ -588,8 +600,8 @@ TEST_BEGIN(test_stats_arenas) #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("stats.arenas.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ } while (0) TEST_STATS_ARENAS(unsigned, nthreads); diff --git 
a/test/unit/prof_accum.c b/test/unit/prof_accum.c index fd229e0f..d941b5bc 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -68,8 +68,9 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 81490957..d00943a4 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -12,7 +12,7 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, &old, &sz, NULL, 0), 0, + assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, name); @@ -26,7 +26,8 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, &old, &sz, &val_new, sizeof(val_new)), 0, + assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, + sizeof(val_new)), 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index ca93f300..cb99acdf 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -28,8 +28,9 @@ TEST_BEGIN(test_gdump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl 
failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; @@ -45,8 +46,8 @@ TEST_BEGIN(test_gdump) gdump = false; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, - sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; @@ -56,8 +57,8 @@ TEST_BEGIN(test_gdump) gdump = true; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, - sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 2b0639d8..c293350f 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -36,8 +36,9 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 5ae45fd2..59d70796 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -20,8 +20,8 @@ static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, "Unexpected mallctl failure"); } static size_t @@ -30,7 +30,8 @@ get_lg_prof_sample(void) size_t lg_prof_sample; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 
0, + assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, + NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); return (lg_prof_sample); } @@ -39,7 +40,7 @@ static void do_prof_reset(size_t lg_prof_sample) { assert_d_eq(mallctl("prof.reset", NULL, NULL, - &lg_prof_sample, sizeof(size_t)), 0, + (void *)&lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), "Expected profile sample rate change"); @@ -54,8 +55,8 @@ TEST_BEGIN(test_prof_reset_basic) test_skip_if(!config_prof); sz = sizeof(size_t); - assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz, - NULL, 0), 0, + assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, + &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); assert_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index f501158d..9ec54977 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -12,8 +12,9 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, NULL, 0), - 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", + assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + NULL, 0), 0, + "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); assert_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); @@ -26,8 +27,8 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), 0, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), 0, 
"%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); @@ -46,15 +47,15 @@ TEST_BEGIN(test_prof_thread_name_validation) /* NULL input shouldn't be allowed. */ thread_name = NULL; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); /* '\n' shouldn't be allowed. */ thread_name = "hi\nthere"; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); @@ -64,8 +65,9 @@ TEST_BEGIN(test_prof_thread_name_validation) size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, - &thread_name, sizeof(thread_name)), EPERM, + assert_d_eq(mallctl("thread.prof.name", + (void *)&thread_name_old, &sz, (void *)&thread_name, + sizeof(thread_name)), EPERM, "Unexpected mallctl result writing \"%s\" to " "thread.prof.name", thread_name); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index f5a5873d..d4875549 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -8,8 +8,8 @@ get_max_size_class(void) size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", &nlextents, &sz, NULL, 0), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ get_max_size_class(void) mib[2] = nlextents - 1; sz = sizeof(size_t); - 
assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + NULL, 0), 0, "Unexpected mallctlbymib() error"); return (max_size_class); } diff --git a/test/unit/stats.c b/test/unit/stats.c index ed0d3fe9..a99a88f0 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -6,14 +6,14 @@ TEST_BEGIN(test_stats_summary) int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); - assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0), + assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.resident", &resident, &sz, NULL, 0), + assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); if (config_stats) { assert_zu_le(allocated, active, @@ -38,19 +38,21 @@ TEST_BEGIN(test_stats_large) p = mallocx(SMALL_MAXCLASS+1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() 
result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -76,8 +78,8 @@ TEST_BEGIN(test_stats_arenas_summary) uint64_t npurge, nmadvise, purged; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); @@ -92,19 +94,19 @@ TEST_BEGIN(test_stats_arenas_summary) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.mapped", &mapped, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge, &sz, 
NULL, 0), - expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.nmadvise", &nmadvise, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.purged", &purged, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.nmadvise", (void *)&nmadvise, &sz, + NULL, 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.purged", (void *)&purged, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); if (config_stats) { assert_u64_gt(npurge, 0, @@ -142,8 +144,8 @@ TEST_BEGIN(test_stats_arenas_small) no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -151,19 +153,21 @@ TEST_BEGIN(test_stats_arenas_small) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.small.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -189,23 +193,24 @@ TEST_BEGIN(test_stats_arenas_large) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx((1U << LG_LARGE_MINCLASS), 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -230,8 +235,8 @@ TEST_BEGIN(test_stats_arenas_bins) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(arena_bin_info[0].reg_size, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -239,35 +244,36 @@ TEST_BEGIN(test_stats_arenas_bins) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", (void *)&curregs, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, - NULL, 0), config_tcache ? 
expected : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", (void *)&nfills, + &sz, NULL, 0), config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", &nflushes, &sz, - NULL, 0), config_tcache ? expected : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", (void *)&nflushes, + &sz, NULL, 0), config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nslabs", &nslabs, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nreslabs", &nreslabs, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nslabs", (void *)&nslabs, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nreslabs", (void *)&nreslabs, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curslabs", &curslabs, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curslabs", (void *)&curslabs, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -303,27 +309,30 @@ TEST_BEGIN(test_stats_arenas_lextents) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", &hsize, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); p = mallocx(hsize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", + (void *)&nmalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", + (void *)&ndalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); assert_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", - &curlextents, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + (void *)&curlextents, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, From 68e14c988460907c4b135feb3eb5fccd28953feb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 00:16:55 -0700 Subject: [PATCH 0427/2608] Fix over-sized allocation of rtree leaf nodes. Use the correct level metadata when allocating child nodes so that leaf nodes don't end up over-sized (2^16 elements vs 2^4 elements). 
--- src/rtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rtree.c b/src/rtree.c index d4a705ae..0a42a982 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -171,7 +171,7 @@ rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level) { - return (rtree_node_init(tsdn, rtree, level, &elm->child)); + return (rtree_node_init(tsdn, rtree, level+1, &elm->child)); } static int From dc553d52d82380ab0c99bb0fa97ae5c1f11ab2be Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 00:41:15 -0700 Subject: [PATCH 0428/2608] Fix over-sized allocation of rtree leaf nodes. Use the correct level metadata when allocating child nodes so that leaf nodes don't end up over-sized (2^16 elements vs 2^4 elements). --- src/rtree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/rtree.c b/src/rtree.c index 5590034b..f2e2997d 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -128,5 +128,5 @@ rtree_node_elm_t * rtree_child_read_hard(rtree_t *rtree, rtree_node_elm_t *elm, unsigned level) { - return (rtree_node_init(rtree, level, &elm->child)); + return (rtree_node_init(rtree, level+1, &elm->child)); } From 1eb801bcad74f4b7eb4d5ab3ce2d67935c10ac58 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 15:41:43 -0700 Subject: [PATCH 0429/2608] Do not force lazy lock on Windows. This reverts 13473c7c66a81a4dc1cf11a97e9c8b1dbb785b64, which was intended to work around bootstrapping issues when linking statically. However, this actually causes problems in various other configurations, so this reversion may force a future fix for the underlying problem, if it still exists. 
--- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index a7da9ffe..d5ffd4ca 100644 --- a/configure.ac +++ b/configure.ac @@ -426,7 +426,6 @@ case "${host}" in *-*-mingw* | *-*-cygwin*) abi="pecoff" force_tls="0" - force_lazy_lock="1" maps_coalesce="0" RPATH="" so="dll" From 875ff15e6a99f4ff4d8aaaaf76d9dc5f3d8f1e39 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 17:10:56 -0700 Subject: [PATCH 0430/2608] Only use --whole-archive with gcc. Conditionalize use of --whole-archive on the platform plus compiler, rather than on the ABI. This fixes a regression caused by 7b24c6e5570062495243f1e55131b395adb31e33 (Use --whole-archive when linking integration tests on MinGW.). --- Makefile.in | 5 +++-- configure.ac | 3 +++ msvc/ReadMe.txt | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index de282186..11e94b7c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -52,6 +52,7 @@ enable_prof := @enable_prof@ enable_valgrind := @enable_valgrind@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF +link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ SOREV = @SOREV@ PIC_CFLAGS = @PIC_CFLAGS@ @@ -125,7 +126,7 @@ DSOS := $(objroot)lib/$(LIBJEMALLOC).$(SOREV) ifneq ($(SOREV),$(SO)) DSOS += $(objroot)lib/$(LIBJEMALLOC).$(SO) endif -ifeq (pecoff, $(ABI)) +ifeq (1, $(link_whole_archive)) LJEMALLOC := -Wl,--whole-archive -L$(objroot)lib -l$(LIBJEMALLOC) -Wl,--no-whole-archive else LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @@ -141,7 +142,7 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c -ifeq (pecoff, $(ABI)) +ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c 
$(srcroot)src/util.c diff --git a/configure.ac b/configure.ac index d5ffd4ca..c45d8214 100644 --- a/configure.ac +++ b/configure.ac @@ -313,6 +313,7 @@ o="$ac_objext" a="a" exe="$ac_exeext" libprefix="lib" +link_whole_archive="0" DSO_LDFLAGS='-shared -Wl,-soname,$(@F)' RPATH='-Wl,-rpath,$(1)' SOREV="${so}.${rev}" @@ -442,6 +443,7 @@ case "${host}" in else importlib="${so}" DSO_LDFLAGS="-shared" + link_whole_archive="1" fi a="lib" libprefix="" @@ -479,6 +481,7 @@ AC_SUBST([o]) AC_SUBST([a]) AC_SUBST([exe]) AC_SUBST([libprefix]) +AC_SUBST([link_whole_archive]) AC_SUBST([DSO_LDFLAGS]) AC_SUBST([EXTRA_LDFLAGS]) AC_SUBST([SOREV]) diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index b1c2fc5c..77d567da 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -17,7 +17,7 @@ How to build jemalloc for Windows (note: x86/x64 doesn't matter at this point) 5. Generate header files: - sh -c "./autogen.sh" CC=cl --enable-lazy-lock=no + sh -c "CC=cl ./autogen.sh" 6. Now the project can be opened and built in Visual Studio: msvc\jemalloc_vc2015.sln From e7d6779918d3e18178418e69e8ca496f2f6446f7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:23:48 -0700 Subject: [PATCH 0431/2608] Only link with libm (-lm) if necessary. This fixes warnings when building with MSVC. 
--- Makefile.in | 7 ++++--- configure.ac | 15 ++++++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Makefile.in b/Makefile.in index 11e94b7c..9d6b2dba 100644 --- a/Makefile.in +++ b/Makefile.in @@ -63,6 +63,7 @@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ CC_MM = @CC_MM@ +LM := @LM@ INSTALL = @INSTALL@ ifeq (macho, $(ABI)) @@ -308,15 +309,15 @@ $(STATIC_LIBS): $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) - $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) diff --git a/configure.ac b/configure.ac index c45d8214..d369d6cc 100644 --- 
a/configure.ac +++ b/configure.ac @@ -494,6 +494,15 @@ AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) AC_SUBST([CC_MM]) +dnl Determine whether libm must be linked to use e.g. log(3). +AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) +if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" +else + LM= +fi +AC_SUBST(LM) + JE_COMPILABLE([__attribute__ syntax], [static __attribute__((unused)) void foo(void){}], [], @@ -940,9 +949,9 @@ fi AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then - if test "x$abi" != "xpecoff"; then - dnl Heap profiling uses the log(3) function. - LIBS="$LIBS -lm" + dnl Heap profiling uses the log(3) function. + if test "x$LM" != "x" ; then + LIBS="$LIBS $LM" fi AC_DEFINE([JEMALLOC_PROF], [ ]) From 963289df13115001f85b028744dd5f4070b9dc05 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 10:44:39 -0700 Subject: [PATCH 0432/2608] Periodically purge in memory-intensive integration tests. This resolves #393. 
--- test/integration/aligned_alloc.c | 13 ++++++++++--- test/integration/mallocx.c | 7 +++++++ test/integration/posix_memalign.c | 13 ++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index ec2f5a7b..3f1c67ab 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,8 +1,6 @@ #include "test/jemalloc_test.h" -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 25) TEST_BEGIN(test_alignment_errors) { @@ -73,6 +71,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; @@ -109,7 +108,15 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } +#undef NITER } TEST_END diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 4fd290c0..175be8e1 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -196,6 +196,13 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. 
+ */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index d5e39b63..a64886d6 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,8 +1,6 @@ #include "test/jemalloc_test.h" -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 25) TEST_BEGIN(test_alignment_errors) { @@ -65,6 +63,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; int err; @@ -103,7 +102,15 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } +#undef NITER } TEST_END From 970d29325770a5006dc227f30481024c8661df51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 11:00:36 -0700 Subject: [PATCH 0433/2608] Periodically purge in memory-intensive integration tests. This resolves #393. --- test/integration/mallocx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 175be8e1..3b8097ed 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -139,6 +139,13 @@ TEST_BEGIN(test_basic) rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. 
+ */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } #undef MAXSZ } From bde815dc40c636523382912ecba443cc50b0eccd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 11:23:24 -0700 Subject: [PATCH 0434/2608] Reduce memory requirements for regression tests. This is intended to drop memory usage to a level that AppVeyor test instances can handle. This resolves #393. --- test/integration/aligned_alloc.c | 23 ++++++++++------ test/integration/mallocx.c | 44 ++++++++++++++++++------------- test/integration/posix_memalign.c | 23 ++++++++++------ 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 3f1c67ab..36fb6997 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,6 +1,19 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -108,13 +121,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. 
- */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef NITER } diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 3b8097ed..2298f729 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -50,6 +50,19 @@ get_large_size(size_t ind) return (get_size_impl("arenas.lextent.0.size", ind)); } +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} + TEST_BEGIN(test_overflow) { size_t largemax; @@ -96,6 +109,7 @@ TEST_BEGIN(test_oom) if (ptrs[i] != NULL) dallocx(ptrs[i], 0); } + purge(); #if LG_SIZEOF_PTR == 3 assert_ptr_null(mallocx(0x8000000000000000ULL, @@ -113,7 +127,7 @@ TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 26) +#define MAXSZ (((size_t)1) << 23) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ -122,30 +136,28 @@ TEST_BEGIN(test_basic) nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", + nsz); rsz = 
sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef MAXSZ } @@ -153,7 +165,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; @@ -203,13 +215,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index a64886d6..9f3156ac 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,6 +1,19 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -102,13 +115,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. 
- */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef NITER } From 2c53faf352ca7722f1a776c8c381b01da5b4fa96 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 10:44:39 -0700 Subject: [PATCH 0435/2608] Periodically purge in memory-intensive integration tests. This resolves #393. --- test/integration/aligned_alloc.c | 13 ++++++++++--- test/integration/mallocx.c | 7 +++++++ test/integration/posix_memalign.c | 13 ++++++++++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 60900148..80bb38f8 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,9 +1,7 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 25) TEST_BEGIN(test_alignment_errors) { @@ -74,6 +72,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; @@ -110,7 +109,15 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } +#undef NITER } TEST_END diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 55e1a090..69ce781e 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -196,6 +196,13 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. 
+ */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 19741c6c..171bcea7 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,9 +1,7 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -/* #define MAXALIGN ((size_t)UINT64_C(0x80000000000)) */ -#define MAXALIGN ((size_t)0x2000000LU) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 25) TEST_BEGIN(test_alignment_errors) { @@ -66,6 +64,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { +#define NITER 4 size_t alignment, size, total; unsigned i; int err; @@ -104,7 +103,15 @@ TEST_BEGIN(test_alignment_and_size) } } } + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. + */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } +#undef NITER } TEST_END From eaecaad8ea9fd9cd8b57e49834b5e3332f911c40 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 11:00:36 -0700 Subject: [PATCH 0436/2608] Periodically purge in memory-intensive integration tests. This resolves #393. --- test/integration/mallocx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 69ce781e..79ab4940 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -139,6 +139,13 @@ TEST_BEGIN(test_basic) rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); + /* + * On systems which can't merge extents, this test generates a + * lot of dirty memory very quickly. Purge between cycles to + * avoid potential OOM on e.g. 32-bit Windows. 
+ */ + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); } #undef MAXSZ } From b99c72f3d29e3590ae81959922d0032a29dbace9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 11:23:24 -0700 Subject: [PATCH 0437/2608] Reduce memory requirements for regression tests. This is intended to drop memory usage to a level that AppVeyor test instances can handle. This resolves #393. --- test/integration/aligned_alloc.c | 23 ++++++++++------ test/integration/mallocx.c | 44 ++++++++++++++++++------------- test/integration/posix_memalign.c | 23 ++++++++++------ 3 files changed, 55 insertions(+), 35 deletions(-) diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 80bb38f8..58438421 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,7 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -109,13 +122,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. 
- */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef NITER } diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 79ab4940..43b76eba 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -50,6 +50,19 @@ get_huge_size(size_t ind) return (get_size_impl("arenas.hchunk.0.size", ind)); } +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} + TEST_BEGIN(test_overflow) { size_t hugemax; @@ -96,6 +109,7 @@ TEST_BEGIN(test_oom) if (ptrs[i] != NULL) dallocx(ptrs[i], 0); } + purge(); #if LG_SIZEOF_PTR == 3 assert_ptr_null(mallocx(0x8000000000000000ULL, @@ -113,7 +127,7 @@ TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 26) +#define MAXSZ (((size_t)1) << 23) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ -122,30 +136,28 @@ TEST_BEGIN(test_basic) nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_ptr_not_null(p, + "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", + nsz); rsz = 
sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); dallocx(p, 0); - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef MAXSZ } @@ -153,7 +165,7 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; @@ -203,13 +215,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. - */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef MAXALIGN #undef NITER diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 171bcea7..e22e1020 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,7 +1,20 @@ #include "test/jemalloc_test.h" #define CHUNK 0x400000 -#define MAXALIGN (((size_t)1) << 25) +#define MAXALIGN (((size_t)1) << 23) + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) +{ + + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} TEST_BEGIN(test_alignment_errors) { @@ -103,13 +116,7 @@ TEST_BEGIN(test_alignment_and_size) } } } - /* - * On systems which can't merge extents, this test generates a - * lot of dirty memory very quickly. Purge between cycles to - * avoid potential OOM on e.g. 32-bit Windows. 
- */ - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl error"); + purge(); } #undef NITER } From 830938840865fe236ae2bdc0abdb0d5778146859 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Fri, 28 Oct 2016 13:51:52 -0700 Subject: [PATCH 0438/2608] Support static linking of jemalloc with glibc glibc defines its malloc implementation with several weak and strong symbols: strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc) strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree) strong_alias (__libc_free, __free) strong_alias (__libc_free, free) strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc) The issue is not with the weak symbols, but that other parts of glibc depend on __libc_malloc explicitly. Defining them in terms of jemalloc API's allows the linker to drop glibc's malloc.o completely from the link, and static linking no longer results in symbol collisions. Another wrinkle: jemalloc during initialization calls sysconf to get the number of CPU's. GLIBC allocates for the first time before setting up isspace (and other related) tables, which are used by sysconf. Instead, use the pthread API to get the number of CPUs with GLIBC, which seems to work. This resolves #442. 
--- .../internal/jemalloc_internal_decls.h | 3 ++ src/jemalloc.c | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 910b2fc6..1d7f2075 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,6 +17,9 @@ # include # endif # include +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif # include # include # include diff --git a/src/jemalloc.c b/src/jemalloc.c index 1f951e2f..816cc73c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -749,6 +749,18 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) + /* + * glibc's sysconf() uses isspace(). glibc allocates for the first time + * *before* setting up the isspace tables. Therefore we need a + * different method to get the number of CPUs. + */ + { + cpu_set_t set; + + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); + result = CPU_COUNT(&set); + } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -1882,6 +1894,25 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif + +/* + * To enable static linking with glibc, the libc specific malloc interface must + * be implemented also, so none of glibc's malloc.o functions are added to the + * link. + */ +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +/* To force macro expansion of je_ prefix before stringification. 
*/ +#define PREALIAS(je_fn) ALIAS(je_fn) +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +void __libc_free(void* ptr) PREALIAS(je_free); +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +int __posix_memalign(void** r, size_t a, size_t s) + PREALIAS(je_posix_memalign); +#undef PREALIAS +#undef ALIAS #endif /* From ed84764a2a6d766a74fa1df3223d69977d732510 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Fri, 28 Oct 2016 13:51:52 -0700 Subject: [PATCH 0439/2608] Support static linking of jemalloc with glibc glibc defines its malloc implementation with several weak and strong symbols: strong_alias (__libc_calloc, __calloc) weak_alias (__libc_calloc, calloc) strong_alias (__libc_free, __cfree) weak_alias (__libc_free, cfree) strong_alias (__libc_free, __free) strong_alias (__libc_free, free) strong_alias (__libc_malloc, __malloc) strong_alias (__libc_malloc, malloc) The issue is not with the weak symbols, but that other parts of glibc depend on __libc_malloc explicitly. Defining them in terms of jemalloc API's allows the linker to drop glibc's malloc.o completely from the link, and static linking no longer results in symbol collisions. Another wrinkle: jemalloc during initialization calls sysconf to get the number of CPU's. GLIBC allocates for the first time before setting up isspace (and other related) tables, which are used by sysconf. Instead, use the pthread API to get the number of CPUs with GLIBC, which seems to work. This resolves #442. 
--- .../internal/jemalloc_internal_decls.h | 3 ++ src/jemalloc.c | 31 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 910b2fc6..1d7f2075 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,6 +17,9 @@ # include # endif # include +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif # include # include # include diff --git a/src/jemalloc.c b/src/jemalloc.c index 53fcae34..b370f9c8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -808,6 +808,18 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) + /* + * glibc's sysconf() uses isspace(). glibc allocates for the first time + * *before* setting up the isspace tables. Therefore we need a + * different method to get the number of CPUs. + */ + { + cpu_set_t set; + + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); + result = CPU_COUNT(&set); + } #else result = sysconf(_SC_NPROCESSORS_ONLN); #endif @@ -2036,6 +2048,25 @@ JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif + +/* + * To enable static linking with glibc, the libc specific malloc interface must + * be implemented also, so none of glibc's malloc.o functions are added to the + * link. + */ +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +/* To force macro expansion of je_ prefix before stringification. 
*/ +#define PREALIAS(je_fn) ALIAS(je_fn) +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +void __libc_free(void* ptr) PREALIAS(je_free); +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +int __posix_memalign(void** r, size_t a, size_t s) + PREALIAS(je_posix_memalign); +#undef PREALIAS +#undef ALIAS #endif /* From 6ec2d8e279136f97b43078c7267f923a7cb0e571 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 23:03:25 -0700 Subject: [PATCH 0440/2608] Do not mark malloc_conf as weak for unit tests. This is generally correct (no need for weak symbols since no jemalloc library is involved in the link phase), and avoids linking problems (apparently unininitialized non-NULL malloc_conf) when using cygwin with gcc. --- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 816cc73c..7484dd22 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -5,7 +5,11 @@ /* Data. */ /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(weak); +const char *je_malloc_conf +#ifndef JEMALLOC_JET + JEMALLOC_ATTR(weak) +#endif + ; bool opt_abort = #ifdef JEMALLOC_DEBUG true From 35799a50308b5c88ba8ed41f4e48d3b619482c7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 23:03:25 -0700 Subject: [PATCH 0441/2608] Do not mark malloc_conf as weak for unit tests. This is generally correct (no need for weak symbols since no jemalloc library is involved in the link phase), and avoids linking problems (apparently unininitialized non-NULL malloc_conf) when using cygwin with gcc. 
--- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b370f9c8..24357635 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -5,7 +5,11 @@ /* Data. */ /* Runtime configuration options. */ -const char *je_malloc_conf JEMALLOC_ATTR(weak); +const char *je_malloc_conf +#ifndef JEMALLOC_JET + JEMALLOC_ATTR(weak) +#endif + ; bool opt_abort = #ifdef JEMALLOC_DEBUG true From 1dcd0aa07fa16d2f70494baf997ac85b41de2ef1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 23:59:42 -0700 Subject: [PATCH 0442/2608] Do not mark malloc_conf as weak on Windows. This works around malloc_conf not being properly initialized by at least the cygwin toolchain. Prior build system changes to use -Wl,--[no-]whole-archive may be necessary for malloc_conf resolution to work properly as a non-weak symbol (not tested). --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7484dd22..3e0605ec 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,7 +6,7 @@ /* Runtime configuration options. */ const char *je_malloc_conf -#ifndef JEMALLOC_JET +#ifndef _WIN32 JEMALLOC_ATTR(weak) #endif ; From e46f8f97bc4dc3298e1d3b452ee69616fdbcd43e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 28 Oct 2016 23:59:42 -0700 Subject: [PATCH 0443/2608] Do not mark malloc_conf as weak on Windows. This works around malloc_conf not being properly initialized by at least the cygwin toolchain. Prior build system changes to use -Wl,--[no-]whole-archive may be necessary for malloc_conf resolution to work properly as a non-weak symbol (not tested). --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 24357635..8210086d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,7 +6,7 @@ /* Runtime configuration options. 
*/ const char *je_malloc_conf -#ifndef JEMALLOC_JET +#ifndef _WIN32 JEMALLOC_ATTR(weak) #endif ; From af0e28fd942d6f3b6198aebdeea6043b3542d096 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:14:55 -0700 Subject: [PATCH 0444/2608] Fix EXTRA_CFLAGS to not affect configuration. --- Makefile.in | 3 ++- configure.ac | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile.in b/Makefile.in index e00dbebc..e4aaaf21 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,7 +24,8 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ diff --git a/configure.ac b/configure.ac index a9d75111..2dff55b2 100644 --- a/configure.ac +++ b/configure.ac @@ -203,10 +203,7 @@ if test "x$CFLAGS" = "x" ; then fi fi fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. -if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi +AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -1853,6 +1850,7 @@ AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) From d87037a62c651f96ac4fb5a8c9db668697ee96e0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:41:04 -0700 Subject: [PATCH 0445/2608] Use syscall(2) rather than {open,read,close}(2) during boot. Some applications wrap various system calls, and if they call the allocator in their wrappers, unexpected reentry can result. This is not a general solution (many other syscalls are spread throughout the code), but this resolves a bootstrapping issue that is apparently common. 
This resolves #443. --- src/pages.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/pages.c b/src/pages.c index 05b0d690..84e22160 100644 --- a/src/pages.c +++ b/src/pages.c @@ -207,6 +207,11 @@ os_overcommits_sysctl(void) #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* + * Use syscall(2) rather than {open,read,close}(2) when possible to avoid + * reentry during bootstrapping if another library has interposed system call + * wrappers. + */ static bool os_overcommits_proc(void) { @@ -214,12 +219,26 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; +#ifdef SYS_open + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); +#endif if (fd == -1) return (false); /* Error. */ +#ifdef SYS_read + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else nread = read(fd, &buf, sizeof(buf)); +#endif + +#ifdef SYS_close + syscall(SYS_close, fd); +#else close(fd); +#endif + if (nread < 1) return (false); /* Error. */ /* From 35a108c809038179b7d9932447c75e02171dc3e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:14:55 -0700 Subject: [PATCH 0446/2608] Fix EXTRA_CFLAGS to not affect configuration. --- Makefile.in | 3 ++- configure.ac | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Makefile.in b/Makefile.in index 9d6b2dba..d13c7f10 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,7 +24,8 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include -CFLAGS := @CFLAGS@ +EXTRA_CFLAGS := @EXTRA_CFLAGS@ +CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ diff --git a/configure.ac b/configure.ac index d369d6cc..1a89ef1a 100644 --- a/configure.ac +++ b/configure.ac @@ -203,10 +203,7 @@ if test "x$CFLAGS" = "x" ; then fi fi fi -dnl Append EXTRA_CFLAGS to CFLAGS, if defined. 
-if test "x$EXTRA_CFLAGS" != "x" ; then - JE_CFLAGS_APPEND([$EXTRA_CFLAGS]) -fi +AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -1883,6 +1880,7 @@ AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) From c443b67561891ae68d688daf5f8ce37820cdba2b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:41:04 -0700 Subject: [PATCH 0447/2608] Use syscall(2) rather than {open,read,close}(2) during boot. Some applications wrap various system calls, and if they call the allocator in their wrappers, unexpected reentry can result. This is not a general solution (many other syscalls are spread throughout the code), but this resolves a bootstrapping issue that is apparently common. This resolves #443. --- src/pages.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/src/pages.c b/src/pages.c index 05b0d690..84e22160 100644 --- a/src/pages.c +++ b/src/pages.c @@ -207,6 +207,11 @@ os_overcommits_sysctl(void) #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* + * Use syscall(2) rather than {open,read,close}(2) when possible to avoid + * reentry during bootstrapping if another library has interposed system call + * wrappers. + */ static bool os_overcommits_proc(void) { @@ -214,12 +219,26 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; +#ifdef SYS_open + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); +#endif if (fd == -1) return (false); /* Error. 
*/ +#ifdef SYS_read + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else nread = read(fd, &buf, sizeof(buf)); +#endif + +#ifdef SYS_close + syscall(SYS_close, fd); +#else close(fd); +#endif + if (nread < 1) return (false); /* Error. */ /* From 6c80321aed4b620acba4ced1a7a5871377cbc396 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:55:08 -0700 Subject: [PATCH 0448/2608] Use CLOCK_MONOTONIC_COARSE rather than COARSE_MONOTONIC_RAW. The raw clock variant is slow (even relative to plain CLOCK_MONOTONIC), whereas the coarse clock variant is faster than CLOCK_MONOTONIC, but still has resolution (~1ms) that is adequate for our purposes. This resolves #479. --- configure.ac | 12 ++++++------ .../jemalloc/internal/jemalloc_internal_defs.h.in | 4 ++-- src/nstime.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index 2dff55b2..b2616b9a 100644 --- a/configure.ac +++ b/configure.ac @@ -1312,16 +1312,16 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then fi fi -dnl check for CLOCK_MONOTONIC_RAW (Linux-specific). -JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_RAW, ...)], [ +dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific). +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ #include ], [ struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); -], [je_cv_clock_monotonic_raw]) -if test "x${je_cv_clock_monotonic_raw}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW]) + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); +], [je_cv_clock_monotonic_coarse]) +if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) fi dnl check for CLOCK_MONOTONIC. 
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 70b32871..0ba960ba 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -77,9 +77,9 @@ #undef JEMALLOC_HAVE_ISSETUGID /* - * Defined if clock_gettime(CLOCK_MONOTONIC_RAW, ...) is available. + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. diff --git a/src/nstime.c b/src/nstime.c index c420c88d..0948e29f 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -110,14 +110,14 @@ nstime_get(nstime_t *time) nstime_init(time, ticks_100ns * 100); } -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC From 1d57c03e331ec9763ba55476a53aa1716f1bc8e1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 29 Oct 2016 22:55:08 -0700 Subject: [PATCH 0449/2608] Use CLOCK_MONOTONIC_COARSE rather than COARSE_MONOTONIC_RAW. The raw clock variant is slow (even relative to plain CLOCK_MONOTONIC), whereas the coarse clock variant is faster than CLOCK_MONOTONIC, but still has resolution (~1ms) that is adequate for our purposes. This resolves #479. 
--- configure.ac | 12 ++++++------ .../jemalloc/internal/jemalloc_internal_defs.h.in | 4 ++-- src/nstime.c | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index 1a89ef1a..40681d16 100644 --- a/configure.ac +++ b/configure.ac @@ -1342,16 +1342,16 @@ if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then fi fi -dnl check for CLOCK_MONOTONIC_RAW (Linux-specific). -JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_RAW, ...)], [ +dnl check for CLOCK_MONOTONIC_COARSE (Linux-specific). +JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ #include ], [ struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); -], [je_cv_clock_monotonic_raw]) -if test "x${je_cv_clock_monotonic_raw}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW]) + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); +], [je_cv_clock_monotonic_coarse]) +if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) fi dnl check for CLOCK_MONOTONIC. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index d10c8a4f..6824ab74 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -77,9 +77,9 @@ #undef JEMALLOC_HAVE_ISSETUGID /* - * Defined if clock_gettime(CLOCK_MONOTONIC_RAW, ...) is available. + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ -#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +#undef JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE /* * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. 
diff --git a/src/nstime.c b/src/nstime.c index c420c88d..0948e29f 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -110,14 +110,14 @@ nstime_get(nstime_t *time) nstime_init(time, ticks_100ns * 100); } -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_RAW +#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; - clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif JEMALLOC_HAVE_CLOCK_MONOTONIC From 6a834d94bb863d1abd0175a07e98f4b9797fa435 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Oct 2016 11:45:41 -0700 Subject: [PATCH 0450/2608] Refactor witness_unlock() to fix undefined test behavior. This resolves #396. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/witness.h | 39 +++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1bf79ca8..315d2872 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -530,6 +530,7 @@ witness_lock witness_lock_error witness_lockless_error witness_not_owner_error +witness_owner witness_owner_error witness_postfork_child witness_postfork_parent diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 9a2a6760..3bc5ebe8 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -112,6 +112,7 @@ void witness_postfork_child(tsd_t *tsd); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_lockless(tsdn_t *tsdn); @@ -120,12 +121,25 @@ void witness_unlock(tsdn_t *tsdn, witness_t 
*witness); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + JEMALLOC_INLINE void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; if (!config_debug) return; @@ -136,11 +150,8 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } + if (witness_owner(tsd, witness)) + return; witness_owner_error(witness); } @@ -243,10 +254,16 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); } #endif From 4752a54eebe1945d1cf9eeecaccbca3ed743f240 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Oct 2016 11:45:41 -0700 Subject: [PATCH 0451/2608] Refactor witness_unlock() to fix undefined test behavior. This resolves #396. 
--- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/witness.h | 39 +++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 62211790..09ff8324 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -607,6 +607,7 @@ witness_lock witness_lock_error witness_lockless_error witness_not_owner_error +witness_owner witness_owner_error witness_postfork_child witness_postfork_parent diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index d78dca2d..cdf15d79 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -108,6 +108,7 @@ void witness_postfork_child(tsd_t *tsd); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_lockless(tsdn_t *tsdn); @@ -116,12 +117,25 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + JEMALLOC_INLINE void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; if (!config_debug) return; @@ -132,11 +146,8 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return; - } + if 
(witness_owner(tsd, witness)) + return; witness_owner_error(witness); } @@ -238,10 +249,16 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) if (witness->rank == WITNESS_RANK_OMIT) return; - witness_assert_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); } #endif From 90b60eeae4f557fae99158f9899bd7b01a9ac662 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Oct 2016 15:28:22 -0700 Subject: [PATCH 0452/2608] Add an assertion in witness_owner(). --- include/jemalloc/internal/witness.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 3bc5ebe8..26024ac2 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -121,12 +121,15 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +/* Helper, not intended for direct use. */ JEMALLOC_INLINE bool witness_owner(tsd_t *tsd, const witness_t *witness) { witness_list_t *witnesses; witness_t *w; + cassert(config_debug); + witnesses = tsd_witnessesp_get(tsd); ql_foreach(w, witnesses, link) { if (w == witness) From b93f63b3ebab068efdcfa2ea3e19055bfc2e7f82 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Oct 2016 16:32:33 -0700 Subject: [PATCH 0453/2608] Fix extent_rtree acquire() to release element on error. This resolves #480. 
--- src/extent.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index e4ceb8fd..809777a1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -229,8 +229,10 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_last_get(extent), dependent, init_missing); - if (!dependent && *r_elm_b == NULL) + if (!dependent && *r_elm_b == NULL) { + rtree_elm_release(tsdn, &extents_rtree, *r_elm_a); return (true); + } assert(*r_elm_b != NULL); } else *r_elm_b = NULL; From 0ba5b9b6189e16a983d8922d8c5cb6ab421906e8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 31 Oct 2016 22:30:49 -0700 Subject: [PATCH 0454/2608] Add "J" (JSON) support to malloc_stats_print(). This resolves #474. --- doc/jemalloc.xml.in | 44 +- src/stats.c | 1061 +++++++++++++++++++++++++++++-------------- 2 files changed, 754 insertions(+), 351 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5ba44d23..22b3d803 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -410,28 +410,28 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> - The malloc_stats_print() function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message() if - write_cb is NULL. This - function can be called repeatedly. General information that never changes - during execution can be omitted by specifying "g" as a character within - the opts string. Note that - malloc_message() uses the - mallctl*() functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is specified - during configuration, “m” and “a” can be specified - to omit merged arena and per arena statistics, respectively; - “b” and “l” can be specified to omit per size - class statistics for bins and large objects, respectively. 
Unrecognized - characters are silently ignored. Note that thread caching may prevent - some statistics from being completely up to date, since extra locking - would be required to merge counters that track thread cache - operations. + The malloc_stats_print() function writes + summary statistics via the write_cb callback + function pointer and cbopaque data passed to + write_cb, or malloc_message() + if write_cb is NULL. The + statistics are presented in human-readable form unless "J" is specified as + a character within the opts string, in which case + the statistics are presented in JSON + format. This function can be called repeatedly. General + information that never changes during execution can be omitted by + specifying "g" as a character within the opts + string. Note that malloc_message() uses the + mallctl*() functions internally, so inconsistent + statistics can be reported if multiple threads use these functions + simultaneously. If is specified during + configuration, “m” and “a” can be specified to + omit merged arena and per arena statistics, respectively; “b” + and “l” can be specified to omit per size class statistics for + bins and large objects, respectively. Unrecognized characters are + silently ignored. Note that thread caching may prevent some statistics + from being completely up to date, since extra locking would be required to + merge counters that track thread cache operations. The malloc_usable_size() function returns the usable size of the allocation pointed to by diff --git a/src/stats.c b/src/stats.c index 689299fa..dbff6c27 100644 --- a/src/stats.c +++ b/src/stats.c @@ -30,84 +30,108 @@ bool opt_stats_print = false; -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lextents_print( - void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large); - /******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool large, unsigned i) { size_t page; - bool config_tcache, in_gap; + bool config_tcache, in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curslabs regs" - " pgs util nfills nflushes newslabs" - " reslabs\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curslabs regs" - " pgs util newslabs reslabs\n"); - } CTL_GET("arenas.nbins", &nbins, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"bins\": [\n"); + } else { + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curslabs regs pgs util nfills" + " nflushes newslabs reslabs\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curslabs regs pgs util newslabs" + " reslabs\n"); + } + } for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; + size_t reg_size, slab_size, curregs; + size_t curslabs; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreslabs; CTL_M2_M4_GET("stats.arenas.0.bins.0.nslabs", i, j, &nslabs, uint64_t); + in_gap_prev 
= in_gap; if (nslabs == 0) in_gap = true; - else { - size_t reg_size, slab_size, curregs, availregs, milli; - size_t curslabs; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reslabs; - char util[6]; /* "x.yyy". */ - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, - &curregs, size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t); + + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, + &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, + &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, &nreslabs, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, + size_t); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curregs\": %zu,\n" + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", + 
nmalloc, + ndalloc, + curregs, + nrequests); if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, - j, &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", - i, j, &nflushes, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", + nfills, + nflushes); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, - &reslabs, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, - &curslabs, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curslabs\": %zu\n" + "\t\t\t\t\t}%s\n", + nreslabs, + curslabs, + (j + 1 < nbins) ? "," : ""); + } else if (!in_gap) { + size_t availregs, milli; + char util[6]; /* "x.yyy". */ availregs = nregs * curslabs; milli = (availregs != 0) ? (1000 * curregs) / availregs @@ -134,7 +158,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nfills, - nflushes, nslabs, reslabs); + nflushes, nslabs, nreslabs); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -144,28 +168,38 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nslabs, - reslabs); + nreslabs); } } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", large ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_lextents_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i) + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlextents, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curlextents\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lextents\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc" + " ndalloc nrequests curlextents\n"); + } for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; @@ -176,18 +210,26 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lextents.0.nrequests", i, j, &nrequests, uint64_t); + in_gap_prev = in_gap; if (nrequests == 0) in_gap = true; - else { - CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", - i, j, &curlextents, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, + &curlextents, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curlextents\": %zu\n" + "\t\t\t\t\t}%s\n", + curlextents, + (j + 1 < nlextents) ? 
"," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -196,15 +238,20 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), nrequests, curlextents); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]\n"); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large) + bool json, unsigned i, bool bins, bool large) { unsigned nthreads; const char *dss; @@ -219,70 +266,616 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nthreads\": %u,\n", nthreads); + } else { + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + } + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dss\": \"%s\",\n", dss); + } else { + malloc_cprintf(write_cb, cbopaque, + "dss allocation precedence: %s\n", dss); + } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"decay_time\": %zd,\n", decay_time); + } else { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", + decay_time); + } else + malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, 
size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " - "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pactive\": %zu,\n", pactive); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pdirty\": %zu,\n", pdirty); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nmadvise\": %"FMTu64",\n", nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + } else { + malloc_cprintf(write_cb, cbopaque, + "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 + ", ""purged: %"FMTu64"\n", pdirty, npurge, nmadvise, + purged); + } - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc" - " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"small\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": 
%"FMTu64"\n", small_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc" + " ndalloc nrequests\n"); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, + small_nrequests); + } + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated, small_nmalloc + large_nmalloc, - small_ndalloc + large_ndalloc, small_nrequests + large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"large\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, + large_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated, small_nmalloc + + large_nmalloc, 
small_ndalloc + large_ndalloc, + small_nrequests + large_nrequests); + } + if (!json) { + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + } + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"mapped\": %zu,\n", mapped); + } else { + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); + } + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"retained\": %zu,\n", retained); + } else { + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); + } + CTL_M2_GET("stats.arenas.0.metadata", i, &metadata, size_t); - malloc_cprintf(write_cb, cbopaque, "metadata: %12zu\n", - metadata); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metatata\": %zu%s\n", metadata, (bins || large) ? + "," : ""); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata: %12zu\n", metadata); + } if (bins) - stats_arena_bins_print(write_cb, cbopaque, i); + stats_arena_bins_print(write_cb, cbopaque, json, large, i); if (large) - stats_arena_lextents_print(write_cb, cbopaque, i); + stats_arena_lextents_print(write_cb, cbopaque, json, i); +} + +static void +stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged) +{ + const char *cpv; + bool bv; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + ssize_t ssv; + size_t sv, bsz, usz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + usz = sizeof(unsigned); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"version\": \"%s\",\n", cpv); + } else + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + + /* config. 
*/ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ + if (json) { \ + CTL_GET("config."#n, &bv, bool); \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ + (c)); \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"config\": {\n"); + } + + CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") + + CTL_GET("config.debug", &bv, bool); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); + } else { + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? "enabled" : "disabled"); + } + + CONFIG_WRITE_BOOL_JSON(fill, ",") + CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"malloc_conf\": \"%s\",\n", + config_malloc_conf); + } else { + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); + } + + CONFIG_WRITE_BOOL_JSON(munmap, ",") + CONFIG_WRITE_BOOL_JSON(prof, ",") + CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") + CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") + CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(tcache, ",") + CONFIG_WRITE_BOOL_JSON(tls, ",") + CONFIG_WRITE_BOOL_JSON(utrace, ",") + CONFIG_WRITE_BOOL_JSON(xmalloc, "") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } +#undef CONFIG_WRITE_BOOL_JSON + + /* opt. */ +#define OPT_WRITE_BOOL(n, c) \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ + bool bv2; \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? 
"true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? "true" : "false"); \ + } \ + } \ +} +#define OPT_WRITE_UNSIGNED(n, c) \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %u\n", uv); \ + } \ + } +#define OPT_WRITE_SIZE_T(n) \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } \ + } +#define OPT_WRITE_SSIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ + } \ +} +#define OPT_WRITE_CHAR_P(n, c) \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"opt\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); + } + OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_CHAR_P(dss, ",") + OPT_WRITE_UNSIGNED(narenas, ",") + 
OPT_WRITE_CHAR_P(purge, ",") + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + OPT_WRITE_CHAR_P(junk, ",") + OPT_WRITE_BOOL(zero, ",") + OPT_WRITE_BOOL(utrace, ",") + OPT_WRITE_BOOL(xmalloc, ",") + OPT_WRITE_BOOL(tcache, ",") + OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(prof, ",") + OPT_WRITE_CHAR_P(prof_prefix, ",") + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") + OPT_WRITE_BOOL(prof_accum, ",") + OPT_WRITE_SSIZE_T(lg_prof_interval, ",") + OPT_WRITE_BOOL(prof_gdump, ",") + OPT_WRITE_BOOL(prof_final, ",") + OPT_WRITE_BOOL(prof_leak, ",") + /* + * stats_print is always emitted, so as long as stats_print comes last + * it's safe to unconditionally omit the comma here (rather than having + * to conditionally omit it elsewhere depending on configuration). + */ + OPT_WRITE_BOOL(stats_print, "") + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + /* arenas. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &uv, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"narenas\": %u,\n", uv); + } else + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"decay_time\": %zd,\n", ssv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", ssv, (ssv < 0) ? 
+ " (no decay)" : ""); + } + + CTL_GET("arenas.quantum", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"quantum\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"page\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"tcache_max\": %zu,\n", sv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + } + + if (json) { + unsigned nbins, nlextents, i; + + CTL_GET("arenas.nbins", &nbins, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nbins\": %u,\n", nbins); + + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"bin\": [\n"); + for (i = 0; i < nbins; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu,\n", sv); + + CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + + CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"slab_size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nbins) ? 
"," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nlextents", &nlextents, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nlextents\": %u,\n", nlextents); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lextent\": [\n"); + for (i = 0; i < nlextents; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nlextents) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t]\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + + /* prof. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"prof\": {\n"); + + CTL_GET("prof.thread_active_init", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : + "false"); + + CTL_GET("prof.active", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.gdump", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.interval", &u64v, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"interval\": %"FMTu64",\n", u64v); + + CTL_GET("prof.lg_sample", &ssv, ssize_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_sample\": %zd\n", ssv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (config_stats || merged || unmerged) ? 
"," : + ""); + } +} + +static void +stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged, bool bins, bool large) +{ + size_t allocated, active, metadata, resident, mapped, retained; + + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"allocated\": %zu,\n", allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %zu,\n", active); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"resident\": %zu,\n", resident); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"mapped\": %zu,\n", mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"retained\": %zu\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (merged || unmerged) ? "," : ""); + } else { + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); + } + + if (merged || unmerged) { + unsigned narenas; + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats.arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, j, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", (void *)initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"merged\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + narenas, bins, large); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + + /* Unmerged stats. */ + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", i); + } else { + malloc_cprintf(write_cb, + cbopaque, "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, cbopaque, + json, i, bins, large); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? "," : ""); + } + } + } + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t}\n"); + } + } } void @@ -292,6 +885,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; + bool json = false; bool general = true; bool merged = true; bool unmerged = true; @@ -324,6 +918,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { + case 'J': + json = true; + break; case 'g': general = false; break; @@ -344,222 +941,28 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, usz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - usz = sizeof(unsigned); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? 
"enabled" : "disabled"); + if (json) { malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - -#define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == \ - 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ - bool bv2; \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == \ - 0 && je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? "true" : "false"); \ - } \ -} -#define OPT_WRITE_UNSIGNED(n) \ - if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == \ - 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == \ - 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) \ - == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) \ - == 0 && je_mallctl(#m, &ssv2, &sssz, NULL, 0) == \ - 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ -} -#define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) \ - == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } - + "{\n" + "\t\"jemalloc\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_UNSIGNED(narenas) - OPT_WRITE_CHAR_P(purge) - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - 
OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, - prof.thread_active_init) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); - - CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); - - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", - sv); - - CTL_GET("arenas.page", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - CTL_GET("arenas.decay_time", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", ssv, (ssv < 0) ? 
- " (no decay)" : ""); - if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) - == 0) { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - if (je_mallctl("opt.prof", (void *)&bv, &bsz, NULL, 0) == 0 && - bv) { - CTL_GET("prof.lg_sample", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"FMTu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"FMTu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } - } + "___ Begin jemalloc statistics ___\n"); } + if (general) + stats_general_print(write_cb, cbopaque, json, merged, unmerged); if (config_stats) { - size_t allocated, active, metadata, resident, mapped, retained; - - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - CTL_GET("stats.retained", &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", - (void *)initialized, &isz, NULL, 0); - for (i = ninitialized = 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1 || !unmerged) { - /* Print merged arena stats. 
*/ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas, bins, large); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. */ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", - (void *)initialized, &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large); - } - } - } - } + stats_print_helper(write_cb, cbopaque, json, merged, unmerged, + bins, large); + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t}\n" + "}\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "--- End jemalloc statistics ---\n"); } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } From 2a2d1b6e86bcad3b0025b6b62689e9d0e666e155 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2016 13:25:42 -0700 Subject: [PATCH 0455/2608] Use ... rather than “...” or "..." in XML. --- doc/jemalloc.xml.in | 62 +++++++++++++++++++++++---------------------- doc/stylesheet.xsl | 2 +- 2 files changed, 33 insertions(+), 31 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 22b3d803..747cc071 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -375,7 +375,7 @@ The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed + to a Management Information Base (MIB) that can be passed repeatedly to mallctlbymib(). 
Upon successful return from mallctlnametomib(), mibp contains an array of @@ -415,20 +415,21 @@ for (i = 0; i < nbins; i++) { function pointer and cbopaque data passed to write_cb, or malloc_message() if write_cb is NULL. The - statistics are presented in human-readable form unless "J" is specified as - a character within the opts string, in which case - the statistics are presented in JSON - format. This function can be called repeatedly. General - information that never changes during execution can be omitted by - specifying "g" as a character within the opts - string. Note that malloc_message() uses the + statistics are presented in human-readable form unless J is + specified as a character within the opts string, in + which case the statistics are presented in JSON format. This function can be + called repeatedly. General information that never changes during + execution can be omitted by specifying g as a character + within the opts string. Note that + malloc_message() uses the mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously. If is specified during - configuration, “m” and “a” can be specified to - omit merged arena and per arena statistics, respectively; “b” - and “l” can be specified to omit per size class statistics for - bins and large objects, respectively. Unrecognized characters are + configuration, m and a can be specified to + omit merged arena and per arena statistics, respectively; b + and l can be specified to omit per size class statistics + for bins and large objects, respectively. Unrecognized characters are silently ignored. Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations. 
@@ -454,7 +455,7 @@ for (i = 0; i < nbins; i++) { The string specified via , the string pointed to by the global variable malloc_conf, the - “name” of the file referenced by the symbolic link named + name of the file referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that @@ -891,12 +892,12 @@ for (i = 0; i < nbins; i++) { settings are supported if sbrk 2 is supported by the operating - system: “disabled”, “primary”, and - “secondary”; otherwise only “disabled” is - supported. The default is “secondary” if + system: disabled, primary, and + secondary; otherwise only disabled is + supported. The default is secondary if sbrk 2 is supported by the operating - system; “disabled” otherwise. + system; disabled otherwise. @@ -963,15 +964,16 @@ for (i = 0; i < nbins; i++) { r- [] - Junk filling. If set to "alloc", each byte of - uninitialized allocated memory will be initialized to - 0xa5. If set to "free", all deallocated memory will - be initialized to 0x5a. If set to "true", both - allocated and deallocated memory will be initialized, and if set to - "false", junk filling be disabled entirely. This is intended for - debugging and will impact performance negatively. This option is - "false" by default unless is specified - during configuration, in which case it is "true" by + Junk filling. If set to alloc, each byte + of uninitialized allocated memory will be initialized to + 0xa5. If set to free, all deallocated + memory will be initialized to 0x5a. If set to + true, both allocated and deallocated memory will be + initialized, and if set to false, junk filling be + disabled entirely. This is intended for debugging and will impact + performance negatively. This option is false by default + unless is specified during + configuration, in which case it is true by default. 
@@ -2445,7 +2447,7 @@ MAPPED_LIBRARIES: of run-time assertions that catch application errors such as double-free, write-after-free, etc. - Programs often accidentally depend on “uninitialized” + Programs often accidentally depend on uninitialized memory actually being filled with zero bytes. Junk filling (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect @@ -2480,7 +2482,7 @@ MAPPED_LIBRARIES: this function is likely to result in a crash or deadlock. All messages are prefixed by - “<jemalloc>: ”. + <jemalloc>: . RETURN VALUES @@ -2666,9 +2668,9 @@ malloc_conf = "narenas:1";]]> calloc(), realloc(), and free() functions conform to ISO/IEC - 9899:1990 (“ISO C90”). + 9899:1990 (ISO C90). The posix_memalign() function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). + to IEEE Std 1003.1-2001 (POSIX.1). diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index bc8bc2a9..619365d8 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -5,6 +5,6 @@ - "" + From 7b0a8b74f048c397ff2276c9d2f5311fce70bd89 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2016 15:26:35 -0700 Subject: [PATCH 0456/2608] malloc_stats_print() fixes/cleanups. Fix and clean up various malloc_stats_print() issues caused by 0ba5b9b6189e16a983d8922d8c5cb6ab421906e8 (Add "J" (JSON) support to malloc_stats_print().). 
--- src/stats.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/stats.c b/src/stats.c index dbff6c27..44f8c528 100644 --- a/src/stats.c +++ b/src/stats.c @@ -73,8 +73,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.nslabs", i, j, &nslabs, uint64_t); in_gap_prev = in_gap; - if (nslabs == 0) - in_gap = true; + in_gap = (nslabs == 0); if (!json && in_gap_prev && !in_gap) { malloc_cprintf(write_cb, cbopaque, @@ -211,8 +210,7 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), CTL_M2_M4_GET("stats.arenas.0.lextents.0.nrequests", i, j, &nrequests, uint64_t); in_gap_prev = in_gap; - if (nrequests == 0) - in_gap = true; + in_gap = (nrequests == 0); if (!json && in_gap_prev && !in_gap) { malloc_cprintf(write_cb, cbopaque, @@ -314,8 +312,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, } else { malloc_cprintf(write_cb, cbopaque, "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 - ", ""purged: %"FMTu64"\n", pdirty, npurge, nmadvise, - purged); + ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); } CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, @@ -538,16 +535,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": %u\n", uv); \ } \ } -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } \ - } #define OPT_WRITE_SSIZE_T(n, c) \ if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ if (json) { \ @@ -593,7 +580,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(abort, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") - OPT_WRITE_CHAR_P(purge, ",") 
OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_BOOL(zero, ",") @@ -625,7 +611,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_BOOL #undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T #undef OPT_WRITE_SSIZE_T #undef OPT_WRITE_CHAR_P From b599b32280e1142856b0b96293a71e1684b1ccfb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2016 15:28:54 -0700 Subject: [PATCH 0457/2608] Add "J" (JSON) support to malloc_stats_print(). This resolves #474. --- doc/jemalloc.xml.in | 43 +- src/stats.c | 1267 +++++++++++++++++++++++++++++-------------- 2 files changed, 894 insertions(+), 416 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 88172296..30b2bdf8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -410,29 +410,30 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> - The malloc_stats_print() function - writes human-readable summary statistics via the - write_cb callback function pointer and - cbopaque data passed to - write_cb, or - malloc_message() if - write_cb is NULL. This - function can be called repeatedly. General information that never - changes during execution can be omitted by specifying "g" as a character + The malloc_stats_print() function writes + summary statistics via the write_cb callback + function pointer and cbopaque data passed to + write_cb, or malloc_message() + if write_cb is NULL. The + statistics are presented in human-readable form unless J is + specified as a character within the opts string, in + which case the statistics are presented in JSON format. This function can be + called repeatedly. General information that never changes during + execution can be omitted by specifying g as a character within the opts string. 
Note that malloc_message() uses the - mallctl*() functions internally, so - inconsistent statistics can be reported if multiple threads use these - functions simultaneously. If is - specified during configuration, “m” and “a” can - be specified to omit merged arena and per arena statistics, respectively; - “b”, “l”, and “h” can be specified to - omit per size class statistics for bins, large objects, and huge objects, - respectively. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. - + mallctl*() functions internally, so inconsistent + statistics can be reported if multiple threads use these functions + simultaneously. If is specified during + configuration, m and a can be specified to + omit merged arena and per arena statistics, respectively; + b, l, and h can be specified + to omit per size class statistics for bins, large objects, and huge + objects, respectively. Unrecognized characters are silently ignored. + Note that thread caching may prevent some statistics from being completely + up to date, since extra locking would be required to merge counters that + track thread cache operations. The malloc_usable_size() function returns the usable size of the allocation pointed to by diff --git a/src/stats.c b/src/stats.c index 073be4fe..bd8af399 100644 --- a/src/stats.c +++ b/src/stats.c @@ -32,86 +32,107 @@ bool opt_stats_print = false; size_t stats_cactive = 0; -/******************************************************************************/ -/* Function prototypes for non-inline static functions. 
*/ - -static void stats_arena_bins_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i); -static void stats_arena_hchunks_print( - void (*write_cb)(void *, const char *), void *cbopaque, unsigned i); -static void stats_arena_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i, bool bins, bool large, bool huge); - /******************************************************************************/ static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap; + bool config_tcache, in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); - CTL_GET("config.tcache", &config_tcache, bool); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util nfills nflushes newruns" - " reruns\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curruns regs" - " pgs util newruns reruns\n"); - } CTL_GET("arenas.nbins", &nbins, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"bins\": [\n"); + } else { + CTL_GET("config.tcache", &config_tcache, bool); + if (config_tcache) { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util nfills" + " nflushes newruns reruns\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs" + " curruns regs pgs util newruns" + " reruns\n"); + } + } for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nruns; + size_t reg_size, run_size, curregs; + size_t curruns; + uint32_t nregs; + uint64_t nmalloc, ndalloc, nrequests, nfills, 
nflushes; + uint64_t nreruns; CTL_M2_M4_GET("stats.arenas.0.bins.0.nruns", i, j, &nruns, uint64_t); - if (nruns == 0) - in_gap = true; - else { - size_t reg_size, run_size, curregs, availregs, milli; - size_t curruns; - uint32_t nregs; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t reruns; - char util[6]; /* "x.yyy". */ + in_gap_prev = in_gap; + in_gap = (nruns == 0); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } - CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, - &curregs, size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); + CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); + CTL_M2_GET("arenas.bin.0.run_size", j, &run_size, size_t); + + CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, + size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, + &nrequests, uint64_t); + if (config_tcache) { + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, + &nfills, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, + &nflushes, uint64_t); + } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, &nreruns, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, &curruns, + size_t); + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"nmalloc\": 
%"FMTu64",\n" + "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curregs\": %zu,\n" + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", + nmalloc, + ndalloc, + curregs, + nrequests); if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, - j, &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", - i, j, &nflushes, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", + nfills, + nflushes); } - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreruns", i, j, - &reruns, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curruns", i, j, - &curruns, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nreruns\": %"FMTu64",\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + nreruns, + curruns, + (j + 1 < nbins) ? "," : ""); + } else if (!in_gap) { + size_t availregs, milli; + char util[6]; /* "x.yyy". */ availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs @@ -138,7 +159,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, run_size / page, util, nfills, nflushes, - nruns, reruns); + nruns, nreruns); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 @@ -147,28 +168,38 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curruns, nregs, - run_size / page, util, nruns, reruns); + run_size / page, util, nruns, nreruns); } } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", (large || huge) ? 
"," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i) + bool json, bool huge, unsigned i) { unsigned nbins, nlruns, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc ndalloc" - " nrequests curruns\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lruns\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: size ind allocated nmalloc" + " ndalloc nrequests curruns\n"); + } for (j = 0, in_gap = false; j < nlruns; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t run_size, curruns; @@ -179,17 +210,25 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.lruns.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, - &curruns, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.lrun.0.size", j, &run_size, size_t); + CTL_M2_M4_GET("stats.arenas.0.lruns.0.curruns", i, j, &curruns, + size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curruns\": %zu\n" + "\t\t\t\t\t}%s\n", + curruns, + (j + 1 < nlruns) ? 
"," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -197,25 +236,35 @@ stats_arena_lruns_print(void (*write_cb)(void *, const char *), void *cbopaque, ndalloc, nrequests, curruns); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]%s\n", huge ? "," : ""); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_hchunks_print(void (*write_cb)(void *, const char *), - void *cbopaque, unsigned i) + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlruns, nhchunks, j; - bool in_gap; + bool in_gap, in_gap_prev; - malloc_cprintf(write_cb, cbopaque, - "huge: size ind allocated nmalloc ndalloc" - " nrequests curhchunks\n"); CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlruns", &nlruns, unsigned); CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"hchunks\": [\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: size ind allocated nmalloc" + " ndalloc nrequests curhchunks\n"); + } for (j = 0, in_gap = false; j < nhchunks; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t hchunk_size, curhchunks; @@ -226,18 +275,25 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), &ndalloc, uint64_t); CTL_M2_M4_GET("stats.arenas.0.hchunks.0.nrequests", i, j, &nrequests, uint64_t); - if (nrequests == 0) - in_gap = true; - else { - CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, - size_t); - CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, - j, &curhchunks, size_t); - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - in_gap = false; - } + in_gap_prev = in_gap; + in_gap = (nrequests == 0); + + if (!json && in_gap_prev && !in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } + + CTL_M2_GET("arenas.hchunk.0.size", j, &hchunk_size, size_t); + 
CTL_M2_M4_GET("stats.arenas.0.hchunks.0.curhchunks", i, j, + &curhchunks, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t{\n" + "\t\t\t\t\t\t\"curhchunks\": %zu\n" + "\t\t\t\t\t}%s\n", + curhchunks, + (j + 1 < nhchunks) ? "," : ""); + } else if (!in_gap) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 " %12"FMTu64" %12zu\n", @@ -246,15 +302,20 @@ stats_arena_hchunks_print(void (*write_cb)(void *, const char *), nrequests, curhchunks); } } - if (in_gap) { + if (json) { malloc_cprintf(write_cb, cbopaque, - " ---\n"); + "\t\t\t\t]\n"); + } else { + if (in_gap) { + malloc_cprintf(write_cb, cbopaque, + " ---\n"); + } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - unsigned i, bool bins, bool large, bool huge) + bool json, unsigned i, bool bins, bool large, bool huge) { unsigned nthreads; const char *dss; @@ -272,100 +333,731 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nthreads\": %u,\n", nthreads); + } else { + malloc_cprintf(write_cb, cbopaque, + "assigned threads: %u\n", nthreads); + } + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - malloc_cprintf(write_cb, cbopaque, "dss allocation precedence: %s\n", - dss); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dss\": \"%s\",\n", dss); + } else { + malloc_cprintf(write_cb, cbopaque, + "dss allocation precedence: %s\n", dss); + } + CTL_M2_GET("stats.arenas.0.lg_dirty_mult", i, &lg_dirty_mult, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (lg_dirty_mult >= 0) { - malloc_cprintf(write_cb, cbopaque, - "min active:dirty page ratio: %u:1\n", - (1U << lg_dirty_mult)); - } else { - malloc_cprintf(write_cb, cbopaque, - "min 
active:dirty page ratio: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"lg_dirty_mult\": %zd,\n", lg_dirty_mult); + } else { + if (opt_purge == purge_mode_ratio) { + if (lg_dirty_mult >= 0) { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: %u:1\n", + (1U << lg_dirty_mult)); + } else { + malloc_cprintf(write_cb, cbopaque, + "min active:dirty page ratio: N/A\n"); + } } } + CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (opt_purge == purge_mode_decay) { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"decay_time\": %zd,\n", decay_time); + } else { + if (opt_purge == purge_mode_decay) { + if (decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + "decay time: %zd\n", decay_time); + } else { + malloc_cprintf(write_cb, cbopaque, + "decay time: N/A\n"); + } + } } + CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64", " - "purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pactive\": %zu,\n", pactive); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"pdirty\": %zu,\n", pdirty); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"nmadvise\": %"FMTu64",\n", nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + } else { + malloc_cprintf(write_cb, cbopaque, + "purging: dirty: %zu, sweeps: 
%"FMTu64", madvises: %"FMTu64 + ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + } - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc ndalloc" - " nrequests\n"); CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, size_t); CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, small_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"small\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + " allocated nmalloc" + " ndalloc nrequests\n"); + malloc_cprintf(write_cb, cbopaque, + "small: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated, small_nmalloc, small_ndalloc, + small_nrequests); + } + CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, size_t); CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, large_nrequests); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"large\": {\n"); + + malloc_cprintf(write_cb, 
cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "large: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + large_allocated, large_nmalloc, large_ndalloc, + large_nrequests); + } + CTL_M2_GET("stats.arenas.0.huge.allocated", i, &huge_allocated, size_t); CTL_M2_GET("stats.arenas.0.huge.nmalloc", i, &huge_nmalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.ndalloc", i, &huge_ndalloc, uint64_t); CTL_M2_GET("stats.arenas.0.huge.nrequests", i, &huge_nrequests, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "huge: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated + huge_allocated, - small_nmalloc + large_nmalloc + huge_nmalloc, - small_ndalloc + large_ndalloc + huge_ndalloc, - small_nrequests + large_nrequests + huge_nrequests); - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"huge\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu,\n", huge_allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", huge_nmalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", huge_ndalloc); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", huge_nrequests); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "huge: %12zu 
%12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + huge_allocated, huge_nmalloc, huge_ndalloc, huge_nrequests); + malloc_cprintf(write_cb, cbopaque, + "total: %12zu %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + small_allocated + large_allocated + huge_allocated, + small_nmalloc + large_nmalloc + huge_nmalloc, + small_ndalloc + large_ndalloc + huge_ndalloc, + small_nrequests + large_nrequests + huge_nrequests); + } + if (!json) { + malloc_cprintf(write_cb, cbopaque, + "active: %12zu\n", pactive * page); + } + CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"mapped\": %zu,\n", mapped); + } else { + malloc_cprintf(write_cb, cbopaque, + "mapped: %12zu\n", mapped); + } + CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"retained\": %zu,\n", retained); + } else { + malloc_cprintf(write_cb, cbopaque, + "retained: %12zu\n", retained); + } + CTL_M2_GET("stats.arenas.0.metadata.mapped", i, &metadata_mapped, size_t); CTL_M2_GET("stats.arenas.0.metadata.allocated", i, &metadata_allocated, size_t); - malloc_cprintf(write_cb, cbopaque, - "metadata: mapped: %zu, allocated: %zu\n", - metadata_mapped, metadata_allocated); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata\": {\n"); - if (bins) - stats_arena_bins_print(write_cb, cbopaque, i); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"mapped\": %zu,\n", metadata_mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t},\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata: mapped: %zu, allocated: %zu\n", + metadata_mapped, metadata_allocated); + } + + if (bins) { + stats_arena_bins_print(write_cb, cbopaque, json, large, 
huge, + i); + } if (large) - stats_arena_lruns_print(write_cb, cbopaque, i); + stats_arena_lruns_print(write_cb, cbopaque, json, huge, i); if (huge) - stats_arena_hchunks_print(write_cb, cbopaque, i); + stats_arena_hchunks_print(write_cb, cbopaque, json, i); +} + +static void +stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged) +{ + const char *cpv; + bool bv; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + ssize_t ssv; + size_t sv, bsz, usz, ssz, sssz, cpsz; + + bsz = sizeof(bool); + usz = sizeof(unsigned); + ssz = sizeof(size_t); + sssz = sizeof(ssize_t); + cpsz = sizeof(const char *); + + CTL_GET("version", &cpv, const char *); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"version\": \"%s\",\n", cpv); + } else + malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + + /* config. */ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ + if (json) { \ + CTL_GET("config."#n, &bv, bool); \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ + (c)); \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"config\": {\n"); + } + + CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") + + CTL_GET("config.debug", &bv, bool); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); + } else { + malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", + bv ? 
"enabled" : "disabled"); + } + + CONFIG_WRITE_BOOL_JSON(fill, ",") + CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"malloc_conf\": \"%s\",\n", + config_malloc_conf); + } else { + malloc_cprintf(write_cb, cbopaque, + "config.malloc_conf: \"%s\"\n", config_malloc_conf); + } + + CONFIG_WRITE_BOOL_JSON(munmap, ",") + CONFIG_WRITE_BOOL_JSON(prof, ",") + CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") + CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") + CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(tcache, ",") + CONFIG_WRITE_BOOL_JSON(tls, ",") + CONFIG_WRITE_BOOL_JSON(utrace, ",") + CONFIG_WRITE_BOOL_JSON(valgrind, ",") + CONFIG_WRITE_BOOL_JSON(xmalloc, "") + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } +#undef CONFIG_WRITE_BOOL_JSON + + /* opt. */ +#define OPT_WRITE_BOOL(n, c) \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s\n", bv ? "true" : "false"); \ + } \ + } +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ + bool bv2; \ + if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ + je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ + "false", (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %s ("#m": %s)\n", bv ? "true" \ + : "false", bv2 ? 
"true" : "false"); \ + } \ + } \ +} +#define OPT_WRITE_UNSIGNED(n, c) \ + if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %u\n", uv); \ + } \ + } +#define OPT_WRITE_SIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&sv, &ssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zu%s\n", sv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zu\n", sv); \ + } \ + } +#define OPT_WRITE_SSIZE_T(n, c) \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd\n", ssv); \ + } \ + } +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ + ssize_t ssv2; \ + if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ + je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": %zd ("#m": %zd)\n", \ + ssv, ssv2); \ + } \ + } \ +} +#define OPT_WRITE_CHAR_P(n, c) \ + if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ + if (json) { \ + malloc_cprintf(write_cb, cbopaque, \ + "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ + } else { \ + malloc_cprintf(write_cb, cbopaque, \ + " opt."#n": \"%s\"\n", cpv); \ + } \ + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"opt\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "Run-time option settings:\n"); + } + OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_SIZE_T(lg_chunk, ",") + OPT_WRITE_CHAR_P(dss, ",") + OPT_WRITE_UNSIGNED(narenas, ",") + OPT_WRITE_CHAR_P(purge, ",") + if (json || opt_purge == purge_mode_ratio) { + 
OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, + arenas.lg_dirty_mult, ",") + } + if (json || opt_purge == purge_mode_decay) { + OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + } + OPT_WRITE_CHAR_P(junk, ",") + OPT_WRITE_SIZE_T(quarantine, ",") + OPT_WRITE_BOOL(redzone, ",") + OPT_WRITE_BOOL(zero, ",") + OPT_WRITE_BOOL(utrace, ",") + OPT_WRITE_BOOL(xmalloc, ",") + OPT_WRITE_BOOL(tcache, ",") + OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(prof, ",") + OPT_WRITE_CHAR_P(prof_prefix, ",") + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") + OPT_WRITE_BOOL(prof_accum, ",") + OPT_WRITE_SSIZE_T(lg_prof_interval, ",") + OPT_WRITE_BOOL(prof_gdump, ",") + OPT_WRITE_BOOL(prof_final, ",") + OPT_WRITE_BOOL(prof_leak, ",") + /* + * stats_print is always emitted, so as long as stats_print comes last + * it's safe to unconditionally omit the comma here (rather than having + * to conditionally omit it elsewhere depending on configuration). + */ + OPT_WRITE_BOOL(stats_print, "") + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_SIZE_T +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_CHAR_P + + /* arenas. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &uv, unsigned); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"narenas\": %u,\n", uv); + } else + malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + + CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_dirty_mult\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_ratio) { + if (ssv >= 0) { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "%u:1\n", (1U << ssv)); + } else { + malloc_cprintf(write_cb, cbopaque, + "Min active:dirty page ratio per arena: " + "N/A\n"); + } + } + CTL_GET("arenas.decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"decay_time\": %zd,\n", ssv); + } else if (opt_purge == purge_mode_decay) { + malloc_cprintf(write_cb, cbopaque, + "Unused dirty page decay time: %zd%s\n", + ssv, (ssv < 0) ? " (no decay)" : ""); + } + + CTL_GET("arenas.quantum", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"quantum\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + + CTL_GET("arenas.page", &sv, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"page\": %zu,\n", sv); + } else + malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"tcache_max\": %zu,\n", sv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Maximum thread-cached size class: %zu\n", sv); + } + } + + if (json) { + unsigned nbins, nlruns, nhchunks, i; + + CTL_GET("arenas.nbins", &nbins, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nbins\": %u,\n", nbins); + + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + + 
malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"bin\": [\n"); + for (i = 0; i < nbins; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu,\n", sv); + + CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + + CTL_M2_GET("arenas.bin.0.run_size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"run_size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nlruns", &nlruns, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nlruns\": %u,\n", nlruns); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lrun\": [\n"); + for (i = 0; i < nlruns; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.lrun.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nlruns) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t],\n"); + + CTL_GET("arenas.nhchunks", &nhchunks, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhchunks\": %u,\n", nhchunks); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"hchunk\": [\n"); + for (i = 0; i < nhchunks; i++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t{\n"); + + CTL_M2_GET("arenas.hchunk.0.size", i, &sv, size_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\"size\": %zu\n", sv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t}%s\n", (i + 1 < nhchunks) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, + "\t\t\t]\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t},\n"); + } + + /* prof. 
*/ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"prof\": {\n"); + + CTL_GET("prof.thread_active_init", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : + "false"); + + CTL_GET("prof.active", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.gdump", &bv, bool); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); + + CTL_GET("prof.interval", &u64v, uint64_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"interval\": %"FMTu64",\n", u64v); + + CTL_GET("prof.lg_sample", &ssv, ssize_t); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"lg_sample\": %zd\n", ssv); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : + ""); + } +} + +static void +stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, + bool json, bool merged, bool unmerged, bool bins, bool large, bool huge) +{ + size_t *cactive; + size_t allocated, active, metadata, resident, mapped, retained; + + CTL_GET("stats.cactive", &cactive, size_t *); + CTL_GET("stats.allocated", &allocated, size_t); + CTL_GET("stats.active", &active, size_t); + CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.resident", &resident, size_t); + CTL_GET("stats.mapped", &mapped, size_t); + CTL_GET("stats.retained", &retained, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats\": {\n"); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"cactive\": %zu,\n", atomic_read_z(cactive)); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"allocated\": %zu,\n", allocated); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"active\": %zu,\n", active); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"resident\": %zu,\n", resident); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"mapped\": %zu,\n", 
mapped); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"retained\": %zu\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t}%s\n", (merged || unmerged) ? "," : ""); + } else { + malloc_cprintf(write_cb, cbopaque, + "Allocated: %zu, active: %zu, metadata: %zu," + " resident: %zu, mapped: %zu, retained: %zu\n", + allocated, active, metadata, resident, mapped, retained); + malloc_cprintf(write_cb, cbopaque, + "Current active ceiling: %zu\n", + atomic_read_z(cactive)); + } + + if (merged || unmerged) { + unsigned narenas; + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\"stats.arenas\": {\n"); + } + + CTL_GET("arenas.narenas", &narenas, unsigned); + { + VARIABLE_ARRAY(bool, initialized, narenas); + size_t isz; + unsigned i, j, ninitialized; + + isz = sizeof(bool) * narenas; + xmallctl("arenas.initialized", (void *)initialized, + &isz, NULL, 0); + for (i = ninitialized = 0; i < narenas; i++) { + if (initialized[i]) + ninitialized++; + } + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"merged\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "\nMerged arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + narenas, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + + /* Unmerged stats. */ + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", i); + } else { + malloc_cprintf(write_cb, + cbopaque, "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, cbopaque, + json, i, bins, large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? 
"," : ""); + } + } + } + } + + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t}\n"); + } + } } void @@ -375,6 +1067,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; + bool json = false; bool general = true; bool merged = true; bool unmerged = true; @@ -408,6 +1101,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, for (i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { + case 'J': + json = true; + break; case 'g': general = false; break; @@ -431,246 +1127,27 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - if (general) { - const char *cpv; - bool bv; - unsigned uv; - ssize_t ssv; - size_t sv, bsz, usz, ssz, sssz, cpsz; - - bsz = sizeof(bool); - usz = sizeof(unsigned); - ssz = sizeof(size_t); - sssz = sizeof(ssize_t); - cpsz = sizeof(const char *); - - CTL_GET("version", &cpv, const char *); - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - CTL_GET("config.debug", &bv, bool); - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? "enabled" : "disabled"); + if (json) { malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - -#define OPT_WRITE_BOOL(n) \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m) { \ - bool bv2; \ - if (je_mallctl("opt."#n, &bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ -} -#define OPT_WRITE_UNSIGNED(n) \ - if (je_mallctl("opt."#n, &uv, &usz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } -#define OPT_WRITE_SIZE_T(n) \ - if (je_mallctl("opt."#n, &sv, &ssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zu\n", sv); \ - } -#define OPT_WRITE_SSIZE_T(n) \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, &ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ -} -#define OPT_WRITE_CHAR_P(n) \ - if (je_mallctl("opt."#n, &cpv, &cpsz, NULL, 0) == 0) { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } - + "{\n" + "\t\"jemalloc\": {\n"); + } else { malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_SIZE_T(lg_chunk) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_UNSIGNED(narenas) - OPT_WRITE_CHAR_P(purge) - if (opt_purge == purge_mode_ratio) { - OPT_WRITE_SSIZE_T_MUTABLE(lg_dirty_mult, - arenas.lg_dirty_mult) - } - if (opt_purge == purge_mode_decay) - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_SIZE_T(quarantine) - OPT_WRITE_BOOL(redzone) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(valgrind) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, - prof.thread_active_init) - OPT_WRITE_SSIZE_T(lg_prof_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - 
OPT_WRITE_BOOL(prof_final) - OPT_WRITE_BOOL(prof_leak) - -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SIZE_T -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - - malloc_cprintf(write_cb, cbopaque, "CPUs: %u\n", ncpus); - - CTL_GET("arenas.narenas", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - - malloc_cprintf(write_cb, cbopaque, "Pointer size: %zu\n", - sizeof(void *)); - - CTL_GET("arenas.quantum", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", - sv); - - CTL_GET("arenas.page", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - - CTL_GET("arenas.lg_dirty_mult", &ssv, ssize_t); - if (opt_purge == purge_mode_ratio) { - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "%u:1\n", (1U << ssv)); - } else { - malloc_cprintf(write_cb, cbopaque, - "Min active:dirty page ratio per arena: " - "N/A\n"); - } - } - CTL_GET("arenas.decay_time", &ssv, ssize_t); - if (opt_purge == purge_mode_decay) { - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", - ssv, (ssv < 0) ? 
" (no decay)" : ""); - } - if (je_mallctl("arenas.tcache_max", &sv, &ssz, NULL, 0) == 0) { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } - if (je_mallctl("opt.prof", &bv, &bsz, NULL, 0) == 0 && bv) { - CTL_GET("prof.lg_sample", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Average profile sample interval: %"FMTu64 - " (2^%zu)\n", (((uint64_t)1U) << sv), sv); - - CTL_GET("opt.lg_prof_interval", &ssv, ssize_t); - if (ssv >= 0) { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: %"FMTu64 - " (2^%zd)\n", - (((uint64_t)1U) << ssv), ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Average profile dump interval: N/A\n"); - } - } - CTL_GET("opt.lg_chunk", &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "Chunk size: %zu (2^%zu)\n", (ZU(1) << sv), sv); + "___ Begin jemalloc statistics ___\n"); } + if (general) + stats_general_print(write_cb, cbopaque, json, merged, unmerged); if (config_stats) { - size_t *cactive; - size_t allocated, active, metadata, resident, mapped, retained; - - CTL_GET("stats.cactive", &cactive, size_t *); - CTL_GET("stats.allocated", &allocated, size_t); - CTL_GET("stats.active", &active, size_t); - CTL_GET("stats.metadata", &metadata, size_t); - CTL_GET("stats.resident", &resident, size_t); - CTL_GET("stats.mapped", &mapped, size_t); - CTL_GET("stats.retained", &retained, size_t); - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - malloc_cprintf(write_cb, cbopaque, - "Current active ceiling: %zu\n", - atomic_read_z(cactive)); - - if (merged) { - unsigned narenas; - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i, ninitialized; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - for (i = ninitialized 
= 0; i < narenas; i++) { - if (initialized[i]) - ninitialized++; - } - - if (ninitialized > 1 || !unmerged) { - /* Print merged arena stats. */ - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - stats_arena_print(write_cb, cbopaque, - narenas, bins, large, huge); - } - } - } - - if (unmerged) { - unsigned narenas; - - /* Print stats for each arena. */ - - CTL_GET("arenas.narenas", &narenas, unsigned); - { - VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; - unsigned i; - - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", initialized, - &isz, NULL, 0); - - for (i = 0; i < narenas; i++) { - if (initialized[i]) { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", i); - stats_arena_print(write_cb, - cbopaque, i, bins, large, - huge); - } - } - } - } + stats_print_helper(write_cb, cbopaque, json, merged, unmerged, + bins, large, huge); + } + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t}\n" + "}\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "--- End jemalloc statistics ---\n"); } - malloc_cprintf(write_cb, cbopaque, "--- End jemalloc statistics ---\n"); } From f19bedb04cd3c2f603569ca9a40c8c66b05c3a90 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 1 Nov 2016 13:25:42 -0700 Subject: [PATCH 0458/2608] Use ... rather than “...” or "..." in XML. --- doc/jemalloc.xml.in | 39 ++++++++++++++++++++------------------- doc/stylesheet.xsl | 2 +- 2 files changed, 21 insertions(+), 20 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 30b2bdf8..6da59f43 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -375,7 +375,7 @@ The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace, by translating a name - to a “Management Information Base” (MIB) that can be passed + to a Management Information Base (MIB) that can be passed repeatedly to mallctlbymib(). 
Upon successful return from mallctlnametomib(), mibp contains an array of @@ -456,7 +456,7 @@ for (i = 0; i < nbins; i++) { The string specified via , the string pointed to by the global variable malloc_conf, the - “name” of the file referenced by the symbolic link named + name of the file referenced by the symbolic link named /etc/malloc.conf, and the value of the environment variable MALLOC_CONF, will be interpreted, in that order, from left to right as options. Note that @@ -918,12 +918,12 @@ for (i = 0; i < nbins; i++) { settings are supported if sbrk 2 is supported by the operating - system: “disabled”, “primary”, and - “secondary”; otherwise only “disabled” is - supported. The default is “secondary” if + system: disabled, primary, and + secondary; otherwise only disabled is + supported. The default is secondary if sbrk 2 is supported by the operating - system; “disabled” otherwise. + system; disabled otherwise. @@ -1039,15 +1039,16 @@ for (i = 0; i < nbins; i++) { r- [] - Junk filling. If set to "alloc", each byte of - uninitialized allocated memory will be initialized to - 0xa5. If set to "free", all deallocated memory will - be initialized to 0x5a. If set to "true", both - allocated and deallocated memory will be initialized, and if set to - "false", junk filling be disabled entirely. This is intended for - debugging and will impact performance negatively. This option is - "false" by default unless is specified - during configuration, in which case it is "true" by default unless + Junk filling. If set to alloc, each byte + of uninitialized allocated memory will be initialized to + 0xa5. If set to free, all deallocated + memory will be initialized to 0x5a. If set to + true, both allocated and deallocated memory will be + initialized, and if set to false, junk filling be + disabled entirely. This is intended for debugging and will impact + performance negatively. 
This option is false by default + unless is specified during + configuration, in which case it is true by default unless running inside Valgrind. @@ -2735,7 +2736,7 @@ MAPPED_LIBRARIES: of run-time assertions that catch application errors such as double-free, write-after-free, etc. - Programs often accidentally depend on “uninitialized” + Programs often accidentally depend on uninitialized memory actually being filled with zero bytes. Junk filling (see the opt.junk option) tends to expose such bugs in the form of obviously incorrect @@ -2772,7 +2773,7 @@ MAPPED_LIBRARIES: this function is likely to result in a crash or deadlock. All messages are prefixed by - “<jemalloc>: ”. + <jemalloc>: . RETURN VALUES @@ -2957,9 +2958,9 @@ malloc_conf = "lg_chunk:24";]]> calloc(), realloc(), and free() functions conform to ISO/IEC - 9899:1990 (“ISO C90”). + 9899:1990 (ISO C90). The posix_memalign() function conforms - to IEEE Std 1003.1-2001 (“POSIX.1”). + to IEEE Std 1003.1-2001 (POSIX.1). diff --git a/doc/stylesheet.xsl b/doc/stylesheet.xsl index bc8bc2a9..619365d8 100644 --- a/doc/stylesheet.xsl +++ b/doc/stylesheet.xsl @@ -5,6 +5,6 @@ - "" + From 07ee4c5ff4c60e8e04eb452e2ef154c47fa118a4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 08:54:07 -0700 Subject: [PATCH 0459/2608] Force no lazy-lock on Windows. Monitoring thread creation is unimplemented for Windows, which means lazy-lock cannot function correctly. This resolves #310. 
--- configure.ac | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 40681d16..4bdd66aa 100644 --- a/configure.ac +++ b/configure.ac @@ -1434,9 +1434,17 @@ fi ], [enable_lazy_lock=""] ) -if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" +if test "x${enable_lazy_lock}" = "x" ; then + if test "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" + else + enable_lazy_lock="0" + fi +fi +if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then + AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented]) + enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -1447,8 +1455,6 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -else - enable_lazy_lock="0" fi AC_SUBST([enable_lazy_lock]) From eee1ca655e348b0602c96b702332c28d53869fff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 08:54:07 -0700 Subject: [PATCH 0460/2608] Force no lazy-lock on Windows. Monitoring thread creation is unimplemented for Windows, which means lazy-lock cannot function correctly. This resolves #310. 
--- configure.ac | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index b2616b9a..0d10143c 100644 --- a/configure.ac +++ b/configure.ac @@ -1404,9 +1404,17 @@ fi ], [enable_lazy_lock=""] ) -if test "x$enable_lazy_lock" = "x" -a "x${force_lazy_lock}" = "x1" ; then - AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) - enable_lazy_lock="1" +if test "x${enable_lazy_lock}" = "x" ; then + if test "x${force_lazy_lock}" = "x1" ; then + AC_MSG_RESULT([Forcing lazy-lock to avoid allocator/threading bootstrap issues]) + enable_lazy_lock="1" + else + enable_lazy_lock="0" + fi +fi +if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then + AC_MSG_RESULT([Forcing no lazy-lock because thread creation monitoring is unimplemented]) + enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then @@ -1417,8 +1425,6 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) -else - enable_lazy_lock="0" fi AC_SUBST([enable_lazy_lock]) From b54072dfeece6d25aa7c6b1a503ae350bfe6e1a0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:05:19 -0700 Subject: [PATCH 0461/2608] Call _exit(2) rather than exit(3) in forked child. _exit(2) is async-signal-safe, whereas exit(3) is not. --- test/unit/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/fork.c b/test/unit/fork.c index 46c815ef..c530797c 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_fork) test_fail("Unexpected fork() failure"); } else if (pid == 0) { /* Child. */ - exit(0); + _exit(0); } else { int status; From d9f7b2a4307f7ff9f7a139b33d366d44e8a8b83d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:06:40 -0700 Subject: [PATCH 0462/2608] Fix/refactor zone allocator integration code. 
Fix zone_force_unlock() to reinitialize, rather than unlocking mutexes, since OS X 10.12 cannot tolerate a child unlocking mutexes that were locked by its parent. Refactor; this was a side effect of experimenting with zone {de,re}registration during fork(2). --- include/jemalloc/internal/private_symbols.txt | 2 +- src/zone.c | 191 ++++++++++-------- 2 files changed, 108 insertions(+), 85 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 315d2872..29936a87 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -383,7 +383,6 @@ psz2ind psz2ind_clamp psz2ind_impl psz2u -register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread @@ -537,3 +536,4 @@ witness_postfork_parent witness_prefork witness_unlock witnesses_cleanup +zone_register diff --git a/src/zone.c b/src/zone.c index 52d07f30..66ba02b9 100644 --- a/src/zone.c +++ b/src/zone.c @@ -4,7 +4,7 @@ #endif /* - * The malloc_default_purgeable_zone function is only available on >= 10.6. + * The malloc_default_purgeable_zone() function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. */ extern malloc_zone_t *malloc_default_purgeable_zone(void) @@ -13,8 +13,9 @@ JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t zone; -static struct malloc_introspection_t zone_introspect; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; +static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ @@ -164,12 +165,68 @@ static void zone_force_unlock(malloc_zone_t *zone) { + /* + * Call jemalloc_postfork_child() rather than + * jemalloc_postfork_parent(), because this function is executed by both + * parent and child. The parent can tolerate having state + * reinitialized, but the child cannot unlock mutexes that were locked + * by the parent. + */ if (isthreaded) - jemalloc_postfork_parent(); + jemalloc_postfork_child(); +} + +static void +zone_init(void) +{ + + jemalloc_zone.size = (void *)zone_size; + jemalloc_zone.malloc = (void *)zone_malloc; + jemalloc_zone.calloc = (void *)zone_calloc; + jemalloc_zone.valloc = (void *)zone_valloc; + jemalloc_zone.free = (void *)zone_free; + jemalloc_zone.realloc = (void *)zone_realloc; + jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.zone_name = "jemalloc_zone"; + jemalloc_zone.batch_malloc = NULL; + jemalloc_zone.batch_free = NULL; + jemalloc_zone.introspect = &jemalloc_zone_introspect; + jemalloc_zone.version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 5) + jemalloc_zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) + jemalloc_zone.free_definite_size = zone_free_definite_size; +#endif +#if (JEMALLOC_ZONE_VERSION >= 8) + jemalloc_zone.pressure_relief = NULL; +#endif + + jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.good_size = (void *)zone_good_size; + jemalloc_zone_introspect.check = NULL; + jemalloc_zone_introspect.print = NULL; + jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; + jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + jemalloc_zone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + jemalloc_zone_introspect.zone_locked = NULL; +#endif +#if (JEMALLOC_ZONE_VERSION >= 7) + jemalloc_zone_introspect.enable_discharge_checking = NULL; + jemalloc_zone_introspect.disable_discharge_checking = NULL; + jemalloc_zone_introspect.discharge = NULL; +# ifdef 
__BLOCKS__ + jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; +# else + jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; +# endif +#endif } static malloc_zone_t * -get_default_zone(void) +zone_default_get(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -183,7 +240,7 @@ get_default_zone(void) * zone is the default. So get the list of zones to get the first one, * instead of relying on malloc_default_zone. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &num_zones)) { /* * Reset the value in case the failure happened after it was @@ -198,82 +255,11 @@ get_default_zone(void) return (malloc_default_zone()); } -JEMALLOC_ATTR(constructor) -void -register_zone(void) +/* As written, this function can only promote jemalloc_zone. */ +static void +zone_promote(void) { - - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. 
- */ - malloc_zone_t *default_zone = get_default_zone(); - malloc_zone_t *purgeable_zone = NULL; - if (!default_zone->zone_name || - strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { - return; - } - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) - zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) - zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) - zone.pressure_relief = NULL; -#endif - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) - zone_introspect.enable_discharge_checking = NULL; - zone_introspect.disable_discharge_checking = NULL; - zone_introspect.discharge = NULL; -#ifdef __BLOCKS__ - zone_introspect.enumerate_discharged_pointers = NULL; -#else - zone_introspect.enumerate_unavailable_without_blocks = NULL; -#endif -#endif - - /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. 
This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. - */ - if (malloc_default_purgeable_zone != NULL) - purgeable_zone = malloc_default_purgeable_zone(); - - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_register(&zone); + malloc_zone_t *zone; do { /* @@ -286,6 +272,7 @@ register_zone(void) */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it @@ -297,11 +284,47 @@ register_zone(void) * above, i.e. the default zone. Registering it again then puts * it at the end, obviously after the default zone. */ - if (purgeable_zone) { + if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - default_zone = get_default_zone(); - } while (default_zone != &zone); + zone = zone_default_get(); + } while (zone != &jemalloc_zone); +} + +JEMALLOC_ATTR(constructor) +void +zone_register(void) +{ + + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + default_zone = zone_default_get(); + if (!default_zone->zone_name || strcmp(default_zone->zone_name, + "DefaultMallocZone") != 0) + return; + + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. 
This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone() is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + zone_init(); + malloc_zone_register(&jemalloc_zone); + + /* Promote the custom zone to be default. */ + zone_promote(); } From 795f6689dec28f161afbf5964ef1b17288dd384d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:09:45 -0700 Subject: [PATCH 0463/2608] Add os_unfair_lock support. OS X 10.12 deprecated OSSpinLock; os_unfair_lock is the recommended replacement. --- configure.ac | 14 ++++++++++++++ .../jemalloc/internal/jemalloc_internal_decls.h | 3 +++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ include/jemalloc/internal/mutex.h | 9 +++++++++ src/mutex.c | 2 ++ test/include/test/mtx.h | 2 ++ test/src/mtx.c | 7 +++++++ 7 files changed, 42 insertions(+) diff --git a/configure.ac b/configure.ac index 0d10143c..2713cbc2 100644 --- a/configure.ac +++ b/configure.ac @@ -1612,6 +1612,20 @@ if test "x${je_cv_builtin_clz}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) fi +dnl ============================================================================ +dnl Check for os_unfair_lock operations as provided on Darwin. 
+ +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +#include +], [ + os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; + os_unfair_lock_lock(&lock); + os_unfair_lock_unlock(&lock); +], [je_cv_os_unfair_lock]) +if test "x${je_cv_os_unfair_lock}" = "xyes" ; then + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 1d7f2075..c907d910 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,6 +17,9 @@ # include # endif # include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif # ifdef JEMALLOC_GLIBC_MALLOC_HOOK # include # endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0ba960ba..dcbad728 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -60,6 +60,11 @@ */ #undef JEMALLOC_HAVE_MADVISE +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +#undef JEMALLOC_OS_UNFAIR_LOCK + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. 
diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index b4e01ff8..d5b3693c 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -5,6 +5,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER \ {0, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} @@ -38,6 +41,8 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -91,6 +96,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else EnterCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -112,6 +119,8 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else LeaveCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/src/mutex.c b/src/mutex.c index 119b8e35..b757ba86 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -80,6 +80,8 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) _CRT_SPINCOUNT)) return (true); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h index bbe822f5..58afbc3d 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -8,6 +8,8 @@ typedef struct { #ifdef _WIN32 CRITICAL_SECTION 
lock; +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #else diff --git a/test/src/mtx.c b/test/src/mtx.c index 73bd02f6..8a5dfdd9 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -11,6 +11,8 @@ mtx_init(mtx_t *mtx) #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mtx->lock = 0; #else @@ -33,6 +35,7 @@ mtx_fini(mtx_t *mtx) { #ifdef _WIN32 +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); @@ -45,6 +48,8 @@ mtx_lock(mtx_t *mtx) #ifdef _WIN32 EnterCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mtx->lock); #else @@ -58,6 +63,8 @@ mtx_unlock(mtx_t *mtx) #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mtx->lock); #else From d82f2b3473daef45d92d2a83d11487b22f9db4d3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:18:33 -0700 Subject: [PATCH 0464/2608] Do not use syscall(2) on OS X 10.12 (deprecated). --- configure.ac | 17 +++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 3 +++ src/pages.c | 6 +++--- src/util.c | 2 +- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 2713cbc2..6fc44c3e 100644 --- a/configure.ac +++ b/configure.ac @@ -1350,6 +1350,23 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi +dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS X +dnl 10.12's deprecation warning prevents use. 
+SAVED_CFLAGS="${CFLAGS}" +JE_CFLAGS_APPEND([-Werror]) +JE_COMPILABLE([syscall(2)], [ +#define _GNU_SOURCE +#include +#include +], [ + syscall(SYS_write, 2, "hello", 5); +], + [je_cv_syscall]) +CFLAGS="${SAVED_CFLAGS}" +if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ]) +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index dcbad728..4d2daea8 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -71,6 +71,9 @@ */ #undef JEMALLOC_OSSPIN +/* Defined if syscall(2) is available. */ +#undef JEMALLOC_HAVE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ diff --git a/src/pages.c b/src/pages.c index 84e22160..647952ac 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,7 +219,7 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; -#ifdef SYS_open +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); @@ -227,13 +227,13 @@ os_overcommits_proc(void) if (fd == -1) return (false); /* Error. 
*/ -#ifdef SYS_read +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); #else nread = read(fd, &buf, sizeof(buf)); #endif -#ifdef SYS_close +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else close(fd); diff --git a/src/util.c b/src/util.c index 881a7fd1..5b8175bc 100644 --- a/src/util.c +++ b/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#ifdef SYS_write +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary From 31db315f17a48380d11d5dd67dde154adf571573 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:05:19 -0700 Subject: [PATCH 0465/2608] Call _exit(2) rather than exit(3) in forked child. _exit(2) is async-signal-safe, whereas exit(3) is not. --- test/unit/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/fork.c b/test/unit/fork.c index 46c815ef..c530797c 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_fork) test_fail("Unexpected fork() failure"); } else if (pid == 0) { /* Child. */ - exit(0); + _exit(0); } else { int status; From a99e0fa2d21917cbcefd8b7a9a2128ae0399d88f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:06:40 -0700 Subject: [PATCH 0466/2608] Fix/refactor zone allocator integration code. Fix zone_force_unlock() to reinitialize, rather than unlocking mutexes, since OS X 10.12 cannot tolerate a child unlocking mutexes that were locked by its parent. Refactor; this was a side effect of experimenting with zone {de,re}registration during fork(2). 
--- include/jemalloc/internal/private_symbols.txt | 2 +- src/zone.c | 192 ++++++++++-------- 2 files changed, 108 insertions(+), 86 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 09ff8324..17499523 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -461,7 +461,6 @@ quarantine quarantine_alloc_hook quarantine_alloc_hook_work quarantine_cleanup -register_zone rtree_child_read rtree_child_read_hard rtree_child_tryread @@ -614,3 +613,4 @@ witness_postfork_parent witness_prefork witness_unlock witnesses_cleanup +zone_register diff --git a/src/zone.c b/src/zone.c index 89a3062c..0571920e 100644 --- a/src/zone.c +++ b/src/zone.c @@ -4,7 +4,7 @@ #endif /* - * The malloc_default_purgeable_zone function is only available on >= 10.6. + * The malloc_default_purgeable_zone() function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. */ extern malloc_zone_t *malloc_default_purgeable_zone(void) @@ -13,8 +13,9 @@ JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t zone; -static struct malloc_introspection_t zone_introspect; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; +static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -164,12 +165,68 @@ static void zone_force_unlock(malloc_zone_t *zone) { + /* + * Call jemalloc_postfork_child() rather than + * jemalloc_postfork_parent(), because this function is executed by both + * parent and child. The parent can tolerate having state + * reinitialized, but the child cannot unlock mutexes that were locked + * by the parent. 
+ */ if (isthreaded) - jemalloc_postfork_parent(); + jemalloc_postfork_child(); +} + +static void +zone_init(void) +{ + + jemalloc_zone.size = (void *)zone_size; + jemalloc_zone.malloc = (void *)zone_malloc; + jemalloc_zone.calloc = (void *)zone_calloc; + jemalloc_zone.valloc = (void *)zone_valloc; + jemalloc_zone.free = (void *)zone_free; + jemalloc_zone.realloc = (void *)zone_realloc; + jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.zone_name = "jemalloc_zone"; + jemalloc_zone.batch_malloc = NULL; + jemalloc_zone.batch_free = NULL; + jemalloc_zone.introspect = &jemalloc_zone_introspect; + jemalloc_zone.version = JEMALLOC_ZONE_VERSION; +#if (JEMALLOC_ZONE_VERSION >= 5) + jemalloc_zone.memalign = zone_memalign; +#endif +#if (JEMALLOC_ZONE_VERSION >= 6) + jemalloc_zone.free_definite_size = zone_free_definite_size; +#endif +#if (JEMALLOC_ZONE_VERSION >= 8) + jemalloc_zone.pressure_relief = NULL; +#endif + + jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.good_size = (void *)zone_good_size; + jemalloc_zone_introspect.check = NULL; + jemalloc_zone_introspect.print = NULL; + jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; + jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + jemalloc_zone_introspect.statistics = NULL; +#if (JEMALLOC_ZONE_VERSION >= 6) + jemalloc_zone_introspect.zone_locked = NULL; +#endif +#if (JEMALLOC_ZONE_VERSION >= 7) + jemalloc_zone_introspect.enable_discharge_checking = NULL; + jemalloc_zone_introspect.disable_discharge_checking = NULL; + jemalloc_zone_introspect.discharge = NULL; +# ifdef __BLOCKS__ + jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; +# else + jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; +# endif +#endif } static malloc_zone_t * -get_default_zone(void) +zone_default_get(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -183,7 +240,7 @@ get_default_zone(void) * zone 
is the default. So get the list of zones to get the first one, * instead of relying on malloc_default_zone. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, + if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &num_zones)) { /* * Reset the value in case the failure happened after it was @@ -198,85 +255,13 @@ get_default_zone(void) return (malloc_default_zone()); } -JEMALLOC_ATTR(constructor) -void -register_zone(void) +/* As written, this function can only promote jemalloc_zone. */ +static void +zone_promote(void) { - - /* - * If something else replaced the system default zone allocator, don't - * register jemalloc's. - */ - malloc_zone_t *default_zone = get_default_zone(); - malloc_zone_t *purgeable_zone = NULL; - if (!default_zone->zone_name || - strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { - return; - } - - zone.size = (void *)zone_size; - zone.malloc = (void *)zone_malloc; - zone.calloc = (void *)zone_calloc; - zone.valloc = (void *)zone_valloc; - zone.free = (void *)zone_free; - zone.realloc = (void *)zone_realloc; - zone.destroy = (void *)zone_destroy; - zone.zone_name = "jemalloc_zone"; - zone.batch_malloc = NULL; - zone.batch_free = NULL; - zone.introspect = &zone_introspect; - zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) - zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) - zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) - zone.pressure_relief = NULL; -#endif - - zone_introspect.enumerator = NULL; - zone_introspect.good_size = (void *)zone_good_size; - zone_introspect.check = NULL; - zone_introspect.print = NULL; - zone_introspect.log = NULL; - zone_introspect.force_lock = (void *)zone_force_lock; - zone_introspect.force_unlock = (void *)zone_force_unlock; - zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) - zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) - 
zone_introspect.enable_discharge_checking = NULL; - zone_introspect.disable_discharge_checking = NULL; - zone_introspect.discharge = NULL; -#ifdef __BLOCKS__ - zone_introspect.enumerate_discharged_pointers = NULL; -#else - zone_introspect.enumerate_unavailable_without_blocks = NULL; -#endif -#endif - - /* - * The default purgeable zone is created lazily by OSX's libc. It uses - * the default zone when it is created for "small" allocations - * (< 15 KiB), but assumes the default zone is a scalable_zone. This - * obviously fails when the default zone is the jemalloc zone, so - * malloc_default_purgeable_zone is called beforehand so that the - * default purgeable zone is created when the default zone is still - * a scalable_zone. As purgeable zones only exist on >= 10.6, we need - * to check for the existence of malloc_default_purgeable_zone() at - * run time. - */ - if (malloc_default_purgeable_zone != NULL) - purgeable_zone = malloc_default_purgeable_zone(); - - /* Register the custom zone. At this point it won't be the default. */ - malloc_zone_register(&zone); + malloc_zone_t *zone; do { - default_zone = malloc_default_zone(); /* * Unregister and reregister the default zone. On OSX >= 10.6, * unregistering takes the last registered zone and places it @@ -287,6 +272,7 @@ register_zone(void) */ malloc_zone_unregister(default_zone); malloc_zone_register(default_zone); + /* * On OSX 10.6, having the default purgeable zone appear before * the default zone makes some things crash because it thinks it @@ -298,11 +284,47 @@ register_zone(void) * above, i.e. the default zone. Registering it again then puts * it at the end, obviously after the default zone. 
*/ - if (purgeable_zone) { + if (purgeable_zone != NULL) { malloc_zone_unregister(purgeable_zone); malloc_zone_register(purgeable_zone); } - default_zone = get_default_zone(); - } while (default_zone != &zone); + zone = zone_default_get(); + } while (zone != &jemalloc_zone); +} + +JEMALLOC_ATTR(constructor) +void +zone_register(void) +{ + + /* + * If something else replaced the system default zone allocator, don't + * register jemalloc's. + */ + default_zone = zone_default_get(); + if (!default_zone->zone_name || strcmp(default_zone->zone_name, + "DefaultMallocZone") != 0) + return; + + /* + * The default purgeable zone is created lazily by OSX's libc. It uses + * the default zone when it is created for "small" allocations + * (< 15 KiB), but assumes the default zone is a scalable_zone. This + * obviously fails when the default zone is the jemalloc zone, so + * malloc_default_purgeable_zone() is called beforehand so that the + * default purgeable zone is created when the default zone is still + * a scalable_zone. As purgeable zones only exist on >= 10.6, we need + * to check for the existence of malloc_default_purgeable_zone() at + * run time. + */ + purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : + malloc_default_purgeable_zone(); + + /* Register the custom zone. At this point it won't be the default. */ + zone_init(); + malloc_zone_register(&jemalloc_zone); + + /* Promote the custom zone to be default. */ + zone_promote(); } From 3f2b8d9cfaebdf0565da3f1ea6e8af11874eae8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 18:09:45 -0700 Subject: [PATCH 0467/2608] Add os_unfair_lock support. OS X 10.12 deprecated OSSpinLock; os_unfair_lock is the recommended replacement. 
--- configure.ac | 14 ++++++++++++++ .../jemalloc/internal/jemalloc_internal_decls.h | 3 +++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ include/jemalloc/internal/mutex.h | 9 +++++++++ src/mutex.c | 2 ++ test/include/test/mtx.h | 2 ++ test/src/mtx.c | 7 +++++++ 7 files changed, 42 insertions(+) diff --git a/configure.ac b/configure.ac index 4bdd66aa..db51556e 100644 --- a/configure.ac +++ b/configure.ac @@ -1642,6 +1642,20 @@ if test "x${je_cv_builtin_clz}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) fi +dnl ============================================================================ +dnl Check for os_unfair_lock operations as provided on Darwin. + +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +#include +], [ + os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; + os_unfair_lock_lock(&lock); + os_unfair_lock_unlock(&lock); +], [je_cv_os_unfair_lock]) +if test "x${je_cv_os_unfair_lock}" = "xyes" ; then + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) +fi + dnl ============================================================================ dnl Check for spinlock(3) operations as provided on Darwin. diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 1d7f2075..c907d910 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -17,6 +17,9 @@ # include # endif # include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif # ifdef JEMALLOC_GLIBC_MALLOC_HOOK # include # endif diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6824ab74..385801b7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -60,6 +60,11 @@ */ #undef JEMALLOC_HAVE_MADVISE +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. 
+ */ +#undef JEMALLOC_OS_UNFAIR_LOCK + /* * Defined if OSSpin*() functions are available, as provided by Darwin, and * documented in the spinlock(3) manual page. diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 52217991..b442d2d4 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -5,6 +5,9 @@ typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER {0, WITNESS_INITIALIZER(WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -35,6 +38,8 @@ struct malloc_mutex_s { # else CRITICAL_SECTION lock; # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) @@ -88,6 +93,8 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else EnterCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mutex->lock); #else @@ -109,6 +116,8 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) # else LeaveCriticalSection(&mutex->lock); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mutex->lock); #else diff --git a/src/mutex.c b/src/mutex.c index a1fac342..6333e73d 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -80,6 +80,8 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) _CRT_SPINCOUNT)) return (true); # endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mutex->lock = 0; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) diff --git a/test/include/test/mtx.h 
b/test/include/test/mtx.h index bbe822f5..58afbc3d 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -8,6 +8,8 @@ typedef struct { #ifdef _WIN32 CRITICAL_SECTION lock; +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) OSSpinLock lock; #else diff --git a/test/src/mtx.c b/test/src/mtx.c index 73bd02f6..8a5dfdd9 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -11,6 +11,8 @@ mtx_init(mtx_t *mtx) #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) mtx->lock = 0; #else @@ -33,6 +35,7 @@ mtx_fini(mtx_t *mtx) { #ifdef _WIN32 +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); @@ -45,6 +48,8 @@ mtx_lock(mtx_t *mtx) #ifdef _WIN32 EnterCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockLock(&mtx->lock); #else @@ -58,6 +63,8 @@ mtx_unlock(mtx_t *mtx) #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mtx->lock); #elif (defined(JEMALLOC_OSSPIN)) OSSpinLockUnlock(&mtx->lock); #else From da206df10bc51f547d05563ebf17291f3f9471b0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:18:33 -0700 Subject: [PATCH 0468/2608] Do not use syscall(2) on OS X 10.12 (deprecated). --- configure.ac | 17 +++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 3 +++ src/pages.c | 6 +++--- src/util.c | 2 +- 4 files changed, 24 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index db51556e..65b2f150 100644 --- a/configure.ac +++ b/configure.ac @@ -1380,6 +1380,23 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi +dnl Check if syscall(2) is usable. 
Treat warnings as errors, so that e.g. OS X +dnl 10.12's deprecation warning prevents use. +SAVED_CFLAGS="${CFLAGS}" +JE_CFLAGS_APPEND([-Werror]) +JE_COMPILABLE([syscall(2)], [ +#define _GNU_SOURCE +#include +#include +], [ + syscall(SYS_write, 2, "hello", 5); +], + [je_cv_syscall]) +CFLAGS="${SAVED_CFLAGS}" +if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ]) +fi + dnl Check if the GNU-specific secure_getenv function exists. AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 385801b7..9b3dca50 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -71,6 +71,9 @@ */ #undef JEMALLOC_OSSPIN +/* Defined if syscall(2) is available. */ +#undef JEMALLOC_HAVE_SYSCALL + /* * Defined if secure_getenv(3) is available. */ diff --git a/src/pages.c b/src/pages.c index 84e22160..647952ac 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,7 +219,7 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; -#ifdef SYS_open +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); @@ -227,13 +227,13 @@ os_overcommits_proc(void) if (fd == -1) return (false); /* Error. 
*/ -#ifdef SYS_read +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); #else nread = read(fd, &buf, sizeof(buf)); #endif -#ifdef SYS_close +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else close(fd); diff --git a/src/util.c b/src/util.c index a1c4a2a4..79052674 100644 --- a/src/util.c +++ b/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#ifdef SYS_write +#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary From 83ebf2fda5603fe07fcb3ff25c0dd5ad939204d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:50:44 -0700 Subject: [PATCH 0469/2608] Fix sycall(2) configure test for Linux. --- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 6fc44c3e..7df6627a 100644 --- a/configure.ac +++ b/configure.ac @@ -379,7 +379,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - dnl secure_getenv() is exposed by _GNU_SOURCE. + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -1355,7 +1355,6 @@ dnl 10.12's deprecation warning prevents use. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) JE_COMPILABLE([syscall(2)], [ -#define _GNU_SOURCE #include #include ], [ From eca3bc01316bbd06ebc5e42f3fe9146ef9552754 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:50:44 -0700 Subject: [PATCH 0470/2608] Fix sycall(2) configure test for Linux. 
--- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 65b2f150..3a4f1505 100644 --- a/configure.ac +++ b/configure.ac @@ -379,7 +379,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux*) - dnl secure_getenv() is exposed by _GNU_SOURCE. + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) @@ -1385,7 +1385,6 @@ dnl 10.12's deprecation warning prevents use. SAVED_CFLAGS="${CFLAGS}" JE_CFLAGS_APPEND([-Werror]) JE_COMPILABLE([syscall(2)], [ -#define _GNU_SOURCE #include #include ], [ From 6c56e194b02ec3888ed35c111d01ef2e62c6c808 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 2 Nov 2016 18:22:32 -0700 Subject: [PATCH 0471/2608] Check for existance of CPU_COUNT macro before using it. This resolves #485. --- src/jemalloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8210086d..38650ff0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -812,8 +812,10 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; -#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) /* + * glibc >= 2.6 has the CPU_COUNT macro. + * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. 
@@ -2053,6 +2055,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif +#ifdef CPU_COUNT /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the @@ -2071,6 +2074,9 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); #undef PREALIAS #undef ALIAS + +#endif + #endif /* From 712fde79fda767ce1eec7cf1c967feeae90b0c21 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 2 Nov 2016 18:22:32 -0700 Subject: [PATCH 0472/2608] Check for existance of CPU_COUNT macro before using it. This resolves #485. --- src/jemalloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3e0605ec..86030172 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -753,8 +753,10 @@ malloc_ncpus(void) SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; -#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) +#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) /* + * glibc >= 2.6 has the CPU_COUNT macro. + * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. 
@@ -1899,6 +1901,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif +#ifdef CPU_COUNT /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the @@ -1917,6 +1920,9 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); #undef PREALIAS #undef ALIAS + +#endif + #endif /* From 25f7bbcf28f5c83b11149989b3552d87c1f3c5e9 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 31 Oct 2016 16:23:33 -0700 Subject: [PATCH 0473/2608] Fix long spinning in rtree_node_init rtree_node_init spinlocks the node, allocates, and then sets the node. This is under heavy contention at the top of the tree if many threads start to allocate at the same time. Instead, take a per-rtree sleeping mutex to reduce spinning. Tested both pthreads and osx OSSpinLock, and both reduce spinning adequately Previous benchmark time: ./ttest1 500 100 ~15s New benchmark time: ./ttest1 500 100 .57s --- include/jemalloc/internal/rtree.h | 6 ++---- include/jemalloc/internal/witness.h | 3 ++- src/rtree.c | 23 +++++++++-------------- test/unit/rtree.c | 2 ++ 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index fc88dfec..9c6cc22f 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -23,9 +23,6 @@ typedef struct rtree_s rtree_t; #define RTREE_HEIGHT_MAX \ ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) -/* Used for two-stage lock-free node initialization. 
*/ -#define RTREE_NODE_INITIALIZING ((rtree_elm_t *)0x1) - #define RTREE_CTX_INITIALIZER { \ false, \ 0, \ @@ -139,6 +136,7 @@ struct rtree_s { */ unsigned start_level[RTREE_HEIGHT_MAX + 1]; rtree_level_t levels[RTREE_HEIGHT_MAX]; + malloc_mutex_t init_lock; }; #endif /* JEMALLOC_H_STRUCTS */ @@ -251,7 +249,7 @@ JEMALLOC_ALWAYS_INLINE bool rtree_node_valid(rtree_elm_t *node) { - return ((uintptr_t)node > (uintptr_t)RTREE_NODE_INITIALIZING); + return ((uintptr_t)node != (uintptr_t)0); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 26024ac2..86ddb64a 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -28,7 +28,8 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_ARENA_EXTENT_CACHE 10 #define WITNESS_RANK_RTREE_ELM 11U -#define WITNESS_RANK_BASE 12U +#define WITNESS_RANK_RTREE 12U +#define WITNESS_RANK_BASE 13U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF diff --git a/src/rtree.c b/src/rtree.c index 0a42a982..b6b9ed76 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -59,6 +59,8 @@ rtree_new(rtree_t *rtree, unsigned bits) } rtree->start_level[RTREE_HEIGHT_MAX] = 0; + malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE); + return (false); } @@ -135,25 +137,18 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, { rtree_elm_t *node; - if (atomic_cas_p((void **)elmp, NULL, RTREE_NODE_INITIALIZING)) { - spin_t spinner; - - /* - * Another thread is already in the process of initializing. - * Spin-wait until initialization is complete. 
- */ - spin_init(&spinner); - do { - spin_adaptive(&spinner); - node = atomic_read_p((void **)elmp); - } while (node == RTREE_NODE_INITIALIZING); - } else { + malloc_mutex_lock(tsdn, &rtree->init_lock); + node = atomic_read_p((void**)elmp); + if (node == NULL) { node = rtree_node_alloc(tsdn, rtree, ZU(1) << rtree->levels[level].bits); - if (node == NULL) + if (node == NULL) { + malloc_mutex_unlock(tsdn, &rtree->init_lock); return (NULL); + } atomic_write_p((void **)elmp, node); } + malloc_mutex_unlock(tsdn, &rtree->init_lock); return (node); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index a05834fa..03f4e269 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -13,8 +13,10 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) if (rtree != test_rtree) return rtree_node_alloc_orig(tsdn, rtree, nelms); + malloc_mutex_unlock(tsdn, &rtree->init_lock); node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t)); assert_ptr_not_null(node, "Unexpected calloc() failure"); + malloc_mutex_lock(tsdn, &rtree->init_lock); return (node); } From 69f027b8558c30e184921ce22bb187aafcad8a02 Mon Sep 17 00:00:00 2001 From: Samuel Moritz Date: Mon, 25 Jul 2016 19:33:27 +0200 Subject: [PATCH 0474/2608] Support Debian GNU/kFreeBSD. Treat it exactly like Linux since they both use GNU libc. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 7df6627a..d926c8f2 100644 --- a/configure.ac +++ b/configure.ac @@ -378,7 +378,7 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; - *-*-linux*) + *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" From 04e1328ef143a5b6490f971716d9934719d752bf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:45:01 -0700 Subject: [PATCH 0475/2608] Update ChangeLog for 4.3.0. 
--- ChangeLog | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/ChangeLog b/ChangeLog index 532255d1..b43a467e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,43 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.0 (November 3, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). 
(@jasone) + * 4.2.1 (June 8, 2016) Bug fixes: From 62de7680cafbbc7abfbd0d5e9fd2ec5dafb54849 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Sep 2016 11:56:24 -0700 Subject: [PATCH 0476/2608] Update project URL. --- README | 2 +- doc/jemalloc.xml.in | 2 +- jemalloc.pc.in | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README b/README index 9b268f42..5ff24a9e 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://www.canonware.com/jemalloc/ +URL: http://jemalloc.net/ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6da59f43..3d2e721d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -52,7 +52,7 @@ LIBRARY This manual describes jemalloc @jemalloc_version@. More information can be found at the jemalloc website. + url="http://jemalloc.net/">jemalloc website. SYNOPSIS diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 1a3ad9b3..a318e8dd 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. -URL: http://www.canonware.com/jemalloc +URL: http://jemalloc.net/ Version: @jemalloc_version@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} From 1ceae2f8cbd803ba6904ac432dbe6fcef324ba21 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 2 Nov 2016 19:45:01 -0700 Subject: [PATCH 0477/2608] Update ChangeLog for 4.3.0. --- ChangeLog | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/ChangeLog b/ChangeLog index 532255d1..b43a467e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,43 @@ brevity. 
Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.0 (November 3, 2016) + + This is the first release that passes the test suite for multiple Windows + configurations, thanks in large part to @glandium setting up continuous + integration via AppVeyor (and Travis CI for Linux and OS X). + + New features: + - Add "J" (JSON) support to malloc_stats_print(). (@jasone) + - Add Cray compiler support. (@ronawho) + + Optimizations: + - Add/use adaptive spinning for bootstrapping and radix tree node + initialization. (@jasone) + + Bug fixes: + - Fix stats.arenas..nthreads accounting. (@interwq) + - Fix and simplify decay-based purging. (@jasone) + - Make DSS (sbrk(2)-related) operations lockless, which resolves potential + deadlocks during thread exit. (@jasone) + - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, + @jasone) + - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) + - Fix a Valgrind integration bug. (@ronawho) + - Disallow 0x5a junk filling when running in Valgrind. (@jasone) + - Fix a file descriptor leak on Linux. This regression was first released in + 4.2.0. (@jasone) + - Fix static linking of jemalloc with glibc. (@djwatson) + - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This + works around other libraries' system call wrappers performing reentrant + allocation. (@jasone) + - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, + @jasone) + - Fix TSD fetches to avoid (recursive) allocation. This is relevant to + non-TLS and Windows configurations. (@jasone) + - Fix malloc_conf overriding to work on Windows. (@jasone) + - Forcibly disable lazy-lock on Windows (was forcibly *enabled*). (@jasone) + * 4.2.1 (June 8, 2016) Bug fixes: From dd3ed23aea5108d6b5d812c9bf6d347bb50dab56 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 14:55:58 -0700 Subject: [PATCH 0478/2608] Update symbol mangling. 
--- include/jemalloc/internal/private_symbols.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 17499523..8972b37b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -401,6 +401,7 @@ pages_unmap pind2sz pind2sz_compute pind2sz_lookup +pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu @@ -487,6 +488,8 @@ size2index size2index_compute size2index_lookup size2index_tab +spin_adaptive +spin_init stats_cactive stats_cactive_add stats_cactive_get From 4f7d8c2dee624536e30e2fba907e2ce469b2cdf7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 15:00:02 -0700 Subject: [PATCH 0479/2608] Update symbol mangling. --- include/jemalloc/internal/private_symbols.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 29936a87..2e2c11d8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -419,6 +419,8 @@ size2index size2index_compute size2index_lookup size2index_tab +spin_adaptive +spin_init stats_print tcache_alloc_easy tcache_alloc_large From 8dd5ea87cac39d9a90dbe40d13267ec02df0214c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 17:25:54 -0700 Subject: [PATCH 0480/2608] Fix extent_alloc_cache[_locked]() to support decommitted allocation. Fix extent_alloc_cache[_locked]() to support decommitted allocation, and use this ability in arena_stash_dirty(), so that decommitted extents are not needlessly committed during purging. In practice this does not happen on any currently supported systems, because both extent merging and decommit must be implemented; all supported systems implement one xor the other. 
--- include/jemalloc/internal/extent.h | 4 ++-- src/arena.c | 11 ++++++----- src/extent.c | 20 +++++++++----------- src/large.c | 4 ++-- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 08d30365..673cac2f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -101,10 +101,10 @@ ph_proto(, extent_heap_, extent_heap_t, extent_t) extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab); + size_t alignment, bool *zero, bool *commit, bool slab); extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab); + size_t alignment, bool *zero, bool *commit, bool slab); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab); diff --git a/src/arena.c b/src/arena.c index ce289594..fd3c5531 100644 --- a/src/arena.c +++ b/src/arena.c @@ -49,11 +49,12 @@ arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool slab) { + bool commit = true; malloc_mutex_assert_owner(tsdn, &arena->lock); return (extent_alloc_cache(tsdn, arena, r_extent_hooks, new_addr, usize, - pad, alignment, zero, slab)); + pad, alignment, zero, &commit, slab)); } extent_t * @@ -681,7 +682,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, for (extent = qr_next(&arena->extents_dirty, qr_link); extent != &arena->extents_dirty; extent = next) { size_t npages; - bool zero; + bool zero, commit; UNUSED extent_t *textent; npages = extent_size_get(extent) >> LG_PAGE; @@ 
-691,9 +692,10 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, next = qr_next(extent, qr_link); /* Allocate. */ zero = false; + commit = false; textent = extent_alloc_cache_locked(tsdn, arena, r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, PAGE, - &zero, false); + &zero, &commit, false); assert(textent == extent); assert(zero == extent_zeroed_get(extent)); extent_ring_remove(extent); @@ -943,9 +945,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, extent_t *slab; arena_slab_data_t *slab_data; extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - bool zero; + bool zero = false; - zero = false; slab = arena_extent_cache_alloc_locked(tsdn, arena, &extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, true); if (slab == NULL) { diff --git a/src/extent.c b/src/extent.c index 809777a1..ad78c879 100644 --- a/src/extent.c +++ b/src/extent.c @@ -517,8 +517,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_usize_set(extent, usize); } - if (!extent_committed_get(extent) && extent_commit_wrapper(tsdn, arena, - r_extent_hooks, extent, 0, extent_size_get(extent))) { + if (commit && !extent_committed_get(extent) && + extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, + extent_size_get(extent))) { if (!locked) malloc_mutex_unlock(tsdn, &arena->extents_mtx); extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, @@ -590,44 +591,41 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static extent_t * extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool locked, void *new_addr, size_t usize, - size_t pad, size_t alignment, bool *zero, bool slab) + size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; - bool commit; assert(usize + pad != 0); assert(alignment != 0); - commit = true; extent = extent_recycle(tsdn, arena, r_extent_hooks, 
arena->extents_cached, locked, true, new_addr, usize, pad, - alignment, zero, &commit, slab); + alignment, zero, commit, slab); if (extent == NULL) return (NULL); - assert(commit); return (extent); } extent_t * extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab) + size_t alignment, bool *zero, bool *commit, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, - new_addr, usize, pad, alignment, zero, slab)); + new_addr, usize, pad, alignment, zero, commit, slab)); } extent_t * extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab) + size_t alignment, bool *zero, bool *commit, bool slab) { return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, - new_addr, usize, pad, alignment, zero, slab)); + new_addr, usize, pad, alignment, zero, commit, slab)); } static void * diff --git a/src/large.c b/src/large.c index 23af1830..1bae9399 100644 --- a/src/large.c +++ b/src/large.c @@ -143,8 +143,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, extent_t *trail; if ((trail = arena_extent_cache_alloc(tsdn, arena, &extent_hooks, - extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) - == NULL) { + extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) == + NULL) { bool commit = true; if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, From ea9961acdbc9f7e2c95a3b55ce0ac1024af5d167 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 21:18:50 -0700 Subject: [PATCH 0481/2608] Fix psz/pind edge cases. Add an "over-size" extent heap in which to store extents which exceed the maximum size class (plus cache-oblivious padding, if enabled). 
Remove psz2ind_clamp() and use psz2ind() instead so that trying to allocate the maximum size class can in principle succeed. In practice, this allows assertions to hold so that OOM errors can be successfully generated. --- include/jemalloc/internal/arena.h | 4 +-- .../jemalloc/internal/jemalloc_internal.h.in | 28 ++++------------ include/jemalloc/internal/private_symbols.txt | 2 -- src/arena.c | 2 +- src/extent.c | 21 ++++++------ src/jemalloc.c | 3 +- test/unit/size_classes.c | 32 ++++++++++--------- 7 files changed, 39 insertions(+), 53 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 4e20af48..ce9d8b5e 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -212,8 +212,8 @@ struct arena_s { * Heaps of extents that were previously allocated. These are used when * allocating extents, in an attempt to re-use address space. */ - extent_heap_t extents_cached[NPSIZES]; - extent_heap_t extents_retained[NPSIZES]; + extent_heap_t extents_cached[NPSIZES+1]; + extent_heap_t extents_retained[NPSIZES+1]; /* * Ring sentinel used to track unused dirty memory. Dirty memory is * managed as an LRU of cached extents. diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0e4ffd91..85b34012 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -434,7 +434,7 @@ extern arena_t **arenas; * pind2sz_tab encodes the same information as could be computed by * pind2sz_compute(). */ -extern size_t const pind2sz_tab[NPSIZES]; +extern size_t const pind2sz_tab[NPSIZES+1]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). 
@@ -516,9 +516,7 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/large.h" #ifndef JEMALLOC_ENABLE_INLINE -pszind_t psz2ind_impl(size_t psz, bool clamp); pszind_t psz2ind(size_t psz); -pszind_t psz2ind_clamp(size_t psz); size_t pind2sz_compute(pszind_t pind); size_t pind2sz_lookup(pszind_t pind); size_t pind2sz(pszind_t pind); @@ -544,11 +542,11 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE pszind_t -psz2ind_impl(size_t psz, bool clamp) +psz2ind(size_t psz) { if (unlikely(psz > LARGE_MAXCLASS)) - return (clamp ? NPSIZES-1 : NPSIZES); + return (NPSIZES); { pszind_t x = lg_floor((psz<<1)-1); pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - @@ -567,24 +565,12 @@ psz2ind_impl(size_t psz, bool clamp) } } -JEMALLOC_INLINE pszind_t -psz2ind(size_t psz) -{ - - return (psz2ind_impl(psz, false)); -} - -JEMALLOC_INLINE pszind_t -psz2ind_clamp(size_t psz) -{ - - return (psz2ind_impl(psz, true)); -} - JEMALLOC_INLINE size_t pind2sz_compute(pszind_t pind) { + if (unlikely(pind == NPSIZES)) + return (LARGE_MAXCLASS + PAGE); { size_t grp = pind >> LG_SIZE_CLASS_GROUP; size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); @@ -614,7 +600,7 @@ JEMALLOC_INLINE size_t pind2sz(pszind_t pind) { - assert(pind < NPSIZES); + assert(pind < NPSIZES+1); return (pind2sz_lookup(pind)); } @@ -623,7 +609,7 @@ psz2u(size_t psz) { if (unlikely(psz > LARGE_MAXCLASS)) - return (0); + return (LARGE_MAXCLASS + PAGE); { size_t x = lg_floor((psz<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 2e2c11d8..2949de10 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -380,8 +380,6 @@ prof_thread_active_set prof_thread_name_get prof_thread_name_set psz2ind -psz2ind_clamp -psz2ind_impl psz2u rtree_child_read rtree_child_read_hard diff --git a/src/arena.c b/src/arena.c index fd3c5531..dd8e4d9c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1696,7 +1696,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) WITNESS_RANK_ARENA_LARGE)) return (NULL); - for (i = 0; i < NPSIZES; i++) { + for (i = 0; i < NPSIZES+1; i++) { extent_heap_new(&arena->extents_cached[i]); extent_heap_new(&arena->extents_retained[i]); } diff --git a/src/extent.c b/src/extent.c index ad78c879..a802ad90 100644 --- a/src/extent.c +++ b/src/extent.c @@ -45,7 +45,7 @@ static size_t highpages; */ static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES], + extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES+1], bool cache, extent_t *extent); /******************************************************************************/ @@ -190,11 +190,11 @@ extent_ad_comp(const extent_t *a, const extent_t *b) ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_ad_comp) static void -extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], +extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind_clamp(psz); + pszind_t pind = psz2ind(psz); malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); @@ -202,11 +202,11 @@ extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], } static void -extent_heaps_remove(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES], +extent_heaps_remove(tsdn_t 
*tsdn, extent_heap_t extent_heaps[NPSIZES+1], extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind_clamp(psz); + pszind_t pind = psz2ind(psz); malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); @@ -358,15 +358,14 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) */ static extent_t * extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, - extent_heap_t extent_heaps[NPSIZES], size_t size) + extent_heap_t extent_heaps[NPSIZES+1], size_t size) { pszind_t pind, i; malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); pind = psz2ind(extent_size_quantize_ceil(size)); - assert(pind < NPSIZES); - for (i = pind; i < NPSIZES; i++) { + for (i = pind; i < NPSIZES+1; i++) { extent_t *extent = extent_heap_first(&extent_heaps[i]); if (extent != NULL) return (extent); @@ -393,7 +392,7 @@ extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_heap_t extent_heaps[NPSIZES], bool locked, bool cache, + extent_heap_t extent_heaps[NPSIZES+1], bool locked, bool cache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { @@ -758,7 +757,7 @@ extent_can_coalesce(const extent_t *a, const extent_t *b) static void extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, - extent_heap_t extent_heaps[NPSIZES], bool cache) + extent_heap_t extent_heaps[NPSIZES+1], bool cache) { if (!extent_can_coalesce(a, b)) @@ -786,7 +785,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_heap_t extent_heaps[NPSIZES], bool cache, extent_t *extent) + extent_heap_t extent_heaps[NPSIZES+1], bool cache, extent_t *extent) { extent_t *prev, *next; rtree_ctx_t rtree_ctx_fallback; diff --git a/src/jemalloc.c 
b/src/jemalloc.c index 86030172..dc0add49 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -83,7 +83,7 @@ enum { static uint8_t malloc_slow_flags; JEMALLOC_ALIGNED(CACHELINE) -const size_t pind2sz_tab[NPSIZES] = { +const size_t pind2sz_tab[NPSIZES+1] = { #define PSZ_yes(lg_grp, ndelta, lg_delta) \ (((ZU(1)< Date: Thu, 3 Nov 2016 21:14:59 -0700 Subject: [PATCH 0482/2608] Fix extent_recycle()'s cache-oblivious padding support. Add padding *after* computing the size class, so that the optimal size class isn't skipped during search for a usable extent. This regression was caused by b46261d58b449cc4c099ed2384451a2499688f0e (Implement cache-oblivious support for huge size classes.). --- src/extent.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/extent.c b/src/extent.c index a802ad90..e190adc4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -427,12 +427,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(prev == NULL || extent_past_get(prev) == new_addr); } - size = usize + pad; - alloc_size = (new_addr != NULL) ? size : s2u(size + - PAGE_CEILING(alignment) - PAGE); - /* Beware size_t wrap-around. */ - if (alloc_size < usize) + alloc_size = ((new_addr != NULL) ? usize : s2u(usize + + PAGE_CEILING(alignment) - PAGE)) + pad; + if (alloc_size > LARGE_MAXCLASS + pad || alloc_size < usize) { + /* Too large, possibly wrapped around. */ return (NULL); + } + size = usize + pad; if (!locked) malloc_mutex_lock(tsdn, &arena->extents_mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); From a967fae362f55ee7162fb48776dfac69d4f28d1c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 23:49:21 -0700 Subject: [PATCH 0483/2608] Fix/simplify extent_recycle() allocation size computations. 
Do not call s2u() during alloc_size computation, since any necessary ceiling increase is taken care of later by extent_first_best_fit() --> extent_size_quantize_ceil(), and the s2u() call may erroneously cause a higher quantization result. Remove an overly strict overflow check that was added in 4a7852137d8b6598fdb90ea8e1fd3bc8a8b94a3a (Fix extent_recycle()'s cache-oblivious padding support.). --- src/extent.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/extent.c b/src/extent.c index e190adc4..4027e8b7 100644 --- a/src/extent.c +++ b/src/extent.c @@ -405,6 +405,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); + assert(alignment > 0); if (config_debug && new_addr != NULL) { extent_t *prev; @@ -427,13 +428,11 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(prev == NULL || extent_past_get(prev) == new_addr); } - alloc_size = ((new_addr != NULL) ? usize : s2u(usize + - PAGE_CEILING(alignment) - PAGE)) + pad; - if (alloc_size > LARGE_MAXCLASS + pad || alloc_size < usize) { - /* Too large, possibly wrapped around. */ - return (NULL); - } size = usize + pad; + alloc_size = size + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size < usize) + return (NULL); if (!locked) malloc_mutex_lock(tsdn, &arena->extents_mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); From 076087692750ca6ba43b52c77e03d185002e5371 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Nov 2016 00:02:43 -0700 Subject: [PATCH 0484/2608] Update ChangeLog for 4.3.0. --- ChangeLog | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index b43a467e..ac2e4d3c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. 
Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.3.0 (November 3, 2016) +* 4.3.0 (November 4, 2016) This is the first release that passes the test suite for multiple Windows configurations, thanks in large part to @glandium setting up continuous @@ -19,6 +19,9 @@ brevity. Much more detail can be found in the git revision history: initialization. (@jasone) Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) - Fix stats.arenas..nthreads accounting. (@interwq) - Fix and simplify decay-based purging. (@jasone) - Make DSS (sbrk(2)-related) operations lockless, which resolves potential @@ -29,13 +32,16 @@ brevity. Much more detail can be found in the git revision history: - Fix a Valgrind integration bug. (@ronawho) - Disallow 0x5a junk filling when running in Valgrind. (@jasone) - Fix a file descriptor leak on Linux. This regression was first released in - 4.2.0. (@jasone) + 4.2.0. (@vsarunas, @jasone) - Fix static linking of jemalloc with glibc. (@djwatson) - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This works around other libraries' system call wrappers performing reentrant - allocation. (@jasone) + allocation. (@kspinka, @Whissi, @jasone) - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) - Fix TSD fetches to avoid (recursive) allocation. This is relevant to non-TLS and Windows configurations. (@jasone) - Fix malloc_conf overriding to work on Windows. 
(@jasone) From 6d2a57cfbbd9991ef120b9773e971053deec776a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 21:57:17 -0700 Subject: [PATCH 0485/2608] Use -std=gnu11 if available. This supersedes -std=gnu99, and enables C11 atomics. --- configure.ac | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index d926c8f2..6f29ce0a 100644 --- a/configure.ac +++ b/configure.ac @@ -171,9 +171,15 @@ fi if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then +dnl JE_CFLAGS_APPEND([-std=gnu99]) + JE_CFLAGS_APPEND([-std=gnu11]) + if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + fi fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) From e9012630acf897ce7016e427354bb46fbe893fe1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 17:11:01 -0700 Subject: [PATCH 0486/2608] Fix chunk_alloc_cache() to support decommitted allocation. Fix chunk_alloc_cache() to support decommitted allocation, and use this ability in arena_chunk_alloc_internal() and arena_stash_dirty(), so that chunks don't get permanently stuck in a hybrid state. This resolves #487. 
--- include/jemalloc/internal/chunk.h | 2 +- src/arena.c | 16 ++++++++++------ src/chunk.c | 8 +++----- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index e199a037..38c9a012 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -58,7 +58,7 @@ void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool dalloc_node); + bool *zero, bool *commit, bool dalloc_node); void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); diff --git a/src/arena.c b/src/arena.c index 76514955..a7fe34a9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -30,6 +30,8 @@ unsigned nhclasses; /* Number of huge size classes. */ * definition. 
*/ +static void arena_chunk_dalloc(tsdn_t *tsdn, arena_t *arena, + arena_chunk_t *chunk); static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit); static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, @@ -579,14 +581,13 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, true); + chunksize, zero, commit, true); if (chunk != NULL) { if (arena_chunk_register(tsdn, arena, chunk, *zero)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, true); return (NULL); } - *commit = true; } if (chunk == NULL) { chunk = arena_chunk_alloc_internal_hard(tsdn, arena, @@ -883,6 +884,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize = CHUNK_CEILING(usize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -894,7 +896,7 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, zero, true); + alignment, zero, &commit, true); malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, @@ -1004,6 +1006,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -1015,7 +1018,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, zero, true) == NULL); + chunksize, zero, &commit, 
true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, @@ -1512,7 +1515,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; - bool zero; + bool zero, commit; UNUSED void *chunk; npages = extent_node_size_get(chunkselm) >> LG_PAGE; @@ -1526,10 +1529,11 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, * dalloc_node=false argument to chunk_alloc_cache(). */ zero = false; + commit = false; chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), extent_node_size_get(chunkselm), chunksize, &zero, - false); + &commit, false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, diff --git a/src/chunk.c b/src/chunk.c index 302b98cb..07e26f77 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -385,23 +385,21 @@ chunk_alloc_base(size_t size) void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool dalloc_node) + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, + bool dalloc_node) { void *ret; - bool commit; assert(size != 0); assert((size & chunksize_mask) == 0); assert(alignment != 0); assert((alignment & chunksize_mask) == 0); - commit = true; ret = chunk_recycle(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, zero, &commit, dalloc_node); + new_addr, size, alignment, zero, commit, dalloc_node); if (ret == NULL) return (NULL); - assert(commit); if (config_valgrind) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, size); return (ret); From 32896a902bb962a06261d81c9be22e16210692db Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 22:21:34 -0700 Subject: [PATCH 0487/2608] Fix 
large allocation to search optimal size class heap. Fix arena_run_alloc_large_helper() to not convert size to usize when searching for the first best fit via arena_run_first_best_fit(). This allows the search to consider the optimal quantized size class, so that e.g. allocating and deallocating 40 KiB in a tight loop can reuse the same memory. This regression was nominally caused by 5707d6f952c71baa2f19102479859012982ac821 (Quantize szad trees by size class.), but it did not commonly cause problems until 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.). These regressions were first released in 4.0.0. This resolves #487. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index a7fe34a9..43c3ccf2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1059,7 +1059,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) static arena_run_t * arena_run_alloc_large_helper(arena_t *arena, size_t size, bool zero) { - arena_run_t *run = arena_run_first_best_fit(arena, s2u(size)); + arena_run_t *run = arena_run_first_best_fit(arena, size); if (run != NULL) { if (arena_run_split_large(arena, run, size, zero)) run = NULL; From 213667fe26ee30cb724d390c6047821960c57a34 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Nov 2016 00:02:43 -0700 Subject: [PATCH 0488/2608] Update ChangeLog for 4.3.0. --- ChangeLog | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index b43a467e..ac2e4d3c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,7 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 4.3.0 (November 3, 2016) +* 4.3.0 (November 4, 2016) This is the first release that passes the test suite for multiple Windows configurations, thanks in large part to @glandium setting up continuous @@ -19,6 +19,9 @@ brevity. 
Much more detail can be found in the git revision history: initialization. (@jasone) Bug fixes: + - Fix large allocation to search starting in the optimal size class heap, + which can substantially reduce virtual memory churn and fragmentation. This + regression was first released in 4.0.0. (@mjp41, @jasone) - Fix stats.arenas..nthreads accounting. (@interwq) - Fix and simplify decay-based purging. (@jasone) - Make DSS (sbrk(2)-related) operations lockless, which resolves potential @@ -29,13 +32,16 @@ brevity. Much more detail can be found in the git revision history: - Fix a Valgrind integration bug. (@ronawho) - Disallow 0x5a junk filling when running in Valgrind. (@jasone) - Fix a file descriptor leak on Linux. This regression was first released in - 4.2.0. (@jasone) + 4.2.0. (@vsarunas, @jasone) - Fix static linking of jemalloc with glibc. (@djwatson) - Use syscall(2) rather than {open,read,close}(2) during boot on Linux. This works around other libraries' system call wrappers performing reentrant - allocation. (@jasone) + allocation. (@kspinka, @Whissi, @jasone) - Fix OS X default zone replacement to work with OS X 10.12. (@glandium, @jasone) + - Fix cached memory management to avoid needless commit/decommit operations + during purging, which resolves permanent virtual memory map fragmentation + issues on Windows. (@mjp41, @jasone) - Fix TSD fetches to avoid (recursive) allocation. This is relevant to non-TLS and Windows configurations. (@jasone) - Fix malloc_conf overriding to work on Windows. (@jasone) From cb3ad659f0e5c909b7df5c9cbe84ff429754c669 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 3 Nov 2016 21:57:17 -0700 Subject: [PATCH 0489/2608] Use -std=gnu11 if available. This supersedes -std=gnu99, and enables C11 atomics. 
--- configure.ac | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 3a4f1505..104fd994 100644 --- a/configure.ac +++ b/configure.ac @@ -171,9 +171,15 @@ fi if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then +dnl JE_CFLAGS_APPEND([-std=gnu99]) + JE_CFLAGS_APPEND([-std=gnu11]) + if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_APPEND([-std=gnu99]) + if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + fi fi JE_CFLAGS_APPEND([-Wall]) JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) From d30b3ea51a6cb3abe88251a0fd769b0438c88a6e Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 4 Nov 2016 10:27:32 +0000 Subject: [PATCH 0490/2608] Fixes to Visual Studio Project files --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 ++ .../vc2015/jemalloc/jemalloc.vcxproj.filters | 18 ++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index e5ecb351..75ea8fba 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + @@ -107,6 +108,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 74b45112..a328a6f9 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -140,6 +140,12 @@ Header Files\internal + + Header Files\internal + + + Header Files\internal + Header Files\internal @@ -155,6 +161,9 @@ Header Files\internal + + Header Files\internal + Header Files\msvc_compat @@ -167,12 +176,6 @@ Header 
Files\msvc_compat\C99 - - Header Files\internal - - - Header Files\internal - @@ -232,6 +235,9 @@ Source Files + + Source Files + Source Files From 77635bf532488b180024ef50c0583e43111fee0f Mon Sep 17 00:00:00 2001 From: Matthew Parkinson Date: Fri, 4 Nov 2016 10:27:32 +0000 Subject: [PATCH 0491/2608] Fixes to Visual Studio Project files --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 6 +++++- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9315022d..8342ab3a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -69,12 +69,14 @@ + + - + @@ -110,11 +112,13 @@ + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 88c15efa..37f0f02a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -146,6 +146,12 @@ Header Files\internal + + Header Files\internal + + + Header Files\internal + Header Files\internal @@ -161,7 +167,7 @@ Header Files\internal - + Header Files\internal @@ -241,6 +247,9 @@ Source Files + + Source Files + Source Files @@ -256,5 +265,8 @@ Source Files + + Source Files + - \ No newline at end of file + From 28b7e42e44a1a77218a941d9dfe5bb643d884219 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Nov 2016 15:00:08 -0700 Subject: [PATCH 0492/2608] Fix arena data structure size calculation. Fix paren placement so that QUANTUM_CEILING() applies to the correct portion of the expression that computes how much memory to base_alloc(). In practice this bug had no impact. 
This was caused by 5d8db15db91c85d47b343cfc07fc6ea736f0de48 (Simplify run quantization.), which in turn fixed an over-allocation regression caused by 3c4d92e82a31f652a7c77ca937a02d0185085b06 (Add per size class huge allocation statistics.). --- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 43c3ccf2..d737ec9a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3461,8 +3461,8 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (config_stats) { arena = (arena_t *)base_alloc(tsdn, CACHELINE_CEILING(sizeof(arena_t)) + - QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t)) + - (nhclasses * sizeof(malloc_huge_stats_t)))); + QUANTUM_CEILING((nlclasses * sizeof(malloc_large_stats_t))) + + (nhclasses * sizeof(malloc_huge_stats_t))); } else arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); if (arena == NULL) From e0a9e78374f56bc7a27258ced08d89bfc436d8af Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Nov 2016 15:15:24 -0700 Subject: [PATCH 0493/2608] Update ChangeLog for 4.3.0. --- ChangeLog | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ChangeLog b/ChangeLog index ac2e4d3c..118df96f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,6 +28,8 @@ brevity. Much more detail can be found in the git revision history: deadlocks during thread exit. (@jasone) - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, @jasone) + - Fix over-sized allocation of arena_t (plus associated stats) data + structures. (@jasone, @interwq) - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) - Fix a Valgrind integration bug. (@ronawho) - Disallow 0x5a junk filling when running in Valgrind. (@jasone) From 23f04ef9b7f6b48fd41ae895a21f351e6f549b7d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 4 Nov 2016 15:15:24 -0700 Subject: [PATCH 0494/2608] Update ChangeLog for 4.3.0. 
--- ChangeLog | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ChangeLog b/ChangeLog index ac2e4d3c..118df96f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -28,6 +28,8 @@ brevity. Much more detail can be found in the git revision history: deadlocks during thread exit. (@jasone) - Fix over-sized allocation of radix tree leaf nodes. (@mjp41, @ogaun, @jasone) + - Fix over-sized allocation of arena_t (plus associated stats) data + structures. (@jasone, @interwq) - Fix EXTRA_CFLAGS to not affect configuration. (@jasone) - Fix a Valgrind integration bug. (@ronawho) - Disallow 0x5a junk filling when running in Valgrind. (@jasone) From a4e83e859353ea19dc8377088eae31520d291550 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 09:37:12 -0800 Subject: [PATCH 0495/2608] Fix run leak. Fix arena_run_first_best_fit() to search all potentially non-empty runs_avail heaps, rather than ignoring the heap that contains runs larger than large_maxclass, but less than chunksize. This fixes a regression caused by f193fd80cf1f99bce2bc9f5f4a8b149219965da2 (Refactor runs_avail.). This resolves #493. 
--- src/arena.c | 12 +++++++----- test/unit/run_quantize.c | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index d737ec9a..49f04931 100644 --- a/src/arena.c +++ b/src/arena.c @@ -150,6 +150,8 @@ arena_avail_insert(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); + assert((npages << LG_PAGE) < chunksize); + assert(pind2sz(pind) <= chunksize); arena_run_heap_insert(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -162,6 +164,8 @@ arena_avail_remove(arena_t *arena, arena_chunk_t *chunk, size_t pageind, arena_miscelm_get_const(chunk, pageind)))); assert(npages == (arena_mapbits_unallocated_size_get(chunk, pageind) >> LG_PAGE)); + assert((npages << LG_PAGE) < chunksize); + assert(pind2sz(pind) <= chunksize); arena_run_heap_remove(&arena->runs_avail[pind], arena_miscelm_get_mutable(chunk, pageind)); } @@ -1046,7 +1050,7 @@ arena_run_first_best_fit(arena_t *arena, size_t size) pind = psz2ind(run_quantize_ceil(size)); - for (i = pind; pind2sz(i) <= large_maxclass; i++) { + for (i = pind; pind2sz(i) <= chunksize; i++) { arena_chunk_map_misc_t *miscelm = arena_run_heap_first( &arena->runs_avail[i]); if (miscelm != NULL) @@ -1922,8 +1926,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) assert(!arena->purging); arena->nactive = 0; - for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); - i++) + for (i = 0; i < NPSIZES; i++) arena_run_heap_new(&arena->runs_avail[i]); malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); @@ -3514,8 +3517,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->nactive = 0; arena->ndirty = 0; - for (i = 0; i < sizeof(arena->runs_avail) / sizeof(arena_run_heap_t); - i++) + for (i = 0; i < NPSIZES; i++) arena_run_heap_new(&arena->runs_avail[i]); qr_new(&arena->runs_dirty, rd_link); diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c 
index 45f32018..b1ca6356 100644 --- a/test/unit/run_quantize.c +++ b/test/unit/run_quantize.c @@ -111,7 +111,7 @@ TEST_BEGIN(test_monotonic) floor_prev = 0; ceil_prev = 0; - for (i = 1; i <= large_maxclass >> LG_PAGE; i++) { + for (i = 1; i <= chunksize >> LG_PAGE; i++) { size_t run_size, floor, ceil; run_size = i << LG_PAGE; From 04b463546e57ecd9ebc334739881a1c69623813a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 10:52:44 -0800 Subject: [PATCH 0496/2608] Refactor prng to not use 64-bit atomics on 32-bit platforms. This resolves #495. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/extent.h | 4 +- include/jemalloc/internal/private_symbols.txt | 12 +- include/jemalloc/internal/prng.h | 155 +++++++++++-- src/arena.c | 6 +- src/ckh.c | 8 +- src/prof.c | 2 +- test/unit/prng.c | 209 ++++++++++++++++-- 8 files changed, 340 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index ce9d8b5e..dbd334e6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -182,7 +182,7 @@ struct arena_s { * PRNG state for cache index randomization of large allocation base * pointers. 
*/ - uint64_t offset_state; + size_t offset_state; dss_prec_t dss_prec; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 673cac2f..531d853c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -325,8 +325,8 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) if (alignment < PAGE) { unsigned lg_range = LG_PAGE - lg_floor(CACHELINE_CEILING(alignment)); - uint64_t r = - prng_lg_range(&extent_arena_get(extent)->offset_state, + size_t r = + prng_lg_range_zu(&extent_arena_get(extent)->offset_state, lg_range, true); uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 2949de10..f178daf7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -328,9 +328,15 @@ pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu -prng_lg_range -prng_range -prng_state_next +prng_lg_range_u32 +prng_lg_range_u64 +prng_lg_range_zu +prng_range_u32 +prng_range_u64 +prng_range_zu +prng_state_next_u32 +prng_state_next_u64 +prng_state_next_zu prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index ebe916f8..c2bda19c 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -19,8 +19,12 @@ * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. 
*/ -#define PRNG_A UINT64_C(6364136223846793005) -#define PRNG_C UINT64_C(1442695040888963407) + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -35,45 +39,133 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t prng_state_next(uint64_t state); -uint64_t prng_lg_range(uint64_t *state, unsigned lg_range, bool atomic); -uint64_t prng_range(uint64_t *state, uint64_t range, bool atomic); +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) -JEMALLOC_ALWAYS_INLINE uint64_t -prng_state_next(uint64_t state) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) { - return ((state * PRNG_A) + PRNG_C); + return ((state * PRNG_A_32) + PRNG_C_32); } JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range(uint64_t *state, unsigned lg_range, bool atomic) +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t 
+prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if (atomic) { + uint32_t state0; + + do { + state0 = atomic_read_uint32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_uint32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. */ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) { uint64_t ret, state1; assert(lg_range > 0); assert(lg_range <= 64); - if (atomic) { - uint64_t state0; - - do { - state0 = atomic_read_uint64(state); - state1 = prng_state_next(state0); - } while (atomic_cas_uint64(state, state0, state1)); - } else { - state1 = prng_state_next(*state); - *state = state1; - } + state1 = prng_state_next_u64(*state); + *state = state1; ret = state1 >> (64 - lg_range); return (ret); } +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_z(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_z(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. 
*/ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); + + return (ret); +} + JEMALLOC_ALWAYS_INLINE uint64_t -prng_range(uint64_t *state, uint64_t range, bool atomic) +prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; @@ -85,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range, bool atomic) /* Generate a result in [0..range) via repeated trial. */ do { - ret = prng_lg_range(state, lg_range, atomic); + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); } while (ret >= range); return (ret); diff --git a/src/arena.c b/src/arena.c index dd8e4d9c..4b104a0e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -422,8 +422,8 @@ arena_decay_deadline_init(arena_t *arena) if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay.jitter_state, - nstime_ns(&arena->decay.interval), false)); + nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, + nstime_ns(&arena->decay.interval))); nstime_add(&arena->decay.deadline, &jitter); } } @@ -1680,7 +1680,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) * deterministic seed. */ arena->offset_state = config_debug ? ind : - (uint64_t)(uintptr_t)arena; + (size_t)(uintptr_t)arena; } arena->dss_prec = extent_dss_prec_get(); diff --git a/src/ckh.c b/src/ckh.c index 75376017..6f16565f 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,8 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. 
*/ - offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS, - false); + offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -142,8 +142,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range(&ckh->prng_state, - LG_CKH_BUCKET_CELLS, false); + i = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); diff --git a/src/prof.c b/src/prof.c index 4bafb39a..19c8fb71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -878,7 +878,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range(&tdata->prng_state, 53, false); + r = prng_lg_range_u64(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) diff --git a/test/unit/prng.c b/test/unit/prng.c index f3234455..111fa59f 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,34 +1,71 @@ #include "test/jemalloc_test.h" static void -test_prng_lg_range(bool atomic) +test_prng_lg_range_u32(bool atomic) +{ + uint32_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_u32(&sb, 32, atomic); + assert_u32_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_ne(ra, rb, + "Full-width 
results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + for (lg_range = 31; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range_u32(&sb, lg_range, atomic); + assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u32_eq(rb, (ra >> (32 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} + +static void +test_prng_lg_range_u64(void) { uint64_t sa, sb, ra, rb; unsigned lg_range; sa = 42; - ra = prng_lg_range(&sa, 64, atomic); + ra = prng_lg_range_u64(&sa, 64); sa = 42; - rb = prng_lg_range(&sa, 64, atomic); + rb = prng_lg_range_u64(&sa, 64); assert_u64_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 42; - rb = prng_lg_range(&sb, 64, atomic); + rb = prng_lg_range_u64(&sb, 64); assert_u64_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; - ra = prng_lg_range(&sa, 64, atomic); - rb = prng_lg_range(&sa, 64, atomic); + ra = prng_lg_range_u64(&sa, 64); + rb = prng_lg_range_u64(&sa, 64); assert_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; - ra = prng_lg_range(&sa, 64, atomic); + ra = prng_lg_range_u64(&sa, 64); for (lg_range = 63; lg_range > 0; lg_range--) { sb = 42; - rb = prng_lg_range(&sb, lg_range, atomic); + rb = prng_lg_range_u64(&sb, lg_range); assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); assert_u64_eq(rb, (ra >> (64 - lg_range)), @@ -37,22 +74,102 @@ test_prng_lg_range(bool atomic) } } -TEST_BEGIN(test_prng_lg_range_nonatomic) +static void +test_prng_lg_range_zu(bool atomic) +{ + uint64_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_zu(&sa, 64, atomic); + sa = 42; + rb = prng_lg_range_zu(&sa, 64, atomic); + assert_zu_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = 
prng_lg_range_zu(&sb, 64, atomic); + assert_zu_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_zu(&sa, 64, atomic); + rb = prng_lg_range_zu(&sa, 64, atomic); + assert_zu_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_zu(&sa, 64, atomic); + for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; + lg_range--) { + sb = 42; + rb = prng_lg_range_zu(&sb, lg_range, atomic); + assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_zu_eq(rb, (ra >> (64 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} + +TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { - test_prng_lg_range(false); + test_prng_lg_range_u32(false); } TEST_END -TEST_BEGIN(test_prng_lg_range_atomic) +TEST_BEGIN(test_prng_lg_range_u32_atomic) { - test_prng_lg_range(true); + test_prng_lg_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_u64_nonatomic) +{ + + test_prng_lg_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_nonatomic) +{ + + test_prng_lg_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_atomic) +{ + + test_prng_lg_range_zu(true); } TEST_END static void -test_prng_range(bool atomic) +test_prng_range_u32(bool atomic) +{ + uint32_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint32_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint32_t r = prng_range_u32(&s, range, atomic); + + assert_u32_lt(r, range, "Out of range"); + } + } +} + +static void +test_prng_range_u64(void) { uint64_t range; #define MAX_RANGE 10000000 @@ -65,24 +182,66 @@ test_prng_range(bool atomic) s = range; for (rep = 0; rep < NREPS; rep++) { - uint64_t r = prng_range(&s, range, atomic); + uint64_t r = prng_range_u64(&s, range); assert_u64_lt(r, 
range, "Out of range"); } } } -TEST_BEGIN(test_prng_range_nonatomic) +static void +test_prng_range_zu(bool atomic) +{ + size_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + size_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + size_t r = prng_range_zu(&s, range, atomic); + + assert_zu_lt(r, range, "Out of range"); + } + } +} + +TEST_BEGIN(test_prng_range_u32_nonatomic) { - test_prng_range(false); + test_prng_range_u32(false); } TEST_END -TEST_BEGIN(test_prng_range_atomic) +TEST_BEGIN(test_prng_range_u32_atomic) { - test_prng_range(true); + test_prng_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_range_u64_nonatomic) +{ + + test_prng_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_nonatomic) +{ + + test_prng_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_atomic) +{ + + test_prng_range_zu(true); } TEST_END @@ -91,8 +250,14 @@ main(void) { return (test( - test_prng_lg_range_nonatomic, - test_prng_lg_range_atomic, - test_prng_range_nonatomic, - test_prng_range_atomic)); + test_prng_lg_range_u32_nonatomic, + test_prng_lg_range_u32_atomic, + test_prng_lg_range_u64_nonatomic, + test_prng_lg_range_zu_nonatomic, + test_prng_lg_range_zu_atomic, + test_prng_range_u32_nonatomic, + test_prng_range_u32_atomic, + test_prng_range_u64_nonatomic, + test_prng_range_zu_nonatomic, + test_prng_range_zu_atomic)); } From 2e46b13ad545c599679f931c9c60ce06adae3859 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 10:53:35 -0800 Subject: [PATCH 0497/2608] Revert "Define 64-bits atomics unconditionally" This reverts commit c2942e2c0e097e7c75a3addd0b9c87758f91692e. This resolves #495. 
--- include/jemalloc/internal/atomic.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3936f68b..3f15ea14 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,7 +66,8 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (defined(__amd64__) || defined(__x86_64__)) +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -124,7 +125,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) : "memory" /* Clobbers. */ ); } -#elif (defined(JEMALLOC_C11ATOMICS)) +# elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -152,7 +153,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } -#elif (defined(JEMALLOC_ATOMIC9)) +# elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -192,7 +193,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) atomic_store_rel_long(p, x); } -#elif (defined(JEMALLOC_OSATOMIC)) +# elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -224,7 +225,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) o = atomic_read_uint64(p); } while (atomic_cas_uint64(p, o, x)); } -#elif (defined(_MSC_VER)) +# elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -254,7 +255,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) InterlockedExchange64(p, x); } -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ +# elif 
(defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -283,8 +284,9 @@ atomic_write_uint64(uint64_t *p, uint64_t x) __sync_lock_test_and_set(p, x); } -#else -# error "Missing implementation for 64-bit atomic operations" +# else +# error "Missing implementation for 64-bit atomic operations" +# endif #endif /******************************************************************************/ From cda59f99701bc3acc569023e197abddc548330f4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 11:27:48 -0800 Subject: [PATCH 0498/2608] Rename atomic_*_{uint32,uint64,u}() to atomic_*_{u32,u64,zu}(). This change conforms to naming conventions throughout the codebase. --- include/jemalloc/internal/arena.h | 6 +- include/jemalloc/internal/atomic.h | 198 +++++++++--------- include/jemalloc/internal/private_symbols.txt | 24 +-- include/jemalloc/internal/prng.h | 8 +- include/jemalloc/internal/stats.h | 2 +- src/arena.c | 4 +- src/extent.c | 12 +- test/unit/atomic.c | 24 +-- 8 files changed, 139 insertions(+), 139 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index dbd334e6..f518c31f 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -365,21 +365,21 @@ JEMALLOC_INLINE void arena_metadata_add(arena_t *arena, size_t size) { - atomic_add_z(&arena->stats.metadata, size); + atomic_add_zu(&arena->stats.metadata, size); } JEMALLOC_INLINE void arena_metadata_sub(arena_t *arena, size_t size) { - atomic_sub_z(&arena->stats.metadata, size); + atomic_sub_zu(&arena->stats.metadata, size); } JEMALLOC_INLINE size_t arena_metadata_get(arena_t *arena) { - return (atomic_read_z(&arena->stats.metadata)); + return (atomic_read_zu(&arena->stats.metadata)); } JEMALLOC_INLINE bool diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3f15ea14..4b5b4ea9 100644 --- 
a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -9,10 +9,12 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -#define atomic_read_uint64(p) atomic_add_uint64(p, 0) -#define atomic_read_uint32(p) atomic_add_uint32(p, 0) +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#define atomic_read_u64(p) atomic_add_u64(p, 0) +#endif +#define atomic_read_u32(p) atomic_add_u32(p, 0) #define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_z(p) atomic_add_z(p, 0) +#define atomic_read_zu(p) atomic_add_zu(p, 0) #define atomic_read_u(p) atomic_add_u(p, 0) #endif /* JEMALLOC_H_EXTERNS */ @@ -41,22 +43,24 @@ */ #ifndef JEMALLOC_ENABLE_INLINE -uint64_t atomic_add_uint64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_uint64(uint64_t *p, uint64_t x); -bool atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s); -void atomic_write_uint64(uint64_t *p, uint64_t x); -uint32_t atomic_add_uint32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_uint32(uint32_t *p, uint32_t x); -bool atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s); -void atomic_write_uint32(uint32_t *p, uint32_t x); +# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +uint64_t atomic_add_u64(uint64_t *p, uint64_t x); +uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); +bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); +void atomic_write_u64(uint64_t *p, uint64_t x); +# endif +uint32_t atomic_add_u32(uint32_t *p, uint32_t x); +uint32_t atomic_sub_u32(uint32_t *p, uint32_t x); +bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s); +void atomic_write_u32(uint32_t *p, uint32_t x); void *atomic_add_p(void **p, void *x); void *atomic_sub_p(void **p, void *x); bool atomic_cas_p(void **p, void *c, void *s); void atomic_write_p(void **p, const void *x); -size_t atomic_add_z(size_t *p, size_t x); -size_t atomic_sub_z(size_t *p, size_t x); -bool atomic_cas_z(size_t *p, size_t c, size_t s); -void atomic_write_z(size_t *p, 
size_t x); +size_t atomic_add_zu(size_t *p, size_t x); +size_t atomic_sub_zu(size_t *p, size_t x); +bool atomic_cas_zu(size_t *p, size_t c, size_t s); +void atomic_write_zu(size_t *p, size_t x); unsigned atomic_add_u(unsigned *p, unsigned x); unsigned atomic_sub_u(unsigned *p, unsigned x); bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); @@ -69,7 +73,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) # if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) +atomic_add_u64(uint64_t *p, uint64_t x) { uint64_t t = x; @@ -83,7 +87,7 @@ atomic_add_uint64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) +atomic_sub_u64(uint64_t *p, uint64_t x) { uint64_t t; @@ -99,7 +103,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { uint8_t success; @@ -115,7 +119,7 @@ atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) } JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) +atomic_write_u64(uint64_t *p, uint64_t x) { asm volatile ( @@ -127,35 +131,35 @@ atomic_write_uint64(uint64_t *p, uint64_t x) } # elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) +atomic_add_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) +atomic_sub_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; return (atomic_fetch_sub(a, x) - x); } JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; 
return (!atomic_compare_exchange_strong(a, &c, s)); } JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) +atomic_write_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } # elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) +atomic_add_u64(uint64_t *p, uint64_t x) { /* @@ -168,7 +172,7 @@ atomic_add_uint64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) +atomic_sub_u64(uint64_t *p, uint64_t x) { assert(sizeof(uint64_t) == sizeof(unsigned long)); @@ -177,7 +181,7 @@ atomic_sub_uint64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { assert(sizeof(uint64_t) == sizeof(unsigned long)); @@ -186,7 +190,7 @@ atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) } JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) +atomic_write_u64(uint64_t *p, uint64_t x) { assert(sizeof(uint64_t) == sizeof(unsigned long)); @@ -195,53 +199,53 @@ atomic_write_uint64(uint64_t *p, uint64_t x) } # elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) +atomic_add_u64(uint64_t *p, uint64_t x) { return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); } JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) +atomic_sub_u64(uint64_t *p, uint64_t x) { return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); } JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) +atomic_write_u64(uint64_t *p, uint64_t x) { uint64_t o; /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ do { - o = atomic_read_uint64(p); - } while (atomic_cas_uint64(p, o, x)); + o = atomic_read_u64(p); + } while (atomic_cas_u64(p, o, x)); } # elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) +atomic_add_u64(uint64_t *p, uint64_t x) { return (InterlockedExchangeAdd64(p, x) + x); } JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) +atomic_sub_u64(uint64_t *p, uint64_t x) { return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { uint64_t o; @@ -250,7 +254,7 @@ atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) } JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) +atomic_write_u64(uint64_t *p, uint64_t x) { InterlockedExchange64(p, x); @@ -258,28 +262,28 @@ atomic_write_uint64(uint64_t *p, uint64_t x) # elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t -atomic_add_uint64(uint64_t *p, uint64_t x) +atomic_add_u64(uint64_t *p, uint64_t x) { return (__sync_add_and_fetch(p, x)); } JEMALLOC_INLINE uint64_t -atomic_sub_uint64(uint64_t *p, uint64_t x) +atomic_sub_u64(uint64_t *p, uint64_t x) { return (__sync_sub_and_fetch(p, x)); } JEMALLOC_INLINE bool -atomic_cas_uint64(uint64_t *p, uint64_t c, uint64_t s) +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { return (!__sync_bool_compare_and_swap(p, c, s)); } JEMALLOC_INLINE void -atomic_write_uint64(uint64_t *p, uint64_t x) +atomic_write_u64(uint64_t *p, uint64_t x) { __sync_lock_test_and_set(p, x); @@ -293,7 +297,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) /* 32-bit operations. 
*/ #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) +atomic_add_u32(uint32_t *p, uint32_t x) { uint32_t t = x; @@ -307,7 +311,7 @@ atomic_add_uint32(uint32_t *p, uint32_t x) } JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) +atomic_sub_u32(uint32_t *p, uint32_t x) { uint32_t t; @@ -323,7 +327,7 @@ atomic_sub_uint32(uint32_t *p, uint32_t x) } JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { uint8_t success; @@ -339,7 +343,7 @@ atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) } JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) +atomic_write_u32(uint32_t *p, uint32_t x) { asm volatile ( @@ -351,109 +355,109 @@ atomic_write_uint32(uint32_t *p, uint32_t x) } # elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) +atomic_add_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) +atomic_sub_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; return (atomic_fetch_sub(a, x) - x); } JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; return (!atomic_compare_exchange_strong(a, &c, s)); } JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) +atomic_write_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; atomic_store(a, x); } #elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) +atomic_add_u32(uint32_t *p, uint32_t x) { return 
(atomic_fetchadd_32(p, x) + x); } JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) +atomic_sub_u32(uint32_t *p, uint32_t x) { return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { return (!atomic_cmpset_32(p, c, s)); } JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) +atomic_write_u32(uint32_t *p, uint32_t x) { atomic_store_rel_32(p, x); } #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) +atomic_add_u32(uint32_t *p, uint32_t x) { return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); } JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) +atomic_sub_u32(uint32_t *p, uint32_t x) { return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); } JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) +atomic_write_u32(uint32_t *p, uint32_t x) { uint32_t o; /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ do { - o = atomic_read_uint32(p); - } while (atomic_cas_uint32(p, o, x)); + o = atomic_read_u32(p); + } while (atomic_cas_u32(p, o, x)); } #elif (defined(_MSC_VER)) JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) +atomic_add_u32(uint32_t *p, uint32_t x) { return (InterlockedExchangeAdd(p, x) + x); } JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) +atomic_sub_u32(uint32_t *p, uint32_t x) { return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { uint32_t o; @@ -462,7 +466,7 @@ atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) } JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) +atomic_write_u32(uint32_t *p, uint32_t x) { InterlockedExchange(p, x); @@ -470,28 +474,28 @@ atomic_write_uint32(uint32_t *p, uint32_t x) #elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t -atomic_add_uint32(uint32_t *p, uint32_t x) +atomic_add_u32(uint32_t *p, uint32_t x) { return (__sync_add_and_fetch(p, x)); } JEMALLOC_INLINE uint32_t -atomic_sub_uint32(uint32_t *p, uint32_t x) +atomic_sub_u32(uint32_t *p, uint32_t x) { return (__sync_sub_and_fetch(p, x)); } JEMALLOC_INLINE bool -atomic_cas_uint32(uint32_t *p, uint32_t c, uint32_t s) +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { return (!__sync_bool_compare_and_swap(p, c, s)); } JEMALLOC_INLINE void -atomic_write_uint32(uint32_t *p, uint32_t x) +atomic_write_u32(uint32_t *p, uint32_t x) { __sync_lock_test_and_set(p, x); @@ -507,9 +511,9 @@ atomic_add_p(void **p, void *x) { #if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); + return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); + return ((void *)atomic_add_u32((uint32_t *)p, 
(uint32_t)x)); #endif } @@ -518,11 +522,9 @@ atomic_sub_p(void **p, void *x) { #if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); + return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); + return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); #endif } @@ -531,9 +533,9 @@ atomic_cas_p(void **p, void *c, void *s) { #if (LG_SIZEOF_PTR == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); + return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); + return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); #endif } @@ -542,57 +544,55 @@ atomic_write_p(void **p, const void *x) { #if (LG_SIZEOF_PTR == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); + atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); + atomic_write_u32((uint32_t *)p, (uint32_t)x); #endif } /******************************************************************************/ /* size_t operations. 
*/ JEMALLOC_INLINE size_t -atomic_add_z(size_t *p, size_t x) +atomic_add_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); + return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, (uint32_t)x)); + return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x)); #endif } JEMALLOC_INLINE size_t -atomic_sub_z(size_t *p, size_t x) +atomic_sub_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_uint64((uint64_t *)p, - (uint64_t)-((int64_t)x))); + return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_uint32((uint32_t *)p, - (uint32_t)-((int32_t)x))); + return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); #endif } JEMALLOC_INLINE bool -atomic_cas_z(size_t *p, size_t c, size_t s) +atomic_cas_zu(size_t *p, size_t c, size_t s) { #if (LG_SIZEOF_PTR == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); + return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); + return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); #endif } JEMALLOC_INLINE void -atomic_write_z(size_t *p, size_t x) +atomic_write_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); + atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); + atomic_write_u32((uint32_t *)p, (uint32_t)x); #endif } @@ -603,9 +603,9 @@ atomic_add_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, (uint64_t)x)); + return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, 
(uint32_t)x)); + return ((unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x)); #endif } @@ -614,10 +614,10 @@ atomic_sub_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_uint64((uint64_t *)p, + return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_uint32((uint32_t *)p, + return ((unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); #endif } @@ -627,9 +627,9 @@ atomic_cas_u(unsigned *p, unsigned c, unsigned s) { #if (LG_SIZEOF_INT == 3) - return (atomic_cas_uint64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); + return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_INT == 2) - return (atomic_cas_uint32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); + return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); #endif } @@ -638,9 +638,9 @@ atomic_write_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) - atomic_write_uint64((uint64_t *)p, (uint64_t)x); + atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 2) - atomic_write_uint32((uint32_t *)p, (uint32_t)x); + atomic_write_u32((uint32_t *)p, (uint32_t)x); #endif } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index f178daf7..707ede3a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -73,24 +73,24 @@ arenas arenas_tdata_cleanup atomic_add_p atomic_add_u -atomic_add_uint32 -atomic_add_uint64 -atomic_add_z +atomic_add_u32 +atomic_add_u64 +atomic_add_zu atomic_cas_p atomic_cas_u -atomic_cas_uint32 -atomic_cas_uint64 -atomic_cas_z +atomic_cas_u32 +atomic_cas_u64 +atomic_cas_zu atomic_sub_p atomic_sub_u -atomic_sub_uint32 -atomic_sub_uint64 -atomic_sub_z +atomic_sub_u32 +atomic_sub_u64 +atomic_sub_zu atomic_write_p atomic_write_u -atomic_write_uint32 -atomic_write_uint64 -atomic_write_z +atomic_write_u32 +atomic_write_u64 
+atomic_write_zu base_alloc base_boot base_postfork_child diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index c2bda19c..94fd55a7 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -93,9 +93,9 @@ prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) uint32_t state0; do { - state0 = atomic_read_uint32(state); + state0 = atomic_read_u32(state); state1 = prng_state_next_u32(state0); - } while (atomic_cas_uint32(state, state0, state1)); + } while (atomic_cas_u32(state, state0, state1)); } else { state1 = prng_state_next_u32(*state); *state = state1; @@ -133,9 +133,9 @@ prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) size_t state0; do { - state0 = atomic_read_z(state); + state0 = atomic_read_zu(state); state1 = prng_state_next_zu(state0); - } while (atomic_cas_z(state, state0, state1)); + } while (atomic_cas_zu(state, state0, state1)); } else { state1 = prng_state_next_zu(*state); *state = state1; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 52279f56..a7368a72 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -101,7 +101,7 @@ struct arena_stats_s { uint64_t purged; /* Number of bytes currently allocated for internal metadata. */ - size_t metadata; /* Protected via atomic_*_z(). */ + size_t metadata; /* Protected via atomic_*_zu(). 
*/ size_t allocated_large; uint64_t nmalloc_large; diff --git a/src/arena.c b/src/arena.c index 4b104a0e..ff7b0cd0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1541,7 +1541,7 @@ ssize_t arena_decay_time_default_get(void) { - return ((ssize_t)atomic_read_z((size_t *)&decay_time_default)); + return ((ssize_t)atomic_read_zu((size_t *)&decay_time_default)); } bool @@ -1550,7 +1550,7 @@ arena_decay_time_default_set(ssize_t decay_time) if (!arena_decay_time_valid(decay_time)) return (true); - atomic_write_z((size_t *)&decay_time_default, (size_t)decay_time); + atomic_write_zu((size_t *)&decay_time_default, (size_t)decay_time); return (false); } diff --git a/src/extent.c b/src/extent.c index 4027e8b7..34ac63e8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -291,14 +291,14 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) if (config_prof && opt_prof && extent_active_get(extent)) { size_t nadd = extent_size_get(extent) >> LG_PAGE; - size_t cur = atomic_add_z(&curpages, nadd); - size_t high = atomic_read_z(&highpages); - while (cur > high && atomic_cas_z(&highpages, high, cur)) { + size_t cur = atomic_add_zu(&curpages, nadd); + size_t high = atomic_read_zu(&highpages); + while (cur > high && atomic_cas_zu(&highpages, high, cur)) { /* * Don't refresh cur, because it may have decreased * since this thread lost the highpages update race. 
*/ - high = atomic_read_z(&highpages); + high = atomic_read_zu(&highpages); } if (cur > high && prof_gdump_get_unlocked()) prof_gdump(tsdn); @@ -347,8 +347,8 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) if (config_prof && opt_prof && extent_active_get(extent)) { size_t nsub = extent_size_get(extent) >> LG_PAGE; - assert(atomic_read_z(&curpages) >= nsub); - atomic_sub_z(&curpages, nsub); + assert(atomic_read_zu(&curpages) >= nsub); + atomic_sub_zu(&curpages, nsub); } } diff --git a/test/unit/atomic.c b/test/unit/atomic.c index bdd74f65..b8933a69 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -65,23 +65,23 @@ typedef struct p##_test_s p##_test_t; } \ } while (0) -TEST_STRUCT(uint64, uint64_t) -TEST_BEGIN(test_atomic_uint64) +TEST_STRUCT(u64, uint64_t) +TEST_BEGIN(test_atomic_u64) { #if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) test_skip("64-bit atomic operations not supported"); #else - TEST_BODY(uint64, uint64_t, uint64_t, u64, FMTx64); + TEST_BODY(u64, uint64_t, uint64_t, u64, FMTx64); #endif } TEST_END -TEST_STRUCT(uint32, uint32_t) -TEST_BEGIN(test_atomic_uint32) +TEST_STRUCT(u32, uint32_t) +TEST_BEGIN(test_atomic_u32) { - TEST_BODY(uint32, uint32_t, uint32_t, u32, "#"FMTx32); + TEST_BODY(u32, uint32_t, uint32_t, u32, "#"FMTx32); } TEST_END @@ -93,11 +93,11 @@ TEST_BEGIN(test_atomic_p) } TEST_END -TEST_STRUCT(z, size_t) -TEST_BEGIN(test_atomic_z) +TEST_STRUCT(zu, size_t) +TEST_BEGIN(test_atomic_zu) { - TEST_BODY(z, size_t, size_t, zu, "#zx"); + TEST_BODY(zu, size_t, size_t, zu, "#zx"); } TEST_END @@ -114,9 +114,9 @@ main(void) { return (test( - test_atomic_uint64, - test_atomic_uint32, + test_atomic_u64, + test_atomic_u32, test_atomic_p, - test_atomic_z, + test_atomic_zu, test_atomic_u)); } From 5e0373c81506b89707471ef25f0f94d0fb6c0255 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 11:50:11 -0800 Subject: [PATCH 0499/2608] Fix test_prng_lg_range_zu() to work on 32-bit systems. 
--- test/unit/prng.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/unit/prng.c b/test/unit/prng.c index 111fa59f..80c9d733 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -77,38 +77,38 @@ test_prng_lg_range_u64(void) static void test_prng_lg_range_zu(bool atomic) { - uint64_t sa, sb, ra, rb; + size_t sa, sb, ra, rb; unsigned lg_range; sa = 42; - ra = prng_lg_range_zu(&sa, 64, atomic); + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); sa = 42; - rb = prng_lg_range_zu(&sa, 64, atomic); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); assert_zu_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 42; - rb = prng_lg_range_zu(&sb, 64, atomic); + rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); assert_zu_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; - ra = prng_lg_range_zu(&sa, 64, atomic); - rb = prng_lg_range_zu(&sa, 64, atomic); + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); assert_zu_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; - ra = prng_lg_range_zu(&sa, 64, atomic); + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; lg_range--) { sb = 42; rb = prng_lg_range_zu(&sb, lg_range, atomic); assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_zu_eq(rb, (ra >> (64 - lg_range)), - "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); + assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - + lg_range)), "Expected high order bits of full-width " + "result, lg_range=%u", lg_range); } } From 5d6cb6eb66b05261cccd2b416f50ad98d1735229 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 10:52:44 -0800 Subject: [PATCH 0500/2608] 
Refactor prng to not use 64-bit atomics on 32-bit platforms. This resolves #495. --- include/jemalloc/internal/arena.h | 2 +- include/jemalloc/internal/private_symbols.txt | 11 +- include/jemalloc/internal/prng.h | 152 +++++++++++- src/arena.c | 7 +- src/ckh.c | 5 +- src/prof.c | 2 +- test/unit/prng.c | 219 +++++++++++++++++- 7 files changed, 365 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 1277d080..f39ce54b 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -370,7 +370,7 @@ struct arena_s { * PRNG state for cache index randomization of large allocation base * pointers. */ - uint64_t offset_state; + size_t offset_state; dss_prec_t dss_prec; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8972b37b..87c8c9b7 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -405,8 +405,15 @@ pind2sz_tab pow2_ceil_u32 pow2_ceil_u64 pow2_ceil_zu -prng_lg_range -prng_range +prng_lg_range_u32 +prng_lg_range_u64 +prng_lg_range_zu +prng_range_u32 +prng_range_u64 +prng_range_zu +prng_state_next_u32 +prng_state_next_u64 +prng_state_next_zu prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 5830f8b7..c2bda19c 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -19,8 +19,12 @@ * the next has a cycle of 4, etc. For this reason, we prefer to use the upper * bits. 
*/ -#define PRNG_A UINT64_C(6364136223846793005) -#define PRNG_C UINT64_C(1442695040888963407) + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ @@ -35,28 +39,133 @@ #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -uint64_t prng_lg_range(uint64_t *state, unsigned lg_range); -uint64_t prng_range(uint64_t *state, uint64_t range); +uint32_t prng_state_next_u32(uint32_t state); +uint64_t prng_state_next_u64(uint64_t state); +size_t prng_state_next_zu(size_t state); + +uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, + bool atomic); +uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); +size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); + +uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint64_t prng_range_u64(uint64_t *state, uint64_t range); +size_t prng_range_zu(size_t *state, size_t range, bool atomic); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) -JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range(uint64_t *state, unsigned lg_range) +JEMALLOC_ALWAYS_INLINE uint32_t +prng_state_next_u32(uint32_t state) { - uint64_t ret; + + return ((state * PRNG_A_32) + PRNG_C_32); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +prng_state_next_u64(uint64_t state) +{ + + return ((state * PRNG_A_64) + PRNG_C_64); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_state_next_zu(size_t state) +{ + +#if LG_SIZEOF_PTR == 2 + return ((state * PRNG_A_32) + PRNG_C_32); +#elif LG_SIZEOF_PTR == 3 + return ((state * PRNG_A_64) + PRNG_C_64); +#else +#error Unsupported pointer size +#endif +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) +{ + uint32_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= 32); + + if 
(atomic) { + uint32_t state0; + + do { + state0 = atomic_read_uint32(state); + state1 = prng_state_next_u32(state0); + } while (atomic_cas_uint32(state, state0, state1)); + } else { + state1 = prng_state_next_u32(*state); + *state = state1; + } + ret = state1 >> (32 - lg_range); + + return (ret); +} + +/* 64-bit atomic operations cannot be supported on all relevant platforms. */ +JEMALLOC_ALWAYS_INLINE uint64_t +prng_lg_range_u64(uint64_t *state, unsigned lg_range) +{ + uint64_t ret, state1; assert(lg_range > 0); assert(lg_range <= 64); - ret = (*state * PRNG_A) + PRNG_C; - *state = ret; - ret >>= (64 - lg_range); + state1 = prng_state_next_u64(*state); + *state = state1; + ret = state1 >> (64 - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) +{ + size_t ret, state1; + + assert(lg_range > 0); + assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); + + if (atomic) { + size_t state0; + + do { + state0 = atomic_read_z(state); + state1 = prng_state_next_zu(state0); + } while (atomic_cas_z(state, state0, state1)); + } else { + state1 = prng_state_next_zu(*state); + *state = state1; + } + ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE uint32_t +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) +{ + uint32_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_u32(state, lg_range, atomic); + } while (ret >= range); return (ret); } JEMALLOC_ALWAYS_INLINE uint64_t -prng_range(uint64_t *state, uint64_t range) +prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; @@ -68,7 +177,26 @@ prng_range(uint64_t *state, uint64_t range) /* Generate a result in [0..range) via repeated trial. 
*/ do { - ret = prng_lg_range(state, lg_range); + ret = prng_lg_range_u64(state, lg_range); + } while (ret >= range); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE size_t +prng_range_zu(size_t *state, size_t range, bool atomic) +{ + size_t ret; + unsigned lg_range; + + assert(range > 1); + + /* Compute the ceiling of lg(range). */ + lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + + /* Generate a result in [0..range) via repeated trial. */ + do { + ret = prng_lg_range_zu(state, lg_range, atomic); } while (ret >= range); return (ret); diff --git a/src/arena.c b/src/arena.c index 49f04931..e196b133 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1199,7 +1199,7 @@ arena_decay_deadline_init(arena_t *arena) if (arena->decay.time > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range(&arena->decay.jitter_state, + nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, nstime_ns(&arena->decay.interval))); nstime_add(&arena->decay.deadline, &jitter); } @@ -2565,7 +2565,8 @@ arena_malloc_large(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) * that is a multiple of the cacheline size, e.g. [0 .. 63) * 64 * for 4 KiB pages and 64-byte cachelines. */ - r = prng_lg_range(&arena->offset_state, LG_PAGE - LG_CACHELINE); + r = prng_lg_range_zu(&arena->offset_state, LG_PAGE - + LG_CACHELINE, false); random_offset = ((uintptr_t)r) << LG_CACHELINE; } else random_offset = 0; @@ -3503,7 +3504,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) * deterministic seed. */ arena->offset_state = config_debug ? ind : - (uint64_t)(uintptr_t)arena; + (size_t)(uintptr_t)arena; } arena->dss_prec = chunk_dss_prec_get(); diff --git a/src/ckh.c b/src/ckh.c index 3be671c3..159bd8ae 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -99,7 +99,8 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. 
*/ - offset = (unsigned)prng_lg_range(&ckh->prng_state, LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, + LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; @@ -141,7 +142,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range(&ckh->prng_state, + i = (unsigned)prng_lg_range_u64(&ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); diff --git a/src/prof.c b/src/prof.c index 140d5b22..c89dade1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -874,7 +874,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range(&tdata->prng_state, 53); + r = prng_lg_range_u64(&tdata->prng_state, 53); u = (double)r * (1.0/9007199254740992.0L); tdata->bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) diff --git a/test/unit/prng.c b/test/unit/prng.c index b22bd2f5..80c9d733 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,33 +1,71 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_prng_lg_range) +static void +test_prng_lg_range_u32(bool atomic) +{ + uint32_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_u32(&sb, 32, atomic); + assert_u32_eq(ra, rb, + "Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_u32(&sa, 32, atomic); + rb = prng_lg_range_u32(&sa, 32, atomic); + assert_u32_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + 
ra = prng_lg_range_u32(&sa, 32, atomic); + for (lg_range = 31; lg_range > 0; lg_range--) { + sb = 42; + rb = prng_lg_range_u32(&sb, lg_range, atomic); + assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_u32_eq(rb, (ra >> (32 - lg_range)), + "Expected high order bits of full-width result, " + "lg_range=%u", lg_range); + } +} + +static void +test_prng_lg_range_u64(void) { uint64_t sa, sb, ra, rb; unsigned lg_range; sa = 42; - ra = prng_lg_range(&sa, 64); + ra = prng_lg_range_u64(&sa, 64); sa = 42; - rb = prng_lg_range(&sa, 64); + rb = prng_lg_range_u64(&sa, 64); assert_u64_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 42; - rb = prng_lg_range(&sb, 64); + rb = prng_lg_range_u64(&sb, 64); assert_u64_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; - ra = prng_lg_range(&sa, 64); - rb = prng_lg_range(&sa, 64); + ra = prng_lg_range_u64(&sa, 64); + rb = prng_lg_range_u64(&sa, 64); assert_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; - ra = prng_lg_range(&sa, 64); + ra = prng_lg_range_u64(&sa, 64); for (lg_range = 63; lg_range > 0; lg_range--) { sb = 42; - rb = prng_lg_range(&sb, lg_range); + rb = prng_lg_range_u64(&sb, lg_range); assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); assert_u64_eq(rb, (ra >> (64 - lg_range)), @@ -35,9 +73,103 @@ TEST_BEGIN(test_prng_lg_range) "lg_range=%u", lg_range); } } + +static void +test_prng_lg_range_zu(bool atomic) +{ + size_t sa, sb, ra, rb; + unsigned lg_range; + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_eq(ra, rb, + "Repeated generation should produce repeated results"); + + sb = 42; + rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_eq(ra, rb, + 
"Equivalent generation should produce equivalent results"); + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + assert_zu_ne(ra, rb, + "Full-width results must not immediately repeat"); + + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; + lg_range--) { + sb = 42; + rb = prng_lg_range_zu(&sb, lg_range, atomic); + assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), + 0, "High order bits should be 0, lg_range=%u", lg_range); + assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - + lg_range)), "Expected high order bits of full-width " + "result, lg_range=%u", lg_range); + } +} + +TEST_BEGIN(test_prng_lg_range_u32_nonatomic) +{ + + test_prng_lg_range_u32(false); +} TEST_END -TEST_BEGIN(test_prng_range) +TEST_BEGIN(test_prng_lg_range_u32_atomic) +{ + + test_prng_lg_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_u64_nonatomic) +{ + + test_prng_lg_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_nonatomic) +{ + + test_prng_lg_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_zu_atomic) +{ + + test_prng_lg_range_zu(true); +} +TEST_END + +static void +test_prng_range_u32(bool atomic) +{ + uint32_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + uint32_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + uint32_t r = prng_range_u32(&s, range, atomic); + + assert_u32_lt(r, range, "Out of range"); + } + } +} + +static void +test_prng_range_u64(void) { uint64_t range; #define MAX_RANGE 10000000 @@ -50,12 +182,67 @@ TEST_BEGIN(test_prng_range) s = range; for (rep = 0; rep < NREPS; rep++) { - uint64_t r = prng_range(&s, range); + uint64_t r = prng_range_u64(&s, range); assert_u64_lt(r, range, "Out of range"); } } } + +static void 
+test_prng_range_zu(bool atomic) +{ + size_t range; +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 + + for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + size_t s; + unsigned rep; + + s = range; + for (rep = 0; rep < NREPS; rep++) { + size_t r = prng_range_zu(&s, range, atomic); + + assert_zu_lt(r, range, "Out of range"); + } + } +} + +TEST_BEGIN(test_prng_range_u32_nonatomic) +{ + + test_prng_range_u32(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_u32_atomic) +{ + + test_prng_range_u32(true); +} +TEST_END + +TEST_BEGIN(test_prng_range_u64_nonatomic) +{ + + test_prng_range_u64(); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_nonatomic) +{ + + test_prng_range_zu(false); +} +TEST_END + +TEST_BEGIN(test_prng_range_zu_atomic) +{ + + test_prng_range_zu(true); +} TEST_END int @@ -63,6 +250,14 @@ main(void) { return (test( - test_prng_lg_range, - test_prng_range)); + test_prng_lg_range_u32_nonatomic, + test_prng_lg_range_u32_atomic, + test_prng_lg_range_u64_nonatomic, + test_prng_lg_range_zu_nonatomic, + test_prng_lg_range_zu_atomic, + test_prng_range_u32_nonatomic, + test_prng_range_u32_atomic, + test_prng_range_u64_nonatomic, + test_prng_range_zu_nonatomic, + test_prng_range_zu_atomic)); } From 7b8e74f48fb1e00792db82c9889679c180e037f6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 11:41:34 -0800 Subject: [PATCH 0501/2608] Revert "Define 64-bits atomics unconditionally" This reverts commit af33e9a59735a2ee72132d3dd6e23fae6d296e34. This resolves #495. 
--- include/jemalloc/internal/atomic.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 3936f68b..3f15ea14 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,7 +66,8 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (defined(__amd64__) || defined(__x86_64__)) +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -124,7 +125,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) : "memory" /* Clobbers. */ ); } -#elif (defined(JEMALLOC_C11ATOMICS)) +# elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -152,7 +153,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; atomic_store(a, x); } -#elif (defined(JEMALLOC_ATOMIC9)) +# elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -192,7 +193,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) atomic_store_rel_long(p, x); } -#elif (defined(JEMALLOC_OSATOMIC)) +# elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -224,7 +225,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) o = atomic_read_uint64(p); } while (atomic_cas_uint64(p, o, x)); } -#elif (defined(_MSC_VER)) +# elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) { @@ -254,7 +255,7 @@ atomic_write_uint64(uint64_t *p, uint64_t x) InterlockedExchange64(p, x); } -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ +# elif 
(defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_uint64(uint64_t *p, uint64_t x) @@ -283,8 +284,9 @@ atomic_write_uint64(uint64_t *p, uint64_t x) __sync_lock_test_and_set(p, x); } -#else -# error "Missing implementation for 64-bit atomic operations" +# else +# error "Missing implementation for 64-bit atomic operations" +# endif #endif /******************************************************************************/ From 85dae2ff4990d86644cf1b2bcce98e6b4afa340b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 16:22:02 -0800 Subject: [PATCH 0502/2608] Update ChangeLog for 4.3.1. --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 118df96f..587685d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,14 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.1 (November 7, 2016) + + Bug fixes: + - Fix a severe virtual memory leak. This regression was first released in + 4.3.0. (@interwq, @jasone) + - Refactor atomic and prng APIs to restore support for 32-bit platforms that + use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) + * 4.3.0 (November 4, 2016) This is the first release that passes the test suite for multiple Windows From b0f56583b7f7abcdc00df42a0ae102bc64c5bd72 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 7 Nov 2016 16:22:02 -0800 Subject: [PATCH 0503/2608] Update ChangeLog for 4.3.1. --- ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ChangeLog b/ChangeLog index 118df96f..587685d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,14 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.3.1 (November 7, 2016) + + Bug fixes: + - Fix a severe virtual memory leak. This regression was first released in + 4.3.0. 
(@interwq, @jasone) + - Refactor atomic and prng APIs to restore support for 32-bit platforms that + use pre-C11 toolchains, e.g. FreeBSD's mips. (@jasone) + * 4.3.0 (November 4, 2016) This is the first release that passes the test suite for multiple Windows From c233dd5e40ad7a6c79ff39f4ea8a3936b95b0458 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 Nov 2016 15:02:05 -0800 Subject: [PATCH 0504/2608] Update config.{guess,sub} from upstream. --- build-aux/config.guess | 174 +++++++++++++++++++++++++---------------- build-aux/config.sub | 76 ++++++++++++------ 2 files changed, 160 insertions(+), 90 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index 1f5c50c0..2e9ad7fe 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2014-03-23' +timestamp='2016-10-02' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -24,12 +24,12 @@ timestamp='2014-03-23' # program. This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # -# Originally written by Per Bothner. +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to . me=`echo "$0" | sed -e 's,.*/,,'` @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. 
This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -168,19 +168,29 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || \ + echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. + # to ELF recently (or will in the future) and ABI. case "${UNAME_MACHINE_ARCH}" in + earm*) + os=netbsdelf + ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -197,6 +207,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in os=netbsd ;; esac + # Determine ABI tags. + case "${UNAME_MACHINE_ARCH}" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + ;; + esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. 
However, they do not need @@ -207,13 +224,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}" + echo "${machine}-${os}${release}${abi}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` @@ -223,6 +240,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -235,6 +256,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; + *:Sortix:*:*) + echo ${UNAME_MACHINE}-unknown-sortix + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -251,42 +275,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - 
UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -359,16 +383,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH="i386" + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -393,7 +417,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -579,8 +603,9 @@ EOF else IBM_ARCH=powerpc fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi @@ -617,13 +642,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi @@ -662,11 +687,11 @@ EOF exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if [ ${HP_ARCH} = hppa2.0w ] then eval 
$set_cc_for_build @@ -679,12 +704,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -789,14 +814,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -878,7 +903,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo 
${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -901,7 +926,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -932,6 +957,9 @@ EOF crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; + e2k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -944,6 +972,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; + k1om:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -969,6 +1000,9 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; + mips64el:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; openrisc*:Linux:*:*) echo or1k-unknown-linux-${LIBC} exit ;; @@ -1001,6 +1035,9 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1020,7 +1057,7 @@ EOF echo ${UNAME_MACHINE}-dec-linux-${LIBC} exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-pc-linux-${LIBC} exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} @@ -1099,7 +1136,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. 
# Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1248,6 +1285,9 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1261,9 +1301,9 @@ EOF UNAME_PROCESSOR=powerpc fi if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in @@ -1285,7 +1325,7 @@ EOF exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1316,7 +1356,7 @@ EOF # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = "386"; then + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1358,7 +1398,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1369,23 +1409,25 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; esac cat >&2 < in order to provide the needed -information to handle your system. 
+If $0 has already been updated, send the following data and any +information you think might be pertinent to config-patches@gnu.org to +provide the necessary information to handle your system. config.guess timestamp = $timestamp diff --git a/build-aux/config.sub b/build-aux/config.sub index 0ccff770..dd2ca93c 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2014-05-01' +timestamp='2016-11-04' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ timestamp='2014-05-01' # of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to . # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -33,7 +33,7 @@ timestamp='2014-05-01' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,8 +53,7 @@ timestamp='2014-05-01' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. @@ -68,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. 
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -117,8 +116,8 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -255,12 +254,13 @@ case $basic_machine in | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ + | ba \ | be32 | be64 \ | bfin \ | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ @@ -301,10 +301,12 @@ case $basic_machine in | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ | pyramid \ + | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -312,6 +314,7 @@ case $basic_machine in | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) @@ -326,6 +329,9 @@ case $basic_machine in c6x) basic_machine=tic6x-unknown ;; + leon|leon[3-9]) + 
basic_machine=sparc-$basic_machine + ;; m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none @@ -371,12 +377,13 @@ case $basic_machine in | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ + | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ + | e2k-* | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ @@ -422,13 +429,15 @@ case $basic_machine in | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ | pyramid-* \ + | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -436,6 +445,7 @@ case $basic_machine in | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ + | visium-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -512,6 +522,9 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; + asmjs) + basic_machine=asmjs-unknown + ;; aux) basic_machine=m68k-apple os=-aux @@ -632,6 +645,14 @@ case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo $basic_machine | 
sed 's/^[^-]*-//'` + os=$os"spe" + ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -773,6 +794,9 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` + ;; m68knommu) basic_machine=m68k-unknown os=-linux @@ -828,6 +852,10 @@ case $basic_machine in basic_machine=powerpc-unknown os=-morphos ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; msdos) basic_machine=i386-pc os=-msdos @@ -1004,7 +1032,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle | ppc-le | powerpc-little) + ppcle | powerpclittle) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1014,7 +1042,7 @@ case $basic_machine in ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) + ppc64le | powerpc64little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1360,27 +1388,28 @@ case $os in | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -midipix* | 
-mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1404,9 +1433,6 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; - # Apple iOS - -ios*) - ;; -linux-dietlibc) os=-linux-dietlibc ;; @@ -1515,6 +1541,8 @@ case $os in ;; -nacl*) ;; + -ios) + ;; -none) ;; *) From b4486dce242f2ae6a80990e09acf8080c164d1e7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 Nov 2016 15:02:05 -0800 Subject: [PATCH 0505/2608] Update config.{guess,sub} from upstream. --- build-aux/config.guess | 174 +++++++++++++++++++++++++---------------- build-aux/config.sub | 76 ++++++++++++------ 2 files changed, 160 insertions(+), 90 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index 1f5c50c0..2e9ad7fe 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2014-03-23' +timestamp='2016-10-02' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -24,12 +24,12 @@ timestamp='2014-03-23' # program. 
This Exception is an additional permission under section 7 # of the GNU General Public License, version 3 ("GPLv3"). # -# Originally written by Per Bothner. +# Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to . me=`echo "$0" | sed -e 's,.*/,,'` @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -168,19 +168,29 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(/sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || echo unknown)` + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/$sysctl 2>/dev/null || \ + /usr/sbin/$sysctl 2>/dev/null || \ + echo unknown)` case "${UNAME_MACHINE_ARCH}" in armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; + earmv*) + arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown + ;; *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently, or will in the future. 
+ # to ELF recently (or will in the future) and ABI. case "${UNAME_MACHINE_ARCH}" in + earm*) + os=netbsdelf + ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ @@ -197,6 +207,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in os=netbsd ;; esac + # Determine ABI tags. + case "${UNAME_MACHINE_ARCH}" in + earm*) + expr='s/^earmv[0-9]/-eabi/;s/eb$//' + abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + ;; + esac # The OS release # Debian GNU/NetBSD machines have a different userland, and # thus, need a distinct triplet. However, they do not need @@ -207,13 +224,13 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE}|sed -e 's/[-_].*/\./'` + release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. 
- echo "${machine}-${os}${release}" + echo "${machine}-${os}${release}${abi}" exit ;; *:Bitrig:*:*) UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` @@ -223,6 +240,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; + *:LibertyBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -235,6 +256,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:MirBSD:*:*) echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} exit ;; + *:Sortix:*:*) + echo ${UNAME_MACHINE}-unknown-sortix + exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) @@ -251,42 +275,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV4.5 (21064)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "LCA4 (21066/21068)") - UNAME_MACHINE="alpha" ;; + UNAME_MACHINE=alpha ;; "EV5 (21164)") - UNAME_MACHINE="alphaev5" ;; + UNAME_MACHINE=alphaev5 ;; "EV5.6 (21164A)") - UNAME_MACHINE="alphaev56" ;; + UNAME_MACHINE=alphaev56 ;; "EV5.6 (21164PC)") - UNAME_MACHINE="alphapca56" ;; + UNAME_MACHINE=alphapca56 ;; "EV5.7 (21164PC)") - UNAME_MACHINE="alphapca57" ;; + UNAME_MACHINE=alphapca57 ;; "EV6 (21264)") - UNAME_MACHINE="alphaev6" ;; + UNAME_MACHINE=alphaev6 ;; "EV6.7 (21264A)") - UNAME_MACHINE="alphaev67" ;; + UNAME_MACHINE=alphaev67 ;; "EV6.8CB (21264C)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8AL (21264B)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 ;; "EV6.8CX (21264D)") - UNAME_MACHINE="alphaev68" ;; + UNAME_MACHINE=alphaev68 
;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE="alphaev69" ;; + UNAME_MACHINE=alphaev69 ;; "EV7 (21364)") - UNAME_MACHINE="alphaev7" ;; + UNAME_MACHINE=alphaev7 ;; "EV7.9 (21364A)") - UNAME_MACHINE="alphaev79" ;; + UNAME_MACHINE=alphaev79 ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -359,16 +383,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH="i386" + SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH="x86_64" + SUN_ARCH=x86_64 fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -393,7 +417,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -579,8 +603,9 @@ EOF else IBM_ARCH=powerpc fi - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if [ -x /usr/bin/lslpp ] ; then + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} fi @@ -617,13 +642,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 - 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 + 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 + 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH="hppa2.0n" ;; - 64) HP_ARCH="hppa2.0w" ;; - '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 + 32) HP_ARCH=hppa2.0n ;; + 64) HP_ARCH=hppa2.0w ;; + '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi @@ -662,11 +687,11 @@ EOF exit (0); } EOF - (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = "hppa2.0w" ] + if [ ${HP_ARCH} = hppa2.0w ] then eval 
$set_cc_for_build @@ -679,12 +704,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH="hppa2.0w" + HP_ARCH=hppa2.0w else - HP_ARCH="hppa64" + HP_ARCH=hppa64 fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -789,14 +814,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -878,7 +903,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo 
${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -901,7 +926,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -932,6 +957,9 @@ EOF crisv32:Linux:*:*) echo ${UNAME_MACHINE}-axis-linux-${LIBC} exit ;; + e2k:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; frv:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -944,6 +972,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; + k1om:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -969,6 +1000,9 @@ EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } ;; + mips64el:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; openrisc*:Linux:*:*) echo or1k-unknown-linux-${LIBC} exit ;; @@ -1001,6 +1035,9 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; + riscv32:Linux:*:* | riscv64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1020,7 +1057,7 @@ EOF echo ${UNAME_MACHINE}-dec-linux-${LIBC} exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo ${UNAME_MACHINE}-pc-linux-${LIBC} exit ;; xtensa*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} @@ -1099,7 +1136,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. 
# Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configury will decide that + # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1248,6 +1285,9 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; + SX-ACE:SUPER-UX:*:*) + echo sxace-nec-superux${UNAME_RELEASE} + exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1261,9 +1301,9 @@ EOF UNAME_PROCESSOR=powerpc fi if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then + if [ "$CC_FOR_BUILD" != no_compiler_found ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in @@ -1285,7 +1325,7 @@ EOF exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = "x86"; then + if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1316,7 +1356,7 @@ EOF # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = "386"; then + if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1358,7 +1398,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1369,23 +1409,25 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; + amd64:Isilon\ OneFS:*:*) + echo x86_64-unknown-onefs + exit ;; esac cat >&2 < in order to provide the needed -information to handle your system. 
+If $0 has already been updated, send the following data and any +information you think might be pertinent to config-patches@gnu.org to +provide the necessary information to handle your system. config.guess timestamp = $timestamp diff --git a/build-aux/config.sub b/build-aux/config.sub index 0ccff770..dd2ca93c 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2014 Free Software Foundation, Inc. +# Copyright 1992-2016 Free Software Foundation, Inc. -timestamp='2014-05-01' +timestamp='2016-11-04' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ timestamp='2014-05-01' # of the GNU General Public License, version 3 ("GPLv3"). -# Please send patches with a ChangeLog entry to config-patches@gnu.org. +# Please send patches to . # # Configuration subroutine to validate and canonicalize a configuration type. # Supply the specified configuration type as an argument. @@ -33,7 +33,7 @@ timestamp='2014-05-01' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,8 +53,7 @@ timestamp='2014-05-01' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS - $0 [OPTION] ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. @@ -68,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2014 Free Software Foundation, Inc. +Copyright 1992-2016 Free Software Foundation, Inc. This is free software; see the source for copying conditions. 
There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -117,8 +116,8 @@ maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | \ - kopensolaris*-gnu* | \ + knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ + kopensolaris*-gnu* | cloudabi*-eabi* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -255,12 +254,13 @@ case $basic_machine in | arc | arceb \ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ | avr | avr32 \ + | ba \ | be32 | be64 \ | bfin \ | c4x | c8051 | clipper \ | d10v | d30v | dlx | dsp16xx \ - | epiphany \ - | fido | fr30 | frv \ + | e2k | epiphany \ + | fido | fr30 | frv | ft32 \ | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | hexagon \ | i370 | i860 | i960 | ia64 \ @@ -301,10 +301,12 @@ case $basic_machine in | open8 | or1k | or1knd | or32 \ | pdp10 | pdp11 | pj | pjl \ | powerpc | powerpc64 | powerpc64le | powerpcle \ + | pru \ | pyramid \ + | riscv32 | riscv64 \ | rl78 | rx \ | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ + | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ @@ -312,6 +314,7 @@ case $basic_machine in | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ | ubicom32 \ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ + | visium \ | we32k \ | x86 | xc16x | xstormy16 | xtensa \ | z8k | z80) @@ -326,6 +329,9 @@ case $basic_machine in c6x) basic_machine=tic6x-unknown ;; + leon|leon[3-9]) + 
basic_machine=sparc-$basic_machine + ;; m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) basic_machine=$basic_machine-unknown os=-none @@ -371,12 +377,13 @@ case $basic_machine in | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ | avr-* | avr32-* \ + | ba-* \ | be32-* | be64-* \ | bfin-* | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* \ | c8051-* | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ - | elxsi-* \ + | e2k-* | elxsi-* \ | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ | h8300-* | h8500-* \ | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ @@ -422,13 +429,15 @@ case $basic_machine in | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ + | pru-* \ | pyramid-* \ + | riscv32-* | riscv64-* \ | rl78-* | romp-* | rs6000-* | rx-* \ | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ | tahoe-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tile*-* \ @@ -436,6 +445,7 @@ case $basic_machine in | ubicom32-* \ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ | vax-* \ + | visium-* \ | we32k-* \ | x86-* | x86_64-* | xc16x-* | xps100-* \ | xstormy16-* | xtensa*-* \ @@ -512,6 +522,9 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; + asmjs) + basic_machine=asmjs-unknown + ;; aux) basic_machine=m68k-apple os=-aux @@ -632,6 +645,14 @@ case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; + e500v[12]) + basic_machine=powerpc-unknown + os=$os"spe" + ;; + e500v[12]-*) + basic_machine=powerpc-`echo $basic_machine | 
sed 's/^[^-]*-//'` + os=$os"spe" + ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -773,6 +794,9 @@ case $basic_machine in basic_machine=m68k-isi os=-sysv ;; + leon-*|leon[3-9]-*) + basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` + ;; m68knommu) basic_machine=m68k-unknown os=-linux @@ -828,6 +852,10 @@ case $basic_machine in basic_machine=powerpc-unknown os=-morphos ;; + moxiebox) + basic_machine=moxie-unknown + os=-moxiebox + ;; msdos) basic_machine=i386-pc os=-msdos @@ -1004,7 +1032,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle | ppc-le | powerpc-little) + ppcle | powerpclittle) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1014,7 +1042,7 @@ case $basic_machine in ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little | ppc64-le | powerpc64-little) + ppc64le | powerpc64little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1360,27 +1388,28 @@ case $os in | -hpux* | -unos* | -osf* | -luna* | -dgux* | -auroraux* | -solaris* \ | -sym* | -kopensolaris* | -plan9* \ | -amigaos* | -amigados* | -msdos* | -newsos* | -unicos* | -aof* \ - | -aos* | -aros* \ + | -aos* | -aros* | -cloudabi* | -sortix* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* \ + | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -midipix* | 
-mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ - | -uxpv* | -beos* | -mpeix* | -udk* \ + | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ + | -onefs* | -tirtos* | -phoenix* | -fuchsia*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1404,9 +1433,6 @@ case $os in -mac*) os=`echo $os | sed -e 's|mac|macos|'` ;; - # Apple iOS - -ios*) - ;; -linux-dietlibc) os=-linux-dietlibc ;; @@ -1515,6 +1541,8 @@ case $os in ;; -nacl*) ;; + -ios) + ;; -none) ;; *) From 32d69e967e40fd5546d2705551dd6f37575ffe81 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 Nov 2016 15:35:29 -0800 Subject: [PATCH 0506/2608] Add configure support for *-*-linux-android. This is tailored to Android, i.e. more specific than the *-*-linux* configuration. This resolves #471. --- configure.ac | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configure.ac b/configure.ac index 6f29ce0a..053e5d7e 100644 --- a/configure.ac +++ b/configure.ac @@ -384,6 +384,18 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; + *-*-linux-android) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
+ CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) + AC_DEFINE([JEMALLOC_C11ATOMICS]) + force_tls="0" + default_munmap="0" + ;; *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" From 092d760817d4e3f16f465d77bf2b83375dc2db39 Mon Sep 17 00:00:00 2001 From: Samuel Moritz Date: Mon, 25 Jul 2016 19:33:27 +0200 Subject: [PATCH 0507/2608] Support Debian GNU/kFreeBSD. Treat it exactly like Linux since they both use GNU libc. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 104fd994..451eb622 100644 --- a/configure.ac +++ b/configure.ac @@ -384,7 +384,7 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; - *-*-linux*) + *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" abi="elf" From e916d55ba10ea940d3c04b1d7ca6319fc0e7ca12 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 10 Nov 2016 15:35:29 -0800 Subject: [PATCH 0508/2608] Add configure support for *-*-linux-android. This is tailored to Android, i.e. more specific than the *-*-linux* configuration. This resolves #471. --- configure.ac | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configure.ac b/configure.ac index 451eb622..4645d63b 100644 --- a/configure.ac +++ b/configure.ac @@ -384,6 +384,18 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; + *-*-linux-android) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
+ CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) + AC_DEFINE([JEMALLOC_C11ATOMICS]) + force_tls="0" + default_munmap="0" + ;; *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" From 2cdf07aba971d1e21edc203e7d4073b6ce8e72b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2016 21:14:29 -0800 Subject: [PATCH 0509/2608] Fix extent_quantize() to handle greater-than-huge-size extents. Allocation requests can't directly create extents that exceed HUGE_MAXCLASS, but extent merging can create them. This fixes a regression caused by 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.) and first released in 4.0.0. This resolves #497. --- src/extent.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/extent.c b/src/extent.c index 9f5146e5..828f627c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,15 +3,29 @@ /******************************************************************************/ +/* + * Round down to the nearest chunk size that can actually be requested during + * normal huge allocation. + */ JEMALLOC_INLINE_C size_t extent_quantize(size_t size) { + size_t ret; + szind_t ind; - /* - * Round down to the nearest chunk size that can actually be requested - * during normal huge allocation. - */ - return (index2size(size2index(size + 1) - 1)); + assert(size > 0); + + ind = size2index(size + 1); + if (ind == NSIZES) { + /* + * Allocation requests can't directly create extents that exceed + * HUGE_MAXCLASS, but extent merging can create them. 
+ */ + return (HUGE_MAXCLASS); + } + ret = index2size(ind - 1); + assert(ret <= size); + return (ret); } JEMALLOC_INLINE_C int From b9408d77a63a54fd331f9b81c884f68e6d57f2e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2016 21:58:05 -0800 Subject: [PATCH 0510/2608] Fix/simplify chunk_recycle() allocation size computations. Remove outer CHUNK_CEILING(s2u(...)) from alloc_size computation, since s2u() may overflow (and return 0), and CHUNK_CEILING() is only needed around the alignment portion of the computation. This fixes a regression caused by 5707d6f952c71baa2f19102479859012982ac821 (Quantize szad trees by size class.) and first released in 4.0.0. This resolves #497. --- src/chunk.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/chunk.c b/src/chunk.c index 07e26f77..d7002873 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -209,7 +209,10 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, size_t alloc_size, leadsize, trailsize; bool zeroed, committed; + assert(CHUNK_CEILING(size) == size); + assert(alignment > 0); assert(new_addr == NULL || alignment == chunksize); + assert(CHUNK_ADDR2BASE(new_addr) == new_addr); /* * Cached chunks use the node linkage embedded in their headers, in * which case dalloc_node is true, and new_addr is non-NULL because @@ -217,7 +220,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, */ assert(dalloc_node || new_addr != NULL); - alloc_size = CHUNK_CEILING(s2u(size + alignment - chunksize)); + alloc_size = size + CHUNK_CEILING(alignment) - chunksize; /* Beware size_t wrap-around. */ if (alloc_size < size) return (NULL); From a2af09f0253769cb08656ae8828a3d70123ce5bd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2016 22:15:27 -0800 Subject: [PATCH 0511/2608] Remove overly restrictive stats_cactive_{add,sub}() assertions. This fixes a regression caused by 40ee9aa9577ea5eb6616c10b9e6b0fa7e6796821 (Fix stats.cactive accounting regression.) 
and first released in 4.1.0. --- include/jemalloc/internal/stats.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index b6218178..04e7dae1 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -175,25 +175,21 @@ stats_cactive_get(void) JEMALLOC_INLINE void stats_cactive_add(size_t size) { - UNUSED size_t cactive; assert(size > 0); assert((size & chunksize_mask) == 0); - cactive = atomic_add_z(&stats_cactive, size); - assert(cactive - size < cactive); + atomic_add_z(&stats_cactive, size); } JEMALLOC_INLINE void stats_cactive_sub(size_t size) { - UNUSED size_t cactive; assert(size > 0); assert((size & chunksize_mask) == 0); - cactive = atomic_sub_z(&stats_cactive, size); - assert(cactive + size > cactive); + atomic_sub_z(&stats_cactive, size); } #endif From 1aeea0f39143a658caf34c44ac59cb7fc6bb5749 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2016 22:46:55 -0800 Subject: [PATCH 0512/2608] Simplify extent_quantize(). 2cdf07aba971d1e21edc203e7d4073b6ce8e72b9 (Fix extent_quantize() to handle greater-than-huge-size extents.) solved a non-problem; the expression passed in to index2size() was never too large. However the expression could in principle underflow, so fix the actual (latent) bug and remove unnecessary complexity. --- src/extent.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/extent.c b/src/extent.c index 828f627c..b1b894dc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -16,12 +16,9 @@ extent_quantize(size_t size) assert(size > 0); ind = size2index(size + 1); - if (ind == NSIZES) { - /* - * Allocation requests can't directly create extents that exceed - * HUGE_MAXCLASS, but extent merging can create them. - */ - return (HUGE_MAXCLASS); + if (ind == 0) { + /* Avoid underflow. 
*/ + return (index2size(0)); } ret = index2size(ind - 1); assert(ret <= size); From ded4f38ffd64144158ffe600a3309b3339800cf4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2016 23:49:40 -0800 Subject: [PATCH 0513/2608] Reduce memory usage for sdallocx() test_alignment_and_size. --- test/integration/sdallocx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index b84817d7..f92e0589 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 25) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 22) +#define NITER 3 TEST_BEGIN(test_basic) { From c25e711cf97bc64d31e6bbc35fe24e0f55c657d8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Nov 2016 23:49:40 -0800 Subject: [PATCH 0514/2608] Reduce memory usage for sdallocx() test_alignment_and_size. --- test/integration/sdallocx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index b84817d7..f92e0589 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 25) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 22) +#define NITER 3 TEST_BEGIN(test_basic) { From 45f83a2ac6a9cd3b21675766127ee85910e54156 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 12 Nov 2016 09:47:07 -0800 Subject: [PATCH 0515/2608] Add JE_RUNNABLE() and use it for os_unfair_lock_*() test. This resolves #494. 
--- configure.ac | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4645d63b..2bd18ff7 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,17 @@ AC_CACHE_CHECK([whether $1 is compilable], [$4=no])]) ]) +dnl JE_RUNNABLE(label, hcode, mcode, rvar) +AC_DEFUN([JE_RUNNABLE], +[ +AC_CACHE_CHECK([whether $1 is runnable], + [$4], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) +]) + dnl ============================================================================ CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'` @@ -1679,7 +1690,11 @@ fi dnl ============================================================================ dnl Check for os_unfair_lock operations as provided on Darwin. -JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +dnl Run the test program rather than merely compiling so that dyld lazy symbol +dnl binding doesn't result in a false positive if building with a version of +dnl XCode (>7) that supports the API on a system that is too old to actually +dnl implement it (<10.12). +JE_RUNNABLE([Darwin os_unfair_lock_*()], [ #include ], [ os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; From a2e601a2236315fb6f994ff364ea442ed0aed07b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 12 Nov 2016 09:47:07 -0800 Subject: [PATCH 0516/2608] Add JE_RUNNABLE() and use it for os_unfair_lock_*() test. This resolves #494. 
--- configure.ac | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 053e5d7e..24136736 100644 --- a/configure.ac +++ b/configure.ac @@ -43,6 +43,17 @@ AC_CACHE_CHECK([whether $1 is compilable], [$4=no])]) ]) +dnl JE_RUNNABLE(label, hcode, mcode, rvar) +AC_DEFUN([JE_RUNNABLE], +[ +AC_CACHE_CHECK([whether $1 is runnable], + [$4], + [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], + [$3])], + [$4=yes], + [$4=no])]) +]) + dnl ============================================================================ CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'` @@ -1649,7 +1660,11 @@ fi dnl ============================================================================ dnl Check for os_unfair_lock operations as provided on Darwin. -JE_COMPILABLE([Darwin os_unfair_lock_*()], [ +dnl Run the test program rather than merely compiling so that dyld lazy symbol +dnl binding doesn't result in a false positive if building with a version of +dnl XCode (>7) that supports the API on a system that is too old to actually +dnl implement it (<10.12). +JE_RUNNABLE([Darwin os_unfair_lock_*()], [ #include ], [ os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; From c0a667112cf33968b425dfbb50594aba54ea850b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Nov 2016 10:31:06 -0800 Subject: [PATCH 0517/2608] Fix arena_reset() crashing bug. This regression was caused by 498856f44a30b31fe713a18eb2fc7c6ecf3a9f63 (Move slabs out of chunks.). 
--- src/arena.c | 83 +++++++++++++++++++++++++++-------------------------- 1 file changed, 42 insertions(+), 41 deletions(-) diff --git a/src/arena.c b/src/arena.c index ff7b0cd0..ef374d35 100644 --- a/src/arena.c +++ b/src/arena.c @@ -799,6 +799,47 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); } +static void +arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) +{ + + assert(extent_slab_data_get(slab)->nfree > 0); + extent_heap_insert(&bin->slabs_nonfull, slab); +} + +static void +arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) +{ + + extent_heap_remove(&bin->slabs_nonfull, slab); +} + +static extent_t * +arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) +{ + extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); + if (slab == NULL) + return (NULL); + if (config_stats) + bin->stats.reslabs++; + return (slab); +} + +static void +arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) +{ + + assert(extent_slab_data_get(slab)->nfree == 0); + extent_ring_insert(&bin->slabs_full, slab); +} + +static void +arena_bin_slabs_full_remove(extent_t *slab) +{ + + extent_ring_remove(slab); +} + void arena_reset(tsd_t *tsd, arena_t *arena) { @@ -863,6 +904,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) for (slab = qr_next(&bin->slabs_full, qr_link); slab != &bin->slabs_full; slab = qr_next(&bin->slabs_full, qr_link)) { + arena_bin_slabs_full_remove(slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -880,47 +922,6 @@ arena_reset(tsd_t *tsd, arena_t *arena) malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } -static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) -{ - - assert(extent_slab_data_get(slab)->nfree > 0); - extent_heap_insert(&bin->slabs_nonfull, slab); -} - -static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, 
extent_t *slab) -{ - - extent_heap_remove(&bin->slabs_nonfull, slab); -} - -static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) -{ - extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); - if (slab == NULL) - return (NULL); - if (config_stats) - bin->stats.reslabs++; - return (slab); -} - -static void -arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) -{ - - assert(extent_slab_data_get(slab)->nfree == 0); - extent_ring_insert(&bin->slabs_full, slab); -} - -static void -arena_bin_slabs_full_remove(extent_t *slab) -{ - - extent_ring_remove(slab); -} - static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info) From a38acf716eefc5284e89a35be74229ef3545d007 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Nov 2016 13:07:53 -0800 Subject: [PATCH 0518/2608] Add extent serial numbers. Add extent serial numbers and use them where appropriate as a sort key that is higher priority than address, so that the allocation policy prefers older extents. This resolves #147. --- include/jemalloc/internal/arena.h | 11 ++- include/jemalloc/internal/extent.h | 74 ++++++++++++++++++- include/jemalloc/internal/private_symbols.txt | 6 ++ src/arena.c | 27 ++++--- src/base.c | 12 ++- src/extent.c | 28 +++---- src/extent_dss.c | 3 +- 7 files changed, 126 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f518c31f..28d63c68 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -130,7 +130,8 @@ struct arena_bin_s { /* * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is lowest in memory. + * allocations come from the non-full slab that is oldest/lowest in + * memory. */ extent_heap_t slabs_nonfull; @@ -184,6 +185,9 @@ struct arena_s { */ size_t offset_state; + /* Extent serial number generator state. 
*/ + size_t extent_sn_next; + dss_prec_t dss_prec; /* True if a thread is currently executing arena_purge_to_limit(). */ @@ -224,8 +228,8 @@ struct arena_s { /* User-configurable extent hook functions. */ union { - extent_hooks_t *extent_hooks; - void *extent_hooks_pun; + extent_hooks_t *extent_hooks; + void *extent_hooks_pun; }; /* Cache of extent structures that were allocated via base_alloc(). */ @@ -320,6 +324,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 531d853c..3c5573ee 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -26,6 +26,20 @@ struct extent_s { */ size_t e_usize; + /* + * Serial number (potentially non-unique). + * + * In principle serial numbers can wrap around on 32-bit systems if + * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall + * back on address comparison for equal serial numbers, stable (if + * imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of wrap-around, + * e.g. when splitting an extent and assigning the same serial number to + * both resulting adjacent extents. + */ + size_t e_sn; + /* True if extent is active (in use). */ bool e_active; @@ -66,7 +80,7 @@ struct extent_s { qr(extent_t) qr_link; union { - /* Linkage for per size class address-ordered heaps. */ + /* Linkage for per size class sn/address-ordered heaps. */ phn(extent_t) ph_link; /* Linkage for arena's large and extent_cache lists. 
*/ @@ -144,6 +158,7 @@ size_t extent_usize_get(const extent_t *extent); void *extent_before_get(const extent_t *extent); void *extent_last_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); +size_t extent_sn_get(const extent_t *extent); bool extent_active_get(const extent_t *extent); bool extent_retained_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); @@ -157,16 +172,20 @@ void extent_addr_set(extent_t *extent, void *addr); void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); void extent_usize_set(extent_t *extent, size_t usize); +void extent_sn_set(extent_t *extent, size_t sn); void extent_active_set(extent_t *extent, bool active); void extent_zeroed_set(extent_t *extent, bool zeroed); void extent_committed_set(extent_t *extent, bool committed); void extent_slab_set(extent_t *extent, bool slab); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, size_t usize, bool active, bool zeroed, bool committed, - bool slab); + size_t size, size_t usize, size_t sn, bool active, bool zeroed, + bool committed, bool slab); void extent_ring_insert(extent_t *sentinel, extent_t *extent); void extent_ring_remove(extent_t *extent); +int extent_sn_comp(const extent_t *a, const extent_t *b); +int extent_ad_comp(const extent_t *a, const extent_t *b); +int extent_snad_comp(const extent_t *a, const extent_t *b); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -243,6 +262,13 @@ extent_past_get(const extent_t *extent) extent_size_get(extent))); } +JEMALLOC_INLINE size_t +extent_sn_get(const extent_t *extent) +{ + + return (extent->e_sn); +} + JEMALLOC_INLINE bool extent_active_get(const extent_t *extent) { @@ -351,6 +377,13 @@ extent_usize_set(extent_t *extent, size_t usize) extent->e_usize = usize; } +JEMALLOC_INLINE void 
+extent_sn_set(extent_t *extent, size_t sn) +{ + + extent->e_sn = sn; +} + JEMALLOC_INLINE void extent_active_set(extent_t *extent, bool active) { @@ -388,7 +421,8 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - size_t usize, bool active, bool zeroed, bool committed, bool slab) + size_t usize, size_t sn, bool active, bool zeroed, bool committed, + bool slab) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); @@ -397,6 +431,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_addr_set(extent, addr); extent_size_set(extent, size); extent_usize_set(extent, usize); + extent_sn_set(extent, sn); extent_active_set(extent, active); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); @@ -419,6 +454,37 @@ extent_ring_remove(extent_t *extent) qr_remove(extent, qr_link); } + +JEMALLOC_INLINE int +extent_sn_comp(const extent_t *a, const extent_t *b) +{ + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return ((a_sn > b_sn) - (a_sn < b_sn)); +} + +JEMALLOC_INLINE int +extent_ad_comp(const extent_t *a, const extent_t *b) +{ + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = (uintptr_t)extent_addr_get(b); + + return ((a_addr > b_addr) - (a_addr < b_addr)); +} + +JEMALLOC_INLINE int +extent_snad_comp(const extent_t *a, const extent_t *b) +{ + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) + return (ret); + + ret = extent_ad_comp(a, b); + return (ret); +} #endif #endif /* JEMALLOC_H_INLINES */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 707ede3a..4560d702 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -31,6 +31,7 @@ arena_extent_cache_maybe_remove arena_extent_dalloc_large arena_extent_ralloc_large_expand arena_extent_ralloc_large_shrink 
+arena_extent_sn_next arena_get arena_ichoose arena_init @@ -132,6 +133,7 @@ decay_ticker_get dss_prec_names extent_active_get extent_active_set +extent_ad_comp extent_addr_get extent_addr_randomize extent_addr_set @@ -188,6 +190,10 @@ extent_slab_data_get extent_slab_data_get_const extent_slab_get extent_slab_set +extent_sn_comp +extent_sn_get +extent_sn_set +extent_snad_comp extent_split_wrapper extent_usize_get extent_usize_set diff --git a/src/arena.c b/src/arena.c index ef374d35..75a92edc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -760,7 +760,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) size_t ndirty = arena_dirty_count(tsdn, arena); assert(ndirty == arena->ndirty); } - extent_init(&purge_extents_sentinel, arena, NULL, 0, 0, false, false, + extent_init(&purge_extents_sentinel, arena, NULL, 0, 0, 0, false, false, false, false); npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, ndirty_limit, @@ -1351,12 +1351,12 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, assert(extent_slab_data_get(slab)->nfree > 0); /* - * Make sure that if bin->slabcur is non-NULL, it refers to the lowest - * non-full slab. It is okay to NULL slabcur out rather than - * proactively keeping it pointing at the lowest non-full slab. + * Make sure that if bin->slabcur is non-NULL, it refers to the + * oldest/lowest non-full slab. It is okay to NULL slabcur out rather + * than proactively keeping it pointing at the oldest/lowest non-full + * slab. */ - if (bin->slabcur != NULL && (uintptr_t)extent_addr_get(slab) < - (uintptr_t)extent_addr_get(bin->slabcur)) { + if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) { /* Switch slabcur. 
*/ if (extent_slab_data_get(bin->slabcur)->nfree > 0) arena_bin_slabs_nonfull_insert(bin, bin->slabcur); @@ -1651,6 +1651,13 @@ arena_nthreads_dec(arena_t *arena, bool internal) atomic_sub_u(&arena->nthreads[internal], 1); } +size_t +arena_extent_sn_next(arena_t *arena) +{ + + return (atomic_add_zu(&arena->extent_sn_next, 1) - 1); +} + arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { @@ -1684,6 +1691,8 @@ arena_new(tsdn_t *tsdn, unsigned ind) (size_t)(uintptr_t)arena; } + arena->extent_sn_next = 0; + arena->dss_prec = extent_dss_prec_get(); arena->purging = false; @@ -1702,7 +1711,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) extent_heap_new(&arena->extents_retained[i]); } - extent_init(&arena->extents_dirty, arena, NULL, 0, 0, false, false, + extent_init(&arena->extents_dirty, arena, NULL, 0, 0, 0, false, false, false, false); if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", @@ -1724,8 +1733,8 @@ arena_new(tsdn_t *tsdn, unsigned ind) return (NULL); bin->slabcur = NULL; extent_heap_new(&bin->slabs_nonfull); - extent_init(&bin->slabs_full, arena, NULL, 0, 0, false, false, - false, false); + extent_init(&bin->slabs_full, arena, NULL, 0, 0, 0, false, + false, false, false); if (config_stats) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } diff --git a/src/base.c b/src/base.c index 9c3f36cd..4764d9c9 100644 --- a/src/base.c +++ b/src/base.c @@ -5,6 +5,7 @@ /* Data. 
*/ static malloc_mutex_t base_mtx; +static size_t base_extent_sn_next; static extent_heap_t base_avail[NSIZES]; static extent_t *base_extents; static size_t base_allocated; @@ -37,6 +38,14 @@ base_extent_dalloc(tsdn_t *tsdn, extent_t *extent) base_extents = extent; } +static void +base_extent_init(extent_t *extent, void *addr, size_t size) +{ + size_t sn = atomic_add_zu(&base_extent_sn_next, 1) - 1; + + extent_init(extent, NULL, addr, size, 0, sn, true, true, true, false); +} + static extent_t * base_extent_alloc(tsdn_t *tsdn, size_t minsize) { @@ -74,7 +83,7 @@ base_extent_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_init(extent, NULL, addr, esize, 0, true, true, true, false); + base_extent_init(extent, addr, esize); return (extent); } @@ -164,6 +173,7 @@ base_boot(void) if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); + base_extent_sn_next = 0; for (i = 0; i < NSIZES; i++) extent_heap_new(&base_avail[i]); base_extents = NULL; diff --git a/src/extent.c b/src/extent.c index 34ac63e8..be6cadc3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -177,17 +177,8 @@ extent_size_quantize_t *extent_size_quantize_ceil = JEMALLOC_N(n_extent_size_quantize_ceil); #endif -JEMALLOC_INLINE_C int -extent_ad_comp(const extent_t *a, const extent_t *b) -{ - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); - - return ((a_addr > b_addr) - (a_addr < b_addr)); -} - /* Generate pairing heap functions. */ -ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_ad_comp) +ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) static void extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], @@ -353,8 +344,8 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) } /* - * Do first-best-fit extent selection, i.e. select the lowest extent that best - * fits. + * Do first-best-fit extent selection, i.e. 
select the oldest/lowest extent that + * best fits. */ static extent_t * extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, @@ -708,7 +699,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, arena, extent); return (NULL); } - extent_init(extent, arena, addr, size, usize, true, zero, commit, slab); + extent_init(extent, arena, addr, size, usize, + arena_extent_sn_next(arena), true, zero, commit, slab); if (pad != 0) extent_addr_randomize(tsdn, extent, alignment); if (extent_register(tsdn, extent)) { @@ -1036,7 +1028,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t lead; extent_init(&lead, arena, extent_addr_get(extent), size_a, - usize_a, extent_active_get(extent), + usize_a, extent_sn_get(extent), extent_active_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); @@ -1046,9 +1038,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, usize_b, extent_active_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_slab_get(extent)); + size_a), size_b, usize_b, extent_sn_get(extent), + extent_active_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, &trail_elm_a, &trail_elm_b)) goto label_error_c; @@ -1145,6 +1137,8 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_size_set(a, extent_size_get(a) + extent_size_get(b)); extent_usize_set(a, extent_usize_get(a) + extent_usize_get(b)); + extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? 
+ extent_sn_get(a) : extent_sn_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); diff --git a/src/extent_dss.c b/src/extent_dss.c index 31fe8fe2..1169d496 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -142,7 +142,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, gap_size = (uintptr_t)ret - (uintptr_t)gap_addr; if (gap_size != 0) { extent_init(gap, arena, gap_addr, gap_size, - gap_size, false, false, true, false); + gap_size, arena_extent_sn_next(arena), + false, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)max_cur || From 2c951545011f201c89765b9ada656e99538d95aa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Nov 2016 23:29:21 -0800 Subject: [PATCH 0519/2608] Add packing test, which verifies stable layout policy. --- Makefile.in | 1 + test/unit/pack.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100644 test/unit/pack.c diff --git a/Makefile.in b/Makefile.in index e4aaaf21..5e6f0c1a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -160,6 +160,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/test/unit/pack.c b/test/unit/pack.c new file mode 100644 index 00000000..8071183e --- /dev/null +++ b/test/unit/pack.c @@ -0,0 +1,167 @@ +#include "test/jemalloc_test.h" + +/* Immediately purge to minimize fragmentation. */ +const char *malloc_conf = "decay_time:-1"; + +/* + * Size class that is a divisor of the page size, ideally 4+ regions per run. + */ +#if LG_PAGE <= 14 +#define SZ (ZU(1) << (LG_PAGE - 2)) +#else +#define SZ 4096 +#endif + +/* + * Number of slabs to consume at high water mark. 
Should be at least 2 so that + * if mmap()ed memory grows downward, downward growth of mmap()ed memory is + * tested. + */ +#define NSLABS 8 + +static unsigned +binind_compute(void) +{ + size_t sz; + unsigned nbins, i; + + sz = sizeof(nbins); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + for (i = 0; i < nbins; i++) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + size_t size; + + assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, + &miblen), 0, "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)i; + + sz = sizeof(size); + assert_d_eq(mallctlbymib(mib, miblen, &size, &sz, NULL, 0), 0, + "Unexpected mallctlbymib failure"); + if (size == SZ) + return (i); + } + + test_fail("Unable to compute nregs_per_run"); + return (0); +} + +static size_t +nregs_per_run_compute(void) +{ + uint32_t nregs; + size_t sz; + unsigned binind = binind_compute(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)binind; + sz = sizeof(nregs); + assert_d_eq(mallctlbymib(mib, miblen, &nregs, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + return (nregs); +} + +static unsigned +arenas_extend_mallctl(void) +{ + unsigned arena_ind; + size_t sz; + + sz = sizeof(arena_ind); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Error in arenas.extend"); + + return (arena_ind); +} + +static void +arena_reset_mallctl(unsigned arena_ind) +{ + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +TEST_BEGIN(test_pack) +{ + unsigned arena_ind = arenas_extend_mallctl(); + size_t nregs_per_run = 
nregs_per_run_compute(); + size_t nregs = nregs_per_run * NSLABS; + VARIABLE_ARRAY(void *, ptrs, nregs); + size_t i, j, offset; + + /* Fill matrix. */ + for (i = offset = 0; i < NSLABS; i++) { + for (j = 0; j < nregs_per_run; j++) { + void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" + " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", + SZ, arena_ind, i, j); + ptrs[(i * nregs_per_run) + j] = p; + } + } + + /* + * Free all but one region of each run, but rotate which region is + * preserved, so that subsequent allocations exercise the within-run + * layout policy. + */ + offset = 0; + for (i = offset = 0; + i < NSLABS; + i++, offset = (offset + 1) % nregs_per_run) { + for (j = 0; j < nregs_per_run; j++) { + void *p = ptrs[(i * nregs_per_run) + j]; + if (offset == j) + continue; + dallocx(p, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + } + } + + /* + * Logically refill matrix, skipping preserved regions and verifying + * that the matrix is unmodified. + */ + offset = 0; + for (i = offset = 0; + i < NSLABS; + i++, offset = (offset + 1) % nregs_per_run) { + for (j = 0; j < nregs_per_run; j++) { + void *p; + + if (offset == j) + continue; + p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], + "Unexpected refill discrepancy, run=%zu, reg=%zu\n", + i, j); + } + } + + /* Clean up. */ + arena_reset_mallctl(arena_ind); +} +TEST_END + +int +main(void) +{ + + return (test( + test_pack)); +} From 5c77af98b16a0f5b15bc807f2b323a91fe2a048b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Nov 2016 18:27:23 -0800 Subject: [PATCH 0520/2608] Add extent serial numbers. Add extent serial numbers and use them where appropriate as a sort key that is higher priority than address, so that the allocation policy prefers older extents. This resolves #147. 
--- include/jemalloc/internal/arena.h | 15 +- include/jemalloc/internal/chunk.h | 11 +- include/jemalloc/internal/extent.h | 41 ++++- include/jemalloc/internal/private_symbols.txt | 41 ++--- src/arena.c | 148 ++++++++++++------ src/base.c | 24 ++- src/chunk.c | 115 +++++++------- src/chunk_dss.c | 3 +- src/extent.c | 49 +++--- src/huge.c | 16 +- 10 files changed, 299 insertions(+), 164 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index f39ce54b..30e2bdd6 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -374,10 +374,12 @@ struct arena_s { dss_prec_t dss_prec; - /* Extant arena chunks. */ ql_head(extent_node_t) achunks; + /* Extent serial number generator state. */ + size_t extent_sn_next; + /* * In order to avoid rapid chunk allocation/deallocation when an arena * oscillates right on the cusp of needing a new chunk, cache the most @@ -453,9 +455,9 @@ struct arena_s { * orderings are needed, which is why there are two trees with the same * contents. 
*/ - extent_tree_t chunks_szad_cached; + extent_tree_t chunks_szsnad_cached; extent_tree_t chunks_ad_cached; - extent_tree_t chunks_szad_retained; + extent_tree_t chunks_szsnad_retained; extent_tree_t chunks_ad_retained; malloc_mutex_t chunks_mtx; @@ -522,13 +524,13 @@ void arena_chunk_cache_maybe_remove(arena_t *arena, extent_node_t *node, extent_node_t *arena_node_alloc(tsdn_t *tsdn, arena_t *arena); void arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node); void *arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero); + size_t alignment, size_t *sn, bool *zero); void arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t usize); + size_t usize, size_t sn); void arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize); void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, - void *chunk, size_t oldsize, size_t usize); + void *chunk, size_t oldsize, size_t usize, size_t sn); bool arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t oldsize, size_t usize, bool *zero); ssize_t arena_lg_dirty_mult_get(tsdn_t *tsdn, arena_t *arena); @@ -601,6 +603,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind); void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 38c9a012..50b9904b 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -58,15 +58,16 @@ void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t 
*tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, bool dalloc_node); + size_t *sn, bool *zero, bool *commit, bool dalloc_node); void *chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); + size_t *sn, bool *zero, bool *commit); void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool committed); -void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, void *chunk, size_t size, bool zeroed, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, bool committed); +void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t sn, + bool zeroed, bool committed); bool chunk_purge_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t size, size_t offset, size_t length); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 49d76a57..168ffe64 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -18,6 +18,20 @@ struct extent_node_s { /* Total region size. */ size_t en_size; + /* + * Serial number (potentially non-unique). + * + * In principle serial numbers can wrap around on 32-bit systems if + * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall + * back on address comparison for equal serial numbers, stable (if + * imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of wrap-around, + * e.g. when splitting an extent and assigning the same serial number to + * both resulting adjacent extents. + */ + size_t en_sn; + /* * The zeroed flag is used by chunk recycling code to track whether * memory is zero-filled. 
@@ -45,8 +59,8 @@ struct extent_node_s { qr(extent_node_t) cc_link; union { - /* Linkage for the size/address-ordered tree. */ - rb_node(extent_node_t) szad_link; + /* Linkage for the size/sn/address-ordered tree. */ + rb_node(extent_node_t) szsnad_link; /* Linkage for arena's achunks, huge, and node_cache lists. */ ql_elm(extent_node_t) ql_link; @@ -61,7 +75,7 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -rb_proto(, extent_tree_szad_, extent_tree_t, extent_node_t) +rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t) rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) @@ -73,6 +87,7 @@ rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) arena_t *extent_node_arena_get(const extent_node_t *node); void *extent_node_addr_get(const extent_node_t *node); size_t extent_node_size_get(const extent_node_t *node); +size_t extent_node_sn_get(const extent_node_t *node); bool extent_node_zeroed_get(const extent_node_t *node); bool extent_node_committed_get(const extent_node_t *node); bool extent_node_achunk_get(const extent_node_t *node); @@ -80,12 +95,13 @@ prof_tctx_t *extent_node_prof_tctx_get(const extent_node_t *node); void extent_node_arena_set(extent_node_t *node, arena_t *arena); void extent_node_addr_set(extent_node_t *node, void *addr); void extent_node_size_set(extent_node_t *node, size_t size); +void extent_node_sn_set(extent_node_t *node, size_t sn); void extent_node_zeroed_set(extent_node_t *node, bool zeroed); void extent_node_committed_set(extent_node_t *node, bool committed); void extent_node_achunk_set(extent_node_t *node, bool achunk); void extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx); void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, - size_t size, bool zeroed, bool committed); + size_t size, size_t sn, bool zeroed, bool committed); void extent_node_dirty_linkage_init(extent_node_t 
*node); void extent_node_dirty_insert(extent_node_t *node, arena_runs_dirty_link_t *runs_dirty, extent_node_t *chunks_dirty); @@ -114,6 +130,13 @@ extent_node_size_get(const extent_node_t *node) return (node->en_size); } +JEMALLOC_INLINE size_t +extent_node_sn_get(const extent_node_t *node) +{ + + return (node->en_sn); +} + JEMALLOC_INLINE bool extent_node_zeroed_get(const extent_node_t *node) { @@ -164,6 +187,13 @@ extent_node_size_set(extent_node_t *node, size_t size) node->en_size = size; } +JEMALLOC_INLINE void +extent_node_sn_set(extent_node_t *node, size_t sn) +{ + + node->en_sn = sn; +} + JEMALLOC_INLINE void extent_node_zeroed_set(extent_node_t *node, bool zeroed) { @@ -194,12 +224,13 @@ extent_node_prof_tctx_set(extent_node_t *node, prof_tctx_t *tctx) JEMALLOC_INLINE void extent_node_init(extent_node_t *node, arena_t *arena, void *addr, size_t size, - bool zeroed, bool committed) + size_t sn, bool zeroed, bool committed) { extent_node_arena_set(node, arena); extent_node_addr_set(node, addr); extent_node_size_set(node, size); + extent_node_sn_set(node, sn); extent_node_zeroed_set(node, zeroed); extent_node_committed_set(node, committed); extent_node_achunk_set(node, false); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 87c8c9b7..71bfb94d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -36,6 +36,7 @@ arena_decay_time_get arena_decay_time_set arena_dss_prec_get arena_dss_prec_set +arena_extent_sn_next arena_get arena_ichoose arena_init @@ -218,6 +219,8 @@ extent_node_prof_tctx_get extent_node_prof_tctx_set extent_node_size_get extent_node_size_set +extent_node_sn_get +extent_node_sn_set extent_node_zeroed_get extent_node_zeroed_set extent_tree_ad_destroy @@ -239,25 +242,25 @@ extent_tree_ad_reverse_iter extent_tree_ad_reverse_iter_recurse extent_tree_ad_reverse_iter_start extent_tree_ad_search -extent_tree_szad_destroy 
-extent_tree_szad_destroy_recurse -extent_tree_szad_empty -extent_tree_szad_first -extent_tree_szad_insert -extent_tree_szad_iter -extent_tree_szad_iter_recurse -extent_tree_szad_iter_start -extent_tree_szad_last -extent_tree_szad_new -extent_tree_szad_next -extent_tree_szad_nsearch -extent_tree_szad_prev -extent_tree_szad_psearch -extent_tree_szad_remove -extent_tree_szad_reverse_iter -extent_tree_szad_reverse_iter_recurse -extent_tree_szad_reverse_iter_start -extent_tree_szad_search +extent_tree_szsnad_destroy +extent_tree_szsnad_destroy_recurse +extent_tree_szsnad_empty +extent_tree_szsnad_first +extent_tree_szsnad_insert +extent_tree_szsnad_iter +extent_tree_szsnad_iter_recurse +extent_tree_szsnad_iter_start +extent_tree_szsnad_last +extent_tree_szsnad_new +extent_tree_szsnad_next +extent_tree_szsnad_nsearch +extent_tree_szsnad_prev +extent_tree_szsnad_psearch +extent_tree_szsnad_remove +extent_tree_szsnad_reverse_iter +extent_tree_szsnad_reverse_iter_recurse +extent_tree_szsnad_reverse_iter_start +extent_tree_szsnad_search ffs_llu ffs_lu ffs_u diff --git a/src/arena.c b/src/arena.c index e196b133..a3f9899f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -38,8 +38,8 @@ static void arena_run_dalloc(tsdn_t *tsdn, arena_t *arena, arena_run_t *run, bool dirty, bool cleaned, bool decommitted); static void arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, arena_bin_t *bin); -static void arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, - arena_run_t *run, arena_bin_t *bin); +static void arena_bin_lower_run(arena_t *arena, arena_run_t *run, + arena_bin_t *bin); /******************************************************************************/ @@ -55,8 +55,31 @@ arena_miscelm_size_get(const arena_chunk_map_misc_t *miscelm) return (arena_mapbits_size_decode(mapbits)); } +JEMALLOC_INLINE_C const extent_node_t * +arena_miscelm_extent_get(const arena_chunk_map_misc_t *miscelm) +{ + arena_chunk_t *chunk; + + chunk = 
(arena_chunk_t *)CHUNK_ADDR2BASE(miscelm); + return (&chunk->node); +} + JEMALLOC_INLINE_C int -arena_run_addr_comp(const arena_chunk_map_misc_t *a, +arena_sn_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) +{ + uint64_t a_sn, b_sn; + + assert(a != NULL); + assert(b != NULL); + + a_sn = extent_node_sn_get(arena_miscelm_extent_get(a)); + b_sn = extent_node_sn_get(arena_miscelm_extent_get(b)); + + return ((a_sn > b_sn) - (a_sn < b_sn)); +} + +JEMALLOC_INLINE_C int +arena_ad_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) { uintptr_t a_miscelm = (uintptr_t)a; @@ -68,9 +91,26 @@ arena_run_addr_comp(const arena_chunk_map_misc_t *a, return ((a_miscelm > b_miscelm) - (a_miscelm < b_miscelm)); } +JEMALLOC_INLINE_C int +arena_snad_comp(const arena_chunk_map_misc_t *a, + const arena_chunk_map_misc_t *b) +{ + int ret; + + assert(a != NULL); + assert(b != NULL); + + ret = arena_sn_comp(a, b); + if (ret != 0) + return (ret); + + ret = arena_ad_comp(a, b); + return (ret); +} + /* Generate pairing heap functions. */ ph_gen(static UNUSED, arena_run_heap_, arena_run_heap_t, arena_chunk_map_misc_t, - ph_link, arena_run_addr_comp) + ph_link, arena_snad_comp) #ifdef JEMALLOC_JET #undef run_quantize_floor @@ -529,7 +569,7 @@ arena_chunk_init_spare(arena_t *arena) static bool arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - bool zero) + size_t sn, bool zero) { /* @@ -538,7 +578,7 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, * of runs is tracked individually, and upon chunk deallocation the * entire chunk is in a consistent commit state. 
*/ - extent_node_init(&chunk->node, arena, chunk, chunksize, zero, true); + extent_node_init(&chunk->node, arena, chunk, chunksize, sn, zero, true); extent_node_achunk_set(&chunk->node, true); return (chunk_register(tsdn, chunk, &chunk->node)); } @@ -548,28 +588,30 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, bool *zero, bool *commit) { arena_chunk_t *chunk; + size_t sn; malloc_mutex_unlock(tsdn, &arena->lock); chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, - NULL, chunksize, chunksize, zero, commit); + NULL, chunksize, chunksize, &sn, zero, commit); if (chunk != NULL && !*commit) { /* Commit header. */ if (chunk_hooks->commit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind)) { chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, - (void *)chunk, chunksize, *zero, *commit); + (void *)chunk, chunksize, sn, *zero, *commit); chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, *zero)) { + if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, sn, + *zero)) { if (!*commit) { /* Undo commit of header. 
*/ chunk_hooks->decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, - chunksize, *zero, *commit); + chunksize, sn, *zero, *commit); chunk = NULL; } @@ -583,13 +625,14 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, { arena_chunk_t *chunk; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; + size_t sn; chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, - chunksize, zero, commit, true); + chunksize, &sn, zero, commit, true); if (chunk != NULL) { - if (arena_chunk_register(tsdn, arena, chunk, *zero)) { + if (arena_chunk_register(tsdn, arena, chunk, sn, *zero)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, - chunksize, true); + chunksize, sn, true); return (NULL); } } @@ -684,11 +727,13 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { + size_t sn; bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_deregister(chunk, &chunk->node); + sn = extent_node_sn_get(&chunk->node); committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { /* @@ -703,7 +748,7 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, - committed); + sn, committed); if (config_stats) { arena->stats.mapped -= chunksize; @@ -859,14 +904,14 @@ arena_node_dalloc(tsdn_t *tsdn, arena_t *arena, extent_node_t *node) static void * arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, bool *zero, - size_t csize) + chunk_hooks_t *chunk_hooks, size_t usize, size_t alignment, size_t *sn, + bool *zero, size_t csize) { void *ret; bool commit = true; ret = chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, csize, - alignment, zero, &commit); + alignment, sn, zero, &commit); if (ret == NULL) { /* 
Revert optimistic stats updates. */ malloc_mutex_lock(tsdn, &arena->lock); @@ -883,7 +928,7 @@ arena_chunk_alloc_huge_hard(tsdn_t *tsdn, arena_t *arena, void * arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero) + size_t alignment, size_t *sn, bool *zero) { void *ret; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; @@ -900,18 +945,19 @@ arena_chunk_alloc_huge(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, usize >> LG_PAGE); ret = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, csize, - alignment, zero, &commit, true); + alignment, sn, zero, &commit, true); malloc_mutex_unlock(tsdn, &arena->lock); if (ret == NULL) { ret = arena_chunk_alloc_huge_hard(tsdn, arena, &chunk_hooks, - usize, alignment, zero, csize); + usize, alignment, sn, zero, csize); } return (ret); } void -arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize) +arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize, + size_t sn) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t csize; @@ -924,7 +970,7 @@ arena_chunk_dalloc_huge(tsdn_t *tsdn, arena_t *arena, void *chunk, size_t usize) } arena_nactive_sub(arena, usize >> LG_PAGE); - chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, true); + chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, csize, sn, true); malloc_mutex_unlock(tsdn, &arena->lock); } @@ -948,7 +994,7 @@ arena_chunk_ralloc_huge_similar(tsdn_t *tsdn, arena_t *arena, void *chunk, void arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, - size_t oldsize, size_t usize) + size_t oldsize, size_t usize, size_t sn) { size_t udiff = oldsize - usize; size_t cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); @@ -967,7 +1013,7 @@ arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, CHUNK_CEILING(usize)); chunk_dalloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - true); + sn, true); } 
malloc_mutex_unlock(tsdn, &arena->lock); } @@ -975,13 +1021,13 @@ arena_chunk_ralloc_huge_shrink(tsdn_t *tsdn, arena_t *arena, void *chunk, static bool arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void *chunk, size_t oldsize, size_t usize, - bool *zero, void *nchunk, size_t udiff, size_t cdiff) + size_t *sn, bool *zero, void *nchunk, size_t udiff, size_t cdiff) { bool err; bool commit = true; err = (chunk_alloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, - chunksize, zero, &commit) == NULL); + chunksize, sn, zero, &commit) == NULL); if (err) { /* Revert optimistic stats updates. */ malloc_mutex_lock(tsdn, &arena->lock); @@ -995,7 +1041,7 @@ arena_chunk_ralloc_huge_expand_hard(tsdn_t *tsdn, arena_t *arena, } else if (chunk_hooks->merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, nchunk, cdiff, - *zero, true); + *sn, *zero, true); err = true; } return (err); @@ -1010,6 +1056,7 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, void *nchunk = (void *)((uintptr_t)chunk + CHUNK_CEILING(oldsize)); size_t udiff = usize - oldsize; size_t cdiff = CHUNK_CEILING(usize) - CHUNK_CEILING(oldsize); + size_t sn; bool commit = true; malloc_mutex_lock(tsdn, &arena->lock); @@ -1022,16 +1069,16 @@ arena_chunk_ralloc_huge_expand(tsdn_t *tsdn, arena_t *arena, void *chunk, arena_nactive_add(arena, udiff >> LG_PAGE); err = (chunk_alloc_cache(tsdn, arena, &chunk_hooks, nchunk, cdiff, - chunksize, zero, &commit, true) == NULL); + chunksize, &sn, zero, &commit, true) == NULL); malloc_mutex_unlock(tsdn, &arena->lock); if (err) { err = arena_chunk_ralloc_huge_expand_hard(tsdn, arena, - &chunk_hooks, chunk, oldsize, usize, zero, nchunk, udiff, - cdiff); + &chunk_hooks, chunk, oldsize, usize, &sn, zero, nchunk, + udiff, cdiff); } else if (chunk_hooks.merge(chunk, CHUNK_CEILING(oldsize), nchunk, cdiff, true, arena->ind)) { chunk_dalloc_wrapper(tsdn, 
arena, &chunk_hooks, nchunk, cdiff, - *zero, true); + sn, *zero, true); err = true; } @@ -1519,6 +1566,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (rdelm == &chunkselm->rd) { extent_node_t *chunkselm_next; + size_t sn; bool zero, commit; UNUSED void *chunk; @@ -1536,8 +1584,8 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, commit = false; chunk = chunk_alloc_cache(tsdn, arena, chunk_hooks, extent_node_addr_get(chunkselm), - extent_node_size_get(chunkselm), chunksize, &zero, - &commit, false); + extent_node_size_get(chunkselm), chunksize, &sn, + &zero, &commit, false); assert(chunk == extent_node_addr_get(chunkselm)); assert(zero == extent_node_zeroed_get(chunkselm)); extent_node_dirty_insert(chunkselm, purge_runs_sentinel, @@ -1703,13 +1751,14 @@ arena_unstash_purged(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, cc_link); void *addr = extent_node_addr_get(chunkselm); size_t size = extent_node_size_get(chunkselm); + size_t sn = extent_node_sn_get(chunkselm); bool zeroed = extent_node_zeroed_get(chunkselm); bool committed = extent_node_committed_get(chunkselm); extent_node_dirty_remove(chunkselm); arena_node_dalloc(tsdn, arena, chunkselm); chunkselm = chunkselm_next; chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, addr, - size, zeroed, committed); + size, sn, zeroed, committed); } else { arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(rdelm); @@ -2315,7 +2364,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin) arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else - arena_bin_lower_run(arena, chunk, run, bin); + arena_bin_lower_run(arena, run, bin); } return (ret); } @@ -2820,16 +2869,18 @@ arena_dalloc_bin_run(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, } static void -arena_bin_lower_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run, - arena_bin_t *bin) +arena_bin_lower_run(arena_t *arena, arena_run_t *run, arena_bin_t *bin) { /* - * 
Make sure that if bin->runcur is non-NULL, it refers to the lowest - * non-full run. It is okay to NULL runcur out rather than proactively - * keeping it pointing at the lowest non-full run. + * Make sure that if bin->runcur is non-NULL, it refers to the + * oldest/lowest non-full run. It is okay to NULL runcur out rather + * than proactively keeping it pointing at the oldest/lowest non-full + * run. */ - if ((uintptr_t)run < (uintptr_t)bin->runcur) { + if (bin->runcur != NULL && + arena_snad_comp(arena_run_to_miscelm(bin->runcur), + arena_run_to_miscelm(run)) > 0) { /* Switch runcur. */ if (bin->runcur->nfree > 0) arena_bin_runs_insert(bin, bin->runcur); @@ -2865,7 +2916,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, arena_dissociate_bin_run(chunk, run, bin); arena_dalloc_bin_run(tsdn, arena, chunk, run, bin); } else if (run->nfree == 1 && run != bin->runcur) - arena_bin_lower_run(arena, chunk, run, bin); + arena_bin_lower_run(arena, run, bin); if (config_stats) { bin->stats.ndalloc++; @@ -3452,6 +3503,13 @@ arena_nthreads_dec(arena_t *arena, bool internal) atomic_sub_u(&arena->nthreads[internal], 1); } +size_t +arena_extent_sn_next(arena_t *arena) +{ + + return (atomic_add_z(&arena->extent_sn_next, 1) - 1); +} + arena_t * arena_new(tsdn_t *tsdn, unsigned ind) { @@ -3511,6 +3569,8 @@ arena_new(tsdn_t *tsdn, unsigned ind) ql_new(&arena->achunks); + arena->extent_sn_next = 0; + arena->spare = NULL; arena->lg_dirty_mult = arena_lg_dirty_mult_default_get(); @@ -3532,9 +3592,9 @@ arena_new(tsdn_t *tsdn, unsigned ind) WITNESS_RANK_ARENA_HUGE)) return (NULL); - extent_tree_szad_new(&arena->chunks_szad_cached); + extent_tree_szsnad_new(&arena->chunks_szsnad_cached); extent_tree_ad_new(&arena->chunks_ad_cached); - extent_tree_szad_new(&arena->chunks_szad_retained); + extent_tree_szsnad_new(&arena->chunks_szsnad_retained); extent_tree_ad_new(&arena->chunks_ad_retained); if (malloc_mutex_init(&arena->chunks_mtx, "arena_chunks", 
WITNESS_RANK_ARENA_CHUNKS)) diff --git a/src/base.c b/src/base.c index 81b0801f..5681a3f3 100644 --- a/src/base.c +++ b/src/base.c @@ -5,7 +5,8 @@ /* Data. */ static malloc_mutex_t base_mtx; -static extent_tree_t base_avail_szad; +static size_t base_extent_sn_next; +static extent_tree_t base_avail_szsnad; static extent_node_t *base_nodes; static size_t base_allocated; static size_t base_resident; @@ -39,6 +40,14 @@ base_node_dalloc(tsdn_t *tsdn, extent_node_t *node) base_nodes = node; } +static void +base_extent_node_init(extent_node_t *node, void *addr, size_t size) +{ + size_t sn = atomic_add_z(&base_extent_sn_next, 1) - 1; + + extent_node_init(node, NULL, addr, size, sn, true, true); +} + static extent_node_t * base_chunk_alloc(tsdn_t *tsdn, size_t minsize) { @@ -68,7 +77,7 @@ base_chunk_alloc(tsdn_t *tsdn, size_t minsize) base_resident += PAGE_CEILING(nsize); } } - extent_node_init(node, NULL, addr, csize, true, true); + base_extent_node_init(node, addr, csize); return (node); } @@ -92,12 +101,12 @@ base_alloc(tsdn_t *tsdn, size_t size) csize = CACHELINE_CEILING(size); usize = s2u(csize); - extent_node_init(&key, NULL, NULL, usize, false, false); + extent_node_init(&key, NULL, NULL, usize, 0, false, false); malloc_mutex_lock(tsdn, &base_mtx); - node = extent_tree_szad_nsearch(&base_avail_szad, &key); + node = extent_tree_szsnad_nsearch(&base_avail_szsnad, &key); if (node != NULL) { /* Use existing space. */ - extent_tree_szad_remove(&base_avail_szad, node); + extent_tree_szsnad_remove(&base_avail_szsnad, node); } else { /* Try to allocate more space. 
*/ node = base_chunk_alloc(tsdn, csize); @@ -111,7 +120,7 @@ base_alloc(tsdn_t *tsdn, size_t size) if (extent_node_size_get(node) > csize) { extent_node_addr_set(node, (void *)((uintptr_t)ret + csize)); extent_node_size_set(node, extent_node_size_get(node) - csize); - extent_tree_szad_insert(&base_avail_szad, node); + extent_tree_szsnad_insert(&base_avail_szsnad, node); } else base_node_dalloc(tsdn, node); if (config_stats) { @@ -149,7 +158,8 @@ base_boot(void) if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) return (true); - extent_tree_szad_new(&base_avail_szad); + base_extent_sn_next = 0; + extent_tree_szsnad_new(&base_avail_szsnad); base_nodes = NULL; return (false); diff --git a/src/chunk.c b/src/chunk.c index d7002873..c1c514a8 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -50,9 +50,9 @@ const chunk_hooks_t chunk_hooks_default = { */ static void chunk_record(tsdn_t *tsdn, arena_t *arena, - chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, bool zeroed, - bool committed); + chunk_hooks_t *chunk_hooks, extent_tree_t *chunks_szsnad, + extent_tree_t *chunks_ad, bool cache, void *chunk, size_t size, size_t sn, + bool zeroed, bool committed); /******************************************************************************/ @@ -183,26 +183,25 @@ chunk_deregister(const void *chunk, const extent_node_t *node) } /* - * Do first-best-fit chunk selection, i.e. select the lowest chunk that best - * fits. + * Do first-best-fit chunk selection, i.e. select the oldest/lowest chunk that + * best fits. 
*/ static extent_node_t * -chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szad, - extent_tree_t *chunks_ad, size_t size) +chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szsnad, size_t size) { extent_node_t key; assert(size == CHUNK_CEILING(size)); - extent_node_init(&key, arena, NULL, size, false, false); - return (extent_tree_szad_nsearch(chunks_szad, &key)); + extent_node_init(&key, arena, NULL, size, 0, false, false); + return (extent_tree_szsnad_nsearch(chunks_szsnad, &key)); } static void * chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool dalloc_node) + extent_tree_t *chunks_szsnad, extent_tree_t *chunks_ad, bool cache, + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit, bool dalloc_node) { void *ret; extent_node_t *node; @@ -228,12 +227,11 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); if (new_addr != NULL) { extent_node_t key; - extent_node_init(&key, arena, new_addr, alloc_size, false, + extent_node_init(&key, arena, new_addr, alloc_size, 0, false, false); node = extent_tree_ad_search(chunks_ad, &key); } else { - node = chunk_first_best_fit(arena, chunks_szad, chunks_ad, - alloc_size); + node = chunk_first_best_fit(arena, chunks_szsnad, alloc_size); } if (node == NULL || (new_addr != NULL && extent_node_size_get(node) < size)) { @@ -246,6 +244,7 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(extent_node_size_get(node) >= leadsize + size); trailsize = extent_node_size_get(node) - leadsize - size; ret = (void *)((uintptr_t)extent_node_addr_get(node) + leadsize); + *sn = extent_node_sn_get(node); zeroed = extent_node_zeroed_get(node); if (zeroed) *zero = true; @@ -260,13 +259,13 @@ chunk_recycle(tsdn_t 
*tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, return (NULL); } /* Remove node from the tree. */ - extent_tree_szad_remove(chunks_szad, node); + extent_tree_szsnad_remove(chunks_szsnad, node); extent_tree_ad_remove(chunks_ad, node); arena_chunk_cache_maybe_remove(arena, node, cache); if (leadsize != 0) { /* Insert the leading space as a smaller chunk. */ extent_node_size_set(node, leadsize); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; @@ -278,9 +277,9 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (dalloc_node && node != NULL) arena_node_dalloc(tsdn, arena, node); malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szad, - chunks_ad, cache, ret, size + trailsize, zeroed, - committed); + chunk_record(tsdn, arena, chunk_hooks, chunks_szsnad, + chunks_ad, cache, ret, size + trailsize, *sn, + zeroed, committed); return (NULL); } /* Insert the trailing space as a smaller chunk. 
*/ @@ -289,22 +288,22 @@ chunk_recycle(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (node == NULL) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); chunk_record(tsdn, arena, chunk_hooks, - chunks_szad, chunks_ad, cache, ret, size + - trailsize, zeroed, committed); + chunks_szsnad, chunks_ad, cache, ret, size + + trailsize, *sn, zeroed, committed); return (NULL); } } extent_node_init(node, arena, (void *)((uintptr_t)(ret) + size), - trailsize, zeroed, committed); - extent_tree_szad_insert(chunks_szad, node); + trailsize, *sn, zeroed, committed); + extent_tree_szsnad_insert(chunks_szsnad, node); extent_tree_ad_insert(chunks_ad, node); arena_chunk_cache_maybe_insert(arena, node, cache); node = NULL; } if (!committed && chunk_hooks->commit(ret, size, 0, size, arena->ind)) { malloc_mutex_unlock(tsdn, &arena->chunks_mtx); - chunk_record(tsdn, arena, chunk_hooks, chunks_szad, chunks_ad, - cache, ret, size, zeroed, committed); + chunk_record(tsdn, arena, chunk_hooks, chunks_szsnad, chunks_ad, + cache, ret, size, *sn, zeroed, committed); return (NULL); } malloc_mutex_unlock(tsdn, &arena->chunks_mtx); @@ -388,8 +387,8 @@ chunk_alloc_base(size_t size) void * chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, - bool dalloc_node) + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit, bool dalloc_node) { void *ret; @@ -399,8 +398,8 @@ chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szad_cached, &arena->chunks_ad_cached, true, - new_addr, size, alignment, zero, commit, dalloc_node); + &arena->chunks_szsnad_cached, &arena->chunks_ad_cached, true, + new_addr, size, alignment, sn, zero, commit, dalloc_node); if (ret == NULL) return (NULL); if (config_valgrind) @@ -454,7 +453,8 @@ chunk_alloc_default(void 
*new_addr, size_t size, size_t alignment, bool *zero, static void * chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit) { void *ret; @@ -464,8 +464,8 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert((alignment & chunksize_mask) == 0); ret = chunk_recycle(tsdn, arena, chunk_hooks, - &arena->chunks_szad_retained, &arena->chunks_ad_retained, false, - new_addr, size, alignment, zero, commit, true); + &arena->chunks_szsnad_retained, &arena->chunks_ad_retained, false, + new_addr, size, alignment, sn, zero, commit, true); if (config_stats && ret != NULL) arena->stats.retained -= size; @@ -475,14 +475,15 @@ chunk_alloc_retained(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, void * chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) + void *new_addr, size_t size, size_t alignment, size_t *sn, bool *zero, + bool *commit) { void *ret; chunk_hooks_assure_initialized(tsdn, arena, chunk_hooks); ret = chunk_alloc_retained(tsdn, arena, chunk_hooks, new_addr, size, - alignment, zero, commit); + alignment, sn, zero, commit); if (ret == NULL) { if (chunk_hooks->alloc == chunk_alloc_default) { /* Call directly to propagate tsdn. 
*/ @@ -496,6 +497,8 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, if (ret == NULL) return (NULL); + *sn = arena_extent_sn_next(arena); + if (config_valgrind && chunk_hooks->alloc != chunk_alloc_default) JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED(ret, chunksize); @@ -506,8 +509,8 @@ chunk_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, static void chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - extent_tree_t *chunks_szad, extent_tree_t *chunks_ad, bool cache, - void *chunk, size_t size, bool zeroed, bool committed) + extent_tree_t *chunks_szsnad, extent_tree_t *chunks_ad, bool cache, + void *chunk, size_t size, size_t sn, bool zeroed, bool committed) { bool unzeroed; extent_node_t *node, *prev; @@ -519,7 +522,7 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, malloc_mutex_lock(tsdn, &arena->chunks_mtx); chunk_hooks_assure_initialized_locked(tsdn, arena, chunk_hooks); - extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, + extent_node_init(&key, arena, (void *)((uintptr_t)chunk + size), 0, 0, false, false); node = extent_tree_ad_nsearch(chunks_ad, &key); /* Try to coalesce forward. */ @@ -531,15 +534,17 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * Coalesce chunk with the following address range. This does * not change the position within chunks_ad, so only - * remove/insert from/into chunks_szad. + * remove/insert from/into chunks_szsnad. 
*/ - extent_tree_szad_remove(chunks_szad, node); + extent_tree_szsnad_remove(chunks_szsnad, node); arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, chunk); extent_node_size_set(node, size + extent_node_size_get(node)); + if (sn < extent_node_sn_get(node)) + extent_node_sn_set(node, sn); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && !unzeroed); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); arena_chunk_cache_maybe_insert(arena, node, cache); } else { /* Coalescing forward failed, so insert a new node. */ @@ -557,10 +562,10 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } goto label_return; } - extent_node_init(node, arena, chunk, size, !unzeroed, + extent_node_init(node, arena, chunk, size, sn, !unzeroed, committed); extent_tree_ad_insert(chunks_ad, node); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); arena_chunk_cache_maybe_insert(arena, node, cache); } @@ -574,19 +579,21 @@ chunk_record(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * Coalesce chunk with the previous address range. This does * not change the position within chunks_ad, so only - * remove/insert node from/into chunks_szad. + * remove/insert node from/into chunks_szsnad. 
*/ - extent_tree_szad_remove(chunks_szad, prev); + extent_tree_szsnad_remove(chunks_szsnad, prev); extent_tree_ad_remove(chunks_ad, prev); arena_chunk_cache_maybe_remove(arena, prev, cache); - extent_tree_szad_remove(chunks_szad, node); + extent_tree_szsnad_remove(chunks_szsnad, node); arena_chunk_cache_maybe_remove(arena, node, cache); extent_node_addr_set(node, extent_node_addr_get(prev)); extent_node_size_set(node, extent_node_size_get(prev) + extent_node_size_get(node)); + if (extent_node_sn_get(prev) < extent_node_sn_get(node)) + extent_node_sn_set(node, extent_node_sn_get(prev)); extent_node_zeroed_set(node, extent_node_zeroed_get(prev) && extent_node_zeroed_get(node)); - extent_tree_szad_insert(chunks_szad, node); + extent_tree_szsnad_insert(chunks_szsnad, node); arena_chunk_cache_maybe_insert(arena, node, cache); arena_node_dalloc(tsdn, arena, prev); @@ -598,7 +605,7 @@ label_return: void chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool committed) + void *chunk, size_t size, size_t sn, bool committed) { assert(chunk != NULL); @@ -606,8 +613,9 @@ chunk_dalloc_cache(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, assert(size != 0); assert((size & chunksize_mask) == 0); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_cached, - &arena->chunks_ad_cached, true, chunk, size, false, committed); + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szsnad_cached, + &arena->chunks_ad_cached, true, chunk, size, sn, false, + committed); arena_maybe_purge(tsdn, arena); } @@ -630,7 +638,7 @@ chunk_dalloc_default(void *chunk, size_t size, bool committed, void chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, - void *chunk, size_t size, bool zeroed, bool committed) + void *chunk, size_t size, size_t sn, bool zeroed, bool committed) { bool err; @@ -656,8 +664,9 @@ chunk_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, } zeroed = 
!committed || !chunk_hooks->purge(chunk, size, 0, size, arena->ind); - chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szad_retained, - &arena->chunks_ad_retained, false, chunk, size, zeroed, committed); + chunk_record(tsdn, arena, chunk_hooks, &arena->chunks_szsnad_retained, + &arena->chunks_ad_retained, false, chunk, size, sn, zeroed, + committed); if (config_stats) arena->stats.retained += size; diff --git a/src/chunk_dss.c b/src/chunk_dss.c index 85a13548..ee3f8388 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -162,7 +162,8 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, &chunk_hooks, cpad, cpad_size, - false, true); + arena_extent_sn_next(arena), false, + true); } if (*zero) { JEMALLOC_VALGRIND_MAKE_MEM_UNDEFINED( diff --git a/src/extent.c b/src/extent.c index b1b894dc..a56018f8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -26,30 +26,22 @@ extent_quantize(size_t size) } JEMALLOC_INLINE_C int -extent_szad_comp(const extent_node_t *a, const extent_node_t *b) +extent_sz_comp(const extent_node_t *a, const extent_node_t *b) { - int ret; size_t a_qsize = extent_quantize(extent_node_size_get(a)); size_t b_qsize = extent_quantize(extent_node_size_get(b)); - /* - * Compare based on quantized size rather than size, in order to sort - * equally useful extents only by address. - */ - ret = (a_qsize > b_qsize) - (a_qsize < b_qsize); - if (ret == 0) { - uintptr_t a_addr = (uintptr_t)extent_node_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_node_addr_get(b); - - ret = (a_addr > b_addr) - (a_addr < b_addr); - } - - return (ret); + return ((a_qsize > b_qsize) - (a_qsize < b_qsize)); } -/* Generate red-black tree functions. 
*/ -rb_gen(, extent_tree_szad_, extent_tree_t, extent_node_t, szad_link, - extent_szad_comp) +JEMALLOC_INLINE_C int +extent_sn_comp(const extent_node_t *a, const extent_node_t *b) +{ + uint64_t a_sn = (uintptr_t)extent_node_sn_get(a); + uint64_t b_sn = (uintptr_t)extent_node_sn_get(b); + + return ((a_sn > b_sn) - (a_sn < b_sn)); +} JEMALLOC_INLINE_C int extent_ad_comp(const extent_node_t *a, const extent_node_t *b) @@ -60,5 +52,26 @@ extent_ad_comp(const extent_node_t *a, const extent_node_t *b) return ((a_addr > b_addr) - (a_addr < b_addr)); } +JEMALLOC_INLINE_C int +extent_szsnad_comp(const extent_node_t *a, const extent_node_t *b) +{ + int ret; + + ret = extent_sz_comp(a, b); + if (ret != 0) + return (ret); + + ret = extent_sn_comp(a, b); + if (ret != 0) + return (ret); + + ret = extent_ad_comp(a, b); + return (ret); +} + +/* Generate red-black tree functions. */ +rb_gen(, extent_tree_szsnad_, extent_tree_t, extent_node_t, szsnad_link, + extent_szsnad_comp) + /* Generate red-black tree functions. */ rb_gen(, extent_tree_ad_, extent_tree_t, extent_node_t, ad_link, extent_ad_comp) diff --git a/src/huge.c b/src/huge.c index 62e6932b..8abd8c00 100644 --- a/src/huge.c +++ b/src/huge.c @@ -56,6 +56,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, size_t ausize; arena_t *iarena; extent_node_t *node; + size_t sn; bool is_zeroed; /* Allocate one or more contiguous chunks for this request. */ @@ -68,7 +69,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(ausize >= chunksize); /* Allocate an extent node with which to track the chunk. */ - iarena = (!tsdn_null(tsdn)) ? arena_ichoose(tsdn_tsd(tsdn), NULL) : a0get(); + iarena = (!tsdn_null(tsdn)) ? 
arena_ichoose(tsdn_tsd(tsdn), NULL) : + a0get(); node = ipallocztm(tsdn, CACHELINE_CEILING(sizeof(extent_node_t)), CACHELINE, false, NULL, true, iarena); if (node == NULL) @@ -82,15 +84,15 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(!tsdn_null(tsdn))) arena = arena_choose(tsdn_tsd(tsdn), arena); if (unlikely(arena == NULL) || (ret = arena_chunk_alloc_huge(tsdn, - arena, usize, alignment, &is_zeroed)) == NULL) { + arena, usize, alignment, &sn, &is_zeroed)) == NULL) { idalloctm(tsdn, node, NULL, true, true); return (NULL); } - extent_node_init(node, arena, ret, usize, is_zeroed, true); + extent_node_init(node, arena, ret, usize, sn, is_zeroed, true); if (huge_node_set(tsdn, ret, node)) { - arena_chunk_dalloc_huge(tsdn, arena, ret, usize); + arena_chunk_dalloc_huge(tsdn, arena, ret, usize, sn); idalloctm(tsdn, node, NULL, true, true); return (NULL); } @@ -245,7 +247,8 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, malloc_mutex_unlock(tsdn, &arena->huge_mtx); /* Zap the excess chunks. */ - arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize); + arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize, + extent_node_sn_get(node)); return (false); } @@ -407,7 +410,8 @@ huge_dalloc(tsdn_t *tsdn, void *ptr) huge_dalloc_junk(extent_node_addr_get(node), extent_node_size_get(node)); arena_chunk_dalloc_huge(tsdn, extent_node_arena_get(node), - extent_node_addr_get(node), extent_node_size_get(node)); + extent_node_addr_get(node), extent_node_size_get(node), + extent_node_sn_get(node)); idalloctm(tsdn, node, NULL, true, true); arena_decay_tick(tsdn, arena); From 6a71d37a755cea0a1d89f35dbe7f986abd7c4ca6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Nov 2016 23:29:21 -0800 Subject: [PATCH 0521/2608] Add packing test, which verifies stable layout policy. 
--- Makefile.in | 1 + test/unit/pack.c | 206 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 207 insertions(+) create mode 100644 test/unit/pack.c diff --git a/Makefile.in b/Makefile.in index d13c7f10..836d4e9c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -166,6 +166,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/test/unit/pack.c b/test/unit/pack.c new file mode 100644 index 00000000..77ef1437 --- /dev/null +++ b/test/unit/pack.c @@ -0,0 +1,206 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = + /* Use smallest possible chunk size. */ + "lg_chunk:0" + /* Immediately purge to minimize fragmentation. */ + ",lg_dirty_mult:-1" + ",decay_time:-1" + ; + +/* + * Size class that is a divisor of the page size, ideally 4+ regions per run. + */ +#if LG_PAGE <= 14 +#define SZ (ZU(1) << (LG_PAGE - 2)) +#else +#define SZ 4096 +#endif + +/* + * Number of chunks to consume at high water mark. Should be at least 2 so that + * if mmap()ed memory grows downward, downward growth of mmap()ed memory is + * tested. 
+ */ +#define NCHUNKS 8 + +static unsigned +binind_compute(void) +{ + size_t sz; + unsigned nbins, i; + + sz = sizeof(nbins); + assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + for (i = 0; i < nbins; i++) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + size_t size; + + assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, + &miblen), 0, "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)i; + + sz = sizeof(size); + assert_d_eq(mallctlbymib(mib, miblen, &size, &sz, NULL, 0), 0, + "Unexpected mallctlbymib failure"); + if (size == SZ) + return (i); + } + + test_fail("Unable to compute nregs_per_run"); + return (0); +} + +static size_t +nregs_per_run_compute(void) +{ + uint32_t nregs; + size_t sz; + unsigned binind = binind_compute(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)binind; + sz = sizeof(nregs); + assert_d_eq(mallctlbymib(mib, miblen, &nregs, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + return (nregs); +} + +static size_t +npages_per_run_compute(void) +{ + size_t sz; + unsigned binind = binind_compute(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + size_t run_size; + + assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, + "Unexpected mallctlnametomb failure"); + mib[2] = (size_t)binind; + sz = sizeof(run_size); + assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + return (run_size >> LG_PAGE); +} + +static size_t +npages_per_chunk_compute(void) +{ + + return ((chunksize >> LG_PAGE) - map_bias); +} + +static size_t +nruns_per_chunk_compute(void) +{ + + return (npages_per_chunk_compute() / npages_per_run_compute()); +} + +static unsigned +arenas_extend_mallctl(void) +{ + unsigned arena_ind; + size_t sz; + + sz = 
sizeof(arena_ind); + assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, + "Error in arenas.extend"); + + return (arena_ind); +} + +static void +arena_reset_mallctl(unsigned arena_ind) +{ + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +TEST_BEGIN(test_pack) +{ + unsigned arena_ind = arenas_extend_mallctl(); + size_t nregs_per_run = nregs_per_run_compute(); + size_t nruns_per_chunk = nruns_per_chunk_compute(); + size_t nruns = nruns_per_chunk * NCHUNKS; + size_t nregs = nregs_per_run * nruns; + VARIABLE_ARRAY(void *, ptrs, nregs); + size_t i, j, offset; + + /* Fill matrix. */ + for (i = offset = 0; i < nruns; i++) { + for (j = 0; j < nregs_per_run; j++) { + void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, + "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" + " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", + SZ, arena_ind, i, j); + ptrs[(i * nregs_per_run) + j] = p; + } + } + + /* + * Free all but one region of each run, but rotate which region is + * preserved, so that subsequent allocations exercise the within-run + * layout policy. + */ + offset = 0; + for (i = offset = 0; + i < nruns; + i++, offset = (offset + 1) % nregs_per_run) { + for (j = 0; j < nregs_per_run; j++) { + void *p = ptrs[(i * nregs_per_run) + j]; + if (offset == j) + continue; + dallocx(p, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + } + } + + /* + * Logically refill matrix, skipping preserved regions and verifying + * that the matrix is unmodified. 
+ */ + offset = 0; + for (i = offset = 0; + i < nruns; + i++, offset = (offset + 1) % nregs_per_run) { + for (j = 0; j < nregs_per_run; j++) { + void *p; + + if (offset == j) + continue; + p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE); + assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], + "Unexpected refill discrepancy, run=%zu, reg=%zu\n", + i, j); + } + } + + /* Clean up. */ + arena_reset_mallctl(arena_ind); +} +TEST_END + +int +main(void) +{ + + return (test( + test_pack)); +} From 2379479225e5be5f93626e13a37577c76a670fb3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Nov 2016 13:47:22 -0800 Subject: [PATCH 0522/2608] Consistently use size_t rather than uint64_t for extent serial numbers. --- src/arena.c | 2 +- src/extent.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index a3f9899f..87eead81 100644 --- a/src/arena.c +++ b/src/arena.c @@ -67,7 +67,7 @@ arena_miscelm_extent_get(const arena_chunk_map_misc_t *miscelm) JEMALLOC_INLINE_C int arena_sn_comp(const arena_chunk_map_misc_t *a, const arena_chunk_map_misc_t *b) { - uint64_t a_sn, b_sn; + size_t a_sn, b_sn; assert(a != NULL); assert(b != NULL); diff --git a/src/extent.c b/src/extent.c index a56018f8..218156c6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -37,8 +37,8 @@ extent_sz_comp(const extent_node_t *a, const extent_node_t *b) JEMALLOC_INLINE_C int extent_sn_comp(const extent_node_t *a, const extent_node_t *b) { - uint64_t a_sn = (uintptr_t)extent_node_sn_get(a); - uint64_t b_sn = (uintptr_t)extent_node_sn_get(b); + size_t a_sn = extent_node_sn_get(a); + size_t b_sn = extent_node_sn_get(b); return ((a_sn > b_sn) - (a_sn < b_sn)); } From 87004d238cc8db5ea531ef1900a5e6386ccd3daf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:26:33 -0700 Subject: [PATCH 0523/2608] Avoid negation of unsigned numbers. 
Rather than relying on two's complement negation for alignment mask generation, use bitwise not and addition. This dodges warnings from MSVC, and should be strength-reduced by compiler optimization anyway. --- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fdc8fef9..e7ace7d8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -337,7 +337,7 @@ typedef unsigned szind_t; /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & (-(alignment)))) + ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ @@ -345,7 +345,7 @@ typedef unsigned szind_t; /* Return the smallest alignment multiple that is >= s. */ #define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & (-(alignment))) + (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L From 72c587a411050d3283052ebfccf3cc64803f2152 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:29:00 -0700 Subject: [PATCH 0524/2608] Add cast to silence (harmless) conversion warning. 
--- test/unit/tsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 4e2622a3..d5f96ac3 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -79,7 +79,7 @@ thd_start(void *arg) TEST_BEGIN(test_tsd_main_thread) { - thd_start((void *) 0xa5f3e329); + thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END From 84ae60357702bcb9be9584ca1d727fb53ec0491c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:29:59 -0700 Subject: [PATCH 0525/2608] Explicitly cast negative constants meant for use as unsigned. --- test/unit/util.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/util.c b/test/unit/util.c index c958dc0f..b1f9abd9 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -75,6 +75,7 @@ TEST_BEGIN(test_malloc_strtoumax) }; #define ERR(e) e, #e #define KUMAX(x) ((uintmax_t)x##ULL) +#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) struct test_s tests[] = { {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, @@ -87,13 +88,13 @@ TEST_BEGIN(test_malloc_strtoumax) {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)}, - {"-42", "", 0, ERR(0), KUMAX(-42)}, + {"-42", "", 0, ERR(0), KSMAX(-42)}, {"042", "", 0, ERR(0), KUMAX(042)}, {"+042", "", 0, ERR(0), KUMAX(042)}, - {"-042", "", 0, ERR(0), KUMAX(-042)}, + {"-042", "", 0, ERR(0), KSMAX(-042)}, {"0x42", "", 0, ERR(0), KUMAX(0x42)}, {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"-0x42", "", 0, ERR(0), KUMAX(-0x42)}, + {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)}, @@ -130,6 +131,7 @@ TEST_BEGIN(test_malloc_strtoumax) }; #undef ERR #undef KUMAX +#undef KSMAX unsigned i; for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { From 8f61fdedb908c29905103b22dda32ceb29cd8ede Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Oct 2016 21:31:25 -0700 Subject: [PATCH 0526/2608] Uniformly cast mallctl[bymib]() oldp/newp 
arguments to (void *). This avoids warnings in some cases, and is otherwise generally good hygiene. --- doc/jemalloc.xml.in | 2 +- .../vc2015/test_threads/test_threads.cpp | 6 +- src/stats.c | 13 +- src/tcache.c | 4 +- src/util.c | 2 +- test/integration/MALLOCX_ARENA.c | 4 +- test/integration/allocated.c | 17 +- test/integration/chunk.c | 35 +-- test/integration/mallocx.c | 4 +- test/integration/overflow.c | 8 +- test/integration/rallocx.c | 4 +- test/integration/thread_arena.c | 10 +- test/integration/thread_tcache_enabled.c | 39 +-- test/integration/xallocx.c | 8 +- test/unit/arena_reset.c | 8 +- test/unit/decay.c | 56 ++--- test/unit/mallctl.c | 211 +++++++++-------- test/unit/pack.c | 14 +- test/unit/prof_accum.c | 5 +- test/unit/prof_active.c | 5 +- test/unit/prof_gdump.c | 13 +- test/unit/prof_idump.c | 5 +- test/unit/prof_reset.c | 13 +- test/unit/prof_thread_name.c | 22 +- test/unit/run_quantize.c | 20 +- test/unit/size_classes.c | 8 +- test/unit/stats.c | 223 +++++++++--------- 27 files changed, 397 insertions(+), 362 deletions(-) mode change 100644 => 100755 msvc/projects/vc2015/test_threads/test_threads.cpp mode change 100644 => 100755 src/stats.c mode change 100644 => 100755 src/tcache.c mode change 100644 => 100755 src/util.c mode change 100644 => 100755 test/integration/MALLOCX_ARENA.c mode change 100644 => 100755 test/integration/allocated.c mode change 100644 => 100755 test/integration/mallocx.c mode change 100644 => 100755 test/integration/overflow.c mode change 100644 => 100755 test/integration/rallocx.c mode change 100644 => 100755 test/integration/thread_arena.c mode change 100644 => 100755 test/integration/thread_tcache_enabled.c mode change 100644 => 100755 test/integration/xallocx.c mode change 100644 => 100755 test/unit/arena_reset.c mode change 100644 => 100755 test/unit/decay.c mode change 100644 => 100755 test/unit/mallctl.c mode change 100644 => 100755 test/unit/prof_accum.c mode change 100644 => 100755 test/unit/prof_active.c mode 
change 100644 => 100755 test/unit/prof_gdump.c mode change 100644 => 100755 test/unit/prof_idump.c mode change 100644 => 100755 test/unit/prof_reset.c mode change 100644 => 100755 test/unit/prof_thread_name.c mode change 100644 => 100755 test/unit/size_classes.c mode change 100644 => 100755 test/unit/stats.c diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3d2e721d..d9c83452 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -406,7 +406,7 @@ for (i = 0; i < nbins; i++) { mib[2] = i; len = sizeof(bin_size); - mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); + mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0); /* Do something with bin_size... */ }]]> diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp old mode 100644 new mode 100755 index c8cb7d66..a3d1a792 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -21,7 +21,7 @@ int test_threads() je_malloc_conf = "narenas:3"; int narenas = 0; size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", &narenas, &sz, NULL, 0); + je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); if (narenas != 3) { printf("Error: unexpected number of arenas: %d\n", narenas); return 1; @@ -33,7 +33,7 @@ int test_threads() je_malloc_stats_print(NULL, NULL, NULL); size_t allocated1; size_t sz1 = sizeof(allocated1); - je_mallctl("stats.active", &allocated1, &sz1, NULL, 0); + je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); printf("\nPress Enter to start threads...\n"); getchar(); printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); @@ -78,7 +78,7 @@ int test_threads() } je_malloc_stats_print(NULL, NULL, NULL); size_t allocated2; - je_mallctl("stats.active", &allocated2, &sz1, NULL, 0); + je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); size_t leaked = allocated2 - allocated1; printf("\nDone. 
Leaked: %zd bytes\n", leaked); bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) diff --git a/src/stats.c b/src/stats.c old mode 100644 new mode 100755 index bd8af399..1360f3bd --- a/src/stats.c +++ b/src/stats.c @@ -3,7 +3,7 @@ #define CTL_GET(n, v, t) do { \ size_t sz = sizeof(t); \ - xmallctl(n, v, &sz, NULL, 0); \ + xmallctl(n, (void *)v, &sz, NULL, 0); \ } while (0) #define CTL_M2_GET(n, i, v, t) do { \ @@ -12,7 +12,7 @@ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) #define CTL_M2_M4_GET(n, i, j, v, t) do { \ @@ -22,7 +22,7 @@ xmallctlnametomib(n, mib, &miblen); \ mib[2] = (i); \ mib[4] = (j); \ - xmallctlbymib(mib, miblen, v, &sz, NULL, 0); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) /******************************************************************************/ @@ -647,7 +647,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ bool bv2; \ if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + je_mallctl(#m, &bv2, (void *)&bsz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ "\t\t\t\""#n"\": %s%s\n", bv ? 
"true" : \ @@ -692,7 +692,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ ssize_t ssv2; \ if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ @@ -1084,7 +1084,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * */ epoch = 1; u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch, + sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { malloc_write(": Memory allocation failure in " diff --git a/src/tcache.c b/src/tcache.c old mode 100644 new mode 100755 index f97aa420..344b5eeb --- a/src/tcache.c +++ b/src/tcache.c @@ -517,12 +517,12 @@ tcache_boot(tsdn_t *tsdn) * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. */ - if (opt_lg_tcache_max < 0 || (1U << opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; else if ((1U << opt_lg_tcache_max) > large_maxclass) tcache_maxclass = large_maxclass; else - tcache_maxclass = (1U << opt_lg_tcache_max); + tcache_maxclass = (ZU(1) << opt_lg_tcache_max); nhbins = size2index(tcache_maxclass) + 1; diff --git a/src/util.c b/src/util.c old mode 100644 new mode 100755 index 79052674..5b8175bc --- a/src/util.c +++ b/src/util.c @@ -200,7 +200,7 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) p++; } if (neg) - ret = -ret; + ret = (uintmax_t)(-((intmax_t)ret)); if (p == ns) { /* No conversion performed. 
*/ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c old mode 100644 new mode 100755 index 30c203ae..910a096f --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -19,8 +19,8 @@ thd_start(void *arg) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.extend"); if (thread_ind % 4 != 3) { size_t mib[3]; diff --git a/test/integration/allocated.c b/test/integration/allocated.c old mode 100644 new mode 100755 index 3630e80c..6ce145b3 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -18,14 +18,14 @@ thd_start(void *arg) size_t sz, usize; sz = sizeof(a0); - if ((err = mallctl("thread.allocated", &a0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); - if ((err = mallctl("thread.allocatedp", &ap0, &sz, NULL, 0))) { + if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, @@ -36,14 +36,15 @@ thd_start(void *arg) "storage"); sz = sizeof(d0); - if ((err = mallctl("thread.deallocated", &d0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocated", (void *)&d0, &sz, NULL, 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", &dp0, &sz, NULL, 0))) { + if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, + 0))) { if (err == ENOENT) goto label_ENOENT; test_fail("%s(): Error in mallctl(): %s", __func__, @@ -57,9 +58,9 @@ thd_start(void *arg) assert_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); - 
mallctl("thread.allocated", &a1, &sz, NULL, 0); + mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0); sz = sizeof(ap1); - mallctl("thread.allocatedp", &ap1, &sz, NULL, 0); + mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0); assert_u64_eq(*ap1, a1, "Dereferenced \"thread.allocatedp\" value should equal " "\"thread.allocated\" value"); @@ -74,9 +75,9 @@ thd_start(void *arg) free(p); sz = sizeof(d1); - mallctl("thread.deallocated", &d1, &sz, NULL, 0); + mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0); sz = sizeof(dp1); - mallctl("thread.deallocatedp", &dp1, &sz, NULL, 0); + mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0); assert_u64_eq(*dp1, d1, "Dereferenced \"thread.deallocatedp\" value should equal " "\"thread.deallocated\" value"); diff --git a/test/integration/chunk.c b/test/integration/chunk.c index ff9bf967..94cf0025 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -137,8 +137,8 @@ TEST_BEGIN(test_chunk) bool xallocx_success_a, xallocx_success_b, xallocx_success_c; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; /* Install custom chunk hooks. 
*/ @@ -148,8 +148,9 @@ TEST_BEGIN(test_chunk) hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(chunk_hooks_t); new_size = sizeof(chunk_hooks_t); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, - &new_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, (void *)&new_hooks, new_size), 0, + "Unexpected chunk_hooks error"); orig_hooks = old_hooks; assert_ptr_ne(old_hooks.alloc, chunk_alloc, "Unexpected alloc error"); assert_ptr_ne(old_hooks.dalloc, chunk_dalloc, @@ -164,18 +165,18 @@ TEST_BEGIN(test_chunk) /* Get large size classes. */ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected arenas.lrun.0.size failure"); - assert_d_eq(mallctl("arenas.lrun.1.size", &large1, &sz, NULL, 0), 0, - "Unexpected arenas.lrun.1.size failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected arenas.lrun.0.size failure"); + assert_d_eq(mallctl("arenas.lrun.1.size", (void *)&large1, &sz, NULL, + 0), 0, "Unexpected arenas.lrun.1.size failure"); /* Get huge size classes. */ - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.0.size failure"); - assert_d_eq(mallctl("arenas.hchunk.1.size", &huge1, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.1.size failure"); - assert_d_eq(mallctl("arenas.hchunk.2.size", &huge2, &sz, NULL, 0), 0, - "Unexpected arenas.hchunk.2.size failure"); + assert_d_eq(mallctl("arenas.hchunk.0.size", (void *)&huge0, &sz, NULL, + 0), 0, "Unexpected arenas.hchunk.0.size failure"); + assert_d_eq(mallctl("arenas.hchunk.1.size", (void *)&huge1, &sz, NULL, + 0), 0, "Unexpected arenas.hchunk.1.size failure"); + assert_d_eq(mallctl("arenas.hchunk.2.size", (void *)&huge2, &sz, NULL, + 0), 0, "Unexpected arenas.hchunk.2.size failure"); /* Test dalloc/decommit/purge cascade. 
*/ purge_miblen = sizeof(purge_mib)/sizeof(size_t); @@ -265,9 +266,9 @@ TEST_BEGIN(test_chunk) /* Restore chunk hooks. */ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, - &old_hooks, new_size), 0, "Unexpected chunk_hooks error"); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, &old_hooks, &old_size, - NULL, 0), 0, "Unexpected chunk_hooks error"); + (void *)&old_hooks, new_size), 0, "Unexpected chunk_hooks error"); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, NULL, 0), 0, "Unexpected chunk_hooks error"); assert_ptr_eq(old_hooks.alloc, orig_hooks.alloc, "Unexpected alloc error"); assert_ptr_eq(old_hooks.dalloc, orig_hooks.dalloc, diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c old mode 100644 new mode 100755 index 43b76eba..d709eb30 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -11,7 +11,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -37,7 +37,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return (ret); diff --git a/test/integration/overflow.c b/test/integration/overflow.c old mode 100644 new mode 100755 index 303d9b2d..84a35652 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -8,8 +8,8 @@ TEST_BEGIN(test_overflow) void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, 0), + 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ TEST_BEGIN(test_overflow) mib[2] = nhchunks - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + NULL, 0), 0, "Unexpected mallctlbymib() error"); assert_ptr_null(malloc(max_size_class + 1), "Expected OOM due to over-sized allocation request"); diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c old mode 100644 new mode 100755 index 66ad8660..506bf1c9 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -7,7 +7,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -33,7 +33,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return (ret); diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c old mode 100644 new mode 100755 index 67be5351..7a35a635 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -16,8 +16,8 @@ thd_start(void *arg) free(p); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, &main_arena_ind, - sizeof(main_arena_ind)))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, + (void *)&main_arena_ind, sizeof(main_arena_ind)))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -25,7 +25,8 @@ thd_start(void *arg) } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, + 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -50,7 +51,8 @@ TEST_BEGIN(test_thread_arena) assert_ptr_not_null(p, "Error in malloc()"); size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", &arena_ind, &size, NULL, 0))) { + if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, + 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c old mode 100644 new mode 100755 index f4e89c68..2c2825e1 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -16,7 +16,8 @@ thd_start(void *arg) bool e0, e1; sz = sizeof(bool); - if ((err = mallctl("thread.tcache.enabled", &e0, &sz, NULL, 0))) { + if ((err = mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, + 0))) { if (err == ENOENT) { assert_false(config_tcache, "ENOENT should only be returned if tcache is " @@ -27,53 +28,53 @@ thd_start(void *arg) if (e0) { e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), - 0, "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void 
*)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); } e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, 
+ (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", &e0, &sz, &e1, sz), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + (void *)&e1, sz), 0, "Unexpected mallctl() error"); assert_false(e0, "tcache should be disabled"); free(malloc(1)); diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c old mode 100644 new mode 100755 index ad292bb5..67e0a0e7 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -16,8 +16,8 @@ arena_ind(void) if (ind == 0) { size_t sz = sizeof(ind); - assert_d_eq(mallctl("arenas.extend", &ind, &sz, NULL, 0), 0, - "Unexpected mallctl failure creating arena"); + assert_d_eq(mallctl("arenas.extend", (void *)&ind, &sz, NULL, + 0), 0, "Unexpected mallctl failure creating arena"); } return (ind); @@ -78,7 +78,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return (ret); @@ -118,7 +118,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c old mode 100644 new mode 100755 index 8ba36c21..adf9baa5 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -11,7 +11,7 @@ get_nsizes_impl(const char *cmd) size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, &ret, &z, NULL, 0), 0, + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) 
failure", cmd); return (ret); @@ -51,7 +51,7 @@ get_size_impl(const char *cmd, size_t ind) 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &ret, &z, NULL, 0), + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return (ret); @@ -92,8 +92,8 @@ TEST_BEGIN(test_arena_reset) && unlikely(opt_quarantine))); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; diff --git a/test/unit/decay.c b/test/unit/decay.c old mode 100644 new mode 100755 index e169ae24..5af8f807 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -40,10 +40,10 @@ TEST_BEGIN(test_decay_ticks) "Unexpected failure getting decay ticker"); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.hchunk.0.size", &huge0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.hchunk.0.size", (void *)&huge0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); /* * Test the standard APIs using a huge size class, since we can't @@ -175,8 +175,8 @@ TEST_BEGIN(test_decay_ticks) tcache_sizes[1] = 1; sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tcache_ind, &sz, NULL, 0), - 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { sz = tcache_sizes[i]; @@ -193,7 +193,7 @@ TEST_BEGIN(test_decay_ticks) dallocx(p, 
MALLOCX_TCACHE(tcache_ind)); tick0 = ticker_read(decay_ticker); assert_d_eq(mallctl("tcache.flush", NULL, NULL, - &tcache_ind, sizeof(unsigned)), 0, + (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); tick1 = ticker_read(decay_ticker); assert_u32_ne(tick1, tick0, @@ -228,22 +228,22 @@ TEST_BEGIN(test_decay_ticker) size_t tcache_max; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); large = nallocx(tcache_max + 1, flags); } else { sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large, &sz, NULL, 0), - 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); } assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, + NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); for (i = 0; i < NPS; i++) { ps[i] = mallocx(large, flags); @@ -283,11 +283,11 @@ TEST_BEGIN(test_decay_ticker) assert_ptr_not_null(p, "Unexpected mallocx() failure"); dallocx(p, flags); } - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, - NULL, 0), config_stats ? 
0 : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, + &sz, NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); nstime_update(&time); @@ -313,16 +313,16 @@ TEST_BEGIN(test_decay_nonmonotonic) test_skip_if(opt_purge != purge_mode_decay); sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lrun.0.size", &large0, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("arenas.lrun.0.size", (void *)&large0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge0, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, + NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); nupdates_mock = 0; nstime_init(&time_mock, 0); @@ -348,11 +348,11 @@ TEST_BEGIN(test_decay_nonmonotonic) "Expected nstime_update() to be called"); } - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(uint64_t)), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(uint64_t)), 0, "Unexpected mallctl failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge1, &sz, NULL, 0), - config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, + NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); if (config_stats) assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c old mode 100644 new mode 100755 index 69f8c20c..2353c92c --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -12,16 +12,18 @@ TEST_BEGIN(test_mallctl_errors) EPERM, "mallctl() should return EPERM on attempt to write " "read-only value"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)-1), - EINVAL, "mallctl() should return EINVAL for input size mismatch"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)+1), - EINVAL, "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)-1), EINVAL, + "mallctl() should return EINVAL for input size mismatch"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)+1), EINVAL, + "mallctl() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctl("epoch", &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } TEST_END @@ -56,18 +58,20 @@ TEST_BEGIN(test_mallctlbymib_errors) assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &epoch, + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)+1), EINVAL, "mallctlbymib() should return EINVAL for 
input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctlbymib(mib, miblen, &epoch, &sz, NULL, 0), EINVAL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); } TEST_END @@ -83,18 +87,19 @@ TEST_BEGIN(test_mallctl_read_write) assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read. */ - assert_d_eq(mallctl("epoch", &old_epoch, &sz, NULL, 0), 0, + assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - assert_d_eq(mallctl("epoch", NULL, NULL, &new_epoch, sizeof(new_epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, + sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ - assert_d_eq(mallctl("epoch", &old_epoch, &sz, &new_epoch, - sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, + (void *)&new_epoch, sizeof(new_epoch)), 0, + "Unexpected mallctl() failure"); assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -120,8 +125,8 @@ TEST_BEGIN(test_mallctl_config) #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ - assert_d_eq(mallctl("config."#config, &oldval, &sz, NULL, 0), \ - 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_b_eq(oldval, config_##config, "Incorrect config value"); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) @@ -154,7 +159,8 @@ TEST_BEGIN(test_mallctl_opt) t oldval; \ size_t sz = sizeof(oldval); \ int expected = config_##config ? 0 : ENOENT; \ - int result = mallctl("opt."#opt, &oldval, &sz, NULL, 0); \ + int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ + 0); \ assert_d_eq(result, expected, \ "Unexpected mallctl() result for opt."#opt); \ assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ @@ -197,7 +203,7 @@ TEST_BEGIN(test_manpage_example) size_t len, miblen; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; @@ -208,8 +214,8 @@ TEST_BEGIN(test_manpage_example) mib[2] = i; len = sizeof(bin_size); - assert_d_eq(mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0), - 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, + NULL, 0), 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... */ } } @@ -258,25 +264,25 @@ TEST_BEGIN(test_tcache) /* Create tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + (void *)&tis[i], sizeof(unsigned)), 0, + "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", &tis[i], &sz, NULL, 0), 0, - "Unexpected mallctl() failure, i=%u", i); + assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -321,16 +327,16 @@ TEST_BEGIN(test_tcache) /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, &tis[i], + assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, &tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + (void *)&tis[i], sizeof(unsigned)), 0, + "Unexpected mallctl() failure, i=%u", i); } } TEST_END @@ -340,15 +346,17 @@ TEST_BEGIN(test_thread_arena) unsigned arena_old, arena_new, narenas; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); arena_new = narenas - 1; - assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, - sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, + (void *)&arena_new, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); arena_new = 0; - assert_d_eq(mallctl("thread.arena", &arena_old, &sz, &arena_new, - sizeof(unsigned)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, + (void *)&arena_new, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); } TEST_END @@ -359,17 +367,18 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) test_skip_if(opt_purge != purge_mode_ratio); - assert_d_eq(mallctl("arena.0.lg_dirty_mult", &orig_lg_dirty_mult, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", + (void *)&orig_lg_dirty_mult, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); lg_dirty_mult = -2; assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); lg_dirty_mult = (sizeof(size_t) << 3); assert_d_eq(mallctl("arena.0.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), 
EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; @@ -377,9 +386,9 @@ TEST_BEGIN(test_arena_i_lg_dirty_mult) = lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; - assert_d_eq(mallctl("arena.0.lg_dirty_mult", &old_lg_dirty_mult, - &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arena.0.lg_dirty_mult", + (void *)&old_lg_dirty_mult, &sz, (void *)&lg_dirty_mult, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, "Unexpected old arena.0.lg_dirty_mult"); } @@ -393,25 +402,25 @@ TEST_BEGIN(test_arena_i_decay_time) test_skip_if(opt_purge != purge_mode_decay); - assert_d_eq(mallctl("arena.0.decay_time", &orig_decay_time, &sz, + assert_d_eq(mallctl("arena.0.decay_time", (void *)&orig_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); decay_time = -2; assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), EFAULT, + (void *)&decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); decay_time = 0x7fffffff; assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), 0, + (void *)&decay_time, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); for (prev_decay_time = decay_time, decay_time = -1; decay_time < 20; prev_decay_time = decay_time, decay_time++) { ssize_t old_decay_time; - assert_d_eq(mallctl("arena.0.decay_time", &old_decay_time, - &sz, &decay_time, sizeof(ssize_t)), 0, + assert_d_eq(mallctl("arena.0.decay_time", (void *)&old_decay_time, + &sz, (void *)&decay_time, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_decay_time, prev_decay_time, "Unexpected old arena.0.decay_time"); @@ -429,8 +438,8 @@ TEST_BEGIN(test_arena_i_purge) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - 
assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; @@ -449,8 +458,8 @@ TEST_BEGIN(test_arena_i_decay) assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; @@ -471,31 +480,35 @@ TEST_BEGIN(test_arena_i_dss) "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, &dss_prec_new, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, - sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, 
&dss_prec_new, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, + "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_new, &sz, &dss_prec_old, - sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - - assert_d_eq(mallctlbymib(mib, miblen, &dss_prec_old, &sz, NULL, 0), 0, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); } @@ -506,14 +519,14 @@ TEST_BEGIN(test_arenas_initialized) unsigned narenas; size_t sz = sizeof(narenas); - assert_d_eq(mallctl("arenas.narenas", &narenas, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); { VARIABLE_ARRAY(bool, initialized, narenas); sz = narenas * sizeof(bool); - assert_d_eq(mallctl("arenas.initialized", initialized, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.initialized", (void *)initialized, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); } } TEST_END @@ -525,17 +538,17 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) test_skip_if(opt_purge != purge_mode_ratio); - assert_d_eq(mallctl("arenas.lg_dirty_mult", &orig_lg_dirty_mult, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.lg_dirty_mult", (void *)&orig_lg_dirty_mult, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); lg_dirty_mult = -2; assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void 
*)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); lg_dirty_mult = (sizeof(size_t) << 3); assert_d_eq(mallctl("arenas.lg_dirty_mult", NULL, NULL, - &lg_dirty_mult, sizeof(ssize_t)), EFAULT, + (void *)&lg_dirty_mult, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); for (prev_lg_dirty_mult = orig_lg_dirty_mult, lg_dirty_mult = -1; @@ -543,9 +556,9 @@ TEST_BEGIN(test_arenas_lg_dirty_mult) lg_dirty_mult, lg_dirty_mult++) { ssize_t old_lg_dirty_mult; - assert_d_eq(mallctl("arenas.lg_dirty_mult", &old_lg_dirty_mult, - &sz, &lg_dirty_mult, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.lg_dirty_mult", + (void *)&old_lg_dirty_mult, &sz, (void *)&lg_dirty_mult, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); assert_zd_eq(old_lg_dirty_mult, prev_lg_dirty_mult, "Unexpected old arenas.lg_dirty_mult"); } @@ -559,26 +572,26 @@ TEST_BEGIN(test_arenas_decay_time) test_skip_if(opt_purge != purge_mode_decay); - assert_d_eq(mallctl("arenas.decay_time", &orig_decay_time, &sz, + assert_d_eq(mallctl("arenas.decay_time", (void *)&orig_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); decay_time = -2; assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), EFAULT, + (void *)&decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); decay_time = 0x7fffffff; assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - &decay_time, sizeof(ssize_t)), 0, + (void *)&decay_time, sizeof(ssize_t)), 0, "Expected mallctl() failure"); for (prev_decay_time = decay_time, decay_time = -1; decay_time < 20; prev_decay_time = decay_time, decay_time++) { ssize_t old_decay_time; - assert_d_eq(mallctl("arenas.decay_time", &old_decay_time, - &sz, &decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.decay_time", + (void *)&old_decay_time, &sz, (void *)&decay_time, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); 
assert_zd_eq(old_decay_time, prev_decay_time, "Unexpected old arenas.decay_time"); } @@ -591,8 +604,8 @@ TEST_BEGIN(test_arenas_constants) #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas."#name, &name, &sz, NULL, 0), 0, \ - "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ + 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -612,8 +625,8 @@ TEST_BEGIN(test_arenas_bin_constants) #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.bin.0."#name, &name, &sz, NULL, 0), \ - 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -631,8 +644,8 @@ TEST_BEGIN(test_arenas_lrun_constants) #define TEST_ARENAS_LRUN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lrun.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.lrun.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -648,8 +661,8 @@ TEST_BEGIN(test_arenas_hchunk_constants) #define TEST_ARENAS_HCHUNK_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.hchunk.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("arenas.hchunk.0."#name, (void *)&name, \ + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) @@ -664,12 +677,12 @@ TEST_BEGIN(test_arenas_extend) unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); - 
assert_d_eq(mallctl("arenas.narenas", &narenas_before, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.extend", &arena, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", &narenas_after, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas_before+1, narenas_after, "Unexpected number of arenas before versus after extension"); @@ -683,8 +696,8 @@ TEST_BEGIN(test_stats_arenas) #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("stats.arenas.0."#name, &name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ + assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ } while (0) TEST_STATS_ARENAS(unsigned, nthreads); diff --git a/test/unit/pack.c b/test/unit/pack.c index 77ef1437..0b6ffcd2 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -31,7 +31,7 @@ binind_compute(void) unsigned nbins, i; sz = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < nbins; i++) { @@ -44,8 +44,8 @@ binind_compute(void) mib[2] = (size_t)i; sz = sizeof(size); - assert_d_eq(mallctlbymib(mib, miblen, &size, &sz, NULL, 0), 0, - "Unexpected mallctlbymib failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); if (size == SZ) return (i); } @@ -67,7 +67,7 @@ nregs_per_run_compute(void) "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(nregs); - 
assert_d_eq(mallctlbymib(mib, miblen, &nregs, &sz, NULL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); return (nregs); } @@ -85,7 +85,7 @@ npages_per_run_compute(void) "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(run_size); - assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&run_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); return (run_size >> LG_PAGE); } @@ -111,8 +111,8 @@ arenas_extend_mallctl(void) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, - "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.extend"); return (arena_ind); } diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c old mode 100644 new mode 100755 index fd229e0f..d941b5bc --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -68,8 +68,9 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c old mode 100644 new mode 100755 index 81490957..d00943a4 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -12,7 +12,7 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, &old, &sz, NULL, 0), 0, + assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, name); @@ -26,7 
+26,8 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, &old, &sz, &val_new, sizeof(val_new)), 0, + assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, + sizeof(val_new)), 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c old mode 100644 new mode 100755 index a0e6ee92..996cb670 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -28,8 +28,9 @@ TEST_BEGIN(test_gdump) test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; @@ -45,8 +46,8 @@ TEST_BEGIN(test_gdump) gdump = false; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, - sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; @@ -56,8 +57,8 @@ TEST_BEGIN(test_gdump) gdump = true; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", &gdump_old, &sz, &gdump, - sizeof(gdump)), 0, + assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c old mode 100644 new mode 100755 index bdea53ec..16c6462d --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -29,8 +29,9 @@ TEST_BEGIN(test_idump) test_skip_if(!config_prof); active = true; - 
assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure while activating profiling"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, + "Unexpected mallctl failure while activating profiling"); prof_dump_open = prof_dump_open_intercept; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c old mode 100644 new mode 100755 index 5ae45fd2..59d70796 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -20,8 +20,8 @@ static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, sizeof(active)), - 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + sizeof(active)), 0, "Unexpected mallctl failure"); } static size_t @@ -30,7 +30,8 @@ get_lg_prof_sample(void) size_t lg_prof_sample; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", &lg_prof_sample, &sz, NULL, 0), 0, + assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, + NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); return (lg_prof_sample); } @@ -39,7 +40,7 @@ static void do_prof_reset(size_t lg_prof_sample) { assert_d_eq(mallctl("prof.reset", NULL, NULL, - &lg_prof_sample, sizeof(size_t)), 0, + (void *)&lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), "Expected profile sample rate change"); @@ -54,8 +55,8 @@ TEST_BEGIN(test_prof_reset_basic) test_skip_if(!config_prof); sz = sizeof(size_t); - assert_d_eq(mallctl("opt.lg_prof_sample", &lg_prof_sample_orig, &sz, - NULL, 0), 0, + assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, + &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); assert_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); diff --git a/test/unit/prof_thread_name.c 
b/test/unit/prof_thread_name.c old mode 100644 new mode 100755 index f501158d..9ec54977 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -12,8 +12,9 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, NULL, 0), - 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", + assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + NULL, 0), 0, + "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); assert_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); @@ -26,8 +27,8 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), 0, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); @@ -46,15 +47,15 @@ TEST_BEGIN(test_prof_thread_name_validation) /* NULL input shouldn't be allowed. */ thread_name = NULL; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); /* '\n' shouldn't be allowed. 
*/ thread_name = "hi\nthere"; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, - sizeof(thread_name)), EFAULT, + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); @@ -64,8 +65,9 @@ TEST_BEGIN(test_prof_thread_name_validation) size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", &thread_name_old, &sz, - &thread_name, sizeof(thread_name)), EPERM, + assert_d_eq(mallctl("thread.prof.name", + (void *)&thread_name_old, &sz, (void *)&thread_name, + sizeof(thread_name)), EPERM, "Unexpected mallctl result writing \"%s\" to " "thread.prof.name", thread_name); } diff --git a/test/unit/run_quantize.c b/test/unit/run_quantize.c index b1ca6356..089176f3 100644 --- a/test/unit/run_quantize.c +++ b/test/unit/run_quantize.c @@ -13,7 +13,7 @@ TEST_BEGIN(test_small_run_size) */ sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctlnametomib("arenas.bin.0.run_size", mib, &miblen), 0, @@ -21,8 +21,8 @@ TEST_BEGIN(test_small_run_size) for (i = 0; i < nbins; i++) { mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &run_size, &sz, NULL, 0), - 0, "Unexpected mallctlbymib failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&run_size, &sz, + NULL, 0), 0, "Unexpected mallctlbymib failure"); assert_zu_eq(run_size, run_quantize_floor(run_size), "Small run quantization should be a no-op (run_size=%zu)", run_size); @@ -47,11 +47,11 @@ TEST_BEGIN(test_large_run_size) */ sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", &cache_oblivious, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = 
sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nlruns", (void *)&nlruns, &sz, NULL, 0), 0, "Unexpected mallctl failure"); assert_d_eq(mallctlnametomib("arenas.lrun.0.size", mib, &miblen), 0, @@ -61,8 +61,8 @@ TEST_BEGIN(test_large_run_size) mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &lrun_size, &sz, NULL, 0), - 0, "Unexpected mallctlbymib failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&lrun_size, &sz, + NULL, 0), 0, "Unexpected mallctlbymib failure"); run_size = cache_oblivious ? lrun_size + PAGE : lrun_size; floor = run_quantize_floor(run_size); ceil = run_quantize_ceil(run_size); @@ -102,11 +102,11 @@ TEST_BEGIN(test_monotonic) */ sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlruns", &nlruns, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nlruns", (void *)&nlruns, &sz, NULL, 0), 0, "Unexpected mallctl failure"); floor_prev = 0; diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c old mode 100644 new mode 100755 index 4e1e0ce4..81cc6061 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -8,8 +8,8 @@ get_max_size_class(void) size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nhchunks", &nhchunks, &sz, NULL, 0), 0, - "Unexpected mallctl() error"); + assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, 0), + 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, @@ -17,8 +17,8 @@ get_max_size_class(void) mib[2] = nhchunks - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, &max_size_class, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() error"); + assert_d_eq(mallctlbymib(mib, miblen, 
(void *)&max_size_class, &sz, + NULL, 0), 0, "Unexpected mallctlbymib() error"); return (max_size_class); } diff --git a/test/unit/stats.c b/test/unit/stats.c old mode 100644 new mode 100755 index a9a3981f..315717df --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -7,18 +7,18 @@ TEST_BEGIN(test_stats_summary) int expected = config_stats ? 0 : ENOENT; sz = sizeof(cactive); - assert_d_eq(mallctl("stats.cactive", &cactive, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.cactive", (void *)&cactive, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.allocated", &allocated, &sz, NULL, 0), + assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, + 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.active", &active, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.resident", &resident, &sz, NULL, 0), + assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.mapped", &mapped, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); if (config_stats) { assert_zu_le(active, *cactive, @@ -45,19 +45,19 @@ TEST_BEGIN(test_stats_huge) p = mallocx(large_maxclass+1, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + 
assert_d_eq(mallctl("stats.arenas.0.huge.allocated", (void *)&allocated, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nrequests", (void *)&nrequests, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -83,8 +83,8 @@ TEST_BEGIN(test_stats_arenas_summary) uint64_t npurge, nmadvise, purged; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); little = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(little, "Unexpected mallocx() failure"); @@ -100,19 +100,19 @@ TEST_BEGIN(test_stats_arenas_summary) assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.mapped", &mapped, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, + 0), expected, 
"Unexepected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", &npurge, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.nmadvise", &nmadvise, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.purged", &purged, &sz, NULL, 0), - expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.nmadvise", (void *)&nmadvise, &sz, + NULL, 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.purged", (void *)&purged, &sz, NULL, + 0), expected, "Unexepected mallctl() result"); if (config_stats) { assert_u64_gt(npurge, 0, @@ -150,8 +150,8 @@ TEST_BEGIN(test_stats_arenas_small) no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(SMALL_MAXCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -159,19 +159,21 @@ TEST_BEGIN(test_stats_arenas_small) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.small.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.small.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -197,25 +199,27 @@ TEST_BEGIN(test_stats_arenas_large) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(large_maxclass, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.large.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -241,23 +245,23 @@ TEST_BEGIN(test_stats_arenas_huge) int expected = config_stats ? 
0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(chunksize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.huge.allocated", &allocated, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.allocated", (void *)&allocated, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.huge.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_zu_gt(allocated, 0, @@ -282,8 +286,8 @@ TEST_BEGIN(test_stats_arenas_bins) int expected = config_stats ? 0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(arena_bin_info[0].reg_size, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -291,35 +295,36 @@ TEST_BEGIN(test_stats_arenas_bins) assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 
0 : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", &curregs, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", (void *)&curregs, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", &nfills, &sz, - NULL, 0), config_tcache ? expected : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", (void *)&nfills, + &sz, NULL, 0), config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", &nflushes, &sz, - NULL, 0), config_tcache ? expected : ENOENT, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", (void *)&nflushes, + &sz, NULL, 0), config_tcache ? 
expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", &nruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", &nreruns, &sz, + assert_d_eq(mallctl("stats.arenas.0.bins.0.nruns", (void *)&nruns, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.nreruns", (void *)&nreruns, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", &curruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.bins.0.curruns", (void *)&curruns, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -355,25 +360,26 @@ TEST_BEGIN(test_stats_arenas_lruns) int expected = config_stats ? 0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(LARGE_MINCLASS, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", &nrequests, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nmalloc", (void *)&nmalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() 
result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.ndalloc", (void *)&ndalloc, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.nrequests", + (void *)&nrequests, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", &curruns, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.lruns.0.curruns", (void *)&curruns, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -399,23 +405,26 @@ TEST_BEGIN(test_stats_arenas_hchunks) int expected = config_stats ? 0 : ENOENT; arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena, sizeof(arena)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, + sizeof(arena)), 0, "Unexpected mallctl() failure"); p = mallocx(chunksize, 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", &nmalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", &ndalloc, &sz, - NULL, 0), expected, "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.nmalloc", + (void *)&nmalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.hchunks.0.ndalloc", + (void *)&ndalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", &curhchunks, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + 
assert_d_eq(mallctl("stats.arenas.0.hchunks.0.curhchunks", + (void *)&curhchunks, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, From 8a4528bdd16eee1b1c00c4305ee246568d382b0a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Nov 2016 15:01:03 -0800 Subject: [PATCH 0527/2608] Uniformly cast mallctl[bymib]() oldp/newp arguments to (void *). This avoids warnings in some cases, and is otherwise generally good hygiene. --- doc/jemalloc.xml.in | 2 +- src/stats.c | 7 ++++--- test/unit/pack.c | 12 ++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 747cc071..250a2a83 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -406,7 +406,7 @@ for (i = 0; i < nbins; i++) { mib[2] = i; len = sizeof(bin_size); - mallctlbymib(mib, miblen, &bin_size, &len, NULL, 0); + mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0); /* Do something with bin_size... */ }]]> diff --git a/src/stats.c b/src/stats.c index 44f8c528..3072b2ab 100644 --- a/src/stats.c +++ b/src/stats.c @@ -513,7 +513,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ bool bv2; \ if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, &bv2, &bsz, NULL, 0) == 0) { \ + je_mallctl(#m, (void *)&bv2, &bsz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ "\t\t\t\""#n"\": %s%s\n", bv ? 
"true" : \ @@ -548,7 +548,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ ssize_t ssv2; \ if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, &ssv2, &sssz, NULL, 0) == 0) { \ + je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ @@ -886,7 +886,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, * */ epoch = 1; u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", &epoch, &u64sz, &epoch, sizeof(uint64_t)); + err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch, + sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { malloc_write(": Memory allocation failure in " diff --git a/test/unit/pack.c b/test/unit/pack.c index 8071183e..10df08e3 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -26,7 +26,7 @@ binind_compute(void) unsigned nbins, i; sz = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < nbins; i++) { @@ -39,8 +39,8 @@ binind_compute(void) mib[2] = (size_t)i; sz = sizeof(size); - assert_d_eq(mallctlbymib(mib, miblen, &size, &sz, NULL, 0), 0, - "Unexpected mallctlbymib failure"); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, + 0), 0, "Unexpected mallctlbymib failure"); if (size == SZ) return (i); } @@ -62,7 +62,7 @@ nregs_per_run_compute(void) "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(nregs); - assert_d_eq(mallctlbymib(mib, miblen, &nregs, &sz, NULL, + assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); return (nregs); } @@ -74,8 +74,8 @@ arenas_extend_mallctl(void) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", &arena_ind, &sz, NULL, 0), 0, 
- "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.extend"); return (arena_ind); } From 6468dd52f3049c3a792ab8a9eb8ce53227523c0b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 15 Nov 2016 21:08:28 -0800 Subject: [PATCH 0528/2608] Fix an MSVC compiler warning. --- src/tcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 344b5eeb..21540ff4 100755 --- a/src/tcache.c +++ b/src/tcache.c @@ -519,7 +519,7 @@ tcache_boot(tsdn_t *tsdn) */ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) tcache_maxclass = SMALL_MAXCLASS; - else if ((1U << opt_lg_tcache_max) > large_maxclass) + else if ((ZU(1) << opt_lg_tcache_max) > large_maxclass) tcache_maxclass = large_maxclass; else tcache_maxclass = (ZU(1) << opt_lg_tcache_max); From 4066b4ef57c3b14bb57696539455809ce4a714c3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 10:40:00 -0800 Subject: [PATCH 0529/2608] Revert "Add JE_RUNNABLE() and use it for os_unfair_lock_*() test." This reverts commit a2e601a2236315fb6f994ff364ea442ed0aed07b. JE_RUNNABLE() causes general cross-compilation issues. 
--- configure.ac | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/configure.ac b/configure.ac index 24136736..053e5d7e 100644 --- a/configure.ac +++ b/configure.ac @@ -43,17 +43,6 @@ AC_CACHE_CHECK([whether $1 is compilable], [$4=no])]) ]) -dnl JE_RUNNABLE(label, hcode, mcode, rvar) -AC_DEFUN([JE_RUNNABLE], -[ -AC_CACHE_CHECK([whether $1 is runnable], - [$4], - [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) -]) - dnl ============================================================================ CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'` @@ -1660,11 +1649,7 @@ fi dnl ============================================================================ dnl Check for os_unfair_lock operations as provided on Darwin. -dnl Run the test program rather than merely compiling so that dyld lazy symbol -dnl binding doesn't result in a false positive if building with a version of -dnl XCode (>7) that supports the API on a system that is too old to actually -dnl implement it (<10.12). -JE_RUNNABLE([Darwin os_unfair_lock_*()], [ +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ #include ], [ os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; From 2a24dc24761b9b5b8f9af30618d2f4fb17d59a59 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 10:40:48 -0800 Subject: [PATCH 0530/2608] Revert "Add JE_RUNNABLE() and use it for os_unfair_lock_*() test." This reverts commit 45f83a2ac6a9cd3b21675766127ee85910e54156. JE_RUNNABLE() causes general cross-compilation issues. 
--- configure.ac | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/configure.ac b/configure.ac index 2bd18ff7..4645d63b 100644 --- a/configure.ac +++ b/configure.ac @@ -43,17 +43,6 @@ AC_CACHE_CHECK([whether $1 is compilable], [$4=no])]) ]) -dnl JE_RUNNABLE(label, hcode, mcode, rvar) -AC_DEFUN([JE_RUNNABLE], -[ -AC_CACHE_CHECK([whether $1 is runnable], - [$4], - [AC_RUN_IFELSE([AC_LANG_PROGRAM([$2], - [$3])], - [$4=yes], - [$4=no])]) -]) - dnl ============================================================================ CONFIG=`echo ${ac_configure_args} | sed -e 's#'"'"'\([^ ]*\)'"'"'#\1#g'` @@ -1690,11 +1679,7 @@ fi dnl ============================================================================ dnl Check for os_unfair_lock operations as provided on Darwin. -dnl Run the test program rather than merely compiling so that dyld lazy symbol -dnl binding doesn't result in a false positive if building with a version of -dnl XCode (>7) that supports the API on a system that is too old to actually -dnl implement it (<10.12). -JE_RUNNABLE([Darwin os_unfair_lock_*()], [ +JE_COMPILABLE([Darwin os_unfair_lock_*()], [ #include ], [ os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; From 9b94c015af7fe7f86b19c71059be8758afd06fe3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 10:56:40 -0800 Subject: [PATCH 0531/2608] Document how to use --cache configure option. This resolves #494. --- INSTALL | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/INSTALL b/INSTALL index a31871b0..2e963546 100644 --- a/INSTALL +++ b/INSTALL @@ -324,6 +324,21 @@ LDFLAGS="?" PATH="?" 'configure' uses this to find programs. +In some cases it may be necessary to work around configuration results that do +not match reality. 
For example, OS X 10.12 in conjunction with XCode 8 adds +the os_unfair_lock_*() API, but if XCode 8 is used to build jemalloc on older +versions of OS X, the configure script will determine that os_unfair_lock_*() +is compilable, yet run-time failures will result. To work around this +(ignoring that MACOSX_DEPLOYMENT_TARGET may be the correct fix), create a cache +file (called e.g. darwin.cache) with the following contents to override the +relevant configuration variable defined in configure.ac: + + je_cv_os_unfair_lock=no + +Invoke configure as such: + + ./configure --cache=darwin.cache + === Advanced compilation ======================================================= To build only parts of jemalloc, use the following targets: From 8e3fb7f4172b6b98d68ae605d0cc69cde3937d94 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 10:56:40 -0800 Subject: [PATCH 0532/2608] Document how to use --cache configure option. This resolves #494. --- INSTALL | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/INSTALL b/INSTALL index 68787165..fcfd576e 100644 --- a/INSTALL +++ b/INSTALL @@ -327,6 +327,21 @@ LDFLAGS="?" PATH="?" 'configure' uses this to find programs. +In some cases it may be necessary to work around configuration results that do +not match reality. For example, OS X 10.12 in conjunction with XCode 8 adds +the os_unfair_lock_*() API, but if XCode 8 is used to build jemalloc on older +versions of OS X, the configure script will determine that os_unfair_lock_*() +is compilable, yet run-time failures will result. To work around this +(ignoring that MACOSX_DEPLOYMENT_TARGET may be the correct fix), create a cache +file (called e.g. 
darwin.cache) with the following contents to override the +relevant configuration variable defined in configure.ac: + + je_cv_os_unfair_lock=no + +Invoke configure as such: + + ./configure --cache=darwin.cache + === Advanced compilation ======================================================= To build only parts of jemalloc, use the following targets: From 95974c04403bbfb2fe3031e86785d354ef5e5906 Mon Sep 17 00:00:00 2001 From: Maks Naumov Date: Wed, 16 Nov 2016 09:56:29 +0200 Subject: [PATCH 0533/2608] Remove size_t -> unsigned -> size_t conversion. --- src/arena.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 75a92edc..488bfd47 100644 --- a/src/arena.c +++ b/src/arena.c @@ -131,8 +131,7 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, assert(slab_data->nfree > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - regind = (unsigned)bitmap_sfu(slab_data->bitmap, - &bin_info->bitmap_info); + regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); ret = (void *)((uintptr_t)extent_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); slab_data->nfree--; From aec5a051e8848f06aa041c9ebfc092a2626356fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 18:28:38 -0800 Subject: [PATCH 0534/2608] Avoid gcc type-limits warnings. 
--- src/jemalloc.c | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index dc0add49..2c405b72 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -976,7 +976,11 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_HANDLE_T_U(t, o, n, min, max, clip) \ +#define CONF_MIN_no(um, min) false +#define CONF_MIN_yes(um, min) ((um) < (min)) +#define CONF_MAX_no(um, max) false +#define CONF_MAX_yes(um, max) ((um) > (max)) +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -989,15 +993,19 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if ((min) != 0 && um < (min)) \ + if (CONF_MIN_##check_min(um, \ + (min))) \ o = (t)(min); \ - else if (um > (max)) \ + else if (CONF_MAX_##check_max( \ + um, (max))) \ o = (t)(max); \ else \ o = (t)um; \ } else { \ - if (((min) != 0 && um < (min)) \ - || um > (max)) { \ + if (CONF_MIN_##check_min(um, \ + (min)) || \ + CONF_MAX_##check_max(um, \ + (max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -1007,10 +1015,13 @@ malloc_conf_init(void) } \ continue; \ } -#define CONF_HANDLE_UNSIGNED(o, n, min, max, clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, clip) +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ + clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, \ + check_min, check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1068,7 +1079,7 @@ malloc_conf_init(void) continue; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, false) + UINT_MAX, yes, no, false) 
CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) @@ -1120,8 +1131,8 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init", true) CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1, true) + "lg_prof_sample", 0, (sizeof(uint64_t) << 3) + - 1, no, yes, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, @@ -1137,7 +1148,14 @@ malloc_conf_init(void) malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH +#undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL +#undef CONF_MIN_no +#undef CONF_MIN_yes +#undef CONF_MAX_no +#undef CONF_MAX_yes +#undef CONF_HANDLE_T_U +#undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P From 3ea838d2a20f036ea0d1424262b788fc23686746 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 18:28:38 -0800 Subject: [PATCH 0535/2608] Avoid gcc type-limits warnings. 
--- src/jemalloc.c | 46 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 38650ff0..bbd6712d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1056,7 +1056,11 @@ malloc_conf_init(void) if (cont) \ continue; \ } -#define CONF_HANDLE_T_U(t, o, n, min, max, clip) \ +#define CONF_MIN_no(um, min) false +#define CONF_MIN_yes(um, min) ((um) < (min)) +#define CONF_MAX_no(um, max) false +#define CONF_MAX_yes(um, max) ((um) > (max)) +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -1069,15 +1073,19 @@ malloc_conf_init(void) "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if ((min) != 0 && um < (min)) \ + if (CONF_MIN_##check_min(um, \ + (min))) \ o = (t)(min); \ - else if (um > (max)) \ + else if (CONF_MAX_##check_max( \ + um, (max))) \ o = (t)(max); \ else \ o = (t)um; \ } else { \ - if (((min) != 0 && um < (min)) \ - || um > (max)) { \ + if (CONF_MIN_##check_min(um, \ + (min)) || \ + CONF_MAX_##check_max(um, \ + (max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -1087,10 +1095,13 @@ malloc_conf_init(void) } \ continue; \ } -#define CONF_HANDLE_UNSIGNED(o, n, min, max, clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, clip) +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ + clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, \ + check_min, check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ @@ -1133,7 +1144,7 @@ malloc_conf_init(void) */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + LG_SIZE_CLASS_GROUP + (config_fill ? 
2 : 1), - (sizeof(size_t) << 3) - 1, true) + (sizeof(size_t) << 3) - 1, yes, yes, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1159,7 +1170,7 @@ malloc_conf_init(void) continue; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, false) + UINT_MAX, yes, no, false) if (strncmp("purge", k, klen) == 0) { int i; bool match = false; @@ -1230,7 +1241,7 @@ malloc_conf_init(void) continue; } CONF_HANDLE_SIZE_T(opt_quarantine, "quarantine", - 0, SIZE_T_MAX, false) + 0, SIZE_T_MAX, no, no, false) CONF_HANDLE_BOOL(opt_redzone, "redzone", true) CONF_HANDLE_BOOL(opt_zero, "zero", true) } @@ -1267,8 +1278,8 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init", true) CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1, true) + "lg_prof_sample", 0, (sizeof(uint64_t) << 3) + - 1, no, yes, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, @@ -1284,7 +1295,14 @@ malloc_conf_init(void) malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH +#undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL +#undef CONF_MIN_no +#undef CONF_MIN_yes +#undef CONF_MAX_no +#undef CONF_MAX_yes +#undef CONF_HANDLE_T_U +#undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P From 0d6a472db9cb4bcd0406b1770822ee5cc895a7d2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 18:53:59 -0800 Subject: [PATCH 0536/2608] Avoid gcc tautological-compare warnings. 
--- include/jemalloc/internal/valgrind.h | 26 ++++++++++++++++++++------ src/jemalloc.c | 12 ++++++------ 2 files changed, 26 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/valgrind.h b/include/jemalloc/internal/valgrind.h index 1a868082..877a142b 100644 --- a/include/jemalloc/internal/valgrind.h +++ b/include/jemalloc/internal/valgrind.h @@ -36,13 +36,25 @@ zero); \ } \ } while (0) -#define JEMALLOC_VALGRIND_REALLOC(maybe_moved, tsdn, ptr, usize, \ - ptr_maybe_null, old_ptr, old_usize, old_rzsize, old_ptr_maybe_null, \ - zero) do { \ +#define JEMALLOC_VALGRIND_REALLOC_MOVED_no(ptr, old_ptr) \ + (false) +#define JEMALLOC_VALGRIND_REALLOC_MOVED_maybe(ptr, old_ptr) \ + ((ptr) != (old_ptr)) +#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_no(ptr) \ + (false) +#define JEMALLOC_VALGRIND_REALLOC_PTR_NULL_maybe(ptr) \ + (ptr == NULL) +#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_no(old_ptr) \ + (false) +#define JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_maybe(old_ptr) \ + (old_ptr == NULL) +#define JEMALLOC_VALGRIND_REALLOC(moved, tsdn, ptr, usize, ptr_null, \ + old_ptr, old_usize, old_rzsize, old_ptr_null, zero) do { \ if (unlikely(in_valgrind)) { \ size_t rzsize = p2rz(tsdn, ptr); \ \ - if (!maybe_moved || ptr == old_ptr) { \ + if (!JEMALLOC_VALGRIND_REALLOC_MOVED_##moved(ptr, \ + old_ptr)) { \ VALGRIND_RESIZEINPLACE_BLOCK(ptr, old_usize, \ usize, rzsize); \ if (zero && old_usize < usize) { \ @@ -51,11 +63,13 @@ old_usize), usize - old_usize); \ } \ } else { \ - if (!old_ptr_maybe_null || old_ptr != NULL) { \ + if (!JEMALLOC_VALGRIND_REALLOC_OLD_PTR_NULL_## \ + old_ptr_null(old_ptr)) { \ valgrind_freelike_block(old_ptr, \ old_rzsize); \ } \ - if (!ptr_maybe_null || ptr != NULL) { \ + if (!JEMALLOC_VALGRIND_REALLOC_PTR_NULL_## \ + ptr_null(ptr)) { \ size_t copy_size = (old_usize < usize) \ ? 
old_usize : usize; \ size_t tail_size = usize - copy_size; \ diff --git a/src/jemalloc.c b/src/jemalloc.c index bbd6712d..14c1c4d8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1991,8 +1991,8 @@ je_realloc(void *ptr, size_t size) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - JEMALLOC_VALGRIND_REALLOC(true, tsdn, ret, usize, true, ptr, old_usize, - old_rzsize, true, false); + JEMALLOC_VALGRIND_REALLOC(maybe, tsdn, ret, usize, maybe, ptr, + old_usize, old_rzsize, maybe, false); witness_assert_lockless(tsdn); return (ret); } @@ -2418,8 +2418,8 @@ je_rallocx(void *ptr, size_t size, int flags) *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - JEMALLOC_VALGRIND_REALLOC(true, tsd_tsdn(tsd), p, usize, false, ptr, - old_usize, old_rzsize, false, zero); + JEMALLOC_VALGRIND_REALLOC(maybe, tsd_tsdn(tsd), p, usize, no, ptr, + old_usize, old_rzsize, no, zero); witness_assert_lockless(tsd_tsdn(tsd)); return (p); label_oom: @@ -2561,8 +2561,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; } - JEMALLOC_VALGRIND_REALLOC(false, tsd_tsdn(tsd), ptr, usize, false, ptr, - old_usize, old_rzsize, false, zero); + JEMALLOC_VALGRIND_REALLOC(no, tsd_tsdn(tsd), ptr, usize, no, ptr, + old_usize, old_rzsize, no, zero); label_not_resized: UTRACE(ptr, size, ptr); witness_assert_lockless(tsd_tsdn(tsd)); From f7ca1c9bc321feffe056f5d154c287f63e324020 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 19:41:09 -0800 Subject: [PATCH 0537/2608] Remove a residual comment. 
--- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 053e5d7e..72021e09 100644 --- a/configure.ac +++ b/configure.ac @@ -171,7 +171,6 @@ fi if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then -dnl JE_CFLAGS_APPEND([-std=gnu99]) JE_CFLAGS_APPEND([-std=gnu11]) if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) From e7ca53bac2dbd281779b431b6ab29e9c39984256 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 16 Nov 2016 19:41:09 -0800 Subject: [PATCH 0538/2608] Remove a residual comment. --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4645d63b..fbdc9fc9 100644 --- a/configure.ac +++ b/configure.ac @@ -171,7 +171,6 @@ fi if test "x$CFLAGS" = "x" ; then no_CFLAGS="yes" if test "x$GCC" = "xyes" ; then -dnl JE_CFLAGS_APPEND([-std=gnu99]) JE_CFLAGS_APPEND([-std=gnu11]) if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) From a64123ce13545ef57a36285cc9d33978e59fc3da Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 10:24:51 -0800 Subject: [PATCH 0539/2608] Refactor madvise(2) configuration. Add feature tests for the MADV_FREE and MADV_DONTNEED flags to madvise(2), so that MADV_FREE is detected and used for Linux kernel versions 4.5 and newer. Refactor pages_purge() so that on systems which support both flags, MADV_FREE is preferred over MADV_DONTNEED. This resolves #387. 
--- configure.ac | 38 ++++++++++++------- .../internal/jemalloc_internal_defs.h.in | 21 +++++----- src/pages.c | 10 ++--- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/configure.ac b/configure.ac index 72021e09..15897bee 100644 --- a/configure.ac +++ b/configure.ac @@ -354,7 +354,6 @@ maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -367,21 +366,17 @@ case "${host}" in *-*-freebsd*) abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux-android) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -389,7 +384,6 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_C11ATOMICS]) force_tls="0" @@ -401,7 +395,6 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) default_munmap="0" @@ -418,11 +411,9 @@ case "${host}" in [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" @@ -1580,12 +1571,33 @@ dnl Check for madvise(2). 
JE_COMPILABLE([madvise(2)], [ #include ], [ - { - madvise((void *)0, 0, 0); - } + madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_FREE). + JE_COMPILABLE([madvise(..., MADV_FREE)], [ +#include +], [ + madvise((void *)0, 0, MADV_FREE); +], [je_cv_madv_free]) + if test "x${je_cv_madv_free}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + fi + + dnl Check for madvise(..., MADV_DONTNEED). + JE_COMPILABLE([madvise(..., MADV_DONTNEED)], [ +#include +], [ + madvise((void *)0, 0, MADV_DONTNEED); +], [je_cv_madv_dontneed]) + if test "x${je_cv_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + fi + + if test "x${je_cv_madv_free}" = "xyes" \ + -o "x${je_cv_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 4d2daea8..baf12d43 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -55,11 +55,6 @@ */ #undef JEMALLOC_HAVE_BUILTIN_CLZ -/* - * Defined if madvise(2) is available. - */ -#undef JEMALLOC_HAVE_MADVISE - /* * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. */ @@ -249,18 +244,20 @@ #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* Defined if madvise(2) is available. */ +#undef JEMALLOC_HAVE_MADVISE + /* * Methods for purging unused pages differ between operating systems. * - * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, - * such that new pages will be demand-zeroed if - * the address region is later touched. 
- * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being - * unused, such that they will be discarded rather - * than swapped out. + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : This immediately discards pages, such that + * new pages will be demand-zeroed if the + * address region is later touched. */ -#undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +#undef JEMALLOC_PURGE_MADVISE_DONTNEED /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/src/pages.c b/src/pages.c index 647952ac..395ace99 100644 --- a/src/pages.c +++ b/src/pages.c @@ -171,14 +171,14 @@ pages_purge(void *addr, size_t size) VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; #elif defined(JEMALLOC_HAVE_MADVISE) -# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -# define JEMALLOC_MADV_ZEROS true -# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false +# elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true # else -# error "No madvise(2) flag defined for purging unused dirty pages." +# error No madvise(2) flag defined for purging unused dirty pages # endif int err = madvise(addr, size, JEMALLOC_MADV_PURGE); unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); From 62f2d84e7aebaa25df32a0ae882cc856e5218687 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 10:24:51 -0800 Subject: [PATCH 0540/2608] Refactor madvise(2) configuration. Add feature tests for the MADV_FREE and MADV_DONTNEED flags to madvise(2), so that MADV_FREE is detected and used for Linux kernel versions 4.5 and newer. 
Refactor pages_purge() so that on systems which support both flags, MADV_FREE is preferred over MADV_DONTNEED. This resolves #387. --- configure.ac | 38 ++++++++++++------- .../internal/jemalloc_internal_defs.h.in | 21 +++++----- src/pages.c | 10 ++--- 3 files changed, 39 insertions(+), 30 deletions(-) diff --git a/configure.ac b/configure.ac index fbdc9fc9..68602e19 100644 --- a/configure.ac +++ b/configure.ac @@ -354,7 +354,6 @@ maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) abi="macho" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH="" LD_PRELOAD_VAR="DYLD_INSERT_LIBRARIES" so="dylib" @@ -367,21 +366,17 @@ case "${host}" in *-*-freebsd*) abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-openbsd*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) force_tls="0" ;; *-*-bitrig*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-linux-android) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -389,7 +384,6 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_C11ATOMICS]) force_tls="0" @@ -401,7 +395,6 @@ case "${host}" in abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) default_munmap="0" @@ -418,11 +411,9 @@ case "${host}" in [abi="elf"], [abi="aout"]) AC_MSG_RESULT([$abi]) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) ;; *-*-solaris2*) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). 
CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" @@ -1610,12 +1601,33 @@ dnl Check for madvise(2). JE_COMPILABLE([madvise(2)], [ #include ], [ - { - madvise((void *)0, 0, 0); - } + madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_FREE). + JE_COMPILABLE([madvise(..., MADV_FREE)], [ +#include +], [ + madvise((void *)0, 0, MADV_FREE); +], [je_cv_madv_free]) + if test "x${je_cv_madv_free}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + fi + + dnl Check for madvise(..., MADV_DONTNEED). + JE_COMPILABLE([madvise(..., MADV_DONTNEED)], [ +#include +], [ + madvise((void *)0, 0, MADV_DONTNEED); +], [je_cv_madv_dontneed]) + if test "x${je_cv_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + fi + + if test "x${je_cv_madv_free}" = "xyes" \ + -o "x${je_cv_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 9b3dca50..5419513b 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -55,11 +55,6 @@ */ #undef JEMALLOC_HAVE_BUILTIN_CLZ -/* - * Defined if madvise(2) is available. - */ -#undef JEMALLOC_HAVE_MADVISE - /* * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. */ @@ -252,18 +247,20 @@ #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT #undef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY +/* Defined if madvise(2) is available. */ +#undef JEMALLOC_HAVE_MADVISE + /* * Methods for purging unused pages differ between operating systems. 
* - * madvise(..., MADV_DONTNEED) : On Linux, this immediately discards pages, - * such that new pages will be demand-zeroed if - * the address region is later touched. - * madvise(..., MADV_FREE) : On FreeBSD and Darwin, this marks pages as being - * unused, such that they will be discarded rather - * than swapped out. + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : This immediately discards pages, such that + * new pages will be demand-zeroed if the + * address region is later touched. */ -#undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_FREE +#undef JEMALLOC_PURGE_MADVISE_DONTNEED /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/src/pages.c b/src/pages.c index 647952ac..395ace99 100644 --- a/src/pages.c +++ b/src/pages.c @@ -171,14 +171,14 @@ pages_purge(void *addr, size_t size) VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; #elif defined(JEMALLOC_HAVE_MADVISE) -# ifdef JEMALLOC_PURGE_MADVISE_DONTNEED -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -# define JEMALLOC_MADV_ZEROS true -# elif defined(JEMALLOC_PURGE_MADVISE_FREE) +# if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false +# elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +# define JEMALLOC_MADV_PURGE MADV_DONTNEED +# define JEMALLOC_MADV_ZEROS true # else -# error "No madvise(2) flag defined for purging unused dirty pages." +# error No madvise(2) flag defined for purging unused dirty pages # endif int err = madvise(addr, size, JEMALLOC_MADV_PURGE); unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); From fda60be799e7929191f3844dd55b685549b6d867 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 11:50:52 -0800 Subject: [PATCH 0541/2608] Update a comment. 
--- include/jemalloc/internal/arena.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 28d63c68..ad400839 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -165,7 +165,7 @@ struct arena_s { * perspective: * 1) Thread assignment (modifies nthreads) is synchronized via atomics. * 2) Bin-related operations are protected by bin locks. - * 3) Chunk-related operations are protected by this mutex. + * 3) Extent-related operations are protected by this mutex. */ malloc_mutex_t lock; From 5234be21333e341252ed7223570d790970694d80 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 15:14:57 -0800 Subject: [PATCH 0542/2608] Add pthread_atfork(3) feature test. Some versions of Android provide a pthreads library without providing pthread_atfork(), so in practice a separate feature test is necessary for the latter. --- configure.ac | 8 ++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/jemalloc.c | 5 +++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 15897bee..25dcc2c2 100644 --- a/configure.ac +++ b/configure.ac @@ -1299,6 +1299,14 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) + JE_COMPILABLE([pthread_atfork(3)], [ +#include +], [ + pthread_atfork((void *)0, (void *)0, (void *)0); +], [je_cv_pthread_atfork]) + if test "x${je_cv_pthread_atfork}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) + fi fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index baf12d43..c345214b 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ 
-79,6 +79,9 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* Defined if pthread_atfork(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_ATFORK + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 2c405b72..7df3fc9e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1264,8 +1264,9 @@ malloc_init_hard_recursible(void) ncpus = malloc_ncpus(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32) && !defined(__native_client__)) +#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ + && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ + !defined(__native_client__)) /* LinuxThreads' pthread_atfork() allocates. */ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { From 949a27fc329e6a55a94857a401765d017f13f8ff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 15:14:57 -0800 Subject: [PATCH 0543/2608] Add pthread_atfork(3) feature test. Some versions of Android provide a pthreads library without providing pthread_atfork(), so in practice a separate feature test is necessary for the latter. 
--- configure.ac | 8 ++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/jemalloc.c | 5 +++-- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 68602e19..197414c5 100644 --- a/configure.ac +++ b/configure.ac @@ -1329,6 +1329,14 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) + JE_COMPILABLE([pthread_atfork(3)], [ +#include +], [ + pthread_atfork((void *)0, (void *)0, (void *)0); +], [je_cv_pthread_atfork]) + if test "x${je_cv_pthread_atfork}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) + fi fi CPPFLAGS="$CPPFLAGS -D_REENTRANT" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5419513b..d530119b 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -79,6 +79,9 @@ */ #undef JEMALLOC_HAVE_ISSETUGID +/* Defined if pthread_atfork(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_ATFORK + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 14c1c4d8..baead664 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1411,8 +1411,9 @@ malloc_init_hard_recursible(void) ncpus = malloc_ncpus(); -#if (!defined(JEMALLOC_MUTEX_INIT_CB) && !defined(JEMALLOC_ZONE) \ - && !defined(_WIN32) && !defined(__native_client__)) +#if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ + && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ + !defined(__native_client__)) /* LinuxThreads' pthread_atfork() allocates. 
*/ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { From c3b85f25857cf4022be7b186adc1e20206d4c74f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Nov 2016 10:58:23 -0800 Subject: [PATCH 0544/2608] Style fixes. --- include/jemalloc/internal/util.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index aee00d6d..119696bb 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -56,17 +56,17 @@ * uninitialized. */ #ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v +# define JEMALLOC_CC_SILENCE_INIT(v) = v #else -# define JEMALLOC_CC_SILENCE_INIT(v) +# define JEMALLOC_CC_SILENCE_INIT(v) #endif #ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) #else -# define likely(x) !!(x) -# define unlikely(x) !!(x) +# define likely(x) !!(x) +# define unlikely(x) !!(x) #endif #if !defined(JEMALLOC_INTERNAL_UNREACHABLE) From 32127949a3045330b8058bbf474e619e1bf2f05f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Nov 2016 10:58:58 -0800 Subject: [PATCH 0545/2608] Enable overriding JEMALLOC_{ALLOC,FREE}_JUNK. This resolves #509. --- include/jemalloc/internal/util.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 119696bb..d9f97416 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -41,8 +41,12 @@ #define MALLOC_PRINTF_BUFSIZE 4096 /* Junk fill patterns. 
*/ -#define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) -#define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif /* * Wrap a cpp argument that contains commas such that it isn't broken up into From fc11f3cb8443c029f54bf9ba21574b0f61996dd2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 22 Nov 2016 10:58:58 -0800 Subject: [PATCH 0546/2608] Enable overriding JEMALLOC_{ALLOC,FREE}_JUNK. This resolves #509. --- include/jemalloc/internal/util.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index aee00d6d..4b56d652 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -41,8 +41,12 @@ #define MALLOC_PRINTF_BUFSIZE 4096 /* Junk fill patterns. */ -#define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) -#define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif /* * Wrap a cpp argument that contains commas such that it isn't broken up into From eb29d7ec0e2c4994e10ec40d42265a86e569500c Mon Sep 17 00:00:00 2001 From: John Szakmeister Date: Wed, 23 Nov 2016 15:32:35 -0500 Subject: [PATCH 0547/2608] Implement a more reliable detection scheme for os_unfair_lock. The core issue here is the weak linking of the symbol, and in certain environments--for instance, using the latest Xcode (8.1) with the latest SDK (10.12)--os_unfair_lock may resolve even though you're compiling on a host that doesn't support it (10.11). We can use the availability macros to circumvent this problem, and detect that we're not compiling for a target that is going to support them and error out at compile time. 
The other alternative is to do a runtime check, but that presents issues for cross-compiling. --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 25dcc2c2..2e3ef364 100644 --- a/configure.ac +++ b/configure.ac @@ -1670,10 +1670,15 @@ dnl Check for os_unfair_lock operations as provided on Darwin. JE_COMPILABLE([Darwin os_unfair_lock_*()], [ #include +#include ], [ + #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200 + #error "os_unfair_lock is not supported" + #else os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; os_unfair_lock_lock(&lock); os_unfair_lock_unlock(&lock); + #endif ], [je_cv_os_unfair_lock]) if test "x${je_cv_os_unfair_lock}" = "xyes" ; then AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) From e98a620c59ac20b13e2de796164cc67f050ed2bf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 13:36:17 -0800 Subject: [PATCH 0548/2608] Mark partially purged arena chunks as non-hugepage. Add the pages_[no]huge() functions, which toggle huge page state via madvise(..., MADV_[NO]HUGEPAGE) calls. The first time a page run is purged from within an arena chunk, call pages_nohuge() to tell the kernel to make no further attempts to back the chunk with huge pages. Upon arena chunk deletion, restore the associated virtual memory to its original state via pages_huge(). This resolves #243. 
--- Makefile.in | 1 + configure.ac | 14 +++++++-- include/jemalloc/internal/arena.h | 8 +++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++ include/jemalloc/internal/pages.h | 2 ++ include/jemalloc/internal/private_symbols.txt | 2 ++ src/arena.c | 24 +++++++++++++- src/pages.c | 31 ++++++++++++++++++- test/unit/pages.c | 27 ++++++++++++++++ 9 files changed, 110 insertions(+), 5 deletions(-) create mode 100644 test/unit/pages.c diff --git a/Makefile.in b/Makefile.in index 836d4e9c..c7053639 100644 --- a/Makefile.in +++ b/Makefile.in @@ -167,6 +167,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/pack.c \ + $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/configure.ac b/configure.ac index 197414c5..3fdd1b93 100644 --- a/configure.ac +++ b/configure.ac @@ -1612,6 +1612,8 @@ JE_COMPILABLE([madvise(2)], [ madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_FREE). JE_COMPILABLE([madvise(..., MADV_FREE)], [ #include @@ -1632,9 +1634,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) fi - if test "x${je_cv_madv_free}" = "xyes" \ - -o "x${je_cv_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). 
+ JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ +#include +], [ + madvise((void *)0, 0, MADV_HUGEPAGE); + madvise((void *)0, 0, MADV_NOHUGEPAGE); +], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) fi fi diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 30e2bdd6..ce4e6029 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -190,6 +190,14 @@ struct arena_chunk_s { */ extent_node_t node; + /* + * True if memory could be backed by transparent huge pages. This is + * only directly relevant to Linux, since it is the only supported + * platform on which jemalloc interacts with explicit transparent huge + * page controls. + */ + bool hugepage; + /* * Map of pages within chunk that keeps track of free/large/small. The * first map_bias entries are omitted, since the chunk header does not diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index d530119b..d7f3ef1c 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -265,6 +265,12 @@ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_THP + /* Define if operating system has alloca.h header. 
*/ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index e21effd1..4ae9f156 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -16,6 +16,8 @@ void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 71bfb94d..c1c6c409 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -397,7 +397,9 @@ p2rz pages_boot pages_commit pages_decommit +pages_huge pages_map +pages_nohuge pages_purge pages_trim pages_unmap diff --git a/src/arena.c b/src/arena.c index 87eead81..648a8da3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -664,6 +664,8 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) if (chunk == NULL) return (NULL); + chunk->hugepage = true; + /* * Initialize the map to contain one maximal free untouched run. 
Mark * the pages as zeroed if arena_chunk_alloc_internal() returned a zeroed @@ -727,13 +729,14 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { - size_t sn; + size_t sn, hugepage; bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_deregister(chunk, &chunk->node); sn = extent_node_sn_get(&chunk->node); + hugepage = chunk->hugepage; committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { /* @@ -746,6 +749,14 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } + if (!hugepage) { + /* + * Convert chunk back to the default state, so that all + * subsequent chunk allocations start out with chunks that can + * be backed by transparent huge pages. + */ + pages_huge(chunk, chunksize); + } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, sn, committed); @@ -1682,6 +1693,17 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, run_size = arena_mapbits_large_size_get(chunk, pageind); npages = run_size >> LG_PAGE; + /* + * If this is the first run purged within chunk, mark + * the chunk as non-huge. This will prevent all use of + * transparent huge pages for this chunk until the chunk + * as a whole is deallocated. 
+ */ + if (chunk->hugepage) { + pages_nohuge(chunk, chunksize); + chunk->hugepage = false; + } + assert(pageind + npages <= chunk_npages); assert(!arena_mapbits_decommitted_get(chunk, pageind)); assert(!arena_mapbits_decommitted_get(chunk, diff --git a/src/pages.c b/src/pages.c index 395ace99..a56d10b7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -170,7 +170,8 @@ pages_purge(void *addr, size_t size) #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; -#elif defined(JEMALLOC_HAVE_MADVISE) +#elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ + defined(JEMALLOC_PURGE_MADVISE_FREE)) # if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false @@ -191,6 +192,34 @@ pages_purge(void *addr, size_t size) return (unzeroed); } +bool +pages_huge(void *addr, size_t size) +{ + + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return (false); +#endif +} + +bool +pages_nohuge(void *addr, size_t size) +{ + + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +#else + return (false); +#endif +} + #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool os_overcommits_sysctl(void) diff --git a/test/unit/pages.c b/test/unit/pages.c new file mode 100644 index 00000000..d31a35e6 --- /dev/null +++ b/test/unit/pages.c @@ -0,0 +1,27 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_pages_huge) +{ + bool commit; + void *pages; + + commit = true; + pages = pages_map(NULL, PAGE, &commit); + assert_ptr_not_null(pages, "Unexpected pages_map() error"); + + assert_false(pages_huge(pages, PAGE), + "Unexpected pages_huge() result"); + assert_false(pages_nohuge(pages, PAGE), + "Unexpected pages_nohuge() result"); + + pages_unmap(pages, PAGE); +} +TEST_END + +int +main(void) +{ + + return (test( + 
test_pages_huge)); +} From a05d4da4d861c054260431ba113be42040173271 Mon Sep 17 00:00:00 2001 From: John Szakmeister Date: Wed, 23 Nov 2016 15:32:35 -0500 Subject: [PATCH 0549/2608] Implement a more reliable detection scheme for os_unfair_lock. The core issue here is the weak linking of the symbol, and in certain environments--for instance, using the latest Xcode (8.1) with the latest SDK (10.12)--os_unfair_lock may resolve even though you're compiling on a host that doesn't support it (10.11). We can use the availability macros to circumvent this problem, and detect that we're not compiling for a target that is going to support them and error out at compile time. The other alternative is to do a runtime check, but that presents issues for cross-compiling. --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 3fdd1b93..762b0c8a 100644 --- a/configure.ac +++ b/configure.ac @@ -1708,10 +1708,15 @@ dnl Check for os_unfair_lock operations as provided on Darwin. JE_COMPILABLE([Darwin os_unfair_lock_*()], [ #include +#include ], [ + #if MAC_OS_X_VERSION_MIN_REQUIRED < 101200 + #error "os_unfair_lock is not supported" + #else os_unfair_lock lock = OS_UNFAIR_LOCK_INIT; os_unfair_lock_lock(&lock); os_unfair_lock_unlock(&lock); + #endif ], [je_cv_os_unfair_lock]) if test "x${je_cv_os_unfair_lock}" = "xyes" ; then AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) From 7179351a45f00fa943cfe23b555036615b91ce9d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Nov 2016 09:57:12 -0800 Subject: [PATCH 0550/2608] Update configure cache file example. --- INSTALL | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/INSTALL b/INSTALL index 2e963546..48025e84 100644 --- a/INSTALL +++ b/INSTALL @@ -325,19 +325,13 @@ PATH="?" 'configure' uses this to find programs. In some cases it may be necessary to work around configuration results that do -not match reality. 
For example, OS X 10.12 in conjunction with XCode 8 adds -the os_unfair_lock_*() API, but if XCode 8 is used to build jemalloc on older -versions of OS X, the configure script will determine that os_unfair_lock_*() -is compilable, yet run-time failures will result. To work around this -(ignoring that MACOSX_DEPLOYMENT_TARGET may be the correct fix), create a cache -file (called e.g. darwin.cache) with the following contents to override the -relevant configuration variable defined in configure.ac: +not match reality. For example, Linux 4.5 added support for the MADV_FREE flag +to madvise(2), which can cause problems if building on a host with MADV_FREE +support and deploying to a target without. To work around this, use a cache +file to override the relevant configuration variable defined in configure.ac, +e.g.: - je_cv_os_unfair_lock=no - -Invoke configure as such: - - ./configure --cache=darwin.cache + echo "je_cv_madv_free=no" > config.cache && ./configure -C === Advanced compilation ======================================================= From 34a7e37a71f95c957b70f88c2039eead6a33e83e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 16:06:19 -0800 Subject: [PATCH 0551/2608] Fix pages_purge() when using MADV_DONTNEED. This fixes a regression caused by e98a620c59ac20b13e2de796164cc67f050ed2bf (Mark partially purged arena chunks as non-hugepage.). 
--- src/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index a56d10b7..1311a5e6 100644 --- a/src/pages.c +++ b/src/pages.c @@ -171,7 +171,7 @@ pages_purge(void *addr, size_t size) VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; #elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ - defined(JEMALLOC_PURGE_MADVISE_FREE)) + defined(JEMALLOC_PURGE_MADVISE_DONTNEED)) # if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false From e1b2970d28d26454de9345e1510c6bae257e82e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 30 Nov 2016 09:57:12 -0800 Subject: [PATCH 0552/2608] Update configure cache file example. --- INSTALL | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/INSTALL b/INSTALL index fcfd576e..626ef7f0 100644 --- a/INSTALL +++ b/INSTALL @@ -328,19 +328,13 @@ PATH="?" 'configure' uses this to find programs. In some cases it may be necessary to work around configuration results that do -not match reality. For example, OS X 10.12 in conjunction with XCode 8 adds -the os_unfair_lock_*() API, but if XCode 8 is used to build jemalloc on older -versions of OS X, the configure script will determine that os_unfair_lock_*() -is compilable, yet run-time failures will result. To work around this -(ignoring that MACOSX_DEPLOYMENT_TARGET may be the correct fix), create a cache -file (called e.g. darwin.cache) with the following contents to override the -relevant configuration variable defined in configure.ac: +not match reality. For example, Linux 4.5 added support for the MADV_FREE flag +to madvise(2), which can cause problems if building on a host with MADV_FREE +support and deploying to a target without. 
To work around this, use a cache +file to override the relevant configuration variable defined in configure.ac, +e.g.: - je_cv_os_unfair_lock=no - -Invoke configure as such: - - ./configure --cache=darwin.cache + echo "je_cv_madv_free=no" > config.cache && ./configure -C === Advanced compilation ======================================================= From acb7b1f53e25fcad89375512e6eaea8115dc6af5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 16:47:36 -0800 Subject: [PATCH 0553/2608] Add --disable-syscall. This resolves #517. --- INSTALL | 5 +++ configure.ac | 31 +++++++++++++------ .../internal/jemalloc_internal_defs.h.in | 4 +-- src/pages.c | 6 ++-- src/util.c | 2 +- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/INSTALL b/INSTALL index 48025e84..4cad3ee2 100644 --- a/INSTALL +++ b/INSTALL @@ -203,6 +203,11 @@ any of the following arguments (not a definitive list) to 'configure': most extreme case increases physical memory usage for the 16 KiB size class to 20 KiB. +--disable-syscall + Disable use of syscall(2) rather than {open,read,write,close}(2). This is + intended as a workaround for systems that place security limitations on + syscall(2). + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. diff --git a/configure.ac b/configure.ac index 2e3ef364..ab2c0bee 100644 --- a/configure.ac +++ b/configure.ac @@ -1366,20 +1366,33 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi -dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS X -dnl 10.12's deprecation warning prevents use. -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_COMPILABLE([syscall(2)], [ +dnl Use syscall(2) (if available) by default. 
+AC_ARG_ENABLE([syscall], + [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], +[if test "x$enable_syscall" = "xno" ; then + enable_syscall="0" +else + enable_syscall="1" +fi +], +[enable_syscall="1"] +) +if test "x$enable_syscall" = "x1" ; then + dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS + dnl X 10.12's deprecation warning prevents use. + SAVED_CFLAGS="${CFLAGS}" + JE_CFLAGS_APPEND([-Werror]) + JE_COMPILABLE([syscall(2)], [ #include #include ], [ syscall(SYS_write, 2, "hello", 5); ], - [je_cv_syscall]) -CFLAGS="${SAVED_CFLAGS}" -if test "x$je_cv_syscall" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ]) + [je_cv_syscall]) + CFLAGS="${SAVED_CFLAGS}" + if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) + fi fi dnl Check if the GNU-specific secure_getenv function exists. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c345214b..aa0c0474 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -66,8 +66,8 @@ */ #undef JEMALLOC_OSSPIN -/* Defined if syscall(2) is available. */ -#undef JEMALLOC_HAVE_SYSCALL +/* Defined if syscall(2) is usable. */ +#undef JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. diff --git a/src/pages.c b/src/pages.c index 395ace99..6af228ac 100644 --- a/src/pages.c +++ b/src/pages.c @@ -219,7 +219,7 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); @@ -227,13 +227,13 @@ os_overcommits_proc(void) if (fd == -1) return (false); /* Error. 
*/ -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); #else nread = read(fd, &buf, sizeof(buf)); #endif -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else close(fd); diff --git a/src/util.c b/src/util.c index 5b8175bc..dd8c2363 100644 --- a/src/util.c +++ b/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary From 145f3cd17340ae3a1af8aad3bdce8ddcc626ec67 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 16:47:36 -0800 Subject: [PATCH 0554/2608] Add --disable-syscall. This resolves #517. --- INSTALL | 5 +++ configure.ac | 31 +++++++++++++------ .../internal/jemalloc_internal_defs.h.in | 4 +-- src/pages.c | 6 ++-- src/util.c | 2 +- 5 files changed, 33 insertions(+), 15 deletions(-) diff --git a/INSTALL b/INSTALL index 626ef7f0..cce3ed71 100644 --- a/INSTALL +++ b/INSTALL @@ -206,6 +206,11 @@ any of the following arguments (not a definitive list) to 'configure': most extreme case increases physical memory usage for the 16 KiB size class to 20 KiB. +--disable-syscall + Disable use of syscall(2) rather than {open,read,write,close}(2). This is + intended as a workaround for systems that place security limitations on + syscall(2). + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. 
diff --git a/configure.ac b/configure.ac index 762b0c8a..9573c302 100644 --- a/configure.ac +++ b/configure.ac @@ -1396,20 +1396,33 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi -dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS X -dnl 10.12's deprecation warning prevents use. -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_COMPILABLE([syscall(2)], [ +dnl Use syscall(2) (if available) by default. +AC_ARG_ENABLE([syscall], + [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], +[if test "x$enable_syscall" = "xno" ; then + enable_syscall="0" +else + enable_syscall="1" +fi +], +[enable_syscall="1"] +) +if test "x$enable_syscall" = "x1" ; then + dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS + dnl X 10.12's deprecation warning prevents use. + SAVED_CFLAGS="${CFLAGS}" + JE_CFLAGS_APPEND([-Werror]) + JE_COMPILABLE([syscall(2)], [ #include #include ], [ syscall(SYS_write, 2, "hello", 5); ], - [je_cv_syscall]) -CFLAGS="${SAVED_CFLAGS}" -if test "x$je_cv_syscall" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_SYSCALL], [ ]) + [je_cv_syscall]) + CFLAGS="${SAVED_CFLAGS}" + if test "x$je_cv_syscall" = "xyes" ; then + AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) + fi fi dnl Check if the GNU-specific secure_getenv function exists. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index d7f3ef1c..def4ba55 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -66,8 +66,8 @@ */ #undef JEMALLOC_OSSPIN -/* Defined if syscall(2) is available. */ -#undef JEMALLOC_HAVE_SYSCALL +/* Defined if syscall(2) is usable. */ +#undef JEMALLOC_USE_SYSCALL /* * Defined if secure_getenv(3) is available. 
diff --git a/src/pages.c b/src/pages.c index 1311a5e6..5f0c9669 100644 --- a/src/pages.c +++ b/src/pages.c @@ -248,7 +248,7 @@ os_overcommits_proc(void) char buf[1]; ssize_t nread; -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_open) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); @@ -256,13 +256,13 @@ os_overcommits_proc(void) if (fd == -1) return (false); /* Error. */ -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_read) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); #else nread = read(fd, &buf, sizeof(buf)); #endif -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_close) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else close(fd); diff --git a/src/util.c b/src/util.c index 5b8175bc..dd8c2363 100755 --- a/src/util.c +++ b/src/util.c @@ -49,7 +49,7 @@ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_HAVE_SYSCALL) && defined(SYS_write) +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid * the possibility of memory allocation within libc. This is necessary From fbe30158184c28f00f109cf4b8870c554e996bab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 12:22:59 -0800 Subject: [PATCH 0555/2608] Update ChangeLog for 4.4.0. --- ChangeLog | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/ChangeLog b/ChangeLog index 587685d0..f75edd93 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,33 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.4.0 (December 3, 2016) + + New features: + - Add configure support for *-*-linux-android. 
(@cferris1000, @jasone) + - Add the --disable-syscall configure option, for use on systems that place + security-motivated limitations on syscall(2). (@jasone) + - Add support for Debian GNU/kFreeBSD. (@thesam) + + Optimizations: + - Add extent serial numbers and use them where appropriate as a sort key that + is higher priority than address, so that the allocation policy prefers older + extents. This tends to improve locality (decrease fragmentation) when + memory grows downward. (@jasone) + - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized + on Linux 4.5 and newer. (@jasone) + - Mark partially purged arena chunks as non-huge-page. This improves + interaction with Linux's transparent huge page functionality. (@jasone) + + Bug fixes: + - Fix size class computations for edge conditions involving extremely large + allocations. This regression was first released in 4.0.0. (@jasone, + @ingvarha) + - Remove overly restrictive assertions related to the cactive statistic. This + regression was first released in 4.1.0. (@jasone) + - Implement a more reliable detection scheme for os_unfair_lock on macOS. + (@jszakmeister) + * 4.3.1 (November 7, 2016) Bug fixes: From 2d1bb8980fff829c58dabbf122224f577879a32c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 12:22:59 -0800 Subject: [PATCH 0556/2608] Update ChangeLog for 4.4.0. --- ChangeLog | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/ChangeLog b/ChangeLog index 587685d0..f75edd93 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,33 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.4.0 (December 3, 2016) + + New features: + - Add configure support for *-*-linux-android. (@cferris1000, @jasone) + - Add the --disable-syscall configure option, for use on systems that place + security-motivated limitations on syscall(2). (@jasone) + - Add support for Debian GNU/kFreeBSD. 
(@thesam) + + Optimizations: + - Add extent serial numbers and use them where appropriate as a sort key that + is higher priority than address, so that the allocation policy prefers older + extents. This tends to improve locality (decrease fragmentation) when + memory grows downward. (@jasone) + - Refactor madvise(2) configuration so that MADV_FREE is detected and utilized + on Linux 4.5 and newer. (@jasone) + - Mark partially purged arena chunks as non-huge-page. This improves + interaction with Linux's transparent huge page functionality. (@jasone) + + Bug fixes: + - Fix size class computations for edge conditions involving extremely large + allocations. This regression was first released in 4.0.0. (@jasone, + @ingvarha) + - Remove overly restrictive assertions related to the cactive statistic. This + regression was first released in 4.1.0. (@jasone) + - Implement a more reliable detection scheme for os_unfair_lock on macOS. + (@jszakmeister) + * 4.3.1 (November 7, 2016) Bug fixes: From d4c5aceb7cb5c5cf7a6dfd62e072c7dd12188998 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 12 Dec 2016 18:04:20 -0800 Subject: [PATCH 0557/2608] Add a_type parameter to qr_{meld,split}(). 
--- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/qr.h | 8 ++++---- test/unit/qr.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 3c5573ee..d5690c08 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -445,7 +445,7 @@ JEMALLOC_INLINE void extent_ring_insert(extent_t *sentinel, extent_t *extent) { - qr_meld(sentinel, extent, qr_link); + qr_meld(sentinel, extent, extent_t, qr_link); } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 0fbaec25..3b5d0276 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -31,8 +31,8 @@ struct { \ (a_qrelm)->a_field.qre_next = (a_qr); \ } while (0) -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - void *t; \ +#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ + a_type *t; \ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ t = (a_qr_a)->a_field.qre_prev; \ @@ -44,8 +44,8 @@ struct { \ * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) +#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_type, a_field) #define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ diff --git a/test/unit/qr.c b/test/unit/qr.c index a2a2d902..8b764e11 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -215,22 +215,22 @@ TEST_BEGIN(test_qr_meld_split) for (i = 1; i < NENTRIES; i++) qr_after_insert(&entries[i - 1], &entries[i], link); - qr_split(&entries[0], &entries[SPLIT_INDEX], link); + qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); test_split_entries(entries); - qr_meld(&entries[0], &entries[SPLIT_INDEX], link); + qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link); test_entries_ring(entries); - qr_meld(&entries[0], &entries[SPLIT_INDEX], link); + qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link); test_split_entries(entries); - qr_split(&entries[0], &entries[SPLIT_INDEX], link); + qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); test_entries_ring(entries); - qr_split(&entries[0], &entries[0], link); + qr_split(&entries[0], &entries[0], ring_t, link); test_entries_ring(entries); - qr_meld(&entries[0], &entries[0], link); + qr_meld(&entries[0], &entries[0], ring_t, link); test_entries_ring(entries); } TEST_END From 2319152d9f5d9b33eebc36a50ccf4239f31c1ad9 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Sun, 23 Oct 2016 15:56:30 -0700 Subject: [PATCH 0558/2608] jemalloc cpp new/delete bindings Adds cpp bindings for jemalloc, along with necessary autoconf settings. This is mostly to add sized deallocation support, which can't be added from C directly. Sized deallocation is ~10% microbench improvement. * Import ax_cxx_compile_stdcxx.m4 from the autoconf repo, seems like the easiest way to get c++14 detection. * Adds various other changes, like CXXFLAGS, to configure.ac. 
* Adds new rules to Makefile.in for src/jemalloc-cpp.cpp, and a basic unittest. * Both new and delete are overridden, to ensure jemalloc is used for both. * TODO future enhancement of avoiding extra PLT thunks for new and delete - sdallocx and malloc are publicly exported jemalloc symbols, using an alias would link them directly. Unfortunately, was having trouble getting it to play nice with jemalloc's namespace support. Testing: Tested gcc 4.8, gcc 5, gcc 5.2, clang 4.0. Only gcc >= 5 has sized deallocation support, verified that the rest build correctly. Tested mac osx and Centos. Tested --with-jemalloc-prefix and --without-export. This resolves #202. --- .gitignore | 7 + INSTALL | 4 + Makefile.in | 85 ++- bin/jemalloc-config.in | 4 + configure.ac | 72 +++ .../jemalloc/internal/jemalloc_internal.h.in | 13 +- .../internal/jemalloc_internal_macros.h | 2 +- include/jemalloc/internal/rtree.h | 8 +- m4/ax_cxx_compile_stdcxx.m4 | 562 ++++++++++++++++++ src/jemalloc_cpp.cpp | 140 +++++ test/include/test/jemalloc_test.h.in | 11 +- test/include/test/test.h | 4 +- test/integration/cpp/basic.cpp | 18 + 13 files changed, 905 insertions(+), 25 deletions(-) create mode 100644 m4/ax_cxx_compile_stdcxx.m4 create mode 100644 src/jemalloc_cpp.cpp create mode 100644 test/integration/cpp/basic.cpp diff --git a/.gitignore b/.gitignore index 08278d08..548c7d1a 100644 --- a/.gitignore +++ b/.gitignore @@ -54,6 +54,13 @@ test/include/test/jemalloc_test_defs.h /test/integration/*.gcno /test/integration/*.out +/test/integration/cpp/[A-Za-z]* +!/test/integration/cpp/[A-Za-z]*.* +/test/integration/cpp/*.[od] +/test/integration/cpp/*.gcda +/test/integration/cpp/*.gcno +/test/integration/cpp/*.out + /test/src/*.[od] /test/src/*.gcda /test/src/*.gcno diff --git a/INSTALL b/INSTALL index 4cad3ee2..c7ca5a73 100644 --- a/INSTALL +++ b/INSTALL @@ -208,6 +208,10 @@ any of the following arguments (not a definitive list) to 'configure': intended as a workaround for systems that place security 
limitations on syscall(2). +--disable-cxx + Disable C++ integration. This will cause new and delete operator + implementations to be omitted. + --with-xslroot= Specify where to find DocBook XSL stylesheets when building the documentation. diff --git a/Makefile.in b/Makefile.in index 5e6f0c1a..22d9d86f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -9,6 +9,7 @@ vpath % . SHELL := /bin/sh CC := @CC@ +CXX := @CXX@ # Configuration parameters. DESTDIR = @@ -26,6 +27,8 @@ abs_objroot := @abs_objroot@ CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include EXTRA_CFLAGS := @EXTRA_CFLAGS@ CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) +EXTRA_CXXFLAGS := @EXTRA_CXXFLAGS@ +CXXFLAGS := @CXXFLAGS@ $(EXTRA_CXXFLAGS) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ @@ -140,8 +143,10 @@ C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := +C_UTIL_CPP_SRCS := else C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ @@ -195,11 +200,21 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ $(srcroot)test/integration/xallocx.c +ifeq (@enable_cxx@, 1) +CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp +TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp +else +CPP_SRCS := +TESTS_INTEGRATION_CPP := +endif TESTS_STRESS := $(srcroot)test/stress/microbench.c -TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_STRESS) + +TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) +CPP_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.$(O)) C_PIC_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) +CPP_PIC_OBJS := 
$(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.pic.$(O)) C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) @@ -209,15 +224,17 @@ C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_ TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib .PHONY: install_doc_html install_doc_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(TESTS_OBJS) +.SECONDARY : $(TESTS_OBJS) $(TESTS_CPP_OBJS) # Default target. 
all: build_lib @@ -239,15 +256,21 @@ build_doc: $(DOCS) # ifdef CC_MM -include $(C_OBJS:%.$(O)=%.d) +-include $(CPP_OBJS:%.$(O)=%.d) -include $(C_PIC_OBJS:%.$(O)=%.d) +-include $(CPP_PIC_OBJS:%.$(O)=%.d) -include $(C_JET_OBJS:%.$(O)=%.d) -include $(C_TESTLIB_OBJS:%.$(O)=%.d) -include $(TESTS_OBJS:%.$(O)=%.d) +-include $(TESTS_CPP_OBJS:%.$(O)=%.d) endif $(C_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c +$(CPP_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.cpp $(C_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c $(C_PIC_OBJS): CFLAGS += $(PIC_CFLAGS) +$(CPP_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.cpp +$(CPP_PIC_OBJS): CXXFLAGS += $(PIC_CFLAGS) $(C_JET_OBJS): $(objroot)src/%.jet.$(O): $(srcroot)src/%.c $(C_JET_OBJS): CFLAGS += -DJEMALLOC_JET $(C_TESTLIB_UNIT_OBJS): $(objroot)test/src/%.unit.$(O): $(srcroot)test/src/%.c @@ -260,11 +283,14 @@ $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_T $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST +$(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c +$(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include +$(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT +$(CPP_OBJS) $(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif ifndef CC_MM @@ -272,8 +298,8 @@ ifndef CC_MM HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) -$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) 
$(TESTS_OBJS): $(HEADERS) -$(TESTS_OBJS): $(objroot)test/include/test/jemalloc_test.h +$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) +$(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @@ -283,19 +309,26 @@ ifdef CC_MM @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< endif +$(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): + @mkdir -p $(@D) + $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) $(CTARGET) $< +ifdef CC_MM + @$(CXX) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< +endif + ifneq ($(SOREV),$(SO)) %.$(SO) : %.$(SOREV) @mkdir -p $(@D) ln -sf $( +], [[ + int *arr = (int *)malloc(sizeof(int) * 42); + if (arr == NULL) + return (1); +]], [je_cv_libstdcxx]) + if test "x${je_cv_libstdcxx}" = "xno" ; then + LIBS="${SAVED_LIBS}" + fi + else + enable_cxx="0" + fi +fi +AC_SUBST([enable_cxx]) +AC_SUBST([CXXFLAGS]) +AC_SUBST([EXTRA_CXXFLAGS]) + AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) if test "x${ac_cv_big_endian}" = "x1" ; then AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ]) @@ -516,6 +579,7 @@ if test "x${je_cv_attribute}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then JE_CFLAGS_APPEND([-fvisibility=hidden]) + JE_CXXFLAGS_APPEND([-fvisibility=hidden]) fi fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). 
@@ -824,11 +888,14 @@ if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then if test "x${optimize}" = "xyes" ; then if test "x$GCC" = "xyes" ; then JE_CFLAGS_APPEND([-O3]) + JE_CXXFLAGS_APPEND([-O3]) JE_CFLAGS_APPEND([-funroll-loops]) elif test "x$je_cv_msvc" = "xyes" ; then JE_CFLAGS_APPEND([-O2]) + JE_CXXFLAGS_APPEND([-O2]) else JE_CFLAGS_APPEND([-O]) + JE_CXXFLAGS_APPEND([-O]) fi fi fi @@ -1943,8 +2010,12 @@ AC_MSG_RESULT([CC : ${CC}]) AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) +AC_MSG_RESULT([CXX : ${CXX}]) +AC_MSG_RESULT([CXXFLAGS : ${CXXFLAGS}]) +AC_MSG_RESULT([EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) +AC_MSG_RESULT([DSO_LDFLAGS : ${DSO_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) AC_MSG_RESULT([RPATH_EXTRA : ${RPATH_EXTRA}]) AC_MSG_RESULT([]) @@ -1985,4 +2056,5 @@ AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([tls : ${enable_tls}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) +AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 85b34012..ba5207d8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_H #define JEMALLOC_INTERNAL_H +#ifdef __cplusplus +extern "C" { +#endif + #include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" @@ -135,7 +139,7 @@ static const bool config_cache_oblivious = #endif ; -#ifdef JEMALLOC_C11ATOMICS +#if defined(JEMALLOC_C11ATOMICS) && !defined(__cplusplus) #include #endif @@ -888,7 +892,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) 
ret = arenas[ind]; if (unlikely(ret == NULL)) { - ret = atomic_read_p((void *)&arenas[ind]); + ret = (arena_t *)atomic_read_p((void **)&arenas[ind]); if (init_if_missing && unlikely(ret == NULL)) ret = arena_init(tsdn, ind); } @@ -1194,4 +1198,9 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, #undef JEMALLOC_H_INLINES /******************************************************************************/ + +#ifdef __cplusplus +} +#endif + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index a08ba772..57492049 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -52,6 +52,6 @@ # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif -#ifndef JEMALLOC_HAS_RESTRICT +#if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) # define restrict #endif diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 9c6cc22f..b2a2800e 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -260,7 +260,7 @@ rtree_child_tryread(rtree_elm_t *elm, bool dependent) /* Double-checked read (first read may be stale). */ child = elm->child; if (!dependent && !rtree_node_valid(child)) - child = atomic_read_p(&elm->pun); + child = (rtree_elm_t *)atomic_read_p(&elm->pun); assert(!dependent || child != NULL); return (child); } @@ -320,8 +320,10 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) /* Double-checked read (first read may be stale). 
*/ subtree = rtree->levels[level].subtree; - if (!dependent && unlikely(!rtree_node_valid(subtree))) - subtree = atomic_read_p(&rtree->levels[level].subtree_pun); + if (!dependent && unlikely(!rtree_node_valid(subtree))) { + subtree = (rtree_elm_t *)atomic_read_p( + &rtree->levels[level].subtree_pun); + } assert(!dependent || subtree != NULL); return (subtree); } diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 new file mode 100644 index 00000000..2c18e49c --- /dev/null +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -0,0 +1,562 @@ +# =========================================================================== +# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_CXX_COMPILE_STDCXX(VERSION, [ext|noext], [mandatory|optional]) +# +# DESCRIPTION +# +# Check for baseline language coverage in the compiler for the specified +# version of the C++ standard. If necessary, add switches to CXX and +# CXXCPP to enable support. VERSION may be '11' (for the C++11 standard) +# or '14' (for the C++14 standard). +# +# The second argument, if specified, indicates whether you insist on an +# extended mode (e.g. -std=gnu++11) or a strict conformance mode (e.g. +# -std=c++11). If neither is specified, you get whatever works, with +# preference for an extended mode. +# +# The third argument, if specified 'mandatory' or if left unspecified, +# indicates that baseline support for the specified C++ standard is +# required and that the macro should error out if no mode with that +# support is found. If specified 'optional', then configuration proceeds +# regardless, after defining HAVE_CXX${VERSION} if and only if a +# supporting mode is found. 
+# +# LICENSE +# +# Copyright (c) 2008 Benjamin Kosnik +# Copyright (c) 2012 Zack Weinberg +# Copyright (c) 2013 Roy Stogner +# Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov +# Copyright (c) 2015 Paul Norman +# Copyright (c) 2015 Moritz Klammler +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. + +#serial 4 + +dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro +dnl (serial version number 13). + +AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl + m4_if([$1], [11], [], + [$1], [14], [], + [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], + [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$2], [], [], + [$2], [ext], [], + [$2], [noext], [], + [m4_fatal([invalid second argument `$2' to AX_CXX_COMPILE_STDCXX])])dnl + m4_if([$3], [], [ax_cxx_compile_cxx$1_required=true], + [$3], [mandatory], [ax_cxx_compile_cxx$1_required=true], + [$3], [optional], [ax_cxx_compile_cxx$1_required=false], + [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) + AC_LANG_PUSH([C++])dnl + ac_success=no + AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, + ax_cv_cxx_compile_cxx$1, + [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [ax_cv_cxx_compile_cxx$1=yes], + [ax_cv_cxx_compile_cxx$1=no])]) + if test x$ax_cv_cxx_compile_cxx$1 = xyes; then + ac_success=yes + fi + + m4_if([$2], [noext], [], [dnl + if test x$ac_success = xno; then + for switch in -std=gnu++$1 -std=gnu++0x; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval 
$cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi]) + + m4_if([$2], [ext], [], [dnl + if test x$ac_success = xno; then + dnl HP's aCC needs +std=c++11 according to: + dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf + dnl Cray's crayCC needs "-h std=c++11" + for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break + fi + done + fi]) + AC_LANG_POP([C++]) + if test x$ax_cxx_compile_cxx$1_required = xtrue; then + if test x$ac_success = xno; then + AC_MSG_ERROR([*** A compiler with support for C++$1 language features is required.]) + fi + fi + if test x$ac_success = xno; then + HAVE_CXX$1=0 + AC_MSG_NOTICE([No compiler with C++$1 support was found]) + else + HAVE_CXX$1=1 + AC_DEFINE(HAVE_CXX$1,1, + [define if the compiler supports basic C++$1 syntax]) + fi + AC_SUBST(HAVE_CXX$1) +]) + + +dnl Test body for checking C++11 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_11], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 +) + + +dnl Test body for checking C++14 support + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 +) + + +dnl Tests for new features in C++11 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_11], [[ + +// If the compiler admits that it is not ready for C++11, why 
torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201103L + +#error "This is not a C++11 compiler" + +#else + +namespace cxx11 +{ + + namespace test_static_assert + { + + template + struct check + { + static_assert(sizeof(int) <= sizeof(T), "not big enough"); + }; + + } + + namespace test_final_override + { + + struct Base + { + virtual void f() {} + }; + + struct Derived : public Base + { + virtual void f() override {} + }; + + } + + namespace test_double_right_angle_brackets + { + + template < typename T > + struct check {}; + + typedef check single_type; + typedef check> double_type; + typedef check>> triple_type; + typedef check>>> quadruple_type; + + } + + namespace test_decltype + { + + int + f() + { + int a = 1; + decltype(a) b = 2; + return a + b; + } + + } + + namespace test_type_deduction + { + + template < typename T1, typename T2 > + struct is_same + { + static const bool value = false; + }; + + template < typename T > + struct is_same + { + static const bool value = true; + }; + + template < typename T1, typename T2 > + auto + add(T1 a1, T2 a2) -> decltype(a1 + a2) + { + return a1 + a2; + } + + int + test(const int c, volatile int v) + { + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == false, ""); + auto ac = c; + auto av = v; + auto sumi = ac + av + 'x'; + auto sumf = ac + av + 1.0; + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == true, ""); + static_assert(is_same::value == false, ""); + static_assert(is_same::value == true, ""); + return (sumf > 0.0) ? 
sumi : add(c, v); + } + + } + + namespace test_noexcept + { + + int f() { return 0; } + int g() noexcept { return 0; } + + static_assert(noexcept(f()) == false, ""); + static_assert(noexcept(g()) == true, ""); + + } + + namespace test_constexpr + { + + template < typename CharT > + unsigned long constexpr + strlen_c_r(const CharT *const s, const unsigned long acc) noexcept + { + return *s ? strlen_c_r(s + 1, acc + 1) : acc; + } + + template < typename CharT > + unsigned long constexpr + strlen_c(const CharT *const s) noexcept + { + return strlen_c_r(s, 0UL); + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("1") == 1UL, ""); + static_assert(strlen_c("example") == 7UL, ""); + static_assert(strlen_c("another\0example") == 7UL, ""); + + } + + namespace test_rvalue_references + { + + template < int N > + struct answer + { + static constexpr int value = N; + }; + + answer<1> f(int&) { return answer<1>(); } + answer<2> f(const int&) { return answer<2>(); } + answer<3> f(int&&) { return answer<3>(); } + + void + test() + { + int i = 0; + const int c = 0; + static_assert(decltype(f(i))::value == 1, ""); + static_assert(decltype(f(c))::value == 2, ""); + static_assert(decltype(f(0))::value == 3, ""); + } + + } + + namespace test_uniform_initialization + { + + struct test + { + static const int zero {}; + static const int one {1}; + }; + + static_assert(test::zero == 0, ""); + static_assert(test::one == 1, ""); + + } + + namespace test_lambdas + { + + void + test1() + { + auto lambda1 = [](){}; + auto lambda2 = lambda1; + lambda1(); + lambda2(); + } + + int + test2() + { + auto a = [](int i, int j){ return i + j; }(1, 2); + auto b = []() -> int { return '0'; }(); + auto c = [=](){ return a + b; }(); + auto d = [&](){ return c; }(); + auto e = [a, &b](int x) mutable { + const auto identity = [](int y){ return y; }; + for (auto i = 0; i < a; ++i) + a += b--; + return x + identity(a + b); + }(0); + return a + b + c + d + e; + } + + int + test3() + { + 
const auto nullary = [](){ return 0; }; + const auto unary = [](int x){ return x; }; + using nullary_t = decltype(nullary); + using unary_t = decltype(unary); + const auto higher1st = [](nullary_t f){ return f(); }; + const auto higher2nd = [unary](nullary_t f1){ + return [unary, f1](unary_t f2){ return f2(unary(f1())); }; + }; + return higher1st(nullary) + higher2nd(nullary)(unary); + } + + } + + namespace test_variadic_templates + { + + template + struct sum; + + template + struct sum + { + static constexpr auto value = N0 + sum::value; + }; + + template <> + struct sum<> + { + static constexpr auto value = 0; + }; + + static_assert(sum<>::value == 0, ""); + static_assert(sum<1>::value == 1, ""); + static_assert(sum<23>::value == 23, ""); + static_assert(sum<1, 2>::value == 3, ""); + static_assert(sum<5, 5, 11>::value == 21, ""); + static_assert(sum<2, 3, 5, 7, 11, 13>::value == 41, ""); + + } + + // http://stackoverflow.com/questions/13728184/template-aliases-and-sfinae + // Clang 3.1 fails with headers of libstd++ 4.8.3 when using std::function + // because of this. + namespace test_template_alias_sfinae + { + + struct foo {}; + + template + using member = typename T::member_type; + + template + void func(...) {} + + template + void func(member*) {} + + void test(); + + void test() { func(0); } + + } + +} // namespace cxx11 + +#endif // __cplusplus >= 201103L + +]]) + + +dnl Tests for new features in C++14 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_14], [[ + +// If the compiler admits that it is not ready for C++14, why torture it? +// Hopefully, this will speed up the test. + +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201402L + +#error "This is not a C++14 compiler" + +#else + +namespace cxx14 +{ + + namespace test_polymorphic_lambdas + { + + int + test() + { + const auto lambda = [](auto&&... args){ + const auto istiny = [](auto x){ + return (sizeof(x) == 1UL) ? 
1 : 0; + }; + const int aretiny[] = { istiny(args)... }; + return aretiny[0]; + }; + return lambda(1, 1L, 1.0f, '1'); + } + + } + + namespace test_binary_literals + { + + constexpr auto ivii = 0b0000000000101010; + static_assert(ivii == 42, "wrong value"); + + } + + namespace test_generalized_constexpr + { + + template < typename CharT > + constexpr unsigned long + strlen_c(const CharT *const s) noexcept + { + auto length = 0UL; + for (auto p = s; *p; ++p) + ++length; + return length; + } + + static_assert(strlen_c("") == 0UL, ""); + static_assert(strlen_c("x") == 1UL, ""); + static_assert(strlen_c("test") == 4UL, ""); + static_assert(strlen_c("another\0test") == 7UL, ""); + + } + + namespace test_lambda_init_capture + { + + int + test() + { + auto x = 0; + const auto lambda1 = [a = x](int b){ return a + b; }; + const auto lambda2 = [a = lambda1(x)](){ return a; }; + return lambda2(); + } + + } + + namespace test_digit_seperators + { + + constexpr auto ten_million = 100'000'000; + static_assert(ten_million == 100000000, ""); + + } + + namespace test_return_type_deduction + { + + auto f(int& x) { return x; } + decltype(auto) g(int& x) { return x; } + + template < typename T1, typename T2 > + struct is_same + { + static constexpr auto value = false; + }; + + template < typename T > + struct is_same + { + static constexpr auto value = true; + }; + + int + test() + { + auto x = 0; + static_assert(is_same::value, ""); + static_assert(is_same::value, ""); + return x; + } + + } + +} // namespace cxx14 + +#endif // __cplusplus >= 201402L + +]]) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp new file mode 100644 index 00000000..4c5756b3 --- /dev/null +++ b/src/jemalloc_cpp.cpp @@ -0,0 +1,140 @@ +#include +#include + +#define JEMALLOC_CPP_CPP_ +#include "jemalloc/internal/jemalloc_internal.h" + +// All operators in this file are exported. + +// Possibly alias hidden versions of malloc and sdallocx to avoid an extra plt +// thunk? 
+// +// extern __typeof (sdallocx) sdallocx_int +// __attribute ((alias ("sdallocx"), +// visibility ("hidden"))); +// +// ... but it needs to work with jemalloc namespaces. + +void *operator new(std::size_t size); +void *operator new[](std::size_t size); +void *operator new(std::size_t size, const std::nothrow_t&) noexcept; +void *operator new[](std::size_t size, const std::nothrow_t&) noexcept; +void operator delete(void* ptr) noexcept; +void operator delete[](void* ptr) noexcept; +void operator delete(void* ptr, const std::nothrow_t&) noexcept; +void operator delete[](void* ptr, const std::nothrow_t&) noexcept; + +#if __cpp_sized_deallocation >= 201309 +/* C++14's sized-delete operators. */ +void operator delete(void* ptr, std::size_t size) noexcept; +void operator delete[](void* ptr, std::size_t size) noexcept; +#endif + + +template +JEMALLOC_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) +{ + void* ptr = je_malloc(size); + if (likely(ptr != nullptr)) + return (ptr); + + while (ptr == nullptr) { + std::new_handler handler; + // GCC-4.8 and clang 4.0 do not have std::get_new_handler. 
+ { + static std::mutex mtx; + std::lock_guard lock(mtx); + + handler = std::set_new_handler(nullptr); + std::set_new_handler(handler); + } + if (handler == nullptr) + break; + + try { + handler(); + } catch (const std::bad_alloc&) { + break; + } + + ptr = je_malloc(size); + } + + if (ptr == nullptr && !IsNoExcept) + std::__throw_bad_alloc(); + return (ptr); +} + +void * +operator new(std::size_t size) +{ + + return (newImpl(size)); +} + +void * +operator new[](std::size_t size) +{ + + return (newImpl(size)); +} + +void * +operator new(std::size_t size, const std::nothrow_t&) noexcept +{ + + return (newImpl(size)); +} + +void * +operator new[](std::size_t size, const std::nothrow_t&) noexcept +{ + + return (newImpl(size)); +} + +void +operator delete(void* ptr) noexcept +{ + + je_free(ptr); +} + +void +operator delete[](void* ptr) noexcept +{ + + je_free(ptr); +} + +void +operator delete(void* ptr, const std::nothrow_t&) noexcept +{ + + je_free(ptr); +} + +void operator delete[](void* ptr, const std::nothrow_t&) noexcept +{ + + je_free(ptr); +} + +#if __cpp_sized_deallocation >= 201309 + +void +operator delete(void* ptr, std::size_t size) noexcept +{ + + je_sdallocx(ptr, size, /*flags=*/0); +} + +void operator delete[](void* ptr, std::size_t size) noexcept +{ + + je_sdallocx(ptr, size, /*flags=*/0); +} + +#endif // __cpp_sized_deallocation diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 1f36e469..66485c0e 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -1,3 +1,7 @@ +#ifdef __cplusplus +extern "C" { +#endif + #include #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX @@ -47,7 +51,8 @@ * expose the minimum necessary internal utility code (to avoid re-implementing * essentially identical code within the test infrastructure). 
*/ -#elif defined(JEMALLOC_INTEGRATION_TEST) +#elif defined(JEMALLOC_INTEGRATION_TEST) || \ + defined(JEMALLOC_INTEGRATION_CPP_TEST) # define JEMALLOC_MANGLE # include "jemalloc/jemalloc@install_suffix@.h" # include "jemalloc/internal/jemalloc_internal_defs.h" @@ -161,3 +166,7 @@ static const bool config_debug = if (!(e)) \ not_implemented(); \ } while (0) + +#ifdef __cplusplus +} +#endif diff --git a/test/include/test/test.h b/test/include/test/test.h index c8112eb8..8c69fc2e 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -8,8 +8,8 @@ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Failed assertion: " \ - "(%s) "#cmp" (%s) --> " \ - "%"pri" "#neg_cmp" %"pri": ", \ + "(%s) " #cmp " (%s) --> " \ + "%" pri " " #neg_cmp " %" pri ": ", \ __func__, __FILE__, __LINE__, \ #a, #b, a_, b_); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp new file mode 100644 index 00000000..eeb93c47 --- /dev/null +++ b/test/integration/cpp/basic.cpp @@ -0,0 +1,18 @@ +#include +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_basic) +{ + auto foo = new long(4); + assert_ptr_not_null(foo, "Unexpected new[] failure"); + delete foo; +} +TEST_END + +int +main() +{ + + return (test( + test_basic)); +} From 69c26cdb01d9d42c3317ca0bdf8632e351d805e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Dec 2016 13:38:11 -0800 Subject: [PATCH 0559/2608] Add some missing explicit casts. 
--- include/jemalloc/internal/tsd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b33de703..c4f010ae 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -482,13 +482,14 @@ a_name##tsd_wrapper_get(bool init) \ \ if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ - wrapper = tsd_init_check_recursion( \ - &a_name##tsd_init_head, &block); \ + wrapper = (a_name##tsd_wrapper_t *) \ + tsd_init_check_recursion(&a_name##tsd_init_head, \ + &block); \ if (wrapper) \ return (wrapper); \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = wrapper; \ + block.data = (void *)wrapper; \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ From 590ee2a6e0fcb3de63809473f0da8c9aff627e1e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Dec 2016 14:53:10 -0800 Subject: [PATCH 0560/2608] Update Travis-CI config for C++ integration. 
--- .travis.yml | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 1fed4f8e..bda54773 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,27 +1,24 @@ -language: c +language: generic matrix: include: - os: linux - compiler: gcc + env: CC=gcc CXX=g++ + env: CC=clang CXX=clang++ - os: linux - compiler: gcc - env: - - EXTRA_FLAGS=-m32 + env: CC=gcc CXX=g++ EXTRA_FLAGS=-m32 + env: CC=clang CXX=clang++ EXTRA_FLAGS=-m32 addons: apt: packages: - gcc-multilib - os: osx - compiler: clang - - os: osx - compiler: clang - env: - - EXTRA_FLAGS=-m32 + env: CC=clang CXX=clang++ + env: CC=clang CXX=clang++ EXTRA_FLAGS=-m32 before_script: - autoconf - - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS" CXX="$CXX $EXTRA_FLAGS"} - make -j3 - make -j3 tests From a965a9cb12ac9d01cb0ff114e486c63585b72999 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Dec 2016 16:19:20 -0800 Subject: [PATCH 0561/2608] Re-expand the Travis-CI build matrix. --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.travis.yml b/.travis.yml index bda54773..97641eca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,9 +4,15 @@ matrix: include: - os: linux env: CC=gcc CXX=g++ + - os: linux env: CC=clang CXX=clang++ - os: linux env: CC=gcc CXX=g++ EXTRA_FLAGS=-m32 + addons: + apt: + packages: + - gcc-multilib + - os: linux env: CC=clang CXX=clang++ EXTRA_FLAGS=-m32 addons: apt: @@ -14,6 +20,7 @@ matrix: - gcc-multilib - os: osx env: CC=clang CXX=clang++ + - os: osx env: CC=clang CXX=clang++ EXTRA_FLAGS=-m32 before_script: From 194d6f9de8ff92841b67f38a2a6a06818e3240dd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Dec 2016 07:18:55 -0800 Subject: [PATCH 0562/2608] Restructure *CFLAGS/*CXXFLAGS configuration. 
Convert CFLAGS/CXXFLAGS to be concatenations: CFLAGS := CONFIGURE_CFLAGS SPECIFIED_CFLAGS EXTRA_CFLAGS CXXFLAGS := CONFIGURE_CXXFLAGS SPECIFIED_CXXFLAGS EXTRA_CXXFLAGS This ordering makes it possible to override the flags set by the configure script both during and after configuration, with CFLAGS/CXXFLAGS and EXTRA_CFLAGS/EXTRA_CXXFLAGS, respectively. This resolves #504. --- INSTALL | 19 ++-- Makefile.in | 8 +- configure.ac | 274 ++++++++++++++++++++++++++++----------------------- 3 files changed, 168 insertions(+), 133 deletions(-) diff --git a/INSTALL b/INSTALL index c7ca5a73..6e593219 100644 --- a/INSTALL +++ b/INSTALL @@ -307,17 +307,18 @@ The following environment variables (not a definitive list) impact configure's behavior: CFLAGS="?" - Pass these flags to the compiler. You probably shouldn't define this unless - you know what you are doing. (Use EXTRA_CFLAGS instead.) +CXXFLAGS="?" + Pass these flags to the C/C++ compiler. Any flags set by the configure + script are prepended, which means explicitly set flags generally take + precedence. Take care when specifying flags such as -Werror, because + configure tests may be affected in undesirable ways. EXTRA_CFLAGS="?" - Append these flags to CFLAGS. This makes it possible to add flags such as - -Werror, while allowing the configure script to determine what other flags - are appropriate for the specified configuration. - - The configure script specifically checks whether an optimization flag (-O*) - is specified in EXTRA_CFLAGS, and refrains from specifying an optimization - level if it finds that one has already been specified. +EXTRA_CXXFLAGS="?" + Append these flags to CFLAGS/CXXFLAGS, without passing them to the + compiler(s) during configuration. This makes it possible to add flags such + as -Werror, while allowing the configure script to determine what other + flags are appropriate for the specified configuration. CPPFLAGS="?" Pass these flags to the C preprocessor. 
Note that CFLAGS is not passed to diff --git a/Makefile.in b/Makefile.in index 22d9d86f..7c1989b8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -25,10 +25,14 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include +CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@ +SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@ EXTRA_CFLAGS := @EXTRA_CFLAGS@ -CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) +CFLAGS := $(strip $(CONFIGURE_CFLAGS) $(SPECIFIED_CFLAGS) $(EXTRA_CFLAGS)) +CONFIGURE_CXXFLAGS := @CONFIGURE_CXXFLAGS@ +SPECIFIED_CXXFLAGS := @SPECIFIED_CXXFLAGS@ EXTRA_CXXFLAGS := @EXTRA_CXXFLAGS@ -CXXFLAGS := @CXXFLAGS@ $(EXTRA_CXXFLAGS) +CXXFLAGS := $(strip $(CONFIGURE_CXXFLAGS) $(SPECIFIED_CXXFLAGS) $(EXTRA_CXXFLAGS)) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ diff --git a/configure.ac b/configure.ac index f85b2693..bf4ea8f7 100644 --- a/configure.ac +++ b/configure.ac @@ -6,50 +6,87 @@ AC_CONFIG_AUX_DIR([build-aux]) dnl ============================================================================ dnl Custom macro definitions. -dnl JE_CFLAGS_APPEND(cflag) -AC_DEFUN([JE_CFLAGS_APPEND], +dnl JE_CONCAT_VVV(r, a, b) +dnl +dnl Set $r to the concatenation of $a and $b, with a space separating them iff +dnl both $a and $b are non-emty. +AC_DEFUN([JE_CONCAT_VVV], +if test "x[$]{$2}" = "x" -o "x[$]{$3}" = "x" ; then + $1="[$]{$2}[$]{$3}" +else + $1="[$]{$2} [$]{$3}" +fi +) + +dnl JE_APPEND_VS(a, b) +dnl +dnl Set $a to the concatenation of $a and b, with a space separating them iff +dnl both $a and b are non-empty. +AC_DEFUN([JE_APPEND_VS], + T_APPEND_V=$2 + JE_CONCAT_VVV($1, $1, T_APPEND_V) +) + +CONFIGURE_CFLAGS= +SPECIFIED_CFLAGS="${CFLAGS}" +dnl JE_CFLAGS_ADD(cflag) +dnl +dnl CFLAGS is the concatenation of CONFIGURE_CFLAGS and SPECIFIED_CFLAGS +dnl (ignoring EXTRA_CFLAGS, which does not impact configure tests. This macro +dnl appends to CONFIGURE_CFLAGS and regenerates CFLAGS. 
+AC_DEFUN([JE_CFLAGS_ADD], [ AC_MSG_CHECKING([whether compiler supports $1]) -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="$1" -else - CFLAGS="${CFLAGS} $1" -fi +T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" +JE_APPEND_VS(CONFIGURE_CFLAGS, $1) +JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ return 0; ]])], - [je_cv_cflags_appended=$1] + [je_cv_cflags_added=$1] AC_MSG_RESULT([yes]), - [je_cv_cflags_appended=] + [je_cv_cflags_added=] AC_MSG_RESULT([no]) - [CFLAGS="${TCFLAGS}"] + [CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"] ) +JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) ]) -dnl JE_CXXFLAGS_APPEND(cflag) -AC_DEFUN([JE_CXXFLAGS_APPEND], +dnl JE_CFLAGS_SAVE() +dnl JE_CFLAGS_RESTORE() +dnl +dnl Save/restore CFLAGS. Nesting is not supported. +AC_DEFUN([JE_CFLAGS_SAVE], +SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" +) +AC_DEFUN([JE_CFLAGS_RESTORE], +CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}" +JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) +) + +CONFIGURE_CXXFLAGS= +SPECIFIED_CXXFLAGS="${CXXFLAGS}" +dnl JE_CXXFLAGS_ADD(cxxflag) +AC_DEFUN([JE_CXXFLAGS_ADD], [ AC_MSG_CHECKING([whether compiler supports $1]) -TCXXFLAGS="${CXXFLAGS}" -if test "x${CXXFLAGS}" = "x" ; then - CXXFLAGS="$1" -else - CXXFLAGS="${CXXFLAGS} $1" -fi +T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}" +JE_APPEND_VS(CONFIGURE_CXXFLAGS, $1) +JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ return 0; ]])], - [je_cv_cflags_appended=$1] + [je_cv_cxxflags_added=$1] AC_MSG_RESULT([yes]), - [je_cv_cflags_appended=] + [je_cv_cxxflags_added=] AC_MSG_RESULT([no]) - [CXXFLAGS="${TCXXFLAGS}"] + [CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"] ) +JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) dnl JE_COMPILABLE(label, hcode, mcode, rvar) @@ -191,46 +228,45 @@ if test "x${je_cv_cray}" = "xyes" ; then [je_cv_cray_84=no])]) fi -if test "x$CFLAGS" = "x" ; 
then - no_CFLAGS="yes" - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu11]) - if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then +if test "x$GCC" = "xyes" ; then + JE_CFLAGS_ADD([-std=gnu11]) + if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_ADD([-std=gnu99]) + if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) - else - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) - fi fi - JE_CFLAGS_APPEND([-Wall]) - JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) - JE_CFLAGS_APPEND([-Wshorten-64-to-32]) - JE_CFLAGS_APPEND([-Wsign-compare]) - JE_CFLAGS_APPEND([-pipe]) - JE_CFLAGS_APPEND([-g3]) - elif test "x$je_cv_msvc" = "xyes" ; then - CC="$CC -nologo" - JE_CFLAGS_APPEND([-Zi]) - JE_CFLAGS_APPEND([-MT]) - JE_CFLAGS_APPEND([-W3]) - JE_CFLAGS_APPEND([-FS]) - CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi - if test "x$je_cv_cray" = "xyes" ; then - dnl cray compiler 8.4 has an inlining bug - if test "x$je_cv_cray_84" = "xyes" ; then - JE_CFLAGS_APPEND([-hipa2]) - JE_CFLAGS_APPEND([-hnognu]) - fi - if test "x$enable_cc_silence" != "xno" ; then - dnl ignore unreachable code warning - JE_CFLAGS_APPEND([-hnomessage=128]) - dnl ignore redefinition of "malloc", "free", etc warning - JE_CFLAGS_APPEND([-hnomessage=1357]) - fi + JE_CFLAGS_ADD([-Wall]) + JE_CFLAGS_ADD([-Werror=declaration-after-statement]) + JE_CFLAGS_ADD([-Wshorten-64-to-32]) + JE_CFLAGS_ADD([-Wsign-compare]) + JE_CFLAGS_ADD([-pipe]) + JE_CFLAGS_ADD([-g3]) +elif test "x$je_cv_msvc" = "xyes" ; then + CC="$CC -nologo" + JE_CFLAGS_ADD([-Zi]) + JE_CFLAGS_ADD([-MT]) + JE_CFLAGS_ADD([-W3]) + JE_CFLAGS_ADD([-FS]) + JE_APPEND_VS(CPPFLAGS, -I${srcdir}/include/msvc_compat) +fi +if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = 
"xyes" ; then + JE_CFLAGS_ADD([-hipa2]) + JE_CFLAGS_ADD([-hnognu]) + fi + if test "x$enable_cc_silence" != "xno" ; then + dnl ignore unreachable code warning + JE_CFLAGS_ADD([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_ADD([-hnomessage=1357]) fi fi +AC_SUBST([CONFIGURE_CFLAGS]) +AC_SUBST([SPECIFIED_CFLAGS]) AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP @@ -245,17 +281,16 @@ fi enable_cxx="1" ) if test "x$enable_cxx" = "x1" ; then - CXXFLAGS="" dnl Require at least c++14, which is the first version to support sized dnl deallocation. C++ support is not compiled otherwise. m4_include([m4/ax_cxx_compile_stdcxx.m4]) AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) if test "x${HAVE_CXX14}" = "x1" ; then - JE_CXXFLAGS_APPEND([-Wall]) - JE_CXXFLAGS_APPEND([-g3]) + JE_CXXFLAGS_ADD([-Wall]) + JE_CXXFLAGS_ADD([-g3]) SAVED_LIBS="${LIBS}" - LIBS="${LIBS} -lstdc++" + JE_APPEND_VS(LIBS, -lstdc++) JE_COMPILABLE([libstdc++ linkage], [ #include ], [[ @@ -271,7 +306,8 @@ if test "x$enable_cxx" = "x1" ; then fi fi AC_SUBST([enable_cxx]) -AC_SUBST([CXXFLAGS]) +AC_SUBST([CONFIGURE_CXXFLAGS]) +AC_SUBST([SPECIFIED_CXXFLAGS]) AC_SUBST([EXTRA_CXXFLAGS]) AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) @@ -280,7 +316,7 @@ if test "x${ac_cv_big_endian}" = "x1" ; then fi if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then - CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" + JE_APPEND_VS(CPPFLAGS, -I${srcdir}/include/msvc_compat/C99) fi if test "x${je_cv_msvc}" = "xyes" ; then @@ -411,7 +447,6 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. -CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in @@ -443,7 +478,7 @@ case "${host}" in ;; *-*-linux-android) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
- CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -454,7 +489,7 @@ case "${host}" in ;; *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -479,8 +514,8 @@ case "${host}" in abi="elf" RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). - CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" - LIBS="$LIBS -lposix4 -lsocket -lnsl" + JE_APPEND_VS(CPPFLAGS, -D_POSIX_PTHREAD_SEMANTICS) + JE_APPEND_VS(LIBS, -lposix4 -lsocket -lnsl) ;; *-ibm-aix*) if "$LG_SIZEOF_PTR" = "8"; then @@ -578,20 +613,20 @@ JE_COMPILABLE([__attribute__ syntax], if test "x${je_cv_attribute}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then - JE_CFLAGS_APPEND([-fvisibility=hidden]) - JE_CXXFLAGS_APPEND([-fvisibility=hidden]) + JE_CFLAGS_ADD([-fvisibility=hidden]) + JE_CXXFLAGS_ADD([-fvisibility=hidden]) fi fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; foo = 0;], [je_cv_tls_model]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_tls_model}" = "xyes" ; then AC_DEFINE([JEMALLOC_TLS_MODEL], [__attribute__((tls_model("initial-exec")))]) @@ -599,35 +634,35 @@ else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) fi dnl Check for alloc_size attribute support. 
-SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_alloc_size}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ]) fi dnl Check for format(gnu_printf, ...) attribute support. -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_format_gnu_printf}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ]) fi dnl Check for format(printf, ...) attribute support. -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) 
__attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_format_printf}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ]) fi @@ -689,9 +724,9 @@ if test "x$enable_code_coverage" = "x1" ; then deoptimize="no" echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || deoptimize="yes" if test "x${deoptimize}" = "xyes" ; then - JE_CFLAGS_APPEND([-O0]) + JE_CFLAGS_ADD([-O0]) fi - JE_CFLAGS_APPEND([-fprofile-arcs -ftest-coverage]) + JE_CFLAGS_ADD([-fprofile-arcs -ftest-coverage]) EXTRA_LDFLAGS="$EXTRA_LDFLAGS -fprofile-arcs -ftest-coverage" AC_DEFINE([JEMALLOC_CODE_COVERAGE], [ ]) fi @@ -881,22 +916,17 @@ if test "x$enable_ivsalloc" = "x1" ; then fi dnl Only optimize if not debugging. -if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then - dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. - optimize="no" - echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || optimize="yes" - if test "x${optimize}" = "xyes" ; then - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-O3]) - JE_CXXFLAGS_APPEND([-O3]) - JE_CFLAGS_APPEND([-funroll-loops]) - elif test "x$je_cv_msvc" = "xyes" ; then - JE_CFLAGS_APPEND([-O2]) - JE_CXXFLAGS_APPEND([-O2]) - else - JE_CFLAGS_APPEND([-O]) - JE_CXXFLAGS_APPEND([-O]) - fi +if test "x$enable_debug" = "x0" ; then + if test "x$GCC" = "xyes" ; then + JE_CFLAGS_ADD([-O3]) + JE_CXXFLAGS_ADD([-O3]) + JE_CFLAGS_ADD([-funroll-loops]) + elif test "x$je_cv_msvc" = "xyes" ; then + JE_CFLAGS_ADD([-O2]) + JE_CXXFLAGS_ADD([-O2]) + else + JE_CFLAGS_ADD([-O]) + JE_CXXFLAGS_ADD([-O]) fi fi @@ -960,10 +990,10 @@ fi, if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) if test "x$LUNWIND" = "x-lunwind" ; then - AC_CHECK_LIB([unwind], [unw_backtrace], [LIBS="$LIBS $LUNWIND"], + AC_CHECK_LIB([unwind], [unw_backtrace], [JE_APPEND_VS(LIBS, $LUNWIND)], 
[enable_prof_libunwind="0"]) else - LIBS="$LIBS $LUNWIND" + JE_APPEND_VS(LIBS, $LUNWIND) fi if test "x${enable_prof_libunwind}" = "x1" ; then backtrace_method="libunwind" @@ -985,7 +1015,7 @@ fi if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ -a "x$GCC" = "xyes" ; then AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) - AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) + AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [JE_APPEND_VS(LIBS, -lgcc)], [enable_prof_libgcc="0"]) if test "x${enable_prof_libgcc}" = "x1" ; then backtrace_method="libgcc" AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) @@ -1007,7 +1037,7 @@ fi ) if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ -a "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-fno-omit-frame-pointer]) + JE_CFLAGS_ADD([-fno-omit-frame-pointer]) backtrace_method="gcc intrinsics" AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) else @@ -1022,9 +1052,7 @@ AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then dnl Heap profiling uses the log(3) function. - if test "x$LM" != "x" ; then - LIBS="$LIBS $LM" - fi + JE_APPEND_VS(LIBS, $LM) AC_DEFINE([JEMALLOC_PROF], [ ]) fi @@ -1363,7 +1391,7 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl first, but try libc too before failing. - AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], + AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) JE_COMPILABLE([pthread_atfork(3)], [ @@ -1376,7 +1404,7 @@ if test "x$abi" != "xpecoff" ; then fi fi -CPPFLAGS="$CPPFLAGS -D_REENTRANT" +JE_APPEND_VS(CFLAGS, -D_REENTRANT) dnl Check whether clock_gettime(2) is in libc or librt. 
AC_SEARCH_LIBS([clock_gettime], [rt]) @@ -1385,13 +1413,13 @@ dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then if test "$ac_cv_search_clock_gettime" != "-lrt"; then - SAVED_CFLAGS="${CFLAGS}" + JE_CFLAGS_SAVE() unset ac_cv_search_clock_gettime - JE_CFLAGS_APPEND([-dynamic]) + JE_CFLAGS_ADD([-dynamic]) AC_SEARCH_LIBS([clock_gettime], [rt]) - CFLAGS="${SAVED_CFLAGS}" + JE_CFLAGS_RESTORE() fi fi @@ -1447,8 +1475,8 @@ fi if test "x$enable_syscall" = "x1" ; then dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS dnl X 10.12's deprecation warning prevents use. - SAVED_CFLAGS="${CFLAGS}" - JE_CFLAGS_APPEND([-Werror]) + JE_CFLAGS_SAVE() + JE_CFLAGS_ADD([-Werror]) JE_COMPILABLE([syscall(2)], [ #include #include @@ -1456,7 +1484,7 @@ if test "x$enable_syscall" = "x1" ; then syscall(SYS_write, 2, "hello", 5); ], [je_cv_syscall]) - CFLAGS="${SAVED_CFLAGS}" + JE_CFLAGS_RESTORE() if test "x$je_cv_syscall" = "xyes" ; then AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) fi @@ -1532,7 +1560,7 @@ if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], + [AC_CHECK_LIB([dl], [dlsym], [JE_APPEND_VS(LIBS, -ldl)], [AC_MSG_ERROR([libdl is missing])]) ]) fi @@ -2007,11 +2035,13 @@ AC_MSG_RESULT([library revision : ${rev}]) AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) -AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([CONFIGURE_CFLAGS : ${CONFIGURE_CFLAGS}]) +AC_MSG_RESULT([SPECIFIED_CFLAGS : ${SPECIFIED_CFLAGS}]) AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([CXX : ${CXX}]) -AC_MSG_RESULT([CXXFLAGS : ${CXXFLAGS}]) +AC_MSG_RESULT([CONFIGURE_CXXFLAGS : 
${CONFIGURE_CXXFLAGS}]) +AC_MSG_RESULT([SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}]) AC_MSG_RESULT([EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) From bacb6afc6c5a83c5bf2e5e04a6db99600046e971 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 21 Dec 2016 12:33:17 -0800 Subject: [PATCH 0563/2608] Simplify arena_slab_regind(). Rewrite arena_slab_regind() to provide sufficient constant data for the compiler to perform division strength reduction. This replaces more general manual strength reduction that was implemented before arena_bin_info was compile-time-constant. It would be possible to slightly improve on the compiler-generated division code by taking advantage of range limits that the compiler doesn't know about. --- Makefile.in | 1 + include/jemalloc/internal/arena.h | 3 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 85 ++++++------------- test/unit/slab.c | 35 ++++++++ 5 files changed, 66 insertions(+), 59 deletions(-) create mode 100644 test/unit/slab.c diff --git a/Makefile.in b/Makefile.in index 7c1989b8..22b11349 100644 --- a/Makefile.in +++ b/Makefile.in @@ -184,6 +184,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ + $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/ticker.c \ diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index ad400839..d6b1a2b0 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -271,6 +271,9 @@ void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool cache); void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool cache); +#ifdef JEMALLOC_JET +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +#endif extent_t 
*arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4560d702..63974880 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -66,6 +66,7 @@ arena_ralloc_no_move arena_reset arena_salloc arena_sdalloc +arena_slab_regind arena_stats_merge arena_tcache_fill_small arena_tdata_get diff --git a/src/arena.c b/src/arena.c index 488bfd47..73fea528 100644 --- a/src/arena.c +++ b/src/arena.c @@ -138,73 +138,41 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, return (ret); } -JEMALLOC_INLINE_C size_t -arena_slab_regind(extent_t *slab, const arena_bin_info_t *bin_info, - const void *ptr) +#ifndef JEMALLOC_JET +JEMALLOC_INLINE_C +#endif +size_t +arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { - size_t diff, interval, shift, regind; + size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab)); assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)bin_info->reg_size == 0); + (uintptr_t)arena_bin_info[binind].reg_size == 0); - /* - * Avoid doing division with a variable divisor if possible. Using - * actual division here can reduce allocator throughput by over 20%! - */ + /* Avoid doing division with a variable divisor. */ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - - /* Rescale (factor powers of 2 out of the numerator and denominator). */ - interval = bin_info->reg_size; - shift = ffs_zu(interval) - 1; - diff >>= shift; - interval >>= shift; - - if (interval == 1) { - /* The divisor was a power of 2. 
*/ - regind = diff; - } else { - /* - * To divide by a number D that is not a power of two we - * multiply by (2^21 / D) and then right shift by 21 positions. - * - * X / D - * - * becomes - * - * (X * interval_invs[D - 3]) >> SIZE_INV_SHIFT - * - * We can omit the first three elements, because we never - * divide by 0, and 1 and 2 are both powers of two, which are - * handled above. - */ -#define SIZE_INV_SHIFT ((sizeof(size_t) << 3) - LG_SLAB_MAXREGS) -#define SIZE_INV(s) (((ZU(1) << SIZE_INV_SHIFT) / (s)) + 1) - static const size_t interval_invs[] = { - SIZE_INV(3), - SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7), - SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11), - SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15), - SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19), - SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23), - SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27), - SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31) - }; - - if (likely(interval <= ((sizeof(interval_invs) / sizeof(size_t)) - + 2))) { - regind = (diff * interval_invs[interval - 3]) >> - SIZE_INV_SHIFT; - } else - regind = diff / interval; -#undef SIZE_INV -#undef SIZE_INV_SHIFT + switch (binind) { +#define REGIND_bin_yes(index, reg_size) \ + case index: \ + regind = diff / (reg_size); \ + assert(diff == regind * (reg_size)); \ + break; +#define REGIND_bin_no(index, reg_size) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + REGIND_bin_##bin(index, (1U<nregs); + + assert(regind < arena_bin_info[binind].nregs); return (regind); } @@ -215,7 +183,7 @@ arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, { szind_t binind = slab_data->binind; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; - size_t regind = arena_slab_regind(slab, bin_info, ptr); + size_t regind = arena_slab_regind(slab, binind, ptr); assert(slab_data->nfree < bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. 
*/ @@ -1022,7 +990,6 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, const arena_bin_info_t *bin_info; extent_t *slab; - bin_info = &arena_bin_info[binind]; if (bin->slabcur != NULL) { arena_bin_slabs_full_insert(bin, bin->slabcur); diff --git a/test/unit/slab.c b/test/unit/slab.c new file mode 100644 index 00000000..42e82a8b --- /dev/null +++ b/test/unit/slab.c @@ -0,0 +1,35 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_arena_slab_regind) +{ + szind_t binind; + + for (binind = 0; binind < NBINS; binind++) { + size_t regind; + extent_t slab; + const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + extent_init(&slab, NULL, mallocx(bin_info->slab_size, + MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, 0, 0, true, + false, true, true); + assert_ptr_not_null(extent_addr_get(&slab), + "Unexpected malloc() failure"); + for (regind = 0; regind < bin_info->nregs; regind++) { + void *reg = (void *)((uintptr_t)extent_addr_get(&slab) + + (bin_info->reg_size * regind)); + assert_zu_eq(arena_slab_regind(&slab, binind, reg), + regind, + "Incorrect region index computed for size %zu", + bin_info->reg_size); + } + free(extent_addr_get(&slab)); + } +} +TEST_END + +int +main(void) +{ + + return (test( + test_arena_slab_regind)); +} From eab3b180e59d6b23fee5fd2165f96402e7341cba Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Dec 2016 11:15:44 -0800 Subject: [PATCH 0564/2608] Fix JSON-mode output for !config_stats and/or !config_prof cases. These bugs were introduced by 0ba5b9b6189e16a983d8922d8c5cb6ab421906e8 (Add "J" (JSON) support to malloc_stats_print().), which was backported as b599b32280e1142856b0b96293a71e1684b1ccfb (with the same bugs except the inapplicable "metatata" misspelling) and first released in 4.3.0. 
--- src/stats.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/stats.c b/src/stats.c index 3072b2ab..e150a27f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -407,7 +407,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.metadata", i, &metadata, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"metatata\": %zu%s\n", metadata, (bins || large) ? + "\t\t\t\t\"metadata\": %zu%s\n", metadata, (bins || large) ? "," : ""); } else { malloc_cprintf(write_cb, cbopaque, @@ -422,7 +422,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool unmerged) + bool json, bool more) { const char *cpv; bool bv; @@ -717,11 +717,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t]\n"); malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); + "\t\t}%s\n", (config_prof || more) ? "," : ""); } /* prof. */ - if (json) { + if (config_prof && json) { malloc_cprintf(write_cb, cbopaque, "\t\t\"prof\": {\n"); @@ -747,8 +747,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\"lg_sample\": %zd\n", ssv); malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : - ""); + "\t\t}%s\n", more ? 
"," : ""); } } @@ -872,8 +871,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t u64sz; bool json = false; bool general = true; - bool merged = true; - bool unmerged = true; + bool merged = config_stats; + bool unmerged = config_stats; bool bins = true; bool large = true; @@ -936,8 +935,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "___ Begin jemalloc statistics ___\n"); } - if (general) - stats_general_print(write_cb, cbopaque, json, merged, unmerged); + if (general) { + bool more = (merged || unmerged); + stats_general_print(write_cb, cbopaque, json, more); + } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, unmerged, bins, large); From c1baa0a9b7b05ebf98221dc7deb12c28e170a399 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 17 Nov 2016 13:36:17 -0800 Subject: [PATCH 0565/2608] Add huge page configuration and pages_[no}huge(). Add the --with-lg-hugepage configure option, but automatically configure LG_HUGEPAGE even if it isn't specified. Add the pages_[no]huge() functions, which toggle huge page state via madvise(..., MADV_[NO]HUGEPAGE) calls. --- INSTALL | 5 +++ Makefile.in | 1 + configure.ac | 44 +++++++++++++++++-- .../jemalloc/internal/jemalloc_internal.h.in | 7 +++ .../internal/jemalloc_internal_defs.h.in | 13 ++++++ include/jemalloc/internal/pages.h | 14 +++++- include/jemalloc/internal/private_symbols.txt | 2 + src/pages.c | 31 ++++++++++++- test/unit/pages.c | 30 +++++++++++++ 9 files changed, 141 insertions(+), 6 deletions(-) create mode 100644 test/unit/pages.c diff --git a/INSTALL b/INSTALL index 6e593219..d7496612 100644 --- a/INSTALL +++ b/INSTALL @@ -227,6 +227,11 @@ any of the following arguments (not a definitive list) to 'configure': --with-lg-page, but its primary use case is for integration with FreeBSD's libc, wherein jemalloc is embedded. +--with-lg-hugepage= + Specify the base 2 log of the system huge page size. 
This option is useful + when cross compiling, or when overriding the default for systems that do + not explicitly support huge pages. + --with-lg-size-class-group= Specify the base 2 log of how many size classes to use for each doubling in size. By default jemalloc uses =2, which results in diff --git a/Makefile.in b/Makefile.in index 22b11349..052688bd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -170,6 +170,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/pack.c \ + $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/configure.ac b/configure.ac index bf4ea8f7..f886aeb5 100644 --- a/configure.ac +++ b/configure.ac @@ -1317,6 +1317,36 @@ else AC_MSG_ERROR([cannot determine value for LG_PAGE]) fi +AC_ARG_WITH([lg_hugepage], + [AS_HELP_STRING([--with-lg-hugepage=], + [Base 2 log of sytem huge page size])], + [je_cv_lg_hugepage="${with_lg_hugepage}"], + [je_cv_lg_hugepage=""]) +if test "x${je_cv_lg_hugepage}" = "x" ; then + dnl Look in /proc/meminfo (Linux-specific) for information on the default huge + dnl page size, if any. The relevant line looks like: + dnl + dnl Hugepagesize: 2048 kB + if test -e "/proc/meminfo" ; then + hpsk=[`cat /proc/meminfo 2>/dev/null | \ + grep -e '^Hugepagesize:[[:space:]]\+[0-9]\+[[:space:]]kB$' | \ + awk '{print $2}'`] + if test "x${hpsk}" != "x" ; then + je_cv_lg_hugepage=10 + while test "${hpsk}" -gt 1 ; do + hpsk="$((hpsk / 2))" + je_cv_lg_hugepage="$((je_cv_lg_hugepage + 1))" + done + fi + fi + + dnl Set default if unable to automatically configure. 
+ if test "x${je_cv_lg_hugepage}" = "x" ; then + je_cv_lg_hugepage=21 + fi +fi +AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) + AC_ARG_WITH([lg_page_sizes], [AS_HELP_STRING([--with-lg-page-sizes=], [Base 2 logs of system page sizes to support])], @@ -1690,6 +1720,8 @@ JE_COMPILABLE([madvise(2)], [ madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_FREE). JE_COMPILABLE([madvise(..., MADV_FREE)], [ #include @@ -1710,9 +1742,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) fi - if test "x${je_cv_madv_free}" = "xyes" \ - -o "x${je_cv_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). + JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ +#include +], [ + madvise((void *)0, 0, MADV_HUGEPAGE); + madvise((void *)0, 0, MADV_NOHUGEPAGE); +], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) fi fi diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index ba5207d8..bfa84a22 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -138,6 +138,13 @@ static const bool config_cache_oblivious = false #endif ; +static const bool have_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; #if defined(JEMALLOC_C11ATOMICS) && !defined(__cplusplus) #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index aa0c0474..722c41dd 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -187,6 +187,13 @@ /* One page is 2^LG_PAGE bytes. */ #undef LG_PAGE +/* + * One huge page is 2^LG_HUGEPAGE bytes. 
Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#undef LG_HUGEPAGE + /* * If defined, adjacent virtual memory mappings with identical attributes * automatically coalesce, and they fragment when changes are made to subranges. @@ -262,6 +269,12 @@ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_THP + /* Define if operating system has alloca.h header. */ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 16c657a0..034a8aac 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -7,15 +7,23 @@ #endif #define PAGE ((size_t)(1U << LG_PAGE)) #define PAGE_MASK ((size_t)(PAGE - 1)) - /* Return the page base address for the page containing address a. */ #define PAGE_ADDR2BASE(a) \ ((void *)((uintptr_t)(a) & ~PAGE_MASK)) - /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Huge page size. LG_HUGEPAGE is determined by the configure script. */ +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +/* Return the huge page base address for the huge page containing address a. */ +#define HUGEPAGE_ADDR2BASE(a) \ + ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) +/* Return the smallest pagesize multiple that is >= s. 
*/ +#define HUGEPAGE_CEILING(s) \ + (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -31,6 +39,8 @@ void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); void pages_boot(void); #endif /* JEMALLOC_H_EXTERNS */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 63974880..1facc928 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -324,7 +324,9 @@ opt_zero pages_boot pages_commit pages_decommit +pages_huge pages_map +pages_nohuge pages_purge pages_trim pages_unmap diff --git a/src/pages.c b/src/pages.c index 6af228ac..8bef6fac 100644 --- a/src/pages.c +++ b/src/pages.c @@ -170,7 +170,8 @@ pages_purge(void *addr, size_t size) #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); unzeroed = true; -#elif defined(JEMALLOC_HAVE_MADVISE) +#elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ + defined(JEMALLOC_PURGE_MADVISE_DONTNEED)) # if defined(JEMALLOC_PURGE_MADVISE_FREE) # define JEMALLOC_MADV_PURGE MADV_FREE # define JEMALLOC_MADV_ZEROS false @@ -191,6 +192,34 @@ pages_purge(void *addr, size_t size) return (unzeroed); } +bool +pages_huge(void *addr, size_t size) +{ + + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return (true); +#endif +} + +bool +pages_nohuge(void *addr, size_t size) +{ + + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + +#ifdef JEMALLOC_THP + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +#else + 
return (false); +#endif +} + #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool os_overcommits_sysctl(void) diff --git a/test/unit/pages.c b/test/unit/pages.c new file mode 100644 index 00000000..f297215a --- /dev/null +++ b/test/unit/pages.c @@ -0,0 +1,30 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_pages_huge) +{ + size_t alloc_size; + bool commit; + void *pages, *hugepage; + + alloc_size = HUGEPAGE * 2 - PAGE; + commit = true; + pages = pages_map(NULL, alloc_size, &commit); + assert_ptr_not_null(pages, "Unexpected pages_map() error"); + + hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); + assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_thp, + "Unexpected pages_huge() result"); + assert_false(pages_nohuge(hugepage, HUGEPAGE), + "Unexpected pages_nohuge() result"); + + pages_unmap(pages, alloc_size); +} +TEST_END + +int +main(void) +{ + + return (test( + test_pages_huge)); +} From 411697adcda2fd75e135cdcdafb95f2bd295dc7f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 21 Nov 2016 23:23:03 -0800 Subject: [PATCH 0566/2608] Use exponential series to size extents. If virtual memory is retained, allocate extents such that their sizes form an exponentially growing series. This limits the number of disjoint virtual memory ranges so that extent merging can be effective even if multiple arenas' extent allocation requests are highly interleaved. This resolves #462. --- include/jemalloc/internal/arena.h | 9 ++ src/arena.c | 3 + src/extent.c | 219 +++++++++++++++++++++++++----- 3 files changed, 199 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d6b1a2b0..afa8984d 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -232,6 +232,15 @@ struct arena_s { void *extent_hooks_pun; }; + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if !config_munmap). 
This limits + * the number of disjoint virtual memory ranges so that extent merging + * can be effective even if multiple arenas' extent allocation requests + * are highly interleaved. + */ + pszind_t extent_grow_next; + /* Cache of extent structures that were allocated via base_alloc(). */ ql_head(extent_t) extent_cache; malloc_mutex_t extent_cache_mtx; diff --git a/src/arena.c b/src/arena.c index 73fea528..c3587044 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1686,6 +1686,9 @@ arena_new(tsdn_t *tsdn, unsigned ind) arena->extent_hooks = (extent_hooks_t *)&extent_hooks_default; + if (!config_munmap) + arena->extent_grow_next = psz2ind(HUGEPAGE); + ql_new(&arena->extent_cache); if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", WITNESS_RANK_ARENA_EXTENT_CACHE)) diff --git a/src/extent.c b/src/extent.c index be6cadc3..586e8d33 100644 --- a/src/extent.c +++ b/src/extent.c @@ -265,6 +265,41 @@ extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } } +static void +extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) +{ + + cassert(config_prof); + + if (opt_prof && extent_active_get(extent)) { + size_t nadd = extent_size_get(extent) >> LG_PAGE; + size_t cur = atomic_add_zu(&curpages, nadd); + size_t high = atomic_read_zu(&highpages); + while (cur > high && atomic_cas_zu(&highpages, high, cur)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highpages update race. 
+ */ + high = atomic_read_zu(&highpages); + } + if (cur > high && prof_gdump_get_unlocked()) + prof_gdump(tsdn); + } +} + +static void +extent_gprof_sub(tsdn_t *tsdn, const extent_t *extent) +{ + + cassert(config_prof); + + if (opt_prof && extent_active_get(extent)) { + size_t nsub = extent_size_get(extent) >> LG_PAGE; + assert(atomic_read_zu(&curpages) >= nsub); + atomic_sub_zu(&curpages, nsub); + } +} + static bool extent_register(tsdn_t *tsdn, const extent_t *extent) { @@ -280,20 +315,8 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) extent_interior_register(tsdn, rtree_ctx, extent); extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof && opt_prof && extent_active_get(extent)) { - size_t nadd = extent_size_get(extent) >> LG_PAGE; - size_t cur = atomic_add_zu(&curpages, nadd); - size_t high = atomic_read_zu(&highpages); - while (cur > high && atomic_cas_zu(&highpages, high, cur)) { - /* - * Don't refresh cur, because it may have decreased - * since this thread lost the highpages update race. 
- */ - high = atomic_read_zu(&highpages); - } - if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsdn); - } + if (config_prof) + extent_gprof_add(tsdn, extent); return (false); } @@ -336,11 +359,8 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) } extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof && opt_prof && extent_active_get(extent)) { - size_t nsub = extent_size_get(extent) >> LG_PAGE; - assert(atomic_read_zu(&curpages) >= nsub); - atomic_sub_zu(&curpages, nsub); - } + if (config_prof) + extent_gprof_sub(tsdn, extent); } /* @@ -507,14 +527,16 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_usize_set(extent, usize); } - if (commit && !extent_committed_get(extent) && - extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, - extent_size_get(extent))) { - if (!locked) - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, - extent); - return (NULL); + if (*commit && !extent_committed_get(extent)) { + if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, + 0, extent_size_get(extent))) { + if (!locked) + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + extent_record(tsdn, arena, r_extent_hooks, extent_heaps, + cache, extent); + return (NULL); + } + extent_zeroed_set(extent, true); } if (pad != 0) @@ -591,8 +613,6 @@ extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, extent = extent_recycle(tsdn, arena, r_extent_hooks, arena->extents_cached, locked, true, new_addr, usize, pad, alignment, zero, commit, slab); - if (extent == NULL) - return (NULL); return (extent); } @@ -626,9 +646,6 @@ extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, arena->dss_prec); - if (ret == NULL) - return (NULL); - return (ret); } @@ -653,6 +670,136 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, alignment, zero, 
commit)); } +static void +extent_retain(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extent_t *extent) +{ + + if (config_stats) + arena->stats.retained += extent_size_get(extent); + extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, + false, extent); +} + +/* + * If virtual memory is retained, create increasingly larger extents from which + * to split requested extents in order to limit the total number of disjoint + * virtual memory ranges retained by each arena. + */ +static extent_t * +extent_grow_retained(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) +{ + extent_t *extent; + void *ptr; + size_t size, alloc_size, alloc_size_min, leadsize, trailsize; + bool zeroed, committed; + + /* + * Check whether the next extent size in the series would be large + * enough to satisfy this request. If no, just bail, so that e.g. a + * series of unsatisfiable allocation requests doesn't cause unused + * extent creation as a side effect. + */ + size = usize + pad; + alloc_size = pind2sz(arena->extent_grow_next); + alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size_min < usize) + return (NULL); + if (alloc_size < alloc_size_min) + return (NULL); + extent = extent_alloc(tsdn, arena); + if (extent == NULL) + return (NULL); + zeroed = false; + committed = false; + ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, + &zeroed, &committed, arena->dss_prec); + extent_init(extent, arena, ptr, alloc_size, alloc_size, + arena_extent_sn_next(arena), false, zeroed, committed, false); + if (ptr == NULL || extent_register(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + return (NULL); + } + /* + * Set the extent as active *after registration so that no gprof-related + * accounting occurs during registration. 
+ */ + extent_active_set(extent, true); + + leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, PAGE_CEILING(alignment)) - + (uintptr_t)ptr; + assert(new_addr == NULL || leadsize == 0); + assert(alloc_size >= leadsize + size); + trailsize = alloc_size - leadsize - size; + if (extent_zeroed_get(extent)) + *zero = true; + if (extent_committed_get(extent)) + *commit = true; + + /* Split the lead. */ + if (leadsize != 0) { + extent_t *lead = extent; + extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, lead, + leadsize, leadsize, size + trailsize, usize + trailsize); + if (extent == NULL) { + extent_deregister(tsdn, lead); + extent_leak(tsdn, arena, r_extent_hooks, false, lead); + return (NULL); + } + extent_retain(tsdn, arena, r_extent_hooks, lead); + } + + /* Split the trail. */ + if (trailsize != 0) { + extent_t *trail = extent_split_wrapper(tsdn, arena, + r_extent_hooks, extent, size, usize, trailsize, trailsize); + if (trail == NULL) { + extent_deregister(tsdn, extent); + extent_leak(tsdn, arena, r_extent_hooks, false, extent); + return (NULL); + } + extent_retain(tsdn, arena, r_extent_hooks, trail); + } else if (leadsize == 0) { + /* + * Splitting causes usize to be set as a side effect, but no + * splitting occurred. + */ + extent_usize_set(extent, usize); + } + + if (*commit && !extent_committed_get(extent)) { + if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, + 0, extent_size_get(extent))) { + extent_retain(tsdn, arena, r_extent_hooks, extent); + return (NULL); + } + extent_zeroed_set(extent, true); + } + + if (config_prof) { + /* Adjust gprof stats now that extent is final size. 
*/ + extent_gprof_add(tsdn, extent); + } + if (pad != 0) + extent_addr_randomize(tsdn, extent, alignment); + if (slab) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + extent_slab_set(extent, true); + extent_interior_register(tsdn, rtree_ctx, extent); + } + if (*zero && !extent_zeroed_get(extent)) + memset(extent_addr_get(extent), 0, extent_usize_get(extent)); + if (arena->extent_grow_next + 1 < NPSIZES) + arena->extent_grow_next++; + return (extent); +} + static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, @@ -669,6 +816,12 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, if (extent != NULL && config_stats) { size_t size = usize + pad; arena->stats.retained -= size; + if (config_prof) + extent_gprof_add(tsdn, extent); + } + if (!config_munmap && extent == NULL) { + extent = extent_grow_retained(tsdn, arena, r_extent_hooks, + new_addr, usize, pad, alignment, zero, commit, slab); } return (extent); @@ -909,6 +1062,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, if (config_stats) arena->stats.retained += extent_size_get(extent); + if (config_prof) + extent_gprof_sub(tsdn, extent); extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, false, extent); From 884fa22b8c8a23831eb4090fa92d191d6e3e394e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 12:40:36 -0800 Subject: [PATCH 0567/2608] Rename arena_decay_t's ndirty to nunpurged. --- include/jemalloc/internal/arena.h | 2 +- src/arena.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index afa8984d..6532b08a 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -106,7 +106,7 @@ struct arena_decay_s { * arena->ndirty to determine how many dirty pages, if any, were * generated. 
*/ - size_t ndirty; + size_t nunpurged; /* * Trailing log of how many unused dirty pages were generated during * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last diff --git a/src/arena.c b/src/arena.c index c3587044..0eb6150b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -431,8 +431,8 @@ arena_decay_backlog_npages_limit(const arena_t *arena) static void arena_decay_backlog_update_last(arena_t *arena) { - size_t ndirty_delta = (arena->ndirty > arena->decay.ndirty) ? - arena->ndirty - arena->decay.ndirty : 0; + size_t ndirty_delta = (arena->ndirty > arena->decay.nunpurged) ? + arena->ndirty - arena->decay.nunpurged : 0; arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; } @@ -491,7 +491,7 @@ arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) if (arena->ndirty > ndirty_limit) arena_purge_to_limit(tsdn, arena, ndirty_limit); - arena->decay.ndirty = arena->ndirty; + arena->decay.nunpurged = arena->ndirty; } static void @@ -516,7 +516,7 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) nstime_update(&arena->decay.epoch); arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay.ndirty = arena->ndirty; + arena->decay.nunpurged = arena->ndirty; memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } From a6e86810d83aba0d94d0f6423ed09e8e6e0909fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 3 Dec 2016 15:38:25 -0800 Subject: [PATCH 0568/2608] Refactor purging and splitting/merging. Split purging into lazy and forced variants. Use the forced variant for zeroing dss. Add support for NULL function pointers as an opt-out mechanism for the dalloc, commit, decommit, purge_lazy, purge_forced, split, and merge fields of extent_hooks_t. Add short-circuiting checks in large_ralloc_no_move_{shrink,expand}() so that no attempt is made if splitting/merging is not supported. This resolves #268. 
--- doc/jemalloc.xml.in | 29 ++-- include/jemalloc/internal/arena.h | 4 +- include/jemalloc/internal/extent.h | 5 +- include/jemalloc/internal/pages.h | 35 ++++- include/jemalloc/internal/private_symbols.txt | 6 +- include/jemalloc/jemalloc_typedefs.h.in | 3 +- src/extent.c | 134 ++++++++++++++---- src/extent_dss.c | 14 +- src/large.c | 6 + src/pages.c | 43 +++--- test/integration/extent.c | 63 +++++--- 11 files changed, 258 insertions(+), 84 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 250a2a83..990aacf3 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1512,7 +1512,8 @@ struct extent_hooks_s { extent_dalloc_t *dalloc; extent_commit_t *commit; extent_decommit_t *decommit; - extent_purge_t *purge; + extent_purge_t *purge_lazy; + extent_purge_t *purge_forced; extent_split_t *split; extent_merge_t *merge; };]]> @@ -1522,13 +1523,12 @@ struct extent_hooks_s { mapped committed memory, in the simplest case followed by deallocation. However, there are performance and platform reasons to retain extents for later reuse. Cleanup attempts cascade from deallocation to decommit - to purging, which gives the extent management functions opportunities to - reject the most permanent cleanup operations in favor of less permanent - (and often less costly) operations. The extent splitting and merging - operations can also be opted out of, but this is mainly intended to - support platforms on which virtual memory mappings provided by the - operating system kernel do not automatically coalesce and split, e.g. - Windows. + to lazy purging to forced purging, which gives the extent management + functions opportunities to reject the most permanent cleanup operations + in favor of less permanent (and often less costly) operations. All + operations except allocation can be universally opted out of by setting + the hook pointers to NULL, or selectively opted out + of by returning failure. 
typedef void *(extent_alloc_t) @@ -1634,21 +1634,24 @@ struct extent_hooks_s { typedef bool (extent_purge_t) extent_hooks_t *extent_hooks void *addr - size_tsize + size_t size size_t offset size_t length unsigned arena_ind An extent purge function conforms to the - extent_purge_t type and optionally discards physical pages + extent_purge_t type and discards physical pages within the virtual memory mapping associated with an extent at given addr and size at offset bytes, extending for length on behalf of arena - arena_ind, returning false if pages within the - purged virtual memory range will be zero-filled the next time they are - accessed. + arena_ind. A lazy extent purge function can + delay purging indefinitely and leave the pages within the purged virtual + memory range in an indeterminate state, whereas a forced extent purge + function immediately purges, and the pages within the virtual memory + range will be zero-filled the next time they are accessed. If the + function returns true, this indicates failure to purge. typedef bool (extent_split_t) diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 6532b08a..a8c2976c 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -198,8 +198,8 @@ struct arena_s { /* * Current count of pages within unused extents that are potentially - * dirty, and for which madvise(... MADV_DONTNEED) has not been called. - * By tracking this, we can institute a limit on how much dirty unused + * dirty, and for which pages_purge_*() has not been called. By + * tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena.
*/ size_t ndirty; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index d5690c08..33b85145 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -133,7 +133,10 @@ bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); -bool extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 034a8aac..98e4f38a 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -24,6 +24,23 @@ #define HUGEPAGE_CEILING(s) \ (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) +/* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ +#if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) +# define PAGES_CAN_PURGE_LAZY +#endif +/* + * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. + * + * The only supported way to hard-purge on Windows is to decommit and then + * re-commit, but doing so is racy, and if re-commit fails it's a pain to + * propagate the "poisoned" memory state. Since we typically decommit as the + * next step after purging on Windows anyway, there's no point in adding such + * complexity. 
+ */ +#if !defined(_WIN32) && defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +# define PAGES_CAN_PURGE_FORCED +#endif + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS @@ -32,13 +49,29 @@ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +static const bool pages_can_purge_lazy = +#ifdef PAGES_CAN_PURGE_LAZY + true +#else + false +#endif + ; +static const bool pages_can_purge_forced = +#ifdef PAGES_CAN_PURGE_FORCED + true +#else + false +#endif + ; + void *pages_map(void *addr, size_t size, bool *commit); void pages_unmap(void *addr, size_t size); void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, bool *commit); bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); -bool pages_purge(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); void pages_boot(void); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1facc928..7aa622fb 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -179,7 +179,8 @@ extent_merge_wrapper extent_past_get extent_prof_tctx_get extent_prof_tctx_set -extent_purge_wrapper +extent_purge_forced_wrapper +extent_purge_lazy_wrapper extent_retained_get extent_ring_insert extent_ring_remove @@ -327,7 +328,8 @@ pages_decommit pages_huge pages_map pages_nohuge -pages_purge +pages_purge_forced +pages_purge_lazy pages_trim pages_unmap pind2sz diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index 1049d7c7..91b5a8dc 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -61,7 +61,8 @@ 
struct extent_hooks_s { extent_dalloc_t *dalloc; extent_commit_t *commit; extent_decommit_t *decommit; - extent_purge_t *purge; + extent_purge_t *purge_lazy; + extent_purge_t *purge_forced; extent_split_t *split; extent_merge_t *merge; }; diff --git a/src/extent.c b/src/extent.c index 586e8d33..827a9213 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,23 +15,47 @@ static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); +#ifdef PAGES_CAN_PURGE_LAZY +static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, + void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +#endif +#ifdef PAGES_CAN_PURGE_FORCED +static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, + void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +#endif +#ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); +#endif const extent_hooks_t extent_hooks_default = { extent_alloc_default, extent_dalloc_default, extent_commit_default, - extent_decommit_default, - extent_purge_default, + extent_decommit_default +#ifdef PAGES_CAN_PURGE_LAZY + , + extent_purge_lazy_default +#else + , + NULL +#endif +#ifdef PAGES_CAN_PURGE_FORCED + , + extent_purge_forced_default +#else + , + NULL +#endif +#ifdef JEMALLOC_MAPS_COALESCE + , extent_split_default, extent_merge_default +#endif }; /* Used exclusively for 
gdump triggering. */ @@ -395,8 +419,11 @@ extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * that this is only a virtual memory leak. */ if (cache) { - extent_purge_wrapper(tsdn, arena, r_extent_hooks, extent, 0, - extent_size_get(extent)); + if (extent_purge_lazy_wrapper(tsdn, arena, r_extent_hooks, + extent, 0, extent_size_get(extent))) { + extent_purge_forced_wrapper(tsdn, arena, r_extent_hooks, + extent, 0, extent_size_get(extent)); + } } extent_dalloc(tsdn, arena, extent); } @@ -1023,7 +1050,7 @@ void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - bool err; + bool err, zeroed; assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); @@ -1041,9 +1068,10 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { - err = (*r_extent_hooks)->dalloc(*r_extent_hooks, + err = ((*r_extent_hooks)->dalloc == NULL || + (*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), - extent_committed_get(extent), arena->ind); + extent_committed_get(extent), arena->ind)); } if (!err) { @@ -1052,13 +1080,24 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_reregister(tsdn, extent); /* Try to decommit; purge if that fails. 
*/ - if (extent_committed_get(extent)) { - extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, - 0, extent_size_get(extent)); - } - extent_zeroed_set(extent, !extent_committed_get(extent) || - !(*r_extent_hooks)->purge(*r_extent_hooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), arena->ind)); + if (!extent_committed_get(extent)) + zeroed = true; + else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, + 0, extent_size_get(extent))) + zeroed = true; + else if ((*r_extent_hooks)->purge_lazy != NULL && + !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), 0, + extent_size_get(extent), arena->ind)) + zeroed = false; + else if ((*r_extent_hooks)->purge_forced != NULL && + !(*r_extent_hooks)->purge_forced(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), 0, + extent_size_get(extent), arena->ind)) + zeroed = true; + else + zeroed = false; + extent_zeroed_set(extent, zeroed); if (config_stats) arena->stats.retained += extent_size_get(extent); @@ -1088,9 +1127,9 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, bool err; extent_hooks_assure_initialized(arena, r_extent_hooks); - err = (*r_extent_hooks)->commit(*r_extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena->ind); + err = ((*r_extent_hooks)->commit == NULL || + (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena->ind)); extent_committed_set(extent, extent_committed_get(extent) || !err); return (err); } @@ -1115,15 +1154,17 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); - err = (*r_extent_hooks)->decommit(*r_extent_hooks, + err = ((*r_extent_hooks)->decommit == NULL || + (*r_extent_hooks)->decommit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena->ind); + arena->ind)); 
extent_committed_set(extent, extent_committed_get(extent) && err); return (err); } +#ifdef PAGES_CAN_PURGE_LAZY static bool -extent_purge_default(extent_hooks_t *extent_hooks, void *addr, size_t size, +extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { @@ -1133,22 +1174,55 @@ extent_purge_default(extent_hooks_t *extent_hooks, void *addr, size_t size, assert(length != 0); assert((length & PAGE_MASK) == 0); - return (pages_purge((void *)((uintptr_t)addr + (uintptr_t)offset), + return (pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), length)); } +#endif bool -extent_purge_wrapper(tsdn_t *tsdn, arena_t *arena, +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { extent_hooks_assure_initialized(arena, r_extent_hooks); - return ((*r_extent_hooks)->purge(*r_extent_hooks, + return ((*r_extent_hooks)->purge_lazy == NULL || + (*r_extent_hooks)->purge_lazy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena->ind)); } +#ifdef PAGES_CAN_PURGE_FORCED +static bool +extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind) +{ + + assert(extent_hooks == &extent_hooks_default); + assert(addr != NULL); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + + return (pages_purge_forced((void *)((uintptr_t)addr + + (uintptr_t)offset), length)); +} +#endif + +bool +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length) +{ + + extent_hooks_assure_initialized(arena, r_extent_hooks); + return ((*r_extent_hooks)->purge_forced == NULL || + (*r_extent_hooks)->purge_forced(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), offset, length, + arena->ind)); 
+} + +#ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) @@ -1160,6 +1234,7 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, return (true); return (false); } +#endif extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, @@ -1175,6 +1250,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); + if ((*r_extent_hooks)->split == NULL) + return (NULL); + trail = extent_alloc(tsdn, arena); if (trail == NULL) goto label_error_a; @@ -1237,6 +1315,7 @@ extent_merge_default_impl(void *addr_a, void *addr_b) return (false); } +#ifdef JEMALLOC_MAPS_COALESCE static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) @@ -1246,6 +1325,7 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, return (extent_merge_default_impl(addr_a, addr_b)); } +#endif bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, @@ -1257,6 +1337,10 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; extent_hooks_assure_initialized(arena, r_extent_hooks); + + if ((*r_extent_hooks)->merge == NULL) + return (true); + if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ err = extent_merge_default_impl(extent_base_get(a), diff --git a/src/extent_dss.c b/src/extent_dss.c index 1169d496..0f0c689b 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -168,10 +168,20 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_dalloc_gap(tsdn, arena, gap); else extent_dalloc(tsdn, arena, gap); - if (*zero) - memset(ret, 0, size); if (!*commit) *commit = pages_decommit(ret, size); + if (*zero && *commit) { + extent_hooks_t *extent_hooks = + EXTENT_HOOKS_INITIALIZER; + extent_t extent; + + extent_init(&extent, arena, ret, size, + size, 0, true, false, true, false); + if (extent_purge_forced_wrapper(tsdn, + arena, &extent_hooks, &extent, 0, + size)) + memset(ret, 0, size); + } return (ret); } /* diff --git a/src/large.c b/src/large.c index 1bae9399..ec22e64c 100644 --- a/src/large.c +++ b/src/large.c @@ -110,6 +110,9 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) assert(oldusize > usize); + if (extent_hooks->split == NULL) + return (true); + /* Split excess pages. 
*/ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, @@ -142,6 +145,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, size_t trailsize = usize - extent_usize_get(extent); extent_t *trail; + if (extent_hooks->merge == NULL) + return (true); + if ((trail = arena_extent_cache_alloc(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) == NULL) { diff --git a/src/pages.c b/src/pages.c index 8bef6fac..d5a0a21c 100644 --- a/src/pages.c +++ b/src/pages.c @@ -163,33 +163,34 @@ pages_decommit(void *addr, size_t size) } bool -pages_purge(void *addr, size_t size) +pages_purge_lazy(void *addr, size_t size) { - bool unzeroed; + + if (!pages_can_purge_lazy) + return (true); #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); - unzeroed = true; -#elif (defined(JEMALLOC_PURGE_MADVISE_FREE) || \ - defined(JEMALLOC_PURGE_MADVISE_DONTNEED)) -# if defined(JEMALLOC_PURGE_MADVISE_FREE) -# define JEMALLOC_MADV_PURGE MADV_FREE -# define JEMALLOC_MADV_ZEROS false -# elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) -# define JEMALLOC_MADV_PURGE MADV_DONTNEED -# define JEMALLOC_MADV_ZEROS true -# else -# error No madvise(2) flag defined for purging unused dirty pages -# endif - int err = madvise(addr, size, JEMALLOC_MADV_PURGE); - unzeroed = (!JEMALLOC_MADV_ZEROS || err != 0); -# undef JEMALLOC_MADV_PURGE -# undef JEMALLOC_MADV_ZEROS +#elif defined(JEMALLOC_PURGE_MADVISE_FREE) + madvise(addr, size, MADV_FREE); #else - /* Last resort no-op. 
*/ - unzeroed = true; + not_reached(); +#endif + return (false); +} + +bool +pages_purge_forced(void *addr, size_t size) +{ + + if (!pages_can_purge_forced) + return (true); + +#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) + return (madvise(addr, size, MADV_DONTNEED) != 0); +#else + not_reached(); #endif - return (unzeroed); } bool diff --git a/test/integration/extent.c b/test/integration/extent.c index 2af20ce2..b0fc52d6 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -13,7 +13,9 @@ static bool extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge(extent_hooks_t *extent_hooks, void *addr, +static bool extent_purge_lazy(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, @@ -27,7 +29,8 @@ static extent_hooks_t hooks = { extent_dalloc, extent_commit, extent_decommit, - extent_purge, + extent_purge_lazy, + extent_purge_forced, extent_split, extent_merge }; @@ -42,7 +45,8 @@ static bool did_alloc; static bool did_dalloc; static bool did_commit; static bool did_decommit; -static bool did_purge; +static bool did_purge_lazy; +static bool did_purge_forced; static bool tried_split; static bool did_split; static bool did_merge; @@ -129,7 +133,7 @@ extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, } static bool -extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, +extent_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned 
arena_ind) { @@ -138,9 +142,29 @@ extent_purge(extent_hooks_t *extent_hooks, void *addr, size_t size, offset, length, arena_ind); assert_ptr_eq(extent_hooks, new_hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge, extent_purge, "Wrong hook function"); - did_purge = true; - return (old_hooks->purge(old_hooks, addr, size, offset, length, + assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy, + "Wrong hook function"); + did_purge_lazy = true; + return (old_hooks->purge_lazy == NULL || + old_hooks->purge_lazy(old_hooks, addr, size, offset, length, + arena_ind)); +} + +static bool +extent_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert_ptr_eq(extent_hooks, new_hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced, + "Wrong hook function"); + did_purge_forced = true; + return (old_hooks->purge_forced == NULL || + old_hooks->purge_forced(old_hooks, addr, size, offset, length, arena_ind)); } @@ -158,8 +182,8 @@ extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, "extent_hooks should be same as pointer used to set hooks"); assert_ptr_eq(extent_hooks->split, extent_split, "Wrong hook function"); tried_split = true; - err = old_hooks->split(old_hooks, addr, size, size_a, size_b, committed, - arena_ind); + err = (old_hooks->split == NULL || old_hooks->split(old_hooks, addr, + size, size_a, size_b, committed, arena_ind)); did_split = !err; return (err); } @@ -177,8 +201,8 @@ extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, assert_ptr_eq(extent_hooks, new_hooks, "extent_hooks should be same as pointer used to set hooks"); assert_ptr_eq(extent_hooks->merge, 
extent_merge, "Wrong hook function"); - err = old_hooks->merge(old_hooks, addr_a, size_a, addr_b, size_b, - committed, arena_ind); + err = (old_hooks->merge == NULL || old_hooks->merge(old_hooks, addr_a, + size_a, addr_b, size_b, committed, arena_ind)); did_merge = !err; return (err); } @@ -216,7 +240,10 @@ TEST_BEGIN(test_extent) "Unexpected commit error"); assert_ptr_ne(old_hooks->decommit, extent_decommit, "Unexpected decommit error"); - assert_ptr_ne(old_hooks->purge, extent_purge, "Unexpected purge error"); + assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy, + "Unexpected purge_lazy error"); + assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced, + "Unexpected purge_forced error"); assert_ptr_ne(old_hooks->split, extent_split, "Unexpected split error"); assert_ptr_ne(old_hooks->merge, extent_merge, "Unexpected merge error"); @@ -240,7 +267,8 @@ TEST_BEGIN(test_extent) assert_ptr_not_null(p, "Unexpected mallocx() error"); did_dalloc = false; did_decommit = false; - did_purge = false; + did_purge_lazy = false; + did_purge_forced = false; tried_split = false; did_split = false; xallocx_success_a = (xallocx(p, large0, 0, flags) == large0); @@ -249,7 +277,8 @@ TEST_BEGIN(test_extent) if (xallocx_success_a) { assert_true(did_dalloc, "Expected dalloc"); assert_false(did_decommit, "Unexpected decommit"); - assert_true(did_purge, "Expected purge"); + assert_true(did_purge_lazy || did_purge_forced, + "Expected purge"); } assert_true(tried_split, "Expected split"); dallocx(p, flags); @@ -300,8 +329,10 @@ TEST_BEGIN(test_extent) "Unexpected commit error"); assert_ptr_eq(old_hooks->decommit, orig_hooks->decommit, "Unexpected decommit error"); - assert_ptr_eq(old_hooks->purge, orig_hooks->purge, - "Unexpected purge error"); + assert_ptr_eq(old_hooks->purge_lazy, orig_hooks->purge_lazy, + "Unexpected purge_lazy error"); + assert_ptr_eq(old_hooks->purge_forced, orig_hooks->purge_forced, + "Unexpected purge_forced error"); assert_ptr_eq(old_hooks->split, 
orig_hooks->split, "Unexpected split error"); assert_ptr_eq(old_hooks->merge, orig_hooks->merge, From a0dd3a4483e2e72ee80e70424a6522f873f2c7ff Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 22 Dec 2016 16:39:10 -0600 Subject: [PATCH 0569/2608] Implement per arena base allocators. Add/rename related mallctls: - Add stats.arenas..base . - Rename stats.arenas..metadata to stats.arenas..internal . - Add stats.arenas..resident . Modify the arenas.extend mallctl to take an optional (extent_hooks_t *) argument so that it is possible for all base allocations to be serviced by the specified extent hooks. This resolves #463. --- Makefile.in | 1 + doc/jemalloc.xml.in | 63 ++- include/jemalloc/internal/arena.h | 44 +- include/jemalloc/internal/base.h | 76 ++- .../jemalloc/internal/jemalloc_internal.h.in | 53 ++- include/jemalloc/internal/private_symbols.txt | 13 +- include/jemalloc/internal/stats.h | 5 +- src/arena.c | 50 +- src/base.c | 439 ++++++++++++------ src/ctl.c | 50 +- src/extent.c | 36 +- src/jemalloc.c | 35 +- src/prof.c | 6 +- src/rtree.c | 3 +- src/stats.c | 27 +- src/tcache.c | 8 +- test/integration/extent.c | 115 +++-- test/unit/base.c | 274 +++++++++++ 18 files changed, 957 insertions(+), 341 deletions(-) create mode 100644 test/unit/base.c diff --git a/Makefile.in b/Makefile.in index 052688bd..d8704923 100644 --- a/Makefile.in +++ b/Makefile.in @@ -156,6 +156,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/base.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 990aacf3..5923481a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1500,9 +1500,9 @@ malloc_conf = "xmalloc:true";]]> to control allocation for arenas created via arenas.extend such that all extents originate from an application-supplied extent allocator - (by setting custom extent hook 
functions just after arena creation), but - the automatically created arenas may have already created extents prior - to the application having an opportunity to take over extent + (by specifying the custom extent hook functions during arena creation), + but the automatically created arenas will have already created extents + prior to the application having an opportunity to take over extent allocation. arenas.extend - (unsigned) - r- + (unsigned, extent_hooks_t *) + rw - Extend the array of arenas by appending a new arena, - and returning the new arena index. + Extend the array of arenas by appending a new arena with + optionally specified extent hooks, and returning the new arena + index. @@ -1976,9 +1977,11 @@ struct extent_hooks_s { [] Total number of bytes dedicated to metadata, which - comprise base allocations used for bootstrap-sensitive internal - allocator data structures and internal allocations (see stats.arenas.<i>.metadata). + comprise base allocations used for bootstrap-sensitive allocator + metadata structures (see stats.arenas.<i>.base) + and internal allocations (see stats.arenas.<i>.internal). @@ -2114,9 +2117,21 @@ struct extent_hooks_s { details. - + - stats.arenas.<i>.metadata + stats.arenas.<i>.base + (size_t) + r- + [] + + + Number of bytes dedicated to bootstrap-sensitive allocator metadata + structures. + + + + + stats.arenas.<i>.internal (size_t) r- [] @@ -2124,13 +2139,23 @@ struct extent_hooks_s { Number of bytes dedicated to internal allocations. Internal allocations differ from application-originated allocations in that they are for internal use, and that they are omitted from heap - profiles. This statistic is reported separately from stats.metadata - because it overlaps with e.g. the stats.allocated and - stats.active - statistics, whereas the other metadata statistics do - not. + profiles. 
+ + + + + stats.arenas.<i>.resident + (size_t) + r- + [] + + Maximum number of bytes in physically resident data + pages mapped by the arena, comprising all pages dedicated to allocator + metadata, pages backing active allocations, and unused dirty pages. + This is a maximum rather than precise because pages may not actually be + physically resident if they correspond to demand-zeroed virtual memory + that has not yet been touched. This is a multiple of the page + size. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index a8c2976c..d889852e 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -143,9 +143,6 @@ struct arena_bin_s { }; struct arena_s { - /* This arena's index within the arenas array. */ - unsigned ind; - /* * Number of threads currently assigned to this arena, synchronized via * atomic operations. Each thread has two distinct assignments, one for @@ -226,12 +223,6 @@ struct arena_s { /* Protects extents_{cached,retained,dirty}. */ malloc_mutex_t extents_mtx; - /* User-configurable extent hook functions. */ - union { - extent_hooks_t *extent_hooks; - void *extent_hooks_pun; - }; - /* * Next extent size class in a growing series to use when satisfying a * request via the extent hooks (only if !config_munmap). This limits @@ -247,6 +238,9 @@ struct arena_s { /* bins is used to store heaps of free regions. */ arena_bin_t bins[NBINS]; + + /* Base allocator, from which arena metadata are allocated. */ + base_t *base; }; /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ @@ -337,7 +331,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); @@ -351,9 +345,10 @@ void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE -void arena_metadata_add(arena_t *arena, size_t size); -void arena_metadata_sub(arena_t *arena, size_t size); -size_t arena_metadata_get(arena_t *arena); +unsigned arena_ind_get(const arena_t *arena); +void arena_internal_add(arena_t *arena, size_t size); +void arena_internal_sub(arena_t *arena, size_t size); +size_t arena_internal_get(arena_t *arena); bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); @@ -378,25 +373,32 @@ void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) # ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE void -arena_metadata_add(arena_t *arena, size_t size) +JEMALLOC_INLINE unsigned +arena_ind_get(const arena_t *arena) { - atomic_add_zu(&arena->stats.metadata, size); + return (base_ind_get(arena->base)); } JEMALLOC_INLINE void -arena_metadata_sub(arena_t *arena, size_t size) +arena_internal_add(arena_t *arena, size_t size) { - atomic_sub_zu(&arena->stats.metadata, size); + atomic_add_zu(&arena->stats.internal, size); +} + +JEMALLOC_INLINE void +arena_internal_sub(arena_t *arena, size_t size) +{ + + atomic_sub_zu(&arena->stats.internal, size); } JEMALLOC_INLINE size_t 
-arena_metadata_get(arena_t *arena) +arena_internal_get(arena_t *arena) { - return (atomic_read_zu(&arena->stats.metadata)); + return (atomic_read_zu(&arena->stats.internal)); } JEMALLOC_INLINE bool @@ -499,7 +501,7 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) if (unlikely(tsdn_null(tsdn))) return; tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena->ind); + decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); if (unlikely(decay_ticker == NULL)) return; if (unlikely(ticker_ticks(decay_ticker, nticks))) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index d6b81e16..a54a5502 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -1,25 +1,87 @@ /******************************************************************************/ #ifdef JEMALLOC_H_TYPES +typedef struct base_block_s base_block_t; +typedef struct base_s base_t; + #endif /* JEMALLOC_H_TYPES */ /******************************************************************************/ #ifdef JEMALLOC_H_STRUCTS +/* Embedded at the beginning of every block of base-managed virtual memory. */ +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. */ + base_block_t *next; + + /* Tracks unused trailing space. */ + extent_t extent; +}; + +struct base_s { + /* Associated arena's index within the arenas array. */ + unsigned ind; + + /* User-configurable extent hook functions. */ + union { + extent_hooks_t *extent_hooks; + void *extent_hooks_pun; + }; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + extent_heap_t avail[NSIZES]; + + /* Stats, only maintained if config_stats. 
*/ + size_t allocated; + size_t resident; + size_t mapped; +}; + #endif /* JEMALLOC_H_STRUCTS */ /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS -void *base_alloc(tsdn_t *tsdn, size_t size); -void base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, - size_t *mapped); -bool base_boot(void); -void base_prefork(tsdn_t *tsdn); -void base_postfork_parent(tsdn_t *tsdn); -void base_postfork_child(tsdn_t *tsdn); +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(base_t *base); +extent_hooks_t *base_extent_hooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ #ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_ENABLE_INLINE +unsigned base_ind_get(const base_t *base); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) +JEMALLOC_INLINE unsigned +base_ind_get(const base_t *base) +{ + + return (base->ind); +} +#endif + #endif /* JEMALLOC_H_INLINES */ /******************************************************************************/ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index bfa84a22..11a27366 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -370,9 +370,9 @@ typedef unsigned szind_t; #include "jemalloc/internal/tsd.h" #include 
"jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/arena.h" #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/large.h" @@ -403,10 +403,10 @@ typedef unsigned szind_t; #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/base.h" #define JEMALLOC_ARENA_STRUCTS_B #include "jemalloc/internal/arena.h" #undef JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/large.h" @@ -464,7 +464,7 @@ void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); @@ -491,8 +491,8 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/arena.h" #include "jemalloc/internal/base.h" +#include "jemalloc/internal/arena.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/large.h" @@ -900,8 +900,10 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) ret = arenas[ind]; if (unlikely(ret == NULL)) { ret = (arena_t *)atomic_read_p((void **)&arenas[ind]); - if (init_if_missing && unlikely(ret == NULL)) - ret = arena_init(tsdn, ind); + if (init_if_missing && unlikely(ret == NULL)) { + ret = arena_init(tsdn, ind, + 
(extent_hooks_t *)&extent_hooks_default); + } } return (ret); } @@ -950,17 +952,17 @@ iealloc(tsdn_t *tsdn, const void *ptr) arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena, bool slow_path); + tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path); void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path); void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena); + tcache_t *tcache, bool is_internal, arena_t *arena); void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(tsdn_t *tsdn, const void *ptr); void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool is_metadata, bool slow_path); + bool is_internal, bool slow_path); void idalloc(tsd_t *tsd, extent_t *extent, void *ptr); void isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, tcache_t *tcache, bool slow_path); @@ -1003,17 +1005,18 @@ isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) JEMALLOC_ALWAYS_INLINE void * iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_metadata, arena_t *arena, bool slow_path) + bool is_internal, arena_t *arena, bool slow_path) { void *ret; assert(size != 0); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_ind_get(arena) < + narenas_auto); ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_add(iaalloc(tsdn, ret), 
isalloc(tsdn, + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, iealloc(tsdn, ret), ret)); } return (ret); @@ -1029,19 +1032,20 @@ ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_metadata, arena_t *arena) + tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; assert(usize != 0); assert(usize == sa2u(usize, alignment)); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || arena == NULL || arena->ind < narenas_auto); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_ind_get(arena) < + narenas_auto); ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - if (config_stats && is_metadata && likely(ret != NULL)) { - arena_metadata_add(iaalloc(tsdn, ret), isalloc(tsdn, + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, iealloc(tsdn, ret), ret)); } return (ret); @@ -1088,14 +1092,15 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) JEMALLOC_ALWAYS_INLINE void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool is_metadata, bool slow_path) + bool is_internal, bool slow_path) { assert(ptr != NULL); - assert(!is_metadata || tcache == NULL); - assert(!is_metadata || iaalloc(tsdn, ptr)->ind < narenas_auto); - if (config_stats && is_metadata) { - arena_metadata_sub(iaalloc(tsdn, ptr), isalloc(tsdn, extent, + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < + narenas_auto); + if (config_stats && is_internal) { + arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, extent, ptr)); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 
7aa622fb..36960f08 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -34,13 +34,14 @@ arena_extent_ralloc_large_shrink arena_extent_sn_next arena_get arena_ichoose +arena_ind_get arena_init +arena_internal_add +arena_internal_get +arena_internal_sub arena_malloc arena_malloc_hard arena_maybe_purge -arena_metadata_add -arena_metadata_get -arena_metadata_sub arena_migrate arena_new arena_nthreads_dec @@ -93,8 +94,14 @@ atomic_write_u atomic_write_u32 atomic_write_u64 atomic_write_zu +b0get base_alloc base_boot +base_delete +base_extent_hooks_get +base_extent_hooks_set +base_ind_get +base_new base_postfork_child base_postfork_parent base_prefork diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index a7368a72..bea4e3e7 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -100,8 +100,9 @@ struct arena_stats_s { uint64_t nmadvise; uint64_t purged; - /* Number of bytes currently allocated for internal metadata. */ - size_t metadata; /* Protected via atomic_*_zu(). */ + size_t base; + size_t internal; /* Protected via atomic_*_zu(). 
*/ + size_t resident; size_t allocated_large; uint64_t nmalloc_large; diff --git a/src/arena.c b/src/arena.c index 0eb6150b..d5e87ead 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1550,6 +1550,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { + size_t base_allocated, base_resident, base_mapped; unsigned i; cassert(config_stats); @@ -1558,12 +1559,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, nactive, ndirty); - astats->mapped += arena->stats.mapped; + base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, + &base_mapped); + + astats->mapped += base_mapped + arena->stats.mapped; astats->retained += arena->stats.retained; astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; - astats->metadata += arena_metadata_get(arena); + astats->base += base_allocated; + astats->internal += arena_internal_get(arena); + astats->resident += base_resident + (((arena->nactive + arena->ndirty) + << LG_PAGE)); astats->allocated_large += arena->stats.allocated_large; astats->nmalloc_large += arena->stats.nmalloc_large; astats->ndalloc_large += arena->stats.ndalloc_large; @@ -1625,19 +1632,27 @@ arena_extent_sn_next(arena_t *arena) } arena_t * -arena_new(tsdn_t *tsdn, unsigned ind) +arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; + base_t *base; unsigned i; - arena = (arena_t *)base_alloc(tsdn, sizeof(arena_t)); - if (arena == NULL) - return (NULL); + if (ind == 0) + base = b0get(); + else { + base = base_new(tsdn, ind, extent_hooks); + if (base == NULL) + return (NULL); + } + + arena = (arena_t *)base_alloc(tsdn, base, sizeof(arena_t), CACHELINE); + if (arena == NULL) + goto label_error; - arena->ind = ind; arena->nthreads[0] = arena->nthreads[1] = 0; if 
(malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) - return (NULL); + goto label_error; if (config_stats && config_tcache) ql_new(&arena->tcache_ql); @@ -1670,7 +1685,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) ql_new(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE)) - return (NULL); + goto label_error; for (i = 0; i < NPSIZES+1; i++) { extent_heap_new(&arena->extents_cached[i]); @@ -1682,9 +1697,7 @@ arena_new(tsdn_t *tsdn, unsigned ind) if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", WITNESS_RANK_ARENA_EXTENTS)) - return (NULL); - - arena->extent_hooks = (extent_hooks_t *)&extent_hooks_default; + goto label_error; if (!config_munmap) arena->extent_grow_next = psz2ind(HUGEPAGE); @@ -1692,14 +1705,14 @@ arena_new(tsdn_t *tsdn, unsigned ind) ql_new(&arena->extent_cache); if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", WITNESS_RANK_ARENA_EXTENT_CACHE)) - return (NULL); + goto label_error; /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_ARENA_BIN)) - return (NULL); + goto label_error; bin->slabcur = NULL; extent_heap_new(&bin->slabs_nonfull); extent_init(&bin->slabs_full, arena, NULL, 0, 0, 0, false, @@ -1708,7 +1721,13 @@ arena_new(tsdn_t *tsdn, unsigned ind) memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } + arena->base = base; + return (arena); +label_error: + if (ind != 0) + base_delete(base); + return (NULL); } void @@ -1744,6 +1763,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { unsigned i; + base_prefork(tsdn, arena->base); for (i = 0; i < NBINS; i++) malloc_mutex_prefork(tsdn, &arena->bins[i].lock); malloc_mutex_prefork(tsdn, &arena->large_mtx); @@ -1757,6 +1777,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_cache_mtx); malloc_mutex_postfork_parent(tsdn, &arena->extents_mtx); malloc_mutex_postfork_parent(tsdn, &arena->lock); @@ -1770,6 +1791,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) malloc_mutex_postfork_child(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_cache_mtx); malloc_mutex_postfork_child(tsdn, &arena->extents_mtx); malloc_mutex_postfork_child(tsdn, &arena->lock); diff --git a/src/base.c b/src/base.c index 4764d9c9..5eab7cd5 100644 --- a/src/base.c +++ b/src/base.c @@ -4,112 +4,308 @@ /******************************************************************************/ /* Data. 
*/ -static malloc_mutex_t base_mtx; -static size_t base_extent_sn_next; -static extent_heap_t base_avail[NSIZES]; -static extent_t *base_extents; -static size_t base_allocated; -static size_t base_resident; -static size_t base_mapped; +static base_t *b0; /******************************************************************************/ -static extent_t * -base_extent_try_alloc(tsdn_t *tsdn) +static void * +base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) { - extent_t *extent; + void *addr; + bool zero = true; + bool commit = true; - malloc_mutex_assert_owner(tsdn, &base_mtx); + assert(size == HUGEPAGE_CEILING(size)); - if (base_extents == NULL) - return (NULL); - extent = base_extents; - base_extents = *(extent_t **)extent; - return (extent); + if (extent_hooks == &extent_hooks_default) + addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + else { + addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + &zero, &commit, ind); + } + + return (addr); } static void -base_extent_dalloc(tsdn_t *tsdn, extent_t *extent) +base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, size_t size) { - malloc_mutex_assert_owner(tsdn, &base_mtx); - - *(extent_t **)extent = base_extents; - base_extents = extent; + /* + * Cascade through dalloc, decommit, purge_lazy, and purge_forced, + * stopping at first success. This cascade is performed for consistency + * with the cascade in extent_dalloc_wrapper() because an application's + * custom hooks may not support e.g. dalloc. This function is only ever + * called as a side effect of arena destruction, so although it might + * seem pointless to do anything besides dalloc here, the application + * may in fact want the end state of all associated virtual memory to in + * some consistent-but-allocated state. 
+ */ + if (extent_hooks == &extent_hooks_default) { + if (!extent_dalloc_mmap(addr, size)) + return; + if (!pages_decommit(addr, size)) + return; + if (!pages_purge_lazy(addr, size)) + return; + if (!pages_purge_forced(addr, size)) + return; + /* Nothing worked. This should never happen. */ + not_reached(); + } else { + if (extent_hooks->dalloc != NULL && + !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) + return; + if (extent_hooks->decommit != NULL && + !extent_hooks->decommit(extent_hooks, addr, size, 0, size, + ind)) + return; + if (extent_hooks->purge_lazy != NULL && + !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, + ind)) + return; + if (extent_hooks->purge_forced != NULL && + !extent_hooks->purge_forced(extent_hooks, addr, size, 0, + size, ind)) + return; + /* Nothing worked. That's the application's problem. */ + } } static void -base_extent_init(extent_t *extent, void *addr, size_t size) +base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, + size_t size) { - size_t sn = atomic_add_zu(&base_extent_sn_next, 1) - 1; + size_t sn; + + sn = *extent_sn_next; + (*extent_sn_next)++; extent_init(extent, NULL, addr, size, 0, sn, true, true, true, false); } -static extent_t * -base_extent_alloc(tsdn_t *tsdn, size_t minsize) +static void * +base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, + size_t alignment) { - extent_t *extent; - size_t esize, nsize; - void *addr; + void *ret; - malloc_mutex_assert_owner(tsdn, &base_mtx); - assert(minsize != 0); - extent = base_extent_try_alloc(tsdn); - /* Allocate enough space to also carve an extent out if necessary. */ - nsize = (extent == NULL) ? CACHELINE_CEILING(sizeof(extent_t)) : 0; - esize = PAGE_CEILING(minsize + nsize); - /* - * Directly call extent_alloc_mmap() because it's critical to allocate - * untouched demand-zeroed virtual memory. 
- */ - { - bool zero = true; - bool commit = true; - addr = extent_alloc_mmap(NULL, esize, PAGE, &zero, &commit); + assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM)); + assert(size == ALIGNMENT_CEILING(size, alignment)); + + *gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), + alignment) - (uintptr_t)extent_addr_get(extent); + ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size); + assert(extent_size_get(extent) >= *gap_size + size); + extent_init(extent, NULL, (void *)((uintptr_t)extent_addr_get(extent) + + *gap_size + size), extent_size_get(extent) - *gap_size - size, 0, + extent_sn_get(extent), true, true, true, false); + return (ret); +} + +static void +base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, + size_t gap_size, void *addr, size_t size) +{ + + if (extent_size_get(extent) > 0) { + /* + * Compute the index for the largest size class that does not + * exceed extent's size. + */ + szind_t index_floor = size2index(extent_size_get(extent) + 1) - + 1; + extent_heap_insert(&base->avail[index_floor], extent); } - if (addr == NULL) { - if (extent != NULL) - base_extent_dalloc(tsdn, extent); - return (NULL); + + if (config_stats) { + base->allocated += size; + /* + * Add one PAGE to base_resident for every page boundary that is + * crossed by the new allocation. 
+ */ + base->resident += PAGE_CEILING((uintptr_t)addr + size) - + PAGE_CEILING((uintptr_t)addr - gap_size); + assert(base->allocated <= base->resident); + assert(base->resident <= base->mapped); } - base_mapped += esize; - if (extent == NULL) { - extent = (extent_t *)addr; - addr = (void *)((uintptr_t)addr + nsize); - esize -= nsize; - if (config_stats) { - base_allocated += nsize; - base_resident += PAGE_CEILING(nsize); - } - } - base_extent_init(extent, addr, esize); - return (extent); +} + +static void * +base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, + size_t size, size_t alignment) +{ + void *ret; + size_t gap_size; + + ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); + base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size); + return (ret); } /* - * base_alloc() guarantees demand-zeroed memory, in order to make multi-page - * sparse data structures such as radix tree nodes efficient with respect to - * physical memory usage. + * Allocate a block of virtual memory that is large enough to start with a + * base_block_t header, followed by an object of specified size and alignment. + * On success a pointer to the initialized base_block_t header is returned. 
+ */ +static base_block_t * +base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, + size_t *extent_sn_next, size_t size, size_t alignment) +{ + base_block_t *block; + size_t usize, header_size, gap_size, block_size; + + alignment = ALIGNMENT_CEILING(alignment, QUANTUM); + usize = ALIGNMENT_CEILING(size, alignment); + header_size = sizeof(base_block_t); + gap_size = ALIGNMENT_CEILING(header_size, alignment) - header_size; + block_size = HUGEPAGE_CEILING(header_size + gap_size + usize); + block = (base_block_t *)base_map(extent_hooks, ind, block_size); + if (block == NULL) + return (NULL); + block->size = block_size; + block->next = NULL; + assert(block_size >= header_size); + base_extent_init(extent_sn_next, &block->extent, + (void *)((uintptr_t)block + header_size), block_size - header_size); + return (block); +} + +/* + * Allocate an extent that is at least as large as specified size, with + * specified alignment. + */ +static extent_t * +base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) +{ + extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + base_block_t *block; + + malloc_mutex_assert_owner(tsdn, &base->mtx); + + block = base_block_alloc(extent_hooks, base_ind_get(base), + &base->extent_sn_next, size, alignment); + if (block == NULL) + return (NULL); + block->next = base->blocks; + base->blocks = block; + if (config_stats) { + base->allocated += sizeof(base_block_t); + base->resident += PAGE_CEILING(sizeof(base_block_t)); + base->mapped += block->size; + assert(base->allocated <= base->resident); + assert(base->resident <= base->mapped); + } + return (&block->extent); +} + +base_t * +b0get(void) +{ + + return (b0); +} + +base_t * +base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) +{ + base_t *base; + size_t extent_sn_next, base_alignment, base_size, gap_size; + base_block_t *block; + szind_t i; + + extent_sn_next = 0; + block = base_block_alloc(extent_hooks, ind, &extent_sn_next, + sizeof(base_t), 
QUANTUM); + if (block == NULL) + return (NULL); + + base_alignment = CACHELINE; + base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); + base = (base_t *)base_extent_bump_alloc_helper(&block->extent, + &gap_size, base_size, base_alignment); + base->ind = ind; + base->extent_hooks = extent_hooks; + if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE)) { + base_unmap(extent_hooks, ind, block, block->size); + return (NULL); + } + base->extent_sn_next = extent_sn_next; + base->blocks = block; + for (i = 0; i < NSIZES; i++) + extent_heap_new(&base->avail[i]); + if (config_stats) { + base->allocated = sizeof(base_block_t); + base->resident = PAGE_CEILING(sizeof(base_block_t)); + base->mapped = block->size; + assert(base->allocated <= base->resident); + assert(base->resident <= base->mapped); + } + base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, + base_size); + + return (base); +} + +void +base_delete(base_t *base) +{ + extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + base_block_t *next = base->blocks; + do { + base_block_t *block = next; + next = block->next; + base_unmap(extent_hooks, base_ind_get(base), block, + block->size); + } while (next != NULL); +} + +extent_hooks_t * +base_extent_hooks_get(base_t *base) +{ + + return ((extent_hooks_t *)atomic_read_p(&base->extent_hooks_pun)); +} + +extent_hooks_t * +base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) +{ + extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base); + union { + extent_hooks_t **h; + void **v; + } u; + + u.h = &base->extent_hooks; + atomic_write_p(u.v, extent_hooks); + + return (old_extent_hooks); +} + +/* + * base_alloc() returns zeroed memory, which is always demand-zeroed for the + * auto arenas, in order to make multi-page sparse data structures such as radix + * tree nodes efficient with respect to physical memory usage. Upon success a + * pointer to at least size bytes with specified alignment is returned. 
Note + * that size is rounded up to the nearest multiple of alignment to avoid false + * sharing. */ void * -base_alloc(tsdn_t *tsdn, size_t size) +base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { void *ret; - size_t csize; + size_t usize, asize; szind_t i; extent_t *extent; - /* - * Round size up to nearest multiple of the cacheline size, so that - * there is no chance of false cache line sharing. - */ - csize = CACHELINE_CEILING(size); + alignment = QUANTUM_CEILING(alignment); + usize = ALIGNMENT_CEILING(size, alignment); + asize = usize + alignment - QUANTUM; extent = NULL; - malloc_mutex_lock(tsdn, &base_mtx); - for (i = size2index(csize); i < NSIZES; i++) { - extent = extent_heap_remove_first(&base_avail[i]); + malloc_mutex_lock(tsdn, &base->mtx); + for (i = size2index(asize); i < NSIZES; i++) { + extent = extent_heap_remove_first(&base->avail[i]); if (extent != NULL) { /* Use existing space. */ break; @@ -117,87 +313,60 @@ base_alloc(tsdn_t *tsdn, size_t size) } if (extent == NULL) { /* Try to allocate more space. */ - extent = base_extent_alloc(tsdn, csize); + extent = base_extent_alloc(tsdn, base, usize, alignment); } if (extent == NULL) { ret = NULL; goto label_return; } - ret = extent_addr_get(extent); - if (extent_size_get(extent) > csize) { - szind_t index_floor; - - extent_addr_set(extent, (void *)((uintptr_t)ret + csize)); - extent_size_set(extent, extent_size_get(extent) - csize); - /* - * Compute the index for the largest size class that does not - * exceed extent's size. - */ - index_floor = size2index(extent_size_get(extent) + 1) - 1; - extent_heap_insert(&base_avail[index_floor], extent); - } else - base_extent_dalloc(tsdn, extent); - if (config_stats) { - base_allocated += csize; - /* - * Add one PAGE to base_resident for every page boundary that is - * crossed by the new allocation. 
- */ - base_resident += PAGE_CEILING((uintptr_t)ret + csize) - - PAGE_CEILING((uintptr_t)ret); - } + ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); label_return: - malloc_mutex_unlock(tsdn, &base_mtx); + malloc_mutex_unlock(tsdn, &base->mtx); return (ret); } void -base_stats_get(tsdn_t *tsdn, size_t *allocated, size_t *resident, +base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped) { - malloc_mutex_lock(tsdn, &base_mtx); - assert(base_allocated <= base_resident); - assert(base_resident <= base_mapped); - *allocated = base_allocated; - *resident = base_resident; - *mapped = base_mapped; - malloc_mutex_unlock(tsdn, &base_mtx); + cassert(config_stats); + + malloc_mutex_lock(tsdn, &base->mtx); + assert(base->allocated <= base->resident); + assert(base->resident <= base->mapped); + *allocated = base->allocated; + *resident = base->resident; + *mapped = base->mapped; + malloc_mutex_unlock(tsdn, &base->mtx); +} + +void +base_prefork(tsdn_t *tsdn, base_t *base) +{ + + malloc_mutex_prefork(tsdn, &base->mtx); +} + +void +base_postfork_parent(tsdn_t *tsdn, base_t *base) +{ + + malloc_mutex_postfork_parent(tsdn, &base->mtx); +} + +void +base_postfork_child(tsdn_t *tsdn, base_t *base) +{ + + malloc_mutex_postfork_child(tsdn, &base->mtx); } bool -base_boot(void) -{ - szind_t i; - - if (malloc_mutex_init(&base_mtx, "base", WITNESS_RANK_BASE)) - return (true); - base_extent_sn_next = 0; - for (i = 0; i < NSIZES; i++) - extent_heap_new(&base_avail[i]); - base_extents = NULL; - - return (false); -} - -void -base_prefork(tsdn_t *tsdn) +base_boot(tsdn_t *tsdn) { - malloc_mutex_prefork(tsdn, &base_mtx); -} - -void -base_postfork_parent(tsdn_t *tsdn) -{ - - malloc_mutex_postfork_parent(tsdn, &base_mtx); -} - -void -base_postfork_child(tsdn_t *tsdn) -{ - - malloc_mutex_postfork_child(tsdn, &base_mtx); + b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); + return (b0 == NULL); } diff --git a/src/ctl.c 
b/src/ctl.c index 47b4768b..964896ab 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -55,7 +55,7 @@ static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats); static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i); -static bool ctl_grow(tsdn_t *tsdn); +static bool ctl_grow(tsdn_t *tsdn, extent_hooks_t *extent_hooks); static void ctl_refresh(tsdn_t *tsdn); static bool ctl_init(tsdn_t *tsdn); static int ctl_lookup(tsdn_t *tsdn, const char *name, @@ -174,7 +174,9 @@ CTL_PROTO(stats_arenas_i_retained) CTL_PROTO(stats_arenas_i_npurge) CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) -CTL_PROTO(stats_arenas_i_metadata) +CTL_PROTO(stats_arenas_i_base) +CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -392,7 +394,9 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("npurge"), CTL(stats_arenas_i_npurge)}, {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, {NAME("purged"), CTL(stats_arenas_i_purged)}, - {NAME("metadata"), CTL(stats_arenas_i_metadata)}, + {NAME("base"), CTL(stats_arenas_i_base)}, + {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -500,7 +504,9 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) sstats->astats.nmadvise += astats->astats.nmadvise; sstats->astats.purged += astats->astats.purged; - sstats->astats.metadata += astats->astats.metadata; + sstats->astats.base += astats->astats.base; + sstats->astats.internal += astats->astats.internal; + sstats->astats.resident += astats->astats.resident; sstats->allocated_small += astats->allocated_small; sstats->nmalloc_small += 
astats->nmalloc_small; @@ -556,12 +562,12 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) } static bool -ctl_grow(tsdn_t *tsdn) +ctl_grow(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { ctl_arena_stats_t *astats; /* Initialize new arena. */ - if (arena_init(tsdn, ctl_stats.narenas) == NULL) + if (arena_init(tsdn, ctl_stats.narenas, extent_hooks) == NULL) return (true); /* Allocate extended arena stats. */ @@ -615,20 +621,17 @@ ctl_refresh(tsdn_t *tsdn) } if (config_stats) { - size_t base_allocated, base_resident, base_mapped; - base_stats_get(tsdn, &base_allocated, &base_resident, - &base_mapped); ctl_stats.allocated = ctl_stats.arenas[ctl_stats.narenas].allocated_small + ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large; ctl_stats.active = (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); - ctl_stats.metadata = base_allocated + - ctl_stats.arenas[ctl_stats.narenas].astats.metadata; - ctl_stats.resident = base_resident + - ((ctl_stats.arenas[ctl_stats.narenas].pactive + - ctl_stats.arenas[ctl_stats.narenas].pdirty) << LG_PAGE); - ctl_stats.mapped = base_mapped + + ctl_stats.metadata = + ctl_stats.arenas[ctl_stats.narenas].astats.base + + ctl_stats.arenas[ctl_stats.narenas].astats.internal; + ctl_stats.resident = + ctl_stats.arenas[ctl_stats.narenas].astats.resident; + ctl_stats.mapped = ctl_stats.arenas[ctl_stats.narenas].astats.mapped; ctl_stats.retained = ctl_stats.arenas[ctl_stats.narenas].astats.retained; @@ -1167,7 +1170,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (oldarena == NULL) return (EAGAIN); - newind = oldind = oldarena->ind; + newind = oldind = arena_ind_get(oldarena); WRITE(newind, unsigned); READ(oldind, unsigned); if (newind != oldind) { @@ -1738,11 +1741,14 @@ arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + extent_hooks_t *extent_hooks; unsigned narenas; malloc_mutex_lock(tsd_tsdn(tsd), 
&ctl_mtx); - READONLY(); - if (ctl_grow(tsd_tsdn(tsd))) { + + extent_hooks = (extent_hooks_t *)&extent_hooks_default; + WRITE(extent_hooks, extent_hooks_t *); + if (ctl_grow(tsd_tsdn(tsd), extent_hooks)) { ret = EAGAIN; goto label_return; } @@ -1906,8 +1912,12 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, ctl_stats.arenas[mib[2]].astats.purged, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_metadata, - ctl_stats.arenas[mib[2]].astats.metadata, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_base, + ctl_stats.arenas[mib[2]].astats.base, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_internal, + ctl_stats.arenas[mib[2]].astats.internal, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_resident, + ctl_stats.arenas[mib[2]].astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, ctl_stats.arenas[mib[2]].allocated_small, size_t) diff --git a/src/extent.c b/src/extent.c index 827a9213..6eabde31 100644 --- a/src/extent.c +++ b/src/extent.c @@ -83,7 +83,8 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) extent = ql_last(&arena->extent_cache, ql_link); if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); - return (base_alloc(tsdn, sizeof(extent_t))); + return (base_alloc(tsdn, arena->base, sizeof(extent_t), + QUANTUM)); } ql_tail_remove(&arena->extent_cache, extent_t, ql_link); malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); @@ -104,22 +105,14 @@ extent_hooks_t * extent_hooks_get(arena_t *arena) { - return ((extent_hooks_t *)atomic_read_p(&arena->extent_hooks_pun)); + return (base_extent_hooks_get(arena->base)); } extent_hooks_t * extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) { - extent_hooks_t *old_extent_hooks = extent_hooks_get(arena); - union { - extent_hooks_t **h; - void **v; - } u; - u.h = &arena->extent_hooks; - atomic_write_p(u.v, extent_hooks); - - return (old_extent_hooks); + 
return (base_extent_hooks_set(arena->base, extent_hooks)); } static void @@ -873,7 +866,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, alignment, zero, commit); } else { addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, size, - alignment, zero, commit, arena->ind); + alignment, zero, commit, arena_ind_get(arena)); } if (addr == NULL) { extent_dalloc(tsdn, arena, extent); @@ -1071,7 +1064,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, err = ((*r_extent_hooks)->dalloc == NULL || (*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), - extent_committed_get(extent), arena->ind)); + extent_committed_get(extent), arena_ind_get(arena))); } if (!err) { @@ -1088,12 +1081,12 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, else if ((*r_extent_hooks)->purge_lazy != NULL && !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena->ind)) + extent_size_get(extent), arena_ind_get(arena))) zeroed = false; else if ((*r_extent_hooks)->purge_forced != NULL && !(*r_extent_hooks)->purge_forced(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena->ind)) + extent_size_get(extent), arena_ind_get(arena))) zeroed = true; else zeroed = false; @@ -1129,7 +1122,7 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); err = ((*r_extent_hooks)->commit == NULL || (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena->ind)); + extent_size_get(extent), offset, length, arena_ind_get(arena))); extent_committed_set(extent, extent_committed_get(extent) || !err); return (err); } @@ -1157,7 +1150,7 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, err = ((*r_extent_hooks)->decommit == NULL || (*r_extent_hooks)->decommit(*r_extent_hooks, extent_base_get(extent), 
extent_size_get(extent), offset, length, - arena->ind)); + arena_ind_get(arena))); extent_committed_set(extent, extent_committed_get(extent) && err); return (err); } @@ -1189,7 +1182,7 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, return ((*r_extent_hooks)->purge_lazy == NULL || (*r_extent_hooks)->purge_lazy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena->ind)); + arena_ind_get(arena))); } #ifdef PAGES_CAN_PURGE_FORCED @@ -1219,7 +1212,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, return ((*r_extent_hooks)->purge_forced == NULL || (*r_extent_hooks)->purge_forced(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena->ind)); + arena_ind_get(arena))); } #ifdef JEMALLOC_MAPS_COALESCE @@ -1280,7 +1273,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, if ((*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), - arena->ind)) + arena_ind_get(arena))) goto label_error_d; extent_size_set(extent, size_a); @@ -1348,7 +1341,8 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, } else { err = (*r_extent_hooks)->merge(*r_extent_hooks, extent_base_get(a), extent_size_get(a), extent_base_get(b), - extent_size_get(b), extent_committed_get(a), arena->ind); + extent_size_get(b), extent_committed_get(a), + arena_ind_get(arena)); } if (err) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7df3fc9e..2c49401f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -304,21 +304,21 @@ malloc_init(void) */ static void * -a0ialloc(size_t size, bool zero, bool is_metadata) +a0ialloc(size_t size, bool zero, bool is_internal) { if (unlikely(malloc_init_a0())) return (NULL); return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL, - is_metadata, arena_get(TSDN_NULL, 0, true), true)); + is_internal, arena_get(TSDN_NULL, 0, true), true)); } static void -a0idalloc(extent_t *extent, void *ptr, bool 
is_metadata) +a0idalloc(extent_t *extent, void *ptr, bool is_internal) { - idalloctm(TSDN_NULL, extent, ptr, false, is_metadata, true); + idalloctm(TSDN_NULL, extent, ptr, false, is_internal, true); } void * @@ -405,7 +405,7 @@ narenas_total_get(void) /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(tsdn_t *tsdn, unsigned ind) +arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; @@ -426,18 +426,18 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind) } /* Actually initialize the arena. */ - arena = arena_new(tsdn, ind); + arena = arena_new(tsdn, ind, extent_hooks); arena_set(ind, arena); return (arena); } arena_t * -arena_init(tsdn_t *tsdn, unsigned ind) +arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; malloc_mutex_lock(tsdn, &arenas_lock); - arena = arena_init_locked(tsdn, ind); + arena = arena_init_locked(tsdn, ind, extent_hooks); malloc_mutex_unlock(tsdn, &arenas_lock); return (arena); } @@ -629,7 +629,8 @@ arena_choose_hard(tsd_t *tsd, bool internal) /* Initialize a new arena. */ choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), - choose[j]); + choose[j], + (extent_hooks_t *)&extent_hooks_default); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); @@ -657,7 +658,7 @@ iarena_cleanup(tsd_t *tsd) iarena = tsd_iarena_get(tsd); if (iarena != NULL) - arena_unbind(tsd, iarena->ind, true); + arena_unbind(tsd, arena_ind_get(iarena), true); } void @@ -667,7 +668,7 @@ arena_cleanup(tsd_t *tsd) arena = tsd_arena_get(tsd); if (arena != NULL) - arena_unbind(tsd, arena->ind, false); + arena_unbind(tsd, arena_ind_get(arena), false); } void @@ -1211,7 +1212,7 @@ malloc_init_hard_a0_locked() } } pages_boot(); - if (base_boot()) + if (base_boot(TSDN_NULL)) return (true); if (extent_boot()) return (true); @@ -1236,7 +1237,8 @@ malloc_init_hard_a0_locked() * Initialize one arena here. 
The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(TSDN_NULL, 0) == NULL) + if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) == + NULL) return (true); malloc_init_state = malloc_init_a0_initialized; @@ -1309,8 +1311,8 @@ malloc_init_hard_finish(tsdn_t *tsdn) narenas_total_set(narenas_auto); /* Allocate and initialize arenas. */ - arenas = (arena_t **)base_alloc(tsdn, sizeof(arena_t *) * - (MALLOCX_ARENA_MAX+1)); + arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) * + (MALLOCX_ARENA_MAX+1), CACHELINE); if (arenas == NULL) return (true); /* Copy the pointer to the one arena that was already initialized. */ @@ -2690,7 +2692,6 @@ _malloc_prefork(void) } } } - base_prefork(tsd_tsdn(tsd)); for (i = 0; i < narenas; i++) { if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) arena_prefork3(tsd_tsdn(tsd), arena); @@ -2719,7 +2720,6 @@ _malloc_postfork(void) witness_postfork_parent(tsd); /* Release all mutexes, now that fork() has completed. */ - base_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -2743,7 +2743,6 @@ jemalloc_postfork_child(void) witness_postfork_child(tsd); /* Release all mutexes, now that fork() has completed. 
*/ - base_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/prof.c b/src/prof.c index 19c8fb71..b9a9d659 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2254,7 +2254,8 @@ prof_boot2(tsd_t *tsd) } gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - PROF_NCTX_LOCKS * sizeof(malloc_mutex_t)); + b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), + CACHELINE); if (gctx_locks == NULL) return (true); for (i = 0; i < PROF_NCTX_LOCKS; i++) { @@ -2264,7 +2265,8 @@ prof_boot2(tsd_t *tsd) } tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t)); + b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), + CACHELINE); if (tdata_locks == NULL) return (true); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { diff --git a/src/rtree.c b/src/rtree.c index b6b9ed76..fd5e85df 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -72,7 +72,8 @@ static rtree_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return ((rtree_elm_t *)base_alloc(tsdn, nelms * sizeof(rtree_elm_t))); + return ((rtree_elm_t *)base_alloc(tsdn, b0get(), nelms * + sizeof(rtree_elm_t), CACHELINE)); } #ifdef JEMALLOC_JET #undef rtree_node_alloc diff --git a/src/stats.c b/src/stats.c index e150a27f..0a3deaaa 100644 --- a/src/stats.c +++ b/src/stats.c @@ -254,7 +254,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, unsigned nthreads; const char *dss; ssize_t decay_time; - size_t page, pactive, pdirty, mapped, retained, metadata; + size_t page, pactive, pdirty, mapped, retained; + size_t base, internal, resident; uint64_t npurge, nmadvise, purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; @@ -404,14 +405,32 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "retained: %12zu\n", retained); } - CTL_M2_GET("stats.arenas.0.metadata", i, &metadata, size_t); + CTL_M2_GET("stats.arenas.0.base", i, &base, 
size_t); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"metadata\": %zu%s\n", metadata, (bins || large) ? + "\t\t\t\t\"base\": %zu,\n", base); + } else { + malloc_cprintf(write_cb, cbopaque, + "base: %12zu\n", base); + } + + CTL_M2_GET("stats.arenas.0.internal", i, &internal, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"internal\": %zu,\n", internal); + } else { + malloc_cprintf(write_cb, cbopaque, + "internal: %12zu\n", internal); + } + + CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"resident\": %zu%s\n", resident, (bins || large) ? "," : ""); } else { malloc_cprintf(write_cb, cbopaque, - "metadata: %12zu\n", metadata); + "resident: %12zu\n", resident); } if (bins) diff --git a/src/tcache.c b/src/tcache.c index 7f5b291c..fad52777 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -440,8 +440,8 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) tcaches_t *elm; if (tcaches == NULL) { - tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * - (MALLOCX_TCACHE_MAX+1)); + tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *) + * (MALLOCX_TCACHE_MAX+1), CACHELINE); if (tcaches == NULL) return (true); } @@ -510,8 +510,8 @@ tcache_boot(tsdn_t *tsdn) nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, nhbins * - sizeof(tcache_bin_info_t)); + tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins + * sizeof(tcache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) return (true); stack_nelms = 0; diff --git a/test/integration/extent.c b/test/integration/extent.c index b0fc52d6..e2bd0054 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -71,7 +71,7 @@ extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, assert_ptr_eq(extent_hooks->alloc, extent_alloc, "Wrong hook function"); did_alloc = true; return (old_hooks->alloc(old_hooks, new_addr, size, alignment, zero, - commit, arena_ind)); + commit, 0)); } static bool @@ -89,7 +89,7 @@ extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, did_dalloc = true; if (!do_dalloc) return (true); - return (old_hooks->dalloc(old_hooks, addr, size, committed, arena_ind)); + return (old_hooks->dalloc(old_hooks, addr, size, committed, 0)); } static bool @@ -105,8 +105,7 @@ extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, "extent_hooks should be same as pointer used to set hooks"); assert_ptr_eq(extent_hooks->commit, extent_commit, "Wrong hook function"); - err = old_hooks->commit(old_hooks, addr, size, offset, length, - arena_ind); + err = old_hooks->commit(old_hooks, addr, size, offset, length, 0); did_commit = !err; return (err); } @@ -126,8 +125,7 @@ extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); if (!do_decommit) return (true); - err = old_hooks->decommit(old_hooks, addr, size, offset, length, - arena_ind); + err = old_hooks->decommit(old_hooks, addr, size, offset, length, 0); did_decommit = !err; return (err); } @@ -146,8 +144,7 @@ extent_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); did_purge_lazy = true; return (old_hooks->purge_lazy == NULL || - old_hooks->purge_lazy(old_hooks, 
addr, size, offset, length, - arena_ind)); + old_hooks->purge_lazy(old_hooks, addr, size, offset, length, 0)); } static bool @@ -164,8 +161,7 @@ extent_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); did_purge_forced = true; return (old_hooks->purge_forced == NULL || - old_hooks->purge_forced(old_hooks, addr, size, offset, length, - arena_ind)); + old_hooks->purge_forced(old_hooks, addr, size, offset, length, 0)); } static bool @@ -183,7 +179,7 @@ extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->split, extent_split, "Wrong hook function"); tried_split = true; err = (old_hooks->split == NULL || old_hooks->split(old_hooks, addr, - size, size_a, size_b, committed, arena_ind)); + size, size_a, size_b, committed, 0)); did_split = !err; return (err); } @@ -202,51 +198,23 @@ extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "extent_hooks should be same as pointer used to set hooks"); assert_ptr_eq(extent_hooks->merge, extent_merge, "Wrong hook function"); err = (old_hooks->merge == NULL || old_hooks->merge(old_hooks, addr_a, - size_a, addr_b, size_b, committed, arena_ind)); + size_a, addr_b, size_b, committed, 0)); did_merge = !err; return (err); } -TEST_BEGIN(test_extent) +static void +test_extent_body(unsigned arena_ind) { void *p; - size_t old_size, new_size, large0, large1, large2, sz; - unsigned arena_ind; + size_t large0, large1, large2, sz; + size_t purge_mib[3]; + size_t purge_miblen; int flags; - size_t hooks_mib[3], purge_mib[3]; - size_t hooks_miblen, purge_miblen; bool xallocx_success_a, xallocx_success_b, xallocx_success_c; - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - /* Install custom extent hooks. 
*/ - hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, - &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); - hooks_mib[1] = (size_t)arena_ind; - old_size = sizeof(extent_hooks_t *); - new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, (void *)&new_hooks, new_size), 0, - "Unexpected extent_hooks error"); - orig_hooks = old_hooks; - assert_ptr_ne(old_hooks->alloc, extent_alloc, "Unexpected alloc error"); - assert_ptr_ne(old_hooks->dalloc, extent_dalloc, - "Unexpected dalloc error"); - assert_ptr_ne(old_hooks->commit, extent_commit, - "Unexpected commit error"); - assert_ptr_ne(old_hooks->decommit, extent_decommit, - "Unexpected decommit error"); - assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy, - "Unexpected purge_lazy error"); - assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced, - "Unexpected purge_forced error"); - assert_ptr_ne(old_hooks->split, extent_split, "Unexpected split error"); - assert_ptr_ne(old_hooks->merge, extent_merge, "Unexpected merge error"); - /* Get large size classes. */ sz = sizeof(size_t); assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, @@ -314,6 +282,45 @@ TEST_BEGIN(test_extent) p = mallocx(42, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, flags); +} + +TEST_BEGIN(test_extent_manual_hook) +{ + unsigned arena_ind; + size_t old_size, new_size, sz; + size_t hooks_mib[3]; + size_t hooks_miblen; + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + + /* Install custom extent hooks. 
*/ + hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, + &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; + old_size = sizeof(extent_hooks_t *); + new_size = sizeof(extent_hooks_t *); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, (void *)&new_hooks, new_size), 0, + "Unexpected extent_hooks error"); + orig_hooks = old_hooks; + assert_ptr_ne(old_hooks->alloc, extent_alloc, "Unexpected alloc error"); + assert_ptr_ne(old_hooks->dalloc, extent_dalloc, + "Unexpected dalloc error"); + assert_ptr_ne(old_hooks->commit, extent_commit, + "Unexpected commit error"); + assert_ptr_ne(old_hooks->decommit, extent_decommit, + "Unexpected decommit error"); + assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy, + "Unexpected purge_lazy error"); + assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced, + "Unexpected purge_forced error"); + assert_ptr_ne(old_hooks->split, extent_split, "Unexpected split error"); + assert_ptr_ne(old_hooks->merge, extent_merge, "Unexpected merge error"); + + test_extent_body(arena_ind); /* Restore extent hooks. 
*/ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, @@ -340,9 +347,25 @@ TEST_BEGIN(test_extent) } TEST_END +TEST_BEGIN(test_extent_auto_hook) +{ + unsigned arena_ind; + size_t new_size, sz; + + sz = sizeof(unsigned); + new_size = sizeof(extent_hooks_t *); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, + (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); + + test_extent_body(arena_ind); +} +TEST_END + int main(void) { - return (test(test_extent)); + return (test( + test_extent_manual_hook, + test_extent_auto_hook)); } diff --git a/test/unit/base.c b/test/unit/base.c new file mode 100644 index 00000000..6a082a5e --- /dev/null +++ b/test/unit/base.c @@ -0,0 +1,274 @@ +#include "test/jemalloc_test.h" + +static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind); +static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, + void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); + +static extent_hooks_t hooks_not_null = { + extent_alloc_hook, + extent_dalloc_hook, + NULL, /* commit */ + extent_decommit_hook, + extent_purge_lazy_hook, + extent_purge_forced_hook, + NULL, /* split */ + NULL /* merge */ +}; + +static extent_hooks_t hooks_null = { + extent_alloc_hook, + NULL, /* dalloc */ + NULL, /* commit */ + NULL, /* decommit */ + NULL, /* purge_lazy */ + NULL, /* purge_forced */ + NULL, /* split */ + NULL /* merge */ +}; + +static bool did_alloc; +static bool did_dalloc; +static bool 
did_decommit; +static bool did_purge_lazy; +static bool did_purge_forced; + +#if 0 +# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) +#else +# define TRACE_HOOK(fmt, ...) +#endif + +static void * +extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " + "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, + new_addr, size, alignment, *zero ? "true" : "false", *commit ? + "true" : "false", arena_ind); + did_alloc = true; + return (extent_hooks_default.alloc( + (extent_hooks_t *)&extent_hooks_default, new_addr, size, alignment, + zero, commit, 0)); +} + +static bool +extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? + "true" : "false", arena_ind); + did_dalloc = true; + return (true); /* Cause cascade. */ +} + +static bool +extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + did_decommit = true; + return (true); /* Cause cascade. */ +} + +static bool +extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + did_purge_lazy = true; + return (true); /* Cause cascade. 
*/ +} + +static bool +extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + did_purge_forced = true; + return (true); /* Cause cascade. */ +} + +TEST_BEGIN(test_base_hooks_default) +{ + tsdn_t *tsdn; + base_t *base; + size_t allocated0, allocated1, resident, mapped; + + tsdn = tsdn_fetch(); + base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); + + base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + assert_zu_ge(allocated0, sizeof(base_t), + "Base header should count as allocated"); + + assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), + "Unexpected base_alloc() failure"); + + base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + assert_zu_ge(allocated1 - allocated0, 42, + "At least 42 bytes were allocated by base_alloc()"); + + base_delete(base); +} +TEST_END + +TEST_BEGIN(test_base_hooks_null) +{ + tsdn_t *tsdn; + base_t *base; + size_t allocated0, allocated1, resident, mapped; + + tsdn = tsdn_fetch(); + base = base_new(tsdn, 0, (extent_hooks_t *)&hooks_null); + assert_ptr_not_null(base, "Unexpected base_new() failure"); + + base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + assert_zu_ge(allocated0, sizeof(base_t), + "Base header should count as allocated"); + + assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), + "Unexpected base_alloc() failure"); + + base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + assert_zu_ge(allocated1 - allocated0, 42, + "At least 42 bytes were allocated by base_alloc()"); + + base_delete(base); +} +TEST_END + +TEST_BEGIN(test_base_hooks_not_null) +{ + tsdn_t *tsdn; + base_t *base; + void *p, *q, *r, *r_exp; + + tsdn = tsdn_fetch(); + did_alloc = false; + base = base_new(tsdn, 0, (extent_hooks_t *)&hooks_not_null); + 
assert_ptr_not_null(base, "Unexpected base_new() failure"); + assert_true(did_alloc, "Expected alloc hook call"); + + /* + * Check for tight packing at specified alignment under simple + * conditions. + */ + { + const size_t alignments[] = { + 1, + QUANTUM, + QUANTUM << 1, + CACHELINE, + CACHELINE << 1, + }; + unsigned i; + + for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { + size_t alignment = alignments[i]; + size_t align_ceil = ALIGNMENT_CEILING(alignment, + QUANTUM); + p = base_alloc(tsdn, base, 1, alignment); + assert_ptr_not_null(p, + "Unexpected base_alloc() failure"); + assert_ptr_eq(p, + (void *)(ALIGNMENT_CEILING((uintptr_t)p, + alignment)), "Expected quantum alignment"); + q = base_alloc(tsdn, base, alignment, alignment); + assert_ptr_not_null(q, + "Unexpected base_alloc() failure"); + assert_ptr_eq((void *)((uintptr_t)p + align_ceil), q, + "Minimal allocation should take up %zu bytes", + align_ceil); + r = base_alloc(tsdn, base, 1, alignment); + assert_ptr_not_null(r, + "Unexpected base_alloc() failure"); + assert_ptr_eq((void *)((uintptr_t)q + align_ceil), r, + "Minimal allocation should take up %zu bytes", + align_ceil); + } + } + + /* + * Allocate an object that cannot fit in the first block, then verify + * that the first block's remaining space is considered for subsequent + * allocation. + */ + assert_zu_ge(extent_size_get(&base->blocks->extent), QUANTUM, + "Remainder insufficient for test"); + /* Use up all but one quantum of block. 
*/ + while (extent_size_get(&base->blocks->extent) > QUANTUM) { + p = base_alloc(tsdn, base, QUANTUM, QUANTUM); + assert_ptr_not_null(p, "Unexpected base_alloc() failure"); + } + r_exp = extent_addr_get(&base->blocks->extent); + assert_zu_eq(base->extent_sn_next, 1, "One extant block expected"); + q = base_alloc(tsdn, base, QUANTUM + 1, QUANTUM); + assert_ptr_not_null(q, "Unexpected base_alloc() failure"); + assert_ptr_ne(q, r_exp, "Expected allocation from new block"); + assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); + r = base_alloc(tsdn, base, QUANTUM, QUANTUM); + assert_ptr_not_null(r, "Unexpected base_alloc() failure"); + assert_ptr_eq(r, r_exp, "Expected allocation from first block"); + assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); + + /* + * Check for proper alignment support when normal blocks are too small. + */ + { + const size_t alignments[] = { + HUGEPAGE, + HUGEPAGE << 1 + }; + unsigned i; + + for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { + size_t alignment = alignments[i]; + p = base_alloc(tsdn, base, QUANTUM, alignment); + assert_ptr_not_null(p, + "Unexpected base_alloc() failure"); + assert_ptr_eq(p, + (void *)(ALIGNMENT_CEILING((uintptr_t)p, + alignment)), "Expected %zu-byte alignment", + alignment); + } + } + + did_dalloc = did_decommit = did_purge_lazy = did_purge_forced = false; + base_delete(base); + assert_true(did_dalloc, "Expected dalloc hook call"); + assert_true(did_decommit, "Expected decommit hook call"); + assert_true(did_purge_lazy, "Expected purge_lazy hook call"); + assert_true(did_purge_forced, "Expected purge_forced hook call"); +} +TEST_END + +int +main(void) +{ + + return (test( + test_base_hooks_default, + test_base_hooks_null, + test_base_hooks_not_null)); +} From 5c5ff8d121e1f8389d18dfe22912739b99e893a8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Jan 2017 20:09:03 -0800 Subject: [PATCH 0570/2608] Fix arena_large_reset_stats_cancel(). 
Decrement ndalloc_large rather than incrementing, in order to cancel out the increment in arena_large_dalloc_stats_update(). --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index d5e87ead..2c3cc5ca 100644 --- a/src/arena.c +++ b/src/arena.c @@ -260,7 +260,7 @@ arena_large_reset_stats_cancel(arena_t *arena, size_t usize) cassert(config_stats); - arena->stats.ndalloc_large++; + arena->stats.ndalloc_large--; arena->stats.lstats[hindex].ndalloc--; } From 363629df88fc9d32cd4efbcc3c1a3eef1bbfe525 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Jan 2017 18:56:02 -0800 Subject: [PATCH 0571/2608] Fix allocated_large stats with respect to sampled small allocations. --- src/arena.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 2c3cc5ca..ec8d4790 100644 --- a/src/arena.c +++ b/src/arena.c @@ -211,11 +211,15 @@ arena_nactive_sub(arena_t *arena, size_t sub_pages) static void arena_large_malloc_stats_update(arena_t *arena, size_t usize) { - szind_t index = size2index(usize); - szind_t hindex = (index >= NBINS) ? index - NBINS : 0; + szind_t index, hindex; cassert(config_stats); + if (usize < LARGE_MINCLASS) + usize = LARGE_MINCLASS; + index = size2index(usize); + hindex = (index >= NBINS) ? index - NBINS : 0; + arena->stats.nmalloc_large++; arena->stats.allocated_large += usize; arena->stats.lstats[hindex].nmalloc++; @@ -226,11 +230,15 @@ arena_large_malloc_stats_update(arena_t *arena, size_t usize) static void arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) { - szind_t index = size2index(usize); - szind_t hindex = (index >= NBINS) ? index - NBINS : 0; + szind_t index, hindex; cassert(config_stats); + if (usize < LARGE_MINCLASS) + usize = LARGE_MINCLASS; + index = size2index(usize); + hindex = (index >= NBINS) ? 
index - NBINS : 0; + arena->stats.nmalloc_large--; arena->stats.allocated_large -= usize; arena->stats.lstats[hindex].nmalloc--; @@ -241,11 +249,15 @@ arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) static void arena_large_dalloc_stats_update(arena_t *arena, size_t usize) { - szind_t index = size2index(usize); - szind_t hindex = (index >= NBINS) ? index - NBINS : 0; + szind_t index, hindex; cassert(config_stats); + if (usize < LARGE_MINCLASS) + usize = LARGE_MINCLASS; + index = size2index(usize); + hindex = (index >= NBINS) ? index - NBINS : 0; + arena->stats.ndalloc_large++; arena->stats.allocated_large -= usize; arena->stats.lstats[hindex].ndalloc++; From d0a3129b8809b9f049dd0a0f8e7921d79cddc104 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Jan 2017 18:57:18 -0800 Subject: [PATCH 0572/2608] Fix locking in arena_dirty_count(). This was a latent bug, since the function is (intentionally) not used. --- src/arena.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index ec8d4790..3c31cc87 100644 --- a/src/arena.c +++ b/src/arena.c @@ -639,12 +639,14 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) extent_t *extent; size_t ndirty = 0; - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + malloc_mutex_lock(tsdn, &arena->extents_mtx); for (extent = qr_next(&arena->extents_dirty, qr_link); extent != &arena->extents_dirty; extent = qr_next(extent, qr_link)) ndirty += extent_size_get(extent) >> LG_PAGE; + malloc_mutex_unlock(tsdn, &arena->extents_mtx); + return (ndirty); } From 027ace8519eb4ed736568082cc7e96b3f9423de8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 27 Dec 2016 19:16:41 -0800 Subject: [PATCH 0573/2608] Reindent. 
--- include/jemalloc/jemalloc_macros.h.in | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 673ffd9b..f1a8049d 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -11,25 +11,25 @@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -# define MALLOCX_LG_ALIGN(la) ((int)(la)) -# if LG_SIZEOF_PTR == 2 -# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) -# else -# define MALLOCX_ALIGN(a) \ - ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ - ffs((int)(((size_t)(a))>>32))+31)) -# endif -# define MALLOCX_ZERO ((int)0x40) +#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#if LG_SIZEOF_PTR == 2 +# define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) +#else +# define MALLOCX_ALIGN(a) \ + ((int)(((size_t)(a) < (size_t)INT_MAX) ? ffs((int)(a))-1 : \ + ffs((int)(((size_t)(a))>>32))+31)) +#endif +#define MALLOCX_ZERO ((int)0x40) /* * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 * encodes MALLOCX_TCACHE_NONE. */ -# define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -# define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". */ -# define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) +#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() From 3dc4e83ccb448436894fbbd0b46f126cff0c1416 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Jan 2017 07:27:42 -0800 Subject: [PATCH 0574/2608] Add MALLCTL_ARENAS_ALL. 
Add the MALLCTL_ARENAS_ALL cpp macro as a fixed index for use in accessing the arena..{purge,decay,dss} and stats.arenas..* mallctls, and deprecate access via the arenas.narenas index (to be removed in 6.0.0). --- doc/jemalloc.xml.in | 37 +++-- include/jemalloc/internal/util.h | 4 + include/jemalloc/jemalloc_macros.h.in | 14 ++ src/ctl.c | 198 ++++++++++++++++---------- src/stats.c | 2 +- test/unit/mallctl.c | 8 ++ 6 files changed, 171 insertions(+), 92 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5923481a..f6b50627 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -694,12 +694,22 @@ for (i = 0; i < nbins; i++) { any. A name element encoded as <i> or <j> indicates an integer component, where the integer varies from 0 to some upper value that must be determined via - introspection. In the case of stats.arenas.<i>.*, - <i> equal to arenas.narenas can be - used to access the summation of statistics from all arenas. Take special - note of the epoch mallctl, - which controls refreshing of cached dynamic statistics. + introspection. In the case of stats.arenas.<i>.* + and arena.<i>.{purge,decay,dss}, + <i> equal to + MALLCTL_ARENAS_ALL can be used to operate on all arenas + or access the summation of statistics from all arenas. This constant can be + utilized either via mallctlnametomib() followed by + mallctlbymib(), or via code such as the following: + + Take special note of the + epoch mallctl, which + controls refreshing of cached dynamic statistics. @@ -1422,8 +1432,7 @@ malloc_conf = "xmalloc:true";]]> -- Purge all unused dirty pages for arena <i>, or for - all arenas if <i> equals arenas.narenas. + all arenas if <i> equals MALLCTL_ARENAS_ALL. @@ -1434,10 +1443,9 @@ malloc_conf = "xmalloc:true";]]> -- Trigger decay-based purging of unused dirty pages for - arena <i>, or for all arenas if <i> equals arenas.narenas. - The proportion of unused dirty pages to be purged depends on the current - time; see MALLCTL_ARENAS_ALL. 
The proportion of unused dirty + pages to be purged depends on the current time; see opt.decay_time for details. @@ -1465,9 +1473,8 @@ malloc_conf = "xmalloc:true";]]> Set the precedence of dss allocation as related to mmap allocation for arena <i>, or for all arenas if <i> equals - arenas.narenas. See - opt.dss for supported + MALLCTL_ARENAS_ALL. See opt.dss for supported settings. diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index d9f97416..592806dc 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -54,6 +54,10 @@ */ #define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +/* cpp macro definition stringification. */ +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + /* * Silence compiler warnings due to uninitialized values. This is used * wherever the compiler fails to recognize that the variable is never used diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index f1a8049d..ea41e2e8 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -31,6 +31,20 @@ */ #define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) +/* + * Use as arena index in "arena..{purge,decay,dss}" and + * "stats.arenas..*" mallctl interfaces to select all arenas. This + * definition is intentionally specified in raw decimal format to support + * cpp-based string concatenation, e.g. + * + * #define STRINGIFY_HELPER(x) #x + * #define STRINGIFY(x) STRINGIFY_HELPER(x) + * + * mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, + * 0); + */ +#define MALLCTL_ARENAS_ALL 4096 + #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() #else diff --git a/src/ctl.c b/src/ctl.c index 964896ab..4e5511e4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -443,6 +443,51 @@ static const ctl_named_node_t super_root_node[] = { /******************************************************************************/ +static unsigned +stats_arenas_i2a_impl(size_t i, bool compat, bool validate) +{ + unsigned a; + + cassert(config_stats); + + switch (i) { + case MALLCTL_ARENAS_ALL: + a = 0; + break; + default: + if (compat && i == ctl_stats.narenas) { + /* + * Provide deprecated backward compatibility for + * accessing the merged stats at index narenas rather + * than via MALLCTL_ARENAS_ALL. This is scheduled for + * removal in 6.0.0. + */ + a = 0; + } else if (validate && i >= ctl_stats.narenas) + a = UINT_MAX; + else { + /* + * This function should never be called for an index + * more than one past the range of indices that have + * initialized stats. 
+ */ + assert(i < ctl_stats.narenas || (!validate && i == + ctl_stats.narenas)); + a = (unsigned)i + 1; + } + break; + } + + return (a); +} + +static ctl_arena_stats_t * +stats_arenas_i(size_t i) +{ + + return (&ctl_stats.arenas[stats_arenas_i2a_impl(i, true, false)]); +} + static void ctl_arena_clear(ctl_arena_stats_t *astats) { @@ -552,8 +597,8 @@ ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) { - ctl_arena_stats_t *astats = &ctl_stats.arenas[i]; - ctl_arena_stats_t *sstats = &ctl_stats.arenas[ctl_stats.narenas]; + ctl_arena_stats_t *astats = stats_arenas_i(i); + ctl_arena_stats_t *sstats = stats_arenas_i(MALLCTL_ARENAS_ALL); ctl_arena_clear(astats); ctl_arena_stats_amerge(tsdn, astats, arena); @@ -580,16 +625,6 @@ ctl_grow(tsdn_t *tsdn, extent_hooks_t *extent_hooks) memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); - /* Swap merged stats to their new location. */ - { - ctl_arena_stats_t tstats; - memcpy(&tstats, &astats[ctl_stats.narenas], - sizeof(ctl_arena_stats_t)); - memcpy(&astats[ctl_stats.narenas], - &astats[ctl_stats.narenas + 1], sizeof(ctl_arena_stats_t)); - memcpy(&astats[ctl_stats.narenas + 1], &tstats, - sizeof(ctl_arena_stats_t)); - } a0dalloc(ctl_stats.arenas); ctl_stats.arenas = astats; ctl_stats.narenas++; @@ -601,40 +636,36 @@ static void ctl_refresh(tsdn_t *tsdn) { unsigned i; + ctl_arena_stats_t *sstats = stats_arenas_i(MALLCTL_ARENAS_ALL); VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
*/ - ctl_arena_clear(&ctl_stats.arenas[ctl_stats.narenas]); + ctl_arena_clear(sstats); for (i = 0; i < ctl_stats.narenas; i++) tarenas[i] = arena_get(tsdn, i, false); for (i = 0; i < ctl_stats.narenas; i++) { + ctl_arena_stats_t *astats = stats_arenas_i(i); bool initialized = (tarenas[i] != NULL); - ctl_stats.arenas[i].initialized = initialized; + astats->initialized = initialized; if (initialized) ctl_arena_refresh(tsdn, tarenas[i], i); } if (config_stats) { - ctl_stats.allocated = - ctl_stats.arenas[ctl_stats.narenas].allocated_small + - ctl_stats.arenas[ctl_stats.narenas].astats.allocated_large; - ctl_stats.active = - (ctl_stats.arenas[ctl_stats.narenas].pactive << LG_PAGE); - ctl_stats.metadata = - ctl_stats.arenas[ctl_stats.narenas].astats.base + - ctl_stats.arenas[ctl_stats.narenas].astats.internal; - ctl_stats.resident = - ctl_stats.arenas[ctl_stats.narenas].astats.resident; - ctl_stats.mapped = - ctl_stats.arenas[ctl_stats.narenas].astats.mapped; - ctl_stats.retained = - ctl_stats.arenas[ctl_stats.narenas].astats.retained; + ctl_stats.allocated = sstats->allocated_small + + sstats->astats.allocated_large; + ctl_stats.active = (sstats->pactive << LG_PAGE); + ctl_stats.metadata = sstats->astats.base + + sstats->astats.internal; + ctl_stats.resident = sstats->astats.resident; + ctl_stats.mapped = sstats->astats.mapped; + ctl_stats.retained = sstats->astats.retained; } ctl_epoch++; @@ -660,7 +691,7 @@ ctl_init(tsdn_t *tsdn) } memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); - ctl_stats.arenas[ctl_stats.narenas].initialized = true; + stats_arenas_i(MALLCTL_ARENAS_ALL)->initialized = true; ctl_epoch = 0; ctl_refresh(tsdn); @@ -1399,7 +1430,11 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { unsigned narenas = ctl_stats.narenas; - if (arena_ind == narenas) { + /* + * Access via index narenas is deprecated, and scheduled for + * removal in 6.0.0. 
+ */ + if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == narenas) { unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, narenas); @@ -1482,6 +1517,10 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, assert(arena_ind >= opt_narenas); arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } arena_reset(tsd, arena); @@ -1520,7 +1559,18 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } } - if (arena_ind < ctl_stats.narenas) { + /* + * Access via index narenas is deprecated, and scheduled for removal in + * 6.0.0. + */ + if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == ctl_stats.narenas) { + if (dss_prec != dss_prec_limit && + extent_dss_prec_set(dss_prec)) { + ret = EFAULT; + goto label_return; + } + dss_prec_old = extent_dss_prec_get(); + } else { arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) { @@ -1528,13 +1578,6 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, goto label_return; } dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); - } else { - if (dss_prec != dss_prec_limit && - extent_dss_prec_set(dss_prec)) { - ret = EFAULT; - goto label_return; - } - dss_prec_old = extent_dss_prec_get(); } dss = dss_prec_names[dss_prec_old]; @@ -1621,7 +1664,7 @@ arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); - if (i > ctl_stats.narenas) { + if (i > ctl_stats.narenas && i != MALLCTL_ARENAS_ALL) { ret = NULL; goto label_return; } @@ -1675,7 +1718,7 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } for (i = 0; i < nread; i++) - ((bool *)oldp)[i] = ctl_stats.arenas[i].initialized; + ((bool *)oldp)[i] = stats_arenas_i(i)->initialized; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); 
@@ -1896,64 +1939,65 @@ CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) -CTL_RO_GEN(stats_arenas_i_dss, ctl_stats.arenas[mib[2]].dss, const char *) -CTL_RO_GEN(stats_arenas_i_decay_time, ctl_stats.arenas[mib[2]].decay_time, +CTL_RO_GEN(stats_arenas_i_dss, stats_arenas_i(mib[2])->dss, const char *) +CTL_RO_GEN(stats_arenas_i_decay_time, stats_arenas_i(mib[2])->decay_time, ssize_t) -CTL_RO_GEN(stats_arenas_i_nthreads, ctl_stats.arenas[mib[2]].nthreads, unsigned) -CTL_RO_GEN(stats_arenas_i_pactive, ctl_stats.arenas[mib[2]].pactive, size_t) -CTL_RO_GEN(stats_arenas_i_pdirty, ctl_stats.arenas[mib[2]].pdirty, size_t) +CTL_RO_GEN(stats_arenas_i_nthreads, stats_arenas_i(mib[2])->nthreads, + unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, stats_arenas_i(mib[2])->pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, stats_arenas_i(mib[2])->pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - ctl_stats.arenas[mib[2]].astats.mapped, size_t) + stats_arenas_i(mib[2])->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - ctl_stats.arenas[mib[2]].astats.retained, size_t) + stats_arenas_i(mib[2])->astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - ctl_stats.arenas[mib[2]].astats.npurge, uint64_t) + stats_arenas_i(mib[2])->astats.npurge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - ctl_stats.arenas[mib[2]].astats.nmadvise, uint64_t) + stats_arenas_i(mib[2])->astats.nmadvise, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - ctl_stats.arenas[mib[2]].astats.purged, uint64_t) + stats_arenas_i(mib[2])->astats.purged, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - ctl_stats.arenas[mib[2]].astats.base, size_t) + stats_arenas_i(mib[2])->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, - 
ctl_stats.arenas[mib[2]].astats.internal, size_t) + stats_arenas_i(mib[2])->astats.internal, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - ctl_stats.arenas[mib[2]].astats.resident, size_t) + stats_arenas_i(mib[2])->astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, - ctl_stats.arenas[mib[2]].allocated_small, size_t) + stats_arenas_i(mib[2])->allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, - ctl_stats.arenas[mib[2]].nmalloc_small, uint64_t) + stats_arenas_i(mib[2])->nmalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, - ctl_stats.arenas[mib[2]].ndalloc_small, uint64_t) + stats_arenas_i(mib[2])->ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, - ctl_stats.arenas[mib[2]].nrequests_small, uint64_t) + stats_arenas_i(mib[2])->nrequests_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - ctl_stats.arenas[mib[2]].astats.allocated_large, size_t) + stats_arenas_i(mib[2])->astats.allocated_large, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) + stats_arenas_i(mib[2])->astats.nmalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - ctl_stats.arenas[mib[2]].astats.ndalloc_large, uint64_t) + stats_arenas_i(mib[2])->astats.ndalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - ctl_stats.arenas[mib[2]].astats.nmalloc_large, uint64_t) /* Intentional. */ + stats_arenas_i(mib[2])->astats.nmalloc_large, uint64_t) /* Intentional. 
*/ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nmalloc, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, - ctl_stats.arenas[mib[2]].bstats[mib[4]].ndalloc, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nrequests, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, - ctl_stats.arenas[mib[2]].bstats[mib[4]].curregs, size_t) + stats_arenas_i(mib[2])->bstats[mib[4]].curregs, size_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nfills, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].nfills, uint64_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nflushes, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].nflushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs, - ctl_stats.arenas[mib[2]].bstats[mib[4]].nslabs, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].nslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, - ctl_stats.arenas[mib[2]].bstats[mib[4]].reslabs, uint64_t) + stats_arenas_i(mib[2])->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, - ctl_stats.arenas[mib[2]].bstats[mib[4]].curslabs, size_t) + stats_arenas_i(mib[2])->bstats[mib[4]].curslabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, @@ -1966,13 +2010,13 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nmalloc, uint64_t) + 
stats_arenas_i(mib[2])->lstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - ctl_stats.arenas[mib[2]].lstats[mib[4]].ndalloc, uint64_t) + stats_arenas_i(mib[2])->lstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - ctl_stats.arenas[mib[2]].lstats[mib[4]].nrequests, uint64_t) + stats_arenas_i(mib[2])->lstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, - ctl_stats.arenas[mib[2]].lstats[mib[4]].curlextents, size_t) + stats_arenas_i(mib[2])->lstats[mib[4]].curlextents, size_t) static const ctl_named_node_t * stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, @@ -1987,10 +2031,12 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, static const ctl_named_node_t * stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - const ctl_named_node_t * ret; + const ctl_named_node_t *ret; + size_t a; malloc_mutex_lock(tsdn, &ctl_mtx); - if (i > ctl_stats.narenas || !ctl_stats.arenas[i].initialized) { + a = stats_arenas_i2a_impl(i, true, true); + if (a == UINT_MAX || !ctl_stats.arenas[a].initialized) { ret = NULL; goto label_return; } diff --git a/src/stats.c b/src/stats.c index 0a3deaaa..ad7d7ba4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -841,7 +841,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\nMerged arenas stats:\n"); } stats_arena_print(write_cb, cbopaque, json, - narenas, bins, large); + MALLCTL_ARENAS_ALL, bins, large); if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", (ninitialized > 1) ? 
diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5073c7b1..e0efdce1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -402,6 +402,10 @@ TEST_BEGIN(test_arena_i_purge) mib[1] = narenas; assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); + + mib[1] = MALLCTL_ARENAS_ALL; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); } TEST_END @@ -422,6 +426,10 @@ TEST_BEGIN(test_arena_i_decay) mib[1] = narenas; assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); + + mib[1] = MALLCTL_ARENAS_ALL; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); } TEST_END From 0f04bb1d6fc27c7fa5f6268d045c78bdc600ff65 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Jan 2017 08:21:29 -0800 Subject: [PATCH 0575/2608] Rename the arenas.extend mallctl to arenas.create. --- doc/jemalloc.xml.in | 18 +++++++++--------- include/jemalloc/internal/arena.h | 6 +++--- src/ctl.c | 6 +++--- src/jemalloc.c | 2 +- test/integration/MALLOCX_ARENA.c | 4 ++-- test/integration/extent.c | 4 ++-- test/integration/xallocx.c | 2 +- test/unit/arena_reset.c | 2 +- test/unit/mallctl.c | 6 +++--- test/unit/pack.c | 8 ++++---- 10 files changed, 29 insertions(+), 29 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f6b50627..36aae37c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1457,8 +1457,8 @@ malloc_conf = "xmalloc:true";]]> -- Discard all of the arena's extant allocations. This - interface can only be used with arenas created via arenas.extend. None + interface can only be used with arenas explicitly created via arenas.create. None of the arena's discarded/cached allocations may accessed afterward. 
As part of this requirement, all thread caches which were used to allocate/deallocate in conjunction with the arena must be flushed @@ -1504,8 +1504,8 @@ malloc_conf = "xmalloc:true";]]> arena <i>. The functions must be capable of operating on all extant extents associated with arena <i>, usually by passing unknown extents to the replaced functions. In practice, it is feasible - to control allocation for arenas created via arenas.extend such + to control allocation for arenas explicitly created via arenas.create such that all extents originate from an application-supplied extent allocator (by specifying the custom extent hook functions during arena creation), but the automatically created arenas will have already created extents @@ -1836,15 +1836,15 @@ struct extent_hooks_s { class. - + - arenas.extend + arenas.create (unsigned, extent_hooks_t *) rw - Extend the array of arenas by appending a new arena with - optionally specified extent hooks, and returning the new arena - index. + Explicitly create a new arena outside the range of + automatically managed arenas, with optionally specified extent hooks, + and return the new arena index. diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index d889852e..929adbe9 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -148,9 +148,9 @@ struct arena_s { * atomic operations. Each thread has two distinct assignments, one for * application-serving allocation, and the other for internal metadata * allocation. Internal metadata must not be allocated from arenas - * created via the arenas.extend mallctl, because the arena..reset - * mallctl indiscriminately discards all allocations for the affected - * arena. + * explicitly created via the arenas.create mallctl, because the + * arena..reset mallctl indiscriminately discards all allocations for + * the affected arena. * * 0: Application allocation. * 1: Internal metadata allocation. 
diff --git a/src/ctl.c b/src/ctl.c index 4e5511e4..872da80f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -133,7 +133,7 @@ CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlextents) -CTL_PROTO(arenas_extend) +CTL_PROTO(arenas_create) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -323,7 +323,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlextents"), CTL(arenas_nlextents)}, {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("extend"), CTL(arenas_extend)} + {NAME("create"), CTL(arenas_create)} }; static const ctl_named_node_t prof_node[] = { @@ -1780,7 +1780,7 @@ arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) } static int -arenas_extend_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, +arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; diff --git a/src/jemalloc.c b/src/jemalloc.c index 2c49401f..2acab412 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -533,7 +533,7 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) * Copy to tdata array. It's possible that the actual number of arenas * has increased since narenas_total_get() was called above, but that * causes no correctness issues unless two threads concurrently execute - * the arenas.extend mallctl, which we trust mallctl synchronization to + * the arenas.create mallctl, which we trust mallctl synchronization to * prevent. 
*/ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 910a096f..58032da8 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -19,8 +19,8 @@ thd_start(void *arg) size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.create"); if (thread_ind % 4 != 3) { size_t mib[3]; diff --git a/test/integration/extent.c b/test/integration/extent.c index e2bd0054..6be3b836 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -292,7 +292,7 @@ TEST_BEGIN(test_extent_manual_hook) size_t hooks_miblen; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); /* Install custom extent hooks. */ @@ -354,7 +354,7 @@ TEST_BEGIN(test_extent_auto_hook) sz = sizeof(unsigned); new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); test_extent_body(arena_ind); diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index f6083728..d35ca39e 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -16,7 +16,7 @@ arena_ind(void) if (ind == 0) { size_t sz = sizeof(ind); - assert_d_eq(mallctl("arenas.extend", (void *)&ind, &sz, NULL, + assert_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, 0), 0, "Unexpected mallctl failure creating arena"); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 6c944b2e..3a1b30f5 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -90,7 +90,7 @@ TEST_BEGIN(test_arena_reset) tsdn_t *tsdn; sz = 
sizeof(unsigned); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e0efdce1..95c27753 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -584,14 +584,14 @@ TEST_BEGIN(test_arenas_lextent_constants) } TEST_END -TEST_BEGIN(test_arenas_extend) +TEST_BEGIN(test_arenas_create) { unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.extend", (void *)&arena, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -647,6 +647,6 @@ main(void) test_arenas_constants, test_arenas_bin_constants, test_arenas_lextent_constants, - test_arenas_extend, + test_arenas_create, test_stats_arenas)); } diff --git a/test/unit/pack.c b/test/unit/pack.c index 10df08e3..81ded4ec 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -68,14 +68,14 @@ nregs_per_run_compute(void) } static unsigned -arenas_extend_mallctl(void) +arenas_create_mallctl(void) { unsigned arena_ind; size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, &sz, NULL, 0), - 0, "Error in arenas.extend"); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Error in arenas.create"); return (arena_ind); } @@ -95,7 +95,7 @@ arena_reset_mallctl(unsigned arena_ind) TEST_BEGIN(test_pack) { - unsigned arena_ind = arenas_extend_mallctl(); + unsigned arena_ind = arenas_create_mallctl(); size_t nregs_per_run = nregs_per_run_compute(); size_t nregs = nregs_per_run * 
NSLABS; VARIABLE_ARRAY(void *, ptrs, nregs); From d778dd2afcc338cfd521c01382b8ed84a466aa1a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Jan 2017 12:40:54 -0800 Subject: [PATCH 0576/2608] Refactor ctl_stats_t. Refactor ctl_stats_t to be a demand-zeroed non-growing data structure. To keep the size from being onerous (~60 MiB) on 32-bit systems, convert the arenas field to contain pointers rather than directly embedded ctl_arena_stats_t elements. --- include/jemalloc/internal/ctl.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 21 ++- src/ctl.c | 151 +++++++++++------- 3 files changed, 107 insertions(+), 67 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 4d4f3043..dfb1e8ef 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -61,7 +61,7 @@ struct ctl_stats_s { size_t mapped; size_t retained; unsigned narenas; - ctl_arena_stats_t *arenas; /* (narenas + 1) elements. */ + ctl_arena_stats_t *arenas[1 << MALLOCX_ARENA_BITS]; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 11a27366..991c541f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -208,11 +208,18 @@ typedef unsigned szind_t; * * aaaaaaaa aaaatttt tttttttt 0znnnnnn */ -#define MALLOCX_ARENA_MASK ((int)~0xfffff) -#define MALLOCX_ARENA_MAX 0xffe -#define MALLOCX_TCACHE_MASK ((int)~0xfff000ffU) -#define MALLOCX_TCACHE_MAX 0xffd -#define MALLOCX_LG_ALIGN_MASK ((int)0x3f) +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ + (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) +#define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) +#define MALLOCX_TCACHE_MASK \ + (((1 << MALLOCX_TCACHE_BITS) - 1) << 
MALLOCX_TCACHE_SHIFT) +#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ #define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) @@ -222,9 +229,9 @@ typedef unsigned szind_t; ((bool)(flags & MALLOCX_ZERO)) #define MALLOCX_TCACHE_GET(flags) \ - (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> 8)) - 2) + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) #define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(((unsigned)flags) >> 20)) - 1) + (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) /* Smallest size class to support. */ #define TINY_MIN (1U << LG_TINY_MIN) diff --git a/src/ctl.c b/src/ctl.c index 872da80f..d5b384c0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -6,12 +6,12 @@ /* * ctl_mtx protects the following: - * - ctl_stats.* + * - ctl_stats->* */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; static uint64_t ctl_epoch; -static ctl_stats_t ctl_stats; +static ctl_stats_t *ctl_stats; /******************************************************************************/ /* Helpers for named and indexed nodes. */ @@ -455,7 +455,7 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) a = 0; break; default: - if (compat && i == ctl_stats.narenas) { + if (compat && i == ctl_stats->narenas) { /* * Provide deprecated backward compatibility for * accessing the merged stats at index narenas rather @@ -463,7 +463,7 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) * removal in 6.0.0. */ a = 0; - } else if (validate && i >= ctl_stats.narenas) + } else if (validate && i >= ctl_stats->narenas) a = UINT_MAX; else { /* @@ -471,8 +471,8 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) * more than one past the range of indices that have * initialized stats. 
*/ - assert(i < ctl_stats.narenas || (!validate && i == - ctl_stats.narenas)); + assert(i < ctl_stats->narenas || (!validate && i == + ctl_stats->narenas)); a = (unsigned)i + 1; } break; @@ -481,11 +481,32 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) return (a); } +static ctl_arena_stats_t * +stats_arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) +{ + ctl_arena_stats_t *ret; + + assert(!compat || !init); + + ret = ctl_stats->arenas[stats_arenas_i2a_impl(i, compat, false)]; + if (init && ret == NULL) { + ret = (ctl_arena_stats_t *)base_alloc(tsdn, b0get(), + sizeof(ctl_arena_stats_t), QUANTUM); + if (ret == NULL) + return (NULL); + ctl_stats->arenas[stats_arenas_i2a_impl(i, compat, false)] = + ret; + } + + return (ret); +} + static ctl_arena_stats_t * stats_arenas_i(size_t i) { - - return (&ctl_stats.arenas[stats_arenas_i2a_impl(i, true, false)]); + ctl_arena_stats_t *ret = stats_arenas_i_impl(TSDN_NULL, i, true, false); + assert(ret != NULL); + return (ret); } static void @@ -609,25 +630,15 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) static bool ctl_grow(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { - ctl_arena_stats_t *astats; + + /* Trigger stats allocation. */ + if (stats_arenas_i_impl(tsdn, ctl_stats->narenas, false, true) == NULL) + return (true); /* Initialize new arena. */ - if (arena_init(tsdn, ctl_stats.narenas, extent_hooks) == NULL) + if (arena_init(tsdn, ctl_stats->narenas, extent_hooks) == NULL) return (true); - - /* Allocate extended arena stats. */ - astats = (ctl_arena_stats_t *)a0malloc((ctl_stats.narenas + 2) * - sizeof(ctl_arena_stats_t)); - if (astats == NULL) - return (true); - - /* Initialize the new astats element. 
*/ - memcpy(astats, ctl_stats.arenas, (ctl_stats.narenas + 1) * - sizeof(ctl_arena_stats_t)); - memset(&astats[ctl_stats.narenas + 1], 0, sizeof(ctl_arena_stats_t)); - a0dalloc(ctl_stats.arenas); - ctl_stats.arenas = astats; - ctl_stats.narenas++; + ctl_stats->narenas++; return (false); } @@ -637,7 +648,7 @@ ctl_refresh(tsdn_t *tsdn) { unsigned i; ctl_arena_stats_t *sstats = stats_arenas_i(MALLCTL_ARENAS_ALL); - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats.narenas); + VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats->narenas); /* * Clear sum stats, since they will be merged into by @@ -645,10 +656,10 @@ ctl_refresh(tsdn_t *tsdn) */ ctl_arena_clear(sstats); - for (i = 0; i < ctl_stats.narenas; i++) + for (i = 0; i < ctl_stats->narenas; i++) tarenas[i] = arena_get(tsdn, i, false); - for (i = 0; i < ctl_stats.narenas; i++) { + for (i = 0; i < ctl_stats->narenas; i++) { ctl_arena_stats_t *astats = stats_arenas_i(i); bool initialized = (tarenas[i] != NULL); @@ -658,14 +669,14 @@ ctl_refresh(tsdn_t *tsdn) } if (config_stats) { - ctl_stats.allocated = sstats->allocated_small + + ctl_stats->allocated = sstats->allocated_small + sstats->astats.allocated_large; - ctl_stats.active = (sstats->pactive << LG_PAGE); - ctl_stats.metadata = sstats->astats.base + + ctl_stats->active = (sstats->pactive << LG_PAGE); + ctl_stats->metadata = sstats->astats.base + sstats->astats.internal; - ctl_stats.resident = sstats->astats.resident; - ctl_stats.mapped = sstats->astats.mapped; - ctl_stats.retained = sstats->astats.retained; + ctl_stats->resident = sstats->astats.resident; + ctl_stats->mapped = sstats->astats.mapped; + ctl_stats->retained = sstats->astats.retained; } ctl_epoch++; @@ -678,20 +689,41 @@ ctl_init(tsdn_t *tsdn) malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { + ctl_arena_stats_t *sstats; + unsigned i; + /* - * Allocate space for one extra arena stats element, which - * contains summed stats across all arenas. 
+ * Allocate demand-zeroed space for pointers to the full range + * of supported arena indices. */ - ctl_stats.narenas = narenas_total_get(); - ctl_stats.arenas = (ctl_arena_stats_t *)a0malloc( - (ctl_stats.narenas + 1) * sizeof(ctl_arena_stats_t)); - if (ctl_stats.arenas == NULL) { + if (ctl_stats == NULL) { + ctl_stats = (ctl_stats_t *)base_alloc(tsdn, b0get(), + sizeof(ctl_stats_t), QUANTUM); + if (ctl_stats == NULL) { + ret = true; + goto label_return; + } + } + + /* + * Allocate space for the current full range of arenas here + * rather than doing it lazily elsewhere, in order to limit when + * OOM-caused errors can occur. + */ + if ((sstats = stats_arenas_i_impl(tsdn, MALLCTL_ARENAS_ALL, + false, true)) == NULL) { ret = true; goto label_return; } - memset(ctl_stats.arenas, 0, (ctl_stats.narenas + 1) * - sizeof(ctl_arena_stats_t)); - stats_arenas_i(MALLCTL_ARENAS_ALL)->initialized = true; + sstats->initialized = true; + + ctl_stats->narenas = narenas_total_get(); + for (i = 0; i < ctl_stats->narenas; i++) { + if (stats_arenas_i_impl(tsdn, i, false, true) == NULL) { + ret = true; + goto label_return; + } + } ctl_epoch = 0; ctl_refresh(tsdn); @@ -1428,7 +1460,7 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) malloc_mutex_lock(tsdn, &ctl_mtx); { - unsigned narenas = ctl_stats.narenas; + unsigned narenas = ctl_stats->narenas; /* * Access via index narenas is deprecated, and scheduled for @@ -1511,7 +1543,7 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_ind = (unsigned)mib[1]; if (config_debug) { malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - assert(arena_ind < ctl_stats.narenas); + assert(arena_ind < ctl_stats->narenas); malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); } assert(arena_ind >= opt_narenas); @@ -1563,7 +1595,8 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, * Access via index narenas is deprecated, and scheduled for removal in * 6.0.0. 
*/ - if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == ctl_stats.narenas) { + if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == + ctl_stats->narenas) { if (dss_prec != dss_prec_limit && extent_dss_prec_set(dss_prec)) { ret = EFAULT; @@ -1664,7 +1697,7 @@ arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); - if (i > ctl_stats.narenas && i != MALLCTL_ARENAS_ALL) { + if (i > ctl_stats->narenas && i != MALLCTL_ARENAS_ALL) { ret = NULL; goto label_return; } @@ -1690,7 +1723,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - narenas = ctl_stats.narenas; + narenas = ctl_stats->narenas; READ(narenas, unsigned); ret = 0; @@ -1708,13 +1741,13 @@ arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (*oldlenp != ctl_stats.narenas * sizeof(bool)) { + if (*oldlenp != ctl_stats->narenas * sizeof(bool)) { ret = EINVAL; - nread = (*oldlenp < ctl_stats.narenas * sizeof(bool)) - ? (unsigned)(*oldlenp / sizeof(bool)) : ctl_stats.narenas; + nread = (*oldlenp < ctl_stats->narenas * sizeof(bool)) + ? 
(unsigned)(*oldlenp / sizeof(bool)) : ctl_stats->narenas; } else { ret = 0; - nread = ctl_stats.narenas; + nread = ctl_stats->narenas; } for (i = 0; i < nread; i++) @@ -1795,7 +1828,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EAGAIN; goto label_return; } - narenas = ctl_stats.narenas - 1; + narenas = ctl_stats->narenas - 1; READ(narenas, unsigned); ret = 0; @@ -1932,12 +1965,12 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) /******************************************************************************/ -CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats.allocated, size_t) -CTL_RO_CGEN(config_stats, stats_active, ctl_stats.active, size_t) -CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats.metadata, size_t) -CTL_RO_CGEN(config_stats, stats_resident, ctl_stats.resident, size_t) -CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats.mapped, size_t) -CTL_RO_CGEN(config_stats, stats_retained, ctl_stats.retained, size_t) +CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) +CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) +CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) +CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) +CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, stats_arenas_i(mib[2])->dss, const char *) CTL_RO_GEN(stats_arenas_i_decay_time, stats_arenas_i(mib[2])->decay_time, @@ -2036,7 +2069,7 @@ stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) malloc_mutex_lock(tsdn, &ctl_mtx); a = stats_arenas_i2a_impl(i, true, true); - if (a == UINT_MAX || !ctl_stats.arenas[a].initialized) { + if (a == UINT_MAX || !ctl_stats->arenas[a]->initialized) { ret = NULL; goto label_return; } From c0a05e6abaca7d23c2cc225abb1b59a1160632a0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: 
Tue, 3 Jan 2017 15:09:50 -0800 Subject: [PATCH 0577/2608] Move static ctl_epoch variable into ctl_stats_t (as epoch). --- include/jemalloc/internal/ctl.h | 1 + src/ctl.c | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index dfb1e8ef..8550bf10 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -54,6 +54,7 @@ struct ctl_arena_stats_s { }; struct ctl_stats_s { + uint64_t epoch; size_t allocated; size_t active; size_t metadata; diff --git a/src/ctl.c b/src/ctl.c index d5b384c0..d39edbf8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -10,7 +10,6 @@ */ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; -static uint64_t ctl_epoch; static ctl_stats_t *ctl_stats; /******************************************************************************/ @@ -679,7 +678,7 @@ ctl_refresh(tsdn_t *tsdn) ctl_stats->retained = sstats->astats.retained; } - ctl_epoch++; + ctl_stats->epoch++; } static bool @@ -725,7 +724,7 @@ ctl_init(tsdn_t *tsdn) } } - ctl_epoch = 0; + ctl_stats->epoch = 0; ctl_refresh(tsdn); ctl_initialized = true; } @@ -1169,7 +1168,7 @@ epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, WRITE(newval, uint64_t); if (newp != NULL) ctl_refresh(tsd_tsdn(tsd)); - READ(ctl_epoch, uint64_t); + READ(ctl_stats->epoch, uint64_t); ret = 0; label_return: From 6edbedd9164d9b7682f7c3afb44e2b85c8eb52de Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Jan 2017 07:51:49 -0800 Subject: [PATCH 0578/2608] Range-check mib[1] --> arena_ind casts. 
--- include/jemalloc/internal/ctl.h | 2 +- src/ctl.c | 29 ++++++++++++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 8550bf10..0aa82541 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -19,7 +19,7 @@ struct ctl_named_node_s { struct ctl_node_s node; const char *name; /* If (nchildren == 0), this is a terminal node. */ - unsigned nchildren; + size_t nchildren; const ctl_node_t *children; int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *, size_t); diff --git a/src/ctl.c b/src/ctl.c index d39edbf8..0e7a09da 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -899,7 +899,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { /* Children are named. */ - if (node->nchildren <= (unsigned)mib[i]) { + if (node->nchildren <= mib[i]) { ret = ENOENT; goto label_return; } @@ -1010,6 +1010,14 @@ ctl_postfork_child(tsdn_t *tsdn) } \ } while (0) +#define MIB_UNSIGNED(v, i) do { \ + if (mib[i] > UINT_MAX) { \ + ret = EFAULT; \ + goto label_return; \ + } \ + v = (unsigned)mib[i]; \ +} while (0) + /* * There's a lot of code duplication in the following macros due to limitations * in how nested cpp macros are expanded. 
@@ -1503,10 +1511,12 @@ arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + unsigned arena_ind; READONLY(); WRITEONLY(); - arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], true); + MIB_UNSIGNED(arena_ind, 1); + arena_i_purge(tsd_tsdn(tsd), arena_ind, true); ret = 0; label_return: @@ -1518,10 +1528,12 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; + unsigned arena_ind; READONLY(); WRITEONLY(); - arena_i_purge(tsd_tsdn(tsd), (unsigned)mib[1], false); + MIB_UNSIGNED(arena_ind, 1); + arena_i_purge(tsd_tsdn(tsd), arena_ind, false); ret = 0; label_return: @@ -1538,8 +1550,8 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); + MIB_UNSIGNED(arena_ind, 1); - arena_ind = (unsigned)mib[1]; if (config_debug) { malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); assert(arena_ind < ctl_stats->narenas); @@ -1566,12 +1578,13 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, { int ret; const char *dss = NULL; - unsigned arena_ind = (unsigned)mib[1]; + unsigned arena_ind; dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(dss, const char *); + MIB_UNSIGNED(arena_ind, 1); if (dss != NULL) { int i; bool match = false; @@ -1626,9 +1639,10 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = (unsigned)mib[1]; + unsigned arena_ind; arena_t *arena; + MIB_UNSIGNED(arena_ind, 1); arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL) { ret = EFAULT; @@ -1661,10 +1675,11 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - unsigned arena_ind = (unsigned)mib[1]; + 
unsigned arena_ind; arena_t *arena; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); if (arena_ind < narenas_total_get() && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { if (newp != NULL) { From dc2125cf95cb1d9370ac7375185d6420c84388b9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 4 Jan 2017 10:21:53 -0800 Subject: [PATCH 0579/2608] Replace the arenas.initialized mallctl with arena..initialized . --- doc/jemalloc.xml.in | 27 ++++++++++++----------- src/ctl.c | 54 +++++++++++++++++++++------------------------ src/stats.c | 12 ++++++---- test/unit/mallctl.c | 49 +++++++++++++++++++++++++--------------- 4 files changed, 78 insertions(+), 64 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 36aae37c..f213a2c8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1253,7 +1253,7 @@ malloc_conf = "xmalloc:true";]]> Get or set the arena associated with the calling thread. If the specified arena was not initialized beforehand (see the arenas.initialized + linkend="arena.i.initialized">arena.i.initialized mallctl), it will be automatically initialized as a side effect of calling this interface. @@ -1425,6 +1425,19 @@ malloc_conf = "xmalloc:true";]]> + + + arena.<i>.initialized + (bool) + r- + + Get whether the specified arena's statistics are + initialized (i.e. the arena was initialized prior to the current epoch). + This interface can also be nominally used to query whether the merged + statistics corresponding to MALLCTL_ARENAS_ALL are + initialized (always true). + + arena.<i>.purge @@ -1715,18 +1728,6 @@ struct extent_hooks_s { Current limit on number of arenas. - - - arenas.initialized - (bool *) - r- - - An array of arenas.narenas - booleans. Each boolean indicates whether the corresponding arena is - initialized. 
- - arenas.decay_time diff --git a/src/ctl.c b/src/ctl.c index 0e7a09da..45e397b8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -109,7 +109,7 @@ CTL_PROTO(opt_prof_accum) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) -static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all); +CTL_PROTO(arena_i_initialized) CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_reset) @@ -124,7 +124,6 @@ INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lextent_i_size) INDEX_PROTO(arenas_lextent_i) CTL_PROTO(arenas_narenas) -CTL_PROTO(arenas_initialized) CTL_PROTO(arenas_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) @@ -271,6 +270,7 @@ static const ctl_named_node_t tcache_node[] = { }; static const ctl_named_node_t arena_i_node[] = { + {NAME("initialized"), CTL(arena_i_initialized)}, {NAME("purge"), CTL(arena_i_purge)}, {NAME("decay"), CTL(arena_i_decay)}, {NAME("reset"), CTL(arena_i_reset)}, @@ -312,7 +312,6 @@ static const ctl_indexed_node_t arenas_lextent_node[] = { static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("initialized"), CTL(arenas_initialized)}, {NAME("decay_time"), CTL(arenas_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, @@ -1461,6 +1460,29 @@ label_return: /******************************************************************************/ +static int +arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + tsdn_t *tsdn = tsd_tsdn(tsd); + unsigned arena_ind; + bool initialized; + + READONLY(); + MIB_UNSIGNED(arena_ind, 1); + + malloc_mutex_lock(tsdn, &ctl_mtx); + initialized = stats_arenas_i(arena_ind)->initialized; + malloc_mutex_unlock(tsdn, &ctl_mtx); + + READ(initialized, bool); + + ret = 0; +label_return: + return (ret); +} + static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { @@ -1746,32 +1768,6 @@ label_return: 
return (ret); } -static int -arenas_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ - int ret; - unsigned nread, i; - - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - READONLY(); - if (*oldlenp != ctl_stats->narenas * sizeof(bool)) { - ret = EINVAL; - nread = (*oldlenp < ctl_stats->narenas * sizeof(bool)) - ? (unsigned)(*oldlenp / sizeof(bool)) : ctl_stats->narenas; - } else { - ret = 0; - nread = ctl_stats->narenas; - } - - for (i = 0; i < nread; i++) - ((bool *)oldp)[i] = stats_arenas_i(i)->initialized; - -label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); -} - static int arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) diff --git a/src/stats.c b/src/stats.c index ad7d7ba4..4e09eb45 100644 --- a/src/stats.c +++ b/src/stats.c @@ -818,14 +818,18 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.narenas", &narenas, unsigned); { + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz; VARIABLE_ARRAY(bool, initialized, narenas); - size_t isz; unsigned i, j, ninitialized; - isz = sizeof(bool) * narenas; - xmallctl("arenas.initialized", (void *)initialized, - &isz, NULL, 0); + xmallctlnametomib("arena.0.initialized", mib, &miblen); for (i = ninitialized = 0; i < narenas; i++) { + mib[1] = i; + sz = sizeof(bool); + xmallctlbymib(mib, miblen, &initialized[i], &sz, + NULL, 0); if (initialized[i]) ninitialized++; } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 95c27753..b3320788 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -354,6 +354,36 @@ TEST_BEGIN(test_thread_arena) } TEST_END +TEST_BEGIN(test_arena_i_initialized) +{ + unsigned narenas, i; + size_t sz; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + bool initialized; + + sz = sizeof(narenas); + assert_d_eq(mallctl("arenas.narenas", 
(void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + + assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + for (i = 0; i < narenas; i++) { + mib[1] = i; + sz = sizeof(initialized); + assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); + } + + mib[1] = MALLCTL_ARENAS_ALL; + sz = sizeof(initialized); + assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_true(initialized, + "Merged arena statistics should always be initialized"); +} +TEST_END + TEST_BEGIN(test_arena_i_decay_time) { ssize_t decay_time, orig_decay_time, prev_decay_time; @@ -479,23 +509,6 @@ TEST_BEGIN(test_arena_i_dss) } TEST_END -TEST_BEGIN(test_arenas_initialized) -{ - unsigned narenas; - size_t sz = sizeof(narenas); - - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - { - VARIABLE_ARRAY(bool, initialized, narenas); - - sz = narenas * sizeof(bool); - assert_d_eq(mallctl("arenas.initialized", (void *)initialized, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - } -} -TEST_END - TEST_BEGIN(test_arenas_decay_time) { ssize_t decay_time, orig_decay_time, prev_decay_time; @@ -638,11 +651,11 @@ main(void) test_tcache_none, test_tcache, test_thread_arena, + test_arena_i_initialized, test_arena_i_decay_time, test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, - test_arenas_initialized, test_arenas_decay_time, test_arenas_constants, test_arenas_bin_constants, From 3f291d59ada15f2be84c80dac71e0ddf03908d15 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 6 Jan 2017 11:22:08 -0800 Subject: [PATCH 0580/2608] Refactor test extent hook code to be reusable. Move test extent hook code from the extent integration test into a header, and normalize the out-of-band controls and introspection. Also refactor the base unit test to use the header. 
--- test/include/test/extent_hooks.h | 264 +++++++++++++++++++++++++++ test/integration/extent.c | 300 ++++++------------------------- test/unit/base.c | 150 +++++----------- 3 files changed, 366 insertions(+), 348 deletions(-) create mode 100644 test/include/test/extent_hooks.h diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h new file mode 100644 index 00000000..f50747d0 --- /dev/null +++ b/test/include/test/extent_hooks.h @@ -0,0 +1,264 @@ +/* + * Boilerplate code used for testing extent hooks via interception and + * passthrough. + */ + +static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind); +static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static bool extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, + void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t size_a, size_t size_b, bool committed, + unsigned arena_ind); +static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, + unsigned arena_ind); + +static extent_hooks_t *default_hooks; +static extent_hooks_t hooks = { + extent_alloc_hook, + extent_dalloc_hook, + extent_commit_hook, + extent_decommit_hook, + extent_purge_lazy_hook, + extent_purge_forced_hook, + extent_split_hook, + 
extent_merge_hook +}; + +/* Control whether hook functions pass calls through to default hooks. */ +static bool try_alloc = true; +static bool try_dalloc = true; +static bool try_commit = true; +static bool try_decommit = true; +static bool try_purge_lazy = true; +static bool try_purge_forced = true; +static bool try_split = true; +static bool try_merge = true; + +/* Set to false prior to operations, then introspect after operations. */ +static bool called_alloc; +static bool called_dalloc; +static bool called_commit; +static bool called_decommit; +static bool called_purge_lazy; +static bool called_purge_forced; +static bool called_split; +static bool called_merge; + +/* Set to false prior to operations, then introspect after operations. */ +static bool did_alloc; +static bool did_dalloc; +static bool did_commit; +static bool did_decommit; +static bool did_purge_lazy; +static bool did_purge_forced; +static bool did_split; +static bool did_merge; + +#if 0 +# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) +#else +# define TRACE_HOOK(fmt, ...) +#endif + +static void * +extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) +{ + void *ret; + + TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " + "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, + new_addr, size, alignment, *zero ? "true" : "false", *commit ? 
+ "true" : "false", arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->alloc, extent_alloc_hook, + "Wrong hook function"); + called_alloc = true; + if (!try_alloc) + return (NULL); + ret = default_hooks->alloc(default_hooks, new_addr, size, alignment, + zero, commit, 0); + did_alloc = (ret != NULL); + return (ret); +} + +static bool +extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? + "true" : "false", arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, + "Wrong hook function"); + called_dalloc = true; + if (!try_dalloc) + return (true); + err = default_hooks->dalloc(default_hooks, addr, size, committed, 0); + did_dalloc = !err; + return (err); +} + +static bool +extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->commit, extent_commit_hook, + "Wrong hook function"); + called_commit = true; + if (!try_commit) + return (true); + err = default_hooks->commit(default_hooks, addr, size, offset, length, + 0); + did_commit = !err; + return (err); +} + +static bool +extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, 
" + "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->decommit, extent_decommit_hook, + "Wrong hook function"); + called_decommit = true; + if (!try_decommit) + return (true); + err = default_hooks->decommit(default_hooks, addr, size, offset, length, + 0); + did_decommit = !err; + return (err); +} + +static bool +extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, + "Wrong hook function"); + called_purge_lazy = true; + if (!try_purge_lazy) + return (true); + err = default_hooks->purge_lazy == NULL || + default_hooks->purge_lazy(default_hooks, addr, size, offset, length, + 0); + did_purge_lazy = !err; + return (err); +} + +static bool +extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, + offset, length, arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, + "Wrong hook function"); + called_purge_forced = true; + if (!try_purge_forced) + return (true); + err = default_hooks->purge_forced == NULL || + default_hooks->purge_forced(default_hooks, addr, size, offset, + length, 0); + did_purge_forced = !err; + return (err); 
+} + +static bool +extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " + "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, + addr, size, size_a, size_b, committed ? "true" : "false", + arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->split, extent_split_hook, + "Wrong hook function"); + called_split = true; + if (!try_split) + return (true); + err = (default_hooks->split == NULL || + default_hooks->split(default_hooks, addr, size, size_a, size_b, + committed, 0)); + did_split = !err; + return (err); +} + +static bool +extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) +{ + bool err; + + TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " + "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, + addr_a, size_a, addr_b, size_b, committed ? 
"true" : "false", + arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->merge, extent_merge_hook, + "Wrong hook function"); + called_merge = true; + if (!try_merge) + return (true); + err = (default_hooks->merge == NULL || + default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b, + committed, 0)); + did_merge = !err; + return (err); +} + +static void +extent_hooks_prep(void) +{ + size_t sz; + + sz = sizeof(default_hooks); + assert_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, + NULL, 0), 0, "Unexpected mallctl() error"); +} diff --git a/test/integration/extent.c b/test/integration/extent.c index 6be3b836..e347b66d 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -4,204 +4,7 @@ const char *malloc_conf = "junk:false"; #endif -static void *extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, - unsigned arena_ind); -static bool extent_dalloc(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); -static bool extent_commit(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_decommit(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_lazy(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_forced(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_split(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t size_a, size_t size_b, bool committed, - unsigned arena_ind); -static bool extent_merge(extent_hooks_t *extent_hooks, void *addr_a, - size_t size_a, void *addr_b, size_t size_b, bool committed, - 
unsigned arena_ind); - -static extent_hooks_t hooks = { - extent_alloc, - extent_dalloc, - extent_commit, - extent_decommit, - extent_purge_lazy, - extent_purge_forced, - extent_split, - extent_merge -}; -static extent_hooks_t *new_hooks = &hooks; -static extent_hooks_t *orig_hooks; -static extent_hooks_t *old_hooks; - -static bool do_dalloc = true; -static bool do_decommit; - -static bool did_alloc; -static bool did_dalloc; -static bool did_commit; -static bool did_decommit; -static bool did_purge_lazy; -static bool did_purge_forced; -static bool tried_split; -static bool did_split; -static bool did_merge; - -#if 0 -# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) -#else -# define TRACE_HOOK(fmt, ...) -#endif - -static void * -extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " - "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, - new_addr, size, alignment, *zero ? "true" : "false", *commit ? - "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->alloc, extent_alloc, "Wrong hook function"); - did_alloc = true; - return (old_hooks->alloc(old_hooks, new_addr, size, alignment, zero, - commit, 0)); -} - -static bool -extent_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? 
- "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->dalloc, extent_dalloc, - "Wrong hook function"); - did_dalloc = true; - if (!do_dalloc) - return (true); - return (old_hooks->dalloc(old_hooks, addr, size, committed, 0)); -} - -static bool -extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - bool err; - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->commit, extent_commit, - "Wrong hook function"); - err = old_hooks->commit(old_hooks, addr, size, offset, length, 0); - did_commit = !err; - return (err); -} - -static bool -extent_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - bool err; - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->decommit, extent_decommit, - "Wrong hook function"); - if (!do_decommit) - return (true); - err = old_hooks->decommit(old_hooks, addr, size, offset, length, 0); - did_decommit = !err; - return (err); -} - -static bool -extent_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as 
pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy, - "Wrong hook function"); - did_purge_lazy = true; - return (old_hooks->purge_lazy == NULL || - old_hooks->purge_lazy(old_hooks, addr, size, offset, length, 0)); -} - -static bool -extent_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced, - "Wrong hook function"); - did_purge_forced = true; - return (old_hooks->purge_forced == NULL || - old_hooks->purge_forced(old_hooks, addr, size, offset, length, 0)); -} - -static bool -extent_split(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) -{ - bool err; - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " - "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, - addr, size, size_a, size_b, committed ? "true" : "false", - arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->split, extent_split, "Wrong hook function"); - tried_split = true; - err = (old_hooks->split == NULL || old_hooks->split(old_hooks, addr, - size, size_a, size_b, committed, 0)); - did_split = !err; - return (err); -} - -static bool -extent_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) -{ - bool err; - - TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " - "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, - addr_a, size_a, addr_b, size_b, committed ? 
"true" : "false", - arena_ind); - assert_ptr_eq(extent_hooks, new_hooks, - "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->merge, extent_merge, "Wrong hook function"); - err = (old_hooks->merge == NULL || old_hooks->merge(old_hooks, addr_a, - size_a, addr_b, size_b, committed, 0)); - did_merge = !err; - return (err); -} +#include "test/extent_hooks.h" static void test_extent_body(unsigned arena_ind) @@ -229,37 +32,36 @@ test_extent_body(unsigned arena_ind) assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), 0, "Unexpected mallctlnametomib() failure"); purge_mib[1] = (size_t)arena_ind; - do_dalloc = false; - do_decommit = false; + try_dalloc = false; + try_decommit = false; p = mallocx(large0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); - did_dalloc = false; - did_decommit = false; + called_dalloc = false; + called_decommit = false; did_purge_lazy = false; did_purge_forced = false; - tried_split = false; - did_split = false; + called_split = false; xallocx_success_a = (xallocx(p, large0, 0, flags) == large0); assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { - assert_true(did_dalloc, "Expected dalloc"); - assert_false(did_decommit, "Unexpected decommit"); + assert_true(called_dalloc, "Expected dalloc call"); + assert_true(called_decommit, "Expected decommit call"); assert_true(did_purge_lazy || did_purge_forced, "Expected purge"); } - assert_true(tried_split, "Expected split"); + assert_true(called_split, "Expected split call"); dallocx(p, flags); - do_dalloc = true; + try_dalloc = true; /* Test decommit/commit and observe split/merge. 
*/ - do_dalloc = false; - do_decommit = true; + try_dalloc = false; + try_decommit = true; p = mallocx(large0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; - tried_split = false; + called_split = false; did_split = false; did_merge = false; xallocx_success_b = (xallocx(p, large0, 0, flags) == large0); @@ -275,8 +77,8 @@ test_extent_body(unsigned arena_ind) if (xallocx_success_b && xallocx_success_c) assert_true(did_merge, "Expected merge"); dallocx(p, flags); - do_dalloc = true; - do_decommit = false; + try_dalloc = true; + try_decommit = false; /* Make sure non-large allocation succeeds. */ p = mallocx(42, flags); @@ -290,6 +92,9 @@ TEST_BEGIN(test_extent_manual_hook) size_t old_size, new_size, sz; size_t hooks_mib[3]; size_t hooks_miblen; + extent_hooks_t *new_hooks, *old_hooks; + + extent_hooks_prep(); sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), @@ -301,24 +106,27 @@ TEST_BEGIN(test_extent_manual_hook) &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(extent_hooks_t *); + new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, (void *)&new_hooks, new_size), 0, "Unexpected extent_hooks error"); - orig_hooks = old_hooks; - assert_ptr_ne(old_hooks->alloc, extent_alloc, "Unexpected alloc error"); - assert_ptr_ne(old_hooks->dalloc, extent_dalloc, - "Unexpected dalloc error"); - assert_ptr_ne(old_hooks->commit, extent_commit, - "Unexpected commit error"); - assert_ptr_ne(old_hooks->decommit, extent_decommit, - "Unexpected decommit error"); - assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy, - "Unexpected purge_lazy error"); - assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced, - "Unexpected purge_forced error"); - assert_ptr_ne(old_hooks->split, extent_split, "Unexpected split error"); - 
assert_ptr_ne(old_hooks->merge, extent_merge, "Unexpected merge error"); + assert_ptr_ne(old_hooks->alloc, extent_alloc_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->commit, extent_commit_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->decommit, extent_decommit_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->split, extent_split_hook, + "Unexpected extent_hooks error"); + assert_ptr_ne(old_hooks->merge, extent_merge_hook, + "Unexpected extent_hooks error"); test_extent_body(arena_ind); @@ -327,23 +135,23 @@ TEST_BEGIN(test_extent_manual_hook) (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error"); assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks, orig_hooks, "Unexpected hooks error"); - assert_ptr_eq(old_hooks->alloc, orig_hooks->alloc, - "Unexpected alloc error"); - assert_ptr_eq(old_hooks->dalloc, orig_hooks->dalloc, - "Unexpected dalloc error"); - assert_ptr_eq(old_hooks->commit, orig_hooks->commit, - "Unexpected commit error"); - assert_ptr_eq(old_hooks->decommit, orig_hooks->decommit, - "Unexpected decommit error"); - assert_ptr_eq(old_hooks->purge_lazy, orig_hooks->purge_lazy, - "Unexpected purge_lazy error"); - assert_ptr_eq(old_hooks->purge_forced, orig_hooks->purge_forced, - "Unexpected purge_forced error"); - assert_ptr_eq(old_hooks->split, orig_hooks->split, - "Unexpected split error"); - assert_ptr_eq(old_hooks->merge, orig_hooks->merge, - "Unexpected merge error"); + assert_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->alloc, 
default_hooks->alloc, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->commit, default_hooks->commit, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->decommit, default_hooks->decommit, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->purge_lazy, default_hooks->purge_lazy, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->purge_forced, default_hooks->purge_forced, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->split, default_hooks->split, + "Unexpected extent_hooks error"); + assert_ptr_eq(old_hooks->merge, default_hooks->merge, + "Unexpected extent_hooks error"); } TEST_END @@ -351,8 +159,12 @@ TEST_BEGIN(test_extent_auto_hook) { unsigned arena_ind; size_t new_size, sz; + extent_hooks_t *new_hooks; + + extent_hooks_prep(); sz = sizeof(unsigned); + new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); diff --git a/test/unit/base.c b/test/unit/base.c index 6a082a5e..8f97e8bf 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -1,27 +1,6 @@ #include "test/jemalloc_test.h" -static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, - unsigned arena_ind); -static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); -static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned 
arena_ind); - -static extent_hooks_t hooks_not_null = { - extent_alloc_hook, - extent_dalloc_hook, - NULL, /* commit */ - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - NULL, /* split */ - NULL /* merge */ -}; +#include "test/extent_hooks.h" static extent_hooks_t hooks_null = { extent_alloc_hook, @@ -34,80 +13,16 @@ static extent_hooks_t hooks_null = { NULL /* merge */ }; -static bool did_alloc; -static bool did_dalloc; -static bool did_decommit; -static bool did_purge_lazy; -static bool did_purge_forced; - -#if 0 -# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) -#else -# define TRACE_HOOK(fmt, ...) -#endif - -static void * -extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " - "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, - new_addr, size, alignment, *zero ? "true" : "false", *commit ? - "true" : "false", arena_ind); - did_alloc = true; - return (extent_hooks_default.alloc( - (extent_hooks_t *)&extent_hooks_default, new_addr, size, alignment, - zero, commit, 0)); -} - -static bool -extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); - did_dalloc = true; - return (true); /* Cause cascade. */ -} - -static bool -extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - did_decommit = true; - return (true); /* Cause cascade. 
*/ -} - -static bool -extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - did_purge_lazy = true; - return (true); /* Cause cascade. */ -} - -static bool -extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ - - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); - did_purge_forced = true; - return (true); /* Cause cascade. */ -} +static extent_hooks_t hooks_not_null = { + extent_alloc_hook, + extent_dalloc_hook, + NULL, /* commit */ + extent_decommit_hook, + extent_purge_lazy_hook, + extent_purge_forced_hook, + NULL, /* split */ + NULL /* merge */ +}; TEST_BEGIN(test_base_hooks_default) { @@ -135,12 +50,21 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { + extent_hooks_t hooks_orig; tsdn_t *tsdn; base_t *base; size_t allocated0, allocated1, resident, mapped; + extent_hooks_prep(); + try_dalloc = false; + try_decommit = false; + try_purge_lazy = false; + try_purge_forced = false; + memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t)); + memcpy(&hooks, &hooks_null, sizeof(extent_hooks_t)); + tsdn = tsdn_fetch(); - base = base_new(tsdn, 0, (extent_hooks_t *)&hooks_null); + base = base_new(tsdn, 0, &hooks); assert_ptr_not_null(base, "Unexpected base_new() failure"); base_stats_get(tsdn, base, &allocated0, &resident, &mapped); @@ -155,20 +79,31 @@ TEST_BEGIN(test_base_hooks_null) "At least 42 bytes were allocated by base_alloc()"); base_delete(base); + + memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t)); } TEST_END TEST_BEGIN(test_base_hooks_not_null) { + extent_hooks_t hooks_orig; tsdn_t *tsdn; base_t *base; void *p, *q, *r, *r_exp; + 
extent_hooks_prep(); + try_dalloc = false; + try_decommit = false; + try_purge_lazy = false; + try_purge_forced = false; + memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t)); + memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); + tsdn = tsdn_fetch(); did_alloc = false; - base = base_new(tsdn, 0, (extent_hooks_t *)&hooks_not_null); + base = base_new(tsdn, 0, &hooks); assert_ptr_not_null(base, "Unexpected base_new() failure"); - assert_true(did_alloc, "Expected alloc hook call"); + assert_true(did_alloc, "Expected alloc"); /* * Check for tight packing at specified alignment under simple @@ -254,12 +189,19 @@ TEST_BEGIN(test_base_hooks_not_null) } } - did_dalloc = did_decommit = did_purge_lazy = did_purge_forced = false; + called_dalloc = called_decommit = called_purge_lazy = + called_purge_forced = false; base_delete(base); - assert_true(did_dalloc, "Expected dalloc hook call"); - assert_true(did_decommit, "Expected decommit hook call"); - assert_true(did_purge_lazy, "Expected purge_lazy hook call"); - assert_true(did_purge_forced, "Expected purge_forced hook call"); + assert_true(called_dalloc, "Expected dalloc call"); + assert_true(called_decommit, "Expected decommit call"); + assert_true(called_purge_lazy, "Expected purge_lazy call"); + assert_true(called_purge_forced, "Expected purge_forced call"); + + try_dalloc = true; + try_decommit = true; + try_purge_lazy = true; + try_purge_forced = true; + memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t)); } TEST_END From edf1bafb2b36ef4e8a2ef1ac19a4f76e5bc42528 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 3 Jan 2017 17:21:59 -0800 Subject: [PATCH 0581/2608] Implement arena..destroy . Add MALLCTL_ARENAS_DESTROYED for accessing destroyed arena stats as an analogue to MALLCTL_ARENAS_ALL. This resolves #382. 
--- Makefile.in | 4 + doc/jemalloc.xml.in | 51 +++- include/jemalloc/internal/arena.h | 1 + include/jemalloc/internal/ctl.h | 12 +- include/jemalloc/internal/extent.h | 2 + .../jemalloc/internal/jemalloc_internal.h.in | 2 + include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/jemalloc_macros.h.in | 5 + src/arena.c | 66 +++++ src/ctl.c | 277 ++++++++++++------ src/extent.c | 21 +- src/jemalloc.c | 2 +- src/stats.c | 37 ++- test/unit/arena_reset.c | 255 ++++++++++++++-- test/unit/arena_reset_prof.c | 5 + test/unit/mallctl.c | 9 + 16 files changed, 616 insertions(+), 136 deletions(-) create mode 100644 test/unit/arena_reset_prof.c diff --git a/Makefile.in b/Makefile.in index d8704923..edc50b4b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -195,6 +195,10 @@ TESTS_UNIT := \ $(srcroot)test/unit/util.c \ $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c +ifeq (@enable_prof@, 1) +TESTS_UNIT += \ + $(srcroot)test/unit/arena_reset_prof.c +endif TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/extent.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f213a2c8..36ec140b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -426,13 +426,14 @@ for (i = 0; i < nbins; i++) { mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously. If is specified during - configuration, m and a can be specified to - omit merged arena and per arena statistics, respectively; b - and l can be specified to omit per size class statistics - for bins and large objects, respectively. Unrecognized characters are - silently ignored. Note that thread caching may prevent some statistics - from being completely up to date, since extra locking would be required to - merge counters that track thread cache operations. 
+ configuration, m, d, and a + can be specified to omit merged arena, destroyed merged arena, and per + arena statistics, respectively; b and l can + be specified to omit per size class statistics for bins and large objects, + respectively. Unrecognized characters are silently ignored. Note that + thread caching may prevent some statistics from being completely up to + date, since extra locking would be required to merge counters that track + thread cache operations. The malloc_usable_size() function returns the usable size of the allocation pointed to by @@ -687,18 +688,21 @@ for (i = 0; i < nbins; i++) { MALLCTL NAMESPACE The following names are defined in the namespace accessible via the - mallctl*() functions. Value types are - specified in parentheses, their readable/writable statuses are encoded as + mallctl*() functions. Value types are specified in + parentheses, their readable/writable statuses are encoded as rw, r-, -w, or --, and required build configuration flags follow, if any. A name element encoded as <i> or <j> indicates an integer component, where the integer varies from 0 to some upper value that must be determined via introspection. In the case of stats.arenas.<i>.* - and arena.<i>.{purge,decay,dss}, + and arena.<i>.{initialized,purge,decay,dss}, <i> equal to MALLCTL_ARENAS_ALL can be used to operate on all arenas - or access the summation of statistics from all arenas. This constant can be + or access the summation of statistics from all arenas; similarly + <i> equal to + MALLCTL_ARENAS_DESTROYED can be used to access the + summation of statistics from all destroyed arenas. These constants can be utilized either via mallctlnametomib() followed by mallctlbymib(), or via code such as the following: - Take special note of the - epoch mallctl, which - controls refreshing of cached dynamic statistics. + Take special note of the epoch mallctl, which controls + refreshing of cached dynamic statistics. 
@@ -1478,6 +1482,25 @@ malloc_conf = "xmalloc:true";]]> beforehand. + + + arena.<i>.destroy + (void) + -- + + Destroy the arena. Discard all of the arena's extant + allocations using the same mechanism as for arena.<i>.reset + (with all the same constraints and side effects), merge the arena stats + into those accessible at arena index + MALLCTL_ARENAS_DESTROYED, and then completely + discard all metadata associated with the arena. Future calls to arenas.create may + recycle the arena index. Destruction will fail if any threads are + currently associated with the arena as a result of calls to thread.arena. + + arena.<i>.dss diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index 929adbe9..5e295509 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -290,6 +290,7 @@ bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 0aa82541..7dc3e5b5 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -32,7 +32,10 @@ struct ctl_indexed_node_s { }; struct ctl_arena_stats_s { + unsigned arena_ind; bool initialized; + ql_elm(ctl_arena_stats_t) destroyed_link; + unsigned nthreads; const char *dss; ssize_t decay_time; @@ -62,7 +65,14 @@ struct ctl_stats_s { size_t mapped; size_t retained; unsigned narenas; - ctl_arena_stats_t *arenas[1 << MALLOCX_ARENA_BITS]; + ql_head(ctl_arena_stats_t) destroyed; + /* + * Element 0 contains merged stats for extant arenas (accessed via + * 
MALLCTL_ARENAS_ALL), element 1 contains merged stats for destroyed + * arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the remaining + * MALLOCX_ARENA_MAX+1 elements correspond to arenas. + */ + ctl_arena_stats_t *arenas[MALLOCX_ARENA_MAX + 3]; }; #endif /* JEMALLOC_H_STRUCTS */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 33b85145..70accffb 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -125,6 +125,8 @@ extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); +bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 991c541f..6395d750 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -215,6 +215,7 @@ typedef unsigned szind_t; #define MALLOCX_TCACHE_SHIFT 8 #define MALLOCX_ARENA_MASK \ (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) +/* NB: Arena index bias decreases the maximum number of arenas by 1. 
*/ #define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) #define MALLOCX_TCACHE_MASK \ (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) @@ -470,6 +471,7 @@ void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 36960f08..c85219a9 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -21,6 +21,7 @@ arena_decay_time_default_get arena_decay_time_default_set arena_decay_time_get arena_decay_time_set +arena_destroy arena_dss_prec_get arena_dss_prec_set arena_extent_alloc_large @@ -67,6 +68,7 @@ arena_ralloc_no_move arena_reset arena_salloc arena_sdalloc +arena_set arena_slab_regind arena_stats_merge arena_tcache_fill_small @@ -164,6 +166,7 @@ extent_dalloc_cache extent_dalloc_gap extent_dalloc_mmap extent_dalloc_wrapper +extent_dalloc_wrapper_try extent_decommit_wrapper extent_dss_boot extent_dss_mergeable diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index ea41e2e8..05bcdd7b 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -44,6 +44,11 @@ * 0); */ #define MALLCTL_ARENAS_ALL 4096 +/* + * Use as arena index in "stats.arenas..*" mallctl interfaces to select + * destroyed arenas. 
+ */ +#define MALLCTL_ARENAS_DESTROYED 4097 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() diff --git a/src/arena.c b/src/arena.c index 3c31cc87..1f0c4df5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -903,6 +903,72 @@ arena_reset(tsd_t *tsd, arena_t *arena) malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } +static void +arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) +{ + extent_hooks_t *extent_hooks = extent_hooks_get(arena); + size_t i; + + /* + * Iterate over the retained extents and blindly attempt to deallocate + * them. This gives the extent allocator underlying the extent hooks an + * opportunity to unmap all retained memory without having to keep its + * own metadata structures, but if deallocation fails, that is the + * application's decision/problem. In practice, retained extents are + * leaked here if !config_munmap unless the application provided custom + * extent hooks, so best practice to either enable munmap (and avoid dss + * for arenas to be destroyed), or provide custom extent hooks that + * either unmap retained extents or track them for later use. + */ + for (i = 0; i < sizeof(arena->extents_retained)/sizeof(extent_heap_t); + i++) { + extent_heap_t *extents = &arena->extents_retained[i]; + extent_t *extent; + + while ((extent = extent_heap_remove_first(extents)) != NULL) { + extent_dalloc_wrapper_try(tsdn, arena, &extent_hooks, + extent); + } + } +} + +void +arena_destroy(tsd_t *tsd, arena_t *arena) +{ + + assert(base_ind_get(arena->base) >= narenas_auto); + assert(arena_nthreads_get(arena, false) == 0); + assert(arena_nthreads_get(arena, true) == 0); + + /* + * No allocations have occurred since arena_reset() was called. + * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached + * extents, so only retained extents may remain. + */ + assert(arena->ndirty == 0); + + /* Attempt to deallocate retained memory. 
*/ + arena_destroy_retained(tsd_tsdn(tsd), arena); + + /* + * Remove the arena pointer from the arenas array. We rely on the fact + * that there is no way for the application to get a dirty read from the + * arenas array unless there is an inherent race in the application + * involving access of an arena being concurrently destroyed. The + * application must synchronize knowledge of the arena's validity, so as + * long as we use an atomic write to update the arenas array, the + * application will get a clean read any time after it synchronizes + * knowledge that the arena is no longer valid. + */ + arena_set(base_ind_get(arena->base), NULL); + + /* + * Destroy the base allocator, which manages all metadata ever mapped by + * this arena. + */ + base_delete(arena->base); +} + static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info) diff --git a/src/ctl.c b/src/ctl.c index 45e397b8..76fbce4b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -48,18 +48,6 @@ static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ const size_t *mib, size_t miblen, size_t i); -static void ctl_arena_clear(ctl_arena_stats_t *astats); -static void ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, - arena_t *arena); -static void ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, - ctl_arena_stats_t *astats); -static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i); -static bool ctl_grow(tsdn_t *tsdn, extent_hooks_t *extent_hooks); -static void ctl_refresh(tsdn_t *tsdn); -static bool ctl_init(tsdn_t *tsdn); -static int ctl_lookup(tsdn_t *tsdn, const char *name, - ctl_node_t const **nodesp, size_t *mibp, size_t *depthp); - CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(thread_tcache_enabled) @@ -113,6 +101,7 @@ CTL_PROTO(arena_i_initialized) CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) CTL_PROTO(arena_i_reset) 
+CTL_PROTO(arena_i_destroy) CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_decay_time) CTL_PROTO(arena_i_extent_hooks) @@ -274,6 +263,7 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("purge"), CTL(arena_i_purge)}, {NAME("decay"), CTL(arena_i_decay)}, {NAME("reset"), CTL(arena_i_reset)}, + {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, {NAME("decay_time"), CTL(arena_i_decay_time)}, {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} @@ -452,6 +442,9 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) case MALLCTL_ARENAS_ALL: a = 0; break; + case MALLCTL_ARENAS_DESTROYED: + a = 1; + break; default: if (compat && i == ctl_stats->narenas) { /* @@ -471,7 +464,7 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) */ assert(i < ctl_stats->narenas || (!validate && i == ctl_stats->narenas)); - a = (unsigned)i + 1; + a = (unsigned)i + 2; } break; } @@ -479,6 +472,13 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) return (a); } +static unsigned +stats_arenas_i2a(size_t i) +{ + + return (stats_arenas_i2a_impl(i, true, false)); +} + static ctl_arena_stats_t * stats_arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { @@ -492,10 +492,13 @@ stats_arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) sizeof(ctl_arena_stats_t), QUANTUM); if (ret == NULL) return (NULL); + ret->arena_ind = (unsigned)i; ctl_stats->arenas[stats_arenas_i2a_impl(i, compat, false)] = ret; } + assert(ret == NULL || stats_arenas_i2a(ret->arena_ind) == + stats_arenas_i2a(i)); return (ret); } @@ -553,92 +556,130 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) } static void -ctl_arena_stats_smerge(ctl_arena_stats_t *sstats, ctl_arena_stats_t *astats) +ctl_arena_stats_sdmerge(ctl_arena_stats_t *sdstats, ctl_arena_stats_t *astats, + bool destroyed) { unsigned i; - sstats->nthreads += astats->nthreads; - sstats->pactive += astats->pactive; - sstats->pdirty += astats->pdirty; + if (!destroyed) { 
+ sdstats->nthreads += astats->nthreads; + sdstats->pactive += astats->pactive; + sdstats->pdirty += astats->pdirty; + } else { + assert(astats->nthreads == 0); + assert(astats->pactive == 0); + assert(astats->pdirty == 0); + } if (config_stats) { - sstats->astats.mapped += astats->astats.mapped; - sstats->astats.retained += astats->astats.retained; - sstats->astats.npurge += astats->astats.npurge; - sstats->astats.nmadvise += astats->astats.nmadvise; - sstats->astats.purged += astats->astats.purged; + if (!destroyed) { + sdstats->astats.mapped += astats->astats.mapped; + sdstats->astats.retained += astats->astats.retained; + } + sdstats->astats.npurge += astats->astats.npurge; + sdstats->astats.nmadvise += astats->astats.nmadvise; + sdstats->astats.purged += astats->astats.purged; - sstats->astats.base += astats->astats.base; - sstats->astats.internal += astats->astats.internal; - sstats->astats.resident += astats->astats.resident; + if (!destroyed) { + sdstats->astats.base += astats->astats.base; + sdstats->astats.internal += astats->astats.internal; + sdstats->astats.resident += astats->astats.resident; + } else + assert(astats->astats.internal == 0); - sstats->allocated_small += astats->allocated_small; - sstats->nmalloc_small += astats->nmalloc_small; - sstats->ndalloc_small += astats->ndalloc_small; - sstats->nrequests_small += astats->nrequests_small; + if (!destroyed) + sdstats->allocated_small += astats->allocated_small; + else + assert(astats->allocated_small == 0); + sdstats->nmalloc_small += astats->nmalloc_small; + sdstats->ndalloc_small += astats->ndalloc_small; + sdstats->nrequests_small += astats->nrequests_small; - sstats->astats.allocated_large += - astats->astats.allocated_large; - sstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sstats->astats.nrequests_large += + if (!destroyed) { + sdstats->astats.allocated_large += + astats->astats.allocated_large; + } else + 
assert(astats->astats.allocated_large == 0); + sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sdstats->astats.nrequests_large += astats->astats.nrequests_large; for (i = 0; i < NBINS; i++) { - sstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sstats->bstats[i].nrequests += + sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; + sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; + sdstats->bstats[i].nrequests += astats->bstats[i].nrequests; - sstats->bstats[i].curregs += astats->bstats[i].curregs; + if (!destroyed) { + sdstats->bstats[i].curregs += + astats->bstats[i].curregs; + } else + assert(astats->bstats[i].curregs == 0); if (config_tcache) { - sstats->bstats[i].nfills += + sdstats->bstats[i].nfills += astats->bstats[i].nfills; - sstats->bstats[i].nflushes += + sdstats->bstats[i].nflushes += astats->bstats[i].nflushes; } - sstats->bstats[i].nslabs += astats->bstats[i].nslabs; - sstats->bstats[i].reslabs += astats->bstats[i].reslabs; - sstats->bstats[i].curslabs += - astats->bstats[i].curslabs; + sdstats->bstats[i].nslabs += astats->bstats[i].nslabs; + sdstats->bstats[i].reslabs += astats->bstats[i].reslabs; + if (!destroyed) { + sdstats->bstats[i].curslabs += + astats->bstats[i].curslabs; + } else + assert(astats->bstats[i].curslabs == 0); } for (i = 0; i < NSIZES - NBINS; i++) { - sstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sstats->lstats[i].nrequests += + sdstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; + sdstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; + sdstats->lstats[i].nrequests += astats->lstats[i].nrequests; - sstats->lstats[i].curlextents += - astats->lstats[i].curlextents; + if (!destroyed) { + sdstats->lstats[i].curlextents += + astats->lstats[i].curlextents; + } else + assert(astats->lstats[i].curlextents 
== 0); } } } static void -ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, unsigned i) +ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_stats_t *sdstats, + unsigned i, bool destroyed) { ctl_arena_stats_t *astats = stats_arenas_i(i); - ctl_arena_stats_t *sstats = stats_arenas_i(MALLCTL_ARENAS_ALL); ctl_arena_clear(astats); ctl_arena_stats_amerge(tsdn, astats, arena); /* Merge into sum stats as well. */ - ctl_arena_stats_smerge(sstats, astats); + ctl_arena_stats_sdmerge(sdstats, astats, destroyed); } -static bool -ctl_grow(tsdn_t *tsdn, extent_hooks_t *extent_hooks) +static unsigned +ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { + unsigned arena_ind; + ctl_arena_stats_t *astats; + + if ((astats = ql_last(&ctl_stats->destroyed, destroyed_link)) != NULL) { + ql_remove(&ctl_stats->destroyed, astats, destroyed_link); + arena_ind = astats->arena_ind; + } else + arena_ind = ctl_stats->narenas; /* Trigger stats allocation. */ - if (stats_arenas_i_impl(tsdn, ctl_stats->narenas, false, true) == NULL) - return (true); + if (stats_arenas_i_impl(tsdn, arena_ind, false, true) == NULL) + return (UINT_MAX); /* Initialize new arena. 
*/ - if (arena_init(tsdn, ctl_stats->narenas, extent_hooks) == NULL) - return (true); - ctl_stats->narenas++; + if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) + return (UINT_MAX); - return (false); + if (arena_ind == ctl_stats->narenas) + ctl_stats->narenas++; + + return (arena_ind); } static void @@ -663,7 +704,7 @@ ctl_refresh(tsdn_t *tsdn) astats->initialized = initialized; if (initialized) - ctl_arena_refresh(tsdn, tarenas[i], i); + ctl_arena_refresh(tsdn, tarenas[i], sstats, i, false); } if (config_stats) { @@ -687,7 +728,7 @@ ctl_init(tsdn_t *tsdn) malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { - ctl_arena_stats_t *sstats; + ctl_arena_stats_t *sstats, *dstats; unsigned i; /* @@ -715,6 +756,19 @@ ctl_init(tsdn_t *tsdn) } sstats->initialized = true; + if ((dstats = stats_arenas_i_impl(tsdn, + MALLCTL_ARENAS_DESTROYED, false, true)) == NULL) { + ret = true; + goto label_return; + } + ctl_arena_clear(dstats); + /* + * Don't toggle stats for MALLCTL_ARENAS_DESTROYED to + * initialized until an arena is actually destroyed, so that + * arena..initialized can be used to query whether the stats + * are relevant. 
+ */ + ctl_stats->narenas = narenas_total_get(); for (i = 0; i < ctl_stats->narenas; i++) { if (stats_arenas_i_impl(tsdn, i, false, true) == NULL) { @@ -723,7 +777,7 @@ ctl_init(tsdn_t *tsdn) } } - ctl_stats->epoch = 0; + ql_new(&ctl_stats->destroyed); ctl_refresh(tsdn); ctl_initialized = true; } @@ -1562,6 +1616,33 @@ label_return: return (ret); } +static int +arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen, unsigned *arena_ind, + arena_t **arena) +{ + int ret; + + READONLY(); + WRITEONLY(); + MIB_UNSIGNED(*arena_ind, 1); + + if (*arena_ind < narenas_auto) { + ret = EFAULT; + goto label_return; + } + + *arena = arena_get(tsd_tsdn(tsd), *arena_ind, false); + if (*arena == NULL) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return (ret); +} + static int arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) @@ -1570,26 +1651,51 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, unsigned arena_ind; arena_t *arena; - READONLY(); - WRITEONLY(); - MIB_UNSIGNED(arena_ind, 1); + ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, + newp, newlen, &arena_ind, &arena); + if (ret != 0) + return (ret); - if (config_debug) { - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - assert(arena_ind < ctl_stats->narenas); - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - } - assert(arena_ind >= opt_narenas); + arena_reset(tsd, arena); - arena = arena_get(tsd_tsdn(tsd), arena_ind, false); - if (arena == NULL) { + return (ret); +} + +static int +arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) +{ + int ret; + unsigned arena_ind; + arena_t *arena; + ctl_arena_stats_t *dstats, *astats; + + ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, + newp, newlen, &arena_ind, &arena); + if (ret 
!= 0) + goto label_return; + + if (arena_nthreads_get(arena, false) != 0 || arena_nthreads_get(arena, + true) != 0) { ret = EFAULT; goto label_return; } + /* Merge stats after resetting and purging arena. */ arena_reset(tsd, arena); + arena_purge(tsd_tsdn(tsd), arena, true); + dstats = stats_arenas_i(MALLCTL_ARENAS_DESTROYED); + dstats->initialized = true; + ctl_arena_refresh(tsd_tsdn(tsd), arena, dstats, arena_ind, true); + /* Destroy arena. */ + arena_destroy(tsd, arena); + astats = stats_arenas_i(arena_ind); + astats->initialized = false; + /* Record arena index for later recycling via arenas.create. */ + ql_elm_new(astats, destroyed_link); + ql_tail_insert(&ctl_stats->destroyed, astats, destroyed_link); - ret = 0; + assert(ret == 0); label_return: return (ret); } @@ -1733,9 +1839,16 @@ arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); - if (i > ctl_stats->narenas && i != MALLCTL_ARENAS_ALL) { - ret = NULL; - goto label_return; + switch (i) { + case MALLCTL_ARENAS_ALL: + case MALLCTL_ARENAS_DESTROYED: + break; + default: + if (i > ctl_stats->narenas) { + ret = NULL; + goto label_return; + } + break; } ret = super_arena_i_node; @@ -1828,18 +1941,18 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, { int ret; extent_hooks_t *extent_hooks; - unsigned narenas; + unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); extent_hooks = (extent_hooks_t *)&extent_hooks_default; WRITE(extent_hooks, extent_hooks_t *); - if (ctl_grow(tsd_tsdn(tsd), extent_hooks)) { + if ((arena_ind = ctl_arena_init(tsd_tsdn(tsd), extent_hooks)) == + UINT_MAX) { ret = EAGAIN; goto label_return; } - narenas = ctl_stats->narenas - 1; - READ(narenas, unsigned); + READ(arena_ind, unsigned); ret = 0; label_return: diff --git a/src/extent.c b/src/extent.c index 6eabde31..7eb49709 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1039,11 +1039,11 @@ 
extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, return (extent_dalloc_default_impl(addr, size)); } -void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, +bool +extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - bool err, zeroed; + bool err; assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); @@ -1067,10 +1067,21 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_committed_get(extent), arena_ind_get(arena))); } - if (!err) { + if (!err) extent_dalloc(tsdn, arena, extent); + + return (err); +} + +void +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent) +{ + bool zeroed; + + if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) return; - } + extent_reregister(tsdn, extent); /* Try to decommit; purge if that fails. */ if (!extent_committed_get(extent)) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2acab412..a053983f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -375,7 +375,7 @@ bootstrap_free(void *ptr) a0idalloc(iealloc(NULL, ptr), ptr, false); } -static void +void arena_set(unsigned ind, arena_t *arena) { diff --git a/src/stats.c b/src/stats.c index 4e09eb45..ef349a50 100644 --- a/src/stats.c +++ b/src/stats.c @@ -772,7 +772,8 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool unmerged, bool bins, bool large) + bool json, bool merged, bool destroyed, bool unmerged, bool bins, + bool large) { size_t allocated, active, metadata, resident, mapped, retained; @@ -808,7 +809,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, allocated, active, metadata, resident, mapped, retained); } - if (merged || unmerged) { + if (merged || destroyed || unmerged) { unsigned narenas; if (json) { @@ -822,6 
+823,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, size_t miblen = sizeof(mib) / sizeof(size_t); size_t sz; VARIABLE_ARRAY(bool, initialized, narenas); + bool destroyed_initialized; unsigned i, j, ninitialized; xmallctlnametomib("arena.0.initialized", mib, &miblen); @@ -833,6 +835,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, if (initialized[i]) ninitialized++; } + mib[1] = MALLCTL_ARENAS_DESTROYED; + sz = sizeof(bool); + xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + NULL, 0); /* Merged stats. */ if (merged && (ninitialized > 1 || !unmerged)) { @@ -853,6 +859,25 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, } } + /* Destroyed stats. */ + if (destroyed_initialized && destroyed) { + /* Print destroyed arena stats. */ + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"destroyed\": {\n"); + } else { + malloc_cprintf(write_cb, cbopaque, + "\nDestroyed arenas stats:\n"); + } + stats_arena_print(write_cb, cbopaque, json, + MALLCTL_ARENAS_DESTROYED, bins, large); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t}%s\n", (ninitialized > 1) ? + "," : ""); + } + } + /* Unmerged stats. 
*/ for (i = j = 0; i < narenas; i++) { if (initialized[i]) { @@ -895,6 +920,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json = false; bool general = true; bool merged = config_stats; + bool destroyed = config_stats; bool unmerged = config_stats; bool bins = true; bool large = true; @@ -935,6 +961,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, case 'm': merged = false; break; + case 'd': + destroyed = false; + break; case 'a': unmerged = false; break; @@ -963,8 +992,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, stats_general_print(write_cb, cbopaque, json, more); } if (config_stats) { - stats_print_helper(write_cb, cbopaque, json, merged, unmerged, - bins, large); + stats_print_helper(write_cb, cbopaque, json, merged, destroyed, + unmerged, bins, large); } if (json) { diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 3a1b30f5..65ff1031 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -1,9 +1,9 @@ +#ifndef ARENA_RESET_PROF_C_ #include "test/jemalloc_test.h" - -#ifdef JEMALLOC_PROF -const char *malloc_conf = "prof:true,lg_prof_sample:0"; #endif +#include "test/extent_hooks.h" + static unsigned get_nsizes_impl(const char *cmd) { @@ -79,57 +79,64 @@ vsalloc(tsdn_t *tsdn, const void *ptr) return (isalloc(tsdn, extent, ptr)); } -TEST_BEGIN(test_arena_reset) +static unsigned +do_arena_create(extent_hooks_t *h) +{ + unsigned arena_ind; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, + "Unexpected mallctl() failure"); + return (arena_ind); +} + +static void +do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { #define NLARGE 32 - unsigned arena_ind, nsmall, nlarge, nptrs, i; - size_t sz, miblen; - void **ptrs; + unsigned nsmall, nlarge, i; + size_t sz; int flags; - size_t mib[3]; tsdn_t *tsdn; - sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; nsmall = get_nsmall(); nlarge = get_nlarge() > NLARGE ? NLARGE : get_nlarge(); - nptrs = nsmall + nlarge; - ptrs = (void **)malloc(nptrs * sizeof(void *)); - assert_ptr_not_null(ptrs, "Unexpected malloc() failure"); + *nptrs = nsmall + nlarge; + *ptrs = (void **)malloc(*nptrs * sizeof(void *)); + assert_ptr_not_null(*ptrs, "Unexpected malloc() failure"); /* Allocate objects with a wide range of sizes. */ for (i = 0; i < nsmall; i++) { sz = get_small_size(i); - ptrs[i] = mallocx(sz, flags); - assert_ptr_not_null(ptrs[i], + (*ptrs)[i] = mallocx(sz, flags); + assert_ptr_not_null((*ptrs)[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nlarge; i++) { sz = get_large_size(i); - ptrs[nsmall + i] = mallocx(sz, flags); - assert_ptr_not_null(ptrs[i], + (*ptrs)[nsmall + i] = mallocx(sz, flags); + assert_ptr_not_null((*ptrs)[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } tsdn = tsdn_fetch(); /* Verify allocations. */ - for (i = 0; i < nptrs; i++) { - assert_zu_gt(ivsalloc(tsdn, ptrs[i]), 0, + for (i = 0; i < *nptrs; i++) { + assert_zu_gt(ivsalloc(tsdn, (*ptrs)[i]), 0, "Allocation should have queryable size"); } +} - /* Reset. 
*/ - miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); +static void +do_arena_reset_post(void **ptrs, unsigned nptrs) +{ + tsdn_t *tsdn; + unsigned i; + + tsdn = tsdn_fetch(); /* Verify allocations no longer exist. */ for (i = 0; i < nptrs; i++) { @@ -139,6 +146,193 @@ TEST_BEGIN(test_arena_reset) free(ptrs); } + +static void +do_arena_reset_destroy(const char *name, unsigned arena_ind) +{ + size_t mib[3]; + size_t miblen; + + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib(name, mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static void +do_arena_reset(unsigned arena_ind) +{ + + do_arena_reset_destroy("arena.0.reset", arena_ind); +} + +static void +do_arena_destroy(unsigned arena_ind) +{ + + do_arena_reset_destroy("arena.0.destroy", arena_ind); +} + +TEST_BEGIN(test_arena_reset) +{ + unsigned arena_ind; + void **ptrs; + unsigned nptrs; + + arena_ind = do_arena_create(NULL); + do_arena_reset_pre(arena_ind, &ptrs, &nptrs); + do_arena_reset(arena_ind); + do_arena_reset_post(ptrs, nptrs); +} +TEST_END + +static bool +arena_i_initialized(unsigned arena_ind, bool refresh) +{ + bool initialized; + size_t mib[3]; + size_t miblen, sz; + + if (refresh) { + uint64_t epoch = 1; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)), 0, "Unexpected mallctl() failure"); + } + + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + sz = sizeof(initialized); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 
+ 0), 0, "Unexpected mallctlbymib() failure"); + + return (initialized); +} + +TEST_BEGIN(test_arena_destroy_initial) +{ + + assert_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + "Destroyed arena stats should not be initialized"); +} +TEST_END + +TEST_BEGIN(test_arena_destroy_hooks_default) +{ + unsigned arena_ind, arena_ind_another, arena_ind_prev; + void **ptrs; + unsigned nptrs; + + arena_ind = do_arena_create(NULL); + do_arena_reset_pre(arena_ind, &ptrs, &nptrs); + + assert_false(arena_i_initialized(arena_ind, false), + "Arena stats should not be initialized"); + assert_true(arena_i_initialized(arena_ind, true), + "Arena stats should be initialized"); + + /* + * Create another arena before destroying one, to better verify arena + * index reuse. + */ + arena_ind_another = do_arena_create(NULL); + + do_arena_destroy(arena_ind); + + assert_false(arena_i_initialized(arena_ind, true), + "Arena stats should not be initialized"); + assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + "Destroyed arena stats should be initialized"); + + do_arena_reset_post(ptrs, nptrs); + + arena_ind_prev = arena_ind; + arena_ind = do_arena_create(NULL); + do_arena_reset_pre(arena_ind, &ptrs, &nptrs); + assert_u_eq(arena_ind, arena_ind_prev, + "Arena index should have been recycled"); + do_arena_destroy(arena_ind); + do_arena_reset_post(ptrs, nptrs); + + do_arena_destroy(arena_ind_another); +} +TEST_END + +/* + * Actually unmap extents, regardless of config_munmap, so that attempts to + * access a destroyed arena's memory will segfault. + */ +static bool +extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) +{ + + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? 
+ "true" : "false", arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, + "Wrong hook function"); + called_dalloc = true; + if (!try_dalloc) + return (true); + pages_unmap(addr, size); + did_dalloc = true; + return (false); +} + +static extent_hooks_t hooks_orig; + +static extent_hooks_t hooks_unmap = { + extent_alloc_hook, + extent_dalloc_unmap, /* dalloc */ + extent_commit_hook, + extent_decommit_hook, + extent_purge_lazy_hook, + extent_purge_forced_hook, + extent_split_hook, + extent_merge_hook +}; + +TEST_BEGIN(test_arena_destroy_hooks_unmap) +{ + unsigned arena_ind; + void **ptrs; + unsigned nptrs; + + extent_hooks_prep(); + try_decommit = false; + memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t)); + memcpy(&hooks, &hooks_unmap, sizeof(extent_hooks_t)); + + did_alloc = false; + arena_ind = do_arena_create(&hooks); + do_arena_reset_pre(arena_ind, &ptrs, &nptrs); + + assert_true(did_alloc, "Expected alloc"); + + assert_false(arena_i_initialized(arena_ind, false), + "Arena stats should not be initialized"); + assert_true(arena_i_initialized(arena_ind, true), + "Arena stats should be initialized"); + + did_dalloc = false; + do_arena_destroy(arena_ind); + assert_true(did_dalloc, "Expected dalloc"); + + assert_false(arena_i_initialized(arena_ind, true), + "Arena stats should not be initialized"); + assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + "Destroyed arena stats should be initialized"); + + do_arena_reset_post(ptrs, nptrs); + + memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t)); +} TEST_END int @@ -146,5 +340,8 @@ main(void) { return (test( - test_arena_reset)); + test_arena_reset, + test_arena_destroy_initial, + test_arena_destroy_hooks_default, + test_arena_destroy_hooks_unmap)); } diff --git a/test/unit/arena_reset_prof.c b/test/unit/arena_reset_prof.c new file mode 100644 index 00000000..0fd362e9 --- /dev/null +++ 
b/test/unit/arena_reset_prof.c @@ -0,0 +1,5 @@ +#include "test/jemalloc_test.h" +#define ARENA_RESET_PROF_C_ + +const char *malloc_conf = "prof:true,lg_prof_sample:0"; +#include "arena_reset.c" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index b3320788..fbe76cb4 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -381,6 +381,15 @@ TEST_BEGIN(test_arena_i_initialized) "Unexpected mallctl() failure"); assert_true(initialized, "Merged arena statistics should always be initialized"); + + /* Equivalent to the above but using mallctl() directly. */ + sz = sizeof(initialized); + assert_d_eq(mallctl( + "arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", + (void *)&initialized, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_true(initialized, + "Merged arena statistics should always be initialized"); } TEST_END From 77de5f27d848414a6d26e86e2812339ffe1062d3 Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Wed, 21 Dec 2016 09:38:54 +0100 Subject: [PATCH 0582/2608] Use better pre-processor defines for sparc64 Currently, jemalloc detects sparc64 targets by checking whether __sparc64__ is defined. However, this definition is used on BSD targets only. Linux targets define both __sparc__ and __arch64__ for sparc64. Since this also works on BSD, rather use __sparc__ and __arch64__ instead of __sparc64__ to detect sparc64 targets. --- include/jemalloc/internal/mb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 5384728f..e58da5c3 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -76,7 +76,7 @@ mb_write(void) : "memory" /* Clobbers. 
*/ ); } -#elif defined(__sparc64__) +#elif defined(__sparc__) && defined(__arch64__) JEMALLOC_INLINE void mb_write(void) { From 94c5d22a4da7844d0bdc5b370e47b1ba14268af2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Jan 2017 13:23:17 -0800 Subject: [PATCH 0583/2608] Remove mb.h, which is unused --- .../jemalloc/internal/jemalloc_internal.h.in | 4 - include/jemalloc/internal/mb.h | 115 ------------------ 2 files changed, 119 deletions(-) delete mode 100644 include/jemalloc/internal/mb.h diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6395d750..a558012a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -376,7 +376,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/mb.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/arena.h" @@ -405,7 +404,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #define JEMALLOC_ARENA_STRUCTS_A #include "jemalloc/internal/arena.h" @@ -497,7 +495,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" @@ -528,7 +525,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mb.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/base.h" diff 
--git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h deleted file mode 100644 index e58da5c3..00000000 --- a/include/jemalloc/internal/mb.h +++ /dev/null @@ -1,115 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void mb_write(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MB_C_)) -#ifdef __i386__ -/* - * According to the Intel Architecture Software Developer's Manual, current - * processors execute instructions in order from the perspective of other - * processors in a multiprocessor system, but 1) Intel reserves the right to - * change that, and 2) the compiler's optimizer could re-order instructions if - * there weren't some form of barrier. Therefore, even if running on an - * architecture that does not need memory barriers (everything through at least - * i686), an "optimizer barrier" is necessary. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - -# if 0 - /* This is a true memory barrier. */ - asm volatile ("pusha;" - "xor %%eax,%%eax;" - "cpuid;" - "popa;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -# else - /* - * This is hopefully enough to keep the compiler from reordering - * instructions around this one. - */ - asm volatile ("nop;" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -# endif -} -#elif (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("sfence" - : /* Outputs. 
*/ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__powerpc__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("eieio" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__sparc__) && defined(__arch64__) -JEMALLOC_INLINE void -mb_write(void) -{ - - asm volatile ("membar #StoreStore" - : /* Outputs. */ - : /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -#elif defined(__tile__) -JEMALLOC_INLINE void -mb_write(void) -{ - - __sync_synchronize(); -} -#else -/* - * This is much slower than a simple memory barrier, but the semantics of mutex - * unlock make this work. - */ -JEMALLOC_INLINE void -mb_write(void) -{ - malloc_mutex_t mtx; - - malloc_mutex_init(&mtx, "mb", WITNESS_RANK_OMIT); - malloc_mutex_lock(TSDN_NULL, &mtx); - malloc_mutex_unlock(TSDN_NULL, &mtx); -} -#endif -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ From 77cccac8cde9fb1f1555331814c4e6440c16de43 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Jan 2017 18:06:31 -0800 Subject: [PATCH 0584/2608] Break up headers into constituent parts This is part of a broader change to make header files better represent the dependencies between one another (see https://github.com/jemalloc/jemalloc/issues/533). It breaks up component headers into smaller parts that can be made to have a simpler dependency graph. For the autogenerated headers (smoothstep.h and size_classes.h), no splitting was necessary, so I didn't add support to emit multiple headers. 
--- include/jemalloc/internal/arena.h | 638 ------------------ include/jemalloc/internal/arena_externs.h | 92 +++ include/jemalloc/internal/arena_inlines_a.h | 91 +++ include/jemalloc/internal/arena_inlines_b.h | 209 ++++++ include/jemalloc/internal/arena_structs_a.h | 15 + include/jemalloc/internal/arena_structs_b.h | 214 ++++++ include/jemalloc/internal/arena_types.h | 22 + include/jemalloc/internal/atomic_externs.h | 12 + .../internal/{atomic.h => atomic_inlines.h} | 28 +- include/jemalloc/internal/base.h | 87 --- include/jemalloc/internal/base_externs.h | 18 + include/jemalloc/internal/base_inlines.h | 17 + include/jemalloc/internal/base_structs.h | 44 ++ include/jemalloc/internal/base_types.h | 7 + include/jemalloc/internal/bitmap.h | 322 --------- include/jemalloc/internal/bitmap_externs.h | 8 + include/jemalloc/internal/bitmap_inlines.h | 152 +++++ include/jemalloc/internal/bitmap_structs.h | 28 + include/jemalloc/internal/bitmap_types.h | 133 ++++ include/jemalloc/internal/ckh.h | 86 --- include/jemalloc/internal/ckh_externs.h | 18 + include/jemalloc/internal/ckh_structs.h | 41 ++ include/jemalloc/internal/ckh_types.h | 22 + include/jemalloc/internal/ctl.h | 127 ---- include/jemalloc/internal/ctl_externs.h | 43 ++ include/jemalloc/internal/ctl_structs.h | 68 ++ include/jemalloc/internal/ctl_types.h | 10 + include/jemalloc/internal/extent_dss.h | 39 -- .../jemalloc/internal/extent_dss_externs.h | 14 + .../jemalloc/internal/extent_dss_structs.h | 6 + include/jemalloc/internal/extent_dss_types.h | 14 + include/jemalloc/internal/extent_externs.h | 60 ++ .../internal/{extent.h => extent_inlines.h} | 162 +---- include/jemalloc/internal/extent_mmap.h | 21 - .../jemalloc/internal/extent_mmap_externs.h | 8 + include/jemalloc/internal/extent_structs.h | 84 +++ include/jemalloc/internal/extent_types.h | 8 + .../internal/{hash.h => hash_inlines.h} | 20 +- .../jemalloc/internal/jemalloc_internal.h.in | 231 +++---- .../internal/jemalloc_internal_macros.h | 5 + 
.../internal/{large.h => large_externs.h} | 19 +- include/jemalloc/internal/mutex.h | 150 ---- include/jemalloc/internal/mutex_externs.h | 18 + include/jemalloc/internal/mutex_inlines.h | 74 ++ include/jemalloc/internal/mutex_structs.h | 24 + include/jemalloc/internal/mutex_types.h | 33 + .../internal/{nstime.h => nstime_externs.h} | 28 +- include/jemalloc/internal/nstime_structs.h | 8 + include/jemalloc/internal/nstime_types.h | 9 + include/jemalloc/internal/pages_externs.h | 31 + .../internal/{pages.h => pages_types.h} | 47 +- .../internal/{prng.h => prng_inlines.h} | 44 +- include/jemalloc/internal/prng_types.h | 29 + include/jemalloc/internal/prof.h | 568 ---------------- include/jemalloc/internal/prof_externs.h | 83 +++ include/jemalloc/internal/prof_inlines.h | 242 +++++++ include/jemalloc/internal/prof_structs.h | 187 +++++ include/jemalloc/internal/prof_types.h | 55 ++ include/jemalloc/internal/ql.h | 5 + include/jemalloc/internal/qr.h | 5 + include/jemalloc/internal/rtree_externs.h | 23 + .../internal/{rtree.h => rtree_inlines.h} | 172 +---- include/jemalloc/internal/rtree_structs.h | 86 +++ include/jemalloc/internal/rtree_types.h | 58 ++ include/jemalloc/internal/size_classes.sh | 23 +- include/jemalloc/internal/smoothstep.h | 22 +- include/jemalloc/internal/smoothstep.sh | 22 +- include/jemalloc/internal/spin.h | 51 -- include/jemalloc/internal/spin_inlines.h | 31 + include/jemalloc/internal/spin_structs.h | 8 + include/jemalloc/internal/spin_types.h | 6 + include/jemalloc/internal/stats_externs.h | 9 + .../internal/{stats.h => stats_structs.h} | 29 +- include/jemalloc/internal/stats_types.h | 9 + include/jemalloc/internal/tcache_externs.h | 47 ++ .../internal/{tcache.h => tcache_inlines.h} | 159 +---- include/jemalloc/internal/tcache_structs.h | 55 ++ include/jemalloc/internal/tcache_types.h | 50 ++ .../internal/{ticker.h => ticker_inlines.h} | 26 +- include/jemalloc/internal/ticker_structs.h | 9 + include/jemalloc/internal/ticker_types.h | 6 + 
include/jemalloc/internal/tsd_externs.h | 18 + include/jemalloc/internal/tsd_inlines.h | 140 ++++ include/jemalloc/internal/tsd_structs.h | 73 ++ .../jemalloc/internal/{tsd.h => tsd_types.h} | 238 +------ include/jemalloc/internal/util_externs.h | 23 + .../internal/{util.h => util_inlines.h} | 128 +--- include/jemalloc/internal/util_types.h | 94 +++ include/jemalloc/internal/witness.h | 275 -------- include/jemalloc/internal/witness_externs.h | 37 + include/jemalloc/internal/witness_inlines.h | 163 +++++ include/jemalloc/internal/witness_structs.h | 28 + include/jemalloc/internal/witness_types.h | 46 ++ test/include/test/jemalloc_test.h.in | 16 +- 94 files changed, 3452 insertions(+), 3611 deletions(-) delete mode 100644 include/jemalloc/internal/arena.h create mode 100644 include/jemalloc/internal/arena_externs.h create mode 100644 include/jemalloc/internal/arena_inlines_a.h create mode 100644 include/jemalloc/internal/arena_inlines_b.h create mode 100644 include/jemalloc/internal/arena_structs_a.h create mode 100644 include/jemalloc/internal/arena_structs_b.h create mode 100644 include/jemalloc/internal/arena_types.h create mode 100644 include/jemalloc/internal/atomic_externs.h rename include/jemalloc/internal/{atomic.h => atomic_inlines.h} (93%) delete mode 100644 include/jemalloc/internal/base.h create mode 100644 include/jemalloc/internal/base_externs.h create mode 100644 include/jemalloc/internal/base_inlines.h create mode 100644 include/jemalloc/internal/base_structs.h create mode 100644 include/jemalloc/internal/base_types.h delete mode 100644 include/jemalloc/internal/bitmap.h create mode 100644 include/jemalloc/internal/bitmap_externs.h create mode 100644 include/jemalloc/internal/bitmap_inlines.h create mode 100644 include/jemalloc/internal/bitmap_structs.h create mode 100644 include/jemalloc/internal/bitmap_types.h delete mode 100644 include/jemalloc/internal/ckh.h create mode 100644 include/jemalloc/internal/ckh_externs.h create mode 100644 
include/jemalloc/internal/ckh_structs.h create mode 100644 include/jemalloc/internal/ckh_types.h delete mode 100644 include/jemalloc/internal/ctl.h create mode 100644 include/jemalloc/internal/ctl_externs.h create mode 100644 include/jemalloc/internal/ctl_structs.h create mode 100644 include/jemalloc/internal/ctl_types.h delete mode 100644 include/jemalloc/internal/extent_dss.h create mode 100644 include/jemalloc/internal/extent_dss_externs.h create mode 100644 include/jemalloc/internal/extent_dss_structs.h create mode 100644 include/jemalloc/internal/extent_dss_types.h create mode 100644 include/jemalloc/internal/extent_externs.h rename include/jemalloc/internal/{extent.h => extent_inlines.h} (58%) delete mode 100644 include/jemalloc/internal/extent_mmap.h create mode 100644 include/jemalloc/internal/extent_mmap_externs.h create mode 100644 include/jemalloc/internal/extent_structs.h create mode 100644 include/jemalloc/internal/extent_types.h rename include/jemalloc/internal/{hash.h => hash_inlines.h} (92%) rename include/jemalloc/internal/{large.h => large_externs.h} (64%) delete mode 100644 include/jemalloc/internal/mutex.h create mode 100644 include/jemalloc/internal/mutex_externs.h create mode 100644 include/jemalloc/internal/mutex_inlines.h create mode 100644 include/jemalloc/internal/mutex_structs.h create mode 100644 include/jemalloc/internal/mutex_types.h rename include/jemalloc/internal/{nstime.h => nstime_externs.h} (53%) create mode 100644 include/jemalloc/internal/nstime_structs.h create mode 100644 include/jemalloc/internal/nstime_types.h create mode 100644 include/jemalloc/internal/pages_externs.h rename include/jemalloc/internal/{pages.h => pages_types.h} (54%) rename include/jemalloc/internal/{prng.h => prng_inlines.h} (71%) create mode 100644 include/jemalloc/internal/prng_types.h delete mode 100644 include/jemalloc/internal/prof.h create mode 100644 include/jemalloc/internal/prof_externs.h create mode 100644 
include/jemalloc/internal/prof_inlines.h create mode 100644 include/jemalloc/internal/prof_structs.h create mode 100644 include/jemalloc/internal/prof_types.h create mode 100644 include/jemalloc/internal/rtree_externs.h rename include/jemalloc/internal/{rtree.h => rtree_inlines.h} (68%) create mode 100644 include/jemalloc/internal/rtree_structs.h create mode 100644 include/jemalloc/internal/rtree_types.h delete mode 100644 include/jemalloc/internal/spin.h create mode 100644 include/jemalloc/internal/spin_inlines.h create mode 100644 include/jemalloc/internal/spin_structs.h create mode 100644 include/jemalloc/internal/spin_types.h create mode 100644 include/jemalloc/internal/stats_externs.h rename include/jemalloc/internal/{stats.h => stats_structs.h} (72%) create mode 100644 include/jemalloc/internal/stats_types.h create mode 100644 include/jemalloc/internal/tcache_externs.h rename include/jemalloc/internal/{tcache.h => tcache_inlines.h} (57%) create mode 100644 include/jemalloc/internal/tcache_structs.h create mode 100644 include/jemalloc/internal/tcache_types.h rename include/jemalloc/internal/{ticker.h => ticker_inlines.h} (57%) create mode 100644 include/jemalloc/internal/ticker_structs.h create mode 100644 include/jemalloc/internal/ticker_types.h create mode 100644 include/jemalloc/internal/tsd_externs.h create mode 100644 include/jemalloc/internal/tsd_inlines.h create mode 100644 include/jemalloc/internal/tsd_structs.h rename include/jemalloc/internal/{tsd.h => tsd_types.h} (74%) create mode 100644 include/jemalloc/internal/util_externs.h rename include/jemalloc/internal/{util.h => util_inlines.h} (50%) create mode 100644 include/jemalloc/internal/util_types.h delete mode 100644 include/jemalloc/internal/witness.h create mode 100644 include/jemalloc/internal/witness_externs.h create mode 100644 include/jemalloc/internal/witness_inlines.h create mode 100644 include/jemalloc/internal/witness_structs.h create mode 100644 include/jemalloc/internal/witness_types.h 
diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h deleted file mode 100644 index 5e295509..00000000 --- a/include/jemalloc/internal/arena.h +++ /dev/null @@ -1,638 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) - -/* Maximum number of regions in one slab. */ -#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) -#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) - -/* Default decay time in seconds. */ -#define DECAY_TIME_DEFAULT 10 -/* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 - -typedef struct arena_slab_data_s arena_slab_data_t; -typedef struct arena_bin_info_s arena_bin_info_t; -typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; -typedef struct arena_s arena_t; -typedef struct arena_tdata_s arena_tdata_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#ifdef JEMALLOC_ARENA_STRUCTS_A -struct arena_slab_data_s { - /* Index of bin this slab is associated with. */ - szind_t binind; - - /* Number of free regions in slab. */ - unsigned nfree; - - /* Per region allocated/deallocated bitmap. */ - bitmap_t bitmap[BITMAP_GROUPS_MAX]; -}; -#endif /* JEMALLOC_ARENA_STRUCTS_A */ - -#ifdef JEMALLOC_ARENA_STRUCTS_B -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... 
| - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -struct arena_bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - -struct arena_decay_s { - /* - * Approximate time in seconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - ssize_t time; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). - * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay.ndirty and - * arena->ndirty to determine how many dirty pages, if any, were - * generated. - */ - size_t nunpurged; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. 
Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; -}; - -struct arena_bin_s { - /* All operations on arena_bin_t fields require lock ownership. */ - malloc_mutex_t lock; - - /* - * Current slab being used to service allocations of this bin's size - * class. slabcur is independent of slabs_{nonfull,full}; whenever - * slabcur is reassigned, the previous slab must be deallocated or - * inserted into slabs_{nonfull,full}. - */ - extent_t *slabcur; - - /* - * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is oldest/lowest in - * memory. - */ - extent_heap_t slabs_nonfull; - - /* Ring sentinel used to track full slabs. */ - extent_t slabs_full; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - -struct arena_s { - /* - * Number of threads currently assigned to this arena, synchronized via - * atomic operations. Each thread has two distinct assignments, one for - * application-serving allocation, and the other for internal metadata - * allocation. Internal metadata must not be allocated from arenas - * explicitly created via the arenas.create mallctl, because the - * arena..reset mallctl indiscriminately discards all allocations for - * the affected arena. - * - * 0: Application allocation. - * 1: Internal metadata allocation. - */ - unsigned nthreads[2]; - - /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread assignment (modifies nthreads) is synchronized via atomics. - * 2) Bin-related operations are protected by bin locks. - * 3) Extent-related operations are protected by this mutex. - */ - malloc_mutex_t lock; - - arena_stats_t stats; - /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. 
- */ - ql_head(tcache_t) tcache_ql; - - uint64_t prof_accumbytes; - - /* - * PRNG state for cache index randomization of large allocation base - * pointers. - */ - size_t offset_state; - - /* Extent serial number generator state. */ - size_t extent_sn_next; - - dss_prec_t dss_prec; - - /* True if a thread is currently executing arena_purge_to_limit(). */ - bool purging; - - /* Number of pages in active extents. */ - size_t nactive; - - /* - * Current count of pages within unused extents that are potentially - * dirty, and for which pages_purge_*() has not been called. By - * tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. - */ - size_t ndirty; - - /* Decay-based purging state. */ - arena_decay_t decay; - - /* Extant large allocations. */ - ql_head(extent_t) large; - /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; - - /* - * Heaps of extents that were previously allocated. These are used when - * allocating extents, in an attempt to re-use address space. - */ - extent_heap_t extents_cached[NPSIZES+1]; - extent_heap_t extents_retained[NPSIZES+1]; - /* - * Ring sentinel used to track unused dirty memory. Dirty memory is - * managed as an LRU of cached extents. - */ - extent_t extents_dirty; - /* Protects extents_{cached,retained,dirty}. */ - malloc_mutex_t extents_mtx; - - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if !config_munmap). This limits - * the number of disjoint virtual memory ranges so that extent merging - * can be effective even if multiple arenas' extent allocation requests - * are highly interleaved. - */ - pszind_t extent_grow_next; - - /* Cache of extent structures that were allocated via base_alloc(). */ - ql_head(extent_t) extent_cache; - malloc_mutex_t extent_cache_mtx; - - /* bins is used to store heaps of free regions. 
*/ - arena_bin_t bins[NBINS]; - - /* Base allocator, from which arena metadata are allocated. */ - base_t *base; -}; - -/* Used in conjunction with tsd for fast arena-related context lookup. */ -struct arena_tdata_s { - ticker_t decay_ticker; -}; -#endif /* JEMALLOC_ARENA_STRUCTS_B */ - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -static const size_t large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; - -extern ssize_t opt_decay_time; - -extern const arena_bin_info_t arena_bin_info[NBINS]; - -extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero); -void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool cache); -void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool cache); -#ifdef JEMALLOC_JET -size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); -#endif -extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, - size_t usize, size_t alignment, bool *zero); -void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool locked); -void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); -void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); -ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); -bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); -void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void 
arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, - bool zero); -#ifdef JEMALLOC_JET -typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); -extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; -#else -void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); -#endif -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize); -void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, void *ptr); -void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - void *ptr); -bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); -bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_decay_time_default_get(void); -bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, - size_t *ndirty); -void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -unsigned 
arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void arena_boot(void); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned arena_ind_get(const arena_t *arena); -void arena_internal_add(arena_t *arena, size_t size); -void arena_internal_sub(arena_t *arena, size_t size); -size_t arena_internal_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); -szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx); -void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); -void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path); -arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); -void arena_dalloc(tsdn_t 
*tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -# ifdef JEMALLOC_ARENA_INLINE_A -JEMALLOC_INLINE unsigned -arena_ind_get(const arena_t *arena) -{ - - return (base_ind_get(arena->base)); -} - -JEMALLOC_INLINE void -arena_internal_add(arena_t *arena, size_t size) -{ - - atomic_add_zu(&arena->stats.internal, size); -} - -JEMALLOC_INLINE void -arena_internal_sub(arena_t *arena, size_t size) -{ - - atomic_sub_zu(&arena->stats.internal, size); -} - -JEMALLOC_INLINE size_t -arena_internal_get(arena_t *arena) -{ - - return (atomic_read_zu(&arena->stats.internal)); -} - -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes %= prof_interval; - return (true); - } - return (false); -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - return (arena_prof_accum_impl(arena, accumbytes)); -} - -JEMALLOC_INLINE bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) -{ - - cassert(config_prof); - - if (likely(prof_interval == 0)) - return (false); - - { - bool ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); - return (ret); - } -} -# endif /* JEMALLOC_ARENA_INLINE_A */ - -# ifdef JEMALLOC_ARENA_INLINE_B -JEMALLOC_INLINE szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return (binind); -} - -JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, 
const extent_t *extent, const void *ptr) -{ - - cassert(config_prof); - assert(ptr != NULL); - - if (unlikely(!extent_slab_get(extent))) - return (large_prof_tctx_get(tsdn, extent)); - return ((prof_tctx_t *)(uintptr_t)1U); -} - -JEMALLOC_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - if (unlikely(!extent_slab_get(extent))) - large_prof_tctx_set(tsdn, extent, tctx); -} - -JEMALLOC_INLINE void -arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(!extent_slab_get(extent)); - - large_prof_tctx_reset(tsdn, extent); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) -{ - tsd_t *tsd; - ticker_t *decay_ticker; - - if (unlikely(tsdn_null(tsdn))) - return; - tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); - if (unlikely(decay_ticker == NULL)) - return; - if (unlikely(ticker_ticks(decay_ticker, nticks))) - arena_purge(tsdn, arena, false); -} - -JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsdn_t *tsdn, arena_t *arena) -{ - - malloc_mutex_assert_not_owner(tsdn, &arena->lock); - - arena_decay_ticks(tsdn, arena, 1); -} - -JEMALLOC_ALWAYS_INLINE void * -arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(size != 0); - - if (likely(tcache != NULL)) { - if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); - } - /* (size > tcache_maxclass) case falls through. 
*/ - assert(size > tcache_maxclass); - } - - return (arena_malloc_hard(tsdn, arena, size, ind, zero)); -} - -JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(tsdn_t *tsdn, const void *ptr) -{ - - return (extent_arena_get(iealloc(tsdn, ptr))); -} - -/* Return the size of the allocation pointed to by ptr. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ - size_t ret; - - assert(ptr != NULL); - - if (likely(extent_slab_get(extent))) - ret = index2size(extent_slab_data_get_const(extent)->binind); - else - ret = large_salloc(tsdn, extent); - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - if (likely(extent_slab_get(extent))) { - /* Small allocation. */ - if (likely(tcache != NULL)) { - szind_t binind = extent_slab_data_get(extent)->binind; - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, - slow_path); - } else { - arena_dalloc_small(tsdn, extent_arena_get(extent), - extent, ptr); - } - } else { - size_t usize = extent_usize_get(extent); - - if (likely(tcache != NULL) && usize <= tcache_maxclass) { - if (config_prof && unlikely(usize <= SMALL_MAXCLASS)) { - arena_dalloc_promoted(tsdn, extent, ptr, - tcache, slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, - ptr, usize, slow_path); - } - } else - large_dalloc(tsdn, extent); - } -} - -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) -{ - - assert(!tsdn_null(tsdn) || tcache == NULL); - assert(ptr != NULL); - - if (likely(extent_slab_get(extent))) { - /* Small allocation. 
*/ - if (likely(tcache != NULL)) { - szind_t binind = size2index(size); - assert(binind == extent_slab_data_get(extent)->binind); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, - slow_path); - } else { - arena_dalloc_small(tsdn, extent_arena_get(extent), - extent, ptr); - } - } else { - if (likely(tcache != NULL) && size <= tcache_maxclass) { - if (config_prof && unlikely(size <= SMALL_MAXCLASS)) { - arena_dalloc_promoted(tsdn, extent, ptr, - tcache, slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size, slow_path); - } - } else - large_dalloc(tsdn, extent); - } -} -# endif /* JEMALLOC_ARENA_INLINE_B */ -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h new file mode 100644 index 00000000..ecc82304 --- /dev/null +++ b/include/jemalloc/internal/arena_externs.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#define JEMALLOC_INTERNAL_ARENA_EXTERNS_H + +static const size_t large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; + +extern ssize_t opt_decay_time; + +extern const arena_bin_info_t arena_bin_info[NBINS]; + +extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero); +void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool cache); +void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool cache); +#ifdef JEMALLOC_JET +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +#endif +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, + size_t usize, size_t alignment, bool *zero); +void 
arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, bool locked); +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, size_t oldsize); +ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); +void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, + bool zero); +#ifdef JEMALLOC_JET +typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); +extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; +#else +void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); +#endif +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, + szind_t ind, bool zero); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, + extent_t *extent, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t extra, bool zero); +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, + size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); 
+dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); +bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); +ssize_t arena_decay_time_default_get(void); +bool arena_decay_time_default_set(ssize_t decay_time); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, + size_t *ndirty); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); + +#endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h new file mode 100644 index 00000000..743727b0 --- /dev/null +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -0,0 +1,91 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H +#define JEMALLOC_INTERNAL_ARENA_INLINES_A_H + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned arena_ind_get(const arena_t *arena); +void arena_internal_add(arena_t *arena, size_t size); +void arena_internal_sub(arena_t *arena, size_t size); +size_t arena_internal_get(arena_t *arena); +bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); +bool arena_prof_accum_locked(arena_t *arena, 
uint64_t accumbytes); +bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); +#endif /* JEMALLOC_ENABLE_INLINE */ + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) + +JEMALLOC_INLINE unsigned +arena_ind_get(const arena_t *arena) +{ + + return (base_ind_get(arena->base)); +} + +JEMALLOC_INLINE void +arena_internal_add(arena_t *arena, size_t size) +{ + + atomic_add_zu(&arena->stats.internal, size); +} + +JEMALLOC_INLINE void +arena_internal_sub(arena_t *arena, size_t size) +{ + + atomic_sub_zu(&arena->stats.internal, size); +} + +JEMALLOC_INLINE size_t +arena_internal_get(arena_t *arena) +{ + + return (atomic_read_zu(&arena->stats.internal)); +} + +JEMALLOC_INLINE bool +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + assert(prof_interval != 0); + + arena->prof_accumbytes += accumbytes; + if (arena->prof_accumbytes >= prof_interval) { + arena->prof_accumbytes %= prof_interval; + return (true); + } + return (false); +} + +JEMALLOC_INLINE bool +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (likely(prof_interval == 0)) + return (false); + return (arena_prof_accum_impl(arena, accumbytes)); +} + +JEMALLOC_INLINE bool +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) +{ + + cassert(config_prof); + + if (likely(prof_interval == 0)) + return (false); + + { + bool ret; + + malloc_mutex_lock(tsdn, &arena->lock); + ret = arena_prof_accum_impl(arena, accumbytes); + malloc_mutex_unlock(tsdn, &arena->lock); + return (ret); + } +} + +#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ + +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h new file mode 100644 index 00000000..9068cf4c --- /dev/null +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -0,0 +1,209 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H 
+#define JEMALLOC_INTERNAL_ARENA_INLINES_B_H + +#ifndef JEMALLOC_ENABLE_INLINE +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx); +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, tcache_t *tcache, bool slow_path); +arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); +size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); +void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, + tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) +JEMALLOC_INLINE szind_t +arena_bin_index(arena_t *arena, arena_bin_t *bin) +{ + szind_t binind = (szind_t)(bin - arena->bins); + assert(binind < NBINS); + return (binind); +} + +JEMALLOC_INLINE prof_tctx_t * +arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(!extent_slab_get(extent))) + return (large_prof_tctx_get(tsdn, extent)); + return ((prof_tctx_t *)(uintptr_t)1U); +} + +JEMALLOC_INLINE void +arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + if (unlikely(!extent_slab_get(extent))) + large_prof_tctx_set(tsdn, extent, tctx); +} + +JEMALLOC_INLINE void +arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx) +{ + + 
cassert(config_prof); + assert(ptr != NULL); + assert(!extent_slab_get(extent)); + + large_prof_tctx_reset(tsdn, extent); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) +{ + tsd_t *tsd; + ticker_t *decay_ticker; + + if (unlikely(tsdn_null(tsdn))) + return; + tsd = tsdn_tsd(tsdn); + decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); + if (unlikely(decay_ticker == NULL)) + return; + if (unlikely(ticker_ticks(decay_ticker, nticks))) + arena_purge(tsdn, arena, false); +} + +JEMALLOC_ALWAYS_INLINE void +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) +{ + + malloc_mutex_assert_not_owner(tsdn, &arena->lock); + + arena_decay_ticks(tsdn, arena, 1); +} + +JEMALLOC_ALWAYS_INLINE void * +arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(size != 0); + + if (likely(tcache != NULL)) { + if (likely(size <= SMALL_MAXCLASS)) { + return (tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + if (likely(size <= tcache_maxclass)) { + return (tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path)); + } + /* (size > tcache_maxclass) case falls through. */ + assert(size > tcache_maxclass); + } + + return (arena_malloc_hard(tsdn, arena, size, ind, zero)); +} + +JEMALLOC_ALWAYS_INLINE arena_t * +arena_aalloc(tsdn_t *tsdn, const void *ptr) +{ + + return (extent_arena_get(iealloc(tsdn, ptr))); +} + +/* Return the size of the allocation pointed to by ptr. 
*/ +JEMALLOC_ALWAYS_INLINE size_t +arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +{ + size_t ret; + + assert(ptr != NULL); + + if (likely(extent_slab_get(extent))) + ret = index2size(extent_slab_data_get_const(extent)->binind); + else + ret = large_salloc(tsdn, extent); + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, + bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (likely(extent_slab_get(extent))) { + /* Small allocation. */ + if (likely(tcache != NULL)) { + szind_t binind = extent_slab_data_get(extent)->binind; + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + slow_path); + } else { + arena_dalloc_small(tsdn, extent_arena_get(extent), + extent, ptr); + } + } else { + size_t usize = extent_usize_get(extent); + + if (likely(tcache != NULL) && usize <= tcache_maxclass) { + if (config_prof && unlikely(usize <= SMALL_MAXCLASS)) { + arena_dalloc_promoted(tsdn, extent, ptr, + tcache, slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, + ptr, usize, slow_path); + } + } else + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, + tcache_t *tcache, bool slow_path) +{ + + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (likely(extent_slab_get(extent))) { + /* Small allocation. 
*/ + if (likely(tcache != NULL)) { + szind_t binind = size2index(size); + assert(binind == extent_slab_data_get(extent)->binind); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + slow_path); + } else { + arena_dalloc_small(tsdn, extent_arena_get(extent), + extent, ptr); + } + } else { + if (likely(tcache != NULL) && size <= tcache_maxclass) { + if (config_prof && unlikely(size <= SMALL_MAXCLASS)) { + arena_dalloc_promoted(tsdn, extent, ptr, + tcache, slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size, slow_path); + } + } else + large_dalloc(tsdn, extent); + } +} + +#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ +#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h new file mode 100644 index 00000000..ccb3b052 --- /dev/null +++ b/include/jemalloc/internal/arena_structs_a.h @@ -0,0 +1,15 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H + +struct arena_slab_data_s { + /* Index of bin this slab is associated with. */ + szind_t binind; + + /* Number of free regions in slab. */ + unsigned nfree; + + /* Per region allocated/deallocated bitmap. */ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h new file mode 100644 index 00000000..c1c20731 --- /dev/null +++ b/include/jemalloc/internal/arena_structs_b.h @@ -0,0 +1,214 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. 
+ * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +struct arena_bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +struct arena_decay_s { + /* + * Approximate time in seconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + ssize_t time; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of dirty pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay.ndirty and + * arena->ndirty to determine how many dirty pages, if any, were + * generated. 
+ */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; +}; + +struct arena_bin_s { + /* All operations on arena_bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* Ring sentinel used to track full slabs. */ + extent_t slabs_full; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +struct arena_s { + /* + * Number of threads currently assigned to this arena, synchronized via + * atomic operations. Each thread has two distinct assignments, one for + * application-serving allocation, and the other for internal metadata + * allocation. Internal metadata must not be allocated from arenas + * explicitly created via the arenas.create mallctl, because the + * arena..reset mallctl indiscriminately discards all allocations for + * the affected arena. + * + * 0: Application allocation. + * 1: Internal metadata allocation. + */ + unsigned nthreads[2]; + + /* + * There are three classes of arena operations from a locking + * perspective: + * 1) Thread assignment (modifies nthreads) is synchronized via atomics. + * 2) Bin-related operations are protected by bin locks. + * 3) Extent-related operations are protected by this mutex. + */ + malloc_mutex_t lock; + + arena_stats_t stats; + /* + * List of tcaches for extant threads associated with this arena. 
+ * Stats from these are merged incrementally, and at exit if + * opt_stats_print is enabled. + */ + ql_head(tcache_t) tcache_ql; + + uint64_t prof_accumbytes; + + /* + * PRNG state for cache index randomization of large allocation base + * pointers. + */ + size_t offset_state; + + /* Extent serial number generator state. */ + size_t extent_sn_next; + + dss_prec_t dss_prec; + + /* True if a thread is currently executing arena_purge_to_limit(). */ + bool purging; + + /* Number of pages in active extents. */ + size_t nactive; + + /* + * Current count of pages within unused extents that are potentially + * dirty, and for which pages_purge_*() has not been called. By + * tracking this, we can institute a limit on how much dirty unused + * memory is mapped for each arena. + */ + size_t ndirty; + + /* Decay-based purging state. */ + arena_decay_t decay; + + /* Extant large allocations. */ + ql_head(extent_t) large; + /* Synchronizes all large allocation/update/deallocation. */ + malloc_mutex_t large_mtx; + + /* + * Heaps of extents that were previously allocated. These are used when + * allocating extents, in an attempt to re-use address space. + */ + extent_heap_t extents_cached[NPSIZES+1]; + extent_heap_t extents_retained[NPSIZES+1]; + /* + * Ring sentinel used to track unused dirty memory. Dirty memory is + * managed as an LRU of cached extents. + */ + extent_t extents_dirty; + /* Protects extents_{cached,retained,dirty}. */ + malloc_mutex_t extents_mtx; + + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if !config_munmap). This limits + * the number of disjoint virtual memory ranges so that extent merging + * can be effective even if multiple arenas' extent allocation requests + * are highly interleaved. + */ + pszind_t extent_grow_next; + + /* Cache of extent structures that were allocated via base_alloc(). 
*/ + ql_head(extent_t) extent_cache; + malloc_mutex_t extent_cache_mtx; + + /* bins is used to store heaps of free regions. */ + arena_bin_t bins[NBINS]; + + /* Base allocator, from which arena metadata are allocated. */ + base_t *base; +}; + +/* Used in conjunction with tsd for fast arena-related context lookup. */ +struct arena_tdata_s { + ticker_t decay_ticker; +}; + +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h new file mode 100644 index 00000000..a13a1b61 --- /dev/null +++ b/include/jemalloc/internal/arena_types.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H +#define JEMALLOC_INTERNAL_ARENA_TYPES_H + +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) + +/* Maximum number of regions in one slab. */ +#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) + +/* Default decay time in seconds. */ +#define DECAY_TIME_DEFAULT 10 +/* Number of event ticks between time checks. 
*/ +#define DECAY_NTICKS_PER_UPDATE 1000 + +typedef struct arena_slab_data_s arena_slab_data_t; +typedef struct arena_bin_info_s arena_bin_info_t; +typedef struct arena_decay_s arena_decay_t; +typedef struct arena_bin_s arena_bin_t; +typedef struct arena_s arena_t; +typedef struct arena_tdata_s arena_tdata_t; + +#endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/atomic_externs.h b/include/jemalloc/internal/atomic_externs.h new file mode 100644 index 00000000..002aebca --- /dev/null +++ b/include/jemalloc/internal/atomic_externs.h @@ -0,0 +1,12 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H +#define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H + +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#define atomic_read_u64(p) atomic_add_u64(p, 0) +#endif +#define atomic_read_u32(p) atomic_add_u32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) +#define atomic_read_zu(p) atomic_add_zu(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) + +#endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */ diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic_inlines.h similarity index 93% rename from include/jemalloc/internal/atomic.h rename to include/jemalloc/internal/atomic_inlines.h index 4b5b4ea9..de0ac6ac 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -1,25 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -#define atomic_read_u64(p) atomic_add_u64(p, 0) -#endif -#define atomic_read_u32(p) atomic_add_u32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_zu(p) 
atomic_add_zu(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_ATOMIC_INLINES_H +#define JEMALLOC_INTERNAL_ATOMIC_INLINES_H /* * All arithmetic functions return the arithmetic result of the atomic @@ -646,6 +626,4 @@ atomic_write_u(unsigned *p, unsigned x) /******************************************************************************/ #endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_ATOMIC_INLINES_H */ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h deleted file mode 100644 index a54a5502..00000000 --- a/include/jemalloc/internal/base.h +++ /dev/null @@ -1,87 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct base_block_s base_block_t; -typedef struct base_s base_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Embedded at the beginning of every block of base-managed virtual memory. */ -struct base_block_s { - /* Total size of block's virtual memory mapping. */ - size_t size; - - /* Next block in list of base's blocks. */ - base_block_t *next; - - /* Tracks unused trailing space. */ - extent_t extent; -}; - -struct base_s { - /* Associated arena's index within the arenas array. */ - unsigned ind; - - /* User-configurable extent hook functions. */ - union { - extent_hooks_t *extent_hooks; - void *extent_hooks_pun; - }; - - /* Protects base_alloc() and base_stats_get() operations. */ - malloc_mutex_t mtx; - - /* Serial number generation state. */ - size_t extent_sn_next; - - /* Chain of all blocks associated with base. 
*/ - base_block_t *blocks; - - /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[NSIZES]; - - /* Stats, only maintained if config_stats. */ - size_t allocated; - size_t resident; - size_t mapped; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void base_delete(base_t *base); -extent_hooks_t *base_extent_hooks_get(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, - extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped); -void base_prefork(tsdn_t *tsdn, base_t *base); -void base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); -bool base_boot(tsdn_t *tsdn); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -unsigned base_ind_get(const base_t *base); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) -JEMALLOC_INLINE unsigned -base_ind_get(const base_t *base) -{ - - return (base->ind); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h new file mode 100644 index 00000000..2c555cff --- /dev/null +++ b/include/jemalloc/internal/base_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H +#define JEMALLOC_INTERNAL_BASE_EXTERNS_H + +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(base_t *base); 
+extent_hooks_t *base_extent_hooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h new file mode 100644 index 00000000..f882bcde --- /dev/null +++ b/include/jemalloc/internal/base_inlines.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H +#define JEMALLOC_INTERNAL_BASE_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +unsigned base_ind_get(const base_t *base); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) +JEMALLOC_INLINE unsigned +base_ind_get(const base_t *base) +{ + + return (base->ind); +} +#endif + +#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h new file mode 100644 index 00000000..bad37c06 --- /dev/null +++ b/include/jemalloc/internal/base_structs.h @@ -0,0 +1,44 @@ +#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H +#define JEMALLOC_INTERNAL_BASE_STRUCTS_H + +/* Embedded at the beginning of every block of base-managed virtual memory. */ +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. */ + base_block_t *next; + + /* Tracks unused trailing space. */ + extent_t extent; +}; + +struct base_s { + /* Associated arena's index within the arenas array. */ + unsigned ind; + + /* User-configurable extent hook functions. 
*/ + union { + extent_hooks_t *extent_hooks; + void *extent_hooks_pun; + }; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + extent_heap_t avail[NSIZES]; + + /* Stats, only maintained if config_stats. */ + size_t allocated; + size_t resident; + size_t mapped; +}; + +#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h new file mode 100644 index 00000000..be7ee825 --- /dev/null +++ b/include/jemalloc/internal/base_types.h @@ -0,0 +1,7 @@ +#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H +#define JEMALLOC_INTERNAL_BASE_TYPES_H + +typedef struct base_block_s base_block_t; +typedef struct base_s base_t; + +#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h deleted file mode 100644 index c2e34554..00000000 --- a/include/jemalloc/internal/bitmap.h +++ /dev/null @@ -1,322 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* - * Do some analysis on how big the bitmap is before we use a tree. 
For a brute - * force linear search, if we would have to call ffs_lu() more than 2^3 times, - * use a tree instead. - */ -#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define BITMAP_USE_TREE -#endif - -/* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) - -/* - * Number of groups required at a particular level for a given number of bits. - */ -#define BITMAP_GROUPS_L0(nbits) \ - BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) - -/* - * Assuming the number of levels, number of groups required for a given number - * of bits. - */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ - BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ - (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ - (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ - (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ - (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) - -/* - * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
- */ -#ifdef BITMAP_USE_TREE - -#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) -#else -# error "Unsupported bitmap size" -#endif - -/* - * Maximum number of levels possible. This could be statically computed based - * on LG_BITMAP_MAXBITS: - * - * #define BITMAP_MAX_LEVELS \ - * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - * - * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so - * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the - * various cascading macros. The only additional cost this incurs is some - * unused trailing entries in bitmap_info_t structures; the bitmaps themselves - * are not impacted. - */ -#define BITMAP_MAX_LEVELS 5 - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* nlevels. */ \ - (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ - (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ - (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ - (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ - /* levels. 
*/ \ - { \ - {0}, \ - {BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ - BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ - BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ - BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ - + BITMAP_GROUPS_L0(nbits)} \ - } \ -} - -#else /* BITMAP_USE_TREE */ - -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* ngroups. */ \ - BITMAP_BITS2GROUPS(nbits) \ -} - -#endif /* BITMAP_USE_TREE */ - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - -#ifdef BITMAP_USE_TREE - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). - */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* BITMAP_USE_TREE */ - /* Number of groups necessary for nbits. 
*/ - size_t ngroups; -#endif /* BITMAP_USE_TREE */ -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); -size_t bitmap_size(const bitmap_info_t *binfo); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ -#ifdef BITMAP_USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. 
*/ - return (rg == 0); -#else - size_t i; - - for (i = 0; i < binfo->ngroups; i++) { - if (bitmap[i] != 0) - return (false); - } - return (true); -#endif -} - -JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); -} - -JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(!bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) - break; - } - } -#endif -} - -/* sfu: set first unset. 
*/ -JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ - size_t bit; - bitmap_t g; - unsigned i; - - assert(!bitmap_full(bitmap, binfo)); - -#ifdef BITMAP_USE_TREE - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); - } -#else - i = 0; - g = bitmap[0]; - while ((bit = ffs_lu(g)) == 0) { - i++; - g = bitmap[i]; - } - bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); -#endif - bitmap_set(bitmap, binfo, bit); - return (bit); -} - -JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ - size_t goff; - bitmap_t *gp; - bitmap_t g; - UNUSED bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. 
*/ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (!propagate) - break; - } - } -#endif /* BITMAP_USE_TREE */ -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/bitmap_externs.h b/include/jemalloc/internal/bitmap_externs.h new file mode 100644 index 00000000..4df63eba --- /dev/null +++ b/include/jemalloc/internal/bitmap_externs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_EXTERNS_H +#define JEMALLOC_INTERNAL_BITMAP_EXTERNS_H + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +size_t bitmap_size(const bitmap_info_t *binfo); + +#endif /* JEMALLOC_INTERNAL_BITMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h new file mode 100644 index 00000000..5400f9d1 --- /dev/null +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -0,0 +1,152 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_INLINES_H +#define JEMALLOC_INTERNAL_BITMAP_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) +JEMALLOC_INLINE bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ +#ifdef BITMAP_USE_TREE + size_t rgoff = 
binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) + return (false); + } + return (true); +#endif +} + +JEMALLOC_INLINE bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); +} + +JEMALLOC_INLINE void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(!bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) + break; + } + } +#endif +} + +/* sfu: set first unset. 
*/ +JEMALLOC_INLINE size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) +{ + size_t bit; + bitmap_t g; + unsigned i; + + assert(!bitmap_full(bitmap, binfo)); + +#ifdef BITMAP_USE_TREE + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffs_lu(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + } +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif + bitmap_set(bitmap, binfo, bit); + return (bit); +} + +JEMALLOC_INLINE void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) +{ + size_t goff; + bitmap_t *gp; + bitmap_t g; + UNUSED bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. 
*/ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (!propagate) + break; + } + } +#endif /* BITMAP_USE_TREE */ +} + +#endif + +#endif /* JEMALLOC_INTERNAL_BITMAP_INLINES_H */ diff --git a/include/jemalloc/internal/bitmap_structs.h b/include/jemalloc/internal/bitmap_structs.h new file mode 100644 index 00000000..297ae669 --- /dev/null +++ b/include/jemalloc/internal/bitmap_structs.h @@ -0,0 +1,28 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_STRUCTS_H +#define JEMALLOC_INTERNAL_BITMAP_STRUCTS_H + +struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +}; + +struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + +#ifdef BITMAP_USE_TREE + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* BITMAP_USE_TREE */ + /* Number of groups necessary for nbits. */ + size_t ngroups; +#endif /* BITMAP_USE_TREE */ +}; + +#endif /* JEMALLOC_INTERNAL_BITMAP_STRUCTS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h new file mode 100644 index 00000000..d823186f --- /dev/null +++ b/include/jemalloc/internal/bitmap_types.h @@ -0,0 +1,133 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_TYPES_H +#define JEMALLOC_INTERNAL_BITMAP_TYPES_H + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. 
*/ +#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) + +typedef struct bitmap_level_s bitmap_level_t; +typedef struct bitmap_info_s bitmap_info_t; +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define BITMAP_USE_TREE +#endif + +/* Number of groups required to store a given number of bits. */ +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. 
+ */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. + */ +#ifdef BITMAP_USE_TREE + +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. 
*/ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. */ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} + +#else /* BITMAP_USE_TREE */ + +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ +} + +#endif /* BITMAP_USE_TREE */ + +#endif /* JEMALLOC_INTERNAL_BITMAP_TYPES_H */ diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h deleted file mode 100644 index f75ad90b..00000000 --- a/include/jemalloc/internal/ckh.h +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; - -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); - -/* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ -/* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ - -/* - * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit - * one bucket per L1 cache line. 
- */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Hash table cell. */ -struct ckhc_s { - const void *key; - const void *data; -}; - -struct ckh_s { -#ifdef CKH_COUNT - /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; -#endif - - /* Used for pseudo-random number generation. */ - uint64_t prng_state; - - /* Total number of items. */ - size_t count; - - /* - * Minimum and current number of hash table buckets. There are - * 2^LG_CKH_BUCKET_CELLS cells per bucket. - */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; - - /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; - - /* Hash table with 2^lg_curbuckets buckets. */ - ckhc_t *tab; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, - ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data); -bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); -void ckh_string_hash(const void *key, size_t r_hash[2]); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, size_t r_hash[2]); -bool ckh_pointer_keycomp(const void *k1, const void *k2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES 
*/ -/******************************************************************************/ diff --git a/include/jemalloc/internal/ckh_externs.h b/include/jemalloc/internal/ckh_externs.h new file mode 100644 index 00000000..c912f72b --- /dev/null +++ b/include/jemalloc/internal/ckh_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_CKH_EXTERNS_H +#define JEMALLOC_INTERNAL_CKH_EXTERNS_H + +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); +size_t ckh_count(ckh_t *ckh); +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_INTERNAL_CKH_EXTERNS_H */ diff --git a/include/jemalloc/internal/ckh_structs.h b/include/jemalloc/internal/ckh_structs.h new file mode 100644 index 00000000..a800cbc2 --- /dev/null +++ b/include/jemalloc/internal/ckh_structs.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_CKH_STRUCTS_H +#define JEMALLOC_INTERNAL_CKH_STRUCTS_H + +/* Hash table cell. */ +struct ckhc_s { + const void *key; + const void *data; +}; + +struct ckh_s { +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. */ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ + uint64_t prng_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. 
+ */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. */ + ckhc_t *tab; +}; + +#endif /* JEMALLOC_INTERNAL_CKH_STRUCTS_H */ diff --git a/include/jemalloc/internal/ckh_types.h b/include/jemalloc/internal/ckh_types.h new file mode 100644 index 00000000..9a1d8d49 --- /dev/null +++ b/include/jemalloc/internal/ckh_types.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_CKH_TYPES_H +#define JEMALLOC_INTERNAL_CKH_TYPES_H + +typedef struct ckh_s ckh_t; +typedef struct ckhc_s ckhc_t; + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. 
+ */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +#endif /* JEMALLOC_INTERNAL_CKH_TYPES_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h deleted file mode 100644 index 7dc3e5b5..00000000 --- a/include/jemalloc/internal/ctl.h +++ /dev/null @@ -1,127 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_named_node_s ctl_named_node_t; -typedef struct ctl_indexed_node_s ctl_indexed_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ctl_node_s { - bool named; -}; - -struct ctl_named_node_s { - struct ctl_node_s node; - const char *name; - /* If (nchildren == 0), this is a terminal node. */ - size_t nchildren; - const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, - size_t *, void *, size_t); -}; - -struct ctl_indexed_node_s { - struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, - size_t); -}; - -struct ctl_arena_stats_s { - unsigned arena_ind; - bool initialized; - ql_elm(ctl_arena_stats_t) destroyed_link; - - unsigned nthreads; - const char *dss; - ssize_t decay_time; - size_t pactive; - size_t pdirty; - - /* The remainder are only populated if config_stats is true. */ - - arena_stats_t astats; - - /* Aggregate stats for small size classes, based on bin stats. 
*/ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; -}; - -struct ctl_stats_s { - uint64_t epoch; - size_t allocated; - size_t active; - size_t metadata; - size_t resident; - size_t mapped; - size_t retained; - unsigned narenas; - ql_head(ctl_arena_stats_t) destroyed; - /* - * Element 0 contains merged stats for extant arenas (accessed via - * MALLCTL_ARENAS_ALL), element 1 contains merged stats for destroyed - * arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the remaining - * MALLOCX_ARENA_MAX+1 elements correspond to arenas. - */ - ctl_arena_stats_t *arenas[MALLOCX_ARENA_MAX + 3]; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, - size_t *miblenp); - -int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -bool ctl_boot(void); -void ctl_prefork(tsdn_t *tsdn); -void ctl_postfork_parent(tsdn_t *tsdn); -void ctl_postfork_child(tsdn_t *tsdn); - -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_printf( \ - ": Failure in xmallctl(\"%s\", ...)\n", \ - name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_printf(": Failure in " \ - "xmallctlnametomib(\"%s\", ...)\n", name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in 
xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h new file mode 100644 index 00000000..11f77cfb --- /dev/null +++ b/include/jemalloc/internal/ctl_externs.h @@ -0,0 +1,43 @@ +#ifndef JEMALLOC_INTERNAL_CTL_EXTERNS_H +#define JEMALLOC_INTERNAL_CTL_EXTERNS_H + +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, + size_t *miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +bool ctl_boot(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf(": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_CTL_EXTERNS_H */ diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h new file mode 100644 index 00000000..8f94c6c4 --- /dev/null +++ 
b/include/jemalloc/internal/ctl_structs.h @@ -0,0 +1,68 @@ +#ifndef JEMALLOC_INTERNAL_CTL_STRUCTS_H +#define JEMALLOC_INTERNAL_CTL_STRUCTS_H + +struct ctl_node_s { + bool named; +}; + +struct ctl_named_node_s { + struct ctl_node_s node; + const char *name; + /* If (nchildren == 0), this is a terminal node. */ + size_t nchildren; + const ctl_node_t *children; + int (*ctl)(tsd_t *, const size_t *, size_t, void *, + size_t *, void *, size_t); +}; + +struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); +}; + +struct ctl_arena_stats_s { + unsigned arena_ind; + bool initialized; + ql_elm(ctl_arena_stats_t) destroyed_link; + + unsigned nthreads; + const char *dss; + ssize_t decay_time; + size_t pactive; + size_t pdirty; + + /* The remainder are only populated if config_stats is true. */ + + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t lstats[NSIZES - NBINS]; +}; + +struct ctl_stats_s { + uint64_t epoch; + size_t allocated; + size_t active; + size_t metadata; + size_t resident; + size_t mapped; + size_t retained; + unsigned narenas; + ql_head(ctl_arena_stats_t) destroyed; + /* + * Element 0 contains merged stats for extant arenas (accessed via + * MALLCTL_ARENAS_ALL), element 1 contains merged stats for destroyed + * arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the remaining + * MALLOCX_ARENA_MAX+1 elements correspond to arenas. 
+ */ + ctl_arena_stats_t *arenas[MALLOCX_ARENA_MAX + 3]; +}; + +#endif /* JEMALLOC_INTERNAL_CTL_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h new file mode 100644 index 00000000..848c4f10 --- /dev/null +++ b/include/jemalloc/internal/ctl_types.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_CTL_TYPES_H +#define JEMALLOC_INTERNAL_CTL_TYPES_H + +typedef struct ctl_node_s ctl_node_t; +typedef struct ctl_named_node_s ctl_named_node_t; +typedef struct ctl_indexed_node_s ctl_indexed_node_t; +typedef struct ctl_arena_stats_s ctl_arena_stats_t; +typedef struct ctl_stats_s ctl_stats_t; + +#endif /* JEMALLOC_INTERNAL_CTL_TYPES_H */ diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h deleted file mode 100644 index f2dac52e..00000000 --- a/include/jemalloc/internal/extent_dss.h +++ /dev/null @@ -1,39 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, - dss_prec_secondary = 2, - - dss_prec_limit = 3 -} dss_prec_t; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -extern const char *dss_prec_names[]; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern const char *opt_dss; - -dss_prec_t extent_dss_prec_get(void); -bool extent_dss_prec_set(dss_prec_t dss_prec); -void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_in_dss(void *addr); -bool extent_dss_mergeable(void *addr_a, void *addr_b); -void extent_dss_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ 
-/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/extent_dss_externs.h b/include/jemalloc/internal/extent_dss_externs.h new file mode 100644 index 00000000..d376fa74 --- /dev/null +++ b/include/jemalloc/internal/extent_dss_externs.h @@ -0,0 +1,14 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H + +extern const char *opt_dss; + +dss_prec_t extent_dss_prec_get(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_dss_structs.h b/include/jemalloc/internal/extent_dss_structs.h new file mode 100644 index 00000000..2d8c6f05 --- /dev/null +++ b/include/jemalloc/internal/extent_dss_structs.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H + +extern const char *dss_prec_names[]; + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_dss_types.h b/include/jemalloc/internal/extent_dss_types.h new file mode 100644 index 00000000..2839757c --- /dev/null +++ b/include/jemalloc/internal/extent_dss_types.h @@ -0,0 +1,14 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H + +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H */ 
diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h new file mode 100644 index 00000000..59f3c7ca --- /dev/null +++ b/include/jemalloc/internal/extent_externs.h @@ -0,0 +1,60 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; + +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); + +extent_hooks_t *extent_hooks_get(arena_t *arena); +extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); + +#ifdef JEMALLOC_JET +typedef size_t (extent_size_quantize_t)(size_t); +extern extent_size_quantize_t *extent_size_quantize_floor; +extern extent_size_quantize_t *extent_size_quantize_ceil; +#else +size_t extent_size_quantize_floor(size_t size); +size_t extent_size_quantize_ceil(size_t size); +#endif + +ph_proto(, extent_heap_, extent_heap_t, extent_t) + +extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t 
**r_extent_hooks, extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + size_t usize_a, size_t size_b, size_t usize_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); + +bool extent_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent_inlines.h similarity index 58% rename from include/jemalloc/internal/extent.h rename to include/jemalloc/internal/extent_inlines.h index 70accffb..e48af92f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -1,157 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct extent_s extent_t; - -#define EXTENT_HOOKS_INITIALIZER NULL - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -/* Extent (span of pages). Use accessor functions for e_* fields. */ -struct extent_s { - /* Arena from which this extent came, if any. */ - arena_t *e_arena; - - /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; - - /* Extent size. 
*/ - size_t e_size; - - /* - * Usable size, typically smaller than extent size due to large_pad or - * promotion of sampled small regions. - */ - size_t e_usize; - - /* - * Serial number (potentially non-unique). - * - * In principle serial numbers can wrap around on 32-bit systems if - * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall - * back on address comparison for equal serial numbers, stable (if - * imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of wrap-around, - * e.g. when splitting an extent and assigning the same serial number to - * both resulting adjacent extents. - */ - size_t e_sn; - - /* True if extent is active (in use). */ - bool e_active; - - /* - * The zeroed flag is used by extent recycling code to track whether - * memory is zero-filled. - */ - bool e_zeroed; - - /* - * True if physical memory is committed to the extent, whether - * explicitly or implicitly as on a system that overcommits and - * satisfies physical memory needs on demand via soft page faults. - */ - bool e_committed; - - /* - * The slab flag indicates whether the extent is used for a slab of - * small regions. This helps differentiate small size classes, and it - * indicates whether interior pointers can be looked up via iealloc(). - */ - bool e_slab; - - union { - /* Small region slab metadata. */ - arena_slab_data_t e_slab_data; - - /* Profile counters, used for large objects. */ - union { - void *e_prof_tctx_pun; - prof_tctx_t *e_prof_tctx; - }; - }; - - /* - * Linkage for arena's extents_dirty and arena_bin_t's slabs_full rings. - */ - qr(extent_t) qr_link; - - union { - /* Linkage for per size class sn/address-ordered heaps. */ - phn(extent_t) ph_link; - - /* Linkage for arena's large and extent_cache lists. 
*/ - ql_elm(extent_t) ql_link; - }; -}; -typedef ph(extent_t) extent_heap_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; - -extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); - -extent_hooks_t *extent_hooks_get(arena_t *arena); -extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); - -#ifdef JEMALLOC_JET -typedef size_t (extent_size_quantize_t)(size_t); -extern extent_size_quantize_t *extent_size_quantize_floor; -extern extent_size_quantize_t *extent_size_quantize_ceil; -#else -size_t extent_size_quantize_floor(size_t size); -size_t extent_size_quantize_ceil(size_t size); -#endif - -ph_proto(, extent_heap_, extent_heap_t, extent_t) - -extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - 
extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - size_t usize_a, size_t size_b, size_t usize_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); - -bool extent_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H +#define JEMALLOC_INTERNAL_EXTENT_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); @@ -492,8 +340,4 @@ extent_snad_comp(const extent_t *a, const extent_t *b) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/extent_mmap.h" +#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h deleted file mode 100644 index 3c1a7884..00000000 --- a/include/jemalloc/internal/extent_mmap.h +++ /dev/null @@ -1,21 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - 
-#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); -bool extent_dalloc_mmap(void *addr, size_t size); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/extent_mmap_externs.h b/include/jemalloc/internal/extent_mmap_externs.h new file mode 100644 index 00000000..5917b53d --- /dev/null +++ b/include/jemalloc/internal/extent_mmap_externs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H + +void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit); +bool extent_dalloc_mmap(void *addr, size_t size); + +#endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h new file mode 100644 index 00000000..de31317c --- /dev/null +++ b/include/jemalloc/internal/extent_structs.h @@ -0,0 +1,84 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H + +/* Extent (span of pages). Use accessor functions for e_* fields. */ +struct extent_s { + /* Arena from which this extent came, if any. */ + arena_t *e_arena; + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + /* Extent size. */ + size_t e_size; + + /* + * Usable size, typically smaller than extent size due to large_pad or + * promotion of sampled small regions. + */ + size_t e_usize; + + /* + * Serial number (potentially non-unique). 
+ * + * In principle serial numbers can wrap around on 32-bit systems if + * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall + * back on address comparison for equal serial numbers, stable (if + * imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of wrap-around, + * e.g. when splitting an extent and assigning the same serial number to + * both resulting adjacent extents. + */ + size_t e_sn; + + /* True if extent is active (in use). */ + bool e_active; + + /* + * The zeroed flag is used by extent recycling code to track whether + * memory is zero-filled. + */ + bool e_zeroed; + + /* + * True if physical memory is committed to the extent, whether + * explicitly or implicitly as on a system that overcommits and + * satisfies physical memory needs on demand via soft page faults. + */ + bool e_committed; + + /* + * The slab flag indicates whether the extent is used for a slab of + * small regions. This helps differentiate small size classes, and it + * indicates whether interior pointers can be looked up via iealloc(). + */ + bool e_slab; + + union { + /* Small region slab metadata. */ + arena_slab_data_t e_slab_data; + + /* Profile counters, used for large objects. */ + union { + void *e_prof_tctx_pun; + prof_tctx_t *e_prof_tctx; + }; + }; + + /* + * Linkage for arena's extents_dirty and arena_bin_t's slabs_full rings. + */ + qr(extent_t) qr_link; + + union { + /* Linkage for per size class sn/address-ordered heaps. */ + phn(extent_t) ph_link; + + /* Linkage for arena's large and extent_cache lists. 
*/ + ql_elm(extent_t) ql_link; + }; +}; +typedef ph(extent_t) extent_heap_t; + +#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h new file mode 100644 index 00000000..4873dc54 --- /dev/null +++ b/include/jemalloc/internal/extent_types.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H +#define JEMALLOC_INTERNAL_EXTENT_TYPES_H + +typedef struct extent_s extent_t; + +#define EXTENT_HOOKS_INITIALIZER NULL + +#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash_inlines.h similarity index 92% rename from include/jemalloc/internal/hash.h rename to include/jemalloc/internal/hash_inlines.h index 1ff2d9a0..0340418e 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash_inlines.h @@ -1,22 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_HASH_INLINES_H +#define JEMALLOC_INTERNAL_HASH_INLINES_H + /* * The following hash function is based on MurmurHash3, placed into the public * domain by Austin Appleby. See https://github.com/aappleby/smhasher for * details. 
*/ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES #ifndef JEMALLOC_ENABLE_INLINE uint32_t hash_x86_32(const void *key, int len, uint32_t seed); @@ -353,5 +342,4 @@ hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_HASH_INLINES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index a558012a..dfbb4b6d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -176,21 +176,38 @@ static const bool have_thp = /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), * but there are circular dependencies that cannot be broken without - * substantial performance degradation. In order to reduce the effect on - * visual code flow, read the header files in multiple passes, with one of the - * following cpp variables defined during each pass: + * substantial performance degradation. * + * Historically, we dealt with this by each header into four sections (types, + * structs, externs, and inlines), and included each header file multiple times + * in this file, picking out the portion we want on each pass using the + * following #defines: * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data * types. * JEMALLOC_H_STRUCTS : Data structures. 
* JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. * JEMALLOC_H_INLINES : Inline functions. + * + * We're moving toward a world in which the dependencies are explicit; each file + * will #include the headers it depends on (rather than relying on them being + * implicitly available via this file including every header file in the + * project). + * + * We're now in an intermediate state: we've broken up the header files to avoid + * having to include each one multiple times, but have not yet moved the + * dependency information into the header files (i.e. we still rely on the + * ordering in this file to ensure all a header's dependencies are available in + * its translation unit). Each component is now broken up into multiple header + * files, corresponding to the sections above (e.g. instead of "tsd.h", we now + * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). */ -/******************************************************************************/ -#define JEMALLOC_H_TYPES #include "jemalloc/internal/jemalloc_internal_macros.h" +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + /* Page size index type. 
*/ typedef unsigned pszind_t; @@ -362,69 +379,57 @@ typedef unsigned szind_t; # define VARIABLE_ARRAY(type, name, count) type name[(count)] #endif -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/nstime_types.h" +#include "jemalloc/internal/util_types.h" +#include "jemalloc/internal/spin_types.h" +#include "jemalloc/internal/prng_types.h" +#include "jemalloc/internal/ticker_types.h" +#include "jemalloc/internal/ckh_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/stats_types.h" +#include "jemalloc/internal/ctl_types.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/mutex_types.h" +#include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_dss_types.h" +#include "jemalloc/internal/base_types.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/bitmap_types.h" +#include "jemalloc/internal/rtree_types.h" +#include "jemalloc/internal/pages_types.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/prof_types.h" + -#undef JEMALLOC_H_TYPES 
/******************************************************************************/ -#define JEMALLOC_H_STRUCTS - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/bitmap.h" -#define JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_A -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#define JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_STRUCTS_B -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" - -#include "jemalloc/internal/tsd.h" - -#undef JEMALLOC_H_STRUCTS +/* STRUCTS */ +/******************************************************************************/ + +#include "jemalloc/internal/nstime_structs.h" +#include "jemalloc/internal/spin_structs.h" +#include "jemalloc/internal/ticker_structs.h" +#include "jemalloc/internal/ckh_structs.h" +#include "jemalloc/internal/stats_structs.h" +#include "jemalloc/internal/ctl_structs.h" +#include "jemalloc/internal/witness_structs.h" +#include "jemalloc/internal/mutex_structs.h" +#include "jemalloc/internal/bitmap_structs.h" +#include "jemalloc/internal/arena_structs_a.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/extent_dss_structs.h" +#include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/arena_structs_b.h" +#include 
"jemalloc/internal/rtree_structs.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/tsd_structs.h" + + +/******************************************************************************/ +/* EXTERNS */ /******************************************************************************/ -#define JEMALLOC_H_EXTERNS extern bool opt_abort; extern const char *opt_junk; @@ -482,54 +487,42 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/arena.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" -#include "jemalloc/internal/tcache.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/prof.h" -#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/nstime_externs.h" +#include "jemalloc/internal/util_externs.h" +#include "jemalloc/internal/atomic_externs.h" +#include "jemalloc/internal/ckh_externs.h" +#include "jemalloc/internal/stats_externs.h" +#include "jemalloc/internal/ctl_externs.h" +#include "jemalloc/internal/witness_externs.h" +#include "jemalloc/internal/mutex_externs.h" +#include "jemalloc/internal/bitmap_externs.h" +#include "jemalloc/internal/extent_externs.h" +#include "jemalloc/internal/extent_dss_externs.h" +#include 
"jemalloc/internal/extent_mmap_externs.h" +#include "jemalloc/internal/base_externs.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/rtree_externs.h" +#include "jemalloc/internal/pages_externs.h" +#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/tsd_externs.h" -#undef JEMALLOC_H_EXTERNS /******************************************************************************/ -#define JEMALLOC_H_INLINES +/* INLINES */ +/******************************************************************************/ -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/spin.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" -#include "jemalloc/internal/ctl.h" -#include "jemalloc/internal/tsd.h" -#include "jemalloc/internal/witness.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/extent.h" -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/large.h" +#include "jemalloc/internal/util_inlines.h" +#include "jemalloc/internal/atomic_inlines.h" +#include "jemalloc/internal/spin_inlines.h" +#include "jemalloc/internal/prng_inlines.h" +#include "jemalloc/internal/ticker_inlines.h" +#include "jemalloc/internal/tsd_inlines.h" +#include "jemalloc/internal/witness_inlines.h" +#include "jemalloc/internal/mutex_inlines.h" +#include "jemalloc/internal/rtree_inlines.h" +#include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/base_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); @@ -925,14 +918,12 @@ decay_ticker_get(tsd_t *tsd, unsigned 
ind) } #endif -#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/bitmap_inlines.h" /* - * Include portions of arena.h interleaved with tcache.h in order to resolve - * circular dependencies. + * Include portions of arena code interleaved with tcache code in order to + * resolve circular dependencies. */ -#define JEMALLOC_ARENA_INLINE_A -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_A +#include "jemalloc/internal/arena_inlines_a.h" #ifndef JEMALLOC_ENABLE_INLINE extent_t *iealloc(tsdn_t *tsdn, const void *ptr); @@ -947,11 +938,9 @@ iealloc(tsdn_t *tsdn, const void *ptr) } #endif -#include "jemalloc/internal/tcache.h" -#define JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/arena.h" -#undef JEMALLOC_ARENA_INLINE_B -#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/hash_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); @@ -1211,10 +1200,8 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, } #endif -#include "jemalloc/internal/prof.h" +#include "jemalloc/internal/prof_inlines.h" -#undef JEMALLOC_H_INLINES -/******************************************************************************/ #ifdef __cplusplus } diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 57492049..80820f87 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_MACROS_H +#define JEMALLOC_INTERNAL_MACROS_H + /* * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for * functions that are static inline functions if inlining is enabled, and @@ -55,3 +58,5 @@ #if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) # define restrict #endif + +#endif /* 
JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/large.h b/include/jemalloc/internal/large_externs.h similarity index 64% rename from include/jemalloc/internal/large.h rename to include/jemalloc/internal/large_externs.h index f3d382b5..f0a03399 100644 --- a/include/jemalloc/internal/large.h +++ b/include/jemalloc/internal/large_externs.h @@ -1,13 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +#ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#define JEMALLOC_INTERNAL_LARGE_EXTERNS_H void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, @@ -32,9 +24,4 @@ prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h deleted file mode 100644 index d5b3693c..00000000 --- a/include/jemalloc/internal/mutex.h +++ /dev/null @@ -1,150 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct malloc_mutex_s malloc_mutex_t; - -#ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define 
MALLOC_MUTEX_INITIALIZER \ - {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER \ - {0, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, NULL, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#else -# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ - defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif -#endif - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct malloc_mutex_s { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; -# else - CRITICAL_SECTION lock; -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; -#else - pthread_mutex_t lock; -#endif - witness_t witness; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#ifdef JEMALLOC_LAZY_LOCK -extern bool isthreaded; -#else -# undef isthreaded /* Undo private_namespace.h definition. 
*/ -# define isthreaded true -#endif - -bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank); -void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void -malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - AcquireSRWLockExclusive(&mutex->lock); -# else - EnterCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_lock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mutex->lock); -#else - pthread_mutex_lock(&mutex->lock); -#endif - witness_lock(tsdn, &mutex->witness); - } -} - -JEMALLOC_INLINE void -malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) { - witness_unlock(tsdn, &mutex->witness); -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - ReleaseSRWLockExclusive(&mutex->lock); -# else - LeaveCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_unlock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mutex->lock); -#else - pthread_mutex_unlock(&mutex->lock); -#endif - } -} - -JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsdn_t *tsdn, 
malloc_mutex_t *mutex) -{ - - if (isthreaded) - witness_assert_owner(tsdn, &mutex->witness); -} - -JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - - if (isthreaded) - witness_assert_not_owner(tsdn, &mutex->witness); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h new file mode 100644 index 00000000..ba6418ef --- /dev/null +++ b/include/jemalloc/internal/mutex_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_EXTERNS_H +#define JEMALLOC_INTERNAL_MUTEX_EXTERNS_H + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# undef isthreaded /* Undo private_namespace.h definition. */ +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); + +#endif /* JEMALLOC_INTERNAL_MUTEX_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h new file mode 100644 index 00000000..b769f0ca --- /dev/null +++ b/include/jemalloc/internal/mutex_inlines.h @@ -0,0 +1,74 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H +#define JEMALLOC_INTERNAL_MUTEX_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock(tsdn_t *tsdn, 
malloc_mutex_t *mutex) +{ + + if (isthreaded) { + witness_assert_not_owner(tsdn, &mutex->witness); +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + AcquireSRWLockExclusive(&mutex->lock); +# else + EnterCriticalSection(&mutex->lock); +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_lock(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockLock(&mutex->lock); +#else + pthread_mutex_lock(&mutex->lock); +#endif + witness_lock(tsdn, &mutex->witness); + } +} + +JEMALLOC_INLINE void +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) { + witness_unlock(tsdn, &mutex->witness); +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + ReleaseSRWLockExclusive(&mutex->lock); +# else + LeaveCriticalSection(&mutex->lock); +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock_unlock(&mutex->lock); +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLockUnlock(&mutex->lock); +#else + pthread_mutex_unlock(&mutex->lock); +#endif + } +} + +JEMALLOC_INLINE void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_owner(tsdn, &mutex->witness); +} + +JEMALLOC_INLINE void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) +{ + + if (isthreaded) + witness_assert_not_owner(tsdn, &mutex->witness); +} +#endif + +#endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */ diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h new file mode 100644 index 00000000..4a18a075 --- /dev/null +++ b/include/jemalloc/internal/mutex_structs.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H +#define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H + +struct malloc_mutex_s { +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + 
malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif + witness_t witness; +}; + +#endif /* JEMALLOC_INTERNAL_MUTEX_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h new file mode 100644 index 00000000..8c9f249d --- /dev/null +++ b/include/jemalloc/internal/mutex_types.h @@ -0,0 +1,33 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H +#define JEMALLOC_INTERNAL_MUTEX_TYPES_H + +typedef struct malloc_mutex_s malloc_mutex_t; + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER \ + {0, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, NULL, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#else +# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ + defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define MALLOC_MUTEX_INITIALIZER \ + {PTHREAD_MUTEX_INITIALIZER, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif +#endif + +#endif /* JEMALLOC_INTERNAL_MUTEX_TYPES_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime_externs.h similarity index 53% rename from include/jemalloc/internal/nstime.h rename to include/jemalloc/internal/nstime_externs.h index 93b27dc8..cf14ae0c 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime_externs.h @@ -1,22 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES 
- -typedef struct nstime_s nstime_t; - -/* Maximum supported number of seconds (~584 years). */ -#define NSTIME_SEC_MAX KQU(18446744072) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct nstime_s { - uint64_t ns; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS +#ifndef JEMALLOC_INTERNAL_NSTIME_EXTERNS_H +#define JEMALLOC_INTERNAL_NSTIME_EXTERNS_H void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); @@ -40,9 +23,4 @@ bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_NSTIME_EXTERNS_H */ diff --git a/include/jemalloc/internal/nstime_structs.h b/include/jemalloc/internal/nstime_structs.h new file mode 100644 index 00000000..a637f616 --- /dev/null +++ b/include/jemalloc/internal/nstime_structs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_NSTIME_STRUCTS_H +#define JEMALLOC_INTERNAL_NSTIME_STRUCTS_H + +struct nstime_s { + uint64_t ns; +}; + +#endif /* JEMALLOC_INTERNAL_NSTIME_STRUCTS_H */ diff --git a/include/jemalloc/internal/nstime_types.h b/include/jemalloc/internal/nstime_types.h new file mode 100644 index 00000000..861c5a8a --- /dev/null +++ b/include/jemalloc/internal/nstime_types.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_NSTIME_TYPES_H +#define JEMALLOC_INTERNAL_NSTIME_TYPES_H + +typedef struct nstime_s nstime_t; + +/* Maximum supported number of seconds (~584 years). 
*/ +#define NSTIME_SEC_MAX KQU(18446744072) + +#endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */ diff --git a/include/jemalloc/internal/pages_externs.h b/include/jemalloc/internal/pages_externs.h new file mode 100644 index 00000000..7e34efb3 --- /dev/null +++ b/include/jemalloc/internal/pages_externs.h @@ -0,0 +1,31 @@ +#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H + +static const bool pages_can_purge_lazy = +#ifdef PAGES_CAN_PURGE_LAZY + true +#else + false +#endif + ; +static const bool pages_can_purge_forced = +#ifdef PAGES_CAN_PURGE_FORCED + true +#else + false +#endif + ; + +void *pages_map(void *addr, size_t size, bool *commit); +void pages_unmap(void *addr, size_t size); +void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, + size_t size, bool *commit); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); +void pages_boot(void); + +#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages_types.h similarity index 54% rename from include/jemalloc/internal/pages.h rename to include/jemalloc/internal/pages_types.h index 98e4f38a..be1e245f 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages_types.h @@ -1,5 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_PAGES_TYPES_H +#define JEMALLOC_INTERNAL_PAGES_TYPES_H /* Page size. LG_PAGE is determined by the configure script. 
*/ #ifdef PAGE_MASK @@ -41,45 +41,4 @@ # define PAGES_CAN_PURGE_FORCED #endif -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -static const bool pages_can_purge_lazy = -#ifdef PAGES_CAN_PURGE_LAZY - true -#else - false -#endif - ; -static const bool pages_can_purge_forced = -#ifdef PAGES_CAN_PURGE_FORCED - true -#else - false -#endif - ; - -void *pages_map(void *addr, size_t size, bool *commit); -void pages_unmap(void *addr, size_t size); -void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size, bool *commit); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge_lazy(void *addr, size_t size); -bool pages_purge_forced(void *addr, size_t size); -bool pages_huge(void *addr, size_t size); -bool pages_nohuge(void *addr, size_t size); -void pages_boot(void); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - +#endif /* JEMALLOC_INTERNAL_PAGES_TYPES_H */ diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng_inlines.h similarity index 71% rename from include/jemalloc/internal/prng.h rename to include/jemalloc/internal/prng_inlines.h index 94fd55a7..b82a6620 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -1,42 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -/* - * Simple linear congruential pseudo-random number generator: - * - * prng(y) = (a*x + c) % m - * - * where the following constants ensure maximal period: 
- * - * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. - * c == Odd number (relatively prime to 2^n). - * m == 2^32 - * - * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. - * - * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example, the lowest bit has a cycle of 2, - * the next has a cycle of 4, etc. For this reason, we prefer to use the upper - * bits. - */ - -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) - -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_PRNG_INLINES_H +#define JEMALLOC_INTERNAL_PRNG_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE uint32_t prng_state_next_u32(uint32_t state); @@ -203,5 +166,4 @@ prng_range_zu(size_t *state, size_t range, bool atomic) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_PRNG_INLINES_H */ diff --git a/include/jemalloc/internal/prng_types.h b/include/jemalloc/internal/prng_types.h new file mode 100644 index 00000000..dec44c09 --- /dev/null +++ b/include/jemalloc/internal/prng_types.h @@ -0,0 +1,29 @@ +#ifndef JEMALLOC_INTERNAL_PRNG_TYPES_H +#define JEMALLOC_INTERNAL_PRNG_TYPES_H + +/* + * Simple linear congruential pseudo-random number generator: + * + * prng(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 
2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example, the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. + */ + +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) + +#endif /* JEMALLOC_INTERNAL_PRNG_TYPES_H */ diff --git a/include/jemalloc/internal/prof.h b/include/jemalloc/internal/prof.h deleted file mode 100644 index 2d1791b9..00000000 --- a/include/jemalloc/internal/prof.h +++ /dev/null @@ -1,568 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; - -/* Option defaults. */ -#ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" -#else -# define PROF_PREFIX_DEFAULT "" -#endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 - -/* - * Hard limit on stack backtrace depth. The version of prof_backtrace() that - * is based on __builtin_return_address() necessarily has a hard-coded number - * of backtrace frame handlers, and should be kept in sync with this setting. - */ -#define PROF_BT_MAX 128 - -/* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 - -/* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 - -/* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 - -/* - * Number of mutexes shared among all gctx's. 
No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NCTX_LOCKS 1024 - -/* - * Number of mutexes shared among all tdata's. No space is allocated for these - * unless profiling is enabled, so it's okay to over-provision. - */ -#define PROF_NTDATA_LOCKS 256 - -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct prof_bt_s { - /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; -}; - -#ifdef JEMALLOC_PROF_LIBGCC -/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ -typedef struct { - prof_bt_t *bt; - unsigned max; -} prof_unwind_data_t; -#endif - -struct prof_cnt_s { - /* Profiling counters. */ - uint64_t curobjs; - uint64_t curbytes; - uint64_t accumobjs; - uint64_t accumbytes; -}; - -typedef enum { - prof_tctx_state_initializing, - prof_tctx_state_nominal, - prof_tctx_state_dumping, - prof_tctx_state_purgatory /* Dumper must finish destroying. */ -} prof_tctx_state_t; - -struct prof_tctx_s { - /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; - - /* - * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be - * defunct during teardown. - */ - uint64_t thr_uid; - uint64_t thr_discrim; - - /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; - - /* Associated global context. */ - prof_gctx_t *gctx; - - /* - * UID that distinguishes multiple tctx's created by the same thread, - * but coexisting in gctx->tctxs. 
There are two ways that such - * coexistence can occur: - * - A dumper thread can cause a tctx to be retained in the purgatory - * state. - * - Although a single "producer" thread must create all tctx's which - * share the same thr_uid, multiple "consumers" can each concurrently - * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only - * gets called once each time cnts.cur{objs,bytes} drop to 0, but this - * threshold can be hit again before the first consumer finishes - * executing prof_tctx_destroy(). - */ - uint64_t tctx_uid; - - /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; - - /* - * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents - * sample vs destroy race. - */ - bool prepared; - - /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; - - /* - * Copy of cnts snapshotted during early dump phase, protected by - * dump_mtx. - */ - prof_cnt_t dump_cnts; -}; -typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; - -struct prof_gctx_s { - /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; - - /* - * Number of threads that currently cause this gctx to be in a state of - * limbo due to one of: - * - Initializing this gctx. - * - Initializing per thread counters associated with this gctx. - * - Preparing to destroy this gctx. - * - Dumping a heap profile that includes this gctx. - * nlimbo must be 1 (single destroyer) in order to safely destroy the - * gctx. - */ - unsigned nlimbo; - - /* - * Tree of profile counters, one for each thread that has allocated in - * this context. - */ - prof_tctx_tree_t tctxs; - - /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Associated backtrace. */ - prof_bt_t bt; - - /* Backtrace vector, variable size, referred to by bt. 
*/ - void *vec[1]; -}; -typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; - -struct prof_tdata_s { - malloc_mutex_t *lock; - - /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; - - /* - * Monotonically increasing discriminator among tdata structures - * associated with the same thr_uid. - */ - uint64_t thr_discrim; - - /* Included in heap profile dumps if non-NULL. */ - char *thread_name; - - bool attached; - bool expired; - - rb_node(prof_tdata_t) tdata_link; - - /* - * Counter used to initialize prof_tctx_t's tctx_uid. No locking is - * necessary when incrementing this field, because only one thread ever - * does so. - */ - uint64_t tctx_uid_next; - - /* - * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks - * backtraces for which it has non-zero allocation/deallocation counters - * associated with thread-specific prof_tctx_t objects. Other threads - * may write to prof_tctx_t contents when freeing associated objects. - */ - ckh_t bt2tctx; - - /* Sampling state. */ - uint64_t prng_state; - uint64_t bytes_until_sample; - - /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; - - /* - * Set to true during an early dump phase for tdata's which are - * currently being dumped. New threads' tdata's have this initialized - * to false so that they aren't accidentally included in later dump - * phases. - */ - bool dumping; - - /* - * True if profiling is active for this tdata's thread - * (thread.prof.active mallctl). - */ - bool active; - - /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; - - /* Backtrace vector, used for calls to prof_backtrace(). 
*/ - void *vec[PROF_BT_MAX]; -}; -typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; - -/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; - -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ -extern uint64_t prof_interval; - -/* - * Initialized as opt_lg_prof_sample, and potentially modified during profiling - * resets. 
- */ -extern size_t lg_prof_sample; - -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, - const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -#ifdef JEMALLOC_JET -size_t prof_tdata_count(void); -size_t prof_bt_count(void); -const prof_cnt_t *prof_cnt_all(void); -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *prof_dump_open; -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); -extern prof_dump_header_t *prof_dump_header; -#endif -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ 
-#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool prof_active_get_unlocked(void); -bool prof_gdump_get_unlocked(void); -prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx); -bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, - bool update); -void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, - extent_t *old_extent, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, - size_t usize); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) -{ - - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return (prof_active); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_gdump_get_unlocked(void) -{ - - /* - * No locking is used when reading prof_gdump_val in the fast path, so - * there are no guarantees regarding how long it will take for all - * threads to notice state changes. 
- */ - return (prof_gdump_val); -} - -JEMALLOC_ALWAYS_INLINE prof_tdata_t * -prof_tdata_get(tsd_t *tsd, bool create) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = tsd_prof_tdata_get(tsd); - if (create) { - if (unlikely(tdata == NULL)) { - if (tsd_nominal(tsd)) { - tdata = prof_tdata_init(tsd); - tsd_prof_tdata_set(tsd, tdata); - } - } else if (unlikely(tdata->expired)) { - tdata = prof_tdata_reinit(tsd, tdata); - tsd_prof_tdata_set(tsd, tdata); - } - assert(tdata == NULL || tdata->attached); - } - - return (tdata); -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ - - cassert(config_prof); - assert(ptr != NULL); - - return (arena_prof_tctx_get(tsdn, extent, ptr)); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - - arena_prof_tctx_reset(tsdn, extent, ptr, tctx); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) -{ - prof_tdata_t *tdata; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) - tdata = NULL; - - if (tdata_out != NULL) - *tdata_out = tdata; - - if (unlikely(tdata == NULL)) - return (true); - - if (likely(tdata->bytes_until_sample >= usize)) { - if (update) - tdata->bytes_until_sample -= usize; - return (true); - } else { - /* Compute new sample threshold. 
*/ - if (update) - prof_sample_threshold_update(tdata); - return (!tdata->active); - } -} - -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) -{ - prof_tctx_t *ret; - prof_tdata_t *tdata; - prof_bt_t bt; - - assert(usize == s2u(usize)); - - if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) - ret = (prof_tctx_t *)(uintptr_t)1U; - else { - bt_init(&bt, tdata->vec); - prof_backtrace(&bt); - ret = prof_lookup(tsd, &bt); - } - - return (ret); -} - -JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ - - cassert(config_prof); - assert(ptr != NULL); - assert(usize == isalloc(tsdn, extent, ptr)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); - else { - prof_tctx_set(tsdn, extent, ptr, usize, - (prof_tctx_t *)(uintptr_t)1U); - } -} - -JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent, - const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) -{ - bool sampled, old_sampled, moved; - - cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - - if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); - if (prof_sample_accum_update(tsd, usize, true, NULL)) { - /* - * Don't sample. The usize passed to prof_alloc_prep() - * was larger than what actually got allocated, so a - * backtrace was captured for this allocation, even - * though its actual usize was insufficient to cross the - * sample threshold. - */ - prof_alloc_rollback(tsd, tctx, true); - tctx = (prof_tctx_t *)(uintptr_t)1U; - } - } - - /* - * The following code must differentiate among eight possible cases, - * based on three boolean conditions. 
- */ - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); - moved = (ptr != old_ptr); - - /* - * The following block must only execute if this is a non-moving - * reallocation, because for moving reallocation the old allocation will - * be deallocated via a separate call. - */ - if (unlikely(old_sampled) && !moved) - prof_free_sampled_object(tsd, old_usize, old_tctx); - - if (unlikely(sampled)) { - prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, - tctx); - } else if (moved) { - prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, - (prof_tctx_t *)(uintptr_t)1U); - } else if (unlikely(old_sampled)) - prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); -} - -JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) -{ - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); - - cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); - - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) - prof_free_sampled_object(tsd, usize, tctx); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h new file mode 100644 index 00000000..3f857145 --- /dev/null +++ b/include/jemalloc/internal/prof_externs.h @@ -0,0 +1,83 @@ +#ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_EXTERNS_H + +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_accum; /* Report cumulative bytes. 
*/ +extern char opt_prof_prefix[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Accessed via prof_active_[gs]et{_unlocked,}(). */ +extern bool prof_active; + +/* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ +extern bool prof_gdump_val; + +/* + * Profile dump interval, measured in bytes allocated. Each arena triggers a + * profile dump when it reaches this threshold. The effect is that the + * interval between profile dumps averages prof_interval, though the actual + * interval between dumps will tend to be sporadic, and the interval will be a + * maximum of approximately (prof_interval * narenas). + */ +extern uint64_t prof_interval; + +/* + * Initialized as opt_lg_prof_sample, and potentially modified during profiling + * resets. + */ +extern size_t lg_prof_sample; + +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, + const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +#ifdef JEMALLOC_JET +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +const prof_cnt_t *prof_cnt_all(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *prof_dump_open; +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *prof_dump_header; +#endif +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char 
*prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); +void prof_sample_threshold_update(prof_tdata_t *tdata); + +#endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h new file mode 100644 index 00000000..0b580425 --- /dev/null +++ b/include/jemalloc/internal/prof_inlines.h @@ -0,0 +1,242 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H +#define JEMALLOC_INTERNAL_PROF_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool prof_active_get_unlocked(void); +bool prof_gdump_get_unlocked(void); +prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, + const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx); +bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out); +prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, + bool update); +void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, + size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, + extent_t *old_extent, const void *old_ptr, size_t old_usize, + prof_tctx_t 
*old_tctx); +void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, + size_t usize); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) +{ + + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return (prof_active); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_gdump_get_unlocked(void) +{ + + /* + * No locking is used when reading prof_gdump_val in the fast path, so + * there are no guarantees regarding how long it will take for all + * threads to notice state changes. + */ + return (prof_gdump_val); +} + +JEMALLOC_ALWAYS_INLINE prof_tdata_t * +prof_tdata_get(tsd_t *tsd, bool create) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = tsd_prof_tdata_get(tsd); + if (create) { + if (unlikely(tdata == NULL)) { + if (tsd_nominal(tsd)) { + tdata = prof_tdata_init(tsd); + tsd_prof_tdata_set(tsd, tdata); + } + } else if (unlikely(tdata->expired)) { + tdata = prof_tdata_reinit(tsd, tdata); + tsd_prof_tdata_set(tsd, tdata); + } + assert(tdata == NULL || tdata->attached); + } + + return (tdata); +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) +{ + + cassert(config_prof); + assert(ptr != NULL); + + return (arena_prof_tctx_get(tsdn, extent, ptr)); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + + 
arena_prof_tctx_reset(tsdn, extent, ptr, tctx); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) +{ + prof_tdata_t *tdata; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, true); + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) + tdata = NULL; + + if (tdata_out != NULL) + *tdata_out = tdata; + + if (unlikely(tdata == NULL)) + return (true); + + if (likely(tdata->bytes_until_sample >= usize)) { + if (update) + tdata->bytes_until_sample -= usize; + return (true); + } else { + /* Compute new sample threshold. */ + if (update) + prof_sample_threshold_update(tdata); + return (!tdata->active); + } +} + +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) +{ + prof_tctx_t *ret; + prof_tdata_t *tdata; + prof_bt_t bt; + + assert(usize == s2u(usize)); + + if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, + &tdata))) + ret = (prof_tctx_t *)(uintptr_t)1U; + else { + bt_init(&bt, tdata->vec); + prof_backtrace(&bt); + ret = prof_lookup(tsd, &bt); + } + + return (ret); +} + +JEMALLOC_ALWAYS_INLINE void +prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx) +{ + + cassert(config_prof); + assert(ptr != NULL); + assert(usize == isalloc(tsdn, extent, ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); + else { + prof_tctx_set(tsdn, extent, ptr, usize, + (prof_tctx_t *)(uintptr_t)1U); + } +} + +JEMALLOC_ALWAYS_INLINE void +prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent, + const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) +{ + bool sampled, old_sampled, moved; + + cassert(config_prof); + assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + + if (prof_active && !updated && ptr 
!= NULL) { + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + if (prof_sample_accum_update(tsd, usize, true, NULL)) { + /* + * Don't sample. The usize passed to prof_alloc_prep() + * was larger than what actually got allocated, so a + * backtrace was captured for this allocation, even + * though its actual usize was insufficient to cross the + * sample threshold. + */ + prof_alloc_rollback(tsd, tctx, true); + tctx = (prof_tctx_t *)(uintptr_t)1U; + } + } + + /* + * The following code must differentiate among eight possible cases, + * based on three boolean conditions. + */ + sampled = ((uintptr_t)tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + moved = (ptr != old_ptr); + + /* + * The following block must only execute if this is a non-moving + * reallocation, because for moving reallocation the old allocation will + * be deallocated via a separate call. + */ + if (unlikely(old_sampled) && !moved) + prof_free_sampled_object(tsd, old_usize, old_tctx); + + if (unlikely(sampled)) { + prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, + tctx); + } else if (moved) { + prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, + (prof_tctx_t *)(uintptr_t)1U); + } else if (unlikely(old_sampled)) + prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) +{ + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); + + cassert(config_prof); + assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + prof_free_sampled_object(tsd, usize, tctx); +} +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h new file mode 100644 index 00000000..caae1257 --- /dev/null +++ b/include/jemalloc/internal/prof_structs.h @@ -0,0 +1,187 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H +#define 
JEMALLOC_INTERNAL_PROF_STRUCTS_H + +struct prof_bt_s { + /* Backtrace, stored as len program counters. */ + void **vec; + unsigned len; +}; + +#ifdef JEMALLOC_PROF_LIBGCC +/* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ +typedef struct { + prof_bt_t *bt; + unsigned max; +} prof_unwind_data_t; +#endif + +struct prof_cnt_s { + /* Profiling counters. */ + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; +}; + +typedef enum { + prof_tctx_state_initializing, + prof_tctx_state_nominal, + prof_tctx_state_dumping, + prof_tctx_state_purgatory /* Dumper must finish destroying. */ +} prof_tctx_state_t; + +struct prof_tctx_s { + /* Thread data for thread that performed the allocation. */ + prof_tdata_t *tdata; + + /* + * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be + * defunct during teardown. + */ + uint64_t thr_uid; + uint64_t thr_discrim; + + /* Profiling counters, protected by tdata->lock. */ + prof_cnt_t cnts; + + /* Associated global context. */ + prof_gctx_t *gctx; + + /* + * UID that distinguishes multiple tctx's created by the same thread, + * but coexisting in gctx->tctxs. There are two ways that such + * coexistence can occur: + * - A dumper thread can cause a tctx to be retained in the purgatory + * state. + * - Although a single "producer" thread must create all tctx's which + * share the same thr_uid, multiple "consumers" can each concurrently + * execute portions of prof_tctx_destroy(). prof_tctx_destroy() only + * gets called once each time cnts.cur{objs,bytes} drop to 0, but this + * threshold can be hit again before the first consumer finishes + * executing prof_tctx_destroy(). + */ + uint64_t tctx_uid; + + /* Linkage into gctx's tctxs. */ + rb_node(prof_tctx_t) tctx_link; + + /* + * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents + * sample vs destroy race. + */ + bool prepared; + + /* Current dump-related state, protected by gctx->lock. 
*/ + prof_tctx_state_t state; + + /* + * Copy of cnts snapshotted during early dump phase, protected by + * dump_mtx. + */ + prof_cnt_t dump_cnts; +}; +typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; + +struct prof_gctx_s { + /* Protects nlimbo, cnt_summed, and tctxs. */ + malloc_mutex_t *lock; + + /* + * Number of threads that currently cause this gctx to be in a state of + * limbo due to one of: + * - Initializing this gctx. + * - Initializing per thread counters associated with this gctx. + * - Preparing to destroy this gctx. + * - Dumping a heap profile that includes this gctx. + * nlimbo must be 1 (single destroyer) in order to safely destroy the + * gctx. + */ + unsigned nlimbo; + + /* + * Tree of profile counters, one for each thread that has allocated in + * this context. + */ + prof_tctx_tree_t tctxs; + + /* Linkage for tree of contexts to be dumped. */ + rb_node(prof_gctx_t) dump_link; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Associated backtrace. */ + prof_bt_t bt; + + /* Backtrace vector, variable size, referred to by bt. */ + void *vec[1]; +}; +typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; + +struct prof_tdata_s { + malloc_mutex_t *lock; + + /* Monotonically increasing unique thread identifier. */ + uint64_t thr_uid; + + /* + * Monotonically increasing discriminator among tdata structures + * associated with the same thr_uid. + */ + uint64_t thr_discrim; + + /* Included in heap profile dumps if non-NULL. */ + char *thread_name; + + bool attached; + bool expired; + + rb_node(prof_tdata_t) tdata_link; + + /* + * Counter used to initialize prof_tctx_t's tctx_uid. No locking is + * necessary when incrementing this field, because only one thread ever + * does so. + */ + uint64_t tctx_uid_next; + + /* + * Hash of (prof_bt_t *)-->(prof_tctx_t *). Each thread tracks + * backtraces for which it has non-zero allocation/deallocation counters + * associated with thread-specific prof_tctx_t objects. 
Other threads + * may write to prof_tctx_t contents when freeing associated objects. + */ + ckh_t bt2tctx; + + /* Sampling state. */ + uint64_t prng_state; + uint64_t bytes_until_sample; + + /* State used to avoid dumping while operating on prof internals. */ + bool enq; + bool enq_idump; + bool enq_gdump; + + /* + * Set to true during an early dump phase for tdata's which are + * currently being dumped. New threads' tdata's have this initialized + * to false so that they aren't accidentally included in later dump + * phases. + */ + bool dumping; + + /* + * True if profiling is active for this tdata's thread + * (thread.prof.active mallctl). + */ + bool active; + + /* Temporary storage for summation during dump. */ + prof_cnt_t cnt_summed; + + /* Backtrace vector, used for calls to prof_backtrace(). */ + void *vec[PROF_BT_MAX]; +}; +typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; + +#endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h new file mode 100644 index 00000000..e1eb7fb1 --- /dev/null +++ b/include/jemalloc/internal/prof_types.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_PROF_TYPES_H +#define JEMALLOC_INTERNAL_PROF_TYPES_H + +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; + +/* Option defaults. */ +#ifdef JEMALLOC_PROF +# define PROF_PREFIX_DEFAULT "jeprof" +#else +# define PROF_PREFIX_DEFAULT "" +#endif +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 + +/* + * Hard limit on stack backtrace depth. The version of prof_backtrace() that + * is based on __builtin_return_address() necessarily has a hard-coded number + * of backtrace frame handlers, and should be kept in sync with this setting. + */ +#define PROF_BT_MAX 128 + +/* Initial hash table size. 
*/ +#define PROF_CKH_MINITEMS 64 + +/* Size of memory buffer to use when writing dump files. */ +#define PROF_DUMP_BUFSIZE 65536 + +/* Size of stack-allocated buffer used by prof_printf(). */ +#define PROF_PRINTF_BUFSIZE 128 + +/* + * Number of mutexes shared among all gctx's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NCTX_LOCKS 1024 + +/* + * Number of mutexes shared among all tdata's. No space is allocated for these + * unless profiling is enabled, so it's okay to over-provision. + */ +#define PROF_NTDATA_LOCKS 256 + +/* + * prof_tdata pointers close to NULL are used to encode state information that + * is used for cleaning up during thread shutdown. + */ +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY + +#endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 1834bb85..424485c4 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_QL_H +#define JEMALLOC_INTERNAL_QL_H + /* List definitions. */ #define ql_head(a_type) \ struct { \ @@ -79,3 +82,5 @@ struct { \ #define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) + +#endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 3b5d0276..06dfdafd 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_QR_H +#define JEMALLOC_INTERNAL_QR_H + /* Ring definitions. */ #define qr(a_type) \ struct { \ @@ -67,3 +70,5 @@ struct { \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ ? 
(var)->a_field.qre_prev : NULL)) + +#endif /* JEMALLOC_INTERNAL_QR_H */ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h new file mode 100644 index 00000000..db8e8b12 --- /dev/null +++ b/include/jemalloc/internal/rtree_externs.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_EXTERNS_H +#define JEMALLOC_INTERNAL_RTREE_EXTERNS_H + +bool rtree_new(rtree_t *rtree, unsigned bits); +#ifdef JEMALLOC_JET +typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *rtree_node_alloc; +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); +extern rtree_node_dalloc_t *rtree_node_dalloc; +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, + unsigned level); +rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_elm_t *elm, unsigned level); +void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, + uintptr_t key, const rtree_elm_t *elm); +void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm); +void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_elm_t *elm); + +#endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree_inlines.h similarity index 68% rename from include/jemalloc/internal/rtree.h rename to include/jemalloc/internal/rtree_inlines.h index b2a2800e..7efba54d 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -1,170 +1,5 @@ -/* - * This radix tree implementation is tailored to the singular purpose of - * associating metadata with extents that are currently owned by jemalloc. 
- * - ******************************************************************************* - */ -#ifdef JEMALLOC_H_TYPES - -typedef struct rtree_elm_s rtree_elm_t; -typedef struct rtree_elm_witness_s rtree_elm_witness_t; -typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; -typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_ctx_s rtree_ctx_t; -typedef struct rtree_s rtree_t; - -/* - * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the - * machine address width. - */ -#define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) -/* Maximum rtree height. */ -#define RTREE_HEIGHT_MAX \ - ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) - -#define RTREE_CTX_INITIALIZER { \ - false, \ - 0, \ - 0, \ - {NULL /* C initializes all trailing elements to NULL. */} \ -} - -/* - * Maximum number of concurrently acquired elements per thread. This controls - * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would - * have a witness_t directly embedded, but that would dramatically bloat the - * tree. This must contain enough entries to e.g. coalesce two extents. - */ -#define RTREE_ELM_ACQUIRE_MAX 4 - -/* Initializers for rtree_elm_witness_tsd_t. 
*/ -#define RTREE_ELM_WITNESS_INITIALIZER { \ - NULL, \ - WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ -} - -#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ - { \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER \ - } \ -} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct rtree_elm_s { - union { - void *pun; - rtree_elm_t *child; - extent_t *extent; - }; -}; - -struct rtree_elm_witness_s { - const rtree_elm_t *elm; - witness_t witness; -}; - -struct rtree_elm_witness_tsd_s { - rtree_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; -}; - -struct rtree_level_s { - /* - * A non-NULL subtree points to a subtree rooted along the hypothetical - * path to the leaf node corresponding to key 0. Depending on what keys - * have been used to store to the tree, an arbitrary combination of - * subtree pointers may remain NULL. - * - * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. - * This results in a 3-level tree, and the leftmost leaf can be directly - * accessed via levels[2], the subtree prefixed by 0x0000 (excluding - * 0x00000000) can be accessed via levels[1], and the remainder of the - * tree can be accessed via levels[0]. - * - * levels[0] : [ | 0x0001******** | 0x0002******** | ...] - * - * levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ] - * - * levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...] - * - * This has practical implications on x64, which currently uses only the - * lower 47 bits of virtual address space in userland, thus leaving - * levels[0] unused and avoiding a level of tree traversal. - */ - union { - void *subtree_pun; - rtree_elm_t *subtree; - }; - /* Number of key bits distinguished by this level. 
*/ - unsigned bits; - /* - * Cumulative number of key bits distinguished by traversing to - * corresponding tree level. - */ - unsigned cumbits; -}; - -struct rtree_ctx_s { - /* If false, key/elms have not yet been initialized by a lookup. */ - bool valid; - /* Key that corresponds to the tree path recorded in elms. */ - uintptr_t key; - /* Memoized rtree_start_level(key). */ - unsigned start_level; - /* - * A path through rtree, driven by key. Only elements that could - * actually be used for subsequent lookups are initialized, i.e. if - * start_level = rtree_start_level(key) is non-zero, the first - * start_level elements are uninitialized. The last element contains a - * pointer to the leaf node element that corresponds to key, so that - * exact matches require no tree node offset computation. - */ - rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1]; -}; - -struct rtree_s { - unsigned height; - /* - * Precomputed table used to convert from the number of leading 0 key - * bits to which subtree level to start at. 
- */ - unsigned start_level[RTREE_HEIGHT_MAX + 1]; - rtree_level_t levels[RTREE_HEIGHT_MAX]; - malloc_mutex_t init_lock; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -bool rtree_new(rtree_t *rtree, unsigned bits); -#ifdef JEMALLOC_JET -typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *rtree_node_alloc; -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); -extern rtree_node_dalloc_t *rtree_node_dalloc; -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); -#endif -rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, - unsigned level); -rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, - rtree_elm_t *elm, unsigned level); -void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, - uintptr_t key, const rtree_elm_t *elm); -void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm); -void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_RTREE_INLINES_H +#define JEMALLOC_INTERNAL_RTREE_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key); @@ -604,5 +439,4 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h new file mode 100644 index 00000000..5a7a23c7 --- /dev/null +++ b/include/jemalloc/internal/rtree_structs.h @@ -0,0 +1,86 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_STRUCTS_H 
+#define JEMALLOC_INTERNAL_RTREE_STRUCTS_H + +struct rtree_elm_s { + union { + void *pun; + rtree_elm_t *child; + extent_t *extent; + }; +}; + +struct rtree_elm_witness_s { + const rtree_elm_t *elm; + witness_t witness; +}; + +struct rtree_elm_witness_tsd_s { + rtree_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; +}; + +struct rtree_level_s { + /* + * A non-NULL subtree points to a subtree rooted along the hypothetical + * path to the leaf node corresponding to key 0. Depending on what keys + * have been used to store to the tree, an arbitrary combination of + * subtree pointers may remain NULL. + * + * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. + * This results in a 3-level tree, and the leftmost leaf can be directly + * accessed via levels[2], the subtree prefixed by 0x0000 (excluding + * 0x00000000) can be accessed via levels[1], and the remainder of the + * tree can be accessed via levels[0]. + * + * levels[0] : [ | 0x0001******** | 0x0002******** | ...] + * + * levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ] + * + * levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...] + * + * This has practical implications on x64, which currently uses only the + * lower 47 bits of virtual address space in userland, thus leaving + * levels[0] unused and avoiding a level of tree traversal. + */ + union { + void *subtree_pun; + rtree_elm_t *subtree; + }; + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + +struct rtree_ctx_s { + /* If false, key/elms have not yet been initialized by a lookup. */ + bool valid; + /* Key that corresponds to the tree path recorded in elms. */ + uintptr_t key; + /* Memoized rtree_start_level(key). */ + unsigned start_level; + /* + * A path through rtree, driven by key. 
Only elements that could + * actually be used for subsequent lookups are initialized, i.e. if + * start_level = rtree_start_level(key) is non-zero, the first + * start_level elements are uninitialized. The last element contains a + * pointer to the leaf node element that corresponds to key, so that + * exact matches require no tree node offset computation. + */ + rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1]; +}; + +struct rtree_s { + unsigned height; + /* + * Precomputed table used to convert from the number of leading 0 key + * bits to which subtree level to start at. + */ + unsigned start_level[RTREE_HEIGHT_MAX + 1]; + rtree_level_t levels[RTREE_HEIGHT_MAX]; + malloc_mutex_t init_lock; +}; + +#endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h new file mode 100644 index 00000000..c02ab7a1 --- /dev/null +++ b/include/jemalloc/internal/rtree_types.h @@ -0,0 +1,58 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_TYPES_H +#define JEMALLOC_INTERNAL_RTREE_TYPES_H + +/* + * This radix tree implementation is tailored to the singular purpose of + * associating metadata with extents that are currently owned by jemalloc. + * + ******************************************************************************* + */ + +typedef struct rtree_elm_s rtree_elm_t; +typedef struct rtree_elm_witness_s rtree_elm_witness_t; +typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; +typedef struct rtree_level_s rtree_level_t; +typedef struct rtree_ctx_s rtree_ctx_t; +typedef struct rtree_s rtree_t; + +/* + * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the + * machine address width. + */ +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +/* Maximum rtree height. 
*/ +#define RTREE_HEIGHT_MAX \ + ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) + +#define RTREE_CTX_INITIALIZER { \ + false, \ + 0, \ + 0, \ + {NULL /* C initializes all trailing elements to NULL. */} \ +} + +/* + * Maximum number of concurrently acquired elements per thread. This controls + * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would + * have a witness_t directly embedded, but that would dramatically bloat the + * tree. This must contain enough entries to e.g. coalesce two extents. + */ +#define RTREE_ELM_ACQUIRE_MAX 4 + +/* Initializers for rtree_elm_witness_tsd_t. */ +#define RTREE_ELM_WITNESS_INITIALIZER { \ + NULL, \ + WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ +} + +#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ + { \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER, \ + RTREE_ELM_WITNESS_INITIALIZER \ + } \ +} + +#endif /* JEMALLOC_INTERNAL_RTREE_TYPES_H */ diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 5a57f87d..3680b653 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -261,9 +261,10 @@ size_classes() { } cat <iteration = 0; -} - -JEMALLOC_INLINE void -spin_adaptive(spin_t *spin) -{ - volatile uint64_t i; - - for (i = 0; i < (KQU(1) << spin->iteration); i++) - CPU_SPINWAIT; - - if (spin->iteration < 63) - spin->iteration++; -} - -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ - diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h new file mode 100644 index 00000000..b10f67e7 --- /dev/null +++ b/include/jemalloc/internal/spin_inlines.h @@ -0,0 +1,31 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_INLINES_H +#define JEMALLOC_INTERNAL_SPIN_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +void spin_init(spin_t *spin); +void 
spin_adaptive(spin_t *spin); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) +JEMALLOC_INLINE void +spin_init(spin_t *spin) +{ + + spin->iteration = 0; +} + +JEMALLOC_INLINE void +spin_adaptive(spin_t *spin) +{ + volatile uint64_t i; + + for (i = 0; i < (KQU(1) << spin->iteration); i++) + CPU_SPINWAIT; + + if (spin->iteration < 63) + spin->iteration++; +} + +#endif + +#endif /* JEMALLOC_INTERNAL_SPIN_INLINES_H */ diff --git a/include/jemalloc/internal/spin_structs.h b/include/jemalloc/internal/spin_structs.h new file mode 100644 index 00000000..ef71a765 --- /dev/null +++ b/include/jemalloc/internal/spin_structs.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_STRUCTS_H +#define JEMALLOC_INTERNAL_SPIN_STRUCTS_H + +struct spin_s { + unsigned iteration; +}; + +#endif /* JEMALLOC_INTERNAL_SPIN_STRUCTS_H */ diff --git a/include/jemalloc/internal/spin_types.h b/include/jemalloc/internal/spin_types.h new file mode 100644 index 00000000..52ee4cc1 --- /dev/null +++ b/include/jemalloc/internal/spin_types.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_TYPES_H +#define JEMALLOC_INTERNAL_SPIN_TYPES_H + +typedef struct spin_s spin_t; + +#endif /* JEMALLOC_INTERNAL_SPIN_TYPES_H */ diff --git a/include/jemalloc/internal/stats_externs.h b/include/jemalloc/internal/stats_externs.h new file mode 100644 index 00000000..a0a1ab6c --- /dev/null +++ b/include/jemalloc/internal/stats_externs.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_STATS_EXTERNS_H +#define JEMALLOC_INTERNAL_STATS_EXTERNS_H + +extern bool opt_stats_print; + +void stats_print(void (*write)(void *, const char *), void *cbopaque, + const char *opts); + +#endif /* JEMALLOC_INTERNAL_STATS_EXTERNS_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats_structs.h similarity index 72% rename from include/jemalloc/internal/stats.h rename to include/jemalloc/internal/stats_structs.h index bea4e3e7..aaa0bf4f 100644 --- a/include/jemalloc/internal/stats.h +++ 
b/include/jemalloc/internal/stats_structs.h @@ -1,14 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct arena_stats_s arena_stats_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS +#ifndef JEMALLOC_INTERNAL_STATS_STRUCTS_H +#define JEMALLOC_INTERNAL_STATS_STRUCTS_H struct tcache_bin_stats_s { /* @@ -113,18 +104,4 @@ struct arena_stats_s { malloc_large_stats_t lstats[NSIZES - NBINS]; }; -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_stats_print; - -void stats_print(void (*write)(void *, const char *), void *cbopaque, - const char *opts); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_STATS_STRUCTS_H */ diff --git a/include/jemalloc/internal/stats_types.h b/include/jemalloc/internal/stats_types.h new file mode 100644 index 00000000..f202b231 --- /dev/null +++ b/include/jemalloc/internal/stats_types.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_STATS_TYPES_H +#define JEMALLOC_INTERNAL_STATS_TYPES_H + +typedef struct tcache_bin_stats_s tcache_bin_stats_t; +typedef struct malloc_bin_stats_s malloc_bin_stats_t; +typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct arena_stats_s arena_stats_t; + +#endif /* JEMALLOC_INTERNAL_STATS_TYPES_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h new file mode 100644 
index 00000000..ead90afc --- /dev/null +++ b/include/jemalloc/internal/tcache_externs.h @@ -0,0 +1,47 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H + +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; + +extern tcache_bin_info_t *tcache_bin_info; + +/* + * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * large-object bins. + */ +extern unsigned nhbins; + +/* Maximum cached size class. */ +extern size_t tcache_maxclass; + +/* + * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and + * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are + * completely disjoint from this data structure. tcaches starts off as a sparse + * array, so it has no physical memory footprint until individual pages are + * touched. This allows the entire array to be allocated the first time an + * explicit tcache is created without a disproportionate impact on memory usage. + */ +extern tcaches_t *tcaches; + +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, + arena_t *oldarena, arena_t *newarena); +tcache_t *tcache_get_hard(tsd_t *tsd); +tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); + +#endif /* 
JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache_inlines.h similarity index 57% rename from include/jemalloc/internal/tcache.h rename to include/jemalloc/internal/tcache_inlines.h index 25a1ad02..e522d9e6 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -1,157 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; - -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - -/* - * Absolute minimum number of cache slots for each small bin. - */ -#define TCACHE_NSLOTS_SMALL_MIN 20 - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per slab for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. 
- */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache allocation/deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -typedef enum { - tcache_enabled_false = 0, /* Enable cast to/from bool. */ - tcache_enabled_true = 1, - tcache_enabled_default = 2 -} tcache_enabled_t; - -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - tcache_bin_stats_t tstats; - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ - /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. - */ - void **avail; /* Stack of available objects. */ -}; - -struct tcache_s { - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ - szind_t next_gc_bin; /* Next bin to GC. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ - /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. - */ -}; - -/* Linkage for list of available (previously used) explicit tcache IDs. 
*/ -struct tcaches_s { - union { - tcache_t *tcache; - tcaches_t *next; - }; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; - -extern tcache_bin_info_t *tcache_bin_info; - -/* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more - * large-object bins. - */ -extern unsigned nhbins; - -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -/* - * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and - * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are - * completely disjoint from this data structure. tcaches starts off as a sparse - * array, so it has no physical memory footprint until individual pages are - * touched. This allows the entire array to be allocated the first time an - * explicit tcache is created without a disproportionate impact on memory usage. 
- */ -extern tcaches_t *tcaches; - -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *oldarena, arena_t *newarena); -tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H +#define JEMALLOC_INTERNAL_TCACHE_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE void tcache_event(tsd_t *tsd, tcache_t *tcache); @@ -455,5 +303,4 @@ tcaches_get(tsd_t *tsd, unsigned ind) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h new file mode 100644 index 00000000..a2b28afd --- /dev/null +++ b/include/jemalloc/internal/tcache_structs.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H + +typedef enum { + tcache_enabled_false = 0, /* Enable cast to/from bool. 
*/ + tcache_enabled_true = 1, + tcache_enabled_default = 2 +} tcache_enabled_t; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +struct tcache_bin_info_s { + unsigned ncached_max; /* Upper limit on ncached. */ +}; + +struct tcache_bin_s { + tcache_bin_stats_t tstats; + int low_water; /* Min # cached since last GC. */ + unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ + unsigned ncached; /* # of cached objects. */ + /* + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; /* Stack of available objects. */ +}; + +struct tcache_s { + ql_elm(tcache_t) link; /* Used for aggregating stats. */ + uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ + ticker_t gc_ticker; /* Drives incremental GC. */ + szind_t next_gc_bin; /* Next bin to GC. */ + tcache_bin_t tbins[1]; /* Dynamically sized. */ + /* + * The pointer stacks associated with tbins follow as a contiguous + * array. During tcache initialization, the avail pointer in each + * element of tbins is initialized to point to the proper offset within + * this array. + */ +}; + +/* Linkage for list of available (previously used) explicit tcache IDs. 
*/ +struct tcaches_s { + union { + tcache_t *tcache; + tcaches_t *next; + }; +}; + +#endif /* JEMALLOC_INTERNAL_TCACHE_STRUCTS_H */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h new file mode 100644 index 00000000..c6ac7670 --- /dev/null +++ b/include/jemalloc/internal/tcache_types.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H +#define JEMALLOC_INTERNAL_TCACHE_TYPES_H + +typedef struct tcache_bin_info_s tcache_bin_info_t; +typedef struct tcache_bin_s tcache_bin_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; + +/* + * tcache pointers close to NULL are used to encode state information that is + * used for two purposes: preventing thread caching on a per thread basis and + * cleaning up during thread shutdown. + */ +#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) +#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) +#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) +#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY + +/* + * Absolute minimum number of cache slots for each small bin. + */ +#define TCACHE_NSLOTS_SMALL_MIN 20 + +/* + * Absolute maximum number of cache slots for each small bin in the thread + * cache. This is an additional constraint beyond that imposed as: twice the + * number of regions per slab for this size class. + * + * This constant must be an even number. + */ +#define TCACHE_NSLOTS_SMALL_MAX 200 + +/* Number of cache slots for large size classes. */ +#define TCACHE_NSLOTS_LARGE 20 + +/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ +#define LG_TCACHE_MAXCLASS_DEFAULT 15 + +/* + * TCACHE_GC_SWEEP is the approximate number of allocation events between + * full GC sweeps. Integer rounding may cause the actual number to be + * slightly higher, since GC is performed incrementally. + */ +#define TCACHE_GC_SWEEP 8192 + +/* Number of tcache allocation/deallocation events between incremental GCs. 
*/ +#define TCACHE_GC_INCR \ + ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + +#endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker_inlines.h similarity index 57% rename from include/jemalloc/internal/ticker.h rename to include/jemalloc/internal/ticker_inlines.h index 4696e56d..42f37eb2 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker_inlines.h @@ -1,24 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct ticker_s ticker_t; - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct ticker_s { - int32_t tick; - int32_t nticks; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_TICKER_INLINES_H +#define JEMALLOC_INTERNAL_TICKER_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE void ticker_init(ticker_t *ticker, int32_t nticks); @@ -71,5 +52,4 @@ ticker_tick(ticker_t *ticker) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_TICKER_INLINES_H */ diff --git a/include/jemalloc/internal/ticker_structs.h b/include/jemalloc/internal/ticker_structs.h new file mode 100644 index 00000000..e30c4e21 --- /dev/null +++ b/include/jemalloc/internal/ticker_structs.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_TICKER_STRUCTS_H +#define JEMALLOC_INTERNAL_TICKER_STRUCTS_H + +struct ticker_s { + int32_t tick; + int32_t nticks; +}; + +#endif /* JEMALLOC_INTERNAL_TICKER_STRUCTS_H */ diff --git 
a/include/jemalloc/internal/ticker_types.h b/include/jemalloc/internal/ticker_types.h new file mode 100644 index 00000000..62d67f3d --- /dev/null +++ b/include/jemalloc/internal/ticker_types.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_TICKER_TYPES_H +#define JEMALLOC_INTERNAL_TICKER_TYPES_H + +typedef struct ticker_s ticker_t; + +#endif /* JEMALLOC_INTERNAL_TICKER_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h new file mode 100644 index 00000000..87ebaf2d --- /dev/null +++ b/include/jemalloc/internal/tsd_externs.h @@ -0,0 +1,18 @@ +#ifndef JEMALLOC_INTERNAL_TSD_EXTERNS_H +#define JEMALLOC_INTERNAL_TSD_EXTERNS_H + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_no_cleanup(void *arg); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +#endif +void tsd_cleanup(void *arg); + +#endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h new file mode 100644 index 00000000..ad915d1a --- /dev/null +++ b/include/jemalloc/internal/tsd_inlines.h @@ -0,0 +1,140 @@ +#ifndef JEMALLOC_INTERNAL_TSD_INLINES_H +#define JEMALLOC_INTERNAL_TSD_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) + +tsd_t *tsd_fetch_impl(bool init); +tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); +bool tsd_nominal(tsd_t *tsd); +#define O(n, t, c) \ +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); +MALLOC_TSD +#undef O +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t 
*tsdn); +rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) +malloc_tsd_externs(, tsd_t) +malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_impl(bool init) +{ + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) + return (NULL); + assert(tsd != NULL); + + if (unlikely(tsd->state != tsd_state_nominal)) { + if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + /* Trigger cleanup handler registration. */ + tsd_set(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + } else + assert(tsd->state == tsd_state_reincarnated); + } + + return (tsd); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) +{ + + return (tsd_fetch_impl(true)); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) +{ + + return ((tsdn_t *)tsd); +} + +JEMALLOC_INLINE bool +tsd_nominal(tsd_t *tsd) +{ + + return (tsd->state == tsd_state_nominal); +} + +#define O(n, t, c) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) \ +{ \ + \ + return (&tsd->n); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) \ +{ \ + \ + return (*tsd_##n##p_get(tsd)); \ +} \ + \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) \ +{ \ + \ + assert(tsd->state == tsd_state_nominal); \ + tsd->n = n; \ +} +MALLOC_TSD +#undef O + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) +{ + + if (!tsd_booted_get()) + return (NULL); + + return (tsd_tsdn(tsd_fetch_impl(false))); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) +{ + + return (tsdn == NULL); +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) +{ + + assert(!tsdn_null(tsdn)); + + return (&tsdn->tsd); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) +{ + + /* + * If tsd cannot be 
accessed, initialize the fallback rtree_ctx and + * return a pointer to it. + */ + if (unlikely(tsdn_null(tsdn))) { + static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); + return (fallback); + } + return (tsd_rtree_ctxp_get(tsdn_tsd(tsdn))); +} +#endif + +#endif /* JEMALLOC_INTERNAL_TSD_INLINES_H */ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h new file mode 100644 index 00000000..8d94c5be --- /dev/null +++ b/include/jemalloc/internal/tsd_structs.h @@ -0,0 +1,73 @@ +#ifndef JEMALLOC_INTERNAL_TSD_STRUCTS_H +#define JEMALLOC_INTERNAL_TSD_STRUCTS_H + +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ + !defined(_WIN32)) +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; +#endif + +#define MALLOC_TSD \ +/* O(name, type, cleanup) */ \ + O(tcache, tcache_t *, yes) \ + O(thread_allocated, uint64_t, no) \ + O(thread_deallocated, uint64_t, no) \ + O(prof_tdata, prof_tdata_t *, yes) \ + O(iarena, arena_t *, yes) \ + O(arena, arena_t *, yes) \ + O(arenas_tdata, arena_tdata_t *, yes) \ + O(narenas_tdata, unsigned, no) \ + O(arenas_tdata_bypass, bool, no) \ + O(tcache_enabled, tcache_enabled_t, no) \ + O(rtree_ctx, rtree_ctx_t, no) \ + O(witnesses, witness_list_t, yes) \ + O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ + O(witness_fork, bool, no) \ + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + NULL, \ + 0, \ + 0, \ + NULL, \ + NULL, \ + NULL, \ + NULL, \ + 0, \ + false, \ + tcache_enabled_default, \ + RTREE_CTX_INITIALIZER, \ + ql_head_initializer(witnesses), \ + RTREE_ELM_WITNESS_TSD_INITIALIZER, \ + false \ +} + +struct tsd_s { + tsd_state_t state; +#define O(n, t, c) \ + t n; +MALLOC_TSD +#undef O +}; + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + 
* between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +struct tsdn_s { + tsd_t tsd; +}; + +static const tsd_t tsd_initializer = TSD_INITIALIZER; + +malloc_tsd_types(, tsd_t) + +#endif /* JEMALLOC_INTERNAL_TSD_STRUCTS_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd_types.h similarity index 74% rename from include/jemalloc/internal/tsd.h rename to include/jemalloc/internal/tsd_types.h index c4f010ae..b48eaeca 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,5 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES +#ifndef JEMALLOC_INTERNAL_TSD_TYPES_H +#define JEMALLOC_INTERNAL_TSD_TYPES_H /* Maximum number of malloc_tsd users with cleanup functions. */ #define MALLOC_TSD_CLEANUPS_MAX 2 @@ -576,236 +576,4 @@ a_name##tsd_set(a_type *val) \ } #endif -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -struct tsd_init_block_s { - ql_elm(tsd_init_block_t) link; - pthread_t thread; - void *data; -}; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; -#endif - -#define MALLOC_TSD \ -/* O(name, type, cleanup) */ \ - O(tcache, tcache_t *, yes) \ - O(thread_allocated, uint64_t, no) \ - O(thread_deallocated, uint64_t, no) \ - O(prof_tdata, prof_tdata_t *, yes) \ - O(iarena, arena_t *, yes) \ - O(arena, arena_t *, yes) \ - O(arenas_tdata, arena_tdata_t *, yes) \ - O(narenas_tdata, unsigned, no) \ - O(arenas_tdata_bypass, bool, no) \ - O(tcache_enabled, tcache_enabled_t, no) \ - O(rtree_ctx, rtree_ctx_t, no) \ - O(witnesses, witness_list_t, yes) \ - O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ - O(witness_fork, bool, no) \ - -#define 
TSD_INITIALIZER { \ - tsd_state_uninitialized, \ - NULL, \ - 0, \ - 0, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - 0, \ - false, \ - tcache_enabled_default, \ - RTREE_CTX_INITIALIZER, \ - ql_head_initializer(witnesses), \ - RTREE_ELM_WITNESS_TSD_INITIALIZER, \ - false \ -} - -struct tsd_s { - tsd_state_t state; -#define O(n, t, c) \ - t n; -MALLOC_TSD -#undef O -}; - -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - * explicitly converted to tsd_t, which is non-nullable. - */ -struct tsdn_s { - tsd_t tsd; -}; - -static const tsd_t tsd_initializer = TSD_INITIALIZER; - -malloc_tsd_types(, tsd_t) - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *arg); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -#endif -void tsd_cleanup(void *arg); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) - -tsd_t *tsd_fetch_impl(bool init); -tsd_t *tsd_fetch(void); -tsdn_t *tsd_tsdn(tsd_t *tsd); -bool tsd_nominal(tsd_t *tsd); -#define O(n, t, c) \ -t *tsd_##n##p_get(tsd_t *tsd); \ -t tsd_##n##_get(tsd_t *tsd); \ -void tsd_##n##_set(tsd_t *tsd, t n); -MALLOC_TSD -#undef O -tsdn_t *tsdn_fetch(void); -bool tsdn_null(const tsdn_t *tsdn); -tsd_t *tsdn_tsd(tsdn_t *tsdn); -rtree_ctx_t 
*tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) -malloc_tsd_externs(, tsd_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init) -{ - tsd_t *tsd = tsd_get(init); - - if (!init && tsd_get_allocates() && tsd == NULL) - return (NULL); - assert(tsd != NULL); - - if (unlikely(tsd->state != tsd_state_nominal)) { - if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - } else - assert(tsd->state == tsd_state_reincarnated); - } - - return (tsd); -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) -{ - - return (tsd_fetch_impl(true)); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) -{ - - return ((tsdn_t *)tsd); -} - -JEMALLOC_INLINE bool -tsd_nominal(tsd_t *tsd) -{ - - return (tsd->state == tsd_state_nominal); -} - -#define O(n, t, c) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) \ -{ \ - \ - return (&tsd->n); \ -} \ - \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) \ -{ \ - \ - return (*tsd_##n##p_get(tsd)); \ -} \ - \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) \ -{ \ - \ - assert(tsd->state == tsd_state_nominal); \ - tsd->n = n; \ -} -MALLOC_TSD -#undef O - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) -{ - - if (!tsd_booted_get()) - return (NULL); - - return (tsd_tsdn(tsd_fetch_impl(false))); -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) -{ - - return (tsdn == NULL); -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) -{ - - assert(!tsdn_null(tsdn)); - - return (&tsdn->tsd); -} - -JEMALLOC_ALWAYS_INLINE rtree_ctx_t * -tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) -{ - - /* - * If tsd cannot be accessed, initialize 
the fallback rtree_ctx and - * return a pointer to it. - */ - if (unlikely(tsdn_null(tsdn))) { - static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); - return (fallback); - } - return (tsd_rtree_ctxp_get(tsdn_tsd(tsdn))); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */ diff --git a/include/jemalloc/internal/util_externs.h b/include/jemalloc/internal/util_externs.h new file mode 100644 index 00000000..b203b773 --- /dev/null +++ b/include/jemalloc/internal/util_externs.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_INTERNAL_UTIL_EXTERNS_H +#define JEMALLOC_INTERNAL_UTIL_EXTERNS_H + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, + char **restrict endptr, int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +size_t malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); + +#endif /* JEMALLOC_INTERNAL_UTIL_EXTERNS_H */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util_inlines.h similarity index 50% rename from include/jemalloc/internal/util.h rename to include/jemalloc/internal/util_inlines.h index 592806dc..93f5b1de 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util_inlines.h @@ -1,126 +1,5 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -#ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" -#else -# include -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR -#endif - -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* - * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be - * large enough for all possible uses within jemalloc. - */ -#define MALLOC_PRINTF_BUFSIZE 4096 - -/* Junk fill patterns. */ -#ifndef JEMALLOC_ALLOC_JUNK -# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) -#endif -#ifndef JEMALLOC_FREE_JUNK -# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) -#endif - -/* - * Wrap a cpp argument that contains commas such that it isn't broken up into - * multiple arguments. - */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ - -/* cpp macro definition stringification. 
*/ -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - -/* - * Silence compiler warnings due to uninitialized values. This is used - * wherever the compiler fails to recognize that the variable is never used - * uninitialized. - */ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif - -#ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -#else -# define likely(x) !!(x) -# define unlikely(x) !!(x) -#endif - -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif - -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() - -#include "jemalloc/internal/assert.h" - -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if (unlikely(!(c))) \ - not_reached(); \ -} while (0) - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, - char **restrict endptr, int base); -void malloc_write(const char *s); - -/* - * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating - * point math. - */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); -size_t malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES +#ifndef JEMALLOC_INTERNAL_UTIL_INLINES_H +#define JEMALLOC_INTERNAL_UTIL_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE unsigned ffs_llu(unsigned long long bitmap); @@ -342,5 +221,4 @@ get_errno(void) } #endif -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ +#endif /* JEMALLOC_INTERNAL_UTIL_INLINES_H */ diff --git a/include/jemalloc/internal/util_types.h b/include/jemalloc/internal/util_types.h new file mode 100644 index 00000000..7f727993 --- /dev/null +++ b/include/jemalloc/internal/util_types.h @@ -0,0 +1,94 @@ +#ifndef JEMALLOC_INTERNAL_UTIL_TYPES_H +#define JEMALLOC_INTERNAL_UTIL_TYPES_H + +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + +/* Junk fill patterns. 
*/ +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* cpp macro definition stringification. */ +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define likely(x) !!(x) +# define unlikely(x) !!(x) +#endif + +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + +#include "jemalloc/internal/assert.h" + +/* Use to assert a particular configuration, e.g., cassert(config_debug). */ +#define cassert(c) do { \ + if (unlikely(!(c))) \ + not_reached(); \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_UTIL_TYPES_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h deleted file mode 100644 index 86ddb64a..00000000 --- a/include/jemalloc/internal/witness.h +++ /dev/null @@ -1,275 +0,0 @@ -/******************************************************************************/ -#ifdef JEMALLOC_H_TYPES - -typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; -typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, void *, const witness_t *, - void *); - -/* - * Lock ranks. 
Witnesses with rank WITNESS_RANK_OMIT are completely ignored by - * the witness machinery. - */ -#define WITNESS_RANK_OMIT 0U - -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U - -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U -#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U - -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_EXTENTS 9U -#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 - -#define WITNESS_RANK_RTREE_ELM 11U -#define WITNESS_RANK_RTREE 12U -#define WITNESS_RANK_BASE 13U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF - -#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} - -#endif /* JEMALLOC_H_TYPES */ -/******************************************************************************/ -#ifdef JEMALLOC_H_STRUCTS - -struct witness_s { - /* Name, used for printing lock order reversal messages. */ - const char *name; - - /* - * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses - * must be acquired in order of increasing rank. - */ - witness_rank_t rank; - - /* - * If two witnesses are of equal rank and they have the samp comp - * function pointer, it is called as a last attempt to differentiate - * between witnesses of equal rank. - */ - witness_comp_t *comp; - - /* Opaque data, passed to comp(). */ - void *opaque; - - /* Linkage for thread's currently owned locks. 
*/ - ql_elm(witness_t) link; -}; - -#endif /* JEMALLOC_H_STRUCTS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_EXTERNS - -void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp, void *opaque); -#ifdef JEMALLOC_JET -typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); -extern witness_lock_error_t *witness_lock_error; -#else -void witness_lock_error(const witness_list_t *witnesses, - const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_owner_error_t)(const witness_t *); -extern witness_owner_error_t *witness_owner_error; -#else -void witness_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_not_owner_error_t)(const witness_t *); -extern witness_not_owner_error_t *witness_not_owner_error; -#else -void witness_not_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET -typedef void (witness_lockless_error_t)(const witness_list_t *); -extern witness_lockless_error_t *witness_lockless_error; -#else -void witness_lockless_error(const witness_list_t *witnesses); -#endif - -void witnesses_cleanup(tsd_t *tsd); -void witness_prefork(tsd_t *tsd); -void witness_postfork_parent(tsd_t *tsd); -void witness_postfork_child(tsd_t *tsd); - -#endif /* JEMALLOC_H_EXTERNS */ -/******************************************************************************/ -#ifdef JEMALLOC_H_INLINES - -#ifndef JEMALLOC_ENABLE_INLINE -bool witness_owner(tsd_t *tsd, const witness_t *witness); -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_lockless(tsdn_t *tsdn); -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -/* Helper, not intended for direct use. 
*/ -JEMALLOC_INLINE bool -witness_owner(tsd_t *tsd, const witness_t *witness) -{ - witness_list_t *witnesses; - witness_t *w; - - cassert(config_debug); - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - return (true); - } - - return (false); -} - -JEMALLOC_INLINE void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - if (witness_owner(tsd, witness)) - return; - witness_owner_error(witness); -} - -JEMALLOC_INLINE void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) - witness_not_owner_error(witness); - } -} - -JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w != NULL) - witness_lockless_error(witnesses); -} - -JEMALLOC_INLINE void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - witness_assert_not_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w == NULL) { - /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { - /* Forking, and relaxed ranking satisfied. 
*/ - } else if (w->rank > witness->rank) { - /* Not forking, rank order reversal. */ - witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > - 0)) { - /* - * Missing/incompatible comparison function, or comparison - * function indicates rank order reversal. - */ - witness_lock_error(witnesses, witness); - } - - ql_elm_new(witness, link); - ql_tail_insert(witnesses, witness, link); -} - -JEMALLOC_INLINE void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ - tsd_t *tsd; - witness_list_t *witnesses; - - if (!config_debug) - return; - - if (tsdn_null(tsdn)) - return; - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) - return; - - /* - * Check whether owner before removal, rather than relying on - * witness_assert_owner() to abort, so that unit tests can test this - * function's failure mode without causing undefined behavior. - */ - if (witness_owner(tsd, witness)) { - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); - } else - witness_assert_owner(tsdn, witness); -} -#endif - -#endif /* JEMALLOC_H_INLINES */ -/******************************************************************************/ diff --git a/include/jemalloc/internal/witness_externs.h b/include/jemalloc/internal/witness_externs.h new file mode 100644 index 00000000..dcd987cc --- /dev/null +++ b/include/jemalloc/internal/witness_externs.h @@ -0,0 +1,37 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_EXTERNS_H +#define JEMALLOC_INTERNAL_WITNESS_EXTERNS_H + +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp, void *opaque); +#ifdef JEMALLOC_JET +typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +extern witness_lock_error_t *witness_lock_error; +#else +void witness_lock_error(const witness_list_t *witnesses, + const witness_t *witness); +#endif +#ifdef JEMALLOC_JET 
+typedef void (witness_owner_error_t)(const witness_t *); +extern witness_owner_error_t *witness_owner_error; +#else +void witness_owner_error(const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_not_owner_error_t)(const witness_t *); +extern witness_not_owner_error_t *witness_not_owner_error; +#else +void witness_not_owner_error(const witness_t *witness); +#endif +#ifdef JEMALLOC_JET +typedef void (witness_lockless_error_t)(const witness_list_t *); +extern witness_lockless_error_t *witness_lockless_error; +#else +void witness_lockless_error(const witness_list_t *witnesses); +#endif + +void witnesses_cleanup(tsd_t *tsd); +void witness_prefork(tsd_t *tsd); +void witness_postfork_parent(tsd_t *tsd); +void witness_postfork_child(tsd_t *tsd); + +#endif /* JEMALLOC_INTERNAL_WITNESS_EXTERNS_H */ diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h new file mode 100644 index 00000000..259aa2e5 --- /dev/null +++ b/include/jemalloc/internal/witness_inlines.h @@ -0,0 +1,163 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_INLINES_H +#define JEMALLOC_INTERNAL_WITNESS_INLINES_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool witness_owner(tsd_t *tsd, const witness_t *witness); +void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_lockless(tsdn_t *tsdn); +void witness_lock(tsdn_t *tsdn, witness_t *witness); +void witness_unlock(tsdn_t *tsdn, witness_t *witness); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +/* Helper, not intended for direct use. 
*/ +JEMALLOC_INLINE bool +witness_owner(tsd_t *tsd, const witness_t *witness) +{ + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + return (true); + } + + return (false); +} + +JEMALLOC_INLINE void +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + if (witness_owner(tsd, witness)) + return; + witness_owner_error(witness); +} + +JEMALLOC_INLINE void +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witnesses = tsd_witnessesp_get(tsd); + ql_foreach(w, witnesses, link) { + if (w == witness) + witness_not_owner_error(witness); + } +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w != NULL) + witness_lockless_error(witnesses); +} + +JEMALLOC_INLINE void +witness_lock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + witness_assert_not_owner(tsdn, witness); + + witnesses = tsd_witnessesp_get(tsd); + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. 
*/ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > + 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. + */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +JEMALLOC_INLINE void +witness_unlock(tsdn_t *tsdn, witness_t *witness) +{ + tsd_t *tsd; + witness_list_t *witnesses; + + if (!config_debug) + return; + + if (tsdn_null(tsdn)) + return; + tsd = tsdn_tsd(tsdn); + if (witness->rank == WITNESS_RANK_OMIT) + return; + + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(tsd, witness)) { + witnesses = tsd_witnessesp_get(tsd); + ql_remove(witnesses, witness, link); + } else + witness_assert_owner(tsdn, witness); +} +#endif + +#endif /* JEMALLOC_INTERNAL_WITNESS_INLINES_H */ diff --git a/include/jemalloc/internal/witness_structs.h b/include/jemalloc/internal/witness_structs.h new file mode 100644 index 00000000..95d19706 --- /dev/null +++ b/include/jemalloc/internal/witness_structs.h @@ -0,0 +1,28 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_STRUCTS_H +#define JEMALLOC_INTERNAL_WITNESS_STRUCTS_H + +struct witness_s { + /* Name, used for printing lock order reversal messages. */ + const char *name; + + /* + * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses + * must be acquired in order of increasing rank. + */ + witness_rank_t rank; + + /* + * If two witnesses are of equal rank and they have the samp comp + * function pointer, it is called as a last attempt to differentiate + * between witnesses of equal rank. 
+ */ + witness_comp_t *comp; + + /* Opaque data, passed to comp(). */ + void *opaque; + + /* Linkage for thread's currently owned locks. */ + ql_elm(witness_t) link; +}; + +#endif /* JEMALLOC_INTERNAL_WITNESS_STRUCTS_H */ diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h new file mode 100644 index 00000000..ef962824 --- /dev/null +++ b/include/jemalloc/internal/witness_types.h @@ -0,0 +1,46 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_TYPES_H +#define JEMALLOC_INTERNAL_WITNESS_TYPES_H + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, void *, const witness_t *, + void *); + +/* + * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by + * the witness machinery. + */ +#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_ARENAS 2U + +#define WITNESS_RANK_PROF_DUMP 3U +#define WITNESS_RANK_PROF_BT2GCTX 4U +#define WITNESS_RANK_PROF_TDATAS 5U +#define WITNESS_RANK_PROF_TDATA 6U +#define WITNESS_RANK_PROF_GCTX 7U + +#define WITNESS_RANK_ARENA 8U +#define WITNESS_RANK_ARENA_EXTENTS 9U +#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 + +#define WITNESS_RANK_RTREE_ELM 11U +#define WITNESS_RANK_RTREE 12U +#define WITNESS_RANK_BASE 13U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + +#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} + +#endif /* JEMALLOC_INTERNAL_WITNESS_TYPES_H */ diff --git 
a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 66485c0e..2dd0cdea 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -69,18 +69,14 @@ static const bool config_debug = # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" -# define JEMALLOC_H_TYPES -# define JEMALLOC_H_STRUCTS -# define JEMALLOC_H_EXTERNS -# define JEMALLOC_H_INLINES -# include "jemalloc/internal/nstime.h" -# include "jemalloc/internal/util.h" +# include "jemalloc/internal/nstime_types.h" +# include "jemalloc/internal/nstime_structs.h" +# include "jemalloc/internal/nstime_externs.h" +# include "jemalloc/internal/util_types.h" +# include "jemalloc/internal/util_externs.h" +# include "jemalloc/internal/util_inlines.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" -# undef JEMALLOC_H_TYPES -# undef JEMALLOC_H_STRUCTS -# undef JEMALLOC_H_EXTERNS -# undef JEMALLOC_H_INLINES /******************************************************************************/ /* From 9389335b866e5920a3af9c1545c14931c1a9ef1a Mon Sep 17 00:00:00 2001 From: John Paul Adrian Glaubitz Date: Wed, 21 Dec 2016 09:38:54 +0100 Subject: [PATCH 0585/2608] Use better pre-processor defines for sparc64 Currently, jemalloc detects sparc64 targets by checking whether __sparc64__ is defined. However, this definition is used on BSD targets only. Linux targets define both __sparc__ and __arch64__ for sparc64. Since this also works on BSD, rather use __sparc__ and __arch64__ instead of __sparc64__ to detect sparc64 targets. --- include/jemalloc/internal/mb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mb.h b/include/jemalloc/internal/mb.h index 5384728f..e58da5c3 100644 --- a/include/jemalloc/internal/mb.h +++ b/include/jemalloc/internal/mb.h @@ -76,7 +76,7 @@ mb_write(void) : "memory" /* Clobbers. 
*/ ); } -#elif defined(__sparc64__) +#elif defined(__sparc__) && defined(__arch64__) JEMALLOC_INLINE void mb_write(void) { From 87e81e609b5d1bd8821d7256208091c546e62c5a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 13 Jan 2017 10:34:50 -0800 Subject: [PATCH 0586/2608] Fix indentation. --- src/ckh.c | 6 +++--- src/jemalloc_cpp.cpp | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/ckh.c b/src/ckh.c index 6f16565f..6f0f1e4d 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -547,10 +547,10 @@ bool ckh_string_keycomp(const void *k1, const void *k2) { - assert(k1 != NULL); - assert(k2 != NULL); + assert(k1 != NULL); + assert(k2 != NULL); - return (strcmp((char *)k1, (char *)k2) ? false : true); + return (strcmp((char *)k1, (char *)k2) ? false : true); } void diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 4c5756b3..4d88f993 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -92,7 +92,7 @@ void * operator new[](std::size_t size, const std::nothrow_t&) noexcept { - return (newImpl(size)); + return (newImpl(size)); } void From ffbb7dac3d669697ab8b39367994a58e0c1fa42d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 13 Jan 2017 10:35:35 -0800 Subject: [PATCH 0587/2608] Remove leading blank lines from function bodies. This resolves #535. 
--- include/jemalloc/internal/arena_inlines_a.h | 7 --- include/jemalloc/internal/arena_inlines_b.h | 8 ---- include/jemalloc/internal/atomic_inlines.h | 42 ----------------- include/jemalloc/internal/base_inlines.h | 1 - include/jemalloc/internal/extent_inlines.h | 31 ------------- include/jemalloc/internal/hash_inlines.h | 7 --- .../jemalloc/internal/jemalloc_internal.h.in | 25 ----------- .../internal/jemalloc_internal_decls.h | 1 - include/jemalloc/internal/mutex_inlines.h | 4 -- include/jemalloc/internal/ph.h | 4 -- include/jemalloc/internal/prng_inlines.h | 3 -- include/jemalloc/internal/prof_inlines.h | 6 --- include/jemalloc/internal/rtree_inlines.h | 5 --- include/jemalloc/internal/spin_inlines.h | 1 - include/jemalloc/internal/tcache_inlines.h | 1 - include/jemalloc/internal/ticker_inlines.h | 5 --- include/jemalloc/internal/tsd_inlines.h | 10 ----- include/jemalloc/internal/tsd_types.h | 25 ----------- include/jemalloc/internal/util_inlines.h | 13 ------ include/msvc_compat/strings.h | 1 - src/arena.c | 45 ------------------- src/base.c | 9 ---- src/bitmap.c | 4 -- src/ckh.c | 5 --- src/ctl.c | 13 ------ src/extent.c | 28 ------------ src/extent_dss.c | 5 --- src/extent_mmap.c | 1 - src/jemalloc.c | 27 ----------- src/jemalloc_cpp.cpp | 10 ----- src/large.c | 11 ----- src/mutex.c | 6 --- src/nstime.c | 14 ------ src/pages.c | 9 ---- src/prof.c | 29 ------------ src/rtree.c | 6 --- src/tcache.c | 6 --- src/tsd.c | 7 --- src/util.c | 5 --- src/witness.c | 6 --- src/zone.c | 11 ----- test/include/test/math.h | 1 - test/include/test/mq.h | 1 - test/integration/MALLOCX_ARENA.c | 1 - test/integration/aligned_alloc.c | 2 - test/integration/allocated.c | 2 - test/integration/cpp/basic.cpp | 1 - test/integration/extent.c | 1 - test/integration/mallocx.c | 4 -- test/integration/overflow.c | 1 - test/integration/posix_memalign.c | 2 - test/integration/rallocx.c | 3 -- test/integration/sdallocx.c | 1 - test/integration/thread_arena.c | 1 - 
test/integration/thread_tcache_enabled.c | 2 - test/integration/xallocx.c | 5 --- test/src/btalloc.c | 1 - test/src/mq.c | 1 - test/src/mtx.c | 4 -- test/src/test.c | 4 -- test/src/thd.c | 3 -- test/src/timer.c | 2 - test/stress/microbench.c | 6 --- test/unit/SFMT.c | 1 - test/unit/a0.c | 1 - test/unit/arena_reset.c | 9 ---- test/unit/atomic.c | 6 --- test/unit/base.c | 1 - test/unit/bitmap.c | 2 - test/unit/ckh.c | 1 - test/unit/decay.c | 3 -- test/unit/extent_quantize.c | 1 - test/unit/fork.c | 1 - test/unit/hash.c | 6 --- test/unit/junk.c | 5 --- test/unit/mallctl.c | 6 --- test/unit/math.c | 1 - test/unit/mq.c | 1 - test/unit/mtx.c | 1 - test/unit/nstime.c | 2 - test/unit/pack.c | 1 - test/unit/pages.c | 1 - test/unit/ph.c | 2 - test/unit/prng.c | 11 ----- test/unit/prof_accum.c | 2 - test/unit/prof_active.c | 6 --- test/unit/prof_gdump.c | 1 - test/unit/prof_idump.c | 1 - test/unit/prof_reset.c | 3 -- test/unit/prof_thread_name.c | 2 - test/unit/ql.c | 1 - test/unit/qr.c | 1 - test/unit/rb.c | 1 - test/unit/rtree.c | 2 - test/unit/size_classes.c | 1 - test/unit/slab.c | 1 - test/unit/smoothstep.c | 1 - test/unit/stats.c | 2 - test/unit/ticker.c | 1 - test/unit/tsd.c | 2 - test/unit/util.c | 4 -- test/unit/witness.c | 7 --- test/unit/zero.c | 3 -- 103 files changed, 611 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 743727b0..d241b8a1 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -16,35 +16,30 @@ bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); JEMALLOC_INLINE unsigned arena_ind_get(const arena_t *arena) { - return (base_ind_get(arena->base)); } JEMALLOC_INLINE void arena_internal_add(arena_t *arena, size_t size) { - atomic_add_zu(&arena->stats.internal, size); } JEMALLOC_INLINE void arena_internal_sub(arena_t *arena, size_t size) { - atomic_sub_zu(&arena->stats.internal, size); } JEMALLOC_INLINE 
size_t arena_internal_get(arena_t *arena) { - return (atomic_read_zu(&arena->stats.internal)); } JEMALLOC_INLINE bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); assert(prof_interval != 0); @@ -59,7 +54,6 @@ arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) JEMALLOC_INLINE bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); if (likely(prof_interval == 0)) @@ -70,7 +64,6 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) JEMALLOC_INLINE bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); if (likely(prof_interval == 0)) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 9068cf4c..94614668 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -33,7 +33,6 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) JEMALLOC_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { - cassert(config_prof); assert(ptr != NULL); @@ -46,7 +45,6 @@ JEMALLOC_INLINE void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx) { - cassert(config_prof); assert(ptr != NULL); @@ -58,7 +56,6 @@ JEMALLOC_INLINE void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, prof_tctx_t *tctx) { - cassert(config_prof); assert(ptr != NULL); assert(!extent_slab_get(extent)); @@ -85,7 +82,6 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->lock); arena_decay_ticks(tsdn, arena, 1); @@ -95,7 +91,6 @@ JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); assert(size != 
0); @@ -118,7 +113,6 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (extent_arena_get(iealloc(tsdn, ptr))); } @@ -142,7 +136,6 @@ JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -176,7 +169,6 @@ JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h index de0ac6ac..89d1b354 100644 --- a/include/jemalloc/internal/atomic_inlines.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -101,7 +101,6 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) JEMALLOC_INLINE void atomic_write_u64(uint64_t *p, uint64_t x) { - asm volatile ( "xchgq %1, %0;" /* Lock is implied by xchgq. */ : "=m" (*p), "+r" (x) /* Outputs. */ @@ -141,7 +140,6 @@ atomic_write_u64(uint64_t *p, uint64_t x) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - /* * atomic_fetchadd_64() doesn't exist, but we only ever use this * function on LP64 systems, so atomic_fetchadd_long() will do. 
@@ -154,7 +152,6 @@ atomic_add_u64(uint64_t *p, uint64_t x) JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); @@ -163,7 +160,6 @@ atomic_sub_u64(uint64_t *p, uint64_t x) JEMALLOC_INLINE bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); @@ -172,7 +168,6 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) JEMALLOC_INLINE void atomic_write_u64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); atomic_store_rel_long(p, x); @@ -181,21 +176,18 @@ atomic_write_u64(uint64_t *p, uint64_t x) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } JEMALLOC_INLINE bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); } @@ -213,14 +205,12 @@ atomic_write_u64(uint64_t *p, uint64_t x) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - return (InterlockedExchangeAdd64(p, x) + x); } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); } @@ -236,7 +226,6 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) JEMALLOC_INLINE void atomic_write_u64(uint64_t *p, uint64_t x) { - InterlockedExchange64(p, x); } # elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ @@ -244,28 +233,24 @@ atomic_write_u64(uint64_t *p, uint64_t x) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - return (__sync_add_and_fetch(p, x)); } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - return (__sync_sub_and_fetch(p, x)); } JEMALLOC_INLINE 
bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return (!__sync_bool_compare_and_swap(p, c, s)); } JEMALLOC_INLINE void atomic_write_u64(uint64_t *p, uint64_t x) { - __sync_lock_test_and_set(p, x); } # else @@ -325,7 +310,6 @@ atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) JEMALLOC_INLINE void atomic_write_u32(uint32_t *p, uint32_t x) { - asm volatile ( "xchgl %1, %0;" /* Lock is implied by xchgl. */ : "=m" (*p), "+r" (x) /* Outputs. */ @@ -365,49 +349,42 @@ atomic_write_u32(uint32_t *p, uint32_t x) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return (!atomic_cmpset_32(p, c, s)); } JEMALLOC_INLINE void atomic_write_u32(uint32_t *p, uint32_t x) { - atomic_store_rel_32(p, x); } #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); } @@ -425,14 +402,12 @@ atomic_write_u32(uint32_t *p, uint32_t x) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (InterlockedExchangeAdd(p, x) + x); } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); } @@ -448,7 +423,6 @@ atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) JEMALLOC_INLINE void atomic_write_u32(uint32_t *p, uint32_t x) { - InterlockedExchange(p, x); } #elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ @@ -456,28 +430,24 @@ atomic_write_u32(uint32_t *p, uint32_t x) JEMALLOC_INLINE 
uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (__sync_add_and_fetch(p, x)); } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (__sync_sub_and_fetch(p, x)); } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return (!__sync_bool_compare_and_swap(p, c, s)); } JEMALLOC_INLINE void atomic_write_u32(uint32_t *p, uint32_t x) { - __sync_lock_test_and_set(p, x); } #else @@ -489,7 +459,6 @@ atomic_write_u32(uint32_t *p, uint32_t x) JEMALLOC_INLINE void * atomic_add_p(void **p, void *x) { - #if (LG_SIZEOF_PTR == 3) return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_PTR == 2) @@ -500,7 +469,6 @@ atomic_add_p(void **p, void *x) JEMALLOC_INLINE void * atomic_sub_p(void **p, void *x) { - #if (LG_SIZEOF_PTR == 3) return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_PTR == 2) @@ -511,7 +479,6 @@ atomic_sub_p(void **p, void *x) JEMALLOC_INLINE bool atomic_cas_p(void **p, void *c, void *s) { - #if (LG_SIZEOF_PTR == 3) return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_PTR == 2) @@ -522,7 +489,6 @@ atomic_cas_p(void **p, void *c, void *s) JEMALLOC_INLINE void atomic_write_p(void **p, const void *x) { - #if (LG_SIZEOF_PTR == 3) atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) @@ -535,7 +501,6 @@ atomic_write_p(void **p, const void *x) JEMALLOC_INLINE size_t atomic_add_zu(size_t *p, size_t x) { - #if (LG_SIZEOF_PTR == 3) return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_PTR == 2) @@ -546,7 +511,6 @@ atomic_add_zu(size_t *p, size_t x) JEMALLOC_INLINE size_t atomic_sub_zu(size_t *p, size_t x) { - #if (LG_SIZEOF_PTR == 3) return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_PTR == 2) @@ -557,7 +521,6 @@ atomic_sub_zu(size_t *p, size_t x) JEMALLOC_INLINE bool atomic_cas_zu(size_t *p, size_t c, size_t s) { - #if (LG_SIZEOF_PTR == 3) return 
(atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_PTR == 2) @@ -568,7 +531,6 @@ atomic_cas_zu(size_t *p, size_t c, size_t s) JEMALLOC_INLINE void atomic_write_zu(size_t *p, size_t x) { - #if (LG_SIZEOF_PTR == 3) atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) @@ -581,7 +543,6 @@ atomic_write_zu(size_t *p, size_t x) JEMALLOC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x) { - #if (LG_SIZEOF_INT == 3) return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_INT == 2) @@ -592,7 +553,6 @@ atomic_add_u(unsigned *p, unsigned x) JEMALLOC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x) { - #if (LG_SIZEOF_INT == 3) return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); @@ -605,7 +565,6 @@ atomic_sub_u(unsigned *p, unsigned x) JEMALLOC_INLINE bool atomic_cas_u(unsigned *p, unsigned c, unsigned s) { - #if (LG_SIZEOF_INT == 3) return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_INT == 2) @@ -616,7 +575,6 @@ atomic_cas_u(unsigned *p, unsigned c, unsigned s) JEMALLOC_INLINE void atomic_write_u(unsigned *p, unsigned x) { - #if (LG_SIZEOF_INT == 3) atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 2) diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index f882bcde..63547d65 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -9,7 +9,6 @@ unsigned base_ind_get(const base_t *base); JEMALLOC_INLINE unsigned base_ind_get(const base_t *base) { - return (base->ind); } #endif diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index e48af92f..87e0bcd0 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -55,14 +55,12 @@ extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) JEMALLOC_INLINE arena_t * extent_arena_get(const 
extent_t *extent) { - return (extent->e_arena); } JEMALLOC_INLINE void * extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent->e_slab); return (PAGE_ADDR2BASE(extent->e_addr)); @@ -71,7 +69,6 @@ extent_base_get(const extent_t *extent) JEMALLOC_INLINE void * extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent->e_slab); return (extent->e_addr); @@ -80,14 +77,12 @@ extent_addr_get(const extent_t *extent) JEMALLOC_INLINE size_t extent_size_get(const extent_t *extent) { - return (extent->e_size); } JEMALLOC_INLINE size_t extent_usize_get(const extent_t *extent) { - assert(!extent->e_slab); return (extent->e_usize); } @@ -95,14 +90,12 @@ extent_usize_get(const extent_t *extent) JEMALLOC_INLINE void * extent_before_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent_base_get(extent) - PAGE)); } JEMALLOC_INLINE void * extent_last_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent_base_get(extent) + extent_size_get(extent) - PAGE)); } @@ -110,7 +103,6 @@ extent_last_get(const extent_t *extent) JEMALLOC_INLINE void * extent_past_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent_base_get(extent) + extent_size_get(extent))); } @@ -118,49 +110,42 @@ extent_past_get(const extent_t *extent) JEMALLOC_INLINE size_t extent_sn_get(const extent_t *extent) { - return (extent->e_sn); } JEMALLOC_INLINE bool extent_active_get(const extent_t *extent) { - return (extent->e_active); } JEMALLOC_INLINE bool extent_retained_get(const extent_t *extent) { - return (qr_next(extent, qr_link) == extent); } JEMALLOC_INLINE bool extent_zeroed_get(const extent_t *extent) { - return (extent->e_zeroed); } JEMALLOC_INLINE bool extent_committed_get(const extent_t *extent) { - return (extent->e_committed); } JEMALLOC_INLINE bool extent_slab_get(const extent_t *extent) { - return (extent->e_slab); } JEMALLOC_INLINE arena_slab_data_t * 
extent_slab_data_get(extent_t *extent) { - assert(extent->e_slab); return (&extent->e_slab_data); } @@ -168,7 +153,6 @@ extent_slab_data_get(extent_t *extent) JEMALLOC_INLINE const arena_slab_data_t * extent_slab_data_get_const(const extent_t *extent) { - assert(extent->e_slab); return (&extent->e_slab_data); } @@ -176,7 +160,6 @@ extent_slab_data_get_const(const extent_t *extent) JEMALLOC_INLINE prof_tctx_t * extent_prof_tctx_get(const extent_t *extent) { - return ((prof_tctx_t *)atomic_read_p( &((extent_t *)extent)->e_prof_tctx_pun)); } @@ -184,21 +167,18 @@ extent_prof_tctx_get(const extent_t *extent) JEMALLOC_INLINE void extent_arena_set(extent_t *extent, arena_t *arena) { - extent->e_arena = arena; } JEMALLOC_INLINE void extent_addr_set(extent_t *extent, void *addr) { - extent->e_addr = addr; } JEMALLOC_INLINE void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); if (alignment < PAGE) { @@ -219,56 +199,48 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) JEMALLOC_INLINE void extent_size_set(extent_t *extent, size_t size) { - extent->e_size = size; } JEMALLOC_INLINE void extent_usize_set(extent_t *extent, size_t usize) { - extent->e_usize = usize; } JEMALLOC_INLINE void extent_sn_set(extent_t *extent, size_t sn) { - extent->e_sn = sn; } JEMALLOC_INLINE void extent_active_set(extent_t *extent, bool active) { - extent->e_active = active; } JEMALLOC_INLINE void extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_zeroed = zeroed; } JEMALLOC_INLINE void extent_committed_set(extent_t *extent, bool committed) { - extent->e_committed = committed; } JEMALLOC_INLINE void extent_slab_set(extent_t *extent, bool slab) { - extent->e_slab = slab; } JEMALLOC_INLINE void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - atomic_write_p(&extent->e_prof_tctx_pun, tctx); } @@ -277,7 +249,6 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, 
size_t size, size_t usize, size_t sn, bool active, bool zeroed, bool committed, bool slab) { - assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -297,14 +268,12 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, JEMALLOC_INLINE void extent_ring_insert(extent_t *sentinel, extent_t *extent) { - qr_meld(sentinel, extent, extent_t, qr_link); } JEMALLOC_INLINE void extent_ring_remove(extent_t *extent) { - qr_remove(extent, qr_link); } diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash_inlines.h index 0340418e..4bb78505 100644 --- a/include/jemalloc/internal/hash_inlines.h +++ b/include/jemalloc/internal/hash_inlines.h @@ -23,21 +23,18 @@ void hash(const void *key, size_t len, const uint32_t seed, JEMALLOC_INLINE uint32_t hash_rotl_32(uint32_t x, int8_t r) { - return ((x << r) | (x >> (32 - r))); } JEMALLOC_INLINE uint64_t hash_rotl_64(uint64_t x, int8_t r) { - return ((x << r) | (x >> (64 - r))); } JEMALLOC_INLINE uint32_t hash_get_block_32(const uint32_t *p, int i) { - /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { uint32_t ret; @@ -52,7 +49,6 @@ hash_get_block_32(const uint32_t *p, int i) JEMALLOC_INLINE uint64_t hash_get_block_64(const uint64_t *p, int i) { - /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { uint64_t ret; @@ -67,7 +63,6 @@ hash_get_block_64(const uint64_t *p, int i) JEMALLOC_INLINE uint32_t hash_fmix_32(uint32_t h) { - h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; @@ -80,7 +75,6 @@ hash_fmix_32(uint32_t h) JEMALLOC_INLINE uint64_t hash_fmix_64(uint64_t k) { - k ^= k >> 33; k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; @@ -326,7 +320,6 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, JEMALLOC_INLINE void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { - assert(len <= INT_MAX); /* Unfortunate implementation limitation. 
*/ #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index dfbb4b6d..00dce68d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -553,7 +553,6 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); JEMALLOC_ALWAYS_INLINE pszind_t psz2ind(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) return (NPSIZES); { @@ -577,7 +576,6 @@ psz2ind(size_t psz) JEMALLOC_INLINE size_t pind2sz_compute(pszind_t pind) { - if (unlikely(pind == NPSIZES)) return (LARGE_MAXCLASS + PAGE); { @@ -608,7 +606,6 @@ pind2sz_lookup(pszind_t pind) JEMALLOC_INLINE size_t pind2sz(pszind_t pind) { - assert(pind < NPSIZES+1); return (pind2sz_lookup(pind)); } @@ -616,7 +613,6 @@ pind2sz(pszind_t pind) JEMALLOC_INLINE size_t psz2u(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) return (LARGE_MAXCLASS + PAGE); { @@ -633,7 +629,6 @@ psz2u(size_t psz) JEMALLOC_INLINE szind_t size2index_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) return (NSIZES); #if (NTBINS != 0) @@ -664,7 +659,6 @@ size2index_compute(size_t size) JEMALLOC_ALWAYS_INLINE szind_t size2index_lookup(size_t size) { - assert(size <= LOOKUP_MAXCLASS); { szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); @@ -676,7 +670,6 @@ size2index_lookup(size_t size) JEMALLOC_ALWAYS_INLINE szind_t size2index(size_t size) { - assert(size > 0); if (likely(size <= LOOKUP_MAXCLASS)) return (size2index_lookup(size)); @@ -686,7 +679,6 @@ size2index(size_t size) JEMALLOC_INLINE size_t index2size_compute(szind_t index) { - #if (NTBINS > 0) if (index < NTBINS) return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); @@ -721,7 +713,6 @@ index2size_lookup(szind_t index) JEMALLOC_ALWAYS_INLINE size_t index2size(szind_t index) { - assert(index < NSIZES); return (index2size_lookup(index)); } @@ -729,7 +720,6 @@ index2size(szind_t index) 
JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) return (0); #if (NTBINS > 0) @@ -767,7 +757,6 @@ s2u_lookup(size_t size) JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { - assert(size > 0); if (likely(size <= LOOKUP_MAXCLASS)) return (s2u_lookup(size)); @@ -852,14 +841,12 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) JEMALLOC_INLINE arena_t * arena_choose(tsd_t *tsd, arena_t *arena) { - return (arena_choose_impl(tsd, arena, false)); } JEMALLOC_INLINE arena_t * arena_ichoose(tsd_t *tsd, arena_t *arena) { - return (arena_choose_impl(tsd, arena, true)); } @@ -933,7 +920,6 @@ extent_t *iealloc(tsdn_t *tsdn, const void *ptr); JEMALLOC_ALWAYS_INLINE extent_t * iealloc(tsdn_t *tsdn, const void *ptr) { - return (extent_lookup(tsdn, ptr, true)); } #endif @@ -975,7 +961,6 @@ bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { - assert(ptr != NULL); return (arena_aalloc(tsdn, ptr)); @@ -991,7 +976,6 @@ iaalloc(tsdn_t *tsdn, const void *ptr) JEMALLOC_ALWAYS_INLINE size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { - assert(ptr != NULL); return (arena_salloc(tsdn, extent, ptr)); @@ -1019,7 +1003,6 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), false, NULL, slow_path)); } @@ -1049,14 +1032,12 @@ JEMALLOC_ALWAYS_INLINE void * ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, tcache_get(tsd, true), false, NULL)); } 
@@ -1088,7 +1069,6 @@ JEMALLOC_ALWAYS_INLINE void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool is_internal, bool slow_path) { - assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < @@ -1104,7 +1084,6 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, extent_t *extent, void *ptr) { - idalloctm(tsd_tsdn(tsd), extent, ptr, tcache_get(tsd, false), false, true); } @@ -1113,7 +1092,6 @@ JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { - arena_sdalloc(tsdn, extent, ptr, size, tcache, slow_path); } @@ -1154,7 +1132,6 @@ JEMALLOC_ALWAYS_INLINE void * iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - assert(ptr != NULL); assert(size != 0); @@ -1176,7 +1153,6 @@ JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero) { - return (iralloct(tsd_tsdn(tsd), extent, ptr, oldsize, size, alignment, zero, tcache_get(tsd, true), NULL)); } @@ -1185,7 +1161,6 @@ JEMALLOC_ALWAYS_INLINE bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero) { - assert(ptr != NULL); assert(size != 0); diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index c907d910..277027f0 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -63,7 +63,6 @@ typedef intptr_t ssize_t; static int isblank(int c) { - return (c == '\t' || c == ' '); } #endif diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index b769f0ca..d65fa13c 100644 --- 
a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -12,7 +12,6 @@ void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); JEMALLOC_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) { witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 @@ -35,7 +34,6 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) JEMALLOC_INLINE void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) { witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 @@ -57,7 +55,6 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) JEMALLOC_INLINE void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) witness_assert_owner(tsdn, &mutex->witness); } @@ -65,7 +62,6 @@ malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) witness_assert_not_owner(tsdn, &mutex->witness); } diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 4f91c333..9efb7b74 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -207,19 +207,16 @@ a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); a_attr void \ a_prefix##new(a_ph_type *ph) \ { \ - \ memset(ph, 0, sizeof(ph(a_type))); \ } \ a_attr bool \ a_prefix##empty(a_ph_type *ph) \ { \ - \ return (ph->ph_root == NULL); \ } \ a_attr a_type * \ a_prefix##first(a_ph_type *ph) \ { \ - \ if (ph->ph_root == NULL) \ return (NULL); \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ @@ -228,7 +225,6 @@ a_prefix##first(a_ph_type *ph) \ a_attr void \ a_prefix##insert(a_ph_type *ph, a_type *phn) \ { \ - \ memset(&phn->a_field, 0, sizeof(phn(a_type))); \ \ /* \ diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index b82a6620..8cc19ce8 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ 
b/include/jemalloc/internal/prng_inlines.h @@ -20,21 +20,18 @@ size_t prng_range_zu(size_t *state, size_t range, bool atomic); JEMALLOC_ALWAYS_INLINE uint32_t prng_state_next_u32(uint32_t state) { - return ((state * PRNG_A_32) + PRNG_C_32); } JEMALLOC_ALWAYS_INLINE uint64_t prng_state_next_u64(uint64_t state) { - return ((state * PRNG_A_64) + PRNG_C_64); } JEMALLOC_ALWAYS_INLINE size_t prng_state_next_zu(size_t state) { - #if LG_SIZEOF_PTR == 2 return ((state * PRNG_A_32) + PRNG_C_32); #elif LG_SIZEOF_PTR == 3 diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 0b580425..a1ea7a32 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -29,7 +29,6 @@ void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, JEMALLOC_ALWAYS_INLINE bool prof_active_get_unlocked(void) { - /* * Even if opt_prof is true, sampling can be temporarily disabled by * setting prof_active to false. No locking is used when reading @@ -42,7 +41,6 @@ prof_active_get_unlocked(void) JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { - /* * No locking is used when reading prof_gdump_val in the fast path, so * there are no guarantees regarding how long it will take for all @@ -78,7 +76,6 @@ prof_tdata_get(tsd_t *tsd, bool create) JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { - cassert(config_prof); assert(ptr != NULL); @@ -89,7 +86,6 @@ JEMALLOC_ALWAYS_INLINE void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx) { - cassert(config_prof); assert(ptr != NULL); @@ -100,7 +96,6 @@ JEMALLOC_ALWAYS_INLINE void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, prof_tctx_t *tctx) { - cassert(config_prof); assert(ptr != NULL); @@ -162,7 +157,6 @@ JEMALLOC_ALWAYS_INLINE void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx) { - 
cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsdn, extent, ptr)); diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 7efba54d..7e79a6a0 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -74,7 +74,6 @@ rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, JEMALLOC_ALWAYS_INLINE uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { - return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - rtree->levels[level].cumbits)) & ((ZU(1) << rtree->levels[level].bits) - 1)); @@ -83,7 +82,6 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) JEMALLOC_ALWAYS_INLINE bool rtree_node_valid(rtree_elm_t *node) { - return ((uintptr_t)node != (uintptr_t)0); } @@ -144,7 +142,6 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) JEMALLOC_INLINE void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { - atomic_write_p(&elm->pun, extent); } @@ -408,7 +405,6 @@ JEMALLOC_INLINE void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, const extent_t *extent) { - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); @@ -422,7 +418,6 @@ rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, JEMALLOC_INLINE void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { - rtree_elm_write(elm, rtree_elm_read_acquired(tsdn, rtree, elm)); if (config_debug) rtree_elm_witness_release(tsdn, rtree, elm); diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h index b10f67e7..b4e779f8 100644 --- a/include/jemalloc/internal/spin_inlines.h +++ b/include/jemalloc/internal/spin_inlines.h @@ -10,7 +10,6 @@ void spin_adaptive(spin_t *spin); JEMALLOC_INLINE void spin_init(spin_t *spin) { - spin->iteration = 0; } diff --git 
a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index e522d9e6..2762b0e2 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -88,7 +88,6 @@ tcache_get(tsd_t *tsd, bool create) JEMALLOC_ALWAYS_INLINE void tcache_event(tsd_t *tsd, tcache_t *tcache) { - if (TCACHE_GC_INCR == 0) return; diff --git a/include/jemalloc/internal/ticker_inlines.h b/include/jemalloc/internal/ticker_inlines.h index 42f37eb2..1a4395f3 100644 --- a/include/jemalloc/internal/ticker_inlines.h +++ b/include/jemalloc/internal/ticker_inlines.h @@ -13,7 +13,6 @@ bool ticker_tick(ticker_t *ticker); JEMALLOC_INLINE void ticker_init(ticker_t *ticker, int32_t nticks) { - ticker->tick = nticks; ticker->nticks = nticks; } @@ -21,21 +20,18 @@ ticker_init(ticker_t *ticker, int32_t nticks) JEMALLOC_INLINE void ticker_copy(ticker_t *ticker, const ticker_t *other) { - *ticker = *other; } JEMALLOC_INLINE int32_t ticker_read(const ticker_t *ticker) { - return (ticker->tick); } JEMALLOC_INLINE bool ticker_ticks(ticker_t *ticker, int32_t nticks) { - if (unlikely(ticker->tick < nticks)) { ticker->tick = ticker->nticks; return (true); @@ -47,7 +43,6 @@ ticker_ticks(ticker_t *ticker, int32_t nticks) JEMALLOC_INLINE bool ticker_tick(ticker_t *ticker) { - return (ticker_ticks(ticker, 1)); } #endif diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index ad915d1a..0df21ad6 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -51,21 +51,18 @@ tsd_fetch_impl(bool init) JEMALLOC_ALWAYS_INLINE tsd_t * tsd_fetch(void) { - return (tsd_fetch_impl(true)); } JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { - return ((tsdn_t *)tsd); } JEMALLOC_INLINE bool tsd_nominal(tsd_t *tsd) { - return (tsd->state == tsd_state_nominal); } @@ -73,21 +70,18 @@ tsd_nominal(tsd_t *tsd) JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) \ { \ - \ 
return (&tsd->n); \ } \ \ JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) \ { \ - \ return (*tsd_##n##p_get(tsd)); \ } \ \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t n) \ { \ - \ assert(tsd->state == tsd_state_nominal); \ tsd->n = n; \ } @@ -97,7 +91,6 @@ MALLOC_TSD JEMALLOC_ALWAYS_INLINE tsdn_t * tsdn_fetch(void) { - if (!tsd_booted_get()) return (NULL); @@ -107,14 +100,12 @@ tsdn_fetch(void) JEMALLOC_ALWAYS_INLINE bool tsdn_null(const tsdn_t *tsdn) { - return (tsdn == NULL); } JEMALLOC_ALWAYS_INLINE tsd_t * tsdn_tsd(tsdn_t *tsdn) { - assert(!tsdn_null(tsdn)); return (&tsdn->tsd); @@ -123,7 +114,6 @@ tsdn_tsd(tsdn_t *tsdn) JEMALLOC_ALWAYS_INLINE rtree_ctx_t * tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { - /* * If tsd cannot be accessed, initialize the fallback rtree_ctx and * return a pointer to it. diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index b48eaeca..17e3da9f 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -177,7 +177,6 @@ a_attr bool a_name##tsd_booted = false; a_attr bool \ a_name##tsd_cleanup_wrapper(void) \ { \ - \ if (a_name##tsd_initialized) { \ a_name##tsd_initialized = false; \ a_cleanup(&a_name##tsd_tls); \ @@ -187,7 +186,6 @@ a_name##tsd_cleanup_wrapper(void) \ a_attr bool \ a_name##tsd_boot0(void) \ { \ - \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ &a_name##tsd_cleanup_wrapper); \ @@ -198,39 +196,33 @@ a_name##tsd_boot0(void) \ a_attr void \ a_name##tsd_boot1(void) \ { \ - \ /* Do nothing. */ \ } \ a_attr bool \ a_name##tsd_boot(void) \ { \ - \ return (a_name##tsd_boot0()); \ } \ a_attr bool \ a_name##tsd_booted_get(void) \ { \ - \ return (a_name##tsd_booted); \ } \ a_attr bool \ a_name##tsd_get_allocates(void) \ { \ - \ return (false); \ } \ /* Get/set. 
*/ \ a_attr a_type * \ a_name##tsd_get(bool init) \ { \ - \ assert(a_name##tsd_booted); \ return (&a_name##tsd_tls); \ } \ a_attr void \ a_name##tsd_set(a_type *val) \ { \ - \ assert(a_name##tsd_booted); \ if (likely(&a_name##tsd_tls != val)) \ a_name##tsd_tls = (*val); \ @@ -244,7 +236,6 @@ a_name##tsd_set(a_type *val) \ a_attr bool \ a_name##tsd_boot0(void) \ { \ - \ if (a_cleanup != malloc_tsd_no_cleanup) { \ if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ 0) \ @@ -256,39 +247,33 @@ a_name##tsd_boot0(void) \ a_attr void \ a_name##tsd_boot1(void) \ { \ - \ /* Do nothing. */ \ } \ a_attr bool \ a_name##tsd_boot(void) \ { \ - \ return (a_name##tsd_boot0()); \ } \ a_attr bool \ a_name##tsd_booted_get(void) \ { \ - \ return (a_name##tsd_booted); \ } \ a_attr bool \ a_name##tsd_get_allocates(void) \ { \ - \ return (false); \ } \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(bool init) \ { \ - \ assert(a_name##tsd_booted); \ return (&a_name##tsd_tls); \ } \ a_attr void \ a_name##tsd_set(a_type *val) \ { \ - \ assert(a_name##tsd_booted); \ if (likely(&a_name##tsd_tls != val)) \ a_name##tsd_tls = (*val); \ @@ -331,7 +316,6 @@ a_name##tsd_cleanup_wrapper(void) \ a_attr void \ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ - \ if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ malloc_write(": Error setting" \ " TSD for "#a_name"\n"); \ @@ -364,7 +348,6 @@ a_name##tsd_wrapper_get(bool init) \ a_attr bool \ a_name##tsd_boot0(void) \ { \ - \ a_name##tsd_tsd = TlsAlloc(); \ if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ return (true); \ @@ -394,7 +377,6 @@ a_name##tsd_boot1(void) \ a_attr bool \ a_name##tsd_boot(void) \ { \ - \ if (a_name##tsd_boot0()) \ return (true); \ a_name##tsd_boot1(); \ @@ -403,13 +385,11 @@ a_name##tsd_boot(void) \ a_attr bool \ a_name##tsd_booted_get(void) \ { \ - \ return (a_name##tsd_booted); \ } \ a_attr bool \ a_name##tsd_get_allocates(void) \ { \ - \ return (true); \ } \ /* Get/set. 
*/ \ @@ -466,7 +446,6 @@ a_name##tsd_cleanup_wrapper(void *arg) \ a_attr void \ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ { \ - \ if (pthread_setspecific(a_name##tsd_tsd, \ (void *)wrapper)) { \ malloc_write(": Error setting" \ @@ -506,7 +485,6 @@ a_name##tsd_wrapper_get(bool init) \ a_attr bool \ a_name##tsd_boot0(void) \ { \ - \ if (pthread_key_create(&a_name##tsd_tsd, \ a_name##tsd_cleanup_wrapper) != 0) \ return (true); \ @@ -532,7 +510,6 @@ a_name##tsd_boot1(void) \ a_attr bool \ a_name##tsd_boot(void) \ { \ - \ if (a_name##tsd_boot0()) \ return (true); \ a_name##tsd_boot1(); \ @@ -541,13 +518,11 @@ a_name##tsd_boot(void) \ a_attr bool \ a_name##tsd_booted_get(void) \ { \ - \ return (a_name##tsd_booted); \ } \ a_attr bool \ a_name##tsd_get_allocates(void) \ { \ - \ return (true); \ } \ /* Get/set. */ \ diff --git a/include/jemalloc/internal/util_inlines.h b/include/jemalloc/internal/util_inlines.h index 93f5b1de..4ceed06b 100644 --- a/include/jemalloc/internal/util_inlines.h +++ b/include/jemalloc/internal/util_inlines.h @@ -27,28 +27,24 @@ int get_errno(void); JEMALLOC_ALWAYS_INLINE unsigned ffs_llu(unsigned long long bitmap) { - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); } JEMALLOC_ALWAYS_INLINE unsigned ffs_lu(unsigned long bitmap) { - return (JEMALLOC_INTERNAL_FFSL(bitmap)); } JEMALLOC_ALWAYS_INLINE unsigned ffs_u(unsigned bitmap) { - return (JEMALLOC_INTERNAL_FFS(bitmap)); } JEMALLOC_ALWAYS_INLINE unsigned ffs_zu(size_t bitmap) { - #if LG_SIZEOF_PTR == LG_SIZEOF_INT return (ffs_u(bitmap)); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG @@ -63,7 +59,6 @@ ffs_zu(size_t bitmap) JEMALLOC_ALWAYS_INLINE unsigned ffs_u64(uint64_t bitmap) { - #if LG_SIZEOF_LONG == 3 return (ffs_lu(bitmap)); #elif LG_SIZEOF_LONG_LONG == 3 @@ -76,7 +71,6 @@ ffs_u64(uint64_t bitmap) JEMALLOC_ALWAYS_INLINE unsigned ffs_u32(uint32_t bitmap) { - #if LG_SIZEOF_INT == 2 return (ffs_u(bitmap)); #else @@ -88,7 +82,6 @@ ffs_u32(uint32_t bitmap) JEMALLOC_INLINE uint64_t 
pow2_ceil_u64(uint64_t x) { - x--; x |= x >> 1; x |= x >> 2; @@ -103,7 +96,6 @@ pow2_ceil_u64(uint64_t x) JEMALLOC_INLINE uint32_t pow2_ceil_u32(uint32_t x) { - x--; x |= x >> 1; x |= x >> 2; @@ -118,7 +110,6 @@ pow2_ceil_u32(uint32_t x) JEMALLOC_INLINE size_t pow2_ceil_zu(size_t x) { - #if (LG_SIZEOF_PTR == 3) return (pow2_ceil_u64(x)); #else @@ -163,7 +154,6 @@ lg_floor(size_t x) JEMALLOC_INLINE unsigned lg_floor(size_t x) { - assert(x != 0); #if (LG_SIZEOF_PTR == LG_SIZEOF_INT) @@ -178,7 +168,6 @@ lg_floor(size_t x) JEMALLOC_INLINE unsigned lg_floor(size_t x) { - assert(x != 0); x |= (x >> 1); @@ -200,7 +189,6 @@ lg_floor(size_t x) JEMALLOC_INLINE void set_errno(int errnum) { - #ifdef _WIN32 SetLastError(errnum); #else @@ -212,7 +200,6 @@ set_errno(int errnum) JEMALLOC_INLINE int get_errno(void) { - #ifdef _WIN32 return (GetLastError()); #else diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index a3ee2506..47998be2 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -17,7 +17,6 @@ static __forceinline int ffsl(long x) static __forceinline int ffs(int x) { - return (ffsl(x)); } diff --git a/src/arena.c b/src/arena.c index 1f0c4df5..80af3f99 100644 --- a/src/arena.c +++ b/src/arena.c @@ -40,7 +40,6 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, static size_t arena_extent_dirty_npages(const extent_t *extent) { - return (extent_size_get(extent) >> LG_PAGE); } @@ -76,7 +75,6 @@ static void arena_extent_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &arena->lock); extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); @@ -87,7 +85,6 @@ void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->lock); arena_extent_cache_dalloc_locked(tsdn, arena, r_extent_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); @@ 
-97,7 +94,6 @@ void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool cache) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); if (cache) { @@ -110,7 +106,6 @@ void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool dirty) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); if (dirty) { @@ -196,14 +191,12 @@ arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, static void arena_nactive_add(arena_t *arena, size_t add_pages) { - arena->nactive += add_pages; } static void arena_nactive_sub(arena_t *arena, size_t sub_pages) { - assert(arena->nactive >= sub_pages); arena->nactive -= sub_pages; } @@ -279,7 +272,6 @@ arena_large_reset_stats_cancel(arena_t *arena, size_t usize) static void arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) { - arena_large_dalloc_stats_update(arena, oldusize); arena_large_malloc_stats_update(arena, usize); } @@ -391,7 +383,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void arena_decay_deadline_init(arena_t *arena) { - /* * Generate a new deadline that is uniformly random within the next * epoch after the current one. 
@@ -410,7 +401,6 @@ arena_decay_deadline_init(arena_t *arena) static bool arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) { - return (nstime_compare(&arena->decay.deadline, time) <= 0); } @@ -451,7 +441,6 @@ arena_decay_backlog_update_last(arena_t *arena) static void arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) { - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); @@ -509,7 +498,6 @@ arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) { - arena_decay_epoch_advance_helper(arena, time); arena_decay_epoch_advance_purge(tsdn, arena); } @@ -517,7 +505,6 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) static void arena_decay_init(arena_t *arena, ssize_t decay_time) { - arena->decay.time = decay_time; if (decay_time > 0) { nstime_init2(&arena->decay.interval, decay_time, 0); @@ -535,7 +522,6 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) static bool arena_decay_time_valid(ssize_t decay_time) { - if (decay_time < -1) return (false); if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) @@ -558,7 +544,6 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { - if (!arena_decay_time_valid(decay_time)) return (true); @@ -623,7 +608,6 @@ arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_owner(tsdn, &arena->lock); /* Don't recursively purge. 
*/ @@ -762,7 +746,6 @@ label_return: void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { - malloc_mutex_lock(tsdn, &arena->lock); if (all) arena_purge_to_limit(tsdn, arena, 0); @@ -783,7 +766,6 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) static void arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { - assert(extent_slab_data_get(slab)->nfree > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } @@ -791,7 +773,6 @@ arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) static void arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { - extent_heap_remove(&bin->slabs_nonfull, slab); } @@ -809,7 +790,6 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) static void arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) { - assert(extent_slab_data_get(slab)->nfree == 0); extent_ring_insert(&bin->slabs_full, slab); } @@ -817,7 +797,6 @@ arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) static void arena_bin_slabs_full_remove(extent_t *slab) { - extent_ring_remove(slab); } @@ -935,7 +914,6 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) void arena_destroy(tsd_t *tsd, arena_t *arena) { - assert(base_ind_get(arena->base) >= narenas_auto); assert(arena_nthreads_get(arena, false) == 0); assert(arena_nthreads_get(arena, true) == 0); @@ -1176,7 +1154,6 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { - if (!zero) memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } @@ -1188,7 +1165,6 @@ arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info) { - memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } #ifdef JEMALLOC_JET @@ -1255,7 +1231,6 @@ void * arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero) { - assert(!tsdn_null(tsdn) || arena != 
NULL); if (likely(!tsdn_null(tsdn))) @@ -1320,7 +1295,6 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, static size_t arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { - cassert(config_prof); assert(ptr != NULL); @@ -1351,7 +1325,6 @@ arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, static void arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { - /* Dissociate slab from bin. */ if (slab == bin->slabcur) bin->slabcur = NULL; @@ -1375,7 +1348,6 @@ static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_bin_t *bin) { - assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1393,7 +1365,6 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_bin_t *bin) { - assert(extent_slab_data_get(slab)->nfree > 0); /* @@ -1446,7 +1417,6 @@ void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true); } @@ -1463,7 +1433,6 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); } @@ -1508,7 +1477,6 @@ static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { - if (alignment == 0) return (arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true)); @@ -1575,7 +1543,6 @@ arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { - if (!have_dss) return (dss_prec != dss_prec_disabled); malloc_mutex_lock(tsdn, &arena->lock); @@ -1587,14 +1554,12 @@ arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) ssize_t arena_decay_time_default_get(void) { - return ((ssize_t)atomic_read_zu((size_t 
*)&decay_time_default)); } bool arena_decay_time_default_set(ssize_t decay_time) { - if (!arena_decay_time_valid(decay_time)) return (true); atomic_write_zu((size_t *)&decay_time_default, (size_t)decay_time); @@ -1605,7 +1570,6 @@ static void arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *decay_time = arena->decay.time; @@ -1617,7 +1581,6 @@ void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(tsdn, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, nactive, ndirty); @@ -1686,28 +1649,24 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, unsigned arena_nthreads_get(arena_t *arena, bool internal) { - return (atomic_read_u(&arena->nthreads[internal])); } void arena_nthreads_inc(arena_t *arena, bool internal) { - atomic_add_u(&arena->nthreads[internal], 1); } void arena_nthreads_dec(arena_t *arena, bool internal) { - atomic_sub_u(&arena->nthreads[internal], 1); } size_t arena_extent_sn_next(arena_t *arena) { - return (atomic_add_zu(&arena->extent_sn_next, 1) - 1); } @@ -1813,28 +1772,24 @@ label_error: void arena_boot(void) { - arena_decay_time_default_set(opt_decay_time); } void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->lock); } void arena_prefork1(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extents_mtx); } void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_cache_mtx); } diff --git a/src/base.c b/src/base.c index 5eab7cd5..7c0ef2c1 100644 --- a/src/base.c +++ b/src/base.c @@ -30,7 +30,6 @@ base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) static void base_unmap(extent_hooks_t *extent_hooks, 
unsigned ind, void *addr, size_t size) { - /* * Cascade through dalloc, decommit, purge_lazy, and purge_forced, * stopping at first success. This cascade is performed for consistency @@ -107,7 +106,6 @@ static void base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, size_t gap_size, void *addr, size_t size) { - if (extent_size_get(extent) > 0) { /* * Compute the index for the largest size class that does not @@ -202,7 +200,6 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) base_t * b0get(void) { - return (b0); } @@ -263,7 +260,6 @@ base_delete(base_t *base) extent_hooks_t * base_extent_hooks_get(base_t *base) { - return ((extent_hooks_t *)atomic_read_p(&base->extent_hooks_pun)); } @@ -330,7 +326,6 @@ void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped) { - cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -345,28 +340,24 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, void base_prefork(tsdn_t *tsdn, base_t *base) { - malloc_mutex_prefork(tsdn, &base->mtx); } void base_postfork_parent(tsdn_t *tsdn, base_t *base) { - malloc_mutex_postfork_parent(tsdn, &base->mtx); } void base_postfork_child(tsdn_t *tsdn, base_t *base) { - malloc_mutex_postfork_child(tsdn, &base->mtx); } bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); return (b0 == NULL); } diff --git a/src/bitmap.c b/src/bitmap.c index 66554451..3d27f059 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -37,7 +37,6 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset); } @@ -74,7 +73,6 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { - assert(nbits > 0); assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); @@ -85,7 +83,6 @@ 
bitmap_info_init(bitmap_info_t *binfo, size_t nbits) static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->ngroups); } @@ -106,6 +103,5 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) size_t bitmap_size(const bitmap_info_t *binfo) { - return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); } diff --git a/src/ckh.c b/src/ckh.c index 6f0f1e4d..fe79862c 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -411,7 +411,6 @@ label_return: void ckh_delete(tsd_t *tsd, ckh_t *ckh) { - assert(ckh != NULL); #ifdef CKH_VERBOSE @@ -435,7 +434,6 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) size_t ckh_count(ckh_t *ckh) { - assert(ckh != NULL); return (ckh->count); @@ -539,14 +537,12 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) void ckh_string_hash(const void *key, size_t r_hash[2]) { - hash(key, strlen((const char *)key), 0x94122f33U, r_hash); } bool ckh_string_keycomp(const void *k1, const void *k2) { - assert(k1 != NULL); assert(k2 != NULL); @@ -569,6 +565,5 @@ ckh_pointer_hash(const void *key, size_t r_hash[2]) bool ckh_pointer_keycomp(const void *k1, const void *k2) { - return ((k1 == k2) ? true : false); } diff --git a/src/ctl.c b/src/ctl.c index 76fbce4b..8484ba85 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -18,7 +18,6 @@ static ctl_stats_t *ctl_stats; JEMALLOC_INLINE_C const ctl_named_node_t * ctl_named_node(const ctl_node_t *node) { - return ((node->named) ? (const ctl_named_node_t *)node : NULL); } @@ -33,7 +32,6 @@ ctl_named_children(const ctl_named_node_t *node, size_t index) JEMALLOC_INLINE_C const ctl_indexed_node_t * ctl_indexed_node(const ctl_node_t *node) { - return (!node->named ? 
(const ctl_indexed_node_t *)node : NULL); } @@ -475,7 +473,6 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) static unsigned stats_arenas_i2a(size_t i) { - return (stats_arenas_i2a_impl(i, true, false)); } @@ -513,7 +510,6 @@ stats_arenas_i(size_t i) static void ctl_arena_clear(ctl_arena_stats_t *astats) { - astats->nthreads = 0; astats->dss = dss_prec_names[dss_prec_limit]; astats->decay_time = -1; @@ -985,7 +981,6 @@ label_return: bool ctl_boot(void) { - if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) return (true); @@ -997,21 +992,18 @@ ctl_boot(void) void ctl_prefork(tsdn_t *tsdn) { - malloc_mutex_prefork(tsdn, &ctl_mtx); } void ctl_postfork_parent(tsdn_t *tsdn) { - malloc_mutex_postfork_parent(tsdn, &ctl_mtx); } void ctl_postfork_child(tsdn_t *tsdn) { - malloc_mutex_postfork_child(tsdn, &ctl_mtx); } @@ -1540,7 +1532,6 @@ label_return: static void arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { - malloc_mutex_lock(tsdn, &ctl_mtx); { unsigned narenas = ctl_stats->narenas; @@ -1918,7 +1909,6 @@ CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > NBINS) return (NULL); return (super_arenas_bin_i_node); @@ -1929,7 +1919,6 @@ CTL_RO_NL_GEN(arenas_lextent_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > NSIZES - NBINS) return (NULL); return (super_arenas_lextent_i_node); @@ -2159,7 +2148,6 @@ static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > NBINS) return (NULL); return (super_stats_arenas_i_bins_j_node); @@ -2178,7 +2166,6 @@ static const ctl_named_node_t * stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > NSIZES - NBINS) return 
(NULL); return (super_stats_arenas_i_lextents_j_node); diff --git a/src/extent.c b/src/extent.c index 7eb49709..73f79c1c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -94,7 +94,6 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); ql_elm_new(extent, ql_link); ql_tail_insert(&arena->extent_cache, extent, ql_link); @@ -104,21 +103,18 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) extent_hooks_t * extent_hooks_get(arena_t *arena) { - return (base_extent_hooks_get(arena->base)); } extent_hooks_t * extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) { - return (base_extent_hooks_set(arena->base, extent_hooks)); } static void extent_hooks_assure_initialized(arena_t *arena, extent_hooks_t **r_extent_hooks) { - if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) *r_extent_hooks = extent_hooks_get(arena); } @@ -226,7 +222,6 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, const extent_t *extent, bool dependent, bool init_missing, rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) { - *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) @@ -252,7 +247,6 @@ static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b, const extent_t *extent) { - rtree_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); if (elm_b != NULL) rtree_elm_write_acquired(tsdn, &extents_rtree, elm_b, extent); @@ -261,7 +255,6 @@ extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, static void extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) { - rtree_elm_release(tsdn, &extents_rtree, elm_a); if (elm_b != NULL) rtree_elm_release(tsdn, &extents_rtree, elm_b); @@ -285,7 +278,6 @@ extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, static void extent_gprof_add(tsdn_t 
*tsdn, const extent_t *extent) { - cassert(config_prof); if (opt_prof && extent_active_get(extent)) { @@ -307,7 +299,6 @@ extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) static void extent_gprof_sub(tsdn_t *tsdn, const extent_t *extent) { - cassert(config_prof); if (opt_prof && extent_active_get(extent)) { @@ -406,7 +397,6 @@ static void extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool cache, extent_t *extent) { - /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. @@ -641,7 +631,6 @@ extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, @@ -653,7 +642,6 @@ extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { - return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, new_addr, usize, pad, alignment, zero, commit, slab)); } @@ -694,7 +682,6 @@ static void extent_retain(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - if (config_stats) arena->stats.retained += extent_size_get(extent); extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, @@ -906,7 +893,6 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_can_coalesce(const extent_t *a, const extent_t *b) { - if (extent_arena_get(a) != extent_arena_get(b)) return (false); if (extent_active_get(a) != extent_active_get(b)) @@ -924,7 +910,6 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, extent_heap_t extent_heaps[NPSIZES+1], bool cache) { - if (!extent_can_coalesce(a, b)) return; @@ -1008,7 +993,6 
@@ void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); @@ -1022,7 +1006,6 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, static bool extent_dalloc_default_impl(void *addr, size_t size) { - if (!have_dss || !extent_in_dss(addr)) return (extent_dalloc_mmap(addr, size)); return (true); @@ -1033,7 +1016,6 @@ static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); return (extent_dalloc_default_impl(addr, size)); @@ -1116,7 +1098,6 @@ static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), @@ -1142,7 +1123,6 @@ static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), @@ -1171,7 +1151,6 @@ static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); assert(addr != NULL); assert((offset & PAGE_MASK) == 0); @@ -1188,7 +1167,6 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_lazy == NULL || (*r_extent_hooks)->purge_lazy(*r_extent_hooks, @@ -1201,7 +1179,6 @@ static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - 
assert(extent_hooks == &extent_hooks_default); assert(addr != NULL); assert((offset & PAGE_MASK) == 0); @@ -1218,7 +1195,6 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_forced == NULL || (*r_extent_hooks)->purge_forced(*r_extent_hooks, @@ -1231,7 +1207,6 @@ static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); if (!maps_coalesce) @@ -1310,7 +1285,6 @@ label_error_a: static bool extent_merge_default_impl(void *addr_a, void *addr_b) { - if (!maps_coalesce) return (true); if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) @@ -1324,7 +1298,6 @@ static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); return (extent_merge_default_impl(addr_a, addr_b)); @@ -1396,7 +1369,6 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_boot(void) { - if (rtree_new(&extents_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - LG_PAGE))) return (true); diff --git a/src/extent_dss.c b/src/extent_dss.c index 0f0c689b..5aa95b1c 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -32,7 +32,6 @@ static void *dss_max; static void * extent_dss_sbrk(intptr_t increment) { - #ifdef JEMALLOC_DSS return (sbrk(increment)); #else @@ -55,7 +54,6 @@ extent_dss_prec_get(void) bool extent_dss_prec_set(dss_prec_t dss_prec) { - if (!have_dss) return (dss_prec != dss_prec_disabled); atomic_write_u(&dss_prec_default, (unsigned)dss_prec); @@ -208,7 +206,6 @@ label_oom: static bool extent_in_dss_helper(void *addr, void *max) { - return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr < 
(uintptr_t)max); } @@ -216,7 +213,6 @@ extent_in_dss_helper(void *addr, void *max) bool extent_in_dss(void *addr) { - cassert(have_dss); return (extent_in_dss_helper(addr, atomic_read_p(&dss_max))); @@ -241,7 +237,6 @@ extent_dss_mergeable(void *addr_a, void *addr_b) void extent_dss_boot(void) { - cassert(have_dss); dss_base = extent_dss_sbrk(0); diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 23dd4f88..e685a45b 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -69,7 +69,6 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool extent_dalloc_mmap(void *addr, size_t size) { - if (config_munmap) pages_unmap(addr, size); return (!config_munmap); diff --git a/src/jemalloc.c b/src/jemalloc.c index a053983f..1dc91833 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -211,7 +211,6 @@ JEMALLOC_ATTR(constructor) static void WINAPI _init_init_lock(void) { - /* * If another constructor in the same binary is using mallctl to e.g. * set up extent hooks, it may end up running before this one, and @@ -276,14 +275,12 @@ static bool malloc_init_hard(void); JEMALLOC_ALWAYS_INLINE_C bool malloc_initialized(void) { - return (malloc_init_state == malloc_init_initialized); } JEMALLOC_ALWAYS_INLINE_C bool malloc_init_a0(void) { - if (unlikely(malloc_init_state == malloc_init_uninitialized)) return (malloc_init_hard_a0()); return (false); @@ -292,7 +289,6 @@ malloc_init_a0(void) JEMALLOC_ALWAYS_INLINE_C bool malloc_init(void) { - if (unlikely(!malloc_initialized()) && malloc_init_hard()) return (true); return (false); @@ -306,7 +302,6 @@ malloc_init(void) static void * a0ialloc(size_t size, bool zero, bool is_internal) { - if (unlikely(malloc_init_a0())) return (NULL); @@ -317,21 +312,18 @@ a0ialloc(size_t size, bool zero, bool is_internal) static void a0idalloc(extent_t *extent, void *ptr, bool is_internal) { - idalloctm(TSDN_NULL, extent, ptr, false, is_internal, true); } void * a0malloc(size_t size) { - return (a0ialloc(size, false, 
true)); } void a0dalloc(void *ptr) { - a0idalloc(iealloc(NULL, ptr), ptr, true); } @@ -344,7 +336,6 @@ a0dalloc(void *ptr) void * bootstrap_malloc(size_t size) { - if (unlikely(size == 0)) size = 1; @@ -368,7 +359,6 @@ bootstrap_calloc(size_t num, size_t size) void bootstrap_free(void *ptr) { - if (unlikely(ptr == NULL)) return; @@ -378,28 +368,24 @@ bootstrap_free(void *ptr) void arena_set(unsigned ind, arena_t *arena) { - atomic_write_p((void **)&arenas[ind], arena); } static void narenas_total_set(unsigned narenas) { - atomic_write_u(&narenas_total, narenas); } static void narenas_total_inc(void) { - atomic_add_u(&narenas_total, 1); } unsigned narenas_total_get(void) { - return (atomic_read_u(&narenas_total)); } @@ -689,7 +675,6 @@ arenas_tdata_cleanup(tsd_t *tsd) static void stats_print_atexit(void) { - if (config_tcache && config_stats) { tsdn_t *tsdn; unsigned narenas, i; @@ -737,7 +722,6 @@ stats_print_atexit(void) static char * secure_getenv(const char *name) { - # ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) return (NULL); @@ -855,7 +839,6 @@ static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { - malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); } @@ -1167,7 +1150,6 @@ malloc_conf_init(void) static bool malloc_init_hard_needed(void) { - if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == malloc_init_recursible)) { /* @@ -1197,7 +1179,6 @@ malloc_init_hard_needed(void) static bool malloc_init_hard_a0_locked() { - malloc_initializer = INITIALIZER; if (config_prof) @@ -1261,7 +1242,6 @@ malloc_init_hard_a0(void) static bool malloc_init_hard_recursible(void) { - malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); @@ -1285,7 +1265,6 @@ malloc_init_hard_recursible(void) static bool malloc_init_hard_finish(tsdn_t *tsdn) { - if (malloc_mutex_boot()) return (true); @@ -1458,7 +1437,6 @@ JEMALLOC_ALWAYS_INLINE_C void ialloc_post_check(void *ret, tsdn_t 
*tsdn, size_t usize, const char *func, bool update_errno, bool slow_path) { - assert(!tsdn_null(tsdn) || ret == NULL); if (unlikely(ret == NULL)) { @@ -1617,7 +1595,6 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - return (imemalign(memptr, alignment, size, sizeof(void *))); } @@ -1754,7 +1731,6 @@ JEMALLOC_INLINE_C void isfree(tsd_t *tsd, extent_t *extent, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { - witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); @@ -1850,7 +1826,6 @@ je_realloc(void *ptr, size_t size) JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { - UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); @@ -1959,7 +1934,6 @@ JEMALLOC_ALWAYS_INLINE_C bool imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) { - if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) { *alignment = 0; *usize = s2u(size); @@ -2641,7 +2615,6 @@ JEMALLOC_ATTR(constructor) static void jemalloc_constructor(void) { - malloc_init(); } #endif diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 4d88f993..84d47aed 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -70,55 +70,47 @@ newImpl(std::size_t size) noexcept(IsNoExcept) void * operator new(std::size_t size) { - return (newImpl(size)); } void * operator new[](std::size_t size) { - return (newImpl(size)); } void * operator new(std::size_t size, const std::nothrow_t&) noexcept { - return (newImpl(size)); } void * operator new[](std::size_t size, const std::nothrow_t&) noexcept { - return (newImpl(size)); } void operator delete(void* ptr) noexcept { - je_free(ptr); } void operator delete[](void* ptr) noexcept { - je_free(ptr); } void operator delete(void* ptr, const std::nothrow_t&) noexcept { - je_free(ptr); } void operator delete[](void* ptr, const std::nothrow_t&) noexcept { - je_free(ptr); } @@ 
-127,13 +119,11 @@ void operator delete[](void* ptr, const std::nothrow_t&) noexcept void operator delete(void* ptr, std::size_t size) noexcept { - je_sdallocx(ptr, size, /*flags=*/0); } void operator delete[](void* ptr, std::size_t size) noexcept { - je_sdallocx(ptr, size, /*flags=*/0); } diff --git a/src/large.c b/src/large.c index ec22e64c..9936b236 100644 --- a/src/large.c +++ b/src/large.c @@ -6,7 +6,6 @@ void * large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { - assert(usize == s2u(usize)); return (large_palloc(tsdn, arena, usize, CACHELINE, zero)); @@ -67,7 +66,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, void large_dalloc_junk(void *ptr, size_t usize) { - memset(ptr, JEMALLOC_FREE_JUNK, usize); } #ifdef JEMALLOC_JET @@ -83,7 +81,6 @@ large_dalloc_junk_t *large_dalloc_junk = JEMALLOC_N(n_large_dalloc_junk); void large_dalloc_maybe_junk(void *ptr, size_t usize) { - if (config_fill && have_dss && unlikely(opt_junk_free)) { /* * Only bother junk filling if the extent isn't about to be @@ -198,7 +195,6 @@ bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero) { - assert(s2u(extent_usize_get(extent)) == extent_usize_get(extent)); /* The following should have been caught by callers. 
*/ assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS); @@ -247,7 +243,6 @@ static void * large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { - if (alignment <= CACHELINE) return (large_malloc(tsdn, arena, usize, zero)); return (large_palloc(tsdn, arena, usize, alignment, zero)); @@ -314,41 +309,35 @@ large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_impl(tsdn, extent, true); } void large_dalloc(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_impl(tsdn, extent, false); } size_t large_salloc(tsdn_t *tsdn, const extent_t *extent) { - return (extent_usize_get(extent)); } prof_tctx_t * large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { - return (extent_prof_tctx_get(extent)); } void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { - extent_prof_tctx_set(extent, tctx); } void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { - large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/mutex.c b/src/mutex.c index b757ba86..bde536de 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -37,7 +37,6 @@ static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, static void pthread_create_once(void) { - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { malloc_write(": Error in dlsym(RTLD_NEXT, " @@ -71,7 +70,6 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { - #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); @@ -113,21 +111,18 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) { - malloc_mutex_lock(tsdn, mutex); } void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t 
*mutex) { - malloc_mutex_unlock(tsdn, mutex); } void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { - #ifdef JEMALLOC_MUTEX_INIT_CB malloc_mutex_unlock(tsdn, mutex); #else @@ -144,7 +139,6 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) bool malloc_mutex_boot(void) { - #ifdef JEMALLOC_MUTEX_INIT_CB postpone_init = false; while (postponed_mutexes != NULL) { diff --git a/src/nstime.c b/src/nstime.c index 0948e29f..57ebf2e0 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -5,56 +5,48 @@ void nstime_init(nstime_t *time, uint64_t ns) { - time->ns = ns; } void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) { - time->ns = sec * BILLION + nsec; } uint64_t nstime_ns(const nstime_t *time) { - return (time->ns); } uint64_t nstime_sec(const nstime_t *time) { - return (time->ns / BILLION); } uint64_t nstime_nsec(const nstime_t *time) { - return (time->ns % BILLION); } void nstime_copy(nstime_t *time, const nstime_t *source) { - *time = *source; } int nstime_compare(const nstime_t *a, const nstime_t *b) { - return ((a->ns > b->ns) - (a->ns < b->ns)); } void nstime_add(nstime_t *time, const nstime_t *addend) { - assert(UINT64_MAX - time->ns >= addend->ns); time->ns += addend->ns; @@ -63,7 +55,6 @@ nstime_add(nstime_t *time, const nstime_t *addend) void nstime_subtract(nstime_t *time, const nstime_t *subtrahend) { - assert(nstime_compare(time, subtrahend) >= 0); time->ns -= subtrahend->ns; @@ -72,7 +63,6 @@ nstime_subtract(nstime_t *time, const nstime_t *subtrahend) void nstime_imultiply(nstime_t *time, uint64_t multiplier) { - assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); @@ -82,7 +72,6 @@ nstime_imultiply(nstime_t *time, uint64_t multiplier) void nstime_idivide(nstime_t *time, uint64_t divisor) { - assert(divisor != 0); time->ns /= divisor; @@ -91,7 +80,6 @@ nstime_idivide(nstime_t *time, uint64_t divisor) uint64_t nstime_divide(const 
nstime_t *time, const nstime_t *divisor) { - assert(divisor->ns != 0); return (time->ns / divisor->ns); @@ -135,7 +123,6 @@ nstime_get(nstime_t *time) static void nstime_get(nstime_t *time) { - nstime_init(time, mach_absolute_time()); } #else @@ -157,7 +144,6 @@ nstime_get(nstime_t *time) bool nstime_monotonic(void) { - return (NSTIME_MONOTONIC); #undef NSTIME_MONOTONIC } diff --git a/src/pages.c b/src/pages.c index d5a0a21c..7c26a28a 100644 --- a/src/pages.c +++ b/src/pages.c @@ -64,7 +64,6 @@ pages_map(void *addr, size_t size, bool *commit) void pages_unmap(void *addr, size_t size) { - #ifdef _WIN32 if (VirtualFree(addr, 0, MEM_RELEASE) == 0) #else @@ -121,7 +120,6 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, static bool pages_commit_impl(void *addr, size_t size, bool commit) { - if (os_overcommits) return (true); @@ -151,21 +149,18 @@ pages_commit_impl(void *addr, size_t size, bool commit) bool pages_commit(void *addr, size_t size) { - return (pages_commit_impl(addr, size, true)); } bool pages_decommit(void *addr, size_t size) { - return (pages_commit_impl(addr, size, false)); } bool pages_purge_lazy(void *addr, size_t size) { - if (!pages_can_purge_lazy) return (true); @@ -182,7 +177,6 @@ pages_purge_lazy(void *addr, size_t size) bool pages_purge_forced(void *addr, size_t size) { - if (!pages_can_purge_forced) return (true); @@ -196,7 +190,6 @@ pages_purge_forced(void *addr, size_t size) bool pages_huge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); @@ -210,7 +203,6 @@ pages_huge(void *addr, size_t size) bool pages_nohuge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); @@ -284,7 +276,6 @@ os_overcommits_proc(void) void pages_boot(void) { - #ifndef _WIN32 mmap_flags = MAP_PRIVATE | MAP_ANON; #endif diff --git a/src/prof.c b/src/prof.c index b9a9d659..237cbb50 100644 --- a/src/prof.c +++ b/src/prof.c @@ 
-226,7 +226,6 @@ void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, extent, ptr, usize, tctx); malloc_mutex_lock(tsdn, tctx->tdata->lock); @@ -243,7 +242,6 @@ prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, const void *ptr, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); @@ -259,7 +257,6 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) void bt_init(prof_bt_t *bt, void **vec) { - cassert(config_prof); bt->vec = vec; @@ -269,7 +266,6 @@ bt_init(prof_bt_t *bt, void **vec) JEMALLOC_INLINE_C void prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); @@ -284,7 +280,6 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) JEMALLOC_INLINE_C void prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); @@ -326,7 +321,6 @@ prof_backtrace(prof_bt_t *bt) static _Unwind_Reason_Code prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { - cassert(config_prof); return (_URC_NO_REASON); @@ -525,7 +519,6 @@ prof_backtrace(prof_bt_t *bt) void prof_backtrace(prof_bt_t *bt) { - cassert(config_prof); not_reached(); } @@ -542,7 +535,6 @@ prof_gctx_mutex_choose(void) static malloc_mutex_t * prof_tdata_mutex_choose(uint64_t thr_uid) { - return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]); } @@ -576,7 +568,6 @@ static void prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_tdata_t *tdata) { - cassert(config_prof); /* @@ -612,7 +603,6 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); if (opt_prof_accum) @@ 
-627,7 +617,6 @@ prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) static bool prof_gctx_should_destroy(prof_gctx_t *gctx) { - if (opt_prof_accum) return (false); if (!tctx_tree_empty(&gctx->tctxs)) @@ -1044,7 +1033,6 @@ prof_dump_printf(bool propagate_err, const char *format, ...) static void prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); malloc_mutex_lock(tsdn, tctx->gctx->lock); @@ -1077,7 +1065,6 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) static void prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; @@ -1173,7 +1160,6 @@ label_return: static void prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { - cassert(config_prof); malloc_mutex_lock(tsdn, gctx->lock); @@ -1421,7 +1407,6 @@ prof_open_maps(const char *format, ...) 
static int prof_getpid(void) { - #ifdef _WIN32 return (GetCurrentProcessId()); #else @@ -1491,7 +1476,6 @@ static void prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, const char *filename) { - #ifdef JEMALLOC_PROF /* * Scaling is equivalent AdjustSamples() in jeprof, but the result may @@ -1640,7 +1624,6 @@ label_open_close_error: static void prof_dump_filename(char *filename, char v, uint64_t vseq) { - cassert(config_prof); if (vseq != VSEQ_INVALID) { @@ -1844,7 +1827,6 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } @@ -1852,7 +1834,6 @@ prof_tdata_init(tsd_t *tsd) static bool prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { - if (tdata->attached && !even_if_attached) return (false); if (ckh_count(&tdata->bt2tctx) != 0) @@ -1864,7 +1845,6 @@ static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, tdata->lock); return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); @@ -1874,7 +1854,6 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); @@ -1893,7 +1872,6 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); prof_tdata_destroy_locked(tsd, tdata, even_if_attached); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); @@ -2162,7 +2140,6 @@ prof_gdump_set(tsdn_t *tsdn, bool gdump) void prof_boot0(void) { - cassert(config_prof); memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, @@ -2172,7 +2149,6 @@ prof_boot0(void) void prof_boot1(void) { - cassert(config_prof); /* @@ 
-2198,7 +2174,6 @@ prof_boot1(void) bool prof_boot2(tsd_t *tsd) { - cassert(config_prof); if (opt_prof) { @@ -2292,7 +2267,6 @@ prof_boot2(tsd_t *tsd) void prof_prefork0(tsdn_t *tsdn) { - if (opt_prof) { unsigned i; @@ -2309,7 +2283,6 @@ prof_prefork0(tsdn_t *tsdn) void prof_prefork1(tsdn_t *tsdn) { - if (opt_prof) { malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); @@ -2322,7 +2295,6 @@ prof_prefork1(tsdn_t *tsdn) void prof_postfork_parent(tsdn_t *tsdn) { - if (opt_prof) { unsigned i; @@ -2345,7 +2317,6 @@ prof_postfork_parent(tsdn_t *tsdn) void prof_postfork_child(tsdn_t *tsdn) { - if (opt_prof) { unsigned i; diff --git a/src/rtree.c b/src/rtree.c index fd5e85df..43f21652 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -4,7 +4,6 @@ static unsigned hmin(unsigned ha, unsigned hb) { - return (ha < hb ? ha : hb); } @@ -71,7 +70,6 @@ rtree_new(rtree_t *rtree, unsigned bits) static rtree_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return ((rtree_elm_t *)base_alloc(tsdn, b0get(), nelms * sizeof(rtree_elm_t), CACHELINE)); } @@ -88,7 +86,6 @@ rtree_node_alloc_t *rtree_node_alloc = JEMALLOC_N(rtree_node_alloc_impl); UNUSED static void rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { - /* Nodes are never deleted during normal operation. 
*/ not_reached(); } @@ -103,7 +100,6 @@ static void rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, unsigned level) { - if (level + 1 < rtree->height) { size_t nchildren, i; @@ -157,7 +153,6 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, rtree_elm_t * rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level) { - return (rtree_node_init(tsdn, rtree, level, &rtree->levels[level].subtree)); } @@ -166,7 +161,6 @@ rtree_elm_t * rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level) { - return (rtree_node_init(tsdn, rtree, level+1, &elm->child)); } diff --git a/src/tcache.c b/src/tcache.c index fad52777..66e255d6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -26,7 +26,6 @@ static tcaches_t *tcaches_avail; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr)); } @@ -249,7 +248,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, static void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->lock); @@ -262,7 +260,6 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) static void tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - if (config_stats) { /* Unlink from list of extant tcaches. 
*/ malloc_mutex_lock(tsdn, &arena->lock); @@ -287,7 +284,6 @@ void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, arena_t *newarena) { - tcache_arena_dissociate(tsdn, tcache, oldarena); tcache_arena_associate(tsdn, tcache, newarena); } @@ -473,7 +469,6 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) static void tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { - if (elm->tcache == NULL) return; tcache_destroy(tsd, elm->tcache); @@ -483,7 +478,6 @@ tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) void tcaches_flush(tsd_t *tsd, unsigned ind) { - tcaches_elm_flush(tsd, &tcaches[ind]); } diff --git a/src/tsd.c b/src/tsd.c index 5d9fc9f9..b4d7e795 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -14,21 +14,18 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) void * malloc_tsd_malloc(size_t size) { - return (a0malloc(CACHELINE_CEILING(size))); } void malloc_tsd_dalloc(void *wrapper) { - a0dalloc(wrapper); } void malloc_tsd_no_cleanup(void *arg) { - not_reached(); } @@ -61,7 +58,6 @@ _malloc_thread_cleanup(void) void malloc_tsd_cleanup_register(bool (*f)(void)) { - assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); cleanups[ncleanups] = f; ncleanups++; @@ -127,7 +123,6 @@ malloc_tsd_boot0(void) void malloc_tsd_boot1(void) { - tsd_boot1(); *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; } @@ -136,7 +131,6 @@ malloc_tsd_boot1(void) static BOOL WINAPI _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { - switch (fdwReason) { #ifdef JEMALLOC_LAZY_LOCK case DLL_THREAD_ATTACH: @@ -194,7 +188,6 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { - malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); malloc_mutex_unlock(TSDN_NULL, &head->lock); diff --git a/src/util.c b/src/util.c index dd8c2363..c6ac4e11 100644 --- a/src/util.c +++ b/src/util.c @@ -48,7 +48,6 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, 
char *s, static void wrtmessage(void *cbopaque, const char *s) { - #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid @@ -74,7 +73,6 @@ JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); void malloc_write(const char *s) { - if (je_malloc_message != NULL) je_malloc_message(NULL, s); else @@ -88,7 +86,6 @@ malloc_write(const char *s) int buferror(int err, char *buf, size_t buflen) { - #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); @@ -292,7 +289,6 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) static char * o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) { - s = u2s(x, 8, false, s, slen_p); if (alt_form && *s != '0') { s--; @@ -305,7 +301,6 @@ o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) static char * x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { - s = u2s(x, 16, uppercase, s, slen_p); if (alt_form) { s -= 2; diff --git a/src/witness.c b/src/witness.c index 0f5c0d73..ffc7e247 100644 --- a/src/witness.c +++ b/src/witness.c @@ -5,7 +5,6 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness_comp_t *comp, void *opaque) { - witness->name = name; witness->rank = rank; witness->comp = comp; @@ -41,7 +40,6 @@ witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); void witness_owner_error(const witness_t *witness) { - malloc_printf(": Should own %s(%u)\n", witness->name, witness->rank); abort(); @@ -59,7 +57,6 @@ witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); void witness_not_owner_error(const witness_t *witness) { - malloc_printf(": Should not own %s(%u)\n", witness->name, witness->rank); abort(); @@ -97,7 +94,6 @@ witness_lockless_error_t *witness_lockless_error = void witnesses_cleanup(tsd_t *tsd) { - witness_assert_lockless(tsd_tsdn(tsd)); /* Do nothing. 
*/ @@ -106,14 +102,12 @@ witnesses_cleanup(tsd_t *tsd) void witness_prefork(tsd_t *tsd) { - tsd_witness_fork_set(tsd, true); } void witness_postfork_parent(tsd_t *tsd) { - tsd_witness_fork_set(tsd, false); } diff --git a/src/zone.c b/src/zone.c index 66ba02b9..1fcff64f 100644 --- a/src/zone.c +++ b/src/zone.c @@ -47,7 +47,6 @@ static void zone_force_unlock(malloc_zone_t *zone); static size_t zone_size(malloc_zone_t *zone, void *ptr) { - /* * There appear to be places within Darwin (such as setenv(3)) that * cause calls to this function with pointers that *no* zone owns. If @@ -63,14 +62,12 @@ zone_size(malloc_zone_t *zone, void *ptr) static void * zone_malloc(malloc_zone_t *zone, size_t size) { - return (je_malloc(size)); } static void * zone_calloc(malloc_zone_t *zone, size_t num, size_t size) { - return (je_calloc(num, size)); } @@ -87,7 +84,6 @@ zone_valloc(malloc_zone_t *zone, size_t size) static void zone_free(malloc_zone_t *zone, void *ptr) { - if (ivsalloc(tsdn_fetch(), ptr) != 0) { je_free(ptr); return; @@ -99,7 +95,6 @@ zone_free(malloc_zone_t *zone, void *ptr) static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { - if (ivsalloc(tsdn_fetch(), ptr) != 0) return (je_realloc(ptr, size)); @@ -138,7 +133,6 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) static void * zone_destroy(malloc_zone_t *zone) { - /* This function should never be called. 
*/ not_reached(); return (NULL); @@ -147,7 +141,6 @@ zone_destroy(malloc_zone_t *zone) static size_t zone_good_size(malloc_zone_t *zone, size_t size) { - if (size == 0) size = 1; return (s2u(size)); @@ -156,7 +149,6 @@ zone_good_size(malloc_zone_t *zone, size_t size) static void zone_force_lock(malloc_zone_t *zone) { - if (isthreaded) jemalloc_prefork(); } @@ -164,7 +156,6 @@ zone_force_lock(malloc_zone_t *zone) static void zone_force_unlock(malloc_zone_t *zone) { - /* * Call jemalloc_postfork_child() rather than * jemalloc_postfork_parent(), because this function is executed by both @@ -179,7 +170,6 @@ zone_force_unlock(malloc_zone_t *zone) static void zone_init(void) { - jemalloc_zone.size = (void *)zone_size; jemalloc_zone.malloc = (void *)zone_malloc; jemalloc_zone.calloc = (void *)zone_calloc; @@ -297,7 +287,6 @@ JEMALLOC_ATTR(constructor) void zone_register(void) { - /* * If something else replaced the system default zone allocator, don't * register jemalloc's. diff --git a/test/include/test/math.h b/test/include/test/math.h index b057b29a..1728d60f 100644 --- a/test/include/test/math.h +++ b/test/include/test/math.h @@ -305,7 +305,6 @@ pt_chi2(double p, double df, double ln_gamma_df_2) JEMALLOC_INLINE double pt_gamma(double p, double shape, double scale, double ln_gamma_shape) { - return (pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale); } #endif diff --git a/test/include/test/mq.h b/test/include/test/mq.h index 7c4df493..a974eb90 100644 --- a/test/include/test/mq.h +++ b/test/include/test/mq.h @@ -46,7 +46,6 @@ a_prefix##init(a_mq_type *mq) { \ a_attr void \ a_prefix##fini(a_mq_type *mq) \ { \ - \ mtx_fini(&mq->lock); \ } \ a_attr unsigned \ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 58032da8..1d9e423e 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -63,7 +63,6 @@ TEST_END int main(void) { - return (test( test_MALLOCX_ARENA)); } diff --git 
a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 36fb6997..52b69acb 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -10,7 +10,6 @@ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -130,7 +129,6 @@ TEST_END int main(void) { - return (test( test_alignment_errors, test_oom_errors, diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 6ce145b3..7570c52f 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -98,7 +98,6 @@ label_ENOENT: TEST_BEGIN(test_main_thread) { - thd_start(NULL); } TEST_END @@ -115,7 +114,6 @@ TEST_END int main(void) { - /* Run tests multiple times to check for bad interactions. */ return (test( test_main_thread, diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index eeb93c47..4a87a3ba 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -12,7 +12,6 @@ TEST_END int main() { - return (test( test_basic)); } diff --git a/test/integration/extent.c b/test/integration/extent.c index e347b66d..30849b0c 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -176,7 +176,6 @@ TEST_END int main(void) { - return (test( test_extent_manual_hook, test_extent_auto_hook)); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 2298f729..7617b1b7 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -20,7 +20,6 @@ get_nsizes_impl(const char *cmd) static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); } @@ -46,7 +45,6 @@ get_size_impl(const char *cmd, size_t ind) static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); } @@ -58,7 +56,6 @@ get_large_size(size_t ind) static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ 
-225,7 +222,6 @@ TEST_END int main(void) { - return (test( test_overflow, test_oom, diff --git a/test/integration/overflow.c b/test/integration/overflow.c index 3e1e15f9..ad867e7c 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -43,7 +43,6 @@ TEST_END int main(void) { - return (test( test_overflow)); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 9f3156ac..dace10f7 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -10,7 +10,6 @@ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -124,7 +123,6 @@ TEST_END int main(void) { - return (test( test_alignment_errors, test_oom_errors, diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index dd89e8cb..0a8b50c7 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -16,7 +16,6 @@ get_nsizes_impl(const char *cmd) static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); } @@ -42,7 +41,6 @@ get_size_impl(const char *cmd, size_t ind) static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); } @@ -249,7 +247,6 @@ TEST_END int main(void) { - return (test( test_grow_and_shrink, test_zero, diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index f92e0589..5d0a8f80 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -50,7 +50,6 @@ TEST_END int main(void) { - return (test( test_basic, test_alignment_and_size)); diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 7a35a635..cf8240d1 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -75,7 +75,6 @@ TEST_END int main(void) { - return (test( test_thread_arena)); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 2c2825e1..1394371b 100644 
--- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -86,7 +86,6 @@ label_ENOENT: TEST_BEGIN(test_main_thread) { - thd_start(NULL); } TEST_END @@ -103,7 +102,6 @@ TEST_END int main(void) { - /* Run tests multiple times to check for bad interactions. */ return (test( test_main_thread, diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index d35ca39e..647404a7 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -87,14 +87,12 @@ get_nsizes_impl(const char *cmd) static unsigned get_nsmall(void) { - return (get_nsizes_impl("arenas.nbins")); } static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); } @@ -120,14 +118,12 @@ get_size_impl(const char *cmd, size_t ind) static size_t get_small_size(size_t ind) { - return (get_size_impl("arenas.bin.0.size", ind)); } static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); } @@ -397,7 +393,6 @@ TEST_END int main(void) { - return (test( test_same_size, test_extra_no_move, diff --git a/test/src/btalloc.c b/test/src/btalloc.c index 9a253d97..a78cb89b 100644 --- a/test/src/btalloc.c +++ b/test/src/btalloc.c @@ -3,6 +3,5 @@ void * btalloc(size_t size, unsigned bits) { - return (btalloc_0(size, bits)); } diff --git a/test/src/mq.c b/test/src/mq.c index 40b31c15..47f362c0 100644 --- a/test/src/mq.c +++ b/test/src/mq.c @@ -7,7 +7,6 @@ void mq_nanosleep(unsigned ns) { - assert(ns <= 1000*1000*1000); #ifdef _WIN32 diff --git a/test/src/mtx.c b/test/src/mtx.c index 8a5dfdd9..bbfec4ac 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -7,7 +7,6 @@ bool mtx_init(mtx_t *mtx) { - #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) return (true); @@ -33,7 +32,6 @@ mtx_init(mtx_t *mtx) void mtx_fini(mtx_t *mtx) { - #ifdef _WIN32 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) @@ -45,7 +43,6 @@ mtx_fini(mtx_t *mtx) void mtx_lock(mtx_t 
*mtx) { - #ifdef _WIN32 EnterCriticalSection(&mtx->lock); #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) @@ -60,7 +57,6 @@ mtx_lock(mtx_t *mtx) void mtx_unlock(mtx_t *mtx) { - #ifdef _WIN32 LeaveCriticalSection(&mtx->lock); #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) diff --git a/test/src/test.c b/test/src/test.c index d70cc750..345cc1c1 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -34,7 +34,6 @@ test_fail(const char *format, ...) static const char * test_status_string(test_status_t test_status) { - switch (test_status) { case test_status_pass: return "pass"; case test_status_skip: return "skip"; @@ -46,7 +45,6 @@ test_status_string(test_status_t test_status) void p_test_init(const char *name) { - test_count++; test_status = test_status_pass; test_name = name; @@ -55,7 +53,6 @@ p_test_init(const char *name) void p_test_fini(void) { - test_counts[test_status]++; malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); } @@ -127,7 +124,6 @@ p_test_no_malloc_init(test_t *t, ...) 
void p_test_fail(const char *prefix, const char *message) { - malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } diff --git a/test/src/thd.c b/test/src/thd.c index c9d00658..e3167089 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -13,7 +13,6 @@ thd_create(thd_t *thd, void *(*proc)(void *), void *arg) void thd_join(thd_t thd, void **ret) { - if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) { DWORD exit_code; GetExitCodeThread(thd, (LPDWORD) &exit_code); @@ -25,7 +24,6 @@ thd_join(thd_t thd, void **ret) void thd_create(thd_t *thd, void *(*proc)(void *), void *arg) { - if (pthread_create(thd, NULL, proc, arg) != 0) test_fail("Error in pthread_create()\n"); } @@ -33,7 +31,6 @@ thd_create(thd_t *thd, void *(*proc)(void *), void *arg) void thd_join(thd_t thd, void **ret) { - pthread_join(thd, ret); } #endif diff --git a/test/src/timer.c b/test/src/timer.c index 3c7e63a2..82f69d0a 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -3,7 +3,6 @@ void timer_start(timedelta_t *timer) { - nstime_init(&timer->t0, 0); nstime_update(&timer->t0); } @@ -11,7 +10,6 @@ timer_start(timedelta_t *timer) void timer_stop(timedelta_t *timer) { - nstime_copy(&timer->t1, &timer->t0); nstime_update(&timer->t1); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 7dc45f89..c599d9d3 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -65,7 +65,6 @@ mallocx_free(void) TEST_BEGIN(test_malloc_vs_mallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "malloc", malloc_free, "mallocx", mallocx_free); } @@ -95,7 +94,6 @@ malloc_sdallocx(void) TEST_BEGIN(test_free_vs_dallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free, "dallocx", malloc_dallocx); } @@ -103,7 +101,6 @@ TEST_END TEST_BEGIN(test_dallocx_vs_sdallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx, "sdallocx", malloc_sdallocx); } @@ -140,7 +137,6 @@ malloc_sallocx_free(void) 
TEST_BEGIN(test_mus_vs_sallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size", malloc_mus_free, "sallocx", malloc_sallocx_free); } @@ -163,7 +159,6 @@ malloc_nallocx_free(void) TEST_BEGIN(test_sallocx_vs_nallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "sallocx", malloc_sallocx_free, "nallocx", malloc_nallocx_free); } @@ -172,7 +167,6 @@ TEST_END int main(void) { - return (test( test_malloc_vs_mallocx, test_free_vs_dallocx, diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index ba4be870..cf52670b 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -1596,7 +1596,6 @@ TEST_END int main(void) { - return (test( test_gen_rand_32, test_by_array_32, diff --git a/test/unit/a0.c b/test/unit/a0.c index b9ba45a3..87f7e527 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -13,7 +13,6 @@ TEST_END int main(void) { - return (test_no_malloc_init( test_a0)); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 65ff1031..257f9729 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -20,14 +20,12 @@ get_nsizes_impl(const char *cmd) static unsigned get_nsmall(void) { - return (get_nsizes_impl("arenas.nbins")); } static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); } @@ -53,14 +51,12 @@ get_size_impl(const char *cmd, size_t ind) static size_t get_small_size(size_t ind) { - return (get_size_impl("arenas.bin.0.size", ind)); } static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); } @@ -164,14 +160,12 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) static void do_arena_reset(unsigned arena_ind) { - do_arena_reset_destroy("arena.0.reset", arena_ind); } static void do_arena_destroy(unsigned arena_ind) { - do_arena_reset_destroy("arena.0.destroy", arena_ind); } @@ -214,7 +208,6 @@ arena_i_initialized(unsigned arena_ind, bool refresh) TEST_BEGIN(test_arena_destroy_initial) { - assert_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, 
false), "Destroyed arena stats should not be initialized"); } @@ -269,7 +262,6 @@ static bool extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? "true" : "false", arena_ind); @@ -338,7 +330,6 @@ TEST_END int main(void) { - return (test( test_arena_reset, test_arena_destroy_initial, diff --git a/test/unit/atomic.c b/test/unit/atomic.c index b8933a69..1d143689 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -68,7 +68,6 @@ typedef struct p##_test_s p##_test_t; TEST_STRUCT(u64, uint64_t) TEST_BEGIN(test_atomic_u64) { - #if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) test_skip("64-bit atomic operations not supported"); #else @@ -80,7 +79,6 @@ TEST_END TEST_STRUCT(u32, uint32_t) TEST_BEGIN(test_atomic_u32) { - TEST_BODY(u32, uint32_t, uint32_t, u32, "#"FMTx32); } TEST_END @@ -88,7 +86,6 @@ TEST_END TEST_STRUCT(p, void *) TEST_BEGIN(test_atomic_p) { - TEST_BODY(p, void *, uintptr_t, ptr, "p"); } TEST_END @@ -96,7 +93,6 @@ TEST_END TEST_STRUCT(zu, size_t) TEST_BEGIN(test_atomic_zu) { - TEST_BODY(zu, size_t, size_t, zu, "#zx"); } TEST_END @@ -104,7 +100,6 @@ TEST_END TEST_STRUCT(u, unsigned) TEST_BEGIN(test_atomic_u) { - TEST_BODY(u, unsigned, unsigned, u, "#x"); } TEST_END @@ -112,7 +107,6 @@ TEST_END int main(void) { - return (test( test_atomic_u64, test_atomic_u32, diff --git a/test/unit/base.c b/test/unit/base.c index 8f97e8bf..9aa43eab 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -208,7 +208,6 @@ TEST_END int main(void) { - return (test( test_base_hooks_default, test_base_hooks_null, diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 10d47c76..b502bfea 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -126,7 +126,6 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) TEST_BEGIN(test_bitmap_initializer) { - #define 
NB(nbits) { \ if (nbits <= BITMAP_MAXBITS) { \ bitmap_info_t binfo = \ @@ -339,7 +338,6 @@ TEST_END int main(void) { - return (test( test_bitmap_initializer, test_bitmap_size, diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 2cbc2268..1f576689 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -206,7 +206,6 @@ TEST_END int main(void) { - return (test( test_new_delete, test_count_insert_search_remove, diff --git a/test/unit/decay.c b/test/unit/decay.c index 7efecf0f..b3b1dd9d 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -12,14 +12,12 @@ static bool monotonic_mock; static bool nstime_monotonic_mock(void) { - return (monotonic_mock); } static bool nstime_update_mock(nstime_t *time) { - nupdates_mock++; if (monotonic_mock) nstime_copy(time, &time_mock); @@ -357,7 +355,6 @@ TEST_END int main(void) { - return (test( test_decay_ticks, test_decay_ticker, diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 43fa3604..a5c1b7a0 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -138,7 +138,6 @@ TEST_END int main(void) { - return (test( test_small_extent_size, test_large_extent_size, diff --git a/test/unit/fork.c b/test/unit/fork.c index c530797c..58091c66 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -58,7 +58,6 @@ TEST_END int main(void) { - return (test( test_fork)); } diff --git a/test/unit/hash.c b/test/unit/hash.c index 010c9d76..ff237779 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -38,7 +38,6 @@ typedef enum { static int hash_variant_bits(hash_variant_t variant) { - switch (variant) { case hash_variant_x86_32: return (32); case hash_variant_x86_128: return (128); @@ -50,7 +49,6 @@ hash_variant_bits(hash_variant_t variant) static const char * hash_variant_string(hash_variant_t variant) { - switch (variant) { case hash_variant_x86_32: return ("hash_x86_32"); case hash_variant_x86_128: return ("hash_x86_128"); @@ -155,21 +153,18 @@ hash_variant_verify(hash_variant_t variant) 
TEST_BEGIN(test_hash_x86_32) { - hash_variant_verify(hash_variant_x86_32); } TEST_END TEST_BEGIN(test_hash_x86_128) { - hash_variant_verify(hash_variant_x86_128); } TEST_END TEST_BEGIN(test_hash_x64_128) { - hash_variant_verify(hash_variant_x64_128); } TEST_END @@ -177,7 +172,6 @@ TEST_END int main(void) { - return (test( test_hash_x86_32, test_hash_x86_128, diff --git a/test/unit/junk.c b/test/unit/junk.c index 680f0d21..5f34d051 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -17,7 +17,6 @@ static bool saw_junking; static void watch_junking(void *p) { - watch_for_junking = p; saw_junking = false; } @@ -55,7 +54,6 @@ large_dalloc_junk_intercept(void *ptr, size_t usize) static void large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) { - large_dalloc_maybe_junk_orig(ptr, usize); if (ptr == watch_for_junking) saw_junking = true; @@ -130,7 +128,6 @@ test_junk(size_t sz_min, size_t sz_max) TEST_BEGIN(test_junk_small) { - test_skip_if(!config_fill); test_junk(1, SMALL_MAXCLASS-1); } @@ -138,7 +135,6 @@ TEST_END TEST_BEGIN(test_junk_large) { - test_skip_if(!config_fill); test_junk(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); } @@ -147,7 +143,6 @@ TEST_END int main(void) { - return (test( test_junk_small, test_junk_large)); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index fbe76cb4..5b734e1d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -121,7 +121,6 @@ TEST_END TEST_BEGIN(test_mallctl_config) { - #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ @@ -551,7 +550,6 @@ TEST_END TEST_BEGIN(test_arenas_constants) { - #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ @@ -571,7 +569,6 @@ TEST_END TEST_BEGIN(test_arenas_bin_constants) { - #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ @@ -591,7 +588,6 @@ TEST_END TEST_BEGIN(test_arenas_lextent_constants) { - #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, 
expected) do { \ t name; \ size_t sz = sizeof(t); \ @@ -626,7 +622,6 @@ TEST_END TEST_BEGIN(test_stats_arenas) { - #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ @@ -647,7 +642,6 @@ TEST_END int main(void) { - return (test( test_mallctl_errors, test_mallctlnametomib_errors, diff --git a/test/unit/math.c b/test/unit/math.c index adb72bed..8e5ec61b 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -387,7 +387,6 @@ TEST_END int main(void) { - return (test( test_ln_gamma_factorial, test_ln_gamma_misc, diff --git a/test/unit/mq.c b/test/unit/mq.c index bde2a480..bd289c54 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -85,7 +85,6 @@ TEST_END int main(void) { - return (test( test_mq_basic, test_mq_threaded)); diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 96ff6948..2eccc98f 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -53,7 +53,6 @@ TEST_END int main(void) { - return (test( test_mtx_basic, test_mtx_race)); diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 0368bc26..6548ba23 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -178,7 +178,6 @@ TEST_END TEST_BEGIN(test_nstime_monotonic) { - nstime_monotonic(); } TEST_END @@ -211,7 +210,6 @@ TEST_END int main(void) { - return (test( test_nstime_init, test_nstime_init2, diff --git a/test/unit/pack.c b/test/unit/pack.c index 81ded4ec..316b6df5 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -161,7 +161,6 @@ TEST_END int main(void) { - return (test( test_pack)); } diff --git a/test/unit/pages.c b/test/unit/pages.c index f297215a..1e6add95 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -24,7 +24,6 @@ TEST_END int main(void) { - return (test( test_pages_huge)); } diff --git a/test/unit/ph.c b/test/unit/ph.c index da442f07..10bf99e4 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -148,7 +148,6 @@ TEST_END static void node_remove(heap_t *heap, node_t *node) { - heap_remove(heap, node); node->magic = 0; @@ -283,7 +282,6 @@ TEST_END int 
main(void) { - return (test( test_ph_empty, test_ph_random)); diff --git a/test/unit/prng.c b/test/unit/prng.c index 80c9d733..f32d82a6 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -114,35 +114,30 @@ test_prng_lg_range_zu(bool atomic) TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { - test_prng_lg_range_u32(false); } TEST_END TEST_BEGIN(test_prng_lg_range_u32_atomic) { - test_prng_lg_range_u32(true); } TEST_END TEST_BEGIN(test_prng_lg_range_u64_nonatomic) { - test_prng_lg_range_u64(); } TEST_END TEST_BEGIN(test_prng_lg_range_zu_nonatomic) { - test_prng_lg_range_zu(false); } TEST_END TEST_BEGIN(test_prng_lg_range_zu_atomic) { - test_prng_lg_range_zu(true); } TEST_END @@ -212,35 +207,30 @@ test_prng_range_zu(bool atomic) TEST_BEGIN(test_prng_range_u32_nonatomic) { - test_prng_range_u32(false); } TEST_END TEST_BEGIN(test_prng_range_u32_atomic) { - test_prng_range_u32(true); } TEST_END TEST_BEGIN(test_prng_range_u64_nonatomic) { - test_prng_range_u64(); } TEST_END TEST_BEGIN(test_prng_range_zu_nonatomic) { - test_prng_range_zu(false); } TEST_END TEST_BEGIN(test_prng_range_zu_atomic) { - test_prng_range_zu(true); } TEST_END @@ -248,7 +238,6 @@ TEST_END int main(void) { - return (test( test_prng_lg_range_u32_nonatomic, test_prng_lg_range_u32_atomic, diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index d941b5bc..41ebeea5 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -24,7 +24,6 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) static void * alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { - return (btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration)); } @@ -86,7 +85,6 @@ TEST_END int main(void) { - return (test( test_idump)); } diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index d00943a4..d3b341d7 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -38,7 +38,6 @@ static void mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, 
int line) { - mallctl_bool_get("prof.active", prof_active_old_expected, func, line); } #define mallctl_prof_active_get(a) \ @@ -48,7 +47,6 @@ static void mallctl_prof_active_set_impl(bool prof_active_old_expected, bool prof_active_new, const char *func, int line) { - mallctl_bool_set("prof.active", prof_active_old_expected, prof_active_new, func, line); } @@ -59,7 +57,6 @@ static void mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, const char *func, int line) { - mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected, func, line); } @@ -70,7 +67,6 @@ static void mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, bool thread_prof_active_new, const char *func, int line) { - mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected, thread_prof_active_new, func, line); } @@ -96,7 +92,6 @@ prof_sampling_probe_impl(bool expect_sample, const char *func, int line) TEST_BEGIN(test_prof_active) { - test_skip_if(!config_prof); mallctl_prof_active_get(true); @@ -131,7 +126,6 @@ TEST_END int main(void) { - return (test( test_prof_active)); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index cb99acdf..53f7cad6 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -76,7 +76,6 @@ TEST_END int main(void) { - return (test( test_gdump)); } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index c293350f..43824c6a 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -53,7 +53,6 @@ TEST_END int main(void) { - return (test( test_idump)); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 59d70796..cc13e378 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -19,7 +19,6 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure"); } @@ -98,7 +97,6 @@ 
static bool prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { - prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); @@ -292,7 +290,6 @@ TEST_END int main(void) { - /* Intercept dumping prior to running any tests. */ prof_dump_open = prof_dump_open_intercept; diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 9ec54977..8699936b 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -26,7 +26,6 @@ static void mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", @@ -124,7 +123,6 @@ TEST_END int main(void) { - return (test( test_prof_thread_name_validation, test_prof_thread_name_threaded)); diff --git a/test/unit/ql.c b/test/unit/ql.c index 05fad450..2ebb4502 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -198,7 +198,6 @@ TEST_END int main(void) { - return (test( test_ql_empty, test_ql_tail_insert, diff --git a/test/unit/qr.c b/test/unit/qr.c index 8b764e11..7c9c1029 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -238,7 +238,6 @@ TEST_END int main(void) { - return (test( test_qr_one, test_qr_after_insert, diff --git a/test/unit/rb.c b/test/unit/rb.c index cf3d3a78..56e00219 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -347,7 +347,6 @@ TEST_END int main(void) { - return (test( test_rb_empty, test_rb_random)); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 03f4e269..d2f37055 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -24,7 +24,6 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) static void rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { - if (rtree != test_rtree) { rtree_node_dalloc_orig(tsdn, rtree, node); return; @@ -283,7 +282,6 @@ TEST_END int 
main(void) { - rtree_node_alloc_orig = rtree_node_alloc; rtree_node_alloc = rtree_node_alloc_intercept; rtree_node_dalloc_orig = rtree_node_dalloc; diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 9b47b204..f7c14bc0 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -178,7 +178,6 @@ TEST_END int main(void) { - return (test( test_size_classes, test_psize_classes, diff --git a/test/unit/slab.c b/test/unit/slab.c index 42e82a8b..7e6a62f5 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -29,7 +29,6 @@ TEST_END int main(void) { - return (test( test_arena_slab_regind)); } diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 4cfb2134..071aede2 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -98,7 +98,6 @@ TEST_END int main(void) { - return (test( test_smoothstep_integral, test_smoothstep_monotonic, diff --git a/test/unit/stats.c b/test/unit/stats.c index a99a88f0..18856f12 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -120,7 +120,6 @@ TEST_END void * thd_start(void *arg) { - return (NULL); } @@ -350,7 +349,6 @@ TEST_END int main(void) { - return (test( test_stats_summary, test_stats_large, diff --git a/test/unit/ticker.c b/test/unit/ticker.c index e737020a..b8af46c7 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -68,7 +68,6 @@ TEST_END int main(void) { - return (test( test_ticker_tick, test_ticker_ticks, diff --git a/test/unit/tsd.c b/test/unit/tsd.c index d5f96ac3..5313ef88 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -78,7 +78,6 @@ thd_start(void *arg) TEST_BEGIN(test_tsd_main_thread) { - thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END @@ -98,7 +97,6 @@ TEST_END int main(void) { - /* Core tsd bootstrapping must happen prior to data_tsd_boot(). 
*/ if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); diff --git a/test/unit/util.c b/test/unit/util.c index b1f9abd9..b891a199 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -33,21 +33,18 @@ TEST_BEGIN(test_pow2_ceil_u64) { - TEST_POW2_CEIL(uint64_t, u64, FMTu64); } TEST_END TEST_BEGIN(test_pow2_ceil_u32) { - TEST_POW2_CEIL(uint32_t, u32, FMTu32); } TEST_END TEST_BEGIN(test_pow2_ceil_zu) { - TEST_POW2_CEIL(size_t, zu, "zu"); } TEST_END @@ -307,7 +304,6 @@ TEST_END int main(void) { - return (test( test_pow2_ceil_u64, test_pow2_ceil_u32, diff --git a/test/unit/witness.c b/test/unit/witness.c index 2b012034..13593989 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -14,35 +14,30 @@ static void witness_lock_error_intercept(const witness_list_t *witnesses, const witness_t *witness) { - saw_lock_error = true; } static void witness_owner_error_intercept(const witness_t *witness) { - saw_owner_error = true; } static void witness_not_owner_error_intercept(const witness_t *witness) { - saw_not_owner_error = true; } static void witness_lockless_error_intercept(const witness_list_t *witnesses) { - saw_lockless_error = true; } static int witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); @@ -54,7 +49,6 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) static int witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, void *ob) { - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); @@ -273,7 +267,6 @@ TEST_END int main(void) { - return (test( test_witness, test_witness_comp, diff --git a/test/unit/zero.c b/test/unit/zero.c index c025c831..c752954c 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -47,7 +47,6 @@ test_zero(size_t sz_min, size_t sz_max) TEST_BEGIN(test_zero_small) { - test_skip_if(!config_fill); test_zero(1, SMALL_MAXCLASS-1); 
} @@ -55,7 +54,6 @@ TEST_END TEST_BEGIN(test_zero_large) { - test_skip_if(!config_fill); test_zero(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); } @@ -64,7 +62,6 @@ TEST_END int main(void) { - return (test( test_zero_small, test_zero_large)); From e8990dc7c7b3083ce05823e581780c2b22f5cbbb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 13 Jan 2017 15:22:16 -0800 Subject: [PATCH 0588/2608] Remove redundent stats-merging logic when destroying tcache. The removed stats merging logic is already taken care of by tcache_flush. --- src/tcache.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 66e255d6..d1323418 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -357,11 +357,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_small(tsd, tcache, tbin, i, 0); - if (config_stats && tbin->tstats.nrequests != 0) { - arena_bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + if (config_stats) { + assert(tbin->tstats.nrequests == 0); } } @@ -369,12 +366,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_large(tsd, tbin, i, 0, tcache); - if (config_stats && tbin->tstats.nrequests != 0) { - malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[i - NBINS].nrequests += - tbin->tstats.nrequests; - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); + if (config_stats) { + assert(tbin->tstats.nrequests == 0); } } From 41aa41853c1101c5e6f1b5759b830dff22e560df Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 13 Jan 2017 18:17:54 -0800 Subject: [PATCH 0589/2608] Fix style nits. 
--- src/jemalloc_cpp.cpp | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 84d47aed..5cecfdbf 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -17,26 +17,25 @@ void *operator new(std::size_t size); void *operator new[](std::size_t size); -void *operator new(std::size_t size, const std::nothrow_t&) noexcept; -void *operator new[](std::size_t size, const std::nothrow_t&) noexcept; -void operator delete(void* ptr) noexcept; -void operator delete[](void* ptr) noexcept; -void operator delete(void* ptr, const std::nothrow_t&) noexcept; -void operator delete[](void* ptr, const std::nothrow_t&) noexcept; +void *operator new(std::size_t size, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, const std::nothrow_t &) noexcept; +void operator delete(void *ptr) noexcept; +void operator delete[](void *ptr) noexcept; +void operator delete(void *ptr, const std::nothrow_t &) noexcept; +void operator delete[](void *ptr, const std::nothrow_t &) noexcept; #if __cpp_sized_deallocation >= 201309 /* C++14's sized-delete operators. 
*/ -void operator delete(void* ptr, std::size_t size) noexcept; -void operator delete[](void* ptr, std::size_t size) noexcept; +void operator delete(void *ptr, std::size_t size) noexcept; +void operator delete[](void *ptr, std::size_t size) noexcept; #endif - template JEMALLOC_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { - void* ptr = je_malloc(size); + void *ptr = je_malloc(size); if (likely(ptr != nullptr)) return (ptr); @@ -55,7 +54,7 @@ newImpl(std::size_t size) noexcept(IsNoExcept) try { handler(); - } catch (const std::bad_alloc&) { + } catch (const std::bad_alloc &) { break; } @@ -80,36 +79,36 @@ operator new[](std::size_t size) } void * -operator new(std::size_t size, const std::nothrow_t&) noexcept +operator new(std::size_t size, const std::nothrow_t &) noexcept { return (newImpl(size)); } void * -operator new[](std::size_t size, const std::nothrow_t&) noexcept +operator new[](std::size_t size, const std::nothrow_t &) noexcept { return (newImpl(size)); } void -operator delete(void* ptr) noexcept +operator delete(void *ptr) noexcept { je_free(ptr); } void -operator delete[](void* ptr) noexcept +operator delete[](void *ptr) noexcept { je_free(ptr); } void -operator delete(void* ptr, const std::nothrow_t&) noexcept +operator delete(void *ptr, const std::nothrow_t &) noexcept { je_free(ptr); } -void operator delete[](void* ptr, const std::nothrow_t&) noexcept +void operator delete[](void *ptr, const std::nothrow_t &) noexcept { je_free(ptr); } @@ -117,12 +116,12 @@ void operator delete[](void* ptr, const std::nothrow_t&) noexcept #if __cpp_sized_deallocation >= 201309 void -operator delete(void* ptr, std::size_t size) noexcept +operator delete(void *ptr, std::size_t size) noexcept { je_sdallocx(ptr, size, /*flags=*/0); } -void operator delete[](void* ptr, std::size_t size) noexcept +void operator delete[](void *ptr, std::size_t size) noexcept { je_sdallocx(ptr, size, /*flags=*/0); } From 8115f05b2675d5449af686ddecc0ae5d5fd23fc2 Mon Sep 17 
00:00:00 2001 From: Jason Evans Date: Fri, 13 Jan 2017 18:43:21 -0800 Subject: [PATCH 0590/2608] Add nullptr support to sized delete operators. --- src/jemalloc_cpp.cpp | 6 ++++++ test/integration/cpp/basic.cpp | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 5cecfdbf..984c944b 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -118,11 +118,17 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept void operator delete(void *ptr, std::size_t size) noexcept { + if (unlikely(ptr == nullptr)) { + return; + } je_sdallocx(ptr, size, /*flags=*/0); } void operator delete[](void *ptr, std::size_t size) noexcept { + if (unlikely(ptr == nullptr)) { + return; + } je_sdallocx(ptr, size, /*flags=*/0); } diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index 4a87a3ba..b208e1d1 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -6,6 +6,16 @@ TEST_BEGIN(test_basic) auto foo = new long(4); assert_ptr_not_null(foo, "Unexpected new[] failure"); delete foo; + // Test nullptr handling. + foo = nullptr; + delete foo; + + auto bar = new long; + assert_ptr_not_null(bar, "Unexpected new failure"); + delete bar; + // Test nullptr handling. + bar = nullptr; + delete bar; } TEST_END From de5e1aff2a96afb18383667954740509538daa86 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 17 Jan 2017 07:19:17 -0800 Subject: [PATCH 0591/2608] Formatting/comment fixes. --- src/arena.c | 4 ++-- src/extent.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index 80af3f99..70d71fcb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -895,8 +895,8 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) * own metadata structures, but if deallocation fails, that is the * application's decision/problem. 
In practice, retained extents are * leaked here if !config_munmap unless the application provided custom - * extent hooks, so best practice to either enable munmap (and avoid dss - * for arenas to be destroyed), or provide custom extent hooks that + * extent hooks, so best practice is to either enable munmap (and avoid + * dss for arenas to be destroyed), or provide custom extent hooks that * either unmap retained extents or track them for later use. */ for (i = 0; i < sizeof(arena->extents_retained)/sizeof(extent_heap_t); diff --git a/src/extent.c b/src/extent.c index 73f79c1c..27cf97cd 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1011,7 +1011,6 @@ extent_dalloc_default_impl(void *addr, size_t size) return (true); } - static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) From 1ff09534b58957a6f23b1711d986f79f070f2b06 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Jan 2017 11:09:24 -0800 Subject: [PATCH 0592/2608] Fix prof_realloc() regression. Mostly revert the prof_realloc() changes in 498856f44a30b31fe713a18eb2fc7c6ecf3a9f63 (Move slabs out of chunks.) so that prof_free_sampled_object() is called when appropriate. Leave the prof_tctx_[re]set() optimization in place, but add an assertion to verify that all eight cases are correctly handled. Add a comment to make clear the code ordering, so that the regression originally fixed by ea8d97b8978a0c0423f0ed64332463a25b787c3d (Fix prof_{malloc,free}_sample_object() call order in prof_realloc().) is not repeated. This resolves #499. 
--- Makefile.in | 1 + include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/prof_externs.h | 3 +- include/jemalloc/internal/prof_inlines.h | 35 ++-- src/prof.c | 165 +++++++++++++----- test/unit/prof_tctx.c | 57 ++++++ 6 files changed, 207 insertions(+), 55 deletions(-) create mode 100644 test/unit/prof_tctx.c diff --git a/Makefile.in b/Makefile.in index edc50b4b..1be7d191 100644 --- a/Makefile.in +++ b/Makefile.in @@ -179,6 +179,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c85219a9..745220e3 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -369,6 +369,7 @@ prof_boot0 prof_boot1 prof_boot2 prof_bt_count +prof_cnt_all prof_dump_header prof_dump_open prof_free diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 3f857145..76505f82 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,11 +48,12 @@ prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); size_t prof_bt_count(void); -const prof_cnt_t *prof_cnt_all(void); typedef int (prof_dump_open_t)(bool, const char *); extern prof_dump_open_t *prof_dump_open; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *prof_dump_header; +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, + uint64_t *accumobjs, uint64_t *accumbytes); #endif void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 
a1ea7a32..394b7b37 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -194,30 +194,39 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, } } - /* - * The following code must differentiate among eight possible cases, - * based on three boolean conditions. - */ sampled = ((uintptr_t)tctx > (uintptr_t)1U); old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); moved = (ptr != old_ptr); - /* - * The following block must only execute if this is a non-moving - * reallocation, because for moving reallocation the old allocation will - * be deallocated via a separate call. - */ - if (unlikely(old_sampled) && !moved) - prof_free_sampled_object(tsd, old_usize, old_tctx); - if (unlikely(sampled)) { prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, tctx); } else if (moved) { prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); - } else if (unlikely(old_sampled)) + } else if (unlikely(old_sampled)) { + /* + * prof_tctx_set() would work for the !moved case as well, but + * prof_tctx_reset() is slightly cheaper, and the proper thing + * to do here in the presence of explicit knowledge re: moved + * state. + */ prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); + } else { + assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), extent, ptr) == + (uintptr_t)1U); + } + + /* + * The prof_free_sampled_object() call must come after the + * prof_malloc_sample_object() call, because tctx and old_tctx may be + * the same, in which case reversing the call order could cause the tctx + * to be prematurely destroyed as a side effect of momentarily zeroed + * counters. 
+ */ + if (unlikely(old_sampled)) { + prof_free_sampled_object(tsd, old_usize, old_tctx); + } } JEMALLOC_ALWAYS_INLINE void diff --git a/src/prof.c b/src/prof.c index 237cbb50..b161acfb 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1530,95 +1530,178 @@ label_return: return (ret); } -static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) +static void +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_gctx_tree_t *gctxs) { - prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; size_t tabind; union { prof_gctx_t *p; void *v; } gctx; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; - prof_gctx_tree_t gctxs; - cassert(config_prof); - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) - return (true); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_enter(tsd, tdata); /* * Put gctx's in limbo and clear their counters in preparation for * summing. */ - gctx_tree_new(&gctxs); - for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) - prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, &gctxs); + gctx_tree_new(gctxs); + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); + } /* * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. 
*/ - prof_tdata_merge_iter_arg.tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg.cnt_all, 0, sizeof(prof_cnt_t)); + prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); + memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)&prof_tdata_merge_iter_arg); + (void *)prof_tdata_merge_iter_arg); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. */ - prof_gctx_merge_iter_arg.tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg.leak_ngctx = 0; - gctx_tree_iter(&gctxs, NULL, prof_gctx_merge_iter, - (void *)&prof_gctx_merge_iter_arg); + prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_merge_iter_arg->leak_ngctx = 0; + gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, + (void *)prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); +} +static bool +prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + prof_gctx_tree_t *gctxs) +{ /* Create dump file. */ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) - goto label_open_close_error; + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { + return true; + } /* Dump profile header. */ if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg.cnt_all)) + &prof_tdata_merge_iter_arg->cnt_all)) { goto label_write_error; + } /* Dump per gctx profile stats. 
*/ - prof_gctx_dump_iter_arg.tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg.propagate_err = propagate_err; - if (gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)&prof_gctx_dump_iter_arg) != NULL) + prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_dump_iter_arg->propagate_err = propagate_err; + if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, + (void *)prof_gctx_dump_iter_arg) != NULL) { goto label_write_error; + } /* Dump /proc//maps if possible. */ - if (prof_dump_maps(propagate_err)) + if (prof_dump_maps(propagate_err)) { goto label_write_error; + } - if (prof_dump_close(propagate_err)) - goto label_open_close_error; + if (prof_dump_close(propagate_err)) { + return true; + } + return false; +label_write_error: + prof_dump_close(propagate_err); + return true; +} + +static bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) +{ + prof_tdata_t *tdata; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; + prof_gctx_tree_t gctxs; + bool err; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, + &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, + &prof_gctx_dump_iter_arg, &gctxs); prof_gctx_finish(tsd, &gctxs); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + if (err) { + return true; + } + if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, prof_gctx_merge_iter_arg.leak_ngctx, filename); } - return (false); -label_write_error: - prof_dump_close(propagate_err); -label_open_close_error: - prof_gctx_finish(tsd, &gctxs); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - return 
(true); + return false; } +#ifdef JEMALLOC_JET +void +prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes) +{ + tsd_t *tsd; + prof_tdata_t *tdata; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_gctx_tree_t gctxs; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + if (curobjs != NULL) { + *curobjs = 0; + } + if (curbytes != NULL) { + *curbytes = 0; + } + if (accumobjs != NULL) { + *accumobjs = 0; + } + if (accumbytes != NULL) { + *accumbytes = 0; + } + return; + } + + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + if (curobjs != NULL) { + *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + } + if (curbytes != NULL) { + *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + } + if (accumobjs != NULL) { + *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + } + if (accumbytes != NULL) { + *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + } +} +#endif + #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c new file mode 100644 index 00000000..8f928ebf --- /dev/null +++ b/test/unit/prof_tctx.c @@ -0,0 +1,57 @@ +#include "test/jemalloc_test.h" + +#ifdef JEMALLOC_PROF +const char *malloc_conf = "prof:true,lg_prof_sample:0"; +#endif + +TEST_BEGIN(test_prof_realloc) +{ + tsdn_t *tsdn; + int flags; + void *p, *q; + extent_t *extent_p, *extent_q; + prof_tctx_t *tctx_p, *tctx_q; + uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; + + test_skip_if(!config_prof); + + tsdn = tsdn_fetch(); + flags = MALLOCX_TCACHE_NONE; + + prof_cnt_all(&curobjs_0, NULL, NULL, NULL); + p = mallocx(1024, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + extent_p = iealloc(tsdn, p); + 
assert_ptr_not_null(extent_p, "Unexpected iealloc() failure"); + tctx_p = prof_tctx_get(tsdn, extent_p, p); + assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U, + "Expected valid tctx"); + prof_cnt_all(&curobjs_1, NULL, NULL, NULL); + assert_u64_eq(curobjs_0 + 1, curobjs_1, + "Allocation should have increased sample size"); + + q = rallocx(p, 2048, flags); + assert_ptr_ne(p, q, "Expected move"); + assert_ptr_not_null(p, "Unexpected rmallocx() failure"); + extent_q = iealloc(tsdn, q); + assert_ptr_not_null(extent_q, "Unexpected iealloc() failure"); + tctx_q = prof_tctx_get(tsdn, extent_q, q); + assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U, + "Expected valid tctx"); + prof_cnt_all(&curobjs_2, NULL, NULL, NULL); + assert_u64_eq(curobjs_1, curobjs_2, + "Reallocation should not have changed sample size"); + + dallocx(q, flags); + prof_cnt_all(&curobjs_3, NULL, NULL, NULL); + assert_u64_eq(curobjs_0, curobjs_3, + "Sample size should have returned to base level"); +} +TEST_END + +int +main(void) +{ + return test( + test_prof_realloc); +} From c68bb4179312665e22d375aecf9f4306607c7c1a Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 17 Jan 2017 15:54:36 +0900 Subject: [PATCH 0593/2608] Don't rely on OSX SDK malloc/malloc.h for malloc_zone struct definitions The SDK jemalloc is built against might be not be the latest for various reasons, but the resulting binary ought to work on newer versions of OSX. In order to ensure this, we need the fullest definitions possible, so copy what we need from the latest version of malloc/malloc.h available on opensource.apple.com. 
--- configure.ac | 31 ----- .../jemalloc/internal/jemalloc_internal.h.in | 1 - .../internal/jemalloc_internal_defs.h.in | 1 - src/zone.c | 122 ++++++++++++------ 4 files changed, 86 insertions(+), 69 deletions(-) diff --git a/configure.ac b/configure.ac index 9573c302..4996406e 100644 --- a/configure.ac +++ b/configure.ac @@ -1774,37 +1774,6 @@ if test "x${enable_zone_allocator}" = "x1" ; then AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) fi AC_DEFINE([JEMALLOC_ZONE], [ ]) - - dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 - dnl releases. malloc_zone_t and malloc_introspection_t have new fields in - dnl 10.6, which is the only source-level indication of the change. - AC_MSG_CHECKING([malloc zone version]) - AC_DEFUN([JE_ZONE_PROGRAM], - [AC_LANG_PROGRAM( - [#include ], - [static int foo[[sizeof($1) $2 sizeof(void *) * $3 ? 1 : -1]]] - )]) - - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=] - )])],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=] - )])])])]) - if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then - AC_MSG_RESULT([unsupported]) - AC_MSG_ERROR([Unsupported malloc zone version]) - fi - if test "${JEMALLOC_ZONE_VERSION}" = 9; then - JEMALLOC_ZONE_VERSION=8 - AC_MSG_RESULT([> 8]) - else - AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION]) - fi - AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION]) fi dnl 
============================================================================ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index e7ace7d8..6213dd82 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -158,7 +158,6 @@ static const bool config_cache_oblivious = #include #include #include -#include #endif #include "jemalloc/internal/ph.h" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index def4ba55..b7ae3b79 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -239,7 +239,6 @@ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ #undef JEMALLOC_ZONE -#undef JEMALLOC_ZONE_VERSION /* * Methods for determining whether the OS overcommits. diff --git a/src/zone.c b/src/zone.c index 0571920e..d4805c57 100644 --- a/src/zone.c +++ b/src/zone.c @@ -3,6 +3,75 @@ # error "This source file is for zones on Darwin (OS X)." #endif +/* Definitions of the following structs in malloc/malloc.h might be too old + * for the built binary to run on newer versions of OSX. So use the newest + * possible version of those structs. 
+ */ +typedef struct _malloc_zone_t { + void *reserved1; + void *reserved2; + size_t (*size)(struct _malloc_zone_t *, const void *); + void *(*malloc)(struct _malloc_zone_t *, size_t); + void *(*calloc)(struct _malloc_zone_t *, size_t, size_t); + void *(*valloc)(struct _malloc_zone_t *, size_t); + void (*free)(struct _malloc_zone_t *, void *); + void *(*realloc)(struct _malloc_zone_t *, void *, size_t); + void (*destroy)(struct _malloc_zone_t *); + const char *zone_name; + unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned); + void (*batch_free)(struct _malloc_zone_t *, void **, unsigned); + struct malloc_introspection_t *introspect; + unsigned version; + void *(*memalign)(struct _malloc_zone_t *, size_t, size_t); + void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t); + size_t (*pressure_relief)(struct _malloc_zone_t *, size_t); +} malloc_zone_t; + +typedef struct { + vm_address_t address; + vm_size_t size; +} vm_range_t; + +typedef struct malloc_statistics_t { + unsigned blocks_in_use; + size_t size_in_use; + size_t max_size_in_use; + size_t size_allocated; +} malloc_statistics_t; + +typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **); + +typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned); + +typedef struct malloc_introspection_t { + kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t); + size_t (*good_size)(malloc_zone_t *, size_t); + boolean_t (*check)(malloc_zone_t *); + void (*print)(malloc_zone_t *, boolean_t); + void (*log)(malloc_zone_t *, void *); + void (*force_lock)(malloc_zone_t *); + void (*force_unlock)(malloc_zone_t *); + void (*statistics)(malloc_zone_t *, malloc_statistics_t *); + boolean_t (*zone_locked)(malloc_zone_t *); + boolean_t (*enable_discharge_checking)(malloc_zone_t *); + boolean_t (*disable_discharge_checking)(malloc_zone_t *); + void (*discharge)(malloc_zone_t *, void *); +#ifdef 
__BLOCKS__ + void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *)); +#else + void *enumerate_unavailable_without_blocks; +#endif + void (*reinit_lock)(malloc_zone_t *); +} malloc_introspection_t; + +extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *); + +extern malloc_zone_t *malloc_default_zone(void); + +extern void malloc_zone_register(malloc_zone_t *zone); + +extern void malloc_zone_unregister(malloc_zone_t *zone); + /* * The malloc_default_purgeable_zone() function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. @@ -20,21 +89,17 @@ static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static size_t zone_size(malloc_zone_t *zone, void *ptr); +static size_t zone_size(malloc_zone_t *zone, const void *ptr); static void *zone_malloc(malloc_zone_t *zone, size_t size); static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); static void *zone_valloc(malloc_zone_t *zone, size_t size); static void zone_free(malloc_zone_t *zone, void *ptr); static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -#if (JEMALLOC_ZONE_VERSION >= 5) static void *zone_memalign(malloc_zone_t *zone, size_t alignment, -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) size_t size); static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size); -#endif -static void *zone_destroy(malloc_zone_t *zone); +static void zone_destroy(malloc_zone_t *zone); static size_t zone_good_size(malloc_zone_t *zone, size_t size); static void zone_force_lock(malloc_zone_t *zone); static void zone_force_unlock(malloc_zone_t *zone); @@ -45,7 +110,7 @@ static void zone_force_unlock(malloc_zone_t *zone); */ static size_t -zone_size(malloc_zone_t *zone, void *ptr) +zone_size(malloc_zone_t *zone, const void 
*ptr) { /* @@ -106,7 +171,6 @@ zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) return (realloc(ptr, size)); } -#if (JEMALLOC_ZONE_VERSION >= 5) static void * zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { @@ -116,9 +180,7 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) return (ret); } -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { @@ -133,15 +195,13 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) free(ptr); } -#endif -static void * +static void zone_destroy(malloc_zone_t *zone) { /* This function should never be called. */ not_reached(); - return (NULL); } static size_t @@ -180,48 +240,38 @@ static void zone_init(void) { - jemalloc_zone.size = (void *)zone_size; - jemalloc_zone.malloc = (void *)zone_malloc; - jemalloc_zone.calloc = (void *)zone_calloc; - jemalloc_zone.valloc = (void *)zone_valloc; - jemalloc_zone.free = (void *)zone_free; - jemalloc_zone.realloc = (void *)zone_realloc; - jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.size = zone_size; + jemalloc_zone.malloc = zone_malloc; + jemalloc_zone.calloc = zone_calloc; + jemalloc_zone.valloc = zone_valloc; + jemalloc_zone.free = zone_free; + jemalloc_zone.realloc = zone_realloc; + jemalloc_zone.destroy = zone_destroy; jemalloc_zone.zone_name = "jemalloc_zone"; jemalloc_zone.batch_malloc = NULL; jemalloc_zone.batch_free = NULL; jemalloc_zone.introspect = &jemalloc_zone_introspect; - jemalloc_zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) + jemalloc_zone.version = 8; jemalloc_zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) jemalloc_zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) jemalloc_zone.pressure_relief = NULL; -#endif jemalloc_zone_introspect.enumerator = NULL; - jemalloc_zone_introspect.good_size = (void *)zone_good_size; + 
jemalloc_zone_introspect.good_size = zone_good_size; jemalloc_zone_introspect.check = NULL; jemalloc_zone_introspect.print = NULL; jemalloc_zone_introspect.log = NULL; - jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; - jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + jemalloc_zone_introspect.force_lock = zone_force_lock; + jemalloc_zone_introspect.force_unlock = zone_force_unlock; jemalloc_zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) jemalloc_zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) jemalloc_zone_introspect.enable_discharge_checking = NULL; jemalloc_zone_introspect.disable_discharge_checking = NULL; jemalloc_zone_introspect.discharge = NULL; -# ifdef __BLOCKS__ +#ifdef __BLOCKS__ jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; -# else +#else jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; -# endif #endif } From c6943acb3c56d1b3d1e82dd43b3fcfeae7771990 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 17 Jan 2017 16:20:05 +0900 Subject: [PATCH 0594/2608] Add dummy implementations for most remaining OSX zone allocator functions Some system libraries are using malloc_default_zone() and then using some of the malloc_zone_* API. Under normal conditions, those functions check the malloc_zone_t/malloc_introspection_t struct for the values that are allowed to be NULL, so that a NULL deref doesn't happen. As of OSX 10.12, malloc_default_zone() doesn't return the actual default zone anymore, but returns a fake, wrapper zone. The wrapper zone defines all the possible functions in the malloc_zone_t/malloc_introspection_t struct (almost), and calls the function from the registered default zone (jemalloc in our case) on its own. Without checking whether the pointers are NULL. This means that a system library that calls e.g. malloc_zone_batch_malloc(malloc_default_zone(), ...) 
ends up trying to call jemalloc_zone.batch_malloc, which is NULL, and crash follows. So as of OSX 10.12, the default zone is required to have all the functions available (really, the same as the wrapper zone), even if they do nothing. This is arguably a bug in libsystem_malloc in OSX 10.12, but jemalloc still needs to work in that case. --- src/zone.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 108 insertions(+), 10 deletions(-) diff --git a/src/zone.c b/src/zone.c index d4805c57..6215133f 100644 --- a/src/zone.c +++ b/src/zone.c @@ -100,9 +100,24 @@ static void *zone_memalign(malloc_zone_t *zone, size_t alignment, static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size); static void zone_destroy(malloc_zone_t *zone); +static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, + void **results, unsigned num_requested); +static void zone_batch_free(struct _malloc_zone_t *zone, + void **to_be_freed, unsigned num_to_be_freed); +static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal); static size_t zone_good_size(malloc_zone_t *zone, size_t size); +static kern_return_t zone_enumerator(task_t task, void *data, unsigned type_mask, + vm_address_t zone_address, memory_reader_t reader, + vm_range_recorder_t recorder); +static boolean_t zone_check(malloc_zone_t *zone); +static void zone_print(malloc_zone_t *zone, boolean_t verbose); +static void zone_log(malloc_zone_t *zone, void *address); static void zone_force_lock(malloc_zone_t *zone); static void zone_force_unlock(malloc_zone_t *zone); +static void zone_statistics(malloc_zone_t *zone, + malloc_statistics_t *stats); +static boolean_t zone_locked(malloc_zone_t *zone); +static void zone_reinit_lock(malloc_zone_t *zone); /******************************************************************************/ /* @@ -204,6 +219,39 @@ zone_destroy(malloc_zone_t *zone) not_reached(); } +static unsigned +zone_batch_malloc(struct _malloc_zone_t 
*zone, size_t size, void **results, + unsigned num_requested) +{ + unsigned i; + + for (i = 0; i < num_requested; i++) { + results[i] = je_malloc(size); + if (!results[i]) + break; + } + + return i; +} + +static void +zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed, + unsigned num_to_be_freed) +{ + unsigned i; + + for (i = 0; i < num_to_be_freed; i++) { + zone_free(zone, to_be_freed[i]); + to_be_freed[i] = NULL; + } +} + +static size_t +zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal) +{ + return 0; +} + static size_t zone_good_size(malloc_zone_t *zone, size_t size) { @@ -213,6 +261,30 @@ zone_good_size(malloc_zone_t *zone, size_t size) return (s2u(size)); } +static kern_return_t +zone_enumerator(task_t task, void *data, unsigned type_mask, + vm_address_t zone_address, memory_reader_t reader, + vm_range_recorder_t recorder) +{ + return KERN_SUCCESS; +} + +static boolean_t +zone_check(malloc_zone_t *zone) +{ + return true; +} + +static void +zone_print(malloc_zone_t *zone, boolean_t verbose) +{ +} + +static void +zone_log(malloc_zone_t *zone, void *address) +{ +} + static void zone_force_lock(malloc_zone_t *zone) { @@ -236,6 +308,31 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_child(); } +static void +zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) +{ + /* We make no effort to actually fill the values */ + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t +zone_locked(malloc_zone_t *zone) +{ + /* Pretend no lock is being held */ + return false; +} + +static void +zone_reinit_lock(malloc_zone_t *zone) +{ + /* As of OSX 10.12, this function is only used when force_unlock would + * be used if the zone version were < 9. So just use force_unlock. 
*/ + zone_force_unlock(zone); +} + static void zone_init(void) { @@ -248,23 +345,23 @@ zone_init(void) jemalloc_zone.realloc = zone_realloc; jemalloc_zone.destroy = zone_destroy; jemalloc_zone.zone_name = "jemalloc_zone"; - jemalloc_zone.batch_malloc = NULL; - jemalloc_zone.batch_free = NULL; + jemalloc_zone.batch_malloc = zone_batch_malloc; + jemalloc_zone.batch_free = zone_batch_free; jemalloc_zone.introspect = &jemalloc_zone_introspect; - jemalloc_zone.version = 8; + jemalloc_zone.version = 9; jemalloc_zone.memalign = zone_memalign; jemalloc_zone.free_definite_size = zone_free_definite_size; - jemalloc_zone.pressure_relief = NULL; + jemalloc_zone.pressure_relief = zone_pressure_relief; - jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.enumerator = zone_enumerator; jemalloc_zone_introspect.good_size = zone_good_size; - jemalloc_zone_introspect.check = NULL; - jemalloc_zone_introspect.print = NULL; - jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.check = zone_check; + jemalloc_zone_introspect.print = zone_print; + jemalloc_zone_introspect.log = zone_log; jemalloc_zone_introspect.force_lock = zone_force_lock; jemalloc_zone_introspect.force_unlock = zone_force_unlock; - jemalloc_zone_introspect.statistics = NULL; - jemalloc_zone_introspect.zone_locked = NULL; + jemalloc_zone_introspect.statistics = zone_statistics; + jemalloc_zone_introspect.zone_locked = zone_locked; jemalloc_zone_introspect.enable_discharge_checking = NULL; jemalloc_zone_introspect.disable_discharge_checking = NULL; jemalloc_zone_introspect.discharge = NULL; @@ -273,6 +370,7 @@ zone_init(void) #else jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; #endif + jemalloc_zone_introspect.reinit_lock = zone_reinit_lock; } static malloc_zone_t * From 0f7376eb6295fcd751956cb3df248e838eea003f Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 17 Jan 2017 15:54:36 +0900 Subject: [PATCH 0595/2608] Don't rely on OSX SDK malloc/malloc.h for 
malloc_zone struct definitions The SDK jemalloc is built against might be not be the latest for various reasons, but the resulting binary ought to work on newer versions of OSX. In order to ensure this, we need the fullest definitions possible, so copy what we need from the latest version of malloc/malloc.h available on opensource.apple.com. --- configure.ac | 31 ----- .../jemalloc/internal/jemalloc_internal.h.in | 1 - .../internal/jemalloc_internal_defs.h.in | 1 - src/zone.c | 122 ++++++++++++------ 4 files changed, 86 insertions(+), 69 deletions(-) diff --git a/configure.ac b/configure.ac index f886aeb5..99a69957 100644 --- a/configure.ac +++ b/configure.ac @@ -1869,37 +1869,6 @@ if test "x${enable_zone_allocator}" = "x1" ; then AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) fi AC_DEFINE([JEMALLOC_ZONE], [ ]) - - dnl The szone version jumped from 3 to 6 between the OS X 10.5.x and 10.6 - dnl releases. malloc_zone_t and malloc_introspection_t have new fields in - dnl 10.6, which is the only source-level indication of the change. - AC_MSG_CHECKING([malloc zone version]) - AC_DEFUN([JE_ZONE_PROGRAM], - [AC_LANG_PROGRAM( - [#include ], - [static int foo[[sizeof($1) $2 sizeof(void *) * $3 ? 
1 : -1]]] - )]) - - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,14)],[JEMALLOC_ZONE_VERSION=3],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,15)],[JEMALLOC_ZONE_VERSION=5],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,16)],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,9)],[JEMALLOC_ZONE_VERSION=6],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_introspection_t,==,13)],[JEMALLOC_ZONE_VERSION=7],[JEMALLOC_ZONE_VERSION=] - )])],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,==,17)],[JEMALLOC_ZONE_VERSION=8],[ - AC_COMPILE_IFELSE([JE_ZONE_PROGRAM(malloc_zone_t,>,17)],[JEMALLOC_ZONE_VERSION=9],[JEMALLOC_ZONE_VERSION=] - )])])])]) - if test "x${JEMALLOC_ZONE_VERSION}" = "x"; then - AC_MSG_RESULT([unsupported]) - AC_MSG_ERROR([Unsupported malloc zone version]) - fi - if test "${JEMALLOC_ZONE_VERSION}" = 9; then - JEMALLOC_ZONE_VERSION=8 - AC_MSG_RESULT([> 8]) - else - AC_MSG_RESULT([$JEMALLOC_ZONE_VERSION]) - fi - AC_DEFINE_UNQUOTED(JEMALLOC_ZONE_VERSION, [$JEMALLOC_ZONE_VERSION]) fi dnl ============================================================================ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 00dce68d..dc9df35f 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -162,7 +162,6 @@ static const bool have_thp = #include #include #include -#include #endif #include "jemalloc/internal/ph.h" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 722c41dd..c777ab02 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -243,7 +243,6 @@ * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ #undef JEMALLOC_ZONE -#undef JEMALLOC_ZONE_VERSION /* * Methods for determining whether the OS overcommits. 
diff --git a/src/zone.c b/src/zone.c index 1fcff64f..f4dbb8f9 100644 --- a/src/zone.c +++ b/src/zone.c @@ -3,6 +3,75 @@ # error "This source file is for zones on Darwin (OS X)." #endif +/* Definitions of the following structs in malloc/malloc.h might be too old + * for the built binary to run on newer versions of OSX. So use the newest + * possible version of those structs. + */ +typedef struct _malloc_zone_t { + void *reserved1; + void *reserved2; + size_t (*size)(struct _malloc_zone_t *, const void *); + void *(*malloc)(struct _malloc_zone_t *, size_t); + void *(*calloc)(struct _malloc_zone_t *, size_t, size_t); + void *(*valloc)(struct _malloc_zone_t *, size_t); + void (*free)(struct _malloc_zone_t *, void *); + void *(*realloc)(struct _malloc_zone_t *, void *, size_t); + void (*destroy)(struct _malloc_zone_t *); + const char *zone_name; + unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned); + void (*batch_free)(struct _malloc_zone_t *, void **, unsigned); + struct malloc_introspection_t *introspect; + unsigned version; + void *(*memalign)(struct _malloc_zone_t *, size_t, size_t); + void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t); + size_t (*pressure_relief)(struct _malloc_zone_t *, size_t); +} malloc_zone_t; + +typedef struct { + vm_address_t address; + vm_size_t size; +} vm_range_t; + +typedef struct malloc_statistics_t { + unsigned blocks_in_use; + size_t size_in_use; + size_t max_size_in_use; + size_t size_allocated; +} malloc_statistics_t; + +typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **); + +typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned); + +typedef struct malloc_introspection_t { + kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t); + size_t (*good_size)(malloc_zone_t *, size_t); + boolean_t (*check)(malloc_zone_t *); + void (*print)(malloc_zone_t *, boolean_t); + void 
(*log)(malloc_zone_t *, void *); + void (*force_lock)(malloc_zone_t *); + void (*force_unlock)(malloc_zone_t *); + void (*statistics)(malloc_zone_t *, malloc_statistics_t *); + boolean_t (*zone_locked)(malloc_zone_t *); + boolean_t (*enable_discharge_checking)(malloc_zone_t *); + boolean_t (*disable_discharge_checking)(malloc_zone_t *); + void (*discharge)(malloc_zone_t *, void *); +#ifdef __BLOCKS__ + void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *)); +#else + void *enumerate_unavailable_without_blocks; +#endif + void (*reinit_lock)(malloc_zone_t *); +} malloc_introspection_t; + +extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *); + +extern malloc_zone_t *malloc_default_zone(void); + +extern void malloc_zone_register(malloc_zone_t *zone); + +extern void malloc_zone_unregister(malloc_zone_t *zone); + /* * The malloc_default_purgeable_zone() function is only available on >= 10.6. * We need to check whether it is present at runtime, thus the weak_import. @@ -20,21 +89,17 @@ static struct malloc_introspection_t jemalloc_zone_introspect; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static size_t zone_size(malloc_zone_t *zone, void *ptr); +static size_t zone_size(malloc_zone_t *zone, const void *ptr); static void *zone_malloc(malloc_zone_t *zone, size_t size); static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); static void *zone_valloc(malloc_zone_t *zone, size_t size); static void zone_free(malloc_zone_t *zone, void *ptr); static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -#if (JEMALLOC_ZONE_VERSION >= 5) static void *zone_memalign(malloc_zone_t *zone, size_t alignment, -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) size_t size); static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size); -#endif -static void *zone_destroy(malloc_zone_t *zone); +static void zone_destroy(malloc_zone_t *zone); static size_t zone_good_size(malloc_zone_t *zone, size_t size); static void zone_force_lock(malloc_zone_t *zone); static void zone_force_unlock(malloc_zone_t *zone); @@ -45,7 +110,7 @@ static void zone_force_unlock(malloc_zone_t *zone); */ static size_t -zone_size(malloc_zone_t *zone, void *ptr) +zone_size(malloc_zone_t *zone, const void *ptr) { /* * There appear to be places within Darwin (such as setenv(3)) that @@ -101,7 +166,6 @@ zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) return (realloc(ptr, size)); } -#if (JEMALLOC_ZONE_VERSION >= 5) static void * zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { @@ -111,9 +175,7 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) return (ret); } -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { @@ -128,14 +190,12 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) free(ptr); } -#endif -static void * +static void zone_destroy(malloc_zone_t *zone) { /* This function should never be called. 
*/ not_reached(); - return (NULL); } static size_t @@ -170,48 +230,38 @@ zone_force_unlock(malloc_zone_t *zone) static void zone_init(void) { - jemalloc_zone.size = (void *)zone_size; - jemalloc_zone.malloc = (void *)zone_malloc; - jemalloc_zone.calloc = (void *)zone_calloc; - jemalloc_zone.valloc = (void *)zone_valloc; - jemalloc_zone.free = (void *)zone_free; - jemalloc_zone.realloc = (void *)zone_realloc; - jemalloc_zone.destroy = (void *)zone_destroy; + jemalloc_zone.size = zone_size; + jemalloc_zone.malloc = zone_malloc; + jemalloc_zone.calloc = zone_calloc; + jemalloc_zone.valloc = zone_valloc; + jemalloc_zone.free = zone_free; + jemalloc_zone.realloc = zone_realloc; + jemalloc_zone.destroy = zone_destroy; jemalloc_zone.zone_name = "jemalloc_zone"; jemalloc_zone.batch_malloc = NULL; jemalloc_zone.batch_free = NULL; jemalloc_zone.introspect = &jemalloc_zone_introspect; - jemalloc_zone.version = JEMALLOC_ZONE_VERSION; -#if (JEMALLOC_ZONE_VERSION >= 5) + jemalloc_zone.version = 8; jemalloc_zone.memalign = zone_memalign; -#endif -#if (JEMALLOC_ZONE_VERSION >= 6) jemalloc_zone.free_definite_size = zone_free_definite_size; -#endif -#if (JEMALLOC_ZONE_VERSION >= 8) jemalloc_zone.pressure_relief = NULL; -#endif jemalloc_zone_introspect.enumerator = NULL; - jemalloc_zone_introspect.good_size = (void *)zone_good_size; + jemalloc_zone_introspect.good_size = zone_good_size; jemalloc_zone_introspect.check = NULL; jemalloc_zone_introspect.print = NULL; jemalloc_zone_introspect.log = NULL; - jemalloc_zone_introspect.force_lock = (void *)zone_force_lock; - jemalloc_zone_introspect.force_unlock = (void *)zone_force_unlock; + jemalloc_zone_introspect.force_lock = zone_force_lock; + jemalloc_zone_introspect.force_unlock = zone_force_unlock; jemalloc_zone_introspect.statistics = NULL; -#if (JEMALLOC_ZONE_VERSION >= 6) jemalloc_zone_introspect.zone_locked = NULL; -#endif -#if (JEMALLOC_ZONE_VERSION >= 7) jemalloc_zone_introspect.enable_discharge_checking = NULL; 
jemalloc_zone_introspect.disable_discharge_checking = NULL; jemalloc_zone_introspect.discharge = NULL; -# ifdef __BLOCKS__ +#ifdef __BLOCKS__ jemalloc_zone_introspect.enumerate_discharged_pointers = NULL; -# else +#else jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; -# endif #endif } From 12ab4383e9ea743e8e6b9115be73f2c6dfde5e24 Mon Sep 17 00:00:00 2001 From: Mike Hommey Date: Tue, 17 Jan 2017 16:20:05 +0900 Subject: [PATCH 0596/2608] Add dummy implementations for most remaining OSX zone allocator functions Some system libraries are using malloc_default_zone() and then using some of the malloc_zone_* API. Under normal conditions, those functions check the malloc_zone_t/malloc_introspection_t struct for the values that are allowed to be NULL, so that a NULL deref doesn't happen. As of OSX 10.12, malloc_default_zone() doesn't return the actual default zone anymore, but returns a fake, wrapper zone. The wrapper zone defines all the possible functions in the malloc_zone_t/malloc_introspection_t struct (almost), and calls the function from the registered default zone (jemalloc in our case) on its own. Without checking whether the pointers are NULL. This means that a system library that calls e.g. malloc_zone_batch_malloc(malloc_default_zone(), ...) ends up trying to call jemalloc_zone.batch_malloc, which is NULL, and crash follows. So as of OSX 10.12, the default zone is required to have all the functions available (really, the same as the wrapper zone), even if they do nothing. This is arguably a bug in libsystem_malloc in OSX 10.12, but jemalloc still needs to work in that case. 
--- src/zone.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 108 insertions(+), 10 deletions(-) diff --git a/src/zone.c b/src/zone.c index f4dbb8f9..c54f4a4f 100644 --- a/src/zone.c +++ b/src/zone.c @@ -100,9 +100,24 @@ static void *zone_memalign(malloc_zone_t *zone, size_t alignment, static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size); static void zone_destroy(malloc_zone_t *zone); +static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, + void **results, unsigned num_requested); +static void zone_batch_free(struct _malloc_zone_t *zone, + void **to_be_freed, unsigned num_to_be_freed); +static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal); static size_t zone_good_size(malloc_zone_t *zone, size_t size); +static kern_return_t zone_enumerator(task_t task, void *data, unsigned type_mask, + vm_address_t zone_address, memory_reader_t reader, + vm_range_recorder_t recorder); +static boolean_t zone_check(malloc_zone_t *zone); +static void zone_print(malloc_zone_t *zone, boolean_t verbose); +static void zone_log(malloc_zone_t *zone, void *address); static void zone_force_lock(malloc_zone_t *zone); static void zone_force_unlock(malloc_zone_t *zone); +static void zone_statistics(malloc_zone_t *zone, + malloc_statistics_t *stats); +static boolean_t zone_locked(malloc_zone_t *zone); +static void zone_reinit_lock(malloc_zone_t *zone); /******************************************************************************/ /* @@ -198,6 +213,39 @@ zone_destroy(malloc_zone_t *zone) not_reached(); } +static unsigned +zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, + unsigned num_requested) +{ + unsigned i; + + for (i = 0; i < num_requested; i++) { + results[i] = je_malloc(size); + if (!results[i]) + break; + } + + return i; +} + +static void +zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed, + unsigned num_to_be_freed) +{ + unsigned i; + + for (i 
= 0; i < num_to_be_freed; i++) { + zone_free(zone, to_be_freed[i]); + to_be_freed[i] = NULL; + } +} + +static size_t +zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal) +{ + return 0; +} + static size_t zone_good_size(malloc_zone_t *zone, size_t size) { @@ -206,6 +254,30 @@ zone_good_size(malloc_zone_t *zone, size_t size) return (s2u(size)); } +static kern_return_t +zone_enumerator(task_t task, void *data, unsigned type_mask, + vm_address_t zone_address, memory_reader_t reader, + vm_range_recorder_t recorder) +{ + return KERN_SUCCESS; +} + +static boolean_t +zone_check(malloc_zone_t *zone) +{ + return true; +} + +static void +zone_print(malloc_zone_t *zone, boolean_t verbose) +{ +} + +static void +zone_log(malloc_zone_t *zone, void *address) +{ +} + static void zone_force_lock(malloc_zone_t *zone) { @@ -227,6 +299,31 @@ zone_force_unlock(malloc_zone_t *zone) jemalloc_postfork_child(); } +static void +zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) +{ + /* We make no effort to actually fill the values */ + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t +zone_locked(malloc_zone_t *zone) +{ + /* Pretend no lock is being held */ + return false; +} + +static void +zone_reinit_lock(malloc_zone_t *zone) +{ + /* As of OSX 10.12, this function is only used when force_unlock would + * be used if the zone version were < 9. So just use force_unlock. 
*/ + zone_force_unlock(zone); +} + static void zone_init(void) { @@ -238,23 +335,23 @@ zone_init(void) jemalloc_zone.realloc = zone_realloc; jemalloc_zone.destroy = zone_destroy; jemalloc_zone.zone_name = "jemalloc_zone"; - jemalloc_zone.batch_malloc = NULL; - jemalloc_zone.batch_free = NULL; + jemalloc_zone.batch_malloc = zone_batch_malloc; + jemalloc_zone.batch_free = zone_batch_free; jemalloc_zone.introspect = &jemalloc_zone_introspect; - jemalloc_zone.version = 8; + jemalloc_zone.version = 9; jemalloc_zone.memalign = zone_memalign; jemalloc_zone.free_definite_size = zone_free_definite_size; - jemalloc_zone.pressure_relief = NULL; + jemalloc_zone.pressure_relief = zone_pressure_relief; - jemalloc_zone_introspect.enumerator = NULL; + jemalloc_zone_introspect.enumerator = zone_enumerator; jemalloc_zone_introspect.good_size = zone_good_size; - jemalloc_zone_introspect.check = NULL; - jemalloc_zone_introspect.print = NULL; - jemalloc_zone_introspect.log = NULL; + jemalloc_zone_introspect.check = zone_check; + jemalloc_zone_introspect.print = zone_print; + jemalloc_zone_introspect.log = zone_log; jemalloc_zone_introspect.force_lock = zone_force_lock; jemalloc_zone_introspect.force_unlock = zone_force_unlock; - jemalloc_zone_introspect.statistics = NULL; - jemalloc_zone_introspect.zone_locked = NULL; + jemalloc_zone_introspect.statistics = zone_statistics; + jemalloc_zone_introspect.zone_locked = zone_locked; jemalloc_zone_introspect.enable_discharge_checking = NULL; jemalloc_zone_introspect.disable_discharge_checking = NULL; jemalloc_zone_introspect.discharge = NULL; @@ -263,6 +360,7 @@ zone_init(void) #else jemalloc_zone_introspect.enumerate_unavailable_without_blocks = NULL; #endif + jemalloc_zone_introspect.reinit_lock = zone_reinit_lock; } static malloc_zone_t * From 58424e679d7c1095c0ac2f148ee558d6a067f577 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Apr 2016 18:37:44 -0700 Subject: [PATCH 0597/2608] Added stats about number of bytes cached in tcache 
currently. --- include/jemalloc/internal/stats_structs.h | 3 +++ src/arena.c | 15 +++++++++++++++ src/ctl.c | 9 +++++++++ src/stats.c | 13 +++++++++++++ 4 files changed, 40 insertions(+) diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index aaa0bf4f..32ef6118 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -100,6 +100,9 @@ struct arena_stats_s { uint64_t ndalloc_large; uint64_t nrequests_large; + /* Number of bytes cached in tcache associated with this arena. */ + size_t tcache_bytes; + /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; }; diff --git a/src/arena.c b/src/arena.c index 70d71fcb..7362c4e6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1625,6 +1625,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, lstats[i].nrequests += arena->stats.lstats[i].nrequests; lstats[i].curlextents += arena->stats.lstats[i].curlextents; } + + if (config_tcache) { + tcache_bin_t *tbin; + tcache_t *tcache; + + /* tcache_bytes counts currently cached bytes. 
*/ + astats->tcache_bytes = 0; + ql_foreach(tcache, &arena->tcache_ql, link) { + for (i = 0; i < nhbins; i++) { + tbin = &tcache->tbins[i]; + astats->tcache_bytes += tbin->ncached * + index2size(i); + } + } + } malloc_mutex_unlock(tsdn, &arena->lock); for (i = 0; i < NBINS; i++) { diff --git a/src/ctl.c b/src/ctl.c index 8484ba85..9c582d65 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -161,6 +161,7 @@ CTL_PROTO(stats_arenas_i_nmadvise) CTL_PROTO(stats_arenas_i_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) @@ -382,6 +383,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("purged"), CTL(stats_arenas_i_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, @@ -601,6 +603,11 @@ ctl_arena_stats_sdmerge(ctl_arena_stats_t *sdstats, ctl_arena_stats_t *astats, sdstats->astats.nrequests_large += astats->astats.nrequests_large; + if (config_tcache) { + sdstats->astats.tcache_bytes += + astats->astats.tcache_bytes; + } + for (i = 0; i < NBINS; i++) { sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; @@ -2105,6 +2112,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, stats_arenas_i(mib[2])->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, stats_arenas_i(mib[2])->astats.internal, size_t) +CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_tcache_bytes, + stats_arenas_i(mib[2])->astats.tcache_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, stats_arenas_i(mib[2])->astats.resident, size_t) diff --git a/src/stats.c b/src/stats.c index 
ef349a50..f20fd4ce 100644 --- a/src/stats.c +++ b/src/stats.c @@ -261,6 +261,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t small_nmalloc, small_ndalloc, small_nrequests; size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests; + size_t tcache_bytes; CTL_GET("arenas.page", &page, size_t); @@ -423,6 +424,18 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "internal: %12zu\n", internal); } + if (config_tcache) { + CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, + size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); + } else { + malloc_cprintf(write_cb, cbopaque, + "tcache: %12zu\n", tcache_bytes); + } + } + CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, From 7a61ebe71f7cadd54c68ea5fb0b33f6aee290aef Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 19 Jan 2017 09:11:46 -0800 Subject: [PATCH 0598/2608] Remove -Werror=declaration-after-statement. This partially resolves #536. --- configure.ac | 1 - 1 file changed, 1 deletion(-) diff --git a/configure.ac b/configure.ac index 99a69957..4a1168b0 100644 --- a/configure.ac +++ b/configure.ac @@ -239,7 +239,6 @@ if test "x$GCC" = "xyes" ; then fi fi JE_CFLAGS_ADD([-Wall]) - JE_CFLAGS_ADD([-Werror=declaration-after-statement]) JE_CFLAGS_ADD([-Wshorten-64-to-32]) JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-pipe]) From 66bf773ef2980b0baf6d46a4b65ccedd9f1e1931 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Jan 2017 01:01:19 -0800 Subject: [PATCH 0599/2608] Test JSON output of malloc_stats_print() and fix bugs. Implement and test a JSON validation parser. Use the parser to validate JSON output from malloc_stats_print(), with a significant subset of supported output options. This resolves #551. 
--- Makefile.in | 1 + src/stats.c | 65 +-- test/unit/stats_print.c | 1006 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 1044 insertions(+), 28 deletions(-) create mode 100644 test/unit/stats_print.c diff --git a/Makefile.in b/Makefile.in index 1be7d191..acd31f73 100644 --- a/Makefile.in +++ b/Makefile.in @@ -190,6 +190,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ diff --git a/src/stats.c b/src/stats.c index f20fd4ce..020d56bd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -37,7 +37,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool large, unsigned i) { size_t page; - bool config_tcache, in_gap, in_gap_prev; + bool in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); @@ -47,7 +47,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"bins\": [\n"); } else { - CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" @@ -700,9 +699,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nbins\": %u,\n", nbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nhbins\": %u,\n", uv); + if (config_tcache) { + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + } malloc_cprintf(write_cb, cbopaque, "\t\t\t\"bin\": [\n"); @@ -867,8 +868,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, MALLCTL_ARENAS_ALL, bins, large); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", (ninitialized > 1) ? 
- "," : ""); + "\t\t\t}%s\n", + ((destroyed_initialized && + destroyed) || unmerged) ? "," : + ""); } } @@ -886,31 +889,37 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, MALLCTL_ARENAS_DESTROYED, bins, large); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", (ninitialized > 1) ? - "," : ""); + "\t\t\t}%s\n", unmerged ? "," : + ""); } } /* Unmerged stats. */ - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", i); - } else { - malloc_cprintf(write_cb, - cbopaque, "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, cbopaque, - json, i, bins, large); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," : ""); + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", + i); + } else { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, + cbopaque, json, i, bins, + large); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? 
"," + : ""); + } } } } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c new file mode 100644 index 00000000..5accd8e2 --- /dev/null +++ b/test/unit/stats_print.c @@ -0,0 +1,1006 @@ +#include "test/jemalloc_test.h" + +typedef enum { + TOKEN_TYPE_NONE, + TOKEN_TYPE_ERROR, + TOKEN_TYPE_EOI, + TOKEN_TYPE_NULL, + TOKEN_TYPE_FALSE, + TOKEN_TYPE_TRUE, + TOKEN_TYPE_LBRACKET, + TOKEN_TYPE_RBRACKET, + TOKEN_TYPE_LBRACE, + TOKEN_TYPE_RBRACE, + TOKEN_TYPE_COLON, + TOKEN_TYPE_COMMA, + TOKEN_TYPE_STRING, + TOKEN_TYPE_NUMBER +} token_type_t; + +typedef struct parser_s parser_t; +typedef struct { + parser_t *parser; + token_type_t token_type; + size_t pos; + size_t len; + size_t line; + size_t col; +} token_t; + +struct parser_s { + bool verbose; + char *buf; /* '\0'-terminated. */ + size_t len; /* Number of characters preceding '\0' in buf. */ + size_t pos; + size_t line; + size_t col; + token_t token; +}; + +static void +token_init(token_t *token, parser_t *parser, token_type_t token_type, + size_t pos, size_t len, size_t line, size_t col) +{ + token->parser = parser; + token->token_type = token_type; + token->pos = pos; + token->len = len; + token->line = line; + token->col = col; +} + +static void +token_error(token_t *token) +{ + if (!token->parser->verbose) { + return; + } + switch (token->token_type) { + case TOKEN_TYPE_NONE: + not_reached(); + case TOKEN_TYPE_ERROR: + malloc_printf("%zu:%zu: Unexpected character in token: ", + token->line, token->col); + break; + default: + malloc_printf("%zu:%zu: Unexpected token: ", token->line, + token->col); + break; + } + write(STDERR_FILENO, &token->parser->buf[token->pos], token->len); + malloc_printf("\n"); +} + +static void +parser_init(parser_t *parser, bool verbose) +{ + parser->verbose = verbose; + parser->buf = NULL; + parser->len = 0; + parser->pos = 0; + parser->line = 1; + parser->col = 0; +} + +static void +parser_fini(parser_t *parser) +{ + if (parser->buf != NULL) { + dallocx(parser->buf, 
MALLOCX_TCACHE_NONE); + } +} + +static bool +parser_append(parser_t *parser, const char *str) +{ + size_t len = strlen(str); + char *buf = (parser->buf == NULL) ? mallocx(len + 1, + MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1, + MALLOCX_TCACHE_NONE); + if (buf == NULL) { + return true; + } + memcpy(&buf[parser->len], str, len + 1); + parser->buf = buf; + parser->len += len; + return false; +} + +static bool +parser_tokenize(parser_t *parser) +{ + enum { + STATE_START, + STATE_EOI, + STATE_N, STATE_NU, STATE_NUL, STATE_NULL, + STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE, + STATE_T, STATE_TR, STATE_TRU, STATE_TRUE, + STATE_LBRACKET, + STATE_RBRACKET, + STATE_LBRACE, + STATE_RBRACE, + STATE_COLON, + STATE_COMMA, + STATE_CHARS, + STATE_CHAR_ESCAPE, + STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD, + STATE_STRING, + STATE_MINUS, + STATE_LEADING_ZERO, + STATE_DIGITS, + STATE_DECIMAL, + STATE_FRAC_DIGITS, + STATE_EXP, + STATE_EXP_SIGN, + STATE_EXP_DIGITS, + STATE_ACCEPT + } state = STATE_START; + size_t token_pos, token_line, token_col; + + assert_zu_le(parser->pos, parser->len, + "Position is past end of buffer"); + + while (state != STATE_ACCEPT) { + char c = parser->buf[parser->pos]; + + switch (state) { + case STATE_START: + token_pos = parser->pos; + token_line = parser->line; + token_col = parser->col; + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + break; + case '\0': + state = STATE_EOI; + break; + case 'n': + state = STATE_N; + break; + case 'f': + state = STATE_F; + break; + case 't': + state = STATE_T; + break; + case '[': + state = STATE_LBRACKET; + break; + case ']': + state = STATE_RBRACKET; + break; + case '{': + state = STATE_LBRACE; + break; + case '}': + state = STATE_RBRACE; + break; + case ':': + state = STATE_COLON; + break; + case ',': + state = STATE_COMMA; + break; + case '"': + state = STATE_CHARS; + break; + case '-': + state = STATE_MINUS; + break; + case '0': + state = 
STATE_LEADING_ZERO; + break; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_EOI: + token_init(&parser->token, parser, + TOKEN_TYPE_EOI, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + case STATE_N: + switch (c) { + case 'u': + state = STATE_NU; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_NU: + switch (c) { + case 'l': + state = STATE_NUL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_NUL: + switch (c) { + case 'l': + state = STATE_NULL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_NULL: + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + case '\0': + case '[': case ']': case '{': case '}': case ':': + case ',': + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + token_init(&parser->token, parser, TOKEN_TYPE_NULL, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_F: + switch (c) { + case 'a': + state = STATE_FA; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FA: + switch (c) { + case 'l': + state = STATE_FAL; + break; + default: + 
token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FAL: + switch (c) { + case 's': + state = STATE_FALS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FALS: + switch (c) { + case 'e': + state = STATE_FALSE; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FALSE: + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + case '\0': + case '[': case ']': case '{': case '}': case ':': + case ',': + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + token_init(&parser->token, parser, + TOKEN_TYPE_FALSE, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + case STATE_T: + switch (c) { + case 'r': + state = STATE_TR; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_TR: + switch (c) { + case 'u': + state = STATE_TRU; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_TRU: + switch (c) { + case 'e': + state = STATE_TRUE; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_TRUE: + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + case '\0': + case '[': case ']': case '{': case '}': case ':': + case ',': + break; + 
default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + token_init(&parser->token, parser, TOKEN_TYPE_TRUE, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_LBRACKET: + token_init(&parser->token, parser, TOKEN_TYPE_LBRACKET, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_RBRACKET: + token_init(&parser->token, parser, TOKEN_TYPE_RBRACKET, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_LBRACE: + token_init(&parser->token, parser, TOKEN_TYPE_LBRACE, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_RBRACE: + token_init(&parser->token, parser, TOKEN_TYPE_RBRACE, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_COLON: + token_init(&parser->token, parser, TOKEN_TYPE_COLON, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_COMMA: + token_init(&parser->token, parser, TOKEN_TYPE_COMMA, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_CHARS: + switch (c) { + case '\\': + state = STATE_CHAR_ESCAPE; + break; + case '"': + state = STATE_STRING; + break; + case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: + case 0x05: case 0x06: case 0x07: case 0x08: case 0x09: + case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: + case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: + case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: + case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: + case 0x1e: case 0x1f: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + default: + 
break; + } + break; + case STATE_CHAR_ESCAPE: + switch (c) { + case '"': case '\\': case '/': case 'b': case 'n': + case 'r': case 't': + state = STATE_CHARS; + break; + case 'u': + state = STATE_CHAR_U; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_U: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHAR_UD; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_UD: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHAR_UDD; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_UDD: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHAR_UDDD; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_UDDD: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 
'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHARS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_STRING: + token_init(&parser->token, parser, TOKEN_TYPE_STRING, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_MINUS: + switch (c) { + case '0': + state = STATE_LEADING_ZERO; + break; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_LEADING_ZERO: + switch (c) { + case '.': + state = STATE_DECIMAL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + case STATE_DIGITS: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + case '.': + state = STATE_DECIMAL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + case STATE_DECIMAL: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_FRAC_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FRAC_DIGITS: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + case 'e': case 'E': + state = STATE_EXP; + break; + default: + 
token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + case STATE_EXP: + switch (c) { + case '-': case '+': + state = STATE_EXP_SIGN; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_EXP_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_EXP_SIGN: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_EXP_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_EXP_DIGITS: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + default: + not_reached(); + } + + if (state != STATE_ACCEPT) { + if (c == '\n') { + parser->line++; + parser->col = 0; + } else { + parser->col++; + } + parser->pos++; + } + } + return false; +} + +static bool parser_parse_array(parser_t *parser); +static bool parser_parse_object(parser_t *parser); + +static bool +parser_parse_value(parser_t *parser) +{ + switch (parser->token.token_type) { + case TOKEN_TYPE_NULL: + case TOKEN_TYPE_FALSE: + case TOKEN_TYPE_TRUE: + case TOKEN_TYPE_STRING: + case TOKEN_TYPE_NUMBER: + return false; + case TOKEN_TYPE_LBRACE: + return parser_parse_object(parser); + case TOKEN_TYPE_LBRACKET: + return parser_parse_array(parser); + default: + return true; + } + not_reached(); +} + +static bool +parser_parse_pair(parser_t *parser) +{ 
+ assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + "Pair should start with string"); + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_COLON: + if (parser_tokenize(parser)) { + return true; + } + return parser_parse_value(parser); + default: + return true; + } +} + +static bool +parser_parse_values(parser_t *parser) +{ + if (parser_parse_value(parser)) { + return true; + } + + while (true) { + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_COMMA: + if (parser_tokenize(parser)) { + return true; + } + if (parser_parse_value(parser)) { + return true; + } + break; + case TOKEN_TYPE_RBRACKET: + return false; + default: + return true; + } + } +} + +static bool +parser_parse_array(parser_t *parser) +{ + assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, + "Array should start with ["); + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_RBRACKET: + return false; + default: + return parser_parse_values(parser); + } + not_reached(); +} + +static bool +parser_parse_pairs(parser_t *parser) +{ + assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + "Object should start with string"); + if (parser_parse_pair(parser)) { + return true; + } + + while (true) { + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_COMMA: + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_STRING: + if (parser_parse_pair(parser)) { + return true; + } + break; + default: + return true; + } + break; + case TOKEN_TYPE_RBRACE: + return false; + default: + return true; + } + } +} + +static bool +parser_parse_object(parser_t *parser) +{ + assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, + "Object should start with {"); + if (parser_tokenize(parser)) { + return true; + } + switch 
(parser->token.token_type) { + case TOKEN_TYPE_STRING: + return parser_parse_pairs(parser); + case TOKEN_TYPE_RBRACE: + return false; + default: + return true; + } + not_reached(); +} + +static bool +parser_parse(parser_t *parser) +{ + if (parser_tokenize(parser)) { + goto label_error; + } + if (parser_parse_value(parser)) { + goto label_error; + } + + if (parser_tokenize(parser)) { + goto label_error; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_EOI: + return false; + default: + goto label_error; + } + not_reached(); + +label_error: + token_error(&parser->token); + return true; +} + +TEST_BEGIN(test_json_parser) +{ + size_t i; + const char *invalid_inputs[] = { + /* Tokenizer error case tests. */ + "{ \"string\": X }", + "{ \"string\": nXll }", + "{ \"string\": nuXl }", + "{ \"string\": nulX }", + "{ \"string\": nullX }", + "{ \"string\": fXlse }", + "{ \"string\": faXse }", + "{ \"string\": falXe }", + "{ \"string\": falsX }", + "{ \"string\": falseX }", + "{ \"string\": tXue }", + "{ \"string\": trXe }", + "{ \"string\": truX }", + "{ \"string\": trueX }", + "{ \"string\": \"\n\" }", + "{ \"string\": \"\\z\" }", + "{ \"string\": \"\\uX000\" }", + "{ \"string\": \"\\u0X00\" }", + "{ \"string\": \"\\u00X0\" }", + "{ \"string\": \"\\u000X\" }", + "{ \"string\": -X }", + "{ \"string\": 0.X }", + "{ \"string\": 0.0eX }", + "{ \"string\": 0.0e+X }", + + /* Parser error test cases. */ + "{\"string\": }", + "{\"string\" }", + "{\"string\": [ 0 }", + "{\"string\": {\"a\":0, 1 } }", + "{\"string\": {\"a\":0: } }", + "{", + "{}{", + }; + const char *valid_inputs[] = { + /* Token tests. 
*/ + "null", + "false", + "true", + "{}", + "{\"a\": 0}", + "[]", + "[0, 1]", + "0", + "1", + "10", + "-10", + "10.23", + "10.23e4", + "10.23e-4", + "10.23e+4", + "10.23E4", + "10.23E-4", + "10.23E+4", + "-10.23", + "-10.23e4", + "-10.23e-4", + "-10.23e+4", + "-10.23E4", + "-10.23E-4", + "-10.23E+4", + "\"value\"", + "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", + + /* Parser test with various nesting. */ + "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", + }; + + for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) { + const char *input = invalid_inputs[i]; + parser_t parser; + parser_init(&parser, false); + assert_false(parser_append(&parser, input), + "Unexpected input appending failure"); + assert_true(parser_parse(&parser), + "Unexpected parse success for input: %s", input); + parser_fini(&parser); + } + + for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) { + const char *input = valid_inputs[i]; + parser_t parser; + parser_init(&parser, true); + assert_false(parser_append(&parser, input), + "Unexpected input appending failure"); + assert_false(parser_parse(&parser), + "Unexpected parse error for input: %s", input); + parser_fini(&parser); + } +} +TEST_END + +void +write_cb(void *opaque, const char *str) +{ + parser_t *parser = (parser_t *)opaque; + if (parser_append(parser, str)) { + test_fail("Unexpected input appending failure"); + } +} + +TEST_BEGIN(test_stats_print_json) +{ + const char *opts[] = { + "J", + "Jg", + "Jm", + "Jd", + "Jmd", + "Jgd", + "Jgm", + "Jgmd", + "Ja", + "Jb", + "Jl", + "Jbl", + "Jal", + "Jab", + "Jabl", + "Jgmdabl", + }; + unsigned arena_ind, i; + + for (i = 0; i < 3; i++) { + unsigned j; + + switch (i) { + case 0: + break; + case 1: { + size_t sz = sizeof(arena_ind); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); + break; + } case 2: { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + 
assert_d_eq(mallctlnametomib("arena.0.destroy", + mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + mib[1] = arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, + 0), 0, "Unexpected mallctlbymib failure"); + break; + } default: + not_reached(); + } + + for (j = 0; j < sizeof(opts)/sizeof(const char *); j++) { + parser_t parser; + + parser_init(&parser, true); + malloc_stats_print(write_cb, (void *)&parser, opts[j]); + assert_false(parser_parse(&parser), + "Unexpected parse error, opts=\"%s\"", opts[j]); + parser_fini(&parser); + } + } +} +TEST_END + +int +main(void) +{ + return (test( + test_json_parser, + test_stats_print_json)); +} From 9eb1b1c8814beb4026fe8a9d3e77bb44e9ad0144 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Jan 2017 23:03:37 -0800 Subject: [PATCH 0600/2608] Fix --disable-stats support. Fix numerous regressions that were exposed by --disable-stats, both in the core library and in the tests. --- include/jemalloc/internal/ctl_structs.h | 46 +-- include/jemalloc/internal/ctl_types.h | 2 + src/ctl.c | 362 +++++++++++++----------- src/extent.c | 8 +- test/unit/base.c | 32 ++- 5 files changed, 252 insertions(+), 198 deletions(-) diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index 8f94c6c4..18806a59 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -22,18 +22,6 @@ struct ctl_indexed_node_s { }; struct ctl_arena_stats_s { - unsigned arena_ind; - bool initialized; - ql_elm(ctl_arena_stats_t) destroyed_link; - - unsigned nthreads; - const char *dss; - ssize_t decay_time; - size_t pactive; - size_t pdirty; - - /* The remainder are only populated if config_stats is true. */ - arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. 
*/ @@ -47,22 +35,42 @@ struct ctl_arena_stats_s { }; struct ctl_stats_s { - uint64_t epoch; size_t allocated; size_t active; size_t metadata; size_t resident; size_t mapped; size_t retained; +}; + +struct ctl_arena_s { + unsigned arena_ind; + bool initialized; + ql_elm(ctl_arena_t) destroyed_link; + + /* Basic stats, supported even if !config_stats. */ + unsigned nthreads; + const char *dss; + ssize_t decay_time; + size_t pactive; + size_t pdirty; + + /* NULL if !config_stats. */ + ctl_arena_stats_t *astats; +}; + +struct ctl_arenas_s { + uint64_t epoch; unsigned narenas; - ql_head(ctl_arena_stats_t) destroyed; + ql_head(ctl_arena_t) destroyed; + /* - * Element 0 contains merged stats for extant arenas (accessed via - * MALLCTL_ARENAS_ALL), element 1 contains merged stats for destroyed - * arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the remaining - * MALLOCX_ARENA_MAX+1 elements correspond to arenas. + * Element 0 corresponds to merged stats for extant arenas (accessed via + * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for + * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the + * remaining MALLOCX_ARENA_MAX+1 elements correspond to arenas. 
*/ - ctl_arena_stats_t *arenas[MALLOCX_ARENA_MAX + 3]; + ctl_arena_t *arenas[MALLOCX_ARENA_MAX + 3]; }; #endif /* JEMALLOC_INTERNAL_CTL_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h index 848c4f10..7853a4b2 100644 --- a/include/jemalloc/internal/ctl_types.h +++ b/include/jemalloc/internal/ctl_types.h @@ -6,5 +6,7 @@ typedef struct ctl_named_node_s ctl_named_node_t; typedef struct ctl_indexed_node_s ctl_indexed_node_t; typedef struct ctl_arena_stats_s ctl_arena_stats_t; typedef struct ctl_stats_s ctl_stats_t; +typedef struct ctl_arena_s ctl_arena_t; +typedef struct ctl_arenas_s ctl_arenas_t; #endif /* JEMALLOC_INTERNAL_CTL_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index 9c582d65..b19c9d31 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -11,6 +11,7 @@ static malloc_mutex_t ctl_mtx; static bool ctl_initialized; static ctl_stats_t *ctl_stats; +static ctl_arenas_t *ctl_arenas; /******************************************************************************/ /* Helpers for named and indexed nodes. */ @@ -432,12 +433,10 @@ static const ctl_named_node_t super_root_node[] = { /******************************************************************************/ static unsigned -stats_arenas_i2a_impl(size_t i, bool compat, bool validate) +arenas_i2a_impl(size_t i, bool compat, bool validate) { unsigned a; - cassert(config_stats); - switch (i) { case MALLCTL_ARENAS_ALL: a = 0; @@ -446,7 +445,7 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) a = 1; break; default: - if (compat && i == ctl_stats->narenas) { + if (compat && i == ctl_arenas->narenas) { /* * Provide deprecated backward compatibility for * accessing the merged stats at index narenas rather @@ -454,16 +453,16 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) * removal in 6.0.0. 
*/ a = 0; - } else if (validate && i >= ctl_stats->narenas) + } else if (validate && i >= ctl_arenas->narenas) a = UINT_MAX; else { /* * This function should never be called for an index * more than one past the range of indices that have - * initialized stats. + * initialized ctl data. */ - assert(i < ctl_stats->narenas || (!validate && i == - ctl_stats->narenas)); + assert(i < ctl_arenas->narenas || (!validate && i == + ctl_arenas->narenas)); a = (unsigned)i + 2; } break; @@ -473,103 +472,127 @@ stats_arenas_i2a_impl(size_t i, bool compat, bool validate) } static unsigned -stats_arenas_i2a(size_t i) +arenas_i2a(size_t i) { - return (stats_arenas_i2a_impl(i, true, false)); + return (arenas_i2a_impl(i, true, false)); } -static ctl_arena_stats_t * -stats_arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) +static ctl_arena_t * +arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { - ctl_arena_stats_t *ret; + ctl_arena_t *ret; assert(!compat || !init); - ret = ctl_stats->arenas[stats_arenas_i2a_impl(i, compat, false)]; + ret = ctl_arenas->arenas[arenas_i2a_impl(i, compat, false)]; if (init && ret == NULL) { - ret = (ctl_arena_stats_t *)base_alloc(tsdn, b0get(), - sizeof(ctl_arena_stats_t), QUANTUM); - if (ret == NULL) - return (NULL); + if (config_stats) { + struct container_s { + ctl_arena_t ctl_arena; + ctl_arena_stats_t astats; + }; + struct container_s *cont = + (struct container_s *)base_alloc(tsdn, b0get(), + sizeof(struct container_s), QUANTUM); + if (cont == NULL) { + return NULL; + } + ret = &cont->ctl_arena; + ret->astats = &cont->astats; + } else { + ret = (ctl_arena_t *)base_alloc(tsdn, b0get(), + sizeof(ctl_arena_t), QUANTUM); + if (ret == NULL) { + return NULL; + } + } ret->arena_ind = (unsigned)i; - ctl_stats->arenas[stats_arenas_i2a_impl(i, compat, false)] = - ret; + ctl_arenas->arenas[arenas_i2a_impl(i, compat, false)] = ret; } - assert(ret == NULL || stats_arenas_i2a(ret->arena_ind) == - stats_arenas_i2a(i)); + assert(ret == NULL 
|| arenas_i2a(ret->arena_ind) == arenas_i2a(i)); return (ret); } -static ctl_arena_stats_t * -stats_arenas_i(size_t i) +static ctl_arena_t * +arenas_i(size_t i) { - ctl_arena_stats_t *ret = stats_arenas_i_impl(TSDN_NULL, i, true, false); + ctl_arena_t *ret = arenas_i_impl(TSDN_NULL, i, true, false); assert(ret != NULL); return (ret); } static void -ctl_arena_clear(ctl_arena_stats_t *astats) +ctl_arena_clear(ctl_arena_t *ctl_arena) { - astats->nthreads = 0; - astats->dss = dss_prec_names[dss_prec_limit]; - astats->decay_time = -1; - astats->pactive = 0; - astats->pdirty = 0; + ctl_arena->nthreads = 0; + ctl_arena->dss = dss_prec_names[dss_prec_limit]; + ctl_arena->decay_time = -1; + ctl_arena->pactive = 0; + ctl_arena->pdirty = 0; if (config_stats) { - memset(&astats->astats, 0, sizeof(arena_stats_t)); - astats->allocated_small = 0; - astats->nmalloc_small = 0; - astats->ndalloc_small = 0; - astats->nrequests_small = 0; - memset(astats->bstats, 0, NBINS * sizeof(malloc_bin_stats_t)); - memset(astats->lstats, 0, (NSIZES - NBINS) * + memset(&ctl_arena->astats->astats, 0, sizeof(arena_stats_t)); + ctl_arena->astats->allocated_small = 0; + ctl_arena->astats->nmalloc_small = 0; + ctl_arena->astats->ndalloc_small = 0; + ctl_arena->astats->nrequests_small = 0; + memset(ctl_arena->astats->bstats, 0, NBINS * + sizeof(malloc_bin_stats_t)); + memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * sizeof(malloc_large_stats_t)); } } static void -ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_stats_t *cstats, arena_t *arena) +ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { unsigned i; if (config_stats) { - arena_stats_merge(tsdn, arena, &cstats->nthreads, &cstats->dss, - &cstats->decay_time, &cstats->pactive, &cstats->pdirty, - &cstats->astats, cstats->bstats, cstats->lstats); + arena_stats_merge(tsdn, arena, &ctl_arena->nthreads, + &ctl_arena->dss, &ctl_arena->decay_time, + &ctl_arena->pactive, &ctl_arena->pdirty, + &ctl_arena->astats->astats, 
ctl_arena->astats->bstats, + ctl_arena->astats->lstats); for (i = 0; i < NBINS; i++) { - cstats->allocated_small += cstats->bstats[i].curregs * + ctl_arena->astats->allocated_small += + ctl_arena->astats->bstats[i].curregs * index2size(i); - cstats->nmalloc_small += cstats->bstats[i].nmalloc; - cstats->ndalloc_small += cstats->bstats[i].ndalloc; - cstats->nrequests_small += cstats->bstats[i].nrequests; + ctl_arena->astats->nmalloc_small += + ctl_arena->astats->bstats[i].nmalloc; + ctl_arena->astats->ndalloc_small += + ctl_arena->astats->bstats[i].ndalloc; + ctl_arena->astats->nrequests_small += + ctl_arena->astats->bstats[i].nrequests; } } else { - arena_basic_stats_merge(tsdn, arena, &cstats->nthreads, - &cstats->dss, &cstats->decay_time, &cstats->pactive, - &cstats->pdirty); + arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, + &ctl_arena->dss, &ctl_arena->decay_time, + &ctl_arena->pactive, &ctl_arena->pdirty); } } static void -ctl_arena_stats_sdmerge(ctl_arena_stats_t *sdstats, ctl_arena_stats_t *astats, +ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, bool destroyed) { unsigned i; if (!destroyed) { - sdstats->nthreads += astats->nthreads; - sdstats->pactive += astats->pactive; - sdstats->pdirty += astats->pdirty; + ctl_sdarena->nthreads += ctl_arena->nthreads; + ctl_sdarena->pactive += ctl_arena->pactive; + ctl_sdarena->pdirty += ctl_arena->pdirty; } else { - assert(astats->nthreads == 0); - assert(astats->pactive == 0); - assert(astats->pdirty == 0); + assert(ctl_arena->nthreads == 0); + assert(ctl_arena->pactive == 0); + assert(ctl_arena->pdirty == 0); } if (config_stats) { + ctl_arena_stats_t *sdstats = ctl_sdarena->astats; + ctl_arena_stats_t *astats = ctl_arena->astats; + if (!destroyed) { sdstats->astats.mapped += astats->astats.mapped; sdstats->astats.retained += astats->astats.retained; @@ -648,39 +671,40 @@ ctl_arena_stats_sdmerge(ctl_arena_stats_t *sdstats, ctl_arena_stats_t *astats, } static void 
-ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_stats_t *sdstats, +ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, unsigned i, bool destroyed) { - ctl_arena_stats_t *astats = stats_arenas_i(i); + ctl_arena_t *ctl_arena = arenas_i(i); - ctl_arena_clear(astats); - ctl_arena_stats_amerge(tsdn, astats, arena); + ctl_arena_clear(ctl_arena); + ctl_arena_stats_amerge(tsdn, ctl_arena, arena); /* Merge into sum stats as well. */ - ctl_arena_stats_sdmerge(sdstats, astats, destroyed); + ctl_arena_stats_sdmerge(ctl_sdarena, ctl_arena, destroyed); } static unsigned ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { unsigned arena_ind; - ctl_arena_stats_t *astats; + ctl_arena_t *ctl_arena; - if ((astats = ql_last(&ctl_stats->destroyed, destroyed_link)) != NULL) { - ql_remove(&ctl_stats->destroyed, astats, destroyed_link); - arena_ind = astats->arena_ind; + if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) != + NULL) { + ql_remove(&ctl_arenas->destroyed, ctl_arena, destroyed_link); + arena_ind = ctl_arena->arena_ind; } else - arena_ind = ctl_stats->narenas; + arena_ind = ctl_arenas->narenas; /* Trigger stats allocation. */ - if (stats_arenas_i_impl(tsdn, arena_ind, false, true) == NULL) + if (arenas_i_impl(tsdn, arena_ind, false, true) == NULL) return (UINT_MAX); /* Initialize new arena. */ if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) return (UINT_MAX); - if (arena_ind == ctl_stats->narenas) - ctl_stats->narenas++; + if (arena_ind == ctl_arenas->narenas) + ctl_arenas->narenas++; return (arena_ind); } @@ -689,39 +713,41 @@ static void ctl_refresh(tsdn_t *tsdn) { unsigned i; - ctl_arena_stats_t *sstats = stats_arenas_i(MALLCTL_ARENAS_ALL); - VARIABLE_ARRAY(arena_t *, tarenas, ctl_stats->narenas); + ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL); + VARIABLE_ARRAY(arena_t *, tarenas, ctl_arenas->narenas); /* * Clear sum stats, since they will be merged into by * ctl_arena_refresh(). 
*/ - ctl_arena_clear(sstats); + ctl_arena_clear(ctl_sarena); - for (i = 0; i < ctl_stats->narenas; i++) + for (i = 0; i < ctl_arenas->narenas; i++) { tarenas[i] = arena_get(tsdn, i, false); + } - for (i = 0; i < ctl_stats->narenas; i++) { - ctl_arena_stats_t *astats = stats_arenas_i(i); + for (i = 0; i < ctl_arenas->narenas; i++) { + ctl_arena_t *ctl_arena = arenas_i(i); bool initialized = (tarenas[i] != NULL); - astats->initialized = initialized; - if (initialized) - ctl_arena_refresh(tsdn, tarenas[i], sstats, i, false); + ctl_arena->initialized = initialized; + if (initialized) { + ctl_arena_refresh(tsdn, tarenas[i], ctl_sarena, i, + false); + } } if (config_stats) { - ctl_stats->allocated = sstats->allocated_small + - sstats->astats.allocated_large; - ctl_stats->active = (sstats->pactive << LG_PAGE); - ctl_stats->metadata = sstats->astats.base + - sstats->astats.internal; - ctl_stats->resident = sstats->astats.resident; - ctl_stats->mapped = sstats->astats.mapped; - ctl_stats->retained = sstats->astats.retained; + ctl_stats->allocated = ctl_sarena->astats->allocated_small + + ctl_sarena->astats->astats.allocated_large; + ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); + ctl_stats->metadata = ctl_sarena->astats->astats.base + + ctl_sarena->astats->astats.internal; + ctl_stats->resident = ctl_sarena->astats->astats.resident; + ctl_stats->mapped = ctl_sarena->astats->astats.mapped; + ctl_stats->retained = ctl_sarena->astats->astats.retained; } - - ctl_stats->epoch++; + ctl_arenas->epoch++; } static bool @@ -731,14 +757,23 @@ ctl_init(tsdn_t *tsdn) malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { - ctl_arena_stats_t *sstats, *dstats; + ctl_arena_t *ctl_sarena, *ctl_darena; unsigned i; /* - * Allocate demand-zeroed space for pointers to the full range - * of supported arena indices. + * Allocate demand-zeroed space for pointers to the full + * range of supported arena indices. 
*/ - if (ctl_stats == NULL) { + if (ctl_arenas == NULL) { + ctl_arenas = (ctl_arenas_t *)base_alloc(tsdn, + b0get(), sizeof(ctl_arenas_t), QUANTUM); + if (ctl_arenas == NULL) { + ret = true; + goto label_return; + } + } + + if (config_stats && ctl_stats == NULL) { ctl_stats = (ctl_stats_t *)base_alloc(tsdn, b0get(), sizeof(ctl_stats_t), QUANTUM); if (ctl_stats == NULL) { @@ -748,40 +783,40 @@ ctl_init(tsdn_t *tsdn) } /* - * Allocate space for the current full range of arenas here - * rather than doing it lazily elsewhere, in order to limit when - * OOM-caused errors can occur. + * Allocate space for the current full range of arenas + * here rather than doing it lazily elsewhere, in order + * to limit when OOM-caused errors can occur. */ - if ((sstats = stats_arenas_i_impl(tsdn, MALLCTL_ARENAS_ALL, + if ((ctl_sarena = arenas_i_impl(tsdn, MALLCTL_ARENAS_ALL, false, + true)) == NULL) { + ret = true; + goto label_return; + } + ctl_sarena->initialized = true; + + if ((ctl_darena = arenas_i_impl(tsdn, MALLCTL_ARENAS_DESTROYED, false, true)) == NULL) { ret = true; goto label_return; } - sstats->initialized = true; - - if ((dstats = stats_arenas_i_impl(tsdn, - MALLCTL_ARENAS_DESTROYED, false, true)) == NULL) { - ret = true; - goto label_return; - } - ctl_arena_clear(dstats); + ctl_arena_clear(ctl_darena); /* - * Don't toggle stats for MALLCTL_ARENAS_DESTROYED to - * initialized until an arena is actually destroyed, so that - * arena..initialized can be used to query whether the stats - * are relevant. + * Don't toggle ctl_darena to initialized until an arena is + * actually destroyed, so that arena..initialized can be used + * to query whether the stats are relevant. 
*/ - ctl_stats->narenas = narenas_total_get(); - for (i = 0; i < ctl_stats->narenas; i++) { - if (stats_arenas_i_impl(tsdn, i, false, true) == NULL) { + ctl_arenas->narenas = narenas_total_get(); + for (i = 0; i < ctl_arenas->narenas; i++) { + if (arenas_i_impl(tsdn, i, false, true) == NULL) { ret = true; goto label_return; } } - ql_new(&ctl_stats->destroyed); + ql_new(&ctl_arenas->destroyed); ctl_refresh(tsdn); + ctl_initialized = true; } @@ -1228,7 +1263,7 @@ epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, WRITE(newval, uint64_t); if (newp != NULL) ctl_refresh(tsd_tsdn(tsd)); - READ(ctl_stats->epoch, uint64_t); + READ(ctl_arenas->epoch, uint64_t); ret = 0; label_return: @@ -1526,7 +1561,7 @@ arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MIB_UNSIGNED(arena_ind, 1); malloc_mutex_lock(tsdn, &ctl_mtx); - initialized = stats_arenas_i(arena_ind)->initialized; + initialized = arenas_i(arena_ind)->initialized; malloc_mutex_unlock(tsdn, &ctl_mtx); READ(initialized, bool); @@ -1541,7 +1576,7 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { malloc_mutex_lock(tsdn, &ctl_mtx); { - unsigned narenas = ctl_stats->narenas; + unsigned narenas = ctl_arenas->narenas; /* * Access via index narenas is deprecated, and scheduled for @@ -1666,7 +1701,7 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned arena_ind; arena_t *arena; - ctl_arena_stats_t *dstats, *astats; + ctl_arena_t *ctl_darena, *ctl_arena; ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); @@ -1682,16 +1717,16 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Merge stats after resetting and purging arena. 
*/ arena_reset(tsd, arena); arena_purge(tsd_tsdn(tsd), arena, true); - dstats = stats_arenas_i(MALLCTL_ARENAS_DESTROYED); - dstats->initialized = true; - ctl_arena_refresh(tsd_tsdn(tsd), arena, dstats, arena_ind, true); + ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED); + ctl_darena->initialized = true; + ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true); /* Destroy arena. */ arena_destroy(tsd, arena); - astats = stats_arenas_i(arena_ind); - astats->initialized = false; + ctl_arena = arenas_i(arena_ind); + ctl_arena->initialized = false; /* Record arena index for later recycling via arenas.create. */ - ql_elm_new(astats, destroyed_link); - ql_tail_insert(&ctl_stats->destroyed, astats, destroyed_link); + ql_elm_new(ctl_arena, destroyed_link); + ql_tail_insert(&ctl_arenas->destroyed, ctl_arena, destroyed_link); assert(ret == 0); label_return: @@ -1734,7 +1769,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, * 6.0.0. */ if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == - ctl_stats->narenas) { + ctl_arenas->narenas) { if (dss_prec != dss_prec_limit && extent_dss_prec_set(dss_prec)) { ret = EFAULT; @@ -1842,7 +1877,7 @@ arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) case MALLCTL_ARENAS_DESTROYED: break; default: - if (i > ctl_stats->narenas) { + if (i > ctl_arenas->narenas) { ret = NULL; goto label_return; } @@ -1870,7 +1905,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - narenas = ctl_stats->narenas; + narenas = ctl_arenas->narenas; READ(narenas, unsigned); ret = 0; @@ -2091,67 +2126,66 @@ CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) -CTL_RO_GEN(stats_arenas_i_dss, stats_arenas_i(mib[2])->dss, const char *) -CTL_RO_GEN(stats_arenas_i_decay_time, 
stats_arenas_i(mib[2])->decay_time, +CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) +CTL_RO_GEN(stats_arenas_i_decay_time, arenas_i(mib[2])->decay_time, ssize_t) -CTL_RO_GEN(stats_arenas_i_nthreads, stats_arenas_i(mib[2])->nthreads, - unsigned) -CTL_RO_GEN(stats_arenas_i_pactive, stats_arenas_i(mib[2])->pactive, size_t) -CTL_RO_GEN(stats_arenas_i_pdirty, stats_arenas_i(mib[2])->pdirty, size_t) +CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) +CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - stats_arenas_i(mib[2])->astats.mapped, size_t) + arenas_i(mib[2])->astats->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - stats_arenas_i(mib[2])->astats.retained, size_t) + arenas_i(mib[2])->astats->astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - stats_arenas_i(mib[2])->astats.npurge, uint64_t) + arenas_i(mib[2])->astats->astats.npurge, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - stats_arenas_i(mib[2])->astats.nmadvise, uint64_t) + arenas_i(mib[2])->astats->astats.nmadvise, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - stats_arenas_i(mib[2])->astats.purged, uint64_t) + arenas_i(mib[2])->astats->astats.purged, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - stats_arenas_i(mib[2])->astats.base, size_t) + arenas_i(mib[2])->astats->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, - stats_arenas_i(mib[2])->astats.internal, size_t) + arenas_i(mib[2])->astats->astats.internal, size_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_tcache_bytes, - stats_arenas_i(mib[2])->astats.tcache_bytes, size_t) + arenas_i(mib[2])->astats->astats.tcache_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - stats_arenas_i(mib[2])->astats.resident, size_t) + 
arenas_i(mib[2])->astats->astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, - stats_arenas_i(mib[2])->allocated_small, size_t) + arenas_i(mib[2])->astats->allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, - stats_arenas_i(mib[2])->nmalloc_small, uint64_t) + arenas_i(mib[2])->astats->nmalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, - stats_arenas_i(mib[2])->ndalloc_small, uint64_t) + arenas_i(mib[2])->astats->ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, - stats_arenas_i(mib[2])->nrequests_small, uint64_t) + arenas_i(mib[2])->astats->nrequests_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - stats_arenas_i(mib[2])->astats.allocated_large, size_t) + arenas_i(mib[2])->astats->astats.allocated_large, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - stats_arenas_i(mib[2])->astats.nmalloc_large, uint64_t) + arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - stats_arenas_i(mib[2])->astats.ndalloc_large, uint64_t) + arenas_i(mib[2])->astats->astats.ndalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - stats_arenas_i(mib[2])->astats.nmalloc_large, uint64_t) /* Intentional. */ + arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) /* Intentional. 
*/ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, - stats_arenas_i(mib[2])->bstats[mib[4]].nmalloc, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, - stats_arenas_i(mib[2])->bstats[mib[4]].ndalloc, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, - stats_arenas_i(mib[2])->bstats[mib[4]].nrequests, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, - stats_arenas_i(mib[2])->bstats[mib[4]].curregs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].curregs, size_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, - stats_arenas_i(mib[2])->bstats[mib[4]].nfills, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].nfills, uint64_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, - stats_arenas_i(mib[2])->bstats[mib[4]].nflushes, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].nflushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs, - stats_arenas_i(mib[2])->bstats[mib[4]].nslabs, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].nslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, - stats_arenas_i(mib[2])->bstats[mib[4]].reslabs, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, - stats_arenas_i(mib[2])->bstats[mib[4]].curslabs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, @@ -2163,13 +2197,13 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - stats_arenas_i(mib[2])->lstats[mib[4]].nmalloc, uint64_t) + 
arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - stats_arenas_i(mib[2])->lstats[mib[4]].ndalloc, uint64_t) + arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - stats_arenas_i(mib[2])->lstats[mib[4]].nrequests, uint64_t) + arenas_i(mib[2])->astats->lstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, - stats_arenas_i(mib[2])->lstats[mib[4]].curlextents, size_t) + arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) static const ctl_named_node_t * stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, @@ -2187,8 +2221,8 @@ stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) size_t a; malloc_mutex_lock(tsdn, &ctl_mtx); - a = stats_arenas_i2a_impl(i, true, true); - if (a == UINT_MAX || !ctl_stats->arenas[a]->initialized) { + a = arenas_i2a_impl(i, true, true); + if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { ret = NULL; goto label_return; } diff --git a/src/extent.c b/src/extent.c index 27cf97cd..be40aaad 100644 --- a/src/extent.c +++ b/src/extent.c @@ -820,9 +820,11 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent = extent_recycle(tsdn, arena, r_extent_hooks, arena->extents_retained, false, false, new_addr, usize, pad, alignment, zero, commit, slab); - if (extent != NULL && config_stats) { - size_t size = usize + pad; - arena->stats.retained -= size; + if (extent != NULL) { + if (config_stats) { + size_t size = usize + pad; + arena->stats.retained -= size; + } if (config_prof) extent_gprof_add(tsdn, extent); } diff --git a/test/unit/base.c b/test/unit/base.c index 9aa43eab..76e96da8 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -33,16 +33,20 @@ TEST_BEGIN(test_base_hooks_default) tsdn = tsdn_fetch(); base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); - 
base_stats_get(tsdn, base, &allocated0, &resident, &mapped); - assert_zu_ge(allocated0, sizeof(base_t), - "Base header should count as allocated"); + if (config_stats) { + base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + assert_zu_ge(allocated0, sizeof(base_t), + "Base header should count as allocated"); + } assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); - base_stats_get(tsdn, base, &allocated1, &resident, &mapped); - assert_zu_ge(allocated1 - allocated0, 42, - "At least 42 bytes were allocated by base_alloc()"); + if (config_stats) { + base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + assert_zu_ge(allocated1 - allocated0, 42, + "At least 42 bytes were allocated by base_alloc()"); + } base_delete(base); } @@ -67,16 +71,20 @@ TEST_BEGIN(test_base_hooks_null) base = base_new(tsdn, 0, &hooks); assert_ptr_not_null(base, "Unexpected base_new() failure"); - base_stats_get(tsdn, base, &allocated0, &resident, &mapped); - assert_zu_ge(allocated0, sizeof(base_t), - "Base header should count as allocated"); + if (config_stats) { + base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + assert_zu_ge(allocated0, sizeof(base_t), + "Base header should count as allocated"); + } assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); - base_stats_get(tsdn, base, &allocated1, &resident, &mapped); - assert_zu_ge(allocated1 - allocated0, 42, - "At least 42 bytes were allocated by base_alloc()"); + if (config_stats) { + base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + assert_zu_ge(allocated1 - allocated0, 42, + "At least 42 bytes were allocated by base_alloc()"); + } base_delete(base); From 5154ff32ee8c37bacb6afd8a07b923eb33228357 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 18 Jan 2017 14:04:24 -0800 Subject: [PATCH 0601/2608] Unify the allocation paths This unifies the allocation paths for malloc, posix_memalign, aligned_alloc, calloc, memalign, valloc, 
and mallocx, so that they all share common code where they can. There's more work that could be done here, but I think this is the smallest discrete change in this direction. --- src/jemalloc.c | 957 +++++++++++++++++++++++++++---------------------- 1 file changed, 535 insertions(+), 422 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1dc91833..af2a53a2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1352,250 +1352,457 @@ malloc_init_hard(void) */ /******************************************************************************/ /* - * Begin malloc(3)-compatible functions. + * Begin allocation-path internal functions and data structures. */ -static void * -ialloc_prof_sample(tsd_t *tsd, size_t usize, szind_t ind, bool zero, - prof_tctx_t *tctx, bool slow_path) -{ - void *p; +/* + * Settings determined by the documented behavior of the allocation functions. + */ +typedef struct static_opts_s static_opts_t; +struct static_opts_s { + /* Whether or not allocations of size 0 should be treated as size 1. */ + bool bump_empty_alloc; + /* + * Whether to assert that allocations are not of size 0 (after any + * bumping). + */ + bool assert_nonempty_alloc; - if (tctx == NULL) - return (NULL); - if (usize <= SMALL_MAXCLASS) { - szind_t ind_large = size2index(LARGE_MINCLASS); - p = ialloc(tsd, LARGE_MINCLASS, ind_large, zero, slow_path); - if (p == NULL) - return (NULL); - arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, - usize); - } else - p = ialloc(tsd, usize, ind, zero, slow_path); + /* + * Whether or not to modify the 'result' argument to malloc in case of + * error. + */ + bool null_out_result_on_error; + /* Whether to set errno when we encounter an error condition. */ + bool set_errno_on_error; - return (p); -} + /* + * The minimum valid alignment for functions requesting aligned storage. 
+ */ + size_t min_alignment; -JEMALLOC_ALWAYS_INLINE_C void * -ialloc_prof(tsd_t *tsd, size_t usize, szind_t ind, bool zero, bool slow_path) -{ - void *p; - prof_tctx_t *tctx; + /* The error string to use if we oom. */ + const char *oom_string; + /* The error string to use if the passed-in alignment is invalid. */ + const char *invalid_alignment_string; - tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = ialloc_prof_sample(tsd, usize, ind, zero, tctx, slow_path); - else - p = ialloc(tsd, usize, ind, zero, slow_path); - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize, tctx); + /* + * False if we're configured to skip some time-consuming operations. + * + * This isn't really a malloc "behavior", but it acts as a useful + * summary of several other static (or at least, static after program + * initialization) options. + */ + bool slow; +}; - return (p); +JEMALLOC_ALWAYS_INLINE_C void +static_opts_init(static_opts_t *static_opts) { + static_opts->bump_empty_alloc = false; + static_opts->assert_nonempty_alloc = false; + static_opts->null_out_result_on_error = false; + static_opts->set_errno_on_error = false; + static_opts->min_alignment = 0; + static_opts->oom_string = ""; + static_opts->invalid_alignment_string = ""; + static_opts->slow = false; } /* - * ialloc_body() is inlined so that fast and slow paths are generated separately - * with statically known slow_path. - * - * This function guarantees that *tsdn is non-NULL on success. + * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we + * should have one constant here per magic value there. Note however that the + * representations need not be related. 
*/ -JEMALLOC_ALWAYS_INLINE_C void * -ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, - bool slow_path) -{ - tsd_t *tsd; - szind_t ind; +#define TCACHE_IND_NONE ((unsigned)-1) +#define TCACHE_IND_AUTOMATIC ((unsigned)-2) +#define ARENA_IND_AUTOMATIC ((unsigned)-1) - if (slow_path && unlikely(malloc_init())) { - *tsdn = NULL; - return (NULL); - } - - tsd = tsd_fetch(); - *tsdn = tsd_tsdn(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); - - ind = size2index(size); - if (unlikely(ind >= NSIZES)) - return (NULL); - - if (config_stats || (config_prof && opt_prof)) { - *usize = index2size(ind); - assert(*usize > 0 && *usize <= LARGE_MAXCLASS); - } - - if (config_prof && opt_prof) - return (ialloc_prof(tsd, *usize, ind, zero, slow_path)); - - return (ialloc(tsd, size, ind, zero, slow_path)); -} +typedef struct dynamic_opts_s dynamic_opts_t; +struct dynamic_opts_s { + void **result; + size_t num_items; + size_t item_size; + size_t alignment; + bool zero; + unsigned tcache_ind; + unsigned arena_ind; +}; JEMALLOC_ALWAYS_INLINE_C void -ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, - bool update_errno, bool slow_path) -{ - assert(!tsdn_null(tsdn) || ret == NULL); - - if (unlikely(ret == NULL)) { - if (slow_path && config_xmalloc && unlikely(opt_xmalloc)) { - malloc_printf(": Error in %s(): out of " - "memory\n", func); - abort(); - } - if (update_errno) - set_errno(ENOMEM); - } - if (config_stats && likely(ret != NULL)) { - assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret)); - *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; - } - witness_assert_lockless(tsdn); +dynamic_opts_init(dynamic_opts_t *dynamic_opts) { + dynamic_opts->result = NULL; + dynamic_opts->num_items = 0; + dynamic_opts->item_size = 0; + dynamic_opts->alignment = 0; + dynamic_opts->zero = false; + dynamic_opts->tcache_ind = TCACHE_IND_AUTOMATIC; + dynamic_opts->arena_ind = ARENA_IND_AUTOMATIC; } +/* ind is ignored if dopts->alignment > 0. 
*/ +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, + size_t size, size_t usize, szind_t ind) { + tcache_t *tcache; + arena_t *arena; + + /* Fill in the tcache. */ + if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) { + tcache = tcache_get(tsd, true); + } else if (dopts->tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, dopts->tcache_ind); + } + + /* Fill in the arena. */ + if (dopts->arena_ind == ARENA_IND_AUTOMATIC) { + /* + * In case of automatic arena management, we defer arena + * computation until as late as we can, hoping to fill the + * allocation out of the tcache. + */ + arena = NULL; + } else { + arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true); + } + + if (unlikely(dopts->alignment != 0)) { + return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment, + dopts->zero, tcache, arena); + } + + return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false, + arena, sopts->slow); +} + +JEMALLOC_ALWAYS_INLINE_C void * +imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, + size_t usize, szind_t ind) { + void *ret; + + /* + * For small allocations, sampling bumps the usize. If so, we allocate + * from the ind_large bucket. + */ + szind_t ind_large; + size_t bumped_usize = usize; + + if (usize <= SMALL_MAXCLASS) { + assert(((dopts->alignment == 0) ? s2u(LARGE_MINCLASS) : + sa2u(LARGE_MINCLASS, dopts->alignment)) == LARGE_MINCLASS); + ind_large = size2index(LARGE_MINCLASS); + bumped_usize = s2u(LARGE_MINCLASS); + ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, + bumped_usize, ind_large); + if (unlikely(ret == NULL)) { + return NULL; + } + arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ret), + ret, usize); + } else { + ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); + } + + return ret; +} + +/* + * Returns true if the allocation will overflow, and false otherwise. Sets + * *size to the product either way. 
+ */ +JEMALLOC_ALWAYS_INLINE_C bool +compute_size_with_overflow(dynamic_opts_t *dopts, size_t *size) { + /* + * This function is just num_items * item_size, except that we have to + * check for overflow. + */ + + /* A size_t with its high-half bits all set to 1. */ + const static size_t high_bits = SIZE_T_MAX >> (sizeof(size_t) * 8 / 2); + + *size = dopts->item_size * dopts->num_items; + + if (unlikely(*size == 0)) { + return (dopts->num_items != 0 && dopts->item_size != 0); + } + + /* + * We got a non-zero size, but we don't know if we overflowed to get + * there. To avoid having to do a divide, we'll be clever and note that + * if both A and B can be represented in N/2 bits, then their product + * can be represented in N bits (without the possibility of overflow). + */ + if (likely((high_bits & (dopts->num_items | dopts->item_size)) == 0)) { + return false; + } + if (likely(*size / dopts->item_size == dopts->num_items)) { + return false; + } + return true; +} + +JEMALLOC_ALWAYS_INLINE_C int +imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { + /* Where the actual allocated memory will live. */ + void *allocation = NULL; + /* Filled in by compute_size_with_overflow below. */ + size_t size = 0; + /* We compute a value for this right before allocating. */ + tsd_t *tsd = NULL; + /* + * For unaligned allocations, we need only ind. For aligned + * allocations, or in case of stats or profiling we need usize. + * + * These are actually dead stores, in that their values are reset before + * any branch on their value is taken. Sometimes though, it's + * convenient to pass them as arguments before this point. To avoid + * undefined behavior then, we initialize them with dummy stores. + */ + szind_t ind = 0; + size_t usize = 0; + + /* Initialize (if we can't prove we don't have to). */ + if (sopts->slow) { + if (unlikely(malloc_init())) { + goto label_oom; + } + } + + /* Compute the amount of memory the user wants. 
*/ + bool overflow = compute_size_with_overflow(dopts, &size); + if (unlikely(overflow)) { + goto label_oom; + } + + /* Validate the user input. */ + if (sopts->bump_empty_alloc) { + if (unlikely(size == 0)) { + size = 1; + } + } + + if (sopts->assert_nonempty_alloc) { + assert (size != 0); + } + + if (unlikely(dopts->alignment < sopts->min_alignment + || (dopts->alignment & (dopts->alignment - 1)) != 0)) { + goto label_invalid_alignment; + } + + /* This is the beginning of the "core" algorithm. */ + + if (dopts->alignment == 0) { + ind = size2index(size); + if (unlikely(ind >= NSIZES)) { + goto label_oom; + } + if (config_stats || (config_prof && opt_prof)) { + usize = index2size(ind); + assert(usize > 0 && usize <= LARGE_MAXCLASS); + } + } else { + usize = sa2u(size, dopts->alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + goto label_oom; + } + } + + /* + * We always need the tsd, even if we aren't going to use the tcache for + * some reason. Let's grab it right away. + */ + tsd = tsd_fetch(); + witness_assert_lockless(tsd_tsdn(tsd)); + + + /* If profiling is on, get our profiling context. */ + if (config_prof && opt_prof) { + /* + * Note that if we're going down this path, usize must have been + * initialized in the previous if statement. + */ + prof_tctx_t *tctx = prof_alloc_prep( + tsd, usize, prof_active_get_unlocked(), true); + if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + allocation = imalloc_no_sample( + sopts, dopts, tsd, usize, usize, ind); + } else if ((uintptr_t)tctx > (uintptr_t)1U) { + /* + * Note that ind might still be 0 here. This is fine; + * imalloc_sample ignores ind if dopts->alignment > 0. 
+ */ + allocation = imalloc_sample( + sopts, dopts, tsd, usize, ind); + } else { + allocation = NULL; + } + + if (unlikely(allocation == NULL)) { + prof_alloc_rollback(tsd, tctx, true); + goto label_oom; + } + + prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), allocation), + allocation, usize, tctx); + + } else { + /* + * If dopts->alignment > 0, then ind is still 0, but usize was + * computed in the previous if statement. Down the positive + * alignment path, imalloc_no_sample ind and size (relying only + * on usize). + */ + allocation = imalloc_no_sample(sopts, dopts, tsd, usize, usize, + ind); + if (unlikely(allocation == NULL)) { + goto label_oom; + } + } + + /* + * Allocation has been done at this point. We still have some + * post-allocation work to do though. + */ + assert(dopts->alignment == 0 + || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); + + if (config_stats) { + assert(usize == isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + allocation), allocation)); + *tsd_thread_allocatedp_get(tsd) += usize; + } + + if (sopts->slow) { + UTRACE(0, size, allocation); + } + + witness_assert_lockless(tsd_tsdn(tsd)); + + + + /* Success! */ + *dopts->result = allocation; + return 0; + +label_oom: + if (unlikely(sopts->slow) && config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(sopts->oom_string); + abort(); + } + + if (sopts->slow) { + UTRACE(NULL, size, NULL); + } + + witness_assert_lockless(tsd_tsdn(tsd)); + + if (sopts->set_errno_on_error) { + set_errno(ENOMEM); + } + + if (sopts->null_out_result_on_error) { + *dopts->result = NULL; + } + + return ENOMEM; + + /* + * This label is only jumped to by one goto; we move it out of line + * anyways to avoid obscuring the non-error paths, and for symmetry with + * the oom case. 
+ */ +label_invalid_alignment: + if (config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(sopts->invalid_alignment_string); + abort(); + } + + if (sopts->set_errno_on_error) { + set_errno(EINVAL); + } + + if (sopts->slow) { + UTRACE(NULL, size, NULL); + } + + witness_assert_lockless(tsd_tsdn(tsd)); + + if (sopts->null_out_result_on_error) { + *dopts->result = NULL; + } + + return EINVAL; +} + +/* Returns the errno-style error code of the allocation. */ +JEMALLOC_ALWAYS_INLINE_C int +imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { + if (unlikely(malloc_slow)) { + sopts->slow = true; + return imalloc_body(sopts, dopts); + } else { + sopts->slow = false; + return imalloc_body(sopts, dopts); + } +} +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. + */ + JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { void *ret; - tsdn_t *tsdn; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); + static_opts_t sopts; + dynamic_opts_t dopts; - if (size == 0) - size = 1; + static_opts_init(&sopts); + dynamic_opts_init(&dopts); - if (likely(!malloc_slow)) { - ret = ialloc_body(size, false, &tsdn, &usize, false); - ialloc_post_check(ret, tsdn, usize, "malloc", true, false); - } else { - ret = ialloc_body(size, false, &tsdn, &usize, true); - ialloc_post_check(ret, tsdn, usize, "malloc", true, true); - UTRACE(0, size, ret); - } + sopts.bump_empty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.oom_string = ": Error in malloc(): out of memory\n"; - return (ret); -} + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; -static void * -imemalign_prof_sample(tsd_t *tsd, size_t alignment, size_t usize, - prof_tctx_t *tctx) -{ - void *p; + imalloc(&sopts, &dopts); - if (tctx == NULL) - return (NULL); - if (usize <= SMALL_MAXCLASS) { - 
assert(sa2u(LARGE_MINCLASS, alignment) == LARGE_MINCLASS); - p = ipalloc(tsd, LARGE_MINCLASS, alignment, false); - if (p == NULL) - return (NULL); - arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, - usize); - } else - p = ipalloc(tsd, usize, alignment, false); - - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -imemalign_prof(tsd_t *tsd, size_t alignment, size_t usize) -{ - void *p; - prof_tctx_t *tctx; - - tctx = prof_alloc_prep(tsd, usize, prof_active_get_unlocked(), true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) - p = imemalign_prof_sample(tsd, alignment, usize, tctx); - else - p = ipalloc(tsd, usize, alignment, false); - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize, tctx); - - return (p); -} - -JEMALLOC_ATTR(nonnull(1)) -static int -imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) -{ - int ret; - tsd_t *tsd; - size_t usize; - void *result; - - assert(min_alignment != 0); - - if (unlikely(malloc_init())) { - tsd = NULL; - result = NULL; - goto label_oom; - } - tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - if (size == 0) - size = 1; - - /* Make sure that alignment is a large enough power of 2. 
*/ - if (unlikely(((alignment - 1) & alignment) != 0 - || (alignment < min_alignment))) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error allocating " - "aligned memory: invalid alignment\n"); - abort(); - } - result = NULL; - ret = EINVAL; - goto label_return; - } - - usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - result = NULL; - goto label_oom; - } - - if (config_prof && opt_prof) - result = imemalign_prof(tsd, alignment, usize); - else - result = ipalloc(tsd, usize, alignment, false); - if (unlikely(result == NULL)) - goto label_oom; - assert(((uintptr_t)result & (alignment - 1)) == ZU(0)); - - *memptr = result; - ret = 0; -label_return: - if (config_stats && likely(result != NULL)) { - assert(usize == isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - result), result)); - *tsd_thread_allocatedp_get(tsd) += usize; - } - UTRACE(0, size, result); - witness_assert_lockless(tsd_tsdn(tsd)); - return (ret); -label_oom: - assert(result == NULL); - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error allocating aligned memory: " - "out of memory\n"); - abort(); - } - ret = ENOMEM; - witness_assert_lockless(tsd_tsdn(tsd)); - goto label_return; + return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { - return (imemalign(memptr, alignment, size, sizeof(void *))); + int ret; + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.bump_empty_alloc = true; + sopts.min_alignment = sizeof(void *); + sopts.oom_string = + ": Error allocating aligned memory: out of memory\n"; + sopts.invalid_alignment_string = + ": Error allocating aligned memory: invalid alignment\n"; + + dopts.result = memptr; + dopts.num_items = 1; + dopts.item_size = size; + dopts.alignment = alignment; + + ret = imalloc(&sopts, &dopts); + return ret; } JEMALLOC_EXPORT 
JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1604,12 +1811,28 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) je_aligned_alloc(size_t alignment, size_t size) { void *ret; - int err; - if (unlikely((err = imemalign(&ret, alignment, size, 1)) != 0)) { - ret = NULL; - set_errno(err); - } + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.bump_empty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.min_alignment = 1; + sopts.oom_string = + ": Error allocating aligned memory: out of memory\n"; + sopts.invalid_alignment_string = + ": Error allocating aligned memory: invalid alignment\n"; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + dopts.alignment = alignment; + + imalloc(&sopts, &dopts); return (ret); } @@ -1619,35 +1842,25 @@ JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) je_calloc(size_t num, size_t size) { void *ret; - tsdn_t *tsdn; - size_t num_size; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); + static_opts_t sopts; + dynamic_opts_t dopts; - num_size = num * size; - if (unlikely(num_size == 0)) { - if (num == 0 || size == 0) - num_size = 1; - else - num_size = LARGE_MAXCLASS + 1; /* Trigger OOM. */ - /* - * Try to avoid division here. We know that it isn't possible to - * overflow during multiplication if neither operand uses any of the - * most significant half of the bits in a size_t. - */ - } else if (unlikely(((num | size) & (SIZE_T_MAX << (sizeof(size_t) << - 2))) && (num_size / size != num))) - num_size = LARGE_MAXCLASS + 1; /* size_t overflow. 
*/ + static_opts_init(&sopts); + dynamic_opts_init(&dopts); - if (likely(!malloc_slow)) { - ret = ialloc_body(num_size, true, &tsdn, &usize, false); - ialloc_post_check(ret, tsdn, usize, "calloc", true, false); - } else { - ret = ialloc_body(num_size, true, &tsdn, &usize, true); - ialloc_post_check(ret, tsdn, usize, "calloc", true, true); - UTRACE(0, num_size, ret); - } + sopts.bump_empty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.oom_string = ": Error in calloc(): out of memory\n"; - return (ret); + dopts.result = &ret; + dopts.num_items = num; + dopts.item_size = size; + dopts.zero = true; + + imalloc(&sopts, &dopts); + + return ret; } static void * @@ -1795,11 +2008,7 @@ je_realloc(void *ptr, size_t size) tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - if (likely(!malloc_slow)) - ret = ialloc_body(size, false, &tsdn, &usize, false); - else - ret = ialloc_body(size, false, &tsdn, &usize, true); - assert(!tsdn_null(tsdn) || ret == NULL); + return je_malloc(size); } if (unlikely(ret == NULL)) { @@ -1852,10 +2061,28 @@ void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { - void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - if (unlikely(imemalign(&ret, alignment, size, 1) != 0)) - ret = NULL; - return (ret); + void *ret; + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.bump_empty_alloc = true; + sopts.min_alignment = 1; + sopts.oom_string = + ": Error allocating aligned memory: out of memory\n"; + sopts.invalid_alignment_string = + ": Error allocating aligned memory: invalid alignment\n"; + sopts.null_out_result_on_error = true; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + dopts.alignment = alignment; + + imalloc(&sopts, &dopts); + return ret; } #endif @@ -1865,9 +2092,29 @@ void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) je_valloc(size_t size) { 
- void *ret JEMALLOC_CC_SILENCE_INIT(NULL); - if (unlikely(imemalign(&ret, PAGE, size, 1) != 0)) - ret = NULL; + void *ret; + + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.bump_empty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.min_alignment = PAGE; + sopts.oom_string = + ": Error allocating aligned memory: out of memory\n"; + sopts.invalid_alignment_string = + ": Error allocating aligned memory: invalid alignment\n"; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + dopts.alignment = PAGE; + + imalloc(&sopts, &dopts); + return (ret); } #endif @@ -1930,183 +2177,49 @@ int __posix_memalign(void** r, size_t a, size_t s) * Begin non-standard functions. */ -JEMALLOC_ALWAYS_INLINE_C bool -imallocx_flags_decode(tsd_t *tsd, size_t size, int flags, size_t *usize, - size_t *alignment, bool *zero, tcache_t **tcache, arena_t **arena) -{ - if ((flags & MALLOCX_LG_ALIGN_MASK) == 0) { - *alignment = 0; - *usize = s2u(size); - } else { - *alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); - *usize = sa2u(size, *alignment); - } - if (unlikely(*usize == 0 || *usize > LARGE_MAXCLASS)) - return (true); - *zero = MALLOCX_ZERO_GET(flags); - if ((flags & MALLOCX_TCACHE_MASK) != 0) { - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) - *tcache = NULL; - else - *tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } else - *tcache = tcache_get(tsd, true); - if ((flags & MALLOCX_ARENA_MASK) != 0) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - *arena = arena_get(tsd_tsdn(tsd), arena_ind, true); - if (unlikely(*arena == NULL)) - return (true); - } else - *arena = NULL; - return (false); -} - -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_flags(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena, bool slow_path) -{ - szind_t ind; - - if (unlikely(alignment != 0)) - return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); - 
ind = size2index(usize); - assert(ind < NSIZES); - return (iallocztm(tsdn, usize, ind, zero, tcache, false, arena, - slow_path)); -} - -static void * -imallocx_prof_sample(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena, bool slow_path) -{ - void *p; - - if (usize <= SMALL_MAXCLASS) { - assert(((alignment == 0) ? s2u(LARGE_MINCLASS) : - sa2u(LARGE_MINCLASS, alignment)) == LARGE_MINCLASS); - p = imallocx_flags(tsdn, LARGE_MINCLASS, alignment, zero, - tcache, arena, slow_path); - if (p == NULL) - return (NULL); - arena_prof_promote(tsdn, iealloc(tsdn, p), p, usize); - } else - p = imallocx_flags(tsdn, usize, alignment, zero, tcache, arena, - slow_path); - - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, bool slow_path) -{ - void *p; - size_t alignment; - bool zero; - tcache_t *tcache; - arena_t *arena; - prof_tctx_t *tctx; - - if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, - &zero, &tcache, &arena))) - return (NULL); - tctx = prof_alloc_prep(tsd, *usize, prof_active_get_unlocked(), true); - if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, - tcache, arena, slow_path); - } else if ((uintptr_t)tctx > (uintptr_t)1U) { - p = imallocx_prof_sample(tsd_tsdn(tsd), *usize, alignment, zero, - tcache, arena, slow_path); - } else - p = NULL; - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return (NULL); - } - prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, *usize, tctx); - - assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - return (p); -} - -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_no_prof(tsd_t *tsd, size_t size, int flags, size_t *usize, - bool slow_path) -{ - void *p; - size_t alignment; - bool zero; - tcache_t *tcache; - arena_t *arena; - - if (unlikely(imallocx_flags_decode(tsd, size, flags, usize, &alignment, - &zero, &tcache, 
&arena))) - return (NULL); - p = imallocx_flags(tsd_tsdn(tsd), *usize, alignment, zero, tcache, - arena, slow_path); - assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - return (p); -} - -/* This function guarantees that *tsdn is non-NULL on success. */ -JEMALLOC_ALWAYS_INLINE_C void * -imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, - bool slow_path) -{ - tsd_t *tsd; - - if (slow_path && unlikely(malloc_init())) { - *tsdn = NULL; - return (NULL); - } - - tsd = tsd_fetch(); - *tsdn = tsd_tsdn(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); - - if (likely(flags == 0)) { - szind_t ind = size2index(size); - if (unlikely(ind >= NSIZES)) - return (NULL); - if (config_stats || (config_prof && opt_prof)) { - *usize = index2size(ind); - assert(*usize > 0 && *usize <= LARGE_MAXCLASS); - } - - if (config_prof && opt_prof) { - return (ialloc_prof(tsd, *usize, ind, false, - slow_path)); - } - - return (ialloc(tsd, size, ind, false, slow_path)); - } - - if (config_prof && opt_prof) - return (imallocx_prof(tsd, size, flags, usize, slow_path)); - - return (imallocx_no_prof(tsd, size, flags, usize, slow_path)); -} - JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_mallocx(size_t size, int flags) { - tsdn_t *tsdn; - void *p; - size_t usize; + void *ret; + static_opts_t sopts; + dynamic_opts_t dopts; - assert(size != 0); + static_opts_init(&sopts); + dynamic_opts_init(&dopts); - if (likely(!malloc_slow)) { - p = imallocx_body(size, flags, &tsdn, &usize, false); - ialloc_post_check(p, tsdn, usize, "mallocx", false, false); - } else { - p = imallocx_body(size, flags, &tsdn, &usize, true); - ialloc_post_check(p, tsdn, usize, "mallocx", false, true); - UTRACE(0, size, p); + sopts.assert_nonempty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.oom_string = ": Error in mallocx(): out of memory\n"; + + dopts.result = &ret; + dopts.num_items = 1; + 
dopts.item_size = size; + if (unlikely(flags != 0)) { + if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { + dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); + } + + dopts.zero = MALLOCX_ZERO_GET(flags); + + if ((flags & MALLOCX_TCACHE_MASK) != 0) { + if ((flags & MALLOCX_TCACHE_MASK) + == MALLOCX_TCACHE_NONE) { + dopts.tcache_ind = TCACHE_IND_NONE; + } else { + dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); + } + } else { + dopts.tcache_ind = TCACHE_IND_AUTOMATIC; + } + + if ((flags & MALLOCX_ARENA_MASK) != 0) + dopts.arena_ind = MALLOCX_ARENA_GET(flags); } - return (p); + imalloc(&sopts, &dopts); + return ret; } static void * From c4c2592c834d8a37beb0a0d53842095160cbf9ee Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 15 Jan 2017 16:56:30 -0800 Subject: [PATCH 0602/2608] Update brace style. Add braces around single-line blocks, and remove line breaks before function-opening braces. This resolves #537. --- include/jemalloc/internal/arena_inlines_a.h | 27 +- include/jemalloc/internal/arena_inlines_b.h | 59 +- include/jemalloc/internal/assert.h | 3 +- include/jemalloc/internal/atomic_inlines.h | 180 ++---- include/jemalloc/internal/base_inlines.h | 3 +- include/jemalloc/internal/bitmap_inlines.h | 24 +- include/jemalloc/internal/extent_inlines.h | 111 ++-- include/jemalloc/internal/hash_inlines.h | 30 +- .../jemalloc/internal/jemalloc_internal.h.in | 172 +++--- .../internal/jemalloc_internal_decls.h | 3 +- include/jemalloc/internal/mutex_inlines.h | 18 +- include/jemalloc/internal/ph.h | 50 +- include/jemalloc/internal/prng_inlines.h | 27 +- include/jemalloc/internal/prof_inlines.h | 59 +- include/jemalloc/internal/qr.h | 6 +- include/jemalloc/internal/rb.h | 3 +- include/jemalloc/internal/rtree_inlines.h | 99 ++- include/jemalloc/internal/spin_inlines.h | 12 +- include/jemalloc/internal/tcache_inlines.h | 90 +-- include/jemalloc/internal/ticker_inlines.h | 15 +- include/jemalloc/internal/tsd_inlines.h | 42 +- include/jemalloc/internal/tsd_types.h | 161 +++-- 
include/jemalloc/internal/util_inlines.h | 48 +- include/jemalloc/internal/util_types.h | 3 +- include/jemalloc/internal/witness_inlines.h | 75 ++- include/msvc_compat/strings.h | 20 +- .../vc2015/test_threads/test_threads.cpp | 3 +- .../vc2015/test_threads/test_threads_main.cpp | 3 +- src/arena.c | 516 ++++++++-------- src/base.c | 99 ++- src/bitmap.c | 30 +- src/ckh.c | 101 ++-- src/ctl.c | 344 +++++------ src/extent.c | 375 ++++++------ src/extent_dss.c | 68 ++- src/extent_mmap.c | 22 +- src/jemalloc.c | 497 +++++++-------- src/jemalloc_cpp.cpp | 33 +- src/large.c | 87 ++- src/mutex.c | 40 +- src/nstime.c | 57 +- src/pages.c | 85 +-- src/prof.c | 568 +++++++++--------- src/rtree.c | 70 +-- src/stats.c | 45 +- src/tcache.c | 120 ++-- src/tsd.c | 42 +- src/util.c | 96 +-- src/witness.c | 27 +- src/zone.c | 96 ++- test/include/test/SFMT.h | 34 +- test/include/test/btalloc.h | 7 +- test/include/test/extent_hooks.h | 51 +- test/include/test/jemalloc_test.h.in | 3 +- test/include/test/math.h | 50 +- test/include/test/mq.h | 27 +- test/include/test/test.h | 3 +- test/integration/MALLOCX_ARENA.c | 12 +- test/integration/aligned_alloc.c | 21 +- test/integration/allocated.c | 24 +- test/integration/cpp/basic.cpp | 6 +- test/integration/extent.c | 18 +- test/integration/mallocx.c | 42 +- test/integration/overflow.c | 6 +- test/integration/posix_memalign.c | 21 +- test/integration/rallocx.c | 33 +- test/integration/sdallocx.c | 15 +- test/integration/thread_arena.c | 9 +- test/integration/thread_tcache_enabled.c | 12 +- test/integration/xallocx.c | 63 +- test/src/btalloc.c | 3 +- test/src/mq.c | 3 +- test/src/mtx.c | 19 +- test/src/test.c | 30 +- test/src/thd.c | 18 +- test/src/timer.c | 18 +- test/stress/microbench.c | 57 +- test/unit/SFMT.c | 15 +- test/unit/a0.c | 6 +- test/unit/arena_reset.c | 69 +-- test/unit/atomic.c | 18 +- test/unit/base.c | 12 +- test/unit/bitmap.c | 51 +- test/unit/ckh.c | 18 +- test/unit/decay.c | 27 +- test/unit/extent_quantize.c | 12 +- 
test/unit/fork.c | 9 +- test/unit/hash.c | 27 +- test/unit/junk.c | 33 +- test/unit/mallctl.c | 75 +-- test/unit/math.c | 33 +- test/unit/mq.c | 21 +- test/unit/mtx.c | 18 +- test/unit/nstime.c | 36 +- test/unit/pack.c | 27 +- test/unit/pages.c | 6 +- test/unit/ph.c | 57 +- test/unit/prng.c | 51 +- test/unit/prof_accum.c | 18 +- test/unit/prof_active.c | 27 +- test/unit/prof_gdump.c | 9 +- test/unit/prof_idump.c | 9 +- test/unit/prof_reset.c | 36 +- test/unit/prof_tctx.c | 6 +- test/unit/prof_thread_name.c | 21 +- test/unit/ql.c | 42 +- test/unit/qr.c | 42 +- test/unit/rb.c | 84 +-- test/unit/rtree.c | 39 +- test/unit/size_classes.c | 15 +- test/unit/slab.c | 6 +- test/unit/smoothstep.c | 15 +- test/unit/stats.c | 30 +- test/unit/stats_print.c | 51 +- test/unit/ticker.c | 12 +- test/unit/tsd.c | 15 +- test/unit/util.c | 24 +- test/unit/witness.c | 40 +- test/unit/zero.c | 12 +- 119 files changed, 2971 insertions(+), 3572 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index d241b8a1..3c2b9b0a 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -14,32 +14,27 @@ bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) JEMALLOC_INLINE unsigned -arena_ind_get(const arena_t *arena) -{ +arena_ind_get(const arena_t *arena) { return (base_ind_get(arena->base)); } JEMALLOC_INLINE void -arena_internal_add(arena_t *arena, size_t size) -{ +arena_internal_add(arena_t *arena, size_t size) { atomic_add_zu(&arena->stats.internal, size); } JEMALLOC_INLINE void -arena_internal_sub(arena_t *arena, size_t size) -{ +arena_internal_sub(arena_t *arena, size_t size) { atomic_sub_zu(&arena->stats.internal, size); } JEMALLOC_INLINE size_t -arena_internal_get(arena_t *arena) -{ +arena_internal_get(arena_t *arena) { return (atomic_read_zu(&arena->stats.internal)); } JEMALLOC_INLINE 
bool -arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) -{ +arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); assert(prof_interval != 0); @@ -52,22 +47,22 @@ arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) } JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) -{ +arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); - if (likely(prof_interval == 0)) + if (likely(prof_interval == 0)) { return (false); + } return (arena_prof_accum_impl(arena, accumbytes)); } JEMALLOC_INLINE bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) -{ +arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); - if (likely(prof_interval == 0)) + if (likely(prof_interval == 0)) { return (false); + } { bool ret; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 94614668..5772781d 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -23,39 +23,37 @@ void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) JEMALLOC_INLINE szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) -{ +arena_bin_index(arena_t *arena, arena_bin_t *bin) { szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); return (binind); } JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ +arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - if (unlikely(!extent_slab_get(extent))) + if (unlikely(!extent_slab_get(extent))) { return (large_prof_tctx_get(tsdn, extent)); + } return ((prof_tctx_t *)(uintptr_t)1U); } JEMALLOC_INLINE void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, 
prof_tctx_t *tctx) -{ + size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - if (unlikely(!extent_slab_get(extent))) + if (unlikely(!extent_slab_get(extent))) { large_prof_tctx_set(tsdn, extent, tctx); + } } JEMALLOC_INLINE void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) -{ + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(!extent_slab_get(extent)); @@ -64,24 +62,25 @@ arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, } JEMALLOC_ALWAYS_INLINE void -arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) -{ +arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; ticker_t *decay_ticker; - if (unlikely(tsdn_null(tsdn))) + if (unlikely(tsdn_null(tsdn))) { return; + } tsd = tsdn_tsd(tsdn); decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); - if (unlikely(decay_ticker == NULL)) + if (unlikely(decay_ticker == NULL)) { return; - if (unlikely(ticker_ticks(decay_ticker, nticks))) + } + if (unlikely(ticker_ticks(decay_ticker, nticks))) { arena_purge(tsdn, arena, false); + } } JEMALLOC_ALWAYS_INLINE void -arena_decay_tick(tsdn_t *tsdn, arena_t *arena) -{ +arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_assert_not_owner(tsdn, &arena->lock); arena_decay_ticks(tsdn, arena, 1); @@ -89,8 +88,7 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) -{ + tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(size != 0); @@ -111,31 +109,29 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, } JEMALLOC_ALWAYS_INLINE arena_t * -arena_aalloc(tsdn_t *tsdn, const void *ptr) -{ +arena_aalloc(tsdn_t *tsdn, const void *ptr) { return (extent_arena_get(iealloc(tsdn, ptr))); } /* Return the size of the allocation pointed 
to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ +arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { size_t ret; assert(ptr != NULL); - if (likely(extent_slab_get(extent))) + if (likely(extent_slab_get(extent))) { ret = index2size(extent_slab_data_get_const(extent)->binind); - else + } else { ret = large_salloc(tsdn, extent); + } return (ret); } JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool slow_path) -{ + bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -160,15 +156,15 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, usize, slow_path); } - } else + } else { large_dalloc(tsdn, extent); + } } } JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) -{ + tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -192,8 +188,9 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, size, slow_path); } - } else + } else { large_dalloc(tsdn, extent); + } } } diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index 6f8f7eb9..5da0ef42 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -37,8 +37,9 @@ #ifndef assert_not_implemented #define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) \ + if (unlikely(config_debug && !(e))) { \ not_implemented(); \ + } \ } while (0) #endif diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h index 89d1b354..790a08a2 100644 --- a/include/jemalloc/internal/atomic_inlines.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -53,8 +53,7 @@ void 
atomic_write_u(unsigned *p, unsigned x); #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) # if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ +atomic_add_u64(uint64_t *p, uint64_t x) { uint64_t t = x; asm volatile ( @@ -67,8 +66,7 @@ atomic_add_u64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ +atomic_sub_u64(uint64_t *p, uint64_t x) { uint64_t t; x = (uint64_t)(-(int64_t)x); @@ -83,8 +81,7 @@ atomic_sub_u64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { uint8_t success; asm volatile ( @@ -99,8 +96,7 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) } JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ +atomic_write_u64(uint64_t *p, uint64_t x) { asm volatile ( "xchgq %1, %0;" /* Lock is implied by xchgq. */ : "=m" (*p), "+r" (x) /* Outputs. */ @@ -110,36 +106,31 @@ atomic_write_u64(uint64_t *p, uint64_t x) } # elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ +atomic_add_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ +atomic_sub_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; return (atomic_fetch_sub(a, x) - x); } JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; return (!atomic_compare_exchange_strong(a, &c, s)); } JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ +atomic_write_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; 
atomic_store(a, x); } # elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ +atomic_add_u64(uint64_t *p, uint64_t x) { /* * atomic_fetchadd_64() doesn't exist, but we only ever use this * function on LP64 systems, so atomic_fetchadd_long() will do. @@ -150,50 +141,43 @@ atomic_add_u64(uint64_t *p, uint64_t x) } JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ +atomic_sub_u64(uint64_t *p, uint64_t x) { assert(sizeof(uint64_t) == sizeof(unsigned long)); return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { assert(sizeof(uint64_t) == sizeof(unsigned long)); return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); } JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ +atomic_write_u64(uint64_t *p, uint64_t x) { assert(sizeof(uint64_t) == sizeof(unsigned long)); atomic_store_rel_long(p, x); } # elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ +atomic_add_u64(uint64_t *p, uint64_t x) { return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); } JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ +atomic_sub_u64(uint64_t *p, uint64_t x) { return (OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); } JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); } JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ +atomic_write_u64(uint64_t *p, uint64_t x) { uint64_t o; /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ @@ -203,20 +187,17 @@ atomic_write_u64(uint64_t *p, uint64_t x) } # elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ +atomic_add_u64(uint64_t *p, uint64_t x) { return (InterlockedExchangeAdd64(p, x) + x); } JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ +atomic_sub_u64(uint64_t *p, uint64_t x) { return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { uint64_t o; o = InterlockedCompareExchange64(p, s, c); @@ -224,33 +205,28 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) } JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ +atomic_write_u64(uint64_t *p, uint64_t x) { InterlockedExchange64(p, x); } # elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) -{ +atomic_add_u64(uint64_t *p, uint64_t x) { return (__sync_add_and_fetch(p, x)); } JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) -{ +atomic_sub_u64(uint64_t *p, uint64_t x) { return (__sync_sub_and_fetch(p, x)); } JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) -{ +atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { return (!__sync_bool_compare_and_swap(p, c, s)); } JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) -{ +atomic_write_u64(uint64_t *p, uint64_t x) { __sync_lock_test_and_set(p, x); } # else @@ -262,8 +238,7 @@ atomic_write_u64(uint64_t *p, uint64_t x) /* 32-bit operations. 
*/ #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ +atomic_add_u32(uint32_t *p, uint32_t x) { uint32_t t = x; asm volatile ( @@ -276,8 +251,7 @@ atomic_add_u32(uint32_t *p, uint32_t x) } JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ +atomic_sub_u32(uint32_t *p, uint32_t x) { uint32_t t; x = (uint32_t)(-(int32_t)x); @@ -292,8 +266,7 @@ atomic_sub_u32(uint32_t *p, uint32_t x) } JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { uint8_t success; asm volatile ( @@ -308,8 +281,7 @@ atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) } JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ +atomic_write_u32(uint32_t *p, uint32_t x) { asm volatile ( "xchgl %1, %0;" /* Lock is implied by xchgl. */ : "=m" (*p), "+r" (x) /* Outputs. */ @@ -319,78 +291,66 @@ atomic_write_u32(uint32_t *p, uint32_t x) } # elif (defined(JEMALLOC_C11ATOMICS)) JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ +atomic_add_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; return (atomic_fetch_add(a, x) + x); } JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ +atomic_sub_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; return (atomic_fetch_sub(a, x) - x); } JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; return (!atomic_compare_exchange_strong(a, &c, s)); } JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ +atomic_write_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; atomic_store(a, x); } #elif (defined(JEMALLOC_ATOMIC9)) 
JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ +atomic_add_u32(uint32_t *p, uint32_t x) { return (atomic_fetchadd_32(p, x) + x); } JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ +atomic_sub_u32(uint32_t *p, uint32_t x) { return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { return (!atomic_cmpset_32(p, c, s)); } JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ +atomic_write_u32(uint32_t *p, uint32_t x) { atomic_store_rel_32(p, x); } #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ +atomic_add_u32(uint32_t *p, uint32_t x) { return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); } JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ +atomic_sub_u32(uint32_t *p, uint32_t x) { return (OSAtomicAdd32(-((int32_t)x), (int32_t *)p)); } JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); } JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ +atomic_write_u32(uint32_t *p, uint32_t x) { uint32_t o; /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ @@ -400,20 +360,17 @@ atomic_write_u32(uint32_t *p, uint32_t x) } #elif (defined(_MSC_VER)) JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ +atomic_add_u32(uint32_t *p, uint32_t x) { return (InterlockedExchangeAdd(p, x) + x); } JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ +atomic_sub_u32(uint32_t *p, uint32_t x) { return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); } JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { uint32_t o; o = InterlockedCompareExchange(p, s, c); @@ -421,33 +378,28 @@ atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) } JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ +atomic_write_u32(uint32_t *p, uint32_t x) { InterlockedExchange(p, x); } #elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) -{ +atomic_add_u32(uint32_t *p, uint32_t x) { return (__sync_add_and_fetch(p, x)); } JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) -{ +atomic_sub_u32(uint32_t *p, uint32_t x) { return (__sync_sub_and_fetch(p, x)); } JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) -{ +atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { return (!__sync_bool_compare_and_swap(p, c, s)); } JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) -{ +atomic_write_u32(uint32_t *p, uint32_t x) { __sync_lock_test_and_set(p, x); } #else @@ -457,8 +409,7 @@ atomic_write_u32(uint32_t *p, uint32_t x) /******************************************************************************/ /* Pointer operations. 
*/ JEMALLOC_INLINE void * -atomic_add_p(void **p, void *x) -{ +atomic_add_p(void **p, void *x) { #if (LG_SIZEOF_PTR == 3) return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_PTR == 2) @@ -467,8 +418,7 @@ atomic_add_p(void **p, void *x) } JEMALLOC_INLINE void * -atomic_sub_p(void **p, void *x) -{ +atomic_sub_p(void **p, void *x) { #if (LG_SIZEOF_PTR == 3) return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_PTR == 2) @@ -477,8 +427,7 @@ atomic_sub_p(void **p, void *x) } JEMALLOC_INLINE bool -atomic_cas_p(void **p, void *c, void *s) -{ +atomic_cas_p(void **p, void *c, void *s) { #if (LG_SIZEOF_PTR == 3) return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_PTR == 2) @@ -487,8 +436,7 @@ atomic_cas_p(void **p, void *c, void *s) } JEMALLOC_INLINE void -atomic_write_p(void **p, const void *x) -{ +atomic_write_p(void **p, const void *x) { #if (LG_SIZEOF_PTR == 3) atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) @@ -499,8 +447,7 @@ atomic_write_p(void **p, const void *x) /******************************************************************************/ /* size_t operations. 
*/ JEMALLOC_INLINE size_t -atomic_add_zu(size_t *p, size_t x) -{ +atomic_add_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_PTR == 2) @@ -509,8 +456,7 @@ atomic_add_zu(size_t *p, size_t x) } JEMALLOC_INLINE size_t -atomic_sub_zu(size_t *p, size_t x) -{ +atomic_sub_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); #elif (LG_SIZEOF_PTR == 2) @@ -519,8 +465,7 @@ atomic_sub_zu(size_t *p, size_t x) } JEMALLOC_INLINE bool -atomic_cas_zu(size_t *p, size_t c, size_t s) -{ +atomic_cas_zu(size_t *p, size_t c, size_t s) { #if (LG_SIZEOF_PTR == 3) return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_PTR == 2) @@ -529,8 +474,7 @@ atomic_cas_zu(size_t *p, size_t c, size_t s) } JEMALLOC_INLINE void -atomic_write_zu(size_t *p, size_t x) -{ +atomic_write_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) @@ -541,8 +485,7 @@ atomic_write_zu(size_t *p, size_t x) /******************************************************************************/ /* unsigned operations. 
*/ JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) -{ +atomic_add_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x)); #elif (LG_SIZEOF_INT == 2) @@ -551,8 +494,7 @@ atomic_add_u(unsigned *p, unsigned x) } JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) -{ +atomic_sub_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); @@ -563,8 +505,7 @@ atomic_sub_u(unsigned *p, unsigned x) } JEMALLOC_INLINE bool -atomic_cas_u(unsigned *p, unsigned c, unsigned s) -{ +atomic_cas_u(unsigned *p, unsigned c, unsigned s) { #if (LG_SIZEOF_INT == 3) return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); #elif (LG_SIZEOF_INT == 2) @@ -573,8 +514,7 @@ atomic_cas_u(unsigned *p, unsigned c, unsigned s) } JEMALLOC_INLINE void -atomic_write_u(unsigned *p, unsigned x) -{ +atomic_write_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) atomic_write_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 2) diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index 63547d65..94fb1a95 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -7,8 +7,7 @@ unsigned base_ind_get(const base_t *base); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) JEMALLOC_INLINE unsigned -base_ind_get(const base_t *base) -{ +base_ind_get(const base_t *base) { return (base->ind); } #endif diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index 5400f9d1..1a2411df 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -11,8 +11,7 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) JEMALLOC_INLINE bool -bitmap_full(bitmap_t 
*bitmap, const bitmap_info_t *binfo) -{ +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; @@ -22,16 +21,16 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) size_t i; for (i = 0; i < binfo->ngroups; i++) { - if (bitmap[i] != 0) + if (bitmap[i] != 0) { return (false); + } } return (true); #endif } JEMALLOC_INLINE bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t g; @@ -42,8 +41,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) } JEMALLOC_INLINE void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; bitmap_t g; @@ -69,8 +67,9 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (g != 0) + if (g != 0) { break; + } } } #endif @@ -78,8 +77,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) /* sfu: set first unset. 
*/ JEMALLOC_INLINE size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { size_t bit; bitmap_t g; unsigned i; @@ -109,8 +107,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) } JEMALLOC_INLINE void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) -{ +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; bitmap_t g; @@ -140,8 +137,9 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) == 0); g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; - if (!propagate) + if (!propagate) { break; + } } } #endif /* BITMAP_USE_TREE */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 87e0bcd0..274e69c6 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -43,8 +43,7 @@ int extent_snad_comp(const extent_t *a, const extent_t *b); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) JEMALLOC_INLINE extent_t * -extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) -{ +extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -53,132 +52,112 @@ extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) } JEMALLOC_INLINE arena_t * -extent_arena_get(const extent_t *extent) -{ +extent_arena_get(const extent_t *extent) { return (extent->e_arena); } JEMALLOC_INLINE void * -extent_base_get(const extent_t *extent) -{ +extent_base_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent->e_slab); return (PAGE_ADDR2BASE(extent->e_addr)); } JEMALLOC_INLINE void * -extent_addr_get(const extent_t *extent) -{ +extent_addr_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent->e_slab); return 
(extent->e_addr); } JEMALLOC_INLINE size_t -extent_size_get(const extent_t *extent) -{ +extent_size_get(const extent_t *extent) { return (extent->e_size); } JEMALLOC_INLINE size_t -extent_usize_get(const extent_t *extent) -{ +extent_usize_get(const extent_t *extent) { assert(!extent->e_slab); return (extent->e_usize); } JEMALLOC_INLINE void * -extent_before_get(const extent_t *extent) -{ +extent_before_get(const extent_t *extent) { return ((void *)((uintptr_t)extent_base_get(extent) - PAGE)); } JEMALLOC_INLINE void * -extent_last_get(const extent_t *extent) -{ +extent_last_get(const extent_t *extent) { return ((void *)((uintptr_t)extent_base_get(extent) + extent_size_get(extent) - PAGE)); } JEMALLOC_INLINE void * -extent_past_get(const extent_t *extent) -{ +extent_past_get(const extent_t *extent) { return ((void *)((uintptr_t)extent_base_get(extent) + extent_size_get(extent))); } JEMALLOC_INLINE size_t -extent_sn_get(const extent_t *extent) -{ +extent_sn_get(const extent_t *extent) { return (extent->e_sn); } JEMALLOC_INLINE bool -extent_active_get(const extent_t *extent) -{ +extent_active_get(const extent_t *extent) { return (extent->e_active); } JEMALLOC_INLINE bool -extent_retained_get(const extent_t *extent) -{ +extent_retained_get(const extent_t *extent) { return (qr_next(extent, qr_link) == extent); } JEMALLOC_INLINE bool -extent_zeroed_get(const extent_t *extent) -{ +extent_zeroed_get(const extent_t *extent) { return (extent->e_zeroed); } JEMALLOC_INLINE bool -extent_committed_get(const extent_t *extent) -{ +extent_committed_get(const extent_t *extent) { return (extent->e_committed); } JEMALLOC_INLINE bool -extent_slab_get(const extent_t *extent) -{ +extent_slab_get(const extent_t *extent) { return (extent->e_slab); } JEMALLOC_INLINE arena_slab_data_t * -extent_slab_data_get(extent_t *extent) -{ +extent_slab_data_get(extent_t *extent) { assert(extent->e_slab); return (&extent->e_slab_data); } JEMALLOC_INLINE const arena_slab_data_t * 
-extent_slab_data_get_const(const extent_t *extent) -{ +extent_slab_data_get_const(const extent_t *extent) { assert(extent->e_slab); return (&extent->e_slab_data); } JEMALLOC_INLINE prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) -{ +extent_prof_tctx_get(const extent_t *extent) { return ((prof_tctx_t *)atomic_read_p( &((extent_t *)extent)->e_prof_tctx_pun)); } JEMALLOC_INLINE void -extent_arena_set(extent_t *extent, arena_t *arena) -{ +extent_arena_set(extent_t *extent, arena_t *arena) { extent->e_arena = arena; } JEMALLOC_INLINE void -extent_addr_set(extent_t *extent, void *addr) -{ +extent_addr_set(extent_t *extent, void *addr) { extent->e_addr = addr; } JEMALLOC_INLINE void -extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) -{ +extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { assert(extent_base_get(extent) == extent_addr_get(extent)); if (alignment < PAGE) { @@ -197,58 +176,49 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) } JEMALLOC_INLINE void -extent_size_set(extent_t *extent, size_t size) -{ +extent_size_set(extent_t *extent, size_t size) { extent->e_size = size; } JEMALLOC_INLINE void -extent_usize_set(extent_t *extent, size_t usize) -{ +extent_usize_set(extent_t *extent, size_t usize) { extent->e_usize = usize; } JEMALLOC_INLINE void -extent_sn_set(extent_t *extent, size_t sn) -{ +extent_sn_set(extent_t *extent, size_t sn) { extent->e_sn = sn; } JEMALLOC_INLINE void -extent_active_set(extent_t *extent, bool active) -{ +extent_active_set(extent_t *extent, bool active) { extent->e_active = active; } JEMALLOC_INLINE void -extent_zeroed_set(extent_t *extent, bool zeroed) -{ +extent_zeroed_set(extent_t *extent, bool zeroed) { extent->e_zeroed = zeroed; } JEMALLOC_INLINE void -extent_committed_set(extent_t *extent, bool committed) -{ +extent_committed_set(extent_t *extent, bool committed) { extent->e_committed = committed; } JEMALLOC_INLINE void -extent_slab_set(extent_t 
*extent, bool slab) -{ +extent_slab_set(extent_t *extent, bool slab) { extent->e_slab = slab; } JEMALLOC_INLINE void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) -{ +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { atomic_write_p(&extent->e_prof_tctx_pun, tctx); } JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, size_t usize, size_t sn, bool active, bool zeroed, bool committed, - bool slab) -{ + bool slab) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -260,26 +230,24 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); extent_slab_set(extent, slab); - if (config_prof) + if (config_prof) { extent_prof_tctx_set(extent, NULL); + } qr_new(extent, qr_link); } JEMALLOC_INLINE void -extent_ring_insert(extent_t *sentinel, extent_t *extent) -{ +extent_ring_insert(extent_t *sentinel, extent_t *extent) { qr_meld(sentinel, extent, extent_t, qr_link); } JEMALLOC_INLINE void -extent_ring_remove(extent_t *extent) -{ +extent_ring_remove(extent_t *extent) { qr_remove(extent, qr_link); } JEMALLOC_INLINE int -extent_sn_comp(const extent_t *a, const extent_t *b) -{ +extent_sn_comp(const extent_t *a, const extent_t *b) { size_t a_sn = extent_sn_get(a); size_t b_sn = extent_sn_get(b); @@ -287,8 +255,7 @@ extent_sn_comp(const extent_t *a, const extent_t *b) } JEMALLOC_INLINE int -extent_ad_comp(const extent_t *a, const extent_t *b) -{ +extent_ad_comp(const extent_t *a, const extent_t *b) { uintptr_t a_addr = (uintptr_t)extent_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_addr_get(b); @@ -296,13 +263,13 @@ extent_ad_comp(const extent_t *a, const extent_t *b) } JEMALLOC_INLINE int -extent_snad_comp(const extent_t *a, const extent_t *b) -{ +extent_snad_comp(const extent_t *a, const extent_t *b) { int ret; ret = extent_sn_comp(a, b); - if (ret != 0) + if (ret != 0) { return (ret); + } 
ret = extent_ad_comp(a, b); return (ret); diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash_inlines.h index 4bb78505..82ac1f42 100644 --- a/include/jemalloc/internal/hash_inlines.h +++ b/include/jemalloc/internal/hash_inlines.h @@ -21,20 +21,17 @@ void hash(const void *key, size_t len, const uint32_t seed, /******************************************************************************/ /* Internal implementation. */ JEMALLOC_INLINE uint32_t -hash_rotl_32(uint32_t x, int8_t r) -{ +hash_rotl_32(uint32_t x, int8_t r) { return ((x << r) | (x >> (32 - r))); } JEMALLOC_INLINE uint64_t -hash_rotl_64(uint64_t x, int8_t r) -{ +hash_rotl_64(uint64_t x, int8_t r) { return ((x << r) | (x >> (64 - r))); } JEMALLOC_INLINE uint32_t -hash_get_block_32(const uint32_t *p, int i) -{ +hash_get_block_32(const uint32_t *p, int i) { /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { uint32_t ret; @@ -47,8 +44,7 @@ hash_get_block_32(const uint32_t *p, int i) } JEMALLOC_INLINE uint64_t -hash_get_block_64(const uint64_t *p, int i) -{ +hash_get_block_64(const uint64_t *p, int i) { /* Handle unaligned read. 
*/ if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { uint64_t ret; @@ -61,8 +57,7 @@ hash_get_block_64(const uint64_t *p, int i) } JEMALLOC_INLINE uint32_t -hash_fmix_32(uint32_t h) -{ +hash_fmix_32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; h ^= h >> 13; @@ -73,8 +68,7 @@ hash_fmix_32(uint32_t h) } JEMALLOC_INLINE uint64_t -hash_fmix_64(uint64_t k) -{ +hash_fmix_64(uint64_t k) { k ^= k >> 33; k *= KQU(0xff51afd7ed558ccd); k ^= k >> 33; @@ -85,8 +79,7 @@ hash_fmix_64(uint64_t k) } JEMALLOC_INLINE uint32_t -hash_x86_32(const void *key, int len, uint32_t seed) -{ +hash_x86_32(const void *key, int len, uint32_t seed) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 4; @@ -137,8 +130,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) UNUSED JEMALLOC_INLINE void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) -{ + uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; const int nblocks = len / 16; @@ -239,8 +231,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed, UNUSED JEMALLOC_INLINE void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) -{ + uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 16; @@ -318,8 +309,7 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, /******************************************************************************/ /* API. */ JEMALLOC_INLINE void -hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) -{ +hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { assert(len <= INT_MAX); /* Unfortunate implementation limitation. 
*/ #if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index dc9df35f..c951fab4 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -550,10 +550,10 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE pszind_t -psz2ind(size_t psz) -{ - if (unlikely(psz > LARGE_MAXCLASS)) +psz2ind(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { return (NPSIZES); + } { pszind_t x = lg_floor((psz<<1)-1); pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - @@ -573,10 +573,10 @@ psz2ind(size_t psz) } JEMALLOC_INLINE size_t -pind2sz_compute(pszind_t pind) -{ - if (unlikely(pind == NPSIZES)) +pind2sz_compute(pszind_t pind) { + if (unlikely(pind == NPSIZES)) { return (LARGE_MAXCLASS + PAGE); + } { size_t grp = pind >> LG_SIZE_CLASS_GROUP; size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); @@ -595,25 +595,23 @@ pind2sz_compute(pszind_t pind) } JEMALLOC_INLINE size_t -pind2sz_lookup(pszind_t pind) -{ +pind2sz_lookup(pszind_t pind) { size_t ret = (size_t)pind2sz_tab[pind]; assert(ret == pind2sz_compute(pind)); return (ret); } JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) -{ +pind2sz(pszind_t pind) { assert(pind < NPSIZES+1); return (pind2sz_lookup(pind)); } JEMALLOC_INLINE size_t -psz2u(size_t psz) -{ - if (unlikely(psz > LARGE_MAXCLASS)) +psz2u(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { return (LARGE_MAXCLASS + PAGE); + } { size_t x = lg_floor((psz<<1)-1); size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
@@ -626,10 +624,10 @@ psz2u(size_t psz) } JEMALLOC_INLINE szind_t -size2index_compute(size_t size) -{ - if (unlikely(size > LARGE_MAXCLASS)) +size2index_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { return (NSIZES); + } #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -656,8 +654,7 @@ size2index_compute(size_t size) } JEMALLOC_ALWAYS_INLINE szind_t -size2index_lookup(size_t size) -{ +size2index_lookup(size_t size) { assert(size <= LOOKUP_MAXCLASS); { szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); @@ -667,20 +664,20 @@ size2index_lookup(size_t size) } JEMALLOC_ALWAYS_INLINE szind_t -size2index(size_t size) -{ +size2index(size_t size) { assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) + if (likely(size <= LOOKUP_MAXCLASS)) { return (size2index_lookup(size)); + } return (size2index_compute(size)); } JEMALLOC_INLINE size_t -index2size_compute(szind_t index) -{ +index2size_compute(szind_t index) { #if (NTBINS > 0) - if (index < NTBINS) + if (index < NTBINS) { return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + } #endif { size_t reduced_index = index - NTBINS; @@ -702,25 +699,23 @@ index2size_compute(szind_t index) } JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(szind_t index) -{ +index2size_lookup(szind_t index) { size_t ret = (size_t)index2size_tab[index]; assert(ret == index2size_compute(index)); return (ret); } JEMALLOC_ALWAYS_INLINE size_t -index2size(szind_t index) -{ +index2size(szind_t index) { assert(index < NSIZES); return (index2size_lookup(index)); } JEMALLOC_ALWAYS_INLINE size_t -s2u_compute(size_t size) -{ - if (unlikely(size > LARGE_MAXCLASS)) +s2u_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { return (0); + } #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; @@ -741,8 +736,7 @@ s2u_compute(size_t size) } JEMALLOC_ALWAYS_INLINE size_t -s2u_lookup(size_t size) -{ 
+s2u_lookup(size_t size) { size_t ret = index2size_lookup(size2index_lookup(size)); assert(ret == s2u_compute(size)); @@ -754,11 +748,11 @@ s2u_lookup(size_t size) * specified size. */ JEMALLOC_ALWAYS_INLINE size_t -s2u(size_t size) -{ +s2u(size_t size) { assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) + if (likely(size <= LOOKUP_MAXCLASS)) { return (s2u_lookup(size)); + } return (s2u_compute(size)); } @@ -767,8 +761,7 @@ s2u(size_t size) * specified size and alignment. */ JEMALLOC_ALWAYS_INLINE size_t -sa2u(size_t size, size_t alignment) -{ +sa2u(size_t size, size_t alignment) { size_t usize; assert(alignment != 0 && ((alignment - 1) & alignment) == 0); @@ -790,19 +783,21 @@ sa2u(size_t size, size_t alignment) * 192 | 11000000 | 64 */ usize = s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) + if (usize < LARGE_MINCLASS) { return (usize); + } } /* Large size class. Beware of overflow. */ - if (unlikely(alignment > LARGE_MAXCLASS)) + if (unlikely(alignment > LARGE_MAXCLASS)) { return (0); + } /* Make sure result is a large size class. */ - if (size <= LARGE_MINCLASS) + if (size <= LARGE_MINCLASS) { usize = LARGE_MINCLASS; - else { + } else { usize = s2u(size); if (usize < size) { /* size_t overflow. */ @@ -823,35 +818,33 @@ sa2u(size_t size, size_t alignment) /* Choose an arena based on a per-thread value. */ JEMALLOC_INLINE arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) -{ +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; - if (arena != NULL) + if (arena != NULL) { return (arena); + } ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) + if (unlikely(ret == NULL)) { ret = arena_choose_hard(tsd, internal); + } return (ret); } JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) -{ +arena_choose(tsd_t *tsd, arena_t *arena) { return (arena_choose_impl(tsd, arena, false)); } JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) -{ +arena_ichoose(tsd_t *tsd, arena_t *arena) { return (arena_choose_impl(tsd, arena, true)); } JEMALLOC_INLINE arena_tdata_t * -arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) -{ +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { arena_tdata_t *tdata; arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); @@ -869,14 +862,14 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) } tdata = &arenas_tdata[ind]; - if (likely(tdata != NULL) || !refresh_if_missing) + if (likely(tdata != NULL) || !refresh_if_missing) { return (tdata); + } return (arena_tdata_get_hard(tsd, ind)); } JEMALLOC_INLINE arena_t * -arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) -{ +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; assert(ind <= MALLOCX_ARENA_MAX); @@ -893,13 +886,13 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) } JEMALLOC_INLINE ticker_t * -decay_ticker_get(tsd_t *tsd, unsigned ind) -{ +decay_ticker_get(tsd_t *tsd, unsigned ind) { arena_tdata_t *tdata; tdata = arena_tdata_get(tsd, ind, true); - if (unlikely(tdata == NULL)) + if (unlikely(tdata == NULL)) { return (NULL); + } return (&tdata->decay_ticker); } #endif @@ -917,8 +910,7 @@ extent_t *iealloc(tsdn_t *tsdn, const void *ptr); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE extent_t * -iealloc(tsdn_t *tsdn, const void *ptr) -{ +iealloc(tsdn_t *tsdn, const void *ptr) { return (extent_lookup(tsdn, ptr, true)); } #endif @@ -958,8 +950,7 @@ bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, 
size_t oldsize, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE arena_t * -iaalloc(tsdn_t *tsdn, const void *ptr) -{ +iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); return (arena_aalloc(tsdn, ptr)); @@ -973,8 +964,7 @@ iaalloc(tsdn_t *tsdn, const void *ptr) * size_t sz = isalloc(tsdn, extent, ptr); */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ +isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { assert(ptr != NULL); return (arena_salloc(tsdn, extent, ptr)); @@ -982,8 +972,7 @@ isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) JEMALLOC_ALWAYS_INLINE void * iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_internal, arena_t *arena, bool slow_path) -{ + bool is_internal, arena_t *arena, bool slow_path) { void *ret; assert(size != 0); @@ -1000,16 +989,14 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE void * -ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) -{ +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { return (iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), false, NULL, slow_path)); } JEMALLOC_ALWAYS_INLINE void * ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena) -{ + tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; assert(usize != 0); @@ -1029,21 +1016,18 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, JEMALLOC_ALWAYS_INLINE void * ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) -{ + tcache_t *tcache, arena_t *arena) { return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); } JEMALLOC_ALWAYS_INLINE void * -ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) -{ +ipalloc(tsd_t *tsd, 
size_t usize, size_t alignment, bool zero) { return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, tcache_get(tsd, true), false, NULL)); } JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsdn_t *tsdn, const void *ptr) -{ +ivsalloc(tsdn_t *tsdn, const void *ptr) { extent_t *extent; /* @@ -1055,8 +1039,9 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) * failure. * */ extent = extent_lookup(tsdn, ptr, false); - if (extent == NULL) + if (extent == NULL) { return (0); + } assert(extent_active_get(extent)); /* Only slab members should be looked up via interior pointers. */ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); @@ -1066,8 +1051,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) JEMALLOC_ALWAYS_INLINE void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool is_internal, bool slow_path) -{ + bool is_internal, bool slow_path) { assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < @@ -1081,41 +1065,42 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, extent_t *extent, void *ptr) -{ +idalloc(tsd_t *tsd, extent_t *extent, void *ptr) { idalloctm(tsd_tsdn(tsd), extent, ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) -{ + tcache_t *tcache, bool slow_path) { arena_sdalloc(tsdn, extent, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena) -{ + arena_t *arena) { void *p; size_t usize, copysize; usize = sa2u(size + extra, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return (NULL); + } p = ipalloct(tsdn, usize, alignment, zero, 
tcache, arena); if (p == NULL) { - if (extra == 0) + if (extra == 0) { return (NULL); + } /* Try again, without extra this time. */ usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return (NULL); + } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); - if (p == NULL) + if (p == NULL) { return (NULL); + } } /* * Copy at most size bytes (not size+extra), since the caller has no @@ -1129,8 +1114,7 @@ iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, JEMALLOC_ALWAYS_INLINE void * iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) -{ + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); assert(size != 0); @@ -1150,16 +1134,14 @@ iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero) -{ + size_t alignment, bool zero) { return (iralloct(tsd_tsdn(tsd), extent, ptr, oldsize, size, alignment, zero, tcache_get(tsd, true), NULL)); } JEMALLOC_ALWAYS_INLINE bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero) -{ + size_t extra, size_t alignment, bool zero) { assert(ptr != NULL); assert(size != 0); diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 277027f0..fd80fdf0 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -61,8 +61,7 @@ typedef intptr_t ssize_t; # pragma warning(disable: 4996) #if _MSC_VER < 1800 static int -isblank(int c) -{ +isblank(int c) { return (c == '\t' || c == ' '); } #endif diff --git 
a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index d65fa13c..0c6c5dd5 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -10,8 +10,7 @@ void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void -malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 @@ -32,8 +31,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) } JEMALLOC_INLINE void -malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 @@ -53,17 +51,17 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) } JEMALLOC_INLINE void -malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - if (isthreaded) +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + if (isthreaded) { witness_assert_owner(tsdn, &mutex->witness); + } } JEMALLOC_INLINE void -malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ - if (isthreaded) +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + if (isthreaded) { witness_assert_not_owner(tsdn, &mutex->witness); + } } #endif diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 9efb7b74..61dfdc0b 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -58,17 +58,18 @@ struct { \ phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ phn_next_set(a_type, a_field, a_phn1, phn0child); \ - if (phn0child != NULL) \ + if (phn0child != NULL) { \ phn_prev_set(a_type, a_field, phn0child, a_phn1); \ + } \ phn_lchild_set(a_type, a_field, a_phn0, a_phn1); 
\ } while (0) #define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ - if (a_phn0 == NULL) \ + if (a_phn0 == NULL) { \ r_phn = a_phn1; \ - else if (a_phn1 == NULL) \ + } else if (a_phn1 == NULL) { \ r_phn = a_phn0; \ - else if (a_cmp(a_phn0, a_phn1) < 0) { \ + } else if (a_cmp(a_phn0, a_phn1) < 0) { \ phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ a_cmp); \ r_phn = a_phn0; \ @@ -95,8 +96,9 @@ struct { \ */ \ if (phn1 != NULL) { \ a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ - if (phnrest != NULL) \ + if (phnrest != NULL) { \ phn_prev_set(a_type, a_field, phnrest, NULL); \ + } \ phn_prev_set(a_type, a_field, phn0, NULL); \ phn_next_set(a_type, a_field, phn0, NULL); \ phn_prev_set(a_type, a_field, phn1, NULL); \ @@ -150,8 +152,9 @@ struct { \ NULL); \ phn_merge(a_type, a_field, phn0, phn1, \ a_cmp, phn0); \ - if (head == NULL) \ + if (head == NULL) { \ break; \ + } \ phn_next_set(a_type, a_field, tail, \ phn0); \ tail = phn0; \ @@ -179,9 +182,9 @@ struct { \ #define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ - if (lchild == NULL) \ + if (lchild == NULL) { \ r_phn = NULL; \ - else { \ + } else { \ ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ r_phn); \ } \ @@ -205,26 +208,23 @@ a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); */ #define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ a_attr void \ -a_prefix##new(a_ph_type *ph) \ -{ \ +a_prefix##new(a_ph_type *ph) { \ memset(ph, 0, sizeof(ph(a_type))); \ } \ a_attr bool \ -a_prefix##empty(a_ph_type *ph) \ -{ \ +a_prefix##empty(a_ph_type *ph) { \ return (ph->ph_root == NULL); \ } \ a_attr a_type * \ -a_prefix##first(a_ph_type *ph) \ -{ \ - if (ph->ph_root == NULL) \ +a_prefix##first(a_ph_type *ph) { \ + if (ph->ph_root == NULL) { \ return (NULL); \ + } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ return (ph->ph_root); \ } \ a_attr void \ -a_prefix##insert(a_ph_type *ph, a_type 
*phn) \ -{ \ +a_prefix##insert(a_ph_type *ph, a_type *phn) { \ memset(&phn->a_field, 0, sizeof(phn(a_type))); \ \ /* \ @@ -235,9 +235,9 @@ a_prefix##insert(a_ph_type *ph, a_type *phn) \ * constant-time, whereas eager merging would make insert \ * O(log n). \ */ \ - if (ph->ph_root == NULL) \ + if (ph->ph_root == NULL) { \ ph->ph_root = phn; \ - else { \ + } else { \ phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ a_field, ph->ph_root)); \ if (phn_next_get(a_type, a_field, ph->ph_root) != \ @@ -251,12 +251,12 @@ a_prefix##insert(a_ph_type *ph, a_type *phn) \ } \ } \ a_attr a_type * \ -a_prefix##remove_first(a_ph_type *ph) \ -{ \ +a_prefix##remove_first(a_ph_type *ph) { \ a_type *ret; \ \ - if (ph->ph_root == NULL) \ + if (ph->ph_root == NULL) { \ return (NULL); \ + } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ \ ret = ph->ph_root; \ @@ -267,8 +267,7 @@ a_prefix##remove_first(a_ph_type *ph) \ return (ret); \ } \ a_attr void \ -a_prefix##remove(a_ph_type *ph, a_type *phn) \ -{ \ +a_prefix##remove(a_ph_type *ph, a_type *phn) { \ a_type *replace, *parent; \ \ /* \ @@ -286,8 +285,9 @@ a_prefix##remove(a_ph_type *ph, a_type *phn) \ \ /* Get parent (if phn is leftmost child) before mutating. */ \ if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ - if (phn_lchild_get(a_type, a_field, parent) != phn) \ + if (phn_lchild_get(a_type, a_field, parent) != phn) { \ parent = NULL; \ + } \ } \ /* Find a possible replacement node, and link to parent. 
*/ \ ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index 8cc19ce8..124b1baa 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -18,20 +18,17 @@ size_t prng_range_zu(size_t *state, size_t range, bool atomic); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) JEMALLOC_ALWAYS_INLINE uint32_t -prng_state_next_u32(uint32_t state) -{ +prng_state_next_u32(uint32_t state) { return ((state * PRNG_A_32) + PRNG_C_32); } JEMALLOC_ALWAYS_INLINE uint64_t -prng_state_next_u64(uint64_t state) -{ +prng_state_next_u64(uint64_t state) { return ((state * PRNG_A_64) + PRNG_C_64); } JEMALLOC_ALWAYS_INLINE size_t -prng_state_next_zu(size_t state) -{ +prng_state_next_zu(size_t state) { #if LG_SIZEOF_PTR == 2 return ((state * PRNG_A_32) + PRNG_C_32); #elif LG_SIZEOF_PTR == 3 @@ -42,8 +39,7 @@ prng_state_next_zu(size_t state) } JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) -{ +prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) { uint32_t ret, state1; assert(lg_range > 0); @@ -67,8 +63,7 @@ prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) /* 64-bit atomic operations cannot be supported on all relevant platforms. 
*/ JEMALLOC_ALWAYS_INLINE uint64_t -prng_lg_range_u64(uint64_t *state, unsigned lg_range) -{ +prng_lg_range_u64(uint64_t *state, unsigned lg_range) { uint64_t ret, state1; assert(lg_range > 0); @@ -82,8 +77,7 @@ prng_lg_range_u64(uint64_t *state, unsigned lg_range) } JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) -{ +prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) { size_t ret, state1; assert(lg_range > 0); @@ -106,8 +100,7 @@ prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) } JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(uint32_t *state, uint32_t range, bool atomic) -{ +prng_range_u32(uint32_t *state, uint32_t range, bool atomic) { uint32_t ret; unsigned lg_range; @@ -125,8 +118,7 @@ prng_range_u32(uint32_t *state, uint32_t range, bool atomic) } JEMALLOC_ALWAYS_INLINE uint64_t -prng_range_u64(uint64_t *state, uint64_t range) -{ +prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; @@ -144,8 +136,7 @@ prng_range_u64(uint64_t *state, uint64_t range) } JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(size_t *state, size_t range, bool atomic) -{ +prng_range_zu(size_t *state, size_t range, bool atomic) { size_t ret; unsigned lg_range; diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 394b7b37..bb9093a8 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -27,8 +27,7 @@ void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) -{ +prof_active_get_unlocked(void) { /* * Even if opt_prof is true, sampling can be temporarily disabled by * setting prof_active to false. 
No locking is used when reading @@ -39,8 +38,7 @@ prof_active_get_unlocked(void) } JEMALLOC_ALWAYS_INLINE bool -prof_gdump_get_unlocked(void) -{ +prof_gdump_get_unlocked(void) { /* * No locking is used when reading prof_gdump_val in the fast path, so * there are no guarantees regarding how long it will take for all @@ -50,8 +48,7 @@ prof_gdump_get_unlocked(void) } JEMALLOC_ALWAYS_INLINE prof_tdata_t * -prof_tdata_get(tsd_t *tsd, bool create) -{ +prof_tdata_get(tsd_t *tsd, bool create) { prof_tdata_t *tdata; cassert(config_prof); @@ -74,8 +71,7 @@ prof_tdata_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) -{ +prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); @@ -84,8 +80,7 @@ prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) JEMALLOC_ALWAYS_INLINE void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -94,8 +89,7 @@ prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, JEMALLOC_ALWAYS_INLINE void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) -{ + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -104,37 +98,40 @@ prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) -{ + prof_tdata_t **tdata_out) { prof_tdata_t *tdata; cassert(config_prof); tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) + if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; + } - if (tdata_out != NULL) + if (tdata_out != NULL) { *tdata_out = tdata; + } - if (unlikely(tdata == NULL)) + if (unlikely(tdata == NULL)) { 
return (true); + } if (likely(tdata->bytes_until_sample >= usize)) { - if (update) + if (update) { tdata->bytes_until_sample -= usize; + } return (true); } else { /* Compute new sample threshold. */ - if (update) + if (update) { prof_sample_threshold_update(tdata); + } return (!tdata->active); } } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) -{ +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { prof_tctx_t *ret; prof_tdata_t *tdata; prof_bt_t bt; @@ -142,9 +139,9 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) assert(usize == s2u(usize)); if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) + &tdata))) { ret = (prof_tctx_t *)(uintptr_t)1U; - else { + } else { bt_init(&bt, tdata->vec); prof_backtrace(&bt); ret = prof_lookup(tsd, &bt); @@ -155,15 +152,14 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) JEMALLOC_ALWAYS_INLINE void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) -{ + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsdn, extent, ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); - else { + } else { prof_tctx_set(tsdn, extent, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } @@ -172,8 +168,7 @@ prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, extent_t *old_extent, - const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) -{ + const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -230,15 +225,15 @@ prof_realloc(tsd_t *tsd, extent_t 
*extent, const void *ptr, size_t usize, } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) -{ +prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) + if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_free_sampled_object(tsd, usize, tctx); + } } #endif diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 06dfdafd..a04f7504 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -25,14 +25,12 @@ struct { \ (a_qrelm)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_after_insert(a_qrelm, a_qr, a_field) \ - do \ - { \ +#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ (a_qr)->a_field.qre_prev = (a_qrelm); \ (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ (a_qrelm)->a_field.qre_next = (a_qr); \ - } while (0) +} while (0) #define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ a_type *t; \ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 3770342f..a4b5a65e 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -550,8 +550,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* Find node's successor, in preparation for swap. 
*/ \ pathp->cmp = 1; \ nodep = pathp; \ - for (pathp++; pathp->node != NULL; \ - pathp++) { \ + for (pathp++; pathp->node != NULL; pathp++) { \ pathp->cmp = -1; \ pathp[1].node = rbtn_left_get(a_type, a_field, \ pathp->node); \ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 7e79a6a0..9e512e9f 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -37,12 +37,12 @@ void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) JEMALLOC_ALWAYS_INLINE unsigned -rtree_start_level(const rtree_t *rtree, uintptr_t key) -{ +rtree_start_level(const rtree_t *rtree, uintptr_t key) { unsigned start_level; - if (unlikely(key == 0)) + if (unlikely(key == 0)) { return (rtree->height - 1); + } start_level = rtree->start_level[(lg_floor(key) + 1) >> LG_RTREE_BITS_PER_LEVEL]; @@ -52,8 +52,7 @@ rtree_start_level(const rtree_t *rtree, uintptr_t key) JEMALLOC_ALWAYS_INLINE unsigned rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, - uintptr_t key) -{ + uintptr_t key) { unsigned start_level; uintptr_t key_diff; @@ -72,48 +71,45 @@ rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, } JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) -{ +rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - rtree->levels[level].cumbits)) & ((ZU(1) << rtree->levels[level].bits) - 1)); } JEMALLOC_ALWAYS_INLINE bool -rtree_node_valid(rtree_elm_t *node) -{ +rtree_node_valid(rtree_elm_t *node) { return ((uintptr_t)node != (uintptr_t)0); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_child_tryread(rtree_elm_t *elm, bool dependent) -{ +rtree_child_tryread(rtree_elm_t *elm, bool dependent) { rtree_elm_t *child; /* Double-checked read (first read may be stale). 
*/ child = elm->child; - if (!dependent && !rtree_node_valid(child)) + if (!dependent && !rtree_node_valid(child)) { child = (rtree_elm_t *)atomic_read_p(&elm->pun); + } assert(!dependent || child != NULL); return (child); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, - bool dependent) -{ + bool dependent) { rtree_elm_t *child; child = rtree_child_tryread(elm, dependent); - if (!dependent && unlikely(!rtree_node_valid(child))) + if (!dependent && unlikely(!rtree_node_valid(child))) { child = rtree_child_read_hard(tsdn, rtree, elm, level); + } assert(!dependent || child != NULL); return (child); } JEMALLOC_ALWAYS_INLINE extent_t * -rtree_elm_read(rtree_elm_t *elm, bool dependent) -{ +rtree_elm_read(rtree_elm_t *elm, bool dependent) { extent_t *extent; if (dependent) { @@ -140,14 +136,12 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) } JEMALLOC_INLINE void -rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) -{ +rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { atomic_write_p(&elm->pun, extent); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) -{ +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { rtree_elm_t *subtree; /* Double-checked read (first read may be stale). 
*/ @@ -161,21 +155,21 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) } JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, bool dependent) -{ +rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, + bool dependent) { rtree_elm_t *subtree; subtree = rtree_subtree_tryread(rtree, level, dependent); - if (!dependent && unlikely(!rtree_node_valid(subtree))) + if (!dependent && unlikely(!rtree_node_valid(subtree))) { subtree = rtree_subtree_read_hard(tsdn, rtree, level); + } assert(!dependent || subtree != NULL); return (subtree); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, bool init_missing) -{ + uintptr_t key, bool dependent, bool init_missing) { uintptr_t subkey; unsigned start_level; rtree_elm_t *node; @@ -184,9 +178,9 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (dependent || init_missing) { if (likely(rtree_ctx->valid)) { - if (key == rtree_ctx->key) + if (key == rtree_ctx->key) { return (rtree_ctx->elms[rtree->height]); - else { + } else { unsigned no_ctx_start_level = rtree_start_level(rtree, key); unsigned ctx_start_level; @@ -237,8 +231,9 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, case level: \ assert(level < (RTREE_HEIGHT_MAX-1)); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ - if (init_missing) \ + if (init_missing) { \ rtree_ctx->valid = false; \ + } \ return (NULL); \ } \ subkey = rtree_subkey(rtree, key, level - \ @@ -255,8 +250,9 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, case level: \ assert(level == (RTREE_HEIGHT_MAX-1)); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ - if (init_missing) \ + if (init_missing) { \ rtree_ctx->valid = false; \ + } \ return (NULL); \ } \ subkey = rtree_subkey(rtree, key, level - \ @@ -330,16 +326,16 @@ 
rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, JEMALLOC_INLINE bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - const extent_t *extent) -{ + const extent_t *extent) { rtree_elm_t *elm; assert(extent != NULL); /* Use rtree_clear() for this case. */ assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); - if (elm == NULL) + if (elm == NULL) { return (true); + } assert(rtree_elm_read(elm, false) == NULL); rtree_elm_write(elm, extent); @@ -348,27 +344,27 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, JEMALLOC_ALWAYS_INLINE extent_t * rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - bool dependent) -{ + bool dependent) { rtree_elm_t *elm; elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); - if (elm == NULL) + if (elm == NULL) { return (NULL); + } return (rtree_elm_read(elm, dependent)); } JEMALLOC_INLINE rtree_elm_t * rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, bool init_missing) -{ + uintptr_t key, bool dependent, bool init_missing) { rtree_elm_t *elm; elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, init_missing); - if (!dependent && elm == NULL) + if (!dependent && elm == NULL) { return (NULL); + } { extent_t *extent; void *s; @@ -380,52 +376,53 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } while (atomic_cas_p(&elm->pun, (void *)extent, s)); } - if (config_debug) + if (config_debug) { rtree_elm_witness_acquire(tsdn, rtree, key, elm); + } return (elm); } JEMALLOC_INLINE extent_t * -rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) -{ +rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { extent_t *extent; assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); extent = 
(extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1)); assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - if (config_debug) + if (config_debug) { rtree_elm_witness_access(tsdn, rtree, elm); + } return (extent); } JEMALLOC_INLINE void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, - const extent_t *extent) -{ + const extent_t *extent) { assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); - if (config_debug) + if (config_debug) { rtree_elm_witness_access(tsdn, rtree, elm); + } elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1); assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent); } JEMALLOC_INLINE void -rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) -{ +rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { rtree_elm_write(elm, rtree_elm_read_acquired(tsdn, rtree, elm)); - if (config_debug) + if (config_debug) { rtree_elm_witness_release(tsdn, rtree, elm); + } } JEMALLOC_INLINE void -rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) -{ +rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { rtree_elm_t *elm; elm = rtree_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h index b4e779f8..1ffc4232 100644 --- a/include/jemalloc/internal/spin_inlines.h +++ b/include/jemalloc/internal/spin_inlines.h @@ -8,21 +8,21 @@ void spin_adaptive(spin_t *spin); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) JEMALLOC_INLINE void -spin_init(spin_t *spin) -{ +spin_init(spin_t *spin) { spin->iteration = 0; } JEMALLOC_INLINE void -spin_adaptive(spin_t *spin) -{ +spin_adaptive(spin_t *spin) { volatile uint64_t i; - for (i = 0; i < (KQU(1) << spin->iteration); i++) + for (i = 0; i < (KQU(1) << spin->iteration); i++) { CPU_SPINWAIT; + 
} - if (spin->iteration < 63) + if (spin->iteration < 63) { spin->iteration++; + } } #endif diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2762b0e2..4721ba30 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -21,8 +21,7 @@ tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) JEMALLOC_INLINE void -tcache_flush(void) -{ +tcache_flush(void) { tsd_t *tsd; cassert(config_tcache); @@ -32,8 +31,7 @@ tcache_flush(void) } JEMALLOC_INLINE bool -tcache_enabled_get(void) -{ +tcache_enabled_get(void) { tsd_t *tsd; tcache_enabled_t tcache_enabled; @@ -50,8 +48,7 @@ tcache_enabled_get(void) } JEMALLOC_INLINE void -tcache_enabled_set(bool enabled) -{ +tcache_enabled_set(bool enabled) { tsd_t *tsd; tcache_enabled_t tcache_enabled; @@ -62,21 +59,23 @@ tcache_enabled_set(bool enabled) tcache_enabled = (tcache_enabled_t)enabled; tsd_tcache_enabled_set(tsd, tcache_enabled); - if (!enabled) + if (!enabled) { tcache_cleanup(tsd); + } } JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(tsd_t *tsd, bool create) -{ +tcache_get(tsd_t *tsd, bool create) { tcache_t *tcache; - if (!config_tcache) + if (!config_tcache) { return (NULL); + } tcache = tsd_tcache_get(tsd); - if (!create) + if (!create) { return (tcache); + } if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { tcache = tcache_get_hard(tsd); tsd_tcache_set(tsd, tcache); @@ -86,18 +85,18 @@ tcache_get(tsd_t *tsd, bool create) } JEMALLOC_ALWAYS_INLINE void -tcache_event(tsd_t *tsd, tcache_t *tcache) -{ - if (TCACHE_GC_INCR == 0) +tcache_event(tsd_t *tsd, tcache_t *tcache) { + if (TCACHE_GC_INCR == 0) { return; + } - if (unlikely(ticker_tick(&tcache->gc_ticker))) + if (unlikely(ticker_tick(&tcache->gc_ticker))) { tcache_event_hard(tsd, tcache); + } } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) -{ 
+tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { void *ret; if (unlikely(tbin->ncached == 0)) { @@ -116,16 +115,16 @@ tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) ret = *(tbin->avail - tbin->ncached); tbin->ncached--; - if (unlikely((int)tbin->ncached < tbin->low_water)) + if (unlikely((int)tbin->ncached < tbin->low_water)) { tbin->low_water = tbin->ncached; + } return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) -{ + szind_t binind, bool zero, bool slow_path) { void *ret; tcache_bin_t *tbin; bool tcache_success; @@ -138,13 +137,15 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(!tcache_success)) { bool tcache_hard_success; arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) + if (unlikely(arena == NULL)) { return (NULL); + } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, tbin, binind, &tcache_hard_success); - if (tcache_hard_success == false) + if (tcache_hard_success == false) { return (NULL); + } } assert(ret); @@ -162,8 +163,9 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); - } else if (unlikely(opt_zero)) + } else if (unlikely(opt_zero)) { memset(ret, 0, usize); + } } } else { if (slow_path && config_fill && unlikely(opt_junk_alloc)) { @@ -173,18 +175,19 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, memset(ret, 0, usize); } - if (config_stats) + if (config_stats) { tbin->tstats.nrequests++; - if (config_prof) + } + if (config_prof) { tcache->prof_accumbytes += usize; + } tcache_event(tsd, tcache); return (ret); } JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) -{ + szind_t binind, bool 
zero, bool slow_path) { void *ret; tcache_bin_t *tbin; bool tcache_success; @@ -199,12 +202,14 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * expensive to create one and not use it. */ arena = arena_choose(tsd, arena); - if (unlikely(arena == NULL)) + if (unlikely(arena == NULL)) { return (NULL); + } ret = large_malloc(tsd_tsdn(tsd), arena, s2u(size), zero); - if (ret == NULL) + if (ret == NULL) { return (NULL); + } } else { size_t usize JEMALLOC_CC_SILENCE_INIT(0); @@ -220,16 +225,20 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(opt_junk_alloc)) { memset(ret, JEMALLOC_ALLOC_JUNK, usize); - } else if (unlikely(opt_zero)) + } else if (unlikely(opt_zero)) { memset(ret, 0, usize); + } } - } else + } else { memset(ret, 0, usize); + } - if (config_stats) + if (config_stats) { tbin->tstats.nrequests++; - if (config_prof) + } + if (config_prof) { tcache->prof_accumbytes += usize; + } } tcache_event(tsd, tcache); @@ -238,15 +247,15 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) -{ + bool slow_path) { tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); - if (slow_path && config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + } tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; @@ -263,8 +272,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, - bool slow_path) -{ + bool slow_path) { szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; @@ -274,8 +282,9 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, 
size_t size, binind = size2index(size); - if (slow_path && config_fill && unlikely(opt_junk_free)) + if (slow_path && config_fill && unlikely(opt_junk_free)) { large_dalloc_junk(ptr, size); + } tbin = &tcache->tbins[binind]; tbin_info = &tcache_bin_info[binind]; @@ -291,8 +300,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, } JEMALLOC_ALWAYS_INLINE tcache_t * -tcaches_get(tsd_t *tsd, unsigned ind) -{ +tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, diff --git a/include/jemalloc/internal/ticker_inlines.h b/include/jemalloc/internal/ticker_inlines.h index 1a4395f3..6cc61343 100644 --- a/include/jemalloc/internal/ticker_inlines.h +++ b/include/jemalloc/internal/ticker_inlines.h @@ -11,27 +11,23 @@ bool ticker_tick(ticker_t *ticker); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) JEMALLOC_INLINE void -ticker_init(ticker_t *ticker, int32_t nticks) -{ +ticker_init(ticker_t *ticker, int32_t nticks) { ticker->tick = nticks; ticker->nticks = nticks; } JEMALLOC_INLINE void -ticker_copy(ticker_t *ticker, const ticker_t *other) -{ +ticker_copy(ticker_t *ticker, const ticker_t *other) { *ticker = *other; } JEMALLOC_INLINE int32_t -ticker_read(const ticker_t *ticker) -{ +ticker_read(const ticker_t *ticker) { return (ticker->tick); } JEMALLOC_INLINE bool -ticker_ticks(ticker_t *ticker, int32_t nticks) -{ +ticker_ticks(ticker_t *ticker, int32_t nticks) { if (unlikely(ticker->tick < nticks)) { ticker->tick = ticker->nticks; return (true); @@ -41,8 +37,7 @@ ticker_ticks(ticker_t *ticker, int32_t nticks) } JEMALLOC_INLINE bool -ticker_tick(ticker_t *ticker) -{ +ticker_tick(ticker_t *ticker) { return (ticker_ticks(ticker, 1)); } #endif diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 0df21ad6..2093d610 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ 
b/include/jemalloc/internal/tsd_inlines.h @@ -25,12 +25,12 @@ malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init) -{ +tsd_fetch_impl(bool init) { tsd_t *tsd = tsd_get(init); - if (!init && tsd_get_allocates() && tsd == NULL) + if (!init && tsd_get_allocates() && tsd == NULL) { return (NULL); + } assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { @@ -41,47 +41,42 @@ tsd_fetch_impl(bool init) } else if (tsd->state == tsd_state_purgatory) { tsd->state = tsd_state_reincarnated; tsd_set(tsd); - } else + } else { assert(tsd->state == tsd_state_reincarnated); + } } return (tsd); } JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) -{ +tsd_fetch(void) { return (tsd_fetch_impl(true)); } JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) -{ +tsd_tsdn(tsd_t *tsd) { return ((tsdn_t *)tsd); } JEMALLOC_INLINE bool -tsd_nominal(tsd_t *tsd) -{ +tsd_nominal(tsd_t *tsd) { return (tsd->state == tsd_state_nominal); } #define O(n, t, c) \ JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) \ -{ \ +tsd_##n##p_get(tsd_t *tsd) { \ return (&tsd->n); \ } \ \ JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) \ -{ \ +tsd_##n##_get(tsd_t *tsd) { \ return (*tsd_##n##p_get(tsd)); \ } \ \ JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) \ -{ \ +tsd_##n##_set(tsd_t *tsd, t n) { \ assert(tsd->state == tsd_state_nominal); \ tsd->n = n; \ } @@ -89,31 +84,28 @@ MALLOC_TSD #undef O JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) -{ - if (!tsd_booted_get()) +tsdn_fetch(void) { + if (!tsd_booted_get()) { return (NULL); + } return (tsd_tsdn(tsd_fetch_impl(false))); } JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) -{ +tsdn_null(const tsdn_t *tsdn) { return (tsdn == NULL); } JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) -{ +tsdn_tsd(tsdn_t *tsdn) { assert(!tsdn_null(tsdn)); return (&tsdn->tsd); } JEMALLOC_ALWAYS_INLINE rtree_ctx_t * 
-tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) -{ +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { /* * If tsd cannot be accessed, initialize the fallback rtree_ctx and * return a pointer to it. diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 17e3da9f..ec40d9a7 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -175,8 +175,7 @@ a_attr bool a_name##tsd_booted = false; a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ +a_name##tsd_cleanup_wrapper(void) { \ if (a_name##tsd_initialized) { \ a_name##tsd_initialized = false; \ a_cleanup(&a_name##tsd_tls); \ @@ -184,8 +183,7 @@ a_name##tsd_cleanup_wrapper(void) \ return (a_name##tsd_initialized); \ } \ a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ +a_name##tsd_boot0(void) { \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ &a_name##tsd_cleanup_wrapper); \ @@ -194,96 +192,88 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1(void) \ -{ \ +a_name##tsd_boot1(void) { \ /* Do nothing. */ \ } \ a_attr bool \ -a_name##tsd_boot(void) \ -{ \ +a_name##tsd_boot(void) { \ return (a_name##tsd_boot0()); \ } \ a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ +a_name##tsd_booted_get(void) { \ return (a_name##tsd_booted); \ } \ a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ +a_name##tsd_get_allocates(void) { \ return (false); \ } \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ +a_name##tsd_get(bool init) { \ assert(a_name##tsd_booted); \ return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ +a_name##tsd_set(a_type *val) { \ assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) \ + if (likely(&a_name##tsd_tls != val)) { \ a_name##tsd_tls = (*val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ + } \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ a_name##tsd_initialized = true; \ + } \ } #elif (defined(JEMALLOC_TLS)) #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ +a_name##tsd_boot0(void) { \ if (a_cleanup != malloc_tsd_no_cleanup) { \ if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ - 0) \ + 0) { \ return (true); \ + } \ } \ a_name##tsd_booted = true; \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1(void) \ -{ \ +a_name##tsd_boot1(void) { \ /* Do nothing. */ \ } \ a_attr bool \ -a_name##tsd_boot(void) \ -{ \ +a_name##tsd_boot(void) { \ return (a_name##tsd_boot0()); \ } \ a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ +a_name##tsd_booted_get(void) { \ return (a_name##tsd_booted); \ } \ a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ +a_name##tsd_get_allocates(void) { \ return (false); \ } \ /* Get/set. 
*/ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ +a_name##tsd_get(bool init) { \ assert(a_name##tsd_booted); \ return (&a_name##tsd_tls); \ } \ a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ +a_name##tsd_set(a_type *val) { \ assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) \ + if (likely(&a_name##tsd_tls != val)) { \ a_name##tsd_tls = (*val); \ + } \ if (a_cleanup != malloc_tsd_no_cleanup) { \ if (pthread_setspecific(a_name##tsd_tsd, \ (void *)(&a_name##tsd_tls))) { \ malloc_write(": Error" \ " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ + if (opt_abort) { \ abort(); \ + } \ } \ } \ } @@ -292,15 +282,15 @@ a_name##tsd_set(a_type *val) \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ -a_name##tsd_cleanup_wrapper(void) \ -{ \ +a_name##tsd_cleanup_wrapper(void) { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ SetLastError(error); \ \ - if (wrapper == NULL) \ + if (wrapper == NULL) { \ return (false); \ + } \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ wrapper->initialized = false; \ @@ -314,8 +304,7 @@ a_name##tsd_cleanup_wrapper(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ malloc_write(": Error setting" \ " TSD for "#a_name"\n"); \ @@ -323,8 +312,7 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ -{ \ +a_name##tsd_wrapper_get(bool init) { \ DWORD error = GetLastError(); \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ TlsGetValue(a_name##tsd_tsd); \ @@ -346,11 +334,11 @@ a_name##tsd_wrapper_get(bool init) \ return (wrapper); \ } \ a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ +a_name##tsd_boot0(void) { \ a_name##tsd_tsd = 
TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) \ + if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) { \ return (true); \ + } \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ &a_name##tsd_cleanup_wrapper); \ @@ -360,8 +348,7 @@ a_name##tsd_boot0(void) \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1(void) \ -{ \ +a_name##tsd_boot1(void) { \ a_name##tsd_wrapper_t *wrapper; \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ @@ -375,54 +362,52 @@ a_name##tsd_boot1(void) \ a_name##tsd_wrapper_set(wrapper); \ } \ a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - if (a_name##tsd_boot0()) \ +a_name##tsd_boot(void) { \ + if (a_name##tsd_boot0()) { \ return (true); \ + } \ a_name##tsd_boot1(); \ return (false); \ } \ a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ +a_name##tsd_booted_get(void) { \ return (a_name##tsd_booted); \ } \ a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ +a_name##tsd_get_allocates(void) { \ return (true); \ } \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ +a_name##tsd_get(bool init) { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ return (NULL); \ + } \ return (&wrapper->val); \ } \ a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ +a_name##tsd_set(a_type *val) { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) \ + if (likely(&wrapper->val != val)) { \ wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ + } \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ wrapper->initialized = true; \ + } \ } #else #define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. 
*/ \ a_attr void \ -a_name##tsd_cleanup_wrapper(void *arg) \ -{ \ +a_name##tsd_cleanup_wrapper(void *arg) { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ \ if (a_cleanup != malloc_tsd_no_cleanup && \ @@ -435,8 +420,9 @@ a_name##tsd_cleanup_wrapper(void *arg) \ (void *)wrapper)) { \ malloc_write(": Error" \ " setting TSD for "#a_name"\n"); \ - if (opt_abort) \ + if (opt_abort) { \ abort(); \ + } \ } \ return; \ } \ @@ -444,8 +430,7 @@ a_name##tsd_cleanup_wrapper(void *arg) \ malloc_tsd_dalloc(wrapper); \ } \ a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ -{ \ +a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ if (pthread_setspecific(a_name##tsd_tsd, \ (void *)wrapper)) { \ malloc_write(": Error setting" \ @@ -454,8 +439,7 @@ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) \ } \ } \ a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) \ -{ \ +a_name##tsd_wrapper_get(bool init) { \ a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ pthread_getspecific(a_name##tsd_tsd); \ \ @@ -464,8 +448,9 @@ a_name##tsd_wrapper_get(bool init) \ wrapper = (a_name##tsd_wrapper_t *) \ tsd_init_check_recursion(&a_name##tsd_init_head, \ &block); \ - if (wrapper) \ - return (wrapper); \ + if (wrapper) { \ + return (wrapper); \ + } \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ block.data = (void *)wrapper; \ @@ -483,18 +468,17 @@ a_name##tsd_wrapper_get(bool init) \ return (wrapper); \ } \ a_attr bool \ -a_name##tsd_boot0(void) \ -{ \ +a_name##tsd_boot0(void) { \ if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) \ + a_name##tsd_cleanup_wrapper) != 0) { \ return (true); \ + } \ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ a_name##tsd_booted = true; \ return (false); \ } \ a_attr void \ -a_name##tsd_boot1(void) \ -{ \ +a_name##tsd_boot1(void) { \ a_name##tsd_wrapper_t *wrapper; \ wrapper = (a_name##tsd_wrapper_t *) \ 
malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ @@ -508,46 +492,45 @@ a_name##tsd_boot1(void) \ a_name##tsd_wrapper_set(wrapper); \ } \ a_attr bool \ -a_name##tsd_boot(void) \ -{ \ - if (a_name##tsd_boot0()) \ +a_name##tsd_boot(void) { \ + if (a_name##tsd_boot0()) { \ return (true); \ + } \ a_name##tsd_boot1(); \ return (false); \ } \ a_attr bool \ -a_name##tsd_booted_get(void) \ -{ \ +a_name##tsd_booted_get(void) { \ return (a_name##tsd_booted); \ } \ a_attr bool \ -a_name##tsd_get_allocates(void) \ -{ \ +a_name##tsd_get_allocates(void) { \ return (true); \ } \ /* Get/set. */ \ a_attr a_type * \ -a_name##tsd_get(bool init) \ -{ \ +a_name##tsd_get(bool init) { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) \ + if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ return (NULL); \ + } \ return (&wrapper->val); \ } \ a_attr void \ -a_name##tsd_set(a_type *val) \ -{ \ +a_name##tsd_set(a_type *val) { \ a_name##tsd_wrapper_t *wrapper; \ \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) \ + if (likely(&wrapper->val != val)) { \ wrapper->val = *(val); \ - if (a_cleanup != malloc_tsd_no_cleanup) \ + } \ + if (a_cleanup != malloc_tsd_no_cleanup) { \ wrapper->initialized = true; \ + } \ } #endif diff --git a/include/jemalloc/internal/util_inlines.h b/include/jemalloc/internal/util_inlines.h index 4ceed06b..271673ae 100644 --- a/include/jemalloc/internal/util_inlines.h +++ b/include/jemalloc/internal/util_inlines.h @@ -25,26 +25,22 @@ int get_errno(void); #endif JEMALLOC_ALWAYS_INLINE unsigned -ffs_llu(unsigned long long bitmap) -{ +ffs_llu(unsigned long long bitmap) { return (JEMALLOC_INTERNAL_FFSLL(bitmap)); } JEMALLOC_ALWAYS_INLINE unsigned -ffs_lu(unsigned long bitmap) -{ +ffs_lu(unsigned long bitmap) { return (JEMALLOC_INTERNAL_FFSL(bitmap)); } JEMALLOC_ALWAYS_INLINE 
unsigned -ffs_u(unsigned bitmap) -{ +ffs_u(unsigned bitmap) { return (JEMALLOC_INTERNAL_FFS(bitmap)); } JEMALLOC_ALWAYS_INLINE unsigned -ffs_zu(size_t bitmap) -{ +ffs_zu(size_t bitmap) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT return (ffs_u(bitmap)); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG @@ -57,8 +53,7 @@ ffs_zu(size_t bitmap) } JEMALLOC_ALWAYS_INLINE unsigned -ffs_u64(uint64_t bitmap) -{ +ffs_u64(uint64_t bitmap) { #if LG_SIZEOF_LONG == 3 return (ffs_lu(bitmap)); #elif LG_SIZEOF_LONG_LONG == 3 @@ -69,8 +64,7 @@ ffs_u64(uint64_t bitmap) } JEMALLOC_ALWAYS_INLINE unsigned -ffs_u32(uint32_t bitmap) -{ +ffs_u32(uint32_t bitmap) { #if LG_SIZEOF_INT == 2 return (ffs_u(bitmap)); #else @@ -80,8 +74,7 @@ ffs_u32(uint32_t bitmap) } JEMALLOC_INLINE uint64_t -pow2_ceil_u64(uint64_t x) -{ +pow2_ceil_u64(uint64_t x) { x--; x |= x >> 1; x |= x >> 2; @@ -94,8 +87,7 @@ pow2_ceil_u64(uint64_t x) } JEMALLOC_INLINE uint32_t -pow2_ceil_u32(uint32_t x) -{ +pow2_ceil_u32(uint32_t x) { x--; x |= x >> 1; x |= x >> 2; @@ -108,8 +100,7 @@ pow2_ceil_u32(uint32_t x) /* Compute the smallest power of 2 that is >= x. 
*/ JEMALLOC_INLINE size_t -pow2_ceil_zu(size_t x) -{ +pow2_ceil_zu(size_t x) { #if (LG_SIZEOF_PTR == 3) return (pow2_ceil_u64(x)); #else @@ -119,8 +110,7 @@ pow2_ceil_zu(size_t x) #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ +lg_floor(size_t x) { size_t ret; assert(x != 0); @@ -134,8 +124,7 @@ lg_floor(size_t x) } #elif (defined(_MSC_VER)) JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ +lg_floor(size_t x) { unsigned long ret; assert(x != 0); @@ -152,8 +141,7 @@ lg_floor(size_t x) } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ +lg_floor(size_t x) { assert(x != 0); #if (LG_SIZEOF_PTR == LG_SIZEOF_INT) @@ -166,8 +154,7 @@ lg_floor(size_t x) } #else JEMALLOC_INLINE unsigned -lg_floor(size_t x) -{ +lg_floor(size_t x) { assert(x != 0); x |= (x >> 1); @@ -178,8 +165,9 @@ lg_floor(size_t x) #if (LG_SIZEOF_PTR == 3) x |= (x >> 32); #endif - if (x == SIZE_T_MAX) + if (x == SIZE_T_MAX) { return ((8 << LG_SIZEOF_PTR) - 1); + } x++; return (ffs_zu(x) - 2); } @@ -187,8 +175,7 @@ lg_floor(size_t x) /* Set error code. */ JEMALLOC_INLINE void -set_errno(int errnum) -{ +set_errno(int errnum) { #ifdef _WIN32 SetLastError(errnum); #else @@ -198,8 +185,7 @@ set_errno(int errnum) /* Get last error code. */ JEMALLOC_INLINE int -get_errno(void) -{ +get_errno(void) { #ifdef _WIN32 return (GetLastError()); #else diff --git a/include/jemalloc/internal/util_types.h b/include/jemalloc/internal/util_types.h index 7f727993..4fe206bc 100644 --- a/include/jemalloc/internal/util_types.h +++ b/include/jemalloc/internal/util_types.h @@ -87,8 +87,9 @@ /* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ #define cassert(c) do { \ - if (unlikely(!(c))) \ + if (unlikely(!(c))) { \ not_reached(); \ + } \ } while (0) #endif /* JEMALLOC_INTERNAL_UTIL_TYPES_H */ diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h index 259aa2e5..2e5ebccc 100644 --- a/include/jemalloc/internal/witness_inlines.h +++ b/include/jemalloc/internal/witness_inlines.h @@ -13,8 +13,7 @@ void witness_unlock(tsdn_t *tsdn, witness_t *witness); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) /* Helper, not intended for direct use. */ JEMALLOC_INLINE bool -witness_owner(tsd_t *tsd, const witness_t *witness) -{ +witness_owner(tsd_t *tsd, const witness_t *witness) { witness_list_t *witnesses; witness_t *w; @@ -22,90 +21,101 @@ witness_owner(tsd_t *tsd, const witness_t *witness) witnesses = tsd_witnessesp_get(tsd); ql_foreach(w, witnesses, link) { - if (w == witness) + if (w == witness) { return (true); + } } return (false); } JEMALLOC_INLINE void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) -{ +witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + if (witness->rank == WITNESS_RANK_OMIT) { return; + } - if (witness_owner(tsd, witness)) + if (witness_owner(tsd, witness)) { return; + } witness_owner_error(witness); } JEMALLOC_INLINE void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) -{ +witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + if (witness->rank == WITNESS_RANK_OMIT) { return; + } witnesses = tsd_witnessesp_get(tsd); ql_foreach(w, witnesses, 
link) { - if (w == witness) + if (w == witness) { witness_not_owner_error(witness); + } } } JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) -{ +witness_assert_lockless(tsdn_t *tsdn) { tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); - if (w != NULL) + if (w != NULL) { witness_lockless_error(witnesses); + } } JEMALLOC_INLINE void -witness_lock(tsdn_t *tsdn, witness_t *witness) -{ +witness_lock(tsdn_t *tsdn, witness_t *witness) { tsd_t *tsd; witness_list_t *witnesses; witness_t *w; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + if (witness->rank == WITNESS_RANK_OMIT) { return; + } witness_assert_not_owner(tsdn, witness); @@ -133,19 +143,21 @@ witness_lock(tsdn_t *tsdn, witness_t *witness) } JEMALLOC_INLINE void -witness_unlock(tsdn_t *tsdn, witness_t *witness) -{ +witness_unlock(tsdn_t *tsdn, witness_t *witness) { tsd_t *tsd; witness_list_t *witnesses; - if (!config_debug) + if (!config_debug) { return; + } - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) + if (witness->rank == WITNESS_RANK_OMIT) { return; + } /* * Check whether owner before removal, rather than relying on @@ -155,8 +167,9 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) if (witness_owner(tsd, witness)) { witnesses = tsd_witnessesp_get(tsd); ql_remove(witnesses, witness, link); - } else + } else { witness_assert_owner(tsdn, witness); + } } #endif diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index 47998be2..971b36d4 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -6,17 +6,16 @@ #ifdef _MSC_VER # include # pragma 
intrinsic(_BitScanForward) -static __forceinline int ffsl(long x) -{ +static __forceinline int ffsl(long x) { unsigned long i; - if (_BitScanForward(&i, x)) + if (_BitScanForward(&i, x)) { return (i + 1); + } return (0); } -static __forceinline int ffs(int x) -{ +static __forceinline int ffs(int x) { return (ffsl(x)); } @@ -24,12 +23,12 @@ static __forceinline int ffs(int x) # pragma intrinsic(_BitScanForward64) # endif -static __forceinline int ffsll(unsigned __int64 x) -{ +static __forceinline int ffsll(unsigned __int64 x) { unsigned long i; #ifdef _M_X64 - if (_BitScanForward64(&i, x)) + if (_BitScanForward64(&i, x)) { return (i + 1); + } return (0); #else // Fallback for 32-bit build where 64-bit version not available @@ -41,10 +40,11 @@ static __forceinline int ffsll(unsigned __int64 x) s.ll = x; - if (_BitScanForward(&i, s.l[0])) + if (_BitScanForward(&i, s.l[0])) { return (i + 1); - else if(_BitScanForward(&i, s.l[1])) + } else if(_BitScanForward(&i, s.l[1])) { return (i + 33); + } return (0); #endif } diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp index a3d1a792..92e31624 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads.cpp @@ -16,8 +16,7 @@ using std::thread; using std::uniform_int_distribution; using std::minstd_rand; -int test_threads() -{ +int test_threads() { je_malloc_conf = "narenas:3"; int narenas = 0; size_t sz = sizeof(narenas); diff --git a/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/msvc/projects/vc2015/test_threads/test_threads_main.cpp index ffd96e6a..0a022fba 100644 --- a/msvc/projects/vc2015/test_threads/test_threads_main.cpp +++ b/msvc/projects/vc2015/test_threads/test_threads_main.cpp @@ -5,8 +5,7 @@ using namespace std::chrono_literals; -int main(int argc, char** argv) -{ +int main(int argc, char** argv) { int rc = test_threads(); return rc; } diff --git a/src/arena.c b/src/arena.c index 
7362c4e6..5cf9bd07 100644 --- a/src/arena.c +++ b/src/arena.c @@ -38,16 +38,14 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ static size_t -arena_extent_dirty_npages(const extent_t *extent) -{ +arena_extent_dirty_npages(const extent_t *extent) { return (extent_size_get(extent) >> LG_PAGE); } static extent_t * arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab) -{ + size_t alignment, bool *zero, bool slab) { bool commit = true; malloc_mutex_assert_owner(tsdn, &arena->lock); @@ -59,8 +57,7 @@ arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, extent_t * arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero) -{ + size_t alignment, bool *zero) { extent_t *extent; malloc_mutex_lock(tsdn, &arena->lock); @@ -73,8 +70,7 @@ arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, static void arena_extent_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) -{ + extent_hooks_t **r_extent_hooks, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &arena->lock); extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); @@ -83,8 +79,7 @@ arena_extent_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) -{ + extent_hooks_t **r_extent_hooks, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->lock); arena_extent_cache_dalloc_locked(tsdn, arena, r_extent_hooks, extent); malloc_mutex_unlock(tsdn, &arena->lock); @@ -92,8 +87,7 @@ arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool cache) -{ + bool cache) 
{ malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); if (cache) { @@ -104,8 +98,7 @@ arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool dirty) -{ + bool dirty) { malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); if (dirty) { @@ -117,8 +110,7 @@ arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, extent_t *extent, JEMALLOC_INLINE_C void * arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, - const arena_bin_info_t *bin_info) -{ + const arena_bin_info_t *bin_info) { void *ret; arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -137,8 +129,7 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, JEMALLOC_INLINE_C #endif size_t -arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) -{ +arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. 
*/ @@ -174,8 +165,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) JEMALLOC_INLINE_C void arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, - arena_slab_data_t *slab_data, void *ptr) -{ + arena_slab_data_t *slab_data, void *ptr) { szind_t binind = slab_data->binind; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); @@ -189,27 +179,25 @@ arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, } static void -arena_nactive_add(arena_t *arena, size_t add_pages) -{ +arena_nactive_add(arena_t *arena, size_t add_pages) { arena->nactive += add_pages; } static void -arena_nactive_sub(arena_t *arena, size_t sub_pages) -{ +arena_nactive_sub(arena_t *arena, size_t sub_pages) { assert(arena->nactive >= sub_pages); arena->nactive -= sub_pages; } static void -arena_large_malloc_stats_update(arena_t *arena, size_t usize) -{ +arena_large_malloc_stats_update(arena_t *arena, size_t usize) { szind_t index, hindex; cassert(config_stats); - if (usize < LARGE_MINCLASS) + if (usize < LARGE_MINCLASS) { usize = LARGE_MINCLASS; + } index = size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; @@ -221,14 +209,14 @@ arena_large_malloc_stats_update(arena_t *arena, size_t usize) } static void -arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) -{ +arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) { szind_t index, hindex; cassert(config_stats); - if (usize < LARGE_MINCLASS) + if (usize < LARGE_MINCLASS) { usize = LARGE_MINCLASS; + } index = size2index(usize); hindex = (index >= NBINS) ? 
index - NBINS : 0; @@ -240,14 +228,14 @@ arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) } static void -arena_large_dalloc_stats_update(arena_t *arena, size_t usize) -{ +arena_large_dalloc_stats_update(arena_t *arena, size_t usize) { szind_t index, hindex; cassert(config_stats); - if (usize < LARGE_MINCLASS) + if (usize < LARGE_MINCLASS) { usize = LARGE_MINCLASS; + } index = size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; @@ -258,8 +246,7 @@ arena_large_dalloc_stats_update(arena_t *arena, size_t usize) } static void -arena_large_reset_stats_cancel(arena_t *arena, size_t usize) -{ +arena_large_reset_stats_cancel(arena_t *arena, size_t usize) { szind_t index = size2index(usize); szind_t hindex = (index >= NBINS) ? index - NBINS : 0; @@ -270,16 +257,15 @@ arena_large_reset_stats_cancel(arena_t *arena, size_t usize) } static void -arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) -{ +arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) { arena_large_dalloc_stats_update(arena, oldusize); arena_large_malloc_stats_update(arena, usize); } static extent_t * arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, size_t usize, size_t alignment, bool *zero) -{ + extent_hooks_t **r_extent_hooks, size_t usize, size_t alignment, + bool *zero) { extent_t *extent; bool commit = true; @@ -301,8 +287,7 @@ arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero) -{ + size_t alignment, bool *zero) { extent_t *extent; extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; @@ -328,14 +313,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool locked) -{ + bool locked) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - if 
(!locked) + if (!locked) { malloc_mutex_lock(tsdn, &arena->lock); - else + } else { malloc_mutex_assert_owner(tsdn, &arena->lock); + } if (config_stats) { arena_large_dalloc_stats_update(arena, extent_usize_get(extent)); @@ -344,14 +329,14 @@ arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, extent); - if (!locked) + if (!locked) { malloc_mutex_unlock(tsdn, &arena->lock); + } } void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t oldusize) -{ + size_t oldusize) { size_t usize = extent_usize_get(extent); size_t udiff = oldusize - usize; @@ -366,8 +351,7 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t oldusize) -{ + size_t oldusize) { size_t usize = extent_usize_get(extent); size_t udiff = usize - oldusize; @@ -381,8 +365,7 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } static void -arena_decay_deadline_init(arena_t *arena) -{ +arena_decay_deadline_init(arena_t *arena) { /* * Generate a new deadline that is uniformly random within the next * epoch after the current one. @@ -399,14 +382,12 @@ arena_decay_deadline_init(arena_t *arena) } static bool -arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) -{ +arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) { return (nstime_compare(&arena->decay.deadline, time) <= 0); } static size_t -arena_decay_backlog_npages_limit(const arena_t *arena) -{ +arena_decay_backlog_npages_limit(const arena_t *arena) { static const uint64_t h_steps[] = { #define STEP(step, h, x, y) \ h, @@ -423,24 +404,23 @@ arena_decay_backlog_npages_limit(const arena_t *arena) * to round down to the nearest whole number of pages. 
*/ sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { sum += arena->decay.backlog[i] * h_steps[i]; + } npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); return (npages_limit_backlog); } static void -arena_decay_backlog_update_last(arena_t *arena) -{ +arena_decay_backlog_update_last(arena_t *arena) { size_t ndirty_delta = (arena->ndirty > arena->decay.nunpurged) ? arena->ndirty - arena->decay.nunpurged : 0; arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; } static void -arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) -{ +arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); @@ -461,8 +441,7 @@ arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) } static void -arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) -{ +arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { uint64_t nadvance_u64; nstime_t delta; @@ -486,25 +465,23 @@ arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) } static void -arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) -{ +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) { size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); - if (arena->ndirty > ndirty_limit) + if (arena->ndirty > ndirty_limit) { arena_purge_to_limit(tsdn, arena, ndirty_limit); + } arena->decay.nunpurged = arena->ndirty; } static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) -{ +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) { arena_decay_epoch_advance_helper(arena, time); arena_decay_epoch_advance_purge(tsdn, arena); } static void -arena_decay_init(arena_t *arena, ssize_t decay_time) -{ +arena_decay_init(arena_t *arena, ssize_t decay_time) { arena->decay.time = decay_time; if (decay_time > 0) { 
nstime_init2(&arena->decay.interval, decay_time, 0); @@ -520,18 +497,18 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) } static bool -arena_decay_time_valid(ssize_t decay_time) -{ - if (decay_time < -1) +arena_decay_time_valid(ssize_t decay_time) { + if (decay_time < -1) { return (false); - if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) + } + if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) { return (true); + } return (false); } ssize_t -arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) -{ +arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) { ssize_t decay_time; malloc_mutex_lock(tsdn, &arena->lock); @@ -542,10 +519,10 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) } bool -arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) -{ - if (!arena_decay_time_valid(decay_time)) +arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { + if (!arena_decay_time_valid(decay_time)) { return (true); + } malloc_mutex_lock(tsdn, &arena->lock); /* @@ -564,14 +541,14 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) } static void -arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) -{ +arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) { nstime_t time; /* Purge all or nothing if the option is disabled. */ if (arena->decay.time <= 0) { - if (arena->decay.time == 0) + if (arena->decay.time == 0) { arena_purge_to_limit(tsdn, arena, 0); + } return; } @@ -601,33 +578,34 @@ arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) * during the current epoch are not subject to purge until a future * epoch, so as a result purging only happens during epoch advances. 
*/ - if (arena_decay_deadline_reached(arena, &time)) + if (arena_decay_deadline_reached(arena, &time)) { arena_decay_epoch_advance(tsdn, arena, &time); + } } void -arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) -{ +arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_assert_owner(tsdn, &arena->lock); /* Don't recursively purge. */ - if (arena->purging) + if (arena->purging) { return; + } arena_maybe_purge_helper(tsdn, arena); } static size_t -arena_dirty_count(tsdn_t *tsdn, arena_t *arena) -{ +arena_dirty_count(tsdn_t *tsdn, arena_t *arena) { extent_t *extent; size_t ndirty = 0; malloc_mutex_lock(tsdn, &arena->extents_mtx); for (extent = qr_next(&arena->extents_dirty, qr_link); extent != - &arena->extents_dirty; extent = qr_next(extent, qr_link)) + &arena->extents_dirty; extent = qr_next(extent, qr_link)) { ndirty += extent_size_get(extent) >> LG_PAGE; + } malloc_mutex_unlock(tsdn, &arena->extents_mtx); @@ -636,8 +614,7 @@ arena_dirty_count(tsdn_t *tsdn, arena_t *arena) static size_t arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - size_t ndirty_limit, extent_t *purge_extents_sentinel) -{ + size_t ndirty_limit, extent_t *purge_extents_sentinel) { extent_t *extent, *next; size_t nstashed = 0; @@ -651,8 +628,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, UNUSED extent_t *textent; npages = extent_size_get(extent) >> LG_PAGE; - if (arena->ndirty - (nstashed + npages) < ndirty_limit) + if (arena->ndirty - (nstashed + npages) < ndirty_limit) { break; + } next = qr_next(extent, qr_link); /* Allocate. 
*/ @@ -675,20 +653,21 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static size_t arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *purge_extents_sentinel) -{ + extent_hooks_t **r_extent_hooks, extent_t *purge_extents_sentinel) { UNUSED size_t nmadvise; size_t npurged; extent_t *extent, *next; - if (config_stats) + if (config_stats) { nmadvise = 0; + } npurged = 0; for (extent = qr_next(purge_extents_sentinel, qr_link); extent != purge_extents_sentinel; extent = next) { - if (config_stats) + if (config_stats) { nmadvise++; + } npurged += extent_size_get(extent) >> LG_PAGE; next = qr_next(extent, qr_link); @@ -709,8 +688,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, * invariant: (arena->ndirty >= ndirty_limit) */ static void -arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) -{ +arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t npurge, npurged; extent_t purge_extents_sentinel; @@ -730,33 +708,34 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, ndirty_limit, &purge_extents_sentinel); - if (npurge == 0) + if (npurge == 0) { goto label_return; + } npurged = arena_purge_stashed(tsdn, arena, &extent_hooks, &purge_extents_sentinel); assert(npurged == npurge); - if (config_stats) + if (config_stats) { arena->stats.npurge++; + } label_return: arena->purging = false; } void -arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) -{ +arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { malloc_mutex_lock(tsdn, &arena->lock); - if (all) + if (all) { arena_purge_to_limit(tsdn, arena, 0); - else + } else { arena_maybe_purge(tsdn, arena); + } malloc_mutex_unlock(tsdn, &arena->lock); } static void -arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) -{ +arena_slab_dalloc(tsdn_t *tsdn, arena_t 
*arena, extent_t *slab) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); @@ -764,45 +743,41 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) } static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) -{ +arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { assert(extent_slab_data_get(slab)->nfree > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) -{ +arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); } static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) -{ +arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); - if (slab == NULL) + if (slab == NULL) { return (NULL); - if (config_stats) + } + if (config_stats) { bin->stats.reslabs++; + } return (slab); } static void -arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) -{ +arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) { assert(extent_slab_data_get(slab)->nfree == 0); extent_ring_insert(&bin->slabs_full, slab); } static void -arena_bin_slabs_full_remove(extent_t *slab) -{ +arena_bin_slabs_full_remove(extent_t *slab) { extent_ring_remove(slab); } void -arena_reset(tsd_t *tsd, arena_t *arena) -{ +arena_reset(tsd_t *tsd, arena_t *arena) { unsigned i; extent_t *extent; @@ -828,16 +803,19 @@ arena_reset(tsd_t *tsd, arena_t *arena) size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); - if (config_stats || (config_prof && opt_prof)) + if (config_stats || (config_prof && opt_prof)) { usize = isalloc(tsd_tsdn(tsd), extent, ptr); + } /* Remove large allocation from prof sample set. 
*/ - if (config_prof && opt_prof) + if (config_prof && opt_prof) { prof_free(tsd, extent, ptr, usize); + } large_dalloc(tsd_tsdn(tsd), extent); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); /* Cancel out unwanted effects on stats. */ - if (config_stats) + if (config_stats) { arena_large_reset_stats_cancel(arena, usize); + } } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -883,8 +861,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) } static void -arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) -{ +arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t i; @@ -912,8 +889,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) } void -arena_destroy(tsd_t *tsd, arena_t *arena) -{ +arena_destroy(tsd_t *tsd, arena_t *arena) { assert(base_ind_get(arena->base) >= narenas_auto); assert(arena_nthreads_get(arena, false) == 0); assert(arena_nthreads_get(arena, true) == 0); @@ -949,8 +925,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info) -{ + extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info) { extent_t *slab; bool zero, commit; @@ -966,8 +941,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, - const arena_bin_info_t *bin_info) -{ + const arena_bin_info_t *bin_info) { extent_t *slab; arena_slab_data_t *slab_data; extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; @@ -978,8 +952,9 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, bin_info); - if (slab == NULL) + if (slab == NULL) { return (NULL); + } } assert(extent_slab_get(slab)); @@ -991,23 +966,24 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, slab_data->nfree = bin_info->nregs; 
bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); - if (config_stats) + if (config_stats) { arena->stats.mapped += extent_size_get(slab); + } return (slab); } static extent_t * arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, - szind_t binind) -{ + szind_t binind) { extent_t *slab; const arena_bin_info_t *bin_info; /* Look for a usable slab. */ slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) + if (slab != NULL) { return (slab); + } /* No existing slabs have any space available. */ bin_info = &arena_bin_info[binind]; @@ -1034,8 +1010,9 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, * so search one more time. */ slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) + if (slab != NULL) { return (slab); + } return (NULL); } @@ -1043,8 +1020,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ static void * arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, - szind_t binind) -{ + szind_t binind) { const arena_bin_info_t *bin_info; extent_t *slab; @@ -1088,8 +1064,9 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, bin->slabcur = NULL; } - if (slab == NULL) + if (slab == NULL) { return (NULL); + } bin->slabcur = slab; assert(extent_slab_data_get(bin->slabcur)->nfree > 0); @@ -1099,15 +1076,15 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes) -{ + szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; assert(tbin->ncached == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) + if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); + } bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = 
(tcache_bin_info[binind].ncached_max >> @@ -1118,8 +1095,9 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, extent_slab_data_get(slab)->nfree > 0) { ptr = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); - } else + } else { ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); + } if (ptr == NULL) { /* * OOM. tbin->avail isn't yet filled down to its first @@ -1152,10 +1130,10 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, } void -arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) -{ - if (!zero) +arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { + if (!zero) { memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); + } } #ifdef JEMALLOC_JET @@ -1163,8 +1141,7 @@ arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) #define arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small) #endif void -arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info) -{ +arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } #ifdef JEMALLOC_JET @@ -1175,8 +1152,7 @@ arena_dalloc_junk_small_t *arena_dalloc_junk_small = #endif static void * -arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) -{ +arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; arena_bin_t *bin; size_t usize; @@ -1188,10 +1164,11 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_slab_data_get(slab)->nfree > - 0) + 0) { ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); - else + } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); + } if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); @@ -1204,16 +1181,18 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) 
bin->stats.curregs++; } malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && arena_prof_accum(tsdn, arena, usize)) + if (config_prof && arena_prof_accum(tsdn, arena, usize)) { prof_idump(tsdn); + } if (!zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, &arena_bin_info[binind], false); - } else if (unlikely(opt_zero)) + } else if (unlikely(opt_zero)) { memset(ret, 0, usize); + } } } else { if (config_fill && unlikely(opt_junk_alloc)) { @@ -1229,24 +1208,25 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) void * arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero) -{ + bool zero) { assert(!tsdn_null(tsdn) || arena != NULL); - if (likely(!tsdn_null(tsdn))) + if (likely(!tsdn_null(tsdn))) { arena = arena_choose(tsdn_tsd(tsdn), arena); - if (unlikely(arena == NULL)) + } + if (unlikely(arena == NULL)) { return (NULL); + } - if (likely(size <= SMALL_MAXCLASS)) + if (likely(size <= SMALL_MAXCLASS)) { return (arena_malloc_small(tsdn, arena, ind, zero)); + } return (large_malloc(tsdn, arena, index2size(ind), zero)); } void * arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) -{ + bool zero, tcache_t *tcache) { void *ret; if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE @@ -1255,18 +1235,18 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true); } else { - if (likely(alignment <= CACHELINE)) + if (likely(alignment <= CACHELINE)) { ret = large_malloc(tsdn, arena, usize, zero); - else + } else { ret = large_palloc(tsdn, arena, usize, alignment, zero); + } } return (ret); } void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize) -{ + size_t usize) { arena_t *arena = extent_arena_get(extent); cassert(config_prof); @@ -1283,18 +1263,18 @@ arena_prof_promote(tsdn_t 
*tsdn, extent_t *extent, const void *ptr, * canceling. */ malloc_mutex_lock(tsdn, &arena->lock); - if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) + if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) { arena->prof_accumbytes -= LARGE_MINCLASS - usize; - else + } else { arena->prof_accumbytes = 0; + } malloc_mutex_unlock(tsdn, &arena->lock); assert(isalloc(tsdn, extent, ptr) == usize); } static size_t -arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) -{ +arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); @@ -1307,8 +1287,7 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path) -{ + tcache_t *tcache, bool slow_path) { size_t usize; cassert(config_prof); @@ -1318,17 +1297,17 @@ arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, if (usize <= tcache_maxclass) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, usize, slow_path); - } else + } else { large_dalloc(tsdn, extent); + } } static void -arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) -{ +arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { /* Dissociate slab from bin. */ - if (slab == bin->slabcur) + if (slab == bin->slabcur) { bin->slabcur = NULL; - else { + } else { szind_t binind = extent_slab_data_get(slab)->binind; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -1337,17 +1316,17 @@ arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) * slab only contains one region, then it never gets inserted * into the non-full slabs heap. 
*/ - if (bin_info->nregs == 1) + if (bin_info->nregs == 1) { arena_bin_slabs_full_remove(slab); - else + } else { arena_bin_slabs_nonfull_remove(bin, slab); + } } } static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) -{ + arena_bin_t *bin) { assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1357,14 +1336,14 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) + if (config_stats) { bin->stats.curslabs--; + } } static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) -{ + arena_bin_t *bin) { assert(extent_slab_data_get(slab)->nfree > 0); /* @@ -1375,28 +1354,31 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, */ if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) { /* Switch slabcur. */ - if (extent_slab_data_get(bin->slabcur)->nfree > 0) + if (extent_slab_data_get(bin->slabcur)->nfree > 0) { arena_bin_slabs_nonfull_insert(bin, bin->slabcur); - else + } else { arena_bin_slabs_full_insert(bin, bin->slabcur); + } bin->slabcur = slab; - if (config_stats) + if (config_stats) { bin->stats.reslabs++; - } else + } + } else { arena_bin_slabs_nonfull_insert(bin, slab); + } } static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - void *ptr, bool junked) -{ + void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); szind_t binind = slab_data->binind; arena_bin_t *bin = &arena->bins[binind]; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; - if (!junked && config_fill && unlikely(opt_junk_free)) + if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); + } arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); if (slab_data->nfree == bin_info->nregs) { @@ -1415,14 +1397,12 @@ 
arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - void *ptr) -{ + void *ptr) { arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true); } static void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) -{ +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { arena_bin_t *bin = &arena->bins[extent_slab_data_get(extent)->binind]; malloc_mutex_lock(tsdn, &bin->lock); @@ -1431,23 +1411,22 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) } void -arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) -{ +arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); } bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t size, size_t extra, bool zero) -{ + size_t size, size_t extra, bool zero) { size_t usize_min, usize_max; /* Calls with non-zero extra had to clamp extra. 
*/ assert(extra == 0 || size + extra <= LARGE_MAXCLASS); - if (unlikely(size > LARGE_MAXCLASS)) + if (unlikely(size > LARGE_MAXCLASS)) { return (true); + } usize_min = s2u(size); usize_max = s2u(size + extra); @@ -1460,8 +1439,9 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, oldsize); if ((usize_max > SMALL_MAXCLASS || size2index(usize_max) != size2index(oldsize)) && (size > oldsize || usize_max < - oldsize)) + oldsize)) { return (true); + } arena_decay_tick(tsdn, extent_arena_get(extent)); return (false); @@ -1475,33 +1455,36 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) -{ - if (alignment == 0) + size_t alignment, bool zero, tcache_t *tcache) { + if (alignment == 0) { return (arena_malloc(tsdn, arena, usize, size2index(usize), zero, tcache, true)); + } usize = sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return (NULL); + } return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); } void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache) -{ + size_t oldsize, size_t size, size_t alignment, bool zero, + tcache_t *tcache) { void *ret; size_t usize, copysize; usize = s2u(size); - if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) + if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { return (NULL); + } if (likely(usize <= SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. 
*/ if (!arena_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, 0, - zero)) + zero)) { return (ptr); + } } if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { @@ -1515,8 +1498,9 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, */ ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, zero, tcache); - if (ret == NULL) + if (ret == NULL) { return (NULL); + } /* * Junk/zero-filling were already done by @@ -1530,8 +1514,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, } dss_prec_t -arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) -{ +arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) { dss_prec_t ret; malloc_mutex_lock(tsdn, &arena->lock); @@ -1541,10 +1524,10 @@ arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) } bool -arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) -{ - if (!have_dss) +arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { + if (!have_dss) { return (dss_prec != dss_prec_disabled); + } malloc_mutex_lock(tsdn, &arena->lock); arena->dss_prec = dss_prec; malloc_mutex_unlock(tsdn, &arena->lock); @@ -1552,24 +1535,22 @@ arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) } ssize_t -arena_decay_time_default_get(void) -{ +arena_decay_time_default_get(void) { return ((ssize_t)atomic_read_zu((size_t *)&decay_time_default)); } bool -arena_decay_time_default_set(ssize_t decay_time) -{ - if (!arena_decay_time_valid(decay_time)) +arena_decay_time_default_set(ssize_t decay_time) { + if (!arena_decay_time_valid(decay_time)) { return (true); + } atomic_write_zu((size_t *)&decay_time_default, (size_t)decay_time); return (false); } static void arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) -{ + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; 
*decay_time = arena->decay.time; @@ -1579,8 +1560,7 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) -{ + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { malloc_mutex_lock(tsdn, &arena->lock); arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, nactive, ndirty); @@ -1591,8 +1571,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) -{ + malloc_large_stats_t *lstats) { size_t base_allocated, base_resident, base_mapped; unsigned i; @@ -1662,57 +1641,57 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } unsigned -arena_nthreads_get(arena_t *arena, bool internal) -{ +arena_nthreads_get(arena_t *arena, bool internal) { return (atomic_read_u(&arena->nthreads[internal])); } void -arena_nthreads_inc(arena_t *arena, bool internal) -{ +arena_nthreads_inc(arena_t *arena, bool internal) { atomic_add_u(&arena->nthreads[internal], 1); } void -arena_nthreads_dec(arena_t *arena, bool internal) -{ +arena_nthreads_dec(arena_t *arena, bool internal) { atomic_sub_u(&arena->nthreads[internal], 1); } size_t -arena_extent_sn_next(arena_t *arena) -{ +arena_extent_sn_next(arena_t *arena) { return (atomic_add_zu(&arena->extent_sn_next, 1) - 1); } arena_t * -arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) -{ +arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; base_t *base; unsigned i; - if (ind == 0) + if (ind == 0) { base = b0get(); - else { + } else { base = base_new(tsdn, ind, extent_hooks); - if (base == NULL) + if (base == NULL) { return (NULL); + } } arena = (arena_t *)base_alloc(tsdn, base, sizeof(arena_t), CACHELINE); - if 
(arena == NULL) + if (arena == NULL) { goto label_error; + } arena->nthreads[0] = arena->nthreads[1] = 0; - if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) + if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) { goto label_error; + } - if (config_stats && config_tcache) + if (config_stats && config_tcache) { ql_new(&arena->tcache_ql); + } - if (config_prof) + if (config_prof) { arena->prof_accumbytes = 0; + } if (config_cache_oblivious) { /* @@ -1738,8 +1717,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) ql_new(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", - WITNESS_RANK_ARENA_LARGE)) + WITNESS_RANK_ARENA_LARGE)) { goto label_error; + } for (i = 0; i < NPSIZES+1; i++) { extent_heap_new(&arena->extents_cached[i]); @@ -1750,83 +1730,85 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) false, false); if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", - WITNESS_RANK_ARENA_EXTENTS)) + WITNESS_RANK_ARENA_EXTENTS)) { goto label_error; + } - if (!config_munmap) + if (!config_munmap) { arena->extent_grow_next = psz2ind(HUGEPAGE); + } ql_new(&arena->extent_cache); if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", - WITNESS_RANK_ARENA_EXTENT_CACHE)) + WITNESS_RANK_ARENA_EXTENT_CACHE)) { goto label_error; + } /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN)) + WITNESS_RANK_ARENA_BIN)) { goto label_error; + } bin->slabcur = NULL; extent_heap_new(&bin->slabs_nonfull); extent_init(&bin->slabs_full, arena, NULL, 0, 0, 0, false, false, false, false); - if (config_stats) + if (config_stats) { memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + } } arena->base = base; return (arena); label_error: - if (ind != 0) + if (ind != 0) { base_delete(base); + } return (NULL); } void -arena_boot(void) -{ +arena_boot(void) { arena_decay_time_default_set(opt_decay_time); } void -arena_prefork0(tsdn_t *tsdn, arena_t *arena) -{ +arena_prefork0(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->lock); } void -arena_prefork1(tsdn_t *tsdn, arena_t *arena) -{ +arena_prefork1(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->extents_mtx); } void -arena_prefork2(tsdn_t *tsdn, arena_t *arena) -{ +arena_prefork2(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->extent_cache_mtx); } void -arena_prefork3(tsdn_t *tsdn, arena_t *arena) -{ +arena_prefork3(tsdn_t *tsdn, arena_t *arena) { unsigned i; base_prefork(tsdn, arena->base); - for (i = 0; i < NBINS; i++) + for (i = 0; i < NBINS; i++) { malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + } malloc_mutex_prefork(tsdn, &arena->large_mtx); } void -arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) -{ +arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); - for (i = 0; i < NBINS; i++) + for (i = 0; i < NBINS; i++) { malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + } base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_cache_mtx); malloc_mutex_postfork_parent(tsdn, &arena->extents_mtx); @@ -1834,13 +1816,13 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) } void -arena_postfork_child(tsdn_t 
*tsdn, arena_t *arena) -{ +arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { unsigned i; malloc_mutex_postfork_child(tsdn, &arena->large_mtx); - for (i = 0; i < NBINS; i++) + for (i = 0; i < NBINS; i++) { malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + } base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_cache_mtx); malloc_mutex_postfork_child(tsdn, &arena->extents_mtx); diff --git a/src/base.c b/src/base.c index 7c0ef2c1..ee964faa 100644 --- a/src/base.c +++ b/src/base.c @@ -9,17 +9,16 @@ static base_t *b0; /******************************************************************************/ static void * -base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) -{ +base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; assert(size == HUGEPAGE_CEILING(size)); - if (extent_hooks == &extent_hooks_default) + if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); - else { + } else { addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, &zero, &commit, ind); } @@ -28,8 +27,8 @@ base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) } static void -base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, size_t size) -{ +base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, + size_t size) { /* * Cascade through dalloc, decommit, purge_lazy, and purge_forced, * stopping at first success. This cascade is performed for consistency @@ -41,40 +40,48 @@ base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, size_t size) * some consistent-but-allocated state. 
*/ if (extent_hooks == &extent_hooks_default) { - if (!extent_dalloc_mmap(addr, size)) + if (!extent_dalloc_mmap(addr, size)) { return; - if (!pages_decommit(addr, size)) + } + if (!pages_decommit(addr, size)) { return; - if (!pages_purge_lazy(addr, size)) + } + if (!pages_purge_lazy(addr, size)) { return; - if (!pages_purge_forced(addr, size)) + } + if (!pages_purge_forced(addr, size)) { return; + } /* Nothing worked. This should never happen. */ not_reached(); } else { if (extent_hooks->dalloc != NULL && - !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) + !extent_hooks->dalloc(extent_hooks, addr, size, true, + ind)) { return; + } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, - ind)) + ind)) { return; + } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, - ind)) + ind)) { return; + } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, - size, ind)) + size, ind)) { return; + } /* Nothing worked. That's the application's problem. 
*/ } } static void base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, - size_t size) -{ + size_t size) { size_t sn; sn = *extent_sn_next; @@ -85,8 +92,7 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, static void * base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, - size_t alignment) -{ + size_t alignment) { void *ret; assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM)); @@ -104,8 +110,7 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, static void base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t gap_size, void *addr, size_t size) -{ + size_t gap_size, void *addr, size_t size) { if (extent_size_get(extent) > 0) { /* * Compute the index for the largest size class that does not @@ -131,8 +136,7 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, static void * base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t size, size_t alignment) -{ + size_t size, size_t alignment) { void *ret; size_t gap_size; @@ -148,8 +152,7 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, */ static base_block_t * base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, - size_t *extent_sn_next, size_t size, size_t alignment) -{ + size_t *extent_sn_next, size_t size, size_t alignment) { base_block_t *block; size_t usize, header_size, gap_size, block_size; @@ -159,8 +162,9 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, gap_size = ALIGNMENT_CEILING(header_size, alignment) - header_size; block_size = HUGEPAGE_CEILING(header_size + gap_size + usize); block = (base_block_t *)base_map(extent_hooks, ind, block_size); - if (block == NULL) + if (block == NULL) { return (NULL); + } block->size = block_size; block->next = NULL; assert(block_size >= header_size); @@ -174,8 +178,7 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, * specified alignment. 
*/ static extent_t * -base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) -{ +base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { extent_hooks_t *extent_hooks = base_extent_hooks_get(base); base_block_t *block; @@ -183,8 +186,9 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) block = base_block_alloc(extent_hooks, base_ind_get(base), &base->extent_sn_next, size, alignment); - if (block == NULL) + if (block == NULL) { return (NULL); + } block->next = base->blocks; base->blocks = block; if (config_stats) { @@ -198,14 +202,12 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) } base_t * -b0get(void) -{ +b0get(void) { return (b0); } base_t * -base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) -{ +base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base_t *base; size_t extent_sn_next, base_alignment, base_size, gap_size; base_block_t *block; @@ -214,8 +216,9 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) extent_sn_next = 0; block = base_block_alloc(extent_hooks, ind, &extent_sn_next, sizeof(base_t), QUANTUM); - if (block == NULL) + if (block == NULL) { return (NULL); + } base_alignment = CACHELINE; base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); @@ -229,8 +232,9 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) } base->extent_sn_next = extent_sn_next; base->blocks = block; - for (i = 0; i < NSIZES; i++) + for (i = 0; i < NSIZES; i++) { extent_heap_new(&base->avail[i]); + } if (config_stats) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); @@ -245,8 +249,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) } void -base_delete(base_t *base) -{ +base_delete(base_t *base) { extent_hooks_t *extent_hooks = base_extent_hooks_get(base); base_block_t *next = base->blocks; do { @@ -258,14 +261,12 @@ 
base_delete(base_t *base) } extent_hooks_t * -base_extent_hooks_get(base_t *base) -{ +base_extent_hooks_get(base_t *base) { return ((extent_hooks_t *)atomic_read_p(&base->extent_hooks_pun)); } extent_hooks_t * -base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) -{ +base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base); union { extent_hooks_t **h; @@ -287,8 +288,7 @@ base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) * sharing. */ void * -base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) -{ +base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { void *ret; size_t usize, asize; szind_t i; @@ -324,8 +324,7 @@ label_return: void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, - size_t *mapped) -{ + size_t *mapped) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -338,26 +337,22 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, } void -base_prefork(tsdn_t *tsdn, base_t *base) -{ +base_prefork(tsdn_t *tsdn, base_t *base) { malloc_mutex_prefork(tsdn, &base->mtx); } void -base_postfork_parent(tsdn_t *tsdn, base_t *base) -{ +base_postfork_parent(tsdn_t *tsdn, base_t *base) { malloc_mutex_postfork_parent(tsdn, &base->mtx); } void -base_postfork_child(tsdn_t *tsdn, base_t *base) -{ +base_postfork_child(tsdn_t *tsdn, base_t *base) { malloc_mutex_postfork_child(tsdn, &base->mtx); } bool -base_boot(tsdn_t *tsdn) -{ +base_boot(tsdn_t *tsdn) { b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); return (b0 == NULL); } diff --git a/src/bitmap.c b/src/bitmap.c index 3d27f059..7cbc7d45 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -6,8 +6,7 @@ #ifdef BITMAP_USE_TREE void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { unsigned i; size_t group_count; @@ -35,14 +34,12 @@ 
bitmap_info_init(bitmap_info_t *binfo, size_t nbits) } static size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ +bitmap_info_ngroups(const bitmap_info_t *binfo) { return (binfo->levels[binfo->nlevels].group_offset); } void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { size_t extra; unsigned i; @@ -56,23 +53,24 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) + if (extra != 0) { bitmap[binfo->levels[1].group_offset - 1] >>= extra; + } for (i = 1; i < binfo->nlevels; i++) { size_t group_count = binfo->levels[i].group_offset - binfo->levels[i-1].group_offset; extra = (BITMAP_GROUP_NBITS - (group_count & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) + if (extra != 0) { bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } } } #else /* BITMAP_USE_TREE */ void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) -{ +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { assert(nbits > 0); assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); @@ -81,27 +79,25 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) } static size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) -{ +bitmap_info_ngroups(const bitmap_info_t *binfo) { return (binfo->ngroups); } void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) -{ +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { size_t extra; memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) + if (extra != 0) { bitmap[binfo->ngroups - 1] >>= extra; + } } #endif /* BITMAP_USE_TREE */ size_t -bitmap_size(const bitmap_info_t *binfo) -{ +bitmap_size(const bitmap_info_t *binfo) { return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); } 
diff --git a/src/ckh.c b/src/ckh.c index fe79862c..0deaf809 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -50,15 +50,15 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); * otherwise. */ JEMALLOC_INLINE_C size_t -ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) -{ +ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { ckhc_t *cell; unsigned i; for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; - if (cell->key != NULL && ckh->keycomp(key, cell->key)) + if (cell->key != NULL && ckh->keycomp(key, cell->key)) { return ((bucket << LG_CKH_BUCKET_CELLS) + i); + } } return (SIZE_T_MAX); @@ -68,8 +68,7 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) * Search table for key and return cell number if found; SIZE_T_MAX otherwise. */ JEMALLOC_INLINE_C size_t -ckh_isearch(ckh_t *ckh, const void *key) -{ +ckh_isearch(ckh_t *ckh, const void *key) { size_t hashes[2], bucket, cell; assert(ckh != NULL); @@ -79,8 +78,9 @@ ckh_isearch(ckh_t *ckh, const void *key) /* Search primary bucket. */ bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); cell = ckh_bucket_search(ckh, bucket, key); - if (cell != SIZE_T_MAX) + if (cell != SIZE_T_MAX) { return (cell); + } /* Search secondary bucket. 
*/ bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); @@ -90,8 +90,7 @@ ckh_isearch(ckh_t *ckh, const void *key) JEMALLOC_INLINE_C bool ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, - const void *data) -{ + const void *data) { ckhc_t *cell; unsigned offset, i; @@ -123,8 +122,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, */ JEMALLOC_INLINE_C bool ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, - void const **argdata) -{ + void const **argdata) { const void *key, *data, *tkey, *tdata; ckhc_t *cell; size_t hashes[2], bucket, tbucket; @@ -187,14 +185,14 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } bucket = tbucket; - if (!ckh_try_bucket_insert(ckh, bucket, key, data)) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) { return (false); + } } } JEMALLOC_INLINE_C bool -ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) -{ +ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { size_t hashes[2], bucket; const void *key = *argkey; const void *data = *argdata; @@ -203,13 +201,15 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) /* Try to insert in primary bucket. */ bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (!ckh_try_bucket_insert(ckh, bucket, key, data)) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) { return (false); + } /* Try to insert in secondary bucket. */ bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); - if (!ckh_try_bucket_insert(ckh, bucket, key, data)) + if (!ckh_try_bucket_insert(ckh, bucket, key, data)) { return (false); + } /* * Try to find a place for this item via iterative eviction/relocation. @@ -222,8 +222,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) * old table into the new. 
*/ JEMALLOC_INLINE_C bool -ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) -{ +ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { size_t count, i, nins; const void *key, *data; @@ -245,8 +244,7 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) } static bool -ckh_grow(tsd_t *tsd, ckh_t *ckh) -{ +ckh_grow(tsd_t *tsd, ckh_t *ckh) { bool ret; ckhc_t *tab, *ttab; unsigned lg_prevbuckets, lg_curcells; @@ -302,8 +300,7 @@ label_return: } static void -ckh_shrink(tsd_t *tsd, ckh_t *ckh) -{ +ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckhc_t *tab, *ttab; size_t usize; unsigned lg_prevbuckets, lg_curcells; @@ -315,8 +312,9 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return; + } tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, true, arena_ichoose(tsd, NULL)); if (tab == NULL) { @@ -353,8 +351,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, - ckh_keycomp_t *keycomp) -{ + ckh_keycomp_t *keycomp) { bool ret; size_t mincells, usize; unsigned lg_mincells; @@ -384,8 +381,9 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; for (lg_mincells = LG_CKH_BUCKET_CELLS; (ZU(1) << lg_mincells) < mincells; - lg_mincells++) - ; /* Do nothing. */ + lg_mincells++) { + /* Do nothing. 
*/ + } ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; ckh->hash = hash; @@ -409,8 +407,7 @@ label_return: } void -ckh_delete(tsd_t *tsd, ckh_t *ckh) -{ +ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); #ifdef CKH_VERBOSE @@ -427,30 +424,31 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), ckh->tab, NULL, true, true); - if (config_debug) + if (config_debug) { memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); + } } size_t -ckh_count(ckh_t *ckh) -{ +ckh_count(ckh_t *ckh) { assert(ckh != NULL); return (ckh->count); } bool -ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) -{ +ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) { size_t i, ncells; for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS)); i < ncells; i++) { if (ckh->tab[i].key != NULL) { - if (key != NULL) + if (key != NULL) { *key = (void *)ckh->tab[i].key; - if (data != NULL) + } + if (data != NULL) { *data = (void *)ckh->tab[i].data; + } *tabind = i + 1; return (false); } @@ -460,8 +458,7 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) } bool -ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) -{ +ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { bool ret; assert(ckh != NULL); @@ -485,18 +482,19 @@ label_return: bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data) -{ + void **data) { size_t cell; assert(ckh != NULL); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { - if (key != NULL) + if (key != NULL) { *key = (void *)ckh->tab[cell].key; - if (data != NULL) + } + if (data != NULL) { *data = (void *)ckh->tab[cell].data; + } ckh->tab[cell].key = NULL; ckh->tab[cell].data = NULL; /* Not necessary. 
*/ @@ -516,18 +514,19 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, } bool -ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) -{ +ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; assert(ckh != NULL); cell = ckh_isearch(ckh, searchkey); if (cell != SIZE_T_MAX) { - if (key != NULL) + if (key != NULL) { *key = (void *)ckh->tab[cell].key; - if (data != NULL) + } + if (data != NULL) { *data = (void *)ckh->tab[cell].data; + } return (false); } @@ -535,14 +534,12 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) } void -ckh_string_hash(const void *key, size_t r_hash[2]) -{ +ckh_string_hash(const void *key, size_t r_hash[2]) { hash(key, strlen((const char *)key), 0x94122f33U, r_hash); } bool -ckh_string_keycomp(const void *k1, const void *k2) -{ +ckh_string_keycomp(const void *k1, const void *k2) { assert(k1 != NULL); assert(k2 != NULL); @@ -550,8 +547,7 @@ ckh_string_keycomp(const void *k1, const void *k2) } void -ckh_pointer_hash(const void *key, size_t r_hash[2]) -{ +ckh_pointer_hash(const void *key, size_t r_hash[2]) { union { const void *v; size_t i; @@ -563,7 +559,6 @@ ckh_pointer_hash(const void *key, size_t r_hash[2]) } bool -ckh_pointer_keycomp(const void *k1, const void *k2) -{ +ckh_pointer_keycomp(const void *k1, const void *k2) { return ((k1 == k2) ? true : false); } diff --git a/src/ctl.c b/src/ctl.c index b19c9d31..929176f2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -17,22 +17,19 @@ static ctl_arenas_t *ctl_arenas; /* Helpers for named and indexed nodes. */ JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_node(const ctl_node_t *node) -{ +ctl_named_node(const ctl_node_t *node) { return ((node->named) ? 
(const ctl_named_node_t *)node : NULL); } JEMALLOC_INLINE_C const ctl_named_node_t * -ctl_named_children(const ctl_named_node_t *node, size_t index) -{ +ctl_named_children(const ctl_named_node_t *node, size_t index) { const ctl_named_node_t *children = ctl_named_node(node->children); return (children ? &children[index] : NULL); } JEMALLOC_INLINE_C const ctl_indexed_node_t * -ctl_indexed_node(const ctl_node_t *node) -{ +ctl_indexed_node(const ctl_node_t *node) { return (!node->named ? (const ctl_indexed_node_t *)node : NULL); } @@ -433,8 +430,7 @@ static const ctl_named_node_t super_root_node[] = { /******************************************************************************/ static unsigned -arenas_i2a_impl(size_t i, bool compat, bool validate) -{ +arenas_i2a_impl(size_t i, bool compat, bool validate) { unsigned a; switch (i) { @@ -453,9 +449,9 @@ arenas_i2a_impl(size_t i, bool compat, bool validate) * removal in 6.0.0. */ a = 0; - } else if (validate && i >= ctl_arenas->narenas) + } else if (validate && i >= ctl_arenas->narenas) { a = UINT_MAX; - else { + } else { /* * This function should never be called for an index * more than one past the range of indices that have @@ -472,14 +468,12 @@ arenas_i2a_impl(size_t i, bool compat, bool validate) } static unsigned -arenas_i2a(size_t i) -{ +arenas_i2a(size_t i) { return (arenas_i2a_impl(i, true, false)); } static ctl_arena_t * -arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) -{ +arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { ctl_arena_t *ret; assert(!compat || !init); @@ -515,16 +509,14 @@ arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) } static ctl_arena_t * -arenas_i(size_t i) -{ +arenas_i(size_t i) { ctl_arena_t *ret = arenas_i_impl(TSDN_NULL, i, true, false); assert(ret != NULL); return (ret); } static void -ctl_arena_clear(ctl_arena_t *ctl_arena) -{ +ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->nthreads = 0; ctl_arena->dss = dss_prec_names[dss_prec_limit]; 
ctl_arena->decay_time = -1; @@ -544,8 +536,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) } static void -ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) -{ +ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { unsigned i; if (config_stats) { @@ -575,8 +566,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) static void ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, - bool destroyed) -{ + bool destroyed) { unsigned i; if (!destroyed) { @@ -605,13 +595,15 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, sdstats->astats.base += astats->astats.base; sdstats->astats.internal += astats->astats.internal; sdstats->astats.resident += astats->astats.resident; - } else + } else { assert(astats->astats.internal == 0); + } - if (!destroyed) + if (!destroyed) { sdstats->allocated_small += astats->allocated_small; - else + } else { assert(astats->allocated_small == 0); + } sdstats->nmalloc_small += astats->nmalloc_small; sdstats->ndalloc_small += astats->ndalloc_small; sdstats->nrequests_small += astats->nrequests_small; @@ -619,8 +611,9 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->astats.allocated_large += astats->astats.allocated_large; - } else + } else { assert(astats->astats.allocated_large == 0); + } sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; sdstats->astats.nrequests_large += @@ -639,8 +632,9 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->bstats[i].curregs += astats->bstats[i].curregs; - } else + } else { assert(astats->bstats[i].curregs == 0); + } if (config_tcache) { sdstats->bstats[i].nfills += astats->bstats[i].nfills; @@ -652,8 +646,9 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { 
sdstats->bstats[i].curslabs += astats->bstats[i].curslabs; - } else + } else { assert(astats->bstats[i].curslabs == 0); + } } for (i = 0; i < NSIZES - NBINS; i++) { @@ -664,16 +659,16 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->lstats[i].curlextents += astats->lstats[i].curlextents; - } else + } else { assert(astats->lstats[i].curlextents == 0); + } } } } static void ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, - unsigned i, bool destroyed) -{ + unsigned i, bool destroyed) { ctl_arena_t *ctl_arena = arenas_i(i); ctl_arena_clear(ctl_arena); @@ -683,8 +678,7 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, } static unsigned -ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) -{ +ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { unsigned arena_ind; ctl_arena_t *ctl_arena; @@ -692,26 +686,29 @@ ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) NULL) { ql_remove(&ctl_arenas->destroyed, ctl_arena, destroyed_link); arena_ind = ctl_arena->arena_ind; - } else + } else { arena_ind = ctl_arenas->narenas; + } /* Trigger stats allocation. */ - if (arenas_i_impl(tsdn, arena_ind, false, true) == NULL) + if (arenas_i_impl(tsdn, arena_ind, false, true) == NULL) { return (UINT_MAX); + } /* Initialize new arena. 
*/ - if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) + if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) { return (UINT_MAX); + } - if (arena_ind == ctl_arenas->narenas) + if (arena_ind == ctl_arenas->narenas) { ctl_arenas->narenas++; + } return (arena_ind); } static void -ctl_refresh(tsdn_t *tsdn) -{ +ctl_refresh(tsdn_t *tsdn) { unsigned i; ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL); VARIABLE_ARRAY(arena_t *, tarenas, ctl_arenas->narenas); @@ -751,8 +748,7 @@ ctl_refresh(tsdn_t *tsdn) } static bool -ctl_init(tsdn_t *tsdn) -{ +ctl_init(tsdn_t *tsdn) { bool ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -828,8 +824,7 @@ label_return: static int ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp) -{ + size_t *mibp, size_t *depthp) { int ret; const char *elm, *tdot, *dot; size_t elen, i, j; @@ -857,9 +852,10 @@ ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, if (strlen(child->name) == elen && strncmp(elm, child->name, elen) == 0) { node = child; - if (nodesp != NULL) + if (nodesp != NULL) { nodesp[i] = (const ctl_node_t *)node; + } mibp[i] = j; break; } @@ -886,8 +882,9 @@ ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, goto label_return; } - if (nodesp != NULL) + if (nodesp != NULL) { nodesp[i] = (const ctl_node_t *)node; + } mibp[i] = (size_t)index; } @@ -925,8 +922,7 @@ label_return: int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ + void *newp, size_t newlen) { int ret; size_t depth; ctl_node_t const *nodes[CTL_MAX_DEPTH]; @@ -940,12 +936,14 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, depth = CTL_MAX_DEPTH; ret = ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); - if (ret != 0) + if (ret != 0) { goto label_return; + } node = ctl_named_node(nodes[depth-1]); - if (node != NULL && node->ctl) + if (node != NULL && node->ctl) { ret = node->ctl(tsd, mib, depth, oldp, 
oldlenp, newp, newlen); + } else { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; @@ -956,8 +954,7 @@ label_return: } int -ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) -{ +ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) { int ret; if (!ctl_initialized && ctl_init(tsdn)) { @@ -972,8 +969,7 @@ label_return: int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; const ctl_named_node_t *node; size_t i; @@ -1009,9 +1005,9 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } /* Call the ctl function. */ - if (node && node->ctl) + if (node && node->ctl) { ret = node->ctl(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - else { + } else { /* Partial MIB. */ ret = ENOENT; } @@ -1021,10 +1017,10 @@ label_return: } bool -ctl_boot(void) -{ - if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) +ctl_boot(void) { + if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) { return (true); + } ctl_initialized = false; @@ -1032,20 +1028,17 @@ ctl_boot(void) } void -ctl_prefork(tsdn_t *tsdn) -{ +ctl_prefork(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &ctl_mtx); } void -ctl_postfork_parent(tsdn_t *tsdn) -{ +ctl_postfork_parent(tsdn_t *tsdn) { malloc_mutex_postfork_parent(tsdn, &ctl_mtx); } void -ctl_postfork_child(tsdn_t *tsdn) -{ +ctl_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &ctl_mtx); } @@ -1112,36 +1105,38 @@ ctl_postfork_child(tsdn_t *tsdn) #define CTL_RO_CLGEN(c, l, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ - if (!(c)) \ + if (!(c)) { \ return (ENOENT); \ - if (l) \ + } \ + if (l) { \ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + } \ READONLY(); 
\ oldval = (v); \ READ(oldval, t); \ \ ret = 0; \ label_return: \ - if (l) \ + if (l) { \ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + } \ return (ret); \ } #define CTL_RO_CGEN(c, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ - if (!(c)) \ + if (!(c)) { \ return (ENOENT); \ + } \ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ oldval = (v); \ @@ -1156,8 +1151,7 @@ label_return: \ #define CTL_RO_GEN(n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1179,13 +1173,13 @@ label_return: \ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ - if (!(c)) \ + if (!(c)) { \ return (ENOENT); \ + } \ READONLY(); \ oldval = (v); \ READ(oldval, t); \ @@ -1198,8 +1192,7 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1215,13 +1208,13 @@ label_return: \ #define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ - if (!(c)) \ + if (!(c)) { \ return (ENOENT); \ + } \ READONLY(); \ oldval = (m(tsd)); \ READ(oldval, t); \ @@ -1234,8 +1227,7 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t 
*mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) \ -{ \ + size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1254,15 +1246,15 @@ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(newval, uint64_t); - if (newp != NULL) + if (newp != NULL) { ctl_refresh(tsd_tsdn(tsd)); + } READ(ctl_arenas->epoch, uint64_t); ret = 0; @@ -1317,15 +1309,15 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) static int thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; arena_t *oldarena; unsigned newind, oldind; oldarena = arena_choose(tsd, NULL); - if (oldarena == NULL) + if (oldarena == NULL) { return (EAGAIN); + } newind = oldind = arena_ind_get(oldarena); WRITE(newind, unsigned); @@ -1372,13 +1364,13 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, static int thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; - if (!config_tcache) + if (!config_tcache) { return (ENOENT); + } oldval = tcache_enabled_get(); if (newp != NULL) { @@ -1397,12 +1389,12 @@ label_return: static int thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - if (!config_tcache) + if (!config_tcache) { return (ENOENT); + } READONLY(); WRITEONLY(); @@ -1416,12 +1408,12 @@ label_return: static int thread_prof_name_ctl(tsd_t *tsd, const size_t 
*mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } READ_XOR_WRITE(); @@ -1432,8 +1424,9 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != - 0) + 0) { goto label_return; + } } else { const char *oldname = prof_thread_name_get(tsd); READ(oldname, const char *); @@ -1446,13 +1439,13 @@ label_return: static int thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } oldval = prof_thread_active_get(tsd); if (newp != NULL) { @@ -1476,13 +1469,13 @@ label_return: static int tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; - if (!config_tcache) + if (!config_tcache) { return (ENOENT); + } malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); @@ -1500,13 +1493,13 @@ label_return: static int tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; - if (!config_tcache) + if (!config_tcache) { return (ENOENT); + } WRITEONLY(); tcache_ind = UINT_MAX; @@ -1524,13 +1517,13 @@ label_return: static int tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; - if (!config_tcache) + if (!config_tcache) { return (ENOENT); + } WRITEONLY(); tcache_ind = UINT_MAX; @@ -1550,8 +1543,7 @@ label_return: 
static int arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; tsdn_t *tsdn = tsd_tsdn(tsd); unsigned arena_ind; @@ -1572,8 +1564,7 @@ label_return: } static void -arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) -{ +arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { malloc_mutex_lock(tsdn, &ctl_mtx); { unsigned narenas = ctl_arenas->narenas; @@ -1586,8 +1577,9 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) unsigned i; VARIABLE_ARRAY(arena_t *, tarenas, narenas); - for (i = 0; i < narenas; i++) + for (i = 0; i < narenas; i++) { tarenas[i] = arena_get(tsdn, i, false); + } /* * No further need to hold ctl_mtx, since narenas and @@ -1596,8 +1588,9 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) malloc_mutex_unlock(tsdn, &ctl_mtx); for (i = 0; i < narenas; i++) { - if (tarenas[i] != NULL) + if (tarenas[i] != NULL) { arena_purge(tsdn, tarenas[i], all); + } } } else { arena_t *tarena; @@ -1609,16 +1602,16 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) /* No further need to hold ctl_mtx. 
*/ malloc_mutex_unlock(tsdn, &ctl_mtx); - if (tarena != NULL) + if (tarena != NULL) { arena_purge(tsdn, tarena, all); + } } } } static int arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; @@ -1634,8 +1627,7 @@ label_return: static int arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; @@ -1652,8 +1644,7 @@ label_return: static int arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, unsigned *arena_ind, - arena_t **arena) -{ + arena_t **arena) { int ret; READONLY(); @@ -1678,16 +1669,16 @@ label_return: static int arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; arena_t *arena; ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); - if (ret != 0) + if (ret != 0) { return (ret); + } arena_reset(tsd, arena); @@ -1696,8 +1687,7 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, static int arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; arena_t *arena; @@ -1705,8 +1695,9 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); - if (ret != 0) + if (ret != 0) { goto label_return; + } if (arena_nthreads_get(arena, false) != 0 || arena_nthreads_get(arena, true) != 0) { @@ -1735,8 +1726,7 @@ 
label_return: static int arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *dss = NULL; unsigned arena_ind; @@ -1797,8 +1787,7 @@ label_return: static int arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; arena_t *arena; @@ -1833,8 +1822,7 @@ label_return: static int arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; arena_t *arena; @@ -1867,8 +1855,7 @@ label_return: } static const ctl_named_node_t * -arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) -{ +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -1894,8 +1881,7 @@ label_return: static int arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; @@ -1916,8 +1902,7 @@ label_return: static int arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (oldp != NULL && oldlenp != NULL) { @@ -1949,27 +1934,27 @@ CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) -{ - if (i > NBINS) 
+arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { + if (i > NBINS) { return (NULL); + } return (super_arenas_bin_i_node); } CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) CTL_RO_NL_GEN(arenas_lextent_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) -{ - if (i > NSIZES - NBINS) +arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t i) { + if (i > NSIZES - NBINS) { return (NULL); + } return (super_arenas_lextent_i_node); } static int arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; extent_hooks_t *extent_hooks; unsigned arena_ind; @@ -1995,13 +1980,13 @@ label_return: static int prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) -{ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } if (newp != NULL) { if (newlen != sizeof(bool)) { @@ -2010,8 +1995,9 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } oldval = prof_thread_active_init_set(tsd_tsdn(tsd), *(bool *)newp); - } else + } else { oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); + } READ(oldval, bool); ret = 0; @@ -2021,13 +2007,13 @@ label_return: static int prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } if (newp != NULL) { if (newlen != sizeof(bool)) { @@ -2035,8 +2021,9 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, goto label_return; } oldval = 
prof_active_set(tsd_tsdn(tsd), *(bool *)newp); - } else + } else { oldval = prof_active_get(tsd_tsdn(tsd)); + } READ(oldval, bool); ret = 0; @@ -2046,13 +2033,13 @@ label_return: static int prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *filename = NULL; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } WRITEONLY(); WRITE(filename, const char *); @@ -2069,13 +2056,13 @@ label_return: static int prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } if (newp != NULL) { if (newlen != sizeof(bool)) { @@ -2083,8 +2070,9 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, goto label_return; } oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); - } else + } else { oldval = prof_gdump_get(tsd_tsdn(tsd)); + } READ(oldval, bool); ret = 0; @@ -2094,18 +2082,19 @@ label_return: static int prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) -{ + size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; - if (!config_prof) + if (!config_prof) { return (ENOENT); + } WRITEONLY(); WRITE(lg_sample, size_t); - if (lg_sample >= (sizeof(uint64_t) << 3)) + if (lg_sample >= (sizeof(uint64_t) << 3)) { lg_sample = (sizeof(uint64_t) << 3) - 1; + } prof_reset(tsd, lg_sample); @@ -2189,10 +2178,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) -{ - if (j > NBINS) + size_t j) { + if (j > NBINS) { return (NULL); + } return (super_stats_arenas_i_bins_j_node); } @@ -2207,16 +2196,15 @@ 
CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, static const ctl_named_node_t * stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) -{ - if (j > NSIZES - NBINS) + size_t j) { + if (j > NSIZES - NBINS) { return (NULL); + } return (super_stats_arenas_i_lextents_j_node); } static const ctl_named_node_t * -stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) -{ +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; size_t a; diff --git a/src/extent.c b/src/extent.c index be40aaad..5cf2e25c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -75,8 +75,7 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ extent_t * -extent_alloc(tsdn_t *tsdn, arena_t *arena) -{ +extent_alloc(tsdn_t *tsdn, arena_t *arena) { extent_t *extent; malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); @@ -92,8 +91,7 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) } void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) -{ +extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); ql_elm_new(extent, ql_link); ql_tail_insert(&arena->extent_cache, extent, ql_link); @@ -101,22 +99,21 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) } extent_hooks_t * -extent_hooks_get(arena_t *arena) -{ +extent_hooks_get(arena_t *arena) { return (base_extent_hooks_get(arena->base)); } extent_hooks_t * -extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) -{ +extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) { return (base_extent_hooks_set(arena->base, extent_hooks)); } static void -extent_hooks_assure_initialized(arena_t *arena, extent_hooks_t **r_extent_hooks) -{ - if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) +extent_hooks_assure_initialized(arena_t *arena, + extent_hooks_t 
**r_extent_hooks) { + if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) { *r_extent_hooks = extent_hooks_get(arena); + } } #ifdef JEMALLOC_JET @@ -124,8 +121,7 @@ extent_hooks_assure_initialized(arena_t *arena, extent_hooks_t **r_extent_hooks) #define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) #endif size_t -extent_size_quantize_floor(size_t size) -{ +extent_size_quantize_floor(size_t size) { size_t ret; pszind_t pind; @@ -161,8 +157,7 @@ extent_size_quantize_t *extent_size_quantize_floor = #define extent_size_quantize_ceil JEMALLOC_N(n_extent_size_quantize_ceil) #endif size_t -extent_size_quantize_ceil(size_t size) -{ +extent_size_quantize_ceil(size_t size) { size_t ret; assert(size > 0); @@ -195,8 +190,7 @@ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) static void extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], - extent_t *extent) -{ + extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); pszind_t pind = psz2ind(psz); @@ -207,8 +201,7 @@ extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], static void extent_heaps_remove(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], - extent_t *extent) -{ + extent_t *extent) { size_t psz = extent_size_quantize_floor(extent_size_get(extent)); pszind_t pind = psz2ind(psz); @@ -220,12 +213,12 @@ extent_heaps_remove(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], static bool extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, const extent_t *extent, bool dependent, bool init_missing, - rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) -{ + rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) { *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) + if (!dependent && *r_elm_a == NULL) { return (true); + } assert(*r_elm_a != NULL); if (extent_size_get(extent) > PAGE) { @@ -237,33 +230,33 @@ 
extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, return (true); } assert(*r_elm_b != NULL); - } else + } else { *r_elm_b = NULL; + } return (false); } static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, - rtree_elm_t *elm_b, const extent_t *extent) -{ + rtree_elm_t *elm_b, const extent_t *extent) { rtree_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); - if (elm_b != NULL) + if (elm_b != NULL) { rtree_elm_write_acquired(tsdn, &extents_rtree, elm_b, extent); + } } static void -extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) -{ +extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) { rtree_elm_release(tsdn, &extents_rtree, elm_a); - if (elm_b != NULL) + if (elm_b != NULL) { rtree_elm_release(tsdn, &extents_rtree, elm_b); + } } static void extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent) -{ + const extent_t *extent) { size_t i; assert(extent_slab_get(extent)); @@ -276,8 +269,7 @@ extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } static void -extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) -{ +extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) { cassert(config_prof); if (opt_prof && extent_active_get(extent)) { @@ -291,14 +283,14 @@ extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) */ high = atomic_read_zu(&highpages); } - if (cur > high && prof_gdump_get_unlocked()) + if (cur > high && prof_gdump_get_unlocked()) { prof_gdump(tsdn); + } } } static void -extent_gprof_sub(tsdn_t *tsdn, const extent_t *extent) -{ +extent_gprof_sub(tsdn_t *tsdn, const extent_t *extent) { cassert(config_prof); if (opt_prof && extent_active_get(extent)) { @@ -309,37 +301,37 @@ extent_gprof_sub(tsdn_t *tsdn, const extent_t *extent) } static bool -extent_register(tsdn_t *tsdn, const extent_t *extent) -{ +extent_register(tsdn_t *tsdn, const extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, 
&rtree_ctx_fallback); rtree_elm_t *elm_a, *elm_b; if (extent_rtree_acquire(tsdn, rtree_ctx, extent, false, true, &elm_a, - &elm_b)) + &elm_b)) { return (true); + } extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); - if (extent_slab_get(extent)) + if (extent_slab_get(extent)) { extent_interior_register(tsdn, rtree_ctx, extent); + } extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof) + if (config_prof) { extent_gprof_add(tsdn, extent); + } return (false); } static void -extent_reregister(tsdn_t *tsdn, const extent_t *extent) -{ +extent_reregister(tsdn_t *tsdn, const extent_t *extent) { bool err = extent_register(tsdn, extent); assert(!err); } static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent) -{ + const extent_t *extent) { size_t i; assert(extent_slab_get(extent)); @@ -352,8 +344,7 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) -{ +extent_deregister(tsdn_t *tsdn, extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *elm_a, *elm_b; @@ -367,8 +358,9 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) } extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof) + if (config_prof) { extent_gprof_sub(tsdn, extent); + } } /* @@ -377,8 +369,7 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) */ static extent_t * extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, - extent_heap_t extent_heaps[NPSIZES+1], size_t size) -{ + extent_heap_t extent_heaps[NPSIZES+1], size_t size) { pszind_t pind, i; malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); @@ -386,8 +377,9 @@ extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, pind = psz2ind(extent_size_quantize_ceil(size)); for (i = pind; i < NPSIZES+1; i++) { extent_t *extent = extent_heap_first(&extent_heaps[i]); - if (extent != NULL) + if (extent != NULL) { return (extent); + } } return (NULL); @@ 
-395,8 +387,7 @@ extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, static void extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - bool cache, extent_t *extent) -{ + bool cache, extent_t *extent) { /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. @@ -415,15 +406,15 @@ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES+1], bool locked, bool cache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool *commit, bool slab) -{ + bool *commit, bool slab) { extent_t *extent; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); size_t size, alloc_size, leadsize, trailsize; - if (locked) + if (locked) { malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + } assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); assert(alignment > 0); @@ -452,10 +443,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, size = usize + pad; alloc_size = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ - if (alloc_size < usize) + if (alloc_size < usize) { return (NULL); - if (!locked) + } + if (!locked) { malloc_mutex_lock(tsdn, &arena->extents_mtx); + } extent_hooks_assure_initialized(arena, r_extent_hooks); if (new_addr != NULL) { rtree_elm_t *elm; @@ -470,19 +463,22 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (extent_arena_get(extent) != arena || extent_size_get(extent) < size || extent_active_get(extent) || - extent_retained_get(extent) == cache) + extent_retained_get(extent) == cache) { extent = NULL; + } } rtree_elm_release(tsdn, &extents_rtree, elm); - } else + } else { extent = NULL; + } } else { extent = extent_first_best_fit(tsdn, arena, extent_heaps, alloc_size); } if (extent == NULL) { - if (!locked) + if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); + } return (NULL); } extent_heaps_remove(tsdn, extent_heaps, extent); @@ -493,10 +489,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(new_addr == NULL || leadsize == 0); assert(extent_size_get(extent) >= leadsize + size); trailsize = extent_size_get(extent) - leadsize - size; - if (extent_zeroed_get(extent)) + if (extent_zeroed_get(extent)) { *zero = true; - if (extent_committed_get(extent)) + } + if (extent_committed_get(extent)) { *commit = true; + } /* Split the lead. 
*/ if (leadsize != 0) { @@ -507,8 +505,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (extent == NULL) { extent_deregister(tsdn, lead); extent_leak(tsdn, arena, r_extent_hooks, cache, lead); - if (!locked) + if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); + } return (NULL); } extent_heaps_insert(tsdn, extent_heaps, lead); @@ -523,8 +522,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_deregister(tsdn, extent); extent_leak(tsdn, arena, r_extent_hooks, cache, extent); - if (!locked) + if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); + } return (NULL); } extent_heaps_insert(tsdn, extent_heaps, trail); @@ -540,8 +540,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (*commit && !extent_committed_get(extent)) { if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { - if (!locked) + if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); + } extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, extent); return (NULL); @@ -549,16 +550,18 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_zeroed_set(extent, true); } - if (pad != 0) + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); + } extent_active_set(extent, true); if (slab) { extent_slab_set(extent, slab); extent_interior_register(tsdn, rtree_ctx, extent); } - if (!locked) + if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); + } if (*zero) { if (!extent_zeroed_get(extent)) { @@ -569,8 +572,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, size_t *p = (size_t *)(uintptr_t) extent_addr_get(extent); - for (i = 0; i < usize / sizeof(size_t); i++) + for (i = 0; i < usize / sizeof(size_t); i++) { assert(p[i] == 0); + } } } return (extent); @@ -584,8 +588,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t 
**r_extent_hooks, */ static void * extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) -{ + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { void *ret; assert(size != 0); @@ -594,17 +597,20 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, /* "primary" dss. */ if (have_dss && dss_prec == dss_prec_primary && (ret = extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) + commit)) != NULL) { return (ret); + } /* mmap. */ if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) - != NULL) + != NULL) { return (ret); + } /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) + commit)) != NULL) { return (ret); + } /* All strategies for allocation failed. */ return (NULL); @@ -613,8 +619,7 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static extent_t * extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool locked, void *new_addr, size_t usize, - size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; assert(usize + pad != 0); @@ -629,8 +634,7 @@ extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, extent_t * extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t alignment, bool *zero, bool *commit, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, @@ -640,16 +644,14 @@ extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, extent_t * extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, 
extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t alignment, bool *zero, bool *commit, bool slab) { return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, new_addr, usize, pad, alignment, zero, commit, slab)); } static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) -{ + size_t size, size_t alignment, bool *zero, bool *commit) { void *ret; ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, @@ -659,8 +661,7 @@ extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, static void * extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) -{ + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { tsdn_t *tsdn; arena_t *arena; @@ -680,10 +681,10 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, static void extent_retain(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_t *extent) -{ - if (config_stats) + extent_t *extent) { + if (config_stats) { arena->stats.retained += extent_size_get(extent); + } extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, false, extent); } @@ -696,8 +697,7 @@ extent_retain(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; void *ptr; size_t size, alloc_size, alloc_size_min, leadsize, trailsize; @@ -713,13 +713,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, alloc_size = pind2sz(arena->extent_grow_next); alloc_size_min = size + PAGE_CEILING(alignment) 
- PAGE; /* Beware size_t wrap-around. */ - if (alloc_size_min < usize) + if (alloc_size_min < usize) { return (NULL); - if (alloc_size < alloc_size_min) + } + if (alloc_size < alloc_size_min) { return (NULL); + } extent = extent_alloc(tsdn, arena); - if (extent == NULL) + if (extent == NULL) { return (NULL); + } zeroed = false; committed = false; ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, @@ -741,10 +744,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, assert(new_addr == NULL || leadsize == 0); assert(alloc_size >= leadsize + size); trailsize = alloc_size - leadsize - size; - if (extent_zeroed_get(extent)) + if (extent_zeroed_get(extent)) { *zero = true; - if (extent_committed_get(extent)) + } + if (extent_committed_get(extent)) { *commit = true; + } /* Split the lead. */ if (leadsize != 0) { @@ -790,8 +795,9 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, /* Adjust gprof stats now that extent is final size. */ extent_gprof_add(tsdn, extent); } - if (pad != 0) + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); + } if (slab) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, @@ -800,18 +806,19 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_slab_set(extent, true); extent_interior_register(tsdn, rtree_ctx, extent); } - if (*zero && !extent_zeroed_get(extent)) + if (*zero && !extent_zeroed_get(extent)) { memset(extent_addr_get(extent), 0, extent_usize_get(extent)); - if (arena->extent_grow_next + 1 < NPSIZES) + } + if (arena->extent_grow_next + 1 < NPSIZES) { arena->extent_grow_next++; + } return (extent); } static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; assert(usize != 0); @@ -825,8 +832,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, 
size_t size = usize + pad; arena->stats.retained -= size; } - if (config_prof) + if (config_prof) { extent_gprof_add(tsdn, extent); + } } if (!config_munmap && extent == NULL) { extent = extent_grow_retained(tsdn, arena, r_extent_hooks, @@ -839,16 +847,16 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; size_t size; void *addr; size = usize + pad; extent = extent_alloc(tsdn, arena); - if (extent == NULL) + if (extent == NULL) { return (NULL); + } if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ addr = extent_alloc_default_impl(tsdn, arena, new_addr, size, @@ -863,8 +871,9 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } extent_init(extent, arena, addr, size, usize, arena_extent_sn_next(arena), true, zero, commit, slab); - if (pad != 0) + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); + } if (extent_register(tsdn, extent)) { extent_leak(tsdn, arena, r_extent_hooks, false, extent); return (NULL); @@ -876,8 +885,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) -{ + size_t alignment, bool *zero, bool *commit, bool slab) { extent_t *extent; extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -893,16 +901,19 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, } static bool -extent_can_coalesce(const extent_t *a, const extent_t *b) -{ - if (extent_arena_get(a) != extent_arena_get(b)) +extent_can_coalesce(const extent_t *a, const extent_t *b) { + if (extent_arena_get(a) != extent_arena_get(b)) { return 
(false); - if (extent_active_get(a) != extent_active_get(b)) + } + if (extent_active_get(a) != extent_active_get(b)) { return (false); - if (extent_committed_get(a) != extent_committed_get(b)) + } + if (extent_committed_get(a) != extent_committed_get(b)) { return (false); - if (extent_retained_get(a) != extent_retained_get(b)) + } + if (extent_retained_get(a) != extent_retained_get(b)) { return (false); + } return (true); } @@ -910,10 +921,10 @@ extent_can_coalesce(const extent_t *a, const extent_t *b) static void extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, - extent_heap_t extent_heaps[NPSIZES+1], bool cache) -{ - if (!extent_can_coalesce(a, b)) + extent_heap_t extent_heaps[NPSIZES+1], bool cache) { + if (!extent_can_coalesce(a, b)) { return; + } extent_heaps_remove(tsdn, extent_heaps, a); extent_heaps_remove(tsdn, extent_heaps, b); @@ -937,8 +948,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_heap_t extent_heaps[NPSIZES+1], bool cache, extent_t *extent) -{ + extent_heap_t extent_heaps[NPSIZES+1], bool cache, extent_t *extent) { extent_t *prev, *next; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -980,8 +990,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) -{ +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; if (extent_register(tsdn, extent)) { @@ -993,8 +1002,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) -{ + extent_hooks_t **r_extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); 
assert(extent_size_get(extent) != 0); @@ -1006,17 +1014,16 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, } static bool -extent_dalloc_default_impl(void *addr, size_t size) -{ - if (!have_dss || !extent_in_dss(addr)) +extent_dalloc_default_impl(void *addr, size_t size) { + if (!have_dss || !extent_in_dss(addr)) { return (extent_dalloc_mmap(addr, size)); + } return (true); } static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) -{ + bool committed, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); return (extent_dalloc_default_impl(addr, size)); @@ -1024,8 +1031,7 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) -{ + extent_hooks_t **r_extent_hooks, extent_t *extent) { bool err; assert(extent_base_get(extent) != NULL); @@ -1050,46 +1056,50 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_committed_get(extent), arena_ind_get(arena))); } - if (!err) + if (!err) { extent_dalloc(tsdn, arena, extent); + } return (err); } void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) -{ + extent_hooks_t **r_extent_hooks, extent_t *extent) { bool zeroed; - if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) + if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) { return; + } extent_reregister(tsdn, extent); /* Try to decommit; purge if that fails. 
*/ - if (!extent_committed_get(extent)) + if (!extent_committed_get(extent)) { zeroed = true; - else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, - 0, extent_size_get(extent))) + } else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, + 0, extent_size_get(extent))) { zeroed = true; - else if ((*r_extent_hooks)->purge_lazy != NULL && + } else if ((*r_extent_hooks)->purge_lazy != NULL && !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena))) + extent_size_get(extent), arena_ind_get(arena))) { zeroed = false; - else if ((*r_extent_hooks)->purge_forced != NULL && + } else if ((*r_extent_hooks)->purge_forced != NULL && !(*r_extent_hooks)->purge_forced(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena))) + extent_size_get(extent), arena_ind_get(arena))) { zeroed = true; - else + } else { zeroed = false; + } extent_zeroed_set(extent, zeroed); - if (config_stats) + if (config_stats) { arena->stats.retained += extent_size_get(extent); - if (config_prof) + } + if (config_prof) { extent_gprof_sub(tsdn, extent); + } extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, false, extent); @@ -1097,8 +1107,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), @@ -1108,8 +1117,7 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) -{ + size_t length) { bool err; 
extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1122,8 +1130,7 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), @@ -1133,8 +1140,7 @@ extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) -{ + size_t length) { bool err; extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1150,8 +1156,7 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, #ifdef PAGES_CAN_PURGE_LAZY static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); assert(addr != NULL); assert((offset & PAGE_MASK) == 0); @@ -1166,8 +1171,7 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) -{ + size_t length) { extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_lazy == NULL || (*r_extent_hooks)->purge_lazy(*r_extent_hooks, @@ -1178,8 +1182,7 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, #ifdef PAGES_CAN_PURGE_FORCED static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind) -{ + size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); assert(addr != 
NULL); assert((offset & PAGE_MASK) == 0); @@ -1194,8 +1197,7 @@ extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) -{ + size_t length) { extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_forced == NULL || (*r_extent_hooks)->purge_forced(*r_extent_hooks, @@ -1206,12 +1208,12 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, #ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) -{ + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); - if (!maps_coalesce) + if (!maps_coalesce) { return (true); + } return (false); } #endif @@ -1219,8 +1221,7 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - size_t usize_a, size_t size_b, size_t usize_b) -{ + size_t usize_a, size_t size_b, size_t usize_b) { extent_t *trail; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1230,12 +1231,14 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); - if ((*r_extent_hooks)->split == NULL) + if ((*r_extent_hooks)->split == NULL) { return (NULL); + } trail = extent_alloc(tsdn, arena); - if (trail == NULL) + if (trail == NULL) { goto label_error_a; + } { extent_t lead; @@ -1246,8 +1249,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, &lead, false, true, - &lead_elm_a, &lead_elm_b)) + &lead_elm_a, &lead_elm_b)) { goto label_error_b; + } } extent_init(trail, 
arena, (void *)((uintptr_t)extent_base_get(extent) + @@ -1255,13 +1259,15 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_active_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b)) + &trail_elm_a, &trail_elm_b)) { goto label_error_c; + } if ((*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), - arena_ind_get(arena))) + arena_ind_get(arena))) { goto label_error_d; + } extent_size_set(extent, size_a); extent_usize_set(extent, usize_a); @@ -1284,12 +1290,13 @@ label_error_a: } static bool -extent_merge_default_impl(void *addr_a, void *addr_b) -{ - if (!maps_coalesce) +extent_merge_default_impl(void *addr_a, void *addr_b) { + if (!maps_coalesce) { return (true); - if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) + } + if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { return (true); + } return (false); } @@ -1297,8 +1304,7 @@ extent_merge_default_impl(void *addr_a, void *addr_b) #ifdef JEMALLOC_MAPS_COALESCE static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) -{ + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); return (extent_merge_default_impl(addr_a, addr_b)); @@ -1307,8 +1313,7 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) -{ + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { bool err; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1316,8 +1321,9 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, 
r_extent_hooks); - if ((*r_extent_hooks)->merge == NULL) + if ((*r_extent_hooks)->merge == NULL) { return (true); + } if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ @@ -1330,8 +1336,9 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, arena_ind_get(arena)); } - if (err) + if (err) { return (true); + } /* * The rtree writes must happen while all the relevant elements are @@ -1350,8 +1357,9 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, if (b_elm_b != NULL) { rtree_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, NULL); rtree_elm_release(tsdn, &extents_rtree, b_elm_a); - } else + } else { b_elm_b = b_elm_a; + } extent_size_set(a, extent_size_get(a) + extent_size_get(b)); extent_usize_set(a, extent_usize_get(a) + extent_usize_get(b)); @@ -1368,14 +1376,15 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, } bool -extent_boot(void) -{ +extent_boot(void) { if (rtree_new(&extents_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - - LG_PAGE))) + LG_PAGE))) { return (true); + } - if (have_dss) + if (have_dss) { extent_dss_boot(); + } return (false); } diff --git a/src/extent_dss.c b/src/extent_dss.c index 5aa95b1c..d61d5464 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -30,8 +30,7 @@ static void *dss_max; /******************************************************************************/ static void * -extent_dss_sbrk(intptr_t increment) -{ +extent_dss_sbrk(intptr_t increment) { #ifdef JEMALLOC_DSS return (sbrk(increment)); #else @@ -41,28 +40,27 @@ extent_dss_sbrk(intptr_t increment) } dss_prec_t -extent_dss_prec_get(void) -{ +extent_dss_prec_get(void) { dss_prec_t ret; - if (!have_dss) + if (!have_dss) { return (dss_prec_disabled); + } ret = (dss_prec_t)atomic_read_u(&dss_prec_default); return (ret); } bool -extent_dss_prec_set(dss_prec_t dss_prec) -{ - if (!have_dss) +extent_dss_prec_set(dss_prec_t dss_prec) { + if (!have_dss) { return (dss_prec != dss_prec_disabled); + } atomic_write_u(&dss_prec_default, 
(unsigned)dss_prec); return (false); } static void * -extent_dss_max_update(void *new_addr) -{ +extent_dss_max_update(void *new_addr) { void *max_cur; spin_t spinner; @@ -83,20 +81,21 @@ extent_dss_max_update(void *new_addr) spin_adaptive(&spinner); continue; } - if (!atomic_cas_p(&dss_max, max_prev, max_cur)) + if (!atomic_cas_p(&dss_max, max_prev, max_cur)) { break; + } } /* Fixed new_addr can only be supported if it is at the edge of DSS. */ - if (new_addr != NULL && max_cur != new_addr) + if (new_addr != NULL && max_cur != new_addr) { return (NULL); + } return (max_cur); } void * extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit) -{ + size_t alignment, bool *zero, bool *commit) { extent_t *gap; cassert(have_dss); @@ -107,12 +106,14 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * sbrk() uses a signed increment argument, so take care not to * interpret a large allocation request as a negative increment. */ - if ((intptr_t)size < 0) + if ((intptr_t)size < 0) { return (NULL); + } gap = extent_alloc(tsdn, arena); - if (gap == NULL) + if (gap == NULL) { return (NULL); + } if (!atomic_read_u(&dss_exhausted)) { /* @@ -126,8 +127,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, intptr_t incr; max_cur = extent_dss_max_update(new_addr); - if (max_cur == NULL) + if (max_cur == NULL) { goto label_oom; + } /* * Compute how much gap space (if any) is necessary to @@ -145,8 +147,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)max_cur || - (uintptr_t)dss_next < (uintptr_t)max_cur) + (uintptr_t)dss_next < (uintptr_t)max_cur) { goto label_oom; /* Wrap-around. 
*/ + } incr = gap_size + size; /* @@ -155,19 +158,22 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * DSS while dss_max is greater than the current DSS * max reported by sbrk(0). */ - if (atomic_cas_p(&dss_max, max_cur, dss_next)) + if (atomic_cas_p(&dss_max, max_cur, dss_next)) { continue; + } /* Try to allocate. */ dss_prev = extent_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. */ - if (gap_size != 0) + if (gap_size != 0) { extent_dalloc_gap(tsdn, arena, gap); - else + } else { extent_dalloc(tsdn, arena, gap); - if (!*commit) + } + if (!*commit) { *commit = pages_decommit(ret, size); + } if (*zero && *commit) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; @@ -177,8 +183,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size, 0, true, false, true, false); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, - size)) + size)) { memset(ret, 0, size); + } } return (ret); } @@ -204,30 +211,28 @@ label_oom: } static bool -extent_in_dss_helper(void *addr, void *max) -{ +extent_in_dss_helper(void *addr, void *max) { return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr < (uintptr_t)max); } bool -extent_in_dss(void *addr) -{ +extent_in_dss(void *addr) { cassert(have_dss); return (extent_in_dss_helper(addr, atomic_read_p(&dss_max))); } bool -extent_dss_mergeable(void *addr_a, void *addr_b) -{ +extent_dss_mergeable(void *addr_a, void *addr_b) { void *max; cassert(have_dss); if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b < - (uintptr_t)dss_base) + (uintptr_t)dss_base) { return (true); + } max = atomic_read_p(&dss_max); return (extent_in_dss_helper(addr_a, max) == @@ -235,8 +240,7 @@ extent_dss_mergeable(void *addr_a, void *addr_b) } void -extent_dss_boot(void) -{ +extent_dss_boot(void) { cassert(have_dss); dss_base = extent_dss_sbrk(0); diff --git a/src/extent_mmap.c b/src/extent_mmap.c index e685a45b..2c00b588 100644 --- a/src/extent_mmap.c 
+++ b/src/extent_mmap.c @@ -4,21 +4,23 @@ /******************************************************************************/ static void * -extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) -{ +extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, + bool *commit) { void *ret; size_t alloc_size; alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. */ - if (alloc_size < size) + if (alloc_size < size) { return (NULL); + } do { void *pages; size_t leadsize; pages = pages_map(NULL, alloc_size, commit); - if (pages == NULL) + if (pages == NULL) { return (NULL); + } leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; ret = pages_trim(pages, alloc_size, leadsize, size, commit); @@ -31,8 +33,7 @@ extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, bool *commit) void * extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit) -{ + bool *commit) { void *ret; size_t offset; @@ -52,8 +53,9 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, assert(alignment != 0); ret = pages_map(new_addr, size, commit); - if (ret == NULL || ret == new_addr) + if (ret == NULL || ret == new_addr) { return (ret); + } assert(new_addr == NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { @@ -67,9 +69,9 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, } bool -extent_dalloc_mmap(void *addr, size_t size) -{ - if (config_munmap) +extent_dalloc_mmap(void *addr, size_t size) { + if (config_munmap) { pages_unmap(addr, size); + } return (!config_munmap); } diff --git a/src/jemalloc.c b/src/jemalloc.c index af2a53a2..2de42c3e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -209,8 +209,7 @@ static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI -_init_init_lock(void) -{ +_init_init_lock(void) { /* * If another constructor in the same binary is using mallctl to 
e.g. * set up extent hooks, it may end up running before this one, and @@ -221,8 +220,9 @@ _init_init_lock(void) * the process creation, before any separate thread normally starts * doing anything. */ - if (!init_lock_initialized) + if (!init_lock_initialized) { malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT); + } init_lock_initialized = true; } @@ -273,24 +273,23 @@ static bool malloc_init_hard(void); */ JEMALLOC_ALWAYS_INLINE_C bool -malloc_initialized(void) -{ +malloc_initialized(void) { return (malloc_init_state == malloc_init_initialized); } JEMALLOC_ALWAYS_INLINE_C bool -malloc_init_a0(void) -{ - if (unlikely(malloc_init_state == malloc_init_uninitialized)) +malloc_init_a0(void) { + if (unlikely(malloc_init_state == malloc_init_uninitialized)) { return (malloc_init_hard_a0()); + } return (false); } JEMALLOC_ALWAYS_INLINE_C bool -malloc_init(void) -{ - if (unlikely(!malloc_initialized()) && malloc_init_hard()) +malloc_init(void) { + if (unlikely(!malloc_initialized()) && malloc_init_hard()) { return (true); + } return (false); } @@ -300,30 +299,27 @@ malloc_init(void) */ static void * -a0ialloc(size_t size, bool zero, bool is_internal) -{ - if (unlikely(malloc_init_a0())) +a0ialloc(size_t size, bool zero, bool is_internal) { + if (unlikely(malloc_init_a0())) { return (NULL); + } return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL, is_internal, arena_get(TSDN_NULL, 0, true), true)); } static void -a0idalloc(extent_t *extent, void *ptr, bool is_internal) -{ +a0idalloc(extent_t *extent, void *ptr, bool is_internal) { idalloctm(TSDN_NULL, extent, ptr, false, is_internal, true); } void * -a0malloc(size_t size) -{ +a0malloc(size_t size) { return (a0ialloc(size, false, true)); } void -a0dalloc(void *ptr) -{ +a0dalloc(void *ptr) { a0idalloc(iealloc(NULL, ptr), ptr, true); } @@ -334,17 +330,16 @@ a0dalloc(void *ptr) */ void * -bootstrap_malloc(size_t size) -{ - if (unlikely(size == 0)) +bootstrap_malloc(size_t size) { + if (unlikely(size == 0)) { 
size = 1; + } return (a0ialloc(size, false, false)); } void * -bootstrap_calloc(size_t num, size_t size) -{ +bootstrap_calloc(size_t num, size_t size) { size_t num_size; num_size = num * size; @@ -357,49 +352,46 @@ bootstrap_calloc(size_t num, size_t size) } void -bootstrap_free(void *ptr) -{ - if (unlikely(ptr == NULL)) +bootstrap_free(void *ptr) { + if (unlikely(ptr == NULL)) { return; + } a0idalloc(iealloc(NULL, ptr), ptr, false); } void -arena_set(unsigned ind, arena_t *arena) -{ +arena_set(unsigned ind, arena_t *arena) { atomic_write_p((void **)&arenas[ind], arena); } static void -narenas_total_set(unsigned narenas) -{ +narenas_total_set(unsigned narenas) { atomic_write_u(&narenas_total, narenas); } static void -narenas_total_inc(void) -{ +narenas_total_inc(void) { atomic_add_u(&narenas_total, 1); } unsigned -narenas_total_get(void) -{ +narenas_total_get(void) { return (atomic_read_u(&narenas_total)); } /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) -{ +arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; assert(ind <= narenas_total_get()); - if (ind > MALLOCX_ARENA_MAX) + if (ind > MALLOCX_ARENA_MAX) { return (NULL); - if (ind == narenas_total_get()) + } + if (ind == narenas_total_get()) { narenas_total_inc(); + } /* * Another thread may have already initialized arenas[ind] if it's an @@ -418,8 +410,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) } arena_t * -arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) -{ +arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; malloc_mutex_lock(tsdn, &arenas_lock); @@ -429,25 +420,25 @@ arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) } static void -arena_bind(tsd_t *tsd, unsigned ind, bool internal) -{ +arena_bind(tsd_t *tsd, unsigned ind, bool internal) { 
arena_t *arena; - if (!tsd_nominal(tsd)) + if (!tsd_nominal(tsd)) { return; + } arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); - if (internal) + if (internal) { tsd_iarena_set(tsd, arena); - else + } else { tsd_arena_set(tsd, arena); + } } void -arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) -{ +arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_t *oldarena, *newarena; oldarena = arena_get(tsd_tsdn(tsd), oldind, false); @@ -458,21 +449,20 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) } static void -arena_unbind(tsd_t *tsd, unsigned ind, bool internal) -{ +arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena_t *arena; arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_dec(arena, internal); - if (internal) + if (internal) { tsd_iarena_set(tsd, NULL); - else + } else { tsd_arena_set(tsd, NULL); + } } arena_tdata_t * -arena_tdata_get_hard(tsd_t *tsd, unsigned ind) -{ +arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { arena_tdata_t *tdata, *arenas_tdata_old; arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); unsigned narenas_tdata_old, i; @@ -541,15 +531,15 @@ arena_tdata_get_hard(tsd_t *tsd, unsigned ind) /* Read the refreshed tdata array. */ tdata = &arenas_tdata[ind]; label_return: - if (arenas_tdata_old != NULL) + if (arenas_tdata_old != NULL) { a0dalloc(arenas_tdata_old); + } return (tdata); } /* Slow path, called only by arena_choose(). */ arena_t * -arena_choose_hard(tsd_t *tsd, bool internal) -{ +arena_choose_hard(tsd_t *tsd, bool internal) { arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); if (narenas_auto > 1) { @@ -563,8 +553,9 @@ arena_choose_hard(tsd_t *tsd, bool internal) * choose[1]: For internal metadata allocation. 
*/ - for (j = 0; j < 2; j++) + for (j = 0; j < 2; j++) { choose[j] = 0; + } first_null = narenas_auto; malloc_mutex_lock(tsd_tsdn(tsd), &arenas_lock); @@ -580,8 +571,9 @@ arena_choose_hard(tsd_t *tsd, bool internal) tsd_tsdn(tsd), i, false), !!j) < arena_nthreads_get(arena_get( tsd_tsdn(tsd), choose[j], false), - !!j)) + !!j)) { choose[j] = i; + } } } else if (first_null == narenas_auto) { /* @@ -622,8 +614,9 @@ arena_choose_hard(tsd_t *tsd, bool internal) &arenas_lock); return (NULL); } - if (!!j == internal) + if (!!j == internal) { ret = arena; + } } arena_bind(tsd, choose[j], !!j); } @@ -638,28 +631,27 @@ arena_choose_hard(tsd_t *tsd, bool internal) } void -iarena_cleanup(tsd_t *tsd) -{ +iarena_cleanup(tsd_t *tsd) { arena_t *iarena; iarena = tsd_iarena_get(tsd); - if (iarena != NULL) + if (iarena != NULL) { arena_unbind(tsd, arena_ind_get(iarena), true); + } } void -arena_cleanup(tsd_t *tsd) -{ +arena_cleanup(tsd_t *tsd) { arena_t *arena; arena = tsd_arena_get(tsd); - if (arena != NULL) + if (arena != NULL) { arena_unbind(tsd, arena_ind_get(arena), false); + } } void -arenas_tdata_cleanup(tsd_t *tsd) -{ +arenas_tdata_cleanup(tsd_t *tsd) { arena_tdata_t *arenas_tdata; /* Prevent tsd->arenas_tdata from being (re)created. 
*/ @@ -673,8 +665,7 @@ arenas_tdata_cleanup(tsd_t *tsd) } static void -stats_print_atexit(void) -{ +stats_print_atexit(void) { if (config_tcache && config_stats) { tsdn_t *tsdn; unsigned narenas, i; @@ -720,19 +711,18 @@ stats_print_atexit(void) #ifndef JEMALLOC_HAVE_SECURE_GETENV static char * -secure_getenv(const char *name) -{ +secure_getenv(const char *name) { # ifdef JEMALLOC_HAVE_ISSETUGID - if (issetugid() != 0) + if (issetugid() != 0) { return (NULL); + } # endif return (getenv(name)); } #endif static unsigned -malloc_ncpus(void) -{ +malloc_ncpus(void) { long result; #ifdef _WIN32 @@ -761,8 +751,7 @@ malloc_ncpus(void) static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, - char const **v_p, size_t *vlen_p) -{ + char const **v_p, size_t *vlen_p) { bool accept; const char *opts = *opts_p; @@ -837,15 +826,13 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, - size_t vlen) -{ + size_t vlen) { malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); } static void -malloc_slow_flag_init(void) -{ +malloc_slow_flag_init(void) { /* * Combine the runtime options into malloc_slow for fast path. Called * after processing all the options. 
@@ -860,8 +847,7 @@ malloc_slow_flag_init(void) } static void -malloc_conf_init(void) -{ +malloc_conf_init(void) { unsigned i; char buf[PATH_MAX + 1]; const char *opts, *k, *v; @@ -948,17 +934,18 @@ malloc_conf_init(void) (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) #define CONF_HANDLE_BOOL(o, n, cont) \ if (CONF_MATCH(n)) { \ - if (CONF_MATCH_VALUE("true")) \ + if (CONF_MATCH_VALUE("true")) { \ o = true; \ - else if (CONF_MATCH_VALUE("false")) \ + } else if (CONF_MATCH_VALUE("false")) { \ o = false; \ - else { \ + } else { \ malloc_conf_error( \ "Invalid conf value", \ k, klen, v, vlen); \ } \ - if (cont) \ + if (cont) { \ continue; \ + } \ } #define CONF_MIN_no(um, min) false #define CONF_MIN_yes(um, min) ((um) < (min)) @@ -978,13 +965,15 @@ malloc_conf_init(void) k, klen, v, vlen); \ } else if (clip) { \ if (CONF_MIN_##check_min(um, \ - (min))) \ + (min))) { \ o = (t)(min); \ - else if (CONF_MAX_##check_max( \ - um, (max))) \ + } else if ( \ + CONF_MAX_##check_max(um, \ + (max))) { \ o = (t)(max); \ - else \ + } else { \ o = (t)um; \ + } \ } else { \ if (CONF_MIN_##check_min(um, \ (min)) || \ @@ -994,8 +983,9 @@ malloc_conf_init(void) "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ - } else \ + } else { \ o = (t)um; \ + } \ } \ continue; \ } @@ -1023,8 +1013,9 @@ malloc_conf_init(void) malloc_conf_error( \ "Out-of-range conf value", \ k, klen, v, vlen); \ - } else \ + } else { \ o = l; \ + } \ continue; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ @@ -1148,8 +1139,7 @@ malloc_conf_init(void) } static bool -malloc_init_hard_needed(void) -{ +malloc_init_hard_needed(void) { if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == malloc_init_recursible)) { /* @@ -1177,35 +1167,42 @@ malloc_init_hard_needed(void) } static bool -malloc_init_hard_a0_locked() -{ +malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; - if (config_prof) + if (config_prof) { prof_boot0(); + } malloc_conf_init(); if (opt_stats_print) { /* Print statistics at 
exit. */ if (atexit(stats_print_atexit) != 0) { malloc_write(": Error in atexit()\n"); - if (opt_abort) + if (opt_abort) { abort(); + } } } pages_boot(); - if (base_boot(TSDN_NULL)) + if (base_boot(TSDN_NULL)) { return (true); - if (extent_boot()) + } + if (extent_boot()) { return (true); - if (ctl_boot()) + } + if (ctl_boot()) { return (true); - if (config_prof) + } + if (config_prof) { prof_boot1(); + } arena_boot(); - if (config_tcache && tcache_boot(TSDN_NULL)) + if (config_tcache && tcache_boot(TSDN_NULL)) { return (true); - if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) + } + if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) { return (true); + } /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). @@ -1218,9 +1215,10 @@ malloc_init_hard_a0_locked() * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) == - NULL) + if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) + == NULL) { return (true); + } malloc_init_state = malloc_init_a0_initialized; @@ -1228,8 +1226,7 @@ malloc_init_hard_a0_locked() } static bool -malloc_init_hard_a0(void) -{ +malloc_init_hard_a0(void) { bool ret; malloc_mutex_lock(TSDN_NULL, &init_lock); @@ -1240,8 +1237,7 @@ malloc_init_hard_a0(void) /* Initialize data structures which may trigger recursive allocation. 
*/ static bool -malloc_init_hard_recursible(void) -{ +malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); @@ -1253,8 +1249,9 @@ malloc_init_hard_recursible(void) if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, jemalloc_postfork_child) != 0) { malloc_write(": Error in pthread_atfork()\n"); - if (opt_abort) + if (opt_abort) { abort(); + } return (true); } #endif @@ -1263,20 +1260,21 @@ malloc_init_hard_recursible(void) } static bool -malloc_init_hard_finish(tsdn_t *tsdn) -{ - if (malloc_mutex_boot()) +malloc_init_hard_finish(tsdn_t *tsdn) { + if (malloc_mutex_boot()) { return (true); + } if (opt_narenas == 0) { /* * For SMP systems, create more than one arena per CPU by * default. */ - if (ncpus > 1) + if (ncpus > 1) { opt_narenas = ncpus << 2; - else + } else { opt_narenas = 1; + } } narenas_auto = opt_narenas; /* @@ -1292,8 +1290,9 @@ malloc_init_hard_finish(tsdn_t *tsdn) /* Allocate and initialize arenas. */ arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) * (MALLOCX_ARENA_MAX+1), CACHELINE); - if (arenas == NULL) + if (arenas == NULL) { return (true); + } /* Copy the pointer to the one arena that was already initialized. */ arena_set(0, a0); @@ -1304,8 +1303,7 @@ malloc_init_hard_finish(tsdn_t *tsdn) } static bool -malloc_init_hard(void) -{ +malloc_init_hard(void) { tsd_t *tsd; #if defined(_WIN32) && _WIN32_WINNT < 0x0600 @@ -1326,10 +1324,12 @@ malloc_init_hard(void) malloc_mutex_unlock(TSDN_NULL, &init_lock); /* Recursive allocation relies on functional tsd. 
*/ tsd = malloc_tsd_boot0(); - if (tsd == NULL) + if (tsd == NULL) { return (true); - if (malloc_init_hard_recursible()) + } + if (malloc_init_hard_recursible()) { return (true); + } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); if (config_prof && prof_boot2(tsd)) { @@ -1616,7 +1616,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { /* @@ -1755,8 +1754,7 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_malloc(size_t size) -{ +je_malloc(size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -1780,8 +1778,7 @@ je_malloc(size_t size) JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) -je_posix_memalign(void **memptr, size_t alignment, size_t size) -{ +je_posix_memalign(void **memptr, size_t alignment, size_t size) { int ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -1808,8 +1805,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) -je_aligned_alloc(size_t alignment, size_t size) -{ +je_aligned_alloc(size_t alignment, size_t size) { void *ret; static_opts_t sopts; @@ -1839,8 +1835,7 @@ je_aligned_alloc(size_t alignment, size_t size) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) -je_calloc(size_t num, size_t size) -{ +je_calloc(size_t num, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -1865,29 +1860,30 @@ je_calloc(size_t num, size_t size) static void * irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, - size_t old_usize, size_t usize, prof_tctx_t *tctx) -{ + size_t 
old_usize, size_t usize, prof_tctx_t *tctx) { void *p; - if (tctx == NULL) + if (tctx == NULL) { return (NULL); + } if (usize <= SMALL_MAXCLASS) { p = iralloc(tsd, extent, old_ptr, old_usize, LARGE_MINCLASS, 0, false); - if (p == NULL) + if (p == NULL) { return (NULL); + } arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize); - } else + } else { p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); + } return (p); } JEMALLOC_ALWAYS_INLINE_C void * irealloc_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, - size_t usize) -{ + size_t usize) { void *p; extent_t *extent; bool prof_active; @@ -1915,8 +1911,7 @@ irealloc_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, } JEMALLOC_INLINE_C void -ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) -{ +ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { extent_t *extent; size_t usize; @@ -1929,42 +1924,46 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) if (config_prof && opt_prof) { usize = isalloc(tsd_tsdn(tsd), extent, ptr); prof_free(tsd, extent, ptr, usize); - } else if (config_stats) + } else if (config_stats) { usize = isalloc(tsd_tsdn(tsd), extent, ptr); - if (config_stats) + } + if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; + } - if (likely(!slow_path)) + if (likely(!slow_path)) { idalloctm(tsd_tsdn(tsd), extent, ptr, tcache, false, false); - else + } else { idalloctm(tsd_tsdn(tsd), extent, ptr, tcache, false, true); + } } JEMALLOC_INLINE_C void isfree(tsd_t *tsd, extent_t *extent, void *ptr, size_t usize, tcache_t *tcache, - bool slow_path) -{ + bool slow_path) { witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - if (config_prof && opt_prof) + if (config_prof && opt_prof) { prof_free(tsd, extent, ptr, usize); - if (config_stats) + } + if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; + } - if 
(likely(!slow_path)) + if (likely(!slow_path)) { isdalloct(tsd_tsdn(tsd), extent, ptr, usize, tcache, false); - else + } else { isdalloct(tsd_tsdn(tsd), extent, ptr, usize, tcache, true); + } } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) -{ +je_realloc(void *ptr, size_t size) { void *ret; tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); @@ -2000,8 +1999,9 @@ je_realloc(void *ptr, size_t size) NULL : irealloc_prof(tsd, extent, ptr, old_usize, usize); } else { - if (config_stats) + if (config_stats) { usize = s2u(size); + } ret = iralloc(tsd, extent, ptr, old_usize, size, 0, false); } @@ -2033,16 +2033,16 @@ je_realloc(void *ptr, size_t size) } JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_free(void *ptr) -{ +je_free(void *ptr) { UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - if (likely(!malloc_slow)) + if (likely(!malloc_slow)) { ifree(tsd, ptr, tcache_get(tsd, false), false); - else + } else { ifree(tsd, ptr, tcache_get(tsd, false), true); + } witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -2059,8 +2059,7 @@ je_free(void *ptr) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) -je_memalign(size_t alignment, size_t size) -{ +je_memalign(size_t alignment, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -2090,8 +2089,7 @@ je_memalign(size_t alignment, size_t size) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) -je_valloc(size_t size) -{ +je_valloc(size_t size) { void *ret; static_opts_t sopts; @@ -2180,8 +2178,7 @@ int __posix_memalign(void** r, size_t a, size_t s) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_mallocx(size_t size, int flags) -{ 
+je_mallocx(size_t size, int flags) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -2225,17 +2222,18 @@ je_mallocx(size_t size, int flags) static void * irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, size_t old_usize, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) -{ + tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { void *p; - if (tctx == NULL) + if (tctx == NULL) { return (NULL); + } if (usize <= SMALL_MAXCLASS) { p = iralloct(tsdn, extent, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); - if (p == NULL) + if (p == NULL) { return (NULL); + } arena_prof_promote(tsdn, iealloc(tsdn, p), p, usize); } else { p = iralloct(tsdn, extent, old_ptr, old_usize, usize, alignment, @@ -2248,8 +2246,7 @@ irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, - arena_t *arena) -{ + arena_t *arena) { void *p; extent_t *extent; bool prof_active; @@ -2281,8 +2278,9 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, */ extent = old_extent; *usize = isalloc(tsd_tsdn(tsd), extent, p); - } else + } else { extent = iealloc(tsd_tsdn(tsd), p); + } prof_realloc(tsd, extent, p, *usize, tctx, prof_active, false, old_extent, old_ptr, old_usize, old_tctx); @@ -2292,8 +2290,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ALLOC_SIZE(2) -je_rallocx(void *ptr, size_t size, int flags) -{ +je_rallocx(void *ptr, size_t size, int flags) { void *p; tsd_t *tsd; extent_t *extent; @@ -2314,34 +2311,41 @@ je_rallocx(void *ptr, size_t size, int flags) if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = 
MALLOCX_ARENA_GET(flags); arena = arena_get(tsd_tsdn(tsd), arena_ind, true); - if (unlikely(arena == NULL)) + if (unlikely(arena == NULL)) { goto label_oom; - } else + } + } else { arena = NULL; + } if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; - else + } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } else + } + } else { tcache = tcache_get(tsd, true); + } old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; + } p = irallocx_prof(tsd, extent, ptr, old_usize, size, alignment, &usize, zero, tcache, arena); - if (unlikely(p == NULL)) + if (unlikely(p == NULL)) { goto label_oom; + } } else { p = iralloct(tsd_tsdn(tsd), extent, ptr, old_usize, size, alignment, zero, tcache, arena); - if (unlikely(p == NULL)) + if (unlikely(p == NULL)) { goto label_oom; + } if (config_stats) { usize = isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p); @@ -2368,12 +2372,13 @@ label_oom: JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, - size_t size, size_t extra, size_t alignment, bool zero) -{ + size_t size, size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(tsdn, extent, ptr, old_usize, size, extra, alignment, zero)) + if (ixalloc(tsdn, extent, ptr, old_usize, size, extra, alignment, + zero)) { return (old_usize); + } usize = isalloc(tsdn, extent, ptr); return (usize); @@ -2382,12 +2387,12 @@ ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, static size_t ixallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, - prof_tctx_t 
*tctx) -{ + prof_tctx_t *tctx) { size_t usize; - if (tctx == NULL) + if (tctx == NULL) { return (old_usize); + } usize = ixallocx_helper(tsdn, extent, ptr, old_usize, size, extra, alignment, zero); @@ -2396,8 +2401,7 @@ ixallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *ptr, JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, - size_t size, size_t extra, size_t alignment, bool zero) -{ + size_t size, size_t extra, size_t alignment, bool zero) { size_t usize_max, usize; bool prof_active; prof_tctx_t *old_tctx, *tctx; @@ -2445,8 +2449,7 @@ ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -je_xallocx(void *ptr, size_t size, size_t extra, int flags) -{ +je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; extent_t *extent; size_t usize, old_usize; @@ -2476,8 +2479,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = old_usize; goto label_not_resized; } - if (unlikely(LARGE_MAXCLASS - size < extra)) + if (unlikely(LARGE_MAXCLASS - size < extra)) { extra = LARGE_MAXCLASS - size; + } if (config_prof && opt_prof) { usize = ixallocx_prof(tsd, extent, ptr, old_usize, size, extra, @@ -2486,8 +2490,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) usize = ixallocx_helper(tsd_tsdn(tsd), extent, ptr, old_usize, size, extra, alignment, zero); } - if (unlikely(usize == old_usize)) + if (unlikely(usize == old_usize)) { goto label_not_resized; + } if (config_stats) { *tsd_thread_allocatedp_get(tsd) += usize; @@ -2501,8 +2506,7 @@ label_not_resized: JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, int flags) -{ +je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; @@ -2511,18 +2515,18 @@ je_sallocx(const void *ptr, int flags) tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); - if (config_ivsalloc) + if (config_ivsalloc) { usize = ivsalloc(tsdn, 
ptr); - else + } else { usize = isalloc(tsdn, iealloc(tsdn, ptr), ptr); + } witness_assert_lockless(tsdn); return (usize); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_dallocx(void *ptr, int flags) -{ +je_dallocx(void *ptr, int flags) { tsd_t *tsd; tcache_t *tcache; @@ -2532,39 +2536,41 @@ je_dallocx(void *ptr, int flags) tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; - else + } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } else + } + } else { tcache = tcache_get(tsd, false); + } UTRACE(ptr, 0, 0); - if (likely(!malloc_slow)) + if (likely(!malloc_slow)) { ifree(tsd, ptr, tcache, false); - else + } else { ifree(tsd, ptr, tcache, true); + } witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_ALWAYS_INLINE_C size_t -inallocx(tsdn_t *tsdn, size_t size, int flags) -{ +inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; witness_assert_lockless(tsdn); - if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) + if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { usize = s2u(size); - else + } else { usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + } witness_assert_lockless(tsdn); return (usize); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_sdallocx(void *ptr, size_t size, int flags) -{ +je_sdallocx(void *ptr, size_t size, int flags) { tsd_t *tsd; extent_t *extent; size_t usize; @@ -2579,39 +2585,43 @@ je_sdallocx(void *ptr, size_t size, int flags) witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) + if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; - else + } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } else + } + } else { tcache = tcache_get(tsd, false); + } UTRACE(ptr, 0, 0); - if 
(likely(!malloc_slow)) + if (likely(!malloc_slow)) { isfree(tsd, extent, ptr, usize, tcache, false); - else + } else { isfree(tsd, extent, ptr, usize, tcache, true); + } witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) -je_nallocx(size_t size, int flags) -{ +je_nallocx(size_t size, int flags) { size_t usize; tsdn_t *tsdn; assert(size != 0); - if (unlikely(malloc_init())) + if (unlikely(malloc_init())) { return (0); + } tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); usize = inallocx(tsdn, size, flags); - if (unlikely(usize > LARGE_MAXCLASS)) + if (unlikely(usize > LARGE_MAXCLASS)) { return (0); + } witness_assert_lockless(tsdn); return (usize); @@ -2619,13 +2629,13 @@ je_nallocx(size_t size, int flags) JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) -{ + size_t newlen) { int ret; tsd_t *tsd; - if (unlikely(malloc_init())) + if (unlikely(malloc_init())) { return (EAGAIN); + } tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); @@ -2635,13 +2645,13 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, } JEMALLOC_EXPORT int JEMALLOC_NOTHROW -je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) -{ +je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; tsdn_t *tsdn; - if (unlikely(malloc_init())) + if (unlikely(malloc_init())) { return (EAGAIN); + } tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); @@ -2652,13 +2662,13 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) -{ + void *newp, size_t newlen) { int ret; tsd_t *tsd; - if (unlikely(malloc_init())) + if (unlikely(malloc_init())) { return (EAGAIN); + } tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); @@ -2669,8 +2679,7 @@ je_mallctlbymib(const 
size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) -{ + const char *opts) { tsdn_t *tsdn; tsdn = tsdn_fetch(); @@ -2680,8 +2689,7 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) -{ +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; @@ -2690,9 +2698,9 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); - if (config_ivsalloc) + if (config_ivsalloc) { ret = ivsalloc(tsdn, ptr); - else { + } else { ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(tsdn, ptr), ptr); } @@ -2726,8 +2734,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) #ifndef JEMALLOC_JET JEMALLOC_ATTR(constructor) static void -jemalloc_constructor(void) -{ +jemalloc_constructor(void) { malloc_init(); } #endif @@ -2745,8 +2752,9 @@ _malloc_prefork(void) arena_t *arena; #ifdef JEMALLOC_MUTEX_INIT_CB - if (!malloc_initialized()) + if (!malloc_initialized()) { return; + } #endif assert(malloc_initialized()); @@ -2779,8 +2787,9 @@ _malloc_prefork(void) } } for (i = 0; i < narenas; i++) { - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) { arena_prefork3(tsd_tsdn(tsd), arena); + } } prof_prefork1(tsd_tsdn(tsd)); } @@ -2797,8 +2806,9 @@ _malloc_postfork(void) unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB - if (!malloc_initialized()) + if (!malloc_initialized()) { return; + } #endif assert(malloc_initialized()); @@ -2809,8 +2819,9 @@ _malloc_postfork(void) for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) { 
arena_postfork_parent(tsd_tsdn(tsd), arena); + } } prof_postfork_parent(tsd_tsdn(tsd)); malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); @@ -2818,8 +2829,7 @@ _malloc_postfork(void) } void -jemalloc_postfork_child(void) -{ +jemalloc_postfork_child(void) { tsd_t *tsd; unsigned i, narenas; @@ -2832,8 +2842,9 @@ jemalloc_postfork_child(void) for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) + if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) { arena_postfork_child(tsd_tsdn(tsd), arena); + } } prof_postfork_child(tsd_tsdn(tsd)); malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 984c944b..030ff995 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -33,8 +33,7 @@ void operator delete[](void *ptr, std::size_t size) noexcept; template JEMALLOC_INLINE void * -newImpl(std::size_t size) noexcept(IsNoExcept) -{ +newImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = je_malloc(size); if (likely(ptr != nullptr)) return (ptr); @@ -67,65 +66,55 @@ newImpl(std::size_t size) noexcept(IsNoExcept) } void * -operator new(std::size_t size) -{ +operator new(std::size_t size) { return (newImpl(size)); } void * -operator new[](std::size_t size) -{ +operator new[](std::size_t size) { return (newImpl(size)); } void * -operator new(std::size_t size, const std::nothrow_t &) noexcept -{ +operator new(std::size_t size, const std::nothrow_t &) noexcept { return (newImpl(size)); } void * -operator new[](std::size_t size, const std::nothrow_t &) noexcept -{ +operator new[](std::size_t size, const std::nothrow_t &) noexcept { return (newImpl(size)); } void -operator delete(void *ptr) noexcept -{ +operator delete(void *ptr) noexcept { je_free(ptr); } void -operator delete[](void *ptr) noexcept -{ +operator delete[](void *ptr) noexcept { je_free(ptr); } void -operator delete(void *ptr, const std::nothrow_t 
&) noexcept -{ +operator delete(void *ptr, const std::nothrow_t &) noexcept { je_free(ptr); } -void operator delete[](void *ptr, const std::nothrow_t &) noexcept -{ +void operator delete[](void *ptr, const std::nothrow_t &) noexcept { je_free(ptr); } #if __cpp_sized_deallocation >= 201309 void -operator delete(void *ptr, std::size_t size) noexcept -{ +operator delete(void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } je_sdallocx(ptr, size, /*flags=*/0); } -void operator delete[](void *ptr, std::size_t size) noexcept -{ +void operator delete[](void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } diff --git a/src/large.c b/src/large.c index 9936b236..0f2f1763 100644 --- a/src/large.c +++ b/src/large.c @@ -4,8 +4,7 @@ /******************************************************************************/ void * -large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) -{ +large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); return (large_palloc(tsdn, arena, usize, CACHELINE, zero)); @@ -13,8 +12,7 @@ large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) void * large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero) -{ + bool zero) { size_t ausize; extent_t *extent; bool is_zeroed; @@ -23,27 +21,31 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(!tsdn_null(tsdn) || arena != NULL); ausize = sa2u(usize, alignment); - if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) + if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) { return (NULL); + } /* * Copy zero into is_zeroed and pass the copy to extent_alloc(), so that * it is possible to make correct junk/zero fill decisions below. 
*/ is_zeroed = zero; - if (likely(!tsdn_null(tsdn))) + if (likely(!tsdn_null(tsdn))) { arena = arena_choose(tsdn_tsd(tsdn), arena); + } if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, - arena, usize, alignment, &is_zeroed)) == NULL) + arena, usize, alignment, &is_zeroed)) == NULL) { return (NULL); + } /* Insert extent into large. */ malloc_mutex_lock(tsdn, &arena->large_mtx); ql_elm_new(extent, ql_link); ql_tail_insert(&arena->large, extent, ql_link); malloc_mutex_unlock(tsdn, &arena->large_mtx); - if (config_prof && arena_prof_accum(tsdn, arena, usize)) + if (config_prof && arena_prof_accum(tsdn, arena, usize)) { prof_idump(tsdn); + } if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed) { @@ -64,8 +66,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #define large_dalloc_junk JEMALLOC_N(n_large_dalloc_junk) #endif void -large_dalloc_junk(void *ptr, size_t usize) -{ +large_dalloc_junk(void *ptr, size_t usize) { memset(ptr, JEMALLOC_FREE_JUNK, usize); } #ifdef JEMALLOC_JET @@ -79,15 +80,15 @@ large_dalloc_junk_t *large_dalloc_junk = JEMALLOC_N(n_large_dalloc_junk); #define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) #endif void -large_dalloc_maybe_junk(void *ptr, size_t usize) -{ +large_dalloc_maybe_junk(void *ptr, size_t usize) { if (config_fill && have_dss && unlikely(opt_junk_free)) { /* * Only bother junk filling if the extent isn't about to be * unmapped. 
*/ - if (!config_munmap || (have_dss && extent_in_dss(ptr))) + if (!config_munmap || (have_dss && extent_in_dss(ptr))) { large_dalloc_junk(ptr, usize); + } } } #ifdef JEMALLOC_JET @@ -98,8 +99,7 @@ large_dalloc_maybe_junk_t *large_dalloc_maybe_junk = #endif static bool -large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) -{ +large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); @@ -107,16 +107,18 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) assert(oldusize > usize); - if (extent_hooks->split == NULL) + if (extent_hooks->split == NULL) { return (true); + } /* Split excess pages. */ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, &extent_hooks, extent, usize + large_pad, usize, diff, diff); - if (trail == NULL) + if (trail == NULL) { return (true); + } if (config_fill && unlikely(opt_junk_free)) { large_dalloc_maybe_junk(extent_addr_get(trail), @@ -133,8 +135,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) static bool large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, - bool zero) -{ + bool zero) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); bool is_zeroed_trail = false; @@ -142,8 +143,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, size_t trailsize = usize - extent_usize_get(extent); extent_t *trail; - if (extent_hooks->merge == NULL) + if (extent_hooks->merge == NULL) { return (true); + } if ((trail = arena_extent_cache_alloc(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) == @@ -151,8 +153,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool commit = true; if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, 
extent_past_get(extent), trailsize, 0, CACHELINE, - &is_zeroed_trail, &commit, false)) == NULL) + &is_zeroed_trail, &commit, false)) == NULL) { return (true); + } } if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { @@ -193,8 +196,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, - size_t usize_max, bool zero) -{ + size_t usize_max, bool zero) { assert(s2u(extent_usize_get(extent)) == extent_usize_get(extent)); /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS); @@ -241,17 +243,16 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, static void * large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero) -{ - if (alignment <= CACHELINE) + size_t alignment, bool zero) { + if (alignment <= CACHELINE) { return (large_malloc(tsdn, arena, usize, zero)); + } return (large_palloc(tsdn, arena, usize, alignment, zero)); } void * large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) -{ + size_t alignment, bool zero, tcache_t *tcache) { void *ret; size_t copysize; @@ -262,8 +263,9 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, LARGE_MINCLASS); /* Try to avoid moving the allocation. */ - if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) + if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { return (extent_addr_get(extent)); + } /* * usize and old size are different enough that we need to use a @@ -271,8 +273,9 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, * space and copying. */ ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, zero); - if (ret == NULL) + if (ret == NULL) { return (NULL); + } copysize = (usize < extent_usize_get(extent)) ? 
usize : extent_usize_get(extent); @@ -288,8 +291,7 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, * independent of these considerations. */ static void -large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) -{ +large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) { arena_t *arena; arena = extent_arena_get(extent); @@ -302,42 +304,37 @@ large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) } arena_extent_dalloc_large(tsdn, arena, extent, junked_locked); - if (!junked_locked) + if (!junked_locked) { arena_decay_tick(tsdn, arena); + } } void -large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) -{ +large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) { large_dalloc_impl(tsdn, extent, true); } void -large_dalloc(tsdn_t *tsdn, extent_t *extent) -{ +large_dalloc(tsdn_t *tsdn, extent_t *extent) { large_dalloc_impl(tsdn, extent, false); } size_t -large_salloc(tsdn_t *tsdn, const extent_t *extent) -{ +large_salloc(tsdn_t *tsdn, const extent_t *extent) { return (extent_usize_get(extent)); } prof_tctx_t * -large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) -{ +large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { return (extent_prof_tctx_get(extent)); } void -large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) -{ +large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { extent_prof_tctx_set(extent, tctx); } void -large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) -{ +large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } diff --git a/src/mutex.c b/src/mutex.c index bde536de..bc0869f8 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -35,8 +35,7 @@ static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); static void -pthread_create_once(void) -{ +pthread_create_once(void) { pthread_create_fptr = dlsym(RTLD_NEXT, 
"pthread_create"); if (pthread_create_fptr == NULL) { malloc_write(": Error in dlsym(RTLD_NEXT, " @@ -50,8 +49,7 @@ pthread_create_once(void) JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), - void *__restrict arg) -{ + void *__restrict arg) { static pthread_once_t once_control = PTHREAD_ONCE_INIT; pthread_once(&once_control, pthread_create_once); @@ -68,15 +66,16 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, #endif bool -malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) -{ +malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank) { #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); # else if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, - _CRT_SPINCOUNT)) + _CRT_SPINCOUNT)) { return (true); + } # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) mutex->lock = OS_UNFAIR_LOCK_INIT; @@ -88,14 +87,16 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) postponed_mutexes = mutex; } else { if (_pthread_mutex_init_calloc_cb(&mutex->lock, - bootstrap_calloc) != 0) + bootstrap_calloc) != 0) { return (true); + } } #else pthread_mutexattr_t attr; - if (pthread_mutexattr_init(&attr) != 0) + if (pthread_mutexattr_init(&attr) != 0) { return (true); + } pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); if (pthread_mutex_init(&mutex->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); @@ -103,26 +104,24 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) } pthread_mutexattr_destroy(&attr); #endif - if (config_debug) + if (config_debug) { witness_init(&mutex->witness, name, rank, NULL, NULL); + } return (false); } void -malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex) { malloc_mutex_lock(tsdn, mutex); } void -malloc_mutex_postfork_parent(tsdn_t 
*tsdn, malloc_mutex_t *mutex) -{ +malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex) { malloc_mutex_unlock(tsdn, mutex); } void -malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) -{ +malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB malloc_mutex_unlock(tsdn, mutex); #else @@ -130,21 +129,22 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) mutex->witness.rank)) { malloc_printf(": Error re-initializing mutex in " "child\n"); - if (opt_abort) + if (opt_abort) { abort(); + } } #endif } bool -malloc_mutex_boot(void) -{ +malloc_mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB postpone_init = false; while (postponed_mutexes != NULL) { if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, - bootstrap_calloc) != 0) + bootstrap_calloc) != 0) { return (true); + } postponed_mutexes = postponed_mutexes->postponed_next; } #endif diff --git a/src/nstime.c b/src/nstime.c index 57ebf2e0..66989a07 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -3,66 +3,56 @@ #define BILLION UINT64_C(1000000000) void -nstime_init(nstime_t *time, uint64_t ns) -{ +nstime_init(nstime_t *time, uint64_t ns) { time->ns = ns; } void -nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) -{ +nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) { time->ns = sec * BILLION + nsec; } uint64_t -nstime_ns(const nstime_t *time) -{ +nstime_ns(const nstime_t *time) { return (time->ns); } uint64_t -nstime_sec(const nstime_t *time) -{ +nstime_sec(const nstime_t *time) { return (time->ns / BILLION); } uint64_t -nstime_nsec(const nstime_t *time) -{ +nstime_nsec(const nstime_t *time) { return (time->ns % BILLION); } void -nstime_copy(nstime_t *time, const nstime_t *source) -{ +nstime_copy(nstime_t *time, const nstime_t *source) { *time = *source; } int -nstime_compare(const nstime_t *a, const nstime_t *b) -{ +nstime_compare(const nstime_t *a, const nstime_t *b) { return ((a->ns > b->ns) - (a->ns < b->ns)); } 
void -nstime_add(nstime_t *time, const nstime_t *addend) -{ +nstime_add(nstime_t *time, const nstime_t *addend) { assert(UINT64_MAX - time->ns >= addend->ns); time->ns += addend->ns; } void -nstime_subtract(nstime_t *time, const nstime_t *subtrahend) -{ +nstime_subtract(nstime_t *time, const nstime_t *subtrahend) { assert(nstime_compare(time, subtrahend) >= 0); time->ns -= subtrahend->ns; } void -nstime_imultiply(nstime_t *time, uint64_t multiplier) -{ +nstime_imultiply(nstime_t *time, uint64_t multiplier) { assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); @@ -70,16 +60,14 @@ nstime_imultiply(nstime_t *time, uint64_t multiplier) } void -nstime_idivide(nstime_t *time, uint64_t divisor) -{ +nstime_idivide(nstime_t *time, uint64_t divisor) { assert(divisor != 0); time->ns /= divisor; } uint64_t -nstime_divide(const nstime_t *time, const nstime_t *divisor) -{ +nstime_divide(const nstime_t *time, const nstime_t *divisor) { assert(divisor->ns != 0); return (time->ns / divisor->ns); @@ -88,8 +76,7 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) #ifdef _WIN32 # define NSTIME_MONOTONIC true static void -nstime_get(nstime_t *time) -{ +nstime_get(nstime_t *time) { FILETIME ft; uint64_t ticks_100ns; @@ -101,8 +88,7 @@ nstime_get(nstime_t *time) #elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE # define NSTIME_MONOTONIC true static void -nstime_get(nstime_t *time) -{ +nstime_get(nstime_t *time) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); @@ -111,8 +97,7 @@ nstime_get(nstime_t *time) #elif JEMALLOC_HAVE_CLOCK_MONOTONIC # define NSTIME_MONOTONIC true static void -nstime_get(nstime_t *time) -{ +nstime_get(nstime_t *time) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); @@ -121,15 +106,13 @@ nstime_get(nstime_t *time) #elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME # define NSTIME_MONOTONIC true static void -nstime_get(nstime_t *time) -{ +nstime_get(nstime_t 
*time) { nstime_init(time, mach_absolute_time()); } #else # define NSTIME_MONOTONIC false static void -nstime_get(nstime_t *time) -{ +nstime_get(nstime_t *time) { struct timeval tv; gettimeofday(&tv, NULL); @@ -142,8 +125,7 @@ nstime_get(nstime_t *time) #define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) #endif bool -nstime_monotonic(void) -{ +nstime_monotonic(void) { return (NSTIME_MONOTONIC); #undef NSTIME_MONOTONIC } @@ -158,8 +140,7 @@ nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); #define nstime_update JEMALLOC_N(n_nstime_update) #endif bool -nstime_update(nstime_t *time) -{ +nstime_update(nstime_t *time) { nstime_t old_time; nstime_copy(&old_time, time); diff --git a/src/pages.c b/src/pages.c index 7c26a28a..c23dccd7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -18,14 +18,14 @@ static bool os_overcommits; /******************************************************************************/ void * -pages_map(void *addr, size_t size, bool *commit) -{ +pages_map(void *addr, size_t size, bool *commit) { void *ret; assert(size != 0); - if (os_overcommits) + if (os_overcommits) { *commit = true; + } #ifdef _WIN32 /* @@ -46,9 +46,9 @@ pages_map(void *addr, size_t size, bool *commit) } assert(ret != NULL); - if (ret == MAP_FAILED) + if (ret == MAP_FAILED) { ret = NULL; - else if (addr != NULL && ret != addr) { + } else if (addr != NULL && ret != addr) { /* * We succeeded in mapping memory, but not in the right place. 
*/ @@ -62,8 +62,7 @@ pages_map(void *addr, size_t size, bool *commit) } void -pages_unmap(void *addr, size_t size) -{ +pages_unmap(void *addr, size_t size) { #ifdef _WIN32 if (VirtualFree(addr, 0, MEM_RELEASE) == 0) #else @@ -80,15 +79,15 @@ pages_unmap(void *addr, size_t size) "munmap" #endif "(): %s\n", buf); - if (opt_abort) + if (opt_abort) { abort(); + } } } void * pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, - bool *commit) -{ + bool *commit) { void *ret = (void *)((uintptr_t)addr + leadsize); assert(alloc_size >= leadsize + size); @@ -98,30 +97,34 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, pages_unmap(addr, alloc_size); new_addr = pages_map(ret, size, commit); - if (new_addr == ret) + if (new_addr == ret) { return (ret); - if (new_addr) + } + if (new_addr) { pages_unmap(new_addr, size); + } return (NULL); } #else { size_t trailsize = alloc_size - leadsize - size; - if (leadsize != 0) + if (leadsize != 0) { pages_unmap(addr, leadsize); - if (trailsize != 0) + } + if (trailsize != 0) { pages_unmap((void *)((uintptr_t)ret + size), trailsize); + } return (ret); } #endif } static bool -pages_commit_impl(void *addr, size_t size, bool commit) -{ - if (os_overcommits) +pages_commit_impl(void *addr, size_t size, bool commit) { + if (os_overcommits) { return (true); + } #ifdef _WIN32 return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, @@ -131,8 +134,9 @@ pages_commit_impl(void *addr, size_t size, bool commit) int prot = commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, -1, 0); - if (result == MAP_FAILED) + if (result == MAP_FAILED) { return (true); + } if (result != addr) { /* * We succeeded in mapping memory, but not in the right @@ -147,22 +151,20 @@ pages_commit_impl(void *addr, size_t size, bool commit) } bool -pages_commit(void *addr, size_t size) -{ +pages_commit(void *addr, size_t size) { return (pages_commit_impl(addr, size, true)); } bool -pages_decommit(void *addr, size_t size) -{ +pages_decommit(void *addr, size_t size) { return (pages_commit_impl(addr, size, false)); } bool -pages_purge_lazy(void *addr, size_t size) -{ - if (!pages_can_purge_lazy) +pages_purge_lazy(void *addr, size_t size) { + if (!pages_can_purge_lazy) { return (true); + } #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); @@ -175,10 +177,10 @@ pages_purge_lazy(void *addr, size_t size) } bool -pages_purge_forced(void *addr, size_t size) -{ - if (!pages_can_purge_forced) +pages_purge_forced(void *addr, size_t size) { + if (!pages_can_purge_forced) { return (true); + } #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) return (madvise(addr, size, MADV_DONTNEED) != 0); @@ -188,8 +190,7 @@ pages_purge_forced(void *addr, size_t size) } bool -pages_huge(void *addr, size_t size) -{ +pages_huge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); @@ -201,8 +202,7 @@ pages_huge(void *addr, size_t size) } bool -pages_nohuge(void *addr, size_t size) -{ +pages_nohuge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); @@ -215,14 +215,14 @@ pages_nohuge(void *addr, size_t size) #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool -os_overcommits_sysctl(void) -{ +os_overcommits_sysctl(void) { int vm_overcommit; size_t sz; sz = sizeof(vm_overcommit); - if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) + if 
(sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return (false); /* Error. */ + } return ((vm_overcommit & 0x3) == 0); } @@ -235,8 +235,7 @@ os_overcommits_sysctl(void) * wrappers. */ static bool -os_overcommits_proc(void) -{ +os_overcommits_proc(void) { int fd; char buf[1]; ssize_t nread; @@ -246,8 +245,9 @@ os_overcommits_proc(void) #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); #endif - if (fd == -1) + if (fd == -1) { return (false); /* Error. */ + } #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); @@ -261,8 +261,9 @@ os_overcommits_proc(void) close(fd); #endif - if (nread < 1) + if (nread < 1) { return (false); /* Error. */ + } /* * /proc/sys/vm/overcommit_memory meanings: * 0: Heuristic overcommit. @@ -274,8 +275,7 @@ os_overcommits_proc(void) #endif void -pages_boot(void) -{ +pages_boot(void) { #ifndef _WIN32 mmap_flags = MAP_PRIVATE | MAP_ANON; #endif @@ -285,8 +285,9 @@ pages_boot(void) #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) os_overcommits = os_overcommits_proc(); # ifdef MAP_NORESERVE - if (os_overcommits) + if (os_overcommits) { mmap_flags |= MAP_NORESERVE; + } # endif #else os_overcommits = false; diff --git a/src/prof.c b/src/prof.c index b161acfb..ca01d8b1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -133,8 +133,7 @@ static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); /* Red-black trees. 
*/ JEMALLOC_INLINE_C int -prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) -{ +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { uint64_t a_thr_uid = a->thr_uid; uint64_t b_thr_uid = b->thr_uid; int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); @@ -157,14 +156,14 @@ rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link, prof_tctx_comp) JEMALLOC_INLINE_C int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) -{ +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { unsigned a_len = a->bt.len; unsigned b_len = b->bt.len; unsigned comp_len = (a_len < b_len) ? a_len : b_len; int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); - if (ret == 0) + if (ret == 0) { ret = (a_len > b_len) - (a_len < b_len); + } return (ret); } @@ -172,8 +171,7 @@ rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, prof_gctx_comp) JEMALLOC_INLINE_C int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) -{ +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { int ret; uint64_t a_uid = a->thr_uid; uint64_t b_uid = b->thr_uid; @@ -194,8 +192,7 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, /******************************************************************************/ void -prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) -{ +prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { prof_tdata_t *tdata; cassert(config_prof); @@ -208,24 +205,25 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) * programs. 
*/ tdata = prof_tdata_get(tsd, true); - if (tdata != NULL) + if (tdata != NULL) { prof_sample_threshold_update(tdata); + } } if ((uintptr_t)tctx > (uintptr_t)1U) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); - else + } else { malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + } } } void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx) -{ + size_t usize, prof_tctx_t *tctx) { prof_tctx_set(tsdn, extent, ptr, usize, tctx); malloc_mutex_lock(tsdn, tctx->tdata->lock); @@ -240,23 +238,22 @@ prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, const void *ptr, } void -prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) -{ +prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); - else + } else { malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + } } void -bt_init(prof_bt_t *bt, void **vec) -{ +bt_init(prof_bt_t *bt, void **vec) { cassert(config_prof); bt->vec = vec; @@ -264,8 +261,7 @@ bt_init(prof_bt_t *bt, void **vec) } JEMALLOC_INLINE_C void -prof_enter(tsd_t *tsd, prof_tdata_t *tdata) -{ +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); @@ -278,8 +274,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) } JEMALLOC_INLINE_C void -prof_leave(tsd_t *tsd, prof_tdata_t *tdata) -{ +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); @@ -295,17 +290,18 @@ prof_leave(tsd_t 
*tsd, prof_tdata_t *tdata) gdump = tdata->enq_gdump; tdata->enq_gdump = false; - if (idump) + if (idump) { prof_idump(tsd_tsdn(tsd)); - if (gdump) + } + if (gdump) { prof_gdump(tsd_tsdn(tsd)); + } } } #ifdef JEMALLOC_PROF_LIBUNWIND void -prof_backtrace(prof_bt_t *bt) -{ +prof_backtrace(prof_bt_t *bt) { int nframes; cassert(config_prof); @@ -313,41 +309,41 @@ prof_backtrace(prof_bt_t *bt) assert(bt->vec != NULL); nframes = unw_backtrace(bt->vec, PROF_BT_MAX); - if (nframes <= 0) + if (nframes <= 0) { return; + } bt->len = nframes; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) -{ +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { cassert(config_prof); return (_URC_NO_REASON); } static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) -{ +prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; void *ip; cassert(config_prof); ip = (void *)_Unwind_GetIP(context); - if (ip == NULL) + if (ip == NULL) { return (_URC_END_OF_STACK); + } data->bt->vec[data->bt->len] = ip; data->bt->len++; - if (data->bt->len == data->max) + if (data->bt->len == data->max) { return (_URC_END_OF_STACK); + } return (_URC_NO_REASON); } void -prof_backtrace(prof_bt_t *bt) -{ +prof_backtrace(prof_bt_t *bt) { prof_unwind_data_t data = {bt, PROF_BT_MAX}; cassert(config_prof); @@ -356,20 +352,22 @@ prof_backtrace(prof_bt_t *bt) } #elif (defined(JEMALLOC_PROF_GCC)) void -prof_backtrace(prof_bt_t *bt) -{ +prof_backtrace(prof_bt_t *bt) { #define BT_FRAME(i) \ if ((i) < PROF_BT_MAX) { \ void *p; \ - if (__builtin_frame_address(i) == 0) \ + if (__builtin_frame_address(i) == 0) { \ return; \ + } \ p = __builtin_return_address(i); \ - if (p == NULL) \ + if (p == NULL) { \ return; \ + } \ bt->vec[(i)] = p; \ bt->len = (i) + 1; \ - } else \ - return; + } else { \ + return; \ + } 
cassert(config_prof); @@ -517,30 +515,26 @@ prof_backtrace(prof_bt_t *bt) } #else void -prof_backtrace(prof_bt_t *bt) -{ +prof_backtrace(prof_bt_t *bt) { cassert(config_prof); not_reached(); } #endif static malloc_mutex_t * -prof_gctx_mutex_choose(void) -{ +prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_add_u(&cum_gctxs, 1); return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]); } static malloc_mutex_t * -prof_tdata_mutex_choose(uint64_t thr_uid) -{ +prof_tdata_mutex_choose(uint64_t thr_uid) { return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]); } static prof_gctx_t * -prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) -{ +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { /* * Create a single allocation that has space for vec of length bt->len. */ @@ -548,8 +542,9 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - if (gctx == NULL) + if (gctx == NULL) { return (NULL); + } gctx->lock = prof_gctx_mutex_choose(); /* * Set nlimbo to 1, in order to avoid a race condition with @@ -566,8 +561,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) static void prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) -{ + prof_tdata_t *tdata) { cassert(config_prof); /* @@ -582,8 +576,9 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, assert(gctx->nlimbo != 0); if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { not_reached(); + } prof_leave(tsd, tdata_self); /* Destroy gctx. 
*/ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); @@ -601,34 +596,37 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, } static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) -{ +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - if (opt_prof_accum) + if (opt_prof_accum) { return (false); - if (tctx->cnts.curobjs != 0) + } + if (tctx->cnts.curobjs != 0) { return (false); - if (tctx->prepared) + } + if (tctx->prepared) { return (false); + } return (true); } static bool -prof_gctx_should_destroy(prof_gctx_t *gctx) -{ - if (opt_prof_accum) +prof_gctx_should_destroy(prof_gctx_t *gctx) { + if (opt_prof_accum) { return (false); - if (!tctx_tree_empty(&gctx->tctxs)) + } + if (!tctx_tree_empty(&gctx->tctxs)) { return (false); - if (gctx->nlimbo != 0) + } + if (gctx->nlimbo != 0) { return (false); + } return (true); } static void -prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) -{ +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; prof_gctx_t *gctx = tctx->gctx; bool destroy_tdata, destroy_tctx, destroy_gctx; @@ -667,8 +665,9 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) */ gctx->nlimbo++; destroy_gctx = true; - } else + } else { destroy_gctx = false; + } break; case prof_tctx_state_dumping: /* @@ -693,18 +692,19 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - if (destroy_tdata) + if (destroy_tdata) { prof_tdata_destroy(tsd, tdata, false); + } - if (destroy_tctx) + if (destroy_tctx) { idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tctx), tctx, NULL, true, true); + } } static bool prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, - void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) -{ + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { union { prof_gctx_t *p; void *v; @@ -751,8 +751,7 @@ prof_lookup_global(tsd_t *tsd, 
prof_bt_t *bt, prof_tdata_t *tdata, } prof_tctx_t * -prof_lookup(tsd_t *tsd, prof_bt_t *bt) -{ +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { prof_tctx_t *p; void *v; @@ -763,13 +762,15 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) cassert(config_prof); tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) + if (tdata == NULL) { return (NULL); + } malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); - if (!not_found) /* Note double negative! */ + if (!not_found) { /* Note double negative! */ ret.p->prepared = true; + } malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (not_found) { void *btkey; @@ -781,16 +782,18 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) * cache. */ if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) + &new_gctx)) { return (NULL); + } /* Link a prof_tctx_t into gctx for this thread. */ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), size2index(sizeof(prof_tctx_t)), false, NULL, true, arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { - if (new_gctx) + if (new_gctx) { prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } return (NULL); } ret.p->tdata = tdata; @@ -805,8 +808,9 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { - if (new_gctx) + if (new_gctx) { prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ret.v), ret.v, NULL, true, true); return (NULL); @@ -835,14 +839,14 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) * -mno-sse) in order for the workaround to be complete. 
*/ void -prof_sample_threshold_update(prof_tdata_t *tdata) -{ +prof_sample_threshold_update(prof_tdata_t *tdata) { #ifdef JEMALLOC_PROF uint64_t r; double u; - if (!config_prof) + if (!config_prof) { return; + } if (lg_prof_sample == 0) { tdata->bytes_until_sample = 0; @@ -877,8 +881,8 @@ prof_sample_threshold_update(prof_tdata_t *tdata) #ifdef JEMALLOC_JET static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) -{ +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { size_t *tdata_count = (size_t *)arg; (*tdata_count)++; @@ -887,8 +891,7 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) } size_t -prof_tdata_count(void) -{ +prof_tdata_count(void) { size_t tdata_count = 0; tsdn_t *tsdn; @@ -904,16 +907,16 @@ prof_tdata_count(void) #ifdef JEMALLOC_JET size_t -prof_bt_count(void) -{ +prof_bt_count(void) { size_t bt_count; tsd_t *tsd; prof_tdata_t *tdata; tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) + if (tdata == NULL) { return (0); + } malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); @@ -928,16 +931,16 @@ prof_bt_count(void) #define prof_dump_open JEMALLOC_N(prof_dump_open_impl) #endif static int -prof_dump_open(bool propagate_err, const char *filename) -{ +prof_dump_open(bool propagate_err, const char *filename) { int fd; fd = creat(filename, 0644); if (fd == -1 && !propagate_err) { malloc_printf(": creat(\"%s\"), 0644) failed\n", filename); - if (opt_abort) + if (opt_abort) { abort(); + } } return (fd); @@ -949,8 +952,7 @@ prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); #endif static bool -prof_dump_flush(bool propagate_err) -{ +prof_dump_flush(bool propagate_err) { bool ret = false; ssize_t err; @@ -961,8 +963,9 @@ prof_dump_flush(bool propagate_err) if (!propagate_err) { malloc_write(": write() failed during heap " "profile flush\n"); - if (opt_abort) + if (opt_abort) { 
abort(); + } } ret = true; } @@ -972,8 +975,7 @@ prof_dump_flush(bool propagate_err) } static bool -prof_dump_close(bool propagate_err) -{ +prof_dump_close(bool propagate_err) { bool ret; assert(prof_dump_fd != -1); @@ -985,8 +987,7 @@ prof_dump_close(bool propagate_err) } static bool -prof_dump_write(bool propagate_err, const char *s) -{ +prof_dump_write(bool propagate_err, const char *s) { size_t i, slen, n; cassert(config_prof); @@ -995,9 +996,11 @@ prof_dump_write(bool propagate_err, const char *s) slen = strlen(s); while (i < slen) { /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) - if (prof_dump_flush(propagate_err) && propagate_err) + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + if (prof_dump_flush(propagate_err) && propagate_err) { return (true); + } + } if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { /* Finish writing. */ @@ -1016,8 +1019,7 @@ prof_dump_write(bool propagate_err, const char *s) JEMALLOC_FORMAT_PRINTF(2, 3) static bool -prof_dump_printf(bool propagate_err, const char *format, ...) -{ +prof_dump_printf(bool propagate_err, const char *format, ...) { bool ret; va_list ap; char buf[PROF_PRINTF_BUFSIZE]; @@ -1031,8 +1033,7 @@ prof_dump_printf(bool propagate_err, const char *format, ...) 
} static void -prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) -{ +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); malloc_mutex_lock(tsdn, tctx->gctx->lock); @@ -1063,8 +1064,7 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) } static void -prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) -{ +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; @@ -1076,8 +1076,7 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) } static prof_tctx_t * -prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) -{ +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { tsdn_t *tsdn = (tsdn_t *)arg; malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); @@ -1103,8 +1102,7 @@ struct prof_tctx_dump_iter_arg_s { }; static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) -{ +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { struct prof_tctx_dump_iter_arg_s *arg = (struct prof_tctx_dump_iter_arg_s *)opaque; @@ -1121,8 +1119,9 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) + tctx->dump_cnts.accumbytes)) { return (tctx); + } break; default: not_reached(); @@ -1131,8 +1130,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) } static prof_tctx_t * -prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) -{ +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { tsdn_t *tsdn = (tsdn_t 
*)arg; prof_tctx_t *ret; @@ -1158,8 +1156,7 @@ label_return: } static void -prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) -{ +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { cassert(config_prof); malloc_mutex_lock(tsdn, gctx->lock); @@ -1183,24 +1180,23 @@ struct prof_gctx_merge_iter_arg_s { }; static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) -{ +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { struct prof_gctx_merge_iter_arg_s *arg = (struct prof_gctx_merge_iter_arg_s *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, (void *)arg->tsdn); - if (gctx->cnt_summed.curobjs != 0) + if (gctx->cnt_summed.curobjs != 0) { arg->leak_ngctx++; + } malloc_mutex_unlock(arg->tsdn, gctx->lock); return (NULL); } static void -prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) -{ +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { prof_tdata_t *tdata = prof_tdata_get(tsd, false); prof_gctx_t *gctx; @@ -1230,8 +1226,9 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), to_destroy), to_destroy, NULL, true, true); - } else + } else { next = NULL; + } } while (next != NULL); } gctx->nlimbo--; @@ -1239,8 +1236,9 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) gctx->nlimbo++; malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } else + } else { malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } } } @@ -1251,8 +1249,7 @@ struct prof_tdata_merge_iter_arg_s { static prof_tdata_t * prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *opaque) -{ + void *opaque) { struct prof_tdata_merge_iter_arg_s *arg = (struct prof_tdata_merge_iter_arg_s *)opaque; @@ -1267,8 +1264,9 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, 
tdata->dumping = true; memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) + &tctx.v);) { prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); + } arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; @@ -1276,20 +1274,22 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; } - } else + } else { tdata->dumping = false; + } malloc_mutex_unlock(arg->tsdn, tdata->lock); return (NULL); } static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) -{ +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { bool propagate_err = *(bool *)arg; - if (!tdata->dumping) + if (!tdata->dumping) { return (NULL); + } if (prof_dump_printf(propagate_err, " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", @@ -1297,8 +1297,9 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, tdata->cnt_summed.accumbytes, (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) + (tdata->thread_name != NULL) ? 
tdata->thread_name : "")) { return (tdata); + } return (NULL); } @@ -1307,16 +1308,16 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) #define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool -prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) -{ +prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { bool ret; if (prof_dump_printf(propagate_err, "heap_v2/%"FMTu64"\n" " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { return (true); + } malloc_mutex_lock(tsdn, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, @@ -1332,8 +1333,7 @@ prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); static bool prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) -{ + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { bool ret; unsigned i; struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; @@ -1389,8 +1389,7 @@ label_return: #ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int -prof_open_maps(const char *format, ...) -{ +prof_open_maps(const char *format, ...) { int mfd; va_list ap; char filename[PATH_MAX + 1]; @@ -1405,8 +1404,7 @@ prof_open_maps(const char *format, ...) 
#endif static int -prof_getpid(void) -{ +prof_getpid(void) { #ifdef _WIN32 return (GetCurrentProcessId()); #else @@ -1415,8 +1413,7 @@ prof_getpid(void) } static bool -prof_dump_maps(bool propagate_err) -{ +prof_dump_maps(bool propagate_err) { bool ret; int mfd; @@ -1430,8 +1427,9 @@ prof_dump_maps(bool propagate_err) int pid = prof_getpid(); mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) + if (mfd == -1) { mfd = prof_open_maps("/proc/%d/maps", pid); + } } #endif if (mfd != -1) { @@ -1463,8 +1461,9 @@ prof_dump_maps(bool propagate_err) ret = false; label_return: - if (mfd != -1) + if (mfd != -1) { close(mfd); + } return (ret); } @@ -1474,8 +1473,7 @@ label_return: */ static void prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) -{ + const char *filename) { #ifdef JEMALLOC_PROF /* * Scaling is equivalent AdjustSamples() in jeprof, but the result may @@ -1510,8 +1508,7 @@ struct prof_gctx_dump_iter_arg_s { }; static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) -{ +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_gctx_t *ret; struct prof_gctx_dump_iter_arg_s *arg = (struct prof_gctx_dump_iter_arg_s *)opaque; @@ -1534,8 +1531,7 @@ static void prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) -{ + prof_gctx_tree_t *gctxs) { size_t tabind; union { prof_gctx_t *p; @@ -1579,8 +1575,7 @@ prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, - prof_gctx_tree_t *gctxs) -{ + prof_gctx_tree_t *gctxs) { /* Create dump file. 
*/ if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { return true; @@ -1616,8 +1611,8 @@ label_write_error: } static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) -{ +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { prof_tdata_t *tdata; struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; @@ -1657,8 +1652,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) #ifdef JEMALLOC_JET void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) -{ + uint64_t *accumbytes) { tsd_t *tsd; prof_tdata_t *tdata; struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; @@ -1705,8 +1699,7 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void -prof_dump_filename(char *filename, char v, uint64_t vseq) -{ +prof_dump_filename(char *filename, char v, uint64_t vseq) { cassert(config_prof); if (vseq != VSEQ_INVALID) { @@ -1724,8 +1717,7 @@ prof_dump_filename(char *filename, char v, uint64_t vseq) } static void -prof_fdump(void) -{ +prof_fdump(void) { tsd_t *tsd; char filename[DUMP_FILENAME_BUFSIZE]; @@ -1733,8 +1725,9 @@ prof_fdump(void) assert(opt_prof_final); assert(opt_prof_prefix[0] != '\0'); - if (!prof_booted) + if (!prof_booted) { return; + } tsd = tsd_fetch(); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); @@ -1744,19 +1737,20 @@ prof_fdump(void) } void -prof_idump(tsdn_t *tsdn) -{ +prof_idump(tsdn_t *tsdn) { tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) + if (!prof_booted || tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) + if (tdata == NULL) { return; + } if (tdata->enq) { tdata->enq_idump = true; 
return; @@ -1773,19 +1767,20 @@ prof_idump(tsdn_t *tsdn) } bool -prof_mdump(tsd_t *tsd, const char *filename) -{ +prof_mdump(tsd_t *tsd, const char *filename) { char filename_buf[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); - if (!opt_prof || !prof_booted) + if (!opt_prof || !prof_booted) { return (true); + } if (filename == NULL) { /* No filename specified, so automatically generate one. */ - if (opt_prof_prefix[0] == '\0') + if (opt_prof_prefix[0] == '\0') { return (true); + } malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; @@ -1796,19 +1791,20 @@ prof_mdump(tsd_t *tsd, const char *filename) } void -prof_gdump(tsdn_t *tsdn) -{ +prof_gdump(tsdn_t *tsdn) { tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) + if (!prof_booted || tsdn_null(tsdn)) { return; + } tsd = tsdn_tsd(tsdn); tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) + if (tdata == NULL) { return; + } if (tdata->enq) { tdata->enq_gdump = true; return; @@ -1825,8 +1821,7 @@ prof_gdump(tsdn_t *tsdn) } static void -prof_bt_hash(const void *key, size_t r_hash[2]) -{ +prof_bt_hash(const void *key, size_t r_hash[2]) { prof_bt_t *bt = (prof_bt_t *)key; cassert(config_prof); @@ -1835,21 +1830,20 @@ prof_bt_hash(const void *key, size_t r_hash[2]) } static bool -prof_bt_keycomp(const void *k1, const void *k2) -{ +prof_bt_keycomp(const void *k1, const void *k2) { const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; cassert(config_prof); - if (bt1->len != bt2->len) + if (bt1->len != bt2->len) { return (false); + } return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } JEMALLOC_INLINE_C uint64_t -prof_thr_uid_alloc(tsdn_t *tsdn) -{ +prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; malloc_mutex_lock(tsdn, &next_thr_uid_mtx); @@ -1862,8 +1856,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) static prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t 
thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) -{ + char *thread_name, bool active) { prof_tdata_t *tdata; cassert(config_prof); @@ -1872,8 +1865,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - if (tdata == NULL) + if (tdata == NULL) { return (NULL); + } tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; @@ -1908,26 +1902,25 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, } prof_tdata_t * -prof_tdata_init(tsd_t *tsd) -{ +prof_tdata_init(tsd_t *tsd) { return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); } static bool -prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) -{ - if (tdata->attached && !even_if_attached) +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { + if (tdata->attached && !even_if_attached) { return (false); - if (ckh_count(&tdata->bt2tctx) != 0) + } + if (ckh_count(&tdata->bt2tctx) != 0) { return (false); + } return (true); } static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) -{ + bool even_if_attached) { malloc_mutex_assert_owner(tsdn, tdata->lock); return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); @@ -1935,8 +1928,7 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) -{ + bool even_if_attached) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_remove(&tdatas, tdata); @@ -1953,16 +1945,14 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, } static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) -{ +prof_tdata_destroy(tsd_t *tsd, 
prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); prof_tdata_destroy_locked(tsd, tdata, even_if_attached); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); } static void -prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) -{ +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { bool destroy_tdata; malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); @@ -1973,19 +1963,21 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. */ - if (!destroy_tdata) + if (!destroy_tdata) { tdata->attached = false; + } tsd_prof_tdata_set(tsd, NULL); - } else + } else { destroy_tdata = false; + } malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (destroy_tdata) + if (destroy_tdata) { prof_tdata_destroy(tsd, tdata, true); + } } prof_tdata_t * -prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) -{ +prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? @@ -1998,8 +1990,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) } static bool -prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) -{ +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { bool destroy_tdata; malloc_mutex_lock(tsdn, tdata->lock); @@ -2007,24 +1998,24 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) tdata->expired = true; destroy_tdata = tdata->attached ? false : prof_tdata_should_destroy(tsdn, tdata, false); - } else + } else { destroy_tdata = false; + } malloc_mutex_unlock(tsdn, tdata->lock); return (destroy_tdata); } static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) -{ +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { tsdn_t *tsdn = (tsdn_t *)arg; return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); } void -prof_reset(tsd_t *tsd, size_t lg_sample) -{ +prof_reset(tsd_t *tsd, size_t lg_sample) { prof_tdata_t *next; assert(lg_sample < (sizeof(uint64_t) << 3)); @@ -2041,8 +2032,9 @@ prof_reset(tsd_t *tsd, size_t lg_sample) if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); - } else + } else { next = NULL; + } } while (next != NULL); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); @@ -2050,21 +2042,21 @@ prof_reset(tsd_t *tsd, size_t lg_sample) } void -prof_tdata_cleanup(tsd_t *tsd) -{ +prof_tdata_cleanup(tsd_t *tsd) { prof_tdata_t *tdata; - if (!config_prof) + if (!config_prof) { return; + } tdata = tsd_prof_tdata_get(tsd); - if (tdata != NULL) + if (tdata != NULL) { prof_tdata_detach(tsd, tdata); + } } bool -prof_active_get(tsdn_t *tsdn) -{ +prof_active_get(tsdn_t *tsdn) { bool prof_active_current; malloc_mutex_lock(tsdn, &prof_active_mtx); @@ -2074,8 +2066,7 @@ prof_active_get(tsdn_t *tsdn) } bool -prof_active_set(tsdn_t *tsdn, bool active) -{ +prof_active_set(tsdn_t *tsdn, bool active) { bool prof_active_old; malloc_mutex_lock(tsdn, &prof_active_mtx); @@ -2086,97 +2077,102 @@ prof_active_set(tsdn_t *tsdn, bool active) } const char * -prof_thread_name_get(tsd_t *tsd) -{ +prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) + if (tdata == NULL) { return (""); + } return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) -{ +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { char *ret; size_t size; - if (thread_name == NULL) + if (thread_name == NULL) { return (NULL); + } size = strlen(thread_name) + 1; - if (size == 1) + if (size == 1) { return (""); + } ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) + if (ret == NULL) { return (NULL); + } memcpy(ret, thread_name, size); return (ret); } int -prof_thread_name_set(tsd_t *tsd, const char *thread_name) -{ +prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; unsigned i; char *s; tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) + if (tdata == NULL) { return (EAGAIN); + } /* Validate input. */ - if (thread_name == NULL) + if (thread_name == NULL) { return (EFAULT); + } for (i = 0; thread_name[i] != '\0'; i++) { char c = thread_name[i]; - if (!isgraph(c) && !isblank(c)) + if (!isgraph(c) && !isblank(c)) { return (EFAULT); + } } s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); - if (s == NULL) + if (s == NULL) { return (EAGAIN); + } if (tdata->thread_name != NULL) { idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tdata->thread_name), tdata->thread_name, NULL, true, true); tdata->thread_name = NULL; } - if (strlen(s) > 0) + if (strlen(s) > 0) { tdata->thread_name = s; + } return (0); } bool -prof_thread_active_get(tsd_t *tsd) -{ +prof_thread_active_get(tsd_t *tsd) { prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) + if (tdata == NULL) { return (false); + } return (tdata->active); } bool -prof_thread_active_set(tsd_t *tsd, bool active) -{ +prof_thread_active_set(tsd_t *tsd, bool active) { prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) + if (tdata == NULL) { return (true); + } tdata->active = active; return (false); } bool 
-prof_thread_active_init_get(tsdn_t *tsdn) -{ +prof_thread_active_init_get(tsdn_t *tsdn) { bool active_init; malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); @@ -2186,8 +2182,7 @@ prof_thread_active_init_get(tsdn_t *tsdn) } bool -prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) -{ +prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) { bool active_init_old; malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); @@ -2198,8 +2193,7 @@ prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) } bool -prof_gdump_get(tsdn_t *tsdn) -{ +prof_gdump_get(tsdn_t *tsdn) { bool prof_gdump_current; malloc_mutex_lock(tsdn, &prof_gdump_mtx); @@ -2209,8 +2203,7 @@ prof_gdump_get(tsdn_t *tsdn) } bool -prof_gdump_set(tsdn_t *tsdn, bool gdump) -{ +prof_gdump_set(tsdn_t *tsdn, bool gdump) { bool prof_gdump_old; malloc_mutex_lock(tsdn, &prof_gdump_mtx); @@ -2221,8 +2214,7 @@ prof_gdump_set(tsdn_t *tsdn, bool gdump) } void -prof_boot0(void) -{ +prof_boot0(void) { cassert(config_prof); memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, @@ -2230,8 +2222,7 @@ prof_boot0(void) } void -prof_boot1(void) -{ +prof_boot1(void) { cassert(config_prof); /* @@ -2255,8 +2246,7 @@ prof_boot1(void) } bool -prof_boot2(tsd_t *tsd) -{ +prof_boot2(tsd_t *tsd) { cassert(config_prof); if (opt_prof) { @@ -2266,71 +2256,85 @@ prof_boot2(tsd_t *tsd) prof_active = opt_prof_active; if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE)) + WITNESS_RANK_PROF_ACTIVE)) { return (true); + } prof_gdump_val = opt_prof_gdump; if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP)) + WITNESS_RANK_PROF_GDUMP)) { return (true); + } prof_thread_active_init = opt_prof_thread_active_init; if (malloc_mutex_init(&prof_thread_active_init_mtx, "prof_thread_active_init", - WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) { return (true); + } if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) + 
prof_bt_keycomp)) { return (true); + } if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - WITNESS_RANK_PROF_BT2GCTX)) + WITNESS_RANK_PROF_BT2GCTX)) { return (true); + } tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS)) + WITNESS_RANK_PROF_TDATAS)) { return (true); + } next_thr_uid = 0; if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID)) + WITNESS_RANK_PROF_NEXT_THR_UID)) { return (true); + } if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", - WITNESS_RANK_PROF_DUMP_SEQ)) + WITNESS_RANK_PROF_DUMP_SEQ)) { return (true); + } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP)) + WITNESS_RANK_PROF_DUMP)) { return (true); + } if (opt_prof_final && opt_prof_prefix[0] != '\0' && atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); - if (opt_abort) + if (opt_abort) { abort(); + } } gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); - if (gctx_locks == NULL) + if (gctx_locks == NULL) { return (true); + } for (i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", - WITNESS_RANK_PROF_GCTX)) + WITNESS_RANK_PROF_GCTX)) { return (true); + } } tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), CACHELINE); - if (tdata_locks == NULL) + if (tdata_locks == NULL) { return (true); + } for (i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", - WITNESS_RANK_PROF_TDATA)) + WITNESS_RANK_PROF_TDATA)) { return (true); + } } } @@ -2348,24 +2352,24 @@ prof_boot2(tsd_t *tsd) } void -prof_prefork0(tsdn_t *tsdn) -{ +prof_prefork0(tsdn_t *tsdn) { if (opt_prof) { unsigned i; malloc_mutex_prefork(tsdn, &prof_dump_mtx); malloc_mutex_prefork(tsdn, &bt2gctx_mtx); malloc_mutex_prefork(tsdn, &tdatas_mtx); - for (i = 0; i < PROF_NTDATA_LOCKS; i++) + for (i = 0; i 
< PROF_NTDATA_LOCKS; i++) { malloc_mutex_prefork(tsdn, &tdata_locks[i]); - for (i = 0; i < PROF_NCTX_LOCKS; i++) + } + for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_prefork(tsdn, &gctx_locks[i]); + } } } void -prof_prefork1(tsdn_t *tsdn) -{ +prof_prefork1(tsdn_t *tsdn) { if (opt_prof) { malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); @@ -2376,8 +2380,7 @@ prof_prefork1(tsdn_t *tsdn) } void -prof_postfork_parent(tsdn_t *tsdn) -{ +prof_postfork_parent(tsdn_t *tsdn) { if (opt_prof) { unsigned i; @@ -2387,10 +2390,12 @@ prof_postfork_parent(tsdn_t *tsdn) malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) + for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); - for (i = 0; i < PROF_NTDATA_LOCKS; i++) + } + for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); + } malloc_mutex_postfork_parent(tsdn, &tdatas_mtx); malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx); @@ -2398,8 +2403,7 @@ prof_postfork_parent(tsdn_t *tsdn) } void -prof_postfork_child(tsdn_t *tsdn) -{ +prof_postfork_child(tsdn_t *tsdn) { if (opt_prof) { unsigned i; @@ -2408,10 +2412,12 @@ prof_postfork_child(tsdn_t *tsdn) malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); - for (i = 0; i < PROF_NCTX_LOCKS; i++) + for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); - for (i = 0; i < PROF_NTDATA_LOCKS; i++) + } + for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); + } malloc_mutex_postfork_child(tsdn, &tdatas_mtx); malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx); malloc_mutex_postfork_child(tsdn, 
&prof_dump_mtx); diff --git a/src/rtree.c b/src/rtree.c index 43f21652..de3e5962 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -2,8 +2,7 @@ #include "jemalloc/internal/jemalloc_internal.h" static unsigned -hmin(unsigned ha, unsigned hb) -{ +hmin(unsigned ha, unsigned hb) { return (ha < hb ? ha : hb); } @@ -12,8 +11,7 @@ hmin(unsigned ha, unsigned hb) * used. */ bool -rtree_new(rtree_t *rtree, unsigned bits) -{ +rtree_new(rtree_t *rtree, unsigned bits) { unsigned bits_in_leaf, height, i; assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / @@ -24,10 +22,12 @@ rtree_new(rtree_t *rtree, unsigned bits) : (bits % RTREE_BITS_PER_LEVEL); if (bits > bits_in_leaf) { height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL; - if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits) + if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits) { height++; - } else + } + } else { height = 1; + } assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits); rtree->height = height; @@ -68,8 +68,7 @@ rtree_new(rtree_t *rtree, unsigned bits) #define rtree_node_alloc JEMALLOC_N(rtree_node_alloc_impl) #endif static rtree_elm_t * -rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) -{ +rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { return ((rtree_elm_t *)base_alloc(tsdn, b0get(), nelms * sizeof(rtree_elm_t), CACHELINE)); } @@ -84,8 +83,7 @@ rtree_node_alloc_t *rtree_node_alloc = JEMALLOC_N(rtree_node_alloc_impl); #define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc_impl) #endif UNUSED static void -rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) -{ +rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { /* Nodes are never deleted during normal operation. 
*/ not_reached(); } @@ -98,8 +96,7 @@ rtree_node_dalloc_t *rtree_node_dalloc = JEMALLOC_N(rtree_node_dalloc_impl); #ifdef JEMALLOC_JET static void rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, - unsigned level) -{ + unsigned level) { if (level + 1 < rtree->height) { size_t nchildren, i; @@ -116,22 +113,21 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, } void -rtree_delete(tsdn_t *tsdn, rtree_t *rtree) -{ +rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { unsigned i; for (i = 0; i < rtree->height; i++) { rtree_elm_t *subtree = rtree->levels[i].subtree; - if (subtree != NULL) + if (subtree != NULL) { rtree_delete_subtree(tsdn, rtree, subtree, i); + } } } #endif static rtree_elm_t * rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, - rtree_elm_t **elmp) -{ + rtree_elm_t **elmp) { rtree_elm_t *node; malloc_mutex_lock(tsdn, &rtree->init_lock); @@ -151,23 +147,20 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, } rtree_elm_t * -rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level) -{ +rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level) { return (rtree_node_init(tsdn, rtree, level, &rtree->levels[level].subtree)); } rtree_elm_t * rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, - unsigned level) -{ + unsigned level) { return (rtree_node_init(tsdn, rtree, level+1, &elm->child)); } static int rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, - void *ob) -{ + void *ob) { uintptr_t ka = (uintptr_t)oa; uintptr_t kb = (uintptr_t)ob; @@ -178,8 +171,7 @@ rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, } static witness_t * -rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) -{ +rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) { witness_t *witness; size_t i; rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); @@ -204,8 +196,7 @@ 
rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) } static witness_t * -rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) -{ +rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) { size_t i; rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); @@ -213,15 +204,16 @@ rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) i++) { rtree_elm_witness_t *rew = &witnesses->witnesses[i]; - if (rew->elm == elm) + if (rew->elm == elm) { return (&rew->witness); + } } not_reached(); } static void -rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, const rtree_elm_t *elm) -{ +rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, + const rtree_elm_t *elm) { size_t i; rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); @@ -242,12 +234,12 @@ rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, const rtree_elm_t *elm) void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, uintptr_t key, - const rtree_elm_t *elm) -{ + const rtree_elm_t *elm) { witness_t *witness; - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } witness = rtree_elm_witness_alloc(tsdn_tsd(tsdn), key, elm); witness_lock(tsdn, witness); @@ -255,12 +247,12 @@ rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, uintptr_t key, void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm) -{ + const rtree_elm_t *elm) { witness_t *witness; - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } witness = rtree_elm_witness_find(tsdn_tsd(tsdn), elm); witness_assert_owner(tsdn, witness); @@ -268,12 +260,12 @@ rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm) -{ + const rtree_elm_t *elm) { witness_t *witness; - if (tsdn_null(tsdn)) + if (tsdn_null(tsdn)) { return; + } witness = rtree_elm_witness_find(tsdn_tsd(tsdn), elm); witness_unlock(tsdn, witness); diff --git 
a/src/stats.c b/src/stats.c index 020d56bd..b0a7fca2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -34,8 +34,7 @@ bool opt_stats_print = false; static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, unsigned i) -{ + bool json, bool large, unsigned i) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -144,8 +143,9 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } else if (milli < 1000) { malloc_snprintf(util, sizeof(util), "0.%zu", milli); - } else + } else { malloc_snprintf(util, sizeof(util), "1"); + } if (config_tcache) { malloc_cprintf(write_cb, cbopaque, @@ -183,8 +183,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_arena_lextents_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned i) -{ + void *cbopaque, bool json, unsigned i) { unsigned nbins, nlextents, j; bool in_gap, in_gap_prev; @@ -248,8 +247,7 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large) -{ + bool json, unsigned i, bool bins, bool large) { unsigned nthreads; const char *dss; ssize_t decay_time; @@ -290,8 +288,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, if (decay_time >= 0) { malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", decay_time); - } else + } else { malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); + } } CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); @@ -445,16 +444,17 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "resident: %12zu\n", resident); } - if (bins) + if (bins) { stats_arena_bins_print(write_cb, cbopaque, json, large, i); - if (large) + } + if (large) { stats_arena_lextents_print(write_cb, cbopaque, json, i); + } } static void stats_general_print(void 
(*write_cb)(void *, const char *), void *cbopaque, - bool json, bool more) -{ + bool json, bool more) { const char *cpv; bool bv; unsigned uv; @@ -473,8 +473,9 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\"version\": \"%s\",\n", cpv); - } else + } else { malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); + } /* config. */ #define CONFIG_WRITE_BOOL_JSON(n, c) \ @@ -655,8 +656,9 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\"narenas\": %u,\n", uv); - } else + } else { malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); + } CTL_GET("arenas.decay_time", &ssv, ssize_t); if (json) { @@ -672,15 +674,17 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\"quantum\": %zu,\n", sv); - } else + } else { malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); + } CTL_GET("arenas.page", &sv, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\"page\": %zu,\n", sv); - } else + } else { malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); + } if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { if (json) { @@ -787,8 +791,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large) -{ + bool large) { size_t allocated, active, metadata, resident, mapped, retained; CTL_GET("stats.allocated", &allocated, size_t); @@ -846,8 +849,9 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, sz = sizeof(bool); xmallctlbymib(mib, miblen, &initialized[i], &sz, NULL, 0); - if (initialized[i]) + if (initialized[i]) { ninitialized++; + } } mib[1] = MALLCTL_ARENAS_DESTROYED; sz = 
sizeof(bool); @@ -934,8 +938,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) -{ + const char *opts) { int err; uint64_t epoch; size_t u64sz; diff --git a/src/tcache.c b/src/tcache.c index d1323418..bb6a5a75 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -24,14 +24,12 @@ static tcaches_t *tcaches_avail; /******************************************************************************/ size_t -tcache_salloc(tsdn_t *tsdn, const void *ptr) -{ +tcache_salloc(tsdn_t *tsdn, const void *ptr) { return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr)); } void -tcache_event_hard(tsd_t *tsd, tcache_t *tcache) -{ +tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; tcache_bin_t *tbin = &tcache->tbins[binind]; tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; @@ -52,33 +50,36 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) * Reduce fill count by 2X. Limit lg_fill_div such that the * fill count is always at least 1. */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) + if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) { tbin->lg_fill_div++; + } } else if (tbin->low_water < 0) { /* * Increase fill count by 2X. Make sure lg_fill_div stays * greater than 0. */ - if (tbin->lg_fill_div > 1) + if (tbin->lg_fill_div > 1) { tbin->lg_fill_div--; + } } tbin->low_water = tbin->ncached; tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) + if (tcache->next_gc_bin == nhbins) { tcache->next_gc_bin = 0; + } } void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) -{ + tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? 
tcache->prof_accumbytes : 0); - if (config_prof) + if (config_prof) { tcache->prof_accumbytes = 0; + } ret = tcache_alloc_easy(tbin, tcache_success); return (ret); @@ -86,8 +87,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem) -{ + szind_t binind, unsigned rem) { arena_t *arena; void *ptr; unsigned i, nflush, ndeferred; @@ -106,8 +106,9 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes)) + tcache->prof_accumbytes)) { prof_idump(tsd_tsdn(tsd)); + } tcache->prof_accumbytes = 0; } @@ -158,14 +159,14 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if ((int)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; + } } void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache) -{ + unsigned rem, tcache_t *tcache) { arena_t *arena; void *ptr; unsigned i, nflush, ndeferred; @@ -182,8 +183,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, arena_t *locked_arena = extent_arena_get(extent); UNUSED bool idump; - if (config_prof) + if (config_prof) { idump = false; + } malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { @@ -220,8 +222,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } } malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); - if (config_prof && idump) + if (config_prof && idump) { prof_idump(tsd_tsdn(tsd)); + } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); } @@ -241,13 +244,13 @@ 
tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) + if ((int)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; + } } static void -tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) -{ +tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->lock); @@ -258,8 +261,7 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) -{ +tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->lock); @@ -282,31 +284,30 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, - arena_t *newarena) -{ + arena_t *newarena) { tcache_arena_dissociate(tsdn, tcache, oldarena); tcache_arena_associate(tsdn, tcache, newarena); } tcache_t * -tcache_get_hard(tsd_t *tsd) -{ +tcache_get_hard(tsd_t *tsd) { arena_t *arena; if (!tcache_enabled_get()) { - if (tsd_nominal(tsd)) + if (tsd_nominal(tsd)) { tcache_enabled_set(false); /* Memoize. 
*/ + } return (NULL); } arena = arena_choose(tsd, NULL); - if (unlikely(arena == NULL)) + if (unlikely(arena == NULL)) { return (NULL); + } return (tcache_create(tsd_tsdn(tsd), arena)); } tcache_t * -tcache_create(tsdn_t *tsdn, arena_t *arena) -{ +tcache_create(tsdn_t *tsdn, arena_t *arena) { tcache_t *tcache; size_t size, stack_offset; unsigned i; @@ -321,8 +322,9 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, arena_get(TSDN_NULL, 0, true)); - if (tcache == NULL) + if (tcache == NULL) { return (NULL); + } tcache_arena_associate(tsdn, tcache, arena); @@ -345,8 +347,7 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) } static void -tcache_destroy(tsd_t *tsd, tcache_t *tcache) -{ +tcache_destroy(tsd_t *tsd, tcache_t *tcache) { arena_t *arena; unsigned i; @@ -372,20 +373,21 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) } if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) + arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) { prof_idump(tsd_tsdn(tsd)); + } idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tcache), tcache, NULL, true, true); } void -tcache_cleanup(tsd_t *tsd) -{ +tcache_cleanup(tsd_t *tsd) { tcache_t *tcache; - if (!config_tcache) + if (!config_tcache) { return; + } if ((tcache = tsd_tcache_get(tsd)) != NULL) { tcache_destroy(tsd, tcache); @@ -394,8 +396,7 @@ tcache_cleanup(tsd_t *tsd) } void -tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) -{ +tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { unsigned i; cassert(config_stats); @@ -422,8 +423,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) -{ +tcaches_create(tsd_t *tsd, unsigned *r_ind) { arena_t *arena; tcache_t *tcache; tcaches_t *elm; @@ -431,18 +431,22 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) if (tcaches == NULL) { tcaches = 
base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); - if (tcaches == NULL) + if (tcaches == NULL) { return (true); + } } - if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) + if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) { return (true); + } arena = arena_ichoose(tsd, NULL); - if (unlikely(arena == NULL)) + if (unlikely(arena == NULL)) { return (true); + } tcache = tcache_create(tsd_tsdn(tsd), arena); - if (tcache == NULL) + if (tcache == NULL) { return (true); + } if (tcaches_avail != NULL) { elm = tcaches_avail; @@ -460,23 +464,21 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) } static void -tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) -{ - if (elm->tcache == NULL) +tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { + if (elm->tcache == NULL) { return; + } tcache_destroy(tsd, elm->tcache); elm->tcache = NULL; } void -tcaches_flush(tsd_t *tsd, unsigned ind) -{ +tcaches_flush(tsd_t *tsd, unsigned ind) { tcaches_elm_flush(tsd, &tcaches[ind]); } void -tcaches_destroy(tsd_t *tsd, unsigned ind) -{ +tcaches_destroy(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; tcaches_elm_flush(tsd, elm); elm->next = tcaches_avail; @@ -484,23 +486,25 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) } bool -tcache_boot(tsdn_t *tsdn) -{ +tcache_boot(tsdn_t *tsdn) { unsigned i; /* If necessary, clamp opt_lg_tcache_max. */ - if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) + if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < + SMALL_MAXCLASS) { tcache_maxclass = SMALL_MAXCLASS; - else + } else { tcache_maxclass = (ZU(1) << opt_lg_tcache_max); + } nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins * sizeof(tcache_bin_info_t), CACHELINE); - if (tcache_bin_info == NULL) + if (tcache_bin_info == NULL) { return (true); + } stack_nelms = 0; for (i = 0; i < NBINS; i++) { if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { diff --git a/src/tsd.c b/src/tsd.c index b4d7e795..f02fc28e 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -12,20 +12,17 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) /******************************************************************************/ void * -malloc_tsd_malloc(size_t size) -{ +malloc_tsd_malloc(size_t size) { return (a0malloc(CACHELINE_CEILING(size))); } void -malloc_tsd_dalloc(void *wrapper) -{ +malloc_tsd_dalloc(void *wrapper) { a0dalloc(wrapper); } void -malloc_tsd_no_cleanup(void *arg) -{ +malloc_tsd_no_cleanup(void *arg) { not_reached(); } @@ -34,21 +31,22 @@ malloc_tsd_no_cleanup(void *arg) JEMALLOC_EXPORT #endif void -_malloc_thread_cleanup(void) -{ +_malloc_thread_cleanup(void) { bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; - for (i = 0; i < ncleanups; i++) + for (i = 0; i < ncleanups; i++) { pending[i] = true; + } do { again = false; for (i = 0; i < ncleanups; i++) { if (pending[i]) { pending[i] = cleanups[i](); - if (pending[i]) + if (pending[i]) { again = true; + } } } } while (again); @@ -56,16 +54,14 @@ _malloc_thread_cleanup(void) #endif void -malloc_tsd_cleanup_register(bool (*f)(void)) -{ +malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); cleanups[ncleanups] = f; ncleanups++; } void -tsd_cleanup(void *arg) -{ +tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; switch (tsd->state) { @@ -108,29 +104,27 @@ MALLOC_TSD } tsd_t * -malloc_tsd_boot0(void) -{ +malloc_tsd_boot0(void) { tsd_t *tsd; ncleanups = 0; - if (tsd_boot0()) + if (tsd_boot0()) { return (NULL); + } tsd = tsd_fetch(); *tsd_arenas_tdata_bypassp_get(tsd) = true; return (tsd); } void -malloc_tsd_boot1(void) -{ 
+malloc_tsd_boot1(void) { tsd_boot1(); *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; } #ifdef _WIN32 static BOOL WINAPI -_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) -{ +_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { switch (fdwReason) { #ifdef JEMALLOC_LAZY_LOCK case DLL_THREAD_ATTACH: @@ -164,8 +158,7 @@ BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) void * -tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) -{ +tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { pthread_t self = pthread_self(); tsd_init_block_t *iter; @@ -186,8 +179,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) } void -tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) -{ +tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { malloc_mutex_lock(TSDN_NULL, &head->lock); ql_remove(&head->blocks, block, link); malloc_mutex_unlock(TSDN_NULL, &head->lock); diff --git a/src/util.c b/src/util.c index c6ac4e11..a9595397 100644 --- a/src/util.c +++ b/src/util.c @@ -46,8 +46,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /* malloc_message() setup. */ static void -wrtmessage(void *cbopaque, const char *s) -{ +wrtmessage(void *cbopaque, const char *s) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid @@ -71,12 +70,12 @@ JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); * je_malloc_message(...) throughout the code. */ void -malloc_write(const char *s) -{ - if (je_malloc_message != NULL) +malloc_write(const char *s) { + if (je_malloc_message != NULL) { je_malloc_message(NULL, s); - else + } else { wrtmessage(NULL, s); + } } /* @@ -84,8 +83,7 @@ malloc_write(const char *s) * provide a wrapper. 
*/ int -buferror(int err, char *buf, size_t buflen) -{ +buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); @@ -103,8 +101,7 @@ buferror(int err, char *buf, size_t buflen) } uintmax_t -malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) -{ +malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { uintmax_t ret, digit; unsigned b; bool neg; @@ -149,10 +146,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) switch (p[1]) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': - if (b == 0) + if (b == 0) { b = 8; - if (b == 8) + } + if (b == 8) { p++; + } break; case 'X': case 'x': switch (p[2]) { @@ -162,10 +161,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) case 'F': case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - if (b == 0) + if (b == 0) { b = 16; - if (b == 16) + } + if (b == 16) { p += 2; + } break; default: break; @@ -177,8 +178,9 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) goto label_return; } } - if (b == 0) + if (b == 0) { b = 10; + } /* Convert. */ ret = 0; @@ -196,8 +198,9 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) } p++; } - if (neg) + if (neg) { ret = (uintmax_t)(-((intmax_t)ret)); + } if (p == ns) { /* No conversion performed. */ @@ -211,15 +214,15 @@ label_return: if (p == ns) { /* No characters were converted. 
*/ *endptr = (char *)nptr; - } else + } else { *endptr = (char *)p; + } } return (ret); } static char * -u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) -{ +u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { unsigned i; i = U2S_BUFSIZE - 1; @@ -261,19 +264,21 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) } static char * -d2s(intmax_t x, char sign, char *s, size_t *slen_p) -{ +d2s(intmax_t x, char sign, char *s, size_t *slen_p) { bool neg; - if ((neg = (x < 0))) + if ((neg = (x < 0))) { x = -x; + } s = u2s(x, 10, false, s, slen_p); - if (neg) + if (neg) { sign = '-'; + } switch (sign) { case '-': - if (!neg) + if (!neg) { break; + } /* Fall through. */ case ' ': case '+': @@ -287,8 +292,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) } static char * -o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) -{ +o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) { s = u2s(x, 8, false, s, slen_p); if (alt_form && *s != '0') { s--; @@ -299,8 +303,7 @@ o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p) } static char * -x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) -{ +x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { s = u2s(x, 16, uppercase, s, slen_p); if (alt_form) { s -= 2; @@ -311,14 +314,14 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) } size_t -malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) -{ +malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { size_t i; const char *f; #define APPEND_C(c) do { \ - if (i < size) \ + if (i < size) { \ str[i] = (c); \ + } \ i++; \ } while (0) #define APPEND_S(s, slen) do { \ @@ -334,16 +337,18 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) (size_t)width - slen : 0); \ if (!left_justify && pad_len != 0) { \ size_t j; \ - for (j = 0; j < pad_len; j++) \ + for (j = 0; j < pad_len; 
j++) { \ APPEND_C(' '); \ + } \ } \ /* Value. */ \ APPEND_S(s, slen); \ /* Right padding. */ \ if (left_justify && pad_len != 0) { \ size_t j; \ - for (j = 0; j < pad_len; j++) \ + for (j = 0; j < pad_len; j++) { \ APPEND_C(' '); \ + } \ } \ } while (0) #define GET_ARG_NUMERIC(val, len) do { \ @@ -454,10 +459,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) break; } /* Width/precision separator. */ - if (*f == '.') + if (*f == '.') { f++; - else + } else { goto label_length; + } /* Precision. */ switch (*f) { case '*': @@ -484,8 +490,9 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) if (*f == 'l') { len = 'q'; f++; - } else + } else { len = 'l'; + } break; case 'q': case 'j': case 't': case 'z': len = *f; @@ -576,10 +583,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) }} } label_out: - if (i < size) + if (i < size) { str[i] = '\0'; - else + } else { str[size - 1] = '\0'; + } #undef APPEND_C #undef APPEND_S @@ -590,8 +598,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) JEMALLOC_FORMAT_PRINTF(3, 4) size_t -malloc_snprintf(char *str, size_t size, const char *format, ...) -{ +malloc_snprintf(char *str, size_t size, const char *format, ...) { size_t ret; va_list ap; @@ -604,8 +611,7 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) -{ + const char *format, va_list ap) { char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { @@ -630,8 +636,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, JEMALLOC_FORMAT_PRINTF(3, 4) void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) -{ + const char *format, ...) 
{ va_list ap; va_start(ap, format); @@ -642,8 +647,7 @@ malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, /* Print to stderr in such a way as to avoid memory allocation. */ JEMALLOC_FORMAT_PRINTF(1, 2) void -malloc_printf(const char *format, ...) -{ +malloc_printf(const char *format, ...) { va_list ap; va_start(ap, format); diff --git a/src/witness.c b/src/witness.c index ffc7e247..f8d66217 100644 --- a/src/witness.c +++ b/src/witness.c @@ -3,8 +3,7 @@ void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp, void *opaque) -{ + witness_comp_t *comp, void *opaque) { witness->name = name; witness->rank = rank; witness->comp = comp; @@ -16,8 +15,7 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, #define witness_lock_error JEMALLOC_N(n_witness_lock_error) #endif void -witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) -{ +witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { witness_t *w; malloc_printf(": Lock rank order reversal:"); @@ -38,8 +36,7 @@ witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); #define witness_owner_error JEMALLOC_N(n_witness_owner_error) #endif void -witness_owner_error(const witness_t *witness) -{ +witness_owner_error(const witness_t *witness) { malloc_printf(": Should own %s(%u)\n", witness->name, witness->rank); abort(); @@ -55,8 +52,7 @@ witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); #define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) #endif void -witness_not_owner_error(const witness_t *witness) -{ +witness_not_owner_error(const witness_t *witness) { malloc_printf(": Should not own %s(%u)\n", witness->name, witness->rank); abort(); @@ -73,8 +69,7 @@ witness_not_owner_error_t *witness_not_owner_error = #define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) #endif void -witness_lockless_error(const witness_list_t 
*witnesses) -{ +witness_lockless_error(const witness_list_t *witnesses) { witness_t *w; malloc_printf(": Should not own any locks:"); @@ -92,28 +87,24 @@ witness_lockless_error_t *witness_lockless_error = #endif void -witnesses_cleanup(tsd_t *tsd) -{ +witnesses_cleanup(tsd_t *tsd) { witness_assert_lockless(tsd_tsdn(tsd)); /* Do nothing. */ } void -witness_prefork(tsd_t *tsd) -{ +witness_prefork(tsd_t *tsd) { tsd_witness_fork_set(tsd, true); } void -witness_postfork_parent(tsd_t *tsd) -{ +witness_postfork_parent(tsd_t *tsd) { tsd_witness_fork_set(tsd, false); } void -witness_postfork_child(tsd_t *tsd) -{ +witness_postfork_child(tsd_t *tsd) { #ifndef JEMALLOC_MUTEX_INIT_CB witness_list_t *witnesses; diff --git a/src/zone.c b/src/zone.c index c54f4a4f..8e106632 100644 --- a/src/zone.c +++ b/src/zone.c @@ -125,8 +125,7 @@ static void zone_reinit_lock(malloc_zone_t *zone); */ static size_t -zone_size(malloc_zone_t *zone, const void *ptr) -{ +zone_size(malloc_zone_t *zone, const void *ptr) { /* * There appear to be places within Darwin (such as setenv(3)) that * cause calls to this function with pointers that *no* zone owns. If @@ -140,20 +139,17 @@ zone_size(malloc_zone_t *zone, const void *ptr) } static void * -zone_malloc(malloc_zone_t *zone, size_t size) -{ +zone_malloc(malloc_zone_t *zone, size_t size) { return (je_malloc(size)); } static void * -zone_calloc(malloc_zone_t *zone, size_t num, size_t size) -{ +zone_calloc(malloc_zone_t *zone, size_t num, size_t size) { return (je_calloc(num, size)); } static void * -zone_valloc(malloc_zone_t *zone, size_t size) -{ +zone_valloc(malloc_zone_t *zone, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. 
*/ je_posix_memalign(&ret, PAGE, size); @@ -162,8 +158,7 @@ zone_valloc(malloc_zone_t *zone, size_t size) } static void -zone_free(malloc_zone_t *zone, void *ptr) -{ +zone_free(malloc_zone_t *zone, void *ptr) { if (ivsalloc(tsdn_fetch(), ptr) != 0) { je_free(ptr); return; @@ -173,17 +168,16 @@ zone_free(malloc_zone_t *zone, void *ptr) } static void * -zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) -{ - if (ivsalloc(tsdn_fetch(), ptr) != 0) +zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { + if (ivsalloc(tsdn_fetch(), ptr) != 0) { return (je_realloc(ptr, size)); + } return (realloc(ptr, size)); } static void * -zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) -{ +zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { void *ret = NULL; /* Assignment avoids useless compiler warning. */ je_posix_memalign(&ret, alignment, size); @@ -192,8 +186,7 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) } static void -zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) -{ +zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) { size_t alloc_size; alloc_size = ivsalloc(tsdn_fetch(), ptr); @@ -207,16 +200,14 @@ zone_free_definite_size(malloc_zone_t *zone, void *ptr, size_t size) } static void -zone_destroy(malloc_zone_t *zone) -{ +zone_destroy(malloc_zone_t *zone) { /* This function should never be called. 
*/ not_reached(); } static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, - unsigned num_requested) -{ + unsigned num_requested) { unsigned i; for (i = 0; i < num_requested; i++) { @@ -230,8 +221,7 @@ zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, static void zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed, - unsigned num_to_be_freed) -{ + unsigned num_to_be_freed) { unsigned i; for (i = 0; i < num_to_be_freed; i++) { @@ -241,53 +231,47 @@ zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed, } static size_t -zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal) -{ +zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal) { return 0; } static size_t -zone_good_size(malloc_zone_t *zone, size_t size) -{ - if (size == 0) +zone_good_size(malloc_zone_t *zone, size_t size) { + if (size == 0) { size = 1; + } return (s2u(size)); } static kern_return_t zone_enumerator(task_t task, void *data, unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, - vm_range_recorder_t recorder) -{ + vm_range_recorder_t recorder) { return KERN_SUCCESS; } static boolean_t -zone_check(malloc_zone_t *zone) -{ +zone_check(malloc_zone_t *zone) { return true; } static void -zone_print(malloc_zone_t *zone, boolean_t verbose) -{ +zone_print(malloc_zone_t *zone, boolean_t verbose) { } static void -zone_log(malloc_zone_t *zone, void *address) -{ +zone_log(malloc_zone_t *zone, void *address) { } static void -zone_force_lock(malloc_zone_t *zone) -{ - if (isthreaded) +zone_force_lock(malloc_zone_t *zone) { + if (isthreaded) { jemalloc_prefork(); + } } static void -zone_force_unlock(malloc_zone_t *zone) -{ +zone_force_unlock(malloc_zone_t *zone) { /* * Call jemalloc_postfork_child() rather than * jemalloc_postfork_parent(), because this function is executed by both @@ -295,13 +279,13 @@ zone_force_unlock(malloc_zone_t *zone) * reinitialized, but the child cannot unlock mutexes that 
were locked * by the parent. */ - if (isthreaded) + if (isthreaded) { jemalloc_postfork_child(); + } } static void -zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) -{ +zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) { /* We make no effort to actually fill the values */ stats->blocks_in_use = 0; stats->size_in_use = 0; @@ -310,23 +294,20 @@ zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats) } static boolean_t -zone_locked(malloc_zone_t *zone) -{ +zone_locked(malloc_zone_t *zone) { /* Pretend no lock is being held */ return false; } static void -zone_reinit_lock(malloc_zone_t *zone) -{ +zone_reinit_lock(malloc_zone_t *zone) { /* As of OSX 10.12, this function is only used when force_unlock would * be used if the zone version were < 9. So just use force_unlock. */ zone_force_unlock(zone); } static void -zone_init(void) -{ +zone_init(void) { jemalloc_zone.size = zone_size; jemalloc_zone.malloc = zone_malloc; jemalloc_zone.calloc = zone_calloc; @@ -364,8 +345,7 @@ zone_init(void) } static malloc_zone_t * -zone_default_get(void) -{ +zone_default_get(void) { malloc_zone_t **zones = NULL; unsigned int num_zones = 0; @@ -387,16 +367,16 @@ zone_default_get(void) num_zones = 0; } - if (num_zones) + if (num_zones) { return (zones[0]); + } return (malloc_default_zone()); } /* As written, this function can only promote jemalloc_zone. */ static void -zone_promote(void) -{ +zone_promote(void) { malloc_zone_t *zone; do { @@ -433,16 +413,16 @@ zone_promote(void) JEMALLOC_ATTR(constructor) void -zone_register(void) -{ +zone_register(void) { /* * If something else replaced the system default zone allocator, don't * register jemalloc's. */ default_zone = zone_default_get(); if (!default_zone->zone_name || strcmp(default_zone->zone_name, - "DefaultMallocZone") != 0) + "DefaultMallocZone") != 0) { return; + } /* * The default purgeable zone is created lazily by OSX's libc. 
It uses diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 09c1607d..4ad7484a 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -97,75 +97,65 @@ double genrand_res53_mix(sfmt_t *ctx); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(SFMT_C_)) /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ -JEMALLOC_INLINE double to_real1(uint32_t v) -{ +JEMALLOC_INLINE double to_real1(uint32_t v) { return v * (1.0/4294967295.0); /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ -JEMALLOC_INLINE double genrand_real1(sfmt_t *ctx) -{ +JEMALLOC_INLINE double genrand_real1(sfmt_t *ctx) { return to_real1(gen_rand32(ctx)); } /** generates a random number on [0,1)-real-interval */ -JEMALLOC_INLINE double to_real2(uint32_t v) -{ +JEMALLOC_INLINE double to_real2(uint32_t v) { return v * (1.0/4294967296.0); /* divided by 2^32 */ } /** generates a random number on [0,1)-real-interval */ -JEMALLOC_INLINE double genrand_real2(sfmt_t *ctx) -{ +JEMALLOC_INLINE double genrand_real2(sfmt_t *ctx) { return to_real2(gen_rand32(ctx)); } /** generates a random number on (0,1)-real-interval */ -JEMALLOC_INLINE double to_real3(uint32_t v) -{ +JEMALLOC_INLINE double to_real3(uint32_t v) { return (((double)v) + 0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } /** generates a random number on (0,1)-real-interval */ -JEMALLOC_INLINE double genrand_real3(sfmt_t *ctx) -{ +JEMALLOC_INLINE double genrand_real3(sfmt_t *ctx) { return to_real3(gen_rand32(ctx)); } /** These real versions are due to Isaku Wada */ /** generates a random number on [0,1) with 53-bit resolution*/ -JEMALLOC_INLINE double to_res53(uint64_t v) -{ +JEMALLOC_INLINE double to_res53(uint64_t v) { return v * (1.0/18446744073709551616.0L); } /** generates a random number on [0,1) with 53-bit resolution from two * 32 bit integers */ -JEMALLOC_INLINE double to_res53_mix(uint32_t x, uint32_t y) -{ +JEMALLOC_INLINE double 
to_res53_mix(uint32_t x, uint32_t y) { return to_res53(x | ((uint64_t)y << 32)); } /** generates a random number on [0,1) with 53-bit resolution */ -JEMALLOC_INLINE double genrand_res53(sfmt_t *ctx) -{ +JEMALLOC_INLINE double genrand_res53(sfmt_t *ctx) { return to_res53(gen_rand64(ctx)); -} +} /** generates a random number on [0,1) with 53-bit resolution using 32bit integer. */ -JEMALLOC_INLINE double genrand_res53_mix(sfmt_t *ctx) -{ +JEMALLOC_INLINE double genrand_res53_mix(sfmt_t *ctx) { uint32_t x, y; x = gen_rand32(ctx); y = gen_rand32(ctx); return to_res53_mix(x, y); -} +} #endif #endif diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index c3f9d4df..98366afe 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -8,13 +8,12 @@ btalloc_n_proto(1) #define btalloc_n_gen(n) \ void * \ -btalloc_##n(size_t size, unsigned bits) \ -{ \ +btalloc_##n(size_t size, unsigned bits) { \ void *p; \ \ - if (bits == 0) \ + if (bits == 0) { \ p = mallocx(size, 0); \ - else { \ + } else { \ switch (bits & 0x1U) { \ case 0: \ p = (btalloc_0(size, bits >> 1)); \ diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index f50747d0..a664c433 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -73,8 +73,7 @@ static bool did_merge; static void * extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) -{ + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " @@ -86,8 +85,9 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, assert_ptr_eq(extent_hooks->alloc, extent_alloc_hook, "Wrong hook function"); called_alloc = true; - if (!try_alloc) + if (!try_alloc) { return (NULL); + } ret = default_hooks->alloc(default_hooks, new_addr, size, alignment, zero, commit, 0); did_alloc 
= (ret != NULL); @@ -96,8 +96,7 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) -{ + bool committed, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " @@ -108,8 +107,9 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, "Wrong hook function"); called_dalloc = true; - if (!try_dalloc) + if (!try_dalloc) { return (true); + } err = default_hooks->dalloc(default_hooks, addr, size, committed, 0); did_dalloc = !err; return (err); @@ -117,8 +117,7 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " @@ -129,8 +128,9 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->commit, extent_commit_hook, "Wrong hook function"); called_commit = true; - if (!try_commit) + if (!try_commit) { return (true); + } err = default_hooks->commit(default_hooks, addr, size, offset, length, 0); did_commit = !err; @@ -139,8 +139,7 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " @@ -151,8 +150,9 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->decommit, extent_decommit_hook, "Wrong hook function"); called_decommit = true; - 
if (!try_decommit) + if (!try_decommit) { return (true); + } err = default_hooks->decommit(default_hooks, addr, size, offset, length, 0); did_decommit = !err; @@ -161,8 +161,7 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " @@ -173,8 +172,9 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, "Wrong hook function"); called_purge_lazy = true; - if (!try_purge_lazy) + if (!try_purge_lazy) { return (true); + } err = default_hooks->purge_lazy == NULL || default_hooks->purge_lazy(default_hooks, addr, size, offset, length, 0); @@ -184,8 +184,7 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) -{ + size_t offset, size_t length, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " @@ -196,8 +195,9 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, "Wrong hook function"); called_purge_forced = true; - if (!try_purge_forced) + if (!try_purge_forced) { return (true); + } err = default_hooks->purge_forced == NULL || default_hooks->purge_forced(default_hooks, addr, size, offset, length, 0); @@ -207,8 +207,7 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) -{ + size_t size_a, size_t size_b, 
bool committed, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " @@ -220,8 +219,9 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->split, extent_split_hook, "Wrong hook function"); called_split = true; - if (!try_split) + if (!try_split) { return (true); + } err = (default_hooks->split == NULL || default_hooks->split(default_hooks, addr, size, size_a, size_b, committed, 0)); @@ -231,8 +231,7 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) -{ + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { bool err; TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " @@ -244,8 +243,9 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, assert_ptr_eq(extent_hooks->merge, extent_merge_hook, "Wrong hook function"); called_merge = true; - if (!try_merge) + if (!try_merge) { return (true); + } err = (default_hooks->merge == NULL || default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b, committed, 0)); @@ -254,8 +254,7 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } static void -extent_hooks_prep(void) -{ +extent_hooks_prep(void) { size_t sz; sz = sizeof(default_hooks); diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 2dd0cdea..a0b94747 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -159,8 +159,9 @@ static const bool config_debug = } while (0) #define assert_not_implemented(e) do { \ - if (!(e)) \ + if (!(e)) { \ not_implemented(); \ + } \ } while (0) #ifdef __cplusplus diff --git a/test/include/test/math.h b/test/include/test/math.h index 1728d60f..08be69f8 100644 --- a/test/include/test/math.h +++ 
b/test/include/test/math.h @@ -16,8 +16,7 @@ double pt_gamma(double p, double shape, double scale, double ln_gamma_shape); * [S14]. Communications of the ACM 9(9):684. */ JEMALLOC_INLINE double -ln_gamma(double x) -{ +ln_gamma(double x) { double f, z; assert(x > 0.0); @@ -31,8 +30,9 @@ ln_gamma(double x) } x = z; f = -log(f); - } else + } else { f = 0.0; + } z = 1.0 / (x * x); @@ -51,8 +51,7 @@ ln_gamma(double x) * Applied Statistics 19:285-287. */ JEMALLOC_INLINE double -i_gamma(double x, double p, double ln_gamma_p) -{ +i_gamma(double x, double p, double ln_gamma_p) { double acu, factor, oflo, gin, term, rn, a, b, an, dif; double pn[6]; unsigned i; @@ -60,8 +59,9 @@ i_gamma(double x, double p, double ln_gamma_p) assert(p > 0.0); assert(x >= 0.0); - if (x == 0.0) + if (x == 0.0) { return (0.0); + } acu = 1.0e-10; oflo = 1.0e30; @@ -99,8 +99,9 @@ i_gamma(double x, double p, double ln_gamma_p) b += 2.0; term += 1.0; an = a * term; - for (i = 0; i < 2; i++) + for (i = 0; i < 2; i++) { pn[i+4] = b * pn[i+2] - an * pn[i]; + } if (pn[5] != 0.0) { rn = pn[4] / pn[5]; dif = fabs(gin - rn); @@ -110,12 +111,14 @@ i_gamma(double x, double p, double ln_gamma_p) } gin = rn; } - for (i = 0; i < 4; i++) + for (i = 0; i < 4; i++) { pn[i] = pn[i+2]; + } if (fabs(pn[4]) >= oflo) { - for (i = 0; i < 4; i++) + for (i = 0; i < 4; i++) { pn[i] /= oflo; + } } } } @@ -132,8 +135,7 @@ i_gamma(double x, double p, double ln_gamma_p) * distribution. Applied Statistics 37(3):477-484. 
*/ JEMALLOC_INLINE double -pt_norm(double p) -{ +pt_norm(double p) { double q, r, ret; assert(p > 0.0 && p < 1.0); @@ -153,10 +155,11 @@ pt_norm(double p) r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) * r + 1.0)); } else { - if (q < 0.0) + if (q < 0.0) { r = p; - else + } else { r = 1.0 - p; + } assert(r > 0.0); r = sqrt(-log(r)); @@ -198,8 +201,9 @@ pt_norm(double p) 5.99832206555887937690e-1) * r + 1.0)); } - if (q < 0.0) + if (q < 0.0) { ret = -ret; + } return (ret); } } @@ -219,8 +223,7 @@ pt_norm(double p) * points of the Chi^2 distribution. Applied Statistics 40(1):233-235. */ JEMALLOC_INLINE double -pt_chi2(double p, double df, double ln_gamma_df_2) -{ +pt_chi2(double p, double df, double ln_gamma_df_2) { double e, aa, xx, c, ch, a, q, p1, p2, t, x, b, s1, s2, s3, s4, s5, s6; unsigned i; @@ -236,8 +239,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) if (df < -1.24 * log(p)) { /* Starting approximation for small Chi^2. */ ch = pow(p * xx * exp(ln_gamma_df_2 + xx * aa), 1.0 / xx); - if (ch - e < 0.0) + if (ch - e < 0.0) { return (ch); + } } else { if (df > 0.32) { x = pt_norm(p); @@ -263,8 +267,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) * (13.32 + 3.0 * ch)) / p2; ch -= (1.0 - exp(a + ln_gamma_df_2 + 0.5 * ch + c * aa) * p2 / p1) / t; - if (fabs(q / ch - 1.0) - 0.01 <= 0.0) + if (fabs(q / ch - 1.0) - 0.01 <= 0.0) { break; + } } } } @@ -273,8 +278,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) /* Calculation of seven-term Taylor series. 
*/ q = ch; p1 = 0.5 * ch; - if (p1 < 0.0) + if (p1 < 0.0) { return (-1.0); + } p2 = p - i_gamma(p1, xx, ln_gamma_df_2); t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch)); b = t / ch; @@ -290,8 +296,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) s6 = (120.0 + c * (346.0 + 127.0 * c)) / 5040.0; ch += t * (1.0 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - b * (s4 - b * (s5 - b * s6)))))); - if (fabs(q / ch - 1.0) <= e) + if (fabs(q / ch - 1.0) <= e) { break; + } } return (ch); @@ -303,8 +310,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2) * p. */ JEMALLOC_INLINE double -pt_gamma(double p, double shape, double scale, double ln_gamma_shape) -{ +pt_gamma(double p, double shape, double scale, double ln_gamma_shape) { return (pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale); } #endif diff --git a/test/include/test/mq.h b/test/include/test/mq.h index a974eb90..fd66de95 100644 --- a/test/include/test/mq.h +++ b/test/include/test/mq.h @@ -37,20 +37,19 @@ typedef struct { \ a_attr bool \ a_prefix##init(a_mq_type *mq) { \ \ - if (mtx_init(&mq->lock)) \ + if (mtx_init(&mq->lock)) { \ return (true); \ + } \ ql_new(&mq->msgs); \ mq->count = 0; \ return (false); \ } \ a_attr void \ -a_prefix##fini(a_mq_type *mq) \ -{ \ +a_prefix##fini(a_mq_type *mq) { \ mtx_fini(&mq->lock); \ } \ a_attr unsigned \ -a_prefix##count(a_mq_type *mq) \ -{ \ +a_prefix##count(a_mq_type *mq) { \ unsigned count; \ \ mtx_lock(&mq->lock); \ @@ -59,8 +58,7 @@ a_prefix##count(a_mq_type *mq) \ return (count); \ } \ a_attr a_mq_msg_type * \ -a_prefix##tryget(a_mq_type *mq) \ -{ \ +a_prefix##tryget(a_mq_type *mq) { \ a_mq_msg_type *msg; \ \ mtx_lock(&mq->lock); \ @@ -73,32 +71,33 @@ a_prefix##tryget(a_mq_type *mq) \ return (msg); \ } \ a_attr a_mq_msg_type * \ -a_prefix##get(a_mq_type *mq) \ -{ \ +a_prefix##get(a_mq_type *mq) { \ a_mq_msg_type *msg; \ unsigned ns; \ \ msg = a_prefix##tryget(mq); \ - if (msg != NULL) \ + if (msg != NULL) { \ return (msg); \ + } \ \ ns = 1; \ while 
(true) { \ mq_nanosleep(ns); \ msg = a_prefix##tryget(mq); \ - if (msg != NULL) \ + if (msg != NULL) { \ return (msg); \ + } \ if (ns < 1000*1000*1000) { \ /* Double sleep time, up to max 1 second. */ \ ns <<= 1; \ - if (ns > 1000*1000*1000) \ + if (ns > 1000*1000*1000) { \ ns = 1000*1000*1000; \ + } \ } \ } \ } \ a_attr void \ -a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) \ -{ \ +a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) { \ \ mtx_lock(&mq->lock); \ ql_elm_new(msg, a_field); \ diff --git a/test/include/test/test.h b/test/include/test/test.h index 8c69fc2e..a1b6f72a 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -298,8 +298,7 @@ typedef void (test_t)(void); #define TEST_BEGIN(f) \ static void \ -f(void) \ -{ \ +f(void) { \ p_test_init(#f); #define TEST_END \ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 1d9e423e..f706e5a5 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -11,8 +11,7 @@ static bool have_dss = ; void * -thd_start(void *arg) -{ +thd_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; unsigned arena_ind; void *p; @@ -45,8 +44,7 @@ thd_start(void *arg) return (NULL); } -TEST_BEGIN(test_MALLOCX_ARENA) -{ +TEST_BEGIN(test_MALLOCX_ARENA) { thd_t thds[NTHREADS]; unsigned i; @@ -55,14 +53,14 @@ TEST_BEGIN(test_MALLOCX_ARENA) (void *)(uintptr_t)i); } - for (i = 0; i < NTHREADS; i++) + for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); + } } TEST_END int -main(void) -{ +main(void) { return (test( test_MALLOCX_ARENA)); } diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 52b69acb..8a3ad6b9 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -8,14 +8,12 @@ * potential OOM on e.g. 32-bit Windows. 
*/ static void -purge(void) -{ +purge(void) { assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } -TEST_BEGIN(test_alignment_errors) -{ +TEST_BEGIN(test_alignment_errors) { size_t alignment; void *p; @@ -36,8 +34,7 @@ TEST_BEGIN(test_alignment_errors) } TEST_END -TEST_BEGIN(test_oom_errors) -{ +TEST_BEGIN(test_oom_errors) { size_t alignment, size; void *p; @@ -81,15 +78,15 @@ TEST_BEGIN(test_oom_errors) } TEST_END -TEST_BEGIN(test_alignment_and_size) -{ +TEST_BEGIN(test_alignment_and_size) { #define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; - for (i = 0; i < NITER; i++) + for (i = 0; i < NITER; i++) { ps[i] = NULL; + } for (alignment = 8; alignment <= MAXALIGN; @@ -110,8 +107,9 @@ TEST_BEGIN(test_alignment_and_size) alignment, size, size, buf); } total += malloc_usable_size(ps[i]); - if (total >= (MAXALIGN << 1)) + if (total >= (MAXALIGN << 1)) { break; + } } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { @@ -127,8 +125,7 @@ TEST_BEGIN(test_alignment_and_size) TEST_END int -main(void) -{ +main(void) { return (test( test_alignment_errors, test_oom_errors, diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 7570c52f..555d40a9 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -9,8 +9,7 @@ static const bool config_stats = ; void * -thd_start(void *arg) -{ +thd_start(void *arg) { int err; void *p; uint64_t a0, a1, d0, d1; @@ -19,15 +18,17 @@ thd_start(void *arg) sz = sizeof(a0); if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { - if (err == ENOENT) + if (err == ENOENT) { goto label_ENOENT; + } test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { - if (err == ENOENT) + if (err == ENOENT) { goto label_ENOENT; + } test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } @@ -37,16 +38,18 @@ thd_start(void *arg) 
sz = sizeof(d0); if ((err = mallctl("thread.deallocated", (void *)&d0, &sz, NULL, 0))) { - if (err == ENOENT) + if (err == ENOENT) { goto label_ENOENT; + } test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, 0))) { - if (err == ENOENT) + if (err == ENOENT) { goto label_ENOENT; + } test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } @@ -96,14 +99,12 @@ label_ENOENT: return (NULL); } -TEST_BEGIN(test_main_thread) -{ +TEST_BEGIN(test_main_thread) { thd_start(NULL); } TEST_END -TEST_BEGIN(test_subthread) -{ +TEST_BEGIN(test_subthread) { thd_t thd; thd_create(&thd, thd_start, NULL); @@ -112,8 +113,7 @@ TEST_BEGIN(test_subthread) TEST_END int -main(void) -{ +main(void) { /* Run tests multiple times to check for bad interactions. */ return (test( test_main_thread, diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index b208e1d1..fe8874fa 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -1,8 +1,7 @@ #include #include "test/jemalloc_test.h" -TEST_BEGIN(test_basic) -{ +TEST_BEGIN(test_basic) { auto foo = new long(4); assert_ptr_not_null(foo, "Unexpected new[] failure"); delete foo; @@ -20,8 +19,7 @@ TEST_BEGIN(test_basic) TEST_END int -main() -{ +main() { return (test( test_basic)); } diff --git a/test/integration/extent.c b/test/integration/extent.c index 30849b0c..d12c123c 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -7,8 +7,7 @@ const char *malloc_conf = "junk:false"; #include "test/extent_hooks.h" static void -test_extent_body(unsigned arena_ind) -{ +test_extent_body(unsigned arena_ind) { void *p; size_t large0, large1, large2, sz; size_t purge_mib[3]; @@ -67,15 +66,17 @@ test_extent_body(unsigned arena_ind) xallocx_success_b = (xallocx(p, large0, 0, flags) == large0); assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected 
arena.%u.purge error", arena_ind); - if (xallocx_success_b) + if (xallocx_success_b) { assert_true(did_split, "Expected split"); + } xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2); if (did_split) { assert_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); } - if (xallocx_success_b && xallocx_success_c) + if (xallocx_success_b && xallocx_success_c) { assert_true(did_merge, "Expected merge"); + } dallocx(p, flags); try_dalloc = true; try_decommit = false; @@ -86,8 +87,7 @@ test_extent_body(unsigned arena_ind) dallocx(p, flags); } -TEST_BEGIN(test_extent_manual_hook) -{ +TEST_BEGIN(test_extent_manual_hook) { unsigned arena_ind; size_t old_size, new_size, sz; size_t hooks_mib[3]; @@ -155,8 +155,7 @@ TEST_BEGIN(test_extent_manual_hook) } TEST_END -TEST_BEGIN(test_extent_auto_hook) -{ +TEST_BEGIN(test_extent_auto_hook) { unsigned arena_ind; size_t new_size, sz; extent_hooks_t *new_hooks; @@ -174,8 +173,7 @@ TEST_BEGIN(test_extent_auto_hook) TEST_END int -main(void) -{ +main(void) { return (test( test_extent_manual_hook, test_extent_auto_hook)); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 7617b1b7..ec04c399 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -5,8 +5,7 @@ const char *malloc_conf = "junk:false"; #endif static unsigned -get_nsizes_impl(const char *cmd) -{ +get_nsizes_impl(const char *cmd) { unsigned ret; size_t z; @@ -18,14 +17,12 @@ get_nsizes_impl(const char *cmd) } static unsigned -get_nlarge(void) -{ +get_nlarge(void) { return (get_nsizes_impl("arenas.nlextents")); } static size_t -get_size_impl(const char *cmd, size_t ind) -{ +get_size_impl(const char *cmd, size_t ind) { size_t ret; size_t z; size_t mib[4]; @@ -43,8 +40,7 @@ get_size_impl(const char *cmd, size_t ind) } static size_t -get_large_size(size_t ind) -{ +get_large_size(size_t ind) { return (get_size_impl("arenas.lextent.0.size", ind)); } @@ -54,14 +50,12 @@ get_large_size(size_t ind) * potential OOM on 
e.g. 32-bit Windows. */ static void -purge(void) -{ +purge(void) { assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } -TEST_BEGIN(test_overflow) -{ +TEST_BEGIN(test_overflow) { size_t largemax; largemax = get_large_size(get_nlarge()-1); @@ -81,8 +75,7 @@ TEST_BEGIN(test_overflow) } TEST_END -TEST_BEGIN(test_oom) -{ +TEST_BEGIN(test_oom) { size_t largemax; bool oom; void *ptrs[3]; @@ -96,15 +89,17 @@ TEST_BEGIN(test_oom) oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { ptrs[i] = mallocx(largemax, 0); - if (ptrs[i] == NULL) + if (ptrs[i] == NULL) { oom = true; + } } assert_true(oom, "Expected OOM during series of calls to mallocx(size=%zu, 0)", largemax); for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { - if (ptrs[i] != NULL) + if (ptrs[i] != NULL) { dallocx(ptrs[i], 0); + } } purge(); @@ -122,8 +117,7 @@ TEST_BEGIN(test_oom) } TEST_END -TEST_BEGIN(test_basic) -{ +TEST_BEGIN(test_basic) { #define MAXSZ (((size_t)1) << 23) size_t sz; @@ -160,16 +154,16 @@ TEST_BEGIN(test_basic) } TEST_END -TEST_BEGIN(test_alignment_and_size) -{ +TEST_BEGIN(test_alignment_and_size) { #define MAXALIGN (((size_t)1) << 23) #define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; void *ps[NITER]; - for (i = 0; i < NITER; i++) + for (i = 0; i < NITER; i++) { ps[i] = NULL; + } for (alignment = 8; alignment <= MAXALIGN; @@ -202,8 +196,9 @@ TEST_BEGIN(test_alignment_and_size) " alignment=%zu, size=%zu", ps[i], alignment, sz); total += rsz; - if (total >= (MAXALIGN << 1)) + if (total >= (MAXALIGN << 1)) { break; + } } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { @@ -220,8 +215,7 @@ TEST_BEGIN(test_alignment_and_size) TEST_END int -main(void) -{ +main(void) { return (test( test_overflow, test_oom, diff --git a/test/integration/overflow.c b/test/integration/overflow.c index ad867e7c..a7f4b515 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" 
-TEST_BEGIN(test_overflow) -{ +TEST_BEGIN(test_overflow) { unsigned nlextents; size_t mib[4]; size_t sz, miblen, max_size_class; @@ -41,8 +40,7 @@ TEST_BEGIN(test_overflow) TEST_END int -main(void) -{ +main(void) { return (test( test_overflow)); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index dace10f7..6bbf1839 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -8,14 +8,12 @@ * potential OOM on e.g. 32-bit Windows. */ static void -purge(void) -{ +purge(void) { assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } -TEST_BEGIN(test_alignment_errors) -{ +TEST_BEGIN(test_alignment_errors) { size_t alignment; void *p; @@ -34,8 +32,7 @@ TEST_BEGIN(test_alignment_errors) } TEST_END -TEST_BEGIN(test_oom_errors) -{ +TEST_BEGIN(test_oom_errors) { size_t alignment, size; void *p; @@ -73,16 +70,16 @@ TEST_BEGIN(test_oom_errors) } TEST_END -TEST_BEGIN(test_alignment_and_size) -{ +TEST_BEGIN(test_alignment_and_size) { #define NITER 4 size_t alignment, size, total; unsigned i; int err; void *ps[NITER]; - for (i = 0; i < NITER; i++) + for (i = 0; i < NITER; i++) { ps[i] = NULL; + } for (alignment = 8; alignment <= MAXALIGN; @@ -104,8 +101,9 @@ TEST_BEGIN(test_alignment_and_size) alignment, size, size, buf); } total += malloc_usable_size(ps[i]); - if (total >= (MAXALIGN << 1)) + if (total >= (MAXALIGN << 1)) { break; + } } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { @@ -121,8 +119,7 @@ TEST_BEGIN(test_alignment_and_size) TEST_END int -main(void) -{ +main(void) { return (test( test_alignment_errors, test_oom_errors, diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 0a8b50c7..176b9957 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" static unsigned -get_nsizes_impl(const char *cmd) -{ +get_nsizes_impl(const char *cmd) { unsigned ret; size_t z; @@ -14,14 +13,12 @@ 
get_nsizes_impl(const char *cmd) } static unsigned -get_nlarge(void) -{ +get_nlarge(void) { return (get_nsizes_impl("arenas.nlextents")); } static size_t -get_size_impl(const char *cmd, size_t ind) -{ +get_size_impl(const char *cmd, size_t ind) { size_t ret; size_t z; size_t mib[4]; @@ -39,13 +36,11 @@ get_size_impl(const char *cmd, size_t ind) } static size_t -get_large_size(size_t ind) -{ +get_large_size(size_t ind) { return (get_size_impl("arenas.lextent.0.size", ind)); } -TEST_BEGIN(test_grow_and_shrink) -{ +TEST_BEGIN(test_grow_and_shrink) { void *p, *q; size_t tsz; #define NCYCLES 3 @@ -90,8 +85,7 @@ TEST_BEGIN(test_grow_and_shrink) TEST_END static bool -validate_fill(const void *p, uint8_t c, size_t offset, size_t len) -{ +validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { bool ret = false; const uint8_t *buf = (const uint8_t *)p; size_t i; @@ -109,8 +103,7 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) return (ret); } -TEST_BEGIN(test_zero) -{ +TEST_BEGIN(test_zero) { void *p, *q; size_t psz, qsz, i, j; size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; @@ -154,8 +147,7 @@ TEST_BEGIN(test_zero) } TEST_END -TEST_BEGIN(test_align) -{ +TEST_BEGIN(test_align) { void *p, *q; size_t align; #define MAX_ALIGN (ZU(1) << 25) @@ -179,8 +171,7 @@ TEST_BEGIN(test_align) } TEST_END -TEST_BEGIN(test_lg_align_and_zero) -{ +TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; unsigned lg_align; size_t sz; @@ -217,8 +208,7 @@ TEST_BEGIN(test_lg_align_and_zero) } TEST_END -TEST_BEGIN(test_overflow) -{ +TEST_BEGIN(test_overflow) { size_t largemax; void *p; @@ -245,8 +235,7 @@ TEST_BEGIN(test_overflow) TEST_END int -main(void) -{ +main(void) { return (test( test_grow_and_shrink, test_zero, diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index 5d0a8f80..bf2fd2c0 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -3,21 +3,20 @@ #define MAXALIGN (((size_t)1) << 22) #define NITER 3 
-TEST_BEGIN(test_basic) -{ +TEST_BEGIN(test_basic) { void *ptr = mallocx(64, 0); sdallocx(ptr, 64, 0); } TEST_END -TEST_BEGIN(test_alignment_and_size) -{ +TEST_BEGIN(test_alignment_and_size) { size_t nsz, sz, alignment, total; unsigned i; void *ps[NITER]; - for (i = 0; i < NITER; i++) + for (i = 0; i < NITER; i++) { ps[i] = NULL; + } for (alignment = 8; alignment <= MAXALIGN; @@ -32,8 +31,9 @@ TEST_BEGIN(test_alignment_and_size) ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); total += nsz; - if (total >= (MAXALIGN << 1)) + if (total >= (MAXALIGN << 1)) { break; + } } for (i = 0; i < NITER; i++) { if (ps[i] != NULL) { @@ -48,8 +48,7 @@ TEST_BEGIN(test_alignment_and_size) TEST_END int -main(void) -{ +main(void) { return (test( test_basic, test_alignment_and_size)); diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index cf8240d1..5adb5ce0 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -3,8 +3,7 @@ #define NTHREADS 10 void * -thd_start(void *arg) -{ +thd_start(void *arg) { unsigned main_arena_ind = *(unsigned *)arg; void *p; unsigned arena_ind; @@ -38,8 +37,7 @@ thd_start(void *arg) return (NULL); } -TEST_BEGIN(test_thread_arena) -{ +TEST_BEGIN(test_thread_arena) { void *p; unsigned arena_ind; size_t size; @@ -73,8 +71,7 @@ TEST_BEGIN(test_thread_arena) TEST_END int -main(void) -{ +main(void) { return (test( test_thread_arena)); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 1394371b..117d06bf 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -9,8 +9,7 @@ static const bool config_tcache = ; void * -thd_start(void *arg) -{ +thd_start(void *arg) { int err; size_t sz; bool e0, e1; @@ -84,14 +83,12 @@ label_ENOENT: return (NULL); } -TEST_BEGIN(test_main_thread) -{ +TEST_BEGIN(test_main_thread) { thd_start(NULL); } TEST_END -TEST_BEGIN(test_subthread) -{ +TEST_BEGIN(test_subthread) 
{ thd_t thd; thd_create(&thd, thd_start, NULL); @@ -100,8 +97,7 @@ TEST_BEGIN(test_subthread) TEST_END int -main(void) -{ +main(void) { /* Run tests multiple times to check for bad interactions. */ return (test( test_main_thread, diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 647404a7..9b4b68e0 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -10,8 +10,7 @@ const char *malloc_conf = "junk:false"; * xallocx() would ordinarily be able to extend. */ static unsigned -arena_ind(void) -{ +arena_ind(void) { static unsigned ind = 0; if (ind == 0) { @@ -23,8 +22,7 @@ arena_ind(void) return (ind); } -TEST_BEGIN(test_same_size) -{ +TEST_BEGIN(test_same_size) { void *p; size_t sz, tsz; @@ -39,8 +37,7 @@ TEST_BEGIN(test_same_size) } TEST_END -TEST_BEGIN(test_extra_no_move) -{ +TEST_BEGIN(test_extra_no_move) { void *p; size_t sz, tsz; @@ -55,8 +52,7 @@ TEST_BEGIN(test_extra_no_move) } TEST_END -TEST_BEGIN(test_no_move_fail) -{ +TEST_BEGIN(test_no_move_fail) { void *p; size_t sz, tsz; @@ -72,8 +68,7 @@ TEST_BEGIN(test_no_move_fail) TEST_END static unsigned -get_nsizes_impl(const char *cmd) -{ +get_nsizes_impl(const char *cmd) { unsigned ret; size_t z; @@ -85,20 +80,17 @@ get_nsizes_impl(const char *cmd) } static unsigned -get_nsmall(void) -{ +get_nsmall(void) { return (get_nsizes_impl("arenas.nbins")); } static unsigned -get_nlarge(void) -{ +get_nlarge(void) { return (get_nsizes_impl("arenas.nlextents")); } static size_t -get_size_impl(const char *cmd, size_t ind) -{ +get_size_impl(const char *cmd, size_t ind) { size_t ret; size_t z; size_t mib[4]; @@ -116,19 +108,16 @@ get_size_impl(const char *cmd, size_t ind) } static size_t -get_small_size(size_t ind) -{ +get_small_size(size_t ind) { return (get_size_impl("arenas.bin.0.size", ind)); } static size_t -get_large_size(size_t ind) -{ +get_large_size(size_t ind) { return (get_size_impl("arenas.lextent.0.size", ind)); } -TEST_BEGIN(test_size) -{ +TEST_BEGIN(test_size) { size_t 
small0, largemax; void *p; @@ -157,8 +146,7 @@ TEST_BEGIN(test_size) } TEST_END -TEST_BEGIN(test_size_extra_overflow) -{ +TEST_BEGIN(test_size_extra_overflow) { size_t small0, largemax; void *p; @@ -189,8 +177,7 @@ TEST_BEGIN(test_size_extra_overflow) } TEST_END -TEST_BEGIN(test_extra_small) -{ +TEST_BEGIN(test_extra_small) { size_t small0, small1, largemax; void *p; @@ -221,8 +208,7 @@ TEST_BEGIN(test_extra_small) } TEST_END -TEST_BEGIN(test_extra_large) -{ +TEST_BEGIN(test_extra_large) { int flags = MALLOCX_ARENA(arena_ind()); size_t smallmax, large1, large2, large3, largemax; void *p; @@ -292,8 +278,7 @@ TEST_BEGIN(test_extra_large) TEST_END static void -print_filled_extents(const void *p, uint8_t c, size_t len) -{ +print_filled_extents(const void *p, uint8_t c, size_t len) { const uint8_t *pc = (const uint8_t *)p; size_t i, range0; uint8_t c0; @@ -312,26 +297,26 @@ print_filled_extents(const void *p, uint8_t c, size_t len) } static bool -validate_fill(const void *p, uint8_t c, size_t offset, size_t len) -{ +validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { const uint8_t *pc = (const uint8_t *)p; bool err; size_t i; for (i = offset, err = false; i < offset+len; i++) { - if (pc[i] != c) + if (pc[i] != c) { err = true; + } } - if (err) + if (err) { print_filled_extents(p, c, offset + len); + } return (err); } static void -test_zero(size_t szmin, size_t szmax) -{ +test_zero(size_t szmin, size_t szmax) { int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; void *p; @@ -378,8 +363,7 @@ test_zero(size_t szmin, size_t szmax) dallocx(p, flags); } -TEST_BEGIN(test_zero_large) -{ +TEST_BEGIN(test_zero_large) { size_t large0, large1; /* Get size classes. 
*/ @@ -391,8 +375,7 @@ TEST_BEGIN(test_zero_large) TEST_END int -main(void) -{ +main(void) { return (test( test_same_size, test_extra_no_move, diff --git a/test/src/btalloc.c b/test/src/btalloc.c index a78cb89b..bc31f9b8 100644 --- a/test/src/btalloc.c +++ b/test/src/btalloc.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" void * -btalloc(size_t size, unsigned bits) -{ +btalloc(size_t size, unsigned bits) { return (btalloc_0(size, bits)); } diff --git a/test/src/mq.c b/test/src/mq.c index 47f362c0..9b5f672d 100644 --- a/test/src/mq.c +++ b/test/src/mq.c @@ -5,8 +5,7 @@ * time is guaranteed. */ void -mq_nanosleep(unsigned ns) -{ +mq_nanosleep(unsigned ns) { assert(ns <= 1000*1000*1000); #ifdef _WIN32 diff --git a/test/src/mtx.c b/test/src/mtx.c index bbfec4ac..924ba287 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -5,11 +5,12 @@ #endif bool -mtx_init(mtx_t *mtx) -{ +mtx_init(mtx_t *mtx) { #ifdef _WIN32 - if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) + if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, + _CRT_SPINCOUNT)) { return (true); + } #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) mtx->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_OSSPIN)) @@ -17,8 +18,9 @@ mtx_init(mtx_t *mtx) #else pthread_mutexattr_t attr; - if (pthread_mutexattr_init(&attr) != 0) + if (pthread_mutexattr_init(&attr) != 0) { return (true); + } pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); if (pthread_mutex_init(&mtx->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); @@ -30,8 +32,7 @@ mtx_init(mtx_t *mtx) } void -mtx_fini(mtx_t *mtx) -{ +mtx_fini(mtx_t *mtx) { #ifdef _WIN32 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) #elif (defined(JEMALLOC_OSSPIN)) @@ -41,8 +42,7 @@ mtx_fini(mtx_t *mtx) } void -mtx_lock(mtx_t *mtx) -{ +mtx_lock(mtx_t *mtx) { #ifdef _WIN32 EnterCriticalSection(&mtx->lock); #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) @@ -55,8 +55,7 @@ mtx_lock(mtx_t *mtx) } void -mtx_unlock(mtx_t *mtx) -{ +mtx_unlock(mtx_t *mtx) { #ifdef _WIN32 
LeaveCriticalSection(&mtx->lock); #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) diff --git a/test/src/test.c b/test/src/test.c index 345cc1c1..1155326b 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -7,8 +7,7 @@ static const char * test_name = ""; JEMALLOC_FORMAT_PRINTF(1, 2) void -test_skip(const char *format, ...) -{ +test_skip(const char *format, ...) { va_list ap; va_start(ap, format); @@ -20,8 +19,7 @@ test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2) void -test_fail(const char *format, ...) -{ +test_fail(const char *format, ...) { va_list ap; va_start(ap, format); @@ -32,8 +30,7 @@ test_fail(const char *format, ...) } static const char * -test_status_string(test_status_t test_status) -{ +test_status_string(test_status_t test_status) { switch (test_status) { case test_status_pass: return "pass"; case test_status_skip: return "skip"; @@ -43,23 +40,20 @@ test_status_string(test_status_t test_status) } void -p_test_init(const char *name) -{ +p_test_init(const char *name) { test_count++; test_status = test_status_pass; test_name = name; } void -p_test_fini(void) -{ +p_test_fini(void) { test_counts[test_status]++; malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); } static test_status_t -p_test_impl(bool do_malloc_init, test_t *t, va_list ap) -{ +p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { test_status_t ret; if (do_malloc_init) { @@ -78,8 +72,9 @@ p_test_impl(bool do_malloc_init, test_t *t, va_list ap) ret = test_status_pass; for (; t != NULL; t = va_arg(ap, test_t *)) { t(); - if (test_status > ret) + if (test_status > ret) { ret = test_status; + } } malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", @@ -94,8 +89,7 @@ p_test_impl(bool do_malloc_init, test_t *t, va_list ap) } test_status_t -p_test(test_t *t, ...) -{ +p_test(test_t *t, ...) { test_status_t ret; va_list ap; @@ -108,8 +102,7 @@ p_test(test_t *t, ...) } test_status_t -p_test_no_malloc_init(test_t *t, ...) 
-{ +p_test_no_malloc_init(test_t *t, ...) { test_status_t ret; va_list ap; @@ -122,8 +115,7 @@ p_test_no_malloc_init(test_t *t, ...) } void -p_test_fail(const char *prefix, const char *message) -{ +p_test_fail(const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } diff --git a/test/src/thd.c b/test/src/thd.c index e3167089..9a15eabb 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -2,17 +2,16 @@ #ifdef _WIN32 void -thd_create(thd_t *thd, void *(*proc)(void *), void *arg) -{ +thd_create(thd_t *thd, void *(*proc)(void *), void *arg) { LPTHREAD_START_ROUTINE routine = (LPTHREAD_START_ROUTINE)proc; *thd = CreateThread(NULL, 0, routine, arg, 0, NULL); - if (*thd == NULL) + if (*thd == NULL) { test_fail("Error in CreateThread()\n"); + } } void -thd_join(thd_t thd, void **ret) -{ +thd_join(thd_t thd, void **ret) { if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) { DWORD exit_code; GetExitCodeThread(thd, (LPDWORD) &exit_code); @@ -22,15 +21,14 @@ thd_join(thd_t thd, void **ret) #else void -thd_create(thd_t *thd, void *(*proc)(void *), void *arg) -{ - if (pthread_create(thd, NULL, proc, arg) != 0) +thd_create(thd_t *thd, void *(*proc)(void *), void *arg) { + if (pthread_create(thd, NULL, proc, arg) != 0) { test_fail("Error in pthread_create()\n"); + } } void -thd_join(thd_t thd, void **ret) -{ +thd_join(thd_t thd, void **ret) { pthread_join(thd, ret); } #endif diff --git a/test/src/timer.c b/test/src/timer.c index 82f69d0a..1b186332 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -1,22 +1,19 @@ #include "test/jemalloc_test.h" void -timer_start(timedelta_t *timer) -{ +timer_start(timedelta_t *timer) { nstime_init(&timer->t0, 0); nstime_update(&timer->t0); } void -timer_stop(timedelta_t *timer) -{ +timer_stop(timedelta_t *timer) { nstime_copy(&timer->t1, &timer->t0); nstime_update(&timer->t1); } uint64_t -timer_usec(const timedelta_t *timer) -{ +timer_usec(const timedelta_t 
*timer) { nstime_t delta; nstime_copy(&delta, &timer->t1); @@ -25,8 +22,7 @@ timer_usec(const timedelta_t *timer) } void -timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) -{ +timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; @@ -36,11 +32,13 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); i += n; - if (i >= buflen) + if (i >= buflen) { return; + } mult = 1; - for (j = 0; j < n; j++) + for (j = 0; j < n; j++) { mult *= 10; + } /* Decimal. */ n = malloc_snprintf(&buf[i], buflen-i, "."); diff --git a/test/stress/microbench.c b/test/stress/microbench.c index c599d9d3..3b7e9660 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -2,22 +2,22 @@ JEMALLOC_INLINE_C void time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, - void (*func)(void)) -{ + void (*func)(void)) { uint64_t i; - for (i = 0; i < nwarmup; i++) + for (i = 0; i < nwarmup; i++) { func(); + } timer_start(timer); - for (i = 0; i < niter; i++) + for (i = 0; i < niter; i++) { func(); + } timer_stop(timer); } void compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, - void (*func_a), const char *name_b, void (*func_b)) -{ + void (*func_a), const char *name_b, void (*func_b)) { timedelta_t timer_a, timer_b; char ratio_buf[6]; void *p; @@ -41,8 +41,7 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, } static void -malloc_free(void) -{ +malloc_free(void) { /* The compiler can optimize away free(malloc(1))! 
*/ void *p = malloc(1); if (p == NULL) { @@ -53,8 +52,7 @@ malloc_free(void) } static void -mallocx_free(void) -{ +mallocx_free(void) { void *p = mallocx(1, 0); if (p == NULL) { test_fail("Unexpected mallocx() failure"); @@ -63,16 +61,14 @@ mallocx_free(void) free(p); } -TEST_BEGIN(test_malloc_vs_mallocx) -{ +TEST_BEGIN(test_malloc_vs_mallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "malloc", malloc_free, "mallocx", mallocx_free); } TEST_END static void -malloc_dallocx(void) -{ +malloc_dallocx(void) { void *p = malloc(1); if (p == NULL) { test_fail("Unexpected malloc() failure"); @@ -82,8 +78,7 @@ malloc_dallocx(void) } static void -malloc_sdallocx(void) -{ +malloc_sdallocx(void) { void *p = malloc(1); if (p == NULL) { test_fail("Unexpected malloc() failure"); @@ -92,23 +87,20 @@ malloc_sdallocx(void) sdallocx(p, 1, 0); } -TEST_BEGIN(test_free_vs_dallocx) -{ +TEST_BEGIN(test_free_vs_dallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free, "dallocx", malloc_dallocx); } TEST_END -TEST_BEGIN(test_dallocx_vs_sdallocx) -{ +TEST_BEGIN(test_dallocx_vs_sdallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx, "sdallocx", malloc_sdallocx); } TEST_END static void -malloc_mus_free(void) -{ +malloc_mus_free(void) { void *p; p = malloc(1); @@ -121,8 +113,7 @@ malloc_mus_free(void) } static void -malloc_sallocx_free(void) -{ +malloc_sallocx_free(void) { void *p; p = malloc(1); @@ -130,21 +121,20 @@ malloc_sallocx_free(void) test_fail("Unexpected malloc() failure"); return; } - if (sallocx(p, 0) < 1) + if (sallocx(p, 0) < 1) { test_fail("Unexpected sallocx() failure"); + } free(p); } -TEST_BEGIN(test_mus_vs_sallocx) -{ +TEST_BEGIN(test_mus_vs_sallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size", malloc_mus_free, "sallocx", malloc_sallocx_free); } TEST_END static void -malloc_nallocx_free(void) -{ +malloc_nallocx_free(void) { void *p; p = malloc(1); @@ -152,21 +142,20 @@ malloc_nallocx_free(void) 
test_fail("Unexpected malloc() failure"); return; } - if (nallocx(1, 0) < 1) + if (nallocx(1, 0) < 1) { test_fail("Unexpected nallocx() failure"); + } free(p); } -TEST_BEGIN(test_sallocx_vs_nallocx) -{ +TEST_BEGIN(test_sallocx_vs_nallocx) { compare_funcs(10*1000*1000, 100*1000*1000, "sallocx", malloc_sallocx_free, "nallocx", malloc_nallocx_free); } TEST_END int -main(void) -{ +main(void) { return (test( test_malloc_vs_mallocx, test_free_vs_dallocx, diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index cf52670b..b1bcf3d3 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -1449,8 +1449,7 @@ static const uint64_t init_by_array_64_expected[] = { KQU(15570163926716513029), KQU(13356980519185762498) }; -TEST_BEGIN(test_gen_rand_32) -{ +TEST_BEGIN(test_gen_rand_32) { uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); int i; @@ -1484,8 +1483,7 @@ TEST_BEGIN(test_gen_rand_32) } TEST_END -TEST_BEGIN(test_by_array_32) -{ +TEST_BEGIN(test_by_array_32) { uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); int i; @@ -1520,8 +1518,7 @@ TEST_BEGIN(test_by_array_32) } TEST_END -TEST_BEGIN(test_gen_rand_64) -{ +TEST_BEGIN(test_gen_rand_64) { uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); int i; @@ -1556,8 +1553,7 @@ TEST_BEGIN(test_gen_rand_64) } TEST_END -TEST_BEGIN(test_by_array_64) -{ +TEST_BEGIN(test_by_array_64) { uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); int i; @@ -1594,8 +1590,7 @@ TEST_BEGIN(test_by_array_64) TEST_END int -main(void) -{ +main(void) { return (test( test_gen_rand_32, test_by_array_32, diff --git a/test/unit/a0.c b/test/unit/a0.c index 87f7e527..c7ce8cfb 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_a0) -{ 
+TEST_BEGIN(test_a0) { void *p; p = a0malloc(1); @@ -11,8 +10,7 @@ TEST_BEGIN(test_a0) TEST_END int -main(void) -{ +main(void) { return (test_no_malloc_init( test_a0)); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 257f9729..710aaf53 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -5,8 +5,7 @@ #include "test/extent_hooks.h" static unsigned -get_nsizes_impl(const char *cmd) -{ +get_nsizes_impl(const char *cmd) { unsigned ret; size_t z; @@ -18,20 +17,17 @@ get_nsizes_impl(const char *cmd) } static unsigned -get_nsmall(void) -{ +get_nsmall(void) { return (get_nsizes_impl("arenas.nbins")); } static unsigned -get_nlarge(void) -{ +get_nlarge(void) { return (get_nsizes_impl("arenas.nlextents")); } static size_t -get_size_impl(const char *cmd, size_t ind) -{ +get_size_impl(const char *cmd, size_t ind) { size_t ret; size_t z; size_t mib[4]; @@ -49,35 +45,33 @@ get_size_impl(const char *cmd, size_t ind) } static size_t -get_small_size(size_t ind) -{ +get_small_size(size_t ind) { return (get_size_impl("arenas.bin.0.size", ind)); } static size_t -get_large_size(size_t ind) -{ +get_large_size(size_t ind) { return (get_size_impl("arenas.lextent.0.size", ind)); } /* Like ivsalloc(), but safe to call on discarded allocations. 
*/ static size_t -vsalloc(tsdn_t *tsdn, const void *ptr) -{ +vsalloc(tsdn_t *tsdn, const void *ptr) { extent_t *extent; extent = extent_lookup(tsdn, ptr, false); - if (extent == NULL) + if (extent == NULL) { return (0); - if (!extent_active_get(extent)) + } + if (!extent_active_get(extent)) { return (0); + } return (isalloc(tsdn, extent, ptr)); } static unsigned -do_arena_create(extent_hooks_t *h) -{ +do_arena_create(extent_hooks_t *h) { unsigned arena_ind; size_t sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, @@ -87,8 +81,7 @@ do_arena_create(extent_hooks_t *h) } static void -do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) -{ +do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { #define NLARGE 32 unsigned nsmall, nlarge, i; size_t sz; @@ -127,8 +120,7 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) } static void -do_arena_reset_post(void **ptrs, unsigned nptrs) -{ +do_arena_reset_post(void **ptrs, unsigned nptrs) { tsdn_t *tsdn; unsigned i; @@ -144,8 +136,7 @@ do_arena_reset_post(void **ptrs, unsigned nptrs) } static void -do_arena_reset_destroy(const char *name, unsigned arena_ind) -{ +do_arena_reset_destroy(const char *name, unsigned arena_ind) { size_t mib[3]; size_t miblen; @@ -158,19 +149,16 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) } static void -do_arena_reset(unsigned arena_ind) -{ +do_arena_reset(unsigned arena_ind) { do_arena_reset_destroy("arena.0.reset", arena_ind); } static void -do_arena_destroy(unsigned arena_ind) -{ +do_arena_destroy(unsigned arena_ind) { do_arena_reset_destroy("arena.0.destroy", arena_ind); } -TEST_BEGIN(test_arena_reset) -{ +TEST_BEGIN(test_arena_reset) { unsigned arena_ind; void **ptrs; unsigned nptrs; @@ -183,8 +171,7 @@ TEST_BEGIN(test_arena_reset) TEST_END static bool -arena_i_initialized(unsigned arena_ind, bool refresh) -{ +arena_i_initialized(unsigned arena_ind, bool refresh) { bool initialized; 
size_t mib[3]; size_t miblen, sz; @@ -206,15 +193,13 @@ arena_i_initialized(unsigned arena_ind, bool refresh) return (initialized); } -TEST_BEGIN(test_arena_destroy_initial) -{ +TEST_BEGIN(test_arena_destroy_initial) { assert_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should not be initialized"); } TEST_END -TEST_BEGIN(test_arena_destroy_hooks_default) -{ +TEST_BEGIN(test_arena_destroy_hooks_default) { unsigned arena_ind, arena_ind_another, arena_ind_prev; void **ptrs; unsigned nptrs; @@ -260,8 +245,7 @@ TEST_END */ static bool extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) -{ + bool committed, unsigned arena_ind) { TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? "true" : "false", arena_ind); @@ -270,8 +254,9 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, "Wrong hook function"); called_dalloc = true; - if (!try_dalloc) + if (!try_dalloc) { return (true); + } pages_unmap(addr, size); did_dalloc = true; return (false); @@ -290,8 +275,7 @@ static extent_hooks_t hooks_unmap = { extent_merge_hook }; -TEST_BEGIN(test_arena_destroy_hooks_unmap) -{ +TEST_BEGIN(test_arena_destroy_hooks_unmap) { unsigned arena_ind; void **ptrs; unsigned nptrs; @@ -328,8 +312,7 @@ TEST_BEGIN(test_arena_destroy_hooks_unmap) TEST_END int -main(void) -{ +main(void) { return (test( test_arena_reset, test_arena_destroy_initial, diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 1d143689..3e36acd1 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -66,8 +66,7 @@ typedef struct p##_test_s p##_test_t; } while (0) TEST_STRUCT(u64, uint64_t) -TEST_BEGIN(test_atomic_u64) -{ +TEST_BEGIN(test_atomic_u64) { #if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) test_skip("64-bit atomic operations not supported"); #else @@ 
-77,36 +76,31 @@ TEST_BEGIN(test_atomic_u64) TEST_END TEST_STRUCT(u32, uint32_t) -TEST_BEGIN(test_atomic_u32) -{ +TEST_BEGIN(test_atomic_u32) { TEST_BODY(u32, uint32_t, uint32_t, u32, "#"FMTx32); } TEST_END TEST_STRUCT(p, void *) -TEST_BEGIN(test_atomic_p) -{ +TEST_BEGIN(test_atomic_p) { TEST_BODY(p, void *, uintptr_t, ptr, "p"); } TEST_END TEST_STRUCT(zu, size_t) -TEST_BEGIN(test_atomic_zu) -{ +TEST_BEGIN(test_atomic_zu) { TEST_BODY(zu, size_t, size_t, zu, "#zx"); } TEST_END TEST_STRUCT(u, unsigned) -TEST_BEGIN(test_atomic_u) -{ +TEST_BEGIN(test_atomic_u) { TEST_BODY(u, unsigned, unsigned, u, "#x"); } TEST_END int -main(void) -{ +main(void) { return (test( test_atomic_u64, test_atomic_u32, diff --git a/test/unit/base.c b/test/unit/base.c index 76e96da8..65cf980b 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -24,8 +24,7 @@ static extent_hooks_t hooks_not_null = { NULL /* merge */ }; -TEST_BEGIN(test_base_hooks_default) -{ +TEST_BEGIN(test_base_hooks_default) { tsdn_t *tsdn; base_t *base; size_t allocated0, allocated1, resident, mapped; @@ -52,8 +51,7 @@ TEST_BEGIN(test_base_hooks_default) } TEST_END -TEST_BEGIN(test_base_hooks_null) -{ +TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; tsdn_t *tsdn; base_t *base; @@ -92,8 +90,7 @@ TEST_BEGIN(test_base_hooks_null) } TEST_END -TEST_BEGIN(test_base_hooks_not_null) -{ +TEST_BEGIN(test_base_hooks_not_null) { extent_hooks_t hooks_orig; tsdn_t *tsdn; base_t *base; @@ -214,8 +211,7 @@ TEST_BEGIN(test_base_hooks_not_null) TEST_END int -main(void) -{ +main(void) { return (test( test_base_hooks_default, test_base_hooks_null, diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index b502bfea..6dfa72f2 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -93,8 +93,7 @@ NB(16384) \ static void -test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) -{ +test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_info_t binfo_dyn; bitmap_info_init(&binfo_dyn, 
nbits); @@ -124,8 +123,7 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) #endif } -TEST_BEGIN(test_bitmap_initializer) -{ +TEST_BEGIN(test_bitmap_initializer) { #define NB(nbits) { \ if (nbits <= BITMAP_MAXBITS) { \ bitmap_info_t binfo = \ @@ -140,8 +138,7 @@ TEST_END static size_t test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, - size_t prev_size) -{ + size_t prev_size) { size_t size = bitmap_size(binfo); assert_zu_ge(size, (nbits >> 3), "Bitmap size is smaller than expected"); @@ -149,8 +146,7 @@ test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, return (size); } -TEST_BEGIN(test_bitmap_size) -{ +TEST_BEGIN(test_bitmap_size) { size_t nbits, prev_size; prev_size = 0; @@ -171,8 +167,7 @@ TEST_BEGIN(test_bitmap_size) TEST_END static void -test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) -{ +test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); @@ -185,8 +180,7 @@ test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) free(bitmap); } -TEST_BEGIN(test_bitmap_init) -{ +TEST_BEGIN(test_bitmap_init) { size_t nbits; for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { @@ -204,21 +198,20 @@ TEST_BEGIN(test_bitmap_init) TEST_END static void -test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) -{ +test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo); - for (i = 0; i < nbits; i++) + for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); + } assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); } -TEST_BEGIN(test_bitmap_set) -{ +TEST_BEGIN(test_bitmap_set) { size_t nbits; for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { @@ -236,26 +229,27 @@ 
TEST_BEGIN(test_bitmap_set) TEST_END static void -test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) -{ +test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo); - for (i = 0; i < nbits; i++) + for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); + } assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); - for (i = 0; i < nbits; i++) + for (i = 0; i < nbits; i++) { bitmap_unset(bitmap, binfo, i); - for (i = 0; i < nbits; i++) + } + for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); + } assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); } -TEST_BEGIN(test_bitmap_unset) -{ +TEST_BEGIN(test_bitmap_unset) { size_t nbits; for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { @@ -273,8 +267,7 @@ TEST_BEGIN(test_bitmap_unset) TEST_END static void -test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) -{ +test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); @@ -317,8 +310,7 @@ test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) free(bitmap); } -TEST_BEGIN(test_bitmap_sfu) -{ +TEST_BEGIN(test_bitmap_sfu) { size_t nbits; for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { @@ -336,8 +328,7 @@ TEST_BEGIN(test_bitmap_sfu) TEST_END int -main(void) -{ +main(void) { return (test( test_bitmap_initializer, test_bitmap_size, diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 1f576689..0638cb33 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_new_delete) -{ +TEST_BEGIN(test_new_delete) { tsd_t *tsd; ckh_t ckh; @@ -17,8 +16,7 @@ TEST_BEGIN(test_new_delete) } TEST_END -TEST_BEGIN(test_count_insert_search_remove) -{ 
+TEST_BEGIN(test_count_insert_search_remove) { tsd_t *tsd; ckh_t ckh; const char *strs[] = { @@ -105,8 +103,7 @@ TEST_BEGIN(test_count_insert_search_remove) } TEST_END -TEST_BEGIN(test_insert_iter_remove) -{ +TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) tsd_t *tsd; ckh_t ckh; @@ -174,10 +171,12 @@ TEST_BEGIN(test_insert_iter_remove) } } - for (j = 0; j < i + 1; j++) + for (j = 0; j < i + 1; j++) { assert_true(seen[j], "Item %zu not seen", j); - for (; j < NITEMS; j++) + } + for (; j < NITEMS; j++) { assert_false(seen[j], "Item %zu seen", j); + } } } @@ -204,8 +203,7 @@ TEST_BEGIN(test_insert_iter_remove) TEST_END int -main(void) -{ +main(void) { return (test( test_new_delete, test_count_insert_search_remove, diff --git a/test/unit/decay.c b/test/unit/decay.c index b3b1dd9d..d6334cd2 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -10,22 +10,20 @@ static nstime_t time_mock; static bool monotonic_mock; static bool -nstime_monotonic_mock(void) -{ +nstime_monotonic_mock(void) { return (monotonic_mock); } static bool -nstime_update_mock(nstime_t *time) -{ +nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (monotonic_mock) + if (monotonic_mock) { nstime_copy(time, &time_mock); + } return (!monotonic_mock); } -TEST_BEGIN(test_decay_ticks) -{ +TEST_BEGIN(test_decay_ticks) { ticker_t *decay_ticker; unsigned tick0, tick1; size_t sz, large0; @@ -197,8 +195,7 @@ TEST_BEGIN(test_decay_ticks) } TEST_END -TEST_BEGIN(test_decay_ticker) -{ +TEST_BEGIN(test_decay_ticker) { #define NPS 1024 int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -284,14 +281,14 @@ TEST_BEGIN(test_decay_ticker) nstime_update(&time); } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); - if (config_stats) + if (config_stats) { assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); + } #undef NPS } TEST_END -TEST_BEGIN(test_decay_nonmonotonic) -{ +TEST_BEGIN(test_decay_nonmonotonic) { #define NPS (SMOOTHSTEP_NSTEPS + 1) int 
flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -343,8 +340,9 @@ TEST_BEGIN(test_decay_nonmonotonic) assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); - if (config_stats) + if (config_stats) { assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + } nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; @@ -353,8 +351,7 @@ TEST_BEGIN(test_decay_nonmonotonic) TEST_END int -main(void) -{ +main(void) { return (test( test_decay_ticks, test_decay_ticker, diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index a5c1b7a0..343d1d8f 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_small_extent_size) -{ +TEST_BEGIN(test_small_extent_size) { unsigned nbins, i; size_t sz, extent_size; size_t mib[4]; @@ -35,8 +34,7 @@ TEST_BEGIN(test_small_extent_size) } TEST_END -TEST_BEGIN(test_large_extent_size) -{ +TEST_BEGIN(test_large_extent_size) { bool cache_oblivious; unsigned nlextents, i; size_t sz, extent_size_prev, ceil_prev; @@ -100,8 +98,7 @@ TEST_BEGIN(test_large_extent_size) } TEST_END -TEST_BEGIN(test_monotonic) -{ +TEST_BEGIN(test_monotonic) { #define SZ_MAX ZU(4 * 1024 * 1024) unsigned i; size_t floor_prev, ceil_prev; @@ -136,8 +133,7 @@ TEST_BEGIN(test_monotonic) TEST_END int -main(void) -{ +main(void) { return (test( test_small_extent_size, test_large_extent_size, diff --git a/test/unit/fork.c b/test/unit/fork.c index 58091c66..4880328e 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -4,8 +4,7 @@ #include #endif -TEST_BEGIN(test_fork) -{ +TEST_BEGIN(test_fork) { #ifndef _WIN32 void *p; pid_t pid; @@ -32,8 +31,9 @@ TEST_BEGIN(test_fork) /* Parent. 
*/ while (true) { - if (waitpid(pid, &status, 0) == -1) + if (waitpid(pid, &status, 0) == -1) { test_fail("Unexpected waitpid() failure"); + } if (WIFSIGNALED(status)) { test_fail("Unexpected child termination due to " "signal %d", WTERMSIG(status)); @@ -56,8 +56,7 @@ TEST_BEGIN(test_fork) TEST_END int -main(void) -{ +main(void) { return (test( test_fork)); } diff --git a/test/unit/hash.c b/test/unit/hash.c index ff237779..977d058f 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -36,8 +36,7 @@ typedef enum { } hash_variant_t; static int -hash_variant_bits(hash_variant_t variant) -{ +hash_variant_bits(hash_variant_t variant) { switch (variant) { case hash_variant_x86_32: return (32); case hash_variant_x86_128: return (128); @@ -47,8 +46,7 @@ hash_variant_bits(hash_variant_t variant) } static const char * -hash_variant_string(hash_variant_t variant) -{ +hash_variant_string(hash_variant_t variant) { switch (variant) { case hash_variant_x86_32: return ("hash_x86_32"); case hash_variant_x86_128: return ("hash_x86_128"); @@ -59,8 +57,7 @@ hash_variant_string(hash_variant_t variant) #define KEY_SIZE 256 static void -hash_variant_verify_key(hash_variant_t variant, uint8_t *key) -{ +hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; const int hashes_size = hashbytes * 256; VARIABLE_ARRAY(uint8_t, hashes, hashes_size); @@ -139,39 +136,35 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) } static void -hash_variant_verify(hash_variant_t variant) -{ +hash_variant_verify(hash_variant_t variant) { #define MAX_ALIGN 16 uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; unsigned i; - for (i = 0; i < MAX_ALIGN; i++) + for (i = 0; i < MAX_ALIGN; i++) { hash_variant_verify_key(variant, &key[i]); + } #undef MAX_ALIGN } #undef KEY_SIZE -TEST_BEGIN(test_hash_x86_32) -{ +TEST_BEGIN(test_hash_x86_32) { hash_variant_verify(hash_variant_x86_32); } TEST_END -TEST_BEGIN(test_hash_x86_128) -{ 
+TEST_BEGIN(test_hash_x86_128) { hash_variant_verify(hash_variant_x86_128); } TEST_END -TEST_BEGIN(test_hash_x64_128) -{ +TEST_BEGIN(test_hash_x64_128) { hash_variant_verify(hash_variant_x64_128); } TEST_END int -main(void) -{ +main(void) { return (test( test_hash_x86_32, test_hash_x86_128, diff --git a/test/unit/junk.c b/test/unit/junk.c index 5f34d051..02f0726d 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -15,15 +15,13 @@ static void *watch_for_junking; static bool saw_junking; static void -watch_junking(void *p) -{ +watch_junking(void *p) { watch_for_junking = p; saw_junking = false; } static void -arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) -{ +arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) { size_t i; arena_dalloc_junk_small_orig(ptr, bin_info); @@ -32,13 +30,13 @@ arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } - if (ptr == watch_for_junking) + if (ptr == watch_for_junking) { saw_junking = true; + } } static void -large_dalloc_junk_intercept(void *ptr, size_t usize) -{ +large_dalloc_junk_intercept(void *ptr, size_t usize) { size_t i; large_dalloc_junk_orig(ptr, usize); @@ -47,21 +45,21 @@ large_dalloc_junk_intercept(void *ptr, size_t usize) "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } - if (ptr == watch_for_junking) + if (ptr == watch_for_junking) { saw_junking = true; + } } static void -large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) -{ +large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) { large_dalloc_maybe_junk_orig(ptr, usize); - if (ptr == watch_for_junking) + if (ptr == watch_for_junking) { saw_junking = true; + } } static void -test_junk(size_t sz_min, size_t sz_max) -{ +test_junk(size_t sz_min, size_t sz_max) { uint8_t *s; size_t sz_prev, sz, i; @@ -126,23 +124,20 @@ test_junk(size_t sz_min, size_t sz_max) } } 
-TEST_BEGIN(test_junk_small) -{ +TEST_BEGIN(test_junk_small) { test_skip_if(!config_fill); test_junk(1, SMALL_MAXCLASS-1); } TEST_END -TEST_BEGIN(test_junk_large) -{ +TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); test_junk(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); } TEST_END int -main(void) -{ +main(void) { return (test( test_junk_small, test_junk_large)); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5b734e1d..a116894b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_mallctl_errors) -{ +TEST_BEGIN(test_mallctl_errors) { uint64_t epoch; size_t sz; @@ -28,8 +27,7 @@ TEST_BEGIN(test_mallctl_errors) } TEST_END -TEST_BEGIN(test_mallctlnametomib_errors) -{ +TEST_BEGIN(test_mallctlnametomib_errors) { size_t mib[1]; size_t miblen; @@ -39,8 +37,7 @@ TEST_BEGIN(test_mallctlnametomib_errors) } TEST_END -TEST_BEGIN(test_mallctlbymib_errors) -{ +TEST_BEGIN(test_mallctlbymib_errors) { uint64_t epoch; size_t sz; size_t mib[1]; @@ -76,8 +73,7 @@ TEST_BEGIN(test_mallctlbymib_errors) } TEST_END -TEST_BEGIN(test_mallctl_read_write) -{ +TEST_BEGIN(test_mallctl_read_write) { uint64_t old_epoch, new_epoch; size_t sz = sizeof(old_epoch); @@ -104,8 +100,7 @@ TEST_BEGIN(test_mallctl_read_write) } TEST_END -TEST_BEGIN(test_mallctlnametomib_short_mib) -{ +TEST_BEGIN(test_mallctlnametomib_short_mib) { size_t mib[4]; size_t miblen; @@ -119,8 +114,7 @@ TEST_BEGIN(test_mallctlnametomib_short_mib) } TEST_END -TEST_BEGIN(test_mallctl_config) -{ +TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ @@ -149,8 +143,7 @@ TEST_BEGIN(test_mallctl_config) } TEST_END -TEST_BEGIN(test_mallctl_opt) -{ +TEST_BEGIN(test_mallctl_opt) { bool config_always = true; #define TEST_MALLCTL_OPT(t, opt, config) do { \ @@ -189,8 +182,7 @@ TEST_BEGIN(test_mallctl_opt) } TEST_END -TEST_BEGIN(test_manpage_example) -{ 
+TEST_BEGIN(test_manpage_example) { unsigned nbins, i; size_t mib[4]; size_t len, miblen; @@ -214,8 +206,7 @@ TEST_BEGIN(test_manpage_example) } TEST_END -TEST_BEGIN(test_tcache_none) -{ +TEST_BEGIN(test_tcache_none) { void *p0, *q, *p1; test_skip_if(!config_tcache); @@ -240,8 +231,7 @@ TEST_BEGIN(test_tcache_none) } TEST_END -TEST_BEGIN(test_tcache) -{ +TEST_BEGIN(test_tcache) { #define NTCACHES 10 unsigned tis[NTCACHES]; void *ps[NTCACHES]; @@ -312,11 +302,13 @@ TEST_BEGIN(test_tcache) assert_ptr_eq(qs[i], q0, "Expected rallocx() to allocate cached region, i=%u", i); /* Avoid undefined behavior in case of test failure. */ - if (qs[i] == NULL) + if (qs[i] == NULL) { qs[i] = ps[i]; + } } - for (i = 0; i < NTCACHES; i++) + for (i = 0; i < NTCACHES; i++) { dallocx(qs[i], MALLOCX_TCACHE(tis[i])); + } /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { @@ -334,8 +326,7 @@ TEST_BEGIN(test_tcache) } TEST_END -TEST_BEGIN(test_thread_arena) -{ +TEST_BEGIN(test_thread_arena) { unsigned arena_old, arena_new, narenas; size_t sz = sizeof(unsigned); @@ -353,8 +344,7 @@ TEST_BEGIN(test_thread_arena) } TEST_END -TEST_BEGIN(test_arena_i_initialized) -{ +TEST_BEGIN(test_arena_i_initialized) { unsigned narenas, i; size_t sz; size_t mib[3]; @@ -392,8 +382,7 @@ TEST_BEGIN(test_arena_i_initialized) } TEST_END -TEST_BEGIN(test_arena_i_decay_time) -{ +TEST_BEGIN(test_arena_i_decay_time) { ssize_t decay_time, orig_decay_time, prev_decay_time; size_t sz = sizeof(ssize_t); @@ -423,8 +412,7 @@ TEST_BEGIN(test_arena_i_decay_time) } TEST_END -TEST_BEGIN(test_arena_i_purge) -{ +TEST_BEGIN(test_arena_i_purge) { unsigned narenas; size_t sz = sizeof(unsigned); size_t mib[3]; @@ -447,8 +435,7 @@ TEST_BEGIN(test_arena_i_purge) } TEST_END -TEST_BEGIN(test_arena_i_decay) -{ +TEST_BEGIN(test_arena_i_decay) { unsigned narenas; size_t sz = sizeof(unsigned); size_t mib[3]; @@ -471,8 +458,7 @@ TEST_BEGIN(test_arena_i_decay) } TEST_END -TEST_BEGIN(test_arena_i_dss) -{ 
+TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; size_t sz = sizeof(dss_prec_old); size_t mib[3]; @@ -517,8 +503,7 @@ TEST_BEGIN(test_arena_i_dss) } TEST_END -TEST_BEGIN(test_arenas_decay_time) -{ +TEST_BEGIN(test_arenas_decay_time) { ssize_t decay_time, orig_decay_time, prev_decay_time; size_t sz = sizeof(ssize_t); @@ -548,8 +533,7 @@ TEST_BEGIN(test_arenas_decay_time) } TEST_END -TEST_BEGIN(test_arenas_constants) -{ +TEST_BEGIN(test_arenas_constants) { #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ @@ -567,8 +551,7 @@ TEST_BEGIN(test_arenas_constants) } TEST_END -TEST_BEGIN(test_arenas_bin_constants) -{ +TEST_BEGIN(test_arenas_bin_constants) { #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ @@ -586,8 +569,7 @@ TEST_BEGIN(test_arenas_bin_constants) } TEST_END -TEST_BEGIN(test_arenas_lextent_constants) -{ +TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ @@ -602,8 +584,7 @@ TEST_BEGIN(test_arenas_lextent_constants) } TEST_END -TEST_BEGIN(test_arenas_create) -{ +TEST_BEGIN(test_arenas_create) { unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); @@ -620,8 +601,7 @@ TEST_BEGIN(test_arenas_create) } TEST_END -TEST_BEGIN(test_stats_arenas) -{ +TEST_BEGIN(test_stats_arenas) { #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ @@ -640,8 +620,7 @@ TEST_BEGIN(test_stats_arenas) TEST_END int -main(void) -{ +main(void) { return (test( test_mallctl_errors, test_mallctlnametomib_errors, diff --git a/test/unit/math.c b/test/unit/math.c index 8e5ec61b..15fc7d54 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -14,30 +14,29 @@ #endif static bool -double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) -{ +double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) { double rel_err; - if 
(fabs(a - b) < max_abs_err) + if (fabs(a - b) < max_abs_err) { return (true); + } rel_err = (fabs(b) > fabs(a)) ? fabs((a-b)/b) : fabs((a-b)/a); return (rel_err < max_rel_err); } static uint64_t -factorial(unsigned x) -{ +factorial(unsigned x) { uint64_t ret = 1; unsigned i; - for (i = 2; i <= x; i++) + for (i = 2; i <= x; i++) { ret *= (uint64_t)i; + } return (ret); } -TEST_BEGIN(test_ln_gamma_factorial) -{ +TEST_BEGIN(test_ln_gamma_factorial) { unsigned x; /* exp(ln_gamma(x)) == (x-1)! for integer x. */ @@ -188,8 +187,7 @@ static const double ln_gamma_misc_expected[] = { 359.13420536957539753 }; -TEST_BEGIN(test_ln_gamma_misc) -{ +TEST_BEGIN(test_ln_gamma_misc) { unsigned i; for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) { @@ -239,8 +237,7 @@ static const double pt_norm_expected[] = { 1.88079360815125041, 2.05374891063182208, 2.32634787404084076 }; -TEST_BEGIN(test_pt_norm) -{ +TEST_BEGIN(test_pt_norm) { unsigned i; for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) { @@ -289,8 +286,7 @@ static const double pt_chi2_expected[] = { 1046.4872561869577, 1063.5717461999654, 1107.0741966053859 }; -TEST_BEGIN(test_pt_chi2) -{ +TEST_BEGIN(test_pt_chi2) { unsigned i, j; unsigned e = 0; @@ -351,8 +347,7 @@ static const double pt_gamma_expected[] = { 4.7230515633946677, 5.6417477865306020, 8.4059469148854635 }; -TEST_BEGIN(test_pt_gamma_shape) -{ +TEST_BEGIN(test_pt_gamma_shape) { unsigned i, j; unsigned e = 0; @@ -371,8 +366,7 @@ TEST_BEGIN(test_pt_gamma_shape) } TEST_END -TEST_BEGIN(test_pt_gamma_scale) -{ +TEST_BEGIN(test_pt_gamma_scale) { double shape = 1.0; double ln_gamma_shape = ln_gamma(shape); @@ -385,8 +379,7 @@ TEST_BEGIN(test_pt_gamma_scale) TEST_END int -main(void) -{ +main(void) { return (test( test_ln_gamma_factorial, test_ln_gamma_misc, diff --git a/test/unit/mq.c b/test/unit/mq.c index bd289c54..95c9c500 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -9,8 +9,7 @@ struct mq_msg_s { }; mq_gen(static, mq_, mq_t, mq_msg_t, 
link) -TEST_BEGIN(test_mq_basic) -{ +TEST_BEGIN(test_mq_basic) { mq_t mq; mq_msg_t msg; @@ -31,8 +30,7 @@ TEST_BEGIN(test_mq_basic) TEST_END static void * -thd_receiver_start(void *arg) -{ +thd_receiver_start(void *arg) { mq_t *mq = (mq_t *)arg; unsigned i; @@ -45,8 +43,7 @@ thd_receiver_start(void *arg) } static void * -thd_sender_start(void *arg) -{ +thd_sender_start(void *arg) { mq_t *mq = (mq_t *)arg; unsigned i; @@ -61,8 +58,7 @@ thd_sender_start(void *arg) return (NULL); } -TEST_BEGIN(test_mq_threaded) -{ +TEST_BEGIN(test_mq_threaded) { mq_t mq; thd_t receiver; thd_t senders[NSENDERS]; @@ -71,20 +67,21 @@ TEST_BEGIN(test_mq_threaded) assert_false(mq_init(&mq), "Unexpected mq_init() failure"); thd_create(&receiver, thd_receiver_start, (void *)&mq); - for (i = 0; i < NSENDERS; i++) + for (i = 0; i < NSENDERS; i++) { thd_create(&senders[i], thd_sender_start, (void *)&mq); + } thd_join(receiver, NULL); - for (i = 0; i < NSENDERS; i++) + for (i = 0; i < NSENDERS; i++) { thd_join(senders[i], NULL); + } mq_fini(&mq); } TEST_END int -main(void) -{ +main(void) { return (test( test_mq_basic, test_mq_threaded)); diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 2eccc98f..0813a699 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -3,8 +3,7 @@ #define NTHREADS 2 #define NINCRS 2000000 -TEST_BEGIN(test_mtx_basic) -{ +TEST_BEGIN(test_mtx_basic) { mtx_t mtx; assert_false(mtx_init(&mtx), "Unexpected mtx_init() failure"); @@ -20,8 +19,7 @@ typedef struct { } thd_start_arg_t; static void * -thd_start(void *varg) -{ +thd_start(void *varg) { thd_start_arg_t *arg = (thd_start_arg_t *)varg; unsigned i; @@ -33,26 +31,26 @@ thd_start(void *varg) return (NULL); } -TEST_BEGIN(test_mtx_race) -{ +TEST_BEGIN(test_mtx_race) { thd_start_arg_t arg; thd_t thds[NTHREADS]; unsigned i; assert_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); arg.x = 0; - for (i = 0; i < NTHREADS; i++) + for (i = 0; i < NTHREADS; i++) { thd_create(&thds[i], thd_start, (void *)&arg); - for (i = 0; 
i < NTHREADS; i++) + } + for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); + } assert_u_eq(arg.x, NTHREADS * NINCRS, "Race-related counter corruption"); } TEST_END int -main(void) -{ +main(void) { return (test( test_mtx_basic, test_mtx_race)); diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 6548ba23..f628a8f3 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -2,8 +2,7 @@ #define BILLION UINT64_C(1000000000) -TEST_BEGIN(test_nstime_init) -{ +TEST_BEGIN(test_nstime_init) { nstime_t nst; nstime_init(&nst, 42000000043); @@ -13,8 +12,7 @@ TEST_BEGIN(test_nstime_init) } TEST_END -TEST_BEGIN(test_nstime_init2) -{ +TEST_BEGIN(test_nstime_init2) { nstime_t nst; nstime_init2(&nst, 42, 43); @@ -23,8 +21,7 @@ TEST_BEGIN(test_nstime_init2) } TEST_END -TEST_BEGIN(test_nstime_copy) -{ +TEST_BEGIN(test_nstime_copy) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); @@ -35,8 +32,7 @@ TEST_BEGIN(test_nstime_copy) } TEST_END -TEST_BEGIN(test_nstime_compare) -{ +TEST_BEGIN(test_nstime_compare) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); @@ -70,8 +66,7 @@ TEST_BEGIN(test_nstime_compare) } TEST_END -TEST_BEGIN(test_nstime_add) -{ +TEST_BEGIN(test_nstime_add) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); @@ -90,8 +85,7 @@ TEST_BEGIN(test_nstime_add) } TEST_END -TEST_BEGIN(test_nstime_subtract) -{ +TEST_BEGIN(test_nstime_subtract) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); @@ -110,8 +104,7 @@ TEST_BEGIN(test_nstime_subtract) } TEST_END -TEST_BEGIN(test_nstime_imultiply) -{ +TEST_BEGIN(test_nstime_imultiply) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); @@ -128,8 +121,7 @@ TEST_BEGIN(test_nstime_imultiply) } TEST_END -TEST_BEGIN(test_nstime_idivide) -{ +TEST_BEGIN(test_nstime_idivide) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); @@ -148,8 +140,7 @@ TEST_BEGIN(test_nstime_idivide) } TEST_END -TEST_BEGIN(test_nstime_divide) -{ +TEST_BEGIN(test_nstime_divide) { nstime_t nsta, nstb, nstc; nstime_init2(&nsta, 42, 43); @@ -176,14 
+167,12 @@ TEST_BEGIN(test_nstime_divide) } TEST_END -TEST_BEGIN(test_nstime_monotonic) -{ +TEST_BEGIN(test_nstime_monotonic) { nstime_monotonic(); } TEST_END -TEST_BEGIN(test_nstime_update) -{ +TEST_BEGIN(test_nstime_update) { nstime_t nst; nstime_init(&nst, 0); @@ -208,8 +197,7 @@ TEST_BEGIN(test_nstime_update) TEST_END int -main(void) -{ +main(void) { return (test( test_nstime_init, test_nstime_init2, diff --git a/test/unit/pack.c b/test/unit/pack.c index 316b6df5..9237ba2e 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -20,8 +20,7 @@ const char *malloc_conf = "decay_time:-1"; #define NSLABS 8 static unsigned -binind_compute(void) -{ +binind_compute(void) { size_t sz; unsigned nbins, i; @@ -41,8 +40,9 @@ binind_compute(void) sz = sizeof(size); assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); - if (size == SZ) + if (size == SZ) { return (i); + } } test_fail("Unable to compute nregs_per_run"); @@ -50,8 +50,7 @@ binind_compute(void) } static size_t -nregs_per_run_compute(void) -{ +nregs_per_run_compute(void) { uint32_t nregs; size_t sz; unsigned binind = binind_compute(); @@ -68,8 +67,7 @@ nregs_per_run_compute(void) } static unsigned -arenas_create_mallctl(void) -{ +arenas_create_mallctl(void) { unsigned arena_ind; size_t sz; @@ -81,8 +79,7 @@ arenas_create_mallctl(void) } static void -arena_reset_mallctl(unsigned arena_ind) -{ +arena_reset_mallctl(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -93,8 +90,7 @@ arena_reset_mallctl(unsigned arena_ind) "Unexpected mallctlbymib() failure"); } -TEST_BEGIN(test_pack) -{ +TEST_BEGIN(test_pack) { unsigned arena_ind = arenas_create_mallctl(); size_t nregs_per_run = nregs_per_run_compute(); size_t nregs = nregs_per_run * NSLABS; @@ -125,8 +121,9 @@ TEST_BEGIN(test_pack) i++, offset = (offset + 1) % nregs_per_run) { for (j = 0; j < nregs_per_run; j++) { void *p = ptrs[(i * nregs_per_run) + j]; - if (offset == j) + if 
(offset == j) { continue; + } dallocx(p, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); } @@ -143,8 +140,9 @@ TEST_BEGIN(test_pack) for (j = 0; j < nregs_per_run; j++) { void *p; - if (offset == j) + if (offset == j) { continue; + } p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], @@ -159,8 +157,7 @@ TEST_BEGIN(test_pack) TEST_END int -main(void) -{ +main(void) { return (test( test_pack)); } diff --git a/test/unit/pages.c b/test/unit/pages.c index 1e6add95..b6092de0 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_pages_huge) -{ +TEST_BEGIN(test_pages_huge) { size_t alloc_size; bool commit; void *pages, *hugepage; @@ -22,8 +21,7 @@ TEST_BEGIN(test_pages_huge) TEST_END int -main(void) -{ +main(void) { return (test( test_pages_huge)); } diff --git a/test/unit/ph.c b/test/unit/ph.c index 10bf99e4..e49a0e78 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -10,8 +10,7 @@ struct node_s { }; static int -node_cmp(const node_t *a, const node_t *b) -{ +node_cmp(const node_t *a, const node_t *b) { int ret; ret = (a->key > b->key) - (a->key < b->key); @@ -39,18 +38,19 @@ typedef ph(node_t) heap_t; ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); static void -node_print(const node_t *node, unsigned depth) -{ +node_print(const node_t *node, unsigned depth) { unsigned i; node_t *leftmost_child, *sibling; - for (i = 0; i < depth; i++) + for (i = 0; i < depth; i++) { malloc_printf("\t"); + } malloc_printf("%2"FMTu64"\n", node->key); leftmost_child = phn_lchild_get(node_t, link, node); - if (leftmost_child == NULL) + if (leftmost_child == NULL) { return; + } node_print(leftmost_child, depth + 1); for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != @@ -60,13 +60,13 @@ node_print(const node_t *node, unsigned depth) } static void -heap_print(const heap_t *heap) -{ +heap_print(const heap_t *heap) { node_t *auxelm; 
malloc_printf("vvv heap %p vvv\n", heap); - if (heap->ph_root == NULL) + if (heap->ph_root == NULL) { goto label_return; + } node_print(heap->ph_root, 0); @@ -83,8 +83,7 @@ label_return: } static unsigned -node_validate(const node_t *node, const node_t *parent) -{ +node_validate(const node_t *node, const node_t *parent) { unsigned nnodes = 1; node_t *leftmost_child, *sibling; @@ -94,8 +93,9 @@ node_validate(const node_t *node, const node_t *parent) } leftmost_child = phn_lchild_get(node_t, link, node); - if (leftmost_child == NULL) + if (leftmost_child == NULL) { return (nnodes); + } assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), (void *)node, "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); @@ -111,13 +111,13 @@ node_validate(const node_t *node, const node_t *parent) } static unsigned -heap_validate(const heap_t *heap) -{ +heap_validate(const heap_t *heap) { unsigned nnodes = 0; node_t *auxelm; - if (heap->ph_root == NULL) + if (heap->ph_root == NULL) { goto label_return; + } nnodes += node_validate(heap->ph_root, NULL); @@ -130,13 +130,13 @@ heap_validate(const heap_t *heap) } label_return: - if (false) + if (false) { heap_print(heap); + } return (nnodes); } -TEST_BEGIN(test_ph_empty) -{ +TEST_BEGIN(test_ph_empty) { heap_t heap; heap_new(&heap); @@ -146,23 +146,20 @@ TEST_BEGIN(test_ph_empty) TEST_END static void -node_remove(heap_t *heap, node_t *node) -{ +node_remove(heap_t *heap, node_t *node) { heap_remove(heap, node); node->magic = 0; } static node_t * -node_remove_first(heap_t *heap) -{ +node_remove_first(heap_t *heap) { node_t *node = heap_remove_first(heap); node->magic = 0; return (node); } -TEST_BEGIN(test_ph_random) -{ +TEST_BEGIN(test_ph_random) { #define NNODES 25 #define NBAGS 250 #define SEED 42 @@ -177,17 +174,20 @@ TEST_BEGIN(test_ph_random) switch (i) { case 0: /* Insert in order. 
*/ - for (j = 0; j < NNODES; j++) + for (j = 0; j < NNODES; j++) { bag[j] = j; + } break; case 1: /* Insert in reverse order. */ - for (j = 0; j < NNODES; j++) + for (j = 0; j < NNODES; j++) { bag[j] = NNODES - j - 1; + } break; default: - for (j = 0; j < NNODES; j++) + for (j = 0; j < NNODES; j++) { bag[j] = gen_rand64_range(sfmt, NNODES); + } } for (j = 1; j <= NNODES; j++) { @@ -280,8 +280,7 @@ TEST_BEGIN(test_ph_random) TEST_END int -main(void) -{ +main(void) { return (test( test_ph_empty, test_ph_random)); diff --git a/test/unit/prng.c b/test/unit/prng.c index f32d82a6..b26da36e 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" static void -test_prng_lg_range_u32(bool atomic) -{ +test_prng_lg_range_u32(bool atomic) { uint32_t sa, sb, ra, rb; unsigned lg_range; @@ -38,8 +37,7 @@ test_prng_lg_range_u32(bool atomic) } static void -test_prng_lg_range_u64(void) -{ +test_prng_lg_range_u64(void) { uint64_t sa, sb, ra, rb; unsigned lg_range; @@ -75,8 +73,7 @@ test_prng_lg_range_u64(void) } static void -test_prng_lg_range_zu(bool atomic) -{ +test_prng_lg_range_zu(bool atomic) { size_t sa, sb, ra, rb; unsigned lg_range; @@ -112,39 +109,33 @@ test_prng_lg_range_zu(bool atomic) } } -TEST_BEGIN(test_prng_lg_range_u32_nonatomic) -{ +TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { test_prng_lg_range_u32(false); } TEST_END -TEST_BEGIN(test_prng_lg_range_u32_atomic) -{ +TEST_BEGIN(test_prng_lg_range_u32_atomic) { test_prng_lg_range_u32(true); } TEST_END -TEST_BEGIN(test_prng_lg_range_u64_nonatomic) -{ +TEST_BEGIN(test_prng_lg_range_u64_nonatomic) { test_prng_lg_range_u64(); } TEST_END -TEST_BEGIN(test_prng_lg_range_zu_nonatomic) -{ +TEST_BEGIN(test_prng_lg_range_zu_nonatomic) { test_prng_lg_range_zu(false); } TEST_END -TEST_BEGIN(test_prng_lg_range_zu_atomic) -{ +TEST_BEGIN(test_prng_lg_range_zu_atomic) { test_prng_lg_range_zu(true); } TEST_END static void -test_prng_range_u32(bool atomic) -{ +test_prng_range_u32(bool atomic) { 
uint32_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 @@ -164,8 +155,7 @@ test_prng_range_u32(bool atomic) } static void -test_prng_range_u64(void) -{ +test_prng_range_u64(void) { uint64_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 @@ -185,8 +175,7 @@ test_prng_range_u64(void) } static void -test_prng_range_zu(bool atomic) -{ +test_prng_range_zu(bool atomic) { size_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 @@ -205,39 +194,33 @@ test_prng_range_zu(bool atomic) } } -TEST_BEGIN(test_prng_range_u32_nonatomic) -{ +TEST_BEGIN(test_prng_range_u32_nonatomic) { test_prng_range_u32(false); } TEST_END -TEST_BEGIN(test_prng_range_u32_atomic) -{ +TEST_BEGIN(test_prng_range_u32_atomic) { test_prng_range_u32(true); } TEST_END -TEST_BEGIN(test_prng_range_u64_nonatomic) -{ +TEST_BEGIN(test_prng_range_u64_nonatomic) { test_prng_range_u64(); } TEST_END -TEST_BEGIN(test_prng_range_zu_nonatomic) -{ +TEST_BEGIN(test_prng_range_zu_nonatomic) { test_prng_range_zu(false); } TEST_END -TEST_BEGIN(test_prng_range_zu_atomic) -{ +TEST_BEGIN(test_prng_range_zu_atomic) { test_prng_range_zu(true); } TEST_END int -main(void) -{ +main(void) { return (test( test_prng_lg_range_u32_nonatomic, test_prng_lg_range_u32_atomic, diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 41ebeea5..bed0c9a6 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -11,8 +11,7 @@ const char *malloc_conf = #endif static int -prof_dump_open_intercept(bool propagate_err, const char *filename) -{ +prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; fd = open("/dev/null", O_WRONLY); @@ -22,14 +21,12 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) } static void * -alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) -{ +alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { return (btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration)); } static void * -thd_start(void *varg) -{ 
+thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; size_t bt_count_prev, bt_count; unsigned i_prev, i; @@ -57,8 +54,7 @@ thd_start(void *varg) return (NULL); } -TEST_BEGIN(test_idump) -{ +TEST_BEGIN(test_idump) { bool active; thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; @@ -77,14 +73,14 @@ TEST_BEGIN(test_idump) thd_args[i] = i; thd_create(&thds[i], thd_start, (void *)&thd_args[i]); } - for (i = 0; i < NTHREADS; i++) + for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); + } } TEST_END int -main(void) -{ +main(void) { return (test( test_idump)); } diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index d3b341d7..422024f1 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -6,8 +6,7 @@ const char *malloc_conf = #endif static void -mallctl_bool_get(const char *name, bool expected, const char *func, int line) -{ +mallctl_bool_get(const char *name, bool expected, const char *func, int line) { bool old; size_t sz; @@ -20,8 +19,7 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) static void mallctl_bool_set(const char *name, bool old_expected, bool val_new, - const char *func, int line) -{ + const char *func, int line) { bool old; size_t sz; @@ -36,8 +34,7 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, static void mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, - int line) -{ + int line) { mallctl_bool_get("prof.active", prof_active_old_expected, func, line); } #define mallctl_prof_active_get(a) \ @@ -45,8 +42,7 @@ mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, static void mallctl_prof_active_set_impl(bool prof_active_old_expected, - bool prof_active_new, const char *func, int line) -{ + bool prof_active_new, const char *func, int line) { mallctl_bool_set("prof.active", prof_active_old_expected, prof_active_new, func, line); } @@ -55,8 +51,7 @@ mallctl_prof_active_set_impl(bool prof_active_old_expected, 
static void mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, - const char *func, int line) -{ + const char *func, int line) { mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected, func, line); } @@ -65,8 +60,7 @@ mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, static void mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, - bool thread_prof_active_new, const char *func, int line) -{ + bool thread_prof_active_new, const char *func, int line) { mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected, thread_prof_active_new, func, line); } @@ -74,8 +68,7 @@ mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__) static void -prof_sampling_probe_impl(bool expect_sample, const char *func, int line) -{ +prof_sampling_probe_impl(bool expect_sample, const char *func, int line) { void *p; size_t expected_backtraces = expect_sample ? 
1 : 0; @@ -90,8 +83,7 @@ prof_sampling_probe_impl(bool expect_sample, const char *func, int line) #define prof_sampling_probe(a) \ prof_sampling_probe_impl(a, __func__, __LINE__) -TEST_BEGIN(test_prof_active) -{ +TEST_BEGIN(test_prof_active) { test_skip_if(!config_prof); mallctl_prof_active_get(true); @@ -124,8 +116,7 @@ TEST_BEGIN(test_prof_active) TEST_END int -main(void) -{ +main(void) { return (test( test_prof_active)); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 53f7cad6..0d8ec71c 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -7,8 +7,7 @@ const char *malloc_conf = "prof:true,prof_active:false,prof_gdump:true"; static bool did_prof_dump_open; static int -prof_dump_open_intercept(bool propagate_err, const char *filename) -{ +prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; did_prof_dump_open = true; @@ -19,8 +18,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) return (fd); } -TEST_BEGIN(test_gdump) -{ +TEST_BEGIN(test_gdump) { bool active, gdump, gdump_old; void *p, *q, *r, *s; size_t sz; @@ -74,8 +72,7 @@ TEST_BEGIN(test_gdump) TEST_END int -main(void) -{ +main(void) { return (test( test_gdump)); } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 43824c6a..393211ea 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -16,8 +16,7 @@ const char *malloc_conf = "" static bool did_prof_dump_open; static int -prof_dump_open_intercept(bool propagate_err, const char *filename) -{ +prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; did_prof_dump_open = true; @@ -28,8 +27,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) return (fd); } -TEST_BEGIN(test_idump) -{ +TEST_BEGIN(test_idump) { bool active; void *p; @@ -51,8 +49,7 @@ TEST_BEGIN(test_idump) TEST_END int -main(void) -{ +main(void) { return (test( test_idump)); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 
cc13e378..463f6893 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -6,8 +6,7 @@ const char *malloc_conf = #endif static int -prof_dump_open_intercept(bool propagate_err, const char *filename) -{ +prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; fd = open("/dev/null", O_WRONLY); @@ -17,15 +16,13 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) } static void -set_prof_active(bool active) -{ +set_prof_active(bool active) { assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure"); } static size_t -get_lg_prof_sample(void) -{ +get_lg_prof_sample(void) { size_t lg_prof_sample; size_t sz = sizeof(size_t); @@ -36,8 +33,7 @@ get_lg_prof_sample(void) } static void -do_prof_reset(size_t lg_prof_sample) -{ +do_prof_reset(size_t lg_prof_sample) { assert_d_eq(mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); @@ -45,8 +41,7 @@ do_prof_reset(size_t lg_prof_sample) "Expected profile sample rate change"); } -TEST_BEGIN(test_prof_reset_basic) -{ +TEST_BEGIN(test_prof_reset_basic) { size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next; size_t sz; unsigned i; @@ -95,16 +90,14 @@ bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static bool prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) -{ + const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); return (false); } -TEST_BEGIN(test_prof_reset_cleanup) -{ +TEST_BEGIN(test_prof_reset_cleanup) { void *p; prof_dump_header_t *prof_dump_header_orig; @@ -148,8 +141,7 @@ TEST_END #define RESET_INTERVAL (1U << 10) #define DUMP_INTERVAL 3677 static void * -thd_start(void *varg) -{ +thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; unsigned i; void *objs[OBJ_RING_BUF_COUNT]; @@ 
-192,8 +184,7 @@ thd_start(void *varg) return (NULL); } -TEST_BEGIN(test_prof_reset) -{ +TEST_BEGIN(test_prof_reset) { size_t lg_prof_sample_orig; thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; @@ -216,8 +207,9 @@ TEST_BEGIN(test_prof_reset) thd_args[i] = i; thd_create(&thds[i], thd_start, (void *)&thd_args[i]); } - for (i = 0; i < NTHREADS; i++) + for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); + } assert_zu_eq(prof_bt_count(), bt_count, "Unexpected bactrace count change"); @@ -237,8 +229,7 @@ TEST_END /* Test sampling at the same allocation site across resets. */ #define NITER 10 -TEST_BEGIN(test_xallocx) -{ +TEST_BEGIN(test_xallocx) { size_t lg_prof_sample_orig; unsigned i; void *ptrs[NITER]; @@ -288,8 +279,7 @@ TEST_END #undef NITER int -main(void) -{ +main(void) { /* Intercept dumping prior to running any tests. */ prof_dump_open = prof_dump_open_intercept; diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 8f928ebf..2e35b7ec 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -4,8 +4,7 @@ const char *malloc_conf = "prof:true,lg_prof_sample:0"; #endif -TEST_BEGIN(test_prof_realloc) -{ +TEST_BEGIN(test_prof_realloc) { tsdn_t *tsdn; int flags; void *p, *q; @@ -50,8 +49,7 @@ TEST_BEGIN(test_prof_realloc) TEST_END int -main(void) -{ +main(void) { return test( test_prof_realloc); } diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 8699936b..ba86e10e 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -6,8 +6,7 @@ const char *malloc_conf = "prof:true,prof_active:false"; static void mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, - int line) -{ + int line) { const char *thread_name_old; size_t sz; @@ -24,8 +23,7 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, static void mallctl_thread_name_set_impl(const char *thread_name, const char *func, - int line) -{ + int line) { 
assert_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", @@ -35,8 +33,7 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, #define mallctl_thread_name_set(a) \ mallctl_thread_name_set_impl(a, __func__, __LINE__) -TEST_BEGIN(test_prof_thread_name_validation) -{ +TEST_BEGIN(test_prof_thread_name_validation) { const char *thread_name; test_skip_if(!config_prof); @@ -78,8 +75,7 @@ TEST_END #define NTHREADS 4 #define NRESET 25 static void * -thd_start(void *varg) -{ +thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; char thread_name[16] = ""; unsigned i; @@ -101,8 +97,7 @@ thd_start(void *varg) return (NULL); } -TEST_BEGIN(test_prof_thread_name_threaded) -{ +TEST_BEGIN(test_prof_thread_name_threaded) { thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -113,16 +108,16 @@ TEST_BEGIN(test_prof_thread_name_threaded) thd_args[i] = i; thd_create(&thds[i], thd_start, (void *)&thd_args[i]); } - for (i = 0; i < NTHREADS; i++) + for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); + } } TEST_END #undef NTHREADS #undef NRESET int -main(void) -{ +main(void) { return (test( test_prof_thread_name_validation, test_prof_thread_name_threaded)); diff --git a/test/unit/ql.c b/test/unit/ql.c index 2ebb4502..0bb896cb 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -12,8 +12,7 @@ struct list_s { }; static void -test_empty_list(list_head_t *head) -{ +test_empty_list(list_head_t *head) { list_t *t; unsigned i; @@ -34,8 +33,7 @@ test_empty_list(list_head_t *head) assert_u_eq(i, 0, "Unexpected element for empty list"); } -TEST_BEGIN(test_ql_empty) -{ +TEST_BEGIN(test_ql_empty) { list_head_t head; ql_new(&head); @@ -44,8 +42,7 @@ TEST_BEGIN(test_ql_empty) TEST_END static void -init_entries(list_t *entries, unsigned nentries) -{ +init_entries(list_t *entries, unsigned nentries) { unsigned i; for (i = 0; i < nentries; i++) { @@ -55,8 
+52,7 @@ init_entries(list_t *entries, unsigned nentries) } static void -test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) -{ +test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { list_t *t; unsigned i; @@ -91,31 +87,31 @@ test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) } } -TEST_BEGIN(test_ql_tail_insert) -{ +TEST_BEGIN(test_ql_tail_insert) { list_head_t head; list_t entries[NENTRIES]; unsigned i; ql_new(&head); init_entries(entries, sizeof(entries)/sizeof(list_t)); - for (i = 0; i < NENTRIES; i++) + for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); + } test_entries_list(&head, entries, NENTRIES); } TEST_END -TEST_BEGIN(test_ql_tail_remove) -{ +TEST_BEGIN(test_ql_tail_remove) { list_head_t head; list_t entries[NENTRIES]; unsigned i; ql_new(&head); init_entries(entries, sizeof(entries)/sizeof(list_t)); - for (i = 0; i < NENTRIES; i++) + for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); + } for (i = 0; i < NENTRIES; i++) { test_entries_list(&head, entries, NENTRIES-i); @@ -125,31 +121,31 @@ TEST_BEGIN(test_ql_tail_remove) } TEST_END -TEST_BEGIN(test_ql_head_insert) -{ +TEST_BEGIN(test_ql_head_insert) { list_head_t head; list_t entries[NENTRIES]; unsigned i; ql_new(&head); init_entries(entries, sizeof(entries)/sizeof(list_t)); - for (i = 0; i < NENTRIES; i++) + for (i = 0; i < NENTRIES; i++) { ql_head_insert(&head, &entries[NENTRIES-i-1], link); + } test_entries_list(&head, entries, NENTRIES); } TEST_END -TEST_BEGIN(test_ql_head_remove) -{ +TEST_BEGIN(test_ql_head_remove) { list_head_t head; list_t entries[NENTRIES]; unsigned i; ql_new(&head); init_entries(entries, sizeof(entries)/sizeof(list_t)); - for (i = 0; i < NENTRIES; i++) + for (i = 0; i < NENTRIES; i++) { ql_head_insert(&head, &entries[NENTRIES-i-1], link); + } for (i = 0; i < NENTRIES; i++) { test_entries_list(&head, &entries[i], NENTRIES-i); @@ -159,8 +155,7 @@ 
TEST_BEGIN(test_ql_head_remove) } TEST_END -TEST_BEGIN(test_ql_insert) -{ +TEST_BEGIN(test_ql_insert) { list_head_t head; list_t entries[8]; list_t *a, *b, *c, *d, *e, *f, *g, *h; @@ -196,8 +191,7 @@ TEST_BEGIN(test_ql_insert) TEST_END int -main(void) -{ +main(void) { return (test( test_ql_empty, test_ql_tail_insert, diff --git a/test/unit/qr.c b/test/unit/qr.c index 7c9c1029..8061a345 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -13,8 +13,7 @@ struct ring_s { }; static void -init_entries(ring_t *entries) -{ +init_entries(ring_t *entries) { unsigned i; for (i = 0; i < NENTRIES; i++) { @@ -24,8 +23,7 @@ init_entries(ring_t *entries) } static void -test_independent_entries(ring_t *entries) -{ +test_independent_entries(ring_t *entries) { ring_t *t; unsigned i, j; @@ -61,8 +59,7 @@ test_independent_entries(ring_t *entries) } } -TEST_BEGIN(test_qr_one) -{ +TEST_BEGIN(test_qr_one) { ring_t entries[NENTRIES]; init_entries(entries); @@ -71,8 +68,7 @@ TEST_BEGIN(test_qr_one) TEST_END static void -test_entries_ring(ring_t *entries) -{ +test_entries_ring(ring_t *entries) { ring_t *t; unsigned i, j; @@ -104,27 +100,27 @@ test_entries_ring(ring_t *entries) } } -TEST_BEGIN(test_qr_after_insert) -{ +TEST_BEGIN(test_qr_after_insert) { ring_t entries[NENTRIES]; unsigned i; init_entries(entries); - for (i = 1; i < NENTRIES; i++) + for (i = 1; i < NENTRIES; i++) { qr_after_insert(&entries[i - 1], &entries[i], link); + } test_entries_ring(entries); } TEST_END -TEST_BEGIN(test_qr_remove) -{ +TEST_BEGIN(test_qr_remove) { ring_t entries[NENTRIES]; ring_t *t; unsigned i, j; init_entries(entries); - for (i = 1; i < NENTRIES; i++) + for (i = 1; i < NENTRIES; i++) { qr_after_insert(&entries[i - 1], &entries[i], link); + } for (i = 0; i < NENTRIES; i++) { j = 0; @@ -145,15 +141,15 @@ TEST_BEGIN(test_qr_remove) } TEST_END -TEST_BEGIN(test_qr_before_insert) -{ +TEST_BEGIN(test_qr_before_insert) { ring_t entries[NENTRIES]; ring_t *t; unsigned i, j; init_entries(entries); - for (i = 1; i < 
NENTRIES; i++) + for (i = 1; i < NENTRIES; i++) { qr_before_insert(&entries[i - 1], &entries[i], link); + } for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { @@ -184,8 +180,7 @@ TEST_BEGIN(test_qr_before_insert) TEST_END static void -test_split_entries(ring_t *entries) -{ +test_split_entries(ring_t *entries) { ring_t *t; unsigned i, j; @@ -206,14 +201,14 @@ test_split_entries(ring_t *entries) } } -TEST_BEGIN(test_qr_meld_split) -{ +TEST_BEGIN(test_qr_meld_split) { ring_t entries[NENTRIES]; unsigned i; init_entries(entries); - for (i = 1; i < NENTRIES; i++) + for (i = 1; i < NENTRIES; i++) { qr_after_insert(&entries[i - 1], &entries[i], link); + } qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); test_split_entries(entries); @@ -236,8 +231,7 @@ TEST_BEGIN(test_qr_meld_split) TEST_END int -main(void) -{ +main(void) { return (test( test_qr_one, test_qr_after_insert, diff --git a/test/unit/rb.c b/test/unit/rb.c index 56e00219..dea86c6e 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -1,14 +1,14 @@ #include "test/jemalloc_test.h" #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ - a_type *rbp_bh_t; \ - for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ - rbp_bh_t != NULL; \ - rbp_bh_t = rbtn_left_get(a_type, a_field, rbp_bh_t)) { \ - if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ - (r_height)++; \ + a_type *rbp_bh_t; \ + for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t != \ + NULL; rbp_bh_t = rbtn_left_get(a_type, a_field, \ + rbp_bh_t)) { \ + if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ + (r_height)++; \ + } \ } \ - } \ } while (0) typedef struct node_s node_t; @@ -42,8 +42,7 @@ node_cmp(const node_t *a, const node_t *b) { typedef rb_tree(node_t) tree_t; rb_gen(static, tree_, tree_t, node_t, link, node_cmp); -TEST_BEGIN(test_rb_empty) -{ +TEST_BEGIN(test_rb_empty) { tree_t tree; node_t key; @@ -68,52 +67,56 @@ TEST_BEGIN(test_rb_empty) TEST_END static unsigned -tree_recurse(node_t *node, 
unsigned black_height, unsigned black_depth) -{ +tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { unsigned ret = 0; node_t *left_node; node_t *right_node; - if (node == NULL) + if (node == NULL) { return (ret); + } left_node = rbtn_left_get(node_t, link, node); right_node = rbtn_right_get(node_t, link, node); - if (!rbtn_red_get(node_t, link, node)) + if (!rbtn_red_get(node_t, link, node)) { black_depth++; + } /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { - if (left_node != NULL) + if (left_node != NULL) { assert_false(rbtn_red_get(node_t, link, left_node), "Node should be black"); - if (right_node != NULL) + } + if (right_node != NULL) { assert_false(rbtn_red_get(node_t, link, right_node), "Node should be black"); + } } /* Self. */ assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ - if (left_node != NULL) + if (left_node != NULL) { ret += tree_recurse(left_node, black_height, black_depth); - else + } else { ret += (black_depth != black_height); + } /* Right subtree. 
*/ - if (right_node != NULL) + if (right_node != NULL) { ret += tree_recurse(right_node, black_height, black_depth); - else + } else { ret += (black_depth != black_height); + } return (ret); } static node_t * -tree_iterate_cb(tree_t *tree, node_t *node, void *data) -{ +tree_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; node_t *search_node; @@ -140,8 +143,7 @@ tree_iterate_cb(tree_t *tree, node_t *node, void *data) } static unsigned -tree_iterate(tree_t *tree) -{ +tree_iterate(tree_t *tree) { unsigned i; i = 0; @@ -151,8 +153,7 @@ tree_iterate(tree_t *tree) } static unsigned -tree_iterate_reverse(tree_t *tree) -{ +tree_iterate_reverse(tree_t *tree) { unsigned i; i = 0; @@ -162,8 +163,7 @@ tree_iterate_reverse(tree_t *tree) } static void -node_remove(tree_t *tree, node_t *node, unsigned nnodes) -{ +node_remove(tree_t *tree, node_t *node, unsigned nnodes) { node_t *search_node; unsigned black_height, imbalances; @@ -195,8 +195,7 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) } static node_t * -remove_iterate_cb(tree_t *tree, node_t *node, void *data) -{ +remove_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; node_t *ret = tree_next(tree, node); @@ -206,8 +205,7 @@ remove_iterate_cb(tree_t *tree, node_t *node, void *data) } static node_t * -remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) -{ +remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; node_t *ret = tree_prev(tree, node); @@ -217,16 +215,14 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) } static void -destroy_cb(node_t *node, void *data) -{ +destroy_cb(node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); (*nnodes)--; } -TEST_BEGIN(test_rb_random) -{ +TEST_BEGIN(test_rb_random) { #define NNODES 25 #define NBAGS 250 #define SEED 42 @@ -241,17 +237,20 @@ 
TEST_BEGIN(test_rb_random) switch (i) { case 0: /* Insert in order. */ - for (j = 0; j < NNODES; j++) + for (j = 0; j < NNODES; j++) { bag[j] = j; + } break; case 1: /* Insert in reverse order. */ - for (j = 0; j < NNODES; j++) + for (j = 0; j < NNODES; j++) { bag[j] = NNODES - j - 1; + } break; default: - for (j = 0; j < NNODES; j++) + for (j = 0; j < NNODES; j++) { bag[j] = gen_rand64_range(sfmt, NNODES); + } } for (j = 1; j <= NNODES; j++) { @@ -292,12 +291,14 @@ TEST_BEGIN(test_rb_random) /* Remove nodes. */ switch (i % 5) { case 0: - for (k = 0; k < j; k++) + for (k = 0; k < j; k++) { node_remove(&tree, &nodes[k], j - k); + } break; case 1: - for (k = j; k > 0; k--) + for (k = j; k > 0; k--) { node_remove(&tree, &nodes[k-1], k); + } break; case 2: { node_t *start; @@ -345,8 +346,7 @@ TEST_BEGIN(test_rb_random) TEST_END int -main(void) -{ +main(void) { return (test( test_rb_empty, test_rb_random)); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index d2f37055..ca99f8a8 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -6,12 +6,12 @@ rtree_node_dalloc_t *rtree_node_dalloc_orig; rtree_t *test_rtree; static rtree_elm_t * -rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) -{ +rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { rtree_elm_t *node; - if (rtree != test_rtree) + if (rtree != test_rtree) { return rtree_node_alloc_orig(tsdn, rtree, nelms); + } malloc_mutex_unlock(tsdn, &rtree->init_lock); node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t)); @@ -22,8 +22,7 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) } static void -rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) -{ +rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { if (rtree != test_rtree) { rtree_node_dalloc_orig(tsdn, rtree, node); return; @@ -32,8 +31,7 @@ rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) free(node); } 
-TEST_BEGIN(test_rtree_read_empty) -{ +TEST_BEGIN(test_rtree_read_empty) { tsdn_t *tsdn; unsigned i; @@ -65,8 +63,7 @@ typedef struct { } thd_start_arg_t; static void * -thd_start(void *varg) -{ +thd_start(void *varg) { thd_start_arg_t *arg = (thd_start_arg_t *)varg; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; sfmt_t *sfmt; @@ -98,8 +95,9 @@ thd_start(void *varg) "Unexpected rtree_elm_acquire() failure"); rtree_elm_read_acquired(tsdn, &arg->rtree, elm); rtree_elm_release(tsdn, &arg->rtree, elm); - } else + } else { rtree_read(tsdn, &arg->rtree, &rtree_ctx, key, false); + } } free(extent); @@ -107,8 +105,7 @@ thd_start(void *varg) return (NULL); } -TEST_BEGIN(test_rtree_concurrent) -{ +TEST_BEGIN(test_rtree_concurrent) { thd_start_arg_t arg; thd_t thds[NTHREADS]; sfmt_t *sfmt; @@ -123,10 +120,12 @@ TEST_BEGIN(test_rtree_concurrent) assert_false(rtree_new(&arg.rtree, arg.nbits), "Unexpected rtree_new() failure"); arg.seed = gen_rand32(sfmt); - for (j = 0; j < NTHREADS; j++) + for (j = 0; j < NTHREADS; j++) { thd_create(&thds[j], thd_start, (void *)&arg); - for (j = 0; j < NTHREADS; j++) + } + for (j = 0; j < NTHREADS; j++) { thd_join(thds[j], NULL); + } rtree_delete(tsdn, &arg.rtree); test_rtree = NULL; } @@ -139,8 +138,7 @@ TEST_END #undef NITERS #undef SEED -TEST_BEGIN(test_rtree_extrema) -{ +TEST_BEGIN(test_rtree_extrema) { unsigned i; extent_t extent_a, extent_b; tsdn_t *tsdn; @@ -173,8 +171,7 @@ TEST_BEGIN(test_rtree_extrema) } TEST_END -TEST_BEGIN(test_rtree_bits) -{ +TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn; unsigned i, j, k; @@ -217,8 +214,7 @@ TEST_BEGIN(test_rtree_bits) } TEST_END -TEST_BEGIN(test_rtree_random) -{ +TEST_BEGIN(test_rtree_random) { unsigned i; sfmt_t *sfmt; tsdn_t *tsdn; @@ -280,8 +276,7 @@ TEST_BEGIN(test_rtree_random) TEST_END int -main(void) -{ +main(void) { rtree_node_alloc_orig = rtree_node_alloc; rtree_node_alloc = rtree_node_alloc_intercept; rtree_node_dalloc_orig = rtree_node_dalloc; diff --git a/test/unit/size_classes.c 
b/test/unit/size_classes.c index f7c14bc0..38ea9bee 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -1,8 +1,7 @@ #include "test/jemalloc_test.h" static size_t -get_max_size_class(void) -{ +get_max_size_class(void) { unsigned nlextents; size_t mib[4]; size_t sz, miblen, max_size_class; @@ -23,8 +22,7 @@ get_max_size_class(void) return (max_size_class); } -TEST_BEGIN(test_size_classes) -{ +TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, max_index; @@ -80,8 +78,7 @@ TEST_BEGIN(test_size_classes) } TEST_END -TEST_BEGIN(test_psize_classes) -{ +TEST_BEGIN(test_psize_classes) { size_t size_class, max_psz; pszind_t pind, max_pind; @@ -136,8 +133,7 @@ TEST_BEGIN(test_psize_classes) } TEST_END -TEST_BEGIN(test_overflow) -{ +TEST_BEGIN(test_overflow) { size_t max_size_class, max_psz; max_size_class = get_max_size_class(); @@ -176,8 +172,7 @@ TEST_BEGIN(test_overflow) TEST_END int -main(void) -{ +main(void) { return (test( test_size_classes, test_psize_classes, diff --git a/test/unit/slab.c b/test/unit/slab.c index 7e6a62f5..a5036f59 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_arena_slab_regind) -{ +TEST_BEGIN(test_arena_slab_regind) { szind_t binind; for (binind = 0; binind < NBINS; binind++) { @@ -27,8 +26,7 @@ TEST_BEGIN(test_arena_slab_regind) TEST_END int -main(void) -{ +main(void) { return (test( test_arena_slab_regind)); } diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 071aede2..ac279159 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -7,8 +7,7 @@ static const uint64_t smoothstep_tab[] = { #undef STEP }; -TEST_BEGIN(test_smoothstep_integral) -{ +TEST_BEGIN(test_smoothstep_integral) { uint64_t sum, min, max; unsigned i; @@ -20,8 +19,9 @@ TEST_BEGIN(test_smoothstep_integral) * integral may be off by as much as SMOOTHSTEP_NSTEPS ulps. 
*/ sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { sum += smoothstep_tab[i]; + } max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); min = max - SMOOTHSTEP_NSTEPS; @@ -36,8 +36,7 @@ TEST_BEGIN(test_smoothstep_integral) } TEST_END -TEST_BEGIN(test_smoothstep_monotonic) -{ +TEST_BEGIN(test_smoothstep_monotonic) { uint64_t prev_h; unsigned i; @@ -58,8 +57,7 @@ TEST_BEGIN(test_smoothstep_monotonic) } TEST_END -TEST_BEGIN(test_smoothstep_slope) -{ +TEST_BEGIN(test_smoothstep_slope) { uint64_t prev_h, prev_delta; unsigned i; @@ -96,8 +94,7 @@ TEST_BEGIN(test_smoothstep_slope) TEST_END int -main(void) -{ +main(void) { return (test( test_smoothstep_integral, test_smoothstep_monotonic, diff --git a/test/unit/stats.c b/test/unit/stats.c index 18856f12..98673a8e 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_stats_summary) -{ +TEST_BEGIN(test_stats_summary) { size_t sz, allocated, active, resident, mapped; int expected = config_stats ? 
0 : ENOENT; @@ -26,8 +25,7 @@ TEST_BEGIN(test_stats_summary) } TEST_END -TEST_BEGIN(test_stats_large) -{ +TEST_BEGIN(test_stats_large) { void *p; uint64_t epoch; size_t allocated; @@ -67,8 +65,7 @@ TEST_BEGIN(test_stats_large) } TEST_END -TEST_BEGIN(test_stats_arenas_summary) -{ +TEST_BEGIN(test_stats_arenas_summary) { unsigned arena; void *little, *large; uint64_t epoch; @@ -118,22 +115,19 @@ TEST_BEGIN(test_stats_arenas_summary) TEST_END void * -thd_start(void *arg) -{ +thd_start(void *arg) { return (NULL); } static void -no_lazy_lock(void) -{ +no_lazy_lock(void) { thd_t thd; thd_create(&thd, thd_start, NULL); thd_join(thd, NULL); } -TEST_BEGIN(test_stats_arenas_small) -{ +TEST_BEGIN(test_stats_arenas_small) { unsigned arena; void *p; size_t sz, allocated; @@ -183,8 +177,7 @@ TEST_BEGIN(test_stats_arenas_small) } TEST_END -TEST_BEGIN(test_stats_arenas_large) -{ +TEST_BEGIN(test_stats_arenas_large) { unsigned arena; void *p; size_t sz, allocated; @@ -224,8 +217,7 @@ TEST_BEGIN(test_stats_arenas_large) } TEST_END -TEST_BEGIN(test_stats_arenas_bins) -{ +TEST_BEGIN(test_stats_arenas_bins) { unsigned arena; void *p; size_t sz, curslabs, curregs; @@ -299,8 +291,7 @@ TEST_BEGIN(test_stats_arenas_bins) } TEST_END -TEST_BEGIN(test_stats_arenas_lextents) -{ +TEST_BEGIN(test_stats_arenas_lextents) { unsigned arena; void *p; uint64_t epoch, nmalloc, ndalloc; @@ -347,8 +338,7 @@ TEST_BEGIN(test_stats_arenas_lextents) TEST_END int -main(void) -{ +main(void) { return (test( test_stats_summary, test_stats_large, diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 5accd8e2..1fb8fe6f 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -39,8 +39,7 @@ struct parser_s { static void token_init(token_t *token, parser_t *parser, token_type_t token_type, - size_t pos, size_t len, size_t line, size_t col) -{ + size_t pos, size_t len, size_t line, size_t col) { token->parser = parser; token->token_type = token_type; token->pos = pos; @@ -50,8 +49,7 @@ 
token_init(token_t *token, parser_t *parser, token_type_t token_type, } static void -token_error(token_t *token) -{ +token_error(token_t *token) { if (!token->parser->verbose) { return; } @@ -72,8 +70,7 @@ token_error(token_t *token) } static void -parser_init(parser_t *parser, bool verbose) -{ +parser_init(parser_t *parser, bool verbose) { parser->verbose = verbose; parser->buf = NULL; parser->len = 0; @@ -83,16 +80,14 @@ parser_init(parser_t *parser, bool verbose) } static void -parser_fini(parser_t *parser) -{ +parser_fini(parser_t *parser) { if (parser->buf != NULL) { dallocx(parser->buf, MALLOCX_TCACHE_NONE); } } static bool -parser_append(parser_t *parser, const char *str) -{ +parser_append(parser_t *parser, const char *str) { size_t len = strlen(str); char *buf = (parser->buf == NULL) ? mallocx(len + 1, MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1, @@ -107,8 +102,7 @@ parser_append(parser_t *parser, const char *str) } static bool -parser_tokenize(parser_t *parser) -{ +parser_tokenize(parser_t *parser) { enum { STATE_START, STATE_EOI, @@ -667,8 +661,7 @@ static bool parser_parse_array(parser_t *parser); static bool parser_parse_object(parser_t *parser); static bool -parser_parse_value(parser_t *parser) -{ +parser_parse_value(parser_t *parser) { switch (parser->token.token_type) { case TOKEN_TYPE_NULL: case TOKEN_TYPE_FALSE: @@ -687,8 +680,7 @@ parser_parse_value(parser_t *parser) } static bool -parser_parse_pair(parser_t *parser) -{ +parser_parse_pair(parser_t *parser) { assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, "Pair should start with string"); if (parser_tokenize(parser)) { @@ -706,8 +698,7 @@ parser_parse_pair(parser_t *parser) } static bool -parser_parse_values(parser_t *parser) -{ +parser_parse_values(parser_t *parser) { if (parser_parse_value(parser)) { return true; } @@ -734,8 +725,7 @@ parser_parse_values(parser_t *parser) } static bool -parser_parse_array(parser_t *parser) -{ +parser_parse_array(parser_t *parser) { 
assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, "Array should start with ["); if (parser_tokenize(parser)) { @@ -751,8 +741,7 @@ parser_parse_array(parser_t *parser) } static bool -parser_parse_pairs(parser_t *parser) -{ +parser_parse_pairs(parser_t *parser) { assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, "Object should start with string"); if (parser_parse_pair(parser)) { @@ -787,8 +776,7 @@ parser_parse_pairs(parser_t *parser) } static bool -parser_parse_object(parser_t *parser) -{ +parser_parse_object(parser_t *parser) { assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, "Object should start with {"); if (parser_tokenize(parser)) { @@ -806,8 +794,7 @@ parser_parse_object(parser_t *parser) } static bool -parser_parse(parser_t *parser) -{ +parser_parse(parser_t *parser) { if (parser_tokenize(parser)) { goto label_error; } @@ -831,8 +818,7 @@ label_error: return true; } -TEST_BEGIN(test_json_parser) -{ +TEST_BEGIN(test_json_parser) { size_t i; const char *invalid_inputs[] = { /* Tokenizer error case tests. 
*/ @@ -929,16 +915,14 @@ TEST_BEGIN(test_json_parser) TEST_END void -write_cb(void *opaque, const char *str) -{ +write_cb(void *opaque, const char *str) { parser_t *parser = (parser_t *)opaque; if (parser_append(parser, str)) { test_fail("Unexpected input appending failure"); } } -TEST_BEGIN(test_stats_print_json) -{ +TEST_BEGIN(test_stats_print_json) { const char *opts[] = { "J", "Jg", @@ -998,8 +982,7 @@ TEST_BEGIN(test_stats_print_json) TEST_END int -main(void) -{ +main(void) { return (test( test_json_parser, test_stats_print_json)); diff --git a/test/unit/ticker.c b/test/unit/ticker.c index b8af46c7..be54356f 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_ticker_tick) -{ +TEST_BEGIN(test_ticker_tick) { #define NREPS 2 #define NTICKS 3 ticker_t ticker; @@ -26,8 +25,7 @@ TEST_BEGIN(test_ticker_tick) } TEST_END -TEST_BEGIN(test_ticker_ticks) -{ +TEST_BEGIN(test_ticker_ticks) { #define NTICKS 3 ticker_t ticker; @@ -45,8 +43,7 @@ TEST_BEGIN(test_ticker_ticks) } TEST_END -TEST_BEGIN(test_ticker_copy) -{ +TEST_BEGIN(test_ticker_copy) { #define NTICKS 3 ticker_t ta, tb; @@ -66,8 +63,7 @@ TEST_BEGIN(test_ticker_copy) TEST_END int -main(void) -{ +main(void) { return (test( test_ticker_tick, test_ticker_ticks, diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 5313ef88..484dc30b 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -10,8 +10,7 @@ malloc_tsd_types(data_, data_t) malloc_tsd_protos(, data_, data_t) void -data_cleanup(void *arg) -{ +data_cleanup(void *arg) { data_t *data = (data_t *)arg; if (!data_cleanup_executed) { @@ -53,8 +52,7 @@ malloc_tsd_data(, data_, data_t, DATA_INIT) malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup) static void * -thd_start(void *arg) -{ +thd_start(void *arg) { data_t d = (data_t)(uintptr_t)arg; void *p; @@ -76,14 +74,12 @@ thd_start(void *arg) return (NULL); } -TEST_BEGIN(test_tsd_main_thread) -{ +TEST_BEGIN(test_tsd_main_thread) { 
thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END -TEST_BEGIN(test_tsd_sub_thread) -{ +TEST_BEGIN(test_tsd_sub_thread) { thd_t thd; data_cleanup_executed = false; @@ -95,8 +91,7 @@ TEST_BEGIN(test_tsd_sub_thread) TEST_END int -main(void) -{ +main(void) { /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); diff --git a/test/unit/util.c b/test/unit/util.c index b891a199..3d1ecf4e 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -31,26 +31,22 @@ } \ } while (0) -TEST_BEGIN(test_pow2_ceil_u64) -{ +TEST_BEGIN(test_pow2_ceil_u64) { TEST_POW2_CEIL(uint64_t, u64, FMTu64); } TEST_END -TEST_BEGIN(test_pow2_ceil_u32) -{ +TEST_BEGIN(test_pow2_ceil_u32) { TEST_POW2_CEIL(uint32_t, u32, FMTu32); } TEST_END -TEST_BEGIN(test_pow2_ceil_zu) -{ +TEST_BEGIN(test_pow2_ceil_zu) { TEST_POW2_CEIL(size_t, zu, "zu"); } TEST_END -TEST_BEGIN(test_malloc_strtoumax_no_endptr) -{ +TEST_BEGIN(test_malloc_strtoumax_no_endptr) { int err; set_errno(0); @@ -60,8 +56,7 @@ TEST_BEGIN(test_malloc_strtoumax_no_endptr) } TEST_END -TEST_BEGIN(test_malloc_strtoumax) -{ +TEST_BEGIN(test_malloc_strtoumax) { struct test_s { const char *input; const char *expected_remainder; @@ -155,8 +150,7 @@ TEST_BEGIN(test_malloc_strtoumax) } TEST_END -TEST_BEGIN(test_malloc_snprintf_truncated) -{ +TEST_BEGIN(test_malloc_snprintf_truncated) { #define BUFLEN 15 char buf[BUFLEN]; size_t result; @@ -188,8 +182,7 @@ TEST_BEGIN(test_malloc_snprintf_truncated) } TEST_END -TEST_BEGIN(test_malloc_snprintf) -{ +TEST_BEGIN(test_malloc_snprintf) { #define BUFLEN 128 char buf[BUFLEN]; size_t result; @@ -302,8 +295,7 @@ TEST_BEGIN(test_malloc_snprintf) TEST_END int -main(void) -{ +main(void) { return (test( test_pow2_ceil_u64, test_pow2_ceil_u32, diff --git a/test/unit/witness.c b/test/unit/witness.c index 13593989..d75ca482 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -12,32 +12,27 @@ static bool saw_lockless_error; static void 
witness_lock_error_intercept(const witness_list_t *witnesses, - const witness_t *witness) -{ + const witness_t *witness) { saw_lock_error = true; } static void -witness_owner_error_intercept(const witness_t *witness) -{ +witness_owner_error_intercept(const witness_t *witness) { saw_owner_error = true; } static void -witness_not_owner_error_intercept(const witness_t *witness) -{ +witness_not_owner_error_intercept(const witness_t *witness) { saw_not_owner_error = true; } static void -witness_lockless_error_intercept(const witness_list_t *witnesses) -{ +witness_lockless_error_intercept(const witness_list_t *witnesses) { saw_lockless_error = true; } static int -witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) -{ +witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); @@ -47,8 +42,8 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) } static int -witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, void *ob) -{ +witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, + void *ob) { assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); @@ -57,8 +52,7 @@ witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, void *ob) return (-strcmp(a->name, b->name)); } -TEST_BEGIN(test_witness) -{ +TEST_BEGIN(test_witness) { witness_t a, b; tsdn_t *tsdn; @@ -85,8 +79,7 @@ TEST_BEGIN(test_witness) } TEST_END -TEST_BEGIN(test_witness_comp) -{ +TEST_BEGIN(test_witness_comp) { witness_t a, b, c, d; tsdn_t *tsdn; @@ -135,8 +128,7 @@ TEST_BEGIN(test_witness_comp) } TEST_END -TEST_BEGIN(test_witness_reversal) -{ +TEST_BEGIN(test_witness_reversal) { witness_t a, b; tsdn_t *tsdn; @@ -167,8 +159,7 @@ TEST_BEGIN(test_witness_reversal) } TEST_END -TEST_BEGIN(test_witness_recursive) -{ +TEST_BEGIN(test_witness_recursive) { witness_t a; tsdn_t *tsdn; @@ 
-205,8 +196,7 @@ TEST_BEGIN(test_witness_recursive) } TEST_END -TEST_BEGIN(test_witness_unlock_not_owned) -{ +TEST_BEGIN(test_witness_unlock_not_owned) { witness_t a; tsdn_t *tsdn; @@ -232,8 +222,7 @@ TEST_BEGIN(test_witness_unlock_not_owned) } TEST_END -TEST_BEGIN(test_witness_lockful) -{ +TEST_BEGIN(test_witness_lockful) { witness_t a; tsdn_t *tsdn; @@ -265,8 +254,7 @@ TEST_BEGIN(test_witness_lockful) TEST_END int -main(void) -{ +main(void) { return (test( test_witness, test_witness_comp, diff --git a/test/unit/zero.c b/test/unit/zero.c index c752954c..a802f053 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -6,8 +6,7 @@ const char *malloc_conf = #endif static void -test_zero(size_t sz_min, size_t sz_max) -{ +test_zero(size_t sz_min, size_t sz_max) { uint8_t *s; size_t sz_prev, sz, i; #define MAGIC ((uint8_t)0x61) @@ -45,23 +44,20 @@ test_zero(size_t sz_min, size_t sz_max) #undef MAGIC } -TEST_BEGIN(test_zero_small) -{ +TEST_BEGIN(test_zero_small) { test_skip_if(!config_fill); test_zero(1, SMALL_MAXCLASS-1); } TEST_END -TEST_BEGIN(test_zero_large) -{ +TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); test_zero(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); } TEST_END int -main(void) -{ +main(void) { return (test( test_zero_small, test_zero_large)); From f408643a4c90d51ab8ddc1d68610650d5db87edf Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 19 Jan 2017 18:15:45 -0800 Subject: [PATCH 0603/2608] Remove extraneous parens around return arguments. This resolves #540. 
--- configure.ac | 4 +- include/jemalloc/internal/arena_inlines_a.h | 16 +- include/jemalloc/internal/arena_inlines_b.h | 20 +- include/jemalloc/internal/atomic_inlines.h | 120 +++++----- include/jemalloc/internal/base_inlines.h | 2 +- include/jemalloc/internal/bitmap_inlines.h | 8 +- include/jemalloc/internal/extent_inlines.h | 50 ++--- include/jemalloc/internal/hash_inlines.h | 14 +- .../jemalloc/internal/jemalloc_internal.h.in | 124 +++++------ include/jemalloc/internal/ph.h | 8 +- include/jemalloc/internal/prng_inlines.h | 20 +- include/jemalloc/internal/prof_inlines.h | 16 +- include/jemalloc/internal/rb.h | 66 +++--- include/jemalloc/internal/rtree_inlines.h | 38 ++-- include/jemalloc/internal/tcache_inlines.h | 26 +-- include/jemalloc/internal/ticker_inlines.h | 6 +- include/jemalloc/internal/tsd_inlines.h | 24 +- include/jemalloc/internal/tsd_types.h | 68 +++--- include/jemalloc/internal/util_inlines.h | 44 ++-- include/jemalloc/internal/witness_inlines.h | 4 +- include/msvc_compat/strings.h | 16 +- src/arena.c | 124 +++++------ src/base.c | 28 +-- src/bitmap.c | 4 +- src/ckh.c | 50 ++--- src/ctl.c | 144 ++++++------ src/extent.c | 161 +++++++------- src/extent_dss.c | 26 +-- src/extent_mmap.c | 14 +- src/jemalloc.c | 164 +++++++------- src/jemalloc_cpp.cpp | 12 +- src/large.c | 46 ++-- src/mutex.c | 16 +- src/nstime.c | 16 +- src/pages.c | 36 +-- src/prof.c | 210 +++++++++--------- src/rtree.c | 22 +- src/tcache.c | 28 +-- src/tsd.c | 12 +- src/util.c | 20 +- src/zone.c | 20 +- test/include/test/btalloc.h | 2 +- test/include/test/extent_hooks.h | 32 +-- test/include/test/math.h | 24 +- test/include/test/mq.h | 12 +- test/integration/MALLOCX_ARENA.c | 6 +- test/integration/aligned_alloc.c | 4 +- test/integration/allocated.c | 8 +- test/integration/cpp/basic.cpp | 4 +- test/integration/extent.c | 4 +- test/integration/mallocx.c | 12 +- test/integration/overflow.c | 4 +- test/integration/posix_memalign.c | 4 +- test/integration/rallocx.c | 14 +- 
test/integration/sdallocx.c | 4 +- test/integration/thread_arena.c | 6 +- test/integration/thread_tcache_enabled.c | 8 +- test/integration/xallocx.c | 20 +- test/src/btalloc.c | 2 +- test/src/mtx.c | 8 +- test/src/test.c | 8 +- test/src/timer.c | 2 +- test/stress/microbench.c | 4 +- test/unit/SFMT.c | 4 +- test/unit/a0.c | 4 +- test/unit/arena_reset.c | 30 +-- test/unit/atomic.c | 4 +- test/unit/base.c | 4 +- test/unit/bitmap.c | 6 +- test/unit/ckh.c | 4 +- test/unit/decay.c | 8 +- test/unit/extent_quantize.c | 4 +- test/unit/fork.c | 4 +- test/unit/hash.c | 16 +- test/unit/junk.c | 4 +- test/unit/mallctl.c | 4 +- test/unit/math.c | 8 +- test/unit/mq.c | 8 +- test/unit/mtx.c | 6 +- test/unit/nstime.c | 4 +- test/unit/pack.c | 12 +- test/unit/pages.c | 4 +- test/unit/ph.c | 16 +- test/unit/prng.c | 4 +- test/unit/prof_accum.c | 10 +- test/unit/prof_active.c | 4 +- test/unit/prof_gdump.c | 6 +- test/unit/prof_idump.c | 6 +- test/unit/prof_reset.c | 12 +- test/unit/prof_thread_name.c | 6 +- test/unit/ql.c | 4 +- test/unit/qr.c | 4 +- test/unit/rb.c | 20 +- test/unit/rtree.c | 8 +- test/unit/size_classes.c | 6 +- test/unit/slab.c | 4 +- test/unit/smoothstep.c | 4 +- test/unit/stats.c | 6 +- test/unit/stats_print.c | 4 +- test/unit/ticker.c | 4 +- test/unit/tsd.c | 8 +- test/unit/util.c | 4 +- test/unit/witness.c | 8 +- test/unit/zero.c | 4 +- 104 files changed, 1161 insertions(+), 1168 deletions(-) diff --git a/configure.ac b/configure.ac index 4a1168b0..7530eff7 100644 --- a/configure.ac +++ b/configure.ac @@ -295,7 +295,7 @@ if test "x$enable_cxx" = "x1" ; then ], [[ int *arr = (int *)malloc(sizeof(int) * 42); if (arr == NULL) - return (1); + return 1; ]], [je_cv_libstdcxx]) if test "x${je_cv_libstdcxx}" = "xno" ; then LIBS="${SAVED_LIBS}" @@ -1659,7 +1659,7 @@ JE_COMPILABLE([C11 atomics], [ uint64_t x = 1; volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; uint64_t r = atomic_fetch_add(a, x) + x; - return (r == 0); + return r == 0; ], 
[je_cv_c11atomics]) if test "x${je_cv_c11atomics}" = "xyes" ; then AC_DEFINE([JEMALLOC_C11ATOMICS]) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 3c2b9b0a..a81aaf56 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -15,7 +15,7 @@ bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); JEMALLOC_INLINE unsigned arena_ind_get(const arena_t *arena) { - return (base_ind_get(arena->base)); + return base_ind_get(arena->base); } JEMALLOC_INLINE void @@ -30,7 +30,7 @@ arena_internal_sub(arena_t *arena, size_t size) { JEMALLOC_INLINE size_t arena_internal_get(arena_t *arena) { - return (atomic_read_zu(&arena->stats.internal)); + return atomic_read_zu(&arena->stats.internal); } JEMALLOC_INLINE bool @@ -41,9 +41,9 @@ arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes) { arena->prof_accumbytes += accumbytes; if (arena->prof_accumbytes >= prof_interval) { arena->prof_accumbytes %= prof_interval; - return (true); + return true; } - return (false); + return false; } JEMALLOC_INLINE bool @@ -51,9 +51,9 @@ arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { cassert(config_prof); if (likely(prof_interval == 0)) { - return (false); + return false; } - return (arena_prof_accum_impl(arena, accumbytes)); + return arena_prof_accum_impl(arena, accumbytes); } JEMALLOC_INLINE bool @@ -61,7 +61,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); if (likely(prof_interval == 0)) { - return (false); + return false; } { @@ -70,7 +70,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { malloc_mutex_lock(tsdn, &arena->lock); ret = arena_prof_accum_impl(arena, accumbytes); malloc_mutex_unlock(tsdn, &arena->lock); - return (ret); + return ret; } } diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5772781d..a180322b 
100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -26,7 +26,7 @@ JEMALLOC_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); - return (binind); + return binind; } JEMALLOC_INLINE prof_tctx_t * @@ -35,9 +35,9 @@ arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { assert(ptr != NULL); if (unlikely(!extent_slab_get(extent))) { - return (large_prof_tctx_get(tsdn, extent)); + return large_prof_tctx_get(tsdn, extent); } - return ((prof_tctx_t *)(uintptr_t)1U); + return (prof_tctx_t *)(uintptr_t)1U; } JEMALLOC_INLINE void @@ -94,23 +94,23 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, if (likely(tcache != NULL)) { if (likely(size <= SMALL_MAXCLASS)) { - return (tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); + return tcache_alloc_small(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path); } if (likely(size <= tcache_maxclass)) { - return (tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path)); + return tcache_alloc_large(tsdn_tsd(tsdn), arena, + tcache, size, ind, zero, slow_path); } /* (size > tcache_maxclass) case falls through. */ assert(size > tcache_maxclass); } - return (arena_malloc_hard(tsdn, arena, size, ind, zero)); + return arena_malloc_hard(tsdn, arena, size, ind, zero); } JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (extent_arena_get(iealloc(tsdn, ptr))); + return extent_arena_get(iealloc(tsdn, ptr)); } /* Return the size of the allocation pointed to by ptr. 
*/ @@ -126,7 +126,7 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { ret = large_salloc(tsdn, extent); } - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h index 790a08a2..7c1902f8 100644 --- a/include/jemalloc/internal/atomic_inlines.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -9,15 +9,15 @@ * operations can be optimized away if the return values aren't used by the * callers. * - * atomic_read_( *p) { return (*p); } - * atomic_add_( *p, x) { return (*p += x); } - * atomic_sub_( *p, x) { return (*p -= x); } + * atomic_read_( *p) { return *p; } + * atomic_add_( *p, x) { return *p += x; } + * atomic_sub_( *p, x) { return *p -= x; } * bool atomic_cas_( *p, c, s) * { * if (*p != c) - * return (true); + * return true; * *p = s; - * return (false); + * return false; * } * void atomic_write_( *p, x) { *p = x; } */ @@ -62,7 +62,7 @@ atomic_add_u64(uint64_t *p, uint64_t x) { : "m" (*p) /* Inputs. */ ); - return (t + x); + return t + x; } JEMALLOC_INLINE uint64_t @@ -77,7 +77,7 @@ atomic_sub_u64(uint64_t *p, uint64_t x) { : "m" (*p) /* Inputs. */ ); - return (t + x); + return t + x; } JEMALLOC_INLINE bool @@ -92,7 +92,7 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { : "memory" /* Clobbers. 
*/ ); - return (!(bool)success); + return !(bool)success; } JEMALLOC_INLINE void @@ -108,19 +108,19 @@ atomic_write_u64(uint64_t *p, uint64_t x) { JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_add(a, x) + x); + return atomic_fetch_add(a, x) + x; } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (atomic_fetch_sub(a, x) - x); + return atomic_fetch_sub(a, x) - x; } JEMALLOC_INLINE bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); + return !atomic_compare_exchange_strong(a, &c, s); } JEMALLOC_INLINE void @@ -137,21 +137,21 @@ atomic_add_u64(uint64_t *p, uint64_t x) { */ assert(sizeof(uint64_t) == sizeof(unsigned long)); - return (atomic_fetchadd_long(p, (unsigned long)x) + x); + return atomic_fetchadd_long(p, (unsigned long)x) + x; } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { assert(sizeof(uint64_t) == sizeof(unsigned long)); - return (atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x); + return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x; } JEMALLOC_INLINE bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { assert(sizeof(uint64_t) == sizeof(unsigned long)); - return (!atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s)); + return !atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s); } JEMALLOC_INLINE void @@ -163,17 +163,17 @@ atomic_write_u64(uint64_t *p, uint64_t x) { # elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - return (OSAtomicAdd64((int64_t)x, (int64_t *)p)); + return OSAtomicAdd64((int64_t)x, (int64_t *)p); } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - return 
(OSAtomicAdd64(-((int64_t)x), (int64_t *)p)); + return OSAtomicAdd64(-((int64_t)x), (int64_t *)p); } JEMALLOC_INLINE bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return (!OSAtomicCompareAndSwap64(c, s, (int64_t *)p)); + return !OSAtomicCompareAndSwap64(c, s, (int64_t *)p); } JEMALLOC_INLINE void @@ -188,12 +188,12 @@ atomic_write_u64(uint64_t *p, uint64_t x) { # elif (defined(_MSC_VER)) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - return (InterlockedExchangeAdd64(p, x) + x); + return InterlockedExchangeAdd64(p, x) + x; } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - return (InterlockedExchangeAdd64(p, -((int64_t)x)) - x); + return InterlockedExchangeAdd64(p, -((int64_t)x)) - x; } JEMALLOC_INLINE bool @@ -201,7 +201,7 @@ atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { uint64_t o; o = InterlockedCompareExchange64(p, s, c); - return (o != c); + return o != c; } JEMALLOC_INLINE void @@ -212,17 +212,17 @@ atomic_write_u64(uint64_t *p, uint64_t x) { defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { - return (__sync_add_and_fetch(p, x)); + return __sync_add_and_fetch(p, x); } JEMALLOC_INLINE uint64_t atomic_sub_u64(uint64_t *p, uint64_t x) { - return (__sync_sub_and_fetch(p, x)); + return __sync_sub_and_fetch(p, x); } JEMALLOC_INLINE bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return (!__sync_bool_compare_and_swap(p, c, s)); + return !__sync_bool_compare_and_swap(p, c, s); } JEMALLOC_INLINE void @@ -247,7 +247,7 @@ atomic_add_u32(uint32_t *p, uint32_t x) { : "m" (*p) /* Inputs. */ ); - return (t + x); + return t + x; } JEMALLOC_INLINE uint32_t @@ -262,7 +262,7 @@ atomic_sub_u32(uint32_t *p, uint32_t x) { : "m" (*p) /* Inputs. 
*/ ); - return (t + x); + return t + x; } JEMALLOC_INLINE bool @@ -277,7 +277,7 @@ atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { : "memory" ); - return (!(bool)success); + return !(bool)success; } JEMALLOC_INLINE void @@ -293,19 +293,19 @@ atomic_write_u32(uint32_t *p, uint32_t x) { JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_add(a, x) + x); + return atomic_fetch_add(a, x) + x; } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (atomic_fetch_sub(a, x) - x); + return atomic_fetch_sub(a, x) - x; } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return (!atomic_compare_exchange_strong(a, &c, s)); + return !atomic_compare_exchange_strong(a, &c, s); } JEMALLOC_INLINE void @@ -316,17 +316,17 @@ atomic_write_u32(uint32_t *p, uint32_t x) { #elif (defined(JEMALLOC_ATOMIC9)) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, x) + x); + return atomic_fetchadd_32(p, x) + x; } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x); + return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x; } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return (!atomic_cmpset_32(p, c, s)); + return !atomic_cmpset_32(p, c, s); } JEMALLOC_INLINE void @@ -336,17 +336,17 @@ atomic_write_u32(uint32_t *p, uint32_t x) { #elif (defined(JEMALLOC_OSATOMIC)) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32((int32_t)x, (int32_t *)p)); + return OSAtomicAdd32((int32_t)x, (int32_t *)p); } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (OSAtomicAdd32(-((int32_t)x), 
(int32_t *)p)); + return OSAtomicAdd32(-((int32_t)x), (int32_t *)p); } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return (!OSAtomicCompareAndSwap32(c, s, (int32_t *)p)); + return !OSAtomicCompareAndSwap32(c, s, (int32_t *)p); } JEMALLOC_INLINE void @@ -361,12 +361,12 @@ atomic_write_u32(uint32_t *p, uint32_t x) { #elif (defined(_MSC_VER)) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (InterlockedExchangeAdd(p, x) + x); + return InterlockedExchangeAdd(p, x) + x; } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (InterlockedExchangeAdd(p, -((int32_t)x)) - x); + return InterlockedExchangeAdd(p, -((int32_t)x)) - x; } JEMALLOC_INLINE bool @@ -374,7 +374,7 @@ atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { uint32_t o; o = InterlockedCompareExchange(p, s, c); - return (o != c); + return o != c; } JEMALLOC_INLINE void @@ -385,17 +385,17 @@ atomic_write_u32(uint32_t *p, uint32_t x) { defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) JEMALLOC_INLINE uint32_t atomic_add_u32(uint32_t *p, uint32_t x) { - return (__sync_add_and_fetch(p, x)); + return __sync_add_and_fetch(p, x); } JEMALLOC_INLINE uint32_t atomic_sub_u32(uint32_t *p, uint32_t x) { - return (__sync_sub_and_fetch(p, x)); + return __sync_sub_and_fetch(p, x); } JEMALLOC_INLINE bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return (!__sync_bool_compare_and_swap(p, c, s)); + return !__sync_bool_compare_and_swap(p, c, s); } JEMALLOC_INLINE void @@ -411,27 +411,27 @@ atomic_write_u32(uint32_t *p, uint32_t x) { JEMALLOC_INLINE void * atomic_add_p(void **p, void *x) { #if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)x)); + return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)x)); + return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)x); #endif } JEMALLOC_INLINE void * atomic_sub_p(void **p, void 
*x) { #if (LG_SIZEOF_PTR == 3) - return ((void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); + return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_PTR == 2) - return ((void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); + return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } JEMALLOC_INLINE bool atomic_cas_p(void **p, void *c, void *s) { #if (LG_SIZEOF_PTR == 3) - return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); + return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); #elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); + return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); #endif } @@ -449,27 +449,27 @@ atomic_write_p(void **p, const void *x) { JEMALLOC_INLINE size_t atomic_add_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x)); + return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x)); + return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x); #endif } JEMALLOC_INLINE size_t atomic_sub_zu(size_t *p, size_t x) { #if (LG_SIZEOF_PTR == 3) - return ((size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x))); + return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_PTR == 2) - return ((size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x))); + return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } JEMALLOC_INLINE bool atomic_cas_zu(size_t *p, size_t c, size_t s) { #if (LG_SIZEOF_PTR == 3) - return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); + return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); #elif (LG_SIZEOF_PTR == 2) - return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); + return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); 
#endif } @@ -487,29 +487,27 @@ atomic_write_zu(size_t *p, size_t x) { JEMALLOC_INLINE unsigned atomic_add_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x)); + return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x); #elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x)); + return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x); #endif } JEMALLOC_INLINE unsigned atomic_sub_u(unsigned *p, unsigned x) { #if (LG_SIZEOF_INT == 3) - return ((unsigned)atomic_add_u64((uint64_t *)p, - (uint64_t)-((int64_t)x))); + return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); #elif (LG_SIZEOF_INT == 2) - return ((unsigned)atomic_add_u32((uint32_t *)p, - (uint32_t)-((int32_t)x))); + return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); #endif } JEMALLOC_INLINE bool atomic_cas_u(unsigned *p, unsigned c, unsigned s) { #if (LG_SIZEOF_INT == 3) - return (atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s)); + return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); #elif (LG_SIZEOF_INT == 2) - return (atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s)); + return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); #endif } diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index 94fb1a95..aa8306ac 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -8,7 +8,7 @@ unsigned base_ind_get(const base_t *base); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) JEMALLOC_INLINE unsigned base_ind_get(const base_t *base) { - return (base->ind); + return base->ind; } #endif diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index 1a2411df..df582bbe 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -22,10 +22,10 @@ 
bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { for (i = 0; i < binfo->ngroups; i++) { if (bitmap[i] != 0) { - return (false); + return false; } } - return (true); + return true; #endif } @@ -37,7 +37,7 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { assert(bit < binfo->nbits); goff = bit >> LG_BITMAP_GROUP_NBITS; g = bitmap[goff]; - return (!(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK)))); + return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); } JEMALLOC_INLINE void @@ -103,7 +103,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); #endif bitmap_set(bitmap, binfo, bit); - return (bit); + return bit; } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 274e69c6..379dd290 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -47,65 +47,65 @@ extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return (rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - dependent)); + return rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + dependent); } JEMALLOC_INLINE arena_t * extent_arena_get(const extent_t *extent) { - return (extent->e_arena); + return extent->e_arena; } JEMALLOC_INLINE void * extent_base_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent->e_slab); - return (PAGE_ADDR2BASE(extent->e_addr)); + return PAGE_ADDR2BASE(extent->e_addr); } JEMALLOC_INLINE void * extent_addr_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent->e_slab); - return (extent->e_addr); + return extent->e_addr; } JEMALLOC_INLINE size_t extent_size_get(const extent_t *extent) { - return (extent->e_size); + return extent->e_size; } 
JEMALLOC_INLINE size_t extent_usize_get(const extent_t *extent) { assert(!extent->e_slab); - return (extent->e_usize); + return extent->e_usize; } JEMALLOC_INLINE void * extent_before_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent_base_get(extent) - PAGE)); + return (void *)((uintptr_t)extent_base_get(extent) - PAGE); } JEMALLOC_INLINE void * extent_last_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE)); + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE); } JEMALLOC_INLINE void * extent_past_get(const extent_t *extent) { - return ((void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent))); + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent)); } JEMALLOC_INLINE size_t extent_sn_get(const extent_t *extent) { - return (extent->e_sn); + return extent->e_sn; } JEMALLOC_INLINE bool extent_active_get(const extent_t *extent) { - return (extent->e_active); + return extent->e_active; } JEMALLOC_INLINE bool @@ -115,35 +115,35 @@ extent_retained_get(const extent_t *extent) { JEMALLOC_INLINE bool extent_zeroed_get(const extent_t *extent) { - return (extent->e_zeroed); + return extent->e_zeroed; } JEMALLOC_INLINE bool extent_committed_get(const extent_t *extent) { - return (extent->e_committed); + return extent->e_committed; } JEMALLOC_INLINE bool extent_slab_get(const extent_t *extent) { - return (extent->e_slab); + return extent->e_slab; } JEMALLOC_INLINE arena_slab_data_t * extent_slab_data_get(extent_t *extent) { assert(extent->e_slab); - return (&extent->e_slab_data); + return &extent->e_slab_data; } JEMALLOC_INLINE const arena_slab_data_t * extent_slab_data_get_const(const extent_t *extent) { assert(extent->e_slab); - return (&extent->e_slab_data); + return &extent->e_slab_data; } JEMALLOC_INLINE prof_tctx_t * extent_prof_tctx_get(const extent_t *extent) { - return ((prof_tctx_t *)atomic_read_p( - 
&((extent_t *)extent)->e_prof_tctx_pun)); + return (prof_tctx_t *)atomic_read_p( + &((extent_t *)extent)->e_prof_tctx_pun); } JEMALLOC_INLINE void @@ -251,7 +251,7 @@ extent_sn_comp(const extent_t *a, const extent_t *b) { size_t a_sn = extent_sn_get(a); size_t b_sn = extent_sn_get(b); - return ((a_sn > b_sn) - (a_sn < b_sn)); + return (a_sn > b_sn) - (a_sn < b_sn); } JEMALLOC_INLINE int @@ -259,7 +259,7 @@ extent_ad_comp(const extent_t *a, const extent_t *b) { uintptr_t a_addr = (uintptr_t)extent_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_addr_get(b); - return ((a_addr > b_addr) - (a_addr < b_addr)); + return (a_addr > b_addr) - (a_addr < b_addr); } JEMALLOC_INLINE int @@ -268,11 +268,11 @@ extent_snad_comp(const extent_t *a, const extent_t *b) { ret = extent_sn_comp(a, b); if (ret != 0) { - return (ret); + return ret; } ret = extent_ad_comp(a, b); - return (ret); + return ret; } #endif diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash_inlines.h index 82ac1f42..b134492a 100644 --- a/include/jemalloc/internal/hash_inlines.h +++ b/include/jemalloc/internal/hash_inlines.h @@ -37,10 +37,10 @@ hash_get_block_32(const uint32_t *p, int i) { uint32_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); - return (ret); + return ret; } - return (p[i]); + return p[i]; } JEMALLOC_INLINE uint64_t @@ -50,10 +50,10 @@ hash_get_block_64(const uint64_t *p, int i) { uint64_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); - return (ret); + return ret; } - return (p[i]); + return p[i]; } JEMALLOC_INLINE uint32_t @@ -64,7 +64,7 @@ hash_fmix_32(uint32_t h) { h *= 0xc2b2ae35; h ^= h >> 16; - return (h); + return h; } JEMALLOC_INLINE uint64_t @@ -75,7 +75,7 @@ hash_fmix_64(uint64_t k) { k *= KQU(0xc4ceb9fe1a85ec53); k ^= k >> 33; - return (k); + return k; } JEMALLOC_INLINE uint32_t @@ -125,7 +125,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) { h1 = hash_fmix_32(h1); - return (h1); + return h1; } UNUSED JEMALLOC_INLINE 
void diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c951fab4..03a50a4d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -552,7 +552,7 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); JEMALLOC_ALWAYS_INLINE pszind_t psz2ind(size_t psz) { if (unlikely(psz > LARGE_MAXCLASS)) { - return (NPSIZES); + return NPSIZES; } { pszind_t x = lg_floor((psz<<1)-1); @@ -568,14 +568,14 @@ psz2ind(size_t psz) { ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); pszind_t ind = grp + mod; - return (ind); + return ind; } } JEMALLOC_INLINE size_t pind2sz_compute(pszind_t pind) { if (unlikely(pind == NPSIZES)) { - return (LARGE_MAXCLASS + PAGE); + return LARGE_MAXCLASS + PAGE; } { size_t grp = pind >> LG_SIZE_CLASS_GROUP; @@ -590,7 +590,7 @@ pind2sz_compute(pszind_t pind) { size_t mod_size = (mod+1) << lg_delta; size_t sz = grp_size + mod_size; - return (sz); + return sz; } } @@ -598,19 +598,19 @@ JEMALLOC_INLINE size_t pind2sz_lookup(pszind_t pind) { size_t ret = (size_t)pind2sz_tab[pind]; assert(ret == pind2sz_compute(pind)); - return (ret); + return ret; } JEMALLOC_INLINE size_t pind2sz(pszind_t pind) { assert(pind < NPSIZES+1); - return (pind2sz_lookup(pind)); + return pind2sz_lookup(pind); } JEMALLOC_INLINE size_t psz2u(size_t psz) { if (unlikely(psz > LARGE_MAXCLASS)) { - return (LARGE_MAXCLASS + PAGE); + return LARGE_MAXCLASS + PAGE; } { size_t x = lg_floor((psz<<1)-1); @@ -619,14 +619,14 @@ psz2u(size_t psz) { size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (psz + delta_mask) & ~delta_mask; - return (usize); + return usize; } } JEMALLOC_INLINE szind_t size2index_compute(size_t size) { if (unlikely(size > LARGE_MAXCLASS)) { - return (NSIZES); + return NSIZES; } #if (NTBINS != 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { @@ -649,7 +649,7 @@ size2index_compute(size_t size) { ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); 
szind_t index = NTBINS + grp + mod; - return (index); + return index; } } @@ -659,7 +659,7 @@ size2index_lookup(size_t size) { { szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); assert(ret == size2index_compute(size)); - return (ret); + return ret; } } @@ -667,9 +667,9 @@ JEMALLOC_ALWAYS_INLINE szind_t size2index(size_t size) { assert(size > 0); if (likely(size <= LOOKUP_MAXCLASS)) { - return (size2index_lookup(size)); + return size2index_lookup(size); } - return (size2index_compute(size)); + return size2index_compute(size); } JEMALLOC_INLINE size_t @@ -694,7 +694,7 @@ index2size_compute(szind_t index) { size_t mod_size = (mod+1) << lg_delta; size_t usize = grp_size + mod_size; - return (usize); + return usize; } } @@ -702,19 +702,19 @@ JEMALLOC_ALWAYS_INLINE size_t index2size_lookup(szind_t index) { size_t ret = (size_t)index2size_tab[index]; assert(ret == index2size_compute(index)); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE size_t index2size(szind_t index) { assert(index < NSIZES); - return (index2size_lookup(index)); + return index2size_lookup(index); } JEMALLOC_ALWAYS_INLINE size_t s2u_compute(size_t size) { if (unlikely(size > LARGE_MAXCLASS)) { - return (0); + return 0; } #if (NTBINS > 0) if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { @@ -731,7 +731,7 @@ s2u_compute(size_t size) { size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; - return (usize); + return usize; } } @@ -740,7 +740,7 @@ s2u_lookup(size_t size) { size_t ret = index2size_lookup(size2index_lookup(size)); assert(ret == s2u_compute(size)); - return (ret); + return ret; } /* @@ -751,9 +751,9 @@ JEMALLOC_ALWAYS_INLINE size_t s2u(size_t size) { assert(size > 0); if (likely(size <= LOOKUP_MAXCLASS)) { - return (s2u_lookup(size)); + return s2u_lookup(size); } - return (s2u_compute(size)); + return s2u_compute(size); } /* @@ -784,14 +784,14 @@ sa2u(size_t size, size_t alignment) { */ usize = s2u(ALIGNMENT_CEILING(size, 
alignment)); if (usize < LARGE_MINCLASS) { - return (usize); + return usize; } } /* Large size class. Beware of overflow. */ if (unlikely(alignment > LARGE_MAXCLASS)) { - return (0); + return 0; } /* Make sure result is a large size class. */ @@ -801,7 +801,7 @@ sa2u(size_t size, size_t alignment) { usize = s2u(size); if (usize < size) { /* size_t overflow. */ - return (0); + return 0; } } @@ -811,9 +811,9 @@ sa2u(size_t size, size_t alignment) { */ if (usize + large_pad + PAGE_CEILING(alignment) - PAGE < usize) { /* size_t overflow. */ - return (0); + return 0; } - return (usize); + return usize; } /* Choose an arena based on a per-thread value. */ @@ -822,7 +822,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; if (arena != NULL) { - return (arena); + return arena; } ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); @@ -830,17 +830,17 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { ret = arena_choose_hard(tsd, internal); } - return (ret); + return ret; } JEMALLOC_INLINE arena_t * arena_choose(tsd_t *tsd, arena_t *arena) { - return (arena_choose_impl(tsd, arena, false)); + return arena_choose_impl(tsd, arena, false); } JEMALLOC_INLINE arena_t * arena_ichoose(tsd_t *tsd, arena_t *arena) { - return (arena_choose_impl(tsd, arena, true)); + return arena_choose_impl(tsd, arena, true); } JEMALLOC_INLINE arena_tdata_t * @@ -850,7 +850,7 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { if (unlikely(arenas_tdata == NULL)) { /* arenas_tdata hasn't been initialized yet. 
*/ - return (arena_tdata_get_hard(tsd, ind)); + return arena_tdata_get_hard(tsd, ind); } if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { /* @@ -863,9 +863,9 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { tdata = &arenas_tdata[ind]; if (likely(tdata != NULL) || !refresh_if_missing) { - return (tdata); + return tdata; } - return (arena_tdata_get_hard(tsd, ind)); + return arena_tdata_get_hard(tsd, ind); } JEMALLOC_INLINE arena_t * @@ -882,7 +882,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { (extent_hooks_t *)&extent_hooks_default); } } - return (ret); + return ret; } JEMALLOC_INLINE ticker_t * @@ -891,9 +891,9 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { tdata = arena_tdata_get(tsd, ind, true); if (unlikely(tdata == NULL)) { - return (NULL); + return NULL; } - return (&tdata->decay_ticker); + return &tdata->decay_ticker; } #endif @@ -911,7 +911,7 @@ extent_t *iealloc(tsdn_t *tsdn, const void *ptr); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE extent_t * iealloc(tsdn_t *tsdn, const void *ptr) { - return (extent_lookup(tsdn, ptr, true)); + return extent_lookup(tsdn, ptr, true); } #endif @@ -953,7 +953,7 @@ JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); - return (arena_aalloc(tsdn, ptr)); + return arena_aalloc(tsdn, ptr); } /* @@ -967,7 +967,7 @@ JEMALLOC_ALWAYS_INLINE size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { assert(ptr != NULL); - return (arena_salloc(tsdn, extent, ptr)); + return arena_salloc(tsdn, extent, ptr); } JEMALLOC_ALWAYS_INLINE void * @@ -985,13 +985,13 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, iealloc(tsdn, ret), ret)); } - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return (iallocztm(tsd_tsdn(tsd), size, ind, 
zero, tcache_get(tsd, true), - false, NULL, slow_path)); + return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), + false, NULL, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1011,19 +1011,19 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, iealloc(tsdn, ret), ret)); } - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void * ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - return (ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena)); + return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); } JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return (ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, - tcache_get(tsd, true), false, NULL)); + return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd, true), false, NULL); } JEMALLOC_ALWAYS_INLINE size_t @@ -1040,13 +1040,13 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { * */ extent = extent_lookup(tsdn, ptr, false); if (extent == NULL) { - return (0); + return 0; } assert(extent_active_get(extent)); /* Only slab members should be looked up via interior pointers. */ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); - return (isalloc(tsdn, extent, ptr)); + return isalloc(tsdn, extent, ptr); } JEMALLOC_ALWAYS_INLINE void @@ -1085,21 +1085,21 @@ iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, usize = sa2u(size + extra, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return (NULL); + return NULL; } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) { if (extra == 0) { - return (NULL); + return NULL; } /* Try again, without extra this time. 
*/ usize = sa2u(size, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return (NULL); + return NULL; } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) { - return (NULL); + return NULL; } } /* @@ -1109,7 +1109,7 @@ iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); isdalloct(tsdn, extent, ptr, oldsize, tcache, true); - return (p); + return p; } JEMALLOC_ALWAYS_INLINE void * @@ -1124,19 +1124,19 @@ iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, * Existing object alignment is inadequate; allocate new space * and copy. */ - return (iralloct_realign(tsdn, extent, ptr, oldsize, size, 0, - alignment, zero, tcache, arena)); + return iralloct_realign(tsdn, extent, ptr, oldsize, size, 0, + alignment, zero, tcache, arena); } - return (arena_ralloc(tsdn, arena, extent, ptr, oldsize, size, alignment, - zero, tcache)); + return arena_ralloc(tsdn, arena, extent, ptr, oldsize, size, alignment, + zero, tcache); } JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero) { - return (iralloct(tsd_tsdn(tsd), extent, ptr, oldsize, size, alignment, - zero, tcache_get(tsd, true), NULL)); + return iralloct(tsd_tsdn(tsd), extent, ptr, oldsize, size, alignment, + zero, tcache_get(tsd, true), NULL); } JEMALLOC_ALWAYS_INLINE bool @@ -1148,11 +1148,11 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* Existing object alignment is inadequate. 
*/ - return (true); + return true; } - return (arena_ralloc_no_move(tsdn, extent, ptr, oldsize, size, extra, - zero)); + return arena_ralloc_no_move(tsdn, extent, ptr, oldsize, size, extra, + zero); } #endif diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 61dfdc0b..b8141eff 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -218,10 +218,10 @@ a_prefix##empty(a_ph_type *ph) { \ a_attr a_type * \ a_prefix##first(a_ph_type *ph) { \ if (ph->ph_root == NULL) { \ - return (NULL); \ + return NULL; \ } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ - return (ph->ph_root); \ + return ph->ph_root; \ } \ a_attr void \ a_prefix##insert(a_ph_type *ph, a_type *phn) { \ @@ -255,7 +255,7 @@ a_prefix##remove_first(a_ph_type *ph) { \ a_type *ret; \ \ if (ph->ph_root == NULL) { \ - return (NULL); \ + return NULL; \ } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ \ @@ -264,7 +264,7 @@ a_prefix##remove_first(a_ph_type *ph) { \ ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ ph->ph_root); \ \ - return (ret); \ + return ret; \ } \ a_attr void \ a_prefix##remove(a_ph_type *ph, a_type *phn) { \ diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index 124b1baa..646e07b7 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -19,20 +19,20 @@ size_t prng_range_zu(size_t *state, size_t range, bool atomic); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) JEMALLOC_ALWAYS_INLINE uint32_t prng_state_next_u32(uint32_t state) { - return ((state * PRNG_A_32) + PRNG_C_32); + return (state * PRNG_A_32) + PRNG_C_32; } JEMALLOC_ALWAYS_INLINE uint64_t prng_state_next_u64(uint64_t state) { - return ((state * PRNG_A_64) + PRNG_C_64); + return (state * PRNG_A_64) + PRNG_C_64; } JEMALLOC_ALWAYS_INLINE size_t prng_state_next_zu(size_t state) { #if LG_SIZEOF_PTR == 2 - return ((state * PRNG_A_32) + PRNG_C_32); + return 
(state * PRNG_A_32) + PRNG_C_32; #elif LG_SIZEOF_PTR == 3 - return ((state * PRNG_A_64) + PRNG_C_64); + return (state * PRNG_A_64) + PRNG_C_64; #else #error Unsupported pointer size #endif @@ -58,7 +58,7 @@ prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) { } ret = state1 >> (32 - lg_range); - return (ret); + return ret; } /* 64-bit atomic operations cannot be supported on all relevant platforms. */ @@ -73,7 +73,7 @@ prng_lg_range_u64(uint64_t *state, unsigned lg_range) { *state = state1; ret = state1 >> (64 - lg_range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE size_t @@ -96,7 +96,7 @@ prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) { } ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE uint32_t @@ -114,7 +114,7 @@ prng_range_u32(uint32_t *state, uint32_t range, bool atomic) { ret = prng_lg_range_u32(state, lg_range, atomic); } while (ret >= range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE uint64_t @@ -132,7 +132,7 @@ prng_range_u64(uint64_t *state, uint64_t range) { ret = prng_lg_range_u64(state, lg_range); } while (ret >= range); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE size_t @@ -150,7 +150,7 @@ prng_range_zu(size_t *state, size_t range, bool atomic) { ret = prng_lg_range_zu(state, lg_range, atomic); } while (ret >= range); - return (ret); + return ret; } #endif diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index bb9093a8..aba2936a 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -34,7 +34,7 @@ prof_active_get_unlocked(void) { * prof_active in the fast path, so there are no guarantees regarding * how long it will take for all threads to notice state changes. 
*/ - return (prof_active); + return prof_active; } JEMALLOC_ALWAYS_INLINE bool @@ -44,7 +44,7 @@ prof_gdump_get_unlocked(void) { * there are no guarantees regarding how long it will take for all * threads to notice state changes. */ - return (prof_gdump_val); + return prof_gdump_val; } JEMALLOC_ALWAYS_INLINE prof_tdata_t * @@ -67,7 +67,7 @@ prof_tdata_get(tsd_t *tsd, bool create) { assert(tdata == NULL || tdata->attached); } - return (tdata); + return tdata; } JEMALLOC_ALWAYS_INLINE prof_tctx_t * @@ -75,7 +75,7 @@ prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return (arena_prof_tctx_get(tsdn, extent, ptr)); + return arena_prof_tctx_get(tsdn, extent, ptr); } JEMALLOC_ALWAYS_INLINE void @@ -113,20 +113,20 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } if (unlikely(tdata == NULL)) { - return (true); + return true; } if (likely(tdata->bytes_until_sample >= usize)) { if (update) { tdata->bytes_until_sample -= usize; } - return (true); + return true; } else { /* Compute new sample threshold. 
*/ if (update) { prof_sample_threshold_update(tdata); } - return (!tdata->active); + return !tdata->active; } } @@ -147,7 +147,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { ret = prof_lookup(tsd, &bt); } - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index a4b5a65e..7018325f 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -348,13 +348,13 @@ a_attr a_type * \ a_prefix##first(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_first(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##last(a_rbt_type *rbtree) { \ a_type *ret; \ rbtn_last(a_type, a_field, rbtree, rbtree->rbt_root, ret); \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ @@ -379,7 +379,7 @@ a_prefix##next(a_rbt_type *rbtree, a_type *node) { \ assert(tnode != NULL); \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ @@ -404,7 +404,7 @@ a_prefix##prev(a_rbt_type *rbtree, a_type *node) { \ assert(tnode != NULL); \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ @@ -419,7 +419,7 @@ a_prefix##search(a_rbt_type *rbtree, const a_type *key) { \ ret = rbtn_right_get(a_type, a_field, ret); \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ @@ -438,7 +438,7 @@ a_prefix##nsearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ @@ -457,7 +457,7 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ break; \ } \ } \ - return (ret); \ + return ret; \ } \ a_attr void \ a_prefix##insert(a_rbt_type 
*rbtree, a_type *node) { \ @@ -872,16 +872,16 @@ a_attr a_type * \ a_prefix##iter_recurse(a_rbt_type *rbtree, a_type *node, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ if (node == NULL) { \ - return (NULL); \ + return NULL; \ } else { \ a_type *ret; \ if ((ret = a_prefix##iter_recurse(rbtree, rbtn_left_get(a_type, \ a_field, node), cb, arg)) != NULL || (ret = cb(rbtree, node, \ arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -893,20 +893,20 @@ a_prefix##iter_start(a_rbt_type *rbtree, a_type *start, a_type *node, \ if ((ret = a_prefix##iter_start(rbtree, start, \ rbtn_left_get(a_type, a_field, node), cb, arg)) != NULL || \ (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } else if (cmp > 0) { \ - return (a_prefix##iter_start(rbtree, start, \ - rbtn_right_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##iter_start(rbtree, start, \ + rbtn_right_get(a_type, a_field, node), cb, arg); \ } else { \ a_type *ret; \ if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ - a_field, node), cb, arg)); \ + return a_prefix##iter_recurse(rbtree, rbtn_right_get(a_type, \ + a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -919,22 +919,22 @@ a_prefix##iter(a_rbt_type *rbtree, a_type *start, a_type *(*cb)( \ } else { \ ret = a_prefix##iter_recurse(rbtree, rbtree->rbt_root, cb, arg);\ } \ - return (ret); \ + return ret; \ } \ a_attr a_type * \ a_prefix##reverse_iter_recurse(a_rbt_type *rbtree, a_type *node, 
\ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg) { \ if (node == NULL) { \ - return (NULL); \ + return NULL; \ } else { \ a_type *ret; \ if ((ret = a_prefix##reverse_iter_recurse(rbtree, \ rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -947,20 +947,20 @@ a_prefix##reverse_iter_start(a_rbt_type *rbtree, a_type *start, \ if ((ret = a_prefix##reverse_iter_start(rbtree, start, \ rbtn_right_get(a_type, a_field, node), cb, arg)) != NULL || \ (ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } else if (cmp < 0) { \ - return (a_prefix##reverse_iter_start(rbtree, start, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_start(rbtree, start, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } else { \ a_type *ret; \ if ((ret = cb(rbtree, node, arg)) != NULL) { \ - return (ret); \ + return ret; \ } \ - return (a_prefix##reverse_iter_recurse(rbtree, \ - rbtn_left_get(a_type, a_field, node), cb, arg)); \ + return a_prefix##reverse_iter_recurse(rbtree, \ + rbtn_left_get(a_type, a_field, node), cb, arg); \ } \ } \ a_attr a_type * \ @@ -974,7 +974,7 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ ret = a_prefix##reverse_iter_recurse(rbtree, rbtree->rbt_root, \ cb, arg); \ } \ - return (ret); \ + return ret; \ } \ a_attr void \ a_prefix##destroy_recurse(a_rbt_type *rbtree, a_type *node, void (*cb)( \ diff --git a/include/jemalloc/internal/rtree_inlines.h 
b/include/jemalloc/internal/rtree_inlines.h index 9e512e9f..c9a06f64 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -41,13 +41,13 @@ rtree_start_level(const rtree_t *rtree, uintptr_t key) { unsigned start_level; if (unlikely(key == 0)) { - return (rtree->height - 1); + return rtree->height - 1; } start_level = rtree->start_level[(lg_floor(key) + 1) >> LG_RTREE_BITS_PER_LEVEL]; assert(start_level < rtree->height); - return (start_level); + return start_level; } JEMALLOC_ALWAYS_INLINE unsigned @@ -67,7 +67,7 @@ rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, start_level = rtree->start_level[(lg_floor(key_diff) + 1) >> LG_RTREE_BITS_PER_LEVEL]; assert(start_level < rtree->height); - return (start_level); + return start_level; } JEMALLOC_ALWAYS_INLINE uintptr_t @@ -92,7 +92,7 @@ rtree_child_tryread(rtree_elm_t *elm, bool dependent) { child = (rtree_elm_t *)atomic_read_p(&elm->pun); } assert(!dependent || child != NULL); - return (child); + return child; } JEMALLOC_ALWAYS_INLINE rtree_elm_t * @@ -105,7 +105,7 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, child = rtree_child_read_hard(tsdn, rtree, elm, level); } assert(!dependent || child != NULL); - return (child); + return child; } JEMALLOC_ALWAYS_INLINE extent_t * @@ -132,7 +132,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) { /* Mask the lock bit. 
*/ extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); - return (extent); + return extent; } JEMALLOC_INLINE void @@ -151,7 +151,7 @@ rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { &rtree->levels[level].subtree_pun); } assert(!dependent || subtree != NULL); - return (subtree); + return subtree; } JEMALLOC_ALWAYS_INLINE rtree_elm_t * @@ -164,7 +164,7 @@ rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, subtree = rtree_subtree_read_hard(tsdn, rtree, level); } assert(!dependent || subtree != NULL); - return (subtree); + return subtree; } JEMALLOC_ALWAYS_INLINE rtree_elm_t * @@ -179,7 +179,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (dependent || init_missing) { if (likely(rtree_ctx->valid)) { if (key == rtree_ctx->key) { - return (rtree_ctx->elms[rtree->height]); + return rtree_ctx->elms[rtree->height]; } else { unsigned no_ctx_start_level = rtree_start_level(rtree, key); @@ -234,7 +234,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (init_missing) { \ rtree_ctx->valid = false; \ } \ - return (NULL); \ + return NULL; \ } \ subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ @@ -253,7 +253,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (init_missing) { \ rtree_ctx->valid = false; \ } \ - return (NULL); \ + return NULL; \ } \ subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ @@ -266,7 +266,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ node; \ } \ - return (node); + return node; #if RTREE_HEIGHT_MAX > 1 RTREE_GET_SUBTREE(0) #endif @@ -334,12 +334,12 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); if (elm == NULL) { - return (true); + return true; } assert(rtree_elm_read(elm, false) == NULL); rtree_elm_write(elm, extent); - 
return (false); + return false; } JEMALLOC_ALWAYS_INLINE extent_t * @@ -349,10 +349,10 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); if (elm == NULL) { - return (NULL); + return NULL; } - return (rtree_elm_read(elm, dependent)); + return rtree_elm_read(elm, dependent); } JEMALLOC_INLINE rtree_elm_t * @@ -363,7 +363,7 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, init_missing); if (!dependent && elm == NULL) { - return (NULL); + return NULL; } { extent_t *extent; @@ -380,7 +380,7 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_elm_witness_acquire(tsdn, rtree, key, elm); } - return (elm); + return elm; } JEMALLOC_INLINE extent_t * @@ -395,7 +395,7 @@ rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { rtree_elm_witness_access(tsdn, rtree, elm); } - return (extent); + return extent; } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4721ba30..a90107f9 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -44,7 +44,7 @@ tcache_enabled_get(void) { tsd_tcache_enabled_set(tsd, tcache_enabled); } - return ((bool)tcache_enabled); + return (bool)tcache_enabled; } JEMALLOC_INLINE void @@ -69,19 +69,19 @@ tcache_get(tsd_t *tsd, bool create) { tcache_t *tcache; if (!config_tcache) { - return (NULL); + return NULL; } tcache = tsd_tcache_get(tsd); if (!create) { - return (tcache); + return tcache; } if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { tcache = tcache_get_hard(tsd); tsd_tcache_set(tsd, tcache); } - return (tcache); + return tcache; } JEMALLOC_ALWAYS_INLINE void @@ -102,7 +102,7 @@ tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { if (unlikely(tbin->ncached == 0)) { 
tbin->low_water = -1; *tcache_success = false; - return (NULL); + return NULL; } /* * tcache_success (instead of ret) should be checked upon the return of @@ -119,7 +119,7 @@ tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { tbin->low_water = tbin->ncached; } - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void * @@ -138,13 +138,13 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, bool tcache_hard_success; arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) { - return (NULL); + return NULL; } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, tbin, binind, &tcache_hard_success); if (tcache_hard_success == false) { - return (NULL); + return NULL; } } @@ -182,7 +182,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, tcache->prof_accumbytes += usize; } tcache_event(tsd, tcache); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void * @@ -203,12 +203,12 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, */ arena = arena_choose(tsd, arena); if (unlikely(arena == NULL)) { - return (NULL); + return NULL; } ret = large_malloc(tsd_tsdn(tsd), arena, s2u(size), zero); if (ret == NULL) { - return (NULL); + return NULL; } } else { size_t usize JEMALLOC_CC_SILENCE_INIT(0); @@ -242,7 +242,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } tcache_event(tsd, tcache); - return (ret); + return ret; } JEMALLOC_ALWAYS_INLINE void @@ -306,7 +306,7 @@ tcaches_get(tsd_t *tsd, unsigned ind) { elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, NULL)); } - return (elm->tcache); + return elm->tcache; } #endif diff --git a/include/jemalloc/internal/ticker_inlines.h b/include/jemalloc/internal/ticker_inlines.h index 6cc61343..9102ba6d 100644 --- a/include/jemalloc/internal/ticker_inlines.h +++ b/include/jemalloc/internal/ticker_inlines.h @@ -23,14 +23,14 @@ ticker_copy(ticker_t *ticker, const ticker_t *other) { 
JEMALLOC_INLINE int32_t ticker_read(const ticker_t *ticker) { - return (ticker->tick); + return ticker->tick; } JEMALLOC_INLINE bool ticker_ticks(ticker_t *ticker, int32_t nticks) { if (unlikely(ticker->tick < nticks)) { ticker->tick = ticker->nticks; - return (true); + return true; } ticker->tick -= nticks; return(false); @@ -38,7 +38,7 @@ ticker_ticks(ticker_t *ticker, int32_t nticks) { JEMALLOC_INLINE bool ticker_tick(ticker_t *ticker) { - return (ticker_ticks(ticker, 1)); + return ticker_ticks(ticker, 1); } #endif diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 2093d610..4aafb8dc 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -29,7 +29,7 @@ tsd_fetch_impl(bool init) { tsd_t *tsd = tsd_get(init); if (!init && tsd_get_allocates() && tsd == NULL) { - return (NULL); + return NULL; } assert(tsd != NULL); @@ -46,17 +46,17 @@ tsd_fetch_impl(bool init) { } } - return (tsd); + return tsd; } JEMALLOC_ALWAYS_INLINE tsd_t * tsd_fetch(void) { - return (tsd_fetch_impl(true)); + return tsd_fetch_impl(true); } JEMALLOC_ALWAYS_INLINE tsdn_t * tsd_tsdn(tsd_t *tsd) { - return ((tsdn_t *)tsd); + return (tsdn_t *)tsd; } JEMALLOC_INLINE bool @@ -67,12 +67,12 @@ tsd_nominal(tsd_t *tsd) { #define O(n, t, c) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ - return (&tsd->n); \ + return &tsd->n; \ } \ \ JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) { \ - return (*tsd_##n##p_get(tsd)); \ + return *tsd_##n##p_get(tsd); \ } \ \ JEMALLOC_ALWAYS_INLINE void \ @@ -86,22 +86,22 @@ MALLOC_TSD JEMALLOC_ALWAYS_INLINE tsdn_t * tsdn_fetch(void) { if (!tsd_booted_get()) { - return (NULL); + return NULL; } - return (tsd_tsdn(tsd_fetch_impl(false))); + return tsd_tsdn(tsd_fetch_impl(false)); } JEMALLOC_ALWAYS_INLINE bool tsdn_null(const tsdn_t *tsdn) { - return (tsdn == NULL); + return tsdn == NULL; } JEMALLOC_ALWAYS_INLINE tsd_t * tsdn_tsd(tsdn_t *tsdn) { 
assert(!tsdn_null(tsdn)); - return (&tsdn->tsd); + return &tsdn->tsd; } JEMALLOC_ALWAYS_INLINE rtree_ctx_t * @@ -113,9 +113,9 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { if (unlikely(tsdn_null(tsdn))) { static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); - return (fallback); + return fallback; } - return (tsd_rtree_ctxp_get(tsdn_tsd(tsdn))); + return tsd_rtree_ctxp_get(tsdn_tsd(tsdn)); } #endif diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index ec40d9a7..a1dce928 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -180,7 +180,7 @@ a_name##tsd_cleanup_wrapper(void) { \ a_name##tsd_initialized = false; \ a_cleanup(&a_name##tsd_tls); \ } \ - return (a_name##tsd_initialized); \ + return a_name##tsd_initialized; \ } \ a_attr bool \ a_name##tsd_boot0(void) { \ @@ -189,7 +189,7 @@ a_name##tsd_boot0(void) { \ &a_name##tsd_cleanup_wrapper); \ } \ a_name##tsd_booted = true; \ - return (false); \ + return false; \ } \ a_attr void \ a_name##tsd_boot1(void) { \ @@ -197,21 +197,21 @@ a_name##tsd_boot1(void) { \ } \ a_attr bool \ a_name##tsd_boot(void) { \ - return (a_name##tsd_boot0()); \ + return a_name##tsd_boot0(); \ } \ a_attr bool \ a_name##tsd_booted_get(void) { \ - return (a_name##tsd_booted); \ + return a_name##tsd_booted; \ } \ a_attr bool \ a_name##tsd_get_allocates(void) { \ - return (false); \ + return false; \ } \ /* Get/set. 
*/ \ a_attr a_type * \ a_name##tsd_get(bool init) { \ assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ + return &a_name##tsd_tls; \ } \ a_attr void \ a_name##tsd_set(a_type *val) { \ @@ -232,11 +232,11 @@ a_name##tsd_boot0(void) { \ if (a_cleanup != malloc_tsd_no_cleanup) { \ if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ 0) { \ - return (true); \ + return true; \ } \ } \ a_name##tsd_booted = true; \ - return (false); \ + return false; \ } \ a_attr void \ a_name##tsd_boot1(void) { \ @@ -244,21 +244,21 @@ a_name##tsd_boot1(void) { \ } \ a_attr bool \ a_name##tsd_boot(void) { \ - return (a_name##tsd_boot0()); \ + return a_name##tsd_boot0(); \ } \ a_attr bool \ a_name##tsd_booted_get(void) { \ - return (a_name##tsd_booted); \ + return a_name##tsd_booted; \ } \ a_attr bool \ a_name##tsd_get_allocates(void) { \ - return (false); \ + return false; \ } \ /* Get/set. */ \ a_attr a_type * \ a_name##tsd_get(bool init) { \ assert(a_name##tsd_booted); \ - return (&a_name##tsd_tls); \ + return &a_name##tsd_tls; \ } \ a_attr void \ a_name##tsd_set(a_type *val) { \ @@ -289,7 +289,7 @@ a_name##tsd_cleanup_wrapper(void) { \ SetLastError(error); \ \ if (wrapper == NULL) { \ - return (false); \ + return false; \ } \ if (a_cleanup != malloc_tsd_no_cleanup && \ wrapper->initialized) { \ @@ -297,11 +297,11 @@ a_name##tsd_cleanup_wrapper(void) { \ a_cleanup(&wrapper->val); \ if (wrapper->initialized) { \ /* Trigger another cleanup round. 
*/ \ - return (true); \ + return true; \ } \ } \ malloc_tsd_dalloc(wrapper); \ - return (false); \ + return false; \ } \ a_attr void \ a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ @@ -331,13 +331,13 @@ a_name##tsd_wrapper_get(bool init) { \ } \ a_name##tsd_wrapper_set(wrapper); \ } \ - return (wrapper); \ + return wrapper; \ } \ a_attr bool \ a_name##tsd_boot0(void) { \ a_name##tsd_tsd = TlsAlloc(); \ if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) { \ - return (true); \ + return true; \ } \ if (a_cleanup != malloc_tsd_no_cleanup) { \ malloc_tsd_cleanup_register( \ @@ -345,7 +345,7 @@ a_name##tsd_boot0(void) { \ } \ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ a_name##tsd_booted = true; \ - return (false); \ + return false; \ } \ a_attr void \ a_name##tsd_boot1(void) { \ @@ -364,18 +364,18 @@ a_name##tsd_boot1(void) { \ a_attr bool \ a_name##tsd_boot(void) { \ if (a_name##tsd_boot0()) { \ - return (true); \ + return true; \ } \ a_name##tsd_boot1(); \ - return (false); \ + return false; \ } \ a_attr bool \ a_name##tsd_booted_get(void) { \ - return (a_name##tsd_booted); \ + return a_name##tsd_booted; \ } \ a_attr bool \ a_name##tsd_get_allocates(void) { \ - return (true); \ + return true; \ } \ /* Get/set. 
*/ \ a_attr a_type * \ @@ -385,9 +385,9 @@ a_name##tsd_get(bool init) { \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(init); \ if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ - return (NULL); \ + return NULL; \ } \ - return (&wrapper->val); \ + return &wrapper->val; \ } \ a_attr void \ a_name##tsd_set(a_type *val) { \ @@ -449,7 +449,7 @@ a_name##tsd_wrapper_get(bool init) { \ tsd_init_check_recursion(&a_name##tsd_init_head, \ &block); \ if (wrapper) { \ - return (wrapper); \ + return wrapper; \ } \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ @@ -465,17 +465,17 @@ a_name##tsd_wrapper_get(bool init) { \ a_name##tsd_wrapper_set(wrapper); \ tsd_init_finish(&a_name##tsd_init_head, &block); \ } \ - return (wrapper); \ + return wrapper; \ } \ a_attr bool \ a_name##tsd_boot0(void) { \ if (pthread_key_create(&a_name##tsd_tsd, \ a_name##tsd_cleanup_wrapper) != 0) { \ - return (true); \ + return true; \ } \ a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ a_name##tsd_booted = true; \ - return (false); \ + return false; \ } \ a_attr void \ a_name##tsd_boot1(void) { \ @@ -494,18 +494,18 @@ a_name##tsd_boot1(void) { \ a_attr bool \ a_name##tsd_boot(void) { \ if (a_name##tsd_boot0()) { \ - return (true); \ + return true; \ } \ a_name##tsd_boot1(); \ - return (false); \ + return false; \ } \ a_attr bool \ a_name##tsd_booted_get(void) { \ - return (a_name##tsd_booted); \ + return a_name##tsd_booted; \ } \ a_attr bool \ a_name##tsd_get_allocates(void) { \ - return (true); \ + return true; \ } \ /* Get/set. 
*/ \ a_attr a_type * \ @@ -515,9 +515,9 @@ a_name##tsd_get(bool init) { \ assert(a_name##tsd_booted); \ wrapper = a_name##tsd_wrapper_get(init); \ if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ - return (NULL); \ + return NULL; \ } \ - return (&wrapper->val); \ + return &wrapper->val; \ } \ a_attr void \ a_name##tsd_set(a_type *val) { \ diff --git a/include/jemalloc/internal/util_inlines.h b/include/jemalloc/internal/util_inlines.h index 271673ae..c09bd6da 100644 --- a/include/jemalloc/internal/util_inlines.h +++ b/include/jemalloc/internal/util_inlines.h @@ -26,27 +26,27 @@ int get_errno(void); JEMALLOC_ALWAYS_INLINE unsigned ffs_llu(unsigned long long bitmap) { - return (JEMALLOC_INTERNAL_FFSLL(bitmap)); + return JEMALLOC_INTERNAL_FFSLL(bitmap); } JEMALLOC_ALWAYS_INLINE unsigned ffs_lu(unsigned long bitmap) { - return (JEMALLOC_INTERNAL_FFSL(bitmap)); + return JEMALLOC_INTERNAL_FFSL(bitmap); } JEMALLOC_ALWAYS_INLINE unsigned ffs_u(unsigned bitmap) { - return (JEMALLOC_INTERNAL_FFS(bitmap)); + return JEMALLOC_INTERNAL_FFS(bitmap); } JEMALLOC_ALWAYS_INLINE unsigned ffs_zu(size_t bitmap) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT - return (ffs_u(bitmap)); + return ffs_u(bitmap); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG - return (ffs_lu(bitmap)); + return ffs_lu(bitmap); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG - return (ffs_llu(bitmap)); + return ffs_llu(bitmap); #else #error No implementation for size_t ffs() #endif @@ -55,9 +55,9 @@ ffs_zu(size_t bitmap) { JEMALLOC_ALWAYS_INLINE unsigned ffs_u64(uint64_t bitmap) { #if LG_SIZEOF_LONG == 3 - return (ffs_lu(bitmap)); + return ffs_lu(bitmap); #elif LG_SIZEOF_LONG_LONG == 3 - return (ffs_llu(bitmap)); + return ffs_llu(bitmap); #else #error No implementation for 64-bit ffs() #endif @@ -66,11 +66,11 @@ ffs_u64(uint64_t bitmap) { JEMALLOC_ALWAYS_INLINE unsigned ffs_u32(uint32_t bitmap) { #if LG_SIZEOF_INT == 2 - return (ffs_u(bitmap)); + return ffs_u(bitmap); #else #error No implementation for 32-bit ffs() 
#endif - return (ffs_u(bitmap)); + return ffs_u(bitmap); } JEMALLOC_INLINE uint64_t @@ -83,7 +83,7 @@ pow2_ceil_u64(uint64_t x) { x |= x >> 16; x |= x >> 32; x++; - return (x); + return x; } JEMALLOC_INLINE uint32_t @@ -95,16 +95,16 @@ pow2_ceil_u32(uint32_t x) { x |= x >> 8; x |= x >> 16; x++; - return (x); + return x; } /* Compute the smallest power of 2 that is >= x. */ JEMALLOC_INLINE size_t pow2_ceil_zu(size_t x) { #if (LG_SIZEOF_PTR == 3) - return (pow2_ceil_u64(x)); + return pow2_ceil_u64(x); #else - return (pow2_ceil_u32(x)); + return pow2_ceil_u32(x); #endif } @@ -120,7 +120,7 @@ lg_floor(size_t x) { : "r"(x) // Inputs. ); assert(ret < UINT_MAX); - return ((unsigned)ret); + return (unsigned)ret; } #elif (defined(_MSC_VER)) JEMALLOC_INLINE unsigned @@ -137,7 +137,7 @@ lg_floor(size_t x) { # error "Unsupported type size for lg_floor()" #endif assert(ret < UINT_MAX); - return ((unsigned)ret); + return (unsigned)ret; } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) JEMALLOC_INLINE unsigned @@ -145,9 +145,9 @@ lg_floor(size_t x) { assert(x != 0); #if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x)); + return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x); #elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return (((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x)); + return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x); #else # error "Unsupported type size for lg_floor()" #endif @@ -166,10 +166,10 @@ lg_floor(size_t x) { x |= (x >> 32); #endif if (x == SIZE_T_MAX) { - return ((8 << LG_SIZEOF_PTR) - 1); + return (8 << LG_SIZEOF_PTR) - 1; } x++; - return (ffs_zu(x) - 2); + return ffs_zu(x) - 2; } #endif @@ -187,9 +187,9 @@ set_errno(int errnum) { JEMALLOC_INLINE int get_errno(void) { #ifdef _WIN32 - return (GetLastError()); + return GetLastError(); #else - return (errno); + return errno; #endif } #endif diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h index 2e5ebccc..c2a27812 100644 --- 
a/include/jemalloc/internal/witness_inlines.h +++ b/include/jemalloc/internal/witness_inlines.h @@ -22,11 +22,11 @@ witness_owner(tsd_t *tsd, const witness_t *witness) { witnesses = tsd_witnessesp_get(tsd); ql_foreach(w, witnesses, link) { if (w == witness) { - return (true); + return true; } } - return (false); + return false; } JEMALLOC_INLINE void diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index 971b36d4..996f256c 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -10,13 +10,13 @@ static __forceinline int ffsl(long x) { unsigned long i; if (_BitScanForward(&i, x)) { - return (i + 1); + return i + 1; } - return (0); + return 0; } static __forceinline int ffs(int x) { - return (ffsl(x)); + return ffsl(x); } # ifdef _M_X64 @@ -27,9 +27,9 @@ static __forceinline int ffsll(unsigned __int64 x) { unsigned long i; #ifdef _M_X64 if (_BitScanForward64(&i, x)) { - return (i + 1); + return i + 1; } - return (0); + return 0; #else // Fallback for 32-bit build where 64-bit version not available // assuming little endian @@ -41,11 +41,11 @@ static __forceinline int ffsll(unsigned __int64 x) { s.ll = x; if (_BitScanForward(&i, s.l[0])) { - return (i + 1); + return i + 1; } else if(_BitScanForward(&i, s.l[1])) { - return (i + 33); + return i + 33; } - return (0); + return 0; #endif } diff --git a/src/arena.c b/src/arena.c index 5cf9bd07..fe4b5de2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -50,8 +50,8 @@ arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, malloc_mutex_assert_owner(tsdn, &arena->lock); - return (extent_alloc_cache(tsdn, arena, r_extent_hooks, new_addr, usize, - pad, alignment, zero, &commit, slab)); + return extent_alloc_cache(tsdn, arena, r_extent_hooks, new_addr, usize, + pad, alignment, zero, &commit, slab); } extent_t * @@ -65,7 +65,7 @@ arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, new_addr, size, 0, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); - return 
(extent); + return extent; } static void @@ -122,7 +122,7 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, ret = (void *)((uintptr_t)extent_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); slab_data->nfree--; - return (ret); + return ret; } #ifndef JEMALLOC_JET @@ -160,7 +160,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert(regind < arena_bin_info[binind].nregs); - return (regind); + return regind; } JEMALLOC_INLINE_C void @@ -282,7 +282,7 @@ arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, malloc_mutex_unlock(tsdn, &arena->lock); } - return (extent); + return extent; } extent_t * @@ -308,7 +308,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, &extent_hooks, usize, alignment, zero); } - return (extent); + return extent; } void @@ -409,7 +409,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) { } npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); - return (npages_limit_backlog); + return npages_limit_backlog; } static void @@ -499,12 +499,12 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) { static bool arena_decay_time_valid(ssize_t decay_time) { if (decay_time < -1) { - return (false); + return false; } if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) { - return (true); + return true; } - return (false); + return false; } ssize_t @@ -515,13 +515,13 @@ arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) { decay_time = arena->decay.time; malloc_mutex_unlock(tsdn, &arena->lock); - return (decay_time); + return decay_time; } bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) { - return (true); + return true; } malloc_mutex_lock(tsdn, &arena->lock); @@ -537,7 +537,7 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { arena_maybe_purge(tsdn, arena); malloc_mutex_unlock(tsdn, &arena->lock); - return (false); + return false; } static void @@ -609,7 +609,7 @@ 
arena_dirty_count(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); - return (ndirty); + return ndirty; } static size_t @@ -648,7 +648,7 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } malloc_mutex_unlock(tsdn, &arena->extents_mtx); - return (nstashed); + return nstashed; } static size_t @@ -680,7 +680,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, arena->stats.purged += npurged; } - return (npurged); + return npurged; } /* @@ -757,12 +757,12 @@ static extent_t * arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { - return (NULL); + return NULL; } if (config_stats) { bin->stats.reslabs++; } - return (slab); + return slab; } static void @@ -936,7 +936,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, bin_info->slab_size, 0, PAGE, &zero, &commit, true); malloc_mutex_lock(tsdn, &arena->lock); - return (slab); + return slab; } static extent_t * @@ -953,7 +953,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, bin_info); if (slab == NULL) { - return (NULL); + return NULL; } } assert(extent_slab_get(slab)); @@ -970,7 +970,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, arena->stats.mapped += extent_size_get(slab); } - return (slab); + return slab; } static extent_t * @@ -982,7 +982,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Look for a usable slab. */ slab = arena_bin_slabs_nonfull_tryget(bin); if (slab != NULL) { - return (slab); + return slab; } /* No existing slabs have any space available. 
*/ @@ -1001,7 +1001,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, bin->stats.nslabs++; bin->stats.curslabs++; } - return (slab); + return slab; } /* @@ -1011,10 +1011,10 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, */ slab = arena_bin_slabs_nonfull_tryget(bin); if (slab != NULL) { - return (slab); + return slab; } - return (NULL); + return NULL; } /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ @@ -1057,7 +1057,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, bin); } } - return (ret); + return ret; } arena_bin_slabs_full_insert(bin, bin->slabcur); @@ -1065,13 +1065,13 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } if (slab == NULL) { - return (NULL); + return NULL; } bin->slabcur = slab; assert(extent_slab_data_get(bin->slabcur)->nfree > 0); - return (arena_slab_reg_alloc(tsdn, slab, bin_info)); + return arena_slab_reg_alloc(tsdn, slab, bin_info); } void @@ -1172,7 +1172,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); - return (NULL); + return NULL; } if (config_stats) { @@ -1203,7 +1203,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { } arena_decay_tick(tsdn, arena); - return (ret); + return ret; } void * @@ -1215,13 +1215,13 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, arena = arena_choose(tsdn_tsd(tsdn), arena); } if (unlikely(arena == NULL)) { - return (NULL); + return NULL; } if (likely(size <= SMALL_MAXCLASS)) { - return (arena_malloc_small(tsdn, arena, ind, zero)); + return arena_malloc_small(tsdn, arena, ind, zero); } - return (large_malloc(tsdn, arena, index2size(ind), zero)); + return large_malloc(tsdn, arena, index2size(ind), zero); } void * @@ -1241,7 +1241,7 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, ret = large_palloc(tsdn, 
arena, usize, alignment, zero); } } - return (ret); + return ret; } void @@ -1282,7 +1282,7 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); - return (LARGE_MINCLASS); + return LARGE_MINCLASS; } void @@ -1425,7 +1425,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, assert(extra == 0 || size + extra <= LARGE_MAXCLASS); if (unlikely(size > LARGE_MAXCLASS)) { - return (true); + return true; } usize_min = s2u(size); @@ -1440,31 +1440,31 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, if ((usize_max > SMALL_MAXCLASS || size2index(usize_max) != size2index(oldsize)) && (size > oldsize || usize_max < oldsize)) { - return (true); + return true; } arena_decay_tick(tsdn, extent_arena_get(extent)); - return (false); + return false; } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) { - return (large_ralloc_no_move(tsdn, extent, usize_min, usize_max, - zero)); + return large_ralloc_no_move(tsdn, extent, usize_min, usize_max, + zero); } - return (true); + return true; } static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { if (alignment == 0) { - return (arena_malloc(tsdn, arena, usize, size2index(usize), - zero, tcache, true)); + return arena_malloc(tsdn, arena, usize, size2index(usize), + zero, tcache, true); } usize = sa2u(usize, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return (NULL); + return NULL; } - return (ipalloct(tsdn, usize, alignment, zero, tcache, arena)); + return ipalloct(tsdn, usize, alignment, zero, tcache, arena); } void * @@ -1476,20 +1476,20 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, usize = s2u(size); if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { - return (NULL); + return NULL; } if (likely(usize <= SMALL_MAXCLASS)) { /* Try to avoid moving the 
allocation. */ if (!arena_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, 0, zero)) { - return (ptr); + return ptr; } } if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { - return (large_ralloc(tsdn, arena, extent, usize, alignment, - zero, tcache)); + return large_ralloc(tsdn, arena, extent, usize, alignment, + zero, tcache); } /* @@ -1499,7 +1499,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, zero, tcache); if (ret == NULL) { - return (NULL); + return NULL; } /* @@ -1510,7 +1510,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); isdalloct(tsdn, extent, ptr, oldsize, tcache, true); - return (ret); + return ret; } dss_prec_t @@ -1520,7 +1520,7 @@ arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_lock(tsdn, &arena->lock); ret = arena->dss_prec; malloc_mutex_unlock(tsdn, &arena->lock); - return (ret); + return ret; } bool @@ -1531,21 +1531,21 @@ arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { malloc_mutex_lock(tsdn, &arena->lock); arena->dss_prec = dss_prec; malloc_mutex_unlock(tsdn, &arena->lock); - return (false); + return false; } ssize_t arena_decay_time_default_get(void) { - return ((ssize_t)atomic_read_zu((size_t *)&decay_time_default)); + return (ssize_t)atomic_read_zu((size_t *)&decay_time_default); } bool arena_decay_time_default_set(ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) { - return (true); + return true; } atomic_write_zu((size_t *)&decay_time_default, (size_t)decay_time); - return (false); + return false; } static void @@ -1642,7 +1642,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, unsigned arena_nthreads_get(arena_t *arena, bool internal) { - return (atomic_read_u(&arena->nthreads[internal])); + return atomic_read_u(&arena->nthreads[internal]); } void @@ -1657,7 
+1657,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) { size_t arena_extent_sn_next(arena_t *arena) { - return (atomic_add_zu(&arena->extent_sn_next, 1) - 1); + return atomic_add_zu(&arena->extent_sn_next, 1) - 1; } arena_t * @@ -1671,7 +1671,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } else { base = base_new(tsdn, ind, extent_hooks); if (base == NULL) { - return (NULL); + return NULL; } } @@ -1762,12 +1762,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->base = base; - return (arena); + return arena; label_error: if (ind != 0) { base_delete(base); } - return (NULL); + return NULL; } void diff --git a/src/base.c b/src/base.c index ee964faa..886a6bde 100644 --- a/src/base.c +++ b/src/base.c @@ -23,7 +23,7 @@ base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) { &zero, &commit, ind); } - return (addr); + return addr; } static void @@ -105,7 +105,7 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, extent_init(extent, NULL, (void *)((uintptr_t)extent_addr_get(extent) + *gap_size + size), extent_size_get(extent) - *gap_size - size, 0, extent_sn_get(extent), true, true, true, false); - return (ret); + return ret; } static void @@ -142,7 +142,7 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size); - return (ret); + return ret; } /* @@ -163,14 +163,14 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, block_size = HUGEPAGE_CEILING(header_size + gap_size + usize); block = (base_block_t *)base_map(extent_hooks, ind, block_size); if (block == NULL) { - return (NULL); + return NULL; } block->size = block_size; block->next = NULL; assert(block_size >= header_size); base_extent_init(extent_sn_next, &block->extent, (void *)((uintptr_t)block + header_size), block_size - header_size); - return 
(block); + return block; } /* @@ -187,7 +187,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { block = base_block_alloc(extent_hooks, base_ind_get(base), &base->extent_sn_next, size, alignment); if (block == NULL) { - return (NULL); + return NULL; } block->next = base->blocks; base->blocks = block; @@ -198,12 +198,12 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); } - return (&block->extent); + return &block->extent; } base_t * b0get(void) { - return (b0); + return b0; } base_t * @@ -217,7 +217,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { block = base_block_alloc(extent_hooks, ind, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { - return (NULL); + return NULL; } base_alignment = CACHELINE; @@ -228,7 +228,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->extent_hooks = extent_hooks; if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE)) { base_unmap(extent_hooks, ind, block, block->size); - return (NULL); + return NULL; } base->extent_sn_next = extent_sn_next; base->blocks = block; @@ -245,7 +245,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, base_size); - return (base); + return base; } void @@ -262,7 +262,7 @@ base_delete(base_t *base) { extent_hooks_t * base_extent_hooks_get(base_t *base) { - return ((extent_hooks_t *)atomic_read_p(&base->extent_hooks_pun)); + return (extent_hooks_t *)atomic_read_p(&base->extent_hooks_pun); } extent_hooks_t * @@ -276,7 +276,7 @@ base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { u.h = &base->extent_hooks; atomic_write_p(u.v, extent_hooks); - return (old_extent_hooks); + return old_extent_hooks; } /* @@ -319,7 +319,7 @@ base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) 
{ ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); label_return: malloc_mutex_unlock(tsdn, &base->mtx); - return (ret); + return ret; } void diff --git a/src/bitmap.c b/src/bitmap.c index 7cbc7d45..a9d48685 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -35,7 +35,7 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->levels[binfo->nlevels].group_offset); + return binfo->levels[binfo->nlevels].group_offset; } void @@ -80,7 +80,7 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { static size_t bitmap_info_ngroups(const bitmap_info_t *binfo) { - return (binfo->ngroups); + return binfo->ngroups; } void diff --git a/src/ckh.c b/src/ckh.c index 0deaf809..7a652185 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -57,11 +57,11 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; if (cell->key != NULL && ckh->keycomp(key, cell->key)) { - return ((bucket << LG_CKH_BUCKET_CELLS) + i); + return (bucket << LG_CKH_BUCKET_CELLS) + i; } } - return (SIZE_T_MAX); + return SIZE_T_MAX; } /* @@ -79,13 +79,13 @@ ckh_isearch(ckh_t *ckh, const void *key) { bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); cell = ckh_bucket_search(ckh, bucket, key); if (cell != SIZE_T_MAX) { - return (cell); + return cell; } /* Search secondary bucket. 
*/ bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); cell = ckh_bucket_search(ckh, bucket, key); - return (cell); + return cell; } JEMALLOC_INLINE_C bool @@ -107,11 +107,11 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, cell->key = key; cell->data = data; ckh->count++; - return (false); + return false; } } - return (true); + return true; } /* @@ -181,12 +181,12 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, if (tbucket == argbucket) { *argkey = key; *argdata = data; - return (true); + return true; } bucket = tbucket; if (!ckh_try_bucket_insert(ckh, bucket, key, data)) { - return (false); + return false; } } } @@ -202,19 +202,19 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { /* Try to insert in primary bucket. */ bucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (!ckh_try_bucket_insert(ckh, bucket, key, data)) { - return (false); + return false; } /* Try to insert in secondary bucket. */ bucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (!ckh_try_bucket_insert(ckh, bucket, key, data)) { - return (false); + return false; } /* * Try to find a place for this item via iterative eviction/relocation. 
*/ - return (ckh_evict_reloc_insert(ckh, bucket, argkey, argdata)); + return ckh_evict_reloc_insert(ckh, bucket, argkey, argdata); } /* @@ -234,13 +234,13 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { data = aTab[i].data; if (ckh_try_insert(ckh, &key, &data)) { ckh->count = count; - return (true); + return true; } nins++; } } - return (false); + return false; } static bool @@ -296,7 +296,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { ret = false; label_return: - return (ret); + return ret; } static void @@ -403,7 +403,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ret = false; label_return: - return (ret); + return ret; } void @@ -433,7 +433,7 @@ size_t ckh_count(ckh_t *ckh) { assert(ckh != NULL); - return (ckh->count); + return ckh->count; } bool @@ -450,11 +450,11 @@ ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) { *data = (void *)ckh->tab[i].data; } *tabind = i + 1; - return (false); + return false; } } - return (true); + return true; } bool @@ -477,7 +477,7 @@ ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data) { ret = false; label_return: - return (ret); + return ret; } bool @@ -507,10 +507,10 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, ckh_shrink(tsd, ckh); } - return (false); + return false; } - return (true); + return true; } bool @@ -527,10 +527,10 @@ ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data) { if (data != NULL) { *data = (void *)ckh->tab[cell].data; } - return (false); + return false; } - return (true); + return true; } void @@ -543,7 +543,7 @@ ckh_string_keycomp(const void *k1, const void *k2) { assert(k1 != NULL); assert(k2 != NULL); - return (strcmp((char *)k1, (char *)k2) ? false : true); + return !strcmp((char *)k1, (char *)k2); } void @@ -560,5 +560,5 @@ ckh_pointer_hash(const void *key, size_t r_hash[2]) { bool ckh_pointer_keycomp(const void *k1, const void *k2) { - return ((k1 == k2) ? 
true : false); + return (k1 == k2); } diff --git a/src/ctl.c b/src/ctl.c index 929176f2..232fbd71 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -464,12 +464,12 @@ arenas_i2a_impl(size_t i, bool compat, bool validate) { break; } - return (a); + return a; } static unsigned arenas_i2a(size_t i) { - return (arenas_i2a_impl(i, true, false)); + return arenas_i2a_impl(i, true, false); } static ctl_arena_t * @@ -505,14 +505,14 @@ arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { } assert(ret == NULL || arenas_i2a(ret->arena_ind) == arenas_i2a(i)); - return (ret); + return ret; } static ctl_arena_t * arenas_i(size_t i) { ctl_arena_t *ret = arenas_i_impl(TSDN_NULL, i, true, false); assert(ret != NULL); - return (ret); + return ret; } static void @@ -692,19 +692,19 @@ ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { /* Trigger stats allocation. */ if (arenas_i_impl(tsdn, arena_ind, false, true) == NULL) { - return (UINT_MAX); + return UINT_MAX; } /* Initialize new arena. */ if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) { - return (UINT_MAX); + return UINT_MAX; } if (arena_ind == ctl_arenas->narenas) { ctl_arenas->narenas++; } - return (arena_ind); + return arena_ind; } static void @@ -819,7 +819,7 @@ ctl_init(tsdn_t *tsdn) { ret = false; label_return: malloc_mutex_unlock(tsdn, &ctl_mtx); - return (ret); + return ret; } static int @@ -917,7 +917,7 @@ ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, ret = 0; label_return: - return (ret); + return ret; } int @@ -1019,12 +1019,12 @@ label_return: bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) { - return (true); + return true; } ctl_initialized = false; - return (false); + return false; } void @@ -1110,7 +1110,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ t oldval; \ \ if (!(c)) { \ - return (ENOENT); \ + return ENOENT; \ } \ if (l) { \ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ @@ -1124,7 +1124,7 @@ label_return: \ 
if (l) { \ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ } \ - return (ret); \ + return ret; \ } #define CTL_RO_CGEN(c, n, v, t) \ @@ -1135,7 +1135,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ t oldval; \ \ if (!(c)) { \ - return (ENOENT); \ + return ENOENT; \ } \ malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ READONLY(); \ @@ -1145,7 +1145,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ ret = 0; \ label_return: \ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - return (ret); \ + return ret; \ } #define CTL_RO_GEN(n, v, t) \ @@ -1163,7 +1163,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ ret = 0; \ label_return: \ malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - return (ret); \ + return ret; \ } /* @@ -1178,7 +1178,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ t oldval; \ \ if (!(c)) { \ - return (ENOENT); \ + return ENOENT; \ } \ READONLY(); \ oldval = (v); \ @@ -1186,7 +1186,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ \ ret = 0; \ label_return: \ - return (ret); \ + return ret; \ } #define CTL_RO_NL_GEN(n, v, t) \ @@ -1202,7 +1202,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ \ ret = 0; \ label_return: \ - return (ret); \ + return ret; \ } #define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ @@ -1213,7 +1213,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ t oldval; \ \ if (!(c)) { \ - return (ENOENT); \ + return ENOENT; \ } \ READONLY(); \ oldval = (m(tsd)); \ @@ -1221,7 +1221,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ \ ret = 0; \ label_return: \ - return (ret); \ + return ret; \ } #define CTL_RO_CONFIG_GEN(n, t) \ @@ -1237,7 +1237,7 @@ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ \ ret = 0; \ label_return: \ - return (ret); \ + return ret; \ } /******************************************************************************/ @@ -1260,7 
+1260,7 @@ epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } /******************************************************************************/ @@ -1316,7 +1316,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, oldarena = arena_choose(tsd, NULL); if (oldarena == NULL) { - return (EAGAIN); + return EAGAIN; } newind = oldind = arena_ind_get(oldarena); @@ -1350,7 +1350,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get, @@ -1369,7 +1369,7 @@ thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, bool oldval; if (!config_tcache) { - return (ENOENT); + return ENOENT; } oldval = tcache_enabled_get(); @@ -1384,7 +1384,7 @@ thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1393,7 +1393,7 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; if (!config_tcache) { - return (ENOENT); + return ENOENT; } READONLY(); @@ -1403,7 +1403,7 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1412,7 +1412,7 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; if (!config_prof) { - return (ENOENT); + return ENOENT; } READ_XOR_WRITE(); @@ -1434,7 +1434,7 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1444,7 +1444,7 @@ thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, bool oldval; if (!config_prof) { - return (ENOENT); + return ENOENT; } oldval = prof_thread_active_get(tsd); @@ -1462,7 
+1462,7 @@ thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } /******************************************************************************/ @@ -1474,7 +1474,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, unsigned tcache_ind; if (!config_tcache) { - return (ENOENT); + return ENOENT; } malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -1488,7 +1488,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } static int @@ -1498,7 +1498,7 @@ tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, unsigned tcache_ind; if (!config_tcache) { - return (ENOENT); + return ENOENT; } WRITEONLY(); @@ -1512,7 +1512,7 @@ tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1522,7 +1522,7 @@ tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, unsigned tcache_ind; if (!config_tcache) { - return (ENOENT); + return ENOENT; } WRITEONLY(); @@ -1536,7 +1536,7 @@ tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } /******************************************************************************/ @@ -1560,7 +1560,7 @@ arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = 0; label_return: - return (ret); + return ret; } static void @@ -1622,7 +1622,7 @@ arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1638,7 +1638,7 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1664,7 +1664,7 @@ arena_i_reset_destroy_helper(tsd_t *tsd, const 
size_t *mib, size_t miblen, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1677,12 +1677,12 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { - return (ret); + return ret; } arena_reset(tsd, arena); - return (ret); + return ret; } static int @@ -1721,7 +1721,7 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, assert(ret == 0); label_return: - return (ret); + return ret; } static int @@ -1782,7 +1782,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } static int @@ -1817,7 +1817,7 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -1851,7 +1851,7 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } static const ctl_named_node_t * @@ -1874,7 +1874,7 @@ arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { ret = super_arena_i_node; label_return: malloc_mutex_unlock(tsdn, &ctl_mtx); - return (ret); + return ret; } /******************************************************************************/ @@ -1897,7 +1897,7 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } static int @@ -1922,7 +1922,7 @@ arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) @@ -1936,9 +1936,9 @@ CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) 
static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) { - return (NULL); + return NULL; } - return (super_arenas_bin_i_node); + return super_arenas_bin_i_node; } CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) @@ -1947,9 +1947,9 @@ static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NSIZES - NBINS) { - return (NULL); + return NULL; } - return (super_arenas_lextent_i_node); + return super_arenas_lextent_i_node; } static int @@ -1973,7 +1973,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } /******************************************************************************/ @@ -1985,7 +1985,7 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, bool oldval; if (!config_prof) { - return (ENOENT); + return ENOENT; } if (newp != NULL) { @@ -2002,7 +2002,7 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = 0; label_return: - return (ret); + return ret; } static int @@ -2012,7 +2012,7 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, bool oldval; if (!config_prof) { - return (ENOENT); + return ENOENT; } if (newp != NULL) { @@ -2028,7 +2028,7 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -2038,7 +2038,7 @@ prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, const char *filename = NULL; if (!config_prof) { - return (ENOENT); + return ENOENT; } WRITEONLY(); @@ -2051,7 +2051,7 @@ prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -2061,7 +2061,7 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void 
*oldp, bool oldval; if (!config_prof) { - return (ENOENT); + return ENOENT; } if (newp != NULL) { @@ -2077,7 +2077,7 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } static int @@ -2087,7 +2087,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t lg_sample = lg_prof_sample; if (!config_prof) { - return (ENOENT); + return ENOENT; } WRITEONLY(); @@ -2100,7 +2100,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - return (ret); + return ret; } CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) @@ -2180,9 +2180,9 @@ static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > NBINS) { - return (NULL); + return NULL; } - return (super_stats_arenas_i_bins_j_node); + return super_stats_arenas_i_bins_j_node; } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, @@ -2198,9 +2198,9 @@ static const ctl_named_node_t * stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > NSIZES - NBINS) { - return (NULL); + return NULL; } - return (super_stats_arenas_i_lextents_j_node); + return super_stats_arenas_i_lextents_j_node; } static const ctl_named_node_t * @@ -2218,5 +2218,5 @@ stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { ret = super_stats_arenas_i_node; label_return: malloc_mutex_unlock(tsdn, &ctl_mtx); - return (ret); + return ret; } diff --git a/src/extent.c b/src/extent.c index 5cf2e25c..bcdaccf5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -82,12 +82,12 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { extent = ql_last(&arena->extent_cache, ql_link); if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); - return (base_alloc(tsdn, arena->base, sizeof(extent_t), - QUANTUM)); + return base_alloc(tsdn, arena->base, 
sizeof(extent_t), + QUANTUM); } ql_tail_remove(&arena->extent_cache, extent_t, ql_link); malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); - return (extent); + return extent; } void @@ -100,12 +100,12 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_hooks_t * extent_hooks_get(arena_t *arena) { - return (base_extent_hooks_get(arena->base)); + return base_extent_hooks_get(arena->base); } extent_hooks_t * extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) { - return (base_extent_hooks_set(arena->base, extent_hooks)); + return base_extent_hooks_set(arena->base, extent_hooks); } static void @@ -139,11 +139,11 @@ extent_size_quantize_floor(size_t size) { * PAGE-spaced size classes, but it's simplest to just handle * the one case that would cause erroneous results. */ - return (size); + return size; } ret = pind2sz(pind - 1) + large_pad; assert(ret <= size); - return (ret); + return ret; } #ifdef JEMALLOC_JET #undef extent_size_quantize_floor @@ -176,7 +176,7 @@ extent_size_quantize_ceil(size_t size) { */ ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; } - return (ret); + return ret; } #ifdef JEMALLOC_JET #undef extent_size_quantize_ceil @@ -217,7 +217,7 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) { - return (true); + return true; } assert(*r_elm_a != NULL); @@ -227,14 +227,14 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, init_missing); if (!dependent && *r_elm_b == NULL) { rtree_elm_release(tsdn, &extents_rtree, *r_elm_a); - return (true); + return true; } assert(*r_elm_b != NULL); } else { *r_elm_b = NULL; } - return (false); + return false; } static void @@ -308,7 +308,7 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) { if (extent_rtree_acquire(tsdn, rtree_ctx, extent, false, true, &elm_a, &elm_b)) { - return (true); + 
return true; } extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); if (extent_slab_get(extent)) { @@ -320,7 +320,7 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) { extent_gprof_add(tsdn, extent); } - return (false); + return false; } static void @@ -378,11 +378,11 @@ extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, for (i = pind; i < NPSIZES+1; i++) { extent_t *extent = extent_heap_first(&extent_heaps[i]); if (extent != NULL) { - return (extent); + return extent; } } - return (NULL); + return NULL; } static void @@ -444,7 +444,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, alloc_size = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < usize) { - return (NULL); + return NULL; } if (!locked) { malloc_mutex_lock(tsdn, &arena->extents_mtx); @@ -479,7 +479,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); } - return (NULL); + return NULL; } extent_heaps_remove(tsdn, extent_heaps, extent); arena_extent_cache_maybe_remove(tsdn, arena, extent, cache); @@ -508,7 +508,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); } - return (NULL); + return NULL; } extent_heaps_insert(tsdn, extent_heaps, lead); arena_extent_cache_maybe_insert(tsdn, arena, lead, cache); @@ -525,7 +525,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!locked) { malloc_mutex_unlock(tsdn, &arena->extents_mtx); } - return (NULL); + return NULL; } extent_heaps_insert(tsdn, extent_heaps, trail); arena_extent_cache_maybe_insert(tsdn, arena, trail, cache); @@ -545,7 +545,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } extent_record(tsdn, arena, r_extent_hooks, extent_heaps, cache, extent); - return (NULL); + return NULL; } extent_zeroed_set(extent, true); } @@ 
-577,7 +577,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } } } - return (extent); + return extent; } /* @@ -598,22 +598,22 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (have_dss && dss_prec == dss_prec_primary && (ret = extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) { - return (ret); + return ret; } /* mmap. */ if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) != NULL) { - return (ret); + return ret; } /* "secondary" dss. */ if (have_dss && dss_prec == dss_prec_secondary && (ret = extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, commit)) != NULL) { - return (ret); + return ret; } /* All strategies for allocation failed. */ - return (NULL); + return NULL; } static extent_t * @@ -628,7 +628,7 @@ extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, extent = extent_recycle(tsdn, arena, r_extent_hooks, arena->extents_cached, locked, true, new_addr, usize, pad, alignment, zero, commit, slab); - return (extent); + return extent; } extent_t * @@ -637,16 +637,16 @@ extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, size_t alignment, bool *zero, bool *commit, bool slab) { malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); - return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, - new_addr, usize, pad, alignment, zero, commit, slab)); + return extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, + new_addr, usize, pad, alignment, zero, commit, slab); } extent_t * extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { - return (extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, - new_addr, usize, pad, alignment, zero, commit, slab)); + return extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, + new_addr, usize, pad, alignment, zero, commit, slab); } static void * @@ 
-656,7 +656,7 @@ extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, arena->dss_prec); - return (ret); + return ret; } static void * @@ -675,8 +675,8 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, */ assert(arena != NULL); - return (extent_alloc_default_impl(tsdn, arena, new_addr, size, - alignment, zero, commit)); + return extent_alloc_default_impl(tsdn, arena, new_addr, size, + alignment, zero, commit); } static void @@ -714,14 +714,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (alloc_size_min < usize) { - return (NULL); + return NULL; } if (alloc_size < alloc_size_min) { - return (NULL); + return NULL; } extent = extent_alloc(tsdn, arena); if (extent == NULL) { - return (NULL); + return NULL; } zeroed = false; committed = false; @@ -731,7 +731,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, arena_extent_sn_next(arena), false, zeroed, committed, false); if (ptr == NULL || extent_register(tsdn, extent)) { extent_dalloc(tsdn, arena, extent); - return (NULL); + return NULL; } /* * Set the extent as active *after registration so that no gprof-related @@ -759,7 +759,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (extent == NULL) { extent_deregister(tsdn, lead); extent_leak(tsdn, arena, r_extent_hooks, false, lead); - return (NULL); + return NULL; } extent_retain(tsdn, arena, r_extent_hooks, lead); } @@ -771,7 +771,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (trail == NULL) { extent_deregister(tsdn, extent); extent_leak(tsdn, arena, r_extent_hooks, false, extent); - return (NULL); + return NULL; } extent_retain(tsdn, arena, r_extent_hooks, trail); } else if (leadsize == 0) { @@ -786,7 +786,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, 
extent, 0, extent_size_get(extent))) { extent_retain(tsdn, arena, r_extent_hooks, extent); - return (NULL); + return NULL; } extent_zeroed_set(extent, true); } @@ -812,7 +812,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (arena->extent_grow_next + 1 < NPSIZES) { arena->extent_grow_next++; } - return (extent); + return extent; } static extent_t * @@ -841,7 +841,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, new_addr, usize, pad, alignment, zero, commit, slab); } - return (extent); + return extent; } static extent_t * @@ -855,7 +855,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, size = usize + pad; extent = extent_alloc(tsdn, arena); if (extent == NULL) { - return (NULL); + return NULL; } if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ @@ -867,7 +867,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } if (addr == NULL) { extent_dalloc(tsdn, arena, extent); - return (NULL); + return NULL; } extent_init(extent, arena, addr, size, usize, arena_extent_sn_next(arena), true, zero, commit, slab); @@ -876,10 +876,10 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } if (extent_register(tsdn, extent)) { extent_leak(tsdn, arena, r_extent_hooks, false, extent); - return (NULL); + return NULL; } - return (extent); + return extent; } extent_t * @@ -897,25 +897,25 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, new_addr, usize, pad, alignment, zero, commit, slab); } - return (extent); + return extent; } static bool extent_can_coalesce(const extent_t *a, const extent_t *b) { if (extent_arena_get(a) != extent_arena_get(b)) { - return (false); + return false; } if (extent_active_get(a) != extent_active_get(b)) { - return (false); + return false; } if (extent_committed_get(a) != extent_committed_get(b)) { - return (false); + return false; } if (extent_retained_get(a) != extent_retained_get(b)) { - return (false); + return false; } - return (true); + return true; } static void @@ 
-1016,9 +1016,9 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, static bool extent_dalloc_default_impl(void *addr, size_t size) { if (!have_dss || !extent_in_dss(addr)) { - return (extent_dalloc_mmap(addr, size)); + return extent_dalloc_mmap(addr, size); } - return (true); + return true; } static bool @@ -1026,7 +1026,7 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); - return (extent_dalloc_default_impl(addr, size)); + return extent_dalloc_default_impl(addr, size); } bool @@ -1060,7 +1060,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, arena, extent); } - return (err); + return err; } void @@ -1110,8 +1110,8 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); - return (pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), - length)); + return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), + length); } bool @@ -1125,7 +1125,7 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); extent_committed_set(extent, extent_committed_get(extent) || !err); - return (err); + return err; } static bool @@ -1133,8 +1133,8 @@ extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); - return (pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), - length)); + return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), + length); } bool @@ -1150,7 +1150,7 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); 
extent_committed_set(extent, extent_committed_get(extent) && err); - return (err); + return err; } #ifdef PAGES_CAN_PURGE_LAZY @@ -1163,8 +1163,8 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, assert(length != 0); assert((length & PAGE_MASK) == 0); - return (pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), - length)); + return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), + length); } #endif @@ -1189,8 +1189,8 @@ extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, assert(length != 0); assert((length & PAGE_MASK) == 0); - return (pages_purge_forced((void *)((uintptr_t)addr + - (uintptr_t)offset), length)); + return pages_purge_forced((void *)((uintptr_t)addr + + (uintptr_t)offset), length); } #endif @@ -1211,10 +1211,7 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); - if (!maps_coalesce) { - return (true); - } - return (false); + return !maps_coalesce; } #endif @@ -1232,7 +1229,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); if ((*r_extent_hooks)->split == NULL) { - return (NULL); + return NULL; } trail = extent_alloc(tsdn, arena); @@ -1278,7 +1275,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); - return (trail); + return trail; label_error_d: extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); label_error_c: @@ -1286,19 +1283,19 @@ label_error_c: label_error_b: extent_dalloc(tsdn, arena, trail); label_error_a: - return (NULL); + return NULL; } static bool extent_merge_default_impl(void *addr_a, void *addr_b) { if (!maps_coalesce) { - return (true); + return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { - return (true); + return true; } - 
return (false); + return false; } #ifdef JEMALLOC_MAPS_COALESCE @@ -1307,7 +1304,7 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { assert(extent_hooks == &extent_hooks_default); - return (extent_merge_default_impl(addr_a, addr_b)); + return extent_merge_default_impl(addr_a, addr_b); } #endif @@ -1322,7 +1319,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); if ((*r_extent_hooks)->merge == NULL) { - return (true); + return true; } if (*r_extent_hooks == &extent_hooks_default) { @@ -1337,7 +1334,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, } if (err) { - return (true); + return true; } /* @@ -1372,19 +1369,19 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, extent_arena_get(b), b); - return (false); + return false; } bool extent_boot(void) { if (rtree_new(&extents_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - LG_PAGE))) { - return (true); + return true; } if (have_dss) { extent_dss_boot(); } - return (false); + return false; } diff --git a/src/extent_dss.c b/src/extent_dss.c index d61d5464..93bd6fba 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -32,10 +32,10 @@ static void *dss_max; static void * extent_dss_sbrk(intptr_t increment) { #ifdef JEMALLOC_DSS - return (sbrk(increment)); + return sbrk(increment); #else not_implemented(); - return (NULL); + return NULL; #endif } @@ -44,10 +44,10 @@ extent_dss_prec_get(void) { dss_prec_t ret; if (!have_dss) { - return (dss_prec_disabled); + return dss_prec_disabled; } ret = (dss_prec_t)atomic_read_u(&dss_prec_default); - return (ret); + return ret; } bool @@ -56,7 +56,7 @@ extent_dss_prec_set(dss_prec_t dss_prec) { return (dss_prec != dss_prec_disabled); } atomic_write_u(&dss_prec_default, (unsigned)dss_prec); - return (false); + return false; } static void * @@ -87,10 +87,10 @@ extent_dss_max_update(void *new_addr) 
{ } /* Fixed new_addr can only be supported if it is at the edge of DSS. */ if (new_addr != NULL && max_cur != new_addr) { - return (NULL); + return NULL; } - return (max_cur); + return max_cur; } void * @@ -107,12 +107,12 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * interpret a large allocation request as a negative increment. */ if ((intptr_t)size < 0) { - return (NULL); + return NULL; } gap = extent_alloc(tsdn, arena); if (gap == NULL) { - return (NULL); + return NULL; } if (!atomic_read_u(&dss_exhausted)) { @@ -187,7 +187,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, memset(ret, 0, size); } } - return (ret); + return ret; } /* * Failure, whether due to OOM or a race with a raw @@ -207,7 +207,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } label_oom: extent_dalloc(tsdn, arena, gap); - return (NULL); + return NULL; } static bool @@ -220,7 +220,7 @@ bool extent_in_dss(void *addr) { cassert(have_dss); - return (extent_in_dss_helper(addr, atomic_read_p(&dss_max))); + return extent_in_dss_helper(addr, atomic_read_p(&dss_max)); } bool @@ -231,7 +231,7 @@ extent_dss_mergeable(void *addr_a, void *addr_b) { if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b < (uintptr_t)dss_base) { - return (true); + return true; } max = atomic_read_p(&dss_max); diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 2c00b588..495d9beb 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -12,14 +12,14 @@ extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, alloc_size = size + alignment - PAGE; /* Beware size_t wrap-around. 
*/ if (alloc_size < size) { - return (NULL); + return NULL; } do { void *pages; size_t leadsize; pages = pages_map(NULL, alloc_size, commit); if (pages == NULL) { - return (NULL); + return NULL; } leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages; @@ -28,7 +28,7 @@ extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, assert(ret != NULL); *zero = true; - return (ret); + return ret; } void * @@ -54,18 +54,18 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, ret = pages_map(new_addr, size, commit); if (ret == NULL || ret == new_addr) { - return (ret); + return ret; } assert(new_addr == NULL); offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); if (offset != 0) { pages_unmap(ret, size); - return (extent_alloc_mmap_slow(size, alignment, zero, commit)); + return extent_alloc_mmap_slow(size, alignment, zero, commit); } assert(ret != NULL); *zero = true; - return (ret); + return ret; } bool @@ -73,5 +73,5 @@ extent_dalloc_mmap(void *addr, size_t size) { if (config_munmap) { pages_unmap(addr, size); } - return (!config_munmap); + return !config_munmap; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 2de42c3e..67b430f4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -280,17 +280,17 @@ malloc_initialized(void) { JEMALLOC_ALWAYS_INLINE_C bool malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) { - return (malloc_init_hard_a0()); + return malloc_init_hard_a0(); } - return (false); + return false; } JEMALLOC_ALWAYS_INLINE_C bool malloc_init(void) { if (unlikely(!malloc_initialized()) && malloc_init_hard()) { - return (true); + return true; } - return (false); + return false; } /* @@ -301,11 +301,11 @@ malloc_init(void) { static void * a0ialloc(size_t size, bool zero, bool is_internal) { if (unlikely(malloc_init_a0())) { - return (NULL); + return NULL; } - return (iallocztm(TSDN_NULL, size, size2index(size), zero, NULL, - is_internal, arena_get(TSDN_NULL, 0, true), true)); 
+ return iallocztm(TSDN_NULL, size, size2index(size), zero, NULL, + is_internal, arena_get(TSDN_NULL, 0, true), true); } static void @@ -315,7 +315,7 @@ a0idalloc(extent_t *extent, void *ptr, bool is_internal) { void * a0malloc(size_t size) { - return (a0ialloc(size, false, true)); + return a0ialloc(size, false, true); } void @@ -335,7 +335,7 @@ bootstrap_malloc(size_t size) { size = 1; } - return (a0ialloc(size, false, false)); + return a0ialloc(size, false, false); } void * @@ -348,7 +348,7 @@ bootstrap_calloc(size_t num, size_t size) { num_size = 1; } - return (a0ialloc(num_size, true, false)); + return a0ialloc(num_size, true, false); } void @@ -377,7 +377,7 @@ narenas_total_inc(void) { unsigned narenas_total_get(void) { - return (atomic_read_u(&narenas_total)); + return atomic_read_u(&narenas_total); } /* Create a new arena and insert it into the arenas array at index ind. */ @@ -387,7 +387,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { assert(ind <= narenas_total_get()); if (ind > MALLOCX_ARENA_MAX) { - return (NULL); + return NULL; } if (ind == narenas_total_get()) { narenas_total_inc(); @@ -400,13 +400,13 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena = arena_get(tsdn, ind, false); if (arena != NULL) { assert(ind < narenas_auto); - return (arena); + return arena; } /* Actually initialize the arena. */ arena = arena_new(tsdn, ind, extent_hooks); arena_set(ind, arena); - return (arena); + return arena; } arena_t * @@ -416,7 +416,7 @@ arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { malloc_mutex_lock(tsdn, &arenas_lock); arena = arena_init_locked(tsdn, ind, extent_hooks); malloc_mutex_unlock(tsdn, &arenas_lock); - return (arena); + return arena; } static void @@ -534,7 +534,7 @@ label_return: if (arenas_tdata_old != NULL) { a0dalloc(arenas_tdata_old); } - return (tdata); + return tdata; } /* Slow path, called only by arena_choose(). 
*/ @@ -612,7 +612,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); - return (NULL); + return NULL; } if (!!j == internal) { ret = arena; @@ -627,7 +627,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { arena_bind(tsd, 0, true); } - return (ret); + return ret; } void @@ -714,10 +714,10 @@ static char * secure_getenv(const char *name) { # ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { - return (NULL); + return NULL; } # endif - return (getenv(name)); + return getenv(name); } #endif @@ -785,10 +785,10 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, malloc_write(": Conf string ends " "with key\n"); } - return (true); + return true; default: malloc_write(": Malformed conf string\n"); - return (true); + return true; } } @@ -821,7 +821,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, } *opts_p = opts; - return (false); + return false; } static void @@ -1147,7 +1147,7 @@ malloc_init_hard_needed(void) { * acquired init_lock, or this thread is the initializing * thread, and it is recursively allocating. 
*/ - return (false); + return false; } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { @@ -1160,10 +1160,10 @@ malloc_init_hard_needed(void) { spin_adaptive(&spinner); malloc_mutex_lock(TSDN_NULL, &init_lock); } while (!malloc_initialized()); - return (false); + return false; } #endif - return (true); + return true; } static bool @@ -1185,23 +1185,23 @@ malloc_init_hard_a0_locked() { } pages_boot(); if (base_boot(TSDN_NULL)) { - return (true); + return true; } if (extent_boot()) { - return (true); + return true; } if (ctl_boot()) { - return (true); + return true; } if (config_prof) { prof_boot1(); } arena_boot(); if (config_tcache && tcache_boot(TSDN_NULL)) { - return (true); + return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) { - return (true); + return true; } /* * Create enough scaffolding to allow recursive allocation in @@ -1217,12 +1217,12 @@ malloc_init_hard_a0_locked() { */ if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) == NULL) { - return (true); + return true; } malloc_init_state = malloc_init_a0_initialized; - return (false); + return false; } static bool @@ -1232,7 +1232,7 @@ malloc_init_hard_a0(void) { malloc_mutex_lock(TSDN_NULL, &init_lock); ret = malloc_init_hard_a0_locked(); malloc_mutex_unlock(TSDN_NULL, &init_lock); - return (ret); + return ret; } /* Initialize data structures which may trigger recursive allocation. 
*/ @@ -1252,17 +1252,17 @@ malloc_init_hard_recursible(void) { if (opt_abort) { abort(); } - return (true); + return true; } #endif - return (false); + return false; } static bool malloc_init_hard_finish(tsdn_t *tsdn) { if (malloc_mutex_boot()) { - return (true); + return true; } if (opt_narenas == 0) { @@ -1291,7 +1291,7 @@ malloc_init_hard_finish(tsdn_t *tsdn) { arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) * (MALLOCX_ARENA_MAX+1), CACHELINE); if (arenas == NULL) { - return (true); + return true; } /* Copy the pointer to the one arena that was already initialized. */ arena_set(0, a0); @@ -1299,7 +1299,7 @@ malloc_init_hard_finish(tsdn_t *tsdn) { malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); - return (false); + return false; } static bool @@ -1312,39 +1312,39 @@ malloc_init_hard(void) { malloc_mutex_lock(TSDN_NULL, &init_lock); if (!malloc_init_hard_needed()) { malloc_mutex_unlock(TSDN_NULL, &init_lock); - return (false); + return false; } if (malloc_init_state != malloc_init_a0_initialized && malloc_init_hard_a0_locked()) { malloc_mutex_unlock(TSDN_NULL, &init_lock); - return (true); + return true; } malloc_mutex_unlock(TSDN_NULL, &init_lock); /* Recursive allocation relies on functional tsd. */ tsd = malloc_tsd_boot0(); if (tsd == NULL) { - return (true); + return true; } if (malloc_init_hard_recursible()) { - return (true); + return true; } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - return (true); + return true; } if (malloc_init_hard_finish(tsd_tsdn(tsd))) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - return (true); + return true; } malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_tsd_boot1(); - return (false); + return false; } /* @@ -1679,8 +1679,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { witness_assert_lockless(tsd_tsdn(tsd)); - - /* Success! 
*/ *dopts->result = allocation; return 0; @@ -1829,7 +1827,7 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); - return (ret); + return ret; } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1864,13 +1862,13 @@ irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, void *p; if (tctx == NULL) { - return (NULL); + return NULL; } if (usize <= SMALL_MAXCLASS) { p = iralloc(tsd, extent, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) { - return (NULL); + return NULL; } arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, usize); @@ -1878,7 +1876,7 @@ irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); } - return (p); + return p; } JEMALLOC_ALWAYS_INLINE_C void * @@ -1901,13 +1899,13 @@ irealloc_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); - return (NULL); + return NULL; } extent = (p == old_ptr) ? 
old_extent : iealloc(tsd_tsdn(tsd), p); prof_realloc(tsd, extent, p, usize, tctx, prof_active, true, old_extent, old_ptr, old_usize, old_tctx); - return (p); + return p; } JEMALLOC_INLINE_C void @@ -1977,7 +1975,7 @@ je_realloc(void *ptr, size_t size) { UTRACE(ptr, 0, 0); tsd = tsd_fetch(); ifree(tsd, ptr, tcache_get(tsd, false), true); - return (NULL); + return NULL; } size = 1; } @@ -2029,7 +2027,7 @@ je_realloc(void *ptr, size_t size) { } UTRACE(ptr, size, ret); witness_assert_lockless(tsdn); - return (ret); + return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW @@ -2113,7 +2111,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); - return (ret); + return ret; } #endif @@ -2226,13 +2224,13 @@ irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, void *p; if (tctx == NULL) { - return (NULL); + return NULL; } if (usize <= SMALL_MAXCLASS) { p = iralloct(tsdn, extent, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) { - return (NULL); + return NULL; } arena_prof_promote(tsdn, iealloc(tsdn, p), p, usize); } else { @@ -2240,7 +2238,7 @@ irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, zero, tcache, arena); } - return (p); + return p; } JEMALLOC_ALWAYS_INLINE_C void * @@ -2264,7 +2262,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); - return (NULL); + return NULL; } if (p == old_ptr && alignment != 0) { @@ -2284,7 +2282,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, prof_realloc(tsd, extent, p, *usize, tctx, prof_active, false, old_extent, old_ptr, old_usize, old_tctx); - return (p); + return p; } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -2359,7 +2357,7 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, p); witness_assert_lockless(tsd_tsdn(tsd)); - return (p); + return p; label_oom: if (config_xmalloc && 
unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); @@ -2367,7 +2365,7 @@ label_oom: } UTRACE(ptr, size, 0); witness_assert_lockless(tsd_tsdn(tsd)); - return (NULL); + return NULL; } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2377,11 +2375,11 @@ ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, if (ixalloc(tsdn, extent, ptr, old_usize, size, extra, alignment, zero)) { - return (old_usize); + return old_usize; } usize = isalloc(tsdn, extent, ptr); - return (usize); + return usize; } static size_t @@ -2391,12 +2389,12 @@ ixallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t usize; if (tctx == NULL) { - return (old_usize); + return old_usize; } usize = ixallocx_helper(tsdn, extent, ptr, old_usize, size, extra, alignment, zero); - return (usize); + return usize; } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2440,12 +2438,12 @@ ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); - return (usize); + return usize; } prof_realloc(tsd, extent, ptr, usize, tctx, prof_active, false, extent, ptr, old_usize, old_tctx); - return (usize); + return usize; } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2501,7 +2499,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { label_not_resized: UTRACE(ptr, size, ptr); witness_assert_lockless(tsd_tsdn(tsd)); - return (usize); + return usize; } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2522,7 +2520,7 @@ je_sallocx(const void *ptr, int flags) { } witness_assert_lockless(tsdn); - return (usize); + return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW @@ -2566,7 +2564,7 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); } witness_assert_lockless(tsdn); - return (usize); + return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW @@ -2612,7 +2610,7 @@ je_nallocx(size_t size, int flags) { assert(size != 0); if (unlikely(malloc_init())) { 
- return (0); + return 0; } tsdn = tsdn_fetch(); @@ -2620,11 +2618,11 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { - return (0); + return 0; } witness_assert_lockless(tsdn); - return (usize); + return usize; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW @@ -2634,14 +2632,14 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, tsd_t *tsd; if (unlikely(malloc_init())) { - return (EAGAIN); + return EAGAIN; } tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); witness_assert_lockless(tsd_tsdn(tsd)); - return (ret); + return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW @@ -2650,14 +2648,14 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { tsdn_t *tsdn; if (unlikely(malloc_init())) { - return (EAGAIN); + return EAGAIN; } tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); ret = ctl_nametomib(tsdn, name, mibp, miblenp); witness_assert_lockless(tsdn); - return (ret); + return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW @@ -2667,14 +2665,14 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, tsd_t *tsd; if (unlikely(malloc_init())) { - return (EAGAIN); + return EAGAIN; } tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); witness_assert_lockless(tsd_tsdn(tsd)); - return (ret); + return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW @@ -2706,7 +2704,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } witness_assert_lockless(tsdn); - return (ret); + return ret; } /* diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 030ff995..394fbffe 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -36,7 +36,7 @@ void * newImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = je_malloc(size); if (likely(ptr != nullptr)) - return (ptr); + return ptr; while (ptr == nullptr) { 
std::new_handler handler; @@ -62,27 +62,27 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { if (ptr == nullptr && !IsNoExcept) std::__throw_bad_alloc(); - return (ptr); + return ptr; } void * operator new(std::size_t size) { - return (newImpl(size)); + return newImpl(size); } void * operator new[](std::size_t size) { - return (newImpl(size)); + return newImpl(size); } void * operator new(std::size_t size, const std::nothrow_t &) noexcept { - return (newImpl(size)); + return newImpl(size); } void * operator new[](std::size_t size, const std::nothrow_t &) noexcept { - return (newImpl(size)); + return newImpl(size); } void diff --git a/src/large.c b/src/large.c index 0f2f1763..62d4441f 100644 --- a/src/large.c +++ b/src/large.c @@ -7,7 +7,7 @@ void * large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { assert(usize == s2u(usize)); - return (large_palloc(tsdn, arena, usize, CACHELINE, zero)); + return large_palloc(tsdn, arena, usize, CACHELINE, zero); } void * @@ -22,7 +22,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) { - return (NULL); + return NULL; } /* @@ -35,7 +35,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { - return (NULL); + return NULL; } /* Insert extent into large. */ @@ -58,7 +58,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } arena_decay_tick(tsdn, arena); - return (extent_addr_get(extent)); + return extent_addr_get(extent); } #ifdef JEMALLOC_JET @@ -108,7 +108,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { assert(oldusize > usize); if (extent_hooks->split == NULL) { - return (true); + return true; } /* Split excess pages. 
*/ @@ -117,7 +117,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { &extent_hooks, extent, usize + large_pad, usize, diff, diff); if (trail == NULL) { - return (true); + return true; } if (config_fill && unlikely(opt_junk_free)) { @@ -130,7 +130,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); - return (false); + return false; } static bool @@ -144,7 +144,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, extent_t *trail; if (extent_hooks->merge == NULL) { - return (true); + return true; } if ((trail = arena_extent_cache_alloc(tsdn, arena, &extent_hooks, @@ -154,13 +154,13 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, &commit, false)) == NULL) { - return (true); + return true; } } if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); - return (true); + return true; } if (zero || (config_fill && unlikely(opt_zero))) { @@ -191,7 +191,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, arena_extent_ralloc_large_expand(tsdn, arena, extent, oldusize); - return (false); + return false; } bool @@ -209,7 +209,7 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); - return (false); + return false; } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && usize_min > @@ -217,7 +217,7 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, large_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); - return (false); + return false; } } @@ -228,26 +228,26 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, if (extent_usize_get(extent) >= usize_min && extent_usize_get(extent) <= usize_max) { arena_decay_tick(tsdn, extent_arena_get(extent)); - return (false); + return false; } /* Attempt to shrink the allocation in-place. */ if (extent_usize_get(extent) > usize_max) { if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { arena_decay_tick(tsdn, extent_arena_get(extent)); - return (false); + return false; } } - return (true); + return true; } static void * large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { if (alignment <= CACHELINE) { - return (large_malloc(tsdn, arena, usize, zero)); + return large_malloc(tsdn, arena, usize, zero); } - return (large_palloc(tsdn, arena, usize, alignment, zero)); + return large_palloc(tsdn, arena, usize, alignment, zero); } void * @@ -264,7 +264,7 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { - return (extent_addr_get(extent)); + return extent_addr_get(extent); } /* @@ -274,7 +274,7 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, */ ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, zero); if (ret == NULL) { - return (NULL); + return NULL; } copysize = (usize < extent_usize_get(extent)) ? 
usize : @@ -282,7 +282,7 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, memcpy(ret, extent_addr_get(extent), copysize); isdalloct(tsdn, extent, extent_addr_get(extent), extent_usize_get(extent), tcache, true); - return (ret); + return ret; } /* @@ -321,12 +321,12 @@ large_dalloc(tsdn_t *tsdn, extent_t *extent) { size_t large_salloc(tsdn_t *tsdn, const extent_t *extent) { - return (extent_usize_get(extent)); + return extent_usize_get(extent); } prof_tctx_t * large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { - return (extent_prof_tctx_get(extent)); + return extent_prof_tctx_get(extent); } void diff --git a/src/mutex.c b/src/mutex.c index bc0869f8..f883b9d7 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -54,7 +54,7 @@ pthread_create(pthread_t *__restrict thread, pthread_once(&once_control, pthread_create_once); - return (pthread_create_fptr(thread, attr, start_routine, arg)); + return pthread_create_fptr(thread, attr, start_routine, arg); } #endif @@ -74,7 +74,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, # else if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, _CRT_SPINCOUNT)) { - return (true); + return true; } # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) @@ -88,26 +88,26 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, } else { if (_pthread_mutex_init_calloc_cb(&mutex->lock, bootstrap_calloc) != 0) { - return (true); + return true; } } #else pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) { - return (true); + return true; } pthread_mutexattr_settype(&attr, MALLOC_MUTEX_TYPE); if (pthread_mutex_init(&mutex->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); - return (true); + return true; } pthread_mutexattr_destroy(&attr); #endif if (config_debug) { witness_init(&mutex->witness, name, rank, NULL, NULL); } - return (false); + return false; } void @@ -143,10 +143,10 @@ malloc_mutex_boot(void) { while (postponed_mutexes != NULL) { if 
(_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, bootstrap_calloc) != 0) { - return (true); + return true; } postponed_mutexes = postponed_mutexes->postponed_next; } #endif - return (false); + return false; } diff --git a/src/nstime.c b/src/nstime.c index 66989a07..09cd7786 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -14,17 +14,17 @@ nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) { uint64_t nstime_ns(const nstime_t *time) { - return (time->ns); + return time->ns; } uint64_t nstime_sec(const nstime_t *time) { - return (time->ns / BILLION); + return time->ns / BILLION; } uint64_t nstime_nsec(const nstime_t *time) { - return (time->ns % BILLION); + return time->ns % BILLION; } void @@ -34,7 +34,7 @@ nstime_copy(nstime_t *time, const nstime_t *source) { int nstime_compare(const nstime_t *a, const nstime_t *b) { - return ((a->ns > b->ns) - (a->ns < b->ns)); + return (a->ns > b->ns) - (a->ns < b->ns); } void @@ -70,7 +70,7 @@ uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor) { assert(divisor->ns != 0); - return (time->ns / divisor->ns); + return time->ns / divisor->ns; } #ifdef _WIN32 @@ -126,7 +126,7 @@ nstime_get(nstime_t *time) { #endif bool nstime_monotonic(void) { - return (NSTIME_MONOTONIC); + return NSTIME_MONOTONIC; #undef NSTIME_MONOTONIC } #ifdef JEMALLOC_JET @@ -149,10 +149,10 @@ nstime_update(nstime_t *time) { /* Handle non-monotonic clocks. 
*/ if (unlikely(nstime_compare(&old_time, time) > 0)) { nstime_copy(time, &old_time); - return (true); + return true; } - return (false); + return false; } #ifdef JEMALLOC_JET #undef nstime_update diff --git a/src/pages.c b/src/pages.c index c23dccd7..0b678e7d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -58,7 +58,7 @@ pages_map(void *addr, size_t size, bool *commit) { #endif assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); - return (ret); + return ret; } void @@ -98,12 +98,12 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, pages_unmap(addr, alloc_size); new_addr = pages_map(ret, size, commit); if (new_addr == ret) { - return (ret); + return ret; } if (new_addr) { pages_unmap(new_addr, size); } - return (NULL); + return NULL; } #else { @@ -115,7 +115,7 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, if (trailsize != 0) { pages_unmap((void *)((uintptr_t)ret + size), trailsize); } - return (ret); + return ret; } #endif } @@ -123,7 +123,7 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, static bool pages_commit_impl(void *addr, size_t size, bool commit) { if (os_overcommits) { - return (true); + return true; } #ifdef _WIN32 @@ -135,7 +135,7 @@ pages_commit_impl(void *addr, size_t size, bool commit) { void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, -1, 0); if (result == MAP_FAILED) { - return (true); + return true; } if (result != addr) { /* @@ -143,27 +143,27 @@ pages_commit_impl(void *addr, size_t size, bool commit) { * place. 
*/ pages_unmap(result, size); - return (true); + return true; } - return (false); + return false; } #endif } bool pages_commit(void *addr, size_t size) { - return (pages_commit_impl(addr, size, true)); + return pages_commit_impl(addr, size, true); } bool pages_decommit(void *addr, size_t size) { - return (pages_commit_impl(addr, size, false)); + return pages_commit_impl(addr, size, false); } bool pages_purge_lazy(void *addr, size_t size) { if (!pages_can_purge_lazy) { - return (true); + return true; } #ifdef _WIN32 @@ -173,13 +173,13 @@ pages_purge_lazy(void *addr, size_t size) { #else not_reached(); #endif - return (false); + return false; } bool pages_purge_forced(void *addr, size_t size) { if (!pages_can_purge_forced) { - return (true); + return true; } #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) @@ -197,7 +197,7 @@ pages_huge(void *addr, size_t size) { #ifdef JEMALLOC_THP return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else - return (true); + return true; #endif } @@ -209,7 +209,7 @@ pages_nohuge(void *addr, size_t size) { #ifdef JEMALLOC_THP return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else - return (false); + return false; #endif } @@ -221,7 +221,7 @@ os_overcommits_sysctl(void) { sz = sizeof(vm_overcommit); if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { - return (false); /* Error. */ + return false; /* Error. */ } return ((vm_overcommit & 0x3) == 0); @@ -246,7 +246,7 @@ os_overcommits_proc(void) { fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); #endif if (fd == -1) { - return (false); /* Error. */ + return false; /* Error. */ } #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) @@ -262,7 +262,7 @@ os_overcommits_proc(void) { #endif if (nread < 1) { - return (false); /* Error. */ + return false; /* Error. 
*/ } /* * /proc/sys/vm/overcommit_memory meanings: diff --git a/src/prof.c b/src/prof.c index ca01d8b1..1b34a750 100644 --- a/src/prof.c +++ b/src/prof.c @@ -149,7 +149,7 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { b_tctx_uid); } } - return (ret); + return ret; } rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, @@ -164,7 +164,7 @@ prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { if (ret == 0) { ret = (a_len > b_len) - (a_len < b_len); } - return (ret); + return ret; } rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, @@ -183,7 +183,7 @@ prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); } - return (ret); + return ret; } rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, @@ -319,7 +319,7 @@ static _Unwind_Reason_Code prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { cassert(config_prof); - return (_URC_NO_REASON); + return _URC_NO_REASON; } static _Unwind_Reason_Code @@ -331,15 +331,15 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { ip = (void *)_Unwind_GetIP(context); if (ip == NULL) { - return (_URC_END_OF_STACK); + return _URC_END_OF_STACK; } data->bt->vec[data->bt->len] = ip; data->bt->len++; if (data->bt->len == data->max) { - return (_URC_END_OF_STACK); + return _URC_END_OF_STACK; } - return (_URC_NO_REASON); + return _URC_NO_REASON; } void @@ -525,12 +525,12 @@ static malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_add_u(&cum_gctxs, 1); - return (&gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]); + return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; } static malloc_mutex_t * prof_tdata_mutex_choose(uint64_t thr_uid) { - return (&tdata_locks[thr_uid % PROF_NTDATA_LOCKS]); + return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; } static prof_gctx_t * @@ -543,7 +543,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { 
size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (gctx == NULL) { - return (NULL); + return NULL; } gctx->lock = prof_gctx_mutex_choose(); /* @@ -556,7 +556,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); gctx->bt.vec = gctx->vec; gctx->bt.len = bt->len; - return (gctx); + return gctx; } static void @@ -600,29 +600,29 @@ prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); if (opt_prof_accum) { - return (false); + return false; } if (tctx->cnts.curobjs != 0) { - return (false); + return false; } if (tctx->prepared) { - return (false); + return false; } - return (true); + return true; } static bool prof_gctx_should_destroy(prof_gctx_t *gctx) { if (opt_prof_accum) { - return (false); + return false; } if (!tctx_tree_empty(&gctx->tctxs)) { - return (false); + return false; } if (gctx->nlimbo != 0) { - return (false); + return false; } - return (true); + return true; } static void @@ -721,7 +721,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); if (gctx.v == NULL) { prof_leave(tsd, tdata); - return (true); + return true; } btkey.p = &gctx.p->bt; if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { @@ -729,7 +729,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_leave(tsd, tdata); idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), gctx.v), gctx.v, NULL, true, true); - return (true); + return true; } new_gctx = true; } else { @@ -747,7 +747,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, *p_btkey = btkey.v; *p_gctx = gctx.p; *p_new_gctx = new_gctx; - return (false); + return false; } prof_tctx_t * @@ -763,7 +763,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { - return (NULL); + return NULL; } malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); @@ -783,7 +783,7 @@ 
prof_lookup(tsd_t *tsd, prof_bt_t *bt) { */ if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, &new_gctx)) { - return (NULL); + return NULL; } /* Link a prof_tctx_t into gctx for this thread. */ @@ -794,7 +794,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { if (new_gctx) { prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } - return (NULL); + return NULL; } ret.p->tdata = tdata; ret.p->thr_uid = tdata->thr_uid; @@ -813,7 +813,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { } idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ret.v), ret.v, NULL, true, true); - return (NULL); + return NULL; } malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); ret.p->state = prof_tctx_state_nominal; @@ -822,7 +822,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } - return (ret.p); + return ret.p; } /* @@ -887,7 +887,7 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, (*tdata_count)++; - return (NULL); + return NULL; } size_t @@ -901,7 +901,7 @@ prof_tdata_count(void) { (void *)&tdata_count); malloc_mutex_unlock(tsdn, &tdatas_mtx); - return (tdata_count); + return tdata_count; } #endif @@ -915,14 +915,14 @@ prof_bt_count(void) { tsd = tsd_fetch(); tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { - return (0); + return 0; } malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); bt_count = ckh_count(&bt2gctx); malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - return (bt_count); + return bt_count; } #endif @@ -943,7 +943,7 @@ prof_dump_open(bool propagate_err, const char *filename) { } } - return (fd); + return fd; } #ifdef JEMALLOC_JET #undef prof_dump_open @@ -971,7 +971,7 @@ prof_dump_flush(bool propagate_err) { } prof_dump_buf_end = 0; - return (ret); + return ret; } static bool @@ -983,7 +983,7 @@ prof_dump_close(bool propagate_err) { close(prof_dump_fd); prof_dump_fd = -1; - return (ret); + return ret; } static bool @@ -998,7 +998,7 @@ prof_dump_write(bool propagate_err, const char *s) { /* Flush the buffer if it is full. 
*/ if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { if (prof_dump_flush(propagate_err) && propagate_err) { - return (true); + return true; } } @@ -1014,7 +1014,7 @@ prof_dump_write(bool propagate_err, const char *s) { i += n; } - return (false); + return false; } JEMALLOC_FORMAT_PRINTF(2, 3) @@ -1029,7 +1029,7 @@ prof_dump_printf(bool propagate_err, const char *format, ...) { va_end(ap); ret = prof_dump_write(propagate_err, buf); - return (ret); + return ret; } static void @@ -1093,7 +1093,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { not_reached(); } - return (NULL); + return NULL; } struct prof_tctx_dump_iter_arg_s { @@ -1120,13 +1120,13 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, tctx->dump_cnts.accumbytes)) { - return (tctx); + return tctx; } break; default: not_reached(); } - return (NULL); + return NULL; } static prof_tctx_t * @@ -1152,7 +1152,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { ret = NULL; label_return: - return (ret); + return ret; } static void @@ -1192,7 +1192,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { } malloc_mutex_unlock(arg->tsdn, gctx->lock); - return (NULL); + return NULL; } static void @@ -1279,7 +1279,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } malloc_mutex_unlock(arg->tsdn, tdata->lock); - return (NULL); + return NULL; } static prof_tdata_t * @@ -1288,7 +1288,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, bool propagate_err = *(bool *)arg; if (!tdata->dumping) { - return (NULL); + return NULL; } if (prof_dump_printf(propagate_err, @@ -1298,9 +1298,9 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, tdata->cnt_summed.accumbytes, (tdata->thread_name != NULL) ? " " : "", (tdata->thread_name != NULL) ? 
tdata->thread_name : "")) { - return (tdata); + return tdata; } - return (NULL); + return NULL; } #ifdef JEMALLOC_JET @@ -1316,14 +1316,14 @@ prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return (true); + return true; } malloc_mutex_lock(tsdn, &tdatas_mtx); ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, (void *)&propagate_err) != NULL); malloc_mutex_unlock(tsdn, &tdatas_mtx); - return (ret); + return ret; } #ifdef JEMALLOC_JET #undef prof_dump_header @@ -1383,7 +1383,7 @@ prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, ret = false; label_return: - return (ret); + return ret; } #ifndef _WIN32 @@ -1399,16 +1399,16 @@ prof_open_maps(const char *format, ...) { va_end(ap); mfd = open(filename, O_RDONLY); - return (mfd); + return mfd; } #endif static int prof_getpid(void) { #ifdef _WIN32 - return (GetCurrentProcessId()); + return GetCurrentProcessId(); #else - return (getpid()); + return getpid(); #endif } @@ -1464,7 +1464,7 @@ label_return: if (mfd != -1) { close(mfd); } - return (ret); + return ret; } /* @@ -1524,7 +1524,7 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { ret = NULL; label_return: malloc_mutex_unlock(arg->tsdn, gctx->lock); - return (ret); + return ret; } static void @@ -1773,13 +1773,13 @@ prof_mdump(tsd_t *tsd, const char *filename) { cassert(config_prof); if (!opt_prof || !prof_booted) { - return (true); + return true; } if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ if (opt_prof_prefix[0] == '\0') { - return (true); + return true; } malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); @@ -1787,7 +1787,7 @@ prof_mdump(tsd_t *tsd, const char *filename) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); filename = filename_buf; } - return (prof_dump(tsd, true, filename, false)); + return prof_dump(tsd, true, filename, false); } void @@ -1837,7 +1837,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { cassert(config_prof); if (bt1->len != bt2->len) { - return (false); + return false; } return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } @@ -1851,7 +1851,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { next_thr_uid++; malloc_mutex_unlock(tsdn, &next_thr_uid_mtx); - return (thr_uid); + return thr_uid; } static prof_tdata_t * @@ -1866,7 +1866,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) { - return (NULL); + return NULL; } tdata->lock = prof_tdata_mutex_choose(thr_uid); @@ -1881,7 +1881,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, prof_bt_keycomp)) { idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tdata), tdata, NULL, true, true); - return (NULL); + return NULL; } tdata->prng_state = (uint64_t)(uintptr_t)tdata; @@ -1898,24 +1898,24 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata_tree_insert(&tdatas, tdata); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - return (tdata); + return tdata; } prof_tdata_t * prof_tdata_init(tsd_t *tsd) { - return (prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd)))); + return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, + NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } static bool prof_tdata_should_destroy_unlocked(prof_tdata_t 
*tdata, bool even_if_attached) { if (tdata->attached && !even_if_attached) { - return (false); + return false; } if (ckh_count(&tdata->bt2tctx) != 0) { - return (false); + return false; } - return (true); + return true; } static bool @@ -1923,7 +1923,7 @@ prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsdn, tdata->lock); - return (prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); + return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); } static void @@ -1985,8 +1985,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { bool active = tdata->active; prof_tdata_detach(tsd, tdata); - return (prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active)); + return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, + active); } static bool @@ -2003,7 +2003,7 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { } malloc_mutex_unlock(tsdn, tdata->lock); - return (destroy_tdata); + return destroy_tdata; } static prof_tdata_t * @@ -2062,7 +2062,7 @@ prof_active_get(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_current = prof_active; malloc_mutex_unlock(tsdn, &prof_active_mtx); - return (prof_active_current); + return prof_active_current; } bool @@ -2073,7 +2073,7 @@ prof_active_set(tsdn_t *tsdn, bool active) { prof_active_old = prof_active; prof_active = active; malloc_mutex_unlock(tsdn, &prof_active_mtx); - return (prof_active_old); + return prof_active_old; } const char * @@ -2082,7 +2082,7 @@ prof_thread_name_get(tsd_t *tsd) { tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { - return (""); + return ""; } return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } @@ -2093,21 +2093,21 @@ prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { size_t size; if (thread_name == NULL) { - return (NULL); + return NULL; } size = strlen(thread_name) + 1; if (size == 1) { - return (""); + return ""; } ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) { - return (NULL); + return NULL; } memcpy(ret, thread_name, size); - return (ret); + return ret; } int @@ -2118,23 +2118,23 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) { tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { - return (EAGAIN); + return EAGAIN; } /* Validate input. */ if (thread_name == NULL) { - return (EFAULT); + return EFAULT; } for (i = 0; thread_name[i] != '\0'; i++) { char c = thread_name[i]; if (!isgraph(c) && !isblank(c)) { - return (EFAULT); + return EFAULT; } } s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); if (s == NULL) { - return (EAGAIN); + return EAGAIN; } if (tdata->thread_name != NULL) { @@ -2145,7 +2145,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) { if (strlen(s) > 0) { tdata->thread_name = s; } - return (0); + return 0; } bool @@ -2154,9 +2154,9 @@ prof_thread_active_get(tsd_t *tsd) { tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { - return (false); + return false; } - return (tdata->active); + return tdata->active; } bool @@ -2165,10 +2165,10 @@ prof_thread_active_set(tsd_t *tsd, bool active) { tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { - return (true); + return true; } tdata->active = active; - return (false); + return false; } bool @@ -2178,7 +2178,7 @@ prof_thread_active_init_get(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx); active_init = prof_thread_active_init; malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); - return (active_init); + return active_init; } bool @@ -2189,7 +2189,7 @@ prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) { 
active_init_old = prof_thread_active_init; prof_thread_active_init = active_init; malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx); - return (active_init_old); + return active_init_old; } bool @@ -2199,7 +2199,7 @@ prof_gdump_get(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &prof_gdump_mtx); prof_gdump_current = prof_gdump_val; malloc_mutex_unlock(tsdn, &prof_gdump_mtx); - return (prof_gdump_current); + return prof_gdump_current; } bool @@ -2210,7 +2210,7 @@ prof_gdump_set(tsdn_t *tsdn, bool gdump) { prof_gdump_old = prof_gdump_val; prof_gdump_val = gdump; malloc_mutex_unlock(tsdn, &prof_gdump_mtx); - return (prof_gdump_old); + return prof_gdump_old; } void @@ -2257,50 +2257,50 @@ prof_boot2(tsd_t *tsd) { prof_active = opt_prof_active; if (malloc_mutex_init(&prof_active_mtx, "prof_active", WITNESS_RANK_PROF_ACTIVE)) { - return (true); + return true; } prof_gdump_val = opt_prof_gdump; if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", WITNESS_RANK_PROF_GDUMP)) { - return (true); + return true; } prof_thread_active_init = opt_prof_thread_active_init; if (malloc_mutex_init(&prof_thread_active_init_mtx, "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) { - return (true); + return true; } if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - return (true); + return true; } if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", WITNESS_RANK_PROF_BT2GCTX)) { - return (true); + return true; } tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", WITNESS_RANK_PROF_TDATAS)) { - return (true); + return true; } next_thr_uid = 0; if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", WITNESS_RANK_PROF_NEXT_THR_UID)) { - return (true); + return true; } if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", WITNESS_RANK_PROF_DUMP_SEQ)) { - return (true); + return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", WITNESS_RANK_PROF_DUMP)) { - return (true); + return true; } if (opt_prof_final && 
opt_prof_prefix[0] != '\0' && @@ -2315,12 +2315,12 @@ prof_boot2(tsd_t *tsd) { b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); if (gctx_locks == NULL) { - return (true); + return true; } for (i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", WITNESS_RANK_PROF_GCTX)) { - return (true); + return true; } } @@ -2328,12 +2328,12 @@ prof_boot2(tsd_t *tsd) { b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), CACHELINE); if (tdata_locks == NULL) { - return (true); + return true; } for (i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", WITNESS_RANK_PROF_TDATA)) { - return (true); + return true; } } } @@ -2348,7 +2348,7 @@ prof_boot2(tsd_t *tsd) { prof_booted = true; - return (false); + return false; } void diff --git a/src/rtree.c b/src/rtree.c index de3e5962..d0c5fe65 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -60,7 +60,7 @@ rtree_new(rtree_t *rtree, unsigned bits) { malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE); - return (false); + return false; } #ifdef JEMALLOC_JET @@ -69,8 +69,8 @@ rtree_new(rtree_t *rtree, unsigned bits) { #endif static rtree_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return ((rtree_elm_t *)base_alloc(tsdn, b0get(), nelms * - sizeof(rtree_elm_t), CACHELINE)); + return (rtree_elm_t *)base_alloc(tsdn, b0get(), nelms * + sizeof(rtree_elm_t), CACHELINE); } #ifdef JEMALLOC_JET #undef rtree_node_alloc @@ -137,25 +137,25 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, rtree->levels[level].bits); if (node == NULL) { malloc_mutex_unlock(tsdn, &rtree->init_lock); - return (NULL); + return NULL; } atomic_write_p((void **)elmp, node); } malloc_mutex_unlock(tsdn, &rtree->init_lock); - return (node); + return node; } rtree_elm_t * rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level) { - return (rtree_node_init(tsdn, rtree, level, - &rtree->levels[level].subtree)); + return 
rtree_node_init(tsdn, rtree, level, + &rtree->levels[level].subtree); } rtree_elm_t * rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level) { - return (rtree_node_init(tsdn, rtree, level+1, &elm->child)); + return rtree_node_init(tsdn, rtree, level+1, &elm->child); } static int @@ -167,7 +167,7 @@ rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, assert(ka != 0); assert(kb != 0); - return ((ka > kb) - (ka < kb)); + return (ka > kb) - (ka < kb); } static witness_t * @@ -192,7 +192,7 @@ rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) { } } assert(witness != NULL); - return (witness); + return witness; } static witness_t * @@ -205,7 +205,7 @@ rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) { rtree_elm_witness_t *rew = &witnesses->witnesses[i]; if (rew->elm == elm) { - return (&rew->witness); + return &rew->witness; } } not_reached(); diff --git a/src/tcache.c b/src/tcache.c index bb6a5a75..0501c3fc 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -25,7 +25,7 @@ static tcaches_t *tcaches_avail; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return (arena_salloc(tsdn, iealloc(tsdn, ptr), ptr)); + return arena_salloc(tsdn, iealloc(tsdn, ptr), ptr); } void @@ -82,7 +82,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } ret = tcache_alloc_easy(tbin, tcache_success); - return (ret); + return ret; } void @@ -297,13 +297,13 @@ tcache_get_hard(tsd_t *tsd) { if (tsd_nominal(tsd)) { tcache_enabled_set(false); /* Memoize. 
*/ } - return (NULL); + return NULL; } arena = arena_choose(tsd, NULL); if (unlikely(arena == NULL)) { - return (NULL); + return NULL; } - return (tcache_create(tsd_tsdn(tsd), arena)); + return tcache_create(tsd_tsdn(tsd), arena); } tcache_t * @@ -323,7 +323,7 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) { tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) { - return (NULL); + return NULL; } tcache_arena_associate(tsdn, tcache, arena); @@ -343,7 +343,7 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) { (uintptr_t)stack_offset); } - return (tcache); + return tcache; } static void @@ -432,20 +432,20 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) { tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); if (tcaches == NULL) { - return (true); + return true; } } if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) { - return (true); + return true; } arena = arena_ichoose(tsd, NULL); if (unlikely(arena == NULL)) { - return (true); + return true; } tcache = tcache_create(tsd_tsdn(tsd), arena); if (tcache == NULL) { - return (true); + return true; } if (tcaches_avail != NULL) { @@ -460,7 +460,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) { tcaches_past++; } - return (false); + return false; } static void @@ -503,7 +503,7 @@ tcache_boot(tsdn_t *tsdn) { tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins * sizeof(tcache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { - return (true); + return true; } stack_nelms = 0; for (i = 0; i < NBINS; i++) { @@ -525,5 +525,5 @@ tcache_boot(tsdn_t *tsdn) { stack_nelms += tcache_bin_info[i].ncached_max; } - return (false); + return false; } diff --git a/src/tsd.c b/src/tsd.c index f02fc28e..ae77fcb1 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -13,7 +13,7 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) void * malloc_tsd_malloc(size_t size) { - return (a0malloc(CACHELINE_CEILING(size))); + 
return a0malloc(CACHELINE_CEILING(size)); } void @@ -109,11 +109,11 @@ malloc_tsd_boot0(void) { ncleanups = 0; if (tsd_boot0()) { - return (NULL); + return NULL; } tsd = tsd_fetch(); *tsd_arenas_tdata_bypassp_get(tsd) = true; - return (tsd); + return tsd; } void @@ -137,7 +137,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { default: break; } - return (true); + return true; } #ifdef _MSC_VER @@ -167,7 +167,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { ql_foreach(iter, &head->blocks, link) { if (iter->thread == self) { malloc_mutex_unlock(TSDN_NULL, &head->lock); - return (iter->data); + return iter->data; } } /* Insert block into list. */ @@ -175,7 +175,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { block->thread = self; ql_tail_insert(&head->blocks, block, link); malloc_mutex_unlock(TSDN_NULL, &head->lock); - return (NULL); + return NULL; } void diff --git a/src/util.c b/src/util.c index a9595397..faa97c8d 100644 --- a/src/util.c +++ b/src/util.c @@ -87,16 +87,16 @@ buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); - return (0); + return 0; #elif defined(__GLIBC__) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); buf[buflen-1] = '\0'; } - return (0); + return 0; #else - return (strerror_r(err, buf, buflen)); + return strerror_r(err, buf, buflen); #endif } @@ -218,7 +218,7 @@ label_return: *endptr = (char *)p; } } - return (ret); + return ret; } static char * @@ -260,7 +260,7 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { }} *slen_p = U2S_BUFSIZE - 1 - i; - return (&s[i]); + return &s[i]; } static char * @@ -288,7 +288,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) { break; default: not_reached(); } - return (s); + return s; } static char * @@ -299,7 +299,7 @@ o2s(uintmax_t x, 
bool alt_form, char *s, size_t *slen_p) { (*slen_p)++; *s = '0'; } - return (s); + return s; } static char * @@ -310,7 +310,7 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { (*slen_p) += 2; memcpy(s, uppercase ? "0X" : "0x", 2); } - return (s); + return s; } size_t @@ -593,7 +593,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { #undef APPEND_S #undef APPEND_PADDED_S #undef GET_ARG_NUMERIC - return (i); + return i; } JEMALLOC_FORMAT_PRINTF(3, 4) @@ -606,7 +606,7 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) { ret = malloc_vsnprintf(str, size, format, ap); va_end(ap); - return (ret); + return ret; } void diff --git a/src/zone.c b/src/zone.c index 8e106632..e69f0b4a 100644 --- a/src/zone.c +++ b/src/zone.c @@ -135,17 +135,17 @@ zone_size(malloc_zone_t *zone, const void *ptr) { * not work in practice, we must check all pointers to assure that they * reside within a mapped extent before determining size. */ - return (ivsalloc(tsdn_fetch(), ptr)); + return ivsalloc(tsdn_fetch(), ptr); } static void * zone_malloc(malloc_zone_t *zone, size_t size) { - return (je_malloc(size)); + return je_malloc(size); } static void * zone_calloc(malloc_zone_t *zone, size_t num, size_t size) { - return (je_calloc(num, size)); + return je_calloc(num, size); } static void * @@ -154,7 +154,7 @@ zone_valloc(malloc_zone_t *zone, size_t size) { je_posix_memalign(&ret, PAGE, size); - return (ret); + return ret; } static void @@ -170,10 +170,10 @@ zone_free(malloc_zone_t *zone, void *ptr) { static void * zone_realloc(malloc_zone_t *zone, void *ptr, size_t size) { if (ivsalloc(tsdn_fetch(), ptr) != 0) { - return (je_realloc(ptr, size)); + return je_realloc(ptr, size); } - return (realloc(ptr, size)); + return realloc(ptr, size); } static void * @@ -182,7 +182,7 @@ zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size) { je_posix_memalign(&ret, alignment, size); - return (ret); + return ret; } static void 
@@ -240,7 +240,7 @@ zone_good_size(malloc_zone_t *zone, size_t size) { if (size == 0) { size = 1; } - return (s2u(size)); + return s2u(size); } static kern_return_t @@ -368,10 +368,10 @@ zone_default_get(void) { } if (num_zones) { - return (zones[0]); + return zones[0]; } - return (malloc_default_zone()); + return malloc_default_zone(); } /* As written, this function can only promote jemalloc_zone. */ diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index 98366afe..8b733f50 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -26,5 +26,5 @@ btalloc_##n(size_t size, unsigned bits) { \ } \ /* Intentionally sabotage tail call optimization. */ \ assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return (p); \ + return p; \ } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index a664c433..96fee103 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -86,12 +86,12 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, "Wrong hook function"); called_alloc = true; if (!try_alloc) { - return (NULL); + return NULL; } ret = default_hooks->alloc(default_hooks, new_addr, size, alignment, zero, commit, 0); did_alloc = (ret != NULL); - return (ret); + return ret; } static bool @@ -108,11 +108,11 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { - return (true); + return true; } err = default_hooks->dalloc(default_hooks, addr, size, committed, 0); did_dalloc = !err; - return (err); + return err; } static bool @@ -129,12 +129,12 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_commit = true; if (!try_commit) { - return (true); + return true; } err = default_hooks->commit(default_hooks, addr, size, offset, length, 0); did_commit = !err; - return (err); + return err; } static bool @@ 
-151,12 +151,12 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_decommit = true; if (!try_decommit) { - return (true); + return true; } err = default_hooks->decommit(default_hooks, addr, size, offset, length, 0); did_decommit = !err; - return (err); + return err; } static bool @@ -173,13 +173,13 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_purge_lazy = true; if (!try_purge_lazy) { - return (true); + return true; } err = default_hooks->purge_lazy == NULL || default_hooks->purge_lazy(default_hooks, addr, size, offset, length, 0); did_purge_lazy = !err; - return (err); + return err; } static bool @@ -196,13 +196,13 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_purge_forced = true; if (!try_purge_forced) { - return (true); + return true; } err = default_hooks->purge_forced == NULL || default_hooks->purge_forced(default_hooks, addr, size, offset, length, 0); did_purge_forced = !err; - return (err); + return err; } static bool @@ -220,13 +220,13 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_split = true; if (!try_split) { - return (true); + return true; } err = (default_hooks->split == NULL || default_hooks->split(default_hooks, addr, size, size_a, size_b, committed, 0)); did_split = !err; - return (err); + return err; } static bool @@ -244,13 +244,13 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "Wrong hook function"); called_merge = true; if (!try_merge) { - return (true); + return true; } err = (default_hooks->merge == NULL || default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b, committed, 0)); did_merge = !err; - return (err); + return err; } static void diff --git a/test/include/test/math.h b/test/include/test/math.h index 08be69f8..94173bad 100644 --- 
a/test/include/test/math.h +++ b/test/include/test/math.h @@ -36,9 +36,9 @@ ln_gamma(double x) { z = 1.0 / (x * x); - return (f + (x-0.5) * log(x) - x + 0.918938533204673 + + return f + (x-0.5) * log(x) - x + 0.918938533204673 + (((-0.000595238095238 * z + 0.000793650793651) * z - - 0.002777777777778) * z + 0.083333333333333) / x); + 0.002777777777778) * z + 0.083333333333333) / x; } /* @@ -60,7 +60,7 @@ i_gamma(double x, double p, double ln_gamma_p) { assert(x >= 0.0); if (x == 0.0) { - return (0.0); + return 0.0; } acu = 1.0e-10; @@ -80,7 +80,7 @@ i_gamma(double x, double p, double ln_gamma_p) { gin += term; if (term <= acu) { gin *= factor / p; - return (gin); + return gin; } } } else { @@ -107,7 +107,7 @@ i_gamma(double x, double p, double ln_gamma_p) { dif = fabs(gin - rn); if (dif <= acu && dif <= acu * rn) { gin = 1.0 - factor * gin; - return (gin); + return gin; } gin = rn; } @@ -144,7 +144,7 @@ pt_norm(double p) { if (fabs(q) <= 0.425) { /* p close to 1/2. */ r = 0.180625 - q * q; - return (q * (((((((2.5090809287301226727e3 * r + + return q * (((((((2.5090809287301226727e3 * r + 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) @@ -153,7 +153,7 @@ pt_norm(double p) { 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) - * r + 1.0)); + * r + 1.0); } else { if (q < 0.0) { r = p; @@ -204,7 +204,7 @@ pt_norm(double p) { if (q < 0.0) { ret = -ret; } - return (ret); + return ret; } } @@ -240,7 +240,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { /* Starting approximation for small Chi^2. 
*/ ch = pow(p * xx * exp(ln_gamma_df_2 + xx * aa), 1.0 / xx); if (ch - e < 0.0) { - return (ch); + return ch; } } else { if (df > 0.32) { @@ -279,7 +279,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { q = ch; p1 = 0.5 * ch; if (p1 < 0.0) { - return (-1.0); + return -1.0; } p2 = p - i_gamma(p1, xx, ln_gamma_df_2); t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch)); @@ -301,7 +301,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { } } - return (ch); + return ch; } /* @@ -311,6 +311,6 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { */ JEMALLOC_INLINE double pt_gamma(double p, double shape, double scale, double ln_gamma_shape) { - return (pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale); + return pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale; } #endif diff --git a/test/include/test/mq.h b/test/include/test/mq.h index fd66de95..8d9907ba 100644 --- a/test/include/test/mq.h +++ b/test/include/test/mq.h @@ -38,11 +38,11 @@ a_attr bool \ a_prefix##init(a_mq_type *mq) { \ \ if (mtx_init(&mq->lock)) { \ - return (true); \ + return true; \ } \ ql_new(&mq->msgs); \ mq->count = 0; \ - return (false); \ + return false; \ } \ a_attr void \ a_prefix##fini(a_mq_type *mq) { \ @@ -55,7 +55,7 @@ a_prefix##count(a_mq_type *mq) { \ mtx_lock(&mq->lock); \ count = mq->count; \ mtx_unlock(&mq->lock); \ - return (count); \ + return count; \ } \ a_attr a_mq_msg_type * \ a_prefix##tryget(a_mq_type *mq) { \ @@ -68,7 +68,7 @@ a_prefix##tryget(a_mq_type *mq) { \ mq->count--; \ } \ mtx_unlock(&mq->lock); \ - return (msg); \ + return msg; \ } \ a_attr a_mq_msg_type * \ a_prefix##get(a_mq_type *mq) { \ @@ -77,7 +77,7 @@ a_prefix##get(a_mq_type *mq) { \ \ msg = a_prefix##tryget(mq); \ if (msg != NULL) { \ - return (msg); \ + return msg; \ } \ \ ns = 1; \ @@ -85,7 +85,7 @@ a_prefix##get(a_mq_type *mq) { \ mq_nanosleep(ns); \ msg = a_prefix##tryget(mq); \ if (msg != NULL) { \ - return (msg); \ + return msg; \ } \ if (ns < 1000*1000*1000) { \ /* Double sleep 
time, up to max 1 second. */ \ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index f706e5a5..b2ec6584 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -41,7 +41,7 @@ thd_start(void *arg) { assert_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, 0); - return (NULL); + return NULL; } TEST_BEGIN(test_MALLOCX_ARENA) { @@ -61,6 +61,6 @@ TEST_END int main(void) { - return (test( - test_MALLOCX_ARENA)); + return test( + test_MALLOCX_ARENA); } diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 8a3ad6b9..54b3bf24 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -126,8 +126,8 @@ TEST_END int main(void) { - return (test( + return test( test_alignment_errors, test_oom_errors, - test_alignment_and_size)); + test_alignment_and_size); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 555d40a9..1425fd0a 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -91,12 +91,12 @@ thd_start(void *arg) { "Deallocated memory counter should increase by at least the amount " "explicitly deallocated"); - return (NULL); + return NULL; label_ENOENT: assert_false(config_stats, "ENOENT should only be returned if stats are disabled"); test_skip("\"thread.allocated\" mallctl not available"); - return (NULL); + return NULL; } TEST_BEGIN(test_main_thread) { @@ -115,10 +115,10 @@ TEST_END int main(void) { /* Run tests multiple times to check for bad interactions. 
*/ - return (test( + return test( test_main_thread, test_subthread, test_main_thread, test_subthread, - test_main_thread)); + test_main_thread); } diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index fe8874fa..65890ecd 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -20,6 +20,6 @@ TEST_END int main() { - return (test( - test_basic)); + return test( + test_basic); } diff --git a/test/integration/extent.c b/test/integration/extent.c index d12c123c..08792df3 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -174,7 +174,7 @@ TEST_END int main(void) { - return (test( + return test( test_extent_manual_hook, - test_extent_auto_hook)); + test_extent_auto_hook); } diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index ec04c399..26076be4 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -13,12 +13,12 @@ get_nsizes_impl(const char *cmd) { assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); - return (ret); + return ret; } static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); + return get_nsizes_impl("arenas.nlextents"); } static size_t @@ -36,12 +36,12 @@ get_size_impl(const char *cmd, size_t ind) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); - return (ret); + return ret; } static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); + return get_size_impl("arenas.lextent.0.size", ind); } /* @@ -216,9 +216,9 @@ TEST_END int main(void) { - return (test( + return test( test_overflow, test_oom, test_basic, - test_alignment_and_size)); + test_alignment_and_size); } diff --git a/test/integration/overflow.c b/test/integration/overflow.c index a7f4b515..6a9785b2 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -41,6 +41,6 @@ TEST_END int main(void) { - return (test( - test_overflow)); + return test( + test_overflow); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 6bbf1839..97b9216a 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -120,8 +120,8 @@ TEST_END int main(void) { - return (test( + return test( test_alignment_errors, test_oom_errors, - test_alignment_and_size)); + test_alignment_and_size); } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 176b9957..7c0f9c5f 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -9,12 +9,12 @@ get_nsizes_impl(const char *cmd) { assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); - return (ret); + return ret; } static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); + return get_nsizes_impl("arenas.nlextents"); } static size_t @@ -32,12 +32,12 @@ get_size_impl(const char *cmd, size_t ind) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); - return (ret); + return ret; } static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); + return get_size_impl("arenas.lextent.0.size", ind); } TEST_BEGIN(test_grow_and_shrink) { @@ -100,7 +100,7 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { } } - return (ret); + return ret; } TEST_BEGIN(test_zero) { @@ -236,10 +236,10 @@ TEST_END int main(void) { - return (test( + return test( test_grow_and_shrink, test_zero, test_align, test_lg_align_and_zero, - test_overflow)); + test_overflow); } diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index bf2fd2c0..f7b42949 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -49,7 +49,7 @@ TEST_END int main(void) { - return (test( + return test( test_basic, - test_alignment_and_size)); + test_alignment_and_size); } diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 5adb5ce0..d9dc170d 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -34,7 +34,7 @@ thd_start(void *arg) { assert_u_eq(arena_ind, main_arena_ind, "Arena index should be same as for main thread"); - return (NULL); + return NULL; } TEST_BEGIN(test_thread_arena) { @@ -72,6 +72,6 @@ TEST_END int main(void) { - return (test( - test_thread_arena)); + return test( + test_thread_arena); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 117d06bf..a0ba56b4 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -77,10 +77,10 @@ thd_start(void *arg) { assert_false(e0, "tcache should be disabled"); free(malloc(1)); - return (NULL); + return NULL; label_ENOENT: test_skip("\"thread.tcache.enabled\" mallctl not available"); - return (NULL); + return NULL; } TEST_BEGIN(test_main_thread) { @@ -99,10 +99,10 @@ TEST_END int main(void) { /* Run tests multiple times to check for bad 
interactions. */ - return (test( + return test( test_main_thread, test_subthread, test_main_thread, test_subthread, - test_main_thread)); + test_main_thread); } diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 9b4b68e0..158f7ee9 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -19,7 +19,7 @@ arena_ind(void) { 0), 0, "Unexpected mallctl failure creating arena"); } - return (ind); + return ind; } TEST_BEGIN(test_same_size) { @@ -76,17 +76,17 @@ get_nsizes_impl(const char *cmd) { assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); - return (ret); + return ret; } static unsigned get_nsmall(void) { - return (get_nsizes_impl("arenas.nbins")); + return get_nsizes_impl("arenas.nbins"); } static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); + return get_nsizes_impl("arenas.nlextents"); } static size_t @@ -104,17 +104,17 @@ get_size_impl(const char *cmd, size_t ind) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); - return (ret); + return ret; } static size_t get_small_size(size_t ind) { - return (get_size_impl("arenas.bin.0.size", ind)); + return get_size_impl("arenas.bin.0.size", ind); } static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); + return get_size_impl("arenas.lextent.0.size", ind); } TEST_BEGIN(test_size) { @@ -312,7 +312,7 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { print_filled_extents(p, c, offset + len); } - return (err); + return err; } static void @@ -376,7 +376,7 @@ TEST_END int main(void) { - return (test( + return test( test_same_size, test_extra_no_move, test_no_move_fail, @@ -384,5 +384,5 @@ main(void) { test_size_extra_overflow, test_extra_small, test_extra_large, - test_zero_large)); + test_zero_large); } diff --git a/test/src/btalloc.c b/test/src/btalloc.c index bc31f9b8..d570952c 100644 --- a/test/src/btalloc.c +++ b/test/src/btalloc.c @@ -2,5 +2,5 @@ void * btalloc(size_t size, unsigned bits) { - return (btalloc_0(size, bits)); + return btalloc_0(size, bits); } diff --git a/test/src/mtx.c b/test/src/mtx.c index 924ba287..b691b482 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -9,7 +9,7 @@ mtx_init(mtx_t *mtx) { #ifdef _WIN32 if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, _CRT_SPINCOUNT)) { - return (true); + return true; } #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) mtx->lock = OS_UNFAIR_LOCK_INIT; @@ -19,16 +19,16 @@ mtx_init(mtx_t *mtx) { pthread_mutexattr_t attr; if (pthread_mutexattr_init(&attr) != 0) { - return (true); + return true; } pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_DEFAULT); if (pthread_mutex_init(&mtx->lock, &attr) != 0) { pthread_mutexattr_destroy(&attr); - return (true); + return true; } pthread_mutexattr_destroy(&attr); #endif - return (false); + return false; } void diff --git a/test/src/test.c b/test/src/test.c index 1155326b..c5101d4e 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -65,7 +65,7 @@ 
p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { */ if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); - return (test_status_fail); + return test_status_fail; } } @@ -85,7 +85,7 @@ p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { test_status_string(test_status_fail), test_counts[test_status_fail], test_count); - return (ret); + return ret; } test_status_t @@ -98,7 +98,7 @@ p_test(test_t *t, ...) { ret = p_test_impl(true, t, ap); va_end(ap); - return (ret); + return ret; } test_status_t @@ -111,7 +111,7 @@ p_test_no_malloc_init(test_t *t, ...) { ret = p_test_impl(false, t, ap); va_end(ap); - return (ret); + return ret; } void diff --git a/test/src/timer.c b/test/src/timer.c index 1b186332..c451c639 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -18,7 +18,7 @@ timer_usec(const timedelta_t *timer) { nstime_copy(&delta, &timer->t1); nstime_subtract(&delta, &timer->t0); - return (nstime_ns(&delta) / 1000); + return nstime_ns(&delta) / 1000; } void diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 3b7e9660..6ed15001 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -156,10 +156,10 @@ TEST_END int main(void) { - return (test( + return test( test_malloc_vs_mallocx, test_free_vs_dallocx, test_dallocx_vs_sdallocx, test_mus_vs_sallocx, - test_sallocx_vs_nallocx)); + test_sallocx_vs_nallocx); } diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index b1bcf3d3..b5730d63 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -1591,9 +1591,9 @@ TEST_END int main(void) { - return (test( + return test( test_gen_rand_32, test_by_array_32, test_gen_rand_64, - test_by_array_64)); + test_by_array_64); } diff --git a/test/unit/a0.c b/test/unit/a0.c index c7ce8cfb..a27ab3f4 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -11,6 +11,6 @@ TEST_END int main(void) { - return (test_no_malloc_init( - test_a0)); + return test_no_malloc_init( + test_a0); } diff --git a/test/unit/arena_reset.c 
b/test/unit/arena_reset.c index 710aaf53..3d74e37a 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -13,17 +13,17 @@ get_nsizes_impl(const char *cmd) { assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); - return (ret); + return ret; } static unsigned get_nsmall(void) { - return (get_nsizes_impl("arenas.nbins")); + return get_nsizes_impl("arenas.nbins"); } static unsigned get_nlarge(void) { - return (get_nsizes_impl("arenas.nlextents")); + return get_nsizes_impl("arenas.nlextents"); } static size_t @@ -41,17 +41,17 @@ get_size_impl(const char *cmd, size_t ind) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); - return (ret); + return ret; } static size_t get_small_size(size_t ind) { - return (get_size_impl("arenas.bin.0.size", ind)); + return get_size_impl("arenas.bin.0.size", ind); } static size_t get_large_size(size_t ind) { - return (get_size_impl("arenas.lextent.0.size", ind)); + return get_size_impl("arenas.lextent.0.size", ind); } /* Like ivsalloc(), but safe to call on discarded allocations. */ @@ -61,13 +61,13 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { extent = extent_lookup(tsdn, ptr, false); if (extent == NULL) { - return (0); + return 0; } if (!extent_active_get(extent)) { - return (0); + return 0; } - return (isalloc(tsdn, extent, ptr)); + return isalloc(tsdn, extent, ptr); } static unsigned @@ -77,7 +77,7 @@ do_arena_create(extent_hooks_t *h) { assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); - return (arena_ind); + return arena_ind; } static void @@ -190,7 +190,7 @@ arena_i_initialized(unsigned arena_ind, bool refresh) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); - return (initialized); + return initialized; } TEST_BEGIN(test_arena_destroy_initial) { @@ -255,11 +255,11 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { - return (true); + return true; } pages_unmap(addr, size); did_dalloc = true; - return (false); + return false; } static extent_hooks_t hooks_orig; @@ -313,9 +313,9 @@ TEST_END int main(void) { - return (test( + return test( test_arena_reset, test_arena_destroy_initial, test_arena_destroy_hooks_default, - test_arena_destroy_hooks_unmap)); + test_arena_destroy_hooks_unmap); } diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 3e36acd1..97ec7eb9 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -101,10 +101,10 @@ TEST_END int main(void) { - return (test( + return test( test_atomic_u64, test_atomic_u32, test_atomic_p, test_atomic_zu, - test_atomic_u)); + test_atomic_u); } diff --git a/test/unit/base.c b/test/unit/base.c index 65cf980b..87116a3c 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -212,8 +212,8 @@ TEST_END int main(void) { - return (test( + return test( test_base_hooks_default, test_base_hooks_null, - test_base_hooks_not_null)); + test_base_hooks_not_null); } diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 6dfa72f2..e91f0928 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -143,7 +143,7 @@ test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, assert_zu_ge(size, (nbits >> 3), "Bitmap size is smaller than expected"); assert_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); - return (size); + return size; } TEST_BEGIN(test_bitmap_size) { @@ -329,11 
+329,11 @@ TEST_END int main(void) { - return (test( + return test( test_bitmap_initializer, test_bitmap_size, test_bitmap_init, test_bitmap_set, test_bitmap_unset, - test_bitmap_sfu)); + test_bitmap_sfu); } diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 0638cb33..842ae29b 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -204,8 +204,8 @@ TEST_END int main(void) { - return (test( + return test( test_new_delete, test_count_insert_search_remove, - test_insert_iter_remove)); + test_insert_iter_remove); } diff --git a/test/unit/decay.c b/test/unit/decay.c index d6334cd2..83c9f49e 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -11,7 +11,7 @@ static bool monotonic_mock; static bool nstime_monotonic_mock(void) { - return (monotonic_mock); + return monotonic_mock; } static bool @@ -20,7 +20,7 @@ nstime_update_mock(nstime_t *time) { if (monotonic_mock) { nstime_copy(time, &time_mock); } - return (!monotonic_mock); + return !monotonic_mock; } TEST_BEGIN(test_decay_ticks) { @@ -352,8 +352,8 @@ TEST_END int main(void) { - return (test( + return test( test_decay_ticks, test_decay_ticker, - test_decay_nonmonotonic)); + test_decay_nonmonotonic); } diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 343d1d8f..52af7a3d 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -134,8 +134,8 @@ TEST_END int main(void) { - return (test( + return test( test_small_extent_size, test_large_extent_size, - test_monotonic)); + test_monotonic); } diff --git a/test/unit/fork.c b/test/unit/fork.c index 4880328e..96b1c5a0 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -57,6 +57,6 @@ TEST_END int main(void) { - return (test( - test_fork)); + return test( + test_fork); } diff --git a/test/unit/hash.c b/test/unit/hash.c index 977d058f..0204cdad 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -38,9 +38,9 @@ typedef enum { static int hash_variant_bits(hash_variant_t variant) { switch (variant) { - case 
hash_variant_x86_32: return (32); - case hash_variant_x86_128: return (128); - case hash_variant_x64_128: return (128); + case hash_variant_x86_32: return 32; + case hash_variant_x86_128: return 128; + case hash_variant_x64_128: return 128; default: not_reached(); } } @@ -48,9 +48,9 @@ hash_variant_bits(hash_variant_t variant) { static const char * hash_variant_string(hash_variant_t variant) { switch (variant) { - case hash_variant_x86_32: return ("hash_x86_32"); - case hash_variant_x86_128: return ("hash_x86_128"); - case hash_variant_x64_128: return ("hash_x64_128"); + case hash_variant_x86_32: return "hash_x86_32"; + case hash_variant_x86_128: return "hash_x86_128"; + case hash_variant_x64_128: return "hash_x64_128"; default: not_reached(); } } @@ -165,8 +165,8 @@ TEST_END int main(void) { - return (test( + return test( test_hash_x86_32, test_hash_x86_128, - test_hash_x64_128)); + test_hash_x64_128); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 02f0726d..86c51089 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -138,7 +138,7 @@ TEST_END int main(void) { - return (test( + return test( test_junk_small, - test_junk_large)); + test_junk_large); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index a116894b..c531a06a 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -621,7 +621,7 @@ TEST_END int main(void) { - return (test( + return test( test_mallctl_errors, test_mallctlnametomib_errors, test_mallctlbymib_errors, @@ -643,5 +643,5 @@ main(void) { test_arenas_bin_constants, test_arenas_lextent_constants, test_arenas_create, - test_stats_arenas)); + test_stats_arenas); } diff --git a/test/unit/math.c b/test/unit/math.c index 15fc7d54..d2cf16dd 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -18,7 +18,7 @@ double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) { double rel_err; if (fabs(a - b) < max_abs_err) { - return (true); + return true; } rel_err = (fabs(b) > fabs(a)) ? 
fabs((a-b)/b) : fabs((a-b)/a); return (rel_err < max_rel_err); @@ -33,7 +33,7 @@ factorial(unsigned x) { ret *= (uint64_t)i; } - return (ret); + return ret; } TEST_BEGIN(test_ln_gamma_factorial) { @@ -380,11 +380,11 @@ TEST_END int main(void) { - return (test( + return test( test_ln_gamma_factorial, test_ln_gamma_misc, test_pt_norm, test_pt_chi2, test_pt_gamma_shape, - test_pt_gamma_scale)); + test_pt_gamma_scale); } diff --git a/test/unit/mq.c b/test/unit/mq.c index 95c9c500..fe17943e 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -39,7 +39,7 @@ thd_receiver_start(void *arg) { assert_ptr_not_null(msg, "mq_get() should never return NULL"); dallocx(msg, 0); } - return (NULL); + return NULL; } static void * @@ -55,7 +55,7 @@ thd_sender_start(void *arg) { msg = (mq_msg_t *)p; mq_put(mq, msg); } - return (NULL); + return NULL; } TEST_BEGIN(test_mq_threaded) { @@ -82,8 +82,8 @@ TEST_END int main(void) { - return (test( + return test( test_mq_basic, - test_mq_threaded)); + test_mq_threaded); } diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 0813a699..23740ce1 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -28,7 +28,7 @@ thd_start(void *varg) { arg->x++; mtx_unlock(&arg->mtx); } - return (NULL); + return NULL; } TEST_BEGIN(test_mtx_race) { @@ -51,7 +51,7 @@ TEST_END int main(void) { - return (test( + return test( test_mtx_basic, - test_mtx_race)); + test_mtx_race); } diff --git a/test/unit/nstime.c b/test/unit/nstime.c index f628a8f3..f7f1bdfd 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -198,7 +198,7 @@ TEST_END int main(void) { - return (test( + return test( test_nstime_init, test_nstime_init2, test_nstime_copy, @@ -209,5 +209,5 @@ main(void) { test_nstime_idivide, test_nstime_divide, test_nstime_monotonic, - test_nstime_update)); + test_nstime_update); } diff --git a/test/unit/pack.c b/test/unit/pack.c index 9237ba2e..3edd405d 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -41,12 +41,12 @@ binind_compute(void) { 
assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); if (size == SZ) { - return (i); + return i; } } test_fail("Unable to compute nregs_per_run"); - return (0); + return 0; } static size_t @@ -63,7 +63,7 @@ nregs_per_run_compute(void) { sz = sizeof(nregs); assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); - return (nregs); + return nregs; } static unsigned @@ -75,7 +75,7 @@ arenas_create_mallctl(void) { assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); - return (arena_ind); + return arena_ind; } static void @@ -158,6 +158,6 @@ TEST_END int main(void) { - return (test( - test_pack)); + return test( + test_pack); } diff --git a/test/unit/pages.c b/test/unit/pages.c index b6092de0..30d69592 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -22,6 +22,6 @@ TEST_END int main(void) { - return (test( - test_pages_huge)); + return test( + test_pages_huge); } diff --git a/test/unit/ph.c b/test/unit/ph.c index e49a0e78..5f3c5a45 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -22,7 +22,7 @@ node_cmp(const node_t *a, const node_t *b) { ret = (((uintptr_t)a) > ((uintptr_t)b)) - (((uintptr_t)a) < ((uintptr_t)b)); } - return (ret); + return ret; } static int @@ -31,7 +31,7 @@ node_cmp_magic(const node_t *a, const node_t *b) { assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); - return (node_cmp(a, b)); + return node_cmp(a, b); } typedef ph(node_t) heap_t; @@ -94,7 +94,7 @@ node_validate(const node_t *node, const node_t *parent) { leftmost_child = phn_lchild_get(node_t, link, node); if (leftmost_child == NULL) { - return (nnodes); + return nnodes; } assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), (void *)node, "Leftmost child does not link to node"); @@ -107,7 +107,7 @@ node_validate(const node_t *node, const node_t *parent) { "sibling's prev 
doesn't link to sibling"); nnodes += node_validate(sibling, node); } - return (nnodes); + return nnodes; } static unsigned @@ -133,7 +133,7 @@ label_return: if (false) { heap_print(heap); } - return (nnodes); + return nnodes; } TEST_BEGIN(test_ph_empty) { @@ -156,7 +156,7 @@ static node_t * node_remove_first(heap_t *heap) { node_t *node = heap_remove_first(heap); node->magic = 0; - return (node); + return node; } TEST_BEGIN(test_ph_random) { @@ -281,7 +281,7 @@ TEST_END int main(void) { - return (test( + return test( test_ph_empty, - test_ph_random)); + test_ph_random); } diff --git a/test/unit/prng.c b/test/unit/prng.c index b26da36e..cbccb8a0 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -221,7 +221,7 @@ TEST_END int main(void) { - return (test( + return test( test_prng_lg_range_u32_nonatomic, test_prng_lg_range_u32_atomic, test_prng_lg_range_u64_nonatomic, @@ -231,5 +231,5 @@ main(void) { test_prng_range_u32_atomic, test_prng_range_u64_nonatomic, test_prng_range_zu_nonatomic, - test_prng_range_zu_atomic)); + test_prng_range_zu_atomic); } diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index bed0c9a6..ad7a3eaa 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -17,12 +17,12 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); - return (fd); + return fd; } static void * alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { - return (btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration)); + return btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration); } static void * @@ -51,7 +51,7 @@ thd_start(void *varg) { } } - return (NULL); + return NULL; } TEST_BEGIN(test_idump) { @@ -81,6 +81,6 @@ TEST_END int main(void) { - return (test( - test_idump)); + return test( + test_idump); } diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 422024f1..9bcb3e3b 100644 --- a/test/unit/prof_active.c +++ 
b/test/unit/prof_active.c @@ -117,6 +117,6 @@ TEST_END int main(void) { - return (test( - test_prof_active)); + return test( + test_prof_active); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 0d8ec71c..30320b7a 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -15,7 +15,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); - return (fd); + return fd; } TEST_BEGIN(test_gdump) { @@ -73,6 +73,6 @@ TEST_END int main(void) { - return (test( - test_gdump)); + return test( + test_gdump); } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 393211ea..1fed7b37 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -24,7 +24,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); - return (fd); + return fd; } TEST_BEGIN(test_idump) { @@ -50,6 +50,6 @@ TEST_END int main(void) { - return (test( - test_idump)); + return test( + test_idump); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 463f6893..c2bb50d6 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -12,7 +12,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); - return (fd); + return fd; } static void @@ -29,7 +29,7 @@ get_lg_prof_sample(void) { assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); - return (lg_prof_sample); + return lg_prof_sample; } static void @@ -94,7 +94,7 @@ prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); - return (false); + return false; } TEST_BEGIN(test_prof_reset_cleanup) { @@ -181,7 +181,7 
@@ thd_start(void *varg) { } } - return (NULL); + return NULL; } TEST_BEGIN(test_prof_reset) { @@ -283,9 +283,9 @@ main(void) { /* Intercept dumping prior to running any tests. */ prof_dump_open = prof_dump_open_intercept; - return (test( + return test( test_prof_reset_basic, test_prof_reset_cleanup, test_prof_reset, - test_xallocx)); + test_xallocx); } diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index ba86e10e..bcf85f89 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -94,7 +94,7 @@ thd_start(void *varg) { mallctl_thread_name_set(thread_name); mallctl_thread_name_set(""); - return (NULL); + return NULL; } TEST_BEGIN(test_prof_thread_name_threaded) { @@ -118,7 +118,7 @@ TEST_END int main(void) { - return (test( + return test( test_prof_thread_name_validation, - test_prof_thread_name_threaded)); + test_prof_thread_name_threaded); } diff --git a/test/unit/ql.c b/test/unit/ql.c index 0bb896cb..231a7243 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -192,11 +192,11 @@ TEST_END int main(void) { - return (test( + return test( test_ql_empty, test_ql_tail_insert, test_ql_tail_remove, test_ql_head_insert, test_ql_head_remove, - test_ql_insert)); + test_ql_insert); } diff --git a/test/unit/qr.c b/test/unit/qr.c index 8061a345..9a72d308 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -232,10 +232,10 @@ TEST_END int main(void) { - return (test( + return test( test_qr_one, test_qr_after_insert, test_qr_remove, test_qr_before_insert, - test_qr_meld_split)); + test_qr_meld_split); } diff --git a/test/unit/rb.c b/test/unit/rb.c index dea86c6e..dab2c3a2 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -36,7 +36,7 @@ node_cmp(const node_t *a, const node_t *b) { ret = (((uintptr_t)a) > ((uintptr_t)b)) - (((uintptr_t)a) < ((uintptr_t)b)); } - return (ret); + return ret; } typedef rb_tree(node_t) tree_t; @@ -73,7 +73,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { node_t *right_node; 
if (node == NULL) { - return (ret); + return ret; } left_node = rbtn_left_get(node_t, link, node); @@ -112,7 +112,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { ret += (black_depth != black_height); } - return (ret); + return ret; } static node_t * @@ -139,7 +139,7 @@ tree_iterate_cb(tree_t *tree, node_t *node, void *data) { (*i)++; - return (NULL); + return NULL; } static unsigned @@ -149,7 +149,7 @@ tree_iterate(tree_t *tree) { i = 0; tree_iter(tree, NULL, tree_iterate_cb, (void *)&i); - return (i); + return i; } static unsigned @@ -159,7 +159,7 @@ tree_iterate_reverse(tree_t *tree) { i = 0; tree_reverse_iter(tree, NULL, tree_iterate_cb, (void *)&i); - return (i); + return i; } static void @@ -201,7 +201,7 @@ remove_iterate_cb(tree_t *tree, node_t *node, void *data) { node_remove(tree, node, *nnodes); - return (ret); + return ret; } static node_t * @@ -211,7 +211,7 @@ remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) { node_remove(tree, node, *nnodes); - return (ret); + return ret; } static void @@ -347,7 +347,7 @@ TEST_END int main(void) { - return (test( + return test( test_rb_empty, - test_rb_random)); + test_rb_random); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index ca99f8a8..344ac16a 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -18,7 +18,7 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { assert_ptr_not_null(node, "Unexpected calloc() failure"); malloc_mutex_lock(tsdn, &rtree->init_lock); - return (node); + return node; } static void @@ -102,7 +102,7 @@ thd_start(void *varg) { free(extent); fini_gen_rand(sfmt); - return (NULL); + return NULL; } TEST_BEGIN(test_rtree_concurrent) { @@ -283,10 +283,10 @@ main(void) { rtree_node_dalloc = rtree_node_dalloc_intercept; test_rtree = NULL; - return (test( + return test( test_rtree_read_empty, test_rtree_concurrent, test_rtree_extrema, test_rtree_bits, - test_rtree_random)); + test_rtree_random); } diff --git 
a/test/unit/size_classes.c b/test/unit/size_classes.c index 38ea9bee..70a86ad9 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -19,7 +19,7 @@ get_max_size_class(void) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, "Unexpected mallctlbymib() error"); - return (max_size_class); + return max_size_class; } TEST_BEGIN(test_size_classes) { @@ -173,8 +173,8 @@ TEST_END int main(void) { - return (test( + return test( test_size_classes, test_psize_classes, - test_overflow)); + test_overflow); } diff --git a/test/unit/slab.c b/test/unit/slab.c index a5036f59..d3b45e80 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -27,6 +27,6 @@ TEST_END int main(void) { - return (test( - test_arena_slab_regind)); + return test( + test_arena_slab_regind); } diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index ac279159..bf5dfb1d 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -95,8 +95,8 @@ TEST_END int main(void) { - return (test( + return test( test_smoothstep_integral, test_smoothstep_monotonic, - test_smoothstep_slope)); + test_smoothstep_slope); } diff --git a/test/unit/stats.c b/test/unit/stats.c index 98673a8e..948132cb 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -116,7 +116,7 @@ TEST_END void * thd_start(void *arg) { - return (NULL); + return NULL; } static void @@ -339,12 +339,12 @@ TEST_END int main(void) { - return (test( + return test( test_stats_summary, test_stats_large, test_stats_arenas_summary, test_stats_arenas_small, test_stats_arenas_large, test_stats_arenas_bins, - test_stats_arenas_lextents)); + test_stats_arenas_lextents); } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 1fb8fe6f..5a11b503 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -983,7 +983,7 @@ TEST_END int main(void) { - return (test( + return test( test_json_parser, - test_stats_print_json)); + test_stats_print_json); } diff --git a/test/unit/ticker.c 
b/test/unit/ticker.c index be54356f..32236f2c 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -64,8 +64,8 @@ TEST_END int main(void) { - return (test( + return test( test_ticker_tick, test_ticker_ticks, - test_ticker_copy)); + test_ticker_copy); } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 484dc30b..f34f0e78 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -71,7 +71,7 @@ thd_start(void *arg) { "Resetting local data should have no effect on tsd"); free(p); - return (NULL); + return NULL; } TEST_BEGIN(test_tsd_main_thread) { @@ -95,11 +95,11 @@ main(void) { /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); - return (test_status_fail); + return test_status_fail; } data_tsd_boot(); - return (test( + return test( test_tsd_main_thread, - test_tsd_sub_thread)); + test_tsd_sub_thread); } diff --git a/test/unit/util.c b/test/unit/util.c index 3d1ecf4e..81421e80 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -296,12 +296,12 @@ TEST_END int main(void) { - return (test( + return test( test_pow2_ceil_u64, test_pow2_ceil_u32, test_pow2_ceil_zu, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated, - test_malloc_snprintf)); + test_malloc_snprintf); } diff --git a/test/unit/witness.c b/test/unit/witness.c index d75ca482..c914e4b3 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -38,7 +38,7 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { assert(oa == (void *)a); assert(ob == (void *)b); - return (strcmp(a->name, b->name)); + return strcmp(a->name, b->name); } static int @@ -49,7 +49,7 @@ witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, assert(oa == (void *)a); assert(ob == (void *)b); - return (-strcmp(a->name, b->name)); + return -strcmp(a->name, b->name); } TEST_BEGIN(test_witness) { @@ -255,11 +255,11 @@ TEST_END int main(void) { - return (test( + return test( 
test_witness, test_witness_comp, test_witness_reversal, test_witness_recursive, test_witness_unlock_not_owned, - test_witness_lockful)); + test_witness_lockful); } diff --git a/test/unit/zero.c b/test/unit/zero.c index a802f053..88af9452 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -58,7 +58,7 @@ TEST_END int main(void) { - return (test( + return test( test_zero_small, - test_zero_large)); + test_zero_large); } From c0cc5db8717dd1d890bd52b687d9eef64a49554f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 19 Jan 2017 21:41:41 -0800 Subject: [PATCH 0604/2608] Replace tabs following #define with spaces. This resolves #564. --- include/jemalloc/internal/arena_types.h | 10 +- include/jemalloc/internal/assert.h | 8 +- include/jemalloc/internal/atomic_externs.h | 10 +- include/jemalloc/internal/bitmap_types.h | 42 ++-- include/jemalloc/internal/ckh_types.h | 6 +- include/jemalloc/internal/ctl_externs.h | 6 +- include/jemalloc/internal/extent_dss_types.h | 4 +- include/jemalloc/internal/extent_types.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 70 +++--- .../internal/jemalloc_internal_decls.h | 2 +- .../internal/jemalloc_internal_defs.h.in | 2 +- .../internal/jemalloc_internal_macros.h | 16 +- include/jemalloc/internal/nstime_types.h | 2 +- include/jemalloc/internal/pages_types.h | 16 +- include/jemalloc/internal/ph.h | 32 +-- .../jemalloc/internal/private_namespace.sh | 2 +- include/jemalloc/internal/prng_types.h | 8 +- include/jemalloc/internal/prof_types.h | 22 +- include/jemalloc/internal/public_namespace.sh | 2 +- include/jemalloc/internal/ql.h | 36 +-- include/jemalloc/internal/qr.h | 22 +- include/jemalloc/internal/rb.h | 54 ++--- include/jemalloc/internal/rtree_inlines.h | 6 +- include/jemalloc/internal/rtree_types.h | 14 +- include/jemalloc/internal/size_classes.sh | 26 +- include/jemalloc/internal/smoothstep.h | 8 +- include/jemalloc/internal/smoothstep.sh | 8 +- include/jemalloc/internal/tcache_types.h | 20 +- 
include/jemalloc/internal/tsd_inlines.h | 4 +- include/jemalloc/internal/tsd_structs.h | 6 +- include/jemalloc/internal/tsd_types.h | 38 +-- include/jemalloc/internal/util_types.h | 12 +- include/jemalloc/internal/witness_types.h | 50 ++-- include/jemalloc/jemalloc.sh | 3 +- include/jemalloc/jemalloc_macros.h.in | 26 +- include/msvc_compat/windows_extra.h | 2 +- src/arena.c | 20 +- src/atomic.c | 2 +- src/base.c | 2 +- src/bitmap.c | 2 +- src/ckh.c | 2 +- src/ctl.c | 42 ++-- src/extent.c | 10 +- src/extent_dss.c | 2 +- src/extent_mmap.c | 2 +- src/hash.c | 2 +- src/jemalloc.c | 94 ++++---- src/jemalloc_cpp.cpp | 2 +- src/large.c | 10 +- src/mb.c | 2 +- src/mutex.c | 4 +- src/nstime.c | 10 +- src/pages.c | 2 +- src/prng.c | 2 +- src/prof.c | 18 +- src/rtree.c | 10 +- src/spin.c | 2 +- src/stats.c | 22 +- src/tcache.c | 2 +- src/ticker.c | 2 +- src/tsd.c | 8 +- src/util.c | 24 +- src/witness.c | 18 +- test/include/test/btalloc.h | 4 +- test/include/test/jemalloc_test.h.in | 10 +- test/include/test/mq.h | 4 +- test/include/test/test.h | 228 +++++++++--------- test/integration/MALLOCX_ARENA.c | 2 +- test/integration/aligned_alloc.c | 4 +- test/integration/mallocx.c | 6 +- test/integration/posix_memalign.c | 4 +- test/integration/rallocx.c | 16 +- test/integration/sdallocx.c | 4 +- test/integration/thread_arena.c | 2 +- test/integration/xallocx.c | 2 +- test/src/SFMT.c | 2 +- test/src/math.c | 2 +- test/src/mtx.c | 2 +- test/unit/SFMT.c | 8 +- test/unit/arena_reset.c | 2 +- test/unit/arena_reset_prof.c | 2 +- test/unit/atomic.c | 4 +- test/unit/bitmap.c | 14 +- test/unit/ckh.c | 2 +- test/unit/decay.c | 4 +- test/unit/extent_quantize.c | 2 +- test/unit/hash.c | 4 +- test/unit/junk_alloc.c | 2 +- test/unit/junk_free.c | 2 +- test/unit/mallctl.c | 14 +- test/unit/math.c | 6 +- test/unit/mq.c | 4 +- test/unit/mtx.c | 4 +- test/unit/nstime.c | 2 +- test/unit/pack.c | 6 +- test/unit/ph.c | 8 +- test/unit/prng.c | 18 +- test/unit/prof_accum.c | 8 +- test/unit/prof_active.c | 
10 +- test/unit/prof_reset.c | 12 +- test/unit/prof_thread_name.c | 8 +- test/unit/ql.c | 2 +- test/unit/qr.c | 4 +- test/unit/rb.c | 10 +- test/unit/rtree.c | 12 +- test/unit/smoothstep.c | 2 +- test/unit/ticker.c | 8 +- test/unit/tsd.c | 4 +- test/unit/util.c | 16 +- test/unit/zero.c | 2 +- 110 files changed, 706 insertions(+), 707 deletions(-) diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index a13a1b61..d821be45 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -1,16 +1,16 @@ #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H #define JEMALLOC_INTERNAL_ARENA_TYPES_H -#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) +#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) /* Maximum number of regions in one slab. */ -#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) -#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) +#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) /* Default decay time in seconds. */ -#define DECAY_TIME_DEFAULT 10 +#define DECAY_TIME_DEFAULT 10 /* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 +#define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_slab_data_s arena_slab_data_t; typedef struct arena_bin_info_s arena_bin_info_t; diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index 5da0ef42..b9ab813e 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -3,7 +3,7 @@ * assertion failure. 
*/ #ifndef assert -#define assert(e) do { \ +#define assert(e) do { \ if (unlikely(config_debug && !(e))) { \ malloc_printf( \ ": %s:%d: Failed assertion: \"%s\"\n", \ @@ -14,7 +14,7 @@ #endif #ifndef not_reached -#define not_reached() do { \ +#define not_reached() do { \ if (config_debug) { \ malloc_printf( \ ": %s:%d: Unreachable code reached\n", \ @@ -26,7 +26,7 @@ #endif #ifndef not_implemented -#define not_implemented() do { \ +#define not_implemented() do { \ if (config_debug) { \ malloc_printf(": %s:%d: Not implemented\n", \ __FILE__, __LINE__); \ @@ -36,7 +36,7 @@ #endif #ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ +#define assert_not_implemented(e) do { \ if (unlikely(config_debug && !(e))) { \ not_implemented(); \ } \ diff --git a/include/jemalloc/internal/atomic_externs.h b/include/jemalloc/internal/atomic_externs.h index 002aebca..09f06408 100644 --- a/include/jemalloc/internal/atomic_externs.h +++ b/include/jemalloc/internal/atomic_externs.h @@ -2,11 +2,11 @@ #define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -#define atomic_read_u64(p) atomic_add_u64(p, 0) +#define atomic_read_u64(p) atomic_add_u64(p, 0) #endif -#define atomic_read_u32(p) atomic_add_u32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_zu(p) atomic_add_zu(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) +#define atomic_read_u32(p) atomic_add_u32(p, 0) +#define atomic_read_p(p) atomic_add_p(p, NULL) +#define atomic_read_zu(p) atomic_add_zu(p, 0) +#define atomic_read_u(p) atomic_add_u(p, 0) #endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index d823186f..ec8a6dc9 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -2,18 +2,18 @@ #define JEMALLOC_INTERNAL_BITMAP_TYPES_H /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. 
*/ -#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) +#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) typedef struct bitmap_level_s bitmap_level_t; typedef struct bitmap_info_s bitmap_info_t; typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) /* * Do some analysis on how big the bitmap is before we use a tree. For a brute @@ -25,22 +25,22 @@ typedef unsigned long bitmap_t; #endif /* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ +#define BITMAP_BITS2GROUPS(nbits) \ (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) /* * Number of groups required at a particular level for a given number of bits. 
*/ -#define BITMAP_GROUPS_L0(nbits) \ +#define BITMAP_GROUPS_L0(nbits) \ BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ +#define BITMAP_GROUPS_L1(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ +#define BITMAP_GROUPS_L2(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ +#define BITMAP_GROUPS_L3(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ +#define BITMAP_GROUPS_L4(nbits) \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) @@ -48,15 +48,15 @@ typedef unsigned long bitmap_t; * Assuming the number of levels, number of groups required for a given number * of bits. */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ +#define BITMAP_GROUPS_2_LEVEL(nbits) \ (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ +#define BITMAP_GROUPS_3_LEVEL(nbits) \ (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ +#define BITMAP_GROUPS_4_LEVEL(nbits) \ (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ +#define BITMAP_GROUPS_5_LEVEL(nbits) \ (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) /* @@ -92,9 +92,9 @@ typedef unsigned long bitmap_t; * unused trailing entries in bitmap_info_t structures; the bitmaps themselves * are not impacted. */ -#define BITMAP_MAX_LEVELS 5 +#define BITMAP_MAX_LEVELS 5 -#define BITMAP_INFO_INITIALIZER(nbits) { \ +#define BITMAP_INFO_INITIALIZER(nbits) { \ /* nbits. */ \ nbits, \ /* nlevels. 
*/ \ @@ -119,9 +119,9 @@ typedef unsigned long bitmap_t; #else /* BITMAP_USE_TREE */ -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) -#define BITMAP_INFO_INITIALIZER(nbits) { \ +#define BITMAP_INFO_INITIALIZER(nbits) { \ /* nbits. */ \ nbits, \ /* ngroups. */ \ diff --git a/include/jemalloc/internal/ckh_types.h b/include/jemalloc/internal/ckh_types.h index 9a1d8d49..b5911db4 100644 --- a/include/jemalloc/internal/ckh_types.h +++ b/include/jemalloc/internal/ckh_types.h @@ -9,14 +9,14 @@ typedef void ckh_hash_t (const void *, size_t[2]); typedef bool ckh_keycomp_t (const void *, const void *); /* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ +/* #define CKH_COUNT */ /* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ +/* #define CKH_VERBOSE */ /* * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit * one bucket per L1 cache line. 
*/ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) #endif /* JEMALLOC_INTERNAL_CKH_TYPES_H */ diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 11f77cfb..2ef48c66 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -13,7 +13,7 @@ void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); void ctl_postfork_child(tsdn_t *tsdn); -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ != 0) { \ malloc_printf( \ @@ -23,7 +23,7 @@ void ctl_postfork_child(tsdn_t *tsdn); } \ } while (0) -#define xmallctlnametomib(name, mibp, miblenp) do { \ +#define xmallctlnametomib(name, mibp, miblenp) do { \ if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ malloc_printf(": Failure in " \ "xmallctlnametomib(\"%s\", ...)\n", name); \ @@ -31,7 +31,7 @@ void ctl_postfork_child(tsdn_t *tsdn); } \ } while (0) -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ newlen) != 0) { \ malloc_write( \ diff --git a/include/jemalloc/internal/extent_dss_types.h b/include/jemalloc/internal/extent_dss_types.h index 2839757c..a851c7cb 100644 --- a/include/jemalloc/internal/extent_dss_types.h +++ b/include/jemalloc/internal/extent_dss_types.h @@ -8,7 +8,7 @@ typedef enum { dss_prec_limit = 3 } dss_prec_t; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" #endif /* JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 4873dc54..53db1c36 100644 --- 
a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -3,6 +3,6 @@ typedef struct extent_s extent_t; -#define EXTENT_HOOKS_INITIALIZER NULL +#define EXTENT_HOOKS_INITIALIZER NULL #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 03a50a4d..33fd2fac 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H +#define JEMALLOC_INTERNAL_H #ifdef __cplusplus extern "C" { @@ -12,7 +12,7 @@ extern "C" { #include #endif -#define JEMALLOC_NO_DEMANGLE +#define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET # define JEMALLOC_N(n) jet_##n # include "jemalloc/internal/public_namespace.h" @@ -166,7 +166,7 @@ static const bool have_thp = #include "jemalloc/internal/ph.h" #ifndef __PGI -#define RB_COMPACT +#define RB_COMPACT #endif #include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" @@ -224,34 +224,34 @@ typedef unsigned szind_t; * * aaaaaaaa aaaatttt tttttttt 0znnnnnn */ -#define MALLOCX_ARENA_BITS 12 -#define MALLOCX_TCACHE_BITS 12 -#define MALLOCX_LG_ALIGN_BITS 6 -#define MALLOCX_ARENA_SHIFT 20 -#define MALLOCX_TCACHE_SHIFT 8 -#define MALLOCX_ARENA_MASK \ +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) /* NB: Arena index bias decreases the maximum number of arenas by 1. 
*/ -#define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) -#define MALLOCX_TCACHE_MASK \ +#define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) +#define MALLOCX_TCACHE_MASK \ (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) -#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) -#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) +#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ -#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) -#define MALLOCX_ALIGN_GET(flags) \ +#define MALLOCX_ALIGN_GET(flags) \ (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) -#define MALLOCX_ZERO_GET(flags) \ +#define MALLOCX_ZERO_GET(flags) \ ((bool)(flags & MALLOCX_ZERO)) -#define MALLOCX_TCACHE_GET(flags) \ +#define MALLOCX_TCACHE_GET(flags) \ (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) -#define MALLOCX_ARENA_GET(flags) \ +#define MALLOCX_ARENA_GET(flags) \ (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) /* Smallest size class to support. */ -#define TINY_MIN (1U << LG_TINY_MIN) +#define TINY_MIN (1U << LG_TINY_MIN) /* * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size @@ -312,25 +312,25 @@ typedef unsigned szind_t; # endif #endif -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) /* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ +#define QUANTUM_CEILING(a) \ (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. 
*/ -#define LONG_CEILING(a) \ +#define LONG_CEILING(a) \ (((a) + LONG_MASK) & ~LONG_MASK) -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ +#define PTR_CEILING(a) \ (((a) + PTR_MASK) & ~PTR_MASK) /* @@ -340,24 +340,24 @@ typedef unsigned szind_t; * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can * only handle raw constants. */ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ +#define CACHELINE_CEILING(s) \ (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) /* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) /* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ ((size_t)((uintptr_t)(a) & (alignment - 1))) /* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ +#define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index fd80fdf0..21a4183d 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_DECLS_H -#define JEMALLOC_INTERNAL_DECLS_H +#define JEMALLOC_INTERNAL_DECLS_H #include #ifdef _WIN32 diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c777ab02..396a1a27 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -1,5 +1,5 @@ #ifndef JEMALLOC_INTERNAL_DEFS_H_ -#define JEMALLOC_INTERNAL_DEFS_H_ +#define JEMALLOC_INTERNAL_DEFS_H_ /* * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all * public APIs to be prefixed. This makes it possible, with some care, to use diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 80820f87..b70d08a2 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -41,15 +41,15 @@ # define UNUSED #endif -#define ZU(z) ((size_t)z) -#define ZI(z) ((ssize_t)z) -#define QU(q) ((uint64_t)q) -#define QI(q) ((int64_t)q) +#define ZU(z) ((size_t)z) +#define ZI(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QI(q) ((int64_t)q) -#define KZU(z) ZU(z##ULL) -#define KZI(z) ZI(z##LL) -#define KQU(q) QU(q##ULL) -#define KQI(q) QI(q##LL) +#define KZU(z) ZU(z##ULL) +#define KZI(z) ZI(z##LL) +#define KQU(q) QU(q##ULL) +#define KQI(q) QI(q##LL) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) diff --git a/include/jemalloc/internal/nstime_types.h b/include/jemalloc/internal/nstime_types.h index 861c5a8a..d6039e03 100644 --- a/include/jemalloc/internal/nstime_types.h +++ 
b/include/jemalloc/internal/nstime_types.h @@ -4,6 +4,6 @@ typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). */ -#define NSTIME_SEC_MAX KQU(18446744072) +#define NSTIME_SEC_MAX KQU(18446744072) #endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */ diff --git a/include/jemalloc/internal/pages_types.h b/include/jemalloc/internal/pages_types.h index be1e245f..9e6e7c5c 100644 --- a/include/jemalloc/internal/pages_types.h +++ b/include/jemalloc/internal/pages_types.h @@ -5,23 +5,23 @@ #ifdef PAGE_MASK # undef PAGE_MASK #endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ +#define PAGE_ADDR2BASE(a) \ ((void *)((uintptr_t)(a) & ~PAGE_MASK)) /* Return the smallest pagesize multiple that is >= s. */ -#define PAGE_CEILING(s) \ +#define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ -#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) -#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) /* Return the huge page base address for the huge page containing address a. */ -#define HUGEPAGE_ADDR2BASE(a) \ +#define HUGEPAGE_ADDR2BASE(a) \ ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) /* Return the smallest pagesize multiple that is >= s. */ -#define HUGEPAGE_CEILING(s) \ +#define HUGEPAGE_CEILING(s) \ (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) /* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index b8141eff..7e1920cb 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -13,10 +13,10 @@ */ #ifndef PH_H_ -#define PH_H_ +#define PH_H_ /* Node structure. 
*/ -#define phn(a_type) \ +#define phn(a_type) \ struct { \ a_type *phn_prev; \ a_type *phn_next; \ @@ -24,31 +24,31 @@ struct { \ } /* Root structure. */ -#define ph(a_type) \ +#define ph(a_type) \ struct { \ a_type *ph_root; \ } /* Internal utility macros. */ -#define phn_lchild_get(a_type, a_field, a_phn) \ +#define phn_lchild_get(a_type, a_field, a_phn) \ (a_phn->a_field.phn_lchild) -#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ +#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ a_phn->a_field.phn_lchild = a_lchild; \ } while (0) -#define phn_next_get(a_type, a_field, a_phn) \ +#define phn_next_get(a_type, a_field, a_phn) \ (a_phn->a_field.phn_next) -#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ +#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ a_phn->a_field.phn_prev = a_prev; \ } while (0) -#define phn_prev_get(a_type, a_field, a_phn) \ +#define phn_prev_get(a_type, a_field, a_phn) \ (a_phn->a_field.phn_prev) -#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ +#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ a_phn->a_field.phn_next = a_next; \ } while (0) -#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ +#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ a_type *phn0child; \ \ assert(a_phn0 != NULL); \ @@ -64,7 +64,7 @@ struct { \ phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ } while (0) -#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ +#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ if (a_phn0 == NULL) { \ r_phn = a_phn1; \ } else if (a_phn1 == NULL) { \ @@ -80,7 +80,7 @@ struct { \ } \ } while (0) -#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ +#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ a_type *head = NULL; \ a_type *tail = NULL; \ a_type *phn0 = a_phn; \ @@ -167,7 +167,7 @@ struct { \ r_phn = phn0; \ } while (0) -#define 
ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ +#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ if (phn != NULL) { \ phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ @@ -180,7 +180,7 @@ struct { \ } \ } while (0) -#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ +#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ if (lchild == NULL) { \ r_phn = NULL; \ @@ -194,7 +194,7 @@ struct { \ * The ph_proto() macro generates function prototypes that correspond to the * functions generated by an equivalently parameterized call to ph_gen(). */ -#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ +#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ a_attr void a_prefix##new(a_ph_type *ph); \ a_attr bool a_prefix##empty(a_ph_type *ph); \ a_attr a_type *a_prefix##first(a_ph_type *ph); \ @@ -206,7 +206,7 @@ a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); * The ph_gen() macro generates a type-specific pairing heap implementation, * based on the above cpp macros. 
*/ -#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ +#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ a_attr void \ a_prefix##new(a_ph_type *ph) { \ memset(ph, 0, sizeof(ph(a_type))); \ diff --git a/include/jemalloc/internal/private_namespace.sh b/include/jemalloc/internal/private_namespace.sh index cd25eb30..820862fe 100755 --- a/include/jemalloc/internal/private_namespace.sh +++ b/include/jemalloc/internal/private_namespace.sh @@ -1,5 +1,5 @@ #!/bin/sh for symbol in `cat $1` ; do - echo "#define ${symbol} JEMALLOC_N(${symbol})" + echo "#define ${symbol} JEMALLOC_N(${symbol})" done diff --git a/include/jemalloc/internal/prng_types.h b/include/jemalloc/internal/prng_types.h index dec44c09..3e8e1834 100644 --- a/include/jemalloc/internal/prng_types.h +++ b/include/jemalloc/internal/prng_types.h @@ -20,10 +20,10 @@ * bits. */ -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) #endif /* JEMALLOC_INTERNAL_PRNG_TYPES_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index e1eb7fb1..ff0db65e 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -13,43 +13,43 @@ typedef struct prof_tdata_s prof_tdata_t; #else # define PROF_PREFIX_DEFAULT "" #endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 /* * Hard limit on stack backtrace depth. The version of prof_backtrace() that * is based on __builtin_return_address() necessarily has a hard-coded number * of backtrace frame handlers, and should be kept in sync with this setting. 
*/ -#define PROF_BT_MAX 128 +#define PROF_BT_MAX 128 /* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 +#define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 +#define PROF_DUMP_BUFSIZE 65536 /* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 +#define PROF_PRINTF_BUFSIZE 128 /* * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ -#define PROF_NCTX_LOCKS 1024 +#define PROF_NCTX_LOCKS 1024 /* * Number of mutexes shared among all tdata's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ -#define PROF_NTDATA_LOCKS 256 +#define PROF_NTDATA_LOCKS 256 /* * prof_tdata pointers close to NULL are used to encode state information that * is used for cleaning up during thread shutdown. */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY +#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) +#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) +#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/public_namespace.sh b/include/jemalloc/internal/public_namespace.sh index 362109f7..4d415ba0 100755 --- a/include/jemalloc/internal/public_namespace.sh +++ b/include/jemalloc/internal/public_namespace.sh @@ -2,5 +2,5 @@ for nm in `cat $1` ; do n=`echo ${nm} |tr ':' ' ' |awk '{print $1}'` - echo "#define je_${n} JEMALLOC_N(${n})" + echo "#define je_${n} JEMALLOC_N(${n})" done diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 424485c4..b3a428c7 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -2,61 +2,61 @@ 
#define JEMALLOC_INTERNAL_QL_H /* List definitions. */ -#define ql_head(a_type) \ +#define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) {NULL} -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* List functions. */ -#define ql_new(a_head) do { \ +#define ql_new(a_head) do { \ (a_head)->qlh_first = NULL; \ } while (0) -#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +#define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) +#define ql_first(a_head) ((a_head)->qlh_first) -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ ((ql_first(a_head) != NULL) \ ? qr_prev(ql_first(a_head), a_field) : NULL) -#define ql_next(a_head, a_elm, a_field) \ +#define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ +#define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? 
qr_prev((a_elm), a_field) \ : NULL) -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ ql_first(a_head) = (a_elm); \ } \ } while (0) -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) -#define ql_head_insert(a_head, a_elm, a_field) do { \ +#define ql_head_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) -#define ql_tail_insert(a_head, a_elm, a_field) do { \ +#define ql_tail_insert(a_head, a_elm, a_field) do { \ if (ql_first(a_head) != NULL) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) -#define ql_remove(a_head, a_elm, a_field) do { \ +#define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } \ @@ -67,20 +67,20 @@ struct { \ } \ } while (0) -#define ql_head_remove(a_head, a_type, a_field) do { \ +#define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_tail_remove(a_head, a_type, a_field) do { \ +#define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) -#define ql_foreach(a_var, a_head, a_field) \ +#define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) #endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/include/jemalloc/internal/qr.h 
b/include/jemalloc/internal/qr.h index a04f7504..1e1056b3 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -2,37 +2,37 @@ #define JEMALLOC_INTERNAL_QR_H /* Ring definitions. */ -#define qr(a_type) \ +#define qr(a_type) \ struct { \ a_type *qre_next; \ a_type *qre_prev; \ } /* Ring functions. */ -#define qr_new(a_qr, a_field) do { \ +#define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) +#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) -#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) +#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ +#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ (a_qr)->a_field.qre_next = (a_qrelm); \ (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ (a_qrelm)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ +#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ (a_qr)->a_field.qre_prev = (a_qrelm); \ (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ (a_qrelm)->a_field.qre_next = (a_qr); \ } while (0) -#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ +#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ a_type *t; \ (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ @@ -45,10 +45,10 @@ struct { \ * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ +#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_type, a_field) -#define qr_remove(a_qr, a_field) do { \ +#define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ = (a_qr)->a_field.qre_next; \ (a_qr)->a_field.qre_next->a_field.qre_prev \ @@ -57,13 +57,13 @@ struct { \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) -#define qr_foreach(var, a_qr, a_field) \ +#define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? (var)->a_field.qre_next : NULL)) -#define qr_reverse_foreach(var, a_qr, a_field) \ +#define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ (var) = (((var) != (a_qr)) \ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 7018325f..aa76061e 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -20,17 +20,17 @@ */ #ifndef RB_H_ -#define RB_H_ +#define RB_H_ #ifdef RB_COMPACT /* Node structure. */ -#define rb_node(a_type) \ +#define rb_node(a_type) \ struct { \ a_type *rbn_left; \ a_type *rbn_right_red; \ } #else -#define rb_node(a_type) \ +#define rb_node(a_type) \ struct { \ a_type *rbn_left; \ a_type *rbn_right; \ @@ -39,48 +39,48 @@ struct { \ #endif /* Root structure. */ -#define rb_tree(a_type) \ +#define rb_tree(a_type) \ struct { \ a_type *rbt_root; \ } /* Left accessors. */ -#define rbtn_left_get(a_type, a_field, a_node) \ +#define rbtn_left_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_left) -#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ +#define rbtn_left_set(a_type, a_field, a_node, a_left) do { \ (a_node)->a_field.rbn_left = a_left; \ } while (0) #ifdef RB_COMPACT /* Right accessors. 
*/ -#define rbtn_right_get(a_type, a_field, a_node) \ +#define rbtn_right_get(a_type, a_field, a_node) \ ((a_type *) (((intptr_t) (a_node)->a_field.rbn_right_red) \ & ((ssize_t)-2))) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) a_right) \ | (((uintptr_t) (a_node)->a_field.rbn_right_red) & ((size_t)1))); \ } while (0) /* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ +#define rbtn_red_get(a_type, a_field, a_node) \ ((bool) (((uintptr_t) (a_node)->a_field.rbn_right_red) \ & ((size_t)1))) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ (a_node)->a_field.rbn_right_red = (a_type *) ((((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)) \ | ((ssize_t)a_red)); \ } while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ +#define rbtn_red_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((uintptr_t) \ (a_node)->a_field.rbn_right_red) | ((size_t)1)); \ } while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ +#define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_right_red = (a_type *) (((intptr_t) \ (a_node)->a_field.rbn_right_red) & ((ssize_t)-2)); \ } while (0) /* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ /* Bookkeeping bit cannot be used by node pointer. */ \ assert(((uintptr_t)(a_node) & 0x1) == 0); \ rbtn_left_set(a_type, a_field, (a_node), NULL); \ @@ -89,27 +89,27 @@ struct { \ } while (0) #else /* Right accessors. 
*/ -#define rbtn_right_get(a_type, a_field, a_node) \ +#define rbtn_right_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_right) -#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ +#define rbtn_right_set(a_type, a_field, a_node, a_right) do { \ (a_node)->a_field.rbn_right = a_right; \ } while (0) /* Color accessors. */ -#define rbtn_red_get(a_type, a_field, a_node) \ +#define rbtn_red_get(a_type, a_field, a_node) \ ((a_node)->a_field.rbn_red) -#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ +#define rbtn_color_set(a_type, a_field, a_node, a_red) do { \ (a_node)->a_field.rbn_red = (a_red); \ } while (0) -#define rbtn_red_set(a_type, a_field, a_node) do { \ +#define rbtn_red_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = true; \ } while (0) -#define rbtn_black_set(a_type, a_field, a_node) do { \ +#define rbtn_black_set(a_type, a_field, a_node) do { \ (a_node)->a_field.rbn_red = false; \ } while (0) /* Node initializer. */ -#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ +#define rbt_node_new(a_type, a_field, a_rbt, a_node) do { \ rbtn_left_set(a_type, a_field, (a_node), NULL); \ rbtn_right_set(a_type, a_field, (a_node), NULL); \ rbtn_red_set(a_type, a_field, (a_node)); \ @@ -117,12 +117,12 @@ struct { \ #endif /* Tree initializer. */ -#define rb_new(a_type, a_field, a_rbt) do { \ +#define rb_new(a_type, a_field, a_rbt) do { \ (a_rbt)->rbt_root = NULL; \ } while (0) /* Internal utility macros. 
*/ -#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ +#define rbtn_first(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ if ((r_node) != NULL) { \ for (; \ @@ -132,7 +132,7 @@ struct { \ } \ } while (0) -#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ +#define rbtn_last(a_type, a_field, a_rbt, a_root, r_node) do { \ (r_node) = (a_root); \ if ((r_node) != NULL) { \ for (; rbtn_right_get(a_type, a_field, (r_node)) != NULL; \ @@ -141,14 +141,14 @@ struct { \ } \ } while (0) -#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ +#define rbtn_rotate_left(a_type, a_field, a_node, r_node) do { \ (r_node) = rbtn_right_get(a_type, a_field, (a_node)); \ rbtn_right_set(a_type, a_field, (a_node), \ rbtn_left_get(a_type, a_field, (r_node))); \ rbtn_left_set(a_type, a_field, (r_node), (a_node)); \ } while (0) -#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ +#define rbtn_rotate_right(a_type, a_field, a_node, r_node) do { \ (r_node) = rbtn_left_get(a_type, a_field, (a_node)); \ rbtn_left_set(a_type, a_field, (a_node), \ rbtn_right_get(a_type, a_field, (r_node))); \ @@ -160,7 +160,7 @@ struct { \ * functions generated by an equivalently parameterized call to rb_gen(). */ -#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ +#define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ a_attr bool \ @@ -335,7 +335,7 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * has begun. * arg : Opaque pointer passed to cb(). 
*/ -#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ +#define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index c9a06f64..795a88f7 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -225,9 +225,9 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, start_level, dependent); } -#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) switch (start_level + RTREE_GET_BIAS) { -#define RTREE_GET_SUBTREE(level) \ +#define RTREE_GET_SUBTREE(level) \ case level: \ assert(level < (RTREE_HEIGHT_MAX-1)); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ @@ -246,7 +246,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, node; \ } \ /* Fall through. */ -#define RTREE_GET_LEAF(level) \ +#define RTREE_GET_LEAF(level) \ case level: \ assert(level == (RTREE_HEIGHT_MAX-1)); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index c02ab7a1..122d5cef 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -19,13 +19,13 @@ typedef struct rtree_s rtree_t; * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the * machine address width. */ -#define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) +#define LG_RTREE_BITS_PER_LEVEL 4 +#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) /* Maximum rtree height. 
*/ -#define RTREE_HEIGHT_MAX \ +#define RTREE_HEIGHT_MAX \ ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) -#define RTREE_CTX_INITIALIZER { \ +#define RTREE_CTX_INITIALIZER { \ false, \ 0, \ 0, \ @@ -38,15 +38,15 @@ typedef struct rtree_s rtree_t; * have a witness_t directly embedded, but that would dramatically bloat the * tree. This must contain enough entries to e.g. coalesce two extents. */ -#define RTREE_ELM_ACQUIRE_MAX 4 +#define RTREE_ELM_ACQUIRE_MAX 4 /* Initializers for rtree_elm_witness_tsd_t. */ -#define RTREE_ELM_WITNESS_INITIALIZER { \ +#define RTREE_ELM_WITNESS_INITIALIZER { \ NULL, \ WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ } -#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ +#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ { \ RTREE_ELM_WITNESS_INITIALIZER, \ RTREE_ELM_WITNESS_INITIALIZER, \ diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 3680b653..06892d8d 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -150,7 +150,7 @@ size_classes() { pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} pow2 ${lg_g}; g=${pow2_result} - echo "#define SIZE_CLASSES \\" + echo "#define SIZE_CLASSES \\" echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\" ntbins=0 @@ -294,7 +294,7 @@ cat <state == tsd_state_nominal); } -#define O(n, t, c) \ +#define O(n, t, c) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ return &tsd->n; \ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index 8d94c5be..ca013208 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -14,7 +14,7 @@ struct tsd_init_head_s { }; #endif -#define MALLOC_TSD \ +#define MALLOC_TSD \ /* O(name, type, cleanup) */ \ O(tcache, tcache_t *, yes) \ O(thread_allocated, uint64_t, no) \ @@ -31,7 +31,7 @@ struct tsd_init_head_s { O(rtree_elm_witnesses, 
rtree_elm_witness_tsd_t,no) \ O(witness_fork, bool, no) \ -#define TSD_INITIALIZER { \ +#define TSD_INITIALIZER { \ tsd_state_uninitialized, \ NULL, \ 0, \ @@ -51,7 +51,7 @@ struct tsd_init_head_s { struct tsd_s { tsd_state_t state; -#define O(n, t, c) \ +#define O(n, t, c) \ t n; MALLOC_TSD #undef O diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index a1dce928..195b6493 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_TSD_TYPES_H /* Maximum number of malloc_tsd users with cleanup functions. */ -#define MALLOC_TSD_CLEANUPS_MAX 2 +#define MALLOC_TSD_CLEANUPS_MAX 2 typedef bool (*malloc_tsd_cleanup_t)(void); @@ -15,7 +15,7 @@ typedef struct tsd_init_head_s tsd_init_head_t; typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; -#define TSDN_NULL ((tsdn_t *)0) +#define TSDN_NULL ((tsdn_t *)0) typedef enum { tsd_state_uninitialized, @@ -77,17 +77,17 @@ typedef enum { /* malloc_tsd_types(). */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_types(a_name, a_type) +#define malloc_tsd_types(a_name, a_type) #elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_types(a_name, a_type) +#define malloc_tsd_types(a_name, a_type) #elif (defined(_WIN32)) -#define malloc_tsd_types(a_name, a_type) \ +#define malloc_tsd_types(a_name, a_type) \ typedef struct { \ bool initialized; \ a_type val; \ } a_name##tsd_wrapper_t; #else -#define malloc_tsd_types(a_name, a_type) \ +#define malloc_tsd_types(a_name, a_type) \ typedef struct { \ bool initialized; \ a_type val; \ @@ -95,7 +95,7 @@ typedef struct { \ #endif /* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ +#define malloc_tsd_protos(a_attr, a_name, a_type) \ a_attr bool \ a_name##tsd_boot0(void); \ a_attr void \ @@ -111,22 +111,22 @@ a_name##tsd_set(a_type *val); /* malloc_tsd_externs(). 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ +#define malloc_tsd_externs(a_name, a_type) \ extern __thread a_type a_name##tsd_tls; \ extern __thread bool a_name##tsd_initialized; \ extern bool a_name##tsd_booted; #elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ +#define malloc_tsd_externs(a_name, a_type) \ extern __thread a_type a_name##tsd_tls; \ extern pthread_key_t a_name##tsd_tsd; \ extern bool a_name##tsd_booted; #elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ +#define malloc_tsd_externs(a_name, a_type) \ extern DWORD a_name##tsd_tsd; \ extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ extern bool a_name##tsd_booted; #else -#define malloc_tsd_externs(a_name, a_type) \ +#define malloc_tsd_externs(a_name, a_type) \ extern pthread_key_t a_name##tsd_tsd; \ extern tsd_init_head_t a_name##tsd_init_head; \ extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ @@ -135,20 +135,20 @@ extern bool a_name##tsd_booted; /* malloc_tsd_data(). 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##tsd_tls = a_initializer; \ a_attr __thread bool JEMALLOC_TLS_MODEL \ a_name##tsd_initialized = false; \ a_attr bool a_name##tsd_booted = false; #elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr __thread a_type JEMALLOC_TLS_MODEL \ a_name##tsd_tls = a_initializer; \ a_attr pthread_key_t a_name##tsd_tsd; \ a_attr bool a_name##tsd_booted = false; #elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr DWORD a_name##tsd_tsd; \ a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ false, \ @@ -156,7 +156,7 @@ a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ }; \ a_attr bool a_name##tsd_booted = false; #else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ +#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ a_attr pthread_key_t a_name##tsd_tsd; \ a_attr tsd_init_head_t a_name##tsd_init_head = { \ ql_head_initializer(blocks), \ @@ -171,7 +171,7 @@ a_attr bool a_name##tsd_booted = false; /* malloc_tsd_funcs(). */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ @@ -224,7 +224,7 @@ a_name##tsd_set(a_type *val) { \ } \ } #elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. 
*/ \ a_attr bool \ @@ -278,7 +278,7 @@ a_name##tsd_set(a_type *val) { \ } \ } #elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr bool \ @@ -403,7 +403,7 @@ a_name##tsd_set(a_type *val) { \ } \ } #else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ +#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ a_cleanup) \ /* Initialization/cleanup. */ \ a_attr void \ diff --git a/include/jemalloc/internal/util_types.h b/include/jemalloc/internal/util_types.h index 4fe206bc..e0f79aad 100644 --- a/include/jemalloc/internal/util_types.h +++ b/include/jemalloc/internal/util_types.h @@ -32,13 +32,13 @@ #endif /* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 +#define BUFERROR_BUF 64 /* * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be * large enough for all possible uses within jemalloc. */ -#define MALLOC_PRINTF_BUFSIZE 4096 +#define MALLOC_PRINTF_BUFSIZE 4096 /* Junk fill patterns. */ #ifndef JEMALLOC_ALLOC_JUNK @@ -52,11 +52,11 @@ * Wrap a cpp argument that contains commas such that it isn't broken up into * multiple arguments. */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ /* cpp macro definition stringification. */ -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) /* * Silence compiler warnings due to uninitialized values. This is used @@ -86,7 +86,7 @@ #include "jemalloc/internal/assert.h" /* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ -#define cassert(c) do { \ +#define cassert(c) do { \ if (unlikely(!(c))) { \ not_reached(); \ } \ diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index ef962824..c2a73f2e 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -11,36 +11,36 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by * the witness machinery. */ -#define WITNESS_RANK_OMIT 0U +#define WITNESS_RANK_OMIT 0U -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_ARENAS 2U -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U -#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U +#define WITNESS_RANK_PROF_DUMP 3U +#define WITNESS_RANK_PROF_BT2GCTX 4U +#define WITNESS_RANK_PROF_TDATAS 5U +#define WITNESS_RANK_PROF_TDATA 6U +#define WITNESS_RANK_PROF_GCTX 7U -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_EXTENTS 9U -#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 +#define WITNESS_RANK_ARENA 8U +#define WITNESS_RANK_ARENA_EXTENTS 9U +#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 -#define WITNESS_RANK_RTREE_ELM 11U -#define WITNESS_RANK_RTREE 12U -#define WITNESS_RANK_BASE 13U +#define WITNESS_RANK_RTREE_ELM 11U +#define WITNESS_RANK_RTREE 12U +#define WITNESS_RANK_BASE 13U -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define 
WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF -#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} #endif /* JEMALLOC_INTERNAL_WITNESS_TYPES_H */ diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index c085814f..b19b1548 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -4,7 +4,7 @@ objroot=$1 cat < #include -#define JEMALLOC_VERSION "@jemalloc_version@" -#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ -#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ -#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ -#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ -#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" +#define JEMALLOC_VERSION "@jemalloc_version@" +#define JEMALLOC_VERSION_MAJOR @jemalloc_version_major@ +#define JEMALLOC_VERSION_MINOR @jemalloc_version_minor@ +#define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ +#define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ +#define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" -#define MALLOCX_LG_ALIGN(la) ((int)(la)) +#define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 # define MALLOCX_ALIGN(a) ((int)(ffs((int)(a))-1)) #else @@ -19,17 +19,17 @@ ((int)(((size_t)(a) < (size_t)INT_MAX) ? 
ffs((int)(a))-1 : \ ffs((int)(((size_t)(a))>>32))+31)) #endif -#define MALLOCX_ZERO ((int)0x40) +#define MALLOCX_ZERO ((int)0x40) /* * Bias tcache index bits so that 0 encodes "automatic tcache management", and 1 * encodes MALLOCX_TCACHE_NONE. */ -#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) -#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) +#define MALLOCX_TCACHE(tc) ((int)(((tc)+2) << 8)) +#define MALLOCX_TCACHE_NONE MALLOCX_TCACHE(-1) /* * Bias arena index bits so that 0 encodes "use an automatically chosen arena". */ -#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) +#define MALLOCX_ARENA(a) ((((int)(a))+1) << 20) /* * Use as arena index in "arena..{purge,decay,dss}" and @@ -43,12 +43,12 @@ * mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".purge", NULL, NULL, NULL, * 0); */ -#define MALLCTL_ARENAS_ALL 4096 +#define MALLCTL_ARENAS_ALL 4096 /* * Use as arena index in "stats.arenas..*" mallctl interfaces to select * destroyed arenas. */ -#define MALLCTL_ARENAS_DESTROYED 4097 +#define MALLCTL_ARENAS_DESTROYED 4097 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) # define JEMALLOC_CXX_THROW throw() diff --git a/include/msvc_compat/windows_extra.h b/include/msvc_compat/windows_extra.h index 3008faa3..a6ebb930 100644 --- a/include/msvc_compat/windows_extra.h +++ b/include/msvc_compat/windows_extra.h @@ -1,5 +1,5 @@ #ifndef MSVC_COMPAT_WINDOWS_EXTRA_H -#define MSVC_COMPAT_WINDOWS_EXTRA_H +#define MSVC_COMPAT_WINDOWS_EXTRA_H #include diff --git a/src/arena.c b/src/arena.c index fe4b5de2..b0da9a03 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,4 +1,4 @@ -#define JEMALLOC_ARENA_C_ +#define JEMALLOC_ARENA_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -8,10 +8,10 @@ ssize_t opt_decay_time = DECAY_TIME_DEFAULT; static ssize_t decay_time_default; const arena_bin_info_t arena_bin_info[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ +#define 
BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ +#define BIN_INFO_bin_no(reg_size, slab_size, nregs) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ lg_delta_lookup) \ BIN_INFO_bin_##bin((1U< UINT_MAX) { \ ret = EFAULT; \ goto label_return; \ @@ -1102,7 +1102,7 @@ ctl_postfork_child(tsdn_t *tsdn) { * There's a lot of code duplication in the following macros due to limitations * in how nested cpp macros are expanded. */ -#define CTL_RO_CLGEN(c, l, n, v, t) \ +#define CTL_RO_CLGEN(c, l, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ @@ -1127,7 +1127,7 @@ label_return: \ return ret; \ } -#define CTL_RO_CGEN(c, n, v, t) \ +#define CTL_RO_CGEN(c, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ @@ -1148,7 +1148,7 @@ label_return: \ return ret; \ } -#define CTL_RO_GEN(n, v, t) \ +#define CTL_RO_GEN(n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ @@ -1170,7 +1170,7 @@ label_return: \ * ctl_mtx is not acquired, under the assumption that no pertinent data will * mutate during the call. 
*/ -#define CTL_RO_NL_CGEN(c, n, v, t) \ +#define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ @@ -1189,7 +1189,7 @@ label_return: \ return ret; \ } -#define CTL_RO_NL_GEN(n, v, t) \ +#define CTL_RO_NL_GEN(n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ @@ -1205,7 +1205,7 @@ label_return: \ return ret; \ } -#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ +#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ @@ -1224,7 +1224,7 @@ label_return: \ return ret; \ } -#define CTL_RO_CONFIG_GEN(n, t) \ +#define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ size_t *oldlenp, void *newp, size_t newlen) { \ diff --git a/src/extent.c b/src/extent.c index bcdaccf5..0dbde72a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,4 +1,4 @@ -#define JEMALLOC_EXTENT_C_ +#define JEMALLOC_EXTENT_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -118,7 +118,7 @@ extent_hooks_assure_initialized(arena_t *arena, #ifdef JEMALLOC_JET #undef extent_size_quantize_floor -#define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) +#define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) #endif size_t extent_size_quantize_floor(size_t size) { @@ -147,14 +147,14 @@ extent_size_quantize_floor(size_t size) { } #ifdef JEMALLOC_JET #undef extent_size_quantize_floor -#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor) +#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor) extent_size_quantize_t *extent_size_quantize_floor = JEMALLOC_N(n_extent_size_quantize_floor); #endif 
#ifdef JEMALLOC_JET #undef extent_size_quantize_ceil -#define extent_size_quantize_ceil JEMALLOC_N(n_extent_size_quantize_ceil) +#define extent_size_quantize_ceil JEMALLOC_N(n_extent_size_quantize_ceil) #endif size_t extent_size_quantize_ceil(size_t size) { @@ -180,7 +180,7 @@ extent_size_quantize_ceil(size_t size) { } #ifdef JEMALLOC_JET #undef extent_size_quantize_ceil -#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil) +#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil) extent_size_quantize_t *extent_size_quantize_ceil = JEMALLOC_N(n_extent_size_quantize_ceil); #endif diff --git a/src/extent_dss.c b/src/extent_dss.c index 93bd6fba..ed4140e7 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,4 +1,4 @@ -#define JEMALLOC_EXTENT_DSS_C_ +#define JEMALLOC_EXTENT_DSS_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ /* Data. */ diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 495d9beb..7265159a 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -1,4 +1,4 @@ -#define JEMALLOC_EXTENT_MMAP_C_ +#define JEMALLOC_EXTENT_MMAP_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ diff --git a/src/hash.c b/src/hash.c index cfa4da02..ffd4f2be 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,2 +1,2 @@ -#define JEMALLOC_HASH_C_ +#define JEMALLOC_HASH_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/jemalloc.c b/src/jemalloc.c index 67b430f4..a9a74973 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,4 +1,4 @@ -#define JEMALLOC_C_ +#define JEMALLOC_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -84,10 +84,10 @@ static uint8_t malloc_slow_flags; JEMALLOC_ALIGNED(CACHELINE) const size_t pind2sz_tab[NPSIZES+1] = { -#define PSZ_yes(lg_grp, ndelta, 
lg_delta) \ +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ (((ZU(1)< (max)) -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ +#define CONF_MIN_no(um, min) false +#define CONF_MIN_yes(um, min) ((um) < (min)) +#define CONF_MAX_no(um, max) false +#define CONF_MAX_yes(um, max) ((um) > (max)) +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ char *end; \ @@ -989,14 +989,14 @@ malloc_conf_init(void) { } \ continue; \ } -#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ CONF_HANDLE_T_U(unsigned, o, n, min, max, \ check_min, check_max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ CONF_HANDLE_T_U(size_t, o, n, min, max, \ check_min, check_max, clip) -#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ if (CONF_MATCH(n)) { \ long l; \ char *end; \ @@ -1018,7 +1018,7 @@ malloc_conf_init(void) { } \ continue; \ } -#define CONF_HANDLE_CHAR_P(o, n, d) \ +#define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ size_t cpylen = (vlen <= \ sizeof(o)-1) ? vlen : \ @@ -2119,9 +2119,9 @@ je_valloc(size_t size) { * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has * #define je_malloc malloc */ -#define malloc_is_malloc 1 -#define is_malloc_(a) malloc_is_ ## a -#define is_malloc(a) is_malloc_(a) +#define malloc_is_malloc 1 +#define is_malloc_(a) malloc_is_ ## a +#define is_malloc(a) is_malloc_(a) #if ((is_malloc(je_malloc) == 1) && defined(JEMALLOC_GLIBC_MALLOC_HOOK)) /* @@ -2147,9 +2147,9 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = * be implemented also, so none of glibc's malloc.o functions are added to the * link. 
*/ -#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) /* To force macro expansion of je_ prefix before stringification. */ -#define PREALIAS(je_fn) ALIAS(je_fn) +#define PREALIAS(je_fn) ALIAS(je_fn) void *__libc_malloc(size_t size) PREALIAS(je_malloc); void __libc_free(void* ptr) PREALIAS(je_free); void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 394fbffe..9692b5ba 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -1,7 +1,7 @@ #include #include -#define JEMALLOC_CPP_CPP_ +#define JEMALLOC_CPP_CPP_ #include "jemalloc/internal/jemalloc_internal.h" // All operators in this file are exported. diff --git a/src/large.c b/src/large.c index 62d4441f..6458d81a 100644 --- a/src/large.c +++ b/src/large.c @@ -1,4 +1,4 @@ -#define JEMALLOC_LARGE_C_ +#define JEMALLOC_LARGE_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -63,7 +63,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #ifdef JEMALLOC_JET #undef large_dalloc_junk -#define large_dalloc_junk JEMALLOC_N(n_large_dalloc_junk) +#define large_dalloc_junk JEMALLOC_N(n_large_dalloc_junk) #endif void large_dalloc_junk(void *ptr, size_t usize) { @@ -71,13 +71,13 @@ large_dalloc_junk(void *ptr, size_t usize) { } #ifdef JEMALLOC_JET #undef large_dalloc_junk -#define large_dalloc_junk JEMALLOC_N(large_dalloc_junk) +#define large_dalloc_junk JEMALLOC_N(large_dalloc_junk) large_dalloc_junk_t *large_dalloc_junk = JEMALLOC_N(n_large_dalloc_junk); #endif #ifdef JEMALLOC_JET #undef large_dalloc_maybe_junk -#define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) +#define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) #endif void large_dalloc_maybe_junk(void *ptr, size_t usize) { @@ -93,7 +93,7 @@ large_dalloc_maybe_junk(void *ptr, 
size_t usize) { } #ifdef JEMALLOC_JET #undef large_dalloc_maybe_junk -#define large_dalloc_maybe_junk JEMALLOC_N(large_dalloc_maybe_junk) +#define large_dalloc_maybe_junk JEMALLOC_N(large_dalloc_maybe_junk) large_dalloc_maybe_junk_t *large_dalloc_maybe_junk = JEMALLOC_N(n_large_dalloc_maybe_junk); #endif diff --git a/src/mb.c b/src/mb.c index dc2c0a25..94f3c724 100644 --- a/src/mb.c +++ b/src/mb.c @@ -1,2 +1,2 @@ -#define JEMALLOC_MB_C_ +#define JEMALLOC_MB_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/mutex.c b/src/mutex.c index f883b9d7..f1aa155e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,4 +1,4 @@ -#define JEMALLOC_MUTEX_C_ +#define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_internal.h" #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) @@ -6,7 +6,7 @@ #endif #ifndef _CRT_SPINCOUNT -#define _CRT_SPINCOUNT 4000 +#define _CRT_SPINCOUNT 4000 #endif /******************************************************************************/ diff --git a/src/nstime.c b/src/nstime.c index 09cd7786..a3f6c1de 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_internal.h" -#define BILLION UINT64_C(1000000000) +#define BILLION UINT64_C(1000000000) void nstime_init(nstime_t *time, uint64_t ns) { @@ -122,7 +122,7 @@ nstime_get(nstime_t *time) { #ifdef JEMALLOC_JET #undef nstime_monotonic -#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) +#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) #endif bool nstime_monotonic(void) { @@ -131,13 +131,13 @@ nstime_monotonic(void) { } #ifdef JEMALLOC_JET #undef nstime_monotonic -#define nstime_monotonic JEMALLOC_N(nstime_monotonic) +#define nstime_monotonic JEMALLOC_N(nstime_monotonic) nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); #endif #ifdef JEMALLOC_JET #undef nstime_update -#define nstime_update JEMALLOC_N(n_nstime_update) +#define nstime_update JEMALLOC_N(n_nstime_update) #endif bool nstime_update(nstime_t *time) { @@ 
-156,6 +156,6 @@ nstime_update(nstime_t *time) { } #ifdef JEMALLOC_JET #undef nstime_update -#define nstime_update JEMALLOC_N(nstime_update) +#define nstime_update JEMALLOC_N(nstime_update) nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update); #endif diff --git a/src/pages.c b/src/pages.c index 0b678e7d..444a97c2 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,4 +1,4 @@ -#define JEMALLOC_PAGES_C_ +#define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_internal.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT diff --git a/src/prng.c b/src/prng.c index 76646a2a..bf908790 100644 --- a/src/prng.c +++ b/src/prng.c @@ -1,2 +1,2 @@ -#define JEMALLOC_PRNG_C_ +#define JEMALLOC_PRNG_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/prof.c b/src/prof.c index 1b34a750..1dd0f54d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,9 +1,9 @@ -#define JEMALLOC_PROF_C_ +#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND -#define UNW_LOCAL_ONLY +#define UNW_LOCAL_ONLY #include #endif @@ -353,7 +353,7 @@ prof_backtrace(prof_bt_t *bt) { #elif (defined(JEMALLOC_PROF_GCC)) void prof_backtrace(prof_bt_t *bt) { -#define BT_FRAME(i) \ +#define BT_FRAME(i) \ if ((i) < PROF_BT_MAX) { \ void *p; \ if (__builtin_frame_address(i) == 0) { \ @@ -928,7 +928,7 @@ prof_bt_count(void) { #ifdef JEMALLOC_JET #undef prof_dump_open -#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) +#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) #endif static int prof_dump_open(bool propagate_err, const char *filename) { @@ -947,7 +947,7 @@ prof_dump_open(bool propagate_err, const char *filename) { } #ifdef JEMALLOC_JET #undef prof_dump_open -#define prof_dump_open JEMALLOC_N(prof_dump_open) +#define prof_dump_open JEMALLOC_N(prof_dump_open) prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); #endif @@ -1305,7 +1305,7 @@ 
prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, #ifdef JEMALLOC_JET #undef prof_dump_header -#define prof_dump_header JEMALLOC_N(prof_dump_header_impl) +#define prof_dump_header JEMALLOC_N(prof_dump_header_impl) #endif static bool prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { @@ -1327,7 +1327,7 @@ prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { } #ifdef JEMALLOC_JET #undef prof_dump_header -#define prof_dump_header JEMALLOC_N(prof_dump_header) +#define prof_dump_header JEMALLOC_N(prof_dump_header) prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); #endif @@ -1696,8 +1696,8 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, } #endif -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void prof_dump_filename(char *filename, char v, uint64_t vseq) { cassert(config_prof); diff --git a/src/rtree.c b/src/rtree.c index d0c5fe65..d760816e 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,4 +1,4 @@ -#define JEMALLOC_RTREE_C_ +#define JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" static unsigned @@ -65,7 +65,7 @@ rtree_new(rtree_t *rtree, unsigned bits) { #ifdef JEMALLOC_JET #undef rtree_node_alloc -#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc_impl) +#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc_impl) #endif static rtree_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { @@ -74,13 +74,13 @@ rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { } #ifdef JEMALLOC_JET #undef rtree_node_alloc -#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc) +#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc) rtree_node_alloc_t *rtree_node_alloc = JEMALLOC_N(rtree_node_alloc_impl); #endif #ifdef JEMALLOC_JET #undef rtree_node_dalloc -#define 
rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc_impl) +#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc_impl) #endif UNUSED static void rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { @@ -89,7 +89,7 @@ rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { } #ifdef JEMALLOC_JET #undef rtree_node_dalloc -#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc) +#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc) rtree_node_dalloc_t *rtree_node_dalloc = JEMALLOC_N(rtree_node_dalloc_impl); #endif diff --git a/src/spin.c b/src/spin.c index 5242d95a..d7eb5fa8 100644 --- a/src/spin.c +++ b/src/spin.c @@ -1,2 +1,2 @@ -#define JEMALLOC_SPIN_C_ +#define JEMALLOC_SPIN_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/stats.c b/src/stats.c index b0a7fca2..2a424a73 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,12 +1,12 @@ -#define JEMALLOC_STATS_C_ +#define JEMALLOC_STATS_C_ #include "jemalloc/internal/jemalloc_internal.h" -#define CTL_GET(n, v, t) do { \ +#define CTL_GET(n, v, t) do { \ size_t sz = sizeof(t); \ xmallctl(n, (void *)v, &sz, NULL, 0); \ } while (0) -#define CTL_M2_GET(n, i, v, t) do { \ +#define CTL_M2_GET(n, i, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ @@ -15,7 +15,7 @@ xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) -#define CTL_M2_M4_GET(n, i, j, v, t) do { \ +#define CTL_M2_M4_GET(n, i, j, v, t) do { \ size_t mib[6]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ @@ -478,7 +478,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } /* config. */ -#define CONFIG_WRITE_BOOL_JSON(n, c) \ +#define CONFIG_WRITE_BOOL_JSON(n, c) \ if (json) { \ CTL_GET("config."#n, &bv, bool); \ malloc_cprintf(write_cb, cbopaque, \ @@ -531,7 +531,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef CONFIG_WRITE_BOOL_JSON /* opt. 
*/ -#define OPT_WRITE_BOOL(n, c) \ +#define OPT_WRITE_BOOL(n, c) \ if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -542,7 +542,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": %s\n", bv ? "true" : "false"); \ } \ } -#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ +#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ bool bv2; \ if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ je_mallctl(#m, (void *)&bv2, &bsz, NULL, 0) == 0) { \ @@ -557,7 +557,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } \ } \ } -#define OPT_WRITE_UNSIGNED(n, c) \ +#define OPT_WRITE_UNSIGNED(n, c) \ if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -567,7 +567,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": %u\n", uv); \ } \ } -#define OPT_WRITE_SSIZE_T(n, c) \ +#define OPT_WRITE_SSIZE_T(n, c) \ if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ @@ -577,7 +577,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, " opt."#n": %zd\n", ssv); \ } \ } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ +#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ ssize_t ssv2; \ if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ @@ -591,7 +591,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } \ } \ } -#define OPT_WRITE_CHAR_P(n, c) \ +#define OPT_WRITE_CHAR_P(n, c) \ if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ if (json) { \ malloc_cprintf(write_cb, cbopaque, \ diff --git a/src/tcache.c b/src/tcache.c index 0501c3fc..96a42add 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,4 +1,4 @@ -#define JEMALLOC_TCACHE_C_ +#define JEMALLOC_TCACHE_C_ #include 
"jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ diff --git a/src/ticker.c b/src/ticker.c index db090240..b0149e1c 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -1,2 +1,2 @@ -#define JEMALLOC_TICKER_C_ +#define JEMALLOC_TICKER_C_ #include "jemalloc/internal/jemalloc_internal.h" diff --git a/src/tsd.c b/src/tsd.c index ae77fcb1..7d56e689 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,4 +1,4 @@ -#define JEMALLOC_TSD_C_ +#define JEMALLOC_TSD_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ @@ -69,10 +69,10 @@ tsd_cleanup(void *arg) { /* Do nothing. */ break; case tsd_state_nominal: -#define MALLOC_TSD_cleanup_yes(n, t) \ +#define MALLOC_TSD_cleanup_yes(n, t) \ n##_cleanup(tsd); -#define MALLOC_TSD_cleanup_no(n, t) -#define O(n, t, c) \ +#define MALLOC_TSD_cleanup_no(n, t) +#define O(n, t, c) \ MALLOC_TSD_cleanup_##c(n, t) MALLOC_TSD #undef MALLOC_TSD_cleanup_yes diff --git a/src/util.c b/src/util.c index faa97c8d..ee5fa47e 100644 --- a/src/util.c +++ b/src/util.c @@ -2,14 +2,14 @@ * Define simple versions of assertion macros that won't recurse in case * of assertion failures in malloc_*printf(). 
*/ -#define assert(e) do { \ +#define assert(e) do { \ if (config_debug && !(e)) { \ malloc_write(": Failed assertion\n"); \ abort(); \ } \ } while (0) -#define not_reached() do { \ +#define not_reached() do { \ if (config_debug) { \ malloc_write(": Unreachable code reached\n"); \ abort(); \ @@ -17,28 +17,28 @@ unreachable(); \ } while (0) -#define not_implemented() do { \ +#define not_implemented() do { \ if (config_debug) { \ malloc_write(": Not implemented\n"); \ abort(); \ } \ } while (0) -#define JEMALLOC_UTIL_C_ +#define JEMALLOC_UTIL_C_ #include "jemalloc/internal/jemalloc_internal.h" /******************************************************************************/ /* Function prototypes for non-inline static functions. */ static void wrtmessage(void *cbopaque, const char *s); -#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) +#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p); -#define D2S_BUFSIZE (1 + U2S_BUFSIZE) +#define D2S_BUFSIZE (1 + U2S_BUFSIZE) static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); -#define O2S_BUFSIZE (1 + U2S_BUFSIZE) +#define O2S_BUFSIZE (1 + U2S_BUFSIZE) static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); -#define X2S_BUFSIZE (2 + U2S_BUFSIZE) +#define X2S_BUFSIZE (2 + U2S_BUFSIZE) static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p); @@ -318,20 +318,20 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { size_t i; const char *f; -#define APPEND_C(c) do { \ +#define APPEND_C(c) do { \ if (i < size) { \ str[i] = (c); \ } \ i++; \ } while (0) -#define APPEND_S(s, slen) do { \ +#define APPEND_S(s, slen) do { \ if (i < size) { \ size_t cpylen = (slen <= size - i) ? 
slen : size - i; \ memcpy(&str[i], s, cpylen); \ } \ i += slen; \ } while (0) -#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ +#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ /* Left padding. */ \ size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ (size_t)width - slen : 0); \ @@ -351,7 +351,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { } \ } \ } while (0) -#define GET_ARG_NUMERIC(val, len) do { \ +#define GET_ARG_NUMERIC(val, len) do { \ switch (len) { \ case '?': \ val = va_arg(ap, int); \ diff --git a/src/witness.c b/src/witness.c index f8d66217..1c03457e 100644 --- a/src/witness.c +++ b/src/witness.c @@ -1,4 +1,4 @@ -#define JEMALLOC_WITNESS_C_ +#define JEMALLOC_WITNESS_C_ #include "jemalloc/internal/jemalloc_internal.h" void @@ -12,7 +12,7 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, #ifdef JEMALLOC_JET #undef witness_lock_error -#define witness_lock_error JEMALLOC_N(n_witness_lock_error) +#define witness_lock_error JEMALLOC_N(n_witness_lock_error) #endif void witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { @@ -27,13 +27,13 @@ witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { } #ifdef JEMALLOC_JET #undef witness_lock_error -#define witness_lock_error JEMALLOC_N(witness_lock_error) +#define witness_lock_error JEMALLOC_N(witness_lock_error) witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); #endif #ifdef JEMALLOC_JET #undef witness_owner_error -#define witness_owner_error JEMALLOC_N(n_witness_owner_error) +#define witness_owner_error JEMALLOC_N(n_witness_owner_error) #endif void witness_owner_error(const witness_t *witness) { @@ -43,13 +43,13 @@ witness_owner_error(const witness_t *witness) { } #ifdef JEMALLOC_JET #undef witness_owner_error -#define witness_owner_error JEMALLOC_N(witness_owner_error) +#define witness_owner_error JEMALLOC_N(witness_owner_error) 
witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); #endif #ifdef JEMALLOC_JET #undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) +#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) #endif void witness_not_owner_error(const witness_t *witness) { @@ -59,14 +59,14 @@ witness_not_owner_error(const witness_t *witness) { } #ifdef JEMALLOC_JET #undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) +#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) witness_not_owner_error_t *witness_not_owner_error = JEMALLOC_N(n_witness_not_owner_error); #endif #ifdef JEMALLOC_JET #undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) +#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) #endif void witness_lockless_error(const witness_list_t *witnesses) { @@ -81,7 +81,7 @@ witness_lockless_error(const witness_list_t *witnesses) { } #ifdef JEMALLOC_JET #undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error) +#define witness_lockless_error JEMALLOC_N(witness_lockless_error) witness_lockless_error_t *witness_lockless_error = JEMALLOC_N(n_witness_lockless_error); #endif diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index 8b733f50..5877ea77 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -1,12 +1,12 @@ /* btalloc() provides a mechanism for allocating via permuted backtraces. 
*/ void *btalloc(size_t size, unsigned bits); -#define btalloc_n_proto(n) \ +#define btalloc_n_proto(n) \ void *btalloc_##n(size_t size, unsigned bits); btalloc_n_proto(0) btalloc_n_proto(1) -#define btalloc_n_gen(n) \ +#define btalloc_n_gen(n) \ void * \ btalloc_##n(size_t size, unsigned bits) { \ void *p; \ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index a0b94747..36d59cff 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -123,7 +123,7 @@ static const bool config_debug = #include "test/test.h" #include "test/timer.h" #include "test/thd.h" -#define MEXP 19937 +#define MEXP 19937 #include "test/SFMT.h" /******************************************************************************/ @@ -136,7 +136,7 @@ static const bool config_debug = #undef not_implemented #undef assert_not_implemented -#define assert(e) do { \ +#define assert(e) do { \ if (!(e)) { \ malloc_printf( \ ": %s:%d: Failed assertion: \"%s\"\n", \ @@ -145,20 +145,20 @@ static const bool config_debug = } \ } while (0) -#define not_reached() do { \ +#define not_reached() do { \ malloc_printf( \ ": %s:%d: Unreachable code reached\n", \ __FILE__, __LINE__); \ abort(); \ } while (0) -#define not_implemented() do { \ +#define not_implemented() do { \ malloc_printf(": %s:%d: Not implemented\n", \ __FILE__, __LINE__); \ abort(); \ } while (0) -#define assert_not_implemented(e) do { \ +#define assert_not_implemented(e) do { \ if (!(e)) { \ not_implemented(); \ } \ diff --git a/test/include/test/mq.h b/test/include/test/mq.h index 8d9907ba..af2c078d 100644 --- a/test/include/test/mq.h +++ b/test/include/test/mq.h @@ -26,9 +26,9 @@ void mq_nanosleep(unsigned ns); * does not perform any cleanup of messages, since it knows nothing of their * payloads. 
*/ -#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) +#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) -#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ +#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ typedef struct { \ mtx_t lock; \ ql_head(a_mq_msg_type) msgs; \ diff --git a/test/include/test/test.h b/test/include/test/test.h index a1b6f72a..d7f05fad 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,6 +1,6 @@ -#define ASSERT_BUFSIZE 256 +#define ASSERT_BUFSIZE 256 -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ t a_ = (a); \ t b_ = (b); \ if (!(a_ cmp b_)) { \ @@ -17,200 +17,200 @@ } \ } while (0) -#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ +#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) -#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ +#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ ==, "p", __VA_ARGS__) -#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ +#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ !=, "p", __VA_ARGS__) -#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ +#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ ==, "p", __VA_ARGS__) -#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) +#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define assert_c_ne(a, b, ...) 
assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) +#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) +#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define assert_d_ne(a, b, ...) 
assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) +#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ +#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ !=, "ld", __VA_ARGS__) -#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ +#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ ==, "ld", __VA_ARGS__) -#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ +#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ >=, "ld", __VA_ARGS__) -#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ +#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ >, "ld", __VA_ARGS__) -#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ +#define assert_ld_ge(a, b, ...) 
assert_cmp(long, a, b, >=, \ <, "ld", __VA_ARGS__) -#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ +#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ <=, "ld", __VA_ARGS__) -#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ +#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ a, b, ==, !=, "lu", __VA_ARGS__) -#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ +#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ a, b, !=, ==, "lu", __VA_ARGS__) -#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ +#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ a, b, <, >=, "lu", __VA_ARGS__) -#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ +#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ a, b, <=, >, "lu", __VA_ARGS__) -#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ +#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ a, b, >=, <, "lu", __VA_ARGS__) -#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ +#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ a, b, >, <=, "lu", __VA_ARGS__) -#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ +#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ !=, "qd", __VA_ARGS__) -#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ +#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ ==, "qd", __VA_ARGS__) -#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ +#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ >=, "qd", __VA_ARGS__) -#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ +#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ >, "qd", __VA_ARGS__) -#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ +#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ <, "qd", __VA_ARGS__) -#define assert_qd_gt(a, b, ...) 
assert_cmp(long long, a, b, >, \ +#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ <=, "qd", __VA_ARGS__) -#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ +#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ a, b, ==, !=, "qu", __VA_ARGS__) -#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ +#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ a, b, !=, ==, "qu", __VA_ARGS__) -#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ +#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ a, b, <, >=, "qu", __VA_ARGS__) -#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ +#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ a, b, <=, >, "qu", __VA_ARGS__) -#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ +#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ a, b, >=, <, "qu", __VA_ARGS__) -#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ +#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ a, b, >, <=, "qu", __VA_ARGS__) -#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ +#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ !=, "jd", __VA_ARGS__) -#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ +#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ ==, "jd", __VA_ARGS__) -#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ +#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ >=, "jd", __VA_ARGS__) -#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ +#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ >, "jd", __VA_ARGS__) -#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ +#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ <, "jd", __VA_ARGS__) -#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ +#define assert_jd_gt(a, b, ...) 
assert_cmp(intmax_t, a, b, >, \ <=, "jd", __VA_ARGS__) -#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ +#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ !=, "ju", __VA_ARGS__) -#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ +#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ ==, "ju", __VA_ARGS__) -#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ +#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ >=, "ju", __VA_ARGS__) -#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ +#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ >, "ju", __VA_ARGS__) -#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ +#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ <, "ju", __VA_ARGS__) -#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ +#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ <=, "ju", __VA_ARGS__) -#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ +#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ !=, "zd", __VA_ARGS__) -#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ +#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ ==, "zd", __VA_ARGS__) -#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ +#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ >=, "zd", __VA_ARGS__) -#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ +#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ >, "zd", __VA_ARGS__) -#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ +#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ <, "zd", __VA_ARGS__) -#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ +#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ <=, "zd", __VA_ARGS__) -#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ +#define assert_zu_eq(a, b, ...) 
assert_cmp(size_t, a, b, ==, \ !=, "zu", __VA_ARGS__) -#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ +#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ ==, "zu", __VA_ARGS__) -#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ +#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ >=, "zu", __VA_ARGS__) -#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ +#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ >, "zu", __VA_ARGS__) -#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ +#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ <, "zu", __VA_ARGS__) -#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ +#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ <=, "zu", __VA_ARGS__) -#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ +#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ !=, FMTd32, __VA_ARGS__) -#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ +#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ ==, FMTd32, __VA_ARGS__) -#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ +#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ >=, FMTd32, __VA_ARGS__) -#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ +#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ >, FMTd32, __VA_ARGS__) -#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ +#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ <, FMTd32, __VA_ARGS__) -#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ +#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ <=, FMTd32, __VA_ARGS__) -#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ +#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ !=, FMTu32, __VA_ARGS__) -#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \ +#define assert_u32_ne(a, b, ...) 
assert_cmp(uint32_t, a, b, !=, \ ==, FMTu32, __VA_ARGS__) -#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ +#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ >=, FMTu32, __VA_ARGS__) -#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ +#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ >, FMTu32, __VA_ARGS__) -#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ +#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ <, FMTu32, __VA_ARGS__) -#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ +#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ <=, FMTu32, __VA_ARGS__) -#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ +#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ !=, FMTd64, __VA_ARGS__) -#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ +#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ ==, FMTd64, __VA_ARGS__) -#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ +#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ >=, FMTd64, __VA_ARGS__) -#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ +#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ >, FMTd64, __VA_ARGS__) -#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ +#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ <, FMTd64, __VA_ARGS__) -#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ +#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ <=, FMTd64, __VA_ARGS__) -#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ +#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ !=, FMTu64, __VA_ARGS__) -#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ +#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ ==, FMTu64, __VA_ARGS__) -#define assert_u64_lt(a, b, ...) 
assert_cmp(uint64_t, a, b, <, \ +#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ >=, FMTu64, __VA_ARGS__) -#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ +#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ >, FMTu64, __VA_ARGS__) -#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ +#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ <, FMTu64, __VA_ARGS__) -#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ +#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) do { \ +#define assert_b_eq(a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ @@ -226,7 +226,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_b_ne(a, b, ...) do { \ +#define assert_b_ne(a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ @@ -242,10 +242,10 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) -#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) +#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) +#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) -#define assert_str_eq(a, b, ...) do { \ +#define assert_str_eq(a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -258,7 +258,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_str_ne(a, b, ...) do { \ +#define assert_str_ne(a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -272,7 +272,7 @@ } \ } while (0) -#define assert_not_reached(...) do { \ +#define assert_not_reached(...) 
do { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ @@ -296,24 +296,24 @@ typedef enum { typedef void (test_t)(void); -#define TEST_BEGIN(f) \ +#define TEST_BEGIN(f) \ static void \ f(void) { \ p_test_init(#f); -#define TEST_END \ +#define TEST_END \ goto label_test_end; \ label_test_end: \ p_test_fini(); \ } -#define test(...) \ +#define test(...) \ p_test(__VA_ARGS__, NULL) -#define test_no_malloc_init(...) \ +#define test_no_malloc_init(...) \ p_test_no_malloc_init(__VA_ARGS__, NULL) -#define test_skip_if(e) do { \ +#define test_skip_if(e) do { \ if (e) { \ test_skip("%s:%s:%d: Test skipped: (%s)", \ __func__, __FILE__, __LINE__, #e); \ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index b2ec6584..222164d6 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define NTHREADS 10 +#define NTHREADS 10 static bool have_dss = #ifdef JEMALLOC_DSS diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 54b3bf24..536b67ea 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 23) +#define MAXALIGN (((size_t)1) << 23) /* * On systems which can't merge extents, tests that call this function generate @@ -79,7 +79,7 @@ TEST_BEGIN(test_oom_errors) { TEST_END TEST_BEGIN(test_alignment_and_size) { -#define NITER 4 +#define NITER 4 size_t alignment, size, total; unsigned i; void *ps[NITER]; diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 26076be4..b60e27b6 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -118,7 +118,7 @@ TEST_BEGIN(test_oom) { TEST_END TEST_BEGIN(test_basic) { -#define MAXSZ (((size_t)1) << 23) +#define MAXSZ (((size_t)1) << 23) size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { @@ 
-155,8 +155,8 @@ TEST_BEGIN(test_basic) { TEST_END TEST_BEGIN(test_alignment_and_size) { -#define MAXALIGN (((size_t)1) << 23) -#define NITER 4 +#define MAXALIGN (((size_t)1) << 23) +#define NITER 4 size_t nsz, rsz, sz, alignment, total; unsigned i; void *ps[NITER]; diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 97b9216a..2c2726de 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 23) +#define MAXALIGN (((size_t)1) << 23) /* * On systems which can't merge extents, tests that call this function generate @@ -71,7 +71,7 @@ TEST_BEGIN(test_oom_errors) { TEST_END TEST_BEGIN(test_alignment_and_size) { -#define NITER 4 +#define NITER 4 size_t alignment, size, total; unsigned i; int err; diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 7c0f9c5f..7821ca5f 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -43,11 +43,11 @@ get_large_size(size_t ind) { TEST_BEGIN(test_grow_and_shrink) { void *p, *q; size_t tsz; -#define NCYCLES 3 +#define NCYCLES 3 unsigned i, j; -#define NSZS 1024 +#define NSZS 1024 size_t szs[NSZS]; -#define MAXSZ ZU(12 * 1024 * 1024) +#define MAXSZ ZU(12 * 1024 * 1024) p = mallocx(1, 0); assert_ptr_not_null(p, "Unexpected mallocx() error"); @@ -107,8 +107,8 @@ TEST_BEGIN(test_zero) { void *p, *q; size_t psz, qsz, i, j; size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; -#define FILL_BYTE 0xaaU -#define RANGE 2048 +#define FILL_BYTE 0xaaU +#define RANGE 2048 for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) { size_t start_size = start_sizes[i]; @@ -150,7 +150,7 @@ TEST_END TEST_BEGIN(test_align) { void *p, *q; size_t align; -#define MAX_ALIGN (ZU(1) << 25) +#define MAX_ALIGN (ZU(1) << 25) align = ZU(1); p = mallocx(1, MALLOCX_ALIGN(align)); @@ -175,8 +175,8 @@ TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; unsigned lg_align; size_t sz; 
-#define MAX_LG_ALIGN 25 -#define MAX_VALIDATE (ZU(1) << 22) +#define MAX_LG_ALIGN 25 +#define MAX_VALIDATE (ZU(1) << 22) lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index f7b42949..e7ea1d82 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define MAXALIGN (((size_t)1) << 22) -#define NITER 3 +#define MAXALIGN (((size_t)1) << 22) +#define NITER 3 TEST_BEGIN(test_basic) { void *ptr = mallocx(64, 0); diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index d9dc170d..9991a42f 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define NTHREADS 10 +#define NTHREADS 10 void * thd_start(void *arg) { diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 158f7ee9..c95fbf18 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -320,7 +320,7 @@ test_zero(size_t szmin, size_t szmax) { int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; void *p; -#define FILL_BYTE 0x7aU +#define FILL_BYTE 0x7aU sz = szmax; p = mallocx(sz, flags); diff --git a/test/src/SFMT.c b/test/src/SFMT.c index 80cabe05..4dc32599 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -45,7 +45,7 @@ * * The new BSD License is applied to this software, see LICENSE.txt */ -#define SFMT_C_ +#define SFMT_C_ #include "test/jemalloc_test.h" #include "test/SFMT-params.h" diff --git a/test/src/math.c b/test/src/math.c index 887a3639..1758c677 100644 --- a/test/src/math.c +++ b/test/src/math.c @@ -1,2 +1,2 @@ -#define MATH_C_ +#define MATH_C_ #include "test/jemalloc_test.h" diff --git a/test/src/mtx.c b/test/src/mtx.c index b691b482..a393c01f 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" #ifndef _CRT_SPINCOUNT -#define 
_CRT_SPINCOUNT 4000 +#define _CRT_SPINCOUNT 4000 #endif bool diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index b5730d63..1fc8cf1b 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -35,10 +35,10 @@ */ #include "test/jemalloc_test.h" -#define BLOCK_SIZE 10000 -#define BLOCK_SIZE64 (BLOCK_SIZE / 2) -#define COUNT_1 1000 -#define COUNT_2 700 +#define BLOCK_SIZE 10000 +#define BLOCK_SIZE64 (BLOCK_SIZE / 2) +#define COUNT_1 1000 +#define COUNT_2 700 static const uint32_t init_gen_rand_32_expected[] = { 3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U, diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 3d74e37a..d2a9bb4f 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -82,7 +82,7 @@ do_arena_create(extent_hooks_t *h) { static void do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { -#define NLARGE 32 +#define NLARGE 32 unsigned nsmall, nlarge, i; size_t sz; int flags; diff --git a/test/unit/arena_reset_prof.c b/test/unit/arena_reset_prof.c index 0fd362e9..6d83c843 100644 --- a/test/unit/arena_reset_prof.c +++ b/test/unit/arena_reset_prof.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#define ARENA_RESET_PROF_C_ +#define ARENA_RESET_PROF_C_ const char *malloc_conf = "prof:true,lg_prof_sample:0"; #include "arena_reset.c" diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 97ec7eb9..78661597 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define TEST_STRUCT(p, t) \ +#define TEST_STRUCT(p, t) \ struct p##_test_s { \ t accum0; \ t x; \ @@ -8,7 +8,7 @@ struct p##_test_s { \ }; \ typedef struct p##_test_s p##_test_t; -#define TEST_BODY(p, t, tc, ta, FMT) do { \ +#define TEST_BODY(p, t, tc, ta, FMT) do { \ const p##_test_t tests[] = { \ {(t)-1, (t)-1, (t)-2}, \ {(t)-1, (t) 0, (t)-2}, \ diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index e91f0928..ca657608 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -1,6 
+1,6 @@ #include "test/jemalloc_test.h" -#define NBITS_TAB \ +#define NBITS_TAB \ NB( 1) \ NB( 2) \ NB( 3) \ @@ -124,7 +124,7 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { } TEST_BEGIN(test_bitmap_initializer) { -#define NB(nbits) { \ +#define NB(nbits) { \ if (nbits <= BITMAP_MAXBITS) { \ bitmap_info_t binfo = \ BITMAP_INFO_INITIALIZER(nbits); \ @@ -155,7 +155,7 @@ TEST_BEGIN(test_bitmap_size) { bitmap_info_init(&binfo, nbits); prev_size = test_bitmap_size_body(&binfo, nbits, prev_size); } -#define NB(nbits) { \ +#define NB(nbits) { \ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ prev_size = test_bitmap_size_body(&binfo, nbits, \ prev_size); \ @@ -188,7 +188,7 @@ TEST_BEGIN(test_bitmap_init) { bitmap_info_init(&binfo, nbits); test_bitmap_init_body(&binfo, nbits); } -#define NB(nbits) { \ +#define NB(nbits) { \ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ test_bitmap_init_body(&binfo, nbits); \ } @@ -219,7 +219,7 @@ TEST_BEGIN(test_bitmap_set) { bitmap_info_init(&binfo, nbits); test_bitmap_set_body(&binfo, nbits); } -#define NB(nbits) { \ +#define NB(nbits) { \ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ test_bitmap_set_body(&binfo, nbits); \ } @@ -257,7 +257,7 @@ TEST_BEGIN(test_bitmap_unset) { bitmap_info_init(&binfo, nbits); test_bitmap_unset_body(&binfo, nbits); } -#define NB(nbits) { \ +#define NB(nbits) { \ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ test_bitmap_unset_body(&binfo, nbits); \ } @@ -318,7 +318,7 @@ TEST_BEGIN(test_bitmap_sfu) { bitmap_info_init(&binfo, nbits); test_bitmap_sfu_body(&binfo, nbits); } -#define NB(nbits) { \ +#define NB(nbits) { \ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ test_bitmap_sfu_body(&binfo, nbits); \ } diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 842ae29b..707ea5f8 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -104,7 +104,7 @@ TEST_BEGIN(test_count_insert_search_remove) { TEST_END TEST_BEGIN(test_insert_iter_remove) { 
-#define NITEMS ZU(1000) +#define NITEMS ZU(1000) tsd_t *tsd; ckh_t ckh; void **p[NITEMS]; diff --git a/test/unit/decay.c b/test/unit/decay.c index 83c9f49e..4d172a54 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -196,7 +196,7 @@ TEST_BEGIN(test_decay_ticks) { TEST_END TEST_BEGIN(test_decay_ticker) { -#define NPS 1024 +#define NPS 1024 int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; uint64_t epoch; @@ -289,7 +289,7 @@ TEST_BEGIN(test_decay_ticker) { TEST_END TEST_BEGIN(test_decay_nonmonotonic) { -#define NPS (SMOOTHSTEP_NSTEPS + 1) +#define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; uint64_t epoch; diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 52af7a3d..0ca7a75d 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -99,7 +99,7 @@ TEST_BEGIN(test_large_extent_size) { TEST_END TEST_BEGIN(test_monotonic) { -#define SZ_MAX ZU(4 * 1024 * 1024) +#define SZ_MAX ZU(4 * 1024 * 1024) unsigned i; size_t floor_prev, ceil_prev; diff --git a/test/unit/hash.c b/test/unit/hash.c index 0204cdad..48507515 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -55,7 +55,7 @@ hash_variant_string(hash_variant_t variant) { } } -#define KEY_SIZE 256 +#define KEY_SIZE 256 static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; @@ -137,7 +137,7 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { static void hash_variant_verify(hash_variant_t variant) { -#define MAX_ALIGN 16 +#define MAX_ALIGN 16 uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; unsigned i; diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c index a5895b5c..8db3331d 100644 --- a/test/unit/junk_alloc.c +++ b/test/unit/junk_alloc.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" +#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git 
a/test/unit/junk_free.c b/test/unit/junk_free.c index bb5183c9..482a61d0 100644 --- a/test/unit/junk_free.c +++ b/test/unit/junk_free.c @@ -1,3 +1,3 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" +#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" #undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c531a06a..c931e378 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -115,7 +115,7 @@ TEST_BEGIN(test_mallctlnametomib_short_mib) { TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config, t) do { \ +#define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ @@ -146,7 +146,7 @@ TEST_END TEST_BEGIN(test_mallctl_opt) { bool config_always = true; -#define TEST_MALLCTL_OPT(t, opt, config) do { \ +#define TEST_MALLCTL_OPT(t, opt, config) do { \ t oldval; \ size_t sz = sizeof(oldval); \ int expected = config_##config ? 0 : ENOENT; \ @@ -232,7 +232,7 @@ TEST_BEGIN(test_tcache_none) { TEST_END TEST_BEGIN(test_tcache) { -#define NTCACHES 10 +#define NTCACHES 10 unsigned tis[NTCACHES]; void *ps[NTCACHES]; void *qs[NTCACHES]; @@ -534,7 +534,7 @@ TEST_BEGIN(test_arenas_decay_time) { TEST_END TEST_BEGIN(test_arenas_constants) { -#define TEST_ARENAS_CONSTANT(t, name, expected) do { \ +#define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ @@ -552,7 +552,7 @@ TEST_BEGIN(test_arenas_constants) { TEST_END TEST_BEGIN(test_arenas_bin_constants) { -#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ +#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ @@ -570,7 +570,7 @@ TEST_BEGIN(test_arenas_bin_constants) { TEST_END TEST_BEGIN(test_arenas_lextent_constants) { -#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, 
expected) do { \ +#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ assert_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ @@ -602,7 +602,7 @@ TEST_BEGIN(test_arenas_create) { TEST_END TEST_BEGIN(test_stats_arenas) { -#define TEST_STATS_ARENAS(t, name) do { \ +#define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ diff --git a/test/unit/math.c b/test/unit/math.c index d2cf16dd..09ef20c7 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define MAX_REL_ERR 1.0e-9 -#define MAX_ABS_ERR 1.0e-9 +#define MAX_REL_ERR 1.0e-9 +#define MAX_ABS_ERR 1.0e-9 #include @@ -10,7 +10,7 @@ #endif #ifndef INFINITY -#define INFINITY (DBL_MAX + DBL_MAX) +#define INFINITY (DBL_MAX + DBL_MAX) #endif static bool diff --git a/test/unit/mq.c b/test/unit/mq.c index fe17943e..57a4d54e 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define NSENDERS 3 -#define NMSGS 100000 +#define NSENDERS 3 +#define NMSGS 100000 typedef struct mq_msg_s mq_msg_t; struct mq_msg_s { diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 23740ce1..424587b0 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define NTHREADS 2 -#define NINCRS 2000000 +#define NTHREADS 2 +#define NINCRS 2000000 TEST_BEGIN(test_mtx_basic) { mtx_t mtx; diff --git a/test/unit/nstime.c b/test/unit/nstime.c index f7f1bdfd..f8384f5a 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define BILLION UINT64_C(1000000000) +#define BILLION UINT64_C(1000000000) TEST_BEGIN(test_nstime_init) { nstime_t nst; diff --git a/test/unit/pack.c b/test/unit/pack.c index 3edd405d..d35ac5ea 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -7,9 +7,9 @@ const char *malloc_conf = "decay_time:-1"; * Size class 
that is a divisor of the page size, ideally 4+ regions per run. */ #if LG_PAGE <= 14 -#define SZ (ZU(1) << (LG_PAGE - 2)) +#define SZ (ZU(1) << (LG_PAGE - 2)) #else -#define SZ 4096 +#define SZ 4096 #endif /* @@ -17,7 +17,7 @@ const char *malloc_conf = "decay_time:-1"; * if mmap()ed memory grows downward, downward growth of mmap()ed memory is * tested. */ -#define NSLABS 8 +#define NSLABS 8 static unsigned binind_compute(void) { diff --git a/test/unit/ph.c b/test/unit/ph.c index 5f3c5a45..91516fae 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -3,7 +3,7 @@ typedef struct node_s node_t; struct node_s { -#define NODE_MAGIC 0x9823af7e +#define NODE_MAGIC 0x9823af7e uint32_t magic; phn(node_t) link; uint64_t key; @@ -160,9 +160,9 @@ node_remove_first(heap_t *heap) { } TEST_BEGIN(test_ph_random) { -#define NNODES 25 -#define NBAGS 250 -#define SEED 42 +#define NNODES 25 +#define NBAGS 250 +#define SEED 42 sfmt_t *sfmt; uint64_t bag[NNODES]; heap_t heap; diff --git a/test/unit/prng.c b/test/unit/prng.c index cbccb8a0..74d9cf73 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -137,9 +137,9 @@ TEST_END static void test_prng_range_u32(bool atomic) { uint32_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { uint32_t s; @@ -157,9 +157,9 @@ test_prng_range_u32(bool atomic) { static void test_prng_range_u64(void) { uint64_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { uint64_t s; @@ -177,9 +177,9 @@ test_prng_range_u64(void) { static void test_prng_range_zu(bool atomic) { size_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 +#define MAX_RANGE 10000000 +#define RANGE_STEP 97 +#define NREPS 10 for (range = 2; range < MAX_RANGE; 
range += RANGE_STEP) { size_t s; diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index ad7a3eaa..bcd1d881 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -#define NTHREADS 4 -#define NALLOCS_PER_THREAD 50 -#define DUMP_INTERVAL 1 -#define BT_COUNT_CHECK_INTERVAL 5 +#define NTHREADS 4 +#define NALLOCS_PER_THREAD 50 +#define DUMP_INTERVAL 1 +#define BT_COUNT_CHECK_INTERVAL 5 #ifdef JEMALLOC_PROF const char *malloc_conf = diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 9bcb3e3b..c0e085a8 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -37,7 +37,7 @@ mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, int line) { mallctl_bool_get("prof.active", prof_active_old_expected, func, line); } -#define mallctl_prof_active_get(a) \ +#define mallctl_prof_active_get(a) \ mallctl_prof_active_get_impl(a, __func__, __LINE__) static void @@ -46,7 +46,7 @@ mallctl_prof_active_set_impl(bool prof_active_old_expected, mallctl_bool_set("prof.active", prof_active_old_expected, prof_active_new, func, line); } -#define mallctl_prof_active_set(a, b) \ +#define mallctl_prof_active_set(a, b) \ mallctl_prof_active_set_impl(a, b, __func__, __LINE__) static void @@ -55,7 +55,7 @@ mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected, func, line); } -#define mallctl_thread_prof_active_get(a) \ +#define mallctl_thread_prof_active_get(a) \ mallctl_thread_prof_active_get_impl(a, __func__, __LINE__) static void @@ -64,7 +64,7 @@ mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected, thread_prof_active_new, func, line); } -#define mallctl_thread_prof_active_set(a, b) \ +#define mallctl_thread_prof_active_set(a, b) \ mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__) 
static void @@ -80,7 +80,7 @@ prof_sampling_probe_impl(bool expect_sample, const char *func, int line) { "%s():%d: Unexpected backtrace count", func, line); dallocx(p, 0); } -#define prof_sampling_probe(a) \ +#define prof_sampling_probe(a) \ prof_sampling_probe_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_active) { diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index c2bb50d6..fc954f9f 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -135,11 +135,11 @@ TEST_BEGIN(test_prof_reset_cleanup) { } TEST_END -#define NTHREADS 4 -#define NALLOCS_PER_THREAD (1U << 13) -#define OBJ_RING_BUF_COUNT 1531 -#define RESET_INTERVAL (1U << 10) -#define DUMP_INTERVAL 3677 +#define NTHREADS 4 +#define NALLOCS_PER_THREAD (1U << 13) +#define OBJ_RING_BUF_COUNT 1531 +#define RESET_INTERVAL (1U << 10) +#define DUMP_INTERVAL 3677 static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; @@ -228,7 +228,7 @@ TEST_END #undef DUMP_INTERVAL /* Test sampling at the same allocation site across resets. 
*/ -#define NITER 10 +#define NITER 10 TEST_BEGIN(test_xallocx) { size_t lg_prof_sample_orig; unsigned i; diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index bcf85f89..a094a1c0 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -18,7 +18,7 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, assert_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); } -#define mallctl_thread_name_get(a) \ +#define mallctl_thread_name_get(a) \ mallctl_thread_name_get_impl(a, __func__, __LINE__) static void @@ -30,7 +30,7 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, func, line); mallctl_thread_name_get_impl(thread_name, func, line); } -#define mallctl_thread_name_set(a) \ +#define mallctl_thread_name_set(a) \ mallctl_thread_name_set_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_thread_name_validation) { @@ -72,8 +72,8 @@ TEST_BEGIN(test_prof_thread_name_validation) { } TEST_END -#define NTHREADS 4 -#define NRESET 25 +#define NTHREADS 4 +#define NRESET 25 static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; diff --git a/test/unit/ql.c b/test/unit/ql.c index 231a7243..ae6481fd 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" /* Number of ring entries, in [2..26]. */ -#define NENTRIES 9 +#define NENTRIES 9 typedef struct list_s list_t; typedef ql_head(list_t) list_head_t; diff --git a/test/unit/qr.c b/test/unit/qr.c index 9a72d308..80c5c27d 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" /* Number of ring entries, in [2..26]. */ -#define NENTRIES 9 +#define NENTRIES 9 /* Split index, in [1..NENTRIES). 
*/ -#define SPLIT_INDEX 5 +#define SPLIT_INDEX 5 typedef struct ring_s ring_t; diff --git a/test/unit/rb.c b/test/unit/rb.c index dab2c3a2..0bcc3c31 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ +#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t != \ NULL; rbp_bh_t = rbtn_left_get(a_type, a_field, \ @@ -14,7 +14,7 @@ typedef struct node_s node_t; struct node_s { -#define NODE_MAGIC 0x9823af7e +#define NODE_MAGIC 0x9823af7e uint32_t magic; rb_node(node_t) link; uint64_t key; @@ -223,9 +223,9 @@ destroy_cb(node_t *node, void *data) { } TEST_BEGIN(test_rb_random) { -#define NNODES 25 -#define NBAGS 250 -#define SEED 42 +#define NNODES 25 +#define NBAGS 250 +#define SEED 42 sfmt_t *sfmt; uint64_t bag[NNODES]; tree_t tree; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 344ac16a..d40e6490 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -51,10 +51,10 @@ TEST_BEGIN(test_rtree_read_empty) { } TEST_END -#define NTHREADS 8 -#define MAX_NBITS 18 -#define NITERS 1000 -#define SEED 42 +#define NTHREADS 8 +#define MAX_NBITS 18 +#define NITERS 1000 +#define SEED 42 typedef struct { unsigned nbits; @@ -218,8 +218,8 @@ TEST_BEGIN(test_rtree_random) { unsigned i; sfmt_t *sfmt; tsdn_t *tsdn; -#define NSET 16 -#define SEED 42 +#define NSET 16 +#define SEED 42 sfmt = init_gen_rand(SEED); tsdn = tsdn_fetch(); diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index bf5dfb1d..6e3eb0f9 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static const uint64_t smoothstep_tab[] = { -#define STEP(step, h, x, y) \ +#define STEP(step, h, x, y) \ h, SMOOTHSTEP #undef STEP diff --git a/test/unit/ticker.c b/test/unit/ticker.c index 32236f2c..c2ad7295 100644 --- a/test/unit/ticker.c +++ 
b/test/unit/ticker.c @@ -1,8 +1,8 @@ #include "test/jemalloc_test.h" TEST_BEGIN(test_ticker_tick) { -#define NREPS 2 -#define NTICKS 3 +#define NREPS 2 +#define NTICKS 3 ticker_t ticker; int32_t i, j; @@ -26,7 +26,7 @@ TEST_BEGIN(test_ticker_tick) { TEST_END TEST_BEGIN(test_ticker_ticks) { -#define NTICKS 3 +#define NTICKS 3 ticker_t ticker; ticker_init(&ticker, NTICKS); @@ -44,7 +44,7 @@ TEST_BEGIN(test_ticker_ticks) { TEST_END TEST_BEGIN(test_ticker_copy) { -#define NTICKS 3 +#define NTICKS 3 ticker_t ta, tb; ticker_init(&ta, NTICKS); diff --git a/test/unit/tsd.c b/test/unit/tsd.c index f34f0e78..ae47d23e 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define THREAD_DATA 0x72b65c10 +#define THREAD_DATA 0x72b65c10 typedef unsigned int data_t; @@ -47,7 +47,7 @@ data_cleanup(void *arg) { } malloc_tsd_externs(data_, data_t) -#define DATA_INIT 0x12345678 +#define DATA_INIT 0x12345678 malloc_tsd_data(, data_, data_t, DATA_INIT) malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup) diff --git a/test/unit/util.c b/test/unit/util.c index 81421e80..5760966f 100644 --- a/test/unit/util.c +++ b/test/unit/util.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define TEST_POW2_CEIL(t, suf, pri) do { \ +#define TEST_POW2_CEIL(t, suf, pri) do { \ unsigned i, pow2; \ t x; \ \ @@ -65,9 +65,9 @@ TEST_BEGIN(test_malloc_strtoumax) { const char *expected_errno_name; uintmax_t expected_x; }; -#define ERR(e) e, #e -#define KUMAX(x) ((uintmax_t)x##ULL) -#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) +#define ERR(e) e, #e +#define KUMAX(x) ((uintmax_t)x##ULL) +#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) struct test_s tests[] = { {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, @@ -151,11 +151,11 @@ TEST_BEGIN(test_malloc_strtoumax) { TEST_END TEST_BEGIN(test_malloc_snprintf_truncated) { -#define BUFLEN 15 +#define BUFLEN 15 char buf[BUFLEN]; size_t result; size_t len; -#define 
TEST(expected_str_untruncated, ...) do { \ +#define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ @@ -183,10 +183,10 @@ TEST_BEGIN(test_malloc_snprintf_truncated) { TEST_END TEST_BEGIN(test_malloc_snprintf) { -#define BUFLEN 128 +#define BUFLEN 128 char buf[BUFLEN]; size_t result; -#define TEST(expected_str, ...) do { \ +#define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ assert_str_eq(buf, expected_str, "Unexpected output"); \ assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ diff --git a/test/unit/zero.c b/test/unit/zero.c index 88af9452..d5b03f8d 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -9,7 +9,7 @@ static void test_zero(size_t sz_min, size_t sz_max) { uint8_t *s; size_t sz_prev, sz, i; -#define MAGIC ((uint8_t)0x61) +#define MAGIC ((uint8_t)0x61) sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); From dad74bd3c811ca2b1af1fd57b28f2456da5ba08b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Jan 2017 15:12:03 -0800 Subject: [PATCH 0605/2608] Convert witness_assert_lockless() to witness_assert_lock_depth(). This makes it possible to make lock state assertions about precisely which locks are held. 
--- include/jemalloc/internal/private_symbols.txt | 4 +- include/jemalloc/internal/witness.h | 23 ++++-- src/jemalloc.c | 74 +++++++++---------- src/witness.c | 19 ++--- test/unit/witness.c | 63 +++++++++------- 5 files changed, 102 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c1c6c409..4dfe442c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -612,14 +612,14 @@ valgrind_freelike_block valgrind_make_mem_defined valgrind_make_mem_noaccess valgrind_make_mem_undefined -witness_assert_lockless +witness_assert_lock_depth witness_assert_not_owner witness_assert_owner witness_fork_cleanup witness_init witness_lock witness_lock_error -witness_lockless_error +witness_lock_depth_error witness_not_owner_error witness_owner witness_owner_error diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index cdf15d79..dfd827f7 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -91,10 +91,12 @@ extern witness_not_owner_error_t *witness_not_owner_error; void witness_not_owner_error(const witness_t *witness); #endif #ifdef JEMALLOC_JET -typedef void (witness_lockless_error_t)(const witness_list_t *); -extern witness_lockless_error_t *witness_lockless_error; +typedef void (witness_lock_depth_error_t)(const witness_list_t *, + unsigned depth); +extern witness_lock_depth_error_t *witness_lock_depth_error; #else -void witness_lockless_error(const witness_list_t *witnesses); +void witness_lock_depth_error(const witness_list_t *witnesses, + unsigned depth); #endif void witnesses_cleanup(tsd_t *tsd); @@ -111,7 +113,7 @@ void witness_postfork_child(tsd_t *tsd); bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); 
-void witness_assert_lockless(tsdn_t *tsdn); +void witness_assert_lock_depth(tsdn_t *tsdn, unsigned depth); void witness_lock(tsdn_t *tsdn, witness_t *witness); void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif @@ -175,9 +177,10 @@ witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) } JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) +witness_assert_lock_depth(tsdn_t *tsdn, unsigned depth) { tsd_t *tsd; + unsigned d; witness_list_t *witnesses; witness_t *w; @@ -188,10 +191,16 @@ witness_assert_lockless(tsdn_t *tsdn) return; tsd = tsdn_tsd(tsdn); + d = 0; witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); - if (w != NULL) - witness_lockless_error(witnesses); + if (w != NULL) { + ql_foreach(w, witnesses, link) { + d++; + } + } + if (d != depth) + witness_lock_depth_error(witnesses, depth); } JEMALLOC_INLINE void diff --git a/src/jemalloc.c b/src/jemalloc.c index baead664..c08f7e2f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1581,7 +1581,7 @@ ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, tsd = tsd_fetch(); *tsdn = tsd_tsdn(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); ind = size2index(size); if (unlikely(ind >= NSIZES)) @@ -1619,7 +1619,7 @@ ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, assert(usize == isalloc(tsdn, ret, config_prof)); *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1704,7 +1704,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) goto label_oom; } tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); if (size == 0) size = 1; @@ -1745,7 +1745,7 @@ label_return: UTRACE(0, size, result); JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, false); - 
witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); return (ret); label_oom: assert(result == NULL); @@ -1755,7 +1755,7 @@ label_oom: abort(); } ret = ENOMEM; - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); goto label_return; } @@ -1873,7 +1873,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1901,7 +1901,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1947,7 +1947,7 @@ je_realloc(void *ptr, size_t size) malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) { @@ -1994,7 +1994,7 @@ je_realloc(void *ptr, size_t size) UTRACE(ptr, size, ret); JEMALLOC_VALGRIND_REALLOC(maybe, tsdn, ret, usize, maybe, ptr, old_usize, old_rzsize, maybe, false); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); return (ret); } @@ -2005,12 +2005,12 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); if (likely(!malloc_slow)) ifree(tsd, ptr, tcache_get(tsd, false), false); else ifree(tsd, ptr, tcache_get(tsd, false), true); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); } } @@ -2239,7 +2239,7 @@ imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, tsd = 
tsd_fetch(); *tsdn = tsd_tsdn(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); if (likely(flags == 0)) { szind_t ind = size2index(size); @@ -2374,7 +2374,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2421,7 +2421,7 @@ je_rallocx(void *ptr, size_t size, int flags) UTRACE(ptr, size, p); JEMALLOC_VALGRIND_REALLOC(maybe, tsd_tsdn(tsd), p, usize, no, ptr, old_usize, old_rzsize, no, zero); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2429,7 +2429,7 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); return (NULL); } @@ -2525,7 +2525,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); @@ -2566,7 +2566,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize, old_rzsize, no, zero); label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); return (usize); } @@ -2581,14 +2581,14 @@ je_sallocx(const void *ptr, int flags) malloc_thread_init(); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); if (config_ivsalloc) usize = ivsalloc(tsdn, ptr, config_prof); else usize = isalloc(tsdn, ptr, config_prof); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); return 
(usize); } @@ -2602,7 +2602,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2616,7 +2616,7 @@ je_dallocx(void *ptr, int flags) ifree(tsd, ptr, tcache, false); else ifree(tsd, ptr, tcache, true); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2624,13 +2624,13 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); return (usize); } @@ -2647,7 +2647,7 @@ je_sdallocx(void *ptr, size_t size, int flags) usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2661,7 +2661,7 @@ je_sdallocx(void *ptr, size_t size, int flags) isfree(tsd, ptr, usize, tcache, false); else isfree(tsd, ptr, usize, tcache, true); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2677,13 +2677,13 @@ je_nallocx(size_t size, int flags) return (0); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); usize = inallocx(tsdn, size, flags); if (unlikely(usize > HUGE_MAXCLASS)) return (0); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); return (usize); } @@ -2698,9 +2698,9 
@@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); return (ret); } @@ -2714,9 +2714,9 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) return (EAGAIN); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); ret = ctl_nametomib(tsdn, name, mibp, miblenp); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); return (ret); } @@ -2731,9 +2731,9 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); return (ret); } @@ -2744,9 +2744,9 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, tsdn_t *tsdn; tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); stats_print(write_cb, cbopaque, opts); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2759,14 +2759,14 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) malloc_thread_init(); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); if (config_ivsalloc) ret = ivsalloc(tsdn, ptr, config_prof); else ret = (ptr == NULL) ? 
0 : isalloc(tsdn, ptr, config_prof); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); return (ret); } diff --git a/src/witness.c b/src/witness.c index 23753f24..aaea88d4 100644 --- a/src/witness.c +++ b/src/witness.c @@ -71,15 +71,16 @@ witness_not_owner_error_t *witness_not_owner_error = #endif #ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) +#undef witness_lock_depth_error +#define witness_lock_depth_error JEMALLOC_N(n_witness_lock_depth_error) #endif void -witness_lockless_error(const witness_list_t *witnesses) +witness_lock_depth_error(const witness_list_t *witnesses, unsigned depth) { witness_t *w; - malloc_printf(": Should not own any locks:"); + malloc_printf(": Should own %u lock%s:", depth, (depth != 1) ? + "s" : ""); ql_foreach(w, witnesses, link) { malloc_printf(" %s(%u)", w->name, w->rank); } @@ -87,17 +88,17 @@ witness_lockless_error(const witness_list_t *witnesses) abort(); } #ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error) -witness_lockless_error_t *witness_lockless_error = - JEMALLOC_N(n_witness_lockless_error); +#undef witness_lock_depth_error +#define witness_lock_depth_error JEMALLOC_N(witness_lock_depth_error) +witness_lock_depth_error_t *witness_lock_depth_error = + JEMALLOC_N(n_witness_lock_depth_error); #endif void witnesses_cleanup(tsd_t *tsd) { - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lock_depth(tsd_tsdn(tsd), 0); /* Do nothing. 
*/ } diff --git a/test/unit/witness.c b/test/unit/witness.c index ed172753..9d4a1718 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -3,12 +3,12 @@ static witness_lock_error_t *witness_lock_error_orig; static witness_owner_error_t *witness_owner_error_orig; static witness_not_owner_error_t *witness_not_owner_error_orig; -static witness_lockless_error_t *witness_lockless_error_orig; +static witness_lock_depth_error_t *witness_lock_depth_error_orig; static bool saw_lock_error; static bool saw_owner_error; static bool saw_not_owner_error; -static bool saw_lockless_error; +static bool saw_lock_depth_error; static void witness_lock_error_intercept(const witness_list_t *witnesses, @@ -33,10 +33,11 @@ witness_not_owner_error_intercept(const witness_t *witness) } static void -witness_lockless_error_intercept(const witness_list_t *witnesses) +witness_lock_depth_error_intercept(const witness_list_t *witnesses, + unsigned depth) { - saw_lockless_error = true; + saw_lock_depth_error = true; } static int @@ -66,22 +67,25 @@ TEST_BEGIN(test_witness) tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_init(&a, "a", 1, NULL); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); + witness_assert_lock_depth(tsdn, 1); witness_init(&b, "b", 2, NULL); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); + witness_assert_lock_depth(tsdn, 2); witness_unlock(tsdn, &a); + witness_assert_lock_depth(tsdn, 1); witness_unlock(tsdn, &b); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); } TEST_END @@ -94,18 +98,21 @@ TEST_BEGIN(test_witness_comp) tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_init(&a, "a", 1, witness_comp); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); + witness_assert_lock_depth(tsdn, 1); witness_init(&b, "b", 1, 
witness_comp); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); + witness_assert_lock_depth(tsdn, 2); witness_unlock(tsdn, &b); + witness_assert_lock_depth(tsdn, 1); witness_lock_error_orig = witness_lock_error; witness_lock_error = witness_lock_error_intercept; @@ -117,6 +124,7 @@ TEST_BEGIN(test_witness_comp) witness_lock(tsdn, &c); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &c); + witness_assert_lock_depth(tsdn, 1); saw_lock_error = false; @@ -126,10 +134,11 @@ TEST_BEGIN(test_witness_comp) witness_lock(tsdn, &d); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &d); + witness_assert_lock_depth(tsdn, 1); witness_unlock(tsdn, &a); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_lock_error = witness_lock_error_orig; } @@ -148,20 +157,22 @@ TEST_BEGIN(test_witness_reversal) tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_init(&a, "a", 1, NULL); witness_init(&b, "b", 2, NULL); witness_lock(tsdn, &b); + witness_assert_lock_depth(tsdn, 1); assert_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &a); + witness_assert_lock_depth(tsdn, 1); witness_unlock(tsdn, &b); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_lock_error = witness_lock_error_orig; } @@ -184,7 +195,7 @@ TEST_BEGIN(test_witness_recursive) tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_init(&a, "a", 1, NULL); @@ -197,7 +208,7 @@ TEST_BEGIN(test_witness_recursive) witness_unlock(tsdn, &a); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; @@ -218,7 +229,7 @@ TEST_BEGIN(test_witness_unlock_not_owned) tsdn = 
tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_init(&a, "a", 1, NULL); @@ -226,41 +237,41 @@ TEST_BEGIN(test_witness_unlock_not_owned) witness_unlock(tsdn, &a); assert_true(saw_owner_error, "Expected owner error"); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_owner_error = witness_owner_error_orig; } TEST_END -TEST_BEGIN(test_witness_lockful) +TEST_BEGIN(test_witness_lock_depth) { witness_t a; tsdn_t *tsdn; test_skip_if(!config_debug); - witness_lockless_error_orig = witness_lockless_error; - witness_lockless_error = witness_lockless_error_intercept; - saw_lockless_error = false; + witness_lock_depth_error_orig = witness_lock_depth_error; + witness_lock_depth_error = witness_lock_depth_error_intercept; + saw_lock_depth_error = false; tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); witness_init(&a, "a", 1, NULL); - assert_false(saw_lockless_error, "Unexpected lockless error"); - witness_assert_lockless(tsdn); + assert_false(saw_lock_depth_error, "Unexpected lock_depth error"); + witness_assert_lock_depth(tsdn, 0); witness_lock(tsdn, &a); - witness_assert_lockless(tsdn); - assert_true(saw_lockless_error, "Expected lockless error"); + witness_assert_lock_depth(tsdn, 0); + assert_true(saw_lock_depth_error, "Expected lock_depth error"); witness_unlock(tsdn, &a); - witness_assert_lockless(tsdn); + witness_assert_lock_depth(tsdn, 0); - witness_lockless_error = witness_lockless_error_orig; + witness_lock_depth_error = witness_lock_depth_error_orig; } TEST_END @@ -274,5 +285,5 @@ main(void) test_witness_reversal, test_witness_recursive, test_witness_unlock_not_owned, - test_witness_lockful)); + test_witness_lock_depth)); } From b49c649bc18fff4bd10a1c8adbaf1f25f6453cb6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 20 Jan 2017 17:36:56 -0800 Subject: [PATCH 0606/2608] Fix lock order reversal during gdump. 
--- include/jemalloc/internal/chunk.h | 4 +-- src/arena.c | 41 +++++++++++++++++++++---------- src/chunk.c | 5 ++-- src/huge.c | 41 ++++++++++++++++++++++--------- 4 files changed, 61 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/chunk.h b/include/jemalloc/internal/chunk.h index 50b9904b..55df9acc 100644 --- a/include/jemalloc/internal/chunk.h +++ b/include/jemalloc/internal/chunk.h @@ -52,8 +52,8 @@ chunk_hooks_t chunk_hooks_get(tsdn_t *tsdn, arena_t *arena); chunk_hooks_t chunk_hooks_set(tsdn_t *tsdn, arena_t *arena, const chunk_hooks_t *chunk_hooks); -bool chunk_register(tsdn_t *tsdn, const void *chunk, - const extent_node_t *node); +bool chunk_register(const void *chunk, const extent_node_t *node, + bool *gdump); void chunk_deregister(const void *chunk, const extent_node_t *node); void *chunk_alloc_base(size_t size); void *chunk_alloc_cache(tsdn_t *tsdn, arena_t *arena, diff --git a/src/arena.c b/src/arena.c index 648a8da3..193a4a24 100644 --- a/src/arena.c +++ b/src/arena.c @@ -568,8 +568,8 @@ arena_chunk_init_spare(arena_t *arena) } static bool -arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, - size_t sn, bool zero) +arena_chunk_register(arena_t *arena, arena_chunk_t *chunk, size_t sn, bool zero, + bool *gdump) { /* @@ -580,7 +580,7 @@ arena_chunk_register(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk, */ extent_node_init(&chunk->node, arena, chunk, chunksize, sn, zero, true); extent_node_achunk_set(&chunk->node, true); - return (chunk_register(tsdn, chunk, &chunk->node)); + return (chunk_register(chunk, &chunk->node, gdump)); } static arena_chunk_t * @@ -591,6 +591,7 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, size_t sn; malloc_mutex_unlock(tsdn, &arena->lock); + witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. 
*/ chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, chunksize, &sn, zero, commit); @@ -603,16 +604,20 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, chunk = NULL; } } - if (chunk != NULL && arena_chunk_register(tsdn, arena, chunk, sn, - *zero)) { - if (!*commit) { - /* Undo commit of header. */ - chunk_hooks->decommit(chunk, chunksize, 0, map_bias << - LG_PAGE, arena->ind); + if (chunk != NULL) { + bool gdump; + if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) { + if (!*commit) { + /* Undo commit of header. */ + chunk_hooks->decommit(chunk, chunksize, 0, + map_bias << LG_PAGE, arena->ind); + } + chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, + (void *)chunk, chunksize, sn, *zero, *commit); + chunk = NULL; } - chunk_dalloc_wrapper(tsdn, arena, chunk_hooks, (void *)chunk, - chunksize, sn, *zero, *commit); - chunk = NULL; + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); } malloc_mutex_lock(tsdn, &arena->lock); @@ -627,14 +632,24 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; size_t sn; + /* prof_gdump() requirement. 
*/ + witness_assert_lock_depth(tsdn, 1); + malloc_mutex_assert_owner(tsdn, &arena->lock); + chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, chunksize, &sn, zero, commit, true); if (chunk != NULL) { - if (arena_chunk_register(tsdn, arena, chunk, sn, *zero)) { + bool gdump; + if (arena_chunk_register(arena, chunk, sn, *zero, &gdump)) { chunk_dalloc_cache(tsdn, arena, &chunk_hooks, chunk, chunksize, sn, true); return (NULL); } + if (config_prof && opt_prof && gdump) { + malloc_mutex_unlock(tsdn, &arena->lock); + prof_gdump(tsdn); + malloc_mutex_lock(tsdn, &arena->lock); + } } if (chunk == NULL) { chunk = arena_chunk_alloc_internal_hard(tsdn, arena, diff --git a/src/chunk.c b/src/chunk.c index c1c514a8..de3bf4cf 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -141,7 +141,7 @@ chunk_hooks_assure_initialized(tsdn_t *tsdn, arena_t *arena, } bool -chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) +chunk_register(const void *chunk, const extent_node_t *node, bool *gdump) { assert(extent_node_addr_get(node) == chunk); @@ -160,8 +160,7 @@ chunk_register(tsdn_t *tsdn, const void *chunk, const extent_node_t *node) */ high = atomic_read_z(&highchunks); } - if (cur > high && prof_gdump_get_unlocked()) - prof_gdump(tsdn); + *gdump = (cur > high && prof_gdump_get_unlocked()); } return (false); diff --git a/src/huge.c b/src/huge.c index 8abd8c00..9a91bed7 100644 --- a/src/huge.c +++ b/src/huge.c @@ -15,20 +15,20 @@ huge_node_get(const void *ptr) } static bool -huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_node_set(tsdn_t *tsdn, const void *ptr, extent_node_t *node, bool *gdump) { assert(extent_node_addr_get(node) == ptr); assert(!extent_node_achunk_get(node)); - return (chunk_register(tsdn, ptr, node)); + return (chunk_register(ptr, node, gdump)); } static void -huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node) +huge_node_reset(tsdn_t *tsdn, const void *ptr, extent_node_t *node, bool *gdump) { 
bool err; - err = huge_node_set(tsdn, ptr, node); + err = huge_node_set(tsdn, ptr, node, gdump); assert(!err); } @@ -57,11 +57,12 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, arena_t *iarena; extent_node_t *node; size_t sn; - bool is_zeroed; + bool is_zeroed, gdump; /* Allocate one or more contiguous chunks for this request. */ assert(!tsdn_null(tsdn) || arena != NULL); + witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) @@ -91,11 +92,13 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_node_init(node, arena, ret, usize, sn, is_zeroed, true); - if (huge_node_set(tsdn, ret, node)) { + if (huge_node_set(tsdn, ret, node, &gdump)) { arena_chunk_dalloc_huge(tsdn, arena, ret, usize, sn); idalloctm(tsdn, node, NULL, true, true); return (NULL); } + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); /* Insert node into huge. */ malloc_mutex_lock(tsdn, &arena->huge_mtx); @@ -144,7 +147,9 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, extent_node_t *node; arena_t *arena; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; - bool pre_zeroed, post_zeroed; + bool pre_zeroed, post_zeroed, gdump; + + witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ /* Increase usize to incorporate extra. */ for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) @@ -178,10 +183,13 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, huge_node_unset(ptr, node); assert(extent_node_size_get(node) != usize); extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + huge_node_reset(tsdn, ptr, node, &gdump); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* gdump without any locks held. 
*/ + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); arena_chunk_ralloc_huge_similar(tsdn, arena, ptr, oldsize, usize); @@ -207,7 +215,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, arena_t *arena; chunk_hooks_t chunk_hooks; size_t cdiff; - bool pre_zeroed, post_zeroed; + bool pre_zeroed, post_zeroed, gdump; node = huge_node_get(ptr); arena = extent_node_arena_get(node); @@ -215,6 +223,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); + witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ /* Split excess chunks. */ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); @@ -241,10 +250,13 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, /* Update the size of the huge allocation. */ huge_node_unset(ptr, node); extent_node_size_set(node, usize); - huge_node_reset(tsdn, ptr, node); + huge_node_reset(tsdn, ptr, node, &gdump); /* Update zeroed. */ extent_node_zeroed_set(node, post_zeroed); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* gdump without any locks held. */ + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); /* Zap the excess chunks. */ arena_chunk_ralloc_huge_shrink(tsdn, arena, ptr, oldsize, usize, @@ -258,7 +270,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t usize, bool zero) { extent_node_t *node; arena_t *arena; - bool is_zeroed_subchunk, is_zeroed_chunk; + bool is_zeroed_subchunk, is_zeroed_chunk, gdump; node = huge_node_get(ptr); arena = extent_node_arena_get(node); @@ -266,6 +278,8 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, is_zeroed_subchunk = extent_node_zeroed_get(node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ + /* * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, * update extent's zeroed field, and zero as necessary. 
@@ -280,8 +294,11 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, extent_node_size_set(node, usize); extent_node_zeroed_set(node, extent_node_zeroed_get(node) && is_zeroed_chunk); - huge_node_reset(tsdn, ptr, node); + huge_node_reset(tsdn, ptr, node, &gdump); malloc_mutex_unlock(tsdn, &arena->huge_mtx); + /* gdump without any locks held. */ + if (config_prof && opt_prof && gdump) + prof_gdump(tsdn); if (zero || (config_fill && unlikely(opt_zero))) { if (!is_zeroed_subchunk) { From b973ec797587778c6bb35f51c8f837a2ae6366cc Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Tue, 24 Jan 2017 14:54:18 -0500 Subject: [PATCH 0607/2608] Avoid redeclaring glibc's secure_getenv Avoid the name secure_getenv to avoid redeclaring secure_getenv when secure_getenv is present but its use is manually disabled via ac_cv_func_secure_getenv=no. --- src/jemalloc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c08f7e2f..92813b62 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -790,18 +790,19 @@ stats_print_atexit(void) * Begin initialization functions. 
*/ -#ifndef JEMALLOC_HAVE_SECURE_GETENV static char * -secure_getenv(const char *name) +jemalloc_secure_getenv(const char *name) { - +#ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +#else # ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) return (NULL); # endif return (getenv(name)); -} #endif +} static unsigned malloc_ncpus(void) @@ -1018,7 +1019,7 @@ malloc_conf_init(void) #endif ; - if ((opts = secure_getenv(envname)) != NULL) { + if ((opts = jemalloc_secure_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment From 0874b648e050c3503a4944963aa83bbb4cd414d6 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Tue, 24 Jan 2017 14:54:18 -0500 Subject: [PATCH 0608/2608] Avoid redeclaring glibc's secure_getenv Avoid the name secure_getenv to avoid redeclaring secure_getenv when secure_getenv is present but its use is manually disabled via ac_cv_func_secure_getenv=no. --- src/jemalloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a9a74973..d0c8c037 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -709,17 +709,19 @@ stats_print_atexit(void) { * Begin initialization functions. 
*/ -#ifndef JEMALLOC_HAVE_SECURE_GETENV static char * -secure_getenv(const char *name) { +jemalloc_secure_getenv(const char *name) { +#ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +#else # ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { return NULL; } # endif return getenv(name); -} #endif +} static unsigned malloc_ncpus(void) { @@ -908,7 +910,7 @@ malloc_conf_init(void) { #endif ; - if ((opts = secure_getenv(envname)) != NULL) { + if ((opts = jemalloc_secure_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to * the value of the MALLOC_CONF environment From 85d284181872d9a6fb813b184cad2cd0a77fc249 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 25 Jan 2017 15:50:59 -0800 Subject: [PATCH 0609/2608] Fix a bug in which a potentially invalid usize replaced size In the refactoring that unified the allocation paths, usize was substituted for size. This worked fine under the default test configuration, but triggered asserts when we started beefing up our CI testing. This change fixes the issue, and clarifies the comment describing the argument selection that it got wrong. --- src/jemalloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d0c8c037..28759bc6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1652,10 +1652,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { /* * If dopts->alignment > 0, then ind is still 0, but usize was * computed in the previous if statement. Down the positive - * alignment path, imalloc_no_sample ind and size (relying only - * on usize). + * alignment path, imalloc_no_sample ignores ind and size + * (relying only on usize). 
*/ - allocation = imalloc_no_sample(sopts, dopts, tsd, usize, usize, + allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { goto label_oom; From 6e7d0890cb66af3b85ab210ed781dab11c1c4614 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 25 Jan 2017 12:58:50 -0800 Subject: [PATCH 0610/2608] Beef up travis CI integration testing Introduces gen_travis.py, which generates .travis.yml, and updates .travis.yml to be the generated version. The travis build matrix approach doesn't play well with mixing and matching various different environment settings, so we generate every build explicitly, rather than letting them do it for us. To avoid abusing travis resources (and save us time waiting for CI results), we don't test every possible combination of options; we only check up to 2 unusual settings at a time. --- .travis.yml | 84 ++++++++++++++++++++++++++++++++++++------ scripts/gen_travis.py | 86 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 159 insertions(+), 11 deletions(-) create mode 100755 scripts/gen_travis.py diff --git a/.travis.yml b/.travis.yml index 97641eca..efac8547 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,31 +3,93 @@ language: generic matrix: include: - os: linux - env: CC=gcc CXX=g++ + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" - os: linux - env: CC=clang CXX=clang++ + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" - os: linux - env: CC=gcc CXX=g++ EXTRA_FLAGS=-m32 + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" addons: apt: packages: - - gcc-multilib + - gcc-multilib - os: linux - env: CC=clang CXX=clang++ EXTRA_FLAGS=-m32 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--disable-stats" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: osx + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" addons: apt: packages: - - gcc-multilib - - os: osx - env: CC=clang CXX=clang++ - - os: osx - env: CC=clang CXX=clang++ EXTRA_FLAGS=-m32 + - gcc-multilib + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug 
--disable-tcache" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" + before_script: - autoconf - - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS" CXX="$CXX $EXTRA_FLAGS"} + - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS - make -j3 - make -j3 tests script: - make check + diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py new file mode 100755 index 00000000..93fe3283 --- /dev/null +++ b/scripts/gen_travis.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python + +from itertools import combinations + +travis_template = """\ +language: generic + +matrix: + include: +%s + +before_script: + - autoconf + - ./configure ${COMPILER_FLAGS:+ \ + CC="$CC $COMPILER_FLAGS" \ + CXX="$CXX $COMPILER_FLAGS" } \ + $CONFIGURE_FLAGS + - make -j3 + - make -j3 tests + +script: + - make check +""" + +# The 'default' configuration is gcc, on linux, with no compiler or configure +# flags. We also test with clang, -m32, --enable-debug, --enable-prof, +# --disable-stats, and --disable-tcache. To avoid abusing travis though, we +# don't test all 2**7 = 128 possible combinations of these; instead, we only +# test combinations of up to 2 'unusual' settings, under the hope that bugs +# involving interactions of such settings are rare. 
+# things at once, for C(7, 0) + C(7, 1) + C(7, 2) = 29 +MAX_UNUSUAL_OPTIONS = 2 + +os_default = 'linux' +os_unusual = 'osx' + +compilers_default = 'CC=gcc CXX=g++' +compilers_unusual = 'CC=clang CXX=clang++' + +compiler_flag_unusuals = ['-m32'] + +configure_flag_unusuals = [ + '--enable-debug', '--enable-prof', '--disable-stats', '--disable-tcache', +] + +all_unusuals = ( + [os_unusual] + [compilers_unusual] + compiler_flag_unusuals + + configure_flag_unusuals +) + +unusual_combinations_to_test = [] +for i in xrange(MAX_UNUSUAL_OPTIONS + 1): + unusual_combinations_to_test += combinations(all_unusuals, i) + +include_rows = "" +for unusual_combination in unusual_combinations_to_test: + os = os_default + if os_unusual in unusual_combination: + os = os_unusual + + compilers = compilers_default + if compilers_unusual in unusual_combination: + compilers = compilers_unusual + + compiler_flags = [ + x for x in unusual_combination if x in compiler_flag_unusuals] + + configure_flags = [ + x for x in unusual_combination if x in configure_flag_unusuals] + + # Filter out an unsupported configuration - heap profiling on OS X. + if os == 'osx' and '--enable-prof' in configure_flags: + continue + + env_string = '{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}"'.format( + compilers, " ".join(compiler_flags), " ".join(configure_flags)) + + include_rows += ' - os: %s\n' % os + include_rows += ' env: %s\n' % env_string + if '-m32' in unusual_combination and os == 'linux': + include_rows += ' addons:\n' + include_rows += ' apt:\n' + include_rows += ' packages:\n' + include_rows += ' - gcc-multilib\n' + +print travis_template % include_rows From 5260d9c12f8f83e4f37a28593d197638ad3d4e56 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 27 Jan 2017 13:16:56 -0800 Subject: [PATCH 0611/2608] Introduce scripts to run all possible tests In 6e7d0890 we added better travis continuous integration tests. This is nice, but has two problems: - We run only a subset of interesting tests. 
- The travis builds can take hours to give us back results (especially on OS X). This adds scripts/gen_run_tests.py, and its output, run_tests.sh, which builds and runs a larger portion of possible configurations on the local machine. While a travis run takes several hours to complete, I can run these scripts on my (OS X) laptop and (Linux) devserver, and get a more exhaustive set of results back in around 10 minutes. --- run_tests.sh | 1 + scripts/gen_run_tests.py | 44 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100755 run_tests.sh create mode 100755 scripts/gen_run_tests.py diff --git a/run_tests.sh b/run_tests.sh new file mode 100755 index 00000000..b434f15b --- /dev/null +++ b/run_tests.sh @@ -0,0 +1 @@ +$(dirname "$0")/scripts/gen_run_tests.py | bash diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py new file mode 100755 index 00000000..694685cb --- /dev/null +++ b/scripts/gen_run_tests.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +from itertools import combinations + +def powerset(items): + result = [] + for i in xrange(len(items) + 1): + result += combinations(items, i) + return result + +MAKE_J_VAL = 32 + +possible_compilers = [('gcc', 'g++'), ('clang', 'clang++')] +possible_compiler_opts = [ + '-m32', +] +possible_config_opts = [ + '--enable-debug', + '--enable-prof', + '--disable-stats', + '--disable-tcache', +] + +print 'set -e' +print 'autoconf' +print 'unamestr=`uname`' + +for cc, cxx in possible_compilers: + for compiler_opts in powerset(possible_compiler_opts): + for config_opts in powerset(possible_config_opts): + config_line = ( + './configure ' + + 'CC="{} {}" '.format(cc, " ".join(compiler_opts)) + + 'CXX="{} {}" '.format(cxx, " ".join(compiler_opts)) + + " ".join(config_opts) + ) + # Heap profiling is not supported on OS X. 
+ if '--enable-prof' in config_opts: + print 'if [[ "$unamestr" != "Darwin" ]]; then' + print config_line + print "make clean" + print "make -j" + str(MAKE_J_VAL) + " check" + if '--enable-prof' in config_opts: + print 'fi' From 449b7f486777d14f5aaa84b6822221915e1405e6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 31 Jan 2017 16:44:57 -0800 Subject: [PATCH 0612/2608] CI: Run --enable-debug builds on windows This will hopefully catch some windows-specific bugs. --- .appveyor.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index ddd5c571..510815dc 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -12,6 +12,20 @@ environment: CPU: x86_64 - MSYSTEM: MINGW32 CPU: i686 + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW64 + CPU: x86_64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + CONFIG_FLAGS: --enable-debug install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% @@ -21,7 +35,7 @@ install: build_script: - bash -c "autoconf" - - bash -c "./configure" + - bash -c "./configure $CONFIG_FLAGS" - mingw32-make -j3 - file lib/jemalloc.dll - mingw32-make -j3 tests From 190f81c6d5676efd321701dd9b8918a24da2f783 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Feb 2017 10:03:04 -0800 Subject: [PATCH 0613/2608] Silence harmless warnings discovered via run_tests.sh. 
--- test/unit/stats_print.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 5a11b503..f0437891 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -65,7 +65,8 @@ token_error(token_t *token) { token->col); break; } - write(STDERR_FILENO, &token->parser->buf[token->pos], token->len); + UNUSED ssize_t err = write(STDERR_FILENO, + &token->parser->buf[token->pos], token->len); malloc_printf("\n"); } @@ -129,7 +130,9 @@ parser_tokenize(parser_t *parser) { STATE_EXP_DIGITS, STATE_ACCEPT } state = STATE_START; - size_t token_pos, token_line, token_col; + size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); + size_t token_line JEMALLOC_CC_SILENCE_INIT(1); + size_t token_col JEMALLOC_CC_SILENCE_INIT(0); assert_zu_le(parser->pos, parser->len, "Position is past end of buffer"); From bbff6ca6740c27737378f6c2dec3a13053a5a150 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 30 Jan 2017 15:54:16 -0800 Subject: [PATCH 0614/2608] Handle race in stats_arena_bins_print When multiple threads calling stats_print, race could happen as we read the counters in separate mallctl calls; and the removed assertion could fail when other operations happened in between the mallctl calls. For simplicity, output "race" in the utilization field in this case. --- src/stats.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 2a424a73..ae360e1b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -133,8 +133,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, availregs = nregs * curslabs; milli = (availregs != 0) ? (1000 * curregs) / availregs : 1000; - assert(milli <= 1000); - if (milli < 10) { + + if (milli > 1000) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. 
+ */ + malloc_snprintf(util, sizeof(util), " race"); + } else if (milli < 10) { malloc_snprintf(util, sizeof(util), "0.00%zu", milli); } else if (milli < 100) { @@ -144,6 +152,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_snprintf(util, sizeof(util), "0.%zu", milli); } else { + assert(milli == 1000); malloc_snprintf(util, sizeof(util), "1"); } From 397f54aa460593295139e94c81e8f5b2152a088f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 29 Jan 2017 17:35:57 -0800 Subject: [PATCH 0615/2608] Conditionalize prof fork handling on config_prof. This allows the compiler to completely remove dead code. --- src/prof.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/prof.c b/src/prof.c index 1dd0f54d..28d30f29 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2353,7 +2353,7 @@ prof_boot2(tsd_t *tsd) { void prof_prefork0(tsdn_t *tsdn) { - if (opt_prof) { + if (config_prof && opt_prof) { unsigned i; malloc_mutex_prefork(tsdn, &prof_dump_mtx); @@ -2370,7 +2370,7 @@ prof_prefork0(tsdn_t *tsdn) { void prof_prefork1(tsdn_t *tsdn) { - if (opt_prof) { + if (config_prof && opt_prof) { malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); @@ -2381,7 +2381,7 @@ prof_prefork1(tsdn_t *tsdn) { void prof_postfork_parent(tsdn_t *tsdn) { - if (opt_prof) { + if (config_prof && opt_prof) { unsigned i; malloc_mutex_postfork_parent(tsdn, @@ -2404,7 +2404,7 @@ prof_postfork_parent(tsdn_t *tsdn) { void prof_postfork_child(tsdn_t *tsdn) { - if (opt_prof) { + if (config_prof && opt_prof) { unsigned i; malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); From 5033a9176ac9489805a387c040008b088cf15bda Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 29 Jan 2017 21:51:30 -0800 Subject: [PATCH 0616/2608] Call prof_gctx_create() without owing bt2gctx_mtx. 
This reduces the probability of allocating (and thereby indirectly making a system call) while owning bt2gctx_mtx. Unfortunately it is an incomplete solution, because ckh insertion/deletion can also allocate/deallocate, which requires more extensive changes to address. --- src/prof.c | 41 +++++++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/prof.c b/src/prof.c index 28d30f29..5aeefb28 100644 --- a/src/prof.c +++ b/src/prof.c @@ -708,7 +708,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, union { prof_gctx_t *p; void *v; - } gctx; + } gctx, tgctx; union { prof_bt_t *p; void *v; @@ -718,21 +718,32 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_enter(tsd, tdata); if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { /* bt has never been seen before. Insert it. */ - gctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); - if (gctx.v == NULL) { - prof_leave(tsd, tdata); + prof_leave(tsd, tdata); + tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); + if (tgctx.v == NULL) { return true; } - btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { - /* OOM. */ - prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), gctx.v), - gctx.v, NULL, true, true); - return true; + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + gctx.p = tgctx.p; + btkey.p = &gctx.p->bt; + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + /* OOM. */ + prof_leave(tsd, tdata); + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + gctx.v), gctx.v, NULL, true, true); + return true; + } + new_gctx = true; + } else { + new_gctx = false; } - new_gctx = true; } else { + tgctx.v = NULL; + new_gctx = false; + } + + if (!new_gctx) { /* * Increment nlimbo, in order to avoid a race condition with * prof_tctx_destroy()/prof_gctx_try_destroy(). 
@@ -741,6 +752,12 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, gctx.p->nlimbo++; malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); new_gctx = false; + + if (tgctx.v != NULL) { + /* Lost race to insert. */ + idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), + tgctx.v), tgctx.v, NULL, true, true); + } } prof_leave(tsd, tdata); From ace679ce7435e990df6b789fde4cefeb0e6b992b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 27 Jan 2017 15:03:11 -0800 Subject: [PATCH 0617/2608] Synchronize extent_grow_next accesses. This should have been part of 411697adcda2fd75e135cdcdafb95f2bd295dc7f (Use exponential series to size extents.), which introduced extent_grow_next. --- src/extent.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index 0dbde72a..e2af2b50 100644 --- a/src/extent.c +++ b/src/extent.c @@ -710,7 +710,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * extent creation as a side effect. */ size = usize + pad; - alloc_size = pind2sz(arena->extent_grow_next); + alloc_size = pind2sz(atomic_read_u(&arena->extent_grow_next)); alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (alloc_size_min < usize) { @@ -809,8 +809,20 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (*zero && !extent_zeroed_get(extent)) { memset(extent_addr_get(extent), 0, extent_usize_get(extent)); } - if (arena->extent_grow_next + 1 < NPSIZES) { - arena->extent_grow_next++; + /* + * Increment extent_grow_next, but take care to do so atomically and + * bail out if the increment would exceed the legal range. 
+ */ + while (true) { + pszind_t egn = atomic_read_u(&arena->extent_grow_next); + + if (egn + 1 == NPSIZES) { + break; + } + assert(egn + 1 < NPSIZES); + if (!atomic_cas_u(&arena->extent_grow_next, egn, egn + 1)) { + break; + } } return extent; } From d0e93ada51e20f4ae394ff4dbdcf96182767c89c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Jan 2017 15:12:03 -0800 Subject: [PATCH 0618/2608] Add witness_assert_depth[_to_rank](). This makes it possible to make lock state assertions about precisely which locks are held. --- include/jemalloc/internal/private_symbols.txt | 4 +- include/jemalloc/internal/witness_externs.h | 8 +-- include/jemalloc/internal/witness_inlines.h | 28 +++++++++- include/jemalloc/internal/witness_types.h | 2 + src/witness.c | 17 ++++--- test/unit/witness.c | 51 ++++++++++++++----- 6 files changed, 84 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 745220e3..2567f56c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -549,13 +549,15 @@ tsdn_fetch tsdn_null tsdn_rtree_ctx tsdn_tsd +witness_assert_depth +witness_assert_depth_to_rank witness_assert_lockless witness_assert_not_owner witness_assert_owner +witness_depth_error witness_init witness_lock witness_lock_error -witness_lockless_error witness_not_owner_error witness_owner witness_owner_error diff --git a/include/jemalloc/internal/witness_externs.h b/include/jemalloc/internal/witness_externs.h index dcd987cc..5d91fde2 100644 --- a/include/jemalloc/internal/witness_externs.h +++ b/include/jemalloc/internal/witness_externs.h @@ -23,10 +23,12 @@ extern witness_not_owner_error_t *witness_not_owner_error; void witness_not_owner_error(const witness_t *witness); #endif #ifdef JEMALLOC_JET -typedef void (witness_lockless_error_t)(const witness_list_t *); -extern witness_lockless_error_t *witness_lockless_error; +typedef void 
(witness_depth_error_t)(const witness_list_t *, + witness_rank_t rank_inclusive, unsigned depth); +extern witness_depth_error_t *witness_depth_error; #else -void witness_lockless_error(const witness_list_t *witnesses); +void witness_depth_error(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth); #endif void witnesses_cleanup(tsd_t *tsd); diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h index c2a27812..51f3f6e7 100644 --- a/include/jemalloc/internal/witness_inlines.h +++ b/include/jemalloc/internal/witness_inlines.h @@ -5,6 +5,9 @@ bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); +void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, + unsigned depth); +void witness_assert_depth(tsdn_t *tsdn, unsigned depth); void witness_assert_lockless(tsdn_t *tsdn); void witness_lock(tsdn_t *tsdn, witness_t *witness); void witness_unlock(tsdn_t *tsdn, witness_t *witness); @@ -78,8 +81,10 @@ witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { } JEMALLOC_INLINE void -witness_assert_lockless(tsdn_t *tsdn) { +witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, + unsigned depth) { tsd_t *tsd; + unsigned d; witness_list_t *witnesses; witness_t *w; @@ -92,11 +97,30 @@ witness_assert_lockless(tsdn_t *tsdn) { } tsd = tsdn_tsd(tsdn); + d = 0; witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); if (w != NULL) { - witness_lockless_error(witnesses); + ql_reverse_foreach(w, witnesses, link) { + if (w->rank < rank_inclusive) { + break; + } + d++; + } } + if (d != depth) { + witness_depth_error(witnesses, rank_inclusive, depth); + } +} + +JEMALLOC_INLINE void +witness_assert_depth(tsdn_t *tsdn, unsigned depth) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth); +} + 
+JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) { + witness_assert_depth(tsdn, 0); } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index c2a73f2e..f765d7b3 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -13,6 +13,8 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, */ #define WITNESS_RANK_OMIT 0U +#define WITNESS_RANK_MIN 1U + #define WITNESS_RANK_INIT 1U #define WITNESS_RANK_CTL 1U #define WITNESS_RANK_ARENAS 2U diff --git a/src/witness.c b/src/witness.c index 1c03457e..034ea92b 100644 --- a/src/witness.c +++ b/src/witness.c @@ -65,14 +65,16 @@ witness_not_owner_error_t *witness_not_owner_error = #endif #ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(n_witness_lockless_error) +#undef witness_depth_error +#define witness_depth_error JEMALLOC_N(n_witness_depth_error) #endif void -witness_lockless_error(const witness_list_t *witnesses) { +witness_depth_error(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth) { witness_t *w; - malloc_printf(": Should not own any locks:"); + malloc_printf(": Should own %u lock%s of rank >= %u:", depth, + (depth != 1) ? 
"s" : "", rank_inclusive); ql_foreach(w, witnesses, link) { malloc_printf(" %s(%u)", w->name, w->rank); } @@ -80,10 +82,9 @@ witness_lockless_error(const witness_list_t *witnesses) { abort(); } #ifdef JEMALLOC_JET -#undef witness_lockless_error -#define witness_lockless_error JEMALLOC_N(witness_lockless_error) -witness_lockless_error_t *witness_lockless_error = - JEMALLOC_N(n_witness_lockless_error); +#undef witness_depth_error +#define witness_depth_error JEMALLOC_N(witness_depth_error) +witness_depth_error_t *witness_depth_error = JEMALLOC_N(n_witness_depth_error); #endif void diff --git a/test/unit/witness.c b/test/unit/witness.c index c914e4b3..de2e6028 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -3,12 +3,12 @@ static witness_lock_error_t *witness_lock_error_orig; static witness_owner_error_t *witness_owner_error_orig; static witness_not_owner_error_t *witness_not_owner_error_orig; -static witness_lockless_error_t *witness_lockless_error_orig; +static witness_depth_error_t *witness_depth_error_orig; static bool saw_lock_error; static bool saw_owner_error; static bool saw_not_owner_error; -static bool saw_lockless_error; +static bool saw_depth_error; static void witness_lock_error_intercept(const witness_list_t *witnesses, @@ -27,8 +27,9 @@ witness_not_owner_error_intercept(const witness_t *witness) { } static void -witness_lockless_error_intercept(const witness_list_t *witnesses) { - saw_lockless_error = true; +witness_depth_error_intercept(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth) { + saw_depth_error = true; } static int @@ -61,21 +62,36 @@ TEST_BEGIN(test_witness) { tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 0); witness_init(&a, "a", 1, NULL, NULL); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); + witness_assert_depth(tsdn, 1); + 
witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 0); witness_init(&b, "b", 2, NULL, NULL); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); + witness_assert_depth(tsdn, 2); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 2); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)3U, 0); witness_unlock(tsdn, &a); + witness_assert_depth(tsdn, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)3U, 0); witness_unlock(tsdn, &b); witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 0); } TEST_END @@ -93,12 +109,15 @@ TEST_BEGIN(test_witness_comp) { witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); + witness_assert_depth(tsdn, 1); witness_init(&b, "b", 1, witness_comp, &b); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); + witness_assert_depth(tsdn, 2); witness_unlock(tsdn, &b); + witness_assert_depth(tsdn, 1); witness_lock_error_orig = witness_lock_error; witness_lock_error = witness_lock_error_intercept; @@ -110,6 +129,7 @@ TEST_BEGIN(test_witness_comp) { witness_lock(tsdn, &c); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &c); + witness_assert_depth(tsdn, 1); saw_lock_error = false; @@ -119,6 +139,7 @@ TEST_BEGIN(test_witness_comp) { witness_lock(tsdn, &d); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &d); + witness_assert_depth(tsdn, 1); witness_unlock(tsdn, &a); @@ -146,11 +167,13 @@ TEST_BEGIN(test_witness_reversal) { witness_init(&b, "b", 2, NULL, NULL); witness_lock(tsdn, &b); + witness_assert_depth(tsdn, 1); assert_false(saw_lock_error, 
"Unexpected witness lock error"); witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &a); + witness_assert_depth(tsdn, 1); witness_unlock(tsdn, &b); witness_assert_lockless(tsdn); @@ -222,34 +245,38 @@ TEST_BEGIN(test_witness_unlock_not_owned) { } TEST_END -TEST_BEGIN(test_witness_lockful) { +TEST_BEGIN(test_witness_depth) { witness_t a; tsdn_t *tsdn; test_skip_if(!config_debug); - witness_lockless_error_orig = witness_lockless_error; - witness_lockless_error = witness_lockless_error_intercept; - saw_lockless_error = false; + witness_depth_error_orig = witness_depth_error; + witness_depth_error = witness_depth_error_intercept; + saw_depth_error = false; tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); witness_init(&a, "a", 1, NULL, NULL); - assert_false(saw_lockless_error, "Unexpected lockless error"); + assert_false(saw_depth_error, "Unexpected depth error"); witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); witness_lock(tsdn, &a); witness_assert_lockless(tsdn); - assert_true(saw_lockless_error, "Expected lockless error"); + witness_assert_depth(tsdn, 0); + assert_true(saw_depth_error, "Expected depth error"); witness_unlock(tsdn, &a); witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); - witness_lockless_error = witness_lockless_error_orig; + witness_depth_error = witness_depth_error_orig; } TEST_END @@ -261,5 +288,5 @@ main(void) { test_witness_reversal, test_witness_recursive, test_witness_unlock_not_owned, - test_witness_lockful); + test_witness_depth); } From 1b6e43507ed330314fffe0872f48a95a9fe502fe Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 29 Jan 2017 21:32:39 -0800 Subject: [PATCH 0619/2608] Fix/refactor tcaches synchronization. Synchronize tcaches with tcaches_mtx rather than ctl_mtx. Add missing synchronization for tcache flushing. 
This bug was introduced by 1cb181ed632e7573fb4eab194e4d216867222d27 (Implement explicit tcache support.), which was first released in 4.0.0. --- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/tcache_externs.h | 3 + include/jemalloc/internal/witness_types.h | 25 ++--- src/ctl.c | 2 - src/jemalloc.c | 3 + src/tcache.c | 96 +++++++++++++++---- 6 files changed, 102 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 2567f56c..36bcda24 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -469,6 +469,9 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass +tcache_prefork +tcache_postfork_child +tcache_postfork_parent tcache_salloc tcache_stats_merge tcaches diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index ead90afc..3e4a7511 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -43,5 +43,8 @@ bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index f765d7b3..dfcf1621 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -17,21 +17,22 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_INIT 1U #define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U +#define WITNESS_RANK_TCACHES 2U +#define WITNESS_RANK_ARENAS 3U -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U 
-#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U +#define WITNESS_RANK_PROF_DUMP 4U +#define WITNESS_RANK_PROF_BT2GCTX 5U +#define WITNESS_RANK_PROF_TDATAS 6U +#define WITNESS_RANK_PROF_TDATA 7U +#define WITNESS_RANK_PROF_GCTX 8U -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_EXTENTS 9U -#define WITNESS_RANK_ARENA_EXTENT_CACHE 10 +#define WITNESS_RANK_ARENA 9U +#define WITNESS_RANK_ARENA_EXTENTS 10U +#define WITNESS_RANK_ARENA_EXTENT_CACHE 11U -#define WITNESS_RANK_RTREE_ELM 11U -#define WITNESS_RANK_RTREE 12U -#define WITNESS_RANK_BASE 13U +#define WITNESS_RANK_RTREE_ELM 12U +#define WITNESS_RANK_RTREE 13U +#define WITNESS_RANK_BASE 14U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF diff --git a/src/ctl.c b/src/ctl.c index 64b74263..403bc30c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1477,7 +1477,6 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, return ENOENT; } - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1487,7 +1486,6 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return ret; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 28759bc6..45e9aea7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2765,6 +2765,7 @@ _malloc_prefork(void) witness_prefork(tsd); /* Acquire all mutexes in a safe order. 
*/ ctl_prefork(tsd_tsdn(tsd)); + tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); prof_prefork0(tsd_tsdn(tsd)); for (i = 0; i < 3; i++) { @@ -2825,6 +2826,7 @@ _malloc_postfork(void) } prof_postfork_parent(tsd_tsdn(tsd)); malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); + tcache_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd)); } @@ -2848,6 +2850,7 @@ jemalloc_postfork_child(void) { } prof_postfork_child(tsd_tsdn(tsd)); malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); + tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); } diff --git a/src/tcache.c b/src/tcache.c index 96a42add..76277f06 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -21,6 +21,9 @@ static unsigned tcaches_past; /* Head of singly linked list tracking available tcaches elements. */ static tcaches_t *tcaches_avail; +/* Protects tcaches{,_past,_avail}. */ +static malloc_mutex_t tcaches_mtx; + /******************************************************************************/ size_t @@ -422,32 +425,56 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } } -bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) { - arena_t *arena; - tcache_t *tcache; - tcaches_t *elm; +static bool +tcaches_create_prep(tsd_t *tsd) { + bool err; + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); if (tcaches == NULL) { - return true; + err = true; + goto label_return; } } if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) { - return true; - } - arena = arena_ichoose(tsd, NULL); - if (unlikely(arena == NULL)) { - return true; - } - tcache = tcache_create(tsd_tsdn(tsd), arena); - if (tcache == NULL) { - return true; + err = true; + goto label_return; } + err = false; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); + return err; +} + +bool +tcaches_create(tsd_t *tsd, 
unsigned *r_ind) { + witness_assert_depth(tsd_tsdn(tsd), 0); + + bool err; + + if (tcaches_create_prep(tsd)) { + err = true; + goto label_return; + } + + arena_t *arena = arena_ichoose(tsd, NULL); + if (unlikely(arena == NULL)) { + err = true; + goto label_return; + } + tcache_t *tcache = tcache_create(tsd_tsdn(tsd), arena); + if (tcache == NULL) { + err = true; + goto label_return; + } + + tcaches_t *elm; + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches_avail != NULL) { elm = tcaches_avail; tcaches_avail = tcaches_avail->next; @@ -459,12 +486,18 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) { *r_ind = tcaches_past; tcaches_past++; } + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); - return false; + err = false; +label_return: + witness_assert_depth(tsd_tsdn(tsd), 0); + return err; } static void tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); + if (elm->tcache == NULL) { return; } @@ -474,19 +507,25 @@ tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { void tcaches_flush(tsd_t *tsd, unsigned ind) { + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_elm_flush(tsd, &tcaches[ind]); + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); } void tcaches_destroy(tsd_t *tsd, unsigned ind) { + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_t *elm = &tcaches[ind]; tcaches_elm_flush(tsd, elm); elm->next = tcaches_avail; tcaches_avail = elm; + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); } bool tcache_boot(tsdn_t *tsdn) { + cassert(config_tcache); + unsigned i; /* If necessary, clamp opt_lg_tcache_max. */ @@ -497,6 +536,10 @@ tcache_boot(tsdn_t *tsdn) { tcache_maxclass = (ZU(1) << opt_lg_tcache_max); } + if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES)) { + return true; + } + nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. 
*/ @@ -527,3 +570,24 @@ tcache_boot(tsdn_t *tsdn) { return false; } + +void +tcache_prefork(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_prefork(tsdn, &tcaches_mtx); + } +} + +void +tcache_postfork_parent(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); + } +} + +void +tcache_postfork_child(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_postfork_child(tsdn, &tcaches_mtx); + } +} From d27f29b468ae3e9d2b1da4a9880351d76e5a1662 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 29 Jan 2017 21:57:14 -0800 Subject: [PATCH 0620/2608] Disentangle arena and extent locking. Refactor arena and extent locking protocols such that arena and extent locks are never held when calling into the extent_*_wrapper() API. This requires extra care during purging since the arena lock no longer protects the inner purging logic. It also requires extra care to protect extents from being merged with adjacent extents. Convert extent_t's 'active' flag to an enumerated 'state', so that retained extents are explicitly marked as such, rather than depending on ring linkage state. Refactor the extent collections (and their synchronization) for cached and retained extents into extents_t. Incorporate LRU functionality to support purging. Incorporate page count accounting, which replaces arena->ndirty and arena->stats.retained. Assert that no core locks are held when entering any internal [de]allocation functions. This is in addition to existing assertions that no locks are held when entering external [de]allocation functions. Audit and document synchronization protocols for all arena_t fields. This fixes a potential deadlock due to recursive allocation during gdump, in a similar fashion to b49c649bc18fff4bd10a1c8adbaf1f25f6453cb6 (Fix lock order reversal during gdump.), but with a necessarily much broader code impact. 
--- include/jemalloc/internal/arena_externs.h | 11 +- include/jemalloc/internal/arena_structs_b.h | 117 ++-- include/jemalloc/internal/extent_externs.h | 10 +- include/jemalloc/internal/extent_inlines.h | 58 +- include/jemalloc/internal/extent_structs.h | 58 +- include/jemalloc/internal/extent_types.h | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 9 +- include/jemalloc/internal/large_externs.h | 3 +- include/jemalloc/internal/private_symbols.txt | 28 +- include/jemalloc/internal/stats_structs.h | 17 +- include/jemalloc/internal/witness_types.h | 12 +- src/arena.c | 377 ++++------- src/base.c | 5 +- src/extent.c | 628 +++++++++++------- src/extent_dss.c | 5 +- src/large.c | 46 +- src/tcache.c | 31 +- test/unit/arena_reset.c | 2 +- test/unit/slab.c | 4 +- 19 files changed, 772 insertions(+), 650 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index ecc82304..d0af91bf 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -13,22 +13,17 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; -extent_t *arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero); void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); -void arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool cache); -void arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool cache); #ifdef JEMALLOC_JET size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, +void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool 
locked); +void arena_extent_dalloc_large_finish(tsdn_t *tsdn, arena_t *arena, + extent_t *extent); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index c1c20731..8629446d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -66,8 +66,8 @@ struct arena_decay_s { /* * Number of dirty pages at beginning of current epoch. During epoch * advancement we use the delta between arena->decay.ndirty and - * arena->ndirty to determine how many dirty pages, if any, were - * generated. + * extents_npages_get(&arena->extents_cached) to determine how many + * dirty pages, if any, were generated. */ size_t nunpurged; /* @@ -98,8 +98,8 @@ struct arena_bin_s { */ extent_heap_t slabs_nonfull; - /* Ring sentinel used to track full slabs. */ - extent_t slabs_full; + /* List used to track full slabs. */ + extent_list_t slabs_full; /* Bin statistics. */ malloc_bin_stats_t stats; @@ -107,84 +107,97 @@ struct arena_bin_s { struct arena_s { /* - * Number of threads currently assigned to this arena, synchronized via - * atomic operations. Each thread has two distinct assignments, one for - * application-serving allocation, and the other for internal metadata - * allocation. Internal metadata must not be allocated from arenas - * explicitly created via the arenas.create mallctl, because the - * arena..reset mallctl indiscriminately discards all allocations for - * the affected arena. + * Number of threads currently assigned to this arena. Each thread has + * two distinct assignments, one for application-serving allocation, and + * the other for internal metadata allocation. 
Internal metadata must + * not be allocated from arenas explicitly created via the arenas.create + * mallctl, because the arena..reset mallctl indiscriminately + * discards all allocations for the affected arena. * * 0: Application allocation. * 1: Internal metadata allocation. + * + * Synchronization: atomic. */ unsigned nthreads[2]; /* - * There are three classes of arena operations from a locking - * perspective: - * 1) Thread assignment (modifies nthreads) is synchronized via atomics. - * 2) Bin-related operations are protected by bin locks. - * 3) Extent-related operations are protected by this mutex. + * Synchronizes various arena operations, as indicated in field-specific + * comments. */ malloc_mutex_t lock; + /* Synchronization: lock. */ arena_stats_t stats; /* * List of tcaches for extant threads associated with this arena. * Stats from these are merged incrementally, and at exit if * opt_stats_print is enabled. + * + * Synchronization: lock. */ ql_head(tcache_t) tcache_ql; + /* Synchronization: lock. */ uint64_t prof_accumbytes; /* * PRNG state for cache index randomization of large allocation base * pointers. + * + * Synchronization: atomic. */ size_t offset_state; - /* Extent serial number generator state. */ + /* + * Extent serial number generator state. + * + * Synchronization: atomic. + */ size_t extent_sn_next; + /* Synchronization: lock. */ dss_prec_t dss_prec; - /* True if a thread is currently executing arena_purge_to_limit(). */ - bool purging; + /* + * 1/0 (true/false) if a thread is currently executing + * arena_purge_to_limit(). + * + * Synchronization: atomic. + */ + unsigned purging; - /* Number of pages in active extents. */ + /* + * Number of pages in active extents. + * + * Synchronization: atomic. + */ size_t nactive; /* - * Current count of pages within unused extents that are potentially - * dirty, and for which pages_purge_*() has not been called. 
By - * tracking this, we can institute a limit on how much dirty unused - * memory is mapped for each arena. + * Decay-based purging state. + * + * Synchronization: lock. */ - size_t ndirty; - - /* Decay-based purging state. */ arena_decay_t decay; - /* Extant large allocations. */ - ql_head(extent_t) large; + /* + * Extant large allocations. + * + * Synchronization: large_mtx. + */ + extent_list_t large; /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; /* - * Heaps of extents that were previously allocated. These are used when - * allocating extents, in an attempt to re-use address space. + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. */ - extent_heap_t extents_cached[NPSIZES+1]; - extent_heap_t extents_retained[NPSIZES+1]; - /* - * Ring sentinel used to track unused dirty memory. Dirty memory is - * managed as an LRU of cached extents. - */ - extent_t extents_dirty; - /* Protects extents_{cached,retained,dirty}. */ - malloc_mutex_t extents_mtx; + extents_t extents_cached; + extents_t extents_retained; /* * Next extent size class in a growing series to use when satisfying a @@ -192,17 +205,31 @@ struct arena_s { * the number of disjoint virtual memory ranges so that extent merging * can be effective even if multiple arenas' extent allocation requests * are highly interleaved. + * + * Synchronization: atomic. */ pszind_t extent_grow_next; - /* Cache of extent structures that were allocated via base_alloc(). */ - ql_head(extent_t) extent_cache; - malloc_mutex_t extent_cache_mtx; + /* + * Freelist of extent structures that were allocated via base_alloc(). + * + * Synchronization: extent_freelist_mtx. + */ + extent_list_t extent_freelist; + malloc_mutex_t extent_freelist_mtx; - /* bins is used to store heaps of free regions. */ + /* + * bins is used to store heaps of free regions. 
+ * + * Synchronization: internal. + */ arena_bin_t bins[NBINS]; - /* Base allocator, from which arena metadata are allocated. */ + /* + * Base allocator, from which arena metadata are allocated. + * + * Synchronization: internal. + */ base_t *base; }; diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 59f3c7ca..a3556118 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -21,9 +21,13 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) -extent_t *extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); +bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state); +extent_state_t extents_state_get(const extents_t *extents); +size_t extents_npages_get(extents_t *extents); +extent_t *extents_evict(tsdn_t *tsdn, extents_t *extents, size_t npages_min); +void extents_prefork(tsdn_t *tsdn, extents_t *extents); +void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); +void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab); diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 379dd290..473aad71 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -12,8 +12,7 @@ void *extent_before_get(const extent_t *extent); void *extent_last_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); size_t extent_sn_get(const extent_t *extent); -bool extent_active_get(const extent_t *extent); -bool extent_retained_get(const extent_t *extent); +extent_state_t 
extent_state_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); bool extent_slab_get(const extent_t *extent); @@ -26,16 +25,19 @@ void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); void extent_usize_set(extent_t *extent, size_t usize); void extent_sn_set(extent_t *extent, size_t sn); -void extent_active_set(extent_t *extent, bool active); +void extent_state_set(extent_t *extent, extent_state_t state); void extent_zeroed_set(extent_t *extent, bool zeroed); void extent_committed_set(extent_t *extent, bool committed); void extent_slab_set(extent_t *extent, bool slab); void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, size_t usize, size_t sn, bool active, bool zeroed, + size_t size, size_t usize, size_t sn, extent_state_t state, bool zeroed, bool committed, bool slab); -void extent_ring_insert(extent_t *sentinel, extent_t *extent); -void extent_ring_remove(extent_t *extent); +void extent_list_init(extent_list_t *list); +extent_t *extent_list_first(const extent_list_t *list); +extent_t *extent_list_last(const extent_list_t *list); +void extent_list_append(extent_list_t *list, extent_t *extent); +void extent_list_remove(extent_list_t *list, extent_t *extent); int extent_sn_comp(const extent_t *a, const extent_t *b); int extent_ad_comp(const extent_t *a, const extent_t *b); int extent_snad_comp(const extent_t *a, const extent_t *b); @@ -103,14 +105,9 @@ extent_sn_get(const extent_t *extent) { return extent->e_sn; } -JEMALLOC_INLINE bool -extent_active_get(const extent_t *extent) { - return extent->e_active; -} - -JEMALLOC_INLINE bool -extent_retained_get(const extent_t *extent) { - return (qr_next(extent, qr_link) == extent); +JEMALLOC_INLINE extent_state_t +extent_state_get(const extent_t *extent) { + return extent->e_state; } 
JEMALLOC_INLINE bool @@ -191,8 +188,8 @@ extent_sn_set(extent_t *extent, size_t sn) { } JEMALLOC_INLINE void -extent_active_set(extent_t *extent, bool active) { - extent->e_active = active; +extent_state_set(extent_t *extent, extent_state_t state) { + extent->e_state = state; } JEMALLOC_INLINE void @@ -217,7 +214,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - size_t usize, size_t sn, bool active, bool zeroed, bool committed, + size_t usize, size_t sn, extent_state_t state, bool zeroed, bool committed, bool slab) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); @@ -226,24 +223,39 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_size_set(extent, size); extent_usize_set(extent, usize); extent_sn_set(extent, sn); - extent_active_set(extent, active); + extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); extent_slab_set(extent, slab); if (config_prof) { extent_prof_tctx_set(extent, NULL); } - qr_new(extent, qr_link); + ql_elm_new(extent, ql_link); } JEMALLOC_INLINE void -extent_ring_insert(extent_t *sentinel, extent_t *extent) { - qr_meld(sentinel, extent, extent_t, qr_link); +extent_list_init(extent_list_t *list) { + ql_new(list); +} + +JEMALLOC_INLINE extent_t * +extent_list_first(const extent_list_t *list) { + return ql_first(list); +} + +JEMALLOC_INLINE extent_t * +extent_list_last(const extent_list_t *list) { + return ql_last(list, ql_link); } JEMALLOC_INLINE void -extent_ring_remove(extent_t *extent) { - qr_remove(extent, qr_link); +extent_list_append(extent_list_t *list, extent_t *extent) { + ql_tail_insert(list, extent, ql_link); +} + +JEMALLOC_INLINE void +extent_list_remove(extent_list_t *list, extent_t *extent) { + ql_remove(list, extent, ql_link); } JEMALLOC_INLINE int diff --git a/include/jemalloc/internal/extent_structs.h 
b/include/jemalloc/internal/extent_structs.h index de31317c..33ca4ac7 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -1,6 +1,12 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +typedef enum { + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_retained = 2 +} extent_state_t; + /* Extent (span of pages). Use accessor functions for e_* fields. */ struct extent_s { /* Arena from which this extent came, if any. */ @@ -32,8 +38,8 @@ struct extent_s { */ size_t e_sn; - /* True if extent is active (in use). */ - bool e_active; + /* Extent state. */ + extent_state_t e_state; /* * The zeroed flag is used by extent recycling code to track whether @@ -67,18 +73,48 @@ struct extent_s { }; /* - * Linkage for arena's extents_dirty and arena_bin_t's slabs_full rings. + * List linkage, used by a variety of lists: + * - arena_bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + * - arena's extent structure freelist */ - qr(extent_t) qr_link; + ql_elm(extent_t) ql_link; - union { - /* Linkage for per size class sn/address-ordered heaps. */ - phn(extent_t) ph_link; - - /* Linkage for arena's large and extent_cache lists. */ - ql_elm(extent_t) ql_link; - }; + /* Linkage for per size class sn/address-ordered heaps. */ + phn(extent_t) ph_link; }; +typedef ql_head(extent_t) extent_list_t; typedef ph(extent_t) extent_heap_t; +/* Quantized collection of extents, with built-in LRU queue. */ +struct extents_s { + malloc_mutex_t mtx; + + /* + * Quantized per size class heaps of extents. + * + * Synchronization: mtx. + */ + extent_heap_t heaps[NPSIZES+1]; + + /* + * LRU of all extents in heaps. + * + * Synchronization: mtx. + */ + extent_list_t lru; + + /* + * Page sum for all extents in heaps. + * + * Synchronization: atomic. + */ + size_t npages; + + /* All stored extents must be in the same state. 
*/ + extent_state_t state; +}; + #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 53db1c36..b6905ce1 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_EXTENT_TYPES_H typedef struct extent_s extent_t; +typedef struct extents_s extents_t; #define EXTENT_HOOKS_INITIALIZER NULL diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 33fd2fac..bace9c46 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -979,6 +979,7 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_ind_get(arena) < narenas_auto); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { @@ -1004,6 +1005,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_ind_get(arena) < narenas_auto); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -1042,7 +1044,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { if (extent == NULL) { return 0; } - assert(extent_active_get(extent)); + assert(extent_state_get(extent) == extent_state_active); /* Only slab members should be looked up via interior pointers. 
*/ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); @@ -1056,6 +1058,7 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(!is_internal || tcache == NULL); assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < narenas_auto); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, extent, ptr)); @@ -1073,6 +1076,7 @@ idalloc(tsd_t *tsd, extent_t *extent, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, tcache_t *tcache, bool slow_path) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); arena_sdalloc(tsdn, extent, ptr, size, tcache, slow_path); } @@ -1080,6 +1084,7 @@ JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); void *p; size_t usize, copysize; @@ -1117,6 +1122,7 @@ iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); assert(size != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { @@ -1144,6 +1150,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero) { assert(ptr != NULL); assert(size != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index f0a03399..66aa755c 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -17,7 +17,8 @@ extern 
large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; void large_dalloc_junk(void *ptr, size_t usize); void large_dalloc_maybe_junk(void *ptr, size_t usize); #endif -void large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 36bcda24..d1166b20 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -25,11 +25,9 @@ arena_destroy arena_dss_prec_get arena_dss_prec_set arena_extent_alloc_large -arena_extent_cache_alloc arena_extent_cache_dalloc -arena_extent_cache_maybe_insert -arena_extent_cache_maybe_remove -arena_extent_dalloc_large +arena_extent_dalloc_large_finish +arena_extent_dalloc_large_prep arena_extent_ralloc_large_expand arena_extent_ralloc_large_shrink arena_extent_sn_next @@ -141,15 +139,12 @@ ctl_postfork_parent ctl_prefork decay_ticker_get dss_prec_names -extent_active_get -extent_active_set extent_ad_comp extent_addr_get extent_addr_randomize extent_addr_set extent_alloc extent_alloc_cache -extent_alloc_cache_locked extent_alloc_dss extent_alloc_mmap extent_alloc_wrapper @@ -184,6 +179,10 @@ extent_hooks_set extent_in_dss extent_init extent_last_get +extent_list_append +extent_list_first +extent_list_last +extent_list_remove extent_lookup extent_merge_wrapper extent_past_get @@ -191,9 +190,6 @@ extent_prof_tctx_get extent_prof_tctx_set extent_purge_forced_wrapper extent_purge_lazy_wrapper -extent_retained_get -extent_ring_insert -extent_ring_remove extent_size_get extent_size_quantize_ceil extent_size_quantize_floor @@ -207,11 +203,20 @@ extent_sn_get extent_sn_set 
extent_snad_comp extent_split_wrapper +extent_state_get +extent_state_set extent_usize_get extent_usize_set extent_zeroed_get extent_zeroed_set +extents_evict +extents_init +extents_npages_get +extents_prefork +extents_postfork_child +extents_postfork_parent extents_rtree +extents_state_get ffs_llu ffs_lu ffs_u @@ -255,9 +260,10 @@ jemalloc_postfork_child jemalloc_postfork_parent jemalloc_prefork large_dalloc +large_dalloc_finish large_dalloc_junk -large_dalloc_junked_locked large_dalloc_maybe_junk +large_dalloc_prep_junked_locked large_malloc large_palloc large_prof_tctx_get diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 32ef6118..5cdb0cd9 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -70,9 +70,14 @@ struct malloc_large_stats_s { size_t curlextents; }; +/* + * Arena stats. Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. + */ struct arena_stats_s { - /* Number of bytes currently mapped. */ - size_t mapped; + /* Number of bytes currently mapped, excluding retained memory. */ + size_t mapped; /* Derived. */ /* * Number of bytes currently retained as a side effect of munmap() being @@ -80,7 +85,7 @@ struct arena_stats_s { * always decommitted or purged), but they are excluded from the mapped * statistic (above). */ - size_t retained; + size_t retained; /* Derived. */ /* * Total number of purge sweeps, total number of madvise calls made, @@ -91,9 +96,9 @@ struct arena_stats_s { uint64_t nmadvise; uint64_t purged; - size_t base; + size_t base; /* Derived. */ size_t internal; /* Protected via atomic_*_zu(). */ - size_t resident; + size_t resident; /* Derived. */ size_t allocated_large; uint64_t nmalloc_large; @@ -101,7 +106,7 @@ struct arena_stats_s { uint64_t nrequests_large; /* Number of bytes cached in tcache associated with this arena. 
*/ - size_t tcache_bytes; + size_t tcache_bytes; /* Derived. */ /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index dfcf1621..29299168 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -26,9 +26,17 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_PROF_TDATA 7U #define WITNESS_RANK_PROF_GCTX 8U +/* + * Used as an argument to witness_depth_to_rank() in order to validate depth + * excluding non-core locks with lower ranks. Since the rank argument to + * witness_depth_to_rank() is inclusive rather than exclusive, this definition + * can have the same value as the minimally ranked core lock. + */ +#define WITNESS_RANK_CORE 9U + #define WITNESS_RANK_ARENA 9U -#define WITNESS_RANK_ARENA_EXTENTS 10U -#define WITNESS_RANK_ARENA_EXTENT_CACHE 11U +#define WITNESS_RANK_EXTENTS 10U +#define WITNESS_RANK_EXTENT_FREELIST 11U #define WITNESS_RANK_RTREE_ELM 12U #define WITNESS_RANK_RTREE 13U diff --git a/src/arena.c b/src/arena.c index b0da9a03..5905306c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,75 +37,13 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -static size_t -arena_extent_dirty_npages(const extent_t *extent) { - return (extent_size_get(extent) >> LG_PAGE); -} - -static extent_t * -arena_extent_cache_alloc_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool slab) { - bool commit = true; - - malloc_mutex_assert_owner(tsdn, &arena->lock); - - return extent_alloc_cache(tsdn, arena, r_extent_hooks, new_addr, usize, - pad, alignment, zero, &commit, slab); -} - -extent_t * -arena_extent_cache_alloc(tsdn_t *tsdn, arena_t *arena, - 
extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero) { - extent_t *extent; - - malloc_mutex_lock(tsdn, &arena->lock); - extent = arena_extent_cache_alloc_locked(tsdn, arena, r_extent_hooks, - new_addr, size, 0, alignment, zero, false); - malloc_mutex_unlock(tsdn, &arena->lock); - - return extent; -} - -static void -arena_extent_cache_dalloc_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &arena->lock); - - extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); - arena_maybe_purge(tsdn, arena); -} - void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->lock); - arena_extent_cache_dalloc_locked(tsdn, arena, r_extent_hooks, extent); - malloc_mutex_unlock(tsdn, &arena->lock); -} + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); -void -arena_extent_cache_maybe_insert(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool cache) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); - - if (cache) { - extent_ring_insert(&arena->extents_dirty, extent); - arena->ndirty += arena_extent_dirty_npages(extent); - } -} - -void -arena_extent_cache_maybe_remove(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool dirty) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); - - if (dirty) { - extent_ring_remove(extent); - assert(arena->ndirty >= arena_extent_dirty_npages(extent)); - arena->ndirty -= arena_extent_dirty_npages(extent); - } + extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); + arena_purge(tsdn, arena, false); } JEMALLOC_INLINE_C void * @@ -180,13 +118,13 @@ arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, static void arena_nactive_add(arena_t *arena, size_t add_pages) { - arena->nactive += add_pages; + atomic_add_zu(&arena->nactive, add_pages); } static void arena_nactive_sub(arena_t *arena, size_t sub_pages) { - 
assert(arena->nactive >= sub_pages); - arena->nactive -= sub_pages; + assert(atomic_read_zu(&arena->nactive) >= sub_pages); + atomic_sub_zu(&arena->nactive, sub_pages); } static void @@ -269,6 +207,8 @@ arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, extent_t *extent; bool commit = true; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + extent = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); if (extent == NULL) { @@ -291,6 +231,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, extent_t *extent; extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + malloc_mutex_lock(tsdn, &arena->lock); /* Optimistically update stats. */ @@ -300,9 +242,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); - extent = arena_extent_cache_alloc_locked(tsdn, arena, &extent_hooks, - NULL, usize, large_pad, alignment, zero, false); malloc_mutex_unlock(tsdn, &arena->lock); + + bool commit = true; + extent = extent_alloc_cache(tsdn, arena, &extent_hooks, NULL, usize, + large_pad, alignment, zero, &commit, false); if (extent == NULL) { extent = arena_extent_alloc_large_hard(tsdn, arena, &extent_hooks, usize, alignment, zero); @@ -312,10 +256,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } void -arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool locked) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - if (!locked) { malloc_mutex_lock(tsdn, &arena->lock); } else { @@ -326,12 +268,17 @@ arena_extent_dalloc_large(tsdn_t *tsdn, arena_t *arena, extent_t *extent, extent_usize_get(extent)); arena->stats.mapped -= extent_size_get(extent); } - arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); 
- - arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, extent); if (!locked) { malloc_mutex_unlock(tsdn, &arena->lock); } + arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); +} + +void +arena_extent_dalloc_large_finish(tsdn_t *tsdn, arena_t *arena, + extent_t *extent) { + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_dalloc_cache(tsdn, arena, &extent_hooks, extent); } void @@ -414,8 +361,9 @@ arena_decay_backlog_npages_limit(const arena_t *arena) { static void arena_decay_backlog_update_last(arena_t *arena) { - size_t ndirty_delta = (arena->ndirty > arena->decay.nunpurged) ? - arena->ndirty - arena->decay.nunpurged : 0; + size_t ndirty = extents_npages_get(&arena->extents_cached); + size_t ndirty_delta = (ndirty > arena->decay.nunpurged) ? ndirty - + arena->decay.nunpurged : 0; arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; } @@ -468,10 +416,15 @@ static void arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) { size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); - if (arena->ndirty > ndirty_limit) { + if (extents_npages_get(&arena->extents_cached) > ndirty_limit) { arena_purge_to_limit(tsdn, arena, ndirty_limit); } - arena->decay.nunpurged = arena->ndirty; + /* + * There may be concurrent ndirty fluctuation between the purge above + * and the nunpurged update below, but this is inconsequential to decay + * machinery correctness. 
+ */ + arena->decay.nunpurged = extents_npages_get(&arena->extents_cached); } static void @@ -492,7 +445,7 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) { nstime_update(&arena->decay.epoch); arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; arena_decay_deadline_init(arena); - arena->decay.nunpurged = arena->ndirty; + arena->decay.nunpurged = extents_npages_get(&arena->extents_cached); memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } @@ -540,9 +493,9 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { return false; } -static void -arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) { - nstime_t time; +void +arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_assert_owner(tsdn, &arena->lock); /* Purge all or nothing if the option is disabled. */ if (arena->decay.time <= 0) { @@ -552,6 +505,7 @@ arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) { return; } + nstime_t time; nstime_init(&time, 0); nstime_update(&time); if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, @@ -583,95 +537,40 @@ arena_maybe_purge_helper(tsdn_t *tsdn, arena_t *arena) { } } -void -arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_owner(tsdn, &arena->lock); - - /* Don't recursively purge. 
*/ - if (arena->purging) { - return; - } - - arena_maybe_purge_helper(tsdn, arena); -} - -static size_t -arena_dirty_count(tsdn_t *tsdn, arena_t *arena) { - extent_t *extent; - size_t ndirty = 0; - - malloc_mutex_lock(tsdn, &arena->extents_mtx); - - for (extent = qr_next(&arena->extents_dirty, qr_link); extent != - &arena->extents_dirty; extent = qr_next(extent, qr_link)) { - ndirty += extent_size_get(extent) >> LG_PAGE; - } - - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - - return ndirty; -} - static size_t arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - size_t ndirty_limit, extent_t *purge_extents_sentinel) { - extent_t *extent, *next; - size_t nstashed = 0; - - malloc_mutex_lock(tsdn, &arena->extents_mtx); + size_t ndirty_limit, extent_list_t *purge_extents) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* Stash extents according to ndirty_limit. */ - for (extent = qr_next(&arena->extents_dirty, qr_link); extent != - &arena->extents_dirty; extent = next) { - size_t npages; - bool zero, commit; - UNUSED extent_t *textent; - - npages = extent_size_get(extent) >> LG_PAGE; - if (arena->ndirty - (nstashed + npages) < ndirty_limit) { - break; - } - - next = qr_next(extent, qr_link); - /* Allocate. 
*/ - zero = false; - commit = false; - textent = extent_alloc_cache_locked(tsdn, arena, r_extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, PAGE, - &zero, &commit, false); - assert(textent == extent); - assert(zero == extent_zeroed_get(extent)); - extent_ring_remove(extent); - extent_ring_insert(purge_extents_sentinel, extent); - - nstashed += npages; + size_t nstashed = 0; + for (extent_t *extent = extents_evict(tsdn, &arena->extents_cached, + ndirty_limit); extent != NULL; extent = extents_evict(tsdn, + &arena->extents_cached, ndirty_limit)) { + extent_list_append(purge_extents, extent); + nstashed += extent_size_get(extent) >> LG_PAGE; } - - malloc_mutex_unlock(tsdn, &arena->extents_mtx); return nstashed; } static size_t arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *purge_extents_sentinel) { + extent_hooks_t **r_extent_hooks, extent_list_t *purge_extents) { UNUSED size_t nmadvise; size_t npurged; - extent_t *extent, *next; if (config_stats) { nmadvise = 0; } npurged = 0; - for (extent = qr_next(purge_extents_sentinel, qr_link); extent != - purge_extents_sentinel; extent = next) { + for (extent_t *extent = extent_list_first(purge_extents); extent != + NULL; extent = extent_list_first(purge_extents)) { if (config_stats) { nmadvise++; } npurged += extent_size_get(extent) >> LG_PAGE; - - next = qr_next(extent, qr_link); - extent_ring_remove(extent); + extent_list_remove(purge_extents, extent); extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, extent); } @@ -684,43 +583,44 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * ndirty_limit: Purge as many dirty extents as possible without violating the - * invariant: (arena->ndirty >= ndirty_limit) + * ndirty_limit: Purge as many dirty extents as possible without violating the + * invariant: (extents_npages_get(&arena->extents_cached) >= ndirty_limit) */ static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { + 
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); + malloc_mutex_assert_owner(tsdn, &arena->lock); + + if (atomic_cas_u(&arena->purging, 0, 1)) { + return; + } + extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t npurge, npurged; - extent_t purge_extents_sentinel; + extent_list_t purge_extents; - arena->purging = true; + extent_list_init(&purge_extents); - /* - * Calls to arena_dirty_count() are disabled even for debug builds - * because overhead grows nonlinearly as memory usage increases. - */ - if (false && config_debug) { - size_t ndirty = arena_dirty_count(tsdn, arena); - assert(ndirty == arena->ndirty); - } - extent_init(&purge_extents_sentinel, arena, NULL, 0, 0, 0, false, false, - false, false); + malloc_mutex_unlock(tsdn, &arena->lock); npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, ndirty_limit, - &purge_extents_sentinel); + &purge_extents); if (npurge == 0) { + malloc_mutex_lock(tsdn, &arena->lock); goto label_return; } npurged = arena_purge_stashed(tsdn, arena, &extent_hooks, - &purge_extents_sentinel); + &purge_extents); assert(npurged == npurge); + malloc_mutex_lock(tsdn, &arena->lock); + if (config_stats) { arena->stats.npurge++; } label_return: - arena->purging = false; + atomic_write_u(&arena->purging, 0); } void @@ -737,9 +637,14 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + size_t npages = extent_size_get(slab) >> LG_PAGE; - arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - arena_extent_cache_dalloc_locked(tsdn, arena, &extent_hooks, slab); + extent_dalloc_cache(tsdn, arena, &extent_hooks, slab); + + arena_nactive_sub(arena, npages); + malloc_mutex_lock(tsdn, &arena->lock); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->lock); } static void @@ -768,19 +673,16 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { static void 
arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) { assert(extent_slab_data_get(slab)->nfree == 0); - extent_ring_insert(&bin->slabs_full, slab); + extent_list_append(&bin->slabs_full, slab); } static void -arena_bin_slabs_full_remove(extent_t *slab) { - extent_ring_remove(slab); +arena_bin_slabs_full_remove(arena_bin_t *bin, extent_t *slab) { + extent_list_remove(&bin->slabs_full, slab); } void arena_reset(tsd_t *tsd, arena_t *arena) { - unsigned i; - extent_t *extent; - /* * Locking in this function is unintuitive. The caller guarantees that * no concurrent operations are happening in this arena, but there are @@ -797,8 +699,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Large allocations. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - for (extent = ql_last(&arena->large, ql_link); extent != NULL; extent = - ql_last(&arena->large, ql_link)) { + + for (extent_t *extent = extent_list_first(&arena->large); extent != + NULL; extent = extent_list_first(&arena->large)) { void *ptr = extent_base_get(extent); size_t usize; @@ -819,10 +722,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); - malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - /* Bins. 
*/ - for (i = 0; i < NBINS; i++) { + for (unsigned i = 0; i < NBINS; i++) { extent_t *slab; arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -839,10 +740,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) { arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - for (slab = qr_next(&bin->slabs_full, qr_link); slab != - &bin->slabs_full; slab = qr_next(&bin->slabs_full, - qr_link)) { - arena_bin_slabs_full_remove(slab); + for (slab = extent_list_first(&bin->slabs_full); slab != NULL; + slab = extent_list_first(&bin->slabs_full)) { + arena_bin_slabs_full_remove(bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -854,17 +754,12 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - assert(!arena->purging); - arena->nactive = 0; - - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); + assert(atomic_read_u(&arena->purging) == 0); + atomic_write_zu(&arena->nactive, 0); } static void arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { - extent_hooks_t *extent_hooks = extent_hooks_get(arena); - size_t i; - /* * Iterate over the retained extents and blindly attempt to deallocate * them. This gives the extent allocator underlying the extent hooks an @@ -876,15 +771,11 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * dss for arenas to be destroyed), or provide custom extent hooks that * either unmap retained extents or track them for later use. 
*/ - for (i = 0; i < sizeof(arena->extents_retained)/sizeof(extent_heap_t); - i++) { - extent_heap_t *extents = &arena->extents_retained[i]; - extent_t *extent; - - while ((extent = extent_heap_remove_first(extents)) != NULL) { - extent_dalloc_wrapper_try(tsdn, arena, &extent_hooks, - extent); - } + extent_hooks_t *extent_hooks = extent_hooks_get(arena); + for (extent_t *extent = extents_evict(tsdn, &arena->extents_retained, + 0); extent != NULL; extent = extents_evict(tsdn, + &arena->extents_retained, 0)) { + extent_dalloc_wrapper_try(tsdn, arena, &extent_hooks, extent); } } @@ -899,7 +790,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(arena->ndirty == 0); + assert(extents_npages_get(&arena->extents_cached) == 0); /* Attempt to deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -929,12 +820,12 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, extent_t *slab; bool zero, commit; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + zero = false; commit = true; - malloc_mutex_unlock(tsdn, &arena->lock); slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, &commit, true); - malloc_mutex_lock(tsdn, &arena->lock); return slab; } @@ -942,13 +833,13 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, const arena_bin_info_t *bin_info) { - extent_t *slab; - arena_slab_data_t *slab_data; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; bool zero = false; - - slab = arena_extent_cache_alloc_locked(tsdn, arena, &extent_hooks, NULL, - bin_info->slab_size, 0, PAGE, &zero, true); + bool commit = true; + extent_t *slab = extent_alloc_cache(tsdn, arena, &extent_hooks, NULL, + bin_info->slab_size, 0, PAGE, &zero, 
&commit, true); if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, bin_info); @@ -958,10 +849,12 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } assert(extent_slab_get(slab)); + malloc_mutex_lock(tsdn, &arena->lock); + arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); /* Initialize slab internals. */ - slab_data = extent_slab_data_get(slab); + arena_slab_data_t *slab_data = extent_slab_data_get(slab); slab_data->binind = binind; slab_data->nfree = bin_info->nregs; bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); @@ -969,6 +862,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, if (config_stats) { arena->stats.mapped += extent_size_get(slab); } + malloc_mutex_unlock(tsdn, &arena->lock); return slab; } @@ -991,9 +885,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(tsdn, &arena->lock); slab = arena_slab_alloc(tsdn, arena, binind, bin_info); - malloc_mutex_unlock(tsdn, &arena->lock); /********************************/ malloc_mutex_lock(tsdn, &bin->lock); if (slab != NULL) { @@ -1317,7 +1209,7 @@ arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { * into the non-full slabs heap. 
*/ if (bin_info->nregs == 1) { - arena_bin_slabs_full_remove(slab); + arena_bin_slabs_full_remove(bin, slab); } else { arena_bin_slabs_nonfull_remove(bin, slab); } @@ -1331,9 +1223,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - malloc_mutex_lock(tsdn, &arena->lock); arena_slab_dalloc(tsdn, arena, slab); - malloc_mutex_unlock(tsdn, &arena->lock); /****************************/ malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) { @@ -1385,7 +1275,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_dissociate_bin_slab(slab, bin); arena_dalloc_bin_slab(tsdn, arena, slab, bin); } else if (slab_data->nfree == 1 && slab != bin->slabcur) { - arena_bin_slabs_full_remove(slab); + arena_bin_slabs_full_remove(bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); } @@ -1554,8 +1444,8 @@ arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena->dss_prec]; *decay_time = arena->decay.time; - *nactive += arena->nactive; - *ndirty += arena->ndirty; + *nactive += atomic_read_zu(&arena->nactive); + *ndirty += extents_npages_get(&arena->extents_cached); } void @@ -1585,14 +1475,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, &base_mapped); astats->mapped += base_mapped + arena->stats.mapped; - astats->retained += arena->stats.retained; + astats->retained += (extents_npages_get(&arena->extents_retained) << + LG_PAGE); astats->npurge += arena->stats.npurge; astats->nmadvise += arena->stats.nmadvise; astats->purged += arena->stats.purged; astats->base += base_allocated; astats->internal += arena_internal_get(arena); - astats->resident += base_resident + (((arena->nactive + arena->ndirty) - << LG_PAGE)); + astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) + + extents_npages_get(&arena->extents_cached)) << LG_PAGE)); 
astats->allocated_large += arena->stats.allocated_large; astats->nmalloc_large += arena->stats.nmalloc_large; astats->ndalloc_large += arena->stats.ndalloc_large; @@ -1709,28 +1600,22 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->dss_prec = extent_dss_prec_get(); - arena->purging = false; - arena->nactive = 0; - arena->ndirty = 0; + atomic_write_u(&arena->purging, 0); + atomic_write_zu(&arena->nactive, 0); arena_decay_init(arena, arena_decay_time_default_get()); - ql_new(&arena->large); + extent_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE)) { goto label_error; } - for (i = 0; i < NPSIZES+1; i++) { - extent_heap_new(&arena->extents_cached[i]); - extent_heap_new(&arena->extents_retained[i]); + if (extents_init(tsdn, &arena->extents_cached, extent_state_dirty)) { + goto label_error; } - - extent_init(&arena->extents_dirty, arena, NULL, 0, 0, 0, false, false, - false, false); - - if (malloc_mutex_init(&arena->extents_mtx, "arena_extents", - WITNESS_RANK_ARENA_EXTENTS)) { + if (extents_init(tsdn, &arena->extents_retained, + extent_state_retained)) { goto label_error; } @@ -1738,9 +1623,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->extent_grow_next = psz2ind(HUGEPAGE); } - ql_new(&arena->extent_cache); - if (malloc_mutex_init(&arena->extent_cache_mtx, "arena_extent_cache", - WITNESS_RANK_ARENA_EXTENT_CACHE)) { + extent_list_init(&arena->extent_freelist); + if (malloc_mutex_init(&arena->extent_freelist_mtx, "extent_freelist", + WITNESS_RANK_EXTENT_FREELIST)) { goto label_error; } @@ -1753,8 +1638,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } bin->slabcur = NULL; extent_heap_new(&bin->slabs_nonfull); - extent_init(&bin->slabs_full, arena, NULL, 0, 0, 0, false, - false, false, false); + extent_list_init(&bin->slabs_full); if (config_stats) { memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); } @@ -1782,12 
+1666,13 @@ arena_prefork0(tsdn_t *tsdn, arena_t *arena) { void arena_prefork1(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extents_mtx); + extents_prefork(tsdn, &arena->extents_cached); + extents_prefork(tsdn, &arena->extents_retained); } void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_cache_mtx); + malloc_mutex_prefork(tsdn, &arena->extent_freelist_mtx); } void @@ -1810,8 +1695,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); } base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->extent_cache_mtx); - malloc_mutex_postfork_parent(tsdn, &arena->extents_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->extent_freelist_mtx); + extents_postfork_parent(tsdn, &arena->extents_cached); + extents_postfork_parent(tsdn, &arena->extents_retained); malloc_mutex_postfork_parent(tsdn, &arena->lock); } @@ -1824,7 +1710,8 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); } base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->extent_cache_mtx); - malloc_mutex_postfork_child(tsdn, &arena->extents_mtx); + malloc_mutex_postfork_child(tsdn, &arena->extent_freelist_mtx); + extents_postfork_child(tsdn, &arena->extents_cached); + extents_postfork_child(tsdn, &arena->extents_retained); malloc_mutex_postfork_child(tsdn, &arena->lock); } diff --git a/src/base.c b/src/base.c index 9fb1f14f..e7712a64 100644 --- a/src/base.c +++ b/src/base.c @@ -87,7 +87,8 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, sn = *extent_sn_next; (*extent_sn_next)++; - extent_init(extent, NULL, addr, size, 0, sn, true, true, true, false); + extent_init(extent, NULL, addr, size, 0, sn, extent_state_active, true, + true, false); } static void * @@ -104,7 +105,7 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, 
size_t size, assert(extent_size_get(extent) >= *gap_size + size); extent_init(extent, NULL, (void *)((uintptr_t)extent_addr_get(extent) + *gap_size + size), extent_size_get(extent) - *gap_size - size, 0, - extent_sn_get(extent), true, true, true, false); + extent_sn_get(extent), extent_state_active, true, true, false); return ret; } diff --git a/src/extent.c b/src/extent.c index e2af2b50..293b96e5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -68,9 +68,9 @@ static size_t highpages; * definition. */ -static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_heap_t extent_heaps[NPSIZES+1], - bool cache, extent_t *extent); +static void extent_deregister(tsdn_t *tsdn, extent_t *extent); +static void extent_record(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); /******************************************************************************/ @@ -78,24 +78,26 @@ extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { extent_t *extent; - malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); - extent = ql_last(&arena->extent_cache, ql_link); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + malloc_mutex_lock(tsdn, &arena->extent_freelist_mtx); + extent = extent_list_last(&arena->extent_freelist); if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); - return base_alloc(tsdn, arena->base, sizeof(extent_t), - QUANTUM); + malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); + return base_alloc(tsdn, arena->base, sizeof(extent_t), QUANTUM); } - ql_tail_remove(&arena->extent_cache, extent_t, ql_link); - malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); + extent_list_remove(&arena->extent_freelist, extent); + malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); return extent; } void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->extent_cache_mtx); - ql_elm_new(extent, ql_link); - 
ql_tail_insert(&arena->extent_cache, extent, ql_link); - malloc_mutex_unlock(tsdn, &arena->extent_cache_mtx); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + malloc_mutex_lock(tsdn, &arena->extent_freelist_mtx); + extent_list_append(&arena->extent_freelist, extent); + malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); } extent_hooks_t * @@ -188,26 +190,174 @@ extent_size_quantize_t *extent_size_quantize_ceil = /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -static void -extent_heaps_insert(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], - extent_t *extent) { - size_t psz = extent_size_quantize_floor(extent_size_get(extent)); - pszind_t pind = psz2ind(psz); +bool +extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state) { + if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS)) { + return true; + } + for (unsigned i = 0; i < NPSIZES+1; i++) { + extent_heap_new(&extents->heaps[i]); + } + extent_list_init(&extents->lru); + extents->npages = 0; + extents->state = state; + return false; +} - malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); +extent_state_t +extents_state_get(const extents_t *extents) { + return extents->state; +} - extent_heap_insert(&extent_heaps[pind], extent); +size_t +extents_npages_get(extents_t *extents) { + return atomic_read_zu(&extents->npages); } static void -extent_heaps_remove(tsdn_t *tsdn, extent_heap_t extent_heaps[NPSIZES+1], - extent_t *extent) { - size_t psz = extent_size_quantize_floor(extent_size_get(extent)); +extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &extents->mtx); + assert(extent_state_get(extent) == extents->state); + + size_t size = extent_size_get(extent); + size_t psz = extent_size_quantize_floor(size); pszind_t pind = psz2ind(psz); + extent_heap_insert(&extents->heaps[pind], extent); + 
extent_list_append(&extents->lru, extent); + size_t npages = size >> LG_PAGE; + atomic_add_zu(&extents->npages, npages); +} - malloc_mutex_assert_owner(tsdn, &extent_arena_get(extent)->extents_mtx); +static void +extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &extents->mtx); + assert(extent_state_get(extent) == extents->state); - extent_heap_remove(&extent_heaps[pind], extent); + size_t size = extent_size_get(extent); + size_t psz = extent_size_quantize_floor(size); + pszind_t pind = psz2ind(psz); + extent_heap_remove(&extents->heaps[pind], extent); + extent_list_remove(&extents->lru, extent); + size_t npages = size >> LG_PAGE; + assert(atomic_read_zu(&extents->npages) >= npages); + atomic_sub_zu(&extents->npages, size >> LG_PAGE); +} + +/* + * Do first-best-fit extent selection, i.e. select the oldest/lowest extent that + * best fits. + */ +static extent_t * +extents_first_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, + size_t size) { + malloc_mutex_assert_owner(tsdn, &extents->mtx); + + pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); + for (pszind_t i = pind; i < NPSIZES+1; i++) { + extent_t *extent = extent_heap_first(&extents->heaps[i]); + if (extent != NULL) { + return extent; + } + } + + return NULL; +} + +extent_t * +extents_evict(tsdn_t *tsdn, extents_t *extents, size_t npages_min) { + malloc_mutex_lock(tsdn, &extents->mtx); + + /* Get the LRU extent, if any. */ + extent_t *extent = extent_list_first(&extents->lru); + if (extent == NULL) { + goto label_return; + } + /* Check the eviction limit. */ + size_t npages = extent_size_get(extent) >> LG_PAGE; + if (atomic_read_zu(&extents->npages) - npages < npages_min) { + extent = NULL; + goto label_return; + } + extents_remove_locked(tsdn, extents, extent); + + /* + * Either mark the extent active or deregister it to protect against + * concurrent operations. 
+ */ + switch (extents_state_get(extents)) { + case extent_state_dirty: + extent_state_set(extent, extent_state_active); + break; + case extent_state_retained: + extent_deregister(tsdn, extent); + break; + default: + not_reached(); + } + +label_return: + malloc_mutex_unlock(tsdn, &extents->mtx); + return extent; +} + +static void +extents_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extents_t *extents, extent_t *extent) { + /* + * Leak extent after making sure its pages have already been purged, so + * that this is only a virtual memory leak. + */ + if (extents_state_get(extents) == extent_state_dirty) { + if (extent_purge_lazy_wrapper(tsdn, arena, r_extent_hooks, + extent, 0, extent_size_get(extent))) { + extent_purge_forced_wrapper(tsdn, arena, r_extent_hooks, + extent, 0, extent_size_get(extent)); + } + } + extent_dalloc(tsdn, arena, extent); +} + +void +extents_prefork(tsdn_t *tsdn, extents_t *extents) { + malloc_mutex_prefork(tsdn, &extents->mtx); +} + +void +extents_postfork_parent(tsdn_t *tsdn, extents_t *extents) { + malloc_mutex_postfork_parent(tsdn, &extents->mtx); +} + +void +extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { + malloc_mutex_postfork_child(tsdn, &extents->mtx); +} + +static void +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, + extent_t *extent) { + assert(extent_arena_get(extent) == arena); + assert(extent_state_get(extent) == extent_state_active); + + extent_state_set(extent, extents_state_get(extents)); + extents_insert_locked(tsdn, extents, extent); +} + +static void +extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, + extent_t *extent) { + malloc_mutex_lock(tsdn, &extents->mtx); + extent_deactivate_locked(tsdn, arena, extents, extent); + malloc_mutex_unlock(tsdn, &extents->mtx); +} + +static void +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, + extent_t *extent) { + assert(extent_arena_get(extent) == arena); + 
assert(extent_state_get(extent) == extents_state_get(extents)); + + extents_remove_locked(tsdn, extents, extent); + extent_state_set(extent, extent_state_active); } static bool @@ -269,10 +419,12 @@ extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } static void -extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) { +extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { cassert(config_prof); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - if (opt_prof && extent_active_get(extent)) { + if (opt_prof && extent_state_get(extent) == extent_state_active) { size_t nadd = extent_size_get(extent) >> LG_PAGE; size_t cur = atomic_add_zu(&curpages, nadd); size_t high = atomic_read_zu(&highpages); @@ -290,10 +442,10 @@ extent_gprof_add(tsdn_t *tsdn, const extent_t *extent) { } static void -extent_gprof_sub(tsdn_t *tsdn, const extent_t *extent) { +extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { cassert(config_prof); - if (opt_prof && extent_active_get(extent)) { + if (opt_prof && extent_state_get(extent) == extent_state_active) { size_t nsub = extent_size_get(extent) >> LG_PAGE; assert(atomic_read_zu(&curpages) >= nsub); atomic_sub_zu(&curpages, nsub); @@ -317,7 +469,7 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) { extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof) { - extent_gprof_add(tsdn, extent); + extent_gdump_add(tsdn, extent); } return false; @@ -359,68 +511,21 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_rtree_release(tsdn, elm_a, elm_b); if (config_prof) { - extent_gprof_sub(tsdn, extent); + extent_gdump_sub(tsdn, extent); } } -/* - * Do first-best-fit extent selection, i.e. select the oldest/lowest extent that - * best fits. 
- */ -static extent_t * -extent_first_best_fit(tsdn_t *tsdn, arena_t *arena, - extent_heap_t extent_heaps[NPSIZES+1], size_t size) { - pszind_t pind, i; - - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); - - pind = psz2ind(extent_size_quantize_ceil(size)); - for (i = pind; i < NPSIZES+1; i++) { - extent_t *extent = extent_heap_first(&extent_heaps[i]); - if (extent != NULL) { - return extent; - } - } - - return NULL; -} - -static void -extent_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - bool cache, extent_t *extent) { - /* - * Leak extent after making sure its pages have already been purged, so - * that this is only a virtual memory leak. - */ - if (cache) { - if (extent_purge_lazy_wrapper(tsdn, arena, r_extent_hooks, - extent, 0, extent_size_get(extent))) { - extent_purge_forced_wrapper(tsdn, arena, r_extent_hooks, - extent, 0, extent_size_get(extent)); - } - } - extent_dalloc(tsdn, arena, extent); -} - static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_heap_t extent_heaps[NPSIZES+1], bool locked, bool cache, - void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, - bool *commit, bool slab) { - extent_t *extent; - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - size_t size, alloc_size, leadsize, trailsize; - +extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + bool locked, void *new_addr, size_t usize, size_t pad, size_t alignment, + bool *zero, bool *commit) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, locked ? 
1 : 0); if (locked) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); + malloc_mutex_assert_owner(tsdn, &extents->mtx); } - assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); assert(alignment > 0); if (config_debug && new_addr != NULL) { - extent_t *prev; - /* * Non-NULL new_addr has two use cases: * @@ -435,21 +540,19 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(PAGE_ADDR2BASE(new_addr) == new_addr); assert(pad == 0); assert(alignment <= PAGE); - prev = extent_lookup(tsdn, (void *)((uintptr_t)new_addr - PAGE), - false); - assert(prev == NULL || extent_past_get(prev) == new_addr); } - size = usize + pad; - alloc_size = size + PAGE_CEILING(alignment) - PAGE; + size_t size = usize + pad; + size_t alloc_size = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (alloc_size < usize) { return NULL; } if (!locked) { - malloc_mutex_lock(tsdn, &arena->extents_mtx); + malloc_mutex_lock(tsdn, &extents->mtx); } extent_hooks_assure_initialized(arena, r_extent_hooks); + extent_t *extent; if (new_addr != NULL) { rtree_elm_t *elm; @@ -462,8 +565,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(extent_base_get(extent) == new_addr); if (extent_arena_get(extent) != arena || extent_size_get(extent) < size || - extent_active_get(extent) || - extent_retained_get(extent) == cache) { + extent_state_get(extent) != + extents_state_get(extents)) { extent = NULL; } } @@ -472,23 +575,21 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = NULL; } } else { - extent = extent_first_best_fit(tsdn, arena, extent_heaps, + extent = extents_first_best_fit_locked(tsdn, arena, extents, alloc_size); } if (extent == NULL) { if (!locked) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + malloc_mutex_unlock(tsdn, &extents->mtx); } return NULL; } - extent_heaps_remove(tsdn, extent_heaps, extent); - arena_extent_cache_maybe_remove(tsdn, 
arena, extent, cache); - leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); - assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + size); - trailsize = extent_size_get(extent) - leadsize - size; + extent_activate_locked(tsdn, arena, extents, extent); + if (!locked) { + malloc_mutex_unlock(tsdn, &extents->mtx); + } + if (extent_zeroed_get(extent)) { *zero = true; } @@ -496,6 +597,21 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, *commit = true; } + return extent; +} + +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + void *new_addr, size_t usize, size_t pad, size_t alignment, + extent_t *extent) { + size_t size = usize + pad; + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); + assert(new_addr == NULL || leadsize == 0); + assert(extent_size_get(extent) >= leadsize + size); + size_t trailsize = extent_size_get(extent) - leadsize - size; + /* Split the lead. */ if (leadsize != 0) { extent_t *lead = extent; @@ -504,14 +620,11 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, trailsize); if (extent == NULL) { extent_deregister(tsdn, lead); - extent_leak(tsdn, arena, r_extent_hooks, cache, lead); - if (!locked) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - } + extents_leak(tsdn, arena, r_extent_hooks, extents, + lead); return NULL; } - extent_heaps_insert(tsdn, extent_heaps, lead); - arena_extent_cache_maybe_insert(tsdn, arena, lead, cache); + extent_deactivate(tsdn, arena, extents, lead); } /* Split the trail. 
*/ @@ -520,15 +633,11 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, r_extent_hooks, extent, size, usize, trailsize, trailsize); if (trail == NULL) { extent_deregister(tsdn, extent); - extent_leak(tsdn, arena, r_extent_hooks, cache, + extents_leak(tsdn, arena, r_extent_hooks, extents, extent); - if (!locked) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - } return NULL; } - extent_heaps_insert(tsdn, extent_heaps, trail); - arena_extent_cache_maybe_insert(tsdn, arena, trail, cache); + extent_deactivate(tsdn, arena, extents, trail); } else if (leadsize == 0) { /* * Splitting causes usize to be set as a side effect, but no @@ -537,14 +646,38 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_usize_set(extent, usize); } + return extent; +} + +static extent_t * +extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extents_t *extents, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + assert(new_addr == NULL || !slab); + assert(pad == 0 || !slab); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, + rtree_ctx, extents, false, new_addr, usize, pad, alignment, zero, + commit); + if (extent == NULL) { + return NULL; + } + + extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, + extents, new_addr, usize, pad, alignment, extent); + if (extent == NULL) { + return NULL; + } + if (*commit && !extent_committed_get(extent)) { if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { - if (!locked) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - } - extent_record(tsdn, arena, r_extent_hooks, extent_heaps, - cache, extent); + extent_record(tsdn, arena, r_extent_hooks, extents, + 
extent); return NULL; } extent_zeroed_set(extent, true); @@ -553,16 +686,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } - extent_active_set(extent, true); + assert(extent_state_get(extent) == extent_state_active); if (slab) { extent_slab_set(extent, slab); extent_interior_register(tsdn, rtree_ctx, extent); } - if (!locked) { - malloc_mutex_unlock(tsdn, &arena->extents_mtx); - } - if (*zero) { if (!extent_zeroed_get(extent)) { memset(extent_addr_get(extent), 0, @@ -616,37 +745,17 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } -static extent_t * -extent_alloc_cache_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, bool locked, void *new_addr, size_t usize, - size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { - extent_t *extent; - - assert(usize + pad != 0); - assert(alignment != 0); - - extent = extent_recycle(tsdn, arena, r_extent_hooks, - arena->extents_cached, locked, true, new_addr, usize, pad, - alignment, zero, commit, slab); - return extent; -} - -extent_t * -extent_alloc_cache_locked(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { - malloc_mutex_assert_owner(tsdn, &arena->extents_mtx); - - return extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, true, - new_addr, usize, pad, alignment, zero, commit, slab); -} - extent_t * extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { - return extent_alloc_cache_impl(tsdn, arena, r_extent_hooks, false, - new_addr, usize, pad, alignment, zero, commit, slab); + assert(usize + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + return 
extent_recycle(tsdn, arena, r_extent_hooks, + &arena->extents_cached, new_addr, usize, pad, alignment, zero, + commit, slab); } static void * @@ -679,16 +788,6 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, alignment, zero, commit); } -static void -extent_retain(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_t *extent) { - if (config_stats) { - arena->stats.retained += extent_size_get(extent); - } - extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, - false, extent); -} - /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint @@ -728,16 +827,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, &zeroed, &committed, arena->dss_prec); extent_init(extent, arena, ptr, alloc_size, alloc_size, - arena_extent_sn_next(arena), false, zeroed, committed, false); + arena_extent_sn_next(arena), extent_state_retained, zeroed, + committed, false); if (ptr == NULL || extent_register(tsdn, extent)) { extent_dalloc(tsdn, arena, extent); return NULL; } /* - * Set the extent as active *after registration so that no gprof-related + * Set the extent as active *after registration so that no gdump-related * accounting occurs during registration. 
*/ - extent_active_set(extent, true); + extent_state_set(extent, extent_state_active); leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, PAGE_CEILING(alignment)) - (uintptr_t)ptr; @@ -758,10 +858,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, leadsize, leadsize, size + trailsize, usize + trailsize); if (extent == NULL) { extent_deregister(tsdn, lead); - extent_leak(tsdn, arena, r_extent_hooks, false, lead); + extents_leak(tsdn, arena, r_extent_hooks, false, lead); return NULL; } - extent_retain(tsdn, arena, r_extent_hooks, lead); + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, lead); } /* Split the trail. */ @@ -770,10 +871,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, r_extent_hooks, extent, size, usize, trailsize, trailsize); if (trail == NULL) { extent_deregister(tsdn, extent); - extent_leak(tsdn, arena, r_extent_hooks, false, extent); + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, extent); return NULL; } - extent_retain(tsdn, arena, r_extent_hooks, trail); + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, trail); } else if (leadsize == 0) { /* * Splitting causes usize to be set as a side effect, but no @@ -785,15 +888,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (*commit && !extent_committed_get(extent)) { if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { - extent_retain(tsdn, arena, r_extent_hooks, extent); + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, extent); return NULL; } extent_zeroed_set(extent, true); } if (config_prof) { - /* Adjust gprof stats now that extent is final size. */ - extent_gprof_add(tsdn, extent); + /* Adjust gdump stats now that extent is final size. 
*/ + extent_gdump_add(tsdn, extent); } if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); @@ -837,15 +941,11 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, assert(alignment != 0); extent = extent_recycle(tsdn, arena, r_extent_hooks, - arena->extents_retained, false, false, new_addr, usize, pad, - alignment, zero, commit, slab); + &arena->extents_retained, new_addr, usize, pad, alignment, zero, + commit, slab); if (extent != NULL) { - if (config_stats) { - size_t size = usize + pad; - arena->stats.retained -= size; - } if (config_prof) { - extent_gprof_add(tsdn, extent); + extent_gdump_add(tsdn, extent); } } if (!config_munmap && extent == NULL) { @@ -882,12 +982,14 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, size, usize, - arena_extent_sn_next(arena), true, zero, commit, slab); + arena_extent_sn_next(arena), extent_state_active, zero, commit, + slab); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } if (extent_register(tsdn, extent)) { - extent_leak(tsdn, arena, r_extent_hooks, false, extent); + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, extent); return NULL; } @@ -898,12 +1000,12 @@ extent_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab) { - extent_t *extent; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); - extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, - usize, pad, alignment, zero, commit, slab); + extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, + new_addr, usize, pad, alignment, zero, commit, slab); if (extent == NULL) { extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, usize, pad, alignment, zero, commit, slab); @@ -917,96 +1019,103 @@ extent_can_coalesce(const 
extent_t *a, const extent_t *b) { if (extent_arena_get(a) != extent_arena_get(b)) { return false; } - if (extent_active_get(a) != extent_active_get(b)) { + if (extent_state_get(a) != extent_state_get(b)) { return false; } if (extent_committed_get(a) != extent_committed_get(b)) { return false; } - if (extent_retained_get(a) != extent_retained_get(b)) { - return false; - } return true; } -static void +static bool extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, - extent_heap_t extent_heaps[NPSIZES+1], bool cache) { + extents_t *extents) { if (!extent_can_coalesce(a, b)) { - return; + return true; + } + assert(extent_arena_get(a) == arena); + assert(extent_arena_get(b) == arena); + + extent_activate_locked(tsdn, arena, extents, a); + extent_activate_locked(tsdn, arena, extents, b); + + malloc_mutex_unlock(tsdn, &extents->mtx); + bool err = extent_merge_wrapper(tsdn, arena, r_extent_hooks, a, b); + malloc_mutex_lock(tsdn, &extents->mtx); + extent_deactivate_locked(tsdn, arena, extents, a); + if (err) { + extent_deactivate_locked(tsdn, arena, extents, b); + return true; } - extent_heaps_remove(tsdn, extent_heaps, a); - extent_heaps_remove(tsdn, extent_heaps, b); - - arena_extent_cache_maybe_remove(tsdn, extent_arena_get(a), a, cache); - arena_extent_cache_maybe_remove(tsdn, extent_arena_get(b), b, cache); - - if (extent_merge_wrapper(tsdn, arena, r_extent_hooks, a, b)) { - extent_heaps_insert(tsdn, extent_heaps, a); - extent_heaps_insert(tsdn, extent_heaps, b); - arena_extent_cache_maybe_insert(tsdn, extent_arena_get(a), a, - cache); - arena_extent_cache_maybe_insert(tsdn, extent_arena_get(b), b, - cache); - return; - } - - extent_heaps_insert(tsdn, extent_heaps, a); - arena_extent_cache_maybe_insert(tsdn, extent_arena_get(a), a, cache); + return false; } static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_heap_t extent_heaps[NPSIZES+1], bool cache, extent_t 
*extent) { + extents_t *extents, extent_t *extent) { extent_t *prev, *next; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert(!cache || !extent_zeroed_get(extent)); + assert(extents_state_get(extents) != extent_state_dirty || + !extent_zeroed_get(extent)); - malloc_mutex_lock(tsdn, &arena->extents_mtx); + malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_usize_set(extent, 0); - extent_active_set(extent, false); - extent_zeroed_set(extent, !cache && extent_zeroed_get(extent)); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); } assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); - extent_heaps_insert(tsdn, extent_heaps, extent); - arena_extent_cache_maybe_insert(tsdn, arena, extent, cache); + extent_deactivate_locked(tsdn, arena, extents, extent); - /* Try to coalesce forward. */ - next = rtree_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent), false); - if (next != NULL) { - extent_try_coalesce(tsdn, arena, r_extent_hooks, extent, next, - extent_heaps, cache); - } + /* + * Continue attempting to coalesce until failure, to protect against + * races with other threads that are thwarted by this one. + */ + bool coalesced; + do { + coalesced = false; - /* Try to coalesce backward. */ - prev = rtree_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_before_get(extent), false); - if (prev != NULL) { - extent_try_coalesce(tsdn, arena, r_extent_hooks, prev, extent, - extent_heaps, cache); - } + /* Try to coalesce forward. */ + next = rtree_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_past_get(extent), false); + if (next != NULL) { + coalesced = !extent_try_coalesce(tsdn, arena, + r_extent_hooks, extent, next, extents); + } - malloc_mutex_unlock(tsdn, &arena->extents_mtx); + /* Try to coalesce backward. 
*/ + prev = rtree_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_before_get(extent), false); + if (prev != NULL) { + coalesced = !extent_try_coalesce(tsdn, arena, + r_extent_hooks, prev, extent, extents); + if (coalesced) { + extent = prev; + } + } + } while (coalesced); + + malloc_mutex_unlock(tsdn, &extents->mtx); } void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + if (extent_register(tsdn, extent)) { - extent_leak(tsdn, arena, &extent_hooks, false, extent); + extents_leak(tsdn, arena, &extent_hooks, + &arena->extents_retained, extent); return; } extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); @@ -1017,11 +1126,12 @@ extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, r_extent_hooks, arena->extents_cached, true, + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_cached, extent); } @@ -1048,15 +1158,12 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_addr_set(extent, extent_base_get(extent)); extent_hooks_assure_initialized(arena, r_extent_hooks); - /* - * Try to deallocate. Deregister first to avoid a race with other - * allocating threads, and reregister if deallocation fails. - */ - extent_deregister(tsdn, extent); + /* Try to deallocate. */ if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ err = extent_dalloc_default_impl(extent_base_get(extent), @@ -1078,14 +1185,20 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - bool zeroed; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + /* + * Deregister first to avoid a race with other allocating threads, and + * reregister if deallocation fails. + */ + extent_deregister(tsdn, extent); if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) { return; } extent_reregister(tsdn, extent); /* Try to decommit; purge if that fails. */ + bool zeroed; if (!extent_committed_get(extent)) { zeroed = true; } else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, @@ -1106,15 +1219,12 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_zeroed_set(extent, zeroed); - if (config_stats) { - arena->stats.retained += extent_size_get(extent); - } if (config_prof) { - extent_gprof_sub(tsdn, extent); + extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, r_extent_hooks, arena->extents_retained, - false, extent); + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, + extent); } static bool @@ -1130,10 +1240,10 @@ bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - bool err; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); - err = ((*r_extent_hooks)->commit == NULL || + bool err = ((*r_extent_hooks)->commit == NULL || (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); extent_committed_set(extent, extent_committed_get(extent) || !err); @@ -1153,11 +1263,11 @@ bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { 
- bool err; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); - err = ((*r_extent_hooks)->decommit == NULL || + bool err = ((*r_extent_hooks)->decommit == NULL || (*r_extent_hooks)->decommit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); @@ -1184,6 +1294,8 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_lazy == NULL || (*r_extent_hooks)->purge_lazy(*r_extent_hooks, @@ -1210,6 +1322,8 @@ bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_forced == NULL || (*r_extent_hooks)->purge_forced(*r_extent_hooks, @@ -1231,13 +1345,14 @@ extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, size_t usize_a, size_t size_b, size_t usize_b) { + assert(extent_size_get(extent) == size_a + size_b); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + extent_t *trail; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; - assert(extent_size_get(extent) == size_a + size_b); - extent_hooks_assure_initialized(arena, r_extent_hooks); if ((*r_extent_hooks)->split == NULL) { @@ -1253,7 +1368,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t lead; extent_init(&lead, arena, extent_addr_get(extent), size_a, - usize_a, extent_sn_get(extent), extent_active_get(extent), + 
usize_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); @@ -1265,7 +1380,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, usize_b, extent_sn_get(extent), - extent_active_get(extent), extent_zeroed_get(extent), + extent_state_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_slab_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, &trail_elm_a, &trail_elm_b)) { @@ -1323,10 +1438,7 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { - bool err; - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1334,6 +1446,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, return true; } + bool err; if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ err = extent_merge_default_impl(extent_base_get(a), @@ -1354,6 +1467,9 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, * owned, so the following code uses decomposed helper functions rather * than extent_{,de}register() to do things in the right order. 
*/ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; extent_rtree_acquire(tsdn, rtree_ctx, a, true, false, &a_elm_a, &a_elm_b); extent_rtree_acquire(tsdn, rtree_ctx, b, true, false, &b_elm_a, diff --git a/src/extent_dss.c b/src/extent_dss.c index ed4140e7..a3cfab26 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -143,7 +143,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size != 0) { extent_init(gap, arena, gap_addr, gap_size, gap_size, arena_extent_sn_next(arena), - false, false, true, false); + extent_state_active, false, true, false); } dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)max_cur || @@ -180,7 +180,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_t extent; extent_init(&extent, arena, ret, size, - size, 0, true, false, true, false); + size, 0, extent_state_active, false, + true, false); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/src/large.c b/src/large.c index 6458d81a..bfe2f714 100644 --- a/src/large.c +++ b/src/large.c @@ -40,8 +40,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, /* Insert extent into large. 
*/ malloc_mutex_lock(tsdn, &arena->large_mtx); - ql_elm_new(extent, ql_link); - ql_tail_insert(&arena->large, extent, ql_link); + extent_list_append(&arena->large, extent); malloc_mutex_unlock(tsdn, &arena->large_mtx); if (config_prof && arena_prof_accum(tsdn, arena, usize)) { prof_idump(tsdn); @@ -138,19 +137,19 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); - bool is_zeroed_trail = false; extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t trailsize = usize - extent_usize_get(extent); - extent_t *trail; if (extent_hooks->merge == NULL) { return true; } - if ((trail = arena_extent_cache_alloc(tsdn, arena, &extent_hooks, - extent_past_get(extent), trailsize, CACHELINE, &is_zeroed_trail)) == - NULL) { - bool commit = true; + bool is_zeroed_trail = false; + bool commit = true; + extent_t *trail; + if ((trail = extent_alloc_cache(tsdn, arena, &extent_hooks, + extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, + &commit, false)) == NULL) { if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, &commit, false)) == NULL) { @@ -291,32 +290,39 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, * independent of these considerations. 
*/ static void -large_dalloc_impl(tsdn_t *tsdn, extent_t *extent, bool junked_locked) { - arena_t *arena; - - arena = extent_arena_get(extent); +large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + bool junked_locked) { malloc_mutex_lock(tsdn, &arena->large_mtx); - ql_remove(&arena->large, extent, ql_link); + extent_list_remove(&arena->large, extent); malloc_mutex_unlock(tsdn, &arena->large_mtx); if (!junked_locked) { large_dalloc_maybe_junk(extent_addr_get(extent), extent_usize_get(extent)); } - arena_extent_dalloc_large(tsdn, arena, extent, junked_locked); + arena_extent_dalloc_large_prep(tsdn, arena, extent, junked_locked); +} - if (!junked_locked) { - arena_decay_tick(tsdn, arena); - } +static void +large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { + arena_extent_dalloc_large_finish(tsdn, arena, extent); } void -large_dalloc_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_impl(tsdn, extent, true); +large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { + large_dalloc_prep_impl(tsdn, extent_arena_get(extent), extent, true); +} + +void +large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { + large_dalloc_finish_impl(tsdn, extent_arena_get(extent), extent); } void large_dalloc(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_impl(tsdn, extent, false); + arena_t *arena = extent_arena_get(extent); + large_dalloc_prep_impl(tsdn, arena, extent, false); + large_dalloc_finish_impl(tsdn, arena, extent); + arena_decay_tick(tsdn, arena); } size_t diff --git a/src/tcache.c b/src/tcache.c index 76277f06..94c45707 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -170,17 +170,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { - arena_t *arena; - void *ptr; - unsigned i, nflush, ndeferred; bool merged_stats = false; assert(binind < nhbins); assert(rem <= tbin->ncached); - 
arena = arena_choose(tsd, NULL); + arena_t *arena = arena_choose(tsd, NULL); assert(arena != NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + unsigned nflush = tbin->ncached - rem; + while (nflush > 0) { /* Lock the arena associated with the first object. */ extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1)); arena_t *locked_arena = extent_arena_get(extent); @@ -189,7 +187,17 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, if (config_prof) { idump = false; } + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); + for (unsigned i = 0; i < nflush; i++) { + void *ptr = *(tbin->avail - 1 - i); + assert(ptr != NULL); + extent = iealloc(tsd_tsdn(tsd), ptr); + if (extent_arena_get(extent) == locked_arena) { + large_dalloc_prep_junked_locked(tsd_tsdn(tsd), + extent); + } + } if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { idump = arena_prof_accum_locked(arena, @@ -205,14 +213,15 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, tbin->tstats.nrequests = 0; } } - ndeferred = 0; - for (i = 0; i < nflush; i++) { - ptr = *(tbin->avail - 1 - i); + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); + + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == locked_arena) { - large_dalloc_junked_locked(tsd_tsdn(tsd), - extent); + large_dalloc_finish(tsd_tsdn(tsd), extent); } else { /* * This object was allocated via a different @@ -224,12 +233,12 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, ndeferred++; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); if (config_prof && idump) { prof_idump(tsd_tsdn(tsd)); } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); + nflush = ndeferred; } if (config_stats && !merged_stats) { /* diff --git 
a/test/unit/arena_reset.c b/test/unit/arena_reset.c index d2a9bb4f..24c7f526 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -63,7 +63,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { if (extent == NULL) { return 0; } - if (!extent_active_get(extent)) { + if (extent_state_get(extent) != extent_state_active) { return 0; } diff --git a/test/unit/slab.c b/test/unit/slab.c index d3b45e80..1f2a260c 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -8,8 +8,8 @@ TEST_BEGIN(test_arena_slab_regind) { extent_t slab; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, - MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, 0, 0, true, - false, true, true); + MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, 0, 0, + extent_state_active, false, true, true); assert_ptr_not_null(extent_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { From 767ffa2b5f79d0f8458aceab3e628e27fe7a88dc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Feb 2017 15:30:42 -0800 Subject: [PATCH 0621/2608] Fix compute_size_with_overflow(). Fix compute_size_with_overflow() to use a high_bits mask that has the high bits set, rather than the low bits. This regression was introduced by 5154ff32ee8c37bacb6afd8a07b923eb33228357 (Unify the allocation paths). --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 45e9aea7..af410958 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1521,7 +1521,7 @@ compute_size_with_overflow(dynamic_opts_t *dopts, size_t *size) { */ /* A size_t with its high-half bits all set to 1. 
*/ - const static size_t high_bits = SIZE_T_MAX >> (sizeof(size_t) * 8 / 2); + const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; From 1bac516aaae4582eb1a6f58ae58fa13c27de95a6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Feb 2017 15:33:37 -0800 Subject: [PATCH 0622/2608] Optimize compute_size_with_overflow(). Do not check for overflow unless it is actually a possibility. --- src/jemalloc.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index af410958..48be4a3f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1362,6 +1362,8 @@ malloc_init_hard(void) { */ typedef struct static_opts_s static_opts_t; struct static_opts_s { + /* Whether or not allocation size may overflow. */ + bool may_overflow; /* Whether or not allocations of size 0 should be treated as size 1. */ bool bump_empty_alloc; /* @@ -1400,6 +1402,7 @@ struct static_opts_s { JEMALLOC_ALWAYS_INLINE_C void static_opts_init(static_opts_t *static_opts) { + static_opts->may_overflow = false; static_opts->bump_empty_alloc = false; static_opts->assert_nonempty_alloc = false; static_opts->null_out_result_on_error = false; @@ -1514,12 +1517,19 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, * *size to the product either way. */ JEMALLOC_ALWAYS_INLINE_C bool -compute_size_with_overflow(dynamic_opts_t *dopts, size_t *size) { +compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, + size_t *size) { /* - * This function is just num_items * item_size, except that we have to - * check for overflow. + * This function is just num_items * item_size, except that we may have + * to check for overflow. */ + if (!may_overflow) { + assert(dopts->num_items == 1); + *size = dopts->item_size; + return false; + } + /* A size_t with its high-half bits all set to 1. 
*/ const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); @@ -1572,8 +1582,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { } /* Compute the amount of memory the user wants. */ - bool overflow = compute_size_with_overflow(dopts, &size); - if (unlikely(overflow)) { + if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts, + &size))) { goto label_oom; } @@ -1843,6 +1853,7 @@ je_calloc(size_t num, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); + sopts.may_overflow = true; sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; From 6737d5f61ee2fa5073bf20a8387e8c261e2a29f8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Feb 2017 00:43:32 -0800 Subject: [PATCH 0623/2608] Fix a race in extent_grow_retained(). Set extent as active prior to registration so that other threads can't modify it in the absence of locking. This regression was introduced by d27f29b468ae3e9d2b1da4a9880351d76e5a1662 (Disentangle arena and extent locking.), via non-obvious means. Removal of extents_mtx protection during extent_grow_retained() execution opened up the race, but in the presence of that locking, the code was safe. This resolves #599. 
--- src/extent.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/src/extent.c b/src/extent.c index 293b96e5..234be54b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -453,7 +453,7 @@ extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { } static bool -extent_register(tsdn_t *tsdn, const extent_t *extent) { +extent_register_impl(tsdn_t *tsdn, const extent_t *extent, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_elm_t *elm_a, *elm_b; @@ -468,13 +468,23 @@ extent_register(tsdn_t *tsdn, const extent_t *extent) { } extent_rtree_release(tsdn, elm_a, elm_b); - if (config_prof) { + if (config_prof && gdump_add) { extent_gdump_add(tsdn, extent); } return false; } +static bool +extent_register(tsdn_t *tsdn, const extent_t *extent) { + return extent_register_impl(tsdn, extent, true); +} + +static bool +extent_register_no_gdump_add(tsdn_t *tsdn, const extent_t *extent) { + return extent_register_impl(tsdn, extent, false); +} + static void extent_reregister(tsdn_t *tsdn, const extent_t *extent) { bool err = extent_register(tsdn, extent); @@ -827,17 +837,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, &zeroed, &committed, arena->dss_prec); extent_init(extent, arena, ptr, alloc_size, alloc_size, - arena_extent_sn_next(arena), extent_state_retained, zeroed, + arena_extent_sn_next(arena), extent_state_active, zeroed, committed, false); - if (ptr == NULL || extent_register(tsdn, extent)) { + if (ptr == NULL || extent_register_no_gdump_add(tsdn, extent)) { extent_dalloc(tsdn, arena, extent); return NULL; } - /* - * Set the extent as active *after registration so that no gdump-related - * accounting occurs during registration. 
- */ - extent_state_set(extent, extent_state_active); leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, PAGE_CEILING(alignment)) - (uintptr_t)ptr; From 5177995530557521d330486a3971469e1573d6fc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Feb 2017 23:59:53 -0800 Subject: [PATCH 0624/2608] Fix extent_record(). Read adjacent rtree elements while holding element locks, since the extents mutex only protects against relevant like-state extent mutation. Fix management of the 'coalesced' loop state variable to merge forward/backward results, rather than overwriting the result of forward coalescing if attempting to coalesce backward. In practice this caused no correctness issues, but could cause extra iterations in rare cases. These regressions were introduced by d27f29b468ae3e9d2b1da4a9880351d76e5a1662 (Disentangle arena and extent locking.). --- src/extent.c | 51 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/src/extent.c b/src/extent.c index 234be54b..4a83f694 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1035,12 +1035,9 @@ extent_can_coalesce(const extent_t *a, const extent_t *b) { } static bool -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, - extents_t *extents) { - if (!extent_can_coalesce(a, b)) { - return true; - } +extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extent_t *a, extent_t *b, extents_t *extents) { + assert(extent_can_coalesce(a, b)); assert(extent_arena_get(a) == arena); assert(extent_arena_get(b) == arena); @@ -1062,7 +1059,6 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { - extent_t *prev, *next; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1090,21 +1086,40 @@ extent_record(tsdn_t *tsdn, 
arena_t *arena, extent_hooks_t **r_extent_hooks, coalesced = false; /* Try to coalesce forward. */ - next = rtree_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent), false); - if (next != NULL) { - coalesced = !extent_try_coalesce(tsdn, arena, - r_extent_hooks, extent, next, extents); + rtree_elm_t *next_elm = rtree_elm_acquire(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)extent_past_get(extent), false, + false); + if (next_elm != NULL) { + extent_t *next = rtree_elm_read_acquired(tsdn, + &extents_rtree, next_elm); + /* + * extents->mtx only protects against races for + * like-state extents, so call extent_can_coalesce() + * before releasing the next_elm lock. + */ + bool can_coalesce = (next != NULL && + extent_can_coalesce(extent, next)); + rtree_elm_release(tsdn, &extents_rtree, next_elm); + if (can_coalesce && !extent_coalesce(tsdn, arena, + r_extent_hooks, extent, next, extents)) { + coalesced = true; + } } /* Try to coalesce backward. */ - prev = rtree_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_before_get(extent), false); - if (prev != NULL) { - coalesced = !extent_try_coalesce(tsdn, arena, - r_extent_hooks, prev, extent, extents); - if (coalesced) { + rtree_elm_t *prev_elm = rtree_elm_acquire(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)extent_before_get(extent), false, + false); + if (prev_elm != NULL) { + extent_t *prev = rtree_elm_read_acquired(tsdn, + &extents_rtree, prev_elm); + bool can_coalesce = (prev != NULL && + extent_can_coalesce(prev, extent)); + rtree_elm_release(tsdn, &extents_rtree, prev_elm); + if (can_coalesce && !extent_coalesce(tsdn, arena, + r_extent_hooks, prev, extent, extents)) { extent = prev; + coalesced = true; } } } while (coalesced); From 3bd6d8e41d524737845949c44bd4961b0930965c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 6 Feb 2017 12:54:41 -0800 Subject: [PATCH 0625/2608] Conditianalize lg_tcache_max use on JEMALLOC_TCACHE. 
--- test/unit/decay.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 4d172a54..fc8fabcf 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -1,6 +1,10 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "decay_time:1,lg_tcache_max:0"; +const char *malloc_conf = "decay_time:1" +#ifdef JEMALLOC_TCACHE + ",lg_tcache_max:0" +#endif + ; static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; From 0ecf692726f7d496e105be39772c4a1bfd74c660 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Feb 2017 20:17:47 -0800 Subject: [PATCH 0626/2608] Optimize a branch out of rtree_read() if !dependent. --- include/jemalloc/internal/rtree_inlines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 795a88f7..3316ea37 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -348,7 +348,7 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_elm_t *elm; elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); - if (elm == NULL) { + if (!dependent && elm == NULL) { return NULL; } From 4a346f55939af4f200121cc4454089592d952f18 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Feb 2017 20:21:56 -0800 Subject: [PATCH 0627/2608] Replace rtree path cache with LRU cache. Rework rtree_ctx_t to encapsulate an rtree leaf LRU lookup cache rather than a single-path element lookup cache. The replacement is logically much simpler, as well as slightly faster in the fast path case and less prone to degraded performance during non-trivial sequences of lookups. 
--- include/jemalloc/internal/private_symbols.txt | 2 +- include/jemalloc/internal/rtree_inlines.h | 182 ++++++++---------- include/jemalloc/internal/rtree_structs.h | 24 +-- include/jemalloc/internal/rtree_types.h | 22 ++- 4 files changed, 107 insertions(+), 123 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d1166b20..8c8653f0 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -419,7 +419,6 @@ rtree_child_read rtree_child_read_hard rtree_child_tryread rtree_clear -rtree_ctx_start_level rtree_delete rtree_elm_acquire rtree_elm_lookup @@ -431,6 +430,7 @@ rtree_elm_witness_acquire rtree_elm_witness_release rtree_elm_write rtree_elm_write_acquired +rtree_leafkey rtree_new rtree_node_alloc rtree_node_dalloc diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 3316ea37..88d6ee00 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -3,8 +3,7 @@ #ifndef JEMALLOC_ENABLE_INLINE unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key); -unsigned rtree_ctx_start_level(const rtree_t *rtree, - const rtree_ctx_t *rtree_ctx, uintptr_t key); +uintptr_t rtree_leafkey(rtree_t *rtree, uintptr_t key); uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); bool rtree_node_valid(rtree_elm_t *node); @@ -50,31 +49,24 @@ rtree_start_level(const rtree_t *rtree, uintptr_t key) { return start_level; } -JEMALLOC_ALWAYS_INLINE unsigned -rtree_ctx_start_level(const rtree_t *rtree, const rtree_ctx_t *rtree_ctx, - uintptr_t key) { - unsigned start_level; - uintptr_t key_diff; - - /* Compute the difference between old and new lookup keys. */ - key_diff = key ^ rtree_ctx->key; - assert(key_diff != 0); /* Handled in rtree_elm_lookup(). */ - - /* - * Compute the last traversal path element at which the keys' paths - * are the same. 
- */ - start_level = rtree->start_level[(lg_floor(key_diff) + 1) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return start_level; +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leafkey(rtree_t *rtree, uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree->levels[rtree->height-1].cumbits - + rtree->levels[rtree->height-1].bits); + unsigned maskbits = ptrbits - cumbits; + uintptr_t mask = ~((ZU(1) << maskbits) - 1); + return (key & mask); } JEMALLOC_ALWAYS_INLINE uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { - return ((key >> ((ZU(1) << (LG_SIZEOF_PTR+3)) - - rtree->levels[level].cumbits)) & ((ZU(1) << - rtree->levels[level].bits) - 1)); + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = rtree->levels[level].cumbits; + unsigned shiftbits = ptrbits - cumbits; + unsigned maskbits = rtree->levels[level].bits; + unsigned mask = (ZU(1) << maskbits) - 1; + return ((key >> shiftbits) & mask); } JEMALLOC_ALWAYS_INLINE bool @@ -170,103 +162,89 @@ rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { - uintptr_t subkey; - unsigned start_level; - rtree_elm_t *node; - assert(!dependent || !init_missing); - if (dependent || init_missing) { - if (likely(rtree_ctx->valid)) { - if (key == rtree_ctx->key) { - return rtree_ctx->elms[rtree->height]; - } else { - unsigned no_ctx_start_level = - rtree_start_level(rtree, key); - unsigned ctx_start_level; - - if (likely(no_ctx_start_level <= - rtree_ctx->start_level && (ctx_start_level = - rtree_ctx_start_level(rtree, rtree_ctx, - key)) >= rtree_ctx->start_level)) { - start_level = ctx_start_level; - node = rtree_ctx->elms[ctx_start_level]; - } else { - start_level = no_ctx_start_level; - node = init_missing ? 
- rtree_subtree_read(tsdn, rtree, - no_ctx_start_level, dependent) : - rtree_subtree_tryread(rtree, - no_ctx_start_level, dependent); - rtree_ctx->start_level = - no_ctx_start_level; - rtree_ctx->elms[no_ctx_start_level] = - node; - } - } - } else { - unsigned no_ctx_start_level = rtree_start_level(rtree, - key); - - start_level = no_ctx_start_level; - node = init_missing ? rtree_subtree_read(tsdn, rtree, - no_ctx_start_level, dependent) : - rtree_subtree_tryread(rtree, no_ctx_start_level, - dependent); - rtree_ctx->valid = true; - rtree_ctx->start_level = no_ctx_start_level; - rtree_ctx->elms[no_ctx_start_level] = node; + /* Search the cache. */ + uintptr_t leafkey = rtree_leafkey(rtree, key); + if (likely(key != 0)) { +#define RTREE_CACHE_CHECK(i) do { \ + if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ + rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \ + if (likely(leaf != NULL)) { \ + /* Reorder. */ \ + memmove(&rtree_ctx->cache[1], \ + &rtree_ctx->cache[0], \ + sizeof(rtree_ctx_cache_elm_t) * i); \ + rtree_ctx->cache[0].leafkey = leafkey; \ + rtree_ctx->cache[0].leaf = leaf; \ + \ + uintptr_t subkey = rtree_subkey(rtree, \ + key, rtree->height-1); \ + return &leaf[subkey]; \ + } \ + } \ +} while (0) + /* Check the MRU cache entry. */ + RTREE_CACHE_CHECK(0); + /* + * Search the remaining cache elements, and on success move the + * matching element to the front. Unroll the first iteration to + * avoid calling memmove() (the compiler typically optimizes it + * into raw moves). + */ + if (RTREE_CTX_NCACHE > 1) { + RTREE_CACHE_CHECK(1); } - rtree_ctx->key = key; - } else { - start_level = rtree_start_level(rtree, key); - node = init_missing ? rtree_subtree_read(tsdn, rtree, - start_level, dependent) : rtree_subtree_tryread(rtree, - start_level, dependent); + for (unsigned i = 2; i < RTREE_CTX_NCACHE; i++) { + RTREE_CACHE_CHECK(i); + } +#undef RTREE_CACHE_CHECK } + unsigned start_level = rtree_start_level(rtree, key); + rtree_elm_t *node = init_missing ? 
rtree_subtree_read(tsdn, rtree, + start_level, dependent) : rtree_subtree_tryread(rtree, start_level, + dependent); + #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) switch (start_level + RTREE_GET_BIAS) { #define RTREE_GET_SUBTREE(level) \ - case level: \ + case level: { \ assert(level < (RTREE_HEIGHT_MAX-1)); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ - if (init_missing) { \ - rtree_ctx->valid = false; \ - } \ return NULL; \ } \ - subkey = rtree_subkey(rtree, key, level - \ + uintptr_t subkey = rtree_subkey(rtree, key, level - \ RTREE_GET_BIAS); \ node = init_missing ? rtree_child_read(tsdn, rtree, \ &node[subkey], level - RTREE_GET_BIAS, dependent) : \ rtree_child_tryread(&node[subkey], dependent); \ - if (dependent || init_missing) { \ - rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ - node; \ - } \ - /* Fall through. */ + /* Fall through. */ \ + } #define RTREE_GET_LEAF(level) \ - case level: \ + case level: { \ assert(level == (RTREE_HEIGHT_MAX-1)); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ - if (init_missing) { \ - rtree_ctx->valid = false; \ - } \ return NULL; \ } \ - subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ /* \ * node is a leaf, so it contains values rather than \ * child pointers. 
\ */ \ - node = &node[subkey]; \ - if (dependent || init_missing) { \ - rtree_ctx->elms[level - RTREE_GET_BIAS + 1] = \ - node; \ + if (likely(key != 0)) { \ + if (RTREE_CTX_NCACHE > 1) { \ + memmove(&rtree_ctx->cache[1], \ + &rtree_ctx->cache[0], \ + sizeof(rtree_ctx_cache_elm_t) * \ + (RTREE_CTX_NCACHE-1)); \ + } \ + rtree_ctx->cache[0].leafkey = leafkey; \ + rtree_ctx->cache[0].leaf = node; \ } \ - return node; + uintptr_t subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + return &node[subkey]; \ + } #if RTREE_HEIGHT_MAX > 1 RTREE_GET_SUBTREE(0) #endif @@ -365,16 +343,14 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return NULL; } - { - extent_t *extent; - void *s; - do { - extent = rtree_elm_read(elm, false); - /* The least significant bit serves as a lock. */ - s = (void *)((uintptr_t)extent | (uintptr_t)0x1); - } while (atomic_cas_p(&elm->pun, (void *)extent, s)); - } + extent_t *extent; + void *s; + do { + extent = rtree_elm_read(elm, false); + /* The least significant bit serves as a lock. */ + s = (void *)((uintptr_t)extent | (uintptr_t)0x1); + } while (atomic_cas_p(&elm->pun, (void *)extent, s)); if (config_debug) { rtree_elm_witness_acquire(tsdn, rtree, key, elm); diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 5a7a23c7..892156b1 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -54,22 +54,16 @@ struct rtree_level_s { unsigned cumbits; }; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_elm_t *leaf; +}; + struct rtree_ctx_s { - /* If false, key/elms have not yet been initialized by a lookup. */ - bool valid; - /* Key that corresponds to the tree path recorded in elms. */ - uintptr_t key; - /* Memoized rtree_start_level(key). */ - unsigned start_level; - /* - * A path through rtree, driven by key. 
Only elements that could - * actually be used for subsequent lookups are initialized, i.e. if - * start_level = rtree_start_level(key) is non-zero, the first - * start_level elements are uninitialized. The last element contains a - * pointer to the leaf node element that corresponds to key, so that - * exact matches require no tree node offset computation. - */ - rtree_elm_t *elms[RTREE_HEIGHT_MAX + 1]; +#ifndef _MSC_VER + JEMALLOC_ALIGNED(CACHELINE) +#endif + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; }; struct rtree_s { diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index 122d5cef..b4ab018d 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -12,6 +12,7 @@ typedef struct rtree_elm_s rtree_elm_t; typedef struct rtree_elm_witness_s rtree_elm_witness_t; typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; typedef struct rtree_level_s rtree_level_t; +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; typedef struct rtree_ctx_s rtree_ctx_t; typedef struct rtree_s rtree_t; @@ -25,11 +26,24 @@ typedef struct rtree_s rtree_t; #define RTREE_HEIGHT_MAX \ ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) +/* + * Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf, + * so the cache hit rate is typically high even with a small number of entries. + * In rare cases extent activity will straddle the boundary between two leaf + * nodes. Furthermore, an arena may use a combination of dss and mmap. Four + * entries covers both of these considerations as long as locality of reference + * is high, and/or total memory usage doesn't exceed the range supported by + * those entries. 
Note that as memory usage grows past the amount that this + * cache can directly cover, the cache will become less effective if locality of + * reference is low, but the consequence is merely cache misses while traversing + * the tree nodes, and the cache will itself suffer cache misses if made overly + * large, not to mention the cost of linear search. + */ +#define RTREE_CTX_NCACHE 8 + +/* Static initializer for rtree_ctx_t. */ #define RTREE_CTX_INITIALIZER { \ - false, \ - 0, \ - 0, \ - {NULL /* C initializes all trailing elements to NULL. */} \ + {{0, NULL} /* C initializes all trailing elements to NULL. */} \ } /* From c511a44e99a2d92893d028854eabd2cd4b2c1fe1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Feb 2017 20:12:49 -0800 Subject: [PATCH 0628/2608] Split rtree_elm_lookup_hard() out of rtree_elm_lookup(). Anything but a hit in the first element of the lookup cache is expensive enough to negate the benefits of inlining. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/rtree_externs.h | 2 + include/jemalloc/internal/rtree_inlines.h | 104 +---------------- src/rtree.c | 105 ++++++++++++++++++ 4 files changed, 111 insertions(+), 101 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8c8653f0..a60634ce 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -422,6 +422,7 @@ rtree_clear rtree_delete rtree_elm_acquire rtree_elm_lookup +rtree_elm_lookup_hard rtree_elm_read rtree_elm_read_acquired rtree_elm_release diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index db8e8b12..7fc68fc9 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -13,6 +13,8 @@ rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level); rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, 
rtree_t *rtree, rtree_elm_t *elm, unsigned level); +rtree_elm_t *rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, uintptr_t key, const rtree_elm_t *elm); void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 88d6ee00..372b7465 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -164,9 +164,8 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { assert(!dependent || !init_missing); - /* Search the cache. */ - uintptr_t leafkey = rtree_leafkey(rtree, key); if (likely(key != 0)) { + uintptr_t leafkey = rtree_leafkey(rtree, key); #define RTREE_CACHE_CHECK(i) do { \ if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \ @@ -201,105 +200,8 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, #undef RTREE_CACHE_CHECK } - unsigned start_level = rtree_start_level(rtree, key); - rtree_elm_t *node = init_missing ? rtree_subtree_read(tsdn, rtree, - start_level, dependent) : rtree_subtree_tryread(rtree, start_level, - dependent); - -#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) - switch (start_level + RTREE_GET_BIAS) { -#define RTREE_GET_SUBTREE(level) \ - case level: { \ - assert(level < (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) { \ - return NULL; \ - } \ - uintptr_t subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - node = init_missing ? rtree_child_read(tsdn, rtree, \ - &node[subkey], level - RTREE_GET_BIAS, dependent) : \ - rtree_child_tryread(&node[subkey], dependent); \ - /* Fall through. 
*/ \ - } -#define RTREE_GET_LEAF(level) \ - case level: { \ - assert(level == (RTREE_HEIGHT_MAX-1)); \ - if (!dependent && unlikely(!rtree_node_valid(node))) { \ - return NULL; \ - } \ - /* \ - * node is a leaf, so it contains values rather than \ - * child pointers. \ - */ \ - if (likely(key != 0)) { \ - if (RTREE_CTX_NCACHE > 1) { \ - memmove(&rtree_ctx->cache[1], \ - &rtree_ctx->cache[0], \ - sizeof(rtree_ctx_cache_elm_t) * \ - (RTREE_CTX_NCACHE-1)); \ - } \ - rtree_ctx->cache[0].leafkey = leafkey; \ - rtree_ctx->cache[0].leaf = node; \ - } \ - uintptr_t subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ - return &node[subkey]; \ - } -#if RTREE_HEIGHT_MAX > 1 - RTREE_GET_SUBTREE(0) -#endif -#if RTREE_HEIGHT_MAX > 2 - RTREE_GET_SUBTREE(1) -#endif -#if RTREE_HEIGHT_MAX > 3 - RTREE_GET_SUBTREE(2) -#endif -#if RTREE_HEIGHT_MAX > 4 - RTREE_GET_SUBTREE(3) -#endif -#if RTREE_HEIGHT_MAX > 5 - RTREE_GET_SUBTREE(4) -#endif -#if RTREE_HEIGHT_MAX > 6 - RTREE_GET_SUBTREE(5) -#endif -#if RTREE_HEIGHT_MAX > 7 - RTREE_GET_SUBTREE(6) -#endif -#if RTREE_HEIGHT_MAX > 8 - RTREE_GET_SUBTREE(7) -#endif -#if RTREE_HEIGHT_MAX > 9 - RTREE_GET_SUBTREE(8) -#endif -#if RTREE_HEIGHT_MAX > 10 - RTREE_GET_SUBTREE(9) -#endif -#if RTREE_HEIGHT_MAX > 11 - RTREE_GET_SUBTREE(10) -#endif -#if RTREE_HEIGHT_MAX > 12 - RTREE_GET_SUBTREE(11) -#endif -#if RTREE_HEIGHT_MAX > 13 - RTREE_GET_SUBTREE(12) -#endif -#if RTREE_HEIGHT_MAX > 14 - RTREE_GET_SUBTREE(13) -#endif -#if RTREE_HEIGHT_MAX > 15 - RTREE_GET_SUBTREE(14) -#endif -#if RTREE_HEIGHT_MAX > 16 -# error Unsupported RTREE_HEIGHT_MAX -#endif - RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) -#undef RTREE_GET_SUBTREE -#undef RTREE_GET_LEAF - default: not_reached(); - } -#undef RTREE_GET_BIAS - not_reached(); + return rtree_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, dependent, + init_missing); } JEMALLOC_INLINE bool diff --git a/src/rtree.c b/src/rtree.c index d760816e..41bce5df 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -158,6 +158,111 @@ 
rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, return rtree_node_init(tsdn, rtree, level+1, &elm->child); } +rtree_elm_t * +rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, bool init_missing) { + unsigned start_level = rtree_start_level(rtree, key); + rtree_elm_t *node = init_missing ? rtree_subtree_read(tsdn, rtree, + start_level, dependent) : rtree_subtree_tryread(rtree, start_level, + dependent); + +#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) + switch (start_level + RTREE_GET_BIAS) { +#define RTREE_GET_SUBTREE(level) \ + case level: { \ + assert(level < (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) { \ + return NULL; \ + } \ + uintptr_t subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + node = init_missing ? rtree_child_read(tsdn, rtree, \ + &node[subkey], level - RTREE_GET_BIAS, dependent) : \ + rtree_child_tryread(&node[subkey], dependent); \ + /* Fall through. */ \ + } +#define RTREE_GET_LEAF(level) \ + case level: { \ + assert(level == (RTREE_HEIGHT_MAX-1)); \ + if (!dependent && unlikely(!rtree_node_valid(node))) { \ + return NULL; \ + } \ + /* \ + * node is a leaf, so it contains values rather than \ + * child pointers. 
\ + */ \ + if (likely(key != 0)) { \ + if (RTREE_CTX_NCACHE > 1) { \ + memmove(&rtree_ctx->cache[1], \ + &rtree_ctx->cache[0], \ + sizeof(rtree_ctx_cache_elm_t) * \ + (RTREE_CTX_NCACHE-1)); \ + } \ + uintptr_t leafkey = rtree_leafkey(rtree, key); \ + rtree_ctx->cache[0].leafkey = leafkey; \ + rtree_ctx->cache[0].leaf = node; \ + } \ + uintptr_t subkey = rtree_subkey(rtree, key, level - \ + RTREE_GET_BIAS); \ + return &node[subkey]; \ + } +#if RTREE_HEIGHT_MAX > 1 + RTREE_GET_SUBTREE(0) +#endif +#if RTREE_HEIGHT_MAX > 2 + RTREE_GET_SUBTREE(1) +#endif +#if RTREE_HEIGHT_MAX > 3 + RTREE_GET_SUBTREE(2) +#endif +#if RTREE_HEIGHT_MAX > 4 + RTREE_GET_SUBTREE(3) +#endif +#if RTREE_HEIGHT_MAX > 5 + RTREE_GET_SUBTREE(4) +#endif +#if RTREE_HEIGHT_MAX > 6 + RTREE_GET_SUBTREE(5) +#endif +#if RTREE_HEIGHT_MAX > 7 + RTREE_GET_SUBTREE(6) +#endif +#if RTREE_HEIGHT_MAX > 8 + RTREE_GET_SUBTREE(7) +#endif +#if RTREE_HEIGHT_MAX > 9 + RTREE_GET_SUBTREE(8) +#endif +#if RTREE_HEIGHT_MAX > 10 + RTREE_GET_SUBTREE(9) +#endif +#if RTREE_HEIGHT_MAX > 11 + RTREE_GET_SUBTREE(10) +#endif +#if RTREE_HEIGHT_MAX > 12 + RTREE_GET_SUBTREE(11) +#endif +#if RTREE_HEIGHT_MAX > 13 + RTREE_GET_SUBTREE(12) +#endif +#if RTREE_HEIGHT_MAX > 14 + RTREE_GET_SUBTREE(13) +#endif +#if RTREE_HEIGHT_MAX > 15 + RTREE_GET_SUBTREE(14) +#endif +#if RTREE_HEIGHT_MAX > 16 +# error Unsupported RTREE_HEIGHT_MAX +#endif + RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) +#undef RTREE_GET_SUBTREE +#undef RTREE_GET_LEAF + default: not_reached(); + } +#undef RTREE_GET_BIAS + not_reached(); +} + static int rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { From cdc240d5019435fcb1a319fdcff6d4dc76b20143 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Feb 2017 19:44:33 -0800 Subject: [PATCH 0629/2608] Make non-essential inline rtree functions static functions. 
--- include/jemalloc/internal/private_symbols.txt | 8 -- include/jemalloc/internal/rtree_externs.h | 12 +- include/jemalloc/internal/rtree_inlines.h | 107 ++---------------- src/rtree.c | 77 +++++++++++-- 4 files changed, 85 insertions(+), 119 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a60634ce..3f29d3fe 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -415,9 +415,6 @@ prof_thread_name_get prof_thread_name_set psz2ind psz2u -rtree_child_read -rtree_child_read_hard -rtree_child_tryread rtree_clear rtree_delete rtree_elm_acquire @@ -435,13 +432,8 @@ rtree_leafkey rtree_new rtree_node_alloc rtree_node_dalloc -rtree_node_valid rtree_read -rtree_start_level rtree_subkey -rtree_subtree_read -rtree_subtree_read_hard -rtree_subtree_tryread rtree_write s2u s2u_compute diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index 7fc68fc9..f4f2feb5 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -7,19 +7,15 @@ typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); extern rtree_node_alloc_t *rtree_node_alloc; typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); extern rtree_node_dalloc_t *rtree_node_dalloc; -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); #endif -rtree_elm_t *rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, - unsigned level); -rtree_elm_t *rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, - rtree_elm_t *elm, unsigned level); rtree_elm_t *rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, +void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, uintptr_t key, const 
rtree_elm_t *elm); -void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, +void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, const rtree_elm_t *elm); -void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, +void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, const rtree_elm_t *elm); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 372b7465..86aa8cd1 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -2,53 +2,28 @@ #define JEMALLOC_INTERNAL_RTREE_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE -unsigned rtree_start_level(const rtree_t *rtree, uintptr_t key); uintptr_t rtree_leafkey(rtree_t *rtree, uintptr_t key); -uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); - -bool rtree_node_valid(rtree_elm_t *node); -rtree_elm_t *rtree_child_tryread(rtree_elm_t *elm, bool dependent); -rtree_elm_t *rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, - unsigned level, bool dependent); -extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); -void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); -rtree_elm_t *rtree_subtree_tryread(rtree_t *rtree, unsigned level, - bool dependent); -rtree_elm_t *rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, - unsigned level, bool dependent); -rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, +uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); +extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); +void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); +rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); - -bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, const 
extent_t *extent); -extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); -rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, +extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent); +rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, +extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); -void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, +void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, const extent_t *extent); -void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); -void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); +void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) -JEMALLOC_ALWAYS_INLINE unsigned -rtree_start_level(const rtree_t *rtree, uintptr_t key) { - unsigned start_level; - - if (unlikely(key == 0)) { - return rtree->height - 1; - } - - start_level = rtree->start_level[(lg_floor(key) + 1) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return start_level; -} - JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leafkey(rtree_t *rtree, uintptr_t key) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); @@ -69,37 +44,6 @@ rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { return ((key >> shiftbits) & mask); } -JEMALLOC_ALWAYS_INLINE bool -rtree_node_valid(rtree_elm_t *node) { - return ((uintptr_t)node != (uintptr_t)0); -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_child_tryread(rtree_elm_t *elm, bool dependent) { - rtree_elm_t *child; - - /* 
Double-checked read (first read may be stale). */ - child = elm->child; - if (!dependent && !rtree_node_valid(child)) { - child = (rtree_elm_t *)atomic_read_p(&elm->pun); - } - assert(!dependent || child != NULL); - return child; -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, - bool dependent) { - rtree_elm_t *child; - - child = rtree_child_tryread(elm, dependent); - if (!dependent && unlikely(!rtree_node_valid(child))) { - child = rtree_child_read_hard(tsdn, rtree, elm, level); - } - assert(!dependent || child != NULL); - return child; -} - JEMALLOC_ALWAYS_INLINE extent_t * rtree_elm_read(rtree_elm_t *elm, bool dependent) { extent_t *extent; @@ -132,33 +76,6 @@ rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { atomic_write_p(&elm->pun, extent); } -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { - rtree_elm_t *subtree; - - /* Double-checked read (first read may be stale). 
*/ - subtree = rtree->levels[level].subtree; - if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = (rtree_elm_t *)atomic_read_p( - &rtree->levels[level].subtree_pun); - } - assert(!dependent || subtree != NULL); - return subtree; -} - -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, - bool dependent) { - rtree_elm_t *subtree; - - subtree = rtree_subtree_tryread(rtree, level, dependent); - if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = rtree_subtree_read_hard(tsdn, rtree, level); - } - assert(!dependent || subtree != NULL); - return subtree; -} - JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { diff --git a/src/rtree.c b/src/rtree.c index 41bce5df..3347340b 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -146,16 +146,77 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, return node; } -rtree_elm_t * -rtree_subtree_read_hard(tsdn_t *tsdn, rtree_t *rtree, unsigned level) { - return rtree_node_init(tsdn, rtree, level, - &rtree->levels[level].subtree); +static unsigned +rtree_start_level(const rtree_t *rtree, uintptr_t key) { + unsigned start_level; + + if (unlikely(key == 0)) { + return rtree->height - 1; + } + + start_level = rtree->start_level[(lg_floor(key) + 1) >> + LG_RTREE_BITS_PER_LEVEL]; + assert(start_level < rtree->height); + return start_level; } -rtree_elm_t * -rtree_child_read_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, - unsigned level) { - return rtree_node_init(tsdn, rtree, level+1, &elm->child); +static bool +rtree_node_valid(rtree_elm_t *node) { + return ((uintptr_t)node != (uintptr_t)0); +} + +static rtree_elm_t * +rtree_child_tryread(rtree_elm_t *elm, bool dependent) { + rtree_elm_t *child; + + /* Double-checked read (first read may be stale). 
*/ + child = elm->child; + if (!dependent && !rtree_node_valid(child)) { + child = (rtree_elm_t *)atomic_read_p(&elm->pun); + } + assert(!dependent || child != NULL); + return child; +} + +static rtree_elm_t * +rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, + bool dependent) { + rtree_elm_t *child; + + child = rtree_child_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(child))) { + child = rtree_node_init(tsdn, rtree, level+1, &elm->child); + } + assert(!dependent || child != NULL); + return child; +} + +static rtree_elm_t * +rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { + rtree_elm_t *subtree; + + /* Double-checked read (first read may be stale). */ + subtree = rtree->levels[level].subtree; + if (!dependent && unlikely(!rtree_node_valid(subtree))) { + subtree = (rtree_elm_t *)atomic_read_p( + &rtree->levels[level].subtree_pun); + } + assert(!dependent || subtree != NULL); + return subtree; +} + +static rtree_elm_t * +rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, + bool dependent) { + rtree_elm_t *subtree; + + subtree = rtree_subtree_tryread(rtree, level, dependent); + if (!dependent && unlikely(!rtree_node_valid(subtree))) { + subtree = rtree_node_init(tsdn, rtree, level, + &rtree->levels[level].subtree); + } + assert(!dependent || subtree != NULL); + return subtree; } rtree_elm_t * From ff4db5014e78a3f80e5983dc2313421e7978c792 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 4 Feb 2017 23:08:48 -0800 Subject: [PATCH 0630/2608] Remove rtree leading 0 bit optimization. A subsequent change instead ignores insignificant high bits. 
--- include/jemalloc/internal/rtree_structs.h | 35 ++---------- src/rtree.c | 65 +++++------------------ 2 files changed, 16 insertions(+), 84 deletions(-) diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 892156b1..713d3000 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -19,32 +19,6 @@ struct rtree_elm_witness_tsd_s { }; struct rtree_level_s { - /* - * A non-NULL subtree points to a subtree rooted along the hypothetical - * path to the leaf node corresponding to key 0. Depending on what keys - * have been used to store to the tree, an arbitrary combination of - * subtree pointers may remain NULL. - * - * Suppose keys comprise 48 bits, and LG_RTREE_BITS_PER_LEVEL is 4. - * This results in a 3-level tree, and the leftmost leaf can be directly - * accessed via levels[2], the subtree prefixed by 0x0000 (excluding - * 0x00000000) can be accessed via levels[1], and the remainder of the - * tree can be accessed via levels[0]. - * - * levels[0] : [ | 0x0001******** | 0x0002******** | ...] - * - * levels[1] : [ | 0x00000001**** | 0x00000002**** | ... ] - * - * levels[2] : [extent(0x000000000000) | extent(0x000000000001) | ...] - * - * This has practical implications on x64, which currently uses only the - * lower 47 bits of virtual address space in userland, thus leaving - * levels[0] unused and avoiding a level of tree traversal. - */ - union { - void *subtree_pun; - rtree_elm_t *subtree; - }; /* Number of key bits distinguished by this level. */ unsigned bits; /* @@ -68,11 +42,10 @@ struct rtree_ctx_s { struct rtree_s { unsigned height; - /* - * Precomputed table used to convert from the number of leading 0 key - * bits to which subtree level to start at. 
- */ - unsigned start_level[RTREE_HEIGHT_MAX + 1]; + union { + void *root_pun; + rtree_elm_t *root; + }; rtree_level_t levels[RTREE_HEIGHT_MAX]; malloc_mutex_t init_lock; }; diff --git a/src/rtree.c b/src/rtree.c index 3347340b..fb52cf68 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,11 +1,6 @@ #define JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_internal.h" -static unsigned -hmin(unsigned ha, unsigned hb) { - return (ha < hb ? ha : hb); -} - /* * Only the most significant bits of keys passed to rtree_{read,write}() are * used. @@ -32,32 +27,24 @@ rtree_new(rtree_t *rtree, unsigned bits) { rtree->height = height; + rtree->root_pun = NULL; + /* Root level. */ - rtree->levels[0].subtree = NULL; rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL : bits_in_leaf; rtree->levels[0].cumbits = rtree->levels[0].bits; /* Interior levels. */ for (i = 1; i < height-1; i++) { - rtree->levels[i].subtree = NULL; rtree->levels[i].bits = RTREE_BITS_PER_LEVEL; rtree->levels[i].cumbits = rtree->levels[i-1].cumbits + RTREE_BITS_PER_LEVEL; } /* Leaf level. */ if (height > 1) { - rtree->levels[height-1].subtree = NULL; rtree->levels[height-1].bits = bits_in_leaf; rtree->levels[height-1].cumbits = bits; } - /* Compute lookup table to be used by rtree_[ctx_]start_level(). 
*/ - for (i = 0; i < RTREE_HEIGHT_MAX; i++) { - rtree->start_level[i] = hmin(RTREE_HEIGHT_MAX - 1 - i, height - - 1); - } - rtree->start_level[RTREE_HEIGHT_MAX] = 0; - malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE); return false; @@ -114,13 +101,8 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, void rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { - unsigned i; - - for (i = 0; i < rtree->height; i++) { - rtree_elm_t *subtree = rtree->levels[i].subtree; - if (subtree != NULL) { - rtree_delete_subtree(tsdn, rtree, subtree, i); - } + if (rtree->root_pun != NULL) { + rtree_delete_subtree(tsdn, rtree, rtree->root, 0); } } #endif @@ -146,20 +128,6 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, return node; } -static unsigned -rtree_start_level(const rtree_t *rtree, uintptr_t key) { - unsigned start_level; - - if (unlikely(key == 0)) { - return rtree->height - 1; - } - - start_level = rtree->start_level[(lg_floor(key) + 1) >> - LG_RTREE_BITS_PER_LEVEL]; - assert(start_level < rtree->height); - return start_level; -} - static bool rtree_node_valid(rtree_elm_t *node) { return ((uintptr_t)node != (uintptr_t)0); @@ -192,28 +160,21 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, } static rtree_elm_t * -rtree_subtree_tryread(rtree_t *rtree, unsigned level, bool dependent) { - rtree_elm_t *subtree; - +rtree_subtree_tryread(rtree_t *rtree, bool dependent) { /* Double-checked read (first read may be stale). 
*/ - subtree = rtree->levels[level].subtree; + rtree_elm_t *subtree = rtree->root; if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = (rtree_elm_t *)atomic_read_p( - &rtree->levels[level].subtree_pun); + subtree = (rtree_elm_t *)atomic_read_p(&rtree->root_pun); } assert(!dependent || subtree != NULL); return subtree; } static rtree_elm_t * -rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, - bool dependent) { - rtree_elm_t *subtree; - - subtree = rtree_subtree_tryread(rtree, level, dependent); +rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, bool dependent) { + rtree_elm_t *subtree = rtree_subtree_tryread(rtree, dependent); if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = rtree_node_init(tsdn, rtree, level, - &rtree->levels[level].subtree); + subtree = rtree_node_init(tsdn, rtree, 0, &rtree->root); } assert(!dependent || subtree != NULL); return subtree; @@ -222,13 +183,11 @@ rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, unsigned level, rtree_elm_t * rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { - unsigned start_level = rtree_start_level(rtree, key); rtree_elm_t *node = init_missing ? rtree_subtree_read(tsdn, rtree, - start_level, dependent) : rtree_subtree_tryread(rtree, start_level, - dependent); + dependent) : rtree_subtree_tryread(rtree, dependent); #define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) - switch (start_level + RTREE_GET_BIAS) { + switch (RTREE_GET_BIAS) { #define RTREE_GET_SUBTREE(level) \ case level: { \ assert(level < (RTREE_HEIGHT_MAX-1)); \ From f5cf9b19c85f88ee91b3caf65e2a6d70f4548f31 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 6 Feb 2017 13:17:12 -0800 Subject: [PATCH 0631/2608] Determine rtree levels at compile time. Rather than dynamically building a table to aid per level computations, define a constant table at compile time. Omit both high and low insignificant bits. 
Use one to three tree levels, depending on the number of significant bits. --- configure.ac | 68 +++++ .../internal/jemalloc_internal_defs.h.in | 7 + include/jemalloc/internal/rtree_externs.h | 24 +- include/jemalloc/internal/rtree_inlines.h | 22 +- include/jemalloc/internal/rtree_structs.h | 2 - include/jemalloc/internal/rtree_types.h | 17 +- src/extent.c | 3 +- src/rtree.c | 136 ++-------- test/unit/rtree.c | 233 ++++++++---------- 9 files changed, 244 insertions(+), 268 deletions(-) diff --git a/configure.ac b/configure.ac index 7530eff7..e71edd72 100644 --- a/configure.ac +++ b/configure.ac @@ -406,6 +406,74 @@ case "${host_cpu}" in esac AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) +case "${host_cpu}" in + aarch64) + AC_MSG_CHECKING([number of significant virtual address bits]) + LG_VADDR=48 + AC_MSG_RESULT([$LG_VADDR]) + ;; + x86_64) + AC_CACHE_CHECK([number of significant virtual address bits], + [je_cv_lg_vaddr], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +#include +#ifdef _WIN32 +#include +#include +typedef unsigned __int32 uint32_t; +#else +#include +#endif +]], [[ + uint32_t r[[4]]; + uint32_t eax_in = 0x80000008U; +#ifdef _WIN32 + __cpuid((int *)r, (int)eax_in); +#else + asm volatile ("cpuid" + : "=a" (r[[0]]), "=b" (r[[1]]), "=c" (r[[2]]), "=d" (r[[3]]) + : "a" (eax_in), "c" (0) + ); +#endif + uint32_t eax_out = r[[0]]; + uint32_t vaddr = ((eax_out & 0x0000ff00U) >> 8); + FILE *f = fopen("conftest.out", "w"); + if (f == NULL) { + return 1; + } + fprintf(f, "%u", vaddr); + fclose(f); + return 0; +]])], + [je_cv_lg_vaddr=`cat conftest.out`], + [je_cv_lg_vaddr=error], + [je_cv_lg_vaddr=57])) + if test "x${je_cv_lg_vaddr}" != "x" ; then + LG_VADDR="${je_cv_lg_vaddr}" + fi + if test "x${LG_VADDR}" != "xerror" ; then + AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) + else + AC_MSG_ERROR([cannot determine number of significant virtual address bits]) + fi + ;; + *) + AC_MSG_CHECKING([number of significant virtual address bits]) + if test "x${LG_SIZEOF_PTR}" = 
 "x3" ; then + LG_VADDR=64 + elif test "x${LG_SIZEOF_PTR}" = "x2" ; then + LG_VADDR=32 + elif test "x${LG_SIZEOF_PTR}" = "xLG_SIZEOF_PTR_WIN" ; then + LG_VADDR="(1U << (LG_SIZEOF_PTR_WIN+3))" + else + AC_MSG_ERROR([Unsupported lg(pointer size): ${LG_SIZEOF_PTR}]) + fi + AC_MSG_RESULT([$LG_VADDR]) + ;; +esac +AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) + LD_PRELOAD_VAR="LD_PRELOAD" so="so" importlib="${so}" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 396a1a27..6c70e167 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -22,6 +22,13 @@ */ #undef CPU_SPINWAIT +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#undef LG_VADDR + /* Defined if C11 atomics are available. */ #undef JEMALLOC_C11ATOMICS diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index f4f2feb5..fa53580a 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -1,7 +1,29 @@ #ifndef JEMALLOC_INTERNAL_RTREE_EXTERNS_H #define JEMALLOC_INTERNAL_RTREE_EXTERNS_H -bool rtree_new(rtree_t *rtree, unsigned bits); +/* + * Split the bits into one to three partitions depending on number of + * significant bits. If the number of bits does not divide evenly into the + * number of levels, place one remainder bit per level starting at the leaf + * level. 
+ */ +static const rtree_level_t rtree_levels[] = { +#if RTREE_NSB <= 10 + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} +#elif RTREE_NSB <= 36 + {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, + {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} +#elif RTREE_NSB <= 52 + {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, + {RTREE_NSB/3 + RTREE_NSB%3/2, + RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, + {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} +#else +# error Unsupported number of significant virtual address bits +#endif +}; + +bool rtree_new(rtree_t *rtree); #ifdef JEMALLOC_JET typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); extern rtree_node_alloc_t *rtree_node_alloc; diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 86aa8cd1..4b848541 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -2,8 +2,8 @@ #define JEMALLOC_INTERNAL_RTREE_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE -uintptr_t rtree_leafkey(rtree_t *rtree, uintptr_t key); -uintptr_t rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level); +uintptr_t rtree_leafkey(uintptr_t key); +uintptr_t rtree_subkey(uintptr_t key, unsigned level); extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, @@ -25,21 +25,21 @@ void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leafkey(rtree_t *rtree, uintptr_t key) { +rtree_leafkey(uintptr_t key) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree->levels[rtree->height-1].cumbits - - rtree->levels[rtree->height-1].bits); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); unsigned maskbits = ptrbits - cumbits; uintptr_t 
mask = ~((ZU(1) << maskbits) - 1); return (key & mask); } JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_subkey(rtree_t *rtree, uintptr_t key, unsigned level) { +rtree_subkey(uintptr_t key, unsigned level) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = rtree->levels[level].cumbits; + unsigned cumbits = rtree_levels[level].cumbits; unsigned shiftbits = ptrbits - cumbits; - unsigned maskbits = rtree->levels[level].bits; + unsigned maskbits = rtree_levels[level].bits; unsigned mask = (ZU(1) << maskbits) - 1; return ((key >> shiftbits) & mask); } @@ -82,7 +82,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, assert(!dependent || !init_missing); if (likely(key != 0)) { - uintptr_t leafkey = rtree_leafkey(rtree, key); + uintptr_t leafkey = rtree_leafkey(key); #define RTREE_CACHE_CHECK(i) do { \ if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \ @@ -94,8 +94,8 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_ctx->cache[0].leafkey = leafkey; \ rtree_ctx->cache[0].leaf = leaf; \ \ - uintptr_t subkey = rtree_subkey(rtree, \ - key, rtree->height-1); \ + uintptr_t subkey = rtree_subkey(key, \ + RTREE_HEIGHT-1); \ return &leaf[subkey]; \ } \ } \ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 713d3000..312171e3 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -41,12 +41,10 @@ struct rtree_ctx_s { }; struct rtree_s { - unsigned height; union { void *root_pun; rtree_elm_t *root; }; - rtree_level_t levels[RTREE_HEIGHT_MAX]; malloc_mutex_t init_lock; }; diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index b4ab018d..a654698b 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -16,15 +16,14 @@ typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; 
 typedef struct rtree_ctx_s rtree_ctx_t; typedef struct rtree_s rtree_t; -/* - * RTREE_BITS_PER_LEVEL must be a power of two that is no larger than the - * machine address width. - */ -#define LG_RTREE_BITS_PER_LEVEL 4 -#define RTREE_BITS_PER_LEVEL (1U << LG_RTREE_BITS_PER_LEVEL) -/* Maximum rtree height. */ -#define RTREE_HEIGHT_MAX \ - ((1U << (LG_SIZEOF_PTR+3)) / RTREE_BITS_PER_LEVEL) +/* Number of high insignificant bits. */ +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +/* Number of low insignificant bits. */ +#define RTREE_NLIB LG_PAGE +/* Number of significant bits. */ +#define RTREE_NSB (LG_VADDR - RTREE_NLIB) +/* Number of levels in radix tree. */ +#define RTREE_HEIGHT (sizeof(rtree_levels)/sizeof(rtree_level_t)) /* * Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf, diff --git a/src/extent.c b/src/extent.c index 4a83f694..85c92d0f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1522,8 +1522,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_boot(void) { - if (rtree_new(&extents_rtree, (unsigned)((ZU(1) << (LG_SIZEOF_PTR+3)) - - LG_PAGE))) { + if (rtree_new(&extents_rtree)) { return true; } diff --git a/src/rtree.c b/src/rtree.c index fb52cf68..83929ba6 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -6,46 +6,11 @@ * used. */ bool -rtree_new(rtree_t *rtree, unsigned bits) { - unsigned bits_in_leaf, height, i; - - assert(RTREE_HEIGHT_MAX == ((ZU(1) << (LG_SIZEOF_PTR+3)) / - RTREE_BITS_PER_LEVEL)); - assert(bits > 0 && bits <= (sizeof(uintptr_t) << 3)); - - bits_in_leaf = (bits % RTREE_BITS_PER_LEVEL) == 0 ? 
RTREE_BITS_PER_LEVEL - : (bits % RTREE_BITS_PER_LEVEL); - if (bits > bits_in_leaf) { - height = 1 + (bits - bits_in_leaf) / RTREE_BITS_PER_LEVEL; - if ((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf != bits) { - height++; - } - } else { - height = 1; - } - assert((height-1) * RTREE_BITS_PER_LEVEL + bits_in_leaf == bits); - - rtree->height = height; - +rtree_new(rtree_t *rtree) { rtree->root_pun = NULL; - - /* Root level. */ - rtree->levels[0].bits = (height > 1) ? RTREE_BITS_PER_LEVEL : - bits_in_leaf; - rtree->levels[0].cumbits = rtree->levels[0].bits; - /* Interior levels. */ - for (i = 1; i < height-1; i++) { - rtree->levels[i].bits = RTREE_BITS_PER_LEVEL; - rtree->levels[i].cumbits = rtree->levels[i-1].cumbits + - RTREE_BITS_PER_LEVEL; + if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE)) { + return true; } - /* Leaf level. */ - if (height > 1) { - rtree->levels[height-1].bits = bits_in_leaf; - rtree->levels[height-1].cumbits = bits; - } - - malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE); return false; } @@ -84,10 +49,10 @@ rtree_node_dalloc_t *rtree_node_dalloc = JEMALLOC_N(rtree_node_dalloc_impl); static void rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, unsigned level) { - if (level + 1 < rtree->height) { + if (level + 1 < RTREE_HEIGHT) { size_t nchildren, i; - nchildren = ZU(1) << rtree->levels[level].bits; + nchildren = ZU(1) << rtree_levels[level].bits; for (i = 0; i < nchildren; i++) { rtree_elm_t *child = node[i].child; if (child != NULL) { @@ -116,7 +81,7 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, node = atomic_read_p((void**)elmp); if (node == NULL) { node = rtree_node_alloc(tsdn, rtree, ZU(1) << - rtree->levels[level].bits); + rtree_levels[level].bits); if (node == NULL) { malloc_mutex_unlock(tsdn, &rtree->init_lock); return NULL; @@ -186,24 +151,18 @@ rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_elm_t *node = init_missing ? 
rtree_subtree_read(tsdn, rtree, dependent) : rtree_subtree_tryread(rtree, dependent); -#define RTREE_GET_BIAS (RTREE_HEIGHT_MAX - rtree->height) - switch (RTREE_GET_BIAS) { -#define RTREE_GET_SUBTREE(level) \ - case level: { \ - assert(level < (RTREE_HEIGHT_MAX-1)); \ +#define RTREE_GET_SUBTREE(level) { \ + assert(level < RTREE_HEIGHT-1); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ return NULL; \ } \ - uintptr_t subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ + uintptr_t subkey = rtree_subkey(key, level); \ node = init_missing ? rtree_child_read(tsdn, rtree, \ - &node[subkey], level - RTREE_GET_BIAS, dependent) : \ + &node[subkey], level, dependent) : \ rtree_child_tryread(&node[subkey], dependent); \ - /* Fall through. */ \ } -#define RTREE_GET_LEAF(level) \ - case level: { \ - assert(level == (RTREE_HEIGHT_MAX-1)); \ +#define RTREE_GET_LEAF(level) { \ + assert(level == RTREE_HEIGHT-1); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ return NULL; \ } \ @@ -218,68 +177,27 @@ rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, sizeof(rtree_ctx_cache_elm_t) * \ (RTREE_CTX_NCACHE-1)); \ } \ - uintptr_t leafkey = rtree_leafkey(rtree, key); \ + uintptr_t leafkey = rtree_leafkey(key); \ rtree_ctx->cache[0].leafkey = leafkey; \ rtree_ctx->cache[0].leaf = node; \ } \ - uintptr_t subkey = rtree_subkey(rtree, key, level - \ - RTREE_GET_BIAS); \ + uintptr_t subkey = rtree_subkey(key, level); \ return &node[subkey]; \ } -#if RTREE_HEIGHT_MAX > 1 - RTREE_GET_SUBTREE(0) -#endif -#if RTREE_HEIGHT_MAX > 2 - RTREE_GET_SUBTREE(1) -#endif -#if RTREE_HEIGHT_MAX > 3 - RTREE_GET_SUBTREE(2) -#endif -#if RTREE_HEIGHT_MAX > 4 - RTREE_GET_SUBTREE(3) -#endif -#if RTREE_HEIGHT_MAX > 5 - RTREE_GET_SUBTREE(4) -#endif -#if RTREE_HEIGHT_MAX > 6 - RTREE_GET_SUBTREE(5) -#endif -#if RTREE_HEIGHT_MAX > 7 - RTREE_GET_SUBTREE(6) -#endif -#if RTREE_HEIGHT_MAX > 8 - RTREE_GET_SUBTREE(7) -#endif -#if RTREE_HEIGHT_MAX > 9 - 
RTREE_GET_SUBTREE(8) -#endif -#if RTREE_HEIGHT_MAX > 10 - RTREE_GET_SUBTREE(9) -#endif -#if RTREE_HEIGHT_MAX > 11 - RTREE_GET_SUBTREE(10) -#endif -#if RTREE_HEIGHT_MAX > 12 - RTREE_GET_SUBTREE(11) -#endif -#if RTREE_HEIGHT_MAX > 13 - RTREE_GET_SUBTREE(12) -#endif -#if RTREE_HEIGHT_MAX > 14 - RTREE_GET_SUBTREE(13) -#endif -#if RTREE_HEIGHT_MAX > 15 - RTREE_GET_SUBTREE(14) -#endif -#if RTREE_HEIGHT_MAX > 16 -# error Unsupported RTREE_HEIGHT_MAX -#endif - RTREE_GET_LEAF(RTREE_HEIGHT_MAX-1) + if (RTREE_HEIGHT > 1) { + RTREE_GET_SUBTREE(0) + } + if (RTREE_HEIGHT > 2) { + RTREE_GET_SUBTREE(1) + } + if (RTREE_HEIGHT > 3) { + for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) { + RTREE_GET_SUBTREE(i) + } + } + RTREE_GET_LEAF(RTREE_HEIGHT-1) #undef RTREE_GET_SUBTREE #undef RTREE_GET_LEAF - default: not_reached(); - } -#undef RTREE_GET_BIAS not_reached(); } @@ -351,7 +269,7 @@ rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, witness_init(&rew->witness, "rtree_elm", WITNESS_RANK_RTREE_ELM, rtree_elm_witness_comp, NULL); - return; + return; } } not_reached(); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index d40e6490..2088595b 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -33,31 +33,26 @@ rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { TEST_BEGIN(test_rtree_read_empty) { tsdn_t *tsdn; - unsigned i; tsdn = tsdn_fetch(); - for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - test_rtree = &rtree; - assert_false(rtree_new(&rtree, i), - "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, 0, false), - "rtree_read() should return NULL for empty tree"); - rtree_delete(tsdn, &rtree); - test_rtree = NULL; - } + rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + test_rtree = &rtree; + assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, 0, false), + 
"rtree_read() should return NULL for empty tree"); + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } TEST_END #define NTHREADS 8 -#define MAX_NBITS 18 +#define MAX_NBITS 30 #define NITERS 1000 #define SEED 42 typedef struct { - unsigned nbits; rtree_t rtree; uint32_t seed; } thd_start_arg_t; @@ -77,7 +72,8 @@ thd_start(void *varg) { tsdn = tsdn_fetch(); for (i = 0; i < NITERS; i++) { - uintptr_t key = (uintptr_t)gen_rand64(sfmt); + uintptr_t key = (uintptr_t)(gen_rand64(sfmt) & ((ZU(1) << + MAX_NBITS) - ZU(1))); if (i % 2 == 0) { rtree_elm_t *elm; @@ -110,165 +106,134 @@ TEST_BEGIN(test_rtree_concurrent) { thd_t thds[NTHREADS]; sfmt_t *sfmt; tsdn_t *tsdn; - unsigned i, j; sfmt = init_gen_rand(SEED); tsdn = tsdn_fetch(); - for (i = 1; i < MAX_NBITS; i++) { - arg.nbits = i; - test_rtree = &arg.rtree; - assert_false(rtree_new(&arg.rtree, arg.nbits), - "Unexpected rtree_new() failure"); - arg.seed = gen_rand32(sfmt); - for (j = 0; j < NTHREADS; j++) { - thd_create(&thds[j], thd_start, (void *)&arg); - } - for (j = 0; j < NTHREADS; j++) { - thd_join(thds[j], NULL); - } - rtree_delete(tsdn, &arg.rtree); - test_rtree = NULL; + test_rtree = &arg.rtree; + assert_false(rtree_new(&arg.rtree), "Unexpected rtree_new() failure"); + arg.seed = gen_rand32(sfmt); + for (unsigned i = 0; i < NTHREADS; i++) { + thd_create(&thds[i], thd_start, (void *)&arg); } + for (unsigned i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } + rtree_delete(tsdn, &arg.rtree); + test_rtree = NULL; fini_gen_rand(sfmt); } TEST_END #undef NTHREADS -#undef MAX_NBITS #undef NITERS #undef SEED TEST_BEGIN(test_rtree_extrema) { - unsigned i; extent_t extent_a, extent_b; tsdn_t *tsdn; tsdn = tsdn_fetch(); - for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - rtree_t rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - test_rtree = &rtree; - assert_false(rtree_new(&rtree, i), - "Unexpected rtree_new() failure"); + rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + test_rtree = 
&rtree; + assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, 0, - &extent_a), "Unexpected rtree_write() failure, i=%u", i); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, 0, true), - &extent_a, - "rtree_read() should return previously set value, i=%u", i); + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, 0, &extent_a), + "Unexpected rtree_write() failure"); + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, 0, true), &extent_a, + "rtree_read() should return previously set value"); - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, - ~((uintptr_t)0), &extent_b), - "Unexpected rtree_write() failure, i=%u", i); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, - ~((uintptr_t)0), true), &extent_b, - "rtree_read() should return previously set value, i=%u", i); + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, ~((uintptr_t)0), + &extent_b), "Unexpected rtree_write() failure"); + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, ~((uintptr_t)0), + true), &extent_b, + "rtree_read() should return previously set value"); - rtree_delete(tsdn, &rtree); - test_rtree = NULL; - } + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } TEST_END TEST_BEGIN(test_rtree_bits) { - tsdn_t *tsdn; - unsigned i, j, k; + tsdn_t *tsdn = tsdn_fetch(); - tsdn = tsdn_fetch(); + uintptr_t keys[] = {0, 1, (((uintptr_t)1) << LG_PAGE) - 1}; - for (i = 1; i < (sizeof(uintptr_t) << 3); i++) { - uintptr_t keys[] = {0, 1, - (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)) - 1}; - extent_t extent; - rtree_t rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + extent_t extent; + rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - test_rtree = &rtree; - assert_false(rtree_new(&rtree, i), - "Unexpected rtree_new() failure"); + test_rtree = &rtree; + assert_false(rtree_new(&rtree), + "Unexpected rtree_new() failure"); - for (j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_false(rtree_write(tsdn, 
&rtree, &rtree_ctx, - keys[j], &extent), - "Unexpected rtree_write() failure"); - for (k = 0; k < sizeof(keys)/sizeof(uintptr_t); k++) { - assert_ptr_eq(rtree_read(tsdn, &rtree, - &rtree_ctx, keys[k], true), &extent, - "rtree_read() should return previously set " - "value and ignore insignificant key bits; " - "i=%u, j=%u, k=%u, set key=%#"FMTxPTR", " - "get key=%#"FMTxPTR, i, j, k, keys[j], - keys[k]); - } - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, - (((uintptr_t)1) << (sizeof(uintptr_t)*8-i)), false), - "Only leftmost rtree leaf should be set; " - "i=%u, j=%u", i, j); - rtree_clear(tsdn, &rtree, &rtree_ctx, keys[j]); + for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, keys[i], + &extent), "Unexpected rtree_write() failure"); + for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, + keys[j], true), &extent, + "rtree_read() should return previously set " + "value and ignore insignificant key bits; " + "i=%u, j=%u, set key=%#"FMTxPTR", get " + "key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - - rtree_delete(tsdn, &rtree); - test_rtree = NULL; + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, + (((uintptr_t)1) << LG_PAGE), false), + "Only leftmost rtree leaf should be set; i=%u", i); + rtree_clear(tsdn, &rtree, &rtree_ctx, keys[i]); } + + rtree_delete(tsdn, &rtree); + test_rtree = NULL; } TEST_END TEST_BEGIN(test_rtree_random) { - unsigned i; - sfmt_t *sfmt; - tsdn_t *tsdn; #define NSET 16 #define SEED 42 + sfmt_t *sfmt = init_gen_rand(SEED); + tsdn_t *tsdn = tsdn_fetch(); + uintptr_t keys[NSET]; + extent_t extent; + rtree_t rtree; + rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + rtree_elm_t *elm; - sfmt = init_gen_rand(SEED); - tsdn = tsdn_fetch(); - for (i = 1; i <= (sizeof(uintptr_t) << 3); i++) { - uintptr_t keys[NSET]; - extent_t extent; - unsigned j; - rtree_t rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - 
rtree_elm_t *elm; + test_rtree = &rtree; + assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - test_rtree = &rtree; - assert_false(rtree_new(&rtree, i), - "Unexpected rtree_new() failure"); - - for (j = 0; j < NSET; j++) { - keys[j] = (uintptr_t)gen_rand64(sfmt); - elm = rtree_elm_acquire(tsdn, &rtree, &rtree_ctx, - keys[j], false, true); - assert_ptr_not_null(elm, - "Unexpected rtree_elm_acquire() failure"); - rtree_elm_write_acquired(tsdn, &rtree, elm, &extent); - rtree_elm_release(tsdn, &rtree, elm); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, - keys[j], true), &extent, - "rtree_read() should return previously set value"); - } - for (j = 0; j < NSET; j++) { - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, - keys[j], true), &extent, - "rtree_read() should return previously set value, " - "j=%u", j); - } - - for (j = 0; j < NSET; j++) { - rtree_clear(tsdn, &rtree, &rtree_ctx, keys[j]); - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, - keys[j], true), - "rtree_read() should return previously set value"); - } - for (j = 0; j < NSET; j++) { - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, - keys[j], true), - "rtree_read() should return previously set value"); - } - - rtree_delete(tsdn, &rtree); - test_rtree = NULL; + for (unsigned i = 0; i < NSET; i++) { + keys[i] = (uintptr_t)gen_rand64(sfmt); + elm = rtree_elm_acquire(tsdn, &rtree, &rtree_ctx, keys[i], + false, true); + assert_ptr_not_null(elm, + "Unexpected rtree_elm_acquire() failure"); + rtree_elm_write_acquired(tsdn, &rtree, elm, &extent); + rtree_elm_release(tsdn, &rtree, elm); + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], + true), &extent, + "rtree_read() should return previously set value"); } + for (unsigned i = 0; i < NSET; i++) { + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], + true), &extent, + "rtree_read() should return previously set value, i=%u", i); + } + + for (unsigned i = 0; i < NSET; i++) { + rtree_clear(tsdn, &rtree, 
&rtree_ctx, keys[i]); + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], + true), "rtree_read() should return previously set value"); + } + for (unsigned i = 0; i < NSET; i++) { + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], + true), "rtree_read() should return previously set value"); + } + + rtree_delete(tsdn, &rtree); + test_rtree = NULL; fini_gen_rand(sfmt); #undef NSET #undef SEED From 650c070e102daeedd643dc79b463603a1ea18497 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Feb 2017 02:50:59 -0800 Subject: [PATCH 0632/2608] Remove rtree support for 0 (NULL) keys. NULL can never actually be inserted in practice, and removing support allows a branch to be removed from the fast path. --- include/jemalloc/internal/rtree_inlines.h | 58 +++++++++++------------ src/rtree.c | 18 ++++--- test/unit/rtree.c | 12 +++-- 3 files changed, 43 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 4b848541..0d96948b 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -79,43 +79,41 @@ rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { JEMALLOC_ALWAYS_INLINE rtree_elm_t * rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { + assert(key != 0); assert(!dependent || !init_missing); - if (likely(key != 0)) { - uintptr_t leafkey = rtree_leafkey(key); + uintptr_t leafkey = rtree_leafkey(key); #define RTREE_CACHE_CHECK(i) do { \ - if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ - rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \ - if (likely(leaf != NULL)) { \ - /* Reorder. 
*/ \ - memmove(&rtree_ctx->cache[1], \ - &rtree_ctx->cache[0], \ - sizeof(rtree_ctx_cache_elm_t) * i); \ - rtree_ctx->cache[0].leafkey = leafkey; \ - rtree_ctx->cache[0].leaf = leaf; \ + if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ + rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \ + if (likely(leaf != NULL)) { \ + /* Reorder. */ \ + memmove(&rtree_ctx->cache[1], \ + &rtree_ctx->cache[0], \ + sizeof(rtree_ctx_cache_elm_t) * i); \ + rtree_ctx->cache[0].leafkey = leafkey; \ + rtree_ctx->cache[0].leaf = leaf; \ \ - uintptr_t subkey = rtree_subkey(key, \ - RTREE_HEIGHT-1); \ - return &leaf[subkey]; \ - } \ + uintptr_t subkey = rtree_subkey(key, \ + RTREE_HEIGHT-1); \ + return &leaf[subkey]; \ } \ + } \ } while (0) - /* Check the MRU cache entry. */ - RTREE_CACHE_CHECK(0); - /* - * Search the remaining cache elements, and on success move the - * matching element to the front. Unroll the first iteration to - * avoid calling memmove() (the compiler typically optimizes it - * into raw moves). - */ - if (RTREE_CTX_NCACHE > 1) { - RTREE_CACHE_CHECK(1); - } - for (unsigned i = 2; i < RTREE_CTX_NCACHE; i++) { - RTREE_CACHE_CHECK(i); - } -#undef RTREE_CACHE_CHECK + /* Check the MRU cache entry. */ + RTREE_CACHE_CHECK(0); + /* + * Search the remaining cache elements, and on success move the matching + * element to the front. Unroll the first iteration to avoid calling + * memmove() (the compiler typically optimizes it into raw moves). + */ + if (RTREE_CTX_NCACHE > 1) { + RTREE_CACHE_CHECK(1); } + for (unsigned i = 2; i < RTREE_CTX_NCACHE; i++) { + RTREE_CACHE_CHECK(i); + } +#undef RTREE_CACHE_CHECK return rtree_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, dependent, init_missing); diff --git a/src/rtree.c b/src/rtree.c index 83929ba6..a86fa45d 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -170,17 +170,15 @@ rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, * node is a leaf, so it contains values rather than \ * child pointers. 
\ */ \ - if (likely(key != 0)) { \ - if (RTREE_CTX_NCACHE > 1) { \ - memmove(&rtree_ctx->cache[1], \ - &rtree_ctx->cache[0], \ - sizeof(rtree_ctx_cache_elm_t) * \ - (RTREE_CTX_NCACHE-1)); \ - } \ - uintptr_t leafkey = rtree_leafkey(key); \ - rtree_ctx->cache[0].leafkey = leafkey; \ - rtree_ctx->cache[0].leaf = node; \ + if (RTREE_CTX_NCACHE > 1) { \ + memmove(&rtree_ctx->cache[1], \ + &rtree_ctx->cache[0], \ + sizeof(rtree_ctx_cache_elm_t) * \ + (RTREE_CTX_NCACHE-1)); \ } \ + uintptr_t leafkey = rtree_leafkey(key); \ + rtree_ctx->cache[0].leafkey = leafkey; \ + rtree_ctx->cache[0].leaf = node; \ uintptr_t subkey = rtree_subkey(key, level); \ return &node[subkey]; \ } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 2088595b..488fd54b 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -40,7 +40,7 @@ TEST_BEGIN(test_rtree_read_empty) { rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; test_rtree = &rtree; assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, 0, false), + assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, PAGE, false), "rtree_read() should return NULL for empty tree"); rtree_delete(tsdn, &rtree); test_rtree = NULL; @@ -139,9 +139,10 @@ TEST_BEGIN(test_rtree_extrema) { test_rtree = &rtree; assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, 0, &extent_a), + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, PAGE, &extent_a), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, 0, true), &extent_a, + assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, PAGE, true), + &extent_a, "rtree_read() should return previously set value"); assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, ~((uintptr_t)0), @@ -158,7 +159,8 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); - uintptr_t keys[] = {0, 1, (((uintptr_t)1) << LG_PAGE) - 1}; + uintptr_t keys[] = {PAGE, 
PAGE + 1, + PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; extent_t extent; rtree_t rtree; @@ -180,7 +182,7 @@ TEST_BEGIN(test_rtree_bits) { "key=%#"FMTxPTR, i, j, keys[i], keys[j]); } assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, - (((uintptr_t)1) << LG_PAGE), false), + (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, &rtree, &rtree_ctx, keys[i]); } From 5f118307543b128e1ad6298ec2ab1acd71140095 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Feb 2017 23:57:16 -0800 Subject: [PATCH 0633/2608] Replace spin_init() with SPIN_INITIALIZER. --- include/jemalloc/internal/private_symbols.txt | 1 - include/jemalloc/internal/spin_inlines.h | 6 ------ include/jemalloc/internal/spin_types.h | 2 ++ src/extent_dss.c | 3 +-- src/jemalloc.c | 4 +--- 5 files changed, 4 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3f29d3fe..2c824541 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -445,7 +445,6 @@ size2index_compute size2index_lookup size2index_tab spin_adaptive -spin_init stats_print tcache_alloc_easy tcache_alloc_large diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h index 1ffc4232..03beeada 100644 --- a/include/jemalloc/internal/spin_inlines.h +++ b/include/jemalloc/internal/spin_inlines.h @@ -2,16 +2,10 @@ #define JEMALLOC_INTERNAL_SPIN_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE -void spin_init(spin_t *spin); void spin_adaptive(spin_t *spin); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) -JEMALLOC_INLINE void -spin_init(spin_t *spin) { - spin->iteration = 0; -} - JEMALLOC_INLINE void spin_adaptive(spin_t *spin) { volatile uint64_t i; diff --git a/include/jemalloc/internal/spin_types.h b/include/jemalloc/internal/spin_types.h index 52ee4cc1..222e0698 100644 --- 
a/include/jemalloc/internal/spin_types.h +++ b/include/jemalloc/internal/spin_types.h @@ -3,4 +3,6 @@ typedef struct spin_s spin_t; +#define SPIN_INITIALIZER {0U} + #endif /* JEMALLOC_INTERNAL_SPIN_TYPES_H */ diff --git a/src/extent_dss.c b/src/extent_dss.c index a3cfab26..0b4e1fe3 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -62,13 +62,12 @@ extent_dss_prec_set(dss_prec_t dss_prec) { static void * extent_dss_max_update(void *new_addr) { void *max_cur; - spin_t spinner; /* * Get the current end of the DSS as max_cur and assure that dss_max is * up to date. */ - spin_init(&spinner); + spin_t spinner = SPIN_INITIALIZER; while (true) { void *max_prev = atomic_read_p(&dss_max); diff --git a/src/jemalloc.c b/src/jemalloc.c index 48be4a3f..d2c33bbc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1153,10 +1153,8 @@ malloc_init_hard_needed(void) { } #ifdef JEMALLOC_THREADED_INIT if (malloc_initializer != NO_INITIALIZER && !IS_INITIALIZER) { - spin_t spinner; - /* Busy-wait until the initializing thread completes. */ - spin_init(&spinner); + spin_t spinner = SPIN_INITIALIZER; do { malloc_mutex_unlock(TSDN_NULL, &init_lock); spin_adaptive(&spinner); From de8a68e85304848189643fb48100c18aa9d60e32 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 8 Feb 2017 10:30:44 -0800 Subject: [PATCH 0634/2608] Enhance spin_adaptive() to yield after several iterations. This avoids worst case behavior if e.g. another thread is preempted while owning the resource the spinning thread is waiting for. 
--- Makefile.in | 1 + include/jemalloc/internal/spin_inlines.h | 17 +++++++++++------ test/unit/spin.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 test/unit/spin.c diff --git a/Makefile.in b/Makefile.in index acd31f73..23056f78 100644 --- a/Makefile.in +++ b/Makefile.in @@ -189,6 +189,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ + $(srcroot)test/unit/spin.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/ticker.c \ diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h index 03beeada..16573261 100644 --- a/include/jemalloc/internal/spin_inlines.h +++ b/include/jemalloc/internal/spin_inlines.h @@ -8,14 +8,19 @@ void spin_adaptive(spin_t *spin); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) JEMALLOC_INLINE void spin_adaptive(spin_t *spin) { - volatile uint64_t i; + volatile uint32_t i; - for (i = 0; i < (KQU(1) << spin->iteration); i++) { - CPU_SPINWAIT; - } - - if (spin->iteration < 63) { + if (spin->iteration < 5) { + for (i = 0; i < (1U << spin->iteration); i++) { + CPU_SPINWAIT; + } spin->iteration++; + } else { +#ifdef _WIN32 + SwitchToThread(); +#else + sched_yield(); +#endif } } diff --git a/test/unit/spin.c b/test/unit/spin.c new file mode 100644 index 00000000..bd368b3d --- /dev/null +++ b/test/unit/spin.c @@ -0,0 +1,16 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_spin) { + spin_t spinner = SPIN_INITIALIZER; + + for (unsigned i = 0; i < 100; i++) { + spin_adaptive(&spinner); + } +} +TEST_END + +int +main(void) { + return test( + test_spin); +} From db7da563595e49fa56cfd2b94cc77fed3d8ac755 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 5 Feb 2017 23:58:02 -0800 Subject: [PATCH 0635/2608] Spin adaptively in rtree_elm_acquire(). 
--- include/jemalloc/internal/rtree_inlines.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 0d96948b..4de04795 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -153,21 +153,22 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, JEMALLOC_INLINE rtree_elm_t * rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { - rtree_elm_t *elm; - - elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, - init_missing); + rtree_elm_t *elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, + dependent, init_missing); if (!dependent && elm == NULL) { return NULL; } - extent_t *extent; - void *s; - do { - extent = rtree_elm_read(elm, false); + spin_t spinner = SPIN_INITIALIZER; + while (true) { + extent_t *extent = rtree_elm_read(elm, false); /* The least significant bit serves as a lock. */ - s = (void *)((uintptr_t)extent | (uintptr_t)0x1); - } while (atomic_cas_p(&elm->pun, (void *)extent, s)); + void *s = (void *)((uintptr_t)extent | (uintptr_t)0x1); + if (!atomic_cas_p(&elm->pun, (void *)extent, s)) { + break; + } + spin_adaptive(&spinner); + } if (config_debug) { rtree_elm_witness_acquire(tsdn, rtree, key, elm); From 7f55dbef9b2a2b93e021d47fa4e6d69c1a633155 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Feb 2017 09:06:22 -0800 Subject: [PATCH 0636/2608] Enable mutex witnesses even when !isthreaded. This fixes interactions with witness_assert_depth[_to_rank](), which was added in d0e93ada51e20f4ae394ff4dbdcf96182767c89c (Add witness_assert_depth[_to_rank]().). 
--- include/jemalloc/internal/mutex_inlines.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 0c6c5dd5..c0c3cfe9 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -11,8 +11,8 @@ void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) JEMALLOC_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn, &mutex->witness); if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -26,14 +26,14 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { #else pthread_mutex_lock(&mutex->lock); #endif - witness_lock(tsdn, &mutex->witness); } + witness_lock(tsdn, &mutex->witness); } JEMALLOC_INLINE void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_unlock(tsdn, &mutex->witness); if (isthreaded) { - witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -52,16 +52,12 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { JEMALLOC_INLINE void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) { - witness_assert_owner(tsdn, &mutex->witness); - } + witness_assert_owner(tsdn, &mutex->witness); } JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); - } + witness_assert_not_owner(tsdn, &mutex->witness); } #endif From 6b8ef771a9de9318964f8b5b7cff5ea3958f0294 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Feb 2017 12:31:11 -0800 Subject: [PATCH 0637/2608] Fix rtree_subkey() regression. 
Fix rtree_subkey() to use uintptr_t rather than unsigned for key bitmasking. This regression was introduced by 4a346f55939af4f200121cc4454089592d952f18 (Replace rtree path cache with LRU cache.). --- include/jemalloc/internal/rtree_inlines.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 4de04795..f2efd710 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -40,7 +40,7 @@ rtree_subkey(uintptr_t key, unsigned level) { unsigned cumbits = rtree_levels[level].cumbits; unsigned shiftbits = ptrbits - cumbits; unsigned maskbits = rtree_levels[level].bits; - unsigned mask = (ZU(1) << maskbits) - 1; + uintptr_t mask = (ZU(1) << maskbits) - 1; return ((key >> shiftbits) & mask); } From cd2501efd621b76f799d9f264385b348c6e6678d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Feb 2017 13:00:59 -0800 Subject: [PATCH 0638/2608] Fix extent_alloc_dss() regression. Fix extent_alloc_dss() to account for bytes that are not a multiple of the page size. This regression was introduced by 577d4572b0821a15e5370f9bf566d884b7cf707c (Make dss operations lockless.), which was first released in 4.3.0. --- src/extent_dss.c | 48 +++++++++++++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 0b4e1fe3..50825713 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -121,35 +121,45 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * malloc. */ while (true) { - void *ret, *max_cur, *gap_addr, *dss_next, *dss_prev; - size_t gap_size; - intptr_t incr; - - max_cur = extent_dss_max_update(new_addr); + void *max_cur = extent_dss_max_update(new_addr); if (max_cur == NULL) { goto label_oom; } /* - * Compute how much gap space (if any) is necessary to - * satisfy alignment. This space can be recycled for - * later use. 
+ * Compute how much page-aligned gap space (if any) is + * necessary to satisfy alignment. This space can be + * recycled for later use. */ - gap_addr = (void *)(PAGE_CEILING((uintptr_t)max_cur)); - ret = (void *)ALIGNMENT_CEILING((uintptr_t)gap_addr, - PAGE_CEILING(alignment)); - gap_size = (uintptr_t)ret - (uintptr_t)gap_addr; - if (gap_size != 0) { - extent_init(gap, arena, gap_addr, gap_size, - gap_size, arena_extent_sn_next(arena), + void *gap_addr_page = (void *)(PAGE_CEILING( + (uintptr_t)max_cur)); + void *ret = (void *)ALIGNMENT_CEILING( + (uintptr_t)gap_addr_page, alignment); + size_t gap_size_page = (uintptr_t)ret - + (uintptr_t)gap_addr_page; + if (gap_size_page != 0) { + extent_init(gap, arena, gap_addr_page, + gap_size_page, gap_size_page, + arena_extent_sn_next(arena), extent_state_active, false, true, false); } - dss_next = (void *)((uintptr_t)ret + size); + /* + * Compute the address just past the end of the desired + * allocation space. + */ + void *dss_next = (void *)((uintptr_t)ret + size); if ((uintptr_t)ret < (uintptr_t)max_cur || (uintptr_t)dss_next < (uintptr_t)max_cur) { goto label_oom; /* Wrap-around. */ } - incr = gap_size + size; + /* Compute the increment, including subpage bytes. */ + void *gap_addr_subpage = max_cur; + size_t gap_size_subpage = (uintptr_t)ret - + (uintptr_t)gap_addr_subpage; + intptr_t incr = gap_size_subpage + size; + + assert((uintptr_t)max_cur + incr == (uintptr_t)ret + + size); /* * Optimistically update dss_max, and roll back below if @@ -162,10 +172,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } /* Try to allocate. */ - dss_prev = extent_dss_sbrk(incr); + void *dss_prev = extent_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. 
*/ - if (gap_size != 0) { + if (gap_size_page != 0) { extent_dalloc_gap(tsdn, arena, gap); } else { extent_dalloc(tsdn, arena, gap); From 0721b895ffac734155956b8d3288c57234093c3a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 18:28:30 -0800 Subject: [PATCH 0639/2608] Do not generate unused tsd_*_[gs]et() functions. This avoids a gcc diagnostic note: note: The ABI for passing parameters with 64-byte alignment has changed in GCC 4.6 This note related to the cacheline alignment of rtree_ctx_t, which was introduced by 4a346f55939af4f200121cc4454089592d952f18 (Replace rtree path cache with LRU cache.). --- include/jemalloc/internal/private_symbols.txt | 8 ----- include/jemalloc/internal/tsd_inlines.h | 20 ++++++----- include/jemalloc/internal/tsd_structs.h | 34 ++++++++++--------- src/tsd.c | 2 +- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 2c824541..ab5a672c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -485,8 +485,6 @@ ticker_ticks tsd_arena_get tsd_arena_set tsd_arenap_get -tsd_arenas_tdata_bypass_get -tsd_arenas_tdata_bypass_set tsd_arenas_tdata_bypassp_get tsd_arenas_tdata_get tsd_arenas_tdata_set @@ -518,11 +516,7 @@ tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set tsd_prof_tdatap_get -tsd_rtree_ctx_get -tsd_rtree_ctx_set tsd_rtree_ctxp_get -tsd_rtree_elm_witnesses_get -tsd_rtree_elm_witnesses_set tsd_rtree_elm_witnessesp_get tsd_set tsd_tcache_enabled_get @@ -543,8 +537,6 @@ tsd_tsdn tsd_witness_fork_get tsd_witness_fork_set tsd_witness_forkp_get -tsd_witnesses_get -tsd_witnesses_set tsd_witnessesp_get tsdn_fetch tsdn_null diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 3e5860ae..1457c03e 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -8,7 +8,7 @@ tsd_t 
*tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); -#define O(n, t, c) \ +#define O(n, t, gs, c) \ t *tsd_##n##p_get(tsd_t *tsd); \ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); @@ -64,23 +64,27 @@ tsd_nominal(tsd_t *tsd) { return (tsd->state == tsd_state_nominal); } -#define O(n, t, c) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) { \ - return &tsd->n; \ -} \ - \ +#define MALLOC_TSD_getset_yes(n, t) \ JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) { \ return *tsd_##n##p_get(tsd); \ } \ - \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t n) { \ assert(tsd->state == tsd_state_nominal); \ tsd->n = n; \ } +#define MALLOC_TSD_getset_no(n, t) +#define O(n, t, gs, c) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) { \ + return &tsd->n; \ +} \ + \ +MALLOC_TSD_getset_##gs(n, t) MALLOC_TSD +#undef MALLOC_TSD_getset_yes +#undef MALLOC_TSD_getset_no #undef O JEMALLOC_ALWAYS_INLINE tsdn_t * diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index ca013208..503021e7 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -15,21 +15,23 @@ struct tsd_init_head_s { #endif #define MALLOC_TSD \ -/* O(name, type, cleanup) */ \ - O(tcache, tcache_t *, yes) \ - O(thread_allocated, uint64_t, no) \ - O(thread_deallocated, uint64_t, no) \ - O(prof_tdata, prof_tdata_t *, yes) \ - O(iarena, arena_t *, yes) \ - O(arena, arena_t *, yes) \ - O(arenas_tdata, arena_tdata_t *, yes) \ - O(narenas_tdata, unsigned, no) \ - O(arenas_tdata_bypass, bool, no) \ - O(tcache_enabled, tcache_enabled_t, no) \ - O(rtree_ctx, rtree_ctx_t, no) \ - O(witnesses, witness_list_t, yes) \ - O(rtree_elm_witnesses, rtree_elm_witness_tsd_t,no) \ - O(witness_fork, bool, no) \ +/* O(name, type, [gs]et, cleanup) */ \ + O(tcache, tcache_t *, yes, yes) \ + O(thread_allocated, uint64_t, yes, no) \ + 
O(thread_deallocated, uint64_t, yes, no) \ + O(prof_tdata, prof_tdata_t *, yes, yes) \ + O(iarena, arena_t *, yes, yes) \ + O(arena, arena_t *, yes, yes) \ + O(arenas_tdata, arena_tdata_t *,yes, yes) \ + O(narenas_tdata, unsigned, yes, no) \ + O(arenas_tdata_bypass, bool, no, no) \ + O(tcache_enabled, tcache_enabled_t, \ + yes, no) \ + O(rtree_ctx, rtree_ctx_t, no, no) \ + O(witnesses, witness_list_t, no, yes) \ + O(rtree_elm_witnesses, rtree_elm_witness_tsd_t, \ + no, no) \ + O(witness_fork, bool, yes, no) \ #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -51,7 +53,7 @@ struct tsd_init_head_s { struct tsd_s { tsd_state_t state; -#define O(n, t, c) \ +#define O(n, t, gs, c) \ t n; MALLOC_TSD #undef O diff --git a/src/tsd.c b/src/tsd.c index 7d56e689..9614dd9a 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -72,7 +72,7 @@ tsd_cleanup(void *arg) { #define MALLOC_TSD_cleanup_yes(n, t) \ n##_cleanup(tsd); #define MALLOC_TSD_cleanup_no(n, t) -#define O(n, t, c) \ +#define O(n, t, gs, c) \ MALLOC_TSD_cleanup_##c(n, t) MALLOC_TSD #undef MALLOC_TSD_cleanup_yes From b779522b9b81f8a53a1f147968a890af8664b213 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 16:34:36 -0800 Subject: [PATCH 0640/2608] Convert arena->dss_prec synchronization to atomics. 
--- include/jemalloc/internal/arena_externs.h | 4 ++-- include/jemalloc/internal/arena_structs_b.h | 2 +- src/arena.c | 17 +++++------------ src/ctl.c | 4 ++-- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index d0af91bf..d6556dae 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -60,8 +60,8 @@ bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena); -bool arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(arena_t *arena); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 8629446d..dde26894 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -156,7 +156,7 @@ struct arena_s { */ size_t extent_sn_next; - /* Synchronization: lock. */ + /* Synchronization: atomic. 
*/ dss_prec_t dss_prec; /* diff --git a/src/arena.c b/src/arena.c index 5905306c..345c57df 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1404,23 +1404,16 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, } dss_prec_t -arena_dss_prec_get(tsdn_t *tsdn, arena_t *arena) { - dss_prec_t ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena->dss_prec; - malloc_mutex_unlock(tsdn, &arena->lock); - return ret; +arena_dss_prec_get(arena_t *arena) { + return (dss_prec_t)atomic_read_u((unsigned *)&arena->dss_prec); } bool -arena_dss_prec_set(tsdn_t *tsdn, arena_t *arena, dss_prec_t dss_prec) { +arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) { return (dss_prec != dss_prec_disabled); } - malloc_mutex_lock(tsdn, &arena->lock); - arena->dss_prec = dss_prec; - malloc_mutex_unlock(tsdn, &arena->lock); + atomic_write_u((unsigned *)&arena->dss_prec, dss_prec); return false; } @@ -1442,7 +1435,7 @@ static void arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { *nthreads += arena_nthreads_get(arena, false); - *dss = dss_prec_names[arena->dss_prec]; + *dss = dss_prec_names[arena_dss_prec_get(arena)]; *decay_time = arena->decay.time; *nactive += atomic_read_zu(&arena->nactive); *ndirty += extents_npages_get(&arena->extents_cached); diff --git a/src/ctl.c b/src/ctl.c index 403bc30c..0bf4258e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1767,11 +1767,11 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } else { arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(tsd_tsdn(tsd), arena, dss_prec))) { + arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; goto label_return; } - dss_prec_old = arena_dss_prec_get(tsd_tsdn(tsd), arena); + dss_prec_old = arena_dss_prec_get(arena); } dss = dss_prec_names[dss_prec_old]; From 
fa2d64c94b07ee21a0f6f44b9fe6e3bbefa51c6c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 17:03:46 -0800 Subject: [PATCH 0641/2608] Convert arena->prof_accumbytes synchronization to atomics. --- include/jemalloc/internal/arena_inlines_a.h | 34 +-------- include/jemalloc/internal/arena_structs_b.h | 3 +- include/jemalloc/internal/atomic_inlines.h | 4 +- include/jemalloc/internal/atomic_types.h | 8 ++ .../jemalloc/internal/jemalloc_internal.h.in | 7 +- include/jemalloc/internal/private_symbols.txt | 5 +- include/jemalloc/internal/prof_externs.h | 1 + include/jemalloc/internal/prof_inlines_a.h | 76 +++++++++++++++++++ .../{prof_inlines.h => prof_inlines_b.h} | 6 +- include/jemalloc/internal/prof_structs.h | 7 ++ include/jemalloc/internal/prof_types.h | 1 + include/jemalloc/internal/witness_types.h | 1 + src/arena.c | 18 +---- src/prof.c | 14 ++++ src/tcache.c | 2 +- 15 files changed, 128 insertions(+), 59 deletions(-) create mode 100644 include/jemalloc/internal/atomic_types.h create mode 100644 include/jemalloc/internal/prof_inlines_a.h rename include/jemalloc/internal/{prof_inlines.h => prof_inlines_b.h} (98%) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index a81aaf56..ea7e0995 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -6,8 +6,6 @@ unsigned arena_ind_get(const arena_t *arena); void arena_internal_add(arena_t *arena, size_t size); void arena_internal_sub(arena_t *arena, size_t size); size_t arena_internal_get(arena_t *arena); -bool arena_prof_accum_impl(arena_t *arena, uint64_t accumbytes); -bool arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); #endif /* JEMALLOC_ENABLE_INLINE */ @@ -33,29 +31,6 @@ arena_internal_get(arena_t *arena) { return atomic_read_zu(&arena->stats.internal); } -JEMALLOC_INLINE bool -arena_prof_accum_impl(arena_t 
*arena, uint64_t accumbytes) { - cassert(config_prof); - assert(prof_interval != 0); - - arena->prof_accumbytes += accumbytes; - if (arena->prof_accumbytes >= prof_interval) { - arena->prof_accumbytes %= prof_interval; - return true; - } - return false; -} - -JEMALLOC_INLINE bool -arena_prof_accum_locked(arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - - if (likely(prof_interval == 0)) { - return false; - } - return arena_prof_accum_impl(arena, accumbytes); -} - JEMALLOC_INLINE bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -64,14 +39,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { return false; } - { - bool ret; - - malloc_mutex_lock(tsdn, &arena->lock); - ret = arena_prof_accum_impl(arena, accumbytes); - malloc_mutex_unlock(tsdn, &arena->lock); - return ret; - } + return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); } #endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index dde26894..2ee5690e 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -138,7 +138,8 @@ struct arena_s { */ ql_head(tcache_t) tcache_ql; - /* Synchronization: lock. */ + /* Synchronization: internal. 
*/ + prof_accum_t prof_accum; uint64_t prof_accumbytes; /* diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h index 7c1902f8..de66d57d 100644 --- a/include/jemalloc/internal/atomic_inlines.h +++ b/include/jemalloc/internal/atomic_inlines.h @@ -23,7 +23,7 @@ */ #ifndef JEMALLOC_ENABLE_INLINE -# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# ifdef JEMALLOC_ATOMIC_U64 uint64_t atomic_add_u64(uint64_t *p, uint64_t x); uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); @@ -50,7 +50,7 @@ void atomic_write_u(unsigned *p, unsigned x); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) /******************************************************************************/ /* 64-bit operations. */ -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#ifdef JEMALLOC_ATOMIC_U64 # if (defined(__amd64__) || defined(__x86_64__)) JEMALLOC_INLINE uint64_t atomic_add_u64(uint64_t *p, uint64_t x) { diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h new file mode 100644 index 00000000..0fd5e5b5 --- /dev/null +++ b/include/jemalloc/internal/atomic_types.h @@ -0,0 +1,8 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H +#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H + +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# define JEMALLOC_ATOMIC_U64 +#endif + +#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index bace9c46..7e9c24b7 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -380,6 +380,7 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime_types.h" #include "jemalloc/internal/util_types.h" +#include "jemalloc/internal/atomic_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include 
"jemalloc/internal/ticker_types.h" @@ -419,10 +420,10 @@ typedef unsigned szind_t; #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/extent_dss_structs.h" #include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/tsd_structs.h" @@ -902,6 +903,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. */ +#include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" #ifndef JEMALLOC_ENABLE_INLINE @@ -1163,8 +1165,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, } #endif -#include "jemalloc/internal/prof_inlines.h" - +#include "jemalloc/internal/prof_inlines_b.h" #ifdef __cplusplus } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ab5a672c..4e799915 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -54,8 +54,6 @@ arena_prefork1 arena_prefork2 arena_prefork3 arena_prof_accum -arena_prof_accum_impl -arena_prof_accum_locked arena_prof_promote arena_prof_tctx_get arena_prof_tctx_reset @@ -364,6 +362,9 @@ prng_range_zu prng_state_next_u32 prng_state_next_u64 prng_state_next_zu +prof_accum_add +prof_accum_cancel +prof_accum_init prof_active prof_active_get prof_active_get_unlocked diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 76505f82..f3b6f8d3 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -55,6 +55,7 @@ extern prof_dump_header_t *prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, 
uint64_t *accumobjs, uint64_t *accumbytes); #endif +bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h new file mode 100644 index 00000000..d77635a8 --- /dev/null +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -0,0 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H +#define JEMALLOC_INTERNAL_PROF_INLINES_A_H + +#ifndef JEMALLOC_ENABLE_INLINE +bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, + uint64_t accumbytes); +void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) +JEMALLOC_INLINE bool +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. 
+ */ +#ifdef JEMALLOC_ATOMIC_U64 + do { + a0 = atomic_read_u64(&prof_accum->accumbytes); + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif + return overflow; +} + +JEMALLOC_INLINE void +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive prof_accumbytes increase as + * possible without underflowing. Interval-triggered dumps occur + * slightly more often than intended as a result of incomplete + * canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + do { + a0 = atomic_read_u64(&prof_accum->accumbytes); + a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - + usize) : 0; + } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); +#else + malloc_mutex_lock(tsdn, &prof_accum->mtx); + a0 = prof_accum->accumbytes; + a1 = (a0 >= LARGE_MINCLASS - usize) ? 
a0 - (LARGE_MINCLASS - usize) : + 0; + prof_accum->accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_accum->mtx); +#endif +} +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines_b.h similarity index 98% rename from include/jemalloc/internal/prof_inlines.h rename to include/jemalloc/internal/prof_inlines_b.h index aba2936a..9e969a07 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H -#define JEMALLOC_INTERNAL_PROF_INLINES_H +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H +#define JEMALLOC_INTERNAL_PROF_INLINES_B_H #ifndef JEMALLOC_ENABLE_INLINE bool prof_active_get_unlocked(void); @@ -237,4 +237,4 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { } #endif -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index caae1257..afff6aa5 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -15,6 +15,13 @@ typedef struct { } prof_unwind_data_t; #endif +struct prof_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + uint64_t accumbytes; +}; + struct prof_cnt_s { /* Profiling counters. 
*/ uint64_t curobjs; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ff0db65e..1eff995e 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_TYPES_H typedef struct prof_bt_s prof_bt_t; +typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_gctx_s prof_gctx_t; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 29299168..f919cc5a 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -47,6 +47,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 345c57df..40db9d1d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1148,19 +1148,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, extent_usize_set(extent, usize); - /* - * Cancel out as much of the excessive prof_accumbytes increase as - * possible without underflowing. Interval-triggered dumps occur - * slightly more often than intended as a result of incomplete - * canceling. 
- */ - malloc_mutex_lock(tsdn, &arena->lock); - if (arena->prof_accumbytes >= LARGE_MINCLASS - usize) { - arena->prof_accumbytes -= LARGE_MINCLASS - usize; - } else { - arena->prof_accumbytes = 0; - } - malloc_mutex_unlock(tsdn, &arena->lock); + prof_accum_cancel(tsdn, &arena->prof_accum, usize); assert(isalloc(tsdn, extent, ptr) == usize); } @@ -1574,7 +1562,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - arena->prof_accumbytes = 0; + if (prof_accum_init(tsdn, &arena->prof_accum)) { + goto label_error; + } } if (config_cache_oblivious) { diff --git a/src/prof.c b/src/prof.c index 5aeefb28..13fa20d3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1753,6 +1753,20 @@ prof_fdump(void) { prof_dump(tsd, false, filename, opt_prof_leak); } +bool +prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { + cassert(config_prof); + +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + WITNESS_RANK_PROF_ACCUM)) { + return true; + } +#endif + prof_accum->accumbytes = 0; + return false; +} + void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; diff --git a/src/tcache.c b/src/tcache.c index 94c45707..f38c2d5d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -200,7 +200,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } if ((config_prof || config_stats) && locked_arena == arena) { if (config_prof) { - idump = arena_prof_accum_locked(arena, + idump = arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } From 6b5cba41916549f1aa37adac45659b60293d9495 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 17:43:33 -0800 Subject: [PATCH 0642/2608] Convert arena->stats synchronization to atomics. 
--- include/jemalloc/internal/arena_externs.h | 12 +- include/jemalloc/internal/arena_structs_b.h | 3 +- .../jemalloc/internal/jemalloc_internal.h.in | 4 +- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/stats_structs.h | 6 +- include/jemalloc/internal/witness_types.h | 3 +- src/arena.c | 487 +++++++++++------- src/large.c | 15 +- src/tcache.c | 23 +- 9 files changed, 326 insertions(+), 228 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index d6556dae..2880399b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -13,6 +13,12 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; +void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests); +void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats); void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET @@ -21,7 +27,7 @@ size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, bool locked); + extent_t *extent); void arena_extent_dalloc_large_finish(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, @@ -67,10 +73,6 @@ bool arena_decay_time_default_set(ssize_t decay_time); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty); -void arena_stats_merge(tsdn_t *tsdn, 
arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 2ee5690e..04e859b5 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -127,8 +127,9 @@ struct arena_s { */ malloc_mutex_t lock; - /* Synchronization: lock. */ + /* Synchronization: internal. */ arena_stats_t stats; + /* * List of tcaches for extant threads associated with this arena. * Stats from these are merged incrementally, and at exit if diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 7e9c24b7..0d0440b5 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -411,10 +411,10 @@ typedef unsigned szind_t; #include "jemalloc/internal/spin_structs.h" #include "jemalloc/internal/ticker_structs.h" #include "jemalloc/internal/ckh_structs.h" -#include "jemalloc/internal/stats_structs.h" -#include "jemalloc/internal/ctl_structs.h" #include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" +#include "jemalloc/internal/stats_structs.h" +#include "jemalloc/internal/ctl_structs.h" #include "jemalloc/internal/bitmap_structs.h" #include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4e799915..ff54a35d 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -66,6 +66,7 @@ 
arena_salloc arena_sdalloc arena_set arena_slab_regind +arena_stats_init arena_stats_merge arena_tcache_fill_small arena_tdata_get diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 5cdb0cd9..4f5984ab 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -76,6 +76,10 @@ struct malloc_large_stats_s { * requests. */ struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + /* Number of bytes currently mapped, excluding retained memory. */ size_t mapped; /* Derived. */ @@ -97,7 +101,7 @@ struct arena_stats_s { uint64_t purged; size_t base; /* Derived. */ - size_t internal; /* Protected via atomic_*_zu(). */ + size_t internal; size_t resident; /* Derived. */ size_t allocated_large; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index f919cc5a..3fd7998a 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -41,10 +41,11 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_RTREE_ELM 12U #define WITNESS_RANK_RTREE 13U #define WITNESS_RANK_BASE 14U +#define WITNESS_RANK_ARENA_LARGE 15U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_LARGE WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 40db9d1d..ac447199 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,6 +37,212 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ +static bool +arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { + if (config_debug) { + for (size_t i = 0; i 
< sizeof(arena_stats_t); i++) { + assert(((char *)arena_stats)[0] == 0); + } + } +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + WITNESS_RANK_ARENA_STATS)) { + return true; + } +#endif + /* Memory is zeroed, so there is no need to clear stats. */ + return false; +} + +static void +arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_lock(tsdn, &arena_stats->mtx); +#endif +} + +static void +arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_unlock(tsdn, &arena_stats->mtx); +#endif +} + +static uint64_t +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_read_u64(p); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static void +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, + uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_add_u64(p, x); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +static void +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, + uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_sub_u64(p, x); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; +#endif +} + +static size_t +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_read_zu(p); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static void +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_add_zu(p, x); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +static void +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_sub_zu(p, x); +#else + 
malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; +#endif +} + +void +arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_u64(tsdn, arena_stats, &arena_stats->nrequests_large, + nrequests); + arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - + NBINS].nrequests, nrequests); + arena_stats_unlock(tsdn, arena_stats); +} + +void +arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { + malloc_mutex_lock(tsdn, &arena->lock); + *nthreads += arena_nthreads_get(arena, false); + *dss = dss_prec_names[arena_dss_prec_get(arena)]; + *decay_time = arena->decay.time; + *nactive += atomic_read_zu(&arena->nactive); + *ndirty += extents_npages_get(&arena->extents_cached); + malloc_mutex_unlock(tsdn, &arena->lock); +} + +void +arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, + arena_stats_t *astats, malloc_bin_stats_t *bstats, + malloc_large_stats_t *lstats) { + size_t base_allocated, base_resident, base_mapped; + unsigned i; + + cassert(config_stats); + + arena_basic_stats_merge(tsdn, arena, nthreads, dss, decay_time, + nactive, ndirty); + + base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, + &base_mapped); + + arena_stats_lock(tsdn, &arena->stats); + + astats->mapped += base_mapped + arena_stats_read_zu(tsdn, &arena->stats, + &arena->stats.mapped); + astats->retained += (extents_npages_get(&arena->extents_retained) << + LG_PAGE); + astats->npurge += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.npurge); + astats->nmadvise += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.nmadvise); + astats->purged += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.purged); + astats->base += base_allocated; + 
astats->internal += arena_internal_get(arena); + astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) + + extents_npages_get(&arena->extents_cached)) << LG_PAGE)); + astats->allocated_large += arena_stats_read_zu(tsdn, &arena->stats, + &arena->stats.allocated_large); + astats->nmalloc_large += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.nmalloc_large); + astats->ndalloc_large += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.ndalloc_large); + astats->nrequests_large += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.nrequests_large); + + for (i = 0; i < NSIZES - NBINS; i++) { + lstats[i].nmalloc += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].nmalloc); + lstats[i].ndalloc += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].ndalloc); + lstats[i].nrequests += arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].nrequests); + lstats[i].curlextents += arena_stats_read_zu(tsdn, + &arena->stats, &arena->stats.lstats[i].curlextents); + } + + arena_stats_unlock(tsdn, &arena->stats); + + if (config_tcache) { + tcache_bin_t *tbin; + tcache_t *tcache; + + /* tcache_bytes counts currently cached bytes. 
*/ + astats->tcache_bytes = 0; + ql_foreach(tcache, &arena->tcache_ql, link) { + for (i = 0; i < nhbins; i++) { + tbin = &tcache->tbins[i]; + astats->tcache_bytes += tbin->ncached * + index2size(i); + } + } + } + + for (i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + + malloc_mutex_lock(tsdn, &bin->lock); + bstats[i].nmalloc += bin->stats.nmalloc; + bstats[i].ndalloc += bin->stats.ndalloc; + bstats[i].nrequests += bin->stats.nrequests; + bstats[i].curregs += bin->stats.curregs; + if (config_tcache) { + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; + } + bstats[i].nslabs += bin->stats.nslabs; + bstats[i].reslabs += bin->stats.reslabs; + bstats[i].curslabs += bin->stats.curslabs; + malloc_mutex_unlock(tsdn, &bin->lock); + } +} + void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { @@ -128,7 +334,7 @@ arena_nactive_sub(arena_t *arena, size_t sub_pages) { } static void -arena_large_malloc_stats_update(arena_t *arena, size_t usize) { +arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index, hindex; cassert(config_stats); @@ -139,15 +345,20 @@ arena_large_malloc_stats_update(arena_t *arena, size_t usize) { index = size2index(usize); hindex = (index >= NBINS) ? 
index - NBINS : 0; - arena->stats.nmalloc_large++; - arena->stats.allocated_large += usize; - arena->stats.lstats[hindex].nmalloc++; - arena->stats.lstats[hindex].nrequests++; - arena->stats.lstats[hindex].curlextents++; + arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.nmalloc_large, + 1); + arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.allocated_large, + usize); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->stats.lstats[hindex].nmalloc, 1); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->stats.lstats[hindex].nrequests, 1); + arena_stats_add_zu(tsdn, &arena->stats, + &arena->stats.lstats[hindex].curlextents, 1); } static void -arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) { +arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index, hindex; cassert(config_stats); @@ -158,71 +369,36 @@ arena_large_malloc_stats_update_undo(arena_t *arena, size_t usize) { index = size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; - arena->stats.nmalloc_large--; - arena->stats.allocated_large -= usize; - arena->stats.lstats[hindex].nmalloc--; - arena->stats.lstats[hindex].nrequests--; - arena->stats.lstats[hindex].curlextents--; + arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.ndalloc_large, + 1); + arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.allocated_large, + usize); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->stats.lstats[hindex].ndalloc, 1); + arena_stats_sub_zu(tsdn, &arena->stats, + &arena->stats.lstats[hindex].curlextents, 1); } static void -arena_large_dalloc_stats_update(arena_t *arena, size_t usize) { - szind_t index, hindex; - - cassert(config_stats); - - if (usize < LARGE_MINCLASS) { - usize = LARGE_MINCLASS; - } - index = size2index(usize); - hindex = (index >= NBINS) ? 
index - NBINS : 0; - - arena->stats.ndalloc_large++; - arena->stats.allocated_large -= usize; - arena->stats.lstats[hindex].ndalloc++; - arena->stats.lstats[hindex].curlextents--; -} - -static void -arena_large_reset_stats_cancel(arena_t *arena, size_t usize) { +arena_large_reset_stats_cancel(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index = size2index(usize); szind_t hindex = (index >= NBINS) ? index - NBINS : 0; cassert(config_stats); - arena->stats.ndalloc_large--; - arena->stats.lstats[hindex].ndalloc--; + arena_stats_lock(tsdn, &arena->stats); + arena_stats_sub_u64(tsdn, &arena->stats, &arena->stats.ndalloc_large, + 1); + arena_stats_sub_u64(tsdn, &arena->stats, + &arena->stats.lstats[hindex].ndalloc, 1); + arena_stats_unlock(tsdn, &arena->stats); } static void -arena_large_ralloc_stats_update(arena_t *arena, size_t oldusize, size_t usize) { - arena_large_dalloc_stats_update(arena, oldusize); - arena_large_malloc_stats_update(arena, usize); -} - -static extent_t * -arena_extent_alloc_large_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, size_t usize, size_t alignment, - bool *zero) { - extent_t *extent; - bool commit = true; - - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - extent = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, usize, - large_pad, alignment, zero, &commit, false); - if (extent == NULL) { - /* Revert optimistic stats updates. 
*/ - malloc_mutex_lock(tsdn, &arena->lock); - if (config_stats) { - arena_large_malloc_stats_update_undo(arena, usize); - arena->stats.mapped -= usize; - } - arena_nactive_sub(arena, (usize + large_pad) >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); - } - - return extent; +arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, + size_t usize) { + arena_large_dalloc_stats_update(tsdn, arena, oldusize); + arena_large_malloc_stats_update(tsdn, arena, usize); } extent_t * @@ -233,43 +409,35 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - malloc_mutex_lock(tsdn, &arena->lock); - - /* Optimistically update stats. */ - if (config_stats) { - arena_large_malloc_stats_update(arena, usize); - arena->stats.mapped += usize; - } - arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); - - malloc_mutex_unlock(tsdn, &arena->lock); - bool commit = true; extent = extent_alloc_cache(tsdn, arena, &extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); if (extent == NULL) { - extent = arena_extent_alloc_large_hard(tsdn, arena, - &extent_hooks, usize, alignment, zero); + extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, + usize, large_pad, alignment, zero, &commit, false); } + if (config_stats && extent != NULL) { + arena_stats_lock(tsdn, &arena->stats); + arena_large_malloc_stats_update(tsdn, arena, usize); + arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.mapped, + usize); + arena_stats_unlock(tsdn, &arena->stats); + } + arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); + return extent; } void -arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - bool locked) { - if (!locked) { - malloc_mutex_lock(tsdn, &arena->lock); - } else { - malloc_mutex_assert_owner(tsdn, &arena->lock); - } +arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { if (config_stats) { - 
arena_large_dalloc_stats_update(arena, + arena_stats_lock(tsdn, &arena->stats); + arena_large_dalloc_stats_update(tsdn, arena, extent_usize_get(extent)); - arena->stats.mapped -= extent_size_get(extent); - } - if (!locked) { - malloc_mutex_unlock(tsdn, &arena->lock); + arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, + extent_size_get(extent)); + arena_stats_unlock(tsdn, &arena->stats); } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); } @@ -287,13 +455,14 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize = extent_usize_get(extent); size_t udiff = oldusize - usize; - malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_large_ralloc_stats_update(arena, oldusize, usize); - arena->stats.mapped -= udiff; + arena_stats_lock(tsdn, &arena->stats); + arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); + arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, + udiff); + arena_stats_unlock(tsdn, &arena->stats); } arena_nactive_sub(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); } void @@ -302,13 +471,14 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize = extent_usize_get(extent); size_t udiff = usize - oldusize; - malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena_large_ralloc_stats_update(arena, oldusize, usize); - arena->stats.mapped += udiff; + arena_stats_lock(tsdn, &arena->stats); + arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); + arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.mapped, + udiff); + arena_stats_unlock(tsdn, &arena->stats); } arena_nactive_add(arena, udiff >> LG_PAGE); - malloc_mutex_unlock(tsdn, &arena->lock); } static void @@ -575,8 +745,12 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, } if (config_stats) { - arena->stats.nmadvise += nmadvise; - arena->stats.purged += npurged; + arena_stats_lock(tsdn, &arena->stats); + 
arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.nmadvise, + nmadvise); + arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.purged, + npurged); + arena_stats_unlock(tsdn, &arena->stats); } return npurged; @@ -616,7 +790,10 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { malloc_mutex_lock(tsdn, &arena->lock); if (config_stats) { - arena->stats.npurge++; + arena_stats_lock(tsdn, &arena->stats); + arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.npurge, + 1); + arena_stats_unlock(tsdn, &arena->stats); } label_return: @@ -717,7 +894,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); /* Cancel out unwanted effects on stats. */ if (config_stats) { - arena_large_reset_stats_cancel(arena, usize); + arena_large_reset_stats_cancel(tsd_tsdn(tsd), arena, + usize); } } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -849,8 +1027,6 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } assert(extent_slab_get(slab)); - malloc_mutex_lock(tsdn, &arena->lock); - arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); /* Initialize slab internals. 
*/ @@ -860,9 +1036,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); if (config_stats) { - arena->stats.mapped += extent_size_get(slab); + arena_stats_lock(tsdn, &arena->stats); + arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.mapped, + extent_size_get(slab)); + arena_stats_unlock(tsdn, &arena->stats); } - malloc_mutex_unlock(tsdn, &arena->lock); return slab; } @@ -1419,99 +1597,6 @@ arena_decay_time_default_set(ssize_t decay_time) { return false; } -static void -arena_basic_stats_merge_locked(arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - *nthreads += arena_nthreads_get(arena, false); - *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *decay_time = arena->decay.time; - *nactive += atomic_read_zu(&arena->nactive); - *ndirty += extents_npages_get(&arena->extents_cached); -} - -void -arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(tsdn, &arena->lock); - arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, - nactive, ndirty); - malloc_mutex_unlock(tsdn, &arena->lock); -} - -void -arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) { - size_t base_allocated, base_resident, base_mapped; - unsigned i; - - cassert(config_stats); - - malloc_mutex_lock(tsdn, &arena->lock); - arena_basic_stats_merge_locked(arena, nthreads, dss, decay_time, - nactive, ndirty); - - base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, - &base_mapped); - - astats->mapped += base_mapped + arena->stats.mapped; - astats->retained += (extents_npages_get(&arena->extents_retained) << - LG_PAGE); - astats->npurge += arena->stats.npurge; 
- astats->nmadvise += arena->stats.nmadvise; - astats->purged += arena->stats.purged; - astats->base += base_allocated; - astats->internal += arena_internal_get(arena); - astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) + - extents_npages_get(&arena->extents_cached)) << LG_PAGE)); - astats->allocated_large += arena->stats.allocated_large; - astats->nmalloc_large += arena->stats.nmalloc_large; - astats->ndalloc_large += arena->stats.ndalloc_large; - astats->nrequests_large += arena->stats.nrequests_large; - - for (i = 0; i < NSIZES - NBINS; i++) { - lstats[i].nmalloc += arena->stats.lstats[i].nmalloc; - lstats[i].ndalloc += arena->stats.lstats[i].ndalloc; - lstats[i].nrequests += arena->stats.lstats[i].nrequests; - lstats[i].curlextents += arena->stats.lstats[i].curlextents; - } - - if (config_tcache) { - tcache_bin_t *tbin; - tcache_t *tcache; - - /* tcache_bytes counts currently cached bytes. */ - astats->tcache_bytes = 0; - ql_foreach(tcache, &arena->tcache_ql, link) { - for (i = 0; i < nhbins; i++) { - tbin = &tcache->tbins[i]; - astats->tcache_bytes += tbin->ncached * - index2size(i); - } - } - } - malloc_mutex_unlock(tsdn, &arena->lock); - - for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(tsdn, &bin->lock); - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - bstats[i].curregs += bin->stats.curregs; - if (config_tcache) { - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - } - bstats[i].nslabs += bin->stats.nslabs; - bstats[i].reslabs += bin->stats.reslabs; - bstats[i].curslabs += bin->stats.curslabs; - malloc_mutex_unlock(tsdn, &bin->lock); - } -} - unsigned arena_nthreads_get(arena_t *arena, bool internal) { return atomic_read_u(&arena->nthreads[internal]); @@ -1557,6 +1642,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } + if 
(config_stats) { + if (arena_stats_init(tsdn, &arena->stats)) { + goto label_error; + } + } + if (config_stats && config_tcache) { ql_new(&arena->tcache_ql); } @@ -1663,20 +1754,20 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { unsigned i; base_prefork(tsdn, arena->base); + malloc_mutex_prefork(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) { malloc_mutex_prefork(tsdn, &arena->bins[i].lock); } - malloc_mutex_prefork(tsdn, &arena->large_mtx); } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) { malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); } + malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_freelist_mtx); extents_postfork_parent(tsdn, &arena->extents_cached); @@ -1688,10 +1779,10 @@ void arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { unsigned i; - malloc_mutex_postfork_child(tsdn, &arena->large_mtx); for (i = 0; i < NBINS; i++) { malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); } + malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_freelist_mtx); extents_postfork_child(tsdn, &arena->extents_cached); diff --git a/src/large.c b/src/large.c index bfe2f714..55e0737e 100644 --- a/src/large.c +++ b/src/large.c @@ -286,20 +286,23 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, /* * junked_locked indicates whether the extent's data have been junk-filled, and - * whether the arena's lock is currently held. The arena's large_mtx is - * independent of these considerations. + * whether the arena's large_mtx is currently held. 
*/ static void large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool junked_locked) { - malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); - malloc_mutex_unlock(tsdn, &arena->large_mtx); + if (!junked_locked) { + malloc_mutex_lock(tsdn, &arena->large_mtx); + extent_list_remove(&arena->large, extent); + malloc_mutex_unlock(tsdn, &arena->large_mtx); large_dalloc_maybe_junk(extent_addr_get(extent), extent_usize_get(extent)); + } else { + malloc_mutex_assert_owner(tsdn, &arena->large_mtx); + extent_list_remove(&arena->large, extent); } - arena_extent_dalloc_large_prep(tsdn, arena, extent, junked_locked); + arena_extent_dalloc_large_prep(tsdn, arena, extent); } static void diff --git a/src/tcache.c b/src/tcache.c index f38c2d5d..075f3481 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -188,7 +188,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, idump = false; } - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); @@ -206,14 +206,13 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } if (config_stats) { merged_stats = true; - arena->stats.nrequests_large += - tbin->tstats.nrequests; - arena->stats.lstats[binind - NBINS].nrequests += - tbin->tstats.nrequests; + arena_stats_large_nrequests_add(tsd_tsdn(tsd), + &arena->stats, binind, + tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { @@ -245,12 +244,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ - malloc_mutex_lock(tsd_tsdn(tsd), &arena->lock); - arena->stats.nrequests_large += tbin->tstats.nrequests; - arena->stats.lstats[binind - NBINS].nrequests += - tbin->tstats.nrequests; + arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats, + binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->lock); } memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * @@ -426,10 +422,9 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } for (; i < nhbins; i++) { - malloc_large_stats_t *lstats = &arena->stats.lstats[i - NBINS]; tcache_bin_t *tbin = &tcache->tbins[i]; - arena->stats.nrequests_large += tbin->tstats.nrequests; - lstats->nrequests += tbin->tstats.nrequests; + arena_stats_large_nrequests_add(tsdn, &arena->stats, i, + tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } } From ab25d3c987ddb32846760cc08af8db22a6389c02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 18:50:53 -0800 Subject: [PATCH 0643/2608] Synchronize arena->tcache_ql with arena->tcache_ql_mtx. This replaces arena->lock synchronization. --- include/jemalloc/internal/arena_structs_b.h | 3 ++- include/jemalloc/internal/witness_types.h | 13 +++++++------ src/arena.c | 15 +++++++++++++++ src/jemalloc.c | 11 +++-------- src/tcache.c | 10 ++++------ 5 files changed, 31 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 04e859b5..132a328b 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -135,9 +135,10 @@ struct arena_s { * Stats from these are merged incrementally, and at exit if * opt_stats_print is enabled. * - * Synchronization: lock. + * Synchronization: tcache_ql_mtx. */ ql_head(tcache_t) tcache_ql; + malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. 
*/ prof_accum_t prof_accum; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 3fd7998a..7957b410 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -35,13 +35,14 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_CORE 9U #define WITNESS_RANK_ARENA 9U -#define WITNESS_RANK_EXTENTS 10U -#define WITNESS_RANK_EXTENT_FREELIST 11U +#define WITNESS_RANK_TCACHE_QL 10U +#define WITNESS_RANK_EXTENTS 11U +#define WITNESS_RANK_EXTENT_FREELIST 12U -#define WITNESS_RANK_RTREE_ELM 12U -#define WITNESS_RANK_RTREE 13U -#define WITNESS_RANK_BASE 14U -#define WITNESS_RANK_ARENA_LARGE 15U +#define WITNESS_RANK_RTREE_ELM 13U +#define WITNESS_RANK_RTREE 14U +#define WITNESS_RANK_BASE 15U +#define WITNESS_RANK_ARENA_LARGE 16U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index ac447199..f4e051ca 100644 --- a/src/arena.c +++ b/src/arena.c @@ -215,6 +215,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* tcache_bytes counts currently cached bytes. 
*/ astats->tcache_bytes = 0; + malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_foreach(tcache, &arena->tcache_ql, link) { for (i = 0; i < nhbins; i++) { tbin = &tcache->tbins[i]; @@ -222,6 +223,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, index2size(i); } } + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } for (i = 0; i < NBINS; i++) { @@ -1650,6 +1652,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (config_stats && config_tcache) { ql_new(&arena->tcache_ql); + if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", + WITNESS_RANK_TCACHE_QL)) { + goto label_error; + } } if (config_prof) { @@ -1736,6 +1742,9 @@ arena_boot(void) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->lock); + if (config_stats && config_tcache) { + malloc_mutex_prefork(tsdn, &arena->tcache_ql_mtx); + } } void @@ -1773,6 +1782,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { extents_postfork_parent(tsdn, &arena->extents_cached); extents_postfork_parent(tsdn, &arena->extents_retained); malloc_mutex_postfork_parent(tsdn, &arena->lock); + if (config_stats && config_tcache) { + malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); + } } void @@ -1788,4 +1800,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { extents_postfork_child(tsdn, &arena->extents_cached); extents_postfork_child(tsdn, &arena->extents_retained); malloc_mutex_postfork_child(tsdn, &arena->lock); + if (config_stats && config_tcache) { + malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); + } } diff --git a/src/jemalloc.c b/src/jemalloc.c index d2c33bbc..197f9bdc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -684,17 +684,12 @@ stats_print_atexit(void) { if (arena != NULL) { tcache_t *tcache; - /* - * tcache_stats_merge() locks bins, so if any - * code is introduced that acquires both arena - * and bin locks in the opposite order, - * deadlocks may result. 
- */ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_foreach(tcache, &arena->tcache_ql, link) { tcache_stats_merge(tsdn, tcache, arena); } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, + &arena->tcache_ql_mtx); } } } diff --git a/src/tcache.c b/src/tcache.c index 075f3481..dff31d19 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -261,10 +261,10 @@ static void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -272,7 +272,7 @@ static void tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Unlink from list of extant tcaches. */ - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); if (config_debug) { bool in_ql = false; tcache_t *iter; @@ -286,7 +286,7 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } ql_remove(&arena->tcache_ql, tcache, link); tcache_stats_merge(tsdn, tcache, arena); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -409,8 +409,6 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); - malloc_mutex_assert_owner(tsdn, &arena->lock); - /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; From d433471f581ca50583c7a99f9802f7388f81aa36 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2017 09:44:46 -0800 Subject: [PATCH 0644/2608] Derive {allocated,nmalloc,ndalloc,nrequests}_large stats. This mildly reduces stats update overhead during normal operation. 
--- include/jemalloc/internal/stats_structs.h | 8 ++-- src/arena.c | 45 ++++++++++++----------- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 4f5984ab..1571ef4f 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -104,10 +104,10 @@ struct arena_stats_s { size_t internal; size_t resident; /* Derived. */ - size_t allocated_large; - uint64_t nmalloc_large; - uint64_t ndalloc_large; - uint64_t nrequests_large; + size_t allocated_large; /* Derived. */ + uint64_t nmalloc_large; /* Derived. */ + uint64_t ndalloc_large; /* Derived. */ + uint64_t nrequests_large; /* Derived. */ /* Number of bytes cached in tcache associated with this arena. */ size_t tcache_bytes; /* Derived. */ diff --git a/src/arena.c b/src/arena.c index f4e051ca..8a658b99 100644 --- a/src/arena.c +++ b/src/arena.c @@ -136,8 +136,6 @@ void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->nrequests_large, - nrequests); arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - NBINS].nrequests, nrequests); arena_stats_unlock(tsdn, arena_stats); @@ -160,14 +158,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { - size_t base_allocated, base_resident, base_mapped; - unsigned i; - cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, decay_time, nactive, ndirty); + size_t base_allocated, base_resident, base_mapped; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped); @@ -196,15 +192,30 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, 
astats->nrequests_large += arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.nrequests_large); - for (i = 0; i < NSIZES - NBINS; i++) { - lstats[i].nmalloc += arena_stats_read_u64(tsdn, &arena->stats, + astats->allocated_large = 0; + astats->nmalloc_large = 0; + astats->ndalloc_large = 0; + astats->nrequests_large = 0; + for (szind_t i = 0; i < NSIZES - NBINS; i++) { + uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nmalloc); - lstats[i].ndalloc += arena_stats_read_u64(tsdn, &arena->stats, + lstats[i].nmalloc += nmalloc; + astats->nmalloc_large += nmalloc; + + uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].ndalloc); - lstats[i].nrequests += arena_stats_read_u64(tsdn, &arena->stats, + lstats[i].ndalloc += ndalloc; + astats->ndalloc_large += ndalloc; + + uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nrequests); - lstats[i].curlextents += arena_stats_read_zu(tsdn, + lstats[i].nrequests += nrequests; + astats->nrequests_large += nrequests; + + size_t curlextents = arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.lstats[i].curlextents); + lstats[i].curlextents += curlextents; + astats->allocated_large += curlextents * index2size(i); } arena_stats_unlock(tsdn, &arena->stats); @@ -217,7 +228,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->tcache_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_foreach(tcache, &arena->tcache_ql, link) { - for (i = 0; i < nhbins; i++) { + for (szind_t i = 0; i < nhbins; i++) { tbin = &tcache->tbins[i]; astats->tcache_bytes += tbin->ncached * index2size(i); @@ -226,7 +237,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } - for (i = 0; i < NBINS; i++) { + for (szind_t i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsdn, &bin->lock); @@ -347,10 +358,6 @@ 
arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.nmalloc_large, - 1); - arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.allocated_large, - usize); arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].nmalloc, 1); arena_stats_add_u64(tsdn, &arena->stats, @@ -371,10 +378,6 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.ndalloc_large, - 1); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.allocated_large, - usize); arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].ndalloc, 1); arena_stats_sub_zu(tsdn, &arena->stats, @@ -389,8 +392,6 @@ arena_large_reset_stats_cancel(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); arena_stats_lock(tsdn, &arena->stats); - arena_stats_sub_u64(tsdn, &arena->stats, &arena->stats.ndalloc_large, - 1); arena_stats_sub_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].ndalloc, 1); arena_stats_unlock(tsdn, &arena->stats); From f8fee6908d554aaa4f356bfcf7642bc7707eb6df Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2017 11:02:32 -0800 Subject: [PATCH 0645/2608] Synchronize arena->decay with arena->decay.mtx. This removes the last use of arena->lock. 
--- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/arena_structs_b.h | 8 +-- include/jemalloc/internal/witness_types.h | 2 +- src/arena.c | 56 ++++++++++++--------- 4 files changed, 35 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a180322b..275866a4 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -81,7 +81,7 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->lock); + malloc_mutex_assert_not_owner(tsdn, &arena->decay.mtx); arena_decay_ticks(tsdn, arena, 1); } diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 132a328b..92f1e41f 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -37,6 +37,8 @@ struct arena_bin_info_s { }; struct arena_decay_s { + /* Synchronizes all fields. */ + malloc_mutex_t mtx; /* * Approximate time in seconds from the creation of a set of unused * dirty pages until an equivalent set of unused dirty pages is purged @@ -121,12 +123,6 @@ struct arena_s { */ unsigned nthreads[2]; - /* - * Synchronizes various arena operations, as indicated in field-specific - * comments. - */ - malloc_mutex_t lock; - /* Synchronization: internal. 
*/ arena_stats_t stats; diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 7957b410..0678b082 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -34,7 +34,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, */ #define WITNESS_RANK_CORE 9U -#define WITNESS_RANK_ARENA 9U +#define WITNESS_RANK_DECAY 9U #define WITNESS_RANK_TCACHE_QL 10U #define WITNESS_RANK_EXTENTS 11U #define WITNESS_RANK_EXTENT_FREELIST 12U diff --git a/src/arena.c b/src/arena.c index 8a658b99..98004ecb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -144,13 +144,11 @@ arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { - malloc_mutex_lock(tsdn, &arena->lock); *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *decay_time = arena->decay.time; + *decay_time = arena_decay_time_get(tsdn, arena); *nactive += atomic_read_zu(&arena->nactive); *ndirty += extents_npages_get(&arena->extents_cached); - malloc_mutex_unlock(tsdn, &arena->lock); } void @@ -607,7 +605,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) { } static void -arena_decay_init(arena_t *arena, ssize_t decay_time) { +arena_decay_reinit(arena_t *arena, ssize_t decay_time) { arena->decay.time = decay_time; if (decay_time > 0) { nstime_init2(&arena->decay.interval, decay_time, 0); @@ -622,6 +620,15 @@ arena_decay_init(arena_t *arena, ssize_t decay_time) { memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } +static bool +arena_decay_init(arena_t *arena, ssize_t decay_time) { + if (malloc_mutex_init(&arena->decay.mtx, "decay", WITNESS_RANK_DECAY)) { + return true; + } + arena_decay_reinit(arena, decay_time); + return false; +} + static bool 
arena_decay_time_valid(ssize_t decay_time) { if (decay_time < -1) { @@ -637,9 +644,9 @@ ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) { ssize_t decay_time; - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->decay.mtx); decay_time = arena->decay.time; - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->decay.mtx); return decay_time; } @@ -650,7 +657,7 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { return true; } - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->decay.mtx); /* * Restart decay backlog from scratch, which may cause many dirty pages * to be immediately purged. It would conceptually be possible to map @@ -659,16 +666,16 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_init(arena, decay_time); + arena_decay_reinit(arena, decay_time); arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->decay.mtx); return false; } void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_owner(tsdn, &arena->lock); + malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); /* Purge all or nothing if the option is disabled. 
*/ if (arena->decay.time <= 0) { @@ -766,7 +773,7 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &arena->lock); + malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); if (atomic_cas_u(&arena->purging, 0, 1)) { return; @@ -778,19 +785,19 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { extent_list_init(&purge_extents); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->decay.mtx); npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, ndirty_limit, &purge_extents); if (npurge == 0) { - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->decay.mtx); goto label_return; } npurged = arena_purge_stashed(tsdn, arena, &extent_hooks, &purge_extents); assert(npurged == npurge); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->decay.mtx); if (config_stats) { arena_stats_lock(tsdn, &arena->stats); @@ -805,13 +812,13 @@ label_return: void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->decay.mtx); if (all) { arena_purge_to_limit(tsdn, arena, 0); } else { arena_maybe_purge(tsdn, arena); } - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->decay.mtx); } static void @@ -822,9 +829,9 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { extent_dalloc_cache(tsdn, arena, &extent_hooks, slab); arena_nactive_sub(arena, npages); - malloc_mutex_lock(tsdn, &arena->lock); + malloc_mutex_lock(tsdn, &arena->decay.mtx); arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->lock); + malloc_mutex_unlock(tsdn, &arena->decay.mtx); } static void @@ -1641,9 +1648,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->nthreads[0] = arena->nthreads[1] = 0; 
- if (malloc_mutex_init(&arena->lock, "arena", WITNESS_RANK_ARENA)) { - goto label_error; - } if (config_stats) { if (arena_stats_init(tsdn, &arena->stats)) { @@ -1684,7 +1688,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_write_u(&arena->purging, 0); atomic_write_zu(&arena->nactive, 0); - arena_decay_init(arena, arena_decay_time_default_get()); + if (arena_decay_init(arena, arena_decay_time_default_get())) { + goto label_error; + } extent_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", @@ -1742,7 +1748,7 @@ arena_boot(void) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->lock); + malloc_mutex_prefork(tsdn, &arena->decay.mtx); if (config_stats && config_tcache) { malloc_mutex_prefork(tsdn, &arena->tcache_ql_mtx); } @@ -1782,7 +1788,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->extent_freelist_mtx); extents_postfork_parent(tsdn, &arena->extents_cached); extents_postfork_parent(tsdn, &arena->extents_retained); - malloc_mutex_postfork_parent(tsdn, &arena->lock); + malloc_mutex_postfork_parent(tsdn, &arena->decay.mtx); if (config_stats && config_tcache) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -1800,7 +1806,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->extent_freelist_mtx); extents_postfork_child(tsdn, &arena->extents_cached); extents_postfork_child(tsdn, &arena->extents_retained); - malloc_mutex_postfork_child(tsdn, &arena->lock); + malloc_mutex_postfork_child(tsdn, &arena->decay.mtx); if (config_stats && config_tcache) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } From b0654b95ed784be609c5212bd34f8141bdf5caca Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Feb 2017 10:35:41 -0800 Subject: [PATCH 0646/2608] Fix arena->stats.mapped accounting. 
Mapped memory increases when extent_alloc_wrapper() succeeds, and decreases when extent_dalloc_wrapper() is called (during purging). --- include/jemalloc/internal/arena_externs.h | 2 + include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 72 ++++++++++++------- src/large.c | 12 ++++ 4 files changed, 61 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 2880399b..72cbf5fe 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -15,6 +15,8 @@ extern const arena_bin_info_t arena_bin_info[NBINS]; void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests); +void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + size_t size); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index ff54a35d..15d0449c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -67,6 +67,7 @@ arena_sdalloc arena_set arena_slab_regind arena_stats_init +arena_stats_mapped_add arena_stats_merge arena_tcache_fill_small arena_tdata_get diff --git a/src/arena.c b/src/arena.c index 98004ecb..80843693 100644 --- a/src/arena.c +++ b/src/arena.c @@ -93,10 +93,12 @@ static void arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, uint64_t x) { #ifdef JEMALLOC_ATOMIC_U64 - atomic_sub_u64(p, x); + UNUSED uint64_t r = atomic_sub_u64(p, x); + assert(r + x >= r); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); *p -= x; + assert(*p + x >= *p); #endif } @@ -125,10 +127,12 @@ static void arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p, 
size_t x) { #ifdef JEMALLOC_ATOMIC_U64 - atomic_sub_zu(p, x); + UNUSED size_t r = atomic_sub_zu(p, x); + assert(r + x >= r); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); *p -= x; + assert(*p + x >= *p); #endif } @@ -141,6 +145,13 @@ arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, arena_stats_unlock(tsdn, arena_stats); } +void +arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); + arena_stats_unlock(tsdn, arena_stats); +} + void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { @@ -410,22 +421,38 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + size_t mapped_add; bool commit = true; extent = extent_alloc_cache(tsdn, arena, &extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); + size_t size = usize + large_pad; if (extent == NULL) { extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, usize, large_pad, alignment, zero, &commit, false); + if (config_stats) { + /* + * extent may be NULL on OOM, but in that case + * mapped_add isn't used below, so there's no need to + * conditionlly set it to 0 here. 
+ */ + mapped_add = size; + } + } else if (config_stats) { + mapped_add = 0; } - if (config_stats && extent != NULL) { - arena_stats_lock(tsdn, &arena->stats); - arena_large_malloc_stats_update(tsdn, arena, usize); - arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.mapped, - usize); - arena_stats_unlock(tsdn, &arena->stats); + if (extent != NULL) { + if (config_stats) { + arena_stats_lock(tsdn, &arena->stats); + arena_large_malloc_stats_update(tsdn, arena, usize); + if (mapped_add != 0) { + arena_stats_add_zu(tsdn, &arena->stats, + &arena->stats.mapped, mapped_add); + } + arena_stats_unlock(tsdn, &arena->stats); + } + arena_nactive_add(arena, size >> LG_PAGE); } - arena_nactive_add(arena, (usize + large_pad) >> LG_PAGE); return extent; } @@ -436,8 +463,6 @@ arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { arena_stats_lock(tsdn, &arena->stats); arena_large_dalloc_stats_update(tsdn, arena, extent_usize_get(extent)); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - extent_size_get(extent)); arena_stats_unlock(tsdn, &arena->stats); } arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); @@ -459,8 +484,6 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (config_stats) { arena_stats_lock(tsdn, &arena->stats); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - udiff); arena_stats_unlock(tsdn, &arena->stats); } arena_nactive_sub(arena, udiff >> LG_PAGE); @@ -475,8 +498,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (config_stats) { arena_stats_lock(tsdn, &arena->stats); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.mapped, - udiff); arena_stats_unlock(tsdn, &arena->stats); } arena_nactive_add(arena, udiff >> LG_PAGE); @@ -760,6 +781,8 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, 
nmadvise); arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.purged, npurged); + arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, + npurged << LG_PAGE); arena_stats_unlock(tsdn, &arena->stats); } @@ -823,12 +846,11 @@ arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - size_t npages = extent_size_get(slab) >> LG_PAGE; + arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; extent_dalloc_cache(tsdn, arena, &extent_hooks, slab); - arena_nactive_sub(arena, npages); malloc_mutex_lock(tsdn, &arena->decay.mtx); arena_maybe_purge(tsdn, arena); malloc_mutex_unlock(tsdn, &arena->decay.mtx); @@ -1015,6 +1037,11 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, bin_info->slab_size, 0, PAGE, &zero, &commit, true); + if (config_stats && slab != NULL) { + arena_stats_mapped_add(tsdn, &arena->stats, + bin_info->slab_size); + } + return slab; } @@ -1037,20 +1064,13 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } assert(extent_slab_get(slab)); - arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); - /* Initialize slab internals. 
*/ arena_slab_data_t *slab_data = extent_slab_data_get(slab); slab_data->binind = binind; slab_data->nfree = bin_info->nregs; bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); - if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_zu(tsdn, &arena->stats, &arena->stats.mapped, - extent_size_get(slab)); - arena_stats_unlock(tsdn, &arena->stats); - } + arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); return slab; } diff --git a/src/large.c b/src/large.c index 55e0737e..bb638499 100644 --- a/src/large.c +++ b/src/large.c @@ -147,6 +147,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool is_zeroed_trail = false; bool commit = true; extent_t *trail; + bool new_mapping; if ((trail = extent_alloc_cache(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, &commit, false)) == NULL) { @@ -155,6 +156,13 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, &is_zeroed_trail, &commit, false)) == NULL) { return true; } + if (config_stats) { + new_mapping = true; + } + } else { + if (config_stats) { + new_mapping = false; + } } if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { @@ -162,6 +170,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, return true; } + if (config_stats && new_mapping) { + arena_stats_mapped_add(tsdn, &arena->stats, trailsize); + } + if (zero || (config_fill && unlikely(opt_zero))) { if (config_cache_oblivious) { /* From c1ebfaa673f769eff399fc5806591b3a4782a9c5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 23:00:19 -0800 Subject: [PATCH 0647/2608] Optimize extent coalescing. Refactor extent_can_coalesce(), extent_coalesce(), and extent_record() to avoid needlessly repeating extent [de]activation operations. 
--- src/extent.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/src/extent.c b/src/extent.c index 85c92d0f..cf502ca3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1020,14 +1020,19 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, } static bool -extent_can_coalesce(const extent_t *a, const extent_t *b) { - if (extent_arena_get(a) != extent_arena_get(b)) { +extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, + const extent_t *outer) { + assert(extent_arena_get(inner) == arena); + if (extent_arena_get(outer) != arena) { return false; } - if (extent_state_get(a) != extent_state_get(b)) { + + assert(extent_state_get(inner) == extent_state_active); + if (extent_state_get(outer) != extents->state) { return false; } - if (extent_committed_get(a) != extent_committed_get(b)) { + + if (extent_committed_get(inner) != extent_committed_get(outer)) { return false; } @@ -1036,24 +1041,21 @@ extent_can_coalesce(const extent_t *a, const extent_t *b) { static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extent_t *a, extent_t *b, extents_t *extents) { - assert(extent_can_coalesce(a, b)); - assert(extent_arena_get(a) == arena); - assert(extent_arena_get(b) == arena); + extents_t *extents, extent_t *inner, extent_t *outer, bool forward) { + assert(extent_can_coalesce(arena, extents, inner, outer)); - extent_activate_locked(tsdn, arena, extents, a); - extent_activate_locked(tsdn, arena, extents, b); + extent_activate_locked(tsdn, arena, extents, outer); malloc_mutex_unlock(tsdn, &extents->mtx); - bool err = extent_merge_wrapper(tsdn, arena, r_extent_hooks, a, b); + bool err = extent_merge_wrapper(tsdn, arena, r_extent_hooks, + forward ? inner : outer, forward ? 
outer : inner); malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, a); + if (err) { - extent_deactivate_locked(tsdn, arena, extents, b); - return true; + extent_deactivate_locked(tsdn, arena, extents, outer); } - return false; + return err; } static void @@ -1075,7 +1077,6 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); - extent_deactivate_locked(tsdn, arena, extents, extent); /* * Continue attempting to coalesce until failure, to protect against @@ -1098,10 +1099,10 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * before releasing the next_elm lock. */ bool can_coalesce = (next != NULL && - extent_can_coalesce(extent, next)); + extent_can_coalesce(arena, extents, extent, next)); rtree_elm_release(tsdn, &extents_rtree, next_elm); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extent, next, extents)) { + r_extent_hooks, extents, extent, next, true)) { coalesced = true; } } @@ -1114,16 +1115,18 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *prev = rtree_elm_read_acquired(tsdn, &extents_rtree, prev_elm); bool can_coalesce = (prev != NULL && - extent_can_coalesce(prev, extent)); + extent_can_coalesce(arena, extents, extent, prev)); rtree_elm_release(tsdn, &extents_rtree, prev_elm); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, prev, extent, extents)) { + r_extent_hooks, extents, extent, prev, false)) { extent = prev; coalesced = true; } } } while (coalesced); + extent_deactivate_locked(tsdn, arena, extents, extent); + malloc_mutex_unlock(tsdn, &extents->mtx); } From 2dfc5b5aac983f8f192c63c604a59fed8b39e937 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 12 Feb 2017 23:18:57 -0800 Subject: [PATCH 0648/2608] Disable coalescing of cached extents. 
Extent splitting and coalescing is a major component of large allocation overhead, and disabling coalescing of cached extents provides a simple and effective hysteresis mechanism. Once two-phase purging is implemented, it will probably make sense to leave coalescing disabled for the first phase, but coalesce during the second phase. --- include/jemalloc/internal/extent_externs.h | 3 +- include/jemalloc/internal/extent_structs.h | 3 ++ src/arena.c | 5 +- src/extent.c | 56 ++++++++++++++-------- 4 files changed, 43 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index a3556118..d971ec3a 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -21,7 +21,8 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) -bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state); +bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, + bool try_coalesce); extent_state_t extents_state_get(const extents_t *extents); size_t extents_npages_get(extents_t *extents); extent_t *extents_evict(tsdn_t *tsdn, extents_t *extents, size_t npages_min); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 33ca4ac7..008b6352 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -115,6 +115,9 @@ struct extents_s { /* All stored extents must be in the same state. */ extent_state_t state; + + /* If true, try to coalesce during extent deallocation. 
*/ + bool try_coalesce; }; #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/src/arena.c b/src/arena.c index 80843693..e0fa3a8b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1718,11 +1718,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (extents_init(tsdn, &arena->extents_cached, extent_state_dirty)) { + if (extents_init(tsdn, &arena->extents_cached, extent_state_dirty, + false)) { goto label_error; } if (extents_init(tsdn, &arena->extents_retained, - extent_state_retained)) { + extent_state_retained, true)) { goto label_error; } diff --git a/src/extent.c b/src/extent.c index cf502ca3..afc60061 100644 --- a/src/extent.c +++ b/src/extent.c @@ -191,7 +191,8 @@ extent_size_quantize_t *extent_size_quantize_ceil = ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) bool -extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state) { +extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, + bool try_coalesce) { if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS)) { return true; } @@ -201,6 +202,7 @@ extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state) { extent_list_init(&extents->lru); extents->npages = 0; extents->state = state; + extents->try_coalesce = try_coalesce; return false; } @@ -1058,26 +1060,10 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, return err; } -static void -extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - assert(extents_state_get(extents) != extent_state_dirty || - !extent_zeroed_get(extent)); - - malloc_mutex_lock(tsdn, &extents->mtx); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - extent_usize_set(extent, 0); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, 
rtree_ctx, extent); - extent_slab_set(extent, false); - } - - assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); - +static extent_t * +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent) { /* * Continue attempting to coalesce until failure, to protect against * races with other threads that are thwarted by this one. @@ -1125,6 +1111,34 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } } while (coalesced); + return extent; +} + +static void +extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extents_t *extents, extent_t *extent) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + assert(extents_state_get(extents) != extent_state_dirty || + !extent_zeroed_get(extent)); + + malloc_mutex_lock(tsdn, &extents->mtx); + extent_hooks_assure_initialized(arena, r_extent_hooks); + + extent_usize_set(extent, 0); + if (extent_slab_get(extent)) { + extent_interior_deregister(tsdn, rtree_ctx, extent); + extent_slab_set(extent, false); + } + + assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); + + if (extents->try_coalesce) { + extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, + rtree_ctx, extents, extent); + } + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); From d4f3f9a03f0ba199a7b51c93bdebe8236e0105da Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 25 Jan 2017 12:58:50 -0800 Subject: [PATCH 0649/2608] Beef up travis CI integration testing Introduces gen_travis.py, which generates .travis.yml, and updates .travis.yml to be the generated version. The travis build matrix approach doesn't play well with mixing and matching various different environment settings, so we generate every build explicitly, rather than letting them do it for us. 
To avoid abusing travis resources (and save us time waiting for CI results), we don't test every possible combination of options; we only check up to 2 unusual settings at a time. --- .travis.yml | 88 +++++++++++++++++++++++++++++++++++++------ scripts/gen_travis.py | 85 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 11 deletions(-) create mode 100755 scripts/gen_travis.py diff --git a/.travis.yml b/.travis.yml index 1fed4f8e..b563928c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,29 +1,95 @@ -language: c +language: generic matrix: include: - os: linux - compiler: gcc + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="" - os: linux - compiler: gcc - env: - - EXTRA_FLAGS=-m32 + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="" + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" addons: apt: packages: - - gcc-multilib + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" - os: osx - compiler: clang + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="" - os: osx - compiler: clang - env: - - EXTRA_FLAGS=-m32 + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: osx + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: linux + env: CC=clang COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + - os: linux + 
env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + - os: linux + env: CC=clang COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" + addons: + apt: + packages: + - gcc-multilib + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-tcache" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" + - os: linux + env: CC=gcc COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" + before_script: - autoconf - - ./configure${EXTRA_FLAGS:+ CC="$CC $EXTRA_FLAGS"} + - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" } $CONFIGURE_FLAGS - make -j3 - make -j3 tests script: - make check + diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py new file mode 100755 index 00000000..ccbcaf8c --- /dev/null +++ b/scripts/gen_travis.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python + +from itertools import combinations + +travis_template = """\ +language: generic + +matrix: + include: +%s + +before_script: + - autoconf + - ./configure ${COMPILER_FLAGS:+ \ + CC="$CC $COMPILER_FLAGS" } \ + $CONFIGURE_FLAGS + - make -j3 + - make -j3 tests + +script: + - make check +""" + +# The 'default' configuration is gcc, on linux, with no compiler or 
configure +# flags. We also test with clang, -m32, --enable-debug, --enable-prof, +# --disable-stats, and --disable-tcache. To avoid abusing travis though, we +# don't test all 2**7 = 128 possible combinations of these; instead, we only +# test combinations of up to 2 'unusual' settings, under the hope that bugs +# involving interactions of such settings are rare. +# things at once, for C(7, 0) + C(7, 1) + C(7, 2) = 29 +MAX_UNUSUAL_OPTIONS = 2 + +os_default = 'linux' +os_unusual = 'osx' + +compilers_default = 'CC=gcc' +compilers_unusual = 'CC=clang' + +compiler_flag_unusuals = ['-m32'] + +configure_flag_unusuals = [ + '--enable-debug', '--enable-prof', '--disable-stats', '--disable-tcache', +] + +all_unusuals = ( + [os_unusual] + [compilers_unusual] + compiler_flag_unusuals + + configure_flag_unusuals +) + +unusual_combinations_to_test = [] +for i in xrange(MAX_UNUSUAL_OPTIONS + 1): + unusual_combinations_to_test += combinations(all_unusuals, i) + +include_rows = "" +for unusual_combination in unusual_combinations_to_test: + os = os_default + if os_unusual in unusual_combination: + os = os_unusual + + compilers = compilers_default + if compilers_unusual in unusual_combination: + compilers = compilers_unusual + + compiler_flags = [ + x for x in unusual_combination if x in compiler_flag_unusuals] + + configure_flags = [ + x for x in unusual_combination if x in configure_flag_unusuals] + + # Filter out an unsupported configuration - heap profiling on OS X. 
+ if os == 'osx' and '--enable-prof' in configure_flags: + continue + + env_string = '{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}"'.format( + compilers, " ".join(compiler_flags), " ".join(configure_flags)) + + include_rows += ' - os: %s\n' % os + include_rows += ' env: %s\n' % env_string + if '-m32' in unusual_combination and os == 'linux': + include_rows += ' addons:\n' + include_rows += ' apt:\n' + include_rows += ' packages:\n' + include_rows += ' - gcc-multilib\n' + +print travis_template % include_rows From 003ca8717fed0cf4d6eb28486bf7fa2c00643f39 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 19 Feb 2017 14:05:05 -0800 Subject: [PATCH 0650/2608] Move arena_basic_stats_merge() prototype (hygienic cleanup). --- include/jemalloc/internal/arena_externs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 72cbf5fe..d97b6a7d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -17,6 +17,9 @@ void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests); void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, + unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, + size_t *ndirty); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, arena_stats_t *astats, malloc_bin_stats_t *bstats, @@ -72,9 +75,6 @@ dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_decay_time_default_get(void); bool arena_decay_time_default_set(ssize_t decay_time); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, - 
size_t *ndirty); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); From 54269dc0ed3e4d04b2539016431de3cfe8330719 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 16 Feb 2017 22:02:42 -0800 Subject: [PATCH 0651/2608] Remove obsolete arena_maybe_purge() call. Remove a call to arena_maybe_purge() that was necessary for ratio-based purging, but is obsolete in the context of decay-based purging. --- src/arena.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/arena.c b/src/arena.c index e0fa3a8b..a914abda 100644 --- a/src/arena.c +++ b/src/arena.c @@ -850,10 +850,6 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; extent_dalloc_cache(tsdn, arena, &extent_hooks, slab); - - malloc_mutex_lock(tsdn, &arena->decay.mtx); - arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->decay.mtx); } static void From 664ef652d970e14a4c941bf650cb50dbb4128b05 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 18 Feb 2017 00:02:23 -0800 Subject: [PATCH 0652/2608] Avoid -lgcc for heap profiling if unwind.h is missing. This removes an unneeded library dependency when falling back to intrinsics-based backtracing (or failing to enable heap profiling at all). 
--- configure.ac | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e71edd72..1627a3d9 100644 --- a/configure.ac +++ b/configure.ac @@ -1082,7 +1082,9 @@ fi if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ -a "x$GCC" = "xyes" ; then AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) - AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [JE_APPEND_VS(LIBS, -lgcc)], [enable_prof_libgcc="0"]) + if test "x${enable_prof_libgcc}" = "x1" ; then + AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [JE_APPEND_VS(LIBS, -lgcc)], [enable_prof_libgcc="0"]) + fi if test "x${enable_prof_libgcc}" = "x1" ; then backtrace_method="libgcc" AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) From 8ac7937eb5ce011945188ef3553dbc2bcc294a25 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Feb 2017 19:38:12 -0800 Subject: [PATCH 0653/2608] Remove remainder of mb (memory barrier). This complements 94c5d22a4da7844d0bdc5b370e47b1ba14268af2 (Remove mb.h, which is unused). 
--- Makefile.in | 1 - include/jemalloc/internal/private_symbols.txt | 1 - src/mb.c | 2 -- 3 files changed, 4 deletions(-) delete mode 100644 src/mb.c diff --git a/Makefile.in b/Makefile.in index 23056f78..76a73b76 100644 --- a/Makefile.in +++ b/Makefile.in @@ -100,7 +100,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ $(srcroot)src/large.c \ - $(srcroot)src/mb.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 15d0449c..be56e1a2 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -296,7 +296,6 @@ malloc_tsd_no_cleanup malloc_vcprintf malloc_vsnprintf malloc_write -mb_write narenas_auto narenas_total_get ncpus diff --git a/src/mb.c b/src/mb.c deleted file mode 100644 index 94f3c724..00000000 --- a/src/mb.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_MB_C_ -#include "jemalloc/internal/jemalloc_internal.h" From fdba5ad5cc67cac8bfc247b407df1fc43e6551f9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Feb 2017 13:15:55 -0800 Subject: [PATCH 0654/2608] Repair file permissions. This regression was caused by 8f61fdedb908c29905103b22dda32ceb29cd8ede (Uniformly cast mallctl[bymib]() oldp/newp arguments to (void *).). This resolves #538. 
--- msvc/projects/vc2015/test_threads/test_threads.cpp | 0 src/stats.c | 0 src/tcache.c | 0 src/util.c | 0 test/integration/MALLOCX_ARENA.c | 0 test/integration/allocated.c | 0 test/integration/mallocx.c | 0 test/integration/overflow.c | 0 test/integration/rallocx.c | 0 test/integration/thread_arena.c | 0 test/integration/thread_tcache_enabled.c | 0 test/integration/xallocx.c | 0 test/unit/arena_reset.c | 0 test/unit/decay.c | 0 test/unit/mallctl.c | 0 test/unit/prof_accum.c | 0 test/unit/prof_active.c | 0 test/unit/prof_gdump.c | 0 test/unit/prof_idump.c | 0 test/unit/prof_reset.c | 0 test/unit/prof_thread_name.c | 0 test/unit/size_classes.c | 0 test/unit/stats.c | 0 23 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 msvc/projects/vc2015/test_threads/test_threads.cpp mode change 100755 => 100644 src/stats.c mode change 100755 => 100644 src/tcache.c mode change 100755 => 100644 src/util.c mode change 100755 => 100644 test/integration/MALLOCX_ARENA.c mode change 100755 => 100644 test/integration/allocated.c mode change 100755 => 100644 test/integration/mallocx.c mode change 100755 => 100644 test/integration/overflow.c mode change 100755 => 100644 test/integration/rallocx.c mode change 100755 => 100644 test/integration/thread_arena.c mode change 100755 => 100644 test/integration/thread_tcache_enabled.c mode change 100755 => 100644 test/integration/xallocx.c mode change 100755 => 100644 test/unit/arena_reset.c mode change 100755 => 100644 test/unit/decay.c mode change 100755 => 100644 test/unit/mallctl.c mode change 100755 => 100644 test/unit/prof_accum.c mode change 100755 => 100644 test/unit/prof_active.c mode change 100755 => 100644 test/unit/prof_gdump.c mode change 100755 => 100644 test/unit/prof_idump.c mode change 100755 => 100644 test/unit/prof_reset.c mode change 100755 => 100644 test/unit/prof_thread_name.c mode change 100755 => 100644 test/unit/size_classes.c mode change 100755 => 100644 test/unit/stats.c diff --git 
a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/projects/vc2015/test_threads/test_threads.cpp old mode 100755 new mode 100644 diff --git a/src/stats.c b/src/stats.c old mode 100755 new mode 100644 diff --git a/src/tcache.c b/src/tcache.c old mode 100755 new mode 100644 diff --git a/src/util.c b/src/util.c old mode 100755 new mode 100644 diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c old mode 100755 new mode 100644 diff --git a/test/integration/allocated.c b/test/integration/allocated.c old mode 100755 new mode 100644 diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c old mode 100755 new mode 100644 diff --git a/test/integration/overflow.c b/test/integration/overflow.c old mode 100755 new mode 100644 diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c old mode 100755 new mode 100644 diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c old mode 100755 new mode 100644 diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c old mode 100755 new mode 100644 diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c old mode 100755 new mode 100644 diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c old mode 100755 new mode 100644 diff --git a/test/unit/decay.c b/test/unit/decay.c old mode 100755 new mode 100644 diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c old mode 100755 new mode 100644 diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c old mode 100755 new mode 100644 diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c old mode 100755 new mode 100644 diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c old mode 100755 new mode 100644 diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c old mode 100755 new mode 100644 diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c old mode 100755 new mode 100644 diff --git a/test/unit/prof_thread_name.c 
b/test/unit/prof_thread_name.c old mode 100755 new mode 100644 diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c old mode 100755 new mode 100644 diff --git a/test/unit/stats.c b/test/unit/stats.c old mode 100755 new mode 100644 From de49674fbde4d124a0a7e7e97f5656e190980759 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Feb 2017 23:40:06 -0800 Subject: [PATCH 0655/2608] Use MALLOC_CONF rather than malloc_conf for tests. malloc_conf does not reliably work with MSVC, which complains of "inconsistent dll linkage", i.e. its inability to support the application overriding malloc_conf when dynamically linking/loading. Work around this limitation by adding test harness support for per test shell script sourcing, and converting all tests to use MALLOC_CONF instead of malloc_conf. --- test/integration/extent.c | 4 ---- test/integration/extent.sh | 5 +++++ test/integration/mallocx.c | 4 ---- test/integration/mallocx.sh | 5 +++++ test/integration/xallocx.c | 4 ---- test/integration/xallocx.sh | 5 +++++ test/test.sh.in | 29 ++++++++++++++++++++++++++++- test/unit/arena_reset_prof.c | 1 - test/unit/arena_reset_prof.sh | 3 +++ test/unit/decay.c | 6 ------ test/unit/decay.sh | 6 ++++++ test/unit/junk.c | 8 -------- test/unit/junk.sh | 5 +++++ test/unit/junk_alloc.c | 2 -- test/unit/junk_alloc.sh | 5 +++++ test/unit/junk_free.c | 2 -- test/unit/junk_free.sh | 5 +++++ test/unit/pack.c | 3 --- test/unit/pack.sh | 4 ++++ test/unit/prof_accum.c | 5 ----- test/unit/prof_accum.sh | 5 +++++ test/unit/prof_active.c | 5 ----- test/unit/prof_active.sh | 5 +++++ test/unit/prof_gdump.c | 4 ---- test/unit/prof_gdump.sh | 6 ++++++ test/unit/prof_idump.c | 13 ------------- test/unit/prof_idump.sh | 12 ++++++++++++ test/unit/prof_reset.c | 5 ----- test/unit/prof_reset.sh | 5 +++++ test/unit/prof_tctx.c | 4 ---- test/unit/prof_tctx.sh | 5 +++++ test/unit/prof_thread_name.c | 4 ---- test/unit/prof_thread_name.sh | 5 +++++ test/unit/zero.c | 5 ----- test/unit/zero.sh | 5 
+++++ 35 files changed, 119 insertions(+), 80 deletions(-) create mode 100644 test/integration/extent.sh create mode 100644 test/integration/mallocx.sh create mode 100644 test/integration/xallocx.sh create mode 100644 test/unit/arena_reset_prof.sh create mode 100644 test/unit/decay.sh create mode 100644 test/unit/junk.sh create mode 100644 test/unit/junk_alloc.sh create mode 100644 test/unit/junk_free.sh create mode 100644 test/unit/pack.sh create mode 100644 test/unit/prof_accum.sh create mode 100644 test/unit/prof_active.sh create mode 100644 test/unit/prof_gdump.sh create mode 100644 test/unit/prof_idump.sh create mode 100644 test/unit/prof_reset.sh create mode 100644 test/unit/prof_tctx.sh create mode 100644 test/unit/prof_thread_name.sh create mode 100644 test/unit/zero.sh diff --git a/test/integration/extent.c b/test/integration/extent.c index 08792df3..32432af9 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - #include "test/extent_hooks.h" static void diff --git a/test/integration/extent.sh b/test/integration/extent.sh new file mode 100644 index 00000000..0cc21873 --- /dev/null +++ b/test/integration/extent.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="junk:false" +fi diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index b60e27b6..b0b5cdac 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; diff --git a/test/integration/mallocx.sh b/test/integration/mallocx.sh new file mode 100644 index 00000000..0cc21873 --- /dev/null +++ b/test/integration/mallocx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export 
MALLOC_CONF="junk:false" +fi diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index c95fbf18..cd0ca048 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - /* * Use a separate arena for xallocx() extension/contraction tests so that * internal allocation e.g. by heap profiling can't interpose allocations where diff --git a/test/integration/xallocx.sh b/test/integration/xallocx.sh new file mode 100644 index 00000000..0cc21873 --- /dev/null +++ b/test/integration/xallocx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="junk:false" +fi diff --git a/test/test.sh.in b/test/test.sh.in index a39f99f6..f0f0f979 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -11,6 +11,18 @@ case @abi@ in ;; esac +# Make a copy of the @JEMALLOC_CPREFIX@MALLOC_CONF passed in to this script, so +# it can be repeatedly concatenated with per test settings. +export MALLOC_CONF_ALL=${@JEMALLOC_CPREFIX@MALLOC_CONF} +# Concatenate the individual test's MALLOC_CONF and MALLOC_CONF_ALL. +export_malloc_conf() { + if [ "x${MALLOC_CONF}" != "x" -a "x${MALLOC_CONF_ALL}" != "x" ] ; then + export @JEMALLOC_CPREFIX@MALLOC_CONF="${MALLOC_CONF},${MALLOC_CONF_ALL}" + else + export @JEMALLOC_CPREFIX@MALLOC_CONF="${MALLOC_CONF}${MALLOC_CONF_ALL}" + fi +} + # Corresponds to test_status_t. pass_code=0 skip_code=1 @@ -24,7 +36,22 @@ for t in $@; do echo fi echo "=== ${t} ===" - ${t}@exe@ @abs_srcroot@ @abs_objroot@ + if [ -e "@srcroot@${t}.sh" ] ; then + # Source the shell script corresponding to the test in a subshell and + # execute the test. This allows the shell script to set MALLOC_CONF, which + # is then used to set @JEMALLOC_CPREFIX@MALLOC_CONF (thus allowing the + # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). 
+ $(enable_fill=@enable_fill@ \ + enable_prof=@enable_prof@ \ + enable_tcache=@enable_tcache@ \ + . @srcroot@${t}.sh && \ + export_malloc_conf && \ + ${t}@exe@ @abs_srcroot@ @abs_objroot@) + else + $(export MALLOC_CONF= && \ + export_malloc_conf && + ${t}@exe@ @abs_srcroot@ @abs_objroot@) + fi result_code=$? case ${result_code} in ${pass_code}) diff --git a/test/unit/arena_reset_prof.c b/test/unit/arena_reset_prof.c index 6d83c843..38d80124 100644 --- a/test/unit/arena_reset_prof.c +++ b/test/unit/arena_reset_prof.c @@ -1,5 +1,4 @@ #include "test/jemalloc_test.h" #define ARENA_RESET_PROF_C_ -const char *malloc_conf = "prof:true,lg_prof_sample:0"; #include "arena_reset.c" diff --git a/test/unit/arena_reset_prof.sh b/test/unit/arena_reset_prof.sh new file mode 100644 index 00000000..041dc1c3 --- /dev/null +++ b/test/unit/arena_reset_prof.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="prof:true,lg_prof_sample:0" diff --git a/test/unit/decay.c b/test/unit/decay.c index fc8fabcf..98453221 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -1,11 +1,5 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "decay_time:1" -#ifdef JEMALLOC_TCACHE - ",lg_tcache_max:0" -#endif - ; - static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; diff --git a/test/unit/decay.sh b/test/unit/decay.sh new file mode 100644 index 00000000..284af815 --- /dev/null +++ b/test/unit/decay.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +export MALLOC_CONF="decay_time:1" +if [ "x${enable_tcache}" = "x1" ] ; then + export MALLOC_CONF="${MALLOC_CONF},lg_tcache_max:0" +fi diff --git a/test/unit/junk.c b/test/unit/junk.c index 86c51089..cfa8d0f2 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -1,13 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -# ifndef JEMALLOC_TEST_JUNK_OPT -# define JEMALLOC_TEST_JUNK_OPT "junk:true" -# endif -const char *malloc_conf = - "abort:false,zero:false," JEMALLOC_TEST_JUNK_OPT; -#endif - static 
arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; static large_dalloc_junk_t *large_dalloc_junk_orig; static large_dalloc_maybe_junk_t *large_dalloc_maybe_junk_orig; diff --git a/test/unit/junk.sh b/test/unit/junk.sh new file mode 100644 index 00000000..97cd8ca5 --- /dev/null +++ b/test/unit/junk.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,zero:false,junk:true" +fi diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c index 8db3331d..a442a0ca 100644 --- a/test/unit/junk_alloc.c +++ b/test/unit/junk_alloc.c @@ -1,3 +1 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" -#undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_alloc.sh b/test/unit/junk_alloc.sh new file mode 100644 index 00000000..e1008c2e --- /dev/null +++ b/test/unit/junk_alloc.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,zero:false,junk:alloc" +fi diff --git a/test/unit/junk_free.c b/test/unit/junk_free.c index 482a61d0..a442a0ca 100644 --- a/test/unit/junk_free.c +++ b/test/unit/junk_free.c @@ -1,3 +1 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" -#undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_free.sh b/test/unit/junk_free.sh new file mode 100644 index 00000000..402196ca --- /dev/null +++ b/test/unit/junk_free.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,zero:false,junk:free" +fi diff --git a/test/unit/pack.c b/test/unit/pack.c index d35ac5ea..5da4ae12 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -1,8 +1,5 @@ #include "test/jemalloc_test.h" -/* Immediately purge to minimize fragmentation. */ -const char *malloc_conf = "decay_time:-1"; - /* * Size class that is a divisor of the page size, ideally 4+ regions per run. 
*/ diff --git a/test/unit/pack.sh b/test/unit/pack.sh new file mode 100644 index 00000000..de12e553 --- /dev/null +++ b/test/unit/pack.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +# Immediately purge to minimize fragmentation. +export MALLOC_CONF="decay_time:-1" diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index bcd1d881..6ccab82b 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -5,11 +5,6 @@ #define DUMP_INTERVAL 1 #define BT_COUNT_CHECK_INTERVAL 5 -#ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0"; -#endif - static int prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; diff --git a/test/unit/prof_accum.sh b/test/unit/prof_accum.sh new file mode 100644 index 00000000..b3e13fc5 --- /dev/null +++ b/test/unit/prof_accum.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0" +fi diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index c0e085a8..275aac89 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -1,10 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_thread_active_init:false,lg_prof_sample:0"; -#endif - static void mallctl_bool_get(const char *name, bool expected, const char *func, int line) { bool old; diff --git a/test/unit/prof_active.sh b/test/unit/prof_active.sh new file mode 100644 index 00000000..0167cb10 --- /dev/null +++ b/test/unit/prof_active.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_thread_active_init:false,lg_prof_sample:0" +fi diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 30320b7a..97ade68c 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = 
"prof:true,prof_active:false,prof_gdump:true"; -#endif - static bool did_prof_dump_open; static int diff --git a/test/unit/prof_gdump.sh b/test/unit/prof_gdump.sh new file mode 100644 index 00000000..3f600d20 --- /dev/null +++ b/test/unit/prof_gdump.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true" +fi + diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 1fed7b37..1cc6c98c 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,18 +1,5 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "" -#ifdef JEMALLOC_PROF - "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0" - ",lg_prof_interval:0" -# ifdef JEMALLOC_TCACHE - "," -# endif -#endif -#ifdef JEMALLOC_TCACHE - "tcache:false" -#endif - ; - static bool did_prof_dump_open; static int diff --git a/test/unit/prof_idump.sh b/test/unit/prof_idump.sh new file mode 100644 index 00000000..fdb5813f --- /dev/null +++ b/test/unit/prof_idump.sh @@ -0,0 +1,12 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0,lg_prof_interval:0" + if [ "x${enable_tcache}" = "x1" ] ; then + export MALLOC_CONF="${MALLOC_CONF},tcache:false" + fi +elif [ "x${enable_tcache}" = "x1" ] ; then + export MALLOC_CONF="tcache:false" +fi + + diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index fc954f9f..6120714e 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,10 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_active:false,lg_prof_sample:0"; -#endif - static int prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; diff --git a/test/unit/prof_reset.sh b/test/unit/prof_reset.sh new file mode 100644 index 00000000..43c516a0 --- /dev/null +++ b/test/unit/prof_reset.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = 
"x1" ] ; then + export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0" +fi diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 2e35b7ec..14510c65 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = "prof:true,lg_prof_sample:0"; -#endif - TEST_BEGIN(test_prof_realloc) { tsdn_t *tsdn; int flags; diff --git a/test/unit/prof_tctx.sh b/test/unit/prof_tctx.sh new file mode 100644 index 00000000..8fcc7d8a --- /dev/null +++ b/test/unit/prof_tctx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index a094a1c0..c9c2a2b7 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = "prof:true,prof_active:false"; -#endif - static void mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, int line) { diff --git a/test/unit/prof_thread_name.sh b/test/unit/prof_thread_name.sh new file mode 100644 index 00000000..298c1058 --- /dev/null +++ b/test/unit/prof_thread_name.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_active:false" +fi diff --git a/test/unit/zero.c b/test/unit/zero.c index d5b03f8d..553692ba 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -1,10 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = - "abort:false,junk:false,zero:true"; -#endif - static void test_zero(size_t sz_min, size_t sz_max) { uint8_t *s; diff --git a/test/unit/zero.sh b/test/unit/zero.sh new file mode 100644 index 00000000..b4540b27 --- /dev/null +++ b/test/unit/zero.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export 
MALLOC_CONF="abort:false,junk:false,zero:true" +fi From 3ecc3c84862ef3e66b20be8213b0301c06c692cc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 29 Jan 2017 21:32:39 -0800 Subject: [PATCH 0656/2608] Fix/refactor tcaches synchronization. Synchronize tcaches with tcaches_mtx rather than ctl_mtx. Add missing synchronization for tcache flushing. This bug was introduced by 1cb181ed632e7573fb4eab194e4d216867222d27 (Implement explicit tcache support.), which was first released in 4.0.0. --- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/tcache.h | 3 + include/jemalloc/internal/witness.h | 21 +-- src/ctl.c | 4 +- src/jemalloc.c | 3 + src/tcache.c | 120 ++++++++++++++---- 6 files changed, 113 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4dfe442c..6111eac8 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -529,6 +529,9 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass +tcache_prefork +tcache_postfork_child +tcache_postfork_parent tcache_salloc tcache_stats_merge tcaches diff --git a/include/jemalloc/internal/tcache.h b/include/jemalloc/internal/tcache.h index 01ba062d..5fe5ebfa 100644 --- a/include/jemalloc/internal/tcache.h +++ b/include/jemalloc/internal/tcache.h @@ -149,6 +149,9 @@ bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); +void tcache_prefork(tsdn_t *tsdn); +void tcache_postfork_parent(tsdn_t *tsdn); +void tcache_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_H_EXTERNS */ /******************************************************************************/ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index dfd827f7..e64e56eb 100644 --- a/include/jemalloc/internal/witness.h +++ 
b/include/jemalloc/internal/witness.h @@ -14,19 +14,20 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); #define WITNESS_RANK_INIT 1U #define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_ARENAS 2U +#define WITNESS_RANK_TCACHES 2U +#define WITNESS_RANK_ARENAS 3U -#define WITNESS_RANK_PROF_DUMP 3U -#define WITNESS_RANK_PROF_BT2GCTX 4U -#define WITNESS_RANK_PROF_TDATAS 5U -#define WITNESS_RANK_PROF_TDATA 6U -#define WITNESS_RANK_PROF_GCTX 7U +#define WITNESS_RANK_PROF_DUMP 4U +#define WITNESS_RANK_PROF_BT2GCTX 5U +#define WITNESS_RANK_PROF_TDATAS 6U +#define WITNESS_RANK_PROF_TDATA 7U +#define WITNESS_RANK_PROF_GCTX 8U -#define WITNESS_RANK_ARENA 8U -#define WITNESS_RANK_ARENA_CHUNKS 9U -#define WITNESS_RANK_ARENA_NODE_CACHE 10 +#define WITNESS_RANK_ARENA 9U +#define WITNESS_RANK_ARENA_CHUNKS 10U +#define WITNESS_RANK_ARENA_NODE_CACHE 11U -#define WITNESS_RANK_BASE 11U +#define WITNESS_RANK_BASE 12U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF diff --git a/src/ctl.c b/src/ctl.c index bc78b205..1e62e2d3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1476,7 +1476,6 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (!config_tcache) return (ENOENT); - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1486,8 +1485,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = 0; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); - return (ret); + return ret; } static int diff --git a/src/jemalloc.c b/src/jemalloc.c index 92813b62..a376e143 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2828,6 +2828,7 @@ _malloc_prefork(void) witness_prefork(tsd); /* Acquire all mutexes in a safe order. 
*/ ctl_prefork(tsd_tsdn(tsd)); + tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); prof_prefork0(tsd_tsdn(tsd)); for (i = 0; i < 3; i++) { @@ -2887,6 +2888,7 @@ _malloc_postfork(void) } prof_postfork_parent(tsd_tsdn(tsd)); malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); + tcache_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd)); } @@ -2911,6 +2913,7 @@ jemalloc_postfork_child(void) } prof_postfork_child(tsd_tsdn(tsd)); malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); + tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); } diff --git a/src/tcache.c b/src/tcache.c index 21540ff4..e3b04be6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -21,6 +21,9 @@ static unsigned tcaches_past; /* Head of singly linked list tracking available tcaches elements. */ static tcaches_t *tcaches_avail; +/* Protects tcaches{,_past,_avail}. */ +static malloc_mutex_t tcaches_mtx; + /******************************************************************************/ size_t @@ -444,29 +447,56 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) } } -bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) -{ - arena_t *arena; - tcache_t *tcache; - tcaches_t *elm; +static bool +tcaches_create_prep(tsd_t *tsd) { + bool err; + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1)); - if (tcaches == NULL) - return (true); + if (tcaches == NULL) { + err = true; + goto label_return; + } } - if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) - return (true); - arena = arena_ichoose(tsd, NULL); - if (unlikely(arena == NULL)) - return (true); - tcache = tcache_create(tsd_tsdn(tsd), arena); - if (tcache == NULL) - return (true); + if (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX) { + err = true; + goto label_return; + } + err = false; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), 
&tcaches_mtx); + return err; +} + +bool +tcaches_create(tsd_t *tsd, unsigned *r_ind) { + bool err; + arena_t *arena; + tcache_t *tcache; + tcaches_t *elm; + + if (tcaches_create_prep(tsd)) { + err = true; + goto label_return; + } + + arena = arena_ichoose(tsd, NULL); + if (unlikely(arena == NULL)) { + err = true; + goto label_return; + } + tcache = tcache_create(tsd_tsdn(tsd), arena); + if (tcache == NULL) { + err = true; + goto label_return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches_avail != NULL) { elm = tcaches_avail; tcaches_avail = tcaches_avail->next; @@ -478,41 +508,50 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) *r_ind = tcaches_past; tcaches_past++; } + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); - return (false); + err = false; +label_return: + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &tcaches_mtx); + return err; } static void -tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) -{ +tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); - if (elm->tcache == NULL) + if (elm->tcache == NULL) { return; + } tcache_destroy(tsd, elm->tcache); elm->tcache = NULL; } void -tcaches_flush(tsd_t *tsd, unsigned ind) -{ - +tcaches_flush(tsd_t *tsd, unsigned ind) { + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_elm_flush(tsd, &tcaches[ind]); + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); } void -tcaches_destroy(tsd_t *tsd, unsigned ind) -{ - tcaches_t *elm = &tcaches[ind]; +tcaches_destroy(tsd_t *tsd, unsigned ind) { + tcaches_t *elm; + + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); + elm = &tcaches[ind]; tcaches_elm_flush(tsd, elm); elm->next = tcaches_avail; tcaches_avail = elm; + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); } bool -tcache_boot(tsdn_t *tsdn) -{ +tcache_boot(tsdn_t *tsdn) { unsigned i; + cassert(config_tcache); + /* * If necessary, clamp opt_lg_tcache_max, now that large_maxclass is * known. 
@@ -524,6 +563,10 @@ tcache_boot(tsdn_t *tsdn) else tcache_maxclass = (ZU(1) << opt_lg_tcache_max); + if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES)) { + return true; + } + nhbins = size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ @@ -553,3 +596,24 @@ tcache_boot(tsdn_t *tsdn) return (false); } + +void +tcache_prefork(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_prefork(tsdn, &tcaches_mtx); + } +} + +void +tcache_postfork_parent(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); + } +} + +void +tcache_postfork_child(tsdn_t *tsdn) { + if (!config_prof && opt_tcache) { + malloc_mutex_postfork_child(tsdn, &tcaches_mtx); + } +} From e85e588e45fd3bac1ddc3778e6f8bfe3f668f634 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Feb 2017 23:40:06 -0800 Subject: [PATCH 0657/2608] Use MALLOC_CONF rather than malloc_conf for tests. malloc_conf does not reliably work with MSVC, which complains of "inconsistent dll linkage", i.e. its inability to support the application overriding malloc_conf when dynamically linking/loading. Work around this limitation by adding test harness support for per test shell script sourcing, and converting all tests to use MALLOC_CONF instead of malloc_conf. 
--- test/integration/chunk.c | 4 ---- test/integration/chunk.sh | 5 +++++ test/integration/mallocx.c | 4 ---- test/integration/mallocx.sh | 5 +++++ test/integration/xallocx.c | 4 ---- test/integration/xallocx.sh | 5 +++++ test/test.sh.in | 29 ++++++++++++++++++++++++++++- test/unit/arena_reset.c | 4 ---- test/unit/arena_reset.sh | 5 +++++ test/unit/decay.c | 2 -- test/unit/decay.sh | 3 +++ test/unit/junk.c | 8 -------- test/unit/junk.sh | 5 +++++ test/unit/junk_alloc.c | 2 -- test/unit/junk_alloc.sh | 5 +++++ test/unit/junk_free.c | 2 -- test/unit/junk_free.sh | 5 +++++ test/unit/lg_chunk.c | 7 ------- test/unit/lg_chunk.sh | 6 ++++++ test/unit/pack.c | 8 -------- test/unit/pack.sh | 5 +++++ test/unit/prof_accum.c | 5 ----- test/unit/prof_accum.sh | 5 +++++ test/unit/prof_active.c | 5 ----- test/unit/prof_active.sh | 5 +++++ test/unit/prof_gdump.c | 4 ---- test/unit/prof_gdump.sh | 6 ++++++ test/unit/prof_idump.c | 6 ------ test/unit/prof_idump.sh | 7 +++++++ test/unit/prof_reset.c | 5 ----- test/unit/prof_reset.sh | 5 +++++ test/unit/prof_tctx.sh | 5 +++++ test/unit/prof_thread_name.c | 4 ---- test/unit/prof_thread_name.sh | 5 +++++ test/unit/quarantine.c | 8 +------- test/unit/quarantine.sh | 8 ++++++++ test/unit/zero.c | 5 ----- test/unit/zero.sh | 5 +++++ 38 files changed, 129 insertions(+), 87 deletions(-) create mode 100644 test/integration/chunk.sh create mode 100644 test/integration/mallocx.sh create mode 100644 test/integration/xallocx.sh create mode 100644 test/unit/arena_reset.sh create mode 100644 test/unit/decay.sh create mode 100644 test/unit/junk.sh create mode 100644 test/unit/junk_alloc.sh create mode 100644 test/unit/junk_free.sh create mode 100644 test/unit/lg_chunk.sh create mode 100644 test/unit/pack.sh create mode 100644 test/unit/prof_accum.sh create mode 100644 test/unit/prof_active.sh create mode 100644 test/unit/prof_gdump.sh create mode 100644 test/unit/prof_idump.sh create mode 100644 test/unit/prof_reset.sh create mode 100644 
test/unit/prof_tctx.sh create mode 100644 test/unit/prof_thread_name.sh create mode 100644 test/unit/quarantine.sh create mode 100644 test/unit/zero.sh diff --git a/test/integration/chunk.c b/test/integration/chunk.c index 94cf0025..997567a7 100644 --- a/test/integration/chunk.c +++ b/test/integration/chunk.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - static chunk_hooks_t orig_hooks; static chunk_hooks_t old_hooks; diff --git a/test/integration/chunk.sh b/test/integration/chunk.sh new file mode 100644 index 00000000..0cc21873 --- /dev/null +++ b/test/integration/chunk.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="junk:false" +fi diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index d709eb30..5a9058d4 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - static unsigned get_nsizes_impl(const char *cmd) { diff --git a/test/integration/mallocx.sh b/test/integration/mallocx.sh new file mode 100644 index 00000000..0cc21873 --- /dev/null +++ b/test/integration/mallocx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="junk:false" +fi diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 67e0a0e7..2517a812 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = "junk:false"; -#endif - /* * Use a separate arena for xallocx() extension/contraction tests so that * internal allocation e.g. 
by heap profiling can't interpose allocations where diff --git a/test/integration/xallocx.sh b/test/integration/xallocx.sh new file mode 100644 index 00000000..0cc21873 --- /dev/null +++ b/test/integration/xallocx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="junk:false" +fi diff --git a/test/test.sh.in b/test/test.sh.in index a39f99f6..f0f0f979 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -11,6 +11,18 @@ case @abi@ in ;; esac +# Make a copy of the @JEMALLOC_CPREFIX@MALLOC_CONF passed in to this script, so +# it can be repeatedly concatenated with per test settings. +export MALLOC_CONF_ALL=${@JEMALLOC_CPREFIX@MALLOC_CONF} +# Concatenate the individual test's MALLOC_CONF and MALLOC_CONF_ALL. +export_malloc_conf() { + if [ "x${MALLOC_CONF}" != "x" -a "x${MALLOC_CONF_ALL}" != "x" ] ; then + export @JEMALLOC_CPREFIX@MALLOC_CONF="${MALLOC_CONF},${MALLOC_CONF_ALL}" + else + export @JEMALLOC_CPREFIX@MALLOC_CONF="${MALLOC_CONF}${MALLOC_CONF_ALL}" + fi +} + # Corresponds to test_status_t. pass_code=0 skip_code=1 @@ -24,7 +36,22 @@ for t in $@; do echo fi echo "=== ${t} ===" - ${t}@exe@ @abs_srcroot@ @abs_objroot@ + if [ -e "@srcroot@${t}.sh" ] ; then + # Source the shell script corresponding to the test in a subshell and + # execute the test. This allows the shell script to set MALLOC_CONF, which + # is then used to set @JEMALLOC_CPREFIX@MALLOC_CONF (thus allowing the + # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). + $(enable_fill=@enable_fill@ \ + enable_prof=@enable_prof@ \ + enable_tcache=@enable_tcache@ \ + . @srcroot@${t}.sh && \ + export_malloc_conf && \ + ${t}@exe@ @abs_srcroot@ @abs_objroot@) + else + $(export MALLOC_CONF= && \ + export_malloc_conf && + ${t}@exe@ @abs_srcroot@ @abs_objroot@) + fi result_code=$? 
case ${result_code} in ${pass_code}) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index adf9baa5..ec1c214e 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = "prof:true,lg_prof_sample:0"; -#endif - static unsigned get_nsizes_impl(const char *cmd) { diff --git a/test/unit/arena_reset.sh b/test/unit/arena_reset.sh new file mode 100644 index 00000000..8fcc7d8a --- /dev/null +++ b/test/unit/arena_reset.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi diff --git a/test/unit/decay.c b/test/unit/decay.c index 5af8f807..2d8d69d1 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -1,7 +1,5 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "purge:decay,decay_time:1"; - static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; diff --git a/test/unit/decay.sh b/test/unit/decay.sh new file mode 100644 index 00000000..7b8f470e --- /dev/null +++ b/test/unit/decay.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="purge:decay,decay_time:1" diff --git a/test/unit/junk.c b/test/unit/junk.c index 460bd524..bbd83fb9 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -1,13 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -# ifndef JEMALLOC_TEST_JUNK_OPT -# define JEMALLOC_TEST_JUNK_OPT "junk:true" -# endif -const char *malloc_conf = - "abort:false,zero:false,redzone:true,quarantine:0," JEMALLOC_TEST_JUNK_OPT; -#endif - static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; static arena_dalloc_junk_large_t *arena_dalloc_junk_large_orig; static huge_dalloc_junk_t *huge_dalloc_junk_orig; diff --git a/test/unit/junk.sh b/test/unit/junk.sh new file mode 100644 index 00000000..e19c313e --- /dev/null +++ b/test/unit/junk.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export 
MALLOC_CONF="abort:false,zero:false,redzone:true,quarantine:0,junk:true" +fi diff --git a/test/unit/junk_alloc.c b/test/unit/junk_alloc.c index a5895b5c..a442a0ca 100644 --- a/test/unit/junk_alloc.c +++ b/test/unit/junk_alloc.c @@ -1,3 +1 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:alloc" #include "junk.c" -#undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_alloc.sh b/test/unit/junk_alloc.sh new file mode 100644 index 00000000..984387d3 --- /dev/null +++ b/test/unit/junk_alloc.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,zero:false,redzone:true,quarantine:0,junk:alloc" +fi diff --git a/test/unit/junk_free.c b/test/unit/junk_free.c index bb5183c9..a442a0ca 100644 --- a/test/unit/junk_free.c +++ b/test/unit/junk_free.c @@ -1,3 +1 @@ -#define JEMALLOC_TEST_JUNK_OPT "junk:free" #include "junk.c" -#undef JEMALLOC_TEST_JUNK_OPT diff --git a/test/unit/junk_free.sh b/test/unit/junk_free.sh new file mode 100644 index 00000000..a5c21a57 --- /dev/null +++ b/test/unit/junk_free.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,zero:false,redzone:true,quarantine:0,junk:free" +fi diff --git a/test/unit/lg_chunk.c b/test/unit/lg_chunk.c index 7e5df381..d4f77b79 100644 --- a/test/unit/lg_chunk.c +++ b/test/unit/lg_chunk.c @@ -1,12 +1,5 @@ #include "test/jemalloc_test.h" -/* - * Make sure that opt.lg_chunk clamping is sufficient. In practice, this test - * program will fail a debug assertion during initialization and abort (rather - * than the test soft-failing) if clamping is insufficient. - */ -const char *malloc_conf = "lg_chunk:0"; - TEST_BEGIN(test_lg_chunk_clamp) { void *p; diff --git a/test/unit/lg_chunk.sh b/test/unit/lg_chunk.sh new file mode 100644 index 00000000..103eef1a --- /dev/null +++ b/test/unit/lg_chunk.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Make sure that opt.lg_chunk clamping is sufficient. 
In practice, this test +# program will fail a debug assertion during initialization and abort (rather +# than the test soft-failing) if clamping is insufficient. +export MALLOC_CONF="lg_chunk:0" diff --git a/test/unit/pack.c b/test/unit/pack.c index 0b6ffcd2..991faa6b 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -1,13 +1,5 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = - /* Use smallest possible chunk size. */ - "lg_chunk:0" - /* Immediately purge to minimize fragmentation. */ - ",lg_dirty_mult:-1" - ",decay_time:-1" - ; - /* * Size class that is a divisor of the page size, ideally 4+ regions per run. */ diff --git a/test/unit/pack.sh b/test/unit/pack.sh new file mode 100644 index 00000000..a58151d9 --- /dev/null +++ b/test/unit/pack.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +# Use smallest possible chunk size. Immediately purge to minimize +# fragmentation. +export MALLOC_CONF="lg_chunk:0,lg_dirty_mult:-1,decay_time:-1" diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index d941b5bc..031f0837 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -5,11 +5,6 @@ #define DUMP_INTERVAL 1 #define BT_COUNT_CHECK_INTERVAL 5 -#ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0"; -#endif - static int prof_dump_open_intercept(bool propagate_err, const char *filename) { diff --git a/test/unit/prof_accum.sh b/test/unit/prof_accum.sh new file mode 100644 index 00000000..b3e13fc5 --- /dev/null +++ b/test/unit/prof_accum.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0" +fi diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index d00943a4..a906beb7 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -1,10 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = - 
"prof:true,prof_thread_active_init:false,lg_prof_sample:0"; -#endif - static void mallctl_bool_get(const char *name, bool expected, const char *func, int line) { diff --git a/test/unit/prof_active.sh b/test/unit/prof_active.sh new file mode 100644 index 00000000..0167cb10 --- /dev/null +++ b/test/unit/prof_active.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_thread_active_init:false,lg_prof_sample:0" +fi diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 996cb670..b88a74c5 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = "prof:true,prof_active:false,prof_gdump:true"; -#endif - static bool did_prof_dump_open; static int diff --git a/test/unit/prof_gdump.sh b/test/unit/prof_gdump.sh new file mode 100644 index 00000000..3f600d20 --- /dev/null +++ b/test/unit/prof_gdump.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true" +fi + diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 16c6462d..87734a4a 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,11 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0," - "lg_prof_interval:0"; -#endif - static bool did_prof_dump_open; static int diff --git a/test/unit/prof_idump.sh b/test/unit/prof_idump.sh new file mode 100644 index 00000000..08a1b628 --- /dev/null +++ b/test/unit/prof_idump.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0,lg_prof_interval:0" +fi + + diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 59d70796..87b0d0c0 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,10 
+1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = - "prof:true,prof_active:false,lg_prof_sample:0"; -#endif - static int prof_dump_open_intercept(bool propagate_err, const char *filename) { diff --git a/test/unit/prof_reset.sh b/test/unit/prof_reset.sh new file mode 100644 index 00000000..43c516a0 --- /dev/null +++ b/test/unit/prof_reset.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0" +fi diff --git a/test/unit/prof_tctx.sh b/test/unit/prof_tctx.sh new file mode 100644 index 00000000..8fcc7d8a --- /dev/null +++ b/test/unit/prof_tctx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 9ec54977..3251853b 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -1,9 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_PROF -const char *malloc_conf = "prof:true,prof_active:false"; -#endif - static void mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, int line) diff --git a/test/unit/prof_thread_name.sh b/test/unit/prof_thread_name.sh new file mode 100644 index 00000000..298c1058 --- /dev/null +++ b/test/unit/prof_thread_name.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,prof_active:false" +fi diff --git a/test/unit/quarantine.c b/test/unit/quarantine.c index bbd48a51..6068768b 100644 --- a/test/unit/quarantine.c +++ b/test/unit/quarantine.c @@ -1,13 +1,7 @@ #include "test/jemalloc_test.h" +/* Keep in sync with definition in quarantine.sh. 
*/ #define QUARANTINE_SIZE 8192 -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - -#ifdef JEMALLOC_FILL -const char *malloc_conf = "abort:false,junk:true,redzone:true,quarantine:" - STRINGIFY(QUARANTINE_SIZE); -#endif void quarantine_clear(void) diff --git a/test/unit/quarantine.sh b/test/unit/quarantine.sh new file mode 100644 index 00000000..e3c6932c --- /dev/null +++ b/test/unit/quarantine.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +# Keep in sync with definition in quarantine.c. +export QUARANTINE_SIZE=8192 + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,junk:true,redzone:true,quarantine:${QUARANTINE_SIZE}" +fi diff --git a/test/unit/zero.c b/test/unit/zero.c index 30ebe37a..573993a2 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -1,10 +1,5 @@ #include "test/jemalloc_test.h" -#ifdef JEMALLOC_FILL -const char *malloc_conf = - "abort:false,junk:false,zero:true,redzone:false,quarantine:0"; -#endif - static void test_zero(size_t sz_min, size_t sz_max) { diff --git a/test/unit/zero.sh b/test/unit/zero.sh new file mode 100644 index 00000000..24488f0f --- /dev/null +++ b/test/unit/zero.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="abort:false,junk:false,zero:true,redzone:false,quarantine:0" +fi From 44e50041dc89c3aed4d03e231fd8ce6cb061f982 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 31 Jan 2017 16:44:57 -0800 Subject: [PATCH 0658/2608] CI: Run --enable-debug builds on windows This will hopefully catch some windows-specific bugs. 
--- .appveyor.yml | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index ddd5c571..510815dc 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -12,6 +12,20 @@ environment: CPU: x86_64 - MSYSTEM: MINGW32 CPU: i686 + - MSYSTEM: MINGW64 + CPU: x86_64 + MSVC: amd64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + MSVC: x86 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW64 + CPU: x86_64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW32 + CPU: i686 + CONFIG_FLAGS: --enable-debug install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% @@ -21,7 +35,7 @@ install: build_script: - bash -c "autoconf" - - bash -c "./configure" + - bash -c "./configure $CONFIG_FLAGS" - mingw32-make -j3 - file lib/jemalloc.dll - mingw32-make -j3 tests From 7034e6baa10163b3c6d7866562c0b8bd4d80904a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 21 Feb 2017 20:52:44 -0800 Subject: [PATCH 0659/2608] Enable mutex witnesses even when !isthreaded. This fixes interactions with witness_assert_depth[_to_rank](), which was added in dad74bd3c811ca2b1af1fd57b28f2456da5ba08b (Convert witness_assert_lockless() to witness_assert_lock_depth().). 
--- include/jemalloc/internal/mutex.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index b442d2d4..2b4b1c31 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -85,8 +85,8 @@ JEMALLOC_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn, &mutex->witness); if (isthreaded) { - witness_assert_not_owner(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 AcquireSRWLockExclusive(&mutex->lock); @@ -100,16 +100,16 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) #else pthread_mutex_lock(&mutex->lock); #endif - witness_lock(tsdn, &mutex->witness); } + witness_lock(tsdn, &mutex->witness); } JEMALLOC_INLINE void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_unlock(tsdn, &mutex->witness); if (isthreaded) { - witness_unlock(tsdn, &mutex->witness); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 ReleaseSRWLockExclusive(&mutex->lock); @@ -130,16 +130,14 @@ JEMALLOC_INLINE void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) - witness_assert_owner(tsdn, &mutex->witness); + witness_assert_owner(tsdn, &mutex->witness); } JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (isthreaded) - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn, &mutex->witness); } #endif From f56cb9a68e9cc95d23af0809ab4cf3e288c7e448 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 21 Jan 2017 15:12:03 -0800 Subject: [PATCH 0660/2608] Add witness_assert_depth[_to_rank](). This makes it possible to make lock state assertions about precisely which locks are held. 
--- include/jemalloc/internal/private_symbols.txt | 6 +- include/jemalloc/internal/witness.h | 40 ++++++-- src/arena.c | 4 +- src/huge.c | 8 +- src/jemalloc.c | 74 +++++++-------- src/witness.c | 21 ++-- test/unit/witness.c | 95 +++++++++++-------- 7 files changed, 141 insertions(+), 107 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 6111eac8..8a9e32fc 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -615,14 +615,16 @@ valgrind_freelike_block valgrind_make_mem_defined valgrind_make_mem_noaccess valgrind_make_mem_undefined -witness_assert_lock_depth +witness_assert_depth +witness_assert_depth_to_rank +witness_assert_lockless witness_assert_not_owner witness_assert_owner +witness_depth_error witness_fork_cleanup witness_init witness_lock witness_lock_error -witness_lock_depth_error witness_not_owner_error witness_owner witness_owner_error diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index e64e56eb..b89d12a7 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -12,6 +12,8 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); */ #define WITNESS_RANK_OMIT 0U +#define WITNESS_RANK_MIN 1U + #define WITNESS_RANK_INIT 1U #define WITNESS_RANK_CTL 1U #define WITNESS_RANK_TCACHES 2U @@ -92,12 +94,12 @@ extern witness_not_owner_error_t *witness_not_owner_error; void witness_not_owner_error(const witness_t *witness); #endif #ifdef JEMALLOC_JET -typedef void (witness_lock_depth_error_t)(const witness_list_t *, - unsigned depth); -extern witness_lock_depth_error_t *witness_lock_depth_error; +typedef void (witness_depth_error_t)(const witness_list_t *, + witness_rank_t rank_inclusive, unsigned depth); +extern witness_depth_error_t *witness_depth_error; #else -void witness_lock_depth_error(const witness_list_t *witnesses, - unsigned depth); 
+void witness_depth_error(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth); #endif void witnesses_cleanup(tsd_t *tsd); @@ -114,7 +116,10 @@ void witness_postfork_child(tsd_t *tsd); bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_lock_depth(tsdn_t *tsdn, unsigned depth); +void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, + unsigned depth); +void witness_assert_depth(tsdn_t *tsdn, unsigned depth); +void witness_assert_lockless(tsdn_t *tsdn); void witness_lock(tsdn_t *tsdn, witness_t *witness); void witness_unlock(tsdn_t *tsdn, witness_t *witness); #endif @@ -126,6 +131,8 @@ witness_owner(tsd_t *tsd, const witness_t *witness) witness_list_t *witnesses; witness_t *w; + cassert(config_debug); + witnesses = tsd_witnessesp_get(tsd); ql_foreach(w, witnesses, link) { if (w == witness) @@ -178,8 +185,8 @@ witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) } JEMALLOC_INLINE void -witness_assert_lock_depth(tsdn_t *tsdn, unsigned depth) -{ +witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, + unsigned depth) { tsd_t *tsd; unsigned d; witness_list_t *witnesses; @@ -196,12 +203,25 @@ witness_assert_lock_depth(tsdn_t *tsdn, unsigned depth) witnesses = tsd_witnessesp_get(tsd); w = ql_last(witnesses, link); if (w != NULL) { - ql_foreach(w, witnesses, link) { + ql_reverse_foreach(w, witnesses, link) { + if (w->rank < rank_inclusive) { + break; + } d++; } } if (d != depth) - witness_lock_depth_error(witnesses, depth); + witness_depth_error(witnesses, rank_inclusive, depth); +} + +JEMALLOC_INLINE void +witness_assert_depth(tsdn_t *tsdn, unsigned depth) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth); +} + +JEMALLOC_INLINE void +witness_assert_lockless(tsdn_t *tsdn) { + witness_assert_depth(tsdn, 0); } 
JEMALLOC_INLINE void diff --git a/src/arena.c b/src/arena.c index 193a4a24..c3d2622a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -591,7 +591,7 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, size_t sn; malloc_mutex_unlock(tsdn, &arena->lock); - witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ + witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, chunksize, &sn, zero, commit); @@ -633,7 +633,7 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, size_t sn; /* prof_gdump() requirement. */ - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); malloc_mutex_assert_owner(tsdn, &arena->lock); chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, diff --git a/src/huge.c b/src/huge.c index 9a91bed7..f712fd84 100644 --- a/src/huge.c +++ b/src/huge.c @@ -62,7 +62,7 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ assert(!tsdn_null(tsdn) || arena != NULL); - witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ + witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) @@ -149,7 +149,7 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool pre_zeroed, post_zeroed, gdump; - witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ + witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ /* Increase usize to incorporate extra. 
*/ for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) @@ -223,7 +223,7 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); - witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ + witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ /* Split excess chunks. */ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); @@ -278,7 +278,7 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, is_zeroed_subchunk = extent_node_zeroed_get(node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - witness_assert_lock_depth(tsdn, 0); /* prof_gdump() requirement. */ + witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ /* * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, diff --git a/src/jemalloc.c b/src/jemalloc.c index a376e143..029fe525 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1582,7 +1582,7 @@ ialloc_body(size_t size, bool zero, tsdn_t **tsdn, size_t *usize, tsd = tsd_fetch(); *tsdn = tsd_tsdn(tsd); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); ind = size2index(size); if (unlikely(ind >= NSIZES)) @@ -1620,7 +1620,7 @@ ialloc_post_check(void *ret, tsdn_t *tsdn, size_t usize, const char *func, assert(usize == isalloc(tsdn, ret, config_prof)); *tsd_thread_allocatedp_get(tsdn_tsd(tsdn)) += usize; } - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -1705,7 +1705,7 @@ imemalign(void **memptr, size_t alignment, size_t size, size_t min_alignment) goto label_oom; } tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); if (size == 0) size = 1; @@ -1746,7 +1746,7 @@ label_return: UTRACE(0, size, result); JEMALLOC_VALGRIND_MALLOC(result != NULL, tsd_tsdn(tsd), result, usize, false); - 
witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); label_oom: assert(result == NULL); @@ -1756,7 +1756,7 @@ label_oom: abort(); } ret = ENOMEM; - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); goto label_return; } @@ -1874,7 +1874,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) size_t usize; UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1902,7 +1902,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { UNUSED size_t rzsize JEMALLOC_CC_SILENCE_INIT(0); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -1948,7 +1948,7 @@ je_realloc(void *ptr, size_t size) malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); if (config_valgrind && unlikely(in_valgrind)) { @@ -1995,7 +1995,7 @@ je_realloc(void *ptr, size_t size) UTRACE(ptr, size, ret); JEMALLOC_VALGRIND_REALLOC(maybe, tsdn, ret, usize, maybe, ptr, old_usize, old_rzsize, maybe, false); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); return (ret); } @@ -2006,12 +2006,12 @@ je_free(void *ptr) UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); if (likely(!malloc_slow)) ifree(tsd, ptr, tcache_get(tsd, false), false); else ifree(tsd, ptr, tcache_get(tsd, false), true); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -2240,7 +2240,7 @@ imallocx_body(size_t size, int flags, tsdn_t **tsdn, size_t *usize, tsd = 
tsd_fetch(); *tsdn = tsd_tsdn(tsd); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); if (likely(flags == 0)) { szind_t ind = size2index(size); @@ -2375,7 +2375,7 @@ je_rallocx(void *ptr, size_t size, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2422,7 +2422,7 @@ je_rallocx(void *ptr, size_t size, int flags) UTRACE(ptr, size, p); JEMALLOC_VALGRIND_REALLOC(maybe, tsd_tsdn(tsd), p, usize, no, ptr, old_usize, old_rzsize, no, zero); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (p); label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2430,7 +2430,7 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (NULL); } @@ -2526,7 +2526,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) assert(malloc_initialized() || IS_INITIALIZER); malloc_thread_init(); tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); old_usize = isalloc(tsd_tsdn(tsd), ptr, config_prof); @@ -2567,7 +2567,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) old_usize, old_rzsize, no, zero); label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (usize); } @@ -2582,14 +2582,14 @@ je_sallocx(const void *ptr, int flags) malloc_thread_init(); tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); if (config_ivsalloc) usize = ivsalloc(tsdn, ptr, config_prof); else usize = isalloc(tsdn, ptr, config_prof); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); return 
(usize); } @@ -2603,7 +2603,7 @@ je_dallocx(void *ptr, int flags) assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2617,7 +2617,7 @@ je_dallocx(void *ptr, int flags) ifree(tsd, ptr, tcache, false); else ifree(tsd, ptr, tcache, true); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_ALWAYS_INLINE_C size_t @@ -2625,13 +2625,13 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) usize = s2u(size); else usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); return (usize); } @@ -2648,7 +2648,7 @@ je_sdallocx(void *ptr, size_t size, int flags) usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr, config_prof)); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) tcache = NULL; @@ -2662,7 +2662,7 @@ je_sdallocx(void *ptr, size_t size, int flags) isfree(tsd, ptr, usize, tcache, false); else isfree(tsd, ptr, usize, tcache, true); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2678,13 +2678,13 @@ je_nallocx(size_t size, int flags) return (0); tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); usize = inallocx(tsdn, size, flags); if (unlikely(usize > HUGE_MAXCLASS)) return (0); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); return (usize); } @@ -2699,9 +2699,9 
@@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); } @@ -2715,9 +2715,9 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) return (EAGAIN); tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); ret = ctl_nametomib(tsdn, name, mibp, miblenp); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); return (ret); } @@ -2732,9 +2732,9 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return (EAGAIN); tsd = tsd_fetch(); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); return (ret); } @@ -2745,9 +2745,9 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, tsdn_t *tsdn; tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); stats_print(write_cb, cbopaque, opts); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2760,14 +2760,14 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) malloc_thread_init(); tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); if (config_ivsalloc) ret = ivsalloc(tsdn, ptr, config_prof); else ret = (ptr == NULL) ? 
0 : isalloc(tsdn, ptr, config_prof); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); return (ret); } diff --git a/src/witness.c b/src/witness.c index aaea88d4..c3a65f7c 100644 --- a/src/witness.c +++ b/src/witness.c @@ -71,16 +71,16 @@ witness_not_owner_error_t *witness_not_owner_error = #endif #ifdef JEMALLOC_JET -#undef witness_lock_depth_error -#define witness_lock_depth_error JEMALLOC_N(n_witness_lock_depth_error) +#undef witness_depth_error +#define witness_depth_error JEMALLOC_N(n_witness_depth_error) #endif void -witness_lock_depth_error(const witness_list_t *witnesses, unsigned depth) -{ +witness_depth_error(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth) { witness_t *w; - malloc_printf(": Should own %u lock%s:", depth, (depth != 1) ? - "s" : ""); + malloc_printf(": Should own %u lock%s of rank >= %u:", depth, + (depth != 1) ? "s" : "", rank_inclusive); ql_foreach(w, witnesses, link) { malloc_printf(" %s(%u)", w->name, w->rank); } @@ -88,17 +88,16 @@ witness_lock_depth_error(const witness_list_t *witnesses, unsigned depth) abort(); } #ifdef JEMALLOC_JET -#undef witness_lock_depth_error -#define witness_lock_depth_error JEMALLOC_N(witness_lock_depth_error) -witness_lock_depth_error_t *witness_lock_depth_error = - JEMALLOC_N(n_witness_lock_depth_error); +#undef witness_depth_error +#define witness_depth_error JEMALLOC_N(witness_depth_error) +witness_depth_error_t *witness_depth_error = JEMALLOC_N(n_witness_depth_error); #endif void witnesses_cleanup(tsd_t *tsd) { - witness_assert_lock_depth(tsd_tsdn(tsd), 0); + witness_assert_lockless(tsd_tsdn(tsd)); /* Do nothing. 
*/ } diff --git a/test/unit/witness.c b/test/unit/witness.c index 9d4a1718..8b994136 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -3,12 +3,12 @@ static witness_lock_error_t *witness_lock_error_orig; static witness_owner_error_t *witness_owner_error_orig; static witness_not_owner_error_t *witness_not_owner_error_orig; -static witness_lock_depth_error_t *witness_lock_depth_error_orig; +static witness_depth_error_t *witness_depth_error_orig; static bool saw_lock_error; static bool saw_owner_error; static bool saw_not_owner_error; -static bool saw_lock_depth_error; +static bool saw_depth_error; static void witness_lock_error_intercept(const witness_list_t *witnesses, @@ -33,11 +33,9 @@ witness_not_owner_error_intercept(const witness_t *witness) } static void -witness_lock_depth_error_intercept(const witness_list_t *witnesses, - unsigned depth) -{ - - saw_lock_depth_error = true; +witness_depth_error_intercept(const witness_list_t *witnesses, + witness_rank_t rank_inclusive, unsigned depth) { + saw_depth_error = true; } static int @@ -67,25 +65,37 @@ TEST_BEGIN(test_witness) tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 0); witness_init(&a, "a", 1, NULL); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 0); witness_init(&b, "b", 2, NULL); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); - witness_assert_lock_depth(tsdn, 2); + witness_assert_depth(tsdn, 2); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 2); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)3U, 0); 
witness_unlock(tsdn, &a); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 1); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)3U, 0); witness_unlock(tsdn, &b); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); + witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 0); } TEST_END @@ -98,21 +108,21 @@ TEST_BEGIN(test_witness_comp) tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, witness_comp); witness_assert_not_owner(tsdn, &a); witness_lock(tsdn, &a); witness_assert_owner(tsdn, &a); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); witness_init(&b, "b", 1, witness_comp); witness_assert_not_owner(tsdn, &b); witness_lock(tsdn, &b); witness_assert_owner(tsdn, &b); - witness_assert_lock_depth(tsdn, 2); + witness_assert_depth(tsdn, 2); witness_unlock(tsdn, &b); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); witness_lock_error_orig = witness_lock_error; witness_lock_error = witness_lock_error_intercept; @@ -124,7 +134,7 @@ TEST_BEGIN(test_witness_comp) witness_lock(tsdn, &c); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &c); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); saw_lock_error = false; @@ -134,11 +144,11 @@ TEST_BEGIN(test_witness_comp) witness_lock(tsdn, &d); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &d); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); witness_unlock(tsdn, &a); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_lock_error = witness_lock_error_orig; } @@ -157,22 +167,22 @@ TEST_BEGIN(test_witness_reversal) tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + 
witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); witness_init(&b, "b", 2, NULL); witness_lock(tsdn, &b); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); assert_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); witness_unlock(tsdn, &a); - witness_assert_lock_depth(tsdn, 1); + witness_assert_depth(tsdn, 1); witness_unlock(tsdn, &b); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_lock_error = witness_lock_error_orig; } @@ -195,7 +205,7 @@ TEST_BEGIN(test_witness_recursive) tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); @@ -208,7 +218,7 @@ TEST_BEGIN(test_witness_recursive) witness_unlock(tsdn, &a); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; @@ -229,7 +239,7 @@ TEST_BEGIN(test_witness_unlock_not_owned) tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_init(&a, "a", 1, NULL); @@ -237,41 +247,44 @@ TEST_BEGIN(test_witness_unlock_not_owned) witness_unlock(tsdn, &a); assert_true(saw_owner_error, "Expected owner error"); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); witness_owner_error = witness_owner_error_orig; } TEST_END -TEST_BEGIN(test_witness_lock_depth) -{ +TEST_BEGIN(test_witness_depth) { witness_t a; tsdn_t *tsdn; test_skip_if(!config_debug); - witness_lock_depth_error_orig = witness_lock_depth_error; - witness_lock_depth_error = witness_lock_depth_error_intercept; - saw_lock_depth_error = false; + witness_depth_error_orig = witness_depth_error; + witness_depth_error = witness_depth_error_intercept; + saw_depth_error = false; tsdn = tsdn_fetch(); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); + 
witness_assert_depth(tsdn, 0); witness_init(&a, "a", 1, NULL); - assert_false(saw_lock_depth_error, "Unexpected lock_depth error"); - witness_assert_lock_depth(tsdn, 0); + assert_false(saw_depth_error, "Unexpected depth error"); + witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); witness_lock(tsdn, &a); - witness_assert_lock_depth(tsdn, 0); - assert_true(saw_lock_depth_error, "Expected lock_depth error"); + witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); + assert_true(saw_depth_error, "Expected depth error"); witness_unlock(tsdn, &a); - witness_assert_lock_depth(tsdn, 0); + witness_assert_lockless(tsdn); + witness_assert_depth(tsdn, 0); - witness_lock_depth_error = witness_lock_depth_error_orig; + witness_depth_error = witness_depth_error_orig; } TEST_END @@ -279,11 +292,11 @@ int main(void) { - return (test( + return test( test_witness, test_witness_comp, test_witness_reversal, test_witness_recursive, test_witness_unlock_not_owned, - test_witness_lock_depth)); + test_witness_depth); } From 08c24e7c1a034fc43353f47450f395a7272ccf02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Feb 2017 20:58:42 -0800 Subject: [PATCH 0661/2608] Relax witness assertions related to prof_gdump(). In some cases the prof machinery allocates (in order to modify the bt2gctx hash table), and such operations are synchronized via bt2gctx_mtx. Rather than asserting that no locks are held on entry into functions that may call prof_gdump(), make the weaker assertion that no "core" locks are held. The prof machinery enqueues dumps triggered by prof_gdump() calls when bt2gctx_mtx is held, so this weakened assertion avoids false failures in such cases. 
--- include/jemalloc/internal/witness.h | 8 ++++++++ src/arena.c | 5 +++-- src/huge.c | 12 ++++++++---- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index b89d12a7..30d8c7e9 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -25,6 +25,14 @@ typedef int witness_comp_t (const witness_t *, const witness_t *); #define WITNESS_RANK_PROF_TDATA 7U #define WITNESS_RANK_PROF_GCTX 8U +/* + * Used as an argument to witness_assert_depth_to_rank() in order to validate + * depth excluding non-core locks with lower ranks. Since the rank argument to + * witness_assert_depth_to_rank() is inclusive rather than exclusive, this + * definition can have the same value as the minimally ranked core lock. + */ +#define WITNESS_RANK_CORE 9U + #define WITNESS_RANK_ARENA 9U #define WITNESS_RANK_ARENA_CHUNKS 10U #define WITNESS_RANK_ARENA_NODE_CACHE 11U diff --git a/src/arena.c b/src/arena.c index c3d2622a..6d178d21 100644 --- a/src/arena.c +++ b/src/arena.c @@ -591,7 +591,8 @@ arena_chunk_alloc_internal_hard(tsdn_t *tsdn, arena_t *arena, size_t sn; malloc_mutex_unlock(tsdn, &arena->lock); - witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); chunk = (arena_chunk_t *)chunk_alloc_wrapper(tsdn, arena, chunk_hooks, NULL, chunksize, chunksize, &sn, zero, commit); @@ -633,7 +634,7 @@ arena_chunk_alloc_internal(tsdn_t *tsdn, arena_t *arena, bool *zero, size_t sn; /* prof_gdump() requirement. 
*/ - witness_assert_depth(tsdn, 1); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &arena->lock); chunk = chunk_alloc_cache(tsdn, arena, &chunk_hooks, NULL, chunksize, diff --git a/src/huge.c b/src/huge.c index f712fd84..0fbaa41a 100644 --- a/src/huge.c +++ b/src/huge.c @@ -62,7 +62,8 @@ huge_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, /* Allocate one or more contiguous chunks for this request. */ assert(!tsdn_null(tsdn) || arena != NULL); - witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); ausize = sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > HUGE_MAXCLASS)) @@ -149,7 +150,8 @@ huge_ralloc_no_move_similar(tsdn_t *tsdn, void *ptr, size_t oldsize, chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; bool pre_zeroed, post_zeroed, gdump; - witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* Increase usize to incorporate extra. */ for (usize = usize_min; usize < usize_max && (usize_next = s2u(usize+1)) @@ -223,7 +225,8 @@ huge_ralloc_no_move_shrink(tsdn_t *tsdn, void *ptr, size_t oldsize, chunk_hooks = chunk_hooks_get(tsdn, arena); assert(oldsize > usize); - witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* Split excess chunks. */ cdiff = CHUNK_CEILING(oldsize) - CHUNK_CEILING(usize); @@ -278,7 +281,8 @@ huge_ralloc_no_move_expand(tsdn_t *tsdn, void *ptr, size_t oldsize, is_zeroed_subchunk = extent_node_zeroed_get(node); malloc_mutex_unlock(tsdn, &arena->huge_mtx); - witness_assert_lockless(tsdn); /* prof_gdump() requirement. */ + /* prof_gdump() requirement. 
*/ + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* * Use is_zeroed_chunk to detect whether the trailing memory is zeroed, From c2323e13a5eec70f554e532336a912a9cd78317a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 23 Feb 2017 14:42:52 -0800 Subject: [PATCH 0662/2608] Get rid of witness in malloc_mutex_t when !(configured w/ debug). We don't touch witness at all when config_debug == false. Let's only pay the memory cost in malloc_mutex_s when needed. Note that when !config_debug, we keep the field in a union so that we don't have to do #ifdefs in multiple places. --- include/jemalloc/internal/mutex_structs.h | 30 +++++++++++++++++------ include/jemalloc/internal/mutex_types.h | 12 ++++----- include/jemalloc/internal/witness_types.h | 6 ++++- 3 files changed, 34 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 4a18a075..c34c1d47 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -2,23 +2,39 @@ #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H struct malloc_mutex_s { + union { + struct { #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; + SRWLOCK lock; # else - CRITICAL_SECTION lock; + CRITICAL_SECTION lock; # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; + os_unfair_lock lock; #elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; + OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; #else - pthread_mutex_t lock; + pthread_mutex_t lock; #endif + }; + /* + * We only touch witness when configured w/ debug. However we + * keep the field in a union when !debug so that we don't have + * to pollute the code base with #ifdefs, while avoid paying the + * memory cost. 
+ */ +#if !defined(JEMALLOC_DEBUG) + witness_t witness; +#endif + }; + +#if defined(JEMALLOC_DEBUG) witness_t witness; +#endif }; #endif /* JEMALLOC_INTERNAL_MUTEX_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index 8c9f249d..b7e3a7a1 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -7,25 +7,25 @@ typedef struct malloc_mutex_s malloc_mutex_t; # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) # define MALLOC_MUTEX_INITIALIZER \ - {OS_UNFAIR_LOCK_INIT, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + {{{OS_UNFAIR_LOCK_INIT}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER \ - {0, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + {{{0}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, NULL, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + {{{PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #else # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP, \ + {{{PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # define MALLOC_MUTEX_INITIALIZER \ - {PTHREAD_MUTEX_INITIALIZER, \ + {{{PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #endif diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 0678b082..3efaad7e 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -55,6 +55,10 @@ typedef int 
witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF -#define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +#if defined(JEMALLOC_DEBUG) +# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +#else +# define WITNESS_INITIALIZER(name, rank) +#endif #endif /* JEMALLOC_INTERNAL_WITNESS_TYPES_H */ From adae7cfc4a2ac66c96b0dcc83b3837ac668fc44e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Feb 2017 09:45:33 -0800 Subject: [PATCH 0663/2608] Fix chunk_alloc_dss() regression. Fix chunk_alloc_dss() to account for bytes that are not a multiple of the chunk size. This regression was introduced by e2bcf037d445a84a71c7997670819ebd0a893b4a (Make dss operations lockless.), which was first released in 4.3.0. --- src/chunk_dss.c | 45 +++++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/src/chunk_dss.c b/src/chunk_dss.c index ee3f8388..8c679395 100644 --- a/src/chunk_dss.c +++ b/src/chunk_dss.c @@ -115,8 +115,9 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * malloc. */ while (true) { - void *ret, *cpad, *max_cur, *dss_next, *dss_prev; - size_t gap_size, cpad_size; + void *ret, *max_cur, *dss_next, *dss_prev; + void *gap_addr_chunk, *gap_addr_subchunk; + size_t gap_size_chunk, gap_size_subchunk; intptr_t incr; max_cur = chunk_dss_max_update(new_addr); @@ -124,25 +125,32 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, goto label_oom; /* - * Calculate how much padding is necessary to - * chunk-align the end of the DSS. - */ - gap_size = (chunksize - CHUNK_ADDR2OFFSET(dss_max)) & - chunksize_mask; - /* - * Compute how much chunk-aligned pad space (if any) is + * Compute how much chunk-aligned gap space (if any) is * necessary to satisfy alignment. 
This space can be * recycled for later use. */ - cpad = (void *)((uintptr_t)dss_max + gap_size); - ret = (void *)ALIGNMENT_CEILING((uintptr_t)dss_max, - alignment); - cpad_size = (uintptr_t)ret - (uintptr_t)cpad; + gap_addr_chunk = (void *)(CHUNK_CEILING( + (uintptr_t)max_cur)); + ret = (void *)ALIGNMENT_CEILING( + (uintptr_t)gap_addr_chunk, alignment); + gap_size_chunk = (uintptr_t)ret - + (uintptr_t)gap_addr_chunk; + /* + * Compute the address just past the end of the desired + * allocation space. + */ dss_next = (void *)((uintptr_t)ret + size); - if ((uintptr_t)ret < (uintptr_t)dss_max || - (uintptr_t)dss_next < (uintptr_t)dss_max) + if ((uintptr_t)ret < (uintptr_t)max_cur || + (uintptr_t)dss_next < (uintptr_t)max_cur) goto label_oom; /* Wrap-around. */ - incr = gap_size + cpad_size + size; + /* Compute the increment, including subchunk bytes. */ + gap_addr_subchunk = max_cur; + gap_size_subchunk = (uintptr_t)ret - + (uintptr_t)gap_addr_subchunk; + incr = gap_size_subchunk + size; + + assert((uintptr_t)max_cur + incr == (uintptr_t)ret + + size); /* * Optimistically update dss_max, and roll back below if @@ -157,11 +165,12 @@ chunk_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, dss_prev = chunk_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. */ - if (cpad_size != 0) { + if (gap_size_chunk != 0) { chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_dalloc_wrapper(tsdn, arena, - &chunk_hooks, cpad, cpad_size, + &chunk_hooks, gap_addr_chunk, + gap_size_chunk, arena_extent_sn_next(arena), false, true); } From 61d26425e53d74f31e9f2ef3a423bf730f832f68 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Feb 2017 10:40:23 -0800 Subject: [PATCH 0664/2608] Fix JSON-mode output for !config_stats and/or !config_prof cases. These bugs were introduced by b599b32280e1142856b0b96293a71e1684b1ccfb (Add "J" (JSON) support to malloc_stats_print().), which was first released in 4.3.0. This resolves #615. 
--- src/stats.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1360f3bd..6b71158b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -555,7 +555,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, static void stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool unmerged) + bool json, bool more) { const char *cpv; bool bv; @@ -907,11 +907,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t]\n"); malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); + "\t\t}%s\n", (config_prof || more) ? "," : ""); } /* prof. */ - if (json) { + if (config_prof && json) { malloc_cprintf(write_cb, cbopaque, "\t\t\"prof\": {\n"); @@ -937,8 +937,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\"lg_sample\": %zd\n", ssv); malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_stats || merged || unmerged) ? "," : - ""); + "\t\t}%s\n", more ? 
"," : ""); } } @@ -1069,8 +1068,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t u64sz; bool json = false; bool general = true; - bool merged = true; - bool unmerged = true; + bool merged = config_stats; + bool unmerged = config_stats; bool bins = true; bool large = true; bool huge = true; @@ -1137,8 +1136,10 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, "___ Begin jemalloc statistics ___\n"); } - if (general) - stats_general_print(write_cb, cbopaque, json, merged, unmerged); + if (general) { + bool more = (merged || unmerged); + stats_general_print(write_cb, cbopaque, json, more); + } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, unmerged, bins, large, huge); From 54d2d697b21af7a5553d0c9de2e9174bfa0fc7a5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 18 Jan 2017 01:01:19 -0800 Subject: [PATCH 0665/2608] Test JSON output of malloc_stats_print() and fix bugs. Implement and test a JSON validation parser. Use the parser to validate JSON output from malloc_stats_print(), with a significant subset of supported output options. This resolves #583. 
--- Makefile.in | 1 + src/stats.c | 61 +-- test/unit/stats_print.c | 1000 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 1035 insertions(+), 27 deletions(-) create mode 100644 test/unit/stats_print.c diff --git a/Makefile.in b/Makefile.in index c7053639..675e4cb6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -186,6 +186,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/smoothstep.c \ $(srcroot)test/unit/stats.c \ + $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ diff --git a/src/stats.c b/src/stats.c index 6b71158b..8d579c7b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -39,7 +39,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool large, bool huge, unsigned i) { size_t page; - bool config_tcache, in_gap, in_gap_prev; + bool in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); @@ -49,7 +49,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"bins\": [\n"); } else { - CTL_GET("config.tcache", &config_tcache, bool); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" @@ -536,7 +535,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\t\t\"allocated\": %zu\n", metadata_allocated); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); + "\t\t\t\t}%s\n", (bins || large || huge) ? 
"," : ""); } else { malloc_cprintf(write_cb, cbopaque, "metadata: mapped: %zu, allocated: %zu\n", @@ -838,9 +837,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nbins\": %u,\n", nbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nhbins\": %u,\n", uv); + if (config_tcache) { + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"nhbins\": %u,\n", uv); + } malloc_cprintf(write_cb, cbopaque, "\t\t\t\"bin\": [\n"); @@ -1022,31 +1023,37 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, narenas, bins, large, huge); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", (ninitialized > 1) ? - "," : ""); + "\t\t\t}%s\n", unmerged ? "," : + ""); } } /* Unmerged stats. */ - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", i); - } else { - malloc_cprintf(write_cb, - cbopaque, "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, cbopaque, - json, i, bins, large, huge); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," : ""); + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + if (json) { + j++; + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t\"%u\": {\n", + i); + } else { + malloc_cprintf(write_cb, + cbopaque, + "\narenas[%u]:\n", + i); + } + stats_arena_print(write_cb, + cbopaque, json, i, bins, + large, huge); + if (json) { + malloc_cprintf(write_cb, + cbopaque, + "\t\t\t}%s\n", (j < + ninitialized) ? 
"," + : ""); + } } } } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c new file mode 100644 index 00000000..91cfdf2d --- /dev/null +++ b/test/unit/stats_print.c @@ -0,0 +1,1000 @@ +#include "test/jemalloc_test.h" + +typedef enum { + TOKEN_TYPE_NONE, + TOKEN_TYPE_ERROR, + TOKEN_TYPE_EOI, + TOKEN_TYPE_NULL, + TOKEN_TYPE_FALSE, + TOKEN_TYPE_TRUE, + TOKEN_TYPE_LBRACKET, + TOKEN_TYPE_RBRACKET, + TOKEN_TYPE_LBRACE, + TOKEN_TYPE_RBRACE, + TOKEN_TYPE_COLON, + TOKEN_TYPE_COMMA, + TOKEN_TYPE_STRING, + TOKEN_TYPE_NUMBER +} token_type_t; + +typedef struct parser_s parser_t; +typedef struct { + parser_t *parser; + token_type_t token_type; + size_t pos; + size_t len; + size_t line; + size_t col; +} token_t; + +struct parser_s { + bool verbose; + char *buf; /* '\0'-terminated. */ + size_t len; /* Number of characters preceding '\0' in buf. */ + size_t pos; + size_t line; + size_t col; + token_t token; +}; + +static void +token_init(token_t *token, parser_t *parser, token_type_t token_type, + size_t pos, size_t len, size_t line, size_t col) +{ + token->parser = parser; + token->token_type = token_type; + token->pos = pos; + token->len = len; + token->line = line; + token->col = col; +} + +static void +token_error(token_t *token) +{ + if (!token->parser->verbose) { + return; + } + switch (token->token_type) { + case TOKEN_TYPE_NONE: + not_reached(); + case TOKEN_TYPE_ERROR: + malloc_printf("%zu:%zu: Unexpected character in token: ", + token->line, token->col); + break; + default: + malloc_printf("%zu:%zu: Unexpected token: ", token->line, + token->col); + break; + } + write(STDERR_FILENO, &token->parser->buf[token->pos], token->len); + malloc_printf("\n"); +} + +static void +parser_init(parser_t *parser, bool verbose) +{ + parser->verbose = verbose; + parser->buf = NULL; + parser->len = 0; + parser->pos = 0; + parser->line = 1; + parser->col = 0; +} + +static void +parser_fini(parser_t *parser) +{ + if (parser->buf != NULL) { + dallocx(parser->buf, 
MALLOCX_TCACHE_NONE); + } +} + +static bool +parser_append(parser_t *parser, const char *str) +{ + size_t len = strlen(str); + char *buf = (parser->buf == NULL) ? mallocx(len + 1, + MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1, + MALLOCX_TCACHE_NONE); + if (buf == NULL) { + return true; + } + memcpy(&buf[parser->len], str, len + 1); + parser->buf = buf; + parser->len += len; + return false; +} + +static bool +parser_tokenize(parser_t *parser) +{ + enum { + STATE_START, + STATE_EOI, + STATE_N, STATE_NU, STATE_NUL, STATE_NULL, + STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE, + STATE_T, STATE_TR, STATE_TRU, STATE_TRUE, + STATE_LBRACKET, + STATE_RBRACKET, + STATE_LBRACE, + STATE_RBRACE, + STATE_COLON, + STATE_COMMA, + STATE_CHARS, + STATE_CHAR_ESCAPE, + STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD, + STATE_STRING, + STATE_MINUS, + STATE_LEADING_ZERO, + STATE_DIGITS, + STATE_DECIMAL, + STATE_FRAC_DIGITS, + STATE_EXP, + STATE_EXP_SIGN, + STATE_EXP_DIGITS, + STATE_ACCEPT + } state = STATE_START; + size_t token_pos, token_line, token_col; + + assert_zu_le(parser->pos, parser->len, + "Position is past end of buffer"); + + while (state != STATE_ACCEPT) { + char c = parser->buf[parser->pos]; + + switch (state) { + case STATE_START: + token_pos = parser->pos; + token_line = parser->line; + token_col = parser->col; + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + break; + case '\0': + state = STATE_EOI; + break; + case 'n': + state = STATE_N; + break; + case 'f': + state = STATE_F; + break; + case 't': + state = STATE_T; + break; + case '[': + state = STATE_LBRACKET; + break; + case ']': + state = STATE_RBRACKET; + break; + case '{': + state = STATE_LBRACE; + break; + case '}': + state = STATE_RBRACE; + break; + case ':': + state = STATE_COLON; + break; + case ',': + state = STATE_COMMA; + break; + case '"': + state = STATE_CHARS; + break; + case '-': + state = STATE_MINUS; + break; + case '0': + state = 
STATE_LEADING_ZERO; + break; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_EOI: + token_init(&parser->token, parser, + TOKEN_TYPE_EOI, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + case STATE_N: + switch (c) { + case 'u': + state = STATE_NU; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_NU: + switch (c) { + case 'l': + state = STATE_NUL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_NUL: + switch (c) { + case 'l': + state = STATE_NULL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_NULL: + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + case '\0': + case '[': case ']': case '{': case '}': case ':': + case ',': + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + token_init(&parser->token, parser, TOKEN_TYPE_NULL, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_F: + switch (c) { + case 'a': + state = STATE_FA; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FA: + switch (c) { + case 'l': + state = STATE_FAL; + break; + default: + 
token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FAL: + switch (c) { + case 's': + state = STATE_FALS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FALS: + switch (c) { + case 'e': + state = STATE_FALSE; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FALSE: + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + case '\0': + case '[': case ']': case '{': case '}': case ':': + case ',': + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + token_init(&parser->token, parser, + TOKEN_TYPE_FALSE, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + case STATE_T: + switch (c) { + case 'r': + state = STATE_TR; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_TR: + switch (c) { + case 'u': + state = STATE_TRU; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_TRU: + switch (c) { + case 'e': + state = STATE_TRUE; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_TRUE: + switch (c) { + case ' ': case '\b': case '\n': case '\r': case '\t': + case '\0': + case '[': case ']': case '{': case '}': case ':': + case ',': + break; + 
default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + token_init(&parser->token, parser, TOKEN_TYPE_TRUE, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_LBRACKET: + token_init(&parser->token, parser, TOKEN_TYPE_LBRACKET, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_RBRACKET: + token_init(&parser->token, parser, TOKEN_TYPE_RBRACKET, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_LBRACE: + token_init(&parser->token, parser, TOKEN_TYPE_LBRACE, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_RBRACE: + token_init(&parser->token, parser, TOKEN_TYPE_RBRACE, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_COLON: + token_init(&parser->token, parser, TOKEN_TYPE_COLON, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_COMMA: + token_init(&parser->token, parser, TOKEN_TYPE_COMMA, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_CHARS: + switch (c) { + case '\\': + state = STATE_CHAR_ESCAPE; + break; + case '"': + state = STATE_STRING; + break; + case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: + case 0x05: case 0x06: case 0x07: case 0x08: case 0x09: + case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: + case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: + case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: + case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: + case 0x1e: case 0x1f: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + default: + 
break; + } + break; + case STATE_CHAR_ESCAPE: + switch (c) { + case '"': case '\\': case '/': case 'b': case 'n': + case 'r': case 't': + state = STATE_CHARS; + break; + case 'u': + state = STATE_CHAR_U; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_U: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHAR_UD; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_UD: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHAR_UDD; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_UDD: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHAR_UDDD; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_CHAR_UDDD: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': + case 'A': case 
'B': case 'C': case 'D': case 'E': + case 'F': + state = STATE_CHARS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_STRING: + token_init(&parser->token, parser, TOKEN_TYPE_STRING, + token_pos, parser->pos - token_pos, token_line, + token_col); + state = STATE_ACCEPT; + break; + case STATE_MINUS: + switch (c) { + case '0': + state = STATE_LEADING_ZERO; + break; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_LEADING_ZERO: + switch (c) { + case '.': + state = STATE_DECIMAL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + case STATE_DIGITS: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + case '.': + state = STATE_DECIMAL; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + case STATE_DECIMAL: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_FRAC_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_FRAC_DIGITS: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + case 'e': case 'E': + state = STATE_EXP; + break; + default: + 
token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + case STATE_EXP: + switch (c) { + case '-': case '+': + state = STATE_EXP_SIGN; + break; + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_EXP_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_EXP_SIGN: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + state = STATE_EXP_DIGITS; + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 + - token_pos, token_line, token_col); + return true; + } + break; + case STATE_EXP_DIGITS: + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + break; + default: + token_init(&parser->token, parser, + TOKEN_TYPE_NUMBER, token_pos, parser->pos - + token_pos, token_line, token_col); + state = STATE_ACCEPT; + break; + } + break; + default: + not_reached(); + } + + if (state != STATE_ACCEPT) { + if (c == '\n') { + parser->line++; + parser->col = 0; + } else { + parser->col++; + } + parser->pos++; + } + } + return false; +} + +static bool parser_parse_array(parser_t *parser); +static bool parser_parse_object(parser_t *parser); + +static bool +parser_parse_value(parser_t *parser) +{ + switch (parser->token.token_type) { + case TOKEN_TYPE_NULL: + case TOKEN_TYPE_FALSE: + case TOKEN_TYPE_TRUE: + case TOKEN_TYPE_STRING: + case TOKEN_TYPE_NUMBER: + return false; + case TOKEN_TYPE_LBRACE: + return parser_parse_object(parser); + case TOKEN_TYPE_LBRACKET: + return parser_parse_array(parser); + default: + return true; + } + not_reached(); +} + +static bool +parser_parse_pair(parser_t *parser) +{ 
+ assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + "Pair should start with string"); + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_COLON: + if (parser_tokenize(parser)) { + return true; + } + return parser_parse_value(parser); + default: + return true; + } +} + +static bool +parser_parse_values(parser_t *parser) +{ + if (parser_parse_value(parser)) { + return true; + } + + while (true) { + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_COMMA: + if (parser_tokenize(parser)) { + return true; + } + if (parser_parse_value(parser)) { + return true; + } + break; + case TOKEN_TYPE_RBRACKET: + return false; + default: + return true; + } + } +} + +static bool +parser_parse_array(parser_t *parser) +{ + assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, + "Array should start with ["); + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_RBRACKET: + return false; + default: + return parser_parse_values(parser); + } + not_reached(); +} + +static bool +parser_parse_pairs(parser_t *parser) +{ + assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + "Object should start with string"); + if (parser_parse_pair(parser)) { + return true; + } + + while (true) { + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_COMMA: + if (parser_tokenize(parser)) { + return true; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_STRING: + if (parser_parse_pair(parser)) { + return true; + } + break; + default: + return true; + } + break; + case TOKEN_TYPE_RBRACE: + return false; + default: + return true; + } + } +} + +static bool +parser_parse_object(parser_t *parser) +{ + assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, + "Object should start with {"); + if (parser_tokenize(parser)) { + return true; + } + switch 
(parser->token.token_type) { + case TOKEN_TYPE_STRING: + return parser_parse_pairs(parser); + case TOKEN_TYPE_RBRACE: + return false; + default: + return true; + } + not_reached(); +} + +static bool +parser_parse(parser_t *parser) +{ + if (parser_tokenize(parser)) { + goto label_error; + } + if (parser_parse_value(parser)) { + goto label_error; + } + + if (parser_tokenize(parser)) { + goto label_error; + } + switch (parser->token.token_type) { + case TOKEN_TYPE_EOI: + return false; + default: + goto label_error; + } + not_reached(); + +label_error: + token_error(&parser->token); + return true; +} + +TEST_BEGIN(test_json_parser) +{ + size_t i; + const char *invalid_inputs[] = { + /* Tokenizer error case tests. */ + "{ \"string\": X }", + "{ \"string\": nXll }", + "{ \"string\": nuXl }", + "{ \"string\": nulX }", + "{ \"string\": nullX }", + "{ \"string\": fXlse }", + "{ \"string\": faXse }", + "{ \"string\": falXe }", + "{ \"string\": falsX }", + "{ \"string\": falseX }", + "{ \"string\": tXue }", + "{ \"string\": trXe }", + "{ \"string\": truX }", + "{ \"string\": trueX }", + "{ \"string\": \"\n\" }", + "{ \"string\": \"\\z\" }", + "{ \"string\": \"\\uX000\" }", + "{ \"string\": \"\\u0X00\" }", + "{ \"string\": \"\\u00X0\" }", + "{ \"string\": \"\\u000X\" }", + "{ \"string\": -X }", + "{ \"string\": 0.X }", + "{ \"string\": 0.0eX }", + "{ \"string\": 0.0e+X }", + + /* Parser error test cases. */ + "{\"string\": }", + "{\"string\" }", + "{\"string\": [ 0 }", + "{\"string\": {\"a\":0, 1 } }", + "{\"string\": {\"a\":0: } }", + "{", + "{}{", + }; + const char *valid_inputs[] = { + /* Token tests. 
*/ + "null", + "false", + "true", + "{}", + "{\"a\": 0}", + "[]", + "[0, 1]", + "0", + "1", + "10", + "-10", + "10.23", + "10.23e4", + "10.23e-4", + "10.23e+4", + "10.23E4", + "10.23E-4", + "10.23E+4", + "-10.23", + "-10.23e4", + "-10.23e-4", + "-10.23e+4", + "-10.23E4", + "-10.23E-4", + "-10.23E+4", + "\"value\"", + "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", + + /* Parser test with various nesting. */ + "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", + }; + + for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) { + const char *input = invalid_inputs[i]; + parser_t parser; + parser_init(&parser, false); + assert_false(parser_append(&parser, input), + "Unexpected input appending failure"); + assert_true(parser_parse(&parser), + "Unexpected parse success for input: %s", input); + parser_fini(&parser); + } + + for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) { + const char *input = valid_inputs[i]; + parser_t parser; + parser_init(&parser, true); + assert_false(parser_append(&parser, input), + "Unexpected input appending failure"); + assert_false(parser_parse(&parser), + "Unexpected parse error for input: %s", input); + parser_fini(&parser); + } +} +TEST_END + +void +write_cb(void *opaque, const char *str) +{ + parser_t *parser = (parser_t *)opaque; + if (parser_append(parser, str)) { + test_fail("Unexpected input appending failure"); + } +} + +TEST_BEGIN(test_stats_print_json) +{ + const char *opts[] = { + "J", + "Jg", + "Jm", + "Jgm", + "Ja", + "Jb", + "Jab", + "Jl", + "Jal", + "Jbl", + "Jabl", + "Jh", + "Jah", + "Jbh", + "Jabh", + "Jlh", + "Jalh", + "Jblh", + "Jablh", + "Jgmablh", + }; + unsigned arena_ind, i; + + for (i = 0; i < 2; i++) { + unsigned j; + + switch (i) { + case 0: + break; + case 1: { + size_t sz = sizeof(arena_ind); + assert_d_eq(mallctl("arenas.extend", (void *)&arena_ind, + &sz, NULL, 0), 0, "Unexpected mallctl failure"); + break; + } default: + not_reached(); + } + + for (j = 0; j < 
sizeof(opts)/sizeof(const char *); j++) { + parser_t parser; + + parser_init(&parser, true); + malloc_stats_print(write_cb, (void *)&parser, opts[j]); + assert_false(parser_parse(&parser), + "Unexpected parse error, opts=\"%s\"", opts[j]); + parser_fini(&parser); + } + } +} +TEST_END + +int +main(void) +{ + return (test( + test_json_parser, + test_stats_print_json)); +} From ed19a4892861e40ba231c3be4e299819ce30ff3d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Feb 2017 10:03:04 -0800 Subject: [PATCH 0666/2608] Silence harmless warnings discovered via run_tests.sh. --- test/unit/stats_print.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 91cfdf2d..4f412dc0 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -67,7 +67,10 @@ token_error(token_t *token) token->col); break; } - write(STDERR_FILENO, &token->parser->buf[token->pos], token->len); + { + UNUSED ssize_t err = write(STDERR_FILENO, + &token->parser->buf[token->pos], token->len); + } malloc_printf("\n"); } @@ -135,7 +138,9 @@ parser_tokenize(parser_t *parser) STATE_EXP_DIGITS, STATE_ACCEPT } state = STATE_START; - size_t token_pos, token_line, token_col; + size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); + size_t token_line JEMALLOC_CC_SILENCE_INIT(1); + size_t token_col JEMALLOC_CC_SILENCE_INIT(0); assert_zu_le(parser->pos, parser->len, "Position is past end of buffer"); From d727596bcbd3f2d6b2af1e21cf19210ac236f8df Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Feb 2017 08:59:34 -0800 Subject: [PATCH 0667/2608] Update a comment. 
--- include/jemalloc/internal/witness_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 3efaad7e..95fc296c 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -27,10 +27,10 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_PROF_GCTX 8U /* - * Used as an argument to witness_depth_to_rank() in order to validate depth - * excluding non-core locks with lower ranks. Since the rank argument to - * witness_depth_to_rank() is inclusive rather than exclusive, this definition - * can have the same value as the minimally ranked core lock. + * Used as an argument to witness_assert_depth_to_rank() in order to validate + * depth excluding non-core locks with lower ranks. Since the rank argument to + * witness_assert_depth_to_rank() is inclusive rather than exclusive, this + * definition can have the same value as the minimally ranked core lock. */ #define WITNESS_RANK_CORE 9U From 1e2c9ef8d6778669657a057979f2a7049012e879 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 26 Feb 2017 12:58:15 -0800 Subject: [PATCH 0668/2608] Fix huge-aligned allocation. This regression was caused by b9408d77a63a54fd331f9b81c884f68e6d57f2e5 (Fix/simplify chunk_recycle() allocation size computations.). This resolves #647. 
--- Makefile.in | 1 + include/jemalloc/internal/extent.h | 5 + include/jemalloc/internal/private_symbols.txt | 2 + src/chunk.c | 9 +- src/extent.c | 37 +++++-- test/unit/extent_quantize.c | 98 +++++++++++++++++++ 6 files changed, 141 insertions(+), 11 deletions(-) create mode 100644 test/unit/extent_quantize.c diff --git a/Makefile.in b/Makefile.in index 675e4cb6..8f1fb554 100644 --- a/Makefile.in +++ b/Makefile.in @@ -156,6 +156,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 168ffe64..fc77f9f5 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -75,6 +75,11 @@ typedef rb_tree(extent_node_t) extent_tree_t; /******************************************************************************/ #ifdef JEMALLOC_H_EXTERNS +#ifdef JEMALLOC_JET +size_t extent_size_quantize_floor(size_t size); +#endif +size_t extent_size_quantize_ceil(size_t size); + rb_proto(, extent_tree_szsnad_, extent_tree_t, extent_node_t) rb_proto(, extent_tree_ad_, extent_tree_t, extent_node_t) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 8a9e32fc..a83d9840 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -223,6 +223,8 @@ extent_node_sn_get extent_node_sn_set extent_node_zeroed_get extent_node_zeroed_set +extent_size_quantize_ceil +extent_size_quantize_floor extent_tree_ad_destroy extent_tree_ad_destroy_recurse extent_tree_ad_empty diff --git a/src/chunk.c b/src/chunk.c index de3bf4cf..94f28f2d 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -188,12 +188,17 @@ chunk_deregister(const void *chunk, const extent_node_t *node) static extent_node_t * 
chunk_first_best_fit(arena_t *arena, extent_tree_t *chunks_szsnad, size_t size) { + extent_node_t *node; + size_t qsize; extent_node_t key; assert(size == CHUNK_CEILING(size)); - extent_node_init(&key, arena, NULL, size, 0, false, false); - return (extent_tree_szsnad_nsearch(chunks_szsnad, &key)); + qsize = extent_size_quantize_ceil(size); + extent_node_init(&key, arena, NULL, qsize, 0, false, false); + node = extent_tree_szsnad_nsearch(chunks_szsnad, &key); + assert(node == NULL || extent_node_size_get(node) >= size); + return node; } static void * diff --git a/src/extent.c b/src/extent.c index 218156c6..ff8de2fe 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,13 +3,11 @@ /******************************************************************************/ -/* - * Round down to the nearest chunk size that can actually be requested during - * normal huge allocation. - */ -JEMALLOC_INLINE_C size_t -extent_quantize(size_t size) -{ +#ifndef JEMALLOC_JET +static +#endif +size_t +extent_size_quantize_floor(size_t size) { size_t ret; szind_t ind; @@ -25,11 +23,32 @@ extent_quantize(size_t size) return (ret); } +size_t +extent_size_quantize_ceil(size_t size) { + size_t ret; + + assert(size > 0); + + ret = extent_size_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large extent, + * because under-sized extents may be mixed in. This only + * happens when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. 
+ */ + ret = index2size(size2index(ret + 1)); + } + return ret; +} + JEMALLOC_INLINE_C int extent_sz_comp(const extent_node_t *a, const extent_node_t *b) { - size_t a_qsize = extent_quantize(extent_node_size_get(a)); - size_t b_qsize = extent_quantize(extent_node_size_get(b)); + size_t a_qsize = extent_size_quantize_floor(extent_node_size_get(a)); + size_t b_qsize = extent_size_quantize_floor(extent_node_size_get(b)); return ((a_qsize > b_qsize) - (a_qsize < b_qsize)); } diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c new file mode 100644 index 00000000..d2eb6d7d --- /dev/null +++ b/test/unit/extent_quantize.c @@ -0,0 +1,98 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_huge_extent_size) { + unsigned nhchunks, i; + size_t sz, extent_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + + /* + * Iterate over all huge size classes, get their extent sizes, and + * verify that the quantized size is the same as the extent size. 
+ */ + + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.nhchunks", (void *)&nhchunks, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + + assert_d_eq(mallctlnametomib("arenas.hchunk.0.size", mib, &miblen), 0, + "Unexpected mallctlnametomib failure"); + for (i = 0; i < nhchunks; i++) { + size_t extent_size, floor, ceil; + + + mib[2] = i; + sz = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, + &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); + floor = extent_size_quantize_floor(extent_size); + ceil = extent_size_quantize_ceil(extent_size); + + assert_zu_eq(extent_size, floor, + "Extent quantization should be a no-op for precise size " + "(extent_size=%zu)", extent_size); + assert_zu_eq(extent_size, ceil, + "Extent quantization should be a no-op for precise size " + "(extent_size=%zu)", extent_size); + + if (i > 0) { + assert_zu_eq(extent_size_prev, + extent_size_quantize_floor(extent_size - PAGE), + "Floor should be a precise size"); + if (extent_size_prev < ceil_prev) { + assert_zu_eq(ceil_prev, extent_size, + "Ceiling should be a precise size " + "(extent_size_prev=%zu, ceil_prev=%zu, " + "extent_size=%zu)", extent_size_prev, + ceil_prev, extent_size); + } + } + if (i + 1 < nhchunks) { + extent_size_prev = floor; + ceil_prev = extent_size_quantize_ceil(extent_size + + PAGE); + } + } +} +TEST_END + +TEST_BEGIN(test_monotonic) { +#define SZ_MAX ZU(4 * 1024 * 1024) + unsigned i; + size_t floor_prev, ceil_prev; + + floor_prev = 0; + ceil_prev = 0; + for (i = 1; i <= SZ_MAX >> LG_PAGE; i++) { + size_t extent_size, floor, ceil; + + extent_size = i << LG_PAGE; + floor = extent_size_quantize_floor(extent_size); + ceil = extent_size_quantize_ceil(extent_size); + + assert_zu_le(floor, extent_size, + "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)", + floor, extent_size, ceil); + assert_zu_ge(ceil, extent_size, + "Ceiling should be >= (floor=%zu, extent_size=%zu, " + "ceil=%zu)", floor, extent_size, ceil); + + 
assert_zu_le(floor_prev, floor, "Floor should be monotonic " + "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)", + floor_prev, floor, extent_size, ceil); + assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)", + floor, extent_size, ceil_prev, ceil); + + floor_prev = floor; + ceil_prev = ceil; + } +} +TEST_END + +int +main(void) { + return test( + test_huge_extent_size, + test_monotonic); +} From 079b8bee37ddd35e25c0cf7ac9241520290fa66c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 26 Feb 2017 12:48:43 -0800 Subject: [PATCH 0669/2608] Tidy up extent quantization. Remove obsolete unit test scaffolding for extent quantization. Remove redundant assertions. Add an assertion to extents_first_best_fit_locked() that should help prevent aligned allocation regressions. --- include/jemalloc/internal/extent_externs.h | 4 ---- src/extent.c | 26 +++++----------------- 2 files changed, 5 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index d971ec3a..f5efed06 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -11,10 +11,6 @@ extent_hooks_t *extent_hooks_get(arena_t *arena); extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); #ifdef JEMALLOC_JET -typedef size_t (extent_size_quantize_t)(size_t); -extern extent_size_quantize_t *extent_size_quantize_floor; -extern extent_size_quantize_t *extent_size_quantize_ceil; -#else size_t extent_size_quantize_floor(size_t size); size_t extent_size_quantize_ceil(size_t size); #endif diff --git a/src/extent.c b/src/extent.c index afc60061..09990aae 100644 --- a/src/extent.c +++ b/src/extent.c @@ -118,9 +118,8 @@ extent_hooks_assure_initialized(arena_t *arena, } } -#ifdef JEMALLOC_JET -#undef extent_size_quantize_floor -#define extent_size_quantize_floor JEMALLOC_N(n_extent_size_quantize_floor) 
+#ifndef JEMALLOC_JET +static #endif size_t extent_size_quantize_floor(size_t size) { @@ -130,9 +129,6 @@ extent_size_quantize_floor(size_t size) { assert(size > 0); assert((size & PAGE_MASK) == 0); - assert(size != 0); - assert(size == PAGE_CEILING(size)); - pind = psz2ind(size - large_pad + 1); if (pind == 0) { /* @@ -147,16 +143,9 @@ extent_size_quantize_floor(size_t size) { assert(ret <= size); return ret; } -#ifdef JEMALLOC_JET -#undef extent_size_quantize_floor -#define extent_size_quantize_floor JEMALLOC_N(extent_size_quantize_floor) -extent_size_quantize_t *extent_size_quantize_floor = - JEMALLOC_N(n_extent_size_quantize_floor); -#endif -#ifdef JEMALLOC_JET -#undef extent_size_quantize_ceil -#define extent_size_quantize_ceil JEMALLOC_N(n_extent_size_quantize_ceil) +#ifndef JEMALLOC_JET +static #endif size_t extent_size_quantize_ceil(size_t size) { @@ -180,12 +169,6 @@ extent_size_quantize_ceil(size_t size) { } return ret; } -#ifdef JEMALLOC_JET -#undef extent_size_quantize_ceil -#define extent_size_quantize_ceil JEMALLOC_N(extent_size_quantize_ceil) -extent_size_quantize_t *extent_size_quantize_ceil = - JEMALLOC_N(n_extent_size_quantize_ceil); -#endif /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) @@ -258,6 +241,7 @@ extents_first_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, for (pszind_t i = pind; i < NPSIZES+1; i++) { extent_t *extent = extent_heap_first(&extents->heaps[i]); if (extent != NULL) { + assert(extent_size_get(extent) >= size); return extent; } } From 472fef2e125489e236afbbccad78946fc9f1d73f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 25 Feb 2017 14:10:05 -0800 Subject: [PATCH 0670/2608] Fix {allocated,nmalloc,ndalloc,nrequests}_large stats regression. This fixes a regression introduced by d433471f581ca50583c7a99f9802f7388f81aa36 (Derive {allocated,nmalloc,ndalloc,nrequests}_large stats.). 
--- include/jemalloc/internal/stats_structs.h | 2 +- src/arena.c | 14 +------------- 2 files changed, 2 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 1571ef4f..354f93ee 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -81,7 +81,7 @@ struct arena_stats_s { #endif /* Number of bytes currently mapped, excluding retained memory. */ - size_t mapped; /* Derived. */ + size_t mapped; /* Partially derived. */ /* * Number of bytes currently retained as a side effect of munmap() being diff --git a/src/arena.c b/src/arena.c index a914abda..18b49312 100644 --- a/src/arena.c +++ b/src/arena.c @@ -192,19 +192,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->internal += arena_internal_get(arena); astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) + extents_npages_get(&arena->extents_cached)) << LG_PAGE)); - astats->allocated_large += arena_stats_read_zu(tsdn, &arena->stats, - &arena->stats.allocated_large); - astats->nmalloc_large += arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.nmalloc_large); - astats->ndalloc_large += arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.ndalloc_large); - astats->nrequests_large += arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.nrequests_large); - astats->allocated_large = 0; - astats->nmalloc_large = 0; - astats->ndalloc_large = 0; - astats->nrequests_large = 0; for (szind_t i = 0; i < NSIZES - NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nmalloc); @@ -224,7 +212,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t curlextents = arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.lstats[i].curlextents); lstats[i].curlextents += curlextents; - astats->allocated_large += curlextents * index2size(i); + astats->allocated_large += curlextents * 
index2size(NBINS + i); } arena_stats_unlock(tsdn, &arena->stats); From 1027a2682bf02204265f2a2403d7701a3778a8a2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Dec 2016 13:38:11 -0800 Subject: [PATCH 0671/2608] Add some missing explicit casts. This resolves #614. --- include/jemalloc/internal/tsd.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 9055acaf..9f374335 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -479,13 +479,14 @@ a_name##tsd_wrapper_get(bool init) \ \ if (init && unlikely(wrapper == NULL)) { \ tsd_init_block_t block; \ - wrapper = tsd_init_check_recursion( \ - &a_name##tsd_init_head, &block); \ + wrapper = (a_name##tsd_wrapper_t *) \ + tsd_init_check_recursion(&a_name##tsd_init_head, \ + &block); \ if (wrapper) \ return (wrapper); \ wrapper = (a_name##tsd_wrapper_t *) \ malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = wrapper; \ + block.data = (void *)wrapper; \ if (wrapper == NULL) { \ malloc_write(": Error allocating" \ " TSD for "#a_name"\n"); \ From 7c124830a1f542b5b8d386aa33fab5aa320eb975 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Feb 2017 10:56:23 -0800 Subject: [PATCH 0672/2608] Fix lg_chunk clamping for config_cache_oblivious. Fix lg_chunk clamping to take into account cache-oblivious large allocation. This regression only resulted in incorrect behavior if !config_fill (false unless --disable-fill specified) and config_cache_oblivious (true unless --disable-cache-oblivious specified). This regression was introduced by 8a03cf039cd06f9fa6972711195055d865673966 (Implement cache index randomization for large allocations.), which was first released in 4.0.0. This resolves #555. 
--- src/arena.c | 10 ++-------- src/jemalloc.c | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/arena.c b/src/arena.c index 6d178d21..ca992f73 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2710,6 +2710,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return (arena_malloc_small(tsdn, arena, ind, zero)); if (likely(size <= large_maxclass)) return (arena_malloc_large(tsdn, arena, ind, zero)); + assert(index2size(ind) >= chunksize); return (huge_malloc(tsdn, arena, index2size(ind), zero)); } @@ -3806,15 +3807,8 @@ arena_boot(void) arena_maxrun = chunksize - (map_bias << LG_PAGE); assert(arena_maxrun > 0); large_maxclass = index2size(size2index(chunksize)-1); - if (large_maxclass > arena_maxrun) { - /* - * For small chunk sizes it's possible for there to be fewer - * non-header pages available than are necessary to serve the - * size classes just below chunksize. - */ - large_maxclass = arena_maxrun; - } assert(large_maxclass > 0); + assert(large_maxclass + large_pad <= arena_maxrun); nlclasses = size2index(large_maxclass) - size2index(SMALL_MAXCLASS); nhclasses = NSIZES - nlclasses - NBINS; diff --git a/src/jemalloc.c b/src/jemalloc.c index 029fe525..e9d83524 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1136,16 +1136,18 @@ malloc_conf_init(void) CONF_HANDLE_BOOL(opt_abort, "abort", true) /* - * Chunks always require at least one header page, - * as many as 2^(LG_SIZE_CLASS_GROUP+1) data pages, and - * possibly an additional page in the presence of - * redzones. In order to simplify options processing, - * use a conservative bound that accommodates all these - * constraints. + * Chunks always require at least one header page, as + * many as 2^(LG_SIZE_CLASS_GROUP+1) data pages (plus an + * additional page in the presence of cache-oblivious + * large), and possibly an additional page in the + * presence of redzones. 
In order to simplify options + * processing, use a conservative bound that + * accommodates all these constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - LG_SIZE_CLASS_GROUP + (config_fill ? 2 : 1), - (sizeof(size_t) << 3) - 1, yes, yes, true) + LG_SIZE_CLASS_GROUP + 1 + (config_cache_oblivious || + config_fill ? 1 : 0), (sizeof(size_t) << 3) - 1, + yes, yes, true) if (strncmp("dss", k, klen) == 0) { int i; bool match = false; From 7b53fe928ee857d9401ea1a4fb77285b6fa91e7a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 30 Jan 2017 15:54:16 -0800 Subject: [PATCH 0673/2608] Handle race in stats_arena_bins_print When multiple threads calling stats_print, race could happen as we read the counters in separate mallctl calls; and the removed assertion could fail when other operations happened in between the mallctl calls. For simplicity, output "race" in the utilization field in this case. This resolves #616. --- src/stats.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 8d579c7b..92b80865 100644 --- a/src/stats.c +++ b/src/stats.c @@ -136,8 +136,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, availregs = nregs * curruns; milli = (availregs != 0) ? (1000 * curregs) / availregs : 1000; - assert(milli <= 1000); - if (milli < 10) { + + if (milli > 1000) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. 
+ */ + malloc_snprintf(util, sizeof(util), " race"); + } else if (milli < 10) { malloc_snprintf(util, sizeof(util), "0.00%zu", milli); } else if (milli < 100) { @@ -146,8 +154,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } else if (milli < 1000) { malloc_snprintf(util, sizeof(util), "0.%zu", milli); - } else + } else { + assert(milli == 1000); malloc_snprintf(util, sizeof(util), "1"); + } if (config_tcache) { malloc_cprintf(write_cb, cbopaque, From 4a068644c7b60ed91c08ade8c71c2077fec4687b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Feb 2017 17:35:19 -0800 Subject: [PATCH 0674/2608] Put -D_REENTRANT in CPPFLAGS rather than CFLAGS. This regression was introduced by 194d6f9de8ff92841b67f38a2a6a06818e3240dd (Restructure *CFLAGS/*CXXFLAGS configuration.). --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1627a3d9..5e655471 100644 --- a/configure.ac +++ b/configure.ac @@ -1503,7 +1503,7 @@ if test "x$abi" != "xpecoff" ; then fi fi -JE_APPEND_VS(CFLAGS, -D_REENTRANT) +JE_APPEND_VS(CPPFLAGS, -D_REENTRANT) dnl Check whether clock_gettime(2) is in libc or librt. AC_SEARCH_LIBS([clock_gettime], [rt]) From 25d50a943a46e2f435002fcfdacfa93f6974ac11 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Feb 2017 18:11:58 -0800 Subject: [PATCH 0675/2608] Dodge 32-bit-clang-specific backtracing failure. This disables run_tests.sh configurations that use the combination of 32-bit clang and heap profiling. 
--- scripts/gen_run_tests.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 694685cb..729ecb1a 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -28,6 +28,10 @@ print 'unamestr=`uname`' for cc, cxx in possible_compilers: for compiler_opts in powerset(possible_compiler_opts): for config_opts in powerset(possible_config_opts): + if cc is 'clang' \ + and '-m32' in possible_compiler_opts \ + and '--enable-prof' in config_opts: + continue config_line = ( './configure ' + 'CC="{} {}" '.format(cc, " ".join(compiler_opts)) From 766ddcd0f20715799042b7e24ea489f24f7121f0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 16 Dec 2016 07:18:55 -0800 Subject: [PATCH 0676/2608] restructure *CFLAGS configuration. Convert CFLAGS to be a concatenation: CFLAGS := CONFIGURE_CFLAGS SPECIFIED_CFLAGS EXTRA_CFLAGS This ordering makes it possible to override the flags set by the configure script both during and after configuration, with CFLAGS and EXTRA_CFLAGS, respectively. This resolves #619. --- INSTALL | 17 ++-- Makefile.in | 4 +- configure.ac | 231 +++++++++++++++++++++++++++++---------------------- 3 files changed, 141 insertions(+), 111 deletions(-) diff --git a/INSTALL b/INSTALL index cce3ed71..08b3624f 100644 --- a/INSTALL +++ b/INSTALL @@ -306,17 +306,16 @@ The following environment variables (not a definitive list) impact configure's behavior: CFLAGS="?" - Pass these flags to the compiler. You probably shouldn't define this unless - you know what you are doing. (Use EXTRA_CFLAGS instead.) + Pass these flags to the C compiler. Any flags set by the configure script + are prepended, which means explicitly set flags generally take precedence. + Take care when specifying flags such as -Werror, because configure tests may + be affected in undesirable ways. EXTRA_CFLAGS="?" - Append these flags to CFLAGS. 
This makes it possible to add flags such as - -Werror, while allowing the configure script to determine what other flags - are appropriate for the specified configuration. - - The configure script specifically checks whether an optimization flag (-O*) - is specified in EXTRA_CFLAGS, and refrains from specifying an optimization - level if it finds that one has already been specified. + Append these flags to CFLAGS, without passing them to the compiler during + configuration. This makes it possible to add flags such as -Werror, while + allowing the configure script to determine what other flags are appropriate + for the specified configuration. CPPFLAGS="?" Pass these flags to the C preprocessor. Note that CFLAGS is not passed to diff --git a/Makefile.in b/Makefile.in index 8f1fb554..e49a8711 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,8 +24,10 @@ abs_objroot := @abs_objroot@ # Build parameters. CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include +CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@ +SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@ EXTRA_CFLAGS := @EXTRA_CFLAGS@ -CFLAGS := @CFLAGS@ $(EXTRA_CFLAGS) +CFLAGS := $(strip $(CONFIGURE_CFLAGS) $(SPECIFIED_CFLAGS) $(EXTRA_CFLAGS)) LDFLAGS := @LDFLAGS@ EXTRA_LDFLAGS := @EXTRA_LDFLAGS@ LIBS := @LIBS@ diff --git a/configure.ac b/configure.ac index 4996406e..db9e7222 100644 --- a/configure.ac +++ b/configure.ac @@ -6,29 +6,66 @@ AC_CONFIG_AUX_DIR([build-aux]) dnl ============================================================================ dnl Custom macro definitions. -dnl JE_CFLAGS_APPEND(cflag) -AC_DEFUN([JE_CFLAGS_APPEND], +dnl JE_CONCAT_VVV(r, a, b) +dnl +dnl Set $r to the concatenation of $a and $b, with a space separating them iff +dnl both $a and $b are non-emty. 
+AC_DEFUN([JE_CONCAT_VVV], +if test "x[$]{$2}" = "x" -o "x[$]{$3}" = "x" ; then + $1="[$]{$2}[$]{$3}" +else + $1="[$]{$2} [$]{$3}" +fi +) + +dnl JE_APPEND_VS(a, b) +dnl +dnl Set $a to the concatenation of $a and b, with a space separating them iff +dnl both $a and b are non-empty. +AC_DEFUN([JE_APPEND_VS], + T_APPEND_V=$2 + JE_CONCAT_VVV($1, $1, T_APPEND_V) +) + +CONFIGURE_CFLAGS= +SPECIFIED_CFLAGS="${CFLAGS}" +dnl JE_CFLAGS_ADD(cflag) +dnl +dnl CFLAGS is the concatenation of CONFIGURE_CFLAGS and SPECIFIED_CFLAGS +dnl (ignoring EXTRA_CFLAGS, which does not impact configure tests. This macro +dnl appends to CONFIGURE_CFLAGS and regenerates CFLAGS. +AC_DEFUN([JE_CFLAGS_ADD], [ AC_MSG_CHECKING([whether compiler supports $1]) -TCFLAGS="${CFLAGS}" -if test "x${CFLAGS}" = "x" ; then - CFLAGS="$1" -else - CFLAGS="${CFLAGS} $1" -fi +T_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" +JE_APPEND_VS(CONFIGURE_CFLAGS, $1) +JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ return 0; ]])], - [je_cv_cflags_appended=$1] + [je_cv_cflags_added=$1] AC_MSG_RESULT([yes]), - [je_cv_cflags_appended=] + [je_cv_cflags_added=] AC_MSG_RESULT([no]) - [CFLAGS="${TCFLAGS}"] + [CONFIGURE_CFLAGS="${T_CONFIGURE_CFLAGS}"] ) +JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) ]) +dnl JE_CFLAGS_SAVE() +dnl JE_CFLAGS_RESTORE() +dnl +dnl Save/restore CFLAGS. Nesting is not supported. 
+AC_DEFUN([JE_CFLAGS_SAVE], +SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" +) +AC_DEFUN([JE_CFLAGS_RESTORE], +CONFIGURE_CFLAGS="${SAVED_CONFIGURE_CFLAGS}" +JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) +) + dnl JE_COMPILABLE(label, hcode, mcode, rvar) dnl dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors @@ -168,46 +205,45 @@ if test "x${je_cv_cray}" = "xyes" ; then [je_cv_cray_84=no])]) fi -if test "x$CFLAGS" = "x" ; then - no_CFLAGS="yes" - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-std=gnu11]) - if test "x$je_cv_cflags_appended" = "x-std=gnu11" ; then +if test "x$GCC" = "xyes" ; then + JE_CFLAGS_ADD([-std=gnu11]) + if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + else + JE_CFLAGS_ADD([-std=gnu99]) + if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) - else - JE_CFLAGS_APPEND([-std=gnu99]) - if test "x$je_cv_cflags_appended" = "x-std=gnu99" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) - fi fi - JE_CFLAGS_APPEND([-Wall]) - JE_CFLAGS_APPEND([-Werror=declaration-after-statement]) - JE_CFLAGS_APPEND([-Wshorten-64-to-32]) - JE_CFLAGS_APPEND([-Wsign-compare]) - JE_CFLAGS_APPEND([-pipe]) - JE_CFLAGS_APPEND([-g3]) - elif test "x$je_cv_msvc" = "xyes" ; then - CC="$CC -nologo" - JE_CFLAGS_APPEND([-Zi]) - JE_CFLAGS_APPEND([-MT]) - JE_CFLAGS_APPEND([-W3]) - JE_CFLAGS_APPEND([-FS]) - CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat" fi - if test "x$je_cv_cray" = "xyes" ; then - dnl cray compiler 8.4 has an inlining bug - if test "x$je_cv_cray_84" = "xyes" ; then - JE_CFLAGS_APPEND([-hipa2]) - JE_CFLAGS_APPEND([-hnognu]) - fi - if test "x$enable_cc_silence" != "xno" ; then - dnl ignore unreachable code warning - JE_CFLAGS_APPEND([-hnomessage=128]) - dnl ignore redefinition of "malloc", "free", etc warning - JE_CFLAGS_APPEND([-hnomessage=1357]) - fi + JE_CFLAGS_ADD([-Wall]) + 
JE_CFLAGS_ADD([-Werror=declaration-after-statement]) + JE_CFLAGS_ADD([-Wshorten-64-to-32]) + JE_CFLAGS_ADD([-Wsign-compare]) + JE_CFLAGS_ADD([-pipe]) + JE_CFLAGS_ADD([-g3]) +elif test "x$je_cv_msvc" = "xyes" ; then + CC="$CC -nologo" + JE_CFLAGS_ADD([-Zi]) + JE_CFLAGS_ADD([-MT]) + JE_CFLAGS_ADD([-W3]) + JE_CFLAGS_ADD([-FS]) + JE_APPEND_VS(CPPFLAGS, -I${srcdir}/include/msvc_compat) +fi +if test "x$je_cv_cray" = "xyes" ; then + dnl cray compiler 8.4 has an inlining bug + if test "x$je_cv_cray_84" = "xyes" ; then + JE_CFLAGS_ADD([-hipa2]) + JE_CFLAGS_ADD([-hnognu]) + fi + if test "x$enable_cc_silence" != "xno" ; then + dnl ignore unreachable code warning + JE_CFLAGS_ADD([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_ADD([-hnomessage=1357]) fi fi +AC_SUBST([CONFIGURE_CFLAGS]) +AC_SUBST([SPECIFIED_CFLAGS]) AC_SUBST([EXTRA_CFLAGS]) AC_PROG_CPP @@ -217,7 +253,7 @@ if test "x${ac_cv_big_endian}" = "x1" ; then fi if test "x${je_cv_msvc}" = "xyes" -a "x${ac_cv_header_inttypes_h}" = "xno"; then - CPPFLAGS="$CPPFLAGS -I${srcdir}/include/msvc_compat/C99" + JE_APPEND_VS(CPPFLAGS, -I${srcdir}/include/msvc_compat/C99) fi if test "x${je_cv_msvc}" = "xyes" ; then @@ -348,7 +384,6 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. -CFLAGS="$CFLAGS" default_munmap="1" maps_coalesce="1" case "${host}" in @@ -380,7 +415,7 @@ case "${host}" in ;; *-*-linux-android) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. - CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -391,7 +426,7 @@ case "${host}" in ;; *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
- CPPFLAGS="$CPPFLAGS -D_GNU_SOURCE" + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -416,8 +451,8 @@ case "${host}" in abi="elf" RPATH='-Wl,-R,$(1)' dnl Solaris needs this for sigwait(). - CPPFLAGS="$CPPFLAGS -D_POSIX_PTHREAD_SEMANTICS" - LIBS="$LIBS -lposix4 -lsocket -lnsl" + JE_APPEND_VS(CPPFLAGS, -D_POSIX_PTHREAD_SEMANTICS) + JE_APPEND_VS(LIBS, -lposix4 -lsocket -lnsl) ;; *-ibm-aix*) if "$LG_SIZEOF_PTR" = "8"; then @@ -515,19 +550,19 @@ JE_COMPILABLE([__attribute__ syntax], if test "x${je_cv_attribute}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then - JE_CFLAGS_APPEND([-fvisibility=hidden]) + JE_CFLAGS_ADD([-fvisibility=hidden]) fi fi dnl Check for tls_model attribute support (clang 3.0 still lacks support). -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([tls_model attribute], [], [static __thread int __attribute__((tls_model("initial-exec"), unused)) foo; foo = 0;], [je_cv_tls_model]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_tls_model}" = "xyes" ; then AC_DEFINE([JEMALLOC_TLS_MODEL], [__attribute__((tls_model("initial-exec")))]) @@ -535,35 +570,35 @@ else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) fi dnl Check for alloc_size attribute support. -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([alloc_size attribute], [#include ], [void *foo(size_t size) __attribute__((alloc_size(1)));], [je_cv_alloc_size]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_alloc_size}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ]) fi dnl Check for format(gnu_printf, ...) attribute support. 
-SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(gnu_printf, 1, 2)));], [je_cv_format_gnu_printf]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_format_gnu_printf}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ]) fi dnl Check for format(printf, ...) attribute support. -SAVED_CFLAGS="${CFLAGS}" -JE_CFLAGS_APPEND([-Werror]) -JE_CFLAGS_APPEND([-herror_on_warning]) +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) JE_COMPILABLE([format(printf, ...) attribute], [#include ], [void *foo(const char *format, ...) __attribute__((format(printf, 1, 2)));], [je_cv_format_printf]) -CFLAGS="${SAVED_CFLAGS}" +JE_CFLAGS_RESTORE() if test "x${je_cv_format_printf}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ]) fi @@ -625,9 +660,9 @@ if test "x$enable_code_coverage" = "x1" ; then deoptimize="no" echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || deoptimize="yes" if test "x${deoptimize}" = "xyes" ; then - JE_CFLAGS_APPEND([-O0]) + JE_CFLAGS_ADD([-O0]) fi - JE_CFLAGS_APPEND([-fprofile-arcs -ftest-coverage]) + JE_CFLAGS_ADD([-fprofile-arcs -ftest-coverage]) EXTRA_LDFLAGS="$EXTRA_LDFLAGS -fprofile-arcs -ftest-coverage" AC_DEFINE([JEMALLOC_CODE_COVERAGE], [ ]) fi @@ -817,19 +852,14 @@ if test "x$enable_ivsalloc" = "x1" ; then fi dnl Only optimize if not debugging. -if test "x$enable_debug" = "x0" -a "x$no_CFLAGS" = "xyes" ; then - dnl Make sure that an optimization flag was not specified in EXTRA_CFLAGS. 
- optimize="no" - echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || optimize="yes" - if test "x${optimize}" = "xyes" ; then - if test "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-O3]) - JE_CFLAGS_APPEND([-funroll-loops]) - elif test "x$je_cv_msvc" = "xyes" ; then - JE_CFLAGS_APPEND([-O2]) - else - JE_CFLAGS_APPEND([-O]) - fi +if test "x$enable_debug" = "x0" ; then + if test "x$GCC" = "xyes" ; then + JE_CFLAGS_ADD([-O3]) + JE_CFLAGS_ADD([-funroll-loops]) + elif test "x$je_cv_msvc" = "xyes" ; then + JE_CFLAGS_ADD([-O2]) + else + JE_CFLAGS_ADD([-O]) fi fi @@ -893,10 +923,10 @@ fi, if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then AC_CHECK_HEADERS([libunwind.h], , [enable_prof_libunwind="0"]) if test "x$LUNWIND" = "x-lunwind" ; then - AC_CHECK_LIB([unwind], [unw_backtrace], [LIBS="$LIBS $LUNWIND"], + AC_CHECK_LIB([unwind], [unw_backtrace], [JE_APPEND_VS(LIBS, $LUNWIND)], [enable_prof_libunwind="0"]) else - LIBS="$LIBS $LUNWIND" + JE_APPEND_VS(LIBS, $LUNWIND) fi if test "x${enable_prof_libunwind}" = "x1" ; then backtrace_method="libunwind" @@ -918,7 +948,7 @@ fi if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ -a "x$GCC" = "xyes" ; then AC_CHECK_HEADERS([unwind.h], , [enable_prof_libgcc="0"]) - AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [LIBS="$LIBS -lgcc"], [enable_prof_libgcc="0"]) + AC_CHECK_LIB([gcc], [_Unwind_Backtrace], [JE_APPEND_VS(LIBS, -lgcc)], [enable_prof_libgcc="0"]) if test "x${enable_prof_libgcc}" = "x1" ; then backtrace_method="libgcc" AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) @@ -940,7 +970,7 @@ fi ) if test "x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ -a "x$GCC" = "xyes" ; then - JE_CFLAGS_APPEND([-fno-omit-frame-pointer]) + JE_CFLAGS_ADD([-fno-omit-frame-pointer]) backtrace_method="gcc intrinsics" AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) else @@ -955,9 +985,7 @@ AC_MSG_CHECKING([configured backtracing method]) AC_MSG_RESULT([$backtrace_method]) if test "x$enable_prof" = "x1" ; then dnl Heap 
profiling uses the log(3) function. - if test "x$LM" != "x" ; then - LIBS="$LIBS $LM" - fi + JE_APPEND_VS(LIBS, $LM) AC_DEFINE([JEMALLOC_PROF], [ ]) fi @@ -1326,7 +1354,7 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl first, but try libc too before failing. - AC_CHECK_LIB([pthread], [pthread_create], [LIBS="$LIBS -lpthread"], + AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) JE_COMPILABLE([pthread_atfork(3)], [ @@ -1339,7 +1367,7 @@ if test "x$abi" != "xpecoff" ; then fi fi -CPPFLAGS="$CPPFLAGS -D_REENTRANT" +JE_APPEND_VS(CPPFLAGS, -D_REENTRANT) dnl Check whether clock_gettime(2) is in libc or librt. AC_SEARCH_LIBS([clock_gettime], [rt]) @@ -1348,13 +1376,13 @@ dnl Cray wrapper compiler often adds `-lrt` when using `-static`. Check with dnl `-dynamic` as well in case a user tries to dynamically link in jemalloc if test "x$je_cv_cray_prgenv_wrapper" = "xyes" ; then if test "$ac_cv_search_clock_gettime" != "-lrt"; then - SAVED_CFLAGS="${CFLAGS}" + JE_CFLAGS_SAVE() unset ac_cv_search_clock_gettime - JE_CFLAGS_APPEND([-dynamic]) + JE_CFLAGS_ADD([-dynamic]) AC_SEARCH_LIBS([clock_gettime], [rt]) - CFLAGS="${SAVED_CFLAGS}" + JE_CFLAGS_RESTORE() fi fi @@ -1410,8 +1438,8 @@ fi if test "x$enable_syscall" = "x1" ; then dnl Check if syscall(2) is usable. Treat warnings as errors, so that e.g. OS dnl X 10.12's deprecation warning prevents use. 
- SAVED_CFLAGS="${CFLAGS}" - JE_CFLAGS_APPEND([-Werror]) + JE_CFLAGS_SAVE() + JE_CFLAGS_ADD([-Werror]) JE_COMPILABLE([syscall(2)], [ #include #include @@ -1419,7 +1447,7 @@ if test "x$enable_syscall" = "x1" ; then syscall(SYS_write, 2, "hello", 5); ], [je_cv_syscall]) - CFLAGS="${SAVED_CFLAGS}" + JE_CFLAGS_RESTORE() if test "x$je_cv_syscall" = "xyes" ; then AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) fi @@ -1495,7 +1523,7 @@ if test "x$enable_lazy_lock" = "x1" ; then if test "x$abi" != "xpecoff" ; then AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], + [AC_CHECK_LIB([dl], [dlsym], [JE_APPEND_VS(LIBS, -ldl)], [AC_MSG_ERROR([libdl is missing])]) ]) fi @@ -1947,7 +1975,8 @@ AC_MSG_RESULT([library revision : ${rev}]) AC_MSG_RESULT([]) AC_MSG_RESULT([CONFIG : ${CONFIG}]) AC_MSG_RESULT([CC : ${CC}]) -AC_MSG_RESULT([CFLAGS : ${CFLAGS}]) +AC_MSG_RESULT([CONFIGURE_CFLAGS : ${CONFIGURE_CFLAGS}]) +AC_MSG_RESULT([SPECIFIED_CFLAGS : ${SPECIFIED_CFLAGS}]) AC_MSG_RESULT([EXTRA_CFLAGS : ${EXTRA_CFLAGS}]) AC_MSG_RESULT([CPPFLAGS : ${CPPFLAGS}]) AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) From d84d2909c3132ee633c92fd0d720ec2aed80ff11 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 01:08:28 -0800 Subject: [PATCH 0677/2608] Fix/enhance THP integration. Detect whether chunks start off as THP-capable by default (according to the state of /sys/kernel/mm/transparent_hugepage/enabled), and use this as the basis for whether to call pages_nohuge() once per chunk during first purge of any of the chunk's page runs. Add the --disable-thp configure option, as well as the the opt.thp mallctl. This resolves #541. 
--- INSTALL | 7 ++ configure.ac | 24 +++- doc/jemalloc.xml.in | 26 +++++ include/jemalloc/internal/arena.h | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 7 ++ .../internal/jemalloc_internal_defs.h.in | 11 +- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 105 +++++++++++++++--- src/ctl.c | 6 + src/jemalloc.c | 3 + src/pages.c | 4 +- src/stats.c | 1 + test/unit/mallctl.c | 2 + 13 files changed, 177 insertions(+), 21 deletions(-) diff --git a/INSTALL b/INSTALL index 08b3624f..19196ec3 100644 --- a/INSTALL +++ b/INSTALL @@ -157,6 +157,13 @@ any of the following arguments (not a definitive list) to 'configure': released in bulk, thus reducing the total number of mutex operations. See the "opt.tcache" option for usage details. +--disable-thp + Disable transparent huge page (THP) integration. On systems with THP + support, THPs are explicitly disabled as a side effect of unused dirty page + purging for chunks that back small and/or large allocations, because such + chunks typically comprise active, unused dirty, and untouched clean + pages. + --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. diff --git a/configure.ac b/configure.ac index db9e7222..20a8a646 100644 --- a/configure.ac +++ b/configure.ac @@ -1683,10 +1683,31 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_THP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) fi fi +dnl Enable transparent huge page support by default. 
+AC_ARG_ENABLE([thp], + [AS_HELP_STRING([--disable-thp], + [Disable transparent huge page supprot])], +[if test "x$enable_thp" = "xno" -o "x${je_cv_thp}" != "xyes" ; then + enable_thp="0" +else + enable_thp="1" +fi +], +[if test "x${je_cv_thp}" = "xyes" ; then + enable_thp="1" +else + enable_thp="0" +fi +]) +if test "x$enable_thp" = "x1" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) +fi +AC_SUBST([enable_thp]) + dnl ============================================================================ dnl Check whether __sync_{add,sub}_and_fetch() are available despite dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. @@ -2014,6 +2035,7 @@ AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([tcache : ${enable_tcache}]) +AC_MSG_RESULT([thp : ${enable_thp}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([valgrind : ${enable_valgrind}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d9c83452..c97ab0fc 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -850,6 +850,17 @@ for (i = 0; i < nbins; i++) { during build configuration. + + + config.thp + (bool) + r- + + was not specified + during build configuration, and the system supports transparent huge + page manipulation. + + config.tls @@ -1162,6 +1173,21 @@ malloc_conf = "xmalloc:true";]]> forcefully disabled. + + + opt.thp + (bool) + r- + [] + + Transparent huge page (THP) integration + enabled/disabled. When enabled, THPs are explicitly disabled as a side + effect of unused dirty page purging for chunks that back small and/or + large allocations, because such chunks typically comprise active, + unused dirty, and untouched clean pages. This option is enabled by + default. 
+ + opt.lg_tcache_max diff --git a/include/jemalloc/internal/arena.h b/include/jemalloc/internal/arena.h index ce4e6029..119e3a59 100644 --- a/include/jemalloc/internal/arena.h +++ b/include/jemalloc/internal/arena.h @@ -506,6 +506,7 @@ static const size_t large_pad = #endif ; +extern bool opt_thp; extern purge_mode_t opt_purge; extern const char *purge_mode_names[]; extern ssize_t opt_lg_dirty_mult; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 6213dd82..e3b499a8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -99,6 +99,13 @@ static const bool config_tcache = false #endif ; +static const bool config_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; static const bool config_tls = #ifdef JEMALLOC_TLS true diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b7ae3b79..7c88b0d7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -252,6 +252,12 @@ /* Defined if madvise(2) is available. */ #undef JEMALLOC_HAVE_MADVISE +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_HUGE + /* * Methods for purging unused pages differ between operating systems. * @@ -264,10 +270,7 @@ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED -/* - * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE - * arguments to madvise(2). - */ +/* Defined if transparent huge page support is enabled. */ #undef JEMALLOC_THP /* Define if operating system has alloca.h header. 
*/ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a83d9840..0aa9b01b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -392,6 +392,7 @@ opt_quarantine opt_redzone opt_stats_print opt_tcache +opt_thp opt_utrace opt_xmalloc opt_zero diff --git a/src/arena.c b/src/arena.c index ca992f73..a9dff0b0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,8 @@ /******************************************************************************/ /* Data. */ +bool opt_thp = true; +static bool thp_initially_huge; purge_mode_t opt_purge = PURGE_DEFAULT; const char *purge_mode_names[] = { "ratio", @@ -680,7 +682,9 @@ arena_chunk_init_hard(tsdn_t *tsdn, arena_t *arena) if (chunk == NULL) return (NULL); - chunk->hugepage = true; + if (config_thp && opt_thp) { + chunk->hugepage = thp_initially_huge; + } /* * Initialize the map to contain one maximal free untouched run. Mark @@ -745,14 +749,17 @@ arena_chunk_alloc(tsdn_t *tsdn, arena_t *arena) static void arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) { - size_t sn, hugepage; + size_t sn; + UNUSED bool hugepage JEMALLOC_CC_SILENCE_INIT(false); bool committed; chunk_hooks_t chunk_hooks = CHUNK_HOOKS_INITIALIZER; chunk_deregister(chunk, &chunk->node); sn = extent_node_sn_get(&chunk->node); - hugepage = chunk->hugepage; + if (config_thp && opt_thp) { + hugepage = chunk->hugepage; + } committed = (arena_mapbits_decommitted_get(chunk, map_bias) == 0); if (!committed) { /* @@ -765,13 +772,16 @@ arena_chunk_discard(tsdn_t *tsdn, arena_t *arena, arena_chunk_t *chunk) chunk_hooks.decommit(chunk, chunksize, 0, map_bias << LG_PAGE, arena->ind); } - if (!hugepage) { + if (config_thp && opt_thp && hugepage != thp_initially_huge) { /* - * Convert chunk back to the default state, so that all - * subsequent chunk allocations start out with chunks that can - * be backed by transparent huge pages. 
+ * Convert chunk back to initial THP state, so that all + * subsequent chunk allocations start out in a consistent state. */ - pages_huge(chunk, chunksize); + if (thp_initially_huge) { + pages_huge(chunk, chunksize); + } else { + pages_nohuge(chunk, chunksize); + } } chunk_dalloc_cache(tsdn, arena, &chunk_hooks, (void *)chunk, chunksize, @@ -1711,13 +1721,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, chunk_hooks_t *chunk_hooks, /* * If this is the first run purged within chunk, mark - * the chunk as non-huge. This will prevent all use of - * transparent huge pages for this chunk until the chunk - * as a whole is deallocated. + * the chunk as non-THP-capable. This will prevent all + * use of THPs for this chunk until the chunk as a whole + * is deallocated. */ - if (chunk->hugepage) { - pages_nohuge(chunk, chunksize); - chunk->hugepage = false; + if (config_thp && opt_thp && chunk->hugepage) { + chunk->hugepage = pages_nohuge(chunk, + chunksize); } assert(pageind + npages <= chunk_npages); @@ -3772,11 +3782,78 @@ bin_info_init(void) #undef SC } +static void +init_thp_initially_huge(void) { + int fd; + char buf[sizeof("[always] madvise never\n")]; + ssize_t nread; + static const char *enabled_states[] = { + "[always] madvise never\n", + "always [madvise] never\n", + "always madvise [never]\n" + }; + static const bool thp_initially_huge_states[] = { + true, + false, + false + }; + unsigned i; + + if (config_debug) { + for (i = 0; i < sizeof(enabled_states)/sizeof(const char *); + i++) { + assert(sizeof(buf) > strlen(enabled_states[i])); + } + } + assert(sizeof(enabled_states)/sizeof(const char *) == + sizeof(thp_initially_huge_states)/sizeof(bool)); + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + +#if defined(JEMALLOC_USE_SYSCALL) 
&& defined(SYS_read) + nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) { + goto label_error; + } + for (i = 0; i < sizeof(enabled_states)/sizeof(const char *); + i++) { + if (strncmp(buf, enabled_states[i], (size_t)nread) == 0) { + thp_initially_huge = thp_initially_huge_states[i]; + return; + } + } + +label_error: + thp_initially_huge = false; +} + void arena_boot(void) { unsigned i; + if (config_thp && opt_thp) { + init_thp_initially_huge(); + } + arena_lg_dirty_mult_default_set(opt_lg_dirty_mult); arena_decay_time_default_set(opt_decay_time); diff --git a/src/ctl.c b/src/ctl.c index 1e62e2d3..56bc4f4c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -84,6 +84,7 @@ CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) CTL_PROTO(config_tcache) +CTL_PROTO(config_thp) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) CTL_PROTO(config_valgrind) @@ -104,6 +105,7 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_thp) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -258,6 +260,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, {NAME("tcache"), CTL(config_tcache)}, + {NAME("thp"), CTL(config_thp)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("valgrind"), CTL(config_valgrind)}, @@ -281,6 +284,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("thp"), CTL(opt_thp)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1268,6 +1272,7 @@ 
CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_tcache, bool) +CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_valgrind, bool) @@ -1291,6 +1296,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_CGEN(config_thp, opt_thp, opt_thp, bool) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index e9d83524..1cefd4ce 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1272,6 +1272,9 @@ malloc_conf_init(void) "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } + if (config_thp) { + CONF_HANDLE_BOOL(opt_thp, "thp", true) + } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_CHAR_P(opt_prof_prefix, diff --git a/src/pages.c b/src/pages.c index 5f0c9669..7698e49b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -199,7 +199,7 @@ pages_huge(void *addr, size_t size) assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else return (false); @@ -213,7 +213,7 @@ pages_nohuge(void *addr, size_t size) assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else return (false); diff --git a/src/stats.c b/src/stats.c index 92b80865..b76afc5a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -750,6 +750,7 @@ 
stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(xmalloc, ",") OPT_WRITE_BOOL(tcache, ",") OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_BOOL(thp, ",") OPT_WRITE_BOOL(prof, ",") OPT_WRITE_CHAR_P(prof_prefix, ",") OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 2353c92c..3d1a740e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -142,6 +142,7 @@ TEST_BEGIN(test_mallctl_config) TEST_MALLCTL_CONFIG(prof_libunwind, bool); TEST_MALLCTL_CONFIG(stats, bool); TEST_MALLCTL_CONFIG(tcache, bool); + TEST_MALLCTL_CONFIG(thp, bool); TEST_MALLCTL_CONFIG(tls, bool); TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(valgrind, bool); @@ -182,6 +183,7 @@ TEST_BEGIN(test_mallctl_opt) TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, tcache); TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache); + TEST_MALLCTL_OPT(bool, thp, thp); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); From cbb6720861e67b9e4e965614422e22d9bfa95244 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 12:59:22 -0800 Subject: [PATCH 0678/2608] Update ChangeLog for 4.5.0. --- ChangeLog | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/ChangeLog b/ChangeLog index f75edd93..a9406853 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,41 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.5.0 (February 28, 2017) + + This is the first release to benefit from much broader continuous integration + testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure + in place for prior releases, it would have caught all of the most serious + regressions fixed by this release. 
+ + New features: + - Add --disable-thp and the opt.thp to provide opt-out mechanisms for + transparent huge page integration. (@jasone) + - Update zone allocator integration to work with macOS 10.12. (@glandium) + - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and + EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not + during configuration. (@jasone, @ronawho) + + Bug fixes: + - Fix DSS (sbrk(2)-based) allocation. This regression was first released in + 4.3.0. (@jasone) + - Handle race in per size class utilization computation. This functionality + was first released in 4.0.0. (@interwq) + - Fix lock order reversal during gdump. (@jasone) + - Fix-refactor tcache synchronization. This regression was first released in + 4.0.0. (@jasone) + - Fix various JSON-formatted malloc_stats_print() bugs. This functionality + was first released in 4.3.0. (@jasone) + - Fix huge-aligned allocation. This regression was first released in 4.4.0. + (@jasone) + - When transparent huge page integration is enabled, detect what state pages + start in according to the kernel's current operating mode, and only convert + arena chunks to non-huge during purging if that is not their initial state. + This functionality was first released in 4.4.0. (@jasone) + - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case. + This regression was first released in 4.0.0. (@jasone, @428desmo) + - Properly detect sparc64 when building for Linux. (@glaubitz) + * 4.4.0 (December 3, 2016) New features: From e723f99decc1ef4001ef4b946024056f7664ff9f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 14:29:54 -0800 Subject: [PATCH 0679/2608] Alphabetize private symbol names. 
--- include/jemalloc/internal/private_symbols.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0aa9b01b..60b57e5a 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -532,9 +532,9 @@ tcache_flush tcache_get tcache_get_hard tcache_maxclass -tcache_prefork tcache_postfork_child tcache_postfork_parent +tcache_prefork tcache_salloc tcache_stats_merge tcaches From 2406c22f366e8d2c37f4d38b2cd857a2a8bf1aa7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 14:57:10 -0800 Subject: [PATCH 0680/2608] Add casts to CONF_HANDLE_T_U(). This avoids signed/unsigned comparison warnings when specifying integer constants as inputs. Clean up whitespace and add clarifying parentheses for CONF_HANDLE_SIZE_T(opt_lg_chunk, ...). --- src/jemalloc.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1cefd4ce..f73a26cd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1075,18 +1075,18 @@ malloc_conf_init(void) k, klen, v, vlen); \ } else if (clip) { \ if (CONF_MIN_##check_min(um, \ - (min))) \ + (t)(min))) \ o = (t)(min); \ else if (CONF_MAX_##check_max( \ - um, (max))) \ + um, (t)(max))) \ o = (t)(max); \ else \ o = (t)um; \ } else { \ if (CONF_MIN_##check_min(um, \ - (min)) || \ + (t)(min)) || \ CONF_MAX_##check_max(um, \ - (max))) { \ + (t)(max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -1145,8 +1145,8 @@ malloc_conf_init(void) * accommodates all these constraints. */ CONF_HANDLE_SIZE_T(opt_lg_chunk, "lg_chunk", LG_PAGE + - LG_SIZE_CLASS_GROUP + 1 + (config_cache_oblivious || - config_fill ? 1 : 0), (sizeof(size_t) << 3) - 1, + LG_SIZE_CLASS_GROUP + 1 + ((config_cache_oblivious + || config_fill) ? 
1 : 0), (sizeof(size_t) << 3) - 1, yes, yes, true) if (strncmp("dss", k, klen) == 0) { int i; From 700253e1f2f40d4a74e361fa1e688986c361dba4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 12:59:22 -0800 Subject: [PATCH 0681/2608] Update ChangeLog for 4.5.0. --- ChangeLog | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/ChangeLog b/ChangeLog index f75edd93..a9406853 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,41 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 4.5.0 (February 28, 2017) + + This is the first release to benefit from much broader continuous integration + testing, thanks to @davidtgoldblatt. Had we had this testing infrastructure + in place for prior releases, it would have caught all of the most serious + regressions fixed by this release. + + New features: + - Add --disable-thp and the opt.thp to provide opt-out mechanisms for + transparent huge page integration. (@jasone) + - Update zone allocator integration to work with macOS 10.12. (@glandium) + - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and + EXTRA_CFLAGS provides a way to specify e.g. -Werror during building, but not + during configuration. (@jasone, @ronawho) + + Bug fixes: + - Fix DSS (sbrk(2)-based) allocation. This regression was first released in + 4.3.0. (@jasone) + - Handle race in per size class utilization computation. This functionality + was first released in 4.0.0. (@interwq) + - Fix lock order reversal during gdump. (@jasone) + - Fix-refactor tcache synchronization. This regression was first released in + 4.0.0. (@jasone) + - Fix various JSON-formatted malloc_stats_print() bugs. This functionality + was first released in 4.3.0. (@jasone) + - Fix huge-aligned allocation. This regression was first released in 4.4.0. 
+ (@jasone) + - When transparent huge page integration is enabled, detect what state pages + start in according to the kernel's current operating mode, and only convert + arena chunks to non-huge during purging if that is not their initial state. + This functionality was first released in 4.4.0. (@jasone) + - Fix lg_chunk clamping for the --enable-cache-oblivious --disable-fill case. + This regression was first released in 4.0.0. (@jasone, @428desmo) + - Properly detect sparc64 when building for Linux. (@glaubitz) + * 4.4.0 (December 3, 2016) New features: From 379dd44c572111ea5505d33d808e659e8a8b4592 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 14:54:07 -0800 Subject: [PATCH 0682/2608] Add casts to CONF_HANDLE_T_U(). This avoids signed/unsigned comparison warnings when specifying integer constants as inputs. --- src/jemalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 197f9bdc..7e652802 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -962,20 +962,20 @@ malloc_conf_init(void) { k, klen, v, vlen); \ } else if (clip) { \ if (CONF_MIN_##check_min(um, \ - (min))) { \ + (t)(min))) { \ o = (t)(min); \ } else if ( \ CONF_MAX_##check_max(um, \ - (max))) { \ + (t)(max))) { \ o = (t)(max); \ } else { \ o = (t)um; \ } \ } else { \ if (CONF_MIN_##check_min(um, \ - (min)) || \ + (t)(min)) || \ CONF_MAX_##check_max(um, \ - (max))) { \ + (t)(max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ From a8c9e9c651671b09f1055882b4a9e59955e303f7 Mon Sep 17 00:00:00 2001 From: charsyam Date: Wed, 1 Mar 2017 20:58:38 +0900 Subject: [PATCH 0683/2608] fix typo sytem -> system --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 5e655471..1653fe7f 100644 --- a/configure.ac +++ b/configure.ac @@ -1388,7 +1388,7 @@ fi AC_ARG_WITH([lg_hugepage], [AS_HELP_STRING([--with-lg-hugepage=], - [Base 2 log of sytem huge page size])], 
+ [Base 2 log of system huge page size])], [je_cv_lg_hugepage="${with_lg_hugepage}"], [je_cv_lg_hugepage=""]) if test "x${je_cv_lg_hugepage}" = "x" ; then From aa1de06e3ab439e69a20fdd555a8253b0e31fc04 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 1 Mar 2017 14:43:35 -0800 Subject: [PATCH 0684/2608] Small style fix in ctl.c --- src/ctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 0bf4258e..7ec8ff2a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -943,8 +943,7 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) { ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); - } - else { + } else { /* The name refers to a partial path through the ctl tree. */ ret = ENOENT; } From ff55f07eb6cc775755ffbea406d8967ec5e13d6e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 28 Feb 2017 19:24:08 -0800 Subject: [PATCH 0685/2608] Fix typos. --- ChangeLog | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index a9406853..e630595b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -12,7 +12,7 @@ brevity. Much more detail can be found in the git revision history: regressions fixed by this release. New features: - - Add --disable-thp and the opt.thp to provide opt-out mechanisms for + - Add --disable-thp and the opt.thp mallctl to provide opt-out mechanisms for transparent huge page integration. (@jasone) - Update zone allocator integration to work with macOS 10.12. (@glandium) - Restructure *CFLAGS configuration, so that CFLAGS behaves typically, and @@ -25,7 +25,7 @@ brevity. Much more detail can be found in the git revision history: - Handle race in per size class utilization computation. This functionality was first released in 4.0.0. (@interwq) - Fix lock order reversal during gdump. (@jasone) - - Fix-refactor tcache synchronization. 
This regression was first released in + - Fix/refactor tcache synchronization. This regression was first released in 4.0.0. (@jasone) - Fix various JSON-formatted malloc_stats_print() bugs. This functionality was first released in 4.3.0. (@jasone) From d61a5f76b2e3bcd866e19ab90a59081c5fc917fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Mar 2017 11:21:18 -0800 Subject: [PATCH 0686/2608] Convert arena_decay_t's time to be atomically synchronized. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_structs_b.h | 11 +++++-- src/arena.c | 35 +++++++++++++-------- src/ctl.c | 2 +- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index d97b6a7d..7b16d229 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -39,7 +39,7 @@ void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -ssize_t arena_decay_time_get(tsdn_t *tsdn, arena_t *arena); +ssize_t arena_decay_time_get(arena_t *arena); bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 92f1e41f..49fdd17d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -36,15 +36,22 @@ struct arena_bin_info_s { bitmap_info_t bitmap_info; }; +typedef union { + size_t u; /* Used for atomic operations. */ + ssize_t s; /* Time may be negative (means "never"). */ +} arena_decay_time_t; + struct arena_decay_s { - /* Synchronizes all fields. */ + /* Synchronizes all non-atomic fields. 
*/ malloc_mutex_t mtx; /* * Approximate time in seconds from the creation of a set of unused * dirty pages until an equivalent set of unused dirty pages is purged * and/or reused. + * + * Synchronization: atomic. */ - ssize_t time; + arena_decay_time_t time; /* time / SMOOTHSTEP_NSTEPS. */ nstime_t interval; /* diff --git a/src/arena.c b/src/arena.c index 18b49312..9f395769 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,7 +157,7 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *decay_time = arena_decay_time_get(tsdn, arena); + *decay_time = arena_decay_time_get(arena); *nactive += atomic_read_zu(&arena->nactive); *ndirty += extents_npages_get(&arena->extents_cached); } @@ -491,6 +491,20 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, arena_nactive_add(arena, udiff >> LG_PAGE); } +static ssize_t +arena_decay_time_read(arena_t *arena) { + arena_decay_time_t dt; + dt.u = atomic_read_zu(&arena->decay.time.u); + return dt.s; +} + +static void +arena_decay_time_write(arena_t *arena, ssize_t decay_time) { + arena_decay_time_t dt; + dt.s = decay_time; + atomic_write_zu(&arena->decay.time.u, dt.u); +} + static void arena_decay_deadline_init(arena_t *arena) { /* @@ -499,7 +513,7 @@ arena_decay_deadline_init(arena_t *arena) { */ nstime_copy(&arena->decay.deadline, &arena->decay.epoch); nstime_add(&arena->decay.deadline, &arena->decay.interval); - if (arena->decay.time > 0) { + if (arena_decay_time_read(arena) > 0) { nstime_t jitter; nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, @@ -615,7 +629,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) { static void arena_decay_reinit(arena_t *arena, ssize_t decay_time) { - arena->decay.time = decay_time; + arena_decay_time_write(arena, decay_time); 
if (decay_time > 0) { nstime_init2(&arena->decay.interval, decay_time, 0); nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); @@ -650,14 +664,8 @@ arena_decay_time_valid(ssize_t decay_time) { } ssize_t -arena_decay_time_get(tsdn_t *tsdn, arena_t *arena) { - ssize_t decay_time; - - malloc_mutex_lock(tsdn, &arena->decay.mtx); - decay_time = arena->decay.time; - malloc_mutex_unlock(tsdn, &arena->decay.mtx); - - return decay_time; +arena_decay_time_get(arena_t *arena) { + return arena_decay_time_read(arena); } bool @@ -687,8 +695,9 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); /* Purge all or nothing if the option is disabled. */ - if (arena->decay.time <= 0) { - if (arena->decay.time == 0) { + ssize_t decay_time = arena_decay_time_read(arena); + if (decay_time <= 0) { + if (decay_time == 0) { arena_purge_to_limit(tsdn, arena, 0); } return; diff --git a/src/ctl.c b/src/ctl.c index 7ec8ff2a..83e9e93e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1797,7 +1797,7 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(tsd_tsdn(tsd), arena); + size_t oldval = arena_decay_time_get(arena); READ(oldval, ssize_t); } if (newp != NULL) { From fd058f572baf0955091ed0dd66cca78105fdb539 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 1 Mar 2017 15:25:48 -0800 Subject: [PATCH 0687/2608] Immediately purge cached extents if decay_time is 0. This fixes a regression caused by 54269dc0ed3e4d04b2539016431de3cfe8330719 (Remove obsolete arena_maybe_purge() call.), as well as providing a general fix. This resolves #665. 
--- include/jemalloc/internal/arena_externs.h | 3 - include/jemalloc/internal/private_symbols.txt | 2 - src/arena.c | 69 ++++++------ src/large.c | 3 +- test/unit/decay.c | 105 +++++++++++++++++- 5 files changed, 138 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 7b16d229..36d91869 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -33,8 +33,6 @@ extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void arena_extent_dalloc_large_finish(tsdn_t *tsdn, arena_t *arena, - extent_t *extent); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, @@ -42,7 +40,6 @@ void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, ssize_t arena_decay_time_get(arena_t *arena); bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index be56e1a2..0234181e 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -26,7 +26,6 @@ arena_dss_prec_get arena_dss_prec_set arena_extent_alloc_large arena_extent_cache_dalloc -arena_extent_dalloc_large_finish arena_extent_dalloc_large_prep arena_extent_ralloc_large_expand arena_extent_ralloc_large_shrink @@ -40,7 +39,6 @@ arena_internal_get arena_internal_sub arena_malloc arena_malloc_hard 
-arena_maybe_purge arena_migrate arena_new arena_nthreads_dec diff --git a/src/arena.c b/src/arena.c index 9f395769..ecb5cd42 100644 --- a/src/arena.c +++ b/src/arena.c @@ -259,7 +259,9 @@ arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); - arena_purge(tsdn, arena, false); + if (arena_decay_time_get(arena) == 0) { + arena_purge(tsdn, arena, true); + } } JEMALLOC_INLINE_C void * @@ -456,13 +458,6 @@ arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); } -void -arena_extent_dalloc_large_finish(tsdn_t *tsdn, arena_t *arena, - extent_t *extent) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - extent_dalloc_cache(tsdn, arena, &extent_hooks, extent); -} - void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldusize) { @@ -663,34 +658,7 @@ arena_decay_time_valid(ssize_t decay_time) { return false; } -ssize_t -arena_decay_time_get(arena_t *arena) { - return arena_decay_time_read(arena); -} - -bool -arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { - if (!arena_decay_time_valid(decay_time)) { - return true; - } - - malloc_mutex_lock(tsdn, &arena->decay.mtx); - /* - * Restart decay backlog from scratch, which may cause many dirty pages - * to be immediately purged. It would conceptually be possible to map - * the old backlog onto the new backlog, but there is no justification - * for such complexity since decay_time changes are intended to be - * infrequent, either between the {-1, 0, >0} states, or a one-time - * arbitrary change during initial arena configuration. 
- */ - arena_decay_reinit(arena, decay_time); - arena_maybe_purge(tsdn, arena); - malloc_mutex_unlock(tsdn, &arena->decay.mtx); - - return false; -} - -void +static void arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); @@ -735,6 +703,33 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { } } +ssize_t +arena_decay_time_get(arena_t *arena) { + return arena_decay_time_read(arena); +} + +bool +arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { + if (!arena_decay_time_valid(decay_time)) { + return true; + } + + malloc_mutex_lock(tsdn, &arena->decay.mtx); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_time changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. 
+ */ + arena_decay_reinit(arena, decay_time); + arena_maybe_purge(tsdn, arena); + malloc_mutex_unlock(tsdn, &arena->decay.mtx); + + return false; +} + static size_t arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, size_t ndirty_limit, extent_list_t *purge_extents) { @@ -846,7 +841,7 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - extent_dalloc_cache(tsdn, arena, &extent_hooks, slab); + arena_extent_cache_dalloc(tsdn, arena, &extent_hooks, slab); } static void diff --git a/src/large.c b/src/large.c index bb638499..e9536bca 100644 --- a/src/large.c +++ b/src/large.c @@ -319,7 +319,8 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - arena_extent_dalloc_large_finish(tsdn, arena, extent); + extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + arena_extent_cache_dalloc(tsdn, arena, &extent_hooks, extent); } void diff --git a/test/unit/decay.c b/test/unit/decay.c index 98453221..2513dbd4 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -348,10 +348,113 @@ TEST_BEGIN(test_decay_nonmonotonic) { } TEST_END +static unsigned +do_arena_create(ssize_t decay_time) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.decay_time", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&decay_time, + sizeof(decay_time)), 0, "Unexpected mallctlbymib() failure"); + return arena_ind; +} + +static void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = 
sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +void +do_epoch(void) { + uint64_t epoch = 1; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); +} + +static size_t +get_arena_pdirty(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pdirty; + size_t sz = sizeof(pdirty); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pdirty; +} + +static void * +do_mallocx(size_t size, int flags) { + void *p = mallocx(size, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + return p; +} + +static void +generate_dirty(unsigned arena_ind, size_t size) { + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + void *p = do_mallocx(size, flags); + dallocx(p, flags); +} + +TEST_BEGIN(test_decay_now) { + unsigned arena_ind = do_arena_create(0); + assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; + /* Verify that dirty pages never linger after deallocation. 
*/ + for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + size_t size = sizes[i]; + generate_dirty(arena_ind, size); + assert_zu_eq(get_arena_pdirty(arena_ind), 0, + "Unexpected dirty pages"); + } + do_arena_destroy(arena_ind); +} +TEST_END + +TEST_BEGIN(test_decay_never) { + unsigned arena_ind = do_arena_create(-1); + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; + void *ptrs[sizeof(sizes)/sizeof(size_t)]; + for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + ptrs[i] = do_mallocx(sizes[i], flags); + } + /* Verify that each deallocation generates additional dirty pages. */ + size_t pdirty_prev = get_arena_pdirty(arena_ind); + assert_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + dallocx(ptrs[i], flags); + size_t pdirty = get_arena_pdirty(arena_ind); + assert_zu_gt(pdirty, pdirty_prev, + "Expected dirty pages to increase."); + pdirty_prev = pdirty; + } + do_arena_destroy(arena_ind); +} +TEST_END + int main(void) { return test( test_decay_ticks, test_decay_ticker, - test_decay_nonmonotonic); + test_decay_nonmonotonic, + test_decay_now, + test_decay_never); } From 957b8c5f2171f54f66689875144830e682be8e64 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Feb 2017 17:33:38 -0800 Subject: [PATCH 0688/2608] Stop #define-ining away 'inline' In the long term, we'll transition to C99-style inline semantics. In the short-term, this will allow both styles to coexist without breaking one another. 
--- include/jemalloc/internal/jemalloc_internal_macros.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index b70d08a2..35a7a104 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -16,7 +16,6 @@ # define JEMALLOC_ALWAYS_INLINE_C static # define JEMALLOC_INLINE # define JEMALLOC_INLINE_C static -# define inline #else # define JEMALLOC_ENABLE_INLINE # ifdef JEMALLOC_HAVE_ATTR From d4ac7582f32f506d5203bea2f0115076202add38 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 25 Jan 2017 09:54:27 -0800 Subject: [PATCH 0689/2608] Introduce a backport of C11 atomics This introduces a backport of C11 atomics. It has four implementations; ranked in order of preference, they are: - GCC/Clang __atomic builtins - GCC/Clang __sync builtins - MSVC _Interlocked builtins - C11 atomics, from The primary advantages are: - Close adherence to the standard API gives us a defined memory model. - Type safety: atomic objects are now separate types from non-atomic ones, so that it's impossible to mix up atomic and non-atomic updates (which is undefined behavior that compilers are starting to take advantage of). - Efficiency: we can specify ordering for operations, avoiding fences and atomic operations on strongly ordered architectures (example: `atomic_write_u32(ptr, val);` involves a CAS loop, whereas `atomic_store(ptr, val, ATOMIC_RELEASE);` is a plain store. This diff leaves in the current atomics API (implementing them in terms of the backport). This lets us transition uses over piecemeal. Testing: This is by nature hard to test. I've manually tested the first three options on Linux on gcc by futzing with the #defines manually, on freebsd with gcc and clang, on MSVC, and on OS X with clang. 
All of these were x86 machines though, and we don't have any test infrastructure set up for non-x86 platforms. --- Makefile.in | 1 - configure.ac | 52 +- include/jemalloc/internal/atomic.h | 111 ++++ include/jemalloc/internal/atomic_c11.h | 97 ++++ include/jemalloc/internal/atomic_externs.h | 12 - include/jemalloc/internal/atomic_gcc_atomic.h | 125 +++++ include/jemalloc/internal/atomic_gcc_sync.h | 191 +++++++ include/jemalloc/internal/atomic_inlines.h | 525 ------------------ include/jemalloc/internal/atomic_msvc.h | 158 ++++++ include/jemalloc/internal/atomic_types.h | 8 - .../jemalloc/internal/jemalloc_internal.h.in | 22 +- .../internal/jemalloc_internal_defs.h.in | 13 +- include/jemalloc/internal/private_symbols.txt | 20 - src/atomic.c | 2 - test/unit/atomic.c | 282 +++++++--- 15 files changed, 947 insertions(+), 672 deletions(-) create mode 100644 include/jemalloc/internal/atomic.h create mode 100644 include/jemalloc/internal/atomic_c11.h delete mode 100644 include/jemalloc/internal/atomic_externs.h create mode 100644 include/jemalloc/internal/atomic_gcc_atomic.h create mode 100644 include/jemalloc/internal/atomic_gcc_sync.h delete mode 100644 include/jemalloc/internal/atomic_inlines.h create mode 100644 include/jemalloc/internal/atomic_msvc.h delete mode 100644 include/jemalloc/internal/atomic_types.h delete mode 100644 src/atomic.c diff --git a/Makefile.in b/Makefile.in index 76a73b76..53ebe32e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -90,7 +90,6 @@ BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/je C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ - $(srcroot)src/atomic.c \ $(srcroot)src/base.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ diff --git a/configure.ac b/configure.ac index 1653fe7f..0095caf1 100644 --- a/configure.ac +++ b/configure.ac @@ -550,7 +550,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) 
AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_C11ATOMICS]) + AC_DEFINE([JEMALLOC_C11_ATOMICS]) force_tls="0" default_munmap="0" ;; @@ -1730,36 +1730,44 @@ JE_COMPILABLE([C11 atomics], [ volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; uint64_t r = atomic_fetch_add(a, x) + x; return r == 0; -], [je_cv_c11atomics]) -if test "x${je_cv_c11atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_C11ATOMICS]) +], [je_cv_c11_atomics]) +if test "x${je_cv_c11_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_C11_ATOMICS]) fi dnl ============================================================================ -dnl Check for atomic(9) operations as provided on FreeBSD. +dnl Check for GCC-style __atomic atomics. -JE_COMPILABLE([atomic(9)], [ -#include -#include -#include +JE_COMPILABLE([GCC __atomic atomics], [ ], [ - { - uint32_t x32 = 0; - volatile uint32_t *x32p = &x32; - atomic_fetchadd_32(x32p, 1); - } - { - unsigned long xlong = 0; - volatile unsigned long *xlongp = &xlong; - atomic_fetchadd_long(xlongp, 1); - } -], [je_cv_atomic9]) -if test "x${je_cv_atomic9}" = "xyes" ; then - AC_DEFINE([JEMALLOC_ATOMIC9]) + int x = 0; + int val = 1; + int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED); + int after_add = x; + return after_add == 1; +], [je_cv_gcc_atomic_atomics]) +if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS]) +fi + +dnl ============================================================================ +dnl Check for GCC-style __sync atomics. 
+ +JE_COMPILABLE([GCC __sync atomics], [ +], [ + int x = 0; + int before_add = __sync_fetch_and_add(&x, 1); + int after_add = x; + return (before_add == 0) && (after_add == 1); +], [je_cv_gcc_sync_atomics]) +if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS]) fi dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. +dnl We need this not for the atomic operations (which are provided above), but +dnl rather for the OSSpinLock type it exposes. JE_COMPILABLE([Darwin OSAtomic*()], [ #include diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h new file mode 100644 index 00000000..84fbbdfb --- /dev/null +++ b/include/jemalloc/internal/atomic.h @@ -0,0 +1,111 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_H + +#define ATOMIC_INLINE static inline + +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_atomic.h" +#elif defined(JEMALLOC_GCC_SYNC_ATOMICS) +# include "jemalloc/internal/atomic_gcc_sync.h" +#elif defined(_MSC_VER) +# include "jemalloc/internal/atomic_msvc.h" +#elif defined(JEMALLOC_C11_ATOMICS) +# include "jemalloc/internal/atomic_c11.h" +#else +# error "Don't have atomics implemented on this platform." +#endif + +/* + * This header gives more or less a backport of C11 atomics. The user can write + * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate + * counterparts of the C11 atomic functions for type, as so: + * JEMALLOC_GENERATE_ATOMICS(int *, pi, 3); + * and then write things like: + * int *some_ptr; + * atomic_pi_t atomic_ptr_to_int; + * atomic_store_pi(&atomic_ptr_to_int, some_ptr, ATOMIC_RELAXED); + * int *prev_value = atomic_exchange_pi(&ptr_to_int, NULL, ATOMIC_ACQ_REL); + * assert(some_ptr == prev_value); + * and expect things to work in the obvious way. 
+ * + * Also included (with naming differences to avoid conflicts with the standard + * library): + * atomic_fence(atomic_memory_order_t) (mimics C11's atomic_thread_fence). + * ATOMIC_INIT (mimics C11's ATOMIC_VAR_INIT). + */ + +/* + * Pure convenience, so that we don't have to type "atomic_memory_order_" + * quite so often. + */ +#define ATOMIC_RELAXED atomic_memory_order_relaxed +#define ATOMIC_ACQUIRE atomic_memory_order_acquire, +#define ATOMIC_RELEASE atomic_memory_order_release, +#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel, +#define ATOMIC_SEQ_CST atomic_memory_order_seq_cst + +/* + * In order to let us transition atomics usage piecemeal (and reason locally + * about memory orders), we'll support the previous API for a while. + */ +#define JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \ +ATOMIC_INLINE type \ +atomic_read_##short_type(type *p) { \ + return atomic_load_##short_type ((atomic_##short_type##_t *)p, \ + ATOMIC_SEQ_CST); \ +} \ + \ +ATOMIC_INLINE void \ +atomic_write_##short_type(type *p, const type val) { \ + atomic_store_##short_type((atomic_##short_type##_t *)p, \ + (type)val, ATOMIC_SEQ_CST); \ +} \ +ATOMIC_INLINE bool \ +atomic_cas_##short_type(type *p, type c, type s) { \ + /* Note the '!' -- atomic_cas inverts the usual semantics. 
*/ \ + return !atomic_compare_exchange_strong_##short_type( \ + (atomic_##short_type##_t *)p, &c, s, ATOMIC_SEQ_CST, \ + ATOMIC_SEQ_CST); \ +} + +#define JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(type, short_type) \ +JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \ + \ +ATOMIC_INLINE type \ +atomic_add_##short_type(type *p, type x) { \ + return atomic_fetch_add_##short_type( \ + (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) + x; \ +} \ +ATOMIC_INLINE type \ +atomic_sub_##short_type(type *p, type x) { \ + return atomic_fetch_sub_##short_type( \ + (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) - x; \ +} + +JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(void *, p) + +/* + * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only + * platform that actually needs to know the size, MSVC. + */ +JEMALLOC_GENERATE_ATOMICS(bool, b, 0) +JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(bool, b) + +JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(unsigned, u) + +JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(size_t, zu) + +JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32) + +# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint64_t, u64) +# endif + +#undef ATOMIC_INLINE + +#endif /* JEMALLOC_INTERNAL_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h new file mode 100644 index 00000000..a5f9313a --- /dev/null +++ b/include/jemalloc/internal/atomic_c11.h @@ -0,0 +1,97 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H +#define JEMALLOC_INTERNAL_ATOMIC_C11_H + +#include + +#define ATOMIC_INIT(...) 
ATOMIC_VAR_INIT(__VA_ARGS__) + +#define atomic_memory_order_t memory_order +#define atomic_memory_order_relaxed memory_order_relaxed +#define atomic_memory_order_acquire memory_order_acquire +#define atomic_memory_order_release memory_order_release +#define atomic_memory_order_acq_rel memory_order_acq_rel +#define atomic_memory_order_seq_cst memory_order_seq_cst + +#define atomic_fence atomic_thread_fence + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef _Atomic(type) atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + /* \ + * A strict interpretation of the C standard prevents \ + * atomic_load from taking a const argument, but it's \ + * convenient for our purposes. This cast is a workaround. \ + */ \ + atomic_##short_type##_t* a_nonconst = \ + (atomic_##short_type##_t*)a; \ + return atomic_load_explicit(a_nonconst, mo); \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + atomic_store_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return atomic_exchange_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_weak_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return atomic_compare_exchange_strong_explicit(a, expected, \ + desired, success_mo, failure_mo); \ +} + +/* + * Integral types have some 
special operations available that non-integral ones + * lack. + */ +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_add_explicit(a, val, mo); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_sub_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_and_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_or_explicit(a, val, mo); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return atomic_fetch_xor_explicit(a, val, mo); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */ diff --git a/include/jemalloc/internal/atomic_externs.h b/include/jemalloc/internal/atomic_externs.h deleted file mode 100644 index 09f06408..00000000 --- a/include/jemalloc/internal/atomic_externs.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H -#define JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H - -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -#define atomic_read_u64(p) atomic_add_u64(p, 0) -#endif -#define atomic_read_u32(p) atomic_add_u32(p, 0) -#define atomic_read_p(p) atomic_add_p(p, NULL) -#define atomic_read_zu(p) atomic_add_zu(p, 0) -#define atomic_read_u(p) atomic_add_u(p, 0) - -#endif /* JEMALLOC_INTERNAL_ATOMIC_EXTERNS_H */ diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h new file mode 100644 index 00000000..3d13b4a6 --- 
/dev/null +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -0,0 +1,125 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE int +atomic_enum_to_builtin(atomic_memory_order_t mo) { + switch (mo) { + case atomic_memory_order_relaxed: + return __ATOMIC_RELAXED; + case atomic_memory_order_acquire: + return __ATOMIC_ACQUIRE; + case atomic_memory_order_release: + return __ATOMIC_RELEASE; + case atomic_memory_order_acq_rel: + return __ATOMIC_ACQ_REL; + case atomic_memory_order_seq_cst: + return __ATOMIC_SEQ_CST; + } + /* Can't actually happen; the switch is exhaustive. */ + return __ATOMIC_SEQ_CST; +} + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + __atomic_thread_fence(atomic_enum_to_builtin(mo)); +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + type result; \ + __atomic_exchange(&a->repr, &val, &result, \ + atomic_enum_to_builtin(mo)); \ + return result; \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t 
success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + true, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + false, \ + atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_add(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_sub(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_and(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_or(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __atomic_fetch_xor(&a->repr, val, \ + atomic_enum_to_builtin(mo)); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h new file mode 100644 index 
00000000..30846e4d --- /dev/null +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -0,0 +1,191 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H +#define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H + +#define ATOMIC_INIT(...) {__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + /* Easy cases first: no barrier, and full barrier. */ + if (mo == atomic_memory_order_relaxed) { + asm volatile("" ::: "memory"); + return; + } + if (mo == atomic_memory_order_seq_cst) { + asm volatile("" ::: "memory"); + __sync_synchronize(); + asm volatile("" ::: "memory"); + return; + } + asm volatile("" ::: "memory"); +# if defined(__i386__) || defined(__x86_64__) + /* This is implicit on x86. */ +# elif defined(__ppc__) + asm volatile("lwsync"); +# elif defined(__sparc__) && defined(__arch64__) + if (mo == atomic_memory_order_acquire) { + asm volatile("membar #LoadLoad | #LoadStore"); + } else if (mo == atomic_memory_order_release) { + asm volatile("membar #LoadStore | #StoreStore"); + } else { + asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); + } +# else + __sync_synchronize(); +# endif + asm volatile("" ::: "memory"); +} + +/* + * A correct implementation of seq_cst loads and stores on weakly ordered + * architectures could do either of the following: + * 1. store() is weak-fence -> store -> strong fence, load() is load -> + * strong-fence. + * 2. store() is strong-fence -> store, load() is strong-fence -> load -> + * weak-fence. + * The tricky thing is, load() and store() above can be the load or store + * portions of a gcc __sync builtin, so we have to follow GCC's lead, which + * means going with strategy 2. + * On strongly ordered architectures, the natural strategy is to stick a strong + * fence after seq_cst stores, and have naked loads. 
So we want the strong + * fences in different places on different architectures. + * atomic_pre_sc_load_fence and atomic_post_sc_store_fence allow us to + * accomplish this. + */ + +ATOMIC_INLINE void +atomic_pre_sc_load_fence() { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_relaxed); +# else + atomic_fence(atomic_memory_order_seq_cst); +# endif +} + +ATOMIC_INLINE void +atomic_post_sc_store_fence() { +# if defined(__i386__) || defined(__x86_64__) || \ + (defined(__sparc__) && defined(__arch64__)) + atomic_fence(atomic_memory_order_seq_cst); +# else + atomic_fence(atomic_memory_order_relaxed); +# endif + +} + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +typedef struct { \ + type volatile repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_pre_sc_load_fence(); \ + } \ + type result = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return result; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_post_sc_store_fence(); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + /* \ + * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ + * an atomic exchange builtin. We fake it with a CAS loop. 
\ + */ \ + while (true) { \ + type old = a->repr; \ + if (__sync_bool_compare_and_swap(&a->repr, old, val)) { \ + return old; \ + } \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ + desired); \ + if (prev == *expected) { \ + return true; \ + } else { \ + *expected = prev; \ + return false; \ + } \ +} + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ + /* unused */ lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_add(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_sub(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_and(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_or(&a->repr, val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return __sync_fetch_and_xor(&a->repr, val); \ +} + 
+#endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ diff --git a/include/jemalloc/internal/atomic_inlines.h b/include/jemalloc/internal/atomic_inlines.h deleted file mode 100644 index de66d57d..00000000 --- a/include/jemalloc/internal/atomic_inlines.h +++ /dev/null @@ -1,525 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ATOMIC_INLINES_H -#define JEMALLOC_INTERNAL_ATOMIC_INLINES_H - -/* - * All arithmetic functions return the arithmetic result of the atomic - * operation. Some atomic operation APIs return the value prior to mutation, in - * which case the following functions must redundantly compute the result so - * that it can be returned. These functions are normally inlined, so the extra - * operations can be optimized away if the return values aren't used by the - * callers. - * - * atomic_read_( *p) { return *p; } - * atomic_add_( *p, x) { return *p += x; } - * atomic_sub_( *p, x) { return *p -= x; } - * bool atomic_cas_( *p, c, s) - * { - * if (*p != c) - * return true; - * *p = s; - * return false; - * } - * void atomic_write_( *p, x) { *p = x; } - */ - -#ifndef JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_ATOMIC_U64 -uint64_t atomic_add_u64(uint64_t *p, uint64_t x); -uint64_t atomic_sub_u64(uint64_t *p, uint64_t x); -bool atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s); -void atomic_write_u64(uint64_t *p, uint64_t x); -# endif -uint32_t atomic_add_u32(uint32_t *p, uint32_t x); -uint32_t atomic_sub_u32(uint32_t *p, uint32_t x); -bool atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s); -void atomic_write_u32(uint32_t *p, uint32_t x); -void *atomic_add_p(void **p, void *x); -void *atomic_sub_p(void **p, void *x); -bool atomic_cas_p(void **p, void *c, void *s); -void atomic_write_p(void **p, const void *x); -size_t atomic_add_zu(size_t *p, size_t x); -size_t atomic_sub_zu(size_t *p, size_t x); -bool atomic_cas_zu(size_t *p, size_t c, size_t s); -void atomic_write_zu(size_t *p, size_t x); -unsigned atomic_add_u(unsigned *p, unsigned x); -unsigned atomic_sub_u(unsigned *p, 
unsigned x); -bool atomic_cas_u(unsigned *p, unsigned c, unsigned s); -void atomic_write_u(unsigned *p, unsigned x); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ATOMIC_C_)) -/******************************************************************************/ -/* 64-bit operations. */ -#ifdef JEMALLOC_ATOMIC_U64 -# if (defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - uint64_t t = x; - - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return t + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - uint64_t t; - - x = (uint64_t)(-(int64_t)x); - t = x; - asm volatile ( - "lock; xaddq %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return t + x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - uint8_t success; - - asm volatile ( - "lock; cmpxchgq %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" /* Clobbers. */ - ); - - return !(bool)success; -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - asm volatile ( - "xchgq %1, %0;" /* Lock is implied by xchgq. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. 
*/ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return atomic_fetch_add(a, x) + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return atomic_fetch_sub(a, x) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - return !atomic_compare_exchange_strong(a, &c, s); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - volatile atomic_uint_least64_t *a = (volatile atomic_uint_least64_t *)p; - atomic_store(a, x); -} -# elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - /* - * atomic_fetchadd_64() doesn't exist, but we only ever use this - * function on LP64 systems, so atomic_fetchadd_long() will do. 
- */ - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)x) + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return atomic_fetchadd_long(p, (unsigned long)(-(long)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - return !atomic_cmpset_long(p, (unsigned long)c, (unsigned long)s); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - assert(sizeof(uint64_t) == sizeof(unsigned long)); - - atomic_store_rel_long(p, x); -} -# elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - return OSAtomicAdd64((int64_t)x, (int64_t *)p); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - return OSAtomicAdd64(-((int64_t)x), (int64_t *)p); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return !OSAtomicCompareAndSwap64(c, s, (int64_t *)p); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - uint64_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. 
*/ - do { - o = atomic_read_u64(p); - } while (atomic_cas_u64(p, o, x)); -} -# elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - return InterlockedExchangeAdd64(p, x) + x; -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - return InterlockedExchangeAdd64(p, -((int64_t)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - uint64_t o; - - o = InterlockedCompareExchange64(p, s, c); - return o != c; -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - InterlockedExchange64(p, x); -} -# elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_8)) -JEMALLOC_INLINE uint64_t -atomic_add_u64(uint64_t *p, uint64_t x) { - return __sync_add_and_fetch(p, x); -} - -JEMALLOC_INLINE uint64_t -atomic_sub_u64(uint64_t *p, uint64_t x) { - return __sync_sub_and_fetch(p, x); -} - -JEMALLOC_INLINE bool -atomic_cas_u64(uint64_t *p, uint64_t c, uint64_t s) { - return !__sync_bool_compare_and_swap(p, c, s); -} - -JEMALLOC_INLINE void -atomic_write_u64(uint64_t *p, uint64_t x) { - __sync_lock_test_and_set(p, x); -} -# else -# error "Missing implementation for 64-bit atomic operations" -# endif -#endif - -/******************************************************************************/ -/* 32-bit operations. */ -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - uint32_t t = x; - - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. */ - ); - - return t + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - uint32_t t; - - x = (uint32_t)(-(int32_t)x); - t = x; - asm volatile ( - "lock; xaddl %0, %1;" - : "+r" (t), "=m" (*p) /* Outputs. */ - : "m" (*p) /* Inputs. 
*/ - ); - - return t + x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - uint8_t success; - - asm volatile ( - "lock; cmpxchgl %4, %0;" - "sete %1;" - : "=m" (*p), "=a" (success) /* Outputs. */ - : "m" (*p), "a" (c), "r" (s) /* Inputs. */ - : "memory" - ); - - return !(bool)success; -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - asm volatile ( - "xchgl %1, %0;" /* Lock is implied by xchgl. */ - : "=m" (*p), "+r" (x) /* Outputs. */ - : "m" (*p) /* Inputs. */ - : "memory" /* Clobbers. */ - ); -} -# elif (defined(JEMALLOC_C11ATOMICS)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return atomic_fetch_add(a, x) + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return atomic_fetch_sub(a, x) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - return !atomic_compare_exchange_strong(a, &c, s); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - volatile atomic_uint_least32_t *a = (volatile atomic_uint_least32_t *)p; - atomic_store(a, x); -} -#elif (defined(JEMALLOC_ATOMIC9)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return atomic_fetchadd_32(p, x) + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return atomic_fetchadd_32(p, (uint32_t)(-(int32_t)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return !atomic_cmpset_32(p, c, s); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - atomic_store_rel_32(p, x); -} -#elif (defined(JEMALLOC_OSATOMIC)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return OSAtomicAdd32((int32_t)x, (int32_t 
*)p); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return OSAtomicAdd32(-((int32_t)x), (int32_t *)p); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return !OSAtomicCompareAndSwap32(c, s, (int32_t *)p); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - uint32_t o; - - /*The documented OSAtomic*() API does not expose an atomic exchange. */ - do { - o = atomic_read_u32(p); - } while (atomic_cas_u32(p, o, x)); -} -#elif (defined(_MSC_VER)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return InterlockedExchangeAdd(p, x) + x; -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return InterlockedExchangeAdd(p, -((int32_t)x)) - x; -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - uint32_t o; - - o = InterlockedCompareExchange(p, s, c); - return o != c; -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - InterlockedExchange(p, x); -} -#elif (defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_4) || \ - defined(JE_FORCE_SYNC_COMPARE_AND_SWAP_4)) -JEMALLOC_INLINE uint32_t -atomic_add_u32(uint32_t *p, uint32_t x) { - return __sync_add_and_fetch(p, x); -} - -JEMALLOC_INLINE uint32_t -atomic_sub_u32(uint32_t *p, uint32_t x) { - return __sync_sub_and_fetch(p, x); -} - -JEMALLOC_INLINE bool -atomic_cas_u32(uint32_t *p, uint32_t c, uint32_t s) { - return !__sync_bool_compare_and_swap(p, c, s); -} - -JEMALLOC_INLINE void -atomic_write_u32(uint32_t *p, uint32_t x) { - __sync_lock_test_and_set(p, x); -} -#else -# error "Missing implementation for 32-bit atomic operations" -#endif - -/******************************************************************************/ -/* Pointer operations. 
*/ -JEMALLOC_INLINE void * -atomic_add_p(void **p, void *x) { -#if (LG_SIZEOF_PTR == 3) - return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -JEMALLOC_INLINE void * -atomic_sub_p(void **p, void *x) { -#if (LG_SIZEOF_PTR == 3) - return (void *)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return (void *)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_p(void **p, void *c, void *s) { -#if (LG_SIZEOF_PTR == 3) - return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); -#elif (LG_SIZEOF_PTR == 2) - return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); -#endif -} - -JEMALLOC_INLINE void -atomic_write_p(void **p, const void *x) { -#if (LG_SIZEOF_PTR == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* size_t operations. 
*/ -JEMALLOC_INLINE size_t -atomic_add_zu(size_t *p, size_t x) { -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -JEMALLOC_INLINE size_t -atomic_sub_zu(size_t *p, size_t x) { -#if (LG_SIZEOF_PTR == 3) - return (size_t)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_PTR == 2) - return (size_t)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_zu(size_t *p, size_t c, size_t s) { -#if (LG_SIZEOF_PTR == 3) - return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); -#elif (LG_SIZEOF_PTR == 2) - return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); -#endif -} - -JEMALLOC_INLINE void -atomic_write_zu(size_t *p, size_t x) { -#if (LG_SIZEOF_PTR == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_PTR == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -/* unsigned operations. 
*/ -JEMALLOC_INLINE unsigned -atomic_add_u(unsigned *p, unsigned x) { -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -JEMALLOC_INLINE unsigned -atomic_sub_u(unsigned *p, unsigned x) { -#if (LG_SIZEOF_INT == 3) - return (unsigned)atomic_add_u64((uint64_t *)p, (uint64_t)-((int64_t)x)); -#elif (LG_SIZEOF_INT == 2) - return (unsigned)atomic_add_u32((uint32_t *)p, (uint32_t)-((int32_t)x)); -#endif -} - -JEMALLOC_INLINE bool -atomic_cas_u(unsigned *p, unsigned c, unsigned s) { -#if (LG_SIZEOF_INT == 3) - return atomic_cas_u64((uint64_t *)p, (uint64_t)c, (uint64_t)s); -#elif (LG_SIZEOF_INT == 2) - return atomic_cas_u32((uint32_t *)p, (uint32_t)c, (uint32_t)s); -#endif -} - -JEMALLOC_INLINE void -atomic_write_u(unsigned *p, unsigned x) { -#if (LG_SIZEOF_INT == 3) - atomic_write_u64((uint64_t *)p, (uint64_t)x); -#elif (LG_SIZEOF_INT == 2) - atomic_write_u32((uint32_t *)p, (uint32_t)x); -#endif -} - -/******************************************************************************/ -#endif -#endif /* JEMALLOC_INTERNAL_ATOMIC_INLINES_H */ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h new file mode 100644 index 00000000..67057ce5 --- /dev/null +++ b/include/jemalloc/internal/atomic_msvc.h @@ -0,0 +1,158 @@ +#ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H +#define JEMALLOC_INTERNAL_ATOMIC_MSVC_H + +#define ATOMIC_INIT(...) 
{__VA_ARGS__} + +typedef enum { + atomic_memory_order_relaxed, + atomic_memory_order_acquire, + atomic_memory_order_release, + atomic_memory_order_acq_rel, + atomic_memory_order_seq_cst +} atomic_memory_order_t; + +typedef char atomic_repr_0_t; +typedef short atomic_repr_1_t; +typedef long atomic_repr_2_t; +typedef __int64 atomic_repr_3_t; + +ATOMIC_INLINE void +atomic_fence(atomic_memory_order_t mo) { + _ReadWriteBarrier(); +# if defined(_M_ARM) || defined(_M_ARM64) + /* ARM needs a barrier for everything but relaxed. */ + if (mo != atomic_memory_order_relaxed) { + MemoryBarrier(); + } +# elif defined(_M_IX86) || defined (_M_X64) + /* x86 needs a barrier only for seq_cst. */ + if (mo == atomic_memory_order_seq_cst) { + MemoryBarrier(); + } +# else +# error "Don't know how to create atomics for this platform for MSVC." +# endif + _ReadWriteBarrier(); +} + +#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t + +#define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) +#define ATOMIC_RAW_CONCAT(a, b) a ## b + +#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \ + base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) + +#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ + ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) + +#define ATOMIC_INTERLOCKED_SUFFIX_0 8 +#define ATOMIC_INTERLOCKED_SUFFIX_1 16 +#define ATOMIC_INTERLOCKED_SUFFIX_2 +#define ATOMIC_INTERLOCKED_SUFFIX_3 64 + +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ +typedef struct { \ + ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ +} atomic_##short_type##_t; \ + \ +ATOMIC_INLINE type \ +atomic_load_##short_type(const atomic_##short_type##_t *a, \ + atomic_memory_order_t mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return (type) ret; \ +} \ + \ +ATOMIC_INLINE void \ +atomic_store_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + 
if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_fence(atomic_memory_order_seq_cst); \ + } \ +} \ + \ +ATOMIC_INLINE type \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) e = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \ + ATOMIC_INTERLOCKED_REPR(lg_size) d = \ + (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ + ATOMIC_INTERLOCKED_REPR(lg_size) old = \ + ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \ + lg_size)(&a->repr, d, e); \ + if (old == e) { \ + return true; \ + } else { \ + *expected = (type)old; \ + return false; \ + } \ +} \ + \ +ATOMIC_INLINE bool \ +atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ + type *expected, type desired, atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + /* We implement the weak version with strong semantics. 
*/ \ + return atomic_compare_exchange_weak_##short_type(a, expected, \ + desired, success_mo, failure_mo); \ +} + + +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ +JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ + \ +ATOMIC_INLINE type \ +atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchangeAdd, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ + \ +ATOMIC_INLINE type \ +atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + /* \ + * MSVC warns on negation of unsigned operands, but for us it \ + * gives exactly the right semantics (MAX_TYPE + 1 - operand). \ + */ \ + __pragma(warning(push)) \ + __pragma(warning(disable: 4146)) \ + return atomic_fetch_add_##short_type(a, -val, mo); \ + __pragma(warning(pop)) \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedAnd, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedOr, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} \ +ATOMIC_INLINE type \ +atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ + type val, atomic_memory_order_t mo) { \ + return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \ + &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ +} + +#endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */ diff --git a/include/jemalloc/internal/atomic_types.h b/include/jemalloc/internal/atomic_types.h deleted file mode 100644 index 0fd5e5b5..00000000 --- a/include/jemalloc/internal/atomic_types.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ATOMIC_TYPES_H 
-#define JEMALLOC_INTERNAL_ATOMIC_TYPES_H - -#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# define JEMALLOC_ATOMIC_U64 -#endif - -#endif /* JEMALLOC_INTERNAL_ATOMIC_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 0d0440b5..f18acabb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -146,14 +146,6 @@ static const bool have_thp = #endif ; -#if defined(JEMALLOC_C11ATOMICS) && !defined(__cplusplus) -#include -#endif - -#ifdef JEMALLOC_ATOMIC9 -#include -#endif - #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include #endif @@ -199,10 +191,21 @@ static const bool have_thp = * its translation unit). Each component is now broken up into multiple header * files, corresponding to the sections above (e.g. instead of "tsd.h", we now * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). + * + * Those files which have been converted to explicitly include their + * inter-component dependencies are now in the initial HERMETIC HEADERS + * section. These headers may still rely on this file for system headers and + * global jemalloc headers, however. 
*/ #include "jemalloc/internal/jemalloc_internal_macros.h" +/******************************************************************************/ +/* HERMETIC HEADERS */ +/******************************************************************************/ + +#include "jemalloc/internal/atomic.h" + /******************************************************************************/ /* TYPES */ /******************************************************************************/ @@ -380,7 +383,6 @@ typedef unsigned szind_t; #include "jemalloc/internal/nstime_types.h" #include "jemalloc/internal/util_types.h" -#include "jemalloc/internal/atomic_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" @@ -489,7 +491,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/nstime_externs.h" #include "jemalloc/internal/util_externs.h" -#include "jemalloc/internal/atomic_externs.h" #include "jemalloc/internal/ckh_externs.h" #include "jemalloc/internal/stats_externs.h" #include "jemalloc/internal/ctl_externs.h" @@ -513,7 +514,6 @@ void jemalloc_postfork_child(void); /******************************************************************************/ #include "jemalloc/internal/util_inlines.h" -#include "jemalloc/internal/atomic_inlines.h" #include "jemalloc/internal/spin_inlines.h" #include "jemalloc/internal/prng_inlines.h" #include "jemalloc/internal/ticker_inlines.h" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6c70e167..b2e0077e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -30,16 +30,13 @@ #undef LG_VADDR /* Defined if C11 atomics are available. */ -#undef JEMALLOC_C11ATOMICS +#undef JEMALLOC_C11_ATOMICS -/* Defined if the equivalent of FreeBSD's atomic(9) functions are available. 
*/ -#undef JEMALLOC_ATOMIC9 +/* Defined if GCC __atomic atomics are available. */ +#undef JEMALLOC_GCC_ATOMIC_ATOMICS -/* - * Defined if OSAtomic*() functions are available, as provided by Darwin, and - * documented in the atomic(3) manual page. - */ -#undef JEMALLOC_OSATOMIC +/* Defined if GCC __sync atomics are available. */ +#undef JEMALLOC_GCC_SYNC_ATOMICS /* * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 0234181e..b122dae6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -72,26 +72,6 @@ arena_tdata_get arena_tdata_get_hard arenas arenas_tdata_cleanup -atomic_add_p -atomic_add_u -atomic_add_u32 -atomic_add_u64 -atomic_add_zu -atomic_cas_p -atomic_cas_u -atomic_cas_u32 -atomic_cas_u64 -atomic_cas_zu -atomic_sub_p -atomic_sub_u -atomic_sub_u32 -atomic_sub_u64 -atomic_sub_zu -atomic_write_p -atomic_write_u -atomic_write_u32 -atomic_write_u64 -atomic_write_zu b0get base_alloc base_boot diff --git a/src/atomic.c b/src/atomic.c deleted file mode 100644 index 9871390d..00000000 --- a/src/atomic.c +++ /dev/null @@ -1,2 +0,0 @@ -#define JEMALLOC_ATOMIC_C_ -#include "jemalloc/internal/jemalloc_internal.h" diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 78661597..237c7474 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -1,101 +1,257 @@ #include "test/jemalloc_test.h" -#define TEST_STRUCT(p, t) \ -struct p##_test_s { \ - t accum0; \ - t x; \ - t s; \ -}; \ -typedef struct p##_test_s p##_test_t; +/* + * We *almost* have consistent short names (e.g. "u32" for uint32_t, "b" for + * bool, etc. The one exception is that the short name for void * is "p" in + * some places and "ptr" in others. In the long run it would be nice to unify + * these, but in the short run we'll use this shim. 
+ */ +#define assert_p_eq assert_ptr_eq -#define TEST_BODY(p, t, tc, ta, FMT) do { \ - const p##_test_t tests[] = { \ - {(t)-1, (t)-1, (t)-2}, \ - {(t)-1, (t) 0, (t)-2}, \ - {(t)-1, (t) 1, (t)-2}, \ +/* + * t: the non-atomic type, like "uint32_t". + * ta: the short name for the type, like "u32". + * val[1,2,3]: Values of the given type. The CAS tests use val2 for expected, + * and val3 for desired. + */ + +#define DO_TESTS(t, ta, val1, val2, val3) do { \ + t val; \ + t raw_atomic; \ + t expected; \ + bool success; \ + /* This (along with the load below) also tests ATOMIC_LOAD. */ \ + atomic_##ta##_t atom = ATOMIC_INIT(val1); \ \ - {(t) 0, (t)-1, (t)-2}, \ - {(t) 0, (t) 0, (t)-2}, \ - {(t) 0, (t) 1, (t)-2}, \ + /* ATOMIC_INIT and load. */ \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, "Load or init failed"); \ \ - {(t) 1, (t)-1, (t)-2}, \ - {(t) 1, (t) 0, (t)-2}, \ - {(t) 1, (t) 1, (t)-2}, \ + /* Store. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + atomic_store_##ta(&atom, val2, ATOMIC_RELAXED); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val2, val, "Store failed"); \ \ - {(t)0, (t)-(1 << 22), (t)-2}, \ - {(t)0, (t)(1 << 22), (t)-2}, \ - {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \ - {(t)(1 << 22), (t)(1 << 22), (t)-2} \ - }; \ - unsigned i; \ + /* Exchange. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_exchange_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, "Exchange returned invalid value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val2, val, "Exchange store invalid value"); \ \ - for (i = 0; i < sizeof(tests)/sizeof(p##_test_t); i++) { \ - bool err; \ - t accum = tests[i].accum0; \ - assert_##ta##_eq(atomic_read_##p(&accum), \ - tests[i].accum0, \ - "Erroneous read, i=%u", i); \ + /* \ + * Weak CAS. Spurious failures are allowed, so we loop a few \ + * times. 
\ + */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + success = false; \ + for (int i = 0; i < 10 && !success; i++) { \ + expected = val2; \ + success = atomic_compare_exchange_weak_##ta(&atom, \ + &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, expected, \ + "CAS should update expected"); \ + } \ + assert_b_eq(val1 == val2, success, \ + "Weak CAS did the wrong state update"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + if (success) { \ + assert_##ta##_eq(val3, val, \ + "Successful CAS should update atomic"); \ + } else { \ + assert_##ta##_eq(val1, val, \ + "Unsuccessful CAS should not update atomic"); \ + } \ \ - assert_##ta##_eq(atomic_add_##p(&accum, tests[i].x), \ - (t)((tc)tests[i].accum0 + (tc)tests[i].x), \ - "i=%u, accum=%"FMT", x=%"FMT, \ - i, tests[i].accum0, tests[i].x); \ - assert_##ta##_eq(atomic_read_##p(&accum), accum, \ - "Erroneous add, i=%u", i); \ + /* Strong CAS. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + expected = val2; \ + success = atomic_compare_exchange_strong_##ta(&atom, &expected, \ + val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ + assert_b_eq(val1 == val2, success, \ + "Strong CAS did the wrong state update"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + if (success) { \ + assert_##ta##_eq(val3, val, \ + "Successful CAS should update atomic"); \ + } else { \ + assert_##ta##_eq(val1, val, \ + "Unsuccessful CAS should not update atomic"); \ + } \ \ - accum = tests[i].accum0; \ - assert_##ta##_eq(atomic_sub_##p(&accum, tests[i].x), \ - (t)((tc)tests[i].accum0 - (tc)tests[i].x), \ - "i=%u, accum=%"FMT", x=%"FMT, \ - i, tests[i].accum0, tests[i].x); \ - assert_##ta##_eq(atomic_read_##p(&accum), accum, \ - "Erroneous sub, i=%u", i); \ \ - accum = tests[i].accum0; \ - err = atomic_cas_##p(&accum, tests[i].x, tests[i].s); \ - assert_b_eq(err, tests[i].accum0 != tests[i].x, \ - "Erroneous cas success/failure result"); \ - assert_##ta##_eq(accum, err ? 
tests[i].accum0 : \ - tests[i].s, "Erroneous cas effect, i=%u", i); \ + /* Previous atomics API. */ \ \ - accum = tests[i].accum0; \ - atomic_write_##p(&accum, tests[i].s); \ - assert_##ta##_eq(accum, tests[i].s, \ - "Erroneous write, i=%u", i); \ + /* Read. */ \ + raw_atomic = val1; \ + val = atomic_read_##ta(&raw_atomic); \ + assert_##ta##_eq(val1, val, "Read failed"); \ + \ + /* Write. */ \ + raw_atomic = val1; \ + atomic_write_##ta(&raw_atomic, val2); \ + assert_##ta##_eq(val2, raw_atomic, "Write failed"); \ + \ + /* CAS. */ \ + raw_atomic = val1; \ + success = !atomic_cas_##ta(&raw_atomic, val2, val3); \ + assert_b_eq(val1 == val2, success, \ + "CAS did the wrong state update"); \ + val = raw_atomic; \ + if (success) { \ + assert_##ta##_eq(val3, val, \ + "Successful CAS should update atomic"); \ + } else { \ + assert_##ta##_eq(val1, val, \ + "Unsuccessful CAS should not update atomic"); \ } \ } while (0) -TEST_STRUCT(u64, uint64_t) +#define DO_INTEGER_TESTS(t, ta, val1, val2) do { \ + atomic_##ta##_t atom; \ + t val; \ + t raw_atomic; \ + \ + /* Fetch-add. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_add_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-add should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 + val2, val, \ + "Fetch-add should update atomic"); \ + \ + /* Fetch-sub. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_sub_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-sub should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 - val2, val, \ + "Fetch-sub should update atomic"); \ + \ + /* Fetch-and. 
*/ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_and_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-and should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 & val2, val, \ + "Fetch-and should update atomic"); \ + \ + /* Fetch-or. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_or_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-or should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 | val2, val, \ + "Fetch-or should update atomic"); \ + \ + /* Fetch-xor. */ \ + atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ + val = atomic_fetch_xor_##ta(&atom, val2, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1, val, \ + "Fetch-xor should return previous value"); \ + val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ + assert_##ta##_eq(val1 ^ val2, val, \ + "Fetch-xor should update atomic"); \ + \ + /* Previous atomics API. */ \ + \ + /* Add. */ \ + raw_atomic = val1; \ + val = atomic_add_##ta(&raw_atomic, val2); \ + assert_##ta##_eq(val1 + val2, val, \ + "atomic_add should return new value"); \ + assert_##ta##_eq(val1 + val2, raw_atomic, \ + "atomic_add should update atomic"); \ + \ + /* Sub. 
*/ \ + raw_atomic = val1; \ + val = atomic_sub_##ta(&raw_atomic, val2); \ + assert_##ta##_eq(val1 - val2, val, \ + "atomic_sub should return new value"); \ + assert_##ta##_eq(val1 - val2, raw_atomic, \ + "atomic_add should update atomic"); \ +} while (0) + +#define TEST_STRUCT(t, ta) \ +typedef struct { \ + t val1; \ + t val2; \ + t val3; \ +} ta##_test_t; + +#define TEST_CASES(t) { \ + {(t)-1, (t)-1, (t)-2}, \ + {(t)-1, (t) 0, (t)-2}, \ + {(t)-1, (t) 1, (t)-2}, \ + \ + {(t) 0, (t)-1, (t)-2}, \ + {(t) 0, (t) 0, (t)-2}, \ + {(t) 0, (t) 1, (t)-2}, \ + \ + {(t) 1, (t)-1, (t)-2}, \ + {(t) 1, (t) 0, (t)-2}, \ + {(t) 1, (t) 1, (t)-2}, \ + \ + {(t)0, (t)-(1 << 22), (t)-2}, \ + {(t)0, (t)(1 << 22), (t)-2}, \ + {(t)(1 << 22), (t)-(1 << 22), (t)-2}, \ + {(t)(1 << 22), (t)(1 << 22), (t)-2} \ +} + +#define TEST_BODY(t, ta) do { \ + const ta##_test_t tests[] = TEST_CASES(t); \ + for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \ + ta##_test_t test = tests[i]; \ + DO_TESTS(t, ta, test.val1, test.val2, test.val3); \ + } \ +} while (0) + +#define INTEGER_TEST_BODY(t, ta) do { \ + const ta##_test_t tests[] = TEST_CASES(t); \ + for (unsigned i = 0; i < sizeof(tests)/sizeof(tests[0]); i++) { \ + ta##_test_t test = tests[i]; \ + DO_TESTS(t, ta, test.val1, test.val2, test.val3); \ + DO_INTEGER_TESTS(t, ta, test.val1, test.val2); \ + } \ +} while (0) + +TEST_STRUCT(uint64_t, u64); TEST_BEGIN(test_atomic_u64) { #if !(LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) test_skip("64-bit atomic operations not supported"); #else - TEST_BODY(u64, uint64_t, uint64_t, u64, FMTx64); + INTEGER_TEST_BODY(uint64_t, u64); #endif } TEST_END -TEST_STRUCT(u32, uint32_t) + +TEST_STRUCT(uint32_t, u32); TEST_BEGIN(test_atomic_u32) { - TEST_BODY(u32, uint32_t, uint32_t, u32, "#"FMTx32); + INTEGER_TEST_BODY(uint32_t, u32); } TEST_END -TEST_STRUCT(p, void *) +TEST_STRUCT(void *, p); TEST_BEGIN(test_atomic_p) { - TEST_BODY(p, void *, uintptr_t, ptr, "p"); + TEST_BODY(void *, p); } TEST_END 
-TEST_STRUCT(zu, size_t) +TEST_STRUCT(size_t, zu); TEST_BEGIN(test_atomic_zu) { - TEST_BODY(zu, size_t, size_t, zu, "#zx"); + INTEGER_TEST_BODY(size_t, zu); } TEST_END -TEST_STRUCT(u, unsigned) +TEST_STRUCT(unsigned, u); TEST_BEGIN(test_atomic_u) { - TEST_BODY(u, unsigned, unsigned, u, "#x"); + INTEGER_TEST_BODY(unsigned, u); } TEST_END From 04d8fcb74563a305bdaa8d3ee3ba6ba49d09dfb8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Mar 2017 20:44:39 -0800 Subject: [PATCH 0690/2608] Optimize malloc_large_stats_t maintenance. Convert the nrequests field to be partially derived, and the curlextents to be fully derived, in order to reduce the number of stats updates needed during common operations. This change affects ndalloc stats during arena reset, because it is no longer possible to cancel out ndalloc effects (curlextents would become negative). --- include/jemalloc/internal/stats_structs.h | 4 +-- src/arena.c | 35 ++++------------------- 2 files changed, 8 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 354f93ee..06ba95fc 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -64,10 +64,10 @@ struct malloc_large_stats_s { * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - uint64_t nrequests; + uint64_t nrequests; /* Partially derived. */ /* Current number of allocations of this size class. */ - size_t curlextents; + size_t curlextents; /* Derived. 
*/ }; /* diff --git a/src/arena.c b/src/arena.c index ecb5cd42..f4450f34 100644 --- a/src/arena.c +++ b/src/arena.c @@ -89,7 +89,7 @@ arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, #endif } -static void +UNUSED static void arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, uint64_t x) { #ifdef JEMALLOC_ATOMIC_U64 @@ -206,11 +206,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nrequests); - lstats[i].nrequests += nrequests; - astats->nrequests_large += nrequests; + lstats[i].nrequests += nmalloc + nrequests; + astats->nrequests_large += nmalloc + nrequests; - size_t curlextents = arena_stats_read_zu(tsdn, - &arena->stats, &arena->stats.lstats[i].curlextents); + assert(nmalloc >= ndalloc); + assert(nmalloc - ndalloc <= SIZE_T_MAX); + size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; astats->allocated_large += curlextents * index2size(NBINS + i); } @@ -359,10 +360,6 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].nmalloc, 1); - arena_stats_add_u64(tsdn, &arena->stats, - &arena->stats.lstats[hindex].nrequests, 1); - arena_stats_add_zu(tsdn, &arena->stats, - &arena->stats.lstats[hindex].curlextents, 1); } static void @@ -379,21 +376,6 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].ndalloc, 1); - arena_stats_sub_zu(tsdn, &arena->stats, - &arena->stats.lstats[hindex].curlextents, 1); -} - -static void -arena_large_reset_stats_cancel(tsdn_t *tsdn, arena_t *arena, size_t usize) { - szind_t index = size2index(usize); - szind_t hindex = (index >= NBINS) ? 
index - NBINS : 0; - - cassert(config_stats); - - arena_stats_lock(tsdn, &arena->stats); - arena_stats_sub_u64(tsdn, &arena->stats, - &arena->stats.lstats[hindex].ndalloc, 1); - arena_stats_unlock(tsdn, &arena->stats); } static void @@ -912,11 +894,6 @@ arena_reset(tsd_t *tsd, arena_t *arena) { } large_dalloc(tsd_tsdn(tsd), extent); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - /* Cancel out unwanted effects on stats. */ - if (config_stats) { - arena_large_reset_stats_cancel(tsd_tsdn(tsd), arena, - usize); - } } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); From e9852b577643433a2ecfef1026f1f9498e723654 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 3 Mar 2017 10:10:08 -0800 Subject: [PATCH 0691/2608] Disentangle assert and util This is the first header refactoring diff, #533. It splits the assert and util components into separate, hermetic, header files. In the process, it splits out two of the large sub-components of util (the stdio.h replacement, and bit manipulation routines) into their own components (malloc_io.h and bit_util.h). This is mostly to break up cyclic dependencies, but it also breaks off a good chunk of the catch-all-ness of util, which is nice. 
--- Makefile.in | 8 +- include/jemalloc/internal/assert.h | 12 ++- .../internal/{util_inlines.h => bit_util.h} | 72 ++++---------- .../jemalloc/internal/jemalloc_internal.h.in | 7 +- include/jemalloc/internal/malloc_io.h | 63 ++++++++++++ include/jemalloc/internal/util.h | 71 ++++++++++++++ include/jemalloc/internal/util_externs.h | 23 ----- include/jemalloc/internal/util_types.h | 95 ------------------- src/{util.c => malloc_io.c} | 42 +++++--- test/include/test/jemalloc_test.h.in | 9 +- test/unit/bit_util.c | 55 +++++++++++ test/unit/{util.c => malloc_io.c} | 49 ---------- 12 files changed, 265 insertions(+), 241 deletions(-) rename include/jemalloc/internal/{util_inlines.h => bit_util.h} (67%) create mode 100644 include/jemalloc/internal/malloc_io.h create mode 100644 include/jemalloc/internal/util.h delete mode 100644 include/jemalloc/internal/util_externs.h delete mode 100644 include/jemalloc/internal/util_types.h rename src/{util.c => malloc_io.c} (94%) create mode 100644 test/unit/bit_util.c rename test/unit/{util.c => malloc_io.c} (86%) diff --git a/Makefile.in b/Makefile.in index 53ebe32e..04ce288a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -99,6 +99,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ $(srcroot)src/large.c \ + $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ @@ -110,7 +111,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ - $(srcroot)src/util.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) C_SRCS += $(srcroot)src/zone.c @@ -147,8 +147,8 @@ ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := C_UTIL_CPP_SRCS := else -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c -C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/util.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c +C_UTIL_CPP_SRCS := 
$(srcroot)src/nstime.c $(srcroot)src/malloc_io.c endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ @@ -165,6 +165,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ $(srcroot)test/unit/mallctl.c \ + $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ @@ -193,7 +194,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ - $(srcroot)test/unit/util.c \ $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c ifeq (@enable_prof@, 1) diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index b9ab813e..be4d45b3 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -1,3 +1,6 @@ +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" + /* * Define a custom assert() in order to reduce the chances of deadlock during * assertion failure. @@ -43,4 +46,11 @@ } while (0) #endif - +/* Use to assert a particular configuration, e.g., cassert(config_debug). 
*/ +#ifndef cassert +#define cassert(c) do { \ + if (unlikely(!(c))) { \ + not_reached(); \ + } \ +} while (0) +#endif diff --git a/include/jemalloc/internal/util_inlines.h b/include/jemalloc/internal/bit_util.h similarity index 67% rename from include/jemalloc/internal/util_inlines.h rename to include/jemalloc/internal/bit_util.h index c09bd6da..8d078a8a 100644 --- a/include/jemalloc/internal/util_inlines.h +++ b/include/jemalloc/internal/bit_util.h @@ -1,22 +1,9 @@ -#ifndef JEMALLOC_INTERNAL_UTIL_INLINES_H -#define JEMALLOC_INTERNAL_UTIL_INLINES_H +#ifndef JEMALLOC_INTERNAL_BIT_UTIL_H +#define JEMALLOC_INTERNAL_BIT_UTIL_H -#ifndef JEMALLOC_ENABLE_INLINE -unsigned ffs_llu(unsigned long long bitmap); -unsigned ffs_lu(unsigned long bitmap); -unsigned ffs_u(unsigned bitmap); -unsigned ffs_zu(size_t bitmap); -unsigned ffs_u64(uint64_t bitmap); -unsigned ffs_u32(uint32_t bitmap); -uint64_t pow2_ceil_u64(uint64_t x); -uint32_t pow2_ceil_u32(uint32_t x); -size_t pow2_ceil_zu(size_t x); -unsigned lg_floor(size_t x); -void set_errno(int errnum); -int get_errno(void); -#endif +#include "jemalloc/internal/assert.h" -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_UTIL_C_)) +#define BIT_UTIL_INLINE static inline /* Sanity check. 
*/ #if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ @@ -24,22 +11,23 @@ int get_errno(void); # error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif -JEMALLOC_ALWAYS_INLINE unsigned + +BIT_UTIL_INLINE unsigned ffs_llu(unsigned long long bitmap) { return JEMALLOC_INTERNAL_FFSLL(bitmap); } -JEMALLOC_ALWAYS_INLINE unsigned +BIT_UTIL_INLINE unsigned ffs_lu(unsigned long bitmap) { return JEMALLOC_INTERNAL_FFSL(bitmap); } -JEMALLOC_ALWAYS_INLINE unsigned +BIT_UTIL_INLINE unsigned ffs_u(unsigned bitmap) { return JEMALLOC_INTERNAL_FFS(bitmap); } -JEMALLOC_ALWAYS_INLINE unsigned +BIT_UTIL_INLINE unsigned ffs_zu(size_t bitmap) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT return ffs_u(bitmap); @@ -52,7 +40,7 @@ ffs_zu(size_t bitmap) { #endif } -JEMALLOC_ALWAYS_INLINE unsigned +BIT_UTIL_INLINE unsigned ffs_u64(uint64_t bitmap) { #if LG_SIZEOF_LONG == 3 return ffs_lu(bitmap); @@ -63,7 +51,7 @@ ffs_u64(uint64_t bitmap) { #endif } -JEMALLOC_ALWAYS_INLINE unsigned +BIT_UTIL_INLINE unsigned ffs_u32(uint32_t bitmap) { #if LG_SIZEOF_INT == 2 return ffs_u(bitmap); @@ -73,7 +61,7 @@ ffs_u32(uint32_t bitmap) { return ffs_u(bitmap); } -JEMALLOC_INLINE uint64_t +BIT_UTIL_INLINE uint64_t pow2_ceil_u64(uint64_t x) { x--; x |= x >> 1; @@ -86,7 +74,7 @@ pow2_ceil_u64(uint64_t x) { return x; } -JEMALLOC_INLINE uint32_t +BIT_UTIL_INLINE uint32_t pow2_ceil_u32(uint32_t x) { x--; x |= x >> 1; @@ -99,7 +87,7 @@ pow2_ceil_u32(uint32_t x) { } /* Compute the smallest power of 2 that is >= x. 
*/ -JEMALLOC_INLINE size_t +BIT_UTIL_INLINE size_t pow2_ceil_zu(size_t x) { #if (LG_SIZEOF_PTR == 3) return pow2_ceil_u64(x); @@ -109,10 +97,9 @@ pow2_ceil_zu(size_t x) { } #if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -JEMALLOC_INLINE unsigned +BIT_UTIL_INLINE unsigned lg_floor(size_t x) { size_t ret; - assert(x != 0); asm ("bsr %1, %0" @@ -123,7 +110,7 @@ lg_floor(size_t x) { return (unsigned)ret; } #elif (defined(_MSC_VER)) -JEMALLOC_INLINE unsigned +BIT_UTIL_INLINE unsigned lg_floor(size_t x) { unsigned long ret; @@ -140,7 +127,7 @@ lg_floor(size_t x) { return (unsigned)ret; } #elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -JEMALLOC_INLINE unsigned +BIT_UTIL_INLINE unsigned lg_floor(size_t x) { assert(x != 0); @@ -153,7 +140,7 @@ lg_floor(size_t x) { #endif } #else -JEMALLOC_INLINE unsigned +BIT_UTIL_INLINE unsigned lg_floor(size_t x) { assert(x != 0); @@ -173,25 +160,6 @@ lg_floor(size_t x) { } #endif -/* Set error code. */ -JEMALLOC_INLINE void -set_errno(int errnum) { -#ifdef _WIN32 - SetLastError(errnum); -#else - errno = errnum; -#endif -} +#undef BIT_UTIL_INLINE -/* Get last error code. 
*/ -JEMALLOC_INLINE int -get_errno(void) { -#ifdef _WIN32 - return GetLastError(); -#else - return errno; -#endif -} -#endif - -#endif /* JEMALLOC_INTERNAL_UTIL_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index f18acabb..09eda5ec 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -204,7 +204,11 @@ static const bool have_thp = /* HERMETIC HEADERS */ /******************************************************************************/ +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" /******************************************************************************/ /* TYPES */ @@ -382,7 +386,6 @@ typedef unsigned szind_t; #endif #include "jemalloc/internal/nstime_types.h" -#include "jemalloc/internal/util_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" @@ -490,7 +493,6 @@ void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); #include "jemalloc/internal/nstime_externs.h" -#include "jemalloc/internal/util_externs.h" #include "jemalloc/internal/ckh_externs.h" #include "jemalloc/internal/stats_externs.h" #include "jemalloc/internal/ctl_externs.h" @@ -513,7 +515,6 @@ void jemalloc_postfork_child(void); /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/util_inlines.h" #include "jemalloc/internal/spin_inlines.h" #include "jemalloc/internal/prng_inlines.h" #include "jemalloc/internal/ticker_inlines.h" diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h new file mode 100644 index 00000000..7ff3d5b1 --- /dev/null +++ 
b/include/jemalloc/internal/malloc_io.h @@ -0,0 +1,63 @@ +#ifndef JEMALLOC_INTERNAL_MALLOC_IO_H +#define JEMALLOC_INTERNAL_MALLOC_IO_H + +#ifdef _WIN32 +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" +#else +# include <inttypes.h> +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR +#endif + +/* Size of stack-allocated buffer passed to buferror(). */ +#define BUFERROR_BUF 64 + +/* + * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be + * large enough for all possible uses within jemalloc. + */ +#define MALLOC_PRINTF_BUFSIZE 4096 + + +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, + int base); +void malloc_write(const char *s); + +/* + * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating + * point math. + */ +size_t malloc_vsnprintf(char *str, size_t size, const char *format, + va_list ap); +size_t malloc_snprintf(char *str, size_t size, const char *format, ...) + JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, + const char *format, va_list ap); +void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, + const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); + +#endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h new file mode 100644 index 00000000..88662e89 --- /dev/null +++ b/include/jemalloc/internal/util.h @@ -0,0 +1,71 @@ +#ifndef JEMALLOC_INTERNAL_UTIL_H +#define JEMALLOC_INTERNAL_UTIL_H + +#define UTIL_INLINE static inline + +/* Junk fill patterns. */ +#ifndef JEMALLOC_ALLOC_JUNK +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +#endif +#ifndef JEMALLOC_FREE_JUNK +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +#endif + +/* + * Wrap a cpp argument that contains commas such that it isn't broken up into + * multiple arguments. + */ +#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ + +/* cpp macro definition stringification. */ +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + +/* + * Silence compiler warnings due to uninitialized values. This is used + * wherever the compiler fails to recognize that the variable is never used + * uninitialized. + */ +#ifdef JEMALLOC_CC_SILENCE +# define JEMALLOC_CC_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CC_SILENCE_INIT(v) +#endif + +#ifdef __GNUC__ +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) +#else +# define likely(x) !!(x) +# define unlikely(x) !!(x) +#endif + +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif + +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() + +/* Set error code. */ +UTIL_INLINE void +set_errno(int errnum) { +#ifdef _WIN32 + SetLastError(errnum); +#else + errno = errnum; +#endif +} + +/* Get last error code. 
*/ +UTIL_INLINE int +get_errno(void) { +#ifdef _WIN32 + return GetLastError(); +#else + return errno; +#endif +} + +#undef UTIL_INLINE + +#endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/include/jemalloc/internal/util_externs.h b/include/jemalloc/internal/util_externs.h deleted file mode 100644 index b203b773..00000000 --- a/include/jemalloc/internal/util_externs.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_UTIL_EXTERNS_H -#define JEMALLOC_INTERNAL_UTIL_EXTERNS_H - -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, - char **restrict endptr, int base); -void malloc_write(const char *s); - -/* - * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating - * point math. - */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); -size_t malloc_snprintf(char *str, size_t size, const char *format, ...) - JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); -void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); - -#endif /* JEMALLOC_INTERNAL_UTIL_EXTERNS_H */ diff --git a/include/jemalloc/internal/util_types.h b/include/jemalloc/internal/util_types.h deleted file mode 100644 index e0f79aad..00000000 --- a/include/jemalloc/internal/util_types.h +++ /dev/null @@ -1,95 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_UTIL_TYPES_H -#define JEMALLOC_INTERNAL_UTIL_TYPES_H - -#ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" -#else -# include -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR -#endif - -/* Size of stack-allocated buffer passed to buferror(). */ -#define BUFERROR_BUF 64 - -/* - * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be - * large enough for all possible uses within jemalloc. - */ -#define MALLOC_PRINTF_BUFSIZE 4096 - -/* Junk fill patterns. */ -#ifndef JEMALLOC_ALLOC_JUNK -# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) -#endif -#ifndef JEMALLOC_FREE_JUNK -# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) -#endif - -/* - * Wrap a cpp argument that contains commas such that it isn't broken up into - * multiple arguments. - */ -#define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ - -/* cpp macro definition stringification. */ -#define STRINGIFY_HELPER(x) #x -#define STRINGIFY(x) STRINGIFY_HELPER(x) - -/* - * Silence compiler warnings due to uninitialized values. This is used - * wherever the compiler fails to recognize that the variable is never used - * uninitialized. 
- */ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif - -#ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) -#else -# define likely(x) !!(x) -# define unlikely(x) !!(x) -#endif - -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif - -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() - -#include "jemalloc/internal/assert.h" - -/* Use to assert a particular configuration, e.g., cassert(config_debug). */ -#define cassert(c) do { \ - if (unlikely(!(c))) { \ - not_reached(); \ - } \ -} while (0) - -#endif /* JEMALLOC_INTERNAL_UTIL_TYPES_H */ diff --git a/src/util.c b/src/malloc_io.c similarity index 94% rename from src/util.c rename to src/malloc_io.c index ee5fa47e..fd6ff0f0 100644 --- a/src/util.c +++ b/src/malloc_io.c @@ -1,3 +1,19 @@ +#define JEMALLOC_MALLOC_IO_C_ +#include "jemalloc/internal/jemalloc_internal.h" + +#ifdef assert +# undef assert +#endif +#ifdef not_reached +# undef not_reached +#endif +#ifdef not_implemented +# undef not_implemented +#endif +#ifdef assert_not_implemented +# undef assert_not_implemented +#endif + /* * Define simple versions of assertion macros that won't recurse in case * of assertion failures in malloc_*printf(). @@ -24,22 +40,25 @@ } \ } while (0) -#define JEMALLOC_UTIL_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#define assert_not_implemented(e) do { \ + if (unlikely(config_debug && !(e))) { \ + not_implemented(); \ + } \ +} while (0) /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static void wrtmessage(void *cbopaque, const char *s); -#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) -static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, +static void wrtmessage(void *cbopaque, const char *s); +#define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) +static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p); -#define D2S_BUFSIZE (1 + U2S_BUFSIZE) -static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); -#define O2S_BUFSIZE (1 + U2S_BUFSIZE) -static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); -#define X2S_BUFSIZE (2 + U2S_BUFSIZE) -static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, +#define D2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); +#define O2S_BUFSIZE (1 + U2S_BUFSIZE) +static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); +#define X2S_BUFSIZE (2 + U2S_BUFSIZE) +static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p); /******************************************************************************/ @@ -662,4 +681,5 @@ malloc_printf(const char *format, ...) { #undef assert #undef not_reached #undef not_implemented +#undef assert_not_implemented #include "jemalloc/internal/assert.h" diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 36d59cff..0770d020 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -69,12 +69,15 @@ static const bool config_debug = # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" +/* Hermetic headers. */ +# include "jemalloc/internal/assert.h" +# include "jemalloc/internal/malloc_io.h" +# include "jemalloc/internal/util.h" + +/* Non-hermetic headers. 
*/ # include "jemalloc/internal/nstime_types.h" # include "jemalloc/internal/nstime_structs.h" # include "jemalloc/internal/nstime_externs.h" -# include "jemalloc/internal/util_types.h" -# include "jemalloc/internal/util_externs.h" -# include "jemalloc/internal/util_inlines.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c new file mode 100644 index 00000000..fe5c4473 --- /dev/null +++ b/test/unit/bit_util.c @@ -0,0 +1,55 @@ +#include "test/jemalloc_test.h" + +#define TEST_POW2_CEIL(t, suf, pri) do { \ + unsigned i, pow2; \ + t x; \ + \ + assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ + << i, "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i+1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ + x++) { \ + assert_##suf##_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ +} while (0) + +TEST_BEGIN(test_pow2_ceil_u64) { + TEST_POW2_CEIL(uint64_t, u64, FMTu64); +} +TEST_END + +TEST_BEGIN(test_pow2_ceil_u32) { + TEST_POW2_CEIL(uint32_t, u32, FMTu32); +} +TEST_END + +TEST_BEGIN(test_pow2_ceil_zu) { + TEST_POW2_CEIL(size_t, zu, "zu"); +} +TEST_END + +int +main(void) { + return test( + test_pow2_ceil_u64, + test_pow2_ceil_u32, + test_pow2_ceil_zu); +} diff --git a/test/unit/util.c b/test/unit/malloc_io.c similarity index 86% rename from test/unit/util.c rename to test/unit/malloc_io.c index 5760966f..79ba7fc5 100644 --- a/test/unit/util.c +++ b/test/unit/malloc_io.c @@ -1,51 +1,5 @@ #include 
"test/jemalloc_test.h" -#define TEST_POW2_CEIL(t, suf, pri) do { \ - unsigned i, pow2; \ - t x; \ - \ - assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ - \ - for (i = 0; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ - << i, "Unexpected result"); \ - } \ - \ - for (i = 2; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ - ((t)1) << i, "Unexpected result"); \ - } \ - \ - for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ - ((t)1) << (i+1), "Unexpected result"); \ - } \ - \ - for (pow2 = 1; pow2 < 25; pow2++) { \ - for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ - x++) { \ - assert_##suf##_eq(pow2_ceil_##suf(x), \ - ((t)1) << pow2, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ -} while (0) - -TEST_BEGIN(test_pow2_ceil_u64) { - TEST_POW2_CEIL(uint64_t, u64, FMTu64); -} -TEST_END - -TEST_BEGIN(test_pow2_ceil_u32) { - TEST_POW2_CEIL(uint32_t, u32, FMTu32); -} -TEST_END - -TEST_BEGIN(test_pow2_ceil_zu) { - TEST_POW2_CEIL(size_t, zu, "zu"); -} -TEST_END - TEST_BEGIN(test_malloc_strtoumax_no_endptr) { int err; @@ -297,9 +251,6 @@ TEST_END int main(void) { return test( - test_pow2_ceil_u64, - test_pow2_ceil_u32, - test_pow2_ceil_zu, test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, test_malloc_snprintf_truncated, From 84326c566af7d20662ee927c2daef0f63ccd3841 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 3 Mar 2017 16:43:47 -0800 Subject: [PATCH 0692/2608] Insert not_reached after an exhaustive switch In the C11 atomics backport, we couldn't use not_reached() in atomic_enum_to_builtin (in atomic_gcc_atomic.h), since atomic.h was hermetic and assert.h wasn't; there was a dependency issue. assert.h is hermetic now, so we can include it. 
--- include/jemalloc/internal/atomic_gcc_atomic.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h index 3d13b4a6..6b73a14f 100644 --- a/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#include "jemalloc/internal/assert.h" + #define ATOMIC_INIT(...) {__VA_ARGS__} typedef enum { @@ -25,8 +27,8 @@ atomic_enum_to_builtin(atomic_memory_order_t mo) { case atomic_memory_order_seq_cst: return __ATOMIC_SEQ_CST; } - /* Can't actually happen; the switch is exhaustive. */ - return __ATOMIC_SEQ_CST; + /* Can't happen; the switch is exhaustive. */ + not_reached(); } ATOMIC_INLINE void From 424e3428b16eef4614bf6786611e35e30983d23f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 6 Mar 2017 11:40:29 -0800 Subject: [PATCH 0693/2608] Make type abbreviations consistent: ssize_t is zd everywhere --- include/jemalloc/internal/jemalloc_internal.h.in | 4 ++-- include/jemalloc/internal/jemalloc_internal_macros.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 09eda5ec..8d2ec7dd 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -565,7 +565,7 @@ psz2ind(size_t psz) { pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t delta_inverse_mask = ZD(-1) << lg_delta; pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); @@ -646,7 +646,7 @@ size2index_compute(size_t size) { szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZI(-1) << lg_delta; + size_t delta_inverse_mask = ZD(-1) << lg_delta; szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 35a7a104..c5dd9b39 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -41,14 +41,14 @@ #endif #define ZU(z) ((size_t)z) -#define ZI(z) ((ssize_t)z) +#define ZD(z) ((ssize_t)z) #define QU(q) ((uint64_t)q) -#define QI(q) ((int64_t)q) +#define QD(q) ((int64_t)q) #define KZU(z) ZU(z##ULL) -#define KZI(z) ZI(z##LL) +#define KZD(z) ZD(z##LL) #define KQU(q) QU(q##ULL) -#define KQI(q) QI(q##LL) +#define KQD(q) QD(q##LL) #ifndef __DECONST # define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) From 438efede7838a04af041ae97d34208b71033fd32 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 6 Mar 2017 11:40:58 -0800 Subject: [PATCH 0694/2608] Add atomic types for ssize_t --- include/jemalloc/internal/atomic.h | 3 +++ test/unit/atomic.c | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 84fbbdfb..866adff0 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -98,6 +98,9 @@ JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(unsigned, u) JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(size_t, zu) +JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(ssize_t, zd) + JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32) diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 237c7474..fa24415a 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c 
@@ -249,6 +249,13 @@ TEST_BEGIN(test_atomic_zu) { } TEST_END +TEST_STRUCT(ssize_t, zd); +TEST_BEGIN(test_atomic_zd) { + INTEGER_TEST_BODY(ssize_t, zd); +} +TEST_END + + TEST_STRUCT(unsigned, u); TEST_BEGIN(test_atomic_u) { INTEGER_TEST_BODY(unsigned, u); @@ -262,5 +269,6 @@ main(void) { test_atomic_u32, test_atomic_p, test_atomic_zu, + test_atomic_zd, test_atomic_u); } From 4f1e94658a7efd748f10bdb9de778c835e74e539 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 6 Mar 2017 11:41:29 -0800 Subject: [PATCH 0695/2608] Change arena to use the atomic functions for ssize_t instead of the union strategy --- include/jemalloc/internal/arena_structs_b.h | 7 +------ src/arena.c | 8 ++------ 2 files changed, 3 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 49fdd17d..ebcdbc4d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -36,11 +36,6 @@ struct arena_bin_info_s { bitmap_info_t bitmap_info; }; -typedef union { - size_t u; /* Used for atomic operations. */ - ssize_t s; /* Time may be negative (means "never"). */ -} arena_decay_time_t; - struct arena_decay_s { /* Synchronizes all non-atomic fields. */ malloc_mutex_t mtx; @@ -51,7 +46,7 @@ struct arena_decay_s { * * Synchronization: atomic. */ - arena_decay_time_t time; + ssize_t time; /* time / SMOOTHSTEP_NSTEPS. 
*/ nstime_t interval; /* diff --git a/src/arena.c b/src/arena.c index f4450f34..56ab362d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -470,16 +470,12 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static ssize_t arena_decay_time_read(arena_t *arena) { - arena_decay_time_t dt; - dt.u = atomic_read_zu(&arena->decay.time.u); - return dt.s; + return atomic_read_zd(&arena->decay.time); } static void arena_decay_time_write(arena_t *arena, ssize_t decay_time) { - arena_decay_time_t dt; - dt.s = decay_time; - atomic_write_zu(&arena->decay.time.u, dt.u); + atomic_write_zd(&arena->decay.time, decay_time); } static void From 8547ee11c38738f12cf9437e773edeb4c533fddc Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Mar 2017 23:32:42 -0800 Subject: [PATCH 0696/2608] Fix flakiness in test_decay_ticker. Fix the test_decay_ticker test to carefully control slab creation/destruction such that the decay backlog reliably reaches zero. Use an isolated arena so that no extraneous allocation can confuse the situation. Speed up time during the latter part of the test so that the entire decay time can expire in a reasonable amount of wall time. 
--- test/unit/decay.c | 258 +++++++++++++++++++++++++++------------------- 1 file changed, 150 insertions(+), 108 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 2513dbd4..df910aac 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -21,6 +21,106 @@ nstime_update_mock(nstime_t *time) { return !monotonic_mock; } +static unsigned +do_arena_create(ssize_t decay_time) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.decay_time", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&decay_time, + sizeof(decay_time)), 0, "Unexpected mallctlbymib() failure"); + return arena_ind; +} + +static void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +void +do_epoch(void) { + uint64_t epoch = 1; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); +} + +void +do_purge(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +void +do_decay(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected 
mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static uint64_t +get_arena_npurge(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("stats.arenas.0.npurge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + uint64_t npurge = 0; + size_t sz = sizeof(npurge); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + config_stats ? 0 : ENOENT, "Unexpected mallctlbymib() failure"); + return npurge; +} + +static size_t +get_arena_pdirty(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pdirty; + size_t sz = sizeof(pdirty); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pdirty; +} + +static void * +do_mallocx(size_t size, int flags) { + void *p = mallocx(size, flags); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + return p; +} + +static void +generate_dirty(unsigned arena_ind, size_t size) { + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + void *p = do_mallocx(size, flags); + dallocx(p, flags); +} + TEST_BEGIN(test_decay_ticks) { ticker_t *decay_ticker; unsigned tick0, tick1; @@ -195,45 +295,37 @@ TEST_END TEST_BEGIN(test_decay_ticker) { #define NPS 1024 - int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); +#define NINTERVALS 101 + ssize_t dt = opt_decay_time; + unsigned arena_ind = do_arena_create(dt); + int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); void *ps[NPS]; - uint64_t epoch; - uint64_t npurge0 = 0; - uint64_t npurge1 = 0; - size_t sz, large; - unsigned i, nupdates0; - nstime_t 
time, decay_time, deadline; + size_t large; /* * Allocate a bunch of large objects, pause the clock, deallocate the - * objects, restore the clock, then [md]allocx() in a tight loop to - * verify the ticker triggers purging. + * objects, restore the clock, then [md]allocx() in a tight loop while + * advancing time rapidly to verify the ticker triggers purging. */ if (config_tcache) { size_t tcache_max; - sz = sizeof(size_t); + size_t sz = sizeof(size_t); assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, 0), 0, "Unexpected mallctl failure"); large = nallocx(tcache_max + 1, flags); } else { - sz = sizeof(size_t); + size_t sz = sizeof(size_t); assert_d_eq(mallctl("arenas.lextent.0.size", &large, &sz, NULL, 0), 0, "Unexpected mallctl failure"); } - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, - "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, - NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); + do_purge(arena_ind); + uint64_t npurge0 = get_arena_npurge(arena_ind); - for (i = 0; i < NPS; i++) { - ps[i] = mallocx(large, flags); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + for (unsigned i = 0; i < NPS; i++) { + ps[i] = do_mallocx(large, flags); } nupdates_mock = 0; @@ -246,43 +338,59 @@ TEST_BEGIN(test_decay_ticker) { nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; - for (i = 0; i < NPS; i++) { + for (unsigned i = 0; i < NPS; i++) { dallocx(ps[i], flags); - nupdates0 = nupdates_mock; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, - "Unexpected arena.0.decay failure"); + unsigned nupdates0 = nupdates_mock; + do_decay(arena_ind); assert_u_gt(nupdates_mock, nupdates0, "Expected nstime_update() to be called"); } - nstime_monotonic = nstime_monotonic_orig; - nstime_update = nstime_update_orig; + nstime_t time, update_interval, decay_time, deadline; nstime_init(&time, 0); nstime_update(&time); - nstime_init2(&decay_time, opt_decay_time, 0); + + nstime_init2(&decay_time, dt, 0); nstime_copy(&deadline, &time); nstime_add(&deadline, &decay_time); - do { - for (i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { - void *p = mallocx(1, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - dallocx(p, flags); - } - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); - sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, - &sz, NULL, 0), config_stats ? 0 : ENOENT, - "Unexpected mallctl result"); + nstime_init2(&update_interval, dt, 0); + nstime_idivide(&update_interval, NINTERVALS); + + nstime_init2(&decay_time, dt, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_time); + + /* + * Keep q's slab from being deallocated during the looping below. 
If + * a cached slab were to repeatedly come and go during looping, it could + * prevent the decay backlog ever becoming empty. + */ + void *p = do_mallocx(1, flags); + uint64_t npurge1; + do { + for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { + void *q = do_mallocx(1, flags); + dallocx(q, flags); + } + npurge1 = get_arena_npurge(arena_ind); + + nstime_add(&time_mock, &update_interval); nstime_update(&time); } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); + dallocx(p, flags); + + nstime_monotonic = nstime_monotonic_orig; + nstime_update = nstime_update_orig; if (config_stats) { assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); } + + do_arena_destroy(arena_ind); #undef NPS +#undef NINTERVALS } TEST_END @@ -290,7 +398,6 @@ TEST_BEGIN(test_decay_nonmonotonic) { #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; - uint64_t epoch; uint64_t npurge0 = 0; uint64_t npurge1 = 0; size_t sz, large0; @@ -302,8 +409,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + do_epoch(); sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); @@ -332,8 +438,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { "Expected nstime_update() to be called"); } - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(uint64_t)), 0, "Unexpected mallctl failure"); + do_epoch(); sz = sizeof(uint64_t); assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctl result"); @@ -348,69 +453,6 @@ TEST_BEGIN(test_decay_nonmonotonic) { } TEST_END -static unsigned -do_arena_create(ssize_t decay_time) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.decay_time", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&decay_time, - sizeof(decay_time)), 0, "Unexpected mallctlbymib() failure"); - return arena_ind; -} - -static void -do_arena_destroy(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -void -do_epoch(void) { - uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); -} - -static size_t -get_arena_pdirty(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pdirty; - size_t sz = sizeof(pdirty); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pdirty; -} - -static void * -do_mallocx(size_t size, int flags) { - void *p = mallocx(size, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - return p; -} - -static void -generate_dirty(unsigned arena_ind, size_t size) { - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - void *p = 
do_mallocx(size, flags); - dallocx(p, flags); -} - TEST_BEGIN(test_decay_now) { unsigned arena_ind = do_arena_create(0); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); From e201e24904d53897409b1dda451d40c5d2e0dc29 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 2 Mar 2017 18:04:35 -0800 Subject: [PATCH 0697/2608] Perform delayed coalescing prior to purging. Rather than purging uncoalesced extents, perform just enough incremental coalescing to purge only fully coalesced extents. In the absence of cached extent reuse, the immediate versus delayed incremental purging algorithms result in the same purge order. This resolves #655. --- include/jemalloc/internal/extent_externs.h | 5 +- include/jemalloc/internal/extent_inlines.h | 9 ++ include/jemalloc/internal/extent_structs.h | 7 +- include/jemalloc/internal/private_symbols.txt | 2 + src/arena.c | 28 +++- src/extent.c | 151 +++++++++++++----- 6 files changed, 152 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index f5efed06..ef2467e1 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -18,10 +18,11 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_heap_, extent_heap_t, extent_t) bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, - bool try_coalesce); + bool delay_coalesce); extent_state_t extents_state_get(const extents_t *extents); size_t extents_npages_get(extents_t *extents); -extent_t *extents_evict(tsdn_t *tsdn, extents_t *extents, size_t npages_min); +extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); diff --git 
a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 473aad71..989c0d19 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -37,6 +37,8 @@ void extent_list_init(extent_list_t *list); extent_t *extent_list_first(const extent_list_t *list); extent_t *extent_list_last(const extent_list_t *list); void extent_list_append(extent_list_t *list, extent_t *extent); +void extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert); void extent_list_remove(extent_list_t *list, extent_t *extent); int extent_sn_comp(const extent_t *a, const extent_t *b); int extent_ad_comp(const extent_t *a, const extent_t *b); @@ -253,6 +255,13 @@ extent_list_append(extent_list_t *list, extent_t *extent) { ql_tail_insert(list, extent, ql_link); } +JEMALLOC_INLINE void +extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert) { + ql_after_insert(to_remove, to_insert, ql_link); + ql_remove(list, to_remove, ql_link); +} + JEMALLOC_INLINE void extent_list_remove(extent_list_t *list, extent_t *extent) { ql_remove(list, extent, ql_link); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 008b6352..9ea69728 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -116,8 +116,11 @@ struct extents_s { /* All stored extents must be in the same state. */ extent_state_t state; - /* If true, try to coalesce during extent deallocation. */ - bool try_coalesce; + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. 
+ */ + bool delay_coalesce; }; #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index b122dae6..30cd3958 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -159,8 +159,10 @@ extent_init extent_last_get extent_list_append extent_list_first +extent_list_init extent_list_last extent_list_remove +extent_list_replace extent_lookup extent_merge_wrapper extent_past_get diff --git a/src/arena.c b/src/arena.c index 56ab362d..cef61cc3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -715,9 +715,9 @@ arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, /* Stash extents according to ndirty_limit. */ size_t nstashed = 0; - for (extent_t *extent = extents_evict(tsdn, &arena->extents_cached, - ndirty_limit); extent != NULL; extent = extents_evict(tsdn, - &arena->extents_cached, ndirty_limit)) { + extent_t *extent; + while ((extent = extents_evict(tsdn, arena, r_extent_hooks, + &arena->extents_cached, ndirty_limit)) != NULL) { extent_list_append(purge_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -943,9 +943,9 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * either unmap retained extents or track them for later use. 
*/ extent_hooks_t *extent_hooks = extent_hooks_get(arena); - for (extent_t *extent = extents_evict(tsdn, &arena->extents_retained, - 0); extent != NULL; extent = extents_evict(tsdn, - &arena->extents_retained, 0)) { + extent_t *extent; + while ((extent = extents_evict(tsdn, arena, &extent_hooks, + &arena->extents_retained, 0)) != NULL) { extent_dalloc_wrapper_try(tsdn, arena, &extent_hooks, extent); } } @@ -1679,12 +1679,24 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } + /* + * Delay coalescing for cached extents despite the disruptive effect on + * memory layout for best-fit extent allocation, since cached extents + * are likely to be reused soon after deallocation, and the cost of + * merging/splitting extents is non-trivial. + */ if (extents_init(tsdn, &arena->extents_cached, extent_state_dirty, - false)) { + true)) { goto label_error; } + /* + * Coalesce retained extents immediately, in part because they will + * never be evicted (and therefore there's no opportunity for delayed + * coalescing), but also because operations on retained extents are not + * in the critical path. 
+ */ if (extents_init(tsdn, &arena->extents_retained, - extent_state_retained, true)) { + extent_state_retained, false)) { goto label_error; } diff --git a/src/extent.c b/src/extent.c index 09990aae..368c9741 100644 --- a/src/extent.c +++ b/src/extent.c @@ -69,6 +69,9 @@ static size_t highpages; */ static void extent_deregister(tsdn_t *tsdn, extent_t *extent); +static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent, bool *coalesced); static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); @@ -175,7 +178,7 @@ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, - bool try_coalesce) { + bool delay_coalesce) { if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS)) { return true; } @@ -185,7 +188,7 @@ extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, extent_list_init(&extents->lru); extents->npages = 0; extents->state = state; - extents->try_coalesce = try_coalesce; + extents->delay_coalesce = delay_coalesce; return false; } @@ -200,7 +203,8 @@ extents_npages_get(extents_t *extents) { } static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { +extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, + bool preserve_lru) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -208,13 +212,16 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { size_t psz = extent_size_quantize_floor(size); pszind_t pind = psz2ind(psz); extent_heap_insert(&extents->heaps[pind], extent); - extent_list_append(&extents->lru, extent); + if (!preserve_lru) { + extent_list_append(&extents->lru, extent); + } size_t npages = size >> LG_PAGE; 
atomic_add_zu(&extents->npages, npages); } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { +extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, + bool preserve_lru) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -222,7 +229,9 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { size_t psz = extent_size_quantize_floor(size); pszind_t pind = psz2ind(psz); extent_heap_remove(&extents->heaps[pind], extent); - extent_list_remove(&extents->lru, extent); + if (!preserve_lru) { + extent_list_remove(&extents->lru, extent); + } size_t npages = size >> LG_PAGE; assert(atomic_read_zu(&extents->npages) >= npages); atomic_sub_zu(&extents->npages, size >> LG_PAGE); @@ -249,22 +258,62 @@ extents_first_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, return NULL; } +static bool +extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent) { + extent_state_set(extent, extent_state_active); + bool coalesced; + extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, + extents, extent, &coalesced); + extent_state_set(extent, extents_state_get(extents)); + + if (!coalesced) { + return true; + } + extents_insert_locked(tsdn, extents, extent, true); + return false; +} + extent_t * -extents_evict(tsdn_t *tsdn, extents_t *extents, size_t npages_min) { +extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extents_t *extents, size_t npages_min) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + malloc_mutex_lock(tsdn, &extents->mtx); - /* Get the LRU extent, if any. */ - extent_t *extent = extent_list_first(&extents->lru); - if (extent == NULL) { - goto label_return; + /* + * Get the LRU coalesced extent, if any. 
If coalescing was delayed, + * the loop will iterate until the LRU extent is fully coalesced. + */ + extent_t *extent; + while (true) { + /* Get the LRU extent, if any. */ + extent = extent_list_first(&extents->lru); + if (extent == NULL) { + goto label_return; + } + /* Check the eviction limit. */ + size_t npages = extent_size_get(extent) >> LG_PAGE; + if (atomic_read_zu(&extents->npages) - npages < npages_min) { + extent = NULL; + goto label_return; + } + extents_remove_locked(tsdn, extents, extent, false); + if (!extents->delay_coalesce) { + break; + } + /* Try to coalesce. */ + if (extent_try_delayed_coalesce(tsdn, arena, r_extent_hooks, + rtree_ctx, extents, extent)) { + break; + } + /* + * The LRU extent was just coalesced and the result placed in + * the LRU at its neighbor's position. Start over. + */ } - /* Check the eviction limit. */ - size_t npages = extent_size_get(extent) >> LG_PAGE; - if (atomic_read_zu(&extents->npages) - npages < npages_min) { - extent = NULL; - goto label_return; - } - extents_remove_locked(tsdn, extents, extent); /* * Either mark the extent active or deregister it to protect against @@ -320,29 +369,29 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent) { + extent_t *extent, bool preserve_lru) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent); + extents_insert_locked(tsdn, extents, extent, preserve_lru); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent) { + extent_t *extent, bool preserve_lru) { malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent); + extent_deactivate_locked(tsdn, arena, extents, extent, preserve_lru); malloc_mutex_unlock(tsdn, &extents->mtx); } static 
void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent) { + extent_t *extent, bool preserve_lru) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extents_state_get(extents)); - extents_remove_locked(tsdn, extents, extent); + extents_remove_locked(tsdn, extents, extent, preserve_lru); extent_state_set(extent, extent_state_active); } @@ -581,7 +630,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, return NULL; } - extent_activate_locked(tsdn, arena, extents, extent); + extent_activate_locked(tsdn, arena, extents, extent, false); if (!locked) { malloc_mutex_unlock(tsdn, &extents->mtx); } @@ -620,7 +669,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, lead); return NULL; } - extent_deactivate(tsdn, arena, extents, lead); + extent_deactivate(tsdn, arena, extents, lead, false); } /* Split the trail. */ @@ -633,7 +682,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent); return NULL; } - extent_deactivate(tsdn, arena, extents, trail); + extent_deactivate(tsdn, arena, extents, trail, false); } else if (leadsize == 0) { /* * Splitting causes usize to be set as a side effect, but no @@ -1030,7 +1079,16 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *inner, extent_t *outer, bool forward) { assert(extent_can_coalesce(arena, extents, inner, outer)); - extent_activate_locked(tsdn, arena, extents, outer); + if (forward && extents->delay_coalesce) { + /* + * The extent that remains after coalescing must occupy the + * outer extent's position in the LRU. For forward coalescing, + * swap the inner extent into the LRU. 
+ */ + extent_list_replace(&extents->lru, outer, inner); + } + extent_activate_locked(tsdn, arena, extents, outer, + extents->delay_coalesce); malloc_mutex_unlock(tsdn, &extents->mtx); bool err = extent_merge_wrapper(tsdn, arena, r_extent_hooks, @@ -1038,7 +1096,11 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, extents, outer); + if (forward && extents->delay_coalesce) { + extent_list_replace(&extents->lru, inner, outer); + } + extent_deactivate_locked(tsdn, arena, extents, outer, + extents->delay_coalesce); } return err; @@ -1047,14 +1109,14 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - extent_t *extent) { + extent_t *extent, bool *coalesced) { /* * Continue attempting to coalesce until failure, to protect against * races with other threads that are thwarted by this one. */ - bool coalesced; + bool again; do { - coalesced = false; + again = false; /* Try to coalesce forward. */ rtree_elm_t *next_elm = rtree_elm_acquire(tsdn, &extents_rtree, @@ -1073,7 +1135,12 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, rtree_elm_release(tsdn, &extents_rtree, next_elm); if (can_coalesce && !extent_coalesce(tsdn, arena, r_extent_hooks, extents, extent, next, true)) { - coalesced = true; + if (extents->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return extent; + } + again = true; } } @@ -1090,11 +1157,19 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, if (can_coalesce && !extent_coalesce(tsdn, arena, r_extent_hooks, extents, extent, prev, false)) { extent = prev; - coalesced = true; + if (extents->delay_coalesce) { + /* Do minimal coalescing. 
*/ + *coalesced = true; + return extent; + } + again = true; } } - } while (coalesced); + } while (again); + if (extents->delay_coalesce) { + *coalesced = false; + } return extent; } @@ -1118,12 +1193,12 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); - if (extents->try_coalesce) { + if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, extent); + rtree_ctx, extents, extent, NULL); } - extent_deactivate_locked(tsdn, arena, extents, extent); + extent_deactivate_locked(tsdn, arena, extents, extent, false); malloc_mutex_unlock(tsdn, &extents->mtx); } From cc75c35db58f4ce4a27455fe5fe46fe9347d2c45 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Mar 2017 22:51:21 -0800 Subject: [PATCH 0698/2608] Add any() and remove_any() to ph. These functions select the easiest-to-remove element in the heap, which is either the most recently inserted aux list element or the root. If no calls are made to first() or remove_first(), the behavior (and time complexity) is the same as for a LIFO queue. 
--- include/jemalloc/internal/ph.h | 58 +++++++++++++++++++++++++++++++--- test/unit/ph.c | 31 +++++++++++++++++- 2 files changed, 84 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 7e1920cb..84d6778a 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -198,8 +198,10 @@ struct { \ a_attr void a_prefix##new(a_ph_type *ph); \ a_attr bool a_prefix##empty(a_ph_type *ph); \ a_attr a_type *a_prefix##first(a_ph_type *ph); \ +a_attr a_type *a_prefix##any(a_ph_type *ph); \ a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ +a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \ a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); /* @@ -223,6 +225,17 @@ a_prefix##first(a_ph_type *ph) { \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ return ph->ph_root; \ } \ +a_attr a_type * \ +a_prefix##any(a_ph_type *ph) { \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \ + if (aux != NULL) { \ + return aux; \ + } \ + return ph->ph_root; \ +} \ a_attr void \ a_prefix##insert(a_ph_type *ph, a_type *phn) { \ memset(&phn->a_field, 0, sizeof(phn(a_type))); \ @@ -266,15 +279,52 @@ a_prefix##remove_first(a_ph_type *ph) { \ \ return ret; \ } \ +a_attr a_type * \ +a_prefix##remove_any(a_ph_type *ph) { \ + /* \ + * Remove the most recently inserted aux list element, or the \ + * root if the aux list is empty. This has the effect of \ + * behaving as a LIFO (and insertion/removal is therefore \ + * constant-time) if a_prefix##[remove_]first() are never \ + * called. 
\ + */ \ + if (ph->ph_root == NULL) { \ + return NULL; \ + } \ + a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \ + if (ret != NULL) { \ + a_type *aux = phn_next_get(a_type, a_field, ret); \ + phn_next_set(a_type, a_field, ph->ph_root, aux); \ + if (aux != NULL) { \ + phn_prev_set(a_type, a_field, aux, \ + ph->ph_root); \ + } \ + return ret; \ + } \ + ret = ph->ph_root; \ + ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ + ph->ph_root); \ + return ret; \ +} \ a_attr void \ a_prefix##remove(a_ph_type *ph, a_type *phn) { \ a_type *replace, *parent; \ \ - /* \ - * We can delete from aux list without merging it, but we need \ - * to merge if we are dealing with the root node. \ - */ \ if (ph->ph_root == phn) { \ + /* \ + * We can delete from aux list without merging it, but \ + * we need to merge if we are dealing with the root \ + * node and it has children. \ + */ \ + if (phn_lchild_get(a_type, a_field, phn) == NULL) { \ + ph->ph_root = phn_next_get(a_type, a_field, \ + phn); \ + if (ph->ph_root != NULL) { \ + phn_prev_set(a_type, a_field, \ + ph->ph_root, NULL); \ + } \ + return; \ + } \ ph_merge_aux(a_type, a_field, ph, a_cmp); \ if (ph->ph_root == phn) { \ ph_merge_children(a_type, a_field, ph->ph_root, \ diff --git a/test/unit/ph.c b/test/unit/ph.c index 91516fae..01df340c 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -142,6 +142,7 @@ TEST_BEGIN(test_ph_empty) { heap_new(&heap); assert_true(heap_empty(&heap), "Heap should be empty"); assert_ptr_null(heap_first(&heap), "Unexpected node"); + assert_ptr_null(heap_any(&heap), "Unexpected node"); } TEST_END @@ -159,6 +160,13 @@ node_remove_first(heap_t *heap) { return node; } +static node_t * +node_remove_any(heap_t *heap) { + node_t *node = heap_remove_any(heap); + node->magic = 0; + return node; +} + TEST_BEGIN(test_ph_random) { #define NNODES 25 #define NBAGS 250 @@ -204,6 +212,8 @@ TEST_BEGIN(test_ph_random) { for (k = 0; k < j; k++) { heap_insert(&heap, &nodes[k]); if (i % 13 == 12) { + 
assert_ptr_not_null(heap_any(&heap), + "Heap should not be empty"); /* Trigger merging. */ assert_ptr_not_null(heap_first(&heap), "Heap should not be empty"); @@ -216,7 +226,7 @@ TEST_BEGIN(test_ph_random) { "Heap should not be empty"); /* Remove nodes. */ - switch (i % 4) { + switch (i % 6) { case 0: for (k = 0; k < j; k++) { assert_u_eq(heap_validate(&heap), j - k, @@ -264,12 +274,31 @@ TEST_BEGIN(test_ph_random) { prev = node; } break; + } case 4: { + for (k = 0; k < j; k++) { + node_remove_any(&heap); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + } + break; + } case 5: { + for (k = 0; k < j; k++) { + node_t *node = heap_any(&heap); + assert_u_eq(heap_validate(&heap), j - k, + "Incorrect node count"); + node_remove(&heap, node); + assert_u_eq(heap_validate(&heap), j - k + - 1, "Incorrect node count"); + } + break; } default: not_reached(); } assert_ptr_null(heap_first(&heap), "Heap should be empty"); + assert_ptr_null(heap_any(&heap), + "Heap should be empty"); assert_true(heap_empty(&heap), "Heap should be empty"); } } From cdce93e4a3045bcf0d30409666d2d4c29818aec7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 3 Mar 2017 22:55:28 -0800 Subject: [PATCH 0699/2608] Use any-best-fit for cached extent allocation. This simplifies what would be pairing heap operations to the equivalent of LIFO queue operations. This is a complementary optimization in the context of delayed coalescing for cached extents. --- src/extent.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/extent.c b/src/extent.c index 368c9741..60e385ee 100644 --- a/src/extent.c +++ b/src/extent.c @@ -238,17 +238,20 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, } /* - * Do first-best-fit extent selection, i.e. select the oldest/lowest extent that - * best fits. + * Do {first,any}-best-fit extent selection, i.e. 
select the oldest/lowest or + * any extent that best fits, where {first,any} corresponds to + * extents->delay_coalesce={false,true}. */ static extent_t * -extents_first_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, size_t size) { malloc_mutex_assert_owner(tsdn, &extents->mtx); pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); for (pszind_t i = pind; i < NPSIZES+1; i++) { - extent_t *extent = extent_heap_first(&extents->heaps[i]); + extent_t *extent = extents->delay_coalesce ? + extent_heap_any(&extents->heaps[i]) : + extent_heap_first(&extents->heaps[i]); if (extent != NULL) { assert(extent_size_get(extent) >= size); return extent; @@ -620,7 +623,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent = NULL; } } else { - extent = extents_first_best_fit_locked(tsdn, arena, extents, + extent = extents_best_fit_locked(tsdn, arena, extents, alloc_size); } if (extent == NULL) { From 01f47f11a67d1a2505cc1f21851c466651eba431 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 6 Mar 2017 12:51:41 -0800 Subject: [PATCH 0700/2608] Store associated arena in tcache. This fixes tcache_flush for manual tcaches, which wasn't able to find the correct arena it associated with. Also changed the decay test to cover this case (by using manually created arenas). 
--- include/jemalloc/internal/tcache_externs.h | 2 +- include/jemalloc/internal/tcache_structs.h | 1 + src/ctl.c | 2 +- src/jemalloc.c | 1 + src/tcache.c | 17 ++++----- test/unit/decay.c | 40 +++++++++++++++++----- 6 files changed, 45 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 3e4a7511..83643033 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -34,7 +34,7 @@ void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *oldarena, arena_t *newarena); + arena_t *arena); tcache_t *tcache_get_hard(tsd_t *tsd); tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); void tcache_cleanup(tsd_t *tsd); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index a2b28afd..a9b70312 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -35,6 +35,7 @@ struct tcache_s { uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ ticker_t gc_ticker; /* Drives incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ + arena_t *arena; /* Associated arena. */ tcache_bin_t tbins[1]; /* Dynamically sized. 
*/ /* * The pointer stacks associated with tbins follow as a contiguous diff --git a/src/ctl.c b/src/ctl.c index 83e9e93e..831877b0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1342,7 +1342,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, tcache_t *tcache = tsd_tcache_get(tsd); if (tcache != NULL) { tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - oldarena, newarena); + newarena); } } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e652802..b5379cc4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -446,6 +446,7 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); + tsd_iarena_set(tsd, newarena); } static void diff --git a/src/tcache.c b/src/tcache.c index dff31d19..78570663 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -99,7 +99,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, assert(binind < NBINS); assert(rem <= tbin->ncached); - arena = arena_choose(tsd, NULL); + arena = tcache->arena; assert(arena != NULL); for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { /* Lock the arena bin associated with the first object. */ @@ -175,7 +175,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert(rem <= tbin->ncached); - arena_t *arena = arena_choose(tsd, NULL); + arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = tbin->ncached - rem; while (nflush > 0) { @@ -259,6 +259,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, static void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { + tcache->arena = arena; if (config_stats) { /* Link into list of extant tcaches. 
*/ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); @@ -269,7 +270,8 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { +tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { + arena_t *arena = tcache->arena; if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); @@ -291,10 +293,9 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } void -tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *oldarena, - arena_t *newarena) { - tcache_arena_dissociate(tsdn, tcache, oldarena); - tcache_arena_associate(tsdn, tcache, newarena); +tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { + tcache_arena_dissociate(tsdn, tcache); + tcache_arena_associate(tsdn, tcache, arena); } tcache_t * @@ -360,7 +361,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) { unsigned i; arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tsd_tsdn(tsd), tcache, arena); + tcache_arena_dissociate(tsd_tsdn(tsd), tcache); for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; diff --git a/test/unit/decay.c b/test/unit/decay.c index df910aac..eb4df9d7 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -123,18 +123,28 @@ generate_dirty(unsigned arena_ind, size_t size) { TEST_BEGIN(test_decay_ticks) { ticker_t *decay_ticker; - unsigned tick0, tick1; + unsigned tick0, tick1, arena_ind; size_t sz, large0; void *p; - decay_ticker = decay_ticker_get(tsd_fetch(), 0); - assert_ptr_not_null(decay_ticker, - "Unexpected failure getting decay ticker"); - sz = sizeof(size_t); assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + int err; + /* Set up a manually managed arena for test. */ + arena_ind = do_arena_create(0); + + /* Migrate to the new arena, and get the ticker. 
*/ + unsigned old_arena_ind; + size_t sz_arena_ind = sizeof(old_arena_ind); + err = mallctl("thread.arena", (void *)&old_arena_ind, &sz_arena_ind, + (void *)&arena_ind, sizeof(arena_ind)); + assert_d_eq(err, 0, "Unexpected mallctl() failure"); + decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); + assert_ptr_not_null(decay_ticker, + "Unexpected failure getting decay ticker"); + /* * Test the standard APIs using a large size class, since we can't * control tcache interactions for small size classes (except by @@ -263,6 +273,12 @@ TEST_BEGIN(test_decay_ticks) { tcache_sizes[0] = large0; tcache_sizes[1] = 1; + size_t tcache_max, sz_tcache_max; + sz_tcache_max = sizeof(tcache_max); + err = mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz_tcache_max, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl() failure"); + sz = sizeof(unsigned); assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, NULL, 0), 0, "Unexpected mallctl failure"); @@ -285,9 +301,17 @@ TEST_BEGIN(test_decay_ticks) { (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache flush " - "(sz=%zu)", sz); + + /* Will only tick if it's in tcache. */ + if (sz <= tcache_max) { + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache " + "flush (sz=%zu)", sz); + } else { + assert_u32_eq(tick1, tick0, + "Unexpected ticker tick during tcache " + "flush (sz=%zu)", sz); + } } } } From dafadce62205bddac7da1c595c956a69367810ec Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 8 Mar 2017 12:13:59 -0800 Subject: [PATCH 0701/2608] Reintroduce JEMALLOC_ATOMIC_U64 The C11 atomics backport removed this #define, which degraded atomic 64-bit reads to require a lock even on platforms that support them. This commit fixes that. 
--- include/jemalloc/internal/atomic.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 866adff0..acbb6216 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -44,6 +44,14 @@ #define ATOMIC_ACQ_REL atomic_memory_order_acq_rel, #define ATOMIC_SEQ_CST atomic_memory_order_seq_cst +/* + * Not all platforms have 64-bit atomics. If we do, this #define exposes that + * fact. + */ +#if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +# define JEMALLOC_ATOMIC_U64 +#endif + /* * In order to let us transition atomics usage piecemeal (and reason locally * about memory orders), we'll support the previous API for a while. @@ -104,10 +112,10 @@ JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(ssize_t, zd) JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32) -# if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) +#ifdef JEMALLOC_ATOMIC_U64 JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint64_t, u64) -# endif +#endif #undef ATOMIC_INLINE From 8adab269721b0271399027d45a8aa6b425e53fd9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Mar 2017 17:57:48 -0800 Subject: [PATCH 0702/2608] Convert extents_t's npages field to use C11-style atomics In the process, we can do some strength reduction, changing the fetch-adds and fetch-subs to be simple loads followed by stores, since the modifications all occur while holding the mutex. 
--- include/jemalloc/internal/extent_structs.h | 7 ++++-- src/extent.c | 29 +++++++++++++++++----- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 9ea69728..c14aef86 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -109,9 +109,12 @@ struct extents_s { /* * Page sum for all extents in heaps. * - * Synchronization: atomic. + * The synchronization here is a little tricky. Modifications to npages + * must hold mtx, but reads need not (though, a reader who sees npages + * without holding the mutex can't assume anything about the rest of the + * state of the extents_t). */ - size_t npages; + atomic_zu_t npages; /* All stored extents must be in the same state. */ extent_state_t state; diff --git a/src/extent.c b/src/extent.c index 60e385ee..33589394 100644 --- a/src/extent.c +++ b/src/extent.c @@ -186,7 +186,7 @@ extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, extent_heap_new(&extents->heaps[i]); } extent_list_init(&extents->lru); - extents->npages = 0; + atomic_store_zu(&extents->npages, 0, ATOMIC_RELAXED); extents->state = state; extents->delay_coalesce = delay_coalesce; return false; @@ -199,7 +199,7 @@ extents_state_get(const extents_t *extents) { size_t extents_npages_get(extents_t *extents) { - return atomic_read_zu(&extents->npages); + return atomic_load_zu(&extents->npages, ATOMIC_RELAXED); } static void @@ -216,7 +216,15 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, extent_list_append(&extents->lru, extent); } size_t npages = size >> LG_PAGE; - atomic_add_zu(&extents->npages, npages); + /* + * All modifications to npages hold the mutex (as asserted above), so we + * don't need an atomic fetch-add; we can get by with a load followed by + * a store. 
+ */ + size_t cur_extents_npages = + atomic_load_zu(&extents->npages, ATOMIC_RELAXED); + atomic_store_zu(&extents->npages, cur_extents_npages + npages, + ATOMIC_RELAXED); } static void @@ -233,8 +241,15 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, extent_list_remove(&extents->lru, extent); } size_t npages = size >> LG_PAGE; - assert(atomic_read_zu(&extents->npages) >= npages); - atomic_sub_zu(&extents->npages, size >> LG_PAGE); + /* + * As in extents_insert_locked, we hold extents->mtx and so don't need + * atomic operations for updating extents->npages. + */ + size_t cur_extents_npages = + atomic_load_zu(&extents->npages, ATOMIC_RELAXED); + assert(cur_extents_npages >= npages); + atomic_store_zu(&extents->npages, + cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } /* @@ -299,7 +314,9 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } /* Check the eviction limit. */ size_t npages = extent_size_get(extent) >> LG_PAGE; - if (atomic_read_zu(&extents->npages) - npages < npages_min) { + size_t extents_npages = atomic_load_zu(&extents->npages, + ATOMIC_RELAXED); + if (extents_npages - npages < npages_min) { extent = NULL; goto label_return; } From 8721e19c0414dce0f47a627ff948130d4294b4d7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Mar 2017 13:00:42 -0800 Subject: [PATCH 0703/2608] Fix arena_prefork lock rank order for witness. When witness is enabled, lock rank order needs to be preserved during prefork, not only for each arena, but also across arenas. This change breaks arena_prefork into further stages to ensure valid rank order across arenas. Also changed test/unit/fork to use a manual arena to catch this case. 
--- include/jemalloc/internal/arena_externs.h | 3 +++ include/jemalloc/internal/private_symbols.txt | 3 +++ src/arena.c | 22 ++++++++++++++----- src/jemalloc.c | 20 ++++++++++++----- test/unit/fork.c | 13 +++++++++++ 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 36d91869..2df55184 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -82,6 +82,9 @@ void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 30cd3958..64bea334 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -51,6 +51,9 @@ arena_prefork0 arena_prefork1 arena_prefork2 arena_prefork3 +arena_prefork4 +arena_prefork5 +arena_prefork6 arena_prof_accum arena_prof_promote arena_prof_tctx_get diff --git a/src/arena.c b/src/arena.c index cef61cc3..43bad81c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1743,29 +1743,39 @@ arena_boot(void) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->decay.mtx); +} + +void +arena_prefork1(tsdn_t *tsdn, arena_t *arena) { if (config_stats && config_tcache) { malloc_mutex_prefork(tsdn, &arena->tcache_ql_mtx); } } void -arena_prefork1(tsdn_t *tsdn, arena_t *arena) { +arena_prefork2(tsdn_t *tsdn, arena_t *arena) { extents_prefork(tsdn, &arena->extents_cached); extents_prefork(tsdn, 
&arena->extents_retained); } void -arena_prefork2(tsdn_t *tsdn, arena_t *arena) { +arena_prefork3(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->extent_freelist_mtx); } void -arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - +arena_prefork4(tsdn_t *tsdn, arena_t *arena) { base_prefork(tsdn, arena->base); +} + +void +arena_prefork5(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->large_mtx); - for (i = 0; i < NBINS; i++) { +} + +void +arena_prefork6(tsdn_t *tsdn, arena_t *arena) { + for (unsigned i = 0; i < NBINS; i++) { malloc_mutex_prefork(tsdn, &arena->bins[i].lock); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index b5379cc4..ecfecf9c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2773,7 +2773,8 @@ _malloc_prefork(void) tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); prof_prefork0(tsd_tsdn(tsd)); - for (i = 0; i < 3; i++) { + /* Break arena prefork into stages to preserve lock order. */ + for (i = 0; i < 7; i++) { for (j = 0; j < narenas; j++) { if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != NULL) { @@ -2787,16 +2788,23 @@ _malloc_prefork(void) case 2: arena_prefork2(tsd_tsdn(tsd), arena); break; + case 3: + arena_prefork3(tsd_tsdn(tsd), arena); + break; + case 4: + arena_prefork4(tsd_tsdn(tsd), arena); + break; + case 5: + arena_prefork5(tsd_tsdn(tsd), arena); + break; + case 6: + arena_prefork6(tsd_tsdn(tsd), arena); + break; default: not_reached(); } } } } - for (i = 0; i < narenas; i++) { - if ((arena = arena_get(tsd_tsdn(tsd), i, false)) != NULL) { - arena_prefork3(tsd_tsdn(tsd), arena); - } - } prof_prefork1(tsd_tsdn(tsd)); } diff --git a/test/unit/fork.c b/test/unit/fork.c index 96b1c5a0..afe22141 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -9,6 +9,19 @@ TEST_BEGIN(test_fork) { void *p; pid_t pid; + /* Set up a manually managed arena for test. 
 */ + unsigned arena_ind; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + + /* Migrate to the new arena. */ + unsigned old_arena_ind; + sz = sizeof(old_arena_ind); + assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&arena_ind, sizeof(arena_ind)), 0, + "Unexpected mallctl() failure"); + p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); From ec532e2c5c0b25fb7ab09383fe5a274583a90def Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 Feb 2017 17:02:05 -0800 Subject: [PATCH 0704/2608] Implement per-CPU arena. The new feature, opt.percpu_arena, determines thread-arena association dynamically based on CPU id. Three modes are supported: "percpu", "phycpu" and disabled. "percpu" uses the current core id (with help from sched_getcpu()) directly as the arena index, while "phycpu" will assign threads on the same physical CPU to the same arena. In other words, "percpu" means # of arenas == # of CPUs, while "phycpu" has # of arenas == 1/2 * (# of CPUs). Note that no runtime check on whether hyper threading is enabled is added yet. When enabled, threads will be migrated between arenas when a CPU change is detected. In the current design, to reduce overhead from reading CPU id, each arena tracks the thread accessed most recently. When a new thread comes in, we will read CPU id and update arena if necessary.
--- configure.ac | 9 ++ include/jemalloc/internal/arena_externs.h | 4 + include/jemalloc/internal/arena_inlines_a.h | 25 ++++ include/jemalloc/internal/arena_structs_b.h | 7 + include/jemalloc/internal/arena_types.h | 16 ++ .../jemalloc/internal/jemalloc_internal.h.in | 123 +++++++++++++--- .../internal/jemalloc_internal_defs.h.in | 3 + include/jemalloc/internal/private_symbols.txt | 4 + src/arena.c | 10 ++ src/ctl.c | 18 ++- src/jemalloc.c | 139 +++++++++++++++--- src/stats.c | 1 + src/tcache.c | 11 +- test/integration/thread_arena.c | 25 +++- test/unit/mallctl.c | 39 +++-- test/unit/stats.c | 100 ++++++------- 16 files changed, 415 insertions(+), 119 deletions(-) diff --git a/configure.ac b/configure.ac index 0095caf1..96b105f3 100644 --- a/configure.ac +++ b/configure.ac @@ -1598,6 +1598,15 @@ if test "x$have_secure_getenv" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ]) fi +dnl Check if the GNU-specific sched_getcpu function exists. +AC_CHECK_FUNC([sched_getcpu], + [have_sched_getcpu="1"], + [have_sched_getcpu="0"] + ) +if test "x$have_sched_getcpu" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ]) +fi + dnl Check if the Solaris/BSD issetugid function exists. 
AC_CHECK_FUNC([issetugid], [have_issetugid="1"], diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 2df55184..349bae99 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -13,6 +13,10 @@ extern ssize_t opt_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; +extern percpu_arena_mode_t percpu_arena_mode; +extern const char *opt_percpu_arena; +extern const char *percpu_arena_mode_names[]; + void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests); void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index ea7e0995..9dd5304c 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -7,6 +7,7 @@ void arena_internal_add(arena_t *arena, size_t size); void arena_internal_sub(arena_t *arena, size_t size); size_t arena_internal_get(arena_t *arena); bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes); +void percpu_arena_update(tsd_t *tsd, unsigned cpu); #endif /* JEMALLOC_ENABLE_INLINE */ #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -42,6 +43,30 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); } +JEMALLOC_INLINE void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + /* Set new arena/tcache associations. 
*/ + arena_migrate(tsd, oldind, newind); + if (config_tcache) { + tcache_t *tcache = tsd_tcache_get(tsd); + if (tcache) { + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + newarena); + } + } + } +} + #endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index ebcdbc4d..ba8bb8ad 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -125,6 +125,13 @@ struct arena_s { */ unsigned nthreads[2]; + /* + * When percpu_arena is enabled, to amortize the cost of reading / + * updating the current CPU id, track the most recent thread accessing + * this arena, and only read CPU if there is a mismatch. + */ + tsdn_t *last_thd; + /* Synchronization: internal. */ arena_stats_t stats; diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index d821be45..067c9ee9 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -19,4 +19,20 @@ typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; +typedef enum { + percpu_arena_disabled = 0, + percpu_arena = 1, + per_phycpu_arena = 2, /* i.e. hyper threads share arena. 
*/ + + percpu_arena_mode_limit = 3 +} percpu_arena_mode_t; + +#ifdef JEMALLOC_PERCPU_ARENA +#define PERCPU_ARENA_MODE_DEFAULT percpu_arena +#define OPT_PERCPU_ARENA_DEFAULT "percpu" +#else +#define PERCPU_ARENA_MODE_DEFAULT percpu_arena_disabled +#define OPT_PERCPU_ARENA_DEFAULT "disabled" +#endif + #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 8d2ec7dd..97b41bb0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -145,6 +145,17 @@ static const bool have_thp = false #endif ; +#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* Currently percpu_arena depends on sched_getcpu. */ +#define JEMALLOC_PERCPU_ARENA +#endif +static const bool have_percpu_arena = +#ifdef JEMALLOC_PERCPU_ARENA + true +#else + false +#endif + ; #if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) #include @@ -220,6 +231,9 @@ typedef unsigned pszind_t; /* Size class index type. */ typedef unsigned szind_t; +/* Processor / core id type. */ +typedef int malloc_cpuid_t; + /* * Flags bits: * @@ -455,7 +469,7 @@ extern unsigned narenas_auto; * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
*/ -extern arena_t **arenas; +extern arena_t *arenas[]; /* * pind2sz_tab encodes the same information as could be computed by @@ -548,6 +562,10 @@ arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); +malloc_cpuid_t malloc_getcpu(void); +unsigned percpu_arena_choose(void); +unsigned percpu_arena_ind_limit(void); + #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -818,32 +836,53 @@ sa2u(size_t size, size_t alignment) { return usize; } -/* Choose an arena based on a per-thread value. */ -JEMALLOC_INLINE arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { - arena_t *ret; +JEMALLOC_ALWAYS_INLINE malloc_cpuid_t +malloc_getcpu(void) { + assert(have_percpu_arena); +#if defined(JEMALLOC_HAVE_SCHED_GETCPU) + return (malloc_cpuid_t)sched_getcpu(); +#else + not_reached(); + return -1; +#endif +} - if (arena != NULL) { - return arena; +/* Return the chosen arena index based on current cpu. */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_choose(void) { + unsigned arena_ind; + assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); + + malloc_cpuid_t cpuid = malloc_getcpu(); + assert(cpuid >= 0); + if ((percpu_arena_mode == percpu_arena) || + ((unsigned)cpuid < ncpus / 2)) { + arena_ind = cpuid; + } else { + assert(percpu_arena_mode == per_phycpu_arena); + /* Hyper threads on the same physical CPU share arena. */ + arena_ind = cpuid - ncpus / 2; } - ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) { - ret = arena_choose_hard(tsd, internal); + return arena_ind; +} + +/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). 
*/ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_ind_limit(void) { + assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); + if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) { + if (ncpus % 2) { + /* This likely means a misconfig. */ + return ncpus / 2 + 1; + } + return ncpus / 2; + } else { + return ncpus; } - - return ret; } -JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, false); -} -JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, true); -} JEMALLOC_INLINE arena_tdata_t * arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { @@ -912,6 +951,50 @@ extent_t *iealloc(tsdn_t *tsdn, const void *ptr); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* Choose an arena based on a per-thread value. */ +JEMALLOC_INLINE arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { + arena_t *ret; + + if (arena != NULL) { + return arena; + } + + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) { + ret = arena_choose_hard(tsd, internal); + } + + assert(ret != NULL); + /* + * Note that for percpu arena, if the current arena is outside of the + * auto percpu arena range, (i.e. thread is assigned to a manually + * managed arena), then percpu arena is skipped. 
+ */ + if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) && + (arena_ind_get(ret) < percpu_arena_ind_limit()) && + (ret->last_thd != tsd_tsdn(tsd))) { + unsigned ind = percpu_arena_choose(); + if (arena_ind_get(ret) != ind) { + percpu_arena_update(tsd, ind); + ret = tsd_arena_get(tsd); + } + ret->last_thd = tsd_tsdn(tsd); + } + + return ret; +} + +JEMALLOC_INLINE arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, false); +} + +JEMALLOC_INLINE arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, true); +} + JEMALLOC_ALWAYS_INLINE extent_t * iealloc(tsdn_t *tsdn, const void *ptr) { return extent_lookup(tsdn, ptr, true); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b2e0077e..500f4274 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -308,6 +308,9 @@ /* Adaptive mutex support in pthreads. */ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP +/* GNU specific sched_getcpu support */ +#undef JEMALLOC_HAVE_SCHED_GETCPU + /* * If defined, jemalloc symbols are not exported (doesn't work when * JEMALLOC_PREFIX is not defined). 
diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 64bea334..c0211e58 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -258,6 +258,7 @@ large_salloc lg_floor lg_prof_sample malloc_cprintf +malloc_getcpu malloc_mutex_assert_not_owner malloc_mutex_assert_owner malloc_mutex_boot @@ -330,6 +331,9 @@ pages_purge_forced pages_purge_lazy pages_trim pages_unmap +percpu_arena_choose +percpu_arena_ind_limit +percpu_arena_update pind2sz pind2sz_compute pind2sz_lookup diff --git a/src/arena.c b/src/arena.c index 43bad81c..a3a1fdd7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,15 @@ /******************************************************************************/ /* Data. */ +const char *percpu_arena_mode_names[] = { + "disabled", + "percpu", + "phycpu" +}; + +const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; +percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT; + ssize_t opt_decay_time = DECAY_TIME_DEFAULT; static ssize_t decay_time_default; @@ -1629,6 +1638,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->nthreads[0] = arena->nthreads[1] = 0; + arena->last_thd = NULL; if (config_stats) { if (arena_stats_init(tsdn, &arena->stats)) { diff --git a/src/ctl.c b/src/ctl.c index 831877b0..d4ab699f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -72,6 +72,7 @@ CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) +CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) @@ -229,6 +230,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, + {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("decay_time"), CTL(opt_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, @@ -1284,6 +1286,7 @@ 
CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) +CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) @@ -1317,10 +1320,10 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (oldarena == NULL) { return EAGAIN; } - newind = oldind = arena_ind_get(oldarena); WRITE(newind, unsigned); READ(oldind, unsigned); + if (newind != oldind) { arena_t *newarena; @@ -1330,6 +1333,19 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, goto label_return; } + if (have_percpu_arena && + (percpu_arena_mode != percpu_arena_disabled)) { + if (newind < percpu_arena_ind_limit()) { + /* + * If perCPU arena is enabled, thread_arena + * control is not allowed for the auto arena + * range. + */ + ret = EPERM; + goto label_return; + } + } + /* Initialize arena if necessary. */ newarena = arena_get(tsd_tsdn(tsd), newind, true); if (newarena == NULL) { diff --git a/src/jemalloc.c b/src/jemalloc.c index ecfecf9c..ce84b3cf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -56,7 +56,8 @@ static malloc_mutex_t arenas_lock; * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. */ -arena_t **arenas; +JEMALLOC_ALIGNED(CACHELINE) +arena_t *arenas[MALLOCX_ARENA_MAX + 1]; static unsigned narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ unsigned narenas_auto; /* Read-only after initialization. 
*/ @@ -543,6 +544,16 @@ arena_t * arena_choose_hard(tsd_t *tsd, bool internal) { arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); + if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) { + unsigned choose = percpu_arena_choose(); + ret = arena_get(tsd_tsdn(tsd), choose, true); + assert(ret != NULL); + arena_bind(tsd, arena_ind_get(ret), false); + arena_bind(tsd, arena_ind_get(ret), true); + + return ret; + } + if (narenas_auto > 1) { unsigned i, j, choose[2], first_null; @@ -1095,6 +1106,30 @@ malloc_conf_init(void) { "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) } + if (strncmp("percpu_arena", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < percpu_arena_mode_limit; i++) { + if (strncmp(percpu_arena_mode_names[i], + v, vlen) == 0) { + if (!have_percpu_arena) { + malloc_conf_error( + "No getcpu support", + k, klen, v, vlen); + } + percpu_arena_mode = i; + opt_percpu_arena = + percpu_arena_mode_names[i]; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof", true) CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -1204,8 +1239,6 @@ malloc_init_hard_a0_locked() { * malloc_ncpus(). */ narenas_auto = 1; - narenas_total_set(narenas_auto); - arenas = &a0; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in @@ -1215,7 +1248,7 @@ malloc_init_hard_a0_locked() { == NULL) { return true; } - + a0 = arena_get(TSDN_NULL, 0, false); malloc_init_state = malloc_init_a0_initialized; return false; @@ -1255,23 +1288,76 @@ malloc_init_hard_recursible(void) { return false; } -static bool -malloc_init_hard_finish(tsdn_t *tsdn) { - if (malloc_mutex_boot()) { - return true; +static unsigned +malloc_narenas_default(void) { + assert(ncpus > 0); + /* + * For SMP systems, create more than one arena per CPU by + * default. 
+ */ + if (ncpus > 1) { + return ncpus << 2; + } else { + return 1; } +} - if (opt_narenas == 0) { - /* - * For SMP systems, create more than one arena per CPU by - * default. - */ - if (ncpus > 1) { - opt_narenas = ncpus << 2; +static bool +malloc_init_narenas(void) { + assert(ncpus > 0); + + if (percpu_arena_mode != percpu_arena_disabled) { + if (!have_percpu_arena || malloc_getcpu() < 0) { + percpu_arena_mode = percpu_arena_disabled; + malloc_printf(": perCPU arena getcpu() not " + "available. Setting narenas to %u.\n", opt_narenas ? + opt_narenas : malloc_narenas_default()); + if (opt_abort) { + abort(); + } } else { - opt_narenas = 1; + if (ncpus > MALLOCX_ARENA_MAX) { + malloc_printf(": narenas w/ percpu" + "arena beyond limit (%d)\n", ncpus); + if (opt_abort) { + abort(); + } + return true; + } + if ((percpu_arena_mode == per_phycpu_arena) && + (ncpus % 2 != 0)) { + malloc_printf(": invalid " + "configuration -- per physical CPU arena " + "with odd number (%u) of CPUs (no hyper " + "threading?).\n", ncpus); + if (opt_abort) + abort(); + } + unsigned n = percpu_arena_ind_limit(); + if (opt_narenas < n) { + /* + * If narenas is specified with percpu_arena + * enabled, actual narenas is set as the greater + * of the two. percpu_arena_choose will be free + * to use any of the arenas based on CPU + * id. This is conservative (at a small cost) + * but ensures correctness. + * + * If for some reason the ncpus determined at + * boot is not the actual number (e.g. because + * of affinity setting from numactl), reserving + * narenas this way provides a workaround for + * percpu_arena. + */ + opt_narenas = n; + } } } + if (opt_narenas == 0) { + opt_narenas = malloc_narenas_default(); + } + assert(opt_narenas > 0); + narenas_auto = opt_narenas; /* * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). @@ -1283,14 +1369,13 @@ malloc_init_hard_finish(tsdn_t *tsdn) { } narenas_total_set(narenas_auto); - /* Allocate and initialize arenas. 
*/ - arenas = (arena_t **)base_alloc(tsdn, a0->base, sizeof(arena_t *) * - (MALLOCX_ARENA_MAX+1), CACHELINE); - if (arenas == NULL) { + return false; +} + +static bool +malloc_init_hard_finish(void) { + if (malloc_mutex_boot()) return true; - } - /* Copy the pointer to the one arena that was already initialized. */ - arena_set(0, a0); malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); @@ -1328,12 +1413,18 @@ malloc_init_hard(void) { } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); + /* Need this before prof_boot2 (for allocation). */ + if (malloc_init_narenas()) { + malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + return true; + } + if (config_prof && prof_boot2(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; } - if (malloc_init_hard_finish(tsd_tsdn(tsd))) { + if (malloc_init_hard_finish()) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; } diff --git a/src/stats.c b/src/stats.c index ae360e1b..776fb862 100644 --- a/src/stats.c +++ b/src/stats.c @@ -621,6 +621,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(abort, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") + OPT_WRITE_CHAR_P(percpu_arena, ",") OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_BOOL(zero, ",") diff --git a/src/tcache.c b/src/tcache.c index 78570663..266bd1f5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -357,12 +357,8 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) { static void tcache_destroy(tsd_t *tsd, tcache_t *tcache) { - arena_t *arena; unsigned i; - arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tsd_tsdn(tsd), tcache); - for (i = 0; i < NBINS; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_small(tsd, tcache, tbin, i, 0); @@ -381,6 +377,13 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) { } } + /* + * Get arena after flushing -- when using percpu arena, the associated + * arena could change 
during flush. + */ + arena_t *arena = arena_choose(tsd, NULL); + tcache_arena_dissociate(tsd_tsdn(tsd), tcache); + if (config_prof && tcache->prof_accumbytes > 0 && arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) { prof_idump(tsd_tsdn(tsd)); diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 9991a42f..1e5ec05d 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -37,10 +37,16 @@ thd_start(void *arg) { return NULL; } +static void +mallctl_failure(int err) { + char buf[BUFERROR_BUF]; + + buferror(err, buf, sizeof(buf)); + test_fail("Error in mallctl(): %s", buf); +} + TEST_BEGIN(test_thread_arena) { void *p; - unsigned arena_ind; - size_t size; int err; thd_t thds[NTHREADS]; unsigned i; @@ -48,13 +54,15 @@ TEST_BEGIN(test_thread_arena) { p = malloc(1); assert_ptr_not_null(p, "Error in malloc()"); - size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, - 0))) { - char buf[BUFERROR_BUF]; + unsigned arena_ind, old_arena_ind; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Arena creation failure"); - buferror(err, buf, sizeof(buf)); - test_fail("Error in mallctl(): %s", buf); + size_t size = sizeof(arena_ind); + if ((err = mallctl("thread.arena", (void *)&old_arena_ind, &size, + (void *)&arena_ind, sizeof(arena_ind))) != 0) { + mallctl_failure(err); } for (i = 0; i < NTHREADS; i++) { @@ -67,6 +75,7 @@ TEST_BEGIN(test_thread_arena) { thd_join(thds[i], (void *)&join_ret); assert_zd_eq(join_ret, 0, "Unexpected thread join error"); } + free(p); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c931e378..1aedbe8a 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -160,6 +160,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); + 
TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(ssize_t, decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); @@ -327,20 +328,38 @@ TEST_BEGIN(test_tcache) { TEST_END TEST_BEGIN(test_thread_arena) { - unsigned arena_old, arena_new, narenas; - size_t sz = sizeof(unsigned); + unsigned old_arena_ind, new_arena_ind, narenas; + const char *opt_percpu_arena; + size_t sz = sizeof(opt_percpu_arena); + assert_d_eq(mallctl("opt.percpu_arena", &opt_percpu_arena, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); + + sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); - arena_new = narenas - 1; - assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, - (void *)&arena_new, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); - arena_new = 0; - assert_d_eq(mallctl("thread.arena", (void *)&arena_old, &sz, - (void *)&arena_new, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + + if (strcmp(opt_percpu_arena, "disabled") == 0) { + new_arena_ind = narenas - 1; + assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&new_arena_ind, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); + new_arena_ind = 0; + assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&new_arena_ind, sizeof(unsigned)), 0, + "Unexpected mallctl() failure"); + } else { + assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + new_arena_ind = percpu_arena_ind_limit() - 1; + if (old_arena_ind != new_arena_ind) { + assert_d_eq(mallctl("thread.arena", + (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, + sizeof(unsigned)), EPERM, "thread.arena ctl " + "should not be allowed with percpu arena"); + } + } } TEST_END diff --git a/test/unit/stats.c b/test/unit/stats.c index 
948132cb..c458d3f9 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -33,7 +33,7 @@ TEST_BEGIN(test_stats_large) { size_t sz; int expected = config_stats ? 0 : ENOENT; - p = mallocx(SMALL_MAXCLASS+1, 0); + p = mallocx(SMALL_MAXCLASS+1, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), @@ -66,7 +66,6 @@ TEST_BEGIN(test_stats_large) { TEST_END TEST_BEGIN(test_stats_arenas_summary) { - unsigned arena; void *little, *large; uint64_t epoch; size_t sz; @@ -74,13 +73,9 @@ TEST_BEGIN(test_stats_arenas_summary) { size_t mapped; uint64_t npurge, nmadvise, purged; - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); - - little = mallocx(SMALL_MAXCLASS, 0); + little = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0)); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx((1U << LG_LARGE_MINCLASS), 0); + large = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); assert_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); @@ -128,7 +123,6 @@ no_lazy_lock(void) { } TEST_BEGIN(test_stats_arenas_small) { - unsigned arena; void *p; size_t sz, allocated; uint64_t epoch, nmalloc, ndalloc, nrequests; @@ -136,11 +130,7 @@ TEST_BEGIN(test_stats_arenas_small) { no_lazy_lock(); /* Lazy locking would dodge tcache testing. 
*/ - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); - - p = mallocx(SMALL_MAXCLASS, 0); + p = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), @@ -178,17 +168,12 @@ TEST_BEGIN(test_stats_arenas_small) { TEST_END TEST_BEGIN(test_stats_arenas_large) { - unsigned arena; void *p; size_t sz, allocated; uint64_t epoch, nmalloc, ndalloc; int expected = config_stats ? 0 : ENOENT; - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); - - p = mallocx((1U << LG_LARGE_MINCLASS), 0); + p = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), @@ -217,20 +202,29 @@ TEST_BEGIN(test_stats_arenas_large) { } TEST_END +static void +gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) { + sprintf(cmd, "stats.arenas.%u.bins.0.%s", arena_ind, name); +} + TEST_BEGIN(test_stats_arenas_bins) { - unsigned arena; void *p; size_t sz, curslabs, curregs; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nslabs, nreslabs; int expected = config_stats ? 
0 : ENOENT; - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); + unsigned arena_ind, old_arena_ind; + sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Arena creation failure"); + sz = sizeof(arena_ind); + assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&arena_ind, sizeof(arena_ind)), 0, + "Unexpected mallctl() failure"); - p = mallocx(arena_bin_info[0].reg_size, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + p = malloc(arena_bin_info[0].reg_size); + assert_ptr_not_null(p, "Unexpected malloc() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); @@ -238,33 +232,40 @@ TEST_BEGIN(test_stats_arenas_bins) { assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); + char cmd[128]; sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, + gen_mallctl_str(cmd, "nmalloc", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "ndalloc", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "nrequests", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curregs", (void *)&curregs, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + 
gen_mallctl_str(cmd, "curregs", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nfills", (void *)&nfills, - &sz, NULL, 0), config_tcache ? expected : ENOENT, - "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nflushes", (void *)&nflushes, - &sz, NULL, 0), config_tcache ? expected : ENOENT, - "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "nfills", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), + config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "nflushes", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), + config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nslabs", (void *)&nslabs, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.bins.0.nreslabs", (void *)&nreslabs, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "nslabs", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "nreslabs", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.bins.0.curslabs", (void *)&curslabs, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "curslabs", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -292,21 +293,16 @@ TEST_BEGIN(test_stats_arenas_bins) { TEST_END TEST_BEGIN(test_stats_arenas_lextents) { - unsigned arena; void *p; uint64_t epoch, nmalloc, ndalloc; size_t curlextents, sz, hsize; int expected = config_stats ? 
0 : ENOENT; - arena = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, (void *)&arena, - sizeof(arena)), 0, "Unexpected mallctl() failure"); - sz = sizeof(size_t); assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - p = mallocx(hsize, 0); + p = mallocx(hsize, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), From f84471edc37dbf8ff86a36d71c988ee4d8e6c5f9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 7 Mar 2017 15:08:29 -0800 Subject: [PATCH 0705/2608] Add documentation for percpu_arena in jemalloc.xml.in. --- doc/jemalloc.xml.in | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 36ec140b..937879a8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -926,6 +926,24 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", number of CPUs, or one if there is a single CPU. + + + opt.percpu_arena + (const char *) + r- + + Per CPU arena mode. Use the percpu + setting to enable this feature, which uses number of CPUs to determine + number of arenas, and bind threads to arenas dynamically based on the + CPU the thread runs on currently. phycpu setting uses + one arena per physical CPU, which means the two hyper threads on the + same CPU share one arena. Note that no runtime checking regarding the + availability of hyper threading is done at the moment. When set to + disabled, narenas and thread to arena association will + not be impacted by this option. The default is + percpu. + + opt.decay_time From 75fddc786c9d5476cab1d5d4699e95d8907d0b51 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 8 Mar 2017 23:32:53 -0800 Subject: [PATCH 0706/2608] Fix ATOMIC_{ACQUIRE,RELEASE,ACQ_REL} definitions. 
--- include/jemalloc/internal/atomic.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index acbb6216..b68440c4 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -39,9 +39,9 @@ * quite so often. */ #define ATOMIC_RELAXED atomic_memory_order_relaxed -#define ATOMIC_ACQUIRE atomic_memory_order_acquire, -#define ATOMIC_RELEASE atomic_memory_order_release, -#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel, +#define ATOMIC_ACQUIRE atomic_memory_order_acquire +#define ATOMIC_RELEASE atomic_memory_order_release +#define ATOMIC_ACQ_REL atomic_memory_order_acq_rel #define ATOMIC_SEQ_CST atomic_memory_order_seq_cst /* From 3a2b183d5fe86132d0830f720b3b8dbd6a29f7e9 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Mar 2017 19:18:27 -0800 Subject: [PATCH 0707/2608] Convert arena_t's purging field to non-atomic bool. The decay mutex already protects all accesses. --- include/jemalloc/internal/arena_structs_b.h | 15 +++++++-------- src/arena.c | 9 +++++---- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index ba8bb8ad..84c179e8 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -167,14 +167,6 @@ struct arena_s { /* Synchronization: atomic. */ dss_prec_t dss_prec; - /* - * 1/0 (true/false) if a thread is currently executing - * arena_purge_to_limit(). - * - * Synchronization: atomic. - */ - unsigned purging; - /* * Number of pages in active extents. * @@ -207,6 +199,13 @@ struct arena_s { extents_t extents_cached; extents_t extents_retained; + /* + * True if a thread is currently executing arena_purge_to_limit(). + * + * Synchronization: decay.mtx. 
+ */ + bool purging; + /* * Next extent size class in a growing series to use when satisfying a * request via the extent hooks (only if !config_munmap). This limits diff --git a/src/arena.c b/src/arena.c index a3a1fdd7..cb0194ae 100644 --- a/src/arena.c +++ b/src/arena.c @@ -777,9 +777,10 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); - if (atomic_cas_u(&arena->purging, 0, 1)) { + if (arena->purging) { return; } + arena->purging = true; extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t npurge, npurged; @@ -809,7 +810,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { } label_return: - atomic_write_u(&arena->purging, 0); + arena->purging = false; } void @@ -934,7 +935,6 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - assert(atomic_read_u(&arena->purging) == 0); atomic_write_zu(&arena->nactive, 0); } @@ -1676,7 +1676,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->dss_prec = extent_dss_prec_get(); - atomic_write_u(&arena->purging, 0); atomic_write_zu(&arena->nactive, 0); if (arena_decay_init(arena, arena_decay_time_default_get())) { @@ -1710,6 +1709,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } + arena->purging = false; + if (!config_munmap) { arena->extent_grow_next = psz2ind(HUGEPAGE); } From 21a68e2d22da08e0f60ff79d6866dd3add19775b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 9 Mar 2017 14:49:32 -0800 Subject: [PATCH 0708/2608] Convert rtree code to use C11 atomics In the process, I changed the implementation of rtree_elm_acquire so that it won't even try to CAS if its initial read (getting the extent + lock bit) indicates that the CAS is doomed to fail. This can significantly improve performance under contention. 
--- include/jemalloc/internal/rtree_inlines.h | 36 ++++++++++------ include/jemalloc/internal/rtree_structs.h | 15 +++---- src/rtree.c | 50 +++++++++++++++-------- 3 files changed, 62 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index f2efd710..b3301095 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -55,14 +55,16 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) { * synchronization, because the rtree update became visible in * memory before the pointer came into existence. */ - extent = elm->extent; + extent = (extent_t *)atomic_load_p(&elm->child_or_extent, + ATOMIC_RELAXED); } else { /* * An arbitrary read, e.g. on behalf of ivsalloc(), may not be * dependent on a previous rtree write, which means a stale read * could result if synchronization were omitted here. */ - extent = (extent_t *)atomic_read_p(&elm->pun); + extent = (extent_t *)atomic_load_p(&elm->child_or_extent, + ATOMIC_ACQUIRE); } /* Mask the lock bit. */ @@ -73,7 +75,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) { JEMALLOC_INLINE void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { - atomic_write_p(&elm->pun, extent); + atomic_store_p(&elm->child_or_extent, (void *)extent, ATOMIC_RELEASE); } JEMALLOC_ALWAYS_INLINE rtree_elm_t * @@ -161,11 +163,18 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, spin_t spinner = SPIN_INITIALIZER; while (true) { - extent_t *extent = rtree_elm_read(elm, false); /* The least significant bit serves as a lock. 
*/ - void *s = (void *)((uintptr_t)extent | (uintptr_t)0x1); - if (!atomic_cas_p(&elm->pun, (void *)extent, s)) { - break; + void *extent_and_lock = atomic_load_p(&elm->child_or_extent, + ATOMIC_RELAXED); + if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0)) + { + void *locked = (void *)((uintptr_t)extent_and_lock + | (uintptr_t)0x1); + if (likely(atomic_compare_exchange_strong_p( + &elm->child_or_extent, &extent_and_lock, locked, + ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { + break; + } } spin_adaptive(&spinner); } @@ -180,9 +189,9 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, JEMALLOC_INLINE extent_t * rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { extent_t *extent; - - assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); - extent = (extent_t *)((uintptr_t)elm->pun & ~((uintptr_t)0x1)); + void *ptr = atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED); + assert(((uintptr_t)ptr & (uintptr_t)0x1) == (uintptr_t)0x1); + extent = (extent_t *)((uintptr_t)ptr & ~((uintptr_t)0x1)); assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); if (config_debug) { @@ -196,13 +205,14 @@ JEMALLOC_INLINE void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, const extent_t *extent) { assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - assert(((uintptr_t)elm->pun & (uintptr_t)0x1) == (uintptr_t)0x1); + assert(((uintptr_t)atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED) + & (uintptr_t)0x1) == (uintptr_t)0x1); if (config_debug) { rtree_elm_witness_access(tsdn, rtree, elm); } - - elm->pun = (void *)((uintptr_t)extent | (uintptr_t)0x1); + atomic_store_p(&elm->child_or_extent, (void *)((uintptr_t)extent + | (uintptr_t)0x1), ATOMIC_RELEASE); assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent); } diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 312171e3..b62c489d 100644 --- 
a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -2,11 +2,8 @@ #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H struct rtree_elm_s { - union { - void *pun; - rtree_elm_t *child; - extent_t *extent; - }; + /* Either "rtree_elm_t *child;" or "extent_t *extent;". */ + atomic_p_t child_or_extent; }; struct rtree_elm_witness_s { @@ -41,11 +38,9 @@ struct rtree_ctx_s { }; struct rtree_s { - union { - void *root_pun; - rtree_elm_t *root; - }; - malloc_mutex_t init_lock; + /* An rtree_elm_t *. */ + atomic_p_t root; + malloc_mutex_t init_lock; }; #endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */ diff --git a/src/rtree.c b/src/rtree.c index a86fa45d..54dc3487 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -7,7 +7,7 @@ */ bool rtree_new(rtree_t *rtree) { - rtree->root_pun = NULL; + atomic_store_p(&rtree->root, NULL, ATOMIC_RELAXED); if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE)) { return true; } @@ -54,7 +54,8 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, nchildren = ZU(1) << rtree_levels[level].bits; for (i = 0; i < nchildren; i++) { - rtree_elm_t *child = node[i].child; + rtree_elm_t *child = (rtree_elm_t *)atomic_load_p( + &node[i].child_or_extent, ATOMIC_RELAXED); if (child != NULL) { rtree_delete_subtree(tsdn, rtree, child, level + 1); @@ -66,19 +67,25 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, void rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { - if (rtree->root_pun != NULL) { - rtree_delete_subtree(tsdn, rtree, rtree->root, 0); + rtree_elm_t *rtree_root = (rtree_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_RELAXED); + if (rtree_root != NULL) { + rtree_delete_subtree(tsdn, rtree, rtree_root, 0); } } #endif static rtree_elm_t * rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, - rtree_elm_t **elmp) { + atomic_p_t *elmp) { rtree_elm_t *node; malloc_mutex_lock(tsdn, &rtree->init_lock); - node = atomic_read_p((void**)elmp); + /* + * If *elmp is 
non-null, then it was initialized with the init lock + * held, so we can get by with 'relaxed' here. + */ + node = atomic_load_p(elmp, ATOMIC_RELAXED); if (node == NULL) { node = rtree_node_alloc(tsdn, rtree, ZU(1) << rtree_levels[level].bits); @@ -86,7 +93,11 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, malloc_mutex_unlock(tsdn, &rtree->init_lock); return NULL; } - atomic_write_p((void **)elmp, node); + /* + * Even though we hold the lock, a later reader might not; we + * need release semantics. + */ + atomic_store_p(elmp, node, ATOMIC_RELEASE); } malloc_mutex_unlock(tsdn, &rtree->init_lock); @@ -102,11 +113,14 @@ static rtree_elm_t * rtree_child_tryread(rtree_elm_t *elm, bool dependent) { rtree_elm_t *child; - /* Double-checked read (first read may be stale). */ - child = elm->child; - if (!dependent && !rtree_node_valid(child)) { - child = (rtree_elm_t *)atomic_read_p(&elm->pun); + if (dependent) { + child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent, + ATOMIC_RELAXED); + } else { + child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent, + ATOMIC_ACQUIRE); } + assert(!dependent || child != NULL); return child; } @@ -118,7 +132,8 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, child = rtree_child_tryread(elm, dependent); if (!dependent && unlikely(!rtree_node_valid(child))) { - child = rtree_node_init(tsdn, rtree, level+1, &elm->child); + child = rtree_node_init(tsdn, rtree, level + 1, + &elm->child_or_extent); } assert(!dependent || child != NULL); return child; @@ -126,10 +141,13 @@ rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *elm, unsigned level, static rtree_elm_t * rtree_subtree_tryread(rtree_t *rtree, bool dependent) { - /* Double-checked read (first read may be stale). 
*/ - rtree_elm_t *subtree = rtree->root; - if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = (rtree_elm_t *)atomic_read_p(&rtree->root_pun); + rtree_elm_t *subtree; + if (dependent) { + subtree = (rtree_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_RELAXED); + } else { + subtree = (rtree_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_ACQUIRE); } assert(!dependent || subtree != NULL); return subtree; From 7cbcd2e2b70d9a8547030b5a8640c85b2b7b50ab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Mar 2017 18:20:02 -0800 Subject: [PATCH 0709/2608] Fix pages_purge_forced() to discard pages on non-Linux systems. madvise(..., MADV_DONTNEED) only causes demand-zeroing on Linux, so fall back to overlaying a new mapping. --- configure.ac | 2 ++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 11 ++++++++--- include/jemalloc/internal/pages_types.h | 4 +++- src/pages.c | 9 ++++++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index 96b105f3..db42a505 100644 --- a/configure.ac +++ b/configure.ac @@ -547,6 +547,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) @@ -558,6 +559,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 500f4274..28eb0b34 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -265,12 +265,17 @@ * * madvise(..., MADV_FREE) : This marks pages as being unused, such that they * will be discarded rather than swapped out. - * madvise(..., MADV_DONTNEED) : This immediately discards pages, such that - * new pages will be demand-zeroed if the - * address region is later touched. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. */ #undef JEMALLOC_PURGE_MADVISE_FREE #undef JEMALLOC_PURGE_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS /* * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE diff --git a/include/jemalloc/internal/pages_types.h b/include/jemalloc/internal/pages_types.h index 9e6e7c5c..e44ee2a4 100644 --- a/include/jemalloc/internal/pages_types.h +++ b/include/jemalloc/internal/pages_types.h @@ -37,7 +37,9 @@ * next step after purging on Windows anyway, there's no point in adding such * complexity. 
*/ -#if !defined(_WIN32) && defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ + defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \ + defined(JEMALLOC_MAPS_COALESCE)) # define PAGES_CAN_PURGE_FORCED #endif diff --git a/src/pages.c b/src/pages.c index 444a97c2..e80c3652 100644 --- a/src/pages.c +++ b/src/pages.c @@ -170,6 +170,9 @@ pages_purge_lazy(void *addr, size_t size) { VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); #elif defined(JEMALLOC_PURGE_MADVISE_FREE) madvise(addr, size, MADV_FREE); +#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ + !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) + madvise(addr, size, MADV_DONTNEED); #else not_reached(); #endif @@ -182,8 +185,12 @@ pages_purge_forced(void *addr, size_t size) { return true; } -#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) +#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ + defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_MAPS_COALESCE) + /* Try to overlay a new demand-zeroed mapping. */ + return pages_commit(addr, size); #else not_reached(); #endif From 28078274c4885c5d98cbbb12dd7cb138397cde8f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Mar 2017 16:39:17 -0800 Subject: [PATCH 0710/2608] Add alignment/size assertions to pages_*(). These sanity checks prevent what otherwise might result in failed system calls and unintended fallback execution paths. 
--- src/pages.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/pages.c b/src/pages.c index e80c3652..9846e19e 100644 --- a/src/pages.c +++ b/src/pages.c @@ -19,6 +19,9 @@ static bool os_overcommits; void * pages_map(void *addr, size_t size, bool *commit) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + void *ret; assert(size != 0); @@ -63,6 +66,9 @@ pages_map(void *addr, size_t size, bool *commit) { void pages_unmap(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + #ifdef _WIN32 if (VirtualFree(addr, 0, MEM_RELEASE) == 0) #else @@ -122,6 +128,9 @@ pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, static bool pages_commit_impl(void *addr, size_t size, bool commit) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + if (os_overcommits) { return true; } @@ -162,6 +171,9 @@ pages_decommit(void *addr, size_t size) { bool pages_purge_lazy(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + if (!pages_can_purge_lazy) { return true; } @@ -181,6 +193,9 @@ pages_purge_lazy(void *addr, size_t size) { bool pages_purge_forced(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + if (!pages_can_purge_forced) { return true; } From 26d23da6cd91e4d7d6210c89de5194dedf0f0f60 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 9 Mar 2017 17:20:00 -0800 Subject: [PATCH 0711/2608] Prefer pages_purge_forced() over memset(). This has the dual advantages of allowing for sparsely used large allocations, and relying on the kernel to supply zeroed pages, which tends to be very fast on modern systems. 
--- src/extent.c | 13 ++++++++++--- src/large.c | 33 ++++++++++++++++++++------------- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/extent.c b/src/extent.c index 33589394..c44ecb89 100644 --- a/src/extent.c +++ b/src/extent.c @@ -759,8 +759,11 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (*zero) { if (!extent_zeroed_get(extent)) { - memset(extent_addr_get(extent), 0, - extent_usize_get(extent)); + if (pages_purge_forced(extent_base_get(extent), + extent_size_get(extent))) { + memset(extent_addr_get(extent), 0, + extent_usize_get(extent)); + } } else if (config_debug) { size_t i; size_t *p = (size_t *)(uintptr_t) @@ -971,7 +974,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_interior_register(tsdn, rtree_ctx, extent); } if (*zero && !extent_zeroed_get(extent)) { - memset(extent_addr_get(extent), 0, extent_usize_get(extent)); + if (pages_purge_forced(extent_base_get(extent), + extent_size_get(extent))) { + memset(extent_addr_get(extent), 0, + extent_usize_get(extent)); + } } /* * Increment extent_grow_next, but take care to do so atomically and diff --git a/src/large.c b/src/large.c index e9536bca..5145f418 100644 --- a/src/large.c +++ b/src/large.c @@ -25,9 +25,13 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return NULL; } + if (config_fill && unlikely(opt_zero)) { + zero = true; + } /* - * Copy zero into is_zeroed and pass the copy to extent_alloc(), so that - * it is possible to make correct junk/zero fill decisions below. + * Copy zero into is_zeroed and pass the copy when allocating the + * extent, so that it is possible to make correct junk/zero fill + * decisions below, even if is_zeroed ends up true when zero is false. 
*/ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { @@ -46,11 +50,8 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, prof_idump(tsdn); } - if (zero || (config_fill && unlikely(opt_zero))) { - if (!is_zeroed) { - memset(extent_addr_get(extent), 0, - extent_usize_get(extent)); - } + if (zero) { + assert(is_zeroed); } else if (config_fill && unlikely(opt_junk_alloc)) { memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, extent_usize_get(extent)); @@ -144,7 +145,16 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, return true; } - bool is_zeroed_trail = false; + if (config_fill && unlikely(opt_zero)) { + zero = true; + } + /* + * Copy zero into is_zeroed_trail and pass the copy when allocating the + * extent, so that it is possible to make correct junk/zero fill + * decisions below, even if is_zeroed_trail ends up true when zero is + * false. + */ + bool is_zeroed_trail = zero; bool commit = true; extent_t *trail; bool new_mapping; @@ -174,7 +184,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, arena_stats_mapped_add(tsdn, &arena->stats, trailsize); } - if (zero || (config_fill && unlikely(opt_zero))) { + if (zero) { if (config_cache_oblivious) { /* * Zero the trailing bytes of the original allocation's @@ -191,10 +201,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, assert(nzero > 0); memset(zbase, 0, nzero); } - if (!is_zeroed_trail) { - memset((void *)((uintptr_t)extent_addr_get(extent) + - oldusize), 0, usize - oldusize); - } + assert(is_zeroed_trail); } else if (config_fill && unlikely(opt_junk_alloc)) { memset((void *)((uintptr_t)extent_addr_get(extent) + oldusize), JEMALLOC_ALLOC_JUNK, usize - oldusize); From 4fc2acf5aef9ea8fe7e2dd39ee8b6a5050c5ff7f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 8 Mar 2017 15:56:31 -0800 Subject: [PATCH 0712/2608] Switch atomic uint64_ts in arena_stats_t to C11 atomics I expect this to be the trickiest 
conversion we will see, since we want atomics on 64-bit platforms, but are also always able to piggyback on some sort of external synchronization on non-64 bit platforms. --- include/jemalloc/internal/stats_structs.h | 33 ++++++---- src/arena.c | 60 +++++++++++------ src/ctl.c | 80 +++++++++++++++++------ 3 files changed, 119 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 06ba95fc..b64ba2d2 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -1,6 +1,13 @@ #ifndef JEMALLOC_INTERNAL_STATS_STRUCTS_H #define JEMALLOC_INTERNAL_STATS_STRUCTS_H +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif + struct tcache_bin_stats_s { /* * Number of allocation requests that corresponded to the size of this @@ -56,15 +63,15 @@ struct malloc_large_stats_s { * Total number of allocation/deallocation requests served directly by * the arena. */ - uint64_t nmalloc; - uint64_t ndalloc; + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - uint64_t nrequests; /* Partially derived. */ + arena_stats_u64_t nrequests; /* Partially derived. */ /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. */ @@ -96,18 +103,18 @@ struct arena_stats_s { * and total pages purged in order to keep dirty unused memory under * control. */ - uint64_t npurge; - uint64_t nmadvise; - uint64_t purged; + arena_stats_u64_t npurge; + arena_stats_u64_t nmadvise; + arena_stats_u64_t purged; - size_t base; /* Derived. */ - size_t internal; - size_t resident; /* Derived. */ + size_t base; /* Derived. 
*/ + size_t internal; + size_t resident; /* Derived. */ - size_t allocated_large; /* Derived. */ - uint64_t nmalloc_large; /* Derived. */ - uint64_t ndalloc_large; /* Derived. */ - uint64_t nrequests_large; /* Derived. */ + size_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ /* Number of bytes cached in tcache associated with this arena. */ size_t tcache_bytes; /* Derived. */ diff --git a/src/arena.c b/src/arena.c index cb0194ae..1fbf87dd 100644 --- a/src/arena.c +++ b/src/arena.c @@ -78,9 +78,10 @@ arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { } static uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p) { +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p) { #ifdef JEMALLOC_ATOMIC_U64 - return atomic_read_u64(p); + return atomic_load_u64(p, ATOMIC_RELAXED); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); return *p; @@ -88,10 +89,10 @@ arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p) { } static void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, - uint64_t x) { +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { #ifdef JEMALLOC_ATOMIC_U64 - atomic_add_u64(p, x); + atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); *p += x; @@ -99,11 +100,11 @@ arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, } UNUSED static void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, - uint64_t x) { +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { #ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_sub_u64(p, x); - assert(r + x >= r); + UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, 
ATOMIC_RELAXED); + assert(r - x <= r); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); *p -= x; @@ -111,6 +112,21 @@ arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, uint64_t *p, #endif } +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). + */ +static void +arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); +#else + *dst += src; +#endif +} + static size_t arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p) { #ifdef JEMALLOC_ATOMIC_U64 @@ -191,12 +207,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, &arena->stats.mapped); astats->retained += (extents_npages_get(&arena->extents_retained) << LG_PAGE); - astats->npurge += arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.npurge); - astats->nmadvise += arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.nmadvise); - astats->purged += arena_stats_read_u64(tsdn, &arena->stats, - &arena->stats.purged); + arena_stats_accum_u64(&astats->npurge, arena_stats_read_u64(tsdn, + &arena->stats, &arena->stats.npurge)); + arena_stats_accum_u64(&astats->nmadvise, arena_stats_read_u64(tsdn, + &arena->stats, &arena->stats.nmadvise)); + arena_stats_accum_u64(&astats->purged, arena_stats_read_u64(tsdn, + &arena->stats, &arena->stats.purged)); astats->base += base_allocated; astats->internal += arena_internal_get(arena); astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) + @@ -205,18 +221,20 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < NSIZES - NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nmalloc); - lstats[i].nmalloc += nmalloc; - 
astats->nmalloc_large += nmalloc; + arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc); + arena_stats_accum_u64(&astats->nmalloc_large, nmalloc); uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].ndalloc); - lstats[i].ndalloc += ndalloc; - astats->ndalloc_large += ndalloc; + arena_stats_accum_u64(&lstats[i].ndalloc, ndalloc); + arena_stats_accum_u64(&astats->ndalloc_large, ndalloc); uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nrequests); - lstats[i].nrequests += nmalloc + nrequests; - astats->nrequests_large += nmalloc + nrequests; + arena_stats_accum_u64(&lstats[i].nrequests, + nmalloc + nrequests); + arena_stats_accum_u64(&astats->nrequests_large, + nmalloc + nrequests); assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); diff --git a/src/ctl.c b/src/ctl.c index d4ab699f..bb835836 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -431,6 +431,33 @@ static const ctl_named_node_t super_root_node[] = { /******************************************************************************/ +/* + * Sets *dst + *src non-atomically. This is safe, since everything is + * synchronized by the ctl mutex. + */ +static void +accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); + atomic_store_u64(dst, cur_dst + cur_src, ATOMIC_RELAXED); +#else + *dst += *src; +#endif +} + +/* Likewise: with ctl mutex synchronization, reading is simple. 
*/ +static uint64_t +arena_stats_read_u64(arena_stats_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(p, ATOMIC_RELAXED); +#else + return *p; +#endif +} + +/******************************************************************************/ + static unsigned arenas_i2a_impl(size_t i, bool compat, bool validate) { unsigned a; @@ -589,9 +616,12 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, sdstats->astats.mapped += astats->astats.mapped; sdstats->astats.retained += astats->astats.retained; } - sdstats->astats.npurge += astats->astats.npurge; - sdstats->astats.nmadvise += astats->astats.nmadvise; - sdstats->astats.purged += astats->astats.purged; + accum_arena_stats_u64(&sdstats->astats.npurge, + &astats->astats.npurge); + accum_arena_stats_u64(&sdstats->astats.nmadvise, + &astats->astats.nmadvise); + accum_arena_stats_u64(&sdstats->astats.purged, + &astats->astats.purged); if (!destroyed) { sdstats->astats.base += astats->astats.base; @@ -616,10 +646,12 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, } else { assert(astats->astats.allocated_large == 0); } - sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; - sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sdstats->astats.nrequests_large += - astats->astats.nrequests_large; + accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + &astats->astats.nmalloc_large); + accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + &astats->astats.ndalloc_large); + accum_arena_stats_u64(&sdstats->astats.nrequests_large, + &astats->astats.nrequests_large); if (config_tcache) { sdstats->astats.tcache_bytes += @@ -654,10 +686,12 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, } for (i = 0; i < NSIZES - NBINS; i++) { - sdstats->lstats[i].nmalloc += astats->lstats[i].nmalloc; - sdstats->lstats[i].ndalloc += astats->lstats[i].ndalloc; - sdstats->lstats[i].nrequests += - astats->lstats[i].nrequests; + 
accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + &astats->lstats[i].nmalloc); + accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + &astats->lstats[i].ndalloc); + accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += astats->lstats[i].curlextents; @@ -2139,11 +2173,11 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, CTL_RO_CGEN(config_stats, stats_arenas_i_retained, arenas_i(mib[2])->astats->astats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - arenas_i(mib[2])->astats->astats.npurge, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - arenas_i(mib[2])->astats->astats.nmadvise, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - arenas_i(mib[2])->astats->astats.purged, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, arenas_i(mib[2])->astats->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, @@ -2164,11 +2198,14 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, arenas_i(mib[2])->astats->astats.allocated_large, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - arenas_i(mib[2])->astats->astats.ndalloc_large, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.ndalloc_large), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) /* Intentional. 
*/ + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), + uint64_t) /* Intentional. */ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) @@ -2199,11 +2236,14 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc, uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - arenas_i(mib[2])->astats->lstats[mib[4]].nrequests, uint64_t) + arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) From ee202efc79d650e16e3ecb1569efccbc5666e116 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 13 Mar 2017 16:18:40 -0700 Subject: [PATCH 0713/2608] Convert remaining arena_stats_t fields to atomics These were all size_ts, so we have atomics support for them on all platforms, so the conversion is straightforward. Left non-atomic is curlextents, which AFAICT is not used atomically anywhere. 
--- include/jemalloc/internal/arena_inlines_a.h | 6 +- include/jemalloc/internal/stats_structs.h | 14 ++-- src/arena.c | 56 +++++++++------- src/ctl.c | 74 ++++++++++++++------- 4 files changed, 93 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 9dd5304c..e1c47652 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -19,17 +19,17 @@ arena_ind_get(const arena_t *arena) { JEMALLOC_INLINE void arena_internal_add(arena_t *arena, size_t size) { - atomic_add_zu(&arena->stats.internal, size); + atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); } JEMALLOC_INLINE void arena_internal_sub(arena_t *arena, size_t size) { - atomic_sub_zu(&arena->stats.internal, size); + atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); } JEMALLOC_INLINE size_t arena_internal_get(arena_t *arena) { - return atomic_read_zu(&arena->stats.internal); + return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } JEMALLOC_INLINE bool diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index b64ba2d2..4e9c898a 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -88,7 +88,7 @@ struct arena_stats_s { #endif /* Number of bytes currently mapped, excluding retained memory. */ - size_t mapped; /* Partially derived. */ + atomic_zu_t mapped; /* Partially derived. */ /* * Number of bytes currently retained as a side effect of munmap() being @@ -96,7 +96,7 @@ struct arena_stats_s { * always decommitted or purged), but they are excluded from the mapped * statistic (above). */ - size_t retained; /* Derived. */ + atomic_zu_t retained; /* Derived. 
*/ /* * Total number of purge sweeps, total number of madvise calls made, @@ -107,17 +107,17 @@ struct arena_stats_s { arena_stats_u64_t nmadvise; arena_stats_u64_t purged; - size_t base; /* Derived. */ - size_t internal; - size_t resident; /* Derived. */ + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ - size_t allocated_large; /* Derived. */ + atomic_zu_t allocated_large; /* Derived. */ arena_stats_u64_t nmalloc_large; /* Derived. */ arena_stats_u64_t ndalloc_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. */ /* Number of bytes cached in tcache associated with this arena. */ - size_t tcache_bytes; /* Derived. */ + atomic_zu_t tcache_bytes; /* Derived. */ /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; diff --git a/src/arena.c b/src/arena.c index 1fbf87dd..417778b4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -128,39 +128,47 @@ arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { } static size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p) { +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { #ifdef JEMALLOC_ATOMIC_U64 - return atomic_read_zu(p); + return atomic_load_zu(p, ATOMIC_RELAXED); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; + return atomic_load_zu(p, ATOMIC_RELAXED); #endif } static void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p, +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, size_t x) { #ifdef JEMALLOC_ATOMIC_U64 - atomic_add_zu(p, x); + atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur + x, ATOMIC_RELAXED); #endif } static void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t *p, +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t 
*arena_stats, atomic_zu_t *p, size_t x) { #ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_sub_zu(p, x); - assert(r + x >= r); + UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + assert(r - x <= r); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur - x, ATOMIC_RELAXED); #endif } +/* Like the _u64 variant, needs an externally synchronized *dst. */ +static void +arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); +} + void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { @@ -203,20 +211,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_lock(tsdn, &arena->stats); - astats->mapped += base_mapped + arena_stats_read_zu(tsdn, &arena->stats, - &arena->stats.mapped); - astats->retained += (extents_npages_get(&arena->extents_retained) << - LG_PAGE); + arena_stats_accum_zu(&astats->mapped, base_mapped + + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); + arena_stats_accum_zu(&astats->retained, + extents_npages_get(&arena->extents_retained) << LG_PAGE); arena_stats_accum_u64(&astats->npurge, arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.npurge)); arena_stats_accum_u64(&astats->nmadvise, arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.nmadvise)); arena_stats_accum_u64(&astats->purged, arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.purged)); - astats->base += base_allocated; - astats->internal += arena_internal_get(arena); - astats->resident += base_resident + (((atomic_read_zu(&arena->nactive) + - extents_npages_get(&arena->extents_cached)) << LG_PAGE)); + arena_stats_accum_zu(&astats->base, base_allocated); + arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); + 
arena_stats_accum_zu(&astats->resident, base_resident + + (((atomic_read_zu(&arena->nactive) + + extents_npages_get(&arena->extents_cached)) << LG_PAGE))); for (szind_t i = 0; i < NSIZES - NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, @@ -240,7 +249,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - astats->allocated_large += curlextents * index2size(NBINS + i); + arena_stats_accum_zu(&astats->allocated_large, + curlextents * index2size(NBINS + i)); } arena_stats_unlock(tsdn, &arena->stats); @@ -250,13 +260,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, tcache_t *tcache; /* tcache_bytes counts currently cached bytes. */ - astats->tcache_bytes = 0; + atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_foreach(tcache, &arena->tcache_ql, link) { for (szind_t i = 0; i < nhbins; i++) { tbin = &tcache->tbins[i]; - astats->tcache_bytes += tbin->ncached * - index2size(i); + arena_stats_accum_zu(&astats->tcache_bytes, + tbin->ncached * index2size(i)); } } malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); diff --git a/src/ctl.c b/src/ctl.c index bb835836..70721584 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -456,6 +456,12 @@ arena_stats_read_u64(arena_stats_u64_t *p) { #endif } +static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); + atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); +} + /******************************************************************************/ static unsigned @@ -613,8 +619,10 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - sdstats->astats.mapped += 
astats->astats.mapped; - sdstats->astats.retained += astats->astats.retained; + accum_atomic_zu(&sdstats->astats.mapped, + &astats->astats.mapped); + accum_atomic_zu(&sdstats->astats.retained, + &astats->astats.retained); } accum_arena_stats_u64(&sdstats->astats.npurge, &astats->astats.npurge); @@ -624,11 +632,15 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.purged); if (!destroyed) { - sdstats->astats.base += astats->astats.base; - sdstats->astats.internal += astats->astats.internal; - sdstats->astats.resident += astats->astats.resident; + accum_atomic_zu(&sdstats->astats.base, + &astats->astats.base); + accum_atomic_zu(&sdstats->astats.internal, + &astats->astats.internal); + accum_atomic_zu(&sdstats->astats.resident, + &astats->astats.resident); } else { - assert(astats->astats.internal == 0); + assert(atomic_load_zu( + &astats->astats.internal, ATOMIC_RELAXED) == 0); } if (!destroyed) { @@ -641,10 +653,11 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, sdstats->nrequests_small += astats->nrequests_small; if (!destroyed) { - sdstats->astats.allocated_large += - astats->astats.allocated_large; + accum_atomic_zu(&sdstats->astats.allocated_large, + &astats->astats.allocated_large); } else { - assert(astats->astats.allocated_large == 0); + assert(atomic_load_zu(&astats->astats.allocated_large, + ATOMIC_RELAXED) == 0); } accum_arena_stats_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); @@ -654,8 +667,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.nrequests_large); if (config_tcache) { - sdstats->astats.tcache_bytes += - astats->astats.tcache_bytes; + accum_atomic_zu(&sdstats->astats.tcache_bytes, + &astats->astats.tcache_bytes); } for (i = 0; i < NBINS; i++) { @@ -772,13 +785,19 @@ ctl_refresh(tsdn_t *tsdn) { if (config_stats) { ctl_stats->allocated = ctl_sarena->astats->allocated_small + - 
ctl_sarena->astats->astats.allocated_large; + atomic_load_zu(&ctl_sarena->astats->astats.allocated_large, + ATOMIC_RELAXED); ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); - ctl_stats->metadata = ctl_sarena->astats->astats.base + - ctl_sarena->astats->astats.internal; - ctl_stats->resident = ctl_sarena->astats->astats.resident; - ctl_stats->mapped = ctl_sarena->astats->astats.mapped; - ctl_stats->retained = ctl_sarena->astats->astats.retained; + ctl_stats->metadata = atomic_load_zu( + &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + + atomic_load_zu(&ctl_sarena->astats->astats.internal, + ATOMIC_RELAXED); + ctl_stats->resident = atomic_load_zu( + &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); + ctl_stats->mapped = atomic_load_zu( + &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED); + ctl_stats->retained = atomic_load_zu( + &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED); } ctl_arenas->epoch++; } @@ -2169,9 +2188,11 @@ CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - arenas_i(mib[2])->astats->astats.mapped, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - arenas_i(mib[2])->astats->astats.retained, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, @@ -2179,13 +2200,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, CTL_RO_CGEN(config_stats, stats_arenas_i_purged, arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - 
arenas_i(mib[2])->astats->astats.base, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, - arenas_i(mib[2])->astats->astats.internal, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_tcache_bytes, - arenas_i(mib[2])->astats->astats.tcache_bytes, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - arenas_i(mib[2])->astats->astats.resident, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) @@ -2196,7 +2221,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, arenas_i(mib[2])->astats->nrequests_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - arenas_i(mib[2])->astats->astats.allocated_large, size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) From 765edd67b4915a392439a53141606d9e242a6618 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Mar 2017 09:22:33 -0800 Subject: [PATCH 0714/2608] Refactor decay-related function parametrization. Refactor most of the decay-related functions to take as parameters the decay_t and associated extents_t structures to operate on. This prepares for supporting both lazy and forced purging on different decay schedules. 
--- include/jemalloc/internal/arena_structs_b.h | 14 +- src/arena.c | 182 +++++++++++--------- 2 files changed, 103 insertions(+), 93 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 84c179e8..369b4cd2 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -174,13 +174,6 @@ struct arena_s { */ size_t nactive; - /* - * Decay-based purging state. - * - * Synchronization: lock. - */ - arena_decay_t decay; - /* * Extant large allocations. * @@ -199,6 +192,13 @@ struct arena_s { extents_t extents_cached; extents_t extents_retained; + /* + * Decay-based purging state. + * + * Synchronization: internal. + */ + arena_decay_t decay; + /* * True if a thread is currently executing arena_purge_to_limit(). * diff --git a/src/arena.c b/src/arena.c index 417778b4..ea8e6a55 100644 --- a/src/arena.c +++ b/src/arena.c @@ -38,7 +38,7 @@ const arena_bin_info_t arena_bin_info[NBINS] = { */ static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, - size_t ndirty_limit); + arena_decay_t *decay, extents_t *extents, size_t ndirty_limit); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_bin_t *bin); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, @@ -506,39 +506,39 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } static ssize_t -arena_decay_time_read(arena_t *arena) { - return atomic_read_zd(&arena->decay.time); +arena_decay_time_read(arena_decay_t *decay) { + return atomic_read_zd(&decay->time); } static void -arena_decay_time_write(arena_t *arena, ssize_t decay_time) { - atomic_write_zd(&arena->decay.time, decay_time); +arena_decay_time_write(arena_decay_t *decay, ssize_t decay_time) { + atomic_write_zd(&decay->time, decay_time); } static void -arena_decay_deadline_init(arena_t *arena) { +arena_decay_deadline_init(arena_decay_t *decay) { /* * Generate a new deadline 
that is uniformly random within the next * epoch after the current one. */ - nstime_copy(&arena->decay.deadline, &arena->decay.epoch); - nstime_add(&arena->decay.deadline, &arena->decay.interval); - if (arena_decay_time_read(arena) > 0) { + nstime_copy(&decay->deadline, &decay->epoch); + nstime_add(&decay->deadline, &decay->interval); + if (arena_decay_time_read(decay) > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range_u64(&arena->decay.jitter_state, - nstime_ns(&arena->decay.interval))); - nstime_add(&arena->decay.deadline, &jitter); + nstime_init(&jitter, prng_range_u64(&decay->jitter_state, + nstime_ns(&decay->interval))); + nstime_add(&decay->deadline, &jitter); } } static bool -arena_decay_deadline_reached(const arena_t *arena, const nstime_t *time) { - return (nstime_compare(&arena->decay.deadline, time) <= 0); +arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) { + return (nstime_compare(&decay->deadline, time) <= 0); } static size_t -arena_decay_backlog_npages_limit(const arena_t *arena) { +arena_decay_backlog_npages_limit(const arena_decay_t *decay) { static const uint64_t h_steps[] = { #define STEP(step, h, x, y) \ h, @@ -556,7 +556,7 @@ arena_decay_backlog_npages_limit(const arena_t *arena) { */ sum = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - sum += arena->decay.backlog[i] * h_steps[i]; + sum += decay->backlog[i] * h_steps[i]; } npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); @@ -564,101 +564,106 @@ arena_decay_backlog_npages_limit(const arena_t *arena) { } static void -arena_decay_backlog_update_last(arena_t *arena) { - size_t ndirty = extents_npages_get(&arena->extents_cached); - size_t ndirty_delta = (ndirty > arena->decay.nunpurged) ? 
ndirty - - arena->decay.nunpurged : 0; - arena->decay.backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +arena_decay_backlog_update_last(arena_decay_t *decay, extents_t *extents) { + size_t ndirty = extents_npages_get(extents); + size_t ndirty_delta = (ndirty > decay->nunpurged) ? ndirty - + decay->nunpurged : 0; + decay->backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; } static void -arena_decay_backlog_update(arena_t *arena, uint64_t nadvance_u64) { +arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents, + uint64_t nadvance_u64) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(arena->decay.backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; assert((uint64_t)nadvance_z == nadvance_u64); - memmove(arena->decay.backlog, &arena->decay.backlog[nadvance_z], + memmove(decay->backlog, &decay->backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&arena->decay.backlog[SMOOTHSTEP_NSTEPS - + memset(&decay->backlog[SMOOTHSTEP_NSTEPS - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); } } - arena_decay_backlog_update_last(arena); + arena_decay_backlog_update_last(decay, extents); } static void -arena_decay_epoch_advance_helper(arena_t *arena, const nstime_t *time) { +arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, + const nstime_t *time) { uint64_t nadvance_u64; nstime_t delta; - assert(arena_decay_deadline_reached(arena, time)); + assert(arena_decay_deadline_reached(decay, time)); nstime_copy(&delta, time); - nstime_subtract(&delta, &arena->decay.epoch); - nadvance_u64 = nstime_divide(&delta, &arena->decay.interval); + nstime_subtract(&delta, &decay->epoch); + nadvance_u64 = nstime_divide(&delta, &decay->interval); assert(nadvance_u64 > 0); /* Add nadvance_u64 decay intervals to epoch. 
*/ - nstime_copy(&delta, &arena->decay.interval); + nstime_copy(&delta, &decay->interval); nstime_imultiply(&delta, nadvance_u64); - nstime_add(&arena->decay.epoch, &delta); + nstime_add(&decay->epoch, &delta); /* Set a new deadline. */ - arena_decay_deadline_init(arena); + arena_decay_deadline_init(decay); /* Update the backlog. */ - arena_decay_backlog_update(arena, nadvance_u64); + arena_decay_backlog_update(decay, extents, nadvance_u64); } static void -arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena) { - size_t ndirty_limit = arena_decay_backlog_npages_limit(arena); +arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, extents_t *extents) { + size_t ndirty_limit = arena_decay_backlog_npages_limit(decay); - if (extents_npages_get(&arena->extents_cached) > ndirty_limit) { - arena_purge_to_limit(tsdn, arena, ndirty_limit); + if (extents_npages_get(extents) > ndirty_limit) { + arena_purge_to_limit(tsdn, arena, decay, extents, ndirty_limit); } /* * There may be concurrent ndirty fluctuation between the purge above * and the nunpurged update below, but this is inconsequential to decay * machinery correctness. 
*/ - arena->decay.nunpurged = extents_npages_get(&arena->extents_cached); + decay->nunpurged = extents_npages_get(extents); } static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, const nstime_t *time) { - arena_decay_epoch_advance_helper(arena, time); - arena_decay_epoch_advance_purge(tsdn, arena); +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, const nstime_t *time) { + arena_decay_epoch_advance_helper(decay, extents, time); + arena_decay_epoch_advance_purge(tsdn, arena, decay, extents); } static void -arena_decay_reinit(arena_t *arena, ssize_t decay_time) { - arena_decay_time_write(arena, decay_time); +arena_decay_reinit(arena_decay_t *decay, extents_t *extents, + ssize_t decay_time) { + arena_decay_time_write(decay, decay_time); if (decay_time > 0) { - nstime_init2(&arena->decay.interval, decay_time, 0); - nstime_idivide(&arena->decay.interval, SMOOTHSTEP_NSTEPS); + nstime_init2(&decay->interval, decay_time, 0); + nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&arena->decay.epoch, 0); - nstime_update(&arena->decay.epoch); - arena->decay.jitter_state = (uint64_t)(uintptr_t)arena; - arena_decay_deadline_init(arena); - arena->decay.nunpurged = extents_npages_get(&arena->extents_cached); - memset(arena->decay.backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); + nstime_init(&decay->epoch, 0); + nstime_update(&decay->epoch); + decay->jitter_state = (uint64_t)(uintptr_t)decay; + arena_decay_deadline_init(decay); + decay->nunpurged = extents_npages_get(extents); + memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } static bool -arena_decay_init(arena_t *arena, ssize_t decay_time) { - if (malloc_mutex_init(&arena->decay.mtx, "decay", WITNESS_RANK_DECAY)) { +arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time) { + if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY)) { return true; } - arena_decay_reinit(arena, decay_time); + 
arena_decay_reinit(decay, extents, decay_time); return false; } @@ -674,14 +679,15 @@ arena_decay_time_valid(ssize_t decay_time) { } static void -arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); +arena_maybe_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ - ssize_t decay_time = arena_decay_time_read(arena); + ssize_t decay_time = arena_decay_time_read(decay); if (decay_time <= 0) { if (decay_time == 0) { - arena_purge_to_limit(tsdn, arena, 0); + arena_purge_to_limit(tsdn, arena, decay, extents, 0); } return; } @@ -689,8 +695,8 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { nstime_t time; nstime_init(&time, 0); nstime_update(&time); - if (unlikely(!nstime_monotonic() && nstime_compare(&arena->decay.epoch, - &time) > 0)) { + if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time) + > 0)) { /* * Time went backwards. Move the epoch back in time and * generate a new deadline, with the expectation that time @@ -700,11 +706,11 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { * clock jitter estimation and compensation isn't feasible here * because calls into this code are event-driven. */ - nstime_copy(&arena->decay.epoch, &time); - arena_decay_deadline_init(arena); + nstime_copy(&decay->epoch, &time); + arena_decay_deadline_init(decay); } else { /* Verify that time does not go backwards. */ - assert(nstime_compare(&arena->decay.epoch, &time) <= 0); + assert(nstime_compare(&decay->epoch, &time) <= 0); } /* @@ -713,14 +719,14 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena) { * during the current epoch are not subject to purge until a future * epoch, so as a result purging only happens during epoch advances. 
*/ - if (arena_decay_deadline_reached(arena, &time)) { - arena_decay_epoch_advance(tsdn, arena, &time); + if (arena_decay_deadline_reached(decay, &time)) { + arena_decay_epoch_advance(tsdn, arena, decay, extents, &time); } } ssize_t arena_decay_time_get(arena_t *arena) { - return arena_decay_time_read(arena); + return arena_decay_time_read(&arena->decay); } bool @@ -738,8 +744,8 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(arena, decay_time); - arena_maybe_purge(tsdn, arena); + arena_decay_reinit(&arena->decay, &arena->extents_cached, decay_time); + arena_maybe_purge(tsdn, arena, &arena->decay, &arena->extents_cached); malloc_mutex_unlock(tsdn, &arena->decay.mtx); return false; @@ -747,14 +753,14 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { static size_t arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - size_t ndirty_limit, extent_list_t *purge_extents) { + extents_t *extents, size_t ndirty_limit, extent_list_t *purge_extents) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); /* Stash extents according to ndirty_limit. 
*/ size_t nstashed = 0; extent_t *extent; - while ((extent = extents_evict(tsdn, arena, r_extent_hooks, - &arena->extents_cached, ndirty_limit)) != NULL) { + while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + ndirty_limit)) != NULL) { extent_list_append(purge_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -798,12 +804,13 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, /* * ndirty_limit: Purge as many dirty extents as possible without violating the - * invariant: (extents_npages_get(&arena->extents_cached) >= ndirty_limit) + * invariant: (extents_npages_get(extents) >= ndirty_limit) */ static void -arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { +arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, size_t ndirty_limit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &arena->decay.mtx); + malloc_mutex_assert_owner(tsdn, &decay->mtx); if (arena->purging) { return; @@ -816,19 +823,19 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, size_t ndirty_limit) { extent_list_init(&purge_extents); - malloc_mutex_unlock(tsdn, &arena->decay.mtx); + malloc_mutex_unlock(tsdn, &decay->mtx); - npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, ndirty_limit, - &purge_extents); + npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, extents, + ndirty_limit, &purge_extents); if (npurge == 0) { - malloc_mutex_lock(tsdn, &arena->decay.mtx); + malloc_mutex_lock(tsdn, &decay->mtx); goto label_return; } npurged = arena_purge_stashed(tsdn, arena, &extent_hooks, &purge_extents); assert(npurged == npurge); - malloc_mutex_lock(tsdn, &arena->decay.mtx); + malloc_mutex_lock(tsdn, &decay->mtx); if (config_stats) { arena_stats_lock(tsdn, &arena->stats); @@ -845,9 +852,11 @@ void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { malloc_mutex_lock(tsdn, &arena->decay.mtx); if (all) { - arena_purge_to_limit(tsdn, arena, 0); + 
arena_purge_to_limit(tsdn, arena, &arena->decay, + &arena->extents_cached, 0); } else { - arena_maybe_purge(tsdn, arena); + arena_maybe_purge(tsdn, arena, &arena->decay, + &arena->extents_cached); } malloc_mutex_unlock(tsdn, &arena->decay.mtx); } @@ -1706,10 +1715,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_write_zu(&arena->nactive, 0); - if (arena_decay_init(arena, arena_decay_time_default_get())) { - goto label_error; - } - extent_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE)) { @@ -1737,6 +1742,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } + if (arena_decay_init(&arena->decay, &arena->extents_cached, + arena_decay_time_default_get())) { + goto label_error; + } + arena->purging = false; if (!config_munmap) { From 38a5bfc8169b018b5b71cc72daad14c3b2f5b206 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 7 Mar 2017 19:52:57 -0800 Subject: [PATCH 0715/2608] Move arena_t's purging field into arena_decay_t. --- include/jemalloc/internal/arena_structs_b.h | 12 +++++------- src/arena.c | 9 ++++----- 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 369b4cd2..612b4e7d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -39,6 +39,11 @@ struct arena_bin_info_s { struct arena_decay_s { /* Synchronizes all non-atomic fields. */ malloc_mutex_t mtx; + /* + * True if a thread is currently purging the extents associated with + * this decay structure. + */ + bool purging; /* * Approximate time in seconds from the creation of a set of unused * dirty pages until an equivalent set of unused dirty pages is purged @@ -199,13 +204,6 @@ struct arena_s { */ arena_decay_t decay; - /* - * True if a thread is currently executing arena_purge_to_limit(). 
- * - * Synchronization: decay.mtx. - */ - bool purging; - /* * Next extent size class in a growing series to use when satisfying a * request via the extent hooks (only if !config_munmap). This limits diff --git a/src/arena.c b/src/arena.c index ea8e6a55..c253760b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -663,6 +663,7 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time) { if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY)) { return true; } + decay->purging = false; arena_decay_reinit(decay, extents, decay_time); return false; } @@ -812,10 +813,10 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); - if (arena->purging) { + if (decay->purging) { return; } - arena->purging = true; + decay->purging = true; extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t npurge, npurged; @@ -845,7 +846,7 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } label_return: - arena->purging = false; + decay->purging = false; } void @@ -1747,8 +1748,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - arena->purging = false; - if (!config_munmap) { arena->extent_grow_next = psz2ind(HUGEPAGE); } From 64e458f5cdd64f9b67cb495f177ef96bf3ce4e0e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 8 Mar 2017 22:42:57 -0800 Subject: [PATCH 0716/2608] Implement two-phase decay-based purging. Split decay-based purging into two phases, the first of which uses lazy purging to convert dirty pages to "muzzy", and the second of which uses forced purging, decommit, or unmapping to convert pages to clean or destroy them altogether. Not all operating systems support lazy purging, yet the application may provide extent hooks that implement lazy purging, so care must be taken to dynamically omit the first phase when necessary. 
The mallctl interfaces change as follows: - opt.decay_time --> opt.{dirty,muzzy}_decay_time - arena..decay_time --> arena..{dirty,muzzy}_decay_time - arenas.decay_time --> arenas.{dirty,muzzy}_decay_time - stats.arenas..pdirty --> stats.arenas..p{dirty,muzzy} - stats.arenas..{npurge,nmadvise,purged} --> stats.arenas..{dirty,muzzy}_{npurge,nmadvise,purged} This resolves #521. --- Makefile.in | 4 +- doc/jemalloc.xml.in | 241 ++++++++--- include/jemalloc/internal/arena_externs.h | 31 +- include/jemalloc/internal/arena_inlines_b.h | 5 +- include/jemalloc/internal/arena_structs_b.h | 29 +- include/jemalloc/internal/arena_types.h | 5 +- include/jemalloc/internal/ctl_structs.h | 4 +- include/jemalloc/internal/extent_externs.h | 11 +- include/jemalloc/internal/extent_structs.h | 3 +- include/jemalloc/internal/private_symbols.txt | 22 +- include/jemalloc/internal/stats_structs.h | 21 +- include/jemalloc/internal/stats_types.h | 1 + src/arena.c | 386 ++++++++++++------ src/ctl.c | 214 +++++++--- src/extent.c | 73 ++-- src/jemalloc.c | 6 +- src/large.c | 21 +- src/stats.c | 99 +++-- test/unit/decay.c | 192 ++++++--- test/unit/decay.sh | 2 +- test/unit/mallctl.c | 160 ++++++-- test/unit/pack.sh | 2 +- test/unit/stats.c | 36 +- 23 files changed, 1078 insertions(+), 490 deletions(-) diff --git a/Makefile.in b/Makefile.in index 04ce288a..4fb852da 100644 --- a/Makefile.in +++ b/Makefile.in @@ -442,8 +442,8 @@ ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) endif check_integration_decay: tests_integration check_integration_dir - $(MALLOC_CONF)="decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) - $(MALLOC_CONF)="decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) 
$(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) + $(MALLOC_CONF)="dirty_decay_time:-1,muzzy_decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) + $(MALLOC_CONF)="dirty_decay_time:0,muzzy_decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) stress: tests_stress stress_dir diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 937879a8..7faa474d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -944,24 +944,54 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", percpu. - + - opt.decay_time + opt.dirty_decay_time (ssize_t) r- Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is - purged and/or reused. The pages are incrementally purged according to a - sigmoidal decay curve that starts and ends with zero purge rate. A - decay time of 0 causes all unused dirty pages to be purged immediately - upon creation. A decay time of -1 disables purging. The default decay - time is 10 seconds. See arenas.decay_time + purged (i.e. converted to muzzy via e.g. + madvise(...MADV_FREE) + if supported by the operating system, or converted to clean otherwise) + and/or reused. Dirty pages are defined as previously having been + potentially written to by the application, and therefore consuming + physical memory, yet having no current use. The pages are incrementally + purged according to a sigmoidal decay curve that starts and ends with + zero purge rate. A decay time of 0 causes all unused dirty pages to be + purged immediately upon creation. A decay time of -1 disables purging. 
+ The default decay time is 10 seconds. See arenas.dirty_decay_time and arena.<i>.decay_time - for related dynamic control options. - + linkend="arena.i.muzzy_decay_time">arena.<i>.muzzy_decay_time + for related dynamic control options. See opt.muzzy_decay_time + for a description of muzzy pages. + + + + + opt.muzzy_decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused muzzy pages until an equivalent set of unused muzzy pages is + purged (i.e. converted to clean) and/or reused. Muzzy pages are defined + as previously having been unused dirty pages that were subsequently + purged in a manner that left them subject to the reclamation whims of + the operating system (e.g. + madvise(...MADV_FREE)), + and therefore in an indeterminate state. The pages are incrementally + purged according to a sigmoidal decay curve that starts and ends with + zero purge rate. A decay time of 0 causes all unused muzzy pages to be + purged immediately upon creation. A decay time of -1 disables purging. + The default decay time is 10 seconds. See arenas.muzzy_decay_time + and arena.<i>.muzzy_decay_time + for related dynamic control options. @@ -1460,6 +1490,22 @@ malloc_conf = "xmalloc:true";]]> initialized (always true). + + + arena.<i>.decay + (void) + -- + + Trigger decay-based purging of unused dirty/muzzy pages + for arena <i>, or for all arenas if <i> equals + MALLCTL_ARENAS_ALL. The proportion of unused + dirty/muzzy pages to be purged depends on the current time; see opt.dirty_decay_time + and opt.muzy_decay_time + for details. + + arena.<i>.purge @@ -1471,20 +1517,6 @@ malloc_conf = "xmalloc:true";]]> - - - arena.<i>.decay - (void) - -- - - Trigger decay-based purging of unused dirty pages for - arena <i>, or for all arenas if <i> equals - MALLCTL_ARENAS_ALL. The proportion of unused dirty - pages to be purged depends on the current time; see opt.decay_time for - details. 
- - arena.<i>.reset @@ -1532,9 +1564,9 @@ malloc_conf = "xmalloc:true";]]> settings. - + - arena.<i>.decay_time + arena.<i>.dirty_decay_time (ssize_t) rw @@ -1544,8 +1576,24 @@ malloc_conf = "xmalloc:true";]]> set, all currently unused dirty pages are considered to have fully decayed, which causes immediate purging of all unused dirty pages unless the decay time is set to -1 (i.e. purging disabled). See opt.decay_time for - additional information. + linkend="opt.dirty_decay_time">opt.dirty_decay_time + for additional information. + + + + + arena.<i>.muzzy_decay_time + (ssize_t) + rw + + Current per-arena approximate time in seconds from the + creation of a set of unused muzzy pages until an equivalent set of + unused muzzy pages is purged and/or reused. Each time this interface is + set, all currently unused muzzy pages are considered to have fully + decayed, which causes immediate purging of all unused muzzy pages unless + the decay time is set to -1 (i.e. purging disabled). See opt.muzzy_decay_time + for additional information. @@ -1584,7 +1632,7 @@ struct extent_hooks_s { mapped committed memory, in the simplest case followed by deallocation. However, there are performance and platform reasons to retain extents for later reuse. Cleanup attempts cascade from deallocation to decommit - to lazy purging to forced purging, which gives the extent management + to forced purging to lazy purging, which gives the extent management functions opportunities to reject the most permanent cleanup operations in favor of less permanent (and often less costly) operations. All operations except allocation can be universally opted out of by setting @@ -1707,12 +1755,14 @@ struct extent_hooks_s { addr and size at offset bytes, extending for length on behalf of arena - arena_ind. 
A lazy extent purge function can - delay purging indefinitely and leave the pages within the purged virtual - memory range in an indeterminite state, whereas a forced extent purge - function immediately purges, and the pages within the virtual memory - range will be zero-filled the next time they are accessed. If the - function returns true, this indicates failure to purge. + arena_ind. A lazy extent purge function (e.g. + implemented via + madvise(...MADV_FREE)) + can delay purging indefinitely and leave the pages within the purged + virtual memory range in an indeterminite state, whereas a forced extent + purge function immediately purges, and the pages within the virtual + memory range will be zero-filled the next time they are accessed. If + the function returns true, this indicates failure to purge. typedef bool (extent_split_t) @@ -1769,19 +1819,34 @@ struct extent_hooks_s { Current limit on number of arenas. - + - arenas.decay_time + arenas.dirty_decay_time (ssize_t) rw Current default per-arena approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused, used to initialize arena.<i>.decay_time + linkend="arena.i.dirty_decay_time">arena.<i>.dirty_decay_time during arena creation. See opt.decay_time for - additional information. + linkend="opt.dirty_decay_time">opt.dirty_decay_time + for additional information. + + + + + arenas.muzzy_decay_time + (ssize_t) + rw + + Current default per-arena approximate time in seconds + from the creation of a set of unused muzzy pages until an equivalent set + of unused muzzy pages is purged and/or reused, used to initialize arena.<i>.muzzy_decay_time + during arena creation. See opt.muzzy_decay_time + for additional information. @@ -2014,7 +2079,9 @@ struct extent_hooks_s { equal to stats.allocated. 
This does not include - stats.arenas.<i>.pdirty, nor pages + stats.arenas.<i>.pdirty, + + stats.arenas.<i>.pmuzzy, nor pages entirely devoted to allocator metadata. @@ -2099,16 +2166,29 @@ struct extent_hooks_s { - + - stats.arenas.<i>.decay_time + stats.arenas.<i>.dirty_decay_time (ssize_t) r- Approximate time in seconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused. See opt.decay_time + linkend="opt.dirty_decay_time">opt.dirty_decay_time + for details. + + + + + stats.arenas.<i>.muzzy_decay_time + (ssize_t) + r- + + Approximate time in seconds from the creation of a set + of unused muzzy pages until an equivalent set of unused muzzy pages is + purged and/or reused. See opt.muzzy_decay_time for details. @@ -2138,10 +2218,22 @@ struct extent_hooks_s { r- Number of pages within unused extents that are - potentially dirty, and for which - madvise(... - MADV_DONTNEED) or - similar has not been called. + potentially dirty, and for which madvise() or + similar has not been called. See opt.dirty_decay_time + for a description of dirty pages. + + + + + stats.arenas.<i>.pmuzzy + (size_t) + r- + + Number of pages within unused extents that are muzzy. + See opt.muzzy_decay_time + for a description of muzzy pages. @@ -2207,9 +2299,9 @@ struct extent_hooks_s { size. - + - stats.arenas.<i>.npurge + stats.arenas.<i>.dirty_npurge (uint64_t) r- [] @@ -2218,26 +2310,57 @@ struct extent_hooks_s { - + - stats.arenas.<i>.nmadvise + stats.arenas.<i>.dirty_nmadvise (uint64_t) r- [] - Number of madvise(... - MADV_DONTNEED) or - similar calls made to purge dirty pages. + Number of madvise() or similar + calls made to purge dirty pages. - + - stats.arenas.<i>.purged + stats.arenas.<i>.dirty_purged (uint64_t) r- [] - Number of pages purged. + Number of dirty pages purged. + + + + + stats.arenas.<i>.muzzy_npurge + (uint64_t) + r- + [] + + Number of muzzy page purge sweeps performed. 
+ + + + + + stats.arenas.<i>.muzzy_nmadvise + (uint64_t) + r- + [] + + Number of madvise() or similar + calls made to purge muzzy pages. + + + + + stats.arenas.<i>.muzzy_purged + (uint64_t) + r- + [] + + Number of muzzy pages purged. diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 349bae99..9603d74f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -9,7 +9,8 @@ static const size_t large_pad = #endif ; -extern ssize_t opt_decay_time; +extern ssize_t opt_dirty_decay_time; +extern ssize_t opt_muzzy_decay_time; extern const arena_bin_info_t arena_bin_info[NBINS]; @@ -22,13 +23,13 @@ void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *decay_time, size_t *nactive, - size_t *ndirty); + unsigned *nthreads, const char **dss, ssize_t *dirty_decay_time, + ssize_t *muzzy_decay_time, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats); -void arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, + const char **dss, ssize_t *dirty_decay_time, ssize_t *muzzy_decay_time, + size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); +void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); @@ -41,9 +42,13 @@ void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); 
void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -ssize_t arena_decay_time_get(arena_t *arena); -bool arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time); -void arena_purge(tsdn_t *tsdn, arena_t *arena, bool all); +ssize_t arena_dirty_decay_time_get(arena_t *arena); +bool arena_dirty_decay_time_set(tsdn_t *tsdn, arena_t *arena, + ssize_t decay_time); +ssize_t arena_muzzy_decay_time_get(arena_t *arena); +bool arena_muzzy_decay_time_set(tsdn_t *tsdn, arena_t *arena, + ssize_t decay_time); +void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, @@ -74,8 +79,10 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_decay_time_default_get(void); -bool arena_decay_time_default_set(ssize_t decay_time); +ssize_t arena_dirty_decay_time_default_get(void); +bool arena_dirty_decay_time_default_set(ssize_t decay_time); +ssize_t arena_muzzy_decay_time_default_get(void); +bool arena_muzzy_decay_time_default_set(ssize_t decay_time); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 275866a4..b718451b 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -75,13 +75,14 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { return; } if (unlikely(ticker_ticks(decay_ticker, nticks))) { - arena_purge(tsdn, arena, false); + arena_decay(tsdn, 
arena, false); } } JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->decay.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx); arena_decay_ticks(tsdn, arena, 1); } diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 612b4e7d..a5191d16 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -48,10 +48,8 @@ struct arena_decay_s { * Approximate time in seconds from the creation of a set of unused * dirty pages until an equivalent set of unused dirty pages is purged * and/or reused. - * - * Synchronization: atomic. */ - ssize_t time; + atomic_zd_t time; /* time / SMOOTHSTEP_NSTEPS. */ nstime_t interval; /* @@ -73,10 +71,10 @@ struct arena_decay_s { */ nstime_t deadline; /* - * Number of dirty pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay.ndirty and - * extents_npages_get(&arena->extents_cached) to determine how many - * dirty pages, if any, were generated. + * Number of unpurged pages at beginning of current epoch. During epoch + * advancement we use the delta between arena->decay_*.nunpurged and + * extents_npages_get(&arena->extents_*) to determine how many dirty + * pages, if any, were generated. */ size_t nunpurged; /* @@ -86,6 +84,14 @@ struct arena_decay_s { * relative to epoch. */ size_t backlog[SMOOTHSTEP_NSTEPS]; + + /* + * Pointer to associated stats. These stats are embedded directly in + * the arena's stats due to how stats structures are shared between the + * arena and ctl code. + * + * Synchronization: Same as associated arena's stats field. */ + decay_stats_t *stats; }; struct arena_bin_s { @@ -194,15 +200,18 @@ struct arena_s { * * Synchronization: internal. 
*/ - extents_t extents_cached; + extents_t extents_dirty; + extents_t extents_muzzy; extents_t extents_retained; /* - * Decay-based purging state. + * Decay-based purging state, responsible for scheduling extent state + * transitions. * * Synchronization: internal. */ - arena_decay_t decay; + arena_decay_t decay_dirty; /* dirty --> muzzy */ + arena_decay_t decay_muzzy; /* muzzy --> retained */ /* * Next extent size class in a growing series to use when satisfying a diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 067c9ee9..ba53c408 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -7,8 +7,9 @@ #define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) #define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) -/* Default decay time in seconds. */ -#define DECAY_TIME_DEFAULT 10 +/* Default decay times in seconds. */ +#define DIRTY_DECAY_TIME_DEFAULT 10 +#define MUZZY_DECAY_TIME_DEFAULT 10 /* Number of event ticks between time checks. */ #define DECAY_NTICKS_PER_UPDATE 1000 diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index 18806a59..4df43d90 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -51,9 +51,11 @@ struct ctl_arena_s { /* Basic stats, supported even if !config_stats. */ unsigned nthreads; const char *dss; - ssize_t decay_time; + ssize_t dirty_decay_time; + ssize_t muzzy_decay_time; size_t pactive; size_t pdirty; + size_t pmuzzy; /* NULL if !config_stats. 
*/ ctl_arena_stats_t *astats; diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index ef2467e1..e8f632f8 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -21,20 +21,21 @@ bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, bool delay_coalesce); extent_state_t extents_state_get(const extents_t *extents); size_t extents_npages_get(extents_t *extents); +extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, + bool slab); +void extents_dalloc(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); -extent_t *extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, bool slab); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index c14aef86..001b7c13 
100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -4,7 +4,8 @@ typedef enum { extent_state_active = 0, extent_state_dirty = 1, - extent_state_retained = 2 + extent_state_muzzy = 2, + extent_state_retained = 3 } extent_state_t; /* Extent (span of pages). Use accessor functions for e_* fields. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c0211e58..5ca72818 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -15,21 +15,26 @@ arena_dalloc_bin_junked_locked arena_dalloc_junk_small arena_dalloc_promoted arena_dalloc_small +arena_decay arena_decay_tick arena_decay_ticks -arena_decay_time_default_get -arena_decay_time_default_set -arena_decay_time_get -arena_decay_time_set +arena_dirty_decay_time_default_get +arena_dirty_decay_time_default_set +arena_dirty_decay_time_get +arena_dirty_decay_time_set +arena_muzzy_decay_time_default_get +arena_muzzy_decay_time_default_set +arena_muzzy_decay_time_get +arena_muzzy_decay_time_set arena_destroy arena_dss_prec_get arena_dss_prec_set arena_extent_alloc_large -arena_extent_cache_dalloc arena_extent_dalloc_large_prep arena_extent_ralloc_large_expand arena_extent_ralloc_large_shrink arena_extent_sn_next +arena_extents_dirty_dalloc arena_get arena_ichoose arena_ind_get @@ -59,7 +64,6 @@ arena_prof_promote arena_prof_tctx_get arena_prof_tctx_reset arena_prof_tctx_set -arena_purge arena_ralloc arena_ralloc_no_move arena_reset @@ -138,7 +142,6 @@ extent_commit_wrapper extent_committed_get extent_committed_set extent_dalloc -extent_dalloc_cache extent_dalloc_gap extent_dalloc_mmap extent_dalloc_wrapper @@ -192,6 +195,8 @@ extent_usize_get extent_usize_set extent_zeroed_get extent_zeroed_set +extents_alloc +extents_dalloc extents_evict extents_init extents_npages_get @@ -299,7 +304,8 @@ nstime_sec nstime_subtract nstime_update opt_abort -opt_decay_time 
+opt_dirty_decay_time +opt_muzzy_decay_time opt_dss opt_junk opt_junk_alloc diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 4e9c898a..ffcb3c18 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -77,6 +77,15 @@ struct malloc_large_stats_s { size_t curlextents; /* Derived. */ }; +struct decay_stats_s { + /* Total number of purge sweeps. */ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. */ + arena_stats_u64_t purged; +}; + /* * Arena stats. Note that fields marked "derived" are not directly maintained * within the arena code; rather their values are derived during stats merge @@ -84,7 +93,7 @@ struct malloc_large_stats_s { */ struct arena_stats_s { #ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; + malloc_mutex_t mtx; #endif /* Number of bytes currently mapped, excluding retained memory. */ @@ -98,14 +107,8 @@ struct arena_stats_s { */ atomic_zu_t retained; /* Derived. */ - /* - * Total number of purge sweeps, total number of madvise calls made, - * and total pages purged in order to keep dirty unused memory under - * control. - */ - arena_stats_u64_t npurge; - arena_stats_u64_t nmadvise; - arena_stats_u64_t purged; + decay_stats_t decay_dirty; + decay_stats_t decay_muzzy; atomic_zu_t base; /* Derived. 
*/ atomic_zu_t internal; diff --git a/include/jemalloc/internal/stats_types.h b/include/jemalloc/internal/stats_types.h index f202b231..48483388 100644 --- a/include/jemalloc/internal/stats_types.h +++ b/include/jemalloc/internal/stats_types.h @@ -4,6 +4,7 @@ typedef struct tcache_bin_stats_s tcache_bin_stats_t; typedef struct malloc_bin_stats_s malloc_bin_stats_t; typedef struct malloc_large_stats_s malloc_large_stats_t; +typedef struct decay_stats_s decay_stats_t; typedef struct arena_stats_s arena_stats_t; #endif /* JEMALLOC_INTERNAL_STATS_TYPES_H */ diff --git a/src/arena.c b/src/arena.c index c253760b..d861fff6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -13,8 +13,10 @@ const char *percpu_arena_mode_names[] = { const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT; -ssize_t opt_decay_time = DECAY_TIME_DEFAULT; -static ssize_t decay_time_default; +ssize_t opt_dirty_decay_time = DIRTY_DECAY_TIME_DEFAULT; +ssize_t opt_muzzy_decay_time = MUZZY_DECAY_TIME_DEFAULT; +static ssize_t dirty_decay_time_default; +static ssize_t muzzy_decay_time_default; const arena_bin_info_t arena_bin_info[NBINS] = { #define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ @@ -37,12 +39,13 @@ const arena_bin_info_t arena_bin_info[NBINS] = { * definition. 
*/ -static void arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents, size_t ndirty_limit); -static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, - extent_t *slab, arena_bin_t *bin); -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, - extent_t *slab, arena_bin_t *bin); +static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit); +static void arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all); +static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, + arena_bin_t *bin); +static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, + arena_bin_t *bin); /******************************************************************************/ @@ -50,7 +53,7 @@ static bool arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_stats_t); i++) { - assert(((char *)arena_stats)[0] == 0); + assert(((char *)arena_stats)[i] == 0); } } #ifndef JEMALLOC_ATOMIC_U64 @@ -187,23 +190,27 @@ arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty) { + const char **dss, ssize_t *dirty_decay_time, ssize_t *muzzy_decay_time, + size_t *nactive, size_t *ndirty, + size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *decay_time = arena_decay_time_get(arena); + *dirty_decay_time = arena_dirty_decay_time_get(arena); + *muzzy_decay_time = arena_muzzy_decay_time_get(arena); *nactive += atomic_read_zu(&arena->nactive); - *ndirty += extents_npages_get(&arena->extents_cached); + *ndirty += extents_npages_get(&arena->extents_dirty); + *nmuzzy += extents_npages_get(&arena->extents_muzzy); } void arena_stats_merge(tsdn_t 
*tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *decay_time, size_t *nactive, size_t *ndirty, - arena_stats_t *astats, malloc_bin_stats_t *bstats, - malloc_large_stats_t *lstats) { + const char **dss, ssize_t *dirty_decay_time, ssize_t *muzzy_decay_time, + size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, + malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { cassert(config_stats); - arena_basic_stats_merge(tsdn, arena, nthreads, dss, decay_time, - nactive, ndirty); + arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_time, + muzzy_decay_time, nactive, ndirty, nmuzzy); size_t base_allocated, base_resident, base_mapped; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, @@ -215,17 +222,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, extents_npages_get(&arena->extents_retained) << LG_PAGE); - arena_stats_accum_u64(&astats->npurge, arena_stats_read_u64(tsdn, - &arena->stats, &arena->stats.npurge)); - arena_stats_accum_u64(&astats->nmadvise, arena_stats_read_u64(tsdn, - &arena->stats, &arena->stats.nmadvise)); - arena_stats_accum_u64(&astats->purged, arena_stats_read_u64(tsdn, - &arena->stats, &arena->stats.purged)); + + arena_stats_accum_u64(&astats->decay_dirty.npurge, + arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.decay_dirty.npurge)); + arena_stats_accum_u64(&astats->decay_dirty.nmadvise, + arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.decay_dirty.nmadvise)); + arena_stats_accum_u64(&astats->decay_dirty.purged, + arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.decay_dirty.purged)); + + arena_stats_accum_u64(&astats->decay_muzzy.npurge, + arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.decay_muzzy.npurge)); + arena_stats_accum_u64(&astats->decay_muzzy.nmadvise, + arena_stats_read_u64(tsdn, &arena->stats, + 
&arena->stats.decay_muzzy.nmadvise)); + arena_stats_accum_u64(&astats->decay_muzzy.purged, + arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.decay_muzzy.purged)); + arena_stats_accum_zu(&astats->base, base_allocated); arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); - arena_stats_accum_zu(&astats->resident, base_resident - + (((atomic_read_zu(&arena->nactive) - + extents_npages_get(&arena->extents_cached)) << LG_PAGE))); + arena_stats_accum_zu(&astats->resident, base_resident + + (((atomic_read_zu(&arena->nactive) + + extents_npages_get(&arena->extents_dirty) + + extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); for (szind_t i = 0; i < NSIZES - NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, @@ -292,13 +315,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } void -arena_extent_cache_dalloc(tsdn_t *tsdn, arena_t *arena, +arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - extent_dalloc_cache(tsdn, arena, r_extent_hooks, extent); - if (arena_decay_time_get(arena) == 0) { - arena_purge(tsdn, arena, true); + extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, + extent); + if (arena_dirty_decay_time_get(arena) == 0) { + arena_decay_dirty(tsdn, arena, true); } } @@ -432,8 +456,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t mapped_add; bool commit = true; - extent = extent_alloc_cache(tsdn, arena, &extent_hooks, NULL, usize, - large_pad, alignment, zero, &commit, false); + extent = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_dirty, NULL, usize, large_pad, alignment, zero, + &commit, false); + if (extent == NULL) { + extent = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_muzzy, NULL, usize, large_pad, alignment, + zero, &commit, false); + } size_t size = usize + large_pad; if (extent == NULL) 
{ extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, @@ -507,12 +537,12 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static ssize_t arena_decay_time_read(arena_decay_t *decay) { - return atomic_read_zd(&decay->time); + return atomic_load_zd(&decay->time, ATOMIC_RELAXED); } static void arena_decay_time_write(arena_decay_t *decay, ssize_t decay_time) { - atomic_write_zd(&decay->time, decay_time); + atomic_store_zd(&decay->time, decay_time, ATOMIC_RELAXED); } static void @@ -621,10 +651,11 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, static void arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents) { - size_t ndirty_limit = arena_decay_backlog_npages_limit(decay); + size_t npages_limit = arena_decay_backlog_npages_limit(decay); - if (extents_npages_get(extents) > ndirty_limit) { - arena_purge_to_limit(tsdn, arena, decay, extents, ndirty_limit); + if (extents_npages_get(extents) > npages_limit) { + arena_decay_to_limit(tsdn, arena, decay, extents, false, + npages_limit); } /* * There may be concurrent ndirty fluctuation between the purge above @@ -659,12 +690,22 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, } static bool -arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time) { +arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time, + decay_stats_t *stats) { + if (config_debug) { + for (size_t i = 0; i < sizeof(arena_decay_t); i++) { + assert(((char *)decay)[i] == 0); + } + } if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY)) { return true; } decay->purging = false; arena_decay_reinit(decay, extents, decay_time); + /* Memory is zeroed, so there is no need to clear stats. 
*/ + if (config_stats) { + decay->stats = stats; + } return false; } @@ -680,7 +721,7 @@ arena_decay_time_valid(ssize_t decay_time) { } static void -arena_maybe_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, +arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents) { malloc_mutex_assert_owner(tsdn, &decay->mtx); @@ -688,7 +729,8 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, ssize_t decay_time = arena_decay_time_read(decay); if (decay_time <= 0) { if (decay_time == 0) { - arena_purge_to_limit(tsdn, arena, decay, extents, 0); + arena_decay_to_limit(tsdn, arena, decay, extents, false, + 0); } return; } @@ -725,18 +767,29 @@ arena_maybe_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } } -ssize_t -arena_decay_time_get(arena_t *arena) { - return arena_decay_time_read(&arena->decay); +static ssize_t +arena_decay_time_get(arena_decay_t *decay) { + return arena_decay_time_read(decay); } -bool -arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { +ssize_t +arena_dirty_decay_time_get(arena_t *arena) { + return arena_decay_time_get(&arena->decay_dirty); +} + +ssize_t +arena_muzzy_decay_time_get(arena_t *arena) { + return arena_decay_time_get(&arena->decay_muzzy); +} + +static bool +arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) { return true; } - malloc_mutex_lock(tsdn, &arena->decay.mtx); + malloc_mutex_lock(tsdn, &decay->mtx); /* * Restart decay backlog from scratch, which may cause many dirty pages * to be immediately purged. It would conceptually be possible to map @@ -745,58 +798,100 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. 
*/ - arena_decay_reinit(&arena->decay, &arena->extents_cached, decay_time); - arena_maybe_purge(tsdn, arena, &arena->decay, &arena->extents_cached); - malloc_mutex_unlock(tsdn, &arena->decay.mtx); + arena_decay_reinit(decay, extents, decay_time); + arena_maybe_decay(tsdn, arena, decay, extents); + malloc_mutex_unlock(tsdn, &decay->mtx); return false; } +bool +arena_dirty_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { + return arena_decay_time_set(tsdn, arena, &arena->decay_dirty, + &arena->extents_dirty, decay_time); +} + +bool +arena_muzzy_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { + return arena_decay_time_set(tsdn, arena, &arena->decay_muzzy, + &arena->extents_muzzy, decay_time); +} + static size_t -arena_stash_dirty(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t ndirty_limit, extent_list_t *purge_extents) { +arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, + extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - /* Stash extents according to ndirty_limit. */ + /* Stash extents according to npages_limit. 
*/ size_t nstashed = 0; extent_t *extent; while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, - ndirty_limit)) != NULL) { - extent_list_append(purge_extents, extent); + npages_limit)) != NULL) { + extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } return nstashed; } static size_t -arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_list_t *purge_extents) { - UNUSED size_t nmadvise; +arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, + bool all, extent_list_t *decay_extents) { + UNUSED size_t nmadvise, nunmapped; size_t npurged; if (config_stats) { nmadvise = 0; + nunmapped = 0; } npurged = 0; - for (extent_t *extent = extent_list_first(purge_extents); extent != - NULL; extent = extent_list_first(purge_extents)) { + ssize_t muzzy_decay_time = arena_muzzy_decay_time_get(arena); + for (extent_t *extent = extent_list_first(decay_extents); extent != + NULL; extent = extent_list_first(decay_extents)) { if (config_stats) { nmadvise++; } - npurged += extent_size_get(extent) >> LG_PAGE; - extent_list_remove(purge_extents, extent); - extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, extent); + size_t npages = extent_size_get(extent) >> LG_PAGE; + npurged += npages; + extent_list_remove(decay_extents, extent); + switch (extents_state_get(extents)) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + if (!all && muzzy_decay_time != 0 && + !extent_purge_lazy_wrapper(tsdn, arena, + r_extent_hooks, extent, 0, + extent_size_get(extent))) { + extents_dalloc(tsdn, arena, r_extent_hooks, + &arena->extents_muzzy, extent); + break; + } + /* Fall through. 
*/ + case extent_state_muzzy: + extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, + extent); + if (config_stats) { + nunmapped += npages; + } + break; + case extent_state_retained: + default: + not_reached(); + } } if (config_stats) { arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.nmadvise, - nmadvise); - arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.purged, + arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->npurge, + 1); + arena_stats_add_u64(tsdn, &arena->stats, + &decay->stats->nmadvise, nmadvise); + arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged, npurged); arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - npurged << LG_PAGE); + nunmapped); arena_stats_unlock(tsdn, &arena->stats); } @@ -804,12 +899,12 @@ arena_purge_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * ndirty_limit: Purge as many dirty extents as possible without violating the - * invariant: (extents_npages_get(extents) >= ndirty_limit) + * npages_limit: Decay as many dirty extents as possible without violating the + * invariant: (extents_npages_get(extents) >= npages_limit) */ static void -arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, size_t ndirty_limit) { +arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, bool all, size_t npages_limit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); @@ -817,49 +912,53 @@ arena_purge_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, return; } decay->purging = true; - - extent_hooks_t *extent_hooks = extent_hooks_get(arena); - size_t npurge, npurged; - extent_list_t purge_extents; - - extent_list_init(&purge_extents); - malloc_mutex_unlock(tsdn, &decay->mtx); - npurge = arena_stash_dirty(tsdn, arena, &extent_hooks, extents, - ndirty_limit, &purge_extents); - if (npurge == 0) { - malloc_mutex_lock(tsdn, 
&decay->mtx); - goto label_return; + extent_hooks_t *extent_hooks = extent_hooks_get(arena); + + extent_list_t decay_extents; + extent_list_init(&decay_extents); + + size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, + npages_limit, &decay_extents); + if (npurge != 0) { + UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, + &extent_hooks, decay, extents, all, &decay_extents); + assert(npurged == npurge); } - npurged = arena_purge_stashed(tsdn, arena, &extent_hooks, - &purge_extents); - assert(npurged == npurge); malloc_mutex_lock(tsdn, &decay->mtx); - - if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.npurge, - 1); - arena_stats_unlock(tsdn, &arena->stats); - } - -label_return: decay->purging = false; } -void -arena_purge(tsdn_t *tsdn, arena_t *arena, bool all) { - malloc_mutex_lock(tsdn, &arena->decay.mtx); +static void +arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, bool all) { + malloc_mutex_lock(tsdn, &decay->mtx); if (all) { - arena_purge_to_limit(tsdn, arena, &arena->decay, - &arena->extents_cached, 0); + arena_decay_to_limit(tsdn, arena, decay, extents, all, 0); } else { - arena_maybe_purge(tsdn, arena, &arena->decay, - &arena->extents_cached); + arena_maybe_decay(tsdn, arena, decay, extents); } - malloc_mutex_unlock(tsdn, &arena->decay.mtx); + malloc_mutex_unlock(tsdn, &decay->mtx); +} + +static void +arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all) { + arena_decay_impl(tsdn, arena, &arena->decay_dirty, + &arena->extents_dirty, all); +} + +static void +arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool all) { + arena_decay_impl(tsdn, arena, &arena->decay_muzzy, + &arena->extents_muzzy, all); +} + +void +arena_decay(tsdn_t *tsdn, arena_t *arena, bool all) { + arena_decay_dirty(tsdn, arena, all); + arena_decay_muzzy(tsdn, arena, all); } static void @@ -867,7 +966,7 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t 
*arena, extent_t *slab) { arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extent_cache_dalloc(tsdn, arena, &extent_hooks, slab); + arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, slab); } static void @@ -1008,7 +1107,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(extents_npages_get(&arena->extents_cached) == 0); + assert(extents_npages_get(&arena->extents_dirty) == 0); /* Attempt to deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1061,8 +1160,14 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; bool zero = false; bool commit = true; - extent_t *slab = extent_alloc_cache(tsdn, arena, &extent_hooks, NULL, - bin_info->slab_size, 0, PAGE, &zero, &commit, true); + extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, &zero, + &commit, true); + if (slab == NULL) { + slab = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE, + &zero, &commit, true); + } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, bin_info); @@ -1622,16 +1727,32 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { } ssize_t -arena_decay_time_default_get(void) { - return (ssize_t)atomic_read_zu((size_t *)&decay_time_default); +arena_dirty_decay_time_default_get(void) { + return (ssize_t)atomic_read_zu((size_t *)&dirty_decay_time_default); } bool -arena_decay_time_default_set(ssize_t decay_time) { +arena_dirty_decay_time_default_set(ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) { return true; } - atomic_write_zu((size_t *)&decay_time_default, (size_t)decay_time); + atomic_write_zu((size_t *)&dirty_decay_time_default, + 
(size_t)decay_time); + return false; +} + +ssize_t +arena_muzzy_decay_time_default_get(void) { + return (ssize_t)atomic_read_zu((size_t *)&muzzy_decay_time_default); +} + +bool +arena_muzzy_decay_time_default_set(ssize_t decay_time) { + if (!arena_decay_time_valid(decay_time)) { + return true; + } + atomic_write_zu((size_t *)&muzzy_decay_time_default, + (size_t)decay_time); return false; } @@ -1723,28 +1844,40 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* - * Delay coalescing for cached extents despite the disruptive effect on + * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (extents_init(tsdn, &arena->extents_cached, extent_state_dirty, + if (extents_init(tsdn, &arena->extents_dirty, extent_state_dirty, true)) { goto label_error; } + /* + * Coalesce muzzy extents immediately, because operations on them are in + * the critical path much less often than for dirty extents. + */ + if (extents_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, + false)) { + goto label_error; + } /* * Coalesce retained extents immediately, in part because they will * never be evicted (and therefore there's no opportunity for delayed * coalescing), but also because operations on retained extents are not * in the critical path. 
*/ - if (extents_init(tsdn, &arena->extents_retained, - extent_state_retained, false)) { + if (extents_init(tsdn, &arena->extents_retained, extent_state_retained, + false)) { goto label_error; } - if (arena_decay_init(&arena->decay, &arena->extents_cached, - arena_decay_time_default_get())) { + if (arena_decay_init(&arena->decay_dirty, &arena->extents_dirty, + arena_dirty_decay_time_default_get(), &arena->stats.decay_dirty)) { + goto label_error; + } + if (arena_decay_init(&arena->decay_muzzy, &arena->extents_muzzy, + arena_muzzy_decay_time_default_get(), &arena->stats.decay_muzzy)) { goto label_error; } @@ -1785,12 +1918,14 @@ label_error: void arena_boot(void) { - arena_decay_time_default_set(opt_decay_time); + arena_dirty_decay_time_default_set(opt_dirty_decay_time); + arena_muzzy_decay_time_default_set(opt_muzzy_decay_time); } void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->decay.mtx); + malloc_mutex_prefork(tsdn, &arena->decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &arena->decay_muzzy.mtx); } void @@ -1802,7 +1937,8 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - extents_prefork(tsdn, &arena->extents_cached); + extents_prefork(tsdn, &arena->extents_dirty); + extents_prefork(tsdn, &arena->extents_muzzy); extents_prefork(tsdn, &arena->extents_retained); } @@ -1838,9 +1974,11 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_freelist_mtx); - extents_postfork_parent(tsdn, &arena->extents_cached); + extents_postfork_parent(tsdn, &arena->extents_dirty); + extents_postfork_parent(tsdn, &arena->extents_muzzy); extents_postfork_parent(tsdn, &arena->extents_retained); - malloc_mutex_postfork_parent(tsdn, &arena->decay.mtx); + malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, 
&arena->decay_muzzy.mtx); if (config_stats && config_tcache) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -1856,9 +1994,11 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_freelist_mtx); - extents_postfork_child(tsdn, &arena->extents_cached); + extents_postfork_child(tsdn, &arena->extents_dirty); + extents_postfork_child(tsdn, &arena->extents_muzzy); extents_postfork_child(tsdn, &arena->extents_retained); - malloc_mutex_postfork_child(tsdn, &arena->decay.mtx); + malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); if (config_stats && config_tcache) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } diff --git a/src/ctl.c b/src/ctl.c index 70721584..ee69be6d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -73,7 +73,8 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) -CTL_PROTO(opt_decay_time) +CTL_PROTO(opt_dirty_decay_time) +CTL_PROTO(opt_muzzy_decay_time) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -95,12 +96,13 @@ CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) CTL_PROTO(arena_i_initialized) -CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_decay) +CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_destroy) CTL_PROTO(arena_i_dss) -CTL_PROTO(arena_i_decay_time) +CTL_PROTO(arena_i_dirty_decay_time) +CTL_PROTO(arena_i_muzzy_decay_time) CTL_PROTO(arena_i_extent_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -110,7 +112,8 @@ INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lextent_i_size) INDEX_PROTO(arenas_lextent_i) CTL_PROTO(arenas_narenas) -CTL_PROTO(arenas_decay_time) +CTL_PROTO(arenas_dirty_decay_time) +CTL_PROTO(arenas_muzzy_decay_time) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) 
CTL_PROTO(arenas_tcache_max) @@ -150,14 +153,19 @@ CTL_PROTO(stats_arenas_i_lextents_j_curlextents) INDEX_PROTO(stats_arenas_i_lextents_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_dss) -CTL_PROTO(stats_arenas_i_decay_time) +CTL_PROTO(stats_arenas_i_dirty_decay_time) +CTL_PROTO(stats_arenas_i_muzzy_decay_time) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) +CTL_PROTO(stats_arenas_i_pmuzzy) CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_retained) -CTL_PROTO(stats_arenas_i_npurge) -CTL_PROTO(stats_arenas_i_nmadvise) -CTL_PROTO(stats_arenas_i_purged) +CTL_PROTO(stats_arenas_i_dirty_npurge) +CTL_PROTO(stats_arenas_i_dirty_nmadvise) +CTL_PROTO(stats_arenas_i_dirty_purged) +CTL_PROTO(stats_arenas_i_muzzy_npurge) +CTL_PROTO(stats_arenas_i_muzzy_nmadvise) +CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) CTL_PROTO(stats_arenas_i_tcache_bytes) @@ -231,7 +239,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, - {NAME("decay_time"), CTL(opt_decay_time)}, + {NAME("dirty_decay_time"), CTL(opt_dirty_decay_time)}, + {NAME("muzzy_decay_time"), CTL(opt_muzzy_decay_time)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -259,12 +268,13 @@ static const ctl_named_node_t tcache_node[] = { static const ctl_named_node_t arena_i_node[] = { {NAME("initialized"), CTL(arena_i_initialized)}, - {NAME("purge"), CTL(arena_i_purge)}, {NAME("decay"), CTL(arena_i_decay)}, + {NAME("purge"), CTL(arena_i_purge)}, {NAME("reset"), CTL(arena_i_reset)}, {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, - {NAME("decay_time"), CTL(arena_i_decay_time)}, + {NAME("dirty_decay_time"), CTL(arena_i_dirty_decay_time)}, + {NAME("muzzy_decay_time"), CTL(arena_i_muzzy_decay_time)}, {NAME("extent_hooks"), 
CTL(arena_i_extent_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -301,7 +311,8 @@ static const ctl_indexed_node_t arenas_lextent_node[] = { static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("decay_time"), CTL(arenas_decay_time)}, + {NAME("dirty_decay_time"), CTL(arenas_dirty_decay_time)}, + {NAME("muzzy_decay_time"), CTL(arenas_muzzy_decay_time)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -373,14 +384,19 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("decay_time"), CTL(stats_arenas_i_decay_time)}, + {NAME("dirty_decay_time"), CTL(stats_arenas_i_dirty_decay_time)}, + {NAME("muzzy_decay_time"), CTL(stats_arenas_i_muzzy_decay_time)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, {NAME("retained"), CTL(stats_arenas_i_retained)}, - {NAME("npurge"), CTL(stats_arenas_i_npurge)}, - {NAME("nmadvise"), CTL(stats_arenas_i_nmadvise)}, - {NAME("purged"), CTL(stats_arenas_i_purged)}, + {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, + {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, + {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, + {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)}, + {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)}, + {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, @@ -554,9 +570,11 @@ static void ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->nthreads = 0; ctl_arena->dss = 
dss_prec_names[dss_prec_limit]; - ctl_arena->decay_time = -1; + ctl_arena->dirty_decay_time = -1; + ctl_arena->muzzy_decay_time = -1; ctl_arena->pactive = 0; ctl_arena->pdirty = 0; + ctl_arena->pmuzzy = 0; if (config_stats) { memset(&ctl_arena->astats->astats, 0, sizeof(arena_stats_t)); ctl_arena->astats->allocated_small = 0; @@ -576,8 +594,9 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { if (config_stats) { arena_stats_merge(tsdn, arena, &ctl_arena->nthreads, - &ctl_arena->dss, &ctl_arena->decay_time, - &ctl_arena->pactive, &ctl_arena->pdirty, + &ctl_arena->dss, &ctl_arena->dirty_decay_time, + &ctl_arena->muzzy_decay_time, &ctl_arena->pactive, + &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats); @@ -594,8 +613,9 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, - &ctl_arena->dss, &ctl_arena->decay_time, - &ctl_arena->pactive, &ctl_arena->pdirty); + &ctl_arena->dss, &ctl_arena->dirty_decay_time, + &ctl_arena->muzzy_decay_time, &ctl_arena->pactive, + &ctl_arena->pdirty, &ctl_arena->pmuzzy); } } @@ -608,10 +628,12 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_sdarena->nthreads += ctl_arena->nthreads; ctl_sdarena->pactive += ctl_arena->pactive; ctl_sdarena->pdirty += ctl_arena->pdirty; + ctl_sdarena->pmuzzy += ctl_arena->pmuzzy; } else { assert(ctl_arena->nthreads == 0); assert(ctl_arena->pactive == 0); assert(ctl_arena->pdirty == 0); + assert(ctl_arena->pmuzzy == 0); } if (config_stats) { @@ -624,12 +646,20 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, accum_atomic_zu(&sdstats->astats.retained, &astats->astats.retained); } - accum_arena_stats_u64(&sdstats->astats.npurge, - &astats->astats.npurge); - accum_arena_stats_u64(&sdstats->astats.nmadvise, - &astats->astats.nmadvise); - 
accum_arena_stats_u64(&sdstats->astats.purged, - &astats->astats.purged); + + accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + &astats->astats.decay_dirty.npurge); + accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + &astats->astats.decay_dirty.nmadvise); + accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + &astats->astats.decay_dirty.purged); + + accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + &astats->astats.decay_muzzy.npurge); + accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + &astats->astats.decay_muzzy.nmadvise); + accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + &astats->astats.decay_muzzy.purged); if (!destroyed) { accum_atomic_zu(&sdstats->astats.base, @@ -1340,7 +1370,8 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) -CTL_RO_NL_GEN(opt_decay_time, opt_decay_time, ssize_t) +CTL_RO_NL_GEN(opt_dirty_decay_time, opt_dirty_decay_time, ssize_t) +CTL_RO_NL_GEN(opt_muzzy_decay_time, opt_muzzy_decay_time, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) @@ -1630,7 +1661,7 @@ label_return: } static void -arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { +arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { malloc_mutex_lock(tsdn, &ctl_mtx); { unsigned narenas = ctl_arenas->narenas; @@ -1655,7 +1686,7 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) { - arena_purge(tsdn, tarenas[i], all); + arena_decay(tsdn, tarenas[i], all); } } } else { @@ -1669,28 +1700,12 @@ arena_i_purge(tsdn_t *tsdn, unsigned arena_ind, bool all) { malloc_mutex_unlock(tsdn, &ctl_mtx); if (tarena != NULL) { - arena_purge(tsdn, tarena, all); + 
arena_decay(tsdn, tarena, all); } } } } -static int -arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned arena_ind; - - READONLY(); - WRITEONLY(); - MIB_UNSIGNED(arena_ind, 1); - arena_i_purge(tsd_tsdn(tsd), arena_ind, true); - - ret = 0; -label_return: - return ret; -} - static int arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -1700,7 +1715,23 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, READONLY(); WRITEONLY(); MIB_UNSIGNED(arena_ind, 1); - arena_i_purge(tsd_tsdn(tsd), arena_ind, false); + arena_i_decay(tsd_tsdn(tsd), arena_ind, false); + + ret = 0; +label_return: + return ret; +} + +static int +arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + + READONLY(); + WRITEONLY(); + MIB_UNSIGNED(arena_ind, 1); + arena_i_decay(tsd_tsdn(tsd), arena_ind, true); ret = 0; label_return: @@ -1773,7 +1804,7 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Merge stats after resetting and purging arena. 
*/ arena_reset(tsd, arena); - arena_purge(tsd_tsdn(tsd), arena, true); + arena_decay(tsd_tsdn(tsd), arena, true); ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED); ctl_darena->initialized = true; ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true); @@ -1852,8 +1883,8 @@ label_return: } static int -arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +arena_i_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; unsigned arena_ind; arena_t *arena; @@ -1866,7 +1897,8 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_get(arena); + size_t oldval = dirty ? arena_dirty_decay_time_get(arena) : + arena_muzzy_decay_time_get(arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1874,7 +1906,9 @@ arena_i_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_set(tsd_tsdn(tsd), arena, + if (dirty ? 
arena_dirty_decay_time_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp) : + arena_muzzy_decay_time_set(tsd_tsdn(tsd), arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; @@ -1886,6 +1920,20 @@ label_return: return ret; } +static int +arena_i_dirty_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + return arena_i_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, + newp, newlen, true); +} + +static int +arena_i_muzzy_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + return arena_i_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, + newp, newlen, false); +} + static int arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -1967,12 +2015,13 @@ label_return: } static int -arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +arenas_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; if (oldp != NULL && oldlenp != NULL) { - size_t oldval = arena_decay_time_default_get(); + size_t oldval = (dirty ? arena_dirty_decay_time_default_get() : + arena_muzzy_decay_time_default_get()); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1980,7 +2029,8 @@ arenas_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, ret = EINVAL; goto label_return; } - if (arena_decay_time_default_set(*(ssize_t *)newp)) { + if (dirty ? 
arena_dirty_decay_time_default_set(*(ssize_t *)newp) + : arena_muzzy_decay_time_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1991,6 +2041,20 @@ label_return: return ret; } +static int +arenas_dirty_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + return arenas_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, + newlen, true); +} + +static int +arenas_muzzy_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + return arenas_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, + newlen, false); +} + CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) @@ -2182,23 +2246,41 @@ CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) -CTL_RO_GEN(stats_arenas_i_decay_time, arenas_i(mib[2])->decay_time, +CTL_RO_GEN(stats_arenas_i_dirty_decay_time, arenas_i(mib[2])->dirty_decay_time, + ssize_t) +CTL_RO_GEN(stats_arenas_i_muzzy_decay_time, arenas_i(mib[2])->muzzy_decay_time, ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) +CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.npurge), 
uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_nmadvise, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmadvise), uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.purged), uint64_t) + +CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), + uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, + arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.purged), + uint64_t) + +CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), + uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, + arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, + arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.purged), + uint64_t) + CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), size_t) @@ -2268,8 +2350,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - arena_stats_read_u64( - &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) + arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) diff --git a/src/extent.c b/src/extent.c index c44ecb89..c690b98e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -69,6 +69,10 @@ static size_t highpages; */ static void 
extent_deregister(tsdn_t *tsdn, extent_t *extent); +static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, + bool slab); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, extent_t *extent, bool *coalesced); @@ -293,6 +297,31 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, return false; } +extent_t * +extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extents_t *extents, void *new_addr, size_t usize, size_t pad, + size_t alignment, bool *zero, bool *commit, bool slab) { + assert(usize + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, + usize, pad, alignment, zero, commit, slab); +} + +void +extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, + extents_t *extents, extent_t *extent) { + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + extent_addr_set(extent, extent_base_get(extent)); + extent_zeroed_set(extent, false); + + extent_record(tsdn, arena, r_extent_hooks, extents, extent); +} + extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min) { @@ -340,7 +369,10 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * concurrent operations. 
*/ switch (extents_state_get(extents)) { + case extent_state_active: + not_reached(); case extent_state_dirty: + case extent_state_muzzy: extent_state_set(extent, extent_state_active); break; case extent_state_retained: @@ -813,19 +845,6 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } -extent_t * -extent_alloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { - assert(usize + pad != 0); - assert(alignment != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - return extent_recycle(tsdn, arena, r_extent_hooks, - &arena->extents_cached, new_addr, usize, pad, alignment, zero, - commit, slab); -} - static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { @@ -1206,7 +1225,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert(extents_state_get(extents) != extent_state_dirty || + assert((extents_state_get(extents) != extent_state_dirty && + extents_state_get(extents) != extent_state_muzzy) || !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &extents->mtx); @@ -1244,20 +1264,6 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); } -void -extent_dalloc_cache(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - extent_addr_set(extent, extent_base_get(extent)); - extent_zeroed_set(extent, false); - - extent_record(tsdn, arena, r_extent_hooks, &arena->extents_cached, - extent); -} - static bool 
extent_dalloc_default_impl(void *addr, size_t size) { if (!have_dss || !extent_in_dss(addr)) { @@ -1327,16 +1333,17 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, } else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { zeroed = true; - } else if ((*r_extent_hooks)->purge_lazy != NULL && - !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena))) { - zeroed = false; } else if ((*r_extent_hooks)->purge_forced != NULL && !(*r_extent_hooks)->purge_forced(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = true; + } else if (extent_state_get(extent) == extent_state_muzzy || + ((*r_extent_hooks)->purge_lazy != NULL && + !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), 0, + extent_size_get(extent), arena_ind_get(arena)))) { + zeroed = false; } else { zeroed = false; } diff --git a/src/jemalloc.c b/src/jemalloc.c index ce84b3cf..c1b05dbb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1064,8 +1064,10 @@ malloc_conf_init(void) { } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, UINT_MAX, yes, no, false) - CONF_HANDLE_SSIZE_T(opt_decay_time, "decay_time", -1, - NSTIME_SEC_MAX); + CONF_HANDLE_SSIZE_T(opt_dirty_decay_time, + "dirty_decay_time", -1, NSTIME_SEC_MAX); + CONF_HANDLE_SSIZE_T(opt_muzzy_decay_time, + "muzzy_decay_time", -1, NSTIME_SEC_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) if (config_fill) { if (CONF_MATCH("junk")) { diff --git a/src/large.c b/src/large.c index 5145f418..c578995c 100644 --- a/src/large.c +++ b/src/large.c @@ -125,7 +125,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { extent_usize_get(trail)); } - arena_extent_cache_dalloc(tsdn, arena, &extent_hooks, trail); + arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, 
trail); } arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); @@ -158,9 +158,16 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool commit = true; extent_t *trail; bool new_mapping; - if ((trail = extent_alloc_cache(tsdn, arena, &extent_hooks, - extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, - &commit, false)) == NULL) { + if ((trail = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_dirty, extent_past_get(extent), trailsize, 0, + CACHELINE, &is_zeroed_trail, &commit, false)) != NULL + || (trail = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_muzzy, extent_past_get(extent), trailsize, 0, + CACHELINE, &is_zeroed_trail, &commit, false)) != NULL) { + if (config_stats) { + new_mapping = false; + } + } else { if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, &is_zeroed_trail, &commit, false)) == NULL) { @@ -169,10 +176,6 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, if (config_stats) { new_mapping = true; } - } else { - if (config_stats) { - new_mapping = false; - } } if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { @@ -327,7 +330,7 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extent_cache_dalloc(tsdn, arena, &extent_hooks, extent); + arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, extent); } void diff --git a/src/stats.c b/src/stats.c index 776fb862..58b9a04f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -259,10 +259,11 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, unsigned i, bool bins, bool large) { unsigned nthreads; const char *dss; - ssize_t decay_time; - size_t page, pactive, pdirty, mapped, retained; + ssize_t dirty_decay_time, 
muzzy_decay_time; + size_t page, pactive, pdirty, pmuzzy, mapped, retained; size_t base, internal, resident; - uint64_t npurge, nmadvise, purged; + uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; + uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests; size_t large_allocated; @@ -289,39 +290,70 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "dss allocation precedence: %s\n", dss); } - CTL_M2_GET("stats.arenas.0.decay_time", i, &decay_time, ssize_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"decay_time\": %zd,\n", decay_time); - } else { - if (decay_time >= 0) { - malloc_cprintf(write_cb, cbopaque, "decay time: %zd\n", - decay_time); - } else { - malloc_cprintf(write_cb, cbopaque, "decay time: N/A\n"); - } - } - + CTL_M2_GET("stats.arenas.0.dirty_decay_time", i, &dirty_decay_time, + ssize_t); + CTL_M2_GET("stats.arenas.0.muzzy_decay_time", i, &muzzy_decay_time, + ssize_t); CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); - CTL_M2_GET("stats.arenas.0.npurge", i, &npurge, uint64_t); - CTL_M2_GET("stats.arenas.0.nmadvise", i, &nmadvise, uint64_t); - CTL_M2_GET("stats.arenas.0.purged", i, &purged, uint64_t); + CTL_M2_GET("stats.arenas.0.pmuzzy", i, &pmuzzy, size_t); + CTL_M2_GET("stats.arenas.0.dirty_npurge", i, &dirty_npurge, uint64_t); + CTL_M2_GET("stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise, + uint64_t); + CTL_M2_GET("stats.arenas.0.dirty_purged", i, &dirty_purged, uint64_t); + CTL_M2_GET("stats.arenas.0.muzzy_npurge", i, &muzzy_npurge, uint64_t); + CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, + uint64_t); + CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dirty_decay_time\": %zd,\n", dirty_decay_time); + malloc_cprintf(write_cb, cbopaque, + 
"\t\t\t\t\"muzzy_decay_time\": %zd,\n", muzzy_decay_time); malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"pactive\": %zu,\n", pactive); malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"pdirty\": %zu,\n", pdirty); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"npurge\": %"FMTu64",\n", npurge); + "\t\t\t\t\"pmuzzy\": %zu,\n", pmuzzy); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nmadvise\": %"FMTu64",\n", nmadvise); + "\t\t\t\t\"dirty_npurge\": %"FMTu64",\n", dirty_npurge); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"purged\": %"FMTu64",\n", purged); + "\t\t\t\t\"dirty_nmadvise\": %"FMTu64",\n", dirty_nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"dirty_purged\": %"FMTu64",\n", dirty_purged); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"muzzy_npurge\": %"FMTu64",\n", muzzy_npurge); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"muzzy_nmadvise\": %"FMTu64",\n", muzzy_nmadvise); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"muzzy_purged\": %"FMTu64",\n", muzzy_purged); } else { malloc_cprintf(write_cb, cbopaque, - "purging: dirty: %zu, sweeps: %"FMTu64", madvises: %"FMTu64 - ", purged: %"FMTu64"\n", pdirty, npurge, nmadvise, purged); + "decaying: time npages sweeps madvises" + " purged\n"); + if (dirty_decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + " dirty: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" + FMTu64"\n", dirty_decay_time, pdirty, dirty_npurge, + dirty_nmadvise, dirty_purged); + } else { + malloc_cprintf(write_cb, cbopaque, + " dirty: N/A %12zu %12"FMTu64" %12"FMTu64" %12" + FMTu64"\n", pdirty, dirty_npurge, dirty_nmadvise, + dirty_purged); + } + if (muzzy_decay_time >= 0) { + malloc_cprintf(write_cb, cbopaque, + " muzzy: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" + FMTu64"\n", muzzy_decay_time, pmuzzy, muzzy_npurge, + muzzy_nmadvise, muzzy_purged); + } else { + malloc_cprintf(write_cb, cbopaque, + " muzzy: N/A %12zu %12"FMTu64" %12"FMTu64" %12" + FMTu64"\n", pmuzzy, muzzy_npurge, muzzy_nmadvise, + muzzy_purged); + } } 
CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, @@ -622,7 +654,10 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") - OPT_WRITE_SSIZE_T_MUTABLE(decay_time, arenas.decay_time, ",") + OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_time, arenas.dirty_decay_time, + ",") + OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_time, arenas.muzzy_decay_time, + ",") OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_BOOL(zero, ",") OPT_WRITE_BOOL(utrace, ",") @@ -670,16 +705,26 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); } - CTL_GET("arenas.decay_time", &ssv, ssize_t); + CTL_GET("arenas.dirty_decay_time", &ssv, ssize_t); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"decay_time\": %zd,\n", ssv); + "\t\t\t\"dirty_decay_time\": %zd,\n", ssv); } else { malloc_cprintf(write_cb, cbopaque, "Unused dirty page decay time: %zd%s\n", ssv, (ssv < 0) ? " (no decay)" : ""); } + CTL_GET("arenas.muzzy_decay_time", &ssv, ssize_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"muzzy_decay_time\": %zd,\n", ssv); + } else { + malloc_cprintf(write_cb, cbopaque, + "Unused muzzy page decay time: %zd%s\n", ssv, (ssv < 0) ? 
+ " (no decay)" : ""); + } + CTL_GET("arenas.quantum", &sv, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, diff --git a/test/unit/decay.c b/test/unit/decay.c index eb4df9d7..471a558c 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -22,18 +22,28 @@ nstime_update_mock(nstime_t *time) { } static unsigned -do_arena_create(ssize_t decay_time) { +do_arena_create(ssize_t dirty_decay_time, ssize_t muzzy_decay_time) { unsigned arena_ind; size_t sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.decay_time", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); + + assert_d_eq(mallctlnametomib("arena.0.dirty_decay_time", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&decay_time, - sizeof(decay_time)), 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&dirty_decay_time, + sizeof(dirty_decay_time)), 0, "Unexpected mallctlbymib() failure"); + + assert_d_eq(mallctlnametomib("arena.0.muzzy_decay_time", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&muzzy_decay_time, + sizeof(muzzy_decay_time)), 0, "Unexpected mallctlbymib() failure"); + return arena_ind; } @@ -78,11 +88,10 @@ do_decay(unsigned arena_ind) { } static uint64_t -get_arena_npurge(unsigned arena_ind) { - do_epoch(); +get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("stats.arenas.0.npurge", mib, &miblen), 0, + assert_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; uint64_t npurge = 0; 
@@ -92,6 +101,25 @@ get_arena_npurge(unsigned arena_ind) { return npurge; } +static uint64_t +get_arena_dirty_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); +} + +static uint64_t +get_arena_muzzy_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static uint64_t +get_arena_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + static size_t get_arena_pdirty(unsigned arena_ind) { do_epoch(); @@ -107,6 +135,21 @@ get_arena_pdirty(unsigned arena_ind) { return pdirty; } +static size_t +get_arena_pmuzzy(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pmuzzy; + size_t sz = sizeof(pmuzzy); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pmuzzy; +} + static void * do_mallocx(size_t size, int flags) { void *p = mallocx(size, flags); @@ -133,7 +176,7 @@ TEST_BEGIN(test_decay_ticks) { int err; /* Set up a manually managed arena for test. */ - arena_ind = do_arena_create(0); + arena_ind = do_arena_create(0, 0); /* Migrate to the new arena, and get the ticker. 
*/ unsigned old_arena_ind; @@ -317,19 +360,66 @@ TEST_BEGIN(test_decay_ticks) { } TEST_END -TEST_BEGIN(test_decay_ticker) { -#define NPS 1024 +static void +decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, + uint64_t dirty_npurge0, uint64_t muzzy_npurge0, bool terminate_asap) { #define NINTERVALS 101 - ssize_t dt = opt_decay_time; - unsigned arena_ind = do_arena_create(dt); + nstime_t time, update_interval, decay_time, deadline; + + nstime_init(&time, 0); + nstime_update(&time); + + nstime_init2(&decay_time, dt, 0); + nstime_copy(&deadline, &time); + nstime_add(&deadline, &decay_time); + + nstime_init2(&update_interval, dt, 0); + nstime_idivide(&update_interval, NINTERVALS); + + /* + * Keep q's slab from being deallocated during the looping below. If a + * cached slab were to repeatedly come and go during looping, it could + * prevent the decay backlog ever becoming empty. + */ + void *p = do_mallocx(1, flags); + uint64_t dirty_npurge1, muzzy_npurge1; + do { + for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; + i++) { + void *q = do_mallocx(1, flags); + dallocx(q, flags); + } + dirty_npurge1 = get_arena_dirty_npurge(arena_ind); + muzzy_npurge1 = get_arena_muzzy_npurge(arena_ind); + + nstime_add(&time_mock, &update_interval); + nstime_update(&time); + } while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 == + dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) || + !terminate_asap)); + dallocx(p, flags); + + if (config_stats) { + assert_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + + muzzy_npurge0, "Expected purging to occur"); + } +#undef NINTERVALS +} + +TEST_BEGIN(test_decay_ticker) { +#define NPS 2048 + ssize_t ddt = opt_dirty_decay_time; + ssize_t mdt = opt_muzzy_decay_time; + unsigned arena_ind = do_arena_create(ddt, mdt); int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); void *ps[NPS]; size_t large; /* - * Allocate a bunch of large objects, pause the clock, deallocate the - * objects, restore the 
clock, then [md]allocx() in a tight loop while - * advancing time rapidly to verify the ticker triggers purging. + * Allocate a bunch of large objects, pause the clock, deallocate every + * other object (to fragment virtual memory), restore the clock, then + * [md]allocx() in a tight loop while advancing time rapidly to verify + * the ticker triggers purging. */ if (config_tcache) { @@ -346,7 +436,8 @@ TEST_BEGIN(test_decay_ticker) { } do_purge(arena_ind); - uint64_t npurge0 = get_arena_npurge(arena_ind); + uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); + uint64_t muzzy_npurge0 = get_arena_muzzy_npurge(arena_ind); for (unsigned i = 0; i < NPS; i++) { ps[i] = do_mallocx(large, flags); @@ -362,7 +453,7 @@ TEST_BEGIN(test_decay_ticker) { nstime_monotonic = nstime_monotonic_mock; nstime_update = nstime_update_mock; - for (unsigned i = 0; i < NPS; i++) { + for (unsigned i = 0; i < NPS; i += 2) { dallocx(ps[i], flags); unsigned nupdates0 = nupdates_mock; do_decay(arena_ind); @@ -370,51 +461,16 @@ TEST_BEGIN(test_decay_ticker) { "Expected nstime_update() to be called"); } - nstime_t time, update_interval, decay_time, deadline; + decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0, + muzzy_npurge0, true); + decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0, + muzzy_npurge0, false); - nstime_init(&time, 0); - nstime_update(&time); - - nstime_init2(&decay_time, dt, 0); - nstime_copy(&deadline, &time); - nstime_add(&deadline, &decay_time); - - nstime_init2(&update_interval, dt, 0); - nstime_idivide(&update_interval, NINTERVALS); - - nstime_init2(&decay_time, dt, 0); - nstime_copy(&deadline, &time); - nstime_add(&deadline, &decay_time); - - /* - * Keep q's slab from being deallocated during the looping below. If - * a cached slab were to repeatedly come and go during looping, it could - * prevent the decay backlog ever becoming empty. 
- */ - void *p = do_mallocx(1, flags); - uint64_t npurge1; - do { - for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; i++) { - void *q = do_mallocx(1, flags); - dallocx(q, flags); - } - npurge1 = get_arena_npurge(arena_ind); - - nstime_add(&time_mock, &update_interval); - nstime_update(&time); - } while (nstime_compare(&time, &deadline) <= 0 && npurge1 == npurge0); - dallocx(p, flags); + do_arena_destroy(arena_ind); nstime_monotonic = nstime_monotonic_orig; nstime_update = nstime_update_orig; - - if (config_stats) { - assert_u64_gt(npurge1, npurge0, "Expected purging to occur"); - } - - do_arena_destroy(arena_ind); #undef NPS -#undef NINTERVALS } TEST_END @@ -435,8 +491,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { "Unexpected mallctl failure"); do_epoch(); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge0, &sz, - NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + npurge0 = get_arena_npurge(0); nupdates_mock = 0; nstime_init(&time_mock, 0); @@ -464,8 +519,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { do_epoch(); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge1, &sz, - NULL, 0), config_stats ? 0 : ENOENT, "Unexpected mallctl result"); + npurge1 = get_arena_npurge(0); if (config_stats) { assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); @@ -478,24 +532,28 @@ TEST_BEGIN(test_decay_nonmonotonic) { TEST_END TEST_BEGIN(test_decay_now) { - unsigned arena_ind = do_arena_create(0); + unsigned arena_ind = do_arena_create(0, 0); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; - /* Verify that dirty pages never linger after deallocation. */ + /* Verify that dirty/muzzy pages never linger after deallocation. 
*/ for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { size_t size = sizes[i]; generate_dirty(arena_ind, size); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, + "Unexpected muzzy pages"); } do_arena_destroy(arena_ind); } TEST_END TEST_BEGIN(test_decay_never) { - unsigned arena_ind = do_arena_create(-1); + unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; void *ptrs[sizeof(sizes)/sizeof(size_t)]; for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { @@ -503,12 +561,16 @@ TEST_BEGIN(test_decay_never) { } /* Verify that each deallocation generates additional dirty pages. */ size_t pdirty_prev = get_arena_pdirty(arena_ind); + size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); assert_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + assert_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { dallocx(ptrs[i], flags); size_t pdirty = get_arena_pdirty(arena_ind); + size_t pmuzzy = get_arena_pmuzzy(arena_ind); assert_zu_gt(pdirty, pdirty_prev, "Expected dirty pages to increase."); + assert_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); pdirty_prev = pdirty; } do_arena_destroy(arena_ind); diff --git a/test/unit/decay.sh b/test/unit/decay.sh index 284af815..0df17884 100644 --- a/test/unit/decay.sh +++ b/test/unit/decay.sh @@ -1,6 +1,6 @@ #!/bin/sh -export MALLOC_CONF="decay_time:1" +export MALLOC_CONF="dirty_decay_time:1,muzzy_decay_time:1" if [ "x${enable_tcache}" = "x1" ] ; then export MALLOC_CONF="${MALLOC_CONF},lg_tcache_max:0" fi diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 1aedbe8a..4241063e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -161,7 +161,8 @@ 
TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); - TEST_MALLCTL_OPT(ssize_t, decay_time, always); + TEST_MALLCTL_OPT(ssize_t, dirty_decay_time, always); + TEST_MALLCTL_OPT(ssize_t, muzzy_decay_time, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(bool, zero, fill); @@ -401,32 +402,68 @@ TEST_BEGIN(test_arena_i_initialized) { } TEST_END -TEST_BEGIN(test_arena_i_decay_time) { - ssize_t decay_time, orig_decay_time, prev_decay_time; +TEST_BEGIN(test_arena_i_dirty_decay_time) { + ssize_t dirty_decay_time, orig_dirty_decay_time, prev_dirty_decay_time; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.decay_time", (void *)&orig_decay_time, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - - decay_time = -2; - assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); - - decay_time = 0x7fffffff; - assert_d_eq(mallctl("arena.0.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), 0, + assert_d_eq(mallctl("arena.0.dirty_decay_time", + (void *)&orig_dirty_decay_time, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - for (prev_decay_time = decay_time, decay_time = -1; - decay_time < 20; prev_decay_time = decay_time, decay_time++) { - ssize_t old_decay_time; + dirty_decay_time = -2; + assert_d_eq(mallctl("arena.0.dirty_decay_time", NULL, NULL, + (void *)&dirty_decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); - assert_d_eq(mallctl("arena.0.decay_time", (void *)&old_decay_time, - &sz, (void *)&decay_time, sizeof(ssize_t)), 0, + dirty_decay_time = 0x7fffffff; + assert_d_eq(mallctl("arena.0.dirty_decay_time", NULL, NULL, + (void *)&dirty_decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + + for (prev_dirty_decay_time = dirty_decay_time, 
dirty_decay_time = -1; + dirty_decay_time < 20; prev_dirty_decay_time = dirty_decay_time, + dirty_decay_time++) { + ssize_t old_dirty_decay_time; + + assert_d_eq(mallctl("arena.0.dirty_decay_time", + (void *)&old_dirty_decay_time, &sz, + (void *)&dirty_decay_time, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_decay_time, prev_decay_time, - "Unexpected old arena.0.decay_time"); + assert_zd_eq(old_dirty_decay_time, prev_dirty_decay_time, + "Unexpected old arena.0.dirty_decay_time"); + } +} +TEST_END + +TEST_BEGIN(test_arena_i_muzzy_decay_time) { + ssize_t muzzy_decay_time, orig_muzzy_decay_time, prev_muzzy_decay_time; + size_t sz = sizeof(ssize_t); + + assert_d_eq(mallctl("arena.0.muzzy_decay_time", + (void *)&orig_muzzy_decay_time, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + + muzzy_decay_time = -2; + assert_d_eq(mallctl("arena.0.muzzy_decay_time", NULL, NULL, + (void *)&muzzy_decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + muzzy_decay_time = 0x7fffffff; + assert_d_eq(mallctl("arena.0.muzzy_decay_time", NULL, NULL, + (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + + for (prev_muzzy_decay_time = muzzy_decay_time, muzzy_decay_time = -1; + muzzy_decay_time < 20; prev_muzzy_decay_time = muzzy_decay_time, + muzzy_decay_time++) { + ssize_t old_muzzy_decay_time; + + assert_d_eq(mallctl("arena.0.muzzy_decay_time", + (void *)&old_muzzy_decay_time, &sz, + (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_muzzy_decay_time, prev_muzzy_decay_time, + "Unexpected old arena.0.muzzy_decay_time"); } } TEST_END @@ -522,32 +559,68 @@ TEST_BEGIN(test_arena_i_dss) { } TEST_END -TEST_BEGIN(test_arenas_decay_time) { - ssize_t decay_time, orig_decay_time, prev_decay_time; +TEST_BEGIN(test_arenas_dirty_decay_time) { + ssize_t dirty_decay_time, orig_dirty_decay_time, prev_dirty_decay_time; size_t sz = sizeof(ssize_t); - 
assert_d_eq(mallctl("arenas.decay_time", (void *)&orig_decay_time, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("arenas.dirty_decay_time", + (void *)&orig_dirty_decay_time, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); - decay_time = -2; - assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), EFAULT, + dirty_decay_time = -2; + assert_d_eq(mallctl("arenas.dirty_decay_time", NULL, NULL, + (void *)&dirty_decay_time, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - decay_time = 0x7fffffff; - assert_d_eq(mallctl("arenas.decay_time", NULL, NULL, - (void *)&decay_time, sizeof(ssize_t)), 0, + dirty_decay_time = 0x7fffffff; + assert_d_eq(mallctl("arenas.dirty_decay_time", NULL, NULL, + (void *)&dirty_decay_time, sizeof(ssize_t)), 0, "Expected mallctl() failure"); - for (prev_decay_time = decay_time, decay_time = -1; - decay_time < 20; prev_decay_time = decay_time, decay_time++) { - ssize_t old_decay_time; + for (prev_dirty_decay_time = dirty_decay_time, dirty_decay_time = -1; + dirty_decay_time < 20; prev_dirty_decay_time = dirty_decay_time, + dirty_decay_time++) { + ssize_t old_dirty_decay_time; - assert_d_eq(mallctl("arenas.decay_time", - (void *)&old_decay_time, &sz, (void *)&decay_time, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_decay_time, prev_decay_time, - "Unexpected old arenas.decay_time"); + assert_d_eq(mallctl("arenas.dirty_decay_time", + (void *)&old_dirty_decay_time, &sz, + (void *)&dirty_decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_dirty_decay_time, prev_dirty_decay_time, + "Unexpected old arenas.dirty_decay_time"); + } +} +TEST_END + +TEST_BEGIN(test_arenas_muzzy_decay_time) { + ssize_t muzzy_decay_time, orig_muzzy_decay_time, prev_muzzy_decay_time; + size_t sz = sizeof(ssize_t); + + assert_d_eq(mallctl("arenas.muzzy_decay_time", + (void *)&orig_muzzy_decay_time, &sz, NULL, 0), 0, + "Unexpected 
mallctl() failure"); + + muzzy_decay_time = -2; + assert_d_eq(mallctl("arenas.muzzy_decay_time", NULL, NULL, + (void *)&muzzy_decay_time, sizeof(ssize_t)), EFAULT, + "Unexpected mallctl() success"); + + muzzy_decay_time = 0x7fffffff; + assert_d_eq(mallctl("arenas.muzzy_decay_time", NULL, NULL, + (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, + "Expected mallctl() failure"); + + for (prev_muzzy_decay_time = muzzy_decay_time, muzzy_decay_time = -1; + muzzy_decay_time < 20; prev_muzzy_decay_time = muzzy_decay_time, + muzzy_decay_time++) { + ssize_t old_muzzy_decay_time; + + assert_d_eq(mallctl("arenas.muzzy_decay_time", + (void *)&old_muzzy_decay_time, &sz, + (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, + "Unexpected mallctl() failure"); + assert_zd_eq(old_muzzy_decay_time, prev_muzzy_decay_time, + "Unexpected old arenas.muzzy_decay_time"); } } TEST_END @@ -630,7 +703,8 @@ TEST_BEGIN(test_stats_arenas) { TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(const char *, dss); - TEST_STATS_ARENAS(ssize_t, decay_time); + TEST_STATS_ARENAS(ssize_t, dirty_decay_time); + TEST_STATS_ARENAS(ssize_t, muzzy_decay_time); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); @@ -653,11 +727,13 @@ main(void) { test_tcache, test_thread_arena, test_arena_i_initialized, - test_arena_i_decay_time, + test_arena_i_dirty_decay_time, + test_arena_i_muzzy_decay_time, test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, - test_arenas_decay_time, + test_arenas_dirty_decay_time, + test_arenas_muzzy_decay_time, test_arenas_constants, test_arenas_bin_constants, test_arenas_lextent_constants, diff --git a/test/unit/pack.sh b/test/unit/pack.sh index de12e553..76757ac3 100644 --- a/test/unit/pack.sh +++ b/test/unit/pack.sh @@ -1,4 +1,4 @@ #!/bin/sh # Immediately purge to minimize fragmentation. 
-export MALLOC_CONF="decay_time:-1" +export MALLOC_CONF="dirty_decay_time:0,muzzy_decay_time:0" diff --git a/test/unit/stats.c b/test/unit/stats.c index c458d3f9..f8c6b104 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -71,7 +71,8 @@ TEST_BEGIN(test_stats_arenas_summary) { size_t sz; int expected = config_stats ? 0 : ENOENT; size_t mapped; - uint64_t npurge, nmadvise, purged; + uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; + uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; little = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0)); assert_ptr_not_null(little, "Unexpected mallocx() failure"); @@ -92,19 +93,34 @@ TEST_BEGIN(test_stats_arenas_summary) { sz = sizeof(size_t); assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexepected mallctl() result"); + sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.npurge", (void *)&npurge, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.nmadvise", (void *)&nmadvise, &sz, - NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.purged", (void *)&purged, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.dirty_npurge", + (void *)&dirty_npurge, &sz, NULL, 0), expected, + "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", + (void *)&dirty_nmadvise, &sz, NULL, 0), expected, + "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.dirty_purged", + (void *)&dirty_purged, &sz, NULL, 0), expected, + "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.muzzy_npurge", + (void *)&muzzy_npurge, &sz, NULL, 0), expected, + "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", + (void *)&muzzy_nmadvise, &sz, NULL, 0), expected, + "Unexepected mallctl() result"); + assert_d_eq(mallctl("stats.arenas.0.muzzy_purged", + (void *)&muzzy_purged, &sz, 
NULL, 0), expected, + "Unexepected mallctl() result"); if (config_stats) { - assert_u64_gt(npurge, 0, + assert_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); - assert_u64_le(nmadvise, purged, - "nmadvise should be no greater than purged"); + assert_u64_le(dirty_nmadvise, dirty_purged, + "dirty_nmadvise should be no greater than dirty_purged"); + assert_u64_le(muzzy_nmadvise, muzzy_purged, + "muzzy_nmadvise should be no greater than muzzy_purged"); } } TEST_END From afb46ce23617f1315a6e75f846fd4169e8aaa455 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 15 Mar 2017 13:09:43 -0700 Subject: [PATCH 0717/2608] Propagate madvise() success/failure from pages_purge_lazy(). --- src/pages.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pages.c b/src/pages.c index 9846e19e..fa986ba6 100644 --- a/src/pages.c +++ b/src/pages.c @@ -180,15 +180,15 @@ pages_purge_lazy(void *addr, size_t size) { #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - madvise(addr, size, MADV_FREE); + return (madvise(addr, size, MADV_FREE) != 0); #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) - madvise(addr, size, MADV_DONTNEED); + return (madvise(addr, size, MADV_DONTNEED) != 0); #else not_reached(); #endif - return false; } bool From 3a1363bcf82fc93d99b5a796ffd4cebc93c2375f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 15 Mar 2017 12:50:37 -0700 Subject: [PATCH 0718/2608] Refactor tcaches flush/destroy to reduce lock duration. Drop tcaches_mtx before calling tcache_destroy(). 
--- src/tcache.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 266bd1f5..2250425f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -500,32 +500,39 @@ label_return: return err; } -static void -tcaches_elm_flush(tsd_t *tsd, tcaches_t *elm) { +static tcache_t * +tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); if (elm->tcache == NULL) { - return; + return NULL; } - tcache_destroy(tsd, elm->tcache); + tcache_t *tcache = elm->tcache; elm->tcache = NULL; + return tcache; } void tcaches_flush(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); - tcaches_elm_flush(tsd, &tcaches[ind]); + tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]); malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); + if (tcache != NULL) { + tcache_destroy(tsd, tcache); + } } void tcaches_destroy(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_t *elm = &tcaches[ind]; - tcaches_elm_flush(tsd, elm); + tcache_t *tcache = tcaches_elm_remove(tsd, elm); elm->next = tcaches_avail; tcaches_avail = elm; malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); + if (tcache != NULL) { + tcache_destroy(tsd, tcache); + } } bool From ad917626356a507a2f4fac9e0289d653b977ed31 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 17 Mar 2017 18:06:51 -0700 Subject: [PATCH 0719/2608] Not re-binding iarena when migrate between arenas. 
--- src/jemalloc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c1b05dbb..3c595bab 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -447,7 +447,6 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); - tsd_iarena_set(tsd, newarena); } static void From bda12bd925a174955ec12ae798f4d48a482a085b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 20 Mar 2017 17:37:02 -0700 Subject: [PATCH 0720/2608] Clamp LG_VADDR for 32-bit builds on x64. --- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index db42a505..37745c40 100644 --- a/configure.ac +++ b/configure.ac @@ -442,6 +442,9 @@ typedef unsigned __int32 uint32_t; if (f == NULL) { return 1; } + if (vaddr > (sizeof(void *) << 3)) { + vaddr = sizeof(void *) << 3; + } fprintf(f, "%u", vaddr); fclose(f); return 0; From e8921cf2eb1d049b688e29e14187c26ca05193ee Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Mar 2017 17:36:57 -0700 Subject: [PATCH 0721/2608] Convert extent_t's usize to szind. Rather than storing usize only for large (and prof-promoted) allocations, store the size class index for allocations that reside within the extent, such that the size class index is valid for all extents that contain extant allocations, and invalid otherwise (mainly to make debugging simpler). 
--- include/jemalloc/internal/arena_inlines_b.h | 20 +- include/jemalloc/internal/extent_externs.h | 44 ++--- include/jemalloc/internal/extent_inlines.h | 91 +++++---- include/jemalloc/internal/extent_structs.h | 6 +- .../jemalloc/internal/jemalloc_internal.h.in | 7 +- include/jemalloc/internal/private_symbols.txt | 3 +- include/jemalloc/internal/tcache_inlines.h | 9 +- src/arena.c | 34 ++-- src/base.c | 9 +- src/extent.c | 187 +++++++++--------- src/extent_dss.c | 8 +- src/large.c | 49 +++-- test/unit/slab.c | 4 +- 13 files changed, 233 insertions(+), 238 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b718451b..0d4aff39 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -147,15 +147,15 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, extent, ptr); } } else { - size_t usize = extent_usize_get(extent); + szind_t szind = extent_szind_get(extent); - if (likely(tcache != NULL) && usize <= tcache_maxclass) { - if (config_prof && unlikely(usize <= SMALL_MAXCLASS)) { + if (likely(tcache != NULL) && szind < nhbins) { + if (config_prof && unlikely(szind < NBINS)) { arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, - ptr, usize, slow_path); + ptr, szind, slow_path); } } else { large_dalloc(tsdn, extent); @@ -169,25 +169,25 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); + szind_t szind = size2index(size); if (likely(extent_slab_get(extent))) { /* Small allocation. 
*/ if (likely(tcache != NULL)) { - szind_t binind = size2index(size); - assert(binind == extent_slab_data_get(extent)->binind); - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + assert(szind == extent_slab_data_get(extent)->binind); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { arena_dalloc_small(tsdn, extent_arena_get(extent), extent, ptr); } } else { - if (likely(tcache != NULL) && size <= tcache_maxclass) { - if (config_prof && unlikely(size <= SMALL_MAXCLASS)) { + if (likely(tcache != NULL) && szind < nhbins) { + if (config_prof && unlikely(szind < NBINS)) { arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size, slow_path); + szind, slow_path); } } else { large_dalloc(tsdn, extent); diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index e8f632f8..68c49a17 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -4,15 +4,15 @@ extern rtree_t extents_rtree; extern const extent_hooks_t extent_hooks_default; -extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -extent_hooks_t *extent_hooks_get(arena_t *arena); -extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); +extent_hooks_t *extent_hooks_get(arena_t *arena); +extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); #ifdef JEMALLOC_JET -size_t extent_size_quantize_floor(size_t size); -size_t extent_size_quantize_ceil(size_t size); +size_t extent_size_quantize_floor(size_t size); +size_t extent_size_quantize_ceil(size_t size); #endif ph_proto(, extent_heap_, extent_heap_t, extent_t) @@ -23,8 +23,8 @@ extent_state_t extents_state_get(const extents_t 
*extents); size_t extents_npages_get(extents_t *extents); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, - size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, - bool slab); + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, @@ -32,32 +32,32 @@ extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, +bool extent_decommit_wrapper(tsdn_t *tsdn, 
arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - size_t usize_a, size_t size_b, size_t usize_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + szind_t szind_a, size_t size_b, szind_t szind_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); -bool extent_boot(void); +bool extent_boot(void); #endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 989c0d19..549c8f2f 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -2,37 +2,38 @@ #define JEMALLOC_INTERNAL_EXTENT_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE -extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); -arena_t *extent_arena_get(const extent_t *extent); -void *extent_base_get(const extent_t *extent); -void *extent_addr_get(const extent_t *extent); -size_t extent_size_get(const extent_t *extent); -size_t extent_usize_get(const extent_t *extent); -void *extent_before_get(const extent_t *extent); -void *extent_last_get(const extent_t *extent); -void *extent_past_get(const extent_t *extent); -size_t extent_sn_get(const extent_t *extent); -extent_state_t extent_state_get(const extent_t *extent); -bool 
extent_zeroed_get(const extent_t *extent); -bool extent_committed_get(const extent_t *extent); -bool extent_slab_get(const extent_t *extent); -arena_slab_data_t *extent_slab_data_get(extent_t *extent); -const arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); -prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); -void extent_arena_set(extent_t *extent, arena_t *arena); -void extent_addr_set(extent_t *extent, void *addr); -void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); -void extent_size_set(extent_t *extent, size_t size); -void extent_usize_set(extent_t *extent, size_t usize); -void extent_sn_set(extent_t *extent, size_t sn); -void extent_state_set(extent_t *extent, extent_state_t state); -void extent_zeroed_set(extent_t *extent, bool zeroed); -void extent_committed_set(extent_t *extent, bool committed); -void extent_slab_set(extent_t *extent, bool slab); -void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); -void extent_init(extent_t *extent, arena_t *arena, void *addr, - size_t size, size_t usize, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool slab); +extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); +arena_t *extent_arena_get(const extent_t *extent); +void *extent_base_get(const extent_t *extent); +void *extent_addr_get(const extent_t *extent); +size_t extent_size_get(const extent_t *extent); +szind_t extent_szind_get(const extent_t *extent); +size_t extent_usize_get(const extent_t *extent); +void *extent_before_get(const extent_t *extent); +void *extent_last_get(const extent_t *extent); +void *extent_past_get(const extent_t *extent); +size_t extent_sn_get(const extent_t *extent); +extent_state_t extent_state_get(const extent_t *extent); +bool extent_zeroed_get(const extent_t *extent); +bool extent_committed_get(const extent_t *extent); +bool extent_slab_get(const extent_t *extent); +arena_slab_data_t *extent_slab_data_get(extent_t *extent); +const 
arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); +prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); +void extent_arena_set(extent_t *extent, arena_t *arena); +void extent_addr_set(extent_t *extent, void *addr); +void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); +void extent_size_set(extent_t *extent, size_t size); +void extent_szind_set(extent_t *extent, szind_t szind); +void extent_sn_set(extent_t *extent, size_t sn); +void extent_state_set(extent_t *extent, extent_state_t state); +void extent_zeroed_set(extent_t *extent, bool zeroed); +void extent_committed_set(extent_t *extent, bool committed); +void extent_slab_set(extent_t *extent, bool slab); +void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); +void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed); void extent_list_init(extent_list_t *list); extent_t *extent_list_first(const extent_list_t *list); extent_t *extent_list_last(const extent_list_t *list); @@ -40,9 +41,9 @@ void extent_list_append(extent_list_t *list, extent_t *extent); void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert); void extent_list_remove(extent_list_t *list, extent_t *extent); -int extent_sn_comp(const extent_t *a, const extent_t *b); -int extent_ad_comp(const extent_t *a, const extent_t *b); -int extent_snad_comp(const extent_t *a, const extent_t *b); +int extent_sn_comp(const extent_t *a, const extent_t *b); +int extent_ad_comp(const extent_t *a, const extent_t *b); +int extent_snad_comp(const extent_t *a, const extent_t *b); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -79,10 +80,15 @@ extent_size_get(const extent_t *extent) { return extent->e_size; } +JEMALLOC_INLINE szind_t +extent_szind_get(const extent_t *extent) { + assert(extent->e_szind < NSIZES); /* Never call when "invalid". 
*/ + return extent->e_szind; +} + JEMALLOC_INLINE size_t extent_usize_get(const extent_t *extent) { - assert(!extent->e_slab); - return extent->e_usize; + return index2size(extent_szind_get(extent)); } JEMALLOC_INLINE void * @@ -180,8 +186,9 @@ extent_size_set(extent_t *extent, size_t size) { } JEMALLOC_INLINE void -extent_usize_set(extent_t *extent, size_t usize) { - extent->e_usize = usize; +extent_szind_set(extent_t *extent, szind_t szind) { + assert(szind <= NSIZES); /* NSIZES means "invalid". */ + extent->e_szind = szind; } JEMALLOC_INLINE void @@ -216,19 +223,19 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { JEMALLOC_INLINE void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - size_t usize, size_t sn, extent_state_t state, bool zeroed, bool committed, - bool slab) { + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); extent_addr_set(extent, addr); extent_size_set(extent, size); - extent_usize_set(extent, usize); + extent_slab_set(extent, slab); + extent_szind_set(extent, szind); extent_sn_set(extent, sn); extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); - extent_slab_set(extent, slab); if (config_prof) { extent_prof_tctx_set(extent, NULL); } diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 001b7c13..82cfa58a 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -20,10 +20,12 @@ struct extent_s { size_t e_size; /* - * Usable size, typically smaller than extent size due to large_pad or + * Usable size class index for allocations residing in this extent, + * regardless of whether the extent is a slab. Extent size and usable + * size often differ even for non-slabs, either due to large_pad or * promotion of sampled small regions. 
*/ - size_t e_usize; + szind_t e_szind; /* * Serial number (potentially non-unique). diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 97b41bb0..b184380d 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -536,8 +536,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/witness_inlines.h" #include "jemalloc/internal/mutex_inlines.h" #include "jemalloc/internal/rtree_inlines.h" -#include "jemalloc/internal/extent_inlines.h" -#include "jemalloc/internal/base_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); @@ -565,7 +563,6 @@ ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); malloc_cpuid_t malloc_getcpu(void); unsigned percpu_arena_choose(void); unsigned percpu_arena_ind_limit(void); - #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -882,8 +879,6 @@ percpu_arena_ind_limit(void) { } } - - JEMALLOC_INLINE arena_tdata_t * arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { arena_tdata_t *tdata; @@ -938,6 +933,8 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { } #endif +#include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/base_inlines.h" #include "jemalloc/internal/bitmap_inlines.h" /* * Include portions of arena code interleaved with tcache code in order to diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5ca72818..d68f6b61 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -191,8 +191,9 @@ extent_snad_comp extent_split_wrapper extent_state_get extent_state_set +extent_szind_get +extent_szind_set extent_usize_get -extent_usize_set extent_zeroed_get extent_zeroed_set extents_alloc diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index a90107f9..fd7e1764 
100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -15,7 +15,7 @@ void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path); void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - size_t size, bool slow_path); + szind_t binind, bool slow_path); tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif @@ -271,19 +271,16 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, size_t size, +tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - szind_t binind; tcache_bin_t *tbin; tcache_bin_info_t *tbin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - binind = size2index(size); - if (slow_path && config_fill && unlikely(opt_junk_free)) { - large_dalloc_junk(ptr, size); + large_dalloc_junk(ptr, index2size(binind)); } tbin = &tcache->tbins[binind]; diff --git a/src/arena.c b/src/arena.c index d861fff6..2c432e6a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -449,25 +449,25 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - extent_t *extent; extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + szind_t szind = size2index(usize); size_t mapped_add; bool commit = true; - extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, usize, large_pad, alignment, zero, - &commit, false); + extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, + &arena->extents_dirty, NULL, usize, large_pad, alignment, false, + szind, zero, &commit); if 
(extent == NULL) { extent = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, NULL, usize, large_pad, alignment, - zero, &commit, false); + false, szind, zero, &commit); } size_t size = usize + large_pad; if (extent == NULL) { extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, - usize, large_pad, alignment, zero, &commit, false); + usize, large_pad, alignment, false, szind, zero, &commit); if (config_stats) { /* * extent may be NULL on OOM, but in that case @@ -1133,7 +1133,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info) { + extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info, + szind_t szind) { extent_t *slab; bool zero, commit; @@ -1142,7 +1143,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, - bin_info->slab_size, 0, PAGE, &zero, &commit, true); + bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1158,19 +1159,20 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + szind_t szind = size2index(bin_info->reg_size); bool zero = false; bool commit = true; extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, &zero, - &commit, true); + &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true, + binind, &zero, &commit); if (slab == NULL) { slab = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE, - &zero, &commit, true); + true, binind, &zero, &commit); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, - bin_info); + 
bin_info, szind); if (slab == NULL) { return NULL; } @@ -1467,7 +1469,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); assert(usize <= SMALL_MAXCLASS); - extent_usize_set(extent, usize); + extent_szind_set(extent, size2index(usize)); prof_accum_cancel(tsdn, &arena->prof_accum, usize); @@ -1479,7 +1481,7 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_usize_set(extent, LARGE_MINCLASS); + extent_szind_set(extent, NBINS); assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); @@ -1496,8 +1498,8 @@ arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, usize = arena_prof_demote(tsdn, extent, ptr); if (usize <= tcache_maxclass) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, usize, - slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + size2index(usize), slow_path); } else { large_dalloc(tsdn, extent); } diff --git a/src/base.c b/src/base.c index e7712a64..b1a4ae37 100644 --- a/src/base.c +++ b/src/base.c @@ -87,8 +87,8 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, sn = *extent_sn_next; (*extent_sn_next)++; - extent_init(extent, NULL, addr, size, 0, sn, extent_state_active, true, - true, false); + extent_init(extent, NULL, addr, size, false, NSIZES, sn, + extent_state_active, true, true); } static void * @@ -104,8 +104,9 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size); assert(extent_size_get(extent) >= *gap_size + size); extent_init(extent, NULL, (void *)((uintptr_t)extent_addr_get(extent) + - *gap_size + size), extent_size_get(extent) - *gap_size - size, 0, - extent_sn_get(extent), extent_state_active, true, true, false); + *gap_size + size), extent_size_get(extent) - *gap_size - size, + false, NSIZES, extent_sn_get(extent), extent_state_active, true, + true); return 
ret; } diff --git a/src/extent.c b/src/extent.c index c690b98e..31dcbd76 100644 --- a/src/extent.c +++ b/src/extent.c @@ -71,8 +71,8 @@ static size_t highpages; static void extent_deregister(tsdn_t *tsdn, extent_t *extent); static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, - size_t usize, size_t pad, size_t alignment, bool *zero, bool *commit, - bool slab); + size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, extent_t *extent, bool *coalesced); @@ -299,14 +299,14 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, extent_t * extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { - assert(usize + pad != 0); + extents_t *extents, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - usize, pad, alignment, zero, commit, slab); + size, pad, alignment, slab, szind, zero, commit); } void @@ -615,8 +615,8 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - bool locked, void *new_addr, size_t usize, size_t pad, size_t alignment, - bool *zero, bool *commit) { + bool locked, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, locked ? 
1 : 0); if (locked) { malloc_mutex_assert_owner(tsdn, &extents->mtx); @@ -639,10 +639,10 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, assert(alignment <= PAGE); } - size_t size = usize + pad; - size_t alloc_size = size + PAGE_CEILING(alignment) - PAGE; + size_t esize = size + pad; + size_t alloc_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ - if (alloc_size < usize) { + if (alloc_size < esize) { return NULL; } if (!locked) { @@ -661,7 +661,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, if (extent != NULL) { assert(extent_base_get(extent) == new_addr); if (extent_arena_get(extent) != arena || - extent_size_get(extent) < size || + extent_size_get(extent) < esize || extent_state_get(extent) != extents_state_get(extents)) { extent = NULL; @@ -700,21 +700,20 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - void *new_addr, size_t usize, size_t pad, size_t alignment, - extent_t *extent) { - size_t size = usize + pad; + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, extent_t *extent) { + size_t esize = size + pad; size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + size); - size_t trailsize = extent_size_get(extent) - leadsize - size; + assert(extent_size_get(extent) >= leadsize + esize); + size_t trailsize = extent_size_get(extent) - leadsize - esize; /* Split the lead. 
*/ if (leadsize != 0) { extent_t *lead = extent; extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, - lead, leadsize, leadsize, size + trailsize, usize + - trailsize); + lead, leadsize, NSIZES, esize + trailsize, szind); if (extent == NULL) { extent_deregister(tsdn, lead); extents_leak(tsdn, arena, r_extent_hooks, extents, @@ -727,7 +726,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, /* Split the trail. */ if (trailsize != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - r_extent_hooks, extent, size, usize, trailsize, trailsize); + r_extent_hooks, extent, esize, szind, trailsize, NSIZES); if (trail == NULL) { extent_deregister(tsdn, extent); extents_leak(tsdn, arena, r_extent_hooks, extents, @@ -737,10 +736,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_deactivate(tsdn, arena, extents, trail, false); } else if (leadsize == 0) { /* - * Splitting causes usize to be set as a side effect, but no + * Splitting causes szind to be set as a side effect, but no * splitting occurred. 
*/ - extent_usize_set(extent, usize); + extent_szind_set(extent, szind); } return extent; @@ -748,24 +747,25 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { + extents_t *extents, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); + assert(!*zero || !slab); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, false, new_addr, usize, pad, alignment, zero, - commit); + rtree_ctx, extents, false, new_addr, size, pad, alignment, slab, + zero, commit); if (extent == NULL) { return NULL; } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, new_addr, usize, pad, alignment, extent); + extents, new_addr, size, pad, alignment, slab, szind, extent); if (extent == NULL) { return NULL; } @@ -790,18 +790,15 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } if (*zero) { + void *addr = extent_base_get(extent); + size_t size = extent_size_get(extent); if (!extent_zeroed_get(extent)) { - if (pages_purge_forced(extent_base_get(extent), - extent_size_get(extent))) { - memset(extent_addr_get(extent), 0, - extent_usize_get(extent)); + if (pages_purge_forced(addr, size)) { + memset(addr, 0, size); } } else if (config_debug) { - size_t i; - size_t *p = (size_t *)(uintptr_t) - extent_addr_get(extent); - - for (i = 0; i < usize / sizeof(size_t); i++) { + size_t *p = (size_t *)(uintptr_t)addr; + for (size_t i = 0; i < size / sizeof(size_t); i++) { assert(p[i] == 0); } } @@ -882,12 +879,10 @@ 
extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, */ static extent_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { - extent_t *extent; - void *ptr; - size_t size, alloc_size, alloc_size_min, leadsize, trailsize; - bool zeroed, committed; + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + assert(pad == 0 || !slab); + assert(!*zero || !slab); /* * Check whether the next extent size in the series would be large @@ -895,37 +890,37 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * series of unsatisfiable allocation requests doesn't cause unused * extent creation as a side effect. */ - size = usize + pad; - alloc_size = pind2sz(atomic_read_u(&arena->extent_grow_next)); - alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; + size_t esize = size + pad; + size_t alloc_size = pind2sz(atomic_read_u(&arena->extent_grow_next)); + size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ - if (alloc_size_min < usize) { + if (alloc_size_min < esize) { return NULL; } if (alloc_size < alloc_size_min) { return NULL; } - extent = extent_alloc(tsdn, arena); + extent_t *extent = extent_alloc(tsdn, arena); if (extent == NULL) { return NULL; } - zeroed = false; - committed = false; - ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, + bool zeroed = false; + bool committed = false; + void *ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, &zeroed, &committed, arena->dss_prec); - extent_init(extent, arena, ptr, alloc_size, alloc_size, + extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, false); + committed); if (ptr == NULL || extent_register_no_gdump_add(tsdn, extent)) { extent_dalloc(tsdn, arena, extent); return NULL; } - leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, PAGE_CEILING(alignment)) - - (uintptr_t)ptr; + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, + PAGE_CEILING(alignment)) - (uintptr_t)ptr; assert(new_addr == NULL || leadsize == 0); - assert(alloc_size >= leadsize + size); - trailsize = alloc_size - leadsize - size; + assert(alloc_size >= leadsize + esize); + size_t trailsize = alloc_size - leadsize - esize; if (extent_zeroed_get(extent)) { *zero = true; } @@ -937,7 +932,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (leadsize != 0) { extent_t *lead = extent; extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, lead, - leadsize, leadsize, size + trailsize, usize + trailsize); + leadsize, NSIZES, esize + trailsize, szind); if (extent == NULL) { extent_deregister(tsdn, lead); extents_leak(tsdn, arena, r_extent_hooks, false, lead); @@ -950,7 +945,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, /* Split the trail. 
*/ if (trailsize != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - r_extent_hooks, extent, size, usize, trailsize, trailsize); + r_extent_hooks, extent, esize, szind, trailsize, NSIZES); if (trail == NULL) { extent_deregister(tsdn, extent); extents_leak(tsdn, arena, r_extent_hooks, @@ -961,10 +956,10 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, &arena->extents_retained, trail); } else if (leadsize == 0) { /* - * Splitting causes usize to be set as a side effect, but no + * Splitting causes szind to be set as a side effect, but no * splitting occurred. */ - extent_usize_set(extent, usize); + extent_szind_set(extent, szind); } if (*commit && !extent_committed_get(extent)) { @@ -993,10 +988,10 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_interior_register(tsdn, rtree_ctx, extent); } if (*zero && !extent_zeroed_get(extent)) { - if (pages_purge_forced(extent_base_get(extent), - extent_size_get(extent))) { - memset(extent_addr_get(extent), 0, - extent_usize_get(extent)); + void *addr = extent_base_get(extent); + size_t size = extent_size_get(extent); + if (pages_purge_forced(addr, size)) { + memset(addr, 0, size); } } /* @@ -1019,16 +1014,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { extent_t *extent; - assert(usize != 0); + assert(size != 0); assert(alignment != 0); extent = extent_recycle(tsdn, arena, r_extent_hooks, - &arena->extents_retained, new_addr, usize, pad, alignment, zero, - commit, slab); + &arena->extents_retained, new_addr, size, pad, alignment, slab, + szind, zero, commit); if (extent != NULL) { if (config_prof) { extent_gdump_add(tsdn, extent); @@ -1036,7 
+1031,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, } if (!config_munmap && extent == NULL) { extent = extent_grow_retained(tsdn, arena, r_extent_hooks, - new_addr, usize, pad, alignment, zero, commit, slab); + new_addr, size, pad, alignment, slab, szind, zero, commit); } return extent; @@ -1044,32 +1039,28 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { - extent_t *extent; - size_t size; - void *addr; - - size = usize + pad; - extent = extent_alloc(tsdn, arena); + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + size_t esize = size + pad; + extent_t *extent = extent_alloc(tsdn, arena); if (extent == NULL) { return NULL; } + void *addr; if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ - addr = extent_alloc_default_impl(tsdn, arena, new_addr, size, + addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, alignment, zero, commit); } else { - addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, size, - alignment, zero, commit, arena_ind_get(arena)); + addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, + esize, alignment, zero, commit, arena_ind_get(arena)); } if (addr == NULL) { extent_dalloc(tsdn, arena, extent); return NULL; } - extent_init(extent, arena, addr, size, usize, - arena_extent_sn_next(arena), extent_state_active, zero, commit, - slab); + extent_init(extent, arena, addr, esize, slab, szind, + arena_extent_sn_next(arena), extent_state_active, zero, commit); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -1084,17 +1075,17 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t usize, size_t pad, - size_t alignment, bool *zero, bool *commit, bool slab) { + extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, - new_addr, usize, pad, alignment, zero, commit, slab); + new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, - new_addr, usize, pad, alignment, zero, commit, slab); + new_addr, size, pad, alignment, slab, szind, zero, commit); } return extent; @@ -1232,7 +1223,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); - extent_usize_set(extent, 0); + extent_szind_set(extent, NSIZES); 
if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); @@ -1474,7 +1465,7 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - size_t usize_a, size_t size_b, size_t usize_b) { + szind_t szind_a, size_t size_b, szind_t szind_b) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); @@ -1498,9 +1489,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t lead; extent_init(&lead, arena, extent_addr_get(extent), size_a, - usize_a, extent_sn_get(extent), extent_state_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_slab_get(extent)); + extent_slab_get(extent), szind_a, extent_sn_get(extent), + extent_state_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b)) { @@ -1509,9 +1500,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, usize_b, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_slab_get(extent)); + size_a), size_b, extent_slab_get(extent), szind_b, + extent_sn_get(extent), extent_state_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, &trail_elm_a, &trail_elm_b)) { goto label_error_c; @@ -1524,7 +1515,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_size_set(extent, size_a); - extent_usize_set(extent, usize_a); + extent_szind_set(extent, szind_a); extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent); extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail); @@ 
-1617,7 +1608,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_usize_set(a, extent_usize_get(a) + extent_usize_get(b)); + extent_szind_set(a, NSIZES); extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? extent_sn_get(a) : extent_sn_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); diff --git a/src/extent_dss.c b/src/extent_dss.c index 50825713..5074594e 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -139,9 +139,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, (uintptr_t)gap_addr_page; if (gap_size_page != 0) { extent_init(gap, arena, gap_addr_page, - gap_size_page, gap_size_page, + gap_size_page, false, NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true, false); + extent_state_active, false, true); } /* * Compute the address just past the end of the desired @@ -189,8 +189,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_t extent; extent_init(&extent, arena, ret, size, - size, 0, extent_state_active, false, - true, false); + size, false, NSIZES, + extent_state_active, false, true); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/src/large.c b/src/large.c index c578995c..0e9f0d72 100644 --- a/src/large.c +++ b/src/large.c @@ -114,15 +114,15 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { /* Split excess pages. 
*/ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - &extent_hooks, extent, usize + large_pad, usize, diff, - diff); + &extent_hooks, extent, usize + large_pad, size2index(usize), + diff, NSIZES); if (trail == NULL) { return true; } if (config_fill && unlikely(opt_junk_free)) { large_dalloc_maybe_junk(extent_addr_get(trail), - extent_usize_get(trail)); + extent_size_get(trail)); } arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, trail); @@ -139,7 +139,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); - size_t trailsize = usize - extent_usize_get(extent); + size_t trailsize = usize - oldusize; if (extent_hooks->merge == NULL) { return true; @@ -160,17 +160,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool new_mapping; if ((trail = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, extent_past_get(extent), trailsize, 0, - CACHELINE, &is_zeroed_trail, &commit, false)) != NULL + CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, extent_past_get(extent), trailsize, 0, - CACHELINE, &is_zeroed_trail, &commit, false)) != NULL) { + CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, - extent_past_get(extent), trailsize, 0, CACHELINE, - &is_zeroed_trail, &commit, false)) == NULL) { + extent_past_get(extent), trailsize, 0, CACHELINE, false, + NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; } if (config_stats) { @@ -182,6 +182,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); return true; } + extent_szind_set(extent, 
size2index(usize)); if (config_stats && new_mapping) { arena_stats_mapped_add(tsdn, &arena->stats, trailsize); @@ -218,14 +219,14 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero) { - assert(s2u(extent_usize_get(extent)) == extent_usize_get(extent)); + size_t oldusize = extent_usize_get(extent); + /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(extent_usize_get(extent) >= LARGE_MINCLASS && usize_max >= - LARGE_MINCLASS); + assert(oldusize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS); - if (usize_max > extent_usize_get(extent)) { + if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { @@ -233,8 +234,7 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, return false; } /* Try again, this time with usize_min. */ - if (usize_min < usize_max && usize_min > - extent_usize_get(extent) && + if (usize_min < usize_max && usize_min > oldusize && large_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { arena_decay_tick(tsdn, extent_arena_get(extent)); @@ -246,14 +246,13 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * Avoid moving the allocation if the existing extent size accommodates * the new size. */ - if (extent_usize_get(extent) >= usize_min && extent_usize_get(extent) <= - usize_max) { + if (oldusize >= usize_min && oldusize <= usize_max) { arena_decay_tick(tsdn, extent_arena_get(extent)); return false; } /* Attempt to shrink the allocation in-place. 
*/ - if (extent_usize_get(extent) > usize_max) { + if (oldusize > usize_max) { if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { arena_decay_tick(tsdn, extent_arena_get(extent)); return false; @@ -274,14 +273,12 @@ large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, void * large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { - void *ret; - size_t copysize; + size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ assert(usize > 0 && usize <= LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(extent_usize_get(extent) >= LARGE_MINCLASS && usize >= - LARGE_MINCLASS); + assert(oldusize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS); /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { @@ -293,16 +290,16 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. */ - ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, zero); + void *ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, + zero); if (ret == NULL) { return NULL; } - copysize = (usize < extent_usize_get(extent)) ? usize : - extent_usize_get(extent); + size_t copysize = (usize < oldusize) ? 
usize : oldusize; memcpy(ret, extent_addr_get(extent), copysize); - isdalloct(tsdn, extent, extent_addr_get(extent), - extent_usize_get(extent), tcache, true); + isdalloct(tsdn, extent, extent_addr_get(extent), oldusize, tcache, + true); return ret; } diff --git a/test/unit/slab.c b/test/unit/slab.c index 1f2a260c..6f40aeef 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -8,8 +8,8 @@ TEST_BEGIN(test_arena_slab_regind) { extent_t slab; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, - MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, 0, 0, - extent_state_active, false, true, true); + MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, + binind, 0, extent_state_active, false, true); assert_ptr_not_null(extent_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { From f50d6009fe945f17584e4a004d1aae60d07bedb5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 13 Mar 2017 17:48:26 -0700 Subject: [PATCH 0722/2608] Remove binind field from arena_slab_data_t. binind is now redundant; the containing extent_t's szind field always provides the same value. --- include/jemalloc/internal/arena_inlines_b.h | 17 +++-------------- include/jemalloc/internal/arena_structs_a.h | 3 --- src/arena.c | 10 +++++----- 3 files changed, 8 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 0d4aff39..92c89a5b 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -117,17 +117,9 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { /* Return the size of the allocation pointed to by ptr. 
*/ JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { - size_t ret; - assert(ptr != NULL); - if (likely(extent_slab_get(extent))) { - ret = index2size(extent_slab_data_get_const(extent)->binind); - } else { - ret = large_salloc(tsdn, extent); - } - - return ret; + return index2size(extent_szind_get(extent)); } JEMALLOC_ALWAYS_INLINE void @@ -136,19 +128,17 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); + szind_t szind = extent_szind_get(extent); if (likely(extent_slab_get(extent))) { /* Small allocation. */ if (likely(tcache != NULL)) { - szind_t binind = extent_slab_data_get(extent)->binind; - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, binind, + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { arena_dalloc_small(tsdn, extent_arena_get(extent), extent, ptr); } } else { - szind_t szind = extent_szind_get(extent); - if (likely(tcache != NULL) && szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { arena_dalloc_promoted(tsdn, extent, ptr, @@ -173,7 +163,6 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, if (likely(extent_slab_get(extent))) { /* Small allocation. */ if (likely(tcache != NULL)) { - assert(szind == extent_slab_data_get(extent)->binind); tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h index ccb3b052..ed265b20 100644 --- a/include/jemalloc/internal/arena_structs_a.h +++ b/include/jemalloc/internal/arena_structs_a.h @@ -2,9 +2,6 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H struct arena_slab_data_s { - /* Index of bin this slab is associated with. */ - szind_t binind; - /* Number of free regions in slab. 
*/ unsigned nfree; diff --git a/src/arena.c b/src/arena.c index 2c432e6a..968343c0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -384,7 +384,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { JEMALLOC_INLINE_C void arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { - szind_t binind = slab_data->binind; + szind_t binind = extent_szind_get(slab); const arena_bin_info_t *bin_info = &arena_bin_info[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); @@ -1181,7 +1181,6 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Initialize slab internals. */ arena_slab_data_t *slab_data = extent_slab_data_get(slab); - slab_data->binind = binind; slab_data->nfree = bin_info->nregs; bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); @@ -1511,7 +1510,7 @@ arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { if (slab == bin->slabcur) { bin->slabcur = NULL; } else { - szind_t binind = extent_slab_data_get(slab)->binind; + szind_t binind = extent_szind_get(slab); const arena_bin_info_t *bin_info = &arena_bin_info[binind]; /* @@ -1573,7 +1572,7 @@ static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); - szind_t binind = slab_data->binind; + szind_t binind = extent_szind_get(slab); arena_bin_t *bin = &arena->bins[binind]; const arena_bin_info_t *bin_info = &arena_bin_info[binind]; @@ -1604,7 +1603,8 @@ arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - arena_bin_t *bin = &arena->bins[extent_slab_data_get(extent)->binind]; + szind_t binind = extent_szind_get(extent); + arena_bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); From 
944c8a338307db0423077cb2a16149ff5b7feab8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 16 Mar 2017 09:46:42 -0700 Subject: [PATCH 0723/2608] Split rtree_elm_t into rtree_{node,leaf}_elm_t. This allows leaf elements to differ in size from internal node elements. In principle it would be more correct to use a different type for each level of the tree, but due to implementation details related to atomic operations, we use casts anyway, thus counteracting the value of additional type correctness. Furthermore, such a scheme would require function code generation (via cpp macros), as well as either unwieldy type names for leaves or type aliases, e.g. typedef struct rtree_elm_d2_s rtree_leaf_elm_t; This alternate strategy would be more correct, and with less code duplication, but probably not worth the complexity. --- include/jemalloc/internal/private_symbols.txt | 26 +- include/jemalloc/internal/rtree_externs.h | 30 +- include/jemalloc/internal/rtree_inlines.h | 105 +++--- include/jemalloc/internal/rtree_structs.h | 23 +- include/jemalloc/internal/rtree_types.h | 27 +- include/jemalloc/internal/tsd_structs.h | 2 +- src/extent.c | 74 ++-- src/rtree.c | 355 ++++++++++++------ test/unit/rtree.c | 75 ++-- 9 files changed, 459 insertions(+), 258 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d68f6b61..a0deef89 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -413,17 +413,19 @@ psz2ind psz2u rtree_clear rtree_delete -rtree_elm_acquire -rtree_elm_lookup -rtree_elm_lookup_hard -rtree_elm_read -rtree_elm_read_acquired -rtree_elm_release -rtree_elm_witness_access -rtree_elm_witness_acquire -rtree_elm_witness_release -rtree_elm_write -rtree_elm_write_acquired +rtree_leaf_alloc +rtree_leaf_dalloc +rtree_leaf_elm_acquire +rtree_leaf_elm_lookup +rtree_leaf_elm_lookup_hard +rtree_leaf_elm_read +rtree_leaf_elm_read_acquired 
+rtree_leaf_elm_release +rtree_leaf_elm_witness_access +rtree_leaf_elm_witness_acquire +rtree_leaf_elm_witness_release +rtree_leaf_elm_write +rtree_leaf_elm_write_acquired rtree_leafkey rtree_new rtree_node_alloc @@ -513,7 +515,7 @@ tsd_prof_tdata_get tsd_prof_tdata_set tsd_prof_tdatap_get tsd_rtree_ctxp_get -tsd_rtree_elm_witnessesp_get +tsd_rtree_leaf_elm_witnessesp_get tsd_set tsd_tcache_enabled_get tsd_tcache_enabled_set diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index fa53580a..842eb0b5 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -8,36 +8,40 @@ * level. */ static const rtree_level_t rtree_levels[] = { -#if RTREE_NSB <= 10 +#if RTREE_HEIGHT == 1 {RTREE_NSB, RTREE_NHIB + RTREE_NSB} -#elif RTREE_NSB <= 36 +#elif RTREE_HEIGHT == 2 {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} -#elif RTREE_NSB <= 52 +#elif RTREE_HEIGHT == 3 {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, {RTREE_NSB/3 + RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} #else -# error Unsupported number of significant virtual address bits +# error Unsupported rtree height #endif }; bool rtree_new(rtree_t *rtree); #ifdef JEMALLOC_JET -typedef rtree_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); extern rtree_node_alloc_t *rtree_node_alloc; -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_elm_t *); +typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_leaf_alloc_t *rtree_leaf_alloc; +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); extern rtree_node_dalloc_t *rtree_node_dalloc; +typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); +extern rtree_leaf_dalloc_t *rtree_leaf_dalloc; void 
rtree_delete(tsdn_t *tsdn, rtree_t *rtree); #endif -rtree_elm_t *rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, +rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -void rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, - uintptr_t key, const rtree_elm_t *elm); -void rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm); -void rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm); +void rtree_leaf_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, + uintptr_t key, const rtree_leaf_elm_t *elm); +void rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_leaf_elm_t *elm); +void rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_leaf_elm_t *elm); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index b3301095..3af17d36 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -4,21 +4,22 @@ #ifndef JEMALLOC_ENABLE_INLINE uintptr_t rtree_leafkey(uintptr_t key); uintptr_t rtree_subkey(uintptr_t key, unsigned level); -extent_t *rtree_elm_read(rtree_elm_t *elm, bool dependent); -void rtree_elm_write(rtree_elm_t *elm, const extent_t *extent); -rtree_elm_t *rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, +extent_t *rtree_leaf_elm_read(rtree_leaf_elm_t *elm, bool dependent); +void rtree_leaf_elm_write(rtree_leaf_elm_t *elm, const extent_t *extent); +rtree_leaf_elm_t *rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, const extent_t *extent); extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool 
dependent); -rtree_elm_t *rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, +rtree_leaf_elm_t *rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -extent_t *rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_elm_t *elm); -void rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_elm_t *elm, const extent_t *extent); -void rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm); +extent_t *rtree_leaf_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_leaf_elm_t *elm); +void rtree_leaf_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_leaf_elm_t *elm, const extent_t *extent); +void rtree_leaf_elm_release(tsdn_t *tsdn, const rtree_t *rtree, + rtree_leaf_elm_t *elm); void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key); #endif @@ -45,7 +46,7 @@ rtree_subkey(uintptr_t key, unsigned level) { } JEMALLOC_ALWAYS_INLINE extent_t * -rtree_elm_read(rtree_elm_t *elm, bool dependent) { +rtree_leaf_elm_read(rtree_leaf_elm_t *elm, bool dependent) { extent_t *extent; if (dependent) { @@ -55,7 +56,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) { * synchronization, because the rtree update became visible in * memory before the pointer came into existence. */ - extent = (extent_t *)atomic_load_p(&elm->child_or_extent, + extent = (extent_t *)atomic_load_p(&elm->extent, ATOMIC_RELAXED); } else { /* @@ -63,7 +64,7 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) { * dependent on a previous rtree write, which means a stale read * could result if synchronization were omitted here. 
*/ - extent = (extent_t *)atomic_load_p(&elm->child_or_extent, + extent = (extent_t *)atomic_load_p(&elm->extent, ATOMIC_ACQUIRE); } @@ -74,12 +75,12 @@ rtree_elm_read(rtree_elm_t *elm, bool dependent) { } JEMALLOC_INLINE void -rtree_elm_write(rtree_elm_t *elm, const extent_t *extent) { - atomic_store_p(&elm->child_or_extent, (void *)extent, ATOMIC_RELEASE); +rtree_leaf_elm_write(rtree_leaf_elm_t *elm, const extent_t *extent) { + atomic_store_p(&elm->extent, (void *)extent, ATOMIC_RELEASE); } -JEMALLOC_ALWAYS_INLINE rtree_elm_t * -rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { assert(key != 0); assert(!dependent || !init_missing); @@ -87,7 +88,7 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t leafkey = rtree_leafkey(key); #define RTREE_CACHE_CHECK(i) do { \ if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ - rtree_elm_t *leaf = rtree_ctx->cache[i].leaf; \ + rtree_leaf_elm_t *leaf = rtree_ctx->cache[i].leaf; \ if (likely(leaf != NULL)) { \ /* Reorder. */ \ memmove(&rtree_ctx->cache[1], \ @@ -117,24 +118,24 @@ rtree_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } #undef RTREE_CACHE_CHECK - return rtree_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, dependent, - init_missing); + return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, + dependent, init_missing); } JEMALLOC_INLINE bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, const extent_t *extent) { - rtree_elm_t *elm; + rtree_leaf_elm_t *elm; assert(extent != NULL); /* Use rtree_clear() for this case. 
*/ assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); + elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); if (elm == NULL) { return true; } - assert(rtree_elm_read(elm, false) == NULL); - rtree_elm_write(elm, extent); + assert(rtree_leaf_elm_read(elm, false) == NULL); + rtree_leaf_elm_write(elm, extent); return false; } @@ -142,21 +143,22 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, JEMALLOC_ALWAYS_INLINE extent_t * rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent) { - rtree_elm_t *elm; + rtree_leaf_elm_t *elm; - elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, false); + elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, + false); if (!dependent && elm == NULL) { return NULL; } - return rtree_elm_read(elm, dependent); + return rtree_leaf_elm_read(elm, dependent); } -JEMALLOC_INLINE rtree_elm_t * -rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +JEMALLOC_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { - rtree_elm_t *elm = rtree_elm_lookup(tsdn, rtree, rtree_ctx, key, - dependent, init_missing); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, dependent, init_missing); if (!dependent && elm == NULL) { return NULL; } @@ -164,14 +166,14 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, spin_t spinner = SPIN_INITIALIZER; while (true) { /* The least significant bit serves as a lock. 
*/ - void *extent_and_lock = atomic_load_p(&elm->child_or_extent, + void *extent_and_lock = atomic_load_p(&elm->extent, ATOMIC_RELAXED); if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0)) { void *locked = (void *)((uintptr_t)extent_and_lock | (uintptr_t)0x1); if (likely(atomic_compare_exchange_strong_p( - &elm->child_or_extent, &extent_and_lock, locked, + &elm->extent, &extent_and_lock, locked, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { break; } @@ -180,58 +182,61 @@ rtree_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } if (config_debug) { - rtree_elm_witness_acquire(tsdn, rtree, key, elm); + rtree_leaf_elm_witness_acquire(tsdn, rtree, key, elm); } return elm; } JEMALLOC_INLINE extent_t * -rtree_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { +rtree_leaf_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_leaf_elm_t *elm) { extent_t *extent; - void *ptr = atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED); + void *ptr = atomic_load_p(&elm->extent, ATOMIC_RELAXED); assert(((uintptr_t)ptr & (uintptr_t)0x1) == (uintptr_t)0x1); extent = (extent_t *)((uintptr_t)ptr & ~((uintptr_t)0x1)); assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); if (config_debug) { - rtree_elm_witness_access(tsdn, rtree, elm); + rtree_leaf_elm_witness_access(tsdn, rtree, elm); } return extent; } JEMALLOC_INLINE void -rtree_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm, - const extent_t *extent) { +rtree_leaf_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, + rtree_leaf_elm_t *elm, const extent_t *extent) { assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - assert(((uintptr_t)atomic_load_p(&elm->child_or_extent, ATOMIC_RELAXED) + assert(((uintptr_t)atomic_load_p(&elm->extent, ATOMIC_RELAXED) & (uintptr_t)0x1) == (uintptr_t)0x1); if (config_debug) { - rtree_elm_witness_access(tsdn, rtree, elm); + rtree_leaf_elm_witness_access(tsdn, rtree, elm); } - 
atomic_store_p(&elm->child_or_extent, (void *)((uintptr_t)extent - | (uintptr_t)0x1), ATOMIC_RELEASE); - assert(rtree_elm_read_acquired(tsdn, rtree, elm) == extent); + atomic_store_p(&elm->extent, (void *)((uintptr_t)extent | + (uintptr_t)0x1), ATOMIC_RELEASE); + assert(rtree_leaf_elm_read_acquired(tsdn, rtree, elm) == extent); } JEMALLOC_INLINE void -rtree_elm_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_elm_t *elm) { - rtree_elm_write(elm, rtree_elm_read_acquired(tsdn, rtree, elm)); +rtree_leaf_elm_release(tsdn_t *tsdn, const rtree_t *rtree, + rtree_leaf_elm_t *elm) { + rtree_leaf_elm_write(elm, rtree_leaf_elm_read_acquired(tsdn, rtree, + elm)); if (config_debug) { - rtree_elm_witness_release(tsdn, rtree, elm); + rtree_leaf_elm_witness_release(tsdn, rtree, elm); } } JEMALLOC_INLINE void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { - rtree_elm_t *elm; + rtree_leaf_elm_t *elm; - elm = rtree_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); - rtree_elm_write_acquired(tsdn, rtree, elm, NULL); - rtree_elm_release(tsdn, rtree, elm); + elm = rtree_leaf_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); + rtree_leaf_elm_write_acquired(tsdn, rtree, elm, NULL); + rtree_leaf_elm_release(tsdn, rtree, elm); } #endif diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index b62c489d..68554035 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -1,18 +1,21 @@ #ifndef JEMALLOC_INTERNAL_RTREE_STRUCTS_H #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H -struct rtree_elm_s { - /* Either "rtree_elm_t *child;" or "extent_t *extent;". 
*/ - atomic_p_t child_or_extent; +struct rtree_node_elm_s { + atomic_p_t child; }; -struct rtree_elm_witness_s { - const rtree_elm_t *elm; +struct rtree_leaf_elm_s { + atomic_p_t extent; +}; + +struct rtree_leaf_elm_witness_s { + const rtree_leaf_elm_t *elm; witness_t witness; }; -struct rtree_elm_witness_tsd_s { - rtree_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; +struct rtree_leaf_elm_witness_tsd_s { + rtree_leaf_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; }; struct rtree_level_s { @@ -26,8 +29,8 @@ struct rtree_level_s { }; struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_elm_t *leaf; + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; }; struct rtree_ctx_s { @@ -38,7 +41,7 @@ struct rtree_ctx_s { }; struct rtree_s { - /* An rtree_elm_t *. */ + /* An rtree_{internal,leaf}_elm_t *. */ atomic_p_t root; malloc_mutex_t init_lock; }; diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index a654698b..18fc5b0f 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -8,9 +8,10 @@ ******************************************************************************* */ -typedef struct rtree_elm_s rtree_elm_t; -typedef struct rtree_elm_witness_s rtree_elm_witness_t; -typedef struct rtree_elm_witness_tsd_s rtree_elm_witness_tsd_t; +typedef struct rtree_node_elm_s rtree_node_elm_t; +typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; +typedef struct rtree_leaf_elm_witness_s rtree_leaf_elm_witness_t; +typedef struct rtree_leaf_elm_witness_tsd_s rtree_leaf_elm_witness_tsd_t; typedef struct rtree_level_s rtree_level_t; typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; typedef struct rtree_ctx_s rtree_ctx_t; @@ -23,7 +24,15 @@ typedef struct rtree_s rtree_t; /* Number of significant bits. */ #define RTREE_NSB (LG_VADDR - RTREE_NLIB) /* Number of levels in radix tree. 
*/ -#define RTREE_HEIGHT (sizeof(rtree_levels)/sizeof(rtree_level_t)) +#if RTREE_NSB <= 10 +# define RTREE_HEIGHT 1 +#elif RTREE_NSB <= 36 +# define RTREE_HEIGHT 2 +#elif RTREE_NSB <= 52 +# define RTREE_HEIGHT 3 +#else +# error Unsupported number of significant virtual address bits +#endif /* * Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf, @@ -47,16 +56,16 @@ typedef struct rtree_s rtree_t; /* * Maximum number of concurrently acquired elements per thread. This controls - * how many witness_t structures are embedded in tsd. Ideally rtree_elm_t would - * have a witness_t directly embedded, but that would dramatically bloat the - * tree. This must contain enough entries to e.g. coalesce two extents. + * how many witness_t structures are embedded in tsd. Ideally rtree_leaf_elm_t + * would have a witness_t directly embedded, but that would dramatically bloat + * the tree. This must contain enough entries to e.g. coalesce two extents. */ #define RTREE_ELM_ACQUIRE_MAX 4 -/* Initializers for rtree_elm_witness_tsd_t. */ +/* Initializers for rtree_leaf_elm_witness_tsd_t. 
*/ #define RTREE_ELM_WITNESS_INITIALIZER { \ NULL, \ - WITNESS_INITIALIZER("rtree_elm", WITNESS_RANK_RTREE_ELM) \ + WITNESS_INITIALIZER("rtree_leaf_elm", WITNESS_RANK_RTREE_ELM) \ } #define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index 503021e7..722b9669 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -29,7 +29,7 @@ struct tsd_init_head_s { yes, no) \ O(rtree_ctx, rtree_ctx_t, no, no) \ O(witnesses, witness_list_t, no, yes) \ - O(rtree_elm_witnesses, rtree_elm_witness_tsd_t, \ + O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ no, no) \ O(witness_fork, bool, yes, no) \ diff --git a/src/extent.c b/src/extent.c index 31dcbd76..2a16d387 100644 --- a/src/extent.c +++ b/src/extent.c @@ -450,8 +450,8 @@ extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, static bool extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, const extent_t *extent, bool dependent, bool init_missing, - rtree_elm_t **r_elm_a, rtree_elm_t **r_elm_b) { - *r_elm_a = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, + rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { + *r_elm_a = rtree_leaf_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) { return true; @@ -459,11 +459,11 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, assert(*r_elm_a != NULL); if (extent_size_get(extent) > PAGE) { - *r_elm_b = rtree_elm_acquire(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_last_get(extent), dependent, + *r_elm_b = rtree_leaf_elm_acquire(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)extent_last_get(extent), dependent, init_missing); if (!dependent && *r_elm_b == NULL) { - rtree_elm_release(tsdn, &extents_rtree, *r_elm_a); + rtree_leaf_elm_release(tsdn, &extents_rtree, *r_elm_a); return true; } assert(*r_elm_b != 
NULL); @@ -475,19 +475,21 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } static void -extent_rtree_write_acquired(tsdn_t *tsdn, rtree_elm_t *elm_a, - rtree_elm_t *elm_b, const extent_t *extent) { - rtree_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); +extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b, const extent_t *extent) { + rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); if (elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &extents_rtree, elm_b, extent); + rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, elm_b, + extent); } } static void -extent_rtree_release(tsdn_t *tsdn, rtree_elm_t *elm_a, rtree_elm_t *elm_b) { - rtree_elm_release(tsdn, &extents_rtree, elm_a); +extent_rtree_release(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b) { + rtree_leaf_elm_release(tsdn, &extents_rtree, elm_a); if (elm_b != NULL) { - rtree_elm_release(tsdn, &extents_rtree, elm_b); + rtree_leaf_elm_release(tsdn, &extents_rtree, elm_b); } } @@ -543,7 +545,7 @@ static bool extent_register_impl(tsdn_t *tsdn, const extent_t *extent, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_elm_t *elm_a, *elm_b; + rtree_leaf_elm_t *elm_a, *elm_b; if (extent_rtree_acquire(tsdn, rtree_ctx, extent, false, true, &elm_a, &elm_b)) { @@ -596,7 +598,7 @@ static void extent_deregister(tsdn_t *tsdn, extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_elm_t *elm_a, *elm_b; + rtree_leaf_elm_t *elm_a, *elm_b; extent_rtree_acquire(tsdn, rtree_ctx, extent, true, false, &elm_a, &elm_b); @@ -651,13 +653,13 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { - rtree_elm_t *elm; + rtree_leaf_elm_t *elm; - elm = rtree_elm_acquire(tsdn, &extents_rtree, 
rtree_ctx, + elm = rtree_leaf_elm_acquire(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)new_addr, false, false); if (elm != NULL) { - extent = rtree_elm_read_acquired(tsdn, &extents_rtree, - elm); + extent = rtree_leaf_elm_read_acquired(tsdn, + &extents_rtree, elm); if (extent != NULL) { assert(extent_base_get(extent) == new_addr); if (extent_arena_get(extent) != arena || @@ -667,7 +669,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent = NULL; } } - rtree_elm_release(tsdn, &extents_rtree, elm); + rtree_leaf_elm_release(tsdn, &extents_rtree, elm); } else { extent = NULL; } @@ -1156,11 +1158,11 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, again = false; /* Try to coalesce forward. */ - rtree_elm_t *next_elm = rtree_elm_acquire(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)extent_past_get(extent), false, - false); + rtree_leaf_elm_t *next_elm = rtree_leaf_elm_acquire(tsdn, + &extents_rtree, rtree_ctx, + (uintptr_t)extent_past_get(extent), false, false); if (next_elm != NULL) { - extent_t *next = rtree_elm_read_acquired(tsdn, + extent_t *next = rtree_leaf_elm_read_acquired(tsdn, &extents_rtree, next_elm); /* * extents->mtx only protects against races for @@ -1169,7 +1171,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, */ bool can_coalesce = (next != NULL && extent_can_coalesce(arena, extents, extent, next)); - rtree_elm_release(tsdn, &extents_rtree, next_elm); + rtree_leaf_elm_release(tsdn, &extents_rtree, next_elm); if (can_coalesce && !extent_coalesce(tsdn, arena, r_extent_hooks, extents, extent, next, true)) { if (extents->delay_coalesce) { @@ -1182,15 +1184,15 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, } /* Try to coalesce backward. 
*/ - rtree_elm_t *prev_elm = rtree_elm_acquire(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)extent_before_get(extent), false, - false); + rtree_leaf_elm_t *prev_elm = rtree_leaf_elm_acquire(tsdn, + &extents_rtree, rtree_ctx, + (uintptr_t)extent_before_get(extent), false, false); if (prev_elm != NULL) { - extent_t *prev = rtree_elm_read_acquired(tsdn, + extent_t *prev = rtree_leaf_elm_read_acquired(tsdn, &extents_rtree, prev_elm); bool can_coalesce = (prev != NULL && extent_can_coalesce(arena, extents, extent, prev)); - rtree_elm_release(tsdn, &extents_rtree, prev_elm); + rtree_leaf_elm_release(tsdn, &extents_rtree, prev_elm); if (can_coalesce && !extent_coalesce(tsdn, arena, r_extent_hooks, extents, extent, prev, false)) { extent = prev; @@ -1472,7 +1474,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t *trail; rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; + rtree_leaf_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1590,19 +1592,21 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, */ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; extent_rtree_acquire(tsdn, rtree_ctx, a, true, false, &a_elm_a, &a_elm_b); extent_rtree_acquire(tsdn, rtree_ctx, b, true, false, &b_elm_a, &b_elm_b); if (a_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, NULL); - rtree_elm_release(tsdn, &extents_rtree, a_elm_b); + rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, + NULL); + rtree_leaf_elm_release(tsdn, &extents_rtree, a_elm_b); } if (b_elm_b != NULL) { - rtree_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, NULL); - rtree_elm_release(tsdn, &extents_rtree, b_elm_a); + 
rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, + NULL); + rtree_leaf_elm_release(tsdn, &extents_rtree, b_elm_a); } else { b_elm_b = b_elm_a; } diff --git a/src/rtree.c b/src/rtree.c index 54dc3487..18197390 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -19,10 +19,10 @@ rtree_new(rtree_t *rtree) { #undef rtree_node_alloc #define rtree_node_alloc JEMALLOC_N(rtree_node_alloc_impl) #endif -static rtree_elm_t * +static rtree_node_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_elm_t *)base_alloc(tsdn, b0get(), nelms * - sizeof(rtree_elm_t), CACHELINE); + return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms * + sizeof(rtree_node_elm_t), CACHELINE); } #ifdef JEMALLOC_JET #undef rtree_node_alloc @@ -35,7 +35,7 @@ rtree_node_alloc_t *rtree_node_alloc = JEMALLOC_N(rtree_node_alloc_impl); #define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc_impl) #endif UNUSED static void -rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { +rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { /* Nodes are never deleted during normal operation. 
*/ not_reached(); } @@ -46,46 +46,92 @@ rtree_node_dalloc_t *rtree_node_dalloc = JEMALLOC_N(rtree_node_dalloc_impl); #endif #ifdef JEMALLOC_JET -static void -rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node, - unsigned level) { - if (level + 1 < RTREE_HEIGHT) { - size_t nchildren, i; +#undef rtree_leaf_alloc +#define rtree_leaf_alloc JEMALLOC_N(rtree_leaf_alloc_impl) +#endif +static rtree_leaf_elm_t * +rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { + return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms * + sizeof(rtree_leaf_elm_t), CACHELINE); +} +#ifdef JEMALLOC_JET +#undef rtree_leaf_alloc +#define rtree_leaf_alloc JEMALLOC_N(rtree_leaf_alloc) +rtree_leaf_alloc_t *rtree_leaf_alloc = JEMALLOC_N(rtree_leaf_alloc_impl); +#endif - nchildren = ZU(1) << rtree_levels[level].bits; - for (i = 0; i < nchildren; i++) { - rtree_elm_t *child = (rtree_elm_t *)atomic_load_p( - &node[i].child_or_extent, ATOMIC_RELAXED); - if (child != NULL) { - rtree_delete_subtree(tsdn, rtree, child, level + +#ifdef JEMALLOC_JET +#undef rtree_leaf_dalloc +#define rtree_leaf_dalloc JEMALLOC_N(rtree_leaf_dalloc_impl) +#endif +UNUSED static void +rtree_leaf_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { + /* Leaves are never deleted during normal operation. 
*/ + not_reached(); +} +#ifdef JEMALLOC_JET +#undef rtree_leaf_dalloc +#define rtree_leaf_dalloc JEMALLOC_N(rtree_leaf_dalloc) +rtree_leaf_dalloc_t *rtree_leaf_dalloc = JEMALLOC_N(rtree_leaf_dalloc_impl); +#endif + +#ifdef JEMALLOC_JET +static void +rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree, + unsigned level) { + size_t nchildren = ZU(1) << rtree_levels[level].bits; + if (level + 2 < RTREE_HEIGHT) { + for (size_t i = 0; i < nchildren; i++) { + rtree_node_elm_t *node = + (rtree_node_elm_t *)atomic_load_p(&subtree[i].child, + ATOMIC_RELAXED); + if (node != NULL) { + rtree_delete_subtree(tsdn, rtree, node, level + 1); } } + } else { + for (size_t i = 0; i < nchildren; i++) { + rtree_leaf_elm_t *leaf = + (rtree_leaf_elm_t *)atomic_load_p(&subtree[i].child, + ATOMIC_RELAXED); + if (leaf != NULL) { + rtree_leaf_dalloc(tsdn, rtree, leaf); + } + } } - rtree_node_dalloc(tsdn, rtree, node); + + rtree_node_dalloc(tsdn, rtree, subtree); } void rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { - rtree_elm_t *rtree_root = (rtree_elm_t *)atomic_load_p(&rtree->root, - ATOMIC_RELAXED); - if (rtree_root != NULL) { - rtree_delete_subtree(tsdn, rtree, rtree_root, 0); + if (RTREE_HEIGHT > 1) { + rtree_node_elm_t *node = (rtree_node_elm_t *)atomic_load_p( + &rtree->root, ATOMIC_RELAXED); + if (node != NULL) { + rtree_delete_subtree(tsdn, rtree, node, 0); + } + } else { + rtree_leaf_elm_t *leaf = + (rtree_leaf_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_RELAXED); + if (leaf != NULL) { + rtree_leaf_dalloc(tsdn, rtree, leaf); + } } } #endif -static rtree_elm_t * +static rtree_node_elm_t * rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, atomic_p_t *elmp) { - rtree_elm_t *node; - malloc_mutex_lock(tsdn, &rtree->init_lock); /* * If *elmp is non-null, then it was initialized with the init lock * held, so we can get by with 'relaxed' here. 
*/ - node = atomic_load_p(elmp, ATOMIC_RELAXED); + rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED); if (node == NULL) { node = rtree_node_alloc(tsdn, rtree, ZU(1) << rtree_levels[level].bits); @@ -104,90 +150,186 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, return node; } +static rtree_leaf_elm_t * +rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) { + malloc_mutex_lock(tsdn, &rtree->init_lock); + /* + * If *elmp is non-null, then it was initialized with the init lock + * held, so we can get by with 'relaxed' here. + */ + rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED); + if (leaf == NULL) { + leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) << + rtree_levels[RTREE_HEIGHT-1].bits); + if (leaf == NULL) { + malloc_mutex_unlock(tsdn, &rtree->init_lock); + return NULL; + } + /* + * Even though we hold the lock, a later reader might not; we + * need release semantics. + */ + atomic_store_p(elmp, leaf, ATOMIC_RELEASE); + } + malloc_mutex_unlock(tsdn, &rtree->init_lock); + + return leaf; +} + static bool -rtree_node_valid(rtree_elm_t *node) { +rtree_node_valid(rtree_node_elm_t *node) { return ((uintptr_t)node != (uintptr_t)0); } -static rtree_elm_t * -rtree_child_tryread(rtree_elm_t *elm, bool dependent) { - rtree_elm_t *child; +static bool +rtree_leaf_valid(rtree_leaf_elm_t *leaf) { + return ((uintptr_t)leaf != (uintptr_t)0); +} + +static rtree_node_elm_t * +rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) { + rtree_node_elm_t *node; if (dependent) { - child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent, + node = (rtree_node_elm_t *)atomic_load_p(&elm->child, ATOMIC_RELAXED); } else { - child = (rtree_elm_t *)atomic_load_p(&elm->child_or_extent, + node = (rtree_node_elm_t *)atomic_load_p(&elm->child, ATOMIC_ACQUIRE); } - assert(!dependent || child != NULL); - return child; + assert(!dependent || node != NULL); + return node; } -static rtree_elm_t * -rtree_child_read(tsdn_t *tsdn, rtree_t *rtree, 
rtree_elm_t *elm, unsigned level, - bool dependent) { - rtree_elm_t *child; +static rtree_node_elm_t * +rtree_child_node_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm, + unsigned level, bool dependent) { + rtree_node_elm_t *node; - child = rtree_child_tryread(elm, dependent); - if (!dependent && unlikely(!rtree_node_valid(child))) { - child = rtree_node_init(tsdn, rtree, level + 1, - &elm->child_or_extent); + node = rtree_child_node_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_node_valid(node))) { + node = rtree_node_init(tsdn, rtree, level + 1, &elm->child); } - assert(!dependent || child != NULL); - return child; + assert(!dependent || node != NULL); + return node; } -static rtree_elm_t * -rtree_subtree_tryread(rtree_t *rtree, bool dependent) { - rtree_elm_t *subtree; +static rtree_leaf_elm_t * +rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) { + rtree_leaf_elm_t *leaf; + if (dependent) { - subtree = (rtree_elm_t *)atomic_load_p(&rtree->root, + leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child, ATOMIC_RELAXED); } else { - subtree = (rtree_elm_t *)atomic_load_p(&rtree->root, + leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child, ATOMIC_ACQUIRE); } - assert(!dependent || subtree != NULL); - return subtree; + + assert(!dependent || leaf != NULL); + return leaf; } -static rtree_elm_t * -rtree_subtree_read(tsdn_t *tsdn, rtree_t *rtree, bool dependent) { - rtree_elm_t *subtree = rtree_subtree_tryread(rtree, dependent); - if (!dependent && unlikely(!rtree_node_valid(subtree))) { - subtree = rtree_node_init(tsdn, rtree, 0, &rtree->root); +static rtree_leaf_elm_t * +rtree_child_leaf_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm, + unsigned level, bool dependent) { + rtree_leaf_elm_t *leaf; + + leaf = rtree_child_leaf_tryread(elm, dependent); + if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { + leaf = rtree_leaf_init(tsdn, rtree, &elm->child); } - assert(!dependent || subtree != NULL); - return subtree; + 
assert(!dependent || leaf != NULL); + return leaf; } -rtree_elm_t * -rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +UNUSED static rtree_node_elm_t * +rtree_root_node_tryread(rtree_t *rtree, bool dependent) { + rtree_node_elm_t *node; + if (dependent) { + node = (rtree_node_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_RELAXED); + } else { + node = (rtree_node_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_ACQUIRE); + } + assert(!dependent || node != NULL); + return node; +} + +UNUSED static rtree_node_elm_t * +rtree_root_node_read(tsdn_t *tsdn, rtree_t *rtree, bool dependent) { + rtree_node_elm_t *node = rtree_root_node_tryread(rtree, dependent); + if (!dependent && unlikely(!rtree_node_valid(node))) { + node = rtree_node_init(tsdn, rtree, 0, &rtree->root); + } + assert(!dependent || node != NULL); + return node; +} + +UNUSED static rtree_leaf_elm_t * +rtree_root_leaf_tryread(rtree_t *rtree, bool dependent) { + rtree_leaf_elm_t *leaf; + if (dependent) { + leaf = (rtree_leaf_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_RELAXED); + } else { + leaf = (rtree_leaf_elm_t *)atomic_load_p(&rtree->root, + ATOMIC_ACQUIRE); + } + assert(!dependent || leaf != NULL); + return leaf; +} + +UNUSED static rtree_leaf_elm_t * +rtree_root_leaf_read(tsdn_t *tsdn, rtree_t *rtree, bool dependent) { + rtree_leaf_elm_t *leaf = rtree_root_leaf_tryread(rtree, dependent); + if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { + leaf = rtree_leaf_init(tsdn, rtree, &rtree->root); + } + assert(!dependent || leaf != NULL); + return leaf; +} + +rtree_leaf_elm_t * +rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { - rtree_elm_t *node = init_missing ? rtree_subtree_read(tsdn, rtree, - dependent) : rtree_subtree_tryread(rtree, dependent); + rtree_node_elm_t *node; + rtree_leaf_elm_t *leaf; +#if RTREE_HEIGHT > 1 + node = init_missing ? 
rtree_root_node_read(tsdn, rtree, dependent) : + rtree_root_node_tryread(rtree, dependent); +#else + leaf = init_missing ? rtree_root_leaf_read(tsdn, rtree, dependent) : + rtree_root_leaf_tryread(rtree, dependent); +#endif -#define RTREE_GET_SUBTREE(level) { \ +#define RTREE_GET_CHILD(level) { \ assert(level < RTREE_HEIGHT-1); \ if (!dependent && unlikely(!rtree_node_valid(node))) { \ return NULL; \ } \ uintptr_t subkey = rtree_subkey(key, level); \ - node = init_missing ? rtree_child_read(tsdn, rtree, \ - &node[subkey], level, dependent) : \ - rtree_child_tryread(&node[subkey], dependent); \ + if (level + 2 < RTREE_HEIGHT) { \ + node = init_missing ? \ + rtree_child_node_read(tsdn, rtree, \ + &node[subkey], level, dependent) : \ + rtree_child_node_tryread(&node[subkey], \ + dependent); \ + } else { \ + leaf = init_missing ? \ + rtree_child_leaf_read(tsdn, rtree, \ + &node[subkey], level, dependent) : \ + rtree_child_leaf_tryread(&node[subkey], \ + dependent); \ + } \ } #define RTREE_GET_LEAF(level) { \ assert(level == RTREE_HEIGHT-1); \ - if (!dependent && unlikely(!rtree_node_valid(node))) { \ + if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \ return NULL; \ } \ - /* \ - * node is a leaf, so it contains values rather than \ - * child pointers. 
\ - */ \ if (RTREE_CTX_NCACHE > 1) { \ memmove(&rtree_ctx->cache[1], \ &rtree_ctx->cache[0], \ @@ -196,29 +338,29 @@ rtree_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } \ uintptr_t leafkey = rtree_leafkey(key); \ rtree_ctx->cache[0].leafkey = leafkey; \ - rtree_ctx->cache[0].leaf = node; \ + rtree_ctx->cache[0].leaf = leaf; \ uintptr_t subkey = rtree_subkey(key, level); \ - return &node[subkey]; \ + return &leaf[subkey]; \ } if (RTREE_HEIGHT > 1) { - RTREE_GET_SUBTREE(0) + RTREE_GET_CHILD(0) } if (RTREE_HEIGHT > 2) { - RTREE_GET_SUBTREE(1) + RTREE_GET_CHILD(1) } if (RTREE_HEIGHT > 3) { for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) { - RTREE_GET_SUBTREE(i) + RTREE_GET_CHILD(i) } } RTREE_GET_LEAF(RTREE_HEIGHT-1) -#undef RTREE_GET_SUBTREE +#undef RTREE_GET_CHILD #undef RTREE_GET_LEAF not_reached(); } static int -rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, +rtree_leaf_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { uintptr_t ka = (uintptr_t)oa; uintptr_t kb = (uintptr_t)ob; @@ -230,23 +372,24 @@ rtree_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, } static witness_t * -rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) { +rtree_leaf_elm_witness_alloc(tsd_t *tsd, uintptr_t key, + const rtree_leaf_elm_t *elm) { witness_t *witness; size_t i; - rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); + rtree_leaf_elm_witness_tsd_t *witnesses = + tsd_rtree_leaf_elm_witnessesp_get(tsd); /* Iterate over entire array to detect double allocation attempts. 
*/ witness = NULL; - for (i = 0; i < sizeof(rtree_elm_witness_tsd_t) / sizeof(witness_t); - i++) { - rtree_elm_witness_t *rew = &witnesses->witnesses[i]; + for (i = 0; i < RTREE_ELM_ACQUIRE_MAX; i++) { + rtree_leaf_elm_witness_t *rew = &witnesses->witnesses[i]; assert(rew->elm != elm); if (rew->elm == NULL && witness == NULL) { rew->elm = elm; witness = &rew->witness; - witness_init(witness, "rtree_elm", - WITNESS_RANK_RTREE_ELM, rtree_elm_witness_comp, + witness_init(witness, "rtree_leaf_elm", + WITNESS_RANK_RTREE_ELM, rtree_leaf_elm_witness_comp, (void *)key); } } @@ -255,13 +398,13 @@ rtree_elm_witness_alloc(tsd_t *tsd, uintptr_t key, const rtree_elm_t *elm) { } static witness_t * -rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) { +rtree_leaf_elm_witness_find(tsd_t *tsd, const rtree_leaf_elm_t *elm) { size_t i; - rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); + rtree_leaf_elm_witness_tsd_t *witnesses = + tsd_rtree_leaf_elm_witnessesp_get(tsd); - for (i = 0; i < sizeof(rtree_elm_witness_tsd_t) / sizeof(witness_t); - i++) { - rtree_elm_witness_t *rew = &witnesses->witnesses[i]; + for (i = 0; i < RTREE_ELM_ACQUIRE_MAX; i++) { + rtree_leaf_elm_witness_t *rew = &witnesses->witnesses[i]; if (rew->elm == elm) { return &rew->witness; @@ -271,19 +414,19 @@ rtree_elm_witness_find(tsd_t *tsd, const rtree_elm_t *elm) { } static void -rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, - const rtree_elm_t *elm) { +rtree_leaf_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, + const rtree_leaf_elm_t *elm) { size_t i; - rtree_elm_witness_tsd_t *witnesses = tsd_rtree_elm_witnessesp_get(tsd); + rtree_leaf_elm_witness_tsd_t *witnesses = + tsd_rtree_leaf_elm_witnessesp_get(tsd); - for (i = 0; i < sizeof(rtree_elm_witness_tsd_t) / sizeof(witness_t); - i++) { - rtree_elm_witness_t *rew = &witnesses->witnesses[i]; + for (i = 0; i < RTREE_ELM_ACQUIRE_MAX; i++) { + rtree_leaf_elm_witness_t *rew = &witnesses->witnesses[i]; if (rew->elm == elm) { 
rew->elm = NULL; - witness_init(&rew->witness, "rtree_elm", - WITNESS_RANK_RTREE_ELM, rtree_elm_witness_comp, + witness_init(&rew->witness, "rtree_leaf_elm", + WITNESS_RANK_RTREE_ELM, rtree_leaf_elm_witness_comp, NULL); return; } @@ -292,41 +435,41 @@ rtree_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, } void -rtree_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, uintptr_t key, - const rtree_elm_t *elm) { +rtree_leaf_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, + uintptr_t key, const rtree_leaf_elm_t *elm) { witness_t *witness; if (tsdn_null(tsdn)) { return; } - witness = rtree_elm_witness_alloc(tsdn_tsd(tsdn), key, elm); + witness = rtree_leaf_elm_witness_alloc(tsdn_tsd(tsdn), key, elm); witness_lock(tsdn, witness); } void -rtree_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm) { +rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_leaf_elm_t *elm) { witness_t *witness; if (tsdn_null(tsdn)) { return; } - witness = rtree_elm_witness_find(tsdn_tsd(tsdn), elm); + witness = rtree_leaf_elm_witness_find(tsdn_tsd(tsdn), elm); witness_assert_owner(tsdn, witness); } void -rtree_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_elm_t *elm) { +rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, + const rtree_leaf_elm_t *elm) { witness_t *witness; if (tsdn_null(tsdn)) { return; } - witness = rtree_elm_witness_find(tsdn_tsd(tsdn), elm); + witness = rtree_leaf_elm_witness_find(tsdn_tsd(tsdn), elm); witness_unlock(tsdn, witness); - rtree_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); + rtree_leaf_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 488fd54b..221f2f2c 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -2,19 +2,21 @@ rtree_node_alloc_t *rtree_node_alloc_orig; rtree_node_dalloc_t *rtree_node_dalloc_orig; +rtree_leaf_alloc_t *rtree_leaf_alloc_orig; +rtree_leaf_dalloc_t 
*rtree_leaf_dalloc_orig; rtree_t *test_rtree; -static rtree_elm_t * +static rtree_node_elm_t * rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - rtree_elm_t *node; + rtree_node_elm_t *node; if (rtree != test_rtree) { return rtree_node_alloc_orig(tsdn, rtree, nelms); } malloc_mutex_unlock(tsdn, &rtree->init_lock); - node = (rtree_elm_t *)calloc(nelms, sizeof(rtree_elm_t)); + node = (rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)); assert_ptr_not_null(node, "Unexpected calloc() failure"); malloc_mutex_lock(tsdn, &rtree->init_lock); @@ -22,7 +24,8 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { } static void -rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { +rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, + rtree_node_elm_t *node) { if (rtree != test_rtree) { rtree_node_dalloc_orig(tsdn, rtree, node); return; @@ -31,6 +34,33 @@ rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_elm_t *node) { free(node); } +static rtree_leaf_elm_t * +rtree_leaf_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { + rtree_leaf_elm_t *leaf; + + if (rtree != test_rtree) { + return rtree_leaf_alloc_orig(tsdn, rtree, nelms); + } + + malloc_mutex_unlock(tsdn, &rtree->init_lock); + leaf = (rtree_leaf_elm_t *)calloc(nelms, sizeof(rtree_leaf_elm_t)); + assert_ptr_not_null(leaf, "Unexpected calloc() failure"); + malloc_mutex_lock(tsdn, &rtree->init_lock); + + return leaf; +} + +static void +rtree_leaf_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *leaf) { + if (rtree != test_rtree) { + rtree_leaf_dalloc_orig(tsdn, rtree, leaf); + return; + } + + free(leaf); +} + TEST_BEGIN(test_rtree_read_empty) { tsdn_t *tsdn; @@ -75,22 +105,20 @@ thd_start(void *varg) { uintptr_t key = (uintptr_t)(gen_rand64(sfmt) & ((ZU(1) << MAX_NBITS) - ZU(1))); if (i % 2 == 0) { - rtree_elm_t *elm; - - elm = rtree_elm_acquire(tsdn, &arg->rtree, &rtree_ctx, - key, false, true); + 
rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, + &arg->rtree, &rtree_ctx, key, false, true); assert_ptr_not_null(elm, - "Unexpected rtree_elm_acquire() failure"); - rtree_elm_write_acquired(tsdn, &arg->rtree, elm, + "Unexpected rtree_leaf_elm_acquire() failure"); + rtree_leaf_elm_write_acquired(tsdn, &arg->rtree, elm, extent); - rtree_elm_release(tsdn, &arg->rtree, elm); + rtree_leaf_elm_release(tsdn, &arg->rtree, elm); - elm = rtree_elm_acquire(tsdn, &arg->rtree, &rtree_ctx, - key, true, false); + elm = rtree_leaf_elm_acquire(tsdn, &arg->rtree, + &rtree_ctx, key, true, false); assert_ptr_not_null(elm, - "Unexpected rtree_elm_acquire() failure"); - rtree_elm_read_acquired(tsdn, &arg->rtree, elm); - rtree_elm_release(tsdn, &arg->rtree, elm); + "Unexpected rtree_leaf_elm_acquire() failure"); + rtree_leaf_elm_read_acquired(tsdn, &arg->rtree, elm); + rtree_leaf_elm_release(tsdn, &arg->rtree, elm); } else { rtree_read(tsdn, &arg->rtree, &rtree_ctx, key, false); } @@ -201,19 +229,18 @@ TEST_BEGIN(test_rtree_random) { extent_t extent; rtree_t rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - rtree_elm_t *elm; test_rtree = &rtree; assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); - elm = rtree_elm_acquire(tsdn, &rtree, &rtree_ctx, keys[i], - false, true); + rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, &rtree, + &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, - "Unexpected rtree_elm_acquire() failure"); - rtree_elm_write_acquired(tsdn, &rtree, elm, &extent); - rtree_elm_release(tsdn, &rtree, elm); + "Unexpected rtree_leaf_elm_acquire() failure"); + rtree_leaf_elm_write_acquired(tsdn, &rtree, elm, &extent); + rtree_leaf_elm_release(tsdn, &rtree, elm); assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], true), &extent, "rtree_read() should return previously set value"); @@ -248,6 +275,10 @@ main(void) { rtree_node_alloc = 
rtree_node_alloc_intercept; rtree_node_dalloc_orig = rtree_node_dalloc; rtree_node_dalloc = rtree_node_dalloc_intercept; + rtree_leaf_alloc_orig = rtree_leaf_alloc; + rtree_leaf_alloc = rtree_leaf_alloc_intercept; + rtree_leaf_dalloc_orig = rtree_leaf_dalloc; + rtree_leaf_dalloc = rtree_leaf_dalloc_intercept; test_rtree = NULL; return test( From 99d68445efa40edaf6c5317179faea4ecd07345a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 16 Mar 2017 17:57:52 -0700 Subject: [PATCH 0724/2608] Incorporate szind/slab into rtree leaves. Expand and restructure the rtree API such that all common operations can be achieved with minimal work, regardless of whether the rtree leaf fields are independent versus packed into a single atomic pointer. --- include/jemalloc/internal/arena_inlines_b.h | 55 +++- include/jemalloc/internal/extent_externs.h | 2 +- include/jemalloc/internal/extent_inlines.h | 22 +- .../jemalloc/internal/jemalloc_internal.h.in | 28 +- include/jemalloc/internal/large_externs.h | 4 +- include/jemalloc/internal/private_symbols.txt | 17 +- include/jemalloc/internal/rtree_inlines.h | 297 ++++++++++++------ include/jemalloc/internal/rtree_structs.h | 6 +- src/arena.c | 11 +- src/extent.c | 127 +++++--- src/large.c | 17 +- test/unit/arena_reset.c | 17 +- test/unit/rtree.c | 94 ++++-- 13 files changed, 471 insertions(+), 226 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 92c89a5b..8c5f9c14 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -14,7 +14,8 @@ void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); +size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); 
+size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool slow_path); void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, @@ -114,12 +115,60 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { return extent_arena_get(iealloc(tsdn, ptr)); } -/* Return the size of the allocation pointed to by ptr. */ JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { assert(ptr != NULL); - return index2size(extent_szind_get(extent)); + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); + assert(szind != NSIZES); + + if (config_debug && unlikely(extent != NULL)) { + rtree_leaf_elm_t elm; + rtree_leaf_elm_read(rtree_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true), true, &elm); + + assert(extent == rtree_leaf_elm_extent_get(&elm)); + assert(szind == extent_szind_get(extent)); + } + + return index2size(szind); +} + +JEMALLOC_ALWAYS_INLINE size_t +arena_vsalloc(tsdn_t *tsdn, const void *ptr) { + /* + * Return 0 if ptr is not within an extent managed by jemalloc. This + * function has two extra costs relative to isalloc(): + * - The rtree calls cannot claim to be dependent lookups, which induces + * rtree lookup load dependencies. + * - The lookup may fail, so there is an extra branch to check for + * failure. + */ + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent; + szind_t szind; + if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &extent, &szind)) { + return 0; + } + + if (extent == NULL) { + return 0; + } + assert(extent_state_get(extent) == extent_state_active); + /* Only slab members should be looked up via interior pointers. 
*/ + assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); + + assert(szind != NSIZES); + + return index2size(szind); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 68c49a17..6c153d04 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -54,7 +54,7 @@ bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, size_t length); extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, size_t size_b, szind_t szind_b); + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 549c8f2f..56f306df 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -2,11 +2,11 @@ #define JEMALLOC_INTERNAL_EXTENT_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE -extent_t *extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent); arena_t *extent_arena_get(const extent_t *extent); void *extent_base_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); +szind_t extent_szind_get_maybe_invalid(const extent_t *extent); szind_t extent_szind_get(const extent_t *extent); size_t extent_usize_get(const extent_t *extent); void *extent_before_get(const extent_t *extent); @@ -47,15 +47,6 @@ int extent_snad_comp(const extent_t *a, const extent_t *b); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) -JEMALLOC_INLINE extent_t * -extent_lookup(tsdn_t *tsdn, const void *ptr, bool dependent) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, 
&rtree_ctx_fallback); - - return rtree_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - dependent); -} - JEMALLOC_INLINE arena_t * extent_arena_get(const extent_t *extent) { return extent->e_arena; @@ -81,11 +72,18 @@ extent_size_get(const extent_t *extent) { } JEMALLOC_INLINE szind_t -extent_szind_get(const extent_t *extent) { - assert(extent->e_szind < NSIZES); /* Never call when "invalid". */ +extent_szind_get_maybe_invalid(const extent_t *extent) { + assert(extent->e_szind <= NSIZES); return extent->e_szind; } +JEMALLOC_INLINE szind_t +extent_szind_get(const extent_t *extent) { + szind_t szind = extent_szind_get_maybe_invalid(extent); + assert(szind < NSIZES); /* Never call when "invalid". */ + return szind; +} + JEMALLOC_INLINE size_t extent_usize_get(const extent_t *extent) { return index2size(extent_szind_get(extent)); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b184380d..238ebdca 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -535,7 +535,6 @@ void jemalloc_postfork_child(void); #include "jemalloc/internal/tsd_inlines.h" #include "jemalloc/internal/witness_inlines.h" #include "jemalloc/internal/mutex_inlines.h" -#include "jemalloc/internal/rtree_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); @@ -934,6 +933,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { #endif #include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/rtree_inlines.h" #include "jemalloc/internal/base_inlines.h" #include "jemalloc/internal/bitmap_inlines.h" /* @@ -994,7 +994,11 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) { JEMALLOC_ALWAYS_INLINE extent_t * iealloc(tsdn_t *tsdn, const void *ptr) { - return extent_lookup(tsdn, ptr, true); + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_extent_read(tsdn, &extents_rtree, 
rtree_ctx, + (uintptr_t)ptr, true); } #endif @@ -1113,25 +1117,7 @@ ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { JEMALLOC_ALWAYS_INLINE size_t ivsalloc(tsdn_t *tsdn, const void *ptr) { - extent_t *extent; - - /* - * Return 0 if ptr is not within an extent managed by jemalloc. This - * function has two extra costs relative to isalloc(): - * - The extent_lookup() call cannot claim to be a dependent lookup, - * which induces rtree lookup load dependencies. - * - The lookup may fail, so there is an extra branch to check for - * failure. - * */ - extent = extent_lookup(tsdn, ptr, false); - if (extent == NULL) { - return 0; - } - assert(extent_state_get(extent) == extent_state_active); - /* Only slab members should be looked up via interior pointers. */ - assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); - - return isalloc(tsdn, extent, ptr); + return arena_vsalloc(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 66aa755c..2a208c83 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -14,8 +14,8 @@ extern large_dalloc_junk_t *large_dalloc_junk; typedef void (large_dalloc_maybe_junk_t)(void *, size_t); extern large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; #else -void large_dalloc_junk(void *ptr, size_t usize); -void large_dalloc_maybe_junk(void *ptr, size_t usize); +void large_dalloc_junk(void *ptr, size_t size); +void large_dalloc_maybe_junk(void *ptr, size_t size); #endif void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index a0deef89..169e7d11 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -77,6 +77,7 @@ arena_stats_merge 
arena_tcache_fill_small arena_tdata_get arena_tdata_get_hard +arena_vsalloc arenas arenas_tdata_cleanup b0get @@ -169,7 +170,6 @@ extent_list_init extent_list_last extent_list_remove extent_list_replace -extent_lookup extent_merge_wrapper extent_past_get extent_prof_tctx_get @@ -192,6 +192,7 @@ extent_split_wrapper extent_state_get extent_state_set extent_szind_get +extent_szind_get_maybe_invalid extent_szind_set extent_usize_get extent_zeroed_get @@ -413,25 +414,33 @@ psz2ind psz2u rtree_clear rtree_delete +rtree_extent_read +rtree_extent_szind_read rtree_leaf_alloc rtree_leaf_dalloc rtree_leaf_elm_acquire +rtree_leaf_elm_extent_read +rtree_leaf_elm_extent_write rtree_leaf_elm_lookup rtree_leaf_elm_lookup_hard -rtree_leaf_elm_read -rtree_leaf_elm_read_acquired rtree_leaf_elm_release +rtree_leaf_elm_slab_read +rtree_leaf_elm_slab_write +rtree_leaf_elm_szind_read +rtree_leaf_elm_szind_write rtree_leaf_elm_witness_access rtree_leaf_elm_witness_acquire rtree_leaf_elm_witness_release rtree_leaf_elm_write -rtree_leaf_elm_write_acquired rtree_leafkey rtree_new rtree_node_alloc rtree_node_dalloc rtree_read rtree_subkey +rtree_szind_read +rtree_szind_slab_read +rtree_szind_slab_update rtree_write s2u s2u_compute diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 3af17d36..9c337b85 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -4,21 +4,40 @@ #ifndef JEMALLOC_ENABLE_INLINE uintptr_t rtree_leafkey(uintptr_t key); uintptr_t rtree_subkey(uintptr_t key, unsigned level); -extent_t *rtree_leaf_elm_read(rtree_leaf_elm_t *elm, bool dependent); -void rtree_leaf_elm_write(rtree_leaf_elm_t *elm, const extent_t *extent); +extent_t *rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, bool dependent); +szind_t rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, bool dependent); +bool 
rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, bool dependent); +void rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, extent_t *extent); +void rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, szind_t szind); +void rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, bool slab); +void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, extent_t *extent, szind_t szind, bool slab); rtree_leaf_elm_t *rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, const extent_t *extent); -extent_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent); + uintptr_t key, extent_t *extent, szind_t szind, bool slab); +rtree_leaf_elm_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); +extent_t *rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); +szind_t rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); +bool rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, extent_t **r_extent, + szind_t *r_szind); +bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab); +void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab); rtree_leaf_elm_t *rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -extent_t 
*rtree_leaf_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_leaf_elm_t *elm); -void rtree_leaf_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_leaf_elm_t *elm, const extent_t *extent); -void rtree_leaf_elm_release(tsdn_t *tsdn, const rtree_t *rtree, +void rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm); void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key); @@ -45,38 +64,104 @@ rtree_subkey(uintptr_t key, unsigned level) { return ((key >> shiftbits) & mask); } +/* + * Atomic getters. + * + * dependent: Reading a value on behalf of a pointer to a valid allocation + * is guaranteed to be a clean read even without synchronization, + * because the rtree update became visible in memory before the + * pointer came into existence. + * !dependent: An arbitrary read, e.g. on behalf of ivsalloc(), may not be + * dependent on a previous rtree write, which means a stale read + * could result if synchronization were omitted here. + */ JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_read(rtree_leaf_elm_t *elm, bool dependent) { - extent_t *extent; - - if (dependent) { - /* - * Reading a value on behalf of a pointer to a valid allocation - * is guaranteed to be a clean read even without - * synchronization, because the rtree update became visible in - * memory before the pointer came into existence. - */ - extent = (extent_t *)atomic_load_p(&elm->extent, - ATOMIC_RELAXED); - } else { - /* - * An arbitrary read, e.g. on behalf of ivsalloc(), may not be - * dependent on a previous rtree write, which means a stale read - * could result if synchronization were omitted here. - */ - extent = (extent_t *)atomic_load_p(&elm->extent, - ATOMIC_ACQUIRE); +rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, bool dependent) { + if (config_debug && acquired) { + assert(dependent); + rtree_leaf_elm_witness_access(tsdn, rtree, elm); } - /* Mask the lock bit. 
*/ + extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + assert(!acquired || ((uintptr_t)extent & (uintptr_t)0x1) == + (uintptr_t)0x1); + /* Mask lock bit. */ extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); - return extent; } +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, bool dependent) { + if (config_debug && acquired) { + assert(dependent); + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + + return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED + : ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, bool dependent) { + if (config_debug && acquired) { + assert(dependent); + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + + return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : + ATOMIC_ACQUIRE); +} + JEMALLOC_INLINE void -rtree_leaf_elm_write(rtree_leaf_elm_t *elm, const extent_t *extent) { - atomic_store_p(&elm->extent, (void *)extent, ATOMIC_RELEASE); +rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, extent_t *extent) { + if (config_debug && acquired) { + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); + + if (acquired) { + /* Overlay lock bit. 
*/ + extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1); + } + atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); +} + +JEMALLOC_INLINE void +rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, szind_t szind) { + if (config_debug && acquired) { + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + assert(szind <= NSIZES); + + atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); +} + +JEMALLOC_INLINE void +rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, bool slab) { + if (config_debug && acquired) { + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + + atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); +} + +JEMALLOC_INLINE void +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, extent_t *extent, szind_t szind, bool slab) { + rtree_leaf_elm_slab_write(tsdn, rtree, elm, acquired, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, acquired, szind); + /* + * Write extent last, since the element is atomically considered valid + * as soon as the extent field is non-NULL. + */ + rtree_leaf_elm_extent_write(tsdn, rtree, elm, acquired, extent); } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -124,34 +209,99 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, JEMALLOC_INLINE bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - const extent_t *extent) { - rtree_leaf_elm_t *elm; + extent_t *extent, szind_t szind, bool slab) { + /* Use rtree_clear() to set the extent to NULL. */ + assert(extent != NULL); - assert(extent != NULL); /* Use rtree_clear() for this case. 
*/ - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - - elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, false, true); if (elm == NULL) { return true; } - assert(rtree_leaf_elm_read(elm, false) == NULL); - rtree_leaf_elm_write(elm, extent); + + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) == + NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, false, extent, szind, slab); return false; } -JEMALLOC_ALWAYS_INLINE extent_t * +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm; - - elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, dependent, - false); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, dependent, false); if (!dependent && elm == NULL) { return NULL; } + assert(elm != NULL); + return elm; +} - return rtree_leaf_elm_read(elm, dependent); +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NULL; + } + return rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, dependent); +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return NSIZES; + } + return rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, dependent); +} + +/* + * rtree_slab_read() is intentionally omitted because slab is always read in + * conjunction with szind, which makes rtree_szind_slab_read() a better choice. 
+ */ + +JEMALLOC_ALWAYS_INLINE bool +rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; + } + *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, + dependent); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, + dependent); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, + dependent); + if (!dependent && elm == NULL) { + return true; + } + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, + dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, false, dependent); + return false; +} + +JEMALLOC_INLINE void +rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_slab_write(tsdn, rtree, elm, false, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, false, szind); } JEMALLOC_INLINE rtree_leaf_elm_t * @@ -162,18 +312,19 @@ rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return NULL; } + assert(elm != NULL); spin_t spinner = SPIN_INITIALIZER; while (true) { /* The least significant bit serves as a lock. 
*/ - void *extent_and_lock = atomic_load_p(&elm->extent, + void *extent_and_lock = atomic_load_p(&elm->le_extent, ATOMIC_RELAXED); if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0)) { void *locked = (void *)((uintptr_t)extent_and_lock | (uintptr_t)0x1); if (likely(atomic_compare_exchange_strong_p( - &elm->extent, &extent_and_lock, locked, + &elm->le_extent, &extent_and_lock, locked, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { break; } @@ -188,42 +339,11 @@ rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, return elm; } -JEMALLOC_INLINE extent_t * -rtree_leaf_elm_read_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_leaf_elm_t *elm) { - extent_t *extent; - void *ptr = atomic_load_p(&elm->extent, ATOMIC_RELAXED); - assert(((uintptr_t)ptr & (uintptr_t)0x1) == (uintptr_t)0x1); - extent = (extent_t *)((uintptr_t)ptr & ~((uintptr_t)0x1)); - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - - if (config_debug) { - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - - return extent; -} - JEMALLOC_INLINE void -rtree_leaf_elm_write_acquired(tsdn_t *tsdn, const rtree_t *rtree, - rtree_leaf_elm_t *elm, const extent_t *extent) { - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - assert(((uintptr_t)atomic_load_p(&elm->extent, ATOMIC_RELAXED) - & (uintptr_t)0x1) == (uintptr_t)0x1); - - if (config_debug) { - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - atomic_store_p(&elm->extent, (void *)((uintptr_t)extent | - (uintptr_t)0x1), ATOMIC_RELEASE); - assert(rtree_leaf_elm_read_acquired(tsdn, rtree, elm) == extent); -} - -JEMALLOC_INLINE void -rtree_leaf_elm_release(tsdn_t *tsdn, const rtree_t *rtree, - rtree_leaf_elm_t *elm) { - rtree_leaf_elm_write(elm, rtree_leaf_elm_read_acquired(tsdn, rtree, - elm)); +rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) { + extent_t *extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, true, + true); + rtree_leaf_elm_extent_write(tsdn, rtree, 
elm, false, extent); if (config_debug) { rtree_leaf_elm_witness_release(tsdn, rtree, elm); } @@ -232,10 +352,9 @@ rtree_leaf_elm_release(tsdn_t *tsdn, const rtree_t *rtree, JEMALLOC_INLINE void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { - rtree_leaf_elm_t *elm; - - elm = rtree_leaf_elm_acquire(tsdn, rtree, rtree_ctx, key, true, false); - rtree_leaf_elm_write_acquired(tsdn, rtree, elm, NULL); + rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, rtree, rtree_ctx, + key, true, false); + rtree_leaf_elm_write(tsdn, rtree, elm, true, NULL, NSIZES, false); rtree_leaf_elm_release(tsdn, rtree, elm); } #endif diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 68554035..e9a507ab 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -2,11 +2,13 @@ #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H struct rtree_node_elm_s { - atomic_p_t child; + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; struct rtree_leaf_elm_s { - atomic_p_t extent; + atomic_p_t le_extent; /* (extent_t *) */ + atomic_u_t le_szind; /* (szind_t) */ + atomic_b_t le_slab; /* (bool) */ }; struct rtree_leaf_elm_witness_s { diff --git a/src/arena.c b/src/arena.c index 968343c0..2dd84761 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1468,7 +1468,12 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); assert(usize <= SMALL_MAXCLASS); - extent_szind_set(extent, size2index(usize)); + szind_t szind = size2index(usize); + extent_szind_set(extent, szind); + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + szind, false); prof_accum_cancel(tsdn, &arena->prof_accum, usize); @@ -1481,6 +1486,10 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { assert(ptr != 
NULL); extent_szind_set(extent, NBINS); + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + NBINS, false); assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); diff --git a/src/extent.c b/src/extent.c index 2a16d387..a95e2748 100644 --- a/src/extent.c +++ b/src/extent.c @@ -476,11 +476,12 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, const extent_t *extent) { - rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, elm_a, extent); + rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, true, extent, szind, + slab); if (elm_b != NULL) { - rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, elm_b, - extent); + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, true, extent, + szind, slab); } } @@ -494,16 +495,15 @@ extent_rtree_release(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, } static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent) { - size_t i; - +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, + szind_t szind) { assert(extent_slab_get(extent)); - for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + /* Register interior. 
*/ + for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { rtree_write(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE), extent); + LG_PAGE), extent, szind, true); } } @@ -542,7 +542,7 @@ extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { } static bool -extent_register_impl(tsdn_t *tsdn, const extent_t *extent, bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -551,9 +551,11 @@ extent_register_impl(tsdn_t *tsdn, const extent_t *extent, bool gdump_add) { &elm_b)) { return true; } - extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent); - if (extent_slab_get(extent)) { - extent_interior_register(tsdn, rtree_ctx, extent); + szind_t szind = extent_szind_get_maybe_invalid(extent); + bool slab = extent_slab_get(extent); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); + if (slab) { + extent_interior_register(tsdn, rtree_ctx, extent, szind); } extent_rtree_release(tsdn, elm_a, elm_b); @@ -565,24 +567,24 @@ extent_register_impl(tsdn_t *tsdn, const extent_t *extent, bool gdump_add) { } static bool -extent_register(tsdn_t *tsdn, const extent_t *extent) { +extent_register(tsdn_t *tsdn, extent_t *extent) { return extent_register_impl(tsdn, extent, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, const extent_t *extent) { +extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { return extent_register_impl(tsdn, extent, false); } static void -extent_reregister(tsdn_t *tsdn, const extent_t *extent) { +extent_reregister(tsdn_t *tsdn, extent_t *extent) { bool err = extent_register(tsdn, extent); assert(!err); } static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent) { + extent_t *extent) { size_t i; assert(extent_slab_get(extent)); @@ -602,7 
+604,7 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_rtree_acquire(tsdn, rtree_ctx, extent, true, false, &elm_a, &elm_b); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, NSIZES, false); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); @@ -653,13 +655,12 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { - rtree_leaf_elm_t *elm; - - elm = rtree_leaf_elm_acquire(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)new_addr, false, false); + rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)new_addr, false, + false); if (elm != NULL) { - extent = rtree_leaf_elm_read_acquired(tsdn, - &extents_rtree, elm); + extent = rtree_leaf_elm_extent_read(tsdn, + &extents_rtree, elm, true, true); if (extent != NULL) { assert(extent_base_get(extent) == new_addr); if (extent_arena_get(extent) != arena || @@ -715,7 +716,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (leadsize != 0) { extent_t *lead = extent; extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, - lead, leadsize, NSIZES, esize + trailsize, szind); + lead, leadsize, NSIZES, false, esize + trailsize, szind, + slab); if (extent == NULL) { extent_deregister(tsdn, lead); extents_leak(tsdn, arena, r_extent_hooks, extents, @@ -728,7 +730,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, /* Split the trail. */ if (trailsize != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - r_extent_hooks, extent, esize, szind, trailsize, NSIZES); + r_extent_hooks, extent, esize, szind, slab, trailsize, + NSIZES, false); if (trail == NULL) { extent_deregister(tsdn, extent); extents_leak(tsdn, arena, r_extent_hooks, extents, @@ -742,6 +745,16 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, * splitting occurred. 
*/ extent_szind_set(extent, szind); + if (szind != NSIZES) { + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_addr_get(extent), szind, slab); + if (slab && extent_size_get(extent) > PAGE) { + rtree_szind_slab_update(tsdn, &extents_rtree, + rtree_ctx, + (uintptr_t)extent_past_get(extent) - + (uintptr_t)PAGE, szind, slab); + } + } } return extent; @@ -788,7 +801,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(extent_state_get(extent) == extent_state_active); if (slab) { extent_slab_set(extent, slab); - extent_interior_register(tsdn, rtree_ctx, extent); + extent_interior_register(tsdn, rtree_ctx, extent, szind); } if (*zero) { @@ -934,7 +947,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (leadsize != 0) { extent_t *lead = extent; extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, esize + trailsize, szind); + leadsize, NSIZES, false, esize + trailsize, szind, slab); if (extent == NULL) { extent_deregister(tsdn, lead); extents_leak(tsdn, arena, r_extent_hooks, false, lead); @@ -947,7 +960,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, /* Split the trail. */ if (trailsize != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - r_extent_hooks, extent, esize, szind, trailsize, NSIZES); + r_extent_hooks, extent, esize, szind, slab, trailsize, + NSIZES, false); if (trail == NULL) { extent_deregister(tsdn, extent); extents_leak(tsdn, arena, r_extent_hooks, @@ -961,7 +975,21 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * Splitting causes szind to be set as a side effect, but no * splitting occurred. 
*/ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + extent_szind_set(extent, szind); + if (szind != NSIZES) { + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_addr_get(extent), szind, slab); + if (slab && extent_size_get(extent) > PAGE) { + rtree_szind_slab_update(tsdn, &extents_rtree, + rtree_ctx, + (uintptr_t)extent_past_get(extent) - + (uintptr_t)PAGE, szind, slab); + } + } } if (*commit && !extent_committed_get(extent)) { @@ -987,7 +1015,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, &rtree_ctx_fallback); extent_slab_set(extent, true); - extent_interior_register(tsdn, rtree_ctx, extent); + extent_interior_register(tsdn, rtree_ctx, extent, szind); } if (*zero && !extent_zeroed_get(extent)) { void *addr = extent_base_get(extent); @@ -1162,8 +1190,8 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, &extents_rtree, rtree_ctx, (uintptr_t)extent_past_get(extent), false, false); if (next_elm != NULL) { - extent_t *next = rtree_leaf_elm_read_acquired(tsdn, - &extents_rtree, next_elm); + extent_t *next = rtree_leaf_elm_extent_read(tsdn, + &extents_rtree, next_elm, true, true); /* * extents->mtx only protects against races for * like-state extents, so call extent_can_coalesce() @@ -1188,8 +1216,8 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, &extents_rtree, rtree_ctx, (uintptr_t)extent_before_get(extent), false, false); if (prev_elm != NULL) { - extent_t *prev = rtree_leaf_elm_read_acquired(tsdn, - &extents_rtree, prev_elm); + extent_t *prev = rtree_leaf_elm_extent_read(tsdn, + &extents_rtree, prev_elm, true, true); bool can_coalesce = (prev != NULL && extent_can_coalesce(arena, extents, extent, prev)); rtree_leaf_elm_release(tsdn, &extents_rtree, prev_elm); @@ -1231,7 +1259,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_slab_set(extent, false); } - assert(extent_lookup(tsdn, extent_base_get(extent), true) == extent); + 
assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent), true) == extent); if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, @@ -1467,7 +1496,7 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, size_t size_b, szind_t szind_b) { + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); @@ -1491,7 +1520,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t lead; extent_init(&lead, arena, extent_addr_get(extent), size_a, - extent_slab_get(extent), szind_a, extent_sn_get(extent), + slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), extent_committed_get(extent)); @@ -1502,9 +1531,9 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, extent_slab_get(extent), szind_b, - extent_sn_get(extent), extent_state_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent)); + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), + extent_state_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent)); if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, &trail_elm_a, &trail_elm_b)) { goto label_error_c; @@ -1519,8 +1548,10 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_size_set(extent, size_a); extent_szind_set(extent, szind_a); - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail); + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, + szind_a, slab_a); + extent_rtree_write_acquired(tsdn, trail_elm_a, 
trail_elm_b, trail, + szind_b, slab_b); extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); @@ -1599,13 +1630,13 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, &b_elm_b); if (a_elm_b != NULL) { - rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, a_elm_b, - NULL); + rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, true, NULL, + NSIZES, false); rtree_leaf_elm_release(tsdn, &extents_rtree, a_elm_b); } if (b_elm_b != NULL) { - rtree_leaf_elm_write_acquired(tsdn, &extents_rtree, b_elm_a, - NULL); + rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, true, NULL, + NSIZES, false); rtree_leaf_elm_release(tsdn, &extents_rtree, b_elm_a); } else { b_elm_b = b_elm_a; @@ -1617,7 +1648,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_sn_get(a) : extent_sn_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a); + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, NSIZES, false); extent_rtree_release(tsdn, a_elm_a, b_elm_b); extent_dalloc(tsdn, extent_arena_get(b), b); diff --git a/src/large.c b/src/large.c index 0e9f0d72..845202f9 100644 --- a/src/large.c +++ b/src/large.c @@ -66,8 +66,8 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, #define large_dalloc_junk JEMALLOC_N(n_large_dalloc_junk) #endif void -large_dalloc_junk(void *ptr, size_t usize) { - memset(ptr, JEMALLOC_FREE_JUNK, usize); +large_dalloc_junk(void *ptr, size_t size) { + memset(ptr, JEMALLOC_FREE_JUNK, size); } #ifdef JEMALLOC_JET #undef large_dalloc_junk @@ -80,14 +80,14 @@ large_dalloc_junk_t *large_dalloc_junk = JEMALLOC_N(n_large_dalloc_junk); #define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) #endif void -large_dalloc_maybe_junk(void *ptr, size_t usize) { +large_dalloc_maybe_junk(void *ptr, size_t size) { if (config_fill && have_dss && unlikely(opt_junk_free)) { /* * Only bother junk filling if the 
extent isn't about to be * unmapped. */ if (!config_munmap || (have_dss && extent_in_dss(ptr))) { - large_dalloc_junk(ptr, usize); + large_dalloc_junk(ptr, size); } } } @@ -115,7 +115,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, &extent_hooks, extent, usize + large_pad, size2index(usize), - diff, NSIZES); + false, diff, NSIZES, false); if (trail == NULL) { return true; } @@ -182,7 +182,12 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); return true; } - extent_szind_set(extent, size2index(usize)); + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + szind_t szind = size2index(usize); + extent_szind_set(extent, szind); + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_addr_get(extent), szind, false); if (config_stats && new_mapping) { arena_stats_mapped_add(tsdn, &arena->stats, trailsize); diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 24c7f526..589c652c 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -57,9 +57,16 @@ get_large_size(size_t ind) { /* Like ivsalloc(), but safe to call on discarded allocations. 
*/ static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { - extent_t *extent; + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent; + szind_t szind; + if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &extent, &szind)) { + return 0; + } - extent = extent_lookup(tsdn, ptr, false); if (extent == NULL) { return 0; } @@ -67,7 +74,11 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return isalloc(tsdn, extent, ptr); + if (szind == NSIZES) { + return 0; + } + + return index2size(szind); } static unsigned diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 221f2f2c..b04e321b 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -70,8 +70,8 @@ TEST_BEGIN(test_rtree_read_empty) { rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; test_rtree = &rtree; assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, PAGE, false), - "rtree_read() should return NULL for empty tree"); + assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, PAGE, + false), "rtree_extent_read() should return NULL for empty tree"); rtree_delete(tsdn, &rtree); test_rtree = NULL; } @@ -99,6 +99,8 @@ thd_start(void *varg) { sfmt = init_gen_rand(arg->seed); extent = (extent_t *)malloc(sizeof(extent)); assert_ptr_not_null(extent, "Unexpected malloc() failure"); + extent_init(extent, NULL, NULL, 0, false, NSIZES, 0, + extent_state_active, false, false); tsdn = tsdn_fetch(); for (i = 0; i < NITERS; i++) { @@ -109,18 +111,24 @@ thd_start(void *varg) { &arg->rtree, &rtree_ctx, key, false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_write_acquired(tsdn, &arg->rtree, elm, - extent); + rtree_leaf_elm_write(tsdn, &arg->rtree, elm, true, + extent, NSIZES, false); rtree_leaf_elm_release(tsdn, &arg->rtree, elm); elm = rtree_leaf_elm_acquire(tsdn, &arg->rtree, &rtree_ctx, key, 
true, false); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_read_acquired(tsdn, &arg->rtree, elm); + rtree_leaf_elm_extent_read(tsdn, &arg->rtree, elm, true, + true); + rtree_leaf_elm_szind_read(tsdn, &arg->rtree, elm, true, + true); + rtree_leaf_elm_slab_read(tsdn, &arg->rtree, elm, true, + true); rtree_leaf_elm_release(tsdn, &arg->rtree, elm); } else { - rtree_read(tsdn, &arg->rtree, &rtree_ctx, key, false); + rtree_extent_read(tsdn, &arg->rtree, &rtree_ctx, key, + false); } } @@ -158,26 +166,33 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; - tsdn_t *tsdn; + extent_init(&extent_a, NULL, NULL, LARGE_MINCLASS, false, + size2index(LARGE_MINCLASS), 0, extent_state_active, false, false); + extent_init(&extent_b, NULL, NULL, 0, false, NSIZES, 0, + extent_state_active, false, false); - tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); rtree_t rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; test_rtree = &rtree; assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, PAGE, &extent_a), + assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, PAGE, &extent_a, + extent_szind_get(&extent_a), extent_slab_get(&extent_a)), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, PAGE, true), + rtree_szind_slab_update(tsdn, &rtree, &rtree_ctx, PAGE, + extent_szind_get(&extent_a), extent_slab_get(&extent_a)); + assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, PAGE, true), &extent_a, - "rtree_read() should return previously set value"); + "rtree_extent_read() should return previously set value"); assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, ~((uintptr_t)0), - &extent_b), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, ~((uintptr_t)0), - true), &extent_b, - "rtree_read() should return previously set value"); + &extent_b, 
extent_szind_get_maybe_invalid(&extent_b), + extent_slab_get(&extent_b)), "Unexpected rtree_write() failure"); + assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + ~((uintptr_t)0), true), &extent_b, + "rtree_extent_read() should return previously set value"); rtree_delete(tsdn, &rtree); test_rtree = NULL; @@ -191,6 +206,9 @@ TEST_BEGIN(test_rtree_bits) { PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; extent_t extent; + extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, + extent_state_active, false, false); + rtree_t rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; @@ -200,16 +218,17 @@ TEST_BEGIN(test_rtree_bits) { for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, keys[i], - &extent), "Unexpected rtree_write() failure"); + &extent, NSIZES, false), + "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, - keys[j], true), &extent, - "rtree_read() should return previously set " - "value and ignore insignificant key bits; " - "i=%u, j=%u, set key=%#"FMTxPTR", get " + assert_ptr_eq(rtree_extent_read(tsdn, &rtree, + &rtree_ctx, keys[j], true), + &extent, "rtree_extent_read() should return " + "previously set value and ignore insignificant key " + "bits; i=%u, j=%u, set key=%#"FMTxPTR", get " "key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, + assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, &rtree, &rtree_ctx, keys[i]); @@ -226,10 +245,13 @@ TEST_BEGIN(test_rtree_random) { sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); uintptr_t keys[NSET]; - extent_t extent; rtree_t rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + extent_t extent; + extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, + extent_state_active, 
false, false); + test_rtree = &rtree; assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); @@ -239,26 +261,30 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_write_acquired(tsdn, &rtree, elm, &extent); + rtree_leaf_elm_write(tsdn, &rtree, elm, true, &extent, NSIZES, + false); rtree_leaf_elm_release(tsdn, &rtree, elm); - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], - true), &extent, - "rtree_read() should return previously set value"); + assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + keys[i], true), &extent, + "rtree_extent_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_eq(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], - true), &extent, - "rtree_read() should return previously set value, i=%u", i); + assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + keys[i], true), &extent, + "rtree_extent_read() should return previously set value, " + "i=%u", i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, &rtree, &rtree_ctx, keys[i]); - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], - true), "rtree_read() should return previously set value"); + assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + keys[i], true), + "rtree_extent_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_null(rtree_read(tsdn, &rtree, &rtree_ctx, keys[i], - true), "rtree_read() should return previously set value"); + assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + keys[i], true), + "rtree_extent_read() should return previously set value"); } rtree_delete(tsdn, &rtree); From ce41ab0c57d5f0c9310200d0fecea99ef334a834 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Mar 2017 16:38:03 -0700 Subject: [PATCH 0725/2608] Embed root node into rtree_t. This avoids one atomic operation per tree access. 
--- include/jemalloc/internal/rtree_externs.h | 2 +- include/jemalloc/internal/rtree_structs.h | 12 ++- src/extent.c | 2 +- src/rtree.c | 91 ++++------------- test/unit/rtree.c | 119 ++++++++++------------ 5 files changed, 86 insertions(+), 140 deletions(-) diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index 842eb0b5..5145c12c 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -23,7 +23,7 @@ static const rtree_level_t rtree_levels[] = { #endif }; -bool rtree_new(rtree_t *rtree); +bool rtree_new(rtree_t *rtree, bool zeroed); #ifdef JEMALLOC_JET typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); extern rtree_node_alloc_t *rtree_node_alloc; diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index e9a507ab..3ecdf810 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -39,13 +39,17 @@ struct rtree_ctx_s { #ifndef _MSC_VER JEMALLOC_ALIGNED(CACHELINE) #endif - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; }; struct rtree_s { - /* An rtree_{internal,leaf}_elm_t *. */ - atomic_p_t root; - malloc_mutex_t init_lock; + malloc_mutex_t init_lock; + /* Number of elements based on rtree_levels[0].bits. 
*/ +#if RTREE_HEIGHT > 1 + rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#else + rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#endif }; #endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */ diff --git a/src/extent.c b/src/extent.c index a95e2748..f1b513e4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1658,7 +1658,7 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_boot(void) { - if (rtree_new(&extents_rtree)) { + if (rtree_new(&extents_rtree, true)) { return true; } diff --git a/src/rtree.c b/src/rtree.c index 18197390..a07380f3 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -6,8 +6,15 @@ * used. */ bool -rtree_new(rtree_t *rtree) { - atomic_store_p(&rtree->root, NULL, ATOMIC_RELAXED); +rtree_new(rtree_t *rtree, bool zeroed) { +#ifdef JEMALLOC_JET + if (!zeroed) { + memset(rtree, 0, sizeof(rtree_t)); /* Clear root. */ + } +#else + assert(zeroed); +#endif + if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE)) { return true; } @@ -76,6 +83,7 @@ rtree_leaf_dalloc_t *rtree_leaf_dalloc = JEMALLOC_N(rtree_leaf_dalloc_impl); #endif #ifdef JEMALLOC_JET +# if RTREE_HEIGHT > 1 static void rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree, unsigned level) { @@ -101,25 +109,17 @@ rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree, } } - rtree_node_dalloc(tsdn, rtree, subtree); + if (subtree != rtree->root) { + rtree_node_dalloc(tsdn, rtree, subtree); + } } +# endif void rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { - if (RTREE_HEIGHT > 1) { - rtree_node_elm_t *node = (rtree_node_elm_t *)atomic_load_p( - &rtree->root, ATOMIC_RELAXED); - if (node != NULL) { - rtree_delete_subtree(tsdn, rtree, node, 0); - } - } else { - rtree_leaf_elm_t *leaf = - (rtree_leaf_elm_t *)atomic_load_p(&rtree->root, - ATOMIC_RELAXED); - if (leaf != NULL) { - rtree_leaf_dalloc(tsdn, rtree, leaf); - } - } +# if RTREE_HEIGHT > 1 + rtree_delete_subtree(tsdn, rtree, rtree->root, 0); +# endif } #endif @@ 
-244,70 +244,21 @@ rtree_child_leaf_read(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *elm, return leaf; } -UNUSED static rtree_node_elm_t * -rtree_root_node_tryread(rtree_t *rtree, bool dependent) { - rtree_node_elm_t *node; - if (dependent) { - node = (rtree_node_elm_t *)atomic_load_p(&rtree->root, - ATOMIC_RELAXED); - } else { - node = (rtree_node_elm_t *)atomic_load_p(&rtree->root, - ATOMIC_ACQUIRE); - } - assert(!dependent || node != NULL); - return node; -} - -UNUSED static rtree_node_elm_t * -rtree_root_node_read(tsdn_t *tsdn, rtree_t *rtree, bool dependent) { - rtree_node_elm_t *node = rtree_root_node_tryread(rtree, dependent); - if (!dependent && unlikely(!rtree_node_valid(node))) { - node = rtree_node_init(tsdn, rtree, 0, &rtree->root); - } - assert(!dependent || node != NULL); - return node; -} - -UNUSED static rtree_leaf_elm_t * -rtree_root_leaf_tryread(rtree_t *rtree, bool dependent) { - rtree_leaf_elm_t *leaf; - if (dependent) { - leaf = (rtree_leaf_elm_t *)atomic_load_p(&rtree->root, - ATOMIC_RELAXED); - } else { - leaf = (rtree_leaf_elm_t *)atomic_load_p(&rtree->root, - ATOMIC_ACQUIRE); - } - assert(!dependent || leaf != NULL); - return leaf; -} - -UNUSED static rtree_leaf_elm_t * -rtree_root_leaf_read(tsdn_t *tsdn, rtree_t *rtree, bool dependent) { - rtree_leaf_elm_t *leaf = rtree_root_leaf_tryread(rtree, dependent); - if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { - leaf = rtree_leaf_init(tsdn, rtree, &rtree->root); - } - assert(!dependent || leaf != NULL); - return leaf; -} - rtree_leaf_elm_t * rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { rtree_node_elm_t *node; rtree_leaf_elm_t *leaf; #if RTREE_HEIGHT > 1 - node = init_missing ? rtree_root_node_read(tsdn, rtree, dependent) : - rtree_root_node_tryread(rtree, dependent); + node = rtree->root; #else - leaf = init_missing ? 
rtree_root_leaf_read(tsdn, rtree, dependent) : - rtree_root_leaf_tryread(rtree, dependent); + leaf = rtree->root; #endif #define RTREE_GET_CHILD(level) { \ assert(level < RTREE_HEIGHT-1); \ - if (!dependent && unlikely(!rtree_node_valid(node))) { \ + if (level != 0 && !dependent && \ + unlikely(!rtree_node_valid(node))) { \ return NULL; \ } \ uintptr_t subkey = rtree_subkey(key, level); \ diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b04e321b..7a25c47d 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -5,13 +5,14 @@ rtree_node_dalloc_t *rtree_node_dalloc_orig; rtree_leaf_alloc_t *rtree_leaf_alloc_orig; rtree_leaf_dalloc_t *rtree_leaf_dalloc_orig; -rtree_t *test_rtree; +/* Potentially too large to safely place on the stack. */ +rtree_t test_rtree; static rtree_node_elm_t * rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { rtree_node_elm_t *node; - if (rtree != test_rtree) { + if (rtree != &test_rtree) { return rtree_node_alloc_orig(tsdn, rtree, nelms); } @@ -26,7 +27,7 @@ rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { static void rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { - if (rtree != test_rtree) { + if (rtree != &test_rtree) { rtree_node_dalloc_orig(tsdn, rtree, node); return; } @@ -38,7 +39,7 @@ static rtree_leaf_elm_t * rtree_leaf_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { rtree_leaf_elm_t *leaf; - if (rtree != test_rtree) { + if (rtree != &test_rtree) { return rtree_leaf_alloc_orig(tsdn, rtree, nelms); } @@ -53,7 +54,7 @@ rtree_leaf_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { static void rtree_leaf_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { - if (rtree != test_rtree) { + if (rtree != &test_rtree) { rtree_leaf_dalloc_orig(tsdn, rtree, leaf); return; } @@ -66,14 +67,12 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); - rtree_t rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t 
rtree_ctx = RTREE_CTX_INITIALIZER; - test_rtree = &rtree; - assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, PAGE, + assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, false), "rtree_extent_read() should return NULL for empty tree"); - rtree_delete(tsdn, &rtree); - test_rtree = NULL; + rtree_delete(tsdn, rtree); } TEST_END @@ -83,7 +82,7 @@ TEST_END #define SEED 42 typedef struct { - rtree_t rtree; + rtree_t *rtree; uint32_t seed; } thd_start_arg_t; @@ -108,26 +107,26 @@ thd_start(void *varg) { MAX_NBITS) - ZU(1))); if (i % 2 == 0) { rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, - &arg->rtree, &rtree_ctx, key, false, true); + arg->rtree, &rtree_ctx, key, false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_write(tsdn, &arg->rtree, elm, true, + rtree_leaf_elm_write(tsdn, arg->rtree, elm, true, extent, NSIZES, false); - rtree_leaf_elm_release(tsdn, &arg->rtree, elm); + rtree_leaf_elm_release(tsdn, arg->rtree, elm); - elm = rtree_leaf_elm_acquire(tsdn, &arg->rtree, + elm = rtree_leaf_elm_acquire(tsdn, arg->rtree, &rtree_ctx, key, true, false); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_extent_read(tsdn, &arg->rtree, elm, true, + rtree_leaf_elm_extent_read(tsdn, arg->rtree, elm, true, true); - rtree_leaf_elm_szind_read(tsdn, &arg->rtree, elm, true, + rtree_leaf_elm_szind_read(tsdn, arg->rtree, elm, true, true); - rtree_leaf_elm_slab_read(tsdn, &arg->rtree, elm, true, + rtree_leaf_elm_slab_read(tsdn, arg->rtree, elm, true, true); - rtree_leaf_elm_release(tsdn, &arg->rtree, elm); + rtree_leaf_elm_release(tsdn, arg->rtree, elm); } else { - rtree_extent_read(tsdn, &arg->rtree, &rtree_ctx, key, + rtree_extent_read(tsdn, arg->rtree, &rtree_ctx, key, false); } } @@ -145,8 +144,9 @@ 
TEST_BEGIN(test_rtree_concurrent) { sfmt = init_gen_rand(SEED); tsdn = tsdn_fetch(); - test_rtree = &arg.rtree; - assert_false(rtree_new(&arg.rtree), "Unexpected rtree_new() failure"); + arg.rtree = &test_rtree; + assert_false(rtree_new(arg.rtree, false), + "Unexpected rtree_new() failure"); arg.seed = gen_rand32(sfmt); for (unsigned i = 0; i < NTHREADS; i++) { thd_create(&thds[i], thd_start, (void *)&arg); @@ -154,8 +154,7 @@ TEST_BEGIN(test_rtree_concurrent) { for (unsigned i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } - rtree_delete(tsdn, &arg.rtree); - test_rtree = NULL; + rtree_delete(tsdn, arg.rtree); fini_gen_rand(sfmt); } TEST_END @@ -173,29 +172,27 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); - rtree_t rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - test_rtree = &rtree; - assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, PAGE, &extent_a, + assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &extent_a, extent_szind_get(&extent_a), extent_slab_get(&extent_a)), "Unexpected rtree_write() failure"); - rtree_szind_slab_update(tsdn, &rtree, &rtree_ctx, PAGE, + rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, extent_szind_get(&extent_a), extent_slab_get(&extent_a)); - assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, PAGE, true), + assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, true), &extent_a, "rtree_extent_read() should return previously set value"); - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, ~((uintptr_t)0), + assert_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), &extent_b, extent_szind_get_maybe_invalid(&extent_b), extent_slab_get(&extent_b)), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + assert_ptr_eq(rtree_extent_read(tsdn, rtree, 
&rtree_ctx, ~((uintptr_t)0), true), &extent_b, "rtree_extent_read() should return previously set value"); - rtree_delete(tsdn, &rtree); - test_rtree = NULL; + rtree_delete(tsdn, rtree); } TEST_END @@ -209,33 +206,30 @@ TEST_BEGIN(test_rtree_bits) { extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, extent_state_active, false, false); - rtree_t rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - test_rtree = &rtree; - assert_false(rtree_new(&rtree), - "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { - assert_false(rtree_write(tsdn, &rtree, &rtree_ctx, keys[i], + assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], &extent, NSIZES, false), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_ptr_eq(rtree_extent_read(tsdn, &rtree, - &rtree_ctx, keys[j], true), - &extent, "rtree_extent_read() should return " - "previously set value and ignore insignificant key " - "bits; i=%u, j=%u, set key=%#"FMTxPTR", get " - "key=%#"FMTxPTR, i, j, keys[i], keys[j]); + assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, + keys[j], true), &extent, + "rtree_extent_read() should return previously set " + "value and ignore insignificant key bits; i=%u, " + "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, + j, keys[i], keys[j]); } - assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); - rtree_clear(tsdn, &rtree, &rtree_ctx, keys[i]); + rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); } - rtree_delete(tsdn, &rtree); - test_rtree = NULL; + rtree_delete(tsdn, rtree); } TEST_END @@ -245,50 +239,48 @@ TEST_BEGIN(test_rtree_random) { sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); uintptr_t 
keys[NSET]; - rtree_t rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; extent_t extent; extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, extent_state_active, false, false); - test_rtree = &rtree; - assert_false(rtree_new(&rtree), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); - rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, &rtree, + rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, rtree, &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_write(tsdn, &rtree, elm, true, &extent, NSIZES, + rtree_leaf_elm_write(tsdn, rtree, elm, true, &extent, NSIZES, false); - rtree_leaf_elm_release(tsdn, &rtree, elm); - assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + rtree_leaf_elm_release(tsdn, rtree, elm); + assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, keys[i], true), &extent, "rtree_extent_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_eq(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, keys[i], true), &extent, "rtree_extent_read() should return previously set value, " "i=%u", i); } for (unsigned i = 0; i < NSET; i++) { - rtree_clear(tsdn, &rtree, &rtree_ctx, keys[i]); - assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); + assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, keys[i], true), "rtree_extent_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_null(rtree_extent_read(tsdn, &rtree, &rtree_ctx, + assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, keys[i], true), "rtree_extent_read() should return previously set value"); } - rtree_delete(tsdn, &rtree); - test_rtree = 
NULL; + rtree_delete(tsdn, rtree); fini_gen_rand(sfmt); #undef NSET #undef SEED @@ -305,7 +297,6 @@ main(void) { rtree_leaf_alloc = rtree_leaf_alloc_intercept; rtree_leaf_dalloc_orig = rtree_leaf_dalloc; rtree_leaf_dalloc = rtree_leaf_dalloc_intercept; - test_rtree = NULL; return test( test_rtree_read_empty, From 0ee0e0c155a05d0d028a9972ad86b9eaac4ccabd Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 20 Mar 2017 16:38:21 -0700 Subject: [PATCH 0726/2608] Implement compact rtree leaf element representation. If a single virtual adddress pointer has enough unused bits to pack {szind_t, extent_t *, bool, bool}, use a single pointer-sized field in each rtree leaf element, rather than using three separate fields. This has little impact on access speed (fewer loads/stores, but more bit twiddling), except that denser representation increases TLB effectiveness. --- include/jemalloc/internal/private_symbols.txt | 5 + include/jemalloc/internal/rtree_inlines.h | 127 +++++++++++++++++- include/jemalloc/internal/rtree_structs.h | 19 ++- include/jemalloc/internal/rtree_types.h | 4 + include/jemalloc/internal/size_classes.sh | 15 +++ 5 files changed, 163 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 169e7d11..35c7028b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -419,6 +419,11 @@ rtree_extent_szind_read rtree_leaf_alloc rtree_leaf_dalloc rtree_leaf_elm_acquire +rtree_leaf_elm_bits_extent_get +rtree_leaf_elm_bits_locked_get +rtree_leaf_elm_bits_read +rtree_leaf_elm_bits_slab_get +rtree_leaf_elm_bits_szind_get rtree_leaf_elm_extent_read rtree_leaf_elm_extent_write rtree_leaf_elm_lookup diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 9c337b85..6f92df94 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ 
-4,6 +4,14 @@ #ifndef JEMALLOC_ENABLE_INLINE uintptr_t rtree_leafkey(uintptr_t key); uintptr_t rtree_subkey(uintptr_t key, unsigned level); +# ifdef RTREE_LEAF_COMPACT +uintptr_t rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, bool dependent); +extent_t *rtree_leaf_elm_bits_extent_get(uintptr_t bits); +szind_t rtree_leaf_elm_bits_szind_get(uintptr_t bits); +bool rtree_leaf_elm_bits_slab_get(uintptr_t bits); +bool rtree_leaf_elm_bits_locked_get(uintptr_t bits); +# endif extent_t *rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, bool dependent); szind_t rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, @@ -75,6 +83,42 @@ rtree_subkey(uintptr_t key, unsigned level) { * dependent on a previous rtree write, which means a stale read * could result if synchronization were omitted here. */ +# ifdef RTREE_LEAF_COMPACT +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool acquired, bool dependent) { + if (config_debug && acquired) { + assert(dependent); + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + + return (uintptr_t)atomic_load_p(&elm->le_bits, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +rtree_leaf_elm_bits_extent_get(uintptr_t bits) { + /* Restore sign-extended high bits, mask slab and lock bits. 
*/ + return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + RTREE_NHIB) & ~((uintptr_t)0x3)); +} + +JEMALLOC_ALWAYS_INLINE szind_t +rtree_leaf_elm_bits_szind_get(uintptr_t bits) { + return (szind_t)(bits >> LG_VADDR); +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_bits_slab_get(uintptr_t bits) { + return (bool)((bits >> 1) & (uintptr_t)0x1); +} + +JEMALLOC_ALWAYS_INLINE bool +rtree_leaf_elm_bits_locked_get(uintptr_t bits) { + return (bool)(bits & (uintptr_t)0x1); +} +# endif + JEMALLOC_ALWAYS_INLINE extent_t * rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, bool dependent) { @@ -83,6 +127,12 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, rtree_leaf_elm_witness_access(tsdn, rtree, elm); } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired, + dependent); + assert(!acquired || rtree_leaf_elm_bits_locked_get(bits)); + return rtree_leaf_elm_bits_extent_get(bits); +#else extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); assert(!acquired || ((uintptr_t)extent & (uintptr_t)0x1) == @@ -90,6 +140,7 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, /* Mask lock bit. */ extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); return extent; +#endif } JEMALLOC_ALWAYS_INLINE szind_t @@ -100,8 +151,15 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, rtree_leaf_elm_witness_access(tsdn, rtree, elm); } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired, + dependent); + assert(!acquired || rtree_leaf_elm_bits_locked_get(bits)); + return rtree_leaf_elm_bits_szind_get(bits); +#else return (szind_t)atomic_load_u(&elm->le_szind, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); +#endif } JEMALLOC_ALWAYS_INLINE bool @@ -112,8 +170,15 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, rtree_leaf_elm_witness_access(tsdn, rtree, elm); } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired, + dependent); + assert(!acquired || rtree_leaf_elm_bits_locked_get(bits)); + return rtree_leaf_elm_bits_slab_get(bits); +#else return atomic_load_b(&elm->le_slab, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +#endif } JEMALLOC_INLINE void @@ -124,11 +189,21 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + acquired, acquired); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) + | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) | + (uintptr_t)acquired; + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else if (acquired) { /* Overlay lock bit. 
*/ extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1); } atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); +#endif } JEMALLOC_INLINE void @@ -139,7 +214,18 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } assert(szind <= NSIZES); +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + acquired, acquired); + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) | + (uintptr_t)acquired; + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); +#endif } JEMALLOC_INLINE void @@ -149,12 +235,35 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, rtree_leaf_elm_witness_access(tsdn, rtree, elm); } +#ifdef RTREE_LEAF_COMPACT + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, + acquired, acquired); + uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << + LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) | + (uintptr_t)acquired; + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); +#endif } JEMALLOC_INLINE void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, szind_t szind, bool slab) { +#ifdef RTREE_LEAF_COMPACT + if (config_debug && acquired) { + rtree_leaf_elm_witness_access(tsdn, rtree, elm); + } + assert(!slab || szind < NBINS); + + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)slab << 1) | + (uintptr_t)acquired; + + atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); +#else rtree_leaf_elm_slab_write(tsdn, rtree, 
elm, acquired, slab); rtree_leaf_elm_szind_write(tsdn, rtree, elm, acquired, szind); /* @@ -162,6 +271,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, * as soon as the extent field is non-NULL. */ rtree_leaf_elm_extent_write(tsdn, rtree, elm, acquired, extent); +#endif } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -317,19 +427,24 @@ rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, spin_t spinner = SPIN_INITIALIZER; while (true) { /* The least significant bit serves as a lock. */ - void *extent_and_lock = atomic_load_p(&elm->le_extent, +#ifdef RTREE_LEAF_COMPACT +# define RTREE_FIELD_WITH_LOCK le_bits +#else +# define RTREE_FIELD_WITH_LOCK le_extent +#endif + void *bits = atomic_load_p(&elm->RTREE_FIELD_WITH_LOCK, ATOMIC_RELAXED); - if (likely(((uintptr_t)extent_and_lock & (uintptr_t)0x1) == 0)) - { - void *locked = (void *)((uintptr_t)extent_and_lock - | (uintptr_t)0x1); + if (likely(((uintptr_t)bits & (uintptr_t)0x1) == 0)) { + void *locked = (void *)((uintptr_t)bits | + (uintptr_t)0x1); if (likely(atomic_compare_exchange_strong_p( - &elm->le_extent, &extent_and_lock, locked, + &elm->RTREE_FIELD_WITH_LOCK, &bits, locked, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { break; } } spin_adaptive(&spinner); +#undef RTREE_FIELD_WITH_LOCK } if (config_debug) { diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 3ecdf810..8dd9cdaa 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -6,9 +6,26 @@ struct rtree_node_elm_s { }; struct rtree_leaf_elm_s { - atomic_p_t le_extent; /* (extent_t *) */ +#ifdef RTREE_LEAF_COMPACT + /* + * Single pointer-width field containing all three leaf element fields. 
+ * For example, on a 64-bit x64 system with 48 significant virtual + * memory address bits, the index, extent, and slab fields are packed as + * such: + * + * x: index + * e: extent + * b: slab + * k: lock + * + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00bk + */ + atomic_p_t le_bits; +#else + atomic_p_t le_extent; /* (extent_t *), lock in low bit */ atomic_u_t le_szind; /* (szind_t) */ atomic_b_t le_slab; /* (bool) */ +#endif }; struct rtree_leaf_elm_witness_s { diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index 18fc5b0f..de3893be 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -33,6 +33,10 @@ typedef struct rtree_s rtree_t; #else # error Unsupported number of significant virtual address bits #endif +/* Use compact leaf representation if virtual address encoding allows. */ +#if RTREE_NHIB >= LG_CEIL_NSIZES +# define RTREE_LEAF_COMPACT +#endif /* * Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf, diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 06892d8d..60bdbd21 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -40,6 +40,17 @@ lg() { done } +lg_ceil() { + y=$1 + lg ${y}; lg_floor=${lg_result} + pow2 ${lg_floor}; pow2_floor=${pow2_result} + if [ ${pow2_floor} -lt ${y} ] ; then + lg_ceil_result=$((${lg_floor} + 1)) + else + lg_ceil_result=${lg_floor} + fi +} + reg_size_compute() { lg_grp=$1 lg_delta=$2 @@ -246,12 +257,14 @@ size_classes() { done echo nsizes=${index} + lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result} # Defined upon completion: # - ntbins # - nlbins # - nbins # - nsizes + # - lg_ceil_nsizes # - npsizes # - lg_tiny_maxclass # - lookup_maxclass @@ -286,6 +299,7 @@ cat < Date: Fri, 17 Mar 2017 01:25:12 -0700 Subject: [PATCH 0727/2608] Remove extent arg from isalloc() and arena_salloc(). 
--- include/jemalloc/internal/arena_inlines_b.h | 13 ++------- .../jemalloc/internal/jemalloc_internal.h.in | 22 ++++---------- include/jemalloc/internal/prof_inlines_b.h | 6 ++-- src/arena.c | 8 ++--- src/jemalloc.c | 29 +++++++++---------- src/tcache.c | 2 +- 6 files changed, 29 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8c5f9c14..47d62c15 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -14,7 +14,7 @@ void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); +size_t arena_salloc(tsdn_t *tsdn, const void *ptr); size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, bool slow_path); @@ -116,7 +116,7 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE size_t -arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { +arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); rtree_ctx_t rtree_ctx_fallback; @@ -126,15 +126,6 @@ arena_salloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { (uintptr_t)ptr, true); assert(szind != NSIZES); - if (config_debug && unlikely(extent != NULL)) { - rtree_leaf_elm_t elm; - rtree_leaf_elm_read(rtree_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true), true, &elm); - - assert(extent == rtree_leaf_elm_extent_get(&elm)); - assert(szind == extent_szind_get(extent)); - } - return index2size(szind); } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 238ebdca..57b9ed8c 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ 
b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1008,7 +1008,7 @@ iealloc(tsdn_t *tsdn, const void *ptr) { #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); -size_t isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr); +size_t isalloc(tsdn_t *tsdn, const void *ptr); void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path); void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, @@ -1043,18 +1043,11 @@ iaalloc(tsdn_t *tsdn, const void *ptr) { return arena_aalloc(tsdn, ptr); } -/* - * Typical usage: - * tsdn_t *tsdn = [...] - * void *ptr = [...] - * extent_t *extent = iealloc(tsdn, ptr); - * size_t sz = isalloc(tsdn, extent, ptr); - */ JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { +isalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); - return arena_salloc(tsdn, extent, ptr); + return arena_salloc(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void * @@ -1070,8 +1063,7 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { - arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, - iealloc(tsdn, ret), ret)); + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } return ret; } @@ -1097,8 +1089,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_internal && likely(ret != NULL)) { - arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, - iealloc(tsdn, ret), ret)); + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } return ret; } @@ -1129,8 +1120,7 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, narenas_auto); witness_assert_depth_to_rank(tsdn, 
WITNESS_RANK_CORE, 0); if (config_stats && is_internal) { - arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, extent, - ptr)); + arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } arena_dalloc(tsdn, extent, ptr, tcache, slow_path); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 9e969a07..29a2b528 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -155,7 +155,7 @@ prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsdn, extent, ptr)); + assert(usize == isalloc(tsdn, ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); @@ -175,7 +175,7 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); if (prof_active && !updated && ptr != NULL) { - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (prof_sample_accum_update(tsd, usize, true, NULL)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() @@ -229,7 +229,7 @@ prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); cassert(config_prof); - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_free_sampled_object(tsd, usize, tctx); diff --git a/src/arena.c b/src/arena.c index 2dd84761..680c435f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1029,7 +1029,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); if (config_stats || (config_prof && opt_prof)) { - usize = isalloc(tsd_tsdn(tsd), extent, ptr); + usize = isalloc(tsd_tsdn(tsd), ptr); } /* Remove large allocation from prof sample set. */ if (config_prof && opt_prof) { @@ -1465,7 +1465,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); assert(usize <= SMALL_MAXCLASS); szind_t szind = size2index(usize); @@ -1477,7 +1477,7 @@ arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, prof_accum_cancel(tsdn, &arena->prof_accum, usize); - assert(isalloc(tsdn, extent, ptr) == usize); + assert(isalloc(tsdn, ptr) == usize); } static size_t @@ -1491,7 +1491,7 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, NBINS, false); - assert(isalloc(tsdn, extent, ptr) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); return LARGE_MINCLASS; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 3c595bab..8201c50a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1766,8 +1766,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); if 
(config_stats) { - assert(usize == isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - allocation), allocation)); + assert(usize == isalloc(tsd_tsdn(tsd), allocation)); *tsd_thread_allocatedp_get(tsd) += usize; } @@ -2019,10 +2018,10 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { extent = iealloc(tsd_tsdn(tsd), ptr); if (config_prof && opt_prof) { - usize = isalloc(tsd_tsdn(tsd), extent, ptr); + usize = isalloc(tsd_tsdn(tsd), ptr); prof_free(tsd, extent, ptr, usize); } else if (config_stats) { - usize = isalloc(tsd_tsdn(tsd), extent, ptr); + usize = isalloc(tsd_tsdn(tsd), ptr); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; @@ -2089,7 +2088,7 @@ je_realloc(void *ptr, size_t size) { witness_assert_lockless(tsd_tsdn(tsd)); extent = iealloc(tsd_tsdn(tsd), ptr); - old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); + old_usize = isalloc(tsd_tsdn(tsd), ptr); if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? @@ -2119,7 +2118,7 @@ je_realloc(void *ptr, size_t size) { if (config_stats && likely(ret != NULL)) { tsd_t *tsd; - assert(usize == isalloc(tsdn, iealloc(tsdn, ret), ret)); + assert(usize == isalloc(tsdn, ret)); tsd = tsdn_tsd(tsdn); *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; @@ -2374,7 +2373,7 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, * reallocation. Therefore, query the actual value of usize. */ extent = old_extent; - *usize = isalloc(tsd_tsdn(tsd), extent, p); + *usize = isalloc(tsd_tsdn(tsd), p); } else { extent = iealloc(tsd_tsdn(tsd), p); } @@ -2425,7 +2424,7 @@ je_rallocx(void *ptr, size_t size, int flags) { tcache = tcache_get(tsd, true); } - old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); + old_usize = isalloc(tsd_tsdn(tsd), ptr); if (config_prof && opt_prof) { usize = (alignment == 0) ? 
s2u(size) : sa2u(size, alignment); @@ -2444,8 +2443,7 @@ je_rallocx(void *ptr, size_t size, int flags) { goto label_oom; } if (config_stats) { - usize = isalloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - p), p); + usize = isalloc(tsd_tsdn(tsd), p); } } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); @@ -2476,7 +2474,7 @@ ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, zero)) { return old_usize; } - usize = isalloc(tsdn, extent, ptr); + usize = isalloc(tsdn, ptr); return usize; } @@ -2561,7 +2559,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { witness_assert_lockless(tsd_tsdn(tsd)); extent = iealloc(tsd_tsdn(tsd), ptr); - old_usize = isalloc(tsd_tsdn(tsd), extent, ptr); + old_usize = isalloc(tsd_tsdn(tsd), ptr); /* * The API explicitly absolves itself of protecting against (size + @@ -2615,7 +2613,7 @@ je_sallocx(const void *ptr, int flags) { if (config_ivsalloc) { usize = ivsalloc(tsdn, ptr); } else { - usize = isalloc(tsdn, iealloc(tsdn, ptr), ptr); + usize = isalloc(tsdn, ptr); } witness_assert_lockless(tsdn); @@ -2678,7 +2676,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { tsd = tsd_fetch(); extent = iealloc(tsd_tsdn(tsd), ptr); usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == isalloc(tsd_tsdn(tsd), extent, ptr)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2798,8 +2796,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { if (config_ivsalloc) { ret = ivsalloc(tsdn, ptr); } else { - ret = (ptr == NULL) ? 0 : isalloc(tsdn, iealloc(tsdn, ptr), - ptr); + ret = (ptr == NULL) ? 
0 : isalloc(tsdn, ptr); } witness_assert_lockless(tsdn); diff --git a/src/tcache.c b/src/tcache.c index 2250425f..7ae89751 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -28,7 +28,7 @@ static malloc_mutex_t tcaches_mtx; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr) { - return arena_salloc(tsdn, iealloc(tsdn, ptr), ptr); + return arena_salloc(tsdn, ptr); } void From 51a2ec92a10691bf5cee78093a0aa9a1fced351d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 17 Mar 2017 02:45:12 -0700 Subject: [PATCH 0728/2608] Remove extent dereferences from the deallocation fast paths. --- include/jemalloc/internal/arena_inlines_b.h | 71 +++++++++++++++---- .../jemalloc/internal/jemalloc_internal.h.in | 41 ++++++----- src/arena.c | 2 +- src/ckh.c | 15 ++-- src/jemalloc.c | 32 ++++----- src/large.c | 3 +- src/prof.c | 33 ++++----- src/tcache.c | 3 +- 8 files changed, 113 insertions(+), 87 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 47d62c15..cbd9e20a 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -16,10 +16,9 @@ void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); size_t arena_salloc(tsdn_t *tsdn, const void *ptr); size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); -void arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -163,24 +162,39 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, extent_t *extent, 
void *ptr, tcache_t *tcache, - bool slow_path) { +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - szind_t szind = extent_szind_get(extent); - if (likely(extent_slab_get(extent))) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + szind_t szind; + bool slab; + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + true, &szind, &slab); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { /* Small allocation. */ if (likely(tcache != NULL)) { tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { + extent_t *extent = iealloc(tsdn, ptr); arena_dalloc_small(tsdn, extent_arena_get(extent), extent, ptr); } } else { if (likely(tcache != NULL) && szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { + extent_t *extent = iealloc(tsdn, ptr); arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { @@ -188,30 +202,62 @@ arena_dalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, ptr, szind, slow_path); } } else { + extent_t *extent = iealloc(tsdn, ptr); large_dalloc(tsdn, extent); } } } JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) { +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - szind_t szind = size2index(size); - if (likely(extent_slab_get(extent))) { + szind_t szind; + bool slab; + if (!config_prof || !opt_prof) { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. 
+ */ + szind = size2index(size); + slab = (szind < NBINS); + } + + if ((config_prof && opt_prof) || config_debug) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + + assert(szind == size2index(size)); + assert((config_prof && opt_prof) || slab == (szind < NBINS)); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + } + + if (likely(slab)) { /* Small allocation. */ if (likely(tcache != NULL)) { tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { + extent_t *extent = iealloc(tsdn, ptr); arena_dalloc_small(tsdn, extent_arena_get(extent), extent, ptr); } } else { if (likely(tcache != NULL) && szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { + extent_t *extent = iealloc(tsdn, ptr); arena_dalloc_promoted(tsdn, extent, ptr, tcache, slow_path); } else { @@ -219,6 +265,7 @@ arena_sdalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, szind, slow_path); } } else { + extent_t *extent = iealloc(tsdn, ptr); large_dalloc(tsdn, extent); } } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 57b9ed8c..b3510382 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1019,14 +1019,14 @@ void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool is_internal, bool slow_path); -void idalloc(tsd_t *tsd, extent_t *extent, void *ptr); -void 
isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, - tcache_t *tcache, bool slow_path); -void *iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, + bool slow_path); +void idalloc(tsd_t *tsd, void *ptr); +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena); void *iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, @@ -1112,8 +1112,8 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, - bool is_internal, bool slow_path) { +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, + bool slow_path) { assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < @@ -1123,25 +1123,24 @@ idalloctm(tsdn_t *tsdn, extent_t *extent, void *ptr, tcache_t *tcache, arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - arena_dalloc(tsdn, extent, ptr, tcache, slow_path); + arena_dalloc(tsdn, ptr, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, extent_t *extent, void *ptr) { - idalloctm(tsd_tsdn(tsd), extent, ptr, tcache_get(tsd, false), false, - true); +idalloc(tsd_t *tsd, void *ptr) { + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t size, +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool 
slow_path) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - arena_sdalloc(tsdn, extent, ptr, size, tcache, slow_path); + arena_sdalloc(tsdn, ptr, size, tcache, slow_path); } JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, +iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); void *p; @@ -1172,7 +1171,7 @@ iralloct_realign(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - isdalloct(tsdn, extent, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, true); return p; } @@ -1189,8 +1188,8 @@ iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, * Existing object alignment is inadequate; allocate new space * and copy. */ - return iralloct_realign(tsdn, extent, ptr, oldsize, size, 0, - alignment, zero, tcache, arena); + return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, + zero, tcache, arena); } return arena_ralloc(tsdn, arena, extent, ptr, oldsize, size, alignment, diff --git a/src/arena.c b/src/arena.c index 680c435f..f05249dc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1719,7 +1719,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, copysize = (usize < oldsize) ? 
usize : oldsize; memcpy(ret, ptr, copysize); - isdalloct(tsdn, extent, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, true); return ret; } diff --git a/src/ckh.c b/src/ckh.c index 31d1ac21..463f8dd1 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -282,14 +282,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tab), - tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), - ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -331,8 +329,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tab), tab, NULL, - true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -340,8 +337,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), ckh->tab, - NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -422,8 +418,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) { (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ckh->tab), ckh->tab, - NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); if (config_debug) { memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 8201c50a..1077ebf3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -310,8 +310,8 @@ a0ialloc(size_t size, bool zero, bool is_internal) { } static void -a0idalloc(extent_t *extent, void *ptr, bool is_internal) { - idalloctm(TSDN_NULL, extent, ptr, false, is_internal, true); +a0idalloc(void *ptr, bool is_internal) { + idalloctm(TSDN_NULL, ptr, false, is_internal, true); } void * @@ -321,7 +321,7 @@ a0malloc(size_t size) { void a0dalloc(void *ptr) { - a0idalloc(iealloc(NULL, ptr), ptr, true); + a0idalloc(ptr, true); } /* @@ -358,7 +358,7 @@ bootstrap_free(void *ptr) { return; } - a0idalloc(iealloc(NULL, ptr), ptr, false); + a0idalloc(ptr, false); } void @@ -2008,17 +2008,15 @@ irealloc_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, JEMALLOC_INLINE_C void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - extent_t *extent; - size_t usize; - witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - extent = iealloc(tsd_tsdn(tsd), ptr); + size_t usize; if (config_prof && opt_prof) { usize = isalloc(tsd_tsdn(tsd), ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); prof_free(tsd, extent, ptr, usize); } else if (config_stats) { usize = isalloc(tsd_tsdn(tsd), ptr); @@ -2028,21 +2026,21 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { } if (likely(!slow_path)) { - 
idalloctm(tsd_tsdn(tsd), extent, ptr, tcache, false, false); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, false); } else { - idalloctm(tsd_tsdn(tsd), extent, ptr, tcache, false, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache, false, true); } } JEMALLOC_INLINE_C void -isfree(tsd_t *tsd, extent_t *extent, void *ptr, size_t usize, tcache_t *tcache, - bool slow_path) { +isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { witness_assert_lockless(tsd_tsdn(tsd)); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); prof_free(tsd, extent, ptr, usize); } if (config_stats) { @@ -2050,9 +2048,9 @@ isfree(tsd_t *tsd, extent_t *extent, void *ptr, size_t usize, tcache_t *tcache, } if (likely(!slow_path)) { - isdalloct(tsd_tsdn(tsd), extent, ptr, usize, tcache, false); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, false); } else { - isdalloct(tsd_tsdn(tsd), extent, ptr, usize, tcache, true); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, true); } } @@ -2667,14 +2665,12 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags) { tsd_t *tsd; - extent_t *extent; size_t usize; tcache_t *tcache; assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - extent = iealloc(tsd_tsdn(tsd), ptr); usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -2691,9 +2687,9 @@ je_sdallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, 0, 0); if (likely(!malloc_slow)) { - isfree(tsd, extent, ptr, usize, tcache, false); + isfree(tsd, ptr, usize, tcache, false); } else { - isfree(tsd, extent, ptr, usize, tcache, true); + isfree(tsd, ptr, usize, tcache, true); } witness_assert_lockless(tsd_tsdn(tsd)); } diff --git a/src/large.c b/src/large.c index 845202f9..3f96c521 100644 --- a/src/large.c +++ b/src/large.c @@ -303,8 +303,7 @@ 
large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, size_t copysize = (usize < oldusize) ? usize : oldusize; memcpy(ret, extent_addr_get(extent), copysize); - isdalloct(tsdn, extent, extent_addr_get(extent), oldusize, tcache, - true); + isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, true); return ret; } diff --git a/src/prof.c b/src/prof.c index 13fa20d3..be06555b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -582,8 +582,7 @@ prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), gctx), gctx, - NULL, true, true); + idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -697,8 +696,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { } if (destroy_tctx) { - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tctx), tctx, - NULL, true, true); + idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); } } @@ -730,8 +728,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - gctx.v), gctx.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, + true); return true; } new_gctx = true; @@ -755,8 +753,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (tgctx.v != NULL) { /* Lost race to insert. 
*/ - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - tgctx.v), tgctx.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, true, true); } } prof_leave(tsd, tdata); @@ -828,8 +825,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { if (new_gctx) { prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ret.v), - ret.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); return NULL; } malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); @@ -1240,9 +1236,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { to_destroy); tctx_tree_remove(&gctx->tctxs, to_destroy); - idalloctm(tsd_tsdn(tsd), - iealloc(tsd_tsdn(tsd), to_destroy), - to_destroy, NULL, true, true); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, true, true); } else { next = NULL; } @@ -1910,8 +1905,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tdata), tdata, - NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); return NULL; } @@ -1967,12 +1961,10 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - tdata->thread_name), tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); } ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tdata), tdata, NULL, - true, true); + idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); } static void @@ -2169,8 +2161,7 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) { } if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), - tdata->thread_name), tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, 
true); tdata->thread_name = NULL; } if (strlen(s) > 0) { diff --git a/src/tcache.c b/src/tcache.c index 7ae89751..9c99c8b3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -389,8 +389,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) { prof_idump(tsd_tsdn(tsd)); } - idalloctm(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), tcache), tcache, NULL, - true, true); + idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); } void From 5e67fbc367dfe6a08f065167f831d5aa0316554f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 20 Mar 2017 11:00:07 -0700 Subject: [PATCH 0729/2608] Push down iealloc() calls. Call iealloc() as deep into call chains as possible without causing redundant calls. --- include/jemalloc/internal/arena_externs.h | 18 +-- include/jemalloc/internal/arena_inlines_b.h | 39 +++-- .../jemalloc/internal/jemalloc_internal.h.in | 59 ++++---- include/jemalloc/internal/prof_externs.h | 4 +- include/jemalloc/internal/prof_inlines_b.h | 63 ++++---- src/arena.c | 64 ++++---- src/jemalloc.c | 141 +++++++----------- src/prof.c | 6 +- test/unit/prof_tctx.c | 9 +- 9 files changed, 176 insertions(+), 227 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9603d74f..a35fe184 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -65,18 +65,16 @@ void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize); -void arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path); +void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, 
extent_t *extent, void *ptr); -void arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - void *ptr); -bool arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t extra, bool zero); -void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero); +void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_dirty_decay_time_default_get(void); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index cbd9e20a..3c48ce4f 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -3,12 +3,10 @@ #ifndef JEMALLOC_ENABLE_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); +void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, @@ -30,10 +28,11 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) { } 
JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); + const extent_t *extent = iealloc(tsdn, ptr); if (unlikely(!extent_slab_get(extent))) { return large_prof_tctx_get(tsdn, extent); } @@ -41,21 +40,23 @@ arena_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { } JEMALLOC_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx) { +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); + extent_t *extent = iealloc(tsdn, ptr); if (unlikely(!extent_slab_get(extent))) { large_prof_tctx_set(tsdn, extent, tctx); } } JEMALLOC_INLINE void -arena_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); assert(!extent_slab_get(extent)); large_prof_tctx_reset(tsdn, extent); @@ -187,16 +188,13 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { - extent_t *extent = iealloc(tsdn, ptr); - arena_dalloc_small(tsdn, extent_arena_get(extent), - extent, ptr); + arena_dalloc_small(tsdn, ptr); } } else { if (likely(tcache != NULL) && szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { - extent_t *extent = iealloc(tsdn, ptr); - arena_dalloc_promoted(tsdn, extent, ptr, - tcache, slow_path); + arena_dalloc_promoted(tsdn, ptr, tcache, + slow_path); } else { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); @@ -250,16 +248,13 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } 
else { - extent_t *extent = iealloc(tsdn, ptr); - arena_dalloc_small(tsdn, extent_arena_get(extent), - extent, ptr); + arena_dalloc_small(tsdn, ptr); } } else { if (likely(tcache != NULL) && szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { - extent_t *extent = iealloc(tsdn, ptr); - arena_dalloc_promoted(tsdn, extent, ptr, - tcache, slow_path); + arena_dalloc_promoted(tsdn, ptr, tcache, + slow_path); } else { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index b3510382..2fe21018 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1007,32 +1007,32 @@ iealloc(tsdn_t *tsdn, const void *ptr) { #include "jemalloc/internal/hash_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE -arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); -size_t isalloc(tsdn_t *tsdn, const void *ptr); -void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, +arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); +size_t isalloc(tsdn_t *tsdn, const void *ptr); +void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path); -void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path); -void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, +void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena); -void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, +void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, 
bool is_internal, +void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, bool slow_path); -void idalloc(tsd_t *tsd, void *ptr); -void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, +void idalloc(tsd_t *tsd, void *ptr); +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); -void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, +void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero); -bool ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t size, size_t extra, size_t alignment, bool zero); +void *iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero); +bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -1176,8 +1176,8 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, } JEMALLOC_ALWAYS_INLINE void * -iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); assert(size != 0); 
witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); @@ -1192,20 +1192,20 @@ iralloct(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, zero, tcache, arena); } - return arena_ralloc(tsdn, arena, extent, ptr, oldsize, size, alignment, - zero, tcache); + return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, + tcache); } JEMALLOC_ALWAYS_INLINE void * -iralloc(tsd_t *tsd, extent_t *extent, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero) { - return iralloct(tsd_tsdn(tsd), extent, ptr, oldsize, size, alignment, - zero, tcache_get(tsd, true), NULL); +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, + tcache_get(tsd, true), NULL); } JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero) { +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { assert(ptr != NULL); assert(size != 0); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); @@ -1216,8 +1216,7 @@ ixalloc(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, size_t size, return true; } - return arena_ralloc_no_move(tsdn, extent, ptr, oldsize, size, extra, - zero); + return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); } #endif diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index f3b6f8d3..985532f6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -39,8 +39,8 @@ extern uint64_t prof_interval; extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, - const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_malloc_sample_object(tsdn_t *tsdn, const void 
*ptr, size_t usize, + prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 29a2b528..5ee72c53 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -5,24 +5,20 @@ bool prof_active_get_unlocked(void); bool prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, - const void *ptr); -void prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); -void prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, - extent_t *old_extent, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, - size_t usize); +void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx); +void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, + size_t old_usize, prof_tctx_t *old_tctx); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize); #endif #if 
(defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) @@ -71,29 +67,27 @@ prof_tdata_get(tsd_t *tsd, bool create) { } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const extent_t *extent, const void *ptr) { +prof_tctx_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return arena_prof_tctx_get(tsdn, extent, ptr); + return arena_prof_tctx_get(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsdn, extent, ptr, usize, tctx); + arena_prof_tctx_set(tsdn, ptr, usize, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsdn_t *tsdn, extent_t *extent, const void *ptr, - prof_tctx_t *tctx) { +prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsdn, extent, ptr, tctx); + arena_prof_tctx_reset(tsdn, ptr, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -151,24 +145,22 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsdn, ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_malloc_sample_object(tsdn, extent, ptr, usize, tctx); + prof_malloc_sample_object(tsdn, ptr, usize, tctx); } else { - prof_tctx_set(tsdn, extent, ptr, usize, - (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } } JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, - prof_tctx_t *tctx, bool prof_active, bool updated, extent_t 
*old_extent, - const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx) { +prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, + bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + prof_tctx_t *old_tctx) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -194,10 +186,9 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, moved = (ptr != old_ptr); if (unlikely(sampled)) { - prof_malloc_sample_object(tsd_tsdn(tsd), extent, ptr, usize, - tctx); + prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd_tsdn(tsd), extent, ptr, usize, + prof_tctx_set(tsd_tsdn(tsd), ptr, usize, (prof_tctx_t *)(uintptr_t)1U); } else if (unlikely(old_sampled)) { /* @@ -206,9 +197,9 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, * to do here in the presence of explicit knowledge re: moved * state. */ - prof_tctx_reset(tsd_tsdn(tsd), extent, ptr, tctx); + prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); } else { - assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), extent, ptr) == + assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr) == (uintptr_t)1U); } @@ -225,8 +216,8 @@ prof_realloc(tsd_t *tsd, extent_t *extent, const void *ptr, size_t usize, } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const extent_t *extent, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); +prof_free(tsd_t *tsd, const void *ptr, size_t usize) { + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); diff --git a/src/arena.c b/src/arena.c index f05249dc..ef7ec37b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1033,7 +1033,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { } /* Remove large allocation from prof sample set. 
*/ if (config_prof && opt_prof) { - prof_free(tsd, extent, ptr, usize); + prof_free(tsd, ptr, usize); } large_dalloc(tsd_tsdn(tsd), extent); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1459,19 +1459,21 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promote(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize) { - arena_t *arena = extent_arena_get(extent); - +arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { cassert(config_prof); assert(ptr != NULL); assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); assert(usize <= SMALL_MAXCLASS); - szind_t szind = size2index(usize); - extent_szind_set(extent, szind); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); + arena_t *arena = extent_arena_get(extent); + + szind_t szind = size2index(usize); + extent_szind_set(extent, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); @@ -1497,14 +1499,13 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { } void -arena_dalloc_promoted(tsdn_t *tsdn, extent_t *extent, void *ptr, - tcache_t *tcache, bool slow_path) { - size_t usize; - +arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path) { cassert(config_prof); assert(opt_prof); - usize = arena_prof_demote(tsdn, extent, ptr); + extent_t *extent = iealloc(tsdn, ptr); + size_t usize = arena_prof_demote(tsdn, extent, ptr); if (usize <= tcache_maxclass) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, size2index(usize), slow_path); @@ -1621,16 +1622,17 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { } void -arena_dalloc_small(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { +arena_dalloc_small(tsdn_t *tsdn, void *ptr) { + extent_t *extent = iealloc(tsdn, ptr); + arena_t *arena 
= extent_arena_get(extent); + arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); } bool -arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, - size_t size, size_t extra, bool zero) { - size_t usize_min, usize_max; - +arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero) { /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= LARGE_MAXCLASS); @@ -1638,8 +1640,9 @@ arena_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t oldsize, return true; } - usize_min = s2u(size); - usize_max = s2u(size + extra); + extent_t *extent = iealloc(tsdn, ptr); + size_t usize_min = s2u(size); + size_t usize_max = s2u(size + extra); if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the @@ -1678,36 +1681,31 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, - size_t oldsize, size_t size, size_t alignment, bool zero, - tcache_t *tcache) { - void *ret; - size_t usize, copysize; - - usize = s2u(size); +arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, + size_t size, size_t alignment, bool zero, tcache_t *tcache) { + size_t usize = s2u(size); if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { return NULL; } if (likely(usize <= SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. 
*/ - if (!arena_ralloc_no_move(tsdn, extent, ptr, oldsize, usize, 0, - zero)) { + if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) { return ptr; } } if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, extent, usize, alignment, - zero, tcache); + return large_ralloc(tsdn, arena, iealloc(tsdn, ptr), usize, + alignment, zero, tcache); } /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and copying. */ - ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, zero, - tcache); + void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, + zero, tcache); if (ret == NULL) { return NULL; } @@ -1717,7 +1715,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr, * ipalloc()/arena_malloc(). */ - copysize = (usize < oldsize) ? usize : oldsize; + size_t copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); isdalloct(tsdn, ptr, oldsize, tcache, true); return ret; diff --git a/src/jemalloc.c b/src/jemalloc.c index 1077ebf3..d8688bdd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1589,8 +1589,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, if (unlikely(ret == NULL)) { return NULL; } - arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), ret), - ret, usize); + arena_prof_promote(tsd_tsdn(tsd), ret, usize); } else { ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); } @@ -1741,8 +1740,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { goto label_oom; } - prof_malloc(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), allocation), - allocation, usize, tctx); + prof_malloc(tsd_tsdn(tsd), allocation, usize, tctx); } else { /* @@ -1955,53 +1953,46 @@ je_calloc(size_t num, size_t size) { } static void * -irealloc_prof_sample(tsd_t *tsd, extent_t *extent, void *old_ptr, - size_t old_usize, size_t usize, prof_tctx_t *tctx) { 
+irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, + prof_tctx_t *tctx) { void *p; if (tctx == NULL) { return NULL; } if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, extent, old_ptr, old_usize, LARGE_MINCLASS, 0, - false); + p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); if (p == NULL) { return NULL; } - arena_prof_promote(tsd_tsdn(tsd), iealloc(tsd_tsdn(tsd), p), p, - usize); + arena_prof_promote(tsd_tsdn(tsd), p, usize); } else { - p = iralloc(tsd, extent, old_ptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); } return p; } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, - size_t usize) { +irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { void *p; - extent_t *extent; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_extent, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irealloc_prof_sample(tsd, old_extent, old_ptr, old_usize, - usize, tctx); + p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); } else { - p = iralloc(tsd, old_extent, old_ptr, old_usize, usize, 0, - false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); return NULL; } - extent = (p == old_ptr) ? 
old_extent : iealloc(tsd_tsdn(tsd), p); - prof_realloc(tsd, extent, p, usize, tctx, prof_active, true, old_extent, - old_ptr, old_usize, old_tctx); + prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, + old_tctx); return p; } @@ -2016,8 +2007,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; if (config_prof && opt_prof) { usize = isalloc(tsd_tsdn(tsd), ptr); - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - prof_free(tsd, extent, ptr, usize); + prof_free(tsd, ptr, usize); } else if (config_stats) { usize = isalloc(tsd_tsdn(tsd), ptr); } @@ -2040,8 +2030,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); if (config_prof && opt_prof) { - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - prof_free(tsd, extent, ptr, usize); + prof_free(tsd, ptr, usize); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; @@ -2077,27 +2066,21 @@ je_realloc(void *ptr, size_t size) { } if (likely(ptr != NULL)) { - tsd_t *tsd; - extent_t *extent; - assert(malloc_initialized() || IS_INITIALIZER); - tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - extent = iealloc(tsd_tsdn(tsd), ptr); old_usize = isalloc(tsd_tsdn(tsd), ptr); if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? 
- NULL : irealloc_prof(tsd, extent, ptr, old_usize, - usize); + NULL : irealloc_prof(tsd, ptr, old_usize, usize); } else { if (config_stats) { usize = s2u(size); } - ret = iralloc(tsd, extent, ptr, old_usize, size, 0, - false); + ret = iralloc(tsd, ptr, old_usize, size, 0, false); } tsdn = tsd_tsdn(tsd); } else { @@ -2314,47 +2297,46 @@ je_mallocx(size_t size, int flags) { } static void * -irallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *old_ptr, - size_t old_usize, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena, prof_tctx_t *tctx) { +irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, + size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, + prof_tctx_t *tctx) { void *p; if (tctx == NULL) { return NULL; } if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsdn, extent, old_ptr, old_usize, LARGE_MINCLASS, + p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, alignment, zero, tcache, arena); if (p == NULL) { return NULL; } - arena_prof_promote(tsdn, iealloc(tsdn, p), p, usize); + arena_prof_promote(tsdn, p, usize); } else { - p = iralloct(tsdn, extent, old_ptr, old_usize, usize, alignment, - zero, tcache, arena); + p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, + tcache, arena); } return p; } JEMALLOC_ALWAYS_INLINE_C void * -irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, - size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, +irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, + size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena) { void *p; - extent_t *extent; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_extent, old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = 
irallocx_prof_sample(tsd_tsdn(tsd), old_extent, old_ptr, - old_usize, *usize, alignment, zero, tcache, arena, tctx); + p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, + *usize, alignment, zero, tcache, arena, tctx); } else { - p = iralloct(tsd_tsdn(tsd), old_extent, old_ptr, old_usize, - size, alignment, zero, tcache, arena); + p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, + zero, tcache, arena); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); @@ -2370,13 +2352,10 @@ irallocx_prof(tsd_t *tsd, extent_t *old_extent, void *old_ptr, size_t old_usize, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. */ - extent = old_extent; *usize = isalloc(tsd_tsdn(tsd), p); - } else { - extent = iealloc(tsd_tsdn(tsd), p); } - prof_realloc(tsd, extent, p, *usize, tctx, prof_active, false, - old_extent, old_ptr, old_usize, old_tctx); + prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, + old_usize, old_tctx); return p; } @@ -2387,7 +2366,6 @@ JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { void *p; tsd_t *tsd; - extent_t *extent; size_t usize; size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); @@ -2400,7 +2378,6 @@ je_rallocx(void *ptr, size_t size, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - extent = iealloc(tsd_tsdn(tsd), ptr); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2429,14 +2406,14 @@ je_rallocx(void *ptr, size_t size, int flags) { if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; } - p = irallocx_prof(tsd, extent, ptr, old_usize, size, alignment, - &usize, zero, tcache, arena); + p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, + zero, tcache, arena); if (unlikely(p == NULL)) { goto label_oom; } } else { - p = iralloct(tsd_tsdn(tsd), extent, 
ptr, old_usize, size, - alignment, zero, tcache, arena); + p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, + zero, tcache, arena); if (unlikely(p == NULL)) { goto label_oom; } @@ -2464,12 +2441,11 @@ label_oom: } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, - size_t size, size_t extra, size_t alignment, bool zero) { +ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { size_t usize; - if (ixalloc(tsdn, extent, ptr, old_usize, size, extra, alignment, - zero)) { + if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) { return old_usize; } usize = isalloc(tsdn, ptr); @@ -2478,29 +2454,28 @@ ixallocx_helper(tsdn_t *tsdn, extent_t *extent, void *ptr, size_t old_usize, } static size_t -ixallocx_prof_sample(tsdn_t *tsdn, extent_t *extent, void *ptr, - size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, - prof_tctx_t *tctx) { +ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { size_t usize; if (tctx == NULL) { return old_usize; } - usize = ixallocx_helper(tsdn, extent, ptr, old_usize, size, extra, - alignment, zero); + usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, + zero); return usize; } JEMALLOC_ALWAYS_INLINE_C size_t -ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, - size_t size, size_t extra, size_t alignment, bool zero) { +ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, + size_t extra, size_t alignment, bool zero) { size_t usize_max, usize; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), extent, ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. 
* Therefore, compute its maximum possible value and use that in @@ -2525,18 +2500,18 @@ ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - usize = ixallocx_prof_sample(tsd_tsdn(tsd), extent, ptr, - old_usize, size, extra, alignment, zero, tctx); + usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, + size, extra, alignment, zero, tctx); } else { - usize = ixallocx_helper(tsd_tsdn(tsd), extent, ptr, old_usize, - size, extra, alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, + extra, alignment, zero); } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return usize; } - prof_realloc(tsd, extent, ptr, usize, tctx, prof_active, false, extent, - ptr, old_usize, old_tctx); + prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, + old_tctx); return usize; } @@ -2544,7 +2519,6 @@ ixallocx_prof(tsd_t *tsd, extent_t *extent, void *ptr, size_t old_usize, JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; - extent_t *extent; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; @@ -2555,7 +2529,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - extent = iealloc(tsd_tsdn(tsd), ptr); old_usize = isalloc(tsd_tsdn(tsd), ptr); @@ -2577,11 +2550,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } if (config_prof && opt_prof) { - usize = ixallocx_prof(tsd, extent, ptr, old_usize, size, extra, + usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, alignment, zero); } else { - usize = ixallocx_helper(tsd_tsdn(tsd), extent, ptr, old_usize, - size, extra, alignment, zero); + usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, 
size, + extra, alignment, zero); } if (unlikely(usize == old_usize)) { goto label_not_resized; diff --git a/src/prof.c b/src/prof.c index be06555b..b04984b7 100644 --- a/src/prof.c +++ b/src/prof.c @@ -222,9 +222,9 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { } void -prof_malloc_sample_object(tsdn_t *tsdn, extent_t *extent, const void *ptr, - size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, extent, ptr, usize, tctx); +prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, + prof_tctx_t *tctx) { + prof_tctx_set(tsdn, ptr, usize, tctx); malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 14510c65..183f7ce0 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -4,7 +4,6 @@ TEST_BEGIN(test_prof_realloc) { tsdn_t *tsdn; int flags; void *p, *q; - extent_t *extent_p, *extent_q; prof_tctx_t *tctx_p, *tctx_q; uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; @@ -16,9 +15,7 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - extent_p = iealloc(tsdn, p); - assert_ptr_not_null(extent_p, "Unexpected iealloc() failure"); - tctx_p = prof_tctx_get(tsdn, extent_p, p); + tctx_p = prof_tctx_get(tsdn, p); assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); @@ -28,9 +25,7 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - extent_q = iealloc(tsdn, q); - assert_ptr_not_null(extent_q, "Unexpected iealloc() failure"); - tctx_q = prof_tctx_get(tsdn, extent_q, q); + tctx_q = prof_tctx_get(tsdn, q); assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); From 32e7cf51cd879e4f2b0307bba544f913e2d77a7e Mon 
Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Mar 2017 11:00:40 -0700 Subject: [PATCH 0730/2608] Further specialize arena_[s]dalloc() tcache fast path. Use tsd_rtree_ctx() rather than tsdn_rtree_ctx() when tcache is non-NULL, in order to avoid an extra branch (and potentially extra stack space) in the fast path. --- include/jemalloc/internal/arena_inlines_b.h | 145 +++++++++++++----- include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/tsd_inlines.h | 30 ++-- 3 files changed, 131 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 3c48ce4f..ea69a688 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -2,20 +2,22 @@ #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H #ifndef JEMALLOC_ENABLE_INLINE -szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); -void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); -void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); -void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, +void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); +void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); +void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); +void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path); -arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); +arena_t *arena_aalloc(tsdn_t *tsdn, const 
void *ptr); size_t arena_salloc(tsdn_t *tsdn, const void *ptr); size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); -void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, +void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size); +void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); #endif @@ -162,9 +164,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return index2size(szind); } -JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); +JEMALLOC_INLINE void +arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); rtree_ctx_t rtree_ctx_fallback; @@ -179,25 +180,55 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == extent_szind_get(extent)); + assert(szind < NSIZES); assert(slab == extent_slab_get(extent)); } if (likely(slab)) { /* Small allocation. 
*/ - if (likely(tcache != NULL)) { - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); - } else { - arena_dalloc_small(tsdn, ptr); - } + arena_dalloc_small(tsdn, ptr); } else { - if (likely(tcache != NULL) && szind < nhbins) { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + + if (unlikely(tcache == NULL)) { + arena_dalloc_no_tcache(tsdn, ptr); + return; + } + + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + szind_t szind; + bool slab; + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, + true, &szind, &slab); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(szind < NSIZES); + assert(slab == extent_slab_get(extent)); + } + + if (likely(slab)) { + /* Small allocation. 
*/ + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } else { + if (szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, - ptr, szind, slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + szind, slow_path); } } else { extent_t *extent = iealloc(tsdn, ptr); @@ -206,11 +237,10 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { } } -JEMALLOC_ALWAYS_INLINE void -arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) { - assert(!tsdn_null(tsdn) || tcache == NULL); +JEMALLOC_INLINE void +arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); + assert(size <= LARGE_MAXCLASS); szind_t szind; bool slab; @@ -244,20 +274,65 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (likely(slab)) { /* Small allocation. */ - if (likely(tcache != NULL)) { - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); - } else { - arena_dalloc_small(tsdn, ptr); - } + arena_dalloc_small(tsdn, ptr); } else { - if (likely(tcache != NULL) && szind < nhbins) { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path) { + assert(!tsdn_null(tsdn) || tcache == NULL); + assert(ptr != NULL); + assert(size <= LARGE_MAXCLASS); + + if (unlikely(tcache == NULL)) { + arena_sdalloc_no_tcache(tsdn, ptr, size); + return; + } + + szind_t szind; + bool slab; + if (!config_prof || !opt_prof) { + /* + * There is no risk of being confused by a promoted sampled + * object, so base szind and slab on the given size. 
+ */ + szind = size2index(size); + slab = (szind < NBINS); + } + + if ((config_prof && opt_prof) || config_debug) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + + assert(szind == size2index(size)); + assert((config_prof && opt_prof) || slab == (szind < NBINS)); + + if (config_debug) { + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); + } + } + + if (likely(slab)) { + /* Small allocation. */ + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } else { + if (szind < nhbins) { if (config_prof && unlikely(szind < NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - szind, slow_path); + tcache_dalloc_large(tsdn_tsd(tsdn), + tcache, ptr, szind, slow_path); } } else { extent_t *extent = iealloc(tsdn, ptr); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 35c7028b..e138de0c 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -13,6 +13,7 @@ arena_cleanup arena_dalloc arena_dalloc_bin_junked_locked arena_dalloc_junk_small +arena_dalloc_no_tcache arena_dalloc_promoted arena_dalloc_small arena_decay @@ -69,6 +70,7 @@ arena_ralloc_no_move arena_reset arena_salloc arena_sdalloc +arena_sdalloc_no_tcache arena_set arena_slab_regind arena_stats_init @@ -528,6 +530,7 @@ tsd_nominal tsd_prof_tdata_get tsd_prof_tdata_set tsd_prof_tdatap_get +tsd_rtree_ctx tsd_rtree_ctxp_get tsd_rtree_leaf_elm_witnessesp_get tsd_set diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 1457c03e..96de4063 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ 
b/include/jemalloc/internal/tsd_inlines.h @@ -4,20 +4,21 @@ #ifndef JEMALLOC_ENABLE_INLINE malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) -tsd_t *tsd_fetch_impl(bool init); -tsd_t *tsd_fetch(void); -tsdn_t *tsd_tsdn(tsd_t *tsd); -bool tsd_nominal(tsd_t *tsd); +tsd_t *tsd_fetch_impl(bool init); +tsd_t *tsd_fetch(void); +tsdn_t *tsd_tsdn(tsd_t *tsd); +bool tsd_nominal(tsd_t *tsd); #define O(n, t, gs, c) \ -t *tsd_##n##p_get(tsd_t *tsd); \ -t tsd_##n##_get(tsd_t *tsd); \ -void tsd_##n##_set(tsd_t *tsd, t n); +t *tsd_##n##p_get(tsd_t *tsd); \ +t tsd_##n##_get(tsd_t *tsd); \ +void tsd_##n##_set(tsd_t *tsd, t n); MALLOC_TSD #undef O -tsdn_t *tsdn_fetch(void); -bool tsdn_null(const tsdn_t *tsdn); -tsd_t *tsdn_tsd(tsdn_t *tsdn); -rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); +tsdn_t *tsdn_fetch(void); +bool tsdn_null(const tsdn_t *tsdn); +tsd_t *tsdn_tsd(tsdn_t *tsdn); +rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd); +rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -108,6 +109,11 @@ tsdn_tsd(tsdn_t *tsdn) { return &tsdn->tsd; } +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsd_rtree_ctx(tsd_t *tsd) { + return tsd_rtree_ctxp_get(tsd); +} + JEMALLOC_ALWAYS_INLINE rtree_ctx_t * tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { /* @@ -119,7 +125,7 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); return fallback; } - return tsd_rtree_ctxp_get(tsdn_tsd(tsdn)); + return tsd_rtree_ctx(tsdn_tsd(tsdn)); } #endif From 6309df628fa4f11dce084dc53c77ea852408d347 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 23 Feb 2017 14:18:07 -0800 Subject: [PATCH 0731/2608] First stage of mutex profiling. Switched to trylock and update counters based on state. 
--- include/jemalloc/internal/mutex_inlines.h | 51 +++++++++++------------ include/jemalloc/internal/mutex_structs.h | 43 ++++++++++++++++++- include/jemalloc/internal/mutex_types.h | 42 ++++++++++++++++--- include/jemalloc/internal/nstime_types.h | 2 + src/mutex.c | 43 +++++++++++++++++++ 5 files changed, 149 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index c0c3cfe9..cf0ce23a 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -1,31 +1,42 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H #define JEMALLOC_INTERNAL_MUTEX_INLINES_H +void malloc_mutex_lock_slow(malloc_mutex_t *mutex); + #ifndef JEMALLOC_ENABLE_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_trylock(malloc_mutex_t *mutex); void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) +JEMALLOC_INLINE void +malloc_mutex_lock_final(malloc_mutex_t *mutex) { + MALLOC_MUTEX_LOCK(mutex); +} + +/* Trylock: return false if the lock is successfully acquired. 
*/ +JEMALLOC_INLINE bool +malloc_mutex_trylock(malloc_mutex_t *mutex) { + return MALLOC_MUTEX_TRYLOCK(mutex); +} + JEMALLOC_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn, &mutex->witness); if (isthreaded) { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - AcquireSRWLockExclusive(&mutex->lock); -# else - EnterCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_lock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mutex->lock); -#else - pthread_mutex_lock(&mutex->lock); -#endif + if (malloc_mutex_trylock(mutex)) { + malloc_mutex_lock_slow(mutex); + } + /* We own the lock now. Update a few counters. */ + lock_prof_data_t *data = &mutex->prof_data; + data->n_lock_ops++; + if (data->prev_owner != tsdn) { + data->prev_owner = tsdn; + data->n_owner_switches++; + } } witness_lock(tsdn, &mutex->witness); } @@ -34,19 +45,7 @@ JEMALLOC_INLINE void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_unlock(tsdn, &mutex->witness); if (isthreaded) { -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - ReleaseSRWLockExclusive(&mutex->lock); -# else - LeaveCriticalSection(&mutex->lock); -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock_unlock(&mutex->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mutex->lock); -#else - pthread_mutex_unlock(&mutex->lock); -#endif + MALLOC_MUTEX_UNLOCK(mutex); } } diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index c34c1d47..7065c997 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -1,9 +1,50 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H +struct lock_prof_data_s { + /* + * Counters touched on the slow path, i.e. when there is lock + * contention. We update them once we have the lock. + */ + /* Total time spent waiting on this lock. 
*/ + nstime_t tot_wait_time; + /* Max time spent on a single lock operation. */ + nstime_t max_wait_time; + /* # of times have to wait for this lock (after spinning). */ + uint64_t n_wait_times; + /* # of times acquired the lock through local spinning. */ + uint64_t n_spin_acquired; + /* Max # of threads waiting for the lock at the same time. */ + uint32_t max_n_thds; + /* Current # of threads waiting on the lock. Atomic synced. */ + uint32_t n_waiting_thds; + + /* + * Data touched on the fast path. These are modified right after we + * grab the lock, so it's placed closest to the end (i.e. right before + * the lock) so that we have a higher chance of them being on the same + * cacheline. + */ + /* # of times the new lock holder is different from the previous one. */ + uint64_t n_owner_switches; + /* Previous lock holder, to facilitate n_owner_switches. */ + tsdn_t *prev_owner; + /* # of lock() operations in total. */ + uint64_t n_lock_ops; +}; + struct malloc_mutex_s { union { struct { + /* + * prof_data is defined first to reduce cacheline + * bouncing: the data is not touched by the lock holder + * during unlocking, while might be modified by + * contenders. Having it before the lock itself could + * avoid prefetching a modified cacheline (for the + * unlocking thread). + */ + lock_prof_data_t prof_data; #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 SRWLOCK lock; @@ -22,7 +63,7 @@ struct malloc_mutex_s { #endif }; /* - * We only touch witness when configured w/ debug. However we + * We only touch witness when configured w/ debug. However we * keep the field in a union when !debug so that we don't have * to pollute the code base with #ifdefs, while avoid paying the * memory cost. 
diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index b7e3a7a1..d7c7f04f 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -1,31 +1,63 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H #define JEMALLOC_INTERNAL_MUTEX_TYPES_H +typedef struct lock_prof_data_s lock_prof_data_t; typedef struct malloc_mutex_s malloc_mutex_t; +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 +# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) +# else +# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) +#else +# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) +#endif + +#define LOCK_PROF_DATA_INITIALIZER \ + {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0} + #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) # define MALLOC_MUTEX_INITIALIZER \ - {{{OS_UNFAIR_LOCK_INIT}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} 
#elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER \ - {{{0}}, WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER \ - {{{PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #else +/* TODO: get rid of adaptive mutex once we do our own spin. */ # if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP # define MALLOC_MUTEX_INITIALIZER \ - {{{PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, \ + PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # define MALLOC_MUTEX_INITIALIZER \ - {{{PTHREAD_MUTEX_INITIALIZER}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #endif diff --git a/include/jemalloc/internal/nstime_types.h b/include/jemalloc/internal/nstime_types.h index d6039e03..6e7e74cf 100644 --- a/include/jemalloc/internal/nstime_types.h +++ b/include/jemalloc/internal/nstime_types.h @@ -6,4 +6,6 @@ typedef struct nstime_s nstime_t; /* Maximum supported number of seconds (~584 years). 
*/ #define NSTIME_SEC_MAX KQU(18446744072) +#define NSTIME_ZERO_INITIALIZER {0} + #endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */ diff --git a/src/mutex.c b/src/mutex.c index f1aa155e..2b80a9d9 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -65,6 +65,49 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); #endif +void +malloc_mutex_lock_slow(malloc_mutex_t *mutex) { + lock_prof_data_t *data = &mutex->prof_data; + bool spin_success = false; + + {//TODO: a smart spin policy + if (!malloc_mutex_trylock(mutex)) { + spin_success = true; + goto label_locked; + } + } + + nstime_t now, before; + uint32_t n_thds; + nstime_init(&now, 0); + nstime_update(&now); + n_thds = atomic_add_u32(&data->n_waiting_thds, 1); + /* One last try as above two calls may take quite some cycles. */ + if (!malloc_mutex_trylock(mutex)) { + spin_success = true; + atomic_sub_u32(&data->n_waiting_thds, 1); + goto label_locked; + } + nstime_copy(&before, &now); + malloc_mutex_lock_final(mutex); + atomic_sub_u32(&data->n_waiting_thds, 1); + nstime_update(&now); + nstime_subtract(&now, &before); +label_locked: + if (spin_success) { + data->n_spin_acquired++; + } else { + data->n_wait_times++; + nstime_add(&data->tot_wait_time, &now); + if (nstime_compare(&now, &data->max_wait_time)) { + nstime_copy(&data->max_wait_time, &now); + } + if (n_thds > data->max_n_thds) { + data->max_n_thds = n_thds; + } + } +} + bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { From a4f176af57de77d62b4751af876512748c6ce800 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 Mar 2017 19:58:43 -0800 Subject: [PATCH 0732/2608] Output bin lock profiling results to malloc_stats. Two counters are included for the small bins: lock contention rate, and max lock waiting time. 
--- include/jemalloc/internal/mutex_inlines.h | 35 +++++++ include/jemalloc/internal/nstime_externs.h | 1 + include/jemalloc/internal/private_symbols.txt | 2 + include/jemalloc/internal/stats_structs.h | 2 + src/arena.c | 1 + src/ctl.c | 8 +- src/nstime.c | 6 ++ src/stats.c | 99 ++++++++++++------- 8 files changed, 120 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index cf0ce23a..8e81fcde 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -9,6 +9,9 @@ bool malloc_mutex_trylock(malloc_mutex_t *mutex); void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data, + malloc_mutex_t *mutex); +void malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) @@ -23,6 +26,24 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) { return MALLOC_MUTEX_TRYLOCK(mutex); } +/* Aggregate lock prof data. 
*/ +JEMALLOC_INLINE void +malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data) { + nstime_add(&sum->tot_wait_time, &data->tot_wait_time); + if (nstime_compare(&data->max_wait_time, &sum->max_wait_time)) { + nstime_copy(&sum->max_wait_time, &data->max_wait_time); + } + sum->n_wait_times += data->n_wait_times; + sum->n_spin_acquired += data->n_spin_acquired; + + if (sum->max_n_thds < data->max_n_thds) { + sum->max_n_thds = data->max_n_thds; + } + sum->n_waiting_thds += data->n_waiting_thds; + sum->n_owner_switches += data->n_owner_switches; + sum->n_lock_ops += data->n_lock_ops; +} + JEMALLOC_INLINE void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn, &mutex->witness); @@ -58,6 +79,20 @@ JEMALLOC_INLINE void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn, &mutex->witness); } + +/* Copy the prof data from mutex for processing. */ +JEMALLOC_INLINE void +malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data, + malloc_mutex_t *mutex) { + lock_prof_data_t *source = &mutex->prof_data; + /* Can only read with the lock. */ + malloc_mutex_assert_owner(tsdn, mutex); + + *data = *source; + /* n_wait_thds is not reported (modified w/o locking). 
*/ + data->n_waiting_thds = 0; +} + #endif #endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */ diff --git a/include/jemalloc/internal/nstime_externs.h b/include/jemalloc/internal/nstime_externs.h index cf14ae0c..1abc84d9 100644 --- a/include/jemalloc/internal/nstime_externs.h +++ b/include/jemalloc/internal/nstime_externs.h @@ -5,6 +5,7 @@ void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_msec(const nstime_t *time); uint64_t nstime_nsec(const nstime_t *time); void nstime_copy(nstime_t *time, const nstime_t *source); int nstime_compare(const nstime_t *a, const nstime_t *b); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e138de0c..14ecdeda 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -273,6 +273,7 @@ malloc_mutex_assert_owner malloc_mutex_boot malloc_mutex_init malloc_mutex_lock +malloc_mutex_lock_slow malloc_mutex_postfork_child malloc_mutex_postfork_parent malloc_mutex_prefork @@ -302,6 +303,7 @@ nstime_imultiply nstime_init nstime_init2 nstime_monotonic +nstime_msec nstime_ns nstime_nsec nstime_sec diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index ffcb3c18..cbc448a3 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -56,6 +56,8 @@ struct malloc_bin_stats_s { /* Current number of slabs in this bin. 
*/ size_t curslabs; + + lock_prof_data_t lock_data; }; struct malloc_large_stats_s { diff --git a/src/arena.c b/src/arena.c index ef7ec37b..832a125e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -299,6 +299,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsdn, &bin->lock); + malloc_lock_prof_read(tsdn, &bstats[i].lock_data, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; diff --git a/src/ctl.c b/src/ctl.c index ee69be6d..d9f8be60 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -145,6 +145,7 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) +CTL_PROTO(stats_arenas_i_bins_j_lock_data) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -357,7 +358,8 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, - {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)} + {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, + {NAME("lock_data"), CTL(stats_arenas_i_bins_j_lock_data)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { {NAME(""), CHILD(named, stats_arenas_i_bins_j)} @@ -726,6 +728,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, } else { assert(astats->bstats[i].curslabs == 0); } + malloc_lock_prof_merge(&sdstats->bstats[i].lock_data, + &astats->bstats[i].lock_data); } for (i = 0; i < NSIZES - NBINS; i++) { @@ -2333,6 +2337,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_lock_data, + arenas_i(mib[2])->astats->bstats[mib[4]].lock_data, lock_prof_data_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, diff --git a/src/nstime.c b/src/nstime.c index a3f6c1de..70b2f9d8 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,6 +1,7 @@ #include "jemalloc/internal/jemalloc_internal.h" #define BILLION UINT64_C(1000000000) +#define MILLION UINT64_C(1000000) void nstime_init(nstime_t *time, uint64_t ns) { @@ -17,6 +18,11 @@ nstime_ns(const nstime_t *time) { return time->ns; } +uint64_t +nstime_msec(const nstime_t *time) { + return time->ns / MILLION; +} + uint64_t nstime_sec(const nstime_t *time) { return time->ns / BILLION; diff --git a/src/stats.c b/src/stats.c index 58b9a04f..58283f87 100644 --- a/src/stats.c +++ b/src/stats.c @@ -32,6 +32,31 @@ bool opt_stats_print = false; /******************************************************************************/ +/* Calculate x.yyy and output a string (takes a fixed sized char array). */ +static bool +get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { + if (divisor == 0 || dividend > divisor) { + /* The rate is not supposed to be greater than 1. 
*/ + return true; + } + if (dividend > 0) { + assert(UINT64_MAX / dividend >= 1000); + } + + unsigned n = (unsigned)((dividend * 1000) / divisor); + if (n < 10) { + malloc_snprintf(str, 6, "0.00%u", n); + } else if (n < 100) { + malloc_snprintf(str, 6, "0.0%u", n); + } else if (n < 1000) { + malloc_snprintf(str, 6, "0.%u", n); + } else { + malloc_snprintf(str, 6, "1"); + } + + return false; +} + static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool large, unsigned i) { @@ -51,13 +76,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs" " curslabs regs pgs util nfills" - " nflushes newslabs reslabs\n"); + " nflushes newslabs reslabs" + " contention max_wait\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs" " curslabs regs pgs util newslabs" - " reslabs\n"); + " reslabs contention max_wait\n"); } } for (j = 0, in_gap = false; j < nbins; j++) { @@ -100,6 +126,9 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); + lock_prof_data_t lock_data; + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock_data", i, j, + &lock_data, lock_prof_data_t); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -127,55 +156,59 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, curslabs, (j + 1 < nbins) ? "," : ""); } else if (!in_gap) { - size_t availregs, milli; - char util[6]; /* "x.yyy". */ - - availregs = nregs * curslabs; - milli = (availregs != 0) ? (1000 * curregs) / availregs - : 1000; - - if (milli > 1000) { - /* - * Race detected: the counters were read in - * separate mallctl calls and concurrent - * operations happened in between. In this case - * no meaningful utilization can be computed. 
- */ - malloc_snprintf(util, sizeof(util), " race"); - } else if (milli < 10) { - malloc_snprintf(util, sizeof(util), - "0.00%zu", milli); - } else if (milli < 100) { - malloc_snprintf(util, sizeof(util), "0.0%zu", - milli); - } else if (milli < 1000) { - malloc_snprintf(util, sizeof(util), "0.%zu", - milli); - } else { - assert(milli == 1000); - malloc_snprintf(util, sizeof(util), "1"); + size_t availregs = nregs * curslabs; + char util[6]; + if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, + util)) { + if (availregs == 0) { + malloc_snprintf(util, sizeof(util), + "1"); + } else if (curregs > availregs) { + /* + * Race detected: the counters were read + * in separate mallctl calls and + * concurrent operations happened in + * between. In this case no meaningful + * utilization can be computed. + */ + malloc_snprintf(util, sizeof(util), + " race"); + } else { + not_reached(); + } } + char rate[6]; + if (get_rate_str(lock_data.n_wait_times, + lock_data.n_lock_ops, rate)) { + if (lock_data.n_lock_ops == 0) { + malloc_snprintf(rate, sizeof(rate), + "0"); + } + } + uint64_t max_wait = nstime_msec( + &lock_data.max_wait_time); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 " %12"FMTu64" %12"FMTu64" %12zu" " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64" %12"FMTu64" %12"FMTu64"\n", + " %12"FMTu64" %12"FMTu64" %12"FMTu64 + " %12s %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nfills, - nflushes, nslabs, nreslabs); + nflushes, nslabs, nreslabs, rate, max_wait); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 " %12"FMTu64" %12"FMTu64" %12zu" " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64"\n", + " %12"FMTu64" %12s\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nslabs, - nreslabs); + nreslabs, rate); } } } From 0fb5c0e853963480196ac413db18d1ad78d87ec9 Mon Sep 
17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Mar 2017 12:14:05 -0800 Subject: [PATCH 0733/2608] Add arena lock stats output. --- include/jemalloc/internal/ctl_externs.h | 8 ++ include/jemalloc/internal/mutex_inlines.h | 6 +- include/jemalloc/internal/mutex_structs.h | 8 +- include/jemalloc/internal/mutex_types.h | 3 +- include/jemalloc/internal/stats_structs.h | 7 ++ src/arena.c | 18 +++ src/ctl.c | 142 ++++++++++++++++++++-- src/mutex.c | 46 ++++--- src/stats.c | 82 ++++++++++--- 9 files changed, 269 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 2ef48c66..812ec4fa 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -1,6 +1,14 @@ #ifndef JEMALLOC_INTERNAL_CTL_EXTERNS_H #define JEMALLOC_INTERNAL_CTL_EXTERNS_H +/* Maximum ctl tree depth. */ +#define CTL_MAX_DEPTH 7 + +#define NUM_ARENA_PROF_LOCKS 6 +#define NUM_LOCK_PROF_COUNTERS 7 +const char *arena_lock_names[NUM_ARENA_PROF_LOCKS]; +const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS]; + int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 8e81fcde..1020eefd 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -29,9 +29,9 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) { /* Aggregate lock prof data. 
*/ JEMALLOC_INLINE void malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data) { - nstime_add(&sum->tot_wait_time, &data->tot_wait_time); - if (nstime_compare(&data->max_wait_time, &sum->max_wait_time)) { - nstime_copy(&sum->max_wait_time, &data->max_wait_time); + sum->tot_wait_time += data->tot_wait_time; + if (data->max_wait_time > sum->max_wait_time) { + sum->max_wait_time = data->max_wait_time; } sum->n_wait_times += data->n_wait_times; sum->n_spin_acquired += data->n_spin_acquired; diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 7065c997..ce80e15d 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -6,10 +6,10 @@ struct lock_prof_data_s { * Counters touched on the slow path, i.e. when there is lock * contention. We update them once we have the lock. */ - /* Total time spent waiting on this lock. */ - nstime_t tot_wait_time; - /* Max time spent on a single lock operation. */ - nstime_t max_wait_time; + /* Total time (in nano seconds) spent waiting on this lock. */ + uint64_t tot_wait_time; + /* Max time (in nano seconds) spent on a single lock operation. */ + uint64_t max_wait_time; /* # of times have to wait for this lock (after spinning). */ uint64_t n_wait_times; /* # of times acquired the lock through local spinning. 
*/ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index d7c7f04f..0d93fe87 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -28,8 +28,7 @@ typedef struct malloc_mutex_s malloc_mutex_t; # define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif -#define LOCK_PROF_DATA_INITIALIZER \ - {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0} +#define LOCK_PROF_DATA_INITIALIZER {0, 0, 0, 0, 0, 0, 0, NULL, 0} #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index cbc448a3..0e33394b 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -124,6 +124,13 @@ struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ + lock_prof_data_t large_mtx_data; + lock_prof_data_t extent_freelist_mtx_data; + lock_prof_data_t extents_cached_mtx_data; + lock_prof_data_t extents_retained_mtx_data; + lock_prof_data_t decay_mtx_data; + lock_prof_data_t tcache_mtx_data; + /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; }; diff --git a/src/arena.c b/src/arena.c index 832a125e..0c12004d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -292,9 +292,27 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, tbin->ncached * index2size(i)); } } + malloc_lock_prof_read(tsdn, &astats->tcache_mtx_data, + &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } +#define READ_ARENA_MUTEX_PROF_DATA(mtx, data) \ + malloc_mutex_lock(tsdn, &arena->mtx); \ + malloc_lock_prof_read(tsdn, &astats->data, &arena->mtx); \ + malloc_mutex_unlock(tsdn, &arena->mtx); + + /* Gather per arena mutex profiling data. 
*/ + READ_ARENA_MUTEX_PROF_DATA(large_mtx, large_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extent_freelist_mtx, + extent_freelist_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extents_cached.mtx, + extents_cached_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, + extents_retained_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(decay.mtx, decay_mtx_data) +#undef READ_ARENA_MUTEX_PROF_DATA + for (szind_t i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; diff --git a/src/ctl.c b/src/ctl.c index d9f8be60..016275e5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -13,6 +13,24 @@ static bool ctl_initialized; static ctl_stats_t *ctl_stats; static ctl_arenas_t *ctl_arenas; +const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = { + "large", + "extent_freelist", + "extents_cached", + "extents_retained", + "decay", + "tcache" +}; +const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS] = { + "num_ops", + "num_wait", + "num_spin_acq", + "num_owner_switch", + "total_wait_time", + "max_wait_time", + "max_num_thds" +}; + /******************************************************************************/ /* Helpers for named and indexed nodes. 
*/ @@ -145,7 +163,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) -CTL_PROTO(stats_arenas_i_bins_j_lock_data) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -179,12 +196,30 @@ CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +#define STATS_LOCKS_CTL_PROTO_GEN(l, n) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_ops) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_wait) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_spin_acq) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_num_owner_switch) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_total_wait_time) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_max_wait_time) \ +CTL_PROTO(stats_arenas_i_##l##_##n##_max_num_thds) + +#define ARENA_LOCKS_CTL_PROTO_GEN(n) STATS_LOCKS_CTL_PROTO_GEN(locks, n) +ARENA_LOCKS_CTL_PROTO_GEN(large) +ARENA_LOCKS_CTL_PROTO_GEN(extent_freelist) +ARENA_LOCKS_CTL_PROTO_GEN(extents_cached) +ARENA_LOCKS_CTL_PROTO_GEN(extents_retained) +ARENA_LOCKS_CTL_PROTO_GEN(decay) +ARENA_LOCKS_CTL_PROTO_GEN(tcache) +#undef ARENA_LOCKS_CTL_PROTO_GEN + +STATS_LOCKS_CTL_PROTO_GEN(bins_j, lock) +#undef STATS_LOCKS_CTL_PROTO_GEN + /******************************************************************************/ /* mallctl tree. */ -/* Maximum tree depth. 
*/ -#define CTL_MAX_DEPTH 6 - #define NAME(n) {true}, n #define CHILD(t, c) \ sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ @@ -349,6 +384,26 @@ static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; +#define LOCK_PROF_DATA_NODE(prefix, n) \ +static const ctl_named_node_t prefix##_##n##_node[] = { \ + {NAME("num_ops"), \ + CTL(prefix##_##n##_num_ops)}, \ + {NAME("num_wait"), \ + CTL(prefix##_##n##_num_wait)}, \ + {NAME("num_spin_acq"), \ + CTL(prefix##_##n##_num_spin_acq)}, \ + {NAME("num_owner_switch"), \ + CTL(prefix##_##n##_num_owner_switch)}, \ + {NAME("total_wait_time"), \ + CTL(prefix##_##n##_total_wait_time)}, \ + {NAME("max_wait_time"), \ + CTL(prefix##_##n##_max_wait_time)}, \ + {NAME("max_num_thds"), \ + CTL(prefix##_##n##_max_num_thds)} \ + /* Note that # of current waiting thread not provided. */ \ +}; + +LOCK_PROF_DATA_NODE(stats_arenas_i_bins_j, lock) static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -359,8 +414,9 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, - {NAME("lock_data"), CTL(stats_arenas_i_bins_j_lock_data)} + {NAME("lock"), CHILD(named, stats_arenas_i_bins_j_lock)} }; + static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { {NAME(""), CHILD(named, stats_arenas_i_bins_j)} }; @@ -383,6 +439,26 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, large) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extent_freelist) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_cached) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_retained) 
+LOCK_PROF_DATA_NODE(stats_arenas_i_locks, decay) +LOCK_PROF_DATA_NODE(stats_arenas_i_locks, tcache) + +static const ctl_named_node_t stats_arenas_i_locks_node[] = { + {NAME("large"), CHILD(named, stats_arenas_i_locks_large)}, + {NAME("extent_freelist"), + CHILD(named, stats_arenas_i_locks_extent_freelist)}, + {NAME("extents_cached"), + CHILD(named, stats_arenas_i_locks_extents_cached)}, + {NAME("extents_retained"), + CHILD(named, stats_arenas_i_locks_extents_retained)}, + {NAME("decay"), CHILD(named, stats_arenas_i_locks_decay)}, + {NAME("tcache"), CHILD(named, stats_arenas_i_locks_tcache)} +}; +#undef LOCK_PROF_DATA_NODE + static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, @@ -406,7 +482,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)} + {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, + {NAME("locks"), CHILD(named, stats_arenas_i_locks)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} @@ -663,6 +740,22 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); + malloc_lock_prof_merge(&(sdstats->astats.large_mtx_data), + &(astats->astats.large_mtx_data)); + malloc_lock_prof_merge( + &(sdstats->astats.extent_freelist_mtx_data), + &(astats->astats.extent_freelist_mtx_data)); + malloc_lock_prof_merge( + &(sdstats->astats.extents_cached_mtx_data), + &(astats->astats.extents_cached_mtx_data)); + malloc_lock_prof_merge( + &(sdstats->astats.extents_retained_mtx_data), + &(astats->astats.extents_retained_mtx_data)); + 
malloc_lock_prof_merge(&(sdstats->astats.decay_mtx_data), + &(astats->astats.decay_mtx_data)); + malloc_lock_prof_merge(&(sdstats->astats.tcache_mtx_data), + &(astats->astats.tcache_mtx_data)); + if (!destroyed) { accum_atomic_zu(&sdstats->astats.base, &astats->astats.base); @@ -2319,6 +2412,41 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ +/* Lock profiling related APIs below. */ +#define ARENAS_LOCK_CTL_GEN(l, s, d) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_ops, \ + arenas_i(mib[2])->astats->s.d.n_lock_ops, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_wait, \ + arenas_i(mib[2])->astats->s.d.n_wait_times, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_spin_acq, \ + arenas_i(mib[2])->astats->s.d.n_spin_acquired, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_owner_switch, \ + arenas_i(mib[2])->astats->s.d.n_owner_switches, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_total_wait_time, \ + arenas_i(mib[2])->astats->s.d.tot_wait_time, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_wait_time, \ + arenas_i(mib[2])->astats->s.d.max_wait_time, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_num_thds, \ + arenas_i(mib[2])->astats->s.d.max_n_thds, uint64_t) + +#define ARENAS_ASTATS_LOCK_CTL_GEN(l, d) \ + ARENAS_LOCK_CTL_GEN(locks_##l, astats, d) + +/* arena->large_mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(large, large_mtx_data) +/* arena->extent_freelist_mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(extent_freelist, extent_freelist_mtx_data) +/* arena->extents_cached.mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(extents_cached, extents_cached_mtx_data) +/* arena->extents_retained.mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(extents_retained, extents_retained_mtx_data) +/* arena->decay.mtx */ +ARENAS_ASTATS_LOCK_CTL_GEN(decay, decay_mtx_data) +/* arena->tcache_ql_mtx */ 
+ARENAS_ASTATS_LOCK_CTL_GEN(tcache, tcache_mtx_data) +/* arena->bins[j].lock */ +ARENAS_LOCK_CTL_GEN(bins_j_lock, bstats[mib[4]], lock_data) + CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, @@ -2337,8 +2465,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_lock_data, - arenas_i(mib[2])->astats->bstats[mib[4]].lock_data, lock_prof_data_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, diff --git a/src/mutex.c b/src/mutex.c index 2b80a9d9..af6f3c19 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -68,49 +68,55 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { lock_prof_data_t *data = &mutex->prof_data; - bool spin_success = false; {//TODO: a smart spin policy if (!malloc_mutex_trylock(mutex)) { - spin_success = true; - goto label_locked; + data->n_spin_acquired++; + return; } } nstime_t now, before; - uint32_t n_thds; nstime_init(&now, 0); nstime_update(&now); - n_thds = atomic_add_u32(&data->n_waiting_thds, 1); + nstime_copy(&before, &now); + + uint32_t n_thds = atomic_add_u32(&data->n_waiting_thds, 1); /* One last try as above two calls may take quite some cycles. */ if (!malloc_mutex_trylock(mutex)) { - spin_success = true; atomic_sub_u32(&data->n_waiting_thds, 1); - goto label_locked; + data->n_spin_acquired++; + return; } - nstime_copy(&before, &now); + + /* True slow path. */ malloc_mutex_lock_final(mutex); atomic_sub_u32(&data->n_waiting_thds, 1); nstime_update(&now); + + /* Update more slow-path only counters. 
*/ nstime_subtract(&now, &before); -label_locked: - if (spin_success) { - data->n_spin_acquired++; - } else { - data->n_wait_times++; - nstime_add(&data->tot_wait_time, &now); - if (nstime_compare(&now, &data->max_wait_time)) { - nstime_copy(&data->max_wait_time, &now); - } - if (n_thds > data->max_n_thds) { - data->max_n_thds = n_thds; - } + uint64_t wait_time = nstime_ns(&now); + data->n_wait_times++; + data->tot_wait_time += wait_time; + if (wait_time > data->max_wait_time) { + data->max_wait_time = wait_time; } + if (n_thds > data->max_n_thds) { + data->max_n_thds = n_thds; + } +} + +static void +lock_prof_data_init(lock_prof_data_t *data) { + memset(data, 0, sizeof(lock_prof_data_t)); + data->prev_owner = NULL; } bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { + lock_prof_data_init(&mutex->prof_data); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); diff --git a/src/stats.c b/src/stats.c index 58283f87..7b690e7f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -7,7 +7,7 @@ } while (0) #define CTL_M2_GET(n, i, v, t) do { \ - size_t mib[6]; \ + size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ @@ -16,7 +16,7 @@ } while (0) #define CTL_M2_M4_GET(n, i, j, v, t) do { \ - size_t mib[6]; \ + size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ @@ -77,13 +77,13 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, " ndalloc nrequests curregs" " curslabs regs pgs util nfills" " nflushes newslabs reslabs" - " contention max_wait\n"); + " contention max_wait_ns\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs" " curslabs regs pgs util newslabs" - " reslabs contention max_wait\n"); + " reslabs contention max_wait_ns\n"); } } for (j = 0, in_gap = 
false; j < nbins; j++) { @@ -126,9 +126,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); - lock_prof_data_t lock_data; - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock_data", i, j, - &lock_data, lock_prof_data_t); + + uint64_t num_ops, num_wait, max_wait; + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", i, j, + &num_wait, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.max_wait_time", i, j, + &max_wait, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_ops", i, j, + &num_ops, uint64_t); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -178,15 +183,12 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } } char rate[6]; - if (get_rate_str(lock_data.n_wait_times, - lock_data.n_lock_ops, rate)) { - if (lock_data.n_lock_ops == 0) { + if (get_rate_str(num_wait, num_ops, rate)) { + if (num_ops == 0) { malloc_snprintf(rate, sizeof(rate), "0"); } } - uint64_t max_wait = nstime_msec( - &lock_data.max_wait_time); if (config_tcache) { malloc_cprintf(write_cb, cbopaque, @@ -204,11 +206,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%20zu %3u %12zu %12"FMTu64 " %12"FMTu64" %12"FMTu64" %12zu" " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64" %12s\n", + " %12"FMTu64" %12s %12"FMTu64"\n", reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nslabs, - nreslabs, rate); + nreslabs, rate, max_wait); } } } @@ -287,6 +289,57 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), } } +static void +gen_ctl_str(char *str, const char *lock, const char *counter) { + sprintf(str, "stats.arenas.0.locks.%s.%s", lock, counter); +} + +static void read_arena_lock_stats(unsigned arena_ind, + uint64_t results[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) { + char cmd[128]; + + unsigned i, j; + for (i = 0; i < 
NUM_ARENA_PROF_LOCKS; i++) { + for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { + gen_ctl_str(cmd, arena_lock_names[i], + lock_counter_names[j]); + CTL_M2_GET(cmd, arena_ind, &results[i][j], uint64_t); + } + } +} + +static void +stats_arena_locks_print(void (*write_cb)(void *, const char *), + void *cbopaque, bool json, unsigned arena_ind) { + uint64_t lock_stats[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; + read_arena_lock_stats(arena_ind, lock_stats); + + /* Output lock stats. */ + if (json) { + //TODO + } else { + malloc_cprintf(write_cb, cbopaque, + " n_lock_ops n_waiting" + " n_spin_acq n_owner_switch total_wait_ns" + " max_wait_ns max_n_wait_thds\n"); + + unsigned i, j; + for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { + malloc_cprintf(write_cb, cbopaque, + "%s", arena_lock_names[i]); + malloc_cprintf(write_cb, cbopaque, ":%*c", + (int)(18 - strlen(arena_lock_names[i])), ' '); + + for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { + malloc_cprintf(write_cb, cbopaque, " %15"FMTu64, + lock_stats[i][j]); + } + malloc_cprintf(write_cb, cbopaque, "\n"); + } + } + +} + static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, unsigned i, bool bins, bool large) { @@ -518,6 +571,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "resident: %12zu\n", resident); } + stats_arena_locks_print(write_cb, cbopaque, json, i); if (bins) { stats_arena_bins_print(write_cb, cbopaque, json, large, i); } From ca9074deffe799dafa74a1d71333a103c4c007ce Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sat, 11 Mar 2017 20:28:31 -0800 Subject: [PATCH 0734/2608] Added lock profiling and output for global locks (ctl, prof and base). 
--- include/jemalloc/internal/ctl_externs.h | 11 +- include/jemalloc/internal/ctl_structs.h | 4 + include/jemalloc/internal/ctl_types.h | 5 + include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/prof_externs.h | 2 + src/arena.c | 6 +- src/ctl.c | 144 ++++++++++++------ src/prof.c | 3 +- src/stats.c | 74 ++++++--- 9 files changed, 173 insertions(+), 77 deletions(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 812ec4fa..33ca2039 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -4,10 +4,13 @@ /* Maximum ctl tree depth. */ #define CTL_MAX_DEPTH 7 -#define NUM_ARENA_PROF_LOCKS 6 -#define NUM_LOCK_PROF_COUNTERS 7 -const char *arena_lock_names[NUM_ARENA_PROF_LOCKS]; -const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS]; +#define NUM_GLOBAL_PROF_LOCKS 3 +#define NUM_ARENA_PROF_LOCKS 6 +#define NUM_LOCK_PROF_COUNTERS 7 + +extern const char *arena_lock_names[NUM_ARENA_PROF_LOCKS]; +extern const char *global_lock_names[NUM_GLOBAL_PROF_LOCKS]; +extern const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS]; int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index 4df43d90..e467a247 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -41,6 +41,10 @@ struct ctl_stats_s { size_t resident; size_t mapped; size_t retained; + +#define MTX(mutex) lock_prof_data_t mutex##_mtx_data; +GLOBAL_PROF_MUTEXES +#undef MTX }; struct ctl_arena_s { diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h index 7853a4b2..562418ca 100644 --- a/include/jemalloc/internal/ctl_types.h +++ b/include/jemalloc/internal/ctl_types.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_CTL_TYPES_H #define JEMALLOC_INTERNAL_CTL_TYPES_H +#define 
GLOBAL_PROF_MUTEXES \ + MTX(base) \ + MTX(ctl) \ + MTX(prof) + typedef struct ctl_node_s ctl_node_t; typedef struct ctl_named_node_s ctl_named_node_t; typedef struct ctl_indexed_node_s ctl_indexed_node_t; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 14ecdeda..3c5a21b5 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -106,6 +106,7 @@ bootstrap_calloc bootstrap_free bootstrap_malloc bt_init +bt2gctx_mtx buferror ckh_count ckh_delete diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 985532f6..cbd9795b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H #define JEMALLOC_INTERNAL_PROF_EXTERNS_H +extern malloc_mutex_t bt2gctx_mtx; + extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; diff --git a/src/arena.c b/src/arena.c index 0c12004d..266c85fb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -298,9 +298,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } #define READ_ARENA_MUTEX_PROF_DATA(mtx, data) \ - malloc_mutex_lock(tsdn, &arena->mtx); \ - malloc_lock_prof_read(tsdn, &astats->data, &arena->mtx); \ - malloc_mutex_unlock(tsdn, &arena->mtx); + malloc_mutex_lock(tsdn, &arena->mtx); \ + malloc_lock_prof_read(tsdn, &astats->data, &arena->mtx); \ + malloc_mutex_unlock(tsdn, &arena->mtx); /* Gather per arena mutex profiling data. 
*/ READ_ARENA_MUTEX_PROF_DATA(large_mtx, large_mtx_data) diff --git a/src/ctl.c b/src/ctl.c index 016275e5..c5ef70b5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -13,6 +13,12 @@ static bool ctl_initialized; static ctl_stats_t *ctl_stats; static ctl_arenas_t *ctl_arenas; +const char *global_lock_names[NUM_GLOBAL_PROF_LOCKS] = { + "base", + "prof", + "ctl" +}; + const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = { "large", "extent_freelist", @@ -21,6 +27,7 @@ const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = { "decay", "tcache" }; + const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS] = { "num_ops", "num_wait", @@ -196,16 +203,26 @@ CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) -#define STATS_LOCKS_CTL_PROTO_GEN(l, n) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_num_ops) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_num_wait) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_num_spin_acq) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_num_owner_switch) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_total_wait_time) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_max_wait_time) \ -CTL_PROTO(stats_arenas_i_##l##_##n##_max_num_thds) +#define LOCK_STATS_CTL_PROTO_GEN(n) \ +CTL_PROTO(stats_##n##_num_ops) \ +CTL_PROTO(stats_##n##_num_wait) \ +CTL_PROTO(stats_##n##_num_spin_acq) \ +CTL_PROTO(stats_##n##_num_owner_switch) \ +CTL_PROTO(stats_##n##_total_wait_time) \ +CTL_PROTO(stats_##n##_max_wait_time) \ +CTL_PROTO(stats_##n##_max_num_thds) -#define ARENA_LOCKS_CTL_PROTO_GEN(n) STATS_LOCKS_CTL_PROTO_GEN(locks, n) +/* Global locks. */ +LOCK_STATS_CTL_PROTO_GEN(locks_base) +LOCK_STATS_CTL_PROTO_GEN(locks_prof) +LOCK_STATS_CTL_PROTO_GEN(locks_ctl) + +/* Arena bin locks. */ +LOCK_STATS_CTL_PROTO_GEN(arenas_i_bins_j_lock) + +#define ARENA_LOCKS_CTL_PROTO_GEN(n) \ + LOCK_STATS_CTL_PROTO_GEN(arenas_i_locks_##n) +/* Per arena locks. 
*/ ARENA_LOCKS_CTL_PROTO_GEN(large) ARENA_LOCKS_CTL_PROTO_GEN(extent_freelist) ARENA_LOCKS_CTL_PROTO_GEN(extents_cached) @@ -213,9 +230,7 @@ ARENA_LOCKS_CTL_PROTO_GEN(extents_retained) ARENA_LOCKS_CTL_PROTO_GEN(decay) ARENA_LOCKS_CTL_PROTO_GEN(tcache) #undef ARENA_LOCKS_CTL_PROTO_GEN - -STATS_LOCKS_CTL_PROTO_GEN(bins_j, lock) -#undef STATS_LOCKS_CTL_PROTO_GEN +#undef LOCK_STATS_CTL_PROTO_GEN /******************************************************************************/ /* mallctl tree. */ @@ -384,26 +399,27 @@ static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; -#define LOCK_PROF_DATA_NODE(prefix, n) \ -static const ctl_named_node_t prefix##_##n##_node[] = { \ +#define LOCK_PROF_DATA_NODE(prefix) \ +static const ctl_named_node_t stats_##prefix##_node[] = { \ {NAME("num_ops"), \ - CTL(prefix##_##n##_num_ops)}, \ + CTL(stats_##prefix##_num_ops)}, \ {NAME("num_wait"), \ - CTL(prefix##_##n##_num_wait)}, \ + CTL(stats_##prefix##_num_wait)}, \ {NAME("num_spin_acq"), \ - CTL(prefix##_##n##_num_spin_acq)}, \ + CTL(stats_##prefix##_num_spin_acq)}, \ {NAME("num_owner_switch"), \ - CTL(prefix##_##n##_num_owner_switch)}, \ + CTL(stats_##prefix##_num_owner_switch)}, \ {NAME("total_wait_time"), \ - CTL(prefix##_##n##_total_wait_time)}, \ + CTL(stats_##prefix##_total_wait_time)}, \ {NAME("max_wait_time"), \ - CTL(prefix##_##n##_max_wait_time)}, \ + CTL(stats_##prefix##_max_wait_time)}, \ {NAME("max_num_thds"), \ - CTL(prefix##_##n##_max_num_thds)} \ + CTL(stats_##prefix##_max_num_thds)} \ /* Note that # of current waiting thread not provided. 
*/ \ }; -LOCK_PROF_DATA_NODE(stats_arenas_i_bins_j, lock) +LOCK_PROF_DATA_NODE(arenas_i_bins_j_lock) + static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, @@ -439,12 +455,14 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; -LOCK_PROF_DATA_NODE(stats_arenas_i_locks, large) -LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extent_freelist) -LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_cached) -LOCK_PROF_DATA_NODE(stats_arenas_i_locks, extents_retained) -LOCK_PROF_DATA_NODE(stats_arenas_i_locks, decay) -LOCK_PROF_DATA_NODE(stats_arenas_i_locks, tcache) +#define ARENA_LOCK_PROF_DATA_NODE(n) LOCK_PROF_DATA_NODE(arenas_i_locks_##n) + +ARENA_LOCK_PROF_DATA_NODE(large) +ARENA_LOCK_PROF_DATA_NODE(extent_freelist) +ARENA_LOCK_PROF_DATA_NODE(extents_cached) +ARENA_LOCK_PROF_DATA_NODE(extents_retained) +ARENA_LOCK_PROF_DATA_NODE(decay) +ARENA_LOCK_PROF_DATA_NODE(tcache) static const ctl_named_node_t stats_arenas_i_locks_node[] = { {NAME("large"), CHILD(named, stats_arenas_i_locks_large)}, @@ -457,7 +475,6 @@ static const ctl_named_node_t stats_arenas_i_locks_node[] = { {NAME("decay"), CHILD(named, stats_arenas_i_locks_decay)}, {NAME("tcache"), CHILD(named, stats_arenas_i_locks_tcache)} }; -#undef LOCK_PROF_DATA_NODE static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, @@ -493,6 +510,15 @@ static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; +LOCK_PROF_DATA_NODE(locks_base) +LOCK_PROF_DATA_NODE(locks_prof) +LOCK_PROF_DATA_NODE(locks_ctl) +static const ctl_named_node_t stats_locks_node[] = { + {NAME("base"), CHILD(named, stats_locks_base)}, + {NAME("prof"), CHILD(named, stats_locks_prof)}, + {NAME("ctl"), CHILD(named, stats_locks_ctl)} +}; + static const ctl_named_node_t stats_node[] = { {NAME("allocated"), 
CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, @@ -500,8 +526,10 @@ static const ctl_named_node_t stats_node[] = { {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, + {NAME("locks"), CHILD(named, stats_locks)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; +#undef LOCK_PROF_DATA_NODE static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, @@ -925,6 +953,17 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED); ctl_stats->retained = atomic_load_zu( &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED); + +#define READ_GLOBAL_MUTEX_PROF_DATA(mtx, data) \ + malloc_mutex_lock(tsdn, &mtx); \ + malloc_lock_prof_read(tsdn, &ctl_stats->data, &mtx); \ + malloc_mutex_unlock(tsdn, &mtx); + + READ_GLOBAL_MUTEX_PROF_DATA(b0get()->mtx, base_mtx_data); + READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data); + /* We own ctl mutex already. */ + malloc_lock_prof_read(tsdn, &ctl_stats->ctl_mtx_data, &ctl_mtx); +#undef READ_GLOBAL_MUTEX_PROF_DATA } ctl_arenas->epoch++; } @@ -2413,25 +2452,34 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, uint64_t) /* Intentional. */ /* Lock profiling related APIs below. 
*/ -#define ARENAS_LOCK_CTL_GEN(l, s, d) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_ops, \ - arenas_i(mib[2])->astats->s.d.n_lock_ops, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_wait, \ - arenas_i(mib[2])->astats->s.d.n_wait_times, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_spin_acq, \ - arenas_i(mib[2])->astats->s.d.n_spin_acquired, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_num_owner_switch, \ - arenas_i(mib[2])->astats->s.d.n_owner_switches, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_total_wait_time, \ - arenas_i(mib[2])->astats->s.d.tot_wait_time, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_wait_time, \ - arenas_i(mib[2])->astats->s.d.max_wait_time, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_arenas_i_##l##_max_num_thds, \ - arenas_i(mib[2])->astats->s.d.max_n_thds, uint64_t) +#define RO_LOCK_CTL_GEN(n, l) \ +CTL_RO_CGEN(config_stats, stats_##n##_num_ops, \ + l.n_lock_ops, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_##n##_num_wait, \ + l.n_wait_times, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, \ + l.n_spin_acquired, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \ + l.n_owner_switches, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ + l.tot_wait_time, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ + l.max_wait_time, uint64_t) \ +CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ + l.max_n_thds, uint64_t) +/* Global lock. 
*/ +#define MTX(mutex) RO_LOCK_CTL_GEN(locks_##mutex, ctl_stats->mutex##_mtx_data) +GLOBAL_PROF_MUTEXES +#undef MTX + +/* arena->bins[j].lock */ +RO_LOCK_CTL_GEN(arenas_i_bins_j_lock, + arenas_i(mib[2])->astats->bstats[mib[4]].lock_data) + +/* Per arena locks */ #define ARENAS_ASTATS_LOCK_CTL_GEN(l, d) \ - ARENAS_LOCK_CTL_GEN(locks_##l, astats, d) - + RO_LOCK_CTL_GEN(arenas_i_locks_##l, arenas_i(mib[2])->astats->astats.d) /* arena->large_mtx */ ARENAS_ASTATS_LOCK_CTL_GEN(large, large_mtx_data) /* arena->extent_freelist_mtx */ @@ -2444,8 +2492,8 @@ ARENAS_ASTATS_LOCK_CTL_GEN(extents_retained, extents_retained_mtx_data) ARENAS_ASTATS_LOCK_CTL_GEN(decay, decay_mtx_data) /* arena->tcache_ql_mtx */ ARENAS_ASTATS_LOCK_CTL_GEN(tcache, tcache_mtx_data) -/* arena->bins[j].lock */ -ARENAS_LOCK_CTL_GEN(bins_j_lock, bstats[mib[4]], lock_data) +#undef ARENAS_ASTATS_LOCK_CTL_GEN +#undef RO_LOCK_CTL_GEN CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) diff --git a/src/prof.c b/src/prof.c index b04984b7..4e83ae3f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -78,7 +78,8 @@ static malloc_mutex_t *tdata_locks; * structure that knows about all backtraces currently captured. */ static ckh_t bt2gctx; -static malloc_mutex_t bt2gctx_mtx; +/* Non static to enable profiling. */ +malloc_mutex_t bt2gctx_mtx; /* * Tree of all extant prof_tdata_t structures, regardless of state, diff --git a/src/stats.c b/src/stats.c index 7b690e7f..89ba6693 100644 --- a/src/stats.c +++ b/src/stats.c @@ -127,6 +127,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); + /* Output less info for bin locks to save space. 
*/ uint64_t num_ops, num_wait, max_wait; CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", i, j, &num_wait, uint64_t); @@ -290,8 +291,9 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), } static void -gen_ctl_str(char *str, const char *lock, const char *counter) { - sprintf(str, "stats.arenas.0.locks.%s.%s", lock, counter); +gen_lock_ctl_str(char *str, const char *prefix, const char *lock, + const char *counter) { + sprintf(str, "stats.%s.%s.%s", prefix, lock, counter); } static void read_arena_lock_stats(unsigned arena_ind, @@ -301,13 +303,34 @@ static void read_arena_lock_stats(unsigned arena_ind, unsigned i, j; for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { - gen_ctl_str(cmd, arena_lock_names[i], - lock_counter_names[j]); + gen_lock_ctl_str(cmd, "arenas.0.locks", + arena_lock_names[i], lock_counter_names[j]); CTL_M2_GET(cmd, arena_ind, &results[i][j], uint64_t); } } } +static void lock_stats_output(void (*write_cb)(void *, const char *), + void *cbopaque, const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS], + bool first_mutex) { + if (first_mutex) { + /* Print title. 
*/ + malloc_cprintf(write_cb, cbopaque, + " n_lock_ops n_waiting" + " n_spin_acq n_owner_switch total_wait_ns" + " max_wait_ns max_n_wait_thds\n"); + } + + malloc_cprintf(write_cb, cbopaque, "%s", name); + malloc_cprintf(write_cb, cbopaque, ":%*c", + (int)(19 - strlen(name)), ' '); + + for (unsigned i = 0; i < NUM_LOCK_PROF_COUNTERS; i++) { + malloc_cprintf(write_cb, cbopaque, " %16"FMTu64, stats[i]); + } + malloc_cprintf(write_cb, cbopaque, "\n"); +} + static void stats_arena_locks_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, unsigned arena_ind) { @@ -318,23 +341,9 @@ stats_arena_locks_print(void (*write_cb)(void *, const char *), if (json) { //TODO } else { - malloc_cprintf(write_cb, cbopaque, - " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_wait_thds\n"); - - unsigned i, j; - for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { - malloc_cprintf(write_cb, cbopaque, - "%s", arena_lock_names[i]); - malloc_cprintf(write_cb, cbopaque, ":%*c", - (int)(18 - strlen(arena_lock_names[i])), ' '); - - for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { - malloc_cprintf(write_cb, cbopaque, " %15"FMTu64, - lock_stats[i][j]); - } - malloc_cprintf(write_cb, cbopaque, "\n"); + for (unsigned i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { + lock_stats_output(write_cb, cbopaque, + arena_lock_names[i], lock_stats[i], i == 0); } } @@ -930,6 +939,20 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } } +static void read_global_lock_stats( + uint64_t results[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) { + char cmd[128]; + + unsigned i, j; + for (i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { + for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { + gen_lock_ctl_str(cmd, "locks", global_lock_names[i], + lock_counter_names[j]); + CTL_GET(cmd, &results[i][j], uint64_t); + } + } +} + static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool merged, bool destroyed, bool 
unmerged, bool bins, @@ -942,6 +965,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); + + uint64_t lock_stats[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; + read_global_lock_stats(lock_stats); + if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\"stats\": {\n"); @@ -966,6 +993,11 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, resident, mapped, retained); + + for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { + lock_stats_output(write_cb, cbopaque, + global_lock_names[i], lock_stats[i], i == 0); + } } if (merged || destroyed || unmerged) { From bd2006a41bc6b7e1ef60994db88b955eb3ab6cbd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sun, 12 Mar 2017 01:28:52 -0800 Subject: [PATCH 0735/2608] Added JSON output for lock stats. Also added option 'x' to malloc_stats() to bypass lock section. --- doc/jemalloc.xml.in | 3 +- src/ctl.c | 4 +- src/stats.c | 154 +++++++++++++++++++++++++++++----------- test/unit/stats_print.c | 7 +- 4 files changed, 124 insertions(+), 44 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7faa474d..265da86f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -430,7 +430,8 @@ for (i = 0; i < nbins; i++) { can be specified to omit merged arena, destroyed merged arena, and per arena statistics, respectively; b and l can be specified to omit per size class statistics for bins and large objects, - respectively. Unrecognized characters are silently ignored. Note that + respectively; x can be specified to omit all mutex + statistics. Unrecognized characters are silently ignored. 
Note that thread caching may prevent some statistics from being completely up to date, since extra locking would be required to merge counters that track thread cache operations. diff --git a/src/ctl.c b/src/ctl.c index c5ef70b5..a880c63a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -960,7 +960,9 @@ ctl_refresh(tsdn_t *tsdn) { malloc_mutex_unlock(tsdn, &mtx); READ_GLOBAL_MUTEX_PROF_DATA(b0get()->mtx, base_mtx_data); - READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data); + if (config_prof && opt_prof) { + READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data); + } /* We own ctl mutex already. */ malloc_lock_prof_read(tsdn, &ctl_stats->ctl_mtx_data, &ctl_mtx); #undef READ_GLOBAL_MUTEX_PROF_DATA diff --git a/src/stats.c b/src/stats.c index 89ba6693..b6f2124a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -57,6 +57,25 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { return false; } +static void +gen_lock_ctl_str(char *str, const char *prefix, const char *lock, + const char *counter) { + sprintf(str, "stats.%s.%s.%s", prefix, lock, counter); +} + +static void +read_arena_bin_lock_stats(unsigned arena_ind, unsigned bin_ind, + uint64_t results[NUM_LOCK_PROF_COUNTERS]) { + char cmd[128]; + + unsigned i; + for (i = 0; i < NUM_LOCK_PROF_COUNTERS; i++) { + gen_lock_ctl_str(cmd, "arenas.0.bins.0","lock", + lock_counter_names[i]); + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, &results[i], uint64_t); + } +} + static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool large, unsigned i) { @@ -127,16 +146,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); - /* Output less info for bin locks to save space. 
*/ - uint64_t num_ops, num_wait, max_wait; - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", i, j, - &num_wait, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.max_wait_time", i, j, - &max_wait, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_ops", i, j, - &num_ops, uint64_t); - if (json) { + uint64_t lock_stats[NUM_LOCK_PROF_COUNTERS]; + read_arena_bin_lock_stats(i, j, lock_stats); + malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t{\n" "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" @@ -156,10 +169,21 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curslabs\": %zu\n" - "\t\t\t\t\t}%s\n", + "\t\t\t\t\t\t\"curslabs\": %zu,\n" + "\t\t\t\t\t\t\"lock\": {\n", nreslabs, - curslabs, + curslabs); + + for (unsigned k = 0; k < NUM_LOCK_PROF_COUNTERS; k++) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\t\"%s\": %"FMTu64"%s\n", + lock_counter_names[k], lock_stats[k], + k == NUM_LOCK_PROF_COUNTERS - 1 ? "" : ","); + } + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t}\n" + "\t\t\t\t\t}%s\n", (j + 1 < nbins) ? "," : ""); } else if (!in_gap) { size_t availregs = nregs * curslabs; @@ -183,6 +207,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, not_reached(); } } + /* Output less info for bin locks to save space. 
*/ + uint64_t num_ops, num_wait, max_wait; + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", + i, j, &num_wait, uint64_t); + CTL_M2_M4_GET( + "stats.arenas.0.bins.0.lock.max_wait_time", i, j, + &max_wait, uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_ops", + i, j, &num_ops, uint64_t); + char rate[6]; if (get_rate_str(num_wait, num_ops, rate)) { if (num_ops == 0) { @@ -291,12 +325,7 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), } static void -gen_lock_ctl_str(char *str, const char *prefix, const char *lock, - const char *counter) { - sprintf(str, "stats.%s.%s.%s", prefix, lock, counter); -} - -static void read_arena_lock_stats(unsigned arena_ind, +read_arena_lock_stats(unsigned arena_ind, uint64_t results[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) { char cmd[128]; @@ -310,8 +339,24 @@ static void read_arena_lock_stats(unsigned arena_ind, } } -static void lock_stats_output(void (*write_cb)(void *, const char *), - void *cbopaque, const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS], +static void +lock_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, + const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS], + const char *json_indent, bool last) { + + malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); + for (unsigned i = 0; i < NUM_LOCK_PROF_COUNTERS; i++) { + malloc_cprintf(write_cb, cbopaque, "%s\t\"%s\": %"FMTu64"%s\n", + json_indent, lock_counter_names[i], stats[i], + i < (NUM_LOCK_PROF_COUNTERS - 1) ? "," : ""); + } + malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, + last ? "" : ","); +} + +static void +lock_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, + const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS], bool first_mutex) { if (first_mutex) { /* Print title. 
*/ @@ -333,25 +378,31 @@ static void lock_stats_output(void (*write_cb)(void *, const char *), static void stats_arena_locks_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned arena_ind) { + void *cbopaque, bool json, bool json_end, unsigned arena_ind) { uint64_t lock_stats[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; read_arena_lock_stats(arena_ind, lock_stats); /* Output lock stats. */ if (json) { - //TODO + malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"locks\": {\n"); + for (unsigned i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { + lock_stats_output_json(write_cb, cbopaque, + arena_lock_names[i], lock_stats[i], + "\t\t\t\t\t", (i == NUM_ARENA_PROF_LOCKS - 1)); + } + malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", + json_end ? "" : ","); } else { for (unsigned i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { lock_stats_output(write_cb, cbopaque, arena_lock_names[i], lock_stats[i], i == 0); } } - } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large) { + bool json, unsigned i, bool bins, bool large, bool lock) { unsigned nthreads; const char *dss; ssize_t dirty_decay_time, muzzy_decay_time; @@ -573,14 +624,17 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"resident\": %zu%s\n", resident, (bins || large) ? - "," : ""); + "\t\t\t\t\"resident\": %zu%s\n", resident, + (bins || large || lock) ? 
"," : ""); } else { malloc_cprintf(write_cb, cbopaque, "resident: %12zu\n", resident); } - stats_arena_locks_print(write_cb, cbopaque, json, i); + if (lock) { + stats_arena_locks_print(write_cb, cbopaque, json, + !(bins || large), i); + } if (bins) { stats_arena_bins_print(write_cb, cbopaque, json, large, i); } @@ -956,7 +1010,7 @@ static void read_global_lock_stats( static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large) { + bool large, bool lock) { size_t allocated, active, metadata, resident, mapped, retained; CTL_GET("stats.allocated", &allocated, size_t); @@ -967,7 +1021,9 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.retained", &retained, size_t); uint64_t lock_stats[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; - read_global_lock_stats(lock_stats); + if (lock) { + read_global_lock_stats(lock_stats); + } if (json) { malloc_cprintf(write_cb, cbopaque, @@ -984,19 +1040,31 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mapped\": %zu,\n", mapped); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu\n", retained); + "\t\t\t\"retained\": %zu,\n", retained); + if (lock) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"locks\": {\n"); + for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { + lock_stats_output_json(write_cb, cbopaque, + global_lock_names[i], lock_stats[i], + "\t\t\t\t", i == NUM_GLOBAL_PROF_LOCKS - 1); + } + malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); + } malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (merged || unmerged) ? "," : ""); + "\t\t}%s\n", (merged || unmerged || destroyed) ? 
"," : ""); } else { malloc_cprintf(write_cb, cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, resident, mapped, retained); - - for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { - lock_stats_output(write_cb, cbopaque, - global_lock_names[i], lock_stats[i], i == 0); + if (lock) { + for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { + lock_stats_output(write_cb, cbopaque, + global_lock_names[i], lock_stats[i], + i == 0); + } } } @@ -1043,7 +1111,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\nMerged arenas stats:\n"); } stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large); + MALLCTL_ARENAS_ALL, bins, large, lock); if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", @@ -1064,7 +1132,8 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\nDestroyed arenas stats:\n"); } stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_DESTROYED, bins, large); + MALLCTL_ARENAS_DESTROYED, bins, large, + lock); if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", unmerged ? "," : @@ -1090,7 +1159,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, } stats_arena_print(write_cb, cbopaque, json, i, bins, - large); + large, lock); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -1123,6 +1192,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool unmerged = config_stats; bool bins = true; bool large = true; + bool lock = true; /* * Refresh stats, in case mallctl() was called by the application. 
@@ -1172,6 +1242,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, case 'l': large = false; break; + case 'x': + lock = false; + break; default:; } } @@ -1187,12 +1260,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (general) { - bool more = (merged || unmerged); - stats_general_print(write_cb, cbopaque, json, more); + stats_general_print(write_cb, cbopaque, json, config_stats); } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, destroyed, - unmerged, bins, large); + unmerged, bins, large, lock); } if (json) { diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index f0437891..81778b04 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -938,11 +938,16 @@ TEST_BEGIN(test_stats_print_json) { "Ja", "Jb", "Jl", + "Jx", "Jbl", "Jal", "Jab", "Jabl", - "Jgmdabl", + "Jax", + "Jbx", + "Jlx", + "Jablx", + "Jgmdablx", }; unsigned arena_ind, i; From 64c5f5c17437ea618a2c1a5fe72814c51c46853e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Mar 2017 17:29:03 -0700 Subject: [PATCH 0736/2608] Added "stats.mutexes.reset" mallctl to reset all mutex stats. Also switched from the term "lock" to "mutex". 
--- include/jemalloc/internal/ctl_externs.h | 12 +- include/jemalloc/internal/ctl_structs.h | 4 +- include/jemalloc/internal/mutex_externs.h | 1 + include/jemalloc/internal/mutex_inlines.h | 14 +- include/jemalloc/internal/mutex_structs.h | 20 +- include/jemalloc/internal/mutex_types.h | 2 +- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/stats_structs.h | 14 +- src/arena.c | 6 +- src/ctl.c | 200 +++++++++++------- src/mutex.c | 14 +- src/stats.c | 147 ++++++------- 12 files changed, 248 insertions(+), 187 deletions(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 33ca2039..1b06dd4d 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -4,13 +4,13 @@ /* Maximum ctl tree depth. */ #define CTL_MAX_DEPTH 7 -#define NUM_GLOBAL_PROF_LOCKS 3 -#define NUM_ARENA_PROF_LOCKS 6 -#define NUM_LOCK_PROF_COUNTERS 7 +#define NUM_GLOBAL_PROF_MUTEXES 3 +#define NUM_ARENA_PROF_MUTEXES 6 +#define NUM_MUTEX_PROF_COUNTERS 7 -extern const char *arena_lock_names[NUM_ARENA_PROF_LOCKS]; -extern const char *global_lock_names[NUM_GLOBAL_PROF_LOCKS]; -extern const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS]; +extern const char *arena_mutex_names[NUM_ARENA_PROF_MUTEXES]; +extern const char *global_mutex_names[NUM_GLOBAL_PROF_MUTEXES]; +extern const char *mutex_counter_names[NUM_MUTEX_PROF_COUNTERS]; int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index e467a247..b1ee3555 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -42,8 +42,8 @@ struct ctl_stats_s { size_t mapped; size_t retained; -#define MTX(mutex) lock_prof_data_t mutex##_mtx_data; -GLOBAL_PROF_MUTEXES +#define MTX(mutex) mutex_prof_data_t mutex##_mtx_data; + GLOBAL_PROF_MUTEXES #undef MTX }; diff 
--git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h index ba6418ef..5199d3cf 100644 --- a/include/jemalloc/internal/mutex_externs.h +++ b/include/jemalloc/internal/mutex_externs.h @@ -14,5 +14,6 @@ void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); bool malloc_mutex_boot(void); +void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); #endif /* JEMALLOC_INTERNAL_MUTEX_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 1020eefd..d4703d23 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -9,9 +9,9 @@ bool malloc_mutex_trylock(malloc_mutex_t *mutex); void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data, +void malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex); -void malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data); +void malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) @@ -28,7 +28,7 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) { /* Aggregate lock prof data. 
*/ JEMALLOC_INLINE void -malloc_lock_prof_merge(lock_prof_data_t *sum, lock_prof_data_t *data) { +malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { sum->tot_wait_time += data->tot_wait_time; if (data->max_wait_time > sum->max_wait_time) { sum->max_wait_time = data->max_wait_time; @@ -52,7 +52,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { malloc_mutex_lock_slow(mutex); } /* We own the lock now. Update a few counters. */ - lock_prof_data_t *data = &mutex->prof_data; + mutex_prof_data_t *data = &mutex->prof_data; data->n_lock_ops++; if (data->prev_owner != tsdn) { data->prev_owner = tsdn; @@ -82,10 +82,10 @@ malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { /* Copy the prof data from mutex for processing. */ JEMALLOC_INLINE void -malloc_lock_prof_read(tsdn_t *tsdn, lock_prof_data_t *data, +malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { - lock_prof_data_t *source = &mutex->prof_data; - /* Can only read with the lock. */ + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); *data = *source; diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index ce80e15d..8d6e7eb2 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -1,20 +1,20 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H -struct lock_prof_data_s { +struct mutex_prof_data_s { /* * Counters touched on the slow path, i.e. when there is lock * contention. We update them once we have the lock. */ - /* Total time (in nano seconds) spent waiting on this lock. */ + /* Total time (in nano seconds) spent waiting on this mutex. */ uint64_t tot_wait_time; /* Max time (in nano seconds) spent on a single lock operation. */ uint64_t max_wait_time; - /* # of times have to wait for this lock (after spinning). 
*/ + /* # of times have to wait for this mutex (after spinning). */ uint64_t n_wait_times; - /* # of times acquired the lock through local spinning. */ + /* # of times acquired the mutex through local spinning. */ uint64_t n_spin_acquired; - /* Max # of threads waiting for the lock at the same time. */ + /* Max # of threads waiting for the mutex at the same time. */ uint32_t max_n_thds; /* Current # of threads waiting on the lock. Atomic synced. */ uint32_t n_waiting_thds; @@ -25,9 +25,9 @@ struct lock_prof_data_s { * the lock) so that we have a higher chance of them being on the same * cacheline. */ - /* # of times the new lock holder is different from the previous one. */ + /* # of times the mutex holder is different than the previous one. */ uint64_t n_owner_switches; - /* Previous lock holder, to facilitate n_owner_switches. */ + /* Previous mutex holder, to facilitate n_owner_switches. */ tsdn_t *prev_owner; /* # of lock() operations in total. */ uint64_t n_lock_ops; @@ -38,13 +38,13 @@ struct malloc_mutex_s { struct { /* * prof_data is defined first to reduce cacheline - * bouncing: the data is not touched by the lock holder + * bouncing: the data is not touched by the mutex holder * during unlocking, while might be modified by - * contenders. Having it before the lock itself could + * contenders. Having it before the mutex itself could * avoid prefetching a modified cacheline (for the * unlocking thread). 
*/ - lock_prof_data_t prof_data; + mutex_prof_data_t prof_data; #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 SRWLOCK lock; diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index 0d93fe87..257f69ca 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H #define JEMALLOC_INTERNAL_MUTEX_TYPES_H -typedef struct lock_prof_data_s lock_prof_data_t; +typedef struct mutex_prof_data_s mutex_prof_data_t; typedef struct malloc_mutex_s malloc_mutex_t; #ifdef _WIN32 diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 3c5a21b5..1af1f91b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -269,6 +269,7 @@ lg_floor lg_prof_sample malloc_cprintf malloc_getcpu +malloc_mutex_prof_data_reset malloc_mutex_assert_not_owner malloc_mutex_assert_owner malloc_mutex_boot diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 0e33394b..601c8512 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -57,7 +57,7 @@ struct malloc_bin_stats_s { /* Current number of slabs in this bin. */ size_t curslabs; - lock_prof_data_t lock_data; + mutex_prof_data_t mutex_data; }; struct malloc_large_stats_s { @@ -124,12 +124,12 @@ struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. 
*/ - lock_prof_data_t large_mtx_data; - lock_prof_data_t extent_freelist_mtx_data; - lock_prof_data_t extents_cached_mtx_data; - lock_prof_data_t extents_retained_mtx_data; - lock_prof_data_t decay_mtx_data; - lock_prof_data_t tcache_mtx_data; + mutex_prof_data_t large_mtx_data; + mutex_prof_data_t extent_freelist_mtx_data; + mutex_prof_data_t extents_cached_mtx_data; + mutex_prof_data_t extents_retained_mtx_data; + mutex_prof_data_t decay_mtx_data; + mutex_prof_data_t tcache_mtx_data; /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; diff --git a/src/arena.c b/src/arena.c index 266c85fb..57b79c52 100644 --- a/src/arena.c +++ b/src/arena.c @@ -292,14 +292,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, tbin->ncached * index2size(i)); } } - malloc_lock_prof_read(tsdn, &astats->tcache_mtx_data, + malloc_mutex_prof_read(tsdn, &astats->tcache_mtx_data, &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } #define READ_ARENA_MUTEX_PROF_DATA(mtx, data) \ malloc_mutex_lock(tsdn, &arena->mtx); \ - malloc_lock_prof_read(tsdn, &astats->data, &arena->mtx); \ + malloc_mutex_prof_read(tsdn, &astats->data, &arena->mtx); \ malloc_mutex_unlock(tsdn, &arena->mtx); /* Gather per arena mutex profiling data. 
*/ @@ -317,7 +317,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsdn, &bin->lock); - malloc_lock_prof_read(tsdn, &bstats[i].lock_data, &bin->lock); + malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); bstats[i].nmalloc += bin->stats.nmalloc; bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; diff --git a/src/ctl.c b/src/ctl.c index a880c63a..b2b9e7d9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -13,13 +13,13 @@ static bool ctl_initialized; static ctl_stats_t *ctl_stats; static ctl_arenas_t *ctl_arenas; -const char *global_lock_names[NUM_GLOBAL_PROF_LOCKS] = { +const char *global_mutex_names[NUM_GLOBAL_PROF_MUTEXES] = { "base", "prof", "ctl" }; -const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = { +const char *arena_mutex_names[NUM_ARENA_PROF_MUTEXES] = { "large", "extent_freelist", "extents_cached", @@ -28,7 +28,7 @@ const char *arena_lock_names[NUM_ARENA_PROF_LOCKS] = { "tcache" }; -const char *lock_counter_names[NUM_LOCK_PROF_COUNTERS] = { +const char *mutex_counter_names[NUM_MUTEX_PROF_COUNTERS] = { "num_ops", "num_wait", "num_spin_acq", @@ -203,7 +203,7 @@ CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) -#define LOCK_STATS_CTL_PROTO_GEN(n) \ +#define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ CTL_PROTO(stats_##n##_num_wait) \ CTL_PROTO(stats_##n##_num_spin_acq) \ @@ -212,25 +212,27 @@ CTL_PROTO(stats_##n##_total_wait_time) \ CTL_PROTO(stats_##n##_max_wait_time) \ CTL_PROTO(stats_##n##_max_num_thds) -/* Global locks. */ -LOCK_STATS_CTL_PROTO_GEN(locks_base) -LOCK_STATS_CTL_PROTO_GEN(locks_prof) -LOCK_STATS_CTL_PROTO_GEN(locks_ctl) +/* Global mutexes. */ +MUTEX_STATS_CTL_PROTO_GEN(mutexes_base) +MUTEX_STATS_CTL_PROTO_GEN(mutexes_prof) +MUTEX_STATS_CTL_PROTO_GEN(mutexes_ctl) -/* Arena bin locks. */ -LOCK_STATS_CTL_PROTO_GEN(arenas_i_bins_j_lock) +/* Arena bin mutexes. 
*/ +MUTEX_STATS_CTL_PROTO_GEN(arenas_i_bins_j_mutex) -#define ARENA_LOCKS_CTL_PROTO_GEN(n) \ - LOCK_STATS_CTL_PROTO_GEN(arenas_i_locks_##n) -/* Per arena locks. */ -ARENA_LOCKS_CTL_PROTO_GEN(large) -ARENA_LOCKS_CTL_PROTO_GEN(extent_freelist) -ARENA_LOCKS_CTL_PROTO_GEN(extents_cached) -ARENA_LOCKS_CTL_PROTO_GEN(extents_retained) -ARENA_LOCKS_CTL_PROTO_GEN(decay) -ARENA_LOCKS_CTL_PROTO_GEN(tcache) -#undef ARENA_LOCKS_CTL_PROTO_GEN -#undef LOCK_STATS_CTL_PROTO_GEN +#define ARENA_MUTEXES_CTL_PROTO_GEN(n) \ + MUTEX_STATS_CTL_PROTO_GEN(arenas_i_mutexes_##n) +/* Per arena mutexes. */ +ARENA_MUTEXES_CTL_PROTO_GEN(large) +ARENA_MUTEXES_CTL_PROTO_GEN(extent_freelist) +ARENA_MUTEXES_CTL_PROTO_GEN(extents_cached) +ARENA_MUTEXES_CTL_PROTO_GEN(extents_retained) +ARENA_MUTEXES_CTL_PROTO_GEN(decay) +ARENA_MUTEXES_CTL_PROTO_GEN(tcache) +#undef ARENA_MUTEXES_CTL_PROTO_GEN +#undef MUTEX_STATS_CTL_PROTO_GEN + +CTL_PROTO(stats_mutexes_reset) /******************************************************************************/ /* mallctl tree. */ @@ -399,7 +401,7 @@ static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} }; -#define LOCK_PROF_DATA_NODE(prefix) \ +#define MUTEX_PROF_DATA_NODE(prefix) \ static const ctl_named_node_t stats_##prefix##_node[] = { \ {NAME("num_ops"), \ CTL(stats_##prefix##_num_ops)}, \ @@ -418,7 +420,7 @@ static const ctl_named_node_t stats_##prefix##_node[] = { \ /* Note that # of current waiting thread not provided. 
*/ \ }; -LOCK_PROF_DATA_NODE(arenas_i_bins_j_lock) +MUTEX_PROF_DATA_NODE(arenas_i_bins_j_mutex) static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, @@ -430,7 +432,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, - {NAME("lock"), CHILD(named, stats_arenas_i_bins_j_lock)} + {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { @@ -455,25 +457,25 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; -#define ARENA_LOCK_PROF_DATA_NODE(n) LOCK_PROF_DATA_NODE(arenas_i_locks_##n) +#define ARENA_MUTEX_PROF_DATA_NODE(n) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##n) -ARENA_LOCK_PROF_DATA_NODE(large) -ARENA_LOCK_PROF_DATA_NODE(extent_freelist) -ARENA_LOCK_PROF_DATA_NODE(extents_cached) -ARENA_LOCK_PROF_DATA_NODE(extents_retained) -ARENA_LOCK_PROF_DATA_NODE(decay) -ARENA_LOCK_PROF_DATA_NODE(tcache) +ARENA_MUTEX_PROF_DATA_NODE(large) +ARENA_MUTEX_PROF_DATA_NODE(extent_freelist) +ARENA_MUTEX_PROF_DATA_NODE(extents_cached) +ARENA_MUTEX_PROF_DATA_NODE(extents_retained) +ARENA_MUTEX_PROF_DATA_NODE(decay) +ARENA_MUTEX_PROF_DATA_NODE(tcache) -static const ctl_named_node_t stats_arenas_i_locks_node[] = { - {NAME("large"), CHILD(named, stats_arenas_i_locks_large)}, +static const ctl_named_node_t stats_arenas_i_mutexes_node[] = { + {NAME("large"), CHILD(named, stats_arenas_i_mutexes_large)}, {NAME("extent_freelist"), - CHILD(named, stats_arenas_i_locks_extent_freelist)}, + CHILD(named, stats_arenas_i_mutexes_extent_freelist)}, {NAME("extents_cached"), - CHILD(named, stats_arenas_i_locks_extents_cached)}, + CHILD(named, stats_arenas_i_mutexes_extents_cached)}, {NAME("extents_retained"), - CHILD(named, 
stats_arenas_i_locks_extents_retained)}, - {NAME("decay"), CHILD(named, stats_arenas_i_locks_decay)}, - {NAME("tcache"), CHILD(named, stats_arenas_i_locks_tcache)} + CHILD(named, stats_arenas_i_mutexes_extents_retained)}, + {NAME("decay"), CHILD(named, stats_arenas_i_mutexes_decay)}, + {NAME("tcache"), CHILD(named, stats_arenas_i_mutexes_tcache)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -500,7 +502,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, - {NAME("locks"), CHILD(named, stats_arenas_i_locks)} + {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} @@ -510,13 +512,14 @@ static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; -LOCK_PROF_DATA_NODE(locks_base) -LOCK_PROF_DATA_NODE(locks_prof) -LOCK_PROF_DATA_NODE(locks_ctl) -static const ctl_named_node_t stats_locks_node[] = { - {NAME("base"), CHILD(named, stats_locks_base)}, - {NAME("prof"), CHILD(named, stats_locks_prof)}, - {NAME("ctl"), CHILD(named, stats_locks_ctl)} +MUTEX_PROF_DATA_NODE(mutexes_base) +MUTEX_PROF_DATA_NODE(mutexes_prof) +MUTEX_PROF_DATA_NODE(mutexes_ctl) +static const ctl_named_node_t stats_mutexes_node[] = { + {NAME("base"), CHILD(named, stats_mutexes_base)}, + {NAME("prof"), CHILD(named, stats_mutexes_prof)}, + {NAME("ctl"), CHILD(named, stats_mutexes_ctl)}, + {NAME("reset"), CTL(stats_mutexes_reset)} }; static const ctl_named_node_t stats_node[] = { @@ -526,10 +529,10 @@ static const ctl_named_node_t stats_node[] = { {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, - {NAME("locks"), CHILD(named, stats_locks)}, + {NAME("mutexes"), CHILD(named, stats_mutexes)}, {NAME("arenas"), CHILD(indexed, 
stats_arenas)} }; -#undef LOCK_PROF_DATA_NODE +#undef MUTEX_PROF_DATA_NODE static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, @@ -768,20 +771,20 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); - malloc_lock_prof_merge(&(sdstats->astats.large_mtx_data), + malloc_mutex_prof_merge(&(sdstats->astats.large_mtx_data), &(astats->astats.large_mtx_data)); - malloc_lock_prof_merge( + malloc_mutex_prof_merge( &(sdstats->astats.extent_freelist_mtx_data), &(astats->astats.extent_freelist_mtx_data)); - malloc_lock_prof_merge( + malloc_mutex_prof_merge( &(sdstats->astats.extents_cached_mtx_data), &(astats->astats.extents_cached_mtx_data)); - malloc_lock_prof_merge( + malloc_mutex_prof_merge( &(sdstats->astats.extents_retained_mtx_data), &(astats->astats.extents_retained_mtx_data)); - malloc_lock_prof_merge(&(sdstats->astats.decay_mtx_data), + malloc_mutex_prof_merge(&(sdstats->astats.decay_mtx_data), &(astats->astats.decay_mtx_data)); - malloc_lock_prof_merge(&(sdstats->astats.tcache_mtx_data), + malloc_mutex_prof_merge(&(sdstats->astats.tcache_mtx_data), &(astats->astats.tcache_mtx_data)); if (!destroyed) { @@ -849,8 +852,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, } else { assert(astats->bstats[i].curslabs == 0); } - malloc_lock_prof_merge(&sdstats->bstats[i].lock_data, - &astats->bstats[i].lock_data); + malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, + &astats->bstats[i].mutex_data); } for (i = 0; i < NSIZES - NBINS; i++) { @@ -956,7 +959,7 @@ ctl_refresh(tsdn_t *tsdn) { #define READ_GLOBAL_MUTEX_PROF_DATA(mtx, data) \ malloc_mutex_lock(tsdn, &mtx); \ - malloc_lock_prof_read(tsdn, &ctl_stats->data, &mtx); \ + malloc_mutex_prof_read(tsdn, &ctl_stats->data, &mtx); \ malloc_mutex_unlock(tsdn, &mtx); READ_GLOBAL_MUTEX_PROF_DATA(b0get()->mtx, base_mtx_data); @@ -964,7 +967,7 @@ 
ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data); } /* We own ctl mutex already. */ - malloc_lock_prof_read(tsdn, &ctl_stats->ctl_mtx_data, &ctl_mtx); + malloc_mutex_prof_read(tsdn, &ctl_stats->ctl_mtx_data, &ctl_mtx); #undef READ_GLOBAL_MUTEX_PROF_DATA } ctl_arenas->epoch++; @@ -2454,7 +2457,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, uint64_t) /* Intentional. */ /* Lock profiling related APIs below. */ -#define RO_LOCK_CTL_GEN(n, l) \ +#define RO_MUTEX_CTL_GEN(n, l) \ CTL_RO_CGEN(config_stats, stats_##n##_num_ops, \ l.n_lock_ops, uint64_t) \ CTL_RO_CGEN(config_stats, stats_##n##_num_wait, \ @@ -2470,32 +2473,81 @@ CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ l.max_n_thds, uint64_t) -/* Global lock. */ -#define MTX(mutex) RO_LOCK_CTL_GEN(locks_##mutex, ctl_stats->mutex##_mtx_data) +/* Global mutexes. */ +#define MTX(mutex) \ + RO_MUTEX_CTL_GEN(mutexes_##mutex, ctl_stats->mutex##_mtx_data) GLOBAL_PROF_MUTEXES #undef MTX /* arena->bins[j].lock */ -RO_LOCK_CTL_GEN(arenas_i_bins_j_lock, - arenas_i(mib[2])->astats->bstats[mib[4]].lock_data) +RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, + arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) -/* Per arena locks */ -#define ARENAS_ASTATS_LOCK_CTL_GEN(l, d) \ - RO_LOCK_CTL_GEN(arenas_i_locks_##l, arenas_i(mib[2])->astats->astats.d) +/* Per arena mutexes */ +#define ARENAS_ASTATS_MUTEX_CTL_GEN(l, d) \ + RO_MUTEX_CTL_GEN(arenas_i_mutexes_##l, arenas_i(mib[2])->astats->astats.d) /* arena->large_mtx */ -ARENAS_ASTATS_LOCK_CTL_GEN(large, large_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(large, large_mtx_data) /* arena->extent_freelist_mtx */ -ARENAS_ASTATS_LOCK_CTL_GEN(extent_freelist, extent_freelist_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(extent_freelist, extent_freelist_mtx_data) /* arena->extents_cached.mtx */ -ARENAS_ASTATS_LOCK_CTL_GEN(extents_cached, extents_cached_mtx_data) 
+ARENAS_ASTATS_MUTEX_CTL_GEN(extents_cached, extents_cached_mtx_data) /* arena->extents_retained.mtx */ -ARENAS_ASTATS_LOCK_CTL_GEN(extents_retained, extents_retained_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(extents_retained, extents_retained_mtx_data) /* arena->decay.mtx */ -ARENAS_ASTATS_LOCK_CTL_GEN(decay, decay_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(decay, decay_mtx_data) /* arena->tcache_ql_mtx */ -ARENAS_ASTATS_LOCK_CTL_GEN(tcache, tcache_mtx_data) -#undef ARENAS_ASTATS_LOCK_CTL_GEN -#undef RO_LOCK_CTL_GEN +ARENAS_ASTATS_MUTEX_CTL_GEN(tcache, tcache_mtx_data) +#undef ARENAS_ASTATS_MUTEX_CTL_GEN +#undef RO_MUTEX_CTL_GEN + +/* Resets all mutex stats, including global, arena and bin mutexes. */ +static int +stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + if (!config_stats) { + return ENOENT; + } + + tsdn_t *tsdn = tsd_tsdn(tsd); + +#define MUTEX_PROF_RESET(mtx) \ + malloc_mutex_lock(tsdn, &mtx); \ + malloc_mutex_prof_data_reset(tsdn, &mtx); \ + malloc_mutex_unlock(tsdn, &mtx); + + /* Global mutexes: base, prof and ctl. */ + MUTEX_PROF_RESET(b0get()->mtx); + if (config_prof && opt_prof) { + MUTEX_PROF_RESET(bt2gctx_mtx); + } + MUTEX_PROF_RESET(ctl_mtx); + + /* Per arena mutexes. 
*/ + unsigned n = narenas_total_get(); + + for (unsigned i = 0; i < n; i++) { + arena_t *arena = arena_get(tsdn, i, false); + if (!arena) { + continue; + } + MUTEX_PROF_RESET(arena->large_mtx); + MUTEX_PROF_RESET(arena->extent_freelist_mtx); + MUTEX_PROF_RESET(arena->extents_cached.mtx); + MUTEX_PROF_RESET(arena->extents_retained.mtx); + MUTEX_PROF_RESET(arena->decay.mtx); + if (config_tcache) { + MUTEX_PROF_RESET(arena->tcache_ql_mtx); + } + + for (szind_t i = 0; i < NBINS; i++) { + arena_bin_t *bin = &arena->bins[i]; + MUTEX_PROF_RESET(bin->lock); + } + } +#undef MUTEX_PROF_RESET + return 0; +} CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) diff --git a/src/mutex.c b/src/mutex.c index af6f3c19..82a5fa3e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -67,7 +67,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { - lock_prof_data_t *data = &mutex->prof_data; + mutex_prof_data_t *data = &mutex->prof_data; {//TODO: a smart spin policy if (!malloc_mutex_trylock(mutex)) { @@ -108,15 +108,21 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { } static void -lock_prof_data_init(lock_prof_data_t *data) { - memset(data, 0, sizeof(lock_prof_data_t)); +mutex_prof_data_init(mutex_prof_data_t *data) { + memset(data, 0, sizeof(mutex_prof_data_t)); data->prev_owner = NULL; } +void +malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex) { + malloc_mutex_assert_owner(tsdn, mutex); + mutex_prof_data_init(&mutex->prof_data); +} + bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank) { - lock_prof_data_init(&mutex->prof_data); + mutex_prof_data_init(&mutex->prof_data); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); diff --git a/src/stats.c b/src/stats.c index b6f2124a..ada95e92 100644 --- a/src/stats.c +++ b/src/stats.c @@ -58,20 +58,20 @@ get_rate_str(uint64_t dividend, 
uint64_t divisor, char str[6]) { } static void -gen_lock_ctl_str(char *str, const char *prefix, const char *lock, +gen_mutex_ctl_str(char *str, const char *prefix, const char *mutex, const char *counter) { - sprintf(str, "stats.%s.%s.%s", prefix, lock, counter); + malloc_snprintf(str, 128, "stats.%s.%s.%s", prefix, mutex, counter); } static void -read_arena_bin_lock_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[NUM_LOCK_PROF_COUNTERS]) { +read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, + uint64_t results[NUM_MUTEX_PROF_COUNTERS]) { char cmd[128]; unsigned i; - for (i = 0; i < NUM_LOCK_PROF_COUNTERS; i++) { - gen_lock_ctl_str(cmd, "arenas.0.bins.0","lock", - lock_counter_names[i]); + for (i = 0; i < NUM_MUTEX_PROF_COUNTERS; i++) { + gen_mutex_ctl_str(cmd, "arenas.0.bins.0","mutex", + mutex_counter_names[i]); CTL_M2_M4_GET(cmd, arena_ind, bin_ind, &results[i], uint64_t); } } @@ -147,8 +147,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); if (json) { - uint64_t lock_stats[NUM_LOCK_PROF_COUNTERS]; - read_arena_bin_lock_stats(i, j, lock_stats); + uint64_t mutex_stats[NUM_MUTEX_PROF_COUNTERS]; + read_arena_bin_mutex_stats(i, j, mutex_stats); malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t{\n" @@ -170,15 +170,15 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" "\t\t\t\t\t\t\"curslabs\": %zu,\n" - "\t\t\t\t\t\t\"lock\": {\n", + "\t\t\t\t\t\t\"mutex\": {\n", nreslabs, curslabs); - for (unsigned k = 0; k < NUM_LOCK_PROF_COUNTERS; k++) { + for (unsigned k = 0; k < NUM_MUTEX_PROF_COUNTERS; k++) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t\t\t\"%s\": %"FMTu64"%s\n", - lock_counter_names[k], lock_stats[k], - k == NUM_LOCK_PROF_COUNTERS - 1 ? "" : ","); + mutex_counter_names[k], mutex_stats[k], + k == NUM_MUTEX_PROF_COUNTERS - 1 ? 
"" : ","); } malloc_cprintf(write_cb, cbopaque, @@ -207,14 +207,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, not_reached(); } } - /* Output less info for bin locks to save space. */ + /* Output less info for bin mutexes to save space. */ uint64_t num_ops, num_wait, max_wait; - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_wait", + CTL_M2_M4_GET("stats.arenas.0.bins.0.mutex.num_wait", i, j, &num_wait, uint64_t); CTL_M2_M4_GET( - "stats.arenas.0.bins.0.lock.max_wait_time", i, j, + "stats.arenas.0.bins.0.mutex.max_wait_time", i, j, &max_wait, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.lock.num_ops", + CTL_M2_M4_GET("stats.arenas.0.bins.0.mutex.num_ops", i, j, &num_ops, uint64_t); char rate[6]; @@ -325,38 +325,38 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), } static void -read_arena_lock_stats(unsigned arena_ind, - uint64_t results[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) { +read_arena_mutex_stats(unsigned arena_ind, + uint64_t results[NUM_ARENA_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]) { char cmd[128]; unsigned i, j; - for (i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { - for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { - gen_lock_ctl_str(cmd, "arenas.0.locks", - arena_lock_names[i], lock_counter_names[j]); + for (i = 0; i < NUM_ARENA_PROF_MUTEXES; i++) { + for (j = 0; j < NUM_MUTEX_PROF_COUNTERS; j++) { + gen_mutex_ctl_str(cmd, "arenas.0.mutexes", + arena_mutex_names[i], mutex_counter_names[j]); CTL_M2_GET(cmd, arena_ind, &results[i][j], uint64_t); } } } static void -lock_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS], +mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, + const char *name, uint64_t stats[NUM_MUTEX_PROF_COUNTERS], const char *json_indent, bool last) { malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - for (unsigned i = 0; i < NUM_LOCK_PROF_COUNTERS; 
i++) { + for (unsigned i = 0; i < NUM_MUTEX_PROF_COUNTERS; i++) { malloc_cprintf(write_cb, cbopaque, "%s\t\"%s\": %"FMTu64"%s\n", - json_indent, lock_counter_names[i], stats[i], - i < (NUM_LOCK_PROF_COUNTERS - 1) ? "," : ""); + json_indent, mutex_counter_names[i], stats[i], + i < (NUM_MUTEX_PROF_COUNTERS - 1) ? "," : ""); } malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, last ? "" : ","); } static void -lock_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[NUM_LOCK_PROF_COUNTERS], +mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, + const char *name, uint64_t stats[NUM_MUTEX_PROF_COUNTERS], bool first_mutex) { if (first_mutex) { /* Print title. */ @@ -370,39 +370,39 @@ lock_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, ":%*c", (int)(19 - strlen(name)), ' '); - for (unsigned i = 0; i < NUM_LOCK_PROF_COUNTERS; i++) { + for (unsigned i = 0; i < NUM_MUTEX_PROF_COUNTERS; i++) { malloc_cprintf(write_cb, cbopaque, " %16"FMTu64, stats[i]); } malloc_cprintf(write_cb, cbopaque, "\n"); } static void -stats_arena_locks_print(void (*write_cb)(void *, const char *), +stats_arena_mutexes_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t lock_stats[NUM_ARENA_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; - read_arena_lock_stats(arena_ind, lock_stats); + uint64_t mutex_stats[NUM_ARENA_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]; + read_arena_mutex_stats(arena_ind, mutex_stats); - /* Output lock stats. */ + /* Output mutex stats. 
*/ if (json) { - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"locks\": {\n"); - for (unsigned i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { - lock_stats_output_json(write_cb, cbopaque, - arena_lock_names[i], lock_stats[i], - "\t\t\t\t\t", (i == NUM_ARENA_PROF_LOCKS - 1)); + malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); + for (unsigned i = 0; i < NUM_ARENA_PROF_MUTEXES; i++) { + mutex_stats_output_json(write_cb, cbopaque, + arena_mutex_names[i], mutex_stats[i], + "\t\t\t\t\t", (i == NUM_ARENA_PROF_MUTEXES - 1)); } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", json_end ? "" : ","); } else { - for (unsigned i = 0; i < NUM_ARENA_PROF_LOCKS; i++) { - lock_stats_output(write_cb, cbopaque, - arena_lock_names[i], lock_stats[i], i == 0); + for (unsigned i = 0; i < NUM_ARENA_PROF_MUTEXES; i++) { + mutex_stats_output(write_cb, cbopaque, + arena_mutex_names[i], mutex_stats[i], i == 0); } } } static void stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large, bool lock) { + bool json, unsigned i, bool bins, bool large, bool mutex) { unsigned nthreads; const char *dss; ssize_t dirty_decay_time, muzzy_decay_time; @@ -625,14 +625,14 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"resident\": %zu%s\n", resident, - (bins || large || lock) ? "," : ""); + (bins || large || mutex) ? 
"," : ""); } else { malloc_cprintf(write_cb, cbopaque, "resident: %12zu\n", resident); } - if (lock) { - stats_arena_locks_print(write_cb, cbopaque, json, + if (mutex) { + stats_arena_mutexes_print(write_cb, cbopaque, json, !(bins || large), i); } if (bins) { @@ -993,15 +993,16 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } } -static void read_global_lock_stats( - uint64_t results[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]) { +static void +read_global_mutex_stats( + uint64_t results[NUM_GLOBAL_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]) { char cmd[128]; unsigned i, j; - for (i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { - for (j = 0; j < NUM_LOCK_PROF_COUNTERS; j++) { - gen_lock_ctl_str(cmd, "locks", global_lock_names[i], - lock_counter_names[j]); + for (i = 0; i < NUM_GLOBAL_PROF_MUTEXES; i++) { + for (j = 0; j < NUM_MUTEX_PROF_COUNTERS; j++) { + gen_mutex_ctl_str(cmd, "mutexes", global_mutex_names[i], + mutex_counter_names[j]); CTL_GET(cmd, &results[i][j], uint64_t); } } @@ -1010,7 +1011,7 @@ static void read_global_lock_stats( static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large, bool lock) { + bool large, bool mutex) { size_t allocated, active, metadata, resident, mapped, retained; CTL_GET("stats.allocated", &allocated, size_t); @@ -1020,9 +1021,9 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t lock_stats[NUM_GLOBAL_PROF_LOCKS][NUM_LOCK_PROF_COUNTERS]; - if (lock) { - read_global_lock_stats(lock_stats); + uint64_t mutex_stats[NUM_GLOBAL_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]; + if (mutex) { + read_global_mutex_stats(mutex_stats); } if (json) { @@ -1041,14 +1042,14 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\"mapped\": %zu,\n", mapped); 
malloc_cprintf(write_cb, cbopaque, "\t\t\t\"retained\": %zu,\n", retained); - if (lock) { + if (mutex) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"locks\": {\n"); + "\t\t\t\"mutexes\": {\n"); - for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { - lock_stats_output_json(write_cb, cbopaque, - global_lock_names[i], lock_stats[i], - "\t\t\t\t", i == NUM_GLOBAL_PROF_LOCKS - 1); + for (unsigned i = 0; i < NUM_GLOBAL_PROF_MUTEXES; i++) { + mutex_stats_output_json(write_cb, cbopaque, + global_mutex_names[i], mutex_stats[i], + "\t\t\t\t", i == NUM_GLOBAL_PROF_MUTEXES - 1); } malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); } @@ -1059,10 +1060,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, resident, mapped, retained); - if (lock) { - for (unsigned i = 0; i < NUM_GLOBAL_PROF_LOCKS; i++) { - lock_stats_output(write_cb, cbopaque, - global_lock_names[i], lock_stats[i], + if (mutex) { + for (unsigned i = 0; i < NUM_GLOBAL_PROF_MUTEXES; i++) { + mutex_stats_output(write_cb, cbopaque, + global_mutex_names[i], mutex_stats[i], i == 0); } } @@ -1111,7 +1112,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\nMerged arenas stats:\n"); } stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large, lock); + MALLCTL_ARENAS_ALL, bins, large, mutex); if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", @@ -1133,7 +1134,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, } stats_arena_print(write_cb, cbopaque, json, MALLCTL_ARENAS_DESTROYED, bins, large, - lock); + mutex); if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", unmerged ? 
"," : @@ -1159,7 +1160,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, } stats_arena_print(write_cb, cbopaque, json, i, bins, - large, lock); + large, mutex); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -1192,7 +1193,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bool unmerged = config_stats; bool bins = true; bool large = true; - bool lock = true; + bool mutex = true; /* * Refresh stats, in case mallctl() was called by the application. @@ -1243,7 +1244,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, large = false; break; case 'x': - lock = false; + mutex = false; break; default:; } @@ -1264,7 +1265,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, destroyed, - unmerged, bins, large, lock); + unmerged, bins, large, mutex); } if (json) { From 20b8c70e9f0177d3276504ec5e3f631e1b69df87 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 15 Mar 2017 14:00:57 -0700 Subject: [PATCH 0737/2608] Added extents_dirty / _muzzy mutexes, as well as decay_dirty / _muzzy. 
--- include/jemalloc/internal/ctl_externs.h | 2 +- include/jemalloc/internal/stats_structs.h | 8 ++- src/arena.c | 11 +-- src/ctl.c | 81 ++++++++++++++--------- 4 files changed, 61 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 1b06dd4d..3eddba34 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -5,7 +5,7 @@ #define CTL_MAX_DEPTH 7 #define NUM_GLOBAL_PROF_MUTEXES 3 -#define NUM_ARENA_PROF_MUTEXES 6 +#define NUM_ARENA_PROF_MUTEXES 8 #define NUM_MUTEX_PROF_COUNTERS 7 extern const char *arena_mutex_names[NUM_ARENA_PROF_MUTEXES]; diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 601c8512..5a9624fb 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -126,10 +126,12 @@ struct arena_stats_s { mutex_prof_data_t large_mtx_data; mutex_prof_data_t extent_freelist_mtx_data; - mutex_prof_data_t extents_cached_mtx_data; + mutex_prof_data_t extents_dirty_mtx_data; + mutex_prof_data_t extents_muzzy_mtx_data; mutex_prof_data_t extents_retained_mtx_data; - mutex_prof_data_t decay_mtx_data; - mutex_prof_data_t tcache_mtx_data; + mutex_prof_data_t decay_dirty_mtx_data; + mutex_prof_data_t decay_muzzy_mtx_data; + mutex_prof_data_t tcache_list_mtx_data; /* One element for each large size class. 
*/ malloc_large_stats_t lstats[NSIZES - NBINS]; diff --git a/src/arena.c b/src/arena.c index 57b79c52..6bb67338 100644 --- a/src/arena.c +++ b/src/arena.c @@ -292,7 +292,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, tbin->ncached * index2size(i)); } } - malloc_mutex_prof_read(tsdn, &astats->tcache_mtx_data, + malloc_mutex_prof_read(tsdn, &astats->tcache_list_mtx_data, &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } @@ -306,11 +306,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, large_mtx_data) READ_ARENA_MUTEX_PROF_DATA(extent_freelist_mtx, extent_freelist_mtx_data) - READ_ARENA_MUTEX_PROF_DATA(extents_cached.mtx, - extents_cached_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx, + extents_dirty_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx, + extents_muzzy_mtx_data) READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, extents_retained_mtx_data) - READ_ARENA_MUTEX_PROF_DATA(decay.mtx, decay_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, decay_dirty_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, decay_muzzy_mtx_data) #undef READ_ARENA_MUTEX_PROF_DATA for (szind_t i = 0; i < NBINS; i++) { diff --git a/src/ctl.c b/src/ctl.c index b2b9e7d9..b6b031c3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -22,10 +22,12 @@ const char *global_mutex_names[NUM_GLOBAL_PROF_MUTEXES] = { const char *arena_mutex_names[NUM_ARENA_PROF_MUTEXES] = { "large", "extent_freelist", - "extents_cached", + "extents_dirty", + "extents_muzzy", "extents_retained", - "decay", - "tcache" + "decay_dirty", + "decay_muzzy", + "tcache_list" }; const char *mutex_counter_names[NUM_MUTEX_PROF_COUNTERS] = { @@ -225,10 +227,12 @@ MUTEX_STATS_CTL_PROTO_GEN(arenas_i_bins_j_mutex) /* Per arena mutexes. 
*/ ARENA_MUTEXES_CTL_PROTO_GEN(large) ARENA_MUTEXES_CTL_PROTO_GEN(extent_freelist) -ARENA_MUTEXES_CTL_PROTO_GEN(extents_cached) +ARENA_MUTEXES_CTL_PROTO_GEN(extents_dirty) +ARENA_MUTEXES_CTL_PROTO_GEN(extents_muzzy) ARENA_MUTEXES_CTL_PROTO_GEN(extents_retained) -ARENA_MUTEXES_CTL_PROTO_GEN(decay) -ARENA_MUTEXES_CTL_PROTO_GEN(tcache) +ARENA_MUTEXES_CTL_PROTO_GEN(decay_dirty) +ARENA_MUTEXES_CTL_PROTO_GEN(decay_muzzy) +ARENA_MUTEXES_CTL_PROTO_GEN(tcache_list) #undef ARENA_MUTEXES_CTL_PROTO_GEN #undef MUTEX_STATS_CTL_PROTO_GEN @@ -461,21 +465,30 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { ARENA_MUTEX_PROF_DATA_NODE(large) ARENA_MUTEX_PROF_DATA_NODE(extent_freelist) -ARENA_MUTEX_PROF_DATA_NODE(extents_cached) +ARENA_MUTEX_PROF_DATA_NODE(extents_dirty) +ARENA_MUTEX_PROF_DATA_NODE(extents_muzzy) ARENA_MUTEX_PROF_DATA_NODE(extents_retained) -ARENA_MUTEX_PROF_DATA_NODE(decay) -ARENA_MUTEX_PROF_DATA_NODE(tcache) +ARENA_MUTEX_PROF_DATA_NODE(decay_dirty) +ARENA_MUTEX_PROF_DATA_NODE(decay_muzzy) +ARENA_MUTEX_PROF_DATA_NODE(tcache_list) static const ctl_named_node_t stats_arenas_i_mutexes_node[] = { - {NAME("large"), CHILD(named, stats_arenas_i_mutexes_large)}, + {NAME("large"), + CHILD(named, stats_arenas_i_mutexes_large)}, {NAME("extent_freelist"), CHILD(named, stats_arenas_i_mutexes_extent_freelist)}, - {NAME("extents_cached"), - CHILD(named, stats_arenas_i_mutexes_extents_cached)}, + {NAME("extents_dirty"), + CHILD(named, stats_arenas_i_mutexes_extents_dirty)}, + {NAME("extents_muzzy"), + CHILD(named, stats_arenas_i_mutexes_extents_muzzy)}, {NAME("extents_retained"), CHILD(named, stats_arenas_i_mutexes_extents_retained)}, - {NAME("decay"), CHILD(named, stats_arenas_i_mutexes_decay)}, - {NAME("tcache"), CHILD(named, stats_arenas_i_mutexes_tcache)} + {NAME("decay_dirty"), + CHILD(named, stats_arenas_i_mutexes_decay_dirty)}, + {NAME("decay_muzzy"), + CHILD(named, stats_arenas_i_mutexes_decay_muzzy)}, + {NAME("tcache_list"), + CHILD(named, 
stats_arenas_i_mutexes_tcache_list)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -777,15 +790,20 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &(sdstats->astats.extent_freelist_mtx_data), &(astats->astats.extent_freelist_mtx_data)); malloc_mutex_prof_merge( - &(sdstats->astats.extents_cached_mtx_data), - &(astats->astats.extents_cached_mtx_data)); + &(sdstats->astats.extents_dirty_mtx_data), + &(astats->astats.extents_dirty_mtx_data)); + malloc_mutex_prof_merge( + &(sdstats->astats.extents_muzzy_mtx_data), + &(astats->astats.extents_muzzy_mtx_data)); malloc_mutex_prof_merge( &(sdstats->astats.extents_retained_mtx_data), &(astats->astats.extents_retained_mtx_data)); - malloc_mutex_prof_merge(&(sdstats->astats.decay_mtx_data), - &(astats->astats.decay_mtx_data)); - malloc_mutex_prof_merge(&(sdstats->astats.tcache_mtx_data), - &(astats->astats.tcache_mtx_data)); + malloc_mutex_prof_merge(&(sdstats->astats.decay_dirty_mtx_data), + &(astats->astats.decay_dirty_mtx_data)); + malloc_mutex_prof_merge(&(sdstats->astats.decay_muzzy_mtx_data), + &(astats->astats.decay_muzzy_mtx_data)); + malloc_mutex_prof_merge(&(sdstats->astats.tcache_list_mtx_data), + &(astats->astats.tcache_list_mtx_data)); if (!destroyed) { accum_atomic_zu(&sdstats->astats.base, @@ -2479,25 +2497,20 @@ CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ GLOBAL_PROF_MUTEXES #undef MTX -/* arena->bins[j].lock */ +/* tcache bin mutex */ RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) - /* Per arena mutexes */ #define ARENAS_ASTATS_MUTEX_CTL_GEN(l, d) \ RO_MUTEX_CTL_GEN(arenas_i_mutexes_##l, arenas_i(mib[2])->astats->astats.d) -/* arena->large_mtx */ ARENAS_ASTATS_MUTEX_CTL_GEN(large, large_mtx_data) -/* arena->extent_freelist_mtx */ ARENAS_ASTATS_MUTEX_CTL_GEN(extent_freelist, extent_freelist_mtx_data) -/* arena->extents_cached.mtx */ -ARENAS_ASTATS_MUTEX_CTL_GEN(extents_cached, extents_cached_mtx_data) -/* 
arena->extents_retained.mtx */ +ARENAS_ASTATS_MUTEX_CTL_GEN(extents_dirty, extents_dirty_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(extents_muzzy, extents_muzzy_mtx_data) ARENAS_ASTATS_MUTEX_CTL_GEN(extents_retained, extents_retained_mtx_data) -/* arena->decay.mtx */ -ARENAS_ASTATS_MUTEX_CTL_GEN(decay, decay_mtx_data) -/* arena->tcache_ql_mtx */ -ARENAS_ASTATS_MUTEX_CTL_GEN(tcache, tcache_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(decay_dirty, decay_dirty_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(decay_muzzy, decay_muzzy_mtx_data) +ARENAS_ASTATS_MUTEX_CTL_GEN(tcache_list, tcache_list_mtx_data) #undef ARENAS_ASTATS_MUTEX_CTL_GEN #undef RO_MUTEX_CTL_GEN @@ -2533,9 +2546,11 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->extent_freelist_mtx); - MUTEX_PROF_RESET(arena->extents_cached.mtx); + MUTEX_PROF_RESET(arena->extents_dirty.mtx); + MUTEX_PROF_RESET(arena->extents_muzzy.mtx); MUTEX_PROF_RESET(arena->extents_retained.mtx); - MUTEX_PROF_RESET(arena->decay.mtx); + MUTEX_PROF_RESET(arena->decay_dirty.mtx); + MUTEX_PROF_RESET(arena->decay_muzzy.mtx); if (config_tcache) { MUTEX_PROF_RESET(arena->tcache_ql_mtx); } From 74f78cafdaa0adc885f9670066d3ecf13aee1ba5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 15 Mar 2017 15:31:37 -0700 Subject: [PATCH 0738/2608] Added custom mutex spin. A fixed max spin count is used -- with benchmark results showing it solves almost all problems. As the benchmark used was rather intense, the upper bound could be a little bit high. However it should offer a good tradeoff between spinning and blocking. 
--- include/jemalloc/internal/mutex_inlines.h | 12 +++++++----- include/jemalloc/internal/mutex_types.h | 16 ++++++---------- src/mutex.c | 16 ++++++++++++++-- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index d4703d23..7adcff4e 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -52,11 +52,13 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { malloc_mutex_lock_slow(mutex); } /* We own the lock now. Update a few counters. */ - mutex_prof_data_t *data = &mutex->prof_data; - data->n_lock_ops++; - if (data->prev_owner != tsdn) { - data->prev_owner = tsdn; - data->n_owner_switches++; + if (config_stats) { + mutex_prof_data_t *data = &mutex->prof_data; + data->n_lock_ops++; + if (data->prev_owner != tsdn) { + data->prev_owner = tsdn; + data->n_owner_switches++; + } } } witness_lock(tsdn, &mutex->witness); diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index 257f69ca..3cc7bc2b 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -4,6 +4,12 @@ typedef struct mutex_prof_data_s mutex_prof_data_t; typedef struct malloc_mutex_s malloc_mutex_t; +/* + * Based on benchmark results, a fixed spin with this amount of retries works + * well for our critical sections. + */ +#define MALLOC_MUTEX_MAX_SPIN 250 + #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 # define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) @@ -45,20 +51,10 @@ typedef struct malloc_mutex_s malloc_mutex_t; {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #else -/* TODO: get rid of adaptive mutex once we do our own spin. 
*/ -# if (defined(JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP) && \ - defined(PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP)) -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_ADAPTIVE_NP -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, \ - PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # define MALLOC_MUTEX_INITIALIZER \ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif #endif #endif /* JEMALLOC_INTERNAL_MUTEX_TYPES_H */ diff --git a/src/mutex.c b/src/mutex.c index 82a5fa3e..06ccd42c 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -69,14 +69,26 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - {//TODO: a smart spin policy + if (ncpus == 1) { + goto label_spin_done; + } + + int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; + do { + CPU_SPINWAIT; if (!malloc_mutex_trylock(mutex)) { data->n_spin_acquired++; return; } - } + } while (cnt++ < max_cnt); + if (!config_stats) { + /* Only spin is useful when stats is off. */ + malloc_mutex_lock_final(mutex); + return; + } nstime_t now, before; +label_spin_done: nstime_init(&now, 0); nstime_update(&now); nstime_copy(&before, &now); From f6698ec1e6752e40be9adf43ebf42ab832255afc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 17 Mar 2017 17:42:10 -0700 Subject: [PATCH 0739/2608] Switch to nstime_t for the time related fields in mutex profiling. 
--- include/jemalloc/internal/mutex_inlines.h | 7 +++--- include/jemalloc/internal/mutex_structs.h | 4 ++-- include/jemalloc/internal/mutex_types.h | 3 ++- src/ctl.c | 4 ++-- src/mutex.c | 28 ++++++++++++----------- 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 7adcff4e..3a12a722 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -29,10 +29,11 @@ malloc_mutex_trylock(malloc_mutex_t *mutex) { /* Aggregate lock prof data. */ JEMALLOC_INLINE void malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { - sum->tot_wait_time += data->tot_wait_time; - if (data->max_wait_time > sum->max_wait_time) { - sum->max_wait_time = data->max_wait_time; + nstime_add(&sum->tot_wait_time, &data->tot_wait_time); + if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { + nstime_copy(&sum->max_wait_time, &data->max_wait_time); } + sum->n_wait_times += data->n_wait_times; sum->n_spin_acquired += data->n_spin_acquired; diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 8d6e7eb2..5dddb846 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -7,9 +7,9 @@ struct mutex_prof_data_s { * contention. We update them once we have the lock. */ /* Total time (in nano seconds) spent waiting on this mutex. */ - uint64_t tot_wait_time; + nstime_t tot_wait_time; /* Max time (in nano seconds) spent on a single lock operation. */ - uint64_t max_wait_time; + nstime_t max_wait_time; /* # of times have to wait for this mutex (after spinning). */ uint64_t n_wait_times; /* # of times acquired the mutex through local spinning. 
*/ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index 3cc7bc2b..bd261490 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -34,7 +34,8 @@ typedef struct malloc_mutex_s malloc_mutex_t; # define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif -#define LOCK_PROF_DATA_INITIALIZER {0, 0, 0, 0, 0, 0, 0, NULL, 0} +#define LOCK_PROF_DATA_INITIALIZER \ + {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0} #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER diff --git a/src/ctl.c b/src/ctl.c index b6b031c3..360c6bdc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2485,9 +2485,9 @@ CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, \ CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \ l.n_owner_switches, uint64_t) \ CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ - l.tot_wait_time, uint64_t) \ + nstime_ns(&l.tot_wait_time), uint64_t) \ CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ - l.max_wait_time, uint64_t) \ + nstime_ns(&l.max_wait_time), uint64_t) \ CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ l.max_n_thds, uint64_t) diff --git a/src/mutex.c b/src/mutex.c index 06ccd42c..fa2770a3 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -68,6 +68,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; + UNUSED nstime_t before = NSTIME_ZERO_INITIALIZER; if (ncpus == 1) { goto label_spin_done; @@ -87,12 +88,11 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { malloc_mutex_lock_final(mutex); return; } - nstime_t now, before; label_spin_done: - nstime_init(&now, 0); - nstime_update(&now); - nstime_copy(&before, &now); - + nstime_update(&before); + /* Copy before to after to avoid clock skews. 
*/ + nstime_t after; + nstime_copy(&after, &before); uint32_t n_thds = atomic_add_u32(&data->n_waiting_thds, 1); /* One last try as above two calls may take quite some cycles. */ if (!malloc_mutex_trylock(mutex)) { @@ -103,16 +103,18 @@ label_spin_done: /* True slow path. */ malloc_mutex_lock_final(mutex); - atomic_sub_u32(&data->n_waiting_thds, 1); - nstime_update(&now); - /* Update more slow-path only counters. */ - nstime_subtract(&now, &before); - uint64_t wait_time = nstime_ns(&now); + atomic_sub_u32(&data->n_waiting_thds, 1); + nstime_update(&after); + + nstime_t delta; + nstime_copy(&delta, &after); + nstime_subtract(&delta, &before); + data->n_wait_times++; - data->tot_wait_time += wait_time; - if (wait_time > data->max_wait_time) { - data->max_wait_time = wait_time; + nstime_add(&data->tot_wait_time, &delta); + if (nstime_compare(&data->max_wait_time, &delta) < 0) { + nstime_copy(&data->max_wait_time, &delta); } if (n_thds > data->max_n_thds) { data->max_n_thds = n_thds; From d3fde1c12459f43e653bb842269b082b5635ccc6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Mar 2017 11:56:38 -0700 Subject: [PATCH 0740/2608] Refactor mutex profiling code with x-macros. --- include/jemalloc/internal/ctl_externs.h | 8 - include/jemalloc/internal/ctl_structs.h | 4 +- include/jemalloc/internal/ctl_types.h | 46 +++++- include/jemalloc/internal/stats_structs.h | 9 +- src/arena.c | 24 +-- src/ctl.c | 167 ++++++------------- src/stats.c | 191 +++++++++++++--------- 7 files changed, 221 insertions(+), 228 deletions(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 3eddba34..17bbba06 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -4,14 +4,6 @@ /* Maximum ctl tree depth. 
*/ #define CTL_MAX_DEPTH 7 -#define NUM_GLOBAL_PROF_MUTEXES 3 -#define NUM_ARENA_PROF_MUTEXES 8 -#define NUM_MUTEX_PROF_COUNTERS 7 - -extern const char *arena_mutex_names[NUM_ARENA_PROF_MUTEXES]; -extern const char *global_mutex_names[NUM_GLOBAL_PROF_MUTEXES]; -extern const char *mutex_counter_names[NUM_MUTEX_PROF_COUNTERS]; - int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index b1ee3555..af0f78b9 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -42,9 +42,7 @@ struct ctl_stats_s { size_t mapped; size_t retained; -#define MTX(mutex) mutex_prof_data_t mutex##_mtx_data; - GLOBAL_PROF_MUTEXES -#undef MTX + mutex_prof_data_t mutex_prof_data[num_global_prof_mutexes]; }; struct ctl_arena_s { diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h index 562418ca..1b5c76f9 100644 --- a/include/jemalloc/internal/ctl_types.h +++ b/include/jemalloc/internal/ctl_types.h @@ -2,9 +2,49 @@ #define JEMALLOC_INTERNAL_CTL_TYPES_H #define GLOBAL_PROF_MUTEXES \ - MTX(base) \ - MTX(ctl) \ - MTX(prof) + OP(base) \ + OP(ctl) \ + OP(prof) + +typedef enum { +#define OP(mtx) global_prof_mutex_##mtx, + GLOBAL_PROF_MUTEXES +#undef OP + num_global_prof_mutexes +} global_prof_mutex_ind_t; + +#define ARENA_PROF_MUTEXES \ + OP(large) \ + OP(extent_freelist) \ + OP(extents_dirty) \ + OP(extents_muzzy) \ + OP(extents_retained) \ + OP(decay_dirty) \ + OP(decay_muzzy) \ + OP(tcache_list) + +typedef enum { +#define OP(mtx) arena_prof_mutex_##mtx, + ARENA_PROF_MUTEXES +#undef OP + num_arena_prof_mutexes +} arena_prof_mutex_ind_t; + +#define MUTEX_PROF_COUNTERS \ + OP(num_ops, uint64_t) \ + OP(num_wait, uint64_t) \ + OP(num_spin_acq, uint64_t) \ + OP(num_owner_switch, uint64_t) \ + OP(total_wait_time, 
uint64_t) \ + OP(max_wait_time, uint64_t) \ + OP(max_num_thds, uint32_t) + +typedef enum { +#define OP(counter, type) mutex_counter_##counter, + MUTEX_PROF_COUNTERS +#undef OP + num_mutex_prof_counters +} mutex_prof_counter_ind_t; typedef struct ctl_node_s ctl_node_t; typedef struct ctl_named_node_s ctl_named_node_t; diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 5a9624fb..75a4a783 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -124,14 +124,7 @@ struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ - mutex_prof_data_t large_mtx_data; - mutex_prof_data_t extent_freelist_mtx_data; - mutex_prof_data_t extents_dirty_mtx_data; - mutex_prof_data_t extents_muzzy_mtx_data; - mutex_prof_data_t extents_retained_mtx_data; - mutex_prof_data_t decay_dirty_mtx_data; - mutex_prof_data_t decay_muzzy_mtx_data; - mutex_prof_data_t tcache_list_mtx_data; + mutex_prof_data_t mutex_prof_data[num_arena_prof_mutexes]; /* One element for each large size class. 
*/ malloc_large_stats_t lstats[NSIZES - NBINS]; diff --git a/src/arena.c b/src/arena.c index 6bb67338..3383a3b8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -292,28 +292,32 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, tbin->ncached * index2size(i)); } } - malloc_mutex_prof_read(tsdn, &astats->tcache_list_mtx_data, + malloc_mutex_prof_read(tsdn, + &astats->mutex_prof_data[arena_prof_mutex_tcache_list], &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } -#define READ_ARENA_MUTEX_PROF_DATA(mtx, data) \ +#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ malloc_mutex_lock(tsdn, &arena->mtx); \ - malloc_mutex_prof_read(tsdn, &astats->data, &arena->mtx); \ + malloc_mutex_prof_read(tsdn, &astats->mutex_prof_data[ind], \ + &arena->mtx); \ malloc_mutex_unlock(tsdn, &arena->mtx); /* Gather per arena mutex profiling data. */ - READ_ARENA_MUTEX_PROF_DATA(large_mtx, large_mtx_data) + READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(extent_freelist_mtx, - extent_freelist_mtx_data) + arena_prof_mutex_extent_freelist) READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx, - extents_dirty_mtx_data) + arena_prof_mutex_extents_dirty) READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx, - extents_muzzy_mtx_data) + arena_prof_mutex_extents_muzzy) READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, - extents_retained_mtx_data) - READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, decay_dirty_mtx_data) - READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, decay_muzzy_mtx_data) + arena_prof_mutex_extents_retained) + READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, + arena_prof_mutex_decay_dirty) + READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, + arena_prof_mutex_decay_muzzy) #undef READ_ARENA_MUTEX_PROF_DATA for (szind_t i = 0; i < NBINS; i++) { diff --git a/src/ctl.c b/src/ctl.c index 360c6bdc..e2ee36a6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -13,33 +13,6 @@ static bool ctl_initialized; static ctl_stats_t *ctl_stats; static ctl_arenas_t 
*ctl_arenas; -const char *global_mutex_names[NUM_GLOBAL_PROF_MUTEXES] = { - "base", - "prof", - "ctl" -}; - -const char *arena_mutex_names[NUM_ARENA_PROF_MUTEXES] = { - "large", - "extent_freelist", - "extents_dirty", - "extents_muzzy", - "extents_retained", - "decay_dirty", - "decay_muzzy", - "tcache_list" -}; - -const char *mutex_counter_names[NUM_MUTEX_PROF_COUNTERS] = { - "num_ops", - "num_wait", - "num_spin_acq", - "num_owner_switch", - "total_wait_time", - "max_wait_time", - "max_num_thds" -}; - /******************************************************************************/ /* Helpers for named and indexed nodes. */ @@ -215,25 +188,17 @@ CTL_PROTO(stats_##n##_max_wait_time) \ CTL_PROTO(stats_##n##_max_num_thds) /* Global mutexes. */ -MUTEX_STATS_CTL_PROTO_GEN(mutexes_base) -MUTEX_STATS_CTL_PROTO_GEN(mutexes_prof) -MUTEX_STATS_CTL_PROTO_GEN(mutexes_ctl) +#define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(mutexes_##mtx) +GLOBAL_PROF_MUTEXES +#undef OP + +/* Per arena mutexes. */ +#define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(arenas_i_mutexes_##mtx) +ARENA_PROF_MUTEXES +#undef OP /* Arena bin mutexes. */ MUTEX_STATS_CTL_PROTO_GEN(arenas_i_bins_j_mutex) - -#define ARENA_MUTEXES_CTL_PROTO_GEN(n) \ - MUTEX_STATS_CTL_PROTO_GEN(arenas_i_mutexes_##n) -/* Per arena mutexes. 
*/ -ARENA_MUTEXES_CTL_PROTO_GEN(large) -ARENA_MUTEXES_CTL_PROTO_GEN(extent_freelist) -ARENA_MUTEXES_CTL_PROTO_GEN(extents_dirty) -ARENA_MUTEXES_CTL_PROTO_GEN(extents_muzzy) -ARENA_MUTEXES_CTL_PROTO_GEN(extents_retained) -ARENA_MUTEXES_CTL_PROTO_GEN(decay_dirty) -ARENA_MUTEXES_CTL_PROTO_GEN(decay_muzzy) -ARENA_MUTEXES_CTL_PROTO_GEN(tcache_list) -#undef ARENA_MUTEXES_CTL_PROTO_GEN #undef MUTEX_STATS_CTL_PROTO_GEN CTL_PROTO(stats_mutexes_reset) @@ -461,34 +426,14 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; -#define ARENA_MUTEX_PROF_DATA_NODE(n) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##n) - -ARENA_MUTEX_PROF_DATA_NODE(large) -ARENA_MUTEX_PROF_DATA_NODE(extent_freelist) -ARENA_MUTEX_PROF_DATA_NODE(extents_dirty) -ARENA_MUTEX_PROF_DATA_NODE(extents_muzzy) -ARENA_MUTEX_PROF_DATA_NODE(extents_retained) -ARENA_MUTEX_PROF_DATA_NODE(decay_dirty) -ARENA_MUTEX_PROF_DATA_NODE(decay_muzzy) -ARENA_MUTEX_PROF_DATA_NODE(tcache_list) +#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) +ARENA_PROF_MUTEXES +#undef OP static const ctl_named_node_t stats_arenas_i_mutexes_node[] = { - {NAME("large"), - CHILD(named, stats_arenas_i_mutexes_large)}, - {NAME("extent_freelist"), - CHILD(named, stats_arenas_i_mutexes_extent_freelist)}, - {NAME("extents_dirty"), - CHILD(named, stats_arenas_i_mutexes_extents_dirty)}, - {NAME("extents_muzzy"), - CHILD(named, stats_arenas_i_mutexes_extents_muzzy)}, - {NAME("extents_retained"), - CHILD(named, stats_arenas_i_mutexes_extents_retained)}, - {NAME("decay_dirty"), - CHILD(named, stats_arenas_i_mutexes_decay_dirty)}, - {NAME("decay_muzzy"), - CHILD(named, stats_arenas_i_mutexes_decay_muzzy)}, - {NAME("tcache_list"), - CHILD(named, stats_arenas_i_mutexes_tcache_list)} +#define OP(mtx) {NAME(#mtx), CHILD(named, stats_arenas_i_mutexes_##mtx)}, +ARENA_PROF_MUTEXES +#undef OP }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -525,15 +470,17 @@ static const ctl_indexed_node_t 
stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; -MUTEX_PROF_DATA_NODE(mutexes_base) -MUTEX_PROF_DATA_NODE(mutexes_prof) -MUTEX_PROF_DATA_NODE(mutexes_ctl) +#define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx) +GLOBAL_PROF_MUTEXES +#undef OP + static const ctl_named_node_t stats_mutexes_node[] = { - {NAME("base"), CHILD(named, stats_mutexes_base)}, - {NAME("prof"), CHILD(named, stats_mutexes_prof)}, - {NAME("ctl"), CHILD(named, stats_mutexes_ctl)}, +#define OP(mtx) {NAME(#mtx), CHILD(named, stats_mutexes_##mtx)}, +GLOBAL_PROF_MUTEXES +#undef OP {NAME("reset"), CTL(stats_mutexes_reset)} }; +#undef MUTEX_PROF_DATA_NODE static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, @@ -545,7 +492,6 @@ static const ctl_named_node_t stats_node[] = { {NAME("mutexes"), CHILD(named, stats_mutexes)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; -#undef MUTEX_PROF_DATA_NODE static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, @@ -784,27 +730,13 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); - malloc_mutex_prof_merge(&(sdstats->astats.large_mtx_data), - &(astats->astats.large_mtx_data)); - malloc_mutex_prof_merge( - &(sdstats->astats.extent_freelist_mtx_data), - &(astats->astats.extent_freelist_mtx_data)); - malloc_mutex_prof_merge( - &(sdstats->astats.extents_dirty_mtx_data), - &(astats->astats.extents_dirty_mtx_data)); - malloc_mutex_prof_merge( - &(sdstats->astats.extents_muzzy_mtx_data), - &(astats->astats.extents_muzzy_mtx_data)); - malloc_mutex_prof_merge( - &(sdstats->astats.extents_retained_mtx_data), - &(astats->astats.extents_retained_mtx_data)); - malloc_mutex_prof_merge(&(sdstats->astats.decay_dirty_mtx_data), - &(astats->astats.decay_dirty_mtx_data)); - malloc_mutex_prof_merge(&(sdstats->astats.decay_muzzy_mtx_data), - &(astats->astats.decay_muzzy_mtx_data)); - 
malloc_mutex_prof_merge(&(sdstats->astats.tcache_list_mtx_data), - &(astats->astats.tcache_list_mtx_data)); - +#define OP(mtx) malloc_mutex_prof_merge( \ + &(sdstats->astats.mutex_prof_data[ \ + arena_prof_mutex_##mtx]), \ + &(astats->astats.mutex_prof_data[ \ + arena_prof_mutex_##mtx])); +ARENA_PROF_MUTEXES +#undef OP if (!destroyed) { accum_atomic_zu(&sdstats->astats.base, &astats->astats.base); @@ -975,17 +907,21 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->retained = atomic_load_zu( &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED); -#define READ_GLOBAL_MUTEX_PROF_DATA(mtx, data) \ +#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \ malloc_mutex_lock(tsdn, &mtx); \ - malloc_mutex_prof_read(tsdn, &ctl_stats->data, &mtx); \ + malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ malloc_mutex_unlock(tsdn, &mtx); - READ_GLOBAL_MUTEX_PROF_DATA(b0get()->mtx, base_mtx_data); + READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_base, + b0get()->mtx); if (config_prof && opt_prof) { - READ_GLOBAL_MUTEX_PROF_DATA(bt2gctx_mtx, prof_mtx_data); + READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_prof, + bt2gctx_mtx); } /* We own ctl mutex already. */ - malloc_mutex_prof_read(tsdn, &ctl_stats->ctl_mtx_data, &ctl_mtx); + malloc_mutex_prof_read(tsdn, + &ctl_stats->mutex_prof_data[global_prof_mutex_ctl], + &ctl_mtx); #undef READ_GLOBAL_MUTEX_PROF_DATA } ctl_arenas->epoch++; @@ -2489,29 +2425,24 @@ CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ nstime_ns(&l.max_wait_time), uint64_t) \ CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ - l.max_n_thds, uint64_t) + l.max_n_thds, uint32_t) /* Global mutexes. 
*/ -#define MTX(mutex) \ - RO_MUTEX_CTL_GEN(mutexes_##mutex, ctl_stats->mutex##_mtx_data) +#define OP(mtx) \ + RO_MUTEX_CTL_GEN(mutexes_##mtx, \ + ctl_stats->mutex_prof_data[global_prof_mutex_##mtx]) GLOBAL_PROF_MUTEXES -#undef MTX +#undef OP + +/* Per arena mutexes */ +#define OP(mtx) RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \ + arenas_i(mib[2])->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]) +ARENA_PROF_MUTEXES +#undef OP /* tcache bin mutex */ RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) -/* Per arena mutexes */ -#define ARENAS_ASTATS_MUTEX_CTL_GEN(l, d) \ - RO_MUTEX_CTL_GEN(arenas_i_mutexes_##l, arenas_i(mib[2])->astats->astats.d) -ARENAS_ASTATS_MUTEX_CTL_GEN(large, large_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(extent_freelist, extent_freelist_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(extents_dirty, extents_dirty_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(extents_muzzy, extents_muzzy_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(extents_retained, extents_retained_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(decay_dirty, decay_dirty_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(decay_muzzy, decay_muzzy_mtx_data) -ARENAS_ASTATS_MUTEX_CTL_GEN(tcache_list, tcache_list_mtx_data) -#undef ARENAS_ASTATS_MUTEX_CTL_GEN #undef RO_MUTEX_CTL_GEN /* Resets all mutex stats, including global, arena and bin mutexes. 
*/ diff --git a/src/stats.c b/src/stats.c index ada95e92..aa7ca507 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,6 +1,18 @@ #define JEMALLOC_STATS_C_ #include "jemalloc/internal/jemalloc_internal.h" +const char *global_mutex_names[num_global_prof_mutexes] = { +#define OP(mtx) #mtx, + GLOBAL_PROF_MUTEXES +#undef OP +}; + +const char *arena_mutex_names[num_arena_prof_mutexes] = { +#define OP(mtx) #mtx, + ARENA_PROF_MUTEXES +#undef OP +}; + #define CTL_GET(n, v, t) do { \ size_t sz = sizeof(t); \ xmallctl(n, (void *)v, &sz, NULL, 0); \ @@ -57,28 +69,49 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { return false; } +#define MUTEX_CTL_STR_MAX_LENGTH 128 static void -gen_mutex_ctl_str(char *str, const char *prefix, const char *mutex, - const char *counter) { - malloc_snprintf(str, 128, "stats.%s.%s.%s", prefix, mutex, counter); +gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, + const char *mutex, const char *counter) { + malloc_snprintf(str, buf_len, "stats.%s.%s.%s", prefix, mutex, counter); } static void read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[NUM_MUTEX_PROF_COUNTERS]) { - char cmd[128]; + uint64_t results[num_mutex_prof_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; +#define OP(c, t) \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #c); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (t *)&results[mutex_counter_##c], t); +MUTEX_PROF_COUNTERS +#undef OP +} - unsigned i; - for (i = 0; i < NUM_MUTEX_PROF_COUNTERS; i++) { - gen_mutex_ctl_str(cmd, "arenas.0.bins.0","mutex", - mutex_counter_names[i]); - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, &results[i], uint64_t); - } +static void +mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, + const char *name, uint64_t stats[num_mutex_prof_counters], + const char *json_indent, bool last) { + malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); + + 
mutex_prof_counter_ind_t k = 0; + char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", + "%s\t\"%s\": %"FMTu64"%s\n"}; +#define OP(c, t) \ + malloc_cprintf(write_cb, cbopaque, \ + fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ + json_indent, #c, (t)stats[mutex_counter_##c], \ + (++k == num_mutex_prof_counters) ? "" : ","); +MUTEX_PROF_COUNTERS +#undef OP + malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, + last ? "" : ","); } static void stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, unsigned i) { + bool json, bool large, bool mutex, unsigned i) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -147,9 +180,6 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); if (json) { - uint64_t mutex_stats[NUM_MUTEX_PROF_COUNTERS]; - read_arena_bin_mutex_stats(i, j, mutex_stats); - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t{\n" "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" @@ -169,20 +199,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curslabs\": %zu,\n" - "\t\t\t\t\t\t\"mutex\": {\n", - nreslabs, - curslabs); + "\t\t\t\t\t\t\"curslabs\": %zu%s\n", + nreslabs, curslabs, mutex ? "," : ""); - for (unsigned k = 0; k < NUM_MUTEX_PROF_COUNTERS; k++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\t\t\"%s\": %"FMTu64"%s\n", - mutex_counter_names[k], mutex_stats[k], - k == NUM_MUTEX_PROF_COUNTERS - 1 ? "" : ","); + if (mutex) { + uint64_t mutex_stats[num_mutex_prof_counters]; + read_arena_bin_mutex_stats(i, j, mutex_stats); + mutex_stats_output_json(write_cb, cbopaque, + "mutex", mutex_stats, "\t\t\t\t\t\t", true); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\t}\n" "\t\t\t\t\t}%s\n", (j + 1 < nbins) ? 
"," : ""); } else if (!in_gap) { @@ -326,74 +352,79 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), static void read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[NUM_ARENA_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]) { - char cmd[128]; + uint64_t results[num_arena_prof_mutexes][num_mutex_prof_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - unsigned i, j; - for (i = 0; i < NUM_ARENA_PROF_MUTEXES; i++) { - for (j = 0; j < NUM_MUTEX_PROF_COUNTERS; j++) { - gen_mutex_ctl_str(cmd, "arenas.0.mutexes", - arena_mutex_names[i], mutex_counter_names[j]); - CTL_M2_GET(cmd, arena_ind, &results[i][j], uint64_t); - } + arena_prof_mutex_ind_t i; + for (i = 0; i < num_arena_prof_mutexes; i++) { +#define OP(c, t) \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.mutexes", arena_mutex_names[i], #c); \ + CTL_M2_GET(cmd, arena_ind, \ + (t *)&results[i][mutex_counter_##c], t); +MUTEX_PROF_COUNTERS +#undef OP } } -static void -mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[NUM_MUTEX_PROF_COUNTERS], - const char *json_indent, bool last) { - - malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - for (unsigned i = 0; i < NUM_MUTEX_PROF_COUNTERS; i++) { - malloc_cprintf(write_cb, cbopaque, "%s\t\"%s\": %"FMTu64"%s\n", - json_indent, mutex_counter_names[i], stats[i], - i < (NUM_MUTEX_PROF_COUNTERS - 1) ? "," : ""); - } - malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, - last ? "" : ","); -} - static void mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[NUM_MUTEX_PROF_COUNTERS], + const char *name, uint64_t stats[num_mutex_prof_counters], bool first_mutex) { if (first_mutex) { /* Print title. 
*/ malloc_cprintf(write_cb, cbopaque, - " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_wait_thds\n"); + " n_lock_ops n_waiting" + " n_spin_acq n_owner_switch total_wait_ns" + " max_wait_ns max_n_thds\n"); } malloc_cprintf(write_cb, cbopaque, "%s", name); malloc_cprintf(write_cb, cbopaque, ":%*c", - (int)(19 - strlen(name)), ' '); + (int)(20 - strlen(name)), ' '); - for (unsigned i = 0; i < NUM_MUTEX_PROF_COUNTERS; i++) { - malloc_cprintf(write_cb, cbopaque, " %16"FMTu64, stats[i]); - } + char *fmt_str[2] = {"%12"FMTu32, "%16"FMTu64}; +#define OP(c, t) \ + malloc_cprintf(write_cb, cbopaque, \ + fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ + (t)stats[mutex_counter_##c]); +MUTEX_PROF_COUNTERS +#undef OP malloc_cprintf(write_cb, cbopaque, "\n"); } static void stats_arena_mutexes_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[NUM_ARENA_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]; + uint64_t mutex_stats[num_arena_prof_mutexes][num_mutex_prof_counters]; read_arena_mutex_stats(arena_ind, mutex_stats); /* Output mutex stats. */ if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); - for (unsigned i = 0; i < NUM_ARENA_PROF_MUTEXES; i++) { + arena_prof_mutex_ind_t i, last_mutex; + last_mutex = num_arena_prof_mutexes - 1; + if (!config_tcache) { + last_mutex--; + } + for (i = 0; i < num_arena_prof_mutexes; i++) { + if (!config_tcache && + i == arena_prof_mutex_tcache_list) { + continue; + } mutex_stats_output_json(write_cb, cbopaque, arena_mutex_names[i], mutex_stats[i], - "\t\t\t\t\t", (i == NUM_ARENA_PROF_MUTEXES - 1)); + "\t\t\t\t\t", (i == last_mutex)); } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", json_end ? 
"" : ","); } else { - for (unsigned i = 0; i < NUM_ARENA_PROF_MUTEXES; i++) { + arena_prof_mutex_ind_t i; + for (i = 0; i < num_arena_prof_mutexes; i++) { + if (!config_tcache && + i == arena_prof_mutex_tcache_list) { + continue; + } mutex_stats_output(write_cb, cbopaque, arena_mutex_names[i], mutex_stats[i], i == 0); } @@ -636,7 +667,8 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, !(bins || large), i); } if (bins) { - stats_arena_bins_print(write_cb, cbopaque, json, large, i); + stats_arena_bins_print(write_cb, cbopaque, json, large, mutex, + i); } if (large) { stats_arena_lextents_print(write_cb, cbopaque, json, i); @@ -995,16 +1027,17 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, static void read_global_mutex_stats( - uint64_t results[NUM_GLOBAL_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]) { - char cmd[128]; + uint64_t results[num_global_prof_mutexes][num_mutex_prof_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - unsigned i, j; - for (i = 0; i < NUM_GLOBAL_PROF_MUTEXES; i++) { - for (j = 0; j < NUM_MUTEX_PROF_COUNTERS; j++) { - gen_mutex_ctl_str(cmd, "mutexes", global_mutex_names[i], - mutex_counter_names[j]); - CTL_GET(cmd, &results[i][j], uint64_t); - } + global_prof_mutex_ind_t i; + for (i = 0; i < num_global_prof_mutexes; i++) { +#define OP(c, t) \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", global_mutex_names[i], #c); \ + CTL_GET(cmd, (t *)&results[i][mutex_counter_##c], t); +MUTEX_PROF_COUNTERS +#undef OP } } @@ -1021,7 +1054,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[NUM_GLOBAL_PROF_MUTEXES][NUM_MUTEX_PROF_COUNTERS]; + uint64_t mutex_stats[num_global_prof_mutexes][num_mutex_prof_counters]; if (mutex) { read_global_mutex_stats(mutex_stats); } @@ -1041,15 +1074,16 @@ stats_print_helper(void (*write_cb)(void *, const char *), 
void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mapped\": %zu,\n", mapped); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu,\n", retained); + "\t\t\t\"retained\": %zu%s\n", retained, mutex ? "," : ""); if (mutex) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mutexes\": {\n"); - - for (unsigned i = 0; i < NUM_GLOBAL_PROF_MUTEXES; i++) { + global_prof_mutex_ind_t i; + for (i = 0; i < num_global_prof_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, global_mutex_names[i], mutex_stats[i], - "\t\t\t\t", i == NUM_GLOBAL_PROF_MUTEXES - 1); + "\t\t\t\t", + i == num_global_prof_mutexes - 1); } malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); } @@ -1061,7 +1095,8 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, resident, mapped, retained); if (mutex) { - for (unsigned i = 0; i < NUM_GLOBAL_PROF_MUTEXES; i++) { + global_prof_mutex_ind_t i; + for (i = 0; i < num_global_prof_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, global_mutex_names[i], mutex_stats[i], i == 0); From 362e356675b244fceebb5fe9e2585e77dd47189d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 22 Mar 2017 01:49:56 -0700 Subject: [PATCH 0741/2608] Profile per arena base mutex, instead of just a0. 
--- include/jemalloc/internal/ctl_types.h | 2 +- src/arena.c | 2 ++ src/ctl.c | 9 ++++----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h index 1b5c76f9..065ccda5 100644 --- a/include/jemalloc/internal/ctl_types.h +++ b/include/jemalloc/internal/ctl_types.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_CTL_TYPES_H #define GLOBAL_PROF_MUTEXES \ - OP(base) \ OP(ctl) \ OP(prof) @@ -21,6 +20,7 @@ typedef enum { OP(extents_retained) \ OP(decay_dirty) \ OP(decay_muzzy) \ + OP(base) \ OP(tcache_list) typedef enum { diff --git a/src/arena.c b/src/arena.c index 3383a3b8..519111e5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -318,6 +318,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_prof_mutex_decay_dirty) READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, arena_prof_mutex_decay_muzzy) + READ_ARENA_MUTEX_PROF_DATA(base->mtx, + arena_prof_mutex_base) #undef READ_ARENA_MUTEX_PROF_DATA for (szind_t i = 0; i < NBINS; i++) { diff --git a/src/ctl.c b/src/ctl.c index e2ee36a6..36f5634d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -912,8 +912,6 @@ ctl_refresh(tsdn_t *tsdn) { malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ malloc_mutex_unlock(tsdn, &mtx); - READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_base, - b0get()->mtx); if (config_prof && opt_prof) { READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_prof, bt2gctx_mtx); @@ -2460,12 +2458,12 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_prof_data_reset(tsdn, &mtx); \ malloc_mutex_unlock(tsdn, &mtx); - /* Global mutexes: base, prof and ctl. */ - MUTEX_PROF_RESET(b0get()->mtx); + /* Global mutexes: ctl and prof. */ + MUTEX_PROF_RESET(ctl_mtx); if (config_prof && opt_prof) { MUTEX_PROF_RESET(bt2gctx_mtx); } - MUTEX_PROF_RESET(ctl_mtx); + /* Per arena mutexes. 
*/ unsigned n = narenas_total_get(); @@ -2485,6 +2483,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, if (config_tcache) { MUTEX_PROF_RESET(arena->tcache_ql_mtx); } + MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; From bbc16a50f975d84ba43d831666b95d8b38e01656 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 22 Mar 2017 16:59:14 -0700 Subject: [PATCH 0742/2608] Added documentation for mutex profiling related mallctls. --- doc/jemalloc.xml.in | 206 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 206 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 265da86f..91127a03 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2153,6 +2153,80 @@ struct extent_hooks_s { + + + stats.mutexes.ctl.{counter}; + (counter specific type) + r- + [] + + Statistics on ctl mutex (global + scope; mallctl related). {counter} is one of the + counters below: + + num_ops (uint64_t): + Total number of lock acquisition operations on this mutex. + + num_spin_acq (uint64_t): Number + of times the mutex was spin-acquired. When the mutex is currently + locked and cannot be acquired immediately, a short period of + spin-retry within jemalloc will be performed. Acquired through spin + generally means the contention was lightweight and not causing context + switches. + + num_wait (uint64_t): Number of + times the mutex was wait-acquired, which means the mutex contention + was not solved by spin-retry, and blocking operation was likely + involved in order to acquire the mutex. This event generally implies + higher cost / longer delay, and should be investigated if it happens + often. + + max_wait_time (uint64_t): + Maximum length of time in nanoseconds spent on a single wait-acquired + lock operation. Note that to avoid profiling overhead on the common + path, this does not consider spin-acquired cases. 
+ + total_wait_time (uint64_t): + Cumulative time in nanoseconds spent on wait-acquired lock operations. + Similarly, spin-acquired cases are not considered. + + max_num_thds (uint32_t): Maximum + number of threads waiting on this mutex simultaneously. Similarly, + spin-acquired cases are not considered. + + num_owner_switch (uint64_t): + Number of times the current mutex owner is different from the previous + one. This event does not generally imply an issue; rather it is an + indicator of how often the protected data are accessed by different + threads. + + + + + + + + + stats.mutexes.prof.{counter} + (counter specific type) r- + [] + + Statistics on prof mutex (global + scope; profiling related). {counter} is one of the + counters in mutex profiling + counters. + + + + + stats.mutexes.reset + (void) -- + [] + + Reset all mutex profile statistics, including global + mutexes, arena mutexes and bin mutexes. + + stats.arenas.<i>.dss @@ -2547,6 +2621,19 @@ struct extent_hooks_s { Current number of slabs. + + + stats.arenas.<i>.bins.<j>.mutex.{counter} + (counter specific type) r- + [] + + Statistics on + arena.<i>.bins.<j> mutex (arena bin + scope; bin operation related). {counter} is one of + the counters in mutex profiling + counters. + + stats.arenas.<i>.lextents.<j>.nmalloc @@ -2590,6 +2677,125 @@ struct extent_hooks_s { Current number of large allocations for this size class. + + + + stats.arenas.<i>.mutexes.large.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.large + mutex (arena scope; large allocation related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.extent_freelist.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.extent_freelist + mutex (arena scope; extent freelist related). + {counter} is one of the counters in mutex profiling + counters. 
+ + + + + stats.arenas.<i>.mutexes.extents_dirty.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.extents_dirty + mutex (arena scope; dirty extents related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.extents_muzzy.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.extents_muzzy + mutex (arena scope; muzzy extents related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.extents_retained.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.extents_retained + mutex (arena scope; retained extents related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.decay_dirty.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.decay_dirty + mutex (arena scope; decay for dirty pages related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.decay_muzzy.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.decay_muzzy + mutex (arena scope; decay for muzzy pages related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.base.{counter} + (counter specific type) r- + [] + + Statistics on arena.<i>.base + mutex (arena scope; base allocator related). + {counter} is one of the counters in mutex profiling + counters. + + + + + stats.arenas.<i>.mutexes.tcache_list.{counter} + (counter specific type) r- + [] + + Statistics on + arena.<i>.tcache_list mutex (arena scope; + tcache to arena association related). This mutex is expected to be + accessed less often. {counter} is one of the + counters in mutex profiling + counters. + + From a832ebaee905522fafa1be438dbf3fb5066f1e00 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 22 Mar 2017 20:06:25 -0700 Subject: [PATCH 0743/2608] Use first fit layout policy instead of best fit. 
For extents which do not delay coalescing, use first fit layout policy rather than first-best fit layout policy. This packs extents toward older virtual memory mappings, but at the cost of higher search overhead in the common case. This resolves #711. --- src/extent.c | 54 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/src/extent.c b/src/extent.c index f1b513e4..f04fd01a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -256,21 +256,13 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } -/* - * Do {first,any}-best-fit extent selection, i.e. select the oldest/lowest or - * any extent that best fits, where {first,any} corresponds to - * extents->delay_coalesce={false,true}. - */ +/* Do any-best-fit extent selection, i.e. select any extent that best fits. */ static extent_t * extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, size_t size) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); - pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); for (pszind_t i = pind; i < NPSIZES+1; i++) { - extent_t *extent = extents->delay_coalesce ? - extent_heap_any(&extents->heaps[i]) : - extent_heap_first(&extents->heaps[i]); + extent_t *extent = extent_heap_any(&extents->heaps[i]); if (extent != NULL) { assert(extent_size_get(extent) >= size); return extent; @@ -280,6 +272,45 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, return NULL; } +/* + * Do first-fit extent selection, i.e. select the oldest/lowest extent that is + * large enough. 
+ */ +static extent_t * +extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, + size_t size) { + extent_t *ret = NULL; + + pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); + for (pszind_t i = pind; i < NPSIZES+1; i++) { + extent_t *extent = extent_heap_first(&extents->heaps[i]); + if (extent != NULL) { + assert(extent_size_get(extent) >= size); + if (ret == NULL || extent_snad_comp(extent, ret) < 0) { + ret = extent; + } + } + } + + return ret; +} + +/* + * Do {best,first}-fit extent selection, where the selection policy choice is + * based on extents->delay_coalesce. Best-fit selection requires less + * searching, but its layout policy is less stable and may cause higher virtual + * memory fragmentation as a side effect. + */ +static extent_t * +extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, + size_t size) { + malloc_mutex_assert_owner(tsdn, &extents->mtx); + + return extents->delay_coalesce ? extents_best_fit_locked(tsdn, arena, + extents, size) : extents_first_fit_locked(tsdn, arena, extents, + size); +} + static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, @@ -675,8 +706,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent = NULL; } } else { - extent = extents_best_fit_locked(tsdn, arena, extents, - alloc_size); + extent = extents_fit_locked(tsdn, arena, extents, alloc_size); } if (extent == NULL) { if (!locked) { From c8021d01f6efe14dc1bd200021a815638063cb5f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 23 Mar 2017 17:59:47 -0700 Subject: [PATCH 0744/2608] Implement bitmap_ffu(), which finds the first unset bit. 
--- include/jemalloc/internal/bitmap_externs.h | 2 +- include/jemalloc/internal/bitmap_inlines.h | 70 +++++++++++++++++-- include/jemalloc/internal/private_symbols.txt | 1 + src/arena.c | 2 +- src/bitmap.c | 27 +++++-- test/unit/bitmap.c | 59 ++++++++++++---- 6 files changed, 136 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/bitmap_externs.h b/include/jemalloc/internal/bitmap_externs.h index 4df63eba..034a4e6b 100644 --- a/include/jemalloc/internal/bitmap_externs.h +++ b/include/jemalloc/internal/bitmap_externs.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_BITMAP_EXTERNS_H void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); size_t bitmap_size(const bitmap_info_t *binfo); #endif /* JEMALLOC_INTERNAL_BITMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index df582bbe..07166ba5 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -2,11 +2,13 @@ #define JEMALLOC_INTERNAL_BITMAP_INLINES_H #ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); +bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); +size_t bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, + size_t min_bit); +size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); +void bitmap_unset(bitmap_t *bitmap, const 
bitmap_info_t *binfo, size_t bit); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) @@ -75,6 +77,64 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { #endif } +/* ffu: find first unset >= bit. */ +JEMALLOC_INLINE size_t +bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { + assert(min_bit < binfo->nbits); + +#ifdef BITMAP_USE_TREE + unsigned level = binfo->nlevels - 1; + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level+1)); + size_t bits_per_group = 1LU << lg_bits_per_group; + size_t bits_per_group_mask = bits_per_group - 1; + unsigned group_nmask = (min_bit & bits_per_group_mask) >> (level * + LG_BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group = bitmap[binfo->levels[level].group_offset] & group_mask; + if (group == 0LU) { + return binfo->nbits; + } + size_t bit = ffs_lu(group) - 1; + + while (level > 0) { + level--; + + lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level+1)); + bits_per_group = 1LU << lg_bits_per_group; + bits_per_group_mask = bits_per_group - 1; + + group = bitmap[binfo->levels[level].group_offset + bit]; + size_t cur_base = bit << lg_bits_per_group; + if (cur_base < min_bit) { + group_nmask = (min_bit & bits_per_group_mask) >> (level + * LG_BITMAP_GROUP_NBITS); + group_mask = ~((1LU << group_nmask) - 1); + group &= group_mask; + } + if (group == 0LU) { + return binfo->nbits; + } + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(group) - 1); + } + assert(bit < binfo->nbits); + return bit; +#else + size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; + bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) + - 1); + size_t bit; + do { + bit = ffs_lu(g); + if (bit != 0) { + return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + } + i++; + g = bitmap[i]; + } while (i < binfo->ngroups); + return binfo->nbits; +#endif +} + /* sfu: set first unset. 
*/ JEMALLOC_INLINE size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1af1f91b..d22cd874 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -94,6 +94,7 @@ base_postfork_child base_postfork_parent base_prefork base_stats_get +bitmap_ffu bitmap_full bitmap_get bitmap_info_init diff --git a/src/arena.c b/src/arena.c index 519111e5..b0913c35 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1210,7 +1210,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Initialize slab internals. */ arena_slab_data_t *slab_data = extent_slab_data_get(slab); slab_data->nfree = bin_info->nregs; - bitmap_init(slab_data->bitmap, &bin_info->bitmap_info); + bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); diff --git a/src/bitmap.c b/src/bitmap.c index 17efb73c..81d2a6da 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -39,16 +39,26 @@ bitmap_info_ngroups(const bitmap_info_t *binfo) { } void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { size_t extra; unsigned i; /* * Bits are actually inverted with regard to the external bitmap - * interface, so the bitmap starts out with all 1 bits, except for - * trailing unused bits (if any). Note that each group uses bit 0 to - * correspond to the first logical bit in the group, so extra bits - * are the most significant bits of the last group. + * interface. + */ + + if (fill) { + /* The "filled" bitmap starts out with all 0 bits. */ + memset(bitmap, 0, bitmap_size(binfo)); + return; + } + + /* + * The "empty" bitmap starts out with all 1 bits, except for trailing + * unused bits (if any). 
Note that each group uses bit 0 to correspond + * to the first logical bit in the group, so extra bits are the most + * significant bits of the last group. */ memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) @@ -84,9 +94,14 @@ bitmap_info_ngroups(const bitmap_info_t *binfo) { } void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo) { +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { size_t extra; + if (fill) { + memset(bitmap, 0, bitmap_size(binfo)); + return; + } + memset(bitmap, 0xffU, bitmap_size(binfo)); extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index ca657608..92a07dec 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -171,12 +171,18 @@ test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); - bitmap_init(bitmap, binfo); + bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { assert_false(bitmap_get(bitmap, binfo, i), "Bit should be unset"); } + + bitmap_init(bitmap, binfo, true); + for (i = 0; i < nbits; i++) { + assert_true(bitmap_get(bitmap, binfo, i), "Bit should be set"); + } + free(bitmap); } @@ -202,7 +208,7 @@ test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); - bitmap_init(bitmap, binfo); + bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); @@ -233,7 +239,7 @@ test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); - bitmap_init(bitmap, binfo); + bitmap_init(bitmap, 
binfo, false); for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); @@ -268,14 +274,22 @@ TEST_END static void test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); - bitmap_init(bitmap, binfo); + bitmap_init(bitmap, binfo, false); /* Iteratively set bits starting at the beginning. */ - for (i = 0; i < nbits; i++) { - assert_zd_eq(bitmap_sfu(bitmap, binfo), i, + for (size_t i = 0; i < nbits; i++) { + assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + "First unset bit should be just after previous first unset " + "bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + "First unset bit should be just after previous first unset " + "bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + "First unset bit should be just after previous first unset " + "bit"); + assert_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should be just after previous first unset " "bit"); } @@ -285,9 +299,15 @@ test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { * Iteratively unset bits starting at the end, and verify that * bitmap_sfu() reaches the unset bits. */ - for (i = nbits - 1; i < nbits; i--) { /* (nbits..0] */ + for (size_t i = nbits - 1; i < nbits; i--) { /* (nbits..0] */ bitmap_unset(bitmap, binfo, i); - assert_zd_eq(bitmap_sfu(bitmap, binfo), i, + assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + "First unset bit should the bit previously unset"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? 
i-1 : i), i, + "First unset bit should the bit previously unset"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + "First unset bit should the bit previously unset"); + assert_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should the bit previously unset"); bitmap_unset(bitmap, binfo, i); } @@ -297,14 +317,29 @@ test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { * Iteratively set bits starting at the beginning, and verify that * bitmap_sfu() looks past them. */ - for (i = 1; i < nbits; i++) { + for (size_t i = 1; i < nbits; i++) { bitmap_set(bitmap, binfo, i - 1); - assert_zd_eq(bitmap_sfu(bitmap, binfo), i, + assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + "First unset bit should be just after the bit previously " + "set"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + "First unset bit should be just after the bit previously " + "set"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + "First unset bit should be just after the bit previously " + "set"); + assert_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should be just after the bit previously " "set"); bitmap_unset(bitmap, binfo, i); } - assert_zd_eq(bitmap_sfu(bitmap, binfo), nbits - 1, + assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, + "First unset bit should be the last bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1), + nbits - 1, "First unset bit should be the last bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, + "First unset bit should be the last bit"); + assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, "First unset bit should be the last bit"); assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); From 57e353163f0ec099aed8feee2083e95c9d4b472b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Mar 2017 01:12:42 -0700 Subject: [PATCH 0745/2608] Implement BITMAP_GROUPS(). 
--- include/jemalloc/internal/bitmap_types.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index ec8a6dc9..d0de2f05 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -65,14 +65,19 @@ typedef unsigned long bitmap_t; #ifdef BITMAP_USE_TREE #if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) #elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) # define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) #else # error "Unsupported bitmap size" @@ -119,6 +124,7 @@ typedef unsigned long bitmap_t; #else /* BITMAP_USE_TREE */ +#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) #define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) #define BITMAP_INFO_INITIALIZER(nbits) { \ From 5d33233a5e6601902df7cddd8cc8aa0b135c77b2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 23 Mar 2017 23:45:11 -0700 Subject: [PATCH 0746/2608] Use a bitmap in extents_t to speed up search. Rather than iteratively checking all sufficiently large heaps during search, maintain and use a bitmap in order to skip empty heaps. 
--- include/jemalloc/internal/bitmap_types.h | 8 ++++- include/jemalloc/internal/extent_structs.h | 7 ++++ src/extent.c | 41 ++++++++++++++++------ 3 files changed, 44 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index d0de2f05..b334769f 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -2,7 +2,13 @@ #define JEMALLOC_INTERNAL_BITMAP_TYPES_H /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES +/* Maximum bitmap bit count is determined by maximum regions per slab. */ +# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#else +/* Maximum bitmap bit count is determined by number of extent size classes. */ +# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES +#endif #define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) typedef struct bitmap_level_s bitmap_level_t; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 82cfa58a..5cf3c9b2 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -102,6 +102,13 @@ struct extents_s { */ extent_heap_t heaps[NPSIZES+1]; + /* + * Bitmap for which set bits correspond to non-empty heaps. + * + * Synchronization: mtx. + */ + bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)]; + /* * LRU of all extents in heaps. 
* diff --git a/src/extent.c b/src/extent.c index f04fd01a..3f4f5f1b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -6,6 +6,9 @@ rtree_t extents_rtree; +static const bitmap_info_t extents_bitmap_info = + BITMAP_INFO_INITIALIZER(NPSIZES+1); + static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); @@ -189,6 +192,7 @@ extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, for (unsigned i = 0; i < NPSIZES+1; i++) { extent_heap_new(&extents->heaps[i]); } + bitmap_init(extents->bitmap, &extents_bitmap_info, true); extent_list_init(&extents->lru); atomic_store_zu(&extents->npages, 0, ATOMIC_RELAXED); extents->state = state; @@ -215,6 +219,10 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); pszind_t pind = psz2ind(psz); + if (extent_heap_empty(&extents->heaps[pind])) { + bitmap_unset(extents->bitmap, &extents_bitmap_info, + (size_t)pind); + } extent_heap_insert(&extents->heaps[pind], extent); if (!preserve_lru) { extent_list_append(&extents->lru, extent); @@ -241,6 +249,10 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, size_t psz = extent_size_quantize_floor(size); pszind_t pind = psz2ind(psz); extent_heap_remove(&extents->heaps[pind], extent); + if (extent_heap_empty(&extents->heaps[pind])) { + bitmap_set(extents->bitmap, &extents_bitmap_info, + (size_t)pind); + } if (!preserve_lru) { extent_list_remove(&extents->lru, extent); } @@ -261,12 +273,13 @@ static extent_t * extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, size_t size) { pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); - for (pszind_t i = pind; i < NPSIZES+1; i++) { + pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + (size_t)pind); + if (i < NPSIZES+1) { + 
assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_any(&extents->heaps[i]); - if (extent != NULL) { - assert(extent_size_get(extent) >= size); - return extent; - } + assert(extent_size_get(extent) >= size); + return extent; } return NULL; @@ -282,14 +295,20 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *ret = NULL; pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); - for (pszind_t i = pind; i < NPSIZES+1; i++) { + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, + &extents_bitmap_info, (size_t)pind); i < NPSIZES+1; i = + (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + (size_t)i+1)) { + assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); - if (extent != NULL) { - assert(extent_size_get(extent) >= size); - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { - ret = extent; - } + assert(extent_size_get(extent) >= size); + if (ret == NULL || extent_snad_comp(extent, ret) < 0) { + ret = extent; } + if (i == NPSIZES) { + break; + } + assert(i < NPSIZES); } return ret; From e6b074472e4515a74b1e8062bd94683cc8f5b3ba Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 24 Mar 2017 17:21:38 -0700 Subject: [PATCH 0747/2608] Force inline ifree to avoid function call costs on fast path. Without ALWAYS_INLINE, sometimes ifree() gets compiled into its own function, which adds overhead on the fast path. 
--- src/jemalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d8688bdd..ab047c24 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1997,7 +1997,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { return p; } -JEMALLOC_INLINE_C void +JEMALLOC_ALWAYS_INLINE_C void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { witness_assert_lockless(tsd_tsdn(tsd)); @@ -2022,7 +2022,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { } } -JEMALLOC_INLINE_C void +JEMALLOC_ALWAYS_INLINE_C void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { witness_assert_lockless(tsd_tsdn(tsd)); From 5e12223925e3cbd1f7c314e9d0224e1fa597ccc7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Mar 2017 22:46:56 -0700 Subject: [PATCH 0748/2608] Fix BITMAP_USE_TREE version of bitmap_ffu(). This fixes an extent searching regression on 32-bit systems, caused by the initial bitmap_ffu() implementation in c8021d01f6efe14dc1bd200021a815638063cb5f (Implement bitmap_ffu(), which finds the first unset bit.), as first used in 5d33233a5e6601902df7cddd8cc8aa0b135c77b2 (Use a bitmap in extents_t to speed up search.). --- include/jemalloc/internal/bitmap_inlines.h | 13 +++++++ test/unit/bitmap.c | 40 +++++++++++++++++++--- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index 07166ba5..b4a5ca00 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -112,6 +112,19 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { group &= group_mask; } if (group == 0LU) { + /* + * If min_bit is not the first bit in its group, try + * again starting at the first bit of the next group. 
+ * This will only recurse at most once, since on + * recursion, min_bit will be the first bit in its + * group. + */ + size_t ceil_min_bit = (min_bit + + BITMAP_GROUP_NBITS_MASK) & ~BITMAP_GROUP_NBITS_MASK; + if (ceil_min_bit != min_bit && ceil_min_bit < + binfo->nbits) { + return bitmap_ffu(bitmap, binfo, ceil_min_bit); + } return binfo->nbits; } bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(group) - 1); diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 92a07dec..22d2871e 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -273,7 +273,7 @@ TEST_BEGIN(test_bitmap_unset) { TEST_END static void -test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { +test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); @@ -342,20 +342,50 @@ test_bitmap_sfu_body(const bitmap_info_t *binfo, size_t nbits) { assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, "First unset bit should be the last bit"); assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + + /* + * Bubble a "usu" pattern through the bitmap and verify that + * bitmap_ffu() finds the correct bit for all five min_bit cases. 
+ */ + if (nbits >= 3) { + for (size_t i = 0; i < nbits-2; i++) { + bitmap_unset(bitmap, binfo, i); + bitmap_unset(bitmap, binfo, i+2); + if (i > 0) { + assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, + "Unexpected first unset bit"); + } + assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + "Unexpected first unset bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, + "Unexpected first unset bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, + "Unexpected first unset bit"); + if (i + 3 < nbits) { + assert_zu_eq(bitmap_ffu(bitmap, binfo, i+3), + nbits, "Unexpected first unset bit"); + } + assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + "Unexpected first unset bit"); + assert_zu_eq(bitmap_sfu(bitmap, binfo), i+2, + "Unexpected first unset bit"); + } + } + free(bitmap); } -TEST_BEGIN(test_bitmap_sfu) { +TEST_BEGIN(test_bitmap_xfu) { size_t nbits; for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { bitmap_info_t binfo; bitmap_info_init(&binfo, nbits); - test_bitmap_sfu_body(&binfo, nbits); + test_bitmap_xfu_body(&binfo, nbits); } #define NB(nbits) { \ bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_sfu_body(&binfo, nbits); \ + test_bitmap_xfu_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -370,5 +400,5 @@ main(void) { test_bitmap_init, test_bitmap_set, test_bitmap_unset, - test_bitmap_sfu); + test_bitmap_xfu); } From 0591c204b468e7b273c2f3f94f488cffbe8d7a74 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Mar 2017 11:25:43 -0700 Subject: [PATCH 0749/2608] Store arena index rather than (arena_t *) in extent_t. 
--- include/jemalloc/internal/extent_inlines.h | 4 ++-- include/jemalloc/internal/extent_structs.h | 4 ++-- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 56f306df..f86822de 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -49,7 +49,7 @@ int extent_snad_comp(const extent_t *a, const extent_t *b); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) JEMALLOC_INLINE arena_t * extent_arena_get(const extent_t *extent) { - return extent->e_arena; + return arenas[extent->e_arena_ind]; } JEMALLOC_INLINE void * @@ -151,7 +151,7 @@ extent_prof_tctx_get(const extent_t *extent) { JEMALLOC_INLINE void extent_arena_set(extent_t *extent, arena_t *arena) { - extent->e_arena = arena; + extent->e_arena_ind = (arena != NULL) ? arena_ind_get(arena) : UINT_MAX; } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 5cf3c9b2..1b2b4bc7 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -10,8 +10,8 @@ typedef enum { /* Extent (span of pages). Use accessor functions for e_* fields. */ struct extent_s { - /* Arena from which this extent came, if any. */ - arena_t *e_arena; + /* Arena from which this extent came, or UINT_MAX if unassociated. */ + unsigned e_arena_ind; /* Pointer to the extent that this structure is responsible for. 
*/ void *e_addr; diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 2fe21018..4255b639 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -932,7 +932,6 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { } #endif -#include "jemalloc/internal/extent_inlines.h" #include "jemalloc/internal/rtree_inlines.h" #include "jemalloc/internal/base_inlines.h" #include "jemalloc/internal/bitmap_inlines.h" @@ -942,6 +941,7 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { */ #include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/extent_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE extent_t *iealloc(tsdn_t *tsdn, const void *ptr); From 735ad8210c93185b36a36ec4740985681004ce25 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 24 Mar 2017 15:22:26 -0700 Subject: [PATCH 0750/2608] Pack various extent_t fields into a bitfield. This reduces sizeof(extent_t) from 160 to 136 on x64. 
--- include/jemalloc/internal/extent_inlines.h | 150 ++++++++++++--------- include/jemalloc/internal/extent_structs.h | 119 +++++++++------- 2 files changed, 160 insertions(+), 109 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index f86822de..22229b50 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -3,20 +3,20 @@ #ifndef JEMALLOC_ENABLE_INLINE arena_t *extent_arena_get(const extent_t *extent); -void *extent_base_get(const extent_t *extent); -void *extent_addr_get(const extent_t *extent); -size_t extent_size_get(const extent_t *extent); szind_t extent_szind_get_maybe_invalid(const extent_t *extent); szind_t extent_szind_get(const extent_t *extent); size_t extent_usize_get(const extent_t *extent); -void *extent_before_get(const extent_t *extent); -void *extent_last_get(const extent_t *extent); -void *extent_past_get(const extent_t *extent); size_t extent_sn_get(const extent_t *extent); extent_state_t extent_state_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); bool extent_slab_get(const extent_t *extent); +void *extent_base_get(const extent_t *extent); +void *extent_addr_get(const extent_t *extent); +size_t extent_size_get(const extent_t *extent); +void *extent_before_get(const extent_t *extent); +void *extent_last_get(const extent_t *extent); +void *extent_past_get(const extent_t *extent); arena_slab_data_t *extent_slab_data_get(extent_t *extent); const arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); @@ -49,32 +49,25 @@ int extent_snad_comp(const extent_t *a, const extent_t *b); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) JEMALLOC_INLINE arena_t * extent_arena_get(const extent_t *extent) { - return arenas[extent->e_arena_ind]; -} - -JEMALLOC_INLINE void * 
-extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent->e_slab); - return PAGE_ADDR2BASE(extent->e_addr); -} - -JEMALLOC_INLINE void * -extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent->e_slab); - return extent->e_addr; -} - -JEMALLOC_INLINE size_t -extent_size_get(const extent_t *extent) { - return extent->e_size; + unsigned arena_ind = (unsigned)((extent->e_bits & + EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); + /* + * The following check is omitted because we should never actually read + * a NULL arena pointer. + */ + if (false && arena_ind > MALLOCX_ARENA_MAX) { + return NULL; + } + assert(arena_ind <= MALLOCX_ARENA_MAX); + return arenas[arena_ind]; } JEMALLOC_INLINE szind_t extent_szind_get_maybe_invalid(const extent_t *extent) { - assert(extent->e_szind <= NSIZES); - return extent->e_szind; + szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> + EXTENT_BITS_SZIND_SHIFT); + assert(szind <= NSIZES); + return szind; } JEMALLOC_INLINE szind_t @@ -89,6 +82,55 @@ extent_usize_get(const extent_t *extent) { return index2size(extent_szind_get(extent)); } +JEMALLOC_INLINE size_t +extent_sn_get(const extent_t *extent) { + return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> + EXTENT_BITS_SN_SHIFT); +} + +JEMALLOC_INLINE extent_state_t +extent_state_get(const extent_t *extent) { + return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> + EXTENT_BITS_STATE_SHIFT); +} + +JEMALLOC_INLINE bool +extent_zeroed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> + EXTENT_BITS_ZEROED_SHIFT); +} + +JEMALLOC_INLINE bool +extent_committed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> + EXTENT_BITS_COMMITTED_SHIFT); +} + +JEMALLOC_INLINE bool +extent_slab_get(const extent_t *extent) { + return (bool)((extent->e_bits & 
EXTENT_BITS_SLAB_MASK) >> + EXTENT_BITS_SLAB_SHIFT); +} + +JEMALLOC_INLINE void * +extent_base_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return PAGE_ADDR2BASE(extent->e_addr); +} + +JEMALLOC_INLINE void * +extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return extent->e_addr; +} + +JEMALLOC_INLINE size_t +extent_size_get(const extent_t *extent) { + return extent->e_size; +} + JEMALLOC_INLINE void * extent_before_get(const extent_t *extent) { return (void *)((uintptr_t)extent_base_get(extent) - PAGE); @@ -106,40 +148,15 @@ extent_past_get(const extent_t *extent) { extent_size_get(extent)); } -JEMALLOC_INLINE size_t -extent_sn_get(const extent_t *extent) { - return extent->e_sn; -} - -JEMALLOC_INLINE extent_state_t -extent_state_get(const extent_t *extent) { - return extent->e_state; -} - -JEMALLOC_INLINE bool -extent_zeroed_get(const extent_t *extent) { - return extent->e_zeroed; -} - -JEMALLOC_INLINE bool -extent_committed_get(const extent_t *extent) { - return extent->e_committed; -} - -JEMALLOC_INLINE bool -extent_slab_get(const extent_t *extent) { - return extent->e_slab; -} - JEMALLOC_INLINE arena_slab_data_t * extent_slab_data_get(extent_t *extent) { - assert(extent->e_slab); + assert(extent_slab_get(extent)); return &extent->e_slab_data; } JEMALLOC_INLINE const arena_slab_data_t * extent_slab_data_get_const(const extent_t *extent) { - assert(extent->e_slab); + assert(extent_slab_get(extent)); return &extent->e_slab_data; } @@ -151,7 +168,10 @@ extent_prof_tctx_get(const extent_t *extent) { JEMALLOC_INLINE void extent_arena_set(extent_t *extent, arena_t *arena) { - extent->e_arena_ind = (arena != NULL) ? arena_ind_get(arena) : UINT_MAX; + unsigned arena_ind = (arena != NULL) ? 
arena_ind_get(arena) : ((1U << + MALLOCX_ARENA_BITS) - 1); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); } JEMALLOC_INLINE void @@ -186,32 +206,38 @@ extent_size_set(extent_t *extent, size_t size) { JEMALLOC_INLINE void extent_szind_set(extent_t *extent, szind_t szind) { assert(szind <= NSIZES); /* NSIZES means "invalid". */ - extent->e_szind = szind; + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | + ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); } JEMALLOC_INLINE void extent_sn_set(extent_t *extent, size_t sn) { - extent->e_sn = sn; + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | + ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); } JEMALLOC_INLINE void extent_state_set(extent_t *extent, extent_state_t state) { - extent->e_state = state; + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | + ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); } JEMALLOC_INLINE void extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_zeroed = zeroed; + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); } JEMALLOC_INLINE void extent_committed_set(extent_t *extent, bool committed) { - extent->e_committed = committed; + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); } JEMALLOC_INLINE void extent_slab_set(extent_t *extent, bool slab) { - extent->e_slab = slab; + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | + ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 1b2b4bc7..ddc04087 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -10,8 +10,78 @@ typedef enum { /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ struct extent_s { - /* Arena from which this extent came, or UINT_MAX if unassociated. */ - unsigned e_arena_ind; + /* + * Bitfield containing several fields: + * + * a: arena_ind + * b: slab + * c: committed + * z: zeroed + * t: state + * i: szind + * n: sn + * + * nnnnnnnn ... nnnnnnni iiiiiiit tzcbaaaa aaaaaaaa + * + * arena_ind: Arena from which this extent came, or all 1 bits if + * unassociated. + * + * slab: The slab flag indicates whether the extent is used for a slab + * of small regions. This helps differentiate small size classes, + * and it indicates whether interior pointers can be looked up via + * iealloc(). + * + * committed: The committed flag indicates whether physical memory is + * committed to the extent, whether explicitly or implicitly + * as on a system that overcommits and satisfies physical + * memory needs on demand via soft page faults. + * + * zeroed: The zeroed flag is used by extent recycling code to track + * whether memory is zero-filled. + * + * state: The state flag is an extent_state_t. + * + * szind: The szind flag indicates usable size class index for + * allocations residing in this extent, regardless of whether the + * extent is a slab. Extent size and usable size often differ + * even for non-slabs, either due to large_pad or promotion of + * sampled small regions. + * + * sn: Serial number (potentially non-unique). + * + * Serial numbers may wrap around if JEMALLOC_MUNMAP is defined, but + * as long as comparison functions fall back on address comparison + * for equal serial numbers, stable (if imperfect) ordering is + * maintained. + * + * Serial numbers may not be unique even in the absence of + * wrap-around, e.g. when splitting an extent and assigning the same + * serial number to both resulting adjacent extents. 
+ */ + uint64_t e_bits; +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK \ + (((1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS +#define EXTENT_BITS_SLAB_MASK (0x1U << EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) +#define EXTENT_BITS_COMMITTED_MASK (0x1U << EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) +#define EXTENT_BITS_ZEROED_MASK (0x1U << EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) +#define EXTENT_BITS_STATE_MASK (0x3U << EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) +#define EXTENT_BITS_SZIND_MASK \ + (((1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_SN_SHIFT \ + (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -19,51 +89,6 @@ struct extent_s { /* Extent size. */ size_t e_size; - /* - * Usable size class index for allocations residing in this extent, - * regardless of whether the extent is a slab. Extent size and usable - * size often differ even for non-slabs, either due to large_pad or - * promotion of sampled small regions. - */ - szind_t e_szind; - - /* - * Serial number (potentially non-unique). - * - * In principle serial numbers can wrap around on 32-bit systems if - * JEMALLOC_MUNMAP is defined, but as long as comparison functions fall - * back on address comparison for equal serial numbers, stable (if - * imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of wrap-around, - * e.g. when splitting an extent and assigning the same serial number to - * both resulting adjacent extents. - */ - size_t e_sn; - - /* Extent state. 
*/ - extent_state_t e_state; - - /* - * The zeroed flag is used by extent recycling code to track whether - * memory is zero-filled. - */ - bool e_zeroed; - - /* - * True if physical memory is committed to the extent, whether - * explicitly or implicitly as on a system that overcommits and - * satisfies physical memory needs on demand via soft page faults. - */ - bool e_committed; - - /* - * The slab flag indicates whether the extent is used for a slab of - * small regions. This helps differentiate small size classes, and it - * indicates whether interior pointers can be looked up via iealloc(). - */ - bool e_slab; - union { /* Small region slab metadata. */ arena_slab_data_t e_slab_data; From 6258176c87cd3632dcdd8df14842734b46e2e916 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Mar 2017 01:52:20 -0700 Subject: [PATCH 0751/2608] Fix bitmap_ffu() to work with 3+ levels. --- include/jemalloc/internal/bitmap_inlines.h | 74 +++++++++------------- test/unit/bitmap.c | 27 ++++++++ 2 files changed, 58 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index b4a5ca00..84f43a87 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -83,52 +83,40 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { assert(min_bit < binfo->nbits); #ifdef BITMAP_USE_TREE - unsigned level = binfo->nlevels - 1; - size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level+1)); - size_t bits_per_group = 1LU << lg_bits_per_group; - size_t bits_per_group_mask = bits_per_group - 1; - unsigned group_nmask = (min_bit & bits_per_group_mask) >> (level * - LG_BITMAP_GROUP_NBITS); - bitmap_t group_mask = ~((1LU << group_nmask) - 1); - bitmap_t group = bitmap[binfo->levels[level].group_offset] & group_mask; - if (group == 0LU) { - return binfo->nbits; - } - size_t bit = ffs_lu(group) - 1; - - while (level > 0) { - level--; - - 
lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level+1)); - bits_per_group = 1LU << lg_bits_per_group; - bits_per_group_mask = bits_per_group - 1; - - group = bitmap[binfo->levels[level].group_offset + bit]; - size_t cur_base = bit << lg_bits_per_group; - if (cur_base < min_bit) { - group_nmask = (min_bit & bits_per_group_mask) >> (level - * LG_BITMAP_GROUP_NBITS); - group_mask = ~((1LU << group_nmask) - 1); - group &= group_mask; - } - if (group == 0LU) { - /* - * If min_bit is not the first bit in its group, try - * again starting at the first bit of the next group. - * This will only recurse at most once, since on - * recursion, min_bit will be the first bit in its - * group. - */ - size_t ceil_min_bit = (min_bit + - BITMAP_GROUP_NBITS_MASK) & ~BITMAP_GROUP_NBITS_MASK; - if (ceil_min_bit != min_bit && ceil_min_bit < - binfo->nbits) { - return bitmap_ffu(bitmap, binfo, ceil_min_bit); + size_t bit = 0; + for (unsigned level = binfo->nlevels; level--;) { + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit + >> lg_bits_per_group)]; + unsigned group_nmask = ((min_bit > bit) ? (min_bit - bit) : 0) + >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + assert(group_nmask <= BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group_masked = group & group_mask; + if (group_masked == 0LU) { + if (group == 0LU) { + return binfo->nbits; } - return binfo->nbits; + /* + * min_bit was preceded by one or more unset bits in + * this group, but there are no other unset bits in this + * group. Try again starting at the first bit of the + * next sibling. This will recurse at most once per + * non-root level. 
+ */ + size_t sib_base = bit + (1U << lg_bits_per_group); + assert(sib_base > min_bit); + assert(sib_base > bit); + if (sib_base >= binfo->nbits) { + return binfo->nbits; + } + return bitmap_ffu(bitmap, binfo, sib_base); } - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(group) - 1); + bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - + LG_BITMAP_GROUP_NBITS); } + assert(bit >= min_bit); assert(bit < binfo->nbits); return bit; #else diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 22d2871e..cafb2039 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -372,6 +372,33 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { } } + /* + * Unset the last bit, bubble another unset bit through the bitmap, and + * verify that bitmap_ffu() finds the correct bit for all four min_bit + * cases. + */ + if (nbits >= 3) { + bitmap_unset(bitmap, binfo, nbits-1); + for (size_t i = 0; i < nbits-1; i++) { + bitmap_unset(bitmap, binfo, i); + if (i > 0) { + assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, + "Unexpected first unset bit"); + } + assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + "Unexpected first unset bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, + "Unexpected first unset bit"); + assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), + nbits-1, "Unexpected first unset bit"); + + assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + "Unexpected first unset bit"); + } + assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, + "Unexpected first unset bit"); + } + free(bitmap); } From 7c00f04ff40a34627e31488d02ff1081c749c7ba Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Mar 2017 01:58:09 -0700 Subject: [PATCH 0752/2608] Remove BITMAP_USE_TREE. Remove tree-structured bitmap support, in order to reduce complexity and ease maintenance. No bitmaps larger than 512 bits have been necessary since before 4.0.0, and there is no current plan that would increase maximum bitmap size. 
Although tree-structured bitmaps were used on 32-bit platforms prior to this change, the overall benefits were questionable (higher metadata overhead, higher bitmap modification cost, marginally lower search cost). --- include/jemalloc/internal/bitmap_inlines.h | 95 ------------------ include/jemalloc/internal/bitmap_structs.h | 11 --- include/jemalloc/internal/bitmap_types.h | 107 --------------------- src/bitmap.c | 78 --------------- test/unit/bitmap.c | 16 --- 5 files changed, 307 deletions(-) diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index 84f43a87..506d5269 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -14,12 +14,6 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { -#ifdef BITMAP_USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. */ - return (rg == 0); -#else size_t i; for (i = 0; i < binfo->ngroups; i++) { @@ -28,7 +22,6 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { } } return true; -#endif } JEMALLOC_INLINE bool @@ -57,24 +50,6 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. 
*/ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) { - break; - } - } - } -#endif } /* ffu: find first unset >= bit. */ @@ -82,44 +57,6 @@ JEMALLOC_INLINE size_t bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { assert(min_bit < binfo->nbits); -#ifdef BITMAP_USE_TREE - size_t bit = 0; - for (unsigned level = binfo->nlevels; level--;) { - size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + - 1)); - bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit - >> lg_bits_per_group)]; - unsigned group_nmask = ((min_bit > bit) ? (min_bit - bit) : 0) - >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); - assert(group_nmask <= BITMAP_GROUP_NBITS); - bitmap_t group_mask = ~((1LU << group_nmask) - 1); - bitmap_t group_masked = group & group_mask; - if (group_masked == 0LU) { - if (group == 0LU) { - return binfo->nbits; - } - /* - * min_bit was preceded by one or more unset bits in - * this group, but there are no other unset bits in this - * group. Try again starting at the first bit of the - * next sibling. This will recurse at most once per - * non-root level. 
- */ - size_t sib_base = bit + (1U << lg_bits_per_group); - assert(sib_base > min_bit); - assert(sib_base > bit); - if (sib_base >= binfo->nbits) { - return binfo->nbits; - } - return bitmap_ffu(bitmap, binfo, sib_base); - } - bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - - LG_BITMAP_GROUP_NBITS); - } - assert(bit >= min_bit); - assert(bit < binfo->nbits); - return bit; -#else size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); @@ -133,7 +70,6 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { g = bitmap[i]; } while (i < binfo->ngroups); return binfo->nbits; -#endif } /* sfu: set first unset. */ @@ -145,16 +81,6 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { assert(!bitmap_full(bitmap, binfo)); -#ifdef BITMAP_USE_TREE - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); - } -#else i = 0; g = bitmap[0]; while ((bit = ffs_lu(g)) == 0) { @@ -162,7 +88,6 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { g = bitmap[i]; } bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); -#endif bitmap_set(bitmap, binfo, bit); return bit; } @@ -184,26 +109,6 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. 
*/ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (!propagate) { - break; - } - } - } -#endif /* BITMAP_USE_TREE */ } #endif diff --git a/include/jemalloc/internal/bitmap_structs.h b/include/jemalloc/internal/bitmap_structs.h index 297ae669..dde15328 100644 --- a/include/jemalloc/internal/bitmap_structs.h +++ b/include/jemalloc/internal/bitmap_structs.h @@ -10,19 +10,8 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; -#ifdef BITMAP_USE_TREE - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). - */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; -#endif /* BITMAP_USE_TREE */ }; #endif /* JEMALLOC_INTERNAL_BITMAP_STRUCTS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index b334769f..091ccead 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -21,115 +21,10 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) -/* - * Do some analysis on how big the bitmap is before we use a tree. For a brute - * force linear search, if we would have to call ffs_lu() more than 2^3 times, - * use a tree instead. - */ -#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define BITMAP_USE_TREE -#endif - /* Number of groups required to store a given number of bits. 
*/ #define BITMAP_BITS2GROUPS(nbits) \ (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) -/* - * Number of groups required at a particular level for a given number of bits. - */ -#define BITMAP_GROUPS_L0(nbits) \ - BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) - -/* - * Assuming the number of levels, number of groups required for a given number - * of bits. - */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ - BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ - (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ - (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ - (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ - (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) - -/* - * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
- */ -#ifdef BITMAP_USE_TREE - -#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) -#else -# error "Unsupported bitmap size" -#endif - -/* - * Maximum number of levels possible. This could be statically computed based - * on LG_BITMAP_MAXBITS: - * - * #define BITMAP_MAX_LEVELS \ - * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - * - * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so - * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the - * various cascading macros. The only additional cost this incurs is some - * unused trailing entries in bitmap_info_t structures; the bitmaps themselves - * are not impacted. - */ -#define BITMAP_MAX_LEVELS 5 - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* nlevels. */ \ - (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ - (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ - (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ - (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ - /* levels. 
*/ \ - { \ - {0}, \ - {BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ - BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ - BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ - BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ - + BITMAP_GROUPS_L0(nbits)} \ - } \ -} - -#else /* BITMAP_USE_TREE */ - #define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) #define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) @@ -140,6 +35,4 @@ typedef unsigned long bitmap_t; BITMAP_BITS2GROUPS(nbits) \ } -#endif /* BITMAP_USE_TREE */ - #endif /* JEMALLOC_INTERNAL_BITMAP_TYPES_H */ diff --git a/src/bitmap.c b/src/bitmap.c index 81d2a6da..a629aca6 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -3,82 +3,6 @@ /******************************************************************************/ -#ifdef BITMAP_USE_TREE - -void -bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { - unsigned i; - size_t group_count; - - assert(nbits > 0); - assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); - - /* - * Compute the number of groups necessary to store nbits bits, and - * progressively work upward through the levels until reaching a level - * that requires only one group. 
- */ - binfo->levels[0].group_offset = 0; - group_count = BITMAP_BITS2GROUPS(nbits); - for (i = 1; group_count > 1; i++) { - assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - group_count = BITMAP_BITS2GROUPS(group_count); - } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; - assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); - binfo->nlevels = i; - binfo->nbits = nbits; -} - -static size_t -bitmap_info_ngroups(const bitmap_info_t *binfo) { - return binfo->levels[binfo->nlevels].group_offset; -} - -void -bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { - size_t extra; - unsigned i; - - /* - * Bits are actually inverted with regard to the external bitmap - * interface. - */ - - if (fill) { - /* The "filled" bitmap starts out with all 0 bits. */ - memset(bitmap, 0, bitmap_size(binfo)); - return; - } - - /* - * The "empty" bitmap starts out with all 1 bits, except for trailing - * unused bits (if any). Note that each group uses bit 0 to correspond - * to the first logical bit in the group, so extra bits are the most - * significant bits of the last group. 
- */ - memset(bitmap, 0xffU, bitmap_size(binfo)); - extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) - & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) { - bitmap[binfo->levels[1].group_offset - 1] >>= extra; - } - for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; - if (extra != 0) { - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; - } - } -} - -#else /* BITMAP_USE_TREE */ - void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { assert(nbits > 0); @@ -110,8 +34,6 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { } } -#endif /* BITMAP_USE_TREE */ - size_t bitmap_size(const bitmap_info_t *binfo) { return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index cafb2039..f65ed53e 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -103,24 +103,8 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { assert_zu_eq(binfo->nbits, binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); -#ifdef BITMAP_USE_TREE - assert_u_eq(binfo->nlevels, binfo_dyn.nlevels, - "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); - { - unsigned i; - - for (i = 0; i < binfo->nlevels; i++) { - assert_zu_eq(binfo->levels[i].group_offset, - binfo_dyn.levels[i].group_offset, - "Unexpected difference between static and dynamic " - "initialization, nbits=%zu, level=%u", nbits, i); - } - } -#else assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups, "Unexpected difference between static and dynamic initialization"); -#endif } TEST_BEGIN(test_bitmap_initializer) { From 4020523f677fbbdac2d00de3fa7d29c395814a2d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Mar 2017 04:08:51 -0700 Subject: [PATCH 
0753/2608] Fix a race in rtree_szind_slab_update() for RTREE_LEAF_COMPACT. --- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/rtree_inlines.h | 65 +++++++++++++++---- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index d22cd874..64151c1b 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -439,6 +439,7 @@ rtree_leaf_elm_release rtree_leaf_elm_slab_read rtree_leaf_elm_slab_write rtree_leaf_elm_szind_read +rtree_leaf_elm_szind_slab_update rtree_leaf_elm_szind_write rtree_leaf_elm_witness_access rtree_leaf_elm_witness_acquire diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 6f92df94..d3799cb3 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -26,6 +26,8 @@ void rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, bool slab); void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, szind_t szind, bool slab); +void rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind, bool slab); rtree_leaf_elm_t *rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, @@ -41,12 +43,12 @@ bool rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, szind_t *r_szind); bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab); -void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab); rtree_leaf_elm_t *rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t 
*rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); void rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm); +void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab); void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key); #endif @@ -251,12 +253,12 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, JEMALLOC_INLINE void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, szind_t szind, bool slab) { -#ifdef RTREE_LEAF_COMPACT if (config_debug && acquired) { rtree_leaf_elm_witness_access(tsdn, rtree, elm); } assert(!slab || szind < NBINS); +#ifdef RTREE_LEAF_COMPACT uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) | @@ -274,6 +276,44 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } +JEMALLOC_INLINE void +rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + /* + * The caller implicitly assures that it is the only writer to the szind + * and slab fields, and that the extent field cannot currently change. + */ +#ifdef RTREE_LEAF_COMPACT + /* + * Another thread may concurrently acquire the elm, which means that + * even though the szind and slab fields will not be concurrently + * modified by another thread, the fact that the lock is embedded in the + * same word requires that a CAS operation be used here. + */ + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, false, + true) & ~((uintptr_t)0x1); /* Mask lock bit. 
*/ + uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)slab << 1); + spin_t spinner = SPIN_INITIALIZER; + while (true) { + if (likely(atomic_compare_exchange_strong_p(&elm->le_bits, + (void **)&old_bits, (void *)bits, ATOMIC_ACQUIRE, + ATOMIC_RELAXED))) { + break; + } + spin_adaptive(&spinner); + } +#else + /* No need to lock. */ + rtree_leaf_elm_slab_write(tsdn, rtree, elm, false, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, false, szind); +#endif +} + JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { @@ -404,16 +444,6 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, return false; } -JEMALLOC_INLINE void -rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); - - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - rtree_leaf_elm_slab_write(tsdn, rtree, elm, false, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, false, szind); -} - JEMALLOC_INLINE rtree_leaf_elm_t * rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { @@ -464,6 +494,15 @@ rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) { } } +JEMALLOC_INLINE void +rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t szind, bool slab) { + assert(!slab || szind < NBINS); + + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); +} + JEMALLOC_INLINE void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { From c6d1819e483ca226e27ca4cf81f7d850e4d4dc79 Mon Sep 17 
00:00:00 2001 From: Jason Evans Date: Mon, 27 Mar 2017 04:10:11 -0700 Subject: [PATCH 0754/2608] Simplify rtree_clear() to avoid locking. --- include/jemalloc/internal/rtree_inlines.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index d3799cb3..4b530dff 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -506,10 +506,10 @@ rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, JEMALLOC_INLINE void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, rtree, rtree_ctx, - key, true, false); - rtree_leaf_elm_write(tsdn, rtree, elm, true, NULL, NSIZES, false); - rtree_leaf_elm_release(tsdn, rtree, elm); + rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) != + NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, false, NULL, NSIZES, false); } #endif From af3d737a9aafae8b27a837edaf1f54c14d45a727 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Mar 2017 11:48:39 -0700 Subject: [PATCH 0755/2608] Simplify rtree cache replacement policy. To avoid memmove on free() fast path, simplify the cache replacement policy to only bubble up the cache hit element by 1. 
--- include/jemalloc/internal/rtree_inlines.h | 25 ++++++++++------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 4b530dff..3e619b3f 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -325,30 +325,27 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ rtree_leaf_elm_t *leaf = rtree_ctx->cache[i].leaf; \ if (likely(leaf != NULL)) { \ - /* Reorder. */ \ - memmove(&rtree_ctx->cache[1], \ - &rtree_ctx->cache[0], \ - sizeof(rtree_ctx_cache_elm_t) * i); \ - rtree_ctx->cache[0].leafkey = leafkey; \ - rtree_ctx->cache[0].leaf = leaf; \ - \ + /* Bubble up by one. */ \ + if (i > 0) { \ + rtree_ctx->cache[i] = \ + rtree_ctx->cache[i - 1]; \ + rtree_ctx->cache[i - 1].leafkey = \ + leafkey; \ + rtree_ctx->cache[i - 1].leaf = leaf; \ + } \ uintptr_t subkey = rtree_subkey(key, \ RTREE_HEIGHT-1); \ return &leaf[subkey]; \ } \ } \ } while (0) - /* Check the MRU cache entry. */ + /* Check the first cache entry. */ RTREE_CACHE_CHECK(0); /* * Search the remaining cache elements, and on success move the matching - * element to the front. Unroll the first iteration to avoid calling - * memmove() (the compiler typically optimizes it into raw moves). + * element up by one slot. */ - if (RTREE_CTX_NCACHE > 1) { - RTREE_CACHE_CHECK(1); - } - for (unsigned i = 2; i < RTREE_CTX_NCACHE; i++) { + for (unsigned i = 1; i < RTREE_CTX_NCACHE; i++) { RTREE_CACHE_CHECK(i); } #undef RTREE_CACHE_CHECK From 07f4f93434b8fca5aabedbbaa122fa8f6eb681a6 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 27 Mar 2017 16:41:47 -0700 Subject: [PATCH 0756/2608] Move arena_slab_data_t's nfree into extent_t's e_bits. Compact extent_t to 128 bytes on 64-bit systems by moving arena_slab_data_t's nfree into extent_t's e_bits. 
Cacheline-align extent_t structures so that they always cross the minimum number of cacheline boundaries. Re-order extent_t fields such that all fields except the slab bitmap (and overlaid heap profiling context pointer) are in the first cacheline. This resolves #461. --- include/jemalloc/internal/arena_structs_a.h | 3 -- include/jemalloc/internal/extent_inlines.h | 32 +++++++++++- include/jemalloc/internal/extent_structs.h | 50 ++++++++++++------- include/jemalloc/internal/private_symbols.txt | 4 ++ src/arena.c | 37 +++++++------- src/extent.c | 3 +- 6 files changed, 86 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h index ed265b20..07013a06 100644 --- a/include/jemalloc/internal/arena_structs_a.h +++ b/include/jemalloc/internal/arena_structs_a.h @@ -2,9 +2,6 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H struct arena_slab_data_s { - /* Number of free regions in slab. */ - unsigned nfree; - /* Per region allocated/deallocated bitmap. 
*/ bitmap_t bitmap[BITMAP_GROUPS_MAX]; }; diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 22229b50..99fa67c7 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -11,6 +11,7 @@ extent_state_t extent_state_get(const extent_t *extent); bool extent_zeroed_get(const extent_t *extent); bool extent_committed_get(const extent_t *extent); bool extent_slab_get(const extent_t *extent); +unsigned extent_nfree_get(const extent_t *extent); void *extent_base_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); @@ -25,6 +26,9 @@ void extent_addr_set(extent_t *extent, void *addr); void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); void extent_szind_set(extent_t *extent, szind_t szind); +void extent_nfree_set(extent_t *extent, unsigned nfree); +void extent_nfree_inc(extent_t *extent); +void extent_nfree_dec(extent_t *extent); void extent_sn_set(extent_t *extent, size_t sn); void extent_state_set(extent_t *extent, extent_state_t state); void extent_zeroed_set(extent_t *extent, bool zeroed); @@ -112,6 +116,13 @@ extent_slab_get(const extent_t *extent) { EXTENT_BITS_SLAB_SHIFT); } +JEMALLOC_INLINE unsigned +extent_nfree_get(const extent_t *extent) { + assert(extent_slab_get(extent)); + return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> + EXTENT_BITS_NFREE_SHIFT); +} + JEMALLOC_INLINE void * extent_base_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || @@ -210,6 +221,25 @@ extent_szind_set(extent_t *extent, szind_t szind) { ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); } +JEMALLOC_INLINE void +extent_nfree_set(extent_t *extent, unsigned nfree) { + assert(extent_slab_get(extent)); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | + ((uint64_t)nfree << 
EXTENT_BITS_NFREE_SHIFT); +} + +JEMALLOC_INLINE void +extent_nfree_inc(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +JEMALLOC_INLINE void +extent_nfree_dec(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + JEMALLOC_INLINE void extent_sn_set(extent_t *extent, size_t sn) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | @@ -260,10 +290,10 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); + ql_elm_new(extent, ql_link); if (config_prof) { extent_prof_tctx_set(extent, NULL); } - ql_elm_new(extent, ql_link); } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index ddc04087..1527acb9 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -20,8 +20,9 @@ struct extent_s { * t: state * i: szind * n: sn + * f: nfree * - * nnnnnnnn ... nnnnnnni iiiiiiit tzcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -47,6 +48,8 @@ struct extent_s { * even for non-slabs, either due to large_pad or promotion of * sampled small regions. * + * nfree: Number of free regions in slab. + * * sn: Serial number (potentially non-unique). 
* * Serial numbers may wrap around if JEMALLOC_MUNMAP is defined, but @@ -61,26 +64,35 @@ struct extent_s { uint64_t e_bits; #define EXTENT_BITS_ARENA_SHIFT 0 #define EXTENT_BITS_ARENA_MASK \ - (((1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) + (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) #define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS -#define EXTENT_BITS_SLAB_MASK (0x1U << EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_SLAB_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT) #define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) -#define EXTENT_BITS_COMMITTED_MASK (0x1U << EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) #define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) -#define EXTENT_BITS_ZEROED_MASK (0x1U << EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_ZEROED_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) #define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) -#define EXTENT_BITS_STATE_MASK (0x3U << EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_STATE_MASK \ + ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) #define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) #define EXTENT_BITS_SZIND_MASK \ - (((1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) + (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_SHIFT \ + (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) +#define EXTENT_BITS_NFREE_MASK \ + ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) #define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) + (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) #define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ @@ -89,17 +101,6 @@ struct extent_s { /* Extent size. */ size_t e_size; - union { - /* Small region slab metadata. 
*/ - arena_slab_data_t e_slab_data; - - /* Profile counters, used for large objects. */ - union { - void *e_prof_tctx_pun; - prof_tctx_t *e_prof_tctx; - }; - }; - /* * List linkage, used by a variety of lists: * - arena_bin_t's slabs_full @@ -112,6 +113,17 @@ struct extent_s { /* Linkage for per size class sn/address-ordered heaps. */ phn(extent_t) ph_link; + + union { + /* Small region slab metadata. */ + arena_slab_data_t e_slab_data; + + /* Profile counters, used for large objects. */ + union { + void *e_prof_tctx_pun; + prof_tctx_t *e_prof_tctx; + }; + }; }; typedef ql_head(extent_t) extent_list_t; typedef ph(extent_t) extent_heap_t; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 64151c1b..5d03f5d0 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -175,6 +175,10 @@ extent_list_last extent_list_remove extent_list_replace extent_merge_wrapper +extent_nfree_dec +extent_nfree_get +extent_nfree_inc +extent_nfree_set extent_past_get extent_prof_tctx_get extent_prof_tctx_set diff --git a/src/arena.c b/src/arena.c index b0913c35..b8ef4731 100644 --- a/src/arena.c +++ b/src/arena.c @@ -361,13 +361,13 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; - assert(slab_data->nfree > 0); + assert(extent_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); ret = (void *)((uintptr_t)extent_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); - slab_data->nfree--; + extent_nfree_dec(slab); return ret; } @@ -416,12 +416,12 @@ arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, const arena_bin_info_t *bin_info = &arena_bin_info[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); - assert(slab_data->nfree < bin_info->nregs); + assert(extent_nfree_get(slab) < 
bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. */ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - slab_data->nfree++; + extent_nfree_inc(slab); } static void @@ -999,7 +999,7 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { static void arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { - assert(extent_slab_data_get(slab)->nfree > 0); + assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } @@ -1022,7 +1022,7 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { static void arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) { - assert(extent_slab_data_get(slab)->nfree == 0); + assert(extent_nfree_get(slab) == 0); extent_list_append(&bin->slabs_full, slab); } @@ -1209,7 +1209,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Initialize slab internals. */ arena_slab_data_t *slab_data = extent_slab_data_get(slab); - slab_data->nfree = bin_info->nregs; + extent_nfree_set(slab, bin_info->nregs); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); @@ -1277,7 +1277,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, * Another thread updated slabcur while this one ran without the * bin lock in arena_bin_nonfull_slab_get(). */ - if (extent_slab_data_get(bin->slabcur)->nfree > 0) { + if (extent_nfree_get(bin->slabcur) > 0) { void *ret = arena_slab_reg_alloc(tsdn, bin->slabcur, bin_info); if (slab != NULL) { @@ -1290,8 +1290,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, * arena_bin_lower_slab() must be called, as if * a region were just deallocated from the slab. 
*/ - if (extent_slab_data_get(slab)->nfree == - bin_info->nregs) { + if (extent_nfree_get(slab) == bin_info->nregs) { arena_dalloc_bin_slab(tsdn, arena, slab, bin); } else { @@ -1311,7 +1310,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } bin->slabcur = slab; - assert(extent_slab_data_get(bin->slabcur)->nfree > 0); + assert(extent_nfree_get(bin->slabcur) > 0); return arena_slab_reg_alloc(tsdn, slab, bin_info); } @@ -1333,8 +1332,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, tbin->lg_fill_div); i < nfill; i++) { extent_t *slab; void *ptr; - if ((slab = bin->slabcur) != NULL && - extent_slab_data_get(slab)->nfree > 0) { + if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > + 0) { ptr = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); } else { @@ -1405,8 +1404,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { usize = index2size(binind); malloc_mutex_lock(tsdn, &bin->lock); - if ((slab = bin->slabcur) != NULL && extent_slab_data_get(slab)->nfree > - 0) { + if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); @@ -1582,7 +1580,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_bin_t *bin) { - assert(extent_slab_data_get(slab)->nfree > 0); + assert(extent_nfree_get(slab) > 0); /* * Make sure that if bin->slabcur is non-NULL, it refers to the @@ -1592,7 +1590,7 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, */ if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) { /* Switch slabcur. 
*/ - if (extent_slab_data_get(bin->slabcur)->nfree > 0) { + if (extent_nfree_get(bin->slabcur) > 0) { arena_bin_slabs_nonfull_insert(bin, bin->slabcur); } else { arena_bin_slabs_full_insert(bin, bin->slabcur); @@ -1619,10 +1617,11 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); - if (slab_data->nfree == bin_info->nregs) { + unsigned nfree = extent_nfree_get(slab); + if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(slab, bin); arena_dalloc_bin_slab(tsdn, arena, slab, bin); - } else if (slab_data->nfree == 1 && slab != bin->slabcur) { + } else if (nfree == 1 && slab != bin->slabcur) { arena_bin_slabs_full_remove(bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); } diff --git a/src/extent.c b/src/extent.c index 3f4f5f1b..8bd8eb79 100644 --- a/src/extent.c +++ b/src/extent.c @@ -94,7 +94,8 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { extent = extent_list_last(&arena->extent_freelist); if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); - return base_alloc(tsdn, arena->base, sizeof(extent_t), QUANTUM); + return base_alloc(tsdn, arena->base, sizeof(extent_t), + CACHELINE); } extent_list_remove(&arena->extent_freelist, extent); malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); From d4e98bc0b27b2ed660002e1603fa353d5c89eb91 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Mar 2017 17:22:01 -0700 Subject: [PATCH 0757/2608] Lookup extent once per time during tcache_flush_small / _large. Caching the extents on stack to avoid redundant looking up overhead. 
--- src/tcache.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 9c99c8b3..6057c890 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -91,19 +91,23 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem) { - arena_t *arena; - void *ptr; - unsigned i, nflush, ndeferred; bool merged_stats = false; assert(binind < NBINS); assert(rem <= tbin->ncached); - arena = tcache->arena; + arena_t *arena = tcache->arena; assert(arena != NULL); - for (nflush = tbin->ncached - rem; nflush > 0; nflush = ndeferred) { + unsigned nflush = tbin->ncached - rem; + VARIABLE_ARRAY(extent_t *, item_extent, nflush); + /* Look up extent once per item. */ + for (unsigned i = 0 ; i < nflush; i++) { + item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + } + + while (nflush > 0) { /* Lock the arena bin associated with the first object. */ - extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1)); + extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -123,12 +127,12 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; } - ndeferred = 0; - for (i = 0; i < nflush; i++) { - ptr = *(tbin->avail - 1 - i); - assert(ptr != NULL); + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = *(tbin->avail - 1 - i); + extent = item_extent[i]; + assert(ptr != NULL && extent != NULL); - extent = iealloc(tsd_tsdn(tsd), ptr); if (extent_arena_get(extent) == bin_arena) { arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), bin_arena, extent, ptr); @@ -140,11 +144,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * handled in a future pass. 
*/ *(tbin->avail - 1 - ndeferred) = ptr; + item_extent[ndeferred] = extent; ndeferred++; } } malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); + nflush = ndeferred; } if (config_stats && !merged_stats) { /* @@ -178,9 +184,15 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = tbin->ncached - rem; + VARIABLE_ARRAY(extent_t *, item_extent, nflush); + /* Look up extent once per item. */ + for (unsigned i = 0 ; i < nflush; i++) { + item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + } + while (nflush > 0) { /* Lock the arena associated with the first object. */ - extent_t *extent = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1)); + extent_t *extent = item_extent[0]; arena_t *locked_arena = extent_arena_get(extent); UNUSED bool idump; @@ -217,8 +229,9 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); - assert(ptr != NULL); - extent = iealloc(tsd_tsdn(tsd), ptr); + extent = item_extent[i]; + assert(ptr != NULL && extent != NULL); + if (extent_arena_get(extent) == locked_arena) { large_dalloc_finish(tsd_tsdn(tsd), extent); } else { @@ -229,6 +242,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * in a future pass. */ *(tbin->avail - 1 - ndeferred) = ptr; + item_extent[ndeferred] = extent; ndeferred++; } } From 5bf800a54247c5752053831e15f7b132bf9fddbf Mon Sep 17 00:00:00 2001 From: Aliaksey Kandratsenka Date: Sat, 14 Dec 2013 12:03:02 -0800 Subject: [PATCH 0758/2608] issue-586: detect main executable even if PIE is active Previous logic of detecting main program addresses is to assume that main executable is at least addressess. With PIE (active by default on Ubuntus) it doesn't work. 
In order to deal with that, we're attempting to find main executable mapping in /proc/[pid]/maps. And old logic is preserved too just in case. --- bin/jeprof.in | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index 42087fce..baa80a54 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -71,6 +71,7 @@ use strict; use warnings; use Getopt::Long; +use Cwd; my $JEPROF_VERSION = "@jemalloc_version@"; my $PPROF_VERSION = "2.0"; @@ -4570,7 +4571,7 @@ sub ParseTextSectionHeader { # Split /proc/pid/maps dump into a list of libraries sub ParseLibraries { return if $main::use_symbol_page; # We don't need libraries info. - my $prog = shift; + my $prog = Cwd::abs_path(shift); my $map = shift; my $pcs = shift; @@ -4603,6 +4604,16 @@ sub ParseLibraries { $finish = HexExtend($2); $offset = $zero_offset; $lib = $3; + } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable in + # /proc/self/maps as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths } # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) From 9ed84b0d458a22e1d98f071f8fb5efb2de24998e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 29 Mar 2017 13:18:02 -0700 Subject: [PATCH 0759/2608] Add init function support to tsd members. This will facilitate embedding tcache into tsd, which will require proper initialization cannot be done via the static initializer. Make tsd->rtree_ctx to be initialized via rtree_ctx_data_init(). 
--- include/jemalloc/internal/private_symbols.txt | 2 ++ include/jemalloc/internal/rtree_externs.h | 1 + include/jemalloc/internal/rtree_types.h | 14 +++++--- include/jemalloc/internal/tsd_externs.h | 1 + include/jemalloc/internal/tsd_inlines.h | 9 ++--- include/jemalloc/internal/tsd_structs.h | 34 +++++++++---------- src/rtree.c | 11 ++++++ src/tsd.c | 19 ++++++++++- test/unit/rtree.c | 16 +++++---- 9 files changed, 74 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 5d03f5d0..1cced603 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -424,6 +424,7 @@ prof_thread_name_set psz2ind psz2u rtree_clear +rtree_ctx_data_init rtree_delete rtree_extent_read rtree_extent_szind_read @@ -520,6 +521,7 @@ tsd_booted tsd_booted_get tsd_cleanup tsd_cleanup_wrapper +tsd_data_init tsd_fetch tsd_fetch_impl tsd_get diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index 5145c12c..482f6ba3 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -43,5 +43,6 @@ void rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); void rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); +bool rtree_ctx_data_init(rtree_ctx_t *ctx); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index de3893be..e6041560 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -38,6 +38,9 @@ typedef struct rtree_s rtree_t; # define RTREE_LEAF_COMPACT #endif +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) + /* * Number of leafkey/leaf pairs to cache. 
Each entry supports an entire leaf, * so the cache hit rate is typically high even with a small number of entries. @@ -51,12 +54,13 @@ typedef struct rtree_s rtree_t; * the tree nodes, and the cache will itself suffer cache misses if made overly * large, not to mention the cost of linear search. */ -#define RTREE_CTX_NCACHE 8 +#define RTREE_CTX_NCACHE 8 -/* Static initializer for rtree_ctx_t. */ -#define RTREE_CTX_INITIALIZER { \ - {{0, NULL} /* C initializes all trailing elements to NULL. */} \ -} +/* + * Zero initializer required for tsd initialization only. Proper initialization + * done via rtree_ctx_data_init(). + */ +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} /* * Maximum number of concurrently acquired elements per thread. This controls diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h index 87ebaf2d..9b88a567 100644 --- a/include/jemalloc/internal/tsd_externs.h +++ b/include/jemalloc/internal/tsd_externs.h @@ -14,5 +14,6 @@ void *tsd_init_check_recursion(tsd_init_head_t *head, void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); #endif void tsd_cleanup(void *arg); +bool tsd_data_init(void *arg); #endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 96de4063..7d57b7dd 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -8,7 +8,7 @@ tsd_t *tsd_fetch_impl(bool init); tsd_t *tsd_fetch(void); tsdn_t *tsd_tsdn(tsd_t *tsd); bool tsd_nominal(tsd_t *tsd); -#define O(n, t, gs, c) \ +#define O(n, t, gs, i, c) \ t *tsd_##n##p_get(tsd_t *tsd); \ t tsd_##n##_get(tsd_t *tsd); \ void tsd_##n##_set(tsd_t *tsd, t n); @@ -39,9 +39,11 @@ tsd_fetch_impl(bool init) { tsd->state = tsd_state_nominal; /* Trigger cleanup handler registration. 
*/ tsd_set(tsd); + tsd_data_init(tsd); } else if (tsd->state == tsd_state_purgatory) { tsd->state = tsd_state_reincarnated; tsd_set(tsd); + tsd_data_init(tsd); } else { assert(tsd->state == tsd_state_reincarnated); } @@ -76,7 +78,7 @@ tsd_##n##_set(tsd_t *tsd, t n) { \ tsd->n = n; \ } #define MALLOC_TSD_getset_no(n, t) -#define O(n, t, gs, c) \ +#define O(n, t, gs, i, c) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ return &tsd->n; \ @@ -121,8 +123,7 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { * return a pointer to it. */ if (unlikely(tsdn_null(tsdn))) { - static const rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - memcpy(fallback, &rtree_ctx, sizeof(rtree_ctx_t)); + rtree_ctx_data_init(fallback); return fallback; } return tsd_rtree_ctx(tsdn_tsd(tsdn)); diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index 722b9669..b4ac09fd 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -15,23 +15,23 @@ struct tsd_init_head_s { #endif #define MALLOC_TSD \ -/* O(name, type, [gs]et, cleanup) */ \ - O(tcache, tcache_t *, yes, yes) \ - O(thread_allocated, uint64_t, yes, no) \ - O(thread_deallocated, uint64_t, yes, no) \ - O(prof_tdata, prof_tdata_t *, yes, yes) \ - O(iarena, arena_t *, yes, yes) \ - O(arena, arena_t *, yes, yes) \ - O(arenas_tdata, arena_tdata_t *,yes, yes) \ - O(narenas_tdata, unsigned, yes, no) \ - O(arenas_tdata_bypass, bool, no, no) \ +/* O(name, type, [gs]et, init, cleanup) */ \ + O(tcache, tcache_t *, yes, no, yes) \ + O(thread_allocated, uint64_t, yes, no, no) \ + O(thread_deallocated, uint64_t, yes, no, no) \ + O(prof_tdata, prof_tdata_t *, yes, no, yes) \ + O(iarena, arena_t *, yes, no, yes) \ + O(arena, arena_t *, yes, no, yes) \ + O(arenas_tdata, arena_tdata_t *,yes, no, yes) \ + O(narenas_tdata, unsigned, yes, no, no) \ + O(arenas_tdata_bypass, bool, no, no, no) \ O(tcache_enabled, tcache_enabled_t, \ - yes, no) \ - 
O(rtree_ctx, rtree_ctx_t, no, no) \ - O(witnesses, witness_list_t, no, yes) \ + yes, no, no) \ + O(rtree_ctx, rtree_ctx_t, no, yes, no) \ + O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ - no, no) \ - O(witness_fork, bool, yes, no) \ + no, no, no) \ + O(witness_fork, bool, yes, no, no) #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -45,7 +45,7 @@ struct tsd_init_head_s { 0, \ false, \ tcache_enabled_default, \ - RTREE_CTX_INITIALIZER, \ + RTREE_CTX_ZERO_INITIALIZER, \ ql_head_initializer(witnesses), \ RTREE_ELM_WITNESS_TSD_INITIALIZER, \ false \ @@ -53,7 +53,7 @@ struct tsd_init_head_s { struct tsd_s { tsd_state_t state; -#define O(n, t, gs, c) \ +#define O(n, t, gs, i, c) \ t n; MALLOC_TSD #undef O diff --git a/src/rtree.c b/src/rtree.c index a07380f3..b2c6824f 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -424,3 +424,14 @@ rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, witness_unlock(tsdn, witness); rtree_leaf_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); } + +bool +rtree_ctx_data_init(rtree_ctx_t *ctx) { + for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) { + rtree_ctx_cache_elm_t *cache = &ctx->cache[i]; + cache->leafkey = RTREE_LEAFKEY_INVALID; + cache->leaf = NULL; + } + + return false; +} diff --git a/src/tsd.c b/src/tsd.c index 9614dd9a..970d5baa 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -60,6 +60,23 @@ malloc_tsd_cleanup_register(bool (*f)(void)) { ncleanups++; } +bool +tsd_data_init(void *arg) { + tsd_t *tsd = (tsd_t *)arg; +#define MALLOC_TSD_init_yes(n, t) \ + if (n##_data_init(&tsd->n)) { \ + return true; \ + } +#define MALLOC_TSD_init_no(n, t) +#define O(n, t, gs, i, c) \ + MALLOC_TSD_init_##i(n, t) +MALLOC_TSD +#undef MALLOC_TSD_init_yes +#undef MALLOC_TSD_init_no +#undef O + return false; +} + void tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; @@ -72,7 +89,7 @@ tsd_cleanup(void *arg) { #define MALLOC_TSD_cleanup_yes(n, t) \ n##_cleanup(tsd); #define 
MALLOC_TSD_cleanup_no(n, t) -#define O(n, t, gs, c) \ +#define O(n, t, gs, i, c) \ MALLOC_TSD_cleanup_##c(n, t) MALLOC_TSD #undef MALLOC_TSD_cleanup_yes diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 7a25c47d..3c5b2df4 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -68,7 +68,8 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); rtree_t *rtree = &test_rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, false), "rtree_extent_read() should return NULL for empty tree"); @@ -89,7 +90,8 @@ typedef struct { static void * thd_start(void *varg) { thd_start_arg_t *arg = (thd_start_arg_t *)varg; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); sfmt_t *sfmt; extent_t *extent; tsdn_t *tsdn; @@ -173,7 +175,8 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); rtree_t *rtree = &test_rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &extent_a, @@ -207,8 +210,8 @@ TEST_BEGIN(test_rtree_bits) { extent_state_active, false, false); rtree_t *rtree = &test_rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; - + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { @@ -240,7 +243,8 @@ TEST_BEGIN(test_rtree_random) { tsdn_t *tsdn = tsdn_fetch(); uintptr_t keys[NSET]; rtree_t *rtree = &test_rtree; - rtree_ctx_t rtree_ctx = RTREE_CTX_INITIALIZER; + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); extent_t extent; extent_init(&extent, NULL, NULL, 0, 
false, NSIZES, 0, From 51d368295032910577d4f34b9ff99b3ed41544b9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2017 17:14:43 -0700 Subject: [PATCH 0760/2608] Remove the leafkey NULL check in leaf_elm_lookup. --- include/jemalloc/internal/rtree_inlines.h | 18 +++++++----------- include/jemalloc/internal/rtree_types.h | 6 +++--- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 3e619b3f..bebe49e0 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -324,19 +324,15 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, #define RTREE_CACHE_CHECK(i) do { \ if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ rtree_leaf_elm_t *leaf = rtree_ctx->cache[i].leaf; \ - if (likely(leaf != NULL)) { \ + assert(leaf != NULL); \ + if (i > 0) { \ /* Bubble up by one. */ \ - if (i > 0) { \ - rtree_ctx->cache[i] = \ - rtree_ctx->cache[i - 1]; \ - rtree_ctx->cache[i - 1].leafkey = \ - leafkey; \ - rtree_ctx->cache[i - 1].leaf = leaf; \ - } \ - uintptr_t subkey = rtree_subkey(key, \ - RTREE_HEIGHT-1); \ - return &leaf[subkey]; \ + rtree_ctx->cache[i] = rtree_ctx->cache[i - 1]; \ + rtree_ctx->cache[i - 1].leafkey = leafkey; \ + rtree_ctx->cache[i - 1].leaf = leaf; \ } \ + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ + return &leaf[subkey]; \ } \ } while (0) /* Check the first cache entry. */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index e6041560..e480542d 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -39,7 +39,7 @@ typedef struct rtree_s rtree_t; #endif /* Needed for initialization only. */ -#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) /* * Number of leafkey/leaf pairs to cache. 
Each entry supports an entire leaf, @@ -54,7 +54,7 @@ typedef struct rtree_s rtree_t; * the tree nodes, and the cache will itself suffer cache misses if made overly * large, not to mention the cost of linear search. */ -#define RTREE_CTX_NCACHE 8 +#define RTREE_CTX_NCACHE 8 /* * Zero initializer required for tsd initialization only. Proper initialization @@ -68,7 +68,7 @@ typedef struct rtree_s rtree_t; * would have a witness_t directly embedded, but that would dramatically bloat * the tree. This must contain enough entries to e.g. coalesce two extents. */ -#define RTREE_ELM_ACQUIRE_MAX 4 +#define RTREE_ELM_ACQUIRE_MAX 4 /* Initializers for rtree_leaf_elm_witness_tsd_t. */ #define RTREE_ELM_WITNESS_INITIALIZER { \ From d3cda3423cd7ae47630833e4a888bdaf6a7bf8d9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 29 Mar 2017 17:00:52 -0700 Subject: [PATCH 0761/2608] Do proper cleanup for tsd_state_reincarnated. Also enable arena_bind under non-nominal state, as the cleanup will be handled correctly now. 
--- include/jemalloc/internal/tsd_inlines.h | 3 +- src/jemalloc.c | 9 ++---- src/tsd.c | 15 ++++----- test/unit/tsd.c | 41 ++++++++++++++++++++++++- 4 files changed, 50 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 7d57b7dd..7c3fba5f 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -74,7 +74,8 @@ tsd_##n##_get(tsd_t *tsd) { \ } \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t n) { \ - assert(tsd->state == tsd_state_nominal); \ + assert(tsd->state == tsd_state_nominal || \ + tsd->state == tsd_state_reincarnated); \ tsd->n = n; \ } #define MALLOC_TSD_getset_no(n, t) diff --git a/src/jemalloc.c b/src/jemalloc.c index ab047c24..7c8fe9c9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -422,13 +422,7 @@ arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { static void arena_bind(tsd_t *tsd, unsigned ind, bool internal) { - arena_t *arena; - - if (!tsd_nominal(tsd)) { - return; - } - - arena = arena_get(tsd_tsdn(tsd), ind, false); + arena_t *arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_inc(arena, internal); if (internal) { @@ -455,6 +449,7 @@ arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { arena = arena_get(tsd_tsdn(tsd), ind, false); arena_nthreads_dec(arena, internal); + if (internal) { tsd_iarena_set(tsd, NULL); } else { diff --git a/src/tsd.c b/src/tsd.c index 970d5baa..6b68c001 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -86,6 +86,12 @@ tsd_cleanup(void *arg) { /* Do nothing. */ break; case tsd_state_nominal: + case tsd_state_reincarnated: + /* + * Reincarnated means another destructor deallocated memory + * after this destructor was called. Reset state to + * tsd_state_purgatory and request another callback. 
+ */ #define MALLOC_TSD_cleanup_yes(n, t) \ n##_cleanup(tsd); #define MALLOC_TSD_cleanup_no(n, t) @@ -106,15 +112,6 @@ MALLOC_TSD * nothing, and do not request another callback. */ break; - case tsd_state_reincarnated: - /* - * Another destructor deallocated memory after this destructor - * was called. Reset state to tsd_state_purgatory and request - * another callback. - */ - tsd->state = tsd_state_purgatory; - tsd_set(tsd); - break; default: not_reached(); } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index ae47d23e..e033bb76 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -90,6 +90,44 @@ TEST_BEGIN(test_tsd_sub_thread) { } TEST_END +static void * +thd_start_reincarnated(void *arg) { + tsd_t *tsd = tsd_fetch(); + assert(tsd); + + void *p = malloc(1); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + + /* Manually trigger reincarnation. */ + assert_ptr_not_null(tsd->arena, "Should have tsd arena set."); + tsd_cleanup((void *)tsd); + assert_ptr_null(tsd->arena, "TSD arena should have been cleared."); + assert_u_eq(tsd->state, tsd_state_purgatory, + "TSD state should be purgatory\n"); + + free(p); + assert_u_eq(tsd->state, tsd_state_reincarnated, + "TSD state should be reincarnated\n"); + p = mallocx(1, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "Unexpected malloc() failure"); + assert_ptr_not_null(tsd->arena, + "Should have tsd arena set after reincarnation."); + + free(p); + tsd_cleanup((void *)tsd); + assert_ptr_null(tsd->arena, + "TSD arena should have been cleared after 2nd cleanup."); + + return NULL; +} + +TEST_BEGIN(test_tsd_reincarnation) { + thd_t thd; + thd_create(&thd, thd_start_reincarnated, NULL); + thd_join(thd, NULL); +} +TEST_END + int main(void) { /* Core tsd bootstrapping must happen prior to data_tsd_boot(). 
*/ @@ -101,5 +139,6 @@ main(void) { return test( test_tsd_main_thread, - test_tsd_sub_thread); + test_tsd_sub_thread, + test_tsd_reincarnation); } From 492e9f301eeb980e880d307214fd441264986324 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 4 Apr 2017 11:09:45 -0700 Subject: [PATCH 0762/2608] Make the tsd member init functions to take tsd_t * type. --- include/jemalloc/internal/rtree_externs.h | 3 ++- src/rtree.c | 7 ++++++- src/tsd.c | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index 482f6ba3..c8d1c376 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -43,6 +43,7 @@ void rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); void rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); -bool rtree_ctx_data_init(rtree_ctx_t *ctx); +void rtree_ctx_data_init(rtree_ctx_t *ctx); +bool tsd_rtree_ctx_data_init(tsd_t *tsd); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/src/rtree.c b/src/rtree.c index b2c6824f..de4990bd 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -425,13 +425,18 @@ rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, rtree_leaf_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); } -bool +void rtree_ctx_data_init(rtree_ctx_t *ctx) { for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) { rtree_ctx_cache_elm_t *cache = &ctx->cache[i]; cache->leafkey = RTREE_LEAFKEY_INVALID; cache->leaf = NULL; } +} + +bool +tsd_rtree_ctx_data_init(tsd_t *tsd) { + rtree_ctx_data_init(&tsd->rtree_ctx); return false; } diff --git a/src/tsd.c b/src/tsd.c index 6b68c001..86502116 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -64,7 +64,7 @@ bool tsd_data_init(void *arg) { tsd_t *tsd = (tsd_t *)arg; #define MALLOC_TSD_init_yes(n, t) \ - if (n##_data_init(&tsd->n)) { \ + if (tsd_##n##_data_init(tsd)) { \ return true; \ } 
#define MALLOC_TSD_init_no(n, t) From 7da04a6b091eb2e3a9fa69bb5f58c18dc10f8e2d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Apr 2017 18:18:40 -0700 Subject: [PATCH 0763/2608] Convert prng module to use C11-style atomics --- include/jemalloc/internal/arena_structs_b.h | 2 +- include/jemalloc/internal/prng_inlines.h | 45 +++++++++++---------- src/arena.c | 4 +- test/unit/prng.c | 38 ++++++++--------- 4 files changed, 46 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index a5191d16..fe4e4eed 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -166,7 +166,7 @@ struct arena_s { * * Synchronization: atomic. */ - size_t offset_state; + atomic_zu_t offset_state; /* * Extent serial number generator state. diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index 646e07b7..3f06ccd4 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -6,14 +6,15 @@ uint32_t prng_state_next_u32(uint32_t state); uint64_t prng_state_next_u64(uint64_t state); size_t prng_state_next_zu(size_t state); -uint32_t prng_lg_range_u32(uint32_t *state, unsigned lg_range, +uint32_t prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic); uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); -size_t prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic); +size_t prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic); -uint32_t prng_range_u32(uint32_t *state, uint32_t range, bool atomic); +uint32_t prng_range_u32(atomic_u32_t *state, uint32_t range, + bool atomic); uint64_t prng_range_u64(uint64_t *state, uint64_t range); -size_t prng_range_zu(size_t *state, size_t range, bool atomic); +size_t prng_range_zu(atomic_zu_t *state, size_t range, bool atomic); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || 
defined(JEMALLOC_PRNG_C_)) @@ -39,22 +40,22 @@ prng_state_next_zu(size_t state) { } JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(uint32_t *state, unsigned lg_range, bool atomic) { - uint32_t ret, state1; +prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { + uint32_t ret, state0, state1; assert(lg_range > 0); assert(lg_range <= 32); - if (atomic) { - uint32_t state0; + state0 = atomic_load_u32(state, ATOMIC_RELAXED); + if (atomic) { do { - state0 = atomic_read_u32(state); state1 = prng_state_next_u32(state0); - } while (atomic_cas_u32(state, state0, state1)); + } while (!atomic_compare_exchange_weak_u32(state, &state0, + state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); } else { - state1 = prng_state_next_u32(*state); - *state = state1; + state1 = prng_state_next_u32(state0); + atomic_store_u32(state, state1, ATOMIC_RELAXED); } ret = state1 >> (32 - lg_range); @@ -77,22 +78,22 @@ prng_lg_range_u64(uint64_t *state, unsigned lg_range) { } JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) { - size_t ret, state1; +prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { + size_t ret, state0, state1; assert(lg_range > 0); assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); - if (atomic) { - size_t state0; + state0 = atomic_load_zu(state, ATOMIC_RELAXED); + if (atomic) { do { - state0 = atomic_read_zu(state); state1 = prng_state_next_zu(state0); - } while (atomic_cas_zu(state, state0, state1)); + } while (atomic_compare_exchange_weak_zu(state, &state0, + state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); } else { - state1 = prng_state_next_zu(*state); - *state = state1; + state1 = prng_state_next_zu(state0); + atomic_store_zu(state, state1, ATOMIC_RELAXED); } ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); @@ -100,7 +101,7 @@ prng_lg_range_zu(size_t *state, unsigned lg_range, bool atomic) { } JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(uint32_t *state, uint32_t range, bool atomic) { 
+prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { uint32_t ret; unsigned lg_range; @@ -136,7 +137,7 @@ prng_range_u64(uint64_t *state, uint64_t range) { } JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(size_t *state, size_t range, bool atomic) { +prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { size_t ret; unsigned lg_range; diff --git a/src/arena.c b/src/arena.c index b8ef4731..53bef36e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1863,8 +1863,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * cost of test repeatability. For debug builds, instead use a * deterministic seed. */ - arena->offset_state = config_debug ? ind : - (size_t)(uintptr_t)arena; + atomic_store_zu(&arena->offset_state, config_debug ? ind : + (size_t)(uintptr_t)arena, ATOMIC_RELAXED); } arena->extent_sn_next = 0; diff --git a/test/unit/prng.c b/test/unit/prng.c index 74d9cf73..b5795c2f 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -2,31 +2,32 @@ static void test_prng_lg_range_u32(bool atomic) { - uint32_t sa, sb, ra, rb; + atomic_u32_t sa, sb; + uint32_t ra, rb; unsigned lg_range; - sa = 42; + atomic_store_u32(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_u32(&sa, 32, atomic); - sa = 42; + atomic_store_u32(&sa, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sa, 32, atomic); assert_u32_eq(ra, rb, "Repeated generation should produce repeated results"); - sb = 42; + atomic_store_u32(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sb, 32, atomic); assert_u32_eq(ra, rb, "Equivalent generation should produce equivalent results"); - sa = 42; + atomic_store_u32(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_u32(&sa, 32, atomic); rb = prng_lg_range_u32(&sa, 32, atomic); assert_u32_ne(ra, rb, "Full-width results must not immediately repeat"); - sa = 42; + atomic_store_u32(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_u32(&sa, 32, atomic); for (lg_range = 31; lg_range > 0; lg_range--) { - sb = 42; + atomic_store_u32(&sb, 42, ATOMIC_RELAXED); rb 
= prng_lg_range_u32(&sb, lg_range, atomic); assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); @@ -74,32 +75,33 @@ test_prng_lg_range_u64(void) { static void test_prng_lg_range_zu(bool atomic) { - size_t sa, sb, ra, rb; + atomic_zu_t sa, sb; + size_t ra, rb; unsigned lg_range; - sa = 42; + atomic_store_zu(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - sa = 42; + atomic_store_zu(&sa, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); assert_zu_eq(ra, rb, "Repeated generation should produce repeated results"); - sb = 42; + atomic_store_zu(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); assert_zu_eq(ra, rb, "Equivalent generation should produce equivalent results"); - sa = 42; + atomic_store_zu(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); assert_zu_ne(ra, rb, "Full-width results must not immediately repeat"); - sa = 42; + atomic_store_zu(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; lg_range--) { - sb = 42; + atomic_store_zu(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sb, lg_range, atomic); assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); @@ -142,10 +144,10 @@ test_prng_range_u32(bool atomic) { #define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - uint32_t s; + atomic_u32_t s; unsigned rep; - s = range; + atomic_store_u32(&s, range, ATOMIC_RELAXED); for (rep = 0; rep < NREPS; rep++) { uint32_t r = prng_range_u32(&s, range, atomic); @@ -182,10 +184,10 @@ test_prng_range_zu(bool atomic) { #define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - size_t s; + 
atomic_zu_t s; unsigned rep; - s = range; + atomic_store_zu(&s, range, ATOMIC_RELAXED); for (rep = 0; rep < NREPS; rep++) { size_t r = prng_range_zu(&s, range, atomic); From 864adb7f4219dc9b920ead049478946f0a42428d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 14:33:25 -0700 Subject: [PATCH 0764/2608] Transition e_prof_tctx in struct extent to C11 atomics --- include/jemalloc/internal/extent_inlines.h | 6 +++--- include/jemalloc/internal/extent_structs.h | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 99fa67c7..e6e447cf 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -173,8 +173,8 @@ extent_slab_data_get_const(const extent_t *extent) { JEMALLOC_INLINE prof_tctx_t * extent_prof_tctx_get(const extent_t *extent) { - return (prof_tctx_t *)atomic_read_p( - &((extent_t *)extent)->e_prof_tctx_pun); + return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, + ATOMIC_ACQUIRE); } JEMALLOC_INLINE void @@ -272,7 +272,7 @@ extent_slab_set(extent_t *extent, bool slab) { JEMALLOC_INLINE void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - atomic_write_p(&extent->e_prof_tctx_pun, tctx); + atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); } JEMALLOC_INLINE void diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 1527acb9..5d41bb81 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -118,11 +118,11 @@ struct extent_s { /* Small region slab metadata. */ arena_slab_data_t e_slab_data; - /* Profile counters, used for large objects. */ - union { - void *e_prof_tctx_pun; - prof_tctx_t *e_prof_tctx; - }; + /* + * Profile counters, used for large objects. Points to a + * prof_tctx_t. 
+ */ + atomic_p_t e_prof_tctx; }; }; typedef ql_head(extent_t) extent_list_t; From bc32ec3503433fae4c737c7ffe6b3822ce98d5d8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 15:12:24 -0700 Subject: [PATCH 0765/2608] Move arena-tracking atomics in jemalloc.c to C11-style --- include/jemalloc/internal/extent_inlines.h | 2 +- include/jemalloc/internal/jemalloc_internal.h.in | 7 +++---- src/jemalloc.c | 14 ++++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index e6e447cf..f1b94776 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -63,7 +63,7 @@ extent_arena_get(const extent_t *extent) { return NULL; } assert(arena_ind <= MALLOCX_ARENA_MAX); - return arenas[arena_ind]; + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); } JEMALLOC_INLINE szind_t diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 4255b639..04f91c07 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -469,7 +469,7 @@ extern unsigned narenas_auto; * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
*/ -extern arena_t *arenas[]; +extern atomic_p_t arenas[]; /* * pind2sz_tab encodes the same information as could be computed by @@ -909,10 +909,9 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { assert(ind <= MALLOCX_ARENA_MAX); - ret = arenas[ind]; + ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); if (unlikely(ret == NULL)) { - ret = (arena_t *)atomic_read_p((void **)&arenas[ind]); - if (init_if_missing && unlikely(ret == NULL)) { + if (init_if_missing) { ret = arena_init(tsdn, ind, (extent_hooks_t *)&extent_hooks_default); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7c8fe9c9..94ae030c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -55,10 +55,12 @@ static malloc_mutex_t arenas_lock; * arenas[0..narenas_auto) are used for automatic multiplexing of threads and * arenas. arenas[narenas_auto..narenas_total) are only used if the application * takes some action to create them and allocate from them. + * + * Points to an arena_t. */ JEMALLOC_ALIGNED(CACHELINE) -arena_t *arenas[MALLOCX_ARENA_MAX + 1]; -static unsigned narenas_total; /* Use narenas_total_*(). */ +atomic_p_t arenas[MALLOCX_ARENA_MAX + 1]; +static atomic_u_t narenas_total; /* Use narenas_total_*(). */ static arena_t *a0; /* arenas[0]; read-only after initialization. */ unsigned narenas_auto; /* Read-only after initialization. 
*/ @@ -363,22 +365,22 @@ bootstrap_free(void *ptr) { void arena_set(unsigned ind, arena_t *arena) { - atomic_write_p((void **)&arenas[ind], arena); + atomic_store_p(&arenas[ind], arena, ATOMIC_RELEASE); } static void narenas_total_set(unsigned narenas) { - atomic_write_u(&narenas_total, narenas); + atomic_store_u(&narenas_total, narenas, ATOMIC_RELEASE); } static void narenas_total_inc(void) { - atomic_add_u(&narenas_total, 1); + atomic_fetch_add_u(&narenas_total, 1, ATOMIC_RELEASE); } unsigned narenas_total_get(void) { - return atomic_read_u(&narenas_total); + return atomic_load_u(&narenas_total, ATOMIC_ACQUIRE); } /* Create a new arena and insert it into the arenas array at index ind. */ From 56b72c7b1781ef75c2450a08e08079fe164bb2df Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 17:22:24 -0700 Subject: [PATCH 0766/2608] Transition arena struct fields to C11 atomics --- include/jemalloc/internal/arena_structs_b.h | 16 +++--- src/arena.c | 56 +++++++++++---------- src/extent.c | 15 +++--- 3 files changed, 48 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index fe4e4eed..00e0d0c8 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -134,7 +134,7 @@ struct arena_s { * * Synchronization: atomic. */ - unsigned nthreads[2]; + atomic_u_t nthreads[2]; /* * When percpu_arena is enabled, to amortize the cost of reading / @@ -173,17 +173,21 @@ struct arena_s { * * Synchronization: atomic. */ - size_t extent_sn_next; + atomic_zu_t extent_sn_next; - /* Synchronization: atomic. */ - dss_prec_t dss_prec; + /* + * Represents a dss_prec_t, but atomically. + * + * Synchronization: atomic. + */ + atomic_u_t dss_prec; /* * Number of pages in active extents. * * Synchronization: atomic. */ - size_t nactive; + atomic_zu_t nactive; /* * Extant large allocations. 
@@ -222,7 +226,7 @@ struct arena_s { * * Synchronization: atomic. */ - pszind_t extent_grow_next; + atomic_u_t extent_grow_next; /* * Freelist of extent structures that were allocated via base_alloc(). diff --git a/src/arena.c b/src/arena.c index 53bef36e..feb1f760 100644 --- a/src/arena.c +++ b/src/arena.c @@ -13,10 +13,11 @@ const char *percpu_arena_mode_names[] = { const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT; -ssize_t opt_dirty_decay_time = DIRTY_DECAY_TIME_DEFAULT; -ssize_t opt_muzzy_decay_time = MUZZY_DECAY_TIME_DEFAULT; -static ssize_t dirty_decay_time_default; -static ssize_t muzzy_decay_time_default; +ssize_t opt_dirty_decay_time = DIRTY_DECAY_TIME_DEFAULT; +ssize_t opt_muzzy_decay_time = MUZZY_DECAY_TIME_DEFAULT; + +static atomic_zd_t dirty_decay_time_default; +static atomic_zd_t muzzy_decay_time_default; const arena_bin_info_t arena_bin_info[NBINS] = { #define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ @@ -197,7 +198,7 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dss = dss_prec_names[arena_dss_prec_get(arena)]; *dirty_decay_time = arena_dirty_decay_time_get(arena); *muzzy_decay_time = arena_muzzy_decay_time_get(arena); - *nactive += atomic_read_zu(&arena->nactive); + *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); *ndirty += extents_npages_get(&arena->extents_dirty); *nmuzzy += extents_npages_get(&arena->extents_muzzy); } @@ -246,7 +247,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->base, base_allocated); arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); arena_stats_accum_zu(&astats->resident, base_resident + - (((atomic_read_zu(&arena->nactive) + + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); @@ -426,13 +427,13 @@ arena_slab_reg_dalloc(tsdn_t 
*tsdn, extent_t *slab, static void arena_nactive_add(arena_t *arena, size_t add_pages) { - atomic_add_zu(&arena->nactive, add_pages); + atomic_fetch_add_zu(&arena->nactive, add_pages, ATOMIC_RELAXED); } static void arena_nactive_sub(arena_t *arena, size_t sub_pages) { - assert(atomic_read_zu(&arena->nactive) >= sub_pages); - atomic_sub_zu(&arena->nactive, sub_pages); + assert(atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) >= sub_pages); + atomic_fetch_sub_zu(&arena->nactive, sub_pages, ATOMIC_RELAXED); } static void @@ -1100,7 +1101,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - atomic_write_zu(&arena->nactive, 0); + atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); } static void @@ -1750,7 +1751,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, dss_prec_t arena_dss_prec_get(arena_t *arena) { - return (dss_prec_t)atomic_read_u((unsigned *)&arena->dss_prec); + return (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE); } bool @@ -1758,13 +1759,13 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { if (!have_dss) { return (dss_prec != dss_prec_disabled); } - atomic_write_u((unsigned *)&arena->dss_prec, dss_prec); + atomic_store_u(&arena->dss_prec, (unsigned)dss_prec, ATOMIC_RELEASE); return false; } ssize_t arena_dirty_decay_time_default_get(void) { - return (ssize_t)atomic_read_zu((size_t *)&dirty_decay_time_default); + return atomic_load_zd(&dirty_decay_time_default, ATOMIC_RELAXED); } bool @@ -1772,14 +1773,13 @@ arena_dirty_decay_time_default_set(ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) { return true; } - atomic_write_zu((size_t *)&dirty_decay_time_default, - (size_t)decay_time); + atomic_store_zd(&dirty_decay_time_default, decay_time, ATOMIC_RELAXED); return false; } ssize_t arena_muzzy_decay_time_default_get(void) { - return (ssize_t)atomic_read_zu((size_t *)&muzzy_decay_time_default); + return atomic_load_zd(&muzzy_decay_time_default, 
ATOMIC_RELAXED); } bool @@ -1787,29 +1787,28 @@ arena_muzzy_decay_time_default_set(ssize_t decay_time) { if (!arena_decay_time_valid(decay_time)) { return true; } - atomic_write_zu((size_t *)&muzzy_decay_time_default, - (size_t)decay_time); + atomic_store_zd(&muzzy_decay_time_default, decay_time, ATOMIC_RELAXED); return false; } unsigned arena_nthreads_get(arena_t *arena, bool internal) { - return atomic_read_u(&arena->nthreads[internal]); + return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED); } void arena_nthreads_inc(arena_t *arena, bool internal) { - atomic_add_u(&arena->nthreads[internal], 1); + atomic_fetch_add_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED); } void arena_nthreads_dec(arena_t *arena, bool internal) { - atomic_sub_u(&arena->nthreads[internal], 1); + atomic_fetch_sub_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED); } size_t arena_extent_sn_next(arena_t *arena) { - return atomic_add_zu(&arena->extent_sn_next, 1) - 1; + return atomic_fetch_add_zu(&arena->extent_sn_next, 1, ATOMIC_RELAXED); } arena_t * @@ -1832,7 +1831,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - arena->nthreads[0] = arena->nthreads[1] = 0; + atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); + atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); arena->last_thd = NULL; if (config_stats) { @@ -1867,11 +1867,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { (size_t)(uintptr_t)arena, ATOMIC_RELAXED); } - arena->extent_sn_next = 0; + atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED); - arena->dss_prec = extent_dss_prec_get(); + atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), + ATOMIC_RELAXED); - atomic_write_zu(&arena->nactive, 0); + atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); extent_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", @@ -1918,7 +1919,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t 
*extent_hooks) { } if (!config_munmap) { - arena->extent_grow_next = psz2ind(HUGEPAGE); + atomic_store_u(&arena->extent_grow_next, psz2ind(HUGEPAGE), + ATOMIC_RELAXED); } extent_list_init(&arena->extent_freelist); diff --git a/src/extent.c b/src/extent.c index 8bd8eb79..a75016af 100644 --- a/src/extent.c +++ b/src/extent.c @@ -913,7 +913,8 @@ extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, void *ret; ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, arena->dss_prec); + commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, + ATOMIC_RELAXED)); return ret; } @@ -956,7 +957,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * extent creation as a side effect. */ size_t esize = size + pad; - size_t alloc_size = pind2sz(atomic_read_u(&arena->extent_grow_next)); + size_t alloc_size = pind2sz(atomic_load_u(&arena->extent_grow_next, + ATOMIC_RELAXED)); size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (alloc_size_min < esize) { @@ -972,7 +974,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, bool zeroed = false; bool committed = false; void *ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, - &zeroed, &committed, arena->dss_prec); + &zeroed, &committed, (dss_prec_t)atomic_load_u(&arena->dss_prec, + ATOMIC_RELAXED)); extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed); @@ -1078,14 +1081,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * Increment extent_grow_next, but take care to do so atomically and * bail out if the increment would exceed the legal range. 
*/ + pszind_t egn = atomic_load_u(&arena->extent_grow_next, ATOMIC_RELAXED); while (true) { - pszind_t egn = atomic_read_u(&arena->extent_grow_next); - if (egn + 1 == NPSIZES) { break; } assert(egn + 1 < NPSIZES); - if (!atomic_cas_u(&arena->extent_grow_next, egn, egn + 1)) { + if (atomic_compare_exchange_weak_u(&arena->extent_grow_next, + &egn, egn + 1, ATOMIC_RELAXED, ATOMIC_RELAXED)) { break; } } From 92aafb0efe47dbca23fb5b54c33fd4504601ae76 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 17:32:21 -0700 Subject: [PATCH 0767/2608] Make base_t's extent_hooks field C11-atomic --- include/jemalloc/internal/base_structs.h | 10 +++++----- src/base.c | 14 ++++---------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index bad37c06..13d5bd46 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -17,11 +17,11 @@ struct base_s { /* Associated arena's index within the arenas array. */ unsigned ind; - /* User-configurable extent hook functions. */ - union { - extent_hooks_t *extent_hooks; - void *extent_hooks_pun; - }; + /* + * User-configurable extent hook functions. Points to an + * extent_hooks_t. + */ + atomic_p_t extent_hooks; /* Protects base_alloc() and base_stats_get() operations. 
*/ malloc_mutex_t mtx; diff --git a/src/base.c b/src/base.c index b1a4ae37..4275259e 100644 --- a/src/base.c +++ b/src/base.c @@ -227,7 +227,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base = (base_t *)base_extent_bump_alloc_helper(&block->extent, &gap_size, base_size, base_alignment); base->ind = ind; - base->extent_hooks = extent_hooks; + atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE)) { base_unmap(extent_hooks, ind, block, block->size); return NULL; @@ -264,20 +264,14 @@ base_delete(base_t *base) { extent_hooks_t * base_extent_hooks_get(base_t *base) { - return (extent_hooks_t *)atomic_read_p(&base->extent_hooks_pun); + return (extent_hooks_t *)atomic_load_p(&base->extent_hooks, + ATOMIC_ACQUIRE); } extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base); - union { - extent_hooks_t **h; - void **v; - } u; - - u.h = &base->extent_hooks; - atomic_write_p(u.v, extent_hooks); - + atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE); return old_extent_hooks; } From 55d992c48c0ca9c5d823bd717f854c2c8939d1f3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 17:58:06 -0700 Subject: [PATCH 0768/2608] Make extent_dss use C11-style atomics --- src/extent_dss.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 5074594e..99919090 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -18,14 +18,15 @@ const char *dss_prec_names[] = { * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use * atomic operations to synchronize the setting. */ -static unsigned dss_prec_default = (unsigned)DSS_PREC_DEFAULT; +static atomic_u_t dss_prec_default = ATOMIC_INIT( + (unsigned)DSS_PREC_DEFAULT); /* Base address of the DSS. 
*/ static void *dss_base; /* Atomic boolean indicating whether the DSS is exhausted. */ -static unsigned dss_exhausted; +static atomic_b_t dss_exhausted; /* Atomic current upper limit on DSS addresses. */ -static void *dss_max; +static atomic_p_t dss_max; /******************************************************************************/ @@ -46,7 +47,7 @@ extent_dss_prec_get(void) { if (!have_dss) { return dss_prec_disabled; } - ret = (dss_prec_t)atomic_read_u(&dss_prec_default); + ret = (dss_prec_t)atomic_load_u(&dss_prec_default, ATOMIC_ACQUIRE); return ret; } @@ -55,7 +56,7 @@ extent_dss_prec_set(dss_prec_t dss_prec) { if (!have_dss) { return (dss_prec != dss_prec_disabled); } - atomic_write_u(&dss_prec_default, (unsigned)dss_prec); + atomic_store_u(&dss_prec_default, (unsigned)dss_prec, ATOMIC_RELEASE); return false; } @@ -69,7 +70,7 @@ extent_dss_max_update(void *new_addr) { */ spin_t spinner = SPIN_INITIALIZER; while (true) { - void *max_prev = atomic_read_p(&dss_max); + void *max_prev = atomic_load_p(&dss_max, ATOMIC_RELAXED); max_cur = extent_dss_sbrk(0); if ((uintptr_t)max_prev > (uintptr_t)max_cur) { @@ -80,7 +81,8 @@ extent_dss_max_update(void *new_addr) { spin_adaptive(&spinner); continue; } - if (!atomic_cas_p(&dss_max, max_prev, max_cur)) { + if (atomic_compare_exchange_weak_p(&dss_max, &max_prev, + max_cur, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { break; } } @@ -114,7 +116,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - if (!atomic_read_u(&dss_exhausted)) { + if (!atomic_load_b(&dss_exhausted, ATOMIC_ACQUIRE)) { /* * The loop is necessary to recover from races with other * threads that are using the DSS for something other than @@ -167,7 +169,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * DSS while dss_max is greater than the current DSS * max reported by sbrk(0). 
*/ - if (atomic_cas_p(&dss_max, max_cur, dss_next)) { + if (!atomic_compare_exchange_weak_p(&dss_max, &max_cur, + dss_next, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { continue; } @@ -207,10 +210,12 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * succeeded since this invocation started, in which * case rollback is not necessary. */ - atomic_cas_p(&dss_max, dss_next, max_cur); + atomic_compare_exchange_strong_p(&dss_max, &dss_next, + max_cur, ATOMIC_ACQ_REL, ATOMIC_RELAXED); if (dss_prev == (void *)-1) { /* OOM. */ - atomic_write_u(&dss_exhausted, (unsigned)true); + atomic_store_b(&dss_exhausted, true, + ATOMIC_RELEASE); goto label_oom; } } @@ -230,7 +235,8 @@ bool extent_in_dss(void *addr) { cassert(have_dss); - return extent_in_dss_helper(addr, atomic_read_p(&dss_max)); + return extent_in_dss_helper(addr, atomic_load_p(&dss_max, + ATOMIC_ACQUIRE)); } bool @@ -244,7 +250,7 @@ extent_dss_mergeable(void *addr_a, void *addr_b) { return true; } - max = atomic_read_p(&dss_max); + max = atomic_load_p(&dss_max, ATOMIC_ACQUIRE); return (extent_in_dss_helper(addr_a, max) == extent_in_dss_helper(addr_b, max)); } @@ -254,8 +260,8 @@ extent_dss_boot(void) { cassert(have_dss); dss_base = extent_dss_sbrk(0); - dss_exhausted = (unsigned)(dss_base == (void *)-1); - dss_max = dss_base; + atomic_store_b(&dss_exhausted, dss_base == (void *)-1, ATOMIC_RELAXED); + atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED); } /******************************************************************************/ From 30d74db08ef2617236cbff219b5f40c9ff0aa0fc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 18:08:58 -0700 Subject: [PATCH 0769/2608] Convert accumbytes in prof_accum_t to C11 atomics, when possible --- include/jemalloc/internal/prof_inlines_a.h | 10 ++++++---- include/jemalloc/internal/prof_structs.h | 4 +++- src/prof.c | 4 +++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_a.h 
b/include/jemalloc/internal/prof_inlines_a.h index d77635a8..d0d29685 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -22,15 +22,16 @@ prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { * avoids rate-limiting allocation. */ #ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); do { - a0 = atomic_read_u64(&prof_accum->accumbytes); a1 = a0 + accumbytes; assert(a1 >= a0); overflow = (a1 >= prof_interval); if (overflow) { a1 %= prof_interval; } - } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); + } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, + a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &prof_accum->mtx); a0 = prof_accum->accumbytes; @@ -57,11 +58,12 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { */ uint64_t a0, a1; #ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); do { - a0 = atomic_read_u64(&prof_accum->accumbytes); a1 = (a0 >= LARGE_MINCLASS - usize) ? 
a0 - (LARGE_MINCLASS - usize) : 0; - } while (atomic_cas_u64(&prof_accum->accumbytes, a0, a1)); + } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, + a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &prof_accum->mtx); a0 = prof_accum->accumbytes; diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index afff6aa5..fba8c295 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -18,8 +18,10 @@ typedef struct { struct prof_accum_s { #ifndef JEMALLOC_ATOMIC_U64 malloc_mutex_t mtx; -#endif uint64_t accumbytes; +#else + atomic_u64_t accumbytes; +#endif }; struct prof_cnt_s { diff --git a/src/prof.c b/src/prof.c index 4e83ae3f..ce02d99c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1758,8 +1758,10 @@ prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { WITNESS_RANK_PROF_ACCUM)) { return true; } -#endif prof_accum->accumbytes = 0; +#else + atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); +#endif return false; } From 492a941f493f77f60062039b60040f426aa7ee45 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 18:20:42 -0700 Subject: [PATCH 0770/2608] Convert extent module to use C11-style atomcis --- src/extent.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/extent.c b/src/extent.c index a75016af..e080773b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -62,8 +62,8 @@ const extent_hooks_t extent_hooks_default = { }; /* Used exclusively for gdump triggering. 
*/ -static size_t curpages; -static size_t highpages; +static atomic_zu_t curpages; +static atomic_zu_t highpages; /******************************************************************************/ /* @@ -566,14 +566,16 @@ extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { if (opt_prof && extent_state_get(extent) == extent_state_active) { size_t nadd = extent_size_get(extent) >> LG_PAGE; - size_t cur = atomic_add_zu(&curpages, nadd); - size_t high = atomic_read_zu(&highpages); - while (cur > high && atomic_cas_zu(&highpages, high, cur)) { + size_t cur = atomic_fetch_add_zu(&curpages, nadd, + ATOMIC_RELAXED) + nadd; + size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); + while (cur > high && !atomic_compare_exchange_weak_zu( + &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { /* * Don't refresh cur, because it may have decreased * since this thread lost the highpages update race. + * Note that high is updated in case of CAS failure. */ - high = atomic_read_zu(&highpages); } if (cur > high && prof_gdump_get_unlocked()) { prof_gdump(tsdn); @@ -587,8 +589,8 @@ extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { if (opt_prof && extent_state_get(extent) == extent_state_active) { size_t nsub = extent_size_get(extent) >> LG_PAGE; - assert(atomic_read_zu(&curpages) >= nsub); - atomic_sub_zu(&curpages, nsub); + assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); + atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); } } From 5dcc13b342b3ffb38a1215ab2584b8cb12c46030 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 18:34:01 -0700 Subject: [PATCH 0771/2608] Make the mutex n_waiting_thds field a C11-style atomic --- include/jemalloc/internal/mutex_inlines.h | 14 ++++++++++++-- include/jemalloc/internal/mutex_structs.h | 2 +- include/jemalloc/internal/mutex_types.h | 3 ++- src/mutex.c | 7 ++++--- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h 
b/include/jemalloc/internal/mutex_inlines.h index 3a12a722..0552e190 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -40,7 +40,12 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { if (sum->max_n_thds < data->max_n_thds) { sum->max_n_thds = data->max_n_thds; } - sum->n_waiting_thds += data->n_waiting_thds; + uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, + ATOMIC_RELAXED); + uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( + &data->n_waiting_thds, ATOMIC_RELAXED); + atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, + ATOMIC_RELAXED); sum->n_owner_switches += data->n_owner_switches; sum->n_lock_ops += data->n_lock_ops; } @@ -91,9 +96,14 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); + /* + * Not *really* allowed (we shouldn't be doing non-atomic loads of + * atomic data), but the mutex protection makes this safe, and writing + * a member-for-member copy is tedious for this situation. + */ *data = *source; /* n_wait_thds is not reported (modified w/o locking). */ - data->n_waiting_thds = 0; + atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); } #endif diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 5dddb846..ff090b22 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -17,7 +17,7 @@ struct mutex_prof_data_s { /* Max # of threads waiting for the mutex at the same time. */ uint32_t max_n_thds; /* Current # of threads waiting on the lock. Atomic synced. */ - uint32_t n_waiting_thds; + atomic_u32_t n_waiting_thds; /* * Data touched on the fast path. 
These are modified right after we diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index bd261490..e6589374 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -35,7 +35,8 @@ typedef struct malloc_mutex_s malloc_mutex_t; #endif #define LOCK_PROF_DATA_INITIALIZER \ - {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, 0, 0, NULL, 0} + {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ + ATOMIC_INIT(0), 0, NULL, 0} #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER diff --git a/src/mutex.c b/src/mutex.c index fa2770a3..8c593101 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -93,10 +93,11 @@ label_spin_done: /* Copy before to after to avoid clock skews. */ nstime_t after; nstime_copy(&after, &before); - uint32_t n_thds = atomic_add_u32(&data->n_waiting_thds, 1); + uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1, + ATOMIC_RELAXED) + 1; /* One last try as above two calls may take quite some cycles. */ if (!malloc_mutex_trylock(mutex)) { - atomic_sub_u32(&data->n_waiting_thds, 1); + atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED); data->n_spin_acquired++; return; } @@ -104,7 +105,7 @@ label_spin_done: /* True slow path. */ malloc_mutex_lock_final(mutex); /* Update more slow-path only counters. */ - atomic_sub_u32(&data->n_waiting_thds, 1); + atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED); nstime_update(&after); nstime_t delta; From 074f2256caecee17b168fe6d7d243a0c6e69a130 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 18:36:45 -0700 Subject: [PATCH 0772/2608] Make prof's cum_gctx a C11-style atomic --- src/prof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/prof.c b/src/prof.c index ce02d99c..a0290b8f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -63,7 +63,7 @@ size_t lg_prof_sample; * creating/destroying mutexes. 
*/ static malloc_mutex_t *gctx_locks; -static unsigned cum_gctxs; /* Atomic counter. */ +static atomic_u_t cum_gctxs; /* Atomic counter. */ /* * Table of mutexes that are shared among tdata's. No operations require @@ -524,7 +524,7 @@ prof_backtrace(prof_bt_t *bt) { static malloc_mutex_t * prof_gctx_mutex_choose(void) { - unsigned ngctxs = atomic_add_u(&cum_gctxs, 1); + unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; } From eeabdd246693fbf7c54e03ff8957889e63dc9a0c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 4 Apr 2017 18:40:37 -0700 Subject: [PATCH 0773/2608] Remove the pre-C11-atomics API, which is now unused --- include/jemalloc/internal/atomic.h | 45 ------------------------------ test/unit/atomic.c | 45 ------------------------------ 2 files changed, 90 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index b68440c4..adadb1a3 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -52,69 +52,24 @@ # define JEMALLOC_ATOMIC_U64 #endif -/* - * In order to let us transition atomics usage piecemeal (and reason locally - * about memory orders), we'll support the previous API for a while. - */ -#define JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \ -ATOMIC_INLINE type \ -atomic_read_##short_type(type *p) { \ - return atomic_load_##short_type ((atomic_##short_type##_t *)p, \ - ATOMIC_SEQ_CST); \ -} \ - \ -ATOMIC_INLINE void \ -atomic_write_##short_type(type *p, const type val) { \ - atomic_store_##short_type((atomic_##short_type##_t *)p, \ - (type)val, ATOMIC_SEQ_CST); \ -} \ -ATOMIC_INLINE bool \ -atomic_cas_##short_type(type *p, type c, type s) { \ - /* Note the '!' -- atomic_cas inverts the usual semantics. 
*/ \ - return !atomic_compare_exchange_strong_##short_type( \ - (atomic_##short_type##_t *)p, &c, s, ATOMIC_SEQ_CST, \ - ATOMIC_SEQ_CST); \ -} - -#define JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(type, short_type) \ -JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(type, short_type) \ - \ -ATOMIC_INLINE type \ -atomic_add_##short_type(type *p, type x) { \ - return atomic_fetch_add_##short_type( \ - (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) + x; \ -} \ -ATOMIC_INLINE type \ -atomic_sub_##short_type(type *p, type x) { \ - return atomic_fetch_sub_##short_type( \ - (atomic_##short_type##_t *)p, x, ATOMIC_SEQ_CST) - x; \ -} - JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(void *, p) /* * There's no actual guarantee that sizeof(bool) == 1, but it's true on the only * platform that actually needs to know the size, MSVC. */ JEMALLOC_GENERATE_ATOMICS(bool, b, 0) -JEMALLOC_GENERATE_COMPATABILITY_ATOMICS(bool, b) JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) -JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(unsigned, u) JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(size_t, zu) JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(ssize_t, zd) JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) -JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint32_t, u32) #ifdef JEMALLOC_ATOMIC_U64 JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) -JEMALLOC_GENERATE_COMPATABILITY_INT_ATOMICS(uint64_t, u64) #endif #undef ATOMIC_INLINE diff --git a/test/unit/atomic.c b/test/unit/atomic.c index fa24415a..572d8d23 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -17,7 +17,6 @@ #define DO_TESTS(t, ta, val1, val2, val3) do { \ t val; \ - t raw_atomic; \ t expected; \ bool success; \ /* This (along with the load below) also tests ATOMIC_LOAD. */ \ @@ -81,37 +80,11 @@ } \ \ \ - /* Previous atomics API. */ \ - \ - /* Read. 
*/ \ - raw_atomic = val1; \ - val = atomic_read_##ta(&raw_atomic); \ - assert_##ta##_eq(val1, val, "Read failed"); \ - \ - /* Write. */ \ - raw_atomic = val1; \ - atomic_write_##ta(&raw_atomic, val2); \ - assert_##ta##_eq(val2, raw_atomic, "Write failed"); \ - \ - /* CAS. */ \ - raw_atomic = val1; \ - success = !atomic_cas_##ta(&raw_atomic, val2, val3); \ - assert_b_eq(val1 == val2, success, \ - "CAS did the wrong state update"); \ - val = raw_atomic; \ - if (success) { \ - assert_##ta##_eq(val3, val, \ - "Successful CAS should update atomic"); \ - } else { \ - assert_##ta##_eq(val1, val, \ - "Unsuccessful CAS should not update atomic"); \ - } \ } while (0) #define DO_INTEGER_TESTS(t, ta, val1, val2) do { \ atomic_##ta##_t atom; \ t val; \ - t raw_atomic; \ \ /* Fetch-add. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ @@ -157,24 +130,6 @@ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ assert_##ta##_eq(val1 ^ val2, val, \ "Fetch-xor should update atomic"); \ - \ - /* Previous atomics API. */ \ - \ - /* Add. */ \ - raw_atomic = val1; \ - val = atomic_add_##ta(&raw_atomic, val2); \ - assert_##ta##_eq(val1 + val2, val, \ - "atomic_add should return new value"); \ - assert_##ta##_eq(val1 + val2, raw_atomic, \ - "atomic_add should update atomic"); \ - \ - /* Sub. */ \ - raw_atomic = val1; \ - val = atomic_sub_##ta(&raw_atomic, val2); \ - assert_##ta##_eq(val1 - val2, val, \ - "atomic_sub should return new value"); \ - assert_##ta##_eq(val1 - val2, raw_atomic, \ - "atomic_add should update atomic"); \ } while (0) #define TEST_STRUCT(t, ta) \ From fde3e20cc04db459f3c76134bc6dfb0ee5c422bb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Mar 2017 21:50:38 -0700 Subject: [PATCH 0774/2608] Integrate auto tcache into TSD. The embedded tcache is initialized upon tsd initialization. The avail arrays for the tbins will be allocated / deallocated accordingly during init / cleanup. 
With this change, the pointer to the auto tcache will always be available, as long as we have access to the TSD. tcache_available() (called in tcache_get()) is provided to check if we should use tcache. --- include/jemalloc/internal/arena_inlines_a.h | 10 +- .../jemalloc/internal/jemalloc_internal.h.in | 96 ++++++-- include/jemalloc/internal/private_symbols.txt | 8 +- include/jemalloc/internal/rtree_inlines.h | 7 +- include/jemalloc/internal/tcache_externs.h | 7 +- include/jemalloc/internal/tcache_inlines.h | 65 ++---- include/jemalloc/internal/tcache_structs.h | 6 +- include/jemalloc/internal/tcache_types.h | 3 + include/jemalloc/internal/tsd_externs.h | 5 +- include/jemalloc/internal/tsd_structs.h | 7 +- include/jemalloc/internal/tsd_types.h | 12 +- src/ctl.c | 6 +- src/jemalloc.c | 16 +- src/tcache.c | 219 +++++++++++++----- src/tsd.c | 5 - test/unit/tsd.c | 6 + 16 files changed, 300 insertions(+), 178 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index e1c47652..cf92342b 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -57,12 +57,10 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { /* Set new arena/tcache associations. 
*/ arena_migrate(tsd, oldind, newind); - if (config_tcache) { - tcache_t *tcache = tsd_tcache_get(tsd); - if (tcache) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - newarena); - } + tcache_t *tcache = tcache_get(tsd); + if (config_tcache && tcache) { + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + newarena); } } } diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 04f91c07..449a4ab8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -488,23 +488,24 @@ extern size_t const index2size_tab[NSIZES]; */ extern uint8_t const size2index_tab[]; -void *a0malloc(size_t size); -void a0dalloc(void *ptr); -void *bootstrap_malloc(size_t size); -void *bootstrap_calloc(size_t num, size_t size); -void bootstrap_free(void *ptr); -void arena_set(unsigned ind, arena_t *arena); -unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -void iarena_cleanup(tsd_t *tsd); -void arena_cleanup(tsd_t *tsd); -void arenas_tdata_cleanup(tsd_t *tsd); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); +unsigned narenas_total_get(void); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void iarena_cleanup(tsd_t *tsd); +void 
arena_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +bool malloc_initialized(void); #include "jemalloc/internal/nstime_externs.h" #include "jemalloc/internal/ckh_externs.h" @@ -559,6 +560,8 @@ arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); +bool tcache_available(tsd_t *tsd); +tcache_t *tcache_get(tsd_t *tsd); malloc_cpuid_t malloc_getcpu(void); unsigned percpu_arena_choose(void); unsigned percpu_arena_ind_limit(void); @@ -929,6 +932,38 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { } return &tdata->decay_ticker; } + +JEMALLOC_ALWAYS_INLINE bool +tcache_available(tsd_t *tsd) { + cassert(config_tcache); + + /* + * Thread specific auto tcache might be unavailable if: 1) during tcache + * initialization, or 2) disabled through thread.tcache.enabled mallctl + * or config options. This check covers all cases. + */ + if (likely(tsd_tcache_enabled_get(tsd) == tcache_enabled_true)) { + /* Associated arena == null implies tcache init in progress. */ + if (tsd_tcachep_get(tsd)->arena != NULL) { + assert(tsd_tcachep_get(tsd)->tbins[0].avail != NULL); + } + return true; + } + + return false; +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(tsd_t *tsd) { + if (!config_tcache) { + return NULL; + } + if (!tcache_available(tsd)) { + return NULL; + } + + return tsd_tcachep_get(tsd); +} #endif #include "jemalloc/internal/rtree_inlines.h" @@ -959,9 +994,24 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); if (unlikely(ret == NULL)) { ret = arena_choose_hard(tsd, internal); + assert(ret); + if (config_tcache && tcache_available(tsd)) { + tcache_t *tcache = tcache_get(tsd); + if (tcache->arena != NULL) { + /* See comments in tcache_data_init().*/ + assert(tcache->arena == + arena_get(tsd_tsdn(tsd), 0, false)); + if (tcache->arena != ret) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tcache, ret); + } + } else { + tcache_arena_associate(tsd_tsdn(tsd), tcache, + ret); + } + } } - assert(ret != NULL); /* * Note that for percpu arena, if the current arena is outside of the * auto percpu arena range, (i.e. thread is assigned to a manually @@ -1069,8 +1119,8 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd, true), - false, NULL, slow_path); + return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false, + NULL, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -1102,7 +1152,7 @@ ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, - tcache_get(tsd, true), false, NULL); + tcache_get(tsd), false, NULL); } JEMALLOC_ALWAYS_INLINE size_t @@ -1127,7 +1177,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd, false), false, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), false, true); } JEMALLOC_ALWAYS_INLINE void @@ -1199,7 +1249,7 @@ JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero) { return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - 
tcache_get(tsd, true), NULL); + tcache_get(tsd), NULL); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 1cced603..e2bb0592 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -274,6 +274,7 @@ lg_floor lg_prof_sample malloc_cprintf malloc_getcpu +malloc_initialized malloc_mutex_prof_data_reset malloc_mutex_assert_not_owner malloc_mutex_assert_owner @@ -293,7 +294,6 @@ malloc_tsd_boot1 malloc_tsd_cleanup_register malloc_tsd_dalloc malloc_tsd_malloc -malloc_tsd_no_cleanup malloc_vcprintf malloc_vsnprintf malloc_write @@ -475,22 +475,23 @@ tcache_alloc_easy tcache_alloc_large tcache_alloc_small tcache_alloc_small_hard +tcache_arena_associate tcache_arena_reassociate tcache_bin_flush_large tcache_bin_flush_small tcache_bin_info tcache_boot tcache_cleanup -tcache_create +tcache_create_explicit tcache_dalloc_large tcache_dalloc_small +tcache_data_init tcache_enabled_get tcache_enabled_set tcache_event tcache_event_hard tcache_flush tcache_get -tcache_get_hard tcache_maxclass tcache_prefork tcache_postfork_child @@ -521,7 +522,6 @@ tsd_booted tsd_booted_get tsd_cleanup tsd_cleanup_wrapper -tsd_data_init tsd_fetch tsd_fetch_impl tsd_get diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index bebe49e0..ce03c578 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -321,13 +321,18 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, assert(!dependent || !init_missing); uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + #define RTREE_CACHE_CHECK(i) do { \ if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ rtree_leaf_elm_t *leaf = rtree_ctx->cache[i].leaf; \ assert(leaf != NULL); \ if (i > 0) { \ /* Bubble up by one. 
*/ \ - rtree_ctx->cache[i] = rtree_ctx->cache[i - 1]; \ + rtree_ctx->cache[i].leafkey = \ + rtree_ctx->cache[i - 1].leafkey; \ + rtree_ctx->cache[i].leaf = \ + rtree_ctx->cache[i - 1].leaf; \ rtree_ctx->cache[i - 1].leafkey = leafkey; \ rtree_ctx->cache[i - 1].leaf = leaf; \ } \ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 83643033..75ff3214 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -35,16 +35,19 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -tcache_t *tcache_get_hard(tsd_t *tsd); -tcache_t *tcache_create(tsdn_t *tsdn, arena_t *arena); +tcache_t *tcache_create_explicit(tsd_t *tsd); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn); +void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); +void tcache_flush(void); +bool tsd_tcache_data_init(tsd_t *tsd); +bool tsd_tcache_enabled_data_init(tsd_t *tsd); #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index fd7e1764..c3660963 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -4,9 +4,9 @@ #ifndef JEMALLOC_ENABLE_INLINE void tcache_event(tsd_t *tsd, tcache_t *tcache); void tcache_flush(void); -bool tcache_enabled_get(void); -tcache_t *tcache_get(tsd_t *tsd, bool create); -void tcache_enabled_set(bool enabled); +bool 
tcache_enabled_get(tsd_t *tsd); +tcache_t *tcache_get(tsd_t *tsd); +void tcache_enabled_set(tsd_t *tsd, bool enabled); void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t ind, bool zero, bool slow_path); @@ -20,68 +20,32 @@ tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -JEMALLOC_INLINE void -tcache_flush(void) { - tsd_t *tsd; - - cassert(config_tcache); - - tsd = tsd_fetch(); - tcache_cleanup(tsd); -} - JEMALLOC_INLINE bool -tcache_enabled_get(void) { - tsd_t *tsd; +tcache_enabled_get(tsd_t *tsd) { tcache_enabled_t tcache_enabled; cassert(config_tcache); - tsd = tsd_fetch(); tcache_enabled = tsd_tcache_enabled_get(tsd); - if (tcache_enabled == tcache_enabled_default) { - tcache_enabled = (tcache_enabled_t)opt_tcache; - tsd_tcache_enabled_set(tsd, tcache_enabled); - } + assert(tcache_enabled != tcache_enabled_default); return (bool)tcache_enabled; } JEMALLOC_INLINE void -tcache_enabled_set(bool enabled) { - tsd_t *tsd; - tcache_enabled_t tcache_enabled; - +tcache_enabled_set(tsd_t *tsd, bool enabled) { cassert(config_tcache); - tsd = tsd_fetch(); + tcache_enabled_t old = tsd_tcache_enabled_get(tsd); - tcache_enabled = (tcache_enabled_t)enabled; - tsd_tcache_enabled_set(tsd, tcache_enabled); - - if (!enabled) { + if ((old != tcache_enabled_true) && enabled) { + tsd_tcache_data_init(tsd); + } else if ((old == tcache_enabled_true) && !enabled) { tcache_cleanup(tsd); } -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(tsd_t *tsd, bool create) { - tcache_t *tcache; - - if (!config_tcache) { - return NULL; - } - - tcache = tsd_tcache_get(tsd); - if (!create) { - return tcache; - } - if (unlikely(tcache == NULL) && tsd_nominal(tsd)) { - tcache = tcache_get_hard(tsd); - tsd_tcache_set(tsd, tcache); - } - - return tcache; + /* Commit the state last. Above calls check current state. 
*/ + tcache_enabled_t tcache_enabled = (tcache_enabled_t)enabled; + tsd_tcache_enabled_set(tsd, tcache_enabled); } JEMALLOC_ALWAYS_INLINE void @@ -300,8 +264,7 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { - elm->tcache = tcache_create(tsd_tsdn(tsd), arena_choose(tsd, - NULL)); + elm->tcache = tcache_create_explicit(tsd); } return elm->tcache; } diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index a9b70312..c9c05cd2 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -36,13 +36,17 @@ struct tcache_s { ticker_t gc_ticker; /* Drives incremental GC. */ szind_t next_gc_bin; /* Next bin to GC. */ arena_t *arena; /* Associated arena. */ - tcache_bin_t tbins[1]; /* Dynamically sized. */ /* * The pointer stacks associated with tbins follow as a contiguous * array. During tcache initialization, the avail pointer in each * element of tbins is initialized to point to the proper offset within * this array. */ +#ifdef JEMALLOC_TCACHE + tcache_bin_t tbins[NSIZES]; +#else + tcache_bin_t tbins[0]; +#endif }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 2d396bf6..8624ac2f 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -47,4 +47,7 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_GC_INCR \ ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) +/* Used in TSD static initializer only. Real init in tcache_data_init(). 
*/ +#define TCACHE_ZERO_INITIALIZER {{NULL}} + #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h index 9b88a567..d15fd591 100644 --- a/include/jemalloc/internal/tsd_externs.h +++ b/include/jemalloc/internal/tsd_externs.h @@ -3,7 +3,6 @@ void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_no_cleanup(void *arg); void malloc_tsd_cleanup_register(bool (*f)(void)); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); @@ -13,7 +12,7 @@ void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block); void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); #endif -void tsd_cleanup(void *arg); -bool tsd_data_init(void *arg); +bool tsd_data_init(void *arg); +void tsd_cleanup(void *arg); #endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index b4ac09fd..d399563c 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -16,7 +16,7 @@ struct tsd_init_head_s { #define MALLOC_TSD \ /* O(name, type, [gs]et, init, cleanup) */ \ - O(tcache, tcache_t *, yes, no, yes) \ + O(tcache, tcache_t, yes, no, yes) \ O(thread_allocated, uint64_t, yes, no, no) \ O(thread_deallocated, uint64_t, yes, no, no) \ O(prof_tdata, prof_tdata_t *, yes, no, yes) \ @@ -26,7 +26,7 @@ struct tsd_init_head_s { O(narenas_tdata, unsigned, yes, no, no) \ O(arenas_tdata_bypass, bool, no, no, no) \ O(tcache_enabled, tcache_enabled_t, \ - yes, no, no) \ + yes, yes, no) \ O(rtree_ctx, rtree_ctx_t, no, yes, no) \ O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ @@ -35,7 +35,7 @@ struct tsd_init_head_s { #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ - NULL, \ + TCACHE_ZERO_INITIALIZER, \ 0, \ 0, \ NULL, \ @@ -69,6 +69,7 @@ struct tsdn_s { }; static const 
tsd_t tsd_initializer = TSD_INITIALIZER; +UNUSED static const void *malloc_tsd_no_cleanup = (void *)0; malloc_tsd_types(, tsd_t) diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 195b6493..29c6378a 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -357,8 +357,10 @@ a_name##tsd_boot1(void) { \ " TSD for "#a_name"\n"); \ abort(); \ } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_boot_wrapper.initialized = false; \ + a_cleanup(&a_name##tsd_boot_wrapper.val); \ + wrapper->initialized = false; \ + wrapper->val = a_initializer; \ a_name##tsd_wrapper_set(wrapper); \ } \ a_attr bool \ @@ -487,8 +489,10 @@ a_name##tsd_boot1(void) { \ " TSD for "#a_name"\n"); \ abort(); \ } \ - memcpy(wrapper, &a_name##tsd_boot_wrapper, \ - sizeof(a_name##tsd_wrapper_t)); \ + a_name##tsd_boot_wrapper.initialized = false; \ + a_cleanup(&a_name##tsd_boot_wrapper.val); \ + wrapper->initialized = false; \ + wrapper->val = a_initializer; \ a_name##tsd_wrapper_set(wrapper); \ } \ a_attr bool \ diff --git a/src/ctl.c b/src/ctl.c index 36f5634d..a59a741f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1532,7 +1532,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Set new arena/tcache associations. 
*/ arena_migrate(tsd, oldind, newind); if (config_tcache) { - tcache_t *tcache = tsd_tcache_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); if (tcache != NULL) { tcache_arena_reassociate(tsd_tsdn(tsd), tcache, newarena); @@ -1564,13 +1564,13 @@ thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, return ENOENT; } - oldval = tcache_enabled_get(); + oldval = tcache_enabled_get(tsd); if (newp != NULL) { if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } - tcache_enabled_set(*(bool *)newp); + tcache_enabled_set(tsd, *(bool *)newp); } READ(oldval, bool); diff --git a/src/jemalloc.c b/src/jemalloc.c index 94ae030c..9d66f7f6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -275,7 +275,7 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. */ -JEMALLOC_ALWAYS_INLINE_C bool +bool malloc_initialized(void) { return (malloc_init_state == malloc_init_initialized); } @@ -1536,7 +1536,7 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, /* Fill in the tcache. */ if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) { - tcache = tcache_get(tsd, true); + tcache = tcache_get(tsd); } else if (dopts->tcache_ind == TCACHE_IND_NONE) { tcache = NULL; } else { @@ -2056,7 +2056,7 @@ je_realloc(void *ptr, size_t size) { /* realloc(ptr, 0) is equivalent to free(ptr). 
*/ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd, false), true); + ifree(tsd, ptr, tcache_get(tsd), true); return NULL; } size = 1; @@ -2113,9 +2113,9 @@ je_free(void *ptr) { tsd_t *tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); if (likely(!malloc_slow)) { - ifree(tsd, ptr, tcache_get(tsd, false), false); + ifree(tsd, ptr, tcache_get(tsd), false); } else { - ifree(tsd, ptr, tcache_get(tsd, false), true); + ifree(tsd, ptr, tcache_get(tsd), true); } witness_assert_lockless(tsd_tsdn(tsd)); } @@ -2393,7 +2393,7 @@ je_rallocx(void *ptr, size_t size, int flags) { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - tcache = tcache_get(tsd, true); + tcache = tcache_get(tsd); } old_usize = isalloc(tsd_tsdn(tsd), ptr); @@ -2605,7 +2605,7 @@ je_dallocx(void *ptr, int flags) { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - tcache = tcache_get(tsd, false); + tcache = tcache_get(tsd); } UTRACE(ptr, 0, 0); @@ -2652,7 +2652,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - tcache = tcache_get(tsd, false); + tcache = tcache_get(tsd); } UTRACE(ptr, 0, 0); diff --git a/src/tcache.c b/src/tcache.c index 6057c890..aa2917b2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,7 +4,13 @@ /******************************************************************************/ /* Data. */ -bool opt_tcache = true; +bool opt_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; @@ -78,6 +84,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; + assert(tcache->arena); arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? 
tcache->prof_accumbytes : 0); if (config_prof) { @@ -271,9 +278,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } } -static void +void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { + assert(tcache->arena == NULL); tcache->arena = arena; + if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); @@ -286,6 +295,7 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { static void tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { arena_t *arena = tcache->arena; + assert(arena); if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); @@ -304,6 +314,7 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { tcache_stats_merge(tsdn, tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } + tcache->arena = NULL; } void @@ -312,49 +323,32 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { tcache_arena_associate(tsdn, tcache, arena); } -tcache_t * -tcache_get_hard(tsd_t *tsd) { - arena_t *arena; +bool +tsd_tcache_enabled_data_init(tsd_t *tsd) { + /* Called upon tsd initialization. */ + tsd_tcache_enabled_set(tsd, (tcache_enabled_t)opt_tcache); + if (opt_tcache) { + /* Trigger tcache init. */ + tsd_tcache_data_init(tsd); + } - if (!tcache_enabled_get()) { - if (tsd_nominal(tsd)) { - tcache_enabled_set(false); /* Memoize. */ - } - return NULL; - } - arena = arena_choose(tsd, NULL); - if (unlikely(arena == NULL)) { - return NULL; - } - return tcache_create(tsd_tsdn(tsd), arena); + return false; } -tcache_t * -tcache_create(tsdn_t *tsdn, arena_t *arena) { - tcache_t *tcache; - size_t size, stack_offset; - unsigned i; - - size = offsetof(tcache_t, tbins) + (sizeof(tcache_bin_t) * nhbins); - /* Naturally align the pointer stacks. 
*/ - size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); - /* Avoid false cacheline sharing. */ - size = sa2u(size, CACHELINE); - - tcache = ipallocztm(tsdn, size, CACHELINE, true, NULL, true, - arena_get(TSDN_NULL, 0, true)); - if (tcache == NULL) { - return NULL; - } - - tcache_arena_associate(tsdn, tcache, arena); +/* Initialize auto tcache (embedded in TSD). */ +static void +tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { + memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); + tcache->prof_accumbytes = 0; + tcache->next_gc_bin = 0; + tcache->arena = NULL; ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); + size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - for (i = 0; i < nhbins; i++) { + memset(tcache->tbins, 0, sizeof(tcache_bin_t) * nhbins); + for (unsigned i = 0; i < nhbins; i++) { tcache->tbins[i].lg_fill_div = 1; stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); /* @@ -362,15 +356,90 @@ tcache_create(tsdn_t *tsdn, arena_t *arena) { * access the slots toward higher addresses (for the benefit of * prefetch). */ - tcache->tbins[i].avail = (void **)((uintptr_t)tcache + + tcache->tbins[i].avail = (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); } + assert(stack_offset == stack_nelms * sizeof(void *)); +} + +/* Initialize auto tcache (embedded in TSD). */ +bool +tsd_tcache_data_init(tsd_t *tsd) { + if (!config_tcache) { + return false; + } + + tcache_t *tcache = &tsd->tcache; + assert(tcache->tbins[0].avail == NULL); + size_t size = stack_nelms * sizeof(void *); + /* Avoid false cacheline sharing. */ + size = sa2u(size, CACHELINE); + + /* Manually initialize rcache as we may need it for allocation. 
*/ + tsd_rtree_ctx_data_init(tsd); + + void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, + NULL, true, arena_get(TSDN_NULL, 0, true)); + if (avail_array == NULL) { + return true; + } + + tcache_init(tsd, tcache, avail_array); + /* + * Initialization is a bit tricky here. After malloc init is done, all + * threads can rely on arena_choose and associate tcache accordingly. + * However, the thread that does actual malloc bootstrapping relies on + * functional tsd, and it can only rely on a0. In that case, we + * associate its tcache to a0 temporarily, and later on + * arena_choose_hard() will re-associate properly. + */ + tcache->arena = NULL; + arena_t *arena; + if (!malloc_initialized()) { + /* If in initialization, assign to a0. */ + arena = arena_get(tsd_tsdn(tsd), 0, false); + tcache_arena_associate(tsd_tsdn(tsd), tcache, arena); + } else { + arena = arena_choose(tsd, NULL); + /* This may happen if thread.tcache.enabled is used. */ + if (tcache->arena == NULL) { + tcache_arena_associate(tsd_tsdn(tsd), tcache, arena); + } + } + assert(arena == tcache->arena); + + return false; +} + +/* Created manual tcache for tcache.create mallctl. */ +tcache_t * +tcache_create_explicit(tsd_t *tsd) { + tcache_t *tcache; + size_t size, stack_offset; + + size = sizeof(tcache_t); + /* Naturally align the pointer stacks. */ + size = PTR_CEILING(size); + stack_offset = size; + size += stack_nelms * sizeof(void *); + /* Avoid false cacheline sharing. 
*/ + size = sa2u(size, CACHELINE); + + tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, + arena_get(TSDN_NULL, 0, true)); + if (tcache == NULL) { + return NULL; + } + + tcache_init(tsd, tcache, + (void *)((uintptr_t)tcache + (uintptr_t)stack_offset)); + tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); return tcache; } static void -tcache_destroy(tsd_t *tsd, tcache_t *tcache) { +tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { unsigned i; for (i = 0; i < NBINS; i++) { @@ -381,7 +450,6 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) { assert(tbin->tstats.nrequests == 0); } } - for (; i < nhbins; i++) { tcache_bin_t *tbin = &tcache->tbins[i]; tcache_bin_flush_large(tsd, tbin, i, 0, tcache); @@ -391,32 +459,60 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache) { } } - /* - * Get arena after flushing -- when using percpu arena, the associated - * arena could change during flush. - */ - arena_t *arena = arena_choose(tsd, NULL); - tcache_arena_dissociate(tsd_tsdn(tsd), tcache); - - if (config_prof && tcache->prof_accumbytes > 0 && + arena_t *arena = tcache->arena; + if (config_prof && arena && tcache->prof_accumbytes > 0 && arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) { prof_idump(tsd_tsdn(tsd)); } - - idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); } void -tcache_cleanup(tsd_t *tsd) { - tcache_t *tcache; +tcache_flush(void) { + tsd_t *tsd; + cassert(config_tcache); + + tsd = tsd_fetch(); + tcache_flush_cache(tsd, tsd_tcachep_get(tsd)); +} + +static void +tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { + tcache_flush_cache(tsd, tcache); + tcache_arena_dissociate(tsd_tsdn(tsd), tcache); + + if (tsd_tcache) { + /* Release the avail array for the TSD embedded auto tcache. 
*/ + void *avail_array = (void *)((uintptr_t)tcache->tbins[0].avail - + (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *)); + idalloctm(tsd_tsdn(tsd), avail_array, NULL, true, true); + } else { + /* Release both the tcache struct and avail array. */ + idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); + } +} + +/* For auto tcache (embedded in TSD) only. */ +void +tcache_cleanup(tsd_t *tsd) { if (!config_tcache) { return; } - if ((tcache = tsd_tcache_get(tsd)) != NULL) { - tcache_destroy(tsd, tcache); - tsd_tcache_set(tsd, NULL); + tcache_t *tcache = tsd_tcachep_get(tsd); + if (!tcache_available(tsd)) { + assert(tsd_tcache_enabled_get(tsd) == tcache_enabled_false); + if (config_debug) { + assert(tcache->tbins[0].avail == NULL); + } + return; + } + assert(tsd_tcache_enabled_get(tsd) == tcache_enabled_true); + assert(tcache->tbins[0].avail != NULL); + + tcache_destroy(tsd, tcache, true); + if (config_debug) { + tcache->tbins[0].avail = NULL; } } @@ -481,12 +577,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) { goto label_return; } - arena_t *arena = arena_ichoose(tsd, NULL); - if (unlikely(arena == NULL)) { - err = true; - goto label_return; - } - tcache_t *tcache = tcache_create(tsd_tsdn(tsd), arena); + tcache_t *tcache = tcache_create_explicit(tsd); if (tcache == NULL) { err = true; goto label_return; @@ -531,7 +622,7 @@ tcaches_flush(tsd_t *tsd, unsigned ind) { tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]); malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); if (tcache != NULL) { - tcache_destroy(tsd, tcache); + tcache_destroy(tsd, tcache, false); } } @@ -544,7 +635,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { tcaches_avail = elm; malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); if (tcache != NULL) { - tcache_destroy(tsd, tcache); + tcache_destroy(tsd, tcache, false); } } diff --git a/src/tsd.c b/src/tsd.c index 86502116..8b54770e 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -21,11 +21,6 @@ malloc_tsd_dalloc(void *wrapper) { 
a0dalloc(wrapper); } -void -malloc_tsd_no_cleanup(void *arg) { - not_reached(); -} - #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) #ifndef _WIN32 JEMALLOC_EXPORT diff --git a/test/unit/tsd.c b/test/unit/tsd.c index e033bb76..5bfcdf49 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -5,6 +5,7 @@ typedef unsigned int data_t; static bool data_cleanup_executed; +static bool data_test_started; malloc_tsd_types(data_, data_t) malloc_tsd_protos(, data_, data_t) @@ -13,6 +14,9 @@ void data_cleanup(void *arg) { data_t *data = (data_t *)arg; + if (!data_test_started) { + return; + } if (!data_cleanup_executed) { assert_x_eq(*data, THREAD_DATA, "Argument passed into cleanup function should match tsd " @@ -135,7 +139,9 @@ main(void) { malloc_printf("Initialization error"); return test_status_fail; } + data_test_started = false; data_tsd_boot(); + data_test_started = true; return test( test_tsd_main_thread, From 0fba57e579e688d0ccda5a615c91ab66cd4ba54a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 5 Apr 2017 19:23:41 -0700 Subject: [PATCH 0775/2608] Get rid of tcache_enabled_t as we have runtime init support. --- include/jemalloc/internal/jemalloc_internal.h.in | 2 +- include/jemalloc/internal/tcache_inlines.h | 16 +++++----------- include/jemalloc/internal/tcache_structs.h | 6 ------ include/jemalloc/internal/tcache_types.h | 3 +++ include/jemalloc/internal/tsd_structs.h | 5 ++--- src/tcache.c | 6 +++--- 6 files changed, 14 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 449a4ab8..3b137fcb 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -942,7 +942,7 @@ tcache_available(tsd_t *tsd) { * initialization, or 2) disabled through thread.tcache.enabled mallctl * or config options. This check covers all cases. 
*/ - if (likely(tsd_tcache_enabled_get(tsd) == tcache_enabled_true)) { + if (likely(tsd_tcache_enabled_get(tsd) == true)) { /* Associated arena == null implies tcache init in progress. */ if (tsd_tcachep_get(tsd)->arena != NULL) { assert(tsd_tcachep_get(tsd)->tbins[0].avail != NULL); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index c3660963..929d8a7e 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -22,30 +22,24 @@ tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) JEMALLOC_INLINE bool tcache_enabled_get(tsd_t *tsd) { - tcache_enabled_t tcache_enabled; - cassert(config_tcache); - tcache_enabled = tsd_tcache_enabled_get(tsd); - assert(tcache_enabled != tcache_enabled_default); - - return (bool)tcache_enabled; + return tsd_tcache_enabled_get(tsd); } JEMALLOC_INLINE void tcache_enabled_set(tsd_t *tsd, bool enabled) { cassert(config_tcache); - tcache_enabled_t old = tsd_tcache_enabled_get(tsd); + bool was_enabled = tsd_tcache_enabled_get(tsd); - if ((old != tcache_enabled_true) && enabled) { + if (!was_enabled && enabled) { tsd_tcache_data_init(tsd); - } else if ((old == tcache_enabled_true) && !enabled) { + } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } /* Commit the state last. Above calls check current state. 
*/ - tcache_enabled_t tcache_enabled = (tcache_enabled_t)enabled; - tsd_tcache_enabled_set(tsd, tcache_enabled); + tsd_tcache_enabled_set(tsd, enabled); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index c9c05cd2..d7ec4b69 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -1,12 +1,6 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H -typedef enum { - tcache_enabled_false = 0, /* Enable cast to/from bool. */ - tcache_enabled_true = 1, - tcache_enabled_default = 2 -} tcache_enabled_t; - /* * Read-only information associated with each element of tcache_t's tbins array * is stored separately, mainly to reduce memory usage. diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 8624ac2f..70f89608 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -50,4 +50,7 @@ typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {{NULL}} +/* Used in TSD static initializer only. Will be initialized to opt_tcache. 
*/ +#define TCACHE_ENABLED_DEFAULT false + #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index d399563c..f327c769 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -25,8 +25,7 @@ struct tsd_init_head_s { O(arenas_tdata, arena_tdata_t *,yes, no, yes) \ O(narenas_tdata, unsigned, yes, no, no) \ O(arenas_tdata_bypass, bool, no, no, no) \ - O(tcache_enabled, tcache_enabled_t, \ - yes, yes, no) \ + O(tcache_enabled, bool, yes, yes, no) \ O(rtree_ctx, rtree_ctx_t, no, yes, no) \ O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ @@ -44,7 +43,7 @@ struct tsd_init_head_s { NULL, \ 0, \ false, \ - tcache_enabled_default, \ + TCACHE_ENABLED_DEFAULT, \ RTREE_CTX_ZERO_INITIALIZER, \ ql_head_initializer(witnesses), \ RTREE_ELM_WITNESS_TSD_INITIALIZER, \ diff --git a/src/tcache.c b/src/tcache.c index aa2917b2..b8ce4a07 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -326,7 +326,7 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { bool tsd_tcache_enabled_data_init(tsd_t *tsd) { /* Called upon tsd initialization. */ - tsd_tcache_enabled_set(tsd, (tcache_enabled_t)opt_tcache); + tsd_tcache_enabled_set(tsd, opt_tcache); if (opt_tcache) { /* Trigger tcache init. 
*/ tsd_tcache_data_init(tsd); @@ -501,13 +501,13 @@ tcache_cleanup(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get(tsd); if (!tcache_available(tsd)) { - assert(tsd_tcache_enabled_get(tsd) == tcache_enabled_false); + assert(tsd_tcache_enabled_get(tsd) == false); if (config_debug) { assert(tcache->tbins[0].avail == NULL); } return; } - assert(tsd_tcache_enabled_get(tsd) == tcache_enabled_true); + assert(tsd_tcache_enabled_get(tsd)); assert(tcache->tbins[0].avail != NULL); tcache_destroy(tsd, tcache, true); From 4dec507546040896338d8bbdb2075c7ad3a4b9f3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 5 Apr 2017 22:04:12 -0700 Subject: [PATCH 0776/2608] Bypass witness_fork in TSD when !config_debug. With the tcache change, we plan to leave some blank space when !config_debug (unused tbins, witnesses) at the end of the tsd. Let's not touch the memory. --- src/witness.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/witness.c b/src/witness.c index 034ea92b..cbffaeaa 100644 --- a/src/witness.c +++ b/src/witness.c @@ -96,16 +96,25 @@ witnesses_cleanup(tsd_t *tsd) { void witness_prefork(tsd_t *tsd) { + if (!config_debug) { + return; + } tsd_witness_fork_set(tsd, true); } void witness_postfork_parent(tsd_t *tsd) { + if (!config_debug) { + return; + } tsd_witness_fork_set(tsd, false); } void witness_postfork_child(tsd_t *tsd) { + if (!config_debug) { + return; + } #ifndef JEMALLOC_MUTEX_INIT_CB witness_list_t *witnesses; From 36bd90b96212772f1adbd421a6b091b542278995 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 6 Apr 2017 12:35:22 -0700 Subject: [PATCH 0777/2608] Optimizing TSD and thread cache layout. 1) Re-organize TSD so that frequently accessed fields are closer to the beginning and more compact. Assuming 64-bit, the first 2.5 cachelines now contains everything needed on tcache fast path, expect the tcache struct itself. 2) Re-organize tcache and tbins. Take lg_fill_div out of tbin, and reduce tbin to 24 bytes (down from 32). 
Split tbins into tbins_small and tbins_large, and place tbins_small close to the beginning. --- include/jemalloc/internal/arena_externs.h | 2 +- .../jemalloc/internal/jemalloc_internal.h.in | 69 ++++++++++------- include/jemalloc/internal/rtree_structs.h | 3 - include/jemalloc/internal/tcache_inlines.h | 12 +-- include/jemalloc/internal/tcache_structs.h | 30 ++++++-- include/jemalloc/internal/tcache_types.h | 7 +- include/jemalloc/internal/tsd_structs.h | 63 +++++++++++---- include/jemalloc/internal/tsd_types.h | 14 ++-- src/arena.c | 16 ++-- src/tcache.c | 76 +++++++++++-------- 10 files changed, 189 insertions(+), 103 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a35fe184..0f86dc05 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -51,7 +51,7 @@ bool arena_muzzy_decay_time_set(tsdn_t *tsdn, arena_t *arena, void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 3b137fcb..c00912bf 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -538,33 +538,35 @@ bool malloc_initialized(void); #include "jemalloc/internal/mutex_inlines.h" #ifndef JEMALLOC_ENABLE_INLINE -pszind_t psz2ind(size_t psz); -size_t pind2sz_compute(pszind_t pind); -size_t pind2sz_lookup(pszind_t pind); -size_t pind2sz(pszind_t pind); -size_t psz2u(size_t psz); -szind_t size2index_compute(size_t size); -szind_t 
size2index_lookup(size_t size); -szind_t size2index(size_t size); -size_t index2size_compute(szind_t index); -size_t index2size_lookup(szind_t index); -size_t index2size(szind_t index); -size_t s2u_compute(size_t size); -size_t s2u_lookup(size_t size); -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); -arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, +pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment); +arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose(tsd_t *tsd, arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); -arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); -ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); -bool tcache_available(tsd_t *tsd); -tcache_t *tcache_get(tsd_t *tsd); -malloc_cpuid_t malloc_getcpu(void); -unsigned percpu_arena_choose(void); -unsigned percpu_arena_ind_limit(void); +arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); +bool tcache_available(tsd_t *tsd); +tcache_bin_t *tcache_small_bin_get(tcache_t *tcache, szind_t binind); +tcache_bin_t *tcache_large_bin_get(tcache_t *tcache, szind_t binind); +tcache_t 
*tcache_get(tsd_t *tsd); +malloc_cpuid_t malloc_getcpu(void); +unsigned percpu_arena_choose(void); +unsigned percpu_arena_ind_limit(void); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -933,6 +935,18 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } +JEMALLOC_ALWAYS_INLINE tcache_bin_t * +tcache_small_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind < NBINS); + return &tcache->tbins_small[binind]; +} + +JEMALLOC_ALWAYS_INLINE tcache_bin_t * +tcache_large_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind >= NBINS &&binind < nhbins); + return &tcache->tbins_large[binind - NBINS]; +} + JEMALLOC_ALWAYS_INLINE bool tcache_available(tsd_t *tsd) { cassert(config_tcache); @@ -945,7 +959,8 @@ tcache_available(tsd_t *tsd) { if (likely(tsd_tcache_enabled_get(tsd) == true)) { /* Associated arena == null implies tcache init in progress. */ if (tsd_tcachep_get(tsd)->arena != NULL) { - assert(tsd_tcachep_get(tsd)->tbins[0].avail != NULL); + assert(tcache_small_bin_get(tsd_tcachep_get(tsd), + 0)->avail != NULL); } return true; } diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 8dd9cdaa..123248ae 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -53,9 +53,6 @@ struct rtree_ctx_cache_elm_s { }; struct rtree_ctx_s { -#ifndef _MSC_VER - JEMALLOC_ALIGNED(CACHELINE) -#endif rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; }; diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 929d8a7e..dae43f99 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -73,7 +73,7 @@ tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { ret = *(tbin->avail - tbin->ncached); tbin->ncached--; - if (unlikely((int)tbin->ncached < tbin->low_water)) { + if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) { 
tbin->low_water = tbin->ncached; } @@ -89,7 +89,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < NBINS); - tbin = &tcache->tbins[binind]; + tbin = tcache_small_bin_get(tcache, binind); ret = tcache_alloc_easy(tbin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -150,8 +150,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, tcache_bin_t *tbin; bool tcache_success; - assert(binind < nhbins); - tbin = &tcache->tbins[binind]; + assert(binind >= NBINS &&binind < nhbins); + tbin = tcache_large_bin_get(tcache, binind); ret = tcache_alloc_easy(tbin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -215,7 +215,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); } - tbin = &tcache->tbins[binind]; + tbin = tcache_small_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { tcache_bin_flush_small(tsd, tcache, tbin, binind, @@ -241,7 +241,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, large_dalloc_junk(ptr, index2size(binind)); } - tbin = &tcache->tbins[binind]; + tbin = tcache_large_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { tcache_bin_flush_large(tsd, tbin, binind, diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index d7ec4b69..4e101609 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -10,10 +10,14 @@ struct tcache_bin_info_s { }; struct tcache_bin_s { + low_water_t low_water; /* Min # cached since last GC. */ + uint32_t ncached; /* # of cached objects. 
*/ + /* + * ncached and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. + */ tcache_bin_stats_t tstats; - int low_water; /* Min # cached since last GC. */ - unsigned lg_fill_div; /* Fill (ncached_max >> lg_fill_div). */ - unsigned ncached; /* # of cached objects. */ /* * To make use of adjacent cacheline prefetch, the items in the avail * stack goes to higher address for newer allocations. avail points @@ -25,11 +29,9 @@ struct tcache_bin_s { }; struct tcache_s { - ql_elm(tcache_t) link; /* Used for aggregating stats. */ + /* Data accessed frequently first: prof, ticker and small bins. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ ticker_t gc_ticker; /* Drives incremental GC. */ - szind_t next_gc_bin; /* Next bin to GC. */ - arena_t *arena; /* Associated arena. */ /* * The pointer stacks associated with tbins follow as a contiguous * array. During tcache initialization, the avail pointer in each @@ -37,9 +39,21 @@ struct tcache_s { * this array. */ #ifdef JEMALLOC_TCACHE - tcache_bin_t tbins[NSIZES]; + tcache_bin_t tbins_small[NBINS]; #else - tcache_bin_t tbins[0]; + tcache_bin_t tbins_small[0]; +#endif + /* Data accessed less often below. */ + ql_elm(tcache_t) link; /* Used for aggregating stats. */ + arena_t *arena; /* Associated arena. */ + szind_t next_gc_bin; /* Next bin to GC. */ +#ifdef JEMALLOC_TCACHE + /* For small bins, fill (ncached_max >> lg_fill_div). 
*/ + uint8_t lg_fill_div[NBINS]; + tcache_bin_t tbins_large[NSIZES-NBINS]; +#else + uint8_t lg_fill_div[0]; + tcache_bin_t tbins_large[0]; #endif }; diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 70f89608..a60db6ff 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -6,6 +6,9 @@ typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; +/* ncached is cast to this type for comparison. */ +typedef int32_t low_water_t; + /* * tcache pointers close to NULL are used to encode state information that is * used for two purposes: preventing thread caching on a per thread basis and @@ -48,9 +51,9 @@ typedef struct tcaches_s tcaches_t; ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) /* Used in TSD static initializer only. Real init in tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER {{NULL}} +#define TCACHE_ZERO_INITIALIZER {0} /* Used in TSD static initializer only. Will be initialized to opt_tcache. 
*/ -#define TCACHE_ENABLED_DEFAULT false +#define TCACHE_ENABLED_ZERO_INITIALIZER false #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index f327c769..2dca0bdb 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -14,19 +14,54 @@ struct tsd_init_head_s { }; #endif +/* + * Thread-Specific-Data layout + * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * s: state + * e: tcache_enabled + * m: thread_allocated (config_stats) + * f: thread_deallocated (config_stats) + * p: prof_tdata (config_prof) + * c: rtree_ctx (rtree cache accessed on deallocation) + * t: tcache + * --- data not accessed on tcache fast path: arena related fields --- + * d: arenas_tdata_bypass + * r: narenas_tdata + * x: blank space (1 byte) + * i: iarena + * a: arena + * o: arenas_tdata + * Loading TSD data is on the critical path of basically all malloc operations. + * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. + * Use a compact layout to reduce cache footprint. + * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ + * |---------------------------- 1st cacheline ----------------------------| + * | sedxrrrr mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * |---------------------------- 2nd cacheline ----------------------------| + * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | + * |---------------------------- 3nd cacheline ----------------------------| + * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | + * +-------------------------------------------------------------------------+ + * Note: the entire tcache is embedded into TSD and spans multiple cachelines. + * + * The last 3 members (i, a and o) before tcache isn't really needed on tcache + * fast path. 
However we have a number of unused tcache bins and witnesses + * (never touched unless config_debug) at the end of tcache, so we place them + * there to avoid breaking the cachelines and possibly paging in an extra page. + */ #define MALLOC_TSD \ /* O(name, type, [gs]et, init, cleanup) */ \ - O(tcache, tcache_t, yes, no, yes) \ + O(tcache_enabled, bool, yes, yes, no) \ + O(arenas_tdata_bypass, bool, no, no, no) \ + O(narenas_tdata, uint32_t, yes, no, no) \ O(thread_allocated, uint64_t, yes, no, no) \ O(thread_deallocated, uint64_t, yes, no, no) \ O(prof_tdata, prof_tdata_t *, yes, no, yes) \ + O(rtree_ctx, rtree_ctx_t, no, yes, no) \ O(iarena, arena_t *, yes, no, yes) \ O(arena, arena_t *, yes, no, yes) \ O(arenas_tdata, arena_tdata_t *,yes, no, yes) \ - O(narenas_tdata, unsigned, yes, no, no) \ - O(arenas_tdata_bypass, bool, no, no, no) \ - O(tcache_enabled, bool, yes, yes, no) \ - O(rtree_ctx, rtree_ctx_t, no, yes, no) \ + O(tcache, tcache_t, yes, no, yes) \ O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ no, no, no) \ @@ -34,17 +69,17 @@ struct tsd_init_head_s { #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ - TCACHE_ZERO_INITIALIZER, \ - 0, \ - 0, \ - NULL, \ - NULL, \ - NULL, \ - NULL, \ - 0, \ + TCACHE_ENABLED_ZERO_INITIALIZER, \ false, \ - TCACHE_ENABLED_DEFAULT, \ + 0, \ + 0, \ + 0, \ + NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ + NULL, \ + NULL, \ + NULL, \ + TCACHE_ZERO_INITIALIZER, \ ql_head_initializer(witnesses), \ RTREE_ELM_WITNESS_TSD_INITIALIZER, \ false \ diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 29c6378a..4d5fef57 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -17,12 +17,14 @@ typedef struct tsdn_s tsdn_t; #define TSDN_NULL ((tsdn_t *)0) -typedef enum { - tsd_state_uninitialized, - tsd_state_nominal, - tsd_state_purgatory, - tsd_state_reincarnated -} tsd_state_t; +enum { + 
tsd_state_uninitialized = 0, + tsd_state_nominal = 1, + tsd_state_purgatory = 2, + tsd_state_reincarnated = 3 +}; +/* Manually limit tsd_state_t to a single byte. */ +typedef uint8_t tsd_state_t; /* * TLS/TSD-agnostic macro-based implementation of thread-specific data. There diff --git a/src/arena.c b/src/arena.c index feb1f760..b78719e4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -287,8 +287,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); ql_foreach(tcache, &arena->tcache_ql, link) { - for (szind_t i = 0; i < nhbins; i++) { - tbin = &tcache->tbins[i]; + szind_t i = 0; + for (; i < NBINS; i++) { + tbin = tcache_small_bin_get(tcache, i); + arena_stats_accum_zu(&astats->tcache_bytes, + tbin->ncached * index2size(i)); + } + for (; i < nhbins; i++) { + tbin = tcache_large_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * index2size(i)); } @@ -1317,8 +1323,8 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } void -arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, - szind_t binind, uint64_t prof_accumbytes) { +arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; @@ -1330,7 +1336,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_bin_t *tbin, bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tbin->lg_fill_div); i < nfill; i++) { + tcache->lg_fill_div[binind]); i < nfill; i++) { extent_t *slab; void *ptr; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > diff --git a/src/tcache.c b/src/tcache.c index b8ce4a07..34b46afd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -40,9 +40,13 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { void 
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin = &tcache->tbins[binind]; - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + tcache_bin_t *tbin; + if (binind < NBINS) { + tbin = tcache_small_bin_get(tcache, binind); + } else { + tbin = tcache_large_bin_get(tcache, binind); + } if (tbin->low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. @@ -51,24 +55,26 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache_bin_flush_small(tsd, tcache, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2)); + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + if ((tbin_info->ncached_max >> + (tcache->lg_fill_div[binind] + 1)) >= 1) { + tcache->lg_fill_div[binind]++; + } } else { tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2), tcache); } - /* - * Reduce fill count by 2X. Limit lg_fill_div such that the - * fill count is always at least 1. - */ - if ((tbin_info->ncached_max >> (tbin->lg_fill_div+1)) >= 1) { - tbin->lg_fill_div++; - } } else if (tbin->low_water < 0) { /* - * Increase fill count by 2X. Make sure lg_fill_div stays - * greater than 0. + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 0. */ - if (tbin->lg_fill_div > 1) { - tbin->lg_fill_div--; + if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { + tcache->lg_fill_div[binind]--; } } tbin->low_water = tbin->ncached; @@ -85,8 +91,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ret; assert(tcache->arena); - arena_tcache_fill_small(tsdn, arena, tbin, binind, config_prof ? - tcache->prof_accumbytes : 0); + arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind, + config_prof ? 
tcache->prof_accumbytes : 0); if (config_prof) { tcache->prof_accumbytes = 0; } @@ -175,7 +181,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) { + if ((low_water_t)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -273,7 +279,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((int)tbin->ncached < tbin->low_water) { + if ((low_water_t)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -347,17 +353,24 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->tbins, 0, sizeof(tcache_bin_t) * nhbins); - for (unsigned i = 0; i < nhbins; i++) { - tcache->tbins[i].lg_fill_div = 1; + memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS); + memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS)); + unsigned i = 0; + for (; i < NBINS; i++) { + tcache->lg_fill_div[i] = 1; stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); /* * avail points past the available space. Allocations will * access the slots toward higher addresses (for the benefit of * prefetch). 
*/ - tcache->tbins[i].avail = (void **)((uintptr_t)avail_stack + - (uintptr_t)stack_offset); + tcache_small_bin_get(tcache, i)->avail = + (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); + } + for (; i < nhbins; i++) { + stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); + tcache_large_bin_get(tcache, i)->avail = + (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); } assert(stack_offset == stack_nelms * sizeof(void *)); } @@ -370,7 +383,7 @@ tsd_tcache_data_init(tsd_t *tsd) { } tcache_t *tcache = &tsd->tcache; - assert(tcache->tbins[0].avail == NULL); + assert(tcache_small_bin_get(tcache, 0)->avail == NULL); size_t size = stack_nelms * sizeof(void *); /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); @@ -443,7 +456,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { unsigned i; for (i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats) { @@ -451,7 +464,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } for (; i < nhbins; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { @@ -483,7 +496,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. 
*/ - void *avail_array = (void *)((uintptr_t)tcache->tbins[0].avail - + void *avail_array = + (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail - (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *)); idalloctm(tsd_tsdn(tsd), avail_array, NULL, true, true); } else { @@ -503,16 +517,16 @@ tcache_cleanup(tsd_t *tsd) { if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); if (config_debug) { - assert(tcache->tbins[0].avail == NULL); + assert(tcache_small_bin_get(tcache, 0)->avail == NULL); } return; } assert(tsd_tcache_enabled_get(tsd)); - assert(tcache->tbins[0].avail != NULL); + assert(tcache_small_bin_get(tcache, 0)->avail != NULL); tcache_destroy(tsd, tcache, true); if (config_debug) { - tcache->tbins[0].avail = NULL; + tcache_small_bin_get(tcache, 0)->avail = NULL; } } @@ -525,7 +539,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); @@ -533,7 +547,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } for (; i < nhbins; i++) { - tcache_bin_t *tbin = &tcache->tbins[i]; + tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_large_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; From 0a0fcd3e6a0816f0a56fa852416d0ece861c0abb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 28 Mar 2017 17:30:54 -0700 Subject: [PATCH 0778/2608] Add hooking functionality This allows us to hook chosen functions and do interesting things there (in particular: reentrancy checking). 
--- Makefile.in | 2 + include/jemalloc/internal/hooks.h | 19 +++++ .../jemalloc/internal/jemalloc_internal.h.in | 7 ++ include/jemalloc/internal/malloc_io.h | 2 +- include/jemalloc/internal/private_symbols.txt | 1 + include/jemalloc/internal/stats_externs.h | 2 +- src/hooks.c | 12 +++ src/prof.c | 7 ++ src/tsd.c | 9 +++ test/include/test/jemalloc_test.h.in | 2 + test/include/test/test.h | 6 ++ test/src/test.c | 76 ++++++++++++++++++- test/unit/hooks.c | 38 ++++++++++ test/unit/prof_accum.c | 2 +- test/unit/prof_active.c | 2 +- test/unit/prof_gdump.c | 2 +- test/unit/prof_reset.c | 2 +- test/unit/prof_tctx.c | 2 +- test/unit/tsd.c | 1 + 19 files changed, 183 insertions(+), 11 deletions(-) create mode 100644 include/jemalloc/internal/hooks.h create mode 100644 src/hooks.c create mode 100644 test/unit/hooks.c diff --git a/Makefile.in b/Makefile.in index 4fb852da..26c811c8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,6 +98,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ + $(srcroot)src/hooks.c \ $(srcroot)src/large.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ @@ -161,6 +162,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ + $(srcroot)test/unit/hooks.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ diff --git a/include/jemalloc/internal/hooks.h b/include/jemalloc/internal/hooks.h new file mode 100644 index 00000000..608b268f --- /dev/null +++ b/include/jemalloc/internal/hooks.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_HOOKS_H +#define JEMALLOC_INTERNAL_HOOKS_H + +extern void (*hooks_arena_new_hook)(); +extern void (*hooks_libc_hook)(); + +#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) + +#define open JEMALLOC_HOOK(open, hooks_libc_hook) +#define read JEMALLOC_HOOK(read, hooks_libc_hook) +#define write JEMALLOC_HOOK(write, 
hooks_libc_hook) +#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook) +#define close JEMALLOC_HOOK(close, hooks_libc_hook) +#define creat JEMALLOC_HOOK(creat, hooks_libc_hook) +#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook) +/* Note that this is undef'd and re-define'd in src/prof.c. */ +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) + +#endif /* JEMALLOC_INTERNAL_HOOKS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index c00912bf..1c0bf43a 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -23,7 +23,14 @@ extern "C" { # define JEMALLOC_N(n) @private_namespace@##n # include "../jemalloc@install_suffix@.h" #endif + +/* + * Note that the ordering matters here; the hook itself is name-mangled. We + * want the inclusion of hooks to happen early, so that we hook as much as + * possible. + */ #include "jemalloc/internal/private_namespace.h" +#include "jemalloc/internal/hooks.h" static const bool config_debug = #ifdef JEMALLOC_DEBUG diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 7ff3d5b1..8b2fb96f 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -56,7 +56,7 @@ size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); -void malloc_cprintf(void (*write)(void *, const char *), void *cbopaque, +void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index e2bb0592..deae8243 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -232,6 +232,7 @@ hash_rotl_64 hash_x64_128 hash_x86_128 hash_x86_32 +hooks_libc_hook iaalloc ialloc iallocztm diff --git a/include/jemalloc/internal/stats_externs.h b/include/jemalloc/internal/stats_externs.h index a0a1ab6c..519441c9 100644 --- a/include/jemalloc/internal/stats_externs.h +++ b/include/jemalloc/internal/stats_externs.h @@ -3,7 +3,7 @@ extern bool opt_stats_print; -void stats_print(void (*write)(void *, const char *), void *cbopaque, +void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); #endif /* JEMALLOC_INTERNAL_STATS_EXTERNS_H */ diff --git a/src/hooks.c b/src/hooks.c new file mode 100644 index 00000000..c32471e9 --- /dev/null +++ b/src/hooks.c @@ -0,0 +1,12 @@ +#include "jemalloc/internal/jemalloc_internal.h" + +/* + * The hooks are a little bit screwy -- they're not genuinely exported in the + * sense that we want them available to end-users, but we do want them visible + * from outside the generated library, so that we can use them in test code. + */ +JEMALLOC_EXPORT +void (*hooks_arena_new_hook)() = NULL; + +JEMALLOC_EXPORT +void (*hooks_libc_hook)() = NULL; diff --git a/src/prof.c b/src/prof.c index a0290b8f..db1ef035 100644 --- a/src/prof.c +++ b/src/prof.c @@ -8,7 +8,14 @@ #endif #ifdef JEMALLOC_PROF_LIBGCC +/* + * We have a circular dependency -- jemalloc_internal.h tells us if we should + * use libgcc's unwinding functionality, but after we've included that, we've + * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 
+ */ +#undef _Unwind_Backtrace #include +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) #endif /******************************************************************************/ diff --git a/src/tsd.c b/src/tsd.c index 8b54770e..0d5de8ea 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -149,6 +149,15 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { return true; } +/* + * We need to be able to say "read" here (in the "pragma section"), but have + * hooked "read". We won't read for the rest of the file, so we can get away + * with unhooking. + */ +#ifdef read +# undef read +#endif + #ifdef _MSC_VER # ifdef _M_IX86 # pragma comment(linker, "/INCLUDE:__tls_used") diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 0770d020..e3882b29 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -45,6 +45,7 @@ extern "C" { # define JEMALLOC_MANGLE # include "jemalloc/internal/jemalloc_internal.h" + /******************************************************************************/ /* * For integration tests, expose the public jemalloc interfaces, but only @@ -68,6 +69,7 @@ static const bool config_debug = # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" +# include "jemalloc/internal/hooks.h" /* Hermetic headers. */ # include "jemalloc/internal/assert.h" diff --git a/test/include/test/test.h b/test/include/test/test.h index d7f05fad..fd0e5265 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -310,6 +310,9 @@ label_test_end: \ #define test(...) \ p_test(__VA_ARGS__, NULL) +#define test_no_reentrancy(...) \ + p_test_no_reentrancy(__VA_ARGS__, NULL) + #define test_no_malloc_init(...) \ p_test_no_malloc_init(__VA_ARGS__, NULL) @@ -321,11 +324,14 @@ label_test_end: \ } \ } while (0) +bool test_is_reentrant(); + void test_skip(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. */ test_status_t p_test(test_t *t, ...); +test_status_t p_test_no_reentrancy(test_t *t, ...); test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); diff --git a/test/src/test.c b/test/src/test.c index c5101d4e..fe6dc60e 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -1,10 +1,42 @@ #include "test/jemalloc_test.h" +/* Test status state. */ + static unsigned test_count = 0; static test_status_t test_counts[test_status_count] = {0, 0, 0}; static test_status_t test_status = test_status_pass; static const char * test_name = ""; +/* Reentrancy testing helpers. */ + +#define NUM_REENTRANT_ALLOCS 20 +static bool reentrant = false; +static bool hook_ran = false; +static void *to_free[NUM_REENTRANT_ALLOCS]; + +static void +reentrancy_hook() { + hook_ran = true; + hooks_libc_hook = NULL; + + void *to_free_local[NUM_REENTRANT_ALLOCS]; + size_t alloc_size = 1; + for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) { + to_free[i] = malloc(alloc_size); + to_free_local[i] = malloc(alloc_size); + alloc_size *= 2; + } + for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) { + free(to_free_local[i]); + } +} + +/* Actual test infrastructure. */ +bool +test_is_reentrant() { + return reentrant; +} + JEMALLOC_FORMAT_PRINTF(1, 2) void test_skip(const char *format, ...) { @@ -49,11 +81,13 @@ p_test_init(const char *name) { void p_test_fini(void) { test_counts[test_status]++; - malloc_printf("%s: %s\n", test_name, test_status_string(test_status)); + malloc_printf("%s: %s (%s)\n", test_name, + test_status_string(test_status), + reentrant ? 
"reentrant" : "non-reentrant"); } static test_status_t -p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { +p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { test_status_t ret; if (do_malloc_init) { @@ -71,10 +105,27 @@ p_test_impl(bool do_malloc_init, test_t *t, va_list ap) { ret = test_status_pass; for (; t != NULL; t = va_arg(ap, test_t *)) { + /* Non-reentrant run. */ + reentrant = false; t(); if (test_status > ret) { ret = test_status; } + /* Reentrant run. */ + if (do_reentrant) { + reentrant = true; + hooks_libc_hook = &reentrancy_hook; + t(); + if (test_status > ret) { + ret = test_status; + } + if (hook_ran) { + hook_ran = false; + for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) { + free(to_free[i]); + } + } + } } malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", @@ -95,7 +146,20 @@ p_test(test_t *t, ...) { ret = test_status_pass; va_start(ap, t); - ret = p_test_impl(true, t, ap); + ret = p_test_impl(true, true, t, ap); + va_end(ap); + + return ret; +} + +test_status_t +p_test_no_reentrancy(test_t *t, ...) { + test_status_t ret; + va_list ap; + + ret = test_status_pass; + va_start(ap, t); + ret = p_test_impl(true, false, t, ap); va_end(ap); return ret; @@ -108,7 +172,11 @@ p_test_no_malloc_init(test_t *t, ...) { ret = test_status_pass; va_start(ap, t); - ret = p_test_impl(false, t, ap); + /* + * We also omit reentrancy from bootstrapping tests, since we don't + * (yet) care about general reentrancy during bootstrapping. 
+ */ + ret = p_test_impl(false, false, t, ap); va_end(ap); return ret; diff --git a/test/unit/hooks.c b/test/unit/hooks.c new file mode 100644 index 00000000..b70172e1 --- /dev/null +++ b/test/unit/hooks.c @@ -0,0 +1,38 @@ +#include "test/jemalloc_test.h" + +static bool hook_called = false; + +static void +hook() { + hook_called = true; +} + +static int +func_to_hook(int arg1, int arg2) { + return arg1 + arg2; +} + +#define func_to_hook JEMALLOC_HOOK(func_to_hook, hooks_libc_hook) + +TEST_BEGIN(unhooked_call) { + hooks_libc_hook = NULL; + hook_called = false; + assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); + assert_false(hook_called, "Nulling out hook didn't take."); +} +TEST_END + +TEST_BEGIN(hooked_call) { + hooks_libc_hook = &hook; + hook_called = false; + assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); + assert_true(hook_called, "Hook should have executed."); +} +TEST_END + +int +main(void) { + return test( + unhooked_call, + hooked_call); +} diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 6ccab82b..25220063 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -76,6 +76,6 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_idump); } diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 275aac89..850a24a7 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -112,6 +112,6 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_prof_active); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 97ade68c..fcb434cb 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -69,6 +69,6 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_gdump); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 6120714e..7cce42d2 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -278,7 +278,7 @@ main(void) { /* Intercept dumping prior to 
running any tests. */ prof_dump_open = prof_dump_open_intercept; - return test( + return test_no_reentrancy( test_prof_reset_basic, test_prof_reset_cleanup, test_prof_reset, diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 183f7ce0..30c6b178 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -41,6 +41,6 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_prof_realloc); } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 5bfcdf49..4a0f3185 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -79,6 +79,7 @@ thd_start(void *arg) { } TEST_BEGIN(test_tsd_main_thread) { + test_skip_if(test_is_reentrant()); thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END From b407a65401bca5828760c8fd5e940e91475a2b3e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 31 Mar 2017 19:59:45 -0700 Subject: [PATCH 0779/2608] Add basic reentrancy-checking support, and allow arena_new to reenter. This checks whether or not we're reentrant using thread-local data, and, if we are, moves certain internal allocations to use arena 0 (which should be properly initialized after bootstrapping). The immediate thing this allows is spinning up threads in arena_new, which will enable spinning up background threads there. 
--- include/jemalloc/internal/hooks.h | 4 +- .../jemalloc/internal/jemalloc_internal.h.in | 9 +- include/jemalloc/internal/private_symbols.txt | 4 + include/jemalloc/internal/tsd_structs.h | 6 +- src/arena.c | 13 +++ src/jemalloc.c | 98 ++++++++++++++++--- test/src/test.c | 76 +++++++++----- test/stress/microbench.c | 2 +- test/unit/stats.c | 2 +- test/unit/tsd.c | 3 +- 10 files changed, 170 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/hooks.h b/include/jemalloc/internal/hooks.h index 608b268f..cd49afcb 100644 --- a/include/jemalloc/internal/hooks.h +++ b/include/jemalloc/internal/hooks.h @@ -1,8 +1,8 @@ #ifndef JEMALLOC_INTERNAL_HOOKS_H #define JEMALLOC_INTERNAL_HOOKS_H -extern void (*hooks_arena_new_hook)(); -extern void (*hooks_libc_hook)(); +extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)(); +extern JEMALLOC_EXPORT void (*hooks_libc_hook)(); #define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 1c0bf43a..62dae0c4 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1013,6 +1013,11 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { return arena; } + /* During reentrancy, arena 0 is the safest bet. */ + if (*tsd_reentrancy_levelp_get(tsd) > 1) { + return arena_get(tsd_tsdn(tsd), 0, true); + } + ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); if (unlikely(ret == NULL)) { ret = arena_choose_hard(tsd, internal); @@ -1193,7 +1198,9 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - + if (!is_internal && *tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) { + tcache = NULL; + } arena_dalloc(tsdn, ptr, tcache, slow_path); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index deae8243..4931d489 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -232,6 +232,7 @@ hash_rotl_64 hash_x64_128 hash_x86_128 hash_x86_32 +hooks_arena_new_hook hooks_libc_hook iaalloc ialloc @@ -537,6 +538,9 @@ tsd_init_head tsd_narenas_tdata_get tsd_narenas_tdata_set tsd_narenas_tdatap_get +tsd_reentrancy_level_get +tsd_reentrancy_level_set +tsd_reentrancy_levelp_get tsd_wrapper_get tsd_wrapper_set tsd_nominal diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index 2dca0bdb..12df63d1 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -65,7 +65,8 @@ struct tsd_init_head_s { O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ no, no, no) \ - O(witness_fork, bool, yes, no, no) + O(witness_fork, bool, yes, no, no) \ + O(reentrancy_level, int, no, no, no) #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -82,7 +83,8 @@ struct tsd_init_head_s { TCACHE_ZERO_INITIALIZER, \ ql_head_initializer(witnesses), \ RTREE_ELM_WITNESS_TSD_INITIALIZER, \ - false \ + false, \ + 0 \ } struct tsd_s { diff --git a/src/arena.c b/src/arena.c index b78719e4..19069bbe 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1952,6 +1952,19 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->base = base; + /* We 
don't support reetrancy for arena 0 bootstrapping. */ + if (ind != 0 && hooks_arena_new_hook) { + /* + * If we're here, then arena 0 already exists, so bootstrapping + * is done enough that we should have tsd. + */ + int *reentrancy_level = tsd_reentrancy_levelp_get(tsdn_tsd( + tsdn)); + ++*reentrancy_level; + hooks_arena_new_hook(); + --*reentrancy_level; + } + return arena; label_error: if (ind != 0) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 9d66f7f6..7b205ff6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1656,6 +1656,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { szind_t ind = 0; size_t usize = 0; + /* + * For reentrancy checking, we get the old reentrancy level from tsd and + * reset it once we're done. In case of early bailout though, we never + * bother getting the old level, so we shouldn't try to reset it. This + * is indicated by leaving the pointer as NULL. + */ + int *reentrancy_level = NULL; + /* Initialize (if we can't prove we don't have to). */ if (sopts->slow) { if (unlikely(malloc_init())) { @@ -1708,7 +1716,27 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { * some reason. Let's grab it right away. */ tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + + /* + * If we need to handle reentrancy, we can do it out of a + * known-initialized arena (i.e. arena 0). + */ + reentrancy_level = tsd_reentrancy_levelp_get(tsd); + ++*reentrancy_level; + if (*reentrancy_level == 1) { + witness_assert_lockless(tsd_tsdn(tsd)); + } + if (unlikely(*reentrancy_level > 1)) { + /* + * We should never specify particular arenas or tcaches from + * within our internal allocations. + */ + assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC); + assert(dopts->arena_ind = ARENA_IND_AUTOMATIC); + dopts->tcache_ind = TCACHE_IND_NONE; + /* We know that arena 0 has already been initialized. */ + dopts->arena_ind = 0; + } /* If profiling is on, get our profiling context. 
*/ if (config_prof && opt_prof) { @@ -1769,9 +1797,15 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { UTRACE(0, size, allocation); } - witness_assert_lockless(tsd_tsdn(tsd)); - /* Success! */ + if (*reentrancy_level == 1) { + witness_assert_lockless(tsd_tsdn(tsd)); + } + /* + * If we got here, we never bailed out on a failure path, so + * reentrancy_level is non-null. + */ + --*reentrancy_level; *dopts->result = allocation; return 0; @@ -1795,6 +1829,10 @@ label_oom: *dopts->result = NULL; } + if (reentrancy_level != NULL) { + --*reentrancy_level; + } + return ENOMEM; /* @@ -1822,6 +1860,10 @@ label_invalid_alignment: *dopts->result = NULL; } + if (reentrancy_level != NULL) { + --*reentrancy_level; + } + return EINVAL; } @@ -1996,7 +2038,9 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { JEMALLOC_ALWAYS_INLINE_C void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - witness_assert_lockless(tsd_tsdn(tsd)); + if (*tsd_reentrancy_levelp_get(tsd) == 0) { + witness_assert_lockless(tsd_tsdn(tsd)); + } assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2021,7 +2065,9 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { JEMALLOC_ALWAYS_INLINE_C void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { - witness_assert_lockless(tsd_tsdn(tsd)); + if (*tsd_reentrancy_levelp_get(tsd) == 0) { + witness_assert_lockless(tsd_tsdn(tsd)); + } assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2056,7 +2102,11 @@ je_realloc(void *ptr, size_t size) { /* realloc(ptr, 0) is equivalent to free(ptr). 
*/ UTRACE(ptr, 0, 0); tsd = tsd_fetch(); - ifree(tsd, ptr, tcache_get(tsd), true); + tcache_t *tcache = NULL; + if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } + ifree(tsd, ptr, tcache, true); return NULL; } size = 1; @@ -2111,13 +2161,21 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); - if (likely(!malloc_slow)) { - ifree(tsd, ptr, tcache_get(tsd), false); - } else { - ifree(tsd, ptr, tcache_get(tsd), true); + if (*tsd_reentrancy_levelp_get(tsd) == 0) { + witness_assert_lockless(tsd_tsdn(tsd)); + } + tcache_t *tcache = NULL; + if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } + if (likely(!malloc_slow)) { + ifree(tsd, ptr, tcache, false); + } else { + ifree(tsd, ptr, tcache, true); + } + if (*tsd_reentrancy_levelp_get(tsd) == 0) { + witness_assert_lockless(tsd_tsdn(tsd)); } - witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -2599,13 +2657,19 @@ je_dallocx(void *ptr, int flags) { tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + /* Not allowed to be reentrant and specify a custom tcache. */ + assert(*tsd_reentrancy_levelp_get(tsd) == 0); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - tcache = tcache_get(tsd); + if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } } UTRACE(ptr, 0, 0); @@ -2646,13 +2710,19 @@ je_sdallocx(void *ptr, size_t size, int flags) { witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { + /* Not allowed to be reentrant and specify a custom tcache. 
*/ + assert(*tsd_reentrancy_levelp_get(tsd) == 0); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - tcache = tcache_get(tsd); + if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } } UTRACE(ptr, 0, 0); diff --git a/test/src/test.c b/test/src/test.c index fe6dc60e..01a4d738 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -10,31 +10,56 @@ static const char * test_name = ""; /* Reentrancy testing helpers. */ #define NUM_REENTRANT_ALLOCS 20 -static bool reentrant = false; -static bool hook_ran = false; -static void *to_free[NUM_REENTRANT_ALLOCS]; +typedef enum { + non_reentrant = 0, + libc_reentrant = 1, + arena_new_reentrant = 2 +} reentrancy_t; +static reentrancy_t reentrancy; + +static bool libc_hook_ran = false; +static bool arena_new_hook_ran = false; + +static const char * +reentrancy_t_str(reentrancy_t r) { + switch (r) { + case non_reentrant: + return "non-reentrant"; + case libc_reentrant: + return "libc-reentrant"; + case arena_new_reentrant: + return "arena_new-reentrant"; + default: + unreachable(); + } +} static void -reentrancy_hook() { - hook_ran = true; - hooks_libc_hook = NULL; +do_hook(bool *hook_ran, void (**hook)()) { + *hook_ran = true; + *hook = NULL; - void *to_free_local[NUM_REENTRANT_ALLOCS]; size_t alloc_size = 1; for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) { - to_free[i] = malloc(alloc_size); - to_free_local[i] = malloc(alloc_size); + free(malloc(alloc_size)); alloc_size *= 2; } - for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) { - free(to_free_local[i]); - } +} + +static void +libc_reentrancy_hook() { + do_hook(&libc_hook_ran, &hooks_libc_hook); +} + +static void +arena_new_reentrancy_hook() { + do_hook(&arena_new_hook_ran, &hooks_arena_new_hook); } /* Actual test infrastructure. 
*/ bool test_is_reentrant() { - return reentrant; + return reentrancy != non_reentrant; } JEMALLOC_FORMAT_PRINTF(1, 2) @@ -81,9 +106,8 @@ p_test_init(const char *name) { void p_test_fini(void) { test_counts[test_status]++; - malloc_printf("%s: %s (%s)\n", test_name, - test_status_string(test_status), - reentrant ? "reentrant" : "non-reentrant"); + malloc_printf("%s (%s): %s\n", test_name, reentrancy_t_str(reentrancy), + test_status_string(test_status)); } static test_status_t @@ -106,24 +130,28 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { ret = test_status_pass; for (; t != NULL; t = va_arg(ap, test_t *)) { /* Non-reentrant run. */ - reentrant = false; + reentrancy = non_reentrant; + hooks_arena_new_hook = hooks_libc_hook = NULL; t(); if (test_status > ret) { ret = test_status; } /* Reentrant run. */ if (do_reentrant) { - reentrant = true; - hooks_libc_hook = &reentrancy_hook; + reentrancy = libc_reentrant; + hooks_arena_new_hook = NULL; + hooks_libc_hook = &libc_reentrancy_hook; t(); if (test_status > ret) { ret = test_status; } - if (hook_ran) { - hook_ran = false; - for (int i = 0; i < NUM_REENTRANT_ALLOCS; i++) { - free(to_free[i]); - } + + reentrancy = arena_new_reentrant; + hooks_libc_hook = NULL; + hooks_arena_new_hook = &arena_new_reentrancy_hook; + t(); + if (test_status > ret) { + ret = test_status; } } } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 6ed15001..73cbcab0 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -156,7 +156,7 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_malloc_vs_mallocx, test_free_vs_dallocx, test_dallocx_vs_sdallocx, diff --git a/test/unit/stats.c b/test/unit/stats.c index f8c6b104..1619f5b6 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -351,7 +351,7 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_stats_summary, test_stats_large, test_stats_arenas_summary, diff --git 
a/test/unit/tsd.c b/test/unit/tsd.c index 4a0f3185..38114674 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -79,7 +79,6 @@ thd_start(void *arg) { } TEST_BEGIN(test_tsd_main_thread) { - test_skip_if(test_is_reentrant()); thd_start((void *)(uintptr_t)0xa5f3e329); } TEST_END @@ -144,7 +143,7 @@ main(void) { data_tsd_boot(); data_test_started = true; - return test( + return test_no_reentrancy( test_tsd_main_thread, test_tsd_sub_thread, test_tsd_reincarnation); From 04ef218d872ae73964f9f6c8d1186c4a1e270d70 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Apr 2017 14:20:57 -0700 Subject: [PATCH 0780/2608] Move reentrancy_level to the beginning of TSD. --- include/jemalloc/internal/tsd_structs.h | 14 +++++++------- src/arena.c | 2 +- src/jemalloc.c | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index 12df63d1..7f34d1b4 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -26,8 +26,8 @@ struct tsd_init_head_s { * t: tcache * --- data not accessed on tcache fast path: arena related fields --- * d: arenas_tdata_bypass - * r: narenas_tdata - * x: blank space (1 byte) + * r: reentrancy_level + * x: narenas_tdata * i: iarena * a: arena * o: arenas_tdata @@ -36,7 +36,7 @@ struct tsd_init_head_s { * Use a compact layout to reduce cache footprint. * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ * |---------------------------- 1st cacheline ----------------------------| - * | sedxrrrr mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] 
| * |---------------------------- 3nd cacheline ----------------------------| @@ -53,6 +53,7 @@ struct tsd_init_head_s { /* O(name, type, [gs]et, init, cleanup) */ \ O(tcache_enabled, bool, yes, yes, no) \ O(arenas_tdata_bypass, bool, no, no, no) \ + O(reentrancy_level, int8_t, no, no, no) \ O(narenas_tdata, uint32_t, yes, no, no) \ O(thread_allocated, uint64_t, yes, no, no) \ O(thread_deallocated, uint64_t, yes, no, no) \ @@ -65,8 +66,7 @@ struct tsd_init_head_s { O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ no, no, no) \ - O(witness_fork, bool, yes, no, no) \ - O(reentrancy_level, int, no, no, no) + O(witness_fork, bool, yes, no, no) #define TSD_INITIALIZER { \ tsd_state_uninitialized, \ @@ -75,6 +75,7 @@ struct tsd_init_head_s { 0, \ 0, \ 0, \ + 0, \ NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ @@ -83,8 +84,7 @@ struct tsd_init_head_s { TCACHE_ZERO_INITIALIZER, \ ql_head_initializer(witnesses), \ RTREE_ELM_WITNESS_TSD_INITIALIZER, \ - false, \ - 0 \ + false \ } struct tsd_s { diff --git a/src/arena.c b/src/arena.c index 19069bbe..198c6e49 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1958,7 +1958,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * If we're here, then arena 0 already exists, so bootstrapping * is done enough that we should have tsd. */ - int *reentrancy_level = tsd_reentrancy_levelp_get(tsdn_tsd( + int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsdn_tsd( tsdn)); ++*reentrancy_level; hooks_arena_new_hook(); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7b205ff6..513d9cd2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1662,7 +1662,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { * bother getting the old level, so we shouldn't try to reset it. This * is indicated by leaving the pointer as NULL. */ - int *reentrancy_level = NULL; + int8_t *reentrancy_level = NULL; /* Initialize (if we can't prove we don't have to). 
*/ if (sopts->slow) { From 701daa5298b3befe2aff05ce590533165abb9ba4 Mon Sep 17 00:00:00 2001 From: Rafael Folco Date: Mon, 10 Apr 2017 12:39:38 +0000 Subject: [PATCH 0781/2608] Port CPU_SPINWAIT to __powerpc64__ Hyper-threaded CPUs may need a special instruction inside spin loops in order to yield to another virtual CPU. The 'pause' instruction that is available for x86 is not supported on Power. Apparently the extended mnemonics like yield, mdoio, and mdoom are not actually implemented on POWER8, although mentioned in the ISA 2.07 document. The recommended magic bits are an 'or 31,31,31'. --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 37745c40..894e72d0 100644 --- a/configure.ac +++ b/configure.ac @@ -398,8 +398,9 @@ case "${host_cpu}" in fi fi ;; - powerpc) + powerpc*) AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ]) + CPU_SPINWAIT='__asm__ volatile("or 31,31,31")' ;; *) ;; From 8209df24ea5bc1dcb560ac64cf2b4ff552f8c9ff Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 13:26:21 -0700 Subject: [PATCH 0782/2608] Turn on -Werror for travis CI builds --- .travis.yml | 56 +++++++++++++++++++++---------------------- scripts/gen_travis.py | 4 +++- 2 files changed, 31 insertions(+), 29 deletions(-) diff --git a/.travis.yml b/.travis.yml index efac8547..2235206d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,85 +3,85 @@ language: generic matrix: include: - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: 
CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: packages: - gcc-multilib - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ 
COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" + env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: packages: - gcc-multilib - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: packages: - gcc-multilib - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: packages: - gcc-multilib - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: packages: - gcc-multilib - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: 
packages: - gcc-multilib - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-tcache" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 93fe3283..35a10ee6 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -72,7 +72,9 @@ for unusual_combination in unusual_combinations_to_test: if os == 'osx' and '--enable-prof' in configure_flags: continue - env_string = '{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}"'.format( + # We get some spurious errors when -Warray-bounds is enabled. 
+ env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' + 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( compilers, " ".join(compiler_flags), " ".join(configure_flags)) include_rows += ' - os: %s\n' % os From bfa530b75b15b6965566d8d47ad8d722da722f52 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Apr 2017 14:12:30 -0700 Subject: [PATCH 0783/2608] Pass dealloc_ctx down free() fast path. This gets rid of the redundant rtree lookup down fast path. --- include/jemalloc/internal/arena_inlines_b.h | 20 +++++++++++---- include/jemalloc/internal/arena_structs_b.h | 6 +++++ include/jemalloc/internal/arena_types.h | 1 + .../jemalloc/internal/jemalloc_internal.h.in | 12 ++++----- src/ckh.c | 10 ++++---- src/jemalloc.c | 18 +++++++++---- src/prof.c | 25 +++++++++++-------- src/tcache.c | 4 +-- 8 files changed, 62 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ea69a688..96889c1b 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -15,7 +15,8 @@ arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); size_t arena_salloc(tsdn_t *tsdn, const void *ptr); size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr); -void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); +void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + dalloc_ctx_t *dalloc_ctx, bool slow_path); void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size); void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); @@ -194,7 +195,8 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { } JEMALLOC_ALWAYS_INLINE void -arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { +arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + dalloc_ctx_t *dalloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); 
assert(ptr != NULL); @@ -203,13 +205,21 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { return; } - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); szind_t szind; bool slab; - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - true, &szind, &slab); + rtree_ctx_t *rtree_ctx; + if (dalloc_ctx != NULL) { + szind = dalloc_ctx->szind; + slab = dalloc_ctx->slab; + assert(szind != NSIZES); + } else { + rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); + } if (config_debug) { + rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == extent_szind_get(extent)); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 00e0d0c8..b6ba3dbc 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -256,4 +256,10 @@ struct arena_tdata_s { ticker_t decay_ticker; }; +/* Used to pass rtree lookup context down the deallocation path. 
*/ +struct dalloc_ctx_s { + szind_t szind; + bool slab; +}; + #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index ba53c408..435b930d 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -19,6 +19,7 @@ typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; +typedef struct dalloc_ctx_s dalloc_ctx_t; typedef enum { percpu_arena_disabled = 0, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 62dae0c4..17c77aa3 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -1095,8 +1095,8 @@ void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t ivsalloc(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, - bool slow_path); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + dalloc_ctx_t *dalloc_ctx, bool is_internal, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, bool slow_path); @@ -1188,8 +1188,8 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, - bool slow_path) { +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, + bool is_internal, bool slow_path) { assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < @@ -1201,12 +1201,12 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool is_internal, if (!is_internal && 
*tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) { tcache = NULL; } - arena_dalloc(tsdn, ptr, tcache, slow_path); + arena_dalloc(tsdn, ptr, tcache, dalloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), false, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true); } JEMALLOC_ALWAYS_INLINE void diff --git a/src/ckh.c b/src/ckh.c index 463f8dd1..03262ef5 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -282,12 +282,12 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, NULL, true, true); break; } /* Rebuilding failed, so back out partially rebuilt table. */ - idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; } @@ -329,7 +329,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { ckh->lg_curbuckets = lg_curcells - LG_CKH_BUCKET_CELLS; if (!ckh_rebuild(ckh, tab)) { - idalloctm(tsd_tsdn(tsd), tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tab, NULL, NULL, true, true); #ifdef CKH_COUNT ckh->nshrinks++; #endif @@ -337,7 +337,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { } /* Rebuilding failed, so back out partially rebuilt table. 
*/ - idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true); ckh->tab = tab; ckh->lg_curbuckets = lg_prevbuckets; #ifdef CKH_COUNT @@ -418,7 +418,7 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) { (unsigned long long)ckh->nrelocs); #endif - idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ckh->tab, NULL, NULL, true, true); if (config_debug) { memset(ckh, JEMALLOC_FREE_JUNK, sizeof(ckh_t)); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 513d9cd2..bc659325 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -313,7 +313,7 @@ a0ialloc(size_t size, bool zero, bool is_internal) { static void a0idalloc(void *ptr, bool is_internal) { - idalloctm(TSDN_NULL, ptr, false, is_internal, true); + idalloctm(TSDN_NULL, ptr, NULL, NULL, is_internal, true); } void * @@ -2045,21 +2045,29 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + dalloc_ctx_t dalloc_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &dalloc_ctx.szind, &dalloc_ctx.slab); + assert(dalloc_ctx.szind != NSIZES); + size_t usize; if (config_prof && opt_prof) { - usize = isalloc(tsd_tsdn(tsd), ptr); + usize = index2size(dalloc_ctx.szind); prof_free(tsd, ptr, usize); } else if (config_stats) { - usize = isalloc(tsd_tsdn(tsd), ptr); + usize = index2size(dalloc_ctx.szind); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } if (likely(!slow_path)) { - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, false); + idalloctm(tsd_tsdn(tsd), ptr, tcache, &dalloc_ctx, false, + false); } else { - idalloctm(tsd_tsdn(tsd), ptr, tcache, false, true); + idalloctm(tsd_tsdn(tsd), ptr, tcache, &dalloc_ctx, false, + true); } } diff --git a/src/prof.c b/src/prof.c index db1ef035..40610d71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -590,7 +590,7 @@ 
prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, prof_leave(tsd, tdata_self); /* Destroy gctx. */ malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); } else { /* * Compensate for increment in prof_tctx_destroy() or @@ -704,7 +704,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { } if (destroy_tctx) { - idalloctm(tsd_tsdn(tsd), tctx, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); } } @@ -736,8 +736,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { /* OOM. */ prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, true, - true); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, + true, true); return true; } new_gctx = true; @@ -761,7 +761,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (tgctx.v != NULL) { /* Lost race to insert. 
*/ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, + true); } } prof_leave(tsd, tdata); @@ -833,7 +834,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { if (new_gctx) { prof_gctx_try_destroy(tsd, tdata, gctx, tdata); } - idalloctm(tsd_tsdn(tsd), ret.v, NULL, true, true); + idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); return NULL; } malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); @@ -1245,7 +1246,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { tctx_tree_remove(&gctx->tctxs, to_destroy); idalloctm(tsd_tsdn(tsd), to_destroy, - NULL, true, true); + NULL, NULL, true, true); } else { next = NULL; } @@ -1915,7 +1916,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); return NULL; } @@ -1971,10 +1972,11 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); } ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), tdata, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); } static void @@ -2171,7 +2173,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) { } if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); tdata->thread_name = NULL; } if (strlen(s) > 0) { diff --git a/src/tcache.c b/src/tcache.c index 34b46afd..09776e1a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -499,10 +499,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { void *avail_array 
= (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail - (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *)); - idalloctm(tsd_tsdn(tsd), avail_array, NULL, true, true); + idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { /* Release both the tcache struct and avail array. */ - idalloctm(tsd_tsdn(tsd), tcache, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true); } } From 57e36e1a12e5cc6af7942196a3f37c9d280ab767 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 18:25:27 -0700 Subject: [PATCH 0784/2608] Header refactoring: Add CPP_PROLOGUE and CPP_EPILOGUE macros --- include/jemalloc/internal/jemalloc_internal.h.in | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 17c77aa3..73c5d3a5 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -2,9 +2,15 @@ #define JEMALLOC_INTERNAL_H #ifdef __cplusplus -extern "C" { +# define CPP_PROLOGUE extern "C" { +# define CPP_EPILOGUE } +#else +# define CPP_PROLOGUE +# define CPP_EPILOGUE #endif +CPP_PROLOGUE + #include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" @@ -1300,8 +1306,6 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, #include "jemalloc/internal/prof_inlines_b.h" -#ifdef __cplusplus -} -#endif +CPP_EPILOGUE #endif /* JEMALLOC_INTERNAL_H */ From 2f00ce4da7b1c360a9b1129ebcdb087da562e2d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 16:54:25 -0700 Subject: [PATCH 0785/2608] Header refactoring: break out ph.h dependencies --- include/jemalloc/internal/extent_externs.h | 2 ++ include/jemalloc/internal/extent_structs.h | 2 ++ include/jemalloc/internal/jemalloc_internal.h.in | 1 - src/extent.c | 2 ++ test/unit/ph.c | 2 ++ 5 files changed, 8 insertions(+), 1 
deletion(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 6c153d04..3fe4a0ad 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#include "jemalloc/internal/ph.h" + extern rtree_t extents_rtree; extern const extent_hooks_t extent_hooks_default; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 5d41bb81..87107a88 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +#include "jemalloc/internal/ph.h" + typedef enum { extent_state_active = 0, extent_state_dirty = 1, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 73c5d3a5..7756b8f0 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -180,7 +180,6 @@ static const bool have_percpu_arena = #include #endif -#include "jemalloc/internal/ph.h" #ifndef __PGI #define RB_COMPACT #endif diff --git a/src/extent.c b/src/extent.c index e080773b..b6c3f4b4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,6 +1,8 @@ #define JEMALLOC_EXTENT_C_ #include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/ph.h" + /******************************************************************************/ /* Data. 
*/ diff --git a/test/unit/ph.c b/test/unit/ph.c index 01df340c..88bf56f8 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/ph.h" + typedef struct node_s node_t; struct node_s { From 63a5cd4cc2a4812adc7f3a8bd3ea0633115b151e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 17:03:25 -0700 Subject: [PATCH 0786/2608] Header refactoring: break out rb.h dependencies --- include/jemalloc/internal/jemalloc_internal.h.in | 4 ---- include/jemalloc/internal/prof_structs.h | 2 ++ include/jemalloc/internal/rb.h | 4 ++++ test/unit/rb.c | 2 ++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 7756b8f0..532af2b8 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -180,10 +180,6 @@ static const bool have_percpu_arena = #include #endif -#ifndef __PGI -#define RB_COMPACT -#endif -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index fba8c295..e1936769 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H #define JEMALLOC_INTERNAL_PROF_STRUCTS_H +#include "jemalloc/internal/rb.h" + struct prof_bt_s { /* Backtrace, stored as len program counters. */ void **vec; diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index aa76061e..47fa5ca9 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -22,6 +22,10 @@ #ifndef RB_H_ #define RB_H_ +#ifndef __PGI +#define RB_COMPACT +#endif + #ifdef RB_COMPACT /* Node structure. 
*/ #define rb_node(a_type) \ diff --git a/test/unit/rb.c b/test/unit/rb.c index 0bcc3c31..65c04920 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/rb.h" + #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ a_type *rbp_bh_t; \ for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t != \ From 610cb83419d4dd741002b5af19b47f16110bf673 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 17:06:53 -0700 Subject: [PATCH 0787/2608] Header refactoring: break out qr.h dependencies --- include/jemalloc/internal/jemalloc_internal.h.in | 1 - include/jemalloc/internal/ql.h | 2 ++ test/unit/qr.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index 532af2b8..fdeda7b6 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -180,7 +180,6 @@ static const bool have_percpu_arena = #include #endif -#include "jemalloc/internal/qr.h" #include "jemalloc/internal/ql.h" /* diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index b3a428c7..80290407 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_QL_H #define JEMALLOC_INTERNAL_QL_H +#include "jemalloc/internal/qr.h" + /* List definitions. */ #define ql_head(a_type) \ struct { \ diff --git a/test/unit/qr.c b/test/unit/qr.c index 80c5c27d..271a1095 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/qr.h" + /* Number of ring entries, in [2..26]. */ #define NENTRIES 9 /* Split index, in [1..NENTRIES). 
*/ From 0237870c60104b418ce78b86752bf44b3b478b97 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 17:11:33 -0700 Subject: [PATCH 0788/2608] Header refactoring: break out ql.h dependencies --- include/jemalloc/internal/arena_structs_b.h | 3 +++ include/jemalloc/internal/ctl_structs.h | 2 ++ include/jemalloc/internal/extent_inlines.h | 2 ++ include/jemalloc/internal/extent_structs.h | 1 + include/jemalloc/internal/jemalloc_internal.h.in | 2 -- include/jemalloc/internal/tcache_structs.h | 2 ++ include/jemalloc/internal/tsd_structs.h | 2 ++ include/jemalloc/internal/tsd_types.h | 2 ++ include/jemalloc/internal/witness_inlines.h | 2 ++ include/jemalloc/internal/witness_types.h | 2 ++ test/unit/ql.c | 2 ++ 11 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index b6ba3dbc..935cd169 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -1,5 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H + +#include "jemalloc/internal/ql.h" + /* * Read-only information associated with each element of arena_t's bins array * is stored separately, partly to reduce memory usage (only one copy, rather diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index af0f78b9..2b48a68e 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_CTL_STRUCTS_H #define JEMALLOC_INTERNAL_CTL_STRUCTS_H +#include "jemalloc/internal/ql.h" + struct ctl_node_s { bool named; }; diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index f1b94776..6fc01017 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H 
#define JEMALLOC_INTERNAL_EXTENT_INLINES_H +#include "jemalloc/internal/ql.h" + #ifndef JEMALLOC_ENABLE_INLINE arena_t *extent_arena_get(const extent_t *extent); szind_t extent_szind_get_maybe_invalid(const extent_t *extent); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 87107a88..2f81fa1c 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/ql.h" typedef enum { extent_state_active = 0, diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in index fdeda7b6..58240e39 100644 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ b/include/jemalloc/internal/jemalloc_internal.h.in @@ -180,8 +180,6 @@ static const bool have_percpu_arena = #include #endif -#include "jemalloc/internal/ql.h" - /* * jemalloc can conceptually be broken into components (arena, tcache, etc.), * but there are circular dependencies that cannot be broken without diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 4e101609..c43e59b7 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#include "jemalloc/internal/ql.h" + /* * Read-only information associated with each element of tcache_t's tbins array * is stored separately, mainly to reduce memory usage. 
diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index 7f34d1b4..ac74152c 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TSD_STRUCTS_H #define JEMALLOC_INTERNAL_TSD_STRUCTS_H +#include "jemalloc/internal/ql.h" + #if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ !defined(_WIN32)) struct tsd_init_block_s { diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 4d5fef57..27afd1d6 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H +#include "jemalloc/internal/ql.h" + /* Maximum number of malloc_tsd users with cleanup functions. */ #define MALLOC_TSD_CLEANUPS_MAX 2 diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h index 51f3f6e7..c5027f11 100644 --- a/include/jemalloc/internal/witness_inlines.h +++ b/include/jemalloc/internal/witness_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_WITNESS_INLINES_H #define JEMALLOC_INTERNAL_WITNESS_INLINES_H +#include "jemalloc/internal/ql.h" + #ifndef JEMALLOC_ENABLE_INLINE bool witness_owner(tsd_t *tsd, const witness_t *witness); void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index 95fc296c..d43a363b 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_WITNESS_TYPES_H #define JEMALLOC_INTERNAL_WITNESS_TYPES_H +#include "jemalloc/internal/ql.h" + typedef struct witness_s witness_t; typedef unsigned witness_rank_t; typedef ql_head(witness_t) witness_list_t; diff --git a/test/unit/ql.c b/test/unit/ql.c index ae6481fd..b76c24c4 100644 
--- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/ql.h" + /* Number of ring entries, in [2..26]. */ #define NENTRIES 9 From 743d940dc34495e61145bbe4901ae2c2dcd4c4ef Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 18:17:55 -0700 Subject: [PATCH 0789/2608] Header refactoring: Split up jemalloc_internal.h This is a biggy. jemalloc_internal.h has been doing multiple jobs for a while now: - The source of system-wide definitions. - The catch-all include file. - The module header file for jemalloc.c This commit splits up this functionality. The system-wide definitions responsibility has moved to jemalloc_preamble.h. The catch-all include file is now jemalloc_internal_includes.h. The module headers for jemalloc.c are now in jemalloc_internal_[externs|inlines|types].h, just as they are for the other modules. --- .gitignore | 2 +- configure.ac | 6 +- .../jemalloc/internal/jemalloc_internal.h.in | 1303 ----------------- .../internal/jemalloc_internal_externs.h | 61 + .../internal/jemalloc_internal_includes.h | 155 ++ .../internal/jemalloc_internal_inlines_a.h | 448 ++++++ .../internal/jemalloc_internal_inlines_b.h | 83 ++ .../internal/jemalloc_internal_inlines_c.h | 220 +++ .../internal/jemalloc_internal_types.h | 178 +++ .../jemalloc/internal/jemalloc_preamble.h.in | 187 +++ src/arena.c | 3 +- src/base.c | 3 +- src/bitmap.c | 3 +- src/ckh.c | 3 +- src/ctl.c | 3 +- src/extent.c | 4 +- src/extent_dss.c | 4 +- src/extent_mmap.c | 3 +- src/hash.c | 3 +- src/hooks.c | 2 +- src/jemalloc.c | 3 +- src/jemalloc_cpp.cpp | 3 +- src/large.c | 3 +- src/malloc_io.c | 3 +- src/mutex.c | 3 +- src/nstime.c | 3 +- src/pages.c | 3 +- src/prng.c | 3 +- src/prof.c | 4 +- src/rtree.c | 3 +- src/spin.c | 3 +- src/stats.c | 3 +- src/tcache.c | 3 +- src/ticker.c | 3 +- src/tsd.c | 3 +- src/witness.c | 3 +- src/zone.c | 3 +- test/include/test/jemalloc_test.h.in | 7 +- 38 files changed, 1396 insertions(+), 1337 
deletions(-) delete mode 100644 include/jemalloc/internal/jemalloc_internal.h.in create mode 100644 include/jemalloc/internal/jemalloc_internal_externs.h create mode 100644 include/jemalloc/internal/jemalloc_internal_includes.h create mode 100644 include/jemalloc/internal/jemalloc_internal_inlines_a.h create mode 100644 include/jemalloc/internal/jemalloc_internal_inlines_b.h create mode 100644 include/jemalloc/internal/jemalloc_internal_inlines_c.h create mode 100644 include/jemalloc/internal/jemalloc_internal_types.h create mode 100644 include/jemalloc/internal/jemalloc_preamble.h.in diff --git a/.gitignore b/.gitignore index 548c7d1a..9bbc5d66 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,7 @@ /Makefile -/include/jemalloc/internal/jemalloc_internal.h +/include/jemalloc/internal/jemalloc_preamble.h /include/jemalloc/internal/jemalloc_internal_defs.h /include/jemalloc/internal/private_namespace.h /include/jemalloc/internal/private_unnamespace.h diff --git a/configure.ac b/configure.ac index 894e72d0..00868133 100644 --- a/configure.ac +++ b/configure.ac @@ -875,7 +875,7 @@ cfgoutputs_in="${cfgoutputs_in} doc/jemalloc.xml.in" cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_macros.h.in" cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_protos.h.in" cfgoutputs_in="${cfgoutputs_in} include/jemalloc/jemalloc_typedefs.h.in" -cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_internal.h.in" +cfgoutputs_in="${cfgoutputs_in} include/jemalloc/internal/jemalloc_preamble.h.in" cfgoutputs_in="${cfgoutputs_in} test/test.sh.in" cfgoutputs_in="${cfgoutputs_in} test/include/test/jemalloc_test.h.in" @@ -887,7 +887,7 @@ cfgoutputs_out="${cfgoutputs_out} doc/jemalloc.xml" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_macros.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_protos.h" cfgoutputs_out="${cfgoutputs_out} include/jemalloc/jemalloc_typedefs.h" -cfgoutputs_out="${cfgoutputs_out} 
include/jemalloc/internal/jemalloc_internal.h" +cfgoutputs_out="${cfgoutputs_out} include/jemalloc/internal/jemalloc_preamble.h" cfgoutputs_out="${cfgoutputs_out} test/test.sh" cfgoutputs_out="${cfgoutputs_out} test/include/test/jemalloc_test.h" @@ -899,7 +899,7 @@ cfgoutputs_tup="${cfgoutputs_tup} doc/jemalloc.xml:doc/jemalloc.xml.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_macros.h:include/jemalloc/jemalloc_macros.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_protos.h:include/jemalloc/jemalloc_protos.h.in" cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/jemalloc_typedefs.h:include/jemalloc/jemalloc_typedefs.h.in" -cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_internal.h" +cfgoutputs_tup="${cfgoutputs_tup} include/jemalloc/internal/jemalloc_preamble.h" cfgoutputs_tup="${cfgoutputs_tup} test/test.sh:test/test.sh.in" cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include/test/jemalloc_test.h.in" diff --git a/include/jemalloc/internal/jemalloc_internal.h.in b/include/jemalloc/internal/jemalloc_internal.h.in deleted file mode 100644 index 58240e39..00000000 --- a/include/jemalloc/internal/jemalloc_internal.h.in +++ /dev/null @@ -1,1303 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_H -#define JEMALLOC_INTERNAL_H - -#ifdef __cplusplus -# define CPP_PROLOGUE extern "C" { -# define CPP_EPILOGUE } -#else -# define CPP_PROLOGUE -# define CPP_EPILOGUE -#endif - -CPP_PROLOGUE - -#include "jemalloc_internal_defs.h" -#include "jemalloc/internal/jemalloc_internal_decls.h" - -#ifdef JEMALLOC_UTRACE -#include -#endif - -#define JEMALLOC_NO_DEMANGLE -#ifdef JEMALLOC_JET -# define JEMALLOC_N(n) jet_##n -# include "jemalloc/internal/public_namespace.h" -# define JEMALLOC_NO_RENAME -# include "../jemalloc@install_suffix@.h" -# undef JEMALLOC_NO_RENAME -#else -# define JEMALLOC_N(n) @private_namespace@##n -# include "../jemalloc@install_suffix@.h" -#endif - -/* - * Note that the ordering matters here; the 
hook itself is name-mangled. We - * want the inclusion of hooks to happen early, so that we hook as much as - * possible. - */ -#include "jemalloc/internal/private_namespace.h" -#include "jemalloc/internal/hooks.h" - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; -static const bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; -static const bool config_fill = -#ifdef JEMALLOC_FILL - true -#else - false -#endif - ; -static const bool config_lazy_lock = -#ifdef JEMALLOC_LAZY_LOCK - true -#else - false -#endif - ; -static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; -static const bool config_prof = -#ifdef JEMALLOC_PROF - true -#else - false -#endif - ; -static const bool config_prof_libgcc = -#ifdef JEMALLOC_PROF_LIBGCC - true -#else - false -#endif - ; -static const bool config_prof_libunwind = -#ifdef JEMALLOC_PROF_LIBUNWIND - true -#else - false -#endif - ; -static const bool maps_coalesce = -#ifdef JEMALLOC_MAPS_COALESCE - true -#else - false -#endif - ; -static const bool config_munmap = -#ifdef JEMALLOC_MUNMAP - true -#else - false -#endif - ; -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; -static const bool config_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; -static const bool config_tls = -#ifdef JEMALLOC_TLS - true -#else - false -#endif - ; -static const bool config_utrace = -#ifdef JEMALLOC_UTRACE - true -#else - false -#endif - ; -static const bool config_xmalloc = -#ifdef JEMALLOC_XMALLOC - true -#else - false -#endif - ; -static const bool config_ivsalloc = -#ifdef JEMALLOC_IVSALLOC - true -#else - false -#endif - ; -static const bool config_cache_oblivious = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - true -#else - false -#endif - ; -static const bool have_thp = -#ifdef JEMALLOC_THP - true -#else - false -#endif - ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU -/* Currently percpu_arena depends on sched_getcpu. 
*/ -#define JEMALLOC_PERCPU_ARENA -#endif -static const bool have_percpu_arena = -#ifdef JEMALLOC_PERCPU_ARENA - true -#else - false -#endif - ; - -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) -#include -#endif - -#ifdef JEMALLOC_ZONE -#include -#include -#include -#endif - -/* - * jemalloc can conceptually be broken into components (arena, tcache, etc.), - * but there are circular dependencies that cannot be broken without - * substantial performance degradation. - * - * Historically, we dealt with this by each header into four sections (types, - * structs, externs, and inlines), and included each header file multiple times - * in this file, picking out the portion we want on each pass using the - * following #defines: - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data - * types. - * JEMALLOC_H_STRUCTS : Data structures. - * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. - * JEMALLOC_H_INLINES : Inline functions. - * - * We're moving toward a world in which the dependencies are explicit; each file - * will #include the headers it depends on (rather than relying on them being - * implicitly available via this file including every header file in the - * project). - * - * We're now in an intermediate state: we've broken up the header files to avoid - * having to include each one multiple times, but have not yet moved the - * dependency information into the header files (i.e. we still rely on the - * ordering in this file to ensure all a header's dependencies are available in - * its translation unit). Each component is now broken up into multiple header - * files, corresponding to the sections above (e.g. instead of "tsd.h", we now - * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). - * - * Those files which have been converted to explicitly include their - * inter-component dependencies are now in the initial HERMETIC HEADERS - * section. 
These headers may still rely on this file for system headers and - * global jemalloc headers, however. - */ - -#include "jemalloc/internal/jemalloc_internal_macros.h" - -/******************************************************************************/ -/* HERMETIC HEADERS */ -/******************************************************************************/ - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bit_util.h" -#include "jemalloc/internal/malloc_io.h" -#include "jemalloc/internal/util.h" - -/******************************************************************************/ -/* TYPES */ -/******************************************************************************/ - -/* Page size index type. */ -typedef unsigned pszind_t; - -/* Size class index type. */ -typedef unsigned szind_t; - -/* Processor / core id type. */ -typedef int malloc_cpuid_t; - -/* - * Flags bits: - * - * a: arena - * t: tcache - * 0: unused - * z: zero - * n: alignment - * - * aaaaaaaa aaaatttt tttttttt 0znnnnnn - */ -#define MALLOCX_ARENA_BITS 12 -#define MALLOCX_TCACHE_BITS 12 -#define MALLOCX_LG_ALIGN_BITS 6 -#define MALLOCX_ARENA_SHIFT 20 -#define MALLOCX_TCACHE_SHIFT 8 -#define MALLOCX_ARENA_MASK \ - (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) -/* NB: Arena index bias decreases the maximum number of arenas by 1. */ -#define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) -#define MALLOCX_TCACHE_MASK \ - (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) -#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) -#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) -/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. 
*/ -#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ - (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) -#define MALLOCX_ALIGN_GET(flags) \ - (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) -#define MALLOCX_ZERO_GET(flags) \ - ((bool)(flags & MALLOCX_ZERO)) - -#define MALLOCX_TCACHE_GET(flags) \ - (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) -#define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) - -/* Smallest size class to support. */ -#define TINY_MIN (1U << LG_TINY_MIN) - -/* - * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size - * classes). - */ -#ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# ifdef __aarch64__ -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __mips__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# ifdef __powerpc__ -# define LG_QUANTUM 4 -# endif -# ifdef __riscv__ -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# ifdef __SH4__ -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. 
*/ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) - -/* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) - -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) - -/* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) - -/* - * Maximum size of L1 cache line. This is used to avoid cache line aliasing. - * In addition, this controls the spacing of cacheline-spaced size classes. - * - * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can - * only handle raw constants. - */ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) - -/* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) - -/* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) - -/* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ - ((size_t)((uintptr_t)(a) & (alignment - 1))) - -/* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ - (((s) + (alignment - 1)) & ((~(alignment)) + 1)) - -/* Declare a variable-length array. 
*/ -#if __STDC_VERSION__ < 199901L -# ifdef _MSC_VER -# include -# define alloca _alloca -# else -# ifdef JEMALLOC_HAS_ALLOCA_H -# include -# else -# include -# endif -# endif -# define VARIABLE_ARRAY(type, name, count) \ - type *name = alloca(sizeof(type) * (count)) -#else -# define VARIABLE_ARRAY(type, name, count) type name[(count)] -#endif - -#include "jemalloc/internal/nstime_types.h" -#include "jemalloc/internal/spin_types.h" -#include "jemalloc/internal/prng_types.h" -#include "jemalloc/internal/ticker_types.h" -#include "jemalloc/internal/ckh_types.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats_types.h" -#include "jemalloc/internal/ctl_types.h" -#include "jemalloc/internal/witness_types.h" -#include "jemalloc/internal/mutex_types.h" -#include "jemalloc/internal/tsd_types.h" -#include "jemalloc/internal/extent_types.h" -#include "jemalloc/internal/extent_dss_types.h" -#include "jemalloc/internal/base_types.h" -#include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/bitmap_types.h" -#include "jemalloc/internal/rtree_types.h" -#include "jemalloc/internal/pages_types.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/prof_types.h" - - -/******************************************************************************/ -/* STRUCTS */ -/******************************************************************************/ - -#include "jemalloc/internal/nstime_structs.h" -#include "jemalloc/internal/spin_structs.h" -#include "jemalloc/internal/ticker_structs.h" -#include "jemalloc/internal/ckh_structs.h" -#include "jemalloc/internal/witness_structs.h" -#include "jemalloc/internal/mutex_structs.h" -#include "jemalloc/internal/stats_structs.h" -#include "jemalloc/internal/ctl_structs.h" -#include "jemalloc/internal/bitmap_structs.h" -#include "jemalloc/internal/arena_structs_a.h" -#include "jemalloc/internal/extent_structs.h" -#include 
"jemalloc/internal/extent_dss_structs.h" -#include "jemalloc/internal/base_structs.h" -#include "jemalloc/internal/prof_structs.h" -#include "jemalloc/internal/arena_structs_b.h" -#include "jemalloc/internal/rtree_structs.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/tsd_structs.h" - - -/******************************************************************************/ -/* EXTERNS */ -/******************************************************************************/ - -extern bool opt_abort; -extern const char *opt_junk; -extern bool opt_junk_alloc; -extern bool opt_junk_free; -extern bool opt_utrace; -extern bool opt_xmalloc; -extern bool opt_zero; -extern unsigned opt_narenas; - -/* Number of CPUs. */ -extern unsigned ncpus; - -/* Number of arenas used for automatic multiplexing of threads and arenas. */ -extern unsigned narenas_auto; - -/* - * Arenas that are used to service external requests. Not all elements of the - * arenas array are necessarily used; arenas are created lazily as needed. - */ -extern atomic_p_t arenas[]; - -/* - * pind2sz_tab encodes the same information as could be computed by - * pind2sz_compute(). - */ -extern size_t const pind2sz_tab[NPSIZES+1]; -/* - * index2size_tab encodes the same information as could be computed (at - * unacceptable cost in some code paths) by index2size_compute(). - */ -extern size_t const index2size_tab[NSIZES]; -/* - * size2index_tab is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via size2index(). 
- */ -extern uint8_t const size2index_tab[]; - -void *a0malloc(size_t size); -void a0dalloc(void *ptr); -void *bootstrap_malloc(size_t size); -void *bootstrap_calloc(size_t num, size_t size); -void bootstrap_free(void *ptr); -void arena_set(unsigned ind, arena_t *arena); -unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); -arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); -void iarena_cleanup(tsd_t *tsd); -void arena_cleanup(tsd_t *tsd); -void arenas_tdata_cleanup(tsd_t *tsd); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); -bool malloc_initialized(void); - -#include "jemalloc/internal/nstime_externs.h" -#include "jemalloc/internal/ckh_externs.h" -#include "jemalloc/internal/stats_externs.h" -#include "jemalloc/internal/ctl_externs.h" -#include "jemalloc/internal/witness_externs.h" -#include "jemalloc/internal/mutex_externs.h" -#include "jemalloc/internal/bitmap_externs.h" -#include "jemalloc/internal/extent_externs.h" -#include "jemalloc/internal/extent_dss_externs.h" -#include "jemalloc/internal/extent_mmap_externs.h" -#include "jemalloc/internal/base_externs.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/rtree_externs.h" -#include "jemalloc/internal/pages_externs.h" -#include "jemalloc/internal/large_externs.h" -#include "jemalloc/internal/tcache_externs.h" -#include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/tsd_externs.h" - -/******************************************************************************/ -/* INLINES */ -/******************************************************************************/ - -#include "jemalloc/internal/spin_inlines.h" -#include "jemalloc/internal/prng_inlines.h" -#include "jemalloc/internal/ticker_inlines.h" -#include 
"jemalloc/internal/tsd_inlines.h" -#include "jemalloc/internal/witness_inlines.h" -#include "jemalloc/internal/mutex_inlines.h" - -#ifndef JEMALLOC_ENABLE_INLINE -pszind_t psz2ind(size_t psz); -size_t pind2sz_compute(pszind_t pind); -size_t pind2sz_lookup(pszind_t pind); -size_t pind2sz(pszind_t pind); -size_t psz2u(size_t psz); -szind_t size2index_compute(size_t size); -szind_t size2index_lookup(size_t size); -szind_t size2index(size_t size); -size_t index2size_compute(szind_t index); -size_t index2size_lookup(szind_t index); -size_t index2size(szind_t index); -size_t s2u_compute(size_t size); -size_t s2u_lookup(size_t size); -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); -arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, - bool refresh_if_missing); -arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); -ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); -bool tcache_available(tsd_t *tsd); -tcache_bin_t *tcache_small_bin_get(tcache_t *tcache, szind_t binind); -tcache_bin_t *tcache_large_bin_get(tcache_t *tcache, szind_t binind); -tcache_t *tcache_get(tsd_t *tsd); -malloc_cpuid_t malloc_getcpu(void); -unsigned percpu_arena_choose(void); -unsigned percpu_arena_ind_limit(void); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE pszind_t -psz2ind(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return NPSIZES; - } - { - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - - (LG_SIZE_CLASS_GROUP + LG_PAGE); - pszind_t grp = shift << LG_SIZE_CLASS_GROUP; - - pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
- LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZD(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - pszind_t ind = grp + mod; - return ind; - } -} - -JEMALLOC_INLINE size_t -pind2sz_compute(pszind_t pind) { - if (unlikely(pind == NPSIZES)) { - return LARGE_MAXCLASS + PAGE; - } - { - size_t grp = pind >> LG_SIZE_CLASS_GROUP; - size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t sz = grp_size + mod_size; - return sz; - } -} - -JEMALLOC_INLINE size_t -pind2sz_lookup(pszind_t pind) { - size_t ret = (size_t)pind2sz_tab[pind]; - assert(ret == pind2sz_compute(pind)); - return ret; -} - -JEMALLOC_INLINE size_t -pind2sz(pszind_t pind) { - assert(pind < NPSIZES+1); - return pind2sz_lookup(pind); -} - -JEMALLOC_INLINE size_t -psz2u(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return LARGE_MAXCLASS + PAGE; - } - { - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (psz + delta_mask) & ~delta_mask; - return usize; - } -} - -JEMALLOC_INLINE szind_t -size2index_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { - return NSIZES; - } -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } -#endif - { - szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - szind_t grp = shift << LG_SIZE_CLASS_GROUP; - - szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZD(-1) << lg_delta; - szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - szind_t index = NTBINS + grp + mod; - return index; - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index_lookup(size_t size) { - assert(size <= LOOKUP_MAXCLASS); - { - szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); - assert(ret == size2index_compute(size)); - return ret; - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index(size_t size) { - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { - return size2index_lookup(size); - } - return size2index_compute(size); -} - -JEMALLOC_INLINE size_t -index2size_compute(szind_t index) { -#if (NTBINS > 0) - if (index < NTBINS) { - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); - } -#endif - { - size_t reduced_index = index - NTBINS; - size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 
1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return usize; - } -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(szind_t index) { - size_t ret = (size_t)index2size_tab[index]; - assert(ret == index2size_compute(index)); - return ret; -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size(szind_t index) { - assert(index < NSIZES); - return index2size_lookup(index); -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { - return 0; - } -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return usize; - } -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_lookup(size_t size) { - size_t ret = index2size_lookup(size2index_lookup(size)); - - assert(ret == s2u_compute(size)); - return ret; -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size. - */ -JEMALLOC_ALWAYS_INLINE size_t -s2u(size_t size) { - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { - return s2u_lookup(size); - } - return s2u_compute(size); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size and alignment. - */ -JEMALLOC_ALWAYS_INLINE size_t -sa2u(size_t size, size_t alignment) { - size_t usize; - - assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - - /* Try for a small size class. */ - if (size <= SMALL_MAXCLASS && alignment < PAGE) { - /* - * Round size up to the nearest multiple of alignment. 
- * - * This done, we can take advantage of the fact that for each - * small size class, every object is aligned at the smallest - * power of two that is non-zero in the base two representation - * of the size. For example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) { - return usize; - } - } - - /* Large size class. Beware of overflow. */ - - if (unlikely(alignment > LARGE_MAXCLASS)) { - return 0; - } - - /* Make sure result is a large size class. */ - if (size <= LARGE_MINCLASS) { - usize = LARGE_MINCLASS; - } else { - usize = s2u(size); - if (usize < size) { - /* size_t overflow. */ - return 0; - } - } - - /* - * Calculate the multi-page mapping that large_palloc() would need in - * order to guarantee the alignment. - */ - if (usize + large_pad + PAGE_CEILING(alignment) - PAGE < usize) { - /* size_t overflow. */ - return 0; - } - return usize; -} - -JEMALLOC_ALWAYS_INLINE malloc_cpuid_t -malloc_getcpu(void) { - assert(have_percpu_arena); -#if defined(JEMALLOC_HAVE_SCHED_GETCPU) - return (malloc_cpuid_t)sched_getcpu(); -#else - not_reached(); - return -1; -#endif -} - -/* Return the chosen arena index based on current cpu. */ -JEMALLOC_ALWAYS_INLINE unsigned -percpu_arena_choose(void) { - unsigned arena_ind; - assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); - - malloc_cpuid_t cpuid = malloc_getcpu(); - assert(cpuid >= 0); - if ((percpu_arena_mode == percpu_arena) || - ((unsigned)cpuid < ncpus / 2)) { - arena_ind = cpuid; - } else { - assert(percpu_arena_mode == per_phycpu_arena); - /* Hyper threads on the same physical CPU share arena. */ - arena_ind = cpuid - ncpus / 2; - } - - return arena_ind; -} - -/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). 
*/ -JEMALLOC_ALWAYS_INLINE unsigned -percpu_arena_ind_limit(void) { - assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); - if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) { - if (ncpus % 2) { - /* This likely means a misconfig. */ - return ncpus / 2 + 1; - } - return ncpus / 2; - } else { - return ncpus; - } -} - -JEMALLOC_INLINE arena_tdata_t * -arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { - arena_tdata_t *tdata; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - - if (unlikely(arenas_tdata == NULL)) { - /* arenas_tdata hasn't been initialized yet. */ - return arena_tdata_get_hard(tsd, ind); - } - if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or tdata to be - * initialized. - */ - return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : - NULL); - } - - tdata = &arenas_tdata[ind]; - if (likely(tdata != NULL) || !refresh_if_missing) { - return tdata; - } - return arena_tdata_get_hard(tsd, ind); -} - -JEMALLOC_INLINE arena_t * -arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { - arena_t *ret; - - assert(ind <= MALLOCX_ARENA_MAX); - - ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); - if (unlikely(ret == NULL)) { - if (init_if_missing) { - ret = arena_init(tsdn, ind, - (extent_hooks_t *)&extent_hooks_default); - } - } - return ret; -} - -JEMALLOC_INLINE ticker_t * -decay_ticker_get(tsd_t *tsd, unsigned ind) { - arena_tdata_t *tdata; - - tdata = arena_tdata_get(tsd, ind, true); - if (unlikely(tdata == NULL)) { - return NULL; - } - return &tdata->decay_ticker; -} - -JEMALLOC_ALWAYS_INLINE tcache_bin_t * -tcache_small_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind < NBINS); - return &tcache->tbins_small[binind]; -} - -JEMALLOC_ALWAYS_INLINE tcache_bin_t * -tcache_large_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind >= NBINS &&binind < nhbins); - return &tcache->tbins_large[binind - NBINS]; -} - 
-JEMALLOC_ALWAYS_INLINE bool -tcache_available(tsd_t *tsd) { - cassert(config_tcache); - - /* - * Thread specific auto tcache might be unavailable if: 1) during tcache - * initialization, or 2) disabled through thread.tcache.enabled mallctl - * or config options. This check covers all cases. - */ - if (likely(tsd_tcache_enabled_get(tsd) == true)) { - /* Associated arena == null implies tcache init in progress. */ - if (tsd_tcachep_get(tsd)->arena != NULL) { - assert(tcache_small_bin_get(tsd_tcachep_get(tsd), - 0)->avail != NULL); - } - return true; - } - - return false; -} - -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get(tsd_t *tsd) { - if (!config_tcache) { - return NULL; - } - if (!tcache_available(tsd)) { - return NULL; - } - - return tsd_tcachep_get(tsd); -} -#endif - -#include "jemalloc/internal/rtree_inlines.h" -#include "jemalloc/internal/base_inlines.h" -#include "jemalloc/internal/bitmap_inlines.h" -/* - * Include portions of arena code interleaved with tcache code in order to - * resolve circular dependencies. - */ -#include "jemalloc/internal/prof_inlines_a.h" -#include "jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/extent_inlines.h" - -#ifndef JEMALLOC_ENABLE_INLINE -extent_t *iealloc(tsdn_t *tsdn, const void *ptr); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -/* Choose an arena based on a per-thread value. */ -JEMALLOC_INLINE arena_t * -arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { - arena_t *ret; - - if (arena != NULL) { - return arena; - } - - /* During reentrancy, arena 0 is the safest bet. */ - if (*tsd_reentrancy_levelp_get(tsd) > 1) { - return arena_get(tsd_tsdn(tsd), 0, true); - } - - ret = internal ? 
tsd_iarena_get(tsd) : tsd_arena_get(tsd); - if (unlikely(ret == NULL)) { - ret = arena_choose_hard(tsd, internal); - assert(ret); - if (config_tcache && tcache_available(tsd)) { - tcache_t *tcache = tcache_get(tsd); - if (tcache->arena != NULL) { - /* See comments in tcache_data_init().*/ - assert(tcache->arena == - arena_get(tsd_tsdn(tsd), 0, false)); - if (tcache->arena != ret) { - tcache_arena_reassociate(tsd_tsdn(tsd), - tcache, ret); - } - } else { - tcache_arena_associate(tsd_tsdn(tsd), tcache, - ret); - } - } - } - - /* - * Note that for percpu arena, if the current arena is outside of the - * auto percpu arena range, (i.e. thread is assigned to a manually - * managed arena), then percpu arena is skipped. - */ - if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) && - (arena_ind_get(ret) < percpu_arena_ind_limit()) && - (ret->last_thd != tsd_tsdn(tsd))) { - unsigned ind = percpu_arena_choose(); - if (arena_ind_get(ret) != ind) { - percpu_arena_update(tsd, ind); - ret = tsd_arena_get(tsd); - } - ret->last_thd = tsd_tsdn(tsd); - } - - return ret; -} - -JEMALLOC_INLINE arena_t * -arena_choose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, false); -} - -JEMALLOC_INLINE arena_t * -arena_ichoose(tsd_t *tsd, arena_t *arena) { - return arena_choose_impl(tsd, arena, true); -} - -JEMALLOC_ALWAYS_INLINE extent_t * -iealloc(tsdn_t *tsdn, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true); -} -#endif - -#include "jemalloc/internal/tcache_inlines.h" -#include "jemalloc/internal/arena_inlines_b.h" -#include "jemalloc/internal/hash_inlines.h" - -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); -size_t isalloc(tsdn_t *tsdn, const void *ptr); -void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool is_internal, 
arena_t *arena, bool slow_path); -void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, - bool slow_path); -void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena); -void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena); -void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool is_internal, bool slow_path); -void idalloc(tsd_t *tsd, void *ptr); -void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); -void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena); -void *iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero); -bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) -JEMALLOC_ALWAYS_INLINE arena_t * -iaalloc(tsdn_t *tsdn, const void *ptr) { - assert(ptr != NULL); - - return arena_aalloc(tsdn, ptr); -} - -JEMALLOC_ALWAYS_INLINE size_t -isalloc(tsdn_t *tsdn, const void *ptr) { - assert(ptr != NULL); - - return arena_salloc(tsdn, ptr); -} - -JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_internal, arena_t *arena, bool slow_path) { - void *ret; - - assert(size != 0); - assert(!is_internal || tcache == NULL); - assert(!is_internal || arena == NULL || arena_ind_get(arena) < - narenas_auto); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - ret = arena_malloc(tsdn, 
arena, size, ind, zero, tcache, slow_path); - if (config_stats && is_internal && likely(ret != NULL)) { - arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); - } - return ret; -} - -JEMALLOC_ALWAYS_INLINE void * -ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { - return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false, - NULL, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena) { - void *ret; - - assert(usize != 0); - assert(usize == sa2u(usize, alignment)); - assert(!is_internal || tcache == NULL); - assert(!is_internal || arena == NULL || arena_ind_get(arena) < - narenas_auto); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); - assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); - if (config_stats && is_internal && likely(ret != NULL)) { - arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); - } - return ret; -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena) { - return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); -} - -JEMALLOC_ALWAYS_INLINE void * -ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { - return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, - tcache_get(tsd), false, NULL); -} - -JEMALLOC_ALWAYS_INLINE size_t -ivsalloc(tsdn_t *tsdn, const void *ptr) { - return arena_vsalloc(tsdn, ptr); -} - -JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, - bool is_internal, bool slow_path) { - assert(ptr != NULL); - assert(!is_internal || tcache == NULL); - assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < - narenas_auto); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - if (config_stats && is_internal) { - 
arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); - } - if (!is_internal && *tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) { - tcache = NULL; - } - arena_dalloc(tsdn, ptr, tcache, dalloc_ctx, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void -idalloc(tsd_t *tsd, void *ptr) { - idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true); -} - -JEMALLOC_ALWAYS_INLINE void -isdalloct(tsdn_t *tsdn, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - arena_sdalloc(tsdn, ptr, size, tcache, slow_path); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - void *p; - size_t usize, copysize; - - usize = sa2u(size + extra, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return NULL; - } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); - if (p == NULL) { - if (extra == 0) { - return NULL; - } - /* Try again, without extra this time. */ - usize = sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return NULL; - } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); - if (p == NULL) { - return NULL; - } - } - /* - * Copy at most size bytes (not size+extra), since the caller has no - * expectation that the extra bytes will be reliably preserved. - */ - copysize = (size < oldsize) ? 
size : oldsize; - memcpy(p, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, true); - return p; -} - -JEMALLOC_ALWAYS_INLINE void * -iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena) { - assert(ptr != NULL); - assert(size != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - /* - * Existing object alignment is inadequate; allocate new space - * and copy. - */ - return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, - zero, tcache, arena); - } - - return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache); -} - -JEMALLOC_ALWAYS_INLINE void * -iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero) { - return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd), NULL); -} - -JEMALLOC_ALWAYS_INLINE bool -ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) { - assert(ptr != NULL); - assert(size != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { - /* Existing object alignment is inadequate. 
*/ - return true; - } - - return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); -} -#endif - -#include "jemalloc/internal/prof_inlines_b.h" - -CPP_EPILOGUE - -#endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h new file mode 100644 index 00000000..7ac39bea --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -0,0 +1,61 @@ +#ifndef JEMALLOC_INTERNAL_EXTERNS_H +#define JEMALLOC_INTERNAL_EXTERNS_H + +extern bool opt_abort; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_zero; +extern unsigned opt_narenas; + +/* Number of CPUs. */ +extern unsigned ncpus; + +/* Number of arenas used for automatic multiplexing of threads and arenas. */ +extern unsigned narenas_auto; + +/* + * Arenas that are used to service external requests. Not all elements of the + * arenas array are necessarily used; arenas are created lazily as needed. + */ +extern atomic_p_t arenas[]; + +/* + * pind2sz_tab encodes the same information as could be computed by + * pind2sz_compute(). + */ +extern size_t const pind2sz_tab[NPSIZES+1]; +/* + * index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by index2size_compute(). + */ +extern size_t const index2size_tab[NSIZES]; +/* + * size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via size2index(). 
+ */ +extern uint8_t const size2index_tab[]; + +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); +unsigned narenas_total_get(void); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); +arena_t *arena_choose_hard(tsd_t *tsd, bool internal); +void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void iarena_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +void arenas_tdata_cleanup(tsd_t *tsd); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +bool malloc_initialized(void); + +#endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h new file mode 100644 index 00000000..45035137 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -0,0 +1,155 @@ +#ifndef JEMALLOC_INTERNAL_INCLUDES_H +#define JEMALLOC_INTERNAL_INCLUDES_H + +/* + * jemalloc can conceptually be broken into components (arena, tcache, etc.), + * but there are circular dependencies that cannot be broken without + * substantial performance degradation. + * + * Historically, we dealt with this by each header into four sections (types, + * structs, externs, and inlines), and included each header file multiple times + * in this file, picking out the portion we want on each pass using the + * following #defines: + * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * types. + * JEMALLOC_H_STRUCTS : Data structures. + * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. + * JEMALLOC_H_INLINES : Inline functions. 
+ * + * We're moving toward a world in which the dependencies are explicit; each file + * will #include the headers it depends on (rather than relying on them being + * implicitly available via this file including every header file in the + * project). + * + * We're now in an intermediate state: we've broken up the header files to avoid + * having to include each one multiple times, but have not yet moved the + * dependency information into the header files (i.e. we still rely on the + * ordering in this file to ensure all a header's dependencies are available in + * its translation unit). Each component is now broken up into multiple header + * files, corresponding to the sections above (e.g. instead of "tsd.h", we now + * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). + * + * Those files which have been converted to explicitly include their + * inter-component dependencies are now in the initial HERMETIC HEADERS + * section. All headers may still rely on jemalloc_preamble.h (which, by fiat, + * must be included first in every translation unit) for system headers and + * global jemalloc definitions, however. 
+ */ + +CPP_PROLOGUE + +/******************************************************************************/ +/* HERMETIC HEADERS */ +/******************************************************************************/ + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" + +/******************************************************************************/ +/* TYPES */ +/******************************************************************************/ + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/nstime_types.h" +#include "jemalloc/internal/spin_types.h" +#include "jemalloc/internal/prng_types.h" +#include "jemalloc/internal/ticker_types.h" +#include "jemalloc/internal/ckh_types.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats_types.h" +#include "jemalloc/internal/ctl_types.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/mutex_types.h" +#include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_dss_types.h" +#include "jemalloc/internal/base_types.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/bitmap_types.h" +#include "jemalloc/internal/rtree_types.h" +#include "jemalloc/internal/pages_types.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/prof_types.h" + +/******************************************************************************/ +/* STRUCTS */ +/******************************************************************************/ + +#include "jemalloc/internal/nstime_structs.h" +#include "jemalloc/internal/spin_structs.h" +#include "jemalloc/internal/ticker_structs.h" +#include "jemalloc/internal/ckh_structs.h" +#include "jemalloc/internal/witness_structs.h" +#include 
"jemalloc/internal/mutex_structs.h" +#include "jemalloc/internal/stats_structs.h" +#include "jemalloc/internal/ctl_structs.h" +#include "jemalloc/internal/bitmap_structs.h" +#include "jemalloc/internal/arena_structs_a.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/extent_dss_structs.h" +#include "jemalloc/internal/base_structs.h" +#include "jemalloc/internal/prof_structs.h" +#include "jemalloc/internal/arena_structs_b.h" +#include "jemalloc/internal/rtree_structs.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/tsd_structs.h" + +/******************************************************************************/ +/* EXTERNS */ +/******************************************************************************/ + +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/nstime_externs.h" +#include "jemalloc/internal/ckh_externs.h" +#include "jemalloc/internal/stats_externs.h" +#include "jemalloc/internal/ctl_externs.h" +#include "jemalloc/internal/witness_externs.h" +#include "jemalloc/internal/mutex_externs.h" +#include "jemalloc/internal/bitmap_externs.h" +#include "jemalloc/internal/extent_externs.h" +#include "jemalloc/internal/extent_dss_externs.h" +#include "jemalloc/internal/extent_mmap_externs.h" +#include "jemalloc/internal/base_externs.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/rtree_externs.h" +#include "jemalloc/internal/pages_externs.h" +#include "jemalloc/internal/large_externs.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/tsd_externs.h" + +/******************************************************************************/ +/* INLINES */ +/******************************************************************************/ + +#include "jemalloc/internal/spin_inlines.h" +#include "jemalloc/internal/prng_inlines.h" +#include "jemalloc/internal/ticker_inlines.h" 
+#include "jemalloc/internal/tsd_inlines.h" +#include "jemalloc/internal/witness_inlines.h" +#include "jemalloc/internal/mutex_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" +#include "jemalloc/internal/rtree_inlines.h" +#include "jemalloc/internal/base_inlines.h" +#include "jemalloc/internal/bitmap_inlines.h" +/* + * Include portions of arena code interleaved with tcache code in order to + * resolve circular dependencies. + */ +#include "jemalloc/internal/prof_inlines_a.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/extent_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" +#include "jemalloc/internal/tcache_inlines.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/hash_inlines.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof_inlines_b.h" + +CPP_EPILOGUE + +#endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h new file mode 100644 index 00000000..256329a0 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -0,0 +1,448 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_A_H +#define JEMALLOC_INTERNAL_INLINES_A_H + +#ifndef JEMALLOC_ENABLE_INLINE +pszind_t psz2ind(size_t psz); +size_t pind2sz_compute(pszind_t pind); +size_t pind2sz_lookup(pszind_t pind); +size_t pind2sz(pszind_t pind); +size_t psz2u(size_t psz); +szind_t size2index_compute(size_t size); +szind_t size2index_lookup(size_t size); +szind_t size2index(size_t size); +size_t index2size_compute(szind_t index); +size_t index2size_lookup(szind_t index); +size_t index2size(szind_t index); +size_t s2u_compute(size_t size); +size_t s2u_lookup(size_t size); +size_t s2u(size_t size); +size_t sa2u(size_t size, size_t alignment); +arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); +arena_t *arena_choose(tsd_t *tsd, 
arena_t *arena); +arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, + bool refresh_if_missing); +arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); +ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); +bool tcache_available(tsd_t *tsd); +tcache_bin_t *tcache_small_bin_get(tcache_t *tcache, szind_t binind); +tcache_bin_t *tcache_large_bin_get(tcache_t *tcache, szind_t binind); +tcache_t *tcache_get(tsd_t *tsd); +malloc_cpuid_t malloc_getcpu(void); +unsigned percpu_arena_choose(void); +unsigned percpu_arena_ind_limit(void); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE pszind_t +psz2ind(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return NPSIZES; + } + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZD(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return ind; + } +} + +JEMALLOC_INLINE size_t +pind2sz_compute(pszind_t pind) { + if (unlikely(pind == NPSIZES)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return sz; + } +} + +JEMALLOC_INLINE size_t +pind2sz_lookup(pszind_t pind) { + size_t ret = (size_t)pind2sz_tab[pind]; + assert(ret == pind2sz_compute(pind)); + return ret; +} + +JEMALLOC_INLINE size_t +pind2sz(pszind_t pind) { + assert(pind < NPSIZES+1); + return pind2sz_lookup(pind); +} + +JEMALLOC_INLINE size_t +psz2u(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return usize; + } +} + +JEMALLOC_INLINE szind_t +size2index_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return NSIZES; + } +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } +#endif + { + szind_t x = lg_floor((size<<1)-1); + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + szind_t grp = shift << LG_SIZE_CLASS_GROUP; + + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZD(-1) << lg_delta; + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + szind_t index = NTBINS + grp + mod; + return index; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index_lookup(size_t size) { + assert(size <= LOOKUP_MAXCLASS); + { + szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); + assert(ret == size2index_compute(size)); + return ret; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +size2index(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return size2index_lookup(size); + } + return size2index_compute(size); +} + +JEMALLOC_INLINE size_t +index2size_compute(szind_t index) { +#if (NTBINS > 0) + if (index < NTBINS) { + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + } +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size_lookup(szind_t index) { + size_t ret = (size_t)index2size_tab[index]; + assert(ret == index2size_compute(index)); + return ret; +} + +JEMALLOC_ALWAYS_INLINE size_t +index2size(szind_t index) { + assert(index < NSIZES); + return index2size_lookup(index); +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return 0; + } +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? 
(ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +s2u_lookup(size_t size) { + size_t ret = index2size_lookup(size2index_lookup(size)); + + assert(ret == s2u_compute(size)); + return ret; +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_ALWAYS_INLINE size_t +s2u(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return s2u_lookup(size); + } + return s2u_compute(size); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_ALWAYS_INLINE size_t +sa2u(size_t size, size_t alignment) { + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* Try for a small size class. */ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) { + return usize; + } + } + + /* Large size class. Beware of overflow. */ + + if (unlikely(alignment > LARGE_MAXCLASS)) { + return 0; + } + + /* Make sure result is a large size class. 
*/ + if (size <= LARGE_MINCLASS) { + usize = LARGE_MINCLASS; + } else { + usize = s2u(size); + if (usize < size) { + /* size_t overflow. */ + return 0; + } + } + + /* + * Calculate the multi-page mapping that large_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + large_pad + PAGE_CEILING(alignment) - PAGE < usize) { + /* size_t overflow. */ + return 0; + } + return usize; +} + +JEMALLOC_ALWAYS_INLINE malloc_cpuid_t +malloc_getcpu(void) { + assert(have_percpu_arena); +#if defined(JEMALLOC_HAVE_SCHED_GETCPU) + return (malloc_cpuid_t)sched_getcpu(); +#else + not_reached(); + return -1; +#endif +} + +/* Return the chosen arena index based on current cpu. */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_choose(void) { + unsigned arena_ind; + assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); + + malloc_cpuid_t cpuid = malloc_getcpu(); + assert(cpuid >= 0); + if ((percpu_arena_mode == percpu_arena) || + ((unsigned)cpuid < ncpus / 2)) { + arena_ind = cpuid; + } else { + assert(percpu_arena_mode == per_phycpu_arena); + /* Hyper threads on the same physical CPU share arena. */ + arena_ind = cpuid - ncpus / 2; + } + + return arena_ind; +} + +/* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). */ +JEMALLOC_ALWAYS_INLINE unsigned +percpu_arena_ind_limit(void) { + assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); + if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) { + if (ncpus % 2) { + /* This likely means a misconfig. */ + return ncpus / 2 + 1; + } + return ncpus / 2; + } else { + return ncpus; + } +} + +JEMALLOC_INLINE arena_tdata_t * +arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { + arena_tdata_t *tdata; + arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); + + if (unlikely(arenas_tdata == NULL)) { + /* arenas_tdata hasn't been initialized yet. 
*/ + return arena_tdata_get_hard(tsd, ind); + } + if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { + /* + * ind is invalid, cache is old (too small), or tdata to be + * initialized. + */ + return (refresh_if_missing ? arena_tdata_get_hard(tsd, ind) : + NULL); + } + + tdata = &arenas_tdata[ind]; + if (likely(tdata != NULL) || !refresh_if_missing) { + return tdata; + } + return arena_tdata_get_hard(tsd, ind); +} + +JEMALLOC_INLINE arena_t * +arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { + arena_t *ret; + + assert(ind <= MALLOCX_ARENA_MAX); + + ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); + if (unlikely(ret == NULL)) { + if (init_if_missing) { + ret = arena_init(tsdn, ind, + (extent_hooks_t *)&extent_hooks_default); + } + } + return ret; +} + +JEMALLOC_INLINE ticker_t * +decay_ticker_get(tsd_t *tsd, unsigned ind) { + arena_tdata_t *tdata; + + tdata = arena_tdata_get(tsd, ind, true); + if (unlikely(tdata == NULL)) { + return NULL; + } + return &tdata->decay_ticker; +} + +JEMALLOC_ALWAYS_INLINE tcache_bin_t * +tcache_small_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind < NBINS); + return &tcache->tbins_small[binind]; +} + +JEMALLOC_ALWAYS_INLINE tcache_bin_t * +tcache_large_bin_get(tcache_t *tcache, szind_t binind) { + assert(binind >= NBINS &&binind < nhbins); + return &tcache->tbins_large[binind - NBINS]; +} + +JEMALLOC_ALWAYS_INLINE bool +tcache_available(tsd_t *tsd) { + cassert(config_tcache); + + /* + * Thread specific auto tcache might be unavailable if: 1) during tcache + * initialization, or 2) disabled through thread.tcache.enabled mallctl + * or config options. This check covers all cases. + */ + if (likely(tsd_tcache_enabled_get(tsd) == true)) { + /* Associated arena == null implies tcache init in progress. 
*/ + if (tsd_tcachep_get(tsd)->arena != NULL) { + assert(tcache_small_bin_get(tsd_tcachep_get(tsd), + 0)->avail != NULL); + } + return true; + } + + return false; +} + +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get(tsd_t *tsd) { + if (!config_tcache) { + return NULL; + } + if (!tcache_available(tsd)) { + return NULL; + } + + return tsd_tcachep_get(tsd); +} +#endif + +#endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h new file mode 100644 index 00000000..52afb42d --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -0,0 +1,83 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_B_H +#define JEMALLOC_INTERNAL_INLINES_B_H + +#ifndef JEMALLOC_ENABLE_INLINE +extent_t *iealloc(tsdn_t *tsdn, const void *ptr); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +/* Choose an arena based on a per-thread value. */ +JEMALLOC_INLINE arena_t * +arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { + arena_t *ret; + + if (arena != NULL) { + return arena; + } + + /* During reentrancy, arena 0 is the safest bet. */ + if (*tsd_reentrancy_levelp_get(tsd) > 1) { + return arena_get(tsd_tsdn(tsd), 0, true); + } + + ret = internal ? tsd_iarena_get(tsd) : tsd_arena_get(tsd); + if (unlikely(ret == NULL)) { + ret = arena_choose_hard(tsd, internal); + assert(ret); + if (config_tcache && tcache_available(tsd)) { + tcache_t *tcache = tcache_get(tsd); + if (tcache->arena != NULL) { + /* See comments in tcache_data_init().*/ + assert(tcache->arena == + arena_get(tsd_tsdn(tsd), 0, false)); + if (tcache->arena != ret) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tcache, ret); + } + } else { + tcache_arena_associate(tsd_tsdn(tsd), tcache, + ret); + } + } + } + + /* + * Note that for percpu arena, if the current arena is outside of the + * auto percpu arena range, (i.e. 
thread is assigned to a manually + * managed arena), then percpu arena is skipped. + */ + if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) && + (arena_ind_get(ret) < percpu_arena_ind_limit()) && + (ret->last_thd != tsd_tsdn(tsd))) { + unsigned ind = percpu_arena_choose(); + if (arena_ind_get(ret) != ind) { + percpu_arena_update(tsd, ind); + ret = tsd_arena_get(tsd); + } + ret->last_thd = tsd_tsdn(tsd); + } + + return ret; +} + +JEMALLOC_INLINE arena_t * +arena_choose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, false); +} + +JEMALLOC_INLINE arena_t * +arena_ichoose(tsd_t *tsd, arena_t *arena) { + return arena_choose_impl(tsd, arena, true); +} + +JEMALLOC_ALWAYS_INLINE extent_t * +iealloc(tsdn_t *tsdn, const void *ptr) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true); +} +#endif + +#endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h new file mode 100644 index 00000000..05debd22 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -0,0 +1,220 @@ +#ifndef JEMALLOC_INTERNAL_INLINES_C_H +#define JEMALLOC_INTERNAL_INLINES_C_H + +#ifndef JEMALLOC_ENABLE_INLINE +arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); +size_t isalloc(tsdn_t *tsdn, const void *ptr); +void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path); +void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, + bool slow_path); +void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena); +void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena); +void *ipalloc(tsd_t *tsd, size_t 
usize, size_t alignment, bool zero); +size_t ivsalloc(tsdn_t *tsdn, const void *ptr); +void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + dalloc_ctx_t *dalloc_ctx, bool is_internal, bool slow_path); +void idalloc(tsd_t *tsd, void *ptr); +void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + bool slow_path); +void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena); +void *iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); +void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero); +bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero); +#endif + +#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) +JEMALLOC_ALWAYS_INLINE arena_t * +iaalloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + return arena_aalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE size_t +isalloc(tsdn_t *tsdn, const void *ptr) { + assert(ptr != NULL); + + return arena_salloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_internal, arena_t *arena, bool slow_path) { + void *ret; + + assert(size != 0); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_ind_get(arena) < + narenas_auto); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); + } + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { + return iallocztm(tsd_tsdn(tsd), size, ind, zero, tcache_get(tsd), false, + NULL, slow_path); 
+} + +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena) { + void *ret; + + assert(usize != 0); + assert(usize == sa2u(usize, alignment)); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena == NULL || arena_ind_get(arena) < + narenas_auto); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); + if (config_stats && is_internal && likely(ret != NULL)) { + arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); + } + return ret; +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, arena_t *arena) { + return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); +} + +JEMALLOC_ALWAYS_INLINE void * +ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero) { + return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, + tcache_get(tsd), false, NULL); +} + +JEMALLOC_ALWAYS_INLINE size_t +ivsalloc(tsdn_t *tsdn, const void *ptr) { + return arena_vsalloc(tsdn, ptr); +} + +JEMALLOC_ALWAYS_INLINE void +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, + bool is_internal, bool slow_path) { + assert(ptr != NULL); + assert(!is_internal || tcache == NULL); + assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < + narenas_auto); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + if (config_stats && is_internal) { + arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); + } + if (!is_internal && *tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) { + tcache = NULL; + } + arena_dalloc(tsdn, ptr, tcache, dalloc_ctx, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void +idalloc(tsd_t *tsd, void *ptr) { + idalloctm(tsd_tsdn(tsd), ptr, tcache_get(tsd), NULL, false, true); +} + +JEMALLOC_ALWAYS_INLINE void 
+isdalloct(tsdn_t *tsdn, void *ptr, size_t size, + tcache_t *tcache, bool slow_path) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + arena_sdalloc(tsdn, ptr, size, tcache, slow_path); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, size_t alignment, bool zero, tcache_t *tcache, + arena_t *arena) { + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + void *p; + size_t usize, copysize; + + usize = sa2u(size + extra, alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + return NULL; + } + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + if (p == NULL) { + if (extra == 0) { + return NULL; + } + /* Try again, without extra this time. */ + usize = sa2u(size, alignment); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + return NULL; + } + p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + if (p == NULL) { + return NULL; + } + } + /* + * Copy at most size bytes (not size+extra), since the caller has no + * expectation that the extra bytes will be reliably preserved. + */ + copysize = (size < oldsize) ? size : oldsize; + memcpy(p, ptr, copysize); + isdalloct(tsdn, ptr, oldsize, tcache, true); + return p; +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero, tcache_t *tcache, arena_t *arena) { + assert(ptr != NULL); + assert(size != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* + * Existing object alignment is inadequate; allocate new space + * and copy. 
+ */ + return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, + zero, tcache, arena); + } + + return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, + tcache); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, + bool zero) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, + tcache_get(tsd), NULL); +} + +JEMALLOC_ALWAYS_INLINE bool +ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, + size_t alignment, bool zero) { + assert(ptr != NULL); + assert(size != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) + != 0) { + /* Existing object alignment is inadequate. */ + return true; + } + + return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); +} +#endif + +#endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h new file mode 100644 index 00000000..663ed8b5 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -0,0 +1,178 @@ +#ifndef JEMALLOC_INTERNAL_TYPES_H +#define JEMALLOC_INTERNAL_TYPES_H + +/* Page size index type. */ +typedef unsigned pszind_t; + +/* Size class index type. */ +typedef unsigned szind_t; + +/* Processor / core id type. */ +typedef int malloc_cpuid_t; + +/* + * Flags bits: + * + * a: arena + * t: tcache + * 0: unused + * z: zero + * n: alignment + * + * aaaaaaaa aaaatttt tttttttt 0znnnnnn + */ +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ + (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) +/* NB: Arena index bias decreases the maximum number of arenas by 1. 
*/ +#define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) +#define MALLOCX_TCACHE_MASK \ + (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) +#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) +/* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) +#define MALLOCX_ZERO_GET(flags) \ + ((bool)(flags & MALLOCX_ZERO)) + +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) + +/* Smallest size class to support. */ +#define TINY_MIN (1U << LG_TINY_MIN) + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# ifdef __riscv__ +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# ifdef __SH4__ +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error 
"Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) + +/* Return the smallest long multiple that is >= a. */ +#define LONG_CEILING(a) \ + (((a) + LONG_MASK) & ~LONG_MASK) + +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) + +/* Return the smallest (void *) multiple that is >= a. */ +#define PTR_CEILING(a) \ + (((a) + PTR_MASK) & ~PTR_MASK) + +/* + * Maximum size of L1 cache line. This is used to avoid cache line aliasing. + * In addition, this controls the spacing of cacheline-spaced size classes. + * + * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can + * only handle raw constants. + */ +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) + +/* Return the smallest cacheline multiple that is >= s. */ +#define CACHELINE_CEILING(s) \ + (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) + +/* Return the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) + +/* Return the offset between a and the nearest aligned address at or below a. */ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ + ((size_t)((uintptr_t)(a) & (alignment - 1))) + +/* Return the smallest alignment multiple that is >= s. */ +#define ALIGNMENT_CEILING(s, alignment) \ + (((s) + (alignment - 1)) & ((~(alignment)) + 1)) + +/* Declare a variable-length array. 
*/ +#if __STDC_VERSION__ < 199901L +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include +# else +# include +# endif +# endif +# define VARIABLE_ARRAY(type, name, count) \ + type *name = alloca(sizeof(type) * (count)) +#else +# define VARIABLE_ARRAY(type, name, count) type name[(count)] +#endif + +#endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in new file mode 100644 index 00000000..6e38fe65 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -0,0 +1,187 @@ +#ifndef JEMALLOC_PREAMBLE_H +#define JEMALLOC_PREAMBLE_H + +#ifdef __cplusplus +# define CPP_PROLOGUE extern "C" { +# define CPP_EPILOGUE } +#else +# define CPP_PROLOGUE +# define CPP_EPILOGUE +#endif + +CPP_PROLOGUE + +#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_decls.h" + +#ifdef JEMALLOC_UTRACE +#include +#endif + +#define JEMALLOC_NO_DEMANGLE +#ifdef JEMALLOC_JET +# define JEMALLOC_N(n) jet_##n +# include "jemalloc/internal/public_namespace.h" +# define JEMALLOC_NO_RENAME +# include "../jemalloc@install_suffix@.h" +# undef JEMALLOC_NO_RENAME +#else +# define JEMALLOC_N(n) @private_namespace@##n +# include "../jemalloc@install_suffix@.h" +#endif + +#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#include +#endif + +#ifdef JEMALLOC_ZONE +#include +#include +#include +#endif + +#include "jemalloc/internal/jemalloc_internal_macros.h" + +/* + * Note that the ordering matters here; the hook itself is name-mangled. We + * want the inclusion of hooks to happen early, so that we hook as much as + * possible. 
+ */ +#include "jemalloc/internal/private_namespace.h" +#include "jemalloc/internal/hooks.h" + +static const bool config_debug = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +static const bool have_dss = +#ifdef JEMALLOC_DSS + true +#else + false +#endif + ; +static const bool config_fill = +#ifdef JEMALLOC_FILL + true +#else + false +#endif + ; +static const bool config_lazy_lock = +#ifdef JEMALLOC_LAZY_LOCK + true +#else + false +#endif + ; +static const char * const config_malloc_conf = JEMALLOC_CONFIG_MALLOC_CONF; +static const bool config_prof = +#ifdef JEMALLOC_PROF + true +#else + false +#endif + ; +static const bool config_prof_libgcc = +#ifdef JEMALLOC_PROF_LIBGCC + true +#else + false +#endif + ; +static const bool config_prof_libunwind = +#ifdef JEMALLOC_PROF_LIBUNWIND + true +#else + false +#endif + ; +static const bool maps_coalesce = +#ifdef JEMALLOC_MAPS_COALESCE + true +#else + false +#endif + ; +static const bool config_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; +static const bool config_stats = +#ifdef JEMALLOC_STATS + true +#else + false +#endif + ; +static const bool config_tcache = +#ifdef JEMALLOC_TCACHE + true +#else + false +#endif + ; +static const bool config_tls = +#ifdef JEMALLOC_TLS + true +#else + false +#endif + ; +static const bool config_utrace = +#ifdef JEMALLOC_UTRACE + true +#else + false +#endif + ; +static const bool config_xmalloc = +#ifdef JEMALLOC_XMALLOC + true +#else + false +#endif + ; +static const bool config_ivsalloc = +#ifdef JEMALLOC_IVSALLOC + true +#else + false +#endif + ; +static const bool config_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; +static const bool have_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; +#ifdef JEMALLOC_HAVE_SCHED_GETCPU +/* Currently percpu_arena depends on sched_getcpu. 
*/ +#define JEMALLOC_PERCPU_ARENA +#endif +static const bool have_percpu_arena = +#ifdef JEMALLOC_PERCPU_ARENA + true +#else + false +#endif + ; + +CPP_EPILOGUE + +#endif /* JEMALLOC_PREAMBLE_H */ diff --git a/src/arena.c b/src/arena.c index 198c6e49..5d313e32 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,5 +1,6 @@ #define JEMALLOC_ARENA_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Data. */ diff --git a/src/base.c b/src/base.c index 4275259e..eb68a175 100644 --- a/src/base.c +++ b/src/base.c @@ -1,5 +1,6 @@ #define JEMALLOC_BASE_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Data. */ diff --git a/src/bitmap.c b/src/bitmap.c index a629aca6..2eb50f1b 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,5 +1,6 @@ #define JEMALLOC_BITMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ diff --git a/src/ckh.c b/src/ckh.c index 03262ef5..a359a5cc 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -35,7 +35,8 @@ * ******************************************************************************/ #define JEMALLOC_CKH_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ diff --git a/src/ctl.c b/src/ctl.c index a59a741f..6d6fadc7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1,5 +1,6 @@ #define JEMALLOC_CTL_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Data. */ diff --git a/src/extent.c b/src/extent.c index b6c3f4b4..2344e9cd 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,8 +1,10 @@ #define JEMALLOC_EXTENT_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + /******************************************************************************/ /* Data. */ diff --git a/src/extent_dss.c b/src/extent_dss.c index 99919090..c609f14c 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,5 +1,7 @@ #define JEMALLOC_EXTENT_DSS_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + /******************************************************************************/ /* Data. 
*/ diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 7265159a..5717573e 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -1,5 +1,6 @@ #define JEMALLOC_EXTENT_MMAP_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ diff --git a/src/hash.c b/src/hash.c index ffd4f2be..7b2bdc2b 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,2 +1,3 @@ #define JEMALLOC_HASH_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/hooks.c b/src/hooks.c index c32471e9..6266ecd4 100644 --- a/src/hooks.c +++ b/src/hooks.c @@ -1,4 +1,4 @@ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" /* * The hooks are a little bit screwy -- they're not genuinely exported in the diff --git a/src/jemalloc.c b/src/jemalloc.c index bc659325..27f9711c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,5 +1,6 @@ #define JEMALLOC_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Data. */ diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 9692b5ba..b6d7c9a5 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -2,7 +2,8 @@ #include #define JEMALLOC_CPP_CPP_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" // All operators in this file are exported. 
diff --git a/src/large.c b/src/large.c index 3f96c521..18987c1a 100644 --- a/src/large.c +++ b/src/large.c @@ -1,5 +1,6 @@ #define JEMALLOC_LARGE_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ diff --git a/src/malloc_io.c b/src/malloc_io.c index fd6ff0f0..98ef7a65 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,5 +1,6 @@ #define JEMALLOC_MALLOC_IO_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #ifdef assert # undef assert diff --git a/src/mutex.c b/src/mutex.c index 8c593101..26af5239 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,5 +1,6 @@ #define JEMALLOC_MUTEX_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #include diff --git a/src/nstime.c b/src/nstime.c index 70b2f9d8..ee8d78e7 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,4 +1,5 @@ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #define BILLION UINT64_C(1000000000) #define MILLION UINT64_C(1000000) diff --git a/src/pages.c b/src/pages.c index fa986ba6..53ca653b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,5 +1,6 @@ #define JEMALLOC_PAGES_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include diff --git a/src/prng.c b/src/prng.c index bf908790..83c04bf9 100644 --- a/src/prng.c +++ b/src/prng.c @@ -1,2 +1,3 @@ #define JEMALLOC_PRNG_C_ -#include 
"jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof.c b/src/prof.c index 40610d71..1844c2f3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,5 +1,7 @@ #define JEMALLOC_PROF_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND diff --git a/src/rtree.c b/src/rtree.c index de4990bd..051428f1 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,5 +1,6 @@ #define JEMALLOC_RTREE_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /* * Only the most significant bits of keys passed to rtree_{read,write}() are diff --git a/src/spin.c b/src/spin.c index d7eb5fa8..d2d39419 100644 --- a/src/spin.c +++ b/src/spin.c @@ -1,2 +1,3 @@ #define JEMALLOC_SPIN_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/stats.c b/src/stats.c index aa7ca507..110d62f7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,5 +1,6 @@ #define JEMALLOC_STATS_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" const char *global_mutex_names[num_global_prof_mutexes] = { #define OP(mtx) #mtx, diff --git a/src/tcache.c b/src/tcache.c index 09776e1a..99749fbc 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,5 +1,6 @@ #define JEMALLOC_TCACHE_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" 
/******************************************************************************/ /* Data. */ diff --git a/src/ticker.c b/src/ticker.c index b0149e1c..d7b8cd26 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -1,2 +1,3 @@ #define JEMALLOC_TICKER_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/tsd.c b/src/tsd.c index 0d5de8ea..3e72548c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,5 +1,6 @@ #define JEMALLOC_TSD_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Data. */ diff --git a/src/witness.c b/src/witness.c index cbffaeaa..26b16e77 100644 --- a/src/witness.c +++ b/src/witness.c @@ -1,5 +1,6 @@ #define JEMALLOC_WITNESS_C_ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" void witness_init(witness_t *witness, const char *name, witness_rank_t rank, diff --git a/src/zone.c b/src/zone.c index e69f0b4a..a8a571fd 100644 --- a/src/zone.c +++ b/src/zone.c @@ -1,4 +1,5 @@ -#include "jemalloc/internal/jemalloc_internal.h" +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #ifndef JEMALLOC_ZONE # error "This source file is for zones on Darwin (OS X)." 
#endif diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index e3882b29..02eaac2b 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -43,8 +43,8 @@ extern "C" { #ifdef JEMALLOC_UNIT_TEST # define JEMALLOC_JET # define JEMALLOC_MANGLE -# include "jemalloc/internal/jemalloc_internal.h" - +# include "jemalloc/internal/jemalloc_preamble.h" +# include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* @@ -96,7 +96,8 @@ static const bool config_debug = # include "jemalloc/jemalloc_protos_jet.h" # define JEMALLOC_JET -# include "jemalloc/internal/jemalloc_internal.h" +# include "jemalloc/internal/jemalloc_preamble.h" +# include "jemalloc/internal/jemalloc_internal_includes.h" # include "jemalloc/internal/public_unnamespace.h" # undef JEMALLOC_JET From e709fae1d73b874796d7f629ef39a44e9b53fa87 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Apr 2017 19:04:40 -0700 Subject: [PATCH 0790/2608] Header refactoring: move atomic.h out of the catch-all --- include/jemalloc/internal/arena_structs_b.h | 1 + include/jemalloc/internal/atomic.h | 4 ++++ include/jemalloc/internal/extent_structs.h | 1 + include/jemalloc/internal/jemalloc_internal_externs.h | 2 ++ include/jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/jemalloc_internal_inlines_a.h | 2 ++ include/jemalloc/internal/mutex_structs.h | 2 ++ include/jemalloc/internal/prng_inlines.h | 2 ++ include/jemalloc/internal/rtree_structs.h | 2 ++ include/jemalloc/internal/stats_structs.h | 2 ++ src/jemalloc.c | 2 ++ 11 files changed, 20 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 935cd169..1f4fb6b1 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -1,6 +1,7 @@ #ifndef 
JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/ql.h" /* diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index adadb1a3..1bfae7d7 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -3,6 +3,8 @@ #define ATOMIC_INLINE static inline +CPP_PROLOGUE + #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) # include "jemalloc/internal/atomic_gcc_atomic.h" #elif defined(JEMALLOC_GCC_SYNC_ATOMICS) @@ -74,4 +76,6 @@ JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) #undef ATOMIC_INLINE +CPP_EPILOGUE + #endif /* JEMALLOC_INTERNAL_ATOMIC_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 2f81fa1c..3d3d418b 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H +#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 7ac39bea..56d39d48 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTERNS_H #define JEMALLOC_INTERNAL_EXTERNS_H +#include "jemalloc/internal/atomic.h" + extern bool opt_abort; extern const char *opt_junk; extern bool opt_junk_alloc; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 45035137..53374f99 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -43,7 +43,6 @@ CPP_PROLOGUE 
/******************************************************************************/ #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 256329a0..822b4d75 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_INLINES_A_H #define JEMALLOC_INTERNAL_INLINES_A_H +#include "jemalloc/internal/atomic.h" + #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); size_t pind2sz_compute(pszind_t pind); diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index ff090b22..7b7085d4 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H +#include "jemalloc/internal/atomic.h" + struct mutex_prof_data_s { /* * Counters touched on the slow path, i.e. 
when there is lock diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index 3f06ccd4..7026d52a 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PRNG_INLINES_H #define JEMALLOC_INTERNAL_PRNG_INLINES_H +#include "jemalloc/internal/atomic.h" + #ifndef JEMALLOC_ENABLE_INLINE uint32_t prng_state_next_u32(uint32_t state); uint64_t prng_state_next_u64(uint64_t state); diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 123248ae..175a013c 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_RTREE_STRUCTS_H #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H +#include "jemalloc/internal/atomic.h" + struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index 75a4a783..dc994b52 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_STATS_STRUCTS_H #define JEMALLOC_INTERNAL_STATS_STRUCTS_H +#include "jemalloc/internal/atomic.h" + #ifdef JEMALLOC_ATOMIC_U64 typedef atomic_u64_t arena_stats_u64_t; #else diff --git a/src/jemalloc.c b/src/jemalloc.c index 27f9711c..77ee857c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/atomic.h" + /******************************************************************************/ /* Data. */ From f35213bae4ee6294a0743607637f9be8989622f1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Apr 2017 14:56:43 -0700 Subject: [PATCH 0791/2608] Pass dalloc_ctx down the sdalloc path. This avoids redundant rtree lookups. 
--- include/jemalloc/internal/arena_inlines_b.h | 38 +++++++++++-------- .../internal/jemalloc_internal_inlines_c.h | 10 ++--- src/arena.c | 2 +- src/jemalloc.c | 13 ++++++- src/large.c | 2 +- 5 files changed, 41 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 96889c1b..382289ee 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -19,7 +19,7 @@ void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, bool slow_path); void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size); void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); + dalloc_ctx_t *dalloc_ctx, bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -293,7 +293,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path) { + dalloc_ctx_t *dalloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); assert(size <= LARGE_MAXCLASS); @@ -305,7 +305,22 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, szind_t szind; bool slab; - if (!config_prof || !opt_prof) { + UNUSED dalloc_ctx_t local_ctx; + if (config_prof && opt_prof) { + if (dalloc_ctx == NULL) { + /* Uncommon case and should be a static check. 
*/ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &local_ctx.szind, + &local_ctx.slab); + assert(local_ctx.szind == size2index(size)); + dalloc_ctx = &local_ctx; + } + slab = dalloc_ctx->slab; + szind = dalloc_ctx->szind; + } else { /* * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. @@ -314,21 +329,14 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, slab = (szind < NBINS); } - if ((config_prof && opt_prof) || config_debug) { + if (config_debug) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); - - assert(szind == size2index(size)); - assert((config_prof && opt_prof) || slab == (szind < NBINS)); - - if (config_debug) { - extent_t *extent = rtree_extent_read(tsdn, - &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); - assert(slab == extent_slab_get(extent)); - } + extent_t *extent = rtree_extent_read(tsdn, + &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + assert(szind == extent_szind_get(extent)); + assert(slab == extent_slab_get(extent)); } if (likely(slab)) { diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 05debd22..4fb34241 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -18,7 +18,7 @@ void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, bool is_internal, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - bool slow_path); + dalloc_ctx_t *dalloc_ctx, bool slow_path); void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t 
oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); @@ -129,10 +129,10 @@ idalloc(tsd_t *tsd, void *ptr) { } JEMALLOC_ALWAYS_INLINE void -isdalloct(tsdn_t *tsdn, void *ptr, size_t size, - tcache_t *tcache, bool slow_path) { +isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, + dalloc_ctx_t *dalloc_ctx, bool slow_path) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - arena_sdalloc(tsdn, ptr, size, tcache, slow_path); + arena_sdalloc(tsdn, ptr, size, tcache, dalloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -168,7 +168,7 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); return p; } diff --git a/src/arena.c b/src/arena.c index 5d313e32..16728b34 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1752,7 +1752,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t copysize = (usize < oldsize) ? 
usize : oldsize; memcpy(ret, ptr, copysize); - isdalloct(tsdn, ptr, oldsize, tcache, true); + isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); return ret; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 77ee857c..e71949a1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2083,17 +2083,26 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + dalloc_ctx_t dalloc_ctx, *ctx; if (config_prof && opt_prof) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &dalloc_ctx.szind, &dalloc_ctx.slab); + assert(dalloc_ctx.szind == size2index(usize)); prof_free(tsd, ptr, usize); + ctx = &dalloc_ctx; + } else { + ctx = NULL; } + if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } if (likely(!slow_path)) { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, false); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); } else { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, true); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true); } } diff --git a/src/large.c b/src/large.c index 18987c1a..3b53eb33 100644 --- a/src/large.c +++ b/src/large.c @@ -304,7 +304,7 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, size_t copysize = (usize < oldusize) ? usize : oldusize; memcpy(ret, extent_addr_get(extent), copysize); - isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, true); + isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true); return ret; } From ccfe68a916baecc50fd7eae3d5be945469016e4c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Apr 2017 18:13:10 -0700 Subject: [PATCH 0792/2608] Pass alloc_ctx down profiling path. With this change, when profiling is enabled, we avoid doing redundant rtree lookups. Also changed dalloc_atx_t to alloc_atx_t, as it's now used on allocation path as well (to speed up profiling). 
--- include/jemalloc/internal/arena_inlines_b.h | 63 ++++++++------ include/jemalloc/internal/arena_structs_b.h | 4 +- include/jemalloc/internal/arena_types.h | 2 +- .../internal/jemalloc_internal_inlines_c.h | 12 +-- include/jemalloc/internal/prof_inlines_b.h | 33 ++++---- src/arena.c | 11 ++- src/jemalloc.c | 84 ++++++++++++------- src/prof.c | 2 +- test/unit/prof_tctx.c | 4 +- 9 files changed, 133 insertions(+), 82 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 382289ee..8c76e0b0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -3,9 +3,10 @@ #ifndef JEMALLOC_ENABLE_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr); +prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, + alloc_ctx_t *ctx); void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); + alloc_ctx_t *ctx, prof_tctx_t *tctx); void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); @@ -16,10 +17,10 @@ size_t arena_salloc(tsdn_t *tsdn, const void *ptr); size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr); void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool slow_path); + alloc_ctx_t *alloc_ctx, bool slow_path); void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size); void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool slow_path); + alloc_ctx_t *alloc_ctx, bool slow_path); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) @@ -30,27 +31,41 @@ arena_bin_index(arena_t *arena, arena_bin_t *bin) { return binind; } 
-JEMALLOC_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr) { +JEMALLOC_ALWAYS_INLINE prof_tctx_t * +arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(!extent_slab_get(extent))) { - return large_prof_tctx_get(tsdn, extent); + /* Static check. */ + if (alloc_ctx == NULL) { + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(!extent_slab_get(extent))) { + return large_prof_tctx_get(tsdn, extent); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr)); + } } return (prof_tctx_t *)(uintptr_t)1U; } -JEMALLOC_INLINE void +JEMALLOC_ALWAYS_INLINE void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx) { + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(!extent_slab_get(extent))) { - large_prof_tctx_set(tsdn, extent, tctx); + /* Static check. 
*/ + if (alloc_ctx == NULL) { + extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(!extent_slab_get(extent))) { + large_prof_tctx_set(tsdn, extent, tctx); + } + } else { + if (unlikely(!alloc_ctx->slab)) { + large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx); + } } } @@ -196,7 +211,7 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool slow_path) { + alloc_ctx_t *alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -208,9 +223,9 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind; bool slab; rtree_ctx_t *rtree_ctx; - if (dalloc_ctx != NULL) { - szind = dalloc_ctx->szind; - slab = dalloc_ctx->slab; + if (alloc_ctx != NULL) { + szind = alloc_ctx->szind; + slab = alloc_ctx->slab; assert(szind != NSIZES); } else { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); @@ -293,7 +308,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool slow_path) { + alloc_ctx_t *alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); assert(size <= LARGE_MAXCLASS); @@ -305,9 +320,9 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, szind_t szind; bool slab; - UNUSED dalloc_ctx_t local_ctx; + UNUSED alloc_ctx_t local_ctx; if (config_prof && opt_prof) { - if (dalloc_ctx == NULL) { + if (alloc_ctx == NULL) { /* Uncommon case and should be a static check. 
*/ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, @@ -316,10 +331,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, (uintptr_t)ptr, true, &local_ctx.szind, &local_ctx.slab); assert(local_ctx.szind == size2index(size)); - dalloc_ctx = &local_ctx; + alloc_ctx = &local_ctx; } - slab = dalloc_ctx->slab; - szind = dalloc_ctx->szind; + slab = alloc_ctx->slab; + szind = alloc_ctx->szind; } else { /* * There is no risk of being confused by a promoted sampled diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 1f4fb6b1..1370b535 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -260,8 +260,8 @@ struct arena_tdata_s { ticker_t decay_ticker; }; -/* Used to pass rtree lookup context down the deallocation path. */ -struct dalloc_ctx_s { +/* Used to pass rtree lookup context down the path. */ +struct alloc_ctx_s { szind_t szind; bool slab; }; diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 435b930d..e243aabf 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -19,7 +19,7 @@ typedef struct arena_decay_s arena_decay_t; typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; -typedef struct dalloc_ctx_s dalloc_ctx_t; +typedef struct alloc_ctx_s alloc_ctx_t; typedef enum { percpu_arena_disabled = 0, diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 4fb34241..7884a206 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -15,10 +15,10 @@ void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); size_t 
ivsalloc(tsdn_t *tsdn, const void *ptr); void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool is_internal, bool slow_path); + alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path); void idalloc(tsd_t *tsd, void *ptr); void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool slow_path); + alloc_ctx_t *alloc_ctx, bool slow_path); void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); @@ -107,7 +107,7 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) { assert(ptr != NULL); assert(!is_internal || tcache == NULL); @@ -120,7 +120,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, dalloc_ctx_t *dalloc_ctx, if (!is_internal && *tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) { tcache = NULL; } - arena_dalloc(tsdn, ptr, tcache, dalloc_ctx, slow_path); + arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void @@ -130,9 +130,9 @@ idalloc(tsd_t *tsd, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - dalloc_ctx_t *dalloc_ctx, bool slow_path) { + alloc_ctx_t *alloc_ctx, bool slow_path) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - arena_sdalloc(tsdn, ptr, size, tcache, dalloc_ctx, slow_path); + arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); } JEMALLOC_ALWAYS_INLINE void * diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 5ee72c53..6a79c01e 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -5,20 +5,22 @@ bool prof_active_get_unlocked(void); bool 
prof_gdump_get_unlocked(void); prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr); +prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr, + alloc_ctx_t *alloc_ctx); void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx); void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out); prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update); void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, - prof_tctx_t *tctx); + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx); void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const void *ptr, size_t usize); +void prof_free(tsd_t *tsd, const void *ptr, size_t usize, + alloc_ctx_t *alloc_ctx); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) @@ -67,19 +69,20 @@ prof_tdata_get(tsd_t *tsd, bool create) { } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr) { +prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); - return arena_prof_tctx_get(tsdn, ptr); + return arena_prof_tctx_get(tsdn, ptr, alloc_ctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { +prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsdn, ptr, usize, tctx); + arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx); } JEMALLOC_ALWAYS_INLINE void @@ -145,7 +148,8 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { } 
JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { +prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsdn, ptr)); @@ -153,7 +157,8 @@ prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsdn, ptr, usize, tctx); } else { - prof_tctx_set(tsdn, ptr, usize, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsdn, ptr, usize, alloc_ctx, + (prof_tctx_t *)(uintptr_t)1U); } } @@ -188,7 +193,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely(sampled)) { prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd_tsdn(tsd), ptr, usize, + prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL, (prof_tctx_t *)(uintptr_t)1U); } else if (unlikely(old_sampled)) { /* @@ -199,7 +204,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, */ prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); } else { - assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr) == + assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) == (uintptr_t)1U); } @@ -216,8 +221,8 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); +prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { + prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); diff --git a/src/arena.c b/src/arena.c index 16728b34..4f5dcf6e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1064,12 +1064,19 @@ arena_reset(tsd_t *tsd, arena_t *arena) { size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); + 
alloc_ctx_t alloc_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind != NSIZES); + if (config_stats || (config_prof && opt_prof)) { - usize = isalloc(tsd_tsdn(tsd), ptr); + usize = index2size(alloc_ctx.szind); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); } /* Remove large allocation from prof sample set. */ if (config_prof && opt_prof) { - prof_free(tsd, ptr, usize); + prof_free(tsd, ptr, usize, &alloc_ctx); } large_dalloc(tsd_tsdn(tsd), extent); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); diff --git a/src/jemalloc.c b/src/jemalloc.c index e71949a1..fb164ee9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1749,7 +1749,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { */ prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); + + alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + if (usize > SMALL_MAXCLASS) { + alloc_ctx.slab = false; + } else { + alloc_ctx.slab = true; + } allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { @@ -1759,6 +1766,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { */ allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); + alloc_ctx.slab = false; } else { allocation = NULL; } @@ -1767,9 +1775,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { prof_alloc_rollback(tsd, tctx, true); goto label_oom; } - - prof_malloc(tsd_tsdn(tsd), allocation, usize, tctx); - + prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { /* * If dopts->alignment > 0, then ind is still 0, but usize was @@ -2016,13 +2022,14 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, } JEMALLOC_ALWAYS_INLINE_C void * -irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize) { 
+irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, + alloc_ctx_t *alloc_ctx) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); @@ -2048,28 +2055,28 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - dalloc_ctx_t dalloc_ctx; + alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &dalloc_ctx.szind, &dalloc_ctx.slab); - assert(dalloc_ctx.szind != NSIZES); + (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind != NSIZES); size_t usize; if (config_prof && opt_prof) { - usize = index2size(dalloc_ctx.szind); - prof_free(tsd, ptr, usize); + usize = index2size(alloc_ctx.szind); + prof_free(tsd, ptr, usize, &alloc_ctx); } else if (config_stats) { - usize = index2size(dalloc_ctx.szind); + usize = index2size(alloc_ctx.szind); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } if (likely(!slow_path)) { - idalloctm(tsd_tsdn(tsd), ptr, tcache, &dalloc_ctx, false, + idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false); } else { - idalloctm(tsd_tsdn(tsd), ptr, tcache, &dalloc_ctx, false, + idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } } @@ -2083,14 +2090,14 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - dalloc_ctx_t dalloc_ctx, *ctx; + alloc_ctx_t alloc_ctx, *ctx; if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); 
rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &dalloc_ctx.szind, &dalloc_ctx.slab); - assert(dalloc_ctx.szind == size2index(usize)); - prof_free(tsd, ptr, usize); - ctx = &dalloc_ctx; + (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind == size2index(usize)); + ctx = &alloc_ctx; + prof_free(tsd, ptr, usize, ctx); } else { ctx = NULL; } @@ -2138,11 +2145,18 @@ je_realloc(void *ptr, size_t size) { witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(tsd_tsdn(tsd), ptr); + alloc_ctx_t alloc_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind != NSIZES); + old_usize = index2size(alloc_ctx.szind); + assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = s2u(size); ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? - NULL : irealloc_prof(tsd, ptr, old_usize, usize); + NULL : irealloc_prof(tsd, ptr, old_usize, usize, + &alloc_ctx); } else { if (config_stats) { usize = s2u(size); @@ -2398,13 +2412,13 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE_C void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, - arena_t *arena) { + arena_t *arena, alloc_ctx_t *alloc_ctx) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, @@ -2474,15 +2488,20 @@ je_rallocx(void *ptr, size_t size, int flags) { tcache = tcache_get(tsd); } - old_usize = isalloc(tsd_tsdn(tsd), 
ptr); - + alloc_ctx_t alloc_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind != NSIZES); + old_usize = index2size(alloc_ctx.szind); + assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, - zero, tcache, arena); + zero, tcache, arena, &alloc_ctx); if (unlikely(p == NULL)) { goto label_oom; } @@ -2544,13 +2563,13 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE_C size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero) { + size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { size_t usize_max, usize; bool prof_active; prof_tctx_t *old_tctx, *tctx; prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr); + old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. 
* Therefore, compute its maximum possible value and use that in @@ -2605,8 +2624,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd = tsd_fetch(); witness_assert_lockless(tsd_tsdn(tsd)); - old_usize = isalloc(tsd_tsdn(tsd), ptr); - + alloc_ctx_t alloc_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind != NSIZES); + old_usize = index2size(alloc_ctx.szind); + assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + * extra) numerical overflow, but we may need to clamp extra to avoid @@ -2626,7 +2650,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { if (config_prof && opt_prof) { usize = ixallocx_prof(tsd, ptr, old_usize, size, extra, - alignment, zero); + alignment, zero, &alloc_ctx); } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); diff --git a/src/prof.c b/src/prof.c index 1844c2f3..334466b1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -234,7 +234,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, ptr, usize, tctx); + prof_tctx_set(tsdn, ptr, usize, NULL, tctx); malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 30c6b178..ff3b2b0c 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -15,7 +15,7 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tctx_p = prof_tctx_get(tsdn, p); + tctx_p = prof_tctx_get(tsdn, p, NULL); assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); 
@@ -25,7 +25,7 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - tctx_q = prof_tctx_get(tsdn, q); + tctx_q = prof_tctx_get(tsdn, q, NULL); assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); From b348ba29bb94b6e9da8dcea1105d4614556aceb9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Apr 2017 23:13:45 -0700 Subject: [PATCH 0793/2608] Bundle 3 branches on fast path into tsd_state. Added tsd_state_nominal_slow, which on fast path malloc() incorporates tcache_enabled check, and on fast path free() bundles both malloc_slow and tcache_enabled branches. --- .../internal/jemalloc_internal_externs.h | 4 + include/jemalloc/internal/private_symbols.txt | 3 + include/jemalloc/internal/tcache_inlines.h | 1 + include/jemalloc/internal/tsd_externs.h | 2 + include/jemalloc/internal/tsd_inlines.h | 83 +++++++------- include/jemalloc/internal/tsd_structs.h | 2 +- include/jemalloc/internal/tsd_types.h | 10 +- src/jemalloc.c | 101 +++++++++++------- src/tcache.c | 2 + src/tsd.c | 40 ++++++- 10 files changed, 170 insertions(+), 78 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 56d39d48..45c119f8 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,10 @@ #include "jemalloc/internal/atomic.h" +/* TSD checks this to set thread local slow state accordingly. */ +extern bool malloc_slow; + +/* Run-time options. 
*/ extern bool opt_abort; extern const char *opt_junk; extern bool opt_junk_alloc; diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 4931d489..c1573aa6 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -289,6 +289,7 @@ malloc_mutex_postfork_parent malloc_mutex_prefork malloc_mutex_unlock malloc_printf +malloc_slow malloc_snprintf malloc_strtoumax malloc_tsd_boot0 @@ -526,6 +527,7 @@ tsd_cleanup tsd_cleanup_wrapper tsd_fetch tsd_fetch_impl +tsd_fetch_slow tsd_get tsd_get_allocates tsd_iarena_get @@ -541,6 +543,7 @@ tsd_narenas_tdatap_get tsd_reentrancy_level_get tsd_reentrancy_level_set tsd_reentrancy_levelp_get +tsd_slow_update tsd_wrapper_get tsd_wrapper_set tsd_nominal diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index dae43f99..ea29f350 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -40,6 +40,7 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { } /* Commit the state last. Above calls check current state. 
*/ tsd_tcache_enabled_set(tsd, enabled); + tsd_slow_update(tsd); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h index d15fd591..6b9dfdc6 100644 --- a/include/jemalloc/internal/tsd_externs.h +++ b/include/jemalloc/internal/tsd_externs.h @@ -14,5 +14,7 @@ void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); #endif bool tsd_data_init(void *arg); void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd); +void tsd_slow_update(tsd_t *tsd); #endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 7c3fba5f..46eefb6e 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -19,12 +19,54 @@ bool tsdn_null(const tsdn_t *tsdn); tsd_t *tsdn_tsd(tsdn_t *tsdn); rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd); rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); +bool tsd_fast(tsd_t *tsd); +void tsd_assert_fast(tsd_t *tsd); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) +#define MALLOC_TSD_getset_yes(n, t) \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ +} \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t n) { \ + assert(tsd->state == tsd_state_nominal || \ + tsd->state == tsd_state_nominal_slow || \ + tsd->state == tsd_state_reincarnated); \ + tsd->n = n; \ +} +#define MALLOC_TSD_getset_no(n, t) +#define O(n, t, gs, i, c) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) { \ + return &tsd->n; \ +} \ + \ +MALLOC_TSD_getset_##gs(n, t) +MALLOC_TSD +#undef MALLOC_TSD_getset_yes +#undef MALLOC_TSD_getset_no +#undef O + +JEMALLOC_ALWAYS_INLINE void +tsd_assert_fast(tsd_t *tsd) { + assert(!malloc_slow && tsd_tcache_enabled_get(tsd)); +} + 
+JEMALLOC_ALWAYS_INLINE bool +tsd_fast(tsd_t *tsd) { + bool fast = (tsd->state == tsd_state_nominal); + if (fast) { + tsd_assert_fast(tsd); + } + + return fast; +} + JEMALLOC_ALWAYS_INLINE tsd_t * tsd_fetch_impl(bool init) { tsd_t *tsd = tsd_get(init); @@ -35,19 +77,10 @@ tsd_fetch_impl(bool init) { assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { - if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - tsd_data_init(tsd); - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - tsd_data_init(tsd); - } else { - assert(tsd->state == tsd_state_reincarnated); - } + return tsd_fetch_slow(tsd); } + assert(tsd_fast(tsd)); + tsd_assert_fast(tsd); return tsd; } @@ -64,33 +97,9 @@ tsd_tsdn(tsd_t *tsd) { JEMALLOC_INLINE bool tsd_nominal(tsd_t *tsd) { - return (tsd->state == tsd_state_nominal); + return (tsd->state <= tsd_state_nominal_max); } -#define MALLOC_TSD_getset_yes(n, t) \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) { \ - return *tsd_##n##p_get(tsd); \ -} \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) { \ - assert(tsd->state == tsd_state_nominal || \ - tsd->state == tsd_state_reincarnated); \ - tsd->n = n; \ -} -#define MALLOC_TSD_getset_no(n, t) -#define O(n, t, gs, i, c) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) { \ - return &tsd->n; \ -} \ - \ -MALLOC_TSD_getset_##gs(n, t) -MALLOC_TSD -#undef MALLOC_TSD_getset_yes -#undef MALLOC_TSD_getset_no -#undef O - JEMALLOC_ALWAYS_INLINE tsdn_t * tsdn_fetch(void) { if (!tsd_booted_get()) { diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index ac74152c..c166fe6b 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -64,7 +64,7 @@ struct tsd_init_head_s { O(iarena, arena_t *, yes, no, yes) \ O(arena, arena_t *, yes, no, yes) \ 
O(arenas_tdata, arena_tdata_t *,yes, no, yes) \ - O(tcache, tcache_t, yes, no, yes) \ + O(tcache, tcache_t, no, no, yes) \ O(witnesses, witness_list_t, no, no, yes) \ O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ no, no, no) \ diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 27afd1d6..dc9efbb6 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -20,11 +20,15 @@ typedef struct tsdn_s tsdn_t; #define TSDN_NULL ((tsdn_t *)0) enum { - tsd_state_uninitialized = 0, - tsd_state_nominal = 1, + tsd_state_nominal = 0, /* Common case --> jnz. */ + tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ + /* the above 2 nominal states should be lower values. */ + tsd_state_nominal_max = 1, /* used for comparison only. */ tsd_state_purgatory = 2, - tsd_state_reincarnated = 3 + tsd_state_reincarnated = 3, + tsd_state_uninitialized = 4 }; + /* Manually limit tsd_state_t to a single byte. */ typedef uint8_t tsd_state_t; diff --git a/src/jemalloc.c b/src/jemalloc.c index fb164ee9..4bec2dea 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -76,7 +76,7 @@ typedef enum { static malloc_init_t malloc_init_state = malloc_init_uninitialized; /* False should be the common case. Set to true to trigger initialization. */ -static bool malloc_slow = true; +bool malloc_slow = true; /* When malloc_slow is true, set the corresponding bits for sanity check. */ enum { @@ -1539,7 +1539,13 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, /* Fill in the tcache. */ if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) { - tcache = tcache_get(tsd); + if (likely(!sopts->slow)) { + /* Getting tcache ptr unconditionally. 
*/ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else { + tcache = tcache_get(tsd); + } } else if (dopts->tcache_ind == TCACHE_IND_NONE) { tcache = NULL; } else { @@ -1640,13 +1646,11 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, } JEMALLOC_ALWAYS_INLINE_C int -imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { +imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* Where the actual allocated memory will live. */ void *allocation = NULL; /* Filled in by compute_size_with_overflow below. */ size_t size = 0; - /* We compute a value for this right before allocating. */ - tsd_t *tsd = NULL; /* * For unaligned allocations, we need only ind. For aligned * allocations, or in case of stats or profiling we need usize. @@ -1667,13 +1671,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { */ int8_t *reentrancy_level = NULL; - /* Initialize (if we can't prove we don't have to). */ - if (sopts->slow) { - if (unlikely(malloc_init())) { - goto label_oom; - } - } - /* Compute the amount of memory the user wants. */ if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts, &size))) { @@ -1714,11 +1711,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { } } - /* - * We always need the tsd, even if we aren't going to use the tcache for - * some reason. Let's grab it right away. 
- */ - tsd = tsd_fetch(); /* * If we need to handle reentrancy, we can do it out of a @@ -1752,11 +1744,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts) { alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - if (usize > SMALL_MAXCLASS) { - alloc_ctx.slab = false; - } else { - alloc_ctx.slab = true; - } + alloc_ctx.slab = (usize <= SMALL_MAXCLASS); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { @@ -1879,12 +1867,29 @@ label_invalid_alignment: /* Returns the errno-style error code of the allocation. */ JEMALLOC_ALWAYS_INLINE_C int imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { - if (unlikely(malloc_slow)) { - sopts->slow = true; - return imalloc_body(sopts, dopts); - } else { + if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) { + if (config_xmalloc && unlikely(opt_xmalloc)) { + malloc_write(sopts->oom_string); + abort(); + } + UTRACE(NULL, size, NULL); + set_errno(ENOMEM); + *dopts->result = NULL; + + return ENOMEM; + } + + /* We always need the tsd. Let's grab it right away. */ + tsd_t *tsd = tsd_fetch(); + assert(tsd); + if (likely(tsd_fast(tsd))) { + /* Fast and common path. */ + tsd_assert_fast(tsd); sopts->slow = false; - return imalloc_body(sopts, dopts); + return imalloc_body(sopts, dopts, tsd); + } else { + sopts->slow = true; + return imalloc_body(sopts, dopts, tsd); } } /******************************************************************************/ @@ -2198,13 +2203,23 @@ je_free(void *ptr) { if (*tsd_reentrancy_levelp_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } - tcache_t *tcache = NULL; - if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } - if (likely(!malloc_slow)) { + tcache_t *tcache; + if (likely(tsd_fast(tsd))) { + tsd_assert_fast(tsd); + if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + /* Getting tcache ptr unconditionally. 
*/ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else { + tcache = NULL; + } ifree(tsd, ptr, tcache, false); } else { + if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } ifree(tsd, ptr, tcache, true); } if (*tsd_reentrancy_levelp_get(tsd) == 0) { @@ -2699,6 +2714,7 @@ je_dallocx(void *ptr, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); + bool fast = tsd_fast(tsd); witness_assert_lockless(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. */ @@ -2710,14 +2726,20 @@ je_dallocx(void *ptr, int flags) { } } else { if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { - tcache = tcache_get(tsd); + if (likely(fast)) { + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else { + tcache = tcache_get(tsd); + } } else { tcache = NULL; } } UTRACE(ptr, 0, 0); - if (likely(!malloc_slow)) { + if (likely(fast)) { + tsd_assert_fast(tsd); ifree(tsd, ptr, tcache, false); } else { ifree(tsd, ptr, tcache, true); @@ -2749,6 +2771,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); + bool fast = tsd_fast(tsd); usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -2763,14 +2786,20 @@ je_sdallocx(void *ptr, size_t size, int flags) { } } else { if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { - tcache = tcache_get(tsd); + if (likely(fast)) { + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else { + tcache = tcache_get(tsd); + } } else { tcache = NULL; } } UTRACE(ptr, 0, 0); - if (likely(!malloc_slow)) { + if (likely(fast)) { + tsd_assert_fast(tsd); isfree(tsd, ptr, usize, tcache, false); } else { isfree(tsd, ptr, usize, tcache, true); diff --git a/src/tcache.c b/src/tcache.c index 99749fbc..7e71bb6a 100644 --- 
a/src/tcache.c +++ b/src/tcache.c @@ -334,6 +334,8 @@ bool tsd_tcache_enabled_data_init(tsd_t *tsd) { /* Called upon tsd initialization. */ tsd_tcache_enabled_set(tsd, opt_tcache); + tsd_slow_update(tsd); + if (opt_tcache) { /* Trigger tcache init. */ tsd_tcache_data_init(tsd); diff --git a/src/tsd.c b/src/tsd.c index 3e72548c..bdd3f3c5 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -12,6 +12,40 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) /******************************************************************************/ +void +tsd_slow_update(tsd_t *tsd) { + if (tsd_nominal(tsd)) { + if (malloc_slow || !tsd->tcache_enabled) { + tsd->state = tsd_state_nominal_slow; + } else { + tsd->state = tsd_state_nominal; + } + } +} + +tsd_t * +tsd_fetch_slow(tsd_t *tsd) { + if (tsd->state == tsd_state_nominal_slow) { + /* On slow path but no work needed. */ + assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || + *tsd_arenas_tdata_bypassp_get(tsd)); + } else if (tsd->state == tsd_state_uninitialized) { + tsd->state = tsd_state_nominal; + tsd_slow_update(tsd); + /* Trigger cleanup handler registration. */ + tsd_set(tsd); + tsd_data_init(tsd); + } else if (tsd->state == tsd_state_purgatory) { + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + tsd_data_init(tsd); + } else { + assert(tsd->state == tsd_state_reincarnated); + } + + return tsd; +} + void * malloc_tsd_malloc(size_t size) { return a0malloc(CACHELINE_CEILING(size)); @@ -82,6 +116,7 @@ tsd_cleanup(void *arg) { /* Do nothing. */ break; case tsd_state_nominal: + case tsd_state_nominal_slow: case tsd_state_reincarnated: /* * Reincarnated means another destructor deallocated memory @@ -129,7 +164,10 @@ malloc_tsd_boot0(void) { void malloc_tsd_boot1(void) { tsd_boot1(); - *tsd_arenas_tdata_bypassp_get(tsd_fetch()) = false; + tsd_t *tsd = tsd_fetch(); + /* malloc_slow has been set properly. Update tsd_slow. 
*/ + tsd_slow_update(tsd); + *tsd_arenas_tdata_bypassp_get(tsd) = false; } #ifdef _WIN32 From c2fcf9c2cfcbaba58db1941c91c7a8a4b6623401 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 12 Apr 2017 16:16:27 -0700 Subject: [PATCH 0794/2608] Switch to fine-grained reentrancy support. Previously we had a general detection and support of reentrancy, at the cost of having branches and inc / dec operations on fast paths. To avoid taxing fast paths, we move the reentrancy operations onto tsd slow state, and only modify reentrancy level around external calls (that might trigger reentrancy). --- .../internal/jemalloc_internal_inlines_a.h | 23 ++++ .../internal/jemalloc_internal_inlines_b.h | 2 +- .../internal/jemalloc_internal_inlines_c.h | 4 +- include/jemalloc/internal/tsd_inlines.h | 8 +- include/jemalloc/internal/tsd_structs.h | 2 +- src/arena.c | 6 +- src/jemalloc.c | 127 ++++++++---------- src/tsd.c | 4 +- 8 files changed, 90 insertions(+), 86 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 822b4d75..600d7226 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -33,6 +33,8 @@ tcache_t *tcache_get(tsd_t *tsd); malloc_cpuid_t malloc_getcpu(void); unsigned percpu_arena_choose(void); unsigned percpu_arena_ind_limit(void); +void pre_reentrancy(tsd_t *tsd); +void post_reentrancy(tsd_t *tsd); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) @@ -445,6 +447,27 @@ tcache_get(tsd_t *tsd) { return tsd_tcachep_get(tsd); } + +JEMALLOC_INLINE void +pre_reentrancy(tsd_t *tsd) { + bool fast = tsd_fast(tsd); + ++*tsd_reentrancy_levelp_get(tsd); + if (fast) { + /* Prepare slow path for reentrancy. 
*/ + tsd_slow_update(tsd); + assert(tsd->state == tsd_state_nominal_slow); + } +} + +JEMALLOC_INLINE void +post_reentrancy(tsd_t *tsd) { + int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); + assert(*reentrancy_level > 0); + if (--*reentrancy_level == 0) { + tsd_slow_update(tsd); + } +} + #endif #endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 52afb42d..e7d564ce 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -16,7 +16,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { } /* During reentrancy, arena 0 is the safest bet. */ - if (*tsd_reentrancy_levelp_get(tsd) > 1) { + if (unlikely(tsd_reentrancy_level_get(tsd) > 0)) { return arena_get(tsd_tsdn(tsd), 0, true); } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 7884a206..bb1f2deb 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -117,8 +117,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - if (!is_internal && *tsd_reentrancy_levelp_get(tsdn_tsd(tsdn)) != 0) { - tcache = NULL; + if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + assert(tcache == NULL); } arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); } diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 46eefb6e..93469bca 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -20,7 +20,7 @@ tsd_t *tsdn_tsd(tsdn_t *tsdn); rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd); rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t 
*fallback); bool tsd_fast(tsd_t *tsd); -void tsd_assert_fast(tsd_t *tsd); +bool tsd_assert_fast(tsd_t *tsd); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) @@ -52,9 +52,11 @@ MALLOC_TSD #undef MALLOC_TSD_getset_no #undef O -JEMALLOC_ALWAYS_INLINE void +JEMALLOC_ALWAYS_INLINE bool tsd_assert_fast(tsd_t *tsd) { - assert(!malloc_slow && tsd_tcache_enabled_get(tsd)); + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && + tsd_reentrancy_level_get(tsd) == 0); + return true; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h index c166fe6b..40fea97b 100644 --- a/include/jemalloc/internal/tsd_structs.h +++ b/include/jemalloc/internal/tsd_structs.h @@ -55,7 +55,7 @@ struct tsd_init_head_s { /* O(name, type, [gs]et, init, cleanup) */ \ O(tcache_enabled, bool, yes, yes, no) \ O(arenas_tdata_bypass, bool, no, no, no) \ - O(reentrancy_level, int8_t, no, no, no) \ + O(reentrancy_level, int8_t, yes, no, no) \ O(narenas_tdata, uint32_t, yes, no, no) \ O(thread_allocated, uint64_t, yes, no, no) \ O(thread_deallocated, uint64_t, yes, no, no) \ diff --git a/src/arena.c b/src/arena.c index 4f5dcf6e..5b540ce3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1966,11 +1966,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * If we're here, then arena 0 already exists, so bootstrapping * is done enough that we should have tsd. 
*/ - int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsdn_tsd( - tsdn)); - ++*reentrancy_level; + pre_reentrancy(tsdn_tsd(tsdn)); hooks_arena_new_hook(); - --*reentrancy_level; + post_reentrancy(tsdn_tsd(tsdn)); } return arena; diff --git a/src/jemalloc.c b/src/jemalloc.c index 4bec2dea..4c38517b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1663,13 +1663,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { szind_t ind = 0; size_t usize = 0; - /* - * For reentrancy checking, we get the old reentrancy level from tsd and - * reset it once we're done. In case of early bailout though, we never - * bother getting the old level, so we shouldn't try to reset it. This - * is indicated by leaving the pointer as NULL. - */ - int8_t *reentrancy_level = NULL; + /* Reentrancy is only checked on slow path. */ + int8_t reentrancy_level; /* Compute the amount of memory the user wants. */ if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts, @@ -1716,12 +1711,11 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { * If we need to handle reentrancy, we can do it out of a * known-initialized arena (i.e. arena 0). */ - reentrancy_level = tsd_reentrancy_levelp_get(tsd); - ++*reentrancy_level; - if (*reentrancy_level == 1) { + reentrancy_level = tsd_reentrancy_level_get(tsd); + if (reentrancy_level == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } - if (unlikely(*reentrancy_level > 1)) { + if (sopts->slow && unlikely(reentrancy_level > 0)) { /* * We should never specify particular arenas or tcaches from * within our internal allocations. @@ -1795,14 +1789,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* Success! */ - if (*reentrancy_level == 1) { + if (reentrancy_level == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } - /* - * If we got here, we never bailed out on a failure path, so - * reentrancy_level is non-null. 
- */ - --*reentrancy_level; *dopts->result = allocation; return 0; @@ -1826,10 +1815,6 @@ label_oom: *dopts->result = NULL; } - if (reentrancy_level != NULL) { - --*reentrancy_level; - } - return ENOMEM; /* @@ -1857,10 +1842,6 @@ label_invalid_alignment: *dopts->result = NULL; } - if (reentrancy_level != NULL) { - --*reentrancy_level; - } - return EINVAL; } @@ -2053,8 +2034,11 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, JEMALLOC_ALWAYS_INLINE_C void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - if (*tsd_reentrancy_levelp_get(tsd) == 0) { + assert(slow_path || tsd_assert_fast(tsd)); + if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); + } else { + assert(slow_path); } assert(ptr != NULL); @@ -2088,8 +2072,11 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { JEMALLOC_ALWAYS_INLINE_C void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { - if (*tsd_reentrancy_levelp_get(tsd) == 0) { + assert(slow_path || tsd_assert_fast(tsd)); + if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); + } else { + assert(slow_path); } assert(ptr != NULL); @@ -2129,14 +2116,14 @@ je_realloc(void *ptr, size_t size) { if (unlikely(size == 0)) { if (ptr != NULL) { - tsd_t *tsd; - /* realloc(ptr, 0) is equivalent to free(ptr). 
*/ UTRACE(ptr, 0, 0); - tsd = tsd_fetch(); - tcache_t *tcache = NULL; - if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + tcache_t *tcache; + tsd_t *tsd = tsd_fetch(); + if (tsd_reentrancy_level_get(tsd) == 0) { tcache = tcache_get(tsd); + } else { + tcache = NULL; } ifree(tsd, ptr, tcache, true); return NULL; @@ -2200,29 +2187,25 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - if (*tsd_reentrancy_levelp_get(tsd) == 0) { + if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } + tcache_t *tcache; if (likely(tsd_fast(tsd))) { tsd_assert_fast(tsd); - if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - tcache = NULL; - } + /* Unconditionally get tcache ptr on fast path. */ + tcache = tsd_tcachep_get(tsd); ifree(tsd, ptr, tcache, false); } else { - if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { + if (likely(tsd_reentrancy_level_get(tsd) == 0)) { tcache = tcache_get(tsd); } else { tcache = NULL; } ifree(tsd, ptr, tcache, true); } - if (*tsd_reentrancy_levelp_get(tsd) == 0) { + if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } } @@ -2707,33 +2690,32 @@ je_sallocx(const void *ptr, int flags) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { - tsd_t *tsd; - tcache_t *tcache; - assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); witness_assert_lockless(tsd_tsdn(tsd)); + + tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. 
*/ - assert(*tsd_reentrancy_levelp_get(tsd) == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { - if (likely(fast)) { - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - tcache = tcache_get(tsd); - } + if (likely(fast)) { + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); } else { - tcache = NULL; + if (likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } } } @@ -2749,10 +2731,9 @@ je_dallocx(void *ptr, int flags) { JEMALLOC_ALWAYS_INLINE_C size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { - size_t usize; - witness_assert_lockless(tsdn); + size_t usize; if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { usize = s2u(size); } else { @@ -2764,36 +2745,34 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags) { - tsd_t *tsd; - size_t usize; - tcache_t *tcache; - assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd = tsd_fetch(); - bool fast = tsd_fast(tsd); - usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == isalloc(tsd_tsdn(tsd), ptr)); + tsd_t *tsd = tsd_fetch(); + bool fast = tsd_fast(tsd); + size_t usize = inallocx(tsd_tsdn(tsd), size, flags); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); witness_assert_lockless(tsd_tsdn(tsd)); + + tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. 
*/ - assert(*tsd_reentrancy_levelp_get(tsd) == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); } } else { - if (likely(*tsd_reentrancy_levelp_get(tsd) == 0)) { - if (likely(fast)) { - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - tcache = tcache_get(tsd); - } + if (likely(fast)) { + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); } else { - tcache = NULL; + if (likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } } } diff --git a/src/tsd.c b/src/tsd.c index bdd3f3c5..cb7dd3fb 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -15,7 +15,8 @@ malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) void tsd_slow_update(tsd_t *tsd) { if (tsd_nominal(tsd)) { - if (malloc_slow || !tsd->tcache_enabled) { + if (malloc_slow || !tsd->tcache_enabled || + tsd_reentrancy_level_get(tsd) > 0) { tsd->state = tsd_state_nominal_slow; } else { tsd->state = tsd_state_nominal; @@ -28,6 +29,7 @@ tsd_fetch_slow(tsd_t *tsd) { if (tsd->state == tsd_state_nominal_slow) { /* On slow path but no work needed. */ assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || + tsd_reentrancy_level_get(tsd) > 0 || *tsd_arenas_tdata_bypassp_get(tsd)); } else if (tsd->state == tsd_state_uninitialized) { tsd->state = tsd_state_nominal; From d16f1e53df3836f95deeca73419bb8c541aa579f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 14 Apr 2017 20:54:49 -0700 Subject: [PATCH 0795/2608] Skip percpu arena when choosing iarena. 
--- include/jemalloc/internal/jemalloc_internal_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index e7d564ce..f22708a5 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -47,7 +47,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { * managed arena), then percpu arena is skipped. */ if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) && - (arena_ind_get(ret) < percpu_arena_ind_limit()) && + !internal && (arena_ind_get(ret) < percpu_arena_ind_limit()) && (ret->last_thd != tsd_tsdn(tsd))) { unsigned ind = percpu_arena_choose(); if (arena_ind_get(ret) != ind) { From 3c9c41edb28b02c5ec45cfea0f076276e985cf9e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 14 Apr 2017 11:05:38 -0700 Subject: [PATCH 0796/2608] Improve rtree cache with a two-level cache design. Two levels of rcache are implemented: a direct mapped cache as L1, combined with an LRU cache as L2. The L1 cache offers low cost on cache hit, but could suffer collision under circumstances. This is complemented by the L2 LRU cache, which is slower on cache access (overhead from linear search + reordering), but solves collision of L1 rather well. 
--- include/jemalloc/internal/rtree_inlines.h | 61 ++++++++++++++++------- include/jemalloc/internal/rtree_structs.h | 3 ++ include/jemalloc/internal/rtree_types.h | 30 ++++++----- src/prof.c | 1 + src/rtree.c | 37 +++++++++++--- 5 files changed, 97 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index ce03c578..6791f50c 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -64,6 +64,15 @@ rtree_leafkey(uintptr_t key) { return (key & mask); } +JEMALLOC_ALWAYS_INLINE size_t +rtree_cache_direct_map(uintptr_t key) { + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - + rtree_levels[RTREE_HEIGHT-1].bits); + unsigned maskbits = ptrbits - cumbits; + return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); +} + JEMALLOC_ALWAYS_INLINE uintptr_t rtree_subkey(uintptr_t key, unsigned level) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); @@ -320,36 +329,54 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, assert(key != 0); assert(!dependent || !init_missing); + size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); -#define RTREE_CACHE_CHECK(i) do { \ - if (likely(rtree_ctx->cache[i].leafkey == leafkey)) { \ - rtree_leaf_elm_t *leaf = rtree_ctx->cache[i].leaf; \ + /* Fast path: L1 direct mapped cache. */ + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + return &leaf[subkey]; + } + /* + * Search the L2 LRU cache. On hit, swap the matching element into the + * slot in L1 cache, and move the position in L2 up by 1. 
+ */ +#define RTREE_CACHE_CHECK_L2(i) do { \ + if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ + rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ assert(leaf != NULL); \ if (i > 0) { \ /* Bubble up by one. */ \ - rtree_ctx->cache[i].leafkey = \ - rtree_ctx->cache[i - 1].leafkey; \ - rtree_ctx->cache[i].leaf = \ - rtree_ctx->cache[i - 1].leaf; \ - rtree_ctx->cache[i - 1].leafkey = leafkey; \ - rtree_ctx->cache[i - 1].leaf = leaf; \ + rtree_ctx->l2_cache[i].leafkey = \ + rtree_ctx->l2_cache[i - 1].leafkey; \ + rtree_ctx->l2_cache[i].leaf = \ + rtree_ctx->l2_cache[i - 1].leaf; \ + rtree_ctx->l2_cache[i - 1].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[i - 1].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } else { \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ } \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ return &leaf[subkey]; \ } \ } while (0) /* Check the first cache entry. */ - RTREE_CACHE_CHECK(0); - /* - * Search the remaining cache elements, and on success move the matching - * element up by one slot. - */ - for (unsigned i = 1; i < RTREE_CTX_NCACHE; i++) { - RTREE_CACHE_CHECK(i); + RTREE_CACHE_CHECK_L2(0); + /* Search the remaining cache elements. */ + for (unsigned i = 1; i < RTREE_CTX_NCACHE_L2; i++) { + RTREE_CACHE_CHECK_L2(i); } -#undef RTREE_CACHE_CHECK +#undef RTREE_CACHE_CHECK_L2 return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, dependent, init_missing); diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 175a013c..7ff92e61 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -55,7 +55,10 @@ struct rtree_ctx_cache_elm_s { }; struct rtree_ctx_s { + /* Direct mapped cache. 
*/ rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. */ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; }; struct rtree_s { diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index e480542d..d9a4cf4d 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -42,19 +42,25 @@ typedef struct rtree_s rtree_t; #define RTREE_LEAFKEY_INVALID ((uintptr_t)1) /* - * Number of leafkey/leaf pairs to cache. Each entry supports an entire leaf, - * so the cache hit rate is typically high even with a small number of entries. - * In rare cases extent activity will straddle the boundary between two leaf - * nodes. Furthermore, an arena may use a combination of dss and mmap. Four - * entries covers both of these considerations as long as locality of reference - * is high, and/or total memory usage doesn't exceed the range supported by - * those entries. Note that as memory usage grows past the amount that this - * cache can directly cover, the cache will become less effective if locality of - * reference is low, but the consequence is merely cache misses while traversing - * the tree nodes, and the cache will itself suffer cache misses if made overly - * large, not to mention the cost of linear search. + * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. Each + * entry supports an entire leaf, so the cache hit rate is typically high even + * with a small number of entries. In rare cases extent activity will straddle + * the boundary between two leaf nodes. Furthermore, an arena may use a + * combination of dss and mmap. Note that as memory usage grows past the amount + * that this cache can directly cover, the cache will become less effective if + * locality of reference is low, but the consequence is merely cache misses + * while traversing the tree nodes. + * + * The L1 direct mapped cache offers consistent and low cost on cache hit. 
+ * However collision could affect hit rate negatively. This is resolved by + * combining with a L2 LRU cache, which requires linear search and re-ordering + * on access but suffers no collision. Note that, the cache will itself suffer + * cache misses if made overly large, plus the cost of linear search in the LRU + * cache. */ -#define RTREE_CTX_NCACHE 8 +#define RTREE_CTX_LG_NCACHE 4 +#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) +#define RTREE_CTX_NCACHE_L2 8 /* * Zero initializer required for tsd initialization only. Proper initialization diff --git a/src/prof.c b/src/prof.c index 334466b1..b33e9397 100644 --- a/src/prof.c +++ b/src/prof.c @@ -310,6 +310,7 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { } #ifdef JEMALLOC_PROF_LIBUNWIND +JEMALLOC_ALIGNED(CACHELINE) void prof_backtrace(prof_bt_t *bt) { int nframes; diff --git a/src/rtree.c b/src/rtree.c index 051428f1..8d11d99f 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -256,6 +256,16 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, leaf = rtree->root; #endif + if (config_debug) { + uintptr_t leafkey = rtree_leafkey(key); + for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) { + assert(rtree_ctx->cache[i].leafkey != leafkey); + } + for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) { + assert(rtree_ctx->l2_cache[i].leafkey != leafkey); + } + } + #define RTREE_GET_CHILD(level) { \ assert(level < RTREE_HEIGHT-1); \ if (level != 0 && !dependent && \ @@ -277,20 +287,30 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, dependent); \ } \ } + /* + * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss): + * (1) evict last entry in L2 cache; (2) move the collision slot from L1 + * cache down to L2; and 3) fill L1. 
+ */ #define RTREE_GET_LEAF(level) { \ assert(level == RTREE_HEIGHT-1); \ if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \ return NULL; \ } \ - if (RTREE_CTX_NCACHE > 1) { \ - memmove(&rtree_ctx->cache[1], \ - &rtree_ctx->cache[0], \ + if (RTREE_CTX_NCACHE_L2 > 1) { \ + memmove(&rtree_ctx->l2_cache[1], \ + &rtree_ctx->l2_cache[0], \ sizeof(rtree_ctx_cache_elm_t) * \ - (RTREE_CTX_NCACHE-1)); \ + (RTREE_CTX_NCACHE_L2 - 1)); \ } \ + size_t slot = rtree_cache_direct_map(key); \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ uintptr_t leafkey = rtree_leafkey(key); \ - rtree_ctx->cache[0].leafkey = leafkey; \ - rtree_ctx->cache[0].leaf = leaf; \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ uintptr_t subkey = rtree_subkey(key, level); \ return &leaf[subkey]; \ } @@ -433,6 +453,11 @@ rtree_ctx_data_init(rtree_ctx_t *ctx) { cache->leafkey = RTREE_LEAFKEY_INVALID; cache->leaf = NULL; } + for (unsigned i = 0; i < RTREE_CTX_NCACHE_L2; i++) { + rtree_ctx_cache_elm_t *cache = &ctx->l2_cache[i]; + cache->leafkey = RTREE_LEAFKEY_INVALID; + cache->leaf = NULL; + } } bool From 675701660cede59972707f60af32117023f91728 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 16 Apr 2017 17:40:16 -0700 Subject: [PATCH 0797/2608] Update base_unmap() to match extent_dalloc_wrapper(). Reverse the order of forced versus lazy purging attempts in base_unmap(), in order to match the order in extent_dalloc_wrapper(), which was reversed by 64e458f5cdd64f9b67cb495f177ef96bf3ce4e0e (Implement two-phase decay-based purging.). 
--- src/base.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/base.c b/src/base.c index eb68a175..00c6c6a2 100644 --- a/src/base.c +++ b/src/base.c @@ -31,14 +31,14 @@ static void base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, size_t size) { /* - * Cascade through dalloc, decommit, purge_lazy, and purge_forced, + * Cascade through dalloc, decommit, purge_forced, and purge_lazy, * stopping at first success. This cascade is performed for consistency * with the cascade in extent_dalloc_wrapper() because an application's * custom hooks may not support e.g. dalloc. This function is only ever * called as a side effect of arena destruction, so although it might * seem pointless to do anything besides dalloc here, the application - * may in fact want the end state of all associated virtual memory to in - * some consistent-but-allocated state. + * may in fact want the end state of all associated virtual memory to be + * in some consistent-but-allocated state. */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { @@ -47,10 +47,10 @@ base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (!pages_decommit(addr, size)) { return; } - if (!pages_purge_lazy(addr, size)) { + if (!pages_purge_forced(addr, size)) { return; } - if (!pages_purge_forced(addr, size)) { + if (!pages_purge_lazy(addr, size)) { return; } /* Nothing worked. This should never happen. */ @@ -66,16 +66,16 @@ base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, ind)) { return; } - if (extent_hooks->purge_lazy != NULL && - !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, - ind)) { - return; - } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { return; } + if (extent_hooks->purge_lazy != NULL && + !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, + ind)) { + return; + } /* Nothing worked. 
That's the application's problem. */ } } From 69aa5528091db805accc32af8d350f32b91bfd1a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 16 Apr 2017 18:39:14 -0700 Subject: [PATCH 0798/2608] Allocate increasingly large base blocks. Limit the total number of base blocks by leveraging the exponential size class sequence, similarly to extent_grow_retained(). --- include/jemalloc/internal/base_structs.h | 9 +++- src/base.c | 62 ++++++++++++++---------- 2 files changed, 44 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 13d5bd46..d79f38ee 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -21,11 +21,18 @@ struct base_s { * User-configurable extent hook functions. Points to an * extent_hooks_t. */ - atomic_p_t extent_hooks; + atomic_p_t extent_hooks; /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; + /* + * Most recent size class in the series of increasingly large base + * extents. Logarithmic spacing between subsequent allocations ensures + * that the total number of distinct mappings remains small. + */ + pszind_t pind_last; + /* Serial number generation state. 
*/ size_t extent_sn_next; diff --git a/src/base.c b/src/base.c index 00c6c6a2..515d3361 100644 --- a/src/base.c +++ b/src/base.c @@ -155,19 +155,33 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, */ static base_block_t * base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, - size_t *extent_sn_next, size_t size, size_t alignment) { - base_block_t *block; - size_t usize, header_size, gap_size, block_size; - + pszind_t *pind_last, size_t *extent_sn_next, size_t size, + size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); - usize = ALIGNMENT_CEILING(size, alignment); - header_size = sizeof(base_block_t); - gap_size = ALIGNMENT_CEILING(header_size, alignment) - header_size; - block_size = HUGEPAGE_CEILING(header_size + gap_size + usize); - block = (base_block_t *)base_map(extent_hooks, ind, block_size); + size_t usize = ALIGNMENT_CEILING(size, alignment); + size_t header_size = sizeof(base_block_t); + size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) - + header_size; + /* + * Create increasingly larger blocks in order to limit the total number + * of disjoint virtual memory ranges. Choose the next size in the page + * size class series (skipping size classes that are not a multiple of + * HUGEPAGE), or a size large enough to satisfy the requested size and + * alignment, whichever is larger. + */ + size_t min_block_size = HUGEPAGE_CEILING(psz2u(header_size + gap_size + + usize)); + pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 : + *pind_last; + size_t next_block_size = HUGEPAGE_CEILING(pind2sz(pind_next)); + size_t block_size = (min_block_size > next_block_size) ? 
min_block_size + : next_block_size; + base_block_t *block = (base_block_t *)base_map(extent_hooks, ind, + block_size); if (block == NULL) { return NULL; } + *pind_last = psz2ind(block_size); block->size = block_size; block->next = NULL; assert(block_size >= header_size); @@ -182,13 +196,11 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, */ static extent_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { - extent_hooks_t *extent_hooks = base_extent_hooks_get(base); - base_block_t *block; - malloc_mutex_assert_owner(tsdn, &base->mtx); - block = base_block_alloc(extent_hooks, base_ind_get(base), - &base->extent_sn_next, size, alignment); + extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + base_block_t *block = base_block_alloc(extent_hooks, base_ind_get(base), + &base->pind_last, &base->extent_sn_next, size, alignment); if (block == NULL) { return NULL; } @@ -211,21 +223,18 @@ b0get(void) { base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { - base_t *base; - size_t extent_sn_next, base_alignment, base_size, gap_size; - base_block_t *block; - szind_t i; - - extent_sn_next = 0; - block = base_block_alloc(extent_hooks, ind, &extent_sn_next, - sizeof(base_t), QUANTUM); + pszind_t pind_last = 0; + size_t extent_sn_next = 0; + base_block_t *block = base_block_alloc(extent_hooks, ind, &pind_last, + &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; } - base_alignment = CACHELINE; - base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); - base = (base_t *)base_extent_bump_alloc_helper(&block->extent, + size_t gap_size; + size_t base_alignment = CACHELINE; + size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); + base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent, &gap_size, base_size, base_alignment); base->ind = ind; atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED); @@ -233,9 +242,10 @@ base_new(tsdn_t 
*tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base_unmap(extent_hooks, ind, block, block->size); return NULL; } + base->pind_last = pind_last; base->extent_sn_next = extent_sn_next; base->blocks = block; - for (i = 0; i < NSIZES; i++) { + for (szind_t i = 0; i < NSIZES; i++) { extent_heap_new(&base->avail[i]); } if (config_stats) { From 76b35f4b2fdcc6eeb0ee7ecfbeaa05ef3fa2753e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 16 Apr 2017 21:51:26 -0700 Subject: [PATCH 0799/2608] Track extent structure serial number (esn) in extent_t. This enables stable sorting of extent_t structures. --- include/jemalloc/internal/base_externs.h | 23 +++--- include/jemalloc/internal/extent_inlines.h | 44 +++++++++++- include/jemalloc/internal/extent_structs.h | 18 ++++- include/jemalloc/internal/private_symbols.txt | 6 ++ src/base.c | 71 +++++++++++-------- src/extent.c | 3 +- test/unit/base.c | 4 +- 7 files changed, 121 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 2c555cff..0a1114f4 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -1,18 +1,19 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H -base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void base_delete(base_t *base); -extent_hooks_t *base_extent_hooks_get(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(base_t *base); +extent_hooks_t *base_extent_hooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, 
size_t alignment); +extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped); -void base_prefork(tsdn_t *tsdn, base_t *base); -void base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); -bool base_boot(tsdn_t *tsdn); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 6fc01017..e1c5cea1 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -17,6 +17,8 @@ unsigned extent_nfree_get(const extent_t *extent); void *extent_base_get(const extent_t *extent); void *extent_addr_get(const extent_t *extent); size_t extent_size_get(const extent_t *extent); +size_t extent_esn_get(const extent_t *extent); +size_t extent_bsize_get(const extent_t *extent); void *extent_before_get(const extent_t *extent); void *extent_last_get(const extent_t *extent); void *extent_past_get(const extent_t *extent); @@ -27,6 +29,8 @@ void extent_arena_set(extent_t *extent, arena_t *arena); void extent_addr_set(extent_t *extent, void *addr); void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); void extent_size_set(extent_t *extent, size_t size); +void extent_esn_set(extent_t *extent, size_t esn); +void extent_bsize_set(extent_t *extent, size_t bsize); void extent_szind_set(extent_t *extent, szind_t szind); void extent_nfree_set(extent_t *extent, unsigned nfree); void extent_nfree_inc(extent_t *extent); @@ -40,6 +44,7 @@ void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool slab, szind_t szind, size_t 
sn, extent_state_t state, bool zeroed, bool committed); +void extent_binit(extent_t *extent, void *addr, size_t size, size_t sn); void extent_list_init(extent_list_t *list); extent_t *extent_list_first(const extent_list_t *list); extent_t *extent_list_last(const extent_list_t *list); @@ -141,7 +146,17 @@ extent_addr_get(const extent_t *extent) { JEMALLOC_INLINE size_t extent_size_get(const extent_t *extent) { - return extent->e_size; + return (extent->e_size_esn & EXTENT_SIZE_MASK); +} + +JEMALLOC_INLINE size_t +extent_esn_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_ESN_MASK); +} + +JEMALLOC_INLINE size_t +extent_bsize_get(const extent_t *extent) { + return extent->e_bsize; } JEMALLOC_INLINE void * @@ -213,7 +228,19 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { JEMALLOC_INLINE void extent_size_set(extent_t *extent, size_t size) { - extent->e_size = size; + assert((size & ~EXTENT_SIZE_MASK) == 0); + extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +} + +JEMALLOC_INLINE void +extent_esn_set(extent_t *extent, size_t esn) { + extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & + EXTENT_ESN_MASK); +} + +JEMALLOC_INLINE void +extent_bsize_set(extent_t *extent, size_t bsize) { + extent->e_bsize = bsize; } JEMALLOC_INLINE void @@ -298,6 +325,19 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, } } +JEMALLOC_INLINE void +extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { + extent_arena_set(extent, NULL); + extent_addr_set(extent, addr); + extent_bsize_set(extent, bsize); + extent_slab_set(extent, false); + extent_szind_set(extent, NSIZES); + extent_sn_set(extent, sn); + extent_state_set(extent, extent_state_active); + extent_zeroed_set(extent, true); + extent_committed_set(extent, true); +} + JEMALLOC_INLINE void extent_list_init(extent_list_t *list) { ql_new(list); diff --git a/include/jemalloc/internal/extent_structs.h 
b/include/jemalloc/internal/extent_structs.h index 3d3d418b..38c3c8a1 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -23,8 +23,8 @@ struct extent_s { * z: zeroed * t: state * i: szind - * n: sn * f: nfree + * n: sn * * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa * @@ -102,8 +102,20 @@ struct extent_s { /* Pointer to the extent that this structure is responsible for. */ void *e_addr; - /* Extent size. */ - size_t e_size; + union { + /* + * Extent size and serial number associated with the extent + * structure (different than the serial number for the extent at + * e_addr). + * + * ssssssss [...] ssssssss ssssnnnn nnnnnnnn + */ + size_t e_size_esn; + #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) + #define EXTENT_ESN_MASK ((size_t)PAGE-1) + /* Base extent size, which may not be a multiple of PAGE. */ + size_t e_bsize; + }; /* * List linkage, used by a variety of lists: diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index c1573aa6..dd35d50f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -86,6 +86,7 @@ b0get base_alloc base_boot base_delete +base_extent_alloc base_extent_hooks_get base_extent_hooks_set base_ind_get @@ -143,6 +144,9 @@ extent_arena_set extent_base_get extent_before_get extent_boot +extent_binit +extent_bsize_get +extent_bsize_set extent_commit_wrapper extent_committed_get extent_committed_set @@ -156,6 +160,8 @@ extent_dss_boot extent_dss_mergeable extent_dss_prec_get extent_dss_prec_set +extent_esn_get +extent_esn_set extent_heap_empty extent_heap_first extent_heap_insert diff --git a/src/base.c b/src/base.c index 515d3361..caec9557 100644 --- a/src/base.c +++ b/src/base.c @@ -88,8 +88,7 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, sn = *extent_sn_next; (*extent_sn_next)++; - extent_init(extent, NULL, addr, size, false, NSIZES, 
sn, - extent_state_active, true, true); + extent_binit(extent, addr, size, sn); } static void * @@ -103,23 +102,22 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, *gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), alignment) - (uintptr_t)extent_addr_get(extent); ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size); - assert(extent_size_get(extent) >= *gap_size + size); - extent_init(extent, NULL, (void *)((uintptr_t)extent_addr_get(extent) + - *gap_size + size), extent_size_get(extent) - *gap_size - size, - false, NSIZES, extent_sn_get(extent), extent_state_active, true, - true); + assert(extent_bsize_get(extent) >= *gap_size + size); + extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) + + *gap_size + size), extent_bsize_get(extent) - *gap_size - size, + extent_sn_get(extent)); return ret; } static void base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, size_t gap_size, void *addr, size_t size) { - if (extent_size_get(extent) > 0) { + if (extent_bsize_get(extent) > 0) { /* * Compute the index for the largest size class that does not * exceed extent's size. */ - szind_t index_floor = size2index(extent_size_get(extent) + 1) - + szind_t index_floor = size2index(extent_bsize_get(extent) + 1) - 1; extent_heap_insert(&base->avail[index_floor], extent); } @@ -286,28 +284,16 @@ base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { return old_extent_hooks; } -/* - * base_alloc() returns zeroed memory, which is always demand-zeroed for the - * auto arenas, in order to make multi-page sparse data structures such as radix - * tree nodes efficient with respect to physical memory usage. Upon success a - * pointer to at least size bytes with specified alignment is returned. Note - * that size is rounded up to the nearest multiple of alignment to avoid false - * sharing. 
- */ -void * -base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { - void *ret; - size_t usize, asize; - szind_t i; - extent_t *extent; - +static void * +base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, + size_t *esn) { alignment = QUANTUM_CEILING(alignment); - usize = ALIGNMENT_CEILING(size, alignment); - asize = usize + alignment - QUANTUM; + size_t usize = ALIGNMENT_CEILING(size, alignment); + size_t asize = usize + alignment - QUANTUM; - extent = NULL; + extent_t *extent = NULL; malloc_mutex_lock(tsdn, &base->mtx); - for (i = size2index(asize); i < NSIZES; i++) { + for (szind_t i = size2index(asize); i < NSIZES; i++) { extent = extent_heap_remove_first(&base->avail[i]); if (extent != NULL) { /* Use existing space. */ @@ -318,17 +304,46 @@ base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { /* Try to allocate more space. */ extent = base_extent_alloc(tsdn, base, usize, alignment); } + void *ret; if (extent == NULL) { ret = NULL; goto label_return; } ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); + if (esn != NULL) { + *esn = extent_sn_get(extent); + } label_return: malloc_mutex_unlock(tsdn, &base->mtx); return ret; } +/* + * base_alloc() returns zeroed memory, which is always demand-zeroed for the + * auto arenas, in order to make multi-page sparse data structures such as radix + * tree nodes efficient with respect to physical memory usage. Upon success a + * pointer to at least size bytes with specified alignment is returned. Note + * that size is rounded up to the nearest multiple of alignment to avoid false + * sharing. 
+ */ +void * +base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { + return base_alloc_impl(tsdn, base, size, alignment, NULL); +} + +extent_t * +base_alloc_extent(tsdn_t *tsdn, base_t *base) { + size_t esn; + extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t), + CACHELINE, &esn); + if (extent == NULL) { + return NULL; + } + extent_esn_set(extent, esn); + return extent; +} + void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped) { diff --git a/src/extent.c b/src/extent.c index 2344e9cd..c999ae64 100644 --- a/src/extent.c +++ b/src/extent.c @@ -98,8 +98,7 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { extent = extent_list_last(&arena->extent_freelist); if (extent == NULL) { malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); - return base_alloc(tsdn, arena->base, sizeof(extent_t), - CACHELINE); + return base_alloc_extent(tsdn, arena->base); } extent_list_remove(&arena->extent_freelist, extent); malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); diff --git a/test/unit/base.c b/test/unit/base.c index 87116a3c..f498394e 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -154,10 +154,10 @@ TEST_BEGIN(test_base_hooks_not_null) { * that the first block's remaining space is considered for subsequent * allocation. */ - assert_zu_ge(extent_size_get(&base->blocks->extent), QUANTUM, + assert_zu_ge(extent_bsize_get(&base->blocks->extent), QUANTUM, "Remainder insufficient for test"); /* Use up all but one quantum of block. */ - while (extent_size_get(&base->blocks->extent) > QUANTUM) { + while (extent_bsize_get(&base->blocks->extent) > QUANTUM) { p = base_alloc(tsdn, base, QUANTUM, QUANTUM); assert_ptr_not_null(p, "Unexpected base_alloc() failure"); } From 881fbf762f18c8a94e71e94fb78f03d59bd4ad58 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 16 Apr 2017 22:31:16 -0700 Subject: [PATCH 0800/2608] Prefer old/low extent_t structures during reuse. 
Rather than using a LIFO queue to track available extent_t structures, use a red-black tree, and always choose the oldest/lowest available during reuse. --- doc/jemalloc.xml.in | 8 ++--- include/jemalloc/internal/arena_structs_b.h | 9 +++--- include/jemalloc/internal/ctl_types.h | 2 +- include/jemalloc/internal/extent_externs.h | 2 ++ include/jemalloc/internal/extent_inlines.h | 31 +++++++++++++++++++ include/jemalloc/internal/extent_structs.h | 25 +++++++++------ include/jemalloc/internal/private_symbols.txt | 3 ++ src/arena.c | 14 ++++----- src/ctl.c | 2 +- src/extent.c | 21 +++++++------ 10 files changed, 80 insertions(+), 37 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 91127a03..3b98395d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2691,14 +2691,14 @@ struct extent_hooks_s { counters. - + - stats.arenas.<i>.mutexes.extent_freelist.{counter} + stats.arenas.<i>.mutexes.extent_avail.{counter} (counter specific type) r- [] - Statistics on arena.<i>.extent_freelist - mutex (arena scope; extent freelist related). + Statistics on arena.<i>.extent_avail + mutex (arena scope; extent avail related). {counter} is one of the counters in mutex profiling counters. diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 1370b535..14c473c5 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -233,12 +233,13 @@ struct arena_s { atomic_u_t extent_grow_next; /* - * Freelist of extent structures that were allocated via base_alloc(). + * Available extent structures that were allocated via + * base_alloc_extent(). * - * Synchronization: extent_freelist_mtx. + * Synchronization: extent_avail_mtx. */ - extent_list_t extent_freelist; - malloc_mutex_t extent_freelist_mtx; + extent_tree_t extent_avail; + malloc_mutex_t extent_avail_mtx; /* * bins is used to store heaps of free regions. 
diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h index 065ccda5..e7986092 100644 --- a/include/jemalloc/internal/ctl_types.h +++ b/include/jemalloc/internal/ctl_types.h @@ -14,7 +14,7 @@ typedef enum { #define ARENA_PROF_MUTEXES \ OP(large) \ - OP(extent_freelist) \ + OP(extent_avail) \ OP(extents_dirty) \ OP(extents_muzzy) \ OP(extents_retained) \ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 3fe4a0ad..58e57e70 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" extern rtree_t extents_rtree; @@ -17,6 +18,7 @@ size_t extent_size_quantize_floor(size_t size); size_t extent_size_quantize_ceil(size_t size); #endif +rb_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index e1c5cea1..fbe51e47 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -53,8 +53,10 @@ void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert); void extent_list_remove(extent_list_t *list, extent_t *extent); int extent_sn_comp(const extent_t *a, const extent_t *b); +int extent_esn_comp(const extent_t *a, const extent_t *b); int extent_ad_comp(const extent_t *a, const extent_t *b); int extent_snad_comp(const extent_t *a, const extent_t *b); +int extent_esnead_comp(const extent_t *a, const extent_t *b); #endif #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) @@ -378,6 +380,14 @@ extent_sn_comp(const extent_t *a, const extent_t *b) { return (a_sn > b_sn) - 
(a_sn < b_sn); } +JEMALLOC_INLINE int +extent_esn_comp(const extent_t *a, const extent_t *b) { + size_t a_esn = extent_esn_get(a); + size_t b_esn = extent_esn_get(b); + + return (a_esn > b_esn) - (a_esn < b_esn); +} + JEMALLOC_INLINE int extent_ad_comp(const extent_t *a, const extent_t *b) { uintptr_t a_addr = (uintptr_t)extent_addr_get(a); @@ -386,6 +396,14 @@ extent_ad_comp(const extent_t *a, const extent_t *b) { return (a_addr > b_addr) - (a_addr < b_addr); } +JEMALLOC_INLINE int +extent_ead_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_eaddr = (uintptr_t)a; + uintptr_t b_eaddr = (uintptr_t)b; + + return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); +} + JEMALLOC_INLINE int extent_snad_comp(const extent_t *a, const extent_t *b) { int ret; @@ -398,6 +416,19 @@ extent_snad_comp(const extent_t *a, const extent_t *b) { ret = extent_ad_comp(a, b); return ret; } + +JEMALLOC_INLINE int +extent_esnead_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_esn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ead_comp(a, b); + return ret; +} #endif #endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 38c3c8a1..7066b8f6 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -2,8 +2,9 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/ph.h" typedef enum { extent_state_active = 0, @@ -117,15 +118,18 @@ struct extent_s { size_t e_bsize; }; - /* - * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - * - arena's extent structure freelist - */ - ql_elm(extent_t) ql_link; + union { + /* + * List linkage, used by a 
variety of lists: + * - arena_bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; + /* Red-black tree linkage, used by arena's extent_avail. */ + rb_node(extent_t) rb_link; + }; /* Linkage for per size class sn/address-ordered heaps. */ phn(extent_t) ph_link; @@ -142,6 +146,7 @@ struct extent_s { }; }; typedef ql_head(extent_t) extent_list_t; +typedef rb_tree(extent_t) extent_tree_t; typedef ph(extent_t) extent_heap_t; /* Quantized collection of extents, with built-in LRU queue. */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index dd35d50f..34c27897 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -160,8 +160,11 @@ extent_dss_boot extent_dss_mergeable extent_dss_prec_get extent_dss_prec_set +extent_ead_comp +extent_esn_comp extent_esn_get extent_esn_set +extent_esnead_comp extent_heap_empty extent_heap_first extent_heap_insert diff --git a/src/arena.c b/src/arena.c index 5b540ce3..74511405 100644 --- a/src/arena.c +++ b/src/arena.c @@ -314,8 +314,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. 
*/ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(extent_freelist_mtx, - arena_prof_mutex_extent_freelist) + READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx, + arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx, arena_prof_mutex_extents_dirty) READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx, @@ -1937,8 +1937,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { ATOMIC_RELAXED); } - extent_list_init(&arena->extent_freelist); - if (malloc_mutex_init(&arena->extent_freelist_mtx, "extent_freelist", + extent_avail_new(&arena->extent_avail); + if (malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail", WITNESS_RANK_EXTENT_FREELIST)) { goto label_error; } @@ -2007,7 +2007,7 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_freelist_mtx); + malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx); } void @@ -2036,7 +2036,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->extent_freelist_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); extents_postfork_parent(tsdn, &arena->extents_dirty); extents_postfork_parent(tsdn, &arena->extents_muzzy); extents_postfork_parent(tsdn, &arena->extents_retained); @@ -2056,7 +2056,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->extent_freelist_mtx); + malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); extents_postfork_child(tsdn, &arena->extents_dirty); extents_postfork_child(tsdn, &arena->extents_muzzy); extents_postfork_child(tsdn, &arena->extents_retained); diff --git a/src/ctl.c b/src/ctl.c index 6d6fadc7..7a1b03f1 100644 --- 
a/src/ctl.c +++ b/src/ctl.c @@ -2475,7 +2475,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->extent_freelist_mtx); + MUTEX_PROF_RESET(arena->extent_avail_mtx); MUTEX_PROF_RESET(arena->extents_dirty.mtx); MUTEX_PROF_RESET(arena->extents_muzzy.mtx); MUTEX_PROF_RESET(arena->extents_retained.mtx); diff --git a/src/extent.c b/src/extent.c index c999ae64..0bfc555d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -88,20 +88,21 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ +rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, + extent_esnead_comp) + extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { - extent_t *extent; - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - malloc_mutex_lock(tsdn, &arena->extent_freelist_mtx); - extent = extent_list_last(&arena->extent_freelist); + malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); + extent_t *extent = extent_avail_first(&arena->extent_avail); if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); return base_alloc_extent(tsdn, arena->base); } - extent_list_remove(&arena->extent_freelist, extent); - malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); + extent_avail_remove(&arena->extent_avail, extent); + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); return extent; } @@ -109,9 +110,9 @@ void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - malloc_mutex_lock(tsdn, &arena->extent_freelist_mtx); - extent_list_append(&arena->extent_freelist, extent); - malloc_mutex_unlock(tsdn, &arena->extent_freelist_mtx); + malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); + extent_avail_insert(&arena->extent_avail, extent); + malloc_mutex_unlock(tsdn, 
&arena->extent_avail_mtx); } extent_hooks_t * From 855c127348b2764fe3f25966949377e327efe1c8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 17 Apr 2017 15:32:44 -0700 Subject: [PATCH 0801/2608] Remove the function alignment of prof_backtrace. This was an attempt to avoid triggering slow path in libunwind, however turns out to be ineffective. --- src/prof.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index b33e9397..334466b1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -310,7 +310,6 @@ prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { } #ifdef JEMALLOC_PROF_LIBUNWIND -JEMALLOC_ALIGNED(CACHELINE) void prof_backtrace(prof_bt_t *bt) { int nframes; From c43a83d225551273c30c087ba90cbe2ca3c15e55 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 17 Apr 2017 15:50:13 -0700 Subject: [PATCH 0802/2608] Fix LD_PRELOAD_VAR configuration logic for 64-bit AIX. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 00868133..a3cfc88f 100644 --- a/configure.ac +++ b/configure.ac @@ -591,7 +591,7 @@ case "${host}" in JE_APPEND_VS(LIBS, -lposix4 -lsocket -lnsl) ;; *-ibm-aix*) - if "$LG_SIZEOF_PTR" = "8"; then + if "${LG_SIZEOF_PTR}" = "3"; then dnl 64bit AIX LD_PRELOAD_VAR="LDR_PRELOAD64" else From a01f99307719dcc8ca27cc70f0f0011beff914fa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 17 Apr 2017 15:54:53 -0700 Subject: [PATCH 0803/2608] Only disable munmap(2) by default on 64-bit Linux. This reduces the likelihood of address space exhaustion on 32-bit systems. This resolves #350. --- INSTALL | 8 +++++--- configure.ac | 8 ++++++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/INSTALL b/INSTALL index d7496612..042f8291 100644 --- a/INSTALL +++ b/INSTALL @@ -161,9 +161,11 @@ any of the following arguments (not a definitive list) to 'configure': --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. 
munmap() is disabled by default (i.e. - --disable-munmap is implied) on Linux, which has a quirk in its virtual - memory allocation algorithm that causes semi-permanent VM map holes under - normal jemalloc operation. + --disable-munmap is implied) on [64-bit] Linux, which has a quirk in its + virtual memory allocation algorithm that causes semi-permanent VM map holes + under normal jemalloc operation. Although munmap() causes issues on 32-bit + Linux as well, it is not disabled by default due to the practical + possibility of address space exhaustion. --disable-fill Disable support for junk/zero filling of memory. See the "opt.junk" and diff --git a/configure.ac b/configure.ac index a3cfc88f..f6d08ccd 100644 --- a/configure.ac +++ b/configure.ac @@ -557,7 +557,9 @@ case "${host}" in AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_C11_ATOMICS]) force_tls="0" - default_munmap="0" + if "${LG_SIZEOF_PTR}" = "3"; then + default_munmap="0" + fi ;; *-*-linux* | *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -568,7 +570,9 @@ case "${host}" in AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) - default_munmap="0" + if "${LG_SIZEOF_PTR}" = "3"; then + default_munmap="0" + fi ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) From 22366518b7309cc7dbe7908818e01a88632bd665 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 17 Apr 2017 15:22:14 -0700 Subject: [PATCH 0804/2608] Move CPP_PROLOGUE and CPP_EPILOGUE to the .cpp This lets us avoid having to specify them in every C file. 
--- include/jemalloc/internal/atomic.h | 4 ---- .../jemalloc/internal/jemalloc_internal_includes.h | 4 ---- include/jemalloc/internal/jemalloc_preamble.h.in | 12 ------------ include/jemalloc/internal/malloc_io.h | 1 - src/jemalloc_cpp.cpp | 8 ++++++++ 5 files changed, 8 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 1bfae7d7..adadb1a3 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -3,8 +3,6 @@ #define ATOMIC_INLINE static inline -CPP_PROLOGUE - #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) # include "jemalloc/internal/atomic_gcc_atomic.h" #elif defined(JEMALLOC_GCC_SYNC_ATOMICS) @@ -76,6 +74,4 @@ JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) #undef ATOMIC_INLINE -CPP_EPILOGUE - #endif /* JEMALLOC_INTERNAL_ATOMIC_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 53374f99..6871883b 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -36,8 +36,6 @@ * global jemalloc definitions, however. 
*/ -CPP_PROLOGUE - /******************************************************************************/ /* HERMETIC HEADERS */ /******************************************************************************/ @@ -149,6 +147,4 @@ CPP_PROLOGUE #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof_inlines_b.h" -CPP_EPILOGUE - #endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 6e38fe65..7c796c61 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -1,16 +1,6 @@ #ifndef JEMALLOC_PREAMBLE_H #define JEMALLOC_PREAMBLE_H -#ifdef __cplusplus -# define CPP_PROLOGUE extern "C" { -# define CPP_EPILOGUE } -#else -# define CPP_PROLOGUE -# define CPP_EPILOGUE -#endif - -CPP_PROLOGUE - #include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" @@ -182,6 +172,4 @@ static const bool have_percpu_arena = #endif ; -CPP_EPILOGUE - #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 8b2fb96f..47ae58ec 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -40,7 +40,6 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 - int buferror(int err, char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index b6d7c9a5..71999a8a 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -2,9 +2,17 @@ #include #define JEMALLOC_CPP_CPP_ +#ifdef __cplusplus +extern "C" { +#endif + #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#ifdef __cplusplus +} +#endif + // All operators in this file are exported. 
// Possibly alias hidden versions of malloc and sdallocx to avoid an extra plt From 0b00ffe55f01958a048ded483eababd051247b8d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 Apr 2017 12:57:18 -0700 Subject: [PATCH 0805/2608] Header refactoring: move bit_util.h out of the catchall --- include/jemalloc/internal/bitmap_inlines.h | 2 ++ include/jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/jemalloc_internal_inlines_a.h | 1 + include/jemalloc/internal/prng_inlines.h | 1 + test/unit/bit_util.c | 2 ++ 5 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index 506d5269..fc4bad4c 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BITMAP_INLINES_H #define JEMALLOC_INTERNAL_BITMAP_INLINES_H +#include "jemalloc/internal/bit_util.h" + #ifndef JEMALLOC_ENABLE_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 6871883b..64cda53a 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -41,7 +41,6 @@ /******************************************************************************/ #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 600d7226..978814b4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -2,6 +2,7 @@ #define 
JEMALLOC_INTERNAL_INLINES_A_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index 7026d52a..c39c63f5 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PRNG_INLINES_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" #ifndef JEMALLOC_ENABLE_INLINE uint32_t prng_state_next_u32(uint32_t state); diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index fe5c4473..42a97013 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/bit_util.h" + #define TEST_POW2_CEIL(t, suf, pri) do { \ unsigned i, pow2; \ t x; \ From 54373be0840881cb1123a190013dd11c34ab62f1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 Apr 2017 13:06:31 -0700 Subject: [PATCH 0806/2608] Header refactoring: move malloc_io.h out of the catchall --- include/jemalloc/internal/ctl_externs.h | 2 ++ include/jemalloc/internal/jemalloc_internal_includes.h | 1 - src/ckh.c | 2 ++ src/jemalloc.c | 1 + src/malloc_io.c | 1 + src/mutex.c | 2 ++ src/pages.c | 2 ++ src/prof.c | 2 ++ src/witness.c | 2 ++ 9 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h index 17bbba06..875a8101 100644 --- a/include/jemalloc/internal/ctl_externs.h +++ b/include/jemalloc/internal/ctl_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_CTL_EXTERNS_H #define JEMALLOC_INTERNAL_CTL_EXTERNS_H +#include "jemalloc/internal/malloc_io.h" + /* Maximum ctl tree depth. 
*/ #define CTL_MAX_DEPTH 7 diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 64cda53a..d4d0c201 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -41,7 +41,6 @@ /******************************************************************************/ #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/ckh.c b/src/ckh.c index a359a5cc..a9d181bd 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -38,6 +38,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" + /******************************************************************************/ /* Function prototypes for non-inline static functions. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 4c38517b..4b4e9430 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/malloc_io.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/malloc_io.c b/src/malloc_io.c index 98ef7a65..11dc68db 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,6 +1,7 @@ #define JEMALLOC_MALLOC_IO_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" #ifdef assert # undef assert diff --git a/src/mutex.c b/src/mutex.c index 26af5239..92c23dab 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" + #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) #include #endif diff --git a/src/pages.c b/src/pages.c index 53ca653b..f1138231 100644 --- a/src/pages.c +++ b/src/pages.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" + #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include #endif diff --git a/src/prof.c b/src/prof.c index 334466b1..276ca360 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" + /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND diff --git a/src/witness.c b/src/witness.c index 26b16e77..bd040826 100644 --- a/src/witness.c +++ b/src/witness.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/malloc_io.h" + void witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness_comp_t *comp, void *opaque) { From f692e6c214ec3fb5cb64e4131470793c6494afbd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 Apr 2017 13:31:16 -0700 Subject: [PATCH 0807/2608] Header refactoring: move util.h out of the 
catchall --- include/jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/tcache_inlines.h | 2 ++ src/arena.c | 2 ++ src/ckh.c | 1 + src/ctl.c | 2 ++ src/jemalloc.c | 1 + src/large.c | 2 ++ src/malloc_io.c | 2 ++ test/unit/junk.c | 2 ++ test/unit/mallctl.c | 2 ++ test/unit/stats_print.c | 2 ++ 11 files changed, 18 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index d4d0c201..3a9c6ca2 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -41,7 +41,6 @@ /******************************************************************************/ #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/util.h" /******************************************************************************/ /* TYPES */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index ea29f350..77e559a7 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/util.h" + #ifndef JEMALLOC_ENABLE_INLINE void tcache_event(tsd_t *tsd, tcache_t *tcache); void tcache_flush(void); diff --git a/src/arena.c b/src/arena.c index 74511405..27a0f75c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/util.h" + /******************************************************************************/ /* Data. 
*/ diff --git a/src/ckh.c b/src/ckh.c index a9d181bd..ce977e14 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -39,6 +39,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" /******************************************************************************/ /* Function prototypes for non-inline static functions. */ diff --git a/src/ctl.c b/src/ctl.c index 7a1b03f1..29689c8e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/util.h" + /******************************************************************************/ /* Data. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 4b4e9430..7e3ec6ea 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/large.c b/src/large.c index 3b53eb33..aa3ea1ff 100644 --- a/src/large.c +++ b/src/large.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/util.h" + /******************************************************************************/ void * diff --git a/src/malloc_io.c b/src/malloc_io.c index 11dc68db..6b99afcd 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,7 +1,9 @@ #define JEMALLOC_MALLOC_IO_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" + #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/util.h" #ifdef assert # undef assert diff --git a/test/unit/junk.c b/test/unit/junk.c index cfa8d0f2..f9390e41 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/util.h" + static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; static large_dalloc_junk_t *large_dalloc_junk_orig; static large_dalloc_maybe_junk_t *large_dalloc_maybe_junk_orig; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4241063e..b8c6a255 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/util.h" + TEST_BEGIN(test_mallctl_errors) { uint64_t epoch; size_t sz; diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 81778b04..acb26b06 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/util.h" + typedef enum { TOKEN_TYPE_NONE, TOKEN_TYPE_ERROR, From d9ec36e22dfe61f3fb972dee33a5cae529e46b07 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 Apr 2017 14:43:12 -0700 Subject: [PATCH 0808/2608] Header refactoring: move assert.h out of the catch-all --- include/jemalloc/internal/hash_inlines.h | 2 ++ 
include/jemalloc/internal/jemalloc_internal_includes.h | 6 ------ src/arena.c | 1 + src/base.c | 2 ++ src/bitmap.c | 2 ++ src/ckh.c | 1 + src/ctl.c | 1 + src/extent.c | 3 ++- src/extent_dss.c | 2 ++ src/extent_mmap.c | 2 ++ src/jemalloc.c | 1 + src/large.c | 1 + src/mutex.c | 1 + src/nstime.c | 2 ++ src/pages.c | 1 + src/prof.c | 1 + src/rtree.c | 2 ++ src/stats.c | 2 ++ src/tcache.c | 2 ++ src/tsd.c | 2 ++ src/witness.c | 1 + src/zone.c | 3 +++ 22 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash_inlines.h index b134492a..321c17cc 100644 --- a/include/jemalloc/internal/hash_inlines.h +++ b/include/jemalloc/internal/hash_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_HASH_INLINES_H #define JEMALLOC_INTERNAL_HASH_INLINES_H +#include "jemalloc/internal/assert.h" + /* * The following hash function is based on MurmurHash3, placed into the public * domain by Austin Appleby. See https://github.com/aappleby/smhasher for diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 3a9c6ca2..1539c909 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -36,12 +36,6 @@ * global jemalloc definitions, however. 
*/ -/******************************************************************************/ -/* HERMETIC HEADERS */ -/******************************************************************************/ - -#include "jemalloc/internal/assert.h" - /******************************************************************************/ /* TYPES */ /******************************************************************************/ diff --git a/src/arena.c b/src/arena.c index 27a0f75c..bb45a90c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/base.c b/src/base.c index caec9557..3de6e3b0 100644 --- a/src/base.c +++ b/src/base.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /******************************************************************************/ /* Data. 
*/ diff --git a/src/bitmap.c b/src/bitmap.c index 2eb50f1b..275636b9 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /******************************************************************************/ void diff --git a/src/ckh.c b/src/ckh.c index ce977e14..aaa97924 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -38,6 +38,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/src/ctl.c b/src/ctl.c index 29689c8e..4fba2cd2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/extent.c b/src/extent.c index 0bfc555d..6b7da3f9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,9 +1,10 @@ #define JEMALLOC_EXTENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" /******************************************************************************/ /* Data. */ diff --git a/src/extent_dss.c b/src/extent_dss.c index c609f14c..06bccc83 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /******************************************************************************/ /* Data. 
*/ diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 5717573e..9381dc16 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /******************************************************************************/ static void * diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e3ec6ea..d6b21586 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/src/large.c b/src/large.c index aa3ea1ff..629656d0 100644 --- a/src/large.c +++ b/src/large.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/mutex.c b/src/mutex.c index 92c23dab..3bb5ce1d 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) diff --git a/src/nstime.c b/src/nstime.c index ee8d78e7..e0895476 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + #define BILLION UINT64_C(1000000000) #define MILLION UINT64_C(1000000) diff --git a/src/pages.c b/src/pages.c index f1138231..7fa254f7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -2,6 
+2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT diff --git a/src/prof.c b/src/prof.c index 276ca360..f2b21f72 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" /******************************************************************************/ diff --git a/src/rtree.c b/src/rtree.c index 8d11d99f..ada6e9d5 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /* * Only the most significant bits of keys passed to rtree_{read,write}() are * used. diff --git a/src/stats.c b/src/stats.c index 110d62f7..bbba4679 100644 --- a/src/stats.c +++ b/src/stats.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + const char *global_mutex_names[num_global_prof_mutexes] = { #define OP(mtx) #mtx, GLOBAL_PROF_MUTEXES diff --git a/src/tcache.c b/src/tcache.c index 7e71bb6a..971c016b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /******************************************************************************/ /* Data. 
*/ diff --git a/src/tsd.c b/src/tsd.c index cb7dd3fb..686b4ef4 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" + /******************************************************************************/ /* Data. */ diff --git a/src/witness.c b/src/witness.c index bd040826..edb736bf 100644 --- a/src/witness.c +++ b/src/witness.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" void diff --git a/src/zone.c b/src/zone.c index a8a571fd..37bc8da9 100644 --- a/src/zone.c +++ b/src/zone.c @@ -1,5 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" + #ifndef JEMALLOC_ZONE # error "This source file is for zones on Darwin (OS X)." 
#endif From 7ebc83894f025332d44cae361bd89c53c04acfc7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 17 Apr 2017 15:52:44 -0700 Subject: [PATCH 0809/2608] Header refactoring: move jemalloc_internal_types.h out of the catch-all --- include/jemalloc/internal/arena_inlines_b.h | 2 ++ include/jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/jemalloc_internal_inlines_a.h | 1 + include/jemalloc/internal/jemalloc_internal_inlines_c.h | 2 ++ include/jemalloc/internal/size_classes.sh | 5 +++-- include/jemalloc/internal/tcache_inlines.h | 1 + src/jemalloc.c | 1 + 7 files changed, 10 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8c76e0b0..4264f4b3 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/jemalloc_internal_types.h" + #ifndef JEMALLOC_ENABLE_INLINE szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 1539c909..7a51c2d4 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/nstime_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 978814b4..0d922f12 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ 
b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/jemalloc_internal_types.h" #ifndef JEMALLOC_ENABLE_INLINE pszind_t psz2ind(size_t psz); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index bb1f2deb..8c793819 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_INLINES_C_H #define JEMALLOC_INTERNAL_INLINES_C_H +#include "jemalloc/internal/jemalloc_internal_types.h" + #ifndef JEMALLOC_ENABLE_INLINE arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); size_t isalloc(tsdn_t *tsdn, const void *ptr); diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index 60bdbd21..da1e006c 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -279,9 +279,10 @@ cat < Date: Mon, 17 Apr 2017 16:17:02 -0700 Subject: [PATCH 0810/2608] Header refactoring: unify nstime.h and move it out of the catch-all --- include/jemalloc/internal/arena_structs_b.h | 1 + .../internal/jemalloc_internal_includes.h | 3 --- include/jemalloc/internal/mutex_inlines.h | 2 ++ include/jemalloc/internal/mutex_structs.h | 1 + .../internal/{nstime_externs.h => nstime.h} | 17 +++++++++++++---- include/jemalloc/internal/nstime_structs.h | 8 -------- include/jemalloc/internal/nstime_types.h | 11 ----------- src/ctl.c | 1 + src/nstime.c | 2 ++ test/include/test/jemalloc_test.h.in | 4 +--- 10 files changed, 21 insertions(+), 29 deletions(-) rename include/jemalloc/internal/{nstime_externs.h => nstime.h} (73%) delete mode 100644 include/jemalloc/internal/nstime_structs.h delete mode 100644 include/jemalloc/internal/nstime_types.h diff --git a/include/jemalloc/internal/arena_structs_b.h 
b/include/jemalloc/internal/arena_structs_b.h index 14c473c5..f2195f68 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" /* diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 7a51c2d4..f31fed6a 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/nstime_types.h" #include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" @@ -66,7 +65,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/nstime_structs.h" #include "jemalloc/internal/spin_structs.h" #include "jemalloc/internal/ticker_structs.h" #include "jemalloc/internal/ckh_structs.h" @@ -90,7 +88,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/nstime_externs.h" #include "jemalloc/internal/ckh_externs.h" #include "jemalloc/internal/stats_externs.h" #include "jemalloc/internal/ctl_externs.h" diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 0552e190..5ec439f7 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H #define JEMALLOC_INTERNAL_MUTEX_INLINES_H +#include "jemalloc/internal/nstime.h" + void malloc_mutex_lock_slow(malloc_mutex_t *mutex); #ifndef JEMALLOC_ENABLE_INLINE diff 
--git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 7b7085d4..dc755547 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/nstime.h" struct mutex_prof_data_s { /* diff --git a/include/jemalloc/internal/nstime_externs.h b/include/jemalloc/internal/nstime.h similarity index 73% rename from include/jemalloc/internal/nstime_externs.h rename to include/jemalloc/internal/nstime.h index 1abc84d9..cfccca09 100644 --- a/include/jemalloc/internal/nstime_externs.h +++ b/include/jemalloc/internal/nstime.h @@ -1,7 +1,15 @@ -#ifndef JEMALLOC_INTERNAL_NSTIME_EXTERNS_H -#define JEMALLOC_INTERNAL_NSTIME_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_NSTIME_H +#define JEMALLOC_INTERNAL_NSTIME_H -void nstime_init(nstime_t *time, uint64_t ns); +/* Maximum supported number of seconds (~584 years). 
*/ +#define NSTIME_SEC_MAX KQU(18446744072) +#define NSTIME_ZERO_INITIALIZER {0} + +typedef struct { + uint64_t ns; +} nstime_t; + +void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); uint64_t nstime_sec(const nstime_t *time); @@ -24,4 +32,5 @@ bool nstime_monotonic(void); bool nstime_update(nstime_t *time); #endif -#endif /* JEMALLOC_INTERNAL_NSTIME_EXTERNS_H */ + +#endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/include/jemalloc/internal/nstime_structs.h b/include/jemalloc/internal/nstime_structs.h deleted file mode 100644 index a637f616..00000000 --- a/include/jemalloc/internal/nstime_structs.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_NSTIME_STRUCTS_H -#define JEMALLOC_INTERNAL_NSTIME_STRUCTS_H - -struct nstime_s { - uint64_t ns; -}; - -#endif /* JEMALLOC_INTERNAL_NSTIME_STRUCTS_H */ diff --git a/include/jemalloc/internal/nstime_types.h b/include/jemalloc/internal/nstime_types.h deleted file mode 100644 index 6e7e74cf..00000000 --- a/include/jemalloc/internal/nstime_types.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_NSTIME_TYPES_H -#define JEMALLOC_INTERNAL_NSTIME_TYPES_H - -typedef struct nstime_s nstime_t; - -/* Maximum supported number of seconds (~584 years). 
*/ -#define NSTIME_SEC_MAX KQU(18446744072) - -#define NSTIME_ZERO_INITIALIZER {0} - -#endif /* JEMALLOC_INTERNAL_NSTIME_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index 4fba2cd2..069e5356 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/nstime.c b/src/nstime.c index e0895476..9f5d192d 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/nstime.h" + #include "jemalloc/internal/assert.h" #define BILLION UINT64_C(1000000000) diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 02eaac2b..67caa86b 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -74,12 +74,10 @@ static const bool config_debug = /* Hermetic headers. */ # include "jemalloc/internal/assert.h" # include "jemalloc/internal/malloc_io.h" +# include "jemalloc/internal/nstime.h" # include "jemalloc/internal/util.h" /* Non-hermetic headers. */ -# include "jemalloc/internal/nstime_types.h" -# include "jemalloc/internal/nstime_structs.h" -# include "jemalloc/internal/nstime_externs.h" # include "jemalloc/internal/qr.h" # include "jemalloc/internal/ql.h" From 38e847c1c594fb9ad4862233f3602ade85da4e7f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 17 Apr 2017 16:35:04 -0700 Subject: [PATCH 0811/2608] Header refactoring: unify spin.h and move it out of the catch-all. 
--- .../internal/jemalloc_internal_includes.h | 3 -- include/jemalloc/internal/rtree_inlines.h | 2 ++ include/jemalloc/internal/spin.h | 36 +++++++++++++++++++ include/jemalloc/internal/spin_inlines.h | 29 --------------- include/jemalloc/internal/spin_structs.h | 8 ----- include/jemalloc/internal/spin_types.h | 8 ----- src/extent_dss.c | 1 + src/jemalloc.c | 1 + src/spin.c | 3 +- 9 files changed, 42 insertions(+), 49 deletions(-) create mode 100644 include/jemalloc/internal/spin.h delete mode 100644 include/jemalloc/internal/spin_inlines.h delete mode 100644 include/jemalloc/internal/spin_structs.h delete mode 100644 include/jemalloc/internal/spin_types.h diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index f31fed6a..669194d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/spin_types.h" #include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" #include "jemalloc/internal/ckh_types.h" @@ -65,7 +64,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/spin_structs.h" #include "jemalloc/internal/ticker_structs.h" #include "jemalloc/internal/ckh_structs.h" #include "jemalloc/internal/witness_structs.h" @@ -110,7 +108,6 @@ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/spin_inlines.h" #include "jemalloc/internal/prng_inlines.h" #include "jemalloc/internal/ticker_inlines.h" #include "jemalloc/internal/tsd_inlines.h" diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 6791f50c..030e5787 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ 
b/include/jemalloc/internal/rtree_inlines.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_RTREE_INLINES_H #define JEMALLOC_INTERNAL_RTREE_INLINES_H +#include "jemalloc/internal/spin.h" + #ifndef JEMALLOC_ENABLE_INLINE uintptr_t rtree_leafkey(uintptr_t key); uintptr_t rtree_subkey(uintptr_t key, unsigned level); diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h new file mode 100644 index 00000000..e2afc98c --- /dev/null +++ b/include/jemalloc/internal/spin.h @@ -0,0 +1,36 @@ +#ifndef JEMALLOC_INTERNAL_SPIN_H +#define JEMALLOC_INTERNAL_SPIN_H + +#ifdef JEMALLOC_SPIN_C_ +# define SPIN_INLINE extern inline +#else +# define SPIN_INLINE inline +#endif + +#define SPIN_INITIALIZER {0U} + +typedef struct { + unsigned iteration; +} spin_t; + +SPIN_INLINE void +spin_adaptive(spin_t *spin) { + volatile uint32_t i; + + if (spin->iteration < 5) { + for (i = 0; i < (1U << spin->iteration); i++) { + CPU_SPINWAIT; + } + spin->iteration++; + } else { +#ifdef _WIN32 + SwitchToThread(); +#else + sched_yield(); +#endif + } +} + +#undef SPIN_INLINE + +#endif /* JEMALLOC_INTERNAL_SPIN_H */ diff --git a/include/jemalloc/internal/spin_inlines.h b/include/jemalloc/internal/spin_inlines.h deleted file mode 100644 index 16573261..00000000 --- a/include/jemalloc/internal/spin_inlines.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_SPIN_INLINES_H -#define JEMALLOC_INTERNAL_SPIN_INLINES_H - -#ifndef JEMALLOC_ENABLE_INLINE -void spin_adaptive(spin_t *spin); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_SPIN_C_)) -JEMALLOC_INLINE void -spin_adaptive(spin_t *spin) { - volatile uint32_t i; - - if (spin->iteration < 5) { - for (i = 0; i < (1U << spin->iteration); i++) { - CPU_SPINWAIT; - } - spin->iteration++; - } else { -#ifdef _WIN32 - SwitchToThread(); -#else - sched_yield(); -#endif - } -} - -#endif - -#endif /* JEMALLOC_INTERNAL_SPIN_INLINES_H */ diff --git a/include/jemalloc/internal/spin_structs.h 
b/include/jemalloc/internal/spin_structs.h deleted file mode 100644 index ef71a765..00000000 --- a/include/jemalloc/internal/spin_structs.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_SPIN_STRUCTS_H -#define JEMALLOC_INTERNAL_SPIN_STRUCTS_H - -struct spin_s { - unsigned iteration; -}; - -#endif /* JEMALLOC_INTERNAL_SPIN_STRUCTS_H */ diff --git a/include/jemalloc/internal/spin_types.h b/include/jemalloc/internal/spin_types.h deleted file mode 100644 index 222e0698..00000000 --- a/include/jemalloc/internal/spin_types.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_SPIN_TYPES_H -#define JEMALLOC_INTERNAL_SPIN_TYPES_H - -typedef struct spin_s spin_t; - -#define SPIN_INITIALIZER {0U} - -#endif /* JEMALLOC_INTERNAL_SPIN_TYPES_H */ diff --git a/src/extent_dss.c b/src/extent_dss.c index 06bccc83..6b5d066f 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/spin.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 3dad7265..0297cf56 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/spin.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/spin.c b/src/spin.c index d2d39419..24372c26 100644 --- a/src/spin.c +++ b/src/spin.c @@ -1,3 +1,4 @@ #define JEMALLOC_SPIN_C_ #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/spin.h" From 45f087eb033927338b9df847eb9be6886ef48cf7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 16 Apr 2017 09:25:56 -0700 Subject: [PATCH 0812/2608] Revert "Remove BITMAP_USE_TREE." Some systems use a native 64 KiB page size, which means that the bitmap for the smallest size class can be 8192 bits, not just 512 bits as when the page size is 4 KiB. Linear search in bitmap_{sfu,ffu}() is unacceptably slow for such large bitmaps. This reverts commit 7c00f04ff40a34627e31488d02ff1081c749c7ba. 
--- include/jemalloc/internal/bitmap_inlines.h | 95 ++++++++++++++++++ include/jemalloc/internal/bitmap_structs.h | 11 +++ include/jemalloc/internal/bitmap_types.h | 107 +++++++++++++++++++++ src/bitmap.c | 78 +++++++++++++++ test/unit/bitmap.c | 16 +++ 5 files changed, 307 insertions(+) diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index fc4bad4c..c2362018 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -16,6 +16,12 @@ void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); #if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) JEMALLOC_INLINE bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { +#ifdef BITMAP_USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. */ + return (rg == 0); +#else size_t i; for (i = 0; i < binfo->ngroups; i++) { @@ -24,6 +30,7 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { } } return true; +#endif } JEMALLOC_INLINE bool @@ -52,6 +59,24 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) { + break; + } + } + } +#endif } /* ffu: find first unset >= bit. 
*/ @@ -59,6 +84,44 @@ JEMALLOC_INLINE size_t bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { assert(min_bit < binfo->nbits); +#ifdef BITMAP_USE_TREE + size_t bit = 0; + for (unsigned level = binfo->nlevels; level--;) { + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit + >> lg_bits_per_group)]; + unsigned group_nmask = ((min_bit > bit) ? (min_bit - bit) : 0) + >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + assert(group_nmask <= BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group_masked = group & group_mask; + if (group_masked == 0LU) { + if (group == 0LU) { + return binfo->nbits; + } + /* + * min_bit was preceded by one or more unset bits in + * this group, but there are no other unset bits in this + * group. Try again starting at the first bit of the + * next sibling. This will recurse at most once per + * non-root level. + */ + size_t sib_base = bit + (1U << lg_bits_per_group); + assert(sib_base > min_bit); + assert(sib_base > bit); + if (sib_base >= binfo->nbits) { + return binfo->nbits; + } + return bitmap_ffu(bitmap, binfo, sib_base); + } + bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - + LG_BITMAP_GROUP_NBITS); + } + assert(bit >= min_bit); + assert(bit < binfo->nbits); + return bit; +#else size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); @@ -72,6 +135,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { g = bitmap[i]; } while (i < binfo->ngroups); return binfo->nbits; +#endif } /* sfu: set first unset. 
*/ @@ -83,6 +147,16 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { assert(!bitmap_full(bitmap, binfo)); +#ifdef BITMAP_USE_TREE + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffs_lu(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + } +#else i = 0; g = bitmap[0]; while ((bit = ffs_lu(g)) == 0) { @@ -90,6 +164,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { g = bitmap[i]; } bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif bitmap_set(bitmap, binfo, bit); return bit; } @@ -111,6 +186,26 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); *gp = g; assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (!propagate) { + break; + } + } + } +#endif /* BITMAP_USE_TREE */ } #endif diff --git a/include/jemalloc/internal/bitmap_structs.h b/include/jemalloc/internal/bitmap_structs.h index dde15328..297ae669 100644 --- a/include/jemalloc/internal/bitmap_structs.h +++ b/include/jemalloc/internal/bitmap_structs.h @@ -10,8 +10,19 @@ struct bitmap_info_s { /* Logical number of bits in bitmap (stored at bottom level). */ size_t nbits; +#ifdef BITMAP_USE_TREE + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). 
+ */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; +#endif /* BITMAP_USE_TREE */ }; #endif /* JEMALLOC_INTERNAL_BITMAP_STRUCTS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index 091ccead..b334769f 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -21,10 +21,115 @@ typedef unsigned long bitmap_t; #define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) #define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define BITMAP_USE_TREE +#endif + /* Number of groups required to store a given number of bits. */ #define BITMAP_BITS2GROUPS(nbits) \ (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. 
+ */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. + */ +#ifdef BITMAP_USE_TREE + +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. 
The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. */ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} + +#else /* BITMAP_USE_TREE */ + #define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) #define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) @@ -35,4 +140,6 @@ typedef unsigned long bitmap_t; BITMAP_BITS2GROUPS(nbits) \ } +#endif /* BITMAP_USE_TREE */ + #endif /* JEMALLOC_INTERNAL_BITMAP_TYPES_H */ diff --git a/src/bitmap.c b/src/bitmap.c index 275636b9..468b3178 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -6,6 +6,82 @@ /******************************************************************************/ +#ifdef BITMAP_USE_TREE + +void +bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { + unsigned i; + size_t group_count; + + assert(nbits > 0); + assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); + + /* + * Compute the number of groups necessary to store nbits bits, and + * progressively work upward through the levels until reaching a level + * that requires only one group. 
+ */ + binfo->levels[0].group_offset = 0; + group_count = BITMAP_BITS2GROUPS(nbits); + for (i = 1; group_count > 1; i++) { + assert(i < BITMAP_MAX_LEVELS); + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + group_count = BITMAP_BITS2GROUPS(group_count); + } + binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + + group_count; + assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); + binfo->nlevels = i; + binfo->nbits = nbits; +} + +static size_t +bitmap_info_ngroups(const bitmap_info_t *binfo) { + return binfo->levels[binfo->nlevels].group_offset; +} + +void +bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { + size_t extra; + unsigned i; + + /* + * Bits are actually inverted with regard to the external bitmap + * interface. + */ + + if (fill) { + /* The "filled" bitmap starts out with all 0 bits. */ + memset(bitmap, 0, bitmap_size(binfo)); + return; + } + + /* + * The "empty" bitmap starts out with all 1 bits, except for trailing + * unused bits (if any). Note that each group uses bit 0 to correspond + * to the first logical bit in the group, so extra bits are the most + * significant bits of the last group. 
+ */ + memset(bitmap, 0xffU, bitmap_size(binfo)); + extra = (BITMAP_GROUP_NBITS - (binfo->nbits & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) { + bitmap[binfo->levels[1].group_offset - 1] >>= extra; + } + for (i = 1; i < binfo->nlevels; i++) { + size_t group_count = binfo->levels[i].group_offset - + binfo->levels[i-1].group_offset; + extra = (BITMAP_GROUP_NBITS - (group_count & + BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + if (extra != 0) { + bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + } + } +} + +#else /* BITMAP_USE_TREE */ + void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { assert(nbits > 0); @@ -37,6 +113,8 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { } } +#endif /* BITMAP_USE_TREE */ + size_t bitmap_size(const bitmap_info_t *binfo) { return (bitmap_info_ngroups(binfo) << LG_SIZEOF_BITMAP); diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index f65ed53e..cafb2039 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -103,8 +103,24 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { assert_zu_eq(binfo->nbits, binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); +#ifdef BITMAP_USE_TREE + assert_u_eq(binfo->nlevels, binfo_dyn.nlevels, + "Unexpected difference between static and dynamic initialization, " + "nbits=%zu", nbits); + { + unsigned i; + + for (i = 0; i < binfo->nlevels; i++) { + assert_zu_eq(binfo->levels[i].group_offset, + binfo_dyn.levels[i].group_offset, + "Unexpected difference between static and dynamic " + "initialization, nbits=%zu, level=%u", nbits, i); + } + } +#else assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups, "Unexpected difference between static and dynamic initialization"); +#endif } TEST_BEGIN(test_bitmap_initializer) { From da4cff0279b2e8f2b0482ae961f2e2f63662342d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 16 Apr 2017 16:23:32 -0700 Subject: [PATCH 
0813/2608] Support --with-lg-page values larger than system page size. All mappings continue to be PAGE-aligned, even if the system page size is smaller. This change is primarily intended to provide a mechanism for supporting multiple page sizes with the same binary; smaller page sizes work better in conjunction with jemalloc's design. This resolves #467. --- INSTALL | 9 +- include/jemalloc/internal/pages_externs.h | 6 +- include/jemalloc/internal/private_symbols.txt | 1 - src/extent_mmap.c | 60 +----- src/jemalloc.c | 4 +- src/pages.c | 182 +++++++++++++----- test/unit/pack.c | 2 +- test/unit/pages.c | 2 +- 8 files changed, 155 insertions(+), 111 deletions(-) diff --git a/INSTALL b/INSTALL index 042f8291..705f0ff5 100644 --- a/INSTALL +++ b/INSTALL @@ -219,9 +219,12 @@ any of the following arguments (not a definitive list) to 'configure': documentation. --with-lg-page= - Specify the base 2 log of the system page size. This option is only useful - when cross compiling, since the configure script automatically determines - the host's page size by default. + Specify the base 2 log of the allocator page size, which must in turn be at + least as large as the system page size. By default the configure script + determines the host's page size and sets the allocator page size equal to + the system page size, so this option need not be specified unless the + system page size may change between configuration and execution, e.g. when + cross compiling. --with-lg-page-sizes= Specify the comma-separated base 2 logs of the page sizes to support. 
This diff --git a/include/jemalloc/internal/pages_externs.h b/include/jemalloc/internal/pages_externs.h index 7e34efb3..af9a01b8 100644 --- a/include/jemalloc/internal/pages_externs.h +++ b/include/jemalloc/internal/pages_externs.h @@ -16,16 +16,14 @@ static const bool pages_can_purge_forced = #endif ; -void *pages_map(void *addr, size_t size, bool *commit); +void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); -void *pages_trim(void *addr, size_t alloc_size, size_t leadsize, - size_t size, bool *commit); bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); -void pages_boot(void); +bool pages_boot(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 34c27897..649a689f 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -360,7 +360,6 @@ pages_map pages_nohuge pages_purge_forced pages_purge_lazy -pages_trim pages_unmap percpu_arena_choose percpu_arena_ind_limit diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 9381dc16..b1862753 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -6,66 +6,14 @@ /******************************************************************************/ -static void * -extent_alloc_mmap_slow(size_t size, size_t alignment, bool *zero, - bool *commit) { - void *ret; - size_t alloc_size; - - alloc_size = size + alignment - PAGE; - /* Beware size_t wrap-around. 
*/ - if (alloc_size < size) { - return NULL; - } - do { - void *pages; - size_t leadsize; - pages = pages_map(NULL, alloc_size, commit); - if (pages == NULL) { - return NULL; - } - leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) - - (uintptr_t)pages; - ret = pages_trim(pages, alloc_size, leadsize, size, commit); - } while (ret == NULL); - - assert(ret != NULL); - *zero = true; - return ret; -} - void * extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - size_t offset; - - /* - * Ideally, there would be a way to specify alignment to mmap() (like - * NetBSD has), but in the absence of such a feature, we have to work - * hard to efficiently create aligned mappings. The reliable, but - * slow method is to create a mapping that is over-sized, then trim the - * excess. However, that always results in one or two calls to - * pages_unmap(). - * - * Optimistically try mapping precisely the right amount before falling - * back to the slow method, with the expectation that the optimistic - * approach works most of the time. 
- */ - - assert(alignment != 0); - - ret = pages_map(new_addr, size, commit); - if (ret == NULL || ret == new_addr) { - return ret; + void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment, + PAGE), commit); + if (ret == NULL) { + return NULL; } - assert(new_addr == NULL); - offset = ALIGNMENT_ADDR2OFFSET(ret, alignment); - if (offset != 0) { - pages_unmap(ret, size); - return extent_alloc_mmap_slow(size, alignment, zero, commit); - } - assert(ret != NULL); *zero = true; return ret; diff --git a/src/jemalloc.c b/src/jemalloc.c index 0297cf56..ea632c2e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1220,7 +1220,9 @@ malloc_init_hard_a0_locked() { } } } - pages_boot(); + if (pages_boot()) { + return true; + } if (base_boot(TSDN_NULL)) { return true; } diff --git a/src/pages.c b/src/pages.c index 7fa254f7..46c307b8 100644 --- a/src/pages.c +++ b/src/pages.c @@ -12,6 +12,9 @@ /******************************************************************************/ /* Data. */ +/* Actual operating system page size, detected during bootstrap, <= PAGE. */ +static size_t os_page; + #ifndef _WIN32 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) # define PAGES_PROT_DECOMMIT (PROT_NONE) @@ -20,20 +23,26 @@ static int mmap_flags; static bool os_overcommits; /******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ -void * -pages_map(void *addr, size_t size, bool *commit) { - assert(PAGE_ADDR2BASE(addr) == addr); - assert(PAGE_CEILING(size) == size); +static void os_pages_unmap(void *addr, size_t size); - void *ret; +/******************************************************************************/ +static void * +os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { + assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); + assert(ALIGNMENT_CEILING(size, os_page) == size); assert(size != 0); if (os_overcommits) { *commit = true; } + void *ret; #ifdef _WIN32 /* * If VirtualAlloc can't allocate at the given address when one is @@ -59,19 +68,48 @@ pages_map(void *addr, size_t size, bool *commit) { /* * We succeeded in mapping memory, but not in the right place. */ - pages_unmap(ret, size); + os_pages_unmap(ret, size); ret = NULL; } #endif - assert(ret == NULL || (addr == NULL && ret != addr) - || (addr != NULL && ret == addr)); + assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && + ret == addr)); return ret; } -void -pages_unmap(void *addr, size_t size) { - assert(PAGE_ADDR2BASE(addr) == addr); - assert(PAGE_CEILING(size) == size); +static void * +os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, + bool *commit) { + void *ret = (void *)((uintptr_t)addr + leadsize); + + assert(alloc_size >= leadsize + size); +#ifdef _WIN32 + os_pages_unmap(addr, alloc_size); + void *new_addr = os_pages_map(ret, size, PAGE, commit); + if (new_addr == ret) { + return ret; + } + if (new_addr != NULL) { + os_pages_unmap(new_addr, size); + } + return NULL; +#else + size_t trailsize = alloc_size - leadsize - size; + + if (leadsize != 0) { + os_pages_unmap(addr, leadsize); + } + if (trailsize != 0) { + os_pages_unmap((void *)((uintptr_t)ret + size), trailsize); + } + return ret; +#endif +} + +static void +os_pages_unmap(void *addr, size_t size) { + assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); + assert(ALIGNMENT_CEILING(size, 
os_page) == size); #ifdef _WIN32 if (VirtualFree(addr, 0, MEM_RELEASE) == 0) @@ -84,50 +122,80 @@ pages_unmap(void *addr, size_t size) { buferror(get_errno(), buf, sizeof(buf)); malloc_printf(": Error in " #ifdef _WIN32 - "VirtualFree" + "VirtualFree" #else - "munmap" + "munmap" #endif - "(): %s\n", buf); + "(): %s\n", buf); if (opt_abort) { abort(); } } } -void * -pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, - bool *commit) { - void *ret = (void *)((uintptr_t)addr + leadsize); - - assert(alloc_size >= leadsize + size); -#ifdef _WIN32 - { - void *new_addr; - - pages_unmap(addr, alloc_size); - new_addr = pages_map(ret, size, commit); - if (new_addr == ret) { - return ret; - } - if (new_addr) { - pages_unmap(new_addr, size); - } +static void * +pages_map_slow(size_t size, size_t alignment, bool *commit) { + size_t alloc_size = size + alignment - os_page; + /* Beware size_t wrap-around. */ + if (alloc_size < size) { return NULL; } -#else - { - size_t trailsize = alloc_size - leadsize - size; - if (leadsize != 0) { - pages_unmap(addr, leadsize); - } - if (trailsize != 0) { - pages_unmap((void *)((uintptr_t)ret + size), trailsize); + void *ret; + do { + void *pages = os_pages_map(NULL, alloc_size, alignment, commit); + if (pages == NULL) { + return NULL; } + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment) + - (uintptr_t)pages; + ret = os_pages_trim(pages, alloc_size, leadsize, size, commit); + } while (ret == NULL); + + assert(ret != NULL); + assert(PAGE_ADDR2BASE(ret) == ret); + return ret; +} + +void * +pages_map(void *addr, size_t size, size_t alignment, bool *commit) { + assert(alignment >= PAGE); + assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr); + + /* + * Ideally, there would be a way to specify alignment to mmap() (like + * NetBSD has), but in the absence of such a feature, we have to work + * hard to efficiently create aligned mappings. 
The reliable, but + * slow method is to create a mapping that is over-sized, then trim the + * excess. However, that always results in one or two calls to + * os_pages_unmap(), and it can leave holes in the process's virtual + * memory map if memory grows downward. + * + * Optimistically try mapping precisely the right amount before falling + * back to the slow method, with the expectation that the optimistic + * approach works most of the time. + */ + + void *ret = os_pages_map(addr, size, os_page, commit); + if (ret == NULL || ret == addr) { return ret; } -#endif + assert(addr == NULL); + if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) { + os_pages_unmap(ret, size); + return pages_map_slow(size, alignment, commit); + } + + assert(PAGE_ADDR2BASE(ret) == ret); + return ret; +} + +void +pages_unmap(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + + os_pages_unmap(addr, size); } static bool @@ -155,7 +223,7 @@ pages_commit_impl(void *addr, size_t size, bool commit) { * We succeeded in mapping memory, but not in the right * place. 
*/ - pages_unmap(result, size); + os_pages_unmap(result, size); return true; } return false; @@ -239,6 +307,21 @@ pages_nohuge(void *addr, size_t size) { #endif } +static size_t +os_page_detect(void) { +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + long result = sysconf(_SC_PAGESIZE); + if (result == -1) { + return LG_PAGE; + } + return (size_t)result; +#endif +} + #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool os_overcommits_sysctl(void) { @@ -300,8 +383,17 @@ os_overcommits_proc(void) { } #endif -void +bool pages_boot(void) { + os_page = os_page_detect(); + if (os_page > PAGE) { + malloc_write(": Unsupported system page size\n"); + if (opt_abort) { + abort(); + } + return true; + } + #ifndef _WIN32 mmap_flags = MAP_PRIVATE | MAP_ANON; #endif @@ -318,4 +410,6 @@ pages_boot(void) { #else os_overcommits = false; #endif + + return false; } diff --git a/test/unit/pack.c b/test/unit/pack.c index 5da4ae12..edfc548f 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -6,7 +6,7 @@ #if LG_PAGE <= 14 #define SZ (ZU(1) << (LG_PAGE - 2)) #else -#define SZ 4096 +#define SZ ZU(4096) #endif /* diff --git a/test/unit/pages.c b/test/unit/pages.c index 30d69592..4457f369 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -7,7 +7,7 @@ TEST_BEGIN(test_pages_huge) { alloc_size = HUGEPAGE * 2 - PAGE; commit = true; - pages = pages_map(NULL, alloc_size, &commit); + pages = pages_map(NULL, alloc_size, PAGE, &commit); assert_ptr_not_null(pages, "Unexpected pages_map() error"); hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); From acf4c8ae33539a219711791c3556016b853b7d09 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 18 Apr 2017 15:00:14 -0700 Subject: [PATCH 0814/2608] Output 4 counters for bin mutexes instead of just 2. 
--- src/stats.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/stats.c b/src/stats.c index bbba4679..435dfb9f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -126,19 +126,21 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"bins\": [\n"); } else { + char *mutex_counters = " n_lock_ops n_waiting" + " n_spin_acq max_wait_ns\n"; if (config_tcache) { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs" " curslabs regs pgs util nfills" - " nflushes newslabs reslabs" - " contention max_wait_ns\n"); + " nflushes newslabs reslabs%s", + mutex ? mutex_counters : "\n"); } else { malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs" " curslabs regs pgs util newslabs" - " reslabs contention max_wait_ns\n"); + " reslabs%s", mutex ? mutex_counters : "\n"); } } for (j = 0, in_gap = false; j < nbins; j++) { @@ -245,6 +247,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, &max_wait, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.mutex.num_ops", i, j, &num_ops, uint64_t); + uint64_t mutex_stats[num_mutex_prof_counters]; + if (mutex) { + read_arena_bin_mutex_stats(i, j, mutex_stats); + } char rate[6]; if (get_rate_str(num_wait, num_ops, rate)) { @@ -259,22 +265,32 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "%20zu %3u %12zu %12"FMTu64 " %12"FMTu64" %12"FMTu64" %12zu" " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %12s %12"FMTu64"\n", + " %12"FMTu64" %12"FMTu64" %12"FMTu64, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nfills, - nflushes, nslabs, nreslabs, rate, max_wait); + nflushes, nslabs, nreslabs); } else { malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12"FMTu64 " %12"FMTu64" %12"FMTu64" 
%12zu" " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64" %12s %12"FMTu64"\n", + " %12"FMTu64, reg_size, j, curregs * reg_size, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nslabs, - nreslabs, rate, max_wait); + nreslabs); + } + if (mutex) { + malloc_cprintf(write_cb, cbopaque, + " %12"FMTu64" %12"FMTu64" %12"FMTu64 + " %12"FMTu64"\n", + mutex_stats[mutex_counter_num_ops], + mutex_stats[mutex_counter_num_wait], + mutex_stats[mutex_counter_num_spin_acq], + mutex_stats[mutex_counter_max_wait_time]); + } else { + malloc_cprintf(write_cb, cbopaque, "\n"); } } } From fed9a880c811fc56f7563efcb0a70c6ffe401c5f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 19 Apr 2017 16:14:54 -0700 Subject: [PATCH 0815/2608] Trim before commit in extent_recycle(). This avoids creating clean committed pages as a side effect of aligned allocation. For configurations that decommit memory, purged pages are decommitted, and decommitted extents cannot be coalesced with committed extents. Unless the clean committed pages happen to be selected during allocation, they cause unnecessary permanent extent fragmentation. This resolves #766. 
--- src/extent.c | 8 ++++++-- src/extent_mmap.c | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index 6b7da3f9..d08ccdb3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -829,12 +829,16 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, rtree_ctx, extents, false, new_addr, size, pad, alignment, slab, - zero, commit); + zero, &committed); if (extent == NULL) { return NULL; } + if (committed) { + *commit = true; + } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, extents, new_addr, size, pad, alignment, slab, szind, extent); @@ -996,7 +1000,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, assert(new_addr == NULL || leadsize == 0); assert(alloc_size >= leadsize + esize); size_t trailsize = alloc_size - leadsize - esize; - if (extent_zeroed_get(extent)) { + if (extent_zeroed_get(extent) && extent_committed_get(extent)) { *zero = true; } if (extent_committed_get(extent)) { diff --git a/src/extent_mmap.c b/src/extent_mmap.c index b1862753..be099373 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -15,7 +15,9 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, return NULL; } assert(ret != NULL); - *zero = true; + if (*commit) { + *zero = true; + } return ret; } From 5aa46f027df42636d4aa1fb70d1078a6c5f96420 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 20 Apr 2017 15:19:02 -0700 Subject: [PATCH 0816/2608] Bypass extent tracking for auto arenas. Tracking extents is required by arena_reset. To support this, the extent linkage was used for tracking 1) large allocations, and 2) full slabs. However modifying the extent linkage could be an expensive operation as it likely incurs cache misses. 
Since we forbid arena_reset on auto arenas, let's bypass the linkage operations for auto arenas. --- .../internal/jemalloc_internal_inlines_a.h | 1 + .../internal/jemalloc_internal_inlines_b.h | 6 ++++ .../internal/jemalloc_internal_inlines_c.h | 9 ++--- src/arena.c | 33 ++++++++++++------- src/ctl.c | 7 +--- src/large.c | 25 +++++++++----- 6 files changed, 49 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 0d922f12..38fa3c70 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -24,6 +24,7 @@ size_t sa2u(size_t size, size_t alignment); arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); arena_t *arena_choose(tsd_t *tsd, arena_t *arena); arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); +bool arena_is_auto(arena_t *arena); arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing); arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index f22708a5..ab54a598 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -70,6 +70,12 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) { return arena_choose_impl(tsd, arena, true); } +JEMALLOC_INLINE bool +arena_is_auto(arena_t *arena) { + assert(narenas_auto > 0); + return (arena_ind_get(arena) < narenas_auto); +} + JEMALLOC_ALWAYS_INLINE extent_t * iealloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 8c793819..70ac6669 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ 
b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -54,8 +54,7 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(size != 0); assert(!is_internal || tcache == NULL); - assert(!is_internal || arena == NULL || arena_ind_get(arena) < - narenas_auto); + assert(!is_internal || arena == NULL || arena_is_auto(arena)); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); @@ -79,8 +78,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, assert(usize != 0); assert(usize == sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); - assert(!is_internal || arena == NULL || arena_ind_get(arena) < - narenas_auto); + assert(!is_internal || arena == NULL || arena_is_auto(arena)); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); @@ -113,8 +111,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) { assert(ptr != NULL); assert(!is_internal || tcache == NULL); - assert(!is_internal || arena_ind_get(iaalloc(tsdn, ptr)) < - narenas_auto); + assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); diff --git a/src/arena.c b/src/arena.c index bb45a90c..94a4b5ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1032,13 +1032,24 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) == 0); + /* + * Tracking extents is required by arena_reset, which is not allowed + * for auto arenas. 
Bypass this step to avoid touching the extent + * linkage (often results in cache misses) for auto arenas. + */ + if (arena_is_auto(arena)) { + return; + } extent_list_append(&bin->slabs_full, slab); } static void -arena_bin_slabs_full_remove(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) { + if (arena_is_auto(arena)) { + return; + } extent_list_remove(&bin->slabs_full, slab); } @@ -1106,7 +1117,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { } for (slab = extent_list_first(&bin->slabs_full); slab != NULL; slab = extent_list_first(&bin->slabs_full)) { - arena_bin_slabs_full_remove(bin, slab); + arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -1285,8 +1296,8 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, extent_t *slab; bin_info = &arena_bin_info[binind]; - if (bin->slabcur != NULL) { - arena_bin_slabs_full_insert(bin, bin->slabcur); + if (!arena_is_auto(arena) && bin->slabcur != NULL) { + arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; } slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind); @@ -1319,7 +1330,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, return ret; } - arena_bin_slabs_full_insert(bin, bin->slabcur); + arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; } @@ -1559,7 +1570,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, } static void -arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { /* Dissociate slab from bin. */ if (slab == bin->slabcur) { bin->slabcur = NULL; @@ -1573,7 +1584,7 @@ arena_dissociate_bin_slab(extent_t *slab, arena_bin_t *bin) { * into the non-full slabs heap. 
*/ if (bin_info->nregs == 1) { - arena_bin_slabs_full_remove(bin, slab); + arena_bin_slabs_full_remove(arena, bin, slab); } else { arena_bin_slabs_nonfull_remove(bin, slab); } @@ -1611,7 +1622,7 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, if (extent_nfree_get(bin->slabcur) > 0) { arena_bin_slabs_nonfull_insert(bin, bin->slabcur); } else { - arena_bin_slabs_full_insert(bin, bin->slabcur); + arena_bin_slabs_full_insert(arena, bin, bin->slabcur); } bin->slabcur = slab; if (config_stats) { @@ -1637,10 +1648,10 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); unsigned nfree = extent_nfree_get(slab); if (nfree == bin_info->nregs) { - arena_dissociate_bin_slab(slab, bin); + arena_dissociate_bin_slab(arena, slab, bin); arena_dalloc_bin_slab(tsdn, arena, slab, bin); } else if (nfree == 1 && slab != bin->slabcur) { - arena_bin_slabs_full_remove(bin, slab); + arena_bin_slabs_full_remove(arena, bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); } diff --git a/src/ctl.c b/src/ctl.c index 069e5356..1b0ee053 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1846,13 +1846,8 @@ arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen, WRITEONLY(); MIB_UNSIGNED(*arena_ind, 1); - if (*arena_ind < narenas_auto) { - ret = EFAULT; - goto label_return; - } - *arena = arena_get(tsd_tsdn(tsd), *arena_ind, false); - if (*arena == NULL) { + if (*arena == NULL || arena_is_auto(*arena)) { ret = EFAULT; goto label_return; } diff --git a/src/large.c b/src/large.c index 629656d0..36e8be91 100644 --- a/src/large.c +++ b/src/large.c @@ -46,10 +46,13 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return NULL; } - /* Insert extent into large. */ - malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_append(&arena->large, extent); - malloc_mutex_unlock(tsdn, &arena->large_mtx); + /* See comments in arena_bin_slabs_full_insert(). 
*/ + if (!arena_is_auto(arena)) { + /* Insert extent into large. */ + malloc_mutex_lock(tsdn, &arena->large_mtx); + extent_list_append(&arena->large, extent); + malloc_mutex_unlock(tsdn, &arena->large_mtx); + } if (config_prof && arena_prof_accum(tsdn, arena, usize)) { prof_idump(tsdn); } @@ -318,16 +321,20 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, static void large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, bool junked_locked) { - if (!junked_locked) { - malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); - malloc_mutex_unlock(tsdn, &arena->large_mtx); + /* See comments in arena_bin_slabs_full_insert(). */ + if (!arena_is_auto(arena)) { + malloc_mutex_lock(tsdn, &arena->large_mtx); + extent_list_remove(&arena->large, extent); + malloc_mutex_unlock(tsdn, &arena->large_mtx); + } large_dalloc_maybe_junk(extent_addr_get(extent), extent_usize_get(extent)); } else { malloc_mutex_assert_owner(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); + if (!arena_is_auto(arena)) { + extent_list_remove(&arena->large, extent); + } } arena_extent_dalloc_large_prep(tsdn, arena, extent); } From 4403c9ab441eabb6c55d93b99836f7126e46be75 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 20 Apr 2017 17:21:37 -0700 Subject: [PATCH 0817/2608] Remove --disable-tcache. Simplify configuration by removing the --disable-tcache option, but replace the testing for that configuration with --with-malloc-conf=tcache:false. Fix the thread.arena and thread.tcache.flush mallctls to work correctly if tcache is disabled. This partially resolves #580. 
--- .travis.yml | 14 +-- INSTALL | 5 - configure.ac | 17 --- doc/jemalloc.xml.in | 34 +----- include/jemalloc/internal/arena_inlines_a.h | 2 +- .../internal/jemalloc_internal_defs.h.in | 7 -- .../internal/jemalloc_internal_inlines_a.h | 19 ++- .../internal/jemalloc_internal_inlines_b.h | 2 +- .../jemalloc/internal/jemalloc_preamble.h.in | 7 -- include/jemalloc/internal/tcache_inlines.h | 5 - include/jemalloc/internal/tcache_structs.h | 9 -- scripts/gen_run_tests.py | 2 +- scripts/gen_travis.py | 15 ++- src/arena.c | 56 ++++----- src/ctl.c | 66 +++------- src/jemalloc.c | 13 +- src/stats.c | 114 +++++------------- src/tcache.c | 44 ++----- test/integration/thread_tcache_enabled.c | 25 +--- test/test.sh.in | 1 - test/unit/decay.c | 101 +++++++--------- test/unit/decay.sh | 5 +- test/unit/mallctl.c | 9 +- test/unit/prof_idump.sh | 8 +- test/unit/stats.c | 20 +-- 25 files changed, 188 insertions(+), 412 deletions(-) diff --git a/.travis.yml b/.travis.yml index 2235206d..4838cb37 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,7 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx @@ -31,7 +31,7 @@ matrix: - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang 
CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: @@ -45,7 +45,7 @@ matrix: - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: @@ -65,7 +65,7 @@ matrix: packages: - gcc-multilib - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: apt: packages: @@ -75,13 +75,13 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-tcache" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-tcache" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: diff --git a/INSTALL b/INSTALL index 705f0ff5..f2c0fa8b 100644 --- a/INSTALL +++ b/INSTALL @@ -153,11 +153,6 @@ any of the following arguments (not a definitive list) to 'configure': Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. ---disable-tcache - Disable thread-specific caches for small objects. Objects are cached and - released in bulk, thus reducing the total number of mutex operations. See - the "opt.tcache" option for usage details. - --disable-munmap Disable virtual memory deallocation via munmap(2); instead keep track of the virtual memory for later use. munmap() is disabled by default (i.e. diff --git a/configure.ac b/configure.ac index f6d08ccd..669c1b38 100644 --- a/configure.ac +++ b/configure.ac @@ -1137,22 +1137,6 @@ if test "x$enable_prof" = "x1" ; then fi AC_SUBST([enable_prof]) -dnl Enable thread-specific caching by default. -AC_ARG_ENABLE([tcache], - [AS_HELP_STRING([--disable-tcache], [Disable per thread caches])], -[if test "x$enable_tcache" = "xno" ; then - enable_tcache="0" -else - enable_tcache="1" -fi -], -[enable_tcache="1"] -) -if test "x$enable_tcache" = "x1" ; then - AC_DEFINE([JEMALLOC_TCACHE], [ ]) -fi -AC_SUBST([enable_tcache]) - dnl Indicate whether adjacent virtual memory mappings automatically coalesce dnl (and fragment on demand). 
if test "x${maps_coalesce}" = "x1" ; then @@ -2181,7 +2165,6 @@ AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) -AC_MSG_RESULT([tcache : ${enable_tcache}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3b98395d..2b321a7c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -510,13 +510,12 @@ for (i = 0; i < nbins; i++) { sense to reduce the number of arenas if an application does not make much use of the allocation functions. - In addition to multiple arenas, unless - is specified during configuration, this - allocator supports thread-specific caching, in order to make it possible to - completely avoid synchronization for most allocation requests. Such caching - allows very fast allocation in the common case, but it increases memory - usage and fragmentation, since a bounded number of objects can remain - allocated in each thread cache. + In addition to multiple arenas, this allocator supports + thread-specific caching, in order to make it possible to completely avoid + synchronization for most allocation requests. Such caching allows very fast + allocation in the common case, but it increases memory usage and + fragmentation, since a bounded number of objects can remain allocated in + each thread cache. Memory is conceptually broken into extents. Extents are always aligned to multiples of the page size. This alignment makes it possible to @@ -839,16 +838,6 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", build configuration. - - - config.tcache - (bool) - r- - - was not specified - during build configuration. - - config.tls @@ -1095,7 +1084,6 @@ malloc_conf = "xmalloc:true";]]> opt.tcache (bool) r- - [] Thread-specific caching (tcache) enabled/disabled. 
When there are multiple threads, each thread uses a tcache for objects up to @@ -1112,7 +1100,6 @@ malloc_conf = "xmalloc:true";]]> opt.lg_tcache_max (size_t) r- - [] Maximum size class (log base 2) to cache in the thread-specific cache (tcache). At a minimum, all small size classes @@ -1370,7 +1357,6 @@ malloc_conf = "xmalloc:true";]]> thread.tcache.enabled (bool) rw - [] Enable/disable calling thread's tcache. The tcache is implicitly flushed as a side effect of becoming @@ -1384,7 +1370,6 @@ malloc_conf = "xmalloc:true";]]> thread.tcache.flush (void) -- - [] Flush calling thread's thread-specific cache (tcache). This interface releases all cached objects and internal data structures @@ -1440,7 +1425,6 @@ malloc_conf = "xmalloc:true";]]> tcache.create (unsigned) r- - [] Create an explicit thread-specific cache (tcache) and return an identifier that can be passed to the tcache.flush (unsigned) -w - [] Flush the specified thread-specific cache (tcache). The same considerations apply to this interface as to tcache.destroy (unsigned) -w - [] Flush the specified thread-specific cache (tcache) and make the identifier available for use during a future tcache creation. @@ -1873,7 +1855,6 @@ struct extent_hooks_s { arenas.tcache_max (size_t) r- - [] Maximum thread-cached size class. @@ -1892,7 +1873,6 @@ struct extent_hooks_s { arenas.nhbins (unsigned) r- - [] Total number of thread cache bin size classes. @@ -2575,7 +2555,6 @@ struct extent_hooks_s { stats.arenas.<i>.bins.<j>.nfills (uint64_t) r- - [ ] Cumulative number of tcache fills. @@ -2585,7 +2564,6 @@ struct extent_hooks_s { stats.arenas.<i>.bins.<j>.nflushes (uint64_t) r- - [ ] Cumulative number of tcache flushes. 
diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index cf92342b..2bd5ce75 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -58,7 +58,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { /* Set new arena/tcache associations. */ arena_migrate(tsd, oldind, newind); tcache_t *tcache = tcache_get(tsd); - if (config_tcache && tcache) { + if (tcache != NULL) { tcache_arena_reassociate(tsd_tsdn(tsd), tcache, newarena); } diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 28eb0b34..d3d76944 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -154,13 +154,6 @@ /* Use gcc intrinsics for profile backtracing if defined. */ #undef JEMALLOC_PROF_GCC -/* - * JEMALLOC_TCACHE enables a thread-specific caching layer for small objects. - * This makes it possible to allocate/deallocate objects without any locking - * when the cache is in the steady state. - */ -#undef JEMALLOC_TCACHE - /* * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage * segment (DSS). 
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 38fa3c70..c28bd7cf 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -323,7 +323,8 @@ malloc_getcpu(void) { JEMALLOC_ALWAYS_INLINE unsigned percpu_arena_choose(void) { unsigned arena_ind; - assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); + assert(have_percpu_arena && (percpu_arena_mode != + percpu_arena_disabled)); malloc_cpuid_t cpuid = malloc_getcpu(); assert(cpuid >= 0); @@ -420,19 +421,16 @@ tcache_large_bin_get(tcache_t *tcache, szind_t binind) { JEMALLOC_ALWAYS_INLINE bool tcache_available(tsd_t *tsd) { - cassert(config_tcache); - /* * Thread specific auto tcache might be unavailable if: 1) during tcache * initialization, or 2) disabled through thread.tcache.enabled mallctl * or config options. This check covers all cases. */ - if (likely(tsd_tcache_enabled_get(tsd) == true)) { - /* Associated arena == null implies tcache init in progress. */ - if (tsd_tcachep_get(tsd)->arena != NULL) { - assert(tcache_small_bin_get(tsd_tcachep_get(tsd), - 0)->avail != NULL); - } + if (likely(tsd_tcache_enabled_get(tsd))) { + /* Associated arena == NULL implies tcache init in progress. 
*/ + assert(tsd_tcachep_get(tsd)->arena == NULL || + tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail != + NULL); return true; } @@ -441,9 +439,6 @@ tcache_available(tsd_t *tsd) { JEMALLOC_ALWAYS_INLINE tcache_t * tcache_get(tsd_t *tsd) { - if (!config_tcache) { - return NULL; - } if (!tcache_available(tsd)) { return NULL; } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index ab54a598..2fd371c3 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -24,7 +24,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { if (unlikely(ret == NULL)) { ret = arena_choose_hard(tsd, internal); assert(ret); - if (config_tcache && tcache_available(tsd)) { + if (tcache_available(tsd)) { tcache_t *tcache = tcache_get(tsd); if (tcache->arena != NULL) { /* See comments in tcache_data_init().*/ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 7c796c61..0e2ce312 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -111,13 +111,6 @@ static const bool config_stats = false #endif ; -static const bool config_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; static const bool config_tls = #ifdef JEMALLOC_TLS true diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index d425b82a..67d35b58 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -6,7 +6,6 @@ #ifndef JEMALLOC_ENABLE_INLINE void tcache_event(tsd_t *tsd, tcache_t *tcache); -void tcache_flush(void); bool tcache_enabled_get(tsd_t *tsd); tcache_t *tcache_get(tsd_t *tsd); void tcache_enabled_set(tsd_t *tsd, bool enabled); @@ -25,15 +24,11 @@ tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); #if 
(defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) JEMALLOC_INLINE bool tcache_enabled_get(tsd_t *tsd) { - cassert(config_tcache); - return tsd_tcache_enabled_get(tsd); } JEMALLOC_INLINE void tcache_enabled_set(tsd_t *tsd, bool enabled) { - cassert(config_tcache); - bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index c43e59b7..fe27f362 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -40,23 +40,14 @@ struct tcache_s { * element of tbins is initialized to point to the proper offset within * this array. */ -#ifdef JEMALLOC_TCACHE tcache_bin_t tbins_small[NBINS]; -#else - tcache_bin_t tbins_small[0]; -#endif /* Data accessed less often below. */ ql_elm(tcache_t) link; /* Used for aggregating stats. */ arena_t *arena; /* Associated arena. */ szind_t next_gc_bin; /* Next bin to GC. */ -#ifdef JEMALLOC_TCACHE /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[NBINS]; tcache_bin_t tbins_large[NSIZES-NBINS]; -#else - uint8_t lg_fill_div[0]; - tcache_bin_t tbins_large[0]; -#endif }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 729ecb1a..9e46ba90 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -18,7 +18,7 @@ possible_config_opts = [ '--enable-debug', '--enable-prof', '--disable-stats', - '--disable-tcache', + '--with-malloc-conf=tcache:false', ] print 'set -e' diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 35a10ee6..4649cb71 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -24,11 +24,11 @@ script: # The 'default' configuration is gcc, on linux, with no compiler or configure # flags. We also test with clang, -m32, --enable-debug, --enable-prof, -# --disable-stats, and --disable-tcache. 
To avoid abusing travis though, we -# don't test all 2**7 = 128 possible combinations of these; instead, we only -# test combinations of up to 2 'unusual' settings, under the hope that bugs -# involving interactions of such settings are rare. -# things at once, for C(7, 0) + C(7, 1) + C(7, 2) = 29 +# --disable-stats, and --with-malloc-conf=tcache:false. To avoid abusing +# travis though, we don't test all 2**7 = 128 possible combinations of these; +# instead, we only test combinations of up to 2 'unusual' settings, under the +# hope that bugs involving interactions of such settings are rare. +# Things at once, for C(7, 0) + C(7, 1) + C(7, 2) = 29 MAX_UNUSUAL_OPTIONS = 2 os_default = 'linux' @@ -40,7 +40,10 @@ compilers_unusual = 'CC=clang CXX=clang++' compiler_flag_unusuals = ['-m32'] configure_flag_unusuals = [ - '--enable-debug', '--enable-prof', '--disable-stats', '--disable-tcache', + '--enable-debug', + '--enable-prof', + '--disable-stats', + '--with-malloc-conf=tcache:false', ] all_unusuals = ( diff --git a/src/arena.c b/src/arena.c index 94a4b5ef..c2eca449 100644 --- a/src/arena.c +++ b/src/arena.c @@ -283,31 +283,27 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_unlock(tsdn, &arena->stats); - if (config_tcache) { - tcache_bin_t *tbin; - tcache_t *tcache; - - /* tcache_bytes counts currently cached bytes. */ - atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); - malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach(tcache, &arena->tcache_ql, link) { - szind_t i = 0; - for (; i < NBINS; i++) { - tbin = tcache_small_bin_get(tcache, i); - arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * index2size(i)); - } - for (; i < nhbins; i++) { - tbin = tcache_large_bin_get(tcache, i); - arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * index2size(i)); - } + /* tcache_bytes counts currently cached bytes. 
*/ + atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); + malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + tcache_t *tcache; + ql_foreach(tcache, &arena->tcache_ql, link) { + szind_t i = 0; + for (; i < NBINS; i++) { + tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + arena_stats_accum_zu(&astats->tcache_bytes, + tbin->ncached * index2size(i)); + } + for (; i < nhbins; i++) { + tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + arena_stats_accum_zu(&astats->tcache_bytes, + tbin->ncached * index2size(i)); } - malloc_mutex_prof_read(tsdn, - &astats->mutex_prof_data[arena_prof_mutex_tcache_list], - &arena->tcache_ql_mtx); - malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } + malloc_mutex_prof_read(tsdn, + &astats->mutex_prof_data[arena_prof_mutex_tcache_list], + &arena->tcache_ql_mtx); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); #define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ malloc_mutex_lock(tsdn, &arena->mtx); \ @@ -342,10 +338,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bstats[i].ndalloc += bin->stats.ndalloc; bstats[i].nrequests += bin->stats.nrequests; bstats[i].curregs += bin->stats.curregs; - if (config_tcache) { - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - } + bstats[i].nfills += bin->stats.nfills; + bstats[i].nflushes += bin->stats.nflushes; bstats[i].nslabs += bin->stats.nslabs; bstats[i].reslabs += bin->stats.reslabs; bstats[i].curslabs += bin->stats.curslabs; @@ -1867,9 +1861,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (arena_stats_init(tsdn, &arena->stats)) { goto label_error; } - } - if (config_stats && config_tcache) { ql_new(&arena->tcache_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", WITNESS_RANK_TCACHE_QL)) { @@ -2007,7 +1999,7 @@ arena_prefork0(tsdn_t *tsdn, arena_t *arena) { void arena_prefork1(tsdn_t *tsdn, arena_t *arena) { - if (config_stats && config_tcache) { + if (config_stats) { 
malloc_mutex_prefork(tsdn, &arena->tcache_ql_mtx); } } @@ -2056,7 +2048,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { extents_postfork_parent(tsdn, &arena->extents_retained); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); - if (config_stats && config_tcache) { + if (config_stats) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } } @@ -2076,7 +2068,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { extents_postfork_child(tsdn, &arena->extents_retained); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); - if (config_stats && config_tcache) { + if (config_stats) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } } diff --git a/src/ctl.c b/src/ctl.c index 1b0ee053..a1842956 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -70,7 +70,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_tcache) CTL_PROTO(config_tls) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) @@ -255,7 +254,6 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("tcache"), CTL(config_tcache)}, {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} @@ -777,10 +775,8 @@ ARENA_PROF_MUTEXES accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); - if (config_tcache) { - accum_atomic_zu(&sdstats->astats.tcache_bytes, - &astats->astats.tcache_bytes); - } + accum_atomic_zu(&sdstats->astats.tcache_bytes, + &astats->astats.tcache_bytes); for (i = 0; i < NBINS; i++) { sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; @@ -793,12 +789,9 @@ ARENA_PROF_MUTEXES } else { assert(astats->bstats[i].curregs == 0); } - if 
(config_tcache) { - sdstats->bstats[i].nfills += - astats->bstats[i].nfills; - sdstats->bstats[i].nflushes += - astats->bstats[i].nflushes; - } + sdstats->bstats[i].nfills += astats->bstats[i].nfills; + sdstats->bstats[i].nflushes += + astats->bstats[i].nflushes; sdstats->bstats[i].nslabs += astats->bstats[i].nslabs; sdstats->bstats[i].reslabs += astats->bstats[i].reslabs; if (!destroyed) { @@ -1457,7 +1450,6 @@ CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_tcache, bool) CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) @@ -1475,8 +1467,8 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) -CTL_RO_NL_CGEN(config_tcache, opt_tcache, opt_tcache, bool) -CTL_RO_NL_CGEN(config_tcache, opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) @@ -1536,12 +1528,9 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } /* Set new arena/tcache associations. 
*/ arena_migrate(tsd, oldind, newind); - if (config_tcache) { - tcache_t *tcache = tsd_tcachep_get(tsd); - if (tcache != NULL) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - newarena); - } + if (tcache_available(tsd)) { + tcache_arena_reassociate(tsd_tsdn(tsd), + tsd_tcachep_get(tsd), newarena); } } @@ -1565,10 +1554,6 @@ thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; bool oldval; - if (!config_tcache) { - return ENOENT; - } - oldval = tcache_enabled_get(tsd); if (newp != NULL) { if (newlen != sizeof(bool)) { @@ -1589,8 +1574,9 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - if (!config_tcache) { - return ENOENT; + if (!tcache_available(tsd)) { + ret = EFAULT; + goto label_return; } READONLY(); @@ -1670,10 +1656,6 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned tcache_ind; - if (!config_tcache) { - return ENOENT; - } - READONLY(); if (tcaches_create(tsd, &tcache_ind)) { ret = EFAULT; @@ -1692,10 +1674,6 @@ tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned tcache_ind; - if (!config_tcache) { - return ENOENT; - } - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -1716,10 +1694,6 @@ tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned tcache_ind; - if (!config_tcache) { - return ENOENT; - } - WRITEONLY(); tcache_ind = UINT_MAX; WRITE(tcache_ind, unsigned); @@ -2150,9 +2124,9 @@ arenas_muzzy_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) -CTL_RO_NL_CGEN(config_tcache, arenas_tcache_max, tcache_maxclass, size_t) +CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) -CTL_RO_NL_CGEN(config_tcache, arenas_nhbins, nhbins, unsigned) 
+CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) @@ -2380,7 +2354,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) -CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_tcache_bytes, +CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, @@ -2480,9 +2454,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->extents_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); - if (config_tcache) { - MUTEX_PROF_RESET(arena->tcache_ql_mtx); - } + MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < NBINS; i++) { @@ -2502,9 +2474,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, arenas_i(mib[2])->astats->bstats[mib[4]].nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, arenas_i(mib[2])->astats->bstats[mib[4]].curregs, size_t) -CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nfills, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nfills, arenas_i(mib[2])->astats->bstats[mib[4]].nfills, uint64_t) -CTL_RO_CGEN(config_stats && config_tcache, stats_arenas_i_bins_j_nflushes, +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nflushes, arenas_i(mib[2])->astats->bstats[mib[4]].nflushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs, arenas_i(mib[2])->astats->bstats[mib[4]].nslabs, uint64_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index ea632c2e..e08226c9 100644 --- 
a/src/jemalloc.c +++ b/src/jemalloc.c @@ -682,7 +682,7 @@ arenas_tdata_cleanup(tsd_t *tsd) { static void stats_print_atexit(void) { - if (config_tcache && config_stats) { + if (config_stats) { tsdn_t *tsdn; unsigned narenas, i; @@ -1106,12 +1106,9 @@ malloc_conf_init(void) { if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true) } - if (config_tcache) { - CONF_HANDLE_BOOL(opt_tcache, "tcache", true) - CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, - "lg_tcache_max", -1, - (sizeof(size_t) << 3) - 1) - } + CONF_HANDLE_BOOL(opt_tcache, "tcache", true) + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", + -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { int i; bool match = false; @@ -1236,7 +1233,7 @@ malloc_init_hard_a0_locked() { prof_boot1(); } arena_boot(); - if (config_tcache && tcache_boot(TSDN_NULL)) { + if (tcache_boot(TSDN_NULL)) { return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) { diff --git a/src/stats.c b/src/stats.c index 435dfb9f..4074c940 100644 --- a/src/stats.c +++ b/src/stats.c @@ -128,20 +128,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } else { char *mutex_counters = " n_lock_ops n_waiting" " n_spin_acq max_wait_ns\n"; - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs" - " curslabs regs pgs util nfills" - " nflushes newslabs reslabs%s", - mutex ? mutex_counters : "\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs" - " curslabs regs pgs util newslabs" - " reslabs%s", mutex ? mutex_counters : "\n"); - } + malloc_cprintf(write_cb, cbopaque, + "bins: size ind allocated nmalloc" + " ndalloc nrequests curregs curslabs regs" + " pgs util nfills nflushes newslabs" + " reslabs%s", mutex ? 
mutex_counters : "\n"); } for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; @@ -173,12 +164,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, &nrequests, uint64_t); - if (config_tcache) { - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, - &nfills, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, - &nflushes, uint64_t); - } + CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, &nfills, + uint64_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, &nflushes, + uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, &nreslabs, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, @@ -190,23 +179,13 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" "\t\t\t\t\t\t\"curregs\": %zu,\n" - "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n", - nmalloc, - ndalloc, - curregs, - nrequests); - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n", - nfills, - nflushes); - } - malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" + "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n" "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" "\t\t\t\t\t\t\"curslabs\": %zu%s\n", - nreslabs, curslabs, mutex ? "," : ""); - + nmalloc, ndalloc, curregs, nrequests, nfills, + nflushes, nreslabs, curslabs, mutex ? 
"," : ""); if (mutex) { uint64_t mutex_stats[num_mutex_prof_counters]; read_arena_bin_mutex_stats(i, j, mutex_stats); @@ -260,27 +239,13 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - if (config_tcache) { - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64 - " %12"FMTu64" %12"FMTu64" %12zu" - " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64" %12"FMTu64" %12"FMTu64, - reg_size, j, curregs * reg_size, nmalloc, - ndalloc, nrequests, curregs, curslabs, - nregs, slab_size / page, util, nfills, - nflushes, nslabs, nreslabs); - } else { - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64 - " %12"FMTu64" %12"FMTu64" %12zu" - " %12zu %4u %3zu %-5s %12"FMTu64 - " %12"FMTu64, - reg_size, j, curregs * reg_size, nmalloc, - ndalloc, nrequests, curregs, curslabs, - nregs, slab_size / page, util, nslabs, - nreslabs); - } + malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" + FMTu64" %12"FMTu64" %12"FMTu64" %12zu %12zu %4u" + " %3zu %-5s %12"FMTu64" %12"FMTu64" %12"FMTu64 + " %12"FMTu64, reg_size, j, curregs * reg_size, + nmalloc, ndalloc, nrequests, curregs, curslabs, + nregs, slab_size / page, util, nfills, nflushes, + nslabs, nreslabs); if (mutex) { malloc_cprintf(write_cb, cbopaque, " %12"FMTu64" %12"FMTu64" %12"FMTu64 @@ -423,14 +388,7 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); arena_prof_mutex_ind_t i, last_mutex; last_mutex = num_arena_prof_mutexes - 1; - if (!config_tcache) { - last_mutex--; - } for (i = 0; i < num_arena_prof_mutexes; i++) { - if (!config_tcache && - i == arena_prof_mutex_tcache_list) { - continue; - } mutex_stats_output_json(write_cb, cbopaque, arena_mutex_names[i], mutex_stats[i], "\t\t\t\t\t", (i == last_mutex)); @@ -440,10 +398,6 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), } else { arena_prof_mutex_ind_t i; for (i = 0; i < num_arena_prof_mutexes; i++) { - if 
(!config_tcache && - i == arena_prof_mutex_tcache_list) { - continue; - } mutex_stats_output(write_cb, cbopaque, arena_mutex_names[i], mutex_stats[i], i == 0); } @@ -659,16 +613,13 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "internal: %12zu\n", internal); } - if (config_tcache) { - CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, - size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); - } else { - malloc_cprintf(write_cb, cbopaque, - "tcache: %12zu\n", tcache_bytes); - } + CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); + } else { + malloc_cprintf(write_cb, cbopaque, + "tcache: %12zu\n", tcache_bytes); } CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); @@ -761,7 +712,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(tcache, ",") CONFIG_WRITE_BOOL_JSON(tls, ",") CONFIG_WRITE_BOOL_JSON(utrace, ",") CONFIG_WRITE_BOOL_JSON(xmalloc, "") @@ -959,11 +909,9 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nbins\": %u,\n", nbins); - if (config_tcache) { - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nhbins\": %u,\n", uv); - } + CTL_GET("arenas.nhbins", &uv, unsigned); + malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nhbins\": %u,\n", + uv); malloc_cprintf(write_cb, cbopaque, "\t\t\t\"bin\": [\n"); diff --git a/src/tcache.c b/src/tcache.c index 971c016b..72d1e47f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -7,13 +7,7 @@ /******************************************************************************/ /* Data. 
*/ -bool opt_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; +bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; tcache_bin_info_t *tcache_bin_info; @@ -93,7 +87,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; - assert(tcache->arena); + assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind, config_prof ? tcache->prof_accumbytes : 0); if (config_prof) { @@ -304,7 +298,7 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { static void tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { arena_t *arena = tcache->arena; - assert(arena); + assert(arena != NULL); if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); @@ -383,10 +377,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { /* Initialize auto tcache (embedded in TSD). 
*/ bool tsd_tcache_data_init(tsd_t *tsd) { - if (!config_tcache) { - return false; - } - tcache_t *tcache = &tsd->tcache; assert(tcache_small_bin_get(tcache, 0)->avail == NULL); size_t size = stack_nelms * sizeof(void *); @@ -458,9 +448,9 @@ tcache_create_explicit(tsd_t *tsd) { static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { - unsigned i; + assert(tcache->arena != NULL); - for (i = 0; i < NBINS; i++) { + for (unsigned i = 0; i < NBINS; i++) { tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); @@ -468,7 +458,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tbin->tstats.nrequests == 0); } } - for (; i < nhbins; i++) { + for (unsigned i = NBINS; i < nhbins; i++) { tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); @@ -477,20 +467,17 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } - arena_t *arena = tcache->arena; - if (config_prof && arena && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd_tsdn(tsd), arena, tcache->prof_accumbytes)) { + if (config_prof && tcache->prof_accumbytes > 0 && + arena_prof_accum(tsd_tsdn(tsd), tcache->arena, + tcache->prof_accumbytes)) { prof_idump(tsd_tsdn(tsd)); } } void tcache_flush(void) { - tsd_t *tsd; - - cassert(config_tcache); - - tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); + assert(tcache_available(tsd)); tcache_flush_cache(tsd, tsd_tcachep_get(tsd)); } @@ -514,10 +501,6 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { /* For auto tcache (embedded in TSD) only. */ void tcache_cleanup(tsd_t *tsd) { - if (!config_tcache) { - return; - } - tcache_t *tcache = tsd_tcachep_get(tsd); if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); @@ -660,10 +643,6 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { bool tcache_boot(tsdn_t *tsdn) { - cassert(config_tcache); - - unsigned i; - /* If necessary, clamp opt_lg_tcache_max. 
*/ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SMALL_MAXCLASS) { @@ -685,6 +664,7 @@ tcache_boot(tsdn_t *tsdn) { return true; } stack_nelms = 0; + unsigned i; for (i = 0; i < NBINS; i++) { if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index a0ba56b4..0c343a6c 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -1,29 +1,11 @@ #include "test/jemalloc_test.h" -static const bool config_tcache = -#ifdef JEMALLOC_TCACHE - true -#else - false -#endif - ; - void * thd_start(void *arg) { - int err; - size_t sz; bool e0, e1; - - sz = sizeof(bool); - if ((err = mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, - 0))) { - if (err == ENOENT) { - assert_false(config_tcache, - "ENOENT should only be returned if tcache is " - "disabled"); - } - goto label_ENOENT; - } + size_t sz = sizeof(bool); + assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); if (e0) { e1 = false; @@ -78,7 +60,6 @@ thd_start(void *arg) { free(malloc(1)); return NULL; -label_ENOENT: test_skip("\"thread.tcache.enabled\" mallctl not available"); return NULL; } diff --git a/test/test.sh.in b/test/test.sh.in index f0f0f979..4d0e0df6 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,7 +43,6 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). $(enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ - enable_tcache=@enable_tcache@ \ . 
@srcroot@${t}.sh && \ export_malloc_conf && \ ${t}@exe@ @abs_srcroot@ @abs_objroot@) diff --git a/test/unit/decay.c b/test/unit/decay.c index 471a558c..26359faf 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -174,16 +174,15 @@ TEST_BEGIN(test_decay_ticks) { assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - int err; /* Set up a manually managed arena for test. */ arena_ind = do_arena_create(0, 0); /* Migrate to the new arena, and get the ticker. */ unsigned old_arena_ind; size_t sz_arena_ind = sizeof(old_arena_ind); - err = mallctl("thread.arena", (void *)&old_arena_ind, &sz_arena_ind, - (void *)&arena_ind, sizeof(arena_ind)); - assert_d_eq(err, 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, + &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, + "Unexpected mallctl() failure"); decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); assert_ptr_not_null(decay_ticker, "Unexpected failure getting decay ticker"); @@ -310,51 +309,48 @@ TEST_BEGIN(test_decay_ticks) { * Test tcache fill/flush interactions for large and small size classes, * using an explicit tcache. 
*/ - if (config_tcache) { - unsigned tcache_ind, i; - size_t tcache_sizes[2]; - tcache_sizes[0] = large0; - tcache_sizes[1] = 1; + unsigned tcache_ind, i; + size_t tcache_sizes[2]; + tcache_sizes[0] = large0; + tcache_sizes[1] = 1; - size_t tcache_max, sz_tcache_max; - sz_tcache_max = sizeof(tcache_max); - err = mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz_tcache_max, NULL, 0); - assert_d_eq(err, 0, "Unexpected mallctl() failure"); + size_t tcache_max, sz_tcache_max; + sz_tcache_max = sizeof(tcache_max); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); - sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + sz = sizeof(unsigned); + assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); - for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { - sz = tcache_sizes[i]; + for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { + sz = tcache_sizes[i]; - /* tcache fill. */ - tick0 = ticker_read(decay_ticker); - p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); + /* tcache fill. */ + tick0 = ticker_read(decay_ticker); + p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + tick1 = ticker_read(decay_ticker); + assert_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache fill " + "(sz=%zu)", sz); + /* tcache flush. */ + dallocx(p, MALLOCX_TCACHE(tcache_ind)); + tick0 = ticker_read(decay_ticker); + assert_d_eq(mallctl("tcache.flush", NULL, NULL, + (void *)&tcache_ind, sizeof(unsigned)), 0, + "Unexpected mallctl failure"); + tick1 = ticker_read(decay_ticker); + + /* Will only tick if it's in tcache. 
*/ + if (sz <= tcache_max) { assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache fill " - "(sz=%zu)", sz); - /* tcache flush. */ - dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); - assert_d_eq(mallctl("tcache.flush", NULL, NULL, - (void *)&tcache_ind, sizeof(unsigned)), 0, - "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); - - /* Will only tick if it's in tcache. */ - if (sz <= tcache_max) { - assert_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache " - "flush (sz=%zu)", sz); - } else { - assert_u32_eq(tick1, tick0, - "Unexpected ticker tick during tcache " - "flush (sz=%zu)", sz); - } + "Expected ticker to tick during tcache " + "flush (sz=%zu)", sz); + } else { + assert_u32_eq(tick1, tick0, + "Unexpected ticker tick during tcache " + "flush (sz=%zu)", sz); } } } @@ -422,18 +418,11 @@ TEST_BEGIN(test_decay_ticker) { * the ticker triggers purging. */ - if (config_tcache) { - size_t tcache_max; - - size_t sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); - } else { - size_t sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", &large, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); - } + size_t tcache_max; + size_t sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + large = nallocx(tcache_max + 1, flags); do_purge(arena_ind); uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); diff --git a/test/unit/decay.sh b/test/unit/decay.sh index 0df17884..a41489b0 100644 --- a/test/unit/decay.sh +++ b/test/unit/decay.sh @@ -1,6 +1,3 @@ #!/bin/sh -export MALLOC_CONF="dirty_decay_time:1,muzzy_decay_time:1" -if [ "x${enable_tcache}" = "x1" ] ; then - export MALLOC_CONF="${MALLOC_CONF},lg_tcache_max:0" -fi +export 
MALLOC_CONF="dirty_decay_time:1,muzzy_decay_time:1,lg_tcache_max:0" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index b8c6a255..945d8290 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -136,7 +136,6 @@ TEST_BEGIN(test_mallctl_config) { TEST_MALLCTL_CONFIG(prof_libgcc, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool); TEST_MALLCTL_CONFIG(stats, bool); - TEST_MALLCTL_CONFIG(tcache, bool); TEST_MALLCTL_CONFIG(tls, bool); TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(xmalloc, bool); @@ -170,8 +169,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, zero, fill); TEST_MALLCTL_OPT(bool, utrace, utrace); TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); - TEST_MALLCTL_OPT(bool, tcache, tcache); - TEST_MALLCTL_OPT(size_t, lg_tcache_max, tcache); + TEST_MALLCTL_OPT(bool, tcache, always); + TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); @@ -213,8 +212,6 @@ TEST_END TEST_BEGIN(test_tcache_none) { void *p0, *q, *p1; - test_skip_if(!config_tcache); - /* Allocate p and q. 
*/ p0 = mallocx(42, 0); assert_ptr_not_null(p0, "Unexpected mallocx() failure"); @@ -243,8 +240,6 @@ TEST_BEGIN(test_tcache) { unsigned i; size_t sz, psz, qsz; - test_skip_if(!config_tcache); - psz = 42; qsz = nallocx(psz, 0) + 1; diff --git a/test/unit/prof_idump.sh b/test/unit/prof_idump.sh index fdb5813f..4dc599a3 100644 --- a/test/unit/prof_idump.sh +++ b/test/unit/prof_idump.sh @@ -1,12 +1,8 @@ #!/bin/sh +export MALLOC_CONF="tcache:false" if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0,lg_prof_interval:0" - if [ "x${enable_tcache}" = "x1" ] ; then - export MALLOC_CONF="${MALLOC_CONF},tcache:false" - fi -elif [ "x${enable_tcache}" = "x1" ] ; then - export MALLOC_CONF="tcache:false" + export MALLOC_CONF="${MALLOC_CONF},prof:true,prof_accum:true,prof_active:false,lg_prof_sample:0,lg_prof_interval:0" fi diff --git a/test/unit/stats.c b/test/unit/stats.c index 1619f5b6..f5ee1287 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -83,7 +83,7 @@ TEST_BEGIN(test_stats_arenas_summary) { dallocx(large, 0); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); + opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -150,7 +150,7 @@ TEST_BEGIN(test_stats_arenas_small) { assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); + opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); @@ -230,6 +230,10 @@ TEST_BEGIN(test_stats_arenas_bins) { uint64_t nslabs, nreslabs; int expected = config_stats ? 0 : ENOENT; + /* Make sure allocation below isn't satisfied by tcache. 
*/ + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); + unsigned arena_ind, old_arena_ind; sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), @@ -243,7 +247,7 @@ TEST_BEGIN(test_stats_arenas_bins) { assert_ptr_not_null(p, "Unexpected malloc() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - config_tcache ? 0 : ENOENT, "Unexpected mallctl() result"); + opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); @@ -266,11 +270,11 @@ TEST_BEGIN(test_stats_arenas_bins) { sz = sizeof(uint64_t); gen_mallctl_str(cmd, "nfills", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), - config_tcache ? expected : ENOENT, "Unexpected mallctl() result"); + assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nflushes", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), - config_tcache ? 
expected : ENOENT, "Unexpected mallctl() result"); + assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nslabs", arena_ind); assert_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected, @@ -292,7 +296,7 @@ TEST_BEGIN(test_stats_arenas_bins) { "nrequests should be greater than zero"); assert_zu_gt(curregs, 0, "allocated should be greater than zero"); - if (config_tcache) { + if (opt_tcache) { assert_u64_gt(nfills, 0, "At least one fill should have occurred"); assert_u64_gt(nflushes, 0, From ae248a216098add2d91358a49758b181bcbb4d35 Mon Sep 17 00:00:00 2001 From: Jim Chen Date: Wed, 19 Apr 2017 15:22:10 -0400 Subject: [PATCH 0818/2608] Use openat syscall if available Some architectures like AArch64 may not have the open syscall because it was superseded by the openat syscall, so check and use SYS_openat if SYS_open is not available. Additionally, Android headers for AArch64 define SYS_open to __NR_open, even though __NR_open is undefined. Undefine SYS_open in that case so SYS_openat is used. --- include/jemalloc/internal/jemalloc_internal_decls.h | 5 +++++ src/pages.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 21a4183d..d75de0b9 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -14,6 +14,11 @@ # if !defined(SYS_write) && defined(__NR_write) # define SYS_write __NR_write # endif +# if defined(SYS_open) && defined(__aarch64__) + /* Android headers may define SYS_open to __NR_open even though + * __NR_open may not exist on AArch64 (superseded by __NR_openat). 
*/ +# undef SYS_open +# endif # include # endif # include diff --git a/src/pages.c b/src/pages.c index 46c307b8..86907aa5 100644 --- a/src/pages.c +++ b/src/pages.c @@ -351,6 +351,9 @@ os_overcommits_proc(void) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); +#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); #endif From b2a8453a3fa653dc71f82ac9df64b012917f6b8c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 21 Apr 2017 10:07:01 -0700 Subject: [PATCH 0819/2608] Remove --disable-tls. This option is no longer useful, because TLS is correctly configured automatically on all supported platforms. This partially resolves #580. --- INSTALL | 5 ----- configure.ac | 27 ++++----------------------- doc/jemalloc.xml.in | 10 ---------- src/ctl.c | 3 --- src/stats.c | 1 - test/unit/mallctl.c | 1 - 6 files changed, 4 insertions(+), 43 deletions(-) diff --git a/INSTALL b/INSTALL index f2c0fa8b..2800df46 100644 --- a/INSTALL +++ b/INSTALL @@ -186,11 +186,6 @@ any of the following arguments (not a definitive list) to 'configure': practice, this feature usually has little impact on performance unless thread-specific caching is disabled. ---disable-tls - Disable thread-local storage (TLS), which allows for fast access to - thread-local variables via the __thread keyword. If TLS is available, - jemalloc uses it for several purposes. - --disable-cache-oblivious Disable cache-oblivious large allocation alignment for large allocation requests with no alignment constraints. 
If this feature is disabled, all diff --git a/configure.ac b/configure.ac index 669c1b38..4709e27c 100644 --- a/configure.ac +++ b/configure.ac @@ -1670,27 +1670,14 @@ if test "x$enable_lazy_lock" = "x1" ; then fi AC_SUBST([enable_lazy_lock]) -AC_ARG_ENABLE([tls], - [AS_HELP_STRING([--disable-tls], [Disable thread-local storage (__thread keyword)])], -if test "x$enable_tls" = "xno" ; then +dnl Automatically configure TLS. +if test "x${force_tls}" = "x1" ; then + enable_tls="1" +elif test "x${force_tls}" = "x0" ; then enable_tls="0" else enable_tls="1" fi -, -enable_tls="" -) -if test "x${enable_tls}" = "x" ; then - if test "x${force_tls}" = "x1" ; then - AC_MSG_RESULT([Forcing TLS to avoid allocator/threading bootstrap issues]) - enable_tls="1" - elif test "x${force_tls}" = "x0" ; then - AC_MSG_RESULT([Forcing no TLS to avoid allocator/threading bootstrap issues]) - enable_tls="0" - else - enable_tls="1" - fi -fi if test "x${enable_tls}" = "x1" ; then AC_MSG_CHECKING([for TLS]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( @@ -1709,12 +1696,7 @@ else fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x1" ; then - if test "x${force_tls}" = "x0" ; then - AC_MSG_WARN([TLS enabled despite being marked unusable on this platform]) - fi AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) -elif test "x${force_tls}" = "x1" ; then - AC_MSG_WARN([TLS disabled despite being marked critical on this platform]) fi dnl ============================================================================ @@ -2170,7 +2152,6 @@ AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) -AC_MSG_RESULT([tls : ${enable_tls}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([===============================================================================]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2b321a7c..7dace367 
100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -838,16 +838,6 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", build configuration. - - - config.tls - (bool) - r- - - was not specified during - build configuration. - - config.utrace diff --git a/src/ctl.c b/src/ctl.c index a1842956..e9143dd4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -70,7 +70,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_tls) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) @@ -254,7 +253,6 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("tls"), CTL(config_tls)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -1450,7 +1448,6 @@ CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_tls, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) diff --git a/src/stats.c b/src/stats.c index 4074c940..71c9a94d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -712,7 +712,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(tls, ",") CONFIG_WRITE_BOOL_JSON(utrace, ",") CONFIG_WRITE_BOOL_JSON(xmalloc, "") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 945d8290..8afd25ab 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -136,7 +136,6 @@ TEST_BEGIN(test_mallctl_config) { TEST_MALLCTL_CONFIG(prof_libgcc, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool); TEST_MALLCTL_CONFIG(stats, bool); - TEST_MALLCTL_CONFIG(tls, bool); 
TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(xmalloc, bool); From 3823effe126ec602c438b02eb70d4c258a2f0e3f Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 21 Apr 2017 11:00:36 -0700 Subject: [PATCH 0820/2608] Remove --enable-ivsalloc. Continue to use ivsalloc() when --enable-debug is specified (and add assertions to guard against 0 size), but stop providing a documented explicit semantics-changing band-aid to dodge undefined behavior in sallocx() and malloc_usable_size(). ivsalloc() remains compiled in, unlike when #211 restored --enable-ivsalloc, and if JEMALLOC_FORCE_IVSALLOC is defined during compilation, sallocx() and malloc_usable_size() will still use ivsalloc(). This partially resolves #580. --- INSTALL | 7 ------- configure.ac | 19 +------------------ .../internal/jemalloc_internal_defs.h.in | 6 ------ .../jemalloc/internal/jemalloc_preamble.h.in | 18 +++++++++++------- src/jemalloc.c | 15 +++++++++++---- 5 files changed, 23 insertions(+), 42 deletions(-) diff --git a/INSTALL b/INSTALL index 2800df46..6c53bfc0 100644 --- a/INSTALL +++ b/INSTALL @@ -104,7 +104,6 @@ any of the following arguments (not a definitive list) to 'configure': --enable-debug Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. - Implies --enable-ivsalloc. --enable-code-coverage Enable code coverage support, for use during jemalloc test development. @@ -123,12 +122,6 @@ any of the following arguments (not a definitive list) to 'configure': Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. ---enable-ivsalloc - Enable validation code for malloc_usable_size() and sallocx(), which - verifies that pointers reside within jemalloc-owned extents before - dereferencing metadata. This incurs a minor performance hit, and causes - the functions to return 0 for failed lookups. 
- --enable-prof Enable heap profiling and leak detection functionality. See the "opt.prof" option documentation for usage details. When enabled, there are several diff --git a/configure.ac b/configure.ac index 4709e27c..73450b4c 100644 --- a/configure.ac +++ b/configure.ac @@ -958,7 +958,7 @@ fi dnl Do not compile with debugging by default. AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], - [Build debugging code (implies --enable-ivsalloc)])], + [Build debugging code])], [if test "x$enable_debug" = "xno" ; then enable_debug="0" else @@ -972,26 +972,9 @@ if test "x$enable_debug" = "x1" ; then fi if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) - enable_ivsalloc="1" fi AC_SUBST([enable_debug]) -dnl Do not validate pointers by default. -AC_ARG_ENABLE([ivsalloc], - [AS_HELP_STRING([--enable-ivsalloc], - [Validate pointers passed through the public API])], -[if test "x$enable_ivsalloc" = "xno" ; then - enable_ivsalloc="0" -else - enable_ivsalloc="1" -fi -], -[enable_ivsalloc="0"] -) -if test "x$enable_ivsalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_IVSALLOC], [ ]) -fi - dnl Only optimize if not debugging. if test "x$enable_debug" = "x0" ; then if test "x$GCC" = "xyes" ; then diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index d3d76944..44896ae2 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -224,12 +224,6 @@ #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS -/* - * JEMALLOC_IVSALLOC enables ivsalloc(), which verifies that pointers reside - * within jemalloc-owned extents before dereferencing them. - */ -#undef JEMALLOC_IVSALLOC - /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. 
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 0e2ce312..dc21cf49 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -132,13 +132,6 @@ static const bool config_xmalloc = false #endif ; -static const bool config_ivsalloc = -#ifdef JEMALLOC_IVSALLOC - true -#else - false -#endif - ; static const bool config_cache_oblivious = #ifdef JEMALLOC_CACHE_OBLIVIOUS true @@ -164,5 +157,16 @@ static const bool have_percpu_arena = false #endif ; +/* + * Undocumented, and not recommended; the application should take full + * responsibility for tracking provenance. + */ +static const bool force_ivsalloc = +#ifdef JEMALLOC_FORCE_IVSALLOC + true +#else + false +#endif + ; #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index e08226c9..27a9fd7b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2678,12 +2678,14 @@ je_sallocx(const void *ptr, int flags) { tsdn_t *tsdn; assert(malloc_initialized() || IS_INITIALIZER); + assert(ptr != NULL); tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); - if (config_ivsalloc) { + if (config_debug || force_ivsalloc) { usize = ivsalloc(tsdn, ptr); + assert(force_ivsalloc || usize != 0); } else { usize = isalloc(tsdn, ptr); } @@ -2885,10 +2887,15 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { tsdn = tsdn_fetch(); witness_assert_lockless(tsdn); - if (config_ivsalloc) { - ret = ivsalloc(tsdn, ptr); + if (unlikely(ptr == NULL)) { + ret = 0; } else { - ret = (ptr == NULL) ? 0 : isalloc(tsdn, ptr); + if (config_debug || force_ivsalloc) { + ret = ivsalloc(tsdn, ptr); + assert(force_ivsalloc || ret != 0); + } else { + ret = isalloc(tsdn, ptr); + } } witness_assert_lockless(tsdn); From 425253e2cd64e23585f557bfa82789a5208d06e1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 21 Apr 2017 13:47:49 -0700 Subject: [PATCH 0821/2608] Enable -Wundef, when supported. 
This can catch bugs in which one header defines a numeric constant, and another uses it without including the defining header. Undefined preprocessor symbols expand to '0', so that this will compile fine, silently doing the math wrong. --- configure.ac | 5 ++++- include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/jemalloc_preamble.h.in | 1 + src/jemalloc.c | 10 +--------- src/nstime.c | 6 +++--- 5 files changed, 12 insertions(+), 13 deletions(-) diff --git a/configure.ac b/configure.ac index 73450b4c..0a717255 100644 --- a/configure.ac +++ b/configure.ac @@ -241,6 +241,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wall]) JE_CFLAGS_ADD([-Wshorten-64-to-32]) JE_CFLAGS_ADD([-Wsign-compare]) + JE_CFLAGS_ADD([-Wundef]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then @@ -824,7 +825,9 @@ else JEMALLOC_PREFIX="je_" fi] ) -if test "x$JEMALLOC_PREFIX" != "x" ; then +if test "x$JEMALLOC_PREFIX" = "x" ; then + AC_DEFINE([JEMALLOC_IS_MALLOC]) +else JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 44896ae2..1bec2c93 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -312,4 +312,7 @@ /* config.malloc_conf options string. */ #undef JEMALLOC_CONFIG_MALLOC_CONF +/* If defined, jemalloc takes the malloc/free/etc. symbol names. 
*/ +#undef JEMALLOC_IS_MALLOC + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index dc21cf49..79827fc4 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -10,6 +10,7 @@ #define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET +# undef JEMALLOC_IS_MALLOC # define JEMALLOC_N(n) jet_##n # include "jemalloc/internal/public_namespace.h" # define JEMALLOC_NO_RENAME diff --git a/src/jemalloc.c b/src/jemalloc.c index 27a9fd7b..de858e36 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2285,15 +2285,7 @@ je_valloc(size_t size) { } #endif -/* - * is_malloc(je_malloc) is some macro magic to detect if jemalloc_defs.h has - * #define je_malloc malloc - */ -#define malloc_is_malloc 1 -#define is_malloc_(a) malloc_is_ ## a -#define is_malloc(a) is_malloc_(a) - -#if ((is_malloc(je_malloc) == 1) && defined(JEMALLOC_GLIBC_MALLOC_HOOK)) +#if defined(JEMALLOC_IS_MALLOC) && defined(JEMALLOC_GLIBC_MALLOC_HOOK) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible * to inconsistently reference libc's malloc(3)-compatible functions diff --git a/src/nstime.c b/src/nstime.c index 9f5d192d..20c00422 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -96,7 +96,7 @@ nstime_get(nstime_t *time) { nstime_init(time, ticks_100ns * 100); } -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE +#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE) # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -105,7 +105,7 @@ nstime_get(nstime_t *time) { clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); nstime_init2(time, ts.tv_sec, ts.tv_nsec); } -#elif JEMALLOC_HAVE_CLOCK_MONOTONIC +#elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC) # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { @@ -114,7 +114,7 @@ nstime_get(nstime_t *time) { clock_gettime(CLOCK_MONOTONIC, &ts); nstime_init2(time, ts.tv_sec, 
ts.tv_nsec); } -#elif JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +#elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME) # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { From 3aac709029f053e3329302771ad2069724f461e7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Apr 2017 14:49:17 -0700 Subject: [PATCH 0822/2608] Output MALLOC_CONF and debug cmd when test failure happens. --- test/test.sh.in | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/test/test.sh.in b/test/test.sh.in index 4d0e0df6..39302fff 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -41,15 +41,15 @@ for t in $@; do # execute the test. This allows the shell script to set MALLOC_CONF, which # is then used to set @JEMALLOC_CPREFIX@MALLOC_CONF (thus allowing the # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). - $(enable_fill=@enable_fill@ \ - enable_prof=@enable_prof@ \ - . @srcroot@${t}.sh && \ - export_malloc_conf && \ - ${t}@exe@ @abs_srcroot@ @abs_objroot@) + enable_fill=@enable_fill@ \ + enable_prof=@enable_prof@ \ + . @srcroot@${t}.sh && \ + export_malloc_conf && \ + $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ else - $(export MALLOC_CONF= && \ - export_malloc_conf && - ${t}@exe@ @abs_srcroot@ @abs_objroot@) + export MALLOC_CONF= && \ + export_malloc_conf && \ + $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ fi result_code=$? case ${result_code} in @@ -63,7 +63,8 @@ for t in $@; do fail_count=$((fail_count+1)) ;; *) - echo "Test harness error" 1>&2 + echo "Test harness error: ${t} w/ MALLOC_CONF=\"${MALLOC_CONF}\"" 1>&2 + echo "Use prefix to debug, e.g. JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh ${t}" 1>&2 exit 1 esac done From 7d86c92c61c60b771cdf146f6187c1550a089ad1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 24 Apr 2017 09:14:31 -0700 Subject: [PATCH 0823/2608] Add missing 'test' to LG_SIZEOF_PTR tests. 
This fixes a bug/regression introduced by a01f99307719dcc8ca27cc70f0f0011beff914fa (Only disable munmap(2) by default on 64-bit Linux.). --- configure.ac | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 0a717255..42cabad3 100644 --- a/configure.ac +++ b/configure.ac @@ -558,7 +558,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_C11_ATOMICS]) force_tls="0" - if "${LG_SIZEOF_PTR}" = "3"; then + if test "${LG_SIZEOF_PTR}" = "3"; then default_munmap="0" fi ;; @@ -571,7 +571,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) - if "${LG_SIZEOF_PTR}" = "3"; then + if test "${LG_SIZEOF_PTR}" = "3"; then default_munmap="0" fi ;; @@ -596,7 +596,7 @@ case "${host}" in JE_APPEND_VS(LIBS, -lposix4 -lsocket -lnsl) ;; *-ibm-aix*) - if "${LG_SIZEOF_PTR}" = "3"; then + if test "${LG_SIZEOF_PTR}" = "3"; then dnl 64bit AIX LD_PRELOAD_VAR="LDR_PRELOAD64" else From 4d2e4bf5ebb1e37a9348fdbf51af0b63304d7c98 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 21 Apr 2017 09:37:34 -0700 Subject: [PATCH 0824/2608] Get rid of most of the various inline macros. 
--- include/jemalloc/internal/arena_inlines_a.h | 25 +-- include/jemalloc/internal/arena_inlines_b.h | 32 +--- include/jemalloc/internal/base_inlines.h | 8 +- include/jemalloc/internal/bitmap_inlines.h | 25 +-- include/jemalloc/internal/extent_inlines.h | 160 ++++++------------ include/jemalloc/internal/hash_inlines.h | 32 ++-- .../internal/jemalloc_internal_inlines_a.h | 60 ++----- .../internal/jemalloc_internal_inlines_b.h | 14 +- .../internal/jemalloc_internal_inlines_c.h | 31 ---- .../internal/jemalloc_internal_macros.h | 34 +--- include/jemalloc/internal/mutex_inlines.h | 30 +--- include/jemalloc/internal/prng_inlines.h | 18 -- include/jemalloc/internal/prof_inlines_a.h | 12 +- include/jemalloc/internal/prof_inlines_b.h | 24 --- include/jemalloc/internal/rtree_inlines.h | 74 ++------ include/jemalloc/internal/tcache_inlines.h | 23 +-- include/jemalloc/internal/ticker_inlines.h | 20 +-- include/jemalloc/internal/tsd_inlines.h | 26 +-- include/jemalloc/internal/witness_inlines.h | 30 +--- src/arena.c | 6 +- src/ckh.c | 12 +- src/ctl.c | 6 +- src/jemalloc.c | 32 ++-- src/jemalloc_cpp.cpp | 1 - src/prof.c | 12 +- test/include/test/SFMT-alti.h | 12 +- test/include/test/SFMT-sse2.h | 12 +- test/include/test/SFMT.h | 35 ++-- test/include/test/math.h | 20 +-- test/src/SFMT.c | 74 ++++---- test/stress/microbench.c | 2 +- 31 files changed, 233 insertions(+), 669 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 2bd5ce75..da587706 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -1,38 +1,27 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H #define JEMALLOC_INTERNAL_ARENA_INLINES_A_H -#ifndef JEMALLOC_ENABLE_INLINE -unsigned arena_ind_get(const arena_t *arena); -void arena_internal_add(arena_t *arena, size_t size); -void arena_internal_sub(arena_t *arena, size_t size); -size_t arena_internal_get(arena_t *arena); -bool arena_prof_accum(tsdn_t 
*tsdn, arena_t *arena, uint64_t accumbytes); -void percpu_arena_update(tsd_t *tsd, unsigned cpu); -#endif /* JEMALLOC_ENABLE_INLINE */ - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) - -JEMALLOC_INLINE unsigned +static inline unsigned arena_ind_get(const arena_t *arena) { return base_ind_get(arena->base); } -JEMALLOC_INLINE void +static inline void arena_internal_add(arena_t *arena, size_t size) { atomic_fetch_add_zu(&arena->stats.internal, size, ATOMIC_RELAXED); } -JEMALLOC_INLINE void +static inline void arena_internal_sub(arena_t *arena, size_t size) { atomic_fetch_sub_zu(&arena->stats.internal, size, ATOMIC_RELAXED); } -JEMALLOC_INLINE size_t +static inline size_t arena_internal_get(arena_t *arena) { return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } -JEMALLOC_INLINE bool +static inline bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); @@ -43,7 +32,7 @@ arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); } -JEMALLOC_INLINE void +static inline void percpu_arena_update(tsd_t *tsd, unsigned cpu) { assert(have_percpu_arena); arena_t *oldarena = tsd_arena_get(tsd); @@ -65,6 +54,4 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { } } -#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ - #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 4264f4b3..526103bc 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -3,30 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" -#ifndef JEMALLOC_ENABLE_INLINE -szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin); -prof_tctx_t *arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, - alloc_ctx_t *ctx); -void arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, 
- alloc_ctx_t *ctx, prof_tctx_t *tctx); -void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); -void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks); -void arena_decay_tick(tsdn_t *tsdn, arena_t *arena); -void *arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero, tcache_t *tcache, bool slow_path); -arena_t *arena_aalloc(tsdn_t *tsdn, const void *ptr); -size_t arena_salloc(tsdn_t *tsdn, const void *ptr); -size_t arena_vsalloc(tsdn_t *tsdn, const void *ptr); -void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr); -void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path); -void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size); -void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) -JEMALLOC_INLINE szind_t +static inline szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { szind_t binind = (szind_t)(bin - arena->bins); assert(binind < NBINS); @@ -71,7 +48,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, } } -JEMALLOC_INLINE void +static inline void arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -182,7 +159,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return index2size(szind); } -JEMALLOC_INLINE void +static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); @@ -264,7 +241,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, } } -JEMALLOC_INLINE void +static inline void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); assert(size <= LARGE_MAXCLASS); @@ -376,5 +353,4 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } } -#endif /* (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_ARENA_C_)) */ 
#endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index aa8306ac..931560bf 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -1,15 +1,9 @@ #ifndef JEMALLOC_INTERNAL_BASE_INLINES_H #define JEMALLOC_INTERNAL_BASE_INLINES_H -#ifndef JEMALLOC_ENABLE_INLINE -unsigned base_ind_get(const base_t *base); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BASE_C_)) -JEMALLOC_INLINE unsigned +static inline unsigned base_ind_get(const base_t *base) { return base->ind; } -#endif #endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h index c2362018..84425b34 100644 --- a/include/jemalloc/internal/bitmap_inlines.h +++ b/include/jemalloc/internal/bitmap_inlines.h @@ -3,18 +3,7 @@ #include "jemalloc/internal/bit_util.h" -#ifndef JEMALLOC_ENABLE_INLINE -bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo); -bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -size_t bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, - size_t min_bit); -size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo); -void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_BITMAP_C_)) -JEMALLOC_INLINE bool +static inline bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; @@ -33,7 +22,7 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { #endif } -JEMALLOC_INLINE bool +static inline bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t g; @@ -44,7 +33,7 @@ bitmap_get(bitmap_t *bitmap, 
const bitmap_info_t *binfo, size_t bit) { return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); } -JEMALLOC_INLINE void +static inline void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; @@ -80,7 +69,7 @@ bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { } /* ffu: find first unset >= bit. */ -JEMALLOC_INLINE size_t +static inline size_t bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { assert(min_bit < binfo->nbits); @@ -139,7 +128,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { } /* sfu: set first unset. */ -JEMALLOC_INLINE size_t +static inline size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { size_t bit; bitmap_t g; @@ -169,7 +158,7 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { return bit; } -JEMALLOC_INLINE void +static inline void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { size_t goff; bitmap_t *gp; @@ -208,6 +197,4 @@ bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { #endif /* BITMAP_USE_TREE */ } -#endif - #endif /* JEMALLOC_INTERNAL_BITMAP_INLINES_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index fbe51e47..22d45ce1 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -3,64 +3,7 @@ #include "jemalloc/internal/ql.h" -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *extent_arena_get(const extent_t *extent); -szind_t extent_szind_get_maybe_invalid(const extent_t *extent); -szind_t extent_szind_get(const extent_t *extent); -size_t extent_usize_get(const extent_t *extent); -size_t extent_sn_get(const extent_t *extent); -extent_state_t extent_state_get(const extent_t *extent); -bool extent_zeroed_get(const extent_t *extent); -bool extent_committed_get(const extent_t *extent); -bool extent_slab_get(const extent_t *extent); -unsigned 
extent_nfree_get(const extent_t *extent); -void *extent_base_get(const extent_t *extent); -void *extent_addr_get(const extent_t *extent); -size_t extent_size_get(const extent_t *extent); -size_t extent_esn_get(const extent_t *extent); -size_t extent_bsize_get(const extent_t *extent); -void *extent_before_get(const extent_t *extent); -void *extent_last_get(const extent_t *extent); -void *extent_past_get(const extent_t *extent); -arena_slab_data_t *extent_slab_data_get(extent_t *extent); -const arena_slab_data_t *extent_slab_data_get_const(const extent_t *extent); -prof_tctx_t *extent_prof_tctx_get(const extent_t *extent); -void extent_arena_set(extent_t *extent, arena_t *arena); -void extent_addr_set(extent_t *extent, void *addr); -void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment); -void extent_size_set(extent_t *extent, size_t size); -void extent_esn_set(extent_t *extent, size_t esn); -void extent_bsize_set(extent_t *extent, size_t bsize); -void extent_szind_set(extent_t *extent, szind_t szind); -void extent_nfree_set(extent_t *extent, unsigned nfree); -void extent_nfree_inc(extent_t *extent); -void extent_nfree_dec(extent_t *extent); -void extent_sn_set(extent_t *extent, size_t sn); -void extent_state_set(extent_t *extent, extent_state_t state); -void extent_zeroed_set(extent_t *extent, bool zeroed); -void extent_committed_set(extent_t *extent, bool committed); -void extent_slab_set(extent_t *extent, bool slab); -void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); -void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, - bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed); -void extent_binit(extent_t *extent, void *addr, size_t size, size_t sn); -void extent_list_init(extent_list_t *list); -extent_t *extent_list_first(const extent_list_t *list); -extent_t *extent_list_last(const extent_list_t *list); -void extent_list_append(extent_list_t *list, extent_t *extent); 
-void extent_list_replace(extent_list_t *list, extent_t *to_remove, - extent_t *to_insert); -void extent_list_remove(extent_list_t *list, extent_t *extent); -int extent_sn_comp(const extent_t *a, const extent_t *b); -int extent_esn_comp(const extent_t *a, const extent_t *b); -int extent_ad_comp(const extent_t *a, const extent_t *b); -int extent_snad_comp(const extent_t *a, const extent_t *b); -int extent_esnead_comp(const extent_t *a, const extent_t *b); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_EXTENT_C_)) -JEMALLOC_INLINE arena_t * +static inline arena_t * extent_arena_get(const extent_t *extent) { unsigned arena_ind = (unsigned)((extent->e_bits & EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); @@ -75,7 +18,7 @@ extent_arena_get(const extent_t *extent) { return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); } -JEMALLOC_INLINE szind_t +static inline szind_t extent_szind_get_maybe_invalid(const extent_t *extent) { szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> EXTENT_BITS_SZIND_SHIFT); @@ -83,120 +26,120 @@ extent_szind_get_maybe_invalid(const extent_t *extent) { return szind; } -JEMALLOC_INLINE szind_t +static inline szind_t extent_szind_get(const extent_t *extent) { szind_t szind = extent_szind_get_maybe_invalid(extent); assert(szind < NSIZES); /* Never call when "invalid". 
*/ return szind; } -JEMALLOC_INLINE size_t +static inline size_t extent_usize_get(const extent_t *extent) { return index2size(extent_szind_get(extent)); } -JEMALLOC_INLINE size_t +static inline size_t extent_sn_get(const extent_t *extent) { return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> EXTENT_BITS_SN_SHIFT); } -JEMALLOC_INLINE extent_state_t +static inline extent_state_t extent_state_get(const extent_t *extent) { return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> EXTENT_BITS_STATE_SHIFT); } -JEMALLOC_INLINE bool +static inline bool extent_zeroed_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> EXTENT_BITS_ZEROED_SHIFT); } -JEMALLOC_INLINE bool +static inline bool extent_committed_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> EXTENT_BITS_COMMITTED_SHIFT); } -JEMALLOC_INLINE bool +static inline bool extent_slab_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> EXTENT_BITS_SLAB_SHIFT); } -JEMALLOC_INLINE unsigned +static inline unsigned extent_nfree_get(const extent_t *extent) { assert(extent_slab_get(extent)); return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> EXTENT_BITS_NFREE_SHIFT); } -JEMALLOC_INLINE void * +static inline void * extent_base_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent_slab_get(extent)); return PAGE_ADDR2BASE(extent->e_addr); } -JEMALLOC_INLINE void * +static inline void * extent_addr_get(const extent_t *extent) { assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || !extent_slab_get(extent)); return extent->e_addr; } -JEMALLOC_INLINE size_t +static inline size_t extent_size_get(const extent_t *extent) { return (extent->e_size_esn & EXTENT_SIZE_MASK); } -JEMALLOC_INLINE size_t +static inline size_t extent_esn_get(const extent_t *extent) { return (extent->e_size_esn & EXTENT_ESN_MASK); } -JEMALLOC_INLINE size_t +static inline 
size_t extent_bsize_get(const extent_t *extent) { return extent->e_bsize; } -JEMALLOC_INLINE void * +static inline void * extent_before_get(const extent_t *extent) { return (void *)((uintptr_t)extent_base_get(extent) - PAGE); } -JEMALLOC_INLINE void * +static inline void * extent_last_get(const extent_t *extent) { return (void *)((uintptr_t)extent_base_get(extent) + extent_size_get(extent) - PAGE); } -JEMALLOC_INLINE void * +static inline void * extent_past_get(const extent_t *extent) { return (void *)((uintptr_t)extent_base_get(extent) + extent_size_get(extent)); } -JEMALLOC_INLINE arena_slab_data_t * +static inline arena_slab_data_t * extent_slab_data_get(extent_t *extent) { assert(extent_slab_get(extent)); return &extent->e_slab_data; } -JEMALLOC_INLINE const arena_slab_data_t * +static inline const arena_slab_data_t * extent_slab_data_get_const(const extent_t *extent) { assert(extent_slab_get(extent)); return &extent->e_slab_data; } -JEMALLOC_INLINE prof_tctx_t * +static inline prof_tctx_t * extent_prof_tctx_get(const extent_t *extent) { return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, ATOMIC_ACQUIRE); } -JEMALLOC_INLINE void +static inline void extent_arena_set(extent_t *extent, arena_t *arena) { unsigned arena_ind = (arena != NULL) ? 
arena_ind_get(arena) : ((1U << MALLOCX_ARENA_BITS) - 1); @@ -204,12 +147,12 @@ extent_arena_set(extent_t *extent, arena_t *arena) { ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_addr_set(extent_t *extent, void *addr) { extent->e_addr = addr; } -JEMALLOC_INLINE void +static inline void extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { assert(extent_base_get(extent) == extent_addr_get(extent)); @@ -228,85 +171,85 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { } } -JEMALLOC_INLINE void +static inline void extent_size_set(extent_t *extent, size_t size) { assert((size & ~EXTENT_SIZE_MASK) == 0); extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); } -JEMALLOC_INLINE void +static inline void extent_esn_set(extent_t *extent, size_t esn) { extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & EXTENT_ESN_MASK); } -JEMALLOC_INLINE void +static inline void extent_bsize_set(extent_t *extent, size_t bsize) { extent->e_bsize = bsize; } -JEMALLOC_INLINE void +static inline void extent_szind_set(extent_t *extent, szind_t szind) { assert(szind <= NSIZES); /* NSIZES means "invalid". 
*/ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_nfree_set(extent_t *extent, unsigned nfree) { assert(extent_slab_get(extent)); extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_nfree_inc(extent_t *extent) { assert(extent_slab_get(extent)); extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_nfree_dec(extent_t *extent) { assert(extent_slab_get(extent)); extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_sn_set(extent_t *extent, size_t sn) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_state_set(extent_t *extent, extent_state_t state) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_zeroed_set(extent_t *extent, bool zeroed) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_committed_set(extent_t *extent, bool committed) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_slab_set(extent_t *extent, bool slab) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); } -JEMALLOC_INLINE void +static inline void extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); } -JEMALLOC_INLINE void +static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, 
bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, bool committed) { @@ -327,7 +270,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, } } -JEMALLOC_INLINE void +static inline void extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_arena_set(extent, NULL); extent_addr_set(extent, addr); @@ -340,39 +283,39 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_committed_set(extent, true); } -JEMALLOC_INLINE void +static inline void extent_list_init(extent_list_t *list) { ql_new(list); } -JEMALLOC_INLINE extent_t * +static inline extent_t * extent_list_first(const extent_list_t *list) { return ql_first(list); } -JEMALLOC_INLINE extent_t * +static inline extent_t * extent_list_last(const extent_list_t *list) { return ql_last(list, ql_link); } -JEMALLOC_INLINE void +static inline void extent_list_append(extent_list_t *list, extent_t *extent) { ql_tail_insert(list, extent, ql_link); } -JEMALLOC_INLINE void +static inline void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert) { ql_after_insert(to_remove, to_insert, ql_link); ql_remove(list, to_remove, ql_link); } -JEMALLOC_INLINE void +static inline void extent_list_remove(extent_list_t *list, extent_t *extent) { ql_remove(list, extent, ql_link); } -JEMALLOC_INLINE int +static inline int extent_sn_comp(const extent_t *a, const extent_t *b) { size_t a_sn = extent_sn_get(a); size_t b_sn = extent_sn_get(b); @@ -380,7 +323,7 @@ extent_sn_comp(const extent_t *a, const extent_t *b) { return (a_sn > b_sn) - (a_sn < b_sn); } -JEMALLOC_INLINE int +static inline int extent_esn_comp(const extent_t *a, const extent_t *b) { size_t a_esn = extent_esn_get(a); size_t b_esn = extent_esn_get(b); @@ -388,7 +331,7 @@ extent_esn_comp(const extent_t *a, const extent_t *b) { return (a_esn > b_esn) - (a_esn < b_esn); } -JEMALLOC_INLINE int +static inline int extent_ad_comp(const extent_t *a, const extent_t *b) { 
uintptr_t a_addr = (uintptr_t)extent_addr_get(a); uintptr_t b_addr = (uintptr_t)extent_addr_get(b); @@ -396,7 +339,7 @@ extent_ad_comp(const extent_t *a, const extent_t *b) { return (a_addr > b_addr) - (a_addr < b_addr); } -JEMALLOC_INLINE int +static inline int extent_ead_comp(const extent_t *a, const extent_t *b) { uintptr_t a_eaddr = (uintptr_t)a; uintptr_t b_eaddr = (uintptr_t)b; @@ -404,7 +347,7 @@ extent_ead_comp(const extent_t *a, const extent_t *b) { return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); } -JEMALLOC_INLINE int +static inline int extent_snad_comp(const extent_t *a, const extent_t *b) { int ret; @@ -417,7 +360,7 @@ extent_snad_comp(const extent_t *a, const extent_t *b) { return ret; } -JEMALLOC_INLINE int +static inline int extent_esnead_comp(const extent_t *a, const extent_t *b) { int ret; @@ -429,6 +372,5 @@ extent_esnead_comp(const extent_t *a, const extent_t *b) { ret = extent_ead_comp(a, b); return ret; } -#endif #endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash_inlines.h index 321c17cc..2cd7e3ee 100644 --- a/include/jemalloc/internal/hash_inlines.h +++ b/include/jemalloc/internal/hash_inlines.h @@ -9,30 +9,19 @@ * details. */ -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t hash_x86_32(const void *key, int len, uint32_t seed); -void hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]); -void hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]); -void hash(const void *key, size_t len, const uint32_t seed, - size_t r_hash[2]); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_HASH_C_)) /******************************************************************************/ /* Internal implementation. 
*/ -JEMALLOC_INLINE uint32_t +static inline uint32_t hash_rotl_32(uint32_t x, int8_t r) { return ((x << r) | (x >> (32 - r))); } -JEMALLOC_INLINE uint64_t +static inline uint64_t hash_rotl_64(uint64_t x, int8_t r) { return ((x << r) | (x >> (64 - r))); } -JEMALLOC_INLINE uint32_t +static inline uint32_t hash_get_block_32(const uint32_t *p, int i) { /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { @@ -45,7 +34,7 @@ hash_get_block_32(const uint32_t *p, int i) { return p[i]; } -JEMALLOC_INLINE uint64_t +static inline uint64_t hash_get_block_64(const uint64_t *p, int i) { /* Handle unaligned read. */ if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { @@ -58,7 +47,7 @@ hash_get_block_64(const uint64_t *p, int i) { return p[i]; } -JEMALLOC_INLINE uint32_t +static inline uint32_t hash_fmix_32(uint32_t h) { h ^= h >> 16; h *= 0x85ebca6b; @@ -69,7 +58,7 @@ hash_fmix_32(uint32_t h) { return h; } -JEMALLOC_INLINE uint64_t +static inline uint64_t hash_fmix_64(uint64_t k) { k ^= k >> 33; k *= KQU(0xff51afd7ed558ccd); @@ -80,7 +69,7 @@ hash_fmix_64(uint64_t k) { return k; } -JEMALLOC_INLINE uint32_t +static inline uint32_t hash_x86_32(const void *key, int len, uint32_t seed) { const uint8_t *data = (const uint8_t *) key; const int nblocks = len / 4; @@ -130,7 +119,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) { return h1; } -UNUSED JEMALLOC_INLINE void +UNUSED static inline void hash_x86_128(const void *key, const int len, uint32_t seed, uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; @@ -231,7 +220,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed, r_out[1] = (((uint64_t) h4) << 32) | h3; } -UNUSED JEMALLOC_INLINE void +UNUSED static inline void hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; @@ -310,7 +299,7 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, 
/******************************************************************************/ /* API. */ -JEMALLOC_INLINE void +static inline void hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { assert(len <= INT_MAX); /* Unfortunate implementation limitation. */ @@ -325,6 +314,5 @@ hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { } #endif } -#endif #endif /* JEMALLOC_INTERNAL_HASH_INLINES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index c28bd7cf..9cb933c2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -5,42 +5,6 @@ #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#ifndef JEMALLOC_ENABLE_INLINE -pszind_t psz2ind(size_t psz); -size_t pind2sz_compute(pszind_t pind); -size_t pind2sz_lookup(pszind_t pind); -size_t pind2sz(pszind_t pind); -size_t psz2u(size_t psz); -szind_t size2index_compute(size_t size); -szind_t size2index_lookup(size_t size); -szind_t size2index(size_t size); -size_t index2size_compute(szind_t index); -size_t index2size_lookup(szind_t index); -size_t index2size(szind_t index); -size_t s2u_compute(size_t size); -size_t s2u_lookup(size_t size); -size_t s2u(size_t size); -size_t sa2u(size_t size, size_t alignment); -arena_t *arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal); -arena_t *arena_choose(tsd_t *tsd, arena_t *arena); -arena_t *arena_ichoose(tsd_t *tsd, arena_t *arena); -bool arena_is_auto(arena_t *arena); -arena_tdata_t *arena_tdata_get(tsd_t *tsd, unsigned ind, - bool refresh_if_missing); -arena_t *arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing); -ticker_t *decay_ticker_get(tsd_t *tsd, unsigned ind); -bool tcache_available(tsd_t *tsd); -tcache_bin_t *tcache_small_bin_get(tcache_t *tcache, szind_t binind); -tcache_bin_t *tcache_large_bin_get(tcache_t *tcache, 
szind_t binind); -tcache_t *tcache_get(tsd_t *tsd); -malloc_cpuid_t malloc_getcpu(void); -unsigned percpu_arena_choose(void); -unsigned percpu_arena_ind_limit(void); -void pre_reentrancy(tsd_t *tsd); -void post_reentrancy(tsd_t *tsd); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE pszind_t psz2ind(size_t psz) { if (unlikely(psz > LARGE_MAXCLASS)) { @@ -64,7 +28,7 @@ psz2ind(size_t psz) { } } -JEMALLOC_INLINE size_t +static inline size_t pind2sz_compute(pszind_t pind) { if (unlikely(pind == NPSIZES)) { return LARGE_MAXCLASS + PAGE; @@ -86,20 +50,20 @@ pind2sz_compute(pszind_t pind) { } } -JEMALLOC_INLINE size_t +static inline size_t pind2sz_lookup(pszind_t pind) { size_t ret = (size_t)pind2sz_tab[pind]; assert(ret == pind2sz_compute(pind)); return ret; } -JEMALLOC_INLINE size_t +static inline size_t pind2sz(pszind_t pind) { assert(pind < NPSIZES+1); return pind2sz_lookup(pind); } -JEMALLOC_INLINE size_t +static inline size_t psz2u(size_t psz) { if (unlikely(psz > LARGE_MAXCLASS)) { return LARGE_MAXCLASS + PAGE; @@ -115,7 +79,7 @@ psz2u(size_t psz) { } } -JEMALLOC_INLINE szind_t +static inline szind_t size2index_compute(size_t size) { if (unlikely(size > LARGE_MAXCLASS)) { return NSIZES; @@ -164,7 +128,7 @@ size2index(size_t size) { return size2index_compute(size); } -JEMALLOC_INLINE size_t +static inline size_t index2size_compute(szind_t index) { #if (NTBINS > 0) if (index < NTBINS) { @@ -355,7 +319,7 @@ percpu_arena_ind_limit(void) { } } -JEMALLOC_INLINE arena_tdata_t * +static inline arena_tdata_t * arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { arena_tdata_t *tdata; arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); @@ -380,7 +344,7 @@ arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { return arena_tdata_get_hard(tsd, ind); } -JEMALLOC_INLINE arena_t * +static inline arena_t * arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -396,7 +360,7 
@@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { return ret; } -JEMALLOC_INLINE ticker_t * +static inline ticker_t * decay_ticker_get(tsd_t *tsd, unsigned ind) { arena_tdata_t *tdata; @@ -446,7 +410,7 @@ tcache_get(tsd_t *tsd) { return tsd_tcachep_get(tsd); } -JEMALLOC_INLINE void +static inline void pre_reentrancy(tsd_t *tsd) { bool fast = tsd_fast(tsd); ++*tsd_reentrancy_levelp_get(tsd); @@ -457,7 +421,7 @@ pre_reentrancy(tsd_t *tsd) { } } -JEMALLOC_INLINE void +static inline void post_reentrancy(tsd_t *tsd) { int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); assert(*reentrancy_level > 0); @@ -466,6 +430,4 @@ post_reentrancy(tsd_t *tsd) { } } -#endif - #endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 2fd371c3..cfc52094 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,13 +1,8 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H -#ifndef JEMALLOC_ENABLE_INLINE -extent_t *iealloc(tsdn_t *tsdn, const void *ptr); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) /* Choose an arena based on a per-thread value. 
*/ -JEMALLOC_INLINE arena_t * +static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { arena_t *ret; @@ -60,17 +55,17 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { return ret; } -JEMALLOC_INLINE arena_t * +static inline arena_t * arena_choose(tsd_t *tsd, arena_t *arena) { return arena_choose_impl(tsd, arena, false); } -JEMALLOC_INLINE arena_t * +static inline arena_t * arena_ichoose(tsd_t *tsd, arena_t *arena) { return arena_choose_impl(tsd, arena, true); } -JEMALLOC_INLINE bool +static inline bool arena_is_auto(arena_t *arena) { assert(narenas_auto > 0); return (arena_ind_get(arena) < narenas_auto); @@ -84,6 +79,5 @@ iealloc(tsdn_t *tsdn, const void *ptr) { return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); } -#endif #endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 70ac6669..415c503b 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -3,36 +3,6 @@ #include "jemalloc/internal/jemalloc_internal_types.h" -#ifndef JEMALLOC_ENABLE_INLINE -arena_t *iaalloc(tsdn_t *tsdn, const void *ptr); -size_t isalloc(tsdn_t *tsdn, const void *ptr); -void *iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path); -void *ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, - bool slow_path); -void *ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena); -void *ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, arena_t *arena); -void *ipalloc(tsd_t *tsd, size_t usize, size_t alignment, bool zero); -size_t ivsalloc(tsdn_t *tsdn, const void *ptr); -void idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - alloc_ctx_t 
*alloc_ctx, bool is_internal, bool slow_path); -void idalloc(tsd_t *tsd, void *ptr); -void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path); -void *iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena); -void *iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena); -void *iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero); -bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_C_)) JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); @@ -214,6 +184,5 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); } -#endif #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index c5dd9b39..1b2802a8 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,37 +1,13 @@ #ifndef JEMALLOC_INTERNAL_MACROS_H #define JEMALLOC_INTERNAL_MACROS_H -/* - * JEMALLOC_ALWAYS_INLINE and JEMALLOC_INLINE are used within header files for - * functions that are static inline functions if inlining is enabled, and - * single-definition library-private functions if inlining is disabled. - * - * JEMALLOC_ALWAYS_INLINE_C and JEMALLOC_INLINE_C are for use in .c files, in - * which case the denoted functions are always static, regardless of whether - * inlining is enabled. - */ #if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) - /* Disable inlining to make debugging/profiling easier. 
*/ -# define JEMALLOC_ALWAYS_INLINE -# define JEMALLOC_ALWAYS_INLINE_C static -# define JEMALLOC_INLINE -# define JEMALLOC_INLINE_C static +# define JEMALLOC_ALWAYS_INLINE static inline #else -# define JEMALLOC_ENABLE_INLINE -# ifdef JEMALLOC_HAVE_ATTR -# define JEMALLOC_ALWAYS_INLINE \ - static inline JEMALLOC_ATTR(unused) JEMALLOC_ATTR(always_inline) -# define JEMALLOC_ALWAYS_INLINE_C \ - static inline JEMALLOC_ATTR(always_inline) -# else -# define JEMALLOC_ALWAYS_INLINE static inline -# define JEMALLOC_ALWAYS_INLINE_C static inline -# endif -# define JEMALLOC_INLINE static inline -# define JEMALLOC_INLINE_C static inline -# ifdef _MSC_VER -# define inline _inline -# endif +# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline +#endif +#ifdef _MSC_VER +# define inline _inline #endif #ifdef JEMALLOC_CC_SILENCE diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 5ec439f7..2856d844 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -5,31 +5,19 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex); -#ifndef JEMALLOC_ENABLE_INLINE -void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_trylock(malloc_mutex_t *mutex); -void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex); -void malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) -JEMALLOC_INLINE void +static inline void malloc_mutex_lock_final(malloc_mutex_t *mutex) { MALLOC_MUTEX_LOCK(mutex); } /* Trylock: return false if the lock is successfully acquired. 
*/ -JEMALLOC_INLINE bool +static inline bool malloc_mutex_trylock(malloc_mutex_t *mutex) { return MALLOC_MUTEX_TRYLOCK(mutex); } /* Aggregate lock prof data. */ -JEMALLOC_INLINE void +static inline void malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { nstime_add(&sum->tot_wait_time, &data->tot_wait_time); if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { @@ -52,7 +40,7 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { sum->n_lock_ops += data->n_lock_ops; } -JEMALLOC_INLINE void +static inline void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn, &mutex->witness); if (isthreaded) { @@ -72,7 +60,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_lock(tsdn, &mutex->witness); } -JEMALLOC_INLINE void +static inline void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_unlock(tsdn, &mutex->witness); if (isthreaded) { @@ -80,18 +68,18 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { } } -JEMALLOC_INLINE void +static inline void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_owner(tsdn, &mutex->witness); } -JEMALLOC_INLINE void +static inline void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn, &mutex->witness); } /* Copy the prof data from mutex for processing. 
*/ -JEMALLOC_INLINE void +static inline void malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { mutex_prof_data_t *source = &mutex->prof_data; @@ -108,6 +96,4 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); } -#endif - #endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */ diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng_inlines.h index c39c63f5..0275dfc4 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng_inlines.h @@ -4,23 +4,6 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" -#ifndef JEMALLOC_ENABLE_INLINE -uint32_t prng_state_next_u32(uint32_t state); -uint64_t prng_state_next_u64(uint64_t state); -size_t prng_state_next_zu(size_t state); - -uint32_t prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, - bool atomic); -uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range); -size_t prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic); - -uint32_t prng_range_u32(atomic_u32_t *state, uint32_t range, - bool atomic); -uint64_t prng_range_u64(uint64_t *state, uint64_t range); -size_t prng_range_zu(atomic_zu_t *state, size_t range, bool atomic); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PRNG_C_)) JEMALLOC_ALWAYS_INLINE uint32_t prng_state_next_u32(uint32_t state) { return (state * PRNG_A_32) + PRNG_C_32; @@ -156,6 +139,5 @@ prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { return ret; } -#endif #endif /* JEMALLOC_INTERNAL_PRNG_INLINES_H */ diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index d0d29685..6203cbd9 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -1,14 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H #define JEMALLOC_INTERNAL_PROF_INLINES_A_H -#ifndef 
JEMALLOC_ENABLE_INLINE -bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, - uint64_t accumbytes); -void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) -JEMALLOC_INLINE bool +static inline bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { cassert(config_prof); @@ -46,7 +39,7 @@ prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { return overflow; } -JEMALLOC_INLINE void +static inline void prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { cassert(config_prof); @@ -73,6 +66,5 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { malloc_mutex_unlock(tsdn, &prof_accum->mtx); #endif } -#endif #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 6a79c01e..eba981b9 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -1,29 +1,6 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H #define JEMALLOC_INTERNAL_PROF_INLINES_B_H -#ifndef JEMALLOC_ENABLE_INLINE -bool prof_active_get_unlocked(void); -bool prof_gdump_get_unlocked(void); -prof_tdata_t *prof_tdata_get(tsd_t *tsd, bool create); -prof_tctx_t *prof_tctx_get(tsdn_t *tsdn, const void *ptr, - alloc_ctx_t *alloc_ctx); -void prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx); -void prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx); -bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out); -prof_tctx_t *prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, - bool update); -void prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx); -void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, - 
prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, - size_t old_usize, prof_tctx_t *old_tctx); -void prof_free(tsd_t *tsd, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_PROF_C_)) JEMALLOC_ALWAYS_INLINE bool prof_active_get_unlocked(void) { /* @@ -231,6 +208,5 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { prof_free_sampled_object(tsd, usize, tctx); } } -#endif #endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 030e5787..f4f7c2ca 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -3,59 +3,6 @@ #include "jemalloc/internal/spin.h" -#ifndef JEMALLOC_ENABLE_INLINE -uintptr_t rtree_leafkey(uintptr_t key); -uintptr_t rtree_subkey(uintptr_t key, unsigned level); -# ifdef RTREE_LEAF_COMPACT -uintptr_t rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, bool dependent); -extent_t *rtree_leaf_elm_bits_extent_get(uintptr_t bits); -szind_t rtree_leaf_elm_bits_szind_get(uintptr_t bits); -bool rtree_leaf_elm_bits_slab_get(uintptr_t bits); -bool rtree_leaf_elm_bits_locked_get(uintptr_t bits); -# endif -extent_t *rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, bool dependent); -szind_t rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, bool dependent); -bool rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, bool dependent); -void rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, extent_t *extent); -void rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, szind_t szind); -void rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t 
*rtree, - rtree_leaf_elm_t *elm, bool acquired, bool slab); -void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, extent_t *extent, szind_t szind, bool slab); -void rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, szind_t szind, bool slab); -rtree_leaf_elm_t *rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, extent_t *extent, szind_t szind, bool slab); -rtree_leaf_elm_t *rtree_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); -extent_t *rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); -szind_t rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent); -bool rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, extent_t **r_extent, - szind_t *r_szind); -bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab); -rtree_leaf_elm_t *rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -void rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm); -void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab); -void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_RTREE_C_)) JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leafkey(uintptr_t key) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); @@ -194,7 +141,7 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t 
*elm, #endif } -JEMALLOC_INLINE void +static inline void rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, extent_t *extent) { if (config_debug && acquired) { @@ -219,7 +166,7 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } -JEMALLOC_INLINE void +static inline void rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, szind_t szind) { if (config_debug && acquired) { @@ -241,7 +188,7 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } -JEMALLOC_INLINE void +static inline void rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, bool slab) { if (config_debug && acquired) { @@ -261,7 +208,7 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } -JEMALLOC_INLINE void +static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, szind_t szind, bool slab) { if (config_debug && acquired) { @@ -287,7 +234,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } -JEMALLOC_INLINE void +static inline void rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind, bool slab) { assert(!slab || szind < NBINS); @@ -384,7 +331,7 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, dependent, init_missing); } -JEMALLOC_INLINE bool +static inline bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, extent_t *extent, szind_t szind, bool slab) { /* Use rtree_clear() to set the extent to NULL. 
*/ @@ -471,7 +418,7 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, return false; } -JEMALLOC_INLINE rtree_leaf_elm_t * +static inline rtree_leaf_elm_t * rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, @@ -511,7 +458,7 @@ rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, return elm; } -JEMALLOC_INLINE void +static inline void rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) { extent_t *extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, true, true); @@ -521,7 +468,7 @@ rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) { } } -JEMALLOC_INLINE void +static inline void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab) { assert(!slab || szind < NBINS); @@ -530,7 +477,7 @@ rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); } -JEMALLOC_INLINE void +static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); @@ -538,6 +485,5 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, NULL); rtree_leaf_elm_write(tsdn, rtree, elm, false, NULL, NSIZES, false); } -#endif #endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 67d35b58..8b42af66 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -4,30 +4,12 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/util.h" -#ifndef JEMALLOC_ENABLE_INLINE -void tcache_event(tsd_t *tsd, tcache_t *tcache); -bool 
tcache_enabled_get(tsd_t *tsd); -tcache_t *tcache_get(tsd_t *tsd); -void tcache_enabled_set(tsd_t *tsd, bool enabled); -void *tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success); -void *tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); -void *tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t ind, bool zero, bool slow_path); -void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind, bool slow_path); -void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, - szind_t binind, bool slow_path); -tcache_t *tcaches_get(tsd_t *tsd, unsigned ind); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TCACHE_C_)) -JEMALLOC_INLINE bool +static inline bool tcache_enabled_get(tsd_t *tsd) { return tsd_tcache_enabled_get(tsd); } -JEMALLOC_INLINE void +static inline void tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); @@ -261,6 +243,5 @@ tcaches_get(tsd_t *tsd, unsigned ind) { } return elm->tcache; } -#endif #endif /* JEMALLOC_INTERNAL_TCACHE_INLINES_H */ diff --git a/include/jemalloc/internal/ticker_inlines.h b/include/jemalloc/internal/ticker_inlines.h index 9102ba6d..cd5821f9 100644 --- a/include/jemalloc/internal/ticker_inlines.h +++ b/include/jemalloc/internal/ticker_inlines.h @@ -1,32 +1,23 @@ #ifndef JEMALLOC_INTERNAL_TICKER_INLINES_H #define JEMALLOC_INTERNAL_TICKER_INLINES_H -#ifndef JEMALLOC_ENABLE_INLINE -void ticker_init(ticker_t *ticker, int32_t nticks); -void ticker_copy(ticker_t *ticker, const ticker_t *other); -int32_t ticker_read(const ticker_t *ticker); -bool ticker_ticks(ticker_t *ticker, int32_t nticks); -bool ticker_tick(ticker_t *ticker); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TICKER_C_)) -JEMALLOC_INLINE void +static inline void ticker_init(ticker_t *ticker, int32_t nticks) { ticker->tick = nticks; ticker->nticks = 
nticks; } -JEMALLOC_INLINE void +static inline void ticker_copy(ticker_t *ticker, const ticker_t *other) { *ticker = *other; } -JEMALLOC_INLINE int32_t +static inline int32_t ticker_read(const ticker_t *ticker) { return ticker->tick; } -JEMALLOC_INLINE bool +static inline bool ticker_ticks(ticker_t *ticker, int32_t nticks) { if (unlikely(ticker->tick < nticks)) { ticker->tick = ticker->nticks; @@ -36,10 +27,9 @@ ticker_ticks(ticker_t *ticker, int32_t nticks) { return(false); } -JEMALLOC_INLINE bool +static inline bool ticker_tick(ticker_t *ticker) { return ticker_ticks(ticker, 1); } -#endif #endif /* JEMALLOC_INTERNAL_TICKER_INLINES_H */ diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h index 93469bca..f0f77e48 100644 --- a/include/jemalloc/internal/tsd_inlines.h +++ b/include/jemalloc/internal/tsd_inlines.h @@ -1,29 +1,6 @@ #ifndef JEMALLOC_INTERNAL_TSD_INLINES_H #define JEMALLOC_INTERNAL_TSD_INLINES_H -#ifndef JEMALLOC_ENABLE_INLINE -malloc_tsd_protos(JEMALLOC_ATTR(unused), , tsd_t) - -tsd_t *tsd_fetch_impl(bool init); -tsd_t *tsd_fetch(void); -tsdn_t *tsd_tsdn(tsd_t *tsd); -bool tsd_nominal(tsd_t *tsd); -#define O(n, t, gs, i, c) \ -t *tsd_##n##p_get(tsd_t *tsd); \ -t tsd_##n##_get(tsd_t *tsd); \ -void tsd_##n##_set(tsd_t *tsd, t n); -MALLOC_TSD -#undef O -tsdn_t *tsdn_fetch(void); -bool tsdn_null(const tsdn_t *tsdn); -tsd_t *tsdn_tsd(tsdn_t *tsdn); -rtree_ctx_t *tsd_rtree_ctx(tsd_t *tsd); -rtree_ctx_t *tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback); -bool tsd_fast(tsd_t *tsd); -bool tsd_assert_fast(tsd_t *tsd); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_TSD_C_)) malloc_tsd_externs(, tsd_t) malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) @@ -97,7 +74,7 @@ tsd_tsdn(tsd_t *tsd) { return (tsdn_t *)tsd; } -JEMALLOC_INLINE bool +static inline bool tsd_nominal(tsd_t *tsd) { return (tsd->state <= tsd_state_nominal_max); } @@ -140,6 +117,5 @@ tsdn_rtree_ctx(tsdn_t 
*tsdn, rtree_ctx_t *fallback) { } return tsd_rtree_ctx(tsdn_tsd(tsdn)); } -#endif #endif /* JEMALLOC_INTERNAL_TSD_INLINES_H */ diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h index c5027f11..51d1af38 100644 --- a/include/jemalloc/internal/witness_inlines.h +++ b/include/jemalloc/internal/witness_inlines.h @@ -3,21 +3,8 @@ #include "jemalloc/internal/ql.h" -#ifndef JEMALLOC_ENABLE_INLINE -bool witness_owner(tsd_t *tsd, const witness_t *witness); -void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness); -void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, - unsigned depth); -void witness_assert_depth(tsdn_t *tsdn, unsigned depth); -void witness_assert_lockless(tsdn_t *tsdn); -void witness_lock(tsdn_t *tsdn, witness_t *witness); -void witness_unlock(tsdn_t *tsdn, witness_t *witness); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(JEMALLOC_MUTEX_C_)) /* Helper, not intended for direct use. 
*/ -JEMALLOC_INLINE bool +static inline bool witness_owner(tsd_t *tsd, const witness_t *witness) { witness_list_t *witnesses; witness_t *w; @@ -34,7 +21,7 @@ witness_owner(tsd_t *tsd, const witness_t *witness) { return false; } -JEMALLOC_INLINE void +static inline void witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; @@ -56,7 +43,7 @@ witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { witness_owner_error(witness); } -JEMALLOC_INLINE void +static inline void witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { tsd_t *tsd; witness_list_t *witnesses; @@ -82,7 +69,7 @@ witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { } } -JEMALLOC_INLINE void +static inline void witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, unsigned depth) { tsd_t *tsd; @@ -115,17 +102,17 @@ witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, } } -JEMALLOC_INLINE void +static inline void witness_assert_depth(tsdn_t *tsdn, unsigned depth) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth); } -JEMALLOC_INLINE void +static inline void witness_assert_lockless(tsdn_t *tsdn) { witness_assert_depth(tsdn, 0); } -JEMALLOC_INLINE void +static inline void witness_lock(tsdn_t *tsdn, witness_t *witness) { tsd_t *tsd; witness_list_t *witnesses; @@ -168,7 +155,7 @@ witness_lock(tsdn_t *tsdn, witness_t *witness) { ql_tail_insert(witnesses, witness, link); } -JEMALLOC_INLINE void +static inline void witness_unlock(tsdn_t *tsdn, witness_t *witness) { tsd_t *tsd; witness_list_t *witnesses; @@ -197,6 +184,5 @@ witness_unlock(tsdn_t *tsdn, witness_t *witness) { witness_assert_owner(tsdn, witness); } } -#endif #endif /* JEMALLOC_INTERNAL_WITNESS_INLINES_H */ diff --git a/src/arena.c b/src/arena.c index c2eca449..40561c03 100644 --- a/src/arena.c +++ b/src/arena.c @@ -359,7 +359,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } } -JEMALLOC_INLINE_C void * +static void * 
arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, const arena_bin_info_t *bin_info) { void *ret; @@ -377,7 +377,7 @@ arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, } #ifndef JEMALLOC_JET -JEMALLOC_INLINE_C +static #endif size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { @@ -414,7 +414,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { return regind; } -JEMALLOC_INLINE_C void +static void arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); diff --git a/src/ckh.c b/src/ckh.c index aaa97924..6576740b 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -54,7 +54,7 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); * Search bucket for key and return the cell number if found; SIZE_T_MAX * otherwise. */ -JEMALLOC_INLINE_C size_t +static size_t ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { ckhc_t *cell; unsigned i; @@ -72,7 +72,7 @@ ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { /* * Search table for key and return cell number if found; SIZE_T_MAX otherwise. */ -JEMALLOC_INLINE_C size_t +static size_t ckh_isearch(ckh_t *ckh, const void *key) { size_t hashes[2], bucket, cell; @@ -93,7 +93,7 @@ ckh_isearch(ckh_t *ckh, const void *key) { return cell; } -JEMALLOC_INLINE_C bool +static bool ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, const void *data) { ckhc_t *cell; @@ -125,7 +125,7 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * eviction/relocation procedure until either success or detection of an * eviction/relocation bucket cycle. 
*/ -JEMALLOC_INLINE_C bool +static bool ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, void const **argdata) { const void *key, *data, *tkey, *tdata; @@ -196,7 +196,7 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } } -JEMALLOC_INLINE_C bool +static bool ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { size_t hashes[2], bucket; const void *key = *argkey; @@ -226,7 +226,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { * Try to rebuild the hash table from scratch by inserting all items from the * old table into the new. */ -JEMALLOC_INLINE_C bool +static bool ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { size_t count, i, nins; const void *key, *data; diff --git a/src/ctl.c b/src/ctl.c index e9143dd4..72372d55 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -21,19 +21,19 @@ static ctl_arenas_t *ctl_arenas; /******************************************************************************/ /* Helpers for named and indexed nodes. */ -JEMALLOC_INLINE_C const ctl_named_node_t * +static const ctl_named_node_t * ctl_named_node(const ctl_node_t *node) { return ((node->named) ? (const ctl_named_node_t *)node : NULL); } -JEMALLOC_INLINE_C const ctl_named_node_t * +static const ctl_named_node_t * ctl_named_children(const ctl_named_node_t *node, size_t index) { const ctl_named_node_t *children = ctl_named_node(node->children); return (children ? &children[index] : NULL); } -JEMALLOC_INLINE_C const ctl_indexed_node_t * +static const ctl_indexed_node_t * ctl_indexed_node(const ctl_node_t *node) { return (!node->named ? 
(const ctl_indexed_node_t *)node : NULL); } diff --git a/src/jemalloc.c b/src/jemalloc.c index de858e36..51194992 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -288,7 +288,7 @@ malloc_initialized(void) { return (malloc_init_state == malloc_init_initialized); } -JEMALLOC_ALWAYS_INLINE_C bool +JEMALLOC_ALWAYS_INLINE bool malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) { return malloc_init_hard_a0(); @@ -296,7 +296,7 @@ malloc_init_a0(void) { return false; } -JEMALLOC_ALWAYS_INLINE_C bool +JEMALLOC_ALWAYS_INLINE bool malloc_init(void) { if (unlikely(!malloc_initialized()) && malloc_init_hard()) { return true; @@ -1490,7 +1490,7 @@ struct static_opts_s { bool slow; }; -JEMALLOC_ALWAYS_INLINE_C void +JEMALLOC_ALWAYS_INLINE void static_opts_init(static_opts_t *static_opts) { static_opts->may_overflow = false; static_opts->bump_empty_alloc = false; @@ -1523,7 +1523,7 @@ struct dynamic_opts_s { unsigned arena_ind; }; -JEMALLOC_ALWAYS_INLINE_C void +JEMALLOC_ALWAYS_INLINE void dynamic_opts_init(dynamic_opts_t *dynamic_opts) { dynamic_opts->result = NULL; dynamic_opts->num_items = 0; @@ -1535,7 +1535,7 @@ dynamic_opts_init(dynamic_opts_t *dynamic_opts) { } /* ind is ignored if dopts->alignment > 0. */ -JEMALLOC_ALWAYS_INLINE_C void * +JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t size, size_t usize, szind_t ind) { tcache_t *tcache; @@ -1577,7 +1577,7 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, arena, sopts->slow); } -JEMALLOC_ALWAYS_INLINE_C void * +JEMALLOC_ALWAYS_INLINE void * imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t usize, szind_t ind) { void *ret; @@ -1611,7 +1611,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, * Returns true if the allocation will overflow, and false otherwise. Sets * *size to the product either way. 
*/ -JEMALLOC_ALWAYS_INLINE_C bool +JEMALLOC_ALWAYS_INLINE bool compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, size_t *size) { /* @@ -1649,7 +1649,7 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, return true; } -JEMALLOC_ALWAYS_INLINE_C int +JEMALLOC_ALWAYS_INLINE int imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* Where the actual allocated memory will live. */ void *allocation = NULL; @@ -1850,7 +1850,7 @@ label_invalid_alignment: } /* Returns the errno-style error code of the allocation. */ -JEMALLOC_ALWAYS_INLINE_C int +JEMALLOC_ALWAYS_INLINE int imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) { if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2011,7 +2011,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, return p; } -JEMALLOC_ALWAYS_INLINE_C void * +JEMALLOC_ALWAYS_INLINE void * irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, alloc_ctx_t *alloc_ctx) { void *p; @@ -2036,7 +2036,7 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, return p; } -JEMALLOC_ALWAYS_INLINE_C void +JEMALLOC_ALWAYS_INLINE void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(slow_path || tsd_assert_fast(tsd)); if (tsd_reentrancy_level_get(tsd) == 0) { @@ -2074,7 +2074,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { } } -JEMALLOC_ALWAYS_INLINE_C void +JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(slow_path || tsd_assert_fast(tsd)); if (tsd_reentrancy_level_get(tsd) == 0) { @@ -2403,7 +2403,7 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, return p; } -JEMALLOC_ALWAYS_INLINE_C void * +JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, 
tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx) { @@ -2528,7 +2528,7 @@ label_oom: return NULL; } -JEMALLOC_ALWAYS_INLINE_C size_t +JEMALLOC_ALWAYS_INLINE size_t ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t usize; @@ -2555,7 +2555,7 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, return usize; } -JEMALLOC_ALWAYS_INLINE_C size_t +JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { size_t usize_max, usize; @@ -2727,7 +2727,7 @@ je_dallocx(void *ptr, int flags) { witness_assert_lockless(tsd_tsdn(tsd)); } -JEMALLOC_ALWAYS_INLINE_C size_t +JEMALLOC_ALWAYS_INLINE size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { witness_assert_lockless(tsdn); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 71999a8a..844ab398 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -40,7 +40,6 @@ void operator delete[](void *ptr, std::size_t size) noexcept; #endif template -JEMALLOC_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = je_malloc(size); diff --git a/src/prof.c b/src/prof.c index f2b21f72..99a4c8f0 100644 --- a/src/prof.c +++ b/src/prof.c @@ -145,7 +145,7 @@ static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); /******************************************************************************/ /* Red-black trees. 
*/ -JEMALLOC_INLINE_C int +static int prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { uint64_t a_thr_uid = a->thr_uid; uint64_t b_thr_uid = b->thr_uid; @@ -168,7 +168,7 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link, prof_tctx_comp) -JEMALLOC_INLINE_C int +static int prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { unsigned a_len = a->bt.len; unsigned b_len = b->bt.len; @@ -183,7 +183,7 @@ prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, prof_gctx_comp) -JEMALLOC_INLINE_C int +static int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { int ret; uint64_t a_uid = a->thr_uid; @@ -273,7 +273,7 @@ bt_init(prof_bt_t *bt, void **vec) { bt->len = 0; } -JEMALLOC_INLINE_C void +static void prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); @@ -286,7 +286,7 @@ prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); } -JEMALLOC_INLINE_C void +static void prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { cassert(config_prof); assert(tdata == prof_tdata_get(tsd, false)); @@ -1884,7 +1884,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -JEMALLOC_INLINE_C uint64_t +static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h index 0005df6b..a1885dbf 100644 --- a/test/include/test/SFMT-alti.h +++ b/test/include/test/SFMT-alti.h @@ -33,8 +33,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** - * @file SFMT-alti.h +/** + * @file SFMT-alti.h * * @brief SIMD oriented Fast Mersenne Twister(SFMT) * pseudorandom number generator @@ -95,7 +95,7 @@ vector unsigned int vec_recursion(vector unsigned int a, * This function fills the internal state array with pseudorandom * integers. */ -JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { +static inline void gen_rand_all(sfmt_t *ctx) { int i; vector unsigned int r, r1, r2; @@ -119,10 +119,10 @@ JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { * This function fills the user-specified array with pseudorandom * integers. * - * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. */ -JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { +static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { int i, j; vector unsigned int r, r1, r2; @@ -173,7 +173,7 @@ JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { * @param array an 128-bit array to be swaped. * @param size size of 128-bit array. */ -JEMALLOC_INLINE void swap(w128_t *array, int size) { +static inline void swap(w128_t *array, int size) { int i; const vector unsigned char perm = ALTI_SWAP; diff --git a/test/include/test/SFMT-sse2.h b/test/include/test/SFMT-sse2.h index 0314a163..169ad558 100644 --- a/test/include/test/SFMT-sse2.h +++ b/test/include/test/SFMT-sse2.h @@ -33,7 +33,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** +/** * @file SFMT-sse2.h * @brief SIMD oriented Fast Mersenne Twister(SFMT) for Intel SSE2 * @@ -60,10 +60,10 @@ * @param mask 128-bit mask * @return output */ -JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, +JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) { __m128i v, x, y, z; - + x = _mm_load_si128(a); y = _mm_srli_epi32(*b, SR1); z = _mm_srli_si128(c, SR2); @@ -81,7 +81,7 @@ JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, * This function fills the internal state array with pseudorandom * integers. */ -JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { +static inline void gen_rand_all(sfmt_t *ctx) { int i; __m128i r, r1, r2, mask; mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); @@ -108,10 +108,10 @@ JEMALLOC_INLINE void gen_rand_all(sfmt_t *ctx) { * This function fills the user-specified array with pseudorandom * integers. * - * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. 
*/ -JEMALLOC_INLINE void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { +static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { int i, j; __m128i r, r1, r2, mask; mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 4ad7484a..863fc55e 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -81,76 +81,62 @@ const char *get_idstring(void); int get_min_array_size32(void); int get_min_array_size64(void); -#ifndef JEMALLOC_ENABLE_INLINE -double to_real1(uint32_t v); -double genrand_real1(sfmt_t *ctx); -double to_real2(uint32_t v); -double genrand_real2(sfmt_t *ctx); -double to_real3(uint32_t v); -double genrand_real3(sfmt_t *ctx); -double to_res53(uint64_t v); -double to_res53_mix(uint32_t x, uint32_t y); -double genrand_res53(sfmt_t *ctx); -double genrand_res53_mix(sfmt_t *ctx); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(SFMT_C_)) /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ -JEMALLOC_INLINE double to_real1(uint32_t v) { +static inline double to_real1(uint32_t v) { return v * (1.0/4294967295.0); /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ -JEMALLOC_INLINE double genrand_real1(sfmt_t *ctx) { +static inline double genrand_real1(sfmt_t *ctx) { return to_real1(gen_rand32(ctx)); } /** generates a random number on [0,1)-real-interval */ -JEMALLOC_INLINE double to_real2(uint32_t v) { +static inline double to_real2(uint32_t v) { return v * (1.0/4294967296.0); /* divided by 2^32 */ } /** generates a random number on [0,1)-real-interval */ -JEMALLOC_INLINE double genrand_real2(sfmt_t *ctx) { +static inline double genrand_real2(sfmt_t *ctx) { return to_real2(gen_rand32(ctx)); } /** generates a random number on (0,1)-real-interval */ -JEMALLOC_INLINE double to_real3(uint32_t v) { +static inline double to_real3(uint32_t v) { return (((double)v) + 
0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } /** generates a random number on (0,1)-real-interval */ -JEMALLOC_INLINE double genrand_real3(sfmt_t *ctx) { +static inline double genrand_real3(sfmt_t *ctx) { return to_real3(gen_rand32(ctx)); } /** These real versions are due to Isaku Wada */ /** generates a random number on [0,1) with 53-bit resolution*/ -JEMALLOC_INLINE double to_res53(uint64_t v) { +static inline double to_res53(uint64_t v) { return v * (1.0/18446744073709551616.0L); } /** generates a random number on [0,1) with 53-bit resolution from two * 32 bit integers */ -JEMALLOC_INLINE double to_res53_mix(uint32_t x, uint32_t y) { +static inline double to_res53_mix(uint32_t x, uint32_t y) { return to_res53(x | ((uint64_t)y << 32)); } /** generates a random number on [0,1) with 53-bit resolution */ -JEMALLOC_INLINE double genrand_res53(sfmt_t *ctx) { +static inline double genrand_res53(sfmt_t *ctx) { return to_res53(gen_rand64(ctx)); } /** generates a random number on [0,1) with 53-bit resolution using 32bit integer. */ -JEMALLOC_INLINE double genrand_res53_mix(sfmt_t *ctx) { +static inline double genrand_res53_mix(sfmt_t *ctx) { uint32_t x, y; x = gen_rand32(ctx); @@ -158,4 +144,3 @@ JEMALLOC_INLINE double genrand_res53_mix(sfmt_t *ctx) { return to_res53_mix(x, y); } #endif -#endif diff --git a/test/include/test/math.h b/test/include/test/math.h index 94173bad..efba086d 100644 --- a/test/include/test/math.h +++ b/test/include/test/math.h @@ -1,12 +1,3 @@ -#ifndef JEMALLOC_ENABLE_INLINE -double ln_gamma(double x); -double i_gamma(double x, double p, double ln_gamma_p); -double pt_norm(double p); -double pt_chi2(double p, double df, double ln_gamma_df_2); -double pt_gamma(double p, double shape, double scale, double ln_gamma_shape); -#endif - -#if (defined(JEMALLOC_ENABLE_INLINE) || defined(MATH_C_)) /* * Compute the natural log of Gamma(x), accurate to 10 decimal places. 
* @@ -15,7 +6,7 @@ double pt_gamma(double p, double shape, double scale, double ln_gamma_shape); * Pike, M.C., I.D. Hill (1966) Algorithm 291: Logarithm of Gamma function * [S14]. Communications of the ACM 9(9):684. */ -JEMALLOC_INLINE double +static inline double ln_gamma(double x) { double f, z; @@ -50,7 +41,7 @@ ln_gamma(double x) { * Bhattacharjee, G.P. (1970) Algorithm AS 32: The incomplete Gamma integral. * Applied Statistics 19:285-287. */ -JEMALLOC_INLINE double +static inline double i_gamma(double x, double p, double ln_gamma_p) { double acu, factor, oflo, gin, term, rn, a, b, an, dif; double pn[6]; @@ -134,7 +125,7 @@ i_gamma(double x, double p, double ln_gamma_p) { * Wichura, M.J. (1988) Algorithm AS 241: The percentage points of the normal * distribution. Applied Statistics 37(3):477-484. */ -JEMALLOC_INLINE double +static inline double pt_norm(double p) { double q, r, ret; @@ -222,7 +213,7 @@ pt_norm(double p) { * Shea, B.L. (1991) Algorithm AS R85: A remark on AS 91: The percentage * points of the Chi^2 distribution. Applied Statistics 40(1):233-235. */ -JEMALLOC_INLINE double +static inline double pt_chi2(double p, double df, double ln_gamma_df_2) { double e, aa, xx, c, ch, a, q, p1, p2, t, x, b, s1, s2, s3, s4, s5, s6; unsigned i; @@ -309,8 +300,7 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { * compute the upper limit on the definite integral from [0..z] that satisfies * p. */ -JEMALLOC_INLINE double +static inline double pt_gamma(double p, double shape, double scale, double ln_gamma_shape) { return pt_chi2(p, shape * 2.0, ln_gamma_shape) * 0.5 * scale; } -#endif diff --git a/test/src/SFMT.c b/test/src/SFMT.c index 4dc32599..c05e2183 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -33,7 +33,7 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** +/** * @file SFMT.c * @brief SIMD oriented Fast Mersenne Twister(SFMT) * @@ -108,7 +108,7 @@ struct sfmt_s { /*-------------------------------------- FILE GLOBAL VARIABLES - internal state, index counter and flag + internal state, index counter and flag --------------------------------------*/ /** a parity check vector which certificate the period of 2^{MEXP} */ @@ -117,18 +117,18 @@ static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; /*---------------- STATIC FUNCTIONS ----------------*/ -JEMALLOC_INLINE_C int idxof(int i); +static inline int idxof(int i); #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift); -JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift); +static inline void rshift128(w128_t *out, w128_t const *in, int shift); +static inline void lshift128(w128_t *out, w128_t const *in, int shift); #endif -JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx); -JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); -JEMALLOC_INLINE_C uint32_t func1(uint32_t x); -JEMALLOC_INLINE_C uint32_t func2(uint32_t x); +static inline void gen_rand_all(sfmt_t *ctx); +static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); +static inline uint32_t func1(uint32_t x); +static inline uint32_t func2(uint32_t x); static void period_certification(sfmt_t *ctx); #if defined(BIG_ENDIAN64) && !defined(ONLY64) -JEMALLOC_INLINE_C void swap(w128_t *array, int size); +static inline void swap(w128_t *array, int size); #endif #if defined(HAVE_ALTIVEC) @@ -138,15 +138,15 @@ JEMALLOC_INLINE_C void swap(w128_t *array, int size); #endif /** - * This function simulate a 64-bit index of LITTLE ENDIAN + * This function simulate a 64-bit index of LITTLE ENDIAN * in BIG ENDIAN machine. 
*/ #ifdef ONLY64 -JEMALLOC_INLINE_C int idxof(int i) { +static inline int idxof(int i) { return i ^ 1; } #else -JEMALLOC_INLINE_C int idxof(int i) { +static inline int idxof(int i) { return i; } #endif @@ -160,7 +160,7 @@ JEMALLOC_INLINE_C int idxof(int i) { */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) #ifdef ONLY64 -JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) { +static inline void rshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); @@ -175,7 +175,7 @@ JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) { out->u[3] = (uint32_t)oh; } #else -JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) { +static inline void rshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); @@ -199,7 +199,7 @@ JEMALLOC_INLINE_C void rshift128(w128_t *out, w128_t const *in, int shift) { * @param shift the shift value */ #ifdef ONLY64 -JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) { +static inline void lshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); @@ -214,7 +214,7 @@ JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) { out->u[3] = (uint32_t)oh; } #else -JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) { +static inline void lshift128(w128_t *out, w128_t const *in, int shift) { uint64_t th, tl, oh, ol; th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); @@ -241,37 +241,37 @@ JEMALLOC_INLINE_C void lshift128(w128_t *out, w128_t const *in, int shift) { */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) #ifdef ONLY64 -JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, +static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, 
w128_t *d) { w128_t x; w128_t y; lshift128(&x, a, SL2); rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] ^ (d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] ^ (d->u[3] << SL1); } #else -JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, +static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { w128_t x; w128_t y; lshift128(&x, a, SL2); rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] ^ (d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] ^ (d->u[3] << SL1); } #endif @@ -282,7 +282,7 @@ JEMALLOC_INLINE_C void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, * This function fills the internal state array with pseudorandom * integers. 
*/ -JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx) { +static inline void gen_rand_all(sfmt_t *ctx) { int i; w128_t *r1, *r2; @@ -306,10 +306,10 @@ JEMALLOC_INLINE_C void gen_rand_all(sfmt_t *ctx) { * This function fills the user-specified array with pseudorandom * integers. * - * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. */ -JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { +static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { int i, j; w128_t *r1, *r2; @@ -343,7 +343,7 @@ JEMALLOC_INLINE_C void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { #endif #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) -JEMALLOC_INLINE_C void swap(w128_t *array, int size) { +static inline void swap(w128_t *array, int size) { int i; uint32_t x, y; @@ -476,7 +476,7 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { * This function generates and returns 64-bit pseudorandom number. * init_gen_rand or init_by_array must be called before this function. * The function gen_rand64 should not be called after gen_rand32, - * unless an initialization is again executed. + * unless an initialization is again executed. 
* @return 64-bit pseudorandom number */ uint64_t gen_rand64(sfmt_t *ctx) { @@ -618,7 +618,7 @@ sfmt_t *init_gen_rand(uint32_t seed) { psfmt32[idxof(0)] = seed; for (i = 1; i < N32; i++) { - psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] + psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] ^ (psfmt32[idxof(i - 1)] >> 30)) + i; } @@ -668,7 +668,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) { } else { count = N32; } - r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] + r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] ^ psfmt32[idxof(N32 - 1)]); psfmt32[idxof(mid)] += r; r += key_length; @@ -677,7 +677,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) { count--; for (i = 1, j = 0; (j < count) && (j < key_length); j++) { - r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] ^ psfmt32[idxof((i + N32 - 1) % N32)]); psfmt32[idxof((i + mid) % N32)] += r; r += init_key[j] + i; @@ -686,7 +686,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) { i = (i + 1) % N32; } for (; j < count; j++) { - r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] ^ psfmt32[idxof((i + N32 - 1) % N32)]); psfmt32[idxof((i + mid) % N32)] += r; r += i; @@ -695,7 +695,7 @@ sfmt_t *init_by_array(uint32_t *init_key, int key_length) { i = (i + 1) % N32; } for (j = 0; j < N32; j++) { - r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] + r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] + psfmt32[idxof((i + N32 - 1) % N32)]); psfmt32[idxof((i + mid) % N32)] ^= r; r -= i; diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 73cbcab0..988b7938 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -JEMALLOC_INLINE_C void +static inline void time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t 
niter, void (*func)(void)) { uint64_t i; From fa3ad730c492c50f19fc68050ea5d5175b1df3cb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 19 Apr 2017 12:48:50 -0700 Subject: [PATCH 0825/2608] Header refactoring: prng module - remove from the catchall and unify. --- include/jemalloc/internal/extent_inlines.h | 1 + .../internal/jemalloc_internal_includes.h | 2 - .../internal/{prng_inlines.h => prng.h} | 50 +++++++++++++++++-- include/jemalloc/internal/prng_types.h | 29 ----------- include/jemalloc/internal/prof_structs.h | 1 + src/ckh.c | 1 + 6 files changed, 49 insertions(+), 35 deletions(-) rename include/jemalloc/internal/{prng_inlines.h => prng.h} (65%) delete mode 100644 include/jemalloc/internal/prng_types.h diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 22d45ce1..a73b6530 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H #define JEMALLOC_INTERNAL_EXTENT_INLINES_H +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" static inline arena_t * diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 669194d0..5e80f96c 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/prng_types.h" #include "jemalloc/internal/ticker_types.h" #include "jemalloc/internal/ckh_types.h" #include "jemalloc/internal/size_classes.h" @@ -108,7 +107,6 @@ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/prng_inlines.h" #include "jemalloc/internal/ticker_inlines.h" #include "jemalloc/internal/tsd_inlines.h" #include 
"jemalloc/internal/witness_inlines.h" diff --git a/include/jemalloc/internal/prng_inlines.h b/include/jemalloc/internal/prng.h similarity index 65% rename from include/jemalloc/internal/prng_inlines.h rename to include/jemalloc/internal/prng.h index 0275dfc4..15cc2d18 100644 --- a/include/jemalloc/internal/prng_inlines.h +++ b/include/jemalloc/internal/prng.h @@ -1,9 +1,37 @@ -#ifndef JEMALLOC_INTERNAL_PRNG_INLINES_H -#define JEMALLOC_INTERNAL_PRNG_INLINES_H +#ifndef JEMALLOC_INTERNAL_PRNG_H +#define JEMALLOC_INTERNAL_PRNG_H #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" +/* + * Simple linear congruential pseudo-random number generator: + * + * prng(y) = (a*x + c) % m + * + * where the following constants ensure maximal period: + * + * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. + * c == Odd number (relatively prime to 2^n). + * m == 2^32 + * + * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. + * + * This choice of m has the disadvantage that the quality of the bits is + * proportional to bit position. For example, the lowest bit has a cycle of 2, + * the next has a cycle of 4, etc. For this reason, we prefer to use the upper + * bits. 
+ */ + +/******************************************************************************/ +/* INTERNAL DEFINITIONS -- IGNORE */ +/******************************************************************************/ +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) + +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) + JEMALLOC_ALWAYS_INLINE uint32_t prng_state_next_u32(uint32_t state) { return (state * PRNG_A_32) + PRNG_C_32; @@ -25,6 +53,16 @@ prng_state_next_zu(size_t state) { #endif } +/******************************************************************************/ +/* BEGIN PUBLIC API */ +/******************************************************************************/ + +/* + * The prng_lg_range functions give a uniform int in the half-open range [0, + * 2**lg_range). If atomic is true, they do so safely from multiple threads. + * Multithreaded 64-bit prngs aren't supported. + */ + JEMALLOC_ALWAYS_INLINE uint32_t prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { uint32_t ret, state0, state1; @@ -48,7 +86,6 @@ prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { return ret; } -/* 64-bit atomic operations cannot be supported on all relevant platforms. */ JEMALLOC_ALWAYS_INLINE uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range) { uint64_t ret, state1; @@ -86,6 +123,11 @@ prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { return ret; } +/* + * The prng_range functions behave like the prng_lg_range, but return a result + * in [0, range) instead of [0, 2**lg_range). 
+ */ + JEMALLOC_ALWAYS_INLINE uint32_t prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { uint32_t ret; @@ -140,4 +182,4 @@ prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { return ret; } -#endif /* JEMALLOC_INTERNAL_PRNG_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_PRNG_H */ diff --git a/include/jemalloc/internal/prng_types.h b/include/jemalloc/internal/prng_types.h deleted file mode 100644 index 3e8e1834..00000000 --- a/include/jemalloc/internal/prng_types.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PRNG_TYPES_H -#define JEMALLOC_INTERNAL_PRNG_TYPES_H - -/* - * Simple linear congruential pseudo-random number generator: - * - * prng(y) = (a*x + c) % m - * - * where the following constants ensure maximal period: - * - * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4. - * c == Odd number (relatively prime to 2^n). - * m == 2^32 - * - * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints. - * - * This choice of m has the disadvantage that the quality of the bits is - * proportional to bit position. For example, the lowest bit has a cycle of 2, - * the next has a cycle of 4, etc. For this reason, we prefer to use the upper - * bits. 
- */ - -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) - -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) - -#endif /* JEMALLOC_INTERNAL_PRNG_TYPES_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index e1936769..82080aa1 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H #define JEMALLOC_INTERNAL_PROF_STRUCTS_H +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/rb.h" struct prof_bt_s { diff --git a/src/ckh.c b/src/ckh.c index 6576740b..db52a845 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -40,6 +40,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" /******************************************************************************/ From bf2dc7e67840807fb90451ab34e7150401f7f7c4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 19 Apr 2017 13:39:33 -0700 Subject: [PATCH 0826/2608] Header refactoring: ticker module - remove from the catchall and unify. 
--- include/jemalloc/internal/arena_inlines_b.h | 1 + include/jemalloc/internal/arena_structs_b.h | 1 + .../internal/jemalloc_internal_includes.h | 3 --- .../internal/jemalloc_internal_inlines_a.h | 1 + include/jemalloc/internal/tcache_inlines.h | 1 + include/jemalloc/internal/tcache_structs.h | 1 + .../internal/{ticker_inlines.h => ticker.h} | 19 ++++++++++++++++--- include/jemalloc/internal/ticker_structs.h | 9 --------- include/jemalloc/internal/ticker_types.h | 6 ------ src/jemalloc.c | 1 + test/unit/decay.c | 2 ++ test/unit/ticker.c | 2 ++ 12 files changed, 26 insertions(+), 21 deletions(-) rename include/jemalloc/internal/{ticker_inlines.h => ticker.h} (52%) delete mode 100644 include/jemalloc/internal/ticker_structs.h delete mode 100644 include/jemalloc/internal/ticker_types.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 526103bc..054757d4 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/ticker.h" static inline szind_t arena_bin_index(arena_t *arena, arena_bin_t *bin) { diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index f2195f68..7b133f2e 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/ticker.h" /* * Read-only information associated with each element of arena_t's bins array diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 5e80f96c..8ce7864f 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ 
b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/ticker_types.h" #include "jemalloc/internal/ckh_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" @@ -63,7 +62,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/ticker_structs.h" #include "jemalloc/internal/ckh_structs.h" #include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" @@ -107,7 +105,6 @@ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/ticker_inlines.h" #include "jemalloc/internal/tsd_inlines.h" #include "jemalloc/internal/witness_inlines.h" #include "jemalloc/internal/mutex_inlines.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 9cb933c2..06a5c717 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE pszind_t psz2ind(size_t psz) { diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8b42af66..25931d82 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_INLINES_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" static inline bool diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h 
index fe27f362..c01098f1 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/ticker.h" /* * Read-only information associated with each element of tcache_t's tbins array diff --git a/include/jemalloc/internal/ticker_inlines.h b/include/jemalloc/internal/ticker.h similarity index 52% rename from include/jemalloc/internal/ticker_inlines.h rename to include/jemalloc/internal/ticker.h index cd5821f9..faaac91d 100644 --- a/include/jemalloc/internal/ticker_inlines.h +++ b/include/jemalloc/internal/ticker.h @@ -1,5 +1,18 @@ -#ifndef JEMALLOC_INTERNAL_TICKER_INLINES_H -#define JEMALLOC_INTERNAL_TICKER_INLINES_H +#ifndef JEMALLOC_INTERNAL_TICKER_H +#define JEMALLOC_INTERNAL_TICKER_H + +/** + * A ticker makes it easy to count-down events until some limit. You + * ticker_init the ticker to trigger every nticks events. You then notify it + * that an event has occurred with calls to ticker_tick (or that nticks events + * have occurred with a call to ticker_ticks), which will return true (and reset + * the counter) if the countdown hit zero. 
+ */ + +typedef struct { + int32_t tick; + int32_t nticks; +} ticker_t; static inline void ticker_init(ticker_t *ticker, int32_t nticks) { @@ -32,4 +45,4 @@ ticker_tick(ticker_t *ticker) { return ticker_ticks(ticker, 1); } -#endif /* JEMALLOC_INTERNAL_TICKER_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/include/jemalloc/internal/ticker_structs.h b/include/jemalloc/internal/ticker_structs.h deleted file mode 100644 index e30c4e21..00000000 --- a/include/jemalloc/internal/ticker_structs.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TICKER_STRUCTS_H -#define JEMALLOC_INTERNAL_TICKER_STRUCTS_H - -struct ticker_s { - int32_t tick; - int32_t nticks; -}; - -#endif /* JEMALLOC_INTERNAL_TICKER_STRUCTS_H */ diff --git a/include/jemalloc/internal/ticker_types.h b/include/jemalloc/internal/ticker_types.h deleted file mode 100644 index 62d67f3d..00000000 --- a/include/jemalloc/internal/ticker_types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TICKER_TYPES_H -#define JEMALLOC_INTERNAL_TICKER_TYPES_H - -typedef struct ticker_s ticker_t; - -#endif /* JEMALLOC_INTERNAL_TICKER_TYPES_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 51194992..108258bd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/spin.h" +#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/test/unit/decay.c b/test/unit/decay.c index 26359faf..389f6e06 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/ticker.h" + static nstime_monotonic_t *nstime_monotonic_orig; static nstime_update_t *nstime_update_orig; diff --git a/test/unit/ticker.c b/test/unit/ticker.c index c2ad7295..e5790a31 100644 --- a/test/unit/ticker.c +++ 
b/test/unit/ticker.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/ticker.h" + TEST_BEGIN(test_ticker_tick) { #define NREPS 2 #define NTICKS 3 From 68da2361d2cbb4cc34a784093bd2bee94bede543 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 19 Apr 2017 14:56:42 -0700 Subject: [PATCH 0827/2608] Header refactoring: ckh module - remove from the catchall and unify. --- include/jemalloc/internal/ckh.h | 101 ++++++++++++++++++ include/jemalloc/internal/ckh_externs.h | 18 ---- include/jemalloc/internal/ckh_structs.h | 41 ------- include/jemalloc/internal/ckh_types.h | 22 ---- .../internal/jemalloc_internal_includes.h | 3 - include/jemalloc/internal/prof_structs.h | 1 + src/ckh.c | 3 + src/prof.c | 1 + 8 files changed, 106 insertions(+), 84 deletions(-) create mode 100644 include/jemalloc/internal/ckh.h delete mode 100644 include/jemalloc/internal/ckh_externs.h delete mode 100644 include/jemalloc/internal/ckh_structs.h delete mode 100644 include/jemalloc/internal/ckh_types.h diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h new file mode 100644 index 00000000..96922e04 --- /dev/null +++ b/include/jemalloc/internal/ckh.h @@ -0,0 +1,101 @@ +#ifndef JEMALLOC_INTERNAL_CKH_H +#define JEMALLOC_INTERNAL_CKH_H + +#include "jemalloc/internal/tsd_types.h" + +/* Cuckoo hashing implementation. Skip to the end for the interface. */ + +/******************************************************************************/ +/* INTERNAL DEFINITIONS -- IGNORE */ +/******************************************************************************/ + +/* Maintain counters used to get an idea of performance. */ +/* #define CKH_COUNT */ +/* Print counter values in ckh_delete() (requires CKH_COUNT). */ +/* #define CKH_VERBOSE */ + +/* + * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit + * one bucket per L1 cache line. 
+ */ +#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) + +/* Typedefs to allow easy function pointer passing. */ +typedef void ckh_hash_t (const void *, size_t[2]); +typedef bool ckh_keycomp_t (const void *, const void *); + +/* Hash table cell. */ +typedef struct { + const void *key; + const void *data; +} ckhc_t; + +/* The hash table itself. */ +typedef struct { +#ifdef CKH_COUNT + /* Counters used to get an idea of performance. */ + uint64_t ngrows; + uint64_t nshrinks; + uint64_t nshrinkfails; + uint64_t ninserts; + uint64_t nrelocs; +#endif + + /* Used for pseudo-random number generation. */ + uint64_t prng_state; + + /* Total number of items. */ + size_t count; + + /* + * Minimum and current number of hash table buckets. There are + * 2^LG_CKH_BUCKET_CELLS cells per bucket. + */ + unsigned lg_minbuckets; + unsigned lg_curbuckets; + + /* Hash and comparison functions. */ + ckh_hash_t *hash; + ckh_keycomp_t *keycomp; + + /* Hash table with 2^lg_curbuckets buckets. */ + ckhc_t *tab; +} ckh_t; + +/******************************************************************************/ +/* BEGIN PUBLIC API */ +/******************************************************************************/ + +/* Lifetime management. Minitems is the initial capacity. */ +bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, + ckh_keycomp_t *keycomp); +void ckh_delete(tsd_t *tsd, ckh_t *ckh); + +/* Get the number of elements in the set. */ +size_t ckh_count(ckh_t *ckh); + +/* + * To iterate over the elements in the table, initialize *tabind to 0 and call + * this function until it returns true. Each call that returns false will + * update *key and *data to the next element in the table, assuming the pointers + * are non-NULL. + */ +bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); + +/* + * Basic hash table operations -- insert, removal, lookup. For ckh_remove and + * ckh_search, key or data can be NULL. 
The hash-table only stores pointers to + * the key and value, and doesn't do any lifetime management. + */ +bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); +bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, + void **data); +bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); + +/* Some useful hash and comparison functions for strings and pointers. */ +void ckh_string_hash(const void *key, size_t r_hash[2]); +bool ckh_string_keycomp(const void *k1, const void *k2); +void ckh_pointer_hash(const void *key, size_t r_hash[2]); +bool ckh_pointer_keycomp(const void *k1, const void *k2); + +#endif /* JEMALLOC_INTERNAL_CKH_H */ diff --git a/include/jemalloc/internal/ckh_externs.h b/include/jemalloc/internal/ckh_externs.h deleted file mode 100644 index c912f72b..00000000 --- a/include/jemalloc/internal/ckh_externs.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_CKH_EXTERNS_H -#define JEMALLOC_INTERNAL_CKH_EXTERNS_H - -bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, - ckh_keycomp_t *keycomp); -void ckh_delete(tsd_t *tsd, ckh_t *ckh); -size_t ckh_count(ckh_t *ckh); -bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); -bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data); -bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); -void ckh_string_hash(const void *key, size_t r_hash[2]); -bool ckh_string_keycomp(const void *k1, const void *k2); -void ckh_pointer_hash(const void *key, size_t r_hash[2]); -bool ckh_pointer_keycomp(const void *k1, const void *k2); - -#endif /* JEMALLOC_INTERNAL_CKH_EXTERNS_H */ diff --git a/include/jemalloc/internal/ckh_structs.h b/include/jemalloc/internal/ckh_structs.h deleted file mode 100644 index a800cbc2..00000000 --- a/include/jemalloc/internal/ckh_structs.h +++ /dev/null @@ -1,41 
+0,0 @@ -#ifndef JEMALLOC_INTERNAL_CKH_STRUCTS_H -#define JEMALLOC_INTERNAL_CKH_STRUCTS_H - -/* Hash table cell. */ -struct ckhc_s { - const void *key; - const void *data; -}; - -struct ckh_s { -#ifdef CKH_COUNT - /* Counters used to get an idea of performance. */ - uint64_t ngrows; - uint64_t nshrinks; - uint64_t nshrinkfails; - uint64_t ninserts; - uint64_t nrelocs; -#endif - - /* Used for pseudo-random number generation. */ - uint64_t prng_state; - - /* Total number of items. */ - size_t count; - - /* - * Minimum and current number of hash table buckets. There are - * 2^LG_CKH_BUCKET_CELLS cells per bucket. - */ - unsigned lg_minbuckets; - unsigned lg_curbuckets; - - /* Hash and comparison functions. */ - ckh_hash_t *hash; - ckh_keycomp_t *keycomp; - - /* Hash table with 2^lg_curbuckets buckets. */ - ckhc_t *tab; -}; - -#endif /* JEMALLOC_INTERNAL_CKH_STRUCTS_H */ diff --git a/include/jemalloc/internal/ckh_types.h b/include/jemalloc/internal/ckh_types.h deleted file mode 100644 index b5911db4..00000000 --- a/include/jemalloc/internal/ckh_types.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_CKH_TYPES_H -#define JEMALLOC_INTERNAL_CKH_TYPES_H - -typedef struct ckh_s ckh_t; -typedef struct ckhc_s ckhc_t; - -/* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); - -/* Maintain counters used to get an idea of performance. */ -/* #define CKH_COUNT */ -/* Print counter values in ckh_delete() (requires CKH_COUNT). */ -/* #define CKH_VERBOSE */ - -/* - * There are 2^LG_CKH_BUCKET_CELLS cells in each hash table bucket. Try to fit - * one bucket per L1 cache line. 
- */ -#define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) - -#endif /* JEMALLOC_INTERNAL_CKH_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 8ce7864f..f4a19862 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/ckh_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats_types.h" @@ -62,7 +61,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/ckh_structs.h" #include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" #include "jemalloc/internal/stats_structs.h" @@ -83,7 +81,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/ckh_externs.h" #include "jemalloc/internal/stats_externs.h" #include "jemalloc/internal/ctl_externs.h" #include "jemalloc/internal/witness_externs.h" diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 82080aa1..a26a0420 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H #define JEMALLOC_INTERNAL_PROF_STRUCTS_H +#include "jemalloc/internal/ckh.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/rb.h" diff --git a/src/ckh.c b/src/ckh.c index db52a845..8f0bac07 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -36,6 +36,9 @@ ******************************************************************************/ #define JEMALLOC_CKH_C_ #include 
"jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/ckh.h" + #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" diff --git a/src/prof.c b/src/prof.c index 99a4c8f0..a8f6aed2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" #include "jemalloc/internal/malloc_io.h" /******************************************************************************/ From 31b43219dbf397f273350a66a3a594fdfbaa1e00 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 19 Apr 2017 15:09:01 -0700 Subject: [PATCH 0828/2608] Header refactoring: size_classes module - remove from the catchall --- include/jemalloc/internal/arena_externs.h | 2 ++ include/jemalloc/internal/arena_inlines_b.h | 1 + include/jemalloc/internal/arena_structs_b.h | 2 ++ include/jemalloc/internal/base_structs.h | 3 +++ include/jemalloc/internal/bitmap_types.h | 2 ++ include/jemalloc/internal/ctl_structs.h | 2 ++ include/jemalloc/internal/extent_structs.h | 1 + include/jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/jemalloc_internal_inlines_a.h | 1 + include/jemalloc/internal/rtree_inlines.h | 1 + include/jemalloc/internal/rtree_types.h | 2 ++ include/jemalloc/internal/stats_structs.h | 1 + include/jemalloc/internal/tcache_externs.h | 2 ++ include/jemalloc/internal/tcache_inlines.h | 1 + include/jemalloc/internal/tcache_structs.h | 1 + include/jemalloc/internal/tcache_types.h | 2 ++ src/arena.c | 1 + src/ctl.c | 1 + src/jemalloc.c | 1 + src/tcache.c | 1 + 20 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 0f86dc05..241165ec 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define 
JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/size_classes.h" + static const size_t large_pad = #ifdef JEMALLOC_CACHE_OBLIVIOUS PAGE diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 054757d4..ca7af7fd 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" static inline szind_t diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 7b133f2e..dbff7876 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -2,8 +2,10 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" /* diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index d79f38ee..1d0a1f3a 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H #define JEMALLOC_INTERNAL_BASE_STRUCTS_H +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" + /* Embedded at the beginning of every block of base-managed virtual memory. */ struct base_block_s { /* Total size of block's virtual memory mapping. 
*/ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h index b334769f..95f0dd12 100644 --- a/include/jemalloc/internal/bitmap_types.h +++ b/include/jemalloc/internal/bitmap_types.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BITMAP_TYPES_H #define JEMALLOC_INTERNAL_BITMAP_TYPES_H +#include "jemalloc/internal/size_classes.h" + /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ #if LG_SLAB_MAXREGS > LG_CEIL_NSIZES /* Maximum bitmap bit count is determined by maximum regions per slab. */ diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index 2b48a68e..b0c37c9e 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -1,7 +1,9 @@ #ifndef JEMALLOC_INTERNAL_CTL_STRUCTS_H #define JEMALLOC_INTERNAL_CTL_STRUCTS_H +#include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" struct ctl_node_s { bool named; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 7066b8f6..aa0a3a75 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/size_classes.h" typedef enum { extent_state_active = 0, diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index f4a19862..3794c34c 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats_types.h" #include 
"jemalloc/internal/ctl_types.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 06a5c717..1755c3ac 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE pszind_t diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index f4f7c2ca..7bc52383 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_RTREE_INLINES_H #define JEMALLOC_INTERNAL_RTREE_INLINES_H +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" JEMALLOC_ALWAYS_INLINE uintptr_t diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index d9a4cf4d..402f741c 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_RTREE_TYPES_H #define JEMALLOC_INTERNAL_RTREE_TYPES_H +#include "jemalloc/internal/size_classes.h" + /* * This radix tree implementation is tailored to the singular purpose of * associating metadata with extents that are currently owned by jemalloc. 
diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats_structs.h index dc994b52..3693a854 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_STATS_STRUCTS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/size_classes.h" #ifdef JEMALLOC_ATOMIC_U64 typedef atomic_u64_t arena_stats_u64_t; diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 75ff3214..abe133fa 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#include "jemalloc/internal/size_classes.h" + extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 25931d82..8a65ba2b 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_INLINES_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index c01098f1..cd0cea55 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" /* diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index a60db6ff..1155d62c 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -1,6 +1,8 @@ #ifndef 
JEMALLOC_INTERNAL_TCACHE_TYPES_H #define JEMALLOC_INTERNAL_TCACHE_TYPES_H +#include "jemalloc/internal/size_classes.h" + typedef struct tcache_bin_info_s tcache_bin_info_t; typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; diff --git a/src/arena.c b/src/arena.c index 40561c03..77f72828 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index 72372d55..c054ded6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 108258bd..602cf677 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" diff --git a/src/tcache.c b/src/tcache.c index 72d1e47f..c272a3c4 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/size_classes.h" /******************************************************************************/ /* Data. */ From 36abf78aa924f5f038f94443357e89ae86ae3510 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 19 Apr 2017 15:15:57 -0700 Subject: [PATCH 0829/2608] Header refactoring: move smoothstep.h out of the catchall. 
--- include/jemalloc/internal/arena_structs_b.h | 1 + include/jemalloc/internal/jemalloc_internal_includes.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index dbff7876..8a7d7a12 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/ticker.h" /* diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 3794c34c..8b650fbf 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/stats_types.h" #include "jemalloc/internal/ctl_types.h" #include "jemalloc/internal/witness_types.h" From d6b5c7e0f6973fa95d398cb9df3f48ac205e1fc3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 20 Apr 2017 13:38:12 -0700 Subject: [PATCH 0830/2608] Header refactoring: stats - unify and remove from catchall --- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/arena_structs_b.h | 1 + include/jemalloc/internal/ctl_structs.h | 1 + .../internal/jemalloc_internal_includes.h | 3 -- .../internal/{stats_structs.h => stats.h} | 38 ++++++++++++------- include/jemalloc/internal/stats_externs.h | 9 ----- include/jemalloc/internal/stats_types.h | 10 ----- include/jemalloc/internal/tcache_structs.h | 1 + 8 files changed, 29 insertions(+), 35 deletions(-) rename include/jemalloc/internal/{stats_structs.h => stats.h} (81%) delete mode 100644 include/jemalloc/internal/stats_externs.h delete mode 
100644 include/jemalloc/internal/stats_types.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 241165ec..1e13efd3 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" static const size_t large_pad = #ifdef JEMALLOC_CACHE_OBLIVIOUS diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 8a7d7a12..bef73aaf 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -7,6 +7,7 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" +#include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" /* diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h index b0c37c9e..c64820d2 100644 --- a/include/jemalloc/internal/ctl_structs.h +++ b/include/jemalloc/internal/ctl_structs.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" struct ctl_node_s { bool named; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 8b650fbf..0cb6183d 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/stats_types.h" #include "jemalloc/internal/ctl_types.h" #include "jemalloc/internal/witness_types.h" #include "jemalloc/internal/mutex_types.h" @@ -61,7 +60,6 @@ #include "jemalloc/internal/witness_structs.h" 
#include "jemalloc/internal/mutex_structs.h" -#include "jemalloc/internal/stats_structs.h" #include "jemalloc/internal/ctl_structs.h" #include "jemalloc/internal/bitmap_structs.h" #include "jemalloc/internal/arena_structs_a.h" @@ -79,7 +77,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/stats_externs.h" #include "jemalloc/internal/ctl_externs.h" #include "jemalloc/internal/witness_externs.h" #include "jemalloc/internal/mutex_externs.h" diff --git a/include/jemalloc/internal/stats_structs.h b/include/jemalloc/internal/stats.h similarity index 81% rename from include/jemalloc/internal/stats_structs.h rename to include/jemalloc/internal/stats.h index 3693a854..9414200f 100644 --- a/include/jemalloc/internal/stats_structs.h +++ b/include/jemalloc/internal/stats.h @@ -1,9 +1,21 @@ -#ifndef JEMALLOC_INTERNAL_STATS_STRUCTS_H -#define JEMALLOC_INTERNAL_STATS_STRUCTS_H +#ifndef JEMALLOC_INTERNAL_STATS_H +#define JEMALLOC_INTERNAL_STATS_H #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/size_classes.h" +/* The opt.stats_print storage. */ +extern bool opt_stats_print; + +/* Implements je_malloc_stats_print. */ +void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, + const char *opts); + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ #ifdef JEMALLOC_ATOMIC_U64 typedef atomic_u64_t arena_stats_u64_t; #else @@ -11,15 +23,15 @@ typedef atomic_u64_t arena_stats_u64_t; typedef uint64_t arena_stats_u64_t; #endif -struct tcache_bin_stats_s { +typedef struct tcache_bin_stats_s { /* * Number of allocation requests that corresponded to the size of this * bin. 
*/ uint64_t nrequests; -}; +} tcache_bin_stats_t; -struct malloc_bin_stats_s { +typedef struct malloc_bin_stats_s { /* * Total number of allocation/deallocation requests served directly by * the bin. Note that tcache may allocate an object, then recycle it @@ -61,9 +73,9 @@ struct malloc_bin_stats_s { size_t curslabs; mutex_prof_data_t mutex_data; -}; +} malloc_bin_stats_t; -struct malloc_large_stats_s { +typedef struct malloc_large_stats_s { /* * Total number of allocation/deallocation requests served directly by * the arena. @@ -80,23 +92,23 @@ struct malloc_large_stats_s { /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. */ -}; +} malloc_large_stats_t; -struct decay_stats_s { +typedef struct decay_stats_s { /* Total number of purge sweeps. */ arena_stats_u64_t npurge; /* Total number of madvise calls made. */ arena_stats_u64_t nmadvise; /* Total number of pages purged. */ arena_stats_u64_t purged; -}; +} decay_stats_t; /* * Arena stats. Note that fields marked "derived" are not directly maintained * within the arena code; rather their values are derived during stats merge * requests. */ -struct arena_stats_s { +typedef struct arena_stats_s { #ifndef JEMALLOC_ATOMIC_U64 malloc_mutex_t mtx; #endif @@ -131,6 +143,6 @@ struct arena_stats_s { /* One element for each large size class. 
*/ malloc_large_stats_t lstats[NSIZES - NBINS]; -}; +} arena_stats_t; -#endif /* JEMALLOC_INTERNAL_STATS_STRUCTS_H */ +#endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/include/jemalloc/internal/stats_externs.h b/include/jemalloc/internal/stats_externs.h deleted file mode 100644 index 519441c9..00000000 --- a/include/jemalloc/internal/stats_externs.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_STATS_EXTERNS_H -#define JEMALLOC_INTERNAL_STATS_EXTERNS_H - -extern bool opt_stats_print; - -void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts); - -#endif /* JEMALLOC_INTERNAL_STATS_EXTERNS_H */ diff --git a/include/jemalloc/internal/stats_types.h b/include/jemalloc/internal/stats_types.h deleted file mode 100644 index 48483388..00000000 --- a/include/jemalloc/internal/stats_types.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_STATS_TYPES_H -#define JEMALLOC_INTERNAL_STATS_TYPES_H - -typedef struct tcache_bin_stats_s tcache_bin_stats_t; -typedef struct malloc_bin_stats_s malloc_bin_stats_t; -typedef struct malloc_large_stats_s malloc_large_stats_t; -typedef struct decay_stats_s decay_stats_t; -typedef struct arena_stats_s arena_stats_t; - -#endif /* JEMALLOC_INTERNAL_STATS_TYPES_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index cd0cea55..7c0afb0a 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" /* From 120c7a747fe9a94bd3bac88080789b7c98a760d1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 20 Apr 2017 14:32:24 -0700 Subject: [PATCH 0831/2608] Header refactoring: bitmap - unify and remove from catchall. 
--- include/jemalloc/internal/arena_structs_a.h | 2 + include/jemalloc/internal/arena_structs_b.h | 1 + include/jemalloc/internal/bitmap.h | 369 ++++++++++++++++++ include/jemalloc/internal/bitmap_externs.h | 8 - include/jemalloc/internal/bitmap_inlines.h | 200 ---------- include/jemalloc/internal/bitmap_structs.h | 28 -- include/jemalloc/internal/bitmap_types.h | 147 ------- include/jemalloc/internal/extent_structs.h | 1 + .../internal/jemalloc_internal_includes.h | 4 - 9 files changed, 373 insertions(+), 387 deletions(-) create mode 100644 include/jemalloc/internal/bitmap.h delete mode 100644 include/jemalloc/internal/bitmap_externs.h delete mode 100644 include/jemalloc/internal/bitmap_inlines.h delete mode 100644 include/jemalloc/internal/bitmap_structs.h delete mode 100644 include/jemalloc/internal/bitmap_types.h diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h index 07013a06..46aa77c8 100644 --- a/include/jemalloc/internal/arena_structs_a.h +++ b/include/jemalloc/internal/arena_structs_a.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H +#include "jemalloc/internal/bitmap.h" + struct arena_slab_data_s { /* Per region allocated/deallocated bitmap. 
*/ bitmap_t bitmap[BITMAP_GROUPS_MAX]; diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index bef73aaf..ecc59d3b 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h new file mode 100644 index 00000000..f6374e14 --- /dev/null +++ b/include/jemalloc/internal/bitmap.h @@ -0,0 +1,369 @@ +#ifndef JEMALLOC_INTERNAL_BITMAP_H +#define JEMALLOC_INTERNAL_BITMAP_H + +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/size_classes.h" + +typedef unsigned long bitmap_t; +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG + +/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ +#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES +/* Maximum bitmap bit count is determined by maximum regions per slab. */ +# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +#else +/* Maximum bitmap bit count is determined by number of extent size classes. */ +# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES +#endif +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) + +/* Number of bits per group. */ +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) + +/* + * Do some analysis on how big the bitmap is before we use a tree. For a brute + * force linear search, if we would have to call ffs_lu() more than 2^3 times, + * use a tree instead. + */ +#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 +# define BITMAP_USE_TREE +#endif + +/* Number of groups required to store a given number of bits. 
*/ +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) + +/* + * Number of groups required at a particular level for a given number of bits. + */ +#define BITMAP_GROUPS_L0(nbits) \ + BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) + +/* + * Assuming the number of levels, number of groups required for a given number + * of bits. + */ +#define BITMAP_GROUPS_1_LEVEL(nbits) \ + BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) + +/* + * Maximum number of groups required to support LG_BITMAP_MAXBITS. 
+ */ +#ifdef BITMAP_USE_TREE + +#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +#else +# error "Unsupported bitmap size" +#endif + +/* + * Maximum number of levels possible. This could be statically computed based + * on LG_BITMAP_MAXBITS: + * + * #define BITMAP_MAX_LEVELS \ + * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ + * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) + * + * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so + * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the + * various cascading macros. The only additional cost this incurs is some + * unused trailing entries in bitmap_info_t structures; the bitmaps themselves + * are not impacted. + */ +#define BITMAP_MAX_LEVELS 5 + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ + (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ + (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ + (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ + /* levels. 
*/ \ + { \ + {0}, \ + {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ + BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ + BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)} \ + } \ +} + +#else /* BITMAP_USE_TREE */ + +#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) + +#define BITMAP_INFO_INITIALIZER(nbits) { \ + /* nbits. */ \ + nbits, \ + /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ +} + +#endif /* BITMAP_USE_TREE */ + +typedef struct bitmap_level_s { + /* Offset of this level's groups within the array of groups. */ + size_t group_offset; +} bitmap_level_t; + +typedef struct bitmap_info_s { + /* Logical number of bits in bitmap (stored at bottom level). */ + size_t nbits; + +#ifdef BITMAP_USE_TREE + /* Number of levels necessary for nbits. */ + unsigned nlevels; + + /* + * Only the first (nlevels+1) elements are used, and levels are ordered + * bottom to top (e.g. the bottom level is stored in levels[0]). + */ + bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; +#else /* BITMAP_USE_TREE */ + /* Number of groups necessary for nbits. */ + size_t ngroups; +#endif /* BITMAP_USE_TREE */ +} bitmap_info_t; + +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); +size_t bitmap_size(const bitmap_info_t *binfo); + +static inline bool +bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { +#ifdef BITMAP_USE_TREE + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + bitmap_t rg = bitmap[rgoff]; + /* The bitmap is full iff the root group is 0. 
*/ + return (rg == 0); +#else + size_t i; + + for (i = 0; i < binfo->ngroups; i++) { + if (bitmap[i] != 0) { + return false; + } + } + return true; +#endif +} + +static inline bool +bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { + size_t goff; + bitmap_t g; + + assert(bit < binfo->nbits); + goff = bit >> LG_BITMAP_GROUP_NBITS; + g = bitmap[goff]; + return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); +} + +static inline void +bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { + size_t goff; + bitmap_t *gp; + bitmap_t g; + + assert(bit < binfo->nbits); + assert(!bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. */ + if (g == 0) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (g != 0) { + break; + } + } + } +#endif +} + +/* ffu: find first unset >= bit. */ +static inline size_t +bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { + assert(min_bit < binfo->nbits); + +#ifdef BITMAP_USE_TREE + size_t bit = 0; + for (unsigned level = binfo->nlevels; level--;) { + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit + >> lg_bits_per_group)]; + unsigned group_nmask = ((min_bit > bit) ? 
(min_bit - bit) : 0) + >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + assert(group_nmask <= BITMAP_GROUP_NBITS); + bitmap_t group_mask = ~((1LU << group_nmask) - 1); + bitmap_t group_masked = group & group_mask; + if (group_masked == 0LU) { + if (group == 0LU) { + return binfo->nbits; + } + /* + * min_bit was preceded by one or more unset bits in + * this group, but there are no other unset bits in this + * group. Try again starting at the first bit of the + * next sibling. This will recurse at most once per + * non-root level. + */ + size_t sib_base = bit + (1U << lg_bits_per_group); + assert(sib_base > min_bit); + assert(sib_base > bit); + if (sib_base >= binfo->nbits) { + return binfo->nbits; + } + return bitmap_ffu(bitmap, binfo, sib_base); + } + bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - + LG_BITMAP_GROUP_NBITS); + } + assert(bit >= min_bit); + assert(bit < binfo->nbits); + return bit; +#else + size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; + bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) + - 1); + size_t bit; + do { + bit = ffs_lu(g); + if (bit != 0) { + return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + } + i++; + g = bitmap[i]; + } while (i < binfo->ngroups); + return binfo->nbits; +#endif +} + +/* sfu: set first unset. 
*/ +static inline size_t +bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { + size_t bit; + bitmap_t g; + unsigned i; + + assert(!bitmap_full(bitmap, binfo)); + +#ifdef BITMAP_USE_TREE + i = binfo->nlevels - 1; + g = bitmap[binfo->levels[i].group_offset]; + bit = ffs_lu(g) - 1; + while (i > 0) { + i--; + g = bitmap[binfo->levels[i].group_offset + bit]; + bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + } +#else + i = 0; + g = bitmap[0]; + while ((bit = ffs_lu(g)) == 0) { + i++; + g = bitmap[i]; + } + bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); +#endif + bitmap_set(bitmap, binfo, bit); + return bit; +} + +static inline void +bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { + size_t goff; + bitmap_t *gp; + bitmap_t g; + UNUSED bool propagate; + + assert(bit < binfo->nbits); + assert(bitmap_get(bitmap, binfo, bit)); + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + assert(!bitmap_get(bitmap, binfo, bit)); +#ifdef BITMAP_USE_TREE + /* Propagate group state transitions up the tree. 
*/ + if (propagate) { + unsigned i; + for (i = 1; i < binfo->nlevels; i++) { + bit = goff; + goff = bit >> LG_BITMAP_GROUP_NBITS; + gp = &bitmap[binfo->levels[i].group_offset + goff]; + g = *gp; + propagate = (g == 0); + assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) + == 0); + g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); + *gp = g; + if (!propagate) { + break; + } + } + } +#endif /* BITMAP_USE_TREE */ +} + +#endif /* JEMALLOC_INTERNAL_BITMAP_H */ diff --git a/include/jemalloc/internal/bitmap_externs.h b/include/jemalloc/internal/bitmap_externs.h deleted file mode 100644 index 034a4e6b..00000000 --- a/include/jemalloc/internal/bitmap_externs.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BITMAP_EXTERNS_H -#define JEMALLOC_INTERNAL_BITMAP_EXTERNS_H - -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); -size_t bitmap_size(const bitmap_info_t *binfo); - -#endif /* JEMALLOC_INTERNAL_BITMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/bitmap_inlines.h b/include/jemalloc/internal/bitmap_inlines.h deleted file mode 100644 index 84425b34..00000000 --- a/include/jemalloc/internal/bitmap_inlines.h +++ /dev/null @@ -1,200 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BITMAP_INLINES_H -#define JEMALLOC_INTERNAL_BITMAP_INLINES_H - -#include "jemalloc/internal/bit_util.h" - -static inline bool -bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { -#ifdef BITMAP_USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; - bitmap_t rg = bitmap[rgoff]; - /* The bitmap is full iff the root group is 0. 
*/ - return (rg == 0); -#else - size_t i; - - for (i = 0; i < binfo->ngroups; i++) { - if (bitmap[i] != 0) { - return false; - } - } - return true; -#endif -} - -static inline bool -bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; - bitmap_t g; - - assert(bit < binfo->nbits); - goff = bit >> LG_BITMAP_GROUP_NBITS; - g = bitmap[goff]; - return !(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); -} - -static inline void -bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; - bitmap_t *gp; - bitmap_t g; - - assert(bit < binfo->nbits); - assert(!bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. */ - if (g == 0) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - assert(g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (g != 0) { - break; - } - } - } -#endif -} - -/* ffu: find first unset >= bit. */ -static inline size_t -bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { - assert(min_bit < binfo->nbits); - -#ifdef BITMAP_USE_TREE - size_t bit = 0; - for (unsigned level = binfo->nlevels; level--;) { - size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + - 1)); - bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit - >> lg_bits_per_group)]; - unsigned group_nmask = ((min_bit > bit) ? 
(min_bit - bit) : 0) - >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); - assert(group_nmask <= BITMAP_GROUP_NBITS); - bitmap_t group_mask = ~((1LU << group_nmask) - 1); - bitmap_t group_masked = group & group_mask; - if (group_masked == 0LU) { - if (group == 0LU) { - return binfo->nbits; - } - /* - * min_bit was preceded by one or more unset bits in - * this group, but there are no other unset bits in this - * group. Try again starting at the first bit of the - * next sibling. This will recurse at most once per - * non-root level. - */ - size_t sib_base = bit + (1U << lg_bits_per_group); - assert(sib_base > min_bit); - assert(sib_base > bit); - if (sib_base >= binfo->nbits) { - return binfo->nbits; - } - return bitmap_ffu(bitmap, binfo, sib_base); - } - bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - - LG_BITMAP_GROUP_NBITS); - } - assert(bit >= min_bit); - assert(bit < binfo->nbits); - return bit; -#else - size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; - bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - - 1); - size_t bit; - do { - bit = ffs_lu(g); - if (bit != 0) { - return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); - } - i++; - g = bitmap[i]; - } while (i < binfo->ngroups); - return binfo->nbits; -#endif -} - -/* sfu: set first unset. 
*/ -static inline size_t -bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { - size_t bit; - bitmap_t g; - unsigned i; - - assert(!bitmap_full(bitmap, binfo)); - -#ifdef BITMAP_USE_TREE - i = binfo->nlevels - 1; - g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; - while (i > 0) { - i--; - g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); - } -#else - i = 0; - g = bitmap[0]; - while ((bit = ffs_lu(g)) == 0) { - i++; - g = bitmap[i]; - } - bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); -#endif - bitmap_set(bitmap, binfo, bit); - return bit; -} - -static inline void -bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; - bitmap_t *gp; - bitmap_t g; - UNUSED bool propagate; - - assert(bit < binfo->nbits); - assert(bitmap_get(bitmap, binfo, bit)); - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - assert(!bitmap_get(bitmap, binfo, bit)); -#ifdef BITMAP_USE_TREE - /* Propagate group state transitions up the tree. 
*/ - if (propagate) { - unsigned i; - for (i = 1; i < binfo->nlevels; i++) { - bit = goff; - goff = bit >> LG_BITMAP_GROUP_NBITS; - gp = &bitmap[binfo->levels[i].group_offset + goff]; - g = *gp; - propagate = (g == 0); - assert((g & (ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK))) - == 0); - g ^= ZU(1) << (bit & BITMAP_GROUP_NBITS_MASK); - *gp = g; - if (!propagate) { - break; - } - } - } -#endif /* BITMAP_USE_TREE */ -} - -#endif /* JEMALLOC_INTERNAL_BITMAP_INLINES_H */ diff --git a/include/jemalloc/internal/bitmap_structs.h b/include/jemalloc/internal/bitmap_structs.h deleted file mode 100644 index 297ae669..00000000 --- a/include/jemalloc/internal/bitmap_structs.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BITMAP_STRUCTS_H -#define JEMALLOC_INTERNAL_BITMAP_STRUCTS_H - -struct bitmap_level_s { - /* Offset of this level's groups within the array of groups. */ - size_t group_offset; -}; - -struct bitmap_info_s { - /* Logical number of bits in bitmap (stored at bottom level). */ - size_t nbits; - -#ifdef BITMAP_USE_TREE - /* Number of levels necessary for nbits. */ - unsigned nlevels; - - /* - * Only the first (nlevels+1) elements are used, and levels are ordered - * bottom to top (e.g. the bottom level is stored in levels[0]). - */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* BITMAP_USE_TREE */ - /* Number of groups necessary for nbits. */ - size_t ngroups; -#endif /* BITMAP_USE_TREE */ -}; - -#endif /* JEMALLOC_INTERNAL_BITMAP_STRUCTS_H */ diff --git a/include/jemalloc/internal/bitmap_types.h b/include/jemalloc/internal/bitmap_types.h deleted file mode 100644 index 95f0dd12..00000000 --- a/include/jemalloc/internal/bitmap_types.h +++ /dev/null @@ -1,147 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BITMAP_TYPES_H -#define JEMALLOC_INTERNAL_BITMAP_TYPES_H - -#include "jemalloc/internal/size_classes.h" - -/* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. 
*/ -#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES -/* Maximum bitmap bit count is determined by maximum regions per slab. */ -# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS -#else -/* Maximum bitmap bit count is determined by number of extent size classes. */ -# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES -#endif -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) - -typedef struct bitmap_level_s bitmap_level_t; -typedef struct bitmap_info_s bitmap_info_t; -typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG - -/* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) - -/* - * Do some analysis on how big the bitmap is before we use a tree. For a brute - * force linear search, if we would have to call ffs_lu() more than 2^3 times, - * use a tree instead. - */ -#if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define BITMAP_USE_TREE -#endif - -/* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) - -/* - * Number of groups required at a particular level for a given number of bits. - */ -#define BITMAP_GROUPS_L0(nbits) \ - BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) - -/* - * Assuming the number of levels, number of groups required for a given number - * of bits. 
- */ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ - BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ - (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ - (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ - (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ - (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) - -/* - * Maximum number of groups required to support LG_BITMAP_MAXBITS. - */ -#ifdef BITMAP_USE_TREE - -#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) -#else -# error "Unsupported bitmap size" -#endif - -/* - * Maximum number of levels possible. This could be statically computed based - * on LG_BITMAP_MAXBITS: - * - * #define BITMAP_MAX_LEVELS \ - * (LG_BITMAP_MAXBITS / LG_SIZEOF_BITMAP) \ - * + !!(LG_BITMAP_MAXBITS % LG_SIZEOF_BITMAP) - * - * However, that would not allow the generic BITMAP_INFO_INITIALIZER() macro, so - * instead hardcode BITMAP_MAX_LEVELS to the largest number supported by the - * various cascading macros. 
The only additional cost this incurs is some - * unused trailing entries in bitmap_info_t structures; the bitmaps themselves - * are not impacted. - */ -#define BITMAP_MAX_LEVELS 5 - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* nlevels. */ \ - (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ - (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ - (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ - (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ - /* levels. */ \ - { \ - {0}, \ - {BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ - BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ - BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ - BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ - + BITMAP_GROUPS_L0(nbits)} \ - } \ -} - -#else /* BITMAP_USE_TREE */ - -#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) - -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* ngroups. 
*/ \ - BITMAP_BITS2GROUPS(nbits) \ -} - -#endif /* BITMAP_USE_TREE */ - -#endif /* JEMALLOC_INTERNAL_BITMAP_TYPES_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index aa0a3a75..5d8c3a20 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 0cb6183d..f98a1b27 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -48,7 +48,6 @@ #include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/bitmap_types.h" #include "jemalloc/internal/rtree_types.h" #include "jemalloc/internal/pages_types.h" #include "jemalloc/internal/tcache_types.h" @@ -61,7 +60,6 @@ #include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" #include "jemalloc/internal/ctl_structs.h" -#include "jemalloc/internal/bitmap_structs.h" #include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/extent_dss_structs.h" @@ -80,7 +78,6 @@ #include "jemalloc/internal/ctl_externs.h" #include "jemalloc/internal/witness_externs.h" #include "jemalloc/internal/mutex_externs.h" -#include "jemalloc/internal/bitmap_externs.h" #include "jemalloc/internal/extent_externs.h" #include "jemalloc/internal/extent_dss_externs.h" #include "jemalloc/internal/extent_mmap_externs.h" @@ -103,7 +100,6 @@ #include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include 
"jemalloc/internal/rtree_inlines.h" #include "jemalloc/internal/base_inlines.h" -#include "jemalloc/internal/bitmap_inlines.h" /* * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. From b54530020fb31b47376a0248aa3b73a51e9a5927 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 21 Apr 2017 14:49:31 -0700 Subject: [PATCH 0832/2608] Remove --with-lg-size-class-group. Four size classes per size doubling has proven to be a universally good choice for the entire 4.x release series, so there's little point to preserving this configurability. This partially resolves #580. --- INSTALL | 32 +------------------------------- configure.ac | 9 +-------- 2 files changed, 2 insertions(+), 39 deletions(-) diff --git a/INSTALL b/INSTALL index 6c53bfc0..ffc3767b 100644 --- a/INSTALL +++ b/INSTALL @@ -220,35 +220,6 @@ any of the following arguments (not a definitive list) to 'configure': when cross compiling, or when overriding the default for systems that do not explicitly support huge pages. ---with-lg-size-class-group= - Specify the base 2 log of how many size classes to use for each doubling in - size. By default jemalloc uses =2, which results in - e.g. the following size classes: - - [...], 64, - 80, 96, 112, 128, - 160, [...] - - =3 results in e.g. the following size classes: - - [...], 64, - 72, 80, 88, 96, 104, 112, 120, 128, - 144, [...] - - The minimal =0 causes jemalloc to only provide size - classes that are powers of 2: - - [...], - 64, - 128, - 256, - [...] - - An implementation detail currently limits the total number of small size - classes to 255, and a compilation error will result if the - you specify cannot be supported. The limit is - roughly =4, depending on page size. - --with-lg-quantum= Specify the base 2 log of the minimum allocation alignment. 
jemalloc needs to know the minimum alignment that meets the following C standard @@ -272,8 +243,7 @@ any of the following arguments (not a definitive list) to 'configure': this allocator noncompliance means that it is generally safe in practice to let jemalloc's minimum alignment follow glibc's lead. If you specify --with-lg-quantum=3 during configuration, jemalloc will provide additional - size classes that are not 16-byte-aligned (24, 40, and 56, assuming - --with-lg-size-class-group=2). + size classes that are not 16-byte-aligned (24, 40, and 56). --with-lg-tiny-min= Specify the base 2 log of the minimum tiny size class to support. Tiny diff --git a/configure.ac b/configure.ac index 42cabad3..2e233712 100644 --- a/configure.ac +++ b/configure.ac @@ -1401,12 +1401,6 @@ AC_ARG_WITH([lg_page_sizes], [Base 2 logs of system page sizes to support])], [LG_PAGE_SIZES="$with_lg_page_sizes"], [LG_PAGE_SIZES="$LG_PAGE"]) -AC_ARG_WITH([lg_size_class_group], - [AS_HELP_STRING([--with-lg-size-class-group=], - [Base 2 log of size classes per doubling])], - [LG_SIZE_CLASS_GROUP="$with_lg_size_class_group"], - [LG_SIZE_CLASS_GROUP="2"]) - dnl ============================================================================ dnl jemalloc configuration. 
dnl @@ -2022,7 +2016,7 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" ${LG_SIZE_CLASS_GROUP} > "${objroot}include/jemalloc/internal/size_classes.h" + "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" 2 > "${objroot}include/jemalloc/internal/size_classes.h" ], [ SHELL="${SHELL}" srcdir="${srcdir}" @@ -2030,7 +2024,6 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ LG_QUANTA="${LG_QUANTA}" LG_TINY_MIN=${LG_TINY_MIN} LG_PAGE_SIZES="${LG_PAGE_SIZES}" - LG_SIZE_CLASS_GROUP=${LG_SIZE_CLASS_GROUP} ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ mkdir -p "${objroot}include/jemalloc" From af76f0e5d28cd6f0ce8e6c8c6a2a78ba4089868a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 21 Apr 2017 15:35:54 -0700 Subject: [PATCH 0833/2608] Remove --with-lg-tiny-min. This option isn't useful in practice. This partially resolves #580. --- INSTALL | 37 ++++--------------- configure.ac | 10 +---- .../internal/jemalloc_internal_defs.h.in | 3 -- include/jemalloc/internal/size_classes.sh | 2 + 4 files changed, 11 insertions(+), 41 deletions(-) diff --git a/INSTALL b/INSTALL index ffc3767b..d4ddba2b 100644 --- a/INSTALL +++ b/INSTALL @@ -233,38 +233,17 @@ any of the following arguments (not a definitive list) to 'configure': This setting is architecture-specific, and although jemalloc includes known safe values for the most commonly used modern architectures, there is a wrinkle related to GNU libc (glibc) that may impact your choice of - . On most modern architectures, this mandates 16-byte alignment - (=4), but the glibc developers chose not to meet this + . 
On most modern architectures, this mandates 16-byte + alignment (=4), but the glibc developers chose not to meet this requirement for performance reasons. An old discussion can be found at https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc, jemalloc does follow the C standard by default (caveat: jemalloc - technically cheats if --with-lg-tiny-min is smaller than - --with-lg-quantum), but the fact that Linux systems already work around - this allocator noncompliance means that it is generally safe in practice to - let jemalloc's minimum alignment follow glibc's lead. If you specify - --with-lg-quantum=3 during configuration, jemalloc will provide additional - size classes that are not 16-byte-aligned (24, 40, and 56). - ---with-lg-tiny-min= - Specify the base 2 log of the minimum tiny size class to support. Tiny - size classes are powers of 2 less than the quantum, and are only - incorporated if is less than (see - --with-lg-quantum). Tiny size classes technically violate the C standard - requirement for minimum alignment, and crashes could conceivably result if - the compiler were to generate instructions that made alignment assumptions, - both because illegal instruction traps could result, and because accesses - could straddle page boundaries and cause segmentation faults due to - accessing unmapped addresses. - - The default of =3 works well in practice even on architectures - that technically require 16-byte alignment, probably for the same reason - --with-lg-quantum=3 works. Smaller tiny size classes can, and will, cause - crashes (see https://bugzilla.mozilla.org/show_bug.cgi?id=691003 for an - example). - - This option is rarely useful, and is mainly provided as documentation of a - subtle implementation detail. If you do use this option, specify a - value in [3, ..., ]. 
+ technically cheats for size classes smaller than the quantum), but the fact + that Linux systems already work around this allocator noncompliance means + that it is generally safe in practice to let jemalloc's minimum alignment + follow glibc's lead. If you specify --with-lg-quantum=3 during + configuration, jemalloc will provide additional size classes that are not + 16-byte-aligned (24, 40, and 56). The following environment variables (not a definitive list) impact configure's behavior: diff --git a/configure.ac b/configure.ac index 2e233712..a950a23a 100644 --- a/configure.ac +++ b/configure.ac @@ -1296,13 +1296,6 @@ else fi fi -AC_ARG_WITH([lg_tiny_min], - [AS_HELP_STRING([--with-lg-tiny-min=], - [Base 2 log of minimum tiny size class to support])], - [LG_TINY_MIN="$with_lg_tiny_min"], - [LG_TINY_MIN="3"]) -AC_DEFINE_UNQUOTED([LG_TINY_MIN], [$LG_TINY_MIN]) - AC_ARG_WITH([lg_quantum], [AS_HELP_STRING([--with-lg-quantum=], [Base 2 log of minimum allocation alignment])], @@ -2016,13 +2009,12 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ ]) AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ mkdir -p "${objroot}include/jemalloc/internal" - "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" ${LG_TINY_MIN} "${LG_PAGE_SIZES}" 2 > "${objroot}include/jemalloc/internal/size_classes.h" + "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" 3 "${LG_PAGE_SIZES}" 2 > "${objroot}include/jemalloc/internal/size_classes.h" ], [ SHELL="${SHELL}" srcdir="${srcdir}" objroot="${objroot}" LG_QUANTA="${LG_QUANTA}" - LG_TINY_MIN=${LG_TINY_MIN} LG_PAGE_SIZES="${LG_PAGE_SIZES}" ]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 1bec2c93..7e83e7b7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ 
b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -172,9 +172,6 @@ /* Support lazy locking (avoid locking unless a second thread is launched). */ #undef JEMALLOC_LAZY_LOCK -/* Minimum size class to support is 2^LG_TINY_MIN bytes. */ -#undef LG_TINY_MIN - /* * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size * classes). diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index da1e006c..dd562db1 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -285,6 +285,7 @@ cat < Date: Fri, 21 Apr 2017 15:05:43 -0700 Subject: [PATCH 0834/2608] Implement malloc_mutex_trylock() w/ proper stats update. --- include/jemalloc/internal/mutex_inlines.h | 42 +++++++++++++------ include/jemalloc/internal/private_symbols.txt | 4 ++ src/mutex.c | 4 +- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 2856d844..6da21cf6 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -10,10 +10,36 @@ malloc_mutex_lock_final(malloc_mutex_t *mutex) { MALLOC_MUTEX_LOCK(mutex); } +static inline bool +malloc_mutex_trylock_final(malloc_mutex_t *mutex) { + return MALLOC_MUTEX_TRYLOCK(mutex); +} + +static inline void +mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { + if (config_stats) { + mutex_prof_data_t *data = &mutex->prof_data; + data->n_lock_ops++; + if (data->prev_owner != tsdn) { + data->prev_owner = tsdn; + data->n_owner_switches++; + } + } +} + /* Trylock: return false if the lock is successfully acquired. 
*/ static inline bool -malloc_mutex_trylock(malloc_mutex_t *mutex) { - return MALLOC_MUTEX_TRYLOCK(mutex); +malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn, &mutex->witness); + if (isthreaded) { + if (malloc_mutex_trylock_final(mutex)) { + return true; + } + mutex_owner_stats_update(tsdn, mutex); + } + witness_lock(tsdn, &mutex->witness); + + return false; } /* Aggregate lock prof data. */ @@ -44,18 +70,10 @@ static inline void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn, &mutex->witness); if (isthreaded) { - if (malloc_mutex_trylock(mutex)) { + if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); } - /* We own the lock now. Update a few counters. */ - if (config_stats) { - mutex_prof_data_t *data = &mutex->prof_data; - data->n_lock_ops++; - if (data->prev_owner != tsdn) { - data->prev_owner = tsdn; - data->n_owner_switches++; - } - } + mutex_owner_stats_update(tsdn, mutex); } witness_lock(tsdn, &mutex->witness); } diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 649a689f..50590957 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -292,10 +292,13 @@ malloc_mutex_assert_owner malloc_mutex_boot malloc_mutex_init malloc_mutex_lock +malloc_mutex_lock_final malloc_mutex_lock_slow malloc_mutex_postfork_child malloc_mutex_postfork_parent malloc_mutex_prefork +malloc_mutex_trylock +malloc_mutex_trylock_final malloc_mutex_unlock malloc_printf malloc_slow @@ -309,6 +312,7 @@ malloc_tsd_malloc malloc_vcprintf malloc_vsnprintf malloc_write +mutex_owner_stats_update narenas_auto narenas_total_get ncpus diff --git a/src/mutex.c b/src/mutex.c index 3bb5ce1d..3eec970f 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -81,7 +81,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { CPU_SPINWAIT; - if 
(!malloc_mutex_trylock(mutex)) { + if (!malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; } @@ -100,7 +100,7 @@ label_spin_done: uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED) + 1; /* One last try as above two calls may take quite some cycles. */ - if (!malloc_mutex_trylock(mutex)) { + if (!malloc_mutex_trylock_final(mutex)) { atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED); data->n_spin_acquired++; return; From cf6035e1ee60dd9245b119aadb2ccec592dde27d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Apr 2017 15:23:14 -0700 Subject: [PATCH 0835/2608] Use trylock in arena_decay_impl(). If another thread is working on decay, we don't have to wait for the mutex. --- src/arena.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/arena.c b/src/arena.c index 77f72828..1288b7ba 100644 --- a/src/arena.c +++ b/src/arena.c @@ -47,7 +47,7 @@ const arena_bin_info_t arena_bin_info[NBINS] = { static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit); -static void arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all); +static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_bin_t *bin); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, @@ -965,33 +965,41 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, decay->purging = false; } -static void +static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all) { - malloc_mutex_lock(tsdn, &decay->mtx); if (all) { + malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0); } else { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* No need to wait if another thread is in progress. 
*/ + return true; + } arena_maybe_decay(tsdn, arena, decay, extents); } malloc_mutex_unlock(tsdn, &decay->mtx); + + return false; } -static void +static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all) { - arena_decay_impl(tsdn, arena, &arena->decay_dirty, + return arena_decay_impl(tsdn, arena, &arena->decay_dirty, &arena->extents_dirty, all); } -static void +static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool all) { - arena_decay_impl(tsdn, arena, &arena->decay_muzzy, + return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, &arena->extents_muzzy, all); } void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all) { - arena_decay_dirty(tsdn, arena, all); + if (arena_decay_dirty(tsdn, arena, all)) { + return; + } arena_decay_muzzy(tsdn, arena, all); } From 0f63396b2317dd1a456008819fbedecc372abf13 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 24 Apr 2017 11:47:10 -0700 Subject: [PATCH 0836/2608] Remove --disable-cc-silence. The explicit compiler warning suppression controlled by this option is universally desirable, so remove the ability to disable suppression. This partially resolves #580. --- INSTALL | 5 ---- configure.ac | 27 +++---------------- .../internal/jemalloc_internal_defs.h.in | 3 --- .../internal/jemalloc_internal_macros.h | 6 +---- include/jemalloc/internal/util.h | 6 +---- 5 files changed, 6 insertions(+), 41 deletions(-) diff --git a/INSTALL b/INSTALL index d4ddba2b..c14dd377 100644 --- a/INSTALL +++ b/INSTALL @@ -96,11 +96,6 @@ any of the following arguments (not a definitive list) to 'configure': --with-malloc-conf=decay_time:30 ---disable-cc-silence - Disable code that silences non-useful compiler warnings. This is mainly - useful during development when auditing the set of warnings that are being - silenced. - --enable-debug Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. 
diff --git a/configure.ac b/configure.ac index a950a23a..2baead74 100644 --- a/configure.ac +++ b/configure.ac @@ -258,12 +258,10 @@ if test "x$je_cv_cray" = "xyes" ; then JE_CFLAGS_ADD([-hipa2]) JE_CFLAGS_ADD([-hnognu]) fi - if test "x$enable_cc_silence" != "xno" ; then - dnl ignore unreachable code warning - JE_CFLAGS_ADD([-hnomessage=128]) - dnl ignore redefinition of "malloc", "free", etc warning - JE_CFLAGS_ADD([-hnomessage=1357]) - fi + dnl ignore unreachable code warning + JE_CFLAGS_ADD([-hnomessage=128]) + dnl ignore redefinition of "malloc", "free", etc warning + JE_CFLAGS_ADD([-hnomessage=1357]) fi AC_SUBST([CONFIGURE_CFLAGS]) AC_SUBST([SPECIFIED_CFLAGS]) @@ -942,22 +940,6 @@ cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.i cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in" cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in" -dnl Silence irrelevant compiler warnings by default. -AC_ARG_ENABLE([cc-silence], - [AS_HELP_STRING([--disable-cc-silence], - [Do not silence irrelevant compiler warnings])], -[if test "x$enable_cc_silence" = "xno" ; then - enable_cc_silence="0" -else - enable_cc_silence="1" -fi -], -[enable_cc_silence="1"] -) -if test "x$enable_cc_silence" = "x1" ; then - AC_DEFINE([JEMALLOC_CC_SILENCE], [ ]) -fi - dnl Do not compile with debugging by default. 
AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], @@ -2110,7 +2092,6 @@ AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) -AC_MSG_RESULT([cc-silence : ${enable_cc_silence}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([code-coverage : ${enable_code_coverage}]) AC_MSG_RESULT([stats : ${enable_stats}]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7e83e7b7..dbcbf26a 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -127,9 +127,6 @@ /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL -/* JEMALLOC_CC_SILENCE enables code that silences unuseful compiler warnings. */ -#undef JEMALLOC_CC_SILENCE - /* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ #undef JEMALLOC_CODE_COVERAGE diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 1b2802a8..317b2ab0 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -10,11 +10,7 @@ # define inline _inline #endif -#ifdef JEMALLOC_CC_SILENCE -# define UNUSED JEMALLOC_ATTR(unused) -#else -# define UNUSED -#endif +#define UNUSED JEMALLOC_ATTR(unused) #define ZU(z) ((size_t)z) #define ZD(z) ((ssize_t)z) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 88662e89..304cb545 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -26,11 +26,7 @@ * wherever the compiler fails to recognize that the variable is never used * uninitialized. 
*/ -#ifdef JEMALLOC_CC_SILENCE -# define JEMALLOC_CC_SILENCE_INIT(v) = v -#else -# define JEMALLOC_CC_SILENCE_INIT(v) -#endif +#define JEMALLOC_CC_SILENCE_INIT(v) = v #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) From e2cc6280ed96e2e6a2754d4e7187296e377d9548 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 24 Apr 2017 15:22:03 -0700 Subject: [PATCH 0837/2608] Remove --enable-code-coverage. This option hasn't been particularly useful since the original pre-3.0.0 push to broaden test coverage. This partially resolves #580. --- INSTALL | 13 ----- Makefile.in | 50 ------------------- configure.ac | 26 ---------- coverage.sh | 16 ------ .../internal/jemalloc_internal_defs.h.in | 3 -- .../internal/jemalloc_internal_macros.h | 2 +- 6 files changed, 1 insertion(+), 109 deletions(-) delete mode 100755 coverage.sh diff --git a/INSTALL b/INSTALL index c14dd377..5d0cd21e 100644 --- a/INSTALL +++ b/INSTALL @@ -100,19 +100,6 @@ any of the following arguments (not a definitive list) to 'configure': Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. ---enable-code-coverage - Enable code coverage support, for use during jemalloc test development. - Additional testing targets are available if this option is enabled: - - coverage - coverage_unit - coverage_integration - coverage_stress - - These targets do not clear code coverage results from previous runs, and - there are interactions between the various coverage targets, so it is - usually advisable to run 'make clean' between repeated code coverage runs. - --disable-stats Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. 
diff --git a/Makefile.in b/Makefile.in index 26c811c8..418e92b4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -55,7 +55,6 @@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ -enable_code_coverage := @enable_code_coverage@ enable_prof := @enable_prof@ enable_zone_allocator := @enable_zone_allocator@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF @@ -452,38 +451,6 @@ stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) check: check_unit check_integration check_integration_decay check_integration_prof -ifeq ($(enable_code_coverage), 1) -coverage_unit: check_unit - $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) - -coverage_integration: check_integration - $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration/cpp integration $(TESTS_INTEGRATION_CPP_OBJS) - -coverage_stress: stress - $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress stress $(TESTS_STRESS_OBJS) - -coverage: check - $(SHELL) $(srcroot)coverage.sh $(srcroot)src pic $(C_PIC_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src jet $(C_JET_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)src integration $(C_UTIL_INTEGRATION_OBJS) - 
$(SHELL) $(srcroot)coverage.sh $(srcroot)test/src unit $(C_TESTLIB_UNIT_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src integration $(C_TESTLIB_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/src stress $(C_TESTLIB_STRESS_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/unit unit $(TESTS_UNIT_OBJS) $(TESTS_UNIT_AUX_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration integration $(TESTS_INTEGRATION_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/integration/cpp integration $(TESTS_INTEGRATION_CPP_OBJS) - $(SHELL) $(srcroot)coverage.sh $(srcroot)test/stress integration $(TESTS_STRESS_OBJS) -endif - clean: rm -f $(C_OBJS) rm -f $(CPP_OBJS) @@ -492,37 +459,20 @@ clean: rm -f $(C_JET_OBJS) rm -f $(C_TESTLIB_OBJS) rm -f $(C_OBJS:%.$(O)=%.d) - rm -f $(C_OBJS:%.$(O)=%.gcda) - rm -f $(C_OBJS:%.$(O)=%.gcno) rm -f $(CPP_OBJS:%.$(O)=%.d) - rm -f $(CPP_OBJS:%.$(O)=%.gcda) - rm -f $(CPP_OBJS:%.$(O)=%.gcno) rm -f $(C_PIC_OBJS:%.$(O)=%.d) - rm -f $(C_PIC_OBJS:%.$(O)=%.gcda) - rm -f $(C_PIC_OBJS:%.$(O)=%.gcno) rm -f $(CPP_PIC_OBJS:%.$(O)=%.d) - rm -f $(CPP_PIC_OBJS:%.$(O)=%.gcda) - rm -f $(CPP_PIC_OBJS:%.$(O)=%.gcno) rm -f $(C_JET_OBJS:%.$(O)=%.d) - rm -f $(C_JET_OBJS:%.$(O)=%.gcda) - rm -f $(C_JET_OBJS:%.$(O)=%.gcno) rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d) - rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcda) - rm -f $(C_TESTLIB_OBJS:%.$(O)=%.gcno) rm -f $(TESTS_OBJS:%.$(O)=%$(EXE)) rm -f $(TESTS_OBJS) rm -f $(TESTS_OBJS:%.$(O)=%.d) - rm -f $(TESTS_OBJS:%.$(O)=%.gcda) - rm -f $(TESTS_OBJS:%.$(O)=%.gcno) rm -f $(TESTS_OBJS:%.$(O)=%.out) rm -f $(TESTS_CPP_OBJS:%.$(O)=%$(EXE)) rm -f $(TESTS_CPP_OBJS) rm -f $(TESTS_CPP_OBJS:%.$(O)=%.d) - rm -f $(TESTS_CPP_OBJS:%.$(O)=%.gcda) - rm -f $(TESTS_CPP_OBJS:%.$(O)=%.gcno) rm -f $(TESTS_CPP_OBJS:%.$(O)=%.out) rm -f $(DSOS) $(STATIC_LIBS) - rm -f $(objroot)*.gcov.* distclean: clean rm -f $(objroot)bin/jemalloc-config diff --git a/configure.ac b/configure.ac index 2baead74..04952d89 100644 --- a/configure.ac +++ 
b/configure.ac @@ -783,31 +783,6 @@ AC_CHECK_FUNC([valloc], [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) public_syms="${public_syms} valloc"]) -dnl Do not compute test code coverage by default. -GCOV_FLAGS= -AC_ARG_ENABLE([code-coverage], - [AS_HELP_STRING([--enable-code-coverage], - [Enable code coverage])], -[if test "x$enable_code_coverage" = "xno" ; then - enable_code_coverage="0" -else - enable_code_coverage="1" -fi -], -[enable_code_coverage="0"] -) -if test "x$enable_code_coverage" = "x1" ; then - deoptimize="no" - echo "$CFLAGS $EXTRA_CFLAGS" | grep '\-O' >/dev/null || deoptimize="yes" - if test "x${deoptimize}" = "xyes" ; then - JE_CFLAGS_ADD([-O0]) - fi - JE_CFLAGS_ADD([-fprofile-arcs -ftest-coverage]) - EXTRA_LDFLAGS="$EXTRA_LDFLAGS -fprofile-arcs -ftest-coverage" - AC_DEFINE([JEMALLOC_CODE_COVERAGE], [ ]) -fi -AC_SUBST([enable_code_coverage]) - dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], @@ -2093,7 +2068,6 @@ AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) -AC_MSG_RESULT([code-coverage : ${enable_code_coverage}]) AC_MSG_RESULT([stats : ${enable_stats}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) diff --git a/coverage.sh b/coverage.sh deleted file mode 100755 index 6d1362a8..00000000 --- a/coverage.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh - -set -e - -objdir=$1 -suffix=$2 -shift 2 -objs=$@ - -gcov -b -p -f -o "${objdir}" ${objs} - -# Move gcov outputs so that subsequent gcov invocations won't clobber results -# for the same sources with different compilation flags. -for f in `find . 
-maxdepth 1 -type f -name '*.gcov'` ; do - mv "${f}" "${f}.${suffix}" -done diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index dbcbf26a..c22e5302 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -127,9 +127,6 @@ /* Non-empty if the tls_model attribute is supported. */ #undef JEMALLOC_TLS_MODEL -/* JEMALLOC_CODE_COVERAGE enables test code coverage analysis. */ -#undef JEMALLOC_CODE_COVERAGE - /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables * inline functions. diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 317b2ab0..a1712cf5 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MACROS_H #define JEMALLOC_INTERNAL_MACROS_H -#if defined(JEMALLOC_DEBUG) || defined(JEMALLOC_CODE_COVERAGE) +#ifdef JEMALLOC_DEBUG # define JEMALLOC_ALWAYS_INLINE static inline #else # define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline From c67c3e4a63277718b9d137a38663c6ae324c99aa Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 24 Apr 2017 17:28:55 -0700 Subject: [PATCH 0838/2608] Replace --disable-munmap with opt.munmap. Control use of munmap(2) via a run-time option rather than a compile-time option (with the same per platform default). The old behavior of --disable-munmap can be achieved with --with-malloc-conf=munmap:false. This partially resolves #580. 
--- INSTALL | 9 ----- configure.ac | 16 +------- doc/jemalloc.xml.in | 38 ++++++++++++------- include/jemalloc/internal/arena_structs_b.h | 8 ++-- .../jemalloc/internal/extent_mmap_externs.h | 2 + .../internal/jemalloc_internal_defs.h.in | 7 ++-- .../jemalloc/internal/jemalloc_preamble.h.in | 7 ---- src/arena.c | 4 +- src/ctl.c | 6 +-- src/extent.c | 2 +- src/extent_mmap.c | 15 +++++++- src/jemalloc.c | 38 ++++++++----------- src/large.c | 2 +- src/stats.c | 2 +- test/unit/arena_reset.c | 4 +- test/unit/mallctl.c | 2 +- 16 files changed, 77 insertions(+), 85 deletions(-) diff --git a/INSTALL b/INSTALL index 5d0cd21e..abf3290b 100644 --- a/INSTALL +++ b/INSTALL @@ -128,15 +128,6 @@ any of the following arguments (not a definitive list) to 'configure': Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. ---disable-munmap - Disable virtual memory deallocation via munmap(2); instead keep track of - the virtual memory for later use. munmap() is disabled by default (i.e. - --disable-munmap is implied) on [64-bit] Linux, which has a quirk in its - virtual memory allocation algorithm that causes semi-permanent VM map holes - under normal jemalloc operation. Although munmap() causes issues on 32-bit - Linux as well, it is not disabled by default due to the practical - possibility of address space exhaustion. - --disable-fill Disable support for junk/zero filling of memory. See the "opt.junk" and "opt.zero" option documentation for usage details. diff --git a/configure.ac b/configure.ac index 04952d89..6447c51a 100644 --- a/configure.ac +++ b/configure.ac @@ -1086,21 +1086,10 @@ if test "x${maps_coalesce}" = "x1" ; then AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ]) fi -dnl Enable VM deallocation via munmap() by default. 
-AC_ARG_ENABLE([munmap], - [AS_HELP_STRING([--disable-munmap], [Disable VM deallocation via munmap(2)])], -[if test "x$enable_munmap" = "xno" ; then - enable_munmap="0" -else - enable_munmap="1" -fi -], -[enable_munmap="${default_munmap}"] -) -if test "x$enable_munmap" = "x1" ; then +dnl Indicate whether to use munmap() by default. +if test "x$default_munmap" = "x1" ; then AC_DEFINE([JEMALLOC_MUNMAP], [ ]) fi -AC_SUBST([enable_munmap]) dnl Enable allocation from DSS if supported by the OS. have_dss="1" @@ -2076,7 +2065,6 @@ AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) -AC_MSG_RESULT([munmap : ${enable_munmap}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7dace367..66d8e5df 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -788,16 +788,6 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", during build configuration. - - - config.munmap - (bool) - r- - - was specified during - build configuration. - - config.prof @@ -873,6 +863,28 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.munmap + (bool) + r- + + If true, call + munmap + 2 or equivalent rather than + retaining unused virtual memory (see stats.retained for related details). + This option is enabled by default unless it is known to trigger + platform-specific performance problems, e.g. for [64-bit] Linux, which + has a quirk in its virtual memory allocation algorithm that causes + semi-permanent VM map holes under normal jemalloc operation. Although + munmap + 2 causes issues on 32-bit Linux as + well, it is not disabled by default due to the practical possibility of + address space exhaustion. 
+ + + opt.dss @@ -2114,9 +2126,9 @@ struct extent_hooks_s { Total number of bytes in virtual memory mappings that were retained rather than being returned to the operating system via e.g. munmap - 2. Retained virtual memory is - typically untouched, decommitted, or purged, so it has no strongly - associated physical memory (see 2 or similar. Retained virtual + memory is typically untouched, decommitted, or purged, so it has no + strongly associated physical memory (see extent hooks for details). Retained memory is excluded from mapped memory statistics, e.g. stats.mapped. diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index ecc59d3b..6b83e526 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -230,10 +230,10 @@ struct arena_s { /* * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if !config_munmap). This limits - * the number of disjoint virtual memory ranges so that extent merging - * can be effective even if multiple arenas' extent allocation requests - * are highly interleaved. + * request via the extent hooks (only if !opt_munmap). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. * * Synchronization: atomic. 
*/ diff --git a/include/jemalloc/internal/extent_mmap_externs.h b/include/jemalloc/internal/extent_mmap_externs.h index 5917b53d..e5bc8110 100644 --- a/include/jemalloc/internal/extent_mmap_externs.h +++ b/include/jemalloc/internal/extent_mmap_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +extern bool opt_munmap; + void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); bool extent_dalloc_mmap(void *addr, size_t size); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c22e5302..8f7c42b8 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -192,9 +192,10 @@ #undef JEMALLOC_MAPS_COALESCE /* - * If defined, use munmap() to unmap freed extents, rather than storing them for - * later reuse. This is disabled by default on Linux because common sequences - * of mmap()/munmap() calls will cause virtual memory map holes. + * If defined, use munmap() to unmap freed extents by default, rather than + * storing them for later reuse. This is disabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. 
*/ #undef JEMALLOC_MUNMAP diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 79827fc4..bc0ca641 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -98,13 +98,6 @@ static const bool maps_coalesce = false #endif ; -static const bool config_munmap = -#ifdef JEMALLOC_MUNMAP - true -#else - false -#endif - ; static const bool config_stats = #ifdef JEMALLOC_STATS true diff --git a/src/arena.c b/src/arena.c index 1288b7ba..3b94a20d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1143,7 +1143,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * opportunity to unmap all retained memory without having to keep its * own metadata structures, but if deallocation fails, that is the * application's decision/problem. In practice, retained extents are - * leaked here if !config_munmap unless the application provided custom + * leaked here if !opt_munmap unless the application provided custom * extent hooks, so best practice is to either enable munmap (and avoid * dss for arenas to be destroyed), or provide custom extent hooks that * either unmap retained extents or track them for later use. 
@@ -1947,7 +1947,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (!config_munmap) { + if (!opt_munmap) { atomic_store_u(&arena->extent_grow_next, psz2ind(HUGEPAGE), ATOMIC_RELAXED); } diff --git a/src/ctl.c b/src/ctl.c index c054ded6..8c2e7bc2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -66,7 +66,6 @@ CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_malloc_conf) -CTL_PROTO(config_munmap) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) @@ -74,6 +73,7 @@ CTL_PROTO(config_stats) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_munmap) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) @@ -249,7 +249,6 @@ static const ctl_named_node_t config_node[] = { {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("malloc_conf"), CTL(config_malloc_conf)}, - {NAME("munmap"), CTL(config_munmap)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, @@ -260,6 +259,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("munmap"), CTL(opt_munmap)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, @@ -1444,7 +1444,6 @@ CTL_RO_CONFIG_GEN(config_debug, bool) CTL_RO_CONFIG_GEN(config_fill, bool) CTL_RO_CONFIG_GEN(config_lazy_lock, bool) CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) -CTL_RO_CONFIG_GEN(config_munmap, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) @@ -1455,6 +1454,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) 
+CTL_RO_NL_GEN(opt_munmap, opt_munmap, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) diff --git a/src/extent.c b/src/extent.c index d08ccdb3..1ddaf240 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1123,7 +1123,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } } - if (!config_munmap && extent == NULL) { + if (!opt_munmap && extent == NULL) { extent = extent_grow_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } diff --git a/src/extent_mmap.c b/src/extent_mmap.c index be099373..5fe82ee5 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -4,6 +4,17 @@ #include "jemalloc/internal/assert.h" +/******************************************************************************/ +/* Data. */ + +bool opt_munmap = +#ifdef JEMALLOC_MUNMAP + true +#else + false +#endif + ; + /******************************************************************************/ void * @@ -23,8 +34,8 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool extent_dalloc_mmap(void *addr, size_t size) { - if (config_munmap) { + if (opt_munmap) { pages_unmap(addr, size); } - return !config_munmap; + return !opt_munmap; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 602cf677..5e1f0a72 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -947,7 +947,7 @@ malloc_conf_init(void) { (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) #define CONF_MATCH_VALUE(n) \ (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) -#define CONF_HANDLE_BOOL(o, n, cont) \ +#define CONF_HANDLE_BOOL(o, n) \ if (CONF_MATCH(n)) { \ if (CONF_MATCH_VALUE("true")) { \ o = true; \ @@ -958,9 +958,7 @@ malloc_conf_init(void) { "Invalid conf value", \ k, klen, v, vlen); \ } \ - if (cont) { \ - continue; \ - } \ + continue; \ } #define CONF_MIN_no(um, min) false #define CONF_MIN_yes(um, min) ((um) < 
(min)) @@ -1043,7 +1041,8 @@ malloc_conf_init(void) { continue; \ } - CONF_HANDLE_BOOL(opt_abort, "abort", true) + CONF_HANDLE_BOOL(opt_abort, "abort") + CONF_HANDLE_BOOL(opt_munmap, "munmap") if (strncmp("dss", k, klen) == 0) { int i; bool match = false; @@ -1074,7 +1073,7 @@ malloc_conf_init(void) { "dirty_decay_time", -1, NSTIME_SEC_MAX); CONF_HANDLE_SSIZE_T(opt_muzzy_decay_time, "muzzy_decay_time", -1, NSTIME_SEC_MAX); - CONF_HANDLE_BOOL(opt_stats_print, "stats_print", true) + CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { @@ -1100,15 +1099,15 @@ malloc_conf_init(void) { } continue; } - CONF_HANDLE_BOOL(opt_zero, "zero", true) + CONF_HANDLE_BOOL(opt_zero, "zero") } if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, "utrace", true) + CONF_HANDLE_BOOL(opt_utrace, "utrace") } if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc", true) + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } - CONF_HANDLE_BOOL(opt_tcache, "tcache", true) + CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { @@ -1136,27 +1135,22 @@ malloc_conf_init(void) { continue; } if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, "prof", true) + CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, "prof_prefix", "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, "prof_active", - true) + CONF_HANDLE_BOOL(opt_prof_active, "prof_active") CONF_HANDLE_BOOL(opt_prof_thread_active_init, - "prof_thread_active_init", true) + "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, no, yes, true) - CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum", - true) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump", 
- true) - CONF_HANDLE_BOOL(opt_prof_final, "prof_final", - true) - CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak", - true) + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") + CONF_HANDLE_BOOL(opt_prof_final, "prof_final") + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); diff --git a/src/large.c b/src/large.c index 36e8be91..4d515fbb 100644 --- a/src/large.c +++ b/src/large.c @@ -93,7 +93,7 @@ large_dalloc_maybe_junk(void *ptr, size_t size) { * Only bother junk filling if the extent isn't about to be * unmapped. */ - if (!config_munmap || (have_dss && extent_in_dss(ptr))) { + if (!opt_munmap || (have_dss && extent_in_dss(ptr))) { large_dalloc_junk(ptr, size); } } diff --git a/src/stats.c b/src/stats.c index 71c9a94d..ca9db89d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -707,7 +707,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "config.malloc_conf: \"%s\"\n", config_malloc_conf); } - CONFIG_WRITE_BOOL_JSON(munmap, ",") CONFIG_WRITE_BOOL_JSON(prof, ",") CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") @@ -801,6 +800,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "Run-time option settings:\n"); } OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_BOOL(munmap, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 589c652c..0fa240b7 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -251,8 +251,8 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { TEST_END /* - * Actually unmap extents, regardless of config_munmap, so that attempts to - * access a destroyed arena's memory will segfault. + * Actually unmap extents, regardless of opt_munmap, so that attempts to access + * a destroyed arena's memory will segfault. 
*/ static bool extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8afd25ab..51a5244e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -131,7 +131,6 @@ TEST_BEGIN(test_mallctl_config) { TEST_MALLCTL_CONFIG(fill, bool); TEST_MALLCTL_CONFIG(lazy_lock, bool); TEST_MALLCTL_CONFIG(malloc_conf, const char *); - TEST_MALLCTL_CONFIG(munmap, bool); TEST_MALLCTL_CONFIG(prof, bool); TEST_MALLCTL_CONFIG(prof_libgcc, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool); @@ -158,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(bool, munmap, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); From 89e2d3c12b573310e60b97beaf178007a71d83a3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Apr 2017 17:09:56 -0700 Subject: [PATCH 0839/2608] Header refactoring: ctl - unify and remove from catchall. In order to do this, we introduce the mutex_prof module, which breaks a circular dependency between ctl and prof. 
--- include/jemalloc/internal/ctl.h | 130 ++++++++++++++++++ include/jemalloc/internal/ctl_externs.h | 48 ------- include/jemalloc/internal/ctl_structs.h | 85 ------------ include/jemalloc/internal/ctl_types.h | 57 -------- .../internal/jemalloc_internal_includes.h | 3 - include/jemalloc/internal/mutex_prof.h | 84 +++++++++++ include/jemalloc/internal/mutex_structs.h | 34 +---- include/jemalloc/internal/stats.h | 2 +- src/ctl.c | 19 +-- src/jemalloc.c | 1 + src/stats.c | 58 ++++---- 11 files changed, 257 insertions(+), 264 deletions(-) create mode 100644 include/jemalloc/internal/ctl.h delete mode 100644 include/jemalloc/internal/ctl_externs.h delete mode 100644 include/jemalloc/internal/ctl_structs.h delete mode 100644 include/jemalloc/internal/ctl_types.h create mode 100644 include/jemalloc/internal/mutex_prof.h diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h new file mode 100644 index 00000000..de74a75d --- /dev/null +++ b/include/jemalloc/internal/ctl.h @@ -0,0 +1,130 @@ +#ifndef JEMALLOC_INTERNAL_CTL_H +#define JEMALLOC_INTERNAL_CTL_H + +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats.h" + +/* Maximum ctl tree depth. */ +#define CTL_MAX_DEPTH 7 + +typedef struct ctl_node_s { + bool named; +} ctl_node_t; + +typedef struct ctl_named_node_s { + ctl_node_t node; + const char *name; + /* If (nchildren == 0), this is a terminal node. 
*/ + size_t nchildren; + const ctl_node_t *children; + int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *, + size_t); +} ctl_named_node_t; + +typedef struct ctl_indexed_node_s { + struct ctl_node_s node; + const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, + size_t); +} ctl_indexed_node_t; + +typedef struct ctl_arena_stats_s { + arena_stats_t astats; + + /* Aggregate stats for small size classes, based on bin stats. */ + size_t allocated_small; + uint64_t nmalloc_small; + uint64_t ndalloc_small; + uint64_t nrequests_small; + + malloc_bin_stats_t bstats[NBINS]; + malloc_large_stats_t lstats[NSIZES - NBINS]; +} ctl_arena_stats_t; + +typedef struct ctl_stats_s { + size_t allocated; + size_t active; + size_t metadata; + size_t resident; + size_t mapped; + size_t retained; + + mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes]; +} ctl_stats_t; + +typedef struct ctl_arena_s ctl_arena_t; +struct ctl_arena_s { + unsigned arena_ind; + bool initialized; + ql_elm(ctl_arena_t) destroyed_link; + + /* Basic stats, supported even if !config_stats. */ + unsigned nthreads; + const char *dss; + ssize_t dirty_decay_time; + ssize_t muzzy_decay_time; + size_t pactive; + size_t pdirty; + size_t pmuzzy; + + /* NULL if !config_stats. */ + ctl_arena_stats_t *astats; +}; + +typedef struct ctl_arenas_s { + uint64_t epoch; + unsigned narenas; + ql_head(ctl_arena_t) destroyed; + + /* + * Element 0 corresponds to merged stats for extant arenas (accessed via + * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for + * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the + * remaining MALLOCX_ARENA_MAX+1 elements correspond to arenas. 
+ */ + ctl_arena_t *arenas[MALLOCX_ARENA_MAX + 3]; +} ctl_arenas_t; + +int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, + void *newp, size_t newlen); +int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, + size_t *miblenp); + +int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen); +bool ctl_boot(void); +void ctl_prefork(tsdn_t *tsdn); +void ctl_postfork_parent(tsdn_t *tsdn); +void ctl_postfork_child(tsdn_t *tsdn); + +#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlnametomib(name, mibp, miblenp) do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf(": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", name); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ + newlen) != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/include/jemalloc/internal/ctl_externs.h b/include/jemalloc/internal/ctl_externs.h deleted file mode 100644 index 875a8101..00000000 --- a/include/jemalloc/internal/ctl_externs.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_CTL_EXTERNS_H -#define JEMALLOC_INTERNAL_CTL_EXTERNS_H - -#include "jemalloc/internal/malloc_io.h" - -/* Maximum ctl tree depth. 
*/ -#define CTL_MAX_DEPTH 7 - -int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, - void *newp, size_t newlen); -int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, - size_t *miblenp); - -int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); -bool ctl_boot(void); -void ctl_prefork(tsdn_t *tsdn); -void ctl_postfork_parent(tsdn_t *tsdn); -void ctl_postfork_child(tsdn_t *tsdn); - -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_printf( \ - ": Failure in xmallctl(\"%s\", ...)\n", \ - name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_printf(": Failure in " \ - "xmallctlnametomib(\"%s\", ...)\n", name); \ - abort(); \ - } \ -} while (0) - -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) - -#endif /* JEMALLOC_INTERNAL_CTL_EXTERNS_H */ diff --git a/include/jemalloc/internal/ctl_structs.h b/include/jemalloc/internal/ctl_structs.h deleted file mode 100644 index c64820d2..00000000 --- a/include/jemalloc/internal/ctl_structs.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_CTL_STRUCTS_H -#define JEMALLOC_INTERNAL_CTL_STRUCTS_H - -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" - -struct ctl_node_s { - bool named; -}; - -struct ctl_named_node_s { - struct ctl_node_s node; - const char *name; - /* If (nchildren == 0), this is a terminal node. 
*/ - size_t nchildren; - const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, - size_t *, void *, size_t); -}; - -struct ctl_indexed_node_s { - struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, - size_t); -}; - -struct ctl_arena_stats_s { - arena_stats_t astats; - - /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; - uint64_t nmalloc_small; - uint64_t ndalloc_small; - uint64_t nrequests_small; - - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; -}; - -struct ctl_stats_s { - size_t allocated; - size_t active; - size_t metadata; - size_t resident; - size_t mapped; - size_t retained; - - mutex_prof_data_t mutex_prof_data[num_global_prof_mutexes]; -}; - -struct ctl_arena_s { - unsigned arena_ind; - bool initialized; - ql_elm(ctl_arena_t) destroyed_link; - - /* Basic stats, supported even if !config_stats. */ - unsigned nthreads; - const char *dss; - ssize_t dirty_decay_time; - ssize_t muzzy_decay_time; - size_t pactive; - size_t pdirty; - size_t pmuzzy; - - /* NULL if !config_stats. */ - ctl_arena_stats_t *astats; -}; - -struct ctl_arenas_s { - uint64_t epoch; - unsigned narenas; - ql_head(ctl_arena_t) destroyed; - - /* - * Element 0 corresponds to merged stats for extant arenas (accessed via - * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for - * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the - * remaining MALLOCX_ARENA_MAX+1 elements correspond to arenas. 
- */ - ctl_arena_t *arenas[MALLOCX_ARENA_MAX + 3]; -}; - -#endif /* JEMALLOC_INTERNAL_CTL_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl_types.h b/include/jemalloc/internal/ctl_types.h deleted file mode 100644 index e7986092..00000000 --- a/include/jemalloc/internal/ctl_types.h +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_CTL_TYPES_H -#define JEMALLOC_INTERNAL_CTL_TYPES_H - -#define GLOBAL_PROF_MUTEXES \ - OP(ctl) \ - OP(prof) - -typedef enum { -#define OP(mtx) global_prof_mutex_##mtx, - GLOBAL_PROF_MUTEXES -#undef OP - num_global_prof_mutexes -} global_prof_mutex_ind_t; - -#define ARENA_PROF_MUTEXES \ - OP(large) \ - OP(extent_avail) \ - OP(extents_dirty) \ - OP(extents_muzzy) \ - OP(extents_retained) \ - OP(decay_dirty) \ - OP(decay_muzzy) \ - OP(base) \ - OP(tcache_list) - -typedef enum { -#define OP(mtx) arena_prof_mutex_##mtx, - ARENA_PROF_MUTEXES -#undef OP - num_arena_prof_mutexes -} arena_prof_mutex_ind_t; - -#define MUTEX_PROF_COUNTERS \ - OP(num_ops, uint64_t) \ - OP(num_wait, uint64_t) \ - OP(num_spin_acq, uint64_t) \ - OP(num_owner_switch, uint64_t) \ - OP(total_wait_time, uint64_t) \ - OP(max_wait_time, uint64_t) \ - OP(max_num_thds, uint32_t) - -typedef enum { -#define OP(counter, type) mutex_counter_##counter, - MUTEX_PROF_COUNTERS -#undef OP - num_mutex_prof_counters -} mutex_prof_counter_ind_t; - -typedef struct ctl_node_s ctl_node_t; -typedef struct ctl_named_node_s ctl_named_node_t; -typedef struct ctl_indexed_node_s ctl_indexed_node_t; -typedef struct ctl_arena_stats_s ctl_arena_stats_t; -typedef struct ctl_stats_s ctl_stats_t; -typedef struct ctl_arena_s ctl_arena_t; -typedef struct ctl_arenas_s ctl_arenas_t; - -#endif /* JEMALLOC_INTERNAL_CTL_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index f98a1b27..9000841e 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ 
b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/ctl_types.h" #include "jemalloc/internal/witness_types.h" #include "jemalloc/internal/mutex_types.h" #include "jemalloc/internal/tsd_types.h" @@ -59,7 +58,6 @@ #include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" -#include "jemalloc/internal/ctl_structs.h" #include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/extent_dss_structs.h" @@ -75,7 +73,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/ctl_externs.h" #include "jemalloc/internal/witness_externs.h" #include "jemalloc/internal/mutex_externs.h" #include "jemalloc/internal/extent_externs.h" diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h new file mode 100644 index 00000000..50c0af0a --- /dev/null +++ b/include/jemalloc/internal/mutex_prof.h @@ -0,0 +1,84 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_PROF_H +#define JEMALLOC_INTERNAL_MUTEX_PROF_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/nstime.h" + +#define MUTEX_PROF_GLOBAL_MUTEXES \ + OP(ctl) \ + OP(prof) + +typedef enum { +#define OP(mtx) global_prof_mutex_##mtx, + MUTEX_PROF_GLOBAL_MUTEXES +#undef OP + mutex_prof_num_global_mutexes +} mutex_prof_global_ind_t; + +#define MUTEX_PROF_ARENA_MUTEXES \ + OP(large) \ + OP(extent_avail) \ + OP(extents_dirty) \ + OP(extents_muzzy) \ + OP(extents_retained) \ + OP(decay_dirty) \ + OP(decay_muzzy) \ + OP(base) \ + OP(tcache_list) + +typedef enum { +#define OP(mtx) arena_prof_mutex_##mtx, + MUTEX_PROF_ARENA_MUTEXES +#undef OP + mutex_prof_num_arena_mutexes +} mutex_prof_arena_ind_t; + +#define MUTEX_PROF_COUNTERS \ + OP(num_ops, uint64_t) \ + 
OP(num_wait, uint64_t) \ + OP(num_spin_acq, uint64_t) \ + OP(num_owner_switch, uint64_t) \ + OP(total_wait_time, uint64_t) \ + OP(max_wait_time, uint64_t) \ + OP(max_num_thds, uint32_t) + +typedef enum { +#define OP(counter, type) mutex_counter_##counter, + MUTEX_PROF_COUNTERS +#undef OP + mutex_prof_num_counters +} mutex_prof_counter_ind_t; + +typedef struct mutex_prof_data_s { + /* + * Counters touched on the slow path, i.e. when there is lock + * contention. We update them once we have the lock. + */ + /* Total time (in nano seconds) spent waiting on this mutex. */ + nstime_t tot_wait_time; + /* Max time (in nano seconds) spent on a single lock operation. */ + nstime_t max_wait_time; + /* # of times have to wait for this mutex (after spinning). */ + uint64_t n_wait_times; + /* # of times acquired the mutex through local spinning. */ + uint64_t n_spin_acquired; + /* Max # of threads waiting for the mutex at the same time. */ + uint32_t max_n_thds; + /* Current # of threads waiting on the lock. Atomic synced. */ + atomic_u32_t n_waiting_thds; + + /* + * Data touched on the fast path. These are modified right after we + * grab the lock, so it's placed closest to the end (i.e. right before + * the lock) so that we have a higher chance of them being on the same + * cacheline. + */ + /* # of times the mutex holder is different than the previous one. */ + uint64_t n_owner_switches; + /* Previous mutex holder, to facilitate n_owner_switches. */ + tsdn_t *prev_owner; + /* # of lock() operations in total. 
*/ + uint64_t n_lock_ops; +} mutex_prof_data_t; + +#endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */ diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index dc755547..2691852d 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -2,39 +2,7 @@ #define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/nstime.h" - -struct mutex_prof_data_s { - /* - * Counters touched on the slow path, i.e. when there is lock - * contention. We update them once we have the lock. - */ - /* Total time (in nano seconds) spent waiting on this mutex. */ - nstime_t tot_wait_time; - /* Max time (in nano seconds) spent on a single lock operation. */ - nstime_t max_wait_time; - /* # of times have to wait for this mutex (after spinning). */ - uint64_t n_wait_times; - /* # of times acquired the mutex through local spinning. */ - uint64_t n_spin_acquired; - /* Max # of threads waiting for the mutex at the same time. */ - uint32_t max_n_thds; - /* Current # of threads waiting on the lock. Atomic synced. */ - atomic_u32_t n_waiting_thds; - - /* - * Data touched on the fast path. These are modified right after we - * grab the lock, so it's placed closest to the end (i.e. right before - * the lock) so that we have a higher chance of them being on the same - * cacheline. - */ - /* # of times the mutex holder is different than the previous one. */ - uint64_t n_owner_switches; - /* Previous mutex holder, to facilitate n_owner_switches. */ - tsdn_t *prev_owner; - /* # of lock() operations in total. 
*/ - uint64_t n_lock_ops; -}; +#include "jemalloc/internal/mutex_prof.h" struct malloc_mutex_s { union { diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 9414200f..301a50ab 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -139,7 +139,7 @@ typedef struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ - mutex_prof_data_t mutex_prof_data[num_arena_prof_mutexes]; + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; diff --git a/src/ctl.c b/src/ctl.c index 8c2e7bc2..3591f891 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" @@ -193,12 +194,12 @@ CTL_PROTO(stats_##n##_max_num_thds) /* Global mutexes. */ #define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(mutexes_##mtx) -GLOBAL_PROF_MUTEXES +MUTEX_PROF_GLOBAL_MUTEXES #undef OP /* Per arena mutexes. */ #define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(arenas_i_mutexes_##mtx) -ARENA_PROF_MUTEXES +MUTEX_PROF_ARENA_MUTEXES #undef OP /* Arena bin mutexes. 
*/ @@ -429,12 +430,12 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { }; #define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) -ARENA_PROF_MUTEXES +MUTEX_PROF_ARENA_MUTEXES #undef OP static const ctl_named_node_t stats_arenas_i_mutexes_node[] = { #define OP(mtx) {NAME(#mtx), CHILD(named, stats_arenas_i_mutexes_##mtx)}, -ARENA_PROF_MUTEXES +MUTEX_PROF_ARENA_MUTEXES #undef OP }; @@ -473,12 +474,12 @@ static const ctl_indexed_node_t stats_arenas_node[] = { }; #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx) -GLOBAL_PROF_MUTEXES +MUTEX_PROF_GLOBAL_MUTEXES #undef OP static const ctl_named_node_t stats_mutexes_node[] = { #define OP(mtx) {NAME(#mtx), CHILD(named, stats_mutexes_##mtx)}, -GLOBAL_PROF_MUTEXES +MUTEX_PROF_GLOBAL_MUTEXES #undef OP {NAME("reset"), CTL(stats_mutexes_reset)} }; @@ -737,7 +738,7 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, arena_prof_mutex_##mtx]), \ &(astats->astats.mutex_prof_data[ \ arena_prof_mutex_##mtx])); -ARENA_PROF_MUTEXES +MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { accum_atomic_zu(&sdstats->astats.base, @@ -2401,13 +2402,13 @@ CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ #define OP(mtx) \ RO_MUTEX_CTL_GEN(mutexes_##mtx, \ ctl_stats->mutex_prof_data[global_prof_mutex_##mtx]) -GLOBAL_PROF_MUTEXES +MUTEX_PROF_GLOBAL_MUTEXES #undef OP /* Per arena mutexes */ #define OP(mtx) RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \ arenas_i(mib[2])->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]) -ARENA_PROF_MUTEXES +MUTEX_PROF_ARENA_MUTEXES #undef OP /* tcache bin mutex */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 5e1f0a72..42146004 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/size_classes.h" diff --git 
a/src/stats.c b/src/stats.c index ca9db89d..5d515186 100644 --- a/src/stats.c +++ b/src/stats.c @@ -3,16 +3,18 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex_prof.h" -const char *global_mutex_names[num_global_prof_mutexes] = { +const char *global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, - GLOBAL_PROF_MUTEXES + MUTEX_PROF_GLOBAL_MUTEXES #undef OP }; -const char *arena_mutex_names[num_arena_prof_mutexes] = { +const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { #define OP(mtx) #mtx, - ARENA_PROF_MUTEXES + MUTEX_PROF_ARENA_MUTEXES #undef OP }; @@ -81,7 +83,7 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, static void read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[num_mutex_prof_counters]) { + uint64_t results[mutex_prof_num_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ @@ -94,7 +96,7 @@ MUTEX_PROF_COUNTERS static void mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[num_mutex_prof_counters], + const char *name, uint64_t stats[mutex_prof_num_counters], const char *json_indent, bool last) { malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); @@ -105,7 +107,7 @@ mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ json_indent, #c, (t)stats[mutex_counter_##c], \ - (++k == num_mutex_prof_counters) ? "" : ","); + (++k == mutex_prof_num_counters) ? 
"" : ","); MUTEX_PROF_COUNTERS #undef OP malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, @@ -187,7 +189,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, nmalloc, ndalloc, curregs, nrequests, nfills, nflushes, nreslabs, curslabs, mutex ? "," : ""); if (mutex) { - uint64_t mutex_stats[num_mutex_prof_counters]; + uint64_t mutex_stats[mutex_prof_num_counters]; read_arena_bin_mutex_stats(i, j, mutex_stats); mutex_stats_output_json(write_cb, cbopaque, "mutex", mutex_stats, "\t\t\t\t\t\t", true); @@ -226,7 +228,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, &max_wait, uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.mutex.num_ops", i, j, &num_ops, uint64_t); - uint64_t mutex_stats[num_mutex_prof_counters]; + uint64_t mutex_stats[mutex_prof_num_counters]; if (mutex) { read_arena_bin_mutex_stats(i, j, mutex_stats); } @@ -336,11 +338,11 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), static void read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[num_arena_prof_mutexes][num_mutex_prof_counters]) { + uint64_t results[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - arena_prof_mutex_ind_t i; - for (i = 0; i < num_arena_prof_mutexes; i++) { + mutex_prof_arena_ind_t i; + for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.mutexes", arena_mutex_names[i], #c); \ @@ -353,7 +355,7 @@ MUTEX_PROF_COUNTERS static void mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[num_mutex_prof_counters], + const char *name, uint64_t stats[mutex_prof_num_counters], bool first_mutex) { if (first_mutex) { /* Print title. 
*/ @@ -380,15 +382,15 @@ MUTEX_PROF_COUNTERS static void stats_arena_mutexes_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[num_arena_prof_mutexes][num_mutex_prof_counters]; + uint64_t mutex_stats[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]; read_arena_mutex_stats(arena_ind, mutex_stats); /* Output mutex stats. */ if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); - arena_prof_mutex_ind_t i, last_mutex; - last_mutex = num_arena_prof_mutexes - 1; - for (i = 0; i < num_arena_prof_mutexes; i++) { + mutex_prof_arena_ind_t i, last_mutex; + last_mutex = mutex_prof_num_arena_mutexes - 1; + for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, arena_mutex_names[i], mutex_stats[i], "\t\t\t\t\t", (i == last_mutex)); @@ -396,8 +398,8 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", json_end ? 
"" : ","); } else { - arena_prof_mutex_ind_t i; - for (i = 0; i < num_arena_prof_mutexes; i++) { + mutex_prof_arena_ind_t i; + for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, arena_mutex_names[i], mutex_stats[i], i == 0); } @@ -993,11 +995,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, static void read_global_mutex_stats( - uint64_t results[num_global_prof_mutexes][num_mutex_prof_counters]) { + uint64_t results[mutex_prof_num_global_mutexes][mutex_prof_num_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - global_prof_mutex_ind_t i; - for (i = 0; i < num_global_prof_mutexes; i++) { + mutex_prof_global_ind_t i; + for (i = 0; i < mutex_prof_num_global_mutexes; i++) { #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "mutexes", global_mutex_names[i], #c); \ @@ -1020,7 +1022,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[num_global_prof_mutexes][num_mutex_prof_counters]; + uint64_t mutex_stats[mutex_prof_num_global_mutexes][mutex_prof_num_counters]; if (mutex) { read_global_mutex_stats(mutex_stats); } @@ -1044,12 +1046,12 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, if (mutex) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mutexes\": {\n"); - global_prof_mutex_ind_t i; - for (i = 0; i < num_global_prof_mutexes; i++) { + mutex_prof_global_ind_t i; + for (i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, global_mutex_names[i], mutex_stats[i], "\t\t\t\t", - i == num_global_prof_mutexes - 1); + i == mutex_prof_num_global_mutexes - 1); } malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); } @@ -1061,8 +1063,8 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, 
resident, mapped, retained); if (mutex) { - global_prof_mutex_ind_t i; - for (i = 0; i < num_global_prof_mutexes; i++) { + mutex_prof_global_ind_t i; + for (i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, global_mutex_names[i], mutex_stats[i], i == 0); From dab4beb277f5fd82dd0f66324bb9a2c7458afe1c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Apr 2017 17:16:36 -0700 Subject: [PATCH 0840/2608] Header refactoring: hash - unify and remove from catchall. --- include/jemalloc/internal/{hash_inlines.h => hash.h} | 6 +++--- include/jemalloc/internal/jemalloc_internal_includes.h | 1 - src/ckh.c | 1 + src/prof.c | 1 + test/unit/hash.c | 1 + 5 files changed, 6 insertions(+), 4 deletions(-) rename include/jemalloc/internal/{hash_inlines.h => hash.h} (98%) diff --git a/include/jemalloc/internal/hash_inlines.h b/include/jemalloc/internal/hash.h similarity index 98% rename from include/jemalloc/internal/hash_inlines.h rename to include/jemalloc/internal/hash.h index 2cd7e3ee..188296cf 100644 --- a/include/jemalloc/internal/hash_inlines.h +++ b/include/jemalloc/internal/hash.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_HASH_INLINES_H -#define JEMALLOC_INTERNAL_HASH_INLINES_H +#ifndef JEMALLOC_INTERNAL_HASH_H +#define JEMALLOC_INTERNAL_HASH_H #include "jemalloc/internal/assert.h" @@ -315,4 +315,4 @@ hash(const void *key, size_t len, const uint32_t seed, size_t r_hash[2]) { #endif } -#endif /* JEMALLOC_INTERNAL_HASH_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_HASH_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 9000841e..fb4105f0 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -107,7 +107,6 @@ #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/arena_inlines_b.h" -#include 
"jemalloc/internal/hash_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof_inlines_b.h" diff --git a/src/ckh.c b/src/ckh.c index 8f0bac07..013b6249 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -42,6 +42,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" diff --git a/src/prof.c b/src/prof.c index a8f6aed2..a872afb1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" /******************************************************************************/ diff --git a/test/unit/hash.c b/test/unit/hash.c index 48507515..7cc034f8 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -28,6 +28,7 @@ */ #include "test/jemalloc_test.h" +#include "jemalloc/internal/hash.h" typedef enum { hash_variant_x86_32, From 268843ac680f688582044621434221bedf78719b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Apr 2017 18:05:15 -0700 Subject: [PATCH 0841/2608] Header refactoring: pages.h - unify and remove from catchall. 
--- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/extent_inlines.h | 1 + .../internal/jemalloc_internal_includes.h | 2 -- .../internal/{pages_types.h => pages.h} | 31 +++++++++++++++++-- include/jemalloc/internal/pages_externs.h | 29 ----------------- src/pages.c | 3 ++ 6 files changed, 33 insertions(+), 34 deletions(-) rename include/jemalloc/internal/{pages_types.h => pages.h} (68%) delete mode 100644 include/jemalloc/internal/pages_externs.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 1e13efd3..7d56e44b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index a73b6530..e1f8bd9e 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H #define JEMALLOC_INTERNAL_EXTENT_INLINES_H +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index fb4105f0..340cb1ce 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -48,7 +48,6 @@ #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/rtree_types.h" -#include "jemalloc/internal/pages_types.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/prof_types.h" @@ -81,7 +80,6 @@ #include "jemalloc/internal/base_externs.h" #include 
"jemalloc/internal/arena_externs.h" #include "jemalloc/internal/rtree_externs.h" -#include "jemalloc/internal/pages_externs.h" #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" diff --git a/include/jemalloc/internal/pages_types.h b/include/jemalloc/internal/pages.h similarity index 68% rename from include/jemalloc/internal/pages_types.h rename to include/jemalloc/internal/pages.h index e44ee2a4..28383b7f 100644 --- a/include/jemalloc/internal/pages_types.h +++ b/include/jemalloc/internal/pages.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PAGES_TYPES_H -#define JEMALLOC_INTERNAL_PAGES_TYPES_H +#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H /* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK @@ -43,4 +43,29 @@ # define PAGES_CAN_PURGE_FORCED #endif -#endif /* JEMALLOC_INTERNAL_PAGES_TYPES_H */ +static const bool pages_can_purge_lazy = +#ifdef PAGES_CAN_PURGE_LAZY + true +#else + false +#endif + ; +static const bool pages_can_purge_forced = +#ifdef PAGES_CAN_PURGE_FORCED + true +#else + false +#endif + ; + +void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); +void pages_unmap(void *addr, size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); +bool pages_huge(void *addr, size_t size); +bool pages_nohuge(void *addr, size_t size); +bool pages_boot(void); + +#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/pages_externs.h b/include/jemalloc/internal/pages_externs.h deleted file mode 100644 index af9a01b8..00000000 --- a/include/jemalloc/internal/pages_externs.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H -#define JEMALLOC_INTERNAL_PAGES_EXTERNS_H - -static const bool pages_can_purge_lazy 
= -#ifdef PAGES_CAN_PURGE_LAZY - true -#else - false -#endif - ; -static const bool pages_can_purge_forced = -#ifdef PAGES_CAN_PURGE_FORCED - true -#else - false -#endif - ; - -void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); -void pages_unmap(void *addr, size_t size); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge_lazy(void *addr, size_t size); -bool pages_purge_forced(void *addr, size_t size); -bool pages_huge(void *addr, size_t size); -bool pages_nohuge(void *addr, size_t size); -bool pages_boot(void); - -#endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/pages.c b/src/pages.c index 86907aa5..3a048e3b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,5 +1,8 @@ #define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/pages.h" + #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" From 05775a37360c7f1d41dc57b73ed5c0f259024d9f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 24 Apr 2017 18:14:57 -0700 Subject: [PATCH 0842/2608] Avoid prof_dump during reentrancy. --- include/jemalloc/internal/prof_inlines_b.h | 3 ++ src/prof.c | 32 ++++++++++++++-------- 2 files changed, 23 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index eba981b9..8cdea615 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -96,6 +96,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } return true; } else { + if (tsd->reentrancy_level > 0) { + return true; + } /* Compute new sample threshold. 
*/ if (update) { prof_sample_threshold_update(tdata); diff --git a/src/prof.c b/src/prof.c index a872afb1..1e818ab4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1641,30 +1641,30 @@ label_write_error: static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { - prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; - prof_gctx_tree_t gctxs; - bool err; - cassert(config_prof); + assert(tsd->reentrancy_level == 0); - tdata = prof_tdata_get(tsd, true); + prof_tdata_t * tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { return true; } + pre_reentrancy(tsd); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + prof_gctx_tree_t gctxs; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &prof_gctx_dump_iter_arg, &gctxs); prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); if (err) { return true; @@ -1757,6 +1757,7 @@ prof_fdump(void) { return; } tsd = tsd_fetch(); + assert(tsd->reentrancy_level == 0); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); @@ -1791,6 +1792,10 @@ prof_idump(tsdn_t *tsdn) { return; } tsd = tsdn_tsd(tsdn); + if (tsd->reentrancy_level > 0) { + return; + } + tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { return; @@ -1812,14 +1817,13 @@ prof_idump(tsdn_t *tsdn) { bool prof_mdump(tsd_t *tsd, const char *filename) { - char 
filename_buf[DUMP_FILENAME_BUFSIZE]; - cassert(config_prof); + assert(tsd->reentrancy_level == 0); if (!opt_prof || !prof_booted) { return true; } - + char filename_buf[DUMP_FILENAME_BUFSIZE]; if (filename == NULL) { /* No filename specified, so automatically generate one. */ if (opt_prof_prefix[0] == '\0') { @@ -1845,6 +1849,10 @@ prof_gdump(tsdn_t *tsdn) { return; } tsd = tsdn_tsd(tsdn); + if (tsd->reentrancy_level > 0) { + return; + } + tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { return; From e2aad5e810853ebfa285b361563120bd1925ca19 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 25 Apr 2017 13:33:22 -0700 Subject: [PATCH 0843/2608] Remove redundant extent lookup in tcache_bin_flush_large. --- src/tcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index c272a3c4..e2474a32 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -215,7 +215,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); - extent = iealloc(tsd_tsdn(tsd), ptr); + extent = item_extent[i]; if (extent_arena_get(extent) == locked_arena) { large_dalloc_prep_junked_locked(tsd_tsdn(tsd), extent); From 8584adc451f31adfc4ab8693d9189cf3a7e5d858 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 25 Apr 2017 13:31:45 -0700 Subject: [PATCH 0844/2608] Use trylock in tcache_bin_flush when possible. During tcache gc, use tcache_bin_try_flush_small / _large so that we can skip items with their bins locked already. 
--- include/jemalloc/internal/tcache_externs.h | 30 ++-- include/jemalloc/internal/tcache_inlines.h | 4 +- src/tcache.c | 171 +++++++++++++++------ 3 files changed, 142 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index abe133fa..95dfe446 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -27,23 +27,27 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, +unsigned tcache_bin_try_flush_small(tsd_t *tsd, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem); +unsigned tcache_bin_try_flush_large(tsd_t *tsd, tcache_t *tcache, + tcache_bin_t *tbin, szind_t binind, unsigned rem); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); 
-void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8a65ba2b..5e9a7a0f 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -227,8 +227,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tbin = tcache_large_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); + tcache_bin_flush_large(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); tbin->ncached++; diff --git a/src/tcache.c b/src/tcache.c index e2474a32..292c0176 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -45,14 +45,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } else { tbin = tcache_large_bin_get(tcache, binind); } + bool repeat_bin; if (tbin->low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ + unsigned nflushed; if (binind < NBINS) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - tbin->ncached - tbin->low_water + (tbin->low_water - >> 2)); + nflushed = tcache_bin_try_flush_small(tsd, tcache, tbin, + binind, tbin->ncached - tbin->low_water + + (tbin->low_water >> 2)); /* * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. 
@@ -63,23 +65,29 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache->lg_fill_div[binind]++; } } else { - tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + nflushed = tcache_bin_try_flush_large(tsd, tcache, tbin, + binind, tbin->ncached - tbin->low_water + + (tbin->low_water >> 2)); } - } else if (tbin->low_water < 0) { - /* - * Increase fill count by 2X for small bins. Make sure - * lg_fill_div stays greater than 0. - */ - if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { - tcache->lg_fill_div[binind]--; + repeat_bin = (nflushed == 0); + } else { + if (tbin->low_water < 0) { + /* + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 0. + */ + if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { + tcache->lg_fill_div[binind]--; + } } + repeat_bin = false; } - tbin->low_water = tbin->ncached; - - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) { - tcache->next_gc_bin = 0; + if (!repeat_bin) { + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) { + tcache->next_gc_bin = 0; + } + tbin->low_water = tbin->ncached; } } @@ -99,11 +107,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } -void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem) { - bool merged_stats = false; - +static unsigned +tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem, bool must_flush) { assert(binind < NBINS); assert(rem <= tbin->ncached); @@ -116,9 +122,12 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } + bool merged_stats = false; + unsigned nflushed = 0; + unsigned nskipped = 0; while (nflush > 0) { /* Lock the arena bin associated with the first object. 
*/ - extent_t *extent = item_extent[0]; + extent_t *extent = item_extent[nskipped]; arena_t *bin_arena = extent_arena_get(extent); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -130,7 +139,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, tcache->prof_accumbytes = 0; } - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + if (must_flush) { + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } else { + /* Make best effort to flush w/o blocking. */ + if (malloc_mutex_trylock(tsd_tsdn(tsd), &bin->lock)) { + nskipped++; + nflush--; + continue; + } + } if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -139,7 +157,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, tbin->tstats.nrequests = 0; } unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { + for (unsigned i = nskipped; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -154,13 +172,14 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - *(tbin->avail - 1 - ndeferred) = ptr; - item_extent[ndeferred] = extent; + *(tbin->avail - 1 - ndeferred - nskipped) = ptr; + item_extent[ndeferred + nskipped] = extent; ndeferred++; } } malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); + nflushed += nflush - ndeferred; nflush = ndeferred; } if (config_stats && !merged_stats) { @@ -169,26 +188,49 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. 
*/ arena_bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + if (must_flush) { + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + if (must_flush || + !malloc_mutex_trylock(tsd_tsdn(tsd), &bin->lock)) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + } } + assert(nflushed == tbin->ncached - rem - nskipped); + assert(nskipped == 0 || !must_flush); - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * - sizeof(void *)); - tbin->ncached = rem; + if (nflushed > 0) { + memmove(tbin->avail - (rem + nskipped), tbin->avail - + tbin->ncached, rem * sizeof(void *)); + } + tbin->ncached = rem + nskipped; if ((low_water_t)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } + + return nflushed; } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache) { - bool merged_stats = false; +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem) { + tcache_bin_flush_small_impl(tsd, tcache, tbin, binind, rem, true); +} +unsigned +tcache_bin_try_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem) { + return tcache_bin_flush_small_impl(tsd, tcache, tbin, binind, rem, + false); +} + +static unsigned +tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem, bool must_flush) { assert(binind < nhbins); assert(rem <= tbin->ncached); @@ -201,18 +243,31 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } + bool merged_stats = false; + 
unsigned nflushed = 0; + unsigned nskipped = 0; while (nflush > 0) { /* Lock the arena associated with the first object. */ - extent_t *extent = item_extent[0]; + extent_t *extent = item_extent[nskipped]; arena_t *locked_arena = extent_arena_get(extent); UNUSED bool idump; if (config_prof) { idump = false; } + if (must_flush) { + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + } else { + /* Make best effort to flush w/o blocking. */ + if (malloc_mutex_trylock(tsd_tsdn(tsd), + &locked_arena->large_mtx)) { + nskipped++; + nflush--; + continue; + } + } - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); - for (unsigned i = 0; i < nflush; i++) { + for (unsigned i = nskipped; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); extent = item_extent[i]; @@ -238,7 +293,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { + for (unsigned i = nskipped; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -252,8 +307,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, * Stash the object, so that it can be handled * in a future pass. 
*/ - *(tbin->avail - 1 - ndeferred) = ptr; - item_extent[ndeferred] = extent; + *(tbin->avail - 1 - ndeferred - nskipped) = ptr; + item_extent[ndeferred + nskipped] = extent; ndeferred++; } } @@ -262,6 +317,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); + nflushed += nflush - ndeferred; nflush = ndeferred; } if (config_stats && !merged_stats) { @@ -274,12 +330,31 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, tbin->tstats.nrequests = 0; } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * - sizeof(void *)); - tbin->ncached = rem; + assert(nflushed == tbin->ncached - rem - nskipped); + assert(nskipped == 0 || !must_flush); + + if (nflushed > 0) { + memmove(tbin->avail - (rem + nskipped), tbin->avail - + tbin->ncached, rem * sizeof(void *)); + } + tbin->ncached = rem + nskipped; if ((low_water_t)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } + return nflushed; +} + +void +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem) { + tcache_bin_flush_large_impl(tsd, tcache, tbin, binind, rem, true); +} + +unsigned +tcache_bin_try_flush_large(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem) { + return tcache_bin_flush_large_impl(tsd, tcache, tbin, binind, rem, + false); } void @@ -461,7 +536,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } for (unsigned i = NBINS; i < nhbins; i++) { tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); - tcache_bin_flush_large(tsd, tbin, i, 0, tcache); + tcache_bin_flush_large(tsd, tcache, tbin, i, 0); if (config_stats) { assert(tbin->tstats.nrequests == 0); From b0c2a28280d363fc85aa8b4fdbe7814ef46cb17b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 25 Apr 2017 14:10:31 -0700 Subject: [PATCH 0845/2608] Use try_flush first in tcache_dalloc. 
Only do must_flush if try_flush didn't manage to free anything. --- include/jemalloc/internal/tcache_inlines.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 5e9a7a0f..511fceab 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -201,8 +201,11 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tbin = tcache_small_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + if (tcache_bin_try_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)) == 0) { + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); + } } assert(tbin->ncached < tbin_info->ncached_max); tbin->ncached++; @@ -227,8 +230,11 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tbin = tcache_large_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + if (tcache_bin_try_flush_large(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)) == 0) { + tcache_bin_flush_large(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); + } } assert(tbin->ncached < tbin_info->ncached_max); tbin->ncached++; From 5c56603e91d8d0021280615f86b95efe0463b6a8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 27 Apr 2017 14:55:07 -0700 Subject: [PATCH 0846/2608] Inline tcache_bin_flush_small_impl / _large_impl. 
--- src/tcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 292c0176..a7e05b17 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -107,7 +107,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } -static unsigned +static inline unsigned tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem, bool must_flush) { assert(binind < NBINS); @@ -228,7 +228,7 @@ tcache_bin_try_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, false); } -static unsigned +static inline unsigned tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem, bool must_flush) { assert(binind < nhbins); From d901a377753cf4c75d2f72f7a692c44f61eac4a4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 27 Apr 2017 14:12:51 -0700 Subject: [PATCH 0847/2608] Revert "Use try_flush first in tcache_dalloc." This reverts commit b0c2a28280d363fc85aa8b4fdbe7814ef46cb17b. Production benchmark shows this caused significant regression in both CPU and memory consumption. Will investigate separately later on. 
--- include/jemalloc/internal/tcache_inlines.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 511fceab..5e9a7a0f 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -201,11 +201,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tbin = tcache_small_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - if (tcache_bin_try_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)) == 0) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); - } + tcache_bin_flush_small(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); tbin->ncached++; @@ -230,11 +227,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tbin = tcache_large_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - if (tcache_bin_try_flush_large(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)) == 0) { - tcache_bin_flush_large(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); - } + tcache_bin_flush_large(tsd, tcache, tbin, binind, + (tbin_info->ncached_max >> 1)); } assert(tbin->ncached < tbin_info->ncached_max); tbin->ncached++; From b9ab04a191dbcb9246d5180fc7ae822a85861939 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 26 Apr 2017 16:26:12 -0700 Subject: [PATCH 0848/2608] Refactor !opt.munmap to opt.retain. 
--- configure.ac | 12 ++++++------ doc/jemalloc.xml.in | 17 +++++++++-------- include/jemalloc/internal/arena_structs_b.h | 2 +- include/jemalloc/internal/extent_mmap_externs.h | 2 +- include/jemalloc/internal/extent_structs.h | 7 +++---- .../internal/jemalloc_internal_defs.h.in | 6 +++--- include/jemalloc/internal/stats.h | 7 +++---- src/arena.c | 6 +++--- src/ctl.c | 6 +++--- src/extent.c | 2 +- src/extent_mmap.c | 8 ++++---- src/jemalloc.c | 2 +- src/large.c | 2 +- src/stats.c | 2 +- test/unit/arena_reset.c | 2 +- test/unit/mallctl.c | 2 +- 16 files changed, 42 insertions(+), 43 deletions(-) diff --git a/configure.ac b/configure.ac index 6447c51a..9f8311cc 100644 --- a/configure.ac +++ b/configure.ac @@ -517,7 +517,7 @@ dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. -default_munmap="1" +default_retain="0" maps_coalesce="1" case "${host}" in *-*-darwin* | *-*-ios*) @@ -557,7 +557,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_C11_ATOMICS]) force_tls="0" if test "${LG_SIZEOF_PTR}" = "3"; then - default_munmap="0" + default_retain="1" fi ;; *-*-linux* | *-*-kfreebsd*) @@ -570,7 +570,7 @@ case "${host}" in AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) if test "${LG_SIZEOF_PTR}" = "3"; then - default_munmap="0" + default_retain="1" fi ;; *-*-netbsd*) @@ -1086,9 +1086,9 @@ if test "x${maps_coalesce}" = "x1" ; then AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ]) fi -dnl Indicate whether to use munmap() by default. -if test "x$default_munmap" = "x1" ; then - AC_DEFINE([JEMALLOC_MUNMAP], [ ]) +dnl Indicate whether to retain memory (rather than using) munmap()) by default. +if test "x$default_retain" = "x1" ; then + AC_DEFINE([JEMALLOC_RETAIN], [ ]) fi dnl Enable allocation from DSS if supported by the OS. 
diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 66d8e5df..fa65c39b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -863,25 +863,26 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", - + - opt.munmap + opt.retain (bool) r- - If true, call + If true, retain unused virtual memory for later reuse + rather than discarding it by calling munmap - 2 or equivalent rather than - retaining unused virtual memory (see 2 or equivalent (see stats.retained for related details). - This option is enabled by default unless it is known to trigger + This option is disabled by default unless discarding virtual memory is + known to trigger platform-specific performance problems, e.g. for [64-bit] Linux, which has a quirk in its virtual memory allocation algorithm that causes semi-permanent VM map holes under normal jemalloc operation. Although munmap 2 causes issues on 32-bit Linux as - well, it is not disabled by default due to the practical possibility of - address space exhaustion. + well, retaining virtual memory for 32-bit Linux is disabled by default + due to the practical possibility of address space exhaustion. diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 6b83e526..d98b455e 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -230,7 +230,7 @@ struct arena_s { /* * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if !opt_munmap). This limits the + * request via the extent hooks (only if opt_retain). This limits the * number of disjoint virtual memory ranges so that extent merging can * be effective even if multiple arenas' extent allocation requests are * highly interleaved. 
diff --git a/include/jemalloc/internal/extent_mmap_externs.h b/include/jemalloc/internal/extent_mmap_externs.h index e5bc8110..fe9a79ac 100644 --- a/include/jemalloc/internal/extent_mmap_externs.h +++ b/include/jemalloc/internal/extent_mmap_externs.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H -extern bool opt_munmap; +extern bool opt_retain; void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 5d8c3a20..62bae39a 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -59,10 +59,9 @@ struct extent_s { * * sn: Serial number (potentially non-unique). * - * Serial numbers may wrap around if JEMALLOC_MUNMAP is defined, but - * as long as comparison functions fall back on address comparison - * for equal serial numbers, stable (if imperfect) ordering is - * maintained. + * Serial numbers may wrap around if !opt_retain, but as long as + * comparison functions fall back on address comparison for equal + * serial numbers, stable (if imperfect) ordering is maintained. * * Serial numbers may not be unique even in the absence of * wrap-around, e.g. when splitting an extent and assigning the same diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 8f7c42b8..bccee167 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -192,12 +192,12 @@ #undef JEMALLOC_MAPS_COALESCE /* - * If defined, use munmap() to unmap freed extents by default, rather than - * storing them for later reuse. This is disabled on 64-bit Linux because + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. 
This is enabled on 64-bit Linux because * common sequences of mmap()/munmap() calls will cause virtual memory map * holes. */ -#undef JEMALLOC_MUNMAP +#undef JEMALLOC_RETAIN /* TLS is used to map arenas and magazine caches to threads. */ #undef JEMALLOC_TLS diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 301a50ab..fd98422d 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -117,10 +117,9 @@ typedef struct arena_stats_s { atomic_zu_t mapped; /* Partially derived. */ /* - * Number of bytes currently retained as a side effect of munmap() being - * disabled/bypassed. Retained bytes are technically mapped (though - * always decommitted or purged), but they are excluded from the mapped - * statistic (above). + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). */ atomic_zu_t retained; /* Derived. */ diff --git a/src/arena.c b/src/arena.c index 3b94a20d..2c7cea08 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1143,8 +1143,8 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * opportunity to unmap all retained memory without having to keep its * own metadata structures, but if deallocation fails, that is the * application's decision/problem. In practice, retained extents are - * leaked here if !opt_munmap unless the application provided custom - * extent hooks, so best practice is to either enable munmap (and avoid + * leaked here if opt_retain unless the application provided custom + * extent hooks, so best practice is to either disable retain (and avoid * dss for arenas to be destroyed), or provide custom extent hooks that * either unmap retained extents or track them for later use. 
*/ @@ -1947,7 +1947,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (!opt_munmap) { + if (opt_retain) { atomic_store_u(&arena->extent_grow_next, psz2ind(HUGEPAGE), ATOMIC_RELAXED); } diff --git a/src/ctl.c b/src/ctl.c index 3591f891..7d53a336 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -74,7 +74,7 @@ CTL_PROTO(config_stats) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) -CTL_PROTO(opt_munmap) +CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) @@ -260,7 +260,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, - {NAME("munmap"), CTL(opt_munmap)}, + {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, @@ -1455,7 +1455,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) -CTL_RO_NL_GEN(opt_munmap, opt_munmap, bool) +CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) diff --git a/src/extent.c b/src/extent.c index 1ddaf240..bc17711c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1123,7 +1123,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } } - if (!opt_munmap && extent == NULL) { + if (opt_retain && extent == NULL) { extent = extent_grow_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 5fe82ee5..3e4e1ef7 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -7,8 +7,8 @@ /******************************************************************************/ /* Data. 
*/ -bool opt_munmap = -#ifdef JEMALLOC_MUNMAP +bool opt_retain = +#ifdef JEMALLOC_RETAIN true #else false @@ -34,8 +34,8 @@ extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool extent_dalloc_mmap(void *addr, size_t size) { - if (opt_munmap) { + if (!opt_retain) { pages_unmap(addr, size); } - return !opt_munmap; + return opt_retain; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 42146004..97a64431 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1043,7 +1043,7 @@ malloc_conf_init(void) { } CONF_HANDLE_BOOL(opt_abort, "abort") - CONF_HANDLE_BOOL(opt_munmap, "munmap") + CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; bool match = false; diff --git a/src/large.c b/src/large.c index 4d515fbb..f657ccbe 100644 --- a/src/large.c +++ b/src/large.c @@ -93,7 +93,7 @@ large_dalloc_maybe_junk(void *ptr, size_t size) { * Only bother junk filling if the extent isn't about to be * unmapped. */ - if (!opt_munmap || (have_dss && extent_in_dss(ptr))) { + if (opt_retain || (have_dss && extent_in_dss(ptr))) { large_dalloc_junk(ptr, size); } } diff --git a/src/stats.c b/src/stats.c index 5d515186..34fc37f2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -802,7 +802,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "Run-time option settings:\n"); } OPT_WRITE_BOOL(abort, ",") - OPT_WRITE_BOOL(munmap, ",") + OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 0fa240b7..5d6c1a77 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -251,7 +251,7 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { TEST_END /* - * Actually unmap extents, regardless of opt_munmap, so that attempts to access + * Actually unmap extents, regardless of opt_retain, so that attempts to access * a destroyed arena's memory will segfault. 
*/ static bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 51a5244e..b07a6d04 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -157,7 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); - TEST_MALLCTL_OPT(bool, munmap, always); + TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); From c86c8f4ffbf8c118203f7327610a2ad80cf9622c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 27 Apr 2017 15:51:35 -0700 Subject: [PATCH 0849/2608] Add extent_destroy_t and use it during arena destruction. Add the extent_destroy_t extent destruction hook to extent_hooks_t, and use it during arena destruction. This hook explicitly communicates to the callee that the extent must be destroyed or tracked for later reuse, lest it be permanently leaked. Prior to this change, retained extents could unintentionally be leaked if extent retention was enabled. This resolves #560. --- configure.ac | 2 +- doc/jemalloc.xml.in | 20 ++++++++ include/jemalloc/internal/extent_externs.h | 4 +- include/jemalloc/internal/private_symbols.txt | 2 +- include/jemalloc/jemalloc_typedefs.h.in | 9 ++++ src/arena.c | 20 ++++---- src/extent.c | 47 ++++++++++++++++++- test/include/test/extent_hooks.h | 24 ++++++++++ test/unit/arena_reset.c | 1 + test/unit/base.c | 8 +++- 10 files changed, 120 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 9f8311cc..462f509f 100644 --- a/configure.ac +++ b/configure.ac @@ -1086,7 +1086,7 @@ if test "x${maps_coalesce}" = "x1" ; then AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ]) fi -dnl Indicate whether to retain memory (rather than using) munmap()) by default. +dnl Indicate whether to retain memory (rather than using munmap()) by default. 
if test "x$default_retain" = "x1" ; then AC_DEFINE([JEMALLOC_RETAIN], [ ]) fi diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fa65c39b..d1b2e334 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1605,6 +1605,7 @@ typedef extent_hooks_s extent_hooks_t; struct extent_hooks_s { extent_alloc_t *alloc; extent_dalloc_t *dalloc; + extent_destroy_t *destroy; extent_commit_t *commit; extent_decommit_t *decommit; extent_purge_t *purge_lazy; @@ -1681,6 +1682,25 @@ struct extent_hooks_s { remains mapped, in the same commit state, and available for future use, in which case it will be automatically retained for later reuse. + + typedef void (extent_destroy_t) + extent_hooks_t *extent_hooks + void *addr + size_t size + bool committed + unsigned arena_ind + + + + An extent destruction function conforms to the + extent_destroy_t type and unconditionally destroys an + extent at given addr and + size with + committed/decommited memory as indicated, on + behalf of arena arena_ind. This function may be + called to destroy retained extents during arena destruction (see arena.<i>.destroy). 
+ typedef bool (extent_commit_t) extent_hooks_t *extent_hooks diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 58e57e70..c4fe8425 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -40,10 +40,10 @@ extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length); diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt index 50590957..eb9b3010 100644 --- a/include/jemalloc/internal/private_symbols.txt +++ b/include/jemalloc/internal/private_symbols.txt @@ -154,8 +154,8 @@ extent_dalloc extent_dalloc_gap extent_dalloc_mmap extent_dalloc_wrapper -extent_dalloc_wrapper_try extent_decommit_wrapper +extent_destroy_wrapper extent_dss_boot extent_dss_mergeable extent_dss_prec_get diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index 91b5a8dc..1a588743 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -16,6 +16,14 @@ typedef void *(extent_alloc_t)(extent_hooks_t *, void *, size_t, size_t, bool *, typedef bool (extent_dalloc_t)(extent_hooks_t *, void *, size_t, bool, unsigned); +/* + * void + * extent_destroy(extent_hooks_t *extent_hooks, 
void *addr, size_t size, + * bool committed, unsigned arena_ind); + */ +typedef void (extent_destroy_t)(extent_hooks_t *, void *, size_t, bool, + unsigned); + /* * bool * extent_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, @@ -59,6 +67,7 @@ typedef bool (extent_merge_t)(extent_hooks_t *, void *, size_t, void *, size_t, struct extent_hooks_s { extent_alloc_t *alloc; extent_dalloc_t *dalloc; + extent_destroy_t *destroy; extent_commit_t *commit; extent_decommit_t *decommit; extent_purge_t *purge_lazy; diff --git a/src/arena.c b/src/arena.c index 2c7cea08..edbd875f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1138,21 +1138,19 @@ arena_reset(tsd_t *tsd, arena_t *arena) { static void arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { /* - * Iterate over the retained extents and blindly attempt to deallocate - * them. This gives the extent allocator underlying the extent hooks an - * opportunity to unmap all retained memory without having to keep its - * own metadata structures, but if deallocation fails, that is the - * application's decision/problem. In practice, retained extents are - * leaked here if opt_retain unless the application provided custom - * extent hooks, so best practice is to either disable retain (and avoid - * dss for arenas to be destroyed), or provide custom extent hooks that - * either unmap retained extents or track them for later use. + * Iterate over the retained extents and destroy them. This gives the + * extent allocator underlying the extent hooks an opportunity to unmap + * all retained memory without having to keep its own metadata + * structures. In practice, virtual memory for dss-allocated extents is + * leaked here, so best practice is to avoid dss for arenas to be + * destroyed, or provide custom extent hooks that track retained + * dss-based extents for later reuse. 
*/ extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, &arena->extents_retained, 0)) != NULL) { - extent_dalloc_wrapper_try(tsdn, arena, &extent_hooks, extent); + extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } @@ -1169,7 +1167,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { */ assert(extents_npages_get(&arena->extents_dirty) == 0); - /* Attempt to deallocate retained memory. */ + /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); /* diff --git a/src/extent.c b/src/extent.c index bc17711c..1b284535 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,6 +19,8 @@ static void *extent_alloc_default(extent_hooks_t *extent_hooks, unsigned arena_ind); static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); +static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_decommit_default(extent_hooks_t *extent_hooks, @@ -43,6 +45,7 @@ static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, const extent_hooks_t extent_hooks_default = { extent_alloc_default, extent_dalloc_default, + extent_destroy_default, extent_commit_default, extent_decommit_default #ifdef PAGES_CAN_PURGE_LAZY @@ -1366,7 +1369,7 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, return extent_dalloc_default_impl(addr, size); } -bool +static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { bool err; @@ -1443,6 +1446,48 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent); } +static void +extent_destroy_default_impl(void *addr, size_t size) { + if (!have_dss || 
!extent_in_dss(addr)) { + pages_unmap(addr, size); + } +} + +static void +extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + assert(extent_hooks == &extent_hooks_default); + + extent_destroy_default_impl(addr, size); +} + +void +extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent) { + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + + /* Deregister first to avoid a race with other allocating threads. */ + extent_deregister(tsdn, extent); + + extent_addr_set(extent, extent_base_get(extent)); + + extent_hooks_assure_initialized(arena, r_extent_hooks); + /* Try to destroy; silently fail otherwise. */ + if (*r_extent_hooks == &extent_hooks_default) { + /* Call directly to propagate tsdn. */ + extent_destroy_default_impl(extent_base_get(extent), + extent_size_get(extent)); + } else if ((*r_extent_hooks)->destroy != NULL) { + (*r_extent_hooks)->destroy(*r_extent_hooks, + extent_base_get(extent), extent_size_get(extent), + extent_committed_get(extent), arena_ind_get(arena)); + } + + extent_dalloc(tsdn, arena, extent); +} + static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index 96fee103..ea012857 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -8,6 +8,8 @@ static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, unsigned arena_ind); static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); +static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); static bool extent_commit_hook(extent_hooks_t 
*extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, @@ -27,6 +29,7 @@ static extent_hooks_t *default_hooks; static extent_hooks_t hooks = { extent_alloc_hook, extent_dalloc_hook, + extent_destroy_hook, extent_commit_hook, extent_decommit_hook, extent_purge_lazy_hook, @@ -38,6 +41,7 @@ static extent_hooks_t hooks = { /* Control whether hook functions pass calls through to default hooks. */ static bool try_alloc = true; static bool try_dalloc = true; +static bool try_destroy = true; static bool try_commit = true; static bool try_decommit = true; static bool try_purge_lazy = true; @@ -48,6 +52,7 @@ static bool try_merge = true; /* Set to false prior to operations, then introspect after operations. */ static bool called_alloc; static bool called_dalloc; +static bool called_destroy; static bool called_commit; static bool called_decommit; static bool called_purge_lazy; @@ -58,6 +63,7 @@ static bool called_merge; /* Set to false prior to operations, then introspect after operations. */ static bool did_alloc; static bool did_dalloc; +static bool did_destroy; static bool did_commit; static bool did_decommit; static bool did_purge_lazy; @@ -115,6 +121,24 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, return err; } +static void +extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? 
+ "true" : "false", arena_ind); + assert_ptr_eq(extent_hooks, &hooks, + "extent_hooks should be same as pointer used to set hooks"); + assert_ptr_eq(extent_hooks->destroy, extent_destroy_hook, + "Wrong hook function"); + called_destroy = true; + if (!try_destroy) { + return; + } + default_hooks->destroy(default_hooks, addr, size, committed, 0); + did_destroy = true; +} + static bool extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 5d6c1a77..d1698325 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -278,6 +278,7 @@ static extent_hooks_t hooks_orig; static extent_hooks_t hooks_unmap = { extent_alloc_hook, extent_dalloc_unmap, /* dalloc */ + extent_destroy_hook, extent_commit_hook, extent_decommit_hook, extent_purge_lazy_hook, diff --git a/test/unit/base.c b/test/unit/base.c index f498394e..5dc42f0a 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -5,6 +5,7 @@ static extent_hooks_t hooks_null = { extent_alloc_hook, NULL, /* dalloc */ + NULL, /* destroy */ NULL, /* commit */ NULL, /* decommit */ NULL, /* purge_lazy */ @@ -16,6 +17,7 @@ static extent_hooks_t hooks_null = { static extent_hooks_t hooks_not_null = { extent_alloc_hook, extent_dalloc_hook, + extent_destroy_hook, NULL, /* commit */ extent_decommit_hook, extent_purge_lazy_hook, @@ -59,6 +61,7 @@ TEST_BEGIN(test_base_hooks_null) { extent_hooks_prep(); try_dalloc = false; + try_destroy = true; try_decommit = false; try_purge_lazy = false; try_purge_forced = false; @@ -98,6 +101,7 @@ TEST_BEGIN(test_base_hooks_not_null) { extent_hooks_prep(); try_dalloc = false; + try_destroy = true; try_decommit = false; try_purge_lazy = false; try_purge_forced = false; @@ -194,15 +198,17 @@ TEST_BEGIN(test_base_hooks_not_null) { } } - called_dalloc = called_decommit = called_purge_lazy = + called_dalloc = called_destroy = called_decommit = called_purge_lazy 
= called_purge_forced = false; base_delete(base); assert_true(called_dalloc, "Expected dalloc call"); + assert_true(!called_destroy, "Unexpected destroy call"); assert_true(called_decommit, "Expected decommit call"); assert_true(called_purge_lazy, "Expected purge_lazy call"); assert_true(called_purge_forced, "Expected purge_forced call"); try_dalloc = true; + try_destroy = true; try_decommit = true; try_purge_lazy = true; try_purge_forced = true; From 209f2926b8e734317942231332f24b4bfd94587e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Apr 2017 18:37:44 -0700 Subject: [PATCH 0850/2608] Header refactoring: tsd - cleanup and dependency breaking. This removes the tsd macros (which are used only for tsd_t in real builds). We break up the circular dependencies involving tsd. We also move all tsd access through getters and setters. This allows us to assert that we only touch data when tsd is in a valid state. We simplify the usages of the x macro trick, removing all the customizability (get/set, init, cleanup), moving the lifetime logic to tsd_init and tsd_cleanup. This lets us make initialization order independent of order within tsd_t. 
--- include/jemalloc/internal/ckh.h | 2 +- .../internal/jemalloc_internal_externs.h | 2 + .../internal/jemalloc_internal_includes.h | 8 +- include/jemalloc/internal/mutex_externs.h | 2 + include/jemalloc/internal/mutex_inlines.h | 1 + include/jemalloc/internal/mutex_prof.h | 1 + include/jemalloc/internal/prof_inlines_b.h | 2 +- include/jemalloc/internal/rtree_ctx.h | 22 + include/jemalloc/internal/rtree_externs.h | 2 - include/jemalloc/internal/rtree_structs.h | 21 - include/jemalloc/internal/rtree_types.h | 4 - include/jemalloc/internal/rtree_witness.h | 19 + include/jemalloc/internal/stats.h | 3 + include/jemalloc/internal/ticker.h | 2 + include/jemalloc/internal/tsd.h | 298 ++++++++++ include/jemalloc/internal/tsd_externs.h | 20 - include/jemalloc/internal/tsd_generic.h | 160 ++++++ include/jemalloc/internal/tsd_inlines.h | 121 ---- .../internal/tsd_malloc_thread_cleanup.h | 60 ++ include/jemalloc/internal/tsd_structs.h | 114 ---- include/jemalloc/internal/tsd_tls.h | 59 ++ include/jemalloc/internal/tsd_types.h | 541 +----------------- include/jemalloc/internal/tsd_win.h | 139 +++++ src/jemalloc.c | 8 +- src/prof.c | 10 +- src/rtree.c | 7 - src/tcache.c | 5 +- src/tsd.c | 73 ++- test/unit/tsd.c | 77 ++- 29 files changed, 870 insertions(+), 913 deletions(-) create mode 100644 include/jemalloc/internal/rtree_ctx.h create mode 100644 include/jemalloc/internal/rtree_witness.h create mode 100644 include/jemalloc/internal/tsd.h delete mode 100644 include/jemalloc/internal/tsd_externs.h create mode 100644 include/jemalloc/internal/tsd_generic.h delete mode 100644 include/jemalloc/internal/tsd_inlines.h create mode 100644 include/jemalloc/internal/tsd_malloc_thread_cleanup.h delete mode 100644 include/jemalloc/internal/tsd_structs.h create mode 100644 include/jemalloc/internal/tsd_tls.h create mode 100644 include/jemalloc/internal/tsd_win.h diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 96922e04..7b3850bc 100644 --- 
a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_CKH_H #define JEMALLOC_INTERNAL_CKH_H -#include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/tsd.h" /* Cuckoo hashing implementation. Skip to the end for the interface. */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 45c119f8..9a431fc1 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -2,6 +2,8 @@ #define JEMALLOC_INTERNAL_EXTERNS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/tsd_types.h" /* TSD checks this to set thread local slow state accordingly. */ extern bool malloc_slow; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 340cb1ce..84917a70 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -26,8 +26,8 @@ * dependency information into the header files (i.e. we still rely on the * ordering in this file to ensure all a header's dependencies are available in * its translation unit). Each component is now broken up into multiple header - * files, corresponding to the sections above (e.g. instead of "tsd.h", we now - * have "tsd_types.h", "tsd_structs.h", "tsd_externs.h", "tsd_inlines.h"). + * files, corresponding to the sections above (e.g. instead of "foo.h", we now + * have "foo_types.h", "foo_structs.h", "foo_externs.h", "foo_inlines.h"). 
* * Those files which have been converted to explicitly include their * inter-component dependencies are now in the initial HERMETIC HEADERS @@ -42,7 +42,6 @@ #include "jemalloc/internal/witness_types.h" #include "jemalloc/internal/mutex_types.h" -#include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" @@ -65,7 +64,6 @@ #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/tsd_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -83,13 +81,11 @@ #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/tsd_externs.h" /******************************************************************************/ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/tsd_inlines.h" #include "jemalloc/internal/witness_inlines.h" #include "jemalloc/internal/mutex_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h index 5199d3cf..8e40cb34 100644 --- a/include/jemalloc/internal/mutex_externs.h +++ b/include/jemalloc/internal/mutex_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_EXTERNS_H #define JEMALLOC_INTERNAL_MUTEX_EXTERNS_H +#include "jemalloc/internal/tsd_types.h" + #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index 6da21cf6..babe8d3a 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_MUTEX_INLINES_H 
#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/tsd_types.h" void malloc_mutex_lock_slow(malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 50c0af0a..f7301c88 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/tsd_types.h" #define MUTEX_PROF_GLOBAL_MUTEXES \ OP(ctl) \ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 8cdea615..fba7b998 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -96,7 +96,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } return true; } else { - if (tsd->reentrancy_level > 0) { + if (tsd_reentrancy_level_get(tsd) > 0) { return true; } /* Compute new sample threshold. */ diff --git a/include/jemalloc/internal/rtree_ctx.h b/include/jemalloc/internal/rtree_ctx.h new file mode 100644 index 00000000..fe2c8bde --- /dev/null +++ b/include/jemalloc/internal/rtree_ctx.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H +#define JEMALLOC_INTERNAL_RTREE_CTX_H + +#include "jemalloc/internal/rtree_types.h" + +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; +}; + +typedef struct rtree_ctx_s rtree_ctx_t; +struct rtree_ctx_s { + /* Direct mapped cache. */ + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. 
*/ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; +}; + +void rtree_ctx_data_init(rtree_ctx_t *ctx); + +#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index c8d1c376..5145c12c 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -43,7 +43,5 @@ void rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); void rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, const rtree_leaf_elm_t *elm); -void rtree_ctx_data_init(rtree_ctx_t *ctx); -bool tsd_rtree_ctx_data_init(tsd_t *tsd); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 7ff92e61..4418934f 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -30,15 +30,6 @@ struct rtree_leaf_elm_s { #endif }; -struct rtree_leaf_elm_witness_s { - const rtree_leaf_elm_t *elm; - witness_t witness; -}; - -struct rtree_leaf_elm_witness_tsd_s { - rtree_leaf_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; -}; - struct rtree_level_s { /* Number of key bits distinguished by this level. */ unsigned bits; @@ -49,18 +40,6 @@ struct rtree_level_s { unsigned cumbits; }; -struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_leaf_elm_t *leaf; -}; - -struct rtree_ctx_s { - /* Direct mapped cache. */ - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; - /* L2 LRU cache. */ - rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; -}; - struct rtree_s { malloc_mutex_t init_lock; /* Number of elements based on rtree_levels[0].bits. 
*/ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index 402f741c..b465086d 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -12,11 +12,7 @@ typedef struct rtree_node_elm_s rtree_node_elm_t; typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; -typedef struct rtree_leaf_elm_witness_s rtree_leaf_elm_witness_t; -typedef struct rtree_leaf_elm_witness_tsd_s rtree_leaf_elm_witness_tsd_t; typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; -typedef struct rtree_ctx_s rtree_ctx_t; typedef struct rtree_s rtree_t; /* Number of high insignificant bits. */ diff --git a/include/jemalloc/internal/rtree_witness.h b/include/jemalloc/internal/rtree_witness.h new file mode 100644 index 00000000..4a136203 --- /dev/null +++ b/include/jemalloc/internal/rtree_witness.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_RTREE_WITNESS_H +#define JEMALLOC_INTERNAL_RTREE_WITNESS_H + +#include "jemalloc/internal/rtree_types.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/witness_structs.h" + +typedef struct rtree_leaf_elm_witness_s rtree_leaf_elm_witness_t; +struct rtree_leaf_elm_witness_s { + const rtree_leaf_elm_t *elm; + witness_t witness; +}; + +typedef struct rtree_leaf_elm_witness_tsd_s rtree_leaf_elm_witness_tsd_t; +struct rtree_leaf_elm_witness_tsd_s { + rtree_leaf_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_RTREE_WITNESS_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index fd98422d..385a8514 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -2,6 +2,9 @@ #define JEMALLOC_INTERNAL_STATS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/mutex_types.h" +#include "jemalloc/internal/mutex_structs.h" #include 
"jemalloc/internal/size_classes.h" /* The opt.stats_print storage. */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index faaac91d..572b9645 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TICKER_H #define JEMALLOC_INTERNAL_TICKER_H +#include "jemalloc/internal/util.h" + /** * A ticker makes it easy to count-down events until some limit. You * ticker_init the ticker to trigger every nticks events. You then notify it diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h new file mode 100644 index 00000000..3d6576b4 --- /dev/null +++ b/include/jemalloc/internal/tsd.h @@ -0,0 +1,298 @@ +#ifndef JEMALLOC_INTERNAL_TSD_H +#define JEMALLOC_INTERNAL_TSD_H + +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/prof_types.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rtree_ctx.h" +#include "jemalloc/internal/rtree_witness.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/witness_structs.h" + +/* + * Thread-Specific-Data layout + * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * s: state + * e: tcache_enabled + * m: thread_allocated (config_stats) + * f: thread_deallocated (config_stats) + * p: prof_tdata (config_prof) + * c: rtree_ctx (rtree cache accessed on deallocation) + * t: tcache + * --- data not accessed on tcache fast path: arena-related fields --- + * d: arenas_tdata_bypass + * r: reentrancy_level + * x: narenas_tdata + * i: iarena + * a: arena + * o: arenas_tdata + * Loading TSD data is on the critical path of basically all malloc operations. 
+ * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. + * Use a compact layout to reduce cache footprint. + * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ + * |---------------------------- 1st cacheline ----------------------------| + * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * |---------------------------- 2nd cacheline ----------------------------| + * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | + * |---------------------------- 3rd cacheline ----------------------------| + * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | + * +-------------------------------------------------------------------------+ + * Note: the entire tcache is embedded into TSD and spans multiple cachelines. + * + * The last 3 members (i, a and o) before tcache aren't really needed on tcache + * fast path. However we have a number of unused tcache bins and witnesses + * (never touched unless config_debug) at the end of tcache, so we place them + * there to avoid breaking the cachelines and possibly paging in an extra page. 
+ */ +#ifdef JEMALLOC_JET +typedef void (*test_callback_t)(int *); +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int) \ + O(test_callback, test_callback_t) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL +#else +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER +#endif + +#define MALLOC_TSD \ +/* O(name, type) */ \ + O(tcache_enabled, bool) \ + O(arenas_tdata_bypass, bool) \ + O(reentrancy_level, int8_t) \ + O(narenas_tdata, uint32_t) \ + O(thread_allocated, uint64_t) \ + O(thread_deallocated, uint64_t) \ + O(prof_tdata, prof_tdata_t *) \ + O(rtree_ctx, rtree_ctx_t) \ + O(iarena, arena_t *) \ + O(arena, arena_t *) \ + O(arenas_tdata, arena_tdata_t *) \ + O(tcache, tcache_t) \ + O(witnesses, witness_list_t) \ + O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t) \ + O(witness_fork, bool) \ + MALLOC_TEST_TSD + +#define TSD_INITIALIZER { \ + tsd_state_uninitialized, \ + TCACHE_ENABLED_ZERO_INITIALIZER, \ + false, \ + 0, \ + 0, \ + 0, \ + 0, \ + NULL, \ + RTREE_CTX_ZERO_INITIALIZER, \ + NULL, \ + NULL, \ + NULL, \ + TCACHE_ZERO_INITIALIZER, \ + ql_head_initializer(witnesses), \ + RTREE_ELM_WITNESS_TSD_INITIALIZER, \ + false \ + MALLOC_TEST_TSD_INITIALIZER \ +} + +enum { + tsd_state_nominal = 0, /* Common case --> jnz. */ + tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ + /* the above 2 nominal states should be lower values. */ + tsd_state_nominal_max = 1, /* used for comparison only. */ + tsd_state_purgatory = 2, + tsd_state_reincarnated = 3, + tsd_state_uninitialized = 4 +}; + +/* Manually limit tsd_state_t to a single byte. */ +typedef uint8_t tsd_state_t; + +/* The actual tsd. */ +typedef struct tsd_s tsd_t; +struct tsd_s { + /* + * The contents should be treated as totally opaque outside the tsd + * module. Access any thread-local state through the getters and + * setters below. 
+ */ + tsd_state_t state; +#define O(n, t) \ + t use_a_getter_or_setter_instead_##n; +MALLOC_TSD +#undef O +}; + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +typedef struct tsdn_s tsdn_t; +struct tsdn_s { + tsd_t tsd; +}; +#define TSDN_NULL ((tsdn_t *)0) + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +bool tsd_data_init(void *arg); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd); +void tsd_slow_update(tsd_t *tsd); + +/* + * We put the platform-specific data declarations and inlines into their own + * header files to avoid cluttering this file. They define tsd_boot0, + * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. + */ +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#include "jemalloc/internal/tsd_malloc_thread_cleanup.h" +#elif (defined(JEMALLOC_TLS)) +#include "jemalloc/internal/tsd_tls.h" +#elif (defined(_WIN32)) +#include "jemalloc/internal/tsd_win.h" +#else +#include "jemalloc/internal/tsd_generic.h" +#endif + +/* + * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of + * foo. This omits some safety checks, and so can be used during tsd + * initialization and cleanup. + */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get_unsafe(tsd_t *tsd) { \ + return &tsd->use_a_getter_or_setter_instead_##n; \ +} +MALLOC_TSD +#undef O + +/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. 
*/ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t * \ +tsd_##n##p_get(tsd_t *tsd) { \ + assert(tsd->state == tsd_state_nominal || \ + tsd->state == tsd_state_nominal_slow || \ + tsd->state == tsd_state_reincarnated); \ + return tsd_##n##p_get_unsafe(tsd); \ +} +MALLOC_TSD +#undef O + +/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE t \ +tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ +} +MALLOC_TSD +#undef O + +/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ +#define O(n, t) \ +JEMALLOC_ALWAYS_INLINE void \ +tsd_##n##_set(tsd_t *tsd, t val) { \ + *tsd_##n##p_get(tsd) = val; \ +} +MALLOC_TSD +#undef O + +JEMALLOC_ALWAYS_INLINE void +tsd_assert_fast(tsd_t *tsd) { + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && + tsd_reentrancy_level_get(tsd) == 0); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_fast(tsd_t *tsd) { + bool fast = (tsd->state == tsd_state_nominal); + if (fast) { + tsd_assert_fast(tsd); + } + + return fast; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_impl(bool init) { + tsd_t *tsd = tsd_get(init); + + if (!init && tsd_get_allocates() && tsd == NULL) { + return NULL; + } + assert(tsd != NULL); + + if (unlikely(tsd->state != tsd_state_nominal)) { + return tsd_fetch_slow(tsd); + } + assert(tsd_fast(tsd)); + tsd_assert_fast(tsd); + + return tsd; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch(void) { + return tsd_fetch_impl(true); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} + +static inline bool +tsd_nominal(tsd_t *tsd) { + return (tsd->state <= tsd_state_nominal_max); +} + +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsdn_fetch(void) { + if (!tsd_booted_get()) { + return NULL; + } + + return tsd_tsdn(tsd_fetch_impl(false)); +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); + + 
return &tsdn->tsd; +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsd_rtree_ctx(tsd_t *tsd) { + return tsd_rtree_ctxp_get(tsd); +} + +JEMALLOC_ALWAYS_INLINE rtree_ctx_t * +tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { + /* + * If tsd cannot be accessed, initialize the fallback rtree_ctx and + * return a pointer to it. + */ + if (unlikely(tsdn_null(tsdn))) { + rtree_ctx_data_init(fallback); + return fallback; + } + return tsd_rtree_ctx(tsdn_tsd(tsdn)); +} + +#endif /* JEMALLOC_INTERNAL_TSD_H */ diff --git a/include/jemalloc/internal/tsd_externs.h b/include/jemalloc/internal/tsd_externs.h deleted file mode 100644 index 6b9dfdc6..00000000 --- a/include/jemalloc/internal/tsd_externs.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TSD_EXTERNS_H -#define JEMALLOC_INTERNAL_TSD_EXTERNS_H - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -#endif -bool tsd_data_init(void *arg); -void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd); -void tsd_slow_update(tsd_t *tsd); - -#endif /* JEMALLOC_INTERNAL_TSD_EXTERNS_H */ diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h new file mode 100644 index 00000000..d59cb743 --- /dev/null +++ b/include/jemalloc/internal/tsd_generic.h @@ -0,0 +1,160 @@ +#ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H +#error This file should be included only once, by tsd.h. 
+#endif +#define JEMALLOC_INTERNAL_TSD_GENERIC_H + +typedef struct tsd_init_block_s tsd_init_block_t; +struct tsd_init_block_s { + ql_elm(tsd_init_block_t) link; + pthread_t thread; + void *data; +}; + +typedef struct tsd_init_head_s tsd_init_head_t; +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +void *tsd_init_check_recursion(tsd_init_head_t *head, + tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); + +extern pthread_key_t tsd_tsd; +extern tsd_init_head_t tsd_init_head; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE void +tsd_cleanup_wrapper(void *arg) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)arg; + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. 
*/ + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) + { + malloc_write(": Error setting TSD\n"); + if (opt_abort) { + abort(); + } + } + return; + } + } + malloc_tsd_dalloc(wrapper); +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) { + malloc_write(": Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); + + if (init && unlikely(wrapper == NULL)) { + tsd_init_block_t block; + wrapper = (tsd_wrapper_t *) + tsd_init_check_recursion(&tsd_init_head, &block); + if (wrapper) { + return wrapper; + } + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + block.data = (void *)wrapper; + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + } + tsd_wrapper_set(wrapper); + tsd_init_finish(&tsd_init_head, &block); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) { + return true; + } + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + 
+JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/include/jemalloc/internal/tsd_inlines.h b/include/jemalloc/internal/tsd_inlines.h deleted file mode 100644 index f0f77e48..00000000 --- a/include/jemalloc/internal/tsd_inlines.h +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TSD_INLINES_H -#define JEMALLOC_INTERNAL_TSD_INLINES_H - -malloc_tsd_externs(, tsd_t) -malloc_tsd_funcs(JEMALLOC_ALWAYS_INLINE, , tsd_t, tsd_initializer, tsd_cleanup) - -#define MALLOC_TSD_getset_yes(n, t) \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) { \ - return *tsd_##n##p_get(tsd); \ -} \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t n) { \ - assert(tsd->state == tsd_state_nominal || \ - tsd->state == tsd_state_nominal_slow || \ - tsd->state == tsd_state_reincarnated); \ - tsd->n = n; \ -} -#define MALLOC_TSD_getset_no(n, t) -#define O(n, t, gs, i, c) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get(tsd_t *tsd) { \ - return &tsd->n; \ -} \ - \ -MALLOC_TSD_getset_##gs(n, t) -MALLOC_TSD -#undef MALLOC_TSD_getset_yes -#undef MALLOC_TSD_getset_no -#undef O - -JEMALLOC_ALWAYS_INLINE bool -tsd_assert_fast(tsd_t *tsd) { - assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && - tsd_reentrancy_level_get(tsd) == 0); - return true; -} - -JEMALLOC_ALWAYS_INLINE bool -tsd_fast(tsd_t *tsd) { - bool fast = (tsd->state == tsd_state_nominal); - if (fast) { - tsd_assert_fast(tsd); - } - - return fast; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * 
-tsd_fetch_impl(bool init) { - tsd_t *tsd = tsd_get(init); - - if (!init && tsd_get_allocates() && tsd == NULL) { - return NULL; - } - assert(tsd != NULL); - - if (unlikely(tsd->state != tsd_state_nominal)) { - return tsd_fetch_slow(tsd); - } - assert(tsd_fast(tsd)); - tsd_assert_fast(tsd); - - return tsd; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch(void) { - return tsd_fetch_impl(true); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) { - return (tsdn_t *)tsd; -} - -static inline bool -tsd_nominal(tsd_t *tsd) { - return (tsd->state <= tsd_state_nominal_max); -} - -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsdn_fetch(void) { - if (!tsd_booted_get()) { - return NULL; - } - - return tsd_tsdn(tsd_fetch_impl(false)); -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) { - return tsdn == NULL; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) { - assert(!tsdn_null(tsdn)); - - return &tsdn->tsd; -} - -JEMALLOC_ALWAYS_INLINE rtree_ctx_t * -tsd_rtree_ctx(tsd_t *tsd) { - return tsd_rtree_ctxp_get(tsd); -} - -JEMALLOC_ALWAYS_INLINE rtree_ctx_t * -tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { - /* - * If tsd cannot be accessed, initialize the fallback rtree_ctx and - * return a pointer to it. - */ - if (unlikely(tsdn_null(tsdn))) { - rtree_ctx_data_init(fallback); - return fallback; - } - return tsd_rtree_ctx(tsdn_tsd(tsdn)); -} - -#endif /* JEMALLOC_INTERNAL_TSD_INLINES_H */ diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h new file mode 100644 index 00000000..beb467a6 --- /dev/null +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -0,0 +1,60 @@ +#ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H + +extern __thread tsd_t tsd_tls; +extern __thread bool tsd_initialized; +extern bool tsd_booted; + +/* Initialization/cleanup. 
*/ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + if (tsd_initialized) { + tsd_initialized = false; + tsd_cleanup(&tsd_tls); + } + return tsd_initialized; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + tsd_initialized = true; +} diff --git a/include/jemalloc/internal/tsd_structs.h b/include/jemalloc/internal/tsd_structs.h deleted file mode 100644 index 40fea97b..00000000 --- a/include/jemalloc/internal/tsd_structs.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_TSD_STRUCTS_H -#define JEMALLOC_INTERNAL_TSD_STRUCTS_H - -#include "jemalloc/internal/ql.h" - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -struct tsd_init_block_s { - ql_elm(tsd_init_block_t) link; - pthread_t thread; - void *data; -}; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; -#endif - -/* - * Thread-Specific-Data layout - * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- - * s: state - * e: tcache_enabled - * m: thread_allocated (config_stats) - * f: thread_deallocated (config_stats) - * p: prof_tdata (config_prof) - * c: rtree_ctx (rtree cache accessed on deallocation) - * t: tcache - * --- data not accessed on tcache fast path: arena related fields --- - * d: arenas_tdata_bypass - * r: reentrancy_level - * x: 
narenas_tdata - * i: iarena - * a: arena - * o: arenas_tdata - * Loading TSD data is on the critical path of basically all malloc operations. - * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. - * Use a compact layout to reduce cache footprint. - * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ - * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | - * |---------------------------- 2nd cacheline ----------------------------| - * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | - * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | - * +-------------------------------------------------------------------------+ - * Note: the entire tcache is embedded into TSD and spans multiple cachelines. - * - * The last 3 members (i, a and o) before tcache isn't really needed on tcache - * fast path. However we have a number of unused tcache bins and witnesses - * (never touched unless config_debug) at the end of tcache, so we place them - * there to avoid breaking the cachelines and possibly paging in an extra page. 
- */ -#define MALLOC_TSD \ -/* O(name, type, [gs]et, init, cleanup) */ \ - O(tcache_enabled, bool, yes, yes, no) \ - O(arenas_tdata_bypass, bool, no, no, no) \ - O(reentrancy_level, int8_t, yes, no, no) \ - O(narenas_tdata, uint32_t, yes, no, no) \ - O(thread_allocated, uint64_t, yes, no, no) \ - O(thread_deallocated, uint64_t, yes, no, no) \ - O(prof_tdata, prof_tdata_t *, yes, no, yes) \ - O(rtree_ctx, rtree_ctx_t, no, yes, no) \ - O(iarena, arena_t *, yes, no, yes) \ - O(arena, arena_t *, yes, no, yes) \ - O(arenas_tdata, arena_tdata_t *,yes, no, yes) \ - O(tcache, tcache_t, no, no, yes) \ - O(witnesses, witness_list_t, no, no, yes) \ - O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t, \ - no, no, no) \ - O(witness_fork, bool, yes, no, no) - -#define TSD_INITIALIZER { \ - tsd_state_uninitialized, \ - TCACHE_ENABLED_ZERO_INITIALIZER, \ - false, \ - 0, \ - 0, \ - 0, \ - 0, \ - NULL, \ - RTREE_CTX_ZERO_INITIALIZER, \ - NULL, \ - NULL, \ - NULL, \ - TCACHE_ZERO_INITIALIZER, \ - ql_head_initializer(witnesses), \ - RTREE_ELM_WITNESS_TSD_INITIALIZER, \ - false \ -} - -struct tsd_s { - tsd_state_t state; -#define O(n, t, gs, i, c) \ - t n; -MALLOC_TSD -#undef O -}; - -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - * explicitly converted to tsd_t, which is non-nullable. - */ -struct tsdn_s { - tsd_t tsd; -}; - -static const tsd_t tsd_initializer = TSD_INITIALIZER; -UNUSED static const void *malloc_tsd_no_cleanup = (void *)0; - -malloc_tsd_types(, tsd_t) - -#endif /* JEMALLOC_INTERNAL_TSD_STRUCTS_H */ diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h new file mode 100644 index 00000000..757aaa0e --- /dev/null +++ b/include/jemalloc/internal/tsd_tls.h @@ -0,0 +1,59 @@ +#ifdef JEMALLOC_INTERNAL_TSD_TLS_H +#error This file should be included only once, by tsd.h. 
+#endif +#define JEMALLOC_INTERNAL_TSD_TLS_H + +extern __thread tsd_t tsd_tls; +extern pthread_key_t tsd_tsd; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + if (pthread_key_create(&tsd_tsd, &tsd_cleanup) != 0) { + return true; + } + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + assert(tsd_booted); + return &tsd_tls; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&tsd_tls != val)) { + tsd_tls = (*val); + } + if (pthread_setspecific(tsd_tsd, (void *)(&tsd_tls)) != 0) { + malloc_write(": Error setting tsd.\n"); + if (opt_abort) { + abort(); + } + } +} diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index dc9efbb6..6200af61 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,549 +1,10 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H -#include "jemalloc/internal/ql.h" - -/* Maximum number of malloc_tsd users with cleanup functions. */ #define MALLOC_TSD_CLEANUPS_MAX 2 -typedef bool (*malloc_tsd_cleanup_t)(void); - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) -typedef struct tsd_init_block_s tsd_init_block_t; -typedef struct tsd_init_head_s tsd_init_head_t; -#endif - typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; - -#define TSDN_NULL ((tsdn_t *)0) - -enum { - tsd_state_nominal = 0, /* Common case --> jnz. */ - tsd_state_nominal_slow = 1, /* Initialized but on slow path. 
*/ - /* the above 2 nominal states should be lower values. */ - tsd_state_nominal_max = 1, /* used for comparison only. */ - tsd_state_purgatory = 2, - tsd_state_reincarnated = 3, - tsd_state_uninitialized = 4 -}; - -/* Manually limit tsd_state_t to a single byte. */ -typedef uint8_t tsd_state_t; - -/* - * TLS/TSD-agnostic macro-based implementation of thread-specific data. There - * are five macros that support (at least) three use cases: file-private, - * library-private, and library-private inlined. Following is an example - * library-private tsd variable: - * - * In example.h: - * typedef struct { - * int x; - * int y; - * } example_t; - * #define EX_INITIALIZER JEMALLOC_CONCAT({0, 0}) - * malloc_tsd_types(example_, example_t) - * malloc_tsd_protos(, example_, example_t) - * malloc_tsd_externs(example_, example_t) - * In example.c: - * malloc_tsd_data(, example_, example_t, EX_INITIALIZER) - * malloc_tsd_funcs(, example_, example_t, EX_INITIALIZER, - * example_tsd_cleanup) - * - * The result is a set of generated functions, e.g.: - * - * bool example_tsd_boot(void) {...} - * bool example_tsd_booted_get(void) {...} - * example_t *example_tsd_get(bool init) {...} - * void example_tsd_set(example_t *val) {...} - * - * Note that all of the functions deal in terms of (a_type *) rather than - * (a_type) so that it is possible to support non-pointer types (unlike - * pthreads TSD). example_tsd_cleanup() is passed an (a_type *) pointer that is - * cast to (void *). This means that the cleanup function needs to cast the - * function argument to (a_type *), then dereference the resulting pointer to - * access fields, e.g. - * - * void - * example_tsd_cleanup(void *arg) - * { - * example_t *example = (example_t *)arg; - * - * example->x = 42; - * [...] - * if ([want the cleanup function to be called again]) - * example_tsd_set(example); - * } - * - * If example_tsd_set() is called within example_tsd_cleanup(), it will be - * called again. 
This is similar to how pthreads TSD destruction works, except - * that pthreads only calls the cleanup function again if the value was set to - * non-NULL. - */ - -/* malloc_tsd_types(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_types(a_name, a_type) -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_types(a_name, a_type) -#elif (defined(_WIN32)) -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#else -#define malloc_tsd_types(a_name, a_type) \ -typedef struct { \ - bool initialized; \ - a_type val; \ -} a_name##tsd_wrapper_t; -#endif - -/* malloc_tsd_protos(). */ -#define malloc_tsd_protos(a_attr, a_name, a_type) \ -a_attr bool \ -a_name##tsd_boot0(void); \ -a_attr void \ -a_name##tsd_boot1(void); \ -a_attr bool \ -a_name##tsd_boot(void); \ -a_attr bool \ -a_name##tsd_booted_get(void); \ -a_attr a_type * \ -a_name##tsd_get(bool init); \ -a_attr void \ -a_name##tsd_set(a_type *val); - -/* malloc_tsd_externs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern __thread bool a_name##tsd_initialized; \ -extern bool a_name##tsd_booted; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_externs(a_name, a_type) \ -extern __thread a_type a_name##tsd_tls; \ -extern pthread_key_t a_name##tsd_tsd; \ -extern bool a_name##tsd_booted; -#elif (defined(_WIN32)) -#define malloc_tsd_externs(a_name, a_type) \ -extern DWORD a_name##tsd_tsd; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#else -#define malloc_tsd_externs(a_name, a_type) \ -extern pthread_key_t a_name##tsd_tsd; \ -extern tsd_init_head_t a_name##tsd_init_head; \ -extern a_name##tsd_wrapper_t a_name##tsd_boot_wrapper; \ -extern bool a_name##tsd_booted; -#endif - -/* malloc_tsd_data(). 
*/ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr __thread bool JEMALLOC_TLS_MODEL \ - a_name##tsd_initialized = false; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr __thread a_type JEMALLOC_TLS_MODEL \ - a_name##tsd_tls = a_initializer; \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr bool a_name##tsd_booted = false; -#elif (defined(_WIN32)) -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr DWORD a_name##tsd_tsd; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#else -#define malloc_tsd_data(a_attr, a_name, a_type, a_initializer) \ -a_attr pthread_key_t a_name##tsd_tsd; \ -a_attr tsd_init_head_t a_name##tsd_init_head = { \ - ql_head_initializer(blocks), \ - MALLOC_MUTEX_INITIALIZER \ -}; \ -a_attr a_name##tsd_wrapper_t a_name##tsd_boot_wrapper = { \ - false, \ - a_initializer \ -}; \ -a_attr bool a_name##tsd_booted = false; -#endif - -/* malloc_tsd_funcs(). */ -#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) { \ - if (a_name##tsd_initialized) { \ - a_name##tsd_initialized = false; \ - a_cleanup(&a_name##tsd_tls); \ - } \ - return a_name##tsd_initialized; \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - /* Do nothing. 
*/ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - return a_name##tsd_boot0(); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return false; \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - assert(a_name##tsd_booted); \ - return &a_name##tsd_tls; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) { \ - a_name##tsd_tls = (*val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - a_name##tsd_initialized = true; \ - } \ -} -#elif (defined(JEMALLOC_TLS)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_key_create(&a_name##tsd_tsd, a_cleanup) != \ - 0) { \ - return true; \ - } \ - } \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - /* Do nothing. */ \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - return a_name##tsd_boot0(); \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return false; \ -} \ -/* Get/set. 
*/ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - assert(a_name##tsd_booted); \ - return &a_name##tsd_tls; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - assert(a_name##tsd_booted); \ - if (likely(&a_name##tsd_tls != val)) { \ - a_name##tsd_tls = (*val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)(&a_name##tsd_tls))) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) { \ - abort(); \ - } \ - } \ - } \ -} -#elif (defined(_WIN32)) -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr bool \ -a_name##tsd_cleanup_wrapper(void) { \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (wrapper == NULL) { \ - return false; \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. 
*/ \ - return true; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ - return false; \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ - if (!TlsSetValue(a_name##tsd_tsd, (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) { \ - DWORD error = GetLastError(); \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - TlsGetValue(a_name##tsd_tsd); \ - SetLastError(error); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - } \ - return wrapper; \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - a_name##tsd_tsd = TlsAlloc(); \ - if (a_name##tsd_tsd == TLS_OUT_OF_INDEXES) { \ - return true; \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - malloc_tsd_cleanup_register( \ - &a_name##tsd_cleanup_wrapper); \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - a_name##tsd_boot_wrapper.initialized = false; \ - a_cleanup(&a_name##tsd_boot_wrapper.val); \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - if (a_name##tsd_boot0()) { \ - return true; \ - } \ - a_name##tsd_boot1(); \ - return false; \ -} \ -a_attr bool \ 
-a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return true; \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ - return NULL; \ - } \ - return &wrapper->val; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) { \ - wrapper->val = *(val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - wrapper->initialized = true; \ - } \ -} -#else -#define malloc_tsd_funcs(a_attr, a_name, a_type, a_initializer, \ - a_cleanup) \ -/* Initialization/cleanup. */ \ -a_attr void \ -a_name##tsd_cleanup_wrapper(void *arg) { \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *)arg; \ - \ - if (a_cleanup != malloc_tsd_no_cleanup && \ - wrapper->initialized) { \ - wrapper->initialized = false; \ - a_cleanup(&wrapper->val); \ - if (wrapper->initialized) { \ - /* Trigger another cleanup round. 
*/ \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error" \ - " setting TSD for "#a_name"\n"); \ - if (opt_abort) { \ - abort(); \ - } \ - } \ - return; \ - } \ - } \ - malloc_tsd_dalloc(wrapper); \ -} \ -a_attr void \ -a_name##tsd_wrapper_set(a_name##tsd_wrapper_t *wrapper) { \ - if (pthread_setspecific(a_name##tsd_tsd, \ - (void *)wrapper)) { \ - malloc_write(": Error setting" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ -} \ -a_attr a_name##tsd_wrapper_t * \ -a_name##tsd_wrapper_get(bool init) { \ - a_name##tsd_wrapper_t *wrapper = (a_name##tsd_wrapper_t *) \ - pthread_getspecific(a_name##tsd_tsd); \ - \ - if (init && unlikely(wrapper == NULL)) { \ - tsd_init_block_t block; \ - wrapper = (a_name##tsd_wrapper_t *) \ - tsd_init_check_recursion(&a_name##tsd_init_head, \ - &block); \ - if (wrapper) { \ - return wrapper; \ - } \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - block.data = (void *)wrapper; \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } else { \ - wrapper->initialized = false; \ - wrapper->val = a_initializer; \ - } \ - a_name##tsd_wrapper_set(wrapper); \ - tsd_init_finish(&a_name##tsd_init_head, &block); \ - } \ - return wrapper; \ -} \ -a_attr bool \ -a_name##tsd_boot0(void) { \ - if (pthread_key_create(&a_name##tsd_tsd, \ - a_name##tsd_cleanup_wrapper) != 0) { \ - return true; \ - } \ - a_name##tsd_wrapper_set(&a_name##tsd_boot_wrapper); \ - a_name##tsd_booted = true; \ - return false; \ -} \ -a_attr void \ -a_name##tsd_boot1(void) { \ - a_name##tsd_wrapper_t *wrapper; \ - wrapper = (a_name##tsd_wrapper_t *) \ - malloc_tsd_malloc(sizeof(a_name##tsd_wrapper_t)); \ - if (wrapper == NULL) { \ - malloc_write(": Error allocating" \ - " TSD for "#a_name"\n"); \ - abort(); \ - } \ - a_name##tsd_boot_wrapper.initialized = false; \ - a_cleanup(&a_name##tsd_boot_wrapper.val); \ - wrapper->initialized = 
false; \ - wrapper->val = a_initializer; \ - a_name##tsd_wrapper_set(wrapper); \ -} \ -a_attr bool \ -a_name##tsd_boot(void) { \ - if (a_name##tsd_boot0()) { \ - return true; \ - } \ - a_name##tsd_boot1(); \ - return false; \ -} \ -a_attr bool \ -a_name##tsd_booted_get(void) { \ - return a_name##tsd_booted; \ -} \ -a_attr bool \ -a_name##tsd_get_allocates(void) { \ - return true; \ -} \ -/* Get/set. */ \ -a_attr a_type * \ -a_name##tsd_get(bool init) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(init); \ - if (a_name##tsd_get_allocates() && !init && wrapper == NULL) { \ - return NULL; \ - } \ - return &wrapper->val; \ -} \ -a_attr void \ -a_name##tsd_set(a_type *val) { \ - a_name##tsd_wrapper_t *wrapper; \ - \ - assert(a_name##tsd_booted); \ - wrapper = a_name##tsd_wrapper_get(true); \ - if (likely(&wrapper->val != val)) { \ - wrapper->val = *(val); \ - } \ - if (a_cleanup != malloc_tsd_no_cleanup) { \ - wrapper->initialized = true; \ - } \ -} -#endif +typedef bool (*malloc_tsd_cleanup_t)(void); #endif /* JEMALLOC_INTERNAL_TSD_TYPES_H */ diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h new file mode 100644 index 00000000..cf30d18e --- /dev/null +++ b/include/jemalloc/internal/tsd_win.h @@ -0,0 +1,139 @@ +#ifdef JEMALLOC_INTERNAL_TSD_WIN_H +#error This file should be included only once, by tsd.h. +#endif +#define JEMALLOC_INTERNAL_TSD_WIN_H + +typedef struct { + bool initialized; + tsd_t val; +} tsd_wrapper_t; + +extern DWORD tsd_tsd; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; + +/* Initialization/cleanup. 
*/ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); + SetLastError(error); + + if (wrapper == NULL) { + return false; + } + + if (wrapper->initialized) { + wrapper->initialized = false; + tsd_cleanup(&wrapper->val); + if (wrapper->initialized) { + /* Trigger another cleanup round. */ + return true; + } + } + malloc_tsd_dalloc(wrapper); + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (!TlsSetValue(tsd_tsd, (void *)wrapper)) { + malloc_write(": Error setting TSD\n"); + abort(); + } +} + +JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * +tsd_wrapper_get(bool init) { + DWORD error = GetLastError(); + tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + SetLastError(error); + + if (init && unlikely(wrapper == NULL)) { + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } else { + wrapper->initialized = false; + /* MSVC is finicky about aggregate initialization. 
*/ + tsd_t tsd_initializer = TSD_INITIALIZER; + wrapper->val = tsd_initializer; + } + tsd_wrapper_set(wrapper); + } + return wrapper; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + tsd_tsd = TlsAlloc(); + if (tsd_tsd == TLS_OUT_OF_INDEXES) { + return true; + } + malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + tsd_wrapper_t *wrapper; + wrapper = (tsd_wrapper_t *) + malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + if (wrapper == NULL) { + malloc_write(": Error allocating TSD\n"); + abort(); + } + tsd_boot_wrapper.initialized = false; + tsd_cleanup(&tsd_boot_wrapper.val); + wrapper->initialized = false; + tsd_t initializer = TSD_INITIALIZER; + wrapper->val = initializer; + tsd_wrapper_set(wrapper); +} +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + if (tsd_boot0()) { + return true; + } + tsd_boot1(); + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return true; +} + +/* Get/set. 
*/ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(init); + if (tsd_get_allocates() && !init && wrapper == NULL) { + return NULL; + } + return &wrapper->val; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + tsd_wrapper_t *wrapper; + + assert(tsd_booted); + wrapper = tsd_wrapper_get(true); + if (likely(&wrapper->val != val)) { + wrapper->val = *(val); + } + wrapper->initialized = true; +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 97a64431..b8c94133 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2035,7 +2035,9 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, JEMALLOC_ALWAYS_INLINE void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { - assert(slow_path || tsd_assert_fast(tsd)); + if (!slow_path) { + tsd_assert_fast(tsd); + } if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } else { @@ -2073,7 +2075,9 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { - assert(slow_path || tsd_assert_fast(tsd)); + if (!slow_path) { + tsd_assert_fast(tsd); + } if (tsd_reentrancy_level_get(tsd) == 0) { witness_assert_lockless(tsd_tsdn(tsd)); } else { diff --git a/src/prof.c b/src/prof.c index 1e818ab4..d60680c1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1642,7 +1642,7 @@ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { cassert(config_prof); - assert(tsd->reentrancy_level == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t * tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { @@ -1757,7 +1757,7 @@ prof_fdump(void) { return; } tsd = tsd_fetch(); - assert(tsd->reentrancy_level == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); prof_dump_filename(filename, 'f', 
VSEQ_INVALID); @@ -1792,7 +1792,7 @@ prof_idump(tsdn_t *tsdn) { return; } tsd = tsdn_tsd(tsdn); - if (tsd->reentrancy_level > 0) { + if (tsd_reentrancy_level_get(tsd) > 0) { return; } @@ -1818,7 +1818,7 @@ prof_idump(tsdn_t *tsdn) { bool prof_mdump(tsd_t *tsd, const char *filename) { cassert(config_prof); - assert(tsd->reentrancy_level == 0); + assert(tsd_reentrancy_level_get(tsd) == 0); if (!opt_prof || !prof_booted) { return true; @@ -1849,7 +1849,7 @@ prof_gdump(tsdn_t *tsdn) { return; } tsd = tsdn_tsd(tsdn); - if (tsd->reentrancy_level > 0) { + if (tsd_reentrancy_level_get(tsd) > 0) { return; } diff --git a/src/rtree.c b/src/rtree.c index ada6e9d5..72786ff5 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -461,10 +461,3 @@ rtree_ctx_data_init(rtree_ctx_t *ctx) { cache->leaf = NULL; } } - -bool -tsd_rtree_ctx_data_init(tsd_t *tsd) { - rtree_ctx_data_init(&tsd->rtree_ctx); - - return false; -} diff --git a/src/tcache.c b/src/tcache.c index a7e05b17..afb1faa6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -453,15 +453,12 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { /* Initialize auto tcache (embedded in TSD). */ bool tsd_tcache_data_init(tsd_t *tsd) { - tcache_t *tcache = &tsd->tcache; + tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(tcache_small_bin_get(tcache, 0)->avail == NULL); size_t size = stack_nelms * sizeof(void *); /* Avoid false cacheline sharing. */ size = sa2u(size, CACHELINE); - /* Manually initialize rcache as we may need it for allocation. 
*/ - tsd_rtree_ctx_data_init(tsd); - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (avail_array == NULL) { diff --git a/src/tsd.c b/src/tsd.c index 686b4ef4..612f7523 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -10,14 +10,38 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; -malloc_tsd_data(, , tsd_t, TSD_INITIALIZER) +#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; +bool tsd_booted = false; +#elif (defined(JEMALLOC_TLS)) +__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +pthread_key_t tsd_tsd; +bool tsd_booted = false; +#elif (defined(_WIN32)) +DWORD tsd_tsd; +tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; +bool tsd_booted = false; +#else +pthread_key_t tsd_tsd; +tsd_init_head_t tsd_init_head = { + ql_head_initializer(blocks), + MALLOC_MUTEX_INITIALIZER +}; +tsd_wrapper_t tsd_boot_wrapper = { + false, + TSD_INITIALIZER +}; +bool tsd_booted = false; +#endif + /******************************************************************************/ void tsd_slow_update(tsd_t *tsd) { if (tsd_nominal(tsd)) { - if (malloc_slow || !tsd->tcache_enabled || + if (malloc_slow || !tsd_tcache_enabled_get(tsd) || tsd_reentrancy_level_get(tsd) > 0) { tsd->state = tsd_state_nominal_slow; } else { @@ -97,20 +121,28 @@ malloc_tsd_cleanup_register(bool (*f)(void)) { bool tsd_data_init(void *arg) { tsd_t *tsd = (tsd_t *)arg; -#define MALLOC_TSD_init_yes(n, t) \ - if (tsd_##n##_data_init(tsd)) { \ - return true; \ + /* + * We initialize the rtree context first (before the tcache), since the + * tcache initialization depends on it. 
+ */ + rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + + if (tsd_tcache_enabled_data_init(tsd)) { + return true; } -#define MALLOC_TSD_init_no(n, t) -#define O(n, t, gs, i, c) \ - MALLOC_TSD_init_##i(n, t) -MALLOC_TSD -#undef MALLOC_TSD_init_yes -#undef MALLOC_TSD_init_no -#undef O return false; } +static void +tsd_do_data_cleanup(tsd_t *tsd) { + prof_tdata_cleanup(tsd); + iarena_cleanup(tsd); + arena_cleanup(tsd); + arenas_tdata_cleanup(tsd); + tcache_cleanup(tsd); + witnesses_cleanup(tsd); +} + void tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; @@ -127,15 +159,7 @@ tsd_cleanup(void *arg) { * after this destructor was called. Reset state to * tsd_state_purgatory and request another callback. */ -#define MALLOC_TSD_cleanup_yes(n, t) \ - n##_cleanup(tsd); -#define MALLOC_TSD_cleanup_no(n, t) -#define O(n, t, gs, i, c) \ - MALLOC_TSD_cleanup_##c(n, t) -MALLOC_TSD -#undef MALLOC_TSD_cleanup_yes -#undef MALLOC_TSD_cleanup_no -#undef O + tsd_do_data_cleanup(tsd); tsd->state = tsd_state_purgatory; tsd_set(tsd); break; @@ -150,6 +174,13 @@ MALLOC_TSD default: not_reached(); } +#ifdef JEMALLOC_JET + test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); + int *data = tsd_test_datap_get_unsafe(tsd); + if (test_callback != NULL) { + test_callback(data); + } +#endif } tsd_t * diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 38114674..c9a7d809 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -1,41 +1,29 @@ #include "test/jemalloc_test.h" -#define THREAD_DATA 0x72b65c10 - -typedef unsigned int data_t; - -static bool data_cleanup_executed; -static bool data_test_started; - -malloc_tsd_types(data_, data_t) -malloc_tsd_protos(, data_, data_t) +static int data_cleanup_count; void -data_cleanup(void *arg) { - data_t *data = (data_t *)arg; - - if (!data_test_started) { - return; - } - if (!data_cleanup_executed) { - assert_x_eq(*data, THREAD_DATA, +data_cleanup(int *data) { + if (data_cleanup_count == 0) { + assert_x_eq(*data, 
MALLOC_TSD_TEST_DATA_INIT, "Argument passed into cleanup function should match tsd " "value"); } - data_cleanup_executed = true; + ++data_cleanup_count; /* * Allocate during cleanup for two rounds, in order to assure that * jemalloc's internal tsd reinitialization happens. */ + bool reincarnate = false; switch (*data) { - case THREAD_DATA: + case MALLOC_TSD_TEST_DATA_INIT: *data = 1; - data_tsd_set(data); + reincarnate = true; break; case 1: *data = 2; - data_tsd_set(data); + reincarnate = true; break; case 2: return; @@ -43,37 +31,35 @@ data_cleanup(void *arg) { not_reached(); } - { + if (reincarnate) { void *p = mallocx(1, 0); assert_ptr_not_null(p, "Unexpeced mallocx() failure"); dallocx(p, 0); } } -malloc_tsd_externs(data_, data_t) -#define DATA_INIT 0x12345678 -malloc_tsd_data(, data_, data_t, DATA_INIT) -malloc_tsd_funcs(, data_, data_t, DATA_INIT, data_cleanup) - static void * thd_start(void *arg) { - data_t d = (data_t)(uintptr_t)arg; + int d = (int)(uintptr_t)arg; void *p; - assert_x_eq(*data_tsd_get(true), DATA_INIT, + tsd_t *tsd = tsd_fetch(); + assert_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); assert_ptr_not_null(p, "Unexpected malloc() failure"); - data_tsd_set(&d); - assert_x_eq(*data_tsd_get(true), d, + tsd_test_data_set(tsd, d); + assert_x_eq(tsd_test_data_get(tsd), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(*data_tsd_get(true), (data_t)(uintptr_t)arg, + assert_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); + tsd_test_callback_set(tsd, &data_cleanup); + free(p); return NULL; } @@ -86,11 +72,15 @@ TEST_END TEST_BEGIN(test_tsd_sub_thread) { thd_t thd; - data_cleanup_executed = false; - thd_create(&thd, thd_start, (void *)THREAD_DATA); + data_cleanup_count = 0; + thd_create(&thd, thd_start, (void *)MALLOC_TSD_TEST_DATA_INIT); thd_join(thd, NULL); - 
assert_true(data_cleanup_executed, - "Cleanup function should have executed"); + /* + * We reincarnate twice in the data cleanup, so it should execute at + * least 3 times. + */ + assert_x_ge(data_cleanup_count, 3, + "Cleanup function should have executed multiple times."); } TEST_END @@ -103,9 +93,11 @@ thd_start_reincarnated(void *arg) { assert_ptr_not_null(p, "Unexpected malloc() failure"); /* Manually trigger reincarnation. */ - assert_ptr_not_null(tsd->arena, "Should have tsd arena set."); + assert_ptr_not_null(tsd_arena_get(tsd), + "Should have tsd arena set."); tsd_cleanup((void *)tsd); - assert_ptr_null(tsd->arena, "TSD arena should have been cleared."); + assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + "TSD arena should have been cleared."); assert_u_eq(tsd->state, tsd_state_purgatory, "TSD state should be purgatory\n"); @@ -114,12 +106,12 @@ thd_start_reincarnated(void *arg) { "TSD state should be reincarnated\n"); p = mallocx(1, MALLOCX_TCACHE_NONE); assert_ptr_not_null(p, "Unexpected malloc() failure"); - assert_ptr_not_null(tsd->arena, + assert_ptr_not_null(*tsd_arenap_get_unsafe(tsd), "Should have tsd arena set after reincarnation."); free(p); tsd_cleanup((void *)tsd); - assert_ptr_null(tsd->arena, + assert_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared after 2nd cleanup."); return NULL; @@ -134,14 +126,11 @@ TEST_END int main(void) { - /* Core tsd bootstrapping must happen prior to data_tsd_boot(). */ + /* Ensure tsd bootstrapped. */ if (nallocx(1, 0) == 0) { malloc_printf("Initialization error"); return test_status_fail; } - data_test_started = false; - data_tsd_boot(); - data_test_started = true; return test_no_reentrancy( test_tsd_main_thread, From fc1aaf13fed6f9344c0681440e5a5782c889d0dc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 28 Apr 2017 13:31:09 -0700 Subject: [PATCH 0851/2608] Revert "Use trylock in tcache_bin_flush when possible." This reverts commit 8584adc451f31adfc4ab8693d9189cf3a7e5d858. 
Production results not favorable. Will investigate separately. --- include/jemalloc/internal/tcache_externs.h | 30 ++-- include/jemalloc/internal/tcache_inlines.h | 4 +- src/tcache.c | 171 ++++++--------------- 3 files changed, 63 insertions(+), 142 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 95dfe446..abe133fa 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -27,27 +27,23 @@ extern size_t tcache_maxclass; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, unsigned rem); -unsigned tcache_bin_try_flush_small(tsd_t *tsd, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem); -unsigned tcache_bin_try_flush_large(tsd_t *tsd, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, +void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, 
unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn); void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 5e9a7a0f..8a65ba2b 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -227,8 +227,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, tbin = tcache_large_bin_get(tcache, binind); tbin_info = &tcache_bin_info[binind]; if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + tcache_bin_flush_large(tsd, tbin, binind, + (tbin_info->ncached_max >> 1), tcache); } assert(tbin->ncached < tbin_info->ncached_max); tbin->ncached++; diff --git a/src/tcache.c b/src/tcache.c index afb1faa6..ee5e816f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -45,16 +45,14 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } else { tbin = tcache_large_bin_get(tcache, binind); } - bool repeat_bin; if (tbin->low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ - unsigned nflushed; if (binind < NBINS) { - nflushed = tcache_bin_try_flush_small(tsd, tcache, tbin, - binind, tbin->ncached - tbin->low_water + - (tbin->low_water >> 2)); + tcache_bin_flush_small(tsd, tcache, tbin, binind, + tbin->ncached - tbin->low_water + (tbin->low_water + >> 2)); /* * Reduce fill count by 2X. 
Limit lg_fill_div such that * the fill count is always at least 1. @@ -65,29 +63,23 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache->lg_fill_div[binind]++; } } else { - nflushed = tcache_bin_try_flush_large(tsd, tcache, tbin, - binind, tbin->ncached - tbin->low_water + - (tbin->low_water >> 2)); + tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached + - tbin->low_water + (tbin->low_water >> 2), tcache); } - repeat_bin = (nflushed == 0); - } else { - if (tbin->low_water < 0) { - /* - * Increase fill count by 2X for small bins. Make sure - * lg_fill_div stays greater than 0. - */ - if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { - tcache->lg_fill_div[binind]--; - } + } else if (tbin->low_water < 0) { + /* + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 0. + */ + if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { + tcache->lg_fill_div[binind]--; } - repeat_bin = false; } - if (!repeat_bin) { - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) { - tcache->next_gc_bin = 0; - } - tbin->low_water = tbin->ncached; + tbin->low_water = tbin->ncached; + + tcache->next_gc_bin++; + if (tcache->next_gc_bin == nhbins) { + tcache->next_gc_bin = 0; } } @@ -107,9 +99,11 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } -static inline unsigned -tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem, bool must_flush) { +void +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + szind_t binind, unsigned rem) { + bool merged_stats = false; + assert(binind < NBINS); assert(rem <= tbin->ncached); @@ -122,12 +116,9 @@ tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } - bool merged_stats = false; - unsigned nflushed = 0; - unsigned nskipped = 0; while (nflush > 0) { /* Lock the arena bin associated with 
the first object. */ - extent_t *extent = item_extent[nskipped]; + extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); arena_bin_t *bin = &bin_arena->bins[binind]; @@ -139,16 +130,7 @@ tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, tcache->prof_accumbytes = 0; } - if (must_flush) { - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } else { - /* Make best effort to flush w/o blocking. */ - if (malloc_mutex_trylock(tsd_tsdn(tsd), &bin->lock)) { - nskipped++; - nflush--; - continue; - } - } + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena) { assert(!merged_stats); merged_stats = true; @@ -157,7 +139,7 @@ tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, tbin->tstats.nrequests = 0; } unsigned ndeferred = 0; - for (unsigned i = nskipped; i < nflush; i++) { + for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -172,14 +154,13 @@ tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - *(tbin->avail - 1 - ndeferred - nskipped) = ptr; - item_extent[ndeferred + nskipped] = extent; + *(tbin->avail - 1 - ndeferred) = ptr; + item_extent[ndeferred] = extent; ndeferred++; } } malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); - nflushed += nflush - ndeferred; nflush = ndeferred; } if (config_stats && !merged_stats) { @@ -188,49 +169,26 @@ tcache_bin_flush_small_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * arena, so the stats didn't get merged. Manually do so now. 
*/ arena_bin_t *bin = &arena->bins[binind]; - if (must_flush) { - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } - if (must_flush || - !malloc_mutex_trylock(tsd_tsdn(tsd), &bin->lock)) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - } + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - assert(nflushed == tbin->ncached - rem - nskipped); - assert(nskipped == 0 || !must_flush); - if (nflushed > 0) { - memmove(tbin->avail - (rem + nskipped), tbin->avail - - tbin->ncached, rem * sizeof(void *)); - } - tbin->ncached = rem + nskipped; + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); + tbin->ncached = rem; if ((low_water_t)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } - - return nflushed; } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem) { - tcache_bin_flush_small_impl(tsd, tcache, tbin, binind, rem, true); -} +tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, + unsigned rem, tcache_t *tcache) { + bool merged_stats = false; -unsigned -tcache_bin_try_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem) { - return tcache_bin_flush_small_impl(tsd, tcache, tbin, binind, rem, - false); -} - -static inline unsigned -tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem, bool must_flush) { assert(binind < nhbins); assert(rem <= tbin->ncached); @@ -243,31 +201,18 @@ tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } - bool merged_stats 
= false; - unsigned nflushed = 0; - unsigned nskipped = 0; while (nflush > 0) { /* Lock the arena associated with the first object. */ - extent_t *extent = item_extent[nskipped]; + extent_t *extent = item_extent[0]; arena_t *locked_arena = extent_arena_get(extent); UNUSED bool idump; if (config_prof) { idump = false; } - if (must_flush) { - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); - } else { - /* Make best effort to flush w/o blocking. */ - if (malloc_mutex_trylock(tsd_tsdn(tsd), - &locked_arena->large_mtx)) { - nskipped++; - nflush--; - continue; - } - } - for (unsigned i = nskipped; i < nflush; i++) { + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); extent = item_extent[i]; @@ -293,7 +238,7 @@ tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); unsigned ndeferred = 0; - for (unsigned i = nskipped; i < nflush; i++) { + for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -307,8 +252,8 @@ tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * Stash the object, so that it can be handled * in a future pass. 
*/ - *(tbin->avail - 1 - ndeferred - nskipped) = ptr; - item_extent[ndeferred + nskipped] = extent; + *(tbin->avail - 1 - ndeferred) = ptr; + item_extent[ndeferred] = extent; ndeferred++; } } @@ -317,7 +262,6 @@ tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); - nflushed += nflush - ndeferred; nflush = ndeferred; } if (config_stats && !merged_stats) { @@ -330,31 +274,12 @@ tcache_bin_flush_large_impl(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, tbin->tstats.nrequests = 0; } - assert(nflushed == tbin->ncached - rem - nskipped); - assert(nskipped == 0 || !must_flush); - - if (nflushed > 0) { - memmove(tbin->avail - (rem + nskipped), tbin->avail - - tbin->ncached, rem * sizeof(void *)); - } - tbin->ncached = rem + nskipped; + memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + sizeof(void *)); + tbin->ncached = rem; if ((low_water_t)tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } - return nflushed; -} - -void -tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem) { - tcache_bin_flush_large_impl(tsd, tcache, tbin, binind, rem, true); -} - -unsigned -tcache_bin_try_flush_large(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, - szind_t binind, unsigned rem) { - return tcache_bin_flush_large_impl(tsd, tcache, tbin, binind, rem, - false); } void @@ -533,7 +458,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } for (unsigned i = NBINS; i < nhbins; i++) { tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); - tcache_bin_flush_large(tsd, tcache, tbin, i, 0); + tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { assert(tbin->tstats.nrequests == 0); From 1c982c37d94fae5211206b3e41a8722b326ad5d4 Mon Sep 17 00:00:00 2001 From: rustyx Date: Tue, 2 May 2017 16:29:35 +0200 Subject: [PATCH 0852/2608] Make VS2015 project work again --- 
.../projects/vc2015/jemalloc/jemalloc.vcxproj | 61 +----- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 188 +----------------- 2 files changed, 10 insertions(+), 239 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 75ea8fba..832ff69d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -34,63 +34,8 @@ x64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -99,9 +44,10 @@ + - + @@ -113,7 +59,6 @@ - @@ -395,4 +340,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index a328a6f9..9d4a7c7d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -5,185 +5,11 @@ {4FC737F1-C7A5-4376-A066-2A32D752A2FF} cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx - - {93995380-89BD-4b04-88EB-625FBE52EBFB} - h;hh;hpp;hxx;hm;inl;inc;xsd - - - {5697dfa3-16cf-4932-b428-6e0ec6e9f98e} - - - {0cbd2ca6-42a7-4f82-8517-d7e7a14fd986} - - - {0abe6f30-49b5-46dd-8aca-6e33363fa52c} - - - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - 
Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\internal - - - Header Files\msvc_compat - - - Header Files\msvc_compat - - - Header Files\msvc_compat\C99 - - - Header Files\msvc_compat\C99 - Source Files - - Source Files - Source Files @@ -214,9 +40,6 @@ Source Files - - Source Files - Source Files @@ -250,11 +73,14 @@ Source Files - - Source Files - Source Files + + Source Files + + + Source Files + - + \ No newline at end of file From 344dd342dddf341f2db47c0a37f8b2aadccfdce7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 May 2017 21:42:33 -0700 Subject: [PATCH 0853/2608] rtree_leaf_elm_extent_write() --> rtree_leaf_elm_extent_lock_write() Refactor rtree_leaf_elm_extent_write() as rtree_leaf_elm_extent_lock_write(), so that whether the leaf element is currently acquired is separate from what lock state to write. This allows for a relaxed atomic read when releasing the lock. --- include/jemalloc/internal/rtree_inlines.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index 7bc52383..b66e8ae2 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -54,7 +54,7 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } return (uintptr_t)atomic_load_p(&elm->le_bits, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); } JEMALLOC_ALWAYS_INLINE extent_t * @@ -143,8 +143,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, extent_t *extent) { +rtree_leaf_elm_extent_lock_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, bool lock) { if (config_debug && acquired) { rtree_leaf_elm_witness_access(tsdn, rtree, elm); } @@ -156,10 +156,10 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) | - (uintptr_t)acquired; + (uintptr_t)lock; atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - if (acquired) { + if (lock) { /* Overlay lock bit. */ extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1); } @@ -222,7 +222,6 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) | (uintptr_t)acquired; - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else rtree_leaf_elm_slab_write(tsdn, rtree, elm, acquired, slab); @@ -231,7 +230,8 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, * Write extent last, since the element is atomically considered valid * as soon as the extent field is non-NULL. 
*/ - rtree_leaf_elm_extent_write(tsdn, rtree, elm, acquired, extent); + rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, acquired, extent, + acquired); #endif } @@ -463,7 +463,8 @@ static inline void rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) { extent_t *extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, true, true); - rtree_leaf_elm_extent_write(tsdn, rtree, elm, false, extent); + rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, true, extent, false); + if (config_debug) { rtree_leaf_elm_witness_release(tsdn, rtree, elm); } From 0798fe6e7056a2eb571dde06927e87635dd2e74c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 2 May 2017 21:45:46 -0700 Subject: [PATCH 0854/2608] Fix rtree_leaf_elm_szind_slab_update(). Re-read the leaf element when atomic CAS fails due to a race with another thread that has locked the leaf element, since atomic_compare_exchange_strong_p() overwrites the expected value with the actual value on failure. This regression was introduced by 0ee0e0c155a05d0d028a9972ad86b9eaac4ccabd (Implement compact rtree leaf element representation.). This resolves #798. --- include/jemalloc/internal/rtree_inlines.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index b66e8ae2..bcc2041a 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -251,17 +251,16 @@ rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, * modified by another thread, the fact that the lock is embedded in the * same word requires that a CAS operation be used here. */ - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, false, - true) & ~((uintptr_t)0x1); /* Mask lock bit. 
*/ - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)slab << 1); spin_t spinner = SPIN_INITIALIZER; while (true) { + void *old_bits = (void *)(rtree_leaf_elm_bits_read(tsdn, rtree, + elm, false, true) & ~((uintptr_t)0x1)); /* Mask lock bit. */ + void *bits = (void *)(((uintptr_t)szind << LG_VADDR) | + ((uintptr_t)rtree_leaf_elm_bits_extent_get( + (uintptr_t)old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)slab << 1)); if (likely(atomic_compare_exchange_strong_p(&elm->le_bits, - (void **)&old_bits, (void *)bits, ATOMIC_ACQUIRE, - ATOMIC_RELAXED))) { + &old_bits, bits, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { break; } spin_adaptive(&spinner); From 31baedbbb9d1701b13312415b59d2b6240bb18e4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 3 May 2017 10:07:39 -0700 Subject: [PATCH 0855/2608] Add --with-version=VERSION . This simplifies configuration when embedding a jemalloc release into another project's git repository. This resolves #811. --- INSTALL | 17 ++++++++++++++--- configure.ac | 10 +++++++--- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/INSTALL b/INSTALL index abf3290b..125cad2b 100644 --- a/INSTALL +++ b/INSTALL @@ -35,9 +35,20 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. ---with-version=..--g - Use the specified version string rather than trying to generate one (if in - a git repository) or use existing the VERSION file (if present). +--with-version=(..--g|VERSION) + The VERSION file is mandatory for successful configuration, and the + following steps are taken to assure its presence: + 1) If --with-version=..--g is specified, + generate VERSION using the specified value. 
+ 2) If --with-version is not specified in either form and the source + directory is inside a git repository, try to generate VERSION via 'git + describe' invocations that pattern-match release tags. + 3) If VERSION is missing, generate it with a bogus version: + 0.0.0-0-g0000000000000000000000000000000000000000 + + Note that --with-version=VERSION bypasses (1) and (2), which simplifies + VERSION configuration when embedding a jemalloc release into another + project's git repository. --with-rpath= Embed one or more library paths, so that libjemalloc can find the libraries diff --git a/configure.ac b/configure.ac index 462f509f..7ffdbea8 100644 --- a/configure.ac +++ b/configure.ac @@ -1349,10 +1349,14 @@ AC_ARG_WITH([version], [Version string])], [ echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null - if test $? -ne 0 ; then - AC_MSG_ERROR([${with_version} does not match ..--g]) + if test $? -eq 0 ; then + echo "$with_version" > "${objroot}VERSION" + else + echo "${with_version}" | grep ['^VERSION$'] 2>&1 1>/dev/null + if test $? -ne 0 ; then + AC_MSG_ERROR([${with_version} does not match ..--g or VERSION]) + fi fi - echo "$with_version" > "${objroot}VERSION" ], [ dnl Set VERSION if source directory is inside a git repository. if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then From 11d2f39d96d1a1e4d35a438e184fa0785a2baf08 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 May 2017 09:06:41 -0700 Subject: [PATCH 0856/2608] Remove mutex_prof_data_t redeclaration. Redeclaration causes compilations failures with e.g. gcc 4.2.1 on FreeBSD. This regression was introduced by 89e2d3c12b573310e60b97beaf178007a71d83a3 (Header refactoring: ctl - unify and remove from catchall.). 
--- include/jemalloc/internal/mutex_prof.h | 2 +- include/jemalloc/internal/mutex_types.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index f7301c88..1cc198d6 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -50,7 +50,7 @@ typedef enum { mutex_prof_num_counters } mutex_prof_counter_ind_t; -typedef struct mutex_prof_data_s { +typedef struct { /* * Counters touched on the slow path, i.e. when there is lock * contention. We update them once we have the lock. diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index e6589374..5af8d099 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H #define JEMALLOC_INTERNAL_MUTEX_TYPES_H -typedef struct mutex_prof_data_s mutex_prof_data_t; typedef struct malloc_mutex_s malloc_mutex_t; /* From 81ef365622c52d9252f546061652b0b31513c0b7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 May 2017 16:50:49 -0700 Subject: [PATCH 0857/2608] Avoid compiler warnings on Windows. --- include/jemalloc/internal/bitmap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index f6374e14..ac990290 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -249,8 +249,8 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { 1)); bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit >> lg_bits_per_group)]; - unsigned group_nmask = ((min_bit > bit) ? (min_bit - bit) : 0) - >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + unsigned group_nmask = (unsigned)(((min_bit > bit) ? 
(min_bit - + bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); assert(group_nmask <= BITMAP_GROUP_NBITS); bitmap_t group_mask = ~((1LU << group_nmask) - 1); bitmap_t group_masked = group & group_mask; @@ -265,7 +265,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { * next sibling. This will recurse at most once per * non-root level. */ - size_t sib_base = bit + (1U << lg_bits_per_group); + size_t sib_base = bit + (ZU(1) << lg_bits_per_group); assert(sib_base > min_bit); assert(sib_base > bit); if (sib_base >= binfo->nbits) { @@ -273,8 +273,8 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { } return bitmap_ffu(bitmap, binfo, sib_base); } - bit += (ffs_lu(group_masked) - 1) << (lg_bits_per_group - - LG_BITMAP_GROUP_NBITS); + bit += ((size_t)(ffs_lu(group_masked) - 1)) << + (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); } assert(bit >= min_bit); assert(bit < binfo->nbits); From 17ddddee10c85dec0765a8329fc466c48c8c0592 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 May 2017 13:56:41 -0700 Subject: [PATCH 0858/2608] Specify -Werror for run_tests builds. 
--- scripts/gen_run_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 9e46ba90..0446c65c 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -33,7 +33,7 @@ for cc, cxx in possible_compilers: and '--enable-prof' in config_opts: continue config_line = ( - './configure ' + 'EXTRA_CFLAGS=-Werror EXTRA_CXXFLAGS=-Werror ./configure ' + 'CC="{} {}" '.format(cc, " ".join(compiler_opts)) + 'CXX="{} {}" '.format(cxx, " ".join(compiler_opts)) + " ".join(config_opts) From 6f58e630b6257c1e64efae952cc0b9c280b92622 Mon Sep 17 00:00:00 2001 From: Arkady Shapkin Date: Thu, 11 May 2017 21:57:05 +0300 Subject: [PATCH 0859/2608] Update and rename INSTALL to INSTALL.md --- INSTALL => INSTALL.md | 164 +++++++++++++++++++++++++++--------------- 1 file changed, 107 insertions(+), 57 deletions(-) rename INSTALL => INSTALL.md (87%) diff --git a/INSTALL b/INSTALL.md similarity index 87% rename from INSTALL rename to INSTALL.md index 125cad2b..e0cfc0be 100644 --- a/INSTALL +++ b/INSTALL.md @@ -18,16 +18,19 @@ would create a dependency on xsltproc in packaged releases, hence the requirement to either run 'make dist' or avoid installing docs via the various install_* targets documented below. -=== Advanced configuration ===================================================== + +## Advanced configuration The 'configure' script supports numerous options that allow control of which functionality is enabled, where jemalloc is installed, etc. Optionally, pass any of the following arguments (not a definitive list) to 'configure': ---help +* `--help` + Print a definitive list of options. ---prefix= +* `--prefix=` + Set the base directory in which to install. For example: ./configure --prefix=/usr/local @@ -35,7 +38,8 @@ any of the following arguments (not a definitive list) to 'configure': will cause files to be installed into /usr/local/include, /usr/local/lib, and /usr/local/man. 
---with-version=(..--g|VERSION) +* `--with-version=(..--g|VERSION)` + The VERSION file is mandatory for successful configuration, and the following steps are taken to assure its presence: 1) If --with-version=..--g is specified, @@ -50,11 +54,13 @@ any of the following arguments (not a definitive list) to 'configure': VERSION configuration when embedding a jemalloc release into another project's git repository. ---with-rpath= +* `--with-rpath=` + Embed one or more library paths, so that libjemalloc can find the libraries it is linked to. This works only on ELF-based systems. ---with-mangling= +* `--with-mangling=` + Mangle public symbols specified in which is a comma-separated list of name:mangled pairs. @@ -67,7 +73,8 @@ any of the following arguments (not a definitive list) to 'configure': --with-jemalloc-prefix, and mangled symbols are then ignored when applying the prefix. ---with-jemalloc-prefix= +* `--with-jemalloc-prefix=` + Prefix all public APIs with . For example, if is "prefix_", API changes like the following occur: @@ -83,39 +90,46 @@ any of the following arguments (not a definitive list) to 'configure': jemalloc overlays the default malloc zone, but makes no attempt to actually replace the "malloc", "calloc", etc. symbols. ---without-export +* `--without-export` + Don't export public APIs. This can be useful when building jemalloc as a static library, or to avoid exporting public APIs when using the zone allocator on OSX. ---with-private-namespace= +* `--with-private-namespace=` + Prefix all library-private APIs with je_. For shared libraries, symbol visibility mechanisms prevent these symbols from being exported, but for static libraries, naming collisions are a real possibility. By default, is empty, which results in a symbol prefix of je_ . ---with-install-suffix= +* `--with-install-suffix=` + Append to the base name of all installed files, such that multiple versions of jemalloc can coexist in the same installation directory. 
For example, libjemalloc.so.0 becomes libjemalloc.so.0. ---with-malloc-conf= - Embed as a run-time options string that is processed prior to +* `--with-malloc-conf=` + + Embed `` as a run-time options string that is processed prior to the malloc_conf global variable, the /etc/malloc.conf symlink, and the MALLOC_CONF environment variable. For example, to change the default decay time to 30 seconds: --with-malloc-conf=decay_time:30 ---enable-debug +* `--enable-debug` + Enable assertions and validation code. This incurs a substantial performance hit, but is very useful during application development. ---disable-stats +* `--disable-stats` + Disable statistics gathering functionality. See the "opt.stats_print" option documentation for usage details. ---enable-prof +* `--enable-prof` + Enable heap profiling and leak detection functionality. See the "opt.prof" option documentation for usage details. When enabled, there are several approaches to backtracing, and the configure script chooses the first one @@ -125,45 +139,55 @@ any of the following arguments (not a definitive list) to 'configure': + libgcc (unless --disable-prof-libgcc) + gcc intrinsics (unless --disable-prof-gcc) ---enable-prof-libunwind +* `--enable-prof-libunwind` + Use the libunwind library (http://www.nongnu.org/libunwind/) for stack backtracing. ---disable-prof-libgcc +* `--disable-prof-libgcc` + Disable the use of libgcc's backtracing functionality. ---disable-prof-gcc +* `--disable-prof-gcc` + Disable the use of gcc intrinsics for backtracing. ---with-static-libunwind= +* `--with-static-libunwind=` + Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. ---disable-fill +* `--disable-fill` + Disable support for junk/zero filling of memory. See the "opt.junk" and "opt.zero" option documentation for usage details. ---disable-zone-allocator +* `--disable-zone-allocator` + Disable zone allocator for Darwin. 
This means jemalloc won't be hooked as the default allocator on OSX/iOS. ---enable-utrace +* `--enable-utrace` + Enable utrace(2)-based allocation tracing. This feature is not broadly portable (FreeBSD has it, but Linux and OS X do not). ---enable-xmalloc +* `--enable-xmalloc` + Enable support for optional immediate termination due to out-of-memory errors, as is commonly implemented by "xmalloc" wrapper function for malloc. See the "opt.xmalloc" option documentation for usage details. ---enable-lazy-lock +* `--enable-lazy-lock` + Enable code that wraps pthread_create() to detect when an application switches from single-threaded to multi-threaded mode, so that it can avoid mutex locking/unlocking operations while in single-threaded mode. In practice, this feature usually has little impact on performance unless thread-specific caching is disabled. ---disable-cache-oblivious +* `--disable-cache-oblivious` + Disable cache-oblivious large allocation alignment for large allocation requests with no alignment constraints. If this feature is disabled, all large allocations are page-aligned as an implementation artifact, which can @@ -172,20 +196,24 @@ any of the following arguments (not a definitive list) to 'configure': most extreme case increases physical memory usage for the 16 KiB size class to 20 KiB. ---disable-syscall +* `--disable-syscall` + Disable use of syscall(2) rather than {open,read,write,close}(2). This is intended as a workaround for systems that place security limitations on syscall(2). ---disable-cxx +* `--disable-cxx` + Disable C++ integration. This will cause new and delete operator implementations to be omitted. ---with-xslroot= +* `--with-xslroot=` + Specify where to find DocBook XSL stylesheets when building the documentation. ---with-lg-page= +* `--with-lg-page=` + Specify the base 2 log of the allocator page size, which must in turn be at least as large as the system page size. 
By default the configure script determines the host's page size and sets the allocator page size equal to @@ -193,23 +221,26 @@ any of the following arguments (not a definitive list) to 'configure': system page size may change between configuration and execution, e.g. when cross compiling. ---with-lg-page-sizes= +* `--with-lg-page-sizes=` + Specify the comma-separated base 2 logs of the page sizes to support. This option may be useful when cross-compiling in combination with - --with-lg-page, but its primary use case is for integration with FreeBSD's + `--with-lg-page`, but its primary use case is for integration with FreeBSD's libc, wherein jemalloc is embedded. ---with-lg-hugepage= +* `--with-lg-hugepage=` + Specify the base 2 log of the system huge page size. This option is useful when cross compiling, or when overriding the default for systems that do not explicitly support huge pages. ---with-lg-quantum= +* `--with-lg-quantum=` + Specify the base 2 log of the minimum allocation alignment. jemalloc needs to know the minimum alignment that meets the following C standard requirement (quoted from the April 12, 2011 draft of the C11 standard): - The pointer returned if the allocation succeeds is suitably aligned so + > The pointer returned if the allocation succeeds is suitably aligned so that it may be assigned to a pointer to any type of object with a fundamental alignment requirement and then used to access such an object or an array of such objects in the space allocated [...] @@ -220,44 +251,50 @@ any of the following arguments (not a definitive list) to 'configure': . On most modern architectures, this mandates 16-byte alignment (=4), but the glibc developers chose not to meet this requirement for performance reasons. An old discussion can be found at - https://sourceware.org/bugzilla/show_bug.cgi?id=206 . Unlike glibc, + . 
Unlike glibc, jemalloc does follow the C standard by default (caveat: jemalloc technically cheats for size classes smaller than the quantum), but the fact that Linux systems already work around this allocator noncompliance means that it is generally safe in practice to let jemalloc's minimum alignment - follow glibc's lead. If you specify --with-lg-quantum=3 during + follow glibc's lead. If you specify `--with-lg-quantum=3` during configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). The following environment variables (not a definitive list) impact configure's behavior: -CFLAGS="?" -CXXFLAGS="?" +* `CFLAGS="?"` +* `CXXFLAGS="?"` + Pass these flags to the C/C++ compiler. Any flags set by the configure script are prepended, which means explicitly set flags generally take precedence. Take care when specifying flags such as -Werror, because configure tests may be affected in undesirable ways. -EXTRA_CFLAGS="?" -EXTRA_CXXFLAGS="?" +* `EXTRA_CFLAGS="?"` +* `EXTRA_CXXFLAGS="?"` + Append these flags to CFLAGS/CXXFLAGS, without passing them to the compiler(s) during configuration. This makes it possible to add flags such as -Werror, while allowing the configure script to determine what other flags are appropriate for the specified configuration. -CPPFLAGS="?" +* `CPPFLAGS="?"` + Pass these flags to the C preprocessor. Note that CFLAGS is not passed to 'cpp' when 'configure' is looking for include files, so you must use CPPFLAGS instead if you need to help 'configure' find header files. -LD_LIBRARY_PATH="?" +* `LD_LIBRARY_PATH="?"` + 'ld' uses this colon-separated list to find libraries. -LDFLAGS="?" +* `LDFLAGS="?"` + Pass these flags when linking. -PATH="?" +* `PATH="?"` + 'configure' uses this to find programs. 
In some cases it may be necessary to work around configuration results that do @@ -269,7 +306,8 @@ e.g.: echo "je_cv_madv_free=no" > config.cache && ./configure -C -=== Advanced compilation ======================================================= + +## Advanced compilation To build only parts of jemalloc, use the following targets: @@ -297,40 +335,51 @@ To clean up build results to varying degrees, use the following make targets: distclean relclean -=== Advanced installation ====================================================== + +## Advanced installation Optionally, define make variables when invoking make, including (not exclusively): -INCLUDEDIR="?" +* `INCLUDEDIR="?"` + Use this as the installation prefix for header files. -LIBDIR="?" +* `LIBDIR="?"` + Use this as the installation prefix for libraries. -MANDIR="?" +* `MANDIR="?"` + Use this as the installation prefix for man pages. -DESTDIR="?" +* `DESTDIR="?"` + Prepend DESTDIR to INCLUDEDIR, LIBDIR, DATADIR, and MANDIR. This is useful when installing to a different path than was specified via --prefix. -CC="?" +* `CC="?"` + Use this to invoke the C compiler. -CFLAGS="?" +* `CFLAGS="?"` + Pass these flags to the compiler. -CPPFLAGS="?" +* `CPPFLAGS="?"` + Pass these flags to the C preprocessor. -LDFLAGS="?" +* `LDFLAGS="?"` + Pass these flags when linking. -PATH="?" +* `PATH="?"` + Use this to search for programs used during configuration and building. -=== Development ================================================================ + +## Development If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' script rather than 'configure'. This re-generates 'configure', enables @@ -347,7 +396,8 @@ directory, issue configuration and build commands: ../configure --enable-autogen make -=== Documentation ============================================================== + +## Documentation The manual page is generated in both html and roff formats. Any web browser can be used to view the html manual. 
The roff manual page can be formatted From b3b033eefd7892f0bed7fc30f431016660b44918 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 11 May 2017 20:22:48 -0700 Subject: [PATCH 0860/2608] Do not build in parallel on AppVeyor. The compiler database used by MSVC is increasingly becoming corrupt, presumably due to concurrency-related corruption, despite the -FS compiler flag being specified as recommended. --- .appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 510815dc..9a7d00a9 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -36,7 +36,7 @@ install: build_script: - bash -c "autoconf" - bash -c "./configure $CONFIG_FLAGS" - - mingw32-make -j3 + - mingw32-make - file lib/jemalloc.dll - - mingw32-make -j3 tests + - mingw32-make tests - mingw32-make -k check From a268af50857f0a4d139f26c66d22debbfae7a674 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 1 May 2017 23:10:42 -0700 Subject: [PATCH 0861/2608] Stop depending on JEMALLOC_N() for function interception during testing. Instead, always define function pointers for interceptable functions, but mark them const unless testing, so that the compiler can optimize out the pointer dereferences. 
--- include/jemalloc/internal/arena_externs.h | 8 +- .../internal/jemalloc_internal_macros.h | 7 ++ include/jemalloc/internal/large_externs.h | 38 +++++---- include/jemalloc/internal/nstime.h | 38 ++++----- include/jemalloc/internal/prof_externs.h | 77 ++++++++++--------- include/jemalloc/internal/rtree_externs.h | 14 ++-- include/jemalloc/internal/witness_externs.h | 40 ++++------ src/arena.c | 16 +--- src/large.c | 30 ++------ src/nstime.c | 28 ++----- src/prof.c | 27 ++----- src/rtree.c | 54 +++---------- src/witness.c | 61 ++++----------- 13 files changed, 157 insertions(+), 281 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 7d56e44b..410709c6 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -59,12 +59,10 @@ void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); -#ifdef JEMALLOC_JET + typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); -extern arena_dalloc_junk_small_t *arena_dalloc_junk_small; -#else -void arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info); -#endif +extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; + void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index a1712cf5..4571895e 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -30,4 +30,11 @@ # define restrict #endif +/* Various function pointers are statick and immutable except during testing. 
*/ +#ifdef JEMALLOC_JET +# define JET_MUTABLE +#else +# define JET_MUTABLE const +#endif + #endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 2a208c83..3f36282c 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -1,28 +1,26 @@ #ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H #define JEMALLOC_INTERNAL_LARGE_EXTERNS_H -void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); -void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero); -bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, +void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); +void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, + bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero); -void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t usize, size_t alignment, bool zero, tcache_t *tcache); -#ifdef JEMALLOC_JET +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, + size_t alignment, bool zero, tcache_t *tcache); + typedef void (large_dalloc_junk_t)(void *, size_t); -extern large_dalloc_junk_t *large_dalloc_junk; +extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; + typedef void (large_dalloc_maybe_junk_t)(void *, size_t); -extern large_dalloc_maybe_junk_t *large_dalloc_maybe_junk; -#else -void large_dalloc_junk(void *ptr, size_t size); -void large_dalloc_maybe_junk(void *ptr, size_t size); -#endif -void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); -void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); -void large_dalloc(tsdn_t *tsdn, extent_t *extent); -size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); -void 
large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); -void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); +extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; + +void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); +void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); +void large_dalloc(tsdn_t *tsdn, extent_t *extent); +size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); +prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index cfccca09..ad7efb89 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -10,27 +10,23 @@ typedef struct { } nstime_t; void nstime_init(nstime_t *time, uint64_t ns); -void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); -uint64_t nstime_ns(const nstime_t *time); -uint64_t nstime_sec(const nstime_t *time); -uint64_t nstime_msec(const nstime_t *time); -uint64_t nstime_nsec(const nstime_t *time); -void nstime_copy(nstime_t *time, const nstime_t *source); -int nstime_compare(const nstime_t *a, const nstime_t *b); -void nstime_add(nstime_t *time, const nstime_t *addend); -void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); -void nstime_imultiply(nstime_t *time, uint64_t multiplier); -void nstime_idivide(nstime_t *time, uint64_t divisor); -uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); -#ifdef JEMALLOC_JET -typedef bool (nstime_monotonic_t)(void); -extern nstime_monotonic_t *nstime_monotonic; -typedef bool (nstime_update_t)(nstime_t *); -extern nstime_update_t *nstime_update; -#else -bool nstime_monotonic(void); -bool nstime_update(nstime_t *time); -#endif +void nstime_init2(nstime_t *time, uint64_t 
sec, uint64_t nsec); +uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_sec(const nstime_t *time); +uint64_t nstime_msec(const nstime_t *time); +uint64_t nstime_nsec(const nstime_t *time); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); +uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +typedef bool (nstime_monotonic_t)(void); +extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; + +typedef bool (nstime_update_t)(nstime_t *); +extern nstime_update_t *JET_MUTABLE nstime_update; #endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index cbd9795b..2891b8bd 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -40,48 +40,51 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; -void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(prof_bt_t *bt); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET -size_t prof_tdata_count(void); -size_t prof_bt_count(void); 
+size_t prof_tdata_count(void); +size_t prof_bt_count(void); +#endif typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *prof_dump_open; +extern prof_dump_open_t *JET_MUTABLE prof_dump_open; + typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); -extern prof_dump_header_t *prof_dump_header; -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, - uint64_t *accumobjs, uint64_t *accumbytes); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +#ifdef JEMALLOC_JET +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); #endif bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); -void prof_gdump(tsdn_t *tsdn); -prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); -const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, 
prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); +const char *prof_thread_name_get(tsd_t *tsd); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); +void prof_sample_threshold_update(prof_tdata_t *tdata); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index 5145c12c..5742f589 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -24,15 +24,19 @@ static const rtree_level_t rtree_levels[] = { }; bool rtree_new(rtree_t *rtree, bool zeroed); -#ifdef JEMALLOC_JET + typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *rtree_node_alloc; +extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; + typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_leaf_alloc_t *rtree_leaf_alloc; +extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; + typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); -extern rtree_node_dalloc_t *rtree_node_dalloc; +extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; + typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); -extern rtree_leaf_dalloc_t *rtree_leaf_dalloc; +extern 
rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; +#ifdef JEMALLOC_JET void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); #endif rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, diff --git a/include/jemalloc/internal/witness_externs.h b/include/jemalloc/internal/witness_externs.h index 5d91fde2..99df4c50 100644 --- a/include/jemalloc/internal/witness_externs.h +++ b/include/jemalloc/internal/witness_externs.h @@ -1,39 +1,25 @@ #ifndef JEMALLOC_INTERNAL_WITNESS_EXTERNS_H #define JEMALLOC_INTERNAL_WITNESS_EXTERNS_H -void witness_init(witness_t *witness, const char *name, witness_rank_t rank, +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness_comp_t *comp, void *opaque); -#ifdef JEMALLOC_JET + typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); -extern witness_lock_error_t *witness_lock_error; -#else -void witness_lock_error(const witness_list_t *witnesses, - const witness_t *witness); -#endif -#ifdef JEMALLOC_JET +extern witness_lock_error_t *JET_MUTABLE witness_lock_error; + typedef void (witness_owner_error_t)(const witness_t *); -extern witness_owner_error_t *witness_owner_error; -#else -void witness_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET +extern witness_owner_error_t *JET_MUTABLE witness_owner_error; + typedef void (witness_not_owner_error_t)(const witness_t *); -extern witness_not_owner_error_t *witness_not_owner_error; -#else -void witness_not_owner_error(const witness_t *witness); -#endif -#ifdef JEMALLOC_JET +extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; + typedef void (witness_depth_error_t)(const witness_list_t *, witness_rank_t rank_inclusive, unsigned depth); -extern witness_depth_error_t *witness_depth_error; -#else -void witness_depth_error(const witness_list_t *witnesses, - witness_rank_t rank_inclusive, unsigned depth); -#endif +extern witness_depth_error_t *JET_MUTABLE witness_depth_error; -void 
witnesses_cleanup(tsd_t *tsd); -void witness_prefork(tsd_t *tsd); -void witness_postfork_parent(tsd_t *tsd); -void witness_postfork_child(tsd_t *tsd); +void witnesses_cleanup(tsd_t *tsd); +void witness_prefork(tsd_t *tsd); +void witness_postfork_parent(tsd_t *tsd); +void witness_postfork_child(tsd_t *tsd); #endif /* JEMALLOC_INTERNAL_WITNESS_EXTERNS_H */ diff --git a/src/arena.c b/src/arena.c index edbd875f..045e6127 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1407,20 +1407,12 @@ arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { } } -#ifdef JEMALLOC_JET -#undef arena_dalloc_junk_small -#define arena_dalloc_junk_small JEMALLOC_N(n_arena_dalloc_junk_small) -#endif -void -arena_dalloc_junk_small(void *ptr, const arena_bin_info_t *bin_info) { +static void +arena_dalloc_junk_small_impl(void *ptr, const arena_bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } -#ifdef JEMALLOC_JET -#undef arena_dalloc_junk_small -#define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) -arena_dalloc_junk_small_t *arena_dalloc_junk_small = - JEMALLOC_N(n_arena_dalloc_junk_small); -#endif +arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = + arena_dalloc_junk_small_impl; static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { diff --git a/src/large.c b/src/large.c index f657ccbe..ed73dc22 100644 --- a/src/large.c +++ b/src/large.c @@ -68,26 +68,14 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, return extent_addr_get(extent); } -#ifdef JEMALLOC_JET -#undef large_dalloc_junk -#define large_dalloc_junk JEMALLOC_N(n_large_dalloc_junk) -#endif -void -large_dalloc_junk(void *ptr, size_t size) { +static void +large_dalloc_junk_impl(void *ptr, size_t size) { memset(ptr, JEMALLOC_FREE_JUNK, size); } -#ifdef JEMALLOC_JET -#undef large_dalloc_junk -#define large_dalloc_junk JEMALLOC_N(large_dalloc_junk) -large_dalloc_junk_t *large_dalloc_junk = 
JEMALLOC_N(n_large_dalloc_junk); -#endif +large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk = large_dalloc_junk_impl; -#ifdef JEMALLOC_JET -#undef large_dalloc_maybe_junk -#define large_dalloc_maybe_junk JEMALLOC_N(n_large_dalloc_maybe_junk) -#endif -void -large_dalloc_maybe_junk(void *ptr, size_t size) { +static void +large_dalloc_maybe_junk_impl(void *ptr, size_t size) { if (config_fill && have_dss && unlikely(opt_junk_free)) { /* * Only bother junk filling if the extent isn't about to be @@ -98,12 +86,8 @@ large_dalloc_maybe_junk(void *ptr, size_t size) { } } } -#ifdef JEMALLOC_JET -#undef large_dalloc_maybe_junk -#define large_dalloc_maybe_junk JEMALLOC_N(large_dalloc_maybe_junk) -large_dalloc_maybe_junk_t *large_dalloc_maybe_junk = - JEMALLOC_N(n_large_dalloc_maybe_junk); -#endif +large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = + large_dalloc_maybe_junk_impl; static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { diff --git a/src/nstime.c b/src/nstime.c index 20c00422..e5412274 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -131,27 +131,15 @@ nstime_get(nstime_t *time) { } #endif -#ifdef JEMALLOC_JET -#undef nstime_monotonic -#define nstime_monotonic JEMALLOC_N(n_nstime_monotonic) -#endif -bool -nstime_monotonic(void) { +static bool +nstime_monotonic_impl(void) { return NSTIME_MONOTONIC; #undef NSTIME_MONOTONIC } -#ifdef JEMALLOC_JET -#undef nstime_monotonic -#define nstime_monotonic JEMALLOC_N(nstime_monotonic) -nstime_monotonic_t *nstime_monotonic = JEMALLOC_N(n_nstime_monotonic); -#endif +nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; -#ifdef JEMALLOC_JET -#undef nstime_update -#define nstime_update JEMALLOC_N(n_nstime_update) -#endif -bool -nstime_update(nstime_t *time) { +static bool +nstime_update_impl(nstime_t *time) { nstime_t old_time; nstime_copy(&old_time, time); @@ -165,8 +153,4 @@ nstime_update(nstime_t *time) { return false; } -#ifdef JEMALLOC_JET -#undef nstime_update 
-#define nstime_update JEMALLOC_N(nstime_update) -nstime_update_t *nstime_update = JEMALLOC_N(n_nstime_update); -#endif +nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl; diff --git a/src/prof.c b/src/prof.c index d60680c1..470d926f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -932,9 +932,7 @@ prof_tdata_count(void) { return tdata_count; } -#endif -#ifdef JEMALLOC_JET size_t prof_bt_count(void) { size_t bt_count; @@ -955,12 +953,8 @@ prof_bt_count(void) { } #endif -#ifdef JEMALLOC_JET -#undef prof_dump_open -#define prof_dump_open JEMALLOC_N(prof_dump_open_impl) -#endif static int -prof_dump_open(bool propagate_err, const char *filename) { +prof_dump_open_impl(bool propagate_err, const char *filename) { int fd; fd = creat(filename, 0644); @@ -974,11 +968,7 @@ prof_dump_open(bool propagate_err, const char *filename) { return fd; } -#ifdef JEMALLOC_JET -#undef prof_dump_open -#define prof_dump_open JEMALLOC_N(prof_dump_open) -prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl); -#endif +prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; static bool prof_dump_flush(bool propagate_err) { @@ -1331,12 +1321,9 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } -#ifdef JEMALLOC_JET -#undef prof_dump_header -#define prof_dump_header JEMALLOC_N(prof_dump_header_impl) -#endif static bool -prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { +prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, + const prof_cnt_t *cnt_all) { bool ret; if (prof_dump_printf(propagate_err, @@ -1353,11 +1340,7 @@ prof_dump_header(tsdn_t *tsdn, bool propagate_err, const prof_cnt_t *cnt_all) { malloc_mutex_unlock(tsdn, &tdatas_mtx); return ret; } -#ifdef JEMALLOC_JET -#undef prof_dump_header -#define prof_dump_header JEMALLOC_N(prof_dump_header) -prof_dump_header_t *prof_dump_header = JEMALLOC_N(prof_dump_header_impl); -#endif +prof_dump_header_t *JET_MUTABLE prof_dump_header = 
prof_dump_header_impl; static bool prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, diff --git a/src/rtree.c b/src/rtree.c index 72786ff5..62df0143 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -25,65 +25,35 @@ rtree_new(rtree_t *rtree, bool zeroed) { return false; } -#ifdef JEMALLOC_JET -#undef rtree_node_alloc -#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc_impl) -#endif static rtree_node_elm_t * -rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { +rtree_node_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms * sizeof(rtree_node_elm_t), CACHELINE); } -#ifdef JEMALLOC_JET -#undef rtree_node_alloc -#define rtree_node_alloc JEMALLOC_N(rtree_node_alloc) -rtree_node_alloc_t *rtree_node_alloc = JEMALLOC_N(rtree_node_alloc_impl); -#endif +rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc = rtree_node_alloc_impl; -#ifdef JEMALLOC_JET -#undef rtree_node_dalloc -#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc_impl) -#endif -UNUSED static void -rtree_node_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { +static void +rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { /* Nodes are never deleted during normal operation. 
*/ not_reached(); } -#ifdef JEMALLOC_JET -#undef rtree_node_dalloc -#define rtree_node_dalloc JEMALLOC_N(rtree_node_dalloc) -rtree_node_dalloc_t *rtree_node_dalloc = JEMALLOC_N(rtree_node_dalloc_impl); -#endif +UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = + rtree_node_dalloc_impl; -#ifdef JEMALLOC_JET -#undef rtree_leaf_alloc -#define rtree_leaf_alloc JEMALLOC_N(rtree_leaf_alloc_impl) -#endif static rtree_leaf_elm_t * -rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { +rtree_leaf_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms * sizeof(rtree_leaf_elm_t), CACHELINE); } -#ifdef JEMALLOC_JET -#undef rtree_leaf_alloc -#define rtree_leaf_alloc JEMALLOC_N(rtree_leaf_alloc) -rtree_leaf_alloc_t *rtree_leaf_alloc = JEMALLOC_N(rtree_leaf_alloc_impl); -#endif +rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc = rtree_leaf_alloc_impl; -#ifdef JEMALLOC_JET -#undef rtree_leaf_dalloc -#define rtree_leaf_dalloc JEMALLOC_N(rtree_leaf_dalloc_impl) -#endif -UNUSED static void -rtree_leaf_dalloc(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { +static void +rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { /* Leaves are never deleted during normal operation. 
*/ not_reached(); } -#ifdef JEMALLOC_JET -#undef rtree_leaf_dalloc -#define rtree_leaf_dalloc JEMALLOC_N(rtree_leaf_dalloc) -rtree_leaf_dalloc_t *rtree_leaf_dalloc = JEMALLOC_N(rtree_leaf_dalloc_impl); -#endif +UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = + rtree_leaf_dalloc_impl; #ifdef JEMALLOC_JET # if RTREE_HEIGHT > 1 diff --git a/src/witness.c b/src/witness.c index edb736bf..0e910dca 100644 --- a/src/witness.c +++ b/src/witness.c @@ -14,12 +14,9 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness->opaque = opaque; } -#ifdef JEMALLOC_JET -#undef witness_lock_error -#define witness_lock_error JEMALLOC_N(n_witness_lock_error) -#endif -void -witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { +static void +witness_lock_error_impl(const witness_list_t *witnesses, + const witness_t *witness) { witness_t *w; malloc_printf(": Lock rank order reversal:"); @@ -29,51 +26,28 @@ witness_lock_error(const witness_list_t *witnesses, const witness_t *witness) { malloc_printf(" %s(%u)\n", witness->name, witness->rank); abort(); } -#ifdef JEMALLOC_JET -#undef witness_lock_error -#define witness_lock_error JEMALLOC_N(witness_lock_error) -witness_lock_error_t *witness_lock_error = JEMALLOC_N(n_witness_lock_error); -#endif +witness_lock_error_t *JET_MUTABLE witness_lock_error = witness_lock_error_impl; -#ifdef JEMALLOC_JET -#undef witness_owner_error -#define witness_owner_error JEMALLOC_N(n_witness_owner_error) -#endif -void -witness_owner_error(const witness_t *witness) { +static void +witness_owner_error_impl(const witness_t *witness) { malloc_printf(": Should own %s(%u)\n", witness->name, witness->rank); abort(); } -#ifdef JEMALLOC_JET -#undef witness_owner_error -#define witness_owner_error JEMALLOC_N(witness_owner_error) -witness_owner_error_t *witness_owner_error = JEMALLOC_N(n_witness_owner_error); -#endif +witness_owner_error_t *JET_MUTABLE witness_owner_error = + witness_owner_error_impl; -#ifdef 
JEMALLOC_JET -#undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(n_witness_not_owner_error) -#endif -void -witness_not_owner_error(const witness_t *witness) { +static void +witness_not_owner_error_impl(const witness_t *witness) { malloc_printf(": Should not own %s(%u)\n", witness->name, witness->rank); abort(); } -#ifdef JEMALLOC_JET -#undef witness_not_owner_error -#define witness_not_owner_error JEMALLOC_N(witness_not_owner_error) -witness_not_owner_error_t *witness_not_owner_error = - JEMALLOC_N(n_witness_not_owner_error); -#endif +witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error = + witness_not_owner_error_impl; -#ifdef JEMALLOC_JET -#undef witness_depth_error -#define witness_depth_error JEMALLOC_N(n_witness_depth_error) -#endif -void -witness_depth_error(const witness_list_t *witnesses, +static void +witness_depth_error_impl(const witness_list_t *witnesses, witness_rank_t rank_inclusive, unsigned depth) { witness_t *w; @@ -85,11 +59,8 @@ witness_depth_error(const witness_list_t *witnesses, malloc_printf("\n"); abort(); } -#ifdef JEMALLOC_JET -#undef witness_depth_error -#define witness_depth_error JEMALLOC_N(witness_depth_error) -witness_depth_error_t *witness_depth_error = JEMALLOC_N(n_witness_depth_error); -#endif +witness_depth_error_t *JET_MUTABLE witness_depth_error = + witness_depth_error_impl; void witnesses_cleanup(tsd_t *tsd) { From a4ae9707daee680a6fa0575646849fa8ef0bfad1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 3 May 2017 11:07:00 -0700 Subject: [PATCH 0862/2608] Remove unused private_unnamespace infrastructure. 
--- .gitignore | 1 - configure.ac | 9 --------- include/jemalloc/internal/private_unnamespace.sh | 5 ----- 3 files changed, 15 deletions(-) delete mode 100755 include/jemalloc/internal/private_unnamespace.sh diff --git a/.gitignore b/.gitignore index 9bbc5d66..df2fb21a 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,6 @@ /include/jemalloc/internal/jemalloc_preamble.h /include/jemalloc/internal/jemalloc_internal_defs.h /include/jemalloc/internal/private_namespace.h -/include/jemalloc/internal/private_unnamespace.h /include/jemalloc/internal/public_namespace.h /include/jemalloc/internal/public_symbols.txt /include/jemalloc/internal/public_unnamespace.h diff --git a/configure.ac b/configure.ac index 7ffdbea8..ffbe11a4 100644 --- a/configure.ac +++ b/configure.ac @@ -886,7 +886,6 @@ cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include cfghdrs_in="include/jemalloc/jemalloc_defs.h.in" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh" -cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_unnamespace.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.txt" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh" @@ -899,7 +898,6 @@ cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in" cfghdrs_out="include/jemalloc/jemalloc_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_namespace.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_unnamespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_symbols.txt" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h" @@ -1913,13 +1911,6 @@ 
AC_CONFIG_COMMANDS([include/jemalloc/internal/private_namespace.h], [ srcdir="${srcdir}" objroot="${objroot}" ]) -AC_CONFIG_COMMANDS([include/jemalloc/internal/private_unnamespace.h], [ - mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/private_unnamespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_unnamespace.h" -], [ - srcdir="${srcdir}" - objroot="${objroot}" -]) AC_CONFIG_COMMANDS([include/jemalloc/internal/public_symbols.txt], [ f="${objroot}include/jemalloc/internal/public_symbols.txt" mkdir -p "${objroot}include/jemalloc/internal" diff --git a/include/jemalloc/internal/private_unnamespace.sh b/include/jemalloc/internal/private_unnamespace.sh deleted file mode 100755 index 23fed8e8..00000000 --- a/include/jemalloc/internal/private_unnamespace.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -for symbol in `cat $1` ; do - echo "#undef ${symbol}" -done From 909f0482e479c1914a1bd528bf7ade702ed6415c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 4 May 2017 11:20:43 -0700 Subject: [PATCH 0863/2608] Automatically generate private symbol name mangling macros. Rather than using a manually maintained list of internal symbols to drive name mangling, add a compilation phase to automatically extract the list of internal symbols. This resolves #677. 
--- .gitignore | 18 +- Makefile.in | 53 +- configure.ac | 110 +++- .../internal/jemalloc_internal_defs.h.in | 12 + .../jemalloc/internal/jemalloc_preamble.h.in | 8 +- .../jemalloc/internal/private_namespace.sh | 2 +- include/jemalloc/internal/private_symbols.sh | 51 ++ include/jemalloc/internal/private_symbols.txt | 609 ------------------ src/jemalloc.c | 47 +- 9 files changed, 239 insertions(+), 671 deletions(-) create mode 100755 include/jemalloc/internal/private_symbols.sh delete mode 100644 include/jemalloc/internal/private_symbols.txt diff --git a/.gitignore b/.gitignore index df2fb21a..216d3c9d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,3 @@ -/*.gcov.* - /bin/jemalloc-config /bin/jemalloc.sh /bin/jeprof @@ -24,6 +22,9 @@ /include/jemalloc/internal/jemalloc_preamble.h /include/jemalloc/internal/jemalloc_internal_defs.h /include/jemalloc/internal/private_namespace.h +/include/jemalloc/internal/private_namespace_jet.h +/include/jemalloc/internal/private_symbols.awk +/include/jemalloc/internal/private_symbols_jet.awk /include/jemalloc/internal/public_namespace.h /include/jemalloc/internal/public_symbols.txt /include/jemalloc/internal/public_unnamespace.h @@ -39,8 +40,7 @@ /include/jemalloc/jemalloc_typedefs.h /src/*.[od] -/src/*.gcda -/src/*.gcno +/src/*.sym /test/test.sh test/include/test/jemalloc_test.h @@ -49,33 +49,23 @@ test/include/test/jemalloc_test_defs.h /test/integration/[A-Za-z]* !/test/integration/[A-Za-z]*.* /test/integration/*.[od] -/test/integration/*.gcda -/test/integration/*.gcno /test/integration/*.out /test/integration/cpp/[A-Za-z]* !/test/integration/cpp/[A-Za-z]*.* /test/integration/cpp/*.[od] -/test/integration/cpp/*.gcda -/test/integration/cpp/*.gcno /test/integration/cpp/*.out /test/src/*.[od] -/test/src/*.gcda -/test/src/*.gcno /test/stress/[A-Za-z]* !/test/stress/[A-Za-z]*.* /test/stress/*.[od] -/test/stress/*.gcda -/test/stress/*.gcno /test/stress/*.out /test/unit/[A-Za-z]* !/test/unit/[A-Za-z]*.* /test/unit/*.[od] 
-/test/unit/*.gcda -/test/unit/*.gcno /test/unit/*.out /VERSION diff --git a/Makefile.in b/Makefile.in index 418e92b4..94d8021f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -68,6 +68,8 @@ TEST_LD_MODE = @TEST_LD_MODE@ MKLIB = @MKLIB@ AR = @AR@ ARFLAGS = @ARFLAGS@ +DUMP_SYMS = @DUMP_SYMS@ +AWK := @AWK@ CC_MM = @CC_MM@ LM := @LM@ INSTALL = @INSTALL@ @@ -224,10 +226,15 @@ TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) +PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h +C_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym.$(O)) +C_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) CPP_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.$(O)) C_PIC_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.pic.$(O)) CPP_PIC_OBJS := $(CPP_SRCS:$(srcroot)%.cpp=$(objroot)%.pic.$(O)) +C_JET_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.sym.$(O)) +C_JET_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.sym) C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) @@ -268,24 +275,32 @@ build_doc: $(DOCS) # Include generated dependency files. 
# ifdef CC_MM +-include $(C_SYM_OBJS:%.$(O)=%.d) -include $(C_OBJS:%.$(O)=%.d) -include $(CPP_OBJS:%.$(O)=%.d) -include $(C_PIC_OBJS:%.$(O)=%.d) -include $(CPP_PIC_OBJS:%.$(O)=%.d) +-include $(C_JET_SYM_OBJS:%.$(O)=%.d) -include $(C_JET_OBJS:%.$(O)=%.d) -include $(C_TESTLIB_OBJS:%.$(O)=%.d) -include $(TESTS_OBJS:%.$(O)=%.d) -include $(TESTS_CPP_OBJS:%.$(O)=%.d) endif +$(C_SYM_OBJS): $(objroot)src/%.sym.$(O): $(srcroot)src/%.c +$(C_SYM_OBJS): CPPFLAGS += -DJEMALLOC_NO_PRIVATE_NAMESPACE +$(C_SYMS): $(objroot)src/%.sym: $(objroot)src/%.sym.$(O) $(C_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.c $(CPP_OBJS): $(objroot)src/%.$(O): $(srcroot)src/%.cpp $(C_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.c $(C_PIC_OBJS): CFLAGS += $(PIC_CFLAGS) $(CPP_PIC_OBJS): $(objroot)src/%.pic.$(O): $(srcroot)src/%.cpp $(CPP_PIC_OBJS): CXXFLAGS += $(PIC_CFLAGS) +$(C_JET_SYM_OBJS): $(objroot)src/%.jet.sym.$(O): $(srcroot)src/%.c +$(C_JET_SYM_OBJS): CPPFLAGS += -DJEMALLOC_JET -DJEMALLOC_NO_PRIVATE_NAMESPACE +$(C_JET_SYMS): $(objroot)src/%.jet.sym: $(objroot)src/%.jet.sym.$(O) $(C_JET_OBJS): $(objroot)src/%.jet.$(O): $(srcroot)src/%.c -$(C_JET_OBJS): CFLAGS += -DJEMALLOC_JET +$(C_JET_OBJS): CPPFLAGS += -DJEMALLOC_JET $(C_TESTLIB_UNIT_OBJS): $(objroot)test/src/%.unit.$(O): $(srcroot)test/src/%.c $(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c @@ -303,25 +318,42 @@ $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include ifneq ($(IMPORTLIB),$(SO)) -$(CPP_OBJS) $(C_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT +$(CPP_OBJS) $(C_SYM_OBJS) $(C_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif -ifndef CC_MM # Dependencies. 
+ifndef CC_MM HEADER_DIRS = $(srcroot)include/jemalloc/internal \ $(objroot)include/jemalloc $(objroot)include/jemalloc/internal -HEADERS = $(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h)) -$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): $(HEADERS) +HEADERS = $(filter-out $(PRIVATE_NAMESPACE_HDRS),$(wildcard $(foreach dir,$(HEADER_DIRS),$(dir)/*.h))) +$(C_SYM_OBJS) $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(HEADERS) $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif -$(C_OBJS) $(C_PIC_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): +$(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h +$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h + +$(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) $(CC) $(CFLAGS) -c $(CPPFLAGS) $(CTARGET) $< ifdef CC_MM @$(CC) -MM $(CPPFLAGS) -MT $@ -o $(@:%.$(O)=%.d) $< endif +$(C_SYMS): %.sym: + @mkdir -p $(@D) + $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols.awk > $@ + +$(C_JET_SYMS): %.sym: + @mkdir -p $(@D) + $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols_jet.awk > $@ + +$(objroot)include/jemalloc/internal/private_namespace.h: $(C_SYMS) + $(SHELL) $(objdir)include/jemalloc/internal/private_namespace.sh $^ > $@ + +$(objroot)include/jemalloc/internal/private_namespace_jet.h: $(C_JET_SYMS) + $(SHELL) $(objdir)include/jemalloc/internal/private_namespace.sh $^ > $@ + $(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): @mkdir -p 
$(@D) $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) $(CTARGET) $< @@ -347,7 +379,7 @@ $(STATIC_LIBS): @mkdir -p $(@D) $(AR) $(ARFLAGS)@AROUT@ $+ -$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(TESTS_UNIT_LINK_OBJS) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) +$(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) @mkdir -p $(@D) $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) @@ -452,16 +484,23 @@ stress: tests_stress stress_dir check: check_unit check_integration check_integration_decay check_integration_prof clean: + rm -f $(PRIVATE_NAMESPACE_HDRS) + rm -f $(C_SYM_OBJS) + rm -f $(C_SYMS) rm -f $(C_OBJS) rm -f $(CPP_OBJS) rm -f $(C_PIC_OBJS) rm -f $(CPP_PIC_OBJS) + rm -f $(C_JET_SYM_OBJS) + rm -f $(C_JET_SYMS) rm -f $(C_JET_OBJS) rm -f $(C_TESTLIB_OBJS) + rm -f $(C_SYM_OBJS:%.$(O)=%.d) rm -f $(C_OBJS:%.$(O)=%.d) rm -f $(CPP_OBJS:%.$(O)=%.d) rm -f $(C_PIC_OBJS:%.$(O)=%.d) rm -f $(CPP_PIC_OBJS:%.$(O)=%.d) + rm -f $(C_JET_SYM_OBJS:%.$(O)=%.d) rm -f $(C_JET_OBJS:%.$(O)=%.d) rm -f $(C_TESTLIB_OBJS:%.$(O)=%.d) rm -f $(TESTS_OBJS:%.$(O)=%$(EXE)) diff --git a/configure.ac b/configure.ac index ffbe11a4..6c1d4ffc 100644 --- a/configure.ac +++ b/configure.ac @@ -510,6 +510,8 @@ AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) AC_PROG_AR +AC_PROG_AWK + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. @@ -519,6 +521,8 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
default_retain="0" maps_coalesce="1" +DUMP_SYMS="nm -a" +SYM_PREFIX="" case "${host}" in *-*-darwin* | *-*-ios*) abi="macho" @@ -530,6 +534,7 @@ case "${host}" in DSO_LDFLAGS='-shared -Wl,-install_name,$(LIBDIR)/$(@F)' SOREV="${rev}.${so}" sbrk_deprecated="1" + SYM_PREFIX="_" ;; *-*-freebsd*) abi="elf" @@ -624,6 +629,7 @@ case "${host}" in DSO_LDFLAGS="-shared" link_whole_archive="1" fi + DUMP_SYMS="dumpbin /SYMBOLS" a="lib" libprefix="" SOREV="${so}" @@ -671,6 +677,7 @@ AC_SUBST([TEST_LD_MODE]) AC_SUBST([MKLIB]) AC_SUBST([ARFLAGS]) AC_SUBST([AROUT]) +AC_SUBST([DUMP_SYMS]) AC_SUBST([CC_MM]) dnl Determine whether libm must be linked to use e.g. log(3). @@ -773,16 +780,6 @@ AC_PROG_RANLIB AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) -public_syms="malloc_conf malloc_message malloc calloc posix_memalign aligned_alloc realloc free mallocx rallocx xallocx sallocx dallocx sdallocx nallocx mallctl mallctlnametomib mallctlbymib malloc_stats_print malloc_usable_size" - -dnl Check for allocator-related functions that should be wrapped. -AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) - public_syms="${public_syms} memalign"]) -AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) - public_syms="${public_syms} valloc"]) - dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], @@ -814,6 +811,49 @@ AC_ARG_WITH([export], fi] ) +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +dnl Check for additional platform-specific public API functions. 
+AC_CHECK_FUNC([memalign], + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) + public_syms="${public_syms} memalign"]) +AC_CHECK_FUNC([valloc], + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) + public_syms="${public_syms} valloc"]) + +dnl Check for allocator-related functions that should be wrapped. +wrap_syms= +if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_CHECK_FUNC([__libc_calloc], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ]) + wrap_syms="${wrap_syms} __libc_calloc"]) + AC_CHECK_FUNC([__libc_free], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ]) + wrap_syms="${wrap_syms} __libc_free"]) + AC_CHECK_FUNC([__libc_malloc], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ]) + wrap_syms="${wrap_syms} __libc_malloc"]) + AC_CHECK_FUNC([__libc_memalign], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ]) + wrap_syms="${wrap_syms} __libc_memalign"]) + AC_CHECK_FUNC([__libc_realloc], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ]) + wrap_syms="${wrap_syms} __libc_realloc"]) + AC_CHECK_FUNC([__libc_valloc], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ]) + wrap_syms="${wrap_syms} __libc_valloc"]) + AC_CHECK_FUNC([__posix_memalign], + [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ]) + wrap_syms="${wrap_syms} __posix_memalign"]) +fi + +case "${host}" in + *-*-mingw* | *-*-cygwin*) + wrap_syms="${wrap_syms} tls_callback" + ;; + *) + ;; +esac + dnl Mangle library-private APIs. 
AC_ARG_WITH([private_namespace], [AS_HELP_STRING([--with-private-namespace=], [Prefix to prepend to all library-private APIs])], @@ -885,8 +925,8 @@ cfgoutputs_tup="${cfgoutputs_tup} test/include/test/jemalloc_test.h:test/include cfghdrs_in="include/jemalloc/jemalloc_defs.h.in" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/jemalloc_internal_defs.h.in" +cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh" -cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.txt" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh" @@ -897,7 +937,8 @@ cfghdrs_in="${cfghdrs_in} test/include/test/jemalloc_test_defs.h.in" cfghdrs_out="include/jemalloc/jemalloc_defs.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc${install_suffix}.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_namespace.h" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols.awk" +cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols_jet.awk" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_symbols.txt" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h" @@ -1541,6 +1582,7 @@ AC_CHECK_FUNC([_malloc_thread_cleanup], ) if test "x$have__malloc_thread_cleanup" = "x1" ; then AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ]) + wrap_syms="${wrap_syms} _malloc_thread_cleanup" force_tls="1" fi @@ -1553,6 +1595,7 @@ AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb], ) if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then AC_DEFINE([JEMALLOC_MUTEX_INIT_CB]) + wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork" fi dnl Disable lazy locking by default. 
@@ -1588,6 +1631,7 @@ if test "x$enable_lazy_lock" = "x1" ; then ]) fi AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) + wrap_syms="${wrap_syms} pthread_create" fi AC_SUBST([enable_lazy_lock]) @@ -1870,7 +1914,10 @@ extern void *(* __realloc_hook)(void *ptr, size_t size); if (__free_hook && ptr) __free_hook(ptr); ], [je_cv_glibc_malloc_hook]) if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) + if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) + wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" + fi fi JE_COMPILABLE([glibc memalign hook], [ @@ -1882,7 +1929,10 @@ extern void *(* __memalign_hook)(size_t alignment, size_t size); if (__memalign_hook) ptr = __memalign_hook(16, 7); ], [je_cv_glibc_memalign_hook]) if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) + if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) + wrap_syms="${wrap_syms} __memalign_hook" + fi fi JE_COMPILABLE([pthreads adaptive mutexes], [ @@ -1904,13 +1954,6 @@ AC_HEADER_STDBOOL dnl ============================================================================ dnl Define commands that generate output files. 
-AC_CONFIG_COMMANDS([include/jemalloc/internal/private_namespace.h], [ - mkdir -p "${objroot}include/jemalloc/internal" - "${srcdir}/include/jemalloc/internal/private_namespace.sh" "${srcdir}/include/jemalloc/internal/private_symbols.txt" > "${objroot}include/jemalloc/internal/private_namespace.h" -], [ - srcdir="${srcdir}" - objroot="${objroot}" -]) AC_CONFIG_COMMANDS([include/jemalloc/internal/public_symbols.txt], [ f="${objroot}include/jemalloc/internal/public_symbols.txt" mkdir -p "${objroot}include/jemalloc/internal" @@ -1934,6 +1977,31 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_symbols.txt], [ public_syms="${public_syms}" JEMALLOC_PREFIX="${JEMALLOC_PREFIX}" ]) +AC_CONFIG_COMMANDS([include/jemalloc/internal/private_symbols.awk], [ + f="${objroot}include/jemalloc/internal/private_symbols.awk" + mkdir -p "${objroot}include/jemalloc/internal" + export_syms=`for sym in ${public_syms}; do echo "${JEMALLOC_PREFIX}${sym}"; done; for sym in ${wrap_syms}; do echo "${sym}"; done;` + "${srcdir}/include/jemalloc/internal/private_symbols.sh" "${SYM_PREFIX}" ${export_syms} > "${objroot}include/jemalloc/internal/private_symbols.awk" +], [ + srcdir="${srcdir}" + objroot="${objroot}" + public_syms="${public_syms}" + wrap_syms="${wrap_syms}" + SYM_PREFIX="${SYM_PREFIX}" + JEMALLOC_PREFIX="${JEMALLOC_PREFIX}" +]) +AC_CONFIG_COMMANDS([include/jemalloc/internal/private_symbols_jet.awk], [ + f="${objroot}include/jemalloc/internal/private_symbols_jet.awk" + mkdir -p "${objroot}include/jemalloc/internal" + export_syms=`for sym in ${public_syms}; do echo "jet_${sym}"; done; for sym in ${wrap_syms}; do echo "${sym}"; done;` + "${srcdir}/include/jemalloc/internal/private_symbols.sh" "${SYM_PREFIX}" ${export_syms} > "${objroot}include/jemalloc/internal/private_symbols_jet.awk" +], [ + srcdir="${srcdir}" + objroot="${objroot}" + public_syms="${public_syms}" + wrap_syms="${wrap_syms}" + SYM_PREFIX="${SYM_PREFIX}" +]) 
AC_CONFIG_COMMANDS([include/jemalloc/internal/public_namespace.h], [ mkdir -p "${objroot}include/jemalloc/internal" "${srcdir}/include/jemalloc/internal/public_namespace.sh" "${objroot}include/jemalloc/internal/public_symbols.txt" > "${objroot}include/jemalloc/internal/public_namespace.h" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index bccee167..78ddd376 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -8,6 +8,18 @@ #undef JEMALLOC_PREFIX #undef JEMALLOC_CPREFIX +/* + * Define overrides for non-standard allocator-related functions if they are + * present on the system. + */ +#undef JEMALLOC_OVERRIDE___LIBC_CALLOC +#undef JEMALLOC_OVERRIDE___LIBC_FREE +#undef JEMALLOC_OVERRIDE___LIBC_MALLOC +#undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN +#undef JEMALLOC_OVERRIDE___LIBC_REALLOC +#undef JEMALLOC_OVERRIDE___LIBC_VALLOC +#undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN + /* * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. * For shared libraries, symbol visibility mechanisms prevent these symbols diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index bc0ca641..9e9225ef 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -38,7 +38,13 @@ * want the inclusion of hooks to happen early, so that we hook as much as * possible. 
*/ -#include "jemalloc/internal/private_namespace.h" +#ifndef JEMALLOC_NO_PRIVATE_NAMESPACE +# ifndef JEMALLOC_JET +# include "jemalloc/internal/private_namespace.h" +# else +# include "jemalloc/internal/private_namespace_jet.h" +# endif +#endif #include "jemalloc/internal/hooks.h" static const bool config_debug = diff --git a/include/jemalloc/internal/private_namespace.sh b/include/jemalloc/internal/private_namespace.sh index 820862fe..6ef1346a 100755 --- a/include/jemalloc/internal/private_namespace.sh +++ b/include/jemalloc/internal/private_namespace.sh @@ -1,5 +1,5 @@ #!/bin/sh -for symbol in `cat $1` ; do +for symbol in `cat "$@"` ; do echo "#define ${symbol} JEMALLOC_N(${symbol})" done diff --git a/include/jemalloc/internal/private_symbols.sh b/include/jemalloc/internal/private_symbols.sh new file mode 100755 index 00000000..442a259f --- /dev/null +++ b/include/jemalloc/internal/private_symbols.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# +# Generate private_symbols[_jet].awk. +# +# Usage: private_symbols.sh * +# +# is typically "" or "_". + +sym_prefix=$1 +shift + +cat <' output. +# +# Handle lines like: +# 0000000000000008 D opt_junk +# 0000000000007574 T malloc_initialized +(NF == 3 && $2 ~ /^[ABCDGRSTVW]$/ && !($3 in exported_symbols) && $3 ~ /^[A-Za-z0-9_]+$/) { + print substr($3, 1+length(sym_prefix), length($3)-length(sym_prefix)) +} + +# Process 'dumpbin /SYMBOLS ' output. 
+# +# Handle lines like: +# 353 00008098 SECT4 notype External | opt_junk +# 3F1 00000000 SECT7 notype () External | malloc_initialized +($3 ~ /^SECT[0-9]+/ && $(NF-2) == "External" && !($NF in exported_symbols)) { + print $NF +} +EOF diff --git a/include/jemalloc/internal/private_symbols.txt b/include/jemalloc/internal/private_symbols.txt deleted file mode 100644 index eb9b3010..00000000 --- a/include/jemalloc/internal/private_symbols.txt +++ /dev/null @@ -1,609 +0,0 @@ -a0dalloc -a0malloc -arena_aalloc -arena_alloc_junk_small -arena_basic_stats_merge -arena_bin_index -arena_bin_info -arena_boot -arena_choose -arena_choose_hard -arena_choose_impl -arena_cleanup -arena_dalloc -arena_dalloc_bin_junked_locked -arena_dalloc_junk_small -arena_dalloc_no_tcache -arena_dalloc_promoted -arena_dalloc_small -arena_decay -arena_decay_tick -arena_decay_ticks -arena_dirty_decay_time_default_get -arena_dirty_decay_time_default_set -arena_dirty_decay_time_get -arena_dirty_decay_time_set -arena_muzzy_decay_time_default_get -arena_muzzy_decay_time_default_set -arena_muzzy_decay_time_get -arena_muzzy_decay_time_set -arena_destroy -arena_dss_prec_get -arena_dss_prec_set -arena_extent_alloc_large -arena_extent_dalloc_large_prep -arena_extent_ralloc_large_expand -arena_extent_ralloc_large_shrink -arena_extent_sn_next -arena_extents_dirty_dalloc -arena_get -arena_ichoose -arena_ind_get -arena_init -arena_internal_add -arena_internal_get -arena_internal_sub -arena_malloc -arena_malloc_hard -arena_migrate -arena_new -arena_nthreads_dec -arena_nthreads_get -arena_nthreads_inc -arena_palloc -arena_postfork_child -arena_postfork_parent -arena_prefork0 -arena_prefork1 -arena_prefork2 -arena_prefork3 -arena_prefork4 -arena_prefork5 -arena_prefork6 -arena_prof_accum -arena_prof_promote -arena_prof_tctx_get -arena_prof_tctx_reset -arena_prof_tctx_set -arena_ralloc -arena_ralloc_no_move -arena_reset -arena_salloc -arena_sdalloc -arena_sdalloc_no_tcache -arena_set -arena_slab_regind 
-arena_stats_init -arena_stats_mapped_add -arena_stats_merge -arena_tcache_fill_small -arena_tdata_get -arena_tdata_get_hard -arena_vsalloc -arenas -arenas_tdata_cleanup -b0get -base_alloc -base_boot -base_delete -base_extent_alloc -base_extent_hooks_get -base_extent_hooks_set -base_ind_get -base_new -base_postfork_child -base_postfork_parent -base_prefork -base_stats_get -bitmap_ffu -bitmap_full -bitmap_get -bitmap_info_init -bitmap_init -bitmap_set -bitmap_sfu -bitmap_size -bitmap_unset -bootstrap_calloc -bootstrap_free -bootstrap_malloc -bt_init -bt2gctx_mtx -buferror -ckh_count -ckh_delete -ckh_insert -ckh_iter -ckh_new -ckh_pointer_hash -ckh_pointer_keycomp -ckh_remove -ckh_search -ckh_string_hash -ckh_string_keycomp -ctl_boot -ctl_bymib -ctl_byname -ctl_nametomib -ctl_postfork_child -ctl_postfork_parent -ctl_prefork -decay_ticker_get -dss_prec_names -extent_ad_comp -extent_addr_get -extent_addr_randomize -extent_addr_set -extent_alloc -extent_alloc_cache -extent_alloc_dss -extent_alloc_mmap -extent_alloc_wrapper -extent_arena_get -extent_arena_set -extent_base_get -extent_before_get -extent_boot -extent_binit -extent_bsize_get -extent_bsize_set -extent_commit_wrapper -extent_committed_get -extent_committed_set -extent_dalloc -extent_dalloc_gap -extent_dalloc_mmap -extent_dalloc_wrapper -extent_decommit_wrapper -extent_destroy_wrapper -extent_dss_boot -extent_dss_mergeable -extent_dss_prec_get -extent_dss_prec_set -extent_ead_comp -extent_esn_comp -extent_esn_get -extent_esn_set -extent_esnead_comp -extent_heap_empty -extent_heap_first -extent_heap_insert -extent_heap_new -extent_heap_remove -extent_heap_remove_first -extent_hooks_default -extent_hooks_get -extent_hooks_set -extent_in_dss -extent_init -extent_last_get -extent_list_append -extent_list_first -extent_list_init -extent_list_last -extent_list_remove -extent_list_replace -extent_merge_wrapper -extent_nfree_dec -extent_nfree_get -extent_nfree_inc -extent_nfree_set -extent_past_get 
-extent_prof_tctx_get -extent_prof_tctx_set -extent_purge_forced_wrapper -extent_purge_lazy_wrapper -extent_size_get -extent_size_quantize_ceil -extent_size_quantize_floor -extent_size_set -extent_slab_data_get -extent_slab_data_get_const -extent_slab_get -extent_slab_set -extent_sn_comp -extent_sn_get -extent_sn_set -extent_snad_comp -extent_split_wrapper -extent_state_get -extent_state_set -extent_szind_get -extent_szind_get_maybe_invalid -extent_szind_set -extent_usize_get -extent_zeroed_get -extent_zeroed_set -extents_alloc -extents_dalloc -extents_evict -extents_init -extents_npages_get -extents_prefork -extents_postfork_child -extents_postfork_parent -extents_rtree -extents_state_get -ffs_llu -ffs_lu -ffs_u -ffs_u32 -ffs_u64 -ffs_zu -get_errno -hash -hash_fmix_32 -hash_fmix_64 -hash_get_block_32 -hash_get_block_64 -hash_rotl_32 -hash_rotl_64 -hash_x64_128 -hash_x86_128 -hash_x86_32 -hooks_arena_new_hook -hooks_libc_hook -iaalloc -ialloc -iallocztm -iarena_cleanup -idalloc -idalloctm -iealloc -index2size -index2size_compute -index2size_lookup -index2size_tab -ipalloc -ipalloct -ipallocztm -iralloc -iralloct -iralloct_realign -isalloc -isdalloct -isthreaded -ivsalloc -ixalloc -jemalloc_postfork_child -jemalloc_postfork_parent -jemalloc_prefork -large_dalloc -large_dalloc_finish -large_dalloc_junk -large_dalloc_maybe_junk -large_dalloc_prep_junked_locked -large_malloc -large_palloc -large_prof_tctx_get -large_prof_tctx_reset -large_prof_tctx_set -large_ralloc -large_ralloc_no_move -large_salloc -lg_floor -lg_prof_sample -malloc_cprintf -malloc_getcpu -malloc_initialized -malloc_mutex_prof_data_reset -malloc_mutex_assert_not_owner -malloc_mutex_assert_owner -malloc_mutex_boot -malloc_mutex_init -malloc_mutex_lock -malloc_mutex_lock_final -malloc_mutex_lock_slow -malloc_mutex_postfork_child -malloc_mutex_postfork_parent -malloc_mutex_prefork -malloc_mutex_trylock -malloc_mutex_trylock_final -malloc_mutex_unlock -malloc_printf -malloc_slow -malloc_snprintf 
-malloc_strtoumax -malloc_tsd_boot0 -malloc_tsd_boot1 -malloc_tsd_cleanup_register -malloc_tsd_dalloc -malloc_tsd_malloc -malloc_vcprintf -malloc_vsnprintf -malloc_write -mutex_owner_stats_update -narenas_auto -narenas_total_get -ncpus -nhbins -nstime_add -nstime_compare -nstime_copy -nstime_divide -nstime_idivide -nstime_imultiply -nstime_init -nstime_init2 -nstime_monotonic -nstime_msec -nstime_ns -nstime_nsec -nstime_sec -nstime_subtract -nstime_update -opt_abort -opt_dirty_decay_time -opt_muzzy_decay_time -opt_dss -opt_junk -opt_junk_alloc -opt_junk_free -opt_lg_prof_interval -opt_lg_prof_sample -opt_lg_tcache_max -opt_narenas -opt_prof -opt_prof_accum -opt_prof_active -opt_prof_final -opt_prof_gdump -opt_prof_leak -opt_prof_prefix -opt_prof_thread_active_init -opt_stats_print -opt_tcache -opt_utrace -opt_xmalloc -opt_zero -pages_boot -pages_commit -pages_decommit -pages_huge -pages_map -pages_nohuge -pages_purge_forced -pages_purge_lazy -pages_unmap -percpu_arena_choose -percpu_arena_ind_limit -percpu_arena_update -pind2sz -pind2sz_compute -pind2sz_lookup -pind2sz_tab -pow2_ceil_u32 -pow2_ceil_u64 -pow2_ceil_zu -prng_lg_range_u32 -prng_lg_range_u64 -prng_lg_range_zu -prng_range_u32 -prng_range_u64 -prng_range_zu -prng_state_next_u32 -prng_state_next_u64 -prng_state_next_zu -prof_accum_add -prof_accum_cancel -prof_accum_init -prof_active -prof_active_get -prof_active_get_unlocked -prof_active_set -prof_alloc_prep -prof_alloc_rollback -prof_backtrace -prof_boot0 -prof_boot1 -prof_boot2 -prof_bt_count -prof_cnt_all -prof_dump_header -prof_dump_open -prof_free -prof_free_sampled_object -prof_gdump -prof_gdump_get -prof_gdump_get_unlocked -prof_gdump_set -prof_gdump_val -prof_idump -prof_interval -prof_lookup -prof_malloc -prof_malloc_sample_object -prof_mdump -prof_postfork_child -prof_postfork_parent -prof_prefork0 -prof_prefork1 -prof_realloc -prof_reset -prof_sample_accum_update -prof_sample_threshold_update -prof_tctx_get -prof_tctx_reset -prof_tctx_set 
-prof_tdata_cleanup -prof_tdata_count -prof_tdata_get -prof_tdata_init -prof_tdata_reinit -prof_thread_active_get -prof_thread_active_init_get -prof_thread_active_init_set -prof_thread_active_set -prof_thread_name_get -prof_thread_name_set -psz2ind -psz2u -rtree_clear -rtree_ctx_data_init -rtree_delete -rtree_extent_read -rtree_extent_szind_read -rtree_leaf_alloc -rtree_leaf_dalloc -rtree_leaf_elm_acquire -rtree_leaf_elm_bits_extent_get -rtree_leaf_elm_bits_locked_get -rtree_leaf_elm_bits_read -rtree_leaf_elm_bits_slab_get -rtree_leaf_elm_bits_szind_get -rtree_leaf_elm_extent_read -rtree_leaf_elm_extent_write -rtree_leaf_elm_lookup -rtree_leaf_elm_lookup_hard -rtree_leaf_elm_release -rtree_leaf_elm_slab_read -rtree_leaf_elm_slab_write -rtree_leaf_elm_szind_read -rtree_leaf_elm_szind_slab_update -rtree_leaf_elm_szind_write -rtree_leaf_elm_witness_access -rtree_leaf_elm_witness_acquire -rtree_leaf_elm_witness_release -rtree_leaf_elm_write -rtree_leafkey -rtree_new -rtree_node_alloc -rtree_node_dalloc -rtree_read -rtree_subkey -rtree_szind_read -rtree_szind_slab_read -rtree_szind_slab_update -rtree_write -s2u -s2u_compute -s2u_lookup -sa2u -set_errno -size2index -size2index_compute -size2index_lookup -size2index_tab -spin_adaptive -stats_print -tcache_alloc_easy -tcache_alloc_large -tcache_alloc_small -tcache_alloc_small_hard -tcache_arena_associate -tcache_arena_reassociate -tcache_bin_flush_large -tcache_bin_flush_small -tcache_bin_info -tcache_boot -tcache_cleanup -tcache_create_explicit -tcache_dalloc_large -tcache_dalloc_small -tcache_data_init -tcache_enabled_get -tcache_enabled_set -tcache_event -tcache_event_hard -tcache_flush -tcache_get -tcache_maxclass -tcache_prefork -tcache_postfork_child -tcache_postfork_parent -tcache_salloc -tcache_stats_merge -tcaches -tcaches_create -tcaches_destroy -tcaches_flush -tcaches_get -ticker_copy -ticker_init -ticker_read -ticker_tick -ticker_ticks -tsd_arena_get -tsd_arena_set -tsd_arenap_get -tsd_arenas_tdata_bypassp_get 
-tsd_arenas_tdata_get -tsd_arenas_tdata_set -tsd_arenas_tdatap_get -tsd_boot -tsd_boot0 -tsd_boot1 -tsd_booted -tsd_booted_get -tsd_cleanup -tsd_cleanup_wrapper -tsd_fetch -tsd_fetch_impl -tsd_fetch_slow -tsd_get -tsd_get_allocates -tsd_iarena_get -tsd_iarena_set -tsd_iarenap_get -tsd_initialized -tsd_init_check_recursion -tsd_init_finish -tsd_init_head -tsd_narenas_tdata_get -tsd_narenas_tdata_set -tsd_narenas_tdatap_get -tsd_reentrancy_level_get -tsd_reentrancy_level_set -tsd_reentrancy_levelp_get -tsd_slow_update -tsd_wrapper_get -tsd_wrapper_set -tsd_nominal -tsd_prof_tdata_get -tsd_prof_tdata_set -tsd_prof_tdatap_get -tsd_rtree_ctx -tsd_rtree_ctxp_get -tsd_rtree_leaf_elm_witnessesp_get -tsd_set -tsd_tcache_enabled_get -tsd_tcache_enabled_set -tsd_tcache_enabledp_get -tsd_tcache_get -tsd_tcache_set -tsd_tcachep_get -tsd_thread_allocated_get -tsd_thread_allocated_set -tsd_thread_allocatedp_get -tsd_thread_deallocated_get -tsd_thread_deallocated_set -tsd_thread_deallocatedp_get -tsd_tls -tsd_tsd -tsd_tsdn -tsd_witness_fork_get -tsd_witness_fork_set -tsd_witness_forkp_get -tsd_witnessesp_get -tsdn_fetch -tsdn_null -tsdn_rtree_ctx -tsdn_tsd -witness_assert_depth -witness_assert_depth_to_rank -witness_assert_lockless -witness_assert_not_owner -witness_assert_owner -witness_depth_error -witness_init -witness_lock -witness_lock_error -witness_not_owner_error -witness_owner -witness_owner_error -witness_postfork_child -witness_postfork_parent -witness_prefork -witness_unlock -witnesses_cleanup -zone_register diff --git a/src/jemalloc.c b/src/jemalloc.c index b8c94133..b5ef3ace 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2299,33 +2299,44 @@ je_valloc(size_t size) { JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; -# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK +# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK JEMALLOC_EXPORT void 
*(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; -# endif +# endif -#ifdef CPU_COUNT +# ifdef CPU_COUNT /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the * link. */ -#define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +# define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) /* To force macro expansion of je_ prefix before stringification. */ -#define PREALIAS(je_fn) ALIAS(je_fn) -void *__libc_malloc(size_t size) PREALIAS(je_malloc); -void __libc_free(void* ptr) PREALIAS(je_free); -void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); -void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); -void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); -void *__libc_valloc(size_t size) PREALIAS(je_valloc); -int __posix_memalign(void** r, size_t a, size_t s) - PREALIAS(je_posix_memalign); -#undef PREALIAS -#undef ALIAS - -#endif - +# define PREALIAS(je_fn) ALIAS(je_fn) +# ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC +void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE +void __libc_free(void* ptr) PREALIAS(je_free); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC +void *__libc_malloc(size_t size) PREALIAS(je_malloc); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN +void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC +void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC +void *__libc_valloc(size_t size) PREALIAS(je_valloc); +# endif +# ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN +int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); +# endif +# undef PREALIAS +# undef ALIAS +# endif #endif /* From 18a83681cf6fa0ab79cd0a89f8755d53931a39fb Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 13 
May 2017 15:20:48 -0700 Subject: [PATCH 0864/2608] Refactor (MALLOCX_ARENA_MAX + 1) to be MALLOCX_ARENA_LIMIT. This resolves #673. --- include/jemalloc/internal/ctl.h | 4 ++-- include/jemalloc/internal/extent_inlines.h | 4 ++-- .../jemalloc/internal/jemalloc_internal_inlines_a.h | 2 +- include/jemalloc/internal/jemalloc_internal_types.h | 2 +- src/jemalloc.c | 10 +++++----- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index de74a75d..23c95510 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -83,9 +83,9 @@ typedef struct ctl_arenas_s { * Element 0 corresponds to merged stats for extant arenas (accessed via * MALLCTL_ARENAS_ALL), element 1 corresponds to merged stats for * destroyed arenas (accessed via MALLCTL_ARENAS_DESTROYED), and the - * remaining MALLOCX_ARENA_MAX+1 elements correspond to arenas. + * remaining MALLOCX_ARENA_LIMIT elements correspond to arenas. */ - ctl_arena_t *arenas[MALLOCX_ARENA_MAX + 3]; + ctl_arena_t *arenas[2 + MALLOCX_ARENA_LIMIT]; } ctl_arenas_t; int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index e1f8bd9e..0e6311d9 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -13,10 +13,10 @@ extent_arena_get(const extent_t *extent) { * The following check is omitted because we should never actually read * a NULL arena pointer. 
*/ - if (false && arena_ind > MALLOCX_ARENA_MAX) { + if (false && arena_ind >= MALLOCX_ARENA_LIMIT) { return NULL; } - assert(arena_ind <= MALLOCX_ARENA_MAX); + assert(arena_ind < MALLOCX_ARENA_LIMIT); return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 1755c3ac..c8e26298 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -350,7 +350,7 @@ static inline arena_t * arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; - assert(ind <= MALLOCX_ARENA_MAX); + assert(ind < MALLOCX_ARENA_LIMIT); ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); if (unlikely(ret == NULL)) { diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 663ed8b5..50f9d001 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -29,7 +29,7 @@ typedef int malloc_cpuid_t; #define MALLOCX_ARENA_MASK \ (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) /* NB: Arena index bias decreases the maximum number of arenas by 1. */ -#define MALLOCX_ARENA_MAX ((1 << MALLOCX_ARENA_BITS) - 2) +#define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1) #define MALLOCX_TCACHE_MASK \ (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) #define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) diff --git a/src/jemalloc.c b/src/jemalloc.c index b5ef3ace..13218449 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,7 +70,7 @@ static malloc_mutex_t arenas_lock; * Points to an arena_t. */ JEMALLOC_ALIGNED(CACHELINE) -atomic_p_t arenas[MALLOCX_ARENA_MAX + 1]; +atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; static atomic_u_t narenas_total; /* Use narenas_total_*(). 
*/ static arena_t *a0; /* arenas[0]; read-only after initialization. */ unsigned narenas_auto; /* Read-only after initialization. */ @@ -400,7 +400,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; assert(ind <= narenas_total_get()); - if (ind > MALLOCX_ARENA_MAX) { + if (ind >= MALLOCX_ARENA_LIMIT) { return NULL; } if (ind == narenas_total_get()) { @@ -1318,7 +1318,7 @@ malloc_init_narenas(void) { abort(); } } else { - if (ncpus > MALLOCX_ARENA_MAX) { + if (ncpus >= MALLOCX_ARENA_LIMIT) { malloc_printf(": narenas w/ percpu" "arena beyond limit (%d)\n", ncpus); if (opt_abort) { @@ -1364,8 +1364,8 @@ malloc_init_narenas(void) { /* * Limit the number of arenas to the indexing range of MALLOCX_ARENA(). */ - if (narenas_auto > MALLOCX_ARENA_MAX) { - narenas_auto = MALLOCX_ARENA_MAX; + if (narenas_auto >= MALLOCX_ARENA_LIMIT) { + narenas_auto = MALLOCX_ARENA_LIMIT - 1; malloc_printf(": Reducing narenas to limit (%d)\n", narenas_auto); } From b8ba3c313205fd8269b9f0f9f8460b172d2fa32d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 May 2017 11:03:14 -0700 Subject: [PATCH 0865/2608] Use srcroot path for private_namespace.sh. 
--- Makefile.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 94d8021f..5476617f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -349,10 +349,10 @@ $(C_JET_SYMS): %.sym: $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols_jet.awk > $@ $(objroot)include/jemalloc/internal/private_namespace.h: $(C_SYMS) - $(SHELL) $(objdir)include/jemalloc/internal/private_namespace.sh $^ > $@ + $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ $(objroot)include/jemalloc/internal/private_namespace_jet.h: $(C_JET_SYMS) - $(SHELL) $(objdir)include/jemalloc/internal/private_namespace.sh $^ > $@ + $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ $(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): @mkdir -p $(@D) From 04fec5e0844bd0cc10dcd290e82f5f6aa486e494 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 16 May 2017 23:12:59 -0700 Subject: [PATCH 0866/2608] Avoid over-rebuilding due to namespace mangling. Take care not to touch generated namespace mangling headers unless their contents would change. This resolves #838. 
--- .gitignore | 2 ++ Makefile.in | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 216d3c9d..9acf374b 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,9 @@ /include/jemalloc/internal/jemalloc_preamble.h /include/jemalloc/internal/jemalloc_internal_defs.h +/include/jemalloc/internal/private_namespace.gen.h /include/jemalloc/internal/private_namespace.h +/include/jemalloc/internal/private_namespace_jet.gen.h /include/jemalloc/internal/private_namespace_jet.h /include/jemalloc/internal/private_symbols.awk /include/jemalloc/internal/private_symbols_jet.awk diff --git a/Makefile.in b/Makefile.in index 5476617f..370ead58 100644 --- a/Makefile.in +++ b/Makefile.in @@ -227,6 +227,7 @@ TESTS_STRESS := $(srcroot)test/stress/microbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h +PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h) C_SYM_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym.$(O)) C_SYMS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.sym) C_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.$(O)) @@ -254,7 +255,7 @@ TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) .PHONY: install_doc_html install_doc_man install_doc install .PHONY: tests check clean distclean relclean -.SECONDARY : $(TESTS_OBJS) $(TESTS_CPP_OBJS) +.SECONDARY : $(PRIVATE_NAMESPACE_GEN_HDRS) $(TESTS_OBJS) $(TESTS_CPP_OBJS) # Default target. 
all: build_lib @@ -348,12 +349,15 @@ $(C_JET_SYMS): %.sym: @mkdir -p $(@D) $(DUMP_SYMS) $< | $(AWK) -f $(objroot)include/jemalloc/internal/private_symbols_jet.awk > $@ -$(objroot)include/jemalloc/internal/private_namespace.h: $(C_SYMS) +$(objroot)include/jemalloc/internal/private_namespace.gen.h: $(C_SYMS) $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ -$(objroot)include/jemalloc/internal/private_namespace_jet.h: $(C_JET_SYMS) +$(objroot)include/jemalloc/internal/private_namespace_jet.gen.h: $(C_JET_SYMS) $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ +%.h: %.gen.h + @if ! `cmp -s $< $@` ; then echo "cp $< $<"; cp $< $@ ; fi + $(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): @mkdir -p $(@D) $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) $(CTARGET) $< @@ -485,6 +489,7 @@ check: check_unit check_integration check_integration_decay check_integration_pr clean: rm -f $(PRIVATE_NAMESPACE_HDRS) + rm -f $(PRIVATE_NAMESPACE_GEN_HDRS) rm -f $(C_SYM_OBJS) rm -f $(C_SYMS) rm -f $(C_OBJS) From baf3e294e05ab62b0f80b825a76687c8a1ea001e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 May 2017 13:56:00 -0700 Subject: [PATCH 0867/2608] Add stats: arena uptime. --- doc/jemalloc.xml.in | 12 ++++++++++++ include/jemalloc/internal/arena_structs_b.h | 2 ++ include/jemalloc/internal/stats.h | 3 +++ src/arena.c | 7 +++++++ src/ctl.c | 8 ++++++++ src/stats.c | 10 ++++++++++ 6 files changed, 42 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d1b2e334..1efc9163 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2280,6 +2280,18 @@ struct extent_hooks_s { arena. + + + stats.arenas.<i>.uptime + (uint64_t) + r- + + Time elapsed (in nanoseconds) since the arena was + created. If <i> equals 0 or + MALLCTL_ARENAS_ALL, this is the uptime since malloc + initialization. 
+ + stats.arenas.<i>.pactive diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index d98b455e..99e5f6a0 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -261,6 +261,8 @@ struct arena_s { * Synchronization: internal. */ base_t *base; + /* Used to determine uptime. Read-only after initialization. */ + nstime_t create_time; }; /* Used in conjunction with tsd for fast arena-related context lookup. */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 385a8514..3f5c20c7 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -145,6 +145,9 @@ typedef struct arena_stats_s { /* One element for each large size class. */ malloc_large_stats_t lstats[NSIZES - NBINS]; + + /* Arena uptime. */ + nstime_t uptime; } arena_stats_t; #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/src/arena.c b/src/arena.c index 045e6127..03680e00 100644 --- a/src/arena.c +++ b/src/arena.c @@ -330,6 +330,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_prof_mutex_base) #undef READ_ARENA_MUTEX_PROF_DATA + nstime_copy(&astats->uptime, &arena->create_time); + nstime_update(&astats->uptime); + nstime_subtract(&astats->uptime, &arena->create_time); + for (szind_t i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; @@ -1965,6 +1969,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->base = base; + nstime_init(&arena->create_time, 0); + nstime_update(&arena->create_time); + /* We don't support reetrancy for arena 0 bootstrapping. 
*/ if (ind != 0 && hooks_arena_new_hook) { /* diff --git a/src/ctl.c b/src/ctl.c index 7d53a336..79f2447f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -157,6 +157,7 @@ CTL_PROTO(stats_arenas_i_lextents_j_nrequests) CTL_PROTO(stats_arenas_i_lextents_j_curlextents) INDEX_PROTO(stats_arenas_i_lextents_j) CTL_PROTO(stats_arenas_i_nthreads) +CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) CTL_PROTO(stats_arenas_i_dirty_decay_time) CTL_PROTO(stats_arenas_i_muzzy_decay_time) @@ -441,6 +442,7 @@ MUTEX_PROF_ARENA_MUTEXES static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("uptime"), CTL(stats_arenas_i_uptime)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, {NAME("dirty_decay_time"), CTL(stats_arenas_i_dirty_decay_time)}, {NAME("muzzy_decay_time"), CTL(stats_arenas_i_muzzy_decay_time)}, @@ -778,6 +780,10 @@ MUTEX_PROF_ARENA_MUTEXES accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); + if (ctl_arena->arena_ind == 0) { + sdstats->astats.uptime = astats->astats.uptime; + } + for (i = 0; i < NBINS; i++) { sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; @@ -2317,6 +2323,8 @@ CTL_RO_GEN(stats_arenas_i_dirty_decay_time, arenas_i(mib[2])->dirty_decay_time, CTL_RO_GEN(stats_arenas_i_muzzy_decay_time, arenas_i(mib[2])->muzzy_decay_time, ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) +CTL_RO_GEN(stats_arenas_i_uptime, + nstime_ns(&arenas_i(mib[2])->astats->astats.uptime), uint64_t) CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) diff --git a/src/stats.c b/src/stats.c index 34fc37f2..883c7d14 100644 --- a/src/stats.c +++ b/src/stats.c @@ -421,6 +421,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t 
large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests; size_t tcache_bytes; + uint64_t uptime; CTL_GET("arenas.page", &page, size_t); @@ -433,6 +434,15 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "assigned threads: %u\n", nthreads); } + CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"uptime_ns\": %"FMTu64",\n", uptime); + } else { + malloc_cprintf(write_cb, cbopaque, + "uptime: %"FMTu64"\n", uptime); + } + CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); if (json) { malloc_cprintf(write_cb, cbopaque, From 6e62c6286258e340308b4a989b4bd80232fed8e1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 17 May 2017 10:47:00 -0700 Subject: [PATCH 0868/2608] Refactor *decay_time into *decay_ms. Support millisecond resolution for decay times. Among other use cases this makes it possible to specify a short initial dirty-->muzzy decay phase, followed by a longer muzzy-->clean decay phase. This resolves #812. --- INSTALL.md | 2 +- Makefile.in | 4 +- doc/jemalloc.xml.in | 118 ++++++------ include/jemalloc/internal/arena_externs.h | 28 ++- include/jemalloc/internal/arena_structs_b.h | 4 +- include/jemalloc/internal/arena_types.h | 6 +- include/jemalloc/internal/ctl.h | 4 +- .../internal/jemalloc_internal_decls.h | 3 + src/arena.c | 130 ++++++------- src/ctl.c | 95 +++++----- src/jemalloc.c | 12 +- src/stats.c | 42 ++--- test/unit/decay.c | 24 +-- test/unit/decay.sh | 2 +- test/unit/mallctl.c | 172 +++++++++--------- test/unit/pack.sh | 2 +- 16 files changed, 320 insertions(+), 328 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index e0cfc0be..25f625af 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -116,7 +116,7 @@ any of the following arguments (not a definitive list) to 'configure': MALLOC_CONF environment variable. 
For example, to change the default decay time to 30 seconds: - --with-malloc-conf=decay_time:30 + --with-malloc-conf=decay_ms:30000 * `--enable-debug` diff --git a/Makefile.in b/Makefile.in index 370ead58..2f16fbf3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -479,8 +479,8 @@ ifeq ($(enable_prof), 1) $(MALLOC_CONF)="prof:true,prof_active:false" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) endif check_integration_decay: tests_integration check_integration_dir - $(MALLOC_CONF)="dirty_decay_time:-1,muzzy_decay_time:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) - $(MALLOC_CONF)="dirty_decay_time:0,muzzy_decay_time:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) + $(MALLOC_CONF)="dirty_decay_ms:-1,muzzy_decay_ms:-1" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) + $(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) stress: tests_stress stress_dir diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1efc9163..be018bfb 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -937,15 +937,15 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", percpu. - + - opt.dirty_decay_time + opt.dirty_decay_ms (ssize_t) r- - Approximate time in seconds from the creation of a set - of unused dirty pages until an equivalent set of unused dirty pages is - purged (i.e. converted to muzzy via e.g. 
+ Approximate time in milliseconds from the creation of a + set of unused dirty pages until an equivalent set of unused dirty pages + is purged (i.e. converted to muzzy via e.g. madvise(...MADV_FREE) if supported by the operating system, or converted to clean otherwise) and/or reused. Dirty pages are defined as previously having been @@ -955,35 +955,35 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", zero purge rate. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation. A decay time of -1 disables purging. The default decay time is 10 seconds. See arenas.dirty_decay_time + linkend="arenas.dirty_decay_ms">arenas.dirty_decay_ms and arena.<i>.muzzy_decay_time + linkend="arena.i.muzzy_decay_ms">arena.<i>.muzzy_decay_ms for related dynamic control options. See opt.muzzy_decay_time + linkend="opt.muzzy_decay_ms">opt.muzzy_decay_ms for a description of muzzy pages. - + - opt.muzzy_decay_time + opt.muzzy_decay_ms (ssize_t) r- - Approximate time in seconds from the creation of a set - of unused muzzy pages until an equivalent set of unused muzzy pages is - purged (i.e. converted to clean) and/or reused. Muzzy pages are defined - as previously having been unused dirty pages that were subsequently - purged in a manner that left them subject to the reclamation whims of - the operating system (e.g. + Approximate time in milliseconds from the creation of a + set of unused muzzy pages until an equivalent set of unused muzzy pages + is purged (i.e. converted to clean) and/or reused. Muzzy pages are + defined as previously having been unused dirty pages that were + subsequently purged in a manner that left them subject to the + reclamation whims of the operating system (e.g. madvise(...MADV_FREE)), and therefore in an indeterminate state. The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate. A decay time of 0 causes all unused muzzy pages to be purged immediately upon creation. 
A decay time of -1 disables purging. The default decay time is 10 seconds. See arenas.muzzy_decay_time + linkend="arenas.muzzy_decay_ms">arenas.muzzy_decay_ms and arena.<i>.muzzy_decay_time + linkend="arena.i.muzzy_decay_ms">arena.<i>.muzzy_decay_ms for related dynamic control options. @@ -1486,9 +1486,9 @@ malloc_conf = "xmalloc:true";]]> for arena <i>, or for all arenas if <i> equals MALLCTL_ARENAS_ALL. The proportion of unused dirty/muzzy pages to be purged depends on the current time; see opt.dirty_decay_time + linkend="opt.dirty_decay_ms">opt.dirty_decay_ms and opt.muzy_decay_time + linkend="opt.muzzy_decay_ms">opt.muzy_decay_ms for details. @@ -1550,35 +1550,35 @@ malloc_conf = "xmalloc:true";]]> settings. - + - arena.<i>.dirty_decay_time + arena.<i>.dirty_decay_ms (ssize_t) rw - Current per-arena approximate time in seconds from the - creation of a set of unused dirty pages until an equivalent set of + Current per-arena approximate time in milliseconds from + the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged and/or reused. Each time this interface is set, all currently unused dirty pages are considered to have fully decayed, which causes immediate purging of all unused dirty pages unless the decay time is set to -1 (i.e. purging disabled). See opt.dirty_decay_time + linkend="opt.dirty_decay_ms">opt.dirty_decay_ms for additional information. - + - arena.<i>.muzzy_decay_time + arena.<i>.muzzy_decay_ms (ssize_t) rw - Current per-arena approximate time in seconds from the - creation of a set of unused muzzy pages until an equivalent set of + Current per-arena approximate time in milliseconds from + the creation of a set of unused muzzy pages until an equivalent set of unused muzzy pages is purged and/or reused. Each time this interface is set, all currently unused muzzy pages are considered to have fully decayed, which causes immediate purging of all unused muzzy pages unless the decay time is set to -1 (i.e. 
purging disabled). See opt.muzzy_decay_time + linkend="opt.muzzy_decay_ms">opt.muzzy_decay_ms for additional information. @@ -1825,33 +1825,35 @@ struct extent_hooks_s { Current limit on number of arenas. - + - arenas.dirty_decay_time + arenas.dirty_decay_ms (ssize_t) rw - Current default per-arena approximate time in seconds - from the creation of a set of unused dirty pages until an equivalent set - of unused dirty pages is purged and/or reused, used to initialize arena.<i>.dirty_decay_time + Current default per-arena approximate time in + milliseconds from the creation of a set of unused dirty pages until an + equivalent set of unused dirty pages is purged and/or reused, used to + initialize arena.<i>.dirty_decay_ms during arena creation. See opt.dirty_decay_time + linkend="opt.dirty_decay_ms">opt.dirty_decay_ms for additional information. - + - arenas.muzzy_decay_time + arenas.muzzy_decay_ms (ssize_t) rw - Current default per-arena approximate time in seconds - from the creation of a set of unused muzzy pages until an equivalent set - of unused muzzy pages is purged and/or reused, used to initialize arena.<i>.muzzy_decay_time + Current default per-arena approximate time in + milliseconds from the creation of a set of unused muzzy pages until an + equivalent set of unused muzzy pages is purged and/or reused, used to + initialize arena.<i>.muzzy_decay_ms during arena creation. See opt.muzzy_decay_time + linkend="opt.muzzy_decay_ms">opt.muzzy_decay_ms for additional information. @@ -2244,29 +2246,29 @@ struct extent_hooks_s { - + - stats.arenas.<i>.dirty_decay_time + stats.arenas.<i>.dirty_decay_ms (ssize_t) r- - Approximate time in seconds from the creation of a set - of unused dirty pages until an equivalent set of unused dirty pages is - purged and/or reused. See opt.dirty_decay_time + Approximate time in milliseconds from the creation of a + set of unused dirty pages until an equivalent set of unused dirty pages + is purged and/or reused. 
See opt.dirty_decay_ms for details. - + - stats.arenas.<i>.muzzy_decay_time + stats.arenas.<i>.muzzy_decay_ms (ssize_t) r- - Approximate time in seconds from the creation of a set - of unused muzzy pages until an equivalent set of unused muzzy pages is - purged and/or reused. See opt.muzzy_decay_time + Approximate time in milliseconds from the creation of a + set of unused muzzy pages until an equivalent set of unused muzzy pages + is purged and/or reused. See opt.muzzy_decay_ms for details. @@ -2310,7 +2312,7 @@ struct extent_hooks_s { Number of pages within unused extents that are potentially dirty, and for which madvise() or similar has not been called. See opt.dirty_decay_time + linkend="opt.dirty_decay_ms">opt.dirty_decay_ms for a description of dirty pages. @@ -2322,7 +2324,7 @@ struct extent_hooks_s { Number of pages within unused extents that are muzzy. See opt.muzzy_decay_time + linkend="opt.muzzy_decay_ms">opt.muzzy_decay_ms for a description of muzzy pages. diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 410709c6..292b8d6d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -13,8 +13,8 @@ static const size_t large_pad = #endif ; -extern ssize_t opt_dirty_decay_time; -extern ssize_t opt_muzzy_decay_time; +extern ssize_t opt_dirty_decay_ms; +extern ssize_t opt_muzzy_decay_ms; extern const arena_bin_info_t arena_bin_info[NBINS]; @@ -27,10 +27,10 @@ void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *dirty_decay_time, - ssize_t *muzzy_decay_time, size_t *nactive, size_t *ndirty, size_t *nmuzzy); + unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, + ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); 
void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *dirty_decay_time, ssize_t *muzzy_decay_time, + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, @@ -46,12 +46,10 @@ void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -ssize_t arena_dirty_decay_time_get(arena_t *arena); -bool arena_dirty_decay_time_set(tsdn_t *tsdn, arena_t *arena, - ssize_t decay_time); -ssize_t arena_muzzy_decay_time_get(arena_t *arena); -bool arena_muzzy_decay_time_set(tsdn_t *tsdn, arena_t *arena, - ssize_t decay_time); +ssize_t arena_dirty_decay_ms_get(arena_t *arena); +bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +ssize_t arena_muzzy_decay_ms_get(arena_t *arena); +bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); @@ -79,10 +77,10 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -ssize_t arena_dirty_decay_time_default_get(void); -bool arena_dirty_decay_time_default_set(ssize_t decay_time); -ssize_t arena_muzzy_decay_time_default_get(void); -bool arena_muzzy_decay_time_default_set(ssize_t decay_time); +ssize_t arena_dirty_decay_ms_default_get(void); +bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); +ssize_t arena_muzzy_decay_ms_default_get(void); +bool 
arena_muzzy_decay_ms_default_set(ssize_t decay_ms); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 99e5f6a0..459dd89d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -56,11 +56,11 @@ struct arena_decay_s { */ bool purging; /* - * Approximate time in seconds from the creation of a set of unused + * Approximate time in milliseconds from the creation of a set of unused * dirty pages until an equivalent set of unused dirty pages is purged * and/or reused. */ - atomic_zd_t time; + atomic_zd_t time_ms; /* time / SMOOTHSTEP_NSTEPS. */ nstime_t interval; /* diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index e243aabf..34d4f6f2 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -7,9 +7,9 @@ #define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) #define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) -/* Default decay times in seconds. */ -#define DIRTY_DECAY_TIME_DEFAULT 10 -#define MUZZY_DECAY_TIME_DEFAULT 10 +/* Default decay times in milliseconds. */ +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000) /* Number of event ticks between time checks. */ #define DECAY_NTICKS_PER_UPDATE 1000 diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 23c95510..60b3979f 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -64,8 +64,8 @@ struct ctl_arena_s { /* Basic stats, supported even if !config_stats. 
*/ unsigned nthreads; const char *dss; - ssize_t dirty_decay_time; - ssize_t muzzy_decay_time; + ssize_t dirty_decay_ms; + ssize_t muzzy_decay_ms; size_t pactive; size_t pdirty; size_t pmuzzy; diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index d75de0b9..1efdb56b 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -41,6 +41,9 @@ #ifndef SIZE_T_MAX # define SIZE_T_MAX SIZE_MAX #endif +#ifndef SSIZE_MAX +# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) +#endif #include #include #include diff --git a/src/arena.c b/src/arena.c index 03680e00..42bfc6b1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -18,11 +18,11 @@ const char *percpu_arena_mode_names[] = { const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT; -ssize_t opt_dirty_decay_time = DIRTY_DECAY_TIME_DEFAULT; -ssize_t opt_muzzy_decay_time = MUZZY_DECAY_TIME_DEFAULT; +ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT; +ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; -static atomic_zd_t dirty_decay_time_default; -static atomic_zd_t muzzy_decay_time_default; +static atomic_zd_t dirty_decay_ms_default; +static atomic_zd_t muzzy_decay_ms_default; const arena_bin_info_t arena_bin_info[NBINS] = { #define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ @@ -196,13 +196,12 @@ arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *dirty_decay_time, ssize_t *muzzy_decay_time, - size_t *nactive, size_t *ndirty, - size_t *nmuzzy) { + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, + size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *dirty_decay_time = 
arena_dirty_decay_time_get(arena); - *muzzy_decay_time = arena_muzzy_decay_time_get(arena); + *dirty_decay_ms = arena_dirty_decay_ms_get(arena); + *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); *ndirty += extents_npages_get(&arena->extents_dirty); *nmuzzy += extents_npages_get(&arena->extents_muzzy); @@ -210,13 +209,13 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, - const char **dss, ssize_t *dirty_decay_time, ssize_t *muzzy_decay_time, + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { cassert(config_stats); - arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_time, - muzzy_decay_time, nactive, ndirty, nmuzzy); + arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, + muzzy_decay_ms, nactive, ndirty, nmuzzy); size_t base_allocated, base_resident, base_mapped; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, @@ -359,7 +358,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, extent); - if (arena_dirty_decay_time_get(arena) == 0) { + if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, true); } } @@ -574,13 +573,13 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } static ssize_t -arena_decay_time_read(arena_decay_t *decay) { - return atomic_load_zd(&decay->time, ATOMIC_RELAXED); +arena_decay_ms_read(arena_decay_t *decay) { + return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); } static void -arena_decay_time_write(arena_decay_t *decay, ssize_t decay_time) { - atomic_store_zd(&decay->time, decay_time, ATOMIC_RELAXED); +arena_decay_ms_write(arena_decay_t *decay, ssize_t 
decay_ms) { + atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); } static void @@ -591,7 +590,7 @@ arena_decay_deadline_init(arena_decay_t *decay) { */ nstime_copy(&decay->deadline, &decay->epoch); nstime_add(&decay->deadline, &decay->interval); - if (arena_decay_time_read(decay) > 0) { + if (arena_decay_ms_read(decay) > 0) { nstime_t jitter; nstime_init(&jitter, prng_range_u64(&decay->jitter_state, @@ -711,11 +710,11 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static void -arena_decay_reinit(arena_decay_t *decay, extents_t *extents, - ssize_t decay_time) { - arena_decay_time_write(decay, decay_time); - if (decay_time > 0) { - nstime_init2(&decay->interval, decay_time, 0); +arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { + arena_decay_ms_write(decay, decay_ms); + if (decay_ms > 0) { + nstime_init(&decay->interval, (uint64_t)decay_ms * + KQU(1000000)); nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } @@ -728,7 +727,7 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, } static bool -arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time, +arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, decay_stats_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { @@ -739,7 +738,7 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time, return true; } decay->purging = false; - arena_decay_reinit(decay, extents, decay_time); + arena_decay_reinit(decay, extents, decay_ms); /* Memory is zeroed, so there is no need to clear stats. 
*/ if (config_stats) { decay->stats = stats; @@ -748,11 +747,12 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_time, } static bool -arena_decay_time_valid(ssize_t decay_time) { - if (decay_time < -1) { +arena_decay_ms_valid(ssize_t decay_ms) { + if (decay_ms < -1) { return false; } - if (decay_time == -1 || (uint64_t)decay_time <= NSTIME_SEC_MAX) { + if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * + KQU(1000)) { return true; } return false; @@ -764,9 +764,9 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ - ssize_t decay_time = arena_decay_time_read(decay); - if (decay_time <= 0) { - if (decay_time == 0) { + ssize_t decay_ms = arena_decay_ms_read(decay); + if (decay_ms <= 0) { + if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, 0); } @@ -806,24 +806,24 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static ssize_t -arena_decay_time_get(arena_decay_t *decay) { - return arena_decay_time_read(decay); +arena_decay_ms_get(arena_decay_t *decay) { + return arena_decay_ms_read(decay); } ssize_t -arena_dirty_decay_time_get(arena_t *arena) { - return arena_decay_time_get(&arena->decay_dirty); +arena_dirty_decay_ms_get(arena_t *arena) { + return arena_decay_ms_get(&arena->decay_dirty); } ssize_t -arena_muzzy_decay_time_get(arena_t *arena) { - return arena_decay_time_get(&arena->decay_muzzy); +arena_muzzy_decay_ms_get(arena_t *arena) { + return arena_decay_ms_get(&arena->decay_muzzy); } static bool -arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, ssize_t decay_time) { - if (!arena_decay_time_valid(decay_time)) { +arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, ssize_t decay_ms) { + if (!arena_decay_ms_valid(decay_ms)) { return true; } @@ -832,11 +832,11 @@ 
arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * Restart decay backlog from scratch, which may cause many dirty pages * to be immediately purged. It would conceptually be possible to map * the old backlog onto the new backlog, but there is no justification - * for such complexity since decay_time changes are intended to be + * for such complexity since decay_ms changes are intended to be * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(decay, extents, decay_time); + arena_decay_reinit(decay, extents, decay_ms); arena_maybe_decay(tsdn, arena, decay, extents); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -844,15 +844,17 @@ arena_decay_time_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } bool -arena_dirty_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { - return arena_decay_time_set(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, decay_time); +arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, + ssize_t decay_ms) { + return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, + &arena->extents_dirty, decay_ms); } bool -arena_muzzy_decay_time_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_time) { - return arena_decay_time_set(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, decay_time); +arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, + ssize_t decay_ms) { + return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, + &arena->extents_muzzy, decay_ms); } static size_t @@ -885,7 +887,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } npurged = 0; - ssize_t muzzy_decay_time = arena_muzzy_decay_time_get(arena); + ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); for (extent_t *extent = extent_list_first(decay_extents); extent != NULL; extent = extent_list_first(decay_extents)) { if (config_stats) { @@ -898,7 +900,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, case 
extent_state_active: not_reached(); case extent_state_dirty: - if (!all && muzzy_decay_time != 0 && + if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { @@ -1789,30 +1791,30 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { } ssize_t -arena_dirty_decay_time_default_get(void) { - return atomic_load_zd(&dirty_decay_time_default, ATOMIC_RELAXED); +arena_dirty_decay_ms_default_get(void) { + return atomic_load_zd(&dirty_decay_ms_default, ATOMIC_RELAXED); } bool -arena_dirty_decay_time_default_set(ssize_t decay_time) { - if (!arena_decay_time_valid(decay_time)) { +arena_dirty_decay_ms_default_set(ssize_t decay_ms) { + if (!arena_decay_ms_valid(decay_ms)) { return true; } - atomic_store_zd(&dirty_decay_time_default, decay_time, ATOMIC_RELAXED); + atomic_store_zd(&dirty_decay_ms_default, decay_ms, ATOMIC_RELAXED); return false; } ssize_t -arena_muzzy_decay_time_default_get(void) { - return atomic_load_zd(&muzzy_decay_time_default, ATOMIC_RELAXED); +arena_muzzy_decay_ms_default_get(void) { + return atomic_load_zd(&muzzy_decay_ms_default, ATOMIC_RELAXED); } bool -arena_muzzy_decay_time_default_set(ssize_t decay_time) { - if (!arena_decay_time_valid(decay_time)) { +arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { + if (!arena_decay_ms_valid(decay_ms)) { return true; } - atomic_store_zd(&muzzy_decay_time_default, decay_time, ATOMIC_RELAXED); + atomic_store_zd(&muzzy_decay_ms_default, decay_ms, ATOMIC_RELAXED); return false; } @@ -1933,11 +1935,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (arena_decay_init(&arena->decay_dirty, &arena->extents_dirty, - arena_dirty_decay_time_default_get(), &arena->stats.decay_dirty)) { + arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { goto label_error; } if (arena_decay_init(&arena->decay_muzzy, &arena->extents_muzzy, - arena_muzzy_decay_time_default_get(), &arena->stats.decay_muzzy)) { + 
arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { goto label_error; } @@ -1993,8 +1995,8 @@ label_error: void arena_boot(void) { - arena_dirty_decay_time_default_set(opt_dirty_decay_time); - arena_muzzy_decay_time_default_set(opt_muzzy_decay_time); + arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); + arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); } void diff --git a/src/ctl.c b/src/ctl.c index 79f2447f..296e74f2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -78,8 +78,8 @@ CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) -CTL_PROTO(opt_dirty_decay_time) -CTL_PROTO(opt_muzzy_decay_time) +CTL_PROTO(opt_dirty_decay_ms) +CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) @@ -106,8 +106,8 @@ CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_destroy) CTL_PROTO(arena_i_dss) -CTL_PROTO(arena_i_dirty_decay_time) -CTL_PROTO(arena_i_muzzy_decay_time) +CTL_PROTO(arena_i_dirty_decay_ms) +CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) @@ -117,8 +117,8 @@ INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lextent_i_size) INDEX_PROTO(arenas_lextent_i) CTL_PROTO(arenas_narenas) -CTL_PROTO(arenas_dirty_decay_time) -CTL_PROTO(arenas_muzzy_decay_time) +CTL_PROTO(arenas_dirty_decay_ms) +CTL_PROTO(arenas_muzzy_decay_ms) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) CTL_PROTO(arenas_tcache_max) @@ -159,8 +159,8 @@ INDEX_PROTO(stats_arenas_i_lextents_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) -CTL_PROTO(stats_arenas_i_dirty_decay_time) -CTL_PROTO(stats_arenas_i_muzzy_decay_time) +CTL_PROTO(stats_arenas_i_dirty_decay_ms) +CTL_PROTO(stats_arenas_i_muzzy_decay_ms) CTL_PROTO(stats_arenas_i_pactive) CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_pmuzzy) @@ -265,8 +265,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), 
CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, - {NAME("dirty_decay_time"), CTL(opt_dirty_decay_time)}, - {NAME("muzzy_decay_time"), CTL(opt_muzzy_decay_time)}, + {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, @@ -299,8 +299,8 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("reset"), CTL(arena_i_reset)}, {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, - {NAME("dirty_decay_time"), CTL(arena_i_dirty_decay_time)}, - {NAME("muzzy_decay_time"), CTL(arena_i_muzzy_decay_time)}, + {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} }; static const ctl_named_node_t super_arena_i_node[] = { @@ -337,8 +337,8 @@ static const ctl_indexed_node_t arenas_lextent_node[] = { static const ctl_named_node_t arenas_node[] = { {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("dirty_decay_time"), CTL(arenas_dirty_decay_time)}, - {NAME("muzzy_decay_time"), CTL(arenas_muzzy_decay_time)}, + {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, @@ -444,8 +444,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("uptime"), CTL(stats_arenas_i_uptime)}, {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("dirty_decay_time"), CTL(stats_arenas_i_dirty_decay_time)}, - {NAME("muzzy_decay_time"), CTL(stats_arenas_i_muzzy_decay_time)}, + {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(stats_arenas_i_muzzy_decay_ms)}, {NAME("pactive"), CTL(stats_arenas_i_pactive)}, 
{NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, @@ -644,8 +644,8 @@ static void ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->nthreads = 0; ctl_arena->dss = dss_prec_names[dss_prec_limit]; - ctl_arena->dirty_decay_time = -1; - ctl_arena->muzzy_decay_time = -1; + ctl_arena->dirty_decay_ms = -1; + ctl_arena->muzzy_decay_ms = -1; ctl_arena->pactive = 0; ctl_arena->pdirty = 0; ctl_arena->pmuzzy = 0; @@ -668,8 +668,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { if (config_stats) { arena_stats_merge(tsdn, arena, &ctl_arena->nthreads, - &ctl_arena->dss, &ctl_arena->dirty_decay_time, - &ctl_arena->muzzy_decay_time, &ctl_arena->pactive, + &ctl_arena->dss, &ctl_arena->dirty_decay_ms, + &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive, &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats); @@ -687,8 +687,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, - &ctl_arena->dss, &ctl_arena->dirty_decay_time, - &ctl_arena->muzzy_decay_time, &ctl_arena->pactive, + &ctl_arena->dss, &ctl_arena->dirty_decay_ms, + &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive, &ctl_arena->pdirty, &ctl_arena->pmuzzy); } } @@ -1465,8 +1465,8 @@ CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) -CTL_RO_NL_GEN(opt_dirty_decay_time, opt_dirty_decay_time, ssize_t) -CTL_RO_NL_GEN(opt_muzzy_decay_time, opt_muzzy_decay_time, ssize_t) +CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) +CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, 
opt_zero, opt_zero, bool) @@ -1955,7 +1955,7 @@ label_return: } static int -arena_i_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, +arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; unsigned arena_ind; @@ -1969,8 +1969,8 @@ arena_i_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, } if (oldp != NULL && oldlenp != NULL) { - size_t oldval = dirty ? arena_dirty_decay_time_get(arena) : - arena_muzzy_decay_time_get(arena); + size_t oldval = dirty ? arena_dirty_decay_ms_get(arena) : + arena_muzzy_decay_ms_get(arena); READ(oldval, ssize_t); } if (newp != NULL) { @@ -1978,10 +1978,9 @@ arena_i_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_time_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp) : - arena_muzzy_decay_time_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp)) { + if (dirty ? 
arena_dirty_decay_ms_set(tsd_tsdn(tsd), arena, + *(ssize_t *)newp) : arena_muzzy_decay_ms_set(tsd_tsdn(tsd), + arena, *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -1993,17 +1992,17 @@ label_return: } static int -arena_i_dirty_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +arena_i_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arena_i_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, - newp, newlen, true); + return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, + newlen, true); } static int -arena_i_muzzy_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +arena_i_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arena_i_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, - newp, newlen, false); + return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, + newlen, false); } static int @@ -2087,13 +2086,13 @@ label_return: } static int -arenas_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, +arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; if (oldp != NULL && oldlenp != NULL) { - size_t oldval = (dirty ? arena_dirty_decay_time_default_get() : - arena_muzzy_decay_time_default_get()); + size_t oldval = (dirty ? arena_dirty_decay_ms_default_get() : + arena_muzzy_decay_ms_default_get()); READ(oldval, ssize_t); } if (newp != NULL) { @@ -2101,8 +2100,8 @@ arenas_decay_time_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_time_default_set(*(ssize_t *)newp) - : arena_muzzy_decay_time_default_set(*(ssize_t *)newp)) { + if (dirty ? 
arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -2114,16 +2113,16 @@ label_return: } static int -arenas_dirty_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +arenas_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arenas_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, + return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, newlen, true); } static int -arenas_muzzy_decay_time_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arenas_decay_time_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, + return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, newlen, false); } @@ -2318,9 +2317,9 @@ CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) -CTL_RO_GEN(stats_arenas_i_dirty_decay_time, arenas_i(mib[2])->dirty_decay_time, +CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) -CTL_RO_GEN(stats_arenas_i_muzzy_decay_time, arenas_i(mib[2])->muzzy_decay_time, +CTL_RO_GEN(stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms, ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_uptime, diff --git a/src/jemalloc.c b/src/jemalloc.c index 13218449..47133edf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1070,10 +1070,14 @@ malloc_conf_init(void) { } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, UINT_MAX, yes, no, false) - CONF_HANDLE_SSIZE_T(opt_dirty_decay_time, - "dirty_decay_time", -1, NSTIME_SEC_MAX); - CONF_HANDLE_SSIZE_T(opt_muzzy_decay_time, - 
"muzzy_decay_time", -1, NSTIME_SEC_MAX); + CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, + "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < + QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : + SSIZE_MAX); + CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, + "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < + QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : + SSIZE_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (config_fill) { if (CONF_MATCH("junk")) { diff --git a/src/stats.c b/src/stats.c index 883c7d14..3c9eb35a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -411,7 +411,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, unsigned i, bool bins, bool large, bool mutex) { unsigned nthreads; const char *dss; - ssize_t dirty_decay_time, muzzy_decay_time; + ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; size_t base, internal, resident; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; @@ -452,9 +452,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "dss allocation precedence: %s\n", dss); } - CTL_M2_GET("stats.arenas.0.dirty_decay_time", i, &dirty_decay_time, + CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); - CTL_M2_GET("stats.arenas.0.muzzy_decay_time", i, &muzzy_decay_time, + CTL_M2_GET("stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms, ssize_t); CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); @@ -469,9 +469,9 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); if (json) { malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_decay_time\": %zd,\n", dirty_decay_time); + "\t\t\t\t\"dirty_decay_ms\": %zd,\n", dirty_decay_ms); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_decay_time\": %zd,\n", muzzy_decay_time); + "\t\t\t\t\"muzzy_decay_ms\": %zd,\n", 
muzzy_decay_ms); malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"pactive\": %zu,\n", pactive); malloc_cprintf(write_cb, cbopaque, @@ -494,10 +494,10 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "decaying: time npages sweeps madvises" " purged\n"); - if (dirty_decay_time >= 0) { + if (dirty_decay_ms >= 0) { malloc_cprintf(write_cb, cbopaque, " dirty: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", dirty_decay_time, pdirty, dirty_npurge, + FMTu64"\n", dirty_decay_ms, pdirty, dirty_npurge, dirty_nmadvise, dirty_purged); } else { malloc_cprintf(write_cb, cbopaque, @@ -505,10 +505,10 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, FMTu64"\n", pdirty, dirty_npurge, dirty_nmadvise, dirty_purged); } - if (muzzy_decay_time >= 0) { + if (muzzy_decay_ms >= 0) { malloc_cprintf(write_cb, cbopaque, " muzzy: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", muzzy_decay_time, pmuzzy, muzzy_npurge, + FMTu64"\n", muzzy_decay_ms, pmuzzy, muzzy_npurge, muzzy_nmadvise, muzzy_purged); } else { malloc_cprintf(write_cb, cbopaque, @@ -816,10 +816,8 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") - OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_time, arenas.dirty_decay_time, - ",") - OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_time, arenas.muzzy_decay_time, - ",") + OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") + OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_BOOL(zero, ",") OPT_WRITE_BOOL(utrace, ",") @@ -867,24 +865,14 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); } - CTL_GET("arenas.dirty_decay_time", &ssv, ssize_t); if (json) { + CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); malloc_cprintf(write_cb, 
cbopaque, - "\t\t\t\"dirty_decay_time\": %zd,\n", ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Unused dirty page decay time: %zd%s\n", ssv, (ssv < 0) ? - " (no decay)" : ""); - } + "\t\t\t\"dirty_decay_ms\": %zd,\n", ssv); - CTL_GET("arenas.muzzy_decay_time", &ssv, ssize_t); - if (json) { + CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"muzzy_decay_time\": %zd,\n", ssv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Unused muzzy page decay time: %zd%s\n", ssv, (ssv < 0) ? - " (no decay)" : ""); + "\t\t\t\"muzzy_decay_ms\": %zd,\n", ssv); } CTL_GET("arenas.quantum", &sv, size_t); diff --git a/test/unit/decay.c b/test/unit/decay.c index 389f6e06..19f76fa5 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -24,7 +24,7 @@ nstime_update_mock(nstime_t *time) { } static unsigned -do_arena_create(ssize_t dirty_decay_time, ssize_t muzzy_decay_time) { +do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; size_t sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), @@ -32,19 +32,19 @@ do_arena_create(ssize_t dirty_decay_time, ssize_t muzzy_decay_time) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.dirty_decay_time", mib, &miblen), + assert_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_time, - sizeof(dirty_decay_time)), 0, "Unexpected mallctlbymib() failure"); + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); - assert_d_eq(mallctlnametomib("arena.0.muzzy_decay_time", mib, &miblen), + assert_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - 
(void *)&muzzy_decay_time, - sizeof(muzzy_decay_time)), 0, "Unexpected mallctlbymib() failure"); + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); return arena_ind; } @@ -362,14 +362,14 @@ static void decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, uint64_t dirty_npurge0, uint64_t muzzy_npurge0, bool terminate_asap) { #define NINTERVALS 101 - nstime_t time, update_interval, decay_time, deadline; + nstime_t time, update_interval, decay_ms, deadline; nstime_init(&time, 0); nstime_update(&time); - nstime_init2(&decay_time, dt, 0); + nstime_init2(&decay_ms, dt, 0); nstime_copy(&deadline, &time); - nstime_add(&deadline, &decay_time); + nstime_add(&deadline, &decay_ms); nstime_init2(&update_interval, dt, 0); nstime_idivide(&update_interval, NINTERVALS); @@ -406,8 +406,8 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, TEST_BEGIN(test_decay_ticker) { #define NPS 2048 - ssize_t ddt = opt_dirty_decay_time; - ssize_t mdt = opt_muzzy_decay_time; + ssize_t ddt = opt_dirty_decay_ms; + ssize_t mdt = opt_muzzy_decay_ms; unsigned arena_ind = do_arena_create(ddt, mdt); int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); void *ps[NPS]; diff --git a/test/unit/decay.sh b/test/unit/decay.sh index a41489b0..45aeccf4 100644 --- a/test/unit/decay.sh +++ b/test/unit/decay.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="dirty_decay_time:1,muzzy_decay_time:1,lg_tcache_max:0" +export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index b07a6d04..f721c21d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -161,8 +161,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); - TEST_MALLCTL_OPT(ssize_t, dirty_decay_time, always); - TEST_MALLCTL_OPT(ssize_t, muzzy_decay_time, 
always); + TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); + TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); TEST_MALLCTL_OPT(bool, stats_print, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(bool, zero, fill); @@ -398,68 +398,66 @@ TEST_BEGIN(test_arena_i_initialized) { } TEST_END -TEST_BEGIN(test_arena_i_dirty_decay_time) { - ssize_t dirty_decay_time, orig_dirty_decay_time, prev_dirty_decay_time; +TEST_BEGIN(test_arena_i_dirty_decay_ms) { + ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.dirty_decay_time", - (void *)&orig_dirty_decay_time, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arena.0.dirty_decay_ms", + (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - dirty_decay_time = -2; - assert_d_eq(mallctl("arena.0.dirty_decay_time", NULL, NULL, - (void *)&dirty_decay_time, sizeof(ssize_t)), EFAULT, + dirty_decay_ms = -2; + assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, + (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - dirty_decay_time = 0x7fffffff; - assert_d_eq(mallctl("arena.0.dirty_decay_time", NULL, NULL, - (void *)&dirty_decay_time, sizeof(ssize_t)), 0, + dirty_decay_ms = 0x7fffffff; + assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, + (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - for (prev_dirty_decay_time = dirty_decay_time, dirty_decay_time = -1; - dirty_decay_time < 20; prev_dirty_decay_time = dirty_decay_time, - dirty_decay_time++) { - ssize_t old_dirty_decay_time; + for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; + dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms, + dirty_decay_ms++) { + ssize_t old_dirty_decay_ms; - assert_d_eq(mallctl("arena.0.dirty_decay_time", - (void *)&old_dirty_decay_time, &sz, - (void *)&dirty_decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - 
assert_zd_eq(old_dirty_decay_time, prev_dirty_decay_time, - "Unexpected old arena.0.dirty_decay_time"); + assert_d_eq(mallctl("arena.0.dirty_decay_ms", + (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, + "Unexpected old arena.0.dirty_decay_ms"); } } TEST_END -TEST_BEGIN(test_arena_i_muzzy_decay_time) { - ssize_t muzzy_decay_time, orig_muzzy_decay_time, prev_muzzy_decay_time; +TEST_BEGIN(test_arena_i_muzzy_decay_ms) { + ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.muzzy_decay_time", - (void *)&orig_muzzy_decay_time, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arena.0.muzzy_decay_ms", + (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - muzzy_decay_time = -2; - assert_d_eq(mallctl("arena.0.muzzy_decay_time", NULL, NULL, - (void *)&muzzy_decay_time, sizeof(ssize_t)), EFAULT, + muzzy_decay_ms = -2; + assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - muzzy_decay_time = 0x7fffffff; - assert_d_eq(mallctl("arena.0.muzzy_decay_time", NULL, NULL, - (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, + muzzy_decay_ms = 0x7fffffff; + assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - for (prev_muzzy_decay_time = muzzy_decay_time, muzzy_decay_time = -1; - muzzy_decay_time < 20; prev_muzzy_decay_time = muzzy_decay_time, - muzzy_decay_time++) { - ssize_t old_muzzy_decay_time; + for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; + muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms, + muzzy_decay_ms++) { + ssize_t old_muzzy_decay_ms; - assert_d_eq(mallctl("arena.0.muzzy_decay_time", - (void *)&old_muzzy_decay_time, &sz, - (void *)&muzzy_decay_time, 
sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - assert_zd_eq(old_muzzy_decay_time, prev_muzzy_decay_time, - "Unexpected old arena.0.muzzy_decay_time"); + assert_d_eq(mallctl("arena.0.muzzy_decay_ms", + (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, + "Unexpected old arena.0.muzzy_decay_ms"); } } TEST_END @@ -555,68 +553,66 @@ TEST_BEGIN(test_arena_i_dss) { } TEST_END -TEST_BEGIN(test_arenas_dirty_decay_time) { - ssize_t dirty_decay_time, orig_dirty_decay_time, prev_dirty_decay_time; +TEST_BEGIN(test_arenas_dirty_decay_ms) { + ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.dirty_decay_time", - (void *)&orig_dirty_decay_time, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.dirty_decay_ms", + (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - dirty_decay_time = -2; - assert_d_eq(mallctl("arenas.dirty_decay_time", NULL, NULL, - (void *)&dirty_decay_time, sizeof(ssize_t)), EFAULT, + dirty_decay_ms = -2; + assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, + (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - dirty_decay_time = 0x7fffffff; - assert_d_eq(mallctl("arenas.dirty_decay_time", NULL, NULL, - (void *)&dirty_decay_time, sizeof(ssize_t)), 0, + dirty_decay_ms = 0x7fffffff; + assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, + (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Expected mallctl() failure"); - for (prev_dirty_decay_time = dirty_decay_time, dirty_decay_time = -1; - dirty_decay_time < 20; prev_dirty_decay_time = dirty_decay_time, - dirty_decay_time++) { - ssize_t old_dirty_decay_time; + for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; + dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms, + dirty_decay_ms++) { + ssize_t old_dirty_decay_ms; - 
assert_d_eq(mallctl("arenas.dirty_decay_time", - (void *)&old_dirty_decay_time, &sz, - (void *)&dirty_decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - assert_zd_eq(old_dirty_decay_time, prev_dirty_decay_time, - "Unexpected old arenas.dirty_decay_time"); + assert_d_eq(mallctl("arenas.dirty_decay_ms", + (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, + "Unexpected old arenas.dirty_decay_ms"); } } TEST_END -TEST_BEGIN(test_arenas_muzzy_decay_time) { - ssize_t muzzy_decay_time, orig_muzzy_decay_time, prev_muzzy_decay_time; +TEST_BEGIN(test_arenas_muzzy_decay_ms) { + ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.muzzy_decay_time", - (void *)&orig_muzzy_decay_time, &sz, NULL, 0), 0, + assert_d_eq(mallctl("arenas.muzzy_decay_ms", + (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - muzzy_decay_time = -2; - assert_d_eq(mallctl("arenas.muzzy_decay_time", NULL, NULL, - (void *)&muzzy_decay_time, sizeof(ssize_t)), EFAULT, + muzzy_decay_ms = -2; + assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); - muzzy_decay_time = 0x7fffffff; - assert_d_eq(mallctl("arenas.muzzy_decay_time", NULL, NULL, - (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, + muzzy_decay_ms = 0x7fffffff; + assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Expected mallctl() failure"); - for (prev_muzzy_decay_time = muzzy_decay_time, muzzy_decay_time = -1; - muzzy_decay_time < 20; prev_muzzy_decay_time = muzzy_decay_time, - muzzy_decay_time++) { - ssize_t old_muzzy_decay_time; + for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; + muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms, + 
muzzy_decay_ms++) { + ssize_t old_muzzy_decay_ms; - assert_d_eq(mallctl("arenas.muzzy_decay_time", - (void *)&old_muzzy_decay_time, &sz, - (void *)&muzzy_decay_time, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); - assert_zd_eq(old_muzzy_decay_time, prev_muzzy_decay_time, - "Unexpected old arenas.muzzy_decay_time"); + assert_d_eq(mallctl("arenas.muzzy_decay_ms", + (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, + sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, + "Unexpected old arenas.muzzy_decay_ms"); } } TEST_END @@ -699,8 +695,8 @@ TEST_BEGIN(test_stats_arenas) { TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(const char *, dss); - TEST_STATS_ARENAS(ssize_t, dirty_decay_time); - TEST_STATS_ARENAS(ssize_t, muzzy_decay_time); + TEST_STATS_ARENAS(ssize_t, dirty_decay_ms); + TEST_STATS_ARENAS(ssize_t, muzzy_decay_ms); TEST_STATS_ARENAS(size_t, pactive); TEST_STATS_ARENAS(size_t, pdirty); @@ -723,13 +719,13 @@ main(void) { test_tcache, test_thread_arena, test_arena_i_initialized, - test_arena_i_dirty_decay_time, - test_arena_i_muzzy_decay_time, + test_arena_i_dirty_decay_ms, + test_arena_i_muzzy_decay_ms, test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, - test_arenas_dirty_decay_time, - test_arenas_muzzy_decay_time, + test_arenas_dirty_decay_ms, + test_arenas_muzzy_decay_ms, test_arenas_constants, test_arenas_bin_constants, test_arenas_lextent_constants, diff --git a/test/unit/pack.sh b/test/unit/pack.sh index 76757ac3..6f451480 100644 --- a/test/unit/pack.sh +++ b/test/unit/pack.sh @@ -1,4 +1,4 @@ #!/bin/sh # Immediately purge to minimize fragmentation. 
-export MALLOC_CONF="dirty_decay_time:0,muzzy_decay_time:0" +export MALLOC_CONF="dirty_decay_ms:0,muzzy_decay_ms:0" From 26c792e61a163b38b373023bca2947283dcd1fc8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 15 May 2017 15:38:15 -0700 Subject: [PATCH 0869/2608] Allow mutexes to take a lock ordering enum at construction. This lets us specify whether and how mutexes of the same rank are allowed to be acquired. Currently, we only allow two polices (only a single mutex at a given rank at a time, and mutexes acquired in ascending order), but we can plausibly allow more (e.g. the "release uncontended mutexes before blocking"). --- include/jemalloc/internal/mutex_externs.h | 2 +- include/jemalloc/internal/mutex_structs.h | 6 +++-- include/jemalloc/internal/mutex_types.h | 10 ++++++++ src/arena.c | 13 ++++++----- src/base.c | 3 ++- src/ctl.c | 3 ++- src/extent.c | 3 ++- src/jemalloc.c | 6 +++-- src/mutex.c | 28 ++++++++++++++++++++--- src/prof.c | 25 +++++++++++--------- src/rtree.c | 3 ++- src/tcache.c | 3 ++- 12 files changed, 75 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h index 8e40cb34..c9a817fb 100644 --- a/include/jemalloc/internal/mutex_externs.h +++ b/include/jemalloc/internal/mutex_externs.h @@ -11,7 +11,7 @@ extern bool isthreaded; #endif bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank); + witness_rank_t rank, malloc_mutex_lock_order_t lock_order); void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 2691852d..a8b16a16 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -40,12 +40,14 @@ struct malloc_mutex_s { * memory 
cost. */ #if !defined(JEMALLOC_DEBUG) - witness_t witness; + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif }; #if defined(JEMALLOC_DEBUG) - witness_t witness; + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif }; diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h index 5af8d099..65a9938d 100644 --- a/include/jemalloc/internal/mutex_types.h +++ b/include/jemalloc/internal/mutex_types.h @@ -3,6 +3,16 @@ typedef struct malloc_mutex_s malloc_mutex_t; +typedef enum { + /* Can only acquire one mutex of a given witness rank at a time. */ + malloc_mutex_rank_exclusive, + /* + * Can acquire multiple mutexes of the same witness rank, but in + * address-ascending order only. + */ + malloc_mutex_address_ordered +} malloc_mutex_lock_order_t; + /* * Based on benchmark results, a fixed spin with this amount of retries works * well for our critical sections. diff --git a/src/arena.c b/src/arena.c index 42bfc6b1..67e1b2f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -64,7 +64,7 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { } #ifndef JEMALLOC_ATOMIC_U64 if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS)) { + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } #endif @@ -734,7 +734,8 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, assert(((char *)decay)[i] == 0); } } - if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY)) { + if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, + malloc_mutex_rank_exclusive)) { return true; } decay->purging = false; @@ -1869,7 +1870,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { ql_new(&arena->tcache_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", - WITNESS_RANK_TCACHE_QL)) { + WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; } } @@ -1901,7 +1902,7 @@ arena_new(tsdn_t *tsdn, 
unsigned ind, extent_hooks_t *extent_hooks) { extent_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", - WITNESS_RANK_ARENA_LARGE)) { + WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; } @@ -1950,7 +1951,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { extent_avail_new(&arena->extent_avail); if (malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail", - WITNESS_RANK_EXTENT_FREELIST)) { + WITNESS_RANK_EXTENT_FREELIST, malloc_mutex_rank_exclusive)) { goto label_error; } @@ -1958,7 +1959,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN)) { + WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { goto label_error; } bin->slabcur = NULL; diff --git a/src/base.c b/src/base.c index 3de6e3b0..7502a657 100644 --- a/src/base.c +++ b/src/base.c @@ -238,7 +238,8 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { &gap_size, base_size, base_alignment); base->ind = ind; atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED); - if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE)) { + if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, + malloc_mutex_rank_exclusive)) { base_unmap(extent_hooks, ind, block, block->size); return NULL; } diff --git a/src/ctl.c b/src/ctl.c index 296e74f2..7f69f151 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1199,7 +1199,8 @@ label_return: bool ctl_boot(void) { - if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL)) { + if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, + malloc_mutex_rank_exclusive)) { return true; } diff --git a/src/extent.c b/src/extent.c index 1b284535..513d16d5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -195,7 +195,8 @@ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) bool extents_init(tsdn_t 
*tsdn, extents_t *extents, extent_state_t state, bool delay_coalesce) { - if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS)) { + if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS, + malloc_mutex_rank_exclusive)) { return true; } for (unsigned i = 0; i < NPSIZES+1; i++) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 47133edf..56aef5b0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -235,7 +235,8 @@ _init_init_lock(void) { * doing anything. */ if (!init_lock_initialized) { - malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT); + malloc_mutex_init(&init_lock, "init", WITNESS_RANK_INIT, + malloc_mutex_rank_exclusive); } init_lock_initialized = true; } @@ -1237,7 +1238,8 @@ malloc_init_hard_a0_locked() { if (tcache_boot(TSDN_NULL)) { return true; } - if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS)) { + if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS, + malloc_mutex_rank_exclusive)) { return true; } /* diff --git a/src/mutex.c b/src/mutex.c index 3eec970f..b15bbf6e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -138,9 +138,25 @@ malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex) { mutex_prof_data_init(&mutex->prof_data); } +static int +mutex_addr_comp(const witness_t *witness1, void *mutex1, + const witness_t *witness2, void *mutex2) { + assert(mutex1 != NULL); + assert(mutex2 != NULL); + uintptr_t mu1int = (uintptr_t)mutex1; + uintptr_t mu2int = (uintptr_t)mutex2; + if (mu1int < mu2int) { + return -1; + } else if (mu1int == mu2int) { + return 0; + } else { + return 1; + } +} + bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank) { + witness_rank_t rank, malloc_mutex_lock_order_t lock_order) { mutex_prof_data_init(&mutex->prof_data); #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 @@ -179,7 +195,13 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, pthread_mutexattr_destroy(&attr); #endif if (config_debug) { - 
witness_init(&mutex->witness, name, rank, NULL, NULL); + mutex->lock_order = lock_order; + if (lock_order == malloc_mutex_address_ordered) { + witness_init(&mutex->witness, name, rank, + mutex_addr_comp, &mutex); + } else { + witness_init(&mutex->witness, name, rank, NULL, NULL); + } } return false; } @@ -200,7 +222,7 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { malloc_mutex_unlock(tsdn, mutex); #else if (malloc_mutex_init(mutex, mutex->witness.name, - mutex->witness.rank)) { + mutex->witness.rank, mutex->lock_order)) { malloc_printf(": Error re-initializing mutex in " "child\n"); if (opt_abort) { diff --git a/src/prof.c b/src/prof.c index 470d926f..18978810 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1754,7 +1754,7 @@ prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { #ifndef JEMALLOC_ATOMIC_U64 if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", - WITNESS_RANK_PROF_ACCUM)) { + WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { return true; } prof_accum->accumbytes = 0; @@ -2289,20 +2289,21 @@ prof_boot2(tsd_t *tsd) { prof_active = opt_prof_active; if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE)) { + WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { return true; } prof_gdump_val = opt_prof_gdump; if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP)) { + WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { return true; } prof_thread_active_init = opt_prof_thread_active_init; if (malloc_mutex_init(&prof_thread_active_init_mtx, "prof_thread_active_init", - WITNESS_RANK_PROF_THREAD_ACTIVE_INIT)) { + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, + malloc_mutex_rank_exclusive)) { return true; } @@ -2311,28 +2312,28 @@ prof_boot2(tsd_t *tsd) { return true; } if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - WITNESS_RANK_PROF_BT2GCTX)) { + WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } tdata_tree_new(&tdatas); if 
(malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS)) { + WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; } next_thr_uid = 0; if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID)) { + WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", - WITNESS_RANK_PROF_DUMP_SEQ)) { + WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP)) { + WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { return true; } @@ -2352,7 +2353,8 @@ prof_boot2(tsd_t *tsd) { } for (i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", - WITNESS_RANK_PROF_GCTX)) { + WITNESS_RANK_PROF_GCTX, + malloc_mutex_rank_exclusive)) { return true; } } @@ -2365,7 +2367,8 @@ prof_boot2(tsd_t *tsd) { } for (i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", - WITNESS_RANK_PROF_TDATA)) { + WITNESS_RANK_PROF_TDATA, + malloc_mutex_rank_exclusive)) { return true; } } diff --git a/src/rtree.c b/src/rtree.c index 62df0143..6d4a71a2 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -18,7 +18,8 @@ rtree_new(rtree_t *rtree, bool zeroed) { assert(zeroed); #endif - if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE)) { + if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE, + malloc_mutex_rank_exclusive)) { return true; } diff --git a/src/tcache.c b/src/tcache.c index ee5e816f..d9f5e7cb 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -649,7 +649,8 @@ tcache_boot(tsdn_t *tsdn) { tcache_maxclass = (ZU(1) << opt_lg_tcache_max); } - if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES)) { + if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, + malloc_mutex_rank_exclusive)) { return true; } From 
3f685e88245c9807d7bdcaffce47b0fe14b974be Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 15 May 2017 14:23:51 -0700 Subject: [PATCH 0870/2608] Protect the rtree/extent interactions with a mutex pool. Instead of embedding a lock bit in rtree leaf elements, we associate extents with a small set of mutexes. This gets us two things: - We can use the system mutexes. This (hypothetically) protects us from priority inversion, and lets us stop doing a backoff/sleep loop, instead opting for precise wakeups from the mutex. - Cuts down on the number of mutex acquisitions we have to do (from 4 in the worst case to two). We end up simplifying most of the rtree code (which no longer has to deal with locking or concurrency at all), at the cost of additional complexity in the extent code: since the mutex protecting the rtree leaf elements is determined by reading the extent out of those elements, the initial read is racy, so that we may acquire an out of date mutex. We re-check the extent in the leaf after acquiring the mutex to protect us from this race. 
--- Makefile.in | 1 + include/jemalloc/internal/extent_externs.h | 1 + include/jemalloc/internal/extent_inlines.h | 27 ++ .../internal/jemalloc_internal_includes.h | 2 + .../jemalloc/internal/mutex_pool_inlines.h | 89 +++++++ .../jemalloc/internal/mutex_pool_structs.h | 14 + include/jemalloc/internal/mutex_structs.h | 2 + include/jemalloc/internal/rtree_externs.h | 6 - include/jemalloc/internal/rtree_inlines.h | 214 +++------------ include/jemalloc/internal/rtree_structs.h | 6 +- include/jemalloc/internal/rtree_types.h | 23 -- include/jemalloc/internal/rtree_witness.h | 19 -- include/jemalloc/internal/tsd.h | 3 - include/jemalloc/internal/witness_types.h | 2 +- src/extent.c | 246 +++++++++++------- src/mutex_pool.c | 15 ++ src/rtree.c | 115 -------- test/unit/rtree.c | 93 +------ 18 files changed, 341 insertions(+), 537 deletions(-) create mode 100644 include/jemalloc/internal/mutex_pool_inlines.h create mode 100644 include/jemalloc/internal/mutex_pool_structs.h delete mode 100644 include/jemalloc/internal/rtree_witness.h create mode 100644 src/mutex_pool.c diff --git a/Makefile.in b/Makefile.in index 2f16fbf3..264b077c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,6 +103,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/large.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ + $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index c4fe8425..7a5b38c6 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -6,6 +6,7 @@ extern rtree_t extents_rtree; extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); diff --git a/include/jemalloc/internal/extent_inlines.h 
b/include/jemalloc/internal/extent_inlines.h index 0e6311d9..2ebd9452 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -1,10 +1,37 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H #define JEMALLOC_INTERNAL_EXTENT_INLINES_H +#include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" +static inline void +extent_lock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_unlock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline void +extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + static inline arena_t * extent_arena_get(const extent_t *extent) { unsigned arena_ind = (unsigned)((extent->e_bits & diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 84917a70..cf321c12 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -56,6 +56,7 @@ #include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" +#include "jemalloc/internal/mutex_pool_structs.h" #include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/extent_dss_structs.h" @@ -88,6 +89,7 @@ #include "jemalloc/internal/witness_inlines.h" #include 
"jemalloc/internal/mutex_inlines.h" +#include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/rtree_inlines.h" #include "jemalloc/internal/base_inlines.h" diff --git a/include/jemalloc/internal/mutex_pool_inlines.h b/include/jemalloc/internal/mutex_pool_inlines.h new file mode 100644 index 00000000..0b667aaa --- /dev/null +++ b/include/jemalloc/internal/mutex_pool_inlines.h @@ -0,0 +1,89 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H +#define JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H + +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/mutex_inlines.h" +#include "jemalloc/internal/mutex_pool_structs.h" + +/* + * This file really combines "inlines" and "externs", but only transitionally. + */ + +bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank); + +static inline malloc_mutex_t * +mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) { + size_t hash_result[2]; + hash(&key, sizeof(key), 0xd50dcc1b, hash_result); + return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE]; +} + +static inline void +mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) { + for (int i = 0; i < MUTEX_POOL_SIZE; i++) { + malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]); + } +} + +/* + * Note that a mutex pool doesn't work exactly the way an embdedded mutex would. + * You're not allowed to acquire mutexes in the pool one at a time. You have to + * acquire all the mutexes you'll need in a single function call, and then + * release them all in a single function call. 
+ */ + +static inline void +mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + mutex_pool_assert_not_held(tsdn, pool); + + malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); + malloc_mutex_lock(tsdn, mutex); +} + +static inline void +mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); + malloc_mutex_unlock(tsdn, mutex); + + mutex_pool_assert_not_held(tsdn, pool); +} + +static inline void +mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, + uintptr_t key2) { + mutex_pool_assert_not_held(tsdn, pool); + + malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); + malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); + if ((uintptr_t)mutex1 < (uintptr_t)mutex2) { + malloc_mutex_lock(tsdn, mutex1); + malloc_mutex_lock(tsdn, mutex2); + } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) { + malloc_mutex_lock(tsdn, mutex1); + } else { + malloc_mutex_lock(tsdn, mutex2); + malloc_mutex_lock(tsdn, mutex1); + } +} + +static inline void +mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, + uintptr_t key2) { + malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); + malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); + if (mutex1 == mutex2) { + malloc_mutex_unlock(tsdn, mutex1); + } else { + malloc_mutex_unlock(tsdn, mutex1); + malloc_mutex_unlock(tsdn, mutex2); + } + + mutex_pool_assert_not_held(tsdn, pool); +} + +static inline void +mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { + malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key)); +} + +#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H */ diff --git a/include/jemalloc/internal/mutex_pool_structs.h b/include/jemalloc/internal/mutex_pool_structs.h new file mode 100644 index 00000000..a662166c --- /dev/null +++ b/include/jemalloc/internal/mutex_pool_structs.h @@ -0,0 +1,14 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H +#define 
JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H + +/* This file really combines "structs" and "types", but only transitionally. */ + +/* We do mod reductions by this value, so it should be kept a power of 2. */ +#define MUTEX_POOL_SIZE 256 + +typedef struct mutex_pool_s mutex_pool_t; +struct mutex_pool_s { + malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; +}; + +#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index a8b16a16..92f41676 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -3,6 +3,8 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/witness_types.h" +#include "jemalloc/internal/witness_structs.h" struct malloc_mutex_s { union { diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h index 5742f589..d7d81654 100644 --- a/include/jemalloc/internal/rtree_externs.h +++ b/include/jemalloc/internal/rtree_externs.h @@ -41,11 +41,5 @@ void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); #endif rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -void rtree_leaf_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, - uintptr_t key, const rtree_leaf_elm_t *elm); -void rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_leaf_elm_t *elm); -void rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_leaf_elm_t *elm); #endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree_inlines.h index bcc2041a..335a89cf 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree_inlines.h @@ -47,21 +47,16 @@ rtree_subkey(uintptr_t key, unsigned level) { # ifdef RTREE_LEAF_COMPACT 
JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, bool dependent) { - if (config_debug && acquired) { - assert(dependent); - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - + bool dependent) { return (uintptr_t)atomic_load_p(&elm->le_bits, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); } JEMALLOC_ALWAYS_INLINE extent_t * rtree_leaf_elm_bits_extent_get(uintptr_t bits) { - /* Restore sign-extended high bits, mask slab and lock bits. */ + /* Restore sign-extended high bits, mask slab bit. */ return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> - RTREE_NHIB) & ~((uintptr_t)0x3)); + RTREE_NHIB) & ~((uintptr_t)0x1)); } JEMALLOC_ALWAYS_INLINE szind_t @@ -71,51 +66,29 @@ rtree_leaf_elm_bits_szind_get(uintptr_t bits) { JEMALLOC_ALWAYS_INLINE bool rtree_leaf_elm_bits_slab_get(uintptr_t bits) { - return (bool)((bits >> 1) & (uintptr_t)0x1); -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_bits_locked_get(uintptr_t bits) { return (bool)(bits & (uintptr_t)0x1); } + # endif JEMALLOC_ALWAYS_INLINE extent_t * rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, bool dependent) { - if (config_debug && acquired) { - assert(dependent); - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - + bool dependent) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired, - dependent); - assert(!acquired || rtree_leaf_elm_bits_locked_get(bits)); + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_extent_get(bits); #else extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); - assert(!acquired || ((uintptr_t)extent & (uintptr_t)0x1) == - (uintptr_t)0x1); - /* Mask lock bit. 
*/ - extent = (extent_t *)((uintptr_t)extent & ~((uintptr_t)0x1)); return extent; #endif } JEMALLOC_ALWAYS_INLINE szind_t rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, bool dependent) { - if (config_debug && acquired) { - assert(dependent); - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - + bool dependent) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired, - dependent); - assert(!acquired || rtree_leaf_elm_bits_locked_get(bits)); + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_szind_get(bits); #else return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED @@ -125,16 +98,9 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, JEMALLOC_ALWAYS_INLINE bool rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, bool dependent) { - if (config_debug && acquired) { - assert(dependent); - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - + bool dependent) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, acquired, - dependent); - assert(!acquired || rtree_leaf_elm_bits_locked_get(bits)); + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_slab_get(bits); #else return atomic_load_b(&elm->le_slab, dependent ? 
ATOMIC_RELAXED : @@ -143,46 +109,31 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_extent_lock_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool acquired, extent_t *extent, bool lock) { - if (config_debug && acquired) { - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - assert(((uintptr_t)extent & (uintptr_t)0x1) == (uintptr_t)0x0); - +rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + extent_t *extent) { #ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - acquired, acquired); + uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) - | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) | - (uintptr_t)lock; + | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - if (lock) { - /* Overlay lock bit. 
*/ - extent = (extent_t *)((uintptr_t)extent | (uintptr_t)0x1); - } atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); #endif } static inline void rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, szind_t szind) { - if (config_debug && acquired) { - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } + szind_t szind) { assert(szind <= NSIZES); #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - acquired, acquired); + true); uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits) << 1) | - (uintptr_t)acquired; + ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); @@ -191,18 +142,13 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, static inline void rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool acquired, bool slab) { - if (config_debug && acquired) { - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - + bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - acquired, acquired); + true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab << 1) | - (uintptr_t)acquired; + (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); @@ -211,27 +157,20 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool 
acquired, extent_t *extent, szind_t szind, bool slab) { - if (config_debug && acquired) { - rtree_leaf_elm_witness_access(tsdn, rtree, elm); - } - assert(!slab || szind < NBINS); - + extent_t *extent, szind_t szind, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)slab << 1) | - (uintptr_t)acquired; + ((uintptr_t)slab); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - rtree_leaf_elm_slab_write(tsdn, rtree, elm, acquired, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, acquired, szind); + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); /* * Write extent last, since the element is atomically considered valid * as soon as the extent field is non-NULL. */ - rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, acquired, extent, - acquired); + rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); #endif } @@ -244,32 +183,8 @@ rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, * The caller implicitly assures that it is the only writer to the szind * and slab fields, and that the extent field cannot currently change. */ -#ifdef RTREE_LEAF_COMPACT - /* - * Another thread may concurrently acquire the elm, which means that - * even though the szind and slab fields will not be concurrently - * modified by another thread, the fact that the lock is embedded in the - * same word requires that a CAS operation be used here. - */ - spin_t spinner = SPIN_INITIALIZER; - while (true) { - void *old_bits = (void *)(rtree_leaf_elm_bits_read(tsdn, rtree, - elm, false, true) & ~((uintptr_t)0x1)); /* Mask lock bit. 
*/ - void *bits = (void *)(((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_extent_get( - (uintptr_t)old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)slab << 1)); - if (likely(atomic_compare_exchange_strong_p(&elm->le_bits, - &old_bits, bits, ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { - break; - } - spin_adaptive(&spinner); - } -#else - /* No need to lock. */ - rtree_leaf_elm_slab_write(tsdn, rtree, elm, false, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, false, szind); -#endif + rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); + rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -343,9 +258,8 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return true; } - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) == - NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, false, extent, szind, slab); + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); return false; } @@ -370,7 +284,7 @@ rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return NULL; } - return rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, dependent); + return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); } JEMALLOC_ALWAYS_INLINE szind_t @@ -381,7 +295,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return NSIZES; } - return rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, dependent); + return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); } /* @@ -397,10 +311,8 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } - *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, - dependent); - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, - dependent); + *r_extent = 
rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); return false; } @@ -412,63 +324,11 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, false, - dependent); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, false, dependent); + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); return false; } -static inline rtree_leaf_elm_t * -rtree_leaf_elm_acquire(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, bool init_missing) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, dependent, init_missing); - if (!dependent && elm == NULL) { - return NULL; - } - assert(elm != NULL); - - spin_t spinner = SPIN_INITIALIZER; - while (true) { - /* The least significant bit serves as a lock. 
*/ -#ifdef RTREE_LEAF_COMPACT -# define RTREE_FIELD_WITH_LOCK le_bits -#else -# define RTREE_FIELD_WITH_LOCK le_extent -#endif - void *bits = atomic_load_p(&elm->RTREE_FIELD_WITH_LOCK, - ATOMIC_RELAXED); - if (likely(((uintptr_t)bits & (uintptr_t)0x1) == 0)) { - void *locked = (void *)((uintptr_t)bits | - (uintptr_t)0x1); - if (likely(atomic_compare_exchange_strong_p( - &elm->RTREE_FIELD_WITH_LOCK, &bits, locked, - ATOMIC_ACQUIRE, ATOMIC_RELAXED))) { - break; - } - } - spin_adaptive(&spinner); -#undef RTREE_FIELD_WITH_LOCK - } - - if (config_debug) { - rtree_leaf_elm_witness_acquire(tsdn, rtree, key, elm); - } - - return elm; -} - -static inline void -rtree_leaf_elm_release(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm) { - extent_t *extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, true, - true); - rtree_leaf_elm_extent_lock_write(tsdn, rtree, elm, true, extent, false); - - if (config_debug) { - rtree_leaf_elm_witness_release(tsdn, rtree, elm); - } -} - static inline void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab) { @@ -482,9 +342,9 @@ static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false, false) != + assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, false, NULL, NSIZES, false); + rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); } #endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index 4418934f..ba0f96d0 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H #include "jemalloc/internal/atomic.h" +#include 
"jemalloc/internal/mutex_pool_structs.h" struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ @@ -18,13 +19,12 @@ struct rtree_leaf_elm_s { * x: index * e: extent * b: slab - * k: lock * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00bk + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b */ atomic_p_t le_bits; #else - atomic_p_t le_extent; /* (extent_t *), lock in low bit */ + atomic_p_t le_extent; /* (extent_t *) */ atomic_u_t le_szind; /* (szind_t) */ atomic_b_t le_slab; /* (bool) */ #endif diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_types.h index b465086d..fd0f1409 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_types.h @@ -66,27 +66,4 @@ typedef struct rtree_s rtree_t; */ #define RTREE_CTX_ZERO_INITIALIZER {{{0}}} -/* - * Maximum number of concurrently acquired elements per thread. This controls - * how many witness_t structures are embedded in tsd. Ideally rtree_leaf_elm_t - * would have a witness_t directly embedded, but that would dramatically bloat - * the tree. This must contain enough entries to e.g. coalesce two extents. - */ -#define RTREE_ELM_ACQUIRE_MAX 4 - -/* Initializers for rtree_leaf_elm_witness_tsd_t. 
*/ -#define RTREE_ELM_WITNESS_INITIALIZER { \ - NULL, \ - WITNESS_INITIALIZER("rtree_leaf_elm", WITNESS_RANK_RTREE_ELM) \ -} - -#define RTREE_ELM_WITNESS_TSD_INITIALIZER { \ - { \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER, \ - RTREE_ELM_WITNESS_INITIALIZER \ - } \ -} - #endif /* JEMALLOC_INTERNAL_RTREE_TYPES_H */ diff --git a/include/jemalloc/internal/rtree_witness.h b/include/jemalloc/internal/rtree_witness.h deleted file mode 100644 index 4a136203..00000000 --- a/include/jemalloc/internal/rtree_witness.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_WITNESS_H -#define JEMALLOC_INTERNAL_RTREE_WITNESS_H - -#include "jemalloc/internal/rtree_types.h" -#include "jemalloc/internal/witness_types.h" -#include "jemalloc/internal/witness_structs.h" - -typedef struct rtree_leaf_elm_witness_s rtree_leaf_elm_witness_t; -struct rtree_leaf_elm_witness_s { - const rtree_leaf_elm_t *elm; - witness_t witness; -}; - -typedef struct rtree_leaf_elm_witness_tsd_s rtree_leaf_elm_witness_tsd_t; -struct rtree_leaf_elm_witness_tsd_s { - rtree_leaf_elm_witness_t witnesses[RTREE_ELM_ACQUIRE_MAX]; -}; - -#endif /* JEMALLOC_INTERNAL_RTREE_WITNESS_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3d6576b4..1a269755 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -7,7 +7,6 @@ #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rtree_ctx.h" -#include "jemalloc/internal/rtree_witness.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/util.h" @@ -76,7 +75,6 @@ typedef void (*test_callback_t)(int *); O(arenas_tdata, arena_tdata_t *) \ O(tcache, tcache_t) \ O(witnesses, witness_list_t) \ - O(rtree_leaf_elm_witnesses, rtree_leaf_elm_witness_tsd_t) \ O(witness_fork, bool) \ MALLOC_TEST_TSD @@ -95,7 +93,6 @@ typedef void 
(*test_callback_t)(int *); NULL, \ TCACHE_ZERO_INITIALIZER, \ ql_head_initializer(witnesses), \ - RTREE_ELM_WITNESS_TSD_INITIALIZER, \ false \ MALLOC_TEST_TSD_INITIALIZER \ } diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h index d43a363b..f686702e 100644 --- a/include/jemalloc/internal/witness_types.h +++ b/include/jemalloc/internal/witness_types.h @@ -41,7 +41,7 @@ typedef int witness_comp_t (const witness_t *, void *, const witness_t *, #define WITNESS_RANK_EXTENTS 11U #define WITNESS_RANK_EXTENT_FREELIST 12U -#define WITNESS_RANK_RTREE_ELM 13U +#define WITNESS_RANK_EXTENT_POOL 13U #define WITNESS_RANK_RTREE 14U #define WITNESS_RANK_BASE 15U #define WITNESS_RANK_ARENA_LARGE 16U diff --git a/src/extent.c b/src/extent.c index 513d16d5..6503f2a1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -5,11 +5,12 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ph.h" - /******************************************************************************/ /* Data. */ rtree_t extents_rtree; +/* Keyed by the address of the extent_t being protected. */ +mutex_pool_t extent_mutex_pool; static const bitmap_info_t extents_bitmap_info = BITMAP_INFO_INITIALIZER(NPSIZES+1); @@ -95,6 +96,57 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, extent_esnead_comp) +typedef enum { + lock_result_success, + lock_result_failure, + lock_result_no_extent +} lock_result_t; + +static lock_result_t +extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, + extent_t **result) { + extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, + elm, true); + + if (extent1 == NULL) { + return lock_result_no_extent; + } + /* + * It's possible that the extent changed out from under us, and with it + * the leaf->extent mapping. We have to recheck while holding the lock. 
+ */ + extent_lock(tsdn, extent1); + extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, + &extents_rtree, elm, true); + + if (extent1 == extent2) { + *result = extent1; + return lock_result_success; + } else { + extent_unlock(tsdn, extent1); + return lock_result_failure; + } +} + +/* + * Returns a pool-locked extent_t * if there's one associated with the given + * address, and NULL otherwise. + */ +static extent_t * +extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { + extent_t *ret = NULL; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)addr, false, false); + if (elm == NULL) { + return NULL; + } + lock_result_t lock_result; + do { + lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret); + } while (lock_result == lock_result_failure); + return ret; +} + extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); @@ -508,28 +560,22 @@ extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, } static bool -extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, +extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, const extent_t *extent, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { - *r_elm_a = rtree_leaf_elm_acquire(tsdn, &extents_rtree, rtree_ctx, + *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), dependent, init_missing); if (!dependent && *r_elm_a == NULL) { return true; } assert(*r_elm_a != NULL); - if (extent_size_get(extent) > PAGE) { - *r_elm_b = rtree_leaf_elm_acquire(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)extent_last_get(extent), dependent, - init_missing); - if (!dependent && *r_elm_b == NULL) { - rtree_leaf_elm_release(tsdn, &extents_rtree, *r_elm_a); - return true; - } - assert(*r_elm_b != NULL); - } else { - *r_elm_b = NULL; + *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, 
rtree_ctx, + (uintptr_t)extent_last_get(extent), dependent, init_missing); + if (!dependent && *r_elm_b == NULL) { + return true; } + assert(*r_elm_b != NULL); return false; } @@ -537,20 +583,10 @@ extent_rtree_acquire(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, true, extent, szind, - slab); + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, true, extent, - szind, slab); - } -} - -static void -extent_rtree_release(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b) { - rtree_leaf_elm_release(tsdn, &extents_rtree, elm_a); - if (elm_b != NULL) { - rtree_leaf_elm_release(tsdn, &extents_rtree, elm_b); + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, + slab); } } @@ -609,17 +645,25 @@ extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; - if (extent_rtree_acquire(tsdn, rtree_ctx, extent, false, true, &elm_a, - &elm_b)) { + /* + * We need to hold the lock to protect against a concurrent coalesce + * operation that sees us in a partial state. 
+ */ + extent_lock(tsdn, extent); + + if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, + &elm_a, &elm_b)) { return true; } + szind_t szind = extent_szind_get_maybe_invalid(extent); bool slab = extent_slab_get(extent); extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); if (slab) { extent_interior_register(tsdn, rtree_ctx, extent, szind); } - extent_rtree_release(tsdn, elm_a, elm_b); + + extent_unlock(tsdn, extent); if (config_prof && gdump_add) { extent_gdump_add(tsdn, extent); @@ -663,15 +707,18 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, + &elm_a, &elm_b); + + extent_lock(tsdn, extent); - extent_rtree_acquire(tsdn, rtree_ctx, extent, true, false, &elm_a, - &elm_b); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, NSIZES, false); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); } - extent_rtree_release(tsdn, elm_a, elm_b); + + extent_unlock(tsdn, extent); if (config_prof) { extent_gdump_sub(tsdn, extent); @@ -717,24 +764,21 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, - &extents_rtree, rtree_ctx, (uintptr_t)new_addr, false, - false); - if (elm != NULL) { - extent = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, elm, true, true); - if (extent != NULL) { - assert(extent_base_get(extent) == new_addr); - if (extent_arena_get(extent) != arena || - extent_size_get(extent) < esize || - extent_state_get(extent) != - extents_state_get(extents)) { - extent = NULL; - } + extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr); + if (extent != NULL) { + /* + * We might null-out extent to 
report an error, but we + * still need to unlock the associated mutex after. + */ + extent_t *unlock_extent = extent; + assert(extent_base_get(extent) == new_addr); + if (extent_arena_get(extent) != arena || + extent_size_get(extent) < esize || + extent_state_get(extent) != + extents_state_get(extents)) { + extent = NULL; } - rtree_leaf_elm_release(tsdn, &extents_rtree, elm); - } else { - extent = NULL; + extent_unlock(tsdn, unlock_extent); } } else { extent = extents_fit_locked(tsdn, arena, extents, alloc_size); @@ -1254,20 +1298,19 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, again = false; /* Try to coalesce forward. */ - rtree_leaf_elm_t *next_elm = rtree_leaf_elm_acquire(tsdn, - &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent), false, false); - if (next_elm != NULL) { - extent_t *next = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, next_elm, true, true); + extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, + extent_past_get(extent)); + if (next != NULL) { /* * extents->mtx only protects against races for * like-state extents, so call extent_can_coalesce() - * before releasing the next_elm lock. + * before releasing next's pool lock. */ - bool can_coalesce = (next != NULL && - extent_can_coalesce(arena, extents, extent, next)); - rtree_leaf_elm_release(tsdn, &extents_rtree, next_elm); + bool can_coalesce = extent_can_coalesce(arena, extents, + extent, next); + + extent_unlock(tsdn, next); + if (can_coalesce && !extent_coalesce(tsdn, arena, r_extent_hooks, extents, extent, next, true)) { if (extents->delay_coalesce) { @@ -1280,15 +1323,13 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, } /* Try to coalesce backward. 
*/ - rtree_leaf_elm_t *prev_elm = rtree_leaf_elm_acquire(tsdn, - &extents_rtree, rtree_ctx, - (uintptr_t)extent_before_get(extent), false, false); - if (prev_elm != NULL) { - extent_t *prev = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, prev_elm, true, true); - bool can_coalesce = (prev != NULL && - extent_can_coalesce(arena, extents, extent, prev)); - rtree_leaf_elm_release(tsdn, &extents_rtree, prev_elm); + extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, + extent_before_get(extent)); + if (prev != NULL) { + bool can_coalesce = extent_can_coalesce(arena, extents, + extent, prev); + extent_unlock(tsdn, prev); + if (can_coalesce && !extent_coalesce(tsdn, arena, r_extent_hooks, extents, extent, prev, false)) { extent = prev; @@ -1610,22 +1651,25 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); - extent_t *trail; - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *lead_elm_a, *lead_elm_b, *trail_elm_a, *trail_elm_b; - extent_hooks_assure_initialized(arena, r_extent_hooks); if ((*r_extent_hooks)->split == NULL) { return NULL; } - trail = extent_alloc(tsdn, arena); + extent_t *trail = extent_alloc(tsdn, arena); if (trail == NULL) { goto label_error_a; } + extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), + extent_state_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent)); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; { extent_t lead; @@ -1634,25 +1678,24 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_state_get(extent), extent_zeroed_get(extent), extent_committed_get(extent)); - if (extent_rtree_acquire(tsdn, rtree_ctx, &lead, false, true, - &lead_elm_a, 
&lead_elm_b)) { - goto label_error_b; - } + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, + true, &lead_elm_a, &lead_elm_b); + } + rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, + &trail_elm_a, &trail_elm_b); + + if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL + || trail_elm_b == NULL) { + goto label_error_b; } - extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, slab_b, szind_b, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); - if (extent_rtree_acquire(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b)) { - goto label_error_c; - } + extent_lock2(tsdn, extent, trail); if ((*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena))) { - goto label_error_d; + goto label_error_c; } extent_size_set(extent, size_a); @@ -1663,14 +1706,11 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, szind_b, slab_b); - extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); - extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); + extent_unlock2(tsdn, extent, trail); return trail; -label_error_d: - extent_rtree_release(tsdn, trail_elm_a, trail_elm_b); label_error_c: - extent_rtree_release(tsdn, lead_elm_a, lead_elm_b); + extent_unlock2(tsdn, extent, trail); label_error_b: extent_dalloc(tsdn, arena, trail); label_error_a: @@ -1734,20 +1774,20 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_rtree_acquire(tsdn, rtree_ctx, a, true, false, &a_elm_a, + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, &a_elm_b); - 
extent_rtree_acquire(tsdn, rtree_ctx, b, true, false, &b_elm_a, + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, &b_elm_b); + extent_lock2(tsdn, a, b); + if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, true, NULL, + rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, NSIZES, false); - rtree_leaf_elm_release(tsdn, &extents_rtree, a_elm_b); } if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, true, NULL, + rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, NSIZES, false); - rtree_leaf_elm_release(tsdn, &extents_rtree, b_elm_a); } else { b_elm_b = b_elm_a; } @@ -1759,7 +1799,8 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, NSIZES, false); - extent_rtree_release(tsdn, a_elm_a, b_elm_b); + + extent_unlock2(tsdn, a, b); extent_dalloc(tsdn, extent_arena_get(b), b); @@ -1772,6 +1813,11 @@ extent_boot(void) { return true; } + if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", + WITNESS_RANK_EXTENT_POOL)) { + return true; + } + if (have_dss) { extent_dss_boot(); } diff --git a/src/mutex_pool.c b/src/mutex_pool.c new file mode 100644 index 00000000..004d6d0f --- /dev/null +++ b/src/mutex_pool.c @@ -0,0 +1,15 @@ +#define JEMALLOC_MUTEX_POOL_C_ + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) { + for (int i = 0; i < MUTEX_POOL_SIZE; ++i) { + if (malloc_mutex_init(&pool->mutexes[i], name, rank, + malloc_mutex_address_ordered)) { + return true; + } + } + return false; +} diff --git a/src/rtree.c b/src/rtree.c index 6d4a71a2..637853c7 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -304,121 +304,6 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, not_reached(); } -static int 
-rtree_leaf_elm_witness_comp(const witness_t *a, void *oa, const witness_t *b, - void *ob) { - uintptr_t ka = (uintptr_t)oa; - uintptr_t kb = (uintptr_t)ob; - - assert(ka != 0); - assert(kb != 0); - - return (ka > kb) - (ka < kb); -} - -static witness_t * -rtree_leaf_elm_witness_alloc(tsd_t *tsd, uintptr_t key, - const rtree_leaf_elm_t *elm) { - witness_t *witness; - size_t i; - rtree_leaf_elm_witness_tsd_t *witnesses = - tsd_rtree_leaf_elm_witnessesp_get(tsd); - - /* Iterate over entire array to detect double allocation attempts. */ - witness = NULL; - for (i = 0; i < RTREE_ELM_ACQUIRE_MAX; i++) { - rtree_leaf_elm_witness_t *rew = &witnesses->witnesses[i]; - - assert(rew->elm != elm); - if (rew->elm == NULL && witness == NULL) { - rew->elm = elm; - witness = &rew->witness; - witness_init(witness, "rtree_leaf_elm", - WITNESS_RANK_RTREE_ELM, rtree_leaf_elm_witness_comp, - (void *)key); - } - } - assert(witness != NULL); - return witness; -} - -static witness_t * -rtree_leaf_elm_witness_find(tsd_t *tsd, const rtree_leaf_elm_t *elm) { - size_t i; - rtree_leaf_elm_witness_tsd_t *witnesses = - tsd_rtree_leaf_elm_witnessesp_get(tsd); - - for (i = 0; i < RTREE_ELM_ACQUIRE_MAX; i++) { - rtree_leaf_elm_witness_t *rew = &witnesses->witnesses[i]; - - if (rew->elm == elm) { - return &rew->witness; - } - } - not_reached(); -} - -static void -rtree_leaf_elm_witness_dalloc(tsd_t *tsd, witness_t *witness, - const rtree_leaf_elm_t *elm) { - size_t i; - rtree_leaf_elm_witness_tsd_t *witnesses = - tsd_rtree_leaf_elm_witnessesp_get(tsd); - - for (i = 0; i < RTREE_ELM_ACQUIRE_MAX; i++) { - rtree_leaf_elm_witness_t *rew = &witnesses->witnesses[i]; - - if (rew->elm == elm) { - rew->elm = NULL; - witness_init(&rew->witness, "rtree_leaf_elm", - WITNESS_RANK_RTREE_ELM, rtree_leaf_elm_witness_comp, - NULL); - return; - } - } - not_reached(); -} - -void -rtree_leaf_elm_witness_acquire(tsdn_t *tsdn, const rtree_t *rtree, - uintptr_t key, const rtree_leaf_elm_t *elm) { - witness_t *witness; - - 
if (tsdn_null(tsdn)) { - return; - } - - witness = rtree_leaf_elm_witness_alloc(tsdn_tsd(tsdn), key, elm); - witness_lock(tsdn, witness); -} - -void -rtree_leaf_elm_witness_access(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_leaf_elm_t *elm) { - witness_t *witness; - - if (tsdn_null(tsdn)) { - return; - } - - witness = rtree_leaf_elm_witness_find(tsdn_tsd(tsdn), elm); - witness_assert_owner(tsdn, witness); -} - -void -rtree_leaf_elm_witness_release(tsdn_t *tsdn, const rtree_t *rtree, - const rtree_leaf_elm_t *elm) { - witness_t *witness; - - if (tsdn_null(tsdn)) { - return; - } - - witness = rtree_leaf_elm_witness_find(tsdn_tsd(tsdn), elm); - witness_unlock(tsdn, witness); - rtree_leaf_elm_witness_dalloc(tsdn_tsd(tsdn), witness, elm); -} - void rtree_ctx_data_init(rtree_ctx_t *ctx) { for (unsigned i = 0; i < RTREE_CTX_NCACHE; i++) { diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 3c5b2df4..b854afd7 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -77,90 +77,6 @@ TEST_BEGIN(test_rtree_read_empty) { } TEST_END -#define NTHREADS 8 -#define MAX_NBITS 30 -#define NITERS 1000 -#define SEED 42 - -typedef struct { - rtree_t *rtree; - uint32_t seed; -} thd_start_arg_t; - -static void * -thd_start(void *varg) { - thd_start_arg_t *arg = (thd_start_arg_t *)varg; - rtree_ctx_t rtree_ctx; - rtree_ctx_data_init(&rtree_ctx); - sfmt_t *sfmt; - extent_t *extent; - tsdn_t *tsdn; - unsigned i; - - sfmt = init_gen_rand(arg->seed); - extent = (extent_t *)malloc(sizeof(extent)); - assert_ptr_not_null(extent, "Unexpected malloc() failure"); - extent_init(extent, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); - tsdn = tsdn_fetch(); - - for (i = 0; i < NITERS; i++) { - uintptr_t key = (uintptr_t)(gen_rand64(sfmt) & ((ZU(1) << - MAX_NBITS) - ZU(1))); - if (i % 2 == 0) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, - arg->rtree, &rtree_ctx, key, false, true); - assert_ptr_not_null(elm, - "Unexpected rtree_leaf_elm_acquire() failure"); - 
rtree_leaf_elm_write(tsdn, arg->rtree, elm, true, - extent, NSIZES, false); - rtree_leaf_elm_release(tsdn, arg->rtree, elm); - - elm = rtree_leaf_elm_acquire(tsdn, arg->rtree, - &rtree_ctx, key, true, false); - assert_ptr_not_null(elm, - "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_extent_read(tsdn, arg->rtree, elm, true, - true); - rtree_leaf_elm_szind_read(tsdn, arg->rtree, elm, true, - true); - rtree_leaf_elm_slab_read(tsdn, arg->rtree, elm, true, - true); - rtree_leaf_elm_release(tsdn, arg->rtree, elm); - } else { - rtree_extent_read(tsdn, arg->rtree, &rtree_ctx, key, - false); - } - } - - free(extent); - fini_gen_rand(sfmt); - return NULL; -} - -TEST_BEGIN(test_rtree_concurrent) { - thd_start_arg_t arg; - thd_t thds[NTHREADS]; - sfmt_t *sfmt; - tsdn_t *tsdn; - - sfmt = init_gen_rand(SEED); - tsdn = tsdn_fetch(); - arg.rtree = &test_rtree; - assert_false(rtree_new(arg.rtree, false), - "Unexpected rtree_new() failure"); - arg.seed = gen_rand32(sfmt); - for (unsigned i = 0; i < NTHREADS; i++) { - thd_create(&thds[i], thd_start, (void *)&arg); - } - for (unsigned i = 0; i < NTHREADS; i++) { - thd_join(thds[i], NULL); - } - rtree_delete(tsdn, arg.rtree); - fini_gen_rand(sfmt); -} -TEST_END - #undef NTHREADS #undef NITERS #undef SEED @@ -254,13 +170,11 @@ TEST_BEGIN(test_rtree_random) { for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); - rtree_leaf_elm_t *elm = rtree_leaf_elm_acquire(tsdn, rtree, + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, - "Unexpected rtree_leaf_elm_acquire() failure"); - rtree_leaf_elm_write(tsdn, rtree, elm, true, &extent, NSIZES, - false); - rtree_leaf_elm_release(tsdn, rtree, elm); + "Unexpected rtree_leaf_elm_lookup() failure"); + rtree_leaf_elm_write(tsdn, rtree, elm, &extent, NSIZES, false); assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, keys[i], true), &extent, "rtree_extent_read() should return previously 
set value"); @@ -304,7 +218,6 @@ main(void) { return test( test_rtree_read_empty, - test_rtree_concurrent, test_rtree_extrema, test_rtree_bits, test_rtree_random); From b693c7868ea965407aca4cb01fdb8fe9af14adce Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 17 Mar 2017 12:42:33 -0700 Subject: [PATCH 0871/2608] Implementing opt.background_thread. Added opt.background_thread to enable background threads, which handles purging currently. When enabled, decay ticks will not trigger purging (which will be left to the background threads). We limit the max number of threads to NCPUs. When percpu arena is enabled, set CPU affinity for the background threads as well. The sleep interval of background threads is dynamic and determined by computing number of pages to purge in the future (based on backlog). --- Makefile.in | 1 + configure.ac | 32 +- include/jemalloc/internal/arena_externs.h | 83 +-- include/jemalloc/internal/arena_inlines_b.h | 2 +- .../internal/background_thread_externs.h | 29 + .../internal/background_thread_inlines.h | 21 + .../internal/background_thread_structs.h | 25 + .../internal/jemalloc_internal_defs.h.in | 9 + .../internal/jemalloc_internal_includes.h | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 13 + include/jemalloc/internal/smoothstep.h | 402 ++++++------ include/jemalloc/internal/smoothstep.sh | 6 +- include/jemalloc/internal/witness_types.h | 32 +- src/arena.c | 156 +++-- src/background_thread.c | 572 ++++++++++++++++++ src/ctl.c | 92 ++- src/jemalloc.c | 53 +- src/mutex.c | 19 +- src/stats.c | 1 + test/integration/extent.c | 14 + test/unit/decay.c | 20 + test/unit/smoothstep.c | 2 +- test/unit/stats.c | 6 +- 23 files changed, 1245 insertions(+), 348 deletions(-) create mode 100644 include/jemalloc/internal/background_thread_externs.h create mode 100644 include/jemalloc/internal/background_thread_inlines.h create mode 100644 include/jemalloc/internal/background_thread_structs.h create mode 100644 src/background_thread.c diff --git 
a/Makefile.in b/Makefile.in index 264b077c..aa6f3f62 100644 --- a/Makefile.in +++ b/Makefile.in @@ -91,6 +91,7 @@ BINS := $(objroot)bin/jemalloc-config $(objroot)bin/jemalloc.sh $(objroot)bin/je C_HDRS := $(objroot)include/jemalloc/jemalloc$(install_suffix).h C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ + $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ diff --git a/configure.ac b/configure.ac index 6c1d4ffc..8be4be45 100644 --- a/configure.ac +++ b/configure.ac @@ -1443,12 +1443,23 @@ dnl ============================================================================ dnl Configure pthreads. if test "x$abi" != "xpecoff" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ]) AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl first, but try libc too before failing. AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) + wrap_syms="${wrap_syms} pthread_create" + dnl Check if we have dlsym support. + have_dlsym="1" + AC_CHECK_HEADERS([dlfcn.h], + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]), + [have_dlsym="0"]) + if test "x$have_dlsym" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ]) + fi JE_COMPILABLE([pthread_atfork(3)], [ #include ], [ @@ -1563,6 +1574,15 @@ if test "x$have_sched_getcpu" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ]) fi +dnl Check if the GNU-specific sched_setaffinity function exists. +AC_CHECK_FUNC([sched_setaffinity], + [have_sched_setaffinity="1"], + [have_sched_setaffinity="0"] + ) +if test "x$have_sched_setaffinity" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ]) +fi + dnl Check if the Solaris/BSD issetugid function exists. 
AC_CHECK_FUNC([issetugid], [have_issetugid="1"], @@ -1623,15 +1643,11 @@ if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then enable_lazy_lock="0" fi if test "x$enable_lazy_lock" = "x1" ; then - if test "x$abi" != "xpecoff" ; then - AC_CHECK_HEADERS([dlfcn.h], , [AC_MSG_ERROR([dlfcn.h is missing])]) - AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [JE_APPEND_VS(LIBS, -ldl)], - [AC_MSG_ERROR([libdl is missing])]) - ]) + if test "x$have_dlsym" = "x1" ; then + AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) + else + AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.]) fi - AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) - wrap_syms="${wrap_syms} pthread_create" fi AC_SUBST([enable_lazy_lock]) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 292b8d6d..273705f7 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" -static const size_t large_pad = +static const size_t large_pad = #ifdef JEMALLOC_CACHE_OBLIVIOUS PAGE #else @@ -13,88 +13,91 @@ static const size_t large_pad = #endif ; -extern ssize_t opt_dirty_decay_ms; -extern ssize_t opt_muzzy_decay_ms; +extern ssize_t opt_dirty_decay_ms; +extern ssize_t opt_muzzy_decay_ms; -extern const arena_bin_info_t arena_bin_info[NBINS]; +extern const arena_bin_info_t arena_bin_info[NBINS]; -extern percpu_arena_mode_t percpu_arena_mode; +extern percpu_arena_mode_t percpu_arena_mode; extern const char *opt_percpu_arena; extern const char *percpu_arena_mode_names[]; +extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; + void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests); void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size); -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, +void arena_basic_stats_merge(tsdn_t *tsdn, 
arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); -void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, +void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET -size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif -extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, +extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); -void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, +void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, +void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); -void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, +void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t oldsize); ssize_t arena_dirty_decay_ms_get(arena_t *arena); bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_get(arena_t *arena); bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); -void arena_decay(tsdn_t *tsdn, arena_t *arena, bool all); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, +void arena_decay(tsdn_t *tsdn, arena_t *arena, 
bool is_background_thread, + bool all); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, +void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); -void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, +void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr); -void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache); -dss_prec_t arena_dss_prec_get(arena_t *arena); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +dss_prec_t arena_dss_prec_get(arena_t *arena); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t 
arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); -unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void arena_boot(void); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_prefork4(tsdn_t *tsdn, arena_t *arena); -void arena_prefork5(tsdn_t *tsdn, arena_t *arena); -void arena_prefork6(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); +unsigned arena_nthreads_get(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); +size_t arena_extent_sn_next(arena_t *arena); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void arena_boot(void); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ca7af7fd..a1057184 100644 --- 
a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -75,7 +75,7 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { return; } if (unlikely(ticker_ticks(decay_ticker, nticks))) { - arena_decay(tsdn, arena, false); + arena_decay(tsdn, arena, false, false); } } diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h new file mode 100644 index 00000000..993f0e3b --- /dev/null +++ b/include/jemalloc/internal/background_thread_externs.h @@ -0,0 +1,29 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H + +extern bool opt_background_thread; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern background_thread_info_t *background_thread_info; + +bool background_thread_create(tsd_t *tsd, unsigned arena_ind); +bool background_threads_init(tsd_t *tsd); +bool background_threads_enable(tsd_t *tsd); +bool background_threads_disable(tsd_t *tsd); +bool background_threads_disable_single(tsd_t *tsd, + background_thread_info_t *info); +void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new); +void background_thread_prefork0(tsdn_t *tsdn); +void background_thread_prefork1(tsdn_t *tsdn); +void background_thread_postfork_parent(tsdn_t *tsdn); +void background_thread_postfork_child(tsdn_t *tsdn); + +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +void *load_pthread_create_fptr(void); +#endif + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h new file mode 100644 index 
00000000..2709ae31 --- /dev/null +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -0,0 +1,21 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H + +JEMALLOC_ALWAYS_INLINE bool +background_thread_enabled(void) { + return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_enabled_set(tsdn_t *tsdn, bool state) { + malloc_mutex_assert_owner(tsdn, &background_thread_lock); + atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); +} + +JEMALLOC_ALWAYS_INLINE background_thread_info_t * +arena_background_thread_info_get(arena_t *arena) { + unsigned arena_ind = arena_ind_get(arena); + return &background_thread_info[arena_ind % ncpus]; +} + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h new file mode 100644 index 00000000..a43d600d --- /dev/null +++ b/include/jemalloc/internal/background_thread_structs.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H + +struct background_thread_info_s { + malloc_mutex_t mtx; +#ifdef JEMALLOC_BACKGROUND_THREAD + /* Background thread is pthread specific. */ + pthread_cond_t cond; + pthread_t thread; + /* Whether the thread has been created. */ + bool started; + /* Next scheduled wakeup time (absolute time). */ + nstime_t next_wakeup; + /* + * Since the last background thread run, newly added number of pages + * that need to be purged by the next wakeup. This is adjusted on + * epoch advance, and is used to determine whether we should signal the + * background thread to wake up earlier. 
+ */ + size_t npages_to_purge_new; +#endif /* ifdef JEMALLOC_BACKGROUND_THREAD */ +}; +typedef struct background_thread_info_s background_thread_info_t; + +#endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 78ddd376..75576a56 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -301,12 +301,21 @@ /* glibc memalign hook. */ #undef JEMALLOC_GLIBC_MEMALIGN_HOOK +/* pthread support */ +#undef JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#undef JEMALLOC_HAVE_DLSYM + /* Adaptive mutex support in pthreads. */ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP /* GNU specific sched_getcpu support */ #undef JEMALLOC_HAVE_SCHED_GETCPU +/* GNU specific sched_setaffinity support */ +#undef JEMALLOC_HAVE_SCHED_SETAFFINITY + /* * If defined, jemalloc symbols are not exported (doesn't work when * JEMALLOC_PREFIX is not defined). 
diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index cf321c12..45e648bc 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -65,6 +65,7 @@ #include "jemalloc/internal/arena_structs_b.h" #include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/background_thread_structs.h" /******************************************************************************/ /* EXTERNS */ @@ -82,6 +83,7 @@ #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/background_thread_externs.h" /******************************************************************************/ /* INLINES */ @@ -105,5 +107,6 @@ #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" #include "jemalloc/internal/prof_inlines_b.h" +#include "jemalloc/internal/background_thread_inlines.h" #endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 9e9225ef..0e876103 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -169,4 +169,17 @@ static const bool force_ivsalloc = #endif ; +#if (defined(JEMALLOC_HAVE_PTHREAD) && defined(JEMALLOC_HAVE_DLSYM) \ + && !defined(JEMALLOC_OSSPIN) && !defined(JEMALLOC_OS_UNFAIR_LOCK)) +/* Currently background thread supports pthread only. 
*/ +#define JEMALLOC_BACKGROUND_THREAD +#endif +static const bool have_background_thread = +#ifdef JEMALLOC_BACKGROUND_THREAD + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h index 2e14430f..5bca6e8c 100644 --- a/include/jemalloc/internal/smoothstep.h +++ b/include/jemalloc/internal/smoothstep.h @@ -27,206 +27,206 @@ #define SMOOTHSTEP_NSTEPS 200 #define SMOOTHSTEP_BFP 24 #define SMOOTHSTEP \ - /* STEP(step, h, x, y) */ \ - STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ - STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ - STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ - STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ - STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ - STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ - STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ - STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ - STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ - STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ - STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ - STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ - STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ - STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ - STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ - STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ - STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ - STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ - STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ - STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ - STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ 
- STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ - STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ - STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ - STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ - STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ - STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ - STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ - STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ - STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ - STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ - STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ - STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ - STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ - STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ - STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ - STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ - STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ - STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ - STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ - STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ - STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ - STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ - STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ - STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ - STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ - STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ - STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ - STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ - STEP( 50, 
UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ - STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ - STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ - STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ - STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ - STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ - STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ - STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ - STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ - STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ - STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ - STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ - STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ - STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ - STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ - STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ - STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ - STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ - STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ - STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ - STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ - STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ - STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ - STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ - STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ - STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ - STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ - STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ - STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 
0.300308369400000) \ - STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ - STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ - STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ - STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ - STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ - STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ - STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ - STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ - STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ - STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ - STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ - STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ - STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ - STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ - STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ - STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ - STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ - STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ - STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ - STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ - STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ - STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ - STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ - STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ - STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ - STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ - STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ - STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ - STEP( 
107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ - STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ - STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ - STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ - STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ - STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ - STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ - STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ - STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ - STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ - STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ - STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ - STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ - STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ - STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ - STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ - STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ - STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ - STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ - STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ - STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ - STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ - STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ - STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ - STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ - STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ - STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ - STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ - STEP( 135, 
UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ - STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ - STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ - STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ - STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ - STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ - STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ - STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ - STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ - STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ - STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ - STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ - STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ - STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ - STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ - STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ - STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ - STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ - STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ - STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ - STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ - STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ - STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ - STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ - STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ - STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ - STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ - STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ - STEP( 163, 
UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ - STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ - STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ - STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ - STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ - STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ - STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ - STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ - STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ - STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ - STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ - STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ - STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ - STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ - STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ - STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ - STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ - STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ - STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ - STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ - STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ - STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ - STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ - STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ - STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ - STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ - STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ - STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ - STEP( 191, 
UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ - STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ - STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ - STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ - STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ - STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ - STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ - STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ - STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ - STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ + /* STEP(step, h, x, y, h_sum) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750, UINT64_C(0x0000000000000014)) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000, UINT64_C(0x00000000000000b9)) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250, UINT64_C(0x00000000000002e2)) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000, UINT64_C(0x00000000000007f8)) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750, UINT64_C(0x00000000000011d4)) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000, UINT64_C(0x00000000000022bc)) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250, UINT64_C(0x0000000000003d60)) \ + STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000, UINT64_C(0x00000000000064d7)) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750, UINT64_C(0x0000000000009c99)) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000, UINT64_C(0x000000000000e87f)) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250, UINT64_C(0x0000000000014cbb)) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000, UINT64_C(0x000000000001cdda)) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750, 
UINT64_C(0x00000000000270bc)) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000, UINT64_C(0x0000000000033a94)) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250, UINT64_C(0x00000000000430e3)) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000, UINT64_C(0x0000000000055974)) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750, UINT64_C(0x000000000006ba5b)) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000, UINT64_C(0x00000000000859f0)) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250, UINT64_C(0x00000000000a3ecc)) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000, UINT64_C(0x00000000000c6fc8)) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750, UINT64_C(0x00000000000ef3f8)) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000, UINT64_C(0x000000000011d2a8)) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250, UINT64_C(0x0000000000151359)) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000, UINT64_C(0x000000000018bdc0)) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750, UINT64_C(0x00000000001cd9c0)) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000, UINT64_C(0x0000000000216f68)) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250, UINT64_C(0x00000000002686f3)) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000, UINT64_C(0x00000000002c28c2)) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750, UINT64_C(0x0000000000325d5a)) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000, UINT64_C(0x0000000000392d63)) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250, UINT64_C(0x000000000040a1a2)) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000, UINT64_C(0x000000000048c2f9)) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750, 
UINT64_C(0x0000000000519a64)) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000, UINT64_C(0x00000000005b30f5)) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250, UINT64_C(0x0000000000658fd4)) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000, UINT64_C(0x000000000070c03b)) \ + STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750, UINT64_C(0x00000000007ccb73)) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000, UINT64_C(0x000000000089bad1)) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250, UINT64_C(0x00000000009797b7)) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000, UINT64_C(0x0000000000a66b8f)) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750, UINT64_C(0x0000000000b63fc8)) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000, UINT64_C(0x0000000000c71dd6)) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250, UINT64_C(0x0000000000d90f2e)) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000, UINT64_C(0x0000000000ec1d45)) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750, UINT64_C(0x000000000100518d)) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000, UINT64_C(0x000000000115b574)) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250, UINT64_C(0x00000000012c5260)) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000, UINT64_C(0x00000000014431af)) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750, UINT64_C(0x00000000015d5cb3)) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000, UINT64_C(0x000000000177dcb3)) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250, UINT64_C(0x000000000193bae5)) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000, UINT64_C(0x0000000001b10070)) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750, 
UINT64_C(0x0000000001cfb668)) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000, UINT64_C(0x0000000001efe5cd)) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250, UINT64_C(0x0000000002119788)) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000, UINT64_C(0x000000000234d46b)) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750, UINT64_C(0x000000000259a52e)) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000, UINT64_C(0x000000000280126e)) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250, UINT64_C(0x0000000002a824ab)) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000, UINT64_C(0x0000000002d1e447)) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750, UINT64_C(0x0000000002fd5984)) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000, UINT64_C(0x00000000032a8c82)) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250, UINT64_C(0x000000000359853e)) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000, UINT64_C(0x00000000038a4b92)) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750, UINT64_C(0x0000000003bce731)) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000, UINT64_C(0x0000000003f15fa6)) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250, UINT64_C(0x000000000427bc56)) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000, UINT64_C(0x000000000460047b)) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750, UINT64_C(0x00000000049a3f23)) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000, UINT64_C(0x0000000004d67332)) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250, UINT64_C(0x000000000514a75d)) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000, UINT64_C(0x000000000554e22b)) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750, 
UINT64_C(0x00000000059729f3)) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000, UINT64_C(0x0000000005db84dc)) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250, UINT64_C(0x000000000621f8dc)) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000, UINT64_C(0x00000000066a8bb4)) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750, UINT64_C(0x0000000006b542f4)) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000, UINT64_C(0x00000000070223f6)) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250, UINT64_C(0x00000000075133df)) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000, UINT64_C(0x0000000007a2779e)) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750, UINT64_C(0x0000000007f5f3eb)) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000, UINT64_C(0x00000000084bad46)) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250, UINT64_C(0x0000000008a3a7f7)) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000, UINT64_C(0x0000000008fde80c)) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750, UINT64_C(0x00000000095a715a)) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000, UINT64_C(0x0000000009b9477c)) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250, UINT64_C(0x000000000a1a6dd1)) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000, UINT64_C(0x000000000a7de77d)) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750, UINT64_C(0x000000000ae3b768)) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000, UINT64_C(0x000000000b4be03e)) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250, UINT64_C(0x000000000bb6646d)) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000, UINT64_C(0x000000000c234628)) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750, 
UINT64_C(0x000000000c928762)) \ + STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000, UINT64_C(0x000000000d0429d2)) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250, UINT64_C(0x000000000d782eef)) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000, UINT64_C(0x000000000dee97f4)) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750, UINT64_C(0x000000000e6765db)) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000, UINT64_C(0x000000000ee29962)) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250, UINT64_C(0x000000000f603306)) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000, UINT64_C(0x000000000fe03306)) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750, UINT64_C(0x0000000010629961)) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000, UINT64_C(0x0000000010e765d9)) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250, UINT64_C(0x00000000116e97f1)) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000, UINT64_C(0x0000000011f82eeb)) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750, UINT64_C(0x00000000128429cd)) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000, UINT64_C(0x000000001312875c)) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250, UINT64_C(0x0000000013a34621)) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000, UINT64_C(0x0000000014366465)) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750, UINT64_C(0x0000000014cbe035)) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000, UINT64_C(0x000000001563b75e)) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250, UINT64_C(0x0000000015fde772)) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000, UINT64_C(0x00000000169a6dc5)) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 
0.620508836743750, UINT64_C(0x000000001739476f)) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000, UINT64_C(0x0000000017da714c)) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250, UINT64_C(0x00000000187de7fd)) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000, UINT64_C(0x000000001923a7e7)) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750, UINT64_C(0x0000000019cbad35)) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000, UINT64_C(0x000000001a75f3d9)) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250, UINT64_C(0x000000001b22778b)) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000, UINT64_C(0x000000001bd133cb)) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750, UINT64_C(0x000000001c8223e1)) \ + STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000, UINT64_C(0x000000001d3542de)) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250, UINT64_C(0x000000001dea8b9d)) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000, UINT64_C(0x000000001ea1f8c4)) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750, UINT64_C(0x000000001f5b84c4)) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000, UINT64_C(0x00000000201729da)) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250, UINT64_C(0x0000000020d4e211)) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000, UINT64_C(0x000000002194a742)) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750, UINT64_C(0x0000000022567316)) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000, UINT64_C(0x00000000231a3f06)) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250, UINT64_C(0x0000000023e0045d)) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000, UINT64_C(0x0000000024a7bc37)) \ + STEP( 133, 
UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750, UINT64_C(0x0000000025715f86)) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000, UINT64_C(0x00000000263ce710)) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250, UINT64_C(0x00000000270a4b70)) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000, UINT64_C(0x0000000027d9851b)) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750, UINT64_C(0x0000000028aa8c5e)) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000, UINT64_C(0x00000000297d595f)) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250, UINT64_C(0x000000002a51e421)) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000, UINT64_C(0x000000002b282484)) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750, UINT64_C(0x000000002c001246)) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000, UINT64_C(0x000000002cd9a505)) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250, UINT64_C(0x000000002db4d441)) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000, UINT64_C(0x000000002e91975d)) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750, UINT64_C(0x000000002f6fe5a1)) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000, UINT64_C(0x00000000304fb63b)) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250, UINT64_C(0x0000000031310042)) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000, UINT64_C(0x000000003213bab6)) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750, UINT64_C(0x0000000032f7dc83)) \ + STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000, UINT64_C(0x0000000033dd5c83)) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250, UINT64_C(0x0000000034c4317e)) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000, 
UINT64_C(0x0000000035ac522e)) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750, UINT64_C(0x000000003695b541)) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000, UINT64_C(0x0000000037805159)) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250, UINT64_C(0x00000000386c1d10)) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000, UINT64_C(0x0000000039590ef8)) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750, UINT64_C(0x000000003a471d9f)) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000, UINT64_C(0x000000003b363f90)) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250, UINT64_C(0x000000003c266b56)) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000, UINT64_C(0x000000003d17977d)) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750, UINT64_C(0x000000003e09ba96)) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000, UINT64_C(0x000000003efccb37)) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250, UINT64_C(0x000000003ff0bffe)) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000, UINT64_C(0x0000000040e58f96)) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750, UINT64_C(0x0000000041db30b6)) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000, UINT64_C(0x0000000042d19a24)) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250, UINT64_C(0x0000000043c8c2b8)) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000, UINT64_C(0x0000000044c0a160)) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750, UINT64_C(0x0000000045b92d20)) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000, UINT64_C(0x0000000046b25d16)) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250, UINT64_C(0x0000000047ac287d)) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 
0.860, 0.977999705600000, UINT64_C(0x0000000048a686ad)) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750, UINT64_C(0x0000000049a16f21)) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000, UINT64_C(0x000000004a9cd978)) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250, UINT64_C(0x000000004b98bd78)) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000, UINT64_C(0x000000004c951310)) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750, UINT64_C(0x000000004d91d25e)) \ + STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000, UINT64_C(0x000000004e8ef3ad)) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250, UINT64_C(0x000000004f8c6f7c)) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000, UINT64_C(0x00000000508a3e7f)) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750, UINT64_C(0x00000000518859a2)) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000, UINT64_C(0x000000005286ba0c)) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250, UINT64_C(0x0000000053855924)) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000, UINT64_C(0x0000000054843092)) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750, UINT64_C(0x0000000055833a42)) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000, UINT64_C(0x0000000056827069)) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250, UINT64_C(0x000000005781cd86)) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000, UINT64_C(0x0000000058814c66)) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750, UINT64_C(0x000000005980e829)) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000, UINT64_C(0x000000005a809c42)) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250, UINT64_C(0x000000005b80647f)) \ + STEP( 192, 
UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000, UINT64_C(0x000000005c803d07)) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750, UINT64_C(0x000000005d802262)) \ + STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000, UINT64_C(0x000000005e801179)) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250, UINT64_C(0x000000005f80079c)) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000, UINT64_C(0x0000000060800285)) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750, UINT64_C(0x000000006180005b)) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000, UINT64_C(0x00000000627fffb5)) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250, UINT64_C(0x00000000637fffa0)) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000, UINT64_C(0x00000000647fffa0)) \ #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh index 65de97bf..41164615 100755 --- a/include/jemalloc/internal/smoothstep.sh +++ b/include/jemalloc/internal/smoothstep.sh @@ -83,14 +83,16 @@ cat <extents_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { - arena_decay_dirty(tsdn, arena, true); + arena_decay_dirty(tsdn, arena, false, true); } } @@ -606,12 +613,6 @@ arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) { static size_t arena_decay_backlog_npages_limit(const arena_decay_t *decay) { - static const uint64_t h_steps[] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP - }; uint64_t sum; size_t npages_limit_backlog; unsigned i; @@ -660,17 +661,27 @@ arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents, arena_decay_backlog_update_last(decay, extents); } +static void +arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, extents_t *extents) { + size_t npages_limit = 
arena_decay_backlog_npages_limit(decay); + + if (extents_npages_get(extents) > npages_limit) { + arena_decay_to_limit(tsdn, arena, decay, extents, false, + npages_limit); + } +} + static void arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, const nstime_t *time) { - uint64_t nadvance_u64; - nstime_t delta; - assert(arena_decay_deadline_reached(decay, time)); + nstime_t delta; nstime_copy(&delta, time); nstime_subtract(&delta, &decay->epoch); - nadvance_u64 = nstime_divide(&delta, &decay->interval); + + uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); assert(nadvance_u64 > 0); /* Add nadvance_u64 decay intervals to epoch. */ @@ -686,14 +697,13 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, } static void -arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents) { - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - - if (extents_npages_get(extents) > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit); +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, const nstime_t *time, bool purge) { + arena_decay_epoch_advance_helper(decay, extents, time); + if (purge) { + arena_decay_try_purge(tsdn, arena, decay, extents); } + /* * There may be concurrent ndirty fluctuation between the purge above * and the nunpurged update below, but this is inconsequential to decay @@ -702,13 +712,6 @@ arena_decay_epoch_advance_purge(tsdn_t *tsdn, arena_t *arena, decay->nunpurged = extents_npages_get(extents); } -static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, const nstime_t *time) { - arena_decay_epoch_advance_helper(decay, extents, time); - arena_decay_epoch_advance_purge(tsdn, arena, decay, extents); -} - static void arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { 
arena_decay_ms_write(decay, decay_ms); @@ -759,9 +762,9 @@ arena_decay_ms_valid(ssize_t decay_ms) { return false; } -static void +static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents) { + extents_t *extents, bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ @@ -771,7 +774,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, arena_decay_to_limit(tsdn, arena, decay, extents, false, 0); } - return; + return false; } nstime_t time; @@ -799,11 +802,20 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * If the deadline has been reached, advance to the current epoch and * purge to the new limit if necessary. Note that dirty pages created * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances. + * epoch, so as a result purging only happens during epoch advances, or + * being triggered by background threads (scheduled event). */ - if (arena_decay_deadline_reached(decay, &time)) { - arena_decay_epoch_advance(tsdn, arena, decay, extents, &time); + bool advance_epoch = arena_decay_deadline_reached(decay, &time); + if (advance_epoch) { + bool should_purge = is_background_thread || + !background_thread_enabled(); + arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, + should_purge); + } else if (is_background_thread) { + arena_decay_try_purge(tsdn, arena, decay, extents); } + + return advance_epoch; } static ssize_t @@ -838,7 +850,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. 
*/ arena_decay_reinit(decay, extents, decay_ms); - arena_maybe_decay(tsdn, arena, decay, extents); + arena_maybe_decay(tsdn, arena, decay, extents, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -974,40 +986,57 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all) { + extents_t *extents, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0); - } else { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* No need to wait if another thread is in progress. */ - return true; - } - arena_maybe_decay(tsdn, arena, decay, extents); + malloc_mutex_unlock(tsdn, &decay->mtx); + + return false; + } + + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* No need to wait if another thread is in progress. */ + return true; + } + + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, + is_background_thread); + size_t npages_new; + if (epoch_advanced) { + /* Backlog is updated on epoch advance. 
*/ + npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; } malloc_mutex_unlock(tsdn, &decay->mtx); + if (have_background_thread && background_thread_enabled() && + epoch_advanced && !is_background_thread) { + background_thread_interval_check(tsdn, arena, decay, npages_new); + } + return false; } static bool -arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool all) { +arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, + bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, all); + &arena->extents_dirty, is_background_thread, all); } static bool -arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool all) { +arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, + bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, all); + &arena->extents_muzzy, is_background_thread, all); } void -arena_decay(tsdn_t *tsdn, arena_t *arena, bool all) { - if (arena_decay_dirty(tsdn, arena, all)) { +arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { + if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; } - arena_decay_muzzy(tsdn, arena, all); + arena_decay_muzzy(tsdn, arena, is_background_thread, all); } static void @@ -1173,6 +1202,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * extents, so only retained extents may remain. */ assert(extents_npages_get(&arena->extents_dirty) == 0); + assert(extents_npages_get(&arena->extents_muzzy) == 0); /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1971,19 +2001,35 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->base = base; + /* Set arena before creating background threads. */ + arena_set(ind, arena); nstime_init(&arena->create_time, 0); nstime_update(&arena->create_time); - /* We don't support reetrancy for arena 0 bootstrapping. 
*/ - if (ind != 0 && hooks_arena_new_hook) { + /* We don't support reentrancy for arena 0 bootstrapping. */ + if (ind != 0) { /* * If we're here, then arena 0 already exists, so bootstrapping * is done enough that we should have tsd. */ + assert(!tsdn_null(tsdn)); pre_reentrancy(tsdn_tsd(tsdn)); - hooks_arena_new_hook(); + if (hooks_arena_new_hook) { + hooks_arena_new_hook(); + } post_reentrancy(tsdn_tsd(tsdn)); + + /* background_thread_create() handles reentrancy internally. */ + if (have_background_thread) { + bool err; + malloc_mutex_lock(tsdn, &background_thread_lock); + err = background_thread_create(tsdn_tsd(tsdn), ind); + malloc_mutex_unlock(tsdn, &background_thread_lock); + if (err) { + goto label_error; + } + } } return arena; diff --git a/src/background_thread.c b/src/background_thread.c new file mode 100644 index 00000000..671e57f7 --- /dev/null +++ b/src/background_thread.c @@ -0,0 +1,572 @@ +#define JEMALLOC_BACKGROUND_THREAD_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" + +/******************************************************************************/ +/* Data. */ + +/* This option should be opt-in only. */ +#define BACKGROUND_THREAD_DEFAULT false +/* Read-only after initialization. */ +bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; + +/* Used for thread creation, termination and stats. */ +malloc_mutex_t background_thread_lock; +/* Indicates global state. Atomic because decay reads this w/o locking. */ +atomic_b_t background_thread_enabled_state; +size_t n_background_threads; +/* Thread info per-index. 
*/ +background_thread_info_t *background_thread_info; + +/******************************************************************************/ + +#ifndef JEMALLOC_BACKGROUND_THREAD +#define NOT_REACHED { not_reached(); } +bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED +bool background_threads_init(tsd_t *tsd) NOT_REACHED +bool background_threads_enable(tsd_t *tsd) NOT_REACHED +bool background_threads_disable(tsd_t *tsd) NOT_REACHED +bool background_threads_disable_single(tsd_t *tsd, + background_thread_info_t *info) NOT_REACHED +void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new) NOT_REACHED +void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED +void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED +void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED +void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED +#undef NOT_REACHED +#else +bool +background_threads_init(tsd_t *tsd) { + assert(have_background_thread); + assert(narenas_total_get() > 0); + + background_thread_enabled_set(tsd_tsdn(tsd), opt_background_thread); + if (malloc_mutex_init(&background_thread_lock, + "background_thread_global", + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + malloc_mutex_rank_exclusive)) { + return true; + } + background_thread_info = (background_thread_info_t *)base_alloc( + tsd_tsdn(tsd), b0get(), ncpus * sizeof(background_thread_info_t), + CACHELINE); + if (background_thread_info == NULL) { + return true; + } + + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (malloc_mutex_init(&info->mtx, "background_thread", + WITNESS_RANK_BACKGROUND_THREAD, + malloc_mutex_rank_exclusive)) { + return true; + } + if (pthread_cond_init(&info->cond, NULL)) { + return true; + } + info->started = false; + nstime_init(&info->next_wakeup, 0); + info->npages_to_purge_new = 0; + } + + return false; +} + +static inline bool 
+set_current_thread_affinity(UNUSED int cpu) { +#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); + int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); + + return (ret != 0); +#else + return false; +#endif +} + +/* Threshold for determining when to wake up the background thread. */ +#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024) +#define BILLION UINT64_C(1000000000) +/* Minimal sleep interval 100 ms. */ +#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) +#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX + +static inline size_t +decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { + size_t i; + uint64_t sum = 0; + for (i = 0; i < interval; i++) { + sum += decay->backlog[i] * h_steps[i]; + } + for (; i < SMOOTHSTEP_NSTEPS; i++) { + sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]); + } + + return (size_t)(sum >> SMOOTHSTEP_BFP); +} + +static uint64_t +arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, + extents_t *extents) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_MIN_INTERVAL_NS; + } + + uint64_t interval; + ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + if (decay_time <= 0) { + /* Purging is eagerly done or disabled currently. */ + interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + goto label_done; + } + + uint64_t decay_interval_ns = nstime_ns(&decay->interval); + assert(decay_interval_ns > 0); + size_t npages = extents_npages_get(extents); + if (npages == 0) { + unsigned i; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + if (decay->backlog[i] > 0) { + break; + } + } + if (i == SMOOTHSTEP_NSTEPS) { + /* No dirty pages recorded. Sleep indefinitely. */ + interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + goto label_done; + } + } + if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) { + /* Use max interval. 
*/ + interval = decay_interval_ns * SMOOTHSTEP_NSTEPS; + goto label_done; + } + + size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns; + size_t ub = SMOOTHSTEP_NSTEPS; + /* Minimal 2 intervals to ensure reaching next epoch deadline. */ + lb = (lb < 2) ? 2 : lb; + if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) || + (lb + 2 > ub)) { + interval = BACKGROUND_THREAD_MIN_INTERVAL_NS; + goto label_done; + } + + assert(lb + 2 <= ub); + size_t npurge_lb, npurge_ub; + npurge_lb = decay_npurge_after_interval(decay, lb); + if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) { + interval = decay_interval_ns * lb; + goto label_done; + } + npurge_ub = decay_npurge_after_interval(decay, ub); + if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) { + interval = decay_interval_ns * ub; + goto label_done; + } + + unsigned n_search = 0; + size_t target, npurge; + while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub) + && (lb + 2 < ub)) { + target = (lb + ub) / 2; + npurge = decay_npurge_after_interval(decay, target); + if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) { + ub = target; + npurge_ub = npurge; + } else { + lb = target; + npurge_lb = npurge; + } + assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + } + interval = decay_interval_ns * (ub + lb) / 2; +label_done: + interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ? + BACKGROUND_THREAD_MIN_INTERVAL_NS : interval; + malloc_mutex_unlock(tsdn, &decay->mtx); + + return interval; +} + +/* Compute purge interval for background threads. */ +static uint64_t +arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { + uint64_t i1, i2; + i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, + &arena->extents_dirty); + if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + return i1; + } + i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, + &arena->extents_muzzy); + + return i1 < i2 ? 
i1 : i2; +} + +static inline uint64_t +background_work_once(tsdn_t *tsdn, unsigned ind) { + arena_t *arena; + unsigned i, narenas; + uint64_t min_interval; + + min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + narenas = narenas_total_get(); + for (i = ind; i < narenas; i += ncpus) { + arena = arena_get(tsdn, i, false); + if (!arena) { + continue; + } + + arena_decay(tsdn, arena, true, false); + uint64_t interval = arena_decay_compute_purge_interval(tsdn, + arena); + if (interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + return interval; + } + + assert(interval > BACKGROUND_THREAD_MIN_INTERVAL_NS); + if (min_interval > interval) { + min_interval = interval; + } + } + + return min_interval; +} + +static void +background_work(tsdn_t *tsdn, unsigned ind) { + int ret; + background_thread_info_t *info = &background_thread_info[ind]; + + malloc_mutex_lock(tsdn, &info->mtx); + while (info->started) { + uint64_t interval = background_work_once(tsdn, ind); + info->npages_to_purge_new = 0; + + if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { + nstime_init(&info->next_wakeup, + BACKGROUND_THREAD_INDEFINITE_SLEEP); + ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + assert(ret == 0); + continue; + } + + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && + interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); + nstime_init(&info->next_wakeup, 0); + nstime_update(&info->next_wakeup); + info->next_wakeup.ns += interval; + + nstime_t ts_wakeup; + struct timeval tv; + gettimeofday(&tv, NULL); + nstime_init2(&ts_wakeup, tv.tv_sec, + tv.tv_usec * 1000 + interval); + struct timespec ts; + ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); + ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); + ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, + &ts); + assert(ret == ETIMEDOUT || ret == 0); + } + malloc_mutex_unlock(tsdn, &info->mtx); +} + +static void * +background_thread_entry(void *ind_arg) { + unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; + assert(thread_ind < 
narenas_total_get() && thread_ind < ncpus); + + if (opt_percpu_arena != percpu_arena_disabled) { + set_current_thread_affinity((int)thread_ind); + } + /* + * Start periodic background work. We avoid fetching tsd to keep the + * background thread "outside", since there may be side effects, for + * example triggering new arena creation (which in turn triggers + * background thread creation). + */ + background_work(TSDN_NULL, thread_ind); + assert(pthread_equal(pthread_self(), + background_thread_info[thread_ind].thread)); + + return NULL; +} + +/* Create a new background thread if needed. */ +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) { + assert(have_background_thread); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + /* We create at most NCPUs threads. */ + size_t thread_ind = arena_ind % ncpus; + background_thread_info_t *info = &background_thread_info[thread_ind]; + + bool need_new_thread; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + need_new_thread = background_thread_enabled() && !info->started; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + if (!need_new_thread) { + return false; + } + + pre_reentrancy(tsd); + int err; + load_pthread_create_fptr(); + if ((err = pthread_create(&info->thread, NULL, + background_thread_entry, (void *)thread_ind)) != 0) { + malloc_printf(": arena %u background thread creation " + "failed (%d).\n", arena_ind, err); + } + post_reentrancy(tsd); + + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + assert(info->started == false); + if (err == 0) { + info->started = true; + n_background_threads++; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + + return (err != 0); +} + +bool +background_threads_enable(tsd_t *tsd) { + assert(n_background_threads == 0); + assert(background_thread_enabled()); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + VARIABLE_ARRAY(bool, created, ncpus); + unsigned i, ncreated; + for (i = 0; i < ncpus; i++) { + created[i] = false; + 
} + ncreated = 0; + + unsigned n = narenas_total_get(); + for (i = 0; i < n; i++) { + if (created[i % ncpus] || + arena_get(tsd_tsdn(tsd), i, false) == NULL) { + continue; + } + if (background_thread_create(tsd, i)) { + return true; + } + created[i % ncpus] = true; + if (++ncreated == ncpus) { + break; + } + } + + return false; +} + +bool +background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + pre_reentrancy(tsd); + + bool has_thread; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + if (info->started) { + has_thread = true; + info->started = false; + pthread_cond_signal(&info->cond); + } else { + has_thread = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + + if (!has_thread) { + post_reentrancy(tsd); + return false; + } + void *ret; + if (pthread_join(info->thread, &ret)) { + post_reentrancy(tsd); + return true; + } + assert(ret == NULL); + n_background_threads--; + post_reentrancy(tsd); + + return false; +} + +bool +background_threads_disable(tsd_t *tsd) { + assert(!background_thread_enabled()); + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (background_threads_disable_single(tsd, info)) { + return true; + } + } + assert(n_background_threads == 0); + + return false; +} + +/* Check if we need to signal the background thread early. */ +void +background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, + arena_decay_t *decay, size_t npages_new) { + background_thread_info_t *info = arena_background_thread_info_get( + arena); + + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Background thread may hold the mutex for a long period of + * time. We'd like to avoid the variance on application + * threads. So keep this non-blocking, and leave the work to a + * future epoch. 
+ */ + return; + } + + if (!info->started) { + goto label_done; + } + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + goto label_done; + } + + ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + if (decay_time <= 0) { + /* Purging is eagerly done or disabled currently. */ + goto label_done_unlock2; + } + if (nstime_compare(&info->next_wakeup, &decay->epoch) <= 0) { + goto label_done_unlock2; + } + + uint64_t decay_interval_ns = nstime_ns(&decay->interval); + assert(decay_interval_ns > 0); + nstime_t diff; + nstime_copy(&diff, &info->next_wakeup); + nstime_subtract(&diff, &decay->epoch); + if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + goto label_done_unlock2; + } + + if (npages_new > 0) { + size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns); + /* + * Compute how many new pages we would need to purge by the next + * wakeup, which is used to determine if we should signal the + * background thread. + */ + uint64_t npurge_new; + if (n_epoch >= SMOOTHSTEP_NSTEPS) { + npurge_new = npages_new; + } else { + uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; + assert(h_steps_max >= + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npurge_new = npages_new * (h_steps_max - + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npurge_new >>= SMOOTHSTEP_BFP; + } + info->npages_to_purge_new += npurge_new; + } + + if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD || + (nstime_ns(&info->next_wakeup) == + BACKGROUND_THREAD_INDEFINITE_SLEEP && info->npages_to_purge_new > 0)) { + info->npages_to_purge_new = 0; + pthread_cond_signal(&info->cond); + } +label_done_unlock2: + malloc_mutex_unlock(tsdn, &decay->mtx); +label_done: + malloc_mutex_unlock(tsdn, &info->mtx); +} + +void +background_thread_prefork0(tsdn_t *tsdn) { + malloc_mutex_prefork(tsdn, &background_thread_lock); + if (background_thread_enabled()) { + background_thread_enabled_set(tsdn, false); + background_threads_disable(tsdn_tsd(tsdn)); + /* Enable again to 
re-create threads after fork. */ + background_thread_enabled_set(tsdn, true); + } + assert(n_background_threads == 0); +} + +void +background_thread_prefork1(tsdn_t *tsdn) { + for (unsigned i = 0; i < ncpus; i++) { + malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx); + } +} + +static void +background_thread_postfork_init(tsdn_t *tsdn) { + if (background_thread_enabled()) { + background_threads_enable(tsdn_tsd(tsdn)); + } +} + +void +background_thread_postfork_parent(tsdn_t *tsdn) { + for (unsigned i = 0; i < ncpus; i++) { + malloc_mutex_postfork_parent(tsdn, + &background_thread_info[i].mtx); + } + background_thread_postfork_init(tsdn); + malloc_mutex_postfork_parent(tsdn, &background_thread_lock); +} + +void +background_thread_postfork_child(tsdn_t *tsdn) { + for (unsigned i = 0; i < ncpus; i++) { + malloc_mutex_postfork_child(tsdn, + &background_thread_info[i].mtx); + } + malloc_mutex_postfork_child(tsdn, &background_thread_lock); + + malloc_mutex_lock(tsdn, &background_thread_lock); + background_thread_postfork_init(tsdn); + malloc_mutex_unlock(tsdn, &background_thread_lock); +} + +#undef BACKGROUND_THREAD_NPAGES_THRESHOLD +#undef BILLION +#undef BACKGROUND_THREAD_MIN_INTERVAL_NS +#undef BACKGROUND_THREAD_INDEFINITE_SLEEP + +#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ + +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +#include + +int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); + +void * +load_pthread_create_fptr(void) { + if (pthread_create_fptr) { + return pthread_create_fptr; + } + + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + + return pthread_create_fptr; +} + +#endif diff --git a/src/ctl.c b/src/ctl.c index 7f69f151..ee0979a8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -53,6 +53,7 @@ static const ctl_named_node_t 
*n##_index(tsdn_t *tsdn, \ CTL_PROTO(version) CTL_PROTO(epoch) +CTL_PROTO(background_thread) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_prof_name) @@ -78,6 +79,7 @@ CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) +CTL_PROTO(opt_background_thread) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -265,6 +267,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, + {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -501,6 +504,7 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, + {NAME("background_thread"), CTL(background_thread)}, {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, @@ -1445,6 +1449,53 @@ label_return: return ret; } +static int +background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + bool oldval; + + if (!have_background_thread) { + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (newp == NULL) { + oldval = background_thread_enabled(); + READ(oldval, bool); + } else { + if (newlen != sizeof(bool)) { + ret = EINVAL; + goto label_return; + } + oldval = background_thread_enabled(); + READ(oldval, bool); + + bool newval = *(bool *)newp; + if (newval == oldval) { + ret = 0; + goto label_return; + } + + background_thread_enabled_set(tsd_tsdn(tsd), newval); + if (newval) { + if (background_threads_enable(tsd)) { + ret = EFAULT; + goto label_return; + } + } else { + if 
(background_threads_disable(tsd)) { + ret = EFAULT; + goto label_return; + } + } + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + return ret; +} + /******************************************************************************/ CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) @@ -1466,6 +1517,7 @@ CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) +CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) @@ -1764,7 +1816,8 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) { - arena_decay(tsdn, tarenas[i], all); + arena_decay(tsdn, tarenas[i], false, + all); } } } else { @@ -1778,7 +1831,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { malloc_mutex_unlock(tsdn, &ctl_mtx); if (tarena != NULL) { - arena_decay(tsdn, tarena, all); + arena_decay(tsdn, tarena, false, all); } } } @@ -1837,6 +1890,35 @@ label_return: return ret; } +static void +arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) { + /* Temporarily disable the background thread during arena reset. 
*/ + if (have_background_thread) { + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (background_thread_enabled()) { + unsigned ind = arena_ind % ncpus; + background_thread_info_t *info = + &background_thread_info[ind]; + assert(info->started); + background_threads_disable_single(tsd, info); + } + } +} + +static void +arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { + if (have_background_thread) { + if (background_thread_enabled()) { + unsigned ind = arena_ind % ncpus; + background_thread_info_t *info = + &background_thread_info[ind]; + assert(!info->started); + background_thread_create(tsd, ind); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + } +} + static int arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -1850,7 +1932,9 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, return ret; } + arena_reset_prepare_background_thread(tsd, arena_ind); arena_reset(tsd, arena); + arena_reset_finish_background_thread(tsd, arena_ind); return ret; } @@ -1875,9 +1959,10 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, goto label_return; } + arena_reset_prepare_background_thread(tsd, arena_ind); /* Merge stats after resetting and purging arena. */ arena_reset(tsd, arena); - arena_decay(tsd_tsdn(tsd), arena, true); + arena_decay(tsd_tsdn(tsd), arena, false, true); ctl_darena = arenas_i(MALLCTL_ARENAS_DESTROYED); ctl_darena->initialized = true; ctl_arena_refresh(tsd_tsdn(tsd), arena, ctl_darena, arena_ind, true); @@ -1888,6 +1973,7 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Record arena index for later recycling via arenas.create. 
*/ ql_elm_new(ctl_arena, destroyed_link); ql_tail_insert(&ctl_arenas->destroyed, ctl_arena, destroyed_link); + arena_reset_finish_background_thread(tsd, arena_ind); assert(ret == 0); label_return: diff --git a/src/jemalloc.c b/src/jemalloc.c index 56aef5b0..b03e5f48 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -420,7 +420,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Actually initialize the arena. */ arena = arena_new(tsdn, ind, extent_hooks); - arena_set(ind, arena); + return arena; } @@ -1140,6 +1140,8 @@ malloc_conf_init(void) { } continue; } + CONF_HANDLE_BOOL(opt_background_thread, + "background_thread"); if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -1380,6 +1382,22 @@ malloc_init_narenas(void) { return false; } +static bool +malloc_init_background_threads(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &init_lock); + if (!have_background_thread) { + if (opt_background_thread) { + malloc_printf(": option background_thread " + "currently supports pthread only. \n"); + return true; + } else { + return false; + } + } + + return background_threads_init(tsd); +} + static bool malloc_init_hard_finish(void) { if (malloc_mutex_boot()) @@ -1421,8 +1439,8 @@ malloc_init_hard(void) { } malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); - /* Need this before prof_boot2 (for allocation). */ - if (malloc_init_narenas()) { + /* Initialize narenas before prof_boot2 (for allocation). */ + if (malloc_init_narenas() || malloc_init_background_threads(tsd)) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; } @@ -1439,6 +1457,23 @@ malloc_init_hard(void) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); malloc_tsd_boot1(); + + /* Update TSD after tsd_boot1. */ + tsd = tsd_fetch(); + if (opt_background_thread) { + assert(have_background_thread); + /* + * Need to finish init & unlock first before creating background + * threads (pthread_create depends on malloc). 
+ */ + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + bool err = background_thread_create(tsd, 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + if (err) { + return true; + } + } + return false; } @@ -2970,7 +3005,13 @@ _malloc_prefork(void) ctl_prefork(tsd_tsdn(tsd)); tcache_prefork(tsd_tsdn(tsd)); malloc_mutex_prefork(tsd_tsdn(tsd), &arenas_lock); + if (have_background_thread) { + background_thread_prefork0(tsd_tsdn(tsd)); + } prof_prefork0(tsd_tsdn(tsd)); + if (have_background_thread) { + background_thread_prefork1(tsd_tsdn(tsd)); + } /* Break arena prefork into stages to preserve lock order. */ for (i = 0; i < 7; i++) { for (j = 0; j < narenas; j++) { @@ -3036,6 +3077,9 @@ _malloc_postfork(void) } } prof_postfork_parent(tsd_tsdn(tsd)); + if (have_background_thread) { + background_thread_postfork_parent(tsd_tsdn(tsd)); + } malloc_mutex_postfork_parent(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_parent(tsd_tsdn(tsd)); ctl_postfork_parent(tsd_tsdn(tsd)); @@ -3060,6 +3104,9 @@ jemalloc_postfork_child(void) { } } prof_postfork_child(tsd_tsdn(tsd)); + if (have_background_thread) { + background_thread_postfork_child(tsd_tsdn(tsd)); + } malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); diff --git a/src/mutex.c b/src/mutex.c index b15bbf6e..c92ddd72 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -5,10 +5,6 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -#include -#endif - #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 #endif @@ -24,10 +20,6 @@ static bool postpone_init = true; static malloc_mutex_t *postponed_mutexes = NULL; #endif -#if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static void pthread_create_once(void); -#endif - /******************************************************************************/ /* * We intercept pthread_create() calls in order to 
toggle isthreaded if the @@ -35,18 +27,9 @@ static void pthread_create_once(void); */ #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); - static void pthread_create_once(void) { - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - + pthread_create_fptr = load_pthread_create_fptr(); isthreaded = true; } diff --git a/src/stats.c b/src/stats.c index 3c9eb35a..f98b8ece 100644 --- a/src/stats.c +++ b/src/stats.c @@ -816,6 +816,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") + OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") OPT_WRITE_CHAR_P(junk, ",") diff --git a/test/integration/extent.c b/test/integration/extent.c index 32432af9..7262b803 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,6 +2,18 @@ #include "test/extent_hooks.h" +static bool +check_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + assert_d_eq(ret, 0, "Unexpected mallctl error"); + return enabled; +} + static void test_extent_body(unsigned arena_ind) { void *p; @@ -124,6 +136,7 @@ TEST_BEGIN(test_extent_manual_hook) { assert_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); + test_skip_if(check_background_thread_enabled()); test_extent_body(arena_ind); /* Restore extent hooks. 
*/ @@ -164,6 +177,7 @@ TEST_BEGIN(test_extent_auto_hook) { assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); + test_skip_if(check_background_thread_enabled()); test_extent_body(arena_ind); } TEST_END diff --git a/test/unit/decay.c b/test/unit/decay.c index 19f76fa5..f727bf93 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -9,6 +9,18 @@ static unsigned nupdates_mock; static nstime_t time_mock; static bool monotonic_mock; +static bool +check_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + assert_d_eq(ret, 0, "Unexpected mallctl error"); + return enabled; +} + static bool nstime_monotonic_mock(void) { return monotonic_mock; @@ -167,6 +179,8 @@ generate_dirty(unsigned arena_ind, size_t size) { } TEST_BEGIN(test_decay_ticks) { + test_skip_if(check_background_thread_enabled()); + ticker_t *decay_ticker; unsigned tick0, tick1, arena_ind; size_t sz, large0; @@ -405,6 +419,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, } TEST_BEGIN(test_decay_ticker) { + test_skip_if(check_background_thread_enabled()); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; ssize_t mdt = opt_muzzy_decay_ms; @@ -466,6 +481,7 @@ TEST_BEGIN(test_decay_ticker) { TEST_END TEST_BEGIN(test_decay_nonmonotonic) { + test_skip_if(check_background_thread_enabled()); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -523,6 +539,8 @@ TEST_BEGIN(test_decay_nonmonotonic) { TEST_END TEST_BEGIN(test_decay_now) { + test_skip_if(check_background_thread_enabled()); + unsigned arena_ind = do_arena_create(0, 0); assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); @@ -541,6 +559,8 @@ 
TEST_BEGIN(test_decay_now) { TEST_END TEST_BEGIN(test_decay_never) { + test_skip_if(check_background_thread_enabled()); + unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 6e3eb0f9..549aed12 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static const uint64_t smoothstep_tab[] = { -#define STEP(step, h, x, y) \ +#define STEP(step, h, x, y, h_sum) \ h, SMOOTHSTEP #undef STEP diff --git a/test/unit/stats.c b/test/unit/stats.c index f5ee1287..d9849d80 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -115,8 +115,10 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - assert_u64_gt(dirty_npurge + muzzy_npurge, 0, - "At least one purge should have occurred"); + if (!background_thread_enabled()) { + assert_u64_gt(dirty_npurge + muzzy_npurge, 0, + "At least one purge should have occurred"); + } assert_u64_le(dirty_nmadvise, dirty_purged, "dirty_nmadvise should be no greater than dirty_purged"); assert_u64_le(muzzy_nmadvise, muzzy_purged, From 2bee0c6251856f48ed6882df2f02a060c0a14829 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 May 2017 12:30:33 -0700 Subject: [PATCH 0872/2608] Add background thread related stats. 
--- .../internal/background_thread_externs.h | 2 + .../internal/background_thread_structs.h | 11 +++ include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/nstime.h | 2 + src/background_thread.c | 99 +++++++++++++++---- src/ctl.c | 30 ++++++ src/nstime.c | 14 +++ src/stats.c | 40 +++++++- test/unit/nstime.c | 36 +++++++ 9 files changed, 214 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 993f0e3b..fe25acfe 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -19,6 +19,8 @@ void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); void background_thread_postfork_child(tsdn_t *tsdn); +bool background_thread_stats_read(tsdn_t *tsdn, + background_thread_stats_t *stats); #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index a43d600d..c4eb8273 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -18,8 +18,19 @@ struct background_thread_info_s { * background thread to wake up earlier. */ size_t npages_to_purge_new; + /* Stats: total number of runs since started. */ + uint64_t tot_n_runs; + /* Stats: total sleep time since started. 
*/ + nstime_t tot_sleep_time; #endif /* ifdef JEMALLOC_BACKGROUND_THREAD */ }; typedef struct background_thread_info_s background_thread_info_t; +struct background_thread_stats_s { + size_t num_threads; + uint64_t num_runs; + nstime_t run_interval; +}; +typedef struct background_thread_stats_s background_thread_stats_t; + #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 60b3979f..f159383d 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -52,6 +52,7 @@ typedef struct ctl_stats_s { size_t mapped; size_t retained; + background_thread_stats_t background_thread; mutex_prof_data_t mutex_prof_data[mutex_prof_num_global_mutexes]; } ctl_stats_t; diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index ad7efb89..17c177c7 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -18,7 +18,9 @@ uint64_t nstime_nsec(const nstime_t *time); void nstime_copy(nstime_t *time, const nstime_t *source); int nstime_compare(const nstime_t *a, const nstime_t *b); void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_iadd(nstime_t *time, uint64_t addend); void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_isubtract(nstime_t *time, uint64_t subtrahend); void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); diff --git a/src/background_thread.c b/src/background_thread.c index 671e57f7..076d9815 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -36,8 +36,21 @@ void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED 
+bool background_thread_stats_read(tsdn_t *tsdn, + background_thread_stats_t *stats) NOT_REACHED #undef NOT_REACHED #else + +static void +background_thread_info_reinit(background_thread_info_t *info) { + nstime_init(&info->next_wakeup, 0); + info->npages_to_purge_new = 0; + if (config_stats) { + info->tot_n_runs = 0; + nstime_init(&info->tot_sleep_time, 0); + } +} + bool background_threads_init(tsd_t *tsd) { assert(have_background_thread); @@ -68,8 +81,7 @@ background_threads_init(tsd_t *tsd) { return true; } info->started = false; - nstime_init(&info->next_wakeup, 0); - info->npages_to_purge_new = 0; + background_thread_info_reinit(info); } return false; @@ -248,33 +260,49 @@ background_work(tsdn_t *tsdn, unsigned ind) { malloc_mutex_lock(tsdn, &info->mtx); while (info->started) { uint64_t interval = background_work_once(tsdn, ind); + if (config_stats) { + info->tot_n_runs++; + } info->npages_to_purge_new = 0; + struct timeval tv; + gettimeofday(&tv, NULL); + nstime_t before_sleep; + nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000); + if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { nstime_init(&info->next_wakeup, BACKGROUND_THREAD_INDEFINITE_SLEEP); ret = pthread_cond_wait(&info->cond, &info->mtx.lock); assert(ret == 0); - continue; + } else { + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && + interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); + nstime_init(&info->next_wakeup, 0); + nstime_update(&info->next_wakeup); + nstime_iadd(&info->next_wakeup, interval); + + nstime_t ts_wakeup; + nstime_copy(&ts_wakeup, &before_sleep); + nstime_iadd(&ts_wakeup, interval); + struct timespec ts; + ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); + ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); + + ret = pthread_cond_timedwait(&info->cond, + &info->mtx.lock, &ts); + assert(ret == ETIMEDOUT || ret == 0); } - assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && - interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); - nstime_init(&info->next_wakeup, 0); - 
nstime_update(&info->next_wakeup); - info->next_wakeup.ns += interval; - - nstime_t ts_wakeup; - struct timeval tv; - gettimeofday(&tv, NULL); - nstime_init2(&ts_wakeup, tv.tv_sec, - tv.tv_usec * 1000 + interval); - struct timespec ts; - ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); - ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); - ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, - &ts); - assert(ret == ETIMEDOUT || ret == 0); + if (config_stats) { + gettimeofday(&tv, NULL); + nstime_t after_sleep; + nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000); + if (nstime_compare(&after_sleep, &before_sleep) > 0) { + nstime_subtract(&after_sleep, &before_sleep); + nstime_add(&info->tot_sleep_time, &after_sleep); + } + } } malloc_mutex_unlock(tsdn, &info->mtx); } @@ -332,6 +360,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { assert(info->started == false); if (err == 0) { info->started = true; + background_thread_info_reinit(info); n_background_threads++; } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -540,6 +569,36 @@ background_thread_postfork_child(tsdn_t *tsdn) { malloc_mutex_unlock(tsdn, &background_thread_lock); } +bool +background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { + assert(config_stats); + malloc_mutex_lock(tsdn, &background_thread_lock); + if (!background_thread_enabled()) { + malloc_mutex_unlock(tsdn, &background_thread_lock); + return true; + } + + stats->num_threads = n_background_threads; + uint64_t num_runs = 0; + nstime_init(&stats->run_interval, 0); + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + malloc_mutex_lock(tsdn, &info->mtx); + if (info->started) { + num_runs += info->tot_n_runs; + nstime_add(&stats->run_interval, &info->tot_sleep_time); + } + malloc_mutex_unlock(tsdn, &info->mtx); + } + stats->num_runs = num_runs; + if (num_runs > 0) { + nstime_idivide(&stats->run_interval, num_runs); + } + malloc_mutex_unlock(tsdn, 
&background_thread_lock); + + return false; +} + #undef BACKGROUND_THREAD_NPAGES_THRESHOLD #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS diff --git a/src/ctl.c b/src/ctl.c index ee0979a8..caa9f3e8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -181,6 +181,9 @@ CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) +CTL_PROTO(stats_background_thread_num_threads) +CTL_PROTO(stats_background_thread_num_runs) +CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) @@ -478,6 +481,12 @@ static const ctl_indexed_node_t stats_arenas_node[] = { {INDEX(stats_arenas_i)} }; +static const ctl_named_node_t stats_background_thread_node[] = { + {NAME("num_threads"), CTL(stats_background_thread_num_threads)}, + {NAME("num_runs"), CTL(stats_background_thread_num_runs)}, + {NAME("run_interval"), CTL(stats_background_thread_run_interval)} +}; + #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx) MUTEX_PROF_GLOBAL_MUTEXES #undef OP @@ -497,6 +506,8 @@ static const ctl_named_node_t stats_node[] = { {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, + {NAME("background_thread"), + CHILD(named, stats_background_thread)}, {NAME("mutexes"), CHILD(named, stats_mutexes)}, {NAME("arenas"), CHILD(indexed, stats_arenas)} }; @@ -872,6 +883,16 @@ ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { return arena_ind; } +static void +ctl_background_thread_stats_read(tsdn_t *tsdn) { + background_thread_stats_t *stats = &ctl_stats->background_thread; + if (!have_background_thread || + background_thread_stats_read(tsdn, stats)) { + memset(stats, 0, sizeof(background_thread_stats_t)); + nstime_init(&stats->run_interval, 0); + } +} + static void ctl_refresh(tsdn_t *tsdn) { unsigned i; @@ -915,6 +936,8 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->retained = atomic_load_zu( 
&ctl_sarena->astats->astats.retained, ATOMIC_RELAXED); + ctl_background_thread_stats_read(tsdn); + #define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \ malloc_mutex_lock(tsdn, &mtx); \ malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ @@ -2403,6 +2426,13 @@ CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) +CTL_RO_CGEN(config_stats, stats_background_thread_num_threads, + ctl_stats->background_thread.num_threads, size_t) +CTL_RO_CGEN(config_stats, stats_background_thread_num_runs, + ctl_stats->background_thread.num_runs, uint64_t) +CTL_RO_CGEN(config_stats, stats_background_thread_run_interval, + nstime_ns(&ctl_stats->background_thread.run_interval), uint64_t) + CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) diff --git a/src/nstime.c b/src/nstime.c index e5412274..71db3539 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -55,6 +55,13 @@ nstime_add(nstime_t *time, const nstime_t *addend) { time->ns += addend->ns; } +void +nstime_iadd(nstime_t *time, uint64_t addend) { + assert(UINT64_MAX - time->ns >= addend); + + time->ns += addend; +} + void nstime_subtract(nstime_t *time, const nstime_t *subtrahend) { assert(nstime_compare(time, subtrahend) >= 0); @@ -62,6 +69,13 @@ nstime_subtract(nstime_t *time, const nstime_t *subtrahend) { time->ns -= subtrahend->ns; } +void +nstime_isubtract(nstime_t *time, uint64_t subtrahend) { + assert(time->ns >= subtrahend); + + time->ns -= subtrahend; +} + void nstime_imultiply(nstime_t *time, uint64_t multiplier) { assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << diff --git a/src/stats.c b/src/stats.c index f98b8ece..fd108162 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1013,6 +1013,8 @@ stats_print_helper(void (*write_cb)(void *, const 
char *), void *cbopaque, bool json, bool merged, bool destroyed, bool unmerged, bool bins, bool large, bool mutex) { size_t allocated, active, metadata, resident, mapped, retained; + size_t num_background_threads; + uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); @@ -1026,6 +1028,19 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, read_global_mutex_stats(mutex_stats); } + if (have_background_thread) { + CTL_GET("stats.background_thread.num_threads", + &num_background_threads, size_t); + CTL_GET("stats.background_thread.num_runs", + &background_thread_num_runs, uint64_t); + CTL_GET("stats.background_thread.run_interval", + &background_thread_run_interval, uint64_t); + } else { + num_background_threads = 0; + background_thread_num_runs = 0; + background_thread_run_interval = 0; + } + if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t\"stats\": {\n"); @@ -1041,7 +1056,21 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mapped\": %zu,\n", mapped); malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu%s\n", retained, mutex ? "," : ""); + "\t\t\t\"retained\": %zu,\n", retained); + + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"background_thread\": {\n"); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"num_threads\": %zu,\n", num_background_threads); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"num_runs\": %"FMTu64",\n", + background_thread_num_runs); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"run_interval\": %"FMTu64"\n", + background_thread_run_interval); + malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", + mutex ? 
"," : ""); + if (mutex) { malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mutexes\": {\n"); @@ -1061,6 +1090,15 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "Allocated: %zu, active: %zu, metadata: %zu," " resident: %zu, mapped: %zu, retained: %zu\n", allocated, active, metadata, resident, mapped, retained); + + if (have_background_thread && num_background_threads > 0) { + malloc_cprintf(write_cb, cbopaque, + "Background threads: %zu, num_runs: %"FMTu64", " + "run_interval: %"FMTu64" ns\n", + num_background_threads, + background_thread_num_runs, + background_thread_run_interval); + } if (mutex) { mutex_prof_global_ind_t i; for (i = 0; i < mutex_prof_num_global_mutexes; i++) { diff --git a/test/unit/nstime.c b/test/unit/nstime.c index f8384f5a..f3137805 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -85,6 +85,23 @@ TEST_BEGIN(test_nstime_add) { } TEST_END +TEST_BEGIN(test_nstime_iadd) { + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, BILLION - 1); + nstime_iadd(&nsta, 1); + nstime_init2(&nstb, 43, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); + + nstime_init2(&nsta, 42, 1); + nstime_iadd(&nsta, BILLION + 1); + nstime_init2(&nstb, 43, 2); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect addition result"); +} +TEST_END + TEST_BEGIN(test_nstime_subtract) { nstime_t nsta, nstb; @@ -104,6 +121,23 @@ TEST_BEGIN(test_nstime_subtract) { } TEST_END +TEST_BEGIN(test_nstime_isubtract) { + nstime_t nsta, nstb; + + nstime_init2(&nsta, 42, 43); + nstime_isubtract(&nsta, 42*BILLION + 43); + nstime_init(&nstb, 0); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); + + nstime_init2(&nsta, 42, 43); + nstime_isubtract(&nsta, 41*BILLION + 44); + nstime_init2(&nstb, 0, BILLION - 1); + assert_d_eq(nstime_compare(&nsta, &nstb), 0, + "Incorrect subtraction result"); +} +TEST_END + TEST_BEGIN(test_nstime_imultiply) { nstime_t nsta, nstb; @@ -204,7 +238,9 @@ main(void) { 
test_nstime_copy, test_nstime_compare, test_nstime_add, + test_nstime_iadd, test_nstime_subtract, + test_nstime_isubtract, test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, From 5f5ed2198e47f3e904cbf1aff7c124e196855272 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 May 2017 16:26:59 -0700 Subject: [PATCH 0873/2608] Add profiling for the background thread mutex. --- include/jemalloc/internal/mutex_externs.h | 12 ++++++------ include/jemalloc/internal/mutex_prof.h | 1 + src/ctl.c | 12 ++++++++++++ src/mutex.c | 2 ++ 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h index c9a817fb..d0139f2e 100644 --- a/include/jemalloc/internal/mutex_externs.h +++ b/include/jemalloc/internal/mutex_externs.h @@ -10,12 +10,12 @@ extern bool isthreaded; # define isthreaded true #endif -bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank, malloc_mutex_lock_order_t lock_order); -void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_boot(void); -void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); +void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); #endif /* JEMALLOC_INTERNAL_MUTEX_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 1cc198d6..3358bcf5 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -6,6 +6,7 @@ #include 
"jemalloc/internal/tsd_types.h" #define MUTEX_PROF_GLOBAL_MUTEXES \ + OP(background_thread) \ OP(ctl) \ OP(prof) diff --git a/src/ctl.c b/src/ctl.c index caa9f3e8..da5e1710 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -947,6 +947,15 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_prof, bt2gctx_mtx); } + if (have_background_thread) { + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_background_thread, + background_thread_lock); + } else { + memset(&ctl_stats->mutex_prof_data[ + global_prof_mutex_background_thread], 0, + sizeof(mutex_prof_data_t)); + } /* We own ctl mutex already. */ malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[global_prof_mutex_ctl], @@ -2557,6 +2566,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, /* Global mutexes: ctl and prof. */ MUTEX_PROF_RESET(ctl_mtx); + if (have_background_thread) { + MUTEX_PROF_RESET(background_thread_lock); + } if (config_prof && opt_prof) { MUTEX_PROF_RESET(bt2gctx_mtx); } diff --git a/src/mutex.c b/src/mutex.c index c92ddd72..48e2940a 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -112,6 +112,8 @@ label_spin_done: static void mutex_prof_data_init(mutex_prof_data_t *data) { memset(data, 0, sizeof(mutex_prof_data_t)); + nstime_init(&data->max_wait_time, 0); + nstime_init(&data->tot_wait_time, 0); data->prev_owner = NULL; } From 44559e7cf179d1e9eddcc681d961e076511ee857 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 May 2017 13:50:42 -0700 Subject: [PATCH 0874/2608] Add documentation for background_thread related options. --- doc/jemalloc.xml.in | 78 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index be018bfb..57f5b0ba 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -737,6 +737,25 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", detecting whether another thread caused a refresh. 
+ + + background_thread + (bool) + rw + + Enable/disable internal background worker threads. When + set to true, background threads are created on demand (the number of + background threads will be no more than the number of CPUs or active + arenas). Threads run periodically, and handle purging asynchronously. When switching + off, background threads are terminated synchronously. See stats.background_thread + for related stats. opt.background_thread + can be used to set the default option. This option is only available on + selected pthread-based platforms. + + config.cache_oblivious @@ -937,6 +956,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", percpu. + + + opt.background_thread + (const bool) + r- + + Internal background worker threads enabled/disabled. See + background_thread for dynamic + control options and details. This option is disabled by + default. + + opt.dirty_decay_ms @@ -2158,6 +2189,39 @@ struct extent_hooks_s { + + + stats.background_thread.num_threads + (size_t) + r- + [] + + Number of background + threads running currently. + + + + + stats.background_thread.num_runs + (uint64_t) + r- + [] + + Total number of runs from all background threads. + + + + + stats.background_thread.run_interval + (uint64_t) + r- + [] + + Average run interval in nanoseconds of background threads. + + stats.mutexes.ctl.{counter}; @@ -2210,6 +2274,20 @@ struct extent_hooks_s { + + + stats.mutexes.background_thread.{counter} + (counter specific type) r- + [] + + Statistics on background_thread mutex + (global scope; background_thread + related). {counter} is one of the counters in mutex profiling + counters. + + stats.mutexes.prof.{counter} From 2c368284d2485bda47002f22dace6c0b55e4701e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 May 2017 17:44:13 -0700 Subject: [PATCH 0875/2608] Add tests for background threads. 
--- Makefile.in | 1 + test/unit/background_thread.c | 117 ++++++++++++++++++++++++++++++++++ test/unit/mallctl.c | 1 + 3 files changed, 119 insertions(+) create mode 100644 test/unit/background_thread.c diff --git a/Makefile.in b/Makefile.in index aa6f3f62..16fe30fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -158,6 +158,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ + $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c new file mode 100644 index 00000000..05089c28 --- /dev/null +++ b/test/unit/background_thread.c @@ -0,0 +1,117 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/util.h" + +static void +test_switch_background_thread_ctl(bool new_val) { + bool e0, e1; + size_t sz = sizeof(bool); + + e1 = new_val; + assert_d_eq(mallctl("background_thread", (void *)&e0, &sz, + &e1, sz), 0, "Unexpected mallctl() failure"); + assert_b_eq(e0, !e1, + "background_thread should be %d before.\n", !e1); + if (e1) { + assert_zu_gt(n_background_threads, 0, + "Number of background threads should be non zero.\n"); + } else { + assert_zu_eq(n_background_threads, 0, + "Number of background threads should be zero.\n"); + } +} + +static void +test_repeat_background_thread_ctl(bool before) { + bool e0, e1; + size_t sz = sizeof(bool); + + e1 = before; + assert_d_eq(mallctl("background_thread", (void *)&e0, &sz, + &e1, sz), 0, "Unexpected mallctl() failure"); + assert_b_eq(e0, before, + "background_thread should be %d.\n", before); + if (e1) { + assert_zu_gt(n_background_threads, 0, + "Number of background threads should be non zero.\n"); + } else { + assert_zu_eq(n_background_threads, 0, + "Number of background threads should be zero.\n"); + } +} + +TEST_BEGIN(test_background_thread_ctl) { + test_skip_if(!have_background_thread); + + bool e0, e1; + 
size_t sz = sizeof(bool); + + assert_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("background_thread", (void *)&e1, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + assert_b_eq(e0, e1, + "Default and opt.background_thread does not match.\n"); + if (e0) { + test_switch_background_thread_ctl(false); + } + assert_zu_eq(n_background_threads, 0, + "Number of background threads should be 0.\n"); + + for (unsigned i = 0; i < 4; i++) { + test_switch_background_thread_ctl(true); + test_repeat_background_thread_ctl(true); + test_repeat_background_thread_ctl(true); + + test_switch_background_thread_ctl(false); + test_repeat_background_thread_ctl(false); + test_repeat_background_thread_ctl(false); + } +} +TEST_END + +TEST_BEGIN(test_background_thread_running) { + test_skip_if(!have_background_thread); + test_skip_if(!config_stats); + +#if defined(JEMALLOC_BACKGROUND_THREAD) + tsd_t *tsd = tsd_fetch(); + background_thread_info_t *info = &background_thread_info[0]; + + test_repeat_background_thread_ctl(false); + test_switch_background_thread_ctl(true); + + nstime_t start, now; + nstime_init(&start, 0); + nstime_update(&start); + + bool ran = false; + while (true) { + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + if (info->tot_n_runs > 0) { + ran = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + if (ran) { + break; + } + + nstime_init(&now, 0); + nstime_update(&now); + nstime_subtract(&now, &start); + assert_u64_lt(nstime_sec(&now), 10, + "Background threads did not run for 10 seconds."); + usleep(10000); + } + test_switch_background_thread_ctl(false); +#endif +} +TEST_END + +int +main(void) { + /* Background_thread creation tests reentrancy naturally. 
*/ + return test_no_reentrancy( + test_background_thread_ctl, + test_background_thread_running); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f721c21d..8339e8c5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -161,6 +161,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); + TEST_MALLCTL_OPT(bool, background_thread, always); TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); TEST_MALLCTL_OPT(bool, stats_print, always); From 0eae838b0d4343b09d80dee00f20a39ce709ca8f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 22 May 2017 15:26:25 -0700 Subject: [PATCH 0876/2608] Check for background thread inactivity on extents_dalloc. To avoid background threads sleeping forever with idle arenas, we eagerly check background threads' sleep time after extents_dalloc, and signal the thread if necessary. 
--- .../internal/background_thread_inlines.h | 35 +++++++++++ .../internal/background_thread_structs.h | 14 +++-- src/arena.c | 4 ++ src/background_thread.c | 61 +++++++++++++------ 4 files changed, 91 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 2709ae31..fd5095f2 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -18,4 +18,39 @@ arena_background_thread_info_get(arena_t *arena) { return &background_thread_info[arena_ind % ncpus]; } +JEMALLOC_ALWAYS_INLINE uint64_t +background_thread_wakeup_time_get(background_thread_info_t *info) { + uint64_t next_wakeup = nstime_ns(&info->next_wakeup); + assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) == + (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); + return next_wakeup; +} + +JEMALLOC_ALWAYS_INLINE void +background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info, + uint64_t wakeup_time) { + malloc_mutex_assert_owner(tsdn, &info->mtx); + atomic_store_b(&info->indefinite_sleep, + wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE); + nstime_init(&info->next_wakeup, wakeup_time); +} + +JEMALLOC_ALWAYS_INLINE bool +background_thread_indefinite_sleep(background_thread_info_t *info) { + return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE); +} + +JEMALLOC_ALWAYS_INLINE void +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena) { + if (!background_thread_enabled()) { + return; + } + background_thread_info_t *info = + arena_background_thread_info_get(arena); + if (background_thread_indefinite_sleep(info)) { + background_thread_interval_check(tsdn, arena, + &arena->decay_dirty, 0); + } +} + #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h 
b/include/jemalloc/internal/background_thread_structs.h index c4eb8273..9507abcd 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -1,15 +1,22 @@ #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +/* This file really combines "structs" and "types", but only transitionally. */ + +#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX + struct background_thread_info_s { - malloc_mutex_t mtx; #ifdef JEMALLOC_BACKGROUND_THREAD /* Background thread is pthread specific. */ - pthread_cond_t cond; pthread_t thread; + pthread_cond_t cond; +#endif + malloc_mutex_t mtx; /* Whether the thread has been created. */ bool started; - /* Next scheduled wakeup time (absolute time). */ + /* When true, it means no wakeup scheduled. */ + atomic_b_t indefinite_sleep; + /* Next scheduled wakeup time (absolute time in ns). */ nstime_t next_wakeup; /* * Since the last background thread run, newly added number of pages @@ -22,7 +29,6 @@ struct background_thread_info_s { uint64_t tot_n_runs; /* Stats: total sleep time since started. */ nstime_t tot_sleep_time; -#endif /* ifdef JEMALLOC_BACKGROUND_THREAD */ }; typedef struct background_thread_info_s background_thread_info_t; diff --git a/src/arena.c b/src/arena.c index 0bd82dbc..7f75b642 100644 --- a/src/arena.c +++ b/src/arena.c @@ -367,6 +367,8 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); + } else { + arena_background_thread_inactivity_check(tsdn, arena); } } @@ -919,6 +921,8 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, extent_size_get(extent))) { extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_muzzy, extent); + arena_background_thread_inactivity_check(tsdn, + arena); break; } /* Fall through. 
*/ diff --git a/src/background_thread.c b/src/background_thread.c index 076d9815..1e725b4e 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -42,8 +42,8 @@ bool background_thread_stats_read(tsdn_t *tsdn, #else static void -background_thread_info_reinit(background_thread_info_t *info) { - nstime_init(&info->next_wakeup, 0); +background_thread_info_reinit(tsdn_t *tsdn, background_thread_info_t *info) { + background_thread_wakeup_time_set(tsdn, info, 0); info->npages_to_purge_new = 0; if (config_stats) { info->tot_n_runs = 0; @@ -80,8 +80,10 @@ background_threads_init(tsd_t *tsd) { if (pthread_cond_init(&info->cond, NULL)) { return true; } + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->started = false; - background_thread_info_reinit(info); + background_thread_info_reinit(tsd_tsdn(tsd), info); + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } return false; @@ -106,7 +108,6 @@ set_current_thread_affinity(UNUSED int cpu) { #define BILLION UINT64_C(1000000000) /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) -#define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX static inline size_t decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { @@ -258,6 +259,8 @@ background_work(tsdn_t *tsdn, unsigned ind) { background_thread_info_t *info = &background_thread_info[ind]; malloc_mutex_lock(tsdn, &info->mtx); + background_thread_wakeup_time_set(tsdn, info, + BACKGROUND_THREAD_INDEFINITE_SLEEP); while (info->started) { uint64_t interval = background_work_once(tsdn, ind); if (config_stats) { @@ -266,21 +269,27 @@ background_work(tsdn_t *tsdn, unsigned ind) { info->npages_to_purge_new = 0; struct timeval tv; + /* Specific clock required by timedwait. 
*/ gettimeofday(&tv, NULL); nstime_t before_sleep; nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000); if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { - nstime_init(&info->next_wakeup, - BACKGROUND_THREAD_INDEFINITE_SLEEP); + assert(background_thread_indefinite_sleep(info)); ret = pthread_cond_wait(&info->cond, &info->mtx.lock); assert(ret == 0); } else { assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); - nstime_init(&info->next_wakeup, 0); - nstime_update(&info->next_wakeup); - nstime_iadd(&info->next_wakeup, interval); + /* We need malloc clock (can be different from tv). */ + nstime_t next_wakeup; + nstime_init(&next_wakeup, 0); + nstime_update(&next_wakeup); + nstime_iadd(&next_wakeup, interval); + assert(nstime_ns(&next_wakeup) < + BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set(tsdn, info, + nstime_ns(&next_wakeup)); nstime_t ts_wakeup; nstime_copy(&ts_wakeup, &before_sleep); @@ -289,9 +298,12 @@ background_work(tsdn_t *tsdn, unsigned ind) { ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); + assert(!background_thread_indefinite_sleep(info)); ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts); assert(ret == ETIMEDOUT || ret == 0); + background_thread_wakeup_time_set(tsdn, info, + BACKGROUND_THREAD_INDEFINITE_SLEEP); } if (config_stats) { @@ -304,6 +316,7 @@ background_work(tsdn_t *tsdn, unsigned ind) { } } } + background_thread_wakeup_time_set(tsdn, info, 0); malloc_mutex_unlock(tsdn, &info->mtx); } @@ -360,7 +373,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { assert(info->started == false); if (err == 0) { info->started = true; - background_thread_info_reinit(info); + background_thread_info_reinit(tsd_tsdn(tsd), info); n_background_threads++; } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -465,6 +478,7 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (!info->started) { 
goto label_done; } + assert(background_thread_enabled()); if (malloc_mutex_trylock(tsdn, &decay->mtx)) { goto label_done; } @@ -474,14 +488,14 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, /* Purging is eagerly done or disabled currently. */ goto label_done_unlock2; } - if (nstime_compare(&info->next_wakeup, &decay->epoch) <= 0) { - goto label_done_unlock2; - } - uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); + nstime_t diff; - nstime_copy(&diff, &info->next_wakeup); + nstime_init(&diff, background_thread_wakeup_time_get(info)); + if (nstime_compare(&diff, &decay->epoch) <= 0) { + goto label_done_unlock2; + } nstime_subtract(&diff, &decay->epoch); if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { goto label_done_unlock2; @@ -508,9 +522,19 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, info->npages_to_purge_new += npurge_new; } - if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD || - (nstime_ns(&info->next_wakeup) == - BACKGROUND_THREAD_INDEFINITE_SLEEP && info->npages_to_purge_new > 0)) { + bool should_signal; + if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { + should_signal = true; + } else if (unlikely(background_thread_indefinite_sleep(info)) && + (extents_npages_get(&arena->extents_dirty) > 0 || + extents_npages_get(&arena->extents_muzzy) > 0 || + info->npages_to_purge_new > 0)) { + should_signal = true; + } else { + should_signal = false; + } + + if (should_signal) { info->npages_to_purge_new = 0; pthread_cond_signal(&info->cond); } @@ -602,7 +626,6 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BACKGROUND_THREAD_NPAGES_THRESHOLD #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS -#undef BACKGROUND_THREAD_INDEFINITE_SLEEP #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ From eeefdf3ce89e09ce7cc5c58d2a3730b83045eec1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 22 May 2017 13:09:17 
-0700 Subject: [PATCH 0877/2608] Fix # of unpurged pages in decay algorithm. When # of dirty pages move below npages_limit (e.g. they are reused), we should not lower number of unpurged pages because that would cause the reused pages to be double counted in the backlog (as a result, decay happen slower than it should). Instead, set number of unpurged to the greater of current npages and npages_limit. Added an assertion: the ceiling # of pages should be greater than npages_limit. --- include/jemalloc/internal/arena_structs_b.h | 2 ++ src/arena.c | 38 +++++++++++++++------ 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 459dd89d..95680c0f 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -103,6 +103,8 @@ struct arena_decay_s { * * Synchronization: Same as associated arena's stats field. */ decay_stats_t *stats; + /* Peak number of pages in associated extents. Used for debug only. */ + uint64_t ceil_npages; }; struct arena_bin_s { diff --git a/src/arena.c b/src/arena.c index 7f75b642..48d536e2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -639,6 +639,17 @@ arena_decay_backlog_update_last(arena_decay_t *decay, extents_t *extents) { size_t ndirty_delta = (ndirty > decay->nunpurged) ? 
ndirty - decay->nunpurged : 0; decay->backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; + + if (config_debug) { + if (ndirty > decay->ceil_npages) { + decay->ceil_npages = ndirty; + } + size_t npages_limit = arena_decay_backlog_npages_limit(decay); + assert(decay->ceil_npages >= npages_limit); + if (decay->ceil_npages > npages_limit) { + decay->ceil_npages = npages_limit; + } + } } static void @@ -664,11 +675,9 @@ arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents, } static void -arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents) { - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - - if (extents_npages_get(extents) > npages_limit) { +arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, + extents_t *extents, size_t current_npages, size_t npages_limit) { + if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, npages_limit); } @@ -702,16 +711,20 @@ static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, const nstime_t *time, bool purge) { arena_decay_epoch_advance_helper(decay, extents, time); - if (purge) { - arena_decay_try_purge(tsdn, arena, decay, extents); - } + size_t current_npages = extents_npages_get(extents); + size_t npages_limit = arena_decay_backlog_npages_limit(decay); + if (purge) { + arena_decay_try_purge(tsdn, arena, decay, extents, + current_npages, npages_limit); + } /* * There may be concurrent ndirty fluctuation between the purge above * and the nunpurged update below, but this is inconsequential to decay * machinery correctness. */ - decay->nunpurged = extents_npages_get(extents); + decay->nunpurged = (npages_limit > current_npages) ? 
npages_limit : + current_npages; } static void @@ -727,7 +740,7 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { nstime_update(&decay->epoch); decay->jitter_state = (uint64_t)(uintptr_t)decay; arena_decay_deadline_init(decay); - decay->nunpurged = extents_npages_get(extents); + decay->nunpurged = 0; memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); } @@ -738,6 +751,7 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); } + decay->ceil_npages = 0; } if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, malloc_mutex_rank_exclusive)) { @@ -814,7 +828,9 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, should_purge); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents); + arena_decay_try_purge(tsdn, arena, decay, extents, + extents_npages_get(extents), + arena_decay_backlog_npages_limit(decay)); } return advance_epoch; From 9b1038d19c998b8c219eb08d083ca0328b7941f1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 May 2017 17:08:21 -0700 Subject: [PATCH 0878/2608] Do not hold the base mutex while calling extent hooks. Drop the base mutex while allocating new base blocks, because extent allocation can enter code that prohibits holding non-core mutexes, e.g. the extent_[d]alloc() and extent_purge_forced_wrapper() calls in extent_alloc_dss(). This partially resolves #802. 
--- src/base.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/base.c b/src/base.c index 7502a657..dd4b109d 100644 --- a/src/base.c +++ b/src/base.c @@ -199,8 +199,14 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + /* + * Drop mutex during base_block_alloc(), because an extent hook will be + * called. + */ + malloc_mutex_unlock(tsdn, &base->mtx); base_block_t *block = base_block_alloc(extent_hooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); + malloc_mutex_lock(tsdn, &base->mtx); if (block == NULL) { return NULL; } From 067b97013018211b39e9911ae528ff54edd8fe5e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 22 May 2017 17:15:57 -0700 Subject: [PATCH 0879/2608] Add dss:primary testing. Generalize the run_tests.sh and .travis.yml test generation to handle combinations of arguments to the --with-malloc-conf configure option, and merge "dss:primary" into the existing "tcache:false" testing. 
--- .travis.yml | 18 ++++++++++++++++ scripts/gen_run_tests.py | 45 ++++++++++++++++++++++++---------------- scripts/gen_travis.py | 16 ++++++++++++-- 3 files changed, 59 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4838cb37..6ca3ac44 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx @@ -46,6 +48,8 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: @@ -70,18 +74,32 @@ matrix: apt: packages: - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: + apt: + packages: + - gcc-multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 0446c65c..9fbf71e5 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -20,6 +20,10 @@ possible_config_opts = [ '--disable-stats', '--with-malloc-conf=tcache:false', ] +possible_malloc_conf_opts = [ + 'tcache:false', + 'dss:primary', +] print 'set -e' print 'autoconf' @@ -28,21 +32,26 @@ print 'unamestr=`uname`' for cc, cxx in possible_compilers: for compiler_opts in powerset(possible_compiler_opts): for config_opts in powerset(possible_config_opts): - if cc is 'clang' \ - and '-m32' in possible_compiler_opts \ - and '--enable-prof' in config_opts: - continue - config_line = ( - 'EXTRA_CFLAGS=-Werror EXTRA_CXXFLAGS=-Werror ./configure ' - + 'CC="{} {}" '.format(cc, " 
".join(compiler_opts)) - + 'CXX="{} {}" '.format(cxx, " ".join(compiler_opts)) - + " ".join(config_opts) - ) - # Heap profiling is not supported on OS X. - if '--enable-prof' in config_opts: - print 'if [[ "$unamestr" != "Darwin" ]]; then' - print config_line - print "make clean" - print "make -j" + str(MAKE_J_VAL) + " check" - if '--enable-prof' in config_opts: - print 'fi' + for malloc_conf_opts in powerset(possible_malloc_conf_opts): + if cc is 'clang' \ + and '-m32' in possible_compiler_opts \ + and '--enable-prof' in config_opts: + continue + config_line = ( + 'EXTRA_CFLAGS=-Werror EXTRA_CXXFLAGS=-Werror ./configure ' + + 'CC="{} {}" '.format(cc, " ".join(compiler_opts)) + + 'CXX="{} {}" '.format(cxx, " ".join(compiler_opts)) + + " ".join(config_opts) + (' --with-malloc-conf=' + + ",".join(malloc_conf_opts) if len(malloc_conf_opts) > 0 + else '') + ) + # Heap profiling and dss are not supported on OS X. + darwin_unsupported = ('--enable-prof' in config_opts or \ + 'dss:primary' in malloc_conf_opts) + if darwin_unsupported: + print 'if [[ "$unamestr" != "Darwin" ]]; then' + print config_line + print "make clean" + print "make -j" + str(MAKE_J_VAL) + " check" + if darwin_unsupported: + print 'fi' diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 4649cb71..4a4c2144 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -43,12 +43,16 @@ configure_flag_unusuals = [ '--enable-debug', '--enable-prof', '--disable-stats', - '--with-malloc-conf=tcache:false', +] + +malloc_conf_unusuals = [ + 'tcache:false', + 'dss:primary', ] all_unusuals = ( [os_unusual] + [compilers_unusual] + compiler_flag_unusuals - + configure_flag_unusuals + + configure_flag_unusuals + malloc_conf_unusuals ) unusual_combinations_to_test = [] @@ -71,6 +75,14 @@ for unusual_combination in unusual_combinations_to_test: configure_flags = [ x for x in unusual_combination if x in configure_flag_unusuals] + malloc_conf = [ + x for x in unusual_combination if x in 
malloc_conf_unusuals] + # Filter out an unsupported configuration - dss on OS X. + if os == 'osx' and 'dss:primary' in malloc_conf: + continue + if len(malloc_conf) > 0: + configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) + # Filter out an unsupported configuration - heap profiling on OS X. if os == 'osx' and '--enable-prof' in configure_flags: continue From 196a53c2aec9fd9fbd17af6803d75c7f631c1ae3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 16 May 2017 13:25:17 -0700 Subject: [PATCH 0880/2608] Do not assume dss never decreases. An sbrk() caller outside jemalloc can decrease the dss, so add a separate atomic boolean to explicitly track whether jemalloc is concurrently calling sbrk(), rather than depending on state outside jemalloc's full control. This resolves #802. --- src/extent_dss.c | 72 +++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 38 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 6b5d066f..8e0ca654 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -28,6 +28,8 @@ static atomic_u_t dss_prec_default = ATOMIC_INIT( /* Base address of the DSS. */ static void *dss_base; +/* Atomic boolean indicating whether a thread is currently extending DSS. */ +static atomic_b_t dss_extending; /* Atomic boolean indicating whether the DSS is exhausted. */ static atomic_b_t dss_exhausted; /* Atomic current upper limit on DSS addresses. 
*/ @@ -65,37 +67,41 @@ extent_dss_prec_set(dss_prec_t dss_prec) { return false; } +static void +extent_dss_extending_start(void) { + spin_t spinner = SPIN_INITIALIZER; + while (true) { + bool expected = false; + if (atomic_compare_exchange_weak_b(&dss_extending, &expected, + true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { + break; + } + spin_adaptive(&spinner); + } +} + +static void +extent_dss_extending_finish(void) { + assert(atomic_load_b(&dss_extending, ATOMIC_RELAXED)); + + atomic_store_b(&dss_extending, false, ATOMIC_RELEASE); +} + static void * extent_dss_max_update(void *new_addr) { - void *max_cur; - /* * Get the current end of the DSS as max_cur and assure that dss_max is * up to date. */ - spin_t spinner = SPIN_INITIALIZER; - while (true) { - void *max_prev = atomic_load_p(&dss_max, ATOMIC_RELAXED); - - max_cur = extent_dss_sbrk(0); - if ((uintptr_t)max_prev > (uintptr_t)max_cur) { - /* - * Another thread optimistically updated dss_max. Wait - * for it to finish. - */ - spin_adaptive(&spinner); - continue; - } - if (atomic_compare_exchange_weak_p(&dss_max, &max_prev, - max_cur, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { - break; - } + void *max_cur = extent_dss_sbrk(0); + if (max_cur == (void *)-1) { + return NULL; } + atomic_store_p(&dss_max, max_cur, ATOMIC_RELEASE); /* Fixed new_addr can only be supported if it is at the edge of DSS. */ if (new_addr != NULL && max_cur != new_addr) { return NULL; } - return max_cur; } @@ -121,6 +127,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } + extent_dss_extending_start(); if (!atomic_load_b(&dss_exhausted, ATOMIC_ACQUIRE)) { /* * The loop is necessary to recover from races with other @@ -168,21 +175,14 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, assert((uintptr_t)max_cur + incr == (uintptr_t)ret + size); - /* - * Optimistically update dss_max, and roll back below if - * sbrk() fails. 
No other thread will try to extend the - * DSS while dss_max is greater than the current DSS - * max reported by sbrk(0). - */ - if (!atomic_compare_exchange_weak_p(&dss_max, &max_cur, - dss_next, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { - continue; - } - /* Try to allocate. */ void *dss_prev = extent_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. */ + atomic_store_p(&dss_max, dss_next, + ATOMIC_RELEASE); + extent_dss_extending_finish(); + if (gap_size_page != 0) { extent_dalloc_gap(tsdn, arena, gap); } else { @@ -209,14 +209,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } /* * Failure, whether due to OOM or a race with a raw - * sbrk() call from outside the allocator. Try to roll - * back optimistic dss_max update; if rollback fails, - * it's due to another caller of this function having - * succeeded since this invocation started, in which - * case rollback is not necessary. + * sbrk() call from outside the allocator. */ - atomic_compare_exchange_strong_p(&dss_max, &dss_next, - max_cur, ATOMIC_ACQ_REL, ATOMIC_RELAXED); if (dss_prev == (void *)-1) { /* OOM. */ atomic_store_b(&dss_exhausted, true, @@ -226,6 +220,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } } label_oom: + extent_dss_extending_finish(); extent_dalloc(tsdn, arena, gap); return NULL; } @@ -265,6 +260,7 @@ extent_dss_boot(void) { cassert(have_dss); dss_base = extent_dss_sbrk(0); + atomic_store_b(&dss_extending, false, ATOMIC_RELAXED); atomic_store_b(&dss_exhausted, dss_base == (void *)-1, ATOMIC_RELAXED); atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED); } From 36195c8f4dc536f3609de539c5f26cdd44514800 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 23 May 2017 10:53:36 -0700 Subject: [PATCH 0881/2608] Disable percpu_arena by default. 
--- .travis.yml | 20 ++++++++++++++++++++ doc/jemalloc.xml.in | 4 ++-- include/jemalloc/internal/arena_types.h | 5 ----- scripts/gen_run_tests.py | 12 ++++++++++-- scripts/gen_travis.py | 6 ++++-- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6ca3ac44..917314fa 100644 --- a/.travis.yml +++ b/.travis.yml @@ -24,6 +24,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx @@ -50,6 +52,8 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: @@ -80,6 +84,12 @@ matrix: apt: packages: - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: + apt: + packages: + - gcc-multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -88,18 +98,28 @@ matrix: env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 57f5b0ba..16d9ce4e 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -952,8 +952,8 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", same CPU share one arena. Note that no runtime checking regarding the availability of hyper threading is done at the moment. When set to disabled, narenas and thread to arena association will - not be impacted by this option. The default is - percpu. + not be impacted by this option. The default is disabled. + diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 34d4f6f2..1374eeca 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -29,12 +29,7 @@ typedef enum { percpu_arena_mode_limit = 3 } percpu_arena_mode_t; -#ifdef JEMALLOC_PERCPU_ARENA -#define PERCPU_ARENA_MODE_DEFAULT percpu_arena -#define OPT_PERCPU_ARENA_DEFAULT "percpu" -#else #define PERCPU_ARENA_MODE_DEFAULT percpu_arena_disabled #define OPT_PERCPU_ARENA_DEFAULT "disabled" -#endif #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 9fbf71e5..875c6d03 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -23,6 +23,7 @@ possible_config_opts = [ possible_malloc_conf_opts = [ 'tcache:false', 'dss:primary', + 'percpu_arena:percpu', ] print 'set -e' @@ -45,13 +46,20 @@ for cc, cxx in possible_compilers: ",".join(malloc_conf_opts) if len(malloc_conf_opts) > 0 else '') ) + + # Per CPU arenas are only supported on Linux. + linux_supported = ('percpu_arena:percpu' in malloc_conf_opts) # Heap profiling and dss are not supported on OS X. 
darwin_unsupported = ('--enable-prof' in config_opts or \ 'dss:primary' in malloc_conf_opts) - if darwin_unsupported: + if linux_supported: + print 'if [[ "$unamestr" = "Linux" ]]; then' + elif darwin_unsupported: print 'if [[ "$unamestr" != "Darwin" ]]; then' + print config_line print "make clean" print "make -j" + str(MAKE_J_VAL) + " check" - if darwin_unsupported: + + if linux_supported or darwin_unsupported: print 'fi' diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 4a4c2144..26997b25 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -48,6 +48,7 @@ configure_flag_unusuals = [ malloc_conf_unusuals = [ 'tcache:false', 'dss:primary', + 'percpu_arena:percpu', ] all_unusuals = ( @@ -77,8 +78,9 @@ for unusual_combination in unusual_combinations_to_test: malloc_conf = [ x for x in unusual_combination if x in malloc_conf_unusuals] - # Filter out an unsupported configuration - dss on OS X. - if os == 'osx' and 'dss:primary' in malloc_conf: + # Filter out unsupported configurations on OS X. + if os == 'osx' and ('dss:primary' in malloc_conf or \ + 'percpu_arena:percpu' in malloc_conf): continue if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) From 9f822a1fd70a676d810a4c621346d6484b29cb77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 22 May 2017 19:32:04 -0700 Subject: [PATCH 0882/2608] Header refactoring: unify and de-catchall witness code. 
--- .../internal/jemalloc_internal_includes.h | 4 - .../internal/jemalloc_internal_inlines_c.h | 22 +- include/jemalloc/internal/mutex_inlines.h | 14 +- .../jemalloc/internal/mutex_pool_inlines.h | 1 + include/jemalloc/internal/mutex_structs.h | 3 +- include/jemalloc/internal/tsd.h | 98 ++--- include/jemalloc/internal/witness.h | 345 ++++++++++++++++++ include/jemalloc/internal/witness_externs.h | 25 -- include/jemalloc/internal/witness_inlines.h | 188 ---------- include/jemalloc/internal/witness_structs.h | 28 -- include/jemalloc/internal/witness_types.h | 70 ---- src/arena.c | 18 +- src/extent.c | 54 ++- src/jemalloc.c | 78 ++-- src/tcache.c | 4 +- src/tsd.c | 2 +- src/witness.c | 18 +- test/unit/witness.c | 168 ++++----- 18 files changed, 602 insertions(+), 538 deletions(-) create mode 100644 include/jemalloc/internal/witness.h delete mode 100644 include/jemalloc/internal/witness_externs.h delete mode 100644 include/jemalloc/internal/witness_inlines.h delete mode 100644 include/jemalloc/internal/witness_structs.h delete mode 100644 include/jemalloc/internal/witness_types.h diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 45e648bc..2abc4781 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/witness_types.h" #include "jemalloc/internal/mutex_types.h" #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_dss_types.h" @@ -54,7 +53,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/witness_structs.h" #include "jemalloc/internal/mutex_structs.h" #include "jemalloc/internal/mutex_pool_structs.h" #include "jemalloc/internal/arena_structs_a.h" @@ -72,7 +70,6 @@ 
/******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/witness_externs.h" #include "jemalloc/internal/mutex_externs.h" #include "jemalloc/internal/extent_externs.h" #include "jemalloc/internal/extent_dss_externs.h" @@ -89,7 +86,6 @@ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/witness_inlines.h" #include "jemalloc/internal/mutex_inlines.h" #include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 415c503b..80dfbeff 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_INLINES_C_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/witness.h" JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { @@ -25,7 +26,8 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(size != 0); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { @@ -49,7 +51,8 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, assert(usize == sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + 
WITNESS_RANK_CORE, 0); ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -82,7 +85,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } @@ -100,7 +104,8 @@ idalloc(tsd_t *tsd, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool slow_path) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); } @@ -108,7 +113,8 @@ JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); void *p; size_t usize, copysize; @@ -146,7 +152,8 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { assert(ptr != NULL); assert(size != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { @@ -174,7 +181,8 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero) { assert(ptr != NULL); assert(size != 0); - witness_assert_depth_to_rank(tsdn, 
WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h index babe8d3a..b86a4ad4 100644 --- a/include/jemalloc/internal/mutex_inlines.h +++ b/include/jemalloc/internal/mutex_inlines.h @@ -31,14 +31,14 @@ mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { /* Trylock: return false if the lock is successfully acquired. */ static inline bool malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { return true; } mutex_owner_stats_update(tsdn, mutex); } - witness_lock(tsdn, &mutex->witness); + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); return false; } @@ -69,19 +69,19 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { static inline void malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); } mutex_owner_stats_update(tsdn, mutex); } - witness_lock(tsdn, &mutex->witness); + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } static inline void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_unlock(tsdn, &mutex->witness); + witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { MALLOC_MUTEX_UNLOCK(mutex); } @@ -89,12 +89,12 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_owner(tsdn, &mutex->witness); + 
witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } static inline void malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_not_owner(tsdn, &mutex->witness); + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } /* Copy the prof data from mutex for processing. */ diff --git a/include/jemalloc/internal/mutex_pool_inlines.h b/include/jemalloc/internal/mutex_pool_inlines.h index 0b667aaa..bc257ea8 100644 --- a/include/jemalloc/internal/mutex_pool_inlines.h +++ b/include/jemalloc/internal/mutex_pool_inlines.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/mutex_inlines.h" #include "jemalloc/internal/mutex_pool_structs.h" +#include "jemalloc/internal/witness.h" /* * This file really combines "inlines" and "externs", but only transitionally. diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h index 92f41676..c1b65522 100644 --- a/include/jemalloc/internal/mutex_structs.h +++ b/include/jemalloc/internal/mutex_structs.h @@ -3,8 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/witness_types.h" -#include "jemalloc/internal/witness_structs.h" +#include "jemalloc/internal/witness.h" struct malloc_mutex_s { union { diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 1a269755..c192a6ca 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -10,8 +10,7 @@ #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/util.h" -#include "jemalloc/internal/witness_types.h" -#include "jemalloc/internal/witness_structs.h" +#include "jemalloc/internal/witness.h" /* * Thread-Specific-Data layout @@ -52,30 +51,29 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 # define MALLOC_TEST_TSD \ - O(test_data, int) \ - 
O(test_callback, test_callback_t) + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) # define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL #else # define MALLOC_TEST_TSD # define MALLOC_TEST_TSD_INITIALIZER #endif +/* O(name, type, nullable type */ #define MALLOC_TSD \ -/* O(name, type) */ \ - O(tcache_enabled, bool) \ - O(arenas_tdata_bypass, bool) \ - O(reentrancy_level, int8_t) \ - O(narenas_tdata, uint32_t) \ - O(thread_allocated, uint64_t) \ - O(thread_deallocated, uint64_t) \ - O(prof_tdata, prof_tdata_t *) \ - O(rtree_ctx, rtree_ctx_t) \ - O(iarena, arena_t *) \ - O(arena, arena_t *) \ - O(arenas_tdata, arena_tdata_t *) \ - O(tcache, tcache_t) \ - O(witnesses, witness_list_t) \ - O(witness_fork, bool) \ + O(tcache_enabled, bool, bool) \ + O(arenas_tdata_bypass, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(narenas_tdata, uint32_t, uint32_t) \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(tcache, tcache_t, tcache_t) \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD #define TSD_INITIALIZER { \ @@ -92,8 +90,7 @@ typedef void (*test_callback_t)(int *); NULL, \ NULL, \ TCACHE_ZERO_INITIALIZER, \ - ql_head_initializer(witnesses), \ - false \ + WITNESS_TSD_INITIALIZER \ MALLOC_TEST_TSD_INITIALIZER \ } @@ -119,7 +116,7 @@ struct tsd_s { * setters below. 
*/ tsd_state_t state; -#define O(n, t) \ +#define O(n, t, nt) \ t use_a_getter_or_setter_instead_##n; MALLOC_TSD #undef O @@ -135,6 +132,22 @@ struct tsdn_s { tsd_t tsd; }; #define TSDN_NULL ((tsdn_t *)0) +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); + + return &tsdn->tsd; +} void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); @@ -166,7 +179,7 @@ void tsd_slow_update(tsd_t *tsd); * foo. This omits some safety checks, and so can be used during tsd * initialization and cleanup. */ -#define O(n, t) \ +#define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get_unsafe(tsd_t *tsd) { \ return &tsd->use_a_getter_or_setter_instead_##n; \ @@ -175,7 +188,7 @@ MALLOC_TSD #undef O /* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ -#define O(n, t) \ +#define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ assert(tsd->state == tsd_state_nominal || \ @@ -186,8 +199,24 @@ tsd_##n##p_get(tsd_t *tsd) { \ MALLOC_TSD #undef O +/* + * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn + * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type. + */ +#define O(n, t, nt) \ +JEMALLOC_ALWAYS_INLINE nt * \ +tsdn_##n##p_get(tsdn_t *tsdn) { \ + if (tsdn_null(tsdn)) { \ + return NULL; \ + } \ + tsd_t *tsd = tsdn_tsd(tsdn); \ + return (nt *)tsd_##n##p_get(tsd); \ +} +MALLOC_TSD +#undef O + /* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */ -#define O(n, t) \ +#define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) { \ return *tsd_##n##p_get(tsd); \ @@ -196,7 +225,7 @@ MALLOC_TSD #undef O /* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. 
*/ -#define O(n, t) \ +#define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t val) { \ *tsd_##n##p_get(tsd) = val; \ @@ -243,11 +272,6 @@ tsd_fetch(void) { return tsd_fetch_impl(true); } -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) { - return (tsdn_t *)tsd; -} - static inline bool tsd_nominal(tsd_t *tsd) { return (tsd->state <= tsd_state_nominal_max); @@ -262,18 +286,6 @@ tsdn_fetch(void) { return tsd_tsdn(tsd_fetch_impl(false)); } -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) { - return tsdn == NULL; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) { - assert(!tsdn_null(tsdn)); - - return &tsdn->tsd; -} - JEMALLOC_ALWAYS_INLINE rtree_ctx_t * tsd_rtree_ctx(tsd_t *tsd) { return tsd_rtree_ctxp_get(tsd); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h new file mode 100644 index 00000000..c71911f2 --- /dev/null +++ b/include/jemalloc/internal/witness.h @@ -0,0 +1,345 @@ +#ifndef JEMALLOC_INTERNAL_WITNESS_H +#define JEMALLOC_INTERNAL_WITNESS_H + +#include "jemalloc/internal/ql.h" + +/******************************************************************************/ +/* LOCK RANKS */ +/******************************************************************************/ + +/* + * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness + * machinery. + */ + +#define WITNESS_RANK_OMIT 0U + +#define WITNESS_RANK_MIN 1U + +#define WITNESS_RANK_INIT 1U +#define WITNESS_RANK_CTL 1U +#define WITNESS_RANK_TCACHES 2U +#define WITNESS_RANK_ARENAS 3U + +#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U + +#define WITNESS_RANK_PROF_DUMP 5U +#define WITNESS_RANK_PROF_BT2GCTX 6U +#define WITNESS_RANK_PROF_TDATAS 7U +#define WITNESS_RANK_PROF_TDATA 8U +#define WITNESS_RANK_PROF_GCTX 9U + +#define WITNESS_RANK_BACKGROUND_THREAD 10U + +/* + * Used as an argument to witness_assert_depth_to_rank() in order to validate + * depth excluding non-core locks with lower ranks. 
Since the rank argument to + * witness_assert_depth_to_rank() is inclusive rather than exclusive, this + * definition can have the same value as the minimally ranked core lock. + */ +#define WITNESS_RANK_CORE 11U + +#define WITNESS_RANK_DECAY 11U +#define WITNESS_RANK_TCACHE_QL 12U +#define WITNESS_RANK_EXTENTS 13U +#define WITNESS_RANK_EXTENT_FREELIST 14U + +#define WITNESS_RANK_EXTENT_POOL 15U +#define WITNESS_RANK_RTREE 16U +#define WITNESS_RANK_BASE 17U +#define WITNESS_RANK_ARENA_LARGE 18U + +#define WITNESS_RANK_LEAF 0xffffffffU +#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF +#define WITNESS_RANK_DSS WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + +/******************************************************************************/ +/* PER-WITNESS DATA */ +/******************************************************************************/ +#if defined(JEMALLOC_DEBUG) +# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +#else +# define WITNESS_INITIALIZER(name, rank) +#endif + +typedef struct witness_s witness_t; +typedef unsigned witness_rank_t; +typedef ql_head(witness_t) witness_list_t; +typedef int witness_comp_t (const witness_t *, void *, const witness_t *, + void *); + +struct witness_s { + /* Name, used for printing lock order reversal messages. */ + const char *name; + + /* + * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses + * must be acquired in order of increasing rank. 
+ */ + witness_rank_t rank; + + /* + * If two witnesses are of equal rank and they have the samp comp + * function pointer, it is called as a last attempt to differentiate + * between witnesses of equal rank. + */ + witness_comp_t *comp; + + /* Opaque data, passed to comp(). */ + void *opaque; + + /* Linkage for thread's currently owned locks. */ + ql_elm(witness_t) link; +}; + +/******************************************************************************/ +/* PER-THREAD DATA */ +/******************************************************************************/ +typedef struct witness_tsd_s witness_tsd_t; +struct witness_tsd_s { + witness_list_t witnesses; + bool forking; +}; + +#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false } +#define WITNESS_TSDN_NULL ((witness_tsdn_t *)0) + +/******************************************************************************/ +/* (PER-THREAD) NULLABILITY HELPERS */ +/******************************************************************************/ +typedef struct witness_tsdn_s witness_tsdn_t; +struct witness_tsdn_s { + witness_tsd_t witness_tsd; +}; + +JEMALLOC_ALWAYS_INLINE witness_tsdn_t * +witness_tsd_tsdn(witness_tsd_t *witness_tsd) { + return (witness_tsdn_t *)witness_tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +witness_tsdn_null(witness_tsdn_t *witness_tsdn) { + return witness_tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE witness_tsd_t * +witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) { + assert(!witness_tsdn_null(witness_tsdn)); + return &witness_tsdn->witness_tsd; +} + +/******************************************************************************/ +/* API */ +/******************************************************************************/ +void witness_init(witness_t *witness, const char *name, witness_rank_t rank, + witness_comp_t *comp, void *opaque); + +typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +extern witness_lock_error_t *JET_MUTABLE witness_lock_error; + +typedef 
void (witness_owner_error_t)(const witness_t *); +extern witness_owner_error_t *JET_MUTABLE witness_owner_error; + +typedef void (witness_not_owner_error_t)(const witness_t *); +extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; + +typedef void (witness_depth_error_t)(const witness_list_t *, + witness_rank_t rank_inclusive, unsigned depth); +extern witness_depth_error_t *JET_MUTABLE witness_depth_error; + +void witnesses_cleanup(witness_tsd_t *witness_tsd); +void witness_prefork(witness_tsd_t *witness_tsd); +void witness_postfork_parent(witness_tsd_t *witness_tsd); +void witness_postfork_child(witness_tsd_t *witness_tsd); + +/* Helper, not intended for direct use. */ +static inline bool +witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) { + witness_list_t *witnesses; + witness_t *w; + + cassert(config_debug); + + witnesses = &witness_tsd->witnesses; + ql_foreach(w, witnesses, link) { + if (w == witness) { + return true; + } + } + + return false; +} + +static inline void +witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) { + witness_tsd_t *witness_tsd; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + if (witness_owner(witness_tsd, witness)) { + return; + } + witness_owner_error(witness); +} + +static inline void +witness_assert_not_owner(witness_tsdn_t *witness_tsdn, + const witness_t *witness) { + witness_tsd_t *witness_tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + witnesses = &witness_tsd->witnesses; + ql_foreach(w, witnesses, link) { + if (w == witness) { + witness_not_owner_error(witness); + } + } +} + +static inline void 
+witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive, unsigned depth) { + witness_tsd_t *witness_tsd; + unsigned d; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + + d = 0; + witnesses = &witness_tsd->witnesses; + w = ql_last(witnesses, link); + if (w != NULL) { + ql_reverse_foreach(w, witnesses, link) { + if (w->rank < rank_inclusive) { + break; + } + d++; + } + } + if (d != depth) { + witness_depth_error(witnesses, rank_inclusive, depth); + } +} + +static inline void +witness_assert_depth(witness_tsdn_t *witness_tsdn, unsigned depth) { + witness_assert_depth_to_rank(witness_tsdn, WITNESS_RANK_MIN, depth); +} + +static inline void +witness_assert_lockless(witness_tsdn_t *witness_tsdn) { + witness_assert_depth(witness_tsdn, 0); +} + +static inline void +witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { + witness_tsd_t *witness_tsd; + witness_list_t *witnesses; + witness_t *w; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + witness_assert_not_owner(witness_tsdn, witness); + + witnesses = &witness_tsd->witnesses; + w = ql_last(witnesses, link); + if (w == NULL) { + /* No other locks; do nothing. */ + } else if (witness_tsd->forking && w->rank <= witness->rank) { + /* Forking, and relaxed ranking satisfied. */ + } else if (w->rank > witness->rank) { + /* Not forking, rank order reversal. */ + witness_lock_error(witnesses, witness); + } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != + witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > + 0)) { + /* + * Missing/incompatible comparison function, or comparison + * function indicates rank order reversal. 
+ */ + witness_lock_error(witnesses, witness); + } + + ql_elm_new(witness, link); + ql_tail_insert(witnesses, witness, link); +} + +static inline void +witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) { + witness_tsd_t *witness_tsd; + witness_list_t *witnesses; + + if (!config_debug) { + return; + } + + if (witness_tsdn_null(witness_tsdn)) { + return; + } + witness_tsd = witness_tsdn_tsd(witness_tsdn); + if (witness->rank == WITNESS_RANK_OMIT) { + return; + } + + /* + * Check whether owner before removal, rather than relying on + * witness_assert_owner() to abort, so that unit tests can test this + * function's failure mode without causing undefined behavior. + */ + if (witness_owner(witness_tsd, witness)) { + witnesses = &witness_tsd->witnesses; + ql_remove(witnesses, witness, link); + } else { + witness_assert_owner(witness_tsdn, witness); + } +} + +#endif /* JEMALLOC_INTERNAL_WITNESS_H */ diff --git a/include/jemalloc/internal/witness_externs.h b/include/jemalloc/internal/witness_externs.h deleted file mode 100644 index 99df4c50..00000000 --- a/include/jemalloc/internal/witness_externs.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_WITNESS_EXTERNS_H -#define JEMALLOC_INTERNAL_WITNESS_EXTERNS_H - -void witness_init(witness_t *witness, const char *name, witness_rank_t rank, - witness_comp_t *comp, void *opaque); - -typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); -extern witness_lock_error_t *JET_MUTABLE witness_lock_error; - -typedef void (witness_owner_error_t)(const witness_t *); -extern witness_owner_error_t *JET_MUTABLE witness_owner_error; - -typedef void (witness_not_owner_error_t)(const witness_t *); -extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; - -typedef void (witness_depth_error_t)(const witness_list_t *, - witness_rank_t rank_inclusive, unsigned depth); -extern witness_depth_error_t *JET_MUTABLE witness_depth_error; - -void witnesses_cleanup(tsd_t *tsd); -void 
witness_prefork(tsd_t *tsd); -void witness_postfork_parent(tsd_t *tsd); -void witness_postfork_child(tsd_t *tsd); - -#endif /* JEMALLOC_INTERNAL_WITNESS_EXTERNS_H */ diff --git a/include/jemalloc/internal/witness_inlines.h b/include/jemalloc/internal/witness_inlines.h deleted file mode 100644 index 51d1af38..00000000 --- a/include/jemalloc/internal/witness_inlines.h +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_WITNESS_INLINES_H -#define JEMALLOC_INTERNAL_WITNESS_INLINES_H - -#include "jemalloc/internal/ql.h" - -/* Helper, not intended for direct use. */ -static inline bool -witness_owner(tsd_t *tsd, const witness_t *witness) { - witness_list_t *witnesses; - witness_t *w; - - cassert(config_debug); - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) { - return true; - } - } - - return false; -} - -static inline void -witness_assert_owner(tsdn_t *tsdn, const witness_t *witness) { - tsd_t *tsd; - - if (!config_debug) { - return; - } - - if (tsdn_null(tsdn)) { - return; - } - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) { - return; - } - - if (witness_owner(tsd, witness)) { - return; - } - witness_owner_error(witness); -} - -static inline void -witness_assert_not_owner(tsdn_t *tsdn, const witness_t *witness) { - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) { - return; - } - - if (tsdn_null(tsdn)) { - return; - } - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) { - return; - } - - witnesses = tsd_witnessesp_get(tsd); - ql_foreach(w, witnesses, link) { - if (w == witness) { - witness_not_owner_error(witness); - } - } -} - -static inline void -witness_assert_depth_to_rank(tsdn_t *tsdn, witness_rank_t rank_inclusive, - unsigned depth) { - tsd_t *tsd; - unsigned d; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) { - return; - } - - if (tsdn_null(tsdn)) { - return; - } - tsd = tsdn_tsd(tsdn); - - d = 0; - witnesses = 
tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w != NULL) { - ql_reverse_foreach(w, witnesses, link) { - if (w->rank < rank_inclusive) { - break; - } - d++; - } - } - if (d != depth) { - witness_depth_error(witnesses, rank_inclusive, depth); - } -} - -static inline void -witness_assert_depth(tsdn_t *tsdn, unsigned depth) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_MIN, depth); -} - -static inline void -witness_assert_lockless(tsdn_t *tsdn) { - witness_assert_depth(tsdn, 0); -} - -static inline void -witness_lock(tsdn_t *tsdn, witness_t *witness) { - tsd_t *tsd; - witness_list_t *witnesses; - witness_t *w; - - if (!config_debug) { - return; - } - - if (tsdn_null(tsdn)) { - return; - } - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) { - return; - } - - witness_assert_not_owner(tsdn, witness); - - witnesses = tsd_witnessesp_get(tsd); - w = ql_last(witnesses, link); - if (w == NULL) { - /* No other locks; do nothing. */ - } else if (tsd_witness_fork_get(tsd) && w->rank <= witness->rank) { - /* Forking, and relaxed ranking satisfied. */ - } else if (w->rank > witness->rank) { - /* Not forking, rank order reversal. */ - witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > - 0)) { - /* - * Missing/incompatible comparison function, or comparison - * function indicates rank order reversal. 
- */ - witness_lock_error(witnesses, witness); - } - - ql_elm_new(witness, link); - ql_tail_insert(witnesses, witness, link); -} - -static inline void -witness_unlock(tsdn_t *tsdn, witness_t *witness) { - tsd_t *tsd; - witness_list_t *witnesses; - - if (!config_debug) { - return; - } - - if (tsdn_null(tsdn)) { - return; - } - tsd = tsdn_tsd(tsdn); - if (witness->rank == WITNESS_RANK_OMIT) { - return; - } - - /* - * Check whether owner before removal, rather than relying on - * witness_assert_owner() to abort, so that unit tests can test this - * function's failure mode without causing undefined behavior. - */ - if (witness_owner(tsd, witness)) { - witnesses = tsd_witnessesp_get(tsd); - ql_remove(witnesses, witness, link); - } else { - witness_assert_owner(tsdn, witness); - } -} - -#endif /* JEMALLOC_INTERNAL_WITNESS_INLINES_H */ diff --git a/include/jemalloc/internal/witness_structs.h b/include/jemalloc/internal/witness_structs.h deleted file mode 100644 index 95d19706..00000000 --- a/include/jemalloc/internal/witness_structs.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_WITNESS_STRUCTS_H -#define JEMALLOC_INTERNAL_WITNESS_STRUCTS_H - -struct witness_s { - /* Name, used for printing lock order reversal messages. */ - const char *name; - - /* - * Witness rank, where 0 is lowest and UINT_MAX is highest. Witnesses - * must be acquired in order of increasing rank. - */ - witness_rank_t rank; - - /* - * If two witnesses are of equal rank and they have the samp comp - * function pointer, it is called as a last attempt to differentiate - * between witnesses of equal rank. - */ - witness_comp_t *comp; - - /* Opaque data, passed to comp(). */ - void *opaque; - - /* Linkage for thread's currently owned locks. 
*/ - ql_elm(witness_t) link; -}; - -#endif /* JEMALLOC_INTERNAL_WITNESS_STRUCTS_H */ diff --git a/include/jemalloc/internal/witness_types.h b/include/jemalloc/internal/witness_types.h deleted file mode 100644 index 28ec7c8c..00000000 --- a/include/jemalloc/internal/witness_types.h +++ /dev/null @@ -1,70 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_WITNESS_TYPES_H -#define JEMALLOC_INTERNAL_WITNESS_TYPES_H - -#include "jemalloc/internal/ql.h" - -typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; -typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, void *, const witness_t *, - void *); - -/* - * Lock ranks. Witnesses with rank WITNESS_RANK_OMIT are completely ignored by - * the witness machinery. - */ -#define WITNESS_RANK_OMIT 0U - -#define WITNESS_RANK_MIN 1U - -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_TCACHES 2U -#define WITNESS_RANK_ARENAS 3U - -#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U - -#define WITNESS_RANK_PROF_DUMP 5U -#define WITNESS_RANK_PROF_BT2GCTX 6U -#define WITNESS_RANK_PROF_TDATAS 7U -#define WITNESS_RANK_PROF_TDATA 8U -#define WITNESS_RANK_PROF_GCTX 9U - -#define WITNESS_RANK_BACKGROUND_THREAD 10U - -/* - * Used as an argument to witness_assert_depth_to_rank() in order to validate - * depth excluding non-core locks with lower ranks. Since the rank argument to - * witness_assert_depth_to_rank() is inclusive rather than exclusive, this - * definition can have the same value as the minimally ranked core lock. 
- */ -#define WITNESS_RANK_CORE 11U - -#define WITNESS_RANK_DECAY 11U -#define WITNESS_RANK_TCACHE_QL 12U -#define WITNESS_RANK_EXTENTS 13U -#define WITNESS_RANK_EXTENT_FREELIST 14U - -#define WITNESS_RANK_EXTENT_POOL 15U -#define WITNESS_RANK_RTREE 16U -#define WITNESS_RANK_BASE 17U -#define WITNESS_RANK_ARENA_LARGE 18U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF - -#if defined(JEMALLOC_DEBUG) -# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} -#else -# define WITNESS_INITIALIZER(name, rank) -#endif - -#endif /* JEMALLOC_INTERNAL_WITNESS_TYPES_H */ diff --git a/src/arena.c b/src/arena.c index 48d536e2..35b4e5a7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -361,7 +361,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, extent); @@ -497,7 +498,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); szind_t szind = size2index(usize); size_t mapped_add; @@ -892,7 +894,8 @@ static 
size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, extent_list_t *decay_extents) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. */ size_t nstashed = 0; @@ -978,7 +981,8 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 1); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); if (decay->purging) { @@ -1253,7 +1257,8 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, extent_t *slab; bool zero, commit; - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); zero = false; commit = true; @@ -1271,7 +1276,8 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, const arena_bin_info_t *bin_info) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; szind_t szind = size2index(bin_info->reg_size); diff --git a/src/extent.c b/src/extent.c index 6503f2a1..ff09f7fb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -149,7 +149,8 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); extent_t *extent = 
extent_avail_first(&arena->extent_avail); @@ -164,7 +165,8 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); extent_avail_insert(&arena->extent_avail, extent); @@ -415,7 +417,8 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); assert(alignment != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, size, pad, alignment, slab, szind, zero, commit); @@ -426,7 +429,8 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); @@ -607,7 +611,8 @@ static void extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { cassert(config_prof); /* prof_gdump() requirement. 
*/ - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); if (opt_prof && extent_state_get(extent) == extent_state_active) { size_t nadd = extent_size_get(extent) >> LG_PAGE; @@ -730,7 +735,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, bool locked, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, locked ? 1 : 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, locked ? 1 : 0); if (locked) { malloc_mutex_assert_owner(tsdn, &extents->mtx); } @@ -869,7 +875,8 @@ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); assert(!*zero || !slab); @@ -1219,7 +1226,8 @@ extent_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1385,7 +1393,8 @@ void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); if (extent_register(tsdn, 
extent)) { extents_leak(tsdn, arena, &extent_hooks, @@ -1418,7 +1427,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_addr_set(extent, extent_base_get(extent)); @@ -1445,7 +1455,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); /* * Deregister first to avoid a race with other allocating threads, and @@ -1508,7 +1519,8 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. 
*/ extent_deregister(tsdn, extent); @@ -1543,7 +1555,8 @@ bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); bool err = ((*r_extent_hooks)->commit == NULL || @@ -1566,7 +1579,8 @@ bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1597,7 +1611,8 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_lazy == NULL || @@ -1625,7 +1640,8 @@ bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_forced == NULL || @@ -1649,7 +1665,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { assert(extent_size_get(extent) == size_a + size_b); - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + 
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1742,7 +1759,8 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { - witness_assert_depth_to_rank(tsdn, WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); extent_hooks_assure_initialized(arena, r_extent_hooks); diff --git a/src/jemalloc.c b/src/jemalloc.c index b03e5f48..f083adc4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1755,7 +1755,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { */ reentrancy_level = tsd_reentrancy_level_get(tsd); if (reentrancy_level == 0) { - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); } if (sopts->slow && unlikely(reentrancy_level > 0)) { /* @@ -1832,7 +1832,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* Success! 
*/ if (reentrancy_level == 0) { - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); } *dopts->result = allocation; return 0; @@ -1847,7 +1847,7 @@ label_oom: UTRACE(NULL, size, NULL); } - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); if (sopts->set_errno_on_error) { set_errno(ENOMEM); @@ -1878,7 +1878,7 @@ label_invalid_alignment: UTRACE(NULL, size, NULL); } - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); if (sopts->null_out_result_on_error) { *dopts->result = NULL; @@ -2080,7 +2080,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { tsd_assert_fast(tsd); } if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); } else { assert(slow_path); } @@ -2120,7 +2120,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { tsd_assert_fast(tsd); } if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); } else { assert(slow_path); } @@ -2181,7 +2181,7 @@ je_realloc(void *ptr, size_t size) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); @@ -2224,7 +2224,7 @@ je_realloc(void *ptr, size_t size) { *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return ret; } @@ -2234,7 +2234,8 @@ je_free(void *ptr) { if (likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsd_tsdn(tsd)); + 
witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn( + tsd))); } tcache_t *tcache; @@ -2252,7 +2253,8 @@ je_free(void *ptr) { ifree(tsd, ptr, tcache, true); } if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn( + tsd))); } } } @@ -2513,7 +2515,7 @@ je_rallocx(void *ptr, size_t size, int flags) { assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2569,7 +2571,7 @@ je_rallocx(void *ptr, size_t size, int flags) { *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2577,7 +2579,7 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); return NULL; } @@ -2669,7 +2671,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); @@ -2712,7 +2714,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); return usize; } @@ -2726,7 +2728,7 @@ je_sallocx(const void *ptr, int flags) { assert(ptr != NULL); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); if 
(config_debug || force_ivsalloc) { usize = ivsalloc(tsdn, ptr); @@ -2735,7 +2737,7 @@ je_sallocx(const void *ptr, int flags) { usize = isalloc(tsdn, ptr); } - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return usize; } @@ -2746,7 +2748,7 @@ je_dallocx(void *ptr, int flags) { tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2777,12 +2779,12 @@ je_dallocx(void *ptr, int flags) { } else { ifree(tsd, ptr, tcache, true); } - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); } JEMALLOC_ALWAYS_INLINE size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); size_t usize; if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { @@ -2790,7 +2792,7 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { } else { usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); } - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return usize; } @@ -2803,7 +2805,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2834,7 +2836,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } else { isfree(tsd, ptr, usize, tcache, true); } - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2850,14 +2852,14 @@ je_nallocx(size_t size, int flags) { } tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + 
witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { return 0; } - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return usize; } @@ -2872,9 +2874,9 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, } tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); return ret; } @@ -2888,9 +2890,9 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { } tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); ret = ctl_nametomib(tsdn, name, mibp, miblenp); - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return ret; } @@ -2905,9 +2907,9 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } tsd = tsd_fetch(); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsd_tsdn(tsd)); + witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); return ret; } @@ -2917,9 +2919,9 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, tsdn_t *tsdn; tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); stats_print(write_cb, cbopaque, opts); - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2930,7 +2932,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn); + 
witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); if (unlikely(ptr == NULL)) { ret = 0; @@ -2943,7 +2945,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } } - witness_assert_lockless(tsdn); + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return ret; } @@ -3000,7 +3002,7 @@ _malloc_prefork(void) narenas = narenas_total_get(); - witness_prefork(tsd); + witness_prefork(tsd_witness_tsdp_get(tsd)); /* Acquire all mutexes in a safe order. */ ctl_prefork(tsd_tsdn(tsd)); tcache_prefork(tsd_tsdn(tsd)); @@ -3067,7 +3069,7 @@ _malloc_postfork(void) tsd = tsd_fetch(); - witness_postfork_parent(tsd); + witness_postfork_parent(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -3094,7 +3096,7 @@ jemalloc_postfork_child(void) { tsd = tsd_fetch(); - witness_postfork_child(tsd); + witness_postfork_child(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. 
*/ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/tcache.c b/src/tcache.c index d9f5e7cb..4bb2fb86 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -568,7 +568,7 @@ label_return: bool tcaches_create(tsd_t *tsd, unsigned *r_ind) { - witness_assert_depth(tsd_tsdn(tsd), 0); + witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0); bool err; @@ -600,7 +600,7 @@ tcaches_create(tsd_t *tsd, unsigned *r_ind) { err = false; label_return: - witness_assert_depth(tsd_tsdn(tsd), 0); + witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0); return err; } diff --git a/src/tsd.c b/src/tsd.c index 612f7523..801d8127 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -140,7 +140,7 @@ tsd_do_data_cleanup(tsd_t *tsd) { arena_cleanup(tsd); arenas_tdata_cleanup(tsd); tcache_cleanup(tsd); - witnesses_cleanup(tsd); + witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd)); } void diff --git a/src/witness.c b/src/witness.c index 0e910dca..f42b72ad 100644 --- a/src/witness.c +++ b/src/witness.c @@ -63,38 +63,38 @@ witness_depth_error_t *JET_MUTABLE witness_depth_error = witness_depth_error_impl; void -witnesses_cleanup(tsd_t *tsd) { - witness_assert_lockless(tsd_tsdn(tsd)); +witnesses_cleanup(witness_tsd_t *witness_tsd) { + witness_assert_lockless(witness_tsd_tsdn(witness_tsd)); /* Do nothing. 
*/ } void -witness_prefork(tsd_t *tsd) { +witness_prefork(witness_tsd_t *witness_tsd) { if (!config_debug) { return; } - tsd_witness_fork_set(tsd, true); + witness_tsd->forking = true; } void -witness_postfork_parent(tsd_t *tsd) { +witness_postfork_parent(witness_tsd_t *witness_tsd) { if (!config_debug) { return; } - tsd_witness_fork_set(tsd, false); + witness_tsd->forking = false; } void -witness_postfork_child(tsd_t *tsd) { +witness_postfork_child(witness_tsd_t *witness_tsd) { if (!config_debug) { return; } #ifndef JEMALLOC_MUTEX_INIT_CB witness_list_t *witnesses; - witnesses = tsd_witnessesp_get(tsd); + witnesses = &witness_tsd->witnesses; ql_new(witnesses); #endif - tsd_witness_fork_set(tsd, false); + witness_tsd->forking = false; } diff --git a/test/unit/witness.c b/test/unit/witness.c index de2e6028..5986da40 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -55,95 +55,91 @@ witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, TEST_BEGIN(test_witness) { witness_t a, b; - tsdn_t *tsdn; + witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; test_skip_if(!config_debug); - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - witness_assert_depth(tsdn, 0); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 0); + witness_assert_lockless(&witness_tsdn); + witness_assert_depth(&witness_tsdn, 0); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 0); witness_init(&a, "a", 1, NULL, NULL); - witness_assert_not_owner(tsdn, &a); - witness_lock(tsdn, &a); - witness_assert_owner(tsdn, &a); - witness_assert_depth(tsdn, 1); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 1); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 0); + witness_assert_not_owner(&witness_tsdn, &a); + witness_lock(&witness_tsdn, &a); + witness_assert_owner(&witness_tsdn, &a); + witness_assert_depth(&witness_tsdn, 1); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 1); + 
witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)2U, 0); witness_init(&b, "b", 2, NULL, NULL); - witness_assert_not_owner(tsdn, &b); - witness_lock(tsdn, &b); - witness_assert_owner(tsdn, &b); - witness_assert_depth(tsdn, 2); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 2); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 1); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)3U, 0); + witness_assert_not_owner(&witness_tsdn, &b); + witness_lock(&witness_tsdn, &b); + witness_assert_owner(&witness_tsdn, &b); + witness_assert_depth(&witness_tsdn, 2); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 2); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)2U, 1); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)3U, 0); - witness_unlock(tsdn, &a); - witness_assert_depth(tsdn, 1); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 1); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)2U, 1); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)3U, 0); - witness_unlock(tsdn, &b); + witness_unlock(&witness_tsdn, &a); + witness_assert_depth(&witness_tsdn, 1); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 1); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)2U, 1); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)3U, 0); + witness_unlock(&witness_tsdn, &b); - witness_assert_lockless(tsdn); - witness_assert_depth(tsdn, 0); - witness_assert_depth_to_rank(tsdn, (witness_rank_t)1U, 0); + witness_assert_lockless(&witness_tsdn); + witness_assert_depth(&witness_tsdn, 0); + witness_assert_depth_to_rank(&witness_tsdn, (witness_rank_t)1U, 0); } TEST_END TEST_BEGIN(test_witness_comp) { witness_t a, b, c, d; - tsdn_t *tsdn; + witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; test_skip_if(!config_debug); - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_init(&a, "a", 1, witness_comp, &a); - 
witness_assert_not_owner(tsdn, &a); - witness_lock(tsdn, &a); - witness_assert_owner(tsdn, &a); - witness_assert_depth(tsdn, 1); + witness_assert_not_owner(&witness_tsdn, &a); + witness_lock(&witness_tsdn, &a); + witness_assert_owner(&witness_tsdn, &a); + witness_assert_depth(&witness_tsdn, 1); witness_init(&b, "b", 1, witness_comp, &b); - witness_assert_not_owner(tsdn, &b); - witness_lock(tsdn, &b); - witness_assert_owner(tsdn, &b); - witness_assert_depth(tsdn, 2); - witness_unlock(tsdn, &b); - witness_assert_depth(tsdn, 1); + witness_assert_not_owner(&witness_tsdn, &b); + witness_lock(&witness_tsdn, &b); + witness_assert_owner(&witness_tsdn, &b); + witness_assert_depth(&witness_tsdn, 2); + witness_unlock(&witness_tsdn, &b); + witness_assert_depth(&witness_tsdn, 1); witness_lock_error_orig = witness_lock_error; witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; witness_init(&c, "c", 1, witness_comp_reverse, &c); - witness_assert_not_owner(tsdn, &c); + witness_assert_not_owner(&witness_tsdn, &c); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsdn, &c); + witness_lock(&witness_tsdn, &c); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsdn, &c); - witness_assert_depth(tsdn, 1); + witness_unlock(&witness_tsdn, &c); + witness_assert_depth(&witness_tsdn, 1); saw_lock_error = false; witness_init(&d, "d", 1, NULL, NULL); - witness_assert_not_owner(tsdn, &d); + witness_assert_not_owner(&witness_tsdn, &d); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsdn, &d); + witness_lock(&witness_tsdn, &d); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsdn, &d); - witness_assert_depth(tsdn, 1); + witness_unlock(&witness_tsdn, &d); + witness_assert_depth(&witness_tsdn, 1); - witness_unlock(tsdn, &a); + witness_unlock(&witness_tsdn, &a); - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_lock_error = 
witness_lock_error_orig; } @@ -151,7 +147,7 @@ TEST_END TEST_BEGIN(test_witness_reversal) { witness_t a, b; - tsdn_t *tsdn; + witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; test_skip_if(!config_debug); @@ -159,24 +155,22 @@ TEST_BEGIN(test_witness_reversal) { witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_init(&a, "a", 1, NULL, NULL); witness_init(&b, "b", 2, NULL, NULL); - witness_lock(tsdn, &b); - witness_assert_depth(tsdn, 1); + witness_lock(&witness_tsdn, &b); + witness_assert_depth(&witness_tsdn, 1); assert_false(saw_lock_error, "Unexpected witness lock error"); - witness_lock(tsdn, &a); + witness_lock(&witness_tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); - witness_unlock(tsdn, &a); - witness_assert_depth(tsdn, 1); - witness_unlock(tsdn, &b); + witness_unlock(&witness_tsdn, &a); + witness_assert_depth(&witness_tsdn, 1); + witness_unlock(&witness_tsdn, &b); - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_lock_error = witness_lock_error_orig; } @@ -184,7 +178,7 @@ TEST_END TEST_BEGIN(test_witness_recursive) { witness_t a; - tsdn_t *tsdn; + witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; test_skip_if(!config_debug); @@ -196,22 +190,20 @@ TEST_BEGIN(test_witness_recursive) { witness_lock_error = witness_lock_error_intercept; saw_lock_error = false; - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_init(&a, "a", 1, NULL, NULL); - witness_lock(tsdn, &a); + witness_lock(&witness_tsdn, &a); assert_false(saw_lock_error, "Unexpected witness lock error"); assert_false(saw_not_owner_error, "Unexpected witness not owner error"); - witness_lock(tsdn, &a); + witness_lock(&witness_tsdn, &a); assert_true(saw_lock_error, "Expected witness lock error"); assert_true(saw_not_owner_error, "Expected witness 
not owner error"); - witness_unlock(tsdn, &a); + witness_unlock(&witness_tsdn, &a); - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; @@ -221,7 +213,7 @@ TEST_END TEST_BEGIN(test_witness_unlock_not_owned) { witness_t a; - tsdn_t *tsdn; + witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; test_skip_if(!config_debug); @@ -229,17 +221,15 @@ TEST_BEGIN(test_witness_unlock_not_owned) { witness_owner_error = witness_owner_error_intercept; saw_owner_error = false; - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_init(&a, "a", 1, NULL, NULL); assert_false(saw_owner_error, "Unexpected owner error"); - witness_unlock(tsdn, &a); + witness_unlock(&witness_tsdn, &a); assert_true(saw_owner_error, "Expected owner error"); - witness_assert_lockless(tsdn); + witness_assert_lockless(&witness_tsdn); witness_owner_error = witness_owner_error_orig; } @@ -247,7 +237,7 @@ TEST_END TEST_BEGIN(test_witness_depth) { witness_t a; - tsdn_t *tsdn; + witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; test_skip_if(!config_debug); @@ -255,26 +245,24 @@ TEST_BEGIN(test_witness_depth) { witness_depth_error = witness_depth_error_intercept; saw_depth_error = false; - tsdn = tsdn_fetch(); - - witness_assert_lockless(tsdn); - witness_assert_depth(tsdn, 0); + witness_assert_lockless(&witness_tsdn); + witness_assert_depth(&witness_tsdn, 0); witness_init(&a, "a", 1, NULL, NULL); assert_false(saw_depth_error, "Unexpected depth error"); - witness_assert_lockless(tsdn); - witness_assert_depth(tsdn, 0); + witness_assert_lockless(&witness_tsdn); + witness_assert_depth(&witness_tsdn, 0); - witness_lock(tsdn, &a); - witness_assert_lockless(tsdn); - witness_assert_depth(tsdn, 0); + witness_lock(&witness_tsdn, &a); + witness_assert_lockless(&witness_tsdn); + witness_assert_depth(&witness_tsdn, 0); 
assert_true(saw_depth_error, "Expected depth error"); - witness_unlock(tsdn, &a); + witness_unlock(&witness_tsdn, &a); - witness_assert_lockless(tsdn); - witness_assert_depth(tsdn, 0); + witness_assert_lockless(&witness_tsdn); + witness_assert_depth(&witness_tsdn, 0); witness_depth_error = witness_depth_error_orig; } From 18ecbfa89e7dd39a802f52bcd461184b8065e97e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 May 2017 12:28:19 -0700 Subject: [PATCH 0883/2608] Header refactoring: unify and de-catchall mutex module --- include/jemalloc/internal/arena_inlines_b.h | 1 + include/jemalloc/internal/arena_structs_b.h | 1 + include/jemalloc/internal/base_structs.h | 1 + include/jemalloc/internal/extent_externs.h | 3 +- include/jemalloc/internal/extent_inlines.h | 1 + include/jemalloc/internal/extent_structs.h | 1 + .../internal/jemalloc_internal_includes.h | 4 - include/jemalloc/internal/mutex.h | 248 ++++++++++++++++++ include/jemalloc/internal/mutex_externs.h | 21 -- include/jemalloc/internal/mutex_inlines.h | 118 --------- .../jemalloc/internal/mutex_pool_inlines.h | 2 +- .../jemalloc/internal/mutex_pool_structs.h | 2 + include/jemalloc/internal/mutex_structs.h | 55 ---- include/jemalloc/internal/mutex_types.h | 71 ----- include/jemalloc/internal/prof_externs.h | 2 + include/jemalloc/internal/prof_inlines_a.h | 2 + include/jemalloc/internal/prof_structs.h | 1 + include/jemalloc/internal/rtree_structs.h | 2 +- include/jemalloc/internal/stats.h | 12 +- include/jemalloc/internal/stats_tsd.h | 12 + include/jemalloc/internal/tcache_structs.h | 2 +- include/jemalloc/internal/tsd_generic.h | 5 +- src/arena.c | 1 + src/base.c | 1 + src/ctl.c | 1 + src/extent.c | 1 + src/jemalloc.c | 1 + src/large.c | 1 + src/mutex_pool.c | 2 + src/prof.c | 1 + src/rtree.c | 1 + src/stats.c | 1 + src/tcache.c | 1 + src/tsd.c | 12 + 34 files changed, 304 insertions(+), 287 deletions(-) create mode 100644 include/jemalloc/internal/mutex.h delete mode 100644 
include/jemalloc/internal/mutex_externs.h delete mode 100644 include/jemalloc/internal/mutex_inlines.h delete mode 100644 include/jemalloc/internal/mutex_structs.h delete mode 100644 include/jemalloc/internal/mutex_types.h create mode 100644 include/jemalloc/internal/stats_tsd.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a1057184..8db6e9a8 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 95680c0f..f98f45c1 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 1d0a1f3a..18e227bd 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_BASE_STRUCTS_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" /* Embedded at the beginning of every block of base-managed virtual memory. 
*/ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 7a5b38c6..9d5daf5b 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -1,8 +1,9 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H -#include "jemalloc/internal/rb.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rb.h" extern rtree_t extents_rtree; extern const extent_hooks_t extent_hooks_default; diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 2ebd9452..a99a6351 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H #define JEMALLOC_INTERNAL_EXTENT_INLINES_H +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 62bae39a..457891df 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 2abc4781..b1a6f17d 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/mutex_types.h" #include "jemalloc/internal/extent_types.h" 
#include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" @@ -53,7 +52,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/mutex_structs.h" #include "jemalloc/internal/mutex_pool_structs.h" #include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" @@ -70,7 +68,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/mutex_externs.h" #include "jemalloc/internal/extent_externs.h" #include "jemalloc/internal/extent_dss_externs.h" #include "jemalloc/internal/extent_mmap_externs.h" @@ -86,7 +83,6 @@ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/mutex_inlines.h" #include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/rtree_inlines.h" diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h new file mode 100644 index 00000000..6520c251 --- /dev/null +++ b/include/jemalloc/internal/mutex.h @@ -0,0 +1,248 @@ +#ifndef JEMALLOC_INTERNAL_MUTEX_H +#define JEMALLOC_INTERNAL_MUTEX_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" + +typedef enum { + /* Can only acquire one mutex of a given witness rank at a time. */ + malloc_mutex_rank_exclusive, + /* + * Can acquire multiple mutexes of the same witness rank, but in + * address-ascending order only. 
+ */ + malloc_mutex_address_ordered +} malloc_mutex_lock_order_t; + +typedef struct malloc_mutex_s malloc_mutex_t; +struct malloc_mutex_s { + union { + struct { + /* + * prof_data is defined first to reduce cacheline + * bouncing: the data is not touched by the mutex holder + * during unlocking, while might be modified by + * contenders. Having it before the mutex itself could + * avoid prefetching a modified cacheline (for the + * unlocking thread). + */ + mutex_prof_data_t prof_data; +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) + os_unfair_lock lock; +#elif (defined(JEMALLOC_OSSPIN)) + OSSpinLock lock; +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; +#else + pthread_mutex_t lock; +#endif + }; + /* + * We only touch witness when configured w/ debug. However we + * keep the field in a union when !debug so that we don't have + * to pollute the code base with #ifdefs, while avoid paying the + * memory cost. + */ +#if !defined(JEMALLOC_DEBUG) + witness_t witness; + malloc_mutex_lock_order_t lock_order; +#endif + }; + +#if defined(JEMALLOC_DEBUG) + witness_t witness; + malloc_mutex_lock_order_t lock_order; +#endif +}; + +/* + * Based on benchmark results, a fixed spin with this amount of retries works + * well for our critical sections. 
+ */ +#define MALLOC_MUTEX_MAX_SPIN 250 + +#ifdef _WIN32 +# if _WIN32_WINNT >= 0x0600 +# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) +# else +# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) +# endif +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) +#else +# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) +#endif + +#define LOCK_PROF_DATA_INITIALIZER \ + {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ + ATOMIC_INIT(0), 0, NULL, 0} + +#ifdef _WIN32 +# define MALLOC_MUTEX_INITIALIZER +#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_OSSPIN)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#elif (defined(JEMALLOC_MUTEX_INIT_CB)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#else +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# define 
MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +#endif + +#ifdef JEMALLOC_LAZY_LOCK +extern bool isthreaded; +#else +# undef isthreaded /* Undo private_namespace.h definition. */ +# define isthreaded true +#endif + +bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, + witness_rank_t rank, malloc_mutex_lock_order_t lock_order); +void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); +void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); +bool malloc_mutex_boot(void); +void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); + +void malloc_mutex_lock_slow(malloc_mutex_t *mutex); + +static inline void +malloc_mutex_lock_final(malloc_mutex_t *mutex) { + MALLOC_MUTEX_LOCK(mutex); +} + +static inline bool +malloc_mutex_trylock_final(malloc_mutex_t *mutex) { + return MALLOC_MUTEX_TRYLOCK(mutex); +} + +static inline void +mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { + if (config_stats) { + mutex_prof_data_t *data = &mutex->prof_data; + data->n_lock_ops++; + if (data->prev_owner != tsdn) { + data->prev_owner = tsdn; + data->n_owner_switches++; + } + } +} + +/* Trylock: return false if the lock is successfully acquired. */ +static inline bool +malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + if (malloc_mutex_trylock_final(mutex)) { + return true; + } + mutex_owner_stats_update(tsdn, mutex); + } + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + + return false; +} + +/* Aggregate lock prof data. 
*/ +static inline void +malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { + nstime_add(&sum->tot_wait_time, &data->tot_wait_time); + if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { + nstime_copy(&sum->max_wait_time, &data->max_wait_time); + } + + sum->n_wait_times += data->n_wait_times; + sum->n_spin_acquired += data->n_spin_acquired; + + if (sum->max_n_thds < data->max_n_thds) { + sum->max_n_thds = data->max_n_thds; + } + uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, + ATOMIC_RELAXED); + uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( + &data->n_waiting_thds, ATOMIC_RELAXED); + atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, + ATOMIC_RELAXED); + sum->n_owner_switches += data->n_owner_switches; + sum->n_lock_ops += data->n_lock_ops; +} + +static inline void +malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + if (malloc_mutex_trylock_final(mutex)) { + malloc_mutex_lock_slow(mutex); + } + mutex_owner_stats_update(tsdn, mutex); + } + witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} + +static inline void +malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + MALLOC_MUTEX_UNLOCK(mutex); + } +} + +static inline void +malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} + +static inline void +malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { + witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); +} + +/* Copy the prof data from mutex for processing. */ +static inline void +malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. 
*/ + malloc_mutex_assert_owner(tsdn, mutex); + + /* + * Not *really* allowed (we shouldn't be doing non-atomic loads of + * atomic data), but the mutex protection makes this safe, and writing + * a member-for-member copy is tedious for this situation. + */ + *data = *source; + /* n_wait_thds is not reported (modified w/o locking). */ + atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); +} + +#endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/include/jemalloc/internal/mutex_externs.h b/include/jemalloc/internal/mutex_externs.h deleted file mode 100644 index d0139f2e..00000000 --- a/include/jemalloc/internal/mutex_externs.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_EXTERNS_H -#define JEMALLOC_INTERNAL_MUTEX_EXTERNS_H - -#include "jemalloc/internal/tsd_types.h" - -#ifdef JEMALLOC_LAZY_LOCK -extern bool isthreaded; -#else -# undef isthreaded /* Undo private_namespace.h definition. */ -# define isthreaded true -#endif - -bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank, malloc_mutex_lock_order_t lock_order); -void malloc_mutex_prefork(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_parent(tsdn_t *tsdn, malloc_mutex_t *mutex); -void malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex); -bool malloc_mutex_boot(void); -void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); - -#endif /* JEMALLOC_INTERNAL_MUTEX_EXTERNS_H */ diff --git a/include/jemalloc/internal/mutex_inlines.h b/include/jemalloc/internal/mutex_inlines.h deleted file mode 100644 index b86a4ad4..00000000 --- a/include/jemalloc/internal/mutex_inlines.h +++ /dev/null @@ -1,118 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_INLINES_H -#define JEMALLOC_INTERNAL_MUTEX_INLINES_H - -#include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/tsd_types.h" - -void malloc_mutex_lock_slow(malloc_mutex_t *mutex); - -static inline void -malloc_mutex_lock_final(malloc_mutex_t *mutex) { - 
MALLOC_MUTEX_LOCK(mutex); -} - -static inline bool -malloc_mutex_trylock_final(malloc_mutex_t *mutex) { - return MALLOC_MUTEX_TRYLOCK(mutex); -} - -static inline void -mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { - if (config_stats) { - mutex_prof_data_t *data = &mutex->prof_data; - data->n_lock_ops++; - if (data->prev_owner != tsdn) { - data->prev_owner = tsdn; - data->n_owner_switches++; - } - } -} - -/* Trylock: return false if the lock is successfully acquired. */ -static inline bool -malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); - if (isthreaded) { - if (malloc_mutex_trylock_final(mutex)) { - return true; - } - mutex_owner_stats_update(tsdn, mutex); - } - witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); - - return false; -} - -/* Aggregate lock prof data. */ -static inline void -malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { - nstime_add(&sum->tot_wait_time, &data->tot_wait_time); - if (nstime_compare(&sum->max_wait_time, &data->max_wait_time) < 0) { - nstime_copy(&sum->max_wait_time, &data->max_wait_time); - } - - sum->n_wait_times += data->n_wait_times; - sum->n_spin_acquired += data->n_spin_acquired; - - if (sum->max_n_thds < data->max_n_thds) { - sum->max_n_thds = data->max_n_thds; - } - uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, - ATOMIC_RELAXED); - uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( - &data->n_waiting_thds, ATOMIC_RELAXED); - atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, - ATOMIC_RELAXED); - sum->n_owner_switches += data->n_owner_switches; - sum->n_lock_ops += data->n_lock_ops; -} - -static inline void -malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); - if (isthreaded) { - if (malloc_mutex_trylock_final(mutex)) { - malloc_mutex_lock_slow(mutex); - } - 
mutex_owner_stats_update(tsdn, mutex); - } - witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); -} - -static inline void -malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); - if (isthreaded) { - MALLOC_MUTEX_UNLOCK(mutex); - } -} - -static inline void -malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); -} - -static inline void -malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { - witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); -} - -/* Copy the prof data from mutex for processing. */ -static inline void -malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { - mutex_prof_data_t *source = &mutex->prof_data; - /* Can only read holding the mutex. */ - malloc_mutex_assert_owner(tsdn, mutex); - - /* - * Not *really* allowed (we shouldn't be doing non-atomic loads of - * atomic data), but the mutex protection makes this safe, and writing - * a member-for-member copy is tedious for this situation. - */ - *data = *source; - /* n_wait_thds is not reported (modified w/o locking). 
*/ - atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); -} - -#endif /* JEMALLOC_INTERNAL_MUTEX_INLINES_H */ diff --git a/include/jemalloc/internal/mutex_pool_inlines.h b/include/jemalloc/internal/mutex_pool_inlines.h index bc257ea8..19b5ab4c 100644 --- a/include/jemalloc/internal/mutex_pool_inlines.h +++ b/include/jemalloc/internal/mutex_pool_inlines.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H #include "jemalloc/internal/hash.h" -#include "jemalloc/internal/mutex_inlines.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool_structs.h" #include "jemalloc/internal/witness.h" diff --git a/include/jemalloc/internal/mutex_pool_structs.h b/include/jemalloc/internal/mutex_pool_structs.h index a662166c..b32fb5ac 100644 --- a/include/jemalloc/internal/mutex_pool_structs.h +++ b/include/jemalloc/internal/mutex_pool_structs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H #define JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H +#include "jemalloc/internal/mutex.h" + /* This file really combines "structs" and "types", but only transitionally. */ /* We do mod reductions by this value, so it should be kept a power of 2. */ diff --git a/include/jemalloc/internal/mutex_structs.h b/include/jemalloc/internal/mutex_structs.h deleted file mode 100644 index c1b65522..00000000 --- a/include/jemalloc/internal/mutex_structs.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_STRUCTS_H -#define JEMALLOC_INTERNAL_MUTEX_STRUCTS_H - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/witness.h" - -struct malloc_mutex_s { - union { - struct { - /* - * prof_data is defined first to reduce cacheline - * bouncing: the data is not touched by the mutex holder - * during unlocking, while might be modified by - * contenders. Having it before the mutex itself could - * avoid prefetching a modified cacheline (for the - * unlocking thread). 
- */ - mutex_prof_data_t prof_data; -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; -# else - CRITICAL_SECTION lock; -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; -#else - pthread_mutex_t lock; -#endif - }; - /* - * We only touch witness when configured w/ debug. However we - * keep the field in a union when !debug so that we don't have - * to pollute the code base with #ifdefs, while avoid paying the - * memory cost. - */ -#if !defined(JEMALLOC_DEBUG) - witness_t witness; - malloc_mutex_lock_order_t lock_order; -#endif - }; - -#if defined(JEMALLOC_DEBUG) - witness_t witness; - malloc_mutex_lock_order_t lock_order; -#endif -}; - -#endif /* JEMALLOC_INTERNAL_MUTEX_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex_types.h b/include/jemalloc/internal/mutex_types.h deleted file mode 100644 index 65a9938d..00000000 --- a/include/jemalloc/internal/mutex_types.h +++ /dev/null @@ -1,71 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_TYPES_H -#define JEMALLOC_INTERNAL_MUTEX_TYPES_H - -typedef struct malloc_mutex_s malloc_mutex_t; - -typedef enum { - /* Can only acquire one mutex of a given witness rank at a time. */ - malloc_mutex_rank_exclusive, - /* - * Can acquire multiple mutexes of the same witness rank, but in - * address-ascending order only. - */ - malloc_mutex_address_ordered -} malloc_mutex_lock_order_t; - -/* - * Based on benchmark results, a fixed spin with this amount of retries works - * well for our critical sections. 
- */ -#define MALLOC_MUTEX_MAX_SPIN 250 - -#ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 -# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) -# else -# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) -# endif -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_LOCK(m) OSSpinLockLock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) -#else -# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) -#endif - -#define LOCK_PROF_DATA_INITIALIZER \ - {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ - ATOMIC_INIT(0), 0, NULL, 0} - -#ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER -#elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# define 
MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -#endif - -#endif /* JEMALLOC_INTERNAL_MUTEX_TYPES_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 2891b8bd..04348696 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H #define JEMALLOC_INTERNAL_PROF_EXTERNS_H +#include "jemalloc/internal/mutex.h" + extern malloc_mutex_t bt2gctx_mtx; extern bool opt_prof; diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 6203cbd9..eda6839a 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H #define JEMALLOC_INTERNAL_PROF_INLINES_A_H +#include "jemalloc/internal/mutex.h" + static inline bool prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { cassert(config_prof); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index a26a0420..0d58ae10 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_STRUCTS_H #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/rb.h" diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h index ba0f96d0..a02a1f60 100644 --- a/include/jemalloc/internal/rtree_structs.h +++ b/include/jemalloc/internal/rtree_structs.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_RTREE_STRUCTS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex_pool_structs.h" +#include "jemalloc/internal/mutex.h" struct rtree_node_elm_s { atomic_p_t child; /* 
(rtree_{node,leaf}_elm_t *) */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 3f5c20c7..47ca4f9e 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -3,9 +3,9 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/mutex_types.h" -#include "jemalloc/internal/mutex_structs.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/stats_tsd.h" /* The opt.stats_print storage. */ extern bool opt_stats_print; @@ -26,14 +26,6 @@ typedef atomic_u64_t arena_stats_u64_t; typedef uint64_t arena_stats_u64_t; #endif -typedef struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -} tcache_bin_stats_t; - typedef struct malloc_bin_stats_s { /* * Total number of allocation/deallocation requests served directly by diff --git a/include/jemalloc/internal/stats_tsd.h b/include/jemalloc/internal/stats_tsd.h new file mode 100644 index 00000000..d0c3bbe4 --- /dev/null +++ b/include/jemalloc/internal/stats_tsd.h @@ -0,0 +1,12 @@ +#ifndef JEMALLOC_INTERNAL_STATS_TSD_H +#define JEMALLOC_INTERNAL_STATS_TSD_H + +typedef struct tcache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. 
+ */ + uint64_t nrequests; +} tcache_bin_stats_t; + +#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 7c0afb0a..7eb516fb 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/stats_tsd.h" #include "jemalloc/internal/ticker.h" /* diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index d59cb743..1e52ef76 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -10,11 +10,8 @@ struct tsd_init_block_s { void *data; }; +/* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */ typedef struct tsd_init_head_s tsd_init_head_t; -struct tsd_init_head_s { - ql_head(tsd_init_block_t) blocks; - malloc_mutex_t lock; -}; typedef struct { bool initialized; diff --git a/src/arena.c b/src/arena.c index 35b4e5a7..bc8fd28f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" diff --git a/src/base.c b/src/base.c index dd4b109d..892c28dd 100644 --- a/src/base.c +++ b/src/base.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/mutex.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/ctl.c b/src/ctl.c index da5e1710..30704edd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" diff --git a/src/extent.c b/src/extent.c index ff09f7fb..fee8198e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/mutex.h" /******************************************************************************/ /* Data. */ diff --git a/src/jemalloc.c b/src/jemalloc.c index f083adc4..517fbb99 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/ticker.h" diff --git a/src/large.c b/src/large.c index ed73dc22..55ee3524 100644 --- a/src/large.c +++ b/src/large.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/mutex_pool.c b/src/mutex_pool.c index 004d6d0f..95a45736 100644 --- a/src/mutex_pool.c +++ b/src/mutex_pool.c @@ -3,6 +3,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/mutex.h" + bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) { for (int i = 0; i < MUTEX_POOL_SIZE; ++i) { diff --git a/src/prof.c b/src/prof.c index 18978810..639e5983 100644 --- a/src/prof.c +++ b/src/prof.c @@ -6,6 
+6,7 @@ #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" /******************************************************************************/ diff --git a/src/rtree.c b/src/rtree.c index 637853c7..53702cf7 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/mutex.h" /* * Only the most significant bits of keys passed to rtree_{read,write}() are diff --git a/src/stats.c b/src/stats.c index fd108162..b67d46dc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" const char *global_mutex_names[mutex_prof_num_global_mutexes] = { diff --git a/src/tcache.c b/src/tcache.c index 4bb2fb86..96ebe677 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" /******************************************************************************/ diff --git a/src/tsd.c b/src/tsd.c index 801d8127..525432b6 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/mutex.h" /******************************************************************************/ /* Data. */ @@ -23,6 +24,17 @@ DWORD tsd_tsd; tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; bool tsd_booted = false; #else + +/* + * This contains a mutex, but it's pretty convenient to allow the mutex code to + * have a dependency on tsd. So we define the struct here, and only refer to it + * by pointer in the header. 
+ */ +struct tsd_init_head_s { + ql_head(tsd_init_block_t) blocks; + malloc_mutex_t lock; +}; + pthread_key_t tsd_tsd; tsd_init_head_t tsd_init_head = { ql_head_initializer(blocks), From 67c93c332aa5597e1331e20ac06dcfda74e60574 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 24 May 2017 11:33:06 -0700 Subject: [PATCH 0884/2608] Refactor run_tests to increase parallelism. Rather than relying on parallel make to build individual configurations one at a time, use xargs to build multiple configurations in parallel. This allows the configure scripts to run in parallel. On a 14-core system (28 hyperthreads), this increases average CPU utilization from ~20% to ~90%. --- scripts/gen_run_tests.py | 58 +++++++++++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 13 deletions(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 875c6d03..f9b00604 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -1,6 +1,12 @@ #!/usr/bin/env python from itertools import combinations +from os import uname +from multiprocessing import cpu_count + +nparallel = cpu_count() * 2 + +uname = uname()[0] def powerset(items): result = [] @@ -8,8 +14,6 @@ def powerset(items): result += combinations(items, i) return result -MAKE_J_VAL = 32 - possible_compilers = [('gcc', 'g++'), ('clang', 'clang++')] possible_compiler_opts = [ '-m32', @@ -28,8 +32,11 @@ possible_malloc_conf_opts = [ print 'set -e' print 'autoconf' -print 'unamestr=`uname`' +print 'rm -rf run_tests.out' +print 'mkdir run_tests.out' +print 'cd run_tests.out' +ind = 0 for cc, cxx in possible_compilers: for compiler_opts in powerset(possible_compiler_opts): for config_opts in powerset(possible_config_opts): @@ -39,9 +46,10 @@ for cc, cxx in possible_compilers: and '--enable-prof' in config_opts: continue config_line = ( - 'EXTRA_CFLAGS=-Werror EXTRA_CXXFLAGS=-Werror ./configure ' + 'EXTRA_CFLAGS=-Werror EXTRA_CXXFLAGS=-Werror ' + 'CC="{} {}" '.format(cc, " ".join(compiler_opts)) + 
'CXX="{} {}" '.format(cxx, " ".join(compiler_opts)) + + '../../configure ' + " ".join(config_opts) + (' --with-malloc-conf=' + ",".join(malloc_conf_opts) if len(malloc_conf_opts) > 0 else '') @@ -52,14 +60,38 @@ for cc, cxx in possible_compilers: # Heap profiling and dss are not supported on OS X. darwin_unsupported = ('--enable-prof' in config_opts or \ 'dss:primary' in malloc_conf_opts) - if linux_supported: - print 'if [[ "$unamestr" = "Linux" ]]; then' - elif darwin_unsupported: - print 'if [[ "$unamestr" != "Darwin" ]]; then' + if uname is 'Linux' and linux_supported \ + or uname is not 'Darwin' \ + or not darwin_unsupported: + print """cat < run_test_%(ind)d.sh +#!/bin/sh - print config_line - print "make clean" - print "make -j" + str(MAKE_J_VAL) + " check" +set -e - if linux_supported or darwin_unsupported: - print 'fi' +abort() { + echo "==> Error" >> run_test.log + echo "Error; see run_tests.out/run_test_%(ind)d.out/run_test.log" + exit 255 # Special exit code tells xargs to terminate. +} + +# Environment variables are not supported. +run_cmd() { + echo "==> \$@" >> run_test.log + \$@ >> run_test.log 2>&1 || abort +} + +echo "=> run_test_%(ind)d: %(config_line)s" +mkdir run_test_%(ind)d.out +cd run_test_%(ind)d.out + +echo "==> %(config_line)s" >> run_test.log +%(config_line)s >> run_test.log 2>&1 || abort + +run_cmd make all tests +run_cmd make check +run_cmd make distclean +EOF +chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line} + ind += 1 + +print 'for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel} From 1df18d7c83bdb1995c088c85818733f164ed0595 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 24 May 2017 12:44:53 -0700 Subject: [PATCH 0885/2608] Fix stats.mapped during deallocation. 
--- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index bc8fd28f..fcbd57e1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -968,7 +968,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged, npurged); arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - nunmapped); + nunmapped << LG_PAGE); arena_stats_unlock(tsdn, &arena->stats); } From 927239b910310f95aebb1f0ffc6fda53f93c8b7e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 May 2017 13:33:34 -0700 Subject: [PATCH 0886/2608] Cleanup smoothstep.sh / .h. h_step_sum was used to compute moving sum. Not in use anymore. --- include/jemalloc/internal/smoothstep.h | 402 ++++++++++++------------ include/jemalloc/internal/smoothstep.sh | 6 +- src/arena.c | 2 +- test/unit/smoothstep.c | 2 +- 4 files changed, 205 insertions(+), 207 deletions(-) diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h index 5bca6e8c..2e14430f 100644 --- a/include/jemalloc/internal/smoothstep.h +++ b/include/jemalloc/internal/smoothstep.h @@ -27,206 +27,206 @@ #define SMOOTHSTEP_NSTEPS 200 #define SMOOTHSTEP_BFP 24 #define SMOOTHSTEP \ - /* STEP(step, h, x, y, h_sum) */ \ - STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750, UINT64_C(0x0000000000000014)) \ - STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000, UINT64_C(0x00000000000000b9)) \ - STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250, UINT64_C(0x00000000000002e2)) \ - STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000, UINT64_C(0x00000000000007f8)) \ - STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750, UINT64_C(0x00000000000011d4)) \ - STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000, UINT64_C(0x00000000000022bc)) \ - STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250, UINT64_C(0x0000000000003d60)) \ - STEP( 8, 
UINT64_C(0x0000000000002777), 0.040, 0.000602214400000, UINT64_C(0x00000000000064d7)) \ - STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750, UINT64_C(0x0000000000009c99)) \ - STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000, UINT64_C(0x000000000000e87f)) \ - STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250, UINT64_C(0x0000000000014cbb)) \ - STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000, UINT64_C(0x000000000001cdda)) \ - STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750, UINT64_C(0x00000000000270bc)) \ - STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000, UINT64_C(0x0000000000033a94)) \ - STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250, UINT64_C(0x00000000000430e3)) \ - STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000, UINT64_C(0x0000000000055974)) \ - STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750, UINT64_C(0x000000000006ba5b)) \ - STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000, UINT64_C(0x00000000000859f0)) \ - STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250, UINT64_C(0x00000000000a3ecc)) \ - STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000, UINT64_C(0x00000000000c6fc8)) \ - STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750, UINT64_C(0x00000000000ef3f8)) \ - STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000, UINT64_C(0x000000000011d2a8)) \ - STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250, UINT64_C(0x0000000000151359)) \ - STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000, UINT64_C(0x000000000018bdc0)) \ - STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750, UINT64_C(0x00000000001cd9c0)) \ - STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000, UINT64_C(0x0000000000216f68)) \ - STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250, UINT64_C(0x00000000002686f3)) \ - STEP( 28, 
UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000, UINT64_C(0x00000000002c28c2)) \ - STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750, UINT64_C(0x0000000000325d5a)) \ - STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000, UINT64_C(0x0000000000392d63)) \ - STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250, UINT64_C(0x000000000040a1a2)) \ - STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000, UINT64_C(0x000000000048c2f9)) \ - STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750, UINT64_C(0x0000000000519a64)) \ - STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000, UINT64_C(0x00000000005b30f5)) \ - STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250, UINT64_C(0x0000000000658fd4)) \ - STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000, UINT64_C(0x000000000070c03b)) \ - STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750, UINT64_C(0x00000000007ccb73)) \ - STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000, UINT64_C(0x000000000089bad1)) \ - STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250, UINT64_C(0x00000000009797b7)) \ - STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000, UINT64_C(0x0000000000a66b8f)) \ - STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750, UINT64_C(0x0000000000b63fc8)) \ - STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000, UINT64_C(0x0000000000c71dd6)) \ - STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250, UINT64_C(0x0000000000d90f2e)) \ - STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000, UINT64_C(0x0000000000ec1d45)) \ - STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750, UINT64_C(0x000000000100518d)) \ - STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000, UINT64_C(0x000000000115b574)) \ - STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250, UINT64_C(0x00000000012c5260)) \ - STEP( 48, 
UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000, UINT64_C(0x00000000014431af)) \ - STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750, UINT64_C(0x00000000015d5cb3)) \ - STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000, UINT64_C(0x000000000177dcb3)) \ - STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250, UINT64_C(0x000000000193bae5)) \ - STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000, UINT64_C(0x0000000001b10070)) \ - STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750, UINT64_C(0x0000000001cfb668)) \ - STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000, UINT64_C(0x0000000001efe5cd)) \ - STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250, UINT64_C(0x0000000002119788)) \ - STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000, UINT64_C(0x000000000234d46b)) \ - STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750, UINT64_C(0x000000000259a52e)) \ - STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000, UINT64_C(0x000000000280126e)) \ - STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250, UINT64_C(0x0000000002a824ab)) \ - STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000, UINT64_C(0x0000000002d1e447)) \ - STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750, UINT64_C(0x0000000002fd5984)) \ - STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000, UINT64_C(0x00000000032a8c82)) \ - STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250, UINT64_C(0x000000000359853e)) \ - STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000, UINT64_C(0x00000000038a4b92)) \ - STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750, UINT64_C(0x0000000003bce731)) \ - STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000, UINT64_C(0x0000000003f15fa6)) \ - STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250, UINT64_C(0x000000000427bc56)) \ - STEP( 68, 
UINT64_C(0x0000000000384825), 0.340, 0.219850854400000, UINT64_C(0x000000000460047b)) \ - STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750, UINT64_C(0x00000000049a3f23)) \ - STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000, UINT64_C(0x0000000004d67332)) \ - STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250, UINT64_C(0x000000000514a75d)) \ - STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000, UINT64_C(0x000000000554e22b)) \ - STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750, UINT64_C(0x00000000059729f3)) \ - STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000, UINT64_C(0x0000000005db84dc)) \ - STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250, UINT64_C(0x000000000621f8dc)) \ - STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000, UINT64_C(0x00000000066a8bb4)) \ - STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750, UINT64_C(0x0000000006b542f4)) \ - STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000, UINT64_C(0x00000000070223f6)) \ - STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250, UINT64_C(0x00000000075133df)) \ - STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000, UINT64_C(0x0000000007a2779e)) \ - STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750, UINT64_C(0x0000000007f5f3eb)) \ - STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000, UINT64_C(0x00000000084bad46)) \ - STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250, UINT64_C(0x0000000008a3a7f7)) \ - STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000, UINT64_C(0x0000000008fde80c)) \ - STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750, UINT64_C(0x00000000095a715a)) \ - STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000, UINT64_C(0x0000000009b9477c)) \ - STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250, UINT64_C(0x000000000a1a6dd1)) \ - STEP( 88, 
UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000, UINT64_C(0x000000000a7de77d)) \ - STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750, UINT64_C(0x000000000ae3b768)) \ - STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000, UINT64_C(0x000000000b4be03e)) \ - STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250, UINT64_C(0x000000000bb6646d)) \ - STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000, UINT64_C(0x000000000c234628)) \ - STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750, UINT64_C(0x000000000c928762)) \ - STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000, UINT64_C(0x000000000d0429d2)) \ - STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250, UINT64_C(0x000000000d782eef)) \ - STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000, UINT64_C(0x000000000dee97f4)) \ - STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750, UINT64_C(0x000000000e6765db)) \ - STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000, UINT64_C(0x000000000ee29962)) \ - STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250, UINT64_C(0x000000000f603306)) \ - STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000, UINT64_C(0x000000000fe03306)) \ - STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750, UINT64_C(0x0000000010629961)) \ - STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000, UINT64_C(0x0000000010e765d9)) \ - STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250, UINT64_C(0x00000000116e97f1)) \ - STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000, UINT64_C(0x0000000011f82eeb)) \ - STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750, UINT64_C(0x00000000128429cd)) \ - STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000, UINT64_C(0x000000001312875c)) \ - STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250, UINT64_C(0x0000000013a34621)) \ - 
STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000, UINT64_C(0x0000000014366465)) \ - STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750, UINT64_C(0x0000000014cbe035)) \ - STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000, UINT64_C(0x000000001563b75e)) \ - STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250, UINT64_C(0x0000000015fde772)) \ - STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000, UINT64_C(0x00000000169a6dc5)) \ - STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750, UINT64_C(0x000000001739476f)) \ - STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000, UINT64_C(0x0000000017da714c)) \ - STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250, UINT64_C(0x00000000187de7fd)) \ - STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000, UINT64_C(0x000000001923a7e7)) \ - STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750, UINT64_C(0x0000000019cbad35)) \ - STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000, UINT64_C(0x000000001a75f3d9)) \ - STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250, UINT64_C(0x000000001b22778b)) \ - STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000, UINT64_C(0x000000001bd133cb)) \ - STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750, UINT64_C(0x000000001c8223e1)) \ - STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000, UINT64_C(0x000000001d3542de)) \ - STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250, UINT64_C(0x000000001dea8b9d)) \ - STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000, UINT64_C(0x000000001ea1f8c4)) \ - STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750, UINT64_C(0x000000001f5b84c4)) \ - STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000, UINT64_C(0x00000000201729da)) \ - STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250, 
UINT64_C(0x0000000020d4e211)) \ - STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000, UINT64_C(0x000000002194a742)) \ - STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750, UINT64_C(0x0000000022567316)) \ - STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000, UINT64_C(0x00000000231a3f06)) \ - STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250, UINT64_C(0x0000000023e0045d)) \ - STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000, UINT64_C(0x0000000024a7bc37)) \ - STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750, UINT64_C(0x0000000025715f86)) \ - STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000, UINT64_C(0x00000000263ce710)) \ - STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250, UINT64_C(0x00000000270a4b70)) \ - STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000, UINT64_C(0x0000000027d9851b)) \ - STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750, UINT64_C(0x0000000028aa8c5e)) \ - STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000, UINT64_C(0x00000000297d595f)) \ - STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250, UINT64_C(0x000000002a51e421)) \ - STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000, UINT64_C(0x000000002b282484)) \ - STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750, UINT64_C(0x000000002c001246)) \ - STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000, UINT64_C(0x000000002cd9a505)) \ - STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250, UINT64_C(0x000000002db4d441)) \ - STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000, UINT64_C(0x000000002e91975d)) \ - STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750, UINT64_C(0x000000002f6fe5a1)) \ - STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000, UINT64_C(0x00000000304fb63b)) \ - STEP( 147, UINT64_C(0x0000000000e14a07), 
0.735, 0.880035843881250, UINT64_C(0x0000000031310042)) \ - STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000, UINT64_C(0x000000003213bab6)) \ - STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750, UINT64_C(0x0000000032f7dc83)) \ - STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000, UINT64_C(0x0000000033dd5c83)) \ - STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250, UINT64_C(0x0000000034c4317e)) \ - STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000, UINT64_C(0x0000000035ac522e)) \ - STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750, UINT64_C(0x000000003695b541)) \ - STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000, UINT64_C(0x0000000037805159)) \ - STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250, UINT64_C(0x00000000386c1d10)) \ - STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000, UINT64_C(0x0000000039590ef8)) \ - STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750, UINT64_C(0x000000003a471d9f)) \ - STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000, UINT64_C(0x000000003b363f90)) \ - STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250, UINT64_C(0x000000003c266b56)) \ - STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000, UINT64_C(0x000000003d17977d)) \ - STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750, UINT64_C(0x000000003e09ba96)) \ - STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000, UINT64_C(0x000000003efccb37)) \ - STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250, UINT64_C(0x000000003ff0bffe)) \ - STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000, UINT64_C(0x0000000040e58f96)) \ - STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750, UINT64_C(0x0000000041db30b6)) \ - STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000, UINT64_C(0x0000000042d19a24)) \ - STEP( 167, 
UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250, UINT64_C(0x0000000043c8c2b8)) \ - STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000, UINT64_C(0x0000000044c0a160)) \ - STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750, UINT64_C(0x0000000045b92d20)) \ - STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000, UINT64_C(0x0000000046b25d16)) \ - STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250, UINT64_C(0x0000000047ac287d)) \ - STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000, UINT64_C(0x0000000048a686ad)) \ - STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750, UINT64_C(0x0000000049a16f21)) \ - STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000, UINT64_C(0x000000004a9cd978)) \ - STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250, UINT64_C(0x000000004b98bd78)) \ - STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000, UINT64_C(0x000000004c951310)) \ - STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750, UINT64_C(0x000000004d91d25e)) \ - STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000, UINT64_C(0x000000004e8ef3ad)) \ - STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250, UINT64_C(0x000000004f8c6f7c)) \ - STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000, UINT64_C(0x00000000508a3e7f)) \ - STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750, UINT64_C(0x00000000518859a2)) \ - STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000, UINT64_C(0x000000005286ba0c)) \ - STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250, UINT64_C(0x0000000053855924)) \ - STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000, UINT64_C(0x0000000054843092)) \ - STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750, UINT64_C(0x0000000055833a42)) \ - STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000, 
UINT64_C(0x0000000056827069)) \ - STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250, UINT64_C(0x000000005781cd86)) \ - STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000, UINT64_C(0x0000000058814c66)) \ - STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750, UINT64_C(0x000000005980e829)) \ - STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000, UINT64_C(0x000000005a809c42)) \ - STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250, UINT64_C(0x000000005b80647f)) \ - STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000, UINT64_C(0x000000005c803d07)) \ - STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750, UINT64_C(0x000000005d802262)) \ - STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000, UINT64_C(0x000000005e801179)) \ - STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250, UINT64_C(0x000000005f80079c)) \ - STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000, UINT64_C(0x0000000060800285)) \ - STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750, UINT64_C(0x000000006180005b)) \ - STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000, UINT64_C(0x00000000627fffb5)) \ - STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250, UINT64_C(0x00000000637fffa0)) \ - STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000, UINT64_C(0x00000000647fffa0)) \ + /* STEP(step, h, x, y) */ \ + STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP( 8, UINT64_C(0x0000000000002777), 
0.040, 0.000602214400000) \ + STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP( 37, 
UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP( 49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 
0.197687089843750) \ + STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ + STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP( 78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP( 94, 
UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ + STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP( 106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP( 122, 
UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP( 150, 
UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ + STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP( 178, 
UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ + STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/include/jemalloc/internal/smoothstep.sh b/include/jemalloc/internal/smoothstep.sh index 41164615..65de97bf 100755 --- a/include/jemalloc/internal/smoothstep.sh +++ b/include/jemalloc/internal/smoothstep.sh @@ -83,16 +83,14 @@ cat < Date: Fri, 26 May 2017 11:24:08 -0700 Subject: [PATCH 0887/2608] Fix run_tests to avoid 
percpu_arena on !Linux. --- scripts/gen_run_tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index f9b00604..1d70057f 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -60,9 +60,9 @@ for cc, cxx in possible_compilers: # Heap profiling and dss are not supported on OS X. darwin_unsupported = ('--enable-prof' in config_opts or \ 'dss:primary' in malloc_conf_opts) - if uname is 'Linux' and linux_supported \ - or uname is not 'Darwin' \ - or not darwin_unsupported: + if (uname == 'Linux' and linux_supported) \ + or (not linux_supported and (uname != 'Darwin' or \ + not darwin_unsupported)): print """cat < run_test_%(ind)d.sh #!/bin/sh From b86d271cbfc7bdeb077b663a2e526cf19f7c1840 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 May 2017 15:30:11 -0700 Subject: [PATCH 0888/2608] Added opt_abort_conf: abort on invalid config options. --- doc/jemalloc.xml.in | 20 ++++++++++++- .../internal/jemalloc_internal_externs.h | 29 ++++++++++--------- src/ctl.c | 3 ++ src/jemalloc.c | 18 ++++++++++++ src/stats.c | 1 + 5 files changed, 56 insertions(+), 15 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 16d9ce4e..a45b3587 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -874,7 +874,25 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", r- Abort-on-warning enabled/disabled. If true, most - warnings are fatal. The process will call + warnings are fatal. Note that runtime option warnings are not included + (see opt.abort_conf for + that). The process will call + abort + 3 in these cases. This option is + disabled by default unless is + specified during configuration, in which case it is enabled by default. + + + + + + opt.abort_conf + (bool) + r- + + Abort-on-invalid-configuration enabled/disabled. If + true, invalid runtime options are fatal. The process will call abort 3 in these cases. 
This option is disabled by default unless is diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 9a431fc1..11e16ecc 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -6,23 +6,24 @@ #include "jemalloc/internal/tsd_types.h" /* TSD checks this to set thread local slow state accordingly. */ -extern bool malloc_slow; +extern bool malloc_slow; /* Run-time options. */ -extern bool opt_abort; -extern const char *opt_junk; -extern bool opt_junk_alloc; -extern bool opt_junk_free; -extern bool opt_utrace; -extern bool opt_xmalloc; -extern bool opt_zero; -extern unsigned opt_narenas; +extern bool opt_abort; +extern bool opt_abort_conf; +extern const char *opt_junk; +extern bool opt_junk_alloc; +extern bool opt_junk_free; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_zero; +extern unsigned opt_narenas; /* Number of CPUs. */ -extern unsigned ncpus; +extern unsigned ncpus; /* Number of arenas used for automatic multiplexing of threads and arenas. */ -extern unsigned narenas_auto; +extern unsigned narenas_auto; /* * Arenas that are used to service external requests. Not all elements of the @@ -34,18 +35,18 @@ extern atomic_p_t arenas[]; * pind2sz_tab encodes the same information as could be computed by * pind2sz_compute(). */ -extern size_t const pind2sz_tab[NPSIZES+1]; +extern size_t const pind2sz_tab[NPSIZES+1]; /* * index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by index2size_compute(). */ -extern size_t const index2size_tab[NSIZES]; +extern size_t const index2size_tab[NSIZES]; /* * size2index_tab is a compact lookup table that rounds request sizes up to * size classes. In order to reduce cache footprint, the table is compressed, * and all accesses are via size2index(). 
*/ -extern uint8_t const size2index_tab[]; +extern uint8_t const size2index_tab[]; void *a0malloc(size_t size); void a0dalloc(void *ptr); diff --git a/src/ctl.c b/src/ctl.c index 30704edd..28f49398 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -76,6 +76,7 @@ CTL_PROTO(config_stats) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) +CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) @@ -267,6 +268,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, + {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -1546,6 +1548,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) /******************************************************************************/ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) +CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 517fbb99..dd8365f9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -23,6 +23,13 @@ const char *je_malloc_conf #endif ; bool opt_abort = +#ifdef JEMALLOC_DEBUG + true +#else + false +#endif + ; +bool opt_abort_conf = #ifdef JEMALLOC_DEBUG true #else @@ -274,6 +281,9 @@ typedef struct { # define UTRACE(a, b, c) #endif +/* Whether encountered any invalid config options. 
*/ +static bool had_conf_error = false; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -847,6 +857,10 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); + had_conf_error = true; + if (opt_abort_conf) { + abort(); + } } static void @@ -1045,6 +1059,10 @@ malloc_conf_init(void) { } CONF_HANDLE_BOOL(opt_abort, "abort") + CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + if (opt_abort_conf && had_conf_error) { + abort(); + } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; diff --git a/src/stats.c b/src/stats.c index b67d46dc..48d3f59b 100644 --- a/src/stats.c +++ b/src/stats.c @@ -813,6 +813,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, "Run-time option settings:\n"); } OPT_WRITE_BOOL(abort, ",") + OPT_WRITE_BOOL(abort_conf, ",") OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") From 49505e558baade576bab40cbd3b9fdaa61ba77d2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 26 May 2017 13:51:55 -0700 Subject: [PATCH 0889/2608] Make test/unit/background_thread not flaky. 
--- test/unit/background_thread.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index 05089c28..81f8aeed 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -80,6 +80,8 @@ TEST_BEGIN(test_background_thread_running) { test_repeat_background_thread_ctl(false); test_switch_background_thread_ctl(true); + assert_b_eq(info->started, true, + "Background_thread did not start.\n"); nstime_t start, now; nstime_init(&start, 0); @@ -99,9 +101,9 @@ TEST_BEGIN(test_background_thread_running) { nstime_init(&now, 0); nstime_update(&now); nstime_subtract(&now, &start); - assert_u64_lt(nstime_sec(&now), 10, - "Background threads did not run for 10 seconds."); - usleep(10000); + assert_u64_lt(nstime_sec(&now), 1000, + "Background threads did not run for 1000 seconds."); + sleep(1); } test_switch_background_thread_ctl(false); #endif From d5ef5ae9344d72f39569a05e7c9349dded497e41 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sat, 27 May 2017 15:35:36 -0700 Subject: [PATCH 0890/2608] Add opt.stats_print_opts. The value is passed to atexit(3)-triggered malloc_stats_print() calls. --- doc/jemalloc.xml.in | 23 +++++++++++- include/jemalloc/internal/stats.h | 21 ++++++++++- src/ctl.c | 3 ++ src/jemalloc.c | 31 +++++++++++++++- src/stats.c | 61 ++++++++++--------------------- 5 files changed, 94 insertions(+), 45 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a45b3587..c2c0e925 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -410,6 +410,8 @@ for (i = 0; i < nbins; i++) { /* Do something with bin_size... */ }]]> + + The malloc_stats_print() function writes summary statistics via the write_cb callback function pointer and cbopaque data passed to @@ -1046,7 +1048,9 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", enabled, the malloc_stats_print() function is called at program exit via an atexit - 3 function. If + 3 function. 
opt.stats_print_opts + can be combined to specify output options. If is specified during configuration, this has the potential to cause deadlock for a multi-threaded process that exits while one or more threads are executing in the memory allocation @@ -1061,6 +1065,23 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", development. This option is disabled by default. + + + opt.stats_print_opts + (const char *) + r- + + Options (the opts string) to pass + to the malloc_stats_print() at exit (enabled + through opt.stats_print). See + available options in malloc_stats_print(). + Has no effect unless opt.stats_print is + enabled. The default is . + + opt.junk diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 47ca4f9e..1198779a 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -7,8 +7,27 @@ #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats_tsd.h" -/* The opt.stats_print storage. */ +/* OPTION(opt, var_name, default, set_value_to) */ +#define STATS_PRINT_OPTIONS \ + OPTION('J', json, false, true) \ + OPTION('g', general, true, false) \ + OPTION('m', merged, config_stats, false) \ + OPTION('d', destroyed, config_stats, false) \ + OPTION('a', unmerged, config_stats, false) \ + OPTION('b', bins, true, false) \ + OPTION('l', large, true, false) \ + OPTION('x', mutex, true, false) + +enum { +#define OPTION(o, v, d, s) stats_print_option_num_##v, + STATS_PRINT_OPTIONS +#undef OPTION + stats_print_tot_num_options +}; + +/* Options for stats_print. */ extern bool opt_stats_print; +extern char opt_stats_print_opts[stats_print_tot_num_options+1]; /* Implements je_malloc_stats_print. 
*/ void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, diff --git a/src/ctl.c b/src/ctl.c index 28f49398..d10c39bb 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -85,6 +85,7 @@ CTL_PROTO(opt_background_thread) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) +CTL_PROTO(opt_stats_print_opts) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) @@ -277,6 +278,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, {NAME("utrace"), CTL(opt_utrace)}, @@ -1557,6 +1559,7 @@ CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) +CTL_RO_NL_GEN(opt_stats_print_opts, opt_stats_print_opts, const char *) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index dd8365f9..5e3072b5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -724,7 +724,7 @@ stats_print_atexit(void) { } } } - je_malloc_stats_print(NULL, NULL, NULL); + je_malloc_stats_print(NULL, NULL, opt_stats_print_opts); } /* @@ -777,6 +777,31 @@ malloc_ncpus(void) { return ((result == -1) ? 
1 : (unsigned)result); } +static void +init_opt_stats_print_opts(const char *v, size_t vlen) { + size_t opts_len = strlen(opt_stats_print_opts); + assert(opts_len <= stats_print_tot_num_options); + + for (size_t i = 0; i < vlen; i++) { + switch (v[i]) { +#define OPTION(o, v, d, s) case o: break; + STATS_PRINT_OPTIONS +#undef OPTION + default: continue; + } + + if (strchr(opt_stats_print_opts, v[i]) != NULL) { + /* Ignore repeated. */ + continue; + } + + opt_stats_print_opts[opts_len++] = v[i]; + opt_stats_print_opts[opts_len] = '\0'; + assert(opts_len <= stats_print_tot_num_options); + } + assert(opts_len == strlen(opt_stats_print_opts)); +} + static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { @@ -1099,6 +1124,10 @@ malloc_conf_init(void) { QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : SSIZE_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print") + if (CONF_MATCH("stats_print_opts")) { + init_opt_stats_print_opts(v, vlen); + continue; + } if (config_fill) { if (CONF_MATCH("junk")) { if (CONF_MATCH_VALUE("true")) { diff --git a/src/stats.c b/src/stats.c index 48d3f59b..61550d83 100644 --- a/src/stats.c +++ b/src/stats.c @@ -46,7 +46,8 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { /******************************************************************************/ /* Data. */ -bool opt_stats_print = false; +bool opt_stats_print = false; +char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; /******************************************************************************/ @@ -838,12 +839,16 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(prof_gdump, ",") OPT_WRITE_BOOL(prof_final, ",") OPT_WRITE_BOOL(prof_leak, ",") - /* - * stats_print is always emitted, so as long as stats_print comes last - * it's safe to unconditionally omit the comma here (rather than having - * to conditionally omit it elsewhere depending on configuration). 
- */ - OPT_WRITE_BOOL(stats_print, "") + OPT_WRITE_BOOL(stats_print, ",") + if (json || opt_stats_print) { + /* + * stats_print_opts is always emitted for JSON, so as long as it + * comes last it's safe to unconditionally omit the comma here + * (rather than having to conditionally omit it elsewhere + * depending on configuration). + */ + OPT_WRITE_CHAR_P(stats_print_opts, "") + } if (json) { malloc_cprintf(write_cb, cbopaque, "\t\t},\n"); @@ -1228,14 +1233,9 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, int err; uint64_t epoch; size_t u64sz; - bool json = false; - bool general = true; - bool merged = config_stats; - bool destroyed = config_stats; - bool unmerged = config_stats; - bool bins = true; - bool large = true; - bool mutex = true; +#define OPTION(o, v, d, s) bool v = d; + STATS_PRINT_OPTIONS +#undef OPTION /* * Refresh stats, in case mallctl() was called by the application. @@ -1260,34 +1260,11 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (opts != NULL) { - unsigned i; - - for (i = 0; opts[i] != '\0'; i++) { + for (unsigned i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { - case 'J': - json = true; - break; - case 'g': - general = false; - break; - case 'm': - merged = false; - break; - case 'd': - destroyed = false; - break; - case 'a': - unmerged = false; - break; - case 'b': - bins = false; - break; - case 'l': - large = false; - break; - case 'x': - mutex = false; - break; +#define OPTION(o, v, d, s) case o: v = s; break; + STATS_PRINT_OPTIONS +#undef OPTION default:; } } From a16114866a8e52a4fc8057d5c3123e2dc5c0ceab Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 25 May 2017 18:15:41 -0700 Subject: [PATCH 0891/2608] Fix OOM paths in extent_grow_retained(). 
--- src/extent.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/extent.c b/src/extent.c index fee8198e..7359a558 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1046,10 +1046,15 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed); - if (ptr == NULL || extent_register_no_gdump_add(tsdn, extent)) { + if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); return NULL; } + if (extent_register_no_gdump_add(tsdn, extent)) { + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, extent); + return NULL; + } size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, PAGE_CEILING(alignment)) - (uintptr_t)ptr; @@ -1070,7 +1075,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, leadsize, NSIZES, false, esize + trailsize, szind, slab); if (extent == NULL) { extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, false, lead); + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, lead); return NULL; } extent_record(tsdn, arena, r_extent_hooks, From 168793a1c1986890cb8eaeecb320c762ed3b8033 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 May 2017 11:06:01 -0700 Subject: [PATCH 0892/2608] Fix extent_grow_next management. Fix management of extent_grow_next to serialize operations that may grow retained memory. This assures that the sizes of the newly allocated extents correspond to the size classes in the intended growth sequence. Fix management of extent_grow_next to skip size classes if a request is too large to be satisfied by the next size in the growth sequence. This avoids the potential for an arbitrary number of requests to bypass triggering extent_grow_next increases. This resolves #858. 
--- include/jemalloc/internal/arena_structs_b.h | 5 +- include/jemalloc/internal/witness.h | 13 +- src/arena.c | 9 +- src/extent.c | 352 ++++++++++++-------- 4 files changed, 221 insertions(+), 158 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index f98f45c1..160ac4fa 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -238,9 +238,10 @@ struct arena_s { * be effective even if multiple arenas' extent allocation requests are * highly interleaved. * - * Synchronization: atomic. + * Synchronization: extent_grow_mtx */ - atomic_u_t extent_grow_next; + pszind_t extent_grow_next; + malloc_mutex_t extent_grow_mtx; /* * Available extent structures that were allocated via diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index c71911f2..33be6661 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -41,13 +41,14 @@ #define WITNESS_RANK_DECAY 11U #define WITNESS_RANK_TCACHE_QL 12U -#define WITNESS_RANK_EXTENTS 13U -#define WITNESS_RANK_EXTENT_FREELIST 14U +#define WITNESS_RANK_EXTENT_GROW 13U +#define WITNESS_RANK_EXTENTS 14U +#define WITNESS_RANK_EXTENT_AVAIL 15U -#define WITNESS_RANK_EXTENT_POOL 15U -#define WITNESS_RANK_RTREE 16U -#define WITNESS_RANK_BASE 17U -#define WITNESS_RANK_ARENA_LARGE 18U +#define WITNESS_RANK_EXTENT_POOL 16U +#define WITNESS_RANK_RTREE 17U +#define WITNESS_RANK_BASE 18U +#define WITNESS_RANK_ARENA_LARGE 19U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 105d60ca..de50d8c0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2001,14 +2001,15 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (opt_retain) { - atomic_store_u(&arena->extent_grow_next, psz2ind(HUGEPAGE), - ATOMIC_RELAXED); + arena->extent_grow_next = 
psz2ind(HUGEPAGE); + if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + goto label_error; } extent_avail_new(&arena->extent_avail); if (malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail", - WITNESS_RANK_EXTENT_FREELIST, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_EXTENT_AVAIL, malloc_mutex_rank_exclusive)) { goto label_error; } diff --git a/src/extent.c b/src/extent.c index 7359a558..44e98789 100644 --- a/src/extent.c +++ b/src/extent.c @@ -16,33 +16,51 @@ mutex_pool_t extent_mutex_pool; static const bitmap_info_t extents_bitmap_info = BITMAP_INFO_INITIALIZER(NPSIZES+1); -static void *extent_alloc_default(extent_hooks_t *extent_hooks, - void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, +static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, +static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); -static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, +static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); -static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, +static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_decommit_default(extent_hooks_t *extent_hooks, +static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length, bool growing_retained); +static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #ifdef PAGES_CAN_PURGE_LAZY 
-static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); #endif +static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length, bool growing_retained); #ifdef PAGES_CAN_PURGE_FORCED -static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, +static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif +static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length, bool growing_retained); #ifdef JEMALLOC_MAPS_COALESCE -static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, +static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); -static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, +#endif +static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained); +#ifdef JEMALLOC_MAPS_COALESCE +static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); #endif +static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + bool growing_retained); const extent_hooks_t extent_hooks_default = { extent_alloc_default, @@ -85,12 +103,13 @@ static void extent_deregister(tsdn_t *tsdn, extent_t *extent); static 
extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit); + bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - extent_t *extent, bool *coalesced); + extent_t *extent, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); + extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, + bool growing_retained); /******************************************************************************/ @@ -150,9 +169,6 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); extent_t *extent = extent_avail_first(&arena->extent_avail); if (extent == NULL) { @@ -166,9 +182,6 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); extent_avail_insert(&arena->extent_avail, extent); malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); @@ -402,7 +415,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, extent_state_set(extent, extent_state_active); bool coalesced; extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, &coalesced); + extents, extent, &coalesced, false); extent_state_set(extent, extents_state_get(extents)); if (!coalesced) { @@ -422,7 +435,7 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, 
extent_hooks_t **r_extent_hooks, WITNESS_RANK_CORE, 0); return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - size, pad, alignment, slab, szind, zero, commit); + size, pad, alignment, slab, szind, zero, commit, false); } void @@ -436,7 +449,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, r_extent_hooks, extents, extent); + extent_record(tsdn, arena, r_extent_hooks, extents, extent, false); } extent_t * @@ -506,16 +519,17 @@ label_return: static void extents_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent) { + extents_t *extents, extent_t *extent, bool growing_retained) { /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ if (extents_state_get(extents) == extent_state_dirty) { - if (extent_purge_lazy_wrapper(tsdn, arena, r_extent_hooks, - extent, 0, extent_size_get(extent))) { - extent_purge_forced_wrapper(tsdn, arena, r_extent_hooks, - extent, 0, extent_size_get(extent)); + if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, + extent, 0, extent_size_get(extent), growing_retained)) { + extent_purge_forced_impl(tsdn, arena, r_extent_hooks, + extent, 0, extent_size_get(extent), + growing_retained); } } extent_dalloc(tsdn, arena, extent); @@ -734,13 +748,10 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - bool locked, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, bool *zero, bool *commit) { + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, locked 
? 1 : 0); - if (locked) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); - } + WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); if (config_debug && new_addr != NULL) { /* @@ -765,9 +776,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, if (alloc_size < esize) { return NULL; } - if (!locked) { - malloc_mutex_lock(tsdn, &extents->mtx); - } + malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { @@ -791,16 +800,12 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent = extents_fit_locked(tsdn, arena, extents, alloc_size); } if (extent == NULL) { - if (!locked) { - malloc_mutex_unlock(tsdn, &extents->mtx); - } + malloc_mutex_unlock(tsdn, &extents->mtx); return NULL; } extent_activate_locked(tsdn, arena, extents, extent, false); - if (!locked) { - malloc_mutex_unlock(tsdn, &extents->mtx); - } + malloc_mutex_unlock(tsdn, &extents->mtx); if (extent_zeroed_get(extent)) { *zero = true; @@ -816,7 +821,7 @@ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent) { + szind_t szind, extent_t *extent, bool growing_retained) { size_t esize = size + pad; size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); @@ -827,13 +832,13 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, /* Split the lead. 
*/ if (leadsize != 0) { extent_t *lead = extent; - extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, + extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, leadsize, NSIZES, false, esize + trailsize, szind, - slab); + slab, growing_retained); if (extent == NULL) { extent_deregister(tsdn, lead); extents_leak(tsdn, arena, r_extent_hooks, extents, - lead); + lead, growing_retained); return NULL; } extent_deactivate(tsdn, arena, extents, lead, false); @@ -841,13 +846,13 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, /* Split the trail. */ if (trailsize != 0) { - extent_t *trail = extent_split_wrapper(tsdn, arena, + extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false); + NSIZES, false, growing_retained); if (trail == NULL) { extent_deregister(tsdn, extent); extents_leak(tsdn, arena, r_extent_hooks, extents, - extent); + extent, growing_retained); return NULL; } extent_deactivate(tsdn, arena, extents, trail, false); @@ -875,9 +880,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(new_addr == NULL || !slab); assert(pad == 0 || !slab); assert(!*zero || !slab); @@ -887,8 +893,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, false, new_addr, size, pad, alignment, slab, - zero, &committed); + rtree_ctx, extents, new_addr, size, pad, alignment, slab, zero, + &committed, growing_retained); if (extent == NULL) { return NULL; } @@ -897,16 +903,17 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, new_addr, size, pad, alignment, slab, szind, extent); + extents, new_addr, size, pad, alignment, slab, szind, extent, + growing_retained); if (extent == NULL) { return NULL; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, - 0, extent_size_get(extent))) { + if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, + 0, extent_size_get(extent), growing_retained)) { extent_record(tsdn, arena, r_extent_hooks, extents, - extent); + extent, growing_retained); return NULL; } extent_zeroed_set(extent, true); @@ -1012,35 +1019,41 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, */ static extent_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + extent_hooks_t **r_extent_hooks, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit) { + malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); assert(pad == 0 || !slab); assert(!*zero || !slab); - /* - * Check whether the next extent size in the series would be large - * enough to satisfy this request. If no, just bail, so that e.g. 
a - * series of unsatisfiable allocation requests doesn't cause unused - * extent creation as a side effect. - */ size_t esize = size + pad; - size_t alloc_size = pind2sz(atomic_load_u(&arena->extent_grow_next, - ATOMIC_RELAXED)); size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (alloc_size_min < esize) { - return NULL; + goto label_err; } - if (alloc_size < alloc_size_min) { - return NULL; + /* + * Find the next extent size in the series that would be large enough to + * satisfy this request. + */ + pszind_t egn_skip = 0; + size_t alloc_size = pind2sz(arena->extent_grow_next + egn_skip); + while (alloc_size < alloc_size_min) { + egn_skip++; + if (arena->extent_grow_next + egn_skip == NPSIZES) { + /* Outside legal range. */ + goto label_err; + } + assert(arena->extent_grow_next + egn_skip < NPSIZES); + alloc_size = pind2sz(arena->extent_grow_next + egn_skip); } + extent_t *extent = extent_alloc(tsdn, arena); if (extent == NULL) { - return NULL; + goto label_err; } bool zeroed = false; bool committed = false; - void *ptr = extent_alloc_core(tsdn, arena, new_addr, alloc_size, PAGE, + void *ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, &zeroed, &committed, (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED)); extent_init(extent, arena, ptr, alloc_size, false, NSIZES, @@ -1048,17 +1061,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, committed); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); - return NULL; + goto label_err; } if (extent_register_no_gdump_add(tsdn, extent)) { extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent); - return NULL; + &arena->extents_retained, extent, true); + goto label_err; } size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, PAGE_CEILING(alignment)) - (uintptr_t)ptr; - assert(new_addr == NULL || leadsize == 0); assert(alloc_size >= leadsize + esize); size_t trailsize = alloc_size - leadsize - esize; if 
(extent_zeroed_get(extent) && extent_committed_get(extent)) { @@ -1071,31 +1083,31 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, /* Split the lead. */ if (leadsize != 0) { extent_t *lead = extent; - extent = extent_split_wrapper(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, false, esize + trailsize, szind, slab); + extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, + leadsize, NSIZES, false, esize + trailsize, szind, slab, + true); if (extent == NULL) { extent_deregister(tsdn, lead); extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead); - return NULL; + &arena->extents_retained, lead, true); + goto label_err; } extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead); + &arena->extents_retained, lead, true); } /* Split the trail. */ if (trailsize != 0) { - extent_t *trail = extent_split_wrapper(tsdn, arena, - r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false); + extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, + extent, esize, szind, slab, trailsize, NSIZES, false, true); if (trail == NULL) { extent_deregister(tsdn, extent); extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent); - return NULL; + &arena->extents_retained, extent, true); + goto label_err; } extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail); + &arena->extents_retained, trail, true); } else if (leadsize == 0) { /* * Splitting causes szind to be set as a side effect, but no @@ -1119,15 +1131,27 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_wrapper(tsdn, arena, r_extent_hooks, extent, - 0, extent_size_get(extent))) { + if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, + extent_size_get(extent), true)) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent); - return NULL; + &arena->extents_retained, extent, true); + goto label_err; } 
extent_zeroed_set(extent, true); } + /* + * Increment extent_grow_next if doing so wouldn't exceed the legal + * range. + */ + if (arena->extent_grow_next + egn_skip + 1 < NPSIZES) { + arena->extent_grow_next += egn_skip + 1; + } else { + arena->extent_grow_next = NPSIZES - 1; + } + /* All opportunities for failure are past. */ + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + if (config_prof) { /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, extent); @@ -1150,45 +1174,38 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, memset(addr, 0, size); } } - /* - * Increment extent_grow_next, but take care to do so atomically and - * bail out if the increment would exceed the legal range. - */ - pszind_t egn = atomic_load_u(&arena->extent_grow_next, ATOMIC_RELAXED); - while (true) { - if (egn + 1 == NPSIZES) { - break; - } - assert(egn + 1 < NPSIZES); - if (atomic_compare_exchange_weak_u(&arena->extent_grow_next, - &egn, egn + 1, ATOMIC_RELAXED, ATOMIC_RELAXED)) { - break; - } - } + return extent; +label_err: + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + return NULL; } static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - extent_t *extent; - assert(size != 0); assert(alignment != 0); - extent = extent_recycle(tsdn, arena, r_extent_hooks, + malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); + + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, &arena->extents_retained, new_addr, size, pad, alignment, slab, - szind, zero, commit); + szind, zero, commit, true); if (extent != NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); if (config_prof) { extent_gdump_add(tsdn, extent); } + } else if (opt_retain && new_addr == NULL) { + extent = extent_grow_retained(tsdn, arena, r_extent_hooks, size, + pad, alignment, slab, szind, zero, commit); + /* 
extent_grow_retained() always releases extent_grow_mtx. */ + } else { + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); } - if (opt_retain && extent == NULL) { - extent = extent_grow_retained(tsdn, arena, r_extent_hooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); - } + malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); return extent; } @@ -1222,7 +1239,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } if (extent_register(tsdn, extent)) { extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent); + &arena->extents_retained, extent, false); return NULL; } @@ -1270,7 +1287,8 @@ extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *inner, extent_t *outer, bool forward) { + extents_t *extents, extent_t *inner, extent_t *outer, bool forward, + bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); if (forward && extents->delay_coalesce) { @@ -1285,8 +1303,8 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents->delay_coalesce); malloc_mutex_unlock(tsdn, &extents->mtx); - bool err = extent_merge_wrapper(tsdn, arena, r_extent_hooks, - forward ? inner : outer, forward ? outer : inner); + bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, + forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &extents->mtx); if (err) { @@ -1303,7 +1321,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - extent_t *extent, bool *coalesced) { + extent_t *extent, bool *coalesced, bool growing_retained) { /* * Continue attempting to coalesce until failure, to protect against * races with other threads that are thwarted by this one. @@ -1327,7 +1345,8 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, next, true)) { + r_extent_hooks, extents, extent, next, true, + growing_retained)) { if (extents->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -1346,7 +1365,8 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, prev, false)) { + r_extent_hooks, extents, extent, prev, false, + growing_retained)) { extent = prev; if (extents->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -1366,7 +1386,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent) { + extents_t *extents, extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1388,7 +1408,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, extent, NULL); + rtree_ctx, extents, extent, NULL, growing_retained); } extent_deactivate_locked(tsdn, arena, extents, extent, false); @@ -1405,7 +1425,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { if (extent_register(tsdn, extent)) { extents_leak(tsdn, arena, &extent_hooks, - &arena->extents_retained, extent); + &arena->extents_retained, extent, false); return; } extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); @@ -1503,7 +1523,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, - extent); + extent, false); } static void @@ -1558,12 +1578,12 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, length); } -bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, +static bool +extent_commit_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) { + size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); bool err = ((*r_extent_hooks)->commit == NULL || @@ -1573,6 +1593,14 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, return err; } +bool +extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length) { + return extent_commit_impl(tsdn, arena, r_extent_hooks, extent, offset, + length, false); +} + static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { @@ -1614,12 +1642,12 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } #endif -bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, +static bool +extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) { + size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_lazy == NULL || @@ -1628,6 +1656,14 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, arena_ind_get(arena))); } +bool +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length) { + return extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, + offset, length, false); +} + #ifdef PAGES_CAN_PURGE_FORCED static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, @@ -1643,12 +1679,12 @@ extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, } #endif -bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, +static bool +extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, - size_t length) { + size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); return ((*r_extent_hooks)->purge_forced == NULL || @@ -1657,6 +1693,14 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, arena_ind_get(arena))); } +bool +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + size_t length) { + return extent_purge_forced_impl(tsdn, arena, r_extent_hooks, extent, + offset, length, false); +} + #ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, @@ -1667,13 +1711,14 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } #endif -extent_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, +static extent_t * +extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1741,6 +1786,14 @@ label_error_a: return NULL; } +extent_t * +extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, arena, r_extent_hooks, extent, size_a, + szind_a, slab_a, size_b, szind_b, slab_b, false); +} + static bool extent_merge_default_impl(void *addr_a, void *addr_b) { if (!maps_coalesce) { @@ -1763,11 +1816,12 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } #endif -bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { +static bool +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + WITNESS_RANK_CORE, growing_retained ? 1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); @@ -1832,6 +1886,12 @@ extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, return false; } +bool +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, r_extent_hooks, a, b, false); +} + bool extent_boot(void) { if (rtree_new(&extents_rtree, true)) { From 4f0963b883ea8a3a9e438f91f866627546a89d0c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 26 May 2017 09:52:33 -0700 Subject: [PATCH 0893/2608] Add test for excessive retained memory. 
--- Makefile.in | 1 + test/unit/retained.c | 179 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+) create mode 100644 test/unit/retained.c diff --git a/Makefile.in b/Makefile.in index 16fe30fd..868bf8cc 100644 --- a/Makefile.in +++ b/Makefile.in @@ -189,6 +189,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ + $(srcroot)test/unit/retained.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ diff --git a/test/unit/retained.c b/test/unit/retained.c new file mode 100644 index 00000000..883bf4af --- /dev/null +++ b/test/unit/retained.c @@ -0,0 +1,179 @@ +#include "test/jemalloc_test.h" + +static unsigned arena_ind; +static size_t sz; +static size_t esz; +#define NEPOCHS 8 +#define PER_THD_NALLOCS 1 +static atomic_u_t epoch; +static atomic_u_t nfinished; + +static unsigned +do_arena_create(extent_hooks_t *h) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, + "Unexpected mallctl() failure"); + return arena_ind; +} + +static void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen; + + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static void +do_refresh(void) { + uint64_t epoch = 1; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)), 0, "Unexpected mallctl() failure"); +} + +static size_t +do_get_size_impl(const char *cmd, unsigned arena_ind) { + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); + size_t z = sizeof(size_t); + + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = arena_ind; + size_t size; + assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd); + + return size; +} + +static size_t +do_get_active(unsigned arena_ind) { + return do_get_size_impl("stats.arenas.0.pactive", arena_ind) * PAGE; +} + +static size_t +do_get_mapped(unsigned arena_ind) { + return do_get_size_impl("stats.arenas.0.mapped", arena_ind); +} + +static void * +thd_start(void *arg) { + for (unsigned next_epoch = 1; next_epoch < NEPOCHS; next_epoch++) { + /* Busy-wait for next epoch. */ + unsigned cur_epoch; + spin_t spinner = SPIN_INITIALIZER; + while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) != + next_epoch) { + spin_adaptive(&spinner); + } + assert_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); + + /* + * Allocate. The main thread will reset the arena, so there's + * no need to deallocate. 
+ */ + for (unsigned i = 0; i < PER_THD_NALLOCS; i++) { + void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) | + MALLOCX_TCACHE_NONE + ); + assert_ptr_not_null(p, + "Unexpected mallocx() failure\n"); + } + + /* Let the main thread know we've finished this iteration. */ + atomic_fetch_add_u(&nfinished, 1, ATOMIC_RELEASE); + } + + return NULL; +} + +TEST_BEGIN(test_retained) { + test_skip_if(!config_stats); + + arena_ind = do_arena_create(NULL); + sz = nallocx(HUGEPAGE, 0); + esz = sz + large_pad; + + atomic_store_u(&epoch, 0, ATOMIC_RELAXED); + + unsigned nthreads = ncpus * 2; + VARIABLE_ARRAY(thd_t, threads, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], thd_start, NULL); + } + + for (unsigned e = 1; e < NEPOCHS; e++) { + atomic_store_u(&nfinished, 0, ATOMIC_RELEASE); + atomic_store_u(&epoch, e, ATOMIC_RELEASE); + + /* Wait for threads to finish allocating. */ + spin_t spinner = SPIN_INITIALIZER; + while (atomic_load_u(&nfinished, ATOMIC_ACQUIRE) < nthreads) { + spin_adaptive(&spinner); + } + + /* + * Assert that retained is no more than the sum of size classes + * that should have been used to satisfy the worker threads' + * requests, discounting per growth fragmentation. + */ + do_refresh(); + + size_t allocated = esz * nthreads * PER_THD_NALLOCS; + size_t active = do_get_active(arena_ind); + assert_zu_le(allocated, active, "Unexpected active memory"); + size_t mapped = do_get_mapped(arena_ind); + assert_zu_le(active, mapped, "Unexpected mapped memory"); + + arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); + size_t usable = 0; + size_t fragmented = 0; + for (pszind_t pind = psz2ind(HUGEPAGE); pind < + arena->extent_grow_next; pind++) { + size_t psz = pind2sz(pind); + size_t psz_fragmented = psz % esz; + size_t psz_usable = psz - psz_fragmented; + /* + * Only consider size classes that wouldn't be skipped. 
+ */ + if (psz_usable > 0) { + assert_zu_lt(usable, allocated, + "Excessive retained memory " + "(%#zx[+%#zx] > %#zx)", usable, psz_usable, + allocated); + fragmented += psz_fragmented; + usable += psz_usable; + } + } + + /* + * Clean up arena. Destroying and recreating the arena + * is simpler that specifying extent hooks that deallocate + * (rather than retaining) during reset. + */ + do_arena_destroy(arena_ind); + assert_u_eq(do_arena_create(NULL), arena_ind, + "Unexpected arena index"); + } + + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } + + do_arena_destroy(arena_ind); +} +TEST_END + +int +main(void) { + return test( + test_retained); +} From bf6673a070a7d0b12a4d25c1f64dcf562f61f10a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 May 2017 22:47:16 -0700 Subject: [PATCH 0894/2608] Fix npages during arena_decay_epoch_advance(). We do not lock extents while advancing epoch. This change makes sure that we only read npages from extents once in order to avoid any inconsistency. --- src/arena.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/src/arena.c b/src/arena.c index de50d8c0..9b3ea235 100644 --- a/src/arena.c +++ b/src/arena.c @@ -637,15 +637,14 @@ arena_decay_backlog_npages_limit(const arena_decay_t *decay) { } static void -arena_decay_backlog_update_last(arena_decay_t *decay, extents_t *extents) { - size_t ndirty = extents_npages_get(extents); - size_t ndirty_delta = (ndirty > decay->nunpurged) ? ndirty - - decay->nunpurged : 0; - decay->backlog[SMOOTHSTEP_NSTEPS-1] = ndirty_delta; +arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) { + size_t npages_delta = (current_npages > decay->nunpurged) ? 
+ current_npages - decay->nunpurged : 0; + decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; if (config_debug) { - if (ndirty > decay->ceil_npages) { - decay->ceil_npages = ndirty; + if (current_npages > decay->ceil_npages) { + decay->ceil_npages = current_npages; } size_t npages_limit = arena_decay_backlog_npages_limit(decay); assert(decay->ceil_npages >= npages_limit); @@ -656,8 +655,8 @@ arena_decay_backlog_update_last(arena_decay_t *decay, extents_t *extents) { } static void -arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents, - uint64_t nadvance_u64) { +arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, + size_t current_npages) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * sizeof(size_t)); @@ -674,7 +673,7 @@ arena_decay_backlog_update(arena_decay_t *decay, extents_t *extents, } } - arena_decay_backlog_update_last(decay, extents); + arena_decay_backlog_update_last(decay, current_npages); } static void @@ -687,8 +686,8 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static void -arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, - const nstime_t *time) { +arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, + size_t current_npages) { assert(arena_decay_deadline_reached(decay, time)); nstime_t delta; @@ -707,25 +706,20 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, extents_t *extents, arena_decay_deadline_init(decay); /* Update the backlog. 
*/ - arena_decay_backlog_update(decay, extents, nadvance_u64); + arena_decay_backlog_update(decay, nadvance_u64, current_npages); } static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, const nstime_t *time, bool purge) { - arena_decay_epoch_advance_helper(decay, extents, time); - size_t current_npages = extents_npages_get(extents); + arena_decay_epoch_advance_helper(decay, time, current_npages); + size_t npages_limit = arena_decay_backlog_npages_limit(decay); if (purge) { arena_decay_try_purge(tsdn, arena, decay, extents, current_npages, npages_limit); } - /* - * There may be concurrent ndirty fluctuation between the purge above - * and the nunpurged update below, but this is inconsequential to decay - * machinery correctness. - */ decay->nunpurged = (npages_limit > current_npages) ? npages_limit : current_npages; } From c606a87d2a2a946793cf0a29ca69a1962caf6008 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 May 2017 09:54:49 -0700 Subject: [PATCH 0895/2608] Add the --disable-thp option to support cross compiling. This resolves #669. --- INSTALL.md | 7 +++++- configure.ac | 25 ++++++++++++++++--- doc/jemalloc.xml.in | 11 ++++++++ .../internal/jemalloc_internal_defs.h.in | 4 +-- .../jemalloc/internal/jemalloc_preamble.h.in | 14 +++++------ src/ctl.c | 3 +++ src/stats.c | 1 + test/unit/pages.c | 2 +- 8 files changed, 53 insertions(+), 14 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 25f625af..dff7cebb 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -157,6 +157,11 @@ any of the following arguments (not a definitive list) to 'configure': Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. +* `--disable-thp` + + Disable transparent huge page (THP) integration. This option can be useful + when cross compiling. + * `--disable-fill` Disable support for junk/zero filling of memory. 
See the "opt.junk" and @@ -224,7 +229,7 @@ any of the following arguments (not a definitive list) to 'configure': * `--with-lg-page-sizes=` Specify the comma-separated base 2 logs of the page sizes to support. This - option may be useful when cross-compiling in combination with + option may be useful when cross compiling in combination with `--with-lg-page`, but its primary use case is for integration with FreeBSD's libc, wherein jemalloc is embedded. diff --git a/configure.ac b/configure.ac index 8be4be45..bed01b7a 100644 --- a/configure.ac +++ b/configure.ac @@ -1792,11 +1792,29 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) - if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_THP], [ ]) - fi fi +dnl Enable transparent huge page support by default. +AC_ARG_ENABLE([thp], + [AS_HELP_STRING([--disable-thp], + [Disable transparent huge page support])], +[if test "x$enable_thp" = "xno" -o "x${je_cv_thp}" != "xyes" ; then + enable_thp="0" +else + enable_thp="1" +fi +], +[if test "x${je_cv_thp}" = "xyes" ; then + enable_thp="1" +else + enable_thp="0" +fi +]) +if test "x$enable_thp" = "x1" ; then + AC_DEFINE([JEMALLOC_THP], [ ]) +fi +AC_SUBST([enable_thp]) + dnl ============================================================================ dnl Check whether __sync_{add,sub}_and_fetch() are available despite dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. 
@@ -2141,6 +2159,7 @@ AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) +AC_MSG_RESULT([thp : ${enable_thp}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index c2c0e925..a9c3d403 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -849,6 +849,17 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", build configuration. + + + config.thp + (bool) + r- + + was not specified + during build configuration, and the system supports transparent huge + page manipulation. + + config.utrace diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 75576a56..20a2358e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -269,8 +269,8 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS /* - * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE - * arguments to madvise(2). + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. 
*/ #undef JEMALLOC_THP diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 0e876103..46750e99 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -111,6 +111,13 @@ static const bool config_stats = false #endif ; +static const bool config_thp = +#ifdef JEMALLOC_THP + true +#else + false +#endif + ; static const bool config_tls = #ifdef JEMALLOC_TLS true @@ -139,13 +146,6 @@ static const bool config_cache_oblivious = false #endif ; -static const bool have_thp = -#ifdef JEMALLOC_THP - true -#else - false -#endif - ; #ifdef JEMALLOC_HAVE_SCHED_GETCPU /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA diff --git a/src/ctl.c b/src/ctl.c index d10c39bb..e3337e8a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -73,6 +73,7 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) +CTL_PROTO(config_thp) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) @@ -263,6 +264,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, + {NAME("thp"), CTL(config_thp)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -1544,6 +1546,7 @@ CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) +CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) diff --git a/src/stats.c b/src/stats.c index 61550d83..268862b0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -725,6 +725,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") 
CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") CONFIG_WRITE_BOOL_JSON(stats, ",") + CONFIG_WRITE_BOOL_JSON(thp, ",") CONFIG_WRITE_BOOL_JSON(utrace, ",") CONFIG_WRITE_BOOL_JSON(xmalloc, "") diff --git a/test/unit/pages.c b/test/unit/pages.c index 4457f369..67dbb4cd 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -11,7 +11,7 @@ TEST_BEGIN(test_pages_huge) { assert_ptr_not_null(pages, "Unexpected pages_map() error"); hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_thp, + assert_b_ne(pages_huge(hugepage, HUGEPAGE), config_thp, "Unexpected pages_huge() result"); assert_false(pages_nohuge(hugepage, HUGEPAGE), "Unexpected pages_nohuge() result"); From 685c97fc433dad4b872639b08aaf074a8db3fdb1 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 May 2017 13:51:33 -0700 Subject: [PATCH 0896/2608] More thoroughly document the *.{nmalloc,ndalloc,nrequests} mallctls. This resolves #412. --- doc/jemalloc.xml.in | 74 ++++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 24 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a9c3d403..41e80049 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2601,8 +2601,11 @@ struct extent_hooks_s { r- [] - Cumulative number of allocation requests served by - small bins. + Cumulative number of times a small allocation was + requested from the arena's bins, whether to fill the relevant tcache if + opt.tcache is + enabled, or to directly satisfy an allocation request + otherwise. @@ -2612,8 +2615,11 @@ struct extent_hooks_s { r- [] - Cumulative number of small objects returned to bins. - + Cumulative number of times a small allocation was + returned to the arena's bins, whether to flush the relevant tcache if + opt.tcache is + enabled, or to directly deallocate an allocation + otherwise. @@ -2623,8 +2629,8 @@ struct extent_hooks_s { r- [] - Cumulative number of small allocation requests. 
- + Cumulative number of allocation requests satisfied by + all bin size classes. @@ -2645,8 +2651,11 @@ struct extent_hooks_s { r- [] - Cumulative number of large allocation requests served - directly by the arena. + Cumulative number of times a large extent was allocated + from the arena, whether to fill the relevant tcache if opt.tcache is enabled and + the size class is within the range being cached, or to directly satisfy + an allocation request otherwise. @@ -2656,8 +2665,11 @@ struct extent_hooks_s { r- [] - Cumulative number of large deallocation requests served - directly by the arena. + Cumulative number of times a large extent was returned + to the arena, whether to flush the relevant tcache if opt.tcache is enabled and + the size class is within the range being cached, or to directly + deallocate an allocation otherwise. @@ -2667,8 +2679,8 @@ struct extent_hooks_s { r- [] - Cumulative number of large allocation requests. - + Cumulative number of allocation requests satisfied by + all large size classes. @@ -2678,8 +2690,11 @@ struct extent_hooks_s { r- [] - Cumulative number of allocations served by bin. - + Cumulative number of times a bin region of the + corresponding size class was allocated from the arena, whether to fill + the relevant tcache if opt.tcache is enabled, or + to directly satisfy an allocation request otherwise. @@ -2689,8 +2704,11 @@ struct extent_hooks_s { r- [] - Cumulative number of allocations returned to bin. - + Cumulative number of times a bin region of the + corresponding size class was returned to the arena, whether to flush the + relevant tcache if opt.tcache is enabled, or + to directly deallocate an allocation otherwise. @@ -2700,8 +2718,8 @@ struct extent_hooks_s { r- [] - Cumulative number of allocation - requests. + Cumulative number of allocation requests satisfied by + bin regions of the corresponding size class. 
@@ -2784,8 +2802,12 @@ struct extent_hooks_s { r- [] - Cumulative number of allocation requests for this size - class served directly by the arena. + Cumulative number of times a large extent of the + corresponding size class was allocated from the arena, whether to fill + the relevant tcache if opt.tcache is enabled and + the size class is within the range being cached, or to directly satisfy + an allocation request otherwise. @@ -2795,8 +2817,12 @@ struct extent_hooks_s { r- [] - Cumulative number of deallocation requests for this - size class served directly by the arena. + Cumulative number of times a large extent of the + corresponding size class was returned to the arena, whether to flush the + relevant tcache if opt.tcache is enabled and + the size class is within the range being cached, or to directly + deallocate an allocation otherwise. @@ -2806,8 +2832,8 @@ struct extent_hooks_s { r- [] - Cumulative number of allocation requests for this size - class. + Cumulative number of allocation requests satisfied by + large extents of the corresponding size class. From 7578b0e929f3380ba1713bd77a38c402a90f3b27 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 May 2017 13:17:10 -0700 Subject: [PATCH 0897/2608] Explicitly say so when aborting on opt_abort_conf. 
--- src/jemalloc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5e3072b5..ed22a258 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -877,6 +877,14 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, return false; } +static void +malloc_abort_invalid_conf(void) { + assert(opt_abort_conf); + malloc_printf(": Abort (abort_conf:true) on invalid conf " + "value (see above).\n"); + abort(); +} + static void malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { @@ -884,7 +892,7 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, (int)vlen, v); had_conf_error = true; if (opt_abort_conf) { - abort(); + malloc_abort_invalid_conf(); } } @@ -1086,7 +1094,7 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") if (opt_abort_conf && had_conf_error) { - abort(); + malloc_abort_invalid_conf(); } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { From ff8062a511f2f3e727891c21238d98c6757ee27e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 May 2017 14:26:02 -0700 Subject: [PATCH 0898/2608] Add jemalloc prefix to allocator functions pruned by jeprof. This resolves #507. 
--- bin/jeprof.in | 26 +++++++++++++------------- configure.ac | 1 + 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index baa80a54..e6f4af4b 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2892,21 +2892,21 @@ sub RemoveUninterestingFrames { my %skip = (); my $skip_regexp = 'NOMATCH'; if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { - foreach my $name ('calloc', + foreach my $name ('@JEMALLOC_PREFIX@calloc', 'cfree', - 'malloc', - 'free', - 'memalign', - 'posix_memalign', - 'aligned_alloc', + '@JEMALLOC_PREFIX@malloc', + '@JEMALLOC_PREFIX@free', + '@JEMALLOC_PREFIX@memalign', + '@JEMALLOC_PREFIX@posix_memalign', + '@JEMALLOC_PREFIX@aligned_alloc', 'pvalloc', - 'valloc', - 'realloc', - 'mallocx', # jemalloc - 'rallocx', # jemalloc - 'xallocx', # jemalloc - 'dallocx', # jemalloc - 'sdallocx', # jemalloc + '@JEMALLOC_PREFIX@valloc', + '@JEMALLOC_PREFIX@realloc', + '@JEMALLOC_PREFIX@mallocx', + '@JEMALLOC_PREFIX@rallocx', + '@JEMALLOC_PREFIX@xallocx', + '@JEMALLOC_PREFIX@dallocx', + '@JEMALLOC_PREFIX@sdallocx', 'tc_calloc', 'tc_cfree', 'tc_malloc', diff --git a/configure.ac b/configure.ac index bed01b7a..a00aab9b 100644 --- a/configure.ac +++ b/configure.ac @@ -802,6 +802,7 @@ else AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) fi +AC_SUBST([JEMALLOC_PREFIX]) AC_SUBST([JEMALLOC_CPREFIX]) AC_ARG_WITH([export], From 2e4d1a4e30ec602a692707ca2aa576ee9df2be76 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 May 2017 15:56:01 -0700 Subject: [PATCH 0899/2608] Output total_wait_ns for bin mutexes. 
--- src/stats.c | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/src/stats.c b/src/stats.c index 268862b0..087df767 100644 --- a/src/stats.c +++ b/src/stats.c @@ -131,7 +131,7 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\t\"bins\": [\n"); } else { char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq max_wait_ns\n"; + " n_spin_acq total_wait_ns max_wait_ns\n"; malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs curslabs regs" @@ -221,28 +221,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, not_reached(); } } - /* Output less info for bin mutexes to save space. */ - uint64_t num_ops, num_wait, max_wait; - CTL_M2_M4_GET("stats.arenas.0.bins.0.mutex.num_wait", - i, j, &num_wait, uint64_t); - CTL_M2_M4_GET( - "stats.arenas.0.bins.0.mutex.max_wait_time", i, j, - &max_wait, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.mutex.num_ops", - i, j, &num_ops, uint64_t); uint64_t mutex_stats[mutex_prof_num_counters]; if (mutex) { read_arena_bin_mutex_stats(i, j, mutex_stats); } - char rate[6]; - if (get_rate_str(num_wait, num_ops, rate)) { - if (num_ops == 0) { - malloc_snprintf(rate, sizeof(rate), - "0"); - } - } - malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" FMTu64" %12"FMTu64" %12"FMTu64" %12zu %12zu %4u" " %3zu %-5s %12"FMTu64" %12"FMTu64" %12"FMTu64 @@ -250,13 +233,16 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, nmalloc, ndalloc, nrequests, curregs, curslabs, nregs, slab_size / page, util, nfills, nflushes, nslabs, nreslabs); + + /* Output less info for bin mutexes to save space. 
*/ if (mutex) { malloc_cprintf(write_cb, cbopaque, " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", + " %14"FMTu64" %12"FMTu64"\n", mutex_stats[mutex_counter_num_ops], mutex_stats[mutex_counter_num_wait], mutex_stats[mutex_counter_num_spin_acq], + mutex_stats[mutex_counter_total_wait_time], mutex_stats[mutex_counter_max_wait_time]); } else { malloc_cprintf(write_cb, cbopaque, "\n"); From 66813916b52c373707ecb7f6875b0c4062873214 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 May 2017 18:05:27 -0700 Subject: [PATCH 0900/2608] Track background thread status separately at fork. Use a separate boolean to track the enabled status, instead of leaving the global background thread status inconsistent. --- src/background_thread.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index 1e725b4e..d3e80b3d 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -41,6 +41,8 @@ bool background_thread_stats_read(tsdn_t *tsdn, #undef NOT_REACHED #else +static bool background_thread_enabled_at_fork; + static void background_thread_info_reinit(tsdn_t *tsdn, background_thread_info_t *info) { background_thread_wakeup_time_set(tsdn, info, 0); @@ -548,10 +550,11 @@ void background_thread_prefork0(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &background_thread_lock); if (background_thread_enabled()) { + background_thread_enabled_at_fork = true; background_thread_enabled_set(tsdn, false); background_threads_disable(tsdn_tsd(tsdn)); - /* Enable again to re-create threads after fork. 
*/ - background_thread_enabled_set(tsdn, true); + } else { + background_thread_enabled_at_fork = false; } assert(n_background_threads == 0); } @@ -565,7 +568,9 @@ background_thread_prefork1(tsdn_t *tsdn) { static void background_thread_postfork_init(tsdn_t *tsdn) { - if (background_thread_enabled()) { + assert(n_background_threads == 0); + if (background_thread_enabled_at_fork) { + background_thread_enabled_set(tsdn, true); background_threads_enable(tsdn_tsd(tsdn)); } } From 10d090aae9834e1eb24b957d4fac042c205af52e Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 30 May 2017 14:36:55 -0700 Subject: [PATCH 0901/2608] Pass the O_CLOEXEC flag to open(2). This resolves #528. --- src/pages.c | 7 ++++--- src/prof.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pages.c b/src/pages.c index 3a048e3b..fec64dd0 100644 --- a/src/pages.c +++ b/src/pages.c @@ -353,12 +353,13 @@ os_overcommits_proc(void) { ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | + O_CLOEXEC); #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); #endif if (fd == -1) { return false; /* Error. */ diff --git a/src/prof.c b/src/prof.c index 639e5983..aa67486d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1409,7 +1409,7 @@ prof_open_maps(const char *format, ...) 
{ va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); - mfd = open(filename, O_RDONLY); + mfd = open(filename, O_RDONLY | O_CLOEXEC); return mfd; } From b4b4a98bc8463cc9e4ef0a78244d5047d497eed7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 31 May 2017 10:16:16 -0700 Subject: [PATCH 0902/2608] Add /run_tests.out/ to .gitignore. --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9acf374b..a25aaf7e 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,8 @@ /src/*.[od] /src/*.sym +/run_tests.out/ + /test/test.sh test/include/test/jemalloc_test.h test/include/test/jemalloc_test_defs.h From 44f9bd147a3df96e42adbe7ad4f0195763105bbe Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 May 2017 14:26:31 -0700 Subject: [PATCH 0903/2608] Header refactoring: unify and de-catchall rtree module. --- include/jemalloc/internal/arena_inlines_b.h | 1 + include/jemalloc/internal/extent_externs.h | 1 + .../internal/jemalloc_internal_includes.h | 4 - .../internal/jemalloc_internal_inlines_b.h | 2 + .../internal/{rtree_inlines.h => rtree.h} | 132 +++++++++++++++++- include/jemalloc/internal/rtree_ctx.h | 22 --- include/jemalloc/internal/rtree_externs.h | 45 ------ include/jemalloc/internal/rtree_structs.h | 53 ------- .../internal/{rtree_types.h => rtree_tsd.h} | 63 +++------ include/jemalloc/internal/tsd.h | 2 +- src/arena.c | 1 + src/extent.c | 1 + src/jemalloc.c | 1 + src/large.c | 1 + src/tsd.c | 1 + test/unit/arena_reset.c | 2 + test/unit/rtree.c | 2 + test/unit/spin.c | 2 + 18 files changed, 166 insertions(+), 170 deletions(-) rename include/jemalloc/internal/{rtree_inlines.h => rtree.h} (74%) delete mode 100644 include/jemalloc/internal/rtree_ctx.h delete mode 100644 include/jemalloc/internal/rtree_externs.h delete mode 100644 include/jemalloc/internal/rtree_structs.h rename include/jemalloc/internal/{rtree_types.h => rtree_tsd.h} (51%) diff --git 
a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8db6e9a8..16635c1a 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 9d5daf5b..96a71126 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rb.h" +#include "jemalloc/internal/rtree.h" extern rtree_t extents_rtree; extern const extent_hooks_t extent_hooks_default; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index b1a6f17d..770bcaab 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -44,7 +44,6 @@ #include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/rtree_types.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/prof_types.h" @@ -59,7 +58,6 @@ #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs_b.h" -#include "jemalloc/internal/rtree_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" @@ -73,7 +71,6 @@ #include "jemalloc/internal/extent_mmap_externs.h" #include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/rtree_externs.h" #include 
"jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/prof_externs.h" @@ -85,7 +82,6 @@ #include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include "jemalloc/internal/rtree_inlines.h" #include "jemalloc/internal/base_inlines.h" /* * Include portions of arena code interleaved with tcache code in order to diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index cfc52094..37493160 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/rtree.h" + /* Choose an arena based on a per-thread value. */ static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { diff --git a/include/jemalloc/internal/rtree_inlines.h b/include/jemalloc/internal/rtree.h similarity index 74% rename from include/jemalloc/internal/rtree_inlines.h rename to include/jemalloc/internal/rtree.h index 335a89cf..b5d4db39 100644 --- a/include/jemalloc/internal/rtree_inlines.h +++ b/include/jemalloc/internal/rtree.h @@ -1,8 +1,132 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_INLINES_H -#define JEMALLOC_INTERNAL_RTREE_INLINES_H +#ifndef JEMALLOC_INTERNAL_RTREE_H +#define JEMALLOC_INTERNAL_RTREE_H +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/spin.h" +#include "jemalloc/internal/tsd.h" + +/* + * This radix tree implementation is tailored to the singular purpose of + * associating metadata with extents that are currently owned by jemalloc. 
+ * + ******************************************************************************* + */ + +/* Number of high insignificant bits. */ +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +/* Number of low insigificant bits. */ +#define RTREE_NLIB LG_PAGE +/* Number of significant bits. */ +#define RTREE_NSB (LG_VADDR - RTREE_NLIB) +/* Number of levels in radix tree. */ +#if RTREE_NSB <= 10 +# define RTREE_HEIGHT 1 +#elif RTREE_NSB <= 36 +# define RTREE_HEIGHT 2 +#elif RTREE_NSB <= 52 +# define RTREE_HEIGHT 3 +#else +# error Unsupported number of significant virtual address bits +#endif +/* Use compact leaf representation if virtual address encoding allows. */ +#if RTREE_NHIB >= LG_CEIL_NSIZES +# define RTREE_LEAF_COMPACT +#endif + +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) + +typedef struct rtree_node_elm_s rtree_node_elm_t; +struct rtree_node_elm_s { + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ +}; + +struct rtree_leaf_elm_s { +#ifdef RTREE_LEAF_COMPACT + /* + * Single pointer-width field containing all three leaf element fields. + * For example, on a 64-bit x64 system with 48 significant virtual + * memory address bits, the index, extent, and slab fields are packed as + * such: + * + * x: index + * e: extent + * b: slab + * + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b + */ + atomic_p_t le_bits; +#else + atomic_p_t le_extent; /* (extent_t *) */ + atomic_u_t le_szind; /* (szind_t) */ + atomic_b_t le_slab; /* (bool) */ +#endif +}; + +typedef struct rtree_level_s rtree_level_t; +struct rtree_level_s { + /* Number of key bits distinguished by this level. */ + unsigned bits; + /* + * Cumulative number of key bits distinguished by traversing to + * corresponding tree level. + */ + unsigned cumbits; +}; + +typedef struct rtree_s rtree_t; +struct rtree_s { + malloc_mutex_t init_lock; + /* Number of elements based on rtree_levels[0].bits. 
*/ +#if RTREE_HEIGHT > 1 + rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#else + rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; +#endif +}; + +/* + * Split the bits into one to three partitions depending on number of + * significant bits. It the number of bits does not divide evenly into the + * number of levels, place one remainder bit per level starting at the leaf + * level. + */ +static const rtree_level_t rtree_levels[] = { +#if RTREE_HEIGHT == 1 + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 2 + {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, + {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} +#elif RTREE_HEIGHT == 3 + {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, + {RTREE_NSB/3 + RTREE_NSB%3/2, + RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, + {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} +#else +# error Unsupported rtree height +#endif +}; + +bool rtree_new(rtree_t *rtree, bool zeroed); + +typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; + +typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); +extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; + +typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); +extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; + +typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); +extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; +#ifdef JEMALLOC_JET +void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); +#endif +rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leafkey(uintptr_t key) { @@ -347,4 +471,4 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); } -#endif /* JEMALLOC_INTERNAL_RTREE_INLINES_H 
*/ +#endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/include/jemalloc/internal/rtree_ctx.h b/include/jemalloc/internal/rtree_ctx.h deleted file mode 100644 index fe2c8bde..00000000 --- a/include/jemalloc/internal/rtree_ctx.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H -#define JEMALLOC_INTERNAL_RTREE_CTX_H - -#include "jemalloc/internal/rtree_types.h" - -typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; -struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_leaf_elm_t *leaf; -}; - -typedef struct rtree_ctx_s rtree_ctx_t; -struct rtree_ctx_s { - /* Direct mapped cache. */ - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; - /* L2 LRU cache. */ - rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; -}; - -void rtree_ctx_data_init(rtree_ctx_t *ctx); - -#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/rtree_externs.h b/include/jemalloc/internal/rtree_externs.h deleted file mode 100644 index d7d81654..00000000 --- a/include/jemalloc/internal/rtree_externs.h +++ /dev/null @@ -1,45 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_EXTERNS_H -#define JEMALLOC_INTERNAL_RTREE_EXTERNS_H - -/* - * Split the bits into one to three partitions depending on number of - * significant bits. It the number of bits does not divide evenly into the - * number of levels, place one remainder bit per level starting at the leaf - * level. 
- */ -static const rtree_level_t rtree_levels[] = { -#if RTREE_HEIGHT == 1 - {RTREE_NSB, RTREE_NHIB + RTREE_NSB} -#elif RTREE_HEIGHT == 2 - {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, - {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} -#elif RTREE_HEIGHT == 3 - {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, - {RTREE_NSB/3 + RTREE_NSB%3/2, - RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, - {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} -#else -# error Unsupported rtree height -#endif -}; - -bool rtree_new(rtree_t *rtree, bool zeroed); - -typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; - -typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; - -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); -extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; - -typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); -extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; -#ifdef JEMALLOC_JET -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); -#endif -rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); - -#endif /* JEMALLOC_INTERNAL_RTREE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree_structs.h b/include/jemalloc/internal/rtree_structs.h deleted file mode 100644 index a02a1f60..00000000 --- a/include/jemalloc/internal/rtree_structs.h +++ /dev/null @@ -1,53 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_STRUCTS_H -#define JEMALLOC_INTERNAL_RTREE_STRUCTS_H - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex.h" - -struct rtree_node_elm_s { - atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ -}; - -struct rtree_leaf_elm_s { -#ifdef RTREE_LEAF_COMPACT - /* - * Single pointer-width field containing all three leaf element 
fields. - * For example, on a 64-bit x64 system with 48 significant virtual - * memory address bits, the index, extent, and slab fields are packed as - * such: - * - * x: index - * e: extent - * b: slab - * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b - */ - atomic_p_t le_bits; -#else - atomic_p_t le_extent; /* (extent_t *) */ - atomic_u_t le_szind; /* (szind_t) */ - atomic_b_t le_slab; /* (bool) */ -#endif -}; - -struct rtree_level_s { - /* Number of key bits distinguished by this level. */ - unsigned bits; - /* - * Cumulative number of key bits distinguished by traversing to - * corresponding tree level. - */ - unsigned cumbits; -}; - -struct rtree_s { - malloc_mutex_t init_lock; - /* Number of elements based on rtree_levels[0].bits. */ -#if RTREE_HEIGHT > 1 - rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; -#else - rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; -#endif -}; - -#endif /* JEMALLOC_INTERNAL_RTREE_STRUCTS_H */ diff --git a/include/jemalloc/internal/rtree_types.h b/include/jemalloc/internal/rtree_tsd.h similarity index 51% rename from include/jemalloc/internal/rtree_types.h rename to include/jemalloc/internal/rtree_tsd.h index fd0f1409..3cdc8625 100644 --- a/include/jemalloc/internal/rtree_types.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -1,43 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_RTREE_TYPES_H -#define JEMALLOC_INTERNAL_RTREE_TYPES_H - -#include "jemalloc/internal/size_classes.h" - -/* - * This radix tree implementation is tailored to the singular purpose of - * associating metadata with extents that are currently owned by jemalloc. - * - ******************************************************************************* - */ - -typedef struct rtree_node_elm_s rtree_node_elm_t; -typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; -typedef struct rtree_level_s rtree_level_t; -typedef struct rtree_s rtree_t; - -/* Number of high insignificant bits. 
*/ -#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) -/* Number of low insigificant bits. */ -#define RTREE_NLIB LG_PAGE -/* Number of significant bits. */ -#define RTREE_NSB (LG_VADDR - RTREE_NLIB) -/* Number of levels in radix tree. */ -#if RTREE_NSB <= 10 -# define RTREE_HEIGHT 1 -#elif RTREE_NSB <= 36 -# define RTREE_HEIGHT 2 -#elif RTREE_NSB <= 52 -# define RTREE_HEIGHT 3 -#else -# error Unsupported number of significant virtual address bits -#endif -/* Use compact leaf representation if virtual address encoding allows. */ -#if RTREE_NHIB >= LG_CEIL_NSIZES -# define RTREE_LEAF_COMPACT -#endif - -/* Needed for initialization only. */ -#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) +#ifndef JEMALLOC_INTERNAL_RTREE_CTX_H +#define JEMALLOC_INTERNAL_RTREE_CTX_H /* * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. Each @@ -66,4 +28,23 @@ typedef struct rtree_s rtree_t; */ #define RTREE_CTX_ZERO_INITIALIZER {{{0}}} -#endif /* JEMALLOC_INTERNAL_RTREE_TYPES_H */ + +typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; + +typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; +struct rtree_ctx_cache_elm_s { + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; +}; + +typedef struct rtree_ctx_s rtree_ctx_t; +struct rtree_ctx_s { + /* Direct mapped cache. */ + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + /* L2 LRU cache. 
*/ + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; +}; + +void rtree_ctx_data_init(rtree_ctx_t *ctx); + +#endif /* JEMALLOC_INTERNAL_RTREE_CTX_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c192a6ca..f304e1d9 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -6,7 +6,7 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rtree_ctx.h" +#include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/util.h" diff --git a/src/arena.c b/src/arena.c index 9b3ea235..3d0725f4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" diff --git a/src/extent.c b/src/extent.c index 44e98789..2264a0ca 100644 --- a/src/extent.c +++ b/src/extent.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index ed22a258..00b645f0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/ticker.h" diff --git a/src/large.c b/src/large.c index 55ee3524..27c9bc60 100644 --- a/src/large.c +++ b/src/large.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" 
+#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" /******************************************************************************/ diff --git a/src/tsd.c b/src/tsd.c index 525432b6..29a56775 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/rtree.h" /******************************************************************************/ /* Data. */ diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index d1698325..678ae57c 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -2,6 +2,8 @@ #include "test/jemalloc_test.h" #endif +#include "jemalloc/internal/rtree.h" + #include "test/extent_hooks.h" static unsigned diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b854afd7..752dde99 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/rtree.h" + rtree_node_alloc_t *rtree_node_alloc_orig; rtree_node_dalloc_t *rtree_node_dalloc_orig; rtree_leaf_alloc_t *rtree_leaf_alloc_orig; diff --git a/test/unit/spin.c b/test/unit/spin.c index bd368b3d..b965f742 100644 --- a/test/unit/spin.c +++ b/test/unit/spin.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/spin.h" + TEST_BEGIN(test_spin) { spin_t spinner = SPIN_INITIALIZER; From 93284bb53d9c44a5c36297450a82aed5b8051526 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 May 2017 14:36:09 -0700 Subject: [PATCH 0904/2608] Header refactoring: unify and de-catchall extent_dss. 
--- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/arena_structs_b.h | 1 + include/jemalloc/internal/extent_dss.h | 26 +++++++++++++++++++ .../jemalloc/internal/extent_dss_externs.h | 14 ---------- .../jemalloc/internal/extent_dss_structs.h | 6 ----- include/jemalloc/internal/extent_dss_types.h | 14 ---------- .../internal/jemalloc_internal_includes.h | 3 --- src/arena.c | 1 + src/ctl.c | 1 + src/extent.c | 1 + src/extent_dss.c | 1 + src/jemalloc.c | 1 + 12 files changed, 33 insertions(+), 37 deletions(-) create mode 100644 include/jemalloc/internal/extent_dss.h delete mode 100644 include/jemalloc/internal/extent_dss_externs.h delete mode 100644 include/jemalloc/internal/extent_dss_structs.h delete mode 100644 include/jemalloc/internal/extent_dss_types.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 273705f7..08a6d174 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 160ac4fa..d1fffec1 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h new file mode 100644 index 00000000..e8f02ce2 --- /dev/null +++ b/include/jemalloc/internal/extent_dss.h @@ -0,0 +1,26 @@ 
+#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H +#define JEMALLOC_INTERNAL_EXTENT_DSS_H + +typedef enum { + dss_prec_disabled = 0, + dss_prec_primary = 1, + dss_prec_secondary = 2, + + dss_prec_limit = 3 +} dss_prec_t; +#define DSS_PREC_DEFAULT dss_prec_secondary +#define DSS_DEFAULT "secondary" + +extern const char *dss_prec_names[]; + +extern const char *opt_dss; + +dss_prec_t extent_dss_prec_get(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); + +#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */ diff --git a/include/jemalloc/internal/extent_dss_externs.h b/include/jemalloc/internal/extent_dss_externs.h deleted file mode 100644 index d376fa74..00000000 --- a/include/jemalloc/internal/extent_dss_externs.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H -#define JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H - -extern const char *opt_dss; - -dss_prec_t extent_dss_prec_get(void); -bool extent_dss_prec_set(dss_prec_t dss_prec); -void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_in_dss(void *addr); -bool extent_dss_mergeable(void *addr_a, void *addr_b); -void extent_dss_boot(void); - -#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_dss_structs.h b/include/jemalloc/internal/extent_dss_structs.h deleted file mode 100644 index 2d8c6f05..00000000 --- a/include/jemalloc/internal/extent_dss_structs.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H -#define JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H - -extern const char *dss_prec_names[]; - -#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_dss_types.h 
b/include/jemalloc/internal/extent_dss_types.h deleted file mode 100644 index a851c7cb..00000000 --- a/include/jemalloc/internal/extent_dss_types.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H -#define JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H - -typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, - dss_prec_secondary = 2, - - dss_prec_limit = 3 -} dss_prec_t; -#define DSS_PREC_DEFAULT dss_prec_secondary -#define DSS_DEFAULT "secondary" - -#endif /* JEMALLOC_INTERNAL_EXTENT_DSS_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 770bcaab..71c856b2 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -41,7 +41,6 @@ /******************************************************************************/ #include "jemalloc/internal/extent_types.h" -#include "jemalloc/internal/extent_dss_types.h" #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tcache_types.h" @@ -54,7 +53,6 @@ #include "jemalloc/internal/mutex_pool_structs.h" #include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" -#include "jemalloc/internal/extent_dss_structs.h" #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs_b.h" @@ -67,7 +65,6 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/extent_externs.h" -#include "jemalloc/internal/extent_dss_externs.h" #include "jemalloc/internal/extent_mmap_externs.h" #include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" diff --git a/src/arena.c b/src/arena.c index 3d0725f4..01358959 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include 
"jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" diff --git a/src/ctl.c b/src/ctl.c index e3337e8a..e81ca417 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/size_classes.h" diff --git a/src/extent.c b/src/extent.c index 2264a0ca..0929aee4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" diff --git a/src/extent_dss.c b/src/extent_dss.c index 8e0ca654..e72da958 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/spin.h" /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 00b645f0..32f4a4c5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" From 98774e64a4696c7bce6d2317aa59fe5b39bba69f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 May 2017 14:42:32 -0700 Subject: [PATCH 0905/2608] Header refactoring: unify and de-catchall extent_mmap module. 
--- .../internal/{extent_mmap_externs.h => extent_mmap.h} | 6 +++--- include/jemalloc/internal/jemalloc_internal_includes.h | 1 - src/arena.c | 1 + src/base.c | 1 + src/ctl.c | 1 + src/extent.c | 1 + src/extent_mmap.c | 1 + src/jemalloc.c | 1 + src/large.c | 1 + test/unit/arena_reset.c | 1 + 10 files changed, 11 insertions(+), 4 deletions(-) rename include/jemalloc/internal/{extent_mmap_externs.h => extent_mmap.h} (57%) diff --git a/include/jemalloc/internal/extent_mmap_externs.h b/include/jemalloc/internal/extent_mmap.h similarity index 57% rename from include/jemalloc/internal/extent_mmap_externs.h rename to include/jemalloc/internal/extent_mmap.h index fe9a79ac..55f17ee4 100644 --- a/include/jemalloc/internal/extent_mmap_externs.h +++ b/include/jemalloc/internal/extent_mmap.h @@ -1,10 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H -extern bool opt_retain; +extern bool opt_retain; -void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, +void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_dalloc_mmap(void *addr, size_t size); +bool extent_dalloc_mmap(void *addr, size_t size); #endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 71c856b2..837e9e41 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -65,7 +65,6 @@ #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/extent_externs.h" -#include "jemalloc/internal/extent_mmap_externs.h" #include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large_externs.h" diff --git a/src/arena.c b/src/arena.c index 01358959..bf1d6e65 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,7 @@ #include 
"jemalloc/internal/assert.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" diff --git a/src/base.c b/src/base.c index 892c28dd..498e5c9d 100644 --- a/src/base.c +++ b/src/base.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" /******************************************************************************/ diff --git a/src/ctl.c b/src/ctl.c index e81ca417..33275d70 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/size_classes.h" diff --git a/src/extent.c b/src/extent.c index 0929aee4..6589de55 100644 --- a/src/extent.c +++ b/src/extent.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 3e4e1ef7..8d607dc8 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_mmap.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 32f4a4c5..1befb64d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" diff --git a/src/large.c b/src/large.c index 27c9bc60..79d2c9da 100644 --- a/src/large.c +++ b/src/large.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 678ae57c..958453d1 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -2,6 +2,7 @@ #include "test/jemalloc_test.h" #endif +#include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/rtree.h" #include "test/extent_hooks.h" From 041e041e1f23a03d1019330c8401a01285feb44f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 May 2017 14:56:24 -0700 Subject: [PATCH 0906/2608] Header refactoring: unify and de-catchall mutex_pool. 
--- include/jemalloc/internal/extent_externs.h | 1 + include/jemalloc/internal/extent_inlines.h | 2 +- .../internal/jemalloc_internal_includes.h | 2 -- .../{mutex_pool_inlines.h => mutex_pool.h} | 18 +++++++++++------- include/jemalloc/internal/mutex_pool_structs.h | 16 ---------------- src/extent.c | 1 + src/mutex_pool.c | 1 + 7 files changed, 15 insertions(+), 26 deletions(-) rename include/jemalloc/internal/{mutex_pool_inlines.h => mutex_pool.h} (86%) delete mode 100644 include/jemalloc/internal/mutex_pool_structs.h diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 96a71126..acb3ef49 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rb.h" #include "jemalloc/internal/rtree.h" diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index a99a6351..94c41923 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_EXTENT_INLINES_H #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool_inlines.h" +#include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 837e9e41..437eaa40 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -50,7 +50,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/mutex_pool_structs.h" #include 
"jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/base_structs.h" @@ -76,7 +75,6 @@ /* INLINES */ /******************************************************************************/ -#include "jemalloc/internal/mutex_pool_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_a.h" #include "jemalloc/internal/base_inlines.h" /* diff --git a/include/jemalloc/internal/mutex_pool_inlines.h b/include/jemalloc/internal/mutex_pool.h similarity index 86% rename from include/jemalloc/internal/mutex_pool_inlines.h rename to include/jemalloc/internal/mutex_pool.h index 19b5ab4c..726cece9 100644 --- a/include/jemalloc/internal/mutex_pool_inlines.h +++ b/include/jemalloc/internal/mutex_pool.h @@ -1,17 +1,21 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H -#define JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H +#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H +#define JEMALLOC_INTERNAL_MUTEX_POOL_H #include "jemalloc/internal/hash.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool_structs.h" #include "jemalloc/internal/witness.h" -/* - * This file really combines "inlines" and "externs", but only transitionally. - */ +/* We do mod reductions by this value, so it should be kept a power of 2. */ +#define MUTEX_POOL_SIZE 256 + +typedef struct mutex_pool_s mutex_pool_t; +struct mutex_pool_s { + malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; +}; bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank); +/* Internal helper - not meant to be called outside this module. 
*/ static inline malloc_mutex_t * mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) { size_t hash_result[2]; @@ -87,4 +91,4 @@ mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key)); } -#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_INLINES_H */ +#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */ diff --git a/include/jemalloc/internal/mutex_pool_structs.h b/include/jemalloc/internal/mutex_pool_structs.h deleted file mode 100644 index b32fb5ac..00000000 --- a/include/jemalloc/internal/mutex_pool_structs.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H -#define JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H - -#include "jemalloc/internal/mutex.h" - -/* This file really combines "structs" and "types", but only transitionally. */ - -/* We do mod reductions by this value, so it should be kept a power of 2. */ -#define MUTEX_POOL_SIZE 256 - -typedef struct mutex_pool_s mutex_pool_t; -struct mutex_pool_s { - malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; -}; - -#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_STRUCTS_H */ diff --git a/src/extent.c b/src/extent.c index 6589de55..c3d9baae 100644 --- a/src/extent.c +++ b/src/extent.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" /******************************************************************************/ /* Data. 
*/ diff --git a/src/mutex_pool.c b/src/mutex_pool.c index 95a45736..f24d10e4 100644 --- a/src/mutex_pool.c +++ b/src/mutex_pool.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) { From 8261e581be517f4fe193ead2c9b662717d9ca5e0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 30 May 2017 10:45:37 -0700 Subject: [PATCH 0907/2608] Header refactoring: Pull size helpers out of jemalloc module. --- Makefile.in | 1 + include/jemalloc/internal/arena_externs.h | 8 - include/jemalloc/internal/arena_inlines_b.h | 13 +- include/jemalloc/internal/arena_types.h | 2 - include/jemalloc/internal/extent_inlines.h | 3 +- include/jemalloc/internal/extent_structs.h | 2 +- .../internal/jemalloc_internal_externs.h | 17 - .../internal/jemalloc_internal_inlines_a.h | 267 --------------- .../internal/jemalloc_internal_inlines_c.h | 7 +- include/jemalloc/internal/prof_inlines_b.h | 4 +- include/jemalloc/internal/size_classes.sh | 1 + include/jemalloc/internal/sz.h | 317 ++++++++++++++++++ include/jemalloc/internal/tcache_inlines.h | 9 +- src/arena.c | 55 +-- src/base.c | 15 +- src/ckh.c | 6 +- src/ctl.c | 5 +- src/extent.c | 23 +- src/jemalloc.c | 149 ++------ src/large.c | 12 +- src/prof.c | 8 +- src/sz.c | 106 ++++++ src/tcache.c | 6 +- src/zone.c | 2 +- test/unit/arena_reset.c | 2 +- test/unit/retained.c | 8 +- test/unit/rtree.c | 3 +- test/unit/size_classes.c | 175 +++++----- 28 files changed, 636 insertions(+), 590 deletions(-) create mode 100644 include/jemalloc/internal/sz.h create mode 100644 src/sz.c diff --git a/Makefile.in b/Makefile.in index 868bf8cc..fec1397a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -112,6 +112,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ $(srcroot)src/spin.c \ + $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ 
$(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 08a6d174..cfb7c6fb 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -6,14 +6,6 @@ #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" -static const size_t large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; - extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 16635c1a..003abe11 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" static inline szind_t @@ -127,7 +128,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { (uintptr_t)ptr, true); assert(szind != NSIZES); - return index2size(szind); + return sz_index2size(szind); } JEMALLOC_ALWAYS_INLINE size_t @@ -160,7 +161,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { assert(szind != NSIZES); - return index2size(szind); + return sz_index2size(szind); } static inline void @@ -257,7 +258,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. 
*/ - szind = size2index(size); + szind = sz_size2index(size); slab = (szind < NBINS); } @@ -269,7 +270,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); - assert(szind == size2index(size)); + assert(szind == sz_size2index(size)); assert((config_prof && opt_prof) || slab == (szind < NBINS)); if (config_debug) { @@ -313,7 +314,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &local_ctx.szind, &local_ctx.slab); - assert(local_ctx.szind == size2index(size)); + assert(local_ctx.szind == sz_size2index(size)); alloc_ctx = &local_ctx; } slab = alloc_ctx->slab; @@ -323,7 +324,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - szind = size2index(size); + szind = sz_size2index(size); slab = (szind < NBINS); } diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 1374eeca..01b9096a 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -1,8 +1,6 @@ #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H #define JEMALLOC_INTERNAL_ARENA_TYPES_H -#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS) - /* Maximum number of regions in one slab. 
*/ #define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) #define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 94c41923..bb2bd699 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sz.h" static inline void extent_lock(tsdn_t *tsdn, extent_t *extent) { @@ -65,7 +66,7 @@ extent_szind_get(const extent_t *extent) { static inline size_t extent_usize_get(const extent_t *extent) { - return index2size(extent_szind_get(extent)); + return sz_index2size(extent_szind_get(extent)); } static inline size_t diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 457891df..d2979503 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -53,7 +53,7 @@ struct extent_s { * szind: The szind flag indicates usable size class index for * allocations residing in this extent, regardless of whether the * extent is a slab. Extent size and usable size often differ - * even for non-slabs, either due to large_pad or promotion of + * even for non-slabs, either due to sz_large_pad or promotion of * sampled small regions. * * nfree: Number of free regions in slab. diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 11e16ecc..e10fb275 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -31,23 +31,6 @@ extern unsigned narenas_auto; */ extern atomic_p_t arenas[]; -/* - * pind2sz_tab encodes the same information as could be computed by - * pind2sz_compute(). 
- */ -extern size_t const pind2sz_tab[NPSIZES+1]; -/* - * index2size_tab encodes the same information as could be computed (at - * unacceptable cost in some code paths) by index2size_compute(). - */ -extern size_t const index2size_tab[NSIZES]; -/* - * size2index_tab is a compact lookup table that rounds request sizes up to - * size classes. In order to reduce cache footprint, the table is compressed, - * and all accesses are via size2index(). - */ -extern uint8_t const size2index_tab[]; - void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index c8e26298..d0bf2eee 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -7,273 +7,6 @@ #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/ticker.h" -JEMALLOC_ALWAYS_INLINE pszind_t -psz2ind(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return NPSIZES; - } - { - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - - (LG_SIZE_CLASS_GROUP + LG_PAGE); - pszind_t grp = shift << LG_SIZE_CLASS_GROUP; - - pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZD(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - pszind_t ind = grp + mod; - return ind; - } -} - -static inline size_t -pind2sz_compute(pszind_t pind) { - if (unlikely(pind == NPSIZES)) { - return LARGE_MAXCLASS + PAGE; - } - { - size_t grp = pind >> LG_SIZE_CLASS_GROUP; - size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 
1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t sz = grp_size + mod_size; - return sz; - } -} - -static inline size_t -pind2sz_lookup(pszind_t pind) { - size_t ret = (size_t)pind2sz_tab[pind]; - assert(ret == pind2sz_compute(pind)); - return ret; -} - -static inline size_t -pind2sz(pszind_t pind) { - assert(pind < NPSIZES+1); - return pind2sz_lookup(pind); -} - -static inline size_t -psz2u(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return LARGE_MAXCLASS + PAGE; - } - { - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (psz + delta_mask) & ~delta_mask; - return usize; - } -} - -static inline szind_t -size2index_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { - return NSIZES; - } -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); - } -#endif - { - szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - szind_t grp = shift << LG_SIZE_CLASS_GROUP; - - szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - - size_t delta_inverse_mask = ZD(-1) << lg_delta; - szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); - - szind_t index = NTBINS + grp + mod; - return index; - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index_lookup(size_t size) { - assert(size <= LOOKUP_MAXCLASS); - { - szind_t ret = (size2index_tab[(size-1) >> LG_TINY_MIN]); - assert(ret == size2index_compute(size)); - return ret; - } -} - -JEMALLOC_ALWAYS_INLINE szind_t -size2index(size_t size) { - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { - return size2index_lookup(size); - } - return size2index_compute(size); -} - -static inline size_t -index2size_compute(szind_t index) { -#if (NTBINS > 0) - if (index < NTBINS) { - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); - } -#endif - { - size_t reduced_index = index - NTBINS; - size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - - 1); - - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; - - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; - - size_t usize = grp_size + mod_size; - return usize; - } -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size_lookup(szind_t index) { - size_t ret = (size_t)index2size_tab[index]; - assert(ret == index2size_compute(index)); - return ret; -} - -JEMALLOC_ALWAYS_INLINE size_t -index2size(szind_t index) { - assert(index < NSIZES); - return index2size_lookup(index); -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { - return 0; - } -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; - size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? 
(ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); - } -#endif - { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return usize; - } -} - -JEMALLOC_ALWAYS_INLINE size_t -s2u_lookup(size_t size) { - size_t ret = index2size_lookup(size2index_lookup(size)); - - assert(ret == s2u_compute(size)); - return ret; -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size. - */ -JEMALLOC_ALWAYS_INLINE size_t -s2u(size_t size) { - assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { - return s2u_lookup(size); - } - return s2u_compute(size); -} - -/* - * Compute usable size that would result from allocating an object with the - * specified size and alignment. - */ -JEMALLOC_ALWAYS_INLINE size_t -sa2u(size_t size, size_t alignment) { - size_t usize; - - assert(alignment != 0 && ((alignment - 1) & alignment) == 0); - - /* Try for a small size class. */ - if (size <= SMALL_MAXCLASS && alignment < PAGE) { - /* - * Round size up to the nearest multiple of alignment. - * - * This done, we can take advantage of the fact that for each - * small size class, every object is aligned at the smallest - * power of two that is non-zero in the base two representation - * of the size. For example: - * - * Size | Base 2 | Minimum alignment - * -----+----------+------------------ - * 96 | 1100000 | 32 - * 144 | 10100000 | 32 - * 192 | 11000000 | 64 - */ - usize = s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) { - return usize; - } - } - - /* Large size class. Beware of overflow. */ - - if (unlikely(alignment > LARGE_MAXCLASS)) { - return 0; - } - - /* Make sure result is a large size class. 
*/ - if (size <= LARGE_MINCLASS) { - usize = LARGE_MINCLASS; - } else { - usize = s2u(size); - if (usize < size) { - /* size_t overflow. */ - return 0; - } - } - - /* - * Calculate the multi-page mapping that large_palloc() would need in - * order to guarantee the alignment. - */ - if (usize + large_pad + PAGE_CEILING(alignment) - PAGE < usize) { - /* size_t overflow. */ - return 0; - } - return usize; -} - JEMALLOC_ALWAYS_INLINE malloc_cpuid_t malloc_getcpu(void) { assert(have_percpu_arena); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 80dfbeff..7ffce6fb 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_INLINES_C_H #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/sz.h" #include "jemalloc/internal/witness.h" JEMALLOC_ALWAYS_INLINE arena_t * @@ -48,7 +49,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, void *ret; assert(usize != 0); - assert(usize == sa2u(usize, alignment)); + assert(usize == sz_sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -118,7 +119,7 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, void *p; size_t usize, copysize; - usize = sa2u(size + extra, alignment); + usize = sz_sa2u(size + extra, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return NULL; } @@ -128,7 +129,7 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, return NULL; } /* Try again, without extra this time. 
*/ - usize = sa2u(size, alignment); + usize = sz_sa2u(size, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return NULL; } diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index fba7b998..d670cb7b 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H #define JEMALLOC_INTERNAL_PROF_INLINES_B_H +#include "jemalloc/internal/sz.h" + JEMALLOC_ALWAYS_INLINE bool prof_active_get_unlocked(void) { /* @@ -113,7 +115,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { prof_tdata_t *tdata; prof_bt_t bt; - assert(usize == s2u(usize)); + assert(usize == sz_s2u(usize)); if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, &tdata))) { diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh index dd562db1..998994d0 100755 --- a/include/jemalloc/internal/size_classes.sh +++ b/include/jemalloc/internal/size_classes.sh @@ -334,6 +334,7 @@ for lg_z in ${lg_zarr} ; do echo "#define LOOKUP_MAXCLASS ${lookup_maxclass}" echo "#define SMALL_MAXCLASS ${small_maxclass}" echo "#define LG_LARGE_MINCLASS ${lg_large_minclass}" + echo "#define LARGE_MINCLASS (ZU(1) << LG_LARGE_MINCLASS)" echo "#define LARGE_MAXCLASS ${large_maxclass}" echo "#endif" echo diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h new file mode 100644 index 00000000..7f640d55 --- /dev/null +++ b/include/jemalloc/internal/sz.h @@ -0,0 +1,317 @@ +#ifndef JEMALLOC_INTERNAL_SIZE_H +#define JEMALLOC_INTERNAL_SIZE_H + +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/pages.h" +#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/util.h" + +/* + * sz module: Size computations. 
+ * + * Some abbreviations used here: + * p: Page + * ind: Index + * s, sz: Size + * u: Usable size + * a: Aligned + * + * These are not always used completely consistently, but should be enough to + * interpret function names. E.g. sz_psz2ind converts page size to page size + * index; sz_sa2u converts a (size, alignment) allocation request to the usable + * size that would result from such an allocation. + */ + +/* + * sz_pind2sz_tab encodes the same information as could be computed by + * sz_pind2sz_compute(). + */ +extern size_t const sz_pind2sz_tab[NPSIZES+1]; +/* + * sz_index2size_tab encodes the same information as could be computed (at + * unacceptable cost in some code paths) by sz_index2size_compute(). + */ +extern size_t const sz_index2size_tab[NSIZES]; +/* + * sz_size2index_tab is a compact lookup table that rounds request sizes up to + * size classes. In order to reduce cache footprint, the table is compressed, + * and all accesses are via sz_size2index(). + */ +extern uint8_t const sz_size2index_tab[]; + +static const size_t sz_large_pad = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + PAGE +#else + 0 +#endif + ; + +JEMALLOC_ALWAYS_INLINE pszind_t +sz_psz2ind(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return NPSIZES; + } + { + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - + (LG_SIZE_CLASS_GROUP + LG_PAGE); + pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + + pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
+ LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZD(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + pszind_t ind = grp + mod; + return ind; + } +} + +static inline size_t +sz_pind2sz_compute(pszind_t pind) { + if (unlikely(pind == NPSIZES)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t grp = pind >> LG_SIZE_CLASS_GROUP; + size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t sz = grp_size + mod_size; + return sz; + } +} + +static inline size_t +sz_pind2sz_lookup(pszind_t pind) { + size_t ret = (size_t)sz_pind2sz_tab[pind]; + assert(ret == sz_pind2sz_compute(pind)); + return ret; +} + +static inline size_t +sz_pind2sz(pszind_t pind) { + assert(pind < NPSIZES+1); + return sz_pind2sz_lookup(pind); +} + +static inline size_t +sz_psz2u(size_t psz) { + if (unlikely(psz > LARGE_MAXCLASS)) { + return LARGE_MAXCLASS + PAGE; + } + { + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? + LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return usize; + } +} + +static inline szind_t +sz_size2index_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return NSIZES; + } +#if (NTBINS != 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); + } +#endif + { + szind_t x = lg_floor((size<<1)-1); + szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 
0 : + x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); + szind_t grp = shift << LG_SIZE_CLASS_GROUP; + + szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + + size_t delta_inverse_mask = ZD(-1) << lg_delta; + szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + + szind_t index = NTBINS + grp + mod; + return index; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_lookup(size_t size) { + assert(size <= LOOKUP_MAXCLASS); + { + szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]); + assert(ret == sz_size2index_compute(size)); + return ret; + } +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return sz_size2index_lookup(size); + } + return sz_size2index_compute(size); +} + +static inline size_t +sz_index2size_compute(szind_t index) { +#if (NTBINS > 0) + if (index < NTBINS) { + return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); + } +#endif + { + size_t reduced_index = index - NTBINS; + size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; + size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + 1); + + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + + (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + + size_t shift = (grp == 0) ? 
1 : grp; + size_t lg_delta = shift + (LG_QUANTUM-1); + size_t mod_size = (mod+1) << lg_delta; + + size_t usize = grp_size + mod_size; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_lookup(szind_t index) { + size_t ret = (size_t)sz_index2size_tab[index]; + assert(ret == sz_index2size_compute(index)); + return ret; +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + assert(index < NSIZES); + return sz_index2size_lookup(index); +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_compute(size_t size) { + if (unlikely(size > LARGE_MAXCLASS)) { + return 0; + } +#if (NTBINS > 0) + if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { + size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; + size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : + (ZU(1) << lg_ceil)); + } +#endif + { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return usize; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_lookup(size_t size) { + size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); + + assert(ret == sz_s2u_compute(size)); + return ret; +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size. + */ +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u(size_t size) { + assert(size > 0); + if (likely(size <= LOOKUP_MAXCLASS)) { + return sz_s2u_lookup(size); + } + return sz_s2u_compute(size); +} + +/* + * Compute usable size that would result from allocating an object with the + * specified size and alignment. + */ +JEMALLOC_ALWAYS_INLINE size_t +sz_sa2u(size_t size, size_t alignment) { + size_t usize; + + assert(alignment != 0 && ((alignment - 1) & alignment) == 0); + + /* Try for a small size class. 
*/ + if (size <= SMALL_MAXCLASS && alignment < PAGE) { + /* + * Round size up to the nearest multiple of alignment. + * + * This done, we can take advantage of the fact that for each + * small size class, every object is aligned at the smallest + * power of two that is non-zero in the base two representation + * of the size. For example: + * + * Size | Base 2 | Minimum alignment + * -----+----------+------------------ + * 96 | 1100000 | 32 + * 144 | 10100000 | 32 + * 192 | 11000000 | 64 + */ + usize = sz_s2u(ALIGNMENT_CEILING(size, alignment)); + if (usize < LARGE_MINCLASS) { + return usize; + } + } + + /* Large size class. Beware of overflow. */ + + if (unlikely(alignment > LARGE_MAXCLASS)) { + return 0; + } + + /* Make sure result is a large size class. */ + if (size <= LARGE_MINCLASS) { + usize = LARGE_MINCLASS; + } else { + usize = sz_s2u(size); + if (usize < size) { + /* size_t overflow. */ + return 0; + } + } + + /* + * Calculate the multi-page mapping that large_palloc() would need in + * order to guarantee the alignment. + */ + if (usize + sz_large_pad + PAGE_CEILING(alignment) - PAGE < usize) { + /* size_t overflow. */ + return 0; + } + return usize; +} + +#endif /* JEMALLOC_INTERNAL_SIZE_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8a65ba2b..c55bcd27 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" @@ -95,7 +96,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, * statement are all static. 
*/ if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = index2size(binind); + usize = sz_index2size(binind); assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); } @@ -147,7 +148,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, return NULL; } - ret = large_malloc(tsd_tsdn(tsd), arena, s2u(size), zero); + ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero); if (ret == NULL) { return NULL; } @@ -157,7 +158,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, /* Only compute usize on demand */ if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = index2size(binind); + usize = sz_index2size(binind); assert(usize <= tcache_maxclass); } @@ -221,7 +222,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); if (slow_path && config_fill && unlikely(opt_junk_free)) { - large_dalloc_junk(ptr, index2size(binind)); + large_dalloc_junk(ptr, sz_index2size(binind)); } tbin = tcache_large_bin_get(tcache, binind); diff --git a/src/arena.c b/src/arena.c index bf1d6e65..151aad3e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -289,7 +289,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; arena_stats_accum_zu(&astats->allocated_large, - curlextents * index2size(NBINS + i)); + curlextents * sz_index2size(NBINS + i)); } arena_stats_unlock(tsdn, &arena->stats); @@ -303,12 +303,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (; i < NBINS; i++) { tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * index2size(i)); + tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached 
* index2size(i)); + tbin->ncached * sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -467,7 +467,7 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { if (usize < LARGE_MINCLASS) { usize = LARGE_MINCLASS; } - index = size2index(usize); + index = sz_size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; arena_stats_add_u64(tsdn, &arena->stats, @@ -483,7 +483,7 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { if (usize < LARGE_MINCLASS) { usize = LARGE_MINCLASS; } - index = size2index(usize); + index = sz_size2index(usize); hindex = (index >= NBINS) ? index - NBINS : 0; arena_stats_add_u64(tsdn, &arena->stats, @@ -505,21 +505,22 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - szind_t szind = size2index(usize); + szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, usize, large_pad, alignment, false, + &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL) { extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, usize, large_pad, alignment, + &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } - size_t size = usize + large_pad; + size_t size = usize + sz_large_pad; if (extent == NULL) { extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, - usize, large_pad, alignment, false, szind, zero, &commit); + usize, sz_large_pad, alignment, false, szind, zero, + &commit); if (config_stats) { /* * extent may be NULL on OOM, but in that case @@ -1146,7 +1147,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { assert(alloc_ctx.szind != NSIZES); if (config_stats || (config_prof && opt_prof)) { - usize = index2size(alloc_ctx.szind); + usize = 
sz_index2size(alloc_ctx.szind); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); } /* Remove large allocation from prof sample set. */ @@ -1278,7 +1279,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, WITNESS_RANK_CORE, 0); extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - szind_t szind = size2index(bin_info->reg_size); + szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, @@ -1484,7 +1485,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { assert(binind < NBINS); bin = &arena->bins[binind]; - usize = index2size(binind); + usize = sz_index2size(binind); malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { @@ -1544,7 +1545,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, if (likely(size <= SMALL_MAXCLASS)) { return arena_malloc_small(tsdn, arena, ind, zero); } - return large_malloc(tsdn, arena, index2size(ind), zero); + return large_malloc(tsdn, arena, sz_index2size(ind), zero); } void * @@ -1555,8 +1556,8 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special slab placement. 
*/ - ret = arena_malloc(tsdn, arena, usize, size2index(usize), zero, - tcache, true); + ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), + zero, tcache, true); } else { if (likely(alignment <= CACHELINE)) { ret = large_malloc(tsdn, arena, usize, zero); @@ -1581,7 +1582,7 @@ arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { (uintptr_t)ptr, true); arena_t *arena = extent_arena_get(extent); - szind_t szind = size2index(usize); + szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); @@ -1617,7 +1618,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, size_t usize = arena_prof_demote(tsdn, extent, ptr); if (usize <= tcache_maxclass) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - size2index(usize), slow_path); + sz_size2index(usize), slow_path); } else { large_dalloc(tsdn, extent); } @@ -1751,17 +1752,17 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, } extent_t *extent = iealloc(tsdn, ptr); - size_t usize_min = s2u(size); - size_t usize_max = s2u(size + extra); + size_t usize_min = sz_s2u(size); + size_t usize_max = sz_s2u(size + extra); if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the * same. 
*/ - assert(arena_bin_info[size2index(oldsize)].reg_size == + assert(arena_bin_info[sz_size2index(oldsize)].reg_size == oldsize); - if ((usize_max > SMALL_MAXCLASS || size2index(usize_max) != - size2index(oldsize)) && (size > oldsize || usize_max < + if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != + sz_size2index(oldsize)) && (size > oldsize || usize_max < oldsize)) { return true; } @@ -1780,10 +1781,10 @@ static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { if (alignment == 0) { - return arena_malloc(tsdn, arena, usize, size2index(usize), + return arena_malloc(tsdn, arena, usize, sz_size2index(usize), zero, tcache, true); } - usize = sa2u(usize, alignment); + usize = sz_sa2u(usize, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return NULL; } @@ -1793,7 +1794,7 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache) { - size_t usize = s2u(size); + size_t usize = sz_s2u(size); if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { return NULL; } @@ -1998,7 +1999,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - arena->extent_grow_next = psz2ind(HUGEPAGE); + arena->extent_grow_next = sz_psz2ind(HUGEPAGE); if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/base.c b/src/base.c index 498e5c9d..8e1544fd 100644 --- a/src/base.c +++ b/src/base.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/sz.h" /******************************************************************************/ /* Data. 
*/ @@ -121,8 +122,8 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, * Compute the index for the largest size class that does not * exceed extent's size. */ - szind_t index_floor = size2index(extent_bsize_get(extent) + 1) - - 1; + szind_t index_floor = + sz_size2index(extent_bsize_get(extent) + 1) - 1; extent_heap_insert(&base->avail[index_floor], extent); } @@ -171,11 +172,11 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, * HUGEPAGE), or a size large enough to satisfy the requested size and * alignment, whichever is larger. */ - size_t min_block_size = HUGEPAGE_CEILING(psz2u(header_size + gap_size + - usize)); + size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size + + usize)); pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 : *pind_last; - size_t next_block_size = HUGEPAGE_CEILING(pind2sz(pind_next)); + size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? min_block_size : next_block_size; base_block_t *block = (base_block_t *)base_map(extent_hooks, ind, @@ -183,7 +184,7 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, if (block == NULL) { return NULL; } - *pind_last = psz2ind(block_size); + *pind_last = sz_psz2ind(block_size); block->size = block_size; block->next = NULL; assert(block_size >= header_size); @@ -304,7 +305,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, extent_t *extent = NULL; malloc_mutex_lock(tsdn, &base->mtx); - for (szind_t i = size2index(asize); i < NSIZES; i++) { + for (szind_t i = sz_size2index(asize); i < NSIZES; i++) { extent = extent_heap_remove_first(&base->avail[i]); if (extent != NULL) { /* Use existing space. 
*/ diff --git a/src/ckh.c b/src/ckh.c index 013b6249..e95e0a3e 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -274,7 +274,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { size_t usize; lg_curcells++; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); + usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { ret = true; goto label_return; @@ -319,7 +319,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { */ lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; - usize = sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); + usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return; } @@ -395,7 +395,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->hash = hash; ckh->keycomp = keycomp; - usize = sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); + usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { ret = true; goto label_return; diff --git a/src/ctl.c b/src/ctl.c index 33275d70..1520c508 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -701,7 +701,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { for (i = 0; i < NBINS; i++) { ctl_arena->astats->allocated_small += ctl_arena->astats->bstats[i].curregs * - index2size(i); + sz_index2size(i); ctl_arena->astats->nmalloc_small += ctl_arena->astats->bstats[i].nmalloc; ctl_arena->astats->ndalloc_small += @@ -2274,7 +2274,8 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { } CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, index2size(NBINS+(szind_t)mib[2]), size_t) +CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(NBINS+(szind_t)mib[2]), + size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/extent.c b/src/extent.c 
index c3d9baae..fb7a1468 100644 --- a/src/extent.c +++ b/src/extent.c @@ -220,7 +220,7 @@ extent_size_quantize_floor(size_t size) { assert(size > 0); assert((size & PAGE_MASK) == 0); - pind = psz2ind(size - large_pad + 1); + pind = sz_psz2ind(size - sz_large_pad + 1); if (pind == 0) { /* * Avoid underflow. This short-circuit would also do the right @@ -230,7 +230,7 @@ extent_size_quantize_floor(size_t size) { */ return size; } - ret = pind2sz(pind - 1) + large_pad; + ret = sz_pind2sz(pind - 1) + sz_large_pad; assert(ret <= size); return ret; } @@ -243,7 +243,7 @@ extent_size_quantize_ceil(size_t size) { size_t ret; assert(size > 0); - assert(size - large_pad <= LARGE_MAXCLASS); + assert(size - sz_large_pad <= LARGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = extent_size_quantize_floor(size); @@ -256,7 +256,8 @@ extent_size_quantize_ceil(size_t size) { * search would potentially find sufficiently aligned available * memory somewhere lower. */ - ret = pind2sz(psz2ind(ret - large_pad + 1)) + large_pad; + ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + + sz_large_pad; } return ret; } @@ -300,7 +301,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); - pszind_t pind = psz2ind(psz); + pszind_t pind = sz_psz2ind(psz); if (extent_heap_empty(&extents->heaps[pind])) { bitmap_unset(extents->bitmap, &extents_bitmap_info, (size_t)pind); @@ -329,7 +330,7 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); - pszind_t pind = psz2ind(psz); + pszind_t pind = sz_psz2ind(psz); extent_heap_remove(&extents->heaps[pind], extent); if (extent_heap_empty(&extents->heaps[pind])) { bitmap_set(extents->bitmap, &extents_bitmap_info, @@ -354,7 +355,7 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, static extent_t * 
extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, size_t size) { - pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); + pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); if (i < NPSIZES+1) { @@ -376,7 +377,7 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, size_t size) { extent_t *ret = NULL; - pszind_t pind = psz2ind(extent_size_quantize_ceil(size)); + pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); i < NPSIZES+1; i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, @@ -1040,7 +1041,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = pind2sz(arena->extent_grow_next + egn_skip); + size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; if (arena->extent_grow_next + egn_skip == NPSIZES) { @@ -1048,7 +1049,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, goto label_err; } assert(arena->extent_grow_next + egn_skip < NPSIZES); - alloc_size = pind2sz(arena->extent_grow_next + egn_skip); + alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } extent_t *extent = extent_alloc(tsdn, arena); @@ -1369,7 +1370,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, prev, false, + r_extent_hooks, extents, extent, prev, false, growing_retained)) { extent = prev; if (extents->delay_coalesce) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 1befb64d..268d19c9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -13,6 +13,7 @@ #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/spin.h" 
+#include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" @@ -107,110 +108,6 @@ enum { }; static uint8_t malloc_slow_flags; -JEMALLOC_ALIGNED(CACHELINE) -const size_t pind2sz_tab[NPSIZES+1] = { -#define PSZ_yes(lg_grp, ndelta, lg_delta) \ - (((ZU(1)<alignment == 0) ? s2u(LARGE_MINCLASS) : - sa2u(LARGE_MINCLASS, dopts->alignment)) == LARGE_MINCLASS); - ind_large = size2index(LARGE_MINCLASS); - bumped_usize = s2u(LARGE_MINCLASS); + assert(((dopts->alignment == 0) ? sz_s2u(LARGE_MINCLASS) : + sz_sa2u(LARGE_MINCLASS, dopts->alignment)) + == LARGE_MINCLASS); + ind_large = sz_size2index(LARGE_MINCLASS); + bumped_usize = sz_s2u(LARGE_MINCLASS); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { @@ -1792,16 +1690,16 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* This is the beginning of the "core" algorithm. */ if (dopts->alignment == 0) { - ind = size2index(size); + ind = sz_size2index(size); if (unlikely(ind >= NSIZES)) { goto label_oom; } if (config_stats || (config_prof && opt_prof)) { - usize = index2size(ind); + usize = sz_index2size(ind); assert(usize > 0 && usize <= LARGE_MAXCLASS); } } else { - usize = sa2u(size, dopts->alignment); + usize = sz_sa2u(size, dopts->alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; } @@ -2155,10 +2053,10 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { size_t usize; if (config_prof && opt_prof) { - usize = index2size(alloc_ctx.szind); + usize = sz_index2size(alloc_ctx.szind); prof_free(tsd, ptr, usize, &alloc_ctx); } else if (config_stats) { - usize = index2size(alloc_ctx.szind); + usize = sz_index2size(alloc_ctx.szind); } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; @@ -2192,7 +2090,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); 
rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind == size2index(usize)); + assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; prof_free(tsd, ptr, usize, ctx); } else { @@ -2247,16 +2145,16 @@ je_realloc(void *ptr, size_t size) { rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); - old_usize = index2size(alloc_ctx.szind); + old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { - usize = s2u(size); + usize = sz_s2u(size); ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? NULL : irealloc_prof(tsd, ptr, old_usize, usize, &alloc_ctx); } else { if (config_stats) { - usize = s2u(size); + usize = sz_s2u(size); } ret = iralloc(tsd, ptr, old_usize, size, 0, false); } @@ -2601,10 +2499,11 @@ je_rallocx(void *ptr, size_t size, int flags) { rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); - old_usize = index2size(alloc_ctx.szind); + old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { - usize = (alignment == 0) ? s2u(size) : sa2u(size, alignment); + usize = (alignment == 0) ? + sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { goto label_oom; } @@ -2685,10 +2584,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_realloc() will use the actual usize to decide whether to sample. 
*/ if (alignment == 0) { - usize_max = s2u(size+extra); + usize_max = sz_s2u(size+extra); assert(usize_max > 0 && usize_max <= LARGE_MAXCLASS); } else { - usize_max = sa2u(size+extra, alignment); + usize_max = sz_sa2u(size+extra, alignment); if (unlikely(usize_max == 0 || usize_max > LARGE_MAXCLASS)) { /* * usize_max is out of range, and chances are that @@ -2737,7 +2636,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != NSIZES); - old_usize = index2size(alloc_ctx.szind); + old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + @@ -2847,9 +2746,9 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { size_t usize; if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { - usize = s2u(size); + usize = sz_s2u(size); } else { - usize = sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); + usize = sz_sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); } witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); return usize; diff --git a/src/large.c b/src/large.c index 79d2c9da..27a2c679 100644 --- a/src/large.c +++ b/src/large.c @@ -12,7 +12,7 @@ void * large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { - assert(usize == s2u(usize)); + assert(usize == sz_s2u(usize)); return large_palloc(tsdn, arena, usize, CACHELINE, zero); } @@ -27,7 +27,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(!tsdn_null(tsdn) || arena != NULL); - ausize = sa2u(usize, alignment); + ausize = sz_sa2u(usize, alignment); if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) { return NULL; } @@ -97,7 +97,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = extent_arena_get(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t 
*extent_hooks = extent_hooks_get(arena); - size_t diff = extent_size_get(extent) - (usize + large_pad); + size_t diff = extent_size_get(extent) - (usize + sz_large_pad); assert(oldusize > usize); @@ -108,8 +108,8 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { /* Split excess pages. */ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - &extent_hooks, extent, usize + large_pad, size2index(usize), - false, diff, NSIZES, false); + &extent_hooks, extent, usize + sz_large_pad, + sz_size2index(usize), false, diff, NSIZES, false); if (trail == NULL) { return true; } @@ -178,7 +178,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - szind_t szind = size2index(usize); + szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_addr_get(extent), szind, false); diff --git a/src/prof.c b/src/prof.c index aa67486d..61dfa2ce 100644 --- a/src/prof.c +++ b/src/prof.c @@ -556,7 +556,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), + sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (gctx == NULL) { return NULL; @@ -819,7 +819,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { /* Link a prof_tctx_t into gctx for this thread. 
*/ ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), - size2index(sizeof(prof_tctx_t)), false, NULL, true, + sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) { @@ -1899,7 +1899,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, /* Initialize an empty cache for this thread. */ tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - size2index(sizeof(prof_tdata_t)), false, NULL, true, + sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) { return NULL; @@ -2135,7 +2135,7 @@ prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { return ""; } - ret = iallocztm(tsdn, size, size2index(size), false, NULL, true, + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) { return NULL; diff --git a/src/sz.c b/src/sz.c new file mode 100644 index 00000000..0986615f --- /dev/null +++ b/src/sz.c @@ -0,0 +1,106 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/sz.h" + +JEMALLOC_ALIGNED(CACHELINE) +const size_t sz_pind2sz_tab[NPSIZES+1] = { +#define PSZ_yes(lg_grp, ndelta, lg_delta) \ + (((ZU(1)<avail == NULL); size_t size = stack_nelms * sizeof(void *); /* Avoid false cacheline sharing. */ - size = sa2u(size, CACHELINE); + size = sz_sa2u(size, CACHELINE); void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, arena_get(TSDN_NULL, 0, true)); @@ -430,7 +430,7 @@ tcache_create_explicit(tsd_t *tsd) { stack_offset = size; size += stack_nelms * sizeof(void *); /* Avoid false cacheline sharing. 
*/ - size = sa2u(size, CACHELINE); + size = sz_sa2u(size, CACHELINE); tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, arena_get(TSDN_NULL, 0, true)); @@ -655,7 +655,7 @@ tcache_boot(tsdn_t *tsdn) { return true; } - nhbins = size2index(tcache_maxclass) + 1; + nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins diff --git a/src/zone.c b/src/zone.c index 37bc8da9..9d3b7b49 100644 --- a/src/zone.c +++ b/src/zone.c @@ -244,7 +244,7 @@ zone_good_size(malloc_zone_t *zone, size_t size) { if (size == 0) { size = 1; } - return s2u(size); + return sz_s2u(size); } static kern_return_t diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 958453d1..6409a922 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -81,7 +81,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return index2size(szind); + return sz_index2size(szind); } static unsigned diff --git a/test/unit/retained.c b/test/unit/retained.c index 883bf4af..d51a5981 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/spin.h" + static unsigned arena_ind; static size_t sz; static size_t esz; @@ -100,7 +102,7 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); - esz = sz + large_pad; + esz = sz + sz_large_pad; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); @@ -136,9 +138,9 @@ TEST_BEGIN(test_retained) { arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); size_t usable = 0; size_t fragmented = 0; - for (pszind_t pind = psz2ind(HUGEPAGE); pind < + for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < arena->extent_grow_next; pind++) { - size_t psz = pind2sz(pind); + size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; /* diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 
752dde99..814837bf 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -86,7 +86,8 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; extent_init(&extent_a, NULL, NULL, LARGE_MINCLASS, false, - size2index(LARGE_MINCLASS), 0, extent_state_active, false, false); + sz_size2index(LARGE_MINCLASS), 0, extent_state_active, false, + false); extent_init(&extent_b, NULL, NULL, 0, false, NSIZES, 0, extent_state_active, false, false); diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 70a86ad9..bcff5609 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -27,11 +27,11 @@ TEST_BEGIN(test_size_classes) { szind_t index, max_index; max_size_class = get_max_size_class(); - max_index = size2index(max_size_class); + max_index = sz_size2index(max_size_class); - for (index = 0, size_class = index2size(index); index < max_index || + for (index = 0, size_class = sz_index2size(index); index < max_index || size_class < max_size_class; index++, size_class = - index2size(index)) { + sz_index2size(index)) { assert_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); @@ -39,42 +39,44 @@ TEST_BEGIN(test_size_classes) { "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - assert_u_eq(index, size2index(size_class), - "size2index() does not reverse index2size(): index=%u -->" - " size_class=%zu --> index=%u --> size_class=%zu", index, - size_class, size2index(size_class), - index2size(size2index(size_class))); - assert_zu_eq(size_class, index2size(size2index(size_class)), - "index2size() does not reverse size2index(): index=%u -->" - " size_class=%zu --> index=%u --> size_class=%zu", index, - size_class, size2index(size_class), - index2size(size2index(size_class))); + assert_u_eq(index, sz_size2index(size_class), + "sz_size2index() does not reverse sz_index2size(): index=%u" + " --> 
size_class=%zu --> index=%u --> size_class=%zu", + index, size_class, sz_size2index(size_class), + sz_index2size(sz_size2index(size_class))); + assert_zu_eq(size_class, + sz_index2size(sz_size2index(size_class)), + "sz_index2size() does not reverse sz_size2index(): index=%u" + " --> size_class=%zu --> index=%u --> size_class=%zu", + index, size_class, sz_size2index(size_class), + sz_index2size(sz_size2index(size_class))); - assert_u_eq(index+1, size2index(size_class+1), + assert_u_eq(index+1, sz_size2index(size_class+1), "Next size_class does not round up properly"); assert_zu_eq(size_class, (index > 0) ? - s2u(index2size(index-1)+1) : s2u(1), - "s2u() does not round up to size class"); - assert_zu_eq(size_class, s2u(size_class-1), - "s2u() does not round up to size class"); - assert_zu_eq(size_class, s2u(size_class), - "s2u() does not compute same size class"); - assert_zu_eq(s2u(size_class+1), index2size(index+1), - "s2u() does not round up to next size class"); + sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1), + "sz_s2u() does not round up to size class"); + assert_zu_eq(size_class, sz_s2u(size_class-1), + "sz_s2u() does not round up to size class"); + assert_zu_eq(size_class, sz_s2u(size_class), + "sz_s2u() does not compute same size class"); + assert_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), + "sz_s2u() does not round up to next size class"); } - assert_u_eq(index, size2index(index2size(index)), - "size2index() does not reverse index2size()"); - assert_zu_eq(max_size_class, index2size(size2index(max_size_class)), - "index2size() does not reverse size2index()"); + assert_u_eq(index, sz_size2index(sz_index2size(index)), + "sz_size2index() does not reverse sz_index2size()"); + assert_zu_eq(max_size_class, sz_index2size( + sz_size2index(max_size_class)), + "sz_index2size() does not reverse sz_size2index()"); - assert_zu_eq(size_class, s2u(index2size(index-1)+1), - "s2u() does not round up to size class"); - assert_zu_eq(size_class, s2u(size_class-1), - 
"s2u() does not round up to size class"); - assert_zu_eq(size_class, s2u(size_class), - "s2u() does not compute same size class"); + assert_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), + "sz_s2u() does not round up to size class"); + assert_zu_eq(size_class, sz_s2u(size_class-1), + "sz_s2u() does not round up to size class"); + assert_zu_eq(size_class, sz_s2u(size_class), + "sz_s2u() does not compute same size class"); } TEST_END @@ -83,10 +85,11 @@ TEST_BEGIN(test_psize_classes) { pszind_t pind, max_pind; max_psz = get_max_size_class() + PAGE; - max_pind = psz2ind(max_psz); + max_pind = sz_psz2ind(max_psz); - for (pind = 0, size_class = pind2sz(pind); pind < max_pind || size_class - < max_psz; pind++, size_class = pind2sz(pind)) { + for (pind = 0, size_class = sz_pind2sz(pind); + pind < max_pind || size_class < max_psz; + pind++, size_class = sz_pind2sz(pind)) { assert_true(pind < max_pind, "Loop conditionals should be equivalent; pind=%u, " "size_class=%zu (%#zx)", pind, size_class, size_class); @@ -94,42 +97,42 @@ TEST_BEGIN(test_psize_classes) { "Loop conditionals should be equivalent; pind=%u, " "size_class=%zu (%#zx)", pind, size_class, size_class); - assert_u_eq(pind, psz2ind(size_class), - "psz2ind() does not reverse pind2sz(): pind=%u -->" + assert_u_eq(pind, sz_psz2ind(size_class), + "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->" " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, psz2ind(size_class), - pind2sz(psz2ind(size_class))); - assert_zu_eq(size_class, pind2sz(psz2ind(size_class)), - "pind2sz() does not reverse psz2ind(): pind=%u -->" + size_class, sz_psz2ind(size_class), + sz_pind2sz(sz_psz2ind(size_class))); + assert_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), + "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->" " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, psz2ind(size_class), - pind2sz(psz2ind(size_class))); + size_class, sz_psz2ind(size_class), + 
sz_pind2sz(sz_psz2ind(size_class))); - assert_u_eq(pind+1, psz2ind(size_class+1), + assert_u_eq(pind+1, sz_psz2ind(size_class+1), "Next size_class does not round up properly"); assert_zu_eq(size_class, (pind > 0) ? - psz2u(pind2sz(pind-1)+1) : psz2u(1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class-1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class), - "psz2u() does not compute same size class"); - assert_zu_eq(psz2u(size_class+1), pind2sz(pind+1), - "psz2u() does not round up to next size class"); + sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1), + "sz_psz2u() does not round up to size class"); + assert_zu_eq(size_class, sz_psz2u(size_class-1), + "sz_psz2u() does not round up to size class"); + assert_zu_eq(size_class, sz_psz2u(size_class), + "sz_psz2u() does not compute same size class"); + assert_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), + "sz_psz2u() does not round up to next size class"); } - assert_u_eq(pind, psz2ind(pind2sz(pind)), - "psz2ind() does not reverse pind2sz()"); - assert_zu_eq(max_psz, pind2sz(psz2ind(max_psz)), - "pind2sz() does not reverse psz2ind()"); + assert_u_eq(pind, sz_psz2ind(sz_pind2sz(pind)), + "sz_psz2ind() does not reverse sz_pind2sz()"); + assert_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), + "sz_pind2sz() does not reverse sz_psz2ind()"); - assert_zu_eq(size_class, psz2u(pind2sz(pind-1)+1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class-1), - "psz2u() does not round up to size class"); - assert_zu_eq(size_class, psz2u(size_class), - "psz2u() does not compute same size class"); + assert_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), + "sz_psz2u() does not round up to size class"); + assert_zu_eq(size_class, sz_psz2u(size_class-1), + "sz_psz2u() does not round up to size class"); + assert_zu_eq(size_class, sz_psz2u(size_class), + "sz_psz2u() does not compute same size class"); } TEST_END @@ 
-139,35 +142,35 @@ TEST_BEGIN(test_overflow) { max_size_class = get_max_size_class(); max_psz = max_size_class + PAGE; - assert_u_eq(size2index(max_size_class+1), NSIZES, - "size2index() should return NSIZES on overflow"); - assert_u_eq(size2index(ZU(PTRDIFF_MAX)+1), NSIZES, - "size2index() should return NSIZES on overflow"); - assert_u_eq(size2index(SIZE_T_MAX), NSIZES, - "size2index() should return NSIZES on overflow"); + assert_u_eq(sz_size2index(max_size_class+1), NSIZES, + "sz_size2index() should return NSIZES on overflow"); + assert_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + "sz_size2index() should return NSIZES on overflow"); + assert_u_eq(sz_size2index(SIZE_T_MAX), NSIZES, + "sz_size2index() should return NSIZES on overflow"); - assert_zu_eq(s2u(max_size_class+1), 0, - "s2u() should return 0 for unsupported size"); - assert_zu_eq(s2u(ZU(PTRDIFF_MAX)+1), 0, - "s2u() should return 0 for unsupported size"); - assert_zu_eq(s2u(SIZE_T_MAX), 0, - "s2u() should return 0 on overflow"); + assert_zu_eq(sz_s2u(max_size_class+1), 0, + "sz_s2u() should return 0 for unsupported size"); + assert_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, + "sz_s2u() should return 0 for unsupported size"); + assert_zu_eq(sz_s2u(SIZE_T_MAX), 0, + "sz_s2u() should return 0 on overflow"); - assert_u_eq(psz2ind(max_size_class+1), NPSIZES, - "psz2ind() should return NPSIZES on overflow"); - assert_u_eq(psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, - "psz2ind() should return NPSIZES on overflow"); - assert_u_eq(psz2ind(SIZE_T_MAX), NPSIZES, - "psz2ind() should return NPSIZES on overflow"); + assert_u_eq(sz_psz2ind(max_size_class+1), NPSIZES, + "sz_psz2ind() should return NPSIZES on overflow"); + assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + "sz_psz2ind() should return NPSIZES on overflow"); + assert_u_eq(sz_psz2ind(SIZE_T_MAX), NPSIZES, + "sz_psz2ind() should return NPSIZES on overflow"); - assert_zu_eq(psz2u(max_size_class+1), max_psz, - "psz2u() should return (LARGE_MAXCLASS + PAGE) for 
unsupported" + assert_zu_eq(sz_psz2u(max_size_class+1), max_psz, + "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); - assert_zu_eq(psz2u(ZU(PTRDIFF_MAX)+1), max_psz, - "psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported " + assert_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, + "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported " "size"); - assert_zu_eq(psz2u(SIZE_T_MAX), max_psz, - "psz2u() should return (LARGE_MAXCLASS + PAGE) on overflow"); + assert_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, + "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) on overflow"); } TEST_END From 9a86c9bd30e06daa20e4a4872d9292d177d66c8a Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 31 May 2017 14:15:53 -0700 Subject: [PATCH 0908/2608] Clean source directory before building tests in object directories. --- scripts/gen_run_tests.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 1d70057f..39e2be24 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -31,6 +31,7 @@ possible_malloc_conf_opts = [ ] print 'set -e' +print 'if [ -f Makefile ] ; then make relclean ; fi' print 'autoconf' print 'rm -rf run_tests.out' print 'mkdir run_tests.out' From 508f54b02bd08ac0d250df1fa15cf87d574ce8a1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 May 2017 20:45:29 -0700 Subject: [PATCH 0909/2608] Use real pthread_create for creating background threads. 
--- src/background_thread.c | 10 ++++++++-- src/mutex.c | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index d3e80b3d..ccb50a21 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -364,7 +364,11 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { pre_reentrancy(tsd); int err; load_pthread_create_fptr(); - if ((err = pthread_create(&info->thread, NULL, + /* + * To avoid complications (besides reentrancy), create internal + * background threads with the underlying pthread_create. + */ + if ((err = pthread_create_fptr(&info->thread, NULL, background_thread_entry, (void *)thread_ind)) != 0) { malloc_printf(": arena %u background thread creation " "failed (%d).\n", arena_ind, err); @@ -645,7 +649,9 @@ load_pthread_create_fptr(void) { if (pthread_create_fptr) { return pthread_create_fptr; } - +#ifdef JEMALLOC_LAZY_LOCK + isthreaded = true; +#endif pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { malloc_write(": Error in dlsym(RTLD_NEXT, " diff --git a/src/mutex.c b/src/mutex.c index 48e2940a..24852226 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -30,7 +30,7 @@ static malloc_mutex_t *postponed_mutexes = NULL; static void pthread_create_once(void) { pthread_create_fptr = load_pthread_create_fptr(); - isthreaded = true; + assert(isthreaded); } JEMALLOC_EXPORT int From fa35463d56be52a3a6e6b513fbb6cc6e63d9bcc7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 31 May 2017 09:43:43 -0700 Subject: [PATCH 0910/2608] Witness assertions: only assert locklessness when non-reentrant. Previously we could still hit these assertions down error paths or in the extended API. 
--- src/jemalloc.c | 111 +++++++++++++++++++++++++++---------------------- 1 file changed, 62 insertions(+), 49 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 268d19c9..02e32dad 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -627,6 +627,31 @@ stats_print_atexit(void) { je_malloc_stats_print(NULL, NULL, opt_stats_print_opts); } +/* + * Ensure that we don't hold any locks upon entry to or exit from allocator + * code (in a "broad" sense that doesn't count a reentrant allocation as an + * entrance or exit). + */ +JEMALLOC_ALWAYS_INLINE void +check_entry_exit_locking(tsdn_t *tsdn) { + if (!config_debug) { + return; + } + if (tsdn_null(tsdn)) { + return; + } + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * It's possible we hold locks at entry/exit if we're in a nested + * allocation. + */ + int8_t reentrancy_level = tsd_reentrancy_level_get(tsd); + if (reentrancy_level != 0) { + return; + } + witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); +} + /* * End miscellaneous support functions. */ @@ -1705,15 +1730,13 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } } + check_entry_exit_locking(tsd_tsdn(tsd)); /* * If we need to handle reentrancy, we can do it out of a * known-initialized arena (i.e. arena 0). */ reentrancy_level = tsd_reentrancy_level_get(tsd); - if (reentrancy_level == 0) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); - } if (sopts->slow && unlikely(reentrancy_level > 0)) { /* * We should never specify particular arenas or tcaches from @@ -1788,9 +1811,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* Success! 
*/ - if (reentrancy_level == 0) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); - } + check_entry_exit_locking(tsd_tsdn(tsd)); *dopts->result = allocation; return 0; @@ -1804,7 +1825,7 @@ label_oom: UTRACE(NULL, size, NULL); } - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); if (sopts->set_errno_on_error) { set_errno(ENOMEM); @@ -1835,7 +1856,7 @@ label_invalid_alignment: UTRACE(NULL, size, NULL); } - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); if (sopts->null_out_result_on_error) { *dopts->result = NULL; @@ -2036,9 +2057,8 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { if (!slow_path) { tsd_assert_fast(tsd); } - if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); - } else { + check_entry_exit_locking(tsd_tsdn(tsd)); + if (tsd_reentrancy_level_get(tsd) != 0) { assert(slow_path); } @@ -2076,9 +2096,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (!slow_path) { tsd_assert_fast(tsd); } - if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); - } else { + check_entry_exit_locking(tsd_tsdn(tsd)); + if (tsd_reentrancy_level_get(tsd) != 0) { assert(slow_path); } @@ -2138,7 +2157,7 @@ je_realloc(void *ptr, size_t size) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); @@ -2181,7 +2200,7 @@ je_realloc(void *ptr, size_t size) { *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, ret); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); return ret; } @@ -2190,10 +2209,7 @@ je_free(void *ptr) { UTRACE(ptr, 0, 0); if 
(likely(ptr != NULL)) { tsd_t *tsd = tsd_fetch(); - if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn( - tsd))); - } + check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (likely(tsd_fast(tsd))) { @@ -2209,10 +2225,7 @@ je_free(void *ptr) { } ifree(tsd, ptr, tcache, true); } - if (tsd_reentrancy_level_get(tsd) == 0) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn( - tsd))); - } + check_entry_exit_locking(tsd_tsdn(tsd)); } } @@ -2472,7 +2485,7 @@ je_rallocx(void *ptr, size_t size, int flags) { assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -2529,7 +2542,7 @@ je_rallocx(void *ptr, size_t size, int flags) { *tsd_thread_deallocatedp_get(tsd) += old_usize; } UTRACE(ptr, size, p); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2537,7 +2550,7 @@ label_oom: abort(); } UTRACE(ptr, size, 0); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); return NULL; } @@ -2629,7 +2642,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(SIZE_T_MAX - size >= extra); assert(malloc_initialized() || IS_INITIALIZER); tsd = tsd_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); @@ -2672,7 +2685,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } label_not_resized: UTRACE(ptr, size, ptr); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); return usize; } @@ -2686,7 
+2699,7 @@ je_sallocx(const void *ptr, int flags) { assert(ptr != NULL); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); if (config_debug || force_ivsalloc) { usize = ivsalloc(tsdn, ptr); @@ -2695,7 +2708,7 @@ je_sallocx(const void *ptr, int flags) { usize = isalloc(tsdn, ptr); } - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); return usize; } @@ -2706,7 +2719,7 @@ je_dallocx(void *ptr, int flags) { tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2737,12 +2750,12 @@ je_dallocx(void *ptr, int flags) { } else { ifree(tsd, ptr, tcache, true); } - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); } JEMALLOC_ALWAYS_INLINE size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); size_t usize; if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { @@ -2750,7 +2763,7 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { } else { usize = sz_sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); } - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); return usize; } @@ -2763,7 +2776,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { @@ -2794,7 +2807,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } else { isfree(tsd, ptr, usize, tcache, true); } - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + 
check_entry_exit_locking(tsd_tsdn(tsd)); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2810,14 +2823,14 @@ je_nallocx(size_t size, int flags) { } tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { return 0; } - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); return usize; } @@ -2832,9 +2845,9 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, } tsd = tsd_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); return ret; } @@ -2848,9 +2861,9 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { } tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); ret = ctl_nametomib(tsdn, name, mibp, miblenp); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); return ret; } @@ -2865,9 +2878,9 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, } tsd = tsd_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); - witness_assert_lockless(tsdn_witness_tsdp_get(tsd_tsdn(tsd))); + check_entry_exit_locking(tsd_tsdn(tsd)); return ret; } @@ -2877,9 +2890,9 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, tsdn_t *tsdn; tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); } JEMALLOC_EXPORT 
size_t JEMALLOC_NOTHROW @@ -2890,7 +2903,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); if (unlikely(ptr == NULL)) { ret = 0; @@ -2903,7 +2916,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } } - witness_assert_lockless(tsdn_witness_tsdp_get(tsdn)); + check_entry_exit_locking(tsdn); return ret; } From 596b479d839d9f85538a6ff756a81e1ef8d4abb3 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 31 May 2017 21:34:26 -0700 Subject: [PATCH 0911/2608] Skip default tcache testing if !opt_tcache. --- test/unit/mallctl.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8339e8c5..80b84a06 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -210,12 +210,12 @@ TEST_BEGIN(test_manpage_example) { TEST_END TEST_BEGIN(test_tcache_none) { - void *p0, *q, *p1; + test_skip_if(!opt_tcache); /* Allocate p and q. */ - p0 = mallocx(42, 0); + void *p0 = mallocx(42, 0); assert_ptr_not_null(p0, "Unexpected mallocx() failure"); - q = mallocx(42, 0); + void *q = mallocx(42, 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); /* Deallocate p and q, but bypass the tcache for q. */ @@ -223,7 +223,7 @@ TEST_BEGIN(test_tcache_none) { dallocx(q, MALLOCX_TCACHE_NONE); /* Make sure that tcache-based allocation returns p, not q. */ - p1 = mallocx(42, 0); + void *p1 = mallocx(42, 0); assert_ptr_not_null(p1, "Unexpected mallocx() failure"); assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); From b511232fcd8aeb85d5dc8e0515539baa5d333991 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 31 May 2017 16:45:14 -0700 Subject: [PATCH 0912/2608] Refactor/fix background_thread/percpu_arena bootstrapping. 
Refactor bootstrapping such that dlsym() is called during the bootstrapping phase that can tolerate reentrant allocation. --- include/jemalloc/internal/arena_externs.h | 5 +- include/jemalloc/internal/arena_types.h | 24 +++- .../internal/background_thread_externs.h | 8 +- .../internal/background_thread_structs.h | 4 + .../internal/jemalloc_internal_inlines_a.h | 18 +-- .../internal/jemalloc_internal_inlines_b.h | 7 +- src/arena.c | 9 +- src/background_thread.c | 116 ++++++++++-------- src/ctl.c | 7 +- src/jemalloc.c | 63 ++++++---- src/mutex.c | 12 +- test/unit/mallctl.c | 12 +- 12 files changed, 161 insertions(+), 124 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cfb7c6fb..9ad9786f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -11,9 +11,8 @@ extern ssize_t opt_muzzy_decay_ms; extern const arena_bin_info_t arena_bin_info[NBINS]; -extern percpu_arena_mode_t percpu_arena_mode; -extern const char *opt_percpu_arena; -extern const char *percpu_arena_mode_names[]; +extern percpu_arena_mode_t opt_percpu_arena; +extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 01b9096a..a691bd81 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -20,14 +20,26 @@ typedef struct arena_tdata_s arena_tdata_t; typedef struct alloc_ctx_s alloc_ctx_t; typedef enum { - percpu_arena_disabled = 0, - percpu_arena = 1, - per_phycpu_arena = 2, /* i.e. hyper threads share arena. */ + percpu_arena_mode_names_base = 0, /* Used for options processing. */ - percpu_arena_mode_limit = 3 + /* + * *_uninit are used only during bootstrapping, and must correspond + * to initialized variant plus percpu_arena_mode_enabled_base. 
+ */ + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, + + /* All non-disabled modes must come after percpu_arena_disabled. */ + percpu_arena_disabled = 2, + + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_enabled_base = 3, + + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. */ } percpu_arena_mode_t; -#define PERCPU_ARENA_MODE_DEFAULT percpu_arena_disabled -#define OPT_PERCPU_ARENA_DEFAULT "disabled" +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index fe25acfe..a2d95a73 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -8,7 +8,6 @@ extern size_t n_background_threads; extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); -bool background_threads_init(tsd_t *tsd); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); bool background_threads_disable_single(tsd_t *tsd, @@ -22,10 +21,11 @@ void background_thread_postfork_child(tsdn_t *tsdn); bool background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats); -#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) -extern int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -void *load_pthread_create_fptr(void); #endif +bool background_thread_boot0(void); +bool background_thread_boot1(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git 
a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 9507abcd..edf90fe2 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -3,6 +3,10 @@ /* This file really combines "structs" and "types", but only transitionally. */ +#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +# define JEMALLOC_PTHREAD_CREATE_WRAPPER +#endif + #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX struct background_thread_info_s { diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index d0bf2eee..854fb1e2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -21,17 +21,17 @@ malloc_getcpu(void) { /* Return the chosen arena index based on current cpu. */ JEMALLOC_ALWAYS_INLINE unsigned percpu_arena_choose(void) { - unsigned arena_ind; - assert(have_percpu_arena && (percpu_arena_mode != - percpu_arena_disabled)); + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); malloc_cpuid_t cpuid = malloc_getcpu(); assert(cpuid >= 0); - if ((percpu_arena_mode == percpu_arena) || - ((unsigned)cpuid < ncpus / 2)) { + + unsigned arena_ind; + if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus / + 2)) { arena_ind = cpuid; } else { - assert(percpu_arena_mode == per_phycpu_arena); + assert(opt_percpu_arena == per_phycpu_arena); /* Hyper threads on the same physical CPU share arena. */ arena_ind = cpuid - ncpus / 2; } @@ -41,9 +41,9 @@ percpu_arena_choose(void) { /* Return the limit of percpu auto arena range, i.e. arenas[0...ind_limit). 
*/ JEMALLOC_ALWAYS_INLINE unsigned -percpu_arena_ind_limit(void) { - assert(have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled)); - if (percpu_arena_mode == per_phycpu_arena && ncpus > 1) { +percpu_arena_ind_limit(percpu_arena_mode_t mode) { + assert(have_percpu_arena && PERCPU_ARENA_ENABLED(mode)); + if (mode == per_phycpu_arena && ncpus > 1) { if (ncpus % 2) { /* This likely means a misconfig. */ return ncpus / 2 + 1; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 37493160..2e76e5d8 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -43,9 +43,10 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { * auto percpu arena range, (i.e. thread is assigned to a manually * managed arena), then percpu arena is skipped. */ - if (have_percpu_arena && (percpu_arena_mode != percpu_arena_disabled) && - !internal && (arena_ind_get(ret) < percpu_arena_ind_limit()) && - (ret->last_thd != tsd_tsdn(tsd))) { + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) && + !internal && (arena_ind_get(ret) < + percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd != + tsd_tsdn(tsd))) { unsigned ind = percpu_arena_choose(); if (arena_ind_get(ret) != ind) { percpu_arena_update(tsd, ind); diff --git a/src/arena.c b/src/arena.c index 151aad3e..dedbb3e3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -13,13 +13,18 @@ /******************************************************************************/ /* Data. */ +/* + * Define names for both unininitialized and initialized phases, so that + * options and mallctl processing are straightforward. 
+ */ const char *percpu_arena_mode_names[] = { + "percpu", + "phycpu", "disabled", "percpu", "phycpu" }; -const char *opt_percpu_arena = OPT_PERCPU_ARENA_DEFAULT; -percpu_arena_mode_t percpu_arena_mode = PERCPU_ARENA_MODE_DEFAULT; +percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT; ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT; ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; diff --git a/src/background_thread.c b/src/background_thread.c index ccb50a21..800526f5 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -25,7 +25,6 @@ background_thread_info_t *background_thread_info; #ifndef JEMALLOC_BACKGROUND_THREAD #define NOT_REACHED { not_reached(); } bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED -bool background_threads_init(tsd_t *tsd) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED bool background_threads_disable_single(tsd_t *tsd, @@ -53,44 +52,6 @@ background_thread_info_reinit(tsdn_t *tsdn, background_thread_info_t *info) { } } -bool -background_threads_init(tsd_t *tsd) { - assert(have_background_thread); - assert(narenas_total_get() > 0); - - background_thread_enabled_set(tsd_tsdn(tsd), opt_background_thread); - if (malloc_mutex_init(&background_thread_lock, - "background_thread_global", - WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, - malloc_mutex_rank_exclusive)) { - return true; - } - background_thread_info = (background_thread_info_t *)base_alloc( - tsd_tsdn(tsd), b0get(), ncpus * sizeof(background_thread_info_t), - CACHELINE); - if (background_thread_info == NULL) { - return true; - } - - for (unsigned i = 0; i < ncpus; i++) { - background_thread_info_t *info = &background_thread_info[i]; - if (malloc_mutex_init(&info->mtx, "background_thread", - WITNESS_RANK_BACKGROUND_THREAD, - malloc_mutex_rank_exclusive)) { - return true; - } - if (pthread_cond_init(&info->cond, NULL)) { - return true; - } - 
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - info->started = false; - background_thread_info_reinit(tsd_tsdn(tsd), info); - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - } - - return false; -} - static inline bool set_current_thread_affinity(UNUSED int cpu) { #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) @@ -363,12 +324,11 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { pre_reentrancy(tsd); int err; - load_pthread_create_fptr(); /* * To avoid complications (besides reentrancy), create internal * background threads with the underlying pthread_create. */ - if ((err = pthread_create_fptr(&info->thread, NULL, + if ((err = pthread_create_wrapper(&info->thread, NULL, background_thread_entry, (void *)thread_ind)) != 0) { malloc_printf(": arena %u background thread creation " "failed (%d).\n", arena_ind, err); @@ -638,28 +598,84 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ -#if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER #include -int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, +static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -void * -load_pthread_create_fptr(void) { - if (pthread_create_fptr) { - return pthread_create_fptr; - } +static void +pthread_create_wrapper_once(void) { #ifdef JEMALLOC_LAZY_LOCK isthreaded = true; #endif +} + +int +pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *__restrict arg) { + static pthread_once_t once_control = PTHREAD_ONCE_INIT; + + pthread_once(&once_control, pthread_create_wrapper_once); + + return pthread_create_fptr(thread, attr, start_routine, arg); +} +#endif + +bool +background_thread_boot0(void) { + if (!have_background_thread && opt_background_thread) { + malloc_printf(": option background_thread 
currently " + "supports pthread only. \n"); + return true; + } + +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { malloc_write(": Error in dlsym(RTLD_NEXT, " "\"pthread_create\")\n"); abort(); } - - return pthread_create_fptr; +#endif + return false; } +bool +background_thread_boot1(tsdn_t *tsdn) { +#ifdef JEMALLOC_BACKGROUND_THREAD + assert(have_background_thread); + assert(narenas_total_get() > 0); + + background_thread_enabled_set(tsdn, opt_background_thread); + if (malloc_mutex_init(&background_thread_lock, + "background_thread_global", + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + malloc_mutex_rank_exclusive)) { + return true; + } + background_thread_info = (background_thread_info_t *)base_alloc(tsdn, + b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); + if (background_thread_info == NULL) { + return true; + } + + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (malloc_mutex_init(&info->mtx, "background_thread", + WITNESS_RANK_BACKGROUND_THREAD, + malloc_mutex_rank_exclusive)) { + return true; + } + if (pthread_cond_init(&info->cond, NULL)) { + return true; + } + malloc_mutex_lock(tsdn, &info->mtx); + info->started = false; + background_thread_info_reinit(tsdn, info); + malloc_mutex_unlock(tsdn, &info->mtx); + } #endif + + return false; +} diff --git a/src/ctl.c b/src/ctl.c index 1520c508..70059886 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1559,7 +1559,8 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) -CTL_RO_NL_GEN(opt_percpu_arena, opt_percpu_arena, const char *) +CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], + const char *) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_dirty_decay_ms, 
opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) @@ -1610,8 +1611,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } if (have_percpu_arena && - (percpu_arena_mode != percpu_arena_disabled)) { - if (newind < percpu_arena_ind_limit()) { + PERCPU_ARENA_ENABLED(opt_percpu_arena)) { + if (newind < percpu_arena_ind_limit(opt_percpu_arena)) { /* * If perCPU arena is enabled, thread_arena * control is not allowed for the auto arena diff --git a/src/jemalloc.c b/src/jemalloc.c index 02e32dad..c3983a5d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -463,7 +463,7 @@ arena_t * arena_choose_hard(tsd_t *tsd, bool internal) { arena_t *ret JEMALLOC_CC_SILENCE_INIT(NULL); - if (have_percpu_arena && percpu_arena_mode != percpu_arena_disabled) { + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)) { unsigned choose = percpu_arena_choose(); ret = arena_get(tsd_tsdn(tsd), choose, true); assert(ret != NULL); @@ -1100,17 +1100,16 @@ malloc_conf_init(void) { if (strncmp("percpu_arena", k, klen) == 0) { int i; bool match = false; - for (i = 0; i < percpu_arena_mode_limit; i++) { + for (i = percpu_arena_mode_names_base; i < + percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], - v, vlen) == 0) { + v, vlen) == 0) { if (!have_percpu_arena) { malloc_conf_error( "No getcpu support", k, klen, v, vlen); } - percpu_arena_mode = i; - opt_percpu_arena = - percpu_arena_mode_names[i]; + opt_percpu_arena = i; match = true; break; } @@ -1276,6 +1275,10 @@ malloc_init_hard_recursible(void) { } #endif + if (background_thread_boot0()) { + return true; + } + return false; } @@ -1293,13 +1296,25 @@ malloc_narenas_default(void) { } } +static percpu_arena_mode_t +percpu_arena_as_initialized(percpu_arena_mode_t mode) { + assert(!malloc_initialized()); + assert(mode <= percpu_arena_disabled); + + if (mode != percpu_arena_disabled) { + mode += percpu_arena_mode_enabled_base; + } + + return mode; 
+} + static bool malloc_init_narenas(void) { assert(ncpus > 0); - if (percpu_arena_mode != percpu_arena_disabled) { + if (opt_percpu_arena != percpu_arena_disabled) { if (!have_percpu_arena || malloc_getcpu() < 0) { - percpu_arena_mode = percpu_arena_disabled; + opt_percpu_arena = percpu_arena_disabled; malloc_printf(": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? opt_narenas : malloc_narenas_default()); @@ -1315,8 +1330,9 @@ malloc_init_narenas(void) { } return true; } - if ((percpu_arena_mode == per_phycpu_arena) && - (ncpus % 2 != 0)) { + /* NB: opt_percpu_arena isn't fully initialized yet. */ + if (percpu_arena_as_initialized(opt_percpu_arena) == + per_phycpu_arena && ncpus % 2 != 0) { malloc_printf(": invalid " "configuration -- per physical CPU arena " "with odd number (%u) of CPUs (no hyper " @@ -1324,7 +1340,8 @@ malloc_init_narenas(void) { if (opt_abort) abort(); } - unsigned n = percpu_arena_ind_limit(); + unsigned n = percpu_arena_ind_limit( + percpu_arena_as_initialized(opt_percpu_arena)); if (opt_narenas < n) { /* * If narenas is specified with percpu_arena @@ -1363,26 +1380,16 @@ malloc_init_narenas(void) { return false; } -static bool -malloc_init_background_threads(tsd_t *tsd) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &init_lock); - if (!have_background_thread) { - if (opt_background_thread) { - malloc_printf(": option background_thread " - "currently supports pthread only. \n"); - return true; - } else { - return false; - } - } - - return background_threads_init(tsd); +static void +malloc_init_percpu(void) { + opt_percpu_arena = percpu_arena_as_initialized(opt_percpu_arena); } static bool malloc_init_hard_finish(void) { - if (malloc_mutex_boot()) + if (malloc_mutex_boot()) { return true; + } malloc_init_state = malloc_init_initialized; malloc_slow_flag_init(); @@ -1421,7 +1428,7 @@ malloc_init_hard(void) { malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); /* Initialize narenas before prof_boot2 (for allocation). 
*/ - if (malloc_init_narenas() || malloc_init_background_threads(tsd)) { + if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; } @@ -1431,6 +1438,8 @@ malloc_init_hard(void) { return true; } + malloc_init_percpu(); + if (malloc_init_hard_finish()) { malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); return true; diff --git a/src/mutex.c b/src/mutex.c index 24852226..a528ef0c 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -27,21 +27,11 @@ static malloc_mutex_t *postponed_mutexes = NULL; */ #if defined(JEMALLOC_LAZY_LOCK) && !defined(_WIN32) -static void -pthread_create_once(void) { - pthread_create_fptr = load_pthread_create_fptr(); - assert(isthreaded); -} - JEMALLOC_EXPORT int pthread_create(pthread_t *__restrict thread, const pthread_attr_t *__restrict attr, void *(*start_routine)(void *), void *__restrict arg) { - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - - pthread_once(&once_control, pthread_create_once); - - return pthread_create_fptr(thread, attr, start_routine, arg); + return pthread_create_wrapper(thread, attr, start_routine, arg); } #endif diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 80b84a06..f6116549 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -327,18 +327,18 @@ TEST_END TEST_BEGIN(test_thread_arena) { unsigned old_arena_ind, new_arena_ind, narenas; - const char *opt_percpu_arena; - size_t sz = sizeof(opt_percpu_arena); - assert_d_eq(mallctl("opt.percpu_arena", &opt_percpu_arena, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + const char *opa; + size_t sz = sizeof(opa); + assert_d_eq(mallctl("opt.percpu_arena", &opa, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); - if (strcmp(opt_percpu_arena, "disabled") == 0) { + if 
(strcmp(opa, "disabled") == 0) { new_arena_ind = narenas - 1; assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), 0, @@ -350,7 +350,7 @@ TEST_BEGIN(test_thread_arena) { } else { assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - new_arena_ind = percpu_arena_ind_limit() - 1; + new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1; if (old_arena_ind != new_arena_ind) { assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, From fd0fa003e188e94beab8871ff0c17ea4a8a2c706 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 31 May 2017 11:56:31 -0700 Subject: [PATCH 0913/2608] Test with background_thread:true. Add testing for background_thread:true, and condition a xallocx() --> rallocx() escalation assertion to allow for spurious in-place rallocx() following xallocx() failure. --- .travis.yml | 22 ++++++++++++++++++++++ scripts/gen_run_tests.py | 4 +++- scripts/gen_travis.py | 4 +++- test/unit/junk.c | 11 +++++++---- 4 files changed, 35 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 917314fa..418fc6fd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -26,6 +26,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx @@ -54,6 +56,8 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux 
env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: @@ -90,6 +94,12 @@ matrix: apt: packages: - gcc-multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: + apt: + packages: + - gcc-multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -100,6 +110,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -108,18 +120,28 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 39e2be24..ddf21533 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -28,6 +28,7 @@ possible_malloc_conf_opts = [ 'tcache:false', 'dss:primary', 'percpu_arena:percpu', + 'background_thread:true', ] print 'set -e' @@ -57,7 +58,8 @@ for cc, 
cxx in possible_compilers: ) # Per CPU arenas are only supported on Linux. - linux_supported = ('percpu_arena:percpu' in malloc_conf_opts) + linux_supported = ('percpu_arena:percpu' in malloc_conf_opts \ + or 'background_thread:true' in malloc_conf_opts) # Heap profiling and dss are not supported on OS X. darwin_unsupported = ('--enable-prof' in config_opts or \ 'dss:primary' in malloc_conf_opts) diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 26997b25..6dd39290 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -49,6 +49,7 @@ malloc_conf_unusuals = [ 'tcache:false', 'dss:primary', 'percpu_arena:percpu', + 'background_thread:true', ] all_unusuals = ( @@ -80,7 +81,8 @@ for unusual_combination in unusual_combinations_to_test: x for x in unusual_combination if x in malloc_conf_unusuals] # Filter out unsupported configurations on OS X. if os == 'osx' and ('dss:primary' in malloc_conf or \ - 'percpu_arena:percpu' in malloc_conf): + 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \ + in malloc_conf): continue if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) diff --git a/test/unit/junk.c b/test/unit/junk.c index f9390e41..fd0e65b1 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -96,12 +96,15 @@ test_junk(size_t sz_min, size_t sz_max) { t = (uint8_t *)rallocx(s, sz+1, 0); assert_ptr_not_null((void *)t, "Unexpected rallocx() failure"); - assert_ptr_ne(s, t, "Unexpected in-place rallocx()"); assert_zu_ge(sallocx(t, 0), sz+1, "Unexpectedly small rallocx() result"); - assert_true(!opt_junk_free || saw_junking, - "Expected region of size %zu to be junk-filled", - sz); + if (!background_thread_enabled()) { + assert_ptr_ne(s, t, + "Unexpected in-place rallocx()"); + assert_true(!opt_junk_free || saw_junking, + "Expected region of size %zu to be " + "junk-filled", sz); + } s = t; } } From c84ec3e9da66162943ee33afe73c7f898fa134e2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 1 
Jun 2017 00:04:56 -0700 Subject: [PATCH 0914/2608] Fix background thread creation. The state initialization should be done before pthread_create. --- src/background_thread.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index 800526f5..a89cad2e 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -317,34 +317,38 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { bool need_new_thread; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); need_new_thread = background_thread_enabled() && !info->started; + if (need_new_thread) { + info->started = true; + background_thread_info_reinit(tsd_tsdn(tsd), info); + n_background_threads++; + } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); if (!need_new_thread) { return false; } pre_reentrancy(tsd); - int err; /* * To avoid complications (besides reentrancy), create internal * background threads with the underlying pthread_create. 
*/ - if ((err = pthread_create_wrapper(&info->thread, NULL, - background_thread_entry, (void *)thread_ind)) != 0) { - malloc_printf(": arena %u background thread creation " - "failed (%d).\n", arena_ind, err); - } + int err = pthread_create_wrapper(&info->thread, NULL, + background_thread_entry, (void *)thread_ind); post_reentrancy(tsd); - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - assert(info->started == false); - if (err == 0) { - info->started = true; - background_thread_info_reinit(tsd_tsdn(tsd), info); - n_background_threads++; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + if (err != 0) { + malloc_printf(": arena %u background thread creation " + "failed (%d).\n", arena_ind, err); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + info->started = false; + n_background_threads--; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - return (err != 0); + return true; + } + assert(info->started); + + return false; } bool From 340071f0cf6902a79102328960f5cf1ced87f3c2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 1 Jun 2017 12:52:09 -0700 Subject: [PATCH 0915/2608] Set isthreaded when enabling background_thread. 
--- .../internal/background_thread_externs.h | 1 + src/background_thread.c | 62 ++++++++++++------- src/ctl.c | 1 + 3 files changed, 41 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index a2d95a73..aef1c90b 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -20,6 +20,7 @@ void background_thread_postfork_parent(tsdn_t *tsdn); void background_thread_postfork_child(tsdn_t *tsdn); bool background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats); +void background_thread_ctl_init(tsdn_t *tsdn); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, diff --git a/src/background_thread.c b/src/background_thread.c index a89cad2e..64eba1a7 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -22,6 +22,29 @@ background_thread_info_t *background_thread_info; /******************************************************************************/ +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +#include + +static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, + void *(*)(void *), void *__restrict); +static pthread_once_t once_control = PTHREAD_ONCE_INIT; + +static void +pthread_create_wrapper_once(void) { +#ifdef JEMALLOC_LAZY_LOCK + isthreaded = true; +#endif +} + +int +pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *__restrict arg) { + pthread_once(&once_control, pthread_create_wrapper_once); + + return pthread_create_fptr(thread, attr, start_routine, arg); +} +#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */ + #ifndef JEMALLOC_BACKGROUND_THREAD #define NOT_REACHED { not_reached(); } bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED @@ -37,6 +60,7 @@ void 
background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED bool background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) NOT_REACHED +void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED #undef NOT_REACHED #else @@ -600,31 +624,19 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS -#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ - -#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER -#include - -static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, - void *(*)(void *), void *__restrict); - -static void -pthread_create_wrapper_once(void) { -#ifdef JEMALLOC_LAZY_LOCK - isthreaded = true; -#endif -} - -int -pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, - void *(*start_routine)(void *), void *__restrict arg) { - static pthread_once_t once_control = PTHREAD_ONCE_INIT; - +/* + * When lazy lock is enabled, we need to make sure setting isthreaded before + * taking any background_thread locks. This is called early in ctl (instead of + * wait for the pthread_create calls to trigger) because the mutex is required + * before creating background threads. 
+ */ +void +background_thread_ctl_init(tsdn_t *tsdn) { + malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); pthread_once(&once_control, pthread_create_wrapper_once); - - return pthread_create_fptr(thread, attr, start_routine, arg); } -#endif + +#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ bool background_thread_boot0(void) { @@ -658,6 +670,10 @@ background_thread_boot1(tsdn_t *tsdn) { malloc_mutex_rank_exclusive)) { return true; } + if (opt_background_thread) { + background_thread_ctl_init(tsdn); + } + background_thread_info = (background_thread_info_t *)base_alloc(tsdn, b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { diff --git a/src/ctl.c b/src/ctl.c index 70059886..c3514bdd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1499,6 +1499,7 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, if (!have_background_thread) { return ENOENT; } + background_thread_ctl_init(tsd_tsdn(tsd)); malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); if (newp == NULL) { From 530c07a45ba3ea744b280c9df5d94165839f7b09 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 31 May 2017 15:21:10 -0700 Subject: [PATCH 0916/2608] Set reentrancy level to 1 during init. This makes sure we go down slow path w/ a0 in init. 
--- src/jemalloc.c | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c3983a5d..7e695d66 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1397,6 +1397,18 @@ malloc_init_hard_finish(void) { return false; } +static void +malloc_init_hard_cleanup(tsdn_t *tsdn, bool reentrancy_set) { + malloc_mutex_assert_owner(tsdn, &init_lock); + malloc_mutex_unlock(tsdn, &init_lock); + if (reentrancy_set) { + assert(!tsdn_null(tsdn)); + tsd_t *tsd = tsdn_tsd(tsdn); + assert(tsd_reentrancy_level_get(tsd) > 0); + post_reentrancy(tsd); + } +} + static bool malloc_init_hard(void) { tsd_t *tsd; @@ -1405,15 +1417,18 @@ malloc_init_hard(void) { _init_init_lock(); #endif malloc_mutex_lock(TSDN_NULL, &init_lock); + +#define UNLOCK_RETURN(tsdn, ret, reentrancy) \ + malloc_init_hard_cleanup(tsdn, reentrancy); \ + return ret; + if (!malloc_init_hard_needed()) { - malloc_mutex_unlock(TSDN_NULL, &init_lock); - return false; + UNLOCK_RETURN(TSDN_NULL, false, false) } if (malloc_init_state != malloc_init_a0_initialized && malloc_init_hard_a0_locked()) { - malloc_mutex_unlock(TSDN_NULL, &init_lock); - return true; + UNLOCK_RETURN(TSDN_NULL, true, false) } malloc_mutex_unlock(TSDN_NULL, &init_lock); @@ -1425,29 +1440,27 @@ malloc_init_hard(void) { if (malloc_init_hard_recursible()) { return true; } - malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); + malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); + /* Set reentrancy level to 1 during init. */ + pre_reentrancy(tsd); /* Initialize narenas before prof_boot2 (for allocation). 
*/ if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { - malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - return true; + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } - if (config_prof && prof_boot2(tsd)) { - malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - return true; + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } malloc_init_percpu(); if (malloc_init_hard_finish()) { - malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - return true; + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } - + post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - malloc_tsd_boot1(); + malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); if (opt_background_thread) { @@ -1463,7 +1476,7 @@ malloc_init_hard(void) { return true; } } - +#undef UNLOCK_RETURN return false; } From 3a813946fb9b0ad93279ea30834df917b261a5a5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 2 Jun 2017 16:27:05 -0700 Subject: [PATCH 0917/2608] Take background thread lock when setting extent hooks. 
--- include/jemalloc/internal/extent_externs.h | 3 ++- src/ctl.c | 2 +- src/extent.c | 14 ++++++++++++-- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index acb3ef49..489a813c 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -15,7 +15,8 @@ extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); extent_hooks_t *extent_hooks_get(arena_t *arena); -extent_hooks_t *extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks); +extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks); #ifdef JEMALLOC_JET size_t extent_size_quantize_floor(size_t size); diff --git a/src/ctl.c b/src/ctl.c index c3514bdd..2c3f9945 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2154,7 +2154,7 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(arena, + old_extent_hooks = extent_hooks_set(tsd, arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { diff --git a/src/extent.c b/src/extent.c index fb7a1468..e95858e3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -197,8 +197,18 @@ extent_hooks_get(arena_t *arena) { } extent_hooks_t * -extent_hooks_set(arena_t *arena, extent_hooks_t *extent_hooks) { - return base_extent_hooks_set(arena->base, extent_hooks); +extent_hooks_set(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) { + background_thread_info_t *info; + if (have_background_thread) { + info = arena_background_thread_info_get(arena); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + } + extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); + if (have_background_thread) { + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + 
} + + return ret; } static void From 29c2577ee0bfa57009a5827bd44cab04b738a914 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sat, 3 Jun 2017 17:00:48 -0700 Subject: [PATCH 0918/2608] Remove assertions on extent_hooks being default. It's possible to customize the extent_hooks while still using part of the default implementation. --- src/extent.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/extent.c b/src/extent.c index e95858e3..386a7ce6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1013,8 +1013,6 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, tsdn_t *tsdn; arena_t *arena; - assert(extent_hooks == &extent_hooks_default); - tsdn = tsdn_fetch(); arena = arena_get(tsdn, arena_ind, false); /* @@ -1457,8 +1455,6 @@ extent_dalloc_default_impl(void *addr, size_t size) { static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); - return extent_dalloc_default_impl(addr, size); } @@ -1551,8 +1547,6 @@ extent_destroy_default_impl(void *addr, size_t size) { static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); - extent_destroy_default_impl(addr, size); } @@ -1587,8 +1581,6 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); - return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), length); } @@ -1619,8 +1611,6 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); - return 
pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), length); } @@ -1646,7 +1636,6 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); assert(addr != NULL); assert((offset & PAGE_MASK) == 0); assert(length != 0); @@ -1683,7 +1672,6 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); assert(addr != NULL); assert((offset & PAGE_MASK) == 0); assert(length != 0); @@ -1720,8 +1708,6 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); - return !maps_coalesce; } #endif @@ -1825,8 +1811,6 @@ extent_merge_default_impl(void *addr_a, void *addr_b) { static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - assert(extent_hooks == &extent_hooks_default); - return extent_merge_default_impl(addr_a, addr_b); } #endif From 00869e39a334f3d869dfb9f8e651c2de3dded76f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 6 Jun 2017 21:44:39 -0700 Subject: [PATCH 0919/2608] Make tsd no-cleanup during tsd reincarnation. Since tsd cleanup isn't guaranteed when reincarnated, we set up tsd in a way that needs no cleanup, by making it go through the slow path instead. 
--- include/jemalloc/internal/tsd.h | 2 +- src/jemalloc.c | 3 +- src/tsd.c | 66 +++++++++++++++++++++++---------- test/unit/tsd.c | 4 +- 4 files changed, 51 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index f304e1d9..4efaf4e2 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -154,7 +154,6 @@ void malloc_tsd_dalloc(void *wrapper); void malloc_tsd_cleanup_register(bool (*f)(void)); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); -bool tsd_data_init(void *arg); void tsd_cleanup(void *arg); tsd_t *tsd_fetch_slow(tsd_t *tsd); void tsd_slow_update(tsd_t *tsd); @@ -228,6 +227,7 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t val) { \ + assert(tsd->state != tsd_state_reincarnated); \ *tsd_##n##p_get(tsd) = val; \ } MALLOC_TSD diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e695d66..9a5685b4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1764,7 +1764,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { * We should never specify particular arenas or tcaches from * within our internal allocations. */ - assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC); + assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC || + dopts->tcache_ind == TCACHE_IND_NONE); assert(dopts->arena_ind = ARENA_IND_AUTOMATIC); dopts->tcache_ind = TCACHE_IND_NONE; /* We know that arena 0 has already been initialized. */ diff --git a/src/tsd.c b/src/tsd.c index 29a56775..6eb3b883 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -63,6 +63,45 @@ tsd_slow_update(tsd_t *tsd) { } } +static bool +tsd_data_init(tsd_t *tsd) { + /* + * We initialize the rtree context first (before the tcache), since the + * tcache initialization depends on it. 
+ */ + rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + + return tsd_tcache_enabled_data_init(tsd); +} + +static void +assert_tsd_data_cleanup_done(tsd_t *tsd) { + assert(!tsd_nominal(tsd)); + assert(*tsd_arenap_get_unsafe(tsd) == NULL); + assert(*tsd_iarenap_get_unsafe(tsd) == NULL); + assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true); + assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL); + assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false); + assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL); +} + +static bool +tsd_data_init_nocleanup(tsd_t *tsd) { + assert(tsd->state == tsd_state_reincarnated); + /* + * During reincarnation, there is no guarantee that the cleanup function + * will be called (deallocation may happen after all tsd destructors). + * We set up tsd in a way that no cleanup is needed. + */ + rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + *tsd_arenas_tdata_bypassp_get(tsd) = true; + *tsd_tcache_enabledp_get_unsafe(tsd) = false; + *tsd_reentrancy_levelp_get(tsd) = 1; + assert_tsd_data_cleanup_done(tsd); + + return false; +} + tsd_t * tsd_fetch_slow(tsd_t *tsd) { if (tsd->state == tsd_state_nominal_slow) { @@ -79,7 +118,7 @@ tsd_fetch_slow(tsd_t *tsd) { } else if (tsd->state == tsd_state_purgatory) { tsd->state = tsd_state_reincarnated; tsd_set(tsd); - tsd_data_init(tsd); + tsd_data_init_nocleanup(tsd); } else { assert(tsd->state == tsd_state_reincarnated); } @@ -131,21 +170,6 @@ malloc_tsd_cleanup_register(bool (*f)(void)) { ncleanups++; } -bool -tsd_data_init(void *arg) { - tsd_t *tsd = (tsd_t *)arg; - /* - * We initialize the rtree context first (before the tcache), since the - * tcache initialization depends on it. - */ - rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); - - if (tsd_tcache_enabled_data_init(tsd)) { - return true; - } - return false; -} - static void tsd_do_data_cleanup(tsd_t *tsd) { prof_tdata_cleanup(tsd); @@ -164,14 +188,16 @@ tsd_cleanup(void *arg) { case tsd_state_uninitialized: /* Do nothing. 
*/ break; - case tsd_state_nominal: - case tsd_state_nominal_slow: case tsd_state_reincarnated: /* * Reincarnated means another destructor deallocated memory - * after this destructor was called. Reset state to - * tsd_state_purgatory and request another callback. + * after the destructor was called. Cleanup isn't required but + * is still called for testing and completeness. */ + assert_tsd_data_cleanup_done(tsd); + /* Fall through. */ + case tsd_state_nominal: + case tsd_state_nominal_slow: tsd_do_data_cleanup(tsd); tsd->state = tsd_state_purgatory; tsd_set(tsd); diff --git a/test/unit/tsd.c b/test/unit/tsd.c index c9a7d809..6c479139 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -106,8 +106,8 @@ thd_start_reincarnated(void *arg) { "TSD state should be reincarnated\n"); p = mallocx(1, MALLOCX_TCACHE_NONE); assert_ptr_not_null(p, "Unexpected malloc() failure"); - assert_ptr_not_null(*tsd_arenap_get_unsafe(tsd), - "Should have tsd arena set after reincarnation."); + assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + "Should not have tsd arena set after reincarnation."); free(p); tsd_cleanup((void *)tsd); From 73713fbb27cd1cf6754259b19a960e91a16c3638 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 7 Jun 2017 15:49:09 -0700 Subject: [PATCH 0920/2608] Drop high rank locks when creating threads. Avoid holding arenas_lock and background_thread_lock when creating background threads, because pthread_create may take internal locks, and potentially cause deadlock with jemalloc internal locks. 
--- include/jemalloc/internal/arena_externs.h | 1 + src/arena.c | 11 ------- src/background_thread.c | 5 +++- src/ctl.c | 3 ++ src/jemalloc.c | 36 ++++++++++++++++++++++- 5 files changed, 43 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9ad9786f..3a85bcbb 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -15,6 +15,7 @@ extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; +extern malloc_mutex_t arenas_lock; void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests); diff --git a/src/arena.c b/src/arena.c index dedbb3e3..0912df31 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2050,17 +2050,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { hooks_arena_new_hook(); } post_reentrancy(tsdn_tsd(tsdn)); - - /* background_thread_create() handles reentrancy internally. */ - if (have_background_thread) { - bool err; - malloc_mutex_lock(tsdn, &background_thread_lock); - err = background_thread_create(tsdn_tsd(tsdn), ind); - malloc_mutex_unlock(tsdn, &background_thread_lock); - if (err) { - goto label_error; - } - } } return arena; diff --git a/src/background_thread.c b/src/background_thread.c index 64eba1a7..50812c36 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -352,12 +352,15 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { } pre_reentrancy(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); /* * To avoid complications (besides reentrancy), create internal - * background threads with the underlying pthread_create. + * background threads with the underlying pthread_create, and drop + * background_thread_lock (pthread_create may take internal locks). 
*/ int err = pthread_create_wrapper(&info->thread, NULL, background_thread_entry, (void *)thread_ind); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); post_reentrancy(tsd); if (err != 0) { diff --git a/src/ctl.c b/src/ctl.c index 2c3f9945..134dbac9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1501,6 +1501,7 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } background_thread_ctl_init(tsd_tsdn(tsd)); + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); if (newp == NULL) { oldval = background_thread_enabled(); @@ -1535,6 +1536,8 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 9a5685b4..5a0baf8f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,7 +70,7 @@ unsigned opt_narenas = 0; unsigned ncpus; /* Protects arenas initialization. */ -static malloc_mutex_t arenas_lock; +malloc_mutex_t arenas_lock; /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -335,6 +335,25 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { return arena; } +static void +arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { + if (ind == 0) { + return; + } + /* background_thread_create() handles reentrancy internally. */ + if (have_background_thread) { + bool err; + malloc_mutex_lock(tsdn, &background_thread_lock); + err = background_thread_create(tsdn_tsd(tsdn), ind); + malloc_mutex_unlock(tsdn, &background_thread_lock); + if (err) { + malloc_printf(": error in background thread " + "creation for arena %u. 
Abort.\n", ind); + abort(); + } + } +} + arena_t * arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; @@ -342,6 +361,9 @@ arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { malloc_mutex_lock(tsdn, &arenas_lock); arena = arena_init_locked(tsdn, ind, extent_hooks); malloc_mutex_unlock(tsdn, &arenas_lock); + + arena_new_create_background_thread(tsdn, ind); + return arena; } @@ -475,6 +497,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { if (narenas_auto > 1) { unsigned i, j, choose[2], first_null; + bool is_new_arena[2]; /* * Determine binding for both non-internal and internal @@ -486,6 +509,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { for (j = 0; j < 2; j++) { choose[j] = 0; + is_new_arena[j] = false; } first_null = narenas_auto; @@ -545,6 +569,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { &arenas_lock); return NULL; } + is_new_arena[j] = true; if (!!j == internal) { ret = arena; } @@ -552,6 +577,15 @@ arena_choose_hard(tsd_t *tsd, bool internal) { arena_bind(tsd, choose[j], !!j); } malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); + + for (j = 0; j < 2; j++) { + if (is_new_arena[j]) { + assert(choose[j] > 0); + arena_new_create_background_thread( + tsd_tsdn(tsd), choose[j]); + } + } + } else { ret = arena_get(tsd_tsdn(tsd), 0, false); arena_bind(tsd, 0, false); From 5642f03cae54eb8798dc4fa5ea28d9569572c1af Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 7 Jun 2017 16:12:50 -0700 Subject: [PATCH 0921/2608] Add internal tsd for background_thread. 
--- include/jemalloc/internal/tsd.h | 15 ++++++++++----- src/background_thread.c | 9 ++++----- src/tsd.c | 11 ++++++++++- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 4efaf4e2..cab0b2fe 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -155,7 +155,7 @@ void malloc_tsd_cleanup_register(bool (*f)(void)); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); void tsd_slow_update(tsd_t *tsd); /* @@ -250,7 +250,7 @@ tsd_fast(tsd_t *tsd) { } JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init) { +tsd_fetch_impl(bool init, bool internal) { tsd_t *tsd = tsd_get(init); if (!init && tsd_get_allocates() && tsd == NULL) { @@ -259,7 +259,7 @@ tsd_fetch_impl(bool init) { assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { - return tsd_fetch_slow(tsd); + return tsd_fetch_slow(tsd, internal); } assert(tsd_fast(tsd)); tsd_assert_fast(tsd); @@ -267,9 +267,14 @@ tsd_fetch_impl(bool init) { return tsd; } +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_internal_fetch(void) { + return tsd_fetch_impl(true, true); +} + JEMALLOC_ALWAYS_INLINE tsd_t * tsd_fetch(void) { - return tsd_fetch_impl(true); + return tsd_fetch_impl(true, false); } static inline bool @@ -283,7 +288,7 @@ tsdn_fetch(void) { return NULL; } - return tsd_tsdn(tsd_fetch_impl(false)); + return tsd_tsdn(tsd_fetch_impl(false, false)); } JEMALLOC_ALWAYS_INLINE rtree_ctx_t * diff --git a/src/background_thread.c b/src/background_thread.c index 50812c36..190fa2fd 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -316,12 +316,11 @@ background_thread_entry(void *ind_arg) { set_current_thread_affinity((int)thread_ind); } /* - * Start periodic background work. 
We avoid fetching tsd to keep the - * background thread "outside", since there may be side effects, for - * example triggering new arena creation (which in turn triggers - * background thread creation). + * Start periodic background work. We use internal tsd which avoids + * side effects, for example triggering new arena creation (which in + * turn triggers another background thread creation). */ - background_work(TSDN_NULL, thread_ind); + background_work(tsd_tsdn(tsd_internal_fetch()), thread_ind); assert(pthread_equal(pthread_self(), background_thread_info[thread_ind].thread)); diff --git a/src/tsd.c b/src/tsd.c index 6eb3b883..97330332 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -103,7 +103,16 @@ tsd_data_init_nocleanup(tsd_t *tsd) { } tsd_t * -tsd_fetch_slow(tsd_t *tsd) { +tsd_fetch_slow(tsd_t *tsd, bool internal) { + if (internal) { + /* For internal background threads use only. */ + assert(tsd->state == tsd_state_uninitialized); + tsd->state = tsd_state_reincarnated; + tsd_set(tsd); + tsd_data_init_nocleanup(tsd); + return tsd; + } + if (tsd->state == tsd_state_nominal_slow) { /* On slow path but no work needed. */ assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || From faaf458bad62daf67fc913f60e9e14f54aa55827 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 8 Jun 2017 12:19:29 -0700 Subject: [PATCH 0922/2608] Remove redundant typedefs. Pre-C11 compilers do not support typedef redefinition. --- include/jemalloc/internal/tsd.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index cab0b2fe..631fbf1f 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -108,7 +108,6 @@ enum { typedef uint8_t tsd_state_t; /* The actual tsd. 
*/ -typedef struct tsd_s tsd_t; struct tsd_s { /* * The contents should be treated as totally opaque outside the tsd @@ -127,7 +126,6 @@ MALLOC_TSD * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be * explicitly converted to tsd_t, which is non-nullable. */ -typedef struct tsdn_s tsdn_t; struct tsdn_s { tsd_t tsd; }; From 94d655b8bd1eac3d969dfe3c15aa7a6b1d26373d Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 8 Jun 2017 12:55:59 -0700 Subject: [PATCH 0923/2608] Update a UTRACE() size argument. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5a0baf8f..e2865d25 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1930,7 +1930,7 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { malloc_write(sopts->oom_string); abort(); } - UTRACE(NULL, size, NULL); + UTRACE(NULL, dopts->num_items * dopts->item_size, NULL); set_errno(ENOMEM); *dopts->result = NULL; From 13685ab1b767091d817cb4959d24a42447a6fb78 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 8 Jun 2017 22:07:21 -0700 Subject: [PATCH 0924/2608] Normalize background thread configuration. Also fix a compilation error #ifndef JEMALLOC_PTHREAD_CREATE_WRAPPER. --- configure.ac | 10 ++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ include/jemalloc/internal/jemalloc_preamble.h.in | 6 ------ src/background_thread.c | 2 ++ 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index a00aab9b..32ae02c2 100644 --- a/configure.ac +++ b/configure.ac @@ -1452,6 +1452,7 @@ if test "x$abi" != "xpecoff" ; then [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) wrap_syms="${wrap_syms} pthread_create" + have_pthread="1" dnl Check if we have dlsym support. 
have_dlsym="1" AC_CHECK_HEADERS([dlfcn.h], @@ -1933,6 +1934,15 @@ if test "x${enable_zone_allocator}" = "x1" ; then AC_DEFINE([JEMALLOC_ZONE], [ ]) fi +dnl ============================================================================ +dnl Enable background threads if possible. + +if test "x${have_pthread}" = "x1" -a "x${have_dlsym}" = "x1" \ + -a "x${je_cv_os_unfair_lock}" != "xyes" \ + -a "x${je_cv_osspin}" != "xyes" ; then + AC_DEFINE([JEMALLOC_BACKGROUND_THREAD]) +fi + dnl ============================================================================ dnl Check for glibc malloc hooks diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 20a2358e..2bf9dea1 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -316,6 +316,11 @@ /* GNU specific sched_setaffinity support */ #undef JEMALLOC_HAVE_SCHED_SETAFFINITY +/* + * If defined, all the features necessary for background threads are present. + */ +#undef JEMALLOC_BACKGROUND_THREAD + /* * If defined, jemalloc symbols are not exported (doesn't work when * JEMALLOC_PREFIX is not defined). diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 46750e99..18539a09 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -168,12 +168,6 @@ static const bool force_ivsalloc = false #endif ; - -#if (defined(JEMALLOC_HAVE_PTHREAD) && defined(JEMALLOC_HAVE_DLSYM) \ - && !defined(JEMALLOC_OSSPIN) && !defined(JEMALLOC_OS_UNFAIR_LOCK)) -/* Currently background thread supports pthread only. 
*/ -#define JEMALLOC_BACKGROUND_THREAD -#endif static const bool have_background_thread = #ifdef JEMALLOC_BACKGROUND_THREAD true diff --git a/src/background_thread.c b/src/background_thread.c index 190fa2fd..51d23cb9 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -635,7 +635,9 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); +#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER pthread_once(&once_control, pthread_create_wrapper_once); +#endif } #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ From 464cb60490efda800625b16fedd5bcd238e1526e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 8 Jun 2017 22:46:31 -0700 Subject: [PATCH 0925/2608] Move background thread creation to background_thread_0. To avoid complications, avoid invoking pthread_create "internally", instead rely on thread0 to launch new threads, and also terminating threads when asked. 
--- .../internal/background_thread_externs.h | 2 - .../internal/background_thread_structs.h | 2 + src/background_thread.c | 505 +++++++++++------- src/ctl.c | 12 +- 4 files changed, 313 insertions(+), 208 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index aef1c90b..7c883697 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -10,8 +10,6 @@ extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); -bool background_threads_disable_single(tsd_t *tsd, - background_thread_info_t *info); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index edf90fe2..f6ad4adb 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -18,6 +18,8 @@ struct background_thread_info_s { malloc_mutex_t mtx; /* Whether the thread has been created. */ bool started; + /* Pause execution (for arena reset / destroy). */ + bool pause; /* When true, it means no wakeup scheduled. */ atomic_b_t indefinite_sleep; /* Next scheduled wakeup time (absolute time in ns). 
*/ diff --git a/src/background_thread.c b/src/background_thread.c index 51d23cb9..f2bc474a 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -50,8 +50,6 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED -bool background_threads_disable_single(tsd_t *tsd, - background_thread_info_t *info) NOT_REACHED void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, size_t npages_new) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED @@ -67,8 +65,9 @@ void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED static bool background_thread_enabled_at_fork; static void -background_thread_info_reinit(tsdn_t *tsdn, background_thread_info_t *info) { +background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { background_thread_wakeup_time_set(tsdn, info, 0); + info->pause = false; info->npages_to_purge_new = 0; if (config_stats) { info->tot_n_runs = 0; @@ -210,211 +209,107 @@ arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { return i1 < i2 ? i1 : i2; } -static inline uint64_t -background_work_once(tsdn_t *tsdn, unsigned ind) { - arena_t *arena; - unsigned i, narenas; - uint64_t min_interval; +static void +background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, + uint64_t interval) { + if (config_stats) { + info->tot_n_runs++; + } + info->npages_to_purge_new = 0; - min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; - narenas = narenas_total_get(); - for (i = ind; i < narenas; i += ncpus) { - arena = arena_get(tsdn, i, false); + struct timeval tv; + /* Specific clock required by timedwait. 
*/ + gettimeofday(&tv, NULL); + nstime_t before_sleep; + nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000); + + int ret; + if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { + assert(background_thread_indefinite_sleep(info)); + ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + assert(ret == 0); + } else { + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && + interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); + /* We need malloc clock (can be different from tv). */ + nstime_t next_wakeup; + nstime_init(&next_wakeup, 0); + nstime_update(&next_wakeup); + nstime_iadd(&next_wakeup, interval); + assert(nstime_ns(&next_wakeup) < + BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set(tsdn, info, + nstime_ns(&next_wakeup)); + + nstime_t ts_wakeup; + nstime_copy(&ts_wakeup, &before_sleep); + nstime_iadd(&ts_wakeup, interval); + struct timespec ts; + ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); + ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); + + assert(!background_thread_indefinite_sleep(info)); + ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts); + assert(ret == ETIMEDOUT || ret == 0); + background_thread_wakeup_time_set(tsdn, info, + BACKGROUND_THREAD_INDEFINITE_SLEEP); + } + if (config_stats) { + gettimeofday(&tv, NULL); + nstime_t after_sleep; + nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000); + if (nstime_compare(&after_sleep, &before_sleep) > 0) { + nstime_subtract(&after_sleep, &before_sleep); + nstime_add(&info->tot_sleep_time, &after_sleep); + } + } + while (info->pause) { + malloc_mutex_unlock(tsdn, &info->mtx); + /* Wait on global lock to update status. 
*/ + malloc_mutex_lock(tsdn, &background_thread_lock); + malloc_mutex_unlock(tsdn, &background_thread_lock); + malloc_mutex_lock(tsdn, &info->mtx); + } +} + +static inline void +background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { + uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; + unsigned narenas = narenas_total_get(); + + for (unsigned i = ind; i < narenas; i += ncpus) { + arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; } - arena_decay(tsdn, arena, true, false); + if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + /* Min interval will be used. */ + continue; + } uint64_t interval = arena_decay_compute_purge_interval(tsdn, arena); - if (interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { - return interval; - } - - assert(interval > BACKGROUND_THREAD_MIN_INTERVAL_NS); + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS); if (min_interval > interval) { min_interval = interval; } } - - return min_interval; + background_thread_sleep(tsdn, info, min_interval); } -static void -background_work(tsdn_t *tsdn, unsigned ind) { - int ret; - background_thread_info_t *info = &background_thread_info[ind]; - - malloc_mutex_lock(tsdn, &info->mtx); - background_thread_wakeup_time_set(tsdn, info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); - while (info->started) { - uint64_t interval = background_work_once(tsdn, ind); - if (config_stats) { - info->tot_n_runs++; - } - info->npages_to_purge_new = 0; - - struct timeval tv; - /* Specific clock required by timedwait. 
*/ - gettimeofday(&tv, NULL); - nstime_t before_sleep; - nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000); - - if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { - assert(background_thread_indefinite_sleep(info)); - ret = pthread_cond_wait(&info->cond, &info->mtx.lock); - assert(ret == 0); - } else { - assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && - interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); - /* We need malloc clock (can be different from tv). */ - nstime_t next_wakeup; - nstime_init(&next_wakeup, 0); - nstime_update(&next_wakeup); - nstime_iadd(&next_wakeup, interval); - assert(nstime_ns(&next_wakeup) < - BACKGROUND_THREAD_INDEFINITE_SLEEP); - background_thread_wakeup_time_set(tsdn, info, - nstime_ns(&next_wakeup)); - - nstime_t ts_wakeup; - nstime_copy(&ts_wakeup, &before_sleep); - nstime_iadd(&ts_wakeup, interval); - struct timespec ts; - ts.tv_sec = (size_t)nstime_sec(&ts_wakeup); - ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); - - assert(!background_thread_indefinite_sleep(info)); - ret = pthread_cond_timedwait(&info->cond, - &info->mtx.lock, &ts); - assert(ret == ETIMEDOUT || ret == 0); - background_thread_wakeup_time_set(tsdn, info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); - } - - if (config_stats) { - gettimeofday(&tv, NULL); - nstime_t after_sleep; - nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000); - if (nstime_compare(&after_sleep, &before_sleep) > 0) { - nstime_subtract(&after_sleep, &before_sleep); - nstime_add(&info->tot_sleep_time, &after_sleep); - } - } - } - background_thread_wakeup_time_set(tsdn, info, 0); - malloc_mutex_unlock(tsdn, &info->mtx); -} - -static void * -background_thread_entry(void *ind_arg) { - unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; - assert(thread_ind < narenas_total_get() && thread_ind < ncpus); - - if (opt_percpu_arena != percpu_arena_disabled) { - set_current_thread_affinity((int)thread_ind); - } - /* - * Start periodic background work. 
We use internal tsd which avoids - * side effects, for example triggering new arena creation (which in - * turn triggers another background thread creation). - */ - background_work(tsd_tsdn(tsd_internal_fetch()), thread_ind); - assert(pthread_equal(pthread_self(), - background_thread_info[thread_ind].thread)); - - return NULL; -} - -/* Create a new background thread if needed. */ -bool -background_thread_create(tsd_t *tsd, unsigned arena_ind) { - assert(have_background_thread); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - - /* We create at most NCPUs threads. */ - size_t thread_ind = arena_ind % ncpus; - background_thread_info_t *info = &background_thread_info[thread_ind]; - - bool need_new_thread; - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - need_new_thread = background_thread_enabled() && !info->started; - if (need_new_thread) { - info->started = true; - background_thread_info_reinit(tsd_tsdn(tsd), info); - n_background_threads++; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - if (!need_new_thread) { - return false; - } - - pre_reentrancy(tsd); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - /* - * To avoid complications (besides reentrancy), create internal - * background threads with the underlying pthread_create, and drop - * background_thread_lock (pthread_create may take internal locks). 
- */ - int err = pthread_create_wrapper(&info->thread, NULL, - background_thread_entry, (void *)thread_ind); - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); - post_reentrancy(tsd); - - if (err != 0) { - malloc_printf(": arena %u background thread creation " - "failed (%d).\n", arena_ind, err); - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - info->started = false; - n_background_threads--; - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - - return true; - } - assert(info->started); - - return false; -} - -bool -background_threads_enable(tsd_t *tsd) { - assert(n_background_threads == 0); - assert(background_thread_enabled()); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - - VARIABLE_ARRAY(bool, created, ncpus); - unsigned i, ncreated; - for (i = 0; i < ncpus; i++) { - created[i] = false; - } - ncreated = 0; - - unsigned n = narenas_total_get(); - for (i = 0; i < n; i++) { - if (created[i % ncpus] || - arena_get(tsd_tsdn(tsd), i, false) == NULL) { - continue; - } - if (background_thread_create(tsd, i)) { - return true; - } - created[i % ncpus] = true; - if (++ncreated == ncpus) { - break; - } - } - - return false; -} - -bool +static bool background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - pre_reentrancy(tsd); + if (info == &background_thread_info[0]) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), + &background_thread_lock); + } else { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), + &background_thread_lock); + } - bool has_thread; + pre_reentrancy(tsd); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + bool has_thread; if (info->started) { has_thread = true; info->started = false; @@ -440,14 +335,221 @@ background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { return false; } +static void *background_thread_entry(void *ind_arg); + +static void +check_background_thread_creation(tsd_t *tsd, unsigned *n_created, + 
bool *created_threads) { + if (likely(*n_created == n_background_threads)) { + return; + } + + for (unsigned i = 1; i < ncpus; i++) { + if (created_threads[i]) { + continue; + } + background_thread_info_t *info = &background_thread_info[i]; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + if (info->started) { + pre_reentrancy(tsd); + int err = pthread_create_wrapper(&info->thread, NULL, + background_thread_entry, (void *)(uintptr_t)i); + post_reentrancy(tsd); + + if (err == 0) { + (*n_created)++; + created_threads[i] = true; + } else { + malloc_printf(": background thread " + "creation failed (%d)\n", err); + if (opt_abort) { + abort(); + } + } + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + } +} + +static void +background_thread0_work(tsd_t *tsd) { + /* Thread0 is also responsible for launching / terminating threads. */ + VARIABLE_ARRAY(bool, created_threads, ncpus); + unsigned i; + for (i = 1; i < ncpus; i++) { + created_threads[i] = false; + } + /* Start working, and create more threads when asked. */ + unsigned n_created = 1; + while (background_thread_info[0].started) { + check_background_thread_creation(tsd, &n_created, + (bool *)&created_threads); + background_work_sleep_once(tsd_tsdn(tsd), + &background_thread_info[0], 0); + } + + /* + * Shut down other threads at exit. Note that the ctl thread is holding + * the global background_thread mutex (and is waiting) for us. + */ + assert(!background_thread_enabled()); + for (i = 1; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + if (created_threads[i]) { + background_threads_disable_single(tsd, info); + } else { + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + /* Clear in case the thread wasn't created. 
*/ + info->started = false; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + } + } + background_thread_info[0].started = false; + assert(n_background_threads == 1); +} + +static void +background_work(tsd_t *tsd, unsigned ind) { + background_thread_info_t *info = &background_thread_info[ind]; + + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + background_thread_wakeup_time_set(tsd_tsdn(tsd), info, + BACKGROUND_THREAD_INDEFINITE_SLEEP); + if (ind == 0) { + background_thread0_work(tsd); + } else { + while (info->started) { + background_work_sleep_once(tsd_tsdn(tsd), info, ind); + } + } + background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); +} + +static void * +background_thread_entry(void *ind_arg) { + unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; + assert(thread_ind < ncpus); + + if (opt_percpu_arena != percpu_arena_disabled) { + set_current_thread_affinity((int)thread_ind); + } + /* + * Start periodic background work. We use internal tsd which avoids + * side effects, for example triggering new arena creation (which in + * turn triggers another background thread creation). + */ + background_work(tsd_internal_fetch(), thread_ind); + assert(pthread_equal(pthread_self(), + background_thread_info[thread_ind].thread)); + + return NULL; +} + +static void +background_thread_init(tsd_t *tsd, background_thread_info_t *info) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + info->started = true; + background_thread_info_init(tsd_tsdn(tsd), info); + n_background_threads++; +} + +/* Create a new background thread if needed. */ +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) { + assert(have_background_thread); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + /* We create at most NCPUs threads. 
*/ + size_t thread_ind = arena_ind % ncpus; + background_thread_info_t *info = &background_thread_info[thread_ind]; + + bool need_new_thread; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + need_new_thread = background_thread_enabled() && !info->started; + if (need_new_thread) { + background_thread_init(tsd, info); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + if (!need_new_thread) { + return false; + } + if (arena_ind != 0) { + /* Threads are created asynchronously by Thread 0. */ + background_thread_info_t *t0 = &background_thread_info[0]; + malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); + assert(t0->started); + pthread_cond_signal(&t0->cond); + malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); + + return false; + } + + pre_reentrancy(tsd); + /* + * To avoid complications (besides reentrancy), create internal + * background threads with the underlying pthread_create. + */ + int err = pthread_create_wrapper(&info->thread, NULL, + background_thread_entry, (void *)thread_ind); + post_reentrancy(tsd); + + if (err != 0) { + malloc_printf(": arena 0 background thread creation " + "failed (%d)\n", err); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + info->started = false; + n_background_threads--; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + + return true; + } + + return false; +} + +bool +background_threads_enable(tsd_t *tsd) { + assert(n_background_threads == 0); + assert(background_thread_enabled()); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + VARIABLE_ARRAY(bool, marked, ncpus); + unsigned i, nmarked; + for (i = 0; i < ncpus; i++) { + marked[i] = false; + } + nmarked = 0; + /* Mark the threads we need to create for thread 0. 
*/ + unsigned n = narenas_total_get(); + for (i = 1; i < n; i++) { + if (marked[i % ncpus] || + arena_get(tsd_tsdn(tsd), i, false) == NULL) { + continue; + } + background_thread_info_t *info = &background_thread_info[i]; + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + assert(!info->started); + background_thread_init(tsd, info); + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + marked[i % ncpus] = true; + if (++nmarked == ncpus) { + break; + } + } + + return background_thread_create(tsd, 0); +} + bool background_threads_disable(tsd_t *tsd) { assert(!background_thread_enabled()); - for (unsigned i = 0; i < ncpus; i++) { - background_thread_info_t *info = &background_thread_info[i]; - if (background_threads_disable_single(tsd, info)) { - return true; - } + malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); + + /* Thread 0 will be responsible for terminating other threads. */ + if (background_threads_disable_single(tsd, + &background_thread_info[0])) { + return true; } assert(n_background_threads == 0); @@ -460,7 +562,6 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, size_t npages_new) { background_thread_info_t *info = arena_background_thread_info_get( arena); - if (malloc_mutex_trylock(tsdn, &info->mtx)) { /* * Background thread may hold the mutex for a long period of @@ -474,7 +575,6 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (!info->started) { goto label_done; } - assert(background_thread_enabled()); if (malloc_mutex_trylock(tsdn, &decay->mtx)) { goto label_done; } @@ -646,7 +746,7 @@ bool background_thread_boot0(void) { if (!have_background_thread && opt_background_thread) { malloc_printf(": option background_thread currently " - "supports pthread only. 
\n"); + "supports pthread only\n"); return true; } @@ -686,9 +786,10 @@ background_thread_boot1(tsdn_t *tsdn) { for (unsigned i = 0; i < ncpus; i++) { background_thread_info_t *info = &background_thread_info[i]; + /* Thread mutex is rank_inclusive because of thread0. */ if (malloc_mutex_init(&info->mtx, "background_thread", WITNESS_RANK_BACKGROUND_THREAD, - malloc_mutex_rank_exclusive)) { + malloc_mutex_address_ordered)) { return true; } if (pthread_cond_init(&info->cond, NULL)) { @@ -696,7 +797,7 @@ background_thread_boot1(tsdn_t *tsdn) { } malloc_mutex_lock(tsdn, &info->mtx); info->started = false; - background_thread_info_reinit(tsdn, info); + background_thread_info_init(tsdn, info); malloc_mutex_unlock(tsdn, &info->mtx); } #endif diff --git a/src/ctl.c b/src/ctl.c index 134dbac9..242b36d4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1948,8 +1948,10 @@ arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) { unsigned ind = arena_ind % ncpus; background_thread_info_t *info = &background_thread_info[ind]; - assert(info->started); - background_threads_disable_single(tsd, info); + assert(info->started && !info->pause); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + info->pause = true; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } } @@ -1961,8 +1963,10 @@ arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { unsigned ind = arena_ind % ncpus; background_thread_info_t *info = &background_thread_info[ind]; - assert(!info->started); - background_thread_create(tsd, ind); + assert(info->started && info->pause); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + info->pause = false; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); } From b83b5ad44a51a18d9b9813906d22c7e008d2b517 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Jun 2017 00:00:48 -0700 Subject: [PATCH 0926/2608] Not re-enable background thread after fork. Avoid calling pthread_create in postfork handlers. 
--- doc/jemalloc.xml.in | 5 ++- src/background_thread.c | 79 +++++++++++++++++++++++------------------ src/jemalloc.c | 1 - 3 files changed, 49 insertions(+), 36 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 41e80049..21e401ac 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -750,7 +750,10 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", background threads will be no more than the number of CPUs or active arenas). Threads run periodically, and handle purging asynchronously. When switching - off, background threads are terminated synchronously. See fork2 + function, the state in the child process will be disabled regardless + the state in parent process. See stats.background_thread for related stats. opt.background_thread diff --git a/src/background_thread.c b/src/background_thread.c index f2bc474a..09e08b0b 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -350,24 +350,38 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, } background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - if (info->started) { - pre_reentrancy(tsd); - int err = pthread_create_wrapper(&info->thread, NULL, - background_thread_entry, (void *)(uintptr_t)i); - post_reentrancy(tsd); - if (err == 0) { - (*n_created)++; - created_threads[i] = true; - } else { - malloc_printf(": background thread " - "creation failed (%d)\n", err); - if (opt_abort) { - abort(); - } + bool create = info->started; + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + if (!create) { + continue; + } + + /* + * To avoid deadlock with prefork handlers (which waits for the + * mutex held here), unlock before calling pthread_create(). 
+ */ + malloc_mutex_unlock(tsd_tsdn(tsd), + &background_thread_info[0].mtx); + pre_reentrancy(tsd); + int err = pthread_create_wrapper(&info->thread, NULL, + background_thread_entry, (void *)(uintptr_t)i); + post_reentrancy(tsd); + malloc_mutex_lock(tsd_tsdn(tsd), + &background_thread_info[0].mtx); + + if (err == 0) { + (*n_created)++; + created_threads[i] = true; + } else { + malloc_printf(": background thread " + "creation failed (%d)\n", err); + if (opt_abort) { + abort(); } } - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + /* Since we unlocked and may miss signals, restart. */ + i = 1; } } @@ -643,14 +657,7 @@ label_done: void background_thread_prefork0(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &background_thread_lock); - if (background_thread_enabled()) { - background_thread_enabled_at_fork = true; - background_thread_enabled_set(tsdn, false); - background_threads_disable(tsdn_tsd(tsdn)); - } else { - background_thread_enabled_at_fork = false; - } - assert(n_background_threads == 0); + background_thread_enabled_at_fork = background_thread_enabled(); } void @@ -660,22 +667,12 @@ background_thread_prefork1(tsdn_t *tsdn) { } } -static void -background_thread_postfork_init(tsdn_t *tsdn) { - assert(n_background_threads == 0); - if (background_thread_enabled_at_fork) { - background_thread_enabled_set(tsdn, true); - background_threads_enable(tsdn_tsd(tsdn)); - } -} - void background_thread_postfork_parent(tsdn_t *tsdn) { for (unsigned i = 0; i < ncpus; i++) { malloc_mutex_postfork_parent(tsdn, &background_thread_info[i].mtx); } - background_thread_postfork_init(tsdn); malloc_mutex_postfork_parent(tsdn, &background_thread_lock); } @@ -686,9 +683,23 @@ background_thread_postfork_child(tsdn_t *tsdn) { &background_thread_info[i].mtx); } malloc_mutex_postfork_child(tsdn, &background_thread_lock); + if (!background_thread_enabled_at_fork) { + return; + } + /* Clear background_thread state (reset to disabled for child). 
*/ malloc_mutex_lock(tsdn, &background_thread_lock); - background_thread_postfork_init(tsdn); + n_background_threads = 0; + background_thread_enabled_set(tsdn, false); + for (unsigned i = 0; i < ncpus; i++) { + background_thread_info_t *info = &background_thread_info[i]; + malloc_mutex_lock(tsdn, &info->mtx); + info->started = false; + int ret = pthread_cond_init(&info->cond, NULL); + assert(ret == 0); + background_thread_info_init(tsdn, info); + malloc_mutex_unlock(tsdn, &info->mtx); + } malloc_mutex_unlock(tsdn, &background_thread_lock); } diff --git a/src/jemalloc.c b/src/jemalloc.c index e2865d25..52c86aa6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -340,7 +340,6 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (ind == 0) { return; } - /* background_thread_create() handles reentrancy internally. */ if (have_background_thread) { bool err; malloc_mutex_lock(tsdn, &background_thread_lock); From 394df9519d53e1d264d6d2f5375bb2fd70e0e5ed Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Jun 2017 15:45:25 -0700 Subject: [PATCH 0927/2608] Combine background_thread started / paused into state. --- .../internal/background_thread_structs.h | 12 ++-- src/background_thread.c | 71 ++++++++++++------- src/ctl.c | 8 +-- test/unit/background_thread.c | 2 +- 4 files changed, 59 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index f6ad4adb..e69a7d02 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -9,6 +9,13 @@ #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX +typedef enum { + background_thread_stopped, + background_thread_started, + /* Thread waits on the global lock when paused (for arena_reset). 
*/ + background_thread_paused, +} background_thread_state_t; + struct background_thread_info_s { #ifdef JEMALLOC_BACKGROUND_THREAD /* Background thread is pthread specific. */ @@ -16,10 +23,7 @@ struct background_thread_info_s { pthread_cond_t cond; #endif malloc_mutex_t mtx; - /* Whether the thread has been created. */ - bool started; - /* Pause execution (for arena reset / destroy). */ - bool pause; + background_thread_state_t state; /* When true, it means no wakeup scheduled. */ atomic_b_t indefinite_sleep; /* Next scheduled wakeup time (absolute time in ns). */ diff --git a/src/background_thread.c b/src/background_thread.c index 09e08b0b..a7403b85 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -67,7 +67,6 @@ static bool background_thread_enabled_at_fork; static void background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { background_thread_wakeup_time_set(tsdn, info, 0); - info->pause = false; info->npages_to_purge_new = 0; if (config_stats) { info->tot_n_runs = 0; @@ -263,13 +262,20 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, nstime_add(&info->tot_sleep_time, &after_sleep); } } - while (info->pause) { +} + +static bool +background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { + if (unlikely(info->state == background_thread_paused)) { malloc_mutex_unlock(tsdn, &info->mtx); /* Wait on global lock to update status. 
*/ malloc_mutex_lock(tsdn, &background_thread_lock); malloc_mutex_unlock(tsdn, &background_thread_lock); malloc_mutex_lock(tsdn, &info->mtx); + return true; } + + return false; } static inline void @@ -310,9 +316,10 @@ background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { pre_reentrancy(tsd); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); bool has_thread; - if (info->started) { + assert(info->state != background_thread_paused); + if (info->state == background_thread_started) { has_thread = true; - info->started = false; + info->state = background_thread_stopped; pthread_cond_signal(&info->cond); } else { has_thread = false; @@ -344,14 +351,17 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, return; } + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx); +label_restart: + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); for (unsigned i = 1; i < ncpus; i++) { if (created_threads[i]) { continue; } background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - - bool create = info->started; + assert(info->state != background_thread_paused); + bool create = (info->state == background_thread_started); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); if (!create) { continue; @@ -361,14 +371,12 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, * To avoid deadlock with prefork handlers (which waits for the * mutex held here), unlock before calling pthread_create(). 
*/ - malloc_mutex_unlock(tsd_tsdn(tsd), - &background_thread_info[0].mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + pre_reentrancy(tsd); int err = pthread_create_wrapper(&info->thread, NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); - malloc_mutex_lock(tsd_tsdn(tsd), - &background_thread_info[0].mtx); if (err == 0) { (*n_created)++; @@ -380,9 +388,11 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, abort(); } } - /* Since we unlocked and may miss signals, restart. */ - i = 1; + /* Restart since we unlocked. */ + goto label_restart; } + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); } static void @@ -395,7 +405,11 @@ background_thread0_work(tsd_t *tsd) { } /* Start working, and create more threads when asked. */ unsigned n_created = 1; - while (background_thread_info[0].started) { + while (background_thread_info[0].state != background_thread_stopped) { + if (background_thread_pause_check(tsd_tsdn(tsd), + &background_thread_info[0])) { + continue; + } check_background_thread_creation(tsd, &n_created, (bool *)&created_threads); background_work_sleep_once(tsd_tsdn(tsd), @@ -409,16 +423,17 @@ background_thread0_work(tsd_t *tsd) { assert(!background_thread_enabled()); for (i = 1; i < ncpus; i++) { background_thread_info_t *info = &background_thread_info[i]; + assert(info->state != background_thread_paused); if (created_threads[i]) { background_threads_disable_single(tsd, info); } else { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); /* Clear in case the thread wasn't created. 
*/ - info->started = false; + info->state = background_thread_stopped; malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } - background_thread_info[0].started = false; + background_thread_info[0].state = background_thread_stopped; assert(n_background_threads == 1); } @@ -432,10 +447,15 @@ background_work(tsd_t *tsd, unsigned ind) { if (ind == 0) { background_thread0_work(tsd); } else { - while (info->started) { + while (info->state != background_thread_stopped) { + if (background_thread_pause_check(tsd_tsdn(tsd), + info)) { + continue; + } background_work_sleep_once(tsd_tsdn(tsd), info, ind); } } + assert(info->state == background_thread_stopped); background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } @@ -463,7 +483,7 @@ background_thread_entry(void *ind_arg) { static void background_thread_init(tsd_t *tsd, background_thread_info_t *info) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - info->started = true; + info->state = background_thread_started; background_thread_info_init(tsd_tsdn(tsd), info); n_background_threads++; } @@ -480,7 +500,8 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { bool need_new_thread; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - need_new_thread = background_thread_enabled() && !info->started; + need_new_thread = background_thread_enabled() && + (info->state == background_thread_stopped); if (need_new_thread) { background_thread_init(tsd, info); } @@ -492,7 +513,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { /* Threads are created asynchronously by Thread 0. 
*/ background_thread_info_t *t0 = &background_thread_info[0]; malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); - assert(t0->started); + assert(t0->state == background_thread_started); pthread_cond_signal(&t0->cond); malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); @@ -512,7 +533,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { malloc_printf(": arena 0 background thread creation " "failed (%d)\n", err); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - info->started = false; + info->state = background_thread_stopped; n_background_threads--; malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -543,7 +564,7 @@ background_threads_enable(tsd_t *tsd) { } background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - assert(!info->started); + assert(info->state == background_thread_stopped); background_thread_init(tsd, info); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); marked[i % ncpus] = true; @@ -586,7 +607,7 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, return; } - if (!info->started) { + if (info->state != background_thread_started) { goto label_done; } if (malloc_mutex_trylock(tsdn, &decay->mtx)) { @@ -694,7 +715,7 @@ background_thread_postfork_child(tsdn_t *tsdn) { for (unsigned i = 0; i < ncpus; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); - info->started = false; + info->state = background_thread_stopped; int ret = pthread_cond_init(&info->cond, NULL); assert(ret == 0); background_thread_info_init(tsdn, info); @@ -718,7 +739,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { for (unsigned i = 0; i < ncpus; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); - if (info->started) { + if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); } @@ -807,7 +828,7 @@ 
background_thread_boot1(tsdn_t *tsdn) { return true; } malloc_mutex_lock(tsdn, &info->mtx); - info->started = false; + info->state = background_thread_stopped; background_thread_info_init(tsdn, info); malloc_mutex_unlock(tsdn, &info->mtx); } diff --git a/src/ctl.c b/src/ctl.c index 242b36d4..b3ae4aab 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1948,9 +1948,9 @@ arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) { unsigned ind = arena_ind % ncpus; background_thread_info_t *info = &background_thread_info[ind]; - assert(info->started && !info->pause); + assert(info->state == background_thread_started); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - info->pause = true; + info->state = background_thread_paused; malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } @@ -1963,9 +1963,9 @@ arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { unsigned ind = arena_ind % ncpus; background_thread_info_t *info = &background_thread_info[ind]; - assert(info->started && info->pause); + assert(info->state == background_thread_paused); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - info->pause = false; + info->state = background_thread_started; malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index 81f8aeed..f7bd37c4 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -80,7 +80,7 @@ TEST_BEGIN(test_background_thread_running) { test_repeat_background_thread_ctl(false); test_switch_background_thread_ctl(true); - assert_b_eq(info->started, true, + assert_b_eq(info->state, background_thread_started, "Background_thread did not start.\n"); nstime_t start, now; From 813643c6a7be11f957b3a3412022435e328b6c0d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sun, 11 Jun 2017 11:40:59 -0700 Subject: [PATCH 0928/2608] Prevent background threads from running in post_reset(). 
We lookup freed extents for testing in post_reset. Take background_thread lock so that the extents are not modified at the same time. --- test/unit/arena_reset.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 6409a922..f5fb24d1 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -134,17 +134,25 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { } static void -do_arena_reset_post(void **ptrs, unsigned nptrs) { +do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { tsdn_t *tsdn; unsigned i; tsdn = tsdn_fetch(); + if (have_background_thread) { + malloc_mutex_lock(tsdn, + &background_thread_info[arena_ind % ncpus].mtx); + } /* Verify allocations no longer exist. */ for (i = 0; i < nptrs; i++) { assert_zu_eq(vsalloc(tsdn, ptrs[i]), 0, "Allocation should no longer exist"); } + if (have_background_thread) { + malloc_mutex_unlock(tsdn, + &background_thread_info[arena_ind % ncpus].mtx); + } free(ptrs); } @@ -180,7 +188,7 @@ TEST_BEGIN(test_arena_reset) { arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); do_arena_reset(arena_ind); - do_arena_reset_post(ptrs, nptrs); + do_arena_reset_post(ptrs, nptrs, arena_ind); } TEST_END @@ -239,7 +247,7 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should be initialized"); - do_arena_reset_post(ptrs, nptrs); + do_arena_reset_post(ptrs, nptrs, arena_ind); arena_ind_prev = arena_ind; arena_ind = do_arena_create(NULL); @@ -247,7 +255,7 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { assert_u_eq(arena_ind, arena_ind_prev, "Arena index should have been recycled"); do_arena_destroy(arena_ind); - do_arena_reset_post(ptrs, nptrs); + do_arena_reset_post(ptrs, nptrs, arena_ind); do_arena_destroy(arena_ind_another); } @@ -320,7 +328,7 @@ 
TEST_BEGIN(test_arena_destroy_hooks_unmap) { assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should be initialized"); - do_arena_reset_post(ptrs, nptrs); + do_arena_reset_post(ptrs, nptrs, arena_ind); memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t)); } From bff8db439c1b3222c83554d30c1e5f774eba7b48 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 5 Jun 2017 13:34:32 -0700 Subject: [PATCH 0929/2608] Update copyright dates. --- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index 104b1f8b..e308632a 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2016 Jason Evans . +Copyright (C) 2002-2017 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2016 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From aae8fd95fbde0af427719875810aee6cafeca539 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 9 Jun 2017 09:41:09 -0700 Subject: [PATCH 0930/2608] Update ChangeLog for 5.0.0. --- ChangeLog | 187 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) diff --git a/ChangeLog b/ChangeLog index e630595b..98c12f20 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,193 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.0.0 (June 13, 2017) + + Unlike all previous jemalloc releases, this release does not use naturally + aligned "chunks" for virtual memory management, and instead uses page-aligned + "extents". 
This change has few externally visible effects, but the internal + impacts are... extensive. Many other internal changes combine to make this + the most cohesively designed version of jemalloc so far, with ample + opportunity for further enhancements. + + Continuous integration is now an integral aspect of development thanks to the + efforts of @davidtgoldblatt, and the dev branch tends to remain reasonably + stable on the tested platforms (Linux, FreeBSD, macOS, and Windows). As a + side effect the official release frequency may decrease over time. + + New features: + - Implement optional per-CPU arena support; threads choose which arena to use + based on current CPU rather than on fixed thread-->arena associations. + (@interwq) + - Implement two-phase decay of unused dirty pages. Pages transition from + dirty-->muzzy-->clean, where the first phase transition relies on + madvise(... MADV_FREE) semantics, and the second phase transition discards + pages such that they are replaced with demand-zeroed pages on next access. + (@jasone) + - Increase decay time resolution from seconds to milliseconds. (@jasone) + - Implement opt-in per CPU background threads, and use them for asynchronous + decay-driven unused dirty page purging. (@interwq) + - Add mutex profiling, which collects a variety of statistics useful for + diagnosing overhead/contention issues. (@interwq) + - Add C++ new/delete operator bindings. (@djwatson) + - Support manually created arena destruction, such that all data and metadata + are discarded. Add MALLCTL_ARENAS_DESTROYED for accessing merged stats + associated with destroyed arenas. (@jasone) + - Add MALLCTL_ARENAS_ALL as a fixed index for use in accessing + merged/destroyed arena statistics via mallctl. (@jasone) + - Add opt.abort_conf to optionally abort if invalid configuration options are + detected during initialization. (@interwq) + - Add opt.stats_print_opts, so that e.g. 
JSON output can be selected for the + stats dumped during exit if opt.stats_print is true. (@jasone) + - Add --with-version=VERSION for use when embedding jemalloc into another + project's git repository. (@jasone) + - Add --disable-thp to support cross compiling. (@jasone) + - Add --with-lg-hugepage to support cross compiling. (@jasone) + - Add mallctl interfaces (various authors): + + background_thread + + opt.abort_conf + + opt.retain + + opt.percpu_arena + + opt.background_thread + + opt.{dirty,muzzy}_decay_ms + + opt.stats_print_opts + + arena..initialized + + arena..destroy + + arena..{dirty,muzzy}_decay_ms + + arena..extent_hooks + + arenas.{dirty,muzzy}_decay_ms + + arenas.bin..slab_size + + arenas.nlextents + + arenas.lextent..size + + arenas.create + + stats.background_thread.{num_threads,num_runs,run_interval} + + stats.mutexes.{ctl,background_thread,prof,reset}. + {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + stats.arenas..{dirty,muzzy}_decay_ms + + stats.arenas..uptime + + stats.arenas..{pmuzzy,base,internal,resident} + + stats.arenas..{dirty,muzzy}_{npurge,nmadvise,purged} + + stats.arenas..bins..{nslabs,reslabs,curslabs} + + stats.arenas..bins..mutex. + {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + stats.arenas..lextents..{nmalloc,ndalloc,nrequests,curlextents} + + stats.arenas.i.mutexes.{large,extent_avail,extents_dirty,extents_muzzy, + extents_retained,decay_dirty,decay_muzzy,base,tcache_list}. + {num_ops,num_spin_acq,num_wait,max_wait_time,total_wait_time,max_num_thds, + num_owner_switch} + + Portability improvements: + - Improve reentrant allocation support, such that deadlock is less likely if + e.g. a system library call in turn allocates memory. (@davidtgoldblatt, + @interwq) + - Support static linking of jemalloc with glibc. 
(@djwatson) + + Optimizations and refactors: + - Organize virtual memory as "extents" of virtual memory pages, rather than as + naturally aligned "chunks", and store all metadata in arbitrarily distant + locations. This reduces virtual memory external fragmentation, and will + interact better with huge pages (not yet explicitly supported). (@jasone) + - Fold large and huge size classes together; only small and large size classes + remain. (@jasone) + - Unify the allocation paths, and merge most fast-path branching decisions. + (@davidtgoldblatt, @interwq) + - Embed per thread automatic tcache into thread-specific data, which reduces + conditional branches and dereferences. Also reorganize tcache to increase + fast-path data locality. (@interwq) + - Rewrite atomics to closely model the C11 API, convert various + synchronization from mutex-based to atomic, and use the explicit memory + ordering control to resolve various hypothetical races without increasing + synchronization overhead. (@davidtgoldblatt) + - Extensively optimize rtree via various methods: + + Add multiple layers of rtree lookup caching, since rtree lookups are now + part of fast-path deallocation. (@interwq) + + Determine rtree layout at compile time. (@jasone) + + Make the tree shallower for common configurations. (@jasone) + + Embed the root node in the top-level rtree data structure, thus avoiding + one level of indirection. (@jasone) + + Further specialize leaf elements as compared to internal node elements, + and directly embed extent metadata needed for fast-path deallocation. + (@jasone) + + Ignore leading always-zero address bits (architecture-specific). + (@jasone) + - Reorganize headers (ongoing work) to make them hermetic, and disentangle + various module dependencies. (@davidtgoldblatt) + - Convert various internal data structures such as size class metadata from + boot-time-initialized to compile-time-initialized. 
Propagate resulting data + structure simplifications, such as making arena metadata fixed-size. + (@jasone) + - Simplify size class lookups when constrained to size classes that are + multiples of the page size. This speeds lookups, but the primary benefit is + complexity reduction in code that was the source of numerous regressions. + (@jasone) + - Lock individual extents when possible for localized extent operations, + rather than relying on a top-level arena lock. (@davidtgoldblatt, @jasone) + - Use first fit layout policy instead of best fit, in order to improve + packing. (@jasone) + - If munmap(2) is not in use, use an exponential series to grow each arena's + virtual memory, so that the number of disjoint virtual memory mappings + remains low. (@jasone) + - Implement per arena base allocators, so that arenas never share any virtual + memory pages. (@jasone) + - Automatically generate private symbol name mangling macros. (@jasone) + + Incompatible changes: + - Replace chunk hooks with an expanded/normalized set of extent hooks. + (@jasone) + - Remove ratio-based purging. (@jasone) + - Remove --disable-tcache. (@jasone) + - Remove --disable-tls. (@jasone) + - Remove --enable-ivsalloc. (@jasone) + - Remove --with-lg-size-class-group. (@jasone) + - Remove --with-lg-tiny-min. (@jasone) + - Remove --disable-cc-silence. (@jasone) + - Remove --enable-code-coverage. (@jasone) + - Remove --disable-munmap (replaced by opt.retain). (@jasone) + - Remove Valgrind support. (@jasone) + - Remove quarantine support. (@jasone) + - Remove redzone support. 
(@jasone) + - Remove mallctl interfaces (various authors): + + config.munmap + + config.tcache + + config.tls + + config.valgrind + + opt.lg_chunk + + opt.purge + + opt.lg_dirty_mult + + opt.decay_time + + opt.quarantine + + opt.redzone + + opt.thp + + arena..lg_dirty_mult + + arena..decay_time + + arena..chunk_hooks + + arenas.initialized + + arenas.lg_dirty_mult + + arenas.decay_time + + arenas.bin..run_size + + arenas.nlruns + + arenas.lrun..size + + arenas.nhchunks + + arenas.hchunk..size + + arenas.extend + + stats.cactive + + stats.arenas..lg_dirty_mult + + stats.arenas..decay_time + + stats.arenas..metadata.{mapped,allocated} + + stats.arenas..{npurge,nmadvise,purged} + + stats.arenas..huge.{allocated,nmalloc,ndalloc,nrequests} + + stats.arenas..bins..{nruns,reruns,curruns} + + stats.arenas..lruns..{nmalloc,ndalloc,nrequests,curruns} + + stats.arenas..hchunks..{nmalloc,ndalloc,nrequests,curhchunks} + + Bug fixes: + - Improve interval-based profile dump triggering to dump only one profile when + a single allocation's size exceeds the interval. (@jasone) + - Use prefixed function names (as controlled by --with-jemalloc-prefix) when + pruning backtrace frames in jeprof. (@jasone) + * 4.5.0 (February 28, 2017) This is the first release to benefit from much broader continuous integration From ba29113e5a58caeb6b4a65b1db6d8efae79cae45 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 13 Jun 2017 11:01:24 -0700 Subject: [PATCH 0931/2608] Update MSVC project files. 
--- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 11 ++++++---- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 21 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 832ff69d..2addd295 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -36,6 +36,7 @@ + @@ -49,6 +50,7 @@ + @@ -56,6 +58,7 @@ + @@ -227,7 +230,7 @@ Level3 Disabled - _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -243,7 +246,7 @@ Level3 Disabled - JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreadedDebug 4090;4146;4267;4334 @@ -305,7 +308,7 @@ true true ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) - _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -324,7 +327,7 @@ MaxSpeed true true - _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreaded 4090;4146;4267;4334 diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 
9d4a7c7d..4edf09b4 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -10,6 +10,9 @@ Source Files + + Source Files + Source Files @@ -34,15 +37,24 @@ Source Files + + Source Files + Source Files Source Files + + Source Files + Source Files + + Source Files + Source Files @@ -64,6 +76,9 @@ Source Files + + Source Files + Source Files @@ -76,11 +91,5 @@ Source Files - - Source Files - - - Source Files - \ No newline at end of file From d955d6f2be7f17ba1f9a81f457e72565474cf18d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 13 Jun 2017 16:16:33 -0700 Subject: [PATCH 0932/2608] Fix extent_hooks in extent_grow_retained(). This issue caused the default extent alloc function to be incorrectly used even when arena..extent_hooks is set. This bug was introduced by 411697adcda2fd75e135cdcdafb95f2bd295dc7f (Use exponential series to size extents.), which was first released in 5.0.0. --- src/extent.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index 386a7ce6..f31ed32e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1066,9 +1066,18 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } bool zeroed = false; bool committed = false; - void *ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, - &zeroed, &committed, (dss_prec_t)atomic_load_u(&arena->dss_prec, - ATOMIC_RELAXED)); + + void *ptr; + if (*r_extent_hooks == &extent_hooks_default) { + ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, + &zeroed, &committed, (dss_prec_t)atomic_load_u( + &arena->dss_prec, ATOMIC_RELAXED)); + } else { + ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, + alloc_size, PAGE, &zeroed, &committed, + arena_ind_get(arena)); + } + extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed); From bdcf40a6208962008010c30463dc7dbddf3fc564 Mon Sep 17 00:00:00 2001 
From: Qi Wang Date: Tue, 13 Jun 2017 16:35:35 -0700 Subject: [PATCH 0933/2608] Add alloc hook test in test/integration/extent. --- test/integration/extent.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/integration/extent.c b/test/integration/extent.c index 7262b803..1dcf2176 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -39,10 +39,13 @@ test_extent_body(unsigned arena_ind) { assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), 0, "Unexpected mallctlnametomib() failure"); purge_mib[1] = (size_t)arena_ind; + called_alloc = false; + try_alloc = true; try_dalloc = false; try_decommit = false; p = mallocx(large0 * 2, flags); assert_ptr_not_null(p, "Unexpected mallocx() error"); + assert_true(called_alloc, "Expected alloc call"); called_dalloc = false; called_decommit = false; did_purge_lazy = false; From a4d6fe73cf07b3be3af6b7811cfc5950320bb37f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Jun 2017 12:12:23 -0700 Subject: [PATCH 0934/2608] Only abort on dlsym when necessary. If neither background_thread nor lazy_lock is in use, do not abort on dlsym errors. 
--- .../jemalloc/internal/background_thread_externs.h | 1 + src/background_thread.c | 14 +++++++++++--- src/ctl.c | 7 +++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 7c883697..8b4b8471 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -6,6 +6,7 @@ extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; extern size_t n_background_threads; extern background_thread_info_t *background_thread_info; +extern bool can_enable_background_thread; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); diff --git a/src/background_thread.c b/src/background_thread.c index a7403b85..1ff59447 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -20,6 +20,9 @@ size_t n_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; +/* False if no necessary runtime support. 
*/ +bool can_enable_background_thread; + /******************************************************************************/ #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER @@ -785,9 +788,14 @@ background_thread_boot0(void) { #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); + can_enable_background_thread = false; + if (config_lazy_lock || opt_background_thread) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + } else { + can_enable_background_thread = true; } #endif return false; diff --git a/src/ctl.c b/src/ctl.c index b3ae4aab..f1310cdf 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1522,6 +1522,13 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, background_thread_enabled_set(tsd_tsdn(tsd), newval); if (newval) { + if (!can_enable_background_thread) { + malloc_printf(": Error in dlsym(" + "RTLD_NEXT, \"pthread_create\"). Cannot " + "enable background_thread\n"); + ret = EFAULT; + goto label_return; + } if (background_threads_enable(tsd)) { ret = EFAULT; goto label_return; From 84f6c2cae0fb1399377ef6aea9368444c4987cc6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Jun 2017 18:44:13 -0700 Subject: [PATCH 0935/2608] Log decay->nunpurged before purging. During purging, we may unlock decay->mtx. Therefore we should finish logging decay related counters before attempt to purge. --- src/arena.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 0912df31..019dd877 100644 --- a/src/arena.c +++ b/src/arena.c @@ -725,12 +725,13 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); + /* We may unlock decay->mtx when try_purge(). Finish logging first. 
*/ + decay->nunpurged = (npages_limit > current_npages) ? npages_limit : + current_npages; if (purge) { arena_decay_try_purge(tsdn, arena, decay, extents, current_npages, npages_limit); } - decay->nunpurged = (npages_limit > current_npages) ? npages_limit : - current_npages; } static void From ae93fb08e21284f025871e9f5daccf3d0329b99b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Jun 2017 15:16:18 -0700 Subject: [PATCH 0936/2608] Pass tsd to tcache_flush(). --- include/jemalloc/internal/tcache_externs.h | 2 +- src/ctl.c | 2 +- src/tcache.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index abe133fa..db3e9c7d 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -48,7 +48,7 @@ void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); -void tcache_flush(void); +void tcache_flush(tsd_t *tsd); bool tsd_tcache_data_init(tsd_t *tsd); bool tsd_tcache_enabled_data_init(tsd_t *tsd); diff --git a/src/ctl.c b/src/ctl.c index f1310cdf..dfbeddda 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1696,7 +1696,7 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, READONLY(); WRITEONLY(); - tcache_flush(); + tcache_flush(tsd); ret = 0; label_return: diff --git a/src/tcache.c b/src/tcache.c index 6355805b..936ef314 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -474,8 +474,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } void -tcache_flush(void) { - tsd_t *tsd = tsd_fetch(); +tcache_flush(tsd_t *tsd) { assert(tcache_available(tsd)); tcache_flush_cache(tsd, tsd_tcachep_get(tsd)); } From 9b1befabbb7a7105501d27843873d14e1c2de54b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Jun 2017 16:53:22 -0700 Subject: [PATCH 0937/2608] Add minimal initialized 
TSD. We use the minimal_initialized tsd (which requires no cleanup) for free() specifically, if tsd hasn't been initialized yet. Any other activity will transit the state from minimal to normal. This is to workaround the case where a thread has no malloc calls in its lifetime until during thread termination, free() happens after tls destructors. --- include/jemalloc/internal/tsd.h | 30 ++++++++++++++++------ src/jemalloc.c | 10 +++++++- src/tsd.c | 44 ++++++++++++++++++++++----------- 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 631fbf1f..155a2ec6 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -99,9 +99,10 @@ enum { tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ /* the above 2 nominal states should be lower values. */ tsd_state_nominal_max = 1, /* used for comparison only. */ - tsd_state_purgatory = 2, - tsd_state_reincarnated = 3, - tsd_state_uninitialized = 4 + tsd_state_minimal_initialized = 2, + tsd_state_purgatory = 3, + tsd_state_reincarnated = 4, + tsd_state_uninitialized = 5 }; /* Manually limit tsd_state_t to a single byte.
*/ @@ -190,7 +191,8 @@ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ assert(tsd->state == tsd_state_nominal || \ tsd->state == tsd_state_nominal_slow || \ - tsd->state == tsd_state_reincarnated); \ + tsd->state == tsd_state_reincarnated || \ + tsd->state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } MALLOC_TSD @@ -225,7 +227,8 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t val) { \ - assert(tsd->state != tsd_state_reincarnated); \ + assert(tsd->state != tsd_state_reincarnated && \ + tsd->state != tsd_state_minimal_initialized); \ *tsd_##n##p_get(tsd) = val; \ } MALLOC_TSD @@ -248,7 +251,7 @@ tsd_fast(tsd_t *tsd) { } JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_fetch_impl(bool init, bool internal) { +tsd_fetch_impl(bool init, bool minimal) { tsd_t *tsd = tsd_get(init); if (!init && tsd_get_allocates() && tsd == NULL) { @@ -257,7 +260,7 @@ tsd_fetch_impl(bool init, bool internal) { assert(tsd != NULL); if (unlikely(tsd->state != tsd_state_nominal)) { - return tsd_fetch_slow(tsd, internal); + return tsd_fetch_slow(tsd, minimal); } assert(tsd_fast(tsd)); tsd_assert_fast(tsd); @@ -265,9 +268,20 @@ tsd_fetch_impl(bool init, bool internal) { return tsd; } +/* Get a minimal TSD that requires no cleanup. See comments in free(). */ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_fetch_min(void) { + return tsd_fetch_impl(true, true); +} + +/* For internal background threads use only. */ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_internal_fetch(void) { - return tsd_fetch_impl(true, true); + tsd_t *tsd = tsd_fetch_min(); + /* Use reincarnated state to prevent full initialization. 
*/ + tsd->state = tsd_state_reincarnated; + + return tsd; } JEMALLOC_ALWAYS_INLINE tsd_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index 52c86aa6..c773cc44 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2264,7 +2264,15 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { - tsd_t *tsd = tsd_fetch(); + /* + * We avoid setting up tsd fully (e.g. tcache, arena binding) + * based on only free() calls -- other activities trigger the + * minimal to full transition. This is because free() may + * happen during thread shutdown after tls deallocation: if a + * thread never had any malloc activities until then, a + * fully-setup tsd won't be destructed properly. + */ + tsd_t *tsd = tsd_fetch_min(); check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; diff --git a/src/tsd.c b/src/tsd.c index 97330332..f968992f 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -87,7 +87,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { static bool tsd_data_init_nocleanup(tsd_t *tsd) { - assert(tsd->state == tsd_state_reincarnated); + assert(tsd->state == tsd_state_reincarnated || + tsd->state == tsd_state_minimal_initialized); /* * During reincarnation, there is no guarantee that the cleanup function * will be called (deallocation may happen after all tsd destructors). @@ -103,15 +104,8 @@ tsd_data_init_nocleanup(tsd_t *tsd) { } tsd_t * -tsd_fetch_slow(tsd_t *tsd, bool internal) { - if (internal) { - /* For internal background threads use only. */ - assert(tsd->state == tsd_state_uninitialized); - tsd->state = tsd_state_reincarnated; - tsd_set(tsd); - tsd_data_init_nocleanup(tsd); - return tsd; - } +tsd_fetch_slow(tsd_t *tsd, bool minimal) { + assert(!tsd_fast(tsd)); if (tsd->state == tsd_state_nominal_slow) { /* On slow path but no work needed. 
*/ @@ -119,11 +113,28 @@ tsd_fetch_slow(tsd_t *tsd, bool internal) { tsd_reentrancy_level_get(tsd) > 0 || *tsd_arenas_tdata_bypassp_get(tsd)); } else if (tsd->state == tsd_state_uninitialized) { - tsd->state = tsd_state_nominal; - tsd_slow_update(tsd); - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - tsd_data_init(tsd); + if (!minimal) { + tsd->state = tsd_state_nominal; + tsd_slow_update(tsd); + /* Trigger cleanup handler registration. */ + tsd_set(tsd); + tsd_data_init(tsd); + } else { + tsd->state = tsd_state_minimal_initialized; + tsd_set(tsd); + tsd_data_init_nocleanup(tsd); + } + } else if (tsd->state == tsd_state_minimal_initialized) { + if (!minimal) { + /* Switch to fully initialized. */ + tsd->state = tsd_state_nominal; + assert(*tsd_reentrancy_levelp_get(tsd) >= 1); + (*tsd_reentrancy_levelp_get(tsd))--; + tsd_slow_update(tsd); + tsd_data_init(tsd); + } else { + assert_tsd_data_cleanup_done(tsd); + } } else if (tsd->state == tsd_state_purgatory) { tsd->state = tsd_state_reincarnated; tsd_set(tsd); @@ -197,6 +208,9 @@ tsd_cleanup(void *arg) { case tsd_state_uninitialized: /* Do nothing. */ break; + case tsd_state_minimal_initialized: + /* This implies the thread only did free() in its life time. */ + /* Fall through. */ case tsd_state_reincarnated: /* * Reincarnated means another destructor deallocated memory From d35c037e03e1450794dcf595e49a1e1f97f87ac4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 19 Jun 2017 21:19:15 -0700 Subject: [PATCH 0938/2608] Clear tcache_ql after fork in child. 
--- src/arena.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/arena.c b/src/arena.c index 019dd877..d401808b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2133,6 +2133,23 @@ void arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { unsigned i; + atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); + atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); + if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) { + arena_nthreads_inc(arena, false); + } + if (tsd_iarena_get(tsdn_tsd(tsdn)) == arena) { + arena_nthreads_inc(arena, true); + } + if (config_stats) { + ql_new(&arena->tcache_ql); + tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); + if (tcache != NULL && tcache->arena == arena) { + ql_elm_new(tcache, link); + ql_tail_insert(&arena->tcache_ql, tcache, link); + } + } + for (i = 0; i < NBINS; i++) { malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); } From 37f3fa0941022d047fdcd86959d5f94f872e45e2 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 Jun 2017 20:35:33 -0700 Subject: [PATCH 0939/2608] Mask signals during background thread creation. This prevents signals from being inadvertently delivered to background threads. 
--- .../internal/jemalloc_internal_decls.h | 1 + src/background_thread.c | 38 +++++++++++++++++-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 1efdb56b..8ae5ef48 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -22,6 +22,7 @@ # include # endif # include +# include # ifdef JEMALLOC_OS_UNFAIR_LOCK # include # endif diff --git a/src/background_thread.c b/src/background_thread.c index 1ff59447..f0aa04f3 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -347,6 +347,38 @@ background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { static void *background_thread_entry(void *ind_arg); +static int +background_thread_create_signals_masked(pthread_t *thread, + const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) { + /* + * Mask signals during thread creation so that the thread inherits + * an empty signal set. + */ + sigset_t set; + sigemptyset(&set); + sigset_t oldset; + int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset); + if (mask_err != 0) { + return mask_err; + } + int create_err = pthread_create_wrapper(thread, attr, start_routine, + arg); + /* + * Restore the signal mask. Failure to restore the signal mask here + * changes program behavior. 
+ */ + int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL); + if (restore_err != 0) { + malloc_printf(": background thread creation " + "failed (%d), and signal mask restoration failed " + "(%d)\n", create_err, restore_err); + if (opt_abort) { + abort(); + } + } + return create_err; +} + static void check_background_thread_creation(tsd_t *tsd, unsigned *n_created, bool *created_threads) { @@ -377,8 +409,8 @@ label_restart: malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); pre_reentrancy(tsd); - int err = pthread_create_wrapper(&info->thread, NULL, - background_thread_entry, (void *)(uintptr_t)i); + int err = background_thread_create_signals_masked(&info->thread, + NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); if (err == 0) { @@ -528,7 +560,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { * To avoid complications (besides reentrancy), create internal * background threads with the underlying pthread_create. */ - int err = pthread_create_wrapper(&info->thread, NULL, + int err = background_thread_create_signals_masked(&info->thread, NULL, background_thread_entry, (void *)thread_ind); post_reentrancy(tsd); From 52fc887b49c768a9cee61d9bda9c885efb10fe95 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Jun 2017 13:57:50 -0700 Subject: [PATCH 0940/2608] Avoid inactivity_check within background threads. Passing is_background_thread down the decay path, so that background thread itself won't attempt inactivity_check. This fixes an issue with background thread doing trylock on a mutex it already owns. 
--- .../internal/background_thread_inlines.h | 5 ++- src/arena.c | 39 +++++++++++-------- 2 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index fd5095f2..ef50231e 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -41,8 +41,9 @@ background_thread_indefinite_sleep(background_thread_info_t *info) { } JEMALLOC_ALWAYS_INLINE void -arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena) { - if (!background_thread_enabled()) { +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, + bool is_background_thread) { + if (!background_thread_enabled() || is_background_thread) { return; } background_thread_info_t *info = diff --git a/src/arena.c b/src/arena.c index d401808b..f9b0f685 100644 --- a/src/arena.c +++ b/src/arena.c @@ -61,7 +61,8 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit); + arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, + bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, @@ -378,7 +379,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { - arena_background_thread_inactivity_check(tsdn, arena); + arena_background_thread_inactivity_check(tsdn, arena, false); } } @@ -687,10 +688,11 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, size_t current_npages, size_t npages_limit) { + 
extents_t *extents, size_t current_npages, size_t npages_limit, + bool is_background_thread) { if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit); + npages_limit, is_background_thread); } } @@ -720,7 +722,7 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, const nstime_t *time, bool purge) { + extents_t *extents, const nstime_t *time, bool is_background_thread) { size_t current_npages = extents_npages_get(extents); arena_decay_epoch_advance_helper(decay, time, current_npages); @@ -728,9 +730,10 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, /* We may unlock decay->mtx when try_purge(). Finish logging first. */ decay->nunpurged = (npages_limit > current_npages) ? npages_limit : current_npages; - if (purge) { + + if (!background_thread_enabled() || is_background_thread) { arena_decay_try_purge(tsdn, arena, decay, extents, - current_npages, npages_limit); + current_npages, npages_limit, is_background_thread); } } @@ -795,7 +798,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0); + 0, is_background_thread); } return false; } @@ -830,14 +833,13 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - bool should_purge = is_background_thread || - !background_thread_enabled(); arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, - should_purge); + is_background_thread); } else if (is_background_thread) { arena_decay_try_purge(tsdn, arena, decay, extents, extents_npages_get(extents), - arena_decay_backlog_npages_limit(decay)); + arena_decay_backlog_npages_limit(decay), + is_background_thread); } return 
advance_epoch; @@ -916,7 +918,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, - bool all, extent_list_t *decay_extents) { + bool all, extent_list_t *decay_extents, bool is_background_thread) { UNUSED size_t nmadvise, nunmapped; size_t npurged; @@ -946,7 +948,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_muzzy, extent); arena_background_thread_inactivity_check(tsdn, - arena); + arena, is_background_thread); break; } /* Fall through. */ @@ -985,7 +987,8 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit) { + extents_t *extents, bool all, size_t npages_limit, + bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); @@ -1005,7 +1008,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, npages_limit, &decay_extents); if (npurge != 0) { UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, - &extent_hooks, decay, extents, all, &decay_extents); + &extent_hooks, decay, extents, all, &decay_extents, + is_background_thread); assert(npurged == npurge); } @@ -1018,7 +1022,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, extents, all, 0); + arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, + is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; From a3f4977217af417b547e34cec3f5bd16874b8aa9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 23 Jun 2017 09:58:35 -0700 Subject: [PATCH 0941/2608] Add 
thread name for background threads. --- configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/background_thread.c | 4 +++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 32ae02c2..770fff56 100644 --- a/configure.ac +++ b/configure.ac @@ -1470,6 +1470,15 @@ if test "x$abi" != "xpecoff" ; then if test "x${je_cv_pthread_atfork}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) fi + dnl Check if pthread_setname_np is available with the expected API. + JE_COMPILABLE([pthread_setname_np(3)], [ +#include <pthread.h> +], [ + pthread_setname_np(pthread_self(), "setname_test"); +], [je_cv_pthread_setname_np]) + if test "x${je_cv_pthread_setname_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ]) + fi fi JE_APPEND_VS(CPPFLAGS, -D_REENTRANT) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 2bf9dea1..c0f834f2 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -98,6 +98,9 @@ /* Defined if pthread_atfork(3) is available. */ #undef JEMALLOC_HAVE_PTHREAD_ATFORK +/* Defined if pthread_setname_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. 
*/ diff --git a/src/background_thread.c b/src/background_thread.c index f0aa04f3..ab076fee 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -499,7 +499,9 @@ static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; assert(thread_ind < ncpus); - +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); +#endif if (opt_percpu_arena != percpu_arena_disabled) { set_current_thread_affinity((int)thread_ind); } From d49ac4c7096b79539ce84fa1bfe122bc9e3f1b43 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Jun 2017 10:40:02 -0700 Subject: [PATCH 0942/2608] Fix assertion typos. Reported by Conrad Meyer. --- src/ctl.c | 2 +- src/jemalloc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index dfbeddda..a647c6c0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1970,7 +1970,7 @@ arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { unsigned ind = arena_ind % ncpus; background_thread_info_t *info = &background_thread_info[ind]; - assert(info->state = background_thread_paused); + assert(info->state == background_thread_paused); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->state = background_thread_started; malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); diff --git a/src/jemalloc.c b/src/jemalloc.c index c773cc44..aa79633c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1799,7 +1799,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { */ assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC || dopts->tcache_ind == TCACHE_IND_NONE); - assert(dopts->arena_ind = ARENA_IND_AUTOMATIC); + assert(dopts->arena_ind == ARENA_IND_AUTOMATIC); dopts->tcache_ind = TCACHE_IND_NONE; /* We know that arena 0 has already been initialized. 
*/ dopts->arena_ind = 0; From d6eb8ac8f30745b06744ad5cb2988a392c4448cd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Jun 2017 15:36:41 -0700 Subject: [PATCH 0943/2608] Set reentrancy when invoking customized extent hooks. Customized extent hooks may malloc / free thus trigger reentry. Support this behavior by adding reentrancy on hook functions. --- include/jemalloc/internal/base_externs.h | 2 +- src/arena.c | 4 +- src/base.c | 39 ++++++----- src/extent.c | 84 +++++++++++++++++++++--- test/unit/base.c | 15 ++--- 5 files changed, 109 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 0a1114f4..a4fd5ac7 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -3,7 +3,7 @@ base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void base_delete(base_t *base); +void base_delete(tsdn_t *tsdn, base_t *base); extent_hooks_t *base_extent_hooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); diff --git a/src/arena.c b/src/arena.c index f9b0f685..4e3bd6c1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1257,7 +1257,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Destroy the base allocator, which manages all metadata ever mapped by * this arena. 
*/ - base_delete(arena->base); + base_delete(tsd_tsdn(tsd), arena->base); } static extent_t * @@ -2061,7 +2061,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { return arena; label_error: if (ind != 0) { - base_delete(base); + base_delete(tsdn, base); } return NULL; } diff --git a/src/base.c b/src/base.c index 8e1544fd..22c94338 100644 --- a/src/base.c +++ b/src/base.c @@ -15,7 +15,7 @@ static base_t *b0; /******************************************************************************/ static void * -base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) { +base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; @@ -25,15 +25,18 @@ base_map(extent_hooks_t *extent_hooks, unsigned ind, size_t size) { if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); } else { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, &zero, &commit, ind); + post_reentrancy(tsdn_tsd(tsdn)); } return addr; } static void -base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, +base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, size_t size) { /* * Cascade through dalloc, decommit, purge_forced, and purge_lazy, @@ -61,27 +64,32 @@ base_unmap(extent_hooks_t *extent_hooks, unsigned ind, void *addr, /* Nothing worked. This should never happen. 
*/ not_reached(); } else { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - return; + goto label_done; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - return; + goto label_done; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - return; + goto label_done; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - return; + goto label_done; } /* Nothing worked. That's the application's problem. */ + label_done: + post_reentrancy(tsdn_tsd(tsdn)); + return; } } @@ -157,7 +165,7 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, * On success a pointer to the initialized base_block_t header is returned. */ static base_block_t * -base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, +base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); @@ -179,7 +187,7 @@ base_block_alloc(extent_hooks_t *extent_hooks, unsigned ind, size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? min_block_size : next_block_size; - base_block_t *block = (base_block_t *)base_map(extent_hooks, ind, + base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind, block_size); if (block == NULL) { return NULL; @@ -207,8 +215,9 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { * called. 
*/ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(extent_hooks, base_ind_get(base), - &base->pind_last, &base->extent_sn_next, size, alignment); + base_block_t *block = base_block_alloc(tsdn, extent_hooks, + base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, + alignment); malloc_mutex_lock(tsdn, &base->mtx); if (block == NULL) { return NULL; @@ -234,8 +243,8 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(extent_hooks, ind, &pind_last, - &extent_sn_next, sizeof(base_t), QUANTUM); + base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind, + &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; } @@ -249,7 +258,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { - base_unmap(extent_hooks, ind, block, block->size); + base_unmap(tsdn, extent_hooks, ind, block, block->size); return NULL; } base->pind_last = pind_last; @@ -272,13 +281,13 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } void -base_delete(base_t *base) { +base_delete(tsdn_t *tsdn, base_t *base) { extent_hooks_t *extent_hooks = base_extent_hooks_get(base); base_block_t *next = base->blocks; do { base_block_t *block = next; next = block->next; - base_unmap(extent_hooks, base_ind_get(base), block, + base_unmap(tsdn, extent_hooks, base_ind_get(base), block, block->size); } while (next != NULL); } diff --git a/src/extent.c b/src/extent.c index f31ed32e..4b66dd88 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1073,9 +1073,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, &zeroed, &committed, (dss_prec_t)atomic_load_u( &arena->dss_prec, ATOMIC_RELAXED)); } else { + 
assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, alloc_size, PAGE, &zeroed, &committed, arena_ind_get(arena)); + post_reentrancy(tsdn_tsd(tsdn)); } extent_init(extent, arena, ptr, alloc_size, false, NSIZES, @@ -1247,8 +1250,11 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, alignment, zero, commit); } else { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, esize, alignment, zero, commit, arena_ind_get(arena)); + post_reentrancy(tsdn_tsd(tsdn)); } if (addr == NULL) { extent_dalloc(tsdn, arena, extent); @@ -1486,10 +1492,13 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); err = ((*r_extent_hooks)->dalloc == NULL || (*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena))); + post_reentrancy(tsdn_tsd(tsdn)); } if (!err) { @@ -1515,6 +1524,10 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, } extent_reregister(tsdn, extent); + if (*r_extent_hooks != &extent_hooks_default) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); + } /* Try to decommit; purge if that fails. 
*/ bool zeroed; if (!extent_committed_get(extent)) { @@ -1536,6 +1549,9 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, } else { zeroed = false; } + if (*r_extent_hooks != &extent_hooks_default) { + post_reentrancy(tsdn_tsd(tsdn)); + } extent_zeroed_set(extent, zeroed); if (config_prof) { @@ -1579,9 +1595,12 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_destroy_default_impl(extent_base_get(extent), extent_size_get(extent)); } else if ((*r_extent_hooks)->destroy != NULL) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); (*r_extent_hooks)->destroy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena)); + post_reentrancy(tsdn_tsd(tsdn)); } extent_dalloc(tsdn, arena, extent); @@ -1602,9 +1621,16 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, WITNESS_RANK_CORE, growing_retained ? 1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); + if (*r_extent_hooks != &extent_hooks_default) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); + } bool err = ((*r_extent_hooks)->commit == NULL || (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); + if (*r_extent_hooks != &extent_hooks_default) { + post_reentrancy(tsdn_tsd(tsdn)); + } extent_committed_set(extent, extent_committed_get(extent) || !err); return err; } @@ -1633,10 +1659,17 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); + if (*r_extent_hooks != &extent_hooks_default) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); + } bool err = ((*r_extent_hooks)->decommit == NULL || (*r_extent_hooks)->decommit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); + if (*r_extent_hooks != &extent_hooks_default) { + post_reentrancy(tsdn_tsd(tsdn)); + } extent_committed_set(extent, 
extent_committed_get(extent) && err); return err; } @@ -1663,10 +1696,23 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, WITNESS_RANK_CORE, growing_retained ? 1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); - return ((*r_extent_hooks)->purge_lazy == NULL || - (*r_extent_hooks)->purge_lazy(*r_extent_hooks, + + if ((*r_extent_hooks)->purge_lazy == NULL) { + return true; + } + + if (*r_extent_hooks != &extent_hooks_default) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); + } + bool err = (*r_extent_hooks)->purge_lazy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena))); + arena_ind_get(arena)); + if (*r_extent_hooks != &extent_hooks_default) { + post_reentrancy(tsdn_tsd(tsdn)); + } + + return err; } bool @@ -1699,10 +1745,21 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, WITNESS_RANK_CORE, growing_retained ? 1 : 0); extent_hooks_assure_initialized(arena, r_extent_hooks); - return ((*r_extent_hooks)->purge_forced == NULL || - (*r_extent_hooks)->purge_forced(*r_extent_hooks, + + if ((*r_extent_hooks)->purge_forced == NULL) { + return true; + } + if (*r_extent_hooks != &extent_hooks_default) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); + } + bool err = (*r_extent_hooks)->purge_forced(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena))); + arena_ind_get(arena)); + if (*r_extent_hooks != &extent_hooks_default) { + post_reentrancy(tsdn_tsd(tsdn)); + } + return err; } bool @@ -1771,9 +1828,17 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_lock2(tsdn, extent, trail); - if ((*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), + if (*r_extent_hooks != &extent_hooks_default) { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); + } + bool err = (*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, 
extent_committed_get(extent), - arena_ind_get(arena))) { + arena_ind_get(arena)); + if (*r_extent_hooks != &extent_hooks_default) { + post_reentrancy(tsdn_tsd(tsdn)); + } + if (err) { goto label_error_c; } @@ -1843,10 +1908,13 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { + assert(!tsdn_null(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn)); err = (*r_extent_hooks)->merge(*r_extent_hooks, extent_base_get(a), extent_size_get(a), extent_base_get(b), extent_size_get(b), extent_committed_get(a), arena_ind_get(arena)); + post_reentrancy(tsdn_tsd(tsdn)); } if (err) { diff --git a/test/unit/base.c b/test/unit/base.c index 5dc42f0a..7fa24ac0 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -27,11 +27,10 @@ static extent_hooks_t hooks_not_null = { }; TEST_BEGIN(test_base_hooks_default) { - tsdn_t *tsdn; base_t *base; size_t allocated0, allocated1, resident, mapped; - tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); if (config_stats) { @@ -49,13 +48,12 @@ TEST_BEGIN(test_base_hooks_default) { "At least 42 bytes were allocated by base_alloc()"); } - base_delete(base); + base_delete(tsdn, base); } TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; - tsdn_t *tsdn; base_t *base; size_t allocated0, allocated1, resident, mapped; @@ -68,7 +66,7 @@ TEST_BEGIN(test_base_hooks_null) { memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t)); memcpy(&hooks, &hooks_null, sizeof(extent_hooks_t)); - tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, &hooks); assert_ptr_not_null(base, "Unexpected base_new() failure"); @@ -87,7 +85,7 @@ TEST_BEGIN(test_base_hooks_null) { "At least 42 bytes were allocated by base_alloc()"); } - base_delete(base); + base_delete(tsdn, base); memcpy(&hooks, &hooks_orig, sizeof(extent_hooks_t)); } @@ -95,7 +93,6 @@ TEST_END 
TEST_BEGIN(test_base_hooks_not_null) { extent_hooks_t hooks_orig; - tsdn_t *tsdn; base_t *base; void *p, *q, *r, *r_exp; @@ -108,7 +105,7 @@ TEST_BEGIN(test_base_hooks_not_null) { memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t)); memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); - tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); did_alloc = false; base = base_new(tsdn, 0, &hooks); assert_ptr_not_null(base, "Unexpected base_new() failure"); @@ -200,7 +197,7 @@ TEST_BEGIN(test_base_hooks_not_null) { called_dalloc = called_destroy = called_decommit = called_purge_lazy = called_purge_forced = false; - base_delete(base); + base_delete(tsdn, base); assert_true(called_dalloc, "Expected dalloc call"); assert_true(!called_destroy, "Unexpected destroy call"); assert_true(called_decommit, "Expected decommit call"); From 425463a4465043f5f1ccb7f4b257e31ad95b1ed6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Jun 2017 16:18:30 -0700 Subject: [PATCH 0944/2608] Check arena in current context in pre_reentrancy. --- .../internal/jemalloc_internal_inlines_a.h | 5 +- src/arena.c | 2 +- src/background_thread.c | 6 +- src/base.c | 13 ++-- src/extent.c | 68 +++++++++---------- src/jemalloc.c | 2 +- src/prof.c | 2 +- 7 files changed, 51 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 854fb1e2..24ea4162 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -146,7 +146,10 @@ tcache_get(tsd_t *tsd) { } static inline void -pre_reentrancy(tsd_t *tsd) { +pre_reentrancy(tsd_t *tsd, arena_t *arena) { + /* arena is the current context. Reentry from a0 is not allowed. 
*/ + assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); + bool fast = tsd_fast(tsd); ++*tsd_reentrancy_levelp_get(tsd); if (fast) { diff --git a/src/arena.c b/src/arena.c index 4e3bd6c1..b2830866 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2051,7 +2051,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * is done enough that we should have tsd. */ assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + pre_reentrancy(tsdn_tsd(tsdn), arena); if (hooks_arena_new_hook) { hooks_arena_new_hook(); } diff --git a/src/background_thread.c b/src/background_thread.c index ab076fee..cc5100d1 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -316,7 +316,7 @@ background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { &background_thread_lock); } - pre_reentrancy(tsd); + pre_reentrancy(tsd, NULL); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); bool has_thread; assert(info->state != background_thread_paused); @@ -408,7 +408,7 @@ label_restart: */ malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - pre_reentrancy(tsd); + pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); @@ -557,7 +557,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { return false; } - pre_reentrancy(tsd); + pre_reentrancy(tsd, NULL); /* * To avoid complications (besides reentrancy), create internal * background threads with the underlying pthread_create. diff --git a/src/base.c b/src/base.c index 22c94338..97078b13 100644 --- a/src/base.c +++ b/src/base.c @@ -25,11 +25,12 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); } else { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + /* No arena context as we are creating new arenas. 
*/ + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + pre_reentrancy(tsd, NULL); addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, &zero, &commit, ind); - post_reentrancy(tsdn_tsd(tsdn)); + post_reentrancy(tsd); } return addr; @@ -64,8 +65,8 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, /* Nothing worked. This should never happen. */ not_reached(); } else { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + pre_reentrancy(tsd, NULL); if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { @@ -88,7 +89,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, } /* Nothing worked. That's the application's problem. */ label_done: - post_reentrancy(tsdn_tsd(tsdn)); + post_reentrancy(tsd); return; } } diff --git a/src/extent.c b/src/extent.c index 4b66dd88..fa45c84d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1025,6 +1025,18 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, alignment, zero, commit); } +static void +extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + pre_reentrancy(tsd, arena); +} + +static void +extent_hook_post_reentrancy(tsdn_t *tsdn) { + tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); + post_reentrancy(tsd); +} + /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint @@ -1073,12 +1085,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, &zeroed, &committed, (dss_prec_t)atomic_load_u( &arena->dss_prec, ATOMIC_RELAXED)); } else { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, alloc_size, PAGE, &zeroed, &committed, arena_ind_get(arena)); - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } extent_init(extent, arena, ptr, alloc_size, false, NSIZES, @@ -1250,11 +1261,10 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, alignment, zero, commit); } else { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, esize, alignment, zero, commit, arena_ind_get(arena)); - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } if (addr == NULL) { extent_dalloc(tsdn, arena, extent); @@ -1492,13 +1502,12 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); err = ((*r_extent_hooks)->dalloc == NULL || (*r_extent_hooks)->dalloc(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena))); - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } if (!err) { @@ -1525,8 +1534,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_reregister(tsdn, extent); if (*r_extent_hooks != &extent_hooks_default) { - 
assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); } /* Try to decommit; purge if that fails. */ bool zeroed; @@ -1550,7 +1558,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, zeroed = false; } if (*r_extent_hooks != &extent_hooks_default) { - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } extent_zeroed_set(extent, zeroed); @@ -1595,12 +1603,11 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_destroy_default_impl(extent_base_get(extent), extent_size_get(extent)); } else if ((*r_extent_hooks)->destroy != NULL) { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); (*r_extent_hooks)->destroy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena)); - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } extent_dalloc(tsdn, arena, extent); @@ -1622,14 +1629,13 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); if (*r_extent_hooks != &extent_hooks_default) { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); } bool err = ((*r_extent_hooks)->commit == NULL || (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); if (*r_extent_hooks != &extent_hooks_default) { - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) || !err); return err; @@ -1660,15 +1666,14 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); if (*r_extent_hooks != &extent_hooks_default) { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); } bool err = ((*r_extent_hooks)->decommit == NULL || 
(*r_extent_hooks)->decommit(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); if (*r_extent_hooks != &extent_hooks_default) { - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) && err); return err; @@ -1700,16 +1705,14 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, if ((*r_extent_hooks)->purge_lazy == NULL) { return true; } - if (*r_extent_hooks != &extent_hooks_default) { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); } bool err = (*r_extent_hooks)->purge_lazy(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); if (*r_extent_hooks != &extent_hooks_default) { - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } return err; @@ -1750,14 +1753,13 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, return true; } if (*r_extent_hooks != &extent_hooks_default) { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); } bool err = (*r_extent_hooks)->purge_forced(*r_extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); if (*r_extent_hooks != &extent_hooks_default) { - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } return err; } @@ -1829,14 +1831,13 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_lock2(tsdn, extent, trail); if (*r_extent_hooks != &extent_hooks_default) { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); } bool err = (*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); if (*r_extent_hooks != &extent_hooks_default) { - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } if 
(err) { goto label_error_c; @@ -1908,13 +1909,12 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { - assert(!tsdn_null(tsdn)); - pre_reentrancy(tsdn_tsd(tsdn)); + extent_hook_pre_reentrancy(tsdn, arena); err = (*r_extent_hooks)->merge(*r_extent_hooks, extent_base_get(a), extent_size_get(a), extent_base_get(b), extent_size_get(b), extent_committed_get(a), arena_ind_get(arena)); - post_reentrancy(tsdn_tsd(tsdn)); + extent_hook_post_reentrancy(tsdn); } if (err) { diff --git a/src/jemalloc.c b/src/jemalloc.c index aa79633c..7bf23105 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1476,7 +1476,7 @@ malloc_init_hard(void) { malloc_mutex_lock(tsd_tsdn(tsd), &init_lock); /* Set reentrancy level to 1 during init. */ - pre_reentrancy(tsd); + pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). */ if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) diff --git a/src/prof.c b/src/prof.c index 61dfa2ce..975722c4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1633,7 +1633,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, return true; } - pre_reentrancy(tsd); + pre_reentrancy(tsd, NULL); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_gctx_tree_t gctxs; From 57beeb2fcb14210bf25e5a79c317e135392cfd86 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 22 Jun 2017 18:58:40 -0700 Subject: [PATCH 0945/2608] Switch ctl to explicitly use tsd instead of tsdn. 
--- include/jemalloc/internal/ctl.h | 3 +-- src/ctl.c | 38 ++++++++++++++++----------------- src/jemalloc.c | 9 ++++---- 3 files changed, 24 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index f159383d..a91c4cf5 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -91,8 +91,7 @@ typedef struct ctl_arenas_s { int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, - size_t *miblenp); +int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp); int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); diff --git a/src/ctl.c b/src/ctl.c index a647c6c0..36bc8fb5 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -622,7 +622,7 @@ arenas_i2a(size_t i) { } static ctl_arena_t * -arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { +arenas_i_impl(tsd_t *tsd, size_t i, bool compat, bool init) { ctl_arena_t *ret; assert(!compat || !init); @@ -635,15 +635,15 @@ arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { ctl_arena_stats_t astats; }; struct container_s *cont = - (struct container_s *)base_alloc(tsdn, b0get(), - sizeof(struct container_s), QUANTUM); + (struct container_s *)base_alloc(tsd_tsdn(tsd), + b0get(), sizeof(struct container_s), QUANTUM); if (cont == NULL) { return NULL; } ret = &cont->ctl_arena; ret->astats = &cont->astats; } else { - ret = (ctl_arena_t *)base_alloc(tsdn, b0get(), + ret = (ctl_arena_t *)base_alloc(tsd_tsdn(tsd), b0get(), sizeof(ctl_arena_t), QUANTUM); if (ret == NULL) { return NULL; @@ -659,7 +659,7 @@ arenas_i_impl(tsdn_t *tsdn, size_t i, bool compat, bool init) { static ctl_arena_t * arenas_i(size_t i) { - ctl_arena_t *ret = arenas_i_impl(TSDN_NULL, i, true, false); + ctl_arena_t *ret = arenas_i_impl(tsd_fetch(), i, true, false); assert(ret != 
NULL); return ret; } @@ -863,7 +863,7 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, } static unsigned -ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { +ctl_arena_init(tsd_t *tsd, extent_hooks_t *extent_hooks) { unsigned arena_ind; ctl_arena_t *ctl_arena; @@ -876,12 +876,12 @@ ctl_arena_init(tsdn_t *tsdn, extent_hooks_t *extent_hooks) { } /* Trigger stats allocation. */ - if (arenas_i_impl(tsdn, arena_ind, false, true) == NULL) { + if (arenas_i_impl(tsd, arena_ind, false, true) == NULL) { return UINT_MAX; } /* Initialize new arena. */ - if (arena_init(tsdn, arena_ind, extent_hooks) == NULL) { + if (arena_init(tsd_tsdn(tsd), arena_ind, extent_hooks) == NULL) { return UINT_MAX; } @@ -975,8 +975,9 @@ ctl_refresh(tsdn_t *tsdn) { } static bool -ctl_init(tsdn_t *tsdn) { +ctl_init(tsd_t *tsd) { bool ret; + tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { @@ -1010,14 +1011,14 @@ ctl_init(tsdn_t *tsdn) { * here rather than doing it lazily elsewhere, in order * to limit when OOM-caused errors can occur. 
*/ - if ((ctl_sarena = arenas_i_impl(tsdn, MALLCTL_ARENAS_ALL, false, + if ((ctl_sarena = arenas_i_impl(tsd, MALLCTL_ARENAS_ALL, false, true)) == NULL) { ret = true; goto label_return; } ctl_sarena->initialized = true; - if ((ctl_darena = arenas_i_impl(tsdn, MALLCTL_ARENAS_DESTROYED, + if ((ctl_darena = arenas_i_impl(tsd, MALLCTL_ARENAS_DESTROYED, false, true)) == NULL) { ret = true; goto label_return; @@ -1031,7 +1032,7 @@ ctl_init(tsdn_t *tsdn) { ctl_arenas->narenas = narenas_total_get(); for (i = 0; i < ctl_arenas->narenas; i++) { - if (arenas_i_impl(tsdn, i, false, true) == NULL) { + if (arenas_i_impl(tsd, i, false, true) == NULL) { ret = true; goto label_return; } @@ -1156,7 +1157,7 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; - if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } @@ -1180,15 +1181,15 @@ label_return: } int -ctl_nametomib(tsdn_t *tsdn, const char *name, size_t *mibp, size_t *miblenp) { +ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { int ret; - if (!ctl_initialized && ctl_init(tsdn)) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } - ret = ctl_lookup(tsdn, name, NULL, mibp, miblenp); + ret = ctl_lookup(tsd_tsdn(tsd), name, NULL, mibp, miblenp); label_return: return(ret); } @@ -1200,7 +1201,7 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, const ctl_named_node_t *node; size_t i; - if (!ctl_initialized && ctl_init(tsd_tsdn(tsd))) { + if (!ctl_initialized && ctl_init(tsd)) { ret = EAGAIN; goto label_return; } @@ -2312,8 +2313,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, extent_hooks = (extent_hooks_t *)&extent_hooks_default; WRITE(extent_hooks, extent_hooks_t *); - if ((arena_ind = ctl_arena_init(tsd_tsdn(tsd), extent_hooks)) == - UINT_MAX) { + if ((arena_ind = 
ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { ret = EAGAIN; goto label_return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7bf23105..511710cc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2918,16 +2918,15 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - tsdn_t *tsdn; if (unlikely(malloc_init())) { return EAGAIN; } - tsdn = tsdn_fetch(); - check_entry_exit_locking(tsdn); - ret = ctl_nametomib(tsdn, name, mibp, miblenp); - check_entry_exit_locking(tsdn); + tsd_t *tsd = tsd_fetch(); + check_entry_exit_locking(tsd_tsdn(tsd)); + ret = ctl_nametomib(tsd, name, mibp, miblenp); + check_entry_exit_locking(tsd_tsdn(tsd)); return ret; } From aa363f9388685a96a0af12b6f4a6dfa20d4243f9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Jun 2017 11:17:45 -0700 Subject: [PATCH 0946/2608] Fix pthread_sigmask() usage to block all signals. --- src/background_thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index cc5100d1..eb30eb5b 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -355,7 +355,7 @@ background_thread_create_signals_masked(pthread_t *thread, * an empty signal set. */ sigset_t set; - sigemptyset(&set); + sigfillset(&set); sigset_t oldset; int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset); if (mask_err != 0) { From c99e570a48236ba771b61af31febaef6f8b7e887 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 23 Jun 2017 12:35:17 -0700 Subject: [PATCH 0947/2608] Make sure LG_PAGE <= LG_HUGEPAGE. This resolves #883. 
--- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 770fff56..496631a6 100644 --- a/configure.ac +++ b/configure.ac @@ -1373,6 +1373,10 @@ if test "x${je_cv_lg_hugepage}" = "x" ; then je_cv_lg_hugepage=21 fi fi +if test "x${LG_PAGE}" != "xundefined" -a \ + "${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then + AC_MSG_ERROR([Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})]) +fi AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) AC_ARG_WITH([lg_page_sizes], From 2b31cf5bd272216e4b20c1463bb696b4c1e9a8e5 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 29 Jun 2017 13:56:22 -0700 Subject: [PATCH 0948/2608] Enforce minimum autoconf version (currently 2.68). This resolves #912. --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 496631a6..1551ded8 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,5 @@ dnl Process this file with autoconf to produce a configure script. +AC_PREREQ(2.68) AC_INIT([Makefile.in]) AC_CONFIG_AUX_DIR([build-aux]) From cb032781bdfd778325284472c25172713414023f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 29 Jun 2017 16:01:35 -0700 Subject: [PATCH 0949/2608] Add extent_grow_mtx in pre_ / post_fork handlers. This fixed the issue that could cause the child process to get stuck after fork. 
--- include/jemalloc/internal/arena_externs.h | 1 + src/arena.c | 15 +++++++++++---- src/jemalloc.c | 5 ++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 3a85bcbb..af16d158 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -90,6 +90,7 @@ void arena_prefork3(tsdn_t *tsdn, arena_t *arena); void arena_prefork4(tsdn_t *tsdn, arena_t *arena); void arena_prefork5(tsdn_t *tsdn, arena_t *arena); void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_prefork7(tsdn_t *tsdn, arena_t *arena); void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index b2830866..632fce52 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2087,28 +2087,33 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_prefork(tsdn, &arena->extent_grow_mtx); +} + +void +arena_prefork3(tsdn_t *tsdn, arena_t *arena) { extents_prefork(tsdn, &arena->extents_dirty); extents_prefork(tsdn, &arena->extents_muzzy); extents_prefork(tsdn, &arena->extents_retained); } void -arena_prefork3(tsdn_t *tsdn, arena_t *arena) { +arena_prefork4(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx); } void -arena_prefork4(tsdn_t *tsdn, arena_t *arena) { +arena_prefork5(tsdn_t *tsdn, arena_t *arena) { base_prefork(tsdn, arena->base); } void -arena_prefork5(tsdn_t *tsdn, arena_t *arena) { +arena_prefork6(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_prefork(tsdn, &arena->large_mtx); } void -arena_prefork6(tsdn_t *tsdn, arena_t *arena) { +arena_prefork7(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < NBINS; i++) { malloc_mutex_prefork(tsdn, &arena->bins[i].lock); } @@ -2127,6 +2132,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { 
extents_postfork_parent(tsdn, &arena->extents_dirty); extents_postfork_parent(tsdn, &arena->extents_muzzy); extents_postfork_parent(tsdn, &arena->extents_retained); + malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { @@ -2164,6 +2170,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { extents_postfork_child(tsdn, &arena->extents_dirty); extents_postfork_child(tsdn, &arena->extents_muzzy); extents_postfork_child(tsdn, &arena->extents_retained); + malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 511710cc..0ee8ad48 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3049,7 +3049,7 @@ _malloc_prefork(void) background_thread_prefork1(tsd_tsdn(tsd)); } /* Break arena prefork into stages to preserve lock order. */ - for (i = 0; i < 7; i++) { + for (i = 0; i < 8; i++) { for (j = 0; j < narenas; j++) { if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != NULL) { @@ -3075,6 +3075,9 @@ _malloc_prefork(void) case 6: arena_prefork6(tsd_tsdn(tsd), arena); break; + case 7: + arena_prefork7(tsd_tsdn(tsd), arena); + break; default: not_reached(); } } From 284edf02b0de3231357497cf0367f6f64ab07cd8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 1 Jul 2017 17:12:05 -0700 Subject: [PATCH 0950/2608] Update ChangeLog for 5.0.1. --- ChangeLog | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/ChangeLog b/ChangeLog index 98c12f20..ee1b7ead 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,41 @@ brevity. 
Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.0.1 (July 1, 2017) + + This bugfix release fixes several issues, most of which are obscure enough + that typical applications are not impacted. + + Bug fixes: + - Update decay->nunpurged before purging, in order to avoid potential update + races and subsequent incorrect purging volume. (@interwq) + - Only abort on dlsym(3) error if the failure impacts an enabled feature (lazy + locking and/or background threads). This mitigates an initialization + failure bug for which we still do not have a clear reproduction test case. + (@interwq) + - Modify tsd management so that it neither crashes nor leaks if a thread's + only allocation activity is to call free() after TLS destructors have been + executed. This behavior was observed when operating with GNU libc, and is + unlikely to be an issue with other libc implementations. (@interwq) + - Mask signals during background thread creation. This prevents signals from + being inadvertently delivered to background threads. (@jasone, + @davidgoldblatt, @interwq) + - Avoid inactivity checks within background threads, in order to prevent + recursive mutex acquisition. (@interwq) + - Fix extent_grow_retained() to use the specified hooks when the + arena..extent_hooks mallctl is used to override the default hooks. + (@interwq) + - Add missing reentrancy support for custom extent hooks which allocate. + (@interwq) + - Post-fork(2), re-initialize the list of tcaches associated with each arena + to contain no tcaches except the forking thread's. (@interwq) + - Add missing post-fork(2) mutex reinitialization for extent_grow_mtx. This + fixes potential deadlocks after fork(2). (@interwq) + - Enforce minimum autoconf version (currently 2.68), since 2.63 is known to + generate corrupt configure scripts. (@jasone) + - Ensure that the configured page size (--with-lg-page) is no larger than the + configured huge page size (--with-lg-hugepage). 
(@jasone) + * 5.0.0 (June 13, 2017) Unlike all previous jemalloc releases, this release does not use naturally From aa44ddbcdd43cc8a8352b654f4a003d83b9c15b7 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sun, 2 Jul 2017 17:55:52 -0700 Subject: [PATCH 0951/2608] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index ee1b7ead..967d04d0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -22,7 +22,7 @@ brevity. Much more detail can be found in the git revision history: unlikely to be an issue with other libc implementations. (@interwq) - Mask signals during background thread creation. This prevents signals from being inadvertently delivered to background threads. (@jasone, - @davidgoldblatt, @interwq) + @davidtgoldblatt, @interwq) - Avoid inactivity checks within background threads, in order to prevent recursive mutex acquisition. (@interwq) - Fix extent_grow_retained() to use the specified hooks when the From f9dfb8db73064e2bb3735d4b288168e722191fdd Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 14:50:55 -0400 Subject: [PATCH 0952/2608] whitespace --- configure.ac | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 1551ded8..82e04a5d 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ dnl ============================================================================ dnl Custom macro definitions. dnl JE_CONCAT_VVV(r, a, b) -dnl +dnl dnl Set $r to the concatenation of $a and $b, with a space separating them iff dnl both $a and $b are non-emty. AC_DEFUN([JE_CONCAT_VVV], @@ -20,7 +20,7 @@ fi ) dnl JE_APPEND_VS(a, b) -dnl +dnl dnl Set $a to the concatenation of $a and b, with a space separating them iff dnl both $a and b are non-empty. 
AC_DEFUN([JE_APPEND_VS], @@ -31,7 +31,7 @@ AC_DEFUN([JE_APPEND_VS], CONFIGURE_CFLAGS= SPECIFIED_CFLAGS="${CFLAGS}" dnl JE_CFLAGS_ADD(cflag) -dnl +dnl dnl CFLAGS is the concatenation of CONFIGURE_CFLAGS and SPECIFIED_CFLAGS dnl (ignoring EXTRA_CFLAGS, which does not impact configure tests. This macro dnl appends to CONFIGURE_CFLAGS and regenerates CFLAGS. @@ -57,7 +57,7 @@ JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) dnl JE_CFLAGS_SAVE() dnl JE_CFLAGS_RESTORE() -dnl +dnl dnl Save/restore CFLAGS. Nesting is not supported. AC_DEFUN([JE_CFLAGS_SAVE], SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" @@ -91,7 +91,7 @@ JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) dnl JE_COMPILABLE(label, hcode, mcode, rvar) -dnl +dnl dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors dnl cause failure. AC_DEFUN([JE_COMPILABLE], @@ -516,7 +516,7 @@ AC_PROG_AWK dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. -dnl +dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. @@ -1387,7 +1387,7 @@ AC_ARG_WITH([lg_page_sizes], dnl ============================================================================ dnl jemalloc configuration. 
-dnl +dnl AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], From ef55006c1d324692408eed87421f486812d3645d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 15:14:48 -0400 Subject: [PATCH 0953/2608] dumpbin doesn't exist in mingw --- configure.ac | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 82e04a5d..8a34b476 100644 --- a/configure.ac +++ b/configure.ac @@ -630,7 +630,13 @@ case "${host}" in DSO_LDFLAGS="-shared" link_whole_archive="1" fi - DUMP_SYMS="dumpbin /SYMBOLS" + case "${host}" in + *-*-cygwin*) + DUMP_SYMS="dumpbin /SYMBOLS" + ;; + *) + ;; + esac a="lib" libprefix="" SOREV="${so}" From 3f5049340e66c6929c3270f7359617f62e053b11 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 15:04:17 -0400 Subject: [PATCH 0954/2608] Allow toolchain to determine nm --- configure.ac | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8a34b476..1969d11f 100644 --- a/configure.ac +++ b/configure.ac @@ -511,6 +511,11 @@ AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) AC_PROG_AR +AN_MAKEVAR([NM], [AC_PROG_NM]) +AN_PROGRAM([nm], [AC_PROG_NM]) +AC_DEFUN([AC_PROG_NM], [AC_CHECK_TOOL(NM, nm, :)]) +AC_PROG_NM + AC_PROG_AWK dnl Platform-specific settings. abi and RPATH can probably be determined @@ -522,7 +527,7 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. default_retain="0" maps_coalesce="1" -DUMP_SYMS="nm -a" +DUMP_SYMS="${NM} -a" SYM_PREFIX="" case "${host}" in *-*-darwin* | *-*-ios*) From 0a4f5a7eea5e42292cea95fd30a88201c8d4a1ca Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Jul 2017 14:05:33 -0700 Subject: [PATCH 0955/2608] Fix deadlock in multithreaded fork in OS X. On OS X, we rely on the zone machinery to call our prefork and postfork handlers. 
In zone_force_unlock, we call jemalloc_postfork_child, reinitializing all our mutexes regardless of state, since the mutex implementation will assert if the tid of the unlocker is different from that of the locker. This has the effect of unlocking the mutexes, but also fails to wake any threads waiting on them in the parent. To fix this, we track whether or not we're the parent or child after the fork, and unlock or reinit as appropriate. This resolves #895. --- src/zone.c | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/src/zone.c b/src/zone.c index 9d3b7b49..23dfdd04 100644 --- a/src/zone.c +++ b/src/zone.c @@ -89,6 +89,7 @@ JEMALLOC_ATTR(weak_import); static malloc_zone_t *default_zone, *purgeable_zone; static malloc_zone_t jemalloc_zone; static struct malloc_introspection_t jemalloc_zone_introspect; +static pid_t zone_force_lock_pid = -1; /******************************************************************************/ /* Function prototypes for non-inline static functions. */ @@ -270,6 +271,12 @@ zone_log(malloc_zone_t *zone, void *address) { static void zone_force_lock(malloc_zone_t *zone) { if (isthreaded) { + /* + * See the note in zone_force_unlock, below, to see why we need + * this. + */ + assert(zone_force_lock_pid == -1); + zone_force_lock_pid = getpid(); jemalloc_prefork(); } } @@ -277,14 +284,25 @@ zone_force_lock(malloc_zone_t *zone) { static void zone_force_unlock(malloc_zone_t *zone) { /* - * Call jemalloc_postfork_child() rather than - * jemalloc_postfork_parent(), because this function is executed by both - * parent and child. The parent can tolerate having state - * reinitialized, but the child cannot unlock mutexes that were locked - * by the parent. + * zone_force_lock and zone_force_unlock are the entry points to the + * forking machinery on OS X. 
The tricky thing is, the child is not + * allowed to unlock mutexes locked in the parent, even if owned by the + * forking thread (and the mutex type we use in OS X will fail an assert + * if we try). In the child, we can get away with reinitializing all + * the mutexes, which has the effect of unlocking them. In the parent, + * doing this would mean we wouldn't wake any waiters blocked on the + * mutexes we unlock. So, we record the pid of the current thread in + * zone_force_lock, and use that to detect if we're in the parent or + * child here, to decide which unlock logic we need. */ if (isthreaded) { - jemalloc_postfork_child(); + assert(zone_force_lock_pid != -1); + if (getpid() == zone_force_lock_pid) { + jemalloc_postfork_parent(); + } else { + jemalloc_postfork_child(); + } + zone_force_lock_pid = -1; } } From fb6787a78c3a1e3a4868520d0531fc2ebdda21d8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Jul 2017 13:19:31 -0700 Subject: [PATCH 0956/2608] Add a test of behavior under multi-threaded forking. Forking a multithreaded process is dangerous but allowed, so long as the child only executes async-signal-safe functions (e.g. exec). Add a test to ensure that we don't break this behavior. 
--- test/unit/fork.c | 108 ++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 87 insertions(+), 21 deletions(-) diff --git a/test/unit/fork.c b/test/unit/fork.c index afe22141..b1690750 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -4,6 +4,30 @@ #include #endif +#ifndef _WIN32 +static void +wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} +#endif + TEST_BEGIN(test_fork) { #ifndef _WIN32 void *p; @@ -40,26 +64,67 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - int status; + wait_for_child_exit(pid); + } +#else + test_skip("fork(2) is irrelevant to Windows"); +#endif +} +TEST_END - /* Parent. */ - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure"); - } - if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail( - "Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } +#ifndef _WIN32 +static void * +do_fork_thd(void *arg) { + malloc(1); + int pid = fork(); + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure"); + } else if (pid == 0) { + /* Child. 
*/ + char *args[] = {"true", NULL}; + execvp(args[0], args); + test_fail("Exec failed"); + } else { + /* Parent */ + wait_for_child_exit(pid); + } + return NULL; +} +#endif + +#ifndef _WIN32 +static void +do_test_fork_multithreaded() { + thd_t child; + thd_create(&child, do_fork_thd, NULL); + do_fork_thd(NULL); + thd_join(child, NULL); +} +#endif + +TEST_BEGIN(test_fork_multithreaded) { +#ifndef _WIN32 + /* + * We've seen bugs involving hanging on arenas_lock (though the same + * class of bugs can happen on any mutex). The bugs are intermittent + * though, so we want to run the test multiple times. Since we hold the + * arenas lock only early in the process lifetime, we can't just run + * this test in a loop (since, after all the arenas are initialized, we + * won't acquire arenas_lock any further). We therefore repeat the test + * with multiple processes. + */ + for (int i = 0; i < 100; i++) { + int pid = fork(); + if (pid == -1) { + /* Error. */ + test_fail("Unexpected fork() failure,"); + } else if (pid == 0) { + /* Child. */ + do_test_fork_multithreaded(); + _exit(0); + } else { + wait_for_child_exit(pid); } } #else @@ -70,6 +135,7 @@ TEST_END int main(void) { - return test( - test_fork); + return test_no_reentrancy( + test_fork, + test_fork_multithreaded); } From 0975b88dfd3a890f469c8c282a5140013af85ab2 Mon Sep 17 00:00:00 2001 From: "Y. T. Chung" Date: Thu, 20 Jul 2017 23:02:23 +0800 Subject: [PATCH 0957/2608] Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. Older Linux systems don't have O_CLOEXEC. If that's the case, we fcntl immediately after open, to minimize the length of the racy period in which an operation in another thread can leak a file descriptor to a child. 
--- src/pages.c | 27 ++++++++++++++++++++++----- src/prof.c | 6 ++++++ 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/src/pages.c b/src/pages.c index fec64dd0..0883647b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -353,14 +353,31 @@ os_overcommits_proc(void) { ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | + O_CLOEXEC); + #else + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + #endif #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + #endif #endif + if (fd == -1) { return false; /* Error. */ } diff --git a/src/prof.c b/src/prof.c index 975722c4..a1ca9e2c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1409,7 +1409,13 @@ prof_open_maps(const char *format, ...) 
{ va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); + +#if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); +#endif return mfd; } From 9761b449c8c6b70abdb4cfa953e59847a84af406 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Wed, 19 Jul 2017 16:36:46 -0700 Subject: [PATCH 0958/2608] Add a logging facility. This sets up a hierarchical logging facility, so that we can add logging statements liberally, and turn them on in a fine-grained manner. --- Makefile.in | 2 + configure.ac | 16 ++ .../internal/jemalloc_internal_defs.h.in | 6 + .../jemalloc/internal/jemalloc_preamble.h.in | 11 ++ include/jemalloc/internal/log.h | 89 +++++++++ src/jemalloc.c | 12 ++ src/log.c | 78 ++++++++ test/unit/log.c | 182 ++++++++++++++++++ 8 files changed, 396 insertions(+) create mode 100644 include/jemalloc/internal/log.h create mode 100644 src/log.c create mode 100644 test/unit/log.c diff --git a/Makefile.in b/Makefile.in index fec1397a..6e3424fe 100644 --- a/Makefile.in +++ b/Makefile.in @@ -102,6 +102,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hash.c \ $(srcroot)src/hooks.c \ $(srcroot)src/large.c \ + $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ $(srcroot)src/mutex_pool.c \ @@ -171,6 +172,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ diff --git a/configure.ac b/configure.ac index 1551ded8..02151543 100644 --- a/configure.ac +++ b/configure.ac @@ -1226,6 +1226,21 @@ if test "x$enable_cache_oblivious" = "x1" ; then fi AC_SUBST([enable_cache_oblivious]) +dnl Do not log by default. 
+AC_ARG_ENABLE([log], + [AS_HELP_STRING([--enable-log], [Support debug logging])], +[if test "x$enable_log" = "xno" ; then + enable_log="0" +else + enable_log="1" +fi +], +[enable_log="0"] +) +if test "x$enable_log" = "x1" ; then + AC_DEFINE([JEMALLOC_LOG], [ ]) +fi +AC_SUBST([enable_log]) JE_COMPILABLE([a program using __builtin_unreachable], [ @@ -2188,6 +2203,7 @@ AC_MSG_RESULT([thp : ${enable_thp}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) +AC_MSG_RESULT([log : ${enable_log}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c0f834f2..b73daf04 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -237,6 +237,12 @@ */ #undef JEMALLOC_CACHE_OBLIVIOUS +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +#undef JEMALLOC_LOG + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 18539a09..099f98d8 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -146,6 +146,17 @@ static const bool config_cache_oblivious = false #endif ; +/* + * Undocumented, for jemalloc development use only at the moment. See the note + * in jemalloc/internal/log.h. + */ +static const bool config_log = +#ifdef JEMALLOC_LOG + true +#else + false +#endif + ; #ifdef JEMALLOC_HAVE_SCHED_GETCPU /* Currently percpu_arena depends on sched_getcpu. 
*/ #define JEMALLOC_PERCPU_ARENA diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h new file mode 100644 index 00000000..8413a4d6 --- /dev/null +++ b/include/jemalloc/internal/log.h @@ -0,0 +1,89 @@ +#ifndef JEMALLOC_INTERNAL_LOG_H +#define JEMALLOC_INTERNAL_LOG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" + +#ifdef JEMALLOC_LOG +# define JEMALLOC_LOG_BUFSIZE 1000 +#else +# define JEMALLOC_LOG_BUFSIZE 1 +#endif + +/* + * The log_vars malloc_conf option is a '|'-delimited list of log_var name + * segments to log. The log_var names are themselves hierarchical, with '.' as + * the delimiter (a "segment" is just a prefix in the log namespace). So, if + * you have: + * + * static log_var_t log_arena = LOG_VAR_INIT("arena"); // 1 + * static log_var_t log_arena_a = LOG_VAR_INIT("arena.a"); // 2 + * static log_var_t log_arena_b = LOG_VAR_INIT("arena.b"); // 3 + * static log_var_t log_arena_a_a = LOG_VAR_INIT("arena.a.a"); // 4 + * static_log_var_t log_extent_a = LOG_VAR_INIT("extent.a"); // 5 + * static_log_var_t log_extent_b = LOG_VAR_INIT("extent.b"); // 6 + * + * And your malloc_conf option is "log_vars=arena.a|extent", then log_vars 2, 4, + * 5, and 6 will be enabled. You can enable logging from all log vars by + * writing "log_vars=.". + * + * You can then log by writing: + * log(log_var, "format string -- my int is %d", my_int); + * + * None of this should be regarded as a stable API for right now. It's intended + * as a debugging interface, to let us keep around some of our printf-debugging + * statements. + */ + +extern char log_var_names[JEMALLOC_LOG_BUFSIZE]; +extern atomic_b_t log_init_done; + +typedef struct log_var_s log_var_t; +struct log_var_s { + /* + * Lowest bit is "inited", second lowest is "enabled". Putting them in + * a single word lets us avoid any fences on weak architectures. 
+ */ + atomic_u_t state; + const char *name; +}; + +#define LOG_NOT_INITIALIZED 0U +#define LOG_INITIALIZED_NOT_ENABLED 1U +#define LOG_ENABLED 2U + +#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} + +/* + * Returns the value we should assume for state (which is not necessarily + * accurate; if logging is done before logging has finished initializing, then + * we default to doing the safe thing by logging everything). + */ +unsigned log_var_update_state(log_var_t *log_var); + +/* We factor out the metadata management to allow us to test more easily. */ +#define log_do_begin(log_var) \ +if (config_log) { \ + unsigned log_state = atomic_load_u(&(log_var).state, \ + ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { + /* User code executes here. */ +#define log_do_end(log_var) \ + } \ + } \ +} + +#define log(log_var, format, ...) \ +do { \ + log_do_begin(log_var) \ + malloc_printf("%s: " format "\n", \ + (log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 0ee8ad48..09bac9eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -1173,6 +1174,16 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } + if (config_log) { + if (CONF_MATCH("log_vars")) { + size_t cpylen = ( + vlen <= sizeof(log_var_names) ? 
+ vlen : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + continue; + } + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH @@ -1189,6 +1200,7 @@ malloc_conf_init(void) { #undef CONF_HANDLE_CHAR_P } } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } static bool diff --git a/src/log.c b/src/log.c new file mode 100644 index 00000000..022dc584 --- /dev/null +++ b/src/log.c @@ -0,0 +1,78 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/log.h" + +char log_var_names[JEMALLOC_LOG_BUFSIZE]; +atomic_b_t log_init_done = ATOMIC_INIT(false); + +/* + * Returns true if we were able to pick out a segment. Fills in r_segment_end + * with a pointer to the first character after the end of the string. + */ +static const char * +log_var_extract_segment(const char* segment_begin) { + const char *end; + for (end = segment_begin; *end != '\0' && *end != '|'; end++) { + } + return end; +} + +static bool +log_var_matches_segment(const char *segment_begin, const char *segment_end, + const char *log_var_begin, const char *log_var_end) { + assert(segment_begin <= segment_end); + assert(log_var_begin < log_var_end); + + ptrdiff_t segment_len = segment_end - segment_begin; + ptrdiff_t log_var_len = log_var_end - log_var_begin; + /* The special '.' segment matches everything. 
*/ + if (segment_len == 1 && *segment_begin == '.') { + return true; + } + if (segment_len == log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0; + } else if (segment_len < log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0 + && log_var_begin[segment_len] == '.'; + } else { + return false; + } +} + +unsigned +log_var_update_state(log_var_t *log_var) { + const char *log_var_begin = log_var->name; + const char *log_var_end = log_var->name + strlen(log_var->name); + + /* Pointer to one before the beginning of the current segment. */ + const char *segment_begin = log_var_names; + + /* + * If log_init done is false, we haven't parsed the malloc conf yet. To + * avoid log-spew, we default to not displaying anything. + */ + if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) { + return LOG_INITIALIZED_NOT_ENABLED; + } + + while (true) { + const char *segment_end = log_var_extract_segment( + segment_begin); + assert(segment_end < log_var_names + JEMALLOC_LOG_BUFSIZE); + if (log_var_matches_segment(segment_begin, segment_end, + log_var_begin, log_var_end)) { + atomic_store_u(&log_var->state, LOG_ENABLED, + ATOMIC_RELAXED); + return LOG_ENABLED; + } + if (*segment_end == '\0') { + /* Hit the end of the segment string with no match. */ + atomic_store_u(&log_var->state, + LOG_INITIALIZED_NOT_ENABLED, ATOMIC_RELAXED); + return LOG_INITIALIZED_NOT_ENABLED; + } + /* Otherwise, skip the delimiter and continue. 
*/ + segment_begin = segment_end + 1; + } +} diff --git a/test/unit/log.c b/test/unit/log.c new file mode 100644 index 00000000..6db256f1 --- /dev/null +++ b/test/unit/log.c @@ -0,0 +1,182 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/log.h" + +static void +expect_no_logging(const char *names) { + log_var_t log_l1 = LOG_VAR_INIT("l1"); + log_var_t log_l2 = LOG_VAR_INIT("l2"); + log_var_t log_l2_a = LOG_VAR_INIT("l2.a"); + + strcpy(log_var_names, names); + + int count = 0; + + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + + log_do_begin(log_l2) + count++; + log_do_end(log_l2) + + log_do_begin(log_l2_a) + count++; + log_do_end(log_l2_a) + } + assert_d_eq(count, 0, "Disabled logging not ignored!"); +} + +TEST_BEGIN(test_log_disabled) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + expect_no_logging(""); + expect_no_logging("abc"); + expect_no_logging("a.b.c"); + expect_no_logging("l12"); + expect_no_logging("l123|a456|b789"); + expect_no_logging("|||"); +} +TEST_END + +TEST_BEGIN(test_log_enabled_direct) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + log_var_t log_l1 = LOG_VAR_INIT("l1"); + log_var_t log_l1_a = LOG_VAR_INIT("l1.a"); + log_var_t log_l2 = LOG_VAR_INIT("l2"); + + int count; + + count = 0; + strcpy(log_var_names, "l1"); + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + } + assert_d_eq(count, 10, "Mis-logged!"); + + count = 0; + strcpy(log_var_names, "l1.a"); + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1_a) + count++; + log_do_end(log_l1_a) + } + assert_d_eq(count, 10, "Mis-logged!"); + + count = 0; + strcpy(log_var_names, "l1.a|abc|l2|def"); + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1_a) + count++; + log_do_end(log_l1_a) + + log_do_begin(log_l2) + count++; + log_do_end(log_l2) + } + assert_d_eq(count, 20, "Mis-logged!"); +} +TEST_END + 
+TEST_BEGIN(test_log_enabled_indirect) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + strcpy(log_var_names, "l0|l1|abc|l2.b|def"); + + /* On. */ + log_var_t log_l1 = LOG_VAR_INIT("l1"); + /* Off. */ + log_var_t log_l1a = LOG_VAR_INIT("l1a"); + /* On. */ + log_var_t log_l1_a = LOG_VAR_INIT("l1.a"); + /* Off. */ + log_var_t log_l2_a = LOG_VAR_INIT("l2.a"); + /* On. */ + log_var_t log_l2_b_a = LOG_VAR_INIT("l2.b.a"); + /* On. */ + log_var_t log_l2_b_b = LOG_VAR_INIT("l2.b.b"); + + /* 4 are on total, so should sum to 40. */ + int count = 0; + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + + log_do_begin(log_l1a) + count++; + log_do_end(log_l1a) + + log_do_begin(log_l1_a) + count++; + log_do_end(log_l1_a) + + log_do_begin(log_l2_a) + count++; + log_do_end(log_l2_a) + + log_do_begin(log_l2_b_a) + count++; + log_do_end(log_l2_b_a) + + log_do_begin(log_l2_b_b) + count++; + log_do_end(log_l2_b_b) + } + + assert_d_eq(count, 40, "Mis-logged!"); +} +TEST_END + +TEST_BEGIN(test_log_enabled_global) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); + strcpy(log_var_names, "abc|.|def"); + + log_var_t log_l1 = LOG_VAR_INIT("l1"); + log_var_t log_l2_a_a = LOG_VAR_INIT("l2.a.a"); + + int count = 0; + for (int i = 0; i < 10; i++) { + log_do_begin(log_l1) + count++; + log_do_end(log_l1) + + log_do_begin(log_l2_a_a) + count++; + log_do_end(log_l2_a_a) + } + assert_d_eq(count, 20, "Mis-logged!"); +} +TEST_END + +TEST_BEGIN(test_logs_if_no_init) { + test_skip_if(!config_log); + atomic_store_b(&log_init_done, false, ATOMIC_RELAXED); + + log_var_t l = LOG_VAR_INIT("definitely.not.enabled"); + + int count = 0; + for (int i = 0; i < 10; i++) { + log_do_begin(l) + count++; + log_do_end(l) + } + assert_d_eq(count, 0, "Logging shouldn't happen if not initialized."); +} +TEST_END + +int +main(void) { + + return test( + test_log_disabled, + test_log_enabled_direct, + 
 test_log_enabled_indirect, test_log_enabled_global, test_logs_if_no_init); } From e215a7bc18a2c3263a6fcca37c1ec53af6c4babd Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Wed, 19 Jul 2017 18:05:28 -0700 Subject: [PATCH 0959/2608] Add entry and exit logging to all core functions. I.e. malloc, free, the allocx API, the posix extensions. --- include/jemalloc/internal/log.h | 6 + src/jemalloc.c | 199 +++++++++++++++++++++++++++++++- 2 files changed, 204 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 8413a4d6..1df8cfff 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -30,6 +30,12 @@ * You can then log by writing: * log(log_var, "format string -- my int is %d", my_int); * + * The namespaces currently in use: + * core.[malloc|free|posix_memalign|...].[entry|exit]: + * The entry/exit points of the functions publicly exposed by jemalloc. + * The "entry" variants try to log arguments to the functions, and the + * "exit" ones try to log return values. + * * None of this should be regarded as a stable API for right now. It's intended * as a debugging interface, to let us keep around some of our printf-debugging * statements. 
diff --git a/src/jemalloc.c b/src/jemalloc.c index 09bac9eb..48a268ff 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1974,6 +1974,13 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_malloc_entry = LOG_VAR_INIT( + "core.malloc.entry"); + static log_var_t log_core_malloc_exit = LOG_VAR_INIT( + "core.malloc.exit"); + + log(log_core_malloc_entry, "size: %zu", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -1988,6 +1995,8 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); + log(log_core_malloc_exit, "result: %p", ret); + return ret; } @@ -1998,6 +2007,14 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_posix_memalign_entry = LOG_VAR_INIT( + "core.posix_memalign.entry"); + static log_var_t log_core_posix_memalign_exit = LOG_VAR_INIT( + "core.posix_memalign.exit"); + + log(log_core_posix_memalign_entry, "mem ptr: %p, alignment: %zu, " + "size: %zu", memptr, alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2014,6 +2031,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); + + log(log_core_posix_memalign_exit, "result: %d, alloc ptr: %p", ret, + *memptr); + return ret; } @@ -2026,6 +2047,14 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_aligned_alloc_entry = LOG_VAR_INIT( + "core.aligned_alloc.entry"); + static log_var_t log_core_aligned_alloc_exit = LOG_VAR_INIT( + "core.aligned_alloc.exit"); + + log(log_core_aligned_alloc_entry, "alignment: %zu, size: %zu\n", + alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2044,6 +2073,9 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + log(log_core_aligned_alloc_exit, "result: %p", ret); + return ret; } 
@@ -2055,6 +2087,13 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_calloc_entry = LOG_VAR_INIT( + "core.calloc.entry"); + static log_var_t log_core_calloc_exit = LOG_VAR_INIT( + "core.calloc.exit"); + + log(log_core_calloc_entry, "num: %zu, size: %zu\n", num, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2071,6 +2110,8 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); + log(log_core_calloc_exit, "result: %p", ret); + return ret; } @@ -2204,6 +2245,13 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; + static log_var_t log_core_realloc_entry = LOG_VAR_INIT( + "core.realloc.entry"); + static log_var_t log_core_realloc_exit = LOG_VAR_INIT( + "core.realloc.exit"); + + log(log_core_realloc_entry, "ptr: %p, size: %zu\n", ptr, size); + if (unlikely(size == 0)) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). */ @@ -2216,6 +2264,8 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } ifree(tsd, ptr, tcache, true); + + log(log_core_realloc_exit, "result: %p", NULL); return NULL; } size = 1; @@ -2248,7 +2298,9 @@ je_realloc(void *ptr, size_t size) { tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - return je_malloc(size); + void *ret = je_malloc(size); + log(log_core_realloc_exit, "result: %p", ret); + return ret; } if (unlikely(ret == NULL)) { @@ -2269,11 +2321,20 @@ je_realloc(void *ptr, size_t size) { } UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); + + log(log_core_realloc_exit, "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { + static log_var_t log_core_free_entry = LOG_VAR_INIT( + "core.free.entry"); + static log_var_t log_core_free_exit = LOG_VAR_INIT( + "core.free.exit"); + + log(log_core_free_entry, "ptr: %p", ptr); + UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* @@ -2303,6 +2364,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } + log(log_core_free_exit, "%s", ""); } /* @@ -2322,6 +2384,14 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_memalign_entry = LOG_VAR_INIT( + "core.memalign.entry"); + static log_var_t log_core_memalign_exit = LOG_VAR_INIT( + "core.memalign.exit"); + + log(log_core_memalign_entry, "alignment: %zu, size: %zu\n", alignment, + size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2339,6 +2409,8 @@ je_memalign(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + log(log_core_memalign_exit, "result: %p", ret); return ret; } #endif @@ -2353,6 +2425,13 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + static log_var_t log_core_valloc_entry = LOG_VAR_INIT( + "core.valloc.entry"); + static log_var_t log_core_valloc_exit = LOG_VAR_INIT( + "core.valloc.exit"); + + log(log_core_valloc_entry, "size: %zu\n", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2371,6 +2450,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); + log(log_core_valloc_exit, "result: %p\n", ret); return ret; } #endif @@ -2444,6 +2524,13 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; + 
static log_var_t log_core_mallocx_entry = LOG_VAR_INIT( + "core.mallocx.entry"); + static log_var_t log_core_mallocx_exit = LOG_VAR_INIT( + "core.mallocx.exit"); + + log(log_core_mallocx_entry, "size: %zu, flags: %d", size, flags); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2477,6 +2564,8 @@ je_mallocx(size_t size, int flags) { } imalloc(&sopts, &dopts); + + log(log_core_mallocx_exit, "result: %p", ret); return ret; } @@ -2557,6 +2646,15 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; + static log_var_t log_core_rallocx_entry = LOG_VAR_INIT( + "core.rallocx.entry"); + static log_var_t log_core_rallocx_exit = LOG_VAR_INIT( + "core.rallocx.exit"); + + log(log_core_rallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -2619,6 +2717,8 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_rallocx_exit, "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2627,6 +2727,8 @@ label_oom: } UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_rallocx_exit, "result: %p", NULL); return NULL; } @@ -2713,6 +2815,15 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; + static log_var_t log_core_xallocx_entry = LOG_VAR_INIT( + "core.xallocx.entry"); + static log_var_t log_core_xallocx_exit = LOG_VAR_INIT( + "core.xallocx.exit"); + + log(log_core_xallocx_entry, "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", ptr, size, extra, flags); + + assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2762,6 +2873,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); + + 
log(log_core_xallocx_exit, "result: %zu", usize); return usize; } @@ -2771,6 +2884,13 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; + static log_var_t log_core_sallocx_entry = LOG_VAR_INIT( + "core.sallocx.entry"); + static log_var_t log_core_sallocx_exit = LOG_VAR_INIT( + "core.sallocx.exit"); + + log(log_core_sallocx_entry, "ptr: %p, flags: %d", ptr, flags); + assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2785,11 +2905,20 @@ je_sallocx(const void *ptr, int flags) { } check_entry_exit_locking(tsdn); + + log(log_core_sallocx_exit, "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { + static log_var_t log_core_dallocx_entry = LOG_VAR_INIT( + "core.dallocx.entry"); + static log_var_t log_core_dallocx_exit = LOG_VAR_INIT( + "core.dallocx.exit"); + + log(log_core_dallocx_entry, "ptr: %p, flags: %d", ptr, flags); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2827,6 +2956,8 @@ je_dallocx(void *ptr, int flags) { ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_dallocx_exit, "%s", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2848,6 +2979,14 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + static log_var_t log_core_sdallocx_entry = LOG_VAR_INIT( + "core.sdallocx.entry"); + static log_var_t log_core_sdallocx_exit = LOG_VAR_INIT( + "core.sdallocx.exit"); + + log(log_core_sdallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); @@ -2884,6 +3023,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_sdallocx_exit, "%s", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2892,9 +3033,17 @@ je_nallocx(size_t size, 
int flags) { size_t usize; tsdn_t *tsdn; + static log_var_t log_core_nallocx_entry = LOG_VAR_INIT( + "core.nallocx.entry"); + static log_var_t log_core_nallocx_exit = LOG_VAR_INIT( + "core.nallocx.exit"); + + log(log_core_nallocx_entry, "size: %zu, flags: %d", size, flags); + assert(size != 0); if (unlikely(malloc_init())) { + log(log_core_nallocx_exit, "result: %zu", ZU(0)); return 0; } @@ -2903,10 +3052,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { + log(log_core_nallocx_exit, "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); + log(log_core_nallocx_exit, "result: %zu", usize); return usize; } @@ -2916,7 +3067,15 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; + static log_var_t log_core_mallctl_entry = LOG_VAR_INIT( + "core.mallctl.entry"); + static log_var_t log_core_mallctl_exit = LOG_VAR_INIT( + "core.mallctl.exit"); + + log(log_core_mallctl_entry, "name: %s", name); + if (unlikely(malloc_init())) { + log(log_core_mallctl_exit, "result: %d", EAGAIN); return EAGAIN; } @@ -2924,6 +3083,8 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_mallctl_exit, "result: %d", ret); return ret; } @@ -2931,7 +3092,15 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; + static log_var_t log_core_mallctlnametomib_entry = LOG_VAR_INIT( + "core.mallctlnametomib.entry"); + static log_var_t log_core_mallctlnametomib_exit = LOG_VAR_INIT( + "core.mallctlnametomib.exit"); + + log(log_core_mallctlnametomib_entry, "name: %s", name); + if (unlikely(malloc_init())) { + log(log_core_mallctlnametomib_exit, "result: %d", EAGAIN); return EAGAIN; } @@ -2939,6 +3108,8 @@ je_mallctlnametomib(const char *name, size_t 
*mibp, size_t *miblenp) { check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); + + log(log_core_mallctlnametomib_exit, "result: %d", ret); return ret; } @@ -2948,7 +3119,16 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; + static log_var_t log_core_mallctlbymib_entry = LOG_VAR_INIT( + "core.mallctlbymib.entry"); + static log_var_t log_core_mallctlbymib_exit = LOG_VAR_INIT( + "core.mallctlbymib.exit"); + + log(log_core_mallctlbymib_entry, "%s", ""); + + if (unlikely(malloc_init())) { + log(log_core_mallctlbymib_exit, "result: %d", EAGAIN); return EAGAIN; } @@ -2956,6 +3136,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + log(log_core_mallctlbymib_exit, "result: %d", ret); return ret; } @@ -2964,10 +3145,18 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; + static log_var_t log_core_malloc_stats_print_entry = LOG_VAR_INIT( + "core.malloc_stats_print.entry"); + static log_var_t log_core_malloc_stats_print_exit = LOG_VAR_INIT( + "core.malloc_stats_print.exit"); + + log(log_core_malloc_stats_print_entry, "%s", ""); + tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); + log(log_core_malloc_stats_print_exit, "%s", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2975,6 +3164,13 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; + static log_var_t log_core_malloc_usable_size_entry = LOG_VAR_INIT( + "core.malloc_usable_size.entry"); + static log_var_t log_core_malloc_usable_size_exit = LOG_VAR_INIT( + "core.malloc_usable_size.exit"); + + log(log_core_malloc_usable_size_entry, "ptr: %p", 
ptr); + assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); @@ -2992,6 +3188,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); + log(log_core_malloc_usable_size_exit, "result: %zu", ret); return ret; } From aa6c2821374f6dd6ed2e628c06bc08b0c4bc485c Mon Sep 17 00:00:00 2001 From: "Y. T. Chung" Date: Fri, 21 Jul 2017 21:40:29 +0800 Subject: [PATCH 0960/2608] Validates fd before calling fcntl --- src/pages.c | 12 +++++++++--- src/prof.c | 4 +++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/pages.c b/src/pages.c index 0883647b..f8ef2bcb 100644 --- a/src/pages.c +++ b/src/pages.c @@ -358,7 +358,9 @@ os_overcommits_proc(void) { O_CLOEXEC); #else fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) #if defined(O_CLOEXEC) @@ -367,14 +369,18 @@ os_overcommits_proc(void) { #else fd = (int)syscall(SYS_openat, AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif #else #if defined(O_CLOEXEC) fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); #else fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif #endif diff --git a/src/prof.c b/src/prof.c index a1ca9e2c..32760e68 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1414,7 +1414,9 @@ prof_open_maps(const char *format, ...) 
{ mfd = open(filename, O_RDONLY | O_CLOEXEC); #else mfd = open(filename, O_RDONLY); - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } #endif return mfd; From a9f7732d45c22ca7d22bed6ff2eaeb702356884e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 21 Jul 2017 13:34:45 -0700 Subject: [PATCH 0961/2608] Logging: allow logging with empty varargs. Currently, the log macro requires at least one argument after the format string, because of the way the preprocessor handles varargs macros. We can hide some of that irritation by pushing the extra arguments into a varargs function. --- configure.ac | 1 + .../internal/jemalloc_internal_macros.h | 3 ++ include/jemalloc/internal/log.h | 40 ++++++++++++++++--- include/jemalloc/internal/malloc_io.h | 4 ++ src/jemalloc.c | 14 +++---- src/log.c | 4 +- test/unit/log.c | 16 +++++++- 7 files changed, 65 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index 02151543..ba0409a5 100644 --- a/configure.ac +++ b/configure.ac @@ -243,6 +243,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wshorten-64-to-32]) JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) + JE_CFLAGS_ADD([-Wno-format-zero-length]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 4571895e..ed75d376 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -37,4 +37,7 @@ # define JET_MUTABLE const #endif +#define JEMALLOC_VA_ARGS_HEAD(head, ...) head +#define JEMALLOC_VA_ARGS_TAIL(head, ...) 
__VA_ARGS__ + #endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 1df8cfff..5ce8c354 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -2,14 +2,17 @@ #define JEMALLOC_INTERNAL_LOG_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #ifdef JEMALLOC_LOG -# define JEMALLOC_LOG_BUFSIZE 1000 +# define JEMALLOC_LOG_VAR_BUFSIZE 1000 #else -# define JEMALLOC_LOG_BUFSIZE 1 +# define JEMALLOC_LOG_VAR_BUFSIZE 1 #endif +#define JEMALLOC_LOG_BUFSIZE 4096 + /* * The log_vars malloc_conf option is a '|'-delimited list of log_var name * segments to log. The log_var names are themselves hierarchical, with '.' as @@ -41,7 +44,7 @@ * statements. */ -extern char log_var_names[JEMALLOC_LOG_BUFSIZE]; +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; extern atomic_b_t log_init_done; typedef struct log_var_s log_var_t; @@ -84,11 +87,36 @@ if (config_log) { \ } \ } -#define log(log_var, format, ...) \ +/* + * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during + * preprocessing. To work around this, we take all potential extra arguments in + * a var-args functions. Since a varargs macro needs at least one argument in + * the "...", we accept the format string there, and require that the first + * argument in this "..." is a const char *. + */ +static inline void +log_impl_varargs(const char *name, ...) 
{ + char buf[JEMALLOC_LOG_BUFSIZE]; + va_list ap; + + va_start(ap, name); + const char *format = va_arg(ap, const char *); + size_t dst_offset = 0; + dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); + dst_offset += malloc_vsnprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + dst_offset += malloc_snprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + va_end(ap); + + malloc_write(buf); +} + +/* Call as log(log_var, "format_string %d", arg_for_format_string); */ +#define log(log_var, ...) \ do { \ log_do_begin(log_var) \ - malloc_printf("%s: " format "\n", \ - (log_var).name, __VA_ARGS__); \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ log_do_end(log_var) \ } while (0) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 47ae58ec..4992d1d8 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -53,6 +53,10 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +/* + * The caller can set write_cb and cbopaque to null to choose to print with the + * je_malloc_message hook. 
+ */ void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, diff --git a/src/jemalloc.c b/src/jemalloc.c index 48a268ff..1dc66823 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2364,7 +2364,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } - log(log_core_free_exit, "%s", ""); + log(log_core_free_exit, ""); } /* @@ -2957,7 +2957,7 @@ je_dallocx(void *ptr, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_dallocx_exit, "%s", ""); + log(log_core_dallocx_exit, ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -3024,7 +3024,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_sdallocx_exit, "%s", ""); + log(log_core_sdallocx_exit, ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3083,7 +3083,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - + log(log_core_mallctl_exit, "result: %d", ret); return ret; } @@ -3124,7 +3124,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, static log_var_t log_core_mallctlbymib_exit = LOG_VAR_INIT( "core.mallctlbymib.exit"); - log(log_core_mallctlbymib_entry, "%s", ""); + log(log_core_mallctlbymib_entry, ""); if (unlikely(malloc_init())) { @@ -3150,13 +3150,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, static log_var_t log_core_malloc_stats_print_exit = LOG_VAR_INIT( "core.malloc_stats_print.exit"); - log(log_core_malloc_stats_print_entry, "%s", ""); + log(log_core_malloc_stats_print_entry, ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); - log(log_core_malloc_stats_print_exit, "%s", ""); + 
log(log_core_malloc_stats_print_exit, ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW diff --git a/src/log.c b/src/log.c index 022dc584..778902fb 100644 --- a/src/log.c +++ b/src/log.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/log.h" -char log_var_names[JEMALLOC_LOG_BUFSIZE]; +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; atomic_b_t log_init_done = ATOMIC_INIT(false); /* @@ -59,7 +59,7 @@ log_var_update_state(log_var_t *log_var) { while (true) { const char *segment_end = log_var_extract_segment( segment_begin); - assert(segment_end < log_var_names + JEMALLOC_LOG_BUFSIZE); + assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); if (log_var_matches_segment(segment_begin, segment_end, log_var_begin, log_var_end)) { atomic_store_u(&log_var->state, LOG_ENABLED, diff --git a/test/unit/log.c b/test/unit/log.c index 6db256f1..053fea41 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -170,13 +170,25 @@ TEST_BEGIN(test_logs_if_no_init) { } TEST_END +/* + * This really just checks to make sure that this usage compiles; we don't have + * any test code to run. + */ +TEST_BEGIN(test_log_only_format_string) { + if (false) { + static log_var_t l = LOG_VAR_INIT("str"); + log(l, "No arguments follow this format string."); + } +} +TEST_END + int main(void) { - return test( test_log_disabled, test_log_enabled_direct, test_log_enabled_indirect, test_log_enabled_global, - test_logs_if_no_init); + test_logs_if_no_init, + test_log_only_format_string); } From b28f31e7ed6c987bdbf3bdd9ce4aa63245926b4d Mon Sep 17 00:00:00 2001 From: Qinfan Wu Date: Mon, 24 Jul 2017 11:59:29 -0700 Subject: [PATCH 0962/2608] Split out cold code path in newImpl I noticed that the whole newImpl is inlined. Since OOM handling code is rarely executed, we should only inline the hot path. 
--- src/jemalloc_cpp.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 844ab398..f0ceddae 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -39,12 +39,10 @@ void operator delete(void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept; #endif -template -void * -newImpl(std::size_t size) noexcept(IsNoExcept) { - void *ptr = je_malloc(size); - if (likely(ptr != nullptr)) - return ptr; +JEMALLOC_NOINLINE +static void * +handleOOM(std::size_t size, bool nothrow) { + void *ptr = nullptr; while (ptr == nullptr) { std::new_handler handler; @@ -68,11 +66,22 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { ptr = je_malloc(size); } - if (ptr == nullptr && !IsNoExcept) + if (ptr == nullptr && !nothrow) std::__throw_bad_alloc(); return ptr; } +template +JEMALLOC_ALWAYS_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) { + void *ptr = je_malloc(size); + if (likely(ptr != nullptr)) + return ptr; + + return handleOOM(size, IsNoExcept); +} + void * operator new(std::size_t size) { return newImpl(size); From e6aeceb6068ace14ca530506fdfeb5f1cadd9a19 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Jul 2017 12:29:28 -0700 Subject: [PATCH 0963/2608] Logging: log using the log var names directly. Currently we have to log by writing something like: static log_var_t log_a_b_c = LOG_VAR_INIT("a.b.c"); log (log_a_b_c, "msg"); This is sort of annoying. Let's just write: log("a.b.c", "msg"); --- include/jemalloc/internal/log.h | 5 +- src/jemalloc.c | 198 ++++++++------------------------ test/unit/log.c | 3 +- 3 files changed, 51 insertions(+), 155 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 5ce8c354..9f32fb4f 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -112,9 +112,10 @@ log_impl_varargs(const char *name, ...) 
{ malloc_write(buf); } -/* Call as log(log_var, "format_string %d", arg_for_format_string); */ -#define log(log_var, ...) \ +/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ +#define log(log_var_str, ...) \ do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ log_do_begin(log_var) \ log_impl_varargs((log_var).name, __VA_ARGS__); \ log_do_end(log_var) \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 1dc66823..ed470520 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1974,12 +1974,7 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_malloc_entry = LOG_VAR_INIT( - "core.malloc.entry"); - static log_var_t log_core_malloc_exit = LOG_VAR_INIT( - "core.malloc.exit"); - - log(log_core_malloc_entry, "size: %zu", size); + log("core.malloc.entry", "size: %zu", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -1995,7 +1990,7 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); - log(log_core_malloc_exit, "result: %p", ret); + log("core.malloc.exit", "result: %p", ret); return ret; } @@ -2007,12 +2002,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_posix_memalign_entry = LOG_VAR_INIT( - "core.posix_memalign.entry"); - static log_var_t log_core_posix_memalign_exit = LOG_VAR_INIT( - "core.posix_memalign.exit"); - - log(log_core_posix_memalign_entry, "mem ptr: %p, alignment: %zu, " + log("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " "size: %zu", memptr, alignment, size); static_opts_init(&sopts); @@ -2032,7 +2022,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { ret = imalloc(&sopts, &dopts); - log(log_core_posix_memalign_exit, "result: %d, alloc ptr: %p", ret, + log("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, *memptr); return ret; @@ -2047,12 +2037,7 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; 
dynamic_opts_t dopts; - static log_var_t log_core_aligned_alloc_entry = LOG_VAR_INIT( - "core.aligned_alloc.entry"); - static log_var_t log_core_aligned_alloc_exit = LOG_VAR_INIT( - "core.aligned_alloc.exit"); - - log(log_core_aligned_alloc_entry, "alignment: %zu, size: %zu\n", + log("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2074,7 +2059,7 @@ je_aligned_alloc(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log(log_core_aligned_alloc_exit, "result: %p", ret); + log("core.aligned_alloc.exit", "result: %p", ret); return ret; } @@ -2087,12 +2072,7 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_calloc_entry = LOG_VAR_INIT( - "core.calloc.entry"); - static log_var_t log_core_calloc_exit = LOG_VAR_INIT( - "core.calloc.exit"); - - log(log_core_calloc_entry, "num: %zu, size: %zu\n", num, size); + log("core.calloc.entry", "num: %zu, size: %zu\n", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2110,7 +2090,7 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); - log(log_core_calloc_exit, "result: %p", ret); + log("core.calloc.exit", "result: %p", ret); return ret; } @@ -2245,12 +2225,7 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; - static log_var_t log_core_realloc_entry = LOG_VAR_INIT( - "core.realloc.entry"); - static log_var_t log_core_realloc_exit = LOG_VAR_INIT( - "core.realloc.exit"); - - log(log_core_realloc_entry, "ptr: %p, size: %zu\n", ptr, size); + log("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { if (ptr != NULL) { @@ -2265,7 +2240,7 @@ je_realloc(void *ptr, size_t size) { } ifree(tsd, ptr, tcache, true); - log(log_core_realloc_exit, "result: %p", NULL); + log("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2299,7 +2274,7 @@ je_realloc(void *ptr, size_t size) { } else { /* 
realloc(NULL, size) is equivalent to malloc(size). */ void *ret = je_malloc(size); - log(log_core_realloc_exit, "result: %p", ret); + log("core.realloc.exit", "result: %p", ret); return ret; } @@ -2322,18 +2297,13 @@ je_realloc(void *ptr, size_t size) { UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); - log(log_core_realloc_exit, "result: %p", ret); + log("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { - static log_var_t log_core_free_entry = LOG_VAR_INIT( - "core.free.entry"); - static log_var_t log_core_free_exit = LOG_VAR_INIT( - "core.free.exit"); - - log(log_core_free_entry, "ptr: %p", ptr); + log("core.free.entry", "ptr: %p", ptr); UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { @@ -2364,7 +2334,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } - log(log_core_free_exit, ""); + log("core.free.exit", ""); } /* @@ -2384,12 +2354,7 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_memalign_entry = LOG_VAR_INIT( - "core.memalign.entry"); - static log_var_t log_core_memalign_exit = LOG_VAR_INIT( - "core.memalign.exit"); - - log(log_core_memalign_entry, "alignment: %zu, size: %zu\n", alignment, + log("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2410,7 +2375,7 @@ je_memalign(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log(log_core_memalign_exit, "result: %p", ret); + log("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2425,12 +2390,7 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_valloc_entry = LOG_VAR_INIT( - "core.valloc.entry"); - static log_var_t log_core_valloc_exit = LOG_VAR_INIT( - "core.valloc.exit"); - - log(log_core_valloc_entry, "size: %zu\n", size); + log("core.valloc.entry", "size: %zu\n", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ 
-2450,7 +2410,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); - log(log_core_valloc_exit, "result: %p\n", ret); + log("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2524,12 +2484,7 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; - static log_var_t log_core_mallocx_entry = LOG_VAR_INIT( - "core.mallocx.entry"); - static log_var_t log_core_mallocx_exit = LOG_VAR_INIT( - "core.mallocx.exit"); - - log(log_core_mallocx_entry, "size: %zu, flags: %d", size, flags); + log("core.mallocx.entry", "size: %zu, flags: %d", size, flags); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2565,7 +2520,7 @@ je_mallocx(size_t size, int flags) { imalloc(&sopts, &dopts); - log(log_core_mallocx_exit, "result: %p", ret); + log("core.mallocx.exit", "result: %p", ret); return ret; } @@ -2646,12 +2601,7 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; - static log_var_t log_core_rallocx_entry = LOG_VAR_INIT( - "core.rallocx.entry"); - static log_var_t log_core_rallocx_exit = LOG_VAR_INIT( - "core.rallocx.exit"); - - log(log_core_rallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + log("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); @@ -2718,7 +2668,7 @@ je_rallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_rallocx_exit, "result: %p", p); + log("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2728,7 +2678,7 @@ label_oom: UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_rallocx_exit, "result: %p", NULL); + log("core.rallocx.exit", "result: %p", NULL); return NULL; } @@ -2815,15 +2765,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - static log_var_t log_core_xallocx_entry = LOG_VAR_INIT( - 
"core.xallocx.entry"); - static log_var_t log_core_xallocx_exit = LOG_VAR_INIT( - "core.xallocx.exit"); - - log(log_core_xallocx_entry, "ptr: %p, size: %zu, extra: %zu, " + log("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); - assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2874,7 +2818,7 @@ label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_xallocx_exit, "result: %zu", usize); + log("core.xallocx.exit", "result: %zu", usize); return usize; } @@ -2884,12 +2828,7 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; - static log_var_t log_core_sallocx_entry = LOG_VAR_INIT( - "core.sallocx.entry"); - static log_var_t log_core_sallocx_exit = LOG_VAR_INIT( - "core.sallocx.exit"); - - log(log_core_sallocx_entry, "ptr: %p, flags: %d", ptr, flags); + log("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2906,18 +2845,13 @@ je_sallocx(const void *ptr, int flags) { check_entry_exit_locking(tsdn); - log(log_core_sallocx_exit, "result: %zu", usize); + log("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { - static log_var_t log_core_dallocx_entry = LOG_VAR_INIT( - "core.dallocx.entry"); - static log_var_t log_core_dallocx_exit = LOG_VAR_INIT( - "core.dallocx.exit"); - - log(log_core_dallocx_entry, "ptr: %p, flags: %d", ptr, flags); + log("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2957,7 +2891,7 @@ je_dallocx(void *ptr, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_dallocx_exit, ""); + log("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2979,12 +2913,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() 
|| IS_INITIALIZER); - static log_var_t log_core_sdallocx_entry = LOG_VAR_INIT( - "core.sdallocx.entry"); - static log_var_t log_core_sdallocx_exit = LOG_VAR_INIT( - "core.sdallocx.exit"); - - log(log_core_sdallocx_entry, "ptr: %p, size: %zu, flags: %d", ptr, + log("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); tsd_t *tsd = tsd_fetch(); @@ -3024,7 +2953,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_sdallocx_exit, ""); + log("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3033,17 +2962,10 @@ je_nallocx(size_t size, int flags) { size_t usize; tsdn_t *tsdn; - static log_var_t log_core_nallocx_entry = LOG_VAR_INIT( - "core.nallocx.entry"); - static log_var_t log_core_nallocx_exit = LOG_VAR_INIT( - "core.nallocx.exit"); - - log(log_core_nallocx_entry, "size: %zu, flags: %d", size, flags); - assert(size != 0); if (unlikely(malloc_init())) { - log(log_core_nallocx_exit, "result: %zu", ZU(0)); + log("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -3052,12 +2974,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { - log(log_core_nallocx_exit, "result: %zu", ZU(0)); + log("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); - log(log_core_nallocx_exit, "result: %zu", usize); + log("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -3067,15 +2989,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; - static log_var_t log_core_mallctl_entry = LOG_VAR_INIT( - "core.mallctl.entry"); - static log_var_t log_core_mallctl_exit = LOG_VAR_INIT( - "core.mallctl.exit"); - - log(log_core_mallctl_entry, "name: %s", name); + log("core.mallctl.entry", "name: %s", name); if (unlikely(malloc_init())) { - log(log_core_mallctl_exit, "result: %d", EAGAIN); + log("core.mallctl.exit", "result: %d", EAGAIN); return 
EAGAIN; } @@ -3084,7 +3001,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_mallctl_exit, "result: %d", ret); + log("core.mallctl.exit", "result: %d", ret); return ret; } @@ -3092,15 +3009,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - static log_var_t log_core_mallctlnametomib_entry = LOG_VAR_INIT( - "core.mallctlnametomib.entry"); - static log_var_t log_core_mallctlnametomib_exit = LOG_VAR_INIT( - "core.mallctlnametomib.exit"); - - log(log_core_mallctlnametomib_entry, "name: %s", name); + log("core.mallctlnametomib.entry", "name: %s", name); if (unlikely(malloc_init())) { - log(log_core_mallctlnametomib_exit, "result: %d", EAGAIN); + log("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3109,7 +3021,7 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_mallctlnametomib_exit, "result: %d", ret); + log("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -3119,16 +3031,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; - static log_var_t log_core_mallctlbymib_entry = LOG_VAR_INIT( - "core.mallctlbymib.entry"); - static log_var_t log_core_mallctlbymib_exit = LOG_VAR_INIT( - "core.mallctlbymib.exit"); - - log(log_core_mallctlbymib_entry, ""); - + log("core.mallctlbymib.entry", ""); if (unlikely(malloc_init())) { - log(log_core_mallctlbymib_exit, "result: %d", EAGAIN); + log("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3136,7 +3042,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, 
newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log(log_core_mallctlbymib_exit, "result: %d", ret); + log("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -3145,18 +3051,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; - static log_var_t log_core_malloc_stats_print_entry = LOG_VAR_INIT( - "core.malloc_stats_print.entry"); - static log_var_t log_core_malloc_stats_print_exit = LOG_VAR_INIT( - "core.malloc_stats_print.exit"); - - log(log_core_malloc_stats_print_entry, ""); + log("core.malloc_stats_print.entry", ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); - log(log_core_malloc_stats_print_exit, ""); + log("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3164,12 +3065,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; - static log_var_t log_core_malloc_usable_size_entry = LOG_VAR_INIT( - "core.malloc_usable_size.entry"); - static log_var_t log_core_malloc_usable_size_exit = LOG_VAR_INIT( - "core.malloc_usable_size.exit"); - - log(log_core_malloc_usable_size_entry, "ptr: %p", ptr); + log("core.malloc_usable_size.entry", "ptr: %p", ptr); assert(malloc_initialized() || IS_INITIALIZER); @@ -3188,7 +3084,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); - log(log_core_malloc_usable_size_exit, "result: %zu", ret); + log("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } diff --git a/test/unit/log.c b/test/unit/log.c index 053fea41..3c1a208c 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -176,8 +176,7 @@ TEST_END */ TEST_BEGIN(test_log_only_format_string) { if (false) { - static log_var_t l = LOG_VAR_INIT("str"); - log(l, "No arguments follow this format string."); + log("log_str", "No arguments follow this format string."); } } TEST_END From 
7c22ea7a93f16c90f49de8ee226e3bcd1521c93e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 24 Jul 2017 12:56:02 -0700 Subject: [PATCH 0964/2608] Only run test/integration/sdallocx non-reentrantly. This is a temporary workaround until we add some beefier CI machines. Right now, we're seeing too many OOMs for this to be useful. --- test/integration/sdallocx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index e7ea1d82..ca014485 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -49,7 +49,7 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_basic, test_alignment_and_size); } From 2d2fa72647e0e535088793a0335d0294277d2f09 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 28 Jul 2017 13:01:15 -0700 Subject: [PATCH 0965/2608] Filter out "newImpl" from profiling output. --- bin/jeprof.in | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index e6f4af4b..1bbc51ee 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2895,6 +2895,7 @@ sub RemoveUninterestingFrames { foreach my $name ('@JEMALLOC_PREFIX@calloc', 'cfree', '@JEMALLOC_PREFIX@malloc', + 'newImpl', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', From 3800e55a2c6f4ffb03242db06437ad371db4ccd8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 31 Jul 2017 13:01:07 -0700 Subject: [PATCH 0966/2608] Bypass extent_alloc_wrapper_hard for no_move_expand. When retain is enabled, we should not attempt mmap for in-place expansion (large_ralloc_no_move), because it's virtually impossible to succeed, and causes unnecessary syscalls (which can cause lock contention under load). 
--- src/extent.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/extent.c b/src/extent.c index fa45c84d..f464de4a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1296,6 +1296,15 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). + */ + return NULL; + } extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } From 9a39b23c9c823e8157e2e6850014fa67c09f9351 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 31 Jul 2017 15:17:57 -0700 Subject: [PATCH 0967/2608] Remove a redundant '--with-malloc-conf=tcache:false' from gen_run_tests.py This is already tested via its inclusion in possible_malloc_conf_opts. --- scripts/gen_run_tests.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index ddf21533..bf19c2c9 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -22,7 +22,6 @@ possible_config_opts = [ '--enable-debug', '--enable-prof', '--disable-stats', - '--with-malloc-conf=tcache:false', ] possible_malloc_conf_opts = [ 'tcache:false', From 1ab2ab294c8f29a6f314f3ff30fbf4cdb2f01af6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 31 Jul 2017 14:35:33 -0700 Subject: [PATCH 0968/2608] Only read szind if ptr is not paged aligned in sdallocx. If ptr is not page aligned, we know the allocation was not sampled. In this case use the size passed into sdallocx directly w/o accessing rtree. This improves sdallocx efficiency in the common case (not sampled && small allocation). 
--- src/jemalloc.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index ed470520..4c73ba4a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2194,17 +2194,37 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (config_prof && opt_prof) { + if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { + /* + * When cache_oblivious is disabled and ptr is not page aligned, + * the allocation was not sampled -- usize can be used to + * determine szind directly. + */ + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = true; + ctx = &alloc_ctx; + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); + } + } else if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; - prof_free(tsd, ptr, usize, ctx); } else { ctx = NULL; } + if (config_prof && opt_prof) { + prof_free(tsd, ptr, usize, ctx); + } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } From 048c6679cd0ef1500d0609dce48fcd823d15d93b Mon Sep 17 00:00:00 2001 From: Ryan Libby Date: Mon, 7 Aug 2017 22:00:22 -0700 Subject: [PATCH 0969/2608] Remove external linkage for spin_adaptive The external linkage for spin_adaptive was not used, and the inline declaration of spin_adaptive that was used caused a problem on FreeBSD where CPU_SPINWAIT is implemented as a call to a static procedure for x86 architectures. 
--- Makefile.in | 1 - include/jemalloc/internal/spin.h | 8 +------- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 --- src/spin.c | 4 ---- 5 files changed, 1 insertion(+), 16 deletions(-) delete mode 100644 src/spin.c diff --git a/Makefile.in b/Makefile.in index 6e3424fe..0698633b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -112,7 +112,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof.c \ $(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ - $(srcroot)src/spin.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ $(srcroot)src/ticker.c \ diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index e2afc98c..aded0fcc 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -1,19 +1,13 @@ #ifndef JEMALLOC_INTERNAL_SPIN_H #define JEMALLOC_INTERNAL_SPIN_H -#ifdef JEMALLOC_SPIN_C_ -# define SPIN_INLINE extern inline -#else -# define SPIN_INLINE inline -#endif - #define SPIN_INITIALIZER {0U} typedef struct { unsigned iteration; } spin_t; -SPIN_INLINE void +static inline void spin_adaptive(spin_t *spin) { volatile uint32_t i; diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2addd295..97f892e1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -56,7 +56,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 4edf09b4..d2de135b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,9 +70,6 @@ Source Files - - Source Files - Source Files diff --git a/src/spin.c b/src/spin.c deleted file mode 100644 index 24372c26..00000000 --- a/src/spin.c +++ /dev/null @@ -1,4 +0,0 @@ -#define JEMALLOC_SPIN_C_ -#include "jemalloc/internal/jemalloc_preamble.h" - -#include 
"jemalloc/internal/spin.h" From d157864027562dc17475edfd1bc6dce559b7ac4b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Aug 2017 16:35:43 -0700 Subject: [PATCH 0970/2608] Filter out "void *newImpl" in prof output. --- bin/jeprof.in | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index 1bbc51ee..588c6b43 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2896,6 +2896,7 @@ sub RemoveUninterestingFrames { 'cfree', '@JEMALLOC_PREFIX@malloc', 'newImpl', + 'void* newImpl', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', From 8fdd9a579779b84d6af27f94c295f82a4df8e5be Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 10 Aug 2017 13:14:26 -0700 Subject: [PATCH 0971/2608] Implement opt.metadata_thp This option enables transparent huge page for base allocators (require MADV_HUGEPAGE support). --- configure.ac | 3 ++ doc/jemalloc.xml.in | 17 +++++- include/jemalloc/internal/base_externs.h | 2 + include/jemalloc/internal/base_types.h | 2 + .../internal/jemalloc_internal_defs.h.in | 6 +++ include/jemalloc/internal/pages.h | 3 ++ src/base.c | 43 ++++++++++----- src/ctl.c | 3 ++ src/jemalloc.c | 1 + src/pages.c | 53 ++++++++++++++++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + 12 files changed, 118 insertions(+), 17 deletions(-) diff --git a/configure.ac b/configure.ac index ba0409a5..e1a7343f 100644 --- a/configure.ac +++ b/configure.ac @@ -1824,6 +1824,9 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) + fi fi dnl Enable transparent huge page support by default. diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 21e401ac..f1712f05 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -916,6 +916,18 @@ mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.metadata_thp + (bool) + r- + + If true, allow jemalloc to use transparent huge page + (THP) for internal metadata (see stats.metadata for details). This + option is disabled by default. + + opt.retain @@ -2187,7 +2199,10 @@ struct extent_hooks_s { metadata structures (see stats.arenas.<i>.base) and internal allocations (see stats.arenas.<i>.internal). + linkend="stats.arenas.i.internal">stats.arenas.<i>.internal). + Transparent huge page (enabled with opt.metadata_thp) usage is not + considered. diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index a4fd5ac7..a5cb8a8d 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H +extern bool opt_metadata_thp; + base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index be7ee825..6e710334 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -4,4 +4,6 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; +#define METADATA_THP_DEFAULT false + #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b73daf04..5fa7f51f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -260,6 +260,12 @@ /* Defined if madvise(2) is available. */ #undef JEMALLOC_HAVE_MADVISE +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). 
+ */ +#undef JEMALLOC_HAVE_MADVISE_HUGE + /* * Methods for purging unused pages differ between operating systems. * diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 28383b7f..121fff38 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,6 +58,9 @@ static const bool pages_can_purge_forced = #endif ; +/* Whether transparent huge page state is "madvise". */ +extern bool thp_state_madvise; + void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); bool pages_commit(void *addr, size_t size); diff --git a/src/base.c b/src/base.c index 97078b13..99259783 100644 --- a/src/base.c +++ b/src/base.c @@ -10,7 +10,9 @@ /******************************************************************************/ /* Data. */ -static base_t *b0; +static base_t *b0; + +bool opt_metadata_thp = METADATA_THP_DEFAULT; /******************************************************************************/ @@ -20,19 +22,26 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) bool zero = true; bool commit = true; + /* We use hugepage sizes regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - + size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE; if (extent_hooks == &extent_hooks_default) { - addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, &zero, &commit, ind); post_reentrancy(tsd); } + if (addr != NULL && opt_metadata_thp && thp_state_madvise) { + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_huge(addr, size); + } + return addr; } @@ -51,16 +60,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { - return; + goto label_done; } if (!pages_decommit(addr, size)) { - return; + goto label_done; } if (!pages_purge_forced(addr, size)) { - return; + goto label_done; } if (!pages_purge_lazy(addr, size)) { - return; + goto label_done; } /* Nothing worked. This should never happen. */ not_reached(); @@ -70,27 +79,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ - label_done: + label_post_reentrancy: post_reentrancy(tsd); - return; + } +label_done: + if (opt_metadata_thp && thp_state_madvise) { + /* Set NOHUGEPAGE after unmap to avoid kernel defrag. 
*/ + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_nohuge(addr, size); } } diff --git a/src/ctl.c b/src/ctl.c index 36bc8fb5..c2991036 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -80,6 +80,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) @@ -274,6 +275,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, @@ -1568,6 +1570,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4c73ba4a..cbae259d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1055,6 +1055,7 @@ malloc_conf_init(void) { if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } + CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp") CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; diff --git a/src/pages.c b/src/pages.c index f8ef2bcb..9561f6de 100644 --- a/src/pages.c +++ b/src/pages.c @@ -25,6 +25,8 @@ static int mmap_flags; #endif static bool os_overcommits; +bool thp_state_madvise; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -291,7 +293,7 @@ pages_huge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); 
-#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else return true; @@ -303,7 +305,7 @@ pages_nohuge(void *addr, size_t size) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); -#ifdef JEMALLOC_THP +#ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else return false; @@ -413,6 +415,51 @@ os_overcommits_proc(void) { } #endif +static void +init_thp_state(void) { +#ifndef JEMALLOC_HAVE_MADVISE_HUGE + if (opt_metadata_thp && opt_abort) { + malloc_write(": no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; +#endif + static const char madvise_state[] = "always [madvise] never\n"; + char buf[sizeof(madvise_state)]; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + int fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + ssize_t nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); +#else + ssize_t nread = read(fd, &buf, sizeof(buf)); +#endif + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 1) { + goto label_error; + } + if (strncmp(buf, madvise_state, (size_t)nread) == 0) { + thp_state_madvise = true; + return; + } +label_error: + thp_state_madvise = false; +} + bool pages_boot(void) { os_page = os_page_detect(); @@ -441,5 +488,7 @@ pages_boot(void) { os_overcommits = false; #endif + init_thp_state(); + return false; } diff --git a/src/stats.c b/src/stats.c index 087df767..746cc426 100644 --- a/src/stats.c +++ b/src/stats.c @@ -802,6 +802,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } OPT_WRITE_BOOL(abort, ",") OPT_WRITE_BOOL(abort_conf, ",") + OPT_WRITE_BOOL(metadata_thp, ",") 
OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f6116549..d9fdd058 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(bool, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); From 3ec279ba1c702286b2a7d4ce7aaf48d7905f1c5b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 11 Aug 2017 15:41:52 -0700 Subject: [PATCH 0972/2608] Fix test/unit/pages. As part of the metadata_thp support, We now have a separate swtich (JEMALLOC_HAVE_MADVISE_HUGE) for MADV_HUGEPAGE availability. Use that instead of JEMALLOC_THP (which doesn't guard pages_huge anymore) in tests. --- include/jemalloc/internal/jemalloc_preamble.h.in | 7 +++++++ src/pages.c | 13 +++++++------ test/unit/pages.c | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 099f98d8..f6ed731c 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -61,6 +61,13 @@ static const bool have_dss = false #endif ; +static const bool have_madvise_huge = +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + true +#else + false +#endif + ; static const bool config_fill = #ifdef JEMALLOC_FILL true diff --git a/src/pages.c b/src/pages.c index 9561f6de..70f1fd33 100644 --- a/src/pages.c +++ b/src/pages.c @@ -417,13 +417,14 @@ os_overcommits_proc(void) { static void init_thp_state(void) { -#ifndef JEMALLOC_HAVE_MADVISE_HUGE - if (opt_metadata_thp && opt_abort) { - malloc_write(": no MADV_HUGEPAGE support\n"); - abort(); + if (!have_madvise_huge) { + if (opt_metadata_thp && opt_abort) { + malloc_write(": no MADV_HUGEPAGE support\n"); + abort(); + 
} + goto label_error; } - goto label_error; -#endif + static const char madvise_state[] = "always [madvise] never\n"; char buf[sizeof(madvise_state)]; diff --git a/test/unit/pages.c b/test/unit/pages.c index 67dbb4cd..1a979e62 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -11,7 +11,7 @@ TEST_BEGIN(test_pages_huge) { assert_ptr_not_null(pages, "Unexpected pages_map() error"); hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - assert_b_ne(pages_huge(hugepage, HUGEPAGE), config_thp, + assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, "Unexpected pages_huge() result"); assert_false(pages_nohuge(hugepage, HUGEPAGE), "Unexpected pages_nohuge() result"); From 8da69b69e6c4cd951832138780ac632e57987b7c Mon Sep 17 00:00:00 2001 From: Faidon Liambotis Date: Mon, 7 Aug 2017 21:51:09 +0300 Subject: [PATCH 0973/2608] Fix support for GNU/kFreeBSD The configure.ac seciton right now is the same for Linux and kFreeBSD, which results into an incorrect configuration of e.g. defining JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY instead of FreeBSD's JEMALLOC_SYSCTL_VM_OVERCOMMIT. GNU/kFreeBSD is really a glibc + FreeBSD kernel system, so it needs its own entry which has a mixture of configuration options from Linux and FreeBSD. --- configure.ac | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e1a7343f..49b2df15 100644 --- a/configure.ac +++ b/configure.ac @@ -567,7 +567,7 @@ case "${host}" in default_retain="1" fi ;; - *-*-linux* | *-*-kfreebsd*) + *-*-linux*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" @@ -580,6 +580,15 @@ case "${host}" in default_retain="1" fi ;; + *-*-kfreebsd*) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
+ JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) + abi="elf" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) + AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) + ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( From 82d1a3fb318fb086cd4207ca03dbdd5b0e3bbb26 Mon Sep 17 00:00:00 2001 From: Faidon Liambotis Date: Mon, 7 Aug 2017 21:56:21 +0300 Subject: [PATCH 0974/2608] Add support for m68k, nios2, SH3 architectures Add minimum alignment for three more architectures, as requested by Debian users or porters (see Debian bugs #807554, #816236, #863424). --- include/jemalloc/internal/jemalloc_internal_types.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 50f9d001..6b987d6f 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -79,9 +79,15 @@ typedef int malloc_cpuid_t; # ifdef __hppa__ # define LG_QUANTUM 4 # endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif # ifdef __mips__ # define LG_QUANTUM 3 # endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif # ifdef __or1k__ # define LG_QUANTUM 3 # endif @@ -94,7 +100,8 @@ typedef int malloc_cpuid_t; # ifdef __s390__ # define LG_QUANTUM 4 # endif -# ifdef __SH4__ +# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ + defined(__SH4_SINGLE_ONLY__)) # define LG_QUANTUM 4 # endif # ifdef __tile__ From b0825351d9eb49976164cff969a93877ac11f2c0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 11 Aug 2017 16:06:51 -0700 Subject: [PATCH 0975/2608] Add missing mallctl unit test for abort_conf. The abort_conf option was missed from test/unit/mallctl. 
--- test/unit/mallctl.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d9fdd058..0b14e78f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -157,6 +157,7 @@ TEST_BEGIN(test_mallctl_opt) { } while (0) TEST_MALLCTL_OPT(bool, abort, always); + TEST_MALLCTL_OPT(bool, abort_conf, always); TEST_MALLCTL_OPT(bool, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); From f3170baa30654b2f62547fa1ac80707d396e1245 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 10 Aug 2017 14:27:58 -0700 Subject: [PATCH 0976/2608] Pull out caching for a bin into its own file. This is the first step towards breaking up the tcache and arena (since they interact primarily at the bin level). It should also make a future arena caching implementation more straightforward. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/cache_bin.h | 82 ++++++++++++++++++ .../internal/jemalloc_internal_inlines_a.h | 8 +- include/jemalloc/internal/stats.h | 1 - include/jemalloc/internal/stats_tsd.h | 12 --- include/jemalloc/internal/tcache_externs.h | 8 +- include/jemalloc/internal/tcache_inlines.h | 85 +++++++------------ include/jemalloc/internal/tcache_structs.h | 40 ++------- include/jemalloc/internal/tcache_types.h | 5 -- src/arena.c | 6 +- src/tcache.c | 38 ++++----- 11 files changed, 148 insertions(+), 139 deletions(-) create mode 100644 include/jemalloc/internal/cache_bin.h delete mode 100644 include/jemalloc/internal/stats_tsd.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index af16d158..4e546c3b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -50,7 +50,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void 
arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero); diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h new file mode 100644 index 00000000..37025b5c --- /dev/null +++ b/include/jemalloc/internal/cache_bin.h @@ -0,0 +1,82 @@ +#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H +#define JEMALLOC_INTERNAL_CACHE_BIN_H + +/* + * The count of the number of cached allocations in a bin. We make this signed + * so that negative numbers can encode "invalid" states (e.g. a low water mark + * for a bin that has never been filled). + */ +typedef int32_t cache_bin_sz_t; + +typedef struct cache_bin_stats_s cache_bin_stats_t; +struct cache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +typedef struct cache_bin_info_s cache_bin_info_t; +struct cache_bin_info_s { + /* Upper limit on ncached. */ + cache_bin_sz_t ncached_max; +}; + +typedef struct cache_bin_s cache_bin_t; +struct cache_bin_s { + /* Min # cached since last GC. */ + cache_bin_sz_t low_water; + /* # of cached objects. */ + cache_bin_sz_t ncached; + /* + * ncached and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. + */ + cache_bin_stats_t tstats; + /* + * Stack of available objects. + * + * To make use of adjacent cacheline prefetch, the items in the avail + * stack goes to higher address for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... 
-1] are available items and the lowest item will + * be allocated first. + */ + void **avail; +}; + +JEMALLOC_ALWAYS_INLINE void * +cache_alloc_easy(cache_bin_t *bin, bool *success) { + void *ret; + + if (unlikely(bin->ncached == 0)) { + bin->low_water = -1; + *success = false; + return NULL; + } + /* + * success (instead of ret) should be checked upon the return of this + * function. We avoid checking (ret == NULL) because there is never a + * null stored on the avail stack (which is unknown to the compiler), + * and eagerly checking ret would cause pipeline stall (waiting for the + * cacheline). + */ + *success = true; + ret = *(bin->avail - bin->ncached); + bin->ncached--; + + if (unlikely(bin->ncached < bin->low_water)) { + bin->low_water = bin->ncached; + } + + return ret; + +} + +#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 24ea4162..5ec35db3 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -106,16 +106,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_small_bin_get(tcache_t *tcache, szind_t binind) { assert(binind < NBINS); - return &tcache->tbins_small[binind]; + return &tcache->bins_small[binind]; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_large_bin_get(tcache_t *tcache, szind_t binind) { assert(binind >= NBINS &&binind < nhbins); - return &tcache->tbins_large[binind - NBINS]; + return &tcache->bins_large[binind - NBINS]; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 1198779a..ab872e59 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -5,7 +5,6 @@ #include 
"jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ diff --git a/include/jemalloc/internal/stats_tsd.h b/include/jemalloc/internal/stats_tsd.h deleted file mode 100644 index d0c3bbe4..00000000 --- a/include/jemalloc/internal/stats_tsd.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_STATS_TSD_H -#define JEMALLOC_INTERNAL_STATS_TSD_H - -typedef struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -} tcache_bin_stats_t; - -#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index db3e9c7d..790367bd 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -6,7 +6,7 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern tcache_bin_info_t *tcache_bin_info; +extern cache_bin_info_t *tcache_bin_info; /* * Number of tcache bins. 
There are NBINS small-object bins, plus 0 or more @@ -30,10 +30,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + cache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index c55bcd27..d1632d8f 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -37,44 +37,17 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) { } } -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { - void *ret; - - if (unlikely(tbin->ncached == 0)) { - tbin->low_water = -1; - *tcache_success = false; - return NULL; - } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). 
- */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); - tbin->ncached--; - - if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) { - tbin->low_water = tbin->ncached; - } - - return ret; -} - JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < NBINS); - tbin = tcache_small_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + bin = tcache_small_bin_get(tcache, binind); + ret = cache_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -84,7 +57,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); + bin, binind, &tcache_hard_success); if (tcache_hard_success == false) { return NULL; } @@ -118,7 +91,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; assert(binind >= NBINS &&binind < nhbins); - tbin = tcache_large_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + bin = tcache_large_bin_get(tcache, binind); + ret = cache_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_stats) { - 
tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -190,8 +163,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); @@ -199,15 +172,15 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); } - tbin = tcache_small_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + bin = tcache_small_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -215,8 +188,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); @@ -225,15 +198,15 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, large_dalloc_junk(ptr, sz_index2size(binind)); } - tbin = tcache_large_bin_get(tcache, binind); - 
tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); + bin = tcache_large_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_large(tsd, bin, binind, + (bin_info->ncached_max >> 1), tcache); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 7eb516fb..ad0fe66c 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -3,54 +3,26 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" +#include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - low_water_t low_water; /* Min # cached since last GC. */ - uint32_t ncached; /* # of cached objects. */ - /* - * ncached and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. - */ - tcache_bin_stats_t tstats; - /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. 
- */ - void **avail; /* Stack of available objects. */ -}; - struct tcache_s { /* Data accessed frequently first: prof, ticker and small bins. */ uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ ticker_t gc_ticker; /* Drives incremental GC. */ /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. + * The pointer stacks associated with bins follow as a contiguous array. + * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. */ - tcache_bin_t tbins_small[NBINS]; + cache_bin_t bins_small[NBINS]; /* Data accessed less often below. */ ql_elm(tcache_t) link; /* Used for aggregating stats. */ arena_t *arena; /* Associated arena. */ szind_t next_gc_bin; /* Next bin to GC. */ /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[NBINS]; - tcache_bin_t tbins_large[NSIZES-NBINS]; + cache_bin_t bins_large[NSIZES-NBINS]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 1155d62c..e49bc9d7 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -3,14 +3,9 @@ #include "jemalloc/internal/size_classes.h" -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; -/* ncached is cast to this type for comparison. 
*/ -typedef int32_t low_water_t; - /* * tcache pointers close to NULL are used to encode state information that is * used for two purposes: preventing thread caching on a per thread basis and diff --git a/src/arena.c b/src/arena.c index 632fce52..60b482e9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -307,12 +307,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ql_foreach(tcache, &arena->tcache_ql, link) { szind_t i = 0; for (; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } @@ -1420,7 +1420,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; arena_bin_t *bin; diff --git a/src/tcache.c b/src/tcache.c index 936ef314..7d32d4d5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -12,7 +12,7 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -tcache_bin_info_t *tcache_bin_info; +cache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ unsigned nhbins; @@ -40,7 +40,7 @@ void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin; + cache_bin_t *tbin; if (binind < NBINS) { tbin = tcache_small_bin_get(tcache, binind); } else { @@ -58,7 +58,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. 
*/ - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if ((tbin_info->ncached_max >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; @@ -86,7 +86,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { + cache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; assert(tcache->arena != NULL); @@ -95,18 +95,18 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = tcache_alloc_easy(tbin, tcache_success); + ret = cache_alloc_easy(tbin, tcache_success); return ret; } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { bool merged_stats = false; assert(binind < NBINS); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -180,18 +180,18 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { bool merged_stats = false; assert(binind < nhbins); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -278,7 +278,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, memmove(tbin->avail - rem, tbin->avail - 
tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -354,8 +354,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS); - memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS)); + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS); + memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS)); unsigned i = 0; for (; i < NBINS; i++) { tcache->lg_fill_div[i] = 1; @@ -450,7 +450,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tcache->arena != NULL); for (unsigned i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats) { @@ -458,7 +458,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } for (unsigned i = NBINS; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { @@ -525,7 +525,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. 
*/ for (i = 0; i < NBINS; i++) { arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); @@ -533,7 +533,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_large_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; @@ -657,8 +657,8 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(tcache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins + * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } From 9c0549007dcb64f4ff35d37390a9a6a8d3cea880 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 11 Aug 2017 17:34:21 -0700 Subject: [PATCH 0977/2608] Make arena stats collection go through cache bins. This eliminates the need for the arena stats code to "know" about tcaches; all that it needs is a cache_bin_array_descriptor_t to tell it where to find cache_bins whose stats it should aggregate. 
--- include/jemalloc/internal/arena_structs_b.h | 11 +++--- include/jemalloc/internal/cache_bin.h | 34 +++++++++++++++++- include/jemalloc/internal/tcache_structs.h | 39 +++++++++++++++++---- src/arena.c | 8 ++--- src/tcache.c | 9 +++++ 5 files changed, 84 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index d1fffec1..c4e4310d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -162,14 +162,15 @@ struct arena_s { arena_stats_t stats; /* - * List of tcaches for extant threads associated with this arena. - * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. + * Lists of tcaches and cache_bin_array_descriptors for extant threads + * associated with this arena. Stats from these are merged + * incrementally, and at exit if opt_stats_print is enabled. * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_t) tcache_ql; - malloc_mutex_t tcache_ql_mtx; + ql_head(tcache_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. */ prof_accum_t prof_accum; diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 37025b5c..85d9de01 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -1,6 +1,19 @@ #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H #define JEMALLOC_INTERNAL_CACHE_BIN_H +#include "jemalloc/internal/ql.h" + +/* + * The cache_bins are the mechanism that the tcache and the arena use to + * communicate. The tcache fills from and flushes to the arena by passing a + * cache_bin_t to fill/flush. When the arena needs to pull stats from the + * tcaches associated with it, it does so by iterating over its + * cache_bin_array_descriptor_t objects and reading out per-bin stats it + * contains. 
This makes it so that the arena need not know about the existence + * of the tcache at all. + */ + + /* * The count of the number of cached allocations in a bin. We make this signed * so that negative numbers can encode "invalid" states (e.g. a low water mark @@ -51,6 +64,26 @@ struct cache_bin_s { void **avail; }; +typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; +struct cache_bin_array_descriptor_s { + /* + * The arena keeps a list of the cache bins associated with it, for + * stats collection. + */ + ql_elm(cache_bin_array_descriptor_t) link; + /* Pointers to the tcache bins. */ + cache_bin_t *bins_small; + cache_bin_t *bins_large; +}; + +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} + JEMALLOC_ALWAYS_INLINE void * cache_alloc_easy(cache_bin_t *bin, bool *success) { void *ret; @@ -76,7 +109,6 @@ cache_alloc_easy(cache_bin_t *bin, bool *success) { } return ret; - } #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index ad0fe66c..07b73870 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -7,21 +7,46 @@ #include "jemalloc/internal/ticker.h" struct tcache_s { - /* Data accessed frequently first: prof, ticker and small bins. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ + /* + * To minimize our cache-footprint, we put the frequently accessed data + * together at the start of this struct. + */ + + /* Cleared after arena_prof_accum(). */ + uint64_t prof_accumbytes; + /* Drives incremental GC. */ + ticker_t gc_ticker; /* * The pointer stacks associated with bins follow as a contiguous array. 
* During tcache initialization, the avail pointer in each element of * tbins is initialized to point to the proper offset within this array. */ cache_bin_t bins_small[NBINS]; - /* Data accessed less often below. */ - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - arena_t *arena; /* Associated arena. */ - szind_t next_gc_bin; /* Next bin to GC. */ + + /* + * This data is less hot; we can be a little less careful with our + * footprint here. + */ + /* Lets us track all the tcaches in an arena. */ + ql_elm(tcache_t) link; + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* Next bin to GC. */ + szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[NBINS]; + /* + * We put the cache bins for large size classes at the end of the + * struct, since some of them might not get used. This might end up + * letting us avoid touching an extra page if we don't have to. + */ cache_bin_t bins_large[NSIZES-NBINS]; }; diff --git a/src/arena.c b/src/arena.c index 60b482e9..19aafaf0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -303,16 +303,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* tcache_bytes counts currently cached bytes. 
*/ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - tcache_t *tcache; - ql_foreach(tcache, &arena->tcache_ql, link) { + cache_bin_array_descriptor_t *descriptor; + ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { szind_t i = 0; for (; i < NBINS; i++) { - cache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - cache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } diff --git a/src/tcache.c b/src/tcache.c index 7d32d4d5..e22f8067 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -291,8 +291,15 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, tcache->bins_small, + tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -316,6 +323,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); tcache_stats_merge(tsdn, tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } From ea91dfa58e11373748f747041c3041f72c9a7658 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Aug 2017 13:32:28 -0700 Subject: [PATCH 0978/2608] Document the ialloc function abbreviations. 
In the jemalloc_internal_inlines files, we have a lot of somewhat terse function names. This commit adds some documentation to aid in translation. --- include/jemalloc/internal/cache_bin.h | 2 +- .../internal/jemalloc_internal_inlines_c.h | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 85d9de01..9b874398 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -17,7 +17,7 @@ /* * The count of the number of cached allocations in a bin. We make this signed * so that negative numbers can encode "invalid" states (e.g. a low water mark - * for a bin that has never been filled). + * of -1 for a cache that has been depleted). */ typedef int32_t cache_bin_sz_t; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 7ffce6fb..c54fc992 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -5,6 +5,24 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/witness.h" +/* + * Translating the names of the 'i' functions: + * Abbreviations used in the first part of the function name (before + * alloc/dalloc) describe what that function accomplishes: + * a: arena (query) + * s: size (query, or sized deallocation) + * e: extent (query) + * p: aligned (allocates) + * vs: size (query, without knowing that the pointer is into the heap) + * r: rallocx implementation + * x: xallocx implementation + * Abbreviations used in the second part of the function name (after + * alloc/dalloc) describe the arguments it takes + * z: whether to return zeroed memory + * t: accepts a tcache_t * parameter + * m: accepts an arena_t * parameter + */ + JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); From 
47b20bb6544de9cdd4ca7ab870d6ad257c0ce4ff Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 Aug 2017 14:29:28 -0700 Subject: [PATCH 0979/2608] Change opt.metadata_thp to [disabled,auto,always]. To avoid the high RSS caused by THP + low usage arena (i.e. THP becomes a significant percentage), added a new "auto" option which will only start using THP after a base allocator used up the first THP region. Starting from the second hugepage (in a single arena), "auto" behaves the same as "always", i.e. madvise hugepage right away. --- doc/jemalloc.xml.in | 12 ++++--- include/jemalloc/internal/base_externs.h | 3 +- include/jemalloc/internal/base_inlines.h | 4 +++ include/jemalloc/internal/base_types.h | 17 ++++++++- src/base.c | 46 +++++++++++++++++------- src/ctl.c | 3 +- src/jemalloc.c | 18 +++++++++- src/pages.c | 2 +- src/stats.c | 2 +- test/unit/mallctl.c | 2 +- 10 files changed, 84 insertions(+), 25 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f1712f05..0c956040 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -919,13 +919,15 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", opt.metadata_thp - (bool) + (const char *) r- - If true, allow jemalloc to use transparent huge page - (THP) for internal metadata (see stats.metadata for details). This - option is disabled by default. + Controls whether to allow jemalloc to use transparent + huge page (THP) for internal metadata (see stats.metadata). always + allows such usage. auto uses no THP initially, but may + begin to do so when metadata usage reaches certain level. The default + is disabled. 
diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index a5cb8a8d..6cd11877 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -1,7 +1,8 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H -extern bool opt_metadata_thp; +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index 931560bf..aec0e2e1 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -6,4 +6,8 @@ base_ind_get(const base_t *base) { return base->ind; } +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} #endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index 6e710334..97e38a97 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -4,6 +4,21 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; -#define METADATA_THP_DEFAULT false +#define METADATA_THP_DEFAULT metadata_thp_disabled + +typedef enum { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. 
+ */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +} metadata_thp_mode_t; #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/src/base.c b/src/base.c index 99259783..9cb02b63 100644 --- a/src/base.c +++ b/src/base.c @@ -12,7 +12,13 @@ static base_t *b0; -bool opt_metadata_thp = METADATA_THP_DEFAULT; +metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; + +const char *metadata_thp_mode_names[] = { + "disabled", + "auto", + "always" +}; /******************************************************************************/ @@ -24,7 +30,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) /* We use hugepage sizes regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - size_t alignment = opt_metadata_thp ? HUGEPAGE : PAGE; + size_t alignment = metadata_thp_enabled() ? HUGEPAGE : PAGE; if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { @@ -36,12 +42,6 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) post_reentrancy(tsd); } - if (addr != NULL && opt_metadata_thp && thp_state_madvise) { - assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && - (size & HUGEPAGE_MASK) == 0); - pages_huge(addr, size); - } - return addr; } @@ -101,7 +101,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, post_reentrancy(tsd); } label_done: - if (opt_metadata_thp && thp_state_madvise) { + if (metadata_thp_enabled() && thp_state_madvise) { /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (size & HUGEPAGE_MASK) == 0); @@ -181,8 +181,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, * On success a pointer to the initialized base_block_t header is returned. 
*/ static base_block_t * -base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, - pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, + unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -208,6 +208,26 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, if (block == NULL) { return NULL; } + + if (metadata_thp_enabled() && thp_state_madvise) { + void *addr = (void *)block; + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (block_size & HUGEPAGE_MASK) == 0); + /* base == NULL indicates this is a new base. */ + if (base != NULL || opt_metadata_thp == metadata_thp_always) { + /* Use hugepage for the new block. */ + pages_huge(addr, block_size); + } + if (base != NULL && opt_metadata_thp == metadata_thp_auto) { + /* Make the first block THP lazily. */ + base_block_t *first_block = base->blocks; + if (first_block->next == NULL) { + assert((first_block->size & HUGEPAGE_MASK) == 0); + pages_huge(first_block, first_block->size); + } + } + } + *pind_last = sz_psz2ind(block_size); block->size = block_size; block->next = NULL; @@ -231,7 +251,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { * called. 
*/ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -259,7 +279,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind, + base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; diff --git a/src/ctl.c b/src/ctl.c index c2991036..ace10b02 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1570,7 +1570,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) -CTL_RO_NL_GEN(opt_metadata_thp, opt_metadata_thp, bool) +CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], + const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) diff --git a/src/jemalloc.c b/src/jemalloc.c index cbae259d..3c0ea7d4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1055,7 +1055,23 @@ malloc_conf_init(void) { if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } - CONF_HANDLE_BOOL(opt_metadata_thp, "metadata_thp") + if (strncmp("metadata_thp", k, klen) == 0) { + int i; + bool match = false; + for (i = 0; i < metadata_thp_mode_limit; i++) { + if (strncmp(metadata_thp_mode_names[i], + v, vlen) == 0) { + opt_metadata_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { int i; diff --git a/src/pages.c b/src/pages.c index 70f1fd33..4ca3107d 100644 --- 
a/src/pages.c +++ b/src/pages.c @@ -418,7 +418,7 @@ os_overcommits_proc(void) { static void init_thp_state(void) { if (!have_madvise_huge) { - if (opt_metadata_thp && opt_abort) { + if (metadata_thp_enabled() && opt_abort) { malloc_write(": no MADV_HUGEPAGE support\n"); abort(); } diff --git a/src/stats.c b/src/stats.c index 746cc426..e1a3f8cf 100644 --- a/src/stats.c +++ b/src/stats.c @@ -802,11 +802,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, } OPT_WRITE_BOOL(abort, ",") OPT_WRITE_BOOL(abort_conf, ",") - OPT_WRITE_BOOL(metadata_thp, ",") OPT_WRITE_BOOL(retain, ",") OPT_WRITE_CHAR_P(dss, ",") OPT_WRITE_UNSIGNED(narenas, ",") OPT_WRITE_CHAR_P(percpu_arena, ",") + OPT_WRITE_CHAR_P(metadata_thp, ",") OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 0b14e78f..5612cce5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -158,7 +158,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); - TEST_MALLCTL_OPT(bool, metadata_thp, always); + TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); From e55c3ca26758bcb7f6f1621fd690caa245f16942 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 25 Aug 2017 13:24:49 -0700 Subject: [PATCH 0980/2608] Add stats for metadata_thp. Report number of THPs used in arena and aggregated stats. 
--- doc/jemalloc.xml.in | 26 ++++++++++++ include/jemalloc/internal/base_externs.h | 2 +- include/jemalloc/internal/base_structs.h | 2 + include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/stats.h | 1 + src/arena.c | 5 ++- src/base.c | 51 ++++++++++++++++++++---- src/ctl.c | 12 ++++++ src/stats.c | 22 ++++++++-- test/unit/base.c | 24 ++++++++--- 10 files changed, 125 insertions(+), 21 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0c956040..f7fbe305 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2207,6 +2207,20 @@ struct extent_hooks_s { considered. + + + stats.metadata_thp + (size_t) + r- + [] + + Number of transparent huge pages (THP) used for + metadata. See stats.metadata and + opt.metadata_thp) for + details. + + stats.resident @@ -2523,6 +2537,18 @@ struct extent_hooks_s { profiles. + + + stats.arenas.<i>.metadata_thp + (size_t) + r- + [] + + Number of transparent huge pages (THP) used for + metadata. See opt.metadata_thp + for details. 
+ + stats.arenas.<i>.resident diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 6cd11877..7b705c9b 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -13,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped); + size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); void base_postfork_parent(tsdn_t *tsdn, base_t *base); void base_postfork_child(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 18e227bd..b5421693 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -50,6 +50,8 @@ struct base_s { size_t allocated; size_t resident; size_t mapped; + /* Number of THP regions touched. */ + size_t n_thp; }; #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a91c4cf5..a36feaff 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -48,6 +48,7 @@ typedef struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t metadata_thp; size_t resident; size_t mapped; size_t retained; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index ab872e59..f19df374 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -142,6 +142,7 @@ typedef struct arena_stats_s { atomic_zu_t base; /* Derived. */ atomic_zu_t internal; atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; atomic_zu_t allocated_large; /* Derived. */ arena_stats_u64_t nmalloc_large; /* Derived. 
*/ diff --git a/src/arena.c b/src/arena.c index 19aafaf0..18ed5aac 100644 --- a/src/arena.c +++ b/src/arena.c @@ -234,9 +234,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped; + size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, - &base_mapped); + &base_mapped, &metadata_thp); arena_stats_lock(tsdn, &arena->stats); @@ -267,6 +267,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->base, base_allocated); arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); + arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + diff --git a/src/base.c b/src/base.c index 9cb02b63..609a445b 100644 --- a/src/base.c +++ b/src/base.c @@ -22,6 +22,11 @@ const char *metadata_thp_mode_names[] = { /******************************************************************************/ +static inline bool +metadata_thp_madvise(void) { + return (metadata_thp_enabled() && thp_state_madvise); +} + static void * base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; @@ -101,7 +106,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, post_reentrancy(tsd); } label_done: - if (metadata_thp_enabled() && thp_state_madvise) { + if (metadata_thp_madvise()) { /* Set NOHUGEPAGE after unmap to avoid kernel defrag. 
*/ assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (size & HUGEPAGE_MASK) == 0); @@ -120,6 +125,13 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static bool +base_is_single_block(base_t *base) { + assert(base->blocks != NULL && + (base->blocks->size & HUGEPAGE_MASK) == 0); + return (base->blocks->next == NULL); +} + static void * base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, size_t alignment) { @@ -155,12 +167,20 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is - * crossed by the new allocation. + * crossed by the new allocation. Adjust n_thp similarly when + * metadata_thp is enabled. */ base->resident += PAGE_CEILING((uintptr_t)addr + size) - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + if (metadata_thp_madvise() && (!base_is_single_block(base) || + opt_metadata_thp == metadata_thp_always)) { + base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) + - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> + LG_HUGEPAGE; + assert(base->mapped >= base->n_thp << LG_HUGEPAGE); + } } } @@ -209,7 +229,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, return NULL; } - if (metadata_thp_enabled() && thp_state_madvise) { + if (metadata_thp_madvise()) { void *addr = (void *)block; assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (block_size & HUGEPAGE_MASK) == 0); @@ -218,12 +238,15 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, /* Use hugepage for the new block. */ pages_huge(addr, block_size); } - if (base != NULL && opt_metadata_thp == metadata_thp_auto) { + if (base != NULL && base_is_single_block(base) && + opt_metadata_thp == metadata_thp_auto) { /* Make the first block THP lazily. 
*/ base_block_t *first_block = base->blocks; - if (first_block->next == NULL) { - assert((first_block->size & HUGEPAGE_MASK) == 0); - pages_huge(first_block, first_block->size); + assert((first_block->size & HUGEPAGE_MASK) == 0); + pages_huge(first_block, first_block->size); + if (config_stats) { + assert(base->n_thp == 0); + base->n_thp += first_block->size >> LG_HUGEPAGE; } } } @@ -264,8 +287,15 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; + if (metadata_thp_madvise()) { + assert(!base_is_single_block(base)); + assert(base->n_thp > 0); + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> + LG_HUGEPAGE; + } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } return &block->extent; } @@ -307,8 +337,12 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; + base->n_thp = (opt_metadata_thp == metadata_thp_always) && + metadata_thp_madvise() ? 
HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, base_size); @@ -403,7 +437,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) { void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, - size_t *mapped) { + size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -412,6 +446,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, *allocated = base->allocated; *resident = base->resident; *mapped = base->mapped; + *n_thp = base->n_thp; malloc_mutex_unlock(tsdn, &base->mtx); } diff --git a/src/ctl.c b/src/ctl.c index ace10b02..a2f3837a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -183,6 +183,7 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) @@ -192,6 +193,7 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) @@ -476,6 +478,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, @@ -514,6 +517,7 @@ static const ctl_named_node_t 
stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, @@ -775,6 +779,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.internal); accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); + accum_atomic_zu(&sdstats->astats.metadata_thp, + &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -940,6 +946,8 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_thp = atomic_load_zu( + &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = atomic_load_zu( @@ -2464,6 +2472,7 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) @@ -2519,6 +2528,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) diff --git a/src/stats.c b/src/stats.c index e1a3f8cf..cbeb923d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -401,7 +401,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident; + size_t base, internal, resident, metadata_thp; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -613,6 +613,15 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, "internal: %12zu\n", internal); } + CTL_M2_GET("stats.arenas.0.metadata_thp", i, &metadata_thp, size_t); + if (json) { + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\t\"metadata_thp\": %zu,\n", metadata_thp); + } else { + malloc_cprintf(write_cb, cbopaque, + "metadata_thp: %12zu\n", metadata_thp); + } + CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); if (json) { malloc_cprintf(write_cb, cbopaque, @@ -1007,13 +1016,15 @@ static void stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool merged, bool destroyed, bool unmerged, bool bins, bool large, bool mutex) { - size_t allocated, active, metadata, resident, mapped, retained; + size_t allocated, active, metadata, metadata_thp, resident, mapped, + retained; size_t num_background_threads; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); @@ -1046,6 +1057,8 @@ stats_print_helper(void 
(*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\"active\": %zu,\n", active); malloc_cprintf(write_cb, cbopaque, "\t\t\t\"metadata\": %zu,\n", metadata); + malloc_cprintf(write_cb, cbopaque, + "\t\t\t\"metadata_thp\": %zu,\n", metadata_thp); malloc_cprintf(write_cb, cbopaque, "\t\t\t\"resident\": %zu,\n", resident); malloc_cprintf(write_cb, cbopaque, @@ -1082,9 +1095,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t}%s\n", (merged || unmerged || destroyed) ? "," : ""); } else { malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," + "Allocated: %zu, active: %zu, metadata: %zu (n_thp %zu)," " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); + allocated, active, metadata, metadata_thp, resident, mapped, + retained); if (have_background_thread && num_background_threads > 0) { malloc_cprintf(write_cb, cbopaque, diff --git a/test/unit/base.c b/test/unit/base.c index 7fa24ac0..6b792cf2 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -28,22 +28,28 @@ static extent_hooks_t hooks_not_null = { TEST_BEGIN(test_base_hooks_default) { base_t *base; - size_t allocated0, allocated1, resident, mapped; + size_t allocated0, allocated1, resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + base_stats_get(tsdn, base, &allocated0, &resident, &mapped, + &n_thp); assert_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); + if (opt_metadata_thp == metadata_thp_always) { + assert_zu_gt(n_thp, 0, + "Base should have 1 THP at least."); + } } assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + base_stats_get(tsdn, base, &allocated1, &resident, &mapped, + 
&n_thp); assert_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -55,7 +61,7 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; base_t *base; - size_t allocated0, allocated1, resident, mapped; + size_t allocated0, allocated1, resident, mapped, n_thp; extent_hooks_prep(); try_dalloc = false; @@ -71,16 +77,22 @@ TEST_BEGIN(test_base_hooks_null) { assert_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped); + base_stats_get(tsdn, base, &allocated0, &resident, &mapped, + &n_thp); assert_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); + if (opt_metadata_thp == metadata_thp_always) { + assert_zu_gt(n_thp, 0, + "Base should have 1 THP at least."); + } } assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped); + base_stats_get(tsdn, base, &allocated1, &resident, &mapped, + &n_thp); assert_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } From a315688be0f38188f16fe89ee1657c7f596f8cbb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 30 Aug 2017 16:17:04 -0700 Subject: [PATCH 0981/2608] Relax constraints on reentrancy for extent hooks. If we guarantee no malloc activity in extent hooks, it's possible to make customized hooks working on arena 0. Remove the non-a0 assertion to enable such use cases. 
--- .../jemalloc/internal/jemalloc_internal_inlines_a.h | 1 + src/extent.c | 13 ++++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 5ec35db3..c6a1f7eb 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -151,6 +151,7 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) { assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); bool fast = tsd_fast(tsd); + assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); ++*tsd_reentrancy_levelp_get(tsd); if (fast) { /* Prepare slow path for reentrancy. */ diff --git a/src/extent.c b/src/extent.c index f464de4a..3f1c76fd 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1028,7 +1028,18 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, static void extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, arena); + if (arena == arena_get(tsd_tsdn(tsd), 0, false)) { + /* + * The only legitimate case of customized extent hooks for a0 is + * hooks with no allocation activities. One such example is to + * place metadata on pre-allocated resources such as huge pages. + * In that case, rely on reentrancy_level checks to catch + * infinite recursions. + */ + pre_reentrancy(tsd, NULL); + } else { + pre_reentrancy(tsd, arena); + } } static void From cf4738455d990918914cdc8608936433ef897a6e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 6 Sep 2017 10:15:33 -0700 Subject: [PATCH 0982/2608] Fix a link for dirty_decay_ms in manual. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f7fbe305..dda9a733 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1036,7 +1036,7 @@ mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", The default decay time is 10 seconds. See arenas.dirty_decay_ms and arena.<i>.muzzy_decay_ms + linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms for a description of muzzy pages. From 886053b966f4108e4b9ee5e29a0a708e91bc72f8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Sep 2017 13:32:58 -0700 Subject: [PATCH 0983/2608] Fix huge page test in test/unit/pages. Huge pages could be disabled even if the kernel header has MAD_HUGEPAGE defined. Guard the huge pagetest with runtime detection. --- test/unit/pages.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/unit/pages.c b/test/unit/pages.c index 1a979e62..49ad0091 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -10,11 +10,13 @@ TEST_BEGIN(test_pages_huge) { pages = pages_map(NULL, alloc_size, PAGE, &commit); assert_ptr_not_null(pages, "Unexpected pages_map() error"); - hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, - "Unexpected pages_huge() result"); - assert_false(pages_nohuge(hugepage, HUGEPAGE), - "Unexpected pages_nohuge() result"); + if (thp_state_madvise) { + hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); + assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, + "Unexpected pages_huge() result"); + assert_false(pages_nohuge(hugepage, HUGEPAGE), + "Unexpected pages_nohuge() result"); + } pages_unmap(pages, alloc_size); } From 9b20a4bf70efd675604985ca37335f8b0136a289 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 12 Sep 2017 11:38:13 -0700 Subject: [PATCH 0984/2608] Clear cache bin ql postfork. This fixes a regression in 9c05490, which introduced the new cache bin ql. The list needs to be cleaned up after fork, same as tcache_ql. 
--- src/arena.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/arena.c b/src/arena.c index 18ed5aac..43ba6018 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1936,6 +1936,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2155,10 +2156,16 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } if (config_stats) { ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); if (tcache != NULL && tcache->arena == arena) { ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, + tcache->bins_small, tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); } } From 9e39425bf1653e4bebb7b377dd716f98cab069ff Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Sep 2017 09:27:33 -0700 Subject: [PATCH 0985/2608] Force Ubuntu "precise" for Travis CI builds. We've been seeing strange errors in jemalloc_cpp.cpp since Travis upgraded from precise to trusty as their default CI environment (seeming to stem from some the new clang version finding the headers for an old version of libstdc++. In the long run we'll have to deal with this "for real", but at that point we may have a better C++ story in general, making it a moot point. 
--- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 418fc6fd..4cc116e5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,5 @@ language: generic +dist: precise matrix: include: From d60f3bac1237666922c16e7a1b281a2c7721863c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Sep 2017 14:22:44 -0700 Subject: [PATCH 0986/2608] Add missing field in initializer for rtree cache. Fix a warning by -Wmissing-field-initializers. --- include/jemalloc/internal/rtree_tsd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 3cdc8625..93a75173 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -26,7 +26,7 @@ * Zero initializer required for tsd initialization only. Proper initialization * done via rtree_ctx_data_init(). */ -#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; From eaa58a50267df6f5f2a5da38d654fd98fc4a1136 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Sep 2017 14:36:43 -0700 Subject: [PATCH 0987/2608] Put static keyword first. Fix a warning by -Wold-style-declaration. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3c0ea7d4..4c31a2d2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1730,7 +1730,7 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, } /* A size_t with its high-half bits all set to 1. 
*/ - const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); + static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; From 96f1468221b9e846dd70eb7e65634a41e6804c20 Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 14:50:55 -0400 Subject: [PATCH 0988/2608] whitespace --- configure.ac | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 49b2df15..4373c21d 100644 --- a/configure.ac +++ b/configure.ac @@ -8,7 +8,7 @@ dnl ============================================================================ dnl Custom macro definitions. dnl JE_CONCAT_VVV(r, a, b) -dnl +dnl dnl Set $r to the concatenation of $a and $b, with a space separating them iff dnl both $a and $b are non-emty. AC_DEFUN([JE_CONCAT_VVV], @@ -20,7 +20,7 @@ fi ) dnl JE_APPEND_VS(a, b) -dnl +dnl dnl Set $a to the concatenation of $a and b, with a space separating them iff dnl both $a and b are non-empty. AC_DEFUN([JE_APPEND_VS], @@ -31,7 +31,7 @@ AC_DEFUN([JE_APPEND_VS], CONFIGURE_CFLAGS= SPECIFIED_CFLAGS="${CFLAGS}" dnl JE_CFLAGS_ADD(cflag) -dnl +dnl dnl CFLAGS is the concatenation of CONFIGURE_CFLAGS and SPECIFIED_CFLAGS dnl (ignoring EXTRA_CFLAGS, which does not impact configure tests. This macro dnl appends to CONFIGURE_CFLAGS and regenerates CFLAGS. @@ -57,7 +57,7 @@ JE_CONCAT_VVV(CFLAGS, CONFIGURE_CFLAGS, SPECIFIED_CFLAGS) dnl JE_CFLAGS_SAVE() dnl JE_CFLAGS_RESTORE() -dnl +dnl dnl Save/restore CFLAGS. Nesting is not supported. AC_DEFUN([JE_CFLAGS_SAVE], SAVED_CONFIGURE_CFLAGS="${CONFIGURE_CFLAGS}" @@ -91,7 +91,7 @@ JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) dnl JE_COMPILABLE(label, hcode, mcode, rvar) -dnl +dnl dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors dnl cause failure. AC_DEFUN([JE_COMPILABLE], @@ -517,7 +517,7 @@ AC_PROG_AWK dnl Platform-specific settings. 
abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. -dnl +dnl dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. @@ -1412,7 +1412,7 @@ AC_ARG_WITH([lg_page_sizes], dnl ============================================================================ dnl jemalloc configuration. -dnl +dnl AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], From 24766ccd5bcc379b7d518b3ec2480d2d146873ac Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 15:04:17 -0400 Subject: [PATCH 0989/2608] Allow toolchain to determine nm --- configure.ac | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4373c21d..c98f7b6d 100644 --- a/configure.ac +++ b/configure.ac @@ -512,6 +512,11 @@ AN_PROGRAM([ar], [AC_PROG_AR]) AC_DEFUN([AC_PROG_AR], [AC_CHECK_TOOL(AR, ar, :)]) AC_PROG_AR +AN_MAKEVAR([NM], [AC_PROG_NM]) +AN_PROGRAM([nm], [AC_PROG_NM]) +AC_DEFUN([AC_PROG_NM], [AC_CHECK_TOOL(NM, nm, :)]) +AC_PROG_NM + AC_PROG_AWK dnl Platform-specific settings. abi and RPATH can probably be determined @@ -523,7 +528,7 @@ dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. 
default_retain="0" maps_coalesce="1" -DUMP_SYMS="nm -a" +DUMP_SYMS="${NM} -a" SYM_PREFIX="" case "${host}" in *-*-darwin* | *-*-ios*) From a545f1804a19f48244ee5e328e32e2d036ffea0d Mon Sep 17 00:00:00 2001 From: Tamir Duberstein Date: Thu, 6 Jul 2017 15:14:48 -0400 Subject: [PATCH 0990/2608] dumpbin doesn't exist in mingw --- configure.ac | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index c98f7b6d..ab2f41ae 100644 --- a/configure.ac +++ b/configure.ac @@ -645,7 +645,13 @@ case "${host}" in DSO_LDFLAGS="-shared" link_whole_archive="1" fi - DUMP_SYMS="dumpbin /SYMBOLS" + case "${host}" in + *-*-cygwin*) + DUMP_SYMS="dumpbin /SYMBOLS" + ;; + *) + ;; + esac a="lib" libprefix="" SOREV="${so}" From 56f0e57844bc1d2c806738860bf93e2ccee135b5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Sep 2017 14:34:13 -0700 Subject: [PATCH 0991/2608] Add "falls through" comment explicitly. Fix warnings by -Wimplicit-fallthrough. --- include/jemalloc/internal/hash.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 188296cf..dcfc992d 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -260,22 +260,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */ + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */ + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */ + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */ + case 11: k2 ^= 
((uint64_t)(tail[10])) << 16; /* falls through */ + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */ case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + /* falls through */ + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */ + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */ + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */ + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */ + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */ + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */ + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */ case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; } From 3959a9fe1973a7d7ddbbd99056c22e9b684a3275 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Sep 2017 15:35:29 -0700 Subject: [PATCH 0992/2608] Avoid left shift by negative values. Fix warnings on -Wshift-negative-value. --- include/jemalloc/internal/sz.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 7f640d55..97946289 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -61,7 +61,7 @@ sz_psz2ind(size_t psz) { pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); @@ -142,7 +142,7 @@ sz_size2index_compute(size_t size) { szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); From 0720192a323f5dd2dd27828c6ab3061f8f039416 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 26 Sep 2017 13:45:21 -0700 Subject: [PATCH 0993/2608] Add runtime detection of lazy purging support. It's possible to build with lazy purge enabled but depoly to systems without such support. In this case, rely on the boot time detection instead of keep making unnecessary madvise calls (which all returns EINVAL). --- src/pages.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/pages.c b/src/pages.c index 4ca3107d..84691883 100644 --- a/src/pages.c +++ b/src/pages.c @@ -27,6 +27,9 @@ static bool os_overcommits; bool thp_state_madvise; +/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ +static bool pages_can_purge_lazy_runtime = true; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -254,6 +257,13 @@ pages_purge_lazy(void *addr, size_t size) { if (!pages_can_purge_lazy) { return true; } + if (!pages_can_purge_lazy_runtime) { + /* + * Built with lazy purge enabled, but detected it was not + * supported on the current system. + */ + return true; + } #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); @@ -491,5 +501,19 @@ pages_boot(void) { init_thp_state(); + /* Detect lazy purge runtime support. 
*/ + if (pages_can_purge_lazy) { + bool committed = false; + void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); + if (madv_free_page == NULL) { + return true; + } + assert(pages_can_purge_lazy_runtime); + if (pages_purge_lazy(madv_free_page, PAGE)) { + pages_can_purge_lazy_runtime = false; + } + os_pages_unmap(madv_free_page, PAGE); + } + return false; } From 7a8bc7172b17e219b3603e99c8da44efb283e652 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Sep 2017 13:54:08 -0700 Subject: [PATCH 0994/2608] ARM: Don't extend bit LG_VADDR to compute high address bits. In userspace ARM on Linux, zero-ing the high bits is the correct way to do this. This doesn't fix the fact that we currently set LG_VADDR to 48 on ARM, when in fact larger virtual address sizes are coming soon. We'll cross that bridge when we come to it. --- include/jemalloc/internal/rtree.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index b5d4db39..4563db23 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -178,9 +178,21 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, JEMALLOC_ALWAYS_INLINE extent_t * rtree_leaf_elm_bits_extent_get(uintptr_t bits) { +# ifdef __aarch64__ + /* + * aarch64 doesn't sign extend the highest virtual address bit to set + * the higher ones. Instead, the high bits gets zeroed. + */ + uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; + /* Mask off the slab bit. */ + uintptr_t low_bit_mask = ~(uintptr_t)1; + uintptr_t mask = high_bit_mask & low_bit_mask; + return (extent_t *)(bits & mask); +# else /* Restore sign-extended high bits, mask slab bit. 
*/ return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & ~((uintptr_t)0x1)); +# endif } JEMALLOC_ALWAYS_INLINE szind_t From 8a7ee3014cea09e13e605bf47c11943df5a5eb2b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Oct 2017 17:48:03 -0700 Subject: [PATCH 0995/2608] Logging: capitalize log macro. Dodge a name-conflict with the math.h logarithm function. D'oh. --- include/jemalloc/internal/log.h | 33 +++++------- src/jemalloc.c | 96 ++++++++++++++++----------------- test/unit/log.c | 2 +- 3 files changed, 61 insertions(+), 70 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 9f32fb4f..64208586 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -14,30 +14,21 @@ #define JEMALLOC_LOG_BUFSIZE 4096 /* - * The log_vars malloc_conf option is a '|'-delimited list of log_var name - * segments to log. The log_var names are themselves hierarchical, with '.' as + * The log malloc_conf option is a '|'-delimited list of log_var name segments + * which should be logged. The names are themselves hierarchical, with '.' as * the delimiter (a "segment" is just a prefix in the log namespace). 
So, if * you have: * - * static log_var_t log_arena = LOG_VAR_INIT("arena"); // 1 - * static log_var_t log_arena_a = LOG_VAR_INIT("arena.a"); // 2 - * static log_var_t log_arena_b = LOG_VAR_INIT("arena.b"); // 3 - * static log_var_t log_arena_a_a = LOG_VAR_INIT("arena.a.a"); // 4 - * static_log_var_t log_extent_a = LOG_VAR_INIT("extent.a"); // 5 - * static_log_var_t log_extent_b = LOG_VAR_INIT("extent.b"); // 6 + * log("arena", "log msg for arena"); // 1 + * log("arena.a", "log msg for arena.a"); // 2 + * log("arena.b", "log msg for arena.b"); // 3 + * log("arena.a.a", "log msg for arena.a.a"); // 4 + * log("extent.a", "log msg for extent.a"); // 5 + * log("extent.b", "log msg for extent.b"); // 6 * - * And your malloc_conf option is "log_vars=arena.a|extent", then log_vars 2, 4, - * 5, and 6 will be enabled. You can enable logging from all log vars by - * writing "log_vars=.". - * - * You can then log by writing: - * log(log_var, "format string -- my int is %d", my_int); - * - * The namespaces currently in use: - * core.[malloc|free|posix_memalign|...].[entry|exit]: - * The entry/exit points of the functions publicly exposed by jemalloc. - * The "entry" variants try to log arguments to the functions, and the - * "exit" ones try to log return values. + * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * 6 will print at runtime. You can enable logging from all log vars by + * writing "log=.". * * None of this should be regarded as a stable API for right now. It's intended * as a debugging interface, to let us keep around some of our printf-debugging @@ -113,7 +104,7 @@ log_impl_varargs(const char *name, ...) { } /* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ -#define log(log_var_str, ...) \ +#define LOG(log_var_str, ...) 
\ do { \ static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ log_do_begin(log_var) \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 4c31a2d2..28e604bb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1192,7 +1192,7 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } if (config_log) { - if (CONF_MATCH("log_vars")) { + if (CONF_MATCH("log")) { size_t cpylen = ( vlen <= sizeof(log_var_names) ? vlen : sizeof(log_var_names) - 1); @@ -1991,7 +1991,7 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.malloc.entry", "size: %zu", size); + LOG("core.malloc.entry", "size: %zu", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2007,7 +2007,7 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); - log("core.malloc.exit", "result: %p", ret); + LOG("core.malloc.exit", "result: %p", ret); return ret; } @@ -2019,7 +2019,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " + LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " "size: %zu", memptr, alignment, size); static_opts_init(&sopts); @@ -2039,7 +2039,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { ret = imalloc(&sopts, &dopts); - log("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, + LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, *memptr); return ret; @@ -2054,7 +2054,7 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", + LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2076,7 +2076,7 @@ je_aligned_alloc(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log("core.aligned_alloc.exit", "result: %p", ret); + LOG("core.aligned_alloc.exit", "result: %p", ret); return ret; } @@ 
-2089,7 +2089,7 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2107,7 +2107,7 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); - log("core.calloc.exit", "result: %p", ret); + LOG("core.calloc.exit", "result: %p", ret); return ret; } @@ -2262,7 +2262,7 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; - log("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (unlikely(size == 0)) { if (ptr != NULL) { @@ -2277,7 +2277,7 @@ je_realloc(void *ptr, size_t size) { } ifree(tsd, ptr, tcache, true); - log("core.realloc.exit", "result: %p", NULL); + LOG("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2311,7 +2311,7 @@ je_realloc(void *ptr, size_t size) { } else { /* realloc(NULL, size) is equivalent to malloc(size). 
*/ void *ret = je_malloc(size); - log("core.realloc.exit", "result: %p", ret); + LOG("core.realloc.exit", "result: %p", ret); return ret; } @@ -2334,13 +2334,13 @@ je_realloc(void *ptr, size_t size) { UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); - log("core.realloc.exit", "result: %p", ret); + LOG("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { - log("core.free.entry", "ptr: %p", ptr); + LOG("core.free.entry", "ptr: %p", ptr); UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { @@ -2371,7 +2371,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } - log("core.free.exit", ""); + LOG("core.free.exit", ""); } /* @@ -2391,7 +2391,7 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, + LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, size); static_opts_init(&sopts); @@ -2412,7 +2412,7 @@ je_memalign(size_t alignment, size_t size) { imalloc(&sopts, &dopts); - log("core.memalign.exit", "result: %p", ret); + LOG("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2427,7 +2427,7 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.valloc.entry", "size: %zu\n", size); + LOG("core.valloc.entry", "size: %zu\n", size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2447,7 +2447,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); - log("core.valloc.exit", "result: %p\n", ret); + LOG("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2521,7 +2521,7 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; - log("core.mallocx.entry", "size: %zu, flags: %d", size, flags); + LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2557,7 +2557,7 @@ je_mallocx(size_t size, int flags) { imalloc(&sopts, &dopts); 
- log("core.mallocx.exit", "result: %p", ret); + LOG("core.mallocx.exit", "result: %p", ret); return ret; } @@ -2638,7 +2638,7 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; - log("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); @@ -2705,7 +2705,7 @@ je_rallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.rallocx.exit", "result: %p", p); + LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2715,7 +2715,7 @@ label_oom: UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.rallocx.exit", "result: %p", NULL); + LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } @@ -2802,7 +2802,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; - log("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " + LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); assert(ptr != NULL); @@ -2855,7 +2855,7 @@ label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.xallocx.exit", "result: %zu", usize); + LOG("core.xallocx.exit", "result: %zu", usize); return usize; } @@ -2865,7 +2865,7 @@ je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; - log("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); + LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2882,13 +2882,13 @@ je_sallocx(const void *ptr, int flags) { check_entry_exit_locking(tsdn); - log("core.sallocx.exit", "result: %zu", usize); + LOG("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { 
- log("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); + LOG("core.dallocx.entry", "ptr: %p, flags: %d", ptr, flags); assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2928,7 +2928,7 @@ je_dallocx(void *ptr, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.dallocx.exit", ""); + LOG("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2950,7 +2950,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - log("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); tsd_t *tsd = tsd_fetch(); @@ -2990,7 +2990,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.sdallocx.exit", ""); + LOG("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3002,7 +3002,7 @@ je_nallocx(size_t size, int flags) { assert(size != 0); if (unlikely(malloc_init())) { - log("core.nallocx.exit", "result: %zu", ZU(0)); + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -3011,12 +3011,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { - log("core.nallocx.exit", "result: %zu", ZU(0)); + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); - log("core.nallocx.exit", "result: %zu", usize); + LOG("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -3026,10 +3026,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; - log("core.mallctl.entry", "name: %s", name); + LOG("core.mallctl.entry", "name: %s", name); if (unlikely(malloc_init())) { - log("core.mallctl.exit", "result: %d", EAGAIN); + LOG("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3038,7 +3038,7 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, ret 
= ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.mallctl.exit", "result: %d", ret); + LOG("core.mallctl.exit", "result: %d", ret); return ret; } @@ -3046,10 +3046,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { int ret; - log("core.mallctlnametomib.entry", "name: %s", name); + LOG("core.mallctlnametomib.entry", "name: %s", name); if (unlikely(malloc_init())) { - log("core.mallctlnametomib.exit", "result: %d", EAGAIN); + LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3058,7 +3058,7 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.mallctlnametomib.exit", "result: %d", ret); + LOG("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -3068,10 +3068,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; - log("core.mallctlbymib.entry", ""); + LOG("core.mallctlbymib.entry", ""); if (unlikely(malloc_init())) { - log("core.mallctlbymib.exit", "result: %d", EAGAIN); + LOG("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -3079,7 +3079,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); - log("core.mallctlbymib.exit", "result: %d", ret); + LOG("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -3088,13 +3088,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; - log("core.malloc_stats_print.entry", ""); + LOG("core.malloc_stats_print.entry", ""); tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); 
- log("core.malloc_stats_print.exit", ""); + LOG("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -3102,7 +3102,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; - log("core.malloc_usable_size.entry", "ptr: %p", ptr); + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); assert(malloc_initialized() || IS_INITIALIZER); @@ -3121,7 +3121,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); - log("core.malloc_usable_size.exit", "result: %zu", ret); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } diff --git a/test/unit/log.c b/test/unit/log.c index 3c1a208c..a52bd737 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -176,7 +176,7 @@ TEST_END */ TEST_BEGIN(test_log_only_format_string) { if (false) { - log("log_str", "No arguments follow this format string."); + LOG("log_str", "No arguments follow this format string."); } } TEST_END From 7c6c99b8295829580c506067495a23c07436e266 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 26 Sep 2017 17:22:01 -0700 Subject: [PATCH 0996/2608] Use ph instead of rb tree for extents_avail_ There does not seem to be any overlap between usage of extent_avail and extent_heap, so we can use the same hook. The only remaining usage of rb trees is in the profiling code, which has some 'interesting' iteration constraints. 
Fixes #888 --- include/jemalloc/internal/extent_externs.h | 1 - include/jemalloc/internal/extent_structs.h | 28 ++++++++++------------ src/extent.c | 2 +- 3 files changed, 14 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 489a813c..9da5d010 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -4,7 +4,6 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/rtree.h" extern rtree_t extents_rtree; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index d2979503..641a6325 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/size_classes.h" @@ -120,20 +119,19 @@ struct extent_s { size_t e_bsize; }; - union { - /* - * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - /* Red-black tree linkage, used by arena's extent_avail. */ - rb_node(extent_t) rb_link; - }; + /* + * List linkage, used by a variety of lists: + * - arena_bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; - /* Linkage for per size class sn/address-ordered heaps. 
*/ + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ phn(extent_t) ph_link; union { @@ -148,7 +146,7 @@ struct extent_s { }; }; typedef ql_head(extent_t) extent_list_t; -typedef rb_tree(extent_t) extent_tree_t; +typedef ph(extent_t) extent_tree_t; typedef ph(extent_t) extent_heap_t; /* Quantized collection of extents, with built-in LRU queue. */ diff --git a/src/extent.c b/src/extent.c index 3f1c76fd..221c80c0 100644 --- a/src/extent.c +++ b/src/extent.c @@ -117,7 +117,7 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, +ph_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) typedef enum { From 1245faae9052350a96dbcb22de7979bca566dbec Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Oct 2017 18:03:02 -0700 Subject: [PATCH 0997/2608] Power: disable the CPU_SPINWAIT macro. Quoting from https://github.com/jemalloc/jemalloc/issues/761 : [...] reading the Power ISA documentation[1], the assembly in [the CPU_SPINWAIT macro] isn't correct anyway (as @marxin points out): the setting of the program-priority register is "sticky", and we never undo the lowering. We could do something similar, but given that we don't have testing here in the first place, I'm inclined to simply not try. I'll put something up reverting the problematic commit tomorrow. [1] Book II, chapter 3 of the 2.07B or 3.0B ISA documents. --- configure.ac | 7 +++---- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 ++ include/jemalloc/internal/spin.h | 12 +++++++++++- src/mutex.c | 3 ++- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index ab2f41ae..f957377b 100644 --- a/configure.ac +++ b/configure.ac @@ -381,6 +381,7 @@ dnl CPU-specific settings. 
CPU_SPINWAIT="" case "${host_cpu}" in i686|x86_64) + HAVE_CPU_SPINWAIT=1 if test "x${je_cv_msvc}" = "xyes" ; then AC_CACHE_VAL([je_cv_pause_msvc], [JE_COMPILABLE([pause instruction MSVC], [], @@ -399,13 +400,11 @@ case "${host_cpu}" in fi fi ;; - powerpc*) - AC_DEFINE_UNQUOTED([HAVE_ALTIVEC], [ ]) - CPU_SPINWAIT='__asm__ volatile("or 31,31,31")' - ;; *) + HAVE_CPU_SPINWAIT=0 ;; esac +AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT]) AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) case "${host_cpu}" in diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5fa7f51f..31262fb2 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -33,6 +33,8 @@ * order to yield to another virtual CPU. */ #undef CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#undef HAVE_CPU_SPINWAIT /* * Number of significant bits in virtual addresses. 
This may be less than the diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index aded0fcc..22804c68 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -7,13 +7,23 @@ typedef struct { unsigned iteration; } spin_t; +static inline void +spin_cpu_spinwait() { +# if HAVE_CPU_SPINWAIT + CPU_SPINWAIT; +# else + volatile int x = 0; + x = x; +# endif +} + static inline void spin_adaptive(spin_t *spin) { volatile uint32_t i; if (spin->iteration < 5) { for (i = 0; i < (1U << spin->iteration); i++) { - CPU_SPINWAIT; + spin_cpu_spinwait(); } spin->iteration++; } else { diff --git a/src/mutex.c b/src/mutex.c index a528ef0c..3de7f44a 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/spin.h" #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 @@ -53,7 +54,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { - CPU_SPINWAIT; + spin_cpu_spinwait(); if (!malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; From 79e83451ff262fbc4bf66059eae672286b5eb9f0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 4 Oct 2017 18:41:51 -0700 Subject: [PATCH 0998/2608] Enable a0 metadata thp on the 3rd base block. Since we allocate rtree nodes from a0's base, it's pushed to over 1 block on initialization right away, which makes the auto thp mode less effective on a0. We change a0 to make the switch on the 3rd block instead. 
--- src/base.c | 85 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 64 insertions(+), 21 deletions(-) diff --git a/src/base.c b/src/base.c index 609a445b..c6db425d 100644 --- a/src/base.c +++ b/src/base.c @@ -126,10 +126,58 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, } static bool -base_is_single_block(base_t *base) { - assert(base->blocks != NULL && - (base->blocks->size & HUGEPAGE_MASK) == 0); - return (base->blocks->next == NULL); +base_auto_thp_triggered(base_t *base, bool with_new_block) { + assert(opt_metadata_thp == metadata_thp_auto); + base_block_t *b1 = base->blocks; + assert(b1 != NULL); + + base_block_t *b2 = b1->next; + if (base_ind_get(base) != 0) { + return with_new_block ? true: b2 != NULL; + } + + base_block_t *b3 = (b2 != NULL) ? b2->next : NULL; + return with_new_block ? b2 != NULL : b3 != NULL; +} + +static void +base_auto_thp_switch(base_t *base) { + assert(opt_metadata_thp == metadata_thp_auto); + + base_block_t *b1 = base->blocks; + assert(b1 != NULL); + base_block_t *b2 = b1->next; + + /* Called when adding a new block. */ + bool should_switch; + if (base_ind_get(base) != 0) { + /* Makes the switch on the 2nd block. */ + should_switch = (b2 == NULL); + } else { + /* + * a0 switches to thp on the 3rd block, since rtree nodes are + * allocated from a0 base, which takes an entire block on init. + */ + base_block_t *b3 = (b2 != NULL) ? b2->next : + NULL; + should_switch = (b2 != NULL) && (b3 == NULL); + } + if (!should_switch) { + return; + } + + assert(base->n_thp == 0); + /* Make the initial blocks THP lazily. 
*/ + base_block_t *block = base->blocks; + while (block != NULL) { + assert((block->size & HUGEPAGE_MASK) == 0); + pages_huge(block, block->size); + if (config_stats) { + base->n_thp += block->size >> LG_HUGEPAGE; + } + block = block->next; + assert(block == NULL || (base_ind_get(base) == 0)); + } } static void * @@ -174,8 +222,8 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); - if (metadata_thp_madvise() && (!base_is_single_block(base) || - opt_metadata_thp == metadata_thp_always)) { + if (metadata_thp_madvise() && (opt_metadata_thp == + metadata_thp_always || base_auto_thp_triggered(base, false))) { base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> LG_HUGEPAGE; @@ -233,21 +281,15 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, void *addr = (void *)block; assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && (block_size & HUGEPAGE_MASK) == 0); - /* base == NULL indicates this is a new base. */ - if (base != NULL || opt_metadata_thp == metadata_thp_always) { - /* Use hugepage for the new block. */ + if (opt_metadata_thp == metadata_thp_always) { pages_huge(addr, block_size); - } - if (base != NULL && base_is_single_block(base) && - opt_metadata_thp == metadata_thp_auto) { - /* Make the first block THP lazily. */ - base_block_t *first_block = base->blocks; - assert((first_block->size & HUGEPAGE_MASK) == 0); - pages_huge(first_block, first_block->size); - if (config_stats) { - assert(base->n_thp == 0); - base->n_thp += first_block->size >> LG_HUGEPAGE; + } else if (opt_metadata_thp == metadata_thp_auto && + base != NULL) { + /* base != NULL indicates this is not a new base. 
*/ + if (base_auto_thp_triggered(base, true)) { + pages_huge(addr, block_size); } + base_auto_thp_switch(base); } } @@ -287,8 +329,9 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; - if (metadata_thp_madvise()) { - assert(!base_is_single_block(base)); + if (metadata_thp_madvise() && + !(opt_metadata_thp == metadata_thp_auto + && !base_auto_thp_triggered(base, false))) { assert(base->n_thp > 0); base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE; From a2e6eb2c226ff63397220517883e13717f97da05 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 4 Oct 2017 16:39:33 -0700 Subject: [PATCH 0999/2608] Delay background_thread_ctl_init to right before thread creation. ctl_init sets isthreaded, which means it should be done without holding any locks. --- src/background_thread.c | 3 --- src/jemalloc.c | 7 ++++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index eb30eb5b..609be520 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -848,9 +848,6 @@ background_thread_boot1(tsdn_t *tsdn) { malloc_mutex_rank_exclusive)) { return true; } - if (opt_background_thread) { - background_thread_ctl_init(tsdn); - } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); diff --git a/src/jemalloc.c b/src/jemalloc.c index 28e604bb..f29fc7da 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1522,6 +1522,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + witness_assert_lockless(witness_tsd_tsdn( + tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. 
*/ tsd = tsd_fetch(); @@ -1529,8 +1531,11 @@ malloc_init_hard(void) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background - * threads (pthread_create depends on malloc). + * threads (pthread_create depends on malloc). ctl_init (which + * sets isthreaded) needs to be called without holding any lock. */ + background_thread_ctl_init(tsd_tsdn(tsd)); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); bool err = background_thread_create(tsd, 0); malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); From 7e74093c96c019ce52aee9a03fc745647d79ca5f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Oct 2017 14:56:49 -0700 Subject: [PATCH 1000/2608] Set isthreaded manually. Avoid relying pthread_once which creates dependency during init. --- src/background_thread.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index 609be520..6baff22b 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -30,19 +30,20 @@ bool can_enable_background_thread; static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -static pthread_once_t once_control = PTHREAD_ONCE_INIT; static void -pthread_create_wrapper_once(void) { +pthread_create_wrapper_init(void) { #ifdef JEMALLOC_LAZY_LOCK - isthreaded = true; + if (!isthreaded) { + isthreaded = true; + } #endif } int pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *__restrict arg) { - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_wrapper_init(); return pthread_create_fptr(thread, attr, start_routine, arg); } @@ -805,7 +806,7 @@ void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_once(&once_control, pthread_create_wrapper_once); + 
pthread_create_wrapper_init(); #endif } From fc83de0384a2ad87cf5059d4345acf014c77e6e4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Oct 2017 15:51:35 -0700 Subject: [PATCH 1001/2608] Document the potential issues about opt.background_thread. --- doc/jemalloc.xml.in | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index dda9a733..8151b5ba 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1010,9 +1010,12 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", (const bool) r- - Internal background worker threads enabled/disabled. See - background_thread for dynamic - control options and details. This option is disabled by + Internal background worker threads enabled/disabled. + Because of potential circular dependencies, enabling background thread + using this option may cause crash or deadlock during initialization. For + a reliable way to use this feature, see background_thread for dynamic control + options and details. This option is disabled by default. From 31ab38be5f3c4b826db89ff3cd4f32f988747f06 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Oct 2017 16:28:55 -0700 Subject: [PATCH 1002/2608] Define MADV_FREE on our own when needed. On x86 Linux, we define our own MADV_FREE if madvise(2) is available, but no MADV_FREE is detected. This allows the feature to be built in and enabled with runtime detection. 
--- configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/jemalloc_preamble.h.in | 4 ++++ src/pages.c | 8 +++++++- 4 files changed, 23 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f957377b..b4c66fb8 100644 --- a/configure.ac +++ b/configure.ac @@ -1824,6 +1824,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then ], [je_cv_madv_free]) if test "x${je_cv_madv_free}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + elif test "x${je_cv_madvise}" = "xyes" ; then + case "${host_cpu}" in i686|x86_64) + case "${host}" in *-*-linux*) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ]) + ;; + esac + ;; + esac fi dnl Check for madvise(..., MADV_DONTNEED). diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 31262fb2..b56f21f8 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -285,6 +285,9 @@ #undef JEMALLOC_PURGE_MADVISE_DONTNEED #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +#undef JEMALLOC_DEFINE_MADVISE_FREE + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. 
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index f6ed731c..f81f3a40 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -47,6 +47,10 @@ #endif #include "jemalloc/internal/hooks.h" +#ifdef JEMALLOC_DEFINE_MADVISE_FREE +# define JEMALLOC_MADV_FREE 8 +#endif + static const bool config_debug = #ifdef JEMALLOC_DEBUG true diff --git a/src/pages.c b/src/pages.c index 84691883..e8112f74 100644 --- a/src/pages.c +++ b/src/pages.c @@ -269,7 +269,13 @@ pages_purge_lazy(void *addr, size_t size) { VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - return (madvise(addr, size, MADV_FREE) != 0); + return (madvise(addr, size, +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) != 0); #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); From f4f814cd4cca4be270c22c4e943cd5ae6c40fea9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Oct 2017 16:32:28 -0700 Subject: [PATCH 1003/2608] Remove the default value for JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS. --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index b4c66fb8..558489ce 100644 --- a/configure.ac +++ b/configure.ac @@ -561,7 +561,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) @@ -575,7 +575,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) From 33df2fa1694c9fdc1912aecaa19babc194f377ac Mon Sep 17 00:00:00 2001 From: rustyx Date: Mon, 16 Oct 2017 16:40:50 +0200 Subject: [PATCH 1004/2608] Fix MSVC 2015 project and add a VS 2017 solution --- .gitignore | 2 + msvc/ReadMe.txt | 7 +- msvc/jemalloc_vc2017.sln | 63 ++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../vc2015/test_threads/test_threads.vcxproj | 6 +- .../test_threads/test_threads.vcxproj.filters | 6 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 345 ++++++++++++++++++ .../vc2017/jemalloc/jemalloc.vcxproj.filters | 95 +++++ .../vc2017/test_threads/test_threads.vcxproj | 326 +++++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ .../vc2015 => }/test_threads/test_threads.cpp | 0 .../vc2015 => }/test_threads/test_threads.h | 0 .../test_threads/test_threads_main.cpp | 0 14 files changed, 870 insertions(+), 10 deletions(-) create mode 100644 msvc/jemalloc_vc2017.sln create mode 100644 msvc/projects/vc2017/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2017/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters rename msvc/{projects/vc2015 => }/test_threads/test_threads.cpp (100%) rename msvc/{projects/vc2015 => }/test_threads/test_threads.h (100%) rename msvc/{projects/vc2015 => }/test_threads/test_threads_main.cpp (100%) diff --git a/.gitignore b/.gitignore index a25aaf7e..19199ccb 100644 --- a/.gitignore +++ b/.gitignore @@ -77,12 +77,14 @@ test/include/test/jemalloc_test_defs.h *.pdb *.sdf *.opendb +*.VC.db *.opensdf *.cachefile *.suo 
*.user *.sln.docstates *.tmp +.vs/ /msvc/Win32/ /msvc/x64/ /msvc/projects/*/*/Debug*/ diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt index 77d567da..633a7d49 100644 --- a/msvc/ReadMe.txt +++ b/msvc/ReadMe.txt @@ -9,16 +9,15 @@ How to build jemalloc for Windows * grep * sed -2. Install Visual Studio 2015 with Visual C++ +2. Install Visual Studio 2015 or 2017 with Visual C++ 3. Add Cygwin\bin to the PATH environment variable -4. Open "VS2015 x86 Native Tools Command Prompt" +4. Open "x64 Native Tools Command Prompt for VS 2017" (note: x86/x64 doesn't matter at this point) 5. Generate header files: sh -c "CC=cl ./autogen.sh" 6. Now the project can be opened and built in Visual Studio: - msvc\jemalloc_vc2015.sln - + msvc\jemalloc_vc2017.sln diff --git a/msvc/jemalloc_vc2017.sln b/msvc/jemalloc_vc2017.sln new file mode 100644 index 00000000..c22fcb43 --- /dev/null +++ b/msvc/jemalloc_vc2017.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2017\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2017\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + 
EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + 
{09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 97f892e1..78f92c98 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -48,6 +48,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d2de135b..dba976ed 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,5 +88,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj b/msvc/projects/vc2015/test_threads/test_threads.vcxproj index f5e9898f..325876d6 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj @@ -310,8 +310,8 @@ - - + + @@ -319,7 +319,7 @@ - + diff --git a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters index 
4c233407..fa4588fd 100644 --- a/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters +++ b/msvc/projects/vc2015/test_threads/test_threads.vcxproj.filters @@ -11,15 +11,15 @@ - + Source Files - + Source Files - + Header Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj new file mode 100644 index 00000000..e49dbbd6 --- /dev/null +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -0,0 +1,345 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + + + + DynamicLibrary + true + v141 + MultiByte + + + StaticLibrary + true + v141 + MultiByte + + + DynamicLibrary + false + v141 + true + MultiByte + + + StaticLibrary + false + v141 + true + MultiByte + + + DynamicLibrary + true + v141 + MultiByte + + + StaticLibrary + true + v141 + MultiByte + + + DynamicLibrary + false + v141 + true + MultiByte + + + StaticLibrary + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + 
true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..dba976ed --- /dev/null +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,95 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2017/test_threads/test_threads.vcxproj b/msvc/projects/vc2017/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..c35b0f5a --- /dev/null +++ b/msvc/projects/vc2017/test_threads/test_threads.vcxproj @@ -0,0 +1,326 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 
+ + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + + + + Application + true + v141 + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + true + v141 + MultiByte + + + Application + true + v141 + MultiByte + + + Application + false + v141 + true + MultiByte + + + Application + false + v141 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + 
jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + 
jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters new file mode 100644 index 00000000..fa4588fd --- /dev/null +++ b/msvc/projects/vc2017/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2015/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp 
similarity index 100% rename from msvc/projects/vc2015/test_threads/test_threads.cpp rename to msvc/test_threads/test_threads.cpp diff --git a/msvc/projects/vc2015/test_threads/test_threads.h b/msvc/test_threads/test_threads.h similarity index 100% rename from msvc/projects/vc2015/test_threads/test_threads.h rename to msvc/test_threads/test_threads.h diff --git a/msvc/projects/vc2015/test_threads/test_threads_main.cpp b/msvc/test_threads/test_threads_main.cpp similarity index 100% rename from msvc/projects/vc2015/test_threads/test_threads_main.cpp rename to msvc/test_threads/test_threads_main.cpp From 5bad01c38ed0b1f647a6984c5f830b124cafdc94 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 28 Aug 2017 18:27:12 -0700 Subject: [PATCH 1005/2608] Document some of the internal extent functions. --- src/extent.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/extent.c b/src/extent.c index 221c80c0..fd8eab69 100644 --- a/src/extent.c +++ b/src/extent.c @@ -723,6 +723,13 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { assert(!err); } +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent) { @@ -737,6 +744,9 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } } +/* + * Removes all pointers to the given extent from the global rtree. + */ static void extent_deregister(tsdn_t *tsdn, extent_t *extent) { rtree_ctx_t rtree_ctx_fallback; @@ -760,6 +770,10 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { } } +/* + * Tries to find and remove an extent from extents that can be used for the + * given allocation request. 
+ */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, @@ -832,6 +846,12 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into extents. + */ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, @@ -892,6 +912,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given extents_t. + */ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, @@ -1442,6 +1466,10 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Does the metadata management portions of putting an unused extent into the + * given extents_t (coalesces, deregisters slab interiors, the heap operations). + */ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { @@ -1800,6 +1828,13 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } #endif +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). 
+ */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, From 211b1f3c7de23b1915f1ce8f9277e6c1ff60cfde Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 29 Aug 2017 16:50:57 -0700 Subject: [PATCH 1006/2608] Factor out extent-splitting core from extent lifetime management. Before this commit, extent_recycle_split intermingles the splitting of an extent and the return of parts of that extent to a given extents_t. After it, that logic is separated. This will enable splitting extents that don't live in any extents_t (as the grow retained region soon will). --- src/extent.c | 252 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 160 insertions(+), 92 deletions(-) diff --git a/src/extent.c b/src/extent.c index fd8eab69..1dd1d1d1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -846,6 +846,103 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any extents_t; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. 
+ */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. */ + extent_t **to_leak, extent_t **to_salvage, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool growing_retained) { + size_t esize = size + pad; + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); + assert(new_addr == NULL || leadsize == 0); + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; + + /* Split the lead. */ + if (leadsize != 0) { + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *lead, leadsize, NSIZES, false, esize + trailsize, szind, + slab, growing_retained); + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; + } + } + + /* Split the trail. */ + if (trailsize != 0) { + *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + esize, szind, slab, trailsize, NSIZES, false, + growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; + } + } + + if (leadsize == 0 && trailsize == 0) { + /* + * Splitting causes szind to be set as a side effect, but no + * splitting occurred. 
+ */ + extent_szind_set(*extent, szind); + if (szind != NSIZES) { + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { + rtree_szind_slab_update(tsdn, &extents_rtree, + rtree_ctx, + (uintptr_t)extent_past_get(*extent) - + (uintptr_t)PAGE, szind, slab); + } + } + } + + return extent_split_interior_ok; +} + /* * This fulfills the indicated allocation request out of the given extent (which * the caller should have ensured was big enough). If there's any unused space @@ -857,59 +954,40 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, bool growing_retained) { - size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); - assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + esize); - size_t trailsize = extent_size_get(extent) - leadsize - esize; + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; - /* Split the lead. */ - if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, - lead, leadsize, NSIZES, false, esize + trailsize, szind, - slab, growing_retained); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, extents, - lead, growing_retained); - return NULL; - } - extent_deactivate(tsdn, arena, extents, lead, false); - } + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); - /* Split the trail. 
*/ - if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, - r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false, growing_retained); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, extents, - extent, growing_retained); - return NULL; + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, extents, lead, false); } - extent_deactivate(tsdn, arena, extents, trail, false); - } else if (leadsize == 0) { + if (trail != NULL) { + extent_deactivate(tsdn, arena, extents, trail, false); + } + return extent; + } else { /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. + * We should have picked an extent that was large enough to + * fulfill our allocation request. */ - extent_szind_set(extent, szind); - if (szind != NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(extent) - - (uintptr_t)PAGE, szind, slab); - } + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); } + if (to_leak != NULL) { + extents_leak(tsdn, arena, r_extent_hooks, extents, + to_leak, growing_retained); + } + return NULL; } - - return extent; + unreachable(); } /* @@ -1140,10 +1218,6 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, goto label_err; } - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, - PAGE_CEILING(alignment)) - (uintptr_t)ptr; - assert(alloc_size >= leadsize + esize); - size_t trailsize = alloc_size - leadsize - esize; if (extent_zeroed_get(extent) && extent_committed_get(extent)) { *zero = true; } @@ -1151,54 +1225,48 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, *commit = true; } - /* Split the lead. 
*/ - if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, false, esize + trailsize, szind, slab, - true); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, + true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, lead, true); - goto label_err; } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); - } - - /* Split the trail. */ - if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, - extent, esize, szind, slab, trailsize, NSIZES, false, true); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); - goto label_err; + if (trail != NULL) { + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, trail, true); } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); - } else if (leadsize == 0) { + } else { /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. 
*/ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_szind_set(extent, szind); - if (szind != NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(extent) - - (uintptr_t)PAGE, szind, slab); - } + assert(result == extent_split_interior_error); + if (to_leak != NULL) { + extent_deregister(tsdn, to_leak); + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_leak, true); + goto label_err; } + /* + * Note: we don't handle the non-NULL to_salvage case at all. + * This maintains the behavior that was present when the + * refactor pulling extent_split_interior into a helper function + * was added. I think this is actually a bug (we leak both the + * memory and the extent_t in that case), but since this code is + * getting deleted very shortly (in a subsequent commit), + * ensuring correctness down this path isn't worth the effort. + */ } if (*commit && !extent_committed_get(extent)) { From ccd09050aa53d083fe0b45d4704b1fe95fb00c92 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Sep 2017 15:03:52 -0700 Subject: [PATCH 1007/2608] Add configure-time detection for madvise(..., MADV_DO[NT]DUMP) --- configure.ac | 11 +++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ 2 files changed, 16 insertions(+) diff --git a/configure.ac b/configure.ac index 558489ce..7544f57e 100644 --- a/configure.ac +++ b/configure.ac @@ -1845,6 +1845,17 @@ if test "x${je_cv_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) fi + dnl Check for madvise(..., MADV_DO[NT]DUMP). 
+ JE_COMPILABLE([madvise(..., MADV_DO[[NT]]DUMP)], [ +#include +], [ + madvise((void *)0, 0, MADV_DONTDUMP); + madvise((void *)0, 0, MADV_DODUMP); +], [je_cv_madv_dontdump]) + if test "x${je_cv_madv_dontdump}" = "xyes" ; then + AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ]) + fi + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ #include diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index b56f21f8..aadfbed4 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -288,6 +288,11 @@ /* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ #undef JEMALLOC_DEFINE_MADVISE_FREE +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#undef JEMALLOC_MADVISE_DONTDUMP + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. From bbaa72422bb086933890a125fd58bf199fe26f2d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Sep 2017 15:10:01 -0700 Subject: [PATCH 1008/2608] Add pages_dontdump and pages_dodump. This will, eventually, enable us to avoid dumping eden regions. 
--- include/jemalloc/internal/pages.h | 2 ++ src/pages.c | 23 +++++++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 121fff38..dff20515 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -69,6 +69,8 @@ bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); +bool pages_dontdump(void *addr, size_t size); +bool pages_dodump(void *addr, size_t size); bool pages_boot(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/pages.c b/src/pages.c index e8112f74..5e1043da 100644 --- a/src/pages.c +++ b/src/pages.c @@ -328,6 +328,29 @@ pages_nohuge(void *addr, size_t size) { #endif } +bool +pages_dontdump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DONTDUMP) != 0; +#else + return false; +#endif +} + +bool +pages_dodump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DODUMP) != 0; +#else + return false; +#endif +} + + static size_t os_page_detect(void) { #ifdef _WIN32 From d14bbf8d8190df411f0daf182f73f7b7786288c4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Sep 2017 17:25:57 -0700 Subject: [PATCH 1009/2608] Add a "dumpable" bit to the extent state. Currently, this is unused (i.e. all extents are always marked dumpable). In the future, we'll begin using this functionality. 
--- include/jemalloc/internal/extent_inlines.h | 16 +++++++++- include/jemalloc/internal/extent_structs.h | 36 +++++++++++++++++----- src/extent.c | 19 ++++++++---- src/extent_dss.c | 5 +-- test/unit/rtree.c | 8 ++--- test/unit/slab.c | 2 +- 6 files changed, 65 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index bb2bd699..610072eb 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -93,6 +93,12 @@ extent_committed_get(const extent_t *extent) { EXTENT_BITS_COMMITTED_SHIFT); } +static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + static inline bool extent_slab_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> @@ -269,6 +275,12 @@ extent_committed_set(extent_t *extent, bool committed) { ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); } +static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + static inline void extent_slab_set(extent_t *extent, bool slab) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | @@ -283,7 +295,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed) { + bool committed, bool dumpable) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -295,6 +307,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); 
ql_elm_new(extent, ql_link); if (config_prof) { extent_prof_tctx_set(extent, NULL); @@ -312,6 +325,7 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_state_set(extent, extent_state_active); extent_zeroed_set(extent, true); extent_committed_set(extent, true); + extent_dumpable_set(extent, true); } static inline void diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 641a6325..722963b5 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -23,13 +23,14 @@ struct extent_s { * a: arena_ind * b: slab * c: committed + * d: dumpable * z: zeroed * t: state * i: szind * f: nfree * n: sn * - * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -44,6 +45,23 @@ struct extent_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. 
* @@ -80,25 +98,29 @@ struct extent_s { #define EXTENT_BITS_COMMITTED_MASK \ ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) +#define EXTENT_BITS_DUMPABLE_SHIFT (MALLOCX_ARENA_BITS + 2) +#define EXTENT_BITS_DUMPABLE_MASK \ + ((uint64_t)0x1U << EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 3) #define EXTENT_BITS_ZEROED_MASK \ ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) +#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 4) #define EXTENT_BITS_STATE_MASK \ ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) +#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 6) #define EXTENT_BITS_SZIND_MASK \ (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) #define EXTENT_BITS_NFREE_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) + (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES) #define EXTENT_BITS_NFREE_MASK \ ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) #define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) + (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) #define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. 
*/ @@ -128,7 +150,7 @@ struct extent_s { */ ql_elm(extent_t) ql_link; - /* + /* * Linkage for per size class sn/address-ordered heaps, and * for extent_avail */ diff --git a/src/extent.c b/src/extent.c index 1dd1d1d1..497f4e40 100644 --- a/src/extent.c +++ b/src/extent.c @@ -449,8 +449,10 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + new_addr, size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; } void @@ -458,6 +460,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1207,11 +1210,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed); + committed, true); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); goto label_err; } + if (extent_register_no_gdump_add(tsdn, extent)) { extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, extent, true); @@ -1374,7 +1378,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, zero, commit); + arena_extent_sn_next(arena), extent_state_active, zero, commit, + true); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -1412,6 +1417,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t 
*arena, new_addr, size, pad, alignment, slab, szind, zero, commit); } + assert(extent == NULL || extent_dumpable_get(extent)); return extent; } @@ -1636,6 +1642,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1926,7 +1933,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1937,7 +1944,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(&lead, arena, extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b); diff --git a/src/extent_dss.c b/src/extent_dss.c index e72da958..2b1ea9ca 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -156,7 +156,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(gap, arena, gap_addr_page, gap_size_page, false, NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true); + extent_state_active, false, true, true); } /* * Compute the address just past the end of the desired @@ -199,7 +199,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(&extent, arena, ret, size, size, false, NSIZES, - extent_state_active, false, true); + extent_state_active, false, true, 
+ true); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 814837bf..908100fa 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -87,9 +87,9 @@ TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; extent_init(&extent_a, NULL, NULL, LARGE_MINCLASS, false, sz_size2index(LARGE_MINCLASS), 0, extent_state_active, false, - false); + false, true); extent_init(&extent_b, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); + extent_state_active, false, false, true); tsdn_t *tsdn = tsdn_fetch(); @@ -126,7 +126,7 @@ TEST_BEGIN(test_rtree_bits) { extent_t extent; extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); + extent_state_active, false, false, true); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -167,7 +167,7 @@ TEST_BEGIN(test_rtree_random) { extent_t extent; extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, - extent_state_active, false, false); + extent_state_active, false, false, true); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 6f40aeef..ea344f8f 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -9,7 +9,7 @@ TEST_BEGIN(test_arena_slab_regind) { const arena_bin_info_t *bin_info = &arena_bin_info[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true); + binind, 0, extent_state_active, false, true, true); assert_ptr_not_null(extent_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { From 47203d5f422452def4cb29c0b7128cc068031100 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 19 Oct 2017 12:01:20 -0700 Subject: [PATCH 1010/2608] Output all counters for bin mutex stats. The saved space is not worth the trouble of missing counters. 
--- src/stats.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index cbeb923d..0847f392 100644 --- a/src/stats.c +++ b/src/stats.c @@ -131,7 +131,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, "\t\t\t\t\"bins\": [\n"); } else { char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq total_wait_ns max_wait_ns\n"; + " n_spin_acq n_owner_switch total_wait_ns" + " max_wait_ns max_n_thds\n"; malloc_cprintf(write_cb, cbopaque, "bins: size ind allocated nmalloc" " ndalloc nrequests curregs curslabs regs" @@ -234,16 +235,18 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, nregs, slab_size / page, util, nfills, nflushes, nslabs, nreslabs); - /* Output less info for bin mutexes to save space. */ if (mutex) { malloc_cprintf(write_cb, cbopaque, " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %14"FMTu64" %12"FMTu64"\n", + " %14"FMTu64" %14"FMTu64" %12"FMTu64 + " %10"FMTu64"\n", mutex_stats[mutex_counter_num_ops], mutex_stats[mutex_counter_num_wait], mutex_stats[mutex_counter_num_spin_acq], + mutex_stats[mutex_counter_num_owner_switch], mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time]); + mutex_stats[mutex_counter_max_wait_time], + mutex_stats[mutex_counter_max_num_thds]); } else { malloc_cprintf(write_cb, cbopaque, "\n"); } From 58eba024c0fbda463eaf8b42772407894dba6eff Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 31 Oct 2017 14:17:40 -0700 Subject: [PATCH 1011/2608] metadata_thp: auto mode adjustment for a0. We observed that arena 0 can have much more metadata allocated comparing to other arenas. Tune the auto mode to only switch to huge page on the 5th block (instead of 3 previously) for a0. 
--- include/jemalloc/internal/base_types.h | 9 ++++++ src/base.c | 41 ++++++++++++++------------ 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h index 97e38a97..b6db77df 100644 --- a/include/jemalloc/internal/base_types.h +++ b/include/jemalloc/internal/base_types.h @@ -6,6 +6,15 @@ typedef struct base_s base_t; #define METADATA_THP_DEFAULT metadata_thp_disabled +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + typedef enum { metadata_thp_disabled = 0, /* diff --git a/src/base.c b/src/base.c index c6db425d..10369367 100644 --- a/src/base.c +++ b/src/base.c @@ -125,42 +125,45 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static size_t +base_get_num_blocks(base_t *base, bool with_new_block) { + base_block_t *b = base->blocks; + assert(b != NULL); + + size_t n_blocks = with_new_block ? 2 : 1; + while (b->next != NULL) { + n_blocks++; + b = b->next; + } + + return n_blocks; +} + static bool base_auto_thp_triggered(base_t *base, bool with_new_block) { assert(opt_metadata_thp == metadata_thp_auto); - base_block_t *b1 = base->blocks; - assert(b1 != NULL); - base_block_t *b2 = b1->next; if (base_ind_get(base) != 0) { - return with_new_block ? true: b2 != NULL; + return base_get_num_blocks(base, with_new_block) >= + BASE_AUTO_THP_THRESHOLD; } - base_block_t *b3 = (b2 != NULL) ? b2->next : NULL; - return with_new_block ? 
b2 != NULL : b3 != NULL; + return base_get_num_blocks(base, with_new_block) >= + BASE_AUTO_THP_THRESHOLD_A0; } static void base_auto_thp_switch(base_t *base) { assert(opt_metadata_thp == metadata_thp_auto); - base_block_t *b1 = base->blocks; - assert(b1 != NULL); - base_block_t *b2 = b1->next; - /* Called when adding a new block. */ bool should_switch; if (base_ind_get(base) != 0) { - /* Makes the switch on the 2nd block. */ - should_switch = (b2 == NULL); + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD); } else { - /* - * a0 switches to thp on the 3rd block, since rtree nodes are - * allocated from a0 base, which takes an entire block on init. - */ - base_block_t *b3 = (b2 != NULL) ? b2->next : - NULL; - should_switch = (b2 != NULL) && (b3 == NULL); + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD_A0); } if (!should_switch) { return; From d591df05c86e89c0a5db98274bc7f280f910a0de Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sun, 22 Oct 2017 12:04:59 +0100 Subject: [PATCH 1012/2608] Use getpagesize(3) under FreeBSD. This avoids sysctl(2) syscall during binary startup, using the value passed in the ELF aux vector instead. Signed-off-by: Edward Tomasz Napierala --- src/pages.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pages.c b/src/pages.c index 5e1043da..14e63f9f 100644 --- a/src/pages.c +++ b/src/pages.c @@ -357,6 +357,8 @@ os_page_detect(void) { SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; +#elif defined(__FreeBSD__) + return getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { From 9f455e2786685b443201c33119765c8093461174 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 26 Oct 2017 16:55:43 +0100 Subject: [PATCH 1013/2608] Try to use sysctl(3) instead of sysctlbyname(3). 
This attempts to use VM_OVERCOMMIT OID - newly introduced in -CURRENT few days ago, specifically for this purpose - instead of querying the sysctl by its string name. Due to how syctlbyname(3) works, this means we do one syscall during binary startup instead of two. Signed-off-by: Edward Tomasz Napierala --- src/pages.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/pages.c b/src/pages.c index 14e63f9f..c839471f 100644 --- a/src/pages.c +++ b/src/pages.c @@ -10,6 +10,9 @@ #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include +#ifdef __FreeBSD__ +#include +#endif #endif /******************************************************************************/ @@ -375,9 +378,19 @@ os_overcommits_sysctl(void) { size_t sz; sz = sizeof(vm_overcommit); +#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) + int mib[2]; + + mib[0] = CTL_VM; + mib[1] = VM_OVERCOMMIT; + if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { + return false; /* Error. */ + } +#else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } +#endif return ((vm_overcommit & 0x3) == 0); } From e422fa8e7ea749ab8c4783e405c0f4b19ac25db9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 Nov 2017 17:48:39 -0700 Subject: [PATCH 1014/2608] Add arena.i.retain_grow_limit This option controls the max size when grow_retained. This is useful when we have customized extent hooks reserving physical memory (e.g. 1G huge pages). Without this feature, the default increasing sequence could result in fragmented and wasted physical memory. 
--- doc/jemalloc.xml.in | 16 +++++++ include/jemalloc/internal/arena_externs.h | 2 + include/jemalloc/internal/arena_structs_b.h | 5 +++ include/jemalloc/internal/extent_types.h | 2 + src/arena.c | 28 ++++++++++++ src/ctl.c | 42 +++++++++++++++++- src/extent.c | 7 +-- test/unit/mallctl.c | 49 +++++++++++++++++++++ 8 files changed, 146 insertions(+), 5 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8151b5ba..895b2d4d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1683,6 +1683,22 @@ malloc_conf = "xmalloc:true";]]> for additional information. + + + arena.<i>.retain_grow_limit + (size_t) + rw + + Maximum size to grow retained region (only relevant when + opt.retain is + enabled). This controls the maximum increment to expand virtual memory, + or allocation through arena.<i>extent_hooks. + In particular, if customized extent hooks reserve physical memory + (e.g. 1G huge pages), this is useful to control the allocation hook's + input size. The default is no limit. 
+ + arena.<i>.extent_hooks diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 4e546c3b..5a0e3add 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -77,6 +77,8 @@ ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, + size_t *old_limit, size_t *new_limit); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index c4e4310d..f74ea97d 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -240,9 +240,14 @@ struct arena_s { * be effective even if multiple arenas' extent allocation requests are * highly interleaved. * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. 
+ * * Synchronization: extent_grow_mtx */ pszind_t extent_grow_next; + pszind_t retain_grow_limit; malloc_mutex_t extent_grow_mtx; /* diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index b6905ce1..7efcd3a4 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -6,4 +6,6 @@ typedef struct extents_s extents_t; #define EXTENT_HOOKS_INITIALIZER NULL +#define EXTENT_GROW_MAX_PIND (NPSIZES - 1) + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/src/arena.c b/src/arena.c index 43ba6018..91dce1f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1886,6 +1886,33 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { return false; } +bool +arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, + size_t *new_limit) { + assert(opt_retain); + + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. 
*/ + if ((new_ind = sz_psz2ind(limit + 1) - 1) > + EXTENT_GROW_MAX_PIND) { + return true; + } + } + + malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(arena->retain_grow_limit); + } + if (new_limit != NULL) { + arena->retain_grow_limit = new_ind; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + + return false; +} + unsigned arena_nthreads_get(arena_t *arena, bool internal) { return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED); @@ -2013,6 +2040,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); + arena->retain_grow_limit = EXTENT_GROW_MAX_PIND; if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/ctl.c b/src/ctl.c index a2f3837a..11cd68db 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -118,6 +118,7 @@ CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) +CTL_PROTO(arena_i_retain_grow_limit) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -320,7 +321,8 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("dss"), CTL(arena_i_dss)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -2199,6 +2201,42 @@ label_return: return ret; } +static int +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + arena_t *arena; + + if (!opt_retain) { + /* Only relevant when retain is 
enabled. */ + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + size_t old_limit, new_limit; + if (newp != NULL) { + WRITE(new_limit, size_t); + } + bool err = arena_retain_grow_limit_get_set(tsd, arena, + &old_limit, newp != NULL ? &new_limit : NULL); + if (!err) { + READ(old_limit, size_t); + ret = 0; + } else { + ret = EFAULT; + } + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; @@ -2260,7 +2298,7 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; diff --git a/src/extent.c b/src/extent.c index 497f4e40..d1324f99 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1284,13 +1284,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } /* - * Increment extent_grow_next if doing so wouldn't exceed the legal + * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->extent_grow_next + egn_skip + 1 < NPSIZES) { + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { arena->extent_grow_next += egn_skip + 1; } else { - arena->extent_grow_next = NPSIZES - 1; + arena->extent_grow_next = arena->retain_grow_limit; } /* All opportunities for failure are past. 
*/ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5612cce5..94f801e3 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -556,6 +556,54 @@ TEST_BEGIN(test_arena_i_dss) { } TEST_END +TEST_BEGIN(test_arena_i_retain_grow_limit) { + size_t old_limit, new_limit, default_limit; + size_t mib[3]; + size_t miblen; + + bool retain_enabled; + size_t sz = sizeof(retain_enabled); + assert_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + test_skip_if(!retain_enabled); + + sz = sizeof(default_limit); + miblen = sizeof(mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), + 0, "Unexpected mallctlnametomib() error"); + + assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(default_limit, sz_pind2sz(EXTENT_GROW_MAX_PIND), + "Unexpected default for retain_grow_limit"); + + new_limit = PAGE - 1; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + sizeof(new_limit)), EFAULT, "Unexpected mallctl() success"); + + new_limit = PAGE + 1; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(old_limit, PAGE, + "Unexpected value for retain_grow_limit"); + + /* Expect grow less than psize class 10. */ + new_limit = sz_pind2sz(10) - 1; + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + assert_zu_eq(old_limit, sz_pind2sz(9), + "Unexpected value for retain_grow_limit"); + + /* Restore to default. 
*/ + assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, + sizeof(default_limit)), 0, "Unexpected mallctl() failure"); +} +TEST_END + TEST_BEGIN(test_arenas_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); @@ -727,6 +775,7 @@ main(void) { test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, + test_arena_i_retain_grow_limit, test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, test_arenas_constants, From 6dd5681ab787b4153ad2fa425be72efece42d3c7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 Nov 2017 13:58:59 -0700 Subject: [PATCH 1015/2608] Use hugepage alignment for base allocator. This gives us an easier way to tell if the allocation is for metadata in the extent hooks. --- src/base.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/base.c b/src/base.c index 10369367..e3a89b09 100644 --- a/src/base.c +++ b/src/base.c @@ -33,9 +33,9 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) bool zero = true; bool commit = true; - /* We use hugepage sizes regardless of opt_metadata_thp. */ + /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - size_t alignment = metadata_thp_enabled() ? HUGEPAGE : PAGE; + size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { From b5d071c26697813bcceae320ba88dee2a2a73e51 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Nov 2017 13:59:21 -0800 Subject: [PATCH 1016/2608] Fix unbounded increase in stash_decayed. Added an upper bound on how many pages we can decay during the current run. Without this, decay could have unbounded increase in stashed, since other threads could add new pages into the extents. 
--- include/jemalloc/internal/extent_externs.h | 3 ++- src/arena.c | 30 +++++++++++++--------- src/extent.c | 5 ++-- 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 9da5d010..132d8903 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -36,7 +36,8 @@ extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min, + size_t npages_max); void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); diff --git a/src/arena.c b/src/arena.c index 91dce1f5..e2462bf7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -62,7 +62,7 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, - bool is_background_thread); + size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, @@ -693,7 +693,8 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool is_background_thread) { if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit, is_background_thread); + npages_limit, current_npages - npages_limit, + is_background_thread); } } @@ -799,7 +800,8 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if 
(decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, is_background_thread); + 0, extents_npages_get(extents), + is_background_thread); } return false; } @@ -901,15 +903,16 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, - extent_list_t *decay_extents) { + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. */ size_t nstashed = 0; extent_t *extent; - while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, - npages_limit)) != NULL) { + while (nstashed < npages_decay_max && + (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + npages_limit, npages_decay_max - nstashed)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -983,12 +986,15 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * npages_limit: Decay as many dirty extents as possible without violating the - * invariant: (extents_npages_get(extents) >= npages_limit) + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. 
*/ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, + extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -1006,7 +1012,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, - npages_limit, &decay_extents); + npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, &extent_hooks, decay, extents, all, &decay_extents, @@ -1024,7 +1030,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - is_background_thread); + extents_npages_get(extents), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -1220,7 +1226,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0)) != NULL) { + &arena->extents_retained, 0, SIZE_MAX)) != NULL) { extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } diff --git a/src/extent.c b/src/extent.c index d1324f99..c8a30907 100644 --- a/src/extent.c +++ b/src/extent.c @@ -472,7 +472,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t npages_min) { + extents_t *extents, size_t npages_min, size_t npages_max) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -493,7 +493,8 @@ extents_evict(tsdn_t *tsdn, 
arena_t *arena, extent_hooks_t **r_extent_hooks, size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min) { + if (extents_npages - npages < npages_min || + npages > npages_max) { extent = NULL; goto label_return; } From cb3b72b9756d124565ed12e005065ad6f0769568 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 7 Nov 2017 19:40:38 -0800 Subject: [PATCH 1017/2608] Fix base allocator THP auto mode locking and stats. Added proper synchronization for switching to using THP in auto mode. Also fixed stats for number of THPs used. --- include/jemalloc/internal/base_structs.h | 2 ++ src/base.c | 37 ++++++++++-------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index b5421693..2102247a 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -30,6 +30,8 @@ struct base_s { /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; /* * Most recent size class in the series of increasingly large base * extents. 
Logarithmic spacing between subsequent allocations ensures diff --git a/src/base.c b/src/base.c index e3a89b09..cc3d9781 100644 --- a/src/base.c +++ b/src/base.c @@ -139,23 +139,13 @@ base_get_num_blocks(base_t *base, bool with_new_block) { return n_blocks; } -static bool -base_auto_thp_triggered(base_t *base, bool with_new_block) { - assert(opt_metadata_thp == metadata_thp_auto); - - if (base_ind_get(base) != 0) { - return base_get_num_blocks(base, with_new_block) >= - BASE_AUTO_THP_THRESHOLD; - } - - return base_get_num_blocks(base, with_new_block) >= - BASE_AUTO_THP_THRESHOLD_A0; -} - static void -base_auto_thp_switch(base_t *base) { +base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { assert(opt_metadata_thp == metadata_thp_auto); - + malloc_mutex_assert_owner(tsdn, &base->mtx); + if (base->auto_thp_switched) { + return; + } /* Called when adding a new block. */ bool should_switch; if (base_ind_get(base) != 0) { @@ -169,14 +159,16 @@ base_auto_thp_switch(base_t *base) { return; } - assert(base->n_thp == 0); + base->auto_thp_switched = true; + assert(!config_stats || base->n_thp == 0); /* Make the initial blocks THP lazily. 
*/ base_block_t *block = base->blocks; while (block != NULL) { assert((block->size & HUGEPAGE_MASK) == 0); pages_huge(block, block->size); if (config_stats) { - base->n_thp += block->size >> LG_HUGEPAGE; + base->n_thp += HUGEPAGE_CEILING(block->size - + extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; } block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); @@ -226,7 +218,7 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); if (metadata_thp_madvise() && (opt_metadata_thp == - metadata_thp_always || base_auto_thp_triggered(base, false))) { + metadata_thp_always || base->auto_thp_switched)) { base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> LG_HUGEPAGE; @@ -289,10 +281,12 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, } else if (opt_metadata_thp == metadata_thp_auto && base != NULL) { /* base != NULL indicates this is not a new base. 
*/ - if (base_auto_thp_triggered(base, true)) { + malloc_mutex_lock(tsdn, &base->mtx); + base_auto_thp_switch(tsdn, base); + if (base->auto_thp_switched) { pages_huge(addr, block_size); } - base_auto_thp_switch(base); + malloc_mutex_unlock(tsdn, &base->mtx); } } @@ -334,7 +328,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->mapped += block->size; if (metadata_thp_madvise() && !(opt_metadata_thp == metadata_thp_auto - && !base_auto_thp_triggered(base, false))) { + && !base->auto_thp_switched)) { assert(base->n_thp > 0); base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE; @@ -376,6 +370,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->pind_last = pind_last; base->extent_sn_next = extent_sn_next; base->blocks = block; + base->auto_thp_switched = false; for (szind_t i = 0; i < NSIZES; i++) { extent_heap_new(&base->avail[i]); } From d6feed6e6631d00806607cfe16a796e337752044 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Fri, 13 Oct 2017 10:27:13 -0700 Subject: [PATCH 1018/2608] Use tsd offset_state instead of atomic While working on #852, I noticed the prng state is atomic. This is the only atomic use of prng in all of jemalloc. Instead, use a threadlocal prng state if possible to avoid unnecessary cache line contention. 
--- include/jemalloc/internal/extent_inlines.h | 13 ++++++++++--- include/jemalloc/internal/tsd.h | 2 ++ src/tsd.c | 10 ++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 610072eb..9f5c5cd2 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -196,9 +196,16 @@ extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { if (alignment < PAGE) { unsigned lg_range = LG_PAGE - lg_floor(CACHELINE_CEILING(alignment)); - size_t r = - prng_lg_range_zu(&extent_arena_get(extent)->offset_state, - lg_range, true); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu( + &extent_arena_get(extent)->offset_state, + lg_range, true); + } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); extent->e_addr = (void *)((uintptr_t)extent->e_addr + diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 155a2ec6..0b9841aa 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -65,6 +65,7 @@ typedef void (*test_callback_t)(int *); O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ + O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ @@ -84,6 +85,7 @@ typedef void (*test_callback_t)(int *); 0, \ 0, \ 0, \ + 0, \ NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ diff --git a/src/tsd.c b/src/tsd.c index f968992f..c1430682 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -71,6 +71,16 @@ tsd_data_init(tsd_t *tsd) { */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + /* + * A nondeterministic seed based on the address of tsd reduces + * the 
likelihood of lockstep non-uniform cache index + * utilization among identical concurrent processes, but at the + * cost of test repeatability. For debug builds, instead use a + * deterministic seed. + */ + *tsd_offset_statep_get(tsd) = config_debug ? 0 : + (uint64_t)(uintptr_t)tsd; + return tsd_tcache_enabled_data_init(tsd); } From 282a3faa1784783e2e2cb3698183927b3927b950 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Nov 2017 11:41:53 -0800 Subject: [PATCH 1019/2608] Use extent_heap_first for best fit. extent_heap_any makes the layout less predictable and as a result incurs more fragmentation. --- src/extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index c8a30907..466e0b2a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -370,7 +370,7 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, (size_t)pind); if (i < NPSIZES+1) { assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_any(&extents->heaps[i]); + extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); return extent; } From fac706836ffda46759914508b918e8b54c8020c8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 9 Nov 2017 13:51:39 -0800 Subject: [PATCH 1020/2608] Add opt.lg_extent_max_active_fit When allocating from dirty extents (which we always prefer if available), large active extents can get split even if the new allocation is much smaller, in which case the introduced fragmentation causes high long term damage. This new option controls the threshold to reuse and split an existing active extent. We avoid using a large extent for much smaller sizes, in order to reduce fragmentation. In some workload, adding the threshold improves virtual memory usage by >10x. 
--- doc/jemalloc.xml.in | 16 ++++++++++++++++ include/jemalloc/internal/extent_externs.h | 8 +++++--- include/jemalloc/internal/extent_types.h | 6 ++++++ src/ctl.c | 4 ++++ src/extent.c | 9 +++++++++ src/jemalloc.c | 3 +++ test/unit/mallctl.c | 1 + 7 files changed, 44 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 895b2d4d..3f9ba201 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1069,6 +1069,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", for related dynamic control options. + + + opt.lg_extent_max_active_fit + (size_t) + r- + + When reusing dirty extents, this determines the (log + base 2 of the) maximum ratio between the size of the active extent + selected (to split off from) and the size of the requested allocation. + This prevents the splitting of large active extents for smaller + allocations, which can reduce fragmentation over the long run + (especially for non-active extents). Lower value may reduce + fragmentation, at the cost of extra active extents. The default value + is 6, which gives a maximum ratio of 64 (2^6). 
+ + opt.stats_print diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 132d8903..a76d4e4a 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -6,9 +6,11 @@ #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" -extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; -extern mutex_pool_t extent_mutex_pool; +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 7efcd3a4..c0561d99 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -8,4 +8,10 @@ typedef struct extents_s extents_t; #define EXTENT_GROW_MAX_PIND (NPSIZES - 1) +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. 
+ */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index 11cd68db..1fdb772d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -95,6 +95,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) @@ -293,6 +294,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, @@ -1597,6 +1599,8 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, + size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) diff --git a/src/extent.c b/src/extent.c index 466e0b2a..548a93e2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -17,6 +17,8 @@ rtree_t extents_rtree; /* Keyed by the address of the extent_t being protected. 
*/ mutex_pool_t extent_mutex_pool; +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + static const bitmap_info_t extents_bitmap_info = BITMAP_INFO_INITIALIZER(NPSIZES+1); @@ -369,6 +371,13 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); if (i < NPSIZES+1) { + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. + */ + if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + return NULL; + } assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); diff --git a/src/jemalloc.c b/src/jemalloc.c index f29fc7da..f4fd805e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1146,6 +1146,9 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, + (sizeof(size_t) << 3), yes, yes, false) CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 94f801e3..4cfd981a 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -172,6 +172,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, utrace, utrace); TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, always); + TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); From eb1b08daaea57d16ce720d97847d94cee2f867cc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 14 Nov 2017 16:09:31 -0800 Subject: [PATCH 1021/2608] Fix an extent coalesce bug. 
When coalescing, we should take both extents off the LRU list; otherwise decay can grab the existing outer extent through extents_evict. --- include/jemalloc/internal/extent_inlines.h | 5 +++++ src/extent.c | 20 +++++++++++++------- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 9f5c5cd2..9b8ddc27 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -355,6 +355,11 @@ extent_list_append(extent_list_t *list, extent_t *extent) { ql_tail_insert(list, extent, ql_link); } +static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + static inline void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert) { diff --git a/src/extent.c b/src/extent.c index 548a93e2..8b00ec94 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1458,13 +1458,12 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); - if (forward && extents->delay_coalesce) { + if (extents->delay_coalesce) { /* - * The extent that remains after coalescing must occupy the - * outer extent's position in the LRU. For forward coalescing, - * swap the inner extent into the LRU. + * Remove outer from the LRU list so that it won't be show up in + * decay through extents_evict. */ - extent_list_replace(&extents->lru, outer, inner); + extent_list_remove(&extents->lru, outer); } extent_activate_locked(tsdn, arena, extents, outer, extents->delay_coalesce); @@ -1474,9 +1473,16 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &extents->mtx); + if (!err && extents->delay_coalesce) { + if (forward) { + extent_list_prepend(&extents->lru, inner); + } else { + extent_list_prepend(&extents->lru, outer); + } + } if (err) { - if (forward && extents->delay_coalesce) { - extent_list_replace(&extents->lru, inner, outer); + if (extents->delay_coalesce) { + extent_list_prepend(&extents->lru, outer); } extent_deactivate_locked(tsdn, arena, extents, outer, extents->delay_coalesce); From 3e64dae802b9f7cd4f860b0d29126cd727d5166b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 15 Nov 2017 14:48:55 -0800 Subject: [PATCH 1022/2608] Eagerly coalesce large extents. Coalescing is a small price to pay for large allocations since they happen less frequently. This reduces fragmentation while also potentially improving locality. --- src/extent.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index 8b00ec94..23b64017 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1586,8 +1586,22 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); + } else if (extent_size_get(extent) >= LARGE_MINCLASS) { + /* Always coalesce large extents eagerly. 
*/ + bool coalesced; + size_t prev_size; + do { + prev_size = extent_size_get(extent); + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce(tsdn, arena, + r_extent_hooks, rtree_ctx, extents, extent, + &coalesced, growing_retained); + if (coalesced) { + extent_list_remove(&extents->lru, extent); + } + } while (coalesced && + extent_size_get(extent) >= prev_size + LARGE_MINCLASS); } - extent_deactivate_locked(tsdn, arena, extents, extent, false); malloc_mutex_unlock(tsdn, &extents->mtx); From e475d03752d53e198143fdf58e7d0e2e14e5f1a2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 16 Nov 2017 14:27:23 -0800 Subject: [PATCH 1023/2608] Avoid setting zero and commit if split fails in extent_recycle. --- src/extent.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/extent.c b/src/extent.c index 23b64017..7c7da29a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -791,7 +791,7 @@ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool *zero, bool *commit, bool growing_retained) { + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(alignment > 0); @@ -849,13 +849,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_activate_locked(tsdn, arena, extents, extent, false); malloc_mutex_unlock(tsdn, &extents->mtx); - if (extent_zeroed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - return extent; } @@ -1021,16 +1014,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, zero, - &committed, growing_retained); + rtree_ctx, extents, new_addr, size, pad, alignment, slab, + growing_retained); if (extent == NULL) { return NULL; } - if (committed) { - *commit = true; - } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, extents, new_addr, size, pad, alignment, slab, szind, extent, @@ -1049,6 +1038,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_zeroed_set(extent, true); } + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; + } + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } From 26a8f82c484eada4188e56daad32ed6a16b4b585 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sun, 19 Nov 2017 17:01:53 -0800 Subject: [PATCH 1024/2608] Add missing deregister before extents_leak. This fixes an regression introduced by 211b1f3 (refactor extent split). 
--- src/extent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extent.c b/src/extent.c index 7c7da29a..ee50aff8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -988,6 +988,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_deregister(tsdn, to_salvage); } if (to_leak != NULL) { + extent_deregister(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); } From 6e841f618a5ff99001a9578e9ff73602e7a94620 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Nov 2017 12:21:58 -0800 Subject: [PATCH 1025/2608] Add more tests for extent hooks failure paths. --- src/extent.c | 3 +++ test/include/test/extent_hooks.h | 2 ++ test/integration/extent.c | 23 ++++++++++++++++++++--- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index ee50aff8..7e10b7f5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -988,9 +988,12 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_deregister(tsdn, to_salvage); } if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); extent_deregister(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) + == NULL); } return NULL; } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index ea012857..1f062015 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -266,6 +266,8 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "extent_hooks should be same as pointer used to set hooks"); assert_ptr_eq(extent_hooks->merge, extent_merge_hook, "Wrong hook function"); + assert_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, + "Extents not mergeable"); called_merge = true; if (!try_merge) { return true; diff --git a/test/integration/extent.c b/test/integration/extent.c index 1dcf2176..7100b6af 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ 
-98,7 +98,8 @@ test_extent_body(unsigned arena_ind) { dallocx(p, flags); } -TEST_BEGIN(test_extent_manual_hook) { +static void +test_manual_hook_body(void) { unsigned arena_ind; size_t old_size, new_size, sz; size_t hooks_mib[3]; @@ -139,8 +140,9 @@ TEST_BEGIN(test_extent_manual_hook) { assert_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); - test_skip_if(check_background_thread_enabled()); - test_extent_body(arena_ind); + if (check_background_thread_enabled()) { + test_extent_body(arena_ind); + } /* Restore extent hooks. */ assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, @@ -165,6 +167,21 @@ TEST_BEGIN(test_extent_manual_hook) { assert_ptr_eq(old_hooks->merge, default_hooks->merge, "Unexpected extent_hooks error"); } + +TEST_BEGIN(test_extent_manual_hook) { + test_manual_hook_body(); + + /* Test failure paths. */ + try_split = false; + test_manual_hook_body(); + try_merge = false; + test_manual_hook_body(); + try_purge_lazy = false; + try_purge_forced = false; + test_manual_hook_body(); + + try_split = try_merge = try_purge_lazy = try_purge_forced = true; +} TEST_END TEST_BEGIN(test_extent_auto_hook) { From b5ab3f91ea60b16819563b09aa01a0d339aa40b4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Dec 2017 13:43:21 -0800 Subject: [PATCH 1026/2608] Fix test/integration/extent. Should only run the hook tests without background threads. This was introduced in 6e841f6. 
--- test/integration/extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/extent.c b/test/integration/extent.c index 7100b6af..c2dc1cb8 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -140,7 +140,7 @@ test_manual_hook_body(void) { assert_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); - if (check_background_thread_enabled()) { + if (!check_background_thread_enabled()) { test_extent_body(arena_ind); } From 955b1d9cc574647d3d3dfb474b47b51b3a81453d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Dec 2017 15:06:08 -0800 Subject: [PATCH 1027/2608] Fix extent deregister on the leak path. On leak path we should not adjust gdump when deregister. --- src/extent.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index 7e10b7f5..c531da24 100644 --- a/src/extent.c +++ b/src/extent.c @@ -761,7 +761,7 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, * Removes all pointers to the given extent from the global rtree. */ static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -778,11 +778,21 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_unlock(tsdn, extent); - if (config_prof) { + if (config_prof && gdump) { extent_gdump_sub(tsdn, extent); } } +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + /* * Tries to find and remove an extent from extents that can be used for the * given allocation request. 
@@ -989,7 +999,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, } if (to_leak != NULL) { void *leak = extent_base_get(to_leak); - extent_deregister(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) @@ -1267,7 +1277,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, */ assert(result == extent_split_interior_error); if (to_leak != NULL) { - extent_deregister(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, to_leak, true); goto label_err; From 749caf14ae73a9ab1c48e538a8af09addbb35ee7 Mon Sep 17 00:00:00 2001 From: Ed Schouten Date: Sun, 3 Dec 2017 21:45:08 +0100 Subject: [PATCH 1028/2608] Also use __riscv to detect builds for RISC-V CPUs. According to the RISC-V toolchain conventions, __riscv__ is the old spelling of this definition. __riscv should be used going forward. 
https://github.com/riscv/riscv-toolchain-conventions#cc-preprocessor-definitions --- include/jemalloc/internal/jemalloc_internal_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 6b987d6f..1b750b12 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -94,7 +94,7 @@ typedef int malloc_cpuid_t; # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __riscv__ +# if defined(__riscv) || defined(__riscv__) # define LG_QUANTUM 4 # endif # ifdef __s390__ From 22460cbebd2b7343319d9a8425f593c92facacab Mon Sep 17 00:00:00 2001 From: nicolov Date: Sun, 10 Dec 2017 23:36:32 -0800 Subject: [PATCH 1029/2608] jemalloc_mangle.sh: set sh in strict mode --- include/jemalloc/jemalloc_mangle.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_mangle.sh b/include/jemalloc/jemalloc_mangle.sh index df328b78..c675bb46 100755 --- a/include/jemalloc/jemalloc_mangle.sh +++ b/include/jemalloc/jemalloc_mangle.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/sh -eu public_symbols_txt=$1 symbol_prefix=$2 From 5e0332890f8e553e148b8c4b0130d84037339e6a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 14 Dec 2017 11:14:08 -0800 Subject: [PATCH 1030/2608] Output opt.lg_extent_max_active_fit in stats. 
--- src/stats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stats.c b/src/stats.c index 0847f392..33e44269 100644 --- a/src/stats.c +++ b/src/stats.c @@ -822,6 +822,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") + OPT_WRITE_UNSIGNED(lg_extent_max_active_fit, ",") OPT_WRITE_CHAR_P(junk, ",") OPT_WRITE_BOOL(zero, ",") OPT_WRITE_BOOL(utrace, ",") @@ -856,7 +857,9 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_BOOL #undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_UNSIGNED #undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_SSIZE_T_MUTABLE #undef OPT_WRITE_CHAR_P /* arenas. */ From f70785de91ee14e8034f9bd64bf6590199c89e65 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Dec 2017 14:04:07 -0800 Subject: [PATCH 1031/2608] Skip test/unit/pack when profiling is enabled. The test assumes no sampled allocations. --- test/unit/pack.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/pack.c b/test/unit/pack.c index edfc548f..fc188b00 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -88,6 +88,12 @@ arena_reset_mallctl(unsigned arena_ind) { } TEST_BEGIN(test_pack) { + bool prof_enabled; + size_t sz = sizeof(prof_enabled); + if (mallctl("opt.prof", (void *)&prof_enabled, &sz, NULL, 0) == 0) { + test_skip_if(prof_enabled); + } + unsigned arena_ind = arenas_create_mallctl(); size_t nregs_per_run = nregs_per_run_compute(); size_t nregs = nregs_per_run * NSLABS; From 740bdd68b1d4b9c39c68432e06deb70ad4da3210 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Dec 2017 12:13:50 -0800 Subject: [PATCH 1032/2608] Over purge by 1 extent always. When purging, large allocations are usually the ones that cross the npages_limit threshold, simply because they are "large". 
This means we often leave the large extent around for a while, which has the downsides of: 1) high RSS and 2) more chance of them getting fragmented. Given that they are not likely to be reused very soon (LRU), let's over purge by 1 extent (which is often large and not reused frequently). --- include/jemalloc/internal/extent_externs.h | 3 +-- src/arena.c | 4 ++-- src/extent.c | 6 ++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index a76d4e4a..b8a4d026 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -38,8 +38,7 @@ extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min, - size_t npages_max); + extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); void extents_prefork(tsdn_t *tsdn, extents_t *extents); void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); diff --git a/src/arena.c b/src/arena.c index e2462bf7..a28dbfb0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -912,7 +912,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_t *extent; while (nstashed < npages_decay_max && (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, - npages_limit, npages_decay_max - nstashed)) != NULL) { + npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -1226,7 +1226,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0, SIZE_MAX)) != 
NULL) { + &arena->extents_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } diff --git a/src/extent.c b/src/extent.c index c531da24..bca703fc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -481,7 +481,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t npages_min, size_t npages_max) { + extents_t *extents, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -499,11 +499,9 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, goto label_return; } /* Check the eviction limit. */ - size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min || - npages > npages_max) { + if (extents_npages <= npages_min) { extent = NULL; goto label_return; } From 4bf4a1c4ea418ba490d35d23aee0f535e96ddd23 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 17:22:06 -0700 Subject: [PATCH 1033/2608] Pull out arena_bin_info_t and arena_bin_t into their own file. In the process, kill arena_bin_index, which is unused. To follow are several diffs continuing this separation. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_externs.h | 7 +- include/jemalloc/internal/arena_inlines_b.h | 7 -- include/jemalloc/internal/arena_structs_b.h | 65 +------------- include/jemalloc/internal/arena_types.h | 2 - include/jemalloc/internal/bin.h | 81 +++++++++++++++++ include/jemalloc/internal/extent_structs.h | 2 +- include/jemalloc/internal/tcache_inlines.h | 10 +-- src/arena.c | 96 +++++++++------------ src/bin.c | 21 +++++ src/ctl.c | 8 +- src/tcache.c | 12 +-- test/unit/junk.c | 2 +- test/unit/mallctl.c | 6 +- test/unit/slab.c | 2 +- test/unit/stats.c | 2 +- 16 files changed, 169 insertions(+), 155 deletions(-) create mode 100644 include/jemalloc/internal/bin.h create mode 100644 src/bin.c diff --git a/Makefile.in b/Makefile.in index 0698633b..2f0b3b24 100644 --- a/Makefile.in +++ b/Makefile.in @@ -93,6 +93,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ + $(srcroot)src/bin.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 5a0e3add..77a2b541 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/size_classes.h" @@ -9,8 +10,6 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; -extern const arena_bin_info_t arena_bin_info[NBINS]; - extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; @@ -51,10 +50,10 @@ void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t 
binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, +void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); -typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); +typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 003abe11..7b10d9ef 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,13 +8,6 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" -static inline szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return binind; -} - JEMALLOC_ALWAYS_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index f74ea97d..d843b09b 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" @@ -13,42 +14,6 @@ #include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. 
- * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -struct arena_bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - struct arena_decay_s { /* Synchronizes all non-atomic fields. */ malloc_mutex_t mtx; @@ -109,32 +74,6 @@ struct arena_decay_s { uint64_t ceil_npages; }; -struct arena_bin_s { - /* All operations on arena_bin_t fields require lock ownership. */ - malloc_mutex_t lock; - - /* - * Current slab being used to service allocations of this bin's size - * class. slabcur is independent of slabs_{nonfull,full}; whenever - * slabcur is reassigned, the previous slab must be deallocated or - * inserted into slabs_{nonfull,full}. - */ - extent_t *slabcur; - - /* - * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is oldest/lowest in - * memory. - */ - extent_heap_t slabs_nonfull; - - /* List used to track full slabs. */ - extent_list_t slabs_full; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - struct arena_s { /* * Number of threads currently assigned to this arena. Each thread has @@ -264,7 +203,7 @@ struct arena_s { * * Synchronization: internal. */ - arena_bin_t bins[NBINS]; + bin_t bins[NBINS]; /* * Base allocator, from which arena metadata are allocated. 
diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index a691bd81..70001b5f 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -12,9 +12,7 @@ #define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_slab_data_s arena_slab_data_t; -typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; typedef struct alloc_ctx_s alloc_ctx_t; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h new file mode 100644 index 00000000..09717b14 --- /dev/null +++ b/include/jemalloc/internal/bin.h @@ -0,0 +1,81 @@ +#ifndef JEMALLOC_INTERNAL_BIN_H +#define JEMALLOC_INTERNAL_BIN_H + +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/stats.h" + +/* + * A bin contains a set of extents that are currently being used for slab + * allocations. + */ + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. 
*/ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +extern const bin_info_t bin_infos[NBINS]; + + +typedef struct bin_s bin_t; +struct bin_s { + /* All operations on bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* List used to track full slabs. */ + extent_list_t slabs_full; + + /* Bin statistics. */ + malloc_bin_stats_t stats; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 722963b5..89b49c72 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -143,7 +143,7 @@ struct extent_s { /* * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full + * - bin_t's slabs_full * - extents_t's LRU * - stashed dirty extents * - arena's large allocations diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index d1632d8f..14ab037f 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/sz.h" @@ -76,16 +77,15 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, 
size_t size, if (likely(!zero)) { if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + arena_alloc_junk_small(ret, &bin_infos[binind], + false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); } @@ -169,7 +169,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + arena_dalloc_junk_small(ptr, &bin_infos[binind]); } bin = tcache_small_bin_get(tcache, binind); diff --git a/src/arena.c b/src/arena.c index a28dbfb0..2dcb447e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -32,21 +32,6 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -const arena_bin_info_t arena_bin_info[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ - {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - BIN_INFO_bin_##bin((1U<uptime, &arena->create_time); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsdn, &bin->lock); malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); @@ -385,8 +370,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } static void * -arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, - const arena_bin_info_t *bin_info) { +arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, const bin_info_t *bin_info) { void *ret; 
arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -413,7 +397,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)arena_bin_info[binind].reg_size == 0); + (uintptr_t)bin_infos[binind].reg_size == 0); /* Avoid doing division with a variable divisor. */ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); @@ -434,7 +418,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { default: not_reached(); } - assert(regind < arena_bin_info[binind].nregs); + assert(regind < bin_infos[binind].nregs); return regind; } @@ -443,7 +427,7 @@ static void arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); assert(extent_nfree_get(slab) < bin_info->nregs); @@ -1089,18 +1073,18 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { } static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); } static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { +arena_bin_slabs_nonfull_tryget(bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; @@ -1112,7 +1096,7 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { } static void 
-arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed @@ -1126,7 +1110,7 @@ arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { } static void -arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { if (arena_is_auto(arena)) { return; } @@ -1180,7 +1164,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < NBINS; i++) { extent_t *slab; - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { slab = bin->slabcur; @@ -1269,7 +1253,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info, + extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1292,7 +1276,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, - const arena_bin_info_t *bin_info) { + const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1328,10 +1312,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } static extent_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { extent_t *slab; - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; /* Look for a usable slab. 
*/ slab = arena_bin_slabs_nonfull_tryget(bin); @@ -1340,7 +1324,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } /* No existing slabs have any space available. */ - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); @@ -1371,12 +1355,12 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; extent_t *slab; - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; if (!arena_is_auto(arena) && bin->slabcur != NULL) { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; @@ -1429,7 +1413,7 @@ void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; - arena_bin_t *bin; + bin_t *bin; assert(tbin->ncached == 0); @@ -1445,7 +1429,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { ptr = arena_slab_reg_alloc(tsdn, slab, - &arena_bin_info[binind]); + &bin_infos[binind]); } else { ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1462,8 +1446,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, break; } if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ptr, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ptr, &bin_infos[binind], true); } /* Insert such that low regions get used first. 
*/ *(tbin->avail - nfill + i) = ptr; @@ -1481,14 +1464,14 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } void -arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { +arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { if (!zero) { memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } } static void -arena_dalloc_junk_small_impl(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = @@ -1497,7 +1480,7 @@ arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; - arena_bin_t *bin; + bin_t *bin; size_t usize; extent_t *slab; @@ -1507,7 +1490,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); + ret = arena_slab_reg_alloc(tsdn, slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1531,14 +1514,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + &bin_infos[binind], false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); @@ -1643,13 +1626,13 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { 
+arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; /* * The following block's conditional is necessary because if the @@ -1666,7 +1649,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { + bin_t *bin) { assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1680,8 +1663,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, bin_t *bin) { assert(extent_nfree_get(slab) > 0); /* @@ -1711,8 +1693,8 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); szind_t binind = extent_szind_get(slab); - arena_bin_t *bin = &arena->bins[binind]; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + bin_t *bin = &arena->bins[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); @@ -1743,7 +1725,7 @@ arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { szind_t binind = extent_szind_get(extent); - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); @@ -1777,7 +1759,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, 
size_t size, * Avoid moving the allocation if the size class can be left the * same. */ - assert(arena_bin_info[sz_size2index(oldsize)].reg_size == + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < @@ -2060,7 +2042,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Initialize bins. */ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/bin.c b/src/bin.c new file mode 100644 index 00000000..59cdd2c1 --- /dev/null +++ b/src/bin.c @@ -0,0 +1,21 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/bin.h" + +const bin_info_t bin_infos[NBINS] = { +#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ + {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, +#define BIN_INFO_bin_no(reg_size, slab_size, nregs) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + BIN_INFO_bin_##bin((1U< NBINS) { @@ -2680,7 +2680,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; MUTEX_PROF_RESET(bin->lock); } } diff --git a/src/tcache.c b/src/tcache.c index e22f8067..6d516731 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -121,7 +121,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, /* Lock the arena bin associated with the first object. 
*/ extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); - arena_bin_t *bin = &bin_arena->bins[binind]; + bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, @@ -169,7 +169,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -533,7 +533,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; @@ -674,13 +674,13 @@ tcache_boot(tsdn_t *tsdn) { stack_nelms = 0; unsigned i; for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { + if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((arena_bin_info[i].nregs << 1) <= + } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); + (bin_infos[i].nregs << 1); } else { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; diff --git a/test/unit/junk.c b/test/unit/junk.c index fd0e65b1..243ced41 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -15,7 +15,7 @@ watch_junking(void *p) { } static void -arena_dalloc_junk_small_intercept(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) { size_t i; arena_dalloc_junk_small_orig(ptr, 
bin_info); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4cfd981a..e812b52f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -696,10 +696,10 @@ TEST_BEGIN(test_arenas_bin_constants) { assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_BIN_CONSTANT(size_t, size, arena_bin_info[0].reg_size); - TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, arena_bin_info[0].nregs); + TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size); + TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs); TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, - arena_bin_info[0].slab_size); + bin_infos[0].slab_size); #undef TEST_ARENAS_BIN_CONSTANT } diff --git a/test/unit/slab.c b/test/unit/slab.c index ea344f8f..7e662aed 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_arena_slab_regind) { for (binind = 0; binind < NBINS; binind++) { size_t regind; extent_t slab; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true); diff --git a/test/unit/stats.c b/test/unit/stats.c index d9849d80..231010e4 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -245,7 +245,7 @@ TEST_BEGIN(test_stats_arenas_bins) { (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); - p = malloc(arena_bin_info[0].reg_size); + p = malloc(bin_infos[0].reg_size); assert_ptr_not_null(p, "Unexpected malloc() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), From a8dd8876fb483f402833fa05f0fb46fe7c5416e1 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:02:39 -0700 Subject: [PATCH 1034/2608] Move bin initialization from arena module to bin module. 
--- include/jemalloc/internal/bin.h | 3 +++ include/jemalloc/internal/witness.h | 2 +- src/arena.c | 11 ++--------- src/bin.c | 16 +++++++++++++++- 4 files changed, 21 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 09717b14..d7927221 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -78,4 +78,7 @@ struct bin_s { malloc_bin_stats_t stats; }; +/* Returns true on error. */ +bool bin_init(bin_t *bin); + #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 33be6661..7ace8ae4 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -51,7 +51,7 @@ #define WITNESS_RANK_ARENA_LARGE 19U #define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF diff --git a/src/arena.c b/src/arena.c index 2dcb447e..0d27ffb9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2042,17 +2042,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Initialize bins. 
*/ for (i = 0; i < NBINS; i++) { - bin_t *bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { + bool err = bin_init(&arena->bins[i]); + if (err) { goto label_error; } - bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); - if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); - } } arena->base = base; diff --git a/src/bin.c b/src/bin.c index 59cdd2c1..89b041db 100644 --- a/src/bin.c +++ b/src/bin.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/witness.h" const bin_info_t bin_infos[NBINS] = { #define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ @@ -18,4 +19,17 @@ const bin_info_t bin_infos[NBINS] = { #undef SC }; - +bool +bin_init(bin_t *bin) { + if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_BIN, + malloc_mutex_rank_exclusive)) { + return true; + } + bin->slabcur = NULL; + extent_heap_new(&bin->slabs_nonfull); + extent_list_init(&bin->slabs_full); + if (config_stats) { + memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + } + return false; +} From 48bb4a056be97214fa049f21bead9618429c807a Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:10:36 -0700 Subject: [PATCH 1035/2608] Move bin forking code from arena to bin module. --- include/jemalloc/internal/bin.h | 3 +++ src/arena.c | 6 +++--- src/bin.c | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index d7927221..4e551663 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -80,5 +80,8 @@ struct bin_s { /* Returns true on error. 
*/ bool bin_init(bin_t *bin); +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/src/arena.c b/src/arena.c index 0d27ffb9..a5f24498 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2126,7 +2126,7 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) { void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < NBINS; i++) { - malloc_mutex_prefork(tsdn, &arena->bins[i].lock); + bin_prefork(tsdn, &arena->bins[i]); } } @@ -2135,7 +2135,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + bin_postfork_parent(tsdn, &arena->bins[i]); } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); @@ -2179,7 +2179,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + bin_postfork_child(tsdn, &arena->bins[i]); } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); diff --git a/src/bin.c b/src/bin.c index 89b041db..931a76e2 100644 --- a/src/bin.c +++ b/src/bin.c @@ -21,7 +21,7 @@ const bin_info_t bin_infos[NBINS] = { bool bin_init(bin_t *bin) { - if (malloc_mutex_init(&bin->lock, "arena_bin", WITNESS_RANK_BIN, + if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; } @@ -33,3 +33,18 @@ bin_init(bin_t *bin) { } return false; } + +void +bin_prefork(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_prefork(tsdn, &bin->lock); +} + +void +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_parent(tsdn, &bin->lock); +} + +void +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_child(tsdn, &bin->lock); +} From 8aafa270fd56c36db374fa9f294217fa80151b3d Mon Sep 17 
00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:27:40 -0700 Subject: [PATCH 1036/2608] Move bin stats code from arena to bin module. --- include/jemalloc/internal/bin.h | 21 ++++++++++++++++++++- include/jemalloc/internal/stats.h | 6 ++++++ src/arena.c | 15 +-------------- 3 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 4e551663..89572fa1 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -78,10 +78,29 @@ struct bin_s { malloc_bin_stats_t stats; }; -/* Returns true on error. */ +/* Initializes a bin to empty. Returns true on error. */ bool bin_init(bin_t *bin); + +/* Forking. */ void bin_prefork(tsdn_t *tsdn, bin_t *bin); void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); +/* Stats. */ +static inline void +bin_stats_merge(tsdn_t *tsdn, malloc_bin_stats_t *dst_bin_stats, bin_t *bin) { + malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); + dst_bin_stats->nmalloc += bin->stats.nmalloc; + dst_bin_stats->ndalloc += bin->stats.ndalloc; + dst_bin_stats->nrequests += bin->stats.nrequests; + dst_bin_stats->curregs += bin->stats.curregs; + dst_bin_stats->nfills += bin->stats.nfills; + dst_bin_stats->nflushes += bin->stats.nflushes; + dst_bin_stats->nslabs += bin->stats.nslabs; + dst_bin_stats->reslabs += bin->stats.reslabs; + dst_bin_stats->curslabs += bin->stats.curslabs; + malloc_mutex_unlock(tsdn, &bin->lock); +} + #endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index f19df374..1da5b024 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -6,6 +6,12 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/size_classes.h" +/* + * The synchronization for stats counters may piggyback on existing + * 
synchronization in the associated data. Therefore, the merging functions for + * a module's stats will lie in the module, instead of with the stats. + */ + /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ diff --git a/src/arena.c b/src/arena.c index a5f24498..c02dff18 100644 --- a/src/arena.c +++ b/src/arena.c @@ -337,20 +337,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_subtract(&astats->uptime, &arena->create_time); for (szind_t i = 0; i < NBINS; i++) { - bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(tsdn, &bin->lock); - malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - bstats[i].curregs += bin->stats.curregs; - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - bstats[i].nslabs += bin->stats.nslabs; - bstats[i].reslabs += bin->stats.reslabs; - bstats[i].curslabs += bin->stats.curslabs; - malloc_mutex_unlock(tsdn, &bin->lock); + bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]); } } From 901d94a2b06df09c960836901f6a81a0d3d00732 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 1 Oct 2017 18:54:25 -0700 Subject: [PATCH 1037/2608] Rename cache_alloc_easy to cache_bin_alloc_easy. This lives in the cache_bin module; just a typo. 
--- include/jemalloc/internal/cache_bin.h | 2 +- include/jemalloc/internal/tcache_inlines.h | 4 ++-- src/tcache.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 9b874398..12f3ef2d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -85,7 +85,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_alloc_easy(cache_bin_t *bin, bool *success) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { void *ret; if (unlikely(bin->ncached == 0)) { diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 14ab037f..0a6feb59 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -48,7 +48,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind < NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -109,7 +109,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/tcache.c b/src/tcache.c index 6d516731..a769a6b1 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -95,7 +95,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = cache_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success); return ret; } From 
7f1b02e3fa9de7e0bb5e2562994b5ab3b82c0ec3 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 4 Nov 2017 12:50:19 -0700 Subject: [PATCH 1038/2608] Split up and standardize naming of stats code. The arena-associated stats are now all prefixed with arena_stats_, and live in their own file. Likewise, malloc_bin_stats_t -> bin_stats_t, also in its own file. --- include/jemalloc/internal/arena_externs.h | 6 +- include/jemalloc/internal/arena_stats.h | 237 ++++++++++++++++++++ include/jemalloc/internal/arena_structs_b.h | 4 +- include/jemalloc/internal/bin.h | 6 +- include/jemalloc/internal/bin_stats.h | 51 +++++ include/jemalloc/internal/ctl.h | 4 +- include/jemalloc/internal/stats.h | 140 ------------ src/arena.c | 143 +----------- src/bin.c | 2 +- src/ctl.c | 82 +++---- 10 files changed, 342 insertions(+), 333 deletions(-) create mode 100644 include/jemalloc/internal/arena_stats.h create mode 100644 include/jemalloc/internal/bin_stats.h diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 77a2b541..4b3732b4 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,17 +16,13 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; -void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests); -void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - size_t size); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, 
malloc_large_stats_t *lstats); + bin_stats_t *bstats, arena_stats_large_t *lstats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h new file mode 100644 index 00000000..837d4eb6 --- /dev/null +++ b/include/jemalloc/internal/arena_stats.h @@ -0,0 +1,237 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H +#define JEMALLOC_INTERNAL_ARENA_STATS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/size_classes.h" + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif + +typedef struct arena_stats_large_s arena_stats_large_t; +struct arena_stats_large_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + arena_stats_u64_t nrequests; /* Partially derived. */ + + /* Current number of allocations of this size class. */ + size_t curlextents; /* Derived. */ +}; + +typedef struct arena_stats_decay_s arena_stats_decay_t; +struct arena_stats_decay_s { + /* Total number of purge sweeps. */ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. */ + arena_stats_u64_t purged; +}; + +/* + * Arena stats. 
Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. + */ +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + + /* Number of bytes currently mapped, excluding retained memory. */ + atomic_zu_t mapped; /* Partially derived. */ + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + atomic_zu_t retained; /* Derived. */ + + arena_stats_decay_t decay_dirty; + arena_stats_decay_t decay_muzzy; + + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; + + atomic_zu_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ + + /* Number of bytes cached in tcache associated with this arena. */ + atomic_zu_t tcache_bytes; /* Derived. */ + + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; + + /* One element for each large size class. */ + arena_stats_large_t lstats[NSIZES - NBINS]; + + /* Arena uptime. */ + nstime_t uptime; +}; + +static inline bool +arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { + if (config_debug) { + for (size_t i = 0; i < sizeof(arena_stats_t); i++) { + assert(((char *)arena_stats)[i] == 0); + } + } +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + return true; + } +#endif + /* Memory is zeroed, so there is no need to clear stats. 
*/ + return false; +} + +static inline void +arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_lock(tsdn, &arena_stats->mtx); +#endif +} + +static inline void +arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_unlock(tsdn, &arena_stats->mtx); +#endif +} + +static inline uint64_t +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static inline void +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +UNUSED static inline void +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; + assert(*p + x >= *p); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). 
+ */ +static inline void +arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); +#else + *dst += src; +#endif +} + +static inline size_t +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return atomic_load_zu(p, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ +static inline void +arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); +} + +static inline void +arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - + NBINS].nrequests, nrequests); + arena_stats_unlock(tsdn, arena_stats); +} + +static inline void +arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); + arena_stats_unlock(tsdn, arena_stats); +} + + +#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index d843b09b..38bc9596 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" @@ -11,7 +12,6 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" struct arena_decay_s { @@ -69,7 +69,7 @@ struct arena_decay_s { * arena and ctl code. * * Synchronization: Same as associated arena's stats field. */ - decay_stats_t *stats; + arena_stats_decay_t *stats; /* Peak number of pages in associated extents. Used for debug only. 
*/ uint64_t ceil_npages; }; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 89572fa1..9b416ada 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/stats.h" +#include "jemalloc/internal/bin_stats.h" /* * A bin contains a set of extents that are currently being used for slab @@ -75,7 +75,7 @@ struct bin_s { extent_list_t slabs_full; /* Bin statistics. */ - malloc_bin_stats_t stats; + bin_stats_t stats; }; /* Initializes a bin to empty. Returns true on error. */ @@ -88,7 +88,7 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. */ static inline void -bin_stats_merge(tsdn_t *tsdn, malloc_bin_stats_t *dst_bin_stats, bin_t *bin) { +bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { malloc_mutex_lock(tsdn, &bin->lock); malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); dst_bin_stats->nmalloc += bin->stats.nmalloc; diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h new file mode 100644 index 00000000..86e673ec --- /dev/null +++ b/include/jemalloc/internal/bin_stats.h @@ -0,0 +1,51 @@ +#ifndef JEMALLOC_INTERNAL_BIN_STATS_H +#define JEMALLOC_INTERNAL_BIN_STATS_H + +#include "jemalloc/internal/mutex_prof.h" + +typedef struct bin_stats_s bin_stats_t; +struct bin_stats_s { + /* + * Total number of allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. 
+ */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; + + /* + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. + */ + uint64_t reslabs; + + /* Current number of slabs in this bin. */ + size_t curslabs; + + mutex_prof_data_t mutex_data; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a36feaff..d927d948 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -40,8 +40,8 @@ typedef struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; + bin_stats_t bstats[NBINS]; + arena_stats_large_t lstats[NSIZES - NBINS]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 1da5b024..852e3426 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,17 +1,6 @@ #ifndef JEMALLOC_INTERNAL_STATS_H #define JEMALLOC_INTERNAL_STATS_H -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" - -/* - * The synchronization for stats counters may piggyback on existing - * synchronization in the associated data. Therefore, the merging functions for - * a module's stats will lie in the module, instead of with the stats. 
- */ - /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ @@ -38,133 +27,4 @@ extern char opt_stats_print_opts[stats_print_tot_num_options+1]; void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); -/* - * In those architectures that support 64-bit atomics, we use atomic updates for - * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize - * externally. - */ -#ifdef JEMALLOC_ATOMIC_U64 -typedef atomic_u64_t arena_stats_u64_t; -#else -/* Must hold the arena stats mutex while reading atomically. */ -typedef uint64_t arena_stats_u64_t; -#endif - -typedef struct malloc_bin_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* - * Current number of regions of this size class, including regions - * currently cached by tcache. - */ - size_t curregs; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; - - /* - * Total number of slabs reused by extracting them from the slabs heap - * for this bin's size class. - */ - uint64_t reslabs; - - /* Current number of slabs in this bin. */ - size_t curslabs; - - mutex_prof_data_t mutex_data; -} malloc_bin_stats_t; - -typedef struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. 
- */ - arena_stats_u64_t nmalloc; - arena_stats_u64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - arena_stats_u64_t nrequests; /* Partially derived. */ - - /* Current number of allocations of this size class. */ - size_t curlextents; /* Derived. */ -} malloc_large_stats_t; - -typedef struct decay_stats_s { - /* Total number of purge sweeps. */ - arena_stats_u64_t npurge; - /* Total number of madvise calls made. */ - arena_stats_u64_t nmadvise; - /* Total number of pages purged. */ - arena_stats_u64_t purged; -} decay_stats_t; - -/* - * Arena stats. Note that fields marked "derived" are not directly maintained - * within the arena code; rather their values are derived during stats merge - * requests. - */ -typedef struct arena_stats_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; -#endif - - /* Number of bytes currently mapped, excluding retained memory. */ - atomic_zu_t mapped; /* Partially derived. */ - - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - atomic_zu_t retained; /* Derived. */ - - decay_stats_t decay_dirty; - decay_stats_t decay_muzzy; - - atomic_zu_t base; /* Derived. */ - atomic_zu_t internal; - atomic_zu_t resident; /* Derived. */ - atomic_zu_t metadata_thp; - - atomic_zu_t allocated_large; /* Derived. */ - arena_stats_u64_t nmalloc_large; /* Derived. */ - arena_stats_u64_t ndalloc_large; /* Derived. */ - arena_stats_u64_t nrequests_large; /* Derived. */ - - /* Number of bytes cached in tcache associated with this arena. */ - atomic_zu_t tcache_bytes; /* Derived. */ - - mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; - - /* One element for each large size class. 
*/ - malloc_large_stats_t lstats[NSIZES - NBINS]; - - /* Arena uptime. */ - nstime_t uptime; -} arena_stats_t; - #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/src/arena.c b/src/arena.c index c02dff18..e3693d54 100644 --- a/src/arena.c +++ b/src/arena.c @@ -57,145 +57,6 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, /******************************************************************************/ -static bool -arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { - if (config_debug) { - for (size_t i = 0; i < sizeof(arena_stats_t); i++) { - assert(((char *)arena_stats)[i] == 0); - } - } -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { - return true; - } -#endif - /* Memory is zeroed, so there is no need to clear stats. */ - return false; -} - -static void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -UNUSED static void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= 
r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst += src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). - */ -static void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ -static void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - -void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - NBINS].nrequests, nrequests); - arena_stats_unlock(tsdn, arena_stats); -} - -void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); -} - void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, @@ -213,7 +74,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { + bin_stats_t *bstats, arena_stats_large_t *lstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -729,7 +590,7 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { static bool arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, - decay_stats_t *stats) { + arena_stats_decay_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); diff --git a/src/bin.c b/src/bin.c index 931a76e2..0886bc4e 100644 --- a/src/bin.c +++ b/src/bin.c @@ -29,7 +29,7 @@ bin_init(bin_t *bin) { extent_heap_new(&bin->slabs_nonfull); extent_list_init(&bin->slabs_full); if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); + memset(&bin->stats, 0, 
sizeof(bin_stats_t)); } return false; } diff --git a/src/ctl.c b/src/ctl.c index aae8b6ec..3a22423b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -560,7 +560,7 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { +ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { #ifdef JEMALLOC_ATOMIC_U64 uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); @@ -572,7 +572,7 @@ accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { /* Likewise: with ctl mutex synchronization, reading is simple. */ static uint64_t -arena_stats_read_u64(arena_stats_u64_t *p) { +ctl_arena_stats_read_u64(arena_stats_u64_t *p) { #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(p, ATOMIC_RELAXED); #else @@ -580,7 +580,8 @@ arena_stats_read_u64(arena_stats_u64_t *p) { #endif } -static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +static void +accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -690,9 +691,9 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; memset(ctl_arena->astats->bstats, 0, NBINS * - sizeof(malloc_bin_stats_t)); + sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * - sizeof(malloc_large_stats_t)); + sizeof(arena_stats_large_t)); } } @@ -755,18 +756,18 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.retained); } - accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, &astats->astats.decay_dirty.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + 
ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, &astats->astats.decay_muzzy.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -806,11 +807,11 @@ MUTEX_PROF_ARENA_MUTEXES assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); accum_atomic_zu(&sdstats->astats.tcache_bytes, @@ -847,11 +848,11 @@ MUTEX_PROF_ARENA_MUTEXES } for (i = 0; i < NSIZES - NBINS; i++) { - accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ 
-2545,24 +2546,24 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), @@ -2592,14 +2593,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - 
arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.ndalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) +/* + * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. + */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) /* Intentional. */ + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ @@ -2717,14 +2721,14 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) From 21f7c13d0b172dac6ea76236bbe0a2f3ee4bcb7b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Dec 2017 17:30:50 -0800 Subject: [PATCH 1039/2608] Add the div module, which allows fast division by dynamic values. 
--- Makefile.in | 2 ++ include/jemalloc/internal/div.h | 41 ++++++++++++++++++++++++ src/div.c | 55 +++++++++++++++++++++++++++++++++ src/sz.c | 3 +- test/unit/div.c | 29 +++++++++++++++++ 5 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/div.h create mode 100644 src/div.c create mode 100644 test/unit/div.c diff --git a/Makefile.in b/Makefile.in index 2f0b3b24..96c4ae03 100644 --- a/Makefile.in +++ b/Makefile.in @@ -97,6 +97,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ + $(srcroot)src/div.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ @@ -165,6 +166,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ + $(srcroot)test/unit/div.c \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h new file mode 100644 index 00000000..aebae939 --- /dev/null +++ b/include/jemalloc/internal/div.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_DIV_H +#define JEMALLOC_INTERNAL_DIV_H + +#include "jemalloc/internal/assert.h" + +/* + * This module does the division that computes the index of a region in a slab, + * given its offset relative to the base. + * That is, given a divisor d, an n = i * d (all integers), we'll return i. + * We do some pre-computation to do this more quickly than a CPU division + * instruction. + * We bound n < 2^32, and don't support dividing by one. + */ + +typedef struct div_info_s div_info_t; +struct div_info_s { + uint32_t magic; +#ifdef JEMALLOC_DEBUG + size_t d; +#endif +}; + +void div_init(div_info_t *div_info, size_t divisor); + +static inline size_t +div_compute(div_info_t *div_info, size_t n) { + assert(n <= (uint32_t)-1); + /* + * This generates, e.g. mov; imul; shr on x86-64. 
On a 32-bit machine, + * the compilers I tried were all smart enough to turn this into the + * appropriate "get the high 32 bits of the result of a multiply" (e.g. + * mul; mov edx eax; on x86, umull on arm, etc.). + */ + size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; +#ifdef JEMALLOC_DEBUG + assert(i * div_info->d == n); +#endif + return i; +} + +#endif /* JEMALLOC_INTERNAL_DIV_H */ diff --git a/src/div.c b/src/div.c new file mode 100644 index 00000000..808892a1 --- /dev/null +++ b/src/div.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/div.h" + +#include "jemalloc/internal/assert.h" + +/* + * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d. + * + * For any k, we have (here, all division is exact; not C-style rounding): + * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where + * r = (-2^k) mod d. + * + * Expanding this out: + * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k) + * = floor(n / d + (r / d) * (n / 2^k)). + * + * The fractional part of n / d is 0 (because of the assumption that d divides n + * exactly), so we have: + * ... = n / d + floor((r / d) * (n / 2^k)) + * + * So that our initial expression is equal to the quantity we seek, so long as + * (r / d) * (n / 2^k) < 1. + * + * r is a remainder mod d, so r < d and r / d < 1 always. We can make + * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works. + */ + +void +div_init(div_info_t *div_info, size_t d) { + /* Nonsensical. */ + assert(d != 0); + /* + * This would make the value of magic too high to fit into a uint32_t + * (we would want magic = 2^32 exactly). This would mess with code gen + * on 32-bit machines. + */ + assert(d != 1); + + uint64_t two_to_k = ((uint64_t)1 << 32); + uint32_t magic = (uint32_t)(two_to_k / d); + + /* + * We want magic = ceil(2^k / d), but C gives us floor. We have to + * increment it unless the result was exact (i.e. 
unless d is a power of + * two). + */ + if (two_to_k % d != 0) { + magic++; + } + div_info->magic = magic; +#ifdef JEMALLOC_DEBUG + div_info->d = d; +#endif +} diff --git a/src/sz.c b/src/sz.c index 0986615f..9de77e45 100644 --- a/src/sz.c +++ b/src/sz.c @@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = { JEMALLOC_ALIGNED(CACHELINE) const uint8_t sz_size2index_tab[] = { #if LG_TINY_MIN == 0 -#warning "Dangerous LG_TINY_MIN" +/* The div module doesn't support division by 1. */ +#error "Unsupported LG_TINY_MIN" #define S2B_0(i) i, #elif LG_TINY_MIN == 1 #warning "Dangerous LG_TINY_MIN" diff --git a/test/unit/div.c b/test/unit/div.c new file mode 100644 index 00000000..b47f10b2 --- /dev/null +++ b/test/unit/div.c @@ -0,0 +1,29 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/div.h" + +TEST_BEGIN(test_div_exhaustive) { + for (size_t divisor = 2; divisor < 1000 * 1000; ++divisor) { + div_info_t div_info; + div_init(&div_info, divisor); + size_t max = 1000 * divisor; + if (max < 1000 * 1000) { + max = 1000 * 1000; + } + for (size_t dividend = 0; dividend < 1000 * divisor; + dividend += divisor) { + size_t quotient = div_compute( + &div_info, dividend); + assert_zu_eq(dividend, quotient * divisor, + "With divisor = %zu, dividend = %zu, " + "got quotient %zu", divisor, dividend, quotient); + } + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_div_exhaustive); +} From d41b19f9c70c9dd8244e0879c7aef7943a34c750 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 20 Dec 2017 17:21:56 -0800 Subject: [PATCH 1040/2608] Implement arena regind computation using div_info_t. This eliminates the need to generate an enormous switch statement in arena_slab_regind. 
--- src/arena.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/src/arena.c b/src/arena.c index e3693d54..40ef143d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" @@ -39,6 +40,8 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; +static div_info_t arena_binind_div_info[NBINS]; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -247,24 +250,10 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % (uintptr_t)bin_infos[binind].reg_size == 0); - /* Avoid doing division with a variable divisor. 
*/ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - switch (binind) { -#define REGIND_bin_yes(index, reg_size) \ - case index: \ - regind = diff / (reg_size); \ - assert(diff == regind * (reg_size)); \ - break; -#define REGIND_bin_no(index, reg_size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - REGIND_bin_##bin(index, (1U< Date: Sat, 30 Dec 2017 14:31:34 -0800 Subject: [PATCH 1041/2608] handle 32 bit mutex counters --- include/jemalloc/internal/mutex_prof.h | 31 +++++++--- src/stats.c | 83 +++++++++++++++----------- 2 files changed, 69 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 3358bcf5..735c0adb 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -35,21 +35,34 @@ typedef enum { mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; -#define MUTEX_PROF_COUNTERS \ +#define MUTEX_PROF_UINT64_COUNTERS \ OP(num_ops, uint64_t) \ OP(num_wait, uint64_t) \ - OP(num_spin_acq, uint64_t) \ - OP(num_owner_switch, uint64_t) \ - OP(total_wait_time, uint64_t) \ - OP(max_wait_time, uint64_t) \ + OP(num_spin_acq, uint64_t) \ + OP(num_owner_switch, uint64_t) \ + OP(total_wait_time, uint64_t) \ + OP(max_wait_time, uint64_t) + +#define MUTEX_PROF_UINT32_COUNTERS \ OP(max_num_thds, uint32_t) -typedef enum { +#define MUTEX_PROF_COUNTERS \ + MUTEX_PROF_UINT64_COUNTERS \ + MUTEX_PROF_UINT32_COUNTERS + #define OP(counter, type) mutex_counter_##counter, - MUTEX_PROF_COUNTERS + +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list \ + mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; + +COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) +COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) + +#undef COUNTER_ENUM #undef OP - mutex_prof_num_counters -} mutex_prof_counter_ind_t; typedef struct { /* diff --git a/src/stats.c b/src/stats.c index 33e44269..0a89b4b0 
100644 --- a/src/stats.c +++ b/src/stats.c @@ -85,34 +85,38 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, static void read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[mutex_prof_num_counters]) { + uint64_t results_uint64_t[mutex_prof_num_uint64_t_counters], + uint32_t results_uint32_t[mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.bins.0","mutex", #c); \ CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (t *)&results[mutex_counter_##c], t); -MUTEX_PROF_COUNTERS + (t *)&results_##t[mutex_counter_##c], t); + MUTEX_PROF_COUNTERS #undef OP } static void mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], + const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], + uint32_t stats_uint32_t[mutex_prof_num_uint32_t_counters], const char *json_indent, bool last) { malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - mutex_prof_counter_ind_t k = 0; + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", "%s\t\"%s\": %"FMTu64"%s\n"}; #define OP(c, t) \ malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - json_indent, #c, (t)stats[mutex_counter_##c], \ - (++k == mutex_prof_num_counters) ? "" : ","); -MUTEX_PROF_COUNTERS + json_indent, #c, (t)stats_##t[mutex_counter_##c], \ + (++k_##t && k_uint32_t == mutex_prof_num_uint32_t_counters) ? "" : ","); + MUTEX_PROF_COUNTERS #undef OP - malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, + +malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, last ? 
"" : ","); } @@ -192,10 +196,11 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, nmalloc, ndalloc, curregs, nrequests, nfills, nflushes, nreslabs, curslabs, mutex ? "," : ""); if (mutex) { - uint64_t mutex_stats[mutex_prof_num_counters]; - read_arena_bin_mutex_stats(i, j, mutex_stats); + uint64_t mutex_stats_64[mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_32[mutex_prof_num_uint32_t_counters]; + read_arena_bin_mutex_stats(i, j, mutex_stats_64, mutex_stats_32); mutex_stats_output_json(write_cb, cbopaque, - "mutex", mutex_stats, "\t\t\t\t\t\t", true); + "mutex", mutex_stats_64, mutex_stats_32, "\t\t\t\t\t\t", true); } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\t}%s\n", @@ -222,9 +227,10 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, not_reached(); } } - uint64_t mutex_stats[mutex_prof_num_counters]; + uint64_t mutex_stats_64[mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_32[mutex_prof_num_uint32_t_counters]; if (mutex) { - read_arena_bin_mutex_stats(i, j, mutex_stats); + read_arena_bin_mutex_stats(i, j, mutex_stats_64, mutex_stats_32); } malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" @@ -239,14 +245,14 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, malloc_cprintf(write_cb, cbopaque, " %12"FMTu64" %12"FMTu64" %12"FMTu64 " %14"FMTu64" %14"FMTu64" %12"FMTu64 - " %10"FMTu64"\n", - mutex_stats[mutex_counter_num_ops], - mutex_stats[mutex_counter_num_wait], - mutex_stats[mutex_counter_num_spin_acq], - mutex_stats[mutex_counter_num_owner_switch], - mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time], - mutex_stats[mutex_counter_max_num_thds]); + " %10"FMTu32"\n", + mutex_stats_64[mutex_counter_num_ops], + mutex_stats_64[mutex_counter_num_wait], + mutex_stats_64[mutex_counter_num_spin_acq], + mutex_stats_64[mutex_counter_num_owner_switch], + mutex_stats_64[mutex_counter_total_wait_time], + 
mutex_stats_64[mutex_counter_max_wait_time], + mutex_stats_32[mutex_counter_max_num_thds]); } else { malloc_cprintf(write_cb, cbopaque, "\n"); } @@ -329,7 +335,8 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), static void read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]) { + uint64_t results_uint64_t[mutex_prof_num_arena_mutexes][mutex_prof_num_uint64_t_counters], + uint32_t results_uint32_t[mutex_prof_num_arena_mutexes][mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; mutex_prof_arena_ind_t i; @@ -338,7 +345,7 @@ read_arena_mutex_stats(unsigned arena_ind, gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.mutexes", arena_mutex_names[i], #c); \ CTL_M2_GET(cmd, arena_ind, \ - (t *)&results[i][mutex_counter_##c], t); + (t *)&results_##t[i][mutex_counter_##c], t); MUTEX_PROF_COUNTERS #undef OP } @@ -346,7 +353,8 @@ MUTEX_PROF_COUNTERS static void mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], + const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], + uint32_t stats_uint32_t[mutex_prof_num_uint32_t_counters], bool first_mutex) { if (first_mutex) { /* Print title. 
*/ @@ -364,7 +372,7 @@ mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, #define OP(c, t) \ malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - (t)stats[mutex_counter_##c]); + (t)stats_##t[mutex_counter_##c]); MUTEX_PROF_COUNTERS #undef OP malloc_cprintf(write_cb, cbopaque, "\n"); @@ -373,8 +381,9 @@ MUTEX_PROF_COUNTERS static void stats_arena_mutexes_print(void (*write_cb)(void *, const char *), void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]; - read_arena_mutex_stats(arena_ind, mutex_stats); + uint64_t mutex_stats_64[mutex_prof_num_arena_mutexes][mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_32[mutex_prof_num_arena_mutexes][mutex_prof_num_uint32_t_counters]; + read_arena_mutex_stats(arena_ind, mutex_stats_64, mutex_stats_32); /* Output mutex stats. */ if (json) { @@ -383,7 +392,7 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), last_mutex = mutex_prof_num_arena_mutexes - 1; for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], + arena_mutex_names[i], mutex_stats_64[i], mutex_stats_32[i], "\t\t\t\t\t", (i == last_mutex)); } malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", @@ -392,7 +401,7 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), mutex_prof_arena_ind_t i; for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], i == 0); + arena_mutex_names[i], mutex_stats_64[i], mutex_stats_32[i], i == 0); } } } @@ -1004,7 +1013,8 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, static void read_global_mutex_stats( - uint64_t results[mutex_prof_num_global_mutexes][mutex_prof_num_counters]) { + uint64_t results_uint64_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint64_t_counters], + 
uint32_t results_uint32_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; mutex_prof_global_ind_t i; @@ -1012,7 +1022,7 @@ read_global_mutex_stats( #define OP(c, t) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "mutexes", global_mutex_names[i], #c); \ - CTL_GET(cmd, (t *)&results[i][mutex_counter_##c], t); + CTL_GET(cmd, (t *)&results_##t[i][mutex_counter_##c], t); MUTEX_PROF_COUNTERS #undef OP } @@ -1035,9 +1045,10 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[mutex_prof_num_global_mutexes][mutex_prof_num_counters]; + uint64_t mutex_stats_uint64_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint64_t_counters]; + uint32_t mutex_stats_uint32_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint32_t_counters]; if (mutex) { - read_global_mutex_stats(mutex_stats); + read_global_mutex_stats(mutex_stats_uint64_t, mutex_stats_uint32_t); } if (have_background_thread) { @@ -1091,7 +1102,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, mutex_prof_global_ind_t i; for (i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_output_json(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], + global_mutex_names[i], mutex_stats_uint64_t[i], mutex_stats_uint32_t[i], "\t\t\t\t", i == mutex_prof_num_global_mutexes - 1); } @@ -1118,7 +1129,7 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, mutex_prof_global_ind_t i; for (i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_output(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], + global_mutex_names[i], mutex_stats_uint64_t[i], mutex_stats_uint32_t[i], i == 0); } } From 72bdbc35e3231db91def5f466d41778ee04d7e64 Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Tue, 2 Jan 2018 21:10:01 -0800 Subject: [PATCH 1042/2608] extent_t bitpacking logic 
refactoring --- include/jemalloc/internal/extent_structs.h | 56 +++++++++++----------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 89b49c72..4873b9e9 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -86,42 +86,42 @@ struct extent_s { * serial number to both resulting adjacent extents. */ uint64_t e_bits; -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK \ - (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) -#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS -#define EXTENT_BITS_SLAB_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) -#define EXTENT_BITS_COMMITTED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_DUMPABLE_SHIFT (MALLOCX_ARENA_BITS + 2) -#define EXTENT_BITS_DUMPABLE_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 3) -#define EXTENT_BITS_ZEROED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT 
(EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 4) -#define EXTENT_BITS_STATE_MASK \ - ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 6) -#define EXTENT_BITS_SZIND_MASK \ - (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_NFREE_SHIFT \ - (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES) -#define EXTENT_BITS_NFREE_MASK \ - ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 6 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) +#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. 
*/ void *e_addr; From 433c2edabc5c03ae069ac652857c05c673807d0c Mon Sep 17 00:00:00 2001 From: marxin Date: Tue, 2 Jan 2018 10:29:19 +0100 Subject: [PATCH 1043/2608] Disable JEMALLOC_HAVE_MADVISE_HUGE for arm* CPUs. --- configure.ac | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/configure.ac b/configure.ac index 7544f57e..9432dc60 100644 --- a/configure.ac +++ b/configure.ac @@ -1863,9 +1863,15 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) +case "${host_cpu}" in + arm*) + ;; + *) if test "x${je_cv_thp}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) fi + ;; +esac fi dnl Enable transparent huge page support by default. From 78a87e4a80e9bf379c0dc660374173ef394252f6 Mon Sep 17 00:00:00 2001 From: Nehal J Wani Date: Sun, 31 Dec 2017 06:52:33 -0600 Subject: [PATCH 1044/2608] Make sure JE_CXXFLAGS_ADD uses CPP compiler All the invocations of AC_COMPILE_IFELSE inside JE_CXXFLAGS_ADD were running 'the compiler and compilation flags of the current language' which was always the C compiler and the CXXFLAGS were never being tested against a C++ compiler. This patch fixes this issue by temporarily changing the chosen compiler to C++ by pushing it over the stack and popping it immediately after the compilation check. 
--- configure.ac | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.ac b/configure.ac index 9432dc60..231b6dfb 100644 --- a/configure.ac +++ b/configure.ac @@ -76,6 +76,7 @@ AC_MSG_CHECKING([whether compiler supports $1]) T_CONFIGURE_CXXFLAGS="${CONFIGURE_CXXFLAGS}" JE_APPEND_VS(CONFIGURE_CXXFLAGS, $1) JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) +AC_LANG_PUSH([C++]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM( [[ ]], [[ @@ -87,6 +88,7 @@ AC_COMPILE_IFELSE([AC_LANG_PROGRAM( AC_MSG_RESULT([no]) [CONFIGURE_CXXFLAGS="${T_CONFIGURE_CXXFLAGS}"] ) +AC_LANG_POP([C++]) JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) From 91b247d311ce6837aa93d4315f5f7680abd8a11a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Dec 2017 11:19:50 -0800 Subject: [PATCH 1045/2608] In iallocztm, check lock rank only when not in reentrancy. --- include/jemalloc/internal/jemalloc_internal_inlines_c.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index c54fc992..499ac1b1 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -45,8 +45,10 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(size != 0); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { From 41790f4fa475434ea84b8509b9a68e63d9a86f95 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Dec 2017 11:22:16 -0800 Subject: 
[PATCH 1046/2608] Check tsdn_null before reading reentrancy level. --- include/jemalloc/internal/jemalloc_internal_inlines_c.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 499ac1b1..c829ac60 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -111,7 +111,8 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - if (!is_internal && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + if (!is_internal && !tsdn_null(tsdn) && + tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); From ba5992fe9ac1708c812ec65bff3270bba17f1e1b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 21 Dec 2017 16:17:45 -0800 Subject: [PATCH 1047/2608] Improve the fit for aligned allocation. We compute the max size required to satisfy an alignment. However this can be quite pessimistic, especially with frequent reuse (and combined with state-based fragmentation). This commit adds one more fit step specific to aligned allocations, searching in all potential fit size classes. --- src/extent.c | 71 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/src/extent.c b/src/extent.c index bca703fc..517780ee 100644 --- a/src/extent.c +++ b/src/extent.c @@ -363,6 +363,43 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +/* + * Find an extent with size [min_size, max_size) to satisfy the alignment + * requirement. For each size, try only the first extent in the heap. 
+ */ +static extent_t * +extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, + size_t alignment) { + pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); + + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, + &extents_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + (size_t)i+1)) { + assert(i < NPSIZES); + assert(!extent_heap_empty(&extents->heaps[i])); + extent_t *extent = extent_heap_first(&extents->heaps[i]); + uintptr_t base = (uintptr_t)extent_base_get(extent); + size_t candidate_size = extent_size_get(extent); + assert(candidate_size >= min_size); + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= min_size) { + return extent; + } + } + + return NULL; +} + /* Do any-best-fit extent selection, i.e. select any extent that best fits. */ static extent_t * extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, @@ -424,12 +461,30 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, */ static extent_t * extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { + size_t esize, size_t alignment) { malloc_mutex_assert_owner(tsdn, &extents->mtx); - return extents->delay_coalesce ? extents_best_fit_locked(tsdn, arena, - extents, size) : extents_first_fit_locked(tsdn, arena, extents, - size); + size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (max_size < esize) { + return NULL; + } + + extent_t *extent = extents->delay_coalesce ? 
+ extents_best_fit_locked(tsdn, arena, extents, max_size) : + extents_first_fit_locked(tsdn, arena, extents, max_size); + + if (alignment > PAGE && extent == NULL) { + /* + * max_size guarantees the alignment requirement but is rather + * pessimistic. Next we try to satisfy the aligned allocation + * with sizes in [esize, max_size). + */ + extent = extents_fit_alignment(extents, esize, max_size, + alignment); + } + + return extent; } static bool @@ -821,11 +876,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } size_t esize = size + pad; - size_t alloc_size = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (alloc_size < esize) { - return NULL; - } malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; @@ -847,7 +897,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, extents, alloc_size); + extent = extents_fit_locked(tsdn, arena, extents, esize, + alignment); } if (extent == NULL) { malloc_mutex_unlock(tsdn, &extents->mtx); From f78d4ca3fbff6cab0c704c787706a53ddafcbe13 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Fri, 22 Sep 2017 12:24:50 -0700 Subject: [PATCH 1048/2608] Modify configure to determine return value of strerror_r. On glibc and Android's bionic, strerror_r returns char* when _GNU_SOURCE is defined. Add a configure check for this rather than assume glibc is the only libc that behaves this way. 
--- configure.ac | 19 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 5 +++++ src/malloc_io.c | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 231b6dfb..b58540e1 100644 --- a/configure.ac +++ b/configure.ac @@ -2072,6 +2072,25 @@ if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ]) fi +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-D_GNU_SOURCE]) +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) +JE_COMPILABLE([strerror_r returns char with gnu source], [ +#include +#include +#include +#include +], [ + char *buffer = (char *) malloc(100); + char *error = strerror_r(EINVAL, buffer, 100); + printf("%s\n", error); +], [je_cv_strerror_r_returns_char_with_gnu_source]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then + AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ]) +fi + dnl ============================================================================ dnl Check for typedefs, structures, and compiler characteristics. AC_HEADER_STDBOOL diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index aadfbed4..8dad9a1d 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -358,4 +358,9 @@ /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ #undef JEMALLOC_IS_MALLOC +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. 
+ */ +#undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/malloc_io.c b/src/malloc_io.c index 6b99afcd..fd27bd1c 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -111,7 +111,7 @@ buferror(int err, char *buf, size_t buflen) { FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); return 0; -#elif defined(__GLIBC__) && defined(_GNU_SOURCE) +#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); From ed52d24f740ddf42b78c59ad0fdc8cd0ffe5c376 Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 19 Jan 2018 16:28:07 +0100 Subject: [PATCH 1049/2608] Define JEMALLOC_NO_PRIVATE_NAMESPACE also in Visual Studio x86 targets --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 78f92c98..c32d3e11 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -197,7 +197,7 @@ Level3 Disabled - _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -213,7 +213,7 @@ Level3 Disabled - JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreadedDebug 4090;4146;4267;4334 @@ -266,7 +266,7 @@ MaxSpeed true true - 
_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) 4090;4146;4267;4334 $(OutputPath)$(TargetName).pdb @@ -286,7 +286,7 @@ MaxSpeed true true - _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) MultiThreaded 4090;4146;4267;4334 From 83aa9880b706ab185aa84f2bf6057477efdd5fd6 Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 19 Jan 2018 16:28:33 +0100 Subject: [PATCH 1050/2608] Make generated headers usable in both x86 and x64 mode in Visual Studio --- include/jemalloc/internal/jemalloc_internal_decls.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 8ae5ef48..be70df51 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -5,7 +5,16 @@ #ifdef _WIN32 # include # include "msvc_compat/windows_extra.h" - +# ifdef _WIN64 +# if LG_VADDR <= 32 +# error Generate the headers using x64 vcargs +# endif +# else +# if LG_VADDR > 32 +# undef LG_VADDR +# define LG_VADDR 32 +# endif +# endif #else # include # include From a3abbb4bdf168dbaa32938a2e995005a65d142ba Mon Sep 17 00:00:00 2001 From: Maks Naumov Date: Sun, 11 Feb 2018 19:47:04 +0200 Subject: [PATCH 1051/2608] Fix MSVC build --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 ++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 6 ++++++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 2 ++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 6 ++++++ 4 files changed, 16 insertions(+) diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c32d3e11..f7b175b0 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -38,9 +38,11 @@ + + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index dba976ed..11cfcd0b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -91,5 +91,11 @@ Source Files + + Source Files + + + Source Files + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index e49dbbd6..ed71de8a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -38,9 +38,11 @@ + + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index dba976ed..11cfcd0b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -91,5 +91,11 @@ Source Files + + Source Files + + + Source Files + \ No newline at end of file From ae0f5d5c3f29beb9977148dedb58575757139586 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 14 Feb 2018 12:03:42 -0800 Subject: [PATCH 1052/2608] CI: Remove "catgets" dependency on appveyor. This seems to cause a configuration error with msys2. 
--- .appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.appveyor.yml b/.appveyor.yml index 9a7d00a9..ad093c1c 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -31,6 +31,7 @@ install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Rsc catgets - pacman --noconfirm -Suy mingw-w64-%CPU%-make build_script: From dd7e283b6f7f18054af3e14457251757945ab17d Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 2 Feb 2018 19:18:18 -0800 Subject: [PATCH 1053/2608] Tweak the ticker paths to help GCC generate better code. GCC on its own isn't quite able to turn the ticker subtract into a memory operation followed by a js. --- include/jemalloc/internal/ticker.h | 38 ++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 572b9645..4b360470 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -32,14 +32,42 @@ ticker_read(const ticker_t *ticker) { return ticker->tick; } +/* + * Not intended to be a public API. Unfortunately, on x86, neither gcc nor + * clang seems smart enough to turn + * ticker->tick -= nticks; + * if (unlikely(ticker->tick < 0)) { + * fixup ticker + * return true; + * } + * return false; + * into + * subq %nticks_reg, (%ticker_reg) + * js fixup ticker + * + * unless we force "fixup ticker" out of line. In that case, gcc gets it right, + * but clang now does worse than before. So, on x86 with gcc, we force it out + * of line, but otherwise let the inlining occur. Ordinarily this wouldn't be + * worth the hassle, but this is on the fast path of both malloc and free (via + * tcache_event). 
+ */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__x86_64__) || defined(__i386__)) +JEMALLOC_NOINLINE +#endif +static bool +ticker_fixup(ticker_t *ticker) { + ticker->tick = ticker->nticks; + return true; +} + static inline bool ticker_ticks(ticker_t *ticker, int32_t nticks) { - if (unlikely(ticker->tick < nticks)) { - ticker->tick = ticker->nticks; - return true; - } ticker->tick -= nticks; - return(false); + if (unlikely(ticker->tick < 0)) { + return ticker_fixup(ticker); + } + return false; } static inline bool From 26b1c1398264dec25bf998f6bec21799ad4513da Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 26 Feb 2018 17:11:29 -0800 Subject: [PATCH 1054/2608] Background threads: fix an indexing bug. We have a buffer overrun that manifests in the case where arena indices higher than the number of CPUs are accessed before arena indices lower than the number of CPUs. This fixes the bug and adds a test. --- Makefile.in | 1 + src/background_thread.c | 3 ++- test/unit/background_thread_enable.c | 34 ++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 test/unit/background_thread_enable.c diff --git a/Makefile.in b/Makefile.in index 96c4ae03..aefd6d87 100644 --- a/Makefile.in +++ b/Makefile.in @@ -162,6 +162,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/background_thread.c \ + $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/ckh.c \ diff --git a/src/background_thread.c b/src/background_thread.c index 6baff22b..a8a5a052 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -600,7 +600,8 @@ background_threads_enable(tsd_t *tsd) { arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } - background_thread_info_t *info = &background_thread_info[i]; + background_thread_info_t *info = &background_thread_info[ + i % ncpus]; 
malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c new file mode 100644 index 00000000..9bb58652 --- /dev/null +++ b/test/unit/background_thread_enable.c @@ -0,0 +1,34 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "background_thread:false,narenas:1"; + +TEST_BEGIN(test_deferred) { + test_skip_if(!have_background_thread); + + unsigned id; + size_t sz_u = sizeof(unsigned); + + /* + * 10 here is somewhat arbitrary, except insofar as we want to ensure + * that the number of background threads is smaller than the number of + * arenas. I'll ragequit long before we have to spin up 10 threads per + * cpu to handle background purging, so this is a conservative + * approximation. + */ + for (unsigned i = 0; i < 10 * ncpus; i++) { + assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, + "Failed to create arena"); + } + + bool enable = true; + size_t sz_b = sizeof(bool); + assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + "Failed to enable background threads"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_deferred); +} From 548153e789580a3a943cc564c7d95fb0523e8b19 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Jan 2018 15:55:24 -0800 Subject: [PATCH 1055/2608] Remove unused code in test/thread_tcache_enabled. 
--- test/integration/thread_tcache_enabled.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 0c343a6c..95c9acc1 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -60,8 +60,6 @@ thd_start(void *arg) { free(malloc(1)); return NULL; - test_skip("\"thread.tcache.enabled\" mallctl not available"); - return NULL; } TEST_BEGIN(test_main_thread) { From 6b35366ef55bb5987c7ac91e1c100e9e55ef15cc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Jan 2018 16:09:51 -0800 Subject: [PATCH 1056/2608] Skip test_alignment_and_size if percpu_arena is enabled. test_alignment_and_size needs a lot of memory. When percpu_arena is enabled, multiple arenas may cause the test to OOM. --- test/integration/mallocx.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index b0b5cdac..35d72093 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -151,9 +151,17 @@ TEST_BEGIN(test_basic) { TEST_END TEST_BEGIN(test_alignment_and_size) { + const char *percpu_arena; + size_t sz = sizeof(percpu_arena); + + if(mallctl("opt.percpu_arena", &percpu_arena, &sz, NULL, 0) || + strcmp(percpu_arena, "disabled") != 0) { + test_skip("test_alignment_and_size skipped: " + "not working with percpu arena."); + }; #define MAXALIGN (((size_t)1) << 23) #define NITER 4 - size_t nsz, rsz, sz, alignment, total; + size_t nsz, rsz, alignment, total; unsigned i; void *ps[NITER]; From efa40532dc0fc000345086757ecaf8875313a012 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 5 Mar 2018 14:45:04 -0800 Subject: [PATCH 1057/2608] Remove config.thp which wasn't in use. 
--- configure.ac | 22 ------------------- doc/jemalloc.xml.in | 10 --------- .../jemalloc/internal/jemalloc_preamble.h.in | 7 ------ src/ctl.c | 3 --- src/stats.c | 1 - 5 files changed, 43 deletions(-) diff --git a/configure.ac b/configure.ac index b58540e1..1e32f7fa 100644 --- a/configure.ac +++ b/configure.ac @@ -1876,27 +1876,6 @@ case "${host_cpu}" in esac fi -dnl Enable transparent huge page support by default. -AC_ARG_ENABLE([thp], - [AS_HELP_STRING([--disable-thp], - [Disable transparent huge page support])], -[if test "x$enable_thp" = "xno" -o "x${je_cv_thp}" != "xyes" ; then - enable_thp="0" -else - enable_thp="1" -fi -], -[if test "x${je_cv_thp}" = "xyes" ; then - enable_thp="1" -else - enable_thp="0" -fi -]) -if test "x$enable_thp" = "x1" ; then - AC_DEFINE([JEMALLOC_THP], [ ]) -fi -AC_SUBST([enable_thp]) - dnl ============================================================================ dnl Check whether __sync_{add,sub}_and_fetch() are available despite dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. @@ -2269,7 +2248,6 @@ AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) -AC_MSG_RESULT([thp : ${enable_thp}]) AC_MSG_RESULT([fill : ${enable_fill}]) AC_MSG_RESULT([utrace : ${enable_utrace}]) AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3f9ba201..4fdb53fc 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -852,16 +852,6 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", build configuration. - - - config.thp - (bool) - r- - - was not specified - during build configuration, and the system supports transparent huge - page manipulation. 
- diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index f81f3a40..e621fbc8 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -122,13 +122,6 @@ static const bool config_stats = false #endif ; -static const bool config_thp = -#ifdef JEMALLOC_THP - true -#else - false -#endif - ; static const bool config_tls = #ifdef JEMALLOC_TLS true diff --git a/src/ctl.c b/src/ctl.c index 3a22423b..17672493 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -75,7 +75,6 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_thp) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) @@ -271,7 +270,6 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("thp"), CTL(config_thp)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -1575,7 +1573,6 @@ CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) diff --git a/src/stats.c b/src/stats.c index 0a89b4b0..11959cbe 100644 --- a/src/stats.c +++ b/src/stats.c @@ -732,7 +732,6 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(thp, ",") CONFIG_WRITE_BOOL_JSON(utrace, ",") CONFIG_WRITE_BOOL_JSON(xmalloc, "") From e4f090e8df5adf180662c5eeac2af214f9594de4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 16 Feb 2018 14:19:19 -0800 Subject: [PATCH 1058/2608] Add 
opt.thp which allows explicit hugepage usage. "always" marks all user mappings as MADV_HUGEPAGE; while "never" marks all mappings as MADV_NOHUGEPAGE. The default setting "default" does not change any settings. Note that all the madvise calls are part of the default extent hooks by design, so that customized extent hooks have complete control over the mappings including hugepage settings. --- doc/jemalloc.xml.in | 22 ++++++++ include/jemalloc/internal/pages.h | 16 +++++- src/base.c | 3 +- src/ctl.c | 3 ++ src/extent.c | 12 ++--- src/jemalloc.c | 24 ++++++++- src/pages.c | 88 +++++++++++++++++++++++++------ src/stats.c | 1 + test/unit/mallctl.c | 1 + test/unit/pages.c | 2 +- 10 files changed, 143 insertions(+), 29 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4fdb53fc..9ecd8a1f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1217,6 +1217,28 @@ malloc_conf = "xmalloc:true";]]> default maximum is 32 KiB (2^15). + + + opt.thp + (const char *) + r- + + Transparent hugepage (THP) mode. Settings "always", + "never" and "default" are available if THP is supported by the operating + system. The "always" setting enables transparent hugepage for all user + memory mappings with + MADV_HUGEPAGE; "never" + ensures no transparent hugepage with + MADV_NOHUGEPAGE; the default + setting "default" makes no changes. Note that: this option does not + affect THP for jemalloc internal metadata (see opt.metadata_thp); + in addition, for arenas with customized extent_hooks, + this option is bypassed as it is implemented as part of the default + extent hooks. + + opt.prof diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index dff20515..7dae633a 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,8 +58,19 @@ static const bool pages_can_purge_forced = #endif ; -/* Whether transparent huge page state is "madvise". 
*/ -extern bool thp_state_madvise; +typedef enum { + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ +} thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_default +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern const char *thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); @@ -72,5 +83,6 @@ bool pages_nohuge(void *addr, size_t size); bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); +void pages_set_thp_state (void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/base.c b/src/base.c index cc3d9781..bb897a25 100644 --- a/src/base.c +++ b/src/base.c @@ -24,7 +24,8 @@ const char *metadata_thp_mode_names[] = { static inline bool metadata_thp_madvise(void) { - return (metadata_thp_enabled() && thp_state_madvise); + return (metadata_thp_enabled() && + (init_system_thp_mode == thp_mode_default)); } static void * diff --git a/src/ctl.c b/src/ctl.c index 17672493..aaf6e35a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -94,6 +94,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_thp) CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) @@ -292,6 +293,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("thp"), CTL(opt_thp)}, {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, @@ -1597,6 +1599,7 @@ 
CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) diff --git a/src/extent.c b/src/extent.c index 517780ee..88d331f7 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1173,11 +1173,12 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - - ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } return ret; } @@ -1266,9 +1267,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, void *ptr; if (*r_extent_hooks == &extent_hooks_default) { - ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, - &zeroed, &committed, (dss_prec_t)atomic_load_u( - &arena->dss_prec, ATOMIC_RELAXED)); + ptr = extent_alloc_default_impl(tsdn, arena, NULL, + alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, diff --git a/src/jemalloc.c b/src/jemalloc.c index f4fd805e..4dde8fbc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1152,9 +1152,8 @@ malloc_conf_init(void) { CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { - int i; bool match = false; - for (i = percpu_arena_mode_names_base; i < + for (int i = 
percpu_arena_mode_names_base; i < percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { @@ -1204,6 +1203,27 @@ malloc_conf_init(void) { continue; } } + if (CONF_MATCH("thp")) { + bool match = false; + for (int i = 0; i < thp_mode_names_limit; i++) { + if (strncmp(thp_mode_names[i],v, vlen) + == 0) { + if (!have_madvise_huge) { + malloc_conf_error( + "No THP support", + k, klen, v, vlen); + } + opt_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH diff --git a/src/pages.c b/src/pages.c index c839471f..82405219 100644 --- a/src/pages.c +++ b/src/pages.c @@ -28,7 +28,14 @@ static int mmap_flags; #endif static bool os_overcommits; -bool thp_state_madvise; +const char *thp_mode_names[] = { + "default", + "always", + "never", + "not supported" +}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. 
*/ static bool pages_can_purge_lazy_runtime = true; @@ -307,11 +314,12 @@ pages_purge_forced(void *addr, size_t size) { #endif } -bool -pages_huge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); - +static bool +pages_huge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } #ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_HUGEPAGE) != 0); #else @@ -320,9 +328,21 @@ pages_huge(void *addr, size_t size) { } bool -pages_nohuge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); +pages_huge(void *addr, size_t size) { + return pages_huge_impl(addr, size, true); +} + +static bool +pages_huge_unaligned(void *addr, size_t size) { + return pages_huge_impl(addr, size, false); +} + +static bool +pages_nohuge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } #ifdef JEMALLOC_HAVE_MADVISE_HUGE return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); @@ -331,6 +351,16 @@ pages_nohuge(void *addr, size_t size) { #endif } +bool +pages_nohuge(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, true); +} + +static bool +pages_nohuge_unaligned(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, false); +} + bool pages_dontdump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); @@ -469,6 +499,25 @@ os_overcommits_proc(void) { } #endif +void +pages_set_thp_state (void *ptr, size_t size) { + if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + return; + } + assert(opt_thp != thp_mode_not_supported && + init_system_thp_mode != thp_mode_not_supported); + + if (opt_thp == thp_mode_always + && init_system_thp_mode != thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default); + 
pages_huge_unaligned(ptr, size); + } else if (opt_thp == thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default || + init_system_thp_mode == thp_mode_always); + pages_nohuge_unaligned(ptr, size); + } +} + static void init_thp_state(void) { if (!have_madvise_huge) { @@ -479,8 +528,10 @@ init_thp_state(void) { goto label_error; } - static const char madvise_state[] = "always [madvise] never\n"; - char buf[sizeof(madvise_state)]; + static const char sys_state_madvise[] = "always [madvise] never\n"; + static const char sys_state_always[] = "[always] madvise never\n"; + static const char sys_state_never[] = "always madvise [never]\n"; + char buf[sizeof(sys_state_madvise)]; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) int fd = (int)syscall(SYS_open, @@ -504,15 +555,18 @@ init_thp_state(void) { close(fd); #endif - if (nread < 1) { + if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_default; + } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_always; + } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_never; + } else { goto label_error; } - if (strncmp(buf, madvise_state, (size_t)nread) == 0) { - thp_state_madvise = true; - return; - } + return; label_error: - thp_state_madvise = false; + opt_thp = init_system_thp_mode = thp_mode_not_supported; } bool diff --git a/src/stats.c b/src/stats.c index 11959cbe..9efb9a19 100644 --- a/src/stats.c +++ b/src/stats.c @@ -837,6 +837,7 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, OPT_WRITE_BOOL(xmalloc, ",") OPT_WRITE_BOOL(tcache, ",") OPT_WRITE_SSIZE_T(lg_tcache_max, ",") + OPT_WRITE_CHAR_P(thp, ",") OPT_WRITE_BOOL(prof, ",") OPT_WRITE_CHAR_P(prof_prefix, ",") OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e812b52f..c9ba6c5d 100644 --- a/test/unit/mallctl.c +++ 
b/test/unit/mallctl.c @@ -174,6 +174,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, tcache, always); TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); + TEST_MALLCTL_OPT(const char *, thp, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); diff --git a/test/unit/pages.c b/test/unit/pages.c index 49ad0091..ee729eec 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -10,7 +10,7 @@ TEST_BEGIN(test_pages_huge) { pages = pages_map(NULL, alloc_size, PAGE, &commit); assert_ptr_not_null(pages, "Unexpected pages_map() error"); - if (thp_state_madvise) { + if (init_system_thp_mode == thp_mode_default) { hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, "Unexpected pages_huge() result"); From 27a8fe6780cb901668489495b2fc302a2d071d8c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 1 Mar 2018 15:48:46 -0800 Subject: [PATCH 1059/2608] Introduce the emitter module. The emitter can be used to produce structured json or tabular output. For now it has no uses; in subsequent commits, I'll begin transitioning stats printing code over. 
--- Makefile.in | 1 + include/jemalloc/internal/emitter.h | 381 ++++++++++++++++++++++++++++ test/unit/emitter.c | 351 +++++++++++++++++++++++++ 3 files changed, 733 insertions(+) create mode 100644 include/jemalloc/internal/emitter.h create mode 100644 test/unit/emitter.c diff --git a/Makefile.in b/Makefile.in index aefd6d87..e229196c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -168,6 +168,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ + $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h new file mode 100644 index 00000000..c82dbdb1 --- /dev/null +++ b/include/jemalloc/internal/emitter.h @@ -0,0 +1,381 @@ +#ifndef JEMALLOC_INTERNAL_EMITTER_H +#define JEMALLOC_INTERNAL_EMITTER_H + +typedef enum emitter_output_e emitter_output_t; +enum emitter_output_e { + emitter_output_json, + emitter_output_table +}; + +typedef enum emitter_justify_e emitter_justify_t; +enum emitter_justify_e { + emitter_justify_left, + emitter_justify_right, + /* Not for users; just to pass to internal functions. */ + emitter_justify_none +}; + +typedef enum emitter_type_e emitter_type_t; +enum emitter_type_e { + emitter_type_bool, + emitter_type_int, + emitter_type_unsigned, + emitter_type_uint32, + emitter_type_uint64, + emitter_type_size, + emitter_type_ssize, + emitter_type_string, +}; + +typedef struct emitter_s emitter_t; +struct emitter_s { + emitter_output_t output; + /* The output information. */ + void (*write_cb)(void *, const char *); + void *cbopaque; + int nesting_depth; + /* True if we've already emitted a value at the given depth. 
*/ + bool item_at_depth; +}; + +static inline void +emitter_init(emitter_t *emitter, emitter_output_t emitter_output, + void (*write_cb)(void *, const char *), void *cbopaque) { + emitter->output = emitter_output; + emitter->write_cb = write_cb; + emitter->cbopaque = cbopaque; + emitter->item_at_depth = false; + emitter->nesting_depth = 0; +} + +/* Internal convenience function. Write to the emitter the given string. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_printf(emitter_t *emitter, const char *format, ...) { + va_list ap; + + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); +} + +/* Write to the emitter the given string, but only in table mode. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_table_printf(emitter_t *emitter, const char *format, ...) { + if (emitter->output == emitter_output_table) { + va_list ap; + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); + } +} + +static inline void +emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, + emitter_justify_t justify, int width) { + size_t written; + if (justify == emitter_justify_none) { + written = malloc_snprintf(out_fmt, out_size, + "%%%s", fmt_specifier); + } else if (justify == emitter_justify_left) { + written = malloc_snprintf(out_fmt, out_size, + "%%-%d%s", width, fmt_specifier); + } else { + written = malloc_snprintf(out_fmt, out_size, + "%%%d%s", width, fmt_specifier); + } + /* Only happens in case of bad format string, which *we* choose. */ + assert(written < out_size); +} + +/* + * Internal. Emit the given value type in the relevant encoding (so that the + * bool true gets mapped to json "true", but the string "true" gets mapped to + * json "\"true\"", for instance. + * + * Width is ignored if justify is emitter_justify_none. 
+ */ +static inline void +emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, + emitter_type_t value_type, const void *value) { + size_t str_written; +#define BUF_SIZE 256 +#define FMT_SIZE 10 + /* + * We dynamically generate a format string to emit, to let us use the + * snprintf machinery. This is kinda hacky, but gets the job done + * quickly without having to think about the various snprintf edge + * cases. + */ + char fmt[FMT_SIZE]; + char buf[BUF_SIZE]; + +#define EMIT_SIMPLE(type, format) \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \ + emitter_printf(emitter, fmt, *(const type *)value); \ + + switch (value_type) { + case emitter_type_bool: + emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); + emitter_printf(emitter, fmt, *(const bool *)value ? + "true" : "false"); + break; + case emitter_type_int: + EMIT_SIMPLE(int, "d") + break; + case emitter_type_unsigned: + EMIT_SIMPLE(unsigned, "u") + break; + case emitter_type_ssize: + EMIT_SIMPLE(ssize_t, "zd") + break; + case emitter_type_size: + EMIT_SIMPLE(size_t, "zu") + break; + case emitter_type_string: + str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", + *(const char *const *)value); + /* + * We control the strings we output; we shouldn't get anything + * anywhere near the fmt size. + */ + assert(str_written < BUF_SIZE); + + /* + * We don't support justified quoted string primitive values for + * now. Fortunately, we don't want to emit them. + */ + + emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); + emitter_printf(emitter, fmt, buf); + break; + case emitter_type_uint32: + EMIT_SIMPLE(uint32_t, FMTu32) + break; + case emitter_type_uint64: + EMIT_SIMPLE(uint64_t, FMTu64) + break; + default: + unreachable(); + } +#undef BUF_SIZE +#undef FMT_SIZE +} + + +/* Internal functions. In json mode, tracks nesting state. 
*/ +static inline void +emitter_nest_inc(emitter_t *emitter) { + emitter->nesting_depth++; + emitter->item_at_depth = false; +} + +static inline void +emitter_nest_dec(emitter_t *emitter) { + emitter->nesting_depth--; + emitter->item_at_depth = true; +} + +static inline void +emitter_indent(emitter_t *emitter) { + int amount = emitter->nesting_depth; + const char *indent_str; + if (emitter->output == emitter_output_json) { + indent_str = "\t"; + } else { + amount *= 2; + indent_str = " "; + } + for (int i = 0; i < amount; i++) { + emitter_printf(emitter, "%s", indent_str); + } +} + +static inline void +emitter_json_key_prefix(emitter_t *emitter) { + emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); + emitter_indent(emitter); +} + +static inline void +emitter_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 0); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } else { + // tabular init + emitter_printf(emitter, ""); + } +} + +static inline void +emitter_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 1); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n}\n"); + } +} + +/* + * Note emits a different kv pair as well, but only in table mode. Omits the + * note if table_note_key is NULL. 
+ */ +static inline void +emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value, + const char *table_note_key, emitter_type_t table_note_value_type, + const void *table_note_value) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": ", json_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + } else { + emitter_indent(emitter); + emitter_printf(emitter, "%s: ", table_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + if (table_note_key != NULL) { + emitter_printf(emitter, " (%s: ", table_note_key); + emitter_print_value(emitter, emitter_justify_none, -1, + table_note_value_type, table_note_value); + emitter_printf(emitter, ")"); + } + emitter_printf(emitter, "\n"); + } + emitter->item_at_depth = true; +} + +static inline void +emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value) { + emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL, + emitter_type_bool, NULL); +} + +static inline void +emitter_json_kv(emitter_t *emitter, const char *json_key, + emitter_type_t value_type, const void *value) { + if (emitter->output == emitter_output_json) { + emitter_kv(emitter, json_key, NULL, value_type, value); + } +} + +static inline void +emitter_table_kv(emitter_t *emitter, const char *table_key, + emitter_type_t value_type, const void *value) { + if (emitter->output == emitter_output_table) { + emitter_kv(emitter, NULL, table_key, value_type, value); + } +} + +static inline void +emitter_dict_begin(emitter_t *emitter, const char *json_key, + const char *table_header) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": {", json_key); + emitter_nest_inc(emitter); + } else { + 
emitter_indent(emitter); + emitter_printf(emitter, "%s\n", table_header); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } else { + emitter_nest_dec(emitter); + } +} + +static inline void +emitter_json_dict_begin(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_dict_begin(emitter, json_key, NULL); + } +} + +static inline void +emitter_json_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_dict_end(emitter); + } +} + +static inline void +emitter_table_dict_begin(emitter_t *emitter, const char *table_key) { + if (emitter->output == emitter_output_table) { + emitter_dict_begin(emitter, NULL, table_key); + } +} + +static inline void +emitter_table_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_table) { + emitter_dict_end(emitter); + } +} + +static inline void +emitter_json_arr_begin(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": [", json_key); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_json_arr_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "]"); + } +} + +static inline void +emitter_json_arr_obj_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_json_arr_obj_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) 
{ + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } +} + +static inline void +emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type, + const void *value) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + } +} + +#endif /* JEMALLOC_INTERNAL_EMITTER_H */ diff --git a/test/unit/emitter.c b/test/unit/emitter.c new file mode 100644 index 00000000..c2216b22 --- /dev/null +++ b/test/unit/emitter.c @@ -0,0 +1,351 @@ +#include "test/jemalloc_test.h" +#include "jemalloc/internal/emitter.h" + +/* + * This is so useful for debugging and feature work, we'll leave printing + * functionality committed but disabled by default. + */ +/* Print the text as it will appear. */ +static bool print_raw = false; +/* Print the text escaped, so it can be copied back into the test case. */ +static bool print_escaped = false; + +typedef struct buf_descriptor_s buf_descriptor_t; +struct buf_descriptor_s { + char *buf; + size_t len; + bool mid_quote; +}; + +/* + * Forwards all writes to the passed-in buf_v (which should be cast from a + * buf_descriptor_t *). 
+ */ +static void +forwarding_cb(void *buf_descriptor_v, const char *str) { + buf_descriptor_t *buf_descriptor = (buf_descriptor_t *)buf_descriptor_v; + + if (print_raw) { + malloc_printf("%s", str); + } + if (print_escaped) { + const char *it = str; + while (*it != '\0') { + if (!buf_descriptor->mid_quote) { + malloc_printf("\""); + buf_descriptor->mid_quote = true; + } + switch (*it) { + case '\\': + malloc_printf("\\"); + break; + case '\"': + malloc_printf("\\\""); + break; + case '\t': + malloc_printf("\\t"); + break; + case '\n': + malloc_printf("\\n\"\n"); + buf_descriptor->mid_quote = false; + break; + default: + malloc_printf("%c", *it); + } + it++; + } + } + + size_t written = malloc_snprintf(buf_descriptor->buf, + buf_descriptor->len, "%s", str); + assert_zu_eq(written, strlen(str), "Buffer overflow!"); + buf_descriptor->buf += written; + buf_descriptor->len -= written; + assert_zu_gt(buf_descriptor->len, 0, "Buffer out of space!"); +} + +static void +assert_emit_output(void (*emit_fn)(emitter_t *), + const char *expected_json_output, const char *expected_table_output) { + emitter_t emitter; + char buf[MALLOC_PRINTF_BUFSIZE]; + buf_descriptor_t buf_descriptor; + + buf_descriptor.buf = buf; + buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; + buf_descriptor.mid_quote = false; + + emitter_init(&emitter, emitter_output_json, &forwarding_cb, + &buf_descriptor); + (*emit_fn)(&emitter); + assert_str_eq(expected_json_output, buf, "json output failure"); + + buf_descriptor.buf = buf; + buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; + buf_descriptor.mid_quote = false; + + emitter_init(&emitter, emitter_output_table, &forwarding_cb, + &buf_descriptor); + (*emit_fn)(&emitter); + assert_str_eq(expected_table_output, buf, "table output failure"); +} + +static void +emit_dict(emitter_t *emitter) { + bool b_false = false; + bool b_true = true; + int i_123 = 123; + const char *str = "a string"; + + emitter_begin(emitter); + emitter_dict_begin(emitter, "foo", "This is the foo 
table:"); + emitter_kv(emitter, "abc", "ABC", emitter_type_bool, &b_false); + emitter_kv(emitter, "def", "DEF", emitter_type_bool, &b_true); + emitter_kv_note(emitter, "ghi", "GHI", emitter_type_int, &i_123, + "note_key1", emitter_type_string, &str); + emitter_kv_note(emitter, "jkl", "JKL", emitter_type_string, &str, + "note_key2", emitter_type_bool, &b_false); + emitter_dict_end(emitter); + emitter_end(emitter); +} +static const char *dict_json = +"{\n" +"\t\"foo\": {\n" +"\t\t\"abc\": false,\n" +"\t\t\"def\": true,\n" +"\t\t\"ghi\": 123,\n" +"\t\t\"jkl\": \"a string\"\n" +"\t}\n" +"}\n"; +static const char *dict_table = +"This is the foo table:\n" +" ABC: false\n" +" DEF: true\n" +" GHI: 123 (note_key1: \"a string\")\n" +" JKL: \"a string\" (note_key2: false)\n"; + +TEST_BEGIN(test_dict) { + assert_emit_output(&emit_dict, dict_json, dict_table); +} +TEST_END + +static void +emit_table_printf(emitter_t *emitter) { + emitter_begin(emitter); + emitter_table_printf(emitter, "Table note 1\n"); + emitter_table_printf(emitter, "Table note 2 %s\n", + "with format string"); + emitter_end(emitter); +} + +static const char *table_printf_json = +"{\n" +"}\n"; + +static const char *table_printf_table = +"Table note 1\n" +"Table note 2 with format string\n"; + +TEST_BEGIN(test_table_printf) { + assert_emit_output(&emit_table_printf, table_printf_json, + table_printf_table); +} +TEST_END + +static void emit_nested_dict(emitter_t *emitter) { + int val = 123; + emitter_begin(emitter); + emitter_dict_begin(emitter, "json1", "Dict 1"); + emitter_dict_begin(emitter, "json2", "Dict 2"); + emitter_kv(emitter, "primitive", "A primitive", emitter_type_int, &val); + emitter_dict_end(emitter); /* Close 2 */ + emitter_dict_begin(emitter, "json3", "Dict 3"); + emitter_dict_end(emitter); /* Close 3 */ + emitter_dict_end(emitter); /* Close 1 */ + emitter_dict_begin(emitter, "json4", "Dict 4"); + emitter_kv(emitter, "primitive", "Another primitive", + emitter_type_int, &val); + 
emitter_dict_end(emitter); /* Close 4 */ + emitter_end(emitter); +} + +static const char *nested_dict_json = +"{\n" +"\t\"json1\": {\n" +"\t\t\"json2\": {\n" +"\t\t\t\"primitive\": 123\n" +"\t\t},\n" +"\t\t\"json3\": {\n" +"\t\t}\n" +"\t},\n" +"\t\"json4\": {\n" +"\t\t\"primitive\": 123\n" +"\t}\n" +"}\n"; + +static const char *nested_dict_table = +"Dict 1\n" +" Dict 2\n" +" A primitive: 123\n" +" Dict 3\n" +"Dict 4\n" +" Another primitive: 123\n"; + +TEST_BEGIN(test_nested_dict) { + assert_emit_output(&emit_nested_dict, nested_dict_json, + nested_dict_table); +} +TEST_END + +static void +emit_types(emitter_t *emitter) { + bool b = false; + int i = -123; + unsigned u = 123; + ssize_t zd = -456; + size_t zu = 456; + const char *str = "string"; + uint32_t u32 = 789; + uint64_t u64 = 10000000000ULL; + + emitter_begin(emitter); + emitter_kv(emitter, "k1", "K1", emitter_type_bool, &b); + emitter_kv(emitter, "k2", "K2", emitter_type_int, &i); + emitter_kv(emitter, "k3", "K3", emitter_type_unsigned, &u); + emitter_kv(emitter, "k4", "K4", emitter_type_ssize, &zd); + emitter_kv(emitter, "k5", "K5", emitter_type_size, &zu); + emitter_kv(emitter, "k6", "K6", emitter_type_string, &str); + emitter_kv(emitter, "k7", "K7", emitter_type_uint32, &u32); + emitter_kv(emitter, "k8", "K8", emitter_type_uint64, &u64); + emitter_end(emitter); +} + +static const char *types_json = +"{\n" +"\t\"k1\": false,\n" +"\t\"k2\": -123,\n" +"\t\"k3\": 123,\n" +"\t\"k4\": -456,\n" +"\t\"k5\": 456,\n" +"\t\"k6\": \"string\",\n" +"\t\"k7\": 789,\n" +"\t\"k8\": 10000000000\n" +"}\n"; + +static const char *types_table = +"K1: false\n" +"K2: -123\n" +"K3: 123\n" +"K4: -456\n" +"K5: 456\n" +"K6: \"string\"\n" +"K7: 789\n" +"K8: 10000000000\n"; + +TEST_BEGIN(test_types) { + assert_emit_output(&emit_types, types_json, types_table); +} +TEST_END + +static void +emit_modal(emitter_t *emitter) { + int val = 123; + emitter_begin(emitter); + emitter_dict_begin(emitter, "j0", "T0"); + 
emitter_json_dict_begin(emitter, "j1"); + emitter_kv(emitter, "i1", "I1", emitter_type_int, &val); + emitter_json_kv(emitter, "i2", emitter_type_int, &val); + emitter_table_kv(emitter, "I3", emitter_type_int, &val); + emitter_table_dict_begin(emitter, "T1"); + emitter_kv(emitter, "i4", "I4", emitter_type_int, &val); + emitter_json_dict_end(emitter); /* Close j1 */ + emitter_kv(emitter, "i5", "I5", emitter_type_int, &val); + emitter_table_dict_end(emitter); /* Close T1 */ + emitter_kv(emitter, "i6", "I6", emitter_type_int, &val); + emitter_dict_end(emitter); /* Close j0 / T0 */ + emitter_end(emitter); +} + +const char *modal_json = +"{\n" +"\t\"j0\": {\n" +"\t\t\"j1\": {\n" +"\t\t\t\"i1\": 123,\n" +"\t\t\t\"i2\": 123,\n" +"\t\t\t\"i4\": 123\n" +"\t\t},\n" +"\t\t\"i5\": 123,\n" +"\t\t\"i6\": 123\n" +"\t}\n" +"}\n"; + +const char *modal_table = +"T0\n" +" I1: 123\n" +" I3: 123\n" +" T1\n" +" I4: 123\n" +" I5: 123\n" +" I6: 123\n"; + +TEST_BEGIN(test_modal) { + assert_emit_output(&emit_modal, modal_json, modal_table); +} +TEST_END + +static void +emit_json_arr(emitter_t *emitter) { + int ival = 123; + + emitter_begin(emitter); + emitter_json_dict_begin(emitter, "dict"); + emitter_json_arr_begin(emitter, "arr"); + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "foo", emitter_type_int, &ival); + emitter_json_arr_obj_end(emitter); /* Close arr[0] */ + /* arr[1] and arr[2] are primitives. */ + emitter_json_arr_value(emitter, emitter_type_int, &ival); + emitter_json_arr_value(emitter, emitter_type_int, &ival); + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "bar", emitter_type_int, &ival); + emitter_json_kv(emitter, "baz", emitter_type_int, &ival); + emitter_json_arr_obj_end(emitter); /* Close arr[3]. */ + emitter_json_arr_end(emitter); /* Close arr. */ + emitter_json_dict_end(emitter); /* Close dict. 
*/ + emitter_end(emitter); +} + +static const char *json_arr_json = +"{\n" +"\t\"dict\": {\n" +"\t\t\"arr\": [\n" +"\t\t\t{\n" +"\t\t\t\t\"foo\": 123\n" +"\t\t\t},\n" +"\t\t\t123,\n" +"\t\t\t123,\n" +"\t\t\t{\n" +"\t\t\t\t\"bar\": 123,\n" +"\t\t\t\t\"baz\": 123\n" +"\t\t\t}\n" +"\t\t]\n" +"\t}\n" +"}\n"; + +static const char *json_arr_table = ""; + +TEST_BEGIN(test_json_arr) { + assert_emit_output(&emit_json_arr, json_arr_json, json_arr_table); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_dict, + test_table_printf, + test_nested_dict, + test_types, + test_modal, + test_json_arr); +} From b646f89173be53d4f5eb59a894dbcdd64b457bee Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 1 Mar 2018 17:29:58 -0800 Subject: [PATCH 1060/2608] Stats printing: Convert header and footer to use emitter. --- src/stats.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/stats.c b/src/stats.c index 9efb9a19..c238d34a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emitter.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" @@ -1289,15 +1290,17 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "{\n" - "\t\"jemalloc\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - } + emitter_t emitter; + emitter_init(&emitter, + json ? 
emitter_output_json : emitter_output_table, write_cb, + cbopaque); + emitter_begin(&emitter); + emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); + emitter_json_dict_begin(&emitter, "jemalloc"); + if (json) { + malloc_cprintf(write_cb, cbopaque, "\n"); + } if (general) { stats_general_print(write_cb, cbopaque, json, config_stats); } @@ -1306,12 +1309,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, unmerged, bins, large, mutex); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t}\n" - "}\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "--- End jemalloc statistics ---\n"); - } + emitter_json_dict_end(&emitter); /* Closes the "jemalloc" dict. */ + emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); + emitter_end(&emitter); } From 4a335e0c6f6fa371edcd7663eebfe11cf93a1f17 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 1 Mar 2018 17:38:15 -0800 Subject: [PATCH 1061/2608] Stats printing: convert config and opt output to use emitter. This is a step along the path towards using the emitter for all stats output. --- src/stats.c | 283 +++++++++++++++++++--------------------------------- 1 file changed, 105 insertions(+), 178 deletions(-) diff --git a/src/stats.c b/src/stats.c index c238d34a..7c849474 100644 --- a/src/stats.c +++ b/src/stats.c @@ -668,14 +668,21 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, } static void -stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool more) { +stats_general_print(emitter_t *emitter, bool more) { + /* + * These should eventually be deleted; they are useful in converting + * from manual to emitter-based stats output, though. 
+ */ + void (*write_cb)(void *, const char *) = emitter->write_cb; + void *cbopaque = emitter->cbopaque; + bool json = (emitter->output == emitter_output_json); + const char *cpv; - bool bv; + bool bv, bv2; unsigned uv; uint32_t u32v; uint64_t u64v; - ssize_t ssv; + ssize_t ssv, ssv2; size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); @@ -685,186 +692,104 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"version\": \"%s\",\n", cpv); - } else { - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - } + emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); /* config. */ -#define CONFIG_WRITE_BOOL_JSON(n, c) \ - if (json) { \ - CTL_GET("config."#n, &bv, bool); \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : "false", \ - (c)); \ - } + emitter_dict_begin(emitter, "config", "Build-time option settings"); +#define CONFIG_WRITE_BOOL(name) \ + do { \ + CTL_GET("config."#name, &bv, bool); \ + emitter_kv(emitter, #name, "config."#name, \ + emitter_type_bool, &bv); \ + } while (0) - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"config\": {\n"); - } + CONFIG_WRITE_BOOL(cache_oblivious); + CONFIG_WRITE_BOOL(debug); + CONFIG_WRITE_BOOL(fill); + CONFIG_WRITE_BOOL(lazy_lock); + emitter_kv(emitter, "malloc_conf", "config.malloc_conf", + emitter_type_string, &config_malloc_conf); - CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") - - CTL_GET("config.debug", &bv, bool); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); - } else { - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? 
"enabled" : "disabled"); - } - - CONFIG_WRITE_BOOL_JSON(fill, ",") - CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"malloc_conf\": \"%s\",\n", - config_malloc_conf); - } else { - malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - } - - CONFIG_WRITE_BOOL_JSON(prof, ",") - CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") - CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") - CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(utrace, ",") - CONFIG_WRITE_BOOL_JSON(xmalloc, "") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } -#undef CONFIG_WRITE_BOOL_JSON + CONFIG_WRITE_BOOL(prof); + CONFIG_WRITE_BOOL(prof_libgcc); + CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(stats); + CONFIG_WRITE_BOOL(utrace); + CONFIG_WRITE_BOOL(xmalloc); +#undef CONFIG_WRITE_BOOL + emitter_dict_end(emitter); /* Close "config" dict. */ /* opt. */ -#define OPT_WRITE_BOOL(n, c) \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ - bool bv2; \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&bv2, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ - } \ -} -#define OPT_WRITE_UNSIGNED(n, c) \ - if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } \ - } -#define OPT_WRITE_SSIZE_T(n, c) \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ - } \ -} -#define OPT_WRITE_CHAR_P(n, c) \ - if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } \ +#define OPT_WRITE(name, var, size, emitter_type) \ + if (je_mallctl("opt."#name, (void *)&var, &size, NULL, 0) == \ + 0) { \ + emitter_kv(emitter, #name, "opt."#name, emitter_type, \ + &var); \ } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"opt\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - } - OPT_WRITE_BOOL(abort, ",") - OPT_WRITE_BOOL(abort_conf, ",") - OPT_WRITE_BOOL(retain, ",") - OPT_WRITE_CHAR_P(dss, ",") - OPT_WRITE_UNSIGNED(narenas, ",") - OPT_WRITE_CHAR_P(percpu_arena, ",") - OPT_WRITE_CHAR_P(metadata_thp, ",") - OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") - 
OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") - OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms, ",") - OPT_WRITE_UNSIGNED(lg_extent_max_active_fit, ",") - OPT_WRITE_CHAR_P(junk, ",") - OPT_WRITE_BOOL(zero, ",") - OPT_WRITE_BOOL(utrace, ",") - OPT_WRITE_BOOL(xmalloc, ",") - OPT_WRITE_BOOL(tcache, ",") - OPT_WRITE_SSIZE_T(lg_tcache_max, ",") - OPT_WRITE_CHAR_P(thp, ",") - OPT_WRITE_BOOL(prof, ",") - OPT_WRITE_CHAR_P(prof_prefix, ",") - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, - ",") - OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") - OPT_WRITE_BOOL(prof_accum, ",") - OPT_WRITE_SSIZE_T(lg_prof_interval, ",") - OPT_WRITE_BOOL(prof_gdump, ",") - OPT_WRITE_BOOL(prof_final, ",") - OPT_WRITE_BOOL(prof_leak, ",") - OPT_WRITE_BOOL(stats_print, ",") - if (json || opt_stats_print) { - /* - * stats_print_opts is always emitted for JSON, so as long as it - * comes last it's safe to unconditionally omit the comma here - * (rather than having to conditionally omit it elsewhere - * depending on configuration). 
- */ - OPT_WRITE_CHAR_P(stats_print_opts, "") - } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); +#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ + altname) \ + if (je_mallctl("opt."#name, (void *)&var1, &size, NULL, 0) == \ + 0 && je_mallctl(#altname, (void *)&var2, &size, NULL, 0) \ + == 0) { \ + emitter_kv_note(emitter, #name, "opt."#name, \ + emitter_type, &var1, #altname, emitter_type, \ + &var2); \ } +#define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool) +#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname) + +#define OPT_WRITE_UNSIGNED(name) \ + OPT_WRITE(name, uv, usz, emitter_type_unsigned) + +#define OPT_WRITE_SSIZE_T(name) \ + OPT_WRITE(name, ssv, sssz, emitter_type_ssize) +#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \ + altname) + +#define OPT_WRITE_CHAR_P(name) \ + OPT_WRITE(name, cpv, cpsz, emitter_type_string) + + emitter_dict_begin(emitter, "opt", "Run-time option settings"); + + OPT_WRITE_BOOL(abort) + OPT_WRITE_BOOL(abort_conf) + OPT_WRITE_BOOL(retain) + OPT_WRITE_CHAR_P(dss) + OPT_WRITE_UNSIGNED(narenas) + OPT_WRITE_CHAR_P(percpu_arena) + OPT_WRITE_CHAR_P(metadata_thp) + OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread) + OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms) + OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms) + OPT_WRITE_UNSIGNED(lg_extent_max_active_fit) + OPT_WRITE_CHAR_P(junk) + OPT_WRITE_BOOL(zero) + OPT_WRITE_BOOL(utrace) + OPT_WRITE_BOOL(xmalloc) + OPT_WRITE_BOOL(tcache) + OPT_WRITE_SSIZE_T(lg_tcache_max) + OPT_WRITE_CHAR_P(thp) + OPT_WRITE_BOOL(prof) + OPT_WRITE_CHAR_P(prof_prefix) + OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) + OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init) + OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample) + OPT_WRITE_BOOL(prof_accum) + 
OPT_WRITE_SSIZE_T(lg_prof_interval) + OPT_WRITE_BOOL(prof_gdump) + OPT_WRITE_BOOL(prof_final) + OPT_WRITE_BOOL(prof_leak) + OPT_WRITE_BOOL(stats_print) + OPT_WRITE_CHAR_P(stats_print_opts) + + emitter_dict_end(emitter); + +#undef OPT_WRITE +#undef OPT_WRITE_MUTABLE #undef OPT_WRITE_BOOL #undef OPT_WRITE_BOOL_MUTABLE #undef OPT_WRITE_UNSIGNED @@ -872,6 +797,11 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, #undef OPT_WRITE_SSIZE_T_MUTABLE #undef OPT_WRITE_CHAR_P + if (json) { + malloc_cprintf(write_cb, cbopaque, + ",\n"); + } + /* arenas. */ if (json) { malloc_cprintf(write_cb, cbopaque, @@ -1298,11 +1228,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); emitter_json_dict_begin(&emitter, "jemalloc"); - if (json) { - malloc_cprintf(write_cb, cbopaque, "\n"); - } if (general) { - stats_general_print(write_cb, cbopaque, json, config_stats); + stats_general_print(&emitter, config_stats); } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, destroyed, From e5acc3540011fc6c3cec6aa97c567ff280617b74 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 1 Mar 2018 18:02:42 -0800 Subject: [PATCH 1062/2608] Stats printing: Convert general arena stats to use the emitter. --- src/stats.c | 146 +++++++++++++++++++++++----------------------------- 1 file changed, 64 insertions(+), 82 deletions(-) diff --git a/src/stats.c b/src/stats.c index 7c849474..432c5a1a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -797,118 +797,100 @@ stats_general_print(emitter_t *emitter, bool more) { #undef OPT_WRITE_SSIZE_T_MUTABLE #undef OPT_WRITE_CHAR_P - if (json) { - malloc_cprintf(write_cb, cbopaque, - ",\n"); - } - /* arenas. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"arenas\": {\n"); - } + /* + * The json output sticks arena info into an "arenas" dict; the table + * output puts them at the top-level. 
+ */ + emitter_json_dict_begin(emitter, "arenas"); CTL_GET("arenas.narenas", &uv, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"narenas\": %u,\n", uv); - } else { - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - } + emitter_kv(emitter, "narenas", "Arenas", emitter_type_unsigned, &uv); - if (json) { - CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"dirty_decay_ms\": %zd,\n", ssv); + /* + * Decay settings are emitted only in json mode; in table mode, they're + * emitted as notes with the opt output, above. + */ + CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, &ssv); - CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"muzzy_decay_ms\": %zd,\n", ssv); - } + CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, &ssv); CTL_GET("arenas.quantum", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"quantum\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - } + emitter_kv(emitter, "quantum", "Quantum size", emitter_type_size, &sv); CTL_GET("arenas.page", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"page\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - } + emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"tcache_max\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } + emitter_kv(emitter, "tcache_max", + "Maximum thread-cached size class", emitter_type_size, &sv); } - if (json) { - unsigned nbins, nlextents, i; + unsigned nbins; + CTL_GET("arenas.nbins", &nbins, 
unsigned); + emitter_kv(emitter, "nbins", "Number of bin size classes", + emitter_type_unsigned, &nbins); - CTL_GET("arenas.nbins", &nbins, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nbins\": %u,\n", nbins); + unsigned nhbins; + CTL_GET("arenas.nhbins", &nhbins, unsigned); + emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes", + emitter_type_unsigned, &nhbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nhbins\": %u,\n", - uv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"bin\": [\n"); - for (i = 0; i < nbins; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + /* + * We do enough mallctls in a loop that we actually want to omit them + * (not just omit the printing). + */ + if (emitter->output == emitter_output_json) { + emitter_json_arr_begin(emitter, "bin"); + for (unsigned i = 0; i < nbins; i++) { + emitter_json_arr_obj_begin(emitter); CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu,\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + emitter_json_kv(emitter, "nregs", emitter_type_uint32, + &u32v); CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"slab_size\": %zu\n", sv); + emitter_json_kv(emitter, "slab_size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + emitter_json_arr_obj_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t],\n"); + emitter_json_arr_end(emitter); /* Close "bin". 
*/ + } - CTL_GET("arenas.nlextents", &nlextents, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nlextents\": %u,\n", nlextents); + unsigned nlextents; + CTL_GET("arenas.nlextents", &nlextents, unsigned); + emitter_kv(emitter, "nlextents", "Number of large size classes", + emitter_type_unsigned, &nlextents); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lextent\": [\n"); - for (i = 0; i < nlextents; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + if (emitter->output == emitter_output_json) { + emitter_json_arr_begin(emitter, "lextent"); + for (unsigned i = 0; i < nlextents; i++) { + emitter_json_arr_obj_begin(emitter); CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nlextents) ? "," : ""); + emitter_json_arr_obj_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t]\n"); + emitter_json_arr_end(emitter); /* Close "lextent". */ + } - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_prof || more) ? "," : ""); + emitter_json_dict_end(emitter); /* Close "arenas" */ + + if (json) { + if (more || config_prof) { + malloc_cprintf(write_cb, cbopaque, ",\n"); + } else { + malloc_cprintf(write_cb, cbopaque, "\n"); + } } /* prof. */ From ec31d476ffa36885182f2b569ee518d3dfd54761 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 1 Mar 2018 18:19:25 -0800 Subject: [PATCH 1063/2608] Stats printing: Convert profiling stats to use the emitter. While we're at it, print them in table form, too. 
---
 src/stats.c | 59 +++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 31 deletions(-)

diff --git a/src/stats.c b/src/stats.c
index 432c5a1a..ca843e2d 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -797,6 +797,33 @@ stats_general_print(emitter_t *emitter, bool more) {
 #undef OPT_WRITE_SSIZE_T_MUTABLE
 #undef OPT_WRITE_CHAR_P
 
+	/* prof. */
+	if (config_prof) {
+		emitter_dict_begin(emitter, "prof", "Profiling settings");
+
+		CTL_GET("prof.thread_active_init", &bv, bool);
+		emitter_kv(emitter, "thread_active_init",
+		    "prof.thread_active_init", emitter_type_bool, &bv);
+
+		CTL_GET("prof.active", &bv, bool);
+		emitter_kv(emitter, "active", "prof.active", emitter_type_bool,
+		    &bv);
+
+		CTL_GET("prof.gdump", &bv, bool);
+		emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool,
+		    &bv);
+
+		CTL_GET("prof.interval", &u64v, uint64_t);
+		emitter_kv(emitter, "interval", "prof.interval",
+		    emitter_type_uint64, &u64v);
+
+		CTL_GET("prof.lg_sample", &ssv, ssize_t);
+		emitter_kv(emitter, "lg_sample", "prof.lg_sample",
+		    emitter_type_ssize, &ssv);
+
+		emitter_dict_end(emitter); /* Close "prof". */
+	}
+
 	/* arenas. */
 	/*
 	 * The json output sticks arena info into an "arenas" dict; the table
@@ -886,42 +913,12 @@ stats_general_print(emitter_t *emitter, bool more) {
 	emitter_json_dict_end(emitter); /* Close "arenas" */
 
 	if (json) {
-		if (more || config_prof) {
+		if (more) {
 			malloc_cprintf(write_cb, cbopaque, ",\n");
 		} else {
 			malloc_cprintf(write_cb, cbopaque, "\n");
 		}
 	}
-
-	/* prof. */
-	if (config_prof && json) {
-		malloc_cprintf(write_cb, cbopaque,
-		    "\t\t\"prof\": {\n");
-
-		CTL_GET("prof.thread_active_init", &bv, bool);
-		malloc_cprintf(write_cb, cbopaque,
-		    "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" :
-		    "false");
-
-		CTL_GET("prof.active", &bv, bool);
-		malloc_cprintf(write_cb, cbopaque,
-		    "\t\t\t\"active\": %s,\n", bv ? 
"true" : "false"); - - CTL_GET("prof.gdump", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.interval", &u64v, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"interval\": %"FMTu64",\n", u64v); - - CTL_GET("prof.lg_sample", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lg_sample\": %zd\n", ssv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", more ? "," : ""); - } } static void From 0d20eda127c4f35c16cfffad15857d3b286166ba Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 1 Mar 2018 19:01:05 -0800 Subject: [PATCH 1064/2608] Stats printing: Move emitter -> manual cutoff point. This makes it so that the "general" portion of the stats code is completely agnostic to emitter type. --- src/stats.c | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/src/stats.c b/src/stats.c index ca843e2d..583c1316 100644 --- a/src/stats.c +++ b/src/stats.c @@ -668,15 +668,7 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, } static void -stats_general_print(emitter_t *emitter, bool more) { - /* - * These should eventually be deleted; they are useful in converting - * from manual to emitter-based stats output, though. 
- */ - void (*write_cb)(void *, const char *) = emitter->write_cb; - void *cbopaque = emitter->cbopaque; - bool json = (emitter->output == emitter_output_json); - +stats_general_print(emitter_t *emitter) { const char *cpv; bool bv, bv2; unsigned uv; @@ -911,14 +903,6 @@ stats_general_print(emitter_t *emitter, bool more) { } emitter_json_dict_end(emitter); /* Close "arenas" */ - - if (json) { - if (more) { - malloc_cprintf(write_cb, cbopaque, ",\n"); - } else { - malloc_cprintf(write_cb, cbopaque, "\n"); - } - } } static void @@ -1208,7 +1192,16 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, emitter_json_dict_begin(&emitter, "jemalloc"); if (general) { - stats_general_print(&emitter, config_stats); + stats_general_print(&emitter); + + if (json) { + if (config_stats) { + malloc_cprintf(write_cb, cbopaque, ","); + } + } + } + if (json) { + malloc_cprintf(write_cb, cbopaque, "\n"); } if (config_stats) { stats_print_helper(write_cb, cbopaque, json, merged, destroyed, From 8076b28721e16d14a8a81bb6c17fba804812e110 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 2 Mar 2018 11:57:13 -0800 Subject: [PATCH 1065/2608] Stats printing: Remove explicit callback passing to stats_print_helper. This makes the emitter the only source of callback information, which is a step towards where we want to be. --- src/stats.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/stats.c b/src/stats.c index 583c1316..709ce2d5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -923,9 +923,16 @@ MUTEX_PROF_COUNTERS } static void -stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large, bool mutex) { +stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, + bool unmerged, bool bins, bool large, bool mutex) { + /* + * These should be deleted. 
We keep them around for a while, to aid in
+	 * the transition to the emitter code.
+	 */
+	void (*write_cb)(void *, const char *) = emitter->write_cb;
+	void *cbopaque = emitter->cbopaque;
+	bool json = (emitter->output == emitter_output_json);
+
 	size_t allocated, active, metadata, metadata_thp, resident, mapped,
 	    retained;
 	size_t num_background_threads;
@@ -1204,8 +1211,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque,
 		malloc_cprintf(write_cb, cbopaque, "\n");
 	}
 	if (config_stats) {
-		stats_print_helper(write_cb, cbopaque, json, merged, destroyed,
-		    unmerged, bins, large, mutex);
+		stats_print_helper(&emitter, merged, destroyed, unmerged,
+		    bins, large, mutex);
 	}
 
 	emitter_json_dict_end(&emitter); /* Closes the "jemalloc" dict. */

From 9e1846b0041e29a331ecf76e9b23ddb730bc352f Mon Sep 17 00:00:00 2001
From: David Goldblatt
Date: Fri, 2 Mar 2018 13:12:47 -0800
Subject: [PATCH 1066/2608] Stats printing: move non-mutex arena stats to the
 emitter.

Another step in the conversion process. The mutex is a little different,
because we want to emit it as an array.
---
 src/stats.c | 97 ++++++++++++++++++++++++-----------------------------
 1 file changed, 43 insertions(+), 54 deletions(-)

diff --git a/src/stats.c b/src/stats.c
index 709ce2d5..5e5cc0c8 100644
--- a/src/stats.c
+++ b/src/stats.c
@@ -965,39 +965,40 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed,
 		background_thread_run_interval = 0;
 	}
 
+	/* Generic global stats. 
*/ + emitter_json_dict_begin(emitter, "stats"); + emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); + emitter_json_kv(emitter, "active", emitter_type_size, &active); + emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); + emitter_json_kv(emitter, "metadata_thp", emitter_type_size, + &metadata_thp); + emitter_json_kv(emitter, "resident", emitter_type_size, &resident); + emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); + emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + + emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " + "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " + "retained: %zu\n", allocated, active, metadata, metadata_thp, + resident, mapped, retained); + + /* Background thread stats. */ + emitter_json_dict_begin(emitter, "background_thread"); + emitter_json_kv(emitter, "num_threads", emitter_type_size, + &num_background_threads); + emitter_json_kv(emitter, "num_runs", emitter_type_uint64, + &background_thread_num_runs); + emitter_json_kv(emitter, "run_interval", emitter_type_uint64, + &background_thread_run_interval); + emitter_json_dict_end(emitter); /* Close "background_thread". 
*/ + + emitter_table_printf(emitter, "Background threads: %zu, " + "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", + num_background_threads, background_thread_num_runs, + background_thread_run_interval); + if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"allocated\": %zu,\n", allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %zu,\n", active); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"metadata\": %zu,\n", metadata); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"metadata_thp\": %zu,\n", metadata_thp); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"resident\": %zu,\n", resident); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mapped\": %zu,\n", mapped); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu,\n", retained); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"background_thread\": {\n"); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_threads\": %zu,\n", num_background_threads); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_runs\": %"FMTu64",\n", - background_thread_num_runs); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"run_interval\": %"FMTu64"\n", - background_thread_run_interval); - malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", - mutex ? "," : ""); - if (mutex) { + malloc_cprintf(write_cb, cbopaque, ",\n"); malloc_cprintf(write_cb, cbopaque, "\t\t\t\"mutexes\": {\n"); mutex_prof_global_ind_t i; @@ -1007,25 +1008,9 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, "\t\t\t\t", i == mutex_prof_num_global_mutexes - 1); } - malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); + malloc_cprintf(write_cb, cbopaque, "\t\t\t}"); } - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (merged || unmerged || destroyed) ? 
"," : ""); } else { - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu (n_thp %zu)," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, metadata_thp, resident, mapped, - retained); - - if (have_background_thread && num_background_threads > 0) { - malloc_cprintf(write_cb, cbopaque, - "Background threads: %zu, num_runs: %"FMTu64", " - "run_interval: %"FMTu64" ns\n", - num_background_threads, - background_thread_num_runs, - background_thread_run_interval); - } if (mutex) { mutex_prof_global_ind_t i; for (i = 0; i < mutex_prof_num_global_mutexes; i++) { @@ -1036,6 +1021,16 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, } } + emitter_json_dict_end(emitter); /* Close "stats". */ + + if (json) { + if (merged || unmerged || destroyed) { + malloc_cprintf(write_cb, cbopaque, ",\n"); + } else { + malloc_cprintf(write_cb, cbopaque, "\n"); + } + } + if (merged || destroyed || unmerged) { unsigned narenas; @@ -1200,12 +1195,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (general) { stats_general_print(&emitter); - - if (json) { - if (config_stats) { - malloc_cprintf(write_cb, cbopaque, ","); - } - } } if (json) { malloc_cprintf(write_cb, cbopaque, "\n"); From ebe0b5f8283b542f59cbe77f69e24935ebb5f866 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 2 Mar 2018 14:11:27 -0800 Subject: [PATCH 1067/2608] Emitter: Add support for row-based output in table mode. This is needed for things like mutex stats in table mode. 
--- include/jemalloc/internal/emitter.h | 66 ++++++++++++++++++++++++++--- test/unit/emitter.c | 64 +++++++++++++++++++++++++++- 2 files changed, 123 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index c82dbdb1..830d0f24 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EMITTER_H #define JEMALLOC_INTERNAL_EMITTER_H +#include "jemalloc/internal/ql.h" + typedef enum emitter_output_e emitter_output_t; enum emitter_output_e { emitter_output_json, @@ -25,8 +27,50 @@ enum emitter_type_e { emitter_type_size, emitter_type_ssize, emitter_type_string, + /* + * A title is a column title in a table; it's just a string, but it's + * not quoted. + */ + emitter_type_title, }; +typedef struct emitter_col_s emitter_col_t; +struct emitter_col_s { + /* Filled in by the user. */ + emitter_justify_t justify; + int width; + emitter_type_t type; + union { + bool bool_val; + int int_val; + unsigned unsigned_val; + uint32_t uint32_val; + uint64_t uint64_val; + size_t size_val; + ssize_t ssize_val; + const char *str_val; + }; + + /* Filled in by initialization. */ + ql_elm(emitter_col_t) link; +}; + +typedef struct emitter_row_s emitter_row_t; +struct emitter_row_s { + ql_head(emitter_col_t) cols; +}; + +static inline void +emitter_row_init(emitter_row_t *row) { + ql_new(&row->cols); +} + +static inline void +emitter_col_init(emitter_col_t *col, emitter_row_t *row) { + ql_elm_new(col, link); + ql_tail_insert(&row->cols, col, link); +} + typedef struct emitter_s emitter_t; struct emitter_s { emitter_output_t output; @@ -141,12 +185,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * anywhere near the fmt size. */ assert(str_written < BUF_SIZE); - - /* - * We don't support justified quoted string primitive values for - * now. Fortunately, we don't want to emit them. 
- */ - emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); emitter_printf(emitter, fmt, buf); break; @@ -156,6 +194,9 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, case emitter_type_uint64: EMIT_SIMPLE(uint64_t, FMTu64) break; + case emitter_type_title: + EMIT_SIMPLE(char *const, "s"); + break; default: unreachable(); } @@ -378,4 +419,17 @@ emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type, } } +static inline void +emitter_table_row(emitter_t *emitter, emitter_row_t *row) { + if (emitter->output != emitter_output_table) { + return; + } + emitter_col_t *col; + ql_foreach(col, &row->cols, link) { + emitter_print_value(emitter, col->justify, col->width, + col->type, (const void *)&col->bool_val); + } + emitter_table_printf(emitter, "\n"); +} + #endif /* JEMALLOC_INTERNAL_EMITTER_H */ diff --git a/test/unit/emitter.c b/test/unit/emitter.c index c2216b22..535c7cf1 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -217,6 +217,10 @@ emit_types(emitter_t *emitter) { emitter_kv(emitter, "k6", "K6", emitter_type_string, &str); emitter_kv(emitter, "k7", "K7", emitter_type_uint32, &u32); emitter_kv(emitter, "k8", "K8", emitter_type_uint64, &u64); + /* + * We don't test the title type, since it's only used for tables. It's + * tested in the emitter_table_row tests. 
+ */ emitter_end(emitter); } @@ -339,6 +343,63 @@ TEST_BEGIN(test_json_arr) { } TEST_END +static void +emit_table_row(emitter_t *emitter) { + emitter_begin(emitter); + emitter_row_t row; + emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title}; + abc.str_val = "ABC title"; + emitter_col_t def = {emitter_justify_right, 15, emitter_type_title}; + def.str_val = "DEF title"; + emitter_col_t ghi = {emitter_justify_right, 5, emitter_type_title}; + ghi.str_val = "GHI"; + + emitter_row_init(&row); + emitter_col_init(&abc, &row); + emitter_col_init(&def, &row); + emitter_col_init(&ghi, &row); + + emitter_table_row(emitter, &row); + + abc.type = emitter_type_int; + def.type = emitter_type_bool; + ghi.type = emitter_type_int; + + abc.int_val = 123; + def.bool_val = true; + ghi.int_val = 456; + emitter_table_row(emitter, &row); + + abc.int_val = 789; + def.bool_val = false; + ghi.int_val = 1011; + emitter_table_row(emitter, &row); + + abc.type = emitter_type_string; + abc.str_val = "a string"; + def.bool_val = false; + ghi.type = emitter_type_title; + ghi.str_val = "ghi"; + emitter_table_row(emitter, &row); + + emitter_end(emitter); +} + +static const char *table_row_json = +"{\n" +"}\n"; + +static const char *table_row_table = +"ABC title DEF title GHI\n" +"123 true 456\n" +"789 false 1011\n" +"\"a string\" false ghi\n"; + +TEST_BEGIN(test_table_row) { + assert_emit_output(&emit_table_row, table_row_json, table_row_table); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -347,5 +408,6 @@ main(void) { test_nested_dict, test_types, test_modal, - test_json_arr); + test_json_arr, + test_table_row); } From 86c61d4a575e7eb57ade8a39e9d552d95c63aa31 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 2 Mar 2018 15:15:19 -0800 Subject: [PATCH 1068/2608] Stats printing: Move global mutex stats to use emitter. 
--- include/jemalloc/internal/mutex_prof.h | 32 +++--- src/stats.c | 140 +++++++++++++++++-------- 2 files changed, 113 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 735c0adb..ce183d33 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -35,27 +35,27 @@ typedef enum { mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; -#define MUTEX_PROF_UINT64_COUNTERS \ - OP(num_ops, uint64_t) \ - OP(num_wait, uint64_t) \ - OP(num_spin_acq, uint64_t) \ - OP(num_owner_switch, uint64_t) \ - OP(total_wait_time, uint64_t) \ - OP(max_wait_time, uint64_t) +#define MUTEX_PROF_UINT64_COUNTERS \ + OP(num_ops, uint64_t, "n_lock_ops") \ + OP(num_wait, uint64_t, "n_waiting") \ + OP(num_spin_acq, uint64_t, "n_spin_acq") \ + OP(num_owner_switch, uint64_t, "n_owner_switch") \ + OP(total_wait_time, uint64_t, "total_wait_ns") \ + OP(max_wait_time, uint64_t, "max_wait_ns") -#define MUTEX_PROF_UINT32_COUNTERS \ - OP(max_num_thds, uint32_t) +#define MUTEX_PROF_UINT32_COUNTERS \ + OP(max_num_thds, uint32_t, "max_n_thds") -#define MUTEX_PROF_COUNTERS \ - MUTEX_PROF_UINT64_COUNTERS \ +#define MUTEX_PROF_COUNTERS \ + MUTEX_PROF_UINT64_COUNTERS \ MUTEX_PROF_UINT32_COUNTERS -#define OP(counter, type) mutex_counter_##counter, +#define OP(counter, type, human) mutex_counter_##counter, -#define COUNTER_ENUM(counter_list, t) \ - typedef enum { \ - counter_list \ - mutex_prof_num_##t##_counters \ +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list \ + mutex_prof_num_##t##_counters \ } mutex_prof_##t##_counter_ind_t; COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) diff --git a/src/stats.c b/src/stats.c index 5e5cc0c8..13b311be 100644 --- a/src/stats.c +++ b/src/stats.c @@ -89,7 +89,7 @@ read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, uint64_t results_uint64_t[mutex_prof_num_uint64_t_counters], uint32_t 
results_uint32_t[mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; -#define OP(c, t) \ +#define OP(c, t, human) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.bins.0","mutex", #c); \ CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ @@ -98,6 +98,63 @@ read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, #undef OP } +static void +mutex_stats_init_row(emitter_row_t *row, const char *table_name, + emitter_col_t *name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + + emitter_row_init(row); + emitter_col_init(name, row); + name->justify = emitter_justify_left; + name->width = 21; + name->type = emitter_type_title; + name->str_val = table_name; + +#define WIDTH_uint32_t 12 +#define WIDTH_uint64_t 16 +#define OP(counter, counter_type, human) \ + col = &col_##counter_type[k_##counter_type]; \ + ++k_##counter_type; \ + emitter_col_init(col, row); \ + col->justify = emitter_justify_right; \ + col->width = WIDTH_##counter_type; \ + col->type = emitter_type_title; \ + col->str_val = human; + MUTEX_PROF_COUNTERS +#undef OP +#undef WIDTH_uint32_t +#undef WIDTH_uint64_t +} + +static void +mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + emitter_table_row(emitter, row); + + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, type, human) \ + col = &col_##type[k_##type]; \ + ++k_##type; \ + emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ + (const void *)&col->bool_val); + 
MUTEX_PROF_COUNTERS; +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + static void mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], @@ -109,7 +166,7 @@ mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", "%s\t\"%s\": %"FMTu64"%s\n"}; -#define OP(c, t) \ +#define OP(c, t, human) \ malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ json_indent, #c, (t)stats_##t[mutex_counter_##c], \ @@ -342,7 +399,7 @@ read_arena_mutex_stats(unsigned arena_ind, mutex_prof_arena_ind_t i; for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { -#define OP(c, t) \ +#define OP(c, t, human) \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "arenas.0.mutexes", arena_mutex_names[i], #c); \ CTL_M2_GET(cmd, arena_ind, \ @@ -370,7 +427,7 @@ mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, (int)(20 - strlen(name)), ' '); char *fmt_str[2] = {"%12"FMTu32, "%16"FMTu64}; -#define OP(c, t) \ +#define OP(c, t, human) \ malloc_cprintf(write_cb, cbopaque, \ fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ (t)stats_##t[mutex_counter_##c]); @@ -906,20 +963,26 @@ stats_general_print(emitter_t *emitter) { } static void -read_global_mutex_stats( - uint64_t results_uint64_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint64_t_counters], - uint32_t results_uint32_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint32_t_counters]) { +mutex_stats_read_global(const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, 
MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", global_mutex_names[i], #c); \ - CTL_GET(cmd, (t *)&results_##t[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", name, #counter); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS #undef OP - } +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } static void @@ -946,12 +1009,6 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats_uint64_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint64_t_counters]; - uint32_t mutex_stats_uint32_t[mutex_prof_num_global_mutexes][mutex_prof_num_uint32_t_counters]; - if (mutex) { - read_global_mutex_stats(mutex_stats_uint64_t, mutex_stats_uint32_t); - } - if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", &num_background_threads, size_t); @@ -996,29 +1053,26 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, num_background_threads, background_thread_num_runs, background_thread_run_interval); - if (json) { - if (mutex) { - malloc_cprintf(write_cb, cbopaque, ",\n"); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mutexes\": {\n"); - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - global_mutex_names[i], mutex_stats_uint64_t[i], mutex_stats_uint32_t[i], - "\t\t\t\t", - i == mutex_prof_num_global_mutexes - 1); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t}"); - } - } else { - if (mutex) { - mutex_prof_global_ind_t i; - for (i = 0; i < 
mutex_prof_num_global_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - global_mutex_names[i], mutex_stats_uint64_t[i], mutex_stats_uint32_t[i], - i == 0); - } + if (mutex) { + emitter_row_t row; + emitter_col_t name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + + mutex_stats_init_row(&row, "", &name, col64, col32); + + emitter_table_row(emitter, &row); + emitter_json_dict_begin(emitter, "mutexes"); + + for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { + mutex_stats_read_global(global_mutex_names[i], &name, + col64, col32); + emitter_json_dict_begin(emitter, global_mutex_names[i]); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_dict_end(emitter); } + + emitter_json_dict_end(emitter); /* Close "mutexes". */ } emitter_json_dict_end(emitter); /* Close "stats". */ From cbde666d9a5a2bf1cb741661aebec228aa9f5827 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 12:58:24 -0800 Subject: [PATCH 1069/2608] Stats printing: move stats_print_helper to use emitter. --- src/stats.c | 161 ++++++++++++++++++++-------------------------------- 1 file changed, 60 insertions(+), 101 deletions(-) diff --git a/src/stats.c b/src/stats.c index 13b311be..889c650a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -481,6 +481,10 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t tcache_bytes; uint64_t uptime; + if (json) { + malloc_cprintf(write_cb, cbopaque, "\n"); + } + CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); @@ -1077,122 +1081,77 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_dict_end(emitter); /* Close "stats". 
*/ - if (json) { - if (merged || unmerged || destroyed) { - malloc_cprintf(write_cb, cbopaque, ",\n"); - } else { - malloc_cprintf(write_cb, cbopaque, "\n"); - } - } - if (merged || destroyed || unmerged) { unsigned narenas; - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats.arenas\": {\n"); - } + emitter_json_dict_begin(emitter, "stats.arenas"); CTL_GET("arenas.narenas", &narenas, unsigned); - { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - size_t sz; - VARIABLE_ARRAY(bool, initialized, narenas); - bool destroyed_initialized; - unsigned i, j, ninitialized; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz; + VARIABLE_ARRAY(bool, initialized, narenas); + bool destroyed_initialized; + unsigned i, j, ninitialized; - xmallctlnametomib("arena.0.initialized", mib, &miblen); - for (i = ninitialized = 0; i < narenas; i++) { - mib[1] = i; - sz = sizeof(bool); - xmallctlbymib(mib, miblen, &initialized[i], &sz, - NULL, 0); - if (initialized[i]) { - ninitialized++; - } - } - mib[1] = MALLCTL_ARENAS_DESTROYED; + xmallctlnametomib("arena.0.initialized", mib, &miblen); + for (i = ninitialized = 0; i < narenas; i++) { + mib[1] = i; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + xmallctlbymib(mib, miblen, &initialized[i], &sz, NULL, 0); - - /* Merged stats. */ - if (merged && (ninitialized > 1 || !unmerged)) { - /* Print merged arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"merged\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large, mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", - ((destroyed_initialized && - destroyed) || unmerged) ? 
"," : - ""); - } + if (initialized[i]) { + ninitialized++; } + } + mib[1] = MALLCTL_ARENAS_DESTROYED; + sz = sizeof(bool); + xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + NULL, 0); - /* Destroyed stats. */ - if (destroyed_initialized && destroyed) { - /* Print destroyed arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"destroyed\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nDestroyed arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_DESTROYED, bins, large, - mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", unmerged ? "," : - ""); - } - } + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. */ + emitter_table_printf(emitter, "Merged arenas stats:\n"); + emitter_json_dict_begin(emitter, "merged"); + stats_arena_print(write_cb, cbopaque, json, + MALLCTL_ARENAS_ALL, bins, large, mutex); + emitter_json_dict_end(emitter); /* Close "merged". */ + } - /* Unmerged stats. */ - if (unmerged) { - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", - i); - } else { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, - cbopaque, json, i, bins, - large, mutex); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," - : ""); - } - } + /* Destroyed stats. */ + if (destroyed_initialized && destroyed) { + /* Print destroyed arena stats. */ + emitter_table_printf(emitter, + "Destroyed arenas stats:\n"); + emitter_json_dict_begin(emitter, "destroyed"); + stats_arena_print(write_cb, cbopaque, json, + MALLCTL_ARENAS_DESTROYED, bins, large, + mutex); + emitter_json_dict_end(emitter); /* Close "destroyed". */ + } + + /* Unmerged stats. 
*/ + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + char arena_ind_str[20]; + malloc_snprintf(arena_ind_str, + sizeof(arena_ind_str), "%u", i); + emitter_json_dict_begin(emitter, + arena_ind_str); + emitter_table_printf(emitter, + "arenas[%s]:\n", arena_ind_str); + stats_arena_print(write_cb, + cbopaque, json, i, bins, + large, mutex); + /* Close "". */ + emitter_json_dict_end(emitter); } } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t}\n"); - } + emitter_json_dict_end(emitter); /* Close "stats.arenas". */ } } From a6ef061c4309852a8bb27c5374edb1bc6980ac06 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 13:03:22 -0800 Subject: [PATCH 1070/2608] Stats printing: Move emitter cutoff point into stats_arena_print. --- src/stats.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/stats.c b/src/stats.c index 889c650a..c3d9fbe3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -465,8 +465,8 @@ stats_arena_mutexes_print(void (*write_cb)(void *, const char *), } static void -stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large, bool mutex) { +stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, + bool mutex) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; @@ -481,6 +481,11 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, size_t tcache_bytes; uint64_t uptime; + /* These should be removed once the emitter conversion is done. */ + void (*write_cb)(void *, const char *) = emitter->write_cb; + void *cbopaque = emitter->cbopaque; + bool json = (emitter->output == emitter_output_json); + if (json) { malloc_cprintf(write_cb, cbopaque, "\n"); } @@ -996,10 +1001,6 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, * These should be deleted. 
We keep them around for a while, to aid in * the transition to the emitter code. */ - void (*write_cb)(void *, const char *) = emitter->write_cb; - void *cbopaque = emitter->cbopaque; - bool json = (emitter->output == emitter_output_json); - size_t allocated, active, metadata, metadata_thp, resident, mapped, retained; size_t num_background_threads; @@ -1114,8 +1115,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Print merged arena stats. */ emitter_table_printf(emitter, "Merged arenas stats:\n"); emitter_json_dict_begin(emitter, "merged"); - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large, mutex); + stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, + large, mutex); emitter_json_dict_end(emitter); /* Close "merged". */ } @@ -1125,9 +1126,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "Destroyed arenas stats:\n"); emitter_json_dict_begin(emitter, "destroyed"); - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_DESTROYED, bins, large, - mutex); + stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, + bins, large, mutex); emitter_json_dict_end(emitter); /* Close "destroyed". */ } @@ -1142,8 +1142,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, arena_ind_str); emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); - stats_arena_print(write_cb, - cbopaque, json, i, bins, + stats_arena_print(emitter, i, bins, large, mutex); /* Close "". */ emitter_json_dict_end(emitter); From bc6620f73e205004b2dfaf0438daeab617609295 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 16:26:32 -0800 Subject: [PATCH 1071/2608] Stats printing: convert decay stats to use the emitter. 
--- src/stats.c | 205 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 132 insertions(+), 73 deletions(-) diff --git a/src/stats.c b/src/stats.c index c3d9fbe3..7b384df6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -486,38 +486,19 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, void *cbopaque = emitter->cbopaque; bool json = (emitter->output == emitter_output_json); - if (json) { - malloc_cprintf(write_cb, cbopaque, "\n"); - } - CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nthreads\": %u,\n", nthreads); - } else { - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - } + emitter_kv(emitter, "nthreads", "assigned threads", + emitter_type_unsigned, &nthreads); CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"uptime_ns\": %"FMTu64",\n", uptime); - } else { - malloc_cprintf(write_cb, cbopaque, - "uptime: %"FMTu64"\n", uptime); - } + emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64, + &uptime); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dss\": \"%s\",\n", dss); - } else { - malloc_cprintf(write_cb, cbopaque, - "dss allocation precedence: %s\n", dss); - } + emitter_kv(emitter, "dss", "dss allocation precedence", + emitter_type_string, &dss); CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); @@ -534,55 +515,133 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_decay_ms\": %zd,\n", dirty_decay_ms); - malloc_cprintf(write_cb, cbopaque, - 
"\t\t\t\t\"muzzy_decay_ms\": %zd,\n", muzzy_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pactive\": %zu,\n", pactive); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pdirty\": %zu,\n", pdirty); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pmuzzy\": %zu,\n", pmuzzy); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_npurge\": %"FMTu64",\n", dirty_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_nmadvise\": %"FMTu64",\n", dirty_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_purged\": %"FMTu64",\n", dirty_purged); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_npurge\": %"FMTu64",\n", muzzy_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_nmadvise\": %"FMTu64",\n", muzzy_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_purged\": %"FMTu64",\n", muzzy_purged); + + emitter_row_t decay_row; + emitter_row_init(&decay_row); + + /* JSON-style emission. */ + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, + &dirty_decay_ms); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, + &muzzy_decay_ms); + + emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive); + emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty); + emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy); + + emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64, + &dirty_npurge); + emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64, + &dirty_nmadvise); + emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64, + &dirty_purged); + + emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64, + &muzzy_npurge); + emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64, + &muzzy_nmadvise); + emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, + &muzzy_purged); + + + /* Table-style emission. 
*/ + emitter_col_t decay_type; + emitter_col_init(&decay_type, &decay_row); + decay_type.justify = emitter_justify_right; + decay_type.width = 9; + decay_type.type = emitter_type_title; + decay_type.str_val = "decaying:"; + + emitter_col_t decay_time; + emitter_col_init(&decay_time, &decay_row); + decay_time.justify = emitter_justify_right; + decay_time.width = 6; + decay_time.type = emitter_type_title; + decay_time.str_val = "time"; + + emitter_col_t decay_npages; + emitter_col_init(&decay_npages, &decay_row); + decay_npages.justify = emitter_justify_right; + decay_npages.width = 13; + decay_npages.type = emitter_type_title; + decay_npages.str_val = "npages"; + + emitter_col_t decay_sweeps; + emitter_col_init(&decay_sweeps, &decay_row); + decay_sweeps.justify = emitter_justify_right; + decay_sweeps.width = 13; + decay_sweeps.type = emitter_type_title; + decay_sweeps.str_val = "sweeps"; + + emitter_col_t decay_madvises; + emitter_col_init(&decay_madvises, &decay_row); + decay_madvises.justify = emitter_justify_right; + decay_madvises.width = 13; + decay_madvises.type = emitter_type_title; + decay_madvises.str_val = "madvises"; + + emitter_col_t decay_purged; + emitter_col_init(&decay_purged, &decay_row); + decay_purged.justify = emitter_justify_right; + decay_purged.width = 13; + decay_purged.type = emitter_type_title; + decay_purged.str_val = "purged"; + + /* Title row. */ + emitter_table_row(emitter, &decay_row); + + /* Dirty row. 
*/ + decay_type.str_val = "dirty:"; + + if (dirty_decay_ms >= 0) { + decay_time.type = emitter_type_ssize; + decay_time.ssize_val = dirty_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - "decaying: time npages sweeps madvises" - " purged\n"); - if (dirty_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " dirty: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", dirty_decay_ms, pdirty, dirty_npurge, - dirty_nmadvise, dirty_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " dirty: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pdirty, dirty_npurge, dirty_nmadvise, - dirty_purged); - } - if (muzzy_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " muzzy: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", muzzy_decay_ms, pmuzzy, muzzy_npurge, - muzzy_nmadvise, muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " muzzy: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pmuzzy, muzzy_npurge, muzzy_nmadvise, - muzzy_purged); - } + decay_time.type = emitter_type_title; + decay_time.str_val = "N/A"; + } + + decay_npages.type = emitter_type_size; + decay_npages.size_val = pdirty; + + decay_sweeps.type = emitter_type_uint64; + decay_sweeps.uint64_val = dirty_npurge; + + decay_madvises.type = emitter_type_uint64; + decay_madvises.uint64_val = dirty_nmadvise; + + decay_purged.type = emitter_type_uint64; + decay_purged.uint64_val = dirty_purged; + + emitter_table_row(emitter, &decay_row); + + /* Muzzy row. 
*/ + decay_type.str_val = "muzzy:"; + + if (muzzy_decay_ms >= 0) { + decay_time.type = emitter_type_ssize; + decay_time.ssize_val = muzzy_decay_ms; + } else { + decay_time.type = emitter_type_title; + decay_time.str_val = "N/A"; + } + + decay_npages.type = emitter_type_size; + decay_npages.size_val = pmuzzy; + + decay_sweeps.type = emitter_type_uint64; + decay_sweeps.uint64_val = muzzy_npurge; + + decay_madvises.type = emitter_type_uint64; + decay_madvises.uint64_val = muzzy_nmadvise; + + decay_purged.type = emitter_type_uint64; + decay_purged.uint64_val = muzzy_purged; + + emitter_table_row(emitter, &decay_row); + + if (json) { + malloc_cprintf(write_cb, cbopaque, ",\n"); } CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, From 8fc850695dc70958cfeffd53e9d5df261697cff5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 16:52:34 -0800 Subject: [PATCH 1072/2608] Stats printing: convert paging and alloc counts to use the emitter. --- src/stats.c | 148 ++++++++++++++++++++++++++++------------------------ 1 file changed, 81 insertions(+), 67 deletions(-) diff --git a/src/stats.c b/src/stats.c index 7b384df6..ceabdc3c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -543,7 +543,6 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, &muzzy_purged); - /* Table-style emission. */ emitter_col_t decay_type; emitter_col_init(&decay_type, &decay_row); @@ -640,76 +639,91 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_table_row(emitter, &decay_row); + /* Small / large / total allocation counts. 
*/ + emitter_row_t alloc_count_row; + emitter_row_init(&alloc_count_row); + + emitter_col_t alloc_count_title; + emitter_col_init(&alloc_count_title, &alloc_count_row); + alloc_count_title.justify = emitter_justify_left; + alloc_count_title.width = 25; + alloc_count_title.type = emitter_type_title; + alloc_count_title.str_val = ""; + + emitter_col_t alloc_count_allocated; + emitter_col_init(&alloc_count_allocated, &alloc_count_row); + alloc_count_allocated.justify = emitter_justify_right; + alloc_count_allocated.width = 12; + alloc_count_allocated.type = emitter_type_title; + alloc_count_allocated.str_val = "allocated"; + + emitter_col_t alloc_count_nmalloc; + emitter_col_init(&alloc_count_nmalloc, &alloc_count_row); + alloc_count_nmalloc.justify = emitter_justify_right; + alloc_count_nmalloc.width = 12; + alloc_count_nmalloc.type = emitter_type_title; + alloc_count_nmalloc.str_val = "nmalloc"; + + emitter_col_t alloc_count_ndalloc; + emitter_col_init(&alloc_count_ndalloc, &alloc_count_row); + alloc_count_ndalloc.justify = emitter_justify_right; + alloc_count_ndalloc.width = 12; + alloc_count_ndalloc.type = emitter_type_title; + alloc_count_ndalloc.str_val = "ndalloc"; + + emitter_col_t alloc_count_nrequests; + emitter_col_init(&alloc_count_nrequests, &alloc_count_row); + alloc_count_nrequests.justify = emitter_justify_right; + alloc_count_nrequests.width = 12; + alloc_count_nrequests.type = emitter_type_title; + alloc_count_nrequests.str_val = "nrequests"; + + emitter_table_row(emitter, &alloc_count_row); + +#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ + CTL_M2_GET("stats.arenas.0." #small_or_large "." 
#name, i, \ + &small_or_large##_##name, valtype##_t); \ + emitter_json_kv(emitter, #name, emitter_type_##valtype, \ + &small_or_large##_##name); \ + alloc_count_##name.type = emitter_type_##valtype; \ + alloc_count_##name.valtype##_val = small_or_large##_##name; + + emitter_json_dict_begin(emitter, "small"); + alloc_count_title.str_val = "small:"; + + GET_AND_EMIT_ALLOC_STAT(small, allocated, size) + GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_dict_end(emitter); /* Close "small". */ + + emitter_json_dict_begin(emitter, "large"); + alloc_count_title.str_val = "large:"; + + GET_AND_EMIT_ALLOC_STAT(large, allocated, size) + GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_dict_end(emitter); /* Close "large". */ + +#undef GET_AND_EMIT_ALLOC_STAT + + /* Aggregated small + large stats are emitted only in table mode. 
*/ + alloc_count_title.str_val = "total:"; + alloc_count_allocated.size_val = small_allocated + large_allocated; + alloc_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; + alloc_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; + alloc_count_nrequests.uint64_val = small_nrequests + large_nrequests; + emitter_table_row(emitter, &alloc_count_row); + if (json) { malloc_cprintf(write_cb, cbopaque, ",\n"); } - CTL_M2_GET("stats.arenas.0.small.allocated", i, &small_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"small\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc" - " ndalloc nrequests\n"); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, - small_nrequests); - } - - CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"large\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", 
large_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, - large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated, small_nmalloc + - large_nmalloc, small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - } if (!json) { malloc_cprintf(write_cb, cbopaque, "active: %12zu\n", pactive * page); From 07fb707623de5da5b58c448683a3f71df67531c9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 17:58:02 -0800 Subject: [PATCH 1073/2608] Stats printing: convert most per-arena stats to use the emitter. 
--- src/stats.c | 102 ++++++++++++++++++++-------------------------------- 1 file changed, 39 insertions(+), 63 deletions(-) diff --git a/src/stats.c b/src/stats.c index ceabdc3c..3e91a3ff 100644 --- a/src/stats.c +++ b/src/stats.c @@ -720,77 +720,53 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, alloc_count_nrequests.uint64_val = small_nrequests + large_nrequests; emitter_table_row(emitter, &alloc_count_row); - if (json) { - malloc_cprintf(write_cb, cbopaque, ",\n"); - } + emitter_row_t mem_count_row; + emitter_row_init(&mem_count_row); - if (!json) { - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); - } + emitter_col_t mem_count_title; + emitter_col_init(&mem_count_title, &mem_count_row); + mem_count_title.justify = emitter_justify_left; + mem_count_title.width = 25; + mem_count_title.type = emitter_type_title; + mem_count_title.str_val = ""; - CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"mapped\": %zu,\n", mapped); - } else { - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); - } + emitter_col_t mem_count_val; + emitter_col_init(&mem_count_val, &mem_count_row); + mem_count_val.justify = emitter_justify_right; + mem_count_val.width = 12; + mem_count_val.type = emitter_type_title; + mem_count_val.str_val = ""; - CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"retained\": %zu,\n", retained); - } else { - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); - } + emitter_table_row(emitter, &mem_count_row); + mem_count_val.type = emitter_type_size; - CTL_M2_GET("stats.arenas.0.base", i, &base, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"base\": %zu,\n", base); - } else { - malloc_cprintf(write_cb, cbopaque, - "base: %12zu\n", base); - } + /* Active count in bytes is emitted only in table mode. 
*/ + mem_count_title.str_val = "active:"; + mem_count_val.size_val = pactive * page; + emitter_table_row(emitter, &mem_count_row); - CTL_M2_GET("stats.arenas.0.internal", i, &internal, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"internal\": %zu,\n", internal); - } else { - malloc_cprintf(write_cb, cbopaque, - "internal: %12zu\n", internal); - } +#define GET_AND_EMIT_MEM_STAT(stat) \ + CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \ + emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ + mem_count_title.str_val = #stat":"; \ + mem_count_val.size_val = stat; \ + emitter_table_row(emitter, &mem_count_row); - CTL_M2_GET("stats.arenas.0.metadata_thp", i, &metadata_thp, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"metadata_thp\": %zu,\n", metadata_thp); - } else { - malloc_cprintf(write_cb, cbopaque, - "metadata_thp: %12zu\n", metadata_thp); - } + GET_AND_EMIT_MEM_STAT(mapped) + GET_AND_EMIT_MEM_STAT(retained) + GET_AND_EMIT_MEM_STAT(base) + GET_AND_EMIT_MEM_STAT(internal) + GET_AND_EMIT_MEM_STAT(metadata_thp) + GET_AND_EMIT_MEM_STAT(tcache_bytes) + GET_AND_EMIT_MEM_STAT(resident) +#undef GET_AND_EMIT_MEM_STAT - CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); - } else { - malloc_cprintf(write_cb, cbopaque, - "tcache: %12zu\n", tcache_bytes); - } - - CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"resident\": %zu%s\n", resident, - (bins || large || mutex) ? 
"," : ""); - } else { - malloc_cprintf(write_cb, cbopaque, - "resident: %12zu\n", resident); + if (bins || large || mutex) { + malloc_cprintf(write_cb, cbopaque, ",\n"); + } else { + malloc_cprintf(write_cb, cbopaque, "\n"); + } } if (mutex) { From a1738f4efd7cfdaec576e54df90422e36cc6a8df Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 18:46:44 -0800 Subject: [PATCH 1074/2608] Stats printing: Make arena mutex stats use the emitter. --- src/stats.c | 168 ++++++++++++++++++++++------------------------------ 1 file changed, 71 insertions(+), 97 deletions(-) diff --git a/src/stats.c b/src/stats.c index 3e91a3ff..0d81584e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -131,6 +131,52 @@ mutex_stats_init_row(emitter_row_t *row, const char *table_name, #undef WIDTH_uint64_t } +static void +mutex_stats_read_global(const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", name, #counter); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, + const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t 
emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ + CTL_M2_GET(cmd, arena_ind, \ + (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +} + static void mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], @@ -392,76 +438,27 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), } static void -read_arena_mutex_stats(unsigned arena_ind, - uint64_t results_uint64_t[mutex_prof_num_arena_mutexes][mutex_prof_num_uint64_t_counters], - uint32_t results_uint32_t[mutex_prof_num_arena_mutexes][mutex_prof_num_uint32_t_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; +stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { + emitter_row_t row; + emitter_col_t col_name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { -#define OP(c, t, human) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[i], #c); \ - CTL_M2_GET(cmd, arena_ind, \ - (t *)&results_##t[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP - } -} - -static void -mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], - uint32_t stats_uint32_t[mutex_prof_num_uint32_t_counters], - bool first_mutex) { - if (first_mutex) { - /* Print title. 
*/ - malloc_cprintf(write_cb, cbopaque, - " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_thds\n"); - } - - malloc_cprintf(write_cb, cbopaque, "%s", name); - malloc_cprintf(write_cb, cbopaque, ":%*c", - (int)(20 - strlen(name)), ' '); - - char *fmt_str[2] = {"%12"FMTu32, "%16"FMTu64}; -#define OP(c, t, human) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - (t)stats_##t[mutex_counter_##c]); -MUTEX_PROF_COUNTERS -#undef OP - malloc_cprintf(write_cb, cbopaque, "\n"); -} - -static void -stats_arena_mutexes_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats_64[mutex_prof_num_arena_mutexes][mutex_prof_num_uint64_t_counters]; - uint32_t mutex_stats_32[mutex_prof_num_arena_mutexes][mutex_prof_num_uint32_t_counters]; - read_arena_mutex_stats(arena_ind, mutex_stats_64, mutex_stats_32); - - /* Output mutex stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); - mutex_prof_arena_ind_t i, last_mutex; - last_mutex = mutex_prof_num_arena_mutexes - 1; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats_64[i], mutex_stats_32[i], - "\t\t\t\t\t", (i == last_mutex)); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", - json_end ? 
"" : ","); - } else { - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats_64[i], mutex_stats_32[i], i == 0); - } + mutex_stats_init_row(&row, "", &col_name, col64, col32); + + emitter_json_dict_begin(emitter, "mutexes"); + emitter_table_row(emitter, &row); + + for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; + i++) { + const char *name = arena_mutex_names[i]; + emitter_json_dict_begin(emitter, name); + mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, + col32); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_dict_end(emitter); /* Close the mutex dict. */ } + emitter_json_dict_end(emitter); /* End "mutexes". */ } static void @@ -761,18 +758,19 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(resident) #undef GET_AND_EMIT_MEM_STAT + if (mutex) { + stats_arena_mutexes_print(emitter, i); + } + + /* Emitter conversion point. 
*/ if (json) { - if (bins || large || mutex) { + if (bins || large) { malloc_cprintf(write_cb, cbopaque, ",\n"); } else { malloc_cprintf(write_cb, cbopaque, "\n"); } } - if (mutex) { - stats_arena_mutexes_print(write_cb, cbopaque, json, - !(bins || large), i); - } if (bins) { stats_arena_bins_print(write_cb, cbopaque, json, large, mutex, i); @@ -1020,29 +1018,6 @@ stats_general_print(emitter_t *emitter) { emitter_json_dict_end(emitter); /* Close "arenas" */ } -static void -mutex_stats_read_global(const char *name, emitter_col_t *col_name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - - col_name->str_val = name; - - emitter_col_t *dst; -#define EMITTER_TYPE_uint32_t emitter_type_uint32 -#define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", name, #counter); \ - CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); - MUTEX_PROF_COUNTERS -#undef OP -#undef EMITTER_TYPE_uint32_t -#undef EMITTER_TYPE_uint64_t -} - static void stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, bool unmerged, bool bins, bool large, bool mutex) { @@ -1198,7 +1173,6 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, } } } - emitter_json_dict_end(emitter); /* Close "stats.arenas". */ } } From a9f3cedc6ed6e923854edc5feddd42a39941f01c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Mar 2018 18:49:58 -0800 Subject: [PATCH 1075/2608] Stats printing: remove a spurious newline. This was left over from a previous emitter conversion. It didn't affect the correctness of the output. 
--- src/stats.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 0d81584e..05a452fd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1231,9 +1231,6 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (general) { stats_general_print(&emitter); } - if (json) { - malloc_cprintf(write_cb, cbopaque, "\n"); - } if (config_stats) { stats_print_helper(&emitter, merged, destroyed, unmerged, bins, large, mutex); From 4eed989bbfb7c56bdea97169ca07f9a7b7f14f27 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 6 Mar 2018 18:50:53 -0800 Subject: [PATCH 1076/2608] Stats printing: convert arena bin stats to use emitter. --- src/stats.c | 328 +++++++++++++++++++++++++++++----------------------- 1 file changed, 181 insertions(+), 147 deletions(-) diff --git a/src/stats.c b/src/stats.c index 05a452fd..fac54ad3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -85,21 +85,7 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, } static void -read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results_uint64_t[mutex_prof_num_uint64_t_counters], - uint32_t results_uint32_t[mutex_prof_num_uint32_t_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; -#define OP(c, t, human) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #c); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (t *)&results_##t[mutex_counter_##c], t); - MUTEX_PROF_COUNTERS -#undef OP -} - -static void -mutex_stats_init_row(emitter_row_t *row, const char *table_name, +mutex_stats_init_cols(emitter_row_t *row, const char *table_name, emitter_col_t *name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { @@ -108,12 +94,13 @@ mutex_stats_init_row(emitter_row_t *row, const char *table_name, emitter_col_t *col; - emitter_row_init(row); - emitter_col_init(name, row); - name->justify = emitter_justify_left; - name->width = 21; - 
name->type = emitter_type_title; - name->str_val = table_name; + if (name != NULL) { + emitter_col_init(name, row); + name->justify = emitter_justify_left; + name->width = 21; + name->type = emitter_type_title; + name->str_val = table_name; + } #define WIDTH_uint32_t 12 #define WIDTH_uint64_t 16 @@ -175,13 +162,40 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, (counter_type *)&dst->bool_val, counter_type); MUTEX_PROF_COUNTERS #undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } +static void +mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + emitter_col_t *dst; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +/* "row" can be NULL to avoid emitting in table mode. 
*/ static void mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { - emitter_table_row(emitter, row); + if (row != NULL) { + emitter_table_row(emitter, row); + } mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; @@ -202,31 +216,7 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, } static void -mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats_uint64_t[mutex_prof_num_uint64_t_counters], - uint32_t stats_uint32_t[mutex_prof_num_uint32_t_counters], - const char *json_indent, bool last) { - malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - - mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; - mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; - char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", - "%s\t\"%s\": %"FMTu64"%s\n"}; -#define OP(c, t, human) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - json_indent, #c, (t)stats_##t[mutex_counter_##c], \ - (++k_##t && k_uint32_t == mutex_prof_num_uint32_t_counters) ? "" : ","); - MUTEX_PROF_COUNTERS -#undef OP - -malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, - last ? 
"" : ","); -} - -static void -stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, bool mutex, unsigned i) { +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -234,19 +224,71 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_GET("arenas.nbins", &nbins, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"bins\": [\n"); - } else { - char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_thds\n"; - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curslabs regs" - " pgs util nfills nflushes newslabs" - " reslabs%s", mutex ? mutex_counters : "\n"); + + emitter_row_t header_row; + emitter_row_init(&header_row); + + emitter_row_t row; + emitter_row_init(&row); +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; \ + emitter_col_t header_col_##name; \ + emitter_col_init(&header_col_##name, &header_row); \ + header_col_##name.justify = emitter_justify_##left_or_right; \ + header_col_##name.width = col_width; \ + header_col_##name.type = emitter_type_title; \ + header_col_##name.str_val = #name; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(allocated, right, 13, uint64) + COL(nmalloc, right, 13, uint64) + COL(ndalloc, right, 13, uint64) + COL(nrequests, right, 13, uint64) + COL(curregs, right, 13, size) + COL(curslabs, right, 13, size) + COL(regs, right, 5, unsigned) + COL(pgs, right, 4, size) + /* To buffer a right- and left-justified column. 
*/ + COL(justify_spacer, right, 1, title) + COL(util, right, 6, title) + COL(nfills, right, 13, uint64) + COL(nflushes, right, 13, uint64) + COL(nslabs, right, 13, uint64) + COL(nreslabs, right, 13, uint64) +#undef COL + + /* Don't want to actually print the name. */ + header_col_justify_spacer.str_val = " "; + col_justify_spacer.str_val = " "; + + + emitter_col_t col_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col_mutex32[mutex_prof_num_uint32_t_counters]; + + emitter_col_t header_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters]; + + if (mutex) { + mutex_stats_init_cols(&row, NULL, NULL, col_mutex64, + col_mutex32); + mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64, + header_mutex32); } + + /* + * We print a "bins:" header as part of the table row; we need to adjust + * the header size column to compensate. + */ + header_col_size.width -=5; + emitter_table_printf(emitter, "bins:"); + emitter_table_row(emitter, &header_row); + emitter_json_arr_begin(emitter, "bins"); + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -260,8 +302,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, in_gap_prev = in_gap; in_gap = (nslabs == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } @@ -286,90 +328,82 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curregs\": %zu,\n" - "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nreslabs\": 
%"FMTu64",\n" - "\t\t\t\t\t\t\"curslabs\": %zu%s\n", - nmalloc, ndalloc, curregs, nrequests, nfills, - nflushes, nreslabs, curslabs, mutex ? "," : ""); - if (mutex) { - uint64_t mutex_stats_64[mutex_prof_num_uint64_t_counters]; - uint32_t mutex_stats_32[mutex_prof_num_uint32_t_counters]; - read_arena_bin_mutex_stats(i, j, mutex_stats_64, mutex_stats_32); - mutex_stats_output_json(write_cb, cbopaque, - "mutex", mutex_stats_64, mutex_stats_32, "\t\t\t\t\t\t", true); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t}%s\n", - (j + 1 < nbins) ? "," : ""); - } else if (!in_gap) { - size_t availregs = nregs * curslabs; - char util[6]; - if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, - util)) { - if (availregs == 0) { - malloc_snprintf(util, sizeof(util), - "1"); - } else if (curregs > availregs) { - /* - * Race detected: the counters were read - * in separate mallctl calls and - * concurrent operations happened in - * between. In this case no meaningful - * utilization can be computed. 
- */ - malloc_snprintf(util, sizeof(util), - " race"); - } else { - not_reached(); - } - } - uint64_t mutex_stats_64[mutex_prof_num_uint64_t_counters]; - uint32_t mutex_stats_32[mutex_prof_num_uint32_t_counters]; - if (mutex) { - read_arena_bin_mutex_stats(i, j, mutex_stats_64, mutex_stats_32); - } + if (mutex) { + mutex_stats_read_arena_bin(i, j, col_mutex64, + col_mutex32); + } - malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" - FMTu64" %12"FMTu64" %12"FMTu64" %12zu %12zu %4u" - " %3zu %-5s %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %12"FMTu64, reg_size, j, curregs * reg_size, - nmalloc, ndalloc, nrequests, curregs, curslabs, - nregs, slab_size / page, util, nfills, nflushes, - nslabs, nreslabs); + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, + &nmalloc); + emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, + &ndalloc); + emitter_json_kv(emitter, "curregs", emitter_type_size, + &curregs); + emitter_json_kv(emitter, "nrequests", emitter_type_uint64, + &nrequests); + emitter_json_kv(emitter, "nfills", emitter_type_uint64, + &nfills); + emitter_json_kv(emitter, "nflushes", emitter_type_uint64, + &nflushes); + emitter_json_kv(emitter, "nreslabs", emitter_type_uint64, + &nreslabs); + emitter_json_kv(emitter, "curslabs", emitter_type_size, + &curslabs); + if (mutex) { + emitter_json_dict_begin(emitter, "mutex"); + mutex_stats_emit(emitter, NULL, col_mutex64, + col_mutex32); + emitter_json_dict_end(emitter); + } + emitter_json_arr_obj_end(emitter); - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %14"FMTu64" %14"FMTu64" %12"FMTu64 - " %10"FMTu32"\n", - mutex_stats_64[mutex_counter_num_ops], - mutex_stats_64[mutex_counter_num_wait], - mutex_stats_64[mutex_counter_num_spin_acq], - mutex_stats_64[mutex_counter_num_owner_switch], - mutex_stats_64[mutex_counter_total_wait_time], - mutex_stats_64[mutex_counter_max_wait_time], - 
mutex_stats_32[mutex_counter_max_num_thds]); + size_t availregs = nregs * curslabs; + char util[6]; + if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util)) + { + if (availregs == 0) { + malloc_snprintf(util, sizeof(util), "1"); + } else if (curregs > availregs) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. + */ + malloc_snprintf(util, sizeof(util), " race"); } else { - malloc_cprintf(write_cb, cbopaque, "\n"); + not_reached(); } } + + col_size.size_val = reg_size; + col_ind.unsigned_val = j; + col_allocated.size_val = curregs * reg_size; + col_nmalloc.uint64_val = nmalloc; + col_ndalloc.uint64_val = ndalloc; + col_nrequests.uint64_val = nrequests; + col_curregs.size_val = curregs; + col_curslabs.size_val = curslabs; + col_regs.unsigned_val = nregs; + col_pgs.size_val = slab_size / page; + col_util.str_val = util; + col_nfills.uint64_val = nfills; + col_nflushes.uint64_val = nflushes; + col_nslabs.uint64_val = nslabs; + col_nreslabs.uint64_val = nreslabs; + + /* + * Note that mutex columns were initialized above, if mutex == + * true. + */ + + emitter_table_row(emitter, &row); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]%s\n", large ? "," : ""); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + emitter_json_arr_end(emitter); /* Close "bins". 
*/ + + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } } @@ -444,7 +478,8 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { emitter_col_t col64[mutex_prof_num_uint64_t_counters]; emitter_col_t col32[mutex_prof_num_uint32_t_counters]; - mutex_stats_init_row(&row, "", &col_name, col64, col32); + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &col_name, col64, col32); emitter_json_dict_begin(emitter, "mutexes"); emitter_table_row(emitter, &row); @@ -761,20 +796,18 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, if (mutex) { stats_arena_mutexes_print(emitter, i); } + if (bins) { + stats_arena_bins_print(emitter, mutex, i); + } /* Emitter conversion point. */ if (json) { - if (bins || large) { + if (large) { malloc_cprintf(write_cb, cbopaque, ",\n"); } else { malloc_cprintf(write_cb, cbopaque, "\n"); } } - - if (bins) { - stats_arena_bins_print(write_cb, cbopaque, json, large, mutex, - i); - } if (large) { stats_arena_lextents_print(write_cb, cbopaque, json, i); } @@ -1088,7 +1121,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_col_t col64[mutex_prof_num_uint64_t_counters]; emitter_col_t col32[mutex_prof_num_uint32_t_counters]; - mutex_stats_init_row(&row, "", &name, col64, col32); + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &name, col64, col32); emitter_table_row(emitter, &row); emitter_json_dict_begin(emitter, "mutexes"); From 4c36cd2cc5c6ac7f27354b84606b0ca4d6178791 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 6 Mar 2018 20:25:35 -0800 Subject: [PATCH 1077/2608] Stats printing: Convert arena large stats to use emitter. This completes the conversion; we now have only structured text output. 
--- src/stats.c | 107 ++++++++++++++++++++++++++++------------------------ 1 file changed, 58 insertions(+), 49 deletions(-) diff --git a/src/stats.c b/src/stats.c index fac54ad3..e70a567c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -408,21 +408,47 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { } static void -stats_arena_lextents_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned i) { +stats_arena_lextents_print(emitter_t *emitter, unsigned i) { unsigned nbins, nlextents, j; bool in_gap, in_gap_prev; CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"lextents\": [\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc" - " ndalloc nrequests curlextents\n"); - } + + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t header_##name; \ + emitter_col_init(&header_##name, &header_row); \ + header_##name.justify = emitter_justify_##left_or_right; \ + header_##name.width = col_width; \ + header_##name.type = emitter_type_title; \ + header_##name.str_val = #name; \ + \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(allocated, right, 13, size) + COL(nmalloc, right, 13, uint64) + COL(ndalloc, right, 13, uint64) + COL(nrequests, right, 13, uint64) + COL(curlextents, right, 13, size) +#undef COL + + /* As with bins, we label the large extents table. 
*/ + header_size.width -= 6; + emitter_table_printf(emitter, "large:"); + emitter_table_row(emitter, &header_row); + emitter_json_arr_begin(emitter, "lextents"); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; @@ -436,38 +462,35 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), in_gap_prev = in_gap; in_gap = (nrequests == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, &curlextents, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"curlextents\": %zu\n" - "\t\t\t\t\t}%s\n", - curlextents, - (j + 1 < nlextents) ? "," : ""); - } else if (!in_gap) { - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64" %12zu\n", - lextent_size, nbins + j, - curlextents * lextent_size, nmalloc, ndalloc, - nrequests, curlextents); + + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "curlextents", emitter_type_size, + &curlextents); + emitter_json_arr_obj_end(emitter); + + col_size.size_val = lextent_size; + col_ind.unsigned_val = nbins + j; + col_allocated.size_val = curlextents * lextent_size; + col_nmalloc.uint64_val = nmalloc; + col_ndalloc.uint64_val = ndalloc; + col_nrequests.uint64_val = nrequests; + col_curlextents.size_val = curlextents; + + if (!in_gap) { + emitter_table_row(emitter, &row); } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]\n"); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + emitter_json_arr_end(emitter); /* Close "lextents". 
*/ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } } @@ -513,11 +536,6 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, size_t tcache_bytes; uint64_t uptime; - /* These should be removed once the emitter conversion is done. */ - void (*write_cb)(void *, const char *) = emitter->write_cb; - void *cbopaque = emitter->cbopaque; - bool json = (emitter->output == emitter_output_json); - CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); @@ -799,17 +817,8 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, if (bins) { stats_arena_bins_print(emitter, mutex, i); } - - /* Emitter conversion point. */ - if (json) { - if (large) { - malloc_cprintf(write_cb, cbopaque, ",\n"); - } else { - malloc_cprintf(write_cb, cbopaque, "\n"); - } - } if (large) { - stats_arena_lextents_print(write_cb, cbopaque, json, i); + stats_arena_lextents_print(emitter, i); } } From 742416f64571e7a0b1d75ad116bc9f1794e67c1c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Mar 2018 11:32:58 -0700 Subject: [PATCH 1078/2608] Revert "CI: Remove "catgets" dependency on appveyor." This reverts commit ae0f5d5c3f29beb9977148dedb58575757139586. --- .appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index ad093c1c..9a7d00a9 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -31,7 +31,6 @@ install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -Rsc catgets - pacman --noconfirm -Suy mingw-w64-%CPU%-make build_script: From baffeb1d0ab45e0bcaad7f326d9028372e2cb000 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Mar 2018 18:36:12 -0700 Subject: [PATCH 1079/2608] Fix a typo in stats. 
--- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index e70a567c..2af5f6cc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -950,7 +950,7 @@ stats_general_print(emitter_t *emitter) { CTL_GET("prof.thread_active_init", &bv, bool); emitter_kv(emitter, "thread_active_init", - "prof.thread_active_emit", emitter_type_bool, &bv); + "prof.thread_active_init", emitter_type_bool, &bv); CTL_GET("prof.active", &bv, bool); emitter_kv(emitter, "active", "prof.active", emitter_type_bool, From 956c4ad6b57318bc7b6cd02bf9bfeb45afc4e3e2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Mar 2018 20:06:04 -0700 Subject: [PATCH 1080/2608] Change mutable option output in stats to avoid stringify issues. --- src/stats.c | 73 +++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/src/stats.c b/src/stats.c index 2af5f6cc..08b9507c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -868,19 +868,19 @@ stats_general_print(emitter_t *emitter) { /* opt. 
*/ #define OPT_WRITE(name, var, size, emitter_type) \ - if (je_mallctl("opt."#name, (void *)&var, &size, NULL, 0) == \ + if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \ 0) { \ - emitter_kv(emitter, #name, "opt."#name, emitter_type, \ + emitter_kv(emitter, name, "opt."name, emitter_type, \ &var); \ } #define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ altname) \ - if (je_mallctl("opt."#name, (void *)&var1, &size, NULL, 0) == \ - 0 && je_mallctl(#altname, (void *)&var2, &size, NULL, 0) \ + if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \ + 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \ == 0) { \ - emitter_kv_note(emitter, #name, "opt."#name, \ - emitter_type, &var1, #altname, emitter_type, \ + emitter_kv_note(emitter, name, "opt."name, \ + emitter_type, &var1, altname, emitter_type, \ &var2); \ } @@ -902,36 +902,37 @@ stats_general_print(emitter_t *emitter) { emitter_dict_begin(emitter, "opt", "Run-time option settings"); - OPT_WRITE_BOOL(abort) - OPT_WRITE_BOOL(abort_conf) - OPT_WRITE_BOOL(retain) - OPT_WRITE_CHAR_P(dss) - OPT_WRITE_UNSIGNED(narenas) - OPT_WRITE_CHAR_P(percpu_arena) - OPT_WRITE_CHAR_P(metadata_thp) - OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread) - OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms) - OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, arenas.muzzy_decay_ms) - OPT_WRITE_UNSIGNED(lg_extent_max_active_fit) - OPT_WRITE_CHAR_P(junk) - OPT_WRITE_BOOL(zero) - OPT_WRITE_BOOL(utrace) - OPT_WRITE_BOOL(xmalloc) - OPT_WRITE_BOOL(tcache) - OPT_WRITE_SSIZE_T(lg_tcache_max) - OPT_WRITE_CHAR_P(thp) - OPT_WRITE_BOOL(prof) - OPT_WRITE_CHAR_P(prof_prefix) - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active) - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init) - OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample) - OPT_WRITE_BOOL(prof_accum) - OPT_WRITE_SSIZE_T(lg_prof_interval) - OPT_WRITE_BOOL(prof_gdump) - OPT_WRITE_BOOL(prof_final) - 
OPT_WRITE_BOOL(prof_leak) - OPT_WRITE_BOOL(stats_print) - OPT_WRITE_CHAR_P(stats_print_opts) + OPT_WRITE_BOOL("abort") + OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("retain") + OPT_WRITE_CHAR_P("dss") + OPT_WRITE_UNSIGNED("narenas") + OPT_WRITE_CHAR_P("percpu_arena") + OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") + OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") + OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") + OPT_WRITE_UNSIGNED("lg_extent_max_active_fit") + OPT_WRITE_CHAR_P("junk") + OPT_WRITE_BOOL("zero") + OPT_WRITE_BOOL("utrace") + OPT_WRITE_BOOL("xmalloc") + OPT_WRITE_BOOL("tcache") + OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_CHAR_P("thp") + OPT_WRITE_BOOL("prof") + OPT_WRITE_CHAR_P("prof_prefix") + OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") + OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", + "prof.thread_active_init") + OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample") + OPT_WRITE_BOOL("prof_accum") + OPT_WRITE_SSIZE_T("lg_prof_interval") + OPT_WRITE_BOOL("prof_gdump") + OPT_WRITE_BOOL("prof_final") + OPT_WRITE_BOOL("prof_leak") + OPT_WRITE_BOOL("stats_print") + OPT_WRITE_CHAR_P("stats_print_opts") emitter_dict_end(emitter); From 21eb0d15a6cfdaee3aa78f724838b503053d7f00 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 30 Mar 2018 15:09:05 -0700 Subject: [PATCH 1081/2608] Fix a background_thread shutdown issue. 1) make sure background thread 0 is always created; and 2) fix synchronization between thread 0 and the control thread. 
--- src/background_thread.c | 47 +++++++++++++++------------- test/unit/background_thread_enable.c | 3 ++ 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index a8a5a052..c16f0063 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -380,35 +380,29 @@ background_thread_create_signals_masked(pthread_t *thread, return create_err; } -static void +static bool check_background_thread_creation(tsd_t *tsd, unsigned *n_created, bool *created_threads) { + bool ret = false; if (likely(*n_created == n_background_threads)) { - return; + return ret; } - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx); -label_restart: - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); for (unsigned i = 1; i < ncpus; i++) { if (created_threads[i]) { continue; } background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_lock(tsdn, &info->mtx); assert(info->state != background_thread_paused); bool create = (info->state == background_thread_started); - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_unlock(tsdn, &info->mtx); if (!create) { continue; } - /* - * To avoid deadlock with prefork handlers (which waits for the - * mutex held here), unlock before calling pthread_create(). - */ - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, NULL, background_thread_entry, (void *)(uintptr_t)i); @@ -424,11 +418,13 @@ label_restart: abort(); } } - /* Restart since we unlocked. */ - goto label_restart; + /* Return to restart the loop since we unlocked. 
*/ + ret = true; + break; } - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_lock(tsdn, &background_thread_info[0].mtx); + + return ret; } static void @@ -446,8 +442,10 @@ background_thread0_work(tsd_t *tsd) { &background_thread_info[0])) { continue; } - check_background_thread_creation(tsd, &n_created, - (bool *)&created_threads); + if (check_background_thread_creation(tsd, &n_created, + (bool *)&created_threads)) { + continue; + } background_work_sleep_once(tsd_tsdn(tsd), &background_thread_info[0], 0); } @@ -464,8 +462,13 @@ background_thread0_work(tsd_t *tsd) { background_threads_disable_single(tsd, info); } else { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - /* Clear in case the thread wasn't created. */ - info->state = background_thread_stopped; + if (info->state != background_thread_stopped) { + /* The thread was not created. */ + assert(info->state == + background_thread_started); + n_background_threads--; + info->state = background_thread_stopped; + } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } @@ -593,6 +596,8 @@ background_threads_enable(tsd_t *tsd) { marked[i] = false; } nmarked = 0; + /* Thread 0 is required and created at the end. */ + marked[0] = true; /* Mark the threads we need to create for thread 0. 
*/ unsigned n = narenas_total_get(); for (i = 1; i < n; i++) { diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 9bb58652..7e4f6ed7 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -24,6 +24,9 @@ TEST_BEGIN(test_deferred) { size_t sz_b = sizeof(bool); assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); + enable = false; + assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + "Failed to disable background threads"); } TEST_END From 6d02421730e2f2dc6985da699b8e10b3ed4061b6 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 22 Feb 2018 09:40:27 -0800 Subject: [PATCH 1082/2608] extents: Remove preserve_lru feature. preserve_lru feature adds lots of complication, for little value. Removing it means merged extents are re-added to the lru list, and may take longer to madvise away than they otherwise would. Canaries after removal seem flat for several services (no change). 
--- src/extent.c | 64 +++++++++++++++------------------------------------- 1 file changed, 18 insertions(+), 46 deletions(-) diff --git a/src/extent.c b/src/extent.c index 88d331f7..f11e77d8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -306,8 +306,7 @@ extents_npages_get(extents_t *extents) { } static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -319,9 +318,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, (size_t)pind); } extent_heap_insert(&extents->heaps[pind], extent); - if (!preserve_lru) { - extent_list_append(&extents->lru, extent); - } + extent_list_append(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -335,8 +332,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -348,9 +344,7 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, bitmap_set(extents->bitmap, &extents_bitmap_info, (size_t)pind); } - if (!preserve_lru) { - extent_list_remove(&extents->lru, extent); - } + extent_list_remove(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * As in extents_insert_locked, we hold extents->mtx and so don't need @@ -500,7 +494,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, if (!coalesced) { return true; } - extents_insert_locked(tsdn, extents, extent, true); + extents_insert_locked(tsdn, extents, extent); return false; } @@ -560,7 +554,7 @@ 
extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = NULL; goto label_return; } - extents_remove_locked(tsdn, extents, extent, false); + extents_remove_locked(tsdn, extents, extent); if (!extents->delay_coalesce) { break; } @@ -633,29 +627,29 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent, preserve_lru); + extents_insert_locked(tsdn, extents, extent); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent, preserve_lru); + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extents_state_get(extents)); - extents_remove_locked(tsdn, extents, extent, preserve_lru); + extents_remove_locked(tsdn, extents, extent); extent_state_set(extent, extent_state_active); } @@ -905,7 +899,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, return NULL; } - extent_activate_locked(tsdn, arena, extents, extent, false); + extent_activate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); return extent; @@ -1031,10 +1025,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, extents, lead, false); + 
extent_deactivate(tsdn, arena, extents, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, extents, trail, false); + extent_deactivate(tsdn, arena, extents, trail); } return extent; } else { @@ -1517,34 +1511,15 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); - if (extents->delay_coalesce) { - /* - * Remove outer from the LRU list so that it won't be show up in - * decay through extents_evict. - */ - extent_list_remove(&extents->lru, outer); - } - extent_activate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_activate_locked(tsdn, arena, extents, outer); malloc_mutex_unlock(tsdn, &extents->mtx); bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &extents->mtx); - if (!err && extents->delay_coalesce) { - if (forward) { - extent_list_prepend(&extents->lru, inner); - } else { - extent_list_prepend(&extents->lru, outer); - } - } if (err) { - if (extents->delay_coalesce) { - extent_list_prepend(&extents->lru, outer); - } - extent_deactivate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_deactivate_locked(tsdn, arena, extents, outer); } return err; @@ -1655,13 +1630,10 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, &coalesced, growing_retained); - if (coalesced) { - extent_list_remove(&extents->lru, extent); - } } while (coalesced && extent_size_get(extent) >= prev_size + LARGE_MINCLASS); } - extent_deactivate_locked(tsdn, arena, extents, extent, false); + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } From 2dccf4564016233bd4ef7772b43ec8423b8c44df Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Apr 2018 13:45:37 -0700 
Subject: [PATCH 1083/2608] Control idump and gdump with prof_active. --- include/jemalloc/internal/arena_inlines_a.h | 2 +- include/jemalloc/internal/prof_inlines_a.h | 11 +++++++++++ include/jemalloc/internal/prof_inlines_b.h | 11 ----------- src/prof.c | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index da587706..9abf7f6a 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -25,7 +25,7 @@ static inline bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); - if (likely(prof_interval == 0)) { + if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { return false; } diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index eda6839a..a6efb485 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -69,4 +69,15 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { #endif } +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. 
+ */ + return prof_active; +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index d670cb7b..6ff465ad 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -3,17 +3,6 @@ #include "jemalloc/internal/sz.h" -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) { - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return prof_active; -} - JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { /* diff --git a/src/prof.c b/src/prof.c index 32760e68..293684ca 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1780,7 +1780,7 @@ prof_idump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); @@ -1837,7 +1837,7 @@ prof_gdump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); From 4df483f0fd76a64e116b1c4f316f8b941078114d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Apr 2018 13:18:21 -0700 Subject: [PATCH 1084/2608] Fix arguments passed to extent_init. 
--- src/extent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index f11e77d8..49b6d4b8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1442,7 +1442,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, zero, commit, + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); From d3e0976a2c1591b9fe433e7a383d8825683995f0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Apr 2018 11:40:44 -0700 Subject: [PATCH 1085/2608] Fix type warning on Windows. Add cast since read / write has unsigned return type on windows. --- include/jemalloc/internal/malloc_io.h | 36 +++++++++++++++++++++++++++ src/malloc_io.c | 15 +---------- src/pages.c | 15 ++--------- src/prof.c | 7 +++--- test/unit/stats_print.c | 2 +- 5 files changed, 44 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 4992d1d8..bfe556b5 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -63,4 +63,40 @@ void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +static inline ssize_t +malloc_write_fd(int fd, const void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * result in the widest plausible type to avoid compiler warnings. 
+ */ + long result = syscall(SYS_write, fd, buf, count); +#else + ssize_t result = (ssize_t)write(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + +static inline ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + long result = syscall(SYS_read, fd, buf, count); +#else + ssize_t result = read(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/malloc_io.c b/src/malloc_io.c index fd27bd1c..7bdc13f9 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -70,20 +70,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /* malloc_message() setup. */ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * unused result in the widest plausible type to avoid compiler - * warnings. - */ - UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); -#else - UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); -#endif + malloc_write_fd(STDERR_FILENO, s, strlen(s)); } JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); diff --git a/src/pages.c b/src/pages.c index 82405219..26002692 100644 --- a/src/pages.c +++ b/src/pages.c @@ -436,7 +436,6 @@ static bool os_overcommits_proc(void) { int fd; char buf[1]; - ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) #if defined(O_CLOEXEC) @@ -474,12 +473,7 @@ os_overcommits_proc(void) { return false; /* Error. 
*/ } -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); -#else - nread = read(fd, &buf, sizeof(buf)); -#endif - + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else @@ -543,12 +537,7 @@ init_thp_state(void) { goto label_error; } -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - ssize_t nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); -#else - ssize_t nread = read(fd, &buf, sizeof(buf)); -#endif - + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else diff --git a/src/prof.c b/src/prof.c index 293684ca..13df641a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -978,7 +978,7 @@ prof_dump_flush(bool propagate_err) { cassert(config_prof); - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (!propagate_err) { malloc_write(": write() failed during heap " @@ -1471,8 +1471,9 @@ prof_dump_maps(bool propagate_err) { goto label_return; } } - nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); } while (nread > 0); } else { ret = true; diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index acb26b06..014d002f 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -67,7 +67,7 @@ token_error(token_t *token) { token->col); break; } - UNUSED ssize_t err = write(STDERR_FILENO, + UNUSED ssize_t err = malloc_write_fd(STDERR_FILENO, &token->parser->buf[token->pos], token->len); malloc_printf("\n"); } From cf2f4aac1ca8c7d48a61a3921335fb411a3943a4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Apr 2018 11:50:17 -0700 Subject: [PATCH 1086/2608] Fix 
const qualifier warnings. --- test/integration/mallocx.c | 2 +- test/unit/mallctl.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 35d72093..fd960f30 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -154,7 +154,7 @@ TEST_BEGIN(test_alignment_and_size) { const char *percpu_arena; size_t sz = sizeof(percpu_arena); - if(mallctl("opt.percpu_arena", &percpu_arena, &sz, NULL, 0) || + if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) || strcmp(percpu_arena, "disabled") != 0) { test_skip("test_alignment_and_size skipped: " "not working with percpu arena."); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index c9ba6c5d..f8b180b1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -334,7 +334,7 @@ TEST_BEGIN(test_thread_arena) { const char *opa; size_t sz = sizeof(opa); - assert_d_eq(mallctl("opt.percpu_arena", &opa, &sz, NULL, 0), 0, + assert_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); From 5f51882a0a7d529c90bbb15ccbabb064b0a11e80 Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Mon, 9 Apr 2018 19:02:40 -0700 Subject: [PATCH 1087/2608] Stack address should not be used for ordering mutexes --- src/mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mutex.c b/src/mutex.c index 3de7f44a..30222b3e 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -174,7 +174,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, mutex->lock_order = lock_order; if (lock_order == malloc_mutex_address_ordered) { witness_init(&mutex->witness, name, rank, - mutex_addr_comp, &mutex); + mutex_addr_comp, mutex); } else { witness_init(&mutex->witness, name, rank, NULL, NULL); } From 4be74d51121e8772d356e8be088dc93f927fd709 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 5 Apr 2018 14:37:17 -0700 Subject: [PATCH 1088/2608] Consolidate the two memory 
loads in rtree_szind_slab_read(). szind and slab bits are read on fast path, where compiler generated two memory loads separately for them before this diff. Manually operate on the bits to avoid the extra memory load. --- include/jemalloc/internal/rtree.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 4563db23..8ff20d72 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -460,8 +460,14 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); +#endif return false; } From 8b14f3abc05f01419f9321a6a65ab9dd68dcebac Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 29 Mar 2018 12:58:13 -0700 Subject: [PATCH 1089/2608] background_thread: add max thread count config Looking at the thread counts in our services, jemalloc's background thread is useful, but mostly idle. Add a config option to tune down the number of threads. --- doc/jemalloc.xml.in | 23 ++++++ .../internal/background_thread_externs.h | 2 + .../internal/background_thread_structs.h | 1 + src/background_thread.c | 49 +++++++------ src/ctl.c | 70 +++++++++++++++++++ src/jemalloc.c | 4 ++ test/unit/background_thread_enable.c | 50 ++++++++++++- 7 files changed, 177 insertions(+), 22 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 9ecd8a1f..2e7edc33 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -761,6 +761,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", selected pthread-based platforms. 
+ + + max_background_threads + (size_t) + rw + + Maximum number of background worker threads that will + be created. This value is capped at opt.max_background_threads at + startup. + + config.cache_oblivious @@ -1009,6 +1021,17 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", default. + + + opt.max_background_threads + (const size_t) + r- + + Maximum number of background threads that will be created + if background_thread is set. + Defaults to number of cpus. + + opt.dirty_decay_ms diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 8b4b8471..3209aa49 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -2,9 +2,11 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; +extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; extern size_t n_background_threads; +extern size_t max_background_threads; extern background_thread_info_t *background_thread_info; extern bool can_enable_background_thread; diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index e69a7d02..c1107dfe 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -8,6 +8,7 @@ #endif #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX +#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT typedef enum { background_thread_stopped, diff --git a/src/background_thread.c b/src/background_thread.c index c16f0063..d2aa2745 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -11,12 +11,14 @@ #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. 
*/ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; +size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; /* Indicates global state. Atomic because decay reads this w/o locking. */ atomic_b_t background_thread_enabled_state; size_t n_background_threads; +size_t max_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; @@ -287,7 +289,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; unsigned narenas = narenas_total_get(); - for (unsigned i = ind; i < narenas; i += ncpus) { + for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; @@ -390,7 +392,7 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); - for (unsigned i = 1; i < ncpus; i++) { + for (unsigned i = 1; i < max_background_threads; i++) { if (created_threads[i]) { continue; } @@ -430,9 +432,9 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, static void background_thread0_work(tsd_t *tsd) { /* Thread0 is also responsible for launching / terminating threads. */ - VARIABLE_ARRAY(bool, created_threads, ncpus); + VARIABLE_ARRAY(bool, created_threads, max_background_threads); unsigned i; - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { created_threads[i] = false; } /* Start working, and create more threads when asked. */ @@ -455,7 +457,7 @@ background_thread0_work(tsd_t *tsd) { * the global background_thread mutex (and is waiting) for us. 
*/ assert(!background_thread_enabled()); - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; assert(info->state != background_thread_paused); if (created_threads[i]) { @@ -502,7 +504,7 @@ background_work(tsd_t *tsd, unsigned ind) { static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; - assert(thread_ind < ncpus); + assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); #endif @@ -536,7 +538,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); /* We create at most NCPUs threads. */ - size_t thread_ind = arena_ind % ncpus; + size_t thread_ind = arena_ind % max_background_threads; background_thread_info_t *info = &background_thread_info[thread_ind]; bool need_new_thread; @@ -590,9 +592,9 @@ background_threads_enable(tsd_t *tsd) { assert(background_thread_enabled()); malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - VARIABLE_ARRAY(bool, marked, ncpus); + VARIABLE_ARRAY(bool, marked, max_background_threads); unsigned i, nmarked; - for (i = 0; i < ncpus; i++) { + for (i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; @@ -601,18 +603,18 @@ background_threads_enable(tsd_t *tsd) { /* Mark the threads we need to create for thread 0. 
*/ unsigned n = narenas_total_get(); for (i = 1; i < n; i++) { - if (marked[i % ncpus] || + if (marked[i % max_background_threads] || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } background_thread_info_t *info = &background_thread_info[ - i % ncpus]; + i % max_background_threads]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - marked[i % ncpus] = true; - if (++nmarked == ncpus) { + marked[i % max_background_threads] = true; + if (++nmarked == max_background_threads) { break; } } @@ -727,14 +729,14 @@ background_thread_prefork0(tsdn_t *tsdn) { void background_thread_prefork1(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx); } } void background_thread_postfork_parent(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_parent(tsdn, &background_thread_info[i].mtx); } @@ -743,7 +745,7 @@ background_thread_postfork_parent(tsdn_t *tsdn) { void background_thread_postfork_child(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_child(tsdn, &background_thread_info[i].mtx); } @@ -756,7 +758,7 @@ background_thread_postfork_child(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &background_thread_lock); n_background_threads = 0; background_thread_enabled_set(tsdn, false); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); info->state = background_thread_stopped; @@ -780,7 +782,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { stats->num_threads = n_background_threads; 
uint64_t num_runs = 0; nstime_init(&stats->run_interval, 0); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); if (info->state != background_thread_stopped) { @@ -848,6 +850,12 @@ background_thread_boot1(tsdn_t *tsdn) { assert(have_background_thread); assert(narenas_total_get() > 0); + if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT && + ncpus < MAX_BACKGROUND_THREAD_LIMIT) { + opt_max_background_threads = ncpus; + } + max_background_threads = opt_max_background_threads; + background_thread_enabled_set(tsdn, opt_background_thread); if (malloc_mutex_init(&background_thread_lock, "background_thread_global", @@ -857,12 +865,13 @@ background_thread_boot1(tsdn_t *tsdn) { } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); + b0get(), opt_max_background_threads * + sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { return true; } - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; /* Thread mutex is rank_inclusive because of thread0. 
*/ if (malloc_mutex_init(&info->mtx, "background_thread", diff --git a/src/ctl.c b/src/ctl.c index aaf6e35a..02610cf0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -57,6 +57,7 @@ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(background_thread) +CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_prof_name) @@ -85,6 +86,7 @@ CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -284,6 +286,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("background_thread"), CTL(opt_background_thread)}, + {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -535,6 +538,7 @@ static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, {NAME("background_thread"), CTL(background_thread)}, + {NAME("max_background_threads"), CTL(max_background_threads)}, {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, @@ -1564,6 +1568,71 @@ label_return: return ret; } +static int +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + size_t oldval; + + if (!have_background_thread) { + return ENOENT; + } + background_thread_ctl_init(tsd_tsdn(tsd)); + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (newp == NULL) { + oldval = max_background_threads; + READ(oldval, size_t); + } else { + if (newlen != 
sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + oldval = max_background_threads; + READ(oldval, size_t); + + size_t newval = *(size_t *)newp; + if (newval == oldval) { + ret = 0; + goto label_return; + } + if (newval > opt_max_background_threads) { + ret = EINVAL; + goto label_return; + } + + if (background_thread_enabled()) { + if (!can_enable_background_thread) { + malloc_printf(": Error in dlsym(" + "RTLD_NEXT, \"pthread_create\"). Cannot " + "enable background_thread\n"); + ret = EFAULT; + goto label_return; + } + background_thread_enabled_set(tsd_tsdn(tsd), false); + if (background_threads_disable(tsd)) { + ret = EFAULT; + goto label_return; + } + max_background_threads = newval; + background_thread_enabled_set(tsd_tsdn(tsd), true); + if (background_threads_enable(tsd)) { + ret = EFAULT; + goto label_return; + } + } else { + max_background_threads = newval; + } + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + + return ret; +} + /******************************************************************************/ CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) @@ -1590,6 +1659,7 @@ CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) +CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4dde8fbc..912488d5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1175,6 +1175,10 @@ malloc_conf_init(void) { } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); + CONF_HANDLE_SIZE_T(opt_max_background_threads, + "max_background_threads", 1, + 
opt_max_background_threads, yes, yes, + true); if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 7e4f6ed7..ff95e672 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "background_thread:false,narenas:1"; +const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20"; TEST_BEGIN(test_deferred) { test_skip_if(!have_background_thread); @@ -30,8 +30,54 @@ TEST_BEGIN(test_deferred) { } TEST_END +TEST_BEGIN(test_max_background_threads) { + test_skip_if(!have_background_thread); + + size_t maxt; + size_t opt_maxt; + size_t sz_m = sizeof(maxt); + assert_d_eq(mallctl("opt.max_background_threads", + &opt_maxt, &sz_m, NULL, 0), 0, + "Failed to get opt.max_background_threads"); + assert_d_eq(mallctl("max_background_threads", &maxt, &sz_m, NULL, 0), 0, + "Failed to get max background threads"); + assert_zu_eq(20, maxt, "should be ncpus"); + assert_zu_eq(opt_maxt, maxt, + "max_background_threads and " + "opt.max_background_threads should match"); + assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), + 0, "Failed to set max background threads"); + + unsigned id; + size_t sz_u = sizeof(unsigned); + + for (unsigned i = 0; i < 10 * ncpus; i++) { + assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, + "Failed to create arena"); + } + + bool enable = true; + size_t sz_b = sizeof(bool); + assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + "Failed to enable background threads"); + assert_zu_eq(n_background_threads, maxt, + "Number of background threads should be 3.\n"); + maxt = 10; + assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), + 0, "Failed to set max background threads"); + assert_zu_eq(n_background_threads, maxt, + "Number of 
background threads should be 10.\n"); + maxt = 3; + assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), + 0, "Failed to set max background threads"); + assert_zu_eq(n_background_threads, maxt, + "Number of background threads should be 3.\n"); +} +TEST_END + int main(void) { return test_no_reentrancy( - test_deferred); + test_deferred, + test_max_background_threads); } From 49373096206964c3d60c1deaa75dcab6e90b7f59 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Apr 2018 15:18:58 -0700 Subject: [PATCH 1090/2608] Silence a compiler warning. --- src/arena.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 40ef143d..cbf27265 100644 --- a/src/arena.c +++ b/src/arena.c @@ -864,7 +864,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, is_background_thread); - size_t npages_new; + UNUSED size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; @@ -873,7 +873,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, npages_new); + background_thread_interval_check(tsdn, arena, decay, + npages_new); } return false; From cad27a894a2e043f3c1189201d6ff34a195dc658 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Tue, 10 Apr 2018 15:16:23 -0700 Subject: [PATCH 1091/2608] Fix a typo. --- ChangeLog | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 967d04d0..dd8f6ab1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -515,7 +515,7 @@ brevity. Much more detail can be found in the git revision history: these fixes, xallocx() now tries harder to partially fulfill requests for optional extra space. 
Note that a couple of minor heap profiling optimizations are included, but these are better thought of as performance - fixes that were integral to disovering most of the other bugs. + fixes that were integral to discovering most of the other bugs. Optimizations: - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the From f0b146acc4d48d1d829a8099aee7bc91267d8209 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Apr 2018 10:21:13 -0700 Subject: [PATCH 1092/2608] Fix a typo. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1e32f7fa..e45970ed 100644 --- a/configure.ac +++ b/configure.ac @@ -10,7 +10,7 @@ dnl Custom macro definitions. dnl JE_CONCAT_VVV(r, a, b) dnl dnl Set $r to the concatenation of $a and $b, with a space separating them iff -dnl both $a and $b are non-emty. +dnl both $a and $b are non-empty. AC_DEFUN([JE_CONCAT_VVV], if test "x[$]{$2}" = "x" -o "x[$]{$3}" = "x" ; then $1="[$]{$2}[$]{$3}" From 02585420c34e08db1de4c26f3d5bc808d6910131 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Apr 2018 12:09:48 -0700 Subject: [PATCH 1093/2608] Document liveness requirements for extent_hooks_t structures. --- doc/jemalloc.xml.in | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2e7edc33..3ec43ce1 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1792,7 +1792,9 @@ struct extent_hooks_s { in favor of less permanent (and often less costly) operations. All operations except allocation can be universally opted out of by setting the hook pointers to NULL, or selectively opted out - of by returning failure. + of by returning failure. Note that once the extent hook is set, the + structure is accessed directly by the associated arenas, so it must + remain valid for the entire lifetime of the arenas. 
typedef void *(extent_alloc_t) From 3f0dc64c6b8c1fd77c819028013dacbc6d2ad6b6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Apr 2018 15:15:26 -0700 Subject: [PATCH 1094/2608] Allow setting extent hooks on uninitialized auto arenas. Setting extent hooks can result in initializing an unused auto arena. This is useful to install extent hooks on auto arenas from the beginning. --- src/ctl.c | 45 ++++++++++++++++++++++++++++----------- test/integration/extent.c | 38 +++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 02610cf0..86c2837a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2251,20 +2251,41 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { - if (newp != NULL) { - extent_hooks_t *old_extent_hooks; - extent_hooks_t *new_extent_hooks - JEMALLOC_CC_SILENCE_INIT(NULL); - WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(tsd, arena, - new_extent_hooks); + if (arena_ind < narenas_total_get()) { + extent_hooks_t *old_extent_hooks; + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + if (arena_ind >= narenas_auto) { + ret = EFAULT; + goto label_return; + } + old_extent_hooks = + (extent_hooks_t *)&extent_hooks_default; READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + /* Initialize a new arena as a side effect. 
*/ + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + arena = arena_init(tsd_tsdn(tsd), arena_ind, + new_extent_hooks); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + } } else { - extent_hooks_t *old_extent_hooks = - extent_hooks_get(arena); - READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + old_extent_hooks = extent_hooks_set(tsd, arena, + new_extent_hooks); + READ(old_extent_hooks, extent_hooks_t *); + } else { + old_extent_hooks = extent_hooks_get(arena); + READ(old_extent_hooks, extent_hooks_t *); + } } } else { ret = EFAULT; diff --git a/test/integration/extent.c b/test/integration/extent.c index c2dc1cb8..b5db0876 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -98,6 +98,43 @@ test_extent_body(unsigned arena_ind) { dallocx(p, flags); } +static void +test_manual_hook_auto_arena(void) { + unsigned narenas; + size_t old_size, new_size, sz; + size_t hooks_mib[3]; + size_t hooks_miblen; + extent_hooks_t *new_hooks, *old_hooks; + + extent_hooks_prep(); + + sz = sizeof(unsigned); + /* Get number of auto arenas. */ + assert_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + if (narenas == 1) { + return; + } + + /* Install custom extent hooks on arena 1 (might not be initialized). 
*/ + hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); + assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, + &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = 1; + old_size = sizeof(extent_hooks_t *); + new_hooks = &hooks; + new_size = sizeof(extent_hooks_t *); + assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, (void *)&new_hooks, new_size), 0, + "Unexpected extent_hooks error"); + static bool auto_arena_created = false; + if (old_hooks != &hooks) { + assert_b_eq(auto_arena_created, false, + "Expected auto arena 1 created only once."); + auto_arena_created = true; + } +} + static void test_manual_hook_body(void) { unsigned arena_ind; @@ -169,6 +206,7 @@ test_manual_hook_body(void) { } TEST_BEGIN(test_extent_manual_hook) { + test_manual_hook_auto_arena(); test_manual_hook_body(); /* Test failure paths. */ From 2a80d6f15b18de2ef17b310e995af366cc20034c Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 16 Apr 2018 09:07:23 -0700 Subject: [PATCH 1095/2608] Avoid a printf format specifier warning. This dodges a warning emitted by the FreeBSD system gcc when compiling libc for architectures which don't use clang as the system compiler. --- include/jemalloc/internal/emitter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 830d0f24..3a2b2f7f 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -247,7 +247,7 @@ emitter_begin(emitter_t *emitter) { emitter_nest_inc(emitter); } else { // tabular init - emitter_printf(emitter, ""); + emitter_printf(emitter, "%s", ""); } } From 0fadf4a2e3e629b9fa43888f9754aea5327d038f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 16 Apr 2018 12:08:27 -0700 Subject: [PATCH 1096/2608] Add UNUSED to avoid compiler warnings. 
--- include/jemalloc/internal/arena_inlines_b.h | 4 +-- include/jemalloc/internal/arena_stats.h | 2 +- include/jemalloc/internal/extent_inlines.h | 2 +- include/jemalloc/internal/rtree.h | 24 +++++++-------- include/jemalloc/internal/tcache_inlines.h | 4 +-- include/jemalloc/internal/tsd_tls.h | 2 +- src/arena.c | 33 ++++++++++----------- src/base.c | 14 ++++----- src/jemalloc.c | 2 +- 9 files changed, 43 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7b10d9ef..2b7e77e7 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -28,7 +28,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -47,7 +47,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, } static inline void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 837d4eb6..5f3dca8b 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -95,7 +95,7 @@ struct arena_stats_s { }; static inline bool -arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { +arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_stats_t); i++) { assert(((char *)arena_stats)[i] == 0); diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 9b8ddc27..77181df8 100644 --- 
a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -190,7 +190,7 @@ extent_addr_set(extent_t *extent, void *addr) { } static inline void -extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { +extent_addr_randomize(UNUSED tsdn_t *tsdn, extent_t *extent, size_t alignment) { assert(extent_base_get(extent) == extent_addr_get(extent)); if (alignment < PAGE) { diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 8ff20d72..b59d33a8 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -208,8 +208,8 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) { # endif JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_extent_get(bits); @@ -221,8 +221,8 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_szind_get(bits); @@ -233,8 +233,8 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, 
elm, dependent); return rtree_leaf_elm_bits_slab_get(bits); @@ -245,8 +245,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent) { +rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, extent_t *extent) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << @@ -259,8 +259,8 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - szind_t szind) { +rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind) { assert(szind <= NSIZES); #ifdef RTREE_LEAF_COMPACT @@ -277,8 +277,8 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool slab) { +rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 0a6feb59..0f6ab8cb 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -39,8 +39,8 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) { } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) { +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + UNUSED size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; cache_bin_t *bin; bool 
tcache_success; diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 757aaa0e..0de64b7b 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -39,7 +39,7 @@ tsd_get_allocates(void) { /* Get/set. */ JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_get(bool init) { +tsd_get(UNUSED bool init) { assert(tsd_booted); return &tsd_tls; } diff --git a/src/arena.c b/src/arena.c index cbf27265..5d55bf1a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -61,7 +61,7 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, /******************************************************************************/ void -arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, +arena_basic_stats_merge(UNUSED tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); @@ -221,7 +221,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } static void * -arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, const bin_info_t *bin_info) { +arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { void *ret; arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -261,8 +261,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { } static void -arena_slab_reg_dalloc(tsdn_t *tsdn, extent_t *slab, - arena_slab_data_t *slab_data, void *ptr) { +arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); @@ -561,7 +560,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static void -arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { +arena_decay_reinit(arena_decay_t 
*decay, ssize_t decay_ms) { arena_decay_ms_write(decay, decay_ms); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -578,7 +577,7 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { } static bool -arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, +arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, arena_stats_decay_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { @@ -591,7 +590,7 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, return true; } decay->purging = false; - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); /* Memory is zeroed, so there is no need to clear stats. */ if (config_stats) { decay->stats = stats; @@ -700,7 +699,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); arena_maybe_decay(tsdn, arena, decay, extents, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -1210,7 +1209,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, * bin lock in arena_bin_nonfull_slab_get(). 
*/ if (extent_nfree_get(bin->slabcur) > 0) { - void *ret = arena_slab_reg_alloc(tsdn, bin->slabcur, + void *ret = arena_slab_reg_alloc(bin->slabcur, bin_info); if (slab != NULL) { /* @@ -1244,7 +1243,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, assert(extent_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(tsdn, slab, bin_info); + return arena_slab_reg_alloc(slab, bin_info); } void @@ -1266,8 +1265,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ptr; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ptr = arena_slab_reg_alloc(tsdn, slab, - &bin_infos[binind]); + ptr = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1328,7 +1326,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(tsdn, slab, &bin_infos[binind]); + ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1501,7 +1499,8 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, bin_t *bin) { +arena_bin_lower_slab(UNUSED tsdn_t *tsdn, arena_t *arena, extent_t *slab, + bin_t *bin) { assert(extent_nfree_get(slab) > 0); /* @@ -1538,7 +1537,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, arena_dalloc_junk_small(ptr, bin_info); } - arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); + arena_slab_reg_dalloc(slab, slab_data, ptr); unsigned nfree = extent_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); @@ -1856,11 +1855,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->decay_dirty, 
&arena->extents_dirty, + if (arena_decay_init(&arena->decay_dirty, arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { goto label_error; } - if (arena_decay_init(&arena->decay_muzzy, &arena->extents_muzzy, + if (arena_decay_init(&arena->decay_muzzy, arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { goto label_error; } diff --git a/src/base.c b/src/base.c index bb897a25..b0324b5d 100644 --- a/src/base.c +++ b/src/base.c @@ -195,8 +195,8 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, } static void -base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t gap_size, void *addr, size_t size) { +base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, + void *addr, size_t size) { if (extent_bsize_get(extent) > 0) { /* * Compute the index for the largest size class that does not @@ -229,13 +229,13 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, } static void * -base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t size, size_t alignment) { +base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, + size_t alignment) { void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); - base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size); + base_extent_bump_alloc_post(base, extent, gap_size, ret, size); return ret; } @@ -386,7 +386,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, + base_extent_bump_alloc_post(base, &block->extent, gap_size, base, base_size); return base; @@ -443,7 +443,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, goto label_return; } - ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); + ret = 
base_extent_bump_alloc(base, extent, usize, alignment); if (esn != NULL) { *esn = extent_sn_get(extent); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 912488d5..7e1f57af 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2893,7 +2893,7 @@ label_not_resized: JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, int flags) { +je_sallocx(const void *ptr, UNUSED int flags) { size_t usize; tsdn_t *tsdn; From e40b2f75bdfc830a9a53b2cad4fb7261d39cec93 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 17 Apr 2018 14:26:26 -0700 Subject: [PATCH 1097/2608] Fix abort_conf processing. When abort_conf is set, make sure we always error out at the end of the options processing loop. --- src/jemalloc.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e1f57af..f93c16fa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -849,10 +849,8 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); + /* If abort_conf is set, error out after processing all options. */ had_conf_error = true; - if (opt_abort_conf) { - malloc_abort_invalid_conf(); - } } static void @@ -1052,9 +1050,6 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") - if (opt_abort_conf && had_conf_error) { - malloc_abort_invalid_conf(); - } if (strncmp("metadata_thp", k, klen) == 0) { int i; bool match = false; @@ -1243,6 +1238,9 @@ malloc_conf_init(void) { #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } + if (opt_abort_conf && had_conf_error) { + malloc_abort_invalid_conf(); + } } atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } From a62e42baebe09dc84aaff731faa6ff87fde6bc4e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 17 Apr 2018 12:52:22 -0700 Subject: [PATCH 1098/2608] Add the --disable-initial-exec-tls configure option. 
Right now we always make our TLS use the initial-exec model if the compiler supports it. This change allows configure-time disabling of this setting, which can be helpful when dynamically loading jemalloc is the only option. --- INSTALL.md | 9 +++++++++ configure.ac | 32 ++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index dff7cebb..082310f2 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -265,6 +265,15 @@ any of the following arguments (not a definitive list) to 'configure': configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). +* `--disable-initial-exec-tls` + + Disable the initial-exec TLS model for jemalloc's internal thread-local + storage (on those platforms that support explicit settings). This can allow + jemalloc to be dynamically loaded after program startup (e.g. using dlopen). + Note that in this case, there will be two malloc implementations operating + in the same process, which will almost certainly result in confusing runtime + crashes if pointers leak from one implementation to the other. + The following environment variables (not a definitive list) impact configure's behavior: diff --git a/configure.ac b/configure.ac index e45970ed..ba0b694b 100644 --- a/configure.ac +++ b/configure.ac @@ -733,12 +733,9 @@ JE_COMPILABLE([tls_model attribute], [], foo = 0;], [je_cv_tls_model]) JE_CFLAGS_RESTORE() -if test "x${je_cv_tls_model}" = "xyes" ; then - AC_DEFINE([JEMALLOC_TLS_MODEL], - [__attribute__((tls_model("initial-exec")))]) -else - AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) -fi +dnl (Setting of JEMALLOC_TLS_MODEL is done later, after we've checked for +dnl --disable-initial-exec-tls) + dnl Check for alloc_size attribute support. 
JE_CFLAGS_SAVE() JE_CFLAGS_ADD([-Werror]) @@ -1993,6 +1990,29 @@ if test "x${enable_zone_allocator}" = "x1" ; then AC_DEFINE([JEMALLOC_ZONE], [ ]) fi +dnl ============================================================================ +dnl Use initial-exec TLS by default. +AC_ARG_ENABLE([initial-exec-tls], + [AS_HELP_STRING([--disable-initial-exec-tls], + [Disable the initial-exec tls model])], +[if test "x$enable_initial_exec_tls" = "xno" ; then + enable_initial_exec_tls="0" +else + enable_initial_exec_tls="1" +fi +], +[enable_initial_exec_tls="1"] +) +AC_SUBST([enable_initial_exec_tls]) + +if test "x${je_cv_tls_model}" = "xyes" -a \ + "x${enable_initial_exec_tls}" = "x1" ; then + AC_DEFINE([JEMALLOC_TLS_MODEL], + [__attribute__((tls_model("initial-exec")))]) +else + AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) +fi + dnl ============================================================================ dnl Enable background threads if possible. From c95284df1ab77f233562d9bc826523cfaaf7f41e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 17 Apr 2018 13:16:42 -0700 Subject: [PATCH 1099/2608] Avoid a resource leak down extent split failure paths. Previously, we would leak the extent and memory associated with a salvageable portion of an extent that we were trying to split in three, in the case where the first split attempt succeeded and the second failed. --- src/extent.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/extent.c b/src/extent.c index 49b6d4b8..09d6d771 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1319,21 +1319,19 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, * cant_alloc case should not occur. 
*/ assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); + } + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_salvage, true); + } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, to_leak, true); - goto label_err; } - /* - * Note: we don't handle the non-NULL to_salvage case at all. - * This maintains the behavior that was present when the - * refactor pulling extent_split_interior into a helper function - * was added. I think this is actually a bug (we leak both the - * memory and the extent_t in that case), but since this code is - * getting deleted very shortly (in a subsequent commit), - * ensuring correctness down this path isn't worth the effort. - */ + goto label_err; } if (*commit && !extent_committed_get(extent)) { From dedfeecc4e69545efb2974ae42589985ed420821 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Apr 2018 11:54:11 -0700 Subject: [PATCH 1100/2608] Invoke dlsym() on demand. If no lazy lock or background thread is enabled, avoid dlsym pthread_create on boot. 
--- src/background_thread.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index d2aa2745..9656e196 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -804,6 +804,26 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS +static bool +pthread_create_fptr_init(void) { + if (pthread_create_fptr != NULL) { + return false; + } + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + can_enable_background_thread = false; + if (config_lazy_lock || opt_background_thread) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + } else { + can_enable_background_thread = true; + } + + return false; +} + /* * When lazy lock is enabled, we need to make sure setting isthreaded before * taking any background_thread locks. 
This is called early in ctl (instead of @@ -814,6 +834,7 @@ void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER + pthread_create_fptr_init(); pthread_create_wrapper_init(); #endif } @@ -827,18 +848,10 @@ background_thread_boot0(void) { "supports pthread only\n"); return true; } - #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - can_enable_background_thread = false; - if (config_lazy_lock || opt_background_thread) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - } else { - can_enable_background_thread = true; + if ((config_lazy_lock || opt_background_thread) && + pthread_create_fptr_init()) { + return true; } #endif return false; From b8f4c730eff28edee4b583ff5b6ee1fac0f26c27 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 18 Apr 2018 11:30:03 -0700 Subject: [PATCH 1101/2608] Remove an incorrect assertion. Background threads are created without holding the global background_thread lock, which mean paused state is possible (and fine). --- src/background_thread.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index 9656e196..3517a3bb 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -398,7 +398,10 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, } background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); - assert(info->state != background_thread_paused); + /* + * In case of the background_thread_paused state because of + * arena reset, delay the creation. 
+ */ bool create = (info->state == background_thread_started); malloc_mutex_unlock(tsdn, &info->mtx); if (!create) { From 39b1b2049934be5be7e5b1b6f77ff31cd02398c5 Mon Sep 17 00:00:00 2001 From: Issam Maghni Date: Sat, 21 Apr 2018 17:53:03 -0400 Subject: [PATCH 1102/2608] Adding `install_lib_pc` Related to https://github.com/jemalloc/jemalloc/issues/974 --- INSTALL.md | 1 + 1 file changed, 1 insertion(+) diff --git a/INSTALL.md b/INSTALL.md index 082310f2..a01a7c04 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -338,6 +338,7 @@ To install only parts of jemalloc, use the following targets: install_include install_lib_shared install_lib_static + install_lib_pc install_lib install_doc_html install_doc_man From 6df90600a7e4df51b06efe2d47df211cba5935a7 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Sat, 28 Apr 2018 20:47:45 +0200 Subject: [PATCH 1103/2608] aarch64: Add ILP32 support. Instead of setting a fix value of 48 allowed VA bits, we distiguish between LP64 and ILP32. Testsuite result with LP64: Test suite summary: pass: 13/13, skip: 0/13, fail: 0/13 Testsuit result with ILP32: Test suite summary: pass: 13/13, skip: 0/13, fail: 0/13 Signed-off-by: Christoph Muellner Reviewed-by: Philipp Tomsich --- configure.ac | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ba0b694b..8d21d0ae 100644 --- a/configure.ac +++ b/configure.ac @@ -412,7 +412,13 @@ AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) case "${host_cpu}" in aarch64) AC_MSG_CHECKING([number of significant virtual address bits]) - LG_VADDR=48 + if test "x${ac_cv_sizeof_void_p}" = "x4" ; then + #aarch64 ILP32 + LG_VADDR=32 + else + #aarch64 LP64 + LG_VADDR=48 + fi AC_MSG_RESULT([$LG_VADDR]) ;; x86_64) From a32b7bd5676e669821d15d319f686c3add451f4b Mon Sep 17 00:00:00 2001 From: Latchesar Ionkov Date: Tue, 1 May 2018 12:31:09 -0600 Subject: [PATCH 1104/2608] Mallctl: Add arenas.lookup Implement a new mallctl operation that allows looking up the 
arena a region of memory belongs to. --- doc/jemalloc.xml.in | 9 +++++++++ src/ctl.c | 34 +++++++++++++++++++++++++++++++++- test/unit/mallctl.c | 17 +++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 3ec43ce1..1e12fd3a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2130,6 +2130,15 @@ struct extent_hooks_s { and return the new arena index. + + + arenas.lookup + (unsigned, void*) + rw + + Index of the arena to which an allocation belongs to. + + prof.thread_active_init diff --git a/src/ctl.c b/src/ctl.c index 86c2837a..1e713a3d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -139,6 +139,7 @@ CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlextents) CTL_PROTO(arenas_create) +CTL_PROTO(arenas_lookup) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -373,7 +374,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlextents"), CTL(arenas_nlextents)}, {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("create"), CTL(arenas_create)} + {NAME("create"), CTL(arenas_create)}, + {NAME("lookup"), CTL(arenas_lookup)} }; static const ctl_named_node_t prof_node[] = { @@ -2471,6 +2473,36 @@ label_return: return ret; } +static int +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + void *ptr; + extent_t *extent; + arena_t *arena; + + ptr = NULL; + ret = EINVAL; + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + WRITE(ptr, void *); + extent = iealloc(tsd_tsdn(tsd), ptr); + if (extent == NULL) + goto label_return; + + arena = extent_arena_get(extent); + if (arena == NULL) + goto label_return; + + arena_ind = arena_ind_get(arena); + READ(arena_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + 
/******************************************************************************/ static int diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f8b180b1..1ecbab08 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -738,6 +738,22 @@ TEST_BEGIN(test_arenas_create) { } TEST_END +TEST_BEGIN(test_arenas_lookup) { + unsigned arena, arena1; + void *ptr; + size_t sz = sizeof(unsigned); + + assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + ptr = mallocx(42, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); + assert_ptr_not_null(ptr, "Unexpected mallocx() failure"); + assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); + assert_u_eq(arena, arena1, "Unexpected arena index"); + dallocx(ptr, 0); +} +TEST_END + TEST_BEGIN(test_stats_arenas) { #define TEST_STATS_ARENAS(t, name) do { \ t name; \ @@ -784,5 +800,6 @@ main(void) { test_arenas_bin_constants, test_arenas_lextent_constants, test_arenas_create, + test_arenas_lookup, test_stats_arenas); } From c5b72a92cc40a0f95e13cb3e3bb4fba0f7ef36c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 2 May 2018 14:35:31 -0700 Subject: [PATCH 1105/2608] Fix a typo in INSTALL.md. --- INSTALL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index a01a7c04..c9532ddb 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -269,7 +269,7 @@ any of the following arguments (not a definitive list) to 'configure': Disable the initial-exec TLS model for jemalloc's internal thread-local storage (on those platforms that support explicit settings). This can allow - jemalloc to be dynamically loaded after program starup (e.g. using dlopen). + jemalloc to be dynamically loaded after program startup (e.g. using dlopen). 
Note that in this case, there will be two malloc implementations operating in the same process, which will almost certainly result in confusing runtime crashes if pointers leak from one implementation to the other. From 3bcaedeea285edcf6006cbd12b906bd3dc11a8ba Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 9 Apr 2018 14:35:51 -0700 Subject: [PATCH 1106/2608] Remove documentation for --disable-thp which was removed. --- INSTALL.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index c9532ddb..7837b2d9 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -157,11 +157,6 @@ any of the following arguments (not a definitive list) to 'configure': Statically link against the specified libunwind.a rather than dynamically linking with -lunwind. -* `--disable-thp` - - Disable transparent huge page (THP) integration. This option can be useful - when cross compiling. - * `--disable-fill` Disable support for junk/zero filling of memory. See the "opt.junk" and From 2e7af1af733144b58e4977f526f11d015d8457b0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 12 Apr 2018 10:49:35 -0700 Subject: [PATCH 1107/2608] Add TUNING.md. --- TUNING.md | 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 TUNING.md diff --git a/TUNING.md b/TUNING.md new file mode 100644 index 00000000..34fca05b --- /dev/null +++ b/TUNING.md @@ -0,0 +1,129 @@ +This document summarizes the common approaches for performance fine tuning with +jemalloc (as of 5.1.0). The default configuration of jemalloc tends to work +reasonably well in practice, and most applications should not have to tune any +options. However, in order to cover a wide range of applications and avoid +pathological cases, the default setting is sometimes kept conservative and +suboptimal, even for many common workloads. 
When jemalloc is properly tuned for +a specific application / workload, it is common to improve system level metrics +by a few percent, or make favorable trade-offs. + + +## Notable runtime options for performance tuning + +Runtime options can be set via +[malloc_conf](http://jemalloc.net/jemalloc.3.html#tuning). + +* [background_thread](http://jemalloc.net/jemalloc.3.html#background_thread) + + Enabling jemalloc background threads generally improves the tail latency for + application threads, since unused memory purging is shifted to the dedicated + background threads. In addition, unintended purging delay caused by + application inactivity is avoided with background threads. + + Suggested: `background_thread:true` when jemalloc managed threads can be + allowed. + +* [metadata_thp](http://jemalloc.net/jemalloc.3.html#opt.metadata_thp) + + Allowing jemalloc to utilize transparent huge pages for its internal + metadata usually reduces TLB misses significantly, especially for programs + with large memory footprint and frequent allocation / deallocation + activities. Metadata memory usage may increase due to the use of huge + pages. + + Suggested for allocation intensive programs: `metadata_thp:auto` or + `metadata_thp:always`, which is expected to improve CPU utilization at a + small memory cost. + +* [dirty_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and + [muzzy_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms) + + Decay time determines how fast jemalloc returns unused pages back to the + operating system, and therefore provides a fairly straightforward trade-off + between CPU and memory usage. Shorter decay time purges unused pages faster + to reduces memory usage (usually at the cost of more CPU cycles spent on + purging), and vice versa. + + Suggested: tune the values based on the desired trade-offs. 
+ +* [narenas](http://jemalloc.net/jemalloc.3.html#opt.narenas) + + By default jemalloc uses multiple arenas to reduce internal lock contention. + However high arena count may also increase overall memory fragmentation, + since arenas manage memory independently. When high degree of parallelism + is not expected at the allocator level, lower number of arenas often + improves memory usage. + + Suggested: if low parallelism is expected, try lower arena count while + monitoring CPU and memory usage. + +* [percpu_arena](http://jemalloc.net/jemalloc.3.html#opt.percpu_arena) + + Enable dynamic thread to arena association based on running CPU. This has + the potential to improve locality, e.g. when thread to CPU affinity is + present. + + Suggested: try `percpu_arena:percpu` or `percpu_arena:phycpu` if + thread migration between processors is expected to be infrequent. + +Examples: + +* High resource consumption application, prioritizing CPU utilization: + + `background_thread:true,metadata_thp:auto` combined with relaxed decay time + (increased `dirty_decay_ms` and / or `muzzy_decay_ms`, + e.g. `dirty_decay_ms:30000,muzzy_decay_ms:30000`). + +* High resource consumption application, prioritizing memory usage: + + `background_thread:true` combined with shorter decay time (decreased + `dirty_decay_ms` and / or `muzzy_decay_ms`, + e.g. `dirty_decay_ms:5000,muzzy_decay_ms:5000`), and lower arena count + (e.g. number of CPUs). + +* Low resource consumption application: + + `narenas:1,lg_tcache_max:13` combined with shorter decay time (decreased + `dirty_decay_ms` and / or `muzzy_decay_ms`,e.g. + `dirty_decay_ms:1000,muzzy_decay_ms:0`). + +* Extremely conservative -- minimize memory usage at all costs, only suitable when +allocation activity is very rare: + + `narenas:1,tcache:false,dirty_decay_ms:0,muzzy_decay_ms:0` + +Note that it is recommended to combine the options with `abort_conf:true` which +aborts immediately on illegal options. 
+ +## Beyond runtime options + +In addition to the runtime options, there are a number of programmatic ways to +improve application performance with jemalloc. + +* [Explicit arenas](http://jemalloc.net/jemalloc.3.html#arenas.create) + + Manually created arenas can help performance in various ways, e.g. by + managing locality and contention for specific usages. For example, + applications can explicitly allocate frequently accessed objects from a + dedicated arena with + [mallocx()](http://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve + locality. In addition, explicit arenas often benefit from individually + tuned options, e.g. relaxed [decay + time](http://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if + frequent reuse is expected. + +* [Extent hooks](http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks) + + Extent hooks allow customization for managing underlying memory. One use + case for performance purpose is to utilize huge pages -- for example, + [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp) + uses explicit arenas with customized extent hooks to manage 1GB huge pages + for frequently accessed data, which reduces TLB misses significantly. + +* [Explicit thread-to-arena + binding](http://jemalloc.net/jemalloc.3.html#thread.arena) + + It is common for some threads in an application to have different memory + access / allocation patterns. Threads with heavy workloads often benefit + from explicit binding, e.g. binding very active threads to dedicated arenas + may reduce contention at the allocator level. From 95789a24fab056e7a1ddc66e2366b1ec88aa2bcd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 3 May 2018 15:26:14 -0700 Subject: [PATCH 1108/2608] Update copyright dates. 
--- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index e308632a..98458d97 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2017 Jason Evans . +Copyright (C) 2002-2018 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2018 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From 63712b4c4e046e9d91807d0e1b5c890c52925379 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Thu, 3 May 2018 16:52:03 +0200 Subject: [PATCH 1109/2608] configure: Add --with-lg-vaddr configure option. This patch allows to override the lg-vaddr values, which are defined by the build machine's CPUID information (x86_64) or default values (other architectures like aarch64). Signed-off-by: Christoph Muellner --- INSTALL.md | 8 +++++++ configure.ac | 66 ++++++++++++++++++++++++++++++---------------------- 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 7837b2d9..cd05ccba 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -260,6 +260,14 @@ any of the following arguments (not a definitive list) to 'configure': configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). +* `--with-lg-vaddr=` + + Specify the number of significant virtual address bits. jemalloc uses + pointer tagging if the pointer size is bigger than the required size for + virtual addresses. By default the configure script determines this via CPUID + information on x86_64 and uses default values for other architectures. 
This + option may be useful when cross compiling. + * `--disable-initial-exec-tls` Disable the initial-exec TLS model for jemalloc's internal thread-local diff --git a/configure.ac b/configure.ac index 8d21d0ae..a6a08db0 100644 --- a/configure.ac +++ b/configure.ac @@ -409,22 +409,29 @@ esac AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT]) AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) +AC_ARG_WITH([lg_vaddr], + [AS_HELP_STRING([--with-lg-vaddr=], [Number of significant virtual address bits])], + [LG_VADDR="$with_lg_vaddr"], [LG_VADDR="detect"]) + case "${host_cpu}" in aarch64) - AC_MSG_CHECKING([number of significant virtual address bits]) - if test "x${ac_cv_sizeof_void_p}" = "x4" ; then - #aarch64 ILP32 - LG_VADDR=32 - else - #aarch64 LP64 - LG_VADDR=48 + if test "x$LG_VADDR" = "xdetect"; then + AC_MSG_CHECKING([number of significant virtual address bits]) + if test "x${LG_SIZEOF_PTR}" = "x2" ; then + #aarch64 ILP32 + LG_VADDR=32 + else + #aarch64 LP64 + LG_VADDR=48 + fi + AC_MSG_RESULT([$LG_VADDR]) fi - AC_MSG_RESULT([$LG_VADDR]) ;; x86_64) - AC_CACHE_CHECK([number of significant virtual address bits], - [je_cv_lg_vaddr], - AC_RUN_IFELSE([AC_LANG_PROGRAM( + if test "x$LG_VADDR" = "xdetect"; then + AC_CACHE_CHECK([number of significant virtual address bits], + [je_cv_lg_vaddr], + AC_RUN_IFELSE([AC_LANG_PROGRAM( [[ #include #ifdef _WIN32 @@ -461,27 +468,30 @@ typedef unsigned __int32 uint32_t; [je_cv_lg_vaddr=`cat conftest.out`], [je_cv_lg_vaddr=error], [je_cv_lg_vaddr=57])) - if test "x${je_cv_lg_vaddr}" != "x" ; then - LG_VADDR="${je_cv_lg_vaddr}" - fi - if test "x${LG_VADDR}" != "xerror" ; then - AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) - else - AC_MSG_ERROR([cannot determine number of significant virtual address bits]) + if test "x${je_cv_lg_vaddr}" != "x" ; then + LG_VADDR="${je_cv_lg_vaddr}" + fi + if test "x${LG_VADDR}" != "xerror" ; then + AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) + else + AC_MSG_ERROR([cannot determine number of 
significant virtual address bits]) + fi fi ;; *) - AC_MSG_CHECKING([number of significant virtual address bits]) - if test "x${LG_SIZEOF_PTR}" = "x3" ; then - LG_VADDR=64 - elif test "x${LG_SIZEOF_PTR}" = "x2" ; then - LG_VADDR=32 - elif test "x${LG_SIZEOF_PTR}" = "xLG_SIZEOF_PTR_WIN" ; then - LG_VADDR="(1U << (LG_SIZEOF_PTR_WIN+3))" - else - AC_MSG_ERROR([Unsupported lg(pointer size): ${LG_SIZEOF_PTR}]) + if test "x$LG_VADDR" = "xdetect"; then + AC_MSG_CHECKING([number of significant virtual address bits]) + if test "x${LG_SIZEOF_PTR}" = "x3" ; then + LG_VADDR=64 + elif test "x${LG_SIZEOF_PTR}" = "x2" ; then + LG_VADDR=32 + elif test "x${LG_SIZEOF_PTR}" = "xLG_SIZEOF_PTR_WIN" ; then + LG_VADDR="(1U << (LG_SIZEOF_PTR_WIN+3))" + else + AC_MSG_ERROR([Unsupported lg(pointer size): ${LG_SIZEOF_PTR}]) + fi + AC_MSG_RESULT([$LG_VADDR]) fi - AC_MSG_RESULT([$LG_VADDR]) ;; esac AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) From b001e6e7407cd7e07bad533445eee7f0224cb268 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 May 2018 10:44:17 -0700 Subject: [PATCH 1110/2608] INSTALL.md: Clarify --with-lg-vaddr. The current wording can be taken to imply that we return tagged pointers to the user, or otherwise rely on architectural support for them. --- INSTALL.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index cd05ccba..ef328c60 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -262,11 +262,10 @@ any of the following arguments (not a definitive list) to 'configure': * `--with-lg-vaddr=` - Specify the number of significant virtual address bits. jemalloc uses - pointer tagging if the pointer size is bigger than the required size for - virtual addresses. By default the configure script determines this via CPUID - information on x86_64 and uses default values for other architectures. This - option may be useful when cross compiling. + Specify the number of significant virtual address bits. 
By default, the + configure script attempts to detect virtual address size on those platforms + where it knows how, and picks a default otherwise. This option may be + useful when cross-compiling. * `--disable-initial-exec-tls` From 4c8829e6924ee7abae6f41ca57303a88dd6f1315 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 May 2018 11:46:01 -0700 Subject: [PATCH 1111/2608] run_tests.sh: Test --with-lg-vaddr. --- scripts/gen_run_tests.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index bf19c2c9..f1988fdc 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -1,9 +1,14 @@ #!/usr/bin/env python +import sys from itertools import combinations from os import uname from multiprocessing import cpu_count +# Later, we want to test extended vaddr support. Apparently, the "real" way of +# checking this is flaky on OS X. +bits_64 = sys.maxsize > 2**32 + nparallel = cpu_count() * 2 uname = uname()[0] @@ -23,6 +28,9 @@ possible_config_opts = [ '--enable-prof', '--disable-stats', ] +if bits_64: + possible_config_opts.append('--with-lg-vaddr=56') + possible_malloc_conf_opts = [ 'tcache:false', 'dss:primary', From b73380bee0abde8e74f43d19d099cc151f51eb58 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Sat, 5 May 2018 14:50:30 +0200 Subject: [PATCH 1112/2608] Fix include path order for out-of-tree builds. When configuring out-of-tree (source directory is not build directory), the generated include files from the build directory should have higher priority than those in the source dir. This is especially helpful when cross-compiling. Signed-off-by: Christoph Muellner --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index e229196c..9b9347ff 100644 --- a/Makefile.in +++ b/Makefile.in @@ -24,7 +24,7 @@ abs_srcroot := @abs_srcroot@ abs_objroot := @abs_objroot@ # Build parameters. 
-CPPFLAGS := @CPPFLAGS@ -I$(srcroot)include -I$(objroot)include +CPPFLAGS := @CPPFLAGS@ -I$(objroot)include -I$(srcroot)include CONFIGURE_CFLAGS := @CONFIGURE_CFLAGS@ SPECIFIED_CFLAGS := @SPECIFIED_CFLAGS@ EXTRA_CFLAGS := @EXTRA_CFLAGS@ From a308af360ca8fccb31f9dcdb0654b0d4cf6f776c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 7 May 2018 14:51:33 -0700 Subject: [PATCH 1113/2608] Reformat the version number in jemalloc.pc.in. --- jemalloc.pc.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jemalloc.pc.in b/jemalloc.pc.in index a318e8dd..c428a86d 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -7,6 +7,6 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. URL: http://jemalloc.net/ -Version: @jemalloc_version@ +Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} From e94ca7f3e2b0ef393d713e7287b7f6b61645322b Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 7 May 2018 15:48:14 -0700 Subject: [PATCH 1114/2608] run_tests.sh: Don't test large vaddr with -m32. --- scripts/gen_run_tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index f1988fdc..a87ecffb 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -64,6 +64,11 @@ for cc, cxx in possible_compilers: else '') ) + # We don't want to test large vaddr spaces in 32-bit mode. + if ('-m32' in compiler_opts and '--with-lg-vaddr=56' in + config_opts): + continue + # Per CPU arenas are only supported on Linux. 
linux_supported = ('percpu_arena:percpu' in malloc_conf_opts \ or 'background_thread:true' in malloc_conf_opts) From 1c51381b7cc62b6e0e77d02c42925c3776dbc4a2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 2 May 2018 14:34:19 -0700 Subject: [PATCH 1115/2608] Update ChangeLog for 5.1.0. --- ChangeLog | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) diff --git a/ChangeLog b/ChangeLog index dd8f6ab1..29a00fb7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,123 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.1.0 (May 4th, 2018) + + This release is primarily about fine-tuning, ranging from several new features + to numerous notable performance and portability enhancements. The release and + prior dev versions have been running in multiple large scale applications for + months, and the cumulative improvements are substantial in many cases. + + Given the long and successful production runs, this release is likely a good + candidate for applications to upgrade, from both jemalloc 5.0 and before. For + performance-critical applications, the newly added TUNING.md provides + guidelines on jemalloc tuning. + + New features: + - Implement transparent huge page support for internal metadata. (@interwq) + - Add opt.thp to allow enabling / disabling transparent huge pages for all + mappings. (@interwq) + - Add maximum background thread count option. (@djwatson) + - Allow prof_active to control opt.lg_prof_interval and prof.gdump. + (@interwq) + - Allow arena index lookup based on allocation addresses via mallctl. + (@lionkov) + - Allow disabling initial-exec TLS model. (@davidtgoldblatt, @KenMacD) + - Add opt.lg_extent_max_active_fit to set the max ratio between the size of + the active extent selected (to split off from) and the size of the requested + allocation. 
(@interwq, @davidtgoldblatt) + - Add retain_grow_limit to set the max size when growing virtual address + space. (@interwq) + - Add mallctl interfaces: + + arena..retain_grow_limit (@interwq) + + arenas.lookup (@lionkov) + + max_background_threads (@djwatson) + + opt.lg_extent_max_active_fit (@interwq) + + opt.max_background_threads (@djwatson) + + opt.metadata_thp (@interwq) + + opt.thp (@interwq) + + stats.metadata_thp (@interwq) + + Portability improvements: + - Support GNU/kFreeBSD configuration. (@paravoid) + - Support m68k, nios2 and SH3 architectures. (@paravoid) + - Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. (@zonyitoo) + - Fix symbol listing for cross-compiling. (@tamird) + - Fix high bits computation on ARM. (@davidtgoldblatt, @paravoid) + - Disable the CPU_SPINWAIT macro for Power. (@davidtgoldblatt, @marxin) + - Fix MSVC 2015 & 2017 builds. (@rustyx) + - Improve RISC-V support. (@EdSchouten) + - Set name mangling script in strict mode. (@nicolov) + - Avoid MADV_HUGEPAGE on ARM. (@marxin) + - Modify configure to determine return value of strerror_r. + (@davidtgoldblatt, @cferris1000) + - Make sure CXXFLAGS is tested with CPP compiler. (@nehaljwani) + - Fix 32-bit build on MSVC. (@rustyx) + - Fix external symbol on MSVC. (@maksqwe) + - Avoid a printf format specifier warning. (@jasone) + - Add configure option --disable-initial-exec-tls which can allow jemalloc to + be dynamically loaded after program startup. (@davidtgoldblatt, @KenMacD) + - AArch64: Add ILP32 support. (@cmuellner) + - Add --with-lg-vaddr configure option to support cross compiling. + (@cmuellner, @davidtgoldblatt) + + Optimizations and refactors: + - Improve active extent fit with extent_max_active_fit. This considerably + reduces fragmentation over time and improves virtual memory and metadata + usage. (@davidtgoldblatt, @interwq) + - Eagerly coalesce large extents to reduce fragmentation. (@interwq) + - sdallocx: only read size info when page aligned (i.e. 
possibly sampled), + which speeds up the sized deallocation path significantly. (@interwq) + - Avoid attempting new mappings for in place expansion with retain, since + it rarely succeeds in practice and causes high overhead. (@interwq) + - Refactor OOM handling in newImpl. (@wqfish) + - Add internal fine-grained logging functionality for debugging use. + (@davidtgoldblatt) + - Refactor arena / tcache interactions. (@davidtgoldblatt) + - Refactor extent management with dumpable flag. (@davidtgoldblatt) + - Add runtime detection of lazy purging. (@interwq) + - Use pairing heap instead of red-black tree for extents_avail. (@djwatson) + - Use sysctl on startup in FreeBSD. (@trasz) + - Use thread local prng state instead of atomic. (@djwatson) + - Make decay to always purge one more extent than before, because in + practice large extents are usually the ones that cross the decay threshold. + Purging the additional extent helps save memory as well as reduce VM + fragmentation. (@interwq) + - Fast division by dynamic values. (@davidtgoldblatt) + - Improve the fit for aligned allocation. (@interwq, @edwinsmith) + - Refactor extent_t bitpacking. (@rkmisra) + - Optimize the generated assembly for ticker operations. (@davidtgoldblatt) + - Convert stats printing to use a structured text emitter. (@davidtgoldblatt) + - Remove preserve_lru feature for extents management. (@djwatson) + - Consolidate two memory loads into one on the fast deallocation path. + (@davidtgoldblatt, @interwq) + + Bug fixes (most of the issues are only relevant to jemalloc 5.0): + - Fix deadlock with multithreaded fork in OS X. (@davidtgoldblatt) + - Validate returned file descriptor before use. (@zonyitoo) + - Fix a few background thread initialization and shutdown issues. (@interwq) + - Fix an extent coalesce + decay race by taking both coalescing extents off + the LRU list. 
(@interwq) + - Fix potentially unbound increase during decay, caused by one thread keep + stashing memory to purge while other threads generating new pages. The + number of pages to purge is checked to prevent this. (@interwq) + - Fix a FreeBSD bootstrap assertion. (@strejda, @interwq) + - Handle 32 bit mutex counters. (@rkmisra) + - Fix a indexing bug when creating background threads. (@davidtgoldblatt, + @binliu19) + - Fix arguments passed to extent_init. (@yuleniwo, @interwq) + - Fix addresses used for ordering mutexes. (@rkmisra) + - Fix abort_conf processing during bootstrap. (@interwq) + - Fix include path order for out-of-tree builds. (@cmuellner) + + Incompatible changes: + - Remove --disable-thp. (@interwq) + - Remove mallctl interfaces: + + config.thp (@interwq) + + Documentation: + - Add TUNING.md. (@interwq, @davidtgoldblatt, @djwatson) + * 5.0.1 (July 1, 2017) This bugfix release fixes several issues, most of which are obscure enough From e8a63b87c36ac814272d73b503658431d2000055 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 9 May 2018 15:03:38 -0700 Subject: [PATCH 1116/2608] Fix an incorrect assertion. When configured with --with-lg-page, it's possible for the configured page size to be greater than the system page size, in which case the page address may only be aligned with the system page size. --- src/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 26002692..cc967fcf 100644 --- a/src/pages.c +++ b/src/pages.c @@ -261,7 +261,7 @@ pages_decommit(void *addr, size_t size) { bool pages_purge_lazy(void *addr, size_t size) { - assert(PAGE_ADDR2BASE(addr) == addr); + assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); assert(PAGE_CEILING(size) == size); if (!pages_can_purge_lazy) { From 312352faa89a39ff1e690d709d7d6f852f89d61d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 11 May 2018 16:32:29 -0700 Subject: [PATCH 1117/2608] Fix background thread index issues with max_background_threads. 
--- include/jemalloc/internal/background_thread_inlines.h | 7 ++++++- src/ctl.c | 6 ++---- test/unit/arena_reset.c | 4 ++-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index ef50231e..f85e86fa 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -15,7 +15,12 @@ background_thread_enabled_set(tsdn_t *tsdn, bool state) { JEMALLOC_ALWAYS_INLINE background_thread_info_t * arena_background_thread_info_get(arena_t *arena) { unsigned arena_ind = arena_ind_get(arena); - return &background_thread_info[arena_ind % ncpus]; + return &background_thread_info[arena_ind % max_background_threads]; +} + +JEMALLOC_ALWAYS_INLINE background_thread_info_t * +background_thread_info_get(size_t ind) { + return &background_thread_info[ind % max_background_threads]; } JEMALLOC_ALWAYS_INLINE uint64_t diff --git a/src/ctl.c b/src/ctl.c index 1e713a3d..0eb8de13 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2044,9 +2044,8 @@ arena_reset_prepare_background_thread(tsd_t *tsd, unsigned arena_ind) { if (have_background_thread) { malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); if (background_thread_enabled()) { - unsigned ind = arena_ind % ncpus; background_thread_info_t *info = - &background_thread_info[ind]; + background_thread_info_get(arena_ind); assert(info->state == background_thread_started); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->state = background_thread_paused; @@ -2059,9 +2058,8 @@ static void arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { if (have_background_thread) { if (background_thread_enabled()) { - unsigned ind = arena_ind % ncpus; background_thread_info_t *info = - &background_thread_info[ind]; + background_thread_info_get(arena_ind); assert(info->state == background_thread_paused); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); 
info->state = background_thread_started; diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index f5fb24d1..c1ccb097 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -142,7 +142,7 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { if (have_background_thread) { malloc_mutex_lock(tsdn, - &background_thread_info[arena_ind % ncpus].mtx); + &background_thread_info_get(arena_ind)->mtx); } /* Verify allocations no longer exist. */ for (i = 0; i < nptrs; i++) { @@ -151,7 +151,7 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { } if (have_background_thread) { malloc_mutex_unlock(tsdn, - &background_thread_info[arena_ind % ncpus].mtx); + &background_thread_info_get(arena_ind)->mtx); } free(ptrs); From b293a3eb86a32b9c242ac39d88312c0a9d317b8b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 9 May 2018 16:17:37 -0700 Subject: [PATCH 1118/2608] Fix the max_background_thread test. We may set number of background threads separately, e.g. through --with-malloc-conf, so avoid assuming the default number in the test. 
--- test/unit/background_thread_enable.c | 52 +++++++++++++++------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index ff95e672..d894e937 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -33,20 +33,19 @@ TEST_END TEST_BEGIN(test_max_background_threads) { test_skip_if(!have_background_thread); - size_t maxt; - size_t opt_maxt; - size_t sz_m = sizeof(maxt); + size_t max_n_thds; + size_t opt_max_n_thds; + size_t sz_m = sizeof(max_n_thds); assert_d_eq(mallctl("opt.max_background_threads", - &opt_maxt, &sz_m, NULL, 0), 0, - "Failed to get opt.max_background_threads"); - assert_d_eq(mallctl("max_background_threads", &maxt, &sz_m, NULL, 0), 0, - "Failed to get max background threads"); - assert_zu_eq(20, maxt, "should be ncpus"); - assert_zu_eq(opt_maxt, maxt, - "max_background_threads and " - "opt.max_background_threads should match"); - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), - 0, "Failed to set max background threads"); + &opt_max_n_thds, &sz_m, NULL, 0), 0, + "Failed to get opt.max_background_threads"); + assert_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, + 0), 0, "Failed to get max background threads"); + assert_zu_eq(opt_max_n_thds, max_n_thds, + "max_background_threads and " + "opt.max_background_threads should match"); + assert_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, + sz_m), 0, "Failed to set max background threads"); unsigned id; size_t sz_u = sizeof(unsigned); @@ -60,18 +59,21 @@ TEST_BEGIN(test_max_background_threads) { size_t sz_b = sizeof(bool); assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); - assert_zu_eq(n_background_threads, maxt, - "Number of background threads should be 3.\n"); - maxt = 10; - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), - 0, 
"Failed to set max background threads"); - assert_zu_eq(n_background_threads, maxt, - "Number of background threads should be 10.\n"); - maxt = 3; - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &maxt, sz_m), - 0, "Failed to set max background threads"); - assert_zu_eq(n_background_threads, maxt, - "Number of background threads should be 3.\n"); + assert_zu_eq(n_background_threads, max_n_thds, + "Number of background threads should not change.\n"); + size_t new_max_thds = max_n_thds - 1; + if (new_max_thds > 0) { + assert_d_eq(mallctl("max_background_threads", NULL, NULL, + &new_max_thds, sz_m), 0, + "Failed to set max background threads"); + assert_zu_eq(n_background_threads, new_max_thds, + "Number of background threads should decrease by 1.\n"); + } + new_max_thds = 1; + assert_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, + sz_m), 0, "Failed to set max background threads"); + assert_zu_eq(n_background_threads, new_max_thds, + "Number of background threads should be 1.\n"); } TEST_END From 09edea3f5c98dae3f298b7ac9f5adad13e528bc9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 17 May 2018 10:53:54 -0700 Subject: [PATCH 1119/2608] Tweak the format of the per arena summary section. Increase the width to ensure enough space for long running programs. 
--- src/stats.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/stats.c b/src/stats.c index 08b9507c..7411745f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -696,35 +696,35 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_col_t alloc_count_title; emitter_col_init(&alloc_count_title, &alloc_count_row); alloc_count_title.justify = emitter_justify_left; - alloc_count_title.width = 25; + alloc_count_title.width = 21; alloc_count_title.type = emitter_type_title; alloc_count_title.str_val = ""; emitter_col_t alloc_count_allocated; emitter_col_init(&alloc_count_allocated, &alloc_count_row); alloc_count_allocated.justify = emitter_justify_right; - alloc_count_allocated.width = 12; + alloc_count_allocated.width = 16; alloc_count_allocated.type = emitter_type_title; alloc_count_allocated.str_val = "allocated"; emitter_col_t alloc_count_nmalloc; emitter_col_init(&alloc_count_nmalloc, &alloc_count_row); alloc_count_nmalloc.justify = emitter_justify_right; - alloc_count_nmalloc.width = 12; + alloc_count_nmalloc.width = 16; alloc_count_nmalloc.type = emitter_type_title; alloc_count_nmalloc.str_val = "nmalloc"; emitter_col_t alloc_count_ndalloc; emitter_col_init(&alloc_count_ndalloc, &alloc_count_row); alloc_count_ndalloc.justify = emitter_justify_right; - alloc_count_ndalloc.width = 12; + alloc_count_ndalloc.width = 16; alloc_count_ndalloc.type = emitter_type_title; alloc_count_ndalloc.str_val = "ndalloc"; emitter_col_t alloc_count_nrequests; emitter_col_init(&alloc_count_nrequests, &alloc_count_row); alloc_count_nrequests.justify = emitter_justify_right; - alloc_count_nrequests.width = 12; + alloc_count_nrequests.width = 16; alloc_count_nrequests.type = emitter_type_title; alloc_count_nrequests.str_val = "nrequests"; @@ -776,14 +776,14 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_col_t mem_count_title; emitter_col_init(&mem_count_title, &mem_count_row); 
mem_count_title.justify = emitter_justify_left; - mem_count_title.width = 25; + mem_count_title.width = 21; mem_count_title.type = emitter_type_title; mem_count_title.str_val = ""; emitter_col_t mem_count_val; emitter_col_init(&mem_count_val, &mem_count_row); mem_count_val.justify = emitter_justify_right; - mem_count_val.width = 12; + mem_count_val.width = 16; mem_count_val.type = emitter_type_title; mem_count_val.str_val = ""; From e74a1a37c82fa3a44cee1002d9d8957bcc8274a7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 8 Mar 2018 16:15:00 -0800 Subject: [PATCH 1120/2608] Atomics: Add atomic_u8_t, force-inline operations. We're about to need an atomic uint8_t for state operations. Unfortunately, we're at the point where things won't get inlined into the key methods unless they're force-inlined. This is embarassing and we should do something about it, but in the meantime we'll force-inline a little more when we need to. --- include/jemalloc/internal/atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index adadb1a3..a184e465 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_H -#define ATOMIC_INLINE static inline +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) # include "jemalloc/internal/atomic_gcc_atomic.h" From 982c10de3566f38628770e57c62d1a6cdc5a09f9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 8 Mar 2018 16:34:17 -0800 Subject: [PATCH 1121/2608] TSD: Make all state access happen through a function. Shortly, tsd state will be atomic and have some complicated enough logic down the state-setting path that we should be aware of it. 
--- include/jemalloc/internal/atomic.h | 2 + .../internal/jemalloc_internal_inlines_a.h | 2 +- include/jemalloc/internal/tsd.h | 37 ++++++++++++------- src/tsd.c | 30 +++++++-------- test/unit/tsd.c | 4 +- 5 files changed, 43 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a184e465..bb751cfc 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -66,6 +66,8 @@ JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0) + JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) #ifdef JEMALLOC_ATOMIC_U64 diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index c6a1f7eb..6577a4f2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -156,7 +156,7 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) { if (fast) { /* Prepare slow path for reentrancy. */ tsd_slow_update(tsd); - assert(tsd->state == tsd_state_nominal_slow); + assert(tsd_state_get(tsd) == tsd_state_nominal_slow); } } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0b9841aa..aa64d937 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -107,9 +107,6 @@ enum { tsd_state_uninitialized = 5 }; -/* Manually limit tsd_state_t to a single byte. */ -typedef uint8_t tsd_state_t; - /* The actual tsd. */ struct tsd_s { /* @@ -117,13 +114,25 @@ struct tsd_s { * module. Access any thread-local state through the getters and * setters below. */ - tsd_state_t state; + + /* We manually limit the state to just a single byte. 
*/ + uint8_t state; #define O(n, t, nt) \ t use_a_getter_or_setter_instead_##n; MALLOC_TSD #undef O }; +JEMALLOC_ALWAYS_INLINE uint8_t +tsd_state_get(tsd_t *tsd) { + return tsd->state; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_state_set(tsd_t *tsd, uint8_t state) { + tsd->state = state; +} + /* * Wrapper around tsd_t that makes it possible to avoid implicit conversion * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be @@ -191,10 +200,10 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ - assert(tsd->state == tsd_state_nominal || \ - tsd->state == tsd_state_nominal_slow || \ - tsd->state == tsd_state_reincarnated || \ - tsd->state == tsd_state_minimal_initialized); \ + assert(tsd_state_get(tsd) == tsd_state_nominal || \ + tsd_state_get(tsd) == tsd_state_nominal_slow || \ + tsd_state_get(tsd) == tsd_state_reincarnated || \ + tsd_state_get(tsd) == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } MALLOC_TSD @@ -229,8 +238,8 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE void \ tsd_##n##_set(tsd_t *tsd, t val) { \ - assert(tsd->state != tsd_state_reincarnated && \ - tsd->state != tsd_state_minimal_initialized); \ + assert(tsd_state_get(tsd) != tsd_state_reincarnated && \ + tsd_state_get(tsd) != tsd_state_minimal_initialized); \ *tsd_##n##p_get(tsd) = val; \ } MALLOC_TSD @@ -244,7 +253,7 @@ tsd_assert_fast(tsd_t *tsd) { JEMALLOC_ALWAYS_INLINE bool tsd_fast(tsd_t *tsd) { - bool fast = (tsd->state == tsd_state_nominal); + bool fast = (tsd_state_get(tsd) == tsd_state_nominal); if (fast) { tsd_assert_fast(tsd); } @@ -261,7 +270,7 @@ tsd_fetch_impl(bool init, bool minimal) { } assert(tsd != NULL); - if (unlikely(tsd->state != tsd_state_nominal)) { + if (unlikely(tsd_state_get(tsd) != tsd_state_nominal)) { return tsd_fetch_slow(tsd, minimal); } assert(tsd_fast(tsd)); @@ -281,7 +290,7 @@ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_internal_fetch(void) { tsd_t *tsd = tsd_fetch_min(); /* Use 
reincarnated state to prevent full initialization. */ - tsd->state = tsd_state_reincarnated; + tsd_state_set(tsd, tsd_state_reincarnated); return tsd; } @@ -293,7 +302,7 @@ tsd_fetch(void) { static inline bool tsd_nominal(tsd_t *tsd) { - return (tsd->state <= tsd_state_nominal_max); + return (tsd_state_get(tsd) <= tsd_state_nominal_max); } JEMALLOC_ALWAYS_INLINE tsdn_t * diff --git a/src/tsd.c b/src/tsd.c index c1430682..f3320ebb 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -56,9 +56,9 @@ tsd_slow_update(tsd_t *tsd) { if (tsd_nominal(tsd)) { if (malloc_slow || !tsd_tcache_enabled_get(tsd) || tsd_reentrancy_level_get(tsd) > 0) { - tsd->state = tsd_state_nominal_slow; + tsd_state_set(tsd, tsd_state_nominal_slow); } else { - tsd->state = tsd_state_nominal; + tsd_state_set(tsd, tsd_state_nominal); } } } @@ -97,8 +97,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { static bool tsd_data_init_nocleanup(tsd_t *tsd) { - assert(tsd->state == tsd_state_reincarnated || - tsd->state == tsd_state_minimal_initialized); + assert(tsd_state_get(tsd) == tsd_state_reincarnated || + tsd_state_get(tsd) == tsd_state_minimal_initialized); /* * During reincarnation, there is no guarantee that the cleanup function * will be called (deallocation may happen after all tsd destructors). @@ -117,27 +117,27 @@ tsd_t * tsd_fetch_slow(tsd_t *tsd, bool minimal) { assert(!tsd_fast(tsd)); - if (tsd->state == tsd_state_nominal_slow) { + if (tsd_state_get(tsd) == tsd_state_nominal_slow) { /* On slow path but no work needed. */ assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || tsd_reentrancy_level_get(tsd) > 0 || *tsd_arenas_tdata_bypassp_get(tsd)); - } else if (tsd->state == tsd_state_uninitialized) { + } else if (tsd_state_get(tsd) == tsd_state_uninitialized) { if (!minimal) { - tsd->state = tsd_state_nominal; + tsd_state_set(tsd, tsd_state_nominal); tsd_slow_update(tsd); /* Trigger cleanup handler registration. 
*/ tsd_set(tsd); tsd_data_init(tsd); } else { - tsd->state = tsd_state_minimal_initialized; + tsd_state_set(tsd, tsd_state_minimal_initialized); tsd_set(tsd); tsd_data_init_nocleanup(tsd); } - } else if (tsd->state == tsd_state_minimal_initialized) { + } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) { if (!minimal) { /* Switch to fully initialized. */ - tsd->state = tsd_state_nominal; + tsd_state_set(tsd, tsd_state_nominal); assert(*tsd_reentrancy_levelp_get(tsd) >= 1); (*tsd_reentrancy_levelp_get(tsd))--; tsd_slow_update(tsd); @@ -145,12 +145,12 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { } else { assert_tsd_data_cleanup_done(tsd); } - } else if (tsd->state == tsd_state_purgatory) { - tsd->state = tsd_state_reincarnated; + } else if (tsd_state_get(tsd) == tsd_state_purgatory) { + tsd_state_set(tsd, tsd_state_reincarnated); tsd_set(tsd); tsd_data_init_nocleanup(tsd); } else { - assert(tsd->state == tsd_state_reincarnated); + assert(tsd_state_get(tsd) == tsd_state_reincarnated); } return tsd; @@ -214,7 +214,7 @@ void tsd_cleanup(void *arg) { tsd_t *tsd = (tsd_t *)arg; - switch (tsd->state) { + switch (tsd_state_get(tsd)) { case tsd_state_uninitialized: /* Do nothing. 
*/ break; @@ -232,7 +232,7 @@ tsd_cleanup(void *arg) { case tsd_state_nominal: case tsd_state_nominal_slow: tsd_do_data_cleanup(tsd); - tsd->state = tsd_state_purgatory; + tsd_state_set(tsd, tsd_state_purgatory); tsd_set(tsd); break; case tsd_state_purgatory: diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 6c479139..3379891b 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -98,11 +98,11 @@ thd_start_reincarnated(void *arg) { tsd_cleanup((void *)tsd); assert_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared."); - assert_u_eq(tsd->state, tsd_state_purgatory, + assert_u_eq(tsd_state_get(tsd), tsd_state_purgatory, "TSD state should be purgatory\n"); free(p); - assert_u_eq(tsd->state, tsd_state_reincarnated, + assert_u_eq(tsd_state_get(tsd), tsd_state_reincarnated, "TSD state should be reincarnated\n"); p = mallocx(1, MALLOCX_TCACHE_NONE); assert_ptr_not_null(p, "Unexpected malloc() failure"); From 39d6420c0c39619176af3477b827e8a92442b768 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 8 Mar 2018 16:51:07 -0800 Subject: [PATCH 1122/2608] TSD: Make state atomic. This will let us change the state of another thread remotely, eventually. --- include/jemalloc/internal/tsd.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index aa64d937..53ac7415 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -78,7 +78,7 @@ typedef void (*test_callback_t)(int *); MALLOC_TEST_TSD #define TSD_INITIALIZER { \ - tsd_state_uninitialized, \ + ATOMIC_INIT(tsd_state_uninitialized), \ TCACHE_ENABLED_ZERO_INITIALIZER, \ false, \ 0, \ @@ -116,7 +116,7 @@ struct tsd_s { */ /* We manually limit the state to just a single byte. 
*/ - uint8_t state; + atomic_u8_t state; #define O(n, t, nt) \ t use_a_getter_or_setter_instead_##n; MALLOC_TSD @@ -125,12 +125,18 @@ MALLOC_TSD JEMALLOC_ALWAYS_INLINE uint8_t tsd_state_get(tsd_t *tsd) { - return tsd->state; + /* + * This should be atomic. Unfortunately, compilers right now can't tell + * that this can be done as a memory comparison, and forces a load into + * a register that hurts fast-path performance. + */ + /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */ + return *(uint8_t *)&tsd->state; } JEMALLOC_ALWAYS_INLINE void tsd_state_set(tsd_t *tsd, uint8_t state) { - tsd->state = state; + atomic_store_u8(&tsd->state, state, ATOMIC_RELAXED); } /* From feff510b9f938ae1b4e2f43815bc7b10f70fac12 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 11 May 2018 15:18:52 -0700 Subject: [PATCH 1123/2608] TSD: Pull name mangling into a macro. --- include/jemalloc/internal/tsd.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 53ac7415..c4faba5f 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -107,6 +107,13 @@ enum { tsd_state_uninitialized = 5 }; +/* + * Some TSD accesses can only be done in a nominal state. To enforce this, we + * wrap TSD member access in a function that asserts on TSD state, and mangle + * field names to prevent touching them accidentally. + */ +#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n + /* The actual tsd. */ struct tsd_s { /* @@ -118,7 +125,7 @@ struct tsd_s { /* We manually limit the state to just a single byte. 
*/ atomic_u8_t state; #define O(n, t, nt) \ - t use_a_getter_or_setter_instead_##n; + t TSD_MANGLE(n); MALLOC_TSD #undef O }; @@ -197,7 +204,7 @@ void tsd_slow_update(tsd_t *tsd); #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get_unsafe(tsd_t *tsd) { \ - return &tsd->use_a_getter_or_setter_instead_##n; \ + return &tsd->TSD_MANGLE(n); \ } MALLOC_TSD #undef O From e870829e645bfd6d54e4a2d4cacce39478216a1e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Apr 2018 19:16:41 -0700 Subject: [PATCH 1124/2608] TSD: Add the ability to enter a global slow path. This gives any thread the ability to send other threads down slow paths the next time they fetch tsd. --- include/jemalloc/internal/tsd.h | 100 ++++++++++++++----- src/tsd.c | 171 ++++++++++++++++++++++++++++++-- test/unit/tsd.c | 130 +++++++++++++++++++++++- 3 files changed, 365 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c4faba5f..251f5659 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -59,6 +59,9 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TEST_TSD_INITIALIZER #endif +/* Various uses of this struct need it to be a named type. 
*/ +typedef ql_elm(tsd_t) tsd_link_t; + /* O(name, type, nullable type */ #define MALLOC_TSD \ O(tcache_enabled, bool, bool) \ @@ -73,6 +76,7 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(link, tsd_link_t, tsd_link_t) \ O(tcache, tcache_t, tcache_t) \ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD @@ -91,20 +95,67 @@ typedef void (*test_callback_t)(int *); NULL, \ NULL, \ NULL, \ + {NULL}, \ TCACHE_ZERO_INITIALIZER, \ WITNESS_TSD_INITIALIZER \ MALLOC_TEST_TSD_INITIALIZER \ } +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +void malloc_tsd_cleanup_register(bool (*f)(void)); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); +void tsd_state_set(tsd_t *tsd, uint8_t new_state); +void tsd_slow_update(tsd_t *tsd); + +/* + * Call ..._inc when your module wants to take all threads down the slow paths, + * and ..._dec when it no longer needs to. + */ +void tsd_global_slow_inc(tsdn_t *tsdn); +void tsd_global_slow_dec(tsdn_t *tsdn); + enum { - tsd_state_nominal = 0, /* Common case --> jnz. */ - tsd_state_nominal_slow = 1, /* Initialized but on slow path. */ - /* the above 2 nominal states should be lower values. */ - tsd_state_nominal_max = 1, /* used for comparison only. */ - tsd_state_minimal_initialized = 2, - tsd_state_purgatory = 3, - tsd_state_reincarnated = 4, - tsd_state_uninitialized = 5 + /* Common case --> jnz. */ + tsd_state_nominal = 0, + /* Initialized but on slow path. */ + tsd_state_nominal_slow = 1, + /* + * Some thread has changed global state in such a way that all nominal + * threads need to recompute their fast / slow status the next time they + * get a chance. 
+ * + * Any thread can change another thread's status *to* recompute, but + * threads are the only ones who can change their status *from* + * recompute. + */ + tsd_state_nominal_recompute = 2, + /* + * The above nominal states should be lower values. We use + * tsd_nominal_max to separate nominal states from threads in the + * process of being born / dying. + */ + tsd_state_nominal_max = 2, + + /* + * A thread might free() during its death as its only allocator action; + * in such scenarios, we need tsd, but set up in such a way that no + * cleanup is necessary. + */ + tsd_state_minimal_initialized = 3, + /* States during which we know we're in thread death. */ + tsd_state_purgatory = 4, + tsd_state_reincarnated = 5, + /* + * What it says on the tin; tsd that hasn't been initialized. Note + * that even when the tsd struct lives in TLS, when need to keep track + * of stuff like whether or not our pthread destructors have been + * scheduled, so this really truly is different than the nominal state. + */ + tsd_state_uninitialized = 6 }; /* @@ -141,11 +192,6 @@ tsd_state_get(tsd_t *tsd) { return *(uint8_t *)&tsd->state; } -JEMALLOC_ALWAYS_INLINE void -tsd_state_set(tsd_t *tsd, uint8_t state) { - atomic_store_u8(&tsd->state, state, ATOMIC_RELAXED); -} - /* * Wrapper around tsd_t that makes it possible to avoid implicit conversion * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be @@ -172,15 +218,6 @@ tsdn_tsd(tsdn_t *tsdn) { return &tsdn->tsd; } -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_cleanup_register(bool (*f)(void)); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); -void tsd_slow_update(tsd_t *tsd); - /* * We put the platform-specific data declarations and inlines into their own * header files to avoid cluttering this file. 
They define tsd_boot0, @@ -213,10 +250,16 @@ MALLOC_TSD #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get(tsd_t *tsd) { \ - assert(tsd_state_get(tsd) == tsd_state_nominal || \ - tsd_state_get(tsd) == tsd_state_nominal_slow || \ - tsd_state_get(tsd) == tsd_state_reincarnated || \ - tsd_state_get(tsd) == tsd_state_minimal_initialized); \ + /* \ + * Because the state might change asynchronously if it's \ + * nominal, we need to make sure that we only read it once. \ + */ \ + uint8_t state = tsd_state_get(tsd); \ + assert(state == tsd_state_nominal || \ + state == tsd_state_nominal_slow || \ + state == tsd_state_nominal_recompute || \ + state == tsd_state_reincarnated || \ + state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } MALLOC_TSD @@ -260,6 +303,11 @@ MALLOC_TSD JEMALLOC_ALWAYS_INLINE void tsd_assert_fast(tsd_t *tsd) { + /* + * Note that our fastness assertion does *not* include global slowness + * counters; it's not in general possible to ensure that they won't + * change asynchronously from underneath us. + */ assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && tsd_reentrancy_level_get(tsd) == 0); } diff --git a/src/tsd.c b/src/tsd.c index f3320ebb..c92cd228 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -51,14 +51,159 @@ bool tsd_booted = false; /******************************************************************************/ +/* A list of all the tsds in the nominal state. */ +typedef ql_head(tsd_t) tsd_list_t; +static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); +static malloc_mutex_t tsd_nominal_tsds_lock; + +/* How many slow-path-enabling features are turned on. */ +static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0); + +static bool +tsd_in_nominal_list(tsd_t *tsd) { + tsd_t *tsd_list; + bool found = false; + /* + * We don't know that tsd is nominal; it might not be safe to get data + * out of it here. 
+ */ + malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); + ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(link)) { + if (tsd == tsd_list) { + found = true; + break; + } + } + malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock); + return found; +} + +static void +tsd_add_nominal(tsd_t *tsd) { + assert(!tsd_in_nominal_list(tsd)); + assert(tsd_state_get(tsd) <= tsd_state_nominal_max); + ql_elm_new(tsd, TSD_MANGLE(link)); + malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(link)); + malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +static void +tsd_remove_nominal(tsd_t *tsd) { + assert(tsd_in_nominal_list(tsd)); + assert(tsd_state_get(tsd) <= tsd_state_nominal_max); + malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(link)); + malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +static void +tsd_force_recompute(tsdn_t *tsdn) { + /* + * The stores to tsd->state here need to synchronize with the exchange + * in tsd_slow_update. + */ + atomic_fence(ATOMIC_RELEASE); + malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); + tsd_t *remote_tsd; + ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(link)) { + assert(atomic_load_u8(&remote_tsd->state, ATOMIC_RELAXED) + <= tsd_state_nominal_max); + atomic_store_u8(&remote_tsd->state, tsd_state_nominal_recompute, + ATOMIC_RELAXED); + } + malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); +} + +void +tsd_global_slow_inc(tsdn_t *tsdn) { + atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); + /* + * We unconditionally force a recompute, even if the global slow count + * was already positive. 
If we didn't, then it would be possible for us + * to return to the user, have the user synchronize externally with some + * other thread, and then have that other thread not have picked up the + * update yet (since the original incrementing thread might still be + * making its way through the tsd list). + */ + tsd_force_recompute(tsdn); +} + +void tsd_global_slow_dec(tsdn_t *tsdn) { + atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); + /* See the note in ..._inc(). */ + tsd_force_recompute(tsdn); +} + +static bool +tsd_local_slow(tsd_t *tsd) { + return !tsd_tcache_enabled_get(tsd) + || tsd_reentrancy_level_get(tsd) > 0; +} + +static bool +tsd_global_slow() { + return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; +} + +/******************************************************************************/ + +static uint8_t +tsd_state_compute(tsd_t *tsd) { + if (!tsd_nominal(tsd)) { + return tsd_state_get(tsd); + } + /* We're in *a* nominal state; but which one? */ + if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) { + return tsd_state_nominal_slow; + } else { + return tsd_state_nominal; + } +} + void tsd_slow_update(tsd_t *tsd) { - if (tsd_nominal(tsd)) { - if (malloc_slow || !tsd_tcache_enabled_get(tsd) || - tsd_reentrancy_level_get(tsd) > 0) { - tsd_state_set(tsd, tsd_state_nominal_slow); + uint8_t old_state; + do { + uint8_t new_state = tsd_state_compute(tsd); + old_state = atomic_exchange_u8(&tsd->state, new_state, + ATOMIC_ACQUIRE); + } while (old_state == tsd_state_nominal_recompute); +} + +void +tsd_state_set(tsd_t *tsd, uint8_t new_state) { + /* Only the tsd module can change the state *to* recompute. */ + assert(new_state != tsd_state_nominal_recompute); + uint8_t old_state = atomic_load_u8(&tsd->state, ATOMIC_RELAXED); + if (old_state > tsd_state_nominal_max) { + /* + * Not currently in the nominal list, but it might need to be + * inserted there. 
+ */ + assert(!tsd_in_nominal_list(tsd)); + atomic_store_u8(&tsd->state, new_state, ATOMIC_RELAXED); + if (new_state <= tsd_state_nominal_max) { + tsd_add_nominal(tsd); + } + } else { + /* + * We're currently nominal. If the new state is non-nominal, + * great; we take ourselves off the list and just enter the new + * state. + */ + assert(tsd_in_nominal_list(tsd)); + if (new_state > tsd_state_nominal_max) { + tsd_remove_nominal(tsd); + atomic_store_u8(&tsd->state, new_state, ATOMIC_RELAXED); } else { - tsd_state_set(tsd, tsd_state_nominal); + /* + * This is the tricky case. We're transitioning from + * one nominal state to another. The caller can't know + * about any races that are occuring at the same time, + * so we always have to recompute no matter what. + */ + tsd_slow_update(tsd); } } } @@ -118,10 +263,14 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { assert(!tsd_fast(tsd)); if (tsd_state_get(tsd) == tsd_state_nominal_slow) { - /* On slow path but no work needed. */ - assert(malloc_slow || !tsd_tcache_enabled_get(tsd) || - tsd_reentrancy_level_get(tsd) > 0 || - *tsd_arenas_tdata_bypassp_get(tsd)); + /* + * On slow path but no work needed. Note that we can't + * necessarily *assert* that we're slow, because we might be + * slow because of an asynchronous modification to global state, + * which might be asynchronously modified *back*. 
+ */ + } else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) { + tsd_slow_update(tsd); } else if (tsd_state_get(tsd) == tsd_state_uninitialized) { if (!minimal) { tsd_state_set(tsd, tsd_state_nominal); @@ -260,6 +409,10 @@ malloc_tsd_boot0(void) { tsd_t *tsd; ncleanups = 0; + if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", + WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { + return NULL; + } if (tsd_boot0()) { return NULL; } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 3379891b..917884dc 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -1,5 +1,10 @@ #include "test/jemalloc_test.h" +/* + * If we're e.g. in debug mode, we *never* enter the fast path, and so shouldn't + * be asserting that we're on one. + */ +static bool originally_fast; static int data_cleanup_count; void @@ -124,6 +129,128 @@ TEST_BEGIN(test_tsd_reincarnation) { } TEST_END +typedef struct { + atomic_u32_t phase; + atomic_b_t error; +} global_slow_data_t; + +static void * +thd_start_global_slow(void *arg) { + /* PHASE 0 */ + global_slow_data_t *data = (global_slow_data_t *)arg; + free(mallocx(1, 0)); + + tsd_t *tsd = tsd_fetch(); + /* + * No global slowness has happened yet; there was an error if we were + * originally fast but aren't now. 
+ */ + atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd), + ATOMIC_SEQ_CST); + atomic_store_u32(&data->phase, 1, ATOMIC_SEQ_CST); + + /* PHASE 2 */ + while (atomic_load_u32(&data->phase, ATOMIC_SEQ_CST) != 2) { + } + free(mallocx(1, 0)); + atomic_store_b(&data->error, tsd_fast(tsd), ATOMIC_SEQ_CST); + atomic_store_u32(&data->phase, 3, ATOMIC_SEQ_CST); + + /* PHASE 4 */ + while (atomic_load_u32(&data->phase, ATOMIC_SEQ_CST) != 4) { + } + free(mallocx(1, 0)); + atomic_store_b(&data->error, tsd_fast(tsd), ATOMIC_SEQ_CST); + atomic_store_u32(&data->phase, 5, ATOMIC_SEQ_CST); + + /* PHASE 6 */ + while (atomic_load_u32(&data->phase, ATOMIC_SEQ_CST) != 6) { + } + free(mallocx(1, 0)); + /* Only one decrement so far. */ + atomic_store_b(&data->error, tsd_fast(tsd), ATOMIC_SEQ_CST); + atomic_store_u32(&data->phase, 7, ATOMIC_SEQ_CST); + + /* PHASE 8 */ + while (atomic_load_u32(&data->phase, ATOMIC_SEQ_CST) != 8) { + } + free(mallocx(1, 0)); + /* + * Both decrements happened; we should be fast again (if we ever + * were) + */ + atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd), + ATOMIC_SEQ_CST); + atomic_store_u32(&data->phase, 9, ATOMIC_SEQ_CST); + + return NULL; +} + +TEST_BEGIN(test_tsd_global_slow) { + global_slow_data_t data = {ATOMIC_INIT(0), ATOMIC_INIT(false)}; + /* + * Note that the "mallocx" here (vs. malloc) is important, since the + * compiler is allowed to optimize away free(malloc(1)) but not + * free(mallocx(1)). + */ + free(mallocx(1, 0)); + tsd_t *tsd = tsd_fetch(); + originally_fast = tsd_fast(tsd); + + thd_t thd; + thd_create(&thd, thd_start_global_slow, (void *)&data.phase); + /* PHASE 1 */ + while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 1) { + /* + * We don't have a portable condvar/semaphore mechanism. + * Spin-wait. 
+ */ + } + assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + tsd_global_slow_inc(tsd_tsdn(tsd)); + free(mallocx(1, 0)); + assert_false(tsd_fast(tsd), ""); + atomic_store_u32(&data.phase, 2, ATOMIC_SEQ_CST); + + /* PHASE 3 */ + while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 3) { + } + assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + /* Increase again, so that we can test multiple fast/slow changes. */ + tsd_global_slow_inc(tsd_tsdn(tsd)); + atomic_store_u32(&data.phase, 4, ATOMIC_SEQ_CST); + free(mallocx(1, 0)); + assert_false(tsd_fast(tsd), ""); + + /* PHASE 5 */ + while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 5) { + } + assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + tsd_global_slow_dec(tsd_tsdn(tsd)); + atomic_store_u32(&data.phase, 6, ATOMIC_SEQ_CST); + /* We only decreased once; things should still be slow. */ + free(mallocx(1, 0)); + assert_false(tsd_fast(tsd), ""); + + /* PHASE 7 */ + while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 7) { + } + assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + tsd_global_slow_dec(tsd_tsdn(tsd)); + atomic_store_u32(&data.phase, 8, ATOMIC_SEQ_CST); + /* We incremented and then decremented twice; we should be fast now. */ + free(mallocx(1, 0)); + assert_true(!originally_fast || tsd_fast(tsd), ""); + + /* PHASE 9 */ + while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 9) { + } + assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + + thd_join(thd, NULL); +} +TEST_END + int main(void) { /* Ensure tsd bootstrapped. */ @@ -135,5 +262,6 @@ main(void) { return test_no_reentrancy( test_tsd_main_thread, test_tsd_sub_thread, - test_tsd_reincarnation); + test_tsd_reincarnation, + test_tsd_global_slow); } From c7a87e0e0bd02cf278760f3c22615d3129dc1ae2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Apr 2018 18:09:34 -0700 Subject: [PATCH 1125/2608] Rename hooks module to test_hooks. 
"Hooks" is really the best name for the module that will contain the publicly exposed hooks. So let's rename the current "hooks" module (that hooks external dependencies, for reentrancy testing) to "test_hooks". --- Makefile.in | 4 ++-- include/jemalloc/internal/hooks.h | 19 ------------------- .../jemalloc/internal/jemalloc_preamble.h.in | 2 +- include/jemalloc/internal/test_hooks.h | 19 +++++++++++++++++++ src/arena.c | 4 ++-- src/prof.c | 2 +- src/{hooks.c => test_hooks.c} | 4 ++-- test/include/test/jemalloc_test.h.in | 2 +- test/src/test.c | 14 +++++++------- test/unit/{hooks.c => test_hooks.c} | 6 +++--- 10 files changed, 38 insertions(+), 38 deletions(-) delete mode 100644 include/jemalloc/internal/hooks.h create mode 100644 include/jemalloc/internal/test_hooks.h rename src/{hooks.c => test_hooks.c} (79%) rename test/unit/{hooks.c => test_hooks.c} (82%) diff --git a/Makefile.in b/Makefile.in index 9b9347ff..c4f006b5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -102,7 +102,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ - $(srcroot)src/hooks.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ @@ -116,6 +115,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ + $(srcroot)src/test_hooks.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c @@ -172,7 +172,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ - $(srcroot)test/unit/hooks.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ @@ -205,6 +204,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/spin.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ diff --git 
a/include/jemalloc/internal/hooks.h b/include/jemalloc/internal/hooks.h deleted file mode 100644 index cd49afcb..00000000 --- a/include/jemalloc/internal/hooks.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_HOOKS_H -#define JEMALLOC_INTERNAL_HOOKS_H - -extern JEMALLOC_EXPORT void (*hooks_arena_new_hook)(); -extern JEMALLOC_EXPORT void (*hooks_libc_hook)(); - -#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) - -#define open JEMALLOC_HOOK(open, hooks_libc_hook) -#define read JEMALLOC_HOOK(read, hooks_libc_hook) -#define write JEMALLOC_HOOK(write, hooks_libc_hook) -#define readlink JEMALLOC_HOOK(readlink, hooks_libc_hook) -#define close JEMALLOC_HOOK(close, hooks_libc_hook) -#define creat JEMALLOC_HOOK(creat, hooks_libc_hook) -#define secure_getenv JEMALLOC_HOOK(secure_getenv, hooks_libc_hook) -/* Note that this is undef'd and re-define'd in src/prof.c. */ -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) - -#endif /* JEMALLOC_INTERNAL_HOOKS_H */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index e621fbc8..1b12aeec 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -45,7 +45,7 @@ # include "jemalloc/internal/private_namespace_jet.h" # endif #endif -#include "jemalloc/internal/hooks.h" +#include "jemalloc/internal/test_hooks.h" #ifdef JEMALLOC_DEFINE_MADVISE_FREE # define JEMALLOC_MADV_FREE 8 diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h new file mode 100644 index 00000000..a6351e59 --- /dev/null +++ b/include/jemalloc/internal/test_hooks.h @@ -0,0 +1,19 @@ +#ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H +#define JEMALLOC_INTERNAL_TEST_HOOKS_H + +extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(); +extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(); + +#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), 
fn) + +#define open JEMALLOC_HOOK(open, test_hooks_libc_hook) +#define read JEMALLOC_HOOK(read, test_hooks_libc_hook) +#define write JEMALLOC_HOOK(write, test_hooks_libc_hook) +#define readlink JEMALLOC_HOOK(readlink, test_hooks_libc_hook) +#define close JEMALLOC_HOOK(close, test_hooks_libc_hook) +#define creat JEMALLOC_HOOK(creat, test_hooks_libc_hook) +#define secure_getenv JEMALLOC_HOOK(secure_getenv, test_hooks_libc_hook) +/* Note that this is undef'd and re-define'd in src/prof.c. */ +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) + +#endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */ diff --git a/src/arena.c b/src/arena.c index 5d55bf1a..311943f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1900,8 +1900,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ assert(!tsdn_null(tsdn)); pre_reentrancy(tsdn_tsd(tsdn), arena); - if (hooks_arena_new_hook) { - hooks_arena_new_hook(); + if (test_hooks_arena_new_hook) { + test_hooks_arena_new_hook(); } post_reentrancy(tsdn_tsd(tsdn)); } diff --git a/src/prof.c b/src/prof.c index 13df641a..405de4b3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -23,7 +23,7 @@ */ #undef _Unwind_Backtrace #include -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif /******************************************************************************/ diff --git a/src/hooks.c b/src/test_hooks.c similarity index 79% rename from src/hooks.c rename to src/test_hooks.c index 6266ecd4..ace00d9c 100644 --- a/src/hooks.c +++ b/src/test_hooks.c @@ -6,7 +6,7 @@ * from outside the generated library, so that we can use them in test code. 
*/ JEMALLOC_EXPORT -void (*hooks_arena_new_hook)() = NULL; +void (*test_hooks_arena_new_hook)() = NULL; JEMALLOC_EXPORT -void (*hooks_libc_hook)() = NULL; +void (*test_hooks_libc_hook)() = NULL; diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 67caa86b..0209aead 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -69,7 +69,7 @@ static const bool config_debug = # define JEMALLOC_N(n) @private_namespace@##n # include "jemalloc/internal/private_namespace.h" -# include "jemalloc/internal/hooks.h" +# include "jemalloc/internal/test_hooks.h" /* Hermetic headers. */ # include "jemalloc/internal/assert.h" diff --git a/test/src/test.c b/test/src/test.c index 01a4d738..9c754e33 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -48,12 +48,12 @@ do_hook(bool *hook_ran, void (**hook)()) { static void libc_reentrancy_hook() { - do_hook(&libc_hook_ran, &hooks_libc_hook); + do_hook(&libc_hook_ran, &test_hooks_libc_hook); } static void arena_new_reentrancy_hook() { - do_hook(&arena_new_hook_ran, &hooks_arena_new_hook); + do_hook(&arena_new_hook_ran, &test_hooks_arena_new_hook); } /* Actual test infrastructure. */ @@ -131,7 +131,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { for (; t != NULL; t = va_arg(ap, test_t *)) { /* Non-reentrant run. */ reentrancy = non_reentrant; - hooks_arena_new_hook = hooks_libc_hook = NULL; + test_hooks_arena_new_hook = test_hooks_libc_hook = NULL; t(); if (test_status > ret) { ret = test_status; @@ -139,16 +139,16 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { /* Reentrant run. 
*/ if (do_reentrant) { reentrancy = libc_reentrant; - hooks_arena_new_hook = NULL; - hooks_libc_hook = &libc_reentrancy_hook; + test_hooks_arena_new_hook = NULL; + test_hooks_libc_hook = &libc_reentrancy_hook; t(); if (test_status > ret) { ret = test_status; } reentrancy = arena_new_reentrant; - hooks_libc_hook = NULL; - hooks_arena_new_hook = &arena_new_reentrancy_hook; + test_hooks_libc_hook = NULL; + test_hooks_arena_new_hook = &arena_new_reentrancy_hook; t(); if (test_status > ret) { ret = test_status; diff --git a/test/unit/hooks.c b/test/unit/test_hooks.c similarity index 82% rename from test/unit/hooks.c rename to test/unit/test_hooks.c index b70172e1..ded8698b 100644 --- a/test/unit/hooks.c +++ b/test/unit/test_hooks.c @@ -12,10 +12,10 @@ func_to_hook(int arg1, int arg2) { return arg1 + arg2; } -#define func_to_hook JEMALLOC_HOOK(func_to_hook, hooks_libc_hook) +#define func_to_hook JEMALLOC_HOOK(func_to_hook, test_hooks_libc_hook) TEST_BEGIN(unhooked_call) { - hooks_libc_hook = NULL; + test_hooks_libc_hook = NULL; hook_called = false; assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); assert_false(hook_called, "Nulling out hook didn't take."); @@ -23,7 +23,7 @@ TEST_END TEST_BEGIN(hooked_call) { - hooks_libc_hook = &hook; + test_hooks_libc_hook = &hook; hook_called = false; assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); assert_true(hook_called, "Hook should have executed."); From 06a8c40b36403e902748d3f2a14e6dd43488ae89 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Apr 2018 15:56:59 -0700 Subject: [PATCH 1126/2608] Add the Seq module, a simple seqlock implementation. This allows fast reader-writer concurrency in cases where writers are rare. The immediate use case is for the hooking implementation. 
--- Makefile.in | 1 + include/jemalloc/internal/seq.h | 55 +++++++++++++++++++ test/unit/seq.c | 95 +++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+) create mode 100644 include/jemalloc/internal/seq.h create mode 100644 test/unit/seq.c diff --git a/Makefile.in b/Makefile.in index c4f006b5..074fdd45 100644 --- a/Makefile.in +++ b/Makefile.in @@ -197,6 +197,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/retained.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ diff --git a/include/jemalloc/internal/seq.h b/include/jemalloc/internal/seq.h new file mode 100644 index 00000000..ef2df4c6 --- /dev/null +++ b/include/jemalloc/internal/seq.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_SEQ_H +#define JEMALLOC_INTERNAL_SEQ_H + +#include "jemalloc/internal/atomic.h" + +/* + * A simple seqlock implementation. + */ + +#define seq_define(type, short_type) \ +typedef struct { \ + atomic_zu_t seq; \ + atomic_zu_t data[ \ + (sizeof(type) + sizeof(size_t) - 1) / sizeof(size_t)]; \ +} seq_##short_type##_t; \ + \ +/* \ + * No internal synchronization -- the caller must ensure that there's \ + * only a single writer at a time. \ + */ \ +static inline void \ +seq_store_##short_type(seq_##short_type##_t *dst, type *src) { \ + size_t buf[sizeof(dst->data) / sizeof(size_t)]; \ + buf[sizeof(buf) / sizeof(size_t) - 1] = 0; \ + memcpy(buf, src, sizeof(type)); \ + size_t old_seq = atomic_load_zu(&dst->seq, ATOMIC_RELAXED); \ + atomic_store_zu(&dst->seq, old_seq + 1, ATOMIC_RELAXED); \ + atomic_fence(ATOMIC_RELEASE); \ + for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \ + atomic_store_zu(&dst->data[i], buf[i], ATOMIC_RELAXED); \ + } \ + atomic_store_zu(&dst->seq, old_seq + 2, ATOMIC_RELEASE); \ +} \ + \ +/* Returns whether or not the read was consistent. 
*/ \ +static inline bool \ +seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) { \ + size_t buf[sizeof(src->data) / sizeof(size_t)]; \ + size_t seq1 = atomic_load_zu(&src->seq, ATOMIC_ACQUIRE); \ + if (seq1 % 2 != 0) { \ + return false; \ + } \ + for (size_t i = 0; i < sizeof(buf) / sizeof(size_t); i++) { \ + buf[i] = atomic_load_zu(&src->data[i], ATOMIC_RELAXED); \ + } \ + atomic_fence(ATOMIC_ACQUIRE); \ + size_t seq2 = atomic_load_zu(&src->seq, ATOMIC_RELAXED); \ + if (seq1 != seq2) { \ + return false; \ + } \ + memcpy(dst, buf, sizeof(type)); \ + return true; \ +} + +#endif /* JEMALLOC_INTERNAL_SEQ_H */ diff --git a/test/unit/seq.c b/test/unit/seq.c new file mode 100644 index 00000000..19613b0b --- /dev/null +++ b/test/unit/seq.c @@ -0,0 +1,95 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/seq.h" + +typedef struct data_s data_t; +struct data_s { + int arr[10]; +}; + +static void +set_data(data_t *data, int num) { + for (int i = 0; i < 10; i++) { + data->arr[i] = num; + } +} + +static void +assert_data(data_t *data) { + int num = data->arr[0]; + for (int i = 0; i < 10; i++) { + assert_d_eq(num, data->arr[i], "Data consistency error"); + } +} + +seq_define(data_t, data) + +typedef struct thd_data_s thd_data_t; +struct thd_data_s { + seq_data_t data; +}; + +static void * +seq_reader_thd(void *arg) { + thd_data_t *thd_data = (thd_data_t *)arg; + int iter = 0; + data_t local_data; + while (iter < 1000 * 1000 - 1) { + bool success = seq_try_load_data(&local_data, &thd_data->data); + if (success) { + assert_data(&local_data); + assert_d_le(iter, local_data.arr[0], + "Seq read went back in time."); + iter = local_data.arr[0]; + } + } + return NULL; +} + +static void * +seq_writer_thd(void *arg) { + thd_data_t *thd_data = (thd_data_t *)arg; + data_t local_data; + memset(&local_data, 0, sizeof(local_data)); + for (int i = 0; i < 1000 * 1000; i++) { + set_data(&local_data, i); + seq_store_data(&thd_data->data, &local_data); + } + return 
NULL; +} + +TEST_BEGIN(test_seq_threaded) { + thd_data_t thd_data; + memset(&thd_data, 0, sizeof(thd_data)); + + thd_t reader; + thd_t writer; + + thd_create(&reader, seq_reader_thd, &thd_data); + thd_create(&writer, seq_writer_thd, &thd_data); + + thd_join(reader, NULL); + thd_join(writer, NULL); +} +TEST_END + +TEST_BEGIN(test_seq_simple) { + data_t data; + seq_data_t seq; + memset(&seq, 0, sizeof(seq)); + for (int i = 0; i < 1000 * 1000; i++) { + set_data(&data, i); + seq_store_data(&seq, &data); + set_data(&data, 0); + bool success = seq_try_load_data(&data, &seq); + assert_b_eq(success, true, "Failed non-racing read"); + assert_data(&data); + } +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_seq_simple, + test_seq_threaded); +} From 5ae6e7cbfa6d6788340cc87d7717548f4d7960fe Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Apr 2018 19:11:46 -0700 Subject: [PATCH 1127/2608] Add "hook" module. The hook module allows a low-reader-overhead way of finding hooks to invoke and calling them. For now, none of the allocation pathways are tied into the hooks; this will come later. 
--- Makefile.in | 2 + include/jemalloc/internal/hook.h | 125 +++++++++++++++++++ include/jemalloc/internal/witness.h | 1 + src/hook.c | 133 ++++++++++++++++++++ src/jemalloc.c | 2 + test/unit/hook.c | 180 ++++++++++++++++++++++++++++ 6 files changed, 443 insertions(+) create mode 100644 include/jemalloc/internal/hook.h create mode 100644 src/hook.c create mode 100644 test/unit/hook.c diff --git a/Makefile.in b/Makefile.in index 074fdd45..3b3191f5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -102,6 +102,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ + $(srcroot)src/hook.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ @@ -172,6 +173,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ + $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h new file mode 100644 index 00000000..847c91ba --- /dev/null +++ b/include/jemalloc/internal/hook.h @@ -0,0 +1,125 @@ +#ifndef JEMALLOC_INTERNAL_HOOK_H +#define JEMALLOC_INTERNAL_HOOK_H + +#include "jemalloc/internal/tsd.h" + +/* + * This API is *extremely* experimental, and may get ripped out, changed in API- + * and ABI-incompatible ways, be insufficiently or incorrectly documented, etc. + * + * It allows hooking the stateful parts of the API to see changes as they + * happen. + * + * Allocation hooks are called after the allocation is done, free hooks are + * called before the free is done, and expand hooks are called after the + * allocation is expanded. + * + * For realloc and rallocx, if the expansion happens in place, the expansion + * hook is called. If it is moved, then the alloc hook is called on the new + * location, and then the free hook is called on the old location. 
+ * + * (We omit no-ops, like free(NULL), etc.). + * + * Reentrancy: + * Is not protected against. If your hooks allocate, then the hooks will be + * called again. Note that you can guard against this with a thread-local + * "in_hook" bool. + * Threading: + * The installation of a hook synchronizes with all its uses. If you can + * prove the installation of a hook happens-before a jemalloc entry point, + * then the hook will get invoked (unless there's a racing removal). + * + * Hook insertion appears to be atomic at a per-thread level (i.e. if a thread + * allocates and has the alloc hook invoked, then a subsequent free on the + * same thread will also have the free hook invoked). + * + * The *removal* of a hook does *not* block until all threads are done with + * the hook. Hook authors have to be resilient to this, and need some + * out-of-band mechanism for cleaning up any dynamically allocated memory + * associated with their hook. + * Ordering: + * Order of hook execution is unspecified, and may be different than insertion + * order. + */ + +enum hook_alloc_e { + hook_alloc_malloc, + hook_alloc_posix_memalign, + hook_alloc_aligned_alloc, + hook_alloc_calloc, + hook_alloc_memalign, + hook_alloc_valloc, + hook_alloc_mallocx, + + /* The reallocating functions have both alloc and dalloc variants */ + hook_alloc_realloc, + hook_alloc_rallocx, +}; +/* + * We put the enum typedef after the enum, since this file may get included by + * jemalloc_cpp.cpp, and C++ disallows enum forward declarations. + */ +typedef enum hook_alloc_e hook_alloc_t; + +enum hook_dalloc_e { + hook_dalloc_free, + hook_dalloc_dallocx, + hook_dalloc_sdallocx, + + /* + * The dalloc halves of reallocation (not called if in-place expansion + * happens). 
+ */ + hook_dalloc_realloc, + hook_dalloc_rallocx, +}; +typedef enum hook_dalloc_e hook_dalloc_t; + + +enum hook_expand_e { + hook_expand_realloc, + hook_expand_rallocx, + hook_expand_xallocx, +}; +typedef enum hook_expand_e hook_expand_t; + +typedef void (*hook_alloc)( + void *extra, hook_alloc_t type, void *result, uintptr_t result_raw, + uintptr_t args_raw[3]); + +typedef void (*hook_dalloc)( + void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]); + +typedef void (*hook_expand)( + void *extra, hook_expand_t type, void *address, size_t old_usize, + size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); + +typedef struct hooks_s hooks_t; +struct hooks_s { + hook_alloc alloc_hook; + hook_dalloc dalloc_hook; + hook_expand expand_hook; +}; + +/* + * Returns an opaque handle to be used when removing the hook. NULL means that + * we couldn't install the hook. + */ +bool hook_boot(); + +void *hook_install(tsdn_t *tsdn, hooks_t *hooks, void *extra); +/* Uninstalls the hook with the handle previously returned from hook_install. 
*/ +void hook_remove(tsdn_t *tsdn, void *opaque); + +/* Hooks */ + +void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, + uintptr_t args_raw[3]); + +void hook_invoke_dalloc(hook_dalloc_t type, void *address, + uintptr_t args_raw[3]); + +void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, + size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); + +#endif /* JEMALLOC_INTERNAL_HOOK_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 7ace8ae4..80ea70c2 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -49,6 +49,7 @@ #define WITNESS_RANK_RTREE 17U #define WITNESS_RANK_BASE 18U #define WITNESS_RANK_ARENA_LARGE 19U +#define WITNESS_RANK_HOOK 20U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_BIN WITNESS_RANK_LEAF diff --git a/src/hook.c b/src/hook.c new file mode 100644 index 00000000..6b154bd2 --- /dev/null +++ b/src/hook.c @@ -0,0 +1,133 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/hook.h" + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/seq.h" + +typedef struct hooks_internal_s hooks_internal_t; +struct hooks_internal_s { + hooks_t hooks; + void *extra; + bool in_use; +}; + +seq_define(hooks_internal_t, hooks) + +#define HOOKS_MAX 4 +static seq_hooks_t hooks[HOOKS_MAX]; +static malloc_mutex_t hooks_mu; + +bool +hook_boot() { + return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK, + malloc_mutex_rank_exclusive); +} + +static void * +hook_install_locked(hooks_t *to_install, void *extra) { + hooks_internal_t hooks_internal; + for (int i = 0; i < HOOKS_MAX; i++) { + bool success = seq_try_load_hooks(&hooks_internal, &hooks[i]); + /* We hold mu; no concurrent access. 
*/ + assert(success); + if (!hooks_internal.in_use) { + hooks_internal.hooks = *to_install; + hooks_internal.extra = extra; + hooks_internal.in_use = true; + seq_store_hooks(&hooks[i], &hooks_internal); + return &hooks[i]; + } + } + return NULL; +} + +void * +hook_install(tsdn_t *tsdn, hooks_t *to_install, void *extra) { + malloc_mutex_lock(tsdn, &hooks_mu); + void *ret = hook_install_locked(to_install, extra); + if (ret != NULL) { + tsd_global_slow_inc(tsdn); + } + malloc_mutex_unlock(tsdn, &hooks_mu); + return ret; +} + +static void +hook_remove_locked(seq_hooks_t *to_remove) { + hooks_internal_t hooks_internal; + bool success = seq_try_load_hooks(&hooks_internal, to_remove); + /* We hold mu; no concurrent access. */ + assert(success); + /* Should only remove hooks that were added. */ + assert(hooks_internal.in_use); + hooks_internal.in_use = false; + seq_store_hooks(to_remove, &hooks_internal); +} + +void +hook_remove(tsdn_t *tsdn, void *opaque) { + if (config_debug) { + char *hooks_begin = (char *)&hooks[0]; + char *hooks_end = (char *)&hooks[HOOKS_MAX]; + char *hook = (char *)opaque; + assert(hooks_begin <= hook && hook < hooks_end + && (hook - hooks_begin) % sizeof(seq_hooks_t) == 0); + } + malloc_mutex_lock(tsdn, &hooks_mu); + hook_remove_locked((seq_hooks_t *)opaque); + tsd_global_slow_dec(tsdn); + malloc_mutex_unlock(tsdn, &hooks_mu); +} + +#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ +for (int for_each_hook_counter = 0; \ + for_each_hook_counter < HOOKS_MAX; \ + for_each_hook_counter++) { \ + bool for_each_hook_success = seq_try_load_hooks( \ + (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ + if (!for_each_hook_success) { \ + continue; \ + } \ + if (!(hooks_internal_ptr)->in_use) { \ + continue; \ + } +#define FOR_EACH_HOOK_END \ +} + +void +hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, + uintptr_t args_raw[3]) { + hooks_internal_t hook; + FOR_EACH_HOOK_BEGIN(&hook) + hook_alloc h = hook.hooks.alloc_hook; + if (h 
!= NULL) { + h(hook.extra, type, result, result_raw, args_raw); + } + FOR_EACH_HOOK_END +} + +void +hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { + hooks_internal_t hook; + FOR_EACH_HOOK_BEGIN(&hook) + hook_dalloc h = hook.hooks.dalloc_hook; + if (h != NULL) { + h(hook.extra, type, address, args_raw); + } + FOR_EACH_HOOK_END +} + +void +hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, + size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]) { + hooks_internal_t hook; + FOR_EACH_HOOK_BEGIN(&hook) + hook_expand h = hook.hooks.expand_hook; + if (h != NULL) { + h(hook.extra, type, address, old_usize, new_usize, + result_raw, args_raw); + } + FOR_EACH_HOOK_END +} diff --git a/src/jemalloc.c b/src/jemalloc.c index f93c16fa..f837e6b0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" @@ -1311,6 +1312,7 @@ malloc_init_hard_a0_locked() { malloc_mutex_rank_exclusive)) { return true; } + hook_boot(); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
diff --git a/test/unit/hook.c b/test/unit/hook.c new file mode 100644 index 00000000..a9590968 --- /dev/null +++ b/test/unit/hook.c @@ -0,0 +1,180 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hook.h" + +static void *arg_extra; +static int arg_type; +static void *arg_result; +static void *arg_address; +static size_t arg_old_usize; +static size_t arg_new_usize; +static uintptr_t arg_result_raw; +static uintptr_t arg_args_raw[4]; + +static int call_count = 0; + +static void +reset_args() { + arg_extra = NULL; + arg_type = 12345; + arg_result = NULL; + arg_address = NULL; + arg_old_usize = 0; + arg_new_usize = 0; + arg_result_raw = 0; + memset(arg_args_raw, 77, sizeof(arg_args_raw)); +} + +static void +set_args_raw(uintptr_t *args_raw, int nargs) { + memcpy(arg_args_raw, args_raw, sizeof(uintptr_t) * nargs); +} + +static void +assert_args_raw(uintptr_t *args_raw_expected, int nargs) { + int cmp = memcmp(args_raw_expected, arg_args_raw, + sizeof(uintptr_t) * nargs); + assert_d_eq(cmp, 0, "Raw args mismatch"); +} + +static void +test_alloc_hook(void *extra, hook_alloc_t type, void *result, + uintptr_t result_raw, uintptr_t args_raw[3]) { + call_count++; + arg_extra = extra; + arg_type = (int)type; + arg_result = result; + arg_result_raw = result_raw; + set_args_raw(args_raw, 3); +} + +static void +test_dalloc_hook(void *extra, hook_dalloc_t type, void *address, + uintptr_t args_raw[3]) { + call_count++; + arg_extra = extra; + arg_type = (int)type; + arg_address = address; + set_args_raw(args_raw, 3); +} + +static void +test_expand_hook(void *extra, hook_expand_t type, void *address, + size_t old_usize, size_t new_usize, uintptr_t result_raw, + uintptr_t args_raw[4]) { + call_count++; + arg_extra = extra; + arg_type = (int)type; + arg_address = address; + arg_old_usize = old_usize; + arg_new_usize = new_usize; + arg_result_raw = result_raw; + set_args_raw(args_raw, 4); +} + +TEST_BEGIN(test_hooks_basic) { + /* Just verify that the record their 
arguments correctly. */ + hooks_t hooks = { + &test_alloc_hook, &test_dalloc_hook, &test_expand_hook}; + void *handle = hook_install(TSDN_NULL, &hooks, (void *)111); + uintptr_t args_raw[4] = {10, 20, 30, 40}; + + /* Alloc */ + reset_args(); + hook_invoke_alloc(hook_alloc_posix_memalign, (void *)222, 333, + args_raw); + assert_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); + assert_d_eq((int)hook_alloc_posix_memalign, arg_type, + "Passed wrong alloc type"); + assert_ptr_eq((void *)222, arg_result, "Passed wrong result address"); + assert_u64_eq(333, arg_result_raw, "Passed wrong result"); + assert_args_raw(args_raw, 3); + + /* Dalloc */ + reset_args(); + hook_invoke_dalloc(hook_dalloc_sdallocx, (void *)222, args_raw); + assert_d_eq((int)hook_dalloc_sdallocx, arg_type, + "Passed wrong dalloc type"); + assert_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); + assert_ptr_eq((void *)222, arg_address, "Passed wrong address"); + assert_args_raw(args_raw, 3); + + /* Expand */ + reset_args(); + hook_invoke_expand(hook_expand_xallocx, (void *)222, 333, 444, 555, + args_raw); + assert_d_eq((int)hook_expand_xallocx, arg_type, + "Passed wrong expand type"); + assert_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); + assert_ptr_eq((void *)222, arg_address, "Passed wrong address"); + assert_zu_eq(333, arg_old_usize, "Passed wrong old usize"); + assert_zu_eq(444, arg_new_usize, "Passed wrong new usize"); + assert_zu_eq(555, arg_result_raw, "Passed wrong result"); + assert_args_raw(args_raw, 4); + + hook_remove(TSDN_NULL, handle); +} +TEST_END + +TEST_BEGIN(test_hooks_null) { + /* Null hooks should be ignored, not crash. 
*/ + hooks_t hooks1 = {NULL, NULL, NULL}; + hooks_t hooks2 = {&test_alloc_hook, NULL, NULL}; + hooks_t hooks3 = {NULL, &test_dalloc_hook, NULL}; + hooks_t hooks4 = {NULL, NULL, &test_expand_hook}; + + void *handle1 = hook_install(TSDN_NULL, &hooks1, NULL); + void *handle2 = hook_install(TSDN_NULL, &hooks2, NULL); + void *handle3 = hook_install(TSDN_NULL, &hooks3, NULL); + void *handle4 = hook_install(TSDN_NULL, &hooks4, NULL); + + assert_ptr_ne(handle1, NULL, "Hook installation failed"); + assert_ptr_ne(handle2, NULL, "Hook installation failed"); + assert_ptr_ne(handle3, NULL, "Hook installation failed"); + assert_ptr_ne(handle4, NULL, "Hook installation failed"); + + uintptr_t args_raw[4] = {10, 20, 30, 40}; + + call_count = 0; + hook_invoke_alloc(hook_alloc_malloc, NULL, 0, args_raw); + assert_d_eq(call_count, 1, "Called wrong number of times"); + + call_count = 0; + hook_invoke_dalloc(hook_dalloc_free, NULL, args_raw); + assert_d_eq(call_count, 1, "Called wrong number of times"); + + call_count = 0; + hook_invoke_expand(hook_expand_realloc, NULL, 0, 0, 0, args_raw); + assert_d_eq(call_count, 1, "Called wrong number of times"); + + hook_remove(TSDN_NULL, handle1); + hook_remove(TSDN_NULL, handle2); + hook_remove(TSDN_NULL, handle3); + hook_remove(TSDN_NULL, handle4); +} +TEST_END + +TEST_BEGIN(test_hooks_remove) { + hooks_t hooks = {&test_alloc_hook, NULL, NULL}; + void *handle = hook_install(TSDN_NULL, &hooks, NULL); + assert_ptr_ne(handle, NULL, "Hook installation failed"); + call_count = 0; + uintptr_t args_raw[4] = {10, 20, 30, 40}; + hook_invoke_alloc(hook_alloc_malloc, NULL, 0, args_raw); + assert_d_eq(call_count, 1, "Hook not invoked"); + + call_count = 0; + hook_remove(TSDN_NULL, handle); + hook_invoke_alloc(hook_alloc_malloc, NULL, 0, NULL); + assert_d_eq(call_count, 0, "Hook invoked after removal"); + +} +TEST_END + +int +main(void) { + return test( + test_hooks_basic, + test_hooks_null, + test_hooks_remove); +} From 
fe0e39938593b5fb16dc09fcdbe29d6ad7b3cf05 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Apr 2018 11:35:33 -0700 Subject: [PATCH 1128/2608] Hooks: add an early-exit path for the common no-hook case. --- src/hook.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/hook.c b/src/hook.c index 6b154bd2..5c6818ff 100644 --- a/src/hook.c +++ b/src/hook.c @@ -16,6 +16,7 @@ struct hooks_internal_s { seq_define(hooks_internal_t, hooks) #define HOOKS_MAX 4 +static atomic_u_t nhooks = ATOMIC_INIT(0); static seq_hooks_t hooks[HOOKS_MAX]; static malloc_mutex_t hooks_mu; @@ -37,6 +38,9 @@ hook_install_locked(hooks_t *to_install, void *extra) { hooks_internal.extra = extra; hooks_internal.in_use = true; seq_store_hooks(&hooks[i], &hooks_internal); + atomic_store_u(&nhooks, + atomic_load_u(&nhooks, ATOMIC_RELAXED) + 1, + ATOMIC_RELAXED); return &hooks[i]; } } @@ -64,6 +68,8 @@ hook_remove_locked(seq_hooks_t *to_remove) { assert(hooks_internal.in_use); hooks_internal.in_use = false; seq_store_hooks(to_remove, &hooks_internal); + atomic_store_u(&nhooks, atomic_load_u(&nhooks, ATOMIC_RELAXED) - 1, + ATOMIC_RELAXED); } void @@ -99,6 +105,9 @@ for (int for_each_hook_counter = 0; \ void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, uintptr_t args_raw[3]) { + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { + return; + } hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) hook_alloc h = hook.hooks.alloc_hook; @@ -110,6 +119,9 @@ hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, void hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { + return; + } hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) hook_dalloc h = hook.hooks.dalloc_hook; @@ -122,6 +134,9 @@ hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, 
size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]) { + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { + return; + } hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) hook_expand h = hook.hooks.expand_hook; From 226327cf66f6e1fb1aed24ed3e2e9c291d1843b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Apr 2018 13:14:22 -0700 Subject: [PATCH 1129/2608] Hooks: hook the pure-allocation functions. --- include/jemalloc/internal/hook.h | 2 +- src/jemalloc.c | 66 ++++++++++++++-- test/unit/hook.c | 124 ++++++++++++++++++++++++++++++- 3 files changed, 184 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 847c91ba..fbf3a077 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -18,7 +18,7 @@ * hook is called. If it is moved, then the alloc hook is called on the new * location, and then the free hook is called on the old location. * - * (We omit no-ops, like free(NULL), etc.). + * If we return NULL from OOM, then usize might not be trustworthy. * * Reentrancy: * Is not protected against. If your hooks allocate, then the hooks will be diff --git a/src/jemalloc.c b/src/jemalloc.c index f837e6b0..df59f695 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2038,6 +2038,14 @@ je_malloc(size_t size) { dopts.item_size = size; imalloc(&sopts, &dopts); + /* + * Note that this branch gets optimized away -- it immediately follows + * the check on tsd_fast that sets sopts.slow. 
+ */ + if (sopts.slow) { + uintptr_t args[3] = {size}; + hook_invoke_alloc(hook_alloc_malloc, ret, (uintptr_t)ret, args); + } LOG("core.malloc.exit", "result: %p", ret); @@ -2070,6 +2078,12 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)memptr, (uintptr_t)alignment, + (uintptr_t)size}; + hook_invoke_alloc(hook_alloc_posix_memalign, *memptr, + (uintptr_t)ret, args); + } LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, *memptr); @@ -2107,6 +2121,11 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)alignment, (uintptr_t)size}; + hook_invoke_alloc(hook_alloc_aligned_alloc, ret, + (uintptr_t)ret, args); + } LOG("core.aligned_alloc.exit", "result: %p", ret); @@ -2138,6 +2157,10 @@ je_calloc(size_t num, size_t size) { dopts.zero = true; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)num, (uintptr_t)size}; + hook_invoke_alloc(hook_alloc_calloc, ret, (uintptr_t)ret, args); + } LOG("core.calloc.exit", "result: %p", ret); @@ -2307,6 +2330,7 @@ je_realloc(void *ptr, size_t size) { } else { tcache = NULL; } + ifree(tsd, ptr, tcache, true); LOG("core.realloc.exit", "result: %p", NULL); @@ -2330,9 +2354,12 @@ je_realloc(void *ptr, size_t size) { assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = sz_s2u(size); - ret = unlikely(usize == 0 || usize > LARGE_MAXCLASS) ? 
- NULL : irealloc_prof(tsd, ptr, old_usize, usize, - &alloc_ctx); + if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + ret = NULL; + } else { + ret = irealloc_prof(tsd, ptr, old_usize, usize, + &alloc_ctx); + } } else { if (config_stats) { usize = sz_s2u(size); @@ -2342,8 +2369,23 @@ je_realloc(void *ptr, size_t size) { tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - void *ret = je_malloc(size); - LOG("core.realloc.exit", "result: %p", ret); + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.bump_empty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.oom_string = + ": Error in realloc(): out of memory\n"; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + + imalloc(&sopts, &dopts); return ret; } @@ -2443,6 +2485,11 @@ je_memalign(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {alignment, size}; + hook_invoke_alloc(hook_alloc_memalign, ret, (uintptr_t)ret, + args); + } LOG("core.memalign.exit", "result: %p", ret); return ret; @@ -2478,6 +2525,10 @@ je_valloc(size_t size) { dopts.alignment = PAGE; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {size}; + hook_invoke_alloc(hook_alloc_valloc, ret, (uintptr_t)ret, args); + } LOG("core.valloc.exit", "result: %p\n", ret); return ret; @@ -2588,6 +2639,11 @@ je_mallocx(size_t size, int flags) { } imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {size, flags}; + hook_invoke_alloc(hook_alloc_mallocx, ret, (uintptr_t)ret, + args); + } LOG("core.mallocx.exit", "result: %p", ret); return ret; diff --git a/test/unit/hook.c b/test/unit/hook.c index a9590968..06d4b82d 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -37,6 +37,12 @@ assert_args_raw(uintptr_t *args_raw_expected, int nargs) { assert_d_eq(cmp, 0, "Raw args 
mismatch"); } +static void +reset() { + call_count = 0; + reset_args(); +} + static void test_alloc_hook(void *extra, hook_alloc_t type, void *result, uintptr_t result_raw, uintptr_t args_raw[3]) { @@ -171,10 +177,124 @@ TEST_BEGIN(test_hooks_remove) { } TEST_END +TEST_BEGIN(test_hooks_alloc_simple) { + /* "Simple" in the sense that we're not in a realloc variant. */ + + hooks_t hooks = {&test_alloc_hook, NULL, NULL}; + void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + assert_ptr_ne(handle, NULL, "Hook installation failed"); + + /* Stop malloc from being optimized away. */ + volatile int err; + void *volatile ptr; + + /* malloc */ + reset(); + ptr = malloc(1); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + free(ptr); + + /* posix_memalign */ + reset(); + err = posix_memalign((void **)&ptr, 1024, 1); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_posix_memalign, + "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); + assert_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); + free(ptr); + + /* aligned_alloc */ + reset(); + ptr = aligned_alloc(1024, 1); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_aligned_alloc, + "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + 
assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + free(ptr); + + /* calloc */ + reset(); + ptr = calloc(11, 13); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); + free(ptr); + + /* memalign */ +#ifdef JEMALLOC_OVERRIDE_MEMALIGN + reset(); + ptr = memalign(1024, 1); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + free(ptr); +#endif /* JEMALLOC_OVERRIDE_MEMALIGN */ + + /* valloc */ +#ifdef JEMALLOC_OVERRIDE_VALLOC + reset(); + ptr = valloc(1); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + free(ptr); +#endif /* JEMALLOC_OVERRIDE_VALLOC */ + + /* mallocx */ + reset(); + ptr = mallocx(1, MALLOCX_LG_ALIGN(10)); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); 
+ assert_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], + "Wrong flags"); + free(ptr); + + hook_remove(TSDN_NULL, handle); +} +TEST_END + int main(void) { - return test( + /* We assert on call counts. */ + return test_no_reentrancy( test_hooks_basic, test_hooks_null, - test_hooks_remove); + test_hooks_remove, + test_hooks_alloc_simple); } From c154f5881b72c52a131e88ade6108d663ac03700 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Apr 2018 15:02:53 -0700 Subject: [PATCH 1130/2608] Hooks: hook the pure-deallocation functions. --- src/jemalloc.c | 6 ++++++ test/unit/hook.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index df59f695..42502ab3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2441,6 +2441,8 @@ je_free(void *ptr) { } else { tcache = NULL; } + uintptr_t args_raw[3] = {(uintptr_t)ptr}; + hook_invoke_dalloc(hook_dalloc_free, ptr, args_raw); ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3012,6 +3014,8 @@ je_dallocx(void *ptr, int flags) { tsd_assert_fast(tsd); ifree(tsd, ptr, tcache, false); } else { + uintptr_t args_raw[3] = {(uintptr_t)ptr, flags}; + hook_invoke_dalloc(hook_dalloc_dallocx, ptr, args_raw); ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3074,6 +3078,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { tsd_assert_fast(tsd); isfree(tsd, ptr, usize, tcache, false); } else { + uintptr_t args_raw[3] = {(uintptr_t)ptr, size, flags}; + hook_invoke_dalloc(hook_dalloc_sdallocx, ptr, args_raw); isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); diff --git 
a/test/unit/hook.c b/test/unit/hook.c index 06d4b82d..2b612014 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -289,6 +289,53 @@ TEST_BEGIN(test_hooks_alloc_simple) { } TEST_END +TEST_BEGIN(test_hooks_dalloc_simple) { + /* "Simple" in the sense that we're not in a realloc variant. */ + hooks_t hooks = {NULL, &test_dalloc_hook, NULL}; + void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + assert_ptr_ne(handle, NULL, "Hook installation failed"); + + void *volatile ptr; + + /* free() */ + reset(); + ptr = malloc(1); + free(ptr); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_dalloc_free, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + + /* dallocx() */ + reset(); + ptr = malloc(1); + dallocx(ptr, MALLOCX_TCACHE_NONE); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + assert_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], + "Wrong raw arg"); + + /* sdallocx() */ + reset(); + ptr = malloc(1); + sdallocx(ptr, 1, MALLOCX_TCACHE_NONE); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_dalloc_sdallocx, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); + assert_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], + "Wrong raw arg"); + + hook_remove(TSDN_NULL, handle); +} +TEST_END + int main(void) { /* We assert on call counts. 
*/ @@ -296,5 +343,6 @@ main(void) { test_hooks_basic, test_hooks_null, test_hooks_remove, - test_hooks_alloc_simple); + test_hooks_alloc_simple, + test_hooks_dalloc_simple); } From 83e516154cfacfc1e010a03f2f420bf79913944a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Apr 2018 16:19:38 -0700 Subject: [PATCH 1131/2608] Hooks: hook the pure-expand function. --- src/jemalloc.c | 6 ++++++ test/unit/hook.c | 32 +++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 42502ab3..1a621806 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2942,6 +2942,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { *tsd_thread_deallocatedp_get(tsd) += old_usize; } label_not_resized: + if (unlikely(!tsd_fast(tsd))) { + uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; + hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, + usize, (uintptr_t)usize, args); + } + UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); diff --git a/test/unit/hook.c b/test/unit/hook.c index 2b612014..f923f721 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -336,6 +336,35 @@ TEST_BEGIN(test_hooks_dalloc_simple) { } TEST_END +TEST_BEGIN(test_hooks_expand_simple) { + /* "Simple" in the sense that we're not in a realloc variant. 
*/ + hooks_t hooks = {NULL, NULL, &test_expand_hook}; + void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + assert_ptr_ne(handle, NULL, "Hook installation failed"); + + void *volatile ptr; + + /* xallocx() */ + reset(); + ptr = malloc(1); + size_t new_usize = xallocx(ptr, 100, 200, MALLOCX_TCACHE_NONE); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_expand_xallocx, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong pointer expanded"); + assert_u64_eq(arg_old_usize, nallocx(1, 0), "Wrong old usize"); + assert_u64_eq(arg_new_usize, sallocx(ptr, 0), "Wrong new usize"); + assert_u64_eq(new_usize, arg_result_raw, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong arg"); + assert_u64_eq(100, arg_args_raw[1], "Wrong arg"); + assert_u64_eq(200, arg_args_raw[2], "Wrong arg"); + assert_u64_eq(MALLOCX_TCACHE_NONE, arg_args_raw[3], "Wrong arg"); + + hook_remove(TSDN_NULL, handle); +} +TEST_END + + int main(void) { /* We assert on call counts. */ @@ -344,5 +373,6 @@ main(void) { test_hooks_null, test_hooks_remove, test_hooks_alloc_simple, - test_hooks_dalloc_simple); + test_hooks_dalloc_simple, + test_hooks_expand_simple); } From 67270040a56d8658ce6aec81b15d78571e0e9198 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Apr 2018 16:44:25 -0700 Subject: [PATCH 1132/2608] Hooks: hook the realloc paths that act as pure malloc/free. --- include/jemalloc/internal/hook.h | 8 +++-- src/jemalloc.c | 12 +++++++- test/unit/hook.c | 52 ++++++++++++++++++++++++++++++-- 3 files changed, 67 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index fbf3a077..ac1bcdbc 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -16,9 +16,13 @@ * * For realloc and rallocx, if the expansion happens in place, the expansion * hook is called. 
If it is moved, then the alloc hook is called on the new - * location, and then the free hook is called on the old location. + * location, and then the free hook is called on the old location (i.e. both + * hooks are invoked in between the alloc and the dalloc). * - * If we return NULL from OOM, then usize might not be trustworthy. + * If we return NULL from OOM, then usize might not be trustworthy. Calling + * realloc(NULL, size) only calls the alloc hook, and calling realloc(ptr, 0) + * only calls the free hook. (Calling realloc(NULL, 0) is treated as malloc(0), + * and only calls the alloc hook). * * Reentrancy: * Is not protected against. If your hooks allocate, then the hooks will be diff --git a/src/jemalloc.c b/src/jemalloc.c index 1a621806..57c20199 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2311,11 +2311,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) { +je_realloc(void *ptr, size_t arg_size) { void *ret; tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; + size_t size = arg_size; LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); @@ -2331,6 +2332,9 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } + uintptr_t args[3] = {(uintptr_t)ptr, size}; + hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); + ifree(tsd, ptr, tcache, true); LOG("core.realloc.exit", "result: %p", NULL); @@ -2386,6 +2390,12 @@ je_realloc(void *ptr, size_t size) { dopts.item_size = size; imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)ptr, arg_size}; + hook_invoke_alloc(hook_alloc_realloc, ret, + (uintptr_t)ret, args); + } + return ret; } diff --git a/test/unit/hook.c b/test/unit/hook.c index f923f721..8c9d6800 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -179,7 +179,6 @@ TEST_END 
TEST_BEGIN(test_hooks_alloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ - hooks_t hooks = {&test_alloc_hook, NULL, NULL}; void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); assert_ptr_ne(handle, NULL, "Hook installation failed"); @@ -364,6 +363,54 @@ TEST_BEGIN(test_hooks_expand_simple) { } TEST_END +TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, + &test_expand_hook}; + void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + assert_ptr_ne(handle, NULL, "Hook installation failed"); + + void *volatile ptr; + + /* realloc(NULL, size) as malloc */ + reset(); + ptr = realloc(NULL, 1); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + free(ptr); + + /* realloc(ptr, 0) as free */ + ptr = malloc(1); + reset(); + realloc(ptr, 0); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + assert_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); + + /* realloc(NULL, 0) as malloc(0) */ + reset(); + ptr = realloc(NULL, 0); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); + assert_ptr_eq(ptr, arg_result, "Wrong result"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + 
assert_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); + free(ptr); + + hook_remove(TSDN_NULL, handle); +} +TEST_END int main(void) { @@ -374,5 +421,6 @@ main(void) { test_hooks_remove, test_hooks_alloc_simple, test_hooks_dalloc_simple, - test_hooks_expand_simple); + test_hooks_expand_simple, + test_hooks_realloc_as_malloc_or_free); } From cb0707c0fc948875876b93514938646455650e2b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 23 Apr 2018 18:07:40 -0700 Subject: [PATCH 1133/2608] Hooks: hook the realloc pathways that move/expand. --- include/jemalloc/internal/arena_externs.h | 4 +- include/jemalloc/internal/hook.h | 31 +++++ .../internal/jemalloc_internal_inlines_c.h | 45 +++---- include/jemalloc/internal/large_externs.h | 7 +- src/arena.c | 18 ++- src/jemalloc.c | 44 ++++--- src/large.c | 17 ++- test/unit/hook.c | 113 +++++++++++++++++- 8 files changed, 231 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 4b3732b4..f4edcc73 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/bin.h" #include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" @@ -65,7 +66,8 @@ void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache); + size_t size, size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t 
arena_dirty_decay_ms_default_get(void); diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index ac1bcdbc..fc61e9bd 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -105,6 +105,37 @@ struct hooks_s { hook_expand expand_hook; }; +/* + * Begin implementation details; everything above this point might one day live + * in a public API. Everything below this point never will. + */ + +/* + * The realloc pathways haven't gotten any refactoring love in a while, and it's + * fairly difficult to pass information from the entry point to the hooks. We + * put the information the hooks will need into a struct to encapsulate + * everything. + * + * Much of these pathways are force-inlined, so that the compiler can avoid + * materializing this struct until we hit an extern arena function. For fairly + * goofy reasons, *many* of the realloc paths hit an extern arena function. + * These paths are cold enough that it doesn't matter; eventually, we should + * rewrite the realloc code to make the expand-in-place and the + * free-then-realloc paths more orthogonal, at which point we don't need to + * spread the hook logic all over the place. + */ +typedef struct hook_ralloc_args_s hook_ralloc_args_t; +struct hook_ralloc_args_s { + /* I.e. as opposed to rallocx. */ + bool is_realloc; + /* + * The expand hook takes 4 arguments, even if only 3 are actually used; + * we add an extra one in case the user decides to memcpy without + * looking too closely at the hooked function. + */ + uintptr_t args[4]; +}; + /* * Returns an opaque handle to be used when removing the hook. NULL means that * we couldn't install the hook. 
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index c829ac60..1b5c11ed 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_C_H #define JEMALLOC_INTERNAL_INLINES_C_H +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/witness.h" @@ -133,31 +134,20 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, size_t alignment, bool zero, tcache_t *tcache, - arena_t *arena) { + size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); void *p; size_t usize, copysize; - usize = sz_sa2u(size + extra, alignment); + usize = sz_sa2u(size, alignment); if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { return NULL; } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); if (p == NULL) { - if (extra == 0) { - return NULL; - } - /* Try again, without extra this time. */ - usize = sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { - return NULL; - } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); - if (p == NULL) { - return NULL; - } + return NULL; } /* * Copy at most size bytes (not size+extra), since the caller has no @@ -165,13 +155,26 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); + hook_invoke_alloc(hook_args->is_realloc + ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p, + hook_args->args); + hook_invoke_dalloc(hook_args->is_realloc + ? 
hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); return p; } +/* + * is_realloc threads through the knowledge of whether or not this call comes + * from je_realloc (as opposed to je_rallocx); this ensures that we pass the + * correct entry point into any hooks. + * Note that these functions are all force-inlined, so no actual bool gets + * passed-around anywhere. + */ JEMALLOC_ALWAYS_INLINE void * iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena) { + bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) +{ assert(ptr != NULL); assert(size != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -183,19 +186,19 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, * Existing object alignment is inadequate; allocate new space * and copy. */ - return iralloct_realign(tsdn, ptr, oldsize, size, 0, alignment, - zero, tcache, arena); + return iralloct_realign(tsdn, ptr, oldsize, size, alignment, + zero, tcache, arena, hook_args); } return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache); + tcache, hook_args); } JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero) { + bool zero, hook_ralloc_args_t *hook_args) { return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd), NULL); + tcache_get(tsd), NULL, hook_args); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 3f36282c..88682eac 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -1,13 +1,16 @@ #ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H #define JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#include "jemalloc/internal/hook.h" + void *large_malloc(tsdn_t *tsdn, arena_t *arena, 
size_t usize, bool zero); void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t usize_max, bool zero); -void *large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); +void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, + size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args); typedef void (large_dalloc_junk_t)(void *, size_t); extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; diff --git a/src/arena.c b/src/arena.c index 311943f5..b76be5f7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1630,7 +1630,8 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache) { + size_t size, size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args) { size_t usize = sz_s2u(size); if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { return NULL; @@ -1639,13 +1640,17 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, if (likely(usize <= SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. */ if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) { + hook_invoke_expand(hook_args->is_realloc + ? hook_expand_realloc : hook_expand_rallocx, + ptr, oldsize, usize, (uintptr_t)ptr, + hook_args->args); return ptr; } } if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, iealloc(tsdn, ptr), usize, - alignment, zero, tcache); + return large_ralloc(tsdn, arena, ptr, usize, + alignment, zero, tcache, hook_args); } /* @@ -1658,11 +1663,16 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return NULL; } + hook_invoke_alloc(hook_args->is_realloc + ? 
hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, + hook_args->args); + hook_invoke_dalloc(hook_args->is_realloc + ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + /* * Junk/zero-filling were already done by * ipalloc()/arena_malloc(). */ - size_t copysize = (usize < oldsize) ? usize : oldsize; memcpy(ret, ptr, copysize); isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); diff --git a/src/jemalloc.c b/src/jemalloc.c index 57c20199..264408fe 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2169,20 +2169,22 @@ je_calloc(size_t num, size_t size) { static void * irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - prof_tctx_t *tctx) { + prof_tctx_t *tctx, hook_ralloc_args_t *hook_args) { void *p; if (tctx == NULL) { return NULL; } if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false); + p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false, + hook_args); if (p == NULL) { return NULL; } arena_prof_promote(tsd_tsdn(tsd), p, usize); } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, + hook_args); } return p; @@ -2190,7 +2192,7 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, JEMALLOC_ALWAYS_INLINE void * irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - alloc_ctx_t *alloc_ctx) { + alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; @@ -2199,9 +2201,11 @@ irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); tctx = prof_alloc_prep(tsd, usize, prof_active, true); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx); + p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx, + hook_args); } else { - p = iralloc(tsd, old_ptr, 
old_usize, usize, 0, false); + p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, + hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, true); @@ -2349,6 +2353,10 @@ je_realloc(void *ptr, size_t arg_size) { check_entry_exit_locking(tsd_tsdn(tsd)); + + hook_ralloc_args_t hook_args = {true, {(uintptr_t)ptr, + (uintptr_t)arg_size, 0, 0}}; + alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, @@ -2362,13 +2370,14 @@ je_realloc(void *ptr, size_t arg_size) { ret = NULL; } else { ret = irealloc_prof(tsd, ptr, old_usize, usize, - &alloc_ctx); + &alloc_ctx, &hook_args); } } else { if (config_stats) { usize = sz_s2u(size); } - ret = iralloc(tsd, ptr, old_usize, size, 0, false); + ret = iralloc(tsd, ptr, old_usize, size, 0, false, + &hook_args); } tsdn = tsd_tsdn(tsd); } else { @@ -2664,7 +2673,7 @@ je_mallocx(size_t size, int flags) { static void * irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, - prof_tctx_t *tctx) { + prof_tctx_t *tctx, hook_ralloc_args_t *hook_args) { void *p; if (tctx == NULL) { @@ -2672,14 +2681,14 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, } if (usize <= SMALL_MAXCLASS) { p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, - alignment, zero, tcache, arena); + alignment, zero, tcache, arena, hook_args); if (p == NULL) { return NULL; } arena_prof_promote(tsdn, p, usize); } else { p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, - tcache, arena); + tcache, arena, hook_args); } return p; @@ -2688,7 +2697,7 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, - arena_t *arena, alloc_ctx_t *alloc_ctx) { + arena_t *arena, alloc_ctx_t 
*alloc_ctx, hook_ralloc_args_t *hook_args) { void *p; bool prof_active; prof_tctx_t *old_tctx, *tctx; @@ -2698,10 +2707,10 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, - *usize, alignment, zero, tcache, arena, tctx); + *usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, - zero, tcache, arena); + zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx, false); @@ -2775,6 +2784,9 @@ je_rallocx(void *ptr, size_t size, int flags) { assert(alloc_ctx.szind != NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); + + hook_ralloc_args_t hook_args = {false, {(uintptr_t)ptr, size, flags, + 0}}; if (config_prof && opt_prof) { usize = (alignment == 0) ? 
sz_s2u(size) : sz_sa2u(size, alignment); @@ -2782,13 +2794,13 @@ je_rallocx(void *ptr, size_t size, int flags) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, - zero, tcache, arena, &alloc_ctx); + zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } } else { p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, - zero, tcache, arena); + zero, tcache, arena, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } diff --git a/src/large.c b/src/large.c index 27a2c679..fdf183e4 100644 --- a/src/large.c +++ b/src/large.c @@ -270,10 +270,12 @@ large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, } void * -large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) { - size_t oldusize = extent_usize_get(extent); +large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, + size_t alignment, bool zero, tcache_t *tcache, + hook_ralloc_args_t *hook_args) { + extent_t *extent = iealloc(tsdn, ptr); + size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ assert(usize > 0 && usize <= LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ @@ -281,6 +283,9 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { + hook_invoke_expand(hook_args->is_realloc + ? hook_expand_realloc : hook_expand_rallocx, ptr, oldusize, + usize, (uintptr_t)ptr, hook_args->args); return extent_addr_get(extent); } @@ -295,6 +300,12 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent, size_t usize, return NULL; } + hook_invoke_alloc(hook_args->is_realloc + ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, + hook_args->args); + hook_invoke_dalloc(hook_args->is_realloc + ? 
hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + size_t copysize = (usize < oldusize) ? usize : oldusize; memcpy(ret, extent_addr_get(extent), copysize); isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true); diff --git a/test/unit/hook.c b/test/unit/hook.c index 8c9d6800..693cb238 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -412,6 +412,115 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { } TEST_END +static void +do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, + int expand_type, int dalloc_type) { + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, + &test_expand_hook}; + void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + assert_ptr_ne(handle, NULL, "Hook installation failed"); + + void *volatile ptr; + void *volatile ptr2; + + /* Realloc in-place, small. */ + ptr = malloc(129); + reset(); + ptr2 = ralloc(ptr, 130, flags); + assert_ptr_eq(ptr, ptr2, "Small realloc moved"); + + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, expand_type, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong address"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); + free(ptr); + + /* + * Realloc in-place, large. Since we can't guarantee the large case + * across all platforms, we stay resilient to moving results. + */ + ptr = malloc(2 * 1024 * 1024); + free(ptr); + ptr2 = malloc(1 * 1024 * 1024); + reset(); + ptr = ralloc(ptr2, 2 * 1024 * 1024, flags); + /* ptr is the new address, ptr2 is the old address. 
*/ + if (ptr == ptr2) { + assert_d_eq(call_count, 1, "Hook not called"); + assert_d_eq(arg_type, expand_type, "Wrong hook type"); + } else { + assert_d_eq(call_count, 2, "Wrong hooks called"); + assert_ptr_eq(ptr, arg_result, "Wrong address"); + assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); + } + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_ptr_eq(ptr2, arg_address, "Wrong address"); + assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], + "Wrong argument"); + free(ptr); + + /* Realloc with move, small. */ + ptr = malloc(8); + reset(); + ptr2 = ralloc(ptr, 128, flags); + assert_ptr_ne(ptr, ptr2, "Small realloc didn't move"); + + assert_d_eq(call_count, 2, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong address"); + assert_ptr_eq(ptr2, arg_result, "Wrong address"); + assert_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); + free(ptr2); + + /* Realloc with move, large. 
*/ + ptr = malloc(1); + reset(); + ptr2 = ralloc(ptr, 2 * 1024 * 1024, flags); + assert_ptr_ne(ptr, ptr2, "Large realloc didn't move"); + + assert_d_eq(call_count, 2, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); + assert_ptr_eq(ptr, arg_address, "Wrong address"); + assert_ptr_eq(ptr2, arg_result, "Wrong address"); + assert_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, + "Wrong raw result"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + assert_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], + "Wrong argument"); + free(ptr2); + + hook_remove(TSDN_NULL, handle); +} + +static void * +realloc_wrapper(void *ptr, size_t size, UNUSED int flags) { + return realloc(ptr, size); +} + +TEST_BEGIN(test_hooks_realloc) { + do_realloc_test(&realloc_wrapper, 0, hook_expand_realloc, + hook_dalloc_realloc); +} +TEST_END + +TEST_BEGIN(test_hooks_rallocx) { + do_realloc_test(&rallocx, MALLOCX_TCACHE_NONE, hook_expand_rallocx, + hook_dalloc_rallocx); +} +TEST_END + int main(void) { /* We assert on call counts. */ @@ -422,5 +531,7 @@ main(void) { test_hooks_alloc_simple, test_hooks_dalloc_simple, test_hooks_expand_simple, - test_hooks_realloc_as_malloc_or_free); + test_hooks_realloc_as_malloc_or_free, + test_hooks_realloc, + test_hooks_rallocx); } From 126e9a84a5a793fb0d53ca4656a91889b3ae40e8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 26 Apr 2018 15:46:08 -0700 Subject: [PATCH 1134/2608] Hooks: move the "extra" pointer into the hook_t itself. This simplifies the mallctl call to install a hook, which should only take a single argument. 
--- include/jemalloc/internal/hook.h | 3 ++- src/hook.c | 14 +++++----- test/unit/hook.c | 45 ++++++++++++++++---------------- 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index fc61e9bd..678c6018 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -103,6 +103,7 @@ struct hooks_s { hook_alloc alloc_hook; hook_dalloc dalloc_hook; hook_expand expand_hook; + void *extra; }; /* @@ -142,7 +143,7 @@ struct hook_ralloc_args_s { */ bool hook_boot(); -void *hook_install(tsdn_t *tsdn, hooks_t *hooks, void *extra); +void *hook_install(tsdn_t *tsdn, hooks_t *hooks); /* Uninstalls the hook with the handle previously returned from hook_install. */ void hook_remove(tsdn_t *tsdn, void *opaque); diff --git a/src/hook.c b/src/hook.c index 5c6818ff..564c2a0c 100644 --- a/src/hook.c +++ b/src/hook.c @@ -9,7 +9,6 @@ typedef struct hooks_internal_s hooks_internal_t; struct hooks_internal_s { hooks_t hooks; - void *extra; bool in_use; }; @@ -27,7 +26,7 @@ hook_boot() { } static void * -hook_install_locked(hooks_t *to_install, void *extra) { +hook_install_locked(hooks_t *to_install) { hooks_internal_t hooks_internal; for (int i = 0; i < HOOKS_MAX; i++) { bool success = seq_try_load_hooks(&hooks_internal, &hooks[i]); @@ -35,7 +34,6 @@ hook_install_locked(hooks_t *to_install, void *extra) { assert(success); if (!hooks_internal.in_use) { hooks_internal.hooks = *to_install; - hooks_internal.extra = extra; hooks_internal.in_use = true; seq_store_hooks(&hooks[i], &hooks_internal); atomic_store_u(&nhooks, @@ -48,9 +46,9 @@ hook_install_locked(hooks_t *to_install, void *extra) { } void * -hook_install(tsdn_t *tsdn, hooks_t *to_install, void *extra) { +hook_install(tsdn_t *tsdn, hooks_t *to_install) { malloc_mutex_lock(tsdn, &hooks_mu); - void *ret = hook_install_locked(to_install, extra); + void *ret = hook_install_locked(to_install); if (ret != NULL) { 
tsd_global_slow_inc(tsdn); } @@ -112,7 +110,7 @@ hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, FOR_EACH_HOOK_BEGIN(&hook) hook_alloc h = hook.hooks.alloc_hook; if (h != NULL) { - h(hook.extra, type, result, result_raw, args_raw); + h(hook.hooks.extra, type, result, result_raw, args_raw); } FOR_EACH_HOOK_END } @@ -126,7 +124,7 @@ hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { FOR_EACH_HOOK_BEGIN(&hook) hook_dalloc h = hook.hooks.dalloc_hook; if (h != NULL) { - h(hook.extra, type, address, args_raw); + h(hook.hooks.extra, type, address, args_raw); } FOR_EACH_HOOK_END } @@ -141,7 +139,7 @@ hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, FOR_EACH_HOOK_BEGIN(&hook) hook_expand h = hook.hooks.expand_hook; if (h != NULL) { - h(hook.extra, type, address, old_usize, new_usize, + h(hook.hooks.extra, type, address, old_usize, new_usize, result_raw, args_raw); } FOR_EACH_HOOK_END diff --git a/test/unit/hook.c b/test/unit/hook.c index 693cb238..3f85ff10 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -81,8 +81,9 @@ test_expand_hook(void *extra, hook_expand_t type, void *address, TEST_BEGIN(test_hooks_basic) { /* Just verify that the record their arguments correctly. */ hooks_t hooks = { - &test_alloc_hook, &test_dalloc_hook, &test_expand_hook}; - void *handle = hook_install(TSDN_NULL, &hooks, (void *)111); + &test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)111}; + void *handle = hook_install(TSDN_NULL, &hooks); uintptr_t args_raw[4] = {10, 20, 30, 40}; /* Alloc */ @@ -124,15 +125,15 @@ TEST_END TEST_BEGIN(test_hooks_null) { /* Null hooks should be ignored, not crash. 
*/ - hooks_t hooks1 = {NULL, NULL, NULL}; - hooks_t hooks2 = {&test_alloc_hook, NULL, NULL}; - hooks_t hooks3 = {NULL, &test_dalloc_hook, NULL}; - hooks_t hooks4 = {NULL, NULL, &test_expand_hook}; + hooks_t hooks1 = {NULL, NULL, NULL, NULL}; + hooks_t hooks2 = {&test_alloc_hook, NULL, NULL, NULL}; + hooks_t hooks3 = {NULL, &test_dalloc_hook, NULL, NULL}; + hooks_t hooks4 = {NULL, NULL, &test_expand_hook, NULL}; - void *handle1 = hook_install(TSDN_NULL, &hooks1, NULL); - void *handle2 = hook_install(TSDN_NULL, &hooks2, NULL); - void *handle3 = hook_install(TSDN_NULL, &hooks3, NULL); - void *handle4 = hook_install(TSDN_NULL, &hooks4, NULL); + void *handle1 = hook_install(TSDN_NULL, &hooks1); + void *handle2 = hook_install(TSDN_NULL, &hooks2); + void *handle3 = hook_install(TSDN_NULL, &hooks3); + void *handle4 = hook_install(TSDN_NULL, &hooks4); assert_ptr_ne(handle1, NULL, "Hook installation failed"); assert_ptr_ne(handle2, NULL, "Hook installation failed"); @@ -161,8 +162,8 @@ TEST_BEGIN(test_hooks_null) { TEST_END TEST_BEGIN(test_hooks_remove) { - hooks_t hooks = {&test_alloc_hook, NULL, NULL}; - void *handle = hook_install(TSDN_NULL, &hooks, NULL); + hooks_t hooks = {&test_alloc_hook, NULL, NULL, NULL}; + void *handle = hook_install(TSDN_NULL, &hooks); assert_ptr_ne(handle, NULL, "Hook installation failed"); call_count = 0; uintptr_t args_raw[4] = {10, 20, 30, 40}; @@ -179,8 +180,8 @@ TEST_END TEST_BEGIN(test_hooks_alloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ - hooks_t hooks = {&test_alloc_hook, NULL, NULL}; - void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + hooks_t hooks = {&test_alloc_hook, NULL, NULL, (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); assert_ptr_ne(handle, NULL, "Hook installation failed"); /* Stop malloc from being optimized away. */ @@ -290,8 +291,8 @@ TEST_END TEST_BEGIN(test_hooks_dalloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ - hooks_t hooks = {NULL, &test_dalloc_hook, NULL}; - void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + hooks_t hooks = {NULL, &test_dalloc_hook, NULL, (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); assert_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -337,8 +338,8 @@ TEST_END TEST_BEGIN(test_hooks_expand_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ - hooks_t hooks = {NULL, NULL, &test_expand_hook}; - void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + hooks_t hooks = {NULL, NULL, &test_expand_hook, (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); assert_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -365,8 +366,8 @@ TEST_END TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, - &test_expand_hook}; - void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + &test_expand_hook, (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); assert_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -416,8 +417,8 @@ static void do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, int expand_type, int dalloc_type) { hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, - &test_expand_hook}; - void *handle = hook_install(TSDN_NULL, &hooks, (void *)123); + &test_expand_hook, (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); assert_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; From bb071db92ee8368fb6e64ef328d49fae6ba48089 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 24 Apr 2018 14:45:41 -0700 Subject: [PATCH 1135/2608] Mallctl: Add experimental.hooks.[install|remove]. 
--- src/ctl.c | 59 ++++++++++++++++++++++++++++++++++++++++++++- test/unit/mallctl.c | 40 +++++++++++++++++++++++++++++- 2 files changed, 97 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 0eb8de13..ef3eca4d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -202,6 +202,8 @@ CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +CTL_PROTO(experimental_hooks_install) +CTL_PROTO(experimental_hooks_remove) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -536,6 +538,15 @@ static const ctl_named_node_t stats_node[] = { {NAME("arenas"), CHILD(indexed, stats_arenas)} }; +static const ctl_named_node_t hooks_node[] = { + {NAME("install"), CTL(experimental_hooks_install)}, + {NAME("remove"), CTL(experimental_hooks_remove)}, +}; + +static const ctl_named_node_t experimental_node[] = { + {NAME("hooks"), CHILD(named, hooks)} +}; + static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, @@ -548,7 +559,8 @@ static const ctl_named_node_t root_node[] = { {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, - {NAME("stats"), CHILD(named, stats)} + {NAME("stats"), CHILD(named, stats)}, + {NAME("experimental"), CHILD(named, experimental)} }; static const ctl_named_node_t super_root_node[] = { {NAME(""), CHILD(named, root)} @@ -2879,3 +2891,48 @@ label_return: malloc_mutex_unlock(tsdn, &ctl_mtx); return ret; } + +static int +experimental_hooks_install_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + if (oldp == NULL || oldlenp == NULL|| newp == NULL) { + ret = EINVAL; + goto label_return; + } + /* + * Note: this is a *private* struct. 
This is an experimental interface; + * forcing the user to know the jemalloc internals well enough to + * extract the ABI hopefully ensures nobody gets too comfortable with + * this API, which can change at a moment's notice. + */ + hooks_t hooks; + WRITE(hooks, hooks_t); + void *handle = hook_install(tsd_tsdn(tsd), &hooks); + if (handle == NULL) { + ret = EAGAIN; + goto label_return; + } + READ(handle, void *); + + ret = 0; +label_return: + return ret; +} + +static int +experimental_hooks_remove_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + WRITEONLY(); + void *handle = NULL; + WRITE(handle, void *); + if (handle == NULL) { + ret = EINVAL; + goto label_return; + } + hook_remove(tsd_tsdn(tsd), handle); + ret = 0; +label_return: + return ret; +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 1ecbab08..34a4d67c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -773,6 +773,43 @@ TEST_BEGIN(test_stats_arenas) { } TEST_END +static void +alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result, + UNUSED uintptr_t result_raw, UNUSED uintptr_t args_raw[3]) { + *(bool *)extra = true; +} + +static void +dalloc_hook(void *extra, UNUSED hook_dalloc_t type, + UNUSED void *address, UNUSED uintptr_t args_raw[3]) { + *(bool *)extra = true; +} + +TEST_BEGIN(test_hooks) { + bool hook_called = false; + hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called}; + void *handle = NULL; + size_t sz = sizeof(handle); + int err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, + sizeof(hooks)); + assert_d_eq(err, 0, "Hook installation failed"); + assert_ptr_ne(handle, NULL, "Hook installation gave null handle"); + void *ptr = mallocx(1, 0); + assert_true(hook_called, "Alloc hook not called"); + hook_called = false; + free(ptr); + assert_true(hook_called, "Free hook not called"); + + err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, + 
sizeof(handle)); + assert_d_eq(err, 0, "Hook removal failed"); + hook_called = false; + ptr = mallocx(1, 0); + free(ptr); + assert_false(hook_called, "Hook called after removal"); +} +TEST_END + int main(void) { return test( @@ -801,5 +838,6 @@ main(void) { test_arenas_lextent_constants, test_arenas_create, test_arenas_lookup, - test_stats_arenas); + test_stats_arenas, + test_hooks); } From 59e371f46331a3f4b688d6622a0af7ccc4f96be6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Apr 2018 16:24:36 -0700 Subject: [PATCH 1136/2608] Hooks: Add a hook exhaustion test. When we run out of space in which to store hooks, we should return EAGAIN from the mallctl, but not otherwise misbehave. --- include/jemalloc/internal/hook.h | 2 ++ src/hook.c | 9 ++++--- test/unit/mallctl.c | 40 +++++++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 678c6018..9ea9c6f0 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -46,6 +46,8 @@ * order. */ +#define HOOK_MAX 4 + enum hook_alloc_e { hook_alloc_malloc, hook_alloc_posix_memalign, diff --git a/src/hook.c b/src/hook.c index 564c2a0c..24afe999 100644 --- a/src/hook.c +++ b/src/hook.c @@ -14,9 +14,8 @@ struct hooks_internal_s { seq_define(hooks_internal_t, hooks) -#define HOOKS_MAX 4 static atomic_u_t nhooks = ATOMIC_INIT(0); -static seq_hooks_t hooks[HOOKS_MAX]; +static seq_hooks_t hooks[HOOK_MAX]; static malloc_mutex_t hooks_mu; bool @@ -28,7 +27,7 @@ hook_boot() { static void * hook_install_locked(hooks_t *to_install) { hooks_internal_t hooks_internal; - for (int i = 0; i < HOOKS_MAX; i++) { + for (int i = 0; i < HOOK_MAX; i++) { bool success = seq_try_load_hooks(&hooks_internal, &hooks[i]); /* We hold mu; no concurrent access. 
*/ assert(success); @@ -74,7 +73,7 @@ void hook_remove(tsdn_t *tsdn, void *opaque) { if (config_debug) { char *hooks_begin = (char *)&hooks[0]; - char *hooks_end = (char *)&hooks[HOOKS_MAX]; + char *hooks_end = (char *)&hooks[HOOK_MAX]; char *hook = (char *)opaque; assert(hooks_begin <= hook && hook < hooks_end && (hook - hooks_begin) % sizeof(seq_hooks_t) == 0); @@ -87,7 +86,7 @@ hook_remove(tsdn_t *tsdn, void *opaque) { #define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ for (int for_each_hook_counter = 0; \ - for_each_hook_counter < HOOKS_MAX; \ + for_each_hook_counter < HOOK_MAX; \ for_each_hook_counter++) { \ bool for_each_hook_success = seq_try_load_hooks( \ (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 34a4d67c..8a36c0a4 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1,5 +1,6 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/hook.h" #include "jemalloc/internal/util.h" TEST_BEGIN(test_mallctl_errors) { @@ -810,6 +811,42 @@ TEST_BEGIN(test_hooks) { } TEST_END +TEST_BEGIN(test_hooks_exhaustion) { + bool hook_called = false; + hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called}; + + void *handle; + void *handles[HOOK_MAX]; + size_t sz = sizeof(handle); + int err; + for (int i = 0; i < HOOK_MAX; i++) { + handle = NULL; + err = mallctl("experimental.hooks.install", &handle, &sz, + &hooks, sizeof(hooks)); + assert_d_eq(err, 0, "Error installation hooks"); + assert_ptr_ne(handle, NULL, "Got NULL handle"); + handles[i] = handle; + } + err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, + sizeof(hooks)); + assert_d_eq(err, EAGAIN, "Should have failed hook installation"); + for (int i = 0; i < HOOK_MAX; i++) { + err = mallctl("experimental.hooks.remove", NULL, NULL, + &handles[i], sizeof(handles[i])); + assert_d_eq(err, 0, "Hook removal failed"); + } + /* Insertion failed, but then we removed some; it should work now. 
*/ + handle = NULL; + err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, + sizeof(hooks)); + assert_d_eq(err, 0, "Hook insertion failed"); + assert_ptr_ne(handle, NULL, "Got NULL handle"); + err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, + sizeof(handle)); + assert_d_eq(err, 0, "Hook removal failed"); +} +TEST_END + int main(void) { return test( @@ -839,5 +876,6 @@ main(void) { test_arenas_create, test_arenas_lookup, test_stats_arenas, - test_hooks); + test_hooks, + test_hooks_exhaustion); } From 0379235f47585ac8f583ba85aab9d294abfa44b5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 11 May 2018 16:43:43 -0700 Subject: [PATCH 1137/2608] Tests: Shouldn't be able to change global slowness. This can help ensure that we don't leave slowness changes behind in case of resource exhaustion. --- include/jemalloc/internal/tsd.h | 1 + src/tsd.c | 2 +- test/src/test.c | 17 +++++++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 251f5659..845a3f0d 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -117,6 +117,7 @@ void tsd_slow_update(tsd_t *tsd); */ void tsd_global_slow_inc(tsdn_t *tsdn); void tsd_global_slow_dec(tsdn_t *tsdn); +bool tsd_global_slow(); enum { /* Common case --> jnz. 
*/ diff --git a/src/tsd.c b/src/tsd.c index c92cd228..91a964ac 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -141,7 +141,7 @@ tsd_local_slow(tsd_t *tsd) { || tsd_reentrancy_level_get(tsd) > 0; } -static bool +bool tsd_global_slow() { return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; } diff --git a/test/src/test.c b/test/src/test.c index 9c754e33..f97ce4d1 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -110,6 +110,20 @@ p_test_fini(void) { test_status_string(test_status)); } +static void +check_global_slow(test_status_t *status) { +#ifdef JEMALLOC_UNIT_TEST + /* + * This check needs to peek into tsd internals, which is why it's only + * exposed in unit tests. + */ + if (tsd_global_slow()) { + malloc_printf("Testing increased global slow count\n"); + *status = test_status_fail; + } +#endif +} + static test_status_t p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { test_status_t ret; @@ -136,6 +150,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { if (test_status > ret) { ret = test_status; } + check_global_slow(&ret); /* Reentrant run. */ if (do_reentrant) { reentrancy = libc_reentrant; @@ -145,6 +160,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { if (test_status > ret) { ret = test_status; } + check_global_slow(&ret); reentrancy = arena_new_reentrant; test_hooks_libc_hook = NULL; @@ -153,6 +169,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { if (test_status > ret) { ret = test_status; } + check_global_slow(&ret); } } From a7f749c9af0d5ca51b5b5eaf35c2c2913d8a77e1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 15 May 2018 14:15:43 -0700 Subject: [PATCH 1138/2608] Hooks: Protect against reentrancy. Previously, we made the user deal with this themselves, but that's not good enough; if hooks may allocate, we should test the allocation pathways down hooks. 
If we're doing that, we might as well actually implement the protection for the user. --- include/jemalloc/internal/hook.h | 6 +-- include/jemalloc/internal/tsd.h | 2 + src/hook.c | 68 +++++++++++++++++++++++++++----- test/unit/hook.c | 42 ++++++++++++++++++++ 4 files changed, 106 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 9ea9c6f0..ee246b1e 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -25,9 +25,9 @@ * and only calls the alloc hook). * * Reentrancy: - * Is not protected against. If your hooks allocate, then the hooks will be - * called again. Note that you can guard against this with a thread-local - * "in_hook" bool. + * Reentrancy is guarded against from within the hook implementation. If you + * call allocator functions from within a hook, the hooks will not be invoked + * again. * Threading: * The installation of a hook synchronizes with all its uses. If you can * prove the installation of a hook happens-before a jemalloc entry point, diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 845a3f0d..3097ce06 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -66,6 +66,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define MALLOC_TSD \ O(tcache_enabled, bool, bool) \ O(arenas_tdata_bypass, bool, bool) \ + O(in_hook, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ O(offset_state, uint64_t, uint64_t) \ @@ -85,6 +86,7 @@ typedef ql_elm(tsd_t) tsd_link_t; ATOMIC_INIT(tsd_state_uninitialized), \ TCACHE_ENABLED_ZERO_INITIALIZER, \ false, \ + false, \ 0, \ 0, \ 0, \ diff --git a/src/hook.c b/src/hook.c index 24afe999..f66d4239 100644 --- a/src/hook.c +++ b/src/hook.c @@ -99,12 +99,62 @@ for (int for_each_hook_counter = 0; \ #define FOR_EACH_HOOK_END \ } +static bool * +hook_reentrantp() { + /* + * We prevent user reentrancy within hooks. 
This is basically just a + * thread-local bool that triggers an early-exit. + * + * We don't fold in_hook into reentrancy. There are two reasons for + * this: + * - Right now, we turn on reentrancy during things like extent hook + * execution. Allocating during extent hooks is not officially + * supported, but we don't want to break it for the time being. These + * sorts of allocations should probably still be hooked, though. + * - If a hook allocates, we may want it to be relatively fast (after + * all, it executes on every allocator operation). Turning on + * reentrancy is a fairly heavyweight mode (disabling tcache, + * redirecting to arena 0, etc.). It's possible we may one day want + * to turn on reentrant mode here, if it proves too difficult to keep + * this working. But that's fairly easy for us to see; OTOH, people + * not using hooks because they're too slow is easy for us to miss. + * + * The tricky part is + * that this code might get invoked even if we don't have access to tsd. + * This function mimics getting a pointer to thread-local data, except + * that it might secretly return a pointer to some global data if we + * know that the caller will take the early-exit path. + * If we return a bool that indicates that we are reentrant, then the + * caller will go down the early exit path, leaving the global + * untouched. 
+ */ + static bool in_hook_global = true; + tsdn_t *tsdn = tsdn_fetch(); + bool *in_hook = tsdn_in_hookp_get(tsdn); + if (in_hook != NULL) { + return in_hook; + } + return &in_hook_global; +} + +#define HOOK_PROLOGUE \ + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \ + return; \ + } \ + bool *in_hook = hook_reentrantp(); \ + if (*in_hook) { \ + return; \ + } \ + *in_hook = true; + +#define HOOK_EPILOGUE \ + *in_hook = false; + void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, uintptr_t args_raw[3]) { - if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { - return; - } + HOOK_PROLOGUE + hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) hook_alloc h = hook.hooks.alloc_hook; @@ -112,13 +162,13 @@ hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, h(hook.hooks.extra, type, result, result_raw, args_raw); } FOR_EACH_HOOK_END + + HOOK_EPILOGUE } void hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { - if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { - return; - } + HOOK_PROLOGUE hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) hook_dalloc h = hook.hooks.dalloc_hook; @@ -126,14 +176,13 @@ hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { h(hook.hooks.extra, type, address, args_raw); } FOR_EACH_HOOK_END + HOOK_EPILOGUE } void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]) { - if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { - return; - } + HOOK_PROLOGUE hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) hook_expand h = hook.hooks.expand_hook; @@ -142,4 +191,5 @@ hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, result_raw, args_raw); } FOR_EACH_HOOK_END + HOOK_EPILOGUE } diff --git a/test/unit/hook.c b/test/unit/hook.c index 3f85ff10..72fcc433 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -25,6 +25,45 @@ 
reset_args() { memset(arg_args_raw, 77, sizeof(arg_args_raw)); } +static void +alloc_free_size(size_t sz) { + void *ptr = mallocx(1, 0); + free(ptr); + ptr = mallocx(1, 0); + free(ptr); + ptr = mallocx(1, MALLOCX_TCACHE_NONE); + dallocx(ptr, MALLOCX_TCACHE_NONE); +} + +/* + * We want to support a degree of user reentrancy. This tests a variety of + * allocation scenarios. + */ +static void +be_reentrant() { + /* Let's make sure the tcache is non-empty if enabled. */ + alloc_free_size(1); + alloc_free_size(1024); + alloc_free_size(64 * 1024); + alloc_free_size(256 * 1024); + alloc_free_size(1024 * 1024); + + /* Some reallocation. */ + void *ptr = mallocx(129, 0); + ptr = rallocx(ptr, 130, 0); + free(ptr); + + ptr = mallocx(2 * 1024 * 1024, 0); + free(ptr); + ptr = mallocx(1 * 1024 * 1024, 0); + ptr = rallocx(ptr, 2 * 1024 * 1024, 0); + free(ptr); + + ptr = mallocx(1, 0); + ptr = rallocx(ptr, 1000, 0); + free(ptr); +} + static void set_args_raw(uintptr_t *args_raw, int nargs) { memcpy(arg_args_raw, args_raw, sizeof(uintptr_t) * nargs); @@ -52,6 +91,7 @@ test_alloc_hook(void *extra, hook_alloc_t type, void *result, arg_result = result; arg_result_raw = result_raw; set_args_raw(args_raw, 3); + be_reentrant(); } static void @@ -62,6 +102,7 @@ test_dalloc_hook(void *extra, hook_dalloc_t type, void *address, arg_type = (int)type; arg_address = address; set_args_raw(args_raw, 3); + be_reentrant(); } static void @@ -76,6 +117,7 @@ test_expand_hook(void *extra, hook_expand_t type, void *address, arg_new_usize = new_usize; arg_result_raw = result_raw; set_args_raw(args_raw, 4); + be_reentrant(); } TEST_BEGIN(test_hooks_basic) { From d22e150320801c114b3694e860195254bad1ef0f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 May 2018 12:18:54 -0700 Subject: [PATCH 1139/2608] Avoid taking extents_muzzy mutex when muzzy is disabled. When muzzy decay is disabled, no need to allocate from extents_muzzy. This saves us a couple of mutex operations down the extents_alloc path. 
--- src/arena.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index b76be5f7..1cecce7f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -324,6 +324,11 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, arena_large_malloc_stats_update(tsdn, arena, usize); } +static bool +arena_may_have_muzzy(arena_t *arena) { + return (pages_can_purge_lazy && (arena_muzzy_decay_ms_get(arena) != 0)); +} + extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { @@ -338,7 +343,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); - if (extent == NULL) { + if (extent == NULL && arena_may_have_muzzy(arena)) { extent = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); @@ -1124,7 +1129,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); - if (slab == NULL) { + if (slab == NULL && arena_may_have_muzzy(arena)) { slab = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); From 9bd8deb26044b7a3f056f8995aae95ffe86d19ed Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Jun 2018 11:06:23 -0700 Subject: [PATCH 1140/2608] Fix stats output for opt.lg_extent_max_active_fit. 
--- src/stats.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 7411745f..85e68a70 100644 --- a/src/stats.c +++ b/src/stats.c @@ -891,6 +891,8 @@ stats_general_print(emitter_t *emitter) { #define OPT_WRITE_UNSIGNED(name) \ OPT_WRITE(name, uv, usz, emitter_type_unsigned) +#define OPT_WRITE_SIZE_T(name) \ + OPT_WRITE(name, sv, ssz, emitter_type_size) #define OPT_WRITE_SSIZE_T(name) \ OPT_WRITE(name, ssv, sssz, emitter_type_ssize) #define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ @@ -912,7 +914,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") - OPT_WRITE_UNSIGNED("lg_extent_max_active_fit") + OPT_WRITE_SIZE_T("lg_extent_max_active_fit") OPT_WRITE_CHAR_P("junk") OPT_WRITE_BOOL("zero") OPT_WRITE_BOOL("utrace") From c834912aa9503d470c3dae2b2b7840607f0d6e34 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 29 May 2018 15:55:04 -0700 Subject: [PATCH 1141/2608] Avoid taking large_mtx for auto arenas. On tcache flush path, we can avoid touching the large_mtx for auto arenas, since it was only needed for manual arenas where arena_reset is allowed. --- src/large.c | 3 ++- src/tcache.c | 9 +++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/large.c b/src/large.c index fdf183e4..4951f3ee 100644 --- a/src/large.c +++ b/src/large.c @@ -329,8 +329,9 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, large_dalloc_maybe_junk(extent_addr_get(extent), extent_usize_get(extent)); } else { - malloc_mutex_assert_owner(tsdn, &arena->large_mtx); + /* Only hold the large_mtx if necessary. 
*/ if (!arena_is_auto(arena)) { + malloc_mutex_assert_owner(tsdn, &arena->large_mtx); extent_list_remove(&arena->large, extent); } } diff --git a/src/tcache.c b/src/tcache.c index a769a6b1..e2497667 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -212,7 +212,10 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, idump = false; } - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + bool lock_large = !arena_is_auto(arena); + if (lock_large) { + malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); + } for (unsigned i = 0; i < nflush; i++) { void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); @@ -236,7 +239,9 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, tbin->tstats.nrequests = 0; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); + if (lock_large) { + malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); + } unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { From 0ff7ff3ec7b322881fff3bd6d4861fda6e9331d9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Jun 2018 13:36:06 -0700 Subject: [PATCH 1142/2608] Optimize ixalloc by avoiding a size lookup. 
--- include/jemalloc/internal/arena_externs.h | 2 +- .../internal/jemalloc_internal_inlines_c.h | 6 +++-- src/arena.c | 26 +++++++++++++------ src/jemalloc.c | 8 +++--- 4 files changed, 27 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f4edcc73..f46820f4 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -64,7 +64,7 @@ void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero); + size_t extra, bool zero, size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 1b5c11ed..2b0d4f44 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -203,7 +203,7 @@ iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, JEMALLOC_ALWAYS_INLINE bool ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, - size_t alignment, bool zero) { + size_t alignment, bool zero, size_t *newsize) { assert(ptr != NULL); assert(size != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -212,10 +212,12 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) != 0) { /* Existing object alignment is inadequate. 
*/ + *newsize = oldsize; return true; } - return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero); + return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero, + newsize); } #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/src/arena.c b/src/arena.c index 1cecce7f..b5c3dbec 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1585,15 +1585,17 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) { bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero) { + size_t extra, bool zero, size_t *newsize) { + bool ret; /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= LARGE_MAXCLASS); + extent_t *extent = iealloc(tsdn, ptr); if (unlikely(size > LARGE_MAXCLASS)) { - return true; + ret = true; + goto done; } - extent_t *extent = iealloc(tsdn, ptr); size_t usize_min = sz_s2u(size); size_t usize_max = sz_s2u(size + extra); if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) { @@ -1606,17 +1608,23 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < oldsize)) { - return true; + ret = true; + goto done; } arena_decay_tick(tsdn, extent_arena_get(extent)); - return false; + ret = false; } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) { - return large_ralloc_no_move(tsdn, extent, usize_min, usize_max, + ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, zero); + } else { + ret = true; } +done: + assert(extent == iealloc(tsdn, ptr)); + *newsize = extent_usize_get(extent); - return true; + return ret; } static void * @@ -1644,7 +1652,9 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, if (likely(usize <= SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. 
*/ - if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero)) { + UNUSED size_t newsize; + if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, + &newsize)) { hook_invoke_expand(hook_args->is_realloc ? hook_expand_realloc : hook_expand_rallocx, ptr, oldsize, usize, (uintptr_t)ptr, diff --git a/src/jemalloc.c b/src/jemalloc.c index 264408fe..300e8976 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2834,14 +2834,14 @@ label_oom: JEMALLOC_ALWAYS_INLINE size_t ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { - size_t usize; + size_t newsize; - if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero)) { + if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero, + &newsize)) { return old_usize; } - usize = isalloc(tsdn, ptr); - return usize; + return newsize; } static size_t From fec1ef7c91b5368ad0d6f0c84bc77fa71d9dc949 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 26 Jun 2018 11:40:53 -0700 Subject: [PATCH 1143/2608] Fix arena locking in tcache_bin_flush_large(). This regression was introduced in c834912 (incorrect arena used). --- src/tcache.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index e2497667..af757540 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -193,8 +193,8 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, assert(binind < nhbins); assert((cache_bin_sz_t)rem <= tbin->ncached); - arena_t *arena = tcache->arena; - assert(arena != NULL); + arena_t *tcache_arena = tcache->arena; + assert(tcache_arena != NULL); unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); /* Look up extent once per item. 
*/ @@ -212,7 +212,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, idump = false; } - bool lock_large = !arena_is_auto(arena); + bool lock_large = !arena_is_auto(locked_arena); if (lock_large) { malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); } @@ -225,16 +225,17 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, extent); } } - if ((config_prof || config_stats) && locked_arena == arena) { + if ((config_prof || config_stats) && + (locked_arena == tcache_arena)) { if (config_prof) { - idump = arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes); + idump = arena_prof_accum(tsd_tsdn(tsd), + tcache_arena, tcache->prof_accumbytes); tcache->prof_accumbytes = 0; } if (config_stats) { merged_stats = true; arena_stats_large_nrequests_add(tsd_tsdn(tsd), - &arena->stats, binind, + &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -275,8 +276,8 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats, - binind, tbin->tstats.nrequests); + arena_stats_large_nrequests_add(tsd_tsdn(tsd), + &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } From 50820010fef8f40e1221360ef745d9bb5fa93364 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 26 Jun 2018 13:27:44 -0700 Subject: [PATCH 1144/2608] Add test for remote deallocation. 
--- test/integration/mallocx.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index fd960f30..9fe3ad5d 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -71,6 +71,38 @@ TEST_BEGIN(test_overflow) { } TEST_END +static void * +remote_alloc(void *arg) { + unsigned arena; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + size_t large_sz; + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + void *ptr = mallocx(large_sz, MALLOCX_ARENA(arena) + | MALLOCX_TCACHE_NONE); + void **ret = (void **)arg; + *ret = ptr; + + return NULL; +} + +TEST_BEGIN(test_remote_free) { + thd_t thd; + void *ret; + thd_create(&thd, remote_alloc, (void *)&ret); + thd_join(thd, NULL); + assert_ptr_not_null(ret, "Unexpected mallocx failure"); + + /* Avoid TCACHE_NONE to explicitly test tcache_flush(). */ + dallocx(ret, 0); + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); +} +TEST_END + TEST_BEGIN(test_oom) { size_t largemax; bool oom; @@ -223,6 +255,7 @@ main(void) { return test( test_overflow, test_oom, + test_remote_free, test_basic, test_alignment_and_size); } From d1e11d48d4c706e17ef3508e2ddb910f109b779f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 21 Jun 2018 13:02:49 -0700 Subject: [PATCH 1145/2608] Move tsd link and in_hook after tcache. This can lead to better cache utilization down the common paths where we don't touch the link. 
--- include/jemalloc/internal/tcache_structs.h | 9 +++++++++ include/jemalloc/internal/tsd.h | 7 ------- src/hook.c | 6 +++--- src/tsd.c | 10 +++++----- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 07b73870..b3cd4e5f 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -6,6 +6,10 @@ #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ticker.h" +/* Various uses of this struct need it to be a named type. */ +typedef struct tsd_s tsd_t; +typedef ql_elm(tsd_t) tsd_link_t; + struct tcache_s { /* * To minimize our cache-footprint, we put the frequently accessed data @@ -29,6 +33,11 @@ struct tcache_s { */ /* Lets us track all the tcaches in an arena. */ ql_elm(tcache_t) link; + + /* Logically scoped to tsd, but put here for cache layout reasons. */ + ql_elm(tsd_t) tsd_link; + bool in_hook; + /* * The descriptor lets the arena find our cache bins without seeing the * tcache definition. This enables arenas to aggregate stats across diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3097ce06..e5e82f42 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -59,14 +59,10 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TEST_TSD_INITIALIZER #endif -/* Various uses of this struct need it to be a named type. 
*/ -typedef ql_elm(tsd_t) tsd_link_t; - /* O(name, type, nullable type */ #define MALLOC_TSD \ O(tcache_enabled, bool, bool) \ O(arenas_tdata_bypass, bool, bool) \ - O(in_hook, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ O(offset_state, uint64_t, uint64_t) \ @@ -77,7 +73,6 @@ typedef ql_elm(tsd_t) tsd_link_t; O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ - O(link, tsd_link_t, tsd_link_t) \ O(tcache, tcache_t, tcache_t) \ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD @@ -86,7 +81,6 @@ typedef ql_elm(tsd_t) tsd_link_t; ATOMIC_INIT(tsd_state_uninitialized), \ TCACHE_ENABLED_ZERO_INITIALIZER, \ false, \ - false, \ 0, \ 0, \ 0, \ @@ -97,7 +91,6 @@ typedef ql_elm(tsd_t) tsd_link_t; NULL, \ NULL, \ NULL, \ - {NULL}, \ TCACHE_ZERO_INITIALIZER, \ WITNESS_TSD_INITIALIZER \ MALLOC_TEST_TSD_INITIALIZER \ diff --git a/src/hook.c b/src/hook.c index f66d4239..9ac703cf 100644 --- a/src/hook.c +++ b/src/hook.c @@ -130,9 +130,9 @@ hook_reentrantp() { */ static bool in_hook_global = true; tsdn_t *tsdn = tsdn_fetch(); - bool *in_hook = tsdn_in_hookp_get(tsdn); - if (in_hook != NULL) { - return in_hook; + tcache_t *tcache = tsdn_tcachep_get(tsdn); + if (tcache != NULL) { + return &tcache->in_hook; } return &in_hook_global; } diff --git a/src/tsd.c b/src/tsd.c index 91a964ac..4eceee79 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -68,7 +68,7 @@ tsd_in_nominal_list(tsd_t *tsd) { * out of it here. 
*/ malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); - ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(link)) { + ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { if (tsd == tsd_list) { found = true; break; @@ -82,9 +82,9 @@ static void tsd_add_nominal(tsd_t *tsd) { assert(!tsd_in_nominal_list(tsd)); assert(tsd_state_get(tsd) <= tsd_state_nominal_max); - ql_elm_new(tsd, TSD_MANGLE(link)); + ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link); malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); - ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(link)); + ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); } @@ -93,7 +93,7 @@ tsd_remove_nominal(tsd_t *tsd) { assert(tsd_in_nominal_list(tsd)); assert(tsd_state_get(tsd) <= tsd_state_nominal_max); malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); - ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(link)); + ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); } @@ -106,7 +106,7 @@ tsd_force_recompute(tsdn_t *tsdn) { atomic_fence(ATOMIC_RELEASE); malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); tsd_t *remote_tsd; - ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(link)) { + ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { assert(atomic_load_u8(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); atomic_store_u8(&remote_tsd->state, tsd_state_nominal_recompute, From 77a71ef2b76c2e858c81e10349f28534307f1c91 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 6 Jun 2018 15:52:52 -0700 Subject: [PATCH 1146/2608] Fall back to the default pthread_create if RTLD_NEXT fails. 
--- .../internal/background_thread_externs.h | 1 - src/background_thread.c | 16 +++++++++------- src/ctl.c | 14 -------------- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 3209aa49..0f997e18 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -8,7 +8,6 @@ extern atomic_b_t background_thread_enabled_state; extern size_t n_background_threads; extern size_t max_background_threads; extern background_thread_info_t *background_thread_info; -extern bool can_enable_background_thread; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); diff --git a/src/background_thread.c b/src/background_thread.c index 3517a3bb..4613537c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -22,9 +22,6 @@ size_t max_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; -/* False if no necessary runtime support. */ -bool can_enable_background_thread; - /******************************************************************************/ #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER @@ -812,16 +809,21 @@ pthread_create_fptr_init(void) { if (pthread_create_fptr != NULL) { return false; } + /* + * Try the next symbol first, because 1) when use lazy_lock we have a + * wrapper for pthread_create; and 2) application may define its own + * wrapper as well (and can call malloc within the wrapper). + */ pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { - can_enable_background_thread = false; - if (config_lazy_lock || opt_background_thread) { + if (config_lazy_lock) { malloc_write(": Error in dlsym(RTLD_NEXT, " "\"pthread_create\")\n"); abort(); + } else { + /* Fall back to the default symbol. 
*/ + pthread_create_fptr = pthread_create; + } - } else { - can_enable_background_thread = true; } return false; diff --git a/src/ctl.c b/src/ctl.c index ef3eca4d..9ea2bb30 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1556,13 +1556,6 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, background_thread_enabled_set(tsd_tsdn(tsd), newval); if (newval) { - if (!can_enable_background_thread) { - malloc_printf(": Error in dlsym(" - "RTLD_NEXT, \"pthread_create\"). Cannot " - "enable background_thread\n"); - ret = EFAULT; - goto label_return; - } if (background_threads_enable(tsd)) { ret = EFAULT; goto label_return; @@ -1617,13 +1610,6 @@ max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } if (background_thread_enabled()) { - if (!can_enable_background_thread) { - malloc_printf(": Error in dlsym(" - "RTLD_NEXT, \"pthread_create\"). Cannot " - "enable background_thread\n"); - ret = EFAULT; - goto label_return; - } background_thread_enabled_set(tsd_tsdn(tsd), false); if (background_threads_disable(tsd)) { ret = EFAULT; From 94a88c26f4d9cffd884a349201e7605f13495f3f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 21 May 2018 13:33:48 -0700 Subject: [PATCH 1147/2608] Implement huge arena: opt.huge_threshold. The feature allows using a dedicated arena for huge allocations. We want the additional arena to separate huge allocations because: 1) mixing small extents with huge ones causes fragmentation over the long run (this feature reduces VM size significantly); 2) with many arenas, huge extents rarely get reused across threads; and 3) huge allocations happen way less frequently, therefore no concerns for lock contention. 
--- include/jemalloc/internal/arena_externs.h | 5 ++ include/jemalloc/internal/arena_inlines_b.h | 21 +++++++ include/jemalloc/internal/arena_types.h | 6 ++ .../internal/jemalloc_internal_inlines_b.h | 4 +- src/arena.c | 58 ++++++++++++++++++- src/jemalloc.c | 13 ++++- src/large.c | 2 +- test/unit/mallctl.c | 3 + 8 files changed, 106 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f46820f4..c145c91e 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -17,6 +17,9 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; +extern size_t opt_huge_threshold; +extern size_t huge_threshold; + void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); @@ -81,6 +84,8 @@ void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +bool arena_init_huge(void); +arena_t *arena_choose_huge(tsd_t *tsd); void arena_boot(void); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 2b7e77e7..401be758 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,6 +8,27 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +JEMALLOC_ALWAYS_INLINE arena_t * +arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { + if (arena != NULL) { + return arena; + } + + /* + * For huge allocations, use the dedicated huge arena if both are 
true: + * 1) is using auto arena selection (i.e. arena == NULL), and 2) the + * thread is not assigned to a manual arena. + */ + if (unlikely(size >= huge_threshold)) { + arena_t *tsd_arena = tsd_arena_get(tsd); + if (tsd_arena == NULL || arena_is_auto(tsd_arena)) { + return arena_choose_huge(tsd); + } + } + + return arena_choose(tsd, NULL); +} + JEMALLOC_ALWAYS_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 70001b5f..759713c9 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -40,4 +40,10 @@ typedef enum { #define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) #define PERCPU_ARENA_DEFAULT percpu_arena_disabled +/* + * When allocation_size >= huge_threshold, use the dedicated huge arena (unless + * have explicitly spicified arena index). 0 disables the feature. + */ +#define HUGE_THRESHOLD_DEFAULT 0 + #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 2e76e5d8..8b0ac462 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -71,7 +71,9 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) { static inline bool arena_is_auto(arena_t *arena) { assert(narenas_auto > 0); - return (arena_ind_get(arena) < narenas_auto); + unsigned offset = (opt_huge_threshold != 0) ? 
1 : 0; + + return (arena_ind_get(arena) < narenas_auto + offset); } JEMALLOC_ALWAYS_INLINE extent_t * diff --git a/src/arena.c b/src/arena.c index b5c3dbec..49d86d2c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -42,6 +42,10 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static div_info_t arena_binind_div_info[NBINS]; +size_t opt_huge_threshold = HUGE_THRESHOLD_DEFAULT; +size_t huge_threshold = HUGE_THRESHOLD_DEFAULT; +static unsigned huge_arena_ind; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -1378,7 +1382,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(!tsdn_null(tsdn) || arena != NULL); if (likely(!tsdn_null(tsdn))) { - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, size); } if (unlikely(arena == NULL)) { return NULL; @@ -1939,6 +1943,58 @@ label_error: return NULL; } +arena_t * +arena_choose_huge(tsd_t *tsd) { + /* huge_arena_ind can be 0 during init (will use a0). */ + if (huge_arena_ind == 0) { + assert(!malloc_initialized()); + } + + arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false); + if (huge_arena == NULL) { + /* Create the huge arena on demand. */ + assert(huge_arena_ind != 0); + huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true); + if (huge_arena == NULL) { + return NULL; + } + /* + * Purge eagerly for huge allocations, because: 1) number of + * huge allocations is usually small, which means ticker based + * decay is not reliable; and 2) less immediate reuse is + * expected for huge allocations. 
+ */ + if (arena_dirty_decay_ms_default_get() > 0) { + arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + } + if (arena_muzzy_decay_ms_default_get() > 0) { + arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + } + } + + return huge_arena; +} + +bool +arena_init_huge(void) { + bool huge_enabled; + + /* The threshold should be large size class. */ + if (opt_huge_threshold > LARGE_MAXCLASS || + opt_huge_threshold < LARGE_MINCLASS) { + opt_huge_threshold = 0; + huge_threshold = LARGE_MAXCLASS + PAGE; + huge_enabled = false; + } else { + /* Reserve the index for the huge arena. */ + huge_arena_ind = narenas_total_get(); + huge_threshold = opt_huge_threshold; + huge_enabled = true; + } + + return huge_enabled; +} + void arena_boot(void) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); diff --git a/src/jemalloc.c b/src/jemalloc.c index 300e8976..594669c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -327,7 +327,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ arena = arena_get(tsdn, ind, false); if (arena != NULL) { - assert(ind < narenas_auto); + assert(arena_is_auto(arena)); return arena; } @@ -1142,11 +1142,15 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", + -1, (sizeof(size_t) << 3) - 1) + + CONF_HANDLE_SIZE_T(opt_huge_threshold, "huge_threshold", + LARGE_MINCLASS, LARGE_MAXCLASS, yes, yes, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, (sizeof(size_t) << 3), yes, yes, false) - CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", - -1, (sizeof(size_t) << 3) - 1) + if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; for (int i = percpu_arena_mode_names_base; i < @@ -1465,6 +1469,9 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); + if (arena_init_huge()) { + narenas_total_inc(); + } return false; } diff 
--git a/src/large.c b/src/large.c index 4951f3ee..03eecfad 100644 --- a/src/large.c +++ b/src/large.c @@ -42,7 +42,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, */ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { - arena = arena_choose(tsdn_tsd(tsdn), arena); + arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8a36c0a4..4ecf5bd2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -341,6 +341,9 @@ TEST_BEGIN(test_thread_arena) { sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + if (opt_huge_threshold != 0) { + narenas--; + } assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); if (strcmp(opa, "disabled") == 0) { From 79522b2fc225f709a4ca7503c00f56df5d667160 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 1 Jun 2018 15:06:36 -0700 Subject: [PATCH 1148/2608] Refactor arena_is_auto. --- include/jemalloc/internal/jemalloc_internal_externs.h | 3 +++ include/jemalloc/internal/jemalloc_internal_inlines_b.h | 3 +-- src/jemalloc.c | 8 ++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index e10fb275..5beebc01 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -25,6 +25,9 @@ extern unsigned ncpus; /* Number of arenas used for automatic multiplexing of threads and arenas. */ extern unsigned narenas_auto; +/* Base index for manual arenas. */ +extern unsigned manual_arena_base; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 8b0ac462..70d6e578 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -71,9 +71,8 @@ arena_ichoose(tsd_t *tsd, arena_t *arena) { static inline bool arena_is_auto(arena_t *arena) { assert(narenas_auto > 0); - unsigned offset = (opt_huge_threshold != 0) ? 1 : 0; - return (arena_ind_get(arena) < narenas_auto + offset); + return (arena_ind_get(arena) < manual_arena_base); } JEMALLOC_ALWAYS_INLINE extent_t * diff --git a/src/jemalloc.c b/src/jemalloc.c index 594669c3..aded1398 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -86,8 +86,10 @@ malloc_mutex_t arenas_lock; JEMALLOC_ALIGNED(CACHELINE) atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; static atomic_u_t narenas_total; /* Use narenas_total_*(). */ -static arena_t *a0; /* arenas[0]; read-only after initialization. */ -unsigned narenas_auto; /* Read-only after initialization. */ +/* Below three are read-only after initialization. */ +static arena_t *a0; /* arenas[0]. */ +unsigned narenas_auto; +unsigned manual_arena_base; typedef enum { malloc_init_uninitialized = 3, @@ -1322,6 +1324,7 @@ malloc_init_hard_a0_locked() { * malloc_ncpus(). */ narenas_auto = 1; + manual_arena_base = narenas_auto + 1; memset(arenas, 0, sizeof(arena_t *) * narenas_auto); /* * Initialize one arena here. The rest are lazily created in @@ -1472,6 +1475,7 @@ malloc_init_narenas(void) { if (arena_init_huge()) { narenas_total_inc(); } + manual_arena_base = narenas_total_get(); return false; } From 1302af4c43e031304b422e36fcbb9e159804e0ac Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 1 Jun 2018 14:45:19 -0700 Subject: [PATCH 1149/2608] Add ctl and stats for opt.huge_threshold. 
--- src/ctl.c | 3 +++ src/stats.c | 1 + test/unit/mallctl.c | 1 + 3 files changed, 5 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 9ea2bb30..6d0bb92f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -85,6 +85,7 @@ CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) +CTL_PROTO(opt_huge_threshold) CTL_PROTO(opt_background_thread) CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_dirty_decay_ms) @@ -288,6 +289,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, + {NAME("huge_threshold"), CTL(opt_huge_threshold)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, @@ -1658,6 +1660,7 @@ CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) +CTL_RO_NL_GEN(opt_huge_threshold, opt_huge_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) diff --git a/src/stats.c b/src/stats.c index 85e68a70..9cfc8503 100644 --- a/src/stats.c +++ b/src/stats.c @@ -910,6 +910,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") OPT_WRITE_CHAR_P("percpu_arena") + OPT_WRITE_UNSIGNED("huge_threshold") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4ecf5bd2..b4e01af4 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,6 +164,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const 
char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); + TEST_MALLCTL_OPT(size_t, huge_threshold, always); TEST_MALLCTL_OPT(bool, background_thread, always); TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); From ff622eeab51325979226d5430c68a08d3e00b26b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 1 Jun 2018 15:58:31 -0700 Subject: [PATCH 1150/2608] Add unit test for opt.huge_threshold. --- Makefile.in | 1 + test/unit/huge.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 109 insertions(+) create mode 100644 test/unit/huge.c diff --git a/Makefile.in b/Makefile.in index 3b3191f5..81f899fe 100644 --- a/Makefile.in +++ b/Makefile.in @@ -174,6 +174,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ diff --git a/test/unit/huge.c b/test/unit/huge.c new file mode 100644 index 00000000..7e54d076 --- /dev/null +++ b/test/unit/huge.c @@ -0,0 +1,108 @@ +#include "test/jemalloc_test.h" + +/* Threshold: 2 << 20 = 2097152. */ +const char *malloc_conf = "huge_threshold:2097152"; + +#define HUGE_SZ (2 << 20) +#define SMALL_SZ (8) + +TEST_BEGIN(huge_bind_thread) { + unsigned arena1, arena2; + size_t sz = sizeof(unsigned); + + /* Bind to a manual arena. 
*/ + assert_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, + "Failed to create arena"); + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, + sizeof(arena1)), 0, "Fail to bind thread"); + + void *ptr = mallocx(HUGE_SZ, 0); + assert_ptr_not_null(ptr, "Fail to allocate huge size"); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + sizeof(ptr)), 0, "Unexpected mallctl() failure"); + assert_u_eq(arena1, arena2, "Wrong arena used after binding"); + dallocx(ptr, 0); + + /* Switch back to arena 0. */ + test_skip_if(have_percpu_arena && + PERCPU_ARENA_ENABLED(opt_percpu_arena)); + arena2 = 0; + assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, + sizeof(arena2)), 0, "Fail to bind thread"); + ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + sizeof(ptr)), 0, "Unexpected mallctl() failure"); + assert_u_eq(arena2, 0, "Wrong arena used after binding"); + dallocx(ptr, MALLOCX_TCACHE_NONE); + + /* Then huge allocation should use the huge arena. 
*/ + ptr = mallocx(HUGE_SZ, 0); + assert_ptr_not_null(ptr, "Fail to allocate huge size"); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + sizeof(ptr)), 0, "Unexpected mallctl() failure"); + assert_u_ne(arena2, 0, "Wrong arena used after binding"); + assert_u_ne(arena1, arena2, "Wrong arena used after binding"); + dallocx(ptr, 0); +} +TEST_END + +TEST_BEGIN(huge_mallocx) { + unsigned arena1, arena2; + size_t sz = sizeof(unsigned); + + assert_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, + "Failed to create arena"); + void *huge = mallocx(HUGE_SZ, MALLOCX_ARENA(arena1)); + assert_ptr_not_null(huge, "Fail to allocate huge size"); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, + sizeof(huge)), 0, "Unexpected mallctl() failure"); + assert_u_eq(arena1, arena2, "Wrong arena used for mallocx"); + dallocx(huge, MALLOCX_ARENA(arena1)); + + void *huge2 = mallocx(HUGE_SZ, 0); + assert_ptr_not_null(huge, "Fail to allocate huge size"); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, + sizeof(huge2)), 0, "Unexpected mallctl() failure"); + assert_u_ne(arena1, arena2, + "Huge allocation should not come from the manual arena."); + assert_u_ne(arena2, 0, + "Huge allocation should not come from the arena 0."); + dallocx(huge2, 0); +} +TEST_END + +TEST_BEGIN(huge_allocation) { + unsigned arena1, arena2; + + void *ptr = mallocx(HUGE_SZ, 0); + assert_ptr_not_null(ptr, "Fail to allocate huge size"); + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); + assert_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); + dallocx(ptr, 0); + + ptr = mallocx(HUGE_SZ >> 1, 0); + assert_ptr_not_null(ptr, "Fail to allocate half huge size"); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + sizeof(ptr)), 0, "Unexpected mallctl() failure"); + assert_u_ne(arena1, arena2, "Wrong arena used for half huge"); + dallocx(ptr, 0); + + ptr = 
mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(ptr, "Fail to allocate small size"); + assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + sizeof(ptr)), 0, "Unexpected mallctl() failure"); + assert_u_ne(arena1, arena2, + "Huge and small should be from different arenas"); + dallocx(ptr, 0); +} +TEST_END + +int +main(void) { + return test( + huge_allocation, + huge_mallocx, + huge_bind_thread); +} From cdf15b458a1c348722fa43cb1813ac3a93fdc634 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Jun 2018 11:04:29 -0700 Subject: [PATCH 1151/2608] Rename huge_threshold to experimental, and tweak documentation. --- doc/jemalloc.xml.in | 13 ++++++++----- src/ctl.c | 2 +- src/jemalloc.c | 4 +++- src/stats.c | 2 +- test/unit/huge.c | 2 +- test/unit/mallctl.c | 2 +- 6 files changed, 15 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 1e12fd3a..0dcfb98d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1055,7 +1055,9 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms - for a description of muzzy pages. + for a description of muzzy pages. Note that when the huge_threshold + feature is enabled, the special auto arenas may use its own decay + settings. @@ -1763,10 +1765,11 @@ malloc_conf = "xmalloc:true";]]> to control allocation for arenas explicitly created via arenas.create such that all extents originate from an application-supplied extent allocator - (by specifying the custom extent hook functions during arena creation), - but the automatically created arenas will have already created extents - prior to the application having an opportunity to take over extent - allocation. + (by specifying the custom extent hook functions during arena creation). 
+ However, the API guarantees for the automatically created arenas may be + relaxed -- hooks set there may be called in a "best effort" fashion; in + addition there may be extents created prior to the application having an + opportunity to take over extent allocation. Date: Thu, 31 May 2018 19:28:06 +0300 Subject: [PATCH 1152/2608] Fix MSVC build --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 2 +- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 3 ++- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 5 ++++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f7b175b0..be252d76 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -47,7 +47,7 @@ - + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 11cfcd0b..00d09609 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -37,7 +37,7 @@ Source Files - + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ed71de8a..599cc42f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -47,7 +47,7 @@ - + @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 11cfcd0b..b352721c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -37,7 +37,7 @@ Source Files - + Source Files @@ -97,5 +97,8 @@ Source Files + + Source Files + \ No newline at end of file From 3d29d11ac2c1583b9959f73c0548545018d31c8a Mon Sep 17 00:00:00 2001 
From: gnzlbg Date: Thu, 3 May 2018 11:40:53 +0200 Subject: [PATCH 1153/2608] Clean compilation -Wextra Before this commit jemalloc produced many warnings when compiled with -Wextra with both Clang and GCC. This commit fixes the issues raised by these warnings or suppresses them if they were spurious at least for the Clang and GCC versions covered by CI. This commit: * adds `JEMALLOC_DIAGNOSTIC` macros: `JEMALLOC_DIAGNOSTIC_{PUSH,POP}` are used to modify the stack of enabled diagnostics. The `JEMALLOC_DIAGNOSTIC_IGNORE_...` macros are used to ignore a concrete diagnostic. * adds `JEMALLOC_FALLTHROUGH` macro to explicitly state that falling through `case` labels in a `switch` statement is intended * Removes all UNUSED annotations on function parameters. The warning -Wunused-parameter is now disabled globally in `jemalloc_internal_macros.h` for all translation units that include that header. It is never re-enabled since that header cannot be included by users. * locally suppresses some -Wextra diagnostics: * `-Wmissing-field-initializer` is buggy in older Clang and GCC versions, where it does not understanding that, in C, `= {0}` is a common C idiom to initialize a struct to zero * `-Wtype-bounds` is suppressed in a particular situation where a generic macro, used in multiple different places, compares an unsigned integer for smaller than zero, which is always true. * `-Walloc-larger-than-size=` diagnostics warn when an allocation function is called with a size that is too large (out-of-range). These are suppressed in the parts of the tests where `jemalloc` explicitly does this to test that the allocation functions fail properly. * adds a new CI build bot that runs the log unit test on CI. Closes #1196 . 
--- .travis.yml | 7 +- configure.ac | 2 + include/jemalloc/internal/arena_inlines_b.h | 6 +- include/jemalloc/internal/arena_stats.h | 22 +-- include/jemalloc/internal/atomic_gcc_sync.h | 14 +- include/jemalloc/internal/extent_inlines.h | 2 +- include/jemalloc/internal/hash.h | 65 ++++----- .../internal/jemalloc_internal_macros.h | 58 ++++++++ include/jemalloc/internal/mutex.h | 18 ++- include/jemalloc/internal/prof_inlines_a.h | 6 +- include/jemalloc/internal/rtree.h | 20 +-- include/jemalloc/internal/rtree_tsd.h | 2 +- include/jemalloc/internal/tcache_inlines.h | 2 +- include/jemalloc/internal/tsd_generic.h | 6 + include/jemalloc/internal/tsd_tls.h | 2 +- src/arena.c | 12 +- src/background_thread.c | 4 +- src/ctl.c | 136 ++++++++++-------- src/extent.c | 6 +- src/jemalloc.c | 12 +- src/mutex.c | 2 +- src/rtree.c | 4 +- src/tcache.c | 2 +- src/tsd.c | 6 + test/integration/aligned_alloc.c | 14 ++ test/integration/mallocx.c | 13 ++ test/integration/overflow.c | 13 ++ test/integration/rallocx.c | 13 ++ test/unit/emitter.c | 6 +- 29 files changed, 328 insertions(+), 147 deletions(-) diff --git a/.travis.yml b/.travis.yml index 4cc116e5..7d93ead5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -143,7 +143,12 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + script: + - make check + - make -j test/unit/log + - test/unit/log before_script: - autoconf diff --git a/configure.ac b/configure.ac index a6a08db0..1c209117 100644 --- a/configure.ac +++ b/configure.ac @@ -242,6 +242,7 @@ if test "x$GCC" = 
"xyes" ; then fi fi JE_CFLAGS_ADD([-Wall]) + JE_CFLAGS_ADD([-Wextra]) JE_CFLAGS_ADD([-Wshorten-64-to-32]) JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) @@ -289,6 +290,7 @@ if test "x$enable_cxx" = "x1" ; then AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) if test "x${HAVE_CXX14}" = "x1" ; then JE_CXXFLAGS_ADD([-Wall]) + JE_CXXFLAGS_ADD([-Wextra]) JE_CXXFLAGS_ADD([-g3]) SAVED_LIBS="${LIBS}" diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 401be758..d388cae9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -49,7 +49,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize, +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -68,7 +68,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize, } static inline void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -318,7 +318,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, szind_t szind; bool slab; - UNUSED alloc_ctx_t local_ctx; + alloc_ctx_t local_ctx; if (config_prof && opt_prof) { if (alloc_ctx == NULL) { /* Uncommon case and should be a static check. 
*/ diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 5f3dca8b..39b7262a 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -6,6 +6,8 @@ #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/size_classes.h" +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + /* * In those architectures that support 64-bit atomics, we use atomic updates for * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize @@ -95,7 +97,7 @@ struct arena_stats_s { }; static inline bool -arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) { +arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_stats_t); i++) { assert(((char *)arena_stats)[i] == 0); @@ -147,11 +149,11 @@ arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, #endif } -UNUSED static inline void +static inline void arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, arena_stats_u64_t *p, uint64_t x) { #ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); + uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); assert(r - x <= r); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); @@ -176,7 +178,8 @@ arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { } static inline size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, + atomic_zu_t *p) { #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_zu(p, ATOMIC_RELAXED); #else @@ -186,8 +189,8 @@ arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { } static inline void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, + atomic_zu_t *p, size_t x) { #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_zu(p, x, 
ATOMIC_RELAXED); #else @@ -198,10 +201,10 @@ arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, } static inline void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, + atomic_zu_t *p, size_t x) { #ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); assert(r - x <= r); #else malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); @@ -233,5 +236,4 @@ arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { arena_stats_unlock(tsdn, arena_stats); } - #endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index 30846e4d..06a0acf3 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -113,8 +113,8 @@ atomic_store_##short_type(atomic_##short_type##_t *a, \ } \ \ ATOMIC_INLINE type \ -atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ +atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ + atomic_memory_order_t mo) { \ /* \ * Because of FreeBSD, we care about gcc 4.2, which doesn't have\ * an atomic exchange builtin. We fake it with a CAS loop. 
\ @@ -129,8 +129,9 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ \ ATOMIC_INLINE bool \ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ + type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ desired); \ if (prev == *expected) { \ @@ -142,8 +143,9 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ } \ ATOMIC_INLINE bool \ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ + type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ type prev = __sync_val_compare_and_swap(&a->repr, *expected, \ desired); \ if (prev == *expected) { \ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 77181df8..9b8ddc27 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -190,7 +190,7 @@ extent_addr_set(extent_t *extent, void *addr) { } static inline void -extent_addr_randomize(UNUSED tsdn_t *tsdn, extent_t *extent, size_t alignment) { +extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { assert(extent_base_get(extent) == extent_addr_get(extent)); if (alignment < PAGE) { diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index dcfc992d..0270034e 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) { uint32_t k1 = 0; switch (len & 3) { - case 3: k1 ^= tail[2] << 16; - case 2: k1 ^= tail[1] << 8; + case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH + 
case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; } @@ -119,7 +119,7 @@ hash_x86_32(const void *key, int len, uint32_t seed) { return h1; } -UNUSED static inline void +static inline void hash_x86_128(const void *key, const int len, uint32_t seed, uint64_t r_out[2]) { const uint8_t * data = (const uint8_t *) key; @@ -177,28 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed, uint32_t k4 = 0; switch (len & 15) { - case 15: k4 ^= tail[14] << 16; - case 14: k4 ^= tail[13] << 8; + case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH + case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - - case 12: k3 ^= tail[11] << 24; - case 11: k3 ^= tail[10] << 16; - case 10: k3 ^= tail[ 9] << 8; + JEMALLOC_FALLTHROUGH + case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH + case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH + case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH case 9: k3 ^= tail[ 8] << 0; k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - - case 8: k2 ^= tail[ 7] << 24; - case 7: k2 ^= tail[ 6] << 16; - case 6: k2 ^= tail[ 5] << 8; + JEMALLOC_FALLTHROUGH + case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH + case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH + case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH case 5: k2 ^= tail[ 4] << 0; k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - - case 4: k1 ^= tail[ 3] << 24; - case 3: k1 ^= tail[ 2] << 16; - case 2: k1 ^= tail[ 1] << 8; + JEMALLOC_FALLTHROUGH + case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH + case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH + case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH case 1: k1 ^= tail[ 0] << 0; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + JEMALLOC_FALLTHROUGH } } @@ -220,7 +221,7 @@ hash_x86_128(const void *key, const int len, uint32_t seed, r_out[1] = (((uint64_t) 
h4) << 32) | h3; } -UNUSED static inline void +static inline void hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t r_out[2]) { const uint8_t *data = (const uint8_t *) key; @@ -260,22 +261,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */ - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */ - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */ - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; /* falls through */ - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */ - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */ + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - /* falls through */ - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */ - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */ - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */ - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */ - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */ - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */ - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */ + JEMALLOC_FALLTHROUGH + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; 
JEMALLOC_FALLTHROUGH + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; } diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index ed75d376..a1a761b8 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -40,4 +40,62 @@ #define JEMALLOC_VA_ARGS_HEAD(head, ...) head #define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__ +#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \ + && defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7) +#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough); +#else +#define JEMALLOC_FALLTHROUGH /* falls through */ +#endif + + +/* Diagnostic suppression macros */ +#if defined(_MSC_VER) && !defined(__clang__) +# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W)) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +#elif defined(__GNUC__) || defined(__clang__) +/* + * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang + * diagnostic suppression macros and should not be used anywhere else. 
+ */ +# define JEMALLOC_PRAGMA__(X) _Pragma(#X) +# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) +# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) JEMALLOC_PRAGMA__(GCC diagnostic ignored W) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") +# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter") +# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7) +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# endif +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER +#else +# define JEMALLOC_DIAGNOSTIC_PUSH +# define JEMALLOC_DIAGNOSTIC_POP +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +#endif + +/* + * Disables spurious diagnostics for all headers + * Since these headers are not included by users directly, + * it does not affect their diagnostic settings. 
+ */ +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + #endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 6520c251..651ce5f9 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -101,9 +101,15 @@ struct malloc_mutex_s { #ifdef _WIN32 # define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_INITIALIZER \ +# if defined(JEMALLOC_DEBUG) +# define MALLOC_MUTEX_INITIALIZER \ {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} +# else +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif #elif (defined(JEMALLOC_OSSPIN)) # define MALLOC_MUTEX_INITIALIZER \ {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ @@ -111,12 +117,18 @@ struct malloc_mutex_s { #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # define MALLOC_MUTEX_INITIALIZER \ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} +# else +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif #endif #ifdef JEMALLOC_LAZY_LOCK diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index a6efb485..c39bc3d4 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -4,7 +4,8 @@ #include "jemalloc/internal/mutex.h" static inline bool 
-prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { +prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, + uint64_t accumbytes) { cassert(config_prof); bool overflow; @@ -42,7 +43,8 @@ prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, uint64_t accumbytes) { } static inline void -prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { +prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, + size_t usize) { cassert(config_prof); /* diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index b59d33a8..dd452f16 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -170,8 +170,8 @@ rtree_subkey(uintptr_t key, unsigned level) { */ # ifdef RTREE_LEAF_COMPACT JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { return (uintptr_t)atomic_load_p(&elm->le_bits, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); } @@ -208,7 +208,7 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) { # endif JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, +rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); @@ -221,7 +221,7 @@ rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, } JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, +rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); @@ -233,7 +233,7 @@ rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, } JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, +rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); @@ -245,7 +245,7 @@ rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, } static inline void -rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, +rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, extent_t *extent) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); @@ -259,7 +259,7 @@ rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, } static inline void -rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, +rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind) { assert(szind <= NSIZES); @@ -277,7 +277,7 @@ rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, } static inline void 
-rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, +rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, @@ -292,8 +292,8 @@ rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, } static inline void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent, szind_t szind, bool slab) { +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, extent_t *extent, szind_t szind, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 93a75173..562e2929 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -26,7 +26,7 @@ * Zero initializer required for tsd initialization only. Proper initialization * done via rtree_ctx_data_init(). 
*/ -#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}} +#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 0f6ab8cb..c426c567 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -40,7 +40,7 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) { JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - UNUSED size_t size, szind_t binind, bool zero, bool slow_path) { + size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; cache_bin_t *bin; bool tcache_success; diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index 1e52ef76..cf73c0c7 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -77,7 +77,10 @@ tsd_wrapper_get(bool init) { abort(); } else { wrapper->initialized = false; + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; } tsd_wrapper_set(wrapper); @@ -107,7 +110,10 @@ tsd_boot1(void) { tsd_boot_wrapper.initialized = false; tsd_cleanup(&tsd_boot_wrapper.val); wrapper->initialized = false; + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; tsd_wrapper_set(wrapper); } diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 0de64b7b..757aaa0e 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -39,7 +39,7 @@ tsd_get_allocates(void) { /* Get/set. 
*/ JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_get(UNUSED bool init) { +tsd_get(bool init) { assert(tsd_booted); return &tsd_tls; } diff --git a/src/arena.c b/src/arena.c index 49d86d2c..eefea0dc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -11,6 +11,8 @@ #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + /******************************************************************************/ /* Data. */ @@ -65,7 +67,7 @@ static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, /******************************************************************************/ void -arena_basic_stats_merge(UNUSED tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, +arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); @@ -752,7 +754,7 @@ static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, bool all, extent_list_t *decay_extents, bool is_background_thread) { - UNUSED size_t nmadvise, nunmapped; + size_t nmadvise, nunmapped; size_t npurged; if (config_stats) { @@ -843,7 +845,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { - UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, + size_t npurged = arena_decay_stashed(tsdn, arena, &extent_hooks, decay, extents, all, &decay_extents, is_background_thread); assert(npurged == npurge); @@ -872,7 +874,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, is_background_thread); - UNUSED size_t npages_new; + size_t npages_new; if (epoch_advanced) { 
/* Backlog is updated on epoch advance. */ npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; @@ -1508,7 +1510,7 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(UNUSED tsdn_t *tsdn, arena_t *arena, extent_t *slab, +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, bin_t *bin) { assert(extent_nfree_get(slab) > 0); diff --git a/src/background_thread.c b/src/background_thread.c index 4613537c..feed8564 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -4,6 +4,8 @@ #include "jemalloc/internal/assert.h" +JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS + /******************************************************************************/ /* Data. */ @@ -78,7 +80,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { } static inline bool -set_current_thread_affinity(UNUSED int cpu) { +set_current_thread_affinity(int cpu) { #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; CPU_ZERO(&cpuset); diff --git a/src/ctl.c b/src/ctl.c index 5c94cdbc..3f7dea16 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1392,8 +1392,8 @@ label_return: \ #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1435,8 +1435,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1454,8 +1454,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void 
*newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1489,8 +1489,8 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1508,8 +1508,8 @@ label_return: \ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; UNUSED uint64_t newval; @@ -1527,8 +1527,9 @@ label_return: } static int -background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +background_thread_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { int ret; bool oldval; @@ -1578,8 +1579,9 @@ label_return: } static int -max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; size_t oldval; @@ -1691,8 +1693,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) /******************************************************************************/ static int -thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; 
arena_t *oldarena; unsigned newind, oldind; @@ -1756,8 +1758,9 @@ CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, tsd_thread_deallocatedp_get, uint64_t *) static int -thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; bool oldval; @@ -1777,8 +1780,9 @@ label_return: } static int -thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; if (!tcache_available(tsd)) { @@ -1797,8 +1801,9 @@ label_return: } static int -thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; if (!config_prof) { @@ -1828,8 +1833,9 @@ label_return: } static int -thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; bool oldval; @@ -1858,8 +1864,8 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; @@ -1876,8 +1882,8 @@ label_return: } static int -tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t 
miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; @@ -1896,8 +1902,8 @@ label_return: } static int -tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned tcache_ind; @@ -2299,8 +2305,9 @@ label_return: } static int -arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; unsigned arena_ind; arena_t *arena; @@ -2335,7 +2342,8 @@ label_return: } static const ctl_named_node_t * -arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -2360,8 +2368,8 @@ label_return: /******************************************************************************/ static int -arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned narenas; @@ -2381,8 +2389,9 @@ label_return: } static int -arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { +arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen, bool dirty) { int ret; if (oldp != NULL && oldlenp != 
NULL) { @@ -2430,7 +2439,8 @@ CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) static const ctl_named_node_t * -arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { if (i > NBINS) { return NULL; } @@ -2441,8 +2451,8 @@ CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { if (i > NSIZES - NBINS) { return NULL; } @@ -2450,8 +2460,8 @@ arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } static int -arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; extent_hooks_t *extent_hooks; unsigned arena_ind; @@ -2473,8 +2483,9 @@ label_return: } static int -arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { int ret; unsigned arena_ind; void *ptr; @@ -2505,8 +2516,9 @@ label_return: /******************************************************************************/ static int -prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t 
*oldlenp, void *newp, + size_t newlen) { int ret; bool oldval; @@ -2532,8 +2544,8 @@ label_return: } static int -prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -2558,8 +2570,8 @@ label_return: } static int -prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *filename = NULL; @@ -2581,8 +2593,8 @@ label_return: } static int -prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; bool oldval; @@ -2607,8 +2619,8 @@ label_return: } static int -prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; size_t lg_sample = lg_prof_sample; @@ -2764,8 +2776,9 @@ RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, /* Resets all mutex stats, including global, arena and bin mutexes. 
*/ static int -stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, + void *newp, size_t newlen) { if (!config_stats) { return ENOENT; } @@ -2834,8 +2847,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) { +stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { if (j > NBINS) { return NULL; } @@ -2855,8 +2868,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) static const ctl_named_node_t * -stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t j) { +stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { if (j > NSIZES - NBINS) { return NULL; } @@ -2864,7 +2877,8 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } static const ctl_named_node_t * -stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { const ctl_named_node_t *ret; size_t a; diff --git a/src/extent.c b/src/extent.c index 09d6d771..4b1a6dfd 100644 --- a/src/extent.c +++ b/src/extent.c @@ -119,9 +119,13 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -ph_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, ph_link, +#define ATTR_NONE /* does nothing */ + +ph_gen(ATTR_NONE, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) +#undef ATTR_NONE + typedef enum { lock_result_success, lock_result_failure, diff 
--git a/src/jemalloc.c b/src/jemalloc.c index 28d1344c..82c08877 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -970,6 +970,14 @@ malloc_conf_init(void) { } \ continue; \ } + /* + * One of the CONF_MIN macros below expands, in one of the use points, + * to "unsigned integer < 0", which is always false, triggering the + * GCC -Wtype-limits warning, which we disable here and re-enable below. + */ + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS + #define CONF_MIN_no(um, min) false #define CONF_MIN_yes(um, min) ((um) < (min)) #define CONF_MAX_no(um, max) false @@ -1246,6 +1254,8 @@ malloc_conf_init(void) { #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P + /* Re-enable diagnostic "-Wtype-limits" */ + JEMALLOC_DIAGNOSTIC_POP } if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); @@ -2992,7 +3002,7 @@ label_not_resized: JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, UNUSED int flags) { +je_sallocx(const void *ptr, int flags) { size_t usize; tsdn_t *tsdn; diff --git a/src/mutex.c b/src/mutex.c index 30222b3e..55e37ad4 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -46,7 +46,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - UNUSED nstime_t before = NSTIME_ZERO_INITIALIZER; + nstime_t before = NSTIME_ZERO_INITIALIZER; if (ncpus == 1) { goto label_spin_done; diff --git a/src/rtree.c b/src/rtree.c index 53702cf7..4ae41fe2 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -39,7 +39,7 @@ rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { /* Nodes are never deleted during normal operation. 
*/ not_reached(); } -UNUSED rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = +rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = rtree_node_dalloc_impl; static rtree_leaf_elm_t * @@ -54,7 +54,7 @@ rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { /* Leaves are never deleted during normal operation. */ not_reached(); } -UNUSED rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = +rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = rtree_leaf_dalloc_impl; #ifdef JEMALLOC_JET diff --git a/src/tcache.c b/src/tcache.c index af757540..d624d924 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -206,7 +206,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, /* Lock the arena associated with the first object. */ extent_t *extent = item_extent[0]; arena_t *locked_arena = extent_arena_get(extent); - UNUSED bool idump; + bool idump; if (config_prof) { idump = false; diff --git a/src/tsd.c b/src/tsd.c index 4eceee79..f2b601dd 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -12,6 +12,10 @@ static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; +/* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP __thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; __thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; @@ -41,6 +45,7 @@ tsd_init_head_t tsd_init_head = { ql_head_initializer(blocks), MALLOC_MUTEX_INITIALIZER }; + tsd_wrapper_t tsd_boot_wrapper = { false, TSD_INITIALIZER @@ -48,6 +53,7 @@ tsd_wrapper_t tsd_boot_wrapper = { bool tsd_booted = false; #endif +JEMALLOC_DIAGNOSTIC_POP /******************************************************************************/ diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 536b67ea..cfe1df9d 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ 
-34,6 +34,17 @@ TEST_BEGIN(test_alignment_errors) { } TEST_END + +/* + * GCC "-Walloc-size-larger-than" warning detects when one of the memory + * allocation functions is called with a size larger than the maximum size that + * they support. Here we want to explicitly test that the allocation functions + * do indeed fail properly when this is the case, which triggers the warning. + * Therefore we disable the warning for these tests. + */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN + TEST_BEGIN(test_oom_errors) { size_t alignment, size; void *p; @@ -78,6 +89,9 @@ TEST_BEGIN(test_oom_errors) { } TEST_END +/* Re-enable the "-Walloc-size-larger-than=" warning */ +JEMALLOC_DIAGNOSTIC_POP + TEST_BEGIN(test_alignment_and_size) { #define NITER 4 size_t alignment, size, total; diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 9fe3ad5d..ce5069a7 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -51,6 +51,16 @@ purge(void) { "Unexpected mallctl error"); } +/* + * GCC "-Walloc-size-larger-than" warning detects when one of the memory + * allocation functions is called with a size larger than the maximum size that + * they support. Here we want to explicitly test that the allocation functions + * do indeed fail properly when this is the case, which triggers the warning. + * Therefore we disable the warning for these tests. 
+ */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN + TEST_BEGIN(test_overflow) { size_t largemax; @@ -145,6 +155,9 @@ TEST_BEGIN(test_oom) { } TEST_END +/* Re-enable the "-Walloc-size-larger-than=" warning */ +JEMALLOC_DIAGNOSTIC_POP + TEST_BEGIN(test_basic) { #define MAXSZ (((size_t)1) << 23) size_t sz; diff --git a/test/integration/overflow.c b/test/integration/overflow.c index 6a9785b2..748ebb67 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -1,5 +1,15 @@ #include "test/jemalloc_test.h" +/* + * GCC "-Walloc-size-larger-than" warning detects when one of the memory + * allocation functions is called with a size larger than the maximum size that + * they support. Here we want to explicitly test that the allocation functions + * do indeed fail properly when this is the case, which triggers the warning. + * Therefore we disable the warning for these tests. + */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN + TEST_BEGIN(test_overflow) { unsigned nlextents; size_t mib[4]; @@ -39,6 +49,9 @@ TEST_BEGIN(test_overflow) { } TEST_END +/* Re-enable the "-Walloc-size-larger-than=" warning */ +JEMALLOC_DIAGNOSTIC_POP + int main(void) { return test( diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 7821ca5f..08ed08d3 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -208,6 +208,16 @@ TEST_BEGIN(test_lg_align_and_zero) { } TEST_END +/* + * GCC "-Walloc-size-larger-than" warning detects when one of the memory + * allocation functions is called with a size larger than the maximum size that + * they support. Here we want to explicitly test that the allocation functions + * do indeed fail properly when this is the case, which triggers the warning. + * Therefore we disable the warning for these tests. 
+ */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN + TEST_BEGIN(test_overflow) { size_t largemax; void *p; @@ -234,6 +244,9 @@ TEST_BEGIN(test_overflow) { } TEST_END +/* Re-enable the "-Walloc-size-larger-than=" warning */ +JEMALLOC_DIAGNOSTIC_POP + int main(void) { return test( diff --git a/test/unit/emitter.c b/test/unit/emitter.c index 535c7cf1..6ffd1c3a 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -347,11 +347,11 @@ static void emit_table_row(emitter_t *emitter) { emitter_begin(emitter); emitter_row_t row; - emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title}; + emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}}; abc.str_val = "ABC title"; - emitter_col_t def = {emitter_justify_right, 15, emitter_type_title}; + emitter_col_t def = {emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}}; def.str_val = "DEF title"; - emitter_col_t ghi = {emitter_justify_right, 5, emitter_type_title}; + emitter_col_t ghi = {emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}}; ghi.str_val = "GHI"; emitter_row_init(&row); From fb924dd7bf5e765ffcb273b6b88a515fea54fea8 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 10 Jul 2018 14:48:18 +0200 Subject: [PATCH 1154/2608] Suppress -Wmissing-field-initializer warning only for compilers with buggy implementation --- .../jemalloc/internal/jemalloc_internal_macros.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index a1a761b8..ec8782e6 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -66,8 +66,19 @@ # define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) # define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) # define JEMALLOC_DIAGNOSTIC_IGNORE(W) JEMALLOC_PRAGMA__(GCC diagnostic ignored 
W) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") + +/* + * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and + * all clang versions up to version 7 (currently trunk, unreleased). + * This macro suppresses the warning for the affected compiler versions only. + */ +# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || defined(__clang__) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# endif + # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") # define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ From e904f813b40b4286e10172163c880fd9e1d0608a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 14 Dec 2017 12:46:39 -0800 Subject: [PATCH 1155/2608] Hide size class computation behind a layer of indirection. This class removes almost all the dependencies on size_classes.h, accessing the data there only via the new module sc.h, which does not depend on any configuration options. In a subsequent commit, we'll remove the configure-time size class computations, doing them at boot time, instead. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_externs.h | 1 - include/jemalloc/internal/arena_inlines_b.h | 28 +- include/jemalloc/internal/arena_stats.h | 6 +- include/jemalloc/internal/arena_structs_b.h | 4 +- include/jemalloc/internal/arena_types.h | 4 +- include/jemalloc/internal/base_structs.h | 4 +- include/jemalloc/internal/bin.h | 8 +- include/jemalloc/internal/bit_util.h | 68 ++++ include/jemalloc/internal/bitmap.h | 6 +- include/jemalloc/internal/ctl.h | 6 +- include/jemalloc/internal/extent_inlines.h | 9 +- include/jemalloc/internal/extent_structs.h | 9 +- include/jemalloc/internal/extent_types.h | 2 - .../internal/jemalloc_internal_externs.h | 1 - .../internal/jemalloc_internal_inlines_a.h | 8 +- .../internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/prof_inlines_a.h | 8 +- include/jemalloc/internal/rtree.h | 14 +- include/jemalloc/internal/sc.h | 302 ++++++++++++++++++ include/jemalloc/internal/sz.h | 165 +++++----- include/jemalloc/internal/tcache_externs.h | 4 +- include/jemalloc/internal/tcache_inlines.h | 12 +- include/jemalloc/internal/tcache_structs.h | 10 +- include/jemalloc/internal/tcache_types.h | 4 +- src/arena.c | 108 +++---- src/base.c | 8 +- src/bin.c | 39 ++- src/ckh.c | 7 +- src/ctl.c | 28 +- src/extent.c | 51 +-- src/extent_dss.c | 4 +- src/jemalloc.c | 78 +++-- src/large.c | 20 +- src/sc.c | 62 ++++ src/sz.c | 152 +++------ src/tcache.c | 28 +- test/unit/arena_reset.c | 2 +- test/unit/junk.c | 5 +- test/unit/mallctl.c | 9 +- test/unit/prof_gdump.c | 8 +- test/unit/rtree.c | 17 +- test/unit/size_classes.c | 15 +- test/unit/slab.c | 2 +- test/unit/stats.c | 11 +- test/unit/zero.c | 5 +- 46 files changed, 886 insertions(+), 459 deletions(-) create mode 100644 include/jemalloc/internal/sc.h create mode 100644 src/sc.c diff --git a/Makefile.in b/Makefile.in index 81f899fe..619aae70 100644 --- a/Makefile.in +++ b/Makefile.in @@ -114,6 +114,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof.c \ 
$(srcroot)src/rtree.c \ $(srcroot)src/stats.c \ + $(srcroot)src/sc.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c145c91e..7a469462 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/stats.h" extern ssize_t opt_dirty_decay_ms; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index d388cae9..89603966 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" @@ -111,7 +111,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(size != 0); if (likely(tcache != NULL)) { - if (likely(size <= SMALL_MAXCLASS)) { + if (likely(size <= sc_data_global.small_maxclass)) { return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } @@ -140,7 +140,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind != NSIZES); + assert(szind != SC_NSIZES); return sz_index2size(szind); } @@ -173,7 +173,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { /* Only slab members should be looked up via interior pointers. 
*/ assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); - assert(szind != NSIZES); + assert(szind != SC_NSIZES); return sz_index2size(szind); } @@ -194,7 +194,7 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == extent_szind_get(extent)); - assert(szind < NSIZES); + assert(szind < SC_NSIZES); assert(slab == extent_slab_get(extent)); } @@ -224,7 +224,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (alloc_ctx != NULL) { szind = alloc_ctx->szind; slab = alloc_ctx->slab; - assert(szind != NSIZES); + assert(szind != SC_NSIZES); } else { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, @@ -236,7 +236,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == extent_szind_get(extent)); - assert(szind < NSIZES); + assert(szind < SC_NSIZES); assert(slab == extent_slab_get(extent)); } @@ -246,7 +246,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, slow_path); } else { if (szind < nhbins) { - if (config_prof && unlikely(szind < NBINS)) { + if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { @@ -263,7 +263,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, static inline void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); - assert(size <= LARGE_MAXCLASS); + assert(size <= sc_data_global.large_maxclass); szind_t szind; bool slab; @@ -273,7 +273,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * object, so base szind and slab on the given size. 
*/ szind = sz_size2index(size); - slab = (szind < NBINS); + slab = (szind < SC_NBINS); } if ((config_prof && opt_prof) || config_debug) { @@ -285,7 +285,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { (uintptr_t)ptr, true, &szind, &slab); assert(szind == sz_size2index(size)); - assert((config_prof && opt_prof) || slab == (szind < NBINS)); + assert((config_prof && opt_prof) || slab == (szind < SC_NBINS)); if (config_debug) { extent_t *extent = rtree_extent_read(tsdn, @@ -309,7 +309,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - assert(size <= LARGE_MAXCLASS); + assert(size <= sc_data_global.large_maxclass); if (unlikely(tcache == NULL)) { arena_sdalloc_no_tcache(tsdn, ptr, size); @@ -339,7 +339,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, * object, so base szind and slab on the given size. */ szind = sz_size2index(size); - slab = (szind < NBINS); + slab = (szind < SC_NBINS); } if (config_debug) { @@ -358,7 +358,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, slow_path); } else { if (szind < nhbins) { - if (config_prof && unlikely(szind < NBINS)) { + if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 39b7262a..6dacf74f 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -90,7 +90,7 @@ struct arena_stats_s { mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; /* One element for each large 
size class. */ - arena_stats_large_t lstats[NSIZES - NBINS]; + arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; /* Arena uptime. */ nstime_t uptime; @@ -225,7 +225,7 @@ arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { arena_stats_lock(tsdn, arena_stats); arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - NBINS].nrequests, nrequests); + SC_NBINS].nrequests, nrequests); arena_stats_unlock(tsdn, arena_stats); } diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 38bc9596..96f25f8a 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -10,7 +10,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/ticker.h" @@ -203,7 +203,7 @@ struct arena_s { * * Synchronization: internal. */ - bin_t bins[NBINS]; + bin_t bins[SC_NBINS]; /* * Base allocator, from which arena metadata are allocated. diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 759713c9..c40ae6fd 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -1,8 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H #define JEMALLOC_INTERNAL_ARENA_TYPES_H +#include "jemalloc/internal/sc.h" + /* Maximum number of regions in one slab. */ -#define LG_SLAB_MAXREGS (LG_PAGE - LG_TINY_MIN) +#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) #define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) /* Default decay times in milliseconds. 
*/ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 2102247a..07f214eb 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" /* Embedded at the beginning of every block of base-managed virtual memory. */ struct base_block_s { @@ -46,7 +46,7 @@ struct base_s { base_block_t *blocks; /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[NSIZES]; + extent_heap_t avail[SC_NSIZES]; /* Stats, only maintained if config_stats. */ size_t allocated; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 9b416ada..e04b6c67 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -1,10 +1,11 @@ #ifndef JEMALLOC_INTERNAL_BIN_H #define JEMALLOC_INTERNAL_BIN_H +#include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/bin_stats.h" +#include "jemalloc/internal/sc.h" /* * A bin contains a set of extents that are currently being used for slab @@ -48,7 +49,7 @@ struct bin_info_s { bitmap_info_t bitmap_info; }; -extern const bin_info_t bin_infos[NBINS]; +extern bin_info_t bin_infos[SC_NBINS]; typedef struct bin_s bin_t; @@ -78,6 +79,9 @@ struct bin_s { bin_stats_t stats; }; +void bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]); +void bin_boot(); + /* Initializes a bin to empty. Returns true on error. 
*/ bool bin_init(bin_t *bin); diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 8d078a8a..435b497f 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -162,4 +162,72 @@ lg_floor(size_t x) { #undef BIT_UTIL_INLINE +/* A compile-time version of lg_ceil */ +#define LG_CEIL(x) ( \ + (x) <= (1ULL << 0ULL) ? 0 : \ + (x) <= (1ULL << 1ULL) ? 1 : \ + (x) <= (1ULL << 2ULL) ? 2 : \ + (x) <= (1ULL << 3ULL) ? 3 : \ + (x) <= (1ULL << 4ULL) ? 4 : \ + (x) <= (1ULL << 5ULL) ? 5 : \ + (x) <= (1ULL << 6ULL) ? 6 : \ + (x) <= (1ULL << 7ULL) ? 7 : \ + (x) <= (1ULL << 8ULL) ? 8 : \ + (x) <= (1ULL << 9ULL) ? 9 : \ + (x) <= (1ULL << 10ULL) ? 10 : \ + (x) <= (1ULL << 11ULL) ? 11 : \ + (x) <= (1ULL << 12ULL) ? 12 : \ + (x) <= (1ULL << 13ULL) ? 13 : \ + (x) <= (1ULL << 14ULL) ? 14 : \ + (x) <= (1ULL << 15ULL) ? 15 : \ + (x) <= (1ULL << 16ULL) ? 16 : \ + (x) <= (1ULL << 17ULL) ? 17 : \ + (x) <= (1ULL << 18ULL) ? 18 : \ + (x) <= (1ULL << 19ULL) ? 19 : \ + (x) <= (1ULL << 20ULL) ? 20 : \ + (x) <= (1ULL << 21ULL) ? 21 : \ + (x) <= (1ULL << 22ULL) ? 22 : \ + (x) <= (1ULL << 23ULL) ? 23 : \ + (x) <= (1ULL << 24ULL) ? 24 : \ + (x) <= (1ULL << 25ULL) ? 25 : \ + (x) <= (1ULL << 26ULL) ? 26 : \ + (x) <= (1ULL << 27ULL) ? 27 : \ + (x) <= (1ULL << 28ULL) ? 28 : \ + (x) <= (1ULL << 29ULL) ? 29 : \ + (x) <= (1ULL << 30ULL) ? 30 : \ + (x) <= (1ULL << 31ULL) ? 31 : \ + (x) <= (1ULL << 32ULL) ? 32 : \ + (x) <= (1ULL << 33ULL) ? 33 : \ + (x) <= (1ULL << 34ULL) ? 34 : \ + (x) <= (1ULL << 35ULL) ? 35 : \ + (x) <= (1ULL << 36ULL) ? 36 : \ + (x) <= (1ULL << 37ULL) ? 37 : \ + (x) <= (1ULL << 38ULL) ? 38 : \ + (x) <= (1ULL << 39ULL) ? 39 : \ + (x) <= (1ULL << 40ULL) ? 40 : \ + (x) <= (1ULL << 41ULL) ? 41 : \ + (x) <= (1ULL << 42ULL) ? 42 : \ + (x) <= (1ULL << 43ULL) ? 43 : \ + (x) <= (1ULL << 44ULL) ? 44 : \ + (x) <= (1ULL << 45ULL) ? 45 : \ + (x) <= (1ULL << 46ULL) ? 46 : \ + (x) <= (1ULL << 47ULL) ? 
47 : \ + (x) <= (1ULL << 48ULL) ? 48 : \ + (x) <= (1ULL << 49ULL) ? 49 : \ + (x) <= (1ULL << 50ULL) ? 50 : \ + (x) <= (1ULL << 51ULL) ? 51 : \ + (x) <= (1ULL << 52ULL) ? 52 : \ + (x) <= (1ULL << 53ULL) ? 53 : \ + (x) <= (1ULL << 54ULL) ? 54 : \ + (x) <= (1ULL << 55ULL) ? 55 : \ + (x) <= (1ULL << 56ULL) ? 56 : \ + (x) <= (1ULL << 57ULL) ? 57 : \ + (x) <= (1ULL << 58ULL) ? 58 : \ + (x) <= (1ULL << 59ULL) ? 59 : \ + (x) <= (1ULL << 60ULL) ? 60 : \ + (x) <= (1ULL << 61ULL) ? 61 : \ + (x) <= (1ULL << 62ULL) ? 62 : \ + (x) <= (1ULL << 63ULL) ? 63 : \ + 64) + #endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index ac990290..c3f9cb49 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -3,18 +3,18 @@ #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/bit_util.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" typedef unsigned long bitmap_t; #define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#if LG_SLAB_MAXREGS > LG_CEIL_NSIZES +#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) /* Maximum bitmap bit count is determined by maximum regions per slab. */ # define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS #else /* Maximum bitmap bit count is determined by number of extent size classes. 
*/ -# define LG_BITMAP_MAXBITS LG_CEIL_NSIZES +# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) #endif #define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index d927d948..5576310c 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/stats.h" /* Maximum ctl tree depth. */ @@ -40,8 +40,8 @@ typedef struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - bin_stats_t bstats[NBINS]; - arena_stats_large_t lstats[NSIZES - NBINS]; + bin_stats_t bstats[SC_NBINS]; + arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 9b8ddc27..a43d00db 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/pages.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" static inline void @@ -53,14 +54,14 @@ static inline szind_t extent_szind_get_maybe_invalid(const extent_t *extent) { szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> EXTENT_BITS_SZIND_SHIFT); - assert(szind <= NSIZES); + assert(szind <= SC_NSIZES); return szind; } static inline szind_t extent_szind_get(const extent_t *extent) { szind_t szind = extent_szind_get_maybe_invalid(extent); - assert(szind < NSIZES); /* Never call when "invalid". */ + assert(szind < SC_NSIZES); /* Never call when "invalid". 
*/ return szind; } @@ -234,7 +235,7 @@ extent_bsize_set(extent_t *extent, size_t bsize) { static inline void extent_szind_set(extent_t *extent, szind_t szind) { - assert(szind <= NSIZES); /* NSIZES means "invalid". */ + assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); } @@ -327,7 +328,7 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_addr_set(extent, addr); extent_bsize_set(extent, bsize); extent_slab_set(extent, false); - extent_szind_set(extent, NSIZES); + extent_szind_set(extent, SC_NSIZES); extent_sn_set(extent, sn); extent_state_set(extent, extent_state_active); extent_zeroed_set(extent, true); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 4873b9e9..c6c1e234 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -2,11 +2,12 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" typedef enum { extent_state_active = 0, @@ -112,7 +113,7 @@ struct extent_s { #define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) #define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) #define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) #define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) @@ -180,14 +181,14 @@ struct extents_s { * * Synchronization: mtx. 
*/ - extent_heap_t heaps[NPSIZES+1]; + extent_heap_t heaps[SC_NPSIZES_MAX + 1]; /* * Bitmap for which set bits correspond to non-empty heaps. * * Synchronization: mtx. */ - bitmap_t bitmap[BITMAP_GROUPS(NPSIZES+1)]; + bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES_MAX + 1)]; /* * LRU of all extents in heaps. diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index c0561d99..acbcf27b 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -6,8 +6,6 @@ typedef struct extents_s extents_t; #define EXTENT_HOOKS_INITIALIZER NULL -#define EXTENT_GROW_MAX_PIND (NPSIZES - 1) - /* * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) * is the max ratio between the size of the active extent and the new extent. diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 5beebc01..b7843623 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_EXTERNS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/tsd_types.h" /* TSD checks this to set thread local slow state accordingly. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 6577a4f2..8adc02a6 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE malloc_cpuid_t @@ -108,14 +108,14 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_small_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind < NBINS); + assert(binind < SC_NBINS); return &tcache->bins_small[binind]; } JEMALLOC_ALWAYS_INLINE cache_bin_t * tcache_large_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind >= NBINS &&binind < nhbins); - return &tcache->bins_large[binind - NBINS]; + assert(binind >= SC_NBINS &&binind < nhbins); + return &tcache->bins_large[binind - SC_NBINS]; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 2b0d4f44..83ad10ff 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -142,7 +142,7 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { return NULL; } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index c39bc3d4..07bfd9f3 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ 
b/include/jemalloc/internal/prof_inlines_a.h @@ -57,15 +57,15 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, #ifdef JEMALLOC_ATOMIC_U64 a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); do { - a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - - usize) : 0; + a1 = (a0 >= sc_data_global.large_minclass - usize) + ? a0 - (sc_data_global.large_minclass - usize) : 0; } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &prof_accum->mtx); a0 = prof_accum->accumbytes; - a1 = (a0 >= LARGE_MINCLASS - usize) ? a0 - (LARGE_MINCLASS - usize) : - 0; + a1 = (a0 >= sc_data_global.large_minclass - usize) + ? a0 - (sc_data_global.large_minclass - usize) : 0; prof_accum->accumbytes = a1; malloc_mutex_unlock(tsdn, &prof_accum->mtx); #endif diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index dd452f16..8564965f 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -4,7 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/tsd.h" /* @@ -31,7 +31,7 @@ # error Unsupported number of significant virtual address bits #endif /* Use compact leaf representation if virtual address encoding allows. 
*/ -#if RTREE_NHIB >= LG_CEIL_NSIZES +#if RTREE_NHIB >= LG_CEIL(SC_NSIZES) # define RTREE_LEAF_COMPACT #endif @@ -261,7 +261,7 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, static inline void rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind) { - assert(szind <= NSIZES); + assert(szind <= SC_NSIZES); #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, @@ -313,7 +313,7 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, static inline void rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); + assert(!slab || szind < SC_NBINS); /* * The caller implicitly assures that it is the only writer to the szind @@ -429,7 +429,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { - return NSIZES; + return SC_NSIZES; } return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); } @@ -474,7 +474,7 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, static inline void rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, szind_t szind, bool slab) { - assert(!slab || szind < NBINS); + assert(!slab || szind < SC_NBINS); rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); @@ -486,7 +486,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, NULL, NSIZES, false); + rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false); } #endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/include/jemalloc/internal/sc.h 
b/include/jemalloc/internal/sc.h new file mode 100644 index 00000000..df295bca --- /dev/null +++ b/include/jemalloc/internal/sc.h @@ -0,0 +1,302 @@ +#ifndef JEMALLOC_INTERNAL_SC_H +#define JEMALLOC_INTERNAL_SC_H + +#include "jemalloc/internal/jemalloc_internal_types.h" + +/* + * Size class computations: + * + * These are a little tricky; we'll first start by describing how things + * generally work, and then describe some of the details. + * + * Ignore the first few size classes for a moment. We can then split all the + * remaining size classes into groups. The size classes in a group are spaced + * such that they cover allocation request sizes in a power-of-2 range. The + * power of two is called the base of the group, and the size classes in it + * satisfy allocations in the half-open range (base, base * 2]. There are + * SC_NGROUP size classes in each group, equally spaced in the range, so that + * each one covers allocations for base / SC_NGROUP possible allocation sizes. + * We call that value (base / SC_NGROUP) the delta of the group. Each size class + * is delta larger than the one before it (including the initial size class in a + * group, which is delta large than 2**base, the largest size class in the + * previous group). + * To make the math all work out nicely, we require that SC_NGROUP is a power of + * two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of + * lg_base and lg_delta. For each of these groups then, we have that + * lg_delta == lg_base - SC_LG_NGROUP. + * The size classes in a group with a given lg_base and lg_delta (which, recall, + * can be computed from lg_base for these groups) are therefore: + * base + 1 * delta + * which covers allocations in (base, base + 1 * delta] + * base + 2 * delta + * which covers allocations in (base + 1 * delta, base + 2 * delta]. + * base + 3 * delta + * which covers allocations in (base + 2 * delta, base + 3 * delta]. + * ... 
+ * base + SC_NGROUP * delta ( == 2 * base) + * which covers allocations in (base + (SC_NGROUP - 1) * delta, 2 * base]. + * (Note that currently SC_NGROUP is always 4, so the "..." is empty in + * practice.) + * Note that the last size class in the group is the next power of two (after + * base), so that we've set up the induction correctly for the next group's + * selection of delta. + * + * Now, let's start considering the first few size classes. Two extra constants + * come into play here: LG_QUANTUM and SC_LG_TINY_MIN. LG_QUANTUM ensures + * correct platform alignment; all objects of size (1 << LG_QUANTUM) or larger + * are at least (1 << LG_QUANTUM) aligned; this can be used to ensure that we + * never return improperly aligned memory, by making (1 << LG_QUANTUM) equal the + * highest required alignment of a platform. For allocation sizes smaller than + * (1 << LG_QUANTUM) though, we can be more relaxed (since we don't support + * platforms with types with alignment larger than their size). To allow such + * allocations (without wasting space unnecessarily), we introduce tiny size + * classes; one per power of two, up until we hit the quantum size. There are + * therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes. + * + * Next, we have a size class of size LG_QUANTUM. This can't be the start of a + * group in the sense we described above (covering a power of two range) since, + * if we divided into it to pick a value of delta, we'd get a delta smaller than + * (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which is against the rules. + * + * The first base we can divide by SC_NGROUP while still being at least + * (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by + * having SC_NGROUP size classes, spaced (1 << LG_QUANTUM) apart. These size + * classes are: + * 1 * (1 << LG_QUANTUM) + * 2 * (1 << LG_QUANTUM) + * 3 * (1 << LG_QUANTUM) + * ... (although, as above, this "..." is empty in practice) + * SC_NGROUP * (1 << LG_QUANTUM). 
+ * + * There are SC_NGROUP of these size classes, so we can regard it as a sort of + * pseudo-group, even though it spans multiple powers of 2, is divided + * differently, and both starts and ends on a power of 2 (as opposed to just + * ending). SC_NGROUP is itself a power of two, so the first group after the + * pseudo-group has the power-of-two base SC_NGROUP * (1 << LG_QUANTUM), for a + * lg_base of LG_QUANTUM + SC_LG_NGROUP. We can divide this base into SC_NGROUP + * sizes without violating our LG_QUANTUM requirements, so we can safely set + * lg_delta = lg_base - SC_LG_GROUP (== LG_QUANTUM). + * + * So, in order, the size classes are: + * + * Tiny size classes: + * - Count: LG_QUANTUM - SC_LG_TINY_MIN. + * - Sizes: + * 1 << SC_LG_TINY_MIN + * 1 << (SC_LG_TINY_MIN + 1) + * 1 << (SC_LG_TINY_MIN + 2) + * ... + * 1 << (LG_QUANTUM - 1) + * + * Initial pseudo-group: + * - Count: SC_NGROUP + * - Sizes: + * 1 * (1 << LG_QUANTUM) + * 2 * (1 << LG_QUANTUM) + * 3 * (1 << LG_QUANTUM) + * ... + * SC_NGROUP * (1 << LG_QUANTUM) + * + * Regular group 0: + * - Count: SC_NGROUP + * - Sizes: + * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP and lg_delta of + * lg_base - SC_LG_NGROUP) + * (1 << lg_base) + 1 * (1 << lg_delta) + * (1 << lg_base) + 2 * (1 << lg_delta) + * (1 << lg_base) + 3 * (1 << lg_delta) + * ... + * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ] + * + * Regular group 1: + * - Count: SC_NGROUP + * - Sizes: + * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + 1 and lg_delta of + * lg_base - SC_LG_NGROUP) + * (1 << lg_base) + 1 * (1 << lg_delta) + * (1 << lg_base) + 2 * (1 << lg_delta) + * (1 << lg_base) + 3 * (1 << lg_delta) + * ... + * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ] + * + * ... 
+ * + * Regular group N: + * - Count: SC_NGROUP + * - Sizes: + * (relative to lg_base of LG_QUANTUM + SC_LG_NGROUP + N and lg_delta of + * lg_base - SC_LG_NGROUP) + * (1 << lg_base) + 1 * (1 << lg_delta) + * (1 << lg_base) + 2 * (1 << lg_delta) + * (1 << lg_base) + 3 * (1 << lg_delta) + * ... + * (1 << lg_base) + SC_NGROUP * (1 << lg_delta) [ == (1 << (lg_base + 1)) ] + * + * + * Representation of metadata: + * To make the math easy, we'll mostly work in lg quantities. We record lg_base, + * lg_delta, and ndelta (i.e. number of deltas above the base) on a + * per-size-class basis, and maintain the invariant that, across all size + * classes, size == (1 << lg_base) + ndelta * (1 << lg_delta). + * + * For regular groups (i.e. those with lg_base >= LG_QUANTUM + SC_LG_NGROUP), + * lg_delta is lg_base - SC_LG_NGROUP, and ndelta goes from 1 to SC_NGROUP. + * + * For the initial tiny size classes (if any), lg_base is lg(size class size). + * lg_delta is lg_base for the first size class, and lg_base - 1 for all + * subsequent ones. ndelta is always 0. + * + * For the pseudo-group, if there are no tiny size classes, then we set + * lg_base == LG_QUANTUM, lg_delta == LG_QUANTUM, and have ndelta range from 0 + * to SC_NGROUP - 1. (Note that delta == base, so base + (SC_NGROUP - 1) * delta + * is just SC_NGROUP * base, or (1 << (SC_LG_NGROUP + LG_QUANTUM)), so we do + * indeed get a power of two that way). If there *are* tiny size classes, then + * the first size class needs to have lg_delta relative to the largest tiny size + * class. We therefore set lg_base == LG_QUANTUM - 1, + * lg_delta == LG_QUANTUM - 1, and ndelta == 1, keeping the rest of the + * pseudo-group the same. + * + * + * Other terminology: + * "Small" size classes mean those that are allocated out of bins, which is the + * same as those that are slab allocated. + * "Large" size classes are those that are not small. The cutoff for counting as + * large is page size * group size. 
+ */ + +/* + * Size class N + (1 << SC_LG_NGROUP) twice the size of size class N. + */ +#define SC_LG_NGROUP 2 +#define SC_LG_TINY_MIN 3 + +#if SC_LG_TINY_MIN == 0 +/* The div module doesn't support division by 1, which this would require. */ +#error "Unsupported LG_TINY_MIN" +#endif + +/* + * The definitions below are all determined by the above settings and system + * characteristics. + */ +#define SC_NGROUP (1ULL << SC_LG_NGROUP) +#define SC_PTR_BITS ((1ULL << LG_SIZEOF_PTR) * 8) +#define SC_NTINY (LG_QUANTUM - SC_LG_TINY_MIN) +#define SC_NPSEUDO SC_NGROUP +#define SC_LG_FIRST_REGULAR_BASE (LG_QUANTUM + SC_LG_NGROUP) +/* + * We cap allocations to be less than 2 ** (ptr_bits - 1), so the highest base + * we need is 2 ** (ptr_bits - 2). (This also means that the last group is 1 + * size class shorter than the others). + * We could probably save some space in arenas by capping this at LG_VADDR size. + */ +#define SC_LG_BASE_MAX (SC_PTR_BITS - 2) +#define SC_NREGULAR (SC_NGROUP * \ + (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) +#define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) + +/* + * The number of size classes that are at least a page in size. Note that + * because delta may be smaller than a page, this is not the same as the number + * of size classes that are *multiples* of the page size. + */ +#define SC_NPSIZES_MAX ( \ + /* Start with all the size classes. */ \ + SC_NSIZES \ + /* Subtract out those groups with too small a base. */ \ + - (LG_PAGE - 1 - SC_LG_FIRST_REGULAR_BASE) * SC_NGROUP \ + /* And the pseudo-group. */ \ + - SC_NPSEUDO \ + /* And the tiny group. */ \ + - SC_NTINY \ + /* \ + * In the lg_base == lg_page - 1 group, only the last sc is big \ + * enough to make it to lg_page. \ + */ \ + - (SC_NGROUP - 1)) + +/* + * We declare a size class is binnable if size < page size * group. Or, in other + * words, lg(size) < lg(page size) + lg(group size). + */ +#define SC_NBINS ( \ + /* Sub-regular size classes. 
*/ \ + SC_NTINY + SC_NPSEUDO \ + /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \ + + SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE) \ + /* Last SC of the last group hits the bound exactly; exclude it. */ \ + - 1) + +/* + * The size2index_tab lookup table uses uint8_t to encode each bin index, so we + * cannot support more than 256 small size classes. + */ +#if (SC_NBINS > 256) +# error "Too many small size classes" +#endif + +/* The largest size class in the lookup table. */ +#define SC_LOOKUP_MAXCLASS ((size_t)1 << 12) + +typedef struct sc_s sc_t; +struct sc_s { + /* Size class index, or -1 if not a valid size class. */ + int index; + /* Lg group base size (no deltas added). */ + int lg_base; + /* Lg delta to previous size class. */ + int lg_delta; + /* Delta multiplier. size == 1< LARGE_MAXCLASS)) { - return NPSIZES; + if (unlikely(psz > sc_data_global.large_maxclass)) { + return sc_data_global.npsizes; } - { - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_PAGE) ? 0 : x - - (LG_SIZE_CLASS_GROUP + LG_PAGE); - pszind_t grp = shift << LG_SIZE_CLASS_GROUP; + pszind_t x = lg_floor((psz<<1)-1); + pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ? + 0 : x - (SC_LG_NGROUP + LG_PAGE); + pszind_t grp = shift << SC_LG_NGROUP; - pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? - LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; + pszind_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? 
+ LG_PAGE : x - SC_LG_NGROUP - 1; - size_t delta_inverse_mask = ZU(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + size_t delta_inverse_mask = ZU(-1) << lg_delta; + pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & + ((ZU(1) << SC_LG_NGROUP) - 1); - pszind_t ind = grp + mod; - return ind; - } + pszind_t ind = grp + mod; + return ind; } static inline size_t sz_pind2sz_compute(pszind_t pind) { - if (unlikely(pind == NPSIZES)) { - return LARGE_MAXCLASS + PAGE; + if (unlikely(pind == sc_data_global.npsizes)) { + return sc_data_global.large_maxclass + PAGE; } - { - size_t grp = pind >> LG_SIZE_CLASS_GROUP; - size_t mod = pind & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + size_t grp = pind >> SC_LG_NGROUP; + size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1); - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + size_t grp_size_mask = ~((!!grp)-1); + size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp) + & grp_size_mask; - size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; + size_t shift = (grp == 0) ? 1 : grp; + size_t lg_delta = shift + (LG_PAGE-1); + size_t mod_size = (mod+1) << lg_delta; - size_t sz = grp_size + mod_size; - return sz; - } + size_t sz = grp_size + mod_size; + return sz; } static inline size_t @@ -101,70 +99,67 @@ sz_pind2sz_lookup(pszind_t pind) { static inline size_t sz_pind2sz(pszind_t pind) { - assert(pind < NPSIZES+1); + assert(pind < sc_data_global.npsizes + 1); return sz_pind2sz_lookup(pind); } static inline size_t sz_psz2u(size_t psz) { - if (unlikely(psz > LARGE_MAXCLASS)) { - return LARGE_MAXCLASS + PAGE; - } - { - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? 
- LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (psz + delta_mask) & ~delta_mask; - return usize; + if (unlikely(psz > sc_data_global.large_maxclass)) { + return sc_data_global.large_maxclass + PAGE; } + size_t x = lg_floor((psz<<1)-1); + size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? + LG_PAGE : x - SC_LG_NGROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (psz + delta_mask) & ~delta_mask; + return usize; } static inline szind_t sz_size2index_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { - return NSIZES; + if (unlikely(size > sc_data_global.large_maxclass)) { + return SC_NSIZES; } -#if (NTBINS != 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - szind_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; +#if (SC_NTINY != 0) + if (size <= (ZU(1) << sc_data_global.lg_tiny_maxclass)) { + szind_t lg_tmin = sc_data_global.lg_tiny_maxclass + - sc_data_global.ntiny + 1; szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 0 : lg_ceil - lg_tmin); } #endif { szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM) ? 0 : - x - (LG_SIZE_CLASS_GROUP + LG_QUANTUM); - szind_t grp = shift << LG_SIZE_CLASS_GROUP; + szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 : + x - (SC_LG_NGROUP + LG_QUANTUM); + szind_t grp = shift << SC_LG_NGROUP; - szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - SC_LG_NGROUP - 1; size_t delta_inverse_mask = ZU(-1) << lg_delta; szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); + ((ZU(1) << SC_LG_NGROUP) - 1); - szind_t index = NTBINS + grp + mod; + szind_t index = sc_data_global.ntiny + grp + mod; return index; } } JEMALLOC_ALWAYS_INLINE szind_t sz_size2index_lookup(size_t size) { - assert(size <= LOOKUP_MAXCLASS); - { - szind_t ret = (sz_size2index_tab[(size-1) >> LG_TINY_MIN]); - assert(ret == sz_size2index_compute(size)); - return ret; - } + assert(size <= SC_LOOKUP_MAXCLASS); + szind_t ret = (sz_size2index_tab[(size-1) >> SC_LG_TINY_MIN]); + assert(ret == sz_size2index_compute(size)); + return ret; } JEMALLOC_ALWAYS_INLINE szind_t sz_size2index(size_t size) { assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { + if (likely(size <= SC_LOOKUP_MAXCLASS)) { return sz_size2index_lookup(size); } return sz_size2index_compute(size); @@ -172,20 +167,21 @@ sz_size2index(size_t size) { static inline size_t sz_index2size_compute(szind_t index) { -#if (NTBINS > 0) - if (index < NTBINS) { - return (ZU(1) << (LG_TINY_MAXCLASS - NTBINS + 1 + index)); +#if (SC_NTINY > 0) + if (index < sc_data_global.ntiny) { + return (ZU(1) << (sc_data_global.lg_tiny_maxclass + - sc_data_global.ntiny + 1 + index)); } #endif { - size_t reduced_index = index - NTBINS; - size_t grp = reduced_index >> LG_SIZE_CLASS_GROUP; - size_t mod = reduced_index & ((ZU(1) << LG_SIZE_CLASS_GROUP) - + size_t reduced_index = index - sc_data_global.ntiny; + size_t grp = reduced_index >> SC_LG_NGROUP; + size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - 1); size_t grp_size_mask = ~((!!grp)-1); size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (LG_SIZE_CLASS_GROUP-1))) << grp) & grp_size_mask; + (SC_LG_NGROUP-1))) << grp) & grp_size_mask; size_t shift = (grp == 0) ? 
1 : grp; size_t lg_delta = shift + (LG_QUANTUM-1); @@ -205,18 +201,19 @@ sz_index2size_lookup(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(index < NSIZES); + assert(index < SC_NSIZES); return sz_index2size_lookup(index); } JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { - if (unlikely(size > LARGE_MAXCLASS)) { + if (unlikely(size > sc_data_global.large_maxclass)) { return 0; } -#if (NTBINS > 0) - if (size <= (ZU(1) << LG_TINY_MAXCLASS)) { - size_t lg_tmin = LG_TINY_MAXCLASS - NTBINS + 1; +#if (SC_NTINY > 0) + if (size <= (ZU(1) << sc_data_global.lg_tiny_maxclass)) { + size_t lg_tmin = sc_data_global.lg_tiny_maxclass + - sc_data_global.ntiny + 1; size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); @@ -224,8 +221,8 @@ sz_s2u_compute(size_t size) { #endif { size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; + size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) + ? LG_QUANTUM : x - SC_LG_NGROUP - 1; size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; @@ -248,7 +245,7 @@ sz_s2u_lookup(size_t size) { JEMALLOC_ALWAYS_INLINE size_t sz_s2u(size_t size) { assert(size > 0); - if (likely(size <= LOOKUP_MAXCLASS)) { + if (likely(size <= SC_LOOKUP_MAXCLASS)) { return sz_s2u_lookup(size); } return sz_s2u_compute(size); @@ -265,7 +262,7 @@ sz_sa2u(size_t size, size_t alignment) { assert(alignment != 0 && ((alignment - 1) & alignment) == 0); /* Try for a small size class. */ - if (size <= SMALL_MAXCLASS && alignment < PAGE) { + if (size <= sc_data_global.small_maxclass && alignment < PAGE) { /* * Round size up to the nearest multiple of alignment. 
* @@ -281,20 +278,20 @@ sz_sa2u(size_t size, size_t alignment) { * 192 | 11000000 | 64 */ usize = sz_s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < LARGE_MINCLASS) { + if (usize < sc_data_global.large_minclass) { return usize; } } /* Large size class. Beware of overflow. */ - if (unlikely(alignment > LARGE_MAXCLASS)) { + if (unlikely(alignment > sc_data_global.large_maxclass)) { return 0; } /* Make sure result is a large size class. */ - if (size <= LARGE_MINCLASS) { - usize = LARGE_MINCLASS; + if (size <= sc_data_global.large_minclass) { + usize = sc_data_global.large_minclass; } else { usize = sz_s2u(size); if (usize < size) { diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 790367bd..d63eafde 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -1,15 +1,13 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -#include "jemalloc/internal/size_classes.h" - extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; extern cache_bin_info_t *tcache_bin_info; /* - * Number of tcache bins. There are NBINS small-object bins, plus 0 or more + * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more * large-object bins. 
*/ extern unsigned nhbins; diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index c426c567..b060043b 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" @@ -46,7 +46,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); - assert(binind < NBINS); + assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); @@ -107,7 +107,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, cache_bin_t *bin; bool tcache_success; - assert(binind >= NBINS &&binind < nhbins); + assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); @@ -166,7 +166,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin; cache_bin_info_t *bin_info; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) + <= sc_data_global.small_maxclass); if (slow_path && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, &bin_infos[binind]); @@ -191,7 +192,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin; cache_bin_info_t *bin_info; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) + > sc_data_global.small_maxclass); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); if 
(slow_path && config_fill && unlikely(opt_junk_free)) { diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index b3cd4e5f..27087031 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -1,9 +1,9 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/ticker.h" /* Various uses of this struct need it to be a named type. */ @@ -25,7 +25,7 @@ struct tcache_s { * During tcache initialization, the avail pointer in each element of * tbins is initialized to point to the proper offset within this array. */ - cache_bin_t bins_small[NBINS]; + cache_bin_t bins_small[SC_NBINS]; /* * This data is less hot; we can be a little less careful with our @@ -50,13 +50,13 @@ struct tcache_s { /* Next bin to GC. */ szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ - uint8_t lg_fill_div[NBINS]; + uint8_t lg_fill_div[SC_NBINS]; /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. This might end up * letting us avoid touching an extra page if we don't have to. */ - cache_bin_t bins_large[NSIZES-NBINS]; + cache_bin_t bins_large[SC_NSIZES-SC_NBINS]; }; /* Linkage for list of available (previously used) explicit tcache IDs. 
*/ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index e49bc9d7..f953b8c8 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H #define JEMALLOC_INTERNAL_TCACHE_TYPES_H -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; @@ -45,7 +45,7 @@ typedef struct tcaches_s tcaches_t; /* Number of tcache allocation/deallocation events between incremental GCs. */ #define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / NBINS) + ((TCACHE_GC_SWEEP / NBINS == 0) ? 0 : 1)) + ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) /* Used in TSD static initializer only. Real init in tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {0} diff --git a/src/arena.c b/src/arena.c index eefea0dc..07d91039 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,7 +8,6 @@ #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/util.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -42,7 +41,7 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; -static div_info_t arena_binind_div_info[NBINS]; +static div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_huge_threshold = HUGE_THRESHOLD_DEFAULT; size_t huge_threshold = HUGE_THRESHOLD_DEFAULT; @@ -128,7 +127,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, extents_npages_get(&arena->extents_dirty) + extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); - for (szind_t i = 0; i < NSIZES - NBINS; i++) { + for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.lstats[i].nmalloc); arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc); @@ -151,7 +150,7 @@ 
arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; arena_stats_accum_zu(&astats->allocated_large, - curlextents * sz_index2size(NBINS + i)); + curlextents * sz_index2size(SC_NBINS + i)); } arena_stats_unlock(tsdn, &arena->stats); @@ -162,7 +161,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { szind_t i = 0; - for (; i < NBINS; i++) { + for (; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); @@ -206,7 +205,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_update(&astats->uptime); nstime_subtract(&astats->uptime, &arena->create_time); - for (szind_t i = 0; i < NBINS; i++) { + for (szind_t i = 0; i < SC_NBINS; i++) { bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]); } } @@ -297,11 +296,11 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); - if (usize < LARGE_MINCLASS) { - usize = LARGE_MINCLASS; + if (usize < sc_data_global.large_minclass) { + usize = sc_data_global.large_minclass; } index = sz_size2index(usize); - hindex = (index >= NBINS) ? index - NBINS : 0; + hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].nmalloc, 1); @@ -313,11 +312,11 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); - if (usize < LARGE_MINCLASS) { - usize = LARGE_MINCLASS; + if (usize < sc_data_global.large_minclass) { + usize = sc_data_global.large_minclass; } index = sz_size2index(usize); - hindex = (index >= NBINS) ? index - NBINS : 0; + hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; arena_stats_add_u64(tsdn, &arena->stats, &arena->stats.lstats[hindex].ndalloc, 1); @@ -994,7 +993,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { usize = sz_index2size(alloc_ctx.szind); @@ -1010,7 +1009,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); /* Bins. */ - for (unsigned i = 0; i < NBINS; i++) { + for (unsigned i = 0; i < SC_NBINS; i++) { extent_t *slab; bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -1331,7 +1330,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { size_t usize; extent_t *slab; - assert(binind < NBINS); + assert(binind < SC_NBINS); bin = &arena->bins[binind]; usize = sz_index2size(binind); @@ -1390,7 +1389,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return NULL; } - if (likely(size <= SMALL_MAXCLASS)) { + if (likely(size <= sc_data_global.small_maxclass)) { return arena_malloc_small(tsdn, arena, ind, zero); } return large_malloc(tsdn, arena, sz_index2size(ind), zero); @@ -1401,8 +1400,9 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - if (usize <= SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE - && (usize & PAGE_MASK) == 0))) { + if (usize <= sc_data_global.small_maxclass + && (alignment < PAGE + || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special slab placement. 
*/ ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), zero, tcache, true); @@ -1420,8 +1420,8 @@ void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); - assert(usize <= SMALL_MAXCLASS); + assert(isalloc(tsdn, ptr) == sc_data_global.large_minclass); + assert(usize <= sc_data_global.small_maxclass); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1445,15 +1445,15 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_szind_set(extent, NBINS); + extent_szind_set(extent, SC_NBINS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - NBINS, false); + SC_NBINS, false); - assert(isalloc(tsdn, ptr) == LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == sc_data_global.large_minclass); - return LARGE_MINCLASS; + return sc_data_global.large_minclass; } void @@ -1594,33 +1594,35 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize) { bool ret; /* Calls with non-zero extra had to clamp extra. */ - assert(extra == 0 || size + extra <= LARGE_MAXCLASS); + assert(extra == 0 || size + extra <= sc_data_global.large_maxclass); extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(size > LARGE_MAXCLASS)) { + if (unlikely(size > sc_data_global.large_maxclass)) { ret = true; goto done; } size_t usize_min = sz_s2u(size); size_t usize_max = sz_s2u(size + extra); - if (likely(oldsize <= SMALL_MAXCLASS && usize_min <= SMALL_MAXCLASS)) { + if (likely(oldsize <= sc_data_global.small_maxclass && usize_min + <= sc_data_global.small_maxclass)) { /* * Avoid moving the allocation if the size class can be left the * same. 
*/ assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); - if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != - sz_size2index(oldsize)) && (size > oldsize || usize_max < - oldsize)) { + if ((usize_max > sc_data_global.small_maxclass + || sz_size2index(usize_max) != sz_size2index(oldsize)) + && (size > oldsize || usize_max < oldsize)) { ret = true; goto done; } arena_decay_tick(tsdn, extent_arena_get(extent)); ret = false; - } else if (oldsize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS) { + } else if (oldsize >= sc_data_global.large_minclass + && usize_max >= sc_data_global.large_minclass) { ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, zero); } else { @@ -1641,7 +1643,7 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, zero, tcache, true); } usize = sz_sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { return NULL; } return ipalloct(tsdn, usize, alignment, zero, tcache, arena); @@ -1652,11 +1654,11 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { size_t usize = sz_s2u(size); - if (unlikely(usize == 0 || size > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || size > sc_data_global.large_maxclass)) { return NULL; } - if (likely(usize <= SMALL_MAXCLASS)) { + if (likely(usize <= sc_data_global.small_maxclass)) { /* Try to avoid moving the allocation. 
*/ UNUSED size_t newsize; if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, @@ -1669,7 +1671,8 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, } } - if (oldsize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS) { + if (oldsize >= sc_data_global.large_minclass + && usize >= sc_data_global.large_minclass) { return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, tcache, hook_args); } @@ -1751,8 +1754,8 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, if (new_limit != NULL) { size_t limit = *new_limit; /* Grow no more than the new limit. */ - if ((new_ind = sz_psz2ind(limit + 1) - 1) > - EXTENT_GROW_MAX_PIND) { + if ((new_ind = sz_psz2ind(limit + 1) - 1) + >= sc_data_global.npsizes) { return true; } } @@ -1896,7 +1899,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); - arena->retain_grow_limit = EXTENT_GROW_MAX_PIND; + arena->retain_grow_limit = sc_data_global.npsizes - 1; if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; @@ -1909,7 +1912,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Initialize bins. */ - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { bool err = bin_init(&arena->bins[i]); if (err) { goto label_error; @@ -1982,10 +1985,10 @@ arena_init_huge(void) { bool huge_enabled; /* The threshold should be large size class. */ - if (opt_huge_threshold > LARGE_MAXCLASS || - opt_huge_threshold < LARGE_MINCLASS) { + if (opt_huge_threshold > sc_data_global.large_maxclass || + opt_huge_threshold < sc_data_global.large_minclass) { opt_huge_threshold = 0; - huge_threshold = LARGE_MAXCLASS + PAGE; + huge_threshold = sc_data_global.large_maxclass + PAGE; huge_enabled = false; } else { /* Reserve the index for the huge arena. 
*/ @@ -2001,16 +2004,11 @@ void arena_boot(void) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); -#define REGIND_bin_yes(index, reg_size) \ - div_init(&arena_binind_div_info[(index)], (reg_size)); -#define REGIND_bin_no(index, reg_size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - REGIND_bin_##bin(index, (1U<lg_base) + (sc->ndelta << sc->lg_delta)); + } } void @@ -2055,7 +2053,7 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) { void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < NBINS; i++) { + for (unsigned i = 0; i < SC_NBINS; i++) { bin_prefork(tsdn, &arena->bins[i]); } } @@ -2064,7 +2062,7 @@ void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { bin_postfork_parent(tsdn, &arena->bins[i]); } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -2108,7 +2106,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { bin_postfork_child(tsdn, &arena->bins[i]); } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); diff --git a/src/base.c b/src/base.c index b0324b5d..cabf66c4 100644 --- a/src/base.c +++ b/src/base.c @@ -262,8 +262,8 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, */ size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size + usize)); - pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 : - *pind_last; + pszind_t pind_next = (*pind_last + 1 < sc_data_global.npsizes) ? + *pind_last + 1 : *pind_last; size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? 
min_block_size : next_block_size; @@ -372,7 +372,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->extent_sn_next = extent_sn_next; base->blocks = block; base->auto_thp_switched = false; - for (szind_t i = 0; i < NSIZES; i++) { + for (szind_t i = 0; i < SC_NSIZES; i++) { extent_heap_new(&base->avail[i]); } if (config_stats) { @@ -426,7 +426,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, extent_t *extent = NULL; malloc_mutex_lock(tsdn, &base->mtx); - for (szind_t i = sz_size2index(asize); i < NSIZES; i++) { + for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) { extent = extent_heap_remove_first(&base->avail[i]); if (extent != NULL) { /* Use existing space. */ diff --git a/src/bin.c b/src/bin.c index 0886bc4e..e62babdd 100644 --- a/src/bin.c +++ b/src/bin.c @@ -1,23 +1,34 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -const bin_info_t bin_infos[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ - {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - BIN_INFO_bin_##bin((1U<sc[i]; + bin_info->reg_size = ((size_t)1U << sc->lg_base) + + ((size_t)sc->ndelta << sc->lg_delta); + bin_info->slab_size = (sc->pgs << LG_PAGE); + bin_info->nregs = + (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( + bin_info->nregs); + bin_info->bitmap_info = bitmap_info; + } +} + +void +bin_boot(sc_data_t *sc_data) { + assert(sc_data->initialized); + bin_infos_init(sc_data, bin_infos); +} bool bin_init(bin_t *bin) { diff --git a/src/ckh.c b/src/ckh.c index e95e0a3e..94c4fe69 100644 --- a/src/ckh.c 
+++ b/src/ckh.c @@ -275,7 +275,8 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { lg_curcells++; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 + || usize > sc_data_global.large_maxclass)) { ret = true; goto label_return; } @@ -320,7 +321,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { return; } tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, @@ -396,7 +397,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { ret = true; goto label_return; } diff --git a/src/ctl.c b/src/ctl.c index 3f7dea16..38529d08 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -8,7 +8,7 @@ #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" /******************************************************************************/ @@ -710,9 +710,9 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nmalloc_small = 0; ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; - memset(ctl_arena->astats->bstats, 0, NBINS * + memset(ctl_arena->astats->bstats, 0, SC_NBINS * sizeof(bin_stats_t)); - memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * + memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); } } @@ -729,7 +729,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, 
ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats); - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { ctl_arena->astats->allocated_small += ctl_arena->astats->bstats[i].curregs * sz_index2size(i); @@ -841,7 +841,7 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->astats.uptime = astats->astats.uptime; } - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; sdstats->bstats[i].nrequests += @@ -867,7 +867,7 @@ MUTEX_PROF_ARENA_MUTEXES &astats->bstats[i].mutex_data); } - for (i = 0; i < NSIZES - NBINS; i++) { + for (i = 0; i < SC_NSIZES - SC_NBINS; i++) { ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, @@ -2433,7 +2433,7 @@ arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) -CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) +CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) @@ -2441,19 +2441,19 @@ CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > NBINS) { + if (i > SC_NBINS) { return NULL; } return super_arenas_bin_i_node; } -CTL_RO_NL_GEN(arenas_nlextents, NSIZES - NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(NBINS+(szind_t)mib[2]), +CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) +CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]), 
size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > NSIZES - NBINS) { + if (i > SC_NSIZES - SC_NBINS) { return NULL; } return super_arenas_lextent_i_node; @@ -2818,7 +2818,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); - for (szind_t i = 0; i < NBINS; i++) { + for (szind_t i = 0; i < SC_NBINS; i++) { bin_t *bin = &arena->bins[i]; MUTEX_PROF_RESET(bin->lock); } @@ -2849,7 +2849,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > NBINS) { + if (j > SC_NBINS) { return NULL; } return super_stats_arenas_i_bins_j_node; @@ -2870,7 +2870,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, static const ctl_named_node_t * stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > NSIZES - NBINS) { + if (j > SC_NSIZES - SC_NBINS) { return NULL; } return super_stats_arenas_i_lextents_j_node; diff --git a/src/extent.c b/src/extent.c index 4b1a6dfd..0953940b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,7 +20,7 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static const bitmap_info_t extents_bitmap_info = - BITMAP_INFO_INITIALIZER(NPSIZES+1); + BITMAP_INFO_INITIALIZER(SC_NPSIZES_MAX+1); static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, @@ -259,7 +259,7 @@ extent_size_quantize_ceil(size_t size) { size_t ret; assert(size > 0); - assert(size - sz_large_pad <= LARGE_MAXCLASS); + assert(size - sz_large_pad <= sc_data_global.large_maxclass); assert((size & PAGE_MASK) == 0); ret = extent_size_quantize_floor(size); @@ -288,7 +288,7 @@ extents_init(tsdn_t *tsdn, extents_t 
*extents, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } - for (unsigned i = 0; i < NPSIZES+1; i++) { + for (unsigned i = 0; i < sc_data_global.npsizes + 1; i++) { extent_heap_new(&extents->heaps[i]); } bitmap_init(extents->bitmap, &extents_bitmap_info, true); @@ -375,7 +375,7 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, &extents_bitmap_info, (size_t)pind); i < pind_max; i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)i+1)) { - assert(i < NPSIZES); + assert(i < sc_data_global.npsizes); assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); uintptr_t base = (uintptr_t)extent_base_get(extent); @@ -405,7 +405,7 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); - if (i < NPSIZES+1) { + if (i < sc_data_global.npsizes + 1) { /* * In order to reduce fragmentation, avoid reusing and splitting * large extents for much smaller sizes. 
@@ -433,8 +433,9 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, - &extents_bitmap_info, (size_t)pind); i < NPSIZES+1; i = - (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + &extents_bitmap_info, (size_t)pind); + i < sc_data_global.npsizes + 1; + i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)i+1)) { assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); @@ -442,10 +443,10 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, if (ret == NULL || extent_snad_comp(extent, ret) < 0) { ret = extent; } - if (i == NPSIZES) { + if (i == sc_data_global.npsizes) { break; } - assert(i < NPSIZES); + assert(i < sc_data_global.npsizes); } return ret; @@ -821,7 +822,7 @@ extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { extent_lock(tsdn, extent); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, NSIZES, false); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); @@ -962,7 +963,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, if (leadsize != 0) { *lead = *extent; *extent = extent_split_impl(tsdn, arena, r_extent_hooks, - *lead, leadsize, NSIZES, false, esize + trailsize, szind, + *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, growing_retained); if (*extent == NULL) { *to_leak = *lead; @@ -974,7 +975,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the trail. 
*/ if (trailsize != 0) { *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, - esize, szind, slab, trailsize, NSIZES, false, + esize, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *extent; @@ -991,7 +992,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, * splitting occurred. */ extent_szind_set(*extent, szind); - if (szind != NSIZES) { + if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_addr_get(*extent), szind, slab); if (slab && extent_size_get(*extent) > PAGE) { @@ -1248,11 +1249,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->extent_grow_next + egn_skip == NPSIZES) { + if (arena->extent_grow_next + egn_skip == + sc_data_global.npsizes) { /* Outside legal range. */ goto label_err; } - assert(arena->extent_grow_next + egn_skip < NPSIZES); + assert(arena->extent_grow_next + egn_skip + < sc_data_global.npsizes); alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } @@ -1275,7 +1278,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_hook_post_reentrancy(tsdn); } - extent_init(extent, arena, ptr, alloc_size, false, NSIZES, + extent_init(extent, arena, ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true); if (ptr == NULL) { @@ -1610,7 +1613,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); - extent_szind_set(extent, NSIZES); + extent_szind_set(extent, SC_NSIZES); if (extent_slab_get(extent)) { extent_interior_deregister(tsdn, rtree_ctx, extent); extent_slab_set(extent, false); @@ -1622,7 +1625,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent 
= extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); - } else if (extent_size_get(extent) >= LARGE_MINCLASS) { + } else if (extent_size_get(extent) >= sc_data_global.large_minclass) { /* Always coalesce large extents eagerly. */ bool coalesced; size_t prev_size; @@ -1633,7 +1636,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, r_extent_hooks, rtree_ctx, extents, extent, &coalesced, growing_retained); } while (coalesced && - extent_size_get(extent) >= prev_size + LARGE_MINCLASS); + extent_size_get(extent) + >= prev_size + sc_data_global.large_minclass); } extent_deactivate_locked(tsdn, arena, extents, extent); @@ -2132,22 +2136,23 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, if (a_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, - NSIZES, false); + SC_NSIZES, false); } if (b_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, - NSIZES, false); + SC_NSIZES, false); } else { b_elm_b = b_elm_a; } extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_szind_set(a, NSIZES); + extent_szind_set(a, SC_NSIZES); extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? 
extent_sn_get(a) : extent_sn_get(b)); extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, NSIZES, false); + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, + false); extent_unlock2(tsdn, a, b); diff --git a/src/extent_dss.c b/src/extent_dss.c index 2b1ea9ca..6c56cf65 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -154,7 +154,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, (uintptr_t)gap_addr_page; if (gap_size_page != 0) { extent_init(gap, arena, gap_addr_page, - gap_size_page, false, NSIZES, + gap_size_page, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, false, true, true); } @@ -198,7 +198,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_t extent; extent_init(&extent, arena, ret, size, - size, false, NSIZES, + size, false, SC_NSIZES, extent_state_active, false, true, true); if (extent_purge_forced_wrapper(tsdn, diff --git a/src/jemalloc.c b/src/jemalloc.c index 82c08877..664c5f89 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -13,7 +13,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" @@ -1158,7 +1158,8 @@ malloc_conf_init(void) { /* Experimental feature. 
Will be documented later.*/ CONF_HANDLE_SIZE_T(opt_huge_threshold, "experimental_huge_threshold", - LARGE_MINCLASS, LARGE_MAXCLASS, yes, yes, false) + sc_data_global.large_minclass, + sc_data_global.large_maxclass, yes, yes, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, (sizeof(size_t) << 3), yes, yes, false) @@ -1294,6 +1295,10 @@ static bool malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; + sc_boot(); + sz_boot(&sc_data_global); + bin_boot(&sc_data_global); + if (config_prof) { prof_boot0(); } @@ -1747,12 +1752,13 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, szind_t ind_large; size_t bumped_usize = usize; - if (usize <= SMALL_MAXCLASS) { - assert(((dopts->alignment == 0) ? sz_s2u(LARGE_MINCLASS) : - sz_sa2u(LARGE_MINCLASS, dopts->alignment)) - == LARGE_MINCLASS); - ind_large = sz_size2index(LARGE_MINCLASS); - bumped_usize = sz_s2u(LARGE_MINCLASS); + if (usize <= sc_data_global.small_maxclass) { + assert(((dopts->alignment == 0) ? 
+ sz_s2u(sc_data_global.large_minclass) : + sz_sa2u(sc_data_global.large_minclass, dopts->alignment)) + == sc_data_global.large_minclass); + ind_large = sz_size2index(sc_data_global.large_minclass); + bumped_usize = sz_s2u(sc_data_global.large_minclass); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { @@ -1855,16 +1861,18 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (dopts->alignment == 0) { ind = sz_size2index(size); - if (unlikely(ind >= NSIZES)) { + if (unlikely(ind >= SC_NSIZES)) { goto label_oom; } if (config_stats || (config_prof && opt_prof)) { usize = sz_index2size(ind); - assert(usize > 0 && usize <= LARGE_MAXCLASS); + assert(usize > 0 && usize + <= sc_data_global.large_maxclass); } } else { usize = sz_sa2u(size, dopts->alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 + || usize > sc_data_global.large_maxclass)) { goto label_oom; } } @@ -1900,7 +1908,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize <= SMALL_MAXCLASS); + alloc_ctx.slab = (usize + <= sc_data_global.small_maxclass); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { @@ -2198,9 +2207,9 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, if (tctx == NULL) { return NULL; } - if (usize <= SMALL_MAXCLASS) { - p = iralloc(tsd, old_ptr, old_usize, LARGE_MINCLASS, 0, false, - hook_args); + if (usize <= sc_data_global.small_maxclass) { + p = iralloc(tsd, old_ptr, old_usize, + sc_data_global.large_minclass, 0, false, hook_args); if (p == NULL) { return NULL; } @@ -2257,7 +2266,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, 
rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); size_t usize; if (config_prof && opt_prof) { @@ -2384,12 +2393,13 @@ je_realloc(void *ptr, size_t arg_size) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (config_prof && opt_prof) { usize = sz_s2u(size); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 + || usize > sc_data_global.large_maxclass)) { ret = NULL; } else { ret = irealloc_prof(tsd, ptr, old_usize, usize, @@ -2702,9 +2712,10 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, if (tctx == NULL) { return NULL; } - if (usize <= SMALL_MAXCLASS) { - p = iralloct(tsdn, old_ptr, old_usize, LARGE_MINCLASS, - alignment, zero, tcache, arena, hook_args); + if (usize <= sc_data_global.small_maxclass) { + p = iralloct(tsdn, old_ptr, old_usize, + sc_data_global.large_minclass, alignment, zero, tcache, + arena, hook_args); if (p == NULL) { return NULL; } @@ -2804,7 +2815,7 @@ je_rallocx(void *ptr, size_t size, int flags) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -2813,7 +2824,8 @@ je_rallocx(void *ptr, size_t size, int flags) { if (config_prof && opt_prof) { usize = (alignment == 0) ? 
sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > LARGE_MAXCLASS)) { + if (unlikely(usize == 0 + || usize > sc_data_global.large_maxclass)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, @@ -2898,17 +2910,19 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, */ if (alignment == 0) { usize_max = sz_s2u(size+extra); - assert(usize_max > 0 && usize_max <= LARGE_MAXCLASS); + assert(usize_max > 0 + && usize_max <= sc_data_global.large_maxclass); } else { usize_max = sz_sa2u(size+extra, alignment); - if (unlikely(usize_max == 0 || usize_max > LARGE_MAXCLASS)) { + if (unlikely(usize_max == 0 + || usize_max > sc_data_global.large_maxclass)) { /* * usize_max is out of range, and chances are that * allocation will fail, but use the maximum possible * value and carry on with prof_alloc_prep(), just in * case allocation succeeds. */ - usize_max = LARGE_MAXCLASS; + usize_max = sc_data_global.large_maxclass; } } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); @@ -2951,24 +2965,24 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + * extra) numerical overflow, but we may need to clamp extra to avoid - * exceeding LARGE_MAXCLASS. + * exceeding sc_data_global.large_maxclass. * * Ordinarily, size limit checking is handled deeper down, but here we * have to check as part of (size + extra) clamping, since we need the * clamped value in the above helper functions. 
*/ - if (unlikely(size > LARGE_MAXCLASS)) { + if (unlikely(size > sc_data_global.large_maxclass)) { usize = old_usize; goto label_not_resized; } - if (unlikely(LARGE_MAXCLASS - size < extra)) { - extra = LARGE_MAXCLASS - size; + if (unlikely(sc_data_global.large_maxclass - size < extra)) { + extra = sc_data_global.large_maxclass - size; } if (config_prof && opt_prof) { @@ -3155,7 +3169,7 @@ je_nallocx(size_t size, int flags) { check_entry_exit_locking(tsdn); usize = inallocx(tsdn, size, flags); - if (unlikely(usize > LARGE_MAXCLASS)) { + if (unlikely(usize > sc_data_global.large_maxclass)) { LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } diff --git a/src/large.c b/src/large.c index 03eecfad..87d9ec0b 100644 --- a/src/large.c +++ b/src/large.c @@ -28,7 +28,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(!tsdn_null(tsdn) || arena != NULL); ausize = sz_sa2u(usize, alignment); - if (unlikely(ausize == 0 || ausize > LARGE_MAXCLASS)) { + if (unlikely(ausize == 0 || ausize > sc_data_global.large_maxclass)) { return NULL; } @@ -109,7 +109,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, &extent_hooks, extent, usize + sz_large_pad, - sz_size2index(usize), false, diff, NSIZES, false); + sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { return true; } @@ -154,17 +154,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool new_mapping; if ((trail = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_dirty, extent_past_get(extent), trailsize, 0, - CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL + CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, &extent_hooks, &arena->extents_muzzy, extent_past_get(extent), trailsize, 0, - CACHELINE, false, NSIZES, &is_zeroed_trail, &commit)) != NULL) { + CACHELINE, 
false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, false, - NSIZES, &is_zeroed_trail, &commit)) == NULL) { + SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; } if (config_stats) { @@ -221,9 +221,10 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ - assert(usize_min > 0 && usize_max <= LARGE_MAXCLASS); + assert(usize_min > 0 && usize_max <= sc_data_global.large_maxclass); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= LARGE_MINCLASS && usize_max >= LARGE_MINCLASS); + assert(oldusize >= sc_data_global.large_minclass + && usize_max >= sc_data_global.large_minclass); if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ @@ -277,9 +278,10 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ - assert(usize > 0 && usize <= LARGE_MAXCLASS); + assert(usize > 0 && usize <= sc_data_global.large_maxclass); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= LARGE_MINCLASS && usize >= LARGE_MINCLASS); + assert(oldusize >= sc_data_global.large_minclass + && usize >= sc_data_global.large_minclass); /* Try to avoid moving the allocation. 
*/ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { diff --git a/src/sc.c b/src/sc.c new file mode 100644 index 00000000..943d7875 --- /dev/null +++ b/src/sc.c @@ -0,0 +1,62 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/size_classes.h" + +sc_data_t sc_data_global; + +static void +fill_sc(sc_data_t *data, int index, int lg_base, int lg_delta, int ndelta, + bool psz, bool bin, int pgs, int lg_delta_lookup) { + sc_t *sc = &data->sc[index]; + sc->index = index; + sc->lg_base = lg_base; + sc->lg_delta = lg_delta; + sc->ndelta = ndelta; + sc->psz = psz; + sc->bin = bin; + sc->pgs = pgs; + sc->lg_delta_lookup = lg_delta_lookup; +} + +void +sc_data_init(sc_data_t *data) { + assert(SC_NTINY == NTBINS); + assert(SC_NSIZES == NSIZES); + assert(SC_NBINS == NBINS); + assert(NPSIZES <= SC_NPSIZES_MAX); + assert(!data->initialized); + data->initialized = true; + data->ntiny = NTBINS; + data->nlbins = NLBINS; + data->nbins = NBINS; + data->nsizes = NSIZES; + data->lg_ceil_nsizes = LG_CEIL_NSIZES; + data->npsizes = NPSIZES; +#if SC_NTINY != 0 + data->lg_tiny_maxclass = LG_TINY_MAXCLASS; +#else + data->lg_tiny_maxclass = -1; +#endif + data->lookup_maxclass = LOOKUP_MAXCLASS; + data->small_maxclass = SMALL_MAXCLASS; + data->lg_large_minclass = LG_LARGE_MINCLASS; + data->large_minclass = LARGE_MINCLASS; + data->large_maxclass = LARGE_MAXCLASS; +#define no 0 +#define yes 1 +#define SC(index, lg_base_base, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + fill_sc(data, index, lg_base_base, lg_delta, ndelta, psz, bin, \ + pgs, lg_delta_lookup); + SIZE_CLASSES +#undef no +#undef yes +#undef SC +} + +void +sc_boot() { + sc_data_init(&sc_data_global); +} diff --git a/src/sz.c b/src/sz.c index 9de77e45..e038728e 100644 --- a/src/sz.c +++ b/src/sz.c @@ -2,106 +2,60 @@ #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) -const size_t 
sz_pind2sz_tab[NPSIZES+1] = { -#define PSZ_yes(lg_grp, ndelta, lg_delta) \ - (((ZU(1)<sc[i]; + if (sc->psz) { + sz_pind2sz_tab[pind] = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << sc->lg_delta); + pind++; + } + } + sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE; +} JEMALLOC_ALIGNED(CACHELINE) -const size_t sz_index2size_tab[NSIZES] = { -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \ - ((ZU(1)<sc[i]; + sz_index2size_tab[i] = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << (sc->lg_delta)); + } +} + +/* + * To keep this table small, we divide sizes by the tiny min size, which gives + * the smallest interval for which the result can change. + */ JEMALLOC_ALIGNED(CACHELINE) -const uint8_t sz_size2index_tab[] = { -#if LG_TINY_MIN == 0 -/* The div module doesn't support division by 1. */ -#error "Unsupported LG_TINY_MIN" -#define S2B_0(i) i, -#elif LG_TINY_MIN == 1 -#warning "Dangerous LG_TINY_MIN" -#define S2B_1(i) i, -#elif LG_TINY_MIN == 2 -#warning "Dangerous LG_TINY_MIN" -#define S2B_2(i) i, -#elif LG_TINY_MIN == 3 -#define S2B_3(i) i, -#elif LG_TINY_MIN == 4 -#define S2B_4(i) i, -#elif LG_TINY_MIN == 5 -#define S2B_5(i) i, -#elif LG_TINY_MIN == 6 -#define S2B_6(i) i, -#elif LG_TINY_MIN == 7 -#define S2B_7(i) i, -#elif LG_TINY_MIN == 8 -#define S2B_8(i) i, -#elif LG_TINY_MIN == 9 -#define S2B_9(i) i, -#elif LG_TINY_MIN == 10 -#define S2B_10(i) i, -#elif LG_TINY_MIN == 11 -#define S2B_11(i) i, -#else -#error "Unsupported LG_TINY_MIN" -#endif -#if LG_TINY_MIN < 1 -#define S2B_1(i) S2B_0(i) S2B_0(i) -#endif -#if LG_TINY_MIN < 2 -#define S2B_2(i) S2B_1(i) S2B_1(i) -#endif -#if LG_TINY_MIN < 3 -#define S2B_3(i) S2B_2(i) S2B_2(i) -#endif -#if LG_TINY_MIN < 4 -#define S2B_4(i) S2B_3(i) S2B_3(i) -#endif -#if LG_TINY_MIN < 5 -#define S2B_5(i) S2B_4(i) S2B_4(i) -#endif -#if LG_TINY_MIN < 6 -#define S2B_6(i) S2B_5(i) S2B_5(i) -#endif -#if LG_TINY_MIN < 7 -#define S2B_7(i) S2B_6(i) S2B_6(i) -#endif -#if LG_TINY_MIN < 8 -#define S2B_8(i) 
S2B_7(i) S2B_7(i) -#endif -#if LG_TINY_MIN < 9 -#define S2B_9(i) S2B_8(i) S2B_8(i) -#endif -#if LG_TINY_MIN < 10 -#define S2B_10(i) S2B_9(i) S2B_9(i) -#endif -#if LG_TINY_MIN < 11 -#define S2B_11(i) S2B_10(i) S2B_10(i) -#endif -#define S2B_no(i) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup) \ - S2B_##lg_delta_lookup(index) - SIZE_CLASSES -#undef S2B_3 -#undef S2B_4 -#undef S2B_5 -#undef S2B_6 -#undef S2B_7 -#undef S2B_8 -#undef S2B_9 -#undef S2B_10 -#undef S2B_11 -#undef S2B_no -#undef SC -}; +uint8_t sz_size2index_tab[SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN]; + +static void +sz_boot_size2index_tab(const sc_data_t *sc_data) { + size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN); + size_t dst_ind = 0; + for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; + sc_ind++) { + const sc_t *sc = &sc_data->sc[sc_ind]; + size_t sz = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << sc->lg_delta); + size_t max_ind = ((sz - 1) >> SC_LG_TINY_MIN); + for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { + sz_size2index_tab[dst_ind] = sc_ind; + } + } +} + +void +sz_boot(const sc_data_t *sc_data) { + sz_boot_pind2sz_tab(sc_data); + sz_boot_index2size_tab(sc_data); + sz_boot_size2index_tab(sc_data); +} diff --git a/src/tcache.c b/src/tcache.c index d624d924..edd047ab 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" +#include "jemalloc/internal/sc.h" /******************************************************************************/ /* Data. 
*/ @@ -41,7 +41,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; cache_bin_t *tbin; - if (binind < NBINS) { + if (binind < SC_NBINS) { tbin = tcache_small_bin_get(tcache, binind); } else { tbin = tcache_large_bin_get(tcache, binind); @@ -50,7 +50,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ - if (binind < NBINS) { + if (binind < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, tbin, binind, tbin->ncached - tbin->low_water + (tbin->low_water >> 2)); @@ -72,7 +72,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. */ - if (binind < NBINS && tcache->lg_fill_div[binind] > 1) { + if (binind < SC_NBINS && tcache->lg_fill_div[binind] > 1) { tcache->lg_fill_div[binind]--; } } @@ -105,7 +105,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { bool merged_stats = false; - assert(binind < NBINS); + assert(binind < SC_NBINS); assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; @@ -369,10 +369,10 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS); - memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS)); + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); + memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); unsigned i = 0; - for (; i < NBINS; i++) { + for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); /* @@ -464,7 +464,7 @@ static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tcache->arena != NULL); - for (unsigned i = 0; i < NBINS; i++) { + for (unsigned i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = 
tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); @@ -472,7 +472,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tbin->tstats.nrequests == 0); } } - for (unsigned i = NBINS; i < nhbins; i++) { + for (unsigned i = SC_NBINS; i < nhbins; i++) { cache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); @@ -538,7 +538,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); /* Merge and reset tcache stats. */ - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { bin_t *bin = &arena->bins[i]; cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); @@ -658,8 +658,8 @@ bool tcache_boot(tsdn_t *tsdn) { /* If necessary, clamp opt_lg_tcache_max. */ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < - SMALL_MAXCLASS) { - tcache_maxclass = SMALL_MAXCLASS; + sc_data_global.small_maxclass) { + tcache_maxclass = sc_data_global.small_maxclass; } else { tcache_maxclass = (ZU(1) << opt_lg_tcache_max); } @@ -679,7 +679,7 @@ tcache_boot(tsdn_t *tsdn) { } stack_nelms = 0; unsigned i; - for (i = 0; i < NBINS; i++) { + for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index c1ccb097..96b042dd 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -77,7 +77,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - if (szind == NSIZES) { + if (szind == SC_NSIZES) { return 0; } diff --git a/test/unit/junk.c b/test/unit/junk.c index 243ced41..91c6e5b1 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -123,13 +123,14 @@ test_junk(size_t sz_min, size_t sz_max) { TEST_BEGIN(test_junk_small) { test_skip_if(!config_fill); - test_junk(1, SMALL_MAXCLASS-1); + test_junk(1, sc_data_global.small_maxclass - 1); } TEST_END 
TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); - test_junk(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); + test_junk(sc_data_global.small_maxclass + 1, + (1U << (sc_data_global.lg_large_minclass + 1))); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d64b4019..230ecb0e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -581,7 +581,7 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(default_limit, sz_pind2sz(EXTENT_GROW_MAX_PIND), + assert_zu_eq(default_limit, sz_pind2sz(sc_data_global.npsizes - 1), "Unexpected default for retain_grow_limit"); new_limit = PAGE - 1; @@ -686,8 +686,8 @@ TEST_BEGIN(test_arenas_constants) { TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); - TEST_ARENAS_CONSTANT(unsigned, nbins, NBINS); - TEST_ARENAS_CONSTANT(unsigned, nlextents, NSIZES - NBINS); + TEST_ARENAS_CONSTANT(unsigned, nbins, SC_NBINS); + TEST_ARENAS_CONSTANT(unsigned, nlextents, SC_NSIZES - SC_NBINS); #undef TEST_ARENAS_CONSTANT } @@ -720,7 +720,8 @@ TEST_BEGIN(test_arenas_lextent_constants) { assert_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) - TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, LARGE_MINCLASS); + TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, + sc_data_global.large_minclass); #undef TEST_ARENAS_LEXTENT_CONSTANT } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index fcb434cb..0b8d7c34 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -29,12 +29,12 @@ TEST_BEGIN(test_gdump) { prof_dump_open = prof_dump_open_intercept; did_prof_dump_open = false; - p = mallocx((1U << LG_LARGE_MINCLASS), 0); + p = mallocx((1U << sc_data_global.lg_large_minclass), 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); did_prof_dump_open = false; - q = mallocx((1U << 
LG_LARGE_MINCLASS), 0); + q = mallocx((1U << sc_data_global.lg_large_minclass), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); @@ -45,7 +45,7 @@ TEST_BEGIN(test_gdump) { "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; - r = mallocx((1U << LG_LARGE_MINCLASS), 0); + r = mallocx((1U << sc_data_global.lg_large_minclass), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_false(did_prof_dump_open, "Unexpected profile dump"); @@ -56,7 +56,7 @@ TEST_BEGIN(test_gdump) { "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; - s = mallocx((1U << LG_LARGE_MINCLASS), 0); + s = mallocx((1U << sc_data_global.lg_large_minclass), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 908100fa..4d1daf2c 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -85,10 +85,10 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; - extent_init(&extent_a, NULL, NULL, LARGE_MINCLASS, false, - sz_size2index(LARGE_MINCLASS), 0, extent_state_active, false, - false, true); - extent_init(&extent_b, NULL, NULL, 0, false, NSIZES, 0, + extent_init(&extent_a, NULL, NULL, sc_data_global.large_minclass, false, + sz_size2index(sc_data_global.large_minclass), 0, + extent_state_active, false, false, true); + extent_init(&extent_b, NULL, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true); tsdn_t *tsdn = tsdn_fetch(); @@ -125,7 +125,7 @@ TEST_BEGIN(test_rtree_bits) { PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; extent_t extent; - extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, + extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true); rtree_t *rtree = &test_rtree; @@ -135,7 +135,7 @@ 
TEST_BEGIN(test_rtree_bits) { for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - &extent, NSIZES, false), + &extent, SC_NSIZES, false), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, @@ -166,7 +166,7 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_data_init(&rtree_ctx); extent_t extent; - extent_init(&extent, NULL, NULL, 0, false, NSIZES, 0, + extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); @@ -177,7 +177,8 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_write(tsdn, rtree, elm, &extent, NSIZES, false); + rtree_leaf_elm_write(tsdn, rtree, elm, &extent, SC_NSIZES, + false); assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, keys[i], true), &extent, "rtree_extent_read() should return previously set value"); diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index bcff5609..7c28e166 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -142,11 +142,11 @@ TEST_BEGIN(test_overflow) { max_size_class = get_max_size_class(); max_psz = max_size_class + PAGE; - assert_u_eq(sz_size2index(max_size_class+1), NSIZES, + assert_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), NSIZES, + assert_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_u_eq(sz_size2index(SIZE_T_MAX), NSIZES, + assert_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); assert_zu_eq(sz_s2u(max_size_class+1), 0, @@ -156,13 +156,16 @@ TEST_BEGIN(test_overflow) { 
assert_zu_eq(sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow"); - assert_u_eq(sz_psz2ind(max_size_class+1), NPSIZES, + assert_u_eq(sz_psz2ind(max_size_class+1), sc_data_global.npsizes, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), NPSIZES, + assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), sc_data_global.npsizes, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(SIZE_T_MAX), NPSIZES, + assert_u_eq(sz_psz2ind(SIZE_T_MAX), sc_data_global.npsizes, "sz_psz2ind() should return NPSIZES on overflow"); + assert_u_le(sc_data_global.npsizes, SC_NPSIZES_MAX, + "Dynamic value of npsizes is higher than static bound."); + assert_zu_eq(sz_psz2u(max_size_class+1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 7e662aed..ef718821 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -3,7 +3,7 @@ TEST_BEGIN(test_arena_slab_regind) { szind_t binind; - for (binind = 0; binind < NBINS; binind++) { + for (binind = 0; binind < SC_NBINS; binind++) { size_t regind; extent_t slab; const bin_info_t *bin_info = &bin_infos[binind]; diff --git a/test/unit/stats.c b/test/unit/stats.c index 231010e4..8fe0f3ad 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -33,7 +33,7 @@ TEST_BEGIN(test_stats_large) { size_t sz; int expected = config_stats ? 
0 : ENOENT; - p = mallocx(SMALL_MAXCLASS+1, MALLOCX_ARENA(0)); + p = mallocx(sc_data_global.small_maxclass + 1, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), @@ -74,9 +74,10 @@ TEST_BEGIN(test_stats_arenas_summary) { uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; - little = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0)); + little = mallocx(sc_data_global.small_maxclass, MALLOCX_ARENA(0)); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); + large = mallocx((1U << sc_data_global.lg_large_minclass), + MALLOCX_ARENA(0)); assert_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); @@ -148,7 +149,7 @@ TEST_BEGIN(test_stats_arenas_small) { no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ - p = mallocx(SMALL_MAXCLASS, MALLOCX_ARENA(0)); + p = mallocx(sc_data_global.small_maxclass, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), @@ -191,7 +192,7 @@ TEST_BEGIN(test_stats_arenas_large) { uint64_t epoch, nmalloc, ndalloc; int expected = config_stats ? 
0 : ENOENT; - p = mallocx((1U << LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); + p = mallocx((1U << sc_data_global.lg_large_minclass), MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), diff --git a/test/unit/zero.c b/test/unit/zero.c index 553692ba..20a70628 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -41,13 +41,14 @@ test_zero(size_t sz_min, size_t sz_max) { TEST_BEGIN(test_zero_small) { test_skip_if(!config_fill); - test_zero(1, SMALL_MAXCLASS-1); + test_zero(1, sc_data_global.small_maxclass - 1); } TEST_END TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); - test_zero(SMALL_MAXCLASS+1, (1U << (LG_LARGE_MINCLASS+1))); + test_zero(sc_data_global.small_maxclass + 1, + 1U << (sc_data_global.lg_large_minclass + 1)); } TEST_END From 07b89c76736313159e952648a9df3bdcfe57eda2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Dec 2017 17:45:21 -0800 Subject: [PATCH 1156/2608] Move quantum detection into its own file. This is logically fairly independent. --- .../internal/jemalloc_internal_types.h | 75 +----------------- include/jemalloc/internal/quantum.h | 77 +++++++++++++++++++ 2 files changed, 79 insertions(+), 73 deletions(-) create mode 100644 include/jemalloc/internal/quantum.h diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 1b750b12..e296c5a7 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TYPES_H #define JEMALLOC_INTERNAL_TYPES_H +#include "jemalloc/internal/quantum.h" + /* Page size index type. */ typedef unsigned pszind_t; @@ -50,79 +52,6 @@ typedef int malloc_cpuid_t; /* Smallest size class to support. */ #define TINY_MIN (1U << LG_TINY_MIN) -/* - * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size - * classes). 
- */ -#ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# ifdef __aarch64__ -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __m68k__ -# define LG_QUANTUM 3 -# endif -# ifdef __mips__ -# define LG_QUANTUM 3 -# endif -# ifdef __nios2__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# ifdef __powerpc__ -# define LG_QUANTUM 4 -# endif -# if defined(__riscv) || defined(__riscv__) -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ - defined(__SH4_SINGLE_ONLY__)) -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif -#endif - -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) - -/* Return the smallest quantum multiple that is >= a. */ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) - #define LONG ((size_t)(1U << LG_SIZEOF_LONG)) #define LONG_MASK (LONG - 1) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h new file mode 100644 index 00000000..821086e9 --- /dev/null +++ b/include/jemalloc/internal/quantum.h @@ -0,0 +1,77 @@ +#ifndef JEMALLOC_INTERNAL_QUANTUM_H +#define JEMALLOC_INTERNAL_QUANTUM_H + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). 
+ */ +#ifndef LG_QUANTUM +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef __arm__ +# define LG_QUANTUM 3 +# endif +# ifdef __aarch64__ +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# define LG_QUANTUM 3 +# endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# ifdef __powerpc__ +# define LG_QUANTUM 4 +# endif +# if defined(__riscv) || defined(__riscv__) +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ + defined(__SH4_SINGLE_ONLY__)) +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifndef LG_QUANTUM +# error "Unknown minimum alignment for architecture; specify via " + "--with-lg-quantum" +# endif +#endif + +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) + +/* Return the smallest quantum multiple that is >= a. */ +#define QUANTUM_CEILING(a) \ + (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) + +#endif /* JEMALLOC_INTERNAL_QUANTUM_H */ From 2f07e92adb7060045e9e8601126e5ec071091c42 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 22 Dec 2017 15:14:44 -0800 Subject: [PATCH 1157/2608] Add lg_ceil to bit_util. Also, add the bit_util test back to the Makefile. 
--- Makefile.in | 1 + include/jemalloc/internal/bit_util.h | 87 +++++++--------------------- test/unit/bit_util.c | 56 +++++++++++++++++- 3 files changed, 76 insertions(+), 68 deletions(-) diff --git a/Makefile.in b/Makefile.in index 619aae70..a747d6ea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -167,6 +167,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/bitmap.c \ + $(srcroot)test/unit/bit_util.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 435b497f..521f71b3 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -160,74 +160,27 @@ lg_floor(size_t x) { } #endif +BIT_UTIL_INLINE unsigned +lg_ceil(size_t x) { + return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1); +} + #undef BIT_UTIL_INLINE -/* A compile-time version of lg_ceil */ -#define LG_CEIL(x) ( \ - (x) <= (1ULL << 0ULL) ? 0 : \ - (x) <= (1ULL << 1ULL) ? 1 : \ - (x) <= (1ULL << 2ULL) ? 2 : \ - (x) <= (1ULL << 3ULL) ? 3 : \ - (x) <= (1ULL << 4ULL) ? 4 : \ - (x) <= (1ULL << 5ULL) ? 5 : \ - (x) <= (1ULL << 6ULL) ? 6 : \ - (x) <= (1ULL << 7ULL) ? 7 : \ - (x) <= (1ULL << 8ULL) ? 8 : \ - (x) <= (1ULL << 9ULL) ? 9 : \ - (x) <= (1ULL << 10ULL) ? 10 : \ - (x) <= (1ULL << 11ULL) ? 11 : \ - (x) <= (1ULL << 12ULL) ? 12 : \ - (x) <= (1ULL << 13ULL) ? 13 : \ - (x) <= (1ULL << 14ULL) ? 14 : \ - (x) <= (1ULL << 15ULL) ? 15 : \ - (x) <= (1ULL << 16ULL) ? 16 : \ - (x) <= (1ULL << 17ULL) ? 17 : \ - (x) <= (1ULL << 18ULL) ? 18 : \ - (x) <= (1ULL << 19ULL) ? 19 : \ - (x) <= (1ULL << 20ULL) ? 20 : \ - (x) <= (1ULL << 21ULL) ? 21 : \ - (x) <= (1ULL << 22ULL) ? 22 : \ - (x) <= (1ULL << 23ULL) ? 23 : \ - (x) <= (1ULL << 24ULL) ? 24 : \ - (x) <= (1ULL << 25ULL) ? 25 : \ - (x) <= (1ULL << 26ULL) ? 26 : \ - (x) <= (1ULL << 27ULL) ? 27 : \ - (x) <= (1ULL << 28ULL) ? 
28 : \ - (x) <= (1ULL << 29ULL) ? 29 : \ - (x) <= (1ULL << 30ULL) ? 30 : \ - (x) <= (1ULL << 31ULL) ? 31 : \ - (x) <= (1ULL << 32ULL) ? 32 : \ - (x) <= (1ULL << 33ULL) ? 33 : \ - (x) <= (1ULL << 34ULL) ? 34 : \ - (x) <= (1ULL << 35ULL) ? 35 : \ - (x) <= (1ULL << 36ULL) ? 36 : \ - (x) <= (1ULL << 37ULL) ? 37 : \ - (x) <= (1ULL << 38ULL) ? 38 : \ - (x) <= (1ULL << 39ULL) ? 39 : \ - (x) <= (1ULL << 40ULL) ? 40 : \ - (x) <= (1ULL << 41ULL) ? 41 : \ - (x) <= (1ULL << 42ULL) ? 42 : \ - (x) <= (1ULL << 43ULL) ? 43 : \ - (x) <= (1ULL << 44ULL) ? 44 : \ - (x) <= (1ULL << 45ULL) ? 45 : \ - (x) <= (1ULL << 46ULL) ? 46 : \ - (x) <= (1ULL << 47ULL) ? 47 : \ - (x) <= (1ULL << 48ULL) ? 48 : \ - (x) <= (1ULL << 49ULL) ? 49 : \ - (x) <= (1ULL << 50ULL) ? 50 : \ - (x) <= (1ULL << 51ULL) ? 51 : \ - (x) <= (1ULL << 52ULL) ? 52 : \ - (x) <= (1ULL << 53ULL) ? 53 : \ - (x) <= (1ULL << 54ULL) ? 54 : \ - (x) <= (1ULL << 55ULL) ? 55 : \ - (x) <= (1ULL << 56ULL) ? 56 : \ - (x) <= (1ULL << 57ULL) ? 57 : \ - (x) <= (1ULL << 58ULL) ? 58 : \ - (x) <= (1ULL << 59ULL) ? 59 : \ - (x) <= (1ULL << 60ULL) ? 60 : \ - (x) <= (1ULL << 61ULL) ? 61 : \ - (x) <= (1ULL << 62ULL) ? 62 : \ - (x) <= (1ULL << 63ULL) ? 63 : \ - 64) +/* A compile-time version of lg_floor and lg_ceil. */ +#define LG_FLOOR_1(x) 0 +#define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) +#define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2)) +#define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4)) +#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8)) +#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16)) +#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32)) +#if LG_SIZEOF_PTR == 2 +# define LG_FLOOR(x) LG_FLOOR_32((x)) +#else +# define LG_FLOOR(x) LG_FLOOR_64((x)) +#endif + +#define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 
0 : 1)) #endif /* JEMALLOC_INTERNAL_BIT_UTIL_H */ diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 42a97013..b747deb4 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -48,10 +48,64 @@ TEST_BEGIN(test_pow2_ceil_zu) { } TEST_END +void +assert_lg_ceil_range(size_t input, unsigned answer) { + if (input == 1) { + assert_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); + return; + } + assert_zu_le(input, (ZU(1) << answer), + "Got %u as lg_ceil of %zu", answer, input); + assert_zu_gt(input, (ZU(1) << (answer - 1)), + "Got %u as lg_ceil of %zu", answer, input); +} + +void +assert_lg_floor_range(size_t input, unsigned answer) { + if (input == 1) { + assert_u_eq(0, answer, "Got %u as lg_floor of 1", answer); + return; + } + assert_zu_ge(input, (ZU(1) << answer), + "Got %u as lg_floor of %zu", answer, input); + assert_zu_lt(input, (ZU(1) << (answer + 1)), + "Got %u as lg_floor of %zu", answer, input); +} + +TEST_BEGIN(test_lg_ceil_floor) { + for (size_t i = 1; i < 10 * 1000 * 1000; i++) { + assert_lg_ceil_range(i, lg_ceil(i)); + assert_lg_ceil_range(i, LG_CEIL(i)); + assert_lg_floor_range(i, lg_floor(i)); + assert_lg_floor_range(i, LG_FLOOR(i)); + } + for (int i = 10; i < 8 * (1 << LG_SIZEOF_PTR) - 5; i++) { + for (size_t j = 0; j < (1 << 4); j++) { + size_t num1 = ((size_t)1 << i) + - j * ((size_t)1 << (i - 4)); + size_t num2 = ((size_t)1 << i) + + j * ((size_t)1 << (i - 4)); + assert_zu_ne(num1, 0, "Invalid lg argument"); + assert_zu_ne(num2, 0, "Invalid lg argument"); + assert_lg_ceil_range(num1, lg_ceil(num1)); + assert_lg_ceil_range(num1, LG_CEIL(num1)); + assert_lg_ceil_range(num2, lg_ceil(num2)); + assert_lg_ceil_range(num2, LG_CEIL(num2)); + + assert_lg_floor_range(num1, lg_floor(num1)); + assert_lg_floor_range(num1, LG_FLOOR(num1)); + assert_lg_floor_range(num2, lg_floor(num2)); + assert_lg_floor_range(num2, LG_FLOOR(num2)); + } + } +} +TEST_END + int main(void) { return test( test_pow2_ceil_u64, test_pow2_ceil_u32, - 
test_pow2_ceil_zu); + test_pow2_ceil_zu, + test_lg_ceil_floor); } From 4f55c0ec220ae97eb5bc7e2bebc07d5c6100fa83 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 22 Dec 2017 15:01:34 -0800 Subject: [PATCH 1158/2608] Translate size class computation from bash shell into C. This is the last big step in making size classes a runtime computation rather than a configure-time one. The compile-time computation has been left in, for now, to allow assertion checking that the results are identical. --- src/sc.c | 321 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 280 insertions(+), 41 deletions(-) diff --git a/src/sc.c b/src/sc.c index 943d7875..1d343d36 100644 --- a/src/sc.c +++ b/src/sc.c @@ -1,62 +1,301 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" -#include "jemalloc/internal/size_classes.h" sc_data_t sc_data_global; +static size_t +reg_size_compute(int lg_base, int lg_delta, int ndelta) { + return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); +} + +/* Returns the number of pages in the slab. */ +static int +slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) { + size_t page = (ZU(1) << lg_page); + size_t reg_size = reg_size_compute(lg_base, lg_delta, ndelta); + + size_t try_slab_size = page; + size_t try_nregs = try_slab_size / reg_size; + size_t perfect_slab_size = 0; + bool perfect = false; + /* + * This loop continues until we find the least common multiple of the + * page size and size class size. Size classes are all of the form + * base + ndelta * delta == (ndelta + base/ndelta) * delta, which is + * (ndelta + ngroup) * delta. The way we choose slabbing strategies + * means that delta is at most the page size and ndelta < ngroup. So + * the loop executes for at most 2 * ngroup - 1 iterations, which is + * also the bound on the number of pages in a slab chosen by default. 
+ * With the current default settings, this is at most 7. + */ + while (!perfect) { + perfect_slab_size = try_slab_size; + size_t perfect_nregs = try_nregs; + try_slab_size += page; + try_nregs = try_slab_size / reg_size; + if (perfect_slab_size == perfect_nregs * reg_size) { + perfect = true; + } + } + return (int)(perfect_slab_size / page); +} + static void -fill_sc(sc_data_t *data, int index, int lg_base, int lg_delta, int ndelta, - bool psz, bool bin, int pgs, int lg_delta_lookup) { - sc_t *sc = &data->sc[index]; +size_class( + /* Output. */ + sc_t *sc, + /* Configuration decisions. */ + int lg_max_lookup, int lg_page, int lg_ngroup, + /* Inputs specific to the size class. */ + int index, int lg_base, int lg_delta, int ndelta) { sc->index = index; sc->lg_base = lg_base; sc->lg_delta = lg_delta; sc->ndelta = ndelta; - sc->psz = psz; - sc->bin = bin; - sc->pgs = pgs; - sc->lg_delta_lookup = lg_delta_lookup; + sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta) + % (ZU(1) << lg_page) == 0); + size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); + if (index == 0) { + assert(!sc->psz); + } + if (size < (ZU(1) << (lg_page + lg_ngroup))) { + sc->bin = true; + sc->pgs = slab_size(lg_page, lg_base, lg_delta, ndelta); + } else { + sc->bin = false; + sc->pgs = 0; + } + if (size <= (ZU(1) << lg_max_lookup)) { + sc->lg_delta_lookup = lg_delta; + } else { + sc->lg_delta_lookup = 0; + } } +static void +size_classes( + /* Output. */ + sc_data_t *sc_data, + /* Determined by the system. */ + size_t lg_ptr_size, int lg_quantum, + /* Configuration decisions. */ + int lg_tiny_min, int lg_max_lookup, int lg_page, int lg_ngroup) { + int ptr_bits = (1 << lg_ptr_size) * 8; + int ngroup = (1 << lg_ngroup); + int ntiny = 0; + int nlbins = 0; + int lg_tiny_maxclass = (unsigned)-1; + int nbins = 0; + int npsizes = 0; + + int index = 0; + + int ndelta = 0; + int lg_base = lg_tiny_min; + int lg_delta = lg_base; + + /* Outputs that we update as we go. 
*/ + size_t lookup_maxclass = 0; + size_t small_maxclass = 0; + int lg_large_minclass = 0; + size_t large_maxclass = 0; + + /* Tiny size classes. */ + while (lg_base < lg_quantum) { + sc_t *sc = &sc_data->sc[index]; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + if (sc->lg_delta_lookup != 0) { + nlbins = index + 1; + } + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + } + ntiny++; + /* Final written value is correct. */ + lg_tiny_maxclass = lg_base; + index++; + lg_delta = lg_base; + lg_base++; + } + + /* First non-tiny (pseudo) group. */ + if (ntiny != 0) { + sc_t *sc = &sc_data->sc[index]; + /* + * See the note in sc.h; the first non-tiny size class has an + * unusual encoding. + */ + lg_base--; + ndelta = 1; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + index++; + lg_base++; + lg_delta++; + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + } + } + while (ndelta < ngroup) { + sc_t *sc = &sc_data->sc[index]; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + index++; + ndelta++; + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + } + } + + /* All remaining groups. */ + lg_base = lg_base + lg_ngroup; + while (lg_base < ptr_bits - 1) { + ndelta = 1; + int ndelta_limit; + if (lg_base == ptr_bits - 2) { + ndelta_limit = ngroup - 1; + } else { + ndelta_limit = ngroup; + } + while (ndelta <= ndelta_limit) { + sc_t *sc = &sc_data->sc[index]; + size_class(sc, lg_max_lookup, lg_page, lg_ngroup, index, + lg_base, lg_delta, ndelta); + if (sc->lg_delta_lookup != 0) { + nlbins = index + 1; + /* Final written value is correct. */ + lookup_maxclass = (ZU(1) << lg_base) + + (ZU(ndelta) << lg_delta); + } + if (sc->psz) { + npsizes++; + } + if (sc->bin) { + nbins++; + /* Final written value is correct. 
*/ + small_maxclass = (ZU(1) << lg_base) + + (ZU(ndelta) << lg_delta); + if (lg_ngroup > 0) { + lg_large_minclass = lg_base + 1; + } else { + lg_large_minclass = lg_base + 2; + } + } + large_maxclass = (ZU(1) << lg_base) + + (ZU(ndelta) << lg_delta); + index++; + ndelta++; + } + lg_base++; + lg_delta++; + } + /* Additional outputs. */ + int nsizes = index; + unsigned lg_ceil_nsizes = lg_ceil(nsizes); + + /* Fill in the output data. */ + sc_data->ntiny = ntiny; + sc_data->nlbins = nlbins; + sc_data->nbins = nbins; + sc_data->nsizes = nsizes; + sc_data->lg_ceil_nsizes = lg_ceil_nsizes; + sc_data->npsizes = npsizes; + sc_data->lg_tiny_maxclass = lg_tiny_maxclass; + sc_data->lookup_maxclass = lookup_maxclass; + sc_data->small_maxclass = small_maxclass; + sc_data->lg_large_minclass = lg_large_minclass; + sc_data->large_minclass = (ZU(1) << lg_large_minclass); + sc_data->large_maxclass = large_maxclass; +} + +/* + * Defined later (after size_classes.h becomes visible), but called during + * initialization. 
+ */ +static void sc_data_assert(sc_data_t *sc_data); + void -sc_data_init(sc_data_t *data) { - assert(SC_NTINY == NTBINS); - assert(SC_NSIZES == NSIZES); - assert(SC_NBINS == NBINS); - assert(NPSIZES <= SC_NPSIZES_MAX); - assert(!data->initialized); - data->initialized = true; - data->ntiny = NTBINS; - data->nlbins = NLBINS; - data->nbins = NBINS; - data->nsizes = NSIZES; - data->lg_ceil_nsizes = LG_CEIL_NSIZES; - data->npsizes = NPSIZES; -#if SC_NTINY != 0 - data->lg_tiny_maxclass = LG_TINY_MAXCLASS; -#else - data->lg_tiny_maxclass = -1; -#endif - data->lookup_maxclass = LOOKUP_MAXCLASS; - data->small_maxclass = SMALL_MAXCLASS; - data->lg_large_minclass = LG_LARGE_MINCLASS; - data->large_minclass = LARGE_MINCLASS; - data->large_maxclass = LARGE_MAXCLASS; -#define no 0 -#define yes 1 -#define SC(index, lg_base_base, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - fill_sc(data, index, lg_base_base, lg_delta, ndelta, psz, bin, \ - pgs, lg_delta_lookup); - SIZE_CLASSES -#undef no -#undef yes -#undef SC +sc_data_init(sc_data_t *sc_data) { + assert(!sc_data->initialized); + + int lg_max_lookup = 12; + + size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, + lg_max_lookup, LG_PAGE, 2); + + sc_data->initialized = true; + + sc_data_assert(sc_data); } void sc_boot() { sc_data_init(&sc_data_global); } + +/* + * We don't include size_classes.h until this point, to ensure only the asserts + * can see it. 
+ */ +#include "jemalloc/internal/size_classes.h" + +static void +sc_assert(sc_t *sc, int index, int lg_base, int lg_delta, int ndelta, int psz, + int bin, int pgs, int lg_delta_lookup) { + assert(sc->index == index); + assert(sc->lg_base == lg_base); + assert(sc->lg_delta == lg_delta); + assert(sc->ndelta == ndelta); + assert(sc->psz == psz); + assert(sc->bin == bin); + assert(sc->pgs == pgs); + assert(sc->lg_delta_lookup == lg_delta_lookup); +} + +static void +sc_data_assert(sc_data_t *sc_data) { + assert(SC_NTINY == NTBINS); + assert(SC_NSIZES == NSIZES); + assert(SC_NBINS == NBINS); + assert(NPSIZES <= SC_NPSIZES_MAX); + assert(sc_data->ntiny == NTBINS); + assert(sc_data->nlbins == NLBINS); + assert(sc_data->nbins == NBINS); + assert(sc_data->nsizes == NSIZES); + assert(sc_data->lg_ceil_nsizes == LG_CEIL_NSIZES); + assert(sc_data->npsizes == NPSIZES); +#if NTBINS > 0 + assert(sc_data->lg_tiny_maxclass == LG_TINY_MAXCLASS); +#else + assert(sc_data->lg_tiny_maxclass == -1); +#endif + assert(sc_data->lookup_maxclass == LOOKUP_MAXCLASS); + assert(sc_data->small_maxclass == SMALL_MAXCLASS); + assert(sc_data->lg_large_minclass == LG_LARGE_MINCLASS); + assert(sc_data->large_minclass == LARGE_MINCLASS); + assert(sc_data->large_maxclass == LARGE_MAXCLASS); + assert(sc_data->initialized); +#define no 0 +#define yes 1 +#define SC(index, lg_base, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + sc_assert(&sc_data->sc[index], index, lg_base, lg_delta, \ + ndelta, psz, bin, pgs, lg_delta_lookup); + SIZE_CLASSES +#undef no +#undef yes +#undef SC +} From 0552aad91b955db7ad1806907255e943af2fdb88 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 5 Jan 2018 13:11:44 -0800 Subject: [PATCH 1159/2608] Kill size_classes.sh. We've moved size class computations to boot time; they were being used only to check that the computations resulted in equal values. 
--- .gitignore | 1 - configure.ac | 12 - include/jemalloc/internal/size_classes.sh | 361 ---------------------- src/sc.c | 62 ---- 4 files changed, 436 deletions(-) delete mode 100755 include/jemalloc/internal/size_classes.sh diff --git a/.gitignore b/.gitignore index 19199ccb..5ca0ad1d 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,6 @@ /include/jemalloc/internal/public_namespace.h /include/jemalloc/internal/public_symbols.txt /include/jemalloc/internal/public_unnamespace.h -/include/jemalloc/internal/size_classes.h /include/jemalloc/jemalloc.h /include/jemalloc/jemalloc_defs.h /include/jemalloc/jemalloc_macros.h diff --git a/configure.ac b/configure.ac index 1c209117..87270876 100644 --- a/configure.ac +++ b/configure.ac @@ -968,7 +968,6 @@ cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_symbols.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/private_namespace.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_namespace.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/public_unnamespace.sh" -cfghdrs_in="${cfghdrs_in} include/jemalloc/internal/size_classes.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_rename.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc_mangle.sh" cfghdrs_in="${cfghdrs_in} include/jemalloc/jemalloc.sh" @@ -981,7 +980,6 @@ cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/private_symbols_jet.awk" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_symbols.txt" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_namespace.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/public_unnamespace.h" -cfghdrs_out="${cfghdrs_out} include/jemalloc/internal/size_classes.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_protos_jet.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_rename.h" cfghdrs_out="${cfghdrs_out} include/jemalloc/jemalloc_mangle.h" @@ -2177,16 +2175,6 @@ AC_CONFIG_COMMANDS([include/jemalloc/internal/public_unnamespace.h], [ 
srcdir="${srcdir}" objroot="${objroot}" ]) -AC_CONFIG_COMMANDS([include/jemalloc/internal/size_classes.h], [ - mkdir -p "${objroot}include/jemalloc/internal" - "${SHELL}" "${srcdir}/include/jemalloc/internal/size_classes.sh" "${LG_QUANTA}" 3 "${LG_PAGE_SIZES}" 2 > "${objroot}include/jemalloc/internal/size_classes.h" -], [ - SHELL="${SHELL}" - srcdir="${srcdir}" - objroot="${objroot}" - LG_QUANTA="${LG_QUANTA}" - LG_PAGE_SIZES="${LG_PAGE_SIZES}" -]) AC_CONFIG_COMMANDS([include/jemalloc/jemalloc_protos_jet.h], [ mkdir -p "${objroot}include/jemalloc" cat "${srcdir}/include/jemalloc/jemalloc_protos.h.in" | sed -e 's/@je_@/jet_/g' > "${objroot}include/jemalloc/jemalloc_protos_jet.h" diff --git a/include/jemalloc/internal/size_classes.sh b/include/jemalloc/internal/size_classes.sh deleted file mode 100755 index 998994d0..00000000 --- a/include/jemalloc/internal/size_classes.sh +++ /dev/null @@ -1,361 +0,0 @@ -#!/bin/sh -# -# Usage: size_classes.sh - -# The following limits are chosen such that they cover all supported platforms. - -# Pointer sizes. -lg_zarr="2 3" - -# Quanta. -lg_qarr=$1 - -# The range of tiny size classes is [2^lg_tmin..2^(lg_q-1)]. -lg_tmin=$2 - -# Maximum lookup size. -lg_kmax=12 - -# Page sizes. -lg_parr=`echo $3 | tr ',' ' '` - -# Size class group size (number of size classes for each size doubling). 
-lg_g=$4 - -pow2() { - e=$1 - pow2_result=1 - while [ ${e} -gt 0 ] ; do - pow2_result=$((${pow2_result} + ${pow2_result})) - e=$((${e} - 1)) - done -} - -lg() { - x=$1 - lg_result=0 - while [ ${x} -gt 1 ] ; do - lg_result=$((${lg_result} + 1)) - x=$((${x} / 2)) - done -} - -lg_ceil() { - y=$1 - lg ${y}; lg_floor=${lg_result} - pow2 ${lg_floor}; pow2_floor=${pow2_result} - if [ ${pow2_floor} -lt ${y} ] ; then - lg_ceil_result=$((${lg_floor} + 1)) - else - lg_ceil_result=${lg_floor} - fi -} - -reg_size_compute() { - lg_grp=$1 - lg_delta=$2 - ndelta=$3 - - pow2 ${lg_grp}; grp=${pow2_result} - pow2 ${lg_delta}; delta=${pow2_result} - reg_size=$((${grp} + ${delta}*${ndelta})) -} - -slab_size() { - lg_p=$1 - lg_grp=$2 - lg_delta=$3 - ndelta=$4 - - pow2 ${lg_p}; p=${pow2_result} - reg_size_compute ${lg_grp} ${lg_delta} ${ndelta} - - # Compute smallest slab size that is an integer multiple of reg_size. - try_slab_size=${p} - try_nregs=$((${try_slab_size} / ${reg_size})) - perfect=0 - while [ ${perfect} -eq 0 ] ; do - perfect_slab_size=${try_slab_size} - perfect_nregs=${try_nregs} - - try_slab_size=$((${try_slab_size} + ${p})) - try_nregs=$((${try_slab_size} / ${reg_size})) - if [ ${perfect_slab_size} -eq $((${perfect_nregs} * ${reg_size})) ] ; then - perfect=1 - fi - done - - slab_size_pgs=$((${perfect_slab_size} / ${p})) -} - -size_class() { - index=$1 - lg_grp=$2 - lg_delta=$3 - ndelta=$4 - lg_p=$5 - lg_kmax=$6 - - if [ ${lg_delta} -ge ${lg_p} ] ; then - psz="yes" - else - pow2 ${lg_p}; p=${pow2_result} - pow2 ${lg_grp}; grp=${pow2_result} - pow2 ${lg_delta}; delta=${pow2_result} - sz=$((${grp} + ${delta} * ${ndelta})) - npgs=$((${sz} / ${p})) - if [ ${sz} -eq $((${npgs} * ${p})) ] ; then - psz="yes" - else - psz="no" - fi - fi - - lg ${ndelta}; lg_ndelta=${lg_result}; pow2 ${lg_ndelta} - if [ ${pow2_result} -lt ${ndelta} ] ; then - rem="yes" - else - rem="no" - fi - - lg_size=${lg_grp} - if [ $((${lg_delta} + ${lg_ndelta})) -eq ${lg_grp} ] ; then - lg_size=$((${lg_grp} 
+ 1)) - else - lg_size=${lg_grp} - rem="yes" - fi - - if [ ${lg_size} -lt $((${lg_p} + ${lg_g})) ] ; then - bin="yes" - slab_size ${lg_p} ${lg_grp} ${lg_delta} ${ndelta}; pgs=${slab_size_pgs} - else - bin="no" - pgs=0 - fi - if [ ${lg_size} -lt ${lg_kmax} \ - -o ${lg_size} -eq ${lg_kmax} -a ${rem} = "no" ] ; then - lg_delta_lookup=${lg_delta} - else - lg_delta_lookup="no" - fi - printf ' SC(%3d, %6d, %8d, %6d, %3s, %3s, %3d, %2s) \\\n' ${index} ${lg_grp} ${lg_delta} ${ndelta} ${psz} ${bin} ${pgs} ${lg_delta_lookup} - # Defined upon return: - # - psz ("yes" or "no") - # - bin ("yes" or "no") - # - pgs - # - lg_delta_lookup (${lg_delta} or "no") -} - -sep_line() { - echo " \\" -} - -size_classes() { - lg_z=$1 - lg_q=$2 - lg_t=$3 - lg_p=$4 - lg_g=$5 - - pow2 $((${lg_z} + 3)); ptr_bits=${pow2_result} - pow2 ${lg_g}; g=${pow2_result} - - echo "#define SIZE_CLASSES \\" - echo " /* index, lg_grp, lg_delta, ndelta, psz, bin, pgs, lg_delta_lookup */ \\" - - ntbins=0 - nlbins=0 - lg_tiny_maxclass='"NA"' - nbins=0 - npsizes=0 - - # Tiny size classes. - ndelta=0 - index=0 - lg_grp=${lg_t} - lg_delta=${lg_grp} - while [ ${lg_grp} -lt ${lg_q} ] ; do - size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} - if [ ${lg_delta_lookup} != "no" ] ; then - nlbins=$((${index} + 1)) - fi - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi - if [ ${bin} != "no" ] ; then - nbins=$((${index} + 1)) - fi - ntbins=$((${ntbins} + 1)) - lg_tiny_maxclass=${lg_grp} # Final written value is correct. - index=$((${index} + 1)) - lg_delta=${lg_grp} - lg_grp=$((${lg_grp} + 1)) - done - - # First non-tiny group. - if [ ${ntbins} -gt 0 ] ; then - sep_line - # The first size class has an unusual encoding, because the size has to be - # split between grp and delta*ndelta. 
- lg_grp=$((${lg_grp} - 1)) - ndelta=1 - size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} - index=$((${index} + 1)) - lg_grp=$((${lg_grp} + 1)) - lg_delta=$((${lg_delta} + 1)) - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi - fi - while [ ${ndelta} -lt ${g} ] ; do - size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} - index=$((${index} + 1)) - ndelta=$((${ndelta} + 1)) - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi - done - - # All remaining groups. - lg_grp=$((${lg_grp} + ${lg_g})) - while [ ${lg_grp} -lt $((${ptr_bits} - 1)) ] ; do - sep_line - ndelta=1 - if [ ${lg_grp} -eq $((${ptr_bits} - 2)) ] ; then - ndelta_limit=$((${g} - 1)) - else - ndelta_limit=${g} - fi - while [ ${ndelta} -le ${ndelta_limit} ] ; do - size_class ${index} ${lg_grp} ${lg_delta} ${ndelta} ${lg_p} ${lg_kmax} - if [ ${lg_delta_lookup} != "no" ] ; then - nlbins=$((${index} + 1)) - # Final written value is correct: - lookup_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" - fi - if [ ${psz} = "yes" ] ; then - npsizes=$((${npsizes} + 1)) - fi - if [ ${bin} != "no" ] ; then - nbins=$((${index} + 1)) - # Final written value is correct: - small_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" - if [ ${lg_g} -gt 0 ] ; then - lg_large_minclass=$((${lg_grp} + 1)) - else - lg_large_minclass=$((${lg_grp} + 2)) - fi - fi - # Final written value is correct: - large_maxclass="((((size_t)1) << ${lg_grp}) + (((size_t)${ndelta}) << ${lg_delta}))" - index=$((${index} + 1)) - ndelta=$((${ndelta} + 1)) - done - lg_grp=$((${lg_grp} + 1)) - lg_delta=$((${lg_delta} + 1)) - done - echo - nsizes=${index} - lg_ceil ${nsizes}; lg_ceil_nsizes=${lg_ceil_result} - - # Defined upon completion: - # - ntbins - # - nlbins - # - nbins - # - nsizes - # - lg_ceil_nsizes - # - npsizes - # - lg_tiny_maxclass - # - lookup_maxclass - # - small_maxclass - # - lg_large_minclass - # - 
large_maxclass -} - -cat < 256) -# error "Too many small size classes" -#endif - -#endif /* JEMALLOC_INTERNAL_SIZE_CLASSES_H */ -EOF diff --git a/src/sc.c b/src/sc.c index 1d343d36..61e11973 100644 --- a/src/sc.c +++ b/src/sc.c @@ -221,12 +221,6 @@ size_classes( sc_data->large_maxclass = large_maxclass; } -/* - * Defined later (after size_classes.h becomes visible), but called during - * initialization. - */ -static void sc_data_assert(sc_data_t *sc_data); - void sc_data_init(sc_data_t *sc_data) { assert(!sc_data->initialized); @@ -237,65 +231,9 @@ sc_data_init(sc_data_t *sc_data) { lg_max_lookup, LG_PAGE, 2); sc_data->initialized = true; - - sc_data_assert(sc_data); } void sc_boot() { sc_data_init(&sc_data_global); } - -/* - * We don't include size_classes.h until this point, to ensure only the asserts - * can see it. - */ -#include "jemalloc/internal/size_classes.h" - -static void -sc_assert(sc_t *sc, int index, int lg_base, int lg_delta, int ndelta, int psz, - int bin, int pgs, int lg_delta_lookup) { - assert(sc->index == index); - assert(sc->lg_base == lg_base); - assert(sc->lg_delta == lg_delta); - assert(sc->ndelta == ndelta); - assert(sc->psz == psz); - assert(sc->bin == bin); - assert(sc->pgs == pgs); - assert(sc->lg_delta_lookup == lg_delta_lookup); -} - -static void -sc_data_assert(sc_data_t *sc_data) { - assert(SC_NTINY == NTBINS); - assert(SC_NSIZES == NSIZES); - assert(SC_NBINS == NBINS); - assert(NPSIZES <= SC_NPSIZES_MAX); - assert(sc_data->ntiny == NTBINS); - assert(sc_data->nlbins == NLBINS); - assert(sc_data->nbins == NBINS); - assert(sc_data->nsizes == NSIZES); - assert(sc_data->lg_ceil_nsizes == LG_CEIL_NSIZES); - assert(sc_data->npsizes == NPSIZES); -#if NTBINS > 0 - assert(sc_data->lg_tiny_maxclass == LG_TINY_MAXCLASS); -#else - assert(sc_data->lg_tiny_maxclass == -1); -#endif - assert(sc_data->lookup_maxclass == LOOKUP_MAXCLASS); - assert(sc_data->small_maxclass == SMALL_MAXCLASS); - assert(sc_data->lg_large_minclass == LG_LARGE_MINCLASS); - 
assert(sc_data->large_minclass == LARGE_MINCLASS); - assert(sc_data->large_maxclass == LARGE_MAXCLASS); - assert(sc_data->initialized); -#define no 0 -#define yes 1 -#define SC(index, lg_base, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - sc_assert(&sc_data->sc[index], index, lg_base, lg_delta, \ - ndelta, psz, bin, pgs, lg_delta_lookup); - SIZE_CLASSES -#undef no -#undef yes -#undef SC -} From 5b7fc9056c8114d0774282d293cd5c9cce4ff931 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 5 Jan 2018 13:33:37 -0800 Subject: [PATCH 1160/2608] Remove the --with-lg-page-sizes configure option. This appears to be unused. --- INSTALL.md | 7 ------- configure.ac | 5 ----- 2 files changed, 12 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index ef328c60..18cf2883 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -221,13 +221,6 @@ any of the following arguments (not a definitive list) to 'configure': system page size may change between configuration and execution, e.g. when cross compiling. -* `--with-lg-page-sizes=` - - Specify the comma-separated base 2 logs of the page sizes to support. This - option may be useful when cross compiling in combination with - `--with-lg-page`, but its primary use case is for integration with FreeBSD's - libc, wherein jemalloc is embedded. - * `--with-lg-hugepage=` Specify the base 2 log of the system huge page size. This option is useful diff --git a/configure.ac b/configure.ac index 87270876..e18bc4b2 100644 --- a/configure.ac +++ b/configure.ac @@ -1430,11 +1430,6 @@ if test "x${LG_PAGE}" != "xundefined" -a \ fi AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) -AC_ARG_WITH([lg_page_sizes], - [AS_HELP_STRING([--with-lg-page-sizes=], - [Base 2 logs of system page sizes to support])], - [LG_PAGE_SIZES="$with_lg_page_sizes"], [LG_PAGE_SIZES="$LG_PAGE"]) - dnl ============================================================================ dnl jemalloc configuration. 
dnl From 017dca198c74792967771d00b7501beade5b6fd0 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Wed, 18 Apr 2018 19:36:40 -0700 Subject: [PATCH 1161/2608] SC module: Add a note on style. --- src/sc.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/sc.c b/src/sc.c index 61e11973..f7458c55 100644 --- a/src/sc.c +++ b/src/sc.c @@ -4,6 +4,13 @@ #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" +/* + * This module computes the size classes used to satisfy allocations. The logic + * here was ported more or less line-by-line from a shell script, and because of + * that is not the most idiomatic C. Eventually we should fix this, but for now + * at least the damage is compartmentalized to this file. + */ + sc_data_t sc_data_global; static size_t From a7f68aed3ef53a194f6b932b92bddd8c84c43de4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Wed, 18 Apr 2018 20:32:12 -0700 Subject: [PATCH 1162/2608] SC: Add page customization functionality. --- Makefile.in | 1 + include/jemalloc/internal/sc.h | 6 +++++ src/sc.c | 42 ++++++++++++++++++++++++++++++++++ test/unit/sc.c | 33 ++++++++++++++++++++++++++ 4 files changed, 82 insertions(+) create mode 100644 test/unit/sc.c diff --git a/Makefile.in b/Makefile.in index a747d6ea..05f67d93 100644 --- a/Makefile.in +++ b/Makefile.in @@ -204,6 +204,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index df295bca..592115a7 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -297,6 +297,12 @@ struct sc_data_s { extern sc_data_t sc_data_global; void sc_data_init(sc_data_t *data); +/* + * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. 
+ * Otherwise, does its best to accomodate the request. + */ +void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, + int pgs); void sc_boot(); #endif /* JEMALLOC_INTERNAL_SC_H */ diff --git a/src/sc.c b/src/sc.c index f7458c55..e8eef1c2 100644 --- a/src/sc.c +++ b/src/sc.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/sc.h" /* @@ -240,6 +242,46 @@ sc_data_init(sc_data_t *sc_data) { sc_data->initialized = true; } +static void +sc_data_update_sc_slab_size(sc_t *sc, size_t reg_size, size_t pgs_guess) { + size_t min_pgs = reg_size / PAGE; + if (reg_size % PAGE != 0) { + min_pgs++; + } + /* + * BITMAP_MAXBITS is actually determined by putting the smallest + * possible size-class on one page, so this can never be 0. + */ + size_t max_pgs = BITMAP_MAXBITS * reg_size / PAGE; + + assert(min_pgs <= max_pgs); + assert(min_pgs > 0); + assert(max_pgs >= 1); + if (pgs_guess < min_pgs) { + sc->pgs = (int)min_pgs; + } else if (pgs_guess > max_pgs) { + sc->pgs = (int)max_pgs; + } else { + sc->pgs = (int)pgs_guess; + } +} + +void +sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { + assert(data->initialized); + for (int i = 0; i < data->nsizes; i++) { + sc_t *sc = &data->sc[i]; + if (!sc->bin) { + break; + } + size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta, + sc->ndelta); + if (begin <= reg_size && reg_size <= end) { + sc_data_update_sc_slab_size(sc, reg_size, pgs); + } + } +} + void sc_boot() { sc_data_init(&sc_data_global); diff --git a/test/unit/sc.c b/test/unit/sc.c new file mode 100644 index 00000000..bf51d8e5 --- /dev/null +++ b/test/unit/sc.c @@ -0,0 +1,33 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_update_slab_size) { + sc_data_t data; + memset(&data, 0, sizeof(data)); + sc_data_init(&data); + sc_t *tiny = &data.sc[0]; + size_t tiny_size = (ZU(1) << 
tiny->lg_base) + + (ZU(tiny->ndelta) << tiny->lg_delta); + size_t pgs_too_big = (tiny_size * BITMAP_MAXBITS + PAGE - 1) / PAGE + 1; + sc_data_update_slab_size(&data, tiny_size, tiny_size, (int)pgs_too_big); + assert_zu_lt((size_t)tiny->pgs, pgs_too_big, "Allowed excessive pages"); + + sc_data_update_slab_size(&data, 1, 10 * PAGE, 1); + for (int i = 0; i < data.nbins; i++) { + sc_t *sc = &data.sc[i]; + size_t reg_size = (ZU(1) << sc->lg_base) + + (ZU(sc->ndelta) << sc->lg_delta); + if (reg_size <= PAGE) { + assert_d_eq(sc->pgs, 1, "Ignored valid page size hint"); + } else { + assert_d_gt(sc->pgs, 1, + "Allowed invalid page size hint"); + } + } +} +TEST_END + +int +main(void) { + return test( + test_update_slab_size); +} From 4610ffa942a00d80a8e8af2365069bed7d561415 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Apr 2018 19:12:45 -0700 Subject: [PATCH 1163/2608] Bootstrapping: Parse MALLOC_CONF before using slab sizes. I.e., parse before booting the bin module or sz module. This lets us tweak size class settings before committing to them by letting them leak into other modules. This commit does not actually do any tweaking of the size classes; it *just* chanchanges bootstrapping order; this may help bisecting any bootstrapping failures on poorly-tested architectures. --- src/jemalloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 664c5f89..902bf9c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1295,14 +1295,21 @@ static bool malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; + /* + * Ordering here is somewhat tricky; we need sc_boot() first, since that + * determines what the size classes will be, and then + * malloc_conf_init(), since any slab size tweaking will need to be done + * before sz_boot and bin_boot, which assume that the values they read + * out of sc_data_global are final. 
+ */ sc_boot(); + malloc_conf_init(); sz_boot(&sc_data_global); bin_boot(&sc_data_global); if (config_prof) { prof_boot0(); } - malloc_conf_init(); if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { From 5112d9e5fd2a15d6b75523a3a4122b726fbae479 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Apr 2018 21:11:03 -0700 Subject: [PATCH 1164/2608] Add MALLOC_CONF parsing for dynamic slab sizes. This actually enables us to change the values. --- Makefile.in | 1 + src/jemalloc.c | 68 +++++++++++++++++++++++++++++ test/integration/slab_sizes.c | 80 ++++++++++++++++++++++++++++++++++ test/integration/slab_sizes.sh | 4 ++ 4 files changed, 153 insertions(+) create mode 100644 test/integration/slab_sizes.c create mode 100644 test/integration/slab_sizes.sh diff --git a/Makefile.in b/Makefile.in index 05f67d93..8b2f5ca6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -230,6 +230,7 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/posix_memalign.c \ $(srcroot)test/integration/rallocx.c \ $(srcroot)test/integration/sdallocx.c \ + $(srcroot)test/integration/slab_sizes.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ $(srcroot)test/integration/xallocx.c diff --git a/src/jemalloc.c b/src/jemalloc.c index 902bf9c3..4ffe5aaa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -764,6 +764,49 @@ init_opt_stats_print_opts(const char *v, size_t vlen) { assert(opts_len == strlen(opt_stats_print_opts)); } +static bool +malloc_conf_slab_sizes_next(const char **slab_size_segment_cur, + size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *pgs) { + const char *cur = *slab_size_segment_cur; + char *end; + uintmax_t um; + + set_errno(0); + + /* First number, then '-' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != '-') { + return true; + } + *slab_start = (size_t)um; + cur = end + 1; + + /* Second number, 
then ':' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != ':') { + return true; + } + *slab_end = (size_t)um; + cur = end + 1; + + /* Last number */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0) { + return true; + } + *pgs = (size_t)um; + + /* Consume the separator if there is one. */ + if (*end == '|') { + end++; + } + + *vlen_left -= end - *slab_size_segment_cur; + *slab_size_segment_cur = end; + + return false; +} + static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { @@ -1192,6 +1235,31 @@ malloc_conf_init(void) { "max_background_threads", 1, opt_max_background_threads, yes, yes, true); + if (CONF_MATCH("slab_sizes")) { + bool err; + const char *slab_size_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t slab_start; + size_t slab_end; + size_t pgs; + err = malloc_conf_slab_sizes_next( + &slab_size_segment_cur, + &vlen_left, &slab_start, &slab_end, + &pgs); + if (!err) { + sc_data_update_slab_size( + &sc_data_global, slab_start, + slab_end, (int)pgs); + } else { + malloc_conf_error( + "Invalid settings for " + "slab_sizes", k, klen, v, + vlen); + } + } while (!err && vlen_left > 0); + continue; + } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, diff --git a/test/integration/slab_sizes.c b/test/integration/slab_sizes.c new file mode 100644 index 00000000..af250c3f --- /dev/null +++ b/test/integration/slab_sizes.c @@ -0,0 +1,80 @@ +#include "test/jemalloc_test.h" + +/* Note that this test relies on the unusual slab sizes set in slab_sizes.sh. 
*/ + +TEST_BEGIN(test_slab_sizes) { + unsigned nbins; + size_t page; + size_t sizemib[4]; + size_t slabmib[4]; + size_t len; + + len = sizeof(nbins); + assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + "nbins mallctl failure"); + + len = sizeof(page); + assert_d_eq(mallctl("arenas.page", &page, &len, NULL, 0), 0, + "page mallctl failure"); + + len = 4; + assert_d_eq(mallctlnametomib("arenas.bin.0.size", sizemib, &len), 0, + "bin size mallctlnametomib failure"); + + len = 4; + assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", slabmib, &len), + 0, "slab size mallctlnametomib failure"); + + size_t biggest_slab_seen = 0; + + for (unsigned i = 0; i < nbins; i++) { + size_t bin_size; + size_t slab_size; + len = sizeof(size_t); + sizemib[2] = i; + slabmib[2] = i; + assert_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, + NULL, 0), 0, "bin size mallctlbymib failure"); + + len = sizeof(size_t); + assert_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, + NULL, 0), 0, "slab size mallctlbymib failure"); + + if (bin_size < 100) { + /* + * Then we should be as close to 17 as possible. Since + * not all page sizes are valid (because of bitmap + * limitations on the number of items in a slab), we + * should at least make sure that the number of pages + * goes up. + */ + assert_zu_ge(slab_size, biggest_slab_seen, + "Slab sizes should go up"); + biggest_slab_seen = slab_size; + } else if ( + (100 <= bin_size && bin_size < 128) + || (128 < bin_size && bin_size <= 200)) { + assert_zu_eq(slab_size, page, + "Forced-small slabs should be small"); + } else if (bin_size == 128) { + assert_zu_eq(slab_size, 2 * page, + "Forced-2-page slab should be 2 pages"); + } else if (200 < bin_size && bin_size <= 4096) { + assert_zu_ge(slab_size, biggest_slab_seen, + "Slab sizes should go up"); + biggest_slab_seen = slab_size; + } + } + /* + * For any reasonable configuration, 17 pages should be a valid slab + * size for 4096-byte items. 
+ */ + assert_zu_eq(biggest_slab_seen, 17 * page, "Didn't hit page target"); +} +TEST_END + +int +main(void) { + return test( + test_slab_sizes); +} diff --git a/test/integration/slab_sizes.sh b/test/integration/slab_sizes.sh new file mode 100644 index 00000000..07e3db81 --- /dev/null +++ b/test/integration/slab_sizes.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +# Some screwy-looking slab sizes. +export MALLOC_CONF="slab_sizes:1-4096:17|100-200:1|128-128:2" From 55e5cc1341de87ad06254d719946a5ecd05f06ab Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Jul 2018 16:05:58 -0700 Subject: [PATCH 1165/2608] SC: Make some key size classes static. The largest small class, smallest large class, and largest large class may all be needed down fast paths; to avoid the risk of touching another cache line, we can make them available as constants. --- include/jemalloc/internal/arena_inlines_b.h | 6 +-- .../internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/prof_inlines_a.h | 8 +-- include/jemalloc/internal/sc.h | 19 +++++++ include/jemalloc/internal/sz.h | 22 ++++---- include/jemalloc/internal/tcache_inlines.h | 4 +- src/arena.c | 50 +++++++++---------- src/ckh.c | 6 +-- src/extent.c | 6 +-- src/jemalloc.c | 50 +++++++++---------- src/large.c | 14 +++--- src/sc.c | 14 ++++++ src/tcache.c | 4 +- test/unit/junk.c | 4 +- test/unit/mallctl.c | 2 +- test/unit/rtree.c | 4 +- test/unit/stats.c | 6 +-- test/unit/zero.c | 4 +- 18 files changed, 129 insertions(+), 96 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 89603966..2b3915ae 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -111,7 +111,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(size != 0); if (likely(tcache != NULL)) { - if (likely(size <= sc_data_global.small_maxclass)) { + if (likely(size <= SC_SMALL_MAXCLASS)) { return 
tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } @@ -263,7 +263,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, static inline void arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); - assert(size <= sc_data_global.large_maxclass); + assert(size <= SC_LARGE_MAXCLASS); szind_t szind; bool slab; @@ -309,7 +309,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); - assert(size <= sc_data_global.large_maxclass); + assert(size <= SC_LARGE_MAXCLASS); if (unlikely(tcache == NULL)) { arena_sdalloc_no_tcache(tsdn, ptr, size); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 83ad10ff..9c5fec62 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -142,7 +142,7 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t usize, copysize; usize = sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 07bfd9f3..471d9853 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -57,15 +57,15 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, #ifdef JEMALLOC_ATOMIC_U64 a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); do { - a1 = (a0 >= sc_data_global.large_minclass - usize) - ? a0 - (sc_data_global.large_minclass - usize) : 0; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &prof_accum->mtx); a0 = prof_accum->accumbytes; - a1 = (a0 >= sc_data_global.large_minclass - usize) - ? a0 - (sc_data_global.large_minclass - usize) : 0; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; prof_accum->accumbytes = a1; malloc_mutex_unlock(tsdn, &prof_accum->mtx); #endif diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 592115a7..5c94378c 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -238,6 +238,25 @@ /* The largest size class in the lookup table. */ #define SC_LOOKUP_MAXCLASS ((size_t)1 << 12) +/* Internal, only used for the definition of SC_SMALL_MAXCLASS. */ +#define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1)) +#define SC_SMALL_MAX_DELTA ((size_t)1 << (LG_PAGE - 1)) + +/* The largest size class allocated out of a slab. */ +#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \ + + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) + +/* The smallest size class not allocated out of a slab. */ +#define SC_LARGE_MINCLASS ((size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)) +#define SC_LG_LARGE_MINCLASS (LG_PAGE + SC_LG_NGROUP) + +/* Internal; only used for the definition of SC_LARGE_MAXCLASS. */ +#define SC_MAX_BASE ((size_t)1 << (SC_PTR_BITS - 2)) +#define SC_MAX_DELTA ((size_t)1 << (SC_PTR_BITS - 2 - SC_LG_NGROUP)) + +/* The largest size class supported. */ +#define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) + typedef struct sc_s sc_t; struct sc_s { /* Size class index, or -1 if not a valid size class. 
*/ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index b37e7969..e743d878 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -51,7 +51,7 @@ extern void sz_boot(const sc_data_t *sc_data); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { - if (unlikely(psz > sc_data_global.large_maxclass)) { + if (unlikely(psz > SC_LARGE_MAXCLASS)) { return sc_data_global.npsizes; } pszind_t x = lg_floor((psz<<1)-1); @@ -73,7 +73,7 @@ sz_psz2ind(size_t psz) { static inline size_t sz_pind2sz_compute(pszind_t pind) { if (unlikely(pind == sc_data_global.npsizes)) { - return sc_data_global.large_maxclass + PAGE; + return SC_LARGE_MAXCLASS + PAGE; } size_t grp = pind >> SC_LG_NGROUP; size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1); @@ -105,8 +105,8 @@ sz_pind2sz(pszind_t pind) { static inline size_t sz_psz2u(size_t psz) { - if (unlikely(psz > sc_data_global.large_maxclass)) { - return sc_data_global.large_maxclass + PAGE; + if (unlikely(psz > SC_LARGE_MAXCLASS)) { + return SC_LARGE_MAXCLASS + PAGE; } size_t x = lg_floor((psz<<1)-1); size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? @@ -119,7 +119,7 @@ sz_psz2u(size_t psz) { static inline szind_t sz_size2index_compute(size_t size) { - if (unlikely(size > sc_data_global.large_maxclass)) { + if (unlikely(size > SC_LARGE_MAXCLASS)) { return SC_NSIZES; } #if (SC_NTINY != 0) @@ -207,7 +207,7 @@ sz_index2size(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { - if (unlikely(size > sc_data_global.large_maxclass)) { + if (unlikely(size > SC_LARGE_MAXCLASS)) { return 0; } #if (SC_NTINY > 0) @@ -262,7 +262,7 @@ sz_sa2u(size_t size, size_t alignment) { assert(alignment != 0 && ((alignment - 1) & alignment) == 0); /* Try for a small size class. */ - if (size <= sc_data_global.small_maxclass && alignment < PAGE) { + if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) { /* * Round size up to the nearest multiple of alignment. 
* @@ -278,20 +278,20 @@ sz_sa2u(size_t size, size_t alignment) { * 192 | 11000000 | 64 */ usize = sz_s2u(ALIGNMENT_CEILING(size, alignment)); - if (usize < sc_data_global.large_minclass) { + if (usize < SC_LARGE_MINCLASS) { return usize; } } /* Large size class. Beware of overflow. */ - if (unlikely(alignment > sc_data_global.large_maxclass)) { + if (unlikely(alignment > SC_LARGE_MAXCLASS)) { return 0; } /* Make sure result is a large size class. */ - if (size <= sc_data_global.large_minclass) { - usize = sc_data_global.large_minclass; + if (size <= SC_LARGE_MINCLASS) { + usize = SC_LARGE_MINCLASS; } else { usize = sz_s2u(size); if (usize < size) { diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index b060043b..7c956468 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -167,7 +167,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) - <= sc_data_global.small_maxclass); + <= SC_SMALL_MAXCLASS); if (slow_path && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, &bin_infos[binind]); @@ -193,7 +193,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) - > sc_data_global.small_maxclass); + > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); if (slow_path && config_fill && unlikely(opt_junk_free)) { diff --git a/src/arena.c b/src/arena.c index 07d91039..91043cff 100644 --- a/src/arena.c +++ b/src/arena.c @@ -296,8 +296,8 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); - if (usize < sc_data_global.large_minclass) { - usize = sc_data_global.large_minclass; + if (usize < SC_LARGE_MINCLASS) { + usize = SC_LARGE_MINCLASS; } index = sz_size2index(usize); hindex = (index 
>= SC_NBINS) ? index - SC_NBINS : 0; @@ -312,8 +312,8 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); - if (usize < sc_data_global.large_minclass) { - usize = sc_data_global.large_minclass; + if (usize < SC_LARGE_MINCLASS) { + usize = SC_LARGE_MINCLASS; } index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; @@ -1389,7 +1389,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return NULL; } - if (likely(size <= sc_data_global.small_maxclass)) { + if (likely(size <= SC_SMALL_MAXCLASS)) { return arena_malloc_small(tsdn, arena, ind, zero); } return large_malloc(tsdn, arena, sz_index2size(ind), zero); @@ -1400,7 +1400,7 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - if (usize <= sc_data_global.small_maxclass + if (usize <= SC_SMALL_MAXCLASS && (alignment < PAGE || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { /* Small; alignment doesn't require special slab placement. 
*/ @@ -1420,8 +1420,8 @@ void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == sc_data_global.large_minclass); - assert(usize <= sc_data_global.small_maxclass); + assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); + assert(usize <= SC_SMALL_MAXCLASS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1451,9 +1451,9 @@ arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, SC_NBINS, false); - assert(isalloc(tsdn, ptr) == sc_data_global.large_minclass); + assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); - return sc_data_global.large_minclass; + return SC_LARGE_MINCLASS; } void @@ -1594,25 +1594,25 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize) { bool ret; /* Calls with non-zero extra had to clamp extra. */ - assert(extra == 0 || size + extra <= sc_data_global.large_maxclass); + assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(size > sc_data_global.large_maxclass)) { + if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; } size_t usize_min = sz_s2u(size); size_t usize_max = sz_s2u(size + extra); - if (likely(oldsize <= sc_data_global.small_maxclass && usize_min - <= sc_data_global.small_maxclass)) { + if (likely(oldsize <= SC_SMALL_MAXCLASS && usize_min + <= SC_SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the * same. 
*/ assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); - if ((usize_max > sc_data_global.small_maxclass + if ((usize_max > SC_SMALL_MAXCLASS || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < oldsize)) { ret = true; @@ -1621,8 +1621,8 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, arena_decay_tick(tsdn, extent_arena_get(extent)); ret = false; - } else if (oldsize >= sc_data_global.large_minclass - && usize_max >= sc_data_global.large_minclass) { + } else if (oldsize >= SC_LARGE_MINCLASS + && usize_max >= SC_LARGE_MINCLASS) { ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, zero); } else { @@ -1643,7 +1643,7 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, zero, tcache, true); } usize = sz_sa2u(usize, alignment); - if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } return ipalloct(tsdn, usize, alignment, zero, tcache, arena); @@ -1654,11 +1654,11 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { size_t usize = sz_s2u(size); - if (unlikely(usize == 0 || size > sc_data_global.large_maxclass)) { + if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } - if (likely(usize <= sc_data_global.small_maxclass)) { + if (likely(usize <= SC_SMALL_MAXCLASS)) { /* Try to avoid moving the allocation. 
*/ UNUSED size_t newsize; if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, @@ -1671,8 +1671,8 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, } } - if (oldsize >= sc_data_global.large_minclass - && usize >= sc_data_global.large_minclass) { + if (oldsize >= SC_LARGE_MINCLASS + && usize >= SC_LARGE_MINCLASS) { return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, tcache, hook_args); } @@ -1985,10 +1985,10 @@ arena_init_huge(void) { bool huge_enabled; /* The threshold should be large size class. */ - if (opt_huge_threshold > sc_data_global.large_maxclass || - opt_huge_threshold < sc_data_global.large_minclass) { + if (opt_huge_threshold > SC_LARGE_MAXCLASS || + opt_huge_threshold < SC_LARGE_MINCLASS) { opt_huge_threshold = 0; - huge_threshold = sc_data_global.large_maxclass + PAGE; + huge_threshold = SC_LARGE_MAXCLASS + PAGE; huge_enabled = false; } else { /* Reserve the index for the huge arena. */ diff --git a/src/ckh.c b/src/ckh.c index 94c4fe69..1bf6df5a 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -276,7 +276,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { lg_curcells++; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); if (unlikely(usize == 0 - || usize > sc_data_global.large_maxclass)) { + || usize > SC_LARGE_MAXCLASS)) { ret = true; goto label_return; } @@ -321,7 +321,7 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { lg_prevbuckets = ckh->lg_curbuckets; lg_curcells = ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 1; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return; } tab = (ckhc_t *)ipallocztm(tsd_tsdn(tsd), usize, CACHELINE, true, NULL, @@ -397,7 +397,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, ckh->keycomp = keycomp; usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); - if (unlikely(usize == 0 || usize > sc_data_global.large_maxclass)) { + if 
(unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { ret = true; goto label_return; } diff --git a/src/extent.c b/src/extent.c index 0953940b..74076b66 100644 --- a/src/extent.c +++ b/src/extent.c @@ -259,7 +259,7 @@ extent_size_quantize_ceil(size_t size) { size_t ret; assert(size > 0); - assert(size - sz_large_pad <= sc_data_global.large_maxclass); + assert(size - sz_large_pad <= SC_LARGE_MAXCLASS); assert((size & PAGE_MASK) == 0); ret = extent_size_quantize_floor(size); @@ -1625,7 +1625,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); - } else if (extent_size_get(extent) >= sc_data_global.large_minclass) { + } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { /* Always coalesce large extents eagerly. */ bool coalesced; size_t prev_size; @@ -1637,7 +1637,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, &coalesced, growing_retained); } while (coalesced && extent_size_get(extent) - >= prev_size + sc_data_global.large_minclass); + >= prev_size + SC_LARGE_MINCLASS); } extent_deactivate_locked(tsdn, arena, extents, extent); diff --git a/src/jemalloc.c b/src/jemalloc.c index 4ffe5aaa..e66735c8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1201,8 +1201,8 @@ malloc_conf_init(void) { /* Experimental feature. 
Will be documented later.*/ CONF_HANDLE_SIZE_T(opt_huge_threshold, "experimental_huge_threshold", - sc_data_global.large_minclass, - sc_data_global.large_maxclass, yes, yes, false) + SC_LARGE_MINCLASS, + SC_LARGE_MAXCLASS, yes, yes, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, (sizeof(size_t) << 3), yes, yes, false) @@ -1827,13 +1827,13 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, szind_t ind_large; size_t bumped_usize = usize; - if (usize <= sc_data_global.small_maxclass) { + if (usize <= SC_SMALL_MAXCLASS) { assert(((dopts->alignment == 0) ? - sz_s2u(sc_data_global.large_minclass) : - sz_sa2u(sc_data_global.large_minclass, dopts->alignment)) - == sc_data_global.large_minclass); - ind_large = sz_size2index(sc_data_global.large_minclass); - bumped_usize = sz_s2u(sc_data_global.large_minclass); + sz_s2u(SC_LARGE_MINCLASS) : + sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) + == SC_LARGE_MINCLASS); + ind_large = sz_size2index(SC_LARGE_MINCLASS); + bumped_usize = sz_s2u(SC_LARGE_MINCLASS); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { @@ -1942,12 +1942,12 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (config_stats || (config_prof && opt_prof)) { usize = sz_index2size(ind); assert(usize > 0 && usize - <= sc_data_global.large_maxclass); + <= SC_LARGE_MAXCLASS); } } else { usize = sz_sa2u(size, dopts->alignment); if (unlikely(usize == 0 - || usize > sc_data_global.large_maxclass)) { + || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } } @@ -1984,7 +1984,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { alloc_ctx.slab = (usize - <= sc_data_global.small_maxclass); + <= SC_SMALL_MAXCLASS); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { @@ 
-2282,9 +2282,9 @@ irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, if (tctx == NULL) { return NULL; } - if (usize <= sc_data_global.small_maxclass) { + if (usize <= SC_SMALL_MAXCLASS) { p = iralloc(tsd, old_ptr, old_usize, - sc_data_global.large_minclass, 0, false, hook_args); + SC_LARGE_MINCLASS, 0, false, hook_args); if (p == NULL) { return NULL; } @@ -2474,7 +2474,7 @@ je_realloc(void *ptr, size_t arg_size) { if (config_prof && opt_prof) { usize = sz_s2u(size); if (unlikely(usize == 0 - || usize > sc_data_global.large_maxclass)) { + || usize > SC_LARGE_MAXCLASS)) { ret = NULL; } else { ret = irealloc_prof(tsd, ptr, old_usize, usize, @@ -2787,9 +2787,9 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, if (tctx == NULL) { return NULL; } - if (usize <= sc_data_global.small_maxclass) { + if (usize <= SC_SMALL_MAXCLASS) { p = iralloct(tsdn, old_ptr, old_usize, - sc_data_global.large_minclass, alignment, zero, tcache, + SC_LARGE_MINCLASS, alignment, zero, tcache, arena, hook_args); if (p == NULL) { return NULL; @@ -2900,7 +2900,7 @@ je_rallocx(void *ptr, size_t size, int flags) { usize = (alignment == 0) ? 
sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 - || usize > sc_data_global.large_maxclass)) { + || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, @@ -2986,18 +2986,18 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, if (alignment == 0) { usize_max = sz_s2u(size+extra); assert(usize_max > 0 - && usize_max <= sc_data_global.large_maxclass); + && usize_max <= SC_LARGE_MAXCLASS); } else { usize_max = sz_sa2u(size+extra, alignment); if (unlikely(usize_max == 0 - || usize_max > sc_data_global.large_maxclass)) { + || usize_max > SC_LARGE_MAXCLASS)) { /* * usize_max is out of range, and chances are that * allocation will fail, but use the maximum possible * value and carry on with prof_alloc_prep(), just in * case allocation succeeds. */ - usize_max = sc_data_global.large_maxclass; + usize_max = SC_LARGE_MAXCLASS; } } tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); @@ -3046,18 +3046,18 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { /* * The API explicitly absolves itself of protecting against (size + * extra) numerical overflow, but we may need to clamp extra to avoid - * exceeding sc_data_global.large_maxclass. + * exceeding SC_LARGE_MAXCLASS. * * Ordinarily, size limit checking is handled deeper down, but here we * have to check as part of (size + extra) clamping, since we need the * clamped value in the above helper functions. 
*/ - if (unlikely(size > sc_data_global.large_maxclass)) { + if (unlikely(size > SC_LARGE_MAXCLASS)) { usize = old_usize; goto label_not_resized; } - if (unlikely(sc_data_global.large_maxclass - size < extra)) { - extra = sc_data_global.large_maxclass - size; + if (unlikely(SC_LARGE_MAXCLASS - size < extra)) { + extra = SC_LARGE_MAXCLASS - size; } if (config_prof && opt_prof) { @@ -3244,7 +3244,7 @@ je_nallocx(size_t size, int flags) { check_entry_exit_locking(tsdn); usize = inallocx(tsdn, size, flags); - if (unlikely(usize > sc_data_global.large_maxclass)) { + if (unlikely(usize > SC_LARGE_MAXCLASS)) { LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } diff --git a/src/large.c b/src/large.c index 87d9ec0b..84073618 100644 --- a/src/large.c +++ b/src/large.c @@ -28,7 +28,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, assert(!tsdn_null(tsdn) || arena != NULL); ausize = sz_sa2u(usize, alignment); - if (unlikely(ausize == 0 || ausize > sc_data_global.large_maxclass)) { + if (unlikely(ausize == 0 || ausize > SC_LARGE_MAXCLASS)) { return NULL; } @@ -221,10 +221,10 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. */ - assert(usize_min > 0 && usize_max <= sc_data_global.large_maxclass); + assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= sc_data_global.large_minclass - && usize_max >= sc_data_global.large_minclass); + assert(oldusize >= SC_LARGE_MINCLASS + && usize_max >= SC_LARGE_MINCLASS); if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ @@ -278,10 +278,10 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t oldusize = extent_usize_get(extent); /* The following should have been caught by callers. 
*/ - assert(usize > 0 && usize <= sc_data_global.large_maxclass); + assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= sc_data_global.large_minclass - && usize >= sc_data_global.large_minclass); + assert(oldusize >= SC_LARGE_MINCLASS + && usize >= SC_LARGE_MINCLASS); /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { diff --git a/src/sc.c b/src/sc.c index e8eef1c2..74c91018 100644 --- a/src/sc.c +++ b/src/sc.c @@ -228,6 +228,20 @@ size_classes( sc_data->lg_large_minclass = lg_large_minclass; sc_data->large_minclass = (ZU(1) << lg_large_minclass); sc_data->large_maxclass = large_maxclass; + + /* + * We compute these values in two ways: + * - Incrementally, as above. + * - In macros, in sc.h. + * The computation is easier when done incrementally, but putting it in + * a constant makes it available to the fast paths without having to + * touch the extra global cacheline. We assert, however, that the two + * computations are equivalent. + */ + assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS); + assert(sc_data->large_minclass == SC_LARGE_MINCLASS); + assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); + assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); } void diff --git a/src/tcache.c b/src/tcache.c index edd047ab..7346df8c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -658,8 +658,8 @@ bool tcache_boot(tsdn_t *tsdn) { /* If necessary, clamp opt_lg_tcache_max. 
*/ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < - sc_data_global.small_maxclass) { - tcache_maxclass = sc_data_global.small_maxclass; + SC_SMALL_MAXCLASS) { + tcache_maxclass = SC_SMALL_MAXCLASS; } else { tcache_maxclass = (ZU(1) << opt_lg_tcache_max); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 91c6e5b1..be8933a7 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -123,13 +123,13 @@ test_junk(size_t sz_min, size_t sz_max) { TEST_BEGIN(test_junk_small) { test_skip_if(!config_fill); - test_junk(1, sc_data_global.small_maxclass - 1); + test_junk(1, SC_SMALL_MAXCLASS - 1); } TEST_END TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); - test_junk(sc_data_global.small_maxclass + 1, + test_junk(SC_SMALL_MAXCLASS + 1, (1U << (sc_data_global.lg_large_minclass + 1))); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 230ecb0e..f6362008 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -721,7 +721,7 @@ TEST_BEGIN(test_arenas_lextent_constants) { } while (0) TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, - sc_data_global.large_minclass); + SC_LARGE_MINCLASS); #undef TEST_ARENAS_LEXTENT_CONSTANT } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 4d1daf2c..b017bc03 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -85,8 +85,8 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; - extent_init(&extent_a, NULL, NULL, sc_data_global.large_minclass, false, - sz_size2index(sc_data_global.large_minclass), 0, + extent_init(&extent_a, NULL, NULL, SC_LARGE_MINCLASS, false, + sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, true); extent_init(&extent_b, NULL, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true); diff --git a/test/unit/stats.c b/test/unit/stats.c index 8fe0f3ad..b8f549be 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -33,7 +33,7 @@ TEST_BEGIN(test_stats_large) { size_t sz; int expected = config_stats ? 
0 : ENOENT; - p = mallocx(sc_data_global.small_maxclass + 1, MALLOCX_ARENA(0)); + p = mallocx(SC_SMALL_MAXCLASS + 1, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), @@ -74,7 +74,7 @@ TEST_BEGIN(test_stats_arenas_summary) { uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; - little = mallocx(sc_data_global.small_maxclass, MALLOCX_ARENA(0)); + little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); assert_ptr_not_null(little, "Unexpected mallocx() failure"); large = mallocx((1U << sc_data_global.lg_large_minclass), MALLOCX_ARENA(0)); @@ -149,7 +149,7 @@ TEST_BEGIN(test_stats_arenas_small) { no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ - p = mallocx(sc_data_global.small_maxclass, MALLOCX_ARENA(0)); + p = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), diff --git a/test/unit/zero.c b/test/unit/zero.c index 20a70628..8b8d2072 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -41,13 +41,13 @@ test_zero(size_t sz_min, size_t sz_max) { TEST_BEGIN(test_zero_small) { test_skip_if(!config_fill); - test_zero(1, sc_data_global.small_maxclass - 1); + test_zero(1, SC_SMALL_MAXCLASS - 1); } TEST_END TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); - test_zero(sc_data_global.small_maxclass + 1, + test_zero(SC_SMALL_MAXCLASS + 1, 1U << (sc_data_global.lg_large_minclass + 1)); } TEST_END From 0eb0641cac0c3031f84469953b5e75b380867ccb Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 10 Jul 2018 14:41:20 +0200 Subject: [PATCH 1166/2608] Simplify output of gen_travis.py script This commit simplifies the output of the `gen_travis.py` script by reusing addons. The `.travis.yml` script is updated to reflect these changes. 
--- .travis.yml | 43 ++++++++----------------------- scripts/gen_travis.py | 60 ++++++++++++++++++++++++++----------------- 2 files changed, 46 insertions(+), 57 deletions(-) diff --git a/.travis.yml b/.travis.yml index 7d93ead5..854f8787 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,7 +11,7 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: + addons: &gcc_multilib apt: packages: - gcc-multilib @@ -41,10 +41,7 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -61,46 +58,25 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: 
- apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: - apt: - packages: - - gcc-multilib + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -150,6 +126,7 @@ matrix: - make -j test/unit/log - test/unit/log + before_script: - autoconf - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 6dd39290..15708834 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -4,6 +4,7 @@ from itertools import combinations travis_template = """\ language: generic +dist: precise matrix: include: @@ -61,47 +62,58 @@ unusual_combinations_to_test = [] for i in xrange(MAX_UNUSUAL_OPTIONS + 1): unusual_combinations_to_test += combinations(all_unusuals, i) -include_rows = "" -for unusual_combination in unusual_combinations_to_test: - os = os_default - if os_unusual in unusual_combination: - os = os_unusual +gcc_multilib_set = False +# Formats a job from a combination of flags +def format_job(combination): + global gcc_multilib_set - compilers = compilers_default - if compilers_unusual in unusual_combination: - compilers = compilers_unusual + os = os_unusual if os_unusual in combination else os_default + compilers = compilers_unusual if 
compilers_unusual in combination else compilers_default - compiler_flags = [ - x for x in unusual_combination if x in compiler_flag_unusuals] + compiler_flags = [x for x in combination if x in compiler_flag_unusuals] + configure_flags = [x for x in combination if x in configure_flag_unusuals] + malloc_conf = [x for x in combination if x in malloc_conf_unusuals] - configure_flags = [ - x for x in unusual_combination if x in configure_flag_unusuals] - - malloc_conf = [ - x for x in unusual_combination if x in malloc_conf_unusuals] # Filter out unsupported configurations on OS X. if os == 'osx' and ('dss:primary' in malloc_conf or \ 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \ in malloc_conf): - continue + return "" if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) # Filter out an unsupported configuration - heap profiling on OS X. if os == 'osx' and '--enable-prof' in configure_flags: - continue + return "" # We get some spurious errors when -Warray-bounds is enabled. 
env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( compilers, " ".join(compiler_flags), " ".join(configure_flags)) - include_rows += ' - os: %s\n' % os - include_rows += ' env: %s\n' % env_string - if '-m32' in unusual_combination and os == 'linux': - include_rows += ' addons:\n' - include_rows += ' apt:\n' - include_rows += ' packages:\n' - include_rows += ' - gcc-multilib\n' + job = "" + job += ' - os: %s\n' % os + job += ' env: %s\n' % env_string + if '-m32' in combination and os == 'linux': + job += ' addons:' + if gcc_multilib_set: + job += ' *gcc_multilib\n' + else: + job += ' &gcc_multilib\n' + job += ' apt:\n' + job += ' packages:\n' + job += ' - gcc-multilib\n' + gcc_multilib_set = True + return job + +include_rows = "" +for combination in unusual_combinations_to_test: + include_rows += format_job(combination) + +# Development build +include_rows += '''\ + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" +''' print travis_template % include_rows From 6deed86deb48d3b432d972a139a413a9fb38283b Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 11 Jul 2018 15:18:40 +0200 Subject: [PATCH 1167/2608] Test that .travis.yml has been produced by gen_travis.py on CI This commits checks on Travis-CI that the current `.travis.yml` file equals the output of the `gen_travis.py` script, and updated the `.travis.yml` file accordingly. 
--- .travis.yml | 5 +---- scripts/gen_travis.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.travis.yml b/.travis.yml index 854f8787..cd3be832 100644 --- a/.travis.yml +++ b/.travis.yml @@ -121,14 +121,11 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - script: - - make check - - make -j test/unit/log - - test/unit/log before_script: - autoconf + - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS - make -j3 - make -j3 tests diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 15708834..44732052 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -12,6 +12,7 @@ matrix: before_script: - autoconf + - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - ./configure ${COMPILER_FLAGS:+ \ CC="$CC $COMPILER_FLAGS" \ CXX="$CXX $COMPILER_FLAGS" } \ From 4bc48718b2eb98e3646a86af816f9c6db29d1612 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 17 Jul 2018 14:09:31 -0700 Subject: [PATCH 1168/2608] Tolerate experimental features for abort_conf. Not aborting with unrecognized experimental options. This helps us testing experimental features with abort_conf enabled. --- src/jemalloc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index e66735c8..8e0a581b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -896,6 +896,11 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); /* If abort_conf is set, error out after processing all options. 
*/ + const char *experimental = "experimental_"; + if (strncmp(k, experimental, strlen(experimental)) == 0) { + /* However, tolerate experimental features. */ + return; + } had_conf_error = true; } From 3aba072cef71d0f2bacc4ef10932a46f1df43192 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Jul 2018 17:08:10 -0700 Subject: [PATCH 1169/2608] SC: Remove global data. The global data is mostly only used at initialization, or for easy access to values we could compute statically. Instead of consuming that space (and risking TLB misses), we can just pass around a pointer to stack data during bootstrapping. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/extent_structs.h | 4 ++-- include/jemalloc/internal/sc.h | 13 ++++------- include/jemalloc/internal/sz.h | 27 ++++++++++------------ src/arena.c | 9 ++++---- src/base.c | 2 +- src/extent.c | 20 ++++++++-------- src/jemalloc.c | 19 +++++++++------ src/sc.c | 6 +++-- src/sz.c | 6 +++-- test/unit/junk.c | 3 +-- test/unit/mallctl.c | 2 +- test/unit/prof_gdump.c | 8 +++---- test/unit/size_classes.c | 18 ++++++++------- test/unit/stats.c | 4 ++-- test/unit/zero.c | 3 +-- 16 files changed, 73 insertions(+), 73 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 7a469462..4f744cac 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -85,7 +85,7 @@ size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); arena_t *arena_choose_huge(tsd_t *tsd); -void arena_boot(void); +void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index c6c1e234..1983097e 
100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -181,14 +181,14 @@ struct extents_s { * * Synchronization: mtx. */ - extent_heap_t heaps[SC_NPSIZES_MAX + 1]; + extent_heap_t heaps[SC_NPSIZES + 1]; /* * Bitmap for which set bits correspond to non-empty heaps. * * Synchronization: mtx. */ - bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES_MAX + 1)]; + bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; /* * LRU of all extents in heaps. diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 5c94378c..5b79bb47 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -182,6 +182,7 @@ #define SC_NGROUP (1ULL << SC_LG_NGROUP) #define SC_PTR_BITS ((1ULL << LG_SIZEOF_PTR) * 8) #define SC_NTINY (LG_QUANTUM - SC_LG_TINY_MIN) +#define SC_LG_TINY_MAXCLASS (LG_QUANTUM > SC_LG_TINY_MIN ? LG_QUANTUM - 1 : -1) #define SC_NPSEUDO SC_NGROUP #define SC_LG_FIRST_REGULAR_BASE (LG_QUANTUM + SC_LG_NGROUP) /* @@ -200,7 +201,7 @@ * because delta may be smaller than a page, this is not the same as the number * of size classes that are *multiples* of the page size. */ -#define SC_NPSIZES_MAX ( \ +#define SC_NPSIZES ( \ /* Start with all the size classes. */ \ SC_NSIZES \ /* Subtract out those groups with too small a base. */ \ @@ -209,11 +210,8 @@ - SC_NPSEUDO \ /* And the tiny group. */ \ - SC_NTINY \ - /* \ - * In the lg_base == lg_page - 1 group, only the last sc is big \ - * enough to make it to lg_page. \ - */ \ - - (SC_NGROUP - 1)) + /* Groups where ndelta*delta is not a multiple of the page size. */ \ + - (2 * (SC_NGROUP))) /* * We declare a size class is binnable if size < page size * group. Or, in other @@ -314,7 +312,6 @@ struct sc_data_s { sc_t sc[SC_NSIZES]; }; -extern sc_data_t sc_data_global; void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. 
@@ -322,6 +319,6 @@ void sc_data_init(sc_data_t *data); */ void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs); -void sc_boot(); +void sc_boot(sc_data_t *data); #endif /* JEMALLOC_INTERNAL_SC_H */ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index e743d878..69625ee2 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -26,7 +26,7 @@ * sz_pind2sz_tab encodes the same information as could be computed by * sz_pind2sz_compute(). */ -extern size_t sz_pind2sz_tab[SC_NPSIZES_MAX + 1]; +extern size_t sz_pind2sz_tab[SC_NPSIZES + 1]; /* * sz_index2size_tab encodes the same information as could be computed (at * unacceptable cost in some code paths) by sz_index2size_compute(). @@ -52,7 +52,7 @@ extern void sz_boot(const sc_data_t *sc_data); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { if (unlikely(psz > SC_LARGE_MAXCLASS)) { - return sc_data_global.npsizes; + return SC_NPSIZES; } pszind_t x = lg_floor((psz<<1)-1); pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ? @@ -72,7 +72,7 @@ sz_psz2ind(size_t psz) { static inline size_t sz_pind2sz_compute(pszind_t pind) { - if (unlikely(pind == sc_data_global.npsizes)) { + if (unlikely(pind == SC_NPSIZES)) { return SC_LARGE_MAXCLASS + PAGE; } size_t grp = pind >> SC_LG_NGROUP; @@ -99,7 +99,7 @@ sz_pind2sz_lookup(pszind_t pind) { static inline size_t sz_pind2sz(pszind_t pind) { - assert(pind < sc_data_global.npsizes + 1); + assert(pind < SC_NPSIZES + 1); return sz_pind2sz_lookup(pind); } @@ -123,9 +123,8 @@ sz_size2index_compute(size_t size) { return SC_NSIZES; } #if (SC_NTINY != 0) - if (size <= (ZU(1) << sc_data_global.lg_tiny_maxclass)) { - szind_t lg_tmin = sc_data_global.lg_tiny_maxclass - - sc_data_global.ntiny + 1; + if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { + szind_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; szind_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? 
0 : lg_ceil - lg_tmin); } @@ -143,7 +142,7 @@ sz_size2index_compute(size_t size) { szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << SC_LG_NGROUP) - 1); - szind_t index = sc_data_global.ntiny + grp + mod; + szind_t index = SC_NTINY + grp + mod; return index; } } @@ -168,13 +167,12 @@ sz_size2index(size_t size) { static inline size_t sz_index2size_compute(szind_t index) { #if (SC_NTINY > 0) - if (index < sc_data_global.ntiny) { - return (ZU(1) << (sc_data_global.lg_tiny_maxclass - - sc_data_global.ntiny + 1 + index)); + if (index < SC_NTINY) { + return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index)); } #endif { - size_t reduced_index = index - sc_data_global.ntiny; + size_t reduced_index = index - SC_NTINY; size_t grp = reduced_index >> SC_LG_NGROUP; size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - 1); @@ -211,9 +209,8 @@ sz_s2u_compute(size_t size) { return 0; } #if (SC_NTINY > 0) - if (size <= (ZU(1) << sc_data_global.lg_tiny_maxclass)) { - size_t lg_tmin = sc_data_global.lg_tiny_maxclass - - sc_data_global.ntiny + 1; + if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { + size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : (ZU(1) << lg_ceil)); diff --git a/src/arena.c b/src/arena.c index 91043cff..da7fd78b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1754,8 +1754,7 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, if (new_limit != NULL) { size_t limit = *new_limit; /* Grow no more than the new limit. 
*/ - if ((new_ind = sz_psz2ind(limit + 1) - 1) - >= sc_data_global.npsizes) { + if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { return true; } } @@ -1899,7 +1898,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); - arena->retain_grow_limit = sc_data_global.npsizes - 1; + arena->retain_grow_limit = sz_psz2ind(SC_LARGE_MAXCLASS); if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2001,11 +2000,11 @@ arena_init_huge(void) { } void -arena_boot(void) { +arena_boot(sc_data_t *sc_data) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); for (unsigned i = 0; i < SC_NBINS; i++) { - sc_t *sc = &sc_data_global.sc[i]; + sc_t *sc = &sc_data->sc[i]; div_init(&arena_binind_div_info[i], (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); } diff --git a/src/base.c b/src/base.c index cabf66c4..f3c61661 100644 --- a/src/base.c +++ b/src/base.c @@ -262,7 +262,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, */ size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size + usize)); - pszind_t pind_next = (*pind_last + 1 < sc_data_global.npsizes) ? + pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? *pind_last + 1 : *pind_last; size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? 
min_block_size diff --git a/src/extent.c b/src/extent.c index 74076b66..592974a8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,7 +20,7 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static const bitmap_info_t extents_bitmap_info = - BITMAP_INFO_INITIALIZER(SC_NPSIZES_MAX+1); + BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, @@ -288,7 +288,7 @@ extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } - for (unsigned i = 0; i < sc_data_global.npsizes + 1; i++) { + for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { extent_heap_new(&extents->heaps[i]); } bitmap_init(extents->bitmap, &extents_bitmap_info, true); @@ -375,7 +375,7 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, &extents_bitmap_info, (size_t)pind); i < pind_max; i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)i+1)) { - assert(i < sc_data_global.npsizes); + assert(i < SC_NPSIZES); assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); uintptr_t base = (uintptr_t)extent_base_get(extent); @@ -405,7 +405,7 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); - if (i < sc_data_global.npsizes + 1) { + if (i < SC_NPSIZES + 1) { /* * In order to reduce fragmentation, avoid reusing and splitting * large extents for much smaller sizes. 
@@ -434,7 +434,7 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); - i < sc_data_global.npsizes + 1; + i < SC_NPSIZES + 1; i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)i+1)) { assert(!extent_heap_empty(&extents->heaps[i])); @@ -443,10 +443,10 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, if (ret == NULL || extent_snad_comp(extent, ret) < 0) { ret = extent; } - if (i == sc_data_global.npsizes) { + if (i == SC_NPSIZES) { break; } - assert(i < sc_data_global.npsizes); + assert(i < SC_NPSIZES); } return ret; @@ -1249,13 +1249,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->extent_grow_next + egn_skip == - sc_data_global.npsizes) { + if (arena->extent_grow_next + egn_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. 
*/ goto label_err; } - assert(arena->extent_grow_next + egn_skip - < sc_data_global.npsizes); alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 8e0a581b..d473664f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -920,7 +920,7 @@ malloc_slow_flag_init(void) { } static void -malloc_conf_init(void) { +malloc_conf_init(sc_data_t *sc_data) { unsigned i; char buf[PATH_MAX + 1]; const char *opts, *k, *v; @@ -1254,7 +1254,7 @@ malloc_conf_init(void) { &pgs); if (!err) { sc_data_update_slab_size( - &sc_data_global, slab_start, + sc_data, slab_start, slab_end, (int)pgs); } else { malloc_conf_error( @@ -1368,6 +1368,11 @@ static bool malloc_init_hard_a0_locked() { malloc_initializer = INITIALIZER; + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + sc_data_t sc_data = {0}; + JEMALLOC_DIAGNOSTIC_POP + /* * Ordering here is somewhat tricky; we need sc_boot() first, since that * determines what the size classes will be, and then @@ -1375,10 +1380,10 @@ malloc_init_hard_a0_locked() { * before sz_boot and bin_boot, which assume that the values they read * out of sc_data_global are final. */ - sc_boot(); - malloc_conf_init(); - sz_boot(&sc_data_global); - bin_boot(&sc_data_global); + sc_boot(&sc_data); + malloc_conf_init(&sc_data); + sz_boot(&sc_data); + bin_boot(&sc_data); if (config_prof) { prof_boot0(); @@ -1407,7 +1412,7 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } - arena_boot(); + arena_boot(&sc_data); if (tcache_boot(TSDN_NULL)) { return true; } diff --git a/src/sc.c b/src/sc.c index 74c91018..8784bdd0 100644 --- a/src/sc.c +++ b/src/sc.c @@ -238,6 +238,8 @@ size_classes( * touch the extra global cacheline. We assert, however, that the two * computations are equivalent. 
*/ + assert(sc_data->npsizes == SC_NPSIZES); + assert(sc_data->lg_tiny_maxclass == SC_LG_TINY_MAXCLASS); assert(sc_data->small_maxclass == SC_SMALL_MAXCLASS); assert(sc_data->large_minclass == SC_LARGE_MINCLASS); assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); @@ -297,6 +299,6 @@ sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { } void -sc_boot() { - sc_data_init(&sc_data_global); +sc_boot(sc_data_t *data) { + sc_data_init(data); } diff --git a/src/sz.c b/src/sz.c index e038728e..77f89c62 100644 --- a/src/sz.c +++ b/src/sz.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) -size_t sz_pind2sz_tab[SC_NPSIZES_MAX+1]; +size_t sz_pind2sz_tab[SC_NPSIZES+1]; static void sz_boot_pind2sz_tab(const sc_data_t *sc_data) { @@ -15,7 +15,9 @@ sz_boot_pind2sz_tab(const sc_data_t *sc_data) { pind++; } } - sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE; + for (int i = pind; i <= (int)SC_NPSIZES; i++) { + sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE; + } } JEMALLOC_ALIGNED(CACHELINE) diff --git a/test/unit/junk.c b/test/unit/junk.c index be8933a7..57e3ad43 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -129,8 +129,7 @@ TEST_END TEST_BEGIN(test_junk_large) { test_skip_if(!config_fill); - test_junk(SC_SMALL_MAXCLASS + 1, - (1U << (sc_data_global.lg_large_minclass + 1))); + test_junk(SC_SMALL_MAXCLASS + 1, (1U << (SC_LG_LARGE_MINCLASS + 1))); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f6362008..452d884d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -581,7 +581,7 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(default_limit, sz_pind2sz(sc_data_global.npsizes - 1), + assert_zu_eq(default_limit, SC_LARGE_MAXCLASS, "Unexpected default for retain_grow_limit"); new_limit = PAGE - 1; diff --git a/test/unit/prof_gdump.c 
b/test/unit/prof_gdump.c index 0b8d7c34..f7e0aac7 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -29,12 +29,12 @@ TEST_BEGIN(test_gdump) { prof_dump_open = prof_dump_open_intercept; did_prof_dump_open = false; - p = mallocx((1U << sc_data_global.lg_large_minclass), 0); + p = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); did_prof_dump_open = false; - q = mallocx((1U << sc_data_global.lg_large_minclass), 0); + q = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); @@ -45,7 +45,7 @@ TEST_BEGIN(test_gdump) { "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; - r = mallocx((1U << sc_data_global.lg_large_minclass), 0); + r = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_false(did_prof_dump_open, "Unexpected profile dump"); @@ -56,7 +56,7 @@ TEST_BEGIN(test_gdump) { "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; - s = mallocx((1U << sc_data_global.lg_large_minclass), 0); + s = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); assert_ptr_not_null(q, "Unexpected mallocx() failure"); assert_true(did_prof_dump_open, "Expected a profile dump"); diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 7c28e166..69473363 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -108,8 +108,13 @@ TEST_BEGIN(test_psize_classes) { size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); - assert_u_eq(pind+1, sz_psz2ind(size_class+1), - "Next size_class does not round up properly"); + if (size_class == SC_LARGE_MAXCLASS) { + assert_u_eq(SC_NPSIZES, sz_psz2ind(size_class + 1), + "Next size_class does not round up properly"); + } else { + 
assert_u_eq(pind + 1, sz_psz2ind(size_class + 1), + "Next size_class does not round up properly"); + } assert_zu_eq(size_class, (pind > 0) ? sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1), @@ -156,16 +161,13 @@ TEST_BEGIN(test_overflow) { assert_zu_eq(sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow"); - assert_u_eq(sz_psz2ind(max_size_class+1), sc_data_global.npsizes, + assert_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), sc_data_global.npsizes, + assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(SIZE_T_MAX), sc_data_global.npsizes, + assert_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_le(sc_data_global.npsizes, SC_NPSIZES_MAX, - "Dynamic value of npsizes is higher than static bound."); - assert_zu_eq(sz_psz2u(max_size_class+1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); diff --git a/test/unit/stats.c b/test/unit/stats.c index b8f549be..4323bfa3 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -76,7 +76,7 @@ TEST_BEGIN(test_stats_arenas_summary) { little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); assert_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx((1U << sc_data_global.lg_large_minclass), + large = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); assert_ptr_not_null(large, "Unexpected mallocx() failure"); @@ -192,7 +192,7 @@ TEST_BEGIN(test_stats_arenas_large) { uint64_t epoch, nmalloc, ndalloc; int expected = config_stats ? 
0 : ENOENT; - p = mallocx((1U << sc_data_global.lg_large_minclass), MALLOCX_ARENA(0)); + p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); assert_ptr_not_null(p, "Unexpected mallocx() failure"); assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), diff --git a/test/unit/zero.c b/test/unit/zero.c index 8b8d2072..271fd5cb 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -47,8 +47,7 @@ TEST_END TEST_BEGIN(test_zero_large) { test_skip_if(!config_fill); - test_zero(SC_SMALL_MAXCLASS + 1, - 1U << (sc_data_global.lg_large_minclass + 1)); + test_zero(SC_SMALL_MAXCLASS + 1, 1U << (SC_LG_LARGE_MINCLASS + 1)); } TEST_END From 013ab26c8674e07d40098f7385e570c6d8b0dee9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 26 Jul 2018 14:17:36 -0700 Subject: [PATCH 1170/2608] TSD: Add a tsd_nominal_list death assertion. A thread should have had its state transition away from nominal before it dies. This change adds that to the list of thread death assertions. --- src/tsd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tsd.c b/src/tsd.c index f2b601dd..26142ff9 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -238,6 +238,7 @@ tsd_data_init(tsd_t *tsd) { static void assert_tsd_data_cleanup_done(tsd_t *tsd) { assert(!tsd_nominal(tsd)); + assert(!tsd_in_nominal_list(tsd)); assert(*tsd_arenap_get_unsafe(tsd) == NULL); assert(*tsd_iarenap_get_unsafe(tsd) == NULL); assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true); From 41b7372eadee941b9164751b8d4963f915d3ceae Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 26 Jul 2018 14:42:37 -0700 Subject: [PATCH 1171/2608] TSD: Add fork support to tsd_nominal_tsds. In case of multithreaded fork, we want to leave the child in a reasonable state, in which tsd_nominal_tsds is either empty or contains only the forking thread. 
--- include/jemalloc/internal/tsd.h | 3 +++ src/jemalloc.c | 5 +++++ src/tsd.c | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index e5e82f42..59a18857 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -105,6 +105,9 @@ void tsd_cleanup(void *arg); tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); void tsd_state_set(tsd_t *tsd, uint8_t new_state); void tsd_slow_update(tsd_t *tsd); +void tsd_prefork(tsd_t *tsd); +void tsd_postfork_parent(tsd_t *tsd); +void tsd_postfork_child(tsd_t *tsd); /* * Call ..._inc when your module wants to take all threads down the slow paths, diff --git a/src/jemalloc.c b/src/jemalloc.c index d473664f..85ec9e0b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3470,6 +3470,7 @@ _malloc_prefork(void) } } prof_prefork1(tsd_tsdn(tsd)); + tsd_prefork(tsd); } #ifndef JEMALLOC_MUTEX_INIT_CB @@ -3492,6 +3493,8 @@ _malloc_postfork(void) tsd = tsd_fetch(); + tsd_postfork_parent(tsd); + witness_postfork_parent(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { @@ -3519,6 +3522,8 @@ jemalloc_postfork_child(void) { tsd = tsd_fetch(); + tsd_postfork_child(tsd); + witness_postfork_child(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. 
*/ for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { diff --git a/src/tsd.c b/src/tsd.c index 26142ff9..1204a0de 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -509,3 +509,23 @@ tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) { malloc_mutex_unlock(TSDN_NULL, &head->lock); } #endif + +void +tsd_prefork(tsd_t *tsd) { + malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +void +tsd_postfork_parent(tsd_t *tsd) { + malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); +} + +void +tsd_postfork_child(tsd_t *tsd) { + malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); + ql_new(&tsd_nominal_tsds); + + if (tsd_state_get(tsd) <= tsd_state_nominal_max) { + tsd_add_nominal(tsd); + } +} From eb261e53a6bfaef9797395fe09d6a425b11acb42 Mon Sep 17 00:00:00 2001 From: Tyler Etzel Date: Thu, 5 Jul 2018 10:31:43 -0700 Subject: [PATCH 1172/2608] Small refactoring of emitter - Make API more clear for using as standalone json emitter - Support cases that weren't possible before, e.g. 
- emitting primitive values in an array - emitting nested arrays --- include/jemalloc/internal/emitter.h | 402 ++++++++++++++++------------ src/stats.c | 92 +++---- test/unit/emitter.c | 94 +++++-- 3 files changed, 345 insertions(+), 243 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 3a2b2f7f..f8da2285 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -60,17 +60,6 @@ struct emitter_row_s { ql_head(emitter_col_t) cols; }; -static inline void -emitter_row_init(emitter_row_t *row) { - ql_new(&row->cols); -} - -static inline void -emitter_col_init(emitter_col_t *col, emitter_row_t *row) { - ql_elm_new(col, link); - ql_tail_insert(&row->cols, col, link); -} - typedef struct emitter_s emitter_t; struct emitter_s { emitter_output_t output; @@ -80,18 +69,10 @@ struct emitter_s { int nesting_depth; /* True if we've already emitted a value at the given depth. */ bool item_at_depth; + /* True if we emitted a key and will emit corresponding value next. */ + bool emitted_key; }; -static inline void -emitter_init(emitter_t *emitter, emitter_output_t emitter_output, - void (*write_cb)(void *, const char *), void *cbopaque) { - emitter->output = emitter_output; - emitter->write_cb = write_cb; - emitter->cbopaque = cbopaque; - emitter->item_at_depth = false; - emitter->nesting_depth = 0; -} - /* Internal convenience function. Write to the emitter the given string. */ JEMALLOC_FORMAT_PRINTF(2, 3) static inline void @@ -103,18 +84,6 @@ emitter_printf(emitter_t *emitter, const char *format, ...) { va_end(ap); } -/* Write to the emitter the given string, but only in table mode. */ -JEMALLOC_FORMAT_PRINTF(2, 3) -static inline void -emitter_table_printf(emitter_t *emitter, const char *format, ...) 
{ - if (emitter->output == emitter_output_table) { - va_list ap; - va_start(ap, format); - malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); - va_end(ap); - } -} - static inline void emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, emitter_justify_t justify, int width) { @@ -235,47 +204,143 @@ emitter_indent(emitter_t *emitter) { static inline void emitter_json_key_prefix(emitter_t *emitter) { + if (emitter->emitted_key) { + emitter->emitted_key = false; + return; + } emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); emitter_indent(emitter); } -static inline void -emitter_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - assert(emitter->nesting_depth == 0); - emitter_printf(emitter, "{"); - emitter_nest_inc(emitter); - } else { - // tabular init - emitter_printf(emitter, "%s", ""); - } -} +/******************************************************************************/ +/* Public functions for emitter_t. */ static inline void -emitter_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - assert(emitter->nesting_depth == 1); - emitter_nest_dec(emitter); - emitter_printf(emitter, "\n}\n"); - } +emitter_init(emitter_t *emitter, emitter_output_t emitter_output, + void (*write_cb)(void *, const char *), void *cbopaque) { + emitter->output = emitter_output; + emitter->write_cb = write_cb; + emitter->cbopaque = cbopaque; + emitter->item_at_depth = false; + emitter->emitted_key = false; + emitter->nesting_depth = 0; } -/* - * Note emits a different kv pair as well, but only in table mode. Omits the - * note if table_note_key is NULL. +/******************************************************************************/ +/* JSON public API. */ + +/* + * Emits a key (e.g. as appears in an object). The next json entity emitted will + * be the corresponding value. 
*/ static inline void -emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, +emitter_json_key(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": ", json_key); + emitter->emitted_key = true; + } +} + +static inline void +emitter_json_value(emitter_t *emitter, emitter_type_t value_type, + const void *value) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + emitter->item_at_depth = true; + } +} + +/* Shorthand for calling emitter_json_key and then emitter_json_value. */ +static inline void +emitter_json_kv(emitter_t *emitter, const char *json_key, + emitter_type_t value_type, const void *value) { + emitter_json_key(emitter, json_key); + emitter_json_value(emitter, value_type, value); +} + +static inline void +emitter_json_array_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "["); + emitter_nest_inc(emitter); + } +} + +/* Shorthand for calling emitter_json_key and then emitter_json_array_begin. 
*/ +static inline void +emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) { + emitter_json_key(emitter, json_key); + emitter_json_array_begin(emitter); +} + +static inline void +emitter_json_array_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "]"); + } +} + +static inline void +emitter_json_object_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } +} + +/* Shorthand for calling emitter_json_key and then emitter_json_object_begin. */ +static inline void +emitter_json_object_kv_begin(emitter_t *emitter, const char *json_key) { + emitter_json_key(emitter, json_key); + emitter_json_object_begin(emitter); +} + +static inline void +emitter_json_object_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } +} + + +/******************************************************************************/ +/* Table public API. 
*/ + +static inline void +emitter_table_dict_begin(emitter_t *emitter, const char *table_key) { + if (emitter->output == emitter_output_table) { + emitter_indent(emitter); + emitter_printf(emitter, "%s\n", table_key); + emitter_nest_inc(emitter); + } +} + +static inline void +emitter_table_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_table) { + emitter_nest_dec(emitter); + } +} + +static inline void +emitter_table_kv_note(emitter_t *emitter, const char *table_key, emitter_type_t value_type, const void *value, const char *table_note_key, emitter_type_t table_note_value_type, const void *table_note_value) { - if (emitter->output == emitter_output_json) { - assert(emitter->nesting_depth > 0); - emitter_json_key_prefix(emitter); - emitter_printf(emitter, "\"%s\": ", json_key); - emitter_print_value(emitter, emitter_justify_none, -1, - value_type, value); - } else { + if (emitter->output == emitter_output_table) { emitter_indent(emitter); emitter_printf(emitter, "%s: ", table_key); emitter_print_value(emitter, emitter_justify_none, -1, @@ -292,130 +357,22 @@ emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, } static inline void -emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, +emitter_table_kv(emitter_t *emitter, const char *table_key, emitter_type_t value_type, const void *value) { - emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL, + emitter_table_kv_note(emitter, table_key, value_type, value, NULL, emitter_type_bool, NULL); } -static inline void -emitter_json_kv(emitter_t *emitter, const char *json_key, - emitter_type_t value_type, const void *value) { - if (emitter->output == emitter_output_json) { - emitter_kv(emitter, json_key, NULL, value_type, value); - } -} +/* Write to the emitter the given string, but only in table mode. 
*/ +JEMALLOC_FORMAT_PRINTF(2, 3) static inline void -emitter_table_kv(emitter_t *emitter, const char *table_key, - emitter_type_t value_type, const void *value) { +emitter_table_printf(emitter_t *emitter, const char *format, ...) { if (emitter->output == emitter_output_table) { - emitter_kv(emitter, NULL, table_key, value_type, value); - } -} - -static inline void -emitter_dict_begin(emitter_t *emitter, const char *json_key, - const char *table_header) { - if (emitter->output == emitter_output_json) { - emitter_json_key_prefix(emitter); - emitter_printf(emitter, "\"%s\": {", json_key); - emitter_nest_inc(emitter); - } else { - emitter_indent(emitter); - emitter_printf(emitter, "%s\n", table_header); - emitter_nest_inc(emitter); - } -} - -static inline void -emitter_dict_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - assert(emitter->nesting_depth > 0); - emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); - emitter_printf(emitter, "}"); - } else { - emitter_nest_dec(emitter); - } -} - -static inline void -emitter_json_dict_begin(emitter_t *emitter, const char *json_key) { - if (emitter->output == emitter_output_json) { - emitter_dict_begin(emitter, json_key, NULL); - } -} - -static inline void -emitter_json_dict_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - emitter_dict_end(emitter); - } -} - -static inline void -emitter_table_dict_begin(emitter_t *emitter, const char *table_key) { - if (emitter->output == emitter_output_table) { - emitter_dict_begin(emitter, NULL, table_key); - } -} - -static inline void -emitter_table_dict_end(emitter_t *emitter) { - if (emitter->output == emitter_output_table) { - emitter_dict_end(emitter); - } -} - -static inline void -emitter_json_arr_begin(emitter_t *emitter, const char *json_key) { - if (emitter->output == emitter_output_json) { - emitter_json_key_prefix(emitter); - emitter_printf(emitter, "\"%s\": [", json_key); - 
emitter_nest_inc(emitter); - } -} - -static inline void -emitter_json_arr_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - assert(emitter->nesting_depth > 0); - emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); - emitter_printf(emitter, "]"); - } -} - -static inline void -emitter_json_arr_obj_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - emitter_json_key_prefix(emitter); - emitter_printf(emitter, "{"); - emitter_nest_inc(emitter); - } -} - -static inline void -emitter_json_arr_obj_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { - assert(emitter->nesting_depth > 0); - emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); - emitter_printf(emitter, "}"); - } -} - -static inline void -emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type, - const void *value) { - if (emitter->output == emitter_output_json) { - emitter_json_key_prefix(emitter); - emitter_print_value(emitter, emitter_justify_none, -1, - value_type, value); + va_list ap; + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); } } @@ -432,4 +389,93 @@ emitter_table_row(emitter_t *emitter, emitter_row_t *row) { emitter_table_printf(emitter, "\n"); } +static inline void +emitter_row_init(emitter_row_t *row) { + ql_new(&row->cols); +} + +static inline void +emitter_col_init(emitter_col_t *col, emitter_row_t *row) { + ql_elm_new(col, link); + ql_tail_insert(&row->cols, col, link); +} + + +/******************************************************************************/ +/* + * Generalized public API. Emits using either JSON or table, according to + * settings in the emitter_t. */ + +/* + * Note emits a different kv pair as well, but only in table mode. Omits the + * note if table_note_key is NULL. 
+ */ +static inline void +emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value, + const char *table_note_key, emitter_type_t table_note_value_type, + const void *table_note_value) { + if (emitter->output == emitter_output_json) { + emitter_json_key(emitter, json_key); + emitter_json_value(emitter, value_type, value); + } else { + emitter_table_kv_note(emitter, table_key, value_type, value, + table_note_key, table_note_value_type, table_note_value); + } + emitter->item_at_depth = true; +} + +static inline void +emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value) { + emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL, + emitter_type_bool, NULL); +} + +static inline void +emitter_dict_begin(emitter_t *emitter, const char *json_key, + const char *table_header) { + if (emitter->output == emitter_output_json) { + emitter_json_key(emitter, json_key); + emitter_json_object_begin(emitter); + } else { + emitter_table_dict_begin(emitter, table_header); + } +} + +static inline void +emitter_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_object_end(emitter); + } else { + emitter_table_dict_end(emitter); + } +} + +static inline void +emitter_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 0); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } else { + /* + * This guarantees that we always call write_cb at least once. + * This is useful if some invariant is established by each call + * to write_cb, but doesn't hold initially: e.g., some buffer + * holds a null-terminated string. 
+ */ + emitter_printf(emitter, "%s", ""); + } +} + +static inline void +emitter_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 1); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n}\n"); + } +} + #endif /* JEMALLOC_INTERNAL_EMITTER_H */ diff --git a/src/stats.c b/src/stats.c index 93a04b73..64d73235 100644 --- a/src/stats.c +++ b/src/stats.c @@ -287,7 +287,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { header_col_size.width -=5; emitter_table_printf(emitter, "bins:"); emitter_table_row(emitter, &header_row); - emitter_json_arr_begin(emitter, "bins"); + emitter_json_array_kv_begin(emitter, "bins"); for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; @@ -333,7 +333,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { col_mutex32); } - emitter_json_arr_obj_begin(emitter); + emitter_json_object_begin(emitter); emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, &nmalloc); emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, @@ -351,12 +351,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { emitter_json_kv(emitter, "curslabs", emitter_type_size, &curslabs); if (mutex) { - emitter_json_dict_begin(emitter, "mutex"); + emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit(emitter, NULL, col_mutex64, col_mutex32); - emitter_json_dict_end(emitter); + emitter_json_object_end(emitter); } - emitter_json_arr_obj_end(emitter); + emitter_json_object_end(emitter); size_t availregs = nregs * curslabs; char util[6]; @@ -400,7 +400,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { emitter_table_row(emitter, &row); } - emitter_json_arr_end(emitter); /* Close "bins". */ + emitter_json_array_end(emitter); /* Close "bins". 
*/ if (in_gap) { emitter_table_printf(emitter, " ---\n"); @@ -447,7 +447,7 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i) { header_size.width -= 6; emitter_table_printf(emitter, "large:"); emitter_table_row(emitter, &header_row); - emitter_json_arr_begin(emitter, "lextents"); + emitter_json_array_kv_begin(emitter, "lextents"); for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; @@ -471,10 +471,10 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i) { CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, &curlextents, size_t); - emitter_json_arr_obj_begin(emitter); + emitter_json_object_begin(emitter); emitter_json_kv(emitter, "curlextents", emitter_type_size, &curlextents); - emitter_json_arr_obj_end(emitter); + emitter_json_object_end(emitter); col_size.size_val = lextent_size; col_ind.unsigned_val = nbins + j; @@ -488,7 +488,7 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i) { emitter_table_row(emitter, &row); } } - emitter_json_arr_end(emitter); /* Close "lextents". */ + emitter_json_array_end(emitter); /* Close "lextents". */ if (in_gap) { emitter_table_printf(emitter, " ---\n"); } @@ -504,19 +504,19 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { emitter_row_init(&row); mutex_stats_init_cols(&row, "", &col_name, col64, col32); - emitter_json_dict_begin(emitter, "mutexes"); + emitter_json_object_kv_begin(emitter, "mutexes"); emitter_table_row(emitter, &row); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; i++) { const char *name = arena_mutex_names[i]; - emitter_json_dict_begin(emitter, name); + emitter_json_object_kv_begin(emitter, name); mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, col32); mutex_stats_emit(emitter, &row, col64, col32); - emitter_json_dict_end(emitter); /* Close the mutex dict. */ + emitter_json_object_end(emitter); /* Close the mutex dict. */ } - emitter_json_dict_end(emitter); /* End "mutexes". 
*/ + emitter_json_object_end(emitter); /* End "mutexes". */ } static void @@ -738,7 +738,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, alloc_count_##name.type = emitter_type_##valtype; \ alloc_count_##name.valtype##_val = small_or_large##_##name; - emitter_json_dict_begin(emitter, "small"); + emitter_json_object_kv_begin(emitter, "small"); alloc_count_title.str_val = "small:"; GET_AND_EMIT_ALLOC_STAT(small, allocated, size) @@ -747,9 +747,9 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) emitter_table_row(emitter, &alloc_count_row); - emitter_json_dict_end(emitter); /* Close "small". */ + emitter_json_object_end(emitter); /* Close "small". */ - emitter_json_dict_begin(emitter, "large"); + emitter_json_object_kv_begin(emitter, "large"); alloc_count_title.str_val = "large:"; GET_AND_EMIT_ALLOC_STAT(large, allocated, size) @@ -758,7 +758,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) emitter_table_row(emitter, &alloc_count_row); - emitter_json_dict_end(emitter); /* Close "large". */ + emitter_json_object_end(emitter); /* Close "large". */ #undef GET_AND_EMIT_ALLOC_STAT @@ -980,7 +980,7 @@ stats_general_print(emitter_t *emitter) { * The json output sticks arena info into an "arenas" dict; the table * output puts them at the top-level. */ - emitter_json_dict_begin(emitter, "arenas"); + emitter_json_object_kv_begin(emitter, "arenas"); CTL_GET("arenas.narenas", &uv, unsigned); emitter_kv(emitter, "narenas", "Arenas", emitter_type_unsigned, &uv); @@ -1021,9 +1021,9 @@ stats_general_print(emitter_t *emitter) { * (not just omit the printing). 
*/ if (emitter->output == emitter_output_json) { - emitter_json_arr_begin(emitter, "bin"); + emitter_json_array_kv_begin(emitter, "bin"); for (unsigned i = 0; i < nbins; i++) { - emitter_json_arr_obj_begin(emitter); + emitter_json_object_begin(emitter); CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); emitter_json_kv(emitter, "size", emitter_type_size, @@ -1037,9 +1037,9 @@ stats_general_print(emitter_t *emitter) { emitter_json_kv(emitter, "slab_size", emitter_type_size, &sv); - emitter_json_arr_obj_end(emitter); + emitter_json_object_end(emitter); } - emitter_json_arr_end(emitter); /* Close "bin". */ + emitter_json_array_end(emitter); /* Close "bin". */ } unsigned nlextents; @@ -1048,20 +1048,20 @@ stats_general_print(emitter_t *emitter) { emitter_type_unsigned, &nlextents); if (emitter->output == emitter_output_json) { - emitter_json_arr_begin(emitter, "lextent"); + emitter_json_array_kv_begin(emitter, "lextent"); for (unsigned i = 0; i < nlextents; i++) { - emitter_json_arr_obj_begin(emitter); + emitter_json_object_begin(emitter); CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); emitter_json_kv(emitter, "size", emitter_type_size, &sv); - emitter_json_arr_obj_end(emitter); + emitter_json_object_end(emitter); } - emitter_json_arr_end(emitter); /* Close "lextent". */ + emitter_json_array_end(emitter); /* Close "lextent". */ } - emitter_json_dict_end(emitter); /* Close "arenas" */ + emitter_json_object_end(emitter); /* Close "arenas" */ } static void @@ -1098,7 +1098,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, } /* Generic global stats. 
*/ - emitter_json_dict_begin(emitter, "stats"); + emitter_json_object_kv_begin(emitter, "stats"); emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); emitter_json_kv(emitter, "active", emitter_type_size, &active); emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); @@ -1114,14 +1114,14 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, resident, mapped, retained); /* Background thread stats. */ - emitter_json_dict_begin(emitter, "background_thread"); + emitter_json_object_kv_begin(emitter, "background_thread"); emitter_json_kv(emitter, "num_threads", emitter_type_size, &num_background_threads); emitter_json_kv(emitter, "num_runs", emitter_type_uint64, &background_thread_num_runs); emitter_json_kv(emitter, "run_interval", emitter_type_uint64, &background_thread_run_interval); - emitter_json_dict_end(emitter); /* Close "background_thread". */ + emitter_json_object_end(emitter); /* Close "background_thread". */ emitter_table_printf(emitter, "Background threads: %zu, " "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", @@ -1138,25 +1138,25 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, mutex_stats_init_cols(&row, "", &name, col64, col32); emitter_table_row(emitter, &row); - emitter_json_dict_begin(emitter, "mutexes"); + emitter_json_object_kv_begin(emitter, "mutexes"); for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_read_global(global_mutex_names[i], &name, col64, col32); - emitter_json_dict_begin(emitter, global_mutex_names[i]); + emitter_json_object_kv_begin(emitter, global_mutex_names[i]); mutex_stats_emit(emitter, &row, col64, col32); - emitter_json_dict_end(emitter); + emitter_json_object_end(emitter); } - emitter_json_dict_end(emitter); /* Close "mutexes". */ + emitter_json_object_end(emitter); /* Close "mutexes". */ } - emitter_json_dict_end(emitter); /* Close "stats". */ + emitter_json_object_end(emitter); /* Close "stats". 
*/ if (merged || destroyed || unmerged) { unsigned narenas; - emitter_json_dict_begin(emitter, "stats.arenas"); + emitter_json_object_kv_begin(emitter, "stats.arenas"); CTL_GET("arenas.narenas", &narenas, unsigned); size_t mib[3]; @@ -1185,10 +1185,10 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, if (merged && (ninitialized > 1 || !unmerged)) { /* Print merged arena stats. */ emitter_table_printf(emitter, "Merged arenas stats:\n"); - emitter_json_dict_begin(emitter, "merged"); + emitter_json_object_kv_begin(emitter, "merged"); stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, large, mutex); - emitter_json_dict_end(emitter); /* Close "merged". */ + emitter_json_object_end(emitter); /* Close "merged". */ } /* Destroyed stats. */ @@ -1196,10 +1196,10 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Print destroyed arena stats. */ emitter_table_printf(emitter, "Destroyed arenas stats:\n"); - emitter_json_dict_begin(emitter, "destroyed"); + emitter_json_object_kv_begin(emitter, "destroyed"); stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, bins, large, mutex); - emitter_json_dict_end(emitter); /* Close "destroyed". */ + emitter_json_object_end(emitter); /* Close "destroyed". */ } /* Unmerged stats. */ @@ -1209,18 +1209,18 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, char arena_ind_str[20]; malloc_snprintf(arena_ind_str, sizeof(arena_ind_str), "%u", i); - emitter_json_dict_begin(emitter, + emitter_json_object_kv_begin(emitter, arena_ind_str); emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); stats_arena_print(emitter, i, bins, large, mutex); /* Close "". */ - emitter_json_dict_end(emitter); + emitter_json_object_end(emitter); } } } - emitter_json_dict_end(emitter); /* Close "stats.arenas". */ + emitter_json_object_end(emitter); /* Close "stats.arenas". 
*/ } } @@ -1273,7 +1273,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, cbopaque); emitter_begin(&emitter); emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); - emitter_json_dict_begin(&emitter, "jemalloc"); + emitter_json_object_kv_begin(&emitter, "jemalloc"); if (general) { stats_general_print(&emitter); @@ -1283,7 +1283,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, bins, large, mutex); } - emitter_json_dict_end(&emitter); /* Closes the "jemalloc" dict. */ + emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); emitter_end(&emitter); } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index 6ffd1c3a..b4a693f4 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -169,7 +169,7 @@ static void emit_nested_dict(emitter_t *emitter) { emitter_end(emitter); } -static const char *nested_dict_json = +static const char *nested_object_json = "{\n" "\t\"json1\": {\n" "\t\t\"json2\": {\n" @@ -183,7 +183,7 @@ static const char *nested_dict_json = "\t}\n" "}\n"; -static const char *nested_dict_table = +static const char *nested_object_table = "Dict 1\n" " Dict 2\n" " A primitive: 123\n" @@ -192,8 +192,8 @@ static const char *nested_dict_table = " Another primitive: 123\n"; TEST_BEGIN(test_nested_dict) { - assert_emit_output(&emit_nested_dict, nested_dict_json, - nested_dict_table); + assert_emit_output(&emit_nested_dict, nested_object_json, + nested_object_table); } TEST_END @@ -256,13 +256,14 @@ emit_modal(emitter_t *emitter) { int val = 123; emitter_begin(emitter); emitter_dict_begin(emitter, "j0", "T0"); - emitter_json_dict_begin(emitter, "j1"); + emitter_json_key(emitter, "j1"); + emitter_json_object_begin(emitter); emitter_kv(emitter, "i1", "I1", emitter_type_int, &val); emitter_json_kv(emitter, "i2", emitter_type_int, &val); emitter_table_kv(emitter, "I3", emitter_type_int, &val); 
emitter_table_dict_begin(emitter, "T1"); emitter_kv(emitter, "i4", "I4", emitter_type_int, &val); - emitter_json_dict_end(emitter); /* Close j1 */ + emitter_json_object_end(emitter); /* Close j1 */ emitter_kv(emitter, "i5", "I5", emitter_type_int, &val); emitter_table_dict_end(emitter); /* Close T1 */ emitter_kv(emitter, "i6", "I6", emitter_type_int, &val); @@ -302,24 +303,26 @@ emit_json_arr(emitter_t *emitter) { int ival = 123; emitter_begin(emitter); - emitter_json_dict_begin(emitter, "dict"); - emitter_json_arr_begin(emitter, "arr"); - emitter_json_arr_obj_begin(emitter); + emitter_json_key(emitter, "dict"); + emitter_json_object_begin(emitter); + emitter_json_key(emitter, "arr"); + emitter_json_array_begin(emitter); + emitter_json_object_begin(emitter); emitter_json_kv(emitter, "foo", emitter_type_int, &ival); - emitter_json_arr_obj_end(emitter); /* Close arr[0] */ + emitter_json_object_end(emitter); /* Close arr[0] */ /* arr[1] and arr[2] are primitives. */ - emitter_json_arr_value(emitter, emitter_type_int, &ival); - emitter_json_arr_value(emitter, emitter_type_int, &ival); - emitter_json_arr_obj_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_object_begin(emitter); emitter_json_kv(emitter, "bar", emitter_type_int, &ival); emitter_json_kv(emitter, "baz", emitter_type_int, &ival); - emitter_json_arr_obj_end(emitter); /* Close arr[3]. */ - emitter_json_arr_end(emitter); /* Close arr. */ - emitter_json_dict_end(emitter); /* Close dict. */ + emitter_json_object_end(emitter); /* Close arr[3]. */ + emitter_json_array_end(emitter); /* Close arr. */ + emitter_json_object_end(emitter); /* Close dict. 
*/ emitter_end(emitter); } -static const char *json_arr_json = +static const char *json_array_json = "{\n" "\t\"dict\": {\n" "\t\t\"arr\": [\n" @@ -336,10 +339,62 @@ static const char *json_arr_json = "\t}\n" "}\n"; -static const char *json_arr_table = ""; +static const char *json_array_table = ""; TEST_BEGIN(test_json_arr) { - assert_emit_output(&emit_json_arr, json_arr_json, json_arr_table); + assert_emit_output(&emit_json_arr, json_array_json, json_array_table); +} +TEST_END + +static void +emit_json_nested_array(emitter_t *emitter) { + int ival = 123; + char *sval = "foo"; + emitter_begin(emitter); + emitter_json_array_begin(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_array_end(emitter); + emitter_json_array_end(emitter); + emitter_end(emitter); +} + +static const char *json_nested_array_json = +"{\n" +"\t[\n" +"\t\t[\n" +"\t\t\t123,\n" +"\t\t\t\"foo\",\n" +"\t\t\t123,\n" +"\t\t\t\"foo\"\n" +"\t\t],\n" +"\t\t[\n" +"\t\t\t123\n" +"\t\t],\n" +"\t\t[\n" +"\t\t\t\"foo\",\n" +"\t\t\t123\n" +"\t\t],\n" +"\t\t[\n" +"\t\t]\n" +"\t]\n" +"}\n"; + +TEST_BEGIN(test_json_nested_arr) { + assert_emit_output(&emit_json_nested_array, json_nested_array_json, + json_array_table); } TEST_END @@ -409,5 +464,6 @@ main(void) { test_types, test_modal, test_json_arr, + test_json_nested_arr, test_table_row); } From b664bd79356d7f6da6f413023f9aef014b85c145 Mon Sep 
17 00:00:00 2001 From: Tyler Etzel Date: Thu, 5 Jul 2018 10:56:33 -0700 Subject: [PATCH 1173/2608] Add logging for sampled allocations - prof_opt_log flag starts logging automatically at runtime - prof_log_{start,stop} mallctl for manual control --- include/jemalloc/internal/arena_inlines_b.h | 26 + include/jemalloc/internal/extent_inlines.h | 10 + include/jemalloc/internal/extent_structs.h | 12 +- include/jemalloc/internal/large_externs.h | 3 + include/jemalloc/internal/prof_externs.h | 6 +- include/jemalloc/internal/prof_inlines_b.h | 21 +- include/jemalloc/internal/witness.h | 28 +- src/ctl.c | 44 +- src/jemalloc.c | 1 + src/large.c | 10 + src/prof.c | 565 +++++++++++++++++++- 11 files changed, 702 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 2b3915ae..8bf0a817 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -78,6 +78,32 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { large_prof_tctx_reset(tsdn, extent); } +JEMALLOC_ALWAYS_INLINE nstime_t +arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, + alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); + /* + * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're + * sure we have a sampled allocation. 
+ */ + assert(!extent_slab_get(extent)); + return large_prof_alloc_time_get(extent); +} + +JEMALLOC_ALWAYS_INLINE void +arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + nstime_t t) { + cassert(config_prof); + assert(ptr != NULL); + + extent_t *extent = iealloc(tsdn, ptr); + assert(!extent_slab_get(extent)); + large_prof_alloc_time_set(extent, t); +} + JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index a43d00db..145fa2d6 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -177,6 +177,11 @@ extent_prof_tctx_get(const extent_t *extent) { ATOMIC_ACQUIRE); } +static inline nstime_t +extent_prof_alloc_time_get(const extent_t *extent) { + return extent->e_alloc_time; +} + static inline void extent_arena_set(extent_t *extent, arena_t *arena) { unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U << @@ -300,6 +305,11 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); } +static inline void +extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { + nstime_copy(&extent->e_alloc_time, &t); +} + static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 1983097e..d709577e 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -161,11 +161,13 @@ struct extent_s { /* Small region slab metadata. */ arena_slab_data_t e_slab_data; - /* - * Profile counters, used for large objects. Points to a - * prof_tctx_t. - */ - atomic_p_t e_prof_tctx; + /* Profiling data, used for large objects. 
*/ + struct { + /* Time when this was allocated. */ + nstime_t e_alloc_time; + /* Points to a prof_tctx_t. */ + atomic_p_t e_prof_tctx; + }; }; }; typedef ql_head(extent_t) extent_list_t; diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 88682eac..a05019e8 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -26,4 +26,7 @@ prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); +nstime_t large_prof_alloc_time_get(const extent_t *extent); +void large_prof_alloc_time_set(extent_t *extent, nstime_t time); + #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 04348696..74315ce5 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -14,6 +14,7 @@ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ extern char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. 
*/ #ifdef JEMALLOC_PROF @@ -45,7 +46,8 @@ extern size_t lg_prof_sample; void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); @@ -72,6 +74,8 @@ void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); +bool prof_log_start(tsdn_t *tsdn, const char *filename); +bool prof_log_stop(tsdn_t *tsdn); const char *prof_thread_name_get(tsd_t *tsd); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); bool prof_thread_active_get(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 6ff465ad..5e0b0642 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -61,6 +61,23 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { arena_prof_tctx_reset(tsdn, ptr, tctx); } +JEMALLOC_ALWAYS_INLINE nstime_t +prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { + cassert(config_prof); + assert(ptr != NULL); + + return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx); +} + +JEMALLOC_ALWAYS_INLINE void +prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + nstime_t t) { + cassert(config_prof); + assert(ptr != NULL); + + arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t); +} + JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { @@ -187,7 +204,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, 
* counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_usize, old_tctx); + prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); } } @@ -199,7 +216,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_free_sampled_object(tsd, usize, tctx); + prof_free_sampled_object(tsd, ptr, usize, tctx); } } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 80ea70c2..fff9e98c 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -27,9 +27,9 @@ #define WITNESS_RANK_PROF_BT2GCTX 6U #define WITNESS_RANK_PROF_TDATAS 7U #define WITNESS_RANK_PROF_TDATA 8U -#define WITNESS_RANK_PROF_GCTX 9U - -#define WITNESS_RANK_BACKGROUND_THREAD 10U +#define WITNESS_RANK_PROF_LOG 9U +#define WITNESS_RANK_PROF_GCTX 10U +#define WITNESS_RANK_BACKGROUND_THREAD 11U /* * Used as an argument to witness_assert_depth_to_rank() in order to validate @@ -37,19 +37,19 @@ * witness_assert_depth_to_rank() is inclusive rather than exclusive, this * definition can have the same value as the minimally ranked core lock. 
*/ -#define WITNESS_RANK_CORE 11U +#define WITNESS_RANK_CORE 12U -#define WITNESS_RANK_DECAY 11U -#define WITNESS_RANK_TCACHE_QL 12U -#define WITNESS_RANK_EXTENT_GROW 13U -#define WITNESS_RANK_EXTENTS 14U -#define WITNESS_RANK_EXTENT_AVAIL 15U +#define WITNESS_RANK_DECAY 12U +#define WITNESS_RANK_TCACHE_QL 13U +#define WITNESS_RANK_EXTENT_GROW 14U +#define WITNESS_RANK_EXTENTS 15U +#define WITNESS_RANK_EXTENT_AVAIL 16U -#define WITNESS_RANK_EXTENT_POOL 16U -#define WITNESS_RANK_RTREE 17U -#define WITNESS_RANK_BASE 18U -#define WITNESS_RANK_ARENA_LARGE 19U -#define WITNESS_RANK_HOOK 20U +#define WITNESS_RANK_EXTENT_POOL 17U +#define WITNESS_RANK_RTREE 18U +#define WITNESS_RANK_BASE 19U +#define WITNESS_RANK_ARENA_LARGE 20U +#define WITNESS_RANK_HOOK 21U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_BIN WITNESS_RANK_LEAF diff --git a/src/ctl.c b/src/ctl.c index 38529d08..448ec7bf 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -148,6 +148,8 @@ CTL_PROTO(prof_gdump) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) +CTL_PROTO(prof_log_start) +CTL_PROTO(prof_log_stop) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) @@ -389,7 +391,9 @@ static const ctl_named_node_t prof_node[] = { {NAME("gdump"), CTL(prof_gdump)}, {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, - {NAME("lg_sample"), CTL(lg_prof_sample)} + {NAME("lg_sample"), CTL(lg_prof_sample)}, + {NAME("log_start"), CTL(prof_log_start)}, + {NAME("log_stop"), CTL(prof_log_stop)} }; static const ctl_named_node_t stats_arenas_i_small_node[] = { @@ -2644,6 +2648,44 @@ label_return: CTL_RO_NL_CGEN(config_prof, prof_interval, prof_interval, uint64_t) CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) +static int +prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + const char *filename = NULL; 
+ + if (!config_prof) { + return ENOENT; + } + + WRITEONLY(); + WRITE(filename, const char *); + + if (prof_log_start(tsd_tsdn(tsd), filename)) { + ret = EFAULT; + goto label_return; + } + + ret = 0; +label_return: + return ret; +} + +static int +prof_log_stop_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + if (!config_prof) { + return ENOENT; + } + + if (prof_log_stop(tsd_tsdn(tsd))) { + return EFAULT; + } + + return 0; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 85ec9e0b..e8f110f7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1282,6 +1282,7 @@ malloc_conf_init(sc_data_t *sc_data) { CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + CONF_HANDLE_BOOL(opt_prof_log, "prof_log") } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/large.c b/src/large.c index 84073618..8e7a781d 100644 --- a/src/large.c +++ b/src/large.c @@ -383,3 +383,13 @@ void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); } + +nstime_t +large_prof_alloc_time_get(const extent_t *extent) { + return extent_prof_alloc_time_get(extent); +} + +void +large_prof_alloc_time_set(extent_t *extent, nstime_t t) { + extent_prof_alloc_time_set(extent, t); +} diff --git a/src/prof.c b/src/prof.c index 405de4b3..21421c04 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/emitter.h" /******************************************************************************/ @@ -38,6 +39,7 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool 
opt_prof_accum = false; +bool opt_prof_log = false; char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -70,6 +72,96 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; +typedef enum prof_logging_state_e prof_logging_state_t; +enum prof_logging_state_e { + prof_logging_state_stopped, + prof_logging_state_started, + prof_logging_state_dumping +}; + +/* + * - stopped: log_start never called, or previous log_stop has completed. + * - started: log_start called, log_stop not called yet. Allocations are logged. + * - dumping: log_stop called but not finished; samples are not logged anymore. + */ +prof_logging_state_t prof_logging_state = prof_logging_state_stopped; + +/* Incremented for every log file that is output. */ +static uint64_t log_seq = 0; +static char log_filename[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Timestamp for most recent call to log_start(). */ +static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; + +/* Increment these when adding to the log_bt and log_thr linked lists. */ +static size_t log_bt_index = 0; +static size_t log_thr_index = 0; + +/* Linked list node definitions. These are only used in prof.c. */ +typedef struct prof_bt_node_s prof_bt_node_t; + +struct prof_bt_node_s { + prof_bt_node_t *next; + size_t index; + prof_bt_t bt; + /* Variable size backtrace vector pointed to by bt. */ + void *vec[1]; +}; + +typedef struct prof_thr_node_s prof_thr_node_t; + +struct prof_thr_node_s { + prof_thr_node_t *next; + size_t index; + uint64_t thr_uid; + /* Variable size based on thr_name_sz. */ + char name[1]; +}; + +typedef struct prof_alloc_node_s prof_alloc_node_t; + +/* This is output when logging sampled allocations. */ +struct prof_alloc_node_s { + prof_alloc_node_t *next; + /* Indices into an array of thread data. */ + size_t alloc_thr_ind; + size_t free_thr_ind; + + /* Indices into an array of backtraces. 
*/ + size_t alloc_bt_ind; + size_t free_bt_ind; + + uint64_t alloc_time_ns; + uint64_t free_time_ns; + + size_t usize; +}; + +/* + * Created on the first call to prof_log_start and deleted on prof_log_stop. + * These are the backtraces and threads that have already been logged by an + * allocation. + */ +static bool log_tables_initialized = false; +static ckh_t log_bt_node_set; +static ckh_t log_thr_node_set; + +/* Store linked lists for logged data. */ +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; +static prof_alloc_node_t *log_alloc_first = NULL; +static prof_alloc_node_t *log_alloc_last = NULL; + +/* Protects the prof_logging_state and any log_{...} variable. */ +static malloc_mutex_t log_mtx; + /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. @@ -145,6 +237,12 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +/* Hashtable functions for log_bt_node_set and log_thr_node_set. */ +static void prof_thr_node_hash(const void *key, size_t r_hash[2]); +static bool prof_thr_node_keycomp(const void *k1, const void *k2); +static void prof_bt_node_hash(const void *key, size_t r_hash[2]); +static bool prof_bt_node_keycomp(const void *k1, const void *k2); + /******************************************************************************/ /* Red-black trees. */ @@ -242,6 +340,12 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_tctx_set(tsdn, ptr, usize, NULL, tctx); + /* Get the current time and set this in the extent_t. We'll read this + * when free() is called. 
*/ + nstime_t t = NSTIME_ZERO_INITIALIZER; + nstime_update(&t); + prof_alloc_time_set(tsdn, ptr, NULL, t); + malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; @@ -253,14 +357,171 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, malloc_mutex_unlock(tsdn, tctx->tdata->lock); } +static size_t +prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_bt_node_t dummy_node; + dummy_node.bt = *bt; + prof_bt_node_t *node; + + /* See if this backtrace is already cached in the table. */ + if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *) + ialloc(tsd, sz, sz_size2index(sz), false, true); + if (log_bt_first == NULL) { + log_bt_first = new_node; + log_bt_last = new_node; + } else { + log_bt_last->next = new_node; + log_bt_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_bt_index; + /* + * Copy the backtrace: bt is inside a tdata or gctx, which + * might die before prof_log_stop is called. + */ + new_node->bt.len = bt->len; + memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); + new_node->bt.vec = new_node->vec; + + log_bt_index++; + ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} +static size_t +prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_thr_node_t dummy_node; + dummy_node.thr_uid = thr_uid; + prof_thr_node_t *node; + + /* See if this thread is already cached in the table. 
*/ + if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; + prof_thr_node_t *new_node = (prof_thr_node_t *) + ialloc(tsd, sz, sz_size2index(sz), false, true); + if (log_thr_first == NULL) { + log_thr_first = new_node; + log_thr_last = new_node; + } else { + log_thr_last->next = new_node; + log_thr_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_thr_index; + new_node->thr_uid = thr_uid; + strcpy(new_node->name, name); + + log_thr_index++; + ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} + +static void +prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); + if (cons_tdata == NULL) { + /* + * We decide not to log these allocations. cons_tdata will be + * NULL only when the current thread is in a weird state (e.g. + * it's being destroyed). + */ + return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + goto label_done; + } + + if (!log_tables_initialized) { + bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp); + bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp); + if (err1 || err2) { + goto label_done; + } + log_tables_initialized = true; + } + + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, + (alloc_ctx_t *)NULL); + nstime_t free_time = NSTIME_ZERO_INITIALIZER; + nstime_update(&free_time); + + prof_alloc_node_t *new_node = (prof_alloc_node_t *) + ialloc(tsd, sizeof(prof_alloc_node_t), + sz_size2index(sizeof(prof_alloc_node_t)), false, true); + + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? 
+ "" : tctx->tdata->thread_name; + const char *cons_thr_name = prof_thread_name_get(tsd); + + prof_bt_t bt; + /* Initialize the backtrace, using the buffer in tdata to store it. */ + bt_init(&bt, cons_tdata->vec); + prof_backtrace(&bt); + prof_bt_t *cons_bt = &bt; + + /* We haven't destroyed tctx yet, so gctx should be good to read. */ + prof_bt_t *prod_bt = &tctx->gctx->bt; + + new_node->next = NULL; + new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, + prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, + cons_thr_name); + new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); + new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); + new_node->alloc_time_ns = nstime_ns(&alloc_time); + new_node->free_time_ns = nstime_ns(&free_time); + new_node->usize = usize; + + if (log_alloc_first == NULL) { + log_alloc_first = new_node; + log_alloc_last = new_node; + } else { + log_alloc_last->next = new_node; + log_alloc_last = new_node; + } + +label_done: + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); +} + void -prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_tctx_t *tctx) { +prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_tctx_t *tctx) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; + prof_try_log(tsd, ptr, usize, tctx); + if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); } else { @@ -1887,6 +2148,33 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } +static void +prof_bt_node_hash(const void *key, size_t r_hash[2]) { + const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; + prof_bt_hash((void *)(&bt_node->bt), r_hash); +} + +static bool +prof_bt_node_keycomp(const void *k1, const void *k2) { + const prof_bt_node_t *bt_node1 = (prof_bt_node_t 
*)k1; + const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; + return prof_bt_keycomp((void *)(&bt_node1->bt), + (void *)(&bt_node2->bt)); +} + +static void +prof_thr_node_hash(const void *key, size_t r_hash[2]) { + const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; + hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); +} + +static bool +prof_thr_node_keycomp(const void *k1, const void *k2) { + const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; + const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; + return thr_node1->thr_uid == thr_node2->thr_uid; +} + static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -2119,6 +2407,252 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } +bool +prof_log_start(tsdn_t *tsdn, const char *filename) { + if (!opt_prof || !prof_booted) { + return true; + } + + bool ret = false; + size_t buf_size = PATH_MAX + 1; + + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_stopped) { + ret = true; + } else if (filename == NULL) { + /* Make default name. */ + malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", + opt_prof_prefix, prof_getpid(), log_seq); + log_seq++; + prof_logging_state = prof_logging_state_started; + } else if (strlen(filename) >= buf_size) { + ret = true; + } else { + strcpy(log_filename, filename); + prof_logging_state = prof_logging_state_started; + } + + if (!ret) { + nstime_update(&log_start_timestamp); + } + + malloc_mutex_unlock(tsdn, &log_mtx); + + return ret; +} + +/* Used as an atexit function to stop logging on exit. 
*/ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + +struct prof_emitter_cb_arg_s { + int fd; + ssize_t ret; +}; + +static void +prof_emitter_write_cb(void *opaque, const char *to_write) { + struct prof_emitter_cb_arg_s *arg = + (struct prof_emitter_cb_arg_s *)opaque; + size_t bytes = strlen(to_write); + arg->ret = write(arg->fd, (void *)to_write, bytes); +} + +/* + * prof_log_emit_{...} goes through the appropriate linked list, emitting each + * node to the json and deallocating it. + */ +static void +prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "threads"); + prof_thr_node_t *thr_node = log_thr_first; + prof_thr_node_t *thr_old_node; + while (thr_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, + &thr_node->thr_uid); + + char *thr_name = thr_node->name; + + emitter_json_kv(emitter, "thr_name", emitter_type_string, + &thr_name); + + emitter_json_object_end(emitter); + thr_old_node = thr_node; + thr_node = thr_node->next; + idalloc(tsd, thr_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "stack_traces"); + prof_bt_node_t *bt_node = log_bt_first; + prof_bt_node_t *bt_old_node; + /* + * Calculate how many hex digits we need: twice number of bytes, two for + * "0x", and then one more for terminating '\0'. 
+ */ + char buf[2 * sizeof(intptr_t) + 3]; + size_t buf_sz = sizeof(buf); + while (bt_node != NULL) { + emitter_json_array_begin(emitter); + size_t i; + for (i = 0; i < bt_node->bt.len; i++) { + malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); + char *trace_str = buf; + emitter_json_value(emitter, emitter_type_string, + &trace_str); + } + emitter_json_array_end(emitter); + + bt_old_node = bt_node; + bt_node = bt_node->next; + idalloc(tsd, bt_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "allocations"); + prof_alloc_node_t *alloc_node = log_alloc_first; + prof_alloc_node_t *alloc_old_node; + while (alloc_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "alloc_thread", emitter_type_size, + &alloc_node->alloc_thr_ind); + + emitter_json_kv(emitter, "free_thread", emitter_type_size, + &alloc_node->free_thr_ind); + + emitter_json_kv(emitter, "alloc_trace", emitter_type_size, + &alloc_node->alloc_bt_ind); + + emitter_json_kv(emitter, "free_trace", emitter_type_size, + &alloc_node->free_bt_ind); + + emitter_json_kv(emitter, "alloc_timestamp", + emitter_type_uint64, &alloc_node->alloc_time_ns); + + emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, + &alloc_node->free_time_ns); + + emitter_json_kv(emitter, "usize", emitter_type_uint64, + &alloc_node->usize); + + emitter_json_object_end(emitter); + + alloc_old_node = alloc_node; + alloc_node = alloc_node->next; + idalloc(tsd, alloc_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_metadata(emitter_t *emitter) { + emitter_json_object_kv_begin(emitter, "info"); + + nstime_t now = NSTIME_ZERO_INITIALIZER; + + nstime_update(&now); + uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); + emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); + + char *vers = JEMALLOC_VERSION; + emitter_json_kv(emitter, 
"version", + emitter_type_string, &vers); + + emitter_json_kv(emitter, "lg_sample_rate", + emitter_type_int, &lg_prof_sample); + + int pid = prof_getpid(); + emitter_json_kv(emitter, "pid", emitter_type_int, &pid); + + emitter_json_object_end(emitter); +} + + +bool +prof_log_stop(tsdn_t *tsdn) { + if (!opt_prof || !prof_booted) { + return true; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + malloc_mutex_unlock(tsdn, &log_mtx); + return true; + } + + /* + * Set the state to dumping. We'll set it to stopped when we're done. + * Since other threads won't be able to start/stop/log when the state is + * dumping, we don't have to hold the lock during the whole method. + */ + prof_logging_state = prof_logging_state_dumping; + malloc_mutex_unlock(tsdn, &log_mtx); + + + emitter_t emitter; + + /* Create a file. */ + int fd = creat(log_filename, 0644); + + if (fd == -1) { + malloc_printf(": creat() for log file \"%s\" " + " failed with %d\n", log_filename, errno); + if (opt_abort) { + abort(); + } + return true; + } + + /* Emit to json. */ + struct prof_emitter_cb_arg_s arg; + arg.fd = fd; + emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, + (void *)(&arg)); + + emitter_json_object_begin(&emitter); + prof_log_emit_metadata(&emitter); + prof_log_emit_threads(tsd, &emitter); + prof_log_emit_traces(tsd, &emitter); + prof_log_emit_allocs(tsd, &emitter); + emitter_json_object_end(&emitter); + + /* Reset global state. 
*/ + if (log_tables_initialized) { + ckh_delete(tsd, &log_bt_node_set); + ckh_delete(tsd, &log_thr_node_set); + } + log_tables_initialized = false; + log_bt_index = 0; + log_thr_index = 0; + log_bt_first = NULL; + log_bt_last = NULL; + log_thr_first = NULL; + log_thr_last = NULL; + log_alloc_first = NULL; + log_alloc_last = NULL; + + malloc_mutex_lock(tsdn, &log_mtx); + prof_logging_state = prof_logging_state_stopped; + malloc_mutex_unlock(tsdn, &log_mtx); + + return close(fd); +} + const char * prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; @@ -2355,6 +2889,35 @@ prof_boot2(tsd_t *tsd) { } } + if (opt_prof_log) { + prof_log_start(tsd_tsdn(tsd), NULL); + } + + if (atexit(prof_log_stop_final) != 0) { + malloc_write(": Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + } + + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + return true; + } + + if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp)) { + return true; + } + + if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp)) { + return true; + } + + log_tables_initialized = true; + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); From 5e23f96dd4e4ff2847a85d44a01b66e4ed2da21f Mon Sep 17 00:00:00 2001 From: Tyler Etzel Date: Tue, 3 Jul 2018 11:10:09 -0700 Subject: [PATCH 1174/2608] Add unit tests for logging --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 13 +- src/prof.c | 122 ++++++++++++++++++- test/unit/prof_log.c | 146 +++++++++++++++++++++++ test/unit/prof_log.sh | 5 + 5 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 test/unit/prof_log.c create mode 100644 test/unit/prof_log.sh diff --git a/Makefile.in b/Makefile.in index 8b2f5ca6..49585ed9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -194,6 +194,7 @@ TESTS_UNIT := \ 
$(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ + $(srcroot)test/unit/prof_log.c \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 74315ce5..094f3e17 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -74,8 +74,6 @@ void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); -bool prof_log_start(tsdn_t *tsdn, const char *filename); -bool prof_log_stop(tsdn_t *tsdn); const char *prof_thread_name_get(tsd_t *tsd); int prof_thread_name_set(tsd_t *tsd, const char *thread_name); bool prof_thread_active_get(tsd_t *tsd); @@ -93,4 +91,15 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); +bool prof_log_start(tsdn_t *tsdn, const char *filename); +bool prof_log_stop(tsdn_t *tsdn); +#ifdef JEMALLOC_JET +size_t prof_log_bt_count(void); +size_t prof_log_alloc_count(void); +size_t prof_log_thr_count(void); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool new_value); +#endif + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 21421c04..458c6cd0 100644 --- a/src/prof.c +++ b/src/prof.c @@ -86,6 +86,10 @@ enum prof_logging_state_e { */ prof_logging_state_t prof_logging_state = prof_logging_state_stopped; +#ifdef JEMALLOC_JET +static bool prof_log_dummy = false; +#endif + /* Incremented for every log file that is output. 
 */ static uint64_t log_seq = 0; static char log_filename[ @@ -2407,6 +2411,102 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } +#ifdef JEMALLOC_JET +size_t +prof_log_bt_count(void) { + size_t cnt = 0; + prof_bt_node_t *node = log_bt_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_alloc_count(void) { + size_t cnt = 0; + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_thr_count(void) { + size_t cnt = 0; + prof_thr_node_t *node = log_thr_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +bool +prof_log_is_logging(void) { + return prof_logging_state == prof_logging_state_started; +} + +bool +prof_log_rep_check(void) { + if (prof_logging_state == prof_logging_state_stopped + && log_tables_initialized) { + return true; + } + + if (log_bt_last != NULL && log_bt_last->next != NULL) { + return true; + } + if (log_thr_last != NULL && log_thr_last->next != NULL) { + return true; + } + if (log_alloc_last != NULL && log_alloc_last->next != NULL) { + return true; + } + + size_t bt_count = prof_log_bt_count(); + size_t thr_count = prof_log_thr_count(); + size_t alloc_count = prof_log_alloc_count(); + + + if (prof_logging_state == prof_logging_state_stopped) { + if (bt_count != 0 || thr_count != 0 || alloc_count != 0) { + return true; + } + } + + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + if (node->alloc_bt_ind >= bt_count) { + return true; + } + if (node->free_bt_ind >= bt_count) { + return true; + } + if (node->alloc_thr_ind >= thr_count) { + return true; + } + if (node->free_thr_ind >= thr_count) { + return true; + } + if (node->alloc_time_ns > node->free_time_ns) { + return true; + } + node = node->next; + } + + return false; +} + +void +prof_log_dummy_set(bool new_value) { + prof_log_dummy = new_value; +} +#endif + bool prof_log_start(tsdn_t 
*tsdn, const char *filename) { if (!opt_prof || !prof_booted) { @@ -2459,6 +2559,11 @@ prof_emitter_write_cb(void *opaque, const char *to_write) { struct prof_emitter_cb_arg_s *arg = (struct prof_emitter_cb_arg_s *)opaque; size_t bytes = strlen(to_write); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return; + } +#endif arg->ret = write(arg->fd, (void *)to_write, bytes); } @@ -2607,7 +2712,17 @@ prof_log_stop(tsdn_t *tsdn) { emitter_t emitter; /* Create a file. */ - int fd = creat(log_filename, 0644); + + int fd; +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + fd = 0; + } else { + fd = creat(log_filename, 0644); + } +#else + fd = creat(log_filename, 0644); +#endif if (fd == -1) { malloc_printf(": creat() for log file \"%s\" " @@ -2650,6 +2765,11 @@ prof_log_stop(tsdn_t *tsdn) { prof_logging_state = prof_logging_state_stopped; malloc_mutex_unlock(tsdn, &log_mtx); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return false; + } +#endif return close(fd); } diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c new file mode 100644 index 00000000..6a3464b4 --- /dev/null +++ b/test/unit/prof_log.c @@ -0,0 +1,146 @@ +#include "test/jemalloc_test.h" + +#define N_PARAM 100 +#define N_THREADS 10 + +static void assert_rep() { + assert_b_eq(prof_log_rep_check(), false, "Rep check failed"); +} + +static void assert_log_empty() { + assert_zu_eq(prof_log_bt_count(), 0, + "The log has backtraces; it isn't empty"); + assert_zu_eq(prof_log_thr_count(), 0, + "The log has threads; it isn't empty"); + assert_zu_eq(prof_log_alloc_count(), 0, + "The log has allocations; it isn't empty"); +} + +void *buf[N_PARAM]; + +static void f() { + int i; + for (i = 0; i < N_PARAM; i++) { + buf[i] = malloc(100); + } + for (i = 0; i < N_PARAM; i++) { + free(buf[i]); + } +} + +TEST_BEGIN(test_prof_log_many_logs) { + int i; + + test_skip_if(!config_prof); + + for (i = 0; i < N_PARAM; i++) { + assert_b_eq(prof_log_is_logging(), false, + "Logging shouldn't have started yet"); + 
assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure when starting logging"); + assert_b_eq(prof_log_is_logging(), true, + "Logging should be started by now"); + assert_log_empty(); + assert_rep(); + f(); + assert_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); + assert_rep(); + assert_b_eq(prof_log_is_logging(), true, + "Logging should still be on"); + assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure when stopping logging"); + assert_b_eq(prof_log_is_logging(), false, + "Logging should have turned off"); + } +} +TEST_END + +thd_t thr_buf[N_THREADS]; + +static void *f_thread(void *unused) { + int i; + for (i = 0; i < N_PARAM; i++) { + void *p = malloc(100); + memset(p, 100, sizeof(char)); + free(p); + } + + return NULL; +} + +TEST_BEGIN(test_prof_log_many_threads) { + + test_skip_if(!config_prof); + + int i; + assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure when starting logging"); + for (i = 0; i < N_THREADS; i++) { + thd_create(&thr_buf[i], &f_thread, NULL); + } + + for (i = 0; i < N_THREADS; i++) { + thd_join(thr_buf[i], NULL); + } + assert_zu_eq(prof_log_thr_count(), N_THREADS, + "Wrong number of thread entries"); + assert_rep(); + assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure when stopping logging"); +} +TEST_END + +static void f3() { + void *p = malloc(100); + free(p); +} + +static void f1() { + void *p = malloc(100); + f3(); + free(p); +} + +static void f2() { + void *p = malloc(100); + free(p); +} + +TEST_BEGIN(test_prof_log_many_traces) { + + test_skip_if(!config_prof); + + assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure when starting logging"); + int i; + assert_rep(); + assert_log_empty(); + for (i = 0; i < N_PARAM; i++) { + assert_rep(); + f1(); + assert_rep(); + f2(); + assert_rep(); + f3(); + assert_rep(); + } + /* + * There 
should be 8 total backtraces: two for malloc/free in f1(), + * two for malloc/free in f2(), two for malloc/free in f3(), and then + * two for malloc/free in f1()'s call to f3(). + */ + assert_zu_eq(prof_log_bt_count(), 8, + "Wrong number of backtraces given sample workload"); + assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl failure when stopping logging"); +} +TEST_END + +int +main(void) { + prof_log_dummy_set(true); + return test_no_reentrancy( + test_prof_log_many_logs, + test_prof_log_many_traces, + test_prof_log_many_threads); +} diff --git a/test/unit/prof_log.sh b/test/unit/prof_log.sh new file mode 100644 index 00000000..8fcc7d8a --- /dev/null +++ b/test/unit/prof_log.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi From 33f1aa5badd2f9caf91991bab60df64a37c394bb Mon Sep 17 00:00:00 2001 From: Tyler Etzel Date: Mon, 30 Jul 2018 13:31:34 -0700 Subject: [PATCH 1175/2608] Fix comment on SC_NPSIZES. --- include/jemalloc/internal/sc.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 5b79bb47..ef0a4512 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -196,11 +196,7 @@ (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) -/* - * The number of size classes that are at least a page in size. Note that - * because delta may be smaller than a page, this is not the same as the number - * of size classes that are *multiples* of the page size. - */ + /* The number of size classes that are a multiple of the page size. */ #define SC_NPSIZES ( \ /* Start with all the size classes. 
*/ \ SC_NSIZES \ From c14e6c08192034d9140d61197d7c4981ca293610 Mon Sep 17 00:00:00 2001 From: Tyler Etzel Date: Tue, 31 Jul 2018 09:49:49 -0700 Subject: [PATCH 1176/2608] Add extents information to mallocstats output - Show number/bytes of extents of each size that are dirty, muzzy, retained. --- doc/jemalloc.xml.in | 33 +++++- include/jemalloc/internal/arena_externs.h | 3 +- include/jemalloc/internal/arena_stats.h | 16 +++ include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/extent_externs.h | 4 + include/jemalloc/internal/extent_structs.h | 2 + include/jemalloc/internal/stats.h | 3 +- src/arena.c | 25 ++++- src/ctl.c | 82 ++++++++++++++- src/extent.c | 36 +++++++ src/stats.c | 117 +++++++++++++++++++-- 11 files changed, 307 insertions(+), 15 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 0dcfb98d..08d48303 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -433,10 +433,11 @@ for (i = 0; i < nbins; i++) { arena statistics, respectively; b and l can be specified to omit per size class statistics for bins and large objects, respectively; x can be specified to omit all mutex - statistics. Unrecognized characters are silently ignored. Note that - thread caching may prevent some statistics from being completely up to - date, since extra locking would be required to merge counters that track - thread cache operations. + statistics; e can be used to omit extent statistics. + Unrecognized characters are silently ignored. Note that thread caching + may prevent some statistics from being completely up to date, since extra + locking would be required to merge counters that track thread cache + operations. The malloc_usable_size() function returns the usable size of the allocation pointed to by @@ -2925,6 +2926,30 @@ struct extent_hooks_s { counters. + + + stats.arenas.<i>.extents.<j>.n{extent_type} + (size_t) + r- + [] + + Number of extents of the given type in this arena in + the bucket corresponding to page size index <j>. 
The extent type + is one of dirty, muzzy, or retained. + + + + + stats.arenas.<i>.extents.<j>.{extent_type}_bytes + (size_t) + r- + [] + + Sum of the bytes managed by extents of the given type + in this arena in the bucket corresponding to page size index <j>. + The extent type is one of dirty, muzzy, or retained. + + stats.arenas.<i>.lextents.<j>.nmalloc diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 4f744cac..073e587d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -25,7 +25,8 @@ void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats); + bin_stats_t *bstats, arena_stats_large_t *lstats, + arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 6dacf74f..470ddfcd 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -50,6 +50,22 @@ struct arena_stats_decay_s { arena_stats_u64_t purged; }; +typedef struct arena_stats_extents_s arena_stats_extents_t; +struct arena_stats_extents_s { + /* + * Stats for a given index in the range [0, SC_NPSIZES] in an extents_t. + * We track both bytes and # of extents: two extents in the same bucket + * may have different sizes if adjacent size classes differ by more than + * a page, so bytes cannot always be derived from # of extents. 
+ */ + atomic_zu_t ndirty; + atomic_zu_t dirty_bytes; + atomic_zu_t nmuzzy; + atomic_zu_t muzzy_bytes; + atomic_zu_t nretained; + atomic_zu_t retained_bytes; +}; + /* * Arena stats. Note that fields marked "derived" are not directly maintained * within the arena code; rather their values are derived during stats merge diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 5576310c..775fdec0 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -42,6 +42,7 @@ typedef struct ctl_arena_stats_s { bin_stats_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; + arena_stats_extents_t estats[SC_NPSIZES]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index b8a4d026..8680251a 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -31,6 +31,10 @@ bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, bool delay_coalesce); extent_state_t extents_state_get(const extents_t *extents); size_t extents_npages_get(extents_t *extents); +/* Get the number of extents in the given page size index. */ +size_t extents_nextents_get(extents_t *extents, pszind_t ind); +/* Get the sum total bytes of the extents in the given page size index. */ +size_t extents_nbytes_get(extents_t *extents, pszind_t ind); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index d709577e..50e77bff 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -184,6 +184,8 @@ struct extents_s { * Synchronization: mtx. 
*/ extent_heap_t heaps[SC_NPSIZES + 1]; + atomic_zu_t nextents[SC_NPSIZES + 1]; + atomic_zu_t nbytes[SC_NPSIZES + 1]; /* * Bitmap for which set bits correspond to non-empty heaps. diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 852e3426..3b9e0eac 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -10,7 +10,8 @@ OPTION('a', unmerged, config_stats, false) \ OPTION('b', bins, true, false) \ OPTION('l', large, true, false) \ - OPTION('x', mutex, true, false) + OPTION('x', mutex, true, false) \ + OPTION('e', extents, true, false) enum { #define OPTION(o, v, d, s) stats_print_option_num_##v, diff --git a/src/arena.c b/src/arena.c index da7fd78b..ab3f1386 100644 --- a/src/arena.c +++ b/src/arena.c @@ -82,7 +82,8 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats) { + bin_stats_t *bstats, arena_stats_large_t *lstats, + arena_stats_extents_t *estats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -153,6 +154,28 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, curlextents * sz_index2size(SC_NBINS + i)); } + for (pszind_t i = 0; i < SC_NPSIZES; i++) { + size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, + retained_bytes; + dirty = extents_nextents_get(&arena->extents_dirty, i); + muzzy = extents_nextents_get(&arena->extents_muzzy, i); + retained = extents_nextents_get(&arena->extents_retained, i); + dirty_bytes = extents_nbytes_get(&arena->extents_dirty, i); + muzzy_bytes = extents_nbytes_get(&arena->extents_muzzy, i); + retained_bytes = + extents_nbytes_get(&arena->extents_retained, i); + + atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); + atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); 
+ atomic_store_zu(&estats[i].nretained, retained, ATOMIC_RELAXED); + atomic_store_zu(&estats[i].dirty_bytes, dirty_bytes, + ATOMIC_RELAXED); + atomic_store_zu(&estats[i].muzzy_bytes, muzzy_bytes, + ATOMIC_RELAXED); + atomic_store_zu(&estats[i].retained_bytes, retained_bytes, + ATOMIC_RELAXED); + } + arena_stats_unlock(tsdn, &arena->stats); /* tcache_bytes counts currently cached bytes. */ diff --git a/src/ctl.c b/src/ctl.c index 448ec7bf..10bdc8ee 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -173,6 +173,13 @@ CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) CTL_PROTO(stats_arenas_i_lextents_j_nrequests) CTL_PROTO(stats_arenas_i_lextents_j_curlextents) INDEX_PROTO(stats_arenas_i_lextents_j) +CTL_PROTO(stats_arenas_i_extents_j_ndirty) +CTL_PROTO(stats_arenas_i_extents_j_nmuzzy) +CTL_PROTO(stats_arenas_i_extents_j_nretained) +CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) +CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) +CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) +INDEX_PROTO(stats_arenas_i_extents_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -395,7 +402,6 @@ static const ctl_named_node_t prof_node[] = { {NAME("log_start"), CTL(prof_log_start)}, {NAME("log_stop"), CTL(prof_log_stop)} }; - static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, @@ -466,6 +472,23 @@ static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { {INDEX(stats_arenas_i_lextents_j)} }; +static const ctl_named_node_t stats_arenas_i_extents_j_node[] = { + {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, + {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, + {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, + {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, + {NAME("muzzy_bytes"), CTL(stats_arenas_i_extents_j_muzzy_bytes)}, + {NAME("retained_bytes"), 
CTL(stats_arenas_i_extents_j_retained_bytes)} +}; + +static const ctl_named_node_t super_stats_arenas_i_extents_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_extents_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_extents_node[] = { + {INDEX(stats_arenas_i_extents_j)} +}; + #define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) MUTEX_PROF_ARENA_MUTEXES #undef OP @@ -502,6 +525,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, + {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { @@ -718,6 +742,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); + memset(ctl_arena->astats->estats, 0, SC_NPSIZES * + sizeof(arena_stats_extents_t)); } } @@ -731,7 +757,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive, &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, - ctl_arena->astats->lstats); + ctl_arena->astats->lstats, ctl_arena->astats->estats); for (i = 0; i < SC_NBINS; i++) { ctl_arena->astats->allocated_small += @@ -845,6 +871,7 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->astats.uptime = astats->astats.uptime; } + /* Merge bin stats. */ for (i = 0; i < SC_NBINS; i++) { sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; @@ -871,6 +898,7 @@ MUTEX_PROF_ARENA_MUTEXES &astats->bstats[i].mutex_data); } + /* Merge stats for large allocations. 
*/ for (i = 0; i < SC_NSIZES - SC_NBINS; i++) { ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); @@ -885,6 +913,22 @@ MUTEX_PROF_ARENA_MUTEXES assert(astats->lstats[i].curlextents == 0); } } + + /* Merge extents stats. */ + for (i = 0; i < SC_NPSIZES; i++) { + accum_atomic_zu(&sdstats->estats[i].ndirty, + &astats->estats[i].ndirty); + accum_atomic_zu(&sdstats->estats[i].nmuzzy, + &astats->estats[i].nmuzzy); + accum_atomic_zu(&sdstats->estats[i].nretained, + &astats->estats[i].nretained); + accum_atomic_zu(&sdstats->estats[i].dirty_bytes, + &astats->estats[i].dirty_bytes); + accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, + &astats->estats[i].muzzy_bytes); + accum_atomic_zu(&sdstats->estats[i].retained_bytes, + &astats->estats[i].retained_bytes); + } } } @@ -2918,6 +2962,40 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_lextents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].ndirty, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].nretained, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, + ATOMIC_RELAXED), size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, + atomic_load_zu( + &arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, + ATOMIC_RELAXED), size_t); + +static const ctl_named_node_t * +stats_arenas_i_extents_j_index(tsdn_t 
*tsdn, const size_t *mib, + size_t miblen, size_t j) { + if (j >= SC_NPSIZES) { + return NULL; + } + return super_stats_arenas_i_extents_j_node; +} + static const ctl_named_node_t * stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/extent.c b/src/extent.c index 592974a8..1af93bb5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -309,6 +309,32 @@ extents_npages_get(extents_t *extents) { return atomic_load_zu(&extents->npages, ATOMIC_RELAXED); } +size_t +extents_nextents_get(extents_t *extents, pszind_t pind) { + return atomic_load_zu(&extents->nextents[pind], ATOMIC_RELAXED); +} + +size_t +extents_nbytes_get(extents_t *extents, pszind_t pind) { + return atomic_load_zu(&extents->nbytes[pind], ATOMIC_RELAXED); +} + +static void +extents_stats_add(extents_t *extent, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nextents[pind], cur + 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nbytes[pind], cur + sz, ATOMIC_RELAXED); +} + +static void +extents_stats_sub(extents_t *extent, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nextents[pind], cur - 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&extent->nbytes[pind], cur - sz, ATOMIC_RELAXED); +} + static void extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); @@ -322,6 +348,11 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { (size_t)pind); } extent_heap_insert(&extents->heaps[pind], extent); + + if (config_stats) { + extents_stats_add(extents, pind, size); + } + extent_list_append(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* @@ -344,6 +375,11 @@ extents_remove_locked(tsdn_t *tsdn, extents_t 
*extents, extent_t *extent) { size_t psz = extent_size_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); extent_heap_remove(&extents->heaps[pind], extent); + + if (config_stats) { + extents_stats_sub(extents, pind, size); + } + if (extent_heap_empty(&extents->heaps[pind])) { bitmap_set(extents->bitmap, &extents_bitmap_info, (size_t)pind); diff --git a/src/stats.c b/src/stats.c index 64d73235..754b641e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -494,6 +494,108 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i) { } } +static void +stats_arena_extents_print(emitter_t *emitter, unsigned i) { + unsigned j; + bool in_gap, in_gap_prev; + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t header_##name; \ + emitter_col_init(&header_##name, &header_row); \ + header_##name.justify = emitter_justify_##left_or_right; \ + header_##name.width = col_width; \ + header_##name.type = emitter_type_title; \ + header_##name.str_val = #name; \ + \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(ndirty, right, 13, size) + COL(dirty, right, 13, size) + COL(nmuzzy, right, 13, size) + COL(muzzy, right, 13, size) + COL(nretained, right, 13, size) + COL(retained, right, 13, size) + COL(ntotal, right, 13, size) + COL(total, right, 13, size) +#undef COL + + /* Label this section. 
*/ + header_size.width -= 8; + emitter_table_printf(emitter, "extents:"); + emitter_table_row(emitter, &header_row); + emitter_json_array_kv_begin(emitter, "extents"); + + in_gap = false; + for (j = 0; j < SC_NPSIZES; j++) { + size_t ndirty, nmuzzy, nretained, total, dirty_bytes, + muzzy_bytes, retained_bytes, total_bytes; + CTL_M2_M4_GET("stats.arenas.0.extents.0.ndirty", i, j, + &ndirty, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.nmuzzy", i, j, + &nmuzzy, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.nretained", i, j, + &nretained, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.dirty_bytes", i, j, + &dirty_bytes, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.muzzy_bytes", i, j, + &muzzy_bytes, size_t); + CTL_M2_M4_GET("stats.arenas.0.extents.0.retained_bytes", i, j, + &retained_bytes, size_t); + total = ndirty + nmuzzy + nretained; + total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; + + in_gap_prev = in_gap; + in_gap = (total == 0); + + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, + " ---\n"); + } + + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty); + emitter_json_kv(emitter, "nmuzzy", emitter_type_size, &nmuzzy); + emitter_json_kv(emitter, "nretained", emitter_type_size, + &nretained); + + emitter_json_kv(emitter, "dirty_bytes", emitter_type_size, + &dirty_bytes); + emitter_json_kv(emitter, "muzzy_bytes", emitter_type_size, + &muzzy_bytes); + emitter_json_kv(emitter, "retained_bytes", emitter_type_size, + &retained_bytes); + emitter_json_object_end(emitter); + + col_size.size_val = sz_pind2sz(j); + col_ind.size_val = j; + col_ndirty.size_val = ndirty; + col_dirty.size_val = dirty_bytes; + col_nmuzzy.size_val = nmuzzy; + col_muzzy.size_val = muzzy_bytes; + col_nretained.size_val = nretained; + col_retained.size_val = retained_bytes; + col_ntotal.size_val = total; + col_total.size_val = total_bytes; + + if (!in_gap) { + emitter_table_row(emitter, &row); + } + } + 
emitter_json_array_end(emitter); /* Close "extents". */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); + } +} + static void stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { emitter_row_t row; @@ -521,7 +623,7 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, - bool mutex) { + bool mutex, bool extents) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; @@ -820,6 +922,9 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, if (large) { stats_arena_lextents_print(emitter, i); } + if (extents) { + stats_arena_extents_print(emitter, i); + } } static void @@ -1066,7 +1171,7 @@ stats_general_print(emitter_t *emitter) { static void stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, - bool unmerged, bool bins, bool large, bool mutex) { + bool unmerged, bool bins, bool large, bool mutex, bool extents) { /* * These should be deleted. We keep them around for a while, to aid in * the transition to the emitter code. @@ -1187,7 +1292,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "Merged arenas stats:\n"); emitter_json_object_kv_begin(emitter, "merged"); stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, - large, mutex); + large, mutex, extents); emitter_json_object_end(emitter); /* Close "merged". */ } @@ -1198,7 +1303,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, "Destroyed arenas stats:\n"); emitter_json_object_kv_begin(emitter, "destroyed"); stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, - bins, large, mutex); + bins, large, mutex, extents); emitter_json_object_end(emitter); /* Close "destroyed". 
*/ } @@ -1214,7 +1319,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); stats_arena_print(emitter, i, bins, - large, mutex); + large, mutex, extents); /* Close "". */ emitter_json_object_end(emitter); } @@ -1280,7 +1385,7 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } if (config_stats) { stats_print_helper(&emitter, merged, destroyed, unmerged, - bins, large, mutex); + bins, large, mutex, extents); } emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ From 126252a7e6bd098d649f6a82a947c7c056816c2c Mon Sep 17 00:00:00 2001 From: Tyler Etzel Date: Wed, 1 Aug 2018 14:14:33 -0700 Subject: [PATCH 1177/2608] Add stats for the size of extent_avail heap --- doc/jemalloc.xml.in | 11 +++++++++++ include/jemalloc/internal/arena_stats.h | 3 +++ include/jemalloc/internal/arena_structs_b.h | 1 + src/arena.c | 4 ++++ src/ctl.c | 8 ++++++++ src/extent.c | 2 ++ src/stats.c | 3 ++- 7 files changed, 31 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 08d48303..058e9db9 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2597,6 +2597,17 @@ struct extent_hooks_s { details. + + + stats.arenas.<i>.extent_avail + (size_t) + r- + [] + + Number of allocated (but unused) extent structs in this + arena. + + stats.arenas.<i>.base diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 470ddfcd..ef1e25b3 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -87,6 +87,9 @@ struct arena_stats_s { */ atomic_zu_t retained; /* Derived. */ + /* Number of extent_t structs allocated by base, but not being used. 
*/ + atomic_zu_t extent_avail; + arena_stats_decay_t decay_dirty; arena_stats_decay_t decay_muzzy; diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 96f25f8a..509f11c1 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -196,6 +196,7 @@ struct arena_s { * Synchronization: extent_avail_mtx. */ extent_tree_t extent_avail; + atomic_zu_t extent_avail_cnt; malloc_mutex_t extent_avail_mtx; /* diff --git a/src/arena.c b/src/arena.c index ab3f1386..29f447bb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -100,6 +100,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->retained, extents_npages_get(&arena->extents_retained) << LG_PAGE); + atomic_store_zu(&astats->extent_avail, + atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), + ATOMIC_RELAXED); + arena_stats_accum_u64(&astats->decay_dirty.npurge, arena_stats_read_u64(tsdn, &arena->stats, &arena->stats.decay_dirty.npurge)); diff --git a/src/ctl.c b/src/ctl.c index 10bdc8ee..b482fc56 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -190,6 +190,7 @@ CTL_PROTO(stats_arenas_i_pdirty) CTL_PROTO(stats_arenas_i_pmuzzy) CTL_PROTO(stats_arenas_i_mapped) CTL_PROTO(stats_arenas_i_retained) +CTL_PROTO(stats_arenas_i_extent_avail) CTL_PROTO(stats_arenas_i_dirty_npurge) CTL_PROTO(stats_arenas_i_dirty_nmadvise) CTL_PROTO(stats_arenas_i_dirty_purged) @@ -510,6 +511,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, {NAME("mapped"), CTL(stats_arenas_i_mapped)}, {NAME("retained"), CTL(stats_arenas_i_retained)}, + {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, @@ -804,6 +806,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, 
ctl_arena_t *ctl_arena, &astats->astats.mapped); accum_atomic_zu(&sdstats->astats.retained, &astats->astats.retained); + accum_atomic_zu(&sdstats->astats.extent_avail, + &astats->astats.extent_avail); } ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, @@ -2764,6 +2768,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, CTL_RO_CGEN(config_stats, stats_arenas_i_retained, atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.extent_avail, + ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, ctl_arena_stats_read_u64( diff --git a/src/extent.c b/src/extent.c index 1af93bb5..847e4b99 100644 --- a/src/extent.c +++ b/src/extent.c @@ -186,6 +186,7 @@ extent_alloc(tsdn_t *tsdn, arena_t *arena) { return base_alloc_extent(tsdn, arena->base); } extent_avail_remove(&arena->extent_avail, extent); + atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); return extent; } @@ -194,6 +195,7 @@ void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); extent_avail_insert(&arena->extent_avail, extent); + atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); } diff --git a/src/stats.c b/src/stats.c index 754b641e..e4e13378 100644 --- a/src/stats.c +++ b/src/stats.c @@ -628,7 +628,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident, metadata_thp; + size_t base, internal, resident, metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -911,6 
+911,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(metadata_thp) GET_AND_EMIT_MEM_STAT(tcache_bytes) GET_AND_EMIT_MEM_STAT(resident) + GET_AND_EMIT_MEM_STAT(extent_avail) #undef GET_AND_EMIT_MEM_STAT if (mutex) { From e8ec9528abac90efe4e0cc3a29da8d7aea59f23d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 3 Aug 2018 12:47:40 -0700 Subject: [PATCH 1178/2608] Allow the use of readlinkat over readlink. This can be useful in situations where readlink is disallowed. --- configure.ac | 16 ++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++++ src/jemalloc.c | 5 +++++ 3 files changed, 27 insertions(+) diff --git a/configure.ac b/configure.ac index e18bc4b2..cd5bdd64 100644 --- a/configure.ac +++ b/configure.ac @@ -1277,6 +1277,22 @@ if test "x$enable_log" = "x1" ; then fi AC_SUBST([enable_log]) +dnl Do not use readlinkat by default +AC_ARG_ENABLE([readlinkat], + [AS_HELP_STRING([--enable-readlinkat], [Use readlinkat over readlink])], +[if test "x$enable_readlinkat" = "xno" ; then + enable_readlinkat="0" +else + enable_readlinkat="1" +fi +], +[enable_readlinkat="0"] +) +if test "x$enable_readlinkat" = "x1" ; then + AC_DEFINE([JEMALLOC_READLINKAT], [ ]) +fi +AC_SUBST([enable_readlinkat]) + JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 8dad9a1d..cec41aa6 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -245,6 +245,12 @@ */ #undef JEMALLOC_LOG +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +#undef JEMALLOC_READLINKAT + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index e8f110f7..2828c175 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -961,7 +961,12 @@ malloc_conf_init(sc_data_t *sc_data) { * Try to use the contents of the "/etc/malloc.conf" * symbolic link's name. */ +#ifndef JEMALLOC_READLINKAT linklen = readlink(linkname, buf, sizeof(buf) - 1); +#else + linklen = readlinkat(AT_FDCWD, linkname, buf, + sizeof(buf) - 1); +#endif if (linklen == -1) { /* No configuration specified. */ linklen = 0; From 0771ff2cea6dc18fcd3f6bf452b4224a4e17ae38 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 5 Aug 2018 10:37:53 +0100 Subject: [PATCH 1179/2608] FreeBSD build changes and allow to run the tests. --- include/jemalloc/internal/mutex.h | 13 ++++++++++--- scripts/gen_run_tests.py | 25 +++++++++++++++++++------ src/pages.c | 2 -- 3 files changed, 29 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 651ce5f9..5a955d9e 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -115,9 +115,16 @@ struct malloc_mutex_s { {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# if (defined(JEMALLOC_DEBUG)) +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} +# else +# define MALLOC_MUTEX_INITIALIZER \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} +# endif + #else # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # if defined(JEMALLOC_DEBUG) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index a87ecffb..5052b3e0 100755 --- a/scripts/gen_run_tests.py +++ 
b/scripts/gen_run_tests.py @@ -4,6 +4,7 @@ import sys from itertools import combinations from os import uname from multiprocessing import cpu_count +from subprocess import call # Later, we want to test extended vaddr support. Apparently, the "real" way of # checking this is flaky on OS X. @@ -13,13 +14,25 @@ nparallel = cpu_count() * 2 uname = uname()[0] +if "BSD" in uname: + make_cmd = 'gmake' +else: + make_cmd = 'make' + def powerset(items): result = [] for i in xrange(len(items) + 1): result += combinations(items, i) return result -possible_compilers = [('gcc', 'g++'), ('clang', 'clang++')] +possible_compilers = [] +for cc, cxx in (['gcc', 'g++'], ['clang', 'clang++']): + try: + cmd_ret = call([cc, "-v"]) + if cmd_ret == 0: + possible_compilers.append((cc, cxx)) + except: + pass possible_compiler_opts = [ '-m32', ] @@ -39,7 +52,7 @@ possible_malloc_conf_opts = [ ] print 'set -e' -print 'if [ -f Makefile ] ; then make relclean ; fi' +print 'if [ -f Makefile ] ; then %(make_cmd)s relclean ; fi' % {'make_cmd': make_cmd} print 'autoconf' print 'rm -rf run_tests.out' print 'mkdir run_tests.out' @@ -102,11 +115,11 @@ cd run_test_%(ind)d.out echo "==> %(config_line)s" >> run_test.log %(config_line)s >> run_test.log 2>&1 || abort -run_cmd make all tests -run_cmd make check -run_cmd make distclean +run_cmd %(make_cmd)s all tests +run_cmd %(make_cmd)s check +run_cmd %(make_cmd)s distclean EOF -chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line} +chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line, 'make_cmd': make_cmd} ind += 1 print 'for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel} diff --git a/src/pages.c b/src/pages.c index cc967fcf..9561eb36 100644 --- a/src/pages.c +++ b/src/pages.c @@ -390,8 +390,6 @@ os_page_detect(void) { SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; -#elif defined(__FreeBSD__) - return 
getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { From 1f71e1ca4319de7788d53d1d0ba905995c7f52bd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 1 Aug 2018 14:22:05 -0700 Subject: [PATCH 1180/2608] Add hook microbenchmark. --- Makefile.in | 4 ++- test/stress/hookbench.c | 73 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 test/stress/hookbench.c diff --git a/Makefile.in b/Makefile.in index 49585ed9..c35bb7ed 100644 --- a/Makefile.in +++ b/Makefile.in @@ -242,7 +242,9 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif -TESTS_STRESS := $(srcroot)test/stress/microbench.c +TESTS_STRESS := $(srcroot)test/stress/microbench.c \ + $(srcroot)test/stress/hookbench.c + TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) diff --git a/test/stress/hookbench.c b/test/stress/hookbench.c new file mode 100644 index 00000000..97e90b0e --- /dev/null +++ b/test/stress/hookbench.c @@ -0,0 +1,73 @@ +#include "test/jemalloc_test.h" + +static void +noop_alloc_hook(void *extra, hook_alloc_t type, void *result, + uintptr_t result_raw, uintptr_t args_raw[3]) { +} + +static void +noop_dalloc_hook(void *extra, hook_dalloc_t type, void *address, + uintptr_t args_raw[3]) { +} + +static void +noop_expand_hook(void *extra, hook_expand_t type, void *address, + size_t old_usize, size_t new_usize, uintptr_t result_raw, + uintptr_t args_raw[4]) { +} + +static void +malloc_free_loop(int iters) { + for (int i = 0; i < iters; i++) { + void *p = mallocx(1, 0); + free(p); + } +} + +static void +test_hooked(int iters) { + hooks_t hooks = {&noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook, + NULL}; + + int err; + void *handles[HOOK_MAX]; + size_t sz = sizeof(handles[0]); + + for (int i = 0; i < HOOK_MAX; i++) { + err = mallctl("experimental.hooks.install", &handles[i], + &sz, &hooks, sizeof(hooks)); + assert(err == 0); + + timedelta_t timer; + timer_start(&timer); + 
malloc_free_loop(iters); + timer_stop(&timer); + malloc_printf("With %d hook%s: %"FMTu64"us\n", i + 1, + i + 1 == 1 ? "" : "s", timer_usec(&timer)); + } + for (int i = 0; i < HOOK_MAX; i++) { + err = mallctl("experimental.hooks.remove", NULL, NULL, + &handles[i], sizeof(handles[i])); + assert(err == 0); + } +} + +static void +test_unhooked(int iters) { + timedelta_t timer; + timer_start(&timer); + malloc_free_loop(iters); + timer_stop(&timer); + + malloc_printf("Without hooks: %"FMTu64"us\n", timer_usec(&timer)); +} + +int +main(void) { + /* Initialize */ + free(mallocx(1, 0)); + int iters = 10 * 1000 * 1000; + malloc_printf("Benchmarking hooks with %d iterations:\n", iters); + test_hooked(iters); + test_unhooked(iters); +} From 36eb0b3d77404f389cfddad6675fe1f479e76be7 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Tue, 10 Jul 2018 13:58:37 +0200 Subject: [PATCH 1181/2608] Add valgrind build bots to CI This commit adds two build-bots to CI that test the release builds of jemalloc on linux and macOS under valgrind. The macOS build is not enabled because valgrind reports errors about reads of uninitialized memory in some tests and segfaults in others. 
--- .travis.yml | 9 +++++++++ scripts/gen_travis.py | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/.travis.yml b/.travis.yml index cd3be832..07d30815 100644 --- a/.travis.yml +++ b/.travis.yml @@ -119,9 +119,18 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + # Development build - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + # Valgrind + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" + addons: + apt: + packages: + - valgrind + before_script: - autoconf diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 44732052..743f1e5d 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -113,8 +113,28 @@ for combination in unusual_combinations_to_test: # Development build include_rows += '''\ + # Development build - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" ''' +# Valgrind build bots +include_rows += ''' + # Valgrind + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" + addons: + apt: + packages: + - valgrind +''' + +# To enable valgrind on macosx add: +# +# - os: osx +# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" +# install: brew install valgrind +# +# It currently fails due to: 
https://github.com/jemalloc/jemalloc/issues/1274 + print travis_template % include_rows From 4c548a61c89b0472b9952fcc4090eb00c2a88870 Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Fri, 10 Aug 2018 20:27:35 -0700 Subject: [PATCH 1182/2608] Bit_util: Use intrinsics for pow2_ceil, where available. --- include/jemalloc/internal/bit_util.h | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 521f71b3..27a8c97d 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -63,6 +63,22 @@ ffs_u32(uint32_t bitmap) { BIT_UTIL_INLINE uint64_t pow2_ceil_u64(uint64_t x) { +#if (defined(__amd64__) || defined(__x86_64__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + if(unlikely(x <= 1)) { + return x; + } + size_t msb_on_index; +#if (defined(__amd64__) || defined(__x86_64__)) + asm ("bsrq %1, %0" + : "=r"(msb_on_index) // Outputs. + : "r"(x-1) // Inputs. + ); +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + msb_on_index = (63 ^ __builtin_clzll(x - 1)); +#endif + assert(msb_on_index < 63); + return 1ULL << (msb_on_index + 1); +#else x--; x |= x >> 1; x |= x >> 2; @@ -72,10 +88,27 @@ pow2_ceil_u64(uint64_t x) { x |= x >> 32; x++; return x; +#endif } BIT_UTIL_INLINE uint32_t pow2_ceil_u32(uint32_t x) { +#if (defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + if(unlikely(x <= 1)) { + return x; + } + size_t msb_on_index; +#if (defined(__i386__)) + asm ("bsr %1, %0" + : "=r"(msb_on_index) // Outputs. + : "r"(x-1) // Inputs. + ); +#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) + msb_on_index = (31 ^ __builtin_clz(x - 1)); +#endif + assert(msb_on_index < 31); + return 1U << (msb_on_index + 1); +#else x--; x |= x >> 1; x |= x >> 2; @@ -84,6 +117,7 @@ pow2_ceil_u32(uint32_t x) { x |= x >> 16; x++; return x; +#endif } /* Compute the smallest power of 2 that is >= x. 
*/ From 9f43defb6eac30c36dbde25d82e88be23f97309f Mon Sep 17 00:00:00 2001 From: rustyx Date: Fri, 31 Aug 2018 15:45:47 +0200 Subject: [PATCH 1183/2608] Add sc.c to the MSVC project --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ 4 files changed, 8 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index be252d76..ddc6781c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -59,6 +59,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 00d09609..1dcf4ed5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 599cc42f..21481d5e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -59,6 +59,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index b352721c..466dc63f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files From 88771fa0138c75a2d29601cc33025d81822b082a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 12 Sep 2018 15:32:16 -0700 Subject: [PATCH 1184/2608] Bootstrapping: don't overwrite opt_prof_prefix. 
--- src/jemalloc.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2828c175..15c0609f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1387,13 +1387,18 @@ malloc_init_hard_a0_locked() { * out of sc_data_global are final. */ sc_boot(&sc_data); + /* + * prof_boot0 only initializes opt_prof_prefix. We need to do it before + * we parse malloc_conf options, in case malloc_conf parsing overwrites + * it. + */ + if (config_prof) { + prof_boot0(); + } malloc_conf_init(&sc_data); sz_boot(&sc_data); bin_boot(&sc_data); - if (config_prof) { - prof_boot0(); - } if (opt_stats_print) { /* Print statistics at exit. */ if (atexit(stats_print_atexit) != 0) { From 115ce93562ab76f90a2509bf0640bc7df6b2d48f Mon Sep 17 00:00:00 2001 From: Rajeev Misra Date: Thu, 23 Aug 2018 20:58:48 -0700 Subject: [PATCH 1185/2608] bit_util: Don't use __builtin_clz on s390x There's an optimizer bug upstream that results in test failures; reported at https://bugzilla.redhat.com/show_bug.cgi?id=1619354. This works around the failure reported at https://github.com/jemalloc/jemalloc/issues/1307. --- include/jemalloc/internal/bit_util.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 27a8c97d..8c59c39e 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -93,7 +93,7 @@ pow2_ceil_u64(uint64_t x) { BIT_UTIL_INLINE uint32_t pow2_ceil_u32(uint32_t x) { -#if (defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) +#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__))) if(unlikely(x <= 1)) { return x; } From 676cdd66792ccb629a978837ea2a066d5db342cc Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sat, 23 Jun 2018 05:44:23 +0100 Subject: [PATCH 1186/2608] Disable runtime detection of lazy purging support on FreeBSD. 
The check doesn't seem to serve any purpose here, and this shaves off three syscalls on binary startup. --- src/pages.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pages.c b/src/pages.c index 9561eb36..7ef3de75 100644 --- a/src/pages.c +++ b/src/pages.c @@ -586,6 +586,11 @@ pages_boot(void) { init_thp_state(); +#ifdef __FreeBSD__ + /* + * FreeBSD doesn't need the check; madvise(2) is known to work. + */ +#else /* Detect lazy purge runtime support. */ if (pages_can_purge_lazy) { bool committed = false; @@ -599,6 +604,7 @@ pages_boot(void) { } os_pages_unmap(madv_free_page, PAGE); } +#endif return false; } From f80c97e477d1b3fe7778c65d9439d673738b4131 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sat, 23 Jun 2018 06:51:33 +0100 Subject: [PATCH 1187/2608] Rework the way jemalloc uses mmap(2) on FreeBSD. This makes it directly use MAP_EXCL and MAP_ALIGNED() instead of weird workarounds involving mapping at random places and then unmapping parts of them. --- src/pages.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/pages.c b/src/pages.c index 7ef3de75..88a9d630 100644 --- a/src/pages.c +++ b/src/pages.c @@ -180,6 +180,31 @@ pages_map(void *addr, size_t size, size_t alignment, bool *commit) { assert(alignment >= PAGE); assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr); +#if defined(__FreeBSD__) && defined(MAP_EXCL) + /* + * FreeBSD has mechanisms both to mmap at specific address without + * touching existing mappings, and to mmap with specific alignment. + */ + { + int prot = *commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + int flags = mmap_flags; + + if (addr != NULL) { + flags |= MAP_FIXED | MAP_EXCL; + } else { + unsigned alignment_bits = ffs_zu(alignment); + assert(alignment_bits > 1); + flags |= MAP_ALIGNED(alignment_bits - 1); + } + + void *ret = mmap(addr, size, prot, flags, -1, 0); + if (ret == MAP_FAILED) { + ret = NULL; + } + + return ret; + } +#endif /* * Ideally, there would be a way to specify alignment to mmap() (like * NetBSD has), but in the absence of such a feature, we have to work From 856319dc8a3d15c3eddf83d106e01e6f63c349a7 Mon Sep 17 00:00:00 2001 From: jsteemann Date: Fri, 5 Oct 2018 01:29:19 +0200 Subject: [PATCH 1188/2608] check return value of `malloc_read_fd` in case `malloc_read_fd` returns a negative error number, the result would afterwards be casted to an unsigned size_t, and may have theoretically caused an out-of-bounds memory access in the following `strncmp` call. --- src/pages.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pages.c b/src/pages.c index 88a9d630..479a89e5 100644 --- a/src/pages.c +++ b/src/pages.c @@ -567,6 +567,10 @@ init_thp_state(void) { close(fd); #endif + if (nread < 0) { + goto label_error; + } + if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { init_system_thp_mode = thp_mode_default; } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { From 09adf18f1aefcee71cc716f4f366c7e2e889b7fa Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 8 Oct 2018 12:29:57 -0700 Subject: [PATCH 1189/2608] Remove a branch from cache_bin_alloc_easy Combine the branches for checking for an empty cache_bin, and checking for the low watermark. 
--- include/jemalloc/internal/cache_bin.h | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 12f3ef2d..40d942e5 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -88,11 +88,21 @@ JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { void *ret; - if (unlikely(bin->ncached == 0)) { - bin->low_water = -1; - *success = false; - return NULL; + bin->ncached--; + + /* + * Check for both bin->ncached == 0 and ncached < low_water + * in a single branch. + */ + if (unlikely(bin->ncached <= bin->low_water)) { + bin->low_water = bin->ncached; + if (bin->ncached == -1) { + bin->ncached = 0; + *success = false; + return NULL; + } } + /* * success (instead of ret) should be checked upon the return of this * function. We avoid checking (ret == NULL) because there is never a @@ -101,12 +111,7 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { * cacheline). */ *success = true; - ret = *(bin->avail - bin->ncached); - bin->ncached--; - - if (unlikely(bin->ncached < bin->low_water)) { - bin->low_water = bin->ncached; - } + ret = *(bin->avail - (bin->ncached + 1)); return ret; } From 9ed3bdc8484049bd304c771a1b10070d5d7c95db Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 9 Oct 2018 10:59:02 -0700 Subject: [PATCH 1190/2608] move bytes until sample to tsd. Fastpath allocation does not need to load tdata now, avoiding several branches. 
--- include/jemalloc/internal/prof_inlines_b.h | 8 +++++--- include/jemalloc/internal/prof_structs.h | 1 - include/jemalloc/internal/tsd.h | 2 ++ src/prof.c | 13 ++++++------- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 5e0b0642..bfc66f77 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -82,6 +82,7 @@ JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { prof_tdata_t *tdata; + uint64_t bytes_until_sample; cassert(config_prof); @@ -98,9 +99,10 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - if (likely(tdata->bytes_until_sample >= usize)) { - if (update) { - tdata->bytes_until_sample -= usize; + bytes_until_sample = tsd_bytes_until_sample_get(tsd); + if (likely(bytes_until_sample >= usize)) { + if (update && tsd_nominal(tsd)) { + tsd_bytes_until_sample_set(tsd, bytes_until_sample - usize); } return true; } else { diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 0d58ae10..34ed4822 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -169,7 +169,6 @@ struct prof_tdata_s { /* Sampling state. */ uint64_t prng_state; - uint64_t bytes_until_sample; /* State used to avoid dumping while operating on prof internals. 
*/ bool enq; diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 59a18857..69fb05cb 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -68,6 +68,7 @@ typedef void (*test_callback_t)(int *); O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ + O(bytes_until_sample, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(iarena, arena_t *, arena_t *) \ @@ -86,6 +87,7 @@ typedef void (*test_callback_t)(int *); 0, \ 0, \ 0, \ + 0, \ NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ diff --git a/src/prof.c b/src/prof.c index 458c6cd0..83d492d4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1136,15 +1136,12 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { void prof_sample_threshold_update(prof_tdata_t *tdata) { #ifdef JEMALLOC_PROF - uint64_t r; - double u; - if (!config_prof) { return; } if (lg_prof_sample == 0) { - tdata->bytes_until_sample = 0; + tsd_bytes_until_sample_set(tsd_fetch(), 0); return; } @@ -1166,11 +1163,13 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - r = prng_lg_range_u64(&tdata->prng_state, 53); - u = (double)r * (1.0/9007199254740992.0L); - tdata->bytes_until_sample = (uint64_t)(log(u) / + uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53); + double u = (double)r * (1.0/9007199254740992.0L); + uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; + tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample); + #endif } From 0ac524308d3f636d1a4b5149fa7adf24cf426d9c Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 9 Oct 2018 11:07:24 -0700 Subject: [PATCH 1191/2608] refactor prof accum, so that tdata is not loaded if we aren't going to sample. 
--- include/jemalloc/internal/prof_inlines_b.h | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index bfc66f77..b2f5a04e 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -86,6 +86,14 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); + bytes_until_sample = tsd_bytes_until_sample_get(tsd); + if (likely(bytes_until_sample >= usize)) { + if (update && tsd_nominal(tsd)) { + tsd_bytes_until_sample_set(tsd, bytes_until_sample - usize); + } + return true; + } + tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; @@ -99,22 +107,14 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - bytes_until_sample = tsd_bytes_until_sample_get(tsd); - if (likely(bytes_until_sample >= usize)) { - if (update && tsd_nominal(tsd)) { - tsd_bytes_until_sample_set(tsd, bytes_until_sample - usize); - } + if (tsd_reentrancy_level_get(tsd) > 0) { return true; - } else { - if (tsd_reentrancy_level_get(tsd) > 0) { - return true; - } - /* Compute new sample threshold. */ - if (update) { - prof_sample_threshold_update(tdata); - } - return !tdata->active; } + /* Compute new sample threshold. */ + if (update) { + prof_sample_threshold_update(tdata); + } + return !tdata->active; } JEMALLOC_ALWAYS_INLINE prof_tctx_t * From d1a861fa80c66221be8c4d94e51128a4641809da Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 9 Oct 2018 11:16:19 -0700 Subject: [PATCH 1192/2608] add a check for SC_LARGE_MAXCLASS If we assume SC_LARGE_MAXCLASS will always fit in a SSIZE_T, then we can optimize some checks by unconditional subtraction, and then checking flags only, without a compare statement in x86. 
--- src/sc.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/sc.c b/src/sc.c index 8784bdd0..89ddb6ba 100644 --- a/src/sc.c +++ b/src/sc.c @@ -244,6 +244,15 @@ size_classes( assert(sc_data->large_minclass == SC_LARGE_MINCLASS); assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); + + /* + * In the allocation fastpath, we want to assume that we can + * unconditionally subtract the requested allocation size from + * a ssize_t, and detect passing through 0 correctly. This + * results in optimal generated code. For this to work, the + * maximum allocation size must be less than SSIZE_MAX. + */ + assert(SC_LARGE_MAXCLASS < SSIZE_MAX); } void From 997d86acc6d2cc632b79669ebf3f938290e9f5da Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 9 Oct 2018 11:25:36 -0700 Subject: [PATCH 1193/2608] restrict bytes_until_sample to int64_t. This allows optimal asm generation of sub bytes_until_sample, usize; je; for x86 arch. Subtraction is unconditional, and only flags are checked for the jump, no extra compare is necessary. This also reduces register pressure. --- include/jemalloc/internal/prof_inlines_b.h | 14 +++++++++----- include/jemalloc/internal/tsd.h | 2 +- src/prof.c | 3 +++ 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index b2f5a04e..085111f4 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -82,17 +82,21 @@ JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { prof_tdata_t *tdata; - uint64_t bytes_until_sample; + int64_t bytes_until_sample; cassert(config_prof); + ssize_t check = update ? 
0 : usize; bytes_until_sample = tsd_bytes_until_sample_get(tsd); - if (likely(bytes_until_sample >= usize)) { - if (update && tsd_nominal(tsd)) { - tsd_bytes_until_sample_set(tsd, bytes_until_sample - usize); + if (update) { + bytes_until_sample -= usize; + if (tsd_nominal(tsd)) { + tsd_bytes_until_sample_set(tsd, bytes_until_sample); } - return true; } + if (likely(bytes_until_sample >= check)) { + return true; + } tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 69fb05cb..c931441b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -68,7 +68,7 @@ typedef void (*test_callback_t)(int *); O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ - O(bytes_until_sample, uint64_t, uint64_t) \ + O(bytes_until_sample, int64_t, int64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(iarena, arena_t *, arena_t *) \ diff --git a/src/prof.c b/src/prof.c index 83d492d4..71de2d34 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1168,6 +1168,9 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; + if (bytes_until_sample > SSIZE_MAX) { + bytes_until_sample = SSIZE_MAX; + } tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample); #endif From 325e3305fc7563600a710341d1f98cb8e04caaba Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 3 Oct 2018 14:47:31 -0700 Subject: [PATCH 1194/2608] remove malloc_init() off the fastpath --- .../internal/tsd_malloc_thread_cleanup.h | 1 - include/jemalloc/internal/tsd_tls.h | 1 - src/jemalloc.c | 19 ++++++++++++++++--- src/tsd.c | 12 +++++++----- 4 files changed, 23 insertions(+), 10 deletions(-) diff --git 
a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index beb467a6..bf8801ef 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -47,7 +47,6 @@ tsd_get_allocates(void) { /* Get/set. */ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_get(bool init) { - assert(tsd_booted); return &tsd_tls; } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 757aaa0e..f4f165c7 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -40,7 +40,6 @@ tsd_get_allocates(void) { /* Get/set. */ JEMALLOC_ALWAYS_INLINE tsd_t * tsd_get(bool init) { - assert(tsd_booted); return &tsd_tls; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 15c0609f..237bfe7c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2110,9 +2110,8 @@ label_invalid_alignment: return EINVAL; } -/* Returns the errno-style error code of the allocation. */ -JEMALLOC_ALWAYS_INLINE int -imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { +JEMALLOC_ALWAYS_INLINE bool +imalloc_init_check(static_opts_t *sopts, dynamic_opts_t *dopts) { if (unlikely(!malloc_initialized()) && unlikely(malloc_init())) { if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(sopts->oom_string); @@ -2122,6 +2121,16 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { set_errno(ENOMEM); *dopts->result = NULL; + return false; + } + + return true; +} + +/* Returns the errno-style error code of the allocation. 
*/ +JEMALLOC_ALWAYS_INLINE int +imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { + if (tsd_get_allocates() && !imalloc_init_check(sopts, dopts)) { return ENOMEM; } @@ -2134,6 +2143,10 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { sopts->slow = false; return imalloc_body(sopts, dopts, tsd); } else { + if (!tsd_get_allocates() && !imalloc_init_check(sopts, dopts)) { + return ENOMEM; + } + sopts->slow = true; return imalloc_body(sopts, dopts, tsd); } diff --git a/src/tsd.c b/src/tsd.c index 1204a0de..f317d486 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -280,11 +280,13 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { tsd_slow_update(tsd); } else if (tsd_state_get(tsd) == tsd_state_uninitialized) { if (!minimal) { - tsd_state_set(tsd, tsd_state_nominal); - tsd_slow_update(tsd); - /* Trigger cleanup handler registration. */ - tsd_set(tsd); - tsd_data_init(tsd); + if (tsd_booted) { + tsd_state_set(tsd, tsd_state_nominal); + tsd_slow_update(tsd); + /* Trigger cleanup handler registration. */ + tsd_set(tsd); + tsd_data_init(tsd); + } } else { tsd_state_set(tsd, tsd_state_minimal_initialized); tsd_set(tsd); From 08260a6b944a67a3d9f63e7eb738718fc760e0ea Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 15 Nov 2017 18:26:49 +0100 Subject: [PATCH 1195/2608] Add experimental API: smallocx_return_t smallocx(size, flags) --- Motivation: This new experimental memory-allocaction API returns a pointer to the allocation as well as the usable size of the allocated memory region. The `s` in `smallocx` stands for `sized`-`mallocx`, attempting to convey that this API returns the size of the allocated memory region. It should allow C++ P0901r0 [0] and Rust Alloc::alloc_excess to make use of it. The main purpose of these APIs is to improve telemetry. It is more accurate to register `smallocx(size, flags)` than `smallocx(nallocx(size), flags)`, for example. 
The latter will always line up perfectly with the existing size classes, causing a loss of telemetry information about the internal fragmentation induced by potentially poor size-classes choices. Instrumenting `nallocx` does not help much since user code can cache its result and use it repeatedly. --- Implementation: The implementation adds a new `usize` option to `static_opts_s` and an `usize` variable to `dynamic_opts_s`. These are then used to cache the result of `sz_index2size` and similar functions in the code paths in which they are unconditionally invoked. In the code-paths in which these functions are not unconditionally invoked, `smallocx` calls, as opposed to `mallocx`, these functions explicitly. --- [0]: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2018/p0901r0.html --- configure.ac | 19 ++++- .../internal/jemalloc_internal_defs.h.in | 3 + include/jemalloc/jemalloc_protos.h.in | 4 + include/jemalloc/jemalloc_typedefs.h.in | 7 ++ src/jemalloc.c | 77 ++++++++++++++++++- 5 files changed, 108 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index cd5bdd64..018ee3f6 100644 --- a/configure.ac +++ b/configure.ac @@ -850,7 +850,7 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx smallocx nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) @@ -1043,6 +1043,22 @@ if test "x$enable_stats" = "x1" ; then fi AC_SUBST([enable_stats]) +dnl Do not enable smallocx by default. 
+AC_ARG_ENABLE([experimental_smallocx], + [AS_HELP_STRING([--enable-experimental-smallocx], [Enable experimental smallocx API])], +[if test "x$enable_experimental_smallocx" = "xno" ; then +enable_experimental_smallocx="0" +else +enable_experimental_smallocx="1" +fi +], +[enable_experimental_smallocx="0"] +) +if test "x$enable_experimental_smallocx" = "x1" ; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API]) +fi +AC_SUBST([enable_experimental_smallocx]) + dnl Do not enable profiling by default. AC_ARG_ENABLE([prof], [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], @@ -2281,6 +2297,7 @@ AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) +AC_MSG_RESULT([experimetal_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index cec41aa6..c1eb8edc 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -153,6 +153,9 @@ /* JEMALLOC_STATS enables statistics calculation. */ #undef JEMALLOC_STATS +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +#undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API + /* JEMALLOC_PROF enables allocation profiling. 
*/ #undef JEMALLOC_PROF diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index a78414b1..05fc056f 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -28,6 +28,10 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags) JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); +#ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + smallocx_return_t JEMALLOC_NOTHROW @je_@smallocx(size_t size, int flags); +#endif JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@rallocx(void *ptr, size_t size, int flags) JEMALLOC_ALLOC_SIZE(2); diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index 1a588743..fe0d7d1e 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -75,3 +75,10 @@ struct extent_hooks_s { extent_split_t *split; extent_merge_t *merge; }; + +#ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API +typedef struct { + void *ptr; + size_t size; +} smallocx_return_t; +#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 237bfe7c..01e2db97 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1747,6 +1747,11 @@ struct static_opts_s { * initialization) options. 
*/ bool slow; + /* + * Return size + * + */ + bool usize; }; JEMALLOC_ALWAYS_INLINE void @@ -1760,6 +1765,7 @@ static_opts_init(static_opts_t *static_opts) { static_opts->oom_string = ""; static_opts->invalid_alignment_string = ""; static_opts->slow = false; + static_opts->usize = false; } /* @@ -1774,6 +1780,7 @@ static_opts_init(static_opts_t *static_opts) { typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { void **result; + size_t usize; size_t num_items; size_t item_size; size_t alignment; @@ -1785,6 +1792,7 @@ struct dynamic_opts_s { JEMALLOC_ALWAYS_INLINE void dynamic_opts_init(dynamic_opts_t *dynamic_opts) { dynamic_opts->result = NULL; + dynamic_opts->usize = 0; dynamic_opts->num_items = 0; dynamic_opts->item_size = 0; dynamic_opts->alignment = 0; @@ -1960,13 +1968,15 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (unlikely(ind >= SC_NSIZES)) { goto label_oom; } - if (config_stats || (config_prof && opt_prof)) { + if (config_stats || (config_prof && opt_prof) || sopts->usize) { usize = sz_index2size(ind); + dopts->usize = usize; assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); } } else { usize = sz_sa2u(size, dopts->alignment); + dopts->usize = usize; if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; @@ -2759,6 +2769,71 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); * Begin non-standard functions. 
*/ +#ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +smallocx_return_t JEMALLOC_NOTHROW +/* + * The attribute JEMALLOC_ATTR(malloc) cannot be used due to: + * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86488 + */ + je_smallocx(size_t size, int flags) { + /* + * Note: the attribute JEMALLOC_ALLOC_SIZE(1) cannot be + * used here because it makes writing beyond the `size` + * of the `ptr` undefined behavior, but the objective + * of this function is to allow writing beyond `size` + * up to `smallocx_return_t::size`. + */ + smallocx_return_t ret; + static_opts_t sopts; + dynamic_opts_t dopts; + + LOG("core.smallocx.entry", "size: %zu, flags: %d", size, flags); + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.assert_nonempty_alloc = true; + sopts.null_out_result_on_error = true; + sopts.oom_string = ": Error in mallocx(): out of memory\n"; + sopts.usize = true; + + dopts.result = &ret.ptr; + dopts.num_items = 1; + dopts.item_size = size; + if (unlikely(flags != 0)) { + if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { + dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); + } + + dopts.zero = MALLOCX_ZERO_GET(flags); + + if ((flags & MALLOCX_TCACHE_MASK) != 0) { + if ((flags & MALLOCX_TCACHE_MASK) + == MALLOCX_TCACHE_NONE) { + dopts.tcache_ind = TCACHE_IND_NONE; + } else { + dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); + } + } else { + dopts.tcache_ind = TCACHE_IND_AUTOMATIC; + } + + if ((flags & MALLOCX_ARENA_MASK) != 0) + dopts.arena_ind = MALLOCX_ARENA_GET(flags); + } + + + + imalloc(&sopts, &dopts); + assert(dopts.usize == je_nallocx(size, flags)); + ret.size = dopts.usize; + + LOG("core.smallocx.exit", "result: %p, size: %zu", ret.ptr, ret.size); + return ret; +} +#endif + JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) From 730e57b08fe5bd6bdc38ca4ff6a73959984d8ef0 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: 
Wed, 11 Jul 2018 15:04:48 +0200 Subject: [PATCH 1196/2608] Adapts mallocx integration tests for smallocx --- Makefile.in | 5 + test/integration/smallocx.c | 292 +++++++++++++++++++++++++++++++++++ test/integration/smallocx.sh | 5 + 3 files changed, 302 insertions(+) create mode 100644 test/integration/smallocx.c create mode 100644 test/integration/smallocx.sh diff --git a/Makefile.in b/Makefile.in index c35bb7ed..3d99a409 100644 --- a/Makefile.in +++ b/Makefile.in @@ -57,6 +57,7 @@ cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ enable_prof := @enable_prof@ enable_zone_allocator := @enable_zone_allocator@ +enable_experimental_smallocx := @enable_experimental_smallocx@ MALLOC_CONF := @JEMALLOC_CPREFIX@MALLOC_CONF link_whole_archive := @link_whole_archive@ DSO_LDFLAGS = @DSO_LDFLAGS@ @@ -235,6 +236,10 @@ TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/thread_arena.c \ $(srcroot)test/integration/thread_tcache_enabled.c \ $(srcroot)test/integration/xallocx.c +ifeq (@enable_experimental_smallocx@, 1) +TESTS_INTEGRATION += \ + $(srcroot)test/integration/smallocx.c +endif ifeq (@enable_cxx@, 1) CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c new file mode 100644 index 00000000..376fec25 --- /dev/null +++ b/test/integration/smallocx.c @@ -0,0 +1,292 @@ +#include "test/jemalloc_test.h" + +static unsigned +get_nsizes_impl(const char *cmd) { + unsigned ret; + size_t z; + + z = sizeof(unsigned); + assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctl(\"%s\", ...) 
failure", cmd); + + return ret; +} + +static unsigned +get_nlarge(void) { + return get_nsizes_impl("arenas.nlextents"); +} + +static size_t +get_size_impl(const char *cmd, size_t ind) { + size_t ret; + size_t z; + size_t mib[4]; + size_t miblen = 4; + + z = sizeof(size_t); + assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + mib[2] = ind; + z = sizeof(size_t); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + + return ret; +} + +static size_t +get_large_size(size_t ind) { + return get_size_impl("arenas.lextent.0.size", ind); +} + +/* + * On systems which can't merge extents, tests that call this function generate + * a lot of dirty memory very quickly. Purging between cycles mitigates + * potential OOM on e.g. 32-bit Windows. + */ +static void +purge(void) { + assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + "Unexpected mallctl error"); +} + +/* + * GCC "-Walloc-size-larger-than" warning detects when one of the memory + * allocation functions is called with a size larger than the maximum size that + * they support. Here we want to explicitly test that the allocation functions + * do indeed fail properly when this is the case, which triggers the warning. + * Therefore we disable the warning for these tests. 
+ */ +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN + +TEST_BEGIN(test_overflow) { + size_t largemax; + + largemax = get_large_size(get_nlarge()-1); + + assert_ptr_null(smallocx(largemax+1, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", largemax+1); + + assert_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + + assert_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", SIZE_T_MAX); + + assert_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, + "Expected OOM for smallocx(size=1, MALLOCX_ALIGN(%#zx))", + ZU(PTRDIFF_MAX)+1); +} +TEST_END + +static void * +remote_alloc(void *arg) { + unsigned arena; + size_t sz = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + size_t large_sz; + sz = sizeof(size_t); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + smallocx_return_t r = smallocx(large_sz, MALLOCX_ARENA(arena) + | MALLOCX_TCACHE_NONE); + void *ptr = r.ptr; + assert_zu_eq(r.size, nallocx(large_sz, MALLOCX_ARENA(arena) + | MALLOCX_TCACHE_NONE), + "Expected smalloc(size,flags).size == nallocx(size,flags)"); + void **ret = (void **)arg; + *ret = ptr; + + return NULL; +} + +TEST_BEGIN(test_remote_free) { + thd_t thd; + void *ret; + thd_create(&thd, remote_alloc, (void *)&ret); + thd_join(thd, NULL); + assert_ptr_not_null(ret, "Unexpected smallocx failure"); + + /* Avoid TCACHE_NONE to explicitly test tcache_flush(). */ + dallocx(ret, 0); + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); +} +TEST_END + +TEST_BEGIN(test_oom) { + size_t largemax; + bool oom; + void *ptrs[3]; + unsigned i; + + /* + * It should be impossible to allocate three objects that each consume + * nearly half the virtual address space. 
+ */ + largemax = get_large_size(get_nlarge()-1); + oom = false; + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + ptrs[i] = smallocx(largemax, 0).ptr; + if (ptrs[i] == NULL) { + oom = true; + } + } + assert_true(oom, + "Expected OOM during series of calls to smallocx(size=%zu, 0)", + largemax); + for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { + if (ptrs[i] != NULL) { + dallocx(ptrs[i], 0); + } + } + purge(); + +#if LG_SIZEOF_PTR == 3 + assert_ptr_null(smallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x8000000000000000ULL)).ptr, + "Expected OOM for smallocx()"); + assert_ptr_null(smallocx(0x8000000000000000ULL, + MALLOCX_ALIGN(0x80000000)).ptr, + "Expected OOM for smallocx()"); +#else + assert_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, + "Expected OOM for smallocx()"); +#endif +} +TEST_END + +/* Re-enable the "-Walloc-size-larger-than=" warning */ +JEMALLOC_DIAGNOSTIC_POP + +TEST_BEGIN(test_basic) { +#define MAXSZ (((size_t)1) << 23) + size_t sz; + + for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { + smallocx_return_t ret; + size_t nsz, rsz, smz; + void *p; + nsz = nallocx(sz, 0); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + ret = smallocx(sz, 0); + p = ret.ptr; + smz = ret.size; + assert_ptr_not_null(p, + "Unexpected smallocx(size=%zx, flags=0) error", sz); + rsz = sallocx(p, 0); + assert_zu_ge(rsz, sz, "Real size smaller than expected"); + assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + dallocx(p, 0); + + ret = smallocx(sz, 0); + p = ret.ptr; + smz = ret.size; + assert_ptr_not_null(p, + "Unexpected smallocx(size=%zx, flags=0) error", sz); + dallocx(p, 0); + + nsz = nallocx(sz, MALLOCX_ZERO); + assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + assert_zu_ne(smz, 0, "Unexpected smallocx() error"); + ret = smallocx(sz, MALLOCX_ZERO); + p = ret.ptr; + assert_ptr_not_null(p, + "Unexpected smallocx(size=%zx, flags=MALLOCX_ZERO) error", + 
nsz); + rsz = sallocx(p, 0); + assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + dallocx(p, 0); + purge(); + } +#undef MAXSZ +} +TEST_END + +TEST_BEGIN(test_alignment_and_size) { + const char *percpu_arena; + size_t sz = sizeof(percpu_arena); + + if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) || + strcmp(percpu_arena, "disabled") != 0) { + test_skip("test_alignment_and_size skipped: " + "not working with percpu arena."); + }; +#define MAXALIGN (((size_t)1) << 23) +#define NITER 4 + size_t nsz, rsz, smz, alignment, total; + unsigned i; + void *ps[NITER]; + + for (i = 0; i < NITER; i++) { + ps[i] = NULL; + } + + for (alignment = 8; + alignment <= MAXALIGN; + alignment <<= 1) { + total = 0; + for (sz = 1; + sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (i = 0; i < NITER; i++) { + nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | + MALLOCX_ZERO); + assert_zu_ne(nsz, 0, + "nallocx() error for alignment=%zu, " + "size=%zu (%#zx)", alignment, sz, sz); + smallocx_return_t ret = smallocx(sz, MALLOCX_ALIGN(alignment) | + MALLOCX_ZERO); + ps[i] = ret.ptr; + assert_ptr_not_null(ps[i], + "smallocx() error for alignment=%zu, " + "size=%zu (%#zx)", alignment, sz, sz); + rsz = sallocx(ps[i], 0); + smz = ret.size; + assert_zu_ge(rsz, sz, + "Real size smaller than expected for " + "alignment=%zu, size=%zu", alignment, sz); + assert_zu_eq(nsz, rsz, + "nallocx()/sallocx() size mismatch for " + "alignment=%zu, size=%zu", alignment, sz); + assert_zu_eq(nsz, smz, + "nallocx()/smallocx() size mismatch for " + "alignment=%zu, size=%zu", alignment, sz); + assert_ptr_null( + (void *)((uintptr_t)ps[i] & (alignment-1)), + "%p inadequately aligned for" + " alignment=%zu, size=%zu", ps[i], + alignment, sz); + total += rsz; + if (total >= (MAXALIGN << 1)) { + break; + } + } + for (i = 0; i < NITER; i++) { + if (ps[i] != NULL) { + dallocx(ps[i], 0); + ps[i] = 
NULL; + } + } + } + purge(); + } +#undef MAXALIGN +#undef NITER +} +TEST_END + +int +main(void) { + return test( + test_overflow, + test_oom, + test_remote_free, + test_basic, + test_alignment_and_size); +} diff --git a/test/integration/smallocx.sh b/test/integration/smallocx.sh new file mode 100644 index 00000000..d07f10f3 --- /dev/null +++ b/test/integration/smallocx.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_fill}" = "x1" ] ; then + export MALLOC_CONF="junk:false" +fi From 741fca1bb7773e14cf929824b94506eb9f545e5e Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 11 Jul 2018 21:39:44 +0200 Subject: [PATCH 1197/2608] Hide smallocx even when enabled from the library API The experimental `smallocx` API is not exposed via header files, requiring the users to peek at `jemalloc`'s source code to manually add the external declarations to their own programs. This should reinforce that `smallocx` is experimental, and that `jemalloc` does not offer any kind of backwards compatiblity or ABI gurantees for it. 
--- include/jemalloc/jemalloc_protos.h.in | 4 ---- include/jemalloc/jemalloc_typedefs.h.in | 7 ------- src/jemalloc.c | 5 +++++ test/integration/smallocx.c | 7 +++++++ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 05fc056f..a78414b1 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -28,10 +28,6 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr) JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags) JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); -#ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - smallocx_return_t JEMALLOC_NOTHROW @je_@smallocx(size_t size, int flags); -#endif JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@rallocx(void *ptr, size_t size, int flags) JEMALLOC_ALLOC_SIZE(2); diff --git a/include/jemalloc/jemalloc_typedefs.h.in b/include/jemalloc/jemalloc_typedefs.h.in index fe0d7d1e..1a588743 100644 --- a/include/jemalloc/jemalloc_typedefs.h.in +++ b/include/jemalloc/jemalloc_typedefs.h.in @@ -75,10 +75,3 @@ struct extent_hooks_s { extent_split_t *split; extent_merge_t *merge; }; - -#ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API -typedef struct { - void *ptr; - size_t size; -} smallocx_return_t; -#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 01e2db97..57d9f157 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2770,6 +2770,11 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); */ #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API +typedef struct { + void *ptr; + size_t size; +} smallocx_return_t; + JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN smallocx_return_t JEMALLOC_NOTHROW /* diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c index 376fec25..f49ec845 100644 --- 
a/test/integration/smallocx.c +++ b/test/integration/smallocx.c @@ -1,5 +1,12 @@ #include "test/jemalloc_test.h" +typedef struct { + void *ptr; + size_t size; +} smallocx_return_t; + +extern smallocx_return_t smallocx(size_t size, int flags); + static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; From 837de32496b1f20524c723516775a11bf236f891 Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Wed, 11 Jul 2018 15:11:53 +0200 Subject: [PATCH 1198/2608] Test smallocx on Travis-CI This commit updates the gen_travis script with a new build bot that covers the experimental `smallocx` API and updates the travis CI script to test this API under travis. --- .travis.yml | 3 +++ scripts/gen_travis.py | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/.travis.yml b/.travis.yml index 07d30815..38e66551 100644 --- a/.travis.yml +++ b/.travis.yml @@ -122,6 +122,9 @@ matrix: # Development build - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + # --enable-expermental-smallocx: + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" # Valgrind - os: linux diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 743f1e5d..e92660f7 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -118,6 +118,13 @@ include_rows += '''\ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" ''' +# Enable-expermental-smallocx +include_rows += '''\ + # --enable-expermental-smallocx: + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" +''' + # 
Valgrind build bots include_rows += ''' # Valgrind From 01e2a38e5a5523350496b11af46cf1d4c1d74e4c Mon Sep 17 00:00:00 2001 From: gnzlbg Date: Fri, 5 Oct 2018 13:11:21 +0200 Subject: [PATCH 1199/2608] Make `smallocx` symbol name depend on the `JEMALLOC_VERSION_GID` This commit concatenates the `JEMALLOC_VERSION_GID` to the `smallocx` symbol name, such that the symbol ends up exported as `smallocx_{git_hash}`. --- configure.ac | 126 +++++++++++++------------- include/jemalloc/jemalloc_macros.h.in | 1 + src/jemalloc.c | 15 ++- test/integration/smallocx.c | 55 ++++++----- 4 files changed, 110 insertions(+), 87 deletions(-) diff --git a/configure.ac b/configure.ac index 018ee3f6..e27ea912 100644 --- a/configure.ac +++ b/configure.ac @@ -538,6 +538,66 @@ AC_PROG_NM AC_PROG_AWK +dnl ============================================================================ +dnl jemalloc version. +dnl + +AC_ARG_WITH([version], + [AS_HELP_STRING([--with-version=..--g], + [Version string])], + [ + echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null + if test $? -eq 0 ; then + echo "$with_version" > "${objroot}VERSION" + else + echo "${with_version}" | grep ['^VERSION$'] 2>&1 1>/dev/null + if test $? -ne 0 ; then + AC_MSG_ERROR([${with_version} does not match ..--g or VERSION]) + fi + fi + ], [ + dnl Set VERSION if source directory is inside a git repository. + if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then + dnl Pattern globs aren't powerful enough to match both single- and + dnl double-digit version numbers, so iterate over patterns to support up + dnl to version 99.99.99 without any accidental matches. + for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ + '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9]' \ + '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do + (test ! 
"${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null + if test $? -eq 0 ; then + mv "${objroot}VERSION.tmp" "${objroot}VERSION" + break + fi + done + fi + rm -f "${objroot}VERSION.tmp" + ]) + +if test ! -e "${objroot}VERSION" ; then + if test ! -e "${srcroot}VERSION" ; then + AC_MSG_RESULT( + [Missing VERSION file, and unable to generate it; creating bogus VERSION]) + echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION" + else + cp ${srcroot}VERSION ${objroot}VERSION + fi +fi +jemalloc_version=`cat "${objroot}VERSION"` +jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'` +jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'` +jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'` +jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'` +jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'` +AC_SUBST([jemalloc_version]) +AC_SUBST([jemalloc_version_major]) +AC_SUBST([jemalloc_version_minor]) +AC_SUBST([jemalloc_version_bugfix]) +AC_SUBST([jemalloc_version_nrev]) +AC_SUBST([jemalloc_version_gid]) + dnl Platform-specific settings. abi and RPATH can probably be determined dnl programmatically, but doing so is error-prone, which makes it generally dnl not worth the trouble. 
@@ -850,7 +910,7 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx smallocx nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) @@ -991,6 +1051,10 @@ cfghdrs_tup="include/jemalloc/jemalloc_defs.h:include/jemalloc/jemalloc_defs.h.i cfghdrs_tup="${cfghdrs_tup} include/jemalloc/internal/jemalloc_internal_defs.h:include/jemalloc/internal/jemalloc_internal_defs.h.in" cfghdrs_tup="${cfghdrs_tup} test/include/test/jemalloc_test_defs.h:test/include/test/jemalloc_test_defs.h.in" +dnl ============================================================================ +dnl jemalloc build options. +dnl + dnl Do not compile with debugging by default. AC_ARG_ENABLE([debug], [AS_HELP_STRING([--enable-debug], @@ -1462,66 +1526,6 @@ if test "x${LG_PAGE}" != "xundefined" -a \ fi AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) -dnl ============================================================================ -dnl jemalloc configuration. -dnl - -AC_ARG_WITH([version], - [AS_HELP_STRING([--with-version=..--g], - [Version string])], - [ - echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null - if test $? -eq 0 ; then - echo "$with_version" > "${objroot}VERSION" - else - echo "${with_version}" | grep ['^VERSION$'] 2>&1 1>/dev/null - if test $? 
-ne 0 ; then - AC_MSG_ERROR([${with_version} does not match ..--g or VERSION]) - fi - fi - ], [ - dnl Set VERSION if source directory is inside a git repository. - if test "x`test ! \"${srcroot}\" && cd \"${srcroot}\"; git rev-parse --is-inside-work-tree 2>/dev/null`" = "xtrue" ; then - dnl Pattern globs aren't powerful enough to match both single- and - dnl double-digit version numbers, so iterate over patterns to support up - dnl to version 99.99.99 without any accidental matches. - for pattern in ['[0-9].[0-9].[0-9]' '[0-9].[0-9].[0-9][0-9]' \ - '[0-9].[0-9][0-9].[0-9]' '[0-9].[0-9][0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9].[0-9]' '[0-9][0-9].[0-9].[0-9][0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9]' \ - '[0-9][0-9].[0-9][0-9].[0-9][0-9]']; do - (test ! "${srcroot}" && cd "${srcroot}"; git describe --long --abbrev=40 --match="${pattern}") > "${objroot}VERSION.tmp" 2>/dev/null - if test $? -eq 0 ; then - mv "${objroot}VERSION.tmp" "${objroot}VERSION" - break - fi - done - fi - rm -f "${objroot}VERSION.tmp" - ]) - -if test ! -e "${objroot}VERSION" ; then - if test ! 
-e "${srcroot}VERSION" ; then - AC_MSG_RESULT( - [Missing VERSION file, and unable to generate it; creating bogus VERSION]) - echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION" - else - cp ${srcroot}VERSION ${objroot}VERSION - fi -fi -jemalloc_version=`cat "${objroot}VERSION"` -jemalloc_version_major=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]1}'` -jemalloc_version_minor=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]2}'` -jemalloc_version_bugfix=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]3}'` -jemalloc_version_nrev=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]4}'` -jemalloc_version_gid=`echo ${jemalloc_version} | tr ".g-" " " | awk '{print [$]5}'` -AC_SUBST([jemalloc_version]) -AC_SUBST([jemalloc_version_major]) -AC_SUBST([jemalloc_version_minor]) -AC_SUBST([jemalloc_version_bugfix]) -AC_SUBST([jemalloc_version_nrev]) -AC_SUBST([jemalloc_version_gid]) - dnl ============================================================================ dnl Configure pthreads. diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index aee55438..a00ce11a 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -10,6 +10,7 @@ #define JEMALLOC_VERSION_BUGFIX @jemalloc_version_bugfix@ #define JEMALLOC_VERSION_NREV @jemalloc_version_nrev@ #define JEMALLOC_VERSION_GID "@jemalloc_version_gid@" +#define JEMALLOC_VERSION_GID_IDENT @jemalloc_version_gid@ #define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 diff --git a/src/jemalloc.c b/src/jemalloc.c index 57d9f157..f1bec9ac 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1748,8 +1748,7 @@ struct static_opts_s { */ bool slow; /* - * Return size - * + * Return size. 
*/ bool usize; }; @@ -2770,6 +2769,11 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); */ #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API + +#define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y +#define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y) \ + JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) + typedef struct { void *ptr; size_t size; @@ -2781,7 +2785,8 @@ smallocx_return_t JEMALLOC_NOTHROW * The attribute JEMALLOC_ATTR(malloc) cannot be used due to: * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86488 */ - je_smallocx(size_t size, int flags) { +JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) + (size_t size, int flags) { /* * Note: the attribute JEMALLOC_ALLOC_SIZE(1) cannot be * used here because it makes writing beyond the `size` @@ -2828,8 +2833,6 @@ smallocx_return_t JEMALLOC_NOTHROW dopts.arena_ind = MALLOCX_ARENA_GET(flags); } - - imalloc(&sopts, &dopts); assert(dopts.usize == je_nallocx(size, flags)); ret.size = dopts.usize; @@ -2837,6 +2840,8 @@ smallocx_return_t JEMALLOC_NOTHROW LOG("core.smallocx.exit", "result: %p, size: %zu", ret.ptr, ret.size); return ret; } +#undef JEMALLOC_SMALLOCX_CONCAT_HELPER +#undef JEMALLOC_SMALLOCX_CONCAT_HELPER2 #endif JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c index f49ec845..2486752b 100644 --- a/test/integration/smallocx.c +++ b/test/integration/smallocx.c @@ -1,11 +1,24 @@ #include "test/jemalloc_test.h" +#include "jemalloc/jemalloc_macros.h" + +#define STR_HELPER(x) #x +#define STR(x) STR_HELPER(x) + +#ifndef JEMALLOC_VERSION_GID_IDENT + #error "JEMALLOC_VERSION_GID_IDENT not defined" +#endif + +#define JOIN(x, y) x ## y +#define JOIN2(x, y) JOIN(x, y) +#define smallocx JOIN2(smallocx_, JEMALLOC_VERSION_GID_IDENT) typedef struct { void *ptr; size_t size; } smallocx_return_t; -extern smallocx_return_t smallocx(size_t size, int flags); +extern smallocx_return_t +smallocx(size_t size, int 
flags); static unsigned get_nsizes_impl(const char *cmd) { @@ -99,12 +112,12 @@ remote_alloc(void *arg) { assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - smallocx_return_t r = smallocx(large_sz, MALLOCX_ARENA(arena) - | MALLOCX_TCACHE_NONE); + smallocx_return_t r + = smallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void *ptr = r.ptr; - assert_zu_eq(r.size, nallocx(large_sz, MALLOCX_ARENA(arena) - | MALLOCX_TCACHE_NONE), - "Expected smalloc(size,flags).size == nallocx(size,flags)"); + assert_zu_eq(r.size, + nallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE), + "Expected smalloc(size,flags).size == nallocx(size,flags)"); void **ret = (void **)arg; *ret = ptr; @@ -174,40 +187,40 @@ TEST_BEGIN(test_basic) { size_t sz; for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { - smallocx_return_t ret; + smallocx_return_t ret; size_t nsz, rsz, smz; void *p; nsz = nallocx(sz, 0); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); ret = smallocx(sz, 0); - p = ret.ptr; - smz = ret.size; + p = ret.ptr; + smz = ret.size; assert_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); assert_zu_ge(rsz, sz, "Real size smaller than expected"); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); - assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); dallocx(p, 0); ret = smallocx(sz, 0); - p = ret.ptr; - smz = ret.size; + p = ret.ptr; + smz = ret.size; assert_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); - assert_zu_ne(smz, 0, "Unexpected smallocx() error"); - ret = smallocx(sz, MALLOCX_ZERO); + assert_zu_ne(smz, 0, "Unexpected smallocx() error"); + ret = smallocx(sz, MALLOCX_ZERO); p = ret.ptr; assert_ptr_not_null(p, "Unexpected smallocx(size=%zx, 
flags=MALLOCX_ZERO) error", nsz); rsz = sallocx(p, 0); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); - assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); dallocx(p, 0); purge(); } @@ -247,23 +260,23 @@ TEST_BEGIN(test_alignment_and_size) { assert_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); - smallocx_return_t ret = smallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + smallocx_return_t ret + = smallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); ps[i] = ret.ptr; assert_ptr_not_null(ps[i], "smallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); rsz = sallocx(ps[i], 0); - smz = ret.size; + smz = ret.size; assert_zu_ge(rsz, sz, "Real size smaller than expected for " "alignment=%zu, size=%zu", alignment, sz); assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, smz, - "nallocx()/smallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); + assert_zu_eq(nsz, smz, + "nallocx()/smallocx() size mismatch for " + "alignment=%zu, size=%zu", alignment, sz); assert_ptr_null( (void *)((uintptr_t)ps[i] & (alignment-1)), "%p inadequately aligned for" From 2b112ea5932d280288882d8bb38e7942b166fe5a Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 9 Oct 2018 08:41:36 -0700 Subject: [PATCH 1200/2608] add test for zero-sized alloc and aligned alloc --- Makefile.in | 1 + test/integration/aligned_alloc.c | 12 +++++++++++- test/integration/malloc.c | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 test/integration/malloc.c diff --git a/Makefile.in b/Makefile.in index 3d99a409..c9bd95a3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -226,6 +226,7 @@ endif TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ $(srcroot)test/integration/extent.c 
\ + $(srcroot)test/integration/malloc.c \ $(srcroot)test/integration/mallocx.c \ $(srcroot)test/integration/MALLOCX_ARENA.c \ $(srcroot)test/integration/overflow.c \ diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index cfe1df9d..4375b172 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -138,10 +138,20 @@ TEST_BEGIN(test_alignment_and_size) { } TEST_END +TEST_BEGIN(test_zero_alloc) { + void *res = aligned_alloc(8, 0); + assert(res); + size_t usable = malloc_usable_size(res); + assert(usable > 0); + free(res); +} +TEST_END + int main(void) { return test( test_alignment_errors, test_oom_errors, - test_alignment_and_size); + test_alignment_and_size, + test_zero_alloc); } diff --git a/test/integration/malloc.c b/test/integration/malloc.c new file mode 100644 index 00000000..8b33bc8f --- /dev/null +++ b/test/integration/malloc.c @@ -0,0 +1,16 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_zero_alloc) { + void *res = malloc(0); + assert(res); + size_t usable = malloc_usable_size(res); + assert(usable > 0); + free(res); +} +TEST_END + +int +main(void) { + return test( + test_zero_alloc); +} From 4edbb7c64c83aa2059ade469bc798dadf3da194c Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 8 Oct 2018 10:11:04 -0700 Subject: [PATCH 1201/2608] sz: Support 0 size in size2index lookup/compute --- include/jemalloc/internal/sz.h | 13 ++++++++++--- src/sz.c | 7 ++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 69625ee2..68e558ab 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -122,6 +122,10 @@ sz_size2index_compute(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { return SC_NSIZES; } + + if (size == 0) { + return 0; + } #if (SC_NTINY != 0) if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { szind_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; @@ -150,14 +154,14 @@ 
sz_size2index_compute(size_t size) { JEMALLOC_ALWAYS_INLINE szind_t sz_size2index_lookup(size_t size) { assert(size <= SC_LOOKUP_MAXCLASS); - szind_t ret = (sz_size2index_tab[(size-1) >> SC_LG_TINY_MIN]); + szind_t ret = (sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1) + >> SC_LG_TINY_MIN]); assert(ret == sz_size2index_compute(size)); return ret; } JEMALLOC_ALWAYS_INLINE szind_t sz_size2index(size_t size) { - assert(size > 0); if (likely(size <= SC_LOOKUP_MAXCLASS)) { return sz_size2index_lookup(size); } @@ -208,6 +212,10 @@ sz_s2u_compute(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { return 0; } + + if (size == 0) { + size++; + } #if (SC_NTINY > 0) if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; @@ -241,7 +249,6 @@ sz_s2u_lookup(size_t size) { */ JEMALLOC_ALWAYS_INLINE size_t sz_s2u(size_t size) { - assert(size > 0); if (likely(size <= SC_LOOKUP_MAXCLASS)) { return sz_s2u_lookup(size); } diff --git a/src/sz.c b/src/sz.c index 77f89c62..8633fb05 100644 --- a/src/sz.c +++ b/src/sz.c @@ -37,18 +37,19 @@ sz_boot_index2size_tab(const sc_data_t *sc_data) { * the smallest interval for which the result can change. 
*/ JEMALLOC_ALIGNED(CACHELINE) -uint8_t sz_size2index_tab[SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN]; +uint8_t sz_size2index_tab[(SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1]; static void sz_boot_size2index_tab(const sc_data_t *sc_data) { - size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN); + size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1; size_t dst_ind = 0; for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; sc_ind++) { const sc_t *sc = &sc_data->sc[sc_ind]; size_t sz = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); - size_t max_ind = ((sz - 1) >> SC_LG_TINY_MIN); + size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1) + >> SC_LG_TINY_MIN); for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { sz_size2index_tab[dst_ind] = sc_ind; } From ac34afb4037d7e9e87efde2b8e913d87aae131da Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 8 Oct 2018 10:13:02 -0700 Subject: [PATCH 1202/2608] drop bump_empty_alloc option. Size class lookup support used instead. 
--- include/jemalloc/internal/arena_inlines_b.h | 1 - .../internal/jemalloc_internal_inlines_c.h | 1 - src/jemalloc.c | 17 +---------------- 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8bf0a817..3d0121d5 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -134,7 +134,6 @@ JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); - assert(size != 0); if (likely(tcache != NULL)) { if (likely(size <= SC_SMALL_MAXCLASS)) { diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 9c5fec62..cdb10eb2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -43,7 +43,6 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path) { void *ret; - assert(size != 0); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { diff --git a/src/jemalloc.c b/src/jemalloc.c index f1bec9ac..0636c83a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1713,8 +1713,7 @@ typedef struct static_opts_s static_opts_t; struct static_opts_s { /* Whether or not allocation size may overflow. */ bool may_overflow; - /* Whether or not allocations of size 0 should be treated as size 1. */ - bool bump_empty_alloc; + /* * Whether to assert that allocations are not of size 0 (after any * bumping). 
@@ -1756,7 +1755,6 @@ struct static_opts_s { JEMALLOC_ALWAYS_INLINE void static_opts_init(static_opts_t *static_opts) { static_opts->may_overflow = false; - static_opts->bump_empty_alloc = false; static_opts->assert_nonempty_alloc = false; static_opts->null_out_result_on_error = false; static_opts->set_errno_on_error = false; @@ -1945,12 +1943,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* Validate the user input. */ - if (sopts->bump_empty_alloc) { - if (unlikely(size == 0)) { - size = 1; - } - } - if (sopts->assert_nonempty_alloc) { assert (size != 0); } @@ -2178,7 +2170,6 @@ je_malloc(size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = ": Error in malloc(): out of memory\n"; @@ -2215,7 +2206,6 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.min_alignment = sizeof(void *); sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; @@ -2256,7 +2246,6 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.min_alignment = 1; @@ -2296,7 +2285,6 @@ je_calloc(size_t num, size_t size) { dynamic_opts_init(&dopts); sopts.may_overflow = true; - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = ": Error in calloc(): out of memory\n"; @@ -2539,7 +2527,6 @@ je_realloc(void *ptr, size_t arg_size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.oom_string = @@ -2643,7 +2630,6 @@ je_memalign(size_t alignment, size_t size) { 
static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.min_alignment = 1; sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; @@ -2683,7 +2669,6 @@ je_valloc(size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); - sopts.bump_empty_alloc = true; sopts.null_out_result_on_error = true; sopts.min_alignment = PAGE; sopts.oom_string = From 0ec656eb7117127602f295510de694083353f23e Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 16 Oct 2018 10:23:08 -0700 Subject: [PATCH 1203/2608] ticker: add ticker_trytick For the fastpath, we want to tick, but undo the tick and jump to the slowpath if ticker would fire. --- include/jemalloc/internal/ticker.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 4b360470..52d0db4c 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -75,4 +75,17 @@ ticker_tick(ticker_t *ticker) { return ticker_ticks(ticker, 1); } +/* + * Try to tick. If ticker would fire, return true, but rely on + * slowpath to reset ticker. + */ +static inline bool +ticker_trytick(ticker_t *ticker) { + --ticker->tick; + if (unlikely(ticker->tick < 0)) { + return true; + } + return false; +} + #endif /* JEMALLOC_INTERNAL_TICKER_H */ From 0f8313659e93379d930995ea2d2af0a079cc422e Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Wed, 10 Oct 2018 11:54:58 -0700 Subject: [PATCH 1204/2608] malloc: Add a fastpath This diff adds a fastpath that assumes size <= SC_LOOKUP_MAXCLASS, and that we hit tcache. If either of these is false, we fall back to the previous codepath (renamed 'malloc_default'). Crucially, we only tail call malloc_default, and with the same kind and number of arguments, so that both clang and gcc tail-calling will kick in - therefore malloc() gets treated as a leaf function, and there are *no* caller-saved registers. 
Previously malloc() contained 5 caller saved registers on x64, resulting in at least 10 extra memory-movement instructions. In microbenchmarks this results in up to ~10% improvement in malloc() fastpath. In real programs, this is a ~1% CPU and latency improvement overall. --- src/jemalloc.c | 97 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 89 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0636c83a..f1f9e39f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2152,15 +2152,9 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { return imalloc_body(sopts, dopts, tsd); } } -/******************************************************************************/ -/* - * Begin malloc(3)-compatible functions. - */ -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_malloc(size_t size) { +void * +malloc_default(size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -2193,6 +2187,93 @@ je_malloc(size_t size) { return ret; } +/******************************************************************************/ +/* + * Begin malloc(3)-compatible functions. + */ + +/* + * malloc() fastpath. + * + * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit + * tcache. If either of these is false, we tail-call to the slowpath, + * malloc_default(). Tail-calling is used to avoid any caller-saved + * registers. + * + * fastpath supports ticker and profiling, both of which will also + * tail-call to the slowpath if they fire. 
+ */ +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) +je_malloc(size_t size) { + LOG("core.malloc.entry", "size: %zu", size); + + if (tsd_get_allocates() && unlikely(!malloc_initialized())) { + return malloc_default(size); + } + + tsd_t *tsd = tsd_get(false); + if (unlikely(!tsd || !tsd_fast(tsd) || (size > SC_LOOKUP_MAXCLASS))) { + return malloc_default(size); + } + + tcache_t *tcache = tsd_tcachep_get(tsd); + + if (unlikely(ticker_trytick(&tcache->gc_ticker))) { + return malloc_default(size); + } + + szind_t ind = sz_size2index_lookup(size); + size_t usize; + if (config_stats || config_prof) { + usize = sz_index2size(ind); + } + /* Fast path relies on size being a bin. I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS */ + assert(ind < SC_NBINS); + assert(size <= SC_SMALL_MAXCLASS); + + if (config_prof) { + int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); + bytes_until_sample -= usize; + tsd_bytes_until_sample_set(tsd, bytes_until_sample); + + if (unlikely(bytes_until_sample < 0)) { + /* + * Avoid a prof_active check on the fastpath. + * If prof_active is false, set bytes_until_sample to + * a large value. If prof_active is set to true, + * bytes_until_sample will be reset. 
+ */ + if (!prof_active) { + tsd_bytes_until_sample_set(tsd, SSIZE_MAX); + } + return malloc_default(size); + } + } + + cache_bin_t *bin = tcache_small_bin_get(tcache, ind); + bool tcache_success; + void* ret = cache_bin_alloc_easy(bin, &tcache_success); + + if (tcache_success) { + if (config_stats) { + *tsd_thread_allocatedp_get(tsd) += usize; + bin->tstats.nrequests++; + } + if (config_prof) { + tcache->prof_accumbytes += usize; + } + + LOG("core.malloc.exit", "result: %p", ret); + + /* Fastpath success */ + return ret; + } + + return malloc_default(size); +} + JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) je_posix_memalign(void **memptr, size_t alignment, size_t size) { From 936bc2aa15504076f884ed97a51e169924fe4a89 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 23 Oct 2018 08:12:46 -0700 Subject: [PATCH 1205/2608] prof: Fix memory regression The diff 'refactor prof accum...' moved the bytes_until_sample subtraction before the load of tdata. If tdata is null, tdata_get(true) will overwrite bytes_until_sample, but we still sample the current allocation. Instead, do the subtraction and check logic again, to keep the previous behavior. blame-rev: 0ac524308d3f636d1a4b5149fa7adf24cf426d9c --- include/jemalloc/internal/prof_inlines_b.h | 36 +++++++++++++++++----- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 085111f4..8358bffb 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -79,15 +79,10 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { - prof_tdata_t *tdata; - int64_t bytes_until_sample; - - cassert(config_prof); +prof_sample_check(tsd_t *tsd, size_t usize, bool update) { ssize_t check = update ? 
0 : usize; - bytes_until_sample = tsd_bytes_until_sample_get(tsd); + int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); if (update) { bytes_until_sample -= usize; if (tsd_nominal(tsd)) { @@ -96,8 +91,24 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, } if (likely(bytes_until_sample >= check)) { return true; - } + } + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, + prof_tdata_t **tdata_out) { + prof_tdata_t *tdata; + + cassert(config_prof); + + /* Fastpath: no need to load tdata */ + if (likely(prof_sample_check(tsd, usize, update))) { + return true; + } + + bool booted = tsd_prof_tdata_get(tsd); tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; @@ -111,6 +122,15 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } + /* + * If this was the first creation of tdata, then + * prof_tdata_get() reset bytes_until_sample, so decrement and + * check it again + */ + if (!booted && prof_sample_check(tsd, usize, update)) { + return true; + } + if (tsd_reentrancy_level_get(tsd) > 0) { return true; } From ceba1dde2774e4eae659a548263970cd9b74d319 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sat, 6 Oct 2018 16:43:07 +0100 Subject: [PATCH 1206/2608] Make use of pthread_set_name_np(3) on FreeBSD. 
--- include/jemalloc/internal/jemalloc_internal_decls.h | 3 +++ src/background_thread.c | 2 ++ 2 files changed, 5 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index be70df51..7d6053e2 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -31,6 +31,9 @@ # include # endif # include +# ifdef __FreeBSD__ +# include +# endif # include # ifdef JEMALLOC_OS_UNFAIR_LOCK # include diff --git a/src/background_thread.c b/src/background_thread.c index feed8564..24f67305 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -509,6 +509,8 @@ background_thread_entry(void *ind_arg) { assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); +#elif defined(__FreeBSD__) + pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); #endif if (opt_percpu_arena != percpu_arena_disabled) { set_current_thread_affinity((int)thread_ind); From be0749f59151ffecbdf7d9f82193350f018904dd Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Tue, 23 Oct 2018 16:41:14 -0500 Subject: [PATCH 1207/2608] Restrict lwsync to powerpc64 only Nearly all 32-bit powerpc hardware treats lwsync as sync, and some cores (Freescale e500) trap lwsync as an illegal instruction, which then gets emulated in the kernel. To avoid unnecessary traps on the e500, use sync on all 32-bit powerpc. This pessimizes 32-bit software running on 64-bit hardware, but those numbers should be slim. 
--- include/jemalloc/internal/atomic_gcc_sync.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index 06a0acf3..e02b7cbe 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -27,8 +27,10 @@ atomic_fence(atomic_memory_order_t mo) { asm volatile("" ::: "memory"); # if defined(__i386__) || defined(__x86_64__) /* This is implicit on x86. */ -# elif defined(__ppc__) +# elif defined(__ppc64__) asm volatile("lwsync"); +# elif defined(__ppc__) + asm volatile("sync"); # elif defined(__sparc__) && defined(__arch64__) if (mo == atomic_memory_order_acquire) { asm volatile("membar #LoadLoad | #LoadStore"); From 50b473c8839f5408df179bdf6f2b3fd2cf5c3b2f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 2 Nov 2018 14:01:45 -0700 Subject: [PATCH 1208/2608] Set commit properly for FreeBSD w/ overcommit. When overcommit is enabled, commit needs to be set when doing mmap(). The regression was introduced in f80c97e. --- src/pages.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pages.c b/src/pages.c index 479a89e5..9f3085cb 100644 --- a/src/pages.c +++ b/src/pages.c @@ -186,6 +186,10 @@ pages_map(void *addr, size_t size, size_t alignment, bool *commit) { * touching existing mappings, and to mmap with specific alignment. */ { + if (os_overcommits) { + *commit = true; + } + int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; int flags = mmap_flags; From 8dabf81df1b7db0fd16903abab889dfd61b4c07f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 31 Oct 2018 14:54:53 -0700 Subject: [PATCH 1209/2608] Bypass extent_dalloc when retain is enabled. When retain is enabled, the default dalloc hook does nothing (since we avoid munmap). But the overhead preparing the call is high, specifically the extent de-register and re-register involve locking and extent / rtree modifications. 
Bypass the call with retain in this diff. --- src/extent.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/extent.c b/src/extent.c index 847e4b99..b787b21f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1695,6 +1695,12 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); } +static bool +extent_may_dalloc(void) { + /* With retain enabled, the default dalloc always fails. */ + return !opt_retain; +} + static bool extent_dalloc_default_impl(void *addr, size_t size) { if (!have_dss || !extent_in_dss(addr)) { @@ -1750,16 +1756,20 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - /* - * Deregister first to avoid a race with other allocating threads, and - * reregister if deallocation fails. - */ - extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, extent)) { - return; + /* Avoid calling the default extent_dalloc unless have to. */ + if (*r_extent_hooks != &extent_hooks_default || extent_may_dalloc()) { + /* + * Deregister first to avoid a race with other allocating + * threads, and reregister if deallocation fails. + */ + extent_deregister(tsdn, extent); + if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, + extent)) { + return; + } + extent_reregister(tsdn, extent); } - extent_reregister(tsdn, extent); if (*r_extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } From d66f97662879a1a0c61ee12ba4b760fa6f458eef Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 23 Oct 2018 13:50:42 -0700 Subject: [PATCH 1210/2608] Optimize large deallocation. We eagerly coalesce large buffers when deallocating, however the previous logic around this introduced extra lock overhead -- when coalescing we always lock the neighbors even if they are active, while for active extents nothing can be done. 
This commit checks if the neighbor extents are potentially active before locking, and avoids locking if possible. This speeds up large_dalloc by ~20%. It also fixes some undesired behavior: we could stop coalescing because a small buffer was merged, while a large neighbor was ignored on the other side. --- src/extent.c | 58 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/src/extent.c b/src/extent.c index b787b21f..ab712153 100644 --- a/src/extent.c +++ b/src/extent.c @@ -134,13 +134,16 @@ typedef enum { static lock_result_t extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - extent_t **result) { + extent_t **result, bool inactive_only) { extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, elm, true); - if (extent1 == NULL) { + /* Slab implies active extents and should be skipped. */ + if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &extents_rtree, elm, true))) { return lock_result_no_extent; } + /* * It's possible that the extent changed out from under us, and with it * the leaf->extent mapping. We have to recheck while holding the lock. @@ -163,7 +166,8 @@ extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, * address, and NULL otherwise. 
*/ static extent_t * -extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { +extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, + bool inactive_only) { extent_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)addr, false, false); @@ -172,7 +176,8 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr) { } lock_result_t lock_result; do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret); + lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, + inactive_only); } while (lock_result == lock_result_failure); return ret; } @@ -917,7 +922,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { - extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr); + extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, + false); if (extent != NULL) { /* * We might null-out extent to report an error, but we @@ -1088,8 +1094,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_deregister_no_gdump_sub(tsdn, to_leak); extents_leak(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); - assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) - == NULL); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, + false) == NULL); } return NULL; } @@ -1567,9 +1573,15 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } static extent_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, +extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, - extent_t *extent, bool *coalesced, bool growing_retained) { + extent_t *extent, bool *coalesced, bool growing_retained, + bool inactive_only) { + /* + * We avoid checking / locking inactive neighbors for large size + * classes, since they are eagerly coalesced on deallocation which can + * 
cause lock contention. + */ /* * Continue attempting to coalesce until failure, to protect against * races with other threads that are thwarted by this one. @@ -1580,7 +1592,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, /* Try to coalesce forward. */ extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, - extent_past_get(extent)); + extent_past_get(extent), inactive_only); if (next != NULL) { /* * extents->mtx only protects against races for @@ -1606,7 +1618,7 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, /* Try to coalesce backward. */ extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, - extent_before_get(extent)); + extent_before_get(extent), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(arena, extents, extent, prev); @@ -1632,6 +1644,22 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, return extent; } +static extent_t * +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + extents, extent, coalesced, growing_retained, false); +} + +static extent_t * +extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_t *extent, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + extents, extent, coalesced, growing_retained, true); +} + /* * Does the metadata management portions of putting an unused extent into the * given extents_t (coalesces, deregisters slab interiors, the heap operations). @@ -1664,16 +1692,12 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { /* Always coalesce large extents eagerly. 
*/ bool coalesced; - size_t prev_size; do { - prev_size = extent_size_get(extent); assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce(tsdn, arena, + extent = extent_try_coalesce_large(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, &coalesced, growing_retained); - } while (coalesced && - extent_size_get(extent) - >= prev_size + SC_LARGE_MINCLASS); + } while (coalesced); } extent_deactivate_locked(tsdn, arena, extents, extent); From 7ee0b6cc37ecbecf8f53ba46326258275053ca50 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 8 Nov 2018 12:24:38 -0800 Subject: [PATCH 1211/2608] Properly trigger decay on tcache destory. When destroying tcache, decay may not be triggered since tsd is non-nominal. Explicitly decay to avoid pathological cases. --- src/tcache.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/tcache.c b/src/tcache.c index 7346df8c..bc9e435d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -497,6 +497,7 @@ tcache_flush(tsd_t *tsd) { static void tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_flush_cache(tsd, tcache); + arena_t *arena = tcache->arena; tcache_arena_dissociate(tsd_tsdn(tsd), tcache); if (tsd_tcache) { @@ -509,6 +510,23 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { /* Release both the tcache struct and avail array. */ idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true); } + + /* + * The deallocation and tcache flush above may not trigger decay since + * we are on the tcache shutdown path (potentially with non-nominal + * tsd). Manually trigger decay to avoid pathological cases. Also + * include arena 0 because the tcache array is allocated from it. + */ + arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), + false, false); + + unsigned nthreads = arena_nthreads_get(arena, false); + if (nthreads == 0) { + /* Force purging when no threads assigned to the arena anymore. 
*/ + arena_decay(tsd_tsdn(tsd), arena, false, true); + } else { + arena_decay(tsd_tsdn(tsd), arena, false, false); + } } /* For auto tcache (embedded in TSD) only. */ From cd2931ad9bbd78208565716ab102e86d858c2fff Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 8 Nov 2018 16:20:48 -0800 Subject: [PATCH 1212/2608] Fix tcaches_flush. The regression was introduced in 3a1363b. --- src/tcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index bc9e435d..7859da94 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -655,7 +655,7 @@ tcaches_flush(tsd_t *tsd, unsigned ind) { tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]); malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); if (tcache != NULL) { - tcache_destroy(tsd, tcache, false); + tcache_flush_cache(tsd, tcache); } } From a4c6b9ae011628d012dd8eaab39fb60aa595b922 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Thu, 25 Oct 2018 16:06:42 +0100 Subject: [PATCH 1213/2608] Restore a FreeBSD-specific getpagesize(3) optimization. It was removed in 0771ff2cea6dc18fcd3f6bf452b4224a4e17ae38. Add a comment explaining its purpose. --- src/pages.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pages.c b/src/pages.c index 9f3085cb..13de27a0 100644 --- a/src/pages.c +++ b/src/pages.c @@ -419,6 +419,12 @@ os_page_detect(void) { SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; +#elif defined(__FreeBSD__) + /* + * This returns the value obtained from + * the auxv vector, avoiding a syscall. + */ + return getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { From 5e795297b33f25329a034fd898ee7d80c57b9a8f Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 18 Oct 2018 12:51:54 -0700 Subject: [PATCH 1214/2608] rtree: add rtree_szind_slab_read_fast For a free fastpath, we want something that will not make additional calls. Assume most free() calls will hit the L1 cache, and use a custom rtree function for this. 
Additionally, roll the ptr=NULL check in to the rtree cache check. --- include/jemalloc/internal/rtree.h | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 8564965f..16ccbebe 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -452,6 +452,42 @@ rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, return false; } +/* + * Try to read szind_slab from the L1 cache. Returns true on a hit, + * and fills in r_szind and r_slab. Otherwise returns false. + * + * Key is allowed to be NULL in order to save an extra branch on the + * fastpath. returns false in this case. + */ +JEMALLOC_ALWAYS_INLINE bool +rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, szind_t *r_szind, bool *r_slab) { + rtree_leaf_elm_t *elm; + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + elm = &leaf[subkey]; + +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, + elm, true); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else + *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true); +#endif + return true; + } else { + return false; + } +} JEMALLOC_ALWAYS_INLINE bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { From e2ab215324d7d19e37f4be87beb7a179528a300f Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 18 Oct 2018 13:13:57 -0700 Subject: [PATCH 1215/2608] 
refactor tcache_dalloc_small Add a cache_bin_dalloc_easy (to match the alloc_easy function), and use it in tcache_dalloc_small. It will also be used in the new free fastpath. --- include/jemalloc/internal/cache_bin.h | 16 ++++++++++++++-- include/jemalloc/internal/tcache_inlines.h | 7 +++---- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 40d942e5..d14556a3 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -90,7 +90,7 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { bin->ncached--; - /* + /* * Check for both bin->ncached == 0 and ncached < low_water * in a single branch. */ @@ -102,7 +102,7 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { return NULL; } } - + /* * success (instead of ret) should be checked upon the return of this * function. We avoid checking (ret == NULL) because there is never a @@ -116,4 +116,16 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { return ret; } +JEMALLOC_ALWAYS_INLINE bool +cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) { + if (unlikely(bin->ncached == bin_info->ncached_max)) { + return false; + } + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; + + return true; +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 7c956468..c2c3ac37 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -175,13 +175,12 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); bin_info = &tcache_bin_info[binind]; - if (unlikely(bin->ncached == bin_info->ncached_max)) { + if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { tcache_bin_flush_small(tsd, tcache, bin, 
binind, (bin_info->ncached_max >> 1)); + bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + assert(ret); } - assert(bin->ncached < bin_info->ncached_max); - bin->ncached++; - *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } From 794e29c0abbd77624d1e5599313ebd77bdc17ccc Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 18 Oct 2018 13:14:04 -0700 Subject: [PATCH 1216/2608] Add a free() and sdallocx(where flags=0) fastpath Add unsized and sized deallocation fastpaths. Similar to the malloc() fastpath, this removes all frame manipulation for the majority of free() calls. The performance advantages here are less than that of the malloc() fastpath, but from prod tests seems to still be half a percent or so of improvement. Stats and sampling a both supported (sdallocx needs a sampling check, for rtree lookups slab will only be set for unsampled objects). We don't support flush, any flush requests go to the slowpath. --- src/jemalloc.c | 97 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 86 insertions(+), 11 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index f1f9e39f..68a21f9d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2147,12 +2147,13 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { if (!tsd_get_allocates() && !imalloc_init_check(sopts, dopts)) { return ENOMEM; } - + sopts->slow = true; return imalloc_body(sopts, dopts, tsd); } } +JEMALLOC_NOINLINE void * malloc_default(size_t size) { void *ret; @@ -2239,7 +2240,7 @@ je_malloc(size_t size) { tsd_bytes_until_sample_set(tsd, bytes_until_sample); if (unlikely(bytes_until_sample < 0)) { - /* + /* * Avoid a prof_active check on the fastpath. * If prof_active is false, set bytes_until_sample to * a large value. 
If prof_active is set to true, @@ -2650,10 +2651,9 @@ je_realloc(void *ptr, size_t arg_size) { return ret; } -JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_free(void *ptr) { - LOG("core.free.entry", "ptr: %p", ptr); - +JEMALLOC_NOINLINE +void +free_default(void *ptr) { UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* @@ -2685,6 +2685,73 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } +} + +JEMALLOC_ALWAYS_INLINE +bool free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + if (unlikely(!tsd || !tsd_fast(tsd))) { + return false; + } + + tcache_t *tcache = tsd_tcachep_get(tsd); + + alloc_ctx_t alloc_ctx; + /* + * If !config_cache_oblivious, we can check PAGE alignment to + * detect sampled objects. Otherwise addresses are + * randomized, and we have to look it up in the rtree anyway. + * See also isfree(). + */ + if (!size_hint || config_cache_oblivious) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, + &alloc_ctx.szind, &alloc_ctx.slab); + assert(alloc_ctx.szind != SC_NSIZES); + + /* Note: profiled objects will have alloc_ctx.slab set */ + if (!res || !alloc_ctx.slab) { + return false; + } + } else { + /* + * Check for both sizes that are too large, and for sampled objects. + * Sampled objects are always page-aligned. The sampled object check + * will also check for null ptr. 
+ */ + if (size > SC_LOOKUP_MAXCLASS || (((uintptr_t)ptr & PAGE_MASK) == 0)) { + return false; + } + alloc_ctx.szind = sz_size2index_lookup(size); + } + + if (unlikely(ticker_trytick(&tcache->gc_ticker))) { + return false; + } + + cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind]; + if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) { + return false; + } + + if (config_stats) { + size_t usize = sz_index2size(alloc_ctx.szind); + *tsd_thread_deallocatedp_get(tsd) += usize; + } + + return true; +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_free(void *ptr) { + LOG("core.free.entry", "ptr: %p", ptr); + + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } + LOG("core.free.exit", ""); } @@ -3362,14 +3429,11 @@ inallocx(tsdn_t *tsdn, size_t size, int flags) { return usize; } -JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_sdallocx(void *ptr, size_t size, int flags) { +JEMALLOC_NOINLINE void +sdallocx_default(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); - tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); @@ -3409,6 +3473,17 @@ je_sdallocx(void *ptr, size_t size, int flags) { } check_entry_exit_locking(tsd_tsdn(tsd)); +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_sdallocx(void *ptr, size_t size, int flags) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + if (flags !=0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } + LOG("core.sdallocx.exit", ""); } From 1f561157042a779be12a2159a385de0416133f6b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Nov 2018 14:45:06 -0800 Subject: [PATCH 1217/2608] Fix tcache_flush (follow up cd2931a). Also catch invalid tcache id. 
--- include/jemalloc/internal/tcache_inlines.h | 3 +++ include/jemalloc/internal/tcache_types.h | 3 +++ src/tcache.c | 19 ++++++++++++++----- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index c2c3ac37..5eca20e8 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -216,6 +216,9 @@ JEMALLOC_ALWAYS_INLINE tcache_t * tcaches_get(tsd_t *tsd, unsigned ind) { tcaches_t *elm = &tcaches[ind]; if (unlikely(elm->tcache == NULL)) { + malloc_printf(": invalid tcache id (%u).\n", ind); + abort(); + } else if (unlikely(elm->tcache == TCACHES_ELM_NEED_REINIT)) { elm->tcache = tcache_create_explicit(tsd); } return elm->tcache; diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index f953b8c8..dce69382 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -53,4 +53,7 @@ typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false +/* Used for explicit tcache only. Means flushed but not destroyed. 
*/ +#define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) + #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index 7859da94..ee632f6f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -638,24 +638,33 @@ label_return: } static tcache_t * -tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) { +tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm, bool allow_reinit) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); if (elm->tcache == NULL) { return NULL; } tcache_t *tcache = elm->tcache; - elm->tcache = NULL; + if (allow_reinit) { + elm->tcache = TCACHES_ELM_NEED_REINIT; + } else { + elm->tcache = NULL; + } + + if (tcache == TCACHES_ELM_NEED_REINIT) { + return NULL; + } return tcache; } void tcaches_flush(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); - tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]); + tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind], true); malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); if (tcache != NULL) { - tcache_flush_cache(tsd, tcache); + /* Destroy the tcache; recreate in tcaches_get() if needed. */ + tcache_destroy(tsd, tcache, false); } } @@ -663,7 +672,7 @@ void tcaches_destroy(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_t *elm = &tcaches[ind]; - tcache_t *tcache = tcaches_elm_remove(tsd, elm); + tcache_t *tcache = tcaches_elm_remove(tsd, elm, false); elm->next = tcaches_avail; tcaches_avail = elm; malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); From 57553c3b1a5592dc4c03f3c6831d9b794e523865 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Nov 2018 11:15:03 -0800 Subject: [PATCH 1218/2608] Avoid touching all pages in extent_recycle for debug build. We may have a large number of pages with *zero set (since they are populated on demand). Only check the first page to avoid paging in all of them. 
--- src/extent.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/extent.c b/src/extent.c index ab712153..9605dacc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1162,14 +1162,15 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (*zero) { void *addr = extent_base_get(extent); - size_t size = extent_size_get(extent); if (!extent_zeroed_get(extent)) { + size_t size = extent_size_get(extent); if (pages_purge_forced(addr, size)) { memset(addr, 0, size); } } else if (config_debug) { size_t *p = (size_t *)(uintptr_t)addr; - for (size_t i = 0; i < size / sizeof(size_t); i++) { + /* Check the first page only. */ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { assert(p[i] == 0); } } From 4b82872ebf5e8b701e8b37c6d1297ceb88405df8 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 18 Oct 2018 09:49:45 -0700 Subject: [PATCH 1219/2608] arena: Refactor tcache_fill to batch fill from slab Refactor tcache_fill, introducing a new function arena_slab_reg_alloc_batch, which will fill multiple pointers from a slab. There should be no functional changes here, but allows future optimization on reg_alloc_batch. 
--- src/arena.c | 60 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 14 deletions(-) diff --git a/src/arena.c b/src/arena.c index 29f447bb..fc2a7dff 100644 --- a/src/arena.c +++ b/src/arena.c @@ -268,6 +268,27 @@ arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { return ret; } +static void +arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, + unsigned cnt, void** ptrs) { + arena_slab_data_t *slab_data = extent_slab_data_get(slab); + + assert(extent_nfree_get(slab) > 0); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); + + size_t regind = 0; + for (unsigned i = 0; i < cnt; i++) { + void *ret; + + regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); + ret = (void *)((uintptr_t)extent_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + extent_nfree_dec(slab); + + *(ptrs + i) = ret; + } +} + #ifndef JEMALLOC_JET static #endif @@ -1286,7 +1307,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { - unsigned i, nfill; + unsigned i, nfill, cnt; bin_t *bin; assert(tbin->ncached == 0); @@ -1297,32 +1318,43 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> - tcache->lg_fill_div[binind]); i < nfill; i++) { + tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; - void *ptr; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ptr = arena_slab_reg_alloc(slab, &bin_infos[binind]); + unsigned tofill = nfill - i; + cnt = tofill < extent_nfree_get(slab) ? 
+ tofill : extent_nfree_get(slab); + arena_slab_reg_alloc_batch( + slab, &bin_infos[binind], cnt, + tbin->avail - nfill + i); } else { - ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); - } - if (ptr == NULL) { + cnt = 1; + void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, + binind); /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must * be moved just before tbin->avail before bailing out. */ - if (i > 0) { - memmove(tbin->avail - i, tbin->avail - nfill, - i * sizeof(void *)); + if (ptr == NULL) { + if (i > 0) { + memmove(tbin->avail - i, + tbin->avail - nfill, + i * sizeof(void *)); + } + break; } - break; + /* Insert such that low regions get used first. */ + *(tbin->avail - nfill + i) = ptr; } if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ptr, &bin_infos[binind], true); + for (unsigned j = 0; j < cnt; j++) { + void* ptr = *(tbin->avail - nfill + i + j); + arena_alloc_junk_small(ptr, &bin_infos[binind], + true); + } } - /* Insert such that low regions get used first. 
*/ - *(tbin->avail - nfill + i) = ptr; } if (config_stats) { bin->stats.nmalloc += i; From 17aa470760cefb3057be746f7022196035f0cfbe Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Oct 2018 15:09:21 -0700 Subject: [PATCH 1220/2608] add extent_nfree_sub --- include/jemalloc/internal/extent_inlines.h | 6 ++++++ src/arena.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 145fa2d6..c931fd58 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -264,6 +264,12 @@ extent_nfree_dec(extent_t *extent) { extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); } +static inline void +extent_nfree_sub(extent_t *extent, uint64_t n) { + assert(extent_slab_get(extent)); + extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +} + static inline void extent_sn_set(extent_t *extent, size_t sn) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | diff --git a/src/arena.c b/src/arena.c index fc2a7dff..841f2950 100644 --- a/src/arena.c +++ b/src/arena.c @@ -283,10 +283,10 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); ret = (void *)((uintptr_t)extent_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); - extent_nfree_dec(slab); *(ptrs + i) = ret; } + extent_nfree_sub(slab, cnt); } #ifndef JEMALLOC_JET From 13c237c7ef5baa63c820539e0cfef4c4c5c74ea2 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 29 Oct 2018 16:01:09 -0700 Subject: [PATCH 1221/2608] Add a fastpath for arena_slab_reg_alloc_batch Also adds a configure.ac check for __builtin_popcount, which is used in the new fastpath. 
--- configure.ac | 17 ++++++- include/jemalloc/internal/bit_util.h | 19 ++++++++ .../internal/jemalloc_internal_defs.h.in | 6 +++ src/arena.c | 47 +++++++++++++++---- 4 files changed, 78 insertions(+), 11 deletions(-) diff --git a/configure.ac b/configure.ac index e27ea912..5cfe9af3 100644 --- a/configure.ac +++ b/configure.ac @@ -1429,6 +1429,21 @@ else fi fi +JE_COMPILABLE([a program using __builtin_popcountl], [ +#include +#include +#include +], [ + { + int rv = __builtin_popcountl(0x08); + printf("%d\n", rv); + } +], [je_cv_gcc_builtin_popcountl]) +if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl]) +fi + AC_ARG_WITH([lg_quantum], [AS_HELP_STRING([--with-lg-quantum=], [Base 2 log of minimum allocation alignment])], @@ -1901,7 +1916,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_dontdump}" = "xyes" ; then AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ]) fi - + dnl Check for madvise(..., MADV_[NO]HUGEPAGE). JE_COMPILABLE([madvise(..., MADV_[[NO]]HUGEPAGE)], [ #include diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 8c59c39e..c045eb86 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -27,6 +27,25 @@ ffs_u(unsigned bitmap) { return JEMALLOC_INTERNAL_FFS(bitmap); } +#ifdef JEMALLOC_INTERNAL_POPCOUNTL +BIT_UTIL_INLINE unsigned +popcount_lu(unsigned long bitmap) { + return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); +} +#endif + +/* + * Clears first unset bit in bitmap, and returns + * place of bit. bitmap *must not* be 0. 
+ */ + +BIT_UTIL_INLINE size_t +cfs_lu(unsigned long* bitmap) { + size_t bit = ffs_lu(*bitmap) - 1; + *bitmap ^= ZU(1) << bit; + return bit; +} + BIT_UTIL_INLINE unsigned ffs_zu(size_t bitmap) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c1eb8edc..3eac2754 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -236,6 +236,12 @@ #undef JEMALLOC_INTERNAL_FFSL #undef JEMALLOC_INTERNAL_FFS +/* + * popcount*() functions to use for bitmapping. + */ +#undef JEMALLOC_INTERNAL_POPCOUNTL +#undef JEMALLOC_INTERNAL_POPCOUNT + /* * If defined, explicitly attempt to more uniformly distribute large allocation * pointer alignments across all cache indices. diff --git a/src/arena.c b/src/arena.c index 841f2950..5fc90c54 100644 --- a/src/arena.c +++ b/src/arena.c @@ -273,19 +273,46 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, unsigned cnt, void** ptrs) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); - assert(extent_nfree_get(slab) > 0); + assert(extent_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - size_t regind = 0; +#if (! 
defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) for (unsigned i = 0; i < cnt; i++) { - void *ret; - - regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)extent_addr_get(slab) + + size_t regind = bitmap_sfu(slab_data->bitmap, + &bin_info->bitmap_info); + *(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); - - *(ptrs + i) = ret; } +#else + unsigned group = 0; + bitmap_t g = slab_data->bitmap[group]; + unsigned i = 0; + while (i < cnt) { + while (g == 0) { + g = slab_data->bitmap[++group]; + } + size_t shift = group << LG_BITMAP_GROUP_NBITS; + size_t pop = popcount_lu(g); + if (pop > (cnt - i)) { + pop = cnt - i; + } + + /* + * Load from memory locations only once, outside the + * hot loop below. + */ + uintptr_t base = (uintptr_t)extent_addr_get(slab); + uintptr_t regsize = (uintptr_t)bin_info->reg_size; + while (pop--) { + size_t bit = cfs_lu(&g); + size_t regind = shift + bit; + *(ptrs + i) = (void *)(base + regsize * regind); + + i++; + } + slab_data->bitmap[group] = g; + } +#endif extent_nfree_sub(slab, cnt); } @@ -1331,7 +1358,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } else { cnt = 1; void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, - binind); + binind); /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must @@ -1352,7 +1379,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, for (unsigned j = 0; j < cnt; j++) { void* ptr = *(tbin->avail - nfill + i + j); arena_alloc_junk_small(ptr, &bin_infos[binind], - true); + true); } } } From 43f3b1ad0cd0900797688aa8b52b1face6416999 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 8 Nov 2018 15:43:09 -0800 Subject: [PATCH 1222/2608] Deprecate OSSpinLock. 
--- configure.ac | 20 ++----------------- .../internal/jemalloc_internal_defs.h.in | 6 ------ .../jemalloc/internal/jemalloc_preamble.h.in | 2 +- include/jemalloc/internal/mutex.h | 10 ---------- src/mutex.c | 4 +--- test/include/test/jemalloc_test.h.in | 2 +- test/include/test/mtx.h | 2 -- test/src/mtx.c | 7 ------- 8 files changed, 5 insertions(+), 48 deletions(-) diff --git a/configure.ac b/configure.ac index 5cfe9af3..072808cb 100644 --- a/configure.ac +++ b/configure.ac @@ -1845,7 +1845,7 @@ fi dnl ============================================================================ dnl Check for atomic(3) operations as provided on Darwin. dnl We need this not for the atomic operations (which are provided above), but -dnl rather for the OSSpinLock type it exposes. +dnl rather for the OS_unfair_lock type it exposes. JE_COMPILABLE([Darwin OSAtomic*()], [ #include @@ -2011,21 +2011,6 @@ if test "x${je_cv_os_unfair_lock}" = "xyes" ; then AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) fi -dnl ============================================================================ -dnl Check for spinlock(3) operations as provided on Darwin. - -JE_COMPILABLE([Darwin OSSpin*()], [ -#include -#include -], [ - OSSpinLock lock = 0; - OSSpinLockLock(&lock); - OSSpinLockUnlock(&lock); -], [je_cv_osspin]) -if test "x${je_cv_osspin}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSSPIN], [ ]) -fi - dnl ============================================================================ dnl Darwin-related configuration. @@ -2079,8 +2064,7 @@ dnl ============================================================================ dnl Enable background threads if possible. 
if test "x${have_pthread}" = "x1" -a "x${have_dlsym}" = "x1" \ - -a "x${je_cv_os_unfair_lock}" != "xyes" \ - -a "x${je_cv_osspin}" != "xyes" ; then + -a "x${je_cv_os_unfair_lock}" != "xyes" ; then AC_DEFINE([JEMALLOC_BACKGROUND_THREAD]) fi diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3eac2754..3e94c023 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -78,12 +78,6 @@ */ #undef JEMALLOC_OS_UNFAIR_LOCK -/* - * Defined if OSSpin*() functions are available, as provided by Darwin, and - * documented in the spinlock(3) manual page. - */ -#undef JEMALLOC_OSSPIN - /* Defined if syscall(2) is usable. */ #undef JEMALLOC_USE_SYSCALL diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 1b12aeec..857fa326 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -21,7 +21,7 @@ # include "../jemalloc@install_suffix@.h" #endif -#if (defined(JEMALLOC_OSATOMIC) || defined(JEMALLOC_OSSPIN)) +#if defined(JEMALLOC_OSATOMIC) #include #endif diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 5a955d9e..c530cc9d 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -37,8 +37,6 @@ struct malloc_mutex_s { # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) os_unfair_lock lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) pthread_mutex_t lock; malloc_mutex_t *postponed_next; @@ -84,10 +82,6 @@ struct malloc_mutex_s { # define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) # define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) # define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_LOCK(m) 
OSSpinLockLock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) OSSpinLockUnlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!OSSpinLockTry(&(m)->lock)) #else # define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) # define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) @@ -110,10 +104,6 @@ struct malloc_mutex_s { {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif -#elif (defined(JEMALLOC_OSSPIN)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, 0}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # if (defined(JEMALLOC_DEBUG)) # define MALLOC_MUTEX_INITIALIZER \ diff --git a/src/mutex.c b/src/mutex.c index 55e37ad4..eb6c4c6d 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -144,9 +144,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, } # endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - mutex->lock = OS_UNFAIR_LOCK_INIT; -#elif (defined(JEMALLOC_OSSPIN)) - mutex->lock = 0; + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { mutex->postponed_next = postponed_mutexes; diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 0209aead..c46af5d9 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -25,7 +25,7 @@ extern "C" { #include "test/jemalloc_test_defs.h" -#ifdef JEMALLOC_OSSPIN +#if defined(JEMALLOC_OSATOMIC) # include #endif diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h index 58afbc3d..066a2137 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -10,8 +10,6 @@ typedef struct { CRITICAL_SECTION lock; #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) os_unfair_lock lock; -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLock lock; #else pthread_mutex_t lock; #endif diff --git a/test/src/mtx.c b/test/src/mtx.c index a393c01f..d9ce375c 100644 --- a/test/src/mtx.c +++ 
b/test/src/mtx.c @@ -13,8 +13,6 @@ mtx_init(mtx_t *mtx) { } #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) mtx->lock = OS_UNFAIR_LOCK_INIT; -#elif (defined(JEMALLOC_OSSPIN)) - mtx->lock = 0; #else pthread_mutexattr_t attr; @@ -35,7 +33,6 @@ void mtx_fini(mtx_t *mtx) { #ifdef _WIN32 #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -#elif (defined(JEMALLOC_OSSPIN)) #else pthread_mutex_destroy(&mtx->lock); #endif @@ -47,8 +44,6 @@ mtx_lock(mtx_t *mtx) { EnterCriticalSection(&mtx->lock); #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) os_unfair_lock_lock(&mtx->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockLock(&mtx->lock); #else pthread_mutex_lock(&mtx->lock); #endif @@ -60,8 +55,6 @@ mtx_unlock(mtx_t *mtx) { LeaveCriticalSection(&mtx->lock); #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) os_unfair_lock_unlock(&mtx->lock); -#elif (defined(JEMALLOC_OSSPIN)) - OSSpinLockUnlock(&mtx->lock); #else pthread_mutex_unlock(&mtx->lock); #endif From c4063ce439523d382f2dfbbc5bf6da657e6badb0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Nov 2018 13:01:05 -0800 Subject: [PATCH 1223/2608] Set the default number of background threads to 4. The setting has been tested in production for a while. No negative effect while we were able to reduce number of threads per process. 
--- include/jemalloc/internal/background_thread_structs.h | 1 + src/background_thread.c | 7 +++---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index c1107dfe..c02aa434 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -9,6 +9,7 @@ #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX #define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT +#define DEFAULT_NUM_BACKGROUND_THREAD 4 typedef enum { background_thread_stopped, diff --git a/src/background_thread.c b/src/background_thread.c index 24f67305..813867ef 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -13,7 +13,7 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; -size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT; +size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; /* Used for thread creation, termination and stats. 
*/ malloc_mutex_t background_thread_lock; @@ -872,9 +872,8 @@ background_thread_boot1(tsdn_t *tsdn) { assert(have_background_thread); assert(narenas_total_get() > 0); - if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT && - ncpus < MAX_BACKGROUND_THREAD_LIMIT) { - opt_max_background_threads = ncpus; + if (opt_max_background_threads > MAX_BACKGROUND_THREAD_LIMIT) { + opt_max_background_threads = DEFAULT_NUM_BACKGROUND_THREAD; } max_background_threads = opt_max_background_threads; From b23336af96e6ef9efb47591ce7bf2c8a1eab866b Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 26 Nov 2018 08:11:00 -0800 Subject: [PATCH 1224/2608] mutex: fix trylock spin wait contention If there are 3 or more threads spin-waiting on the same mutex, there will be excessive exclusive cacheline contention because pthread_trylock() immediately tries to CAS in a new value, instead of first checking if the lock is locked. This diff adds a 'locked' hint flag, and we will only spin wait without trylock()ing while set. I don't know of any other portable way to get the same behavior as pthread_mutex_lock(). This is pretty easy to test via ttest, e.g. ./ttest1 500 3 10000 1 100 Throughput is nearly 3x as fast. This blames to the mutex profiling changes, however, we almost never have 3 or more threads contending in properly configured production workloads, but still worth fixing. --- include/jemalloc/internal/mutex.h | 21 +++++++++++++++------ src/mutex.c | 3 ++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index c530cc9d..8f4a3072 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -43,6 +43,11 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif + /* + * Hint flag to avoid exclusive cache line contention + * during spin waiting + */ + atomic_b_t locked; }; /* * We only touch witness when configured w/ debug. 
However we @@ -97,21 +102,21 @@ struct malloc_mutex_s { #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) # if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # if (defined(JEMALLOC_DEBUG)) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif @@ -119,11 +124,11 @@ struct malloc_mutex_s { # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #endif @@ -148,6 +153,7 @@ void malloc_mutex_lock_slow(malloc_mutex_t *mutex); static inline void malloc_mutex_lock_final(malloc_mutex_t *mutex) { MALLOC_MUTEX_LOCK(mutex); + atomic_store_b(&mutex->locked, true, 
ATOMIC_RELAXED); } static inline bool @@ -173,6 +179,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); return true; } mutex_owner_stats_update(tsdn, mutex); @@ -212,6 +219,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); } mutex_owner_stats_update(tsdn, mutex); } @@ -220,6 +228,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { + atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED); witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { MALLOC_MUTEX_UNLOCK(mutex); diff --git a/src/mutex.c b/src/mutex.c index eb6c4c6d..3f920f5b 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -55,7 +55,8 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { spin_cpu_spinwait(); - if (!malloc_mutex_trylock_final(mutex)) { + if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) + && !malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; } From 37b89139252db18c95ebce3e0eac67817fa4a8ab Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Nov 2018 15:56:04 -0800 Subject: [PATCH 1225/2608] Add support for sharded bins within an arena. This makes it possible to have multiple set of bins in an arena, which improves arena scalability because the bins (especially the small ones) are always the limiting factor in production workload. A bin shard is picked on allocation; each extent tracks the bin shard id for deallocation. The shard size will be determined using runtime options. 
--- include/jemalloc/internal/arena_externs.h | 6 +- include/jemalloc/internal/arena_structs_b.h | 5 +- include/jemalloc/internal/bin.h | 18 ++- include/jemalloc/internal/extent_inlines.h | 26 ++++ include/jemalloc/internal/extent_structs.h | 15 +- include/jemalloc/internal/mutex.h | 22 +++ include/jemalloc/internal/tsd.h | 2 + src/arena.c | 163 +++++++++++++------- src/bin.c | 6 + src/ctl.c | 6 +- src/jemalloc.c | 3 + src/tcache.c | 18 ++- 12 files changed, 217 insertions(+), 73 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 073e587d..04d99545 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,8 +63,8 @@ void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, void *ptr); +void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, extent_t *extent, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); @@ -86,6 +86,8 @@ size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); arena_t *arena_choose_huge(tsd_t *tsd); +bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 509f11c1..950bd13c 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ 
b/include/jemalloc/internal/arena_structs_b.h @@ -90,6 +90,9 @@ struct arena_s { */ atomic_u_t nthreads[2]; + /* Next bin shard for binding new threads. Synchronization: atomic. */ + atomic_u_t binshard_next; + /* * When percpu_arena is enabled, to amortize the cost of reading / * updating the current CPU id, track the most recent thread accessing @@ -204,7 +207,7 @@ struct arena_s { * * Synchronization: internal. */ - bin_t bins[SC_NBINS]; + bins_t bins[SC_NBINS]; /* * Base allocator, from which arena metadata are allocated. diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index e04b6c67..3fddef73 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -7,6 +7,11 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" +#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) + +extern unsigned opt_bin_shard_maxszind; +extern unsigned opt_n_bin_shards; + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -42,6 +47,9 @@ struct bin_info_s { /* Total number of regions in a slab for this bin's size class. */ uint32_t nregs; + /* Number of sharded bins in each arena for this size class. */ + uint32_t n_shards; + /* * Metadata used to manipulate bitmaps for slabs associated with this * bin. @@ -51,7 +59,6 @@ struct bin_info_s { extern bin_info_t bin_infos[SC_NBINS]; - typedef struct bin_s bin_t; struct bin_s { /* All operations on bin_t fields require lock ownership. */ @@ -79,6 +86,13 @@ struct bin_s { bin_stats_t stats; }; +/* A set of sharded bins of the same size class. */ +typedef struct bins_s bins_t; +struct bins_s { + /* Sharded bins. Dynamically sized. 
*/ + bin_t *bin_shards; +}; + void bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]); void bin_boot(); @@ -94,7 +108,7 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); static inline void bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { malloc_mutex_lock(tsdn, &bin->lock); - malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); + malloc_mutex_prof_accum(tsdn, &dst_bin_stats->mutex_data, &bin->lock); dst_bin_stats->nmalloc += bin->stats.nmalloc; dst_bin_stats->ndalloc += bin->stats.ndalloc; dst_bin_stats->nrequests += bin->stats.nrequests; diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index c931fd58..b5728608 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -70,6 +70,14 @@ extent_usize_get(const extent_t *extent) { return sz_index2size(extent_szind_get(extent)); } +static inline unsigned +extent_binshard_get(const extent_t *extent) { + unsigned binshard = (unsigned)((extent->e_bits & + EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + return binshard; +} + static inline size_t extent_sn_get(const extent_t *extent) { return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> @@ -190,6 +198,14 @@ extent_arena_set(extent_t *extent, arena_t *arena) { ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); } +static inline void +extent_binshard_set(extent_t *extent, unsigned binshard) { + /* The assertion assumes szind is set already. 
*/ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); +} + static inline void extent_addr_set(extent_t *extent, void *addr) { extent->e_addr = addr; @@ -252,6 +268,16 @@ extent_nfree_set(extent_t *extent, unsigned nfree) { ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); } +static inline void +extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { + /* The assertion assumes szind is set already. */ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & + (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + static inline void extent_nfree_inc(extent_t *extent) { assert(extent_slab_get(extent)); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 50e77bff..16264528 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -29,9 +29,10 @@ struct extent_s { * t: state * i: szind * f: nfree + * s: bin_shard * n: sn * - * nnnnnnnn ... nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -76,6 +77,8 @@ struct extent_s { * * nfree: Number of free regions in slab. * + * bin_shard: the shard of the bin from which this extent came. + * * sn: Serial number (potentially non-unique). 
* * Serial numbers may wrap around if !opt_retain, but as long as @@ -121,7 +124,15 @@ struct extent_s { #define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) #define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_WIDTH 6 +#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) + +/* Will make dynamic options. */ +#define OPT_N_BIN_SHARDS (1) +#define OPT_BIN_SHARD_MAXSZIND (0) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) #define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 8f4a3072..7c24f072 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -263,4 +263,26 @@ malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); } +static inline void +malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + + nstime_add(&data->tot_wait_time, &source->tot_wait_time); + if (nstime_compare(&source->max_wait_time, &data->max_wait_time) > 0) { + nstime_copy(&data->max_wait_time, &source->max_wait_time); + } + data->n_wait_times += source->n_wait_times; + data->n_spin_acquired += source->n_spin_acquired; + if (data->max_n_thds < source->max_n_thds) { + data->max_n_thds = source->max_n_thds; + } + /* n_wait_thds is not reported. 
*/ + atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); + data->n_owner_switches += source->n_owner_switches; + data->n_lock_ops += source->n_lock_ops; +} + #endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c931441b..4dc2274a 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -74,6 +74,7 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(binshard, unsigned, unsigned) \ O(tcache, tcache_t, tcache_t) \ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD @@ -93,6 +94,7 @@ typedef void (*test_callback_t)(int *); NULL, \ NULL, \ NULL, \ + ((unsigned)-1), \ TCACHE_ZERO_INITIALIZER, \ WITNESS_TSD_INITIALIZER \ MALLOC_TEST_TSD_INITIALIZER \ diff --git a/src/arena.c b/src/arena.c index 5fc90c54..893c9b56 100644 --- a/src/arena.c +++ b/src/arena.c @@ -233,7 +233,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_subtract(&astats->uptime, &arena->create_time); for (szind_t i = 0; i < SC_NBINS; i++) { - bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]); + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_stats_merge(tsdn, &bstats[i], + &arena->bins[i].bin_shards[j]); + } } } @@ -1039,6 +1042,37 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { extent_list_remove(&bin->slabs_full, slab); } +static void +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { + extent_t *slab; + + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + if (bin->slabcur != NULL) { + slab = bin->slabcur; + bin->slabcur = NULL; + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != NULL) { + malloc_mutex_unlock(tsd_tsdn(tsd), 
&bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + for (slab = extent_list_first(&bin->slabs_full); slab != NULL; + slab = extent_list_first(&bin->slabs_full)) { + arena_bin_slabs_full_remove(arena, bin, slab); + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); + malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + } + if (config_stats) { + bin->stats.curregs = 0; + bin->stats.curslabs = 0; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); +} + void arena_reset(tsd_t *tsd, arena_t *arena) { /* @@ -1085,34 +1119,10 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { - extent_t *slab; - bin_t *bin = &arena->bins[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (bin->slabcur != NULL) { - slab = bin->slabcur; - bin->slabcur = NULL; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + arena_bin_reset(tsd, arena, + &arena->bins[i].bin_shards[j]); } - while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != - NULL) { - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } - for (slab = extent_list_first(&bin->slabs_full); slab != NULL; - slab = extent_list_first(&bin->slabs_full)) { - arena_bin_slabs_full_remove(arena, bin, slab); - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - } - if (config_stats) { - bin->stats.curregs = 0; - bin->stats.curslabs = 0; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); @@ -1197,7 +1207,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, } static extent_t * 
-arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, +arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1225,7 +1235,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, /* Initialize slab internals. */ arena_slab_data_t *slab_data = extent_slab_data_get(slab); - extent_nfree_set(slab, bin_info->nregs); + extent_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); @@ -1235,7 +1245,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, static extent_t * arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind) { + szind_t binind, unsigned binshard) { extent_t *slab; const bin_info_t *bin_info; @@ -1251,7 +1261,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - slab = arena_slab_alloc(tsdn, arena, binind, bin_info); + slab = arena_slab_alloc(tsdn, arena, binind, binshard, bin_info); /********************************/ malloc_mutex_lock(tsdn, &bin->lock); if (slab != NULL) { @@ -1278,7 +1288,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). 
*/ static void * arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind) { + szind_t binind, unsigned binshard) { const bin_info_t *bin_info; extent_t *slab; @@ -1287,7 +1297,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; } - slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind); + slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); if (bin->slabcur != NULL) { /* * Another thread updated slabcur while this one ran without the @@ -1331,19 +1341,39 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, return arena_slab_reg_alloc(slab, bin_info); } +/* Choose a bin shard and return the locked bin. */ +bin_t * +arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard) { + bin_t *bin; + if (binind >= opt_bin_shard_maxszind || tsdn_null(tsdn) || + tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { + *binshard = 0; + } else { + *binshard = tsd_binshard_get(tsdn_tsd(tsdn)) % + bin_infos[binind].n_shards; + } + assert(*binshard < bin_infos[binind].n_shards); + bin = &arena->bins[binind].bin_shards[*binshard]; + malloc_mutex_lock(tsdn, &bin->lock); + + return bin; +} + void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill, cnt; - bin_t *bin; assert(tbin->ncached == 0); if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); } - bin = &arena->bins[binind]; - malloc_mutex_lock(tsdn, &bin->lock); + + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; @@ -1358,7 +1388,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } else { cnt = 1; void *ptr = arena_bin_malloc_hard(tsdn, 
arena, bin, - binind); + binind, binshard); /* * OOM. tbin->avail isn't yet filled down to its first * element, so the successful allocations (if any) must @@ -1417,14 +1447,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { extent_t *slab; assert(binind < SC_NBINS); - bin = &arena->bins[binind]; usize = sz_index2size(binind); + unsigned binshard; + bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { - ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); + ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); } if (ret == NULL) { @@ -1623,11 +1653,9 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - void *ptr, bool junked) { +arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); - szind_t binind = extent_szind_get(slab); - bin_t *bin = &arena->bins[binind]; const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { @@ -1651,18 +1679,21 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } void -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, true); +arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, extent_t *extent, void *ptr) { + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + true); } static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { szind_t binind = extent_szind_get(extent); - bin_t *bin = &arena->bins[binind]; + unsigned 
binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + false); malloc_mutex_unlock(tsdn, &bin->lock); } @@ -1892,7 +1923,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - arena = (arena_t *)base_alloc(tsdn, base, sizeof(arena_t), CACHELINE); + size_t arena_size = sizeof(arena_t) + + sizeof(bin_t) * opt_n_bin_shards * opt_bin_shard_maxszind + + sizeof(bin_t) * (SC_NBINS - opt_bin_shard_maxszind); + arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } @@ -1997,12 +2031,20 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Initialize bins. */ + uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); + atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); for (i = 0; i < SC_NBINS; i++) { - bool err = bin_init(&arena->bins[i]); - if (err) { - goto label_error; + unsigned nshards = bin_infos[i].n_shards; + arena->bins[i].bin_shards = (bin_t *)bin_addr; + bin_addr += nshards * sizeof(bin_t); + for (unsigned j = 0; j < nshards; j++) { + bool err = bin_init(&arena->bins[i].bin_shards[j]); + if (err) { + goto label_error; + } } } + assert(bin_addr == (uintptr_t)arena + arena_size); arena->base = base; /* Set arena before creating background threads. 
*/ @@ -2139,7 +2181,9 @@ arena_prefork6(tsdn_t *tsdn, arena_t *arena) { void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < SC_NBINS; i++) { - bin_prefork(tsdn, &arena->bins[i]); + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); + } } } @@ -2148,7 +2192,10 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < SC_NBINS; i++) { - bin_postfork_parent(tsdn, &arena->bins[i]); + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_postfork_parent(tsdn, + &arena->bins[i].bin_shards[j]); + } } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); @@ -2192,7 +2239,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } for (i = 0; i < SC_NBINS; i++) { - bin_postfork_child(tsdn, &arena->bins[i]); + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]); + } } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); diff --git a/src/bin.c b/src/bin.c index e62babdd..8dd964fa 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,6 +6,9 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" +unsigned opt_bin_shard_maxszind; +unsigned opt_n_bin_shards; + bin_info_t bin_infos[SC_NBINS]; void @@ -18,6 +21,7 @@ bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]) { bin_info->slab_size = (sc->pgs << LG_PAGE); bin_info->nregs = (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bin_info->n_shards = (i < opt_bin_shard_maxszind) ? 
opt_n_bin_shards : 1; bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; @@ -27,6 +31,8 @@ bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]) { void bin_boot(sc_data_t *sc_data) { assert(sc_data->initialized); + opt_bin_shard_maxszind = OPT_BIN_SHARD_MAXSZIND; + opt_n_bin_shards = OPT_N_BIN_SHARDS; bin_infos_init(sc_data, bin_infos); } diff --git a/src/ctl.c b/src/ctl.c index b482fc56..72ad587d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2913,8 +2913,10 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < SC_NBINS; i++) { - bin_t *bin = &arena->bins[i]; - MUTEX_PROF_RESET(bin->lock); + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = &arena->bins[i].bin_shards[j]; + MUTEX_PROF_RESET(bin->lock); + } } } #undef MUTEX_PROF_RESET diff --git a/src/jemalloc.c b/src/jemalloc.c index 68a21f9d..c635ecb4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -379,6 +379,9 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { tsd_iarena_set(tsd, arena); } else { tsd_arena_set(tsd, arena); + unsigned binshard = atomic_fetch_add_u(&arena->binshard_next, 1, + ATOMIC_RELAXED) % BIN_SHARDS_MAX; + tsd_binshard_set(tsd, binshard); } } diff --git a/src/tcache.c b/src/tcache.c index ee632f6f..51e3131e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -121,7 +121,9 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, /* Lock the arena bin associated with the first object. 
*/ extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); - bin_t *bin = &bin_arena->bins[binind]; + unsigned binshard = extent_binshard_get(extent); + assert(binshard < bin_infos[binind].n_shards); + bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, @@ -145,9 +147,10 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, extent = item_extent[i]; assert(ptr != NULL && extent != NULL); - if (extent_arena_get(extent) == bin_arena) { + if (extent_arena_get(extent) == bin_arena + && extent_binshard_get(extent) == binshard) { arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, extent, ptr); + bin_arena, bin, binind, extent, ptr); } else { /* * This object was allocated via a different @@ -169,8 +172,9 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - bin_t *bin = &arena->bins[binind]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsd_tsdn(tsd), arena, binind, + &binshard); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; @@ -557,9 +561,9 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. 
*/ for (i = 0; i < SC_NBINS; i++) { - bin_t *bin = &arena->bins[i]; cache_bin_t *tbin = tcache_small_bin_get(tcache, i); - malloc_mutex_lock(tsdn, &bin->lock); + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); tbin->tstats.nrequests = 0; From 3f9f2833f6228e07673d75c9bce6f5fb58c5f3b0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 20 Nov 2018 13:51:32 -0800 Subject: [PATCH 1226/2608] Add opt.bin_shards to specify number of bin shards. The option uses the same format as "slab_sizes" to specify number of shards for each bin size. --- include/jemalloc/internal/bin.h | 10 ++--- include/jemalloc/internal/extent_structs.h | 4 -- src/arena.c | 11 ++--- src/bin.c | 48 +++++++++++++++++----- src/jemalloc.c | 39 ++++++++++++++---- 5 files changed, 81 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 3fddef73..baa0acf3 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -8,9 +8,7 @@ #include "jemalloc/internal/sc.h" #define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) - -extern unsigned opt_bin_shard_maxszind; -extern unsigned opt_n_bin_shards; +#define N_BIN_SHARDS_DEFAULT 1 /* * A bin contains a set of extents that are currently being used for slab @@ -93,8 +91,10 @@ struct bins_s { bin_t *bin_shards; }; -void bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]); -void bin_boot(); +void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]); +bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, + size_t end_size, size_t nshards); +void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); /* Initializes a bin to empty. Returns true on error. 
*/ bool bin_init(bin_t *bin); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 16264528..ceb18979 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -128,10 +128,6 @@ struct extent_s { #define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) #define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) -/* Will make dynamic options. */ -#define OPT_N_BIN_SHARDS (1) -#define OPT_BIN_SHARD_MAXSZIND (0) - #define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) #define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) diff --git a/src/arena.c b/src/arena.c index 893c9b56..7017bd7a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1346,8 +1346,7 @@ bin_t * arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard) { bin_t *bin; - if (binind >= opt_bin_shard_maxszind || tsdn_null(tsdn) || - tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { + if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { *binshard = 0; } else { *binshard = tsd_binshard_get(tsdn_tsd(tsdn)) % @@ -1923,9 +1922,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - size_t arena_size = sizeof(arena_t) + - sizeof(bin_t) * opt_n_bin_shards * opt_bin_shard_maxszind + - sizeof(bin_t) * (SC_NBINS - opt_bin_shard_maxszind); + unsigned nbins_total = 0; + for (i = 0; i < SC_NBINS; i++) { + nbins_total += bin_infos[i].n_shards; + } + size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; diff --git a/src/bin.c b/src/bin.c index 8dd964fa..bca6b12c 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,13 +6,11 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -unsigned opt_bin_shard_maxszind; -unsigned 
opt_n_bin_shards; - bin_info_t bin_infos[SC_NBINS]; -void -bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]) { +static void +bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bin_info_t bin_infos[SC_NBINS]) { for (unsigned i = 0; i < SC_NBINS; i++) { bin_info_t *bin_info = &bin_infos[i]; sc_t *sc = &sc_data->sc[i]; @@ -21,19 +19,49 @@ bin_infos_init(sc_data_t *sc_data, bin_info_t bin_infos[SC_NBINS]) { bin_info->slab_size = (sc->pgs << LG_PAGE); bin_info->nregs = (uint32_t)(bin_info->slab_size / bin_info->reg_size); - bin_info->n_shards = (i < opt_bin_shard_maxszind) ? opt_n_bin_shards : 1; + bin_info->n_shards = bin_shard_sizes[i]; bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; } } +bool +bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, + size_t end_size, size_t nshards) { + if (nshards > BIN_SHARDS_MAX || nshards == 0) { + return true; + } + + if (start_size > SC_SMALL_MAXCLASS) { + return false; + } + if (end_size > SC_SMALL_MAXCLASS) { + end_size = SC_SMALL_MAXCLASS; + } + + /* Compute the index since this may happen before sz init. */ + szind_t ind1 = sz_size2index_compute(start_size); + szind_t ind2 = sz_size2index_compute(end_size); + for (unsigned i = ind1; i <= ind2; i++) { + bin_shard_sizes[i] = (unsigned)nshards; + } + + return false; +} + void -bin_boot(sc_data_t *sc_data) { +bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { + /* Load the default number of shards. 
*/ + for (unsigned i = 0; i < SC_NBINS; i++) { + bin_shard_sizes[i] = N_BIN_SHARDS_DEFAULT; + } +} + +void +bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { assert(sc_data->initialized); - opt_bin_shard_maxszind = OPT_BIN_SHARD_MAXSZIND; - opt_n_bin_shards = OPT_N_BIN_SHARDS; - bin_infos_init(sc_data, bin_infos); + bin_infos_init(sc_data, bin_shard_sizes, bin_infos); } bool diff --git a/src/jemalloc.c b/src/jemalloc.c index c635ecb4..1f7ed2eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -767,9 +767,10 @@ init_opt_stats_print_opts(const char *v, size_t vlen) { assert(opts_len == strlen(opt_stats_print_opts)); } +/* Reads the next size pair in a multi-sized option. */ static bool -malloc_conf_slab_sizes_next(const char **slab_size_segment_cur, - size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *pgs) { +malloc_conf_multi_sizes_next(const char **slab_size_segment_cur, + size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *new_size) { const char *cur = *slab_size_segment_cur; char *end; uintmax_t um; @@ -797,7 +798,7 @@ malloc_conf_slab_sizes_next(const char **slab_size_segment_cur, if (get_errno() != 0) { return true; } - *pgs = (size_t)um; + *new_size = (size_t)um; /* Consume the separator if there is one. 
*/ if (*end == '|') { @@ -923,7 +924,7 @@ malloc_slow_flag_init(void) { } static void -malloc_conf_init(sc_data_t *sc_data) { +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { unsigned i; char buf[PATH_MAX + 1]; const char *opts, *k, *v; @@ -1161,6 +1162,28 @@ malloc_conf_init(sc_data_t *sc_data) { } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, UINT_MAX, yes, no, false) + if (CONF_MATCH("bin_shards")) { + const char *bin_shards_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t size_start; + size_t size_end; + size_t nshards; + bool err = malloc_conf_multi_sizes_next( + &bin_shards_segment_cur, &vlen_left, + &size_start, &size_end, &nshards); + if (err || bin_update_shard_size( + bin_shard_sizes, size_start, + size_end, nshards)) { + malloc_conf_error( + "Invalid settings for " + "bin_shards", k, klen, v, + vlen); + break; + } + } while (vlen_left > 0); + continue; + } CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : @@ -1256,7 +1279,7 @@ malloc_conf_init(sc_data_t *sc_data) { size_t slab_start; size_t slab_end; size_t pgs; - err = malloc_conf_slab_sizes_next( + err = malloc_conf_multi_sizes_next( &slab_size_segment_cur, &vlen_left, &slab_start, &slab_end, &pgs); @@ -1390,6 +1413,8 @@ malloc_init_hard_a0_locked() { * out of sc_data_global are final. */ sc_boot(&sc_data); + unsigned bin_shard_sizes[SC_NBINS]; + bin_shard_sizes_boot(bin_shard_sizes); /* * prof_boot0 only initializes opt_prof_prefix. We need to do it before * we parse malloc_conf options, in case malloc_conf parsing overwrites @@ -1398,9 +1423,9 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot0(); } - malloc_conf_init(&sc_data); + malloc_conf_init(&sc_data, bin_shard_sizes); sz_boot(&sc_data); - bin_boot(&sc_data); + bin_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { /* Print statistics at exit. 
*/ From 45bb4483baef0f9bb1362349d9838ee041c42754 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 21 Nov 2018 11:17:31 -0800 Subject: [PATCH 1227/2608] Add stats for arenas.bin.i.nshards. --- src/ctl.c | 5 ++++- src/stats.c | 9 ++++++++- test/unit/mallctl.c | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 72ad587d..a1508910 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -127,6 +127,7 @@ INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) CTL_PROTO(arenas_bin_i_slab_size) +CTL_PROTO(arenas_bin_i_nshards) INDEX_PROTO(arenas_bin_i) CTL_PROTO(arenas_lextent_i_size) INDEX_PROTO(arenas_lextent_i) @@ -355,7 +356,8 @@ static const ctl_indexed_node_t arena_node[] = { static const ctl_named_node_t arenas_bin_i_node[] = { {NAME("size"), CTL(arenas_bin_i_size)}, {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - {NAME("slab_size"), CTL(arenas_bin_i_slab_size)} + {NAME("slab_size"), CTL(arenas_bin_i_slab_size)}, + {NAME("nshards"), CTL(arenas_bin_i_nshards)} }; static const ctl_named_node_t super_arenas_bin_i_node[] = { {NAME(""), CHILD(named, arenas_bin_i)} @@ -2490,6 +2492,7 @@ CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/stats.c b/src/stats.c index e4e13378..e2a1100c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -249,6 +249,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { COL(nmalloc, right, 13, uint64) COL(ndalloc, right, 13, uint64) COL(nrequests, right, 13, uint64) + COL(nshards, right, 9, unsigned) COL(curregs, right, 13, size) COL(curslabs, right, 13, size) COL(regs, 
right, 5, unsigned) @@ -293,7 +294,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { uint64_t nslabs; size_t reg_size, slab_size, curregs; size_t curslabs; - uint32_t nregs; + uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; @@ -310,6 +311,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t); + CTL_M2_GET("arenas.bin.0.nshards", j, &nshards, uint32_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, uint64_t); @@ -383,6 +385,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { col_nmalloc.uint64_val = nmalloc; col_ndalloc.uint64_val = ndalloc; col_nrequests.uint64_val = nrequests; + col_nshards.unsigned_val = nshards; col_curregs.size_val = curregs; col_curslabs.size_val = curslabs; col_regs.unsigned_val = nregs; @@ -1143,6 +1146,10 @@ stats_general_print(emitter_t *emitter) { emitter_json_kv(emitter, "slab_size", emitter_type_size, &sv); + CTL_M2_GET("arenas.bin.0.nshards", i, &u32v, uint32_t); + emitter_json_kv(emitter, "nshards", emitter_type_uint32, + &u32v); + emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* Close "bin". */ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 452d884d..039a8810 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -706,6 +706,7 @@ TEST_BEGIN(test_arenas_bin_constants) { TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs); TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, bin_infos[0].slab_size); + TEST_ARENAS_BIN_CONSTANT(uint32_t, nshards, bin_infos[0].n_shards); #undef TEST_ARENAS_BIN_CONSTANT } From 98b56ab23dd4d3dc826f06906e6c51c9c9d4d52a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 27 Nov 2018 12:38:47 -0800 Subject: [PATCH 1228/2608] Store the bin shard selection in TSD. 
This avoids having to choose bin shard on the fly, also will allow flexible bin binding for each thread. --- include/jemalloc/internal/bin.h | 4 +--- include/jemalloc/internal/bin_types.h | 17 +++++++++++++++++ include/jemalloc/internal/tsd.h | 5 +++-- src/arena.c | 3 +-- src/jemalloc.c | 11 ++++++++--- 5 files changed, 30 insertions(+), 10 deletions(-) create mode 100644 include/jemalloc/internal/bin_types.h diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index baa0acf3..f542c882 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,14 +2,12 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/bin_stats.h" +#include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" -#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) -#define N_BIN_SHARDS_DEFAULT 1 - /* * A bin contains a set of extents that are currently being used for slab * allocations. diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h new file mode 100644 index 00000000..3533606b --- /dev/null +++ b/include/jemalloc/internal/bin_types.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_BIN_TYPES_H +#define JEMALLOC_INTERNAL_BIN_TYPES_H + +#include "jemalloc/internal/sc.h" + +#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) +#define N_BIN_SHARDS_DEFAULT 1 + +/* Used in TSD static initializer only. Real init in arena_bind(). 
*/ +#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}} + +typedef struct tsd_binshards_s tsd_binshards_t; +struct tsd_binshards_s { + uint8_t binshard[SC_NBINS]; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_TYPES_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 4dc2274a..00a9500b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" @@ -74,7 +75,7 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ - O(binshard, unsigned, unsigned) \ + O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tcache, tcache_t, tcache_t) \ O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD @@ -94,7 +95,7 @@ typedef void (*test_callback_t)(int *); NULL, \ NULL, \ NULL, \ - ((unsigned)-1), \ + TSD_BINSHARDS_ZERO_INITIALIZER, \ TCACHE_ZERO_INITIALIZER, \ WITNESS_TSD_INITIALIZER \ MALLOC_TEST_TSD_INITIALIZER \ diff --git a/src/arena.c b/src/arena.c index 7017bd7a..d34de859 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1349,8 +1349,7 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { *binshard = 0; } else { - *binshard = tsd_binshard_get(tsdn_tsd(tsdn)) % - bin_infos[binind].n_shards; + *binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; } assert(*binshard < bin_infos[binind].n_shards); bin = &arena->bins[binind].bin_shards[*binshard]; diff --git a/src/jemalloc.c b/src/jemalloc.c index 1f7ed2eb..1620d0d3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -379,9 +379,14 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { tsd_iarena_set(tsd, 
arena); } else { tsd_arena_set(tsd, arena); - unsigned binshard = atomic_fetch_add_u(&arena->binshard_next, 1, - ATOMIC_RELAXED) % BIN_SHARDS_MAX; - tsd_binshard_set(tsd, binshard); + unsigned shard = atomic_fetch_add_u(&arena->binshard_next, 1, + ATOMIC_RELAXED); + tsd_binshards_t *bins = tsd_binshardsp_get(tsd); + for (unsigned i = 0; i < SC_NBINS; i++) { + assert(bin_infos[i].n_shards > 0 && + bin_infos[i].n_shards <= BIN_SHARDS_MAX); + bins->binshard[i] = shard % bin_infos[i].n_shards; + } } } From 711a61f3b41880718eb23fcfdd572d0daa5fb6ca Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 28 Nov 2018 16:23:18 -0800 Subject: [PATCH 1229/2608] Add unit test for sharded bins. --- Makefile.in | 1 + test/unit/binshard.c | 103 ++++++++++++++++++++++++++++++++++++++++++ test/unit/binshard.sh | 3 ++ 3 files changed, 107 insertions(+) create mode 100644 test/unit/binshard.c create mode 100644 test/unit/binshard.sh diff --git a/Makefile.in b/Makefile.in index c9bd95a3..31a9ceaa 100644 --- a/Makefile.in +++ b/Makefile.in @@ -169,6 +169,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ + $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/test/unit/binshard.c b/test/unit/binshard.c new file mode 100644 index 00000000..829ba433 --- /dev/null +++ b/test/unit/binshard.c @@ -0,0 +1,103 @@ +#include "test/jemalloc_test.h" + +/* Config -- "narenas:1,bin_shards:1-160:16|129-512:4|256-256:8" */ + +static void * +thd_start(void *varg) { + void *ptr, *ptr2; + extent_t *extent; + unsigned shard1, shard2; + + tsdn_t *tsdn = tsdn_fetch(); + /* Try triggering allocations from sharded bins. 
*/ + for (unsigned i = 0; i < 1024; i++) { + ptr = mallocx(1, MALLOCX_TCACHE_NONE); + ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); + + extent = iealloc(tsdn, ptr); + shard1 = extent_binshard_get(extent); + dallocx(ptr, 0); + assert_u_lt(shard1, 16, "Unexpected bin shard used"); + + extent = iealloc(tsdn, ptr2); + shard2 = extent_binshard_get(extent); + dallocx(ptr2, 0); + assert_u_lt(shard2, 4, "Unexpected bin shard used"); + + if (shard1 > 0 || shard2 > 0) { + /* Triggered sharded bin usage. */ + return (void *)(uintptr_t)shard1; + } + } + + return NULL; +} + +TEST_BEGIN(test_bin_shard_mt) { +#define NTHREADS 16 + thd_t thds[NTHREADS]; + unsigned i; + for (i = 0; i < NTHREADS; i++) { + thd_create(&thds[i], thd_start, NULL); + } + bool sharded = false; + for (i = 0; i < NTHREADS; i++) { + void *ret; + thd_join(thds[i], &ret); + if (ret != NULL) { + sharded = true; + } + } + assert_b_eq(sharded, true, "Did not find sharded bins"); +} +TEST_END + +TEST_BEGIN(test_bin_shard) { + unsigned nbins, i; + size_t mib[4], mib2[4]; + size_t miblen, miblen2, len; + + len = sizeof(nbins); + assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, + "Unexpected mallctl() failure"); + + miblen = 4; + assert_d_eq(mallctlnametomib("arenas.bin.0.nshards", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + miblen2 = 4; + assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib2, &miblen2), 0, + "Unexpected mallctlnametomib() failure"); + + for (i = 0; i < nbins; i++) { + uint32_t nshards; + size_t size, sz1, sz2; + + mib[2] = i; + sz1 = sizeof(nshards); + assert_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, + NULL, 0), 0, "Unexpected mallctlbymib() failure"); + + mib2[2] = i; + sz2 = sizeof(size); + assert_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2, + NULL, 0), 0, "Unexpected mallctlbymib() failure"); + + if (size >= 1 && size <= 128) { + assert_u_eq(nshards, 16, "Unexpected nshards"); + } else if (size == 256) { + assert_u_eq(nshards, 8, 
"Unexpected nshards"); + } else if (size > 128 && size <= 512) { + assert_u_eq(nshards, 4, "Unexpected nshards"); + } else { + assert_u_eq(nshards, 1, "Unexpected nshards"); + } + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_bin_shard, + test_bin_shard_mt); +} diff --git a/test/unit/binshard.sh b/test/unit/binshard.sh new file mode 100644 index 00000000..c1d58c88 --- /dev/null +++ b/test/unit/binshard.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8" From 99f4eefb61ae1f13e47af6eac34748fd0a789404 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Dec 2018 18:06:04 -0800 Subject: [PATCH 1230/2608] Fix incorrect stats mreging with sharded bins. With sharded bins, we may not flush all items from the same arena in one run. Adjust the stats merging logic accordingly. --- src/tcache.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 51e3131e..92be273a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -134,8 +134,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (config_stats && bin_arena == arena) { - assert(!merged_stats); + if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; From 36de5189c70fee959ebcdfadd8dfa374ff430de5 Mon Sep 17 00:00:00 2001 From: Alexander Zinoviev Date: Mon, 10 Dec 2018 11:29:44 -0800 Subject: [PATCH 1231/2608] Add rate counters to stats --- include/jemalloc/internal/emitter.h | 2 + include/jemalloc/internal/mutex_prof.h | 25 +- src/stats.c | 432 +++++++++++++------------ 3 files changed, 238 insertions(+), 221 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index f8da2285..0a8bc2c0 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -45,7 +45,9 @@ 
struct emitter_col_s { int int_val; unsigned unsigned_val; uint32_t uint32_val; + uint32_t uint32_t_val; uint64_t uint64_val; + uint64_t uint64_t_val; size_t size_val; ssize_t ssize_val; const char *str_val; diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index ce183d33..2cb8fb0c 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -35,22 +35,31 @@ typedef enum { mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; +/* + * The fourth parameter is a boolean value that is true for derived rate counters + * and false for real ones. + */ #define MUTEX_PROF_UINT64_COUNTERS \ - OP(num_ops, uint64_t, "n_lock_ops") \ - OP(num_wait, uint64_t, "n_waiting") \ - OP(num_spin_acq, uint64_t, "n_spin_acq") \ - OP(num_owner_switch, uint64_t, "n_owner_switch") \ - OP(total_wait_time, uint64_t, "total_wait_ns") \ - OP(max_wait_time, uint64_t, "max_wait_ns") + OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \ + OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \ + OP(num_wait, uint64_t, "n_waiting", false, num_wait) \ + OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \ + OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \ + OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \ + OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch) \ + OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \ + OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \ + OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \ + OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time) #define MUTEX_PROF_UINT32_COUNTERS \ - OP(max_num_thds, uint32_t, "max_n_thds") + OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds) #define MUTEX_PROF_COUNTERS \ MUTEX_PROF_UINT64_COUNTERS \ MUTEX_PROF_UINT32_COUNTERS -#define OP(counter, type, human) mutex_counter_##counter, +#define OP(counter, type, human, derived,
base_counter) mutex_counter_##counter, #define COUNTER_ENUM(counter_list, t) \ typedef enum { \ diff --git a/src/stats.c b/src/stats.c index e2a1100c..f105e260 100644 --- a/src/stats.c +++ b/src/stats.c @@ -52,6 +52,20 @@ char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; /******************************************************************************/ +static uint64_t +rate_per_second(uint64_t value, uint64_t uptime_ns) { + uint64_t billion = 1000000000; + if (uptime_ns == 0 || value == 0) { + return 0; + } + if (uptime_ns < billion) { + return value; + } else { + uint64_t uptime_s = uptime_ns / billion; + return value / uptime_s; + } +} + /* Calculate x.yyy and output a string (takes a fixed sized char array). */ static bool get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { @@ -104,12 +118,12 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, #define WIDTH_uint32_t 12 #define WIDTH_uint64_t 16 -#define OP(counter, counter_type, human) \ +#define OP(counter, counter_type, human, derived, base_counter) \ col = &col_##counter_type[k_##counter_type]; \ ++k_##counter_type; \ emitter_col_init(col, row); \ col->justify = emitter_justify_right; \ - col->width = WIDTH_##counter_type; \ + col->width = derived ? 
8 : WIDTH_##counter_type; \ col->type = emitter_type_title; \ col->str_val = human; MUTEX_PROF_COUNTERS @@ -121,7 +135,8 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, static void mutex_stats_read_global(const char *name, emitter_col_t *col_name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; col_name->str_val = name; @@ -129,12 +144,17 @@ mutex_stats_read_global(const char *name, emitter_col_t *col_name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", name, #counter); \ - CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", name, #counter); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + } MUTEX_PROF_COUNTERS #undef OP #undef EMITTER_TYPE_uint32_t @@ -145,7 +165,8 @@ static void mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, const char *name, emitter_col_t *col_name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; col_name->str_val = name; @@ -153,13 +174,17 @@ mutex_stats_read_arena(unsigned 
arena_ind, mutex_prof_arena_ind_t mutex_ind, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ - CTL_M2_GET(cmd, arena_ind, \ - (counter_type *)&dst->bool_val, counter_type); + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ + CTL_M2_GET(cmd, arena_ind, (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + } MUTEX_PROF_COUNTERS #undef OP #undef EMITTER_TYPE_uint32_t @@ -169,19 +194,25 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, static void mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #counter); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (counter_type *)&dst->bool_val, counter_type); + if (!derived) { \ + 
gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + } MUTEX_PROF_COUNTERS #undef OP #undef EMITTER_TYPE_uint32_t @@ -204,19 +235,38 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, type, human) \ - col = &col_##type[k_##type]; \ - ++k_##type; \ - emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ - (const void *)&col->bool_val); +#define OP(counter, type, human, derived, base_counter) \ + if (!derived) { \ + col = &col_##type[k_##type]; \ + ++k_##type; \ + emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ + (const void *)&col->bool_val); \ + } MUTEX_PROF_COUNTERS; #undef OP #undef EMITTER_TYPE_uint32_t #undef EMITTER_TYPE_uint64_t } +#define COL(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_t col_##column_name; \ + emitter_col_init(&col_##column_name, &row_name); \ + col_##column_name.justify = emitter_justify_##left_or_right; \ + col_##column_name.width = col_width; \ + col_##column_name.type = emitter_type_##etype; + +#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \ + COL(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_t header_##column_name; \ + emitter_col_init(&header_##column_name, &header_##row_name); \ + header_##column_name.justify = emitter_justify_##left_or_right; \ + header_##column_name.width = col_width; \ + header_##column_name.type = emitter_type_title; \ + header_##column_name.str_val = human ? 
human : #column_name; + + static void -stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -230,44 +280,36 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { emitter_row_t row; emitter_row_init(&row); -#define COL(name, left_or_right, col_width, etype) \ - emitter_col_t col_##name; \ - emitter_col_init(&col_##name, &row); \ - col_##name.justify = emitter_justify_##left_or_right; \ - col_##name.width = col_width; \ - col_##name.type = emitter_type_##etype; \ - emitter_col_t header_col_##name; \ - emitter_col_init(&header_col_##name, &header_row); \ - header_col_##name.justify = emitter_justify_##left_or_right; \ - header_col_##name.width = col_width; \ - header_col_##name.type = emitter_type_title; \ - header_col_##name.str_val = #name; - COL(size, right, 20, size) - COL(ind, right, 4, unsigned) - COL(allocated, right, 13, uint64) - COL(nmalloc, right, 13, uint64) - COL(ndalloc, right, 13, uint64) - COL(nrequests, right, 13, uint64) - COL(nshards, right, 9, unsigned) - COL(curregs, right, 13, size) - COL(curslabs, right, 13, size) - COL(regs, right, 5, unsigned) - COL(pgs, right, 4, size) + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, allocated, NULL, right, 13, uint64) + COL_HDR(row, nmalloc, NULL, right, 13, uint64) + COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, ndalloc, NULL, right, 13, uint64) + COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nrequests, NULL, right, 13, uint64) + COL_HDR(row, nrequests_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nshards, NULL, right, 9, unsigned) + COL_HDR(row, curregs, NULL, right, 13, size) + COL_HDR(row, curslabs, NULL, right, 13, size) + COL_HDR(row, regs, NULL, right, 5, unsigned) + COL_HDR(row, pgs, NULL, right, 4, size) /* To buffer a 
right- and left-justified column. */ - COL(justify_spacer, right, 1, title) - COL(util, right, 6, title) - COL(nfills, right, 13, uint64) - COL(nflushes, right, 13, uint64) - COL(nslabs, right, 13, uint64) - COL(nreslabs, right, 13, uint64) -#undef COL + COL_HDR(row, justify_spacer, NULL, right, 1, title) + COL_HDR(row, util, NULL, right, 6, title) + COL_HDR(row, nfills, NULL, right, 13, uint64) + COL_HDR(row, nfills_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nflushes, NULL, right, 13, uint64) + COL_HDR(row, nflushes_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nslabs, NULL, right, 13, uint64) + COL_HDR(row, nreslabs, NULL, right, 13, uint64) + COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) /* Don't want to actually print the name. */ - header_col_justify_spacer.str_val = " "; + header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; - emitter_col_t col_mutex64[mutex_prof_num_uint64_t_counters]; emitter_col_t col_mutex32[mutex_prof_num_uint32_t_counters]; @@ -285,7 +327,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { * We print a "bins:" header as part of the table row; we need to adjust * the header size column to compensate. 
*/ - header_col_size.width -=5; + header_size.width -=5; emitter_table_printf(emitter, "bins:"); emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "bins"); @@ -332,7 +374,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { if (mutex) { mutex_stats_read_arena_bin(i, j, col_mutex64, - col_mutex32); + col_mutex32, uptime); } emitter_json_object_begin(emitter); @@ -383,8 +425,11 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { col_ind.unsigned_val = j; col_allocated.size_val = curregs * reg_size; col_nmalloc.uint64_val = nmalloc; + col_nmalloc_ps.uint64_val = rate_per_second(nmalloc, uptime); col_ndalloc.uint64_val = ndalloc; + col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; + col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); col_nshards.unsigned_val = nshards; col_curregs.size_val = curregs; col_curslabs.size_val = curslabs; @@ -392,9 +437,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { col_pgs.size_val = slab_size / page; col_util.str_val = util; col_nfills.uint64_val = nfills; + col_nfills_ps.uint64_val = rate_per_second(nfills, uptime); col_nflushes.uint64_val = nflushes; + col_nflushes_ps.uint64_val = rate_per_second(nflushes, uptime); col_nslabs.uint64_val = nslabs; col_nreslabs.uint64_val = nreslabs; + col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); /* * Note that mutex columns were initialized above, if mutex == @@ -411,7 +459,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { } static void -stats_arena_lextents_print(emitter_t *emitter, unsigned i) { +stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { unsigned nbins, nlextents, j; bool in_gap, in_gap_prev; @@ -423,28 +471,16 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i) { emitter_row_t row; emitter_row_init(&row); -#define COL(name, left_or_right, col_width, etype) 
\ - emitter_col_t header_##name; \ - emitter_col_init(&header_##name, &header_row); \ - header_##name.justify = emitter_justify_##left_or_right; \ - header_##name.width = col_width; \ - header_##name.type = emitter_type_title; \ - header_##name.str_val = #name; \ - \ - emitter_col_t col_##name; \ - emitter_col_init(&col_##name, &row); \ - col_##name.justify = emitter_justify_##left_or_right; \ - col_##name.width = col_width; \ - col_##name.type = emitter_type_##etype; - - COL(size, right, 20, size) - COL(ind, right, 4, unsigned) - COL(allocated, right, 13, size) - COL(nmalloc, right, 13, uint64) - COL(ndalloc, right, 13, uint64) - COL(nrequests, right, 13, uint64) - COL(curlextents, right, 13, size) -#undef COL + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, allocated, NULL, right, 13, size) + COL_HDR(row, nmalloc, NULL, right, 13, uint64) + COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, ndalloc, NULL, right, 13, uint64) + COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nrequests, NULL, right, 13, uint64) + COL_HDR(row, nrequests_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, curlextents, NULL, right, 13, size) /* As with bins, we label the large extents table. 
*/ header_size.width -= 6; @@ -483,8 +519,11 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i) { col_ind.unsigned_val = nbins + j; col_allocated.size_val = curlextents * lextent_size; col_nmalloc.uint64_val = nmalloc; + col_nmalloc_ps.uint64_val = rate_per_second(nmalloc, uptime); col_ndalloc.uint64_val = ndalloc; + col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; + col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); col_curlextents.size_val = curlextents; if (!in_gap) { @@ -505,31 +544,17 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { emitter_row_init(&header_row); emitter_row_t row; emitter_row_init(&row); -#define COL(name, left_or_right, col_width, etype) \ - emitter_col_t header_##name; \ - emitter_col_init(&header_##name, &header_row); \ - header_##name.justify = emitter_justify_##left_or_right; \ - header_##name.width = col_width; \ - header_##name.type = emitter_type_title; \ - header_##name.str_val = #name; \ - \ - emitter_col_t col_##name; \ - emitter_col_init(&col_##name, &row); \ - col_##name.justify = emitter_justify_##left_or_right; \ - col_##name.width = col_width; \ - col_##name.type = emitter_type_##etype; - COL(size, right, 20, size) - COL(ind, right, 4, unsigned) - COL(ndirty, right, 13, size) - COL(dirty, right, 13, size) - COL(nmuzzy, right, 13, size) - COL(muzzy, right, 13, size) - COL(nretained, right, 13, size) - COL(retained, right, 13, size) - COL(ntotal, right, 13, size) - COL(total, right, 13, size) -#undef COL + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, ndirty, NULL, right, 13, size) + COL_HDR(row, dirty, NULL, right, 13, size) + COL_HDR(row, nmuzzy, NULL, right, 13, size) + COL_HDR(row, muzzy, NULL, right, 13, size) + COL_HDR(row, nretained, NULL, right, 13, size) + COL_HDR(row, retained, NULL, right, 13, size) + COL_HDR(row, ntotal, NULL, right, 13, size) + COL_HDR(row, total, NULL, 
right, 13, size) /* Label this section. */ header_size.width -= 8; @@ -600,7 +625,7 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { +stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; emitter_col_t col_name; emitter_col_t col64[mutex_prof_num_uint64_t_counters]; @@ -617,7 +642,7 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, - col32); + col32, uptime); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); /* Close the mutex dict. */ } @@ -699,98 +724,74 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, &muzzy_purged); /* Table-style emission. */ - emitter_col_t decay_type; - emitter_col_init(&decay_type, &decay_row); - decay_type.justify = emitter_justify_right; - decay_type.width = 9; - decay_type.type = emitter_type_title; - decay_type.str_val = "decaying:"; + COL(decay_row, decay_type, right, 9, title); + col_decay_type.str_val = "decaying:"; - emitter_col_t decay_time; - emitter_col_init(&decay_time, &decay_row); - decay_time.justify = emitter_justify_right; - decay_time.width = 6; - decay_time.type = emitter_type_title; - decay_time.str_val = "time"; + COL(decay_row, decay_time, right, 6, title); + col_decay_time.str_val = "time"; - emitter_col_t decay_npages; - emitter_col_init(&decay_npages, &decay_row); - decay_npages.justify = emitter_justify_right; - decay_npages.width = 13; - decay_npages.type = emitter_type_title; - decay_npages.str_val = "npages"; + COL(decay_row, decay_npages, right, 13, title); + col_decay_npages.str_val = "npages"; - emitter_col_t decay_sweeps; - emitter_col_init(&decay_sweeps, &decay_row); - decay_sweeps.justify = emitter_justify_right; - decay_sweeps.width = 13; 
- decay_sweeps.type = emitter_type_title; - decay_sweeps.str_val = "sweeps"; + COL(decay_row, decay_sweeps, right, 13, title); + col_decay_sweeps.str_val = "sweeps"; - emitter_col_t decay_madvises; - emitter_col_init(&decay_madvises, &decay_row); - decay_madvises.justify = emitter_justify_right; - decay_madvises.width = 13; - decay_madvises.type = emitter_type_title; - decay_madvises.str_val = "madvises"; + COL(decay_row, decay_madvises, right, 13, title); + col_decay_madvises.str_val = "madvises"; - emitter_col_t decay_purged; - emitter_col_init(&decay_purged, &decay_row); - decay_purged.justify = emitter_justify_right; - decay_purged.width = 13; - decay_purged.type = emitter_type_title; - decay_purged.str_val = "purged"; + COL(decay_row, decay_purged, right, 13, title); + col_decay_purged.str_val = "purged"; /* Title row. */ emitter_table_row(emitter, &decay_row); /* Dirty row. */ - decay_type.str_val = "dirty:"; + col_decay_type.str_val = "dirty:"; if (dirty_decay_ms >= 0) { - decay_time.type = emitter_type_ssize; - decay_time.ssize_val = dirty_decay_ms; + col_decay_time.type = emitter_type_ssize; + col_decay_time.ssize_val = dirty_decay_ms; } else { - decay_time.type = emitter_type_title; - decay_time.str_val = "N/A"; + col_decay_time.type = emitter_type_title; + col_decay_time.str_val = "N/A"; } - decay_npages.type = emitter_type_size; - decay_npages.size_val = pdirty; + col_decay_npages.type = emitter_type_size; + col_decay_npages.size_val = pdirty; - decay_sweeps.type = emitter_type_uint64; - decay_sweeps.uint64_val = dirty_npurge; + col_decay_sweeps.type = emitter_type_uint64; + col_decay_sweeps.uint64_val = dirty_npurge; - decay_madvises.type = emitter_type_uint64; - decay_madvises.uint64_val = dirty_nmadvise; + col_decay_madvises.type = emitter_type_uint64; + col_decay_madvises.uint64_val = dirty_nmadvise; - decay_purged.type = emitter_type_uint64; - decay_purged.uint64_val = dirty_purged; + col_decay_purged.type = emitter_type_uint64; + 
col_decay_purged.uint64_val = dirty_purged; emitter_table_row(emitter, &decay_row); /* Muzzy row. */ - decay_type.str_val = "muzzy:"; + col_decay_type.str_val = "muzzy:"; if (muzzy_decay_ms >= 0) { - decay_time.type = emitter_type_ssize; - decay_time.ssize_val = muzzy_decay_ms; + col_decay_time.type = emitter_type_ssize; + col_decay_time.ssize_val = muzzy_decay_ms; } else { - decay_time.type = emitter_type_title; - decay_time.str_val = "N/A"; + col_decay_time.type = emitter_type_title; + col_decay_time.str_val = "N/A"; } - decay_npages.type = emitter_type_size; - decay_npages.size_val = pmuzzy; + col_decay_npages.type = emitter_type_size; + col_decay_npages.size_val = pmuzzy; - decay_sweeps.type = emitter_type_uint64; - decay_sweeps.uint64_val = muzzy_npurge; + col_decay_sweeps.type = emitter_type_uint64; + col_decay_sweeps.uint64_val = muzzy_npurge; - decay_madvises.type = emitter_type_uint64; - decay_madvises.uint64_val = muzzy_nmadvise; + col_decay_madvises.type = emitter_type_uint64; + col_decay_madvises.uint64_val = muzzy_nmadvise; - decay_purged.type = emitter_type_uint64; - decay_purged.uint64_val = muzzy_purged; + col_decay_purged.type = emitter_type_uint64; + col_decay_purged.uint64_val = muzzy_purged; emitter_table_row(emitter, &decay_row); @@ -798,69 +799,71 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_row_t alloc_count_row; emitter_row_init(&alloc_count_row); - emitter_col_t alloc_count_title; - emitter_col_init(&alloc_count_title, &alloc_count_row); - alloc_count_title.justify = emitter_justify_left; - alloc_count_title.width = 21; - alloc_count_title.type = emitter_type_title; - alloc_count_title.str_val = ""; + COL(alloc_count_row, count_title, left, 21, title); + col_count_title.str_val = ""; - emitter_col_t alloc_count_allocated; - emitter_col_init(&alloc_count_allocated, &alloc_count_row); - alloc_count_allocated.justify = emitter_justify_right; - alloc_count_allocated.width = 16; - alloc_count_allocated.type 
= emitter_type_title; - alloc_count_allocated.str_val = "allocated"; + COL(alloc_count_row, count_allocated, right, 16, title); + col_count_allocated.str_val = "allocated"; - emitter_col_t alloc_count_nmalloc; - emitter_col_init(&alloc_count_nmalloc, &alloc_count_row); - alloc_count_nmalloc.justify = emitter_justify_right; - alloc_count_nmalloc.width = 16; - alloc_count_nmalloc.type = emitter_type_title; - alloc_count_nmalloc.str_val = "nmalloc"; + COL(alloc_count_row, count_nmalloc, right, 16, title); + col_count_nmalloc.str_val = "nmalloc"; + COL(alloc_count_row, count_nmalloc_ps, right, 8, title); + col_count_nmalloc_ps.str_val = "(#/sec)"; - emitter_col_t alloc_count_ndalloc; - emitter_col_init(&alloc_count_ndalloc, &alloc_count_row); - alloc_count_ndalloc.justify = emitter_justify_right; - alloc_count_ndalloc.width = 16; - alloc_count_ndalloc.type = emitter_type_title; - alloc_count_ndalloc.str_val = "ndalloc"; + COL(alloc_count_row, count_ndalloc, right, 16, title); + col_count_ndalloc.str_val = "ndalloc"; + COL(alloc_count_row, count_ndalloc_ps, right, 8, title); + col_count_ndalloc_ps.str_val = "(#/sec)"; - emitter_col_t alloc_count_nrequests; - emitter_col_init(&alloc_count_nrequests, &alloc_count_row); - alloc_count_nrequests.justify = emitter_justify_right; - alloc_count_nrequests.width = 16; - alloc_count_nrequests.type = emitter_type_title; - alloc_count_nrequests.str_val = "nrequests"; + COL(alloc_count_row, count_nrequests, right, 16, title); + col_count_nrequests.str_val = "nrequests"; + COL(alloc_count_row, count_nrequests_ps, right, 8, title); + col_count_nrequests_ps.str_val = "(#/sec)"; emitter_table_row(emitter, &alloc_count_row); + col_count_nmalloc_ps.type = emitter_type_uint64; + col_count_ndalloc_ps.type = emitter_type_uint64; + col_count_nrequests_ps.type = emitter_type_uint64; + #define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ CTL_M2_GET("stats.arenas.0." #small_or_large "." 
#name, i, \ &small_or_large##_##name, valtype##_t); \ emitter_json_kv(emitter, #name, emitter_type_##valtype, \ &small_or_large##_##name); \ - alloc_count_##name.type = emitter_type_##valtype; \ - alloc_count_##name.valtype##_val = small_or_large##_##name; + col_count_##name.type = emitter_type_##valtype; \ + col_count_##name.valtype##_val = small_or_large##_##name; emitter_json_object_kv_begin(emitter, "small"); - alloc_count_title.str_val = "small:"; + col_count_title.str_val = "small:"; GET_AND_EMIT_ALLOC_STAT(small, allocated, size) GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) + col_count_nmalloc_ps.uint64_val = + rate_per_second(col_count_nmalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) + col_count_ndalloc_ps.uint64_val = + rate_per_second(col_count_ndalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) + col_count_nrequests_ps.uint64_val = + rate_per_second(col_count_nrequests.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "small". */ emitter_json_object_kv_begin(emitter, "large"); - alloc_count_title.str_val = "large:"; + col_count_title.str_val = "large:"; GET_AND_EMIT_ALLOC_STAT(large, allocated, size) GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) + col_count_nmalloc_ps.uint64_val = + rate_per_second(col_count_nmalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) + col_count_ndalloc_ps.uint64_val = + rate_per_second(col_count_ndalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) + col_count_nrequests_ps.uint64_val = + rate_per_second(col_count_nrequests.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "large". */ @@ -868,11 +871,11 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, #undef GET_AND_EMIT_ALLOC_STAT /* Aggregated small + large stats are emitter only in table mode. 
*/ - alloc_count_title.str_val = "total:"; - alloc_count_allocated.size_val = small_allocated + large_allocated; - alloc_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; - alloc_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; - alloc_count_nrequests.uint64_val = small_nrequests + large_nrequests; + col_count_title.str_val = "total:"; + col_count_allocated.size_val = small_allocated + large_allocated; + col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; + col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; + col_count_nrequests.uint64_val = small_nrequests + large_nrequests; emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; @@ -918,13 +921,13 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, #undef GET_AND_EMIT_MEM_STAT if (mutex) { - stats_arena_mutexes_print(emitter, i); + stats_arena_mutexes_print(emitter, i, uptime); } if (bins) { - stats_arena_bins_print(emitter, mutex, i); + stats_arena_bins_print(emitter, mutex, i, uptime); } if (large) { - stats_arena_lextents_print(emitter, i); + stats_arena_lextents_print(emitter, i, uptime); } if (extents) { stats_arena_extents_print(emitter, i); @@ -1246,6 +1249,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_col_t name; emitter_col_t col64[mutex_prof_num_uint64_t_counters]; emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + uint64_t uptime; emitter_row_init(&row); mutex_stats_init_cols(&row, "", &name, col64, col32); @@ -1253,9 +1257,11 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_row(emitter, &row); emitter_json_object_kv_begin(emitter, "mutexes"); + CTL_M2_GET("stats.arenas.0.uptime", 0, &uptime, uint64_t); + for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_read_global(global_mutex_names[i], &name, - col64, col32); + col64, col32, uptime); emitter_json_object_kv_begin(emitter, global_mutex_names[i]); mutex_stats_emit(emitter, 
&row, col64, col32); emitter_json_object_end(emitter); From 441335d924984022a3e17c3f013a0ad33806a5ff Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 17 Dec 2018 15:29:37 -0800 Subject: [PATCH 1232/2608] Add unit test for producer-consumer pattern. --- test/unit/binshard.c | 52 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 2 deletions(-) diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 829ba433..406c46ca 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -2,6 +2,54 @@ /* Config -- "narenas:1,bin_shards:1-160:16|129-512:4|256-256:8" */ +#define NTHREADS 16 +#define REMOTE_NALLOC 256 + +static void * +thd_producer(void *varg) { + void **mem = varg; + unsigned arena, i; + size_t sz; + + sz = sizeof(arena); + /* Remote arena. */ + assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + for (i = 0; i < REMOTE_NALLOC / 2; i++) { + mem[i] = mallocx(1, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena)); + } + + /* Remote bin. */ + for (; i < REMOTE_NALLOC; i++) { + mem[i] = mallocx(1, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(0)); + } + + return NULL; +} + +TEST_BEGIN(test_producer_consumer) { + thd_t thds[NTHREADS]; + void *mem[NTHREADS][REMOTE_NALLOC]; + unsigned i; + + /* Create producer threads to allocate. */ + for (i = 0; i < NTHREADS; i++) { + thd_create(&thds[i], thd_producer, mem[i]); + } + for (i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } + /* Remote deallocation by the current thread. 
*/ + for (i = 0; i < NTHREADS; i++) { + for (unsigned j = 0; j < REMOTE_NALLOC; j++) { + assert_ptr_not_null(mem[i][j], + "Unexpected remote allocation failure"); + dallocx(mem[i][j], 0); + } + } +} +TEST_END + static void * thd_start(void *varg) { void *ptr, *ptr2; @@ -34,7 +82,6 @@ thd_start(void *varg) { } TEST_BEGIN(test_bin_shard_mt) { -#define NTHREADS 16 thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { @@ -99,5 +146,6 @@ int main(void) { return test_no_reentrancy( test_bin_shard, - test_bin_shard_mt); + test_bin_shard_mt, + test_producer_consumer); } From 7241bf5b745ba5ec24b26b0ef2bd30b1c0a428dc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 3 Dec 2018 18:30:58 -0800 Subject: [PATCH 1233/2608] Only read arena index from extent on the tcache flush path. Add extent_arena_ind_get() to avoid loading the actual arena ptr in case we just need to check arena matching. --- include/jemalloc/internal/extent_inlines.h | 19 ++++++++++--------- src/tcache.c | 14 +++++++++----- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index b5728608..63b710dc 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -35,18 +35,19 @@ extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { (uintptr_t)extent2); } -static inline arena_t * -extent_arena_get(const extent_t *extent) { +static inline unsigned +extent_arena_ind_get(const extent_t *extent) { unsigned arena_ind = (unsigned)((extent->e_bits & EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); - /* - * The following check is omitted because we should never actually read - * a NULL arena pointer.
- */ - if (false && arena_ind >= MALLOCX_ARENA_LIMIT) { - return NULL; - } assert(arena_ind < MALLOCX_ARENA_LIMIT); + + return arena_ind; +} + +static inline arena_t * +extent_arena_get(const extent_t *extent) { + unsigned arena_ind = extent_arena_ind_get(extent); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); } diff --git a/src/tcache.c b/src/tcache.c index 92be273a..182e8bf4 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -120,7 +120,9 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, while (nflush > 0) { /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; - arena_t *bin_arena = extent_arena_get(extent); + unsigned bin_arena_ind = extent_arena_ind_get(extent); + arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind, + false); unsigned binshard = extent_binshard_get(extent); assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; @@ -146,7 +148,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, extent = item_extent[i]; assert(ptr != NULL && extent != NULL); - if (extent_arena_get(extent) == bin_arena + if (extent_arena_ind_get(extent) == bin_arena_ind && extent_binshard_get(extent) == binshard) { arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), bin_arena, bin, binind, extent, ptr); @@ -208,7 +210,9 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, while (nflush > 0) { /* Lock the arena associated with the first object. 
*/ extent_t *extent = item_extent[0]; - arena_t *locked_arena = extent_arena_get(extent); + unsigned locked_arena_ind = extent_arena_ind_get(extent); + arena_t *locked_arena = arena_get(tsd_tsdn(tsd), + locked_arena_ind, false); bool idump; if (config_prof) { @@ -223,7 +227,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, void *ptr = *(tbin->avail - 1 - i); assert(ptr != NULL); extent = item_extent[i]; - if (extent_arena_get(extent) == locked_arena) { + if (extent_arena_ind_get(extent) == locked_arena_ind) { large_dalloc_prep_junked_locked(tsd_tsdn(tsd), extent); } @@ -253,7 +257,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, extent = item_extent[i]; assert(ptr != NULL && extent != NULL); - if (extent_arena_get(extent) == locked_arena) { + if (extent_arena_ind_get(extent) == locked_arena_ind) { large_dalloc_finish(tsd_tsdn(tsd), extent); } else { /* From 4e920d2c9d5aecc9dec7069a0c9736b1f14eead9 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 14 Dec 2018 15:28:34 -0500 Subject: [PATCH 1234/2608] Add --{enable,disable}-{static,shared} to configure script My distro offers a custom toolchain where it's not possible to make static libs, so it's insufficient to just delete the libs I don't want. I actually need to avoid building them in the first place. 
--- Makefile.in | 17 +++++++++++++++-- configure.ac | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index 31a9ceaa..b788a09a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -55,6 +55,8 @@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ +enable_shared := @enable_shared@ +enable_static := @enable_static@ enable_prof := @enable_prof@ enable_zone_allocator := @enable_zone_allocator@ enable_experimental_smallocx := @enable_experimental_smallocx@ @@ -430,7 +432,12 @@ $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TE build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) -build_lib: build_lib_shared build_lib_static +ifeq ($(enable_shared), 1) +build_lib: build_lib_shared +endif +ifeq ($(enable_static), 1) +build_lib: build_lib_static +endif install_bin: $(INSTALL) -d $(BINDIR) @@ -467,7 +474,13 @@ install_lib_pc: $(PC) $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done -install_lib: install_lib_shared install_lib_static install_lib_pc +ifeq ($(enable_shared), 1) +install_lib: install_lib_shared +endif +ifeq ($(enable_static), 1) +install_lib: install_lib_static +endif +install_lib: install_lib_pc install_doc_html: $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) diff --git a/configure.ac b/configure.ac index 072808cb..e9093e81 100644 --- a/configure.ac +++ b/configure.ac @@ -878,6 +878,36 @@ AC_PROG_RANLIB AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) +dnl Enable shared libs +AC_ARG_ENABLE([shared], + [AS_HELP_STRING([--enable-shared], [Build shared libaries])], +if test "x$enable_shared" = "xno" ; then + enable_shared="0" +else + enable_shared="1" +fi +, +enable_shared="1" +) +AC_SUBST([enable_shared]) + +dnl Enable static libs +AC_ARG_ENABLE([static], + [AS_HELP_STRING([--enable-static], [Build 
static libaries])], +if test "x$enable_static" = "xno" ; then + enable_static="0" +else + enable_static="1" +fi +, +enable_static="1" +) +AC_SUBST([enable_static]) + +if test "$enable_shared$enable_static" = "00" ; then + AC_MSG_ERROR([Please enable one of shared or static builds]) +fi + dnl Perform no name mangling by default. AC_ARG_WITH([mangling], [AS_HELP_STRING([--with-mangling=], [Mangle symbols in ])], @@ -2297,6 +2327,8 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : ${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) +AC_MSG_RESULT([shared libs : ${enable_shared}]) +AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) From daa0e436ba232d67b832e1b270b13c5061eebfe9 Mon Sep 17 00:00:00 2001 From: Leonardo Santagada Date: Wed, 31 Oct 2018 12:03:42 +0100 Subject: [PATCH 1235/2608] implement malloc_getcpu for windows --- include/jemalloc/internal/jemalloc_internal_inlines_a.h | 4 +++- include/jemalloc/internal/jemalloc_preamble.h.in | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 8adc02a6..ddde9b4e 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -10,7 +10,9 @@ JEMALLOC_ALWAYS_INLINE malloc_cpuid_t malloc_getcpu(void) { assert(have_percpu_arena); -#if defined(JEMALLOC_HAVE_SCHED_GETCPU) +#if defined(_WIN32) + return GetCurrentProcessorNumber(); +#elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); #else not_reached(); diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 857fa326..4bfdb32c 100644 --- 
a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -161,7 +161,7 @@ static const bool config_log = false #endif ; -#ifdef JEMALLOC_HAVE_SCHED_GETCPU +#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif From 471191075d6a88eb1364fb5f332237eb3d512872 Mon Sep 17 00:00:00 2001 From: Faidon Liambotis Date: Tue, 8 Jan 2019 03:31:53 +0200 Subject: [PATCH 1236/2608] Replace -lpthread with -pthread This automatically adds -latomic if and when needed, e.g. on riscv64 systems. Fixes #1401. --- Makefile.in | 2 +- configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.in b/Makefile.in index b788a09a..2d59e595 100644 --- a/Makefile.in +++ b/Makefile.in @@ -420,7 +420,7 @@ $(objroot)test/unit/%$(EXE): $(objroot)test/unit/%.$(O) $(C_JET_OBJS) $(C_TESTLI $(objroot)test/integration/%$(EXE): $(objroot)test/integration/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) - $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -lpthread -lstdc++,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) + $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LJEMALLOC) $(LDFLAGS) $(filter-out -lm,$(filter -lrt -pthread -lstdc++,$(LIBS))) $(LM) $(EXTRA_LDFLAGS) $(objroot)test/integration/cpp/%$(EXE): $(objroot)test/integration/cpp/%.$(O) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) diff --git a/configure.ac b/configure.ac index e9093e81..c0911db1 100644 --- a/configure.ac +++ b/configure.ac @@ -1579,7 +1579,7 @@ if test "x$abi" != "xpecoff" ; then AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; 
check for libpthread dnl first, but try libc too before failing. - AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -lpthread)], + AC_CHECK_LIB([pthread], [pthread_create], [JE_APPEND_VS(LIBS, -pthread)], [AC_SEARCH_LIBS([pthread_create], , , AC_MSG_ERROR([libpthread is missing]))]) wrap_syms="${wrap_syms} pthread_create" From 6910fcb208e2703f72bcbfbd1db22426d02b1e27 Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Fri, 4 Jan 2019 17:07:09 +0800 Subject: [PATCH 1237/2608] Add Cirrus-CI config for FreeBSD builds --- .cirrus.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .cirrus.yml diff --git a/.cirrus.yml b/.cirrus.yml new file mode 100644 index 00000000..019d2c38 --- /dev/null +++ b/.cirrus.yml @@ -0,0 +1,21 @@ +env: + CIRRUS_CLONE_DEPTH: 1 + ARCH: amd64 + +task: + freebsd_instance: + matrix: + image: freebsd-12-0-release-amd64 + image: freebsd-11-2-release-amd64 + install_script: + - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf + - pkg upgrade -y + - pkg install -y autoconf gmake + script: + - autoconf + #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS + - ./configure + - export JFLAG=`sysctl -n kern.smp.cpus` + - gmake -j${JFLAG} + - gmake -j${JFLAG} tests + - gmake check From 646af596d8c4ffefc1f7edf432aa2b4e669bcc78 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 9 Jan 2019 17:07:11 -0800 Subject: [PATCH 1238/2608] Customize cloning to include tags so that VERSION is valid. 
--- .cirrus.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 019d2c38..8b1b38dd 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -10,7 +10,11 @@ task: install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y - - pkg install -y autoconf gmake + - pkg install -y autoconf git gmake + clone_script: + - git clone --tags --branch=${CIRRUS_BASE_BRANCH} https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + - git fetch origin ${CIRRUS_BRANCH}/head:${CIRRUS_BRANCH} + - git checkout ${CIRRUS_BRANCH} script: - autoconf #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS From fc13a7f1fa7d1cfc1d393d7a448e68d0f433d840 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 11 Jan 2019 12:38:14 -0800 Subject: [PATCH 1239/2608] Remove --branch=${CIRRUS_BASE_BRANCH} in git clone command. The --branch parameter is unnecessary, and may avoid problems when testing directly on the dev branch. 
--- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 8b1b38dd..5e6756a3 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -12,7 +12,7 @@ task: - pkg upgrade -y - pkg install -y autoconf git gmake clone_script: - - git clone --tags --branch=${CIRRUS_BASE_BRANCH} https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + - git clone --tags https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} - git fetch origin ${CIRRUS_BRANCH}/head:${CIRRUS_BRANCH} - git checkout ${CIRRUS_BRANCH} script: From 0ecd5addb1215f5ae9fad2b9cb4cf91ed5376ee8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 11 Jan 2019 11:22:11 -0800 Subject: [PATCH 1240/2608] Force purge on thread death only when w/o bg thds. --- src/tcache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 182e8bf4..9125179a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -527,8 +527,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), false, false); - unsigned nthreads = arena_nthreads_get(arena, false); - if (nthreads == 0) { + if (arena_nthreads_get(arena, false) == 0 && + !background_thread_enabled()) { /* Force purging when no threads assigned to the arena anymore. */ arena_decay(tsd_tsdn(tsd), arena, false, true); } else { From f459454afe019251712728b983d2eed0b03f5c80 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Aug 2018 16:08:50 -0700 Subject: [PATCH 1241/2608] Avoid potential issues on extent zero-out. When custom extent_hooks or transparent huge pages are in use, the purging semantics may change, which means we may not get zeroed pages on repopulating. Fixing the issue by manually memset for such cases. 
--- include/jemalloc/internal/arena_inlines_b.h | 5 +++++ src/extent.c | 25 +++++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 3d0121d5..c7d35b74 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,6 +8,11 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +JEMALLOC_ALWAYS_INLINE bool +arena_has_default_hooks(arena_t *arena) { + return (extent_hooks_get(arena) == &extent_hooks_default); +} + JEMALLOC_ALWAYS_INLINE arena_t * arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { if (arena != NULL) { diff --git a/src/extent.c b/src/extent.c index 9605dacc..fd6c837f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1102,6 +1102,17 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, unreachable(); } +static bool +extent_need_manual_zero(arena_t *arena) { + /* + * Need to manually zero the extent on repopulating if either; 1) non + * default extent hooks installed (in which case the purge semantics may + * change); or 2) transparent huge pages enabled. + */ + return (!arena_has_default_hooks(arena) || + (opt_thp == thp_mode_always)); +} + /* * Tries to satisfy the given allocation request by reusing one of the extents * in the given extents_t. 
@@ -1141,7 +1152,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent, growing_retained); return NULL; } - extent_zeroed_set(extent, true); + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } } if (extent_committed_get(extent)) { @@ -1164,7 +1177,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *addr = extent_base_get(extent); if (!extent_zeroed_get(extent)) { size_t size = extent_size_get(extent); - if (pages_purge_forced(addr, size)) { + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { memset(addr, 0, size); } } else if (config_debug) { @@ -1391,7 +1405,9 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, &arena->extents_retained, extent, true); goto label_err; } - extent_zeroed_set(extent, true); + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } } /* @@ -1425,7 +1441,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (*zero && !extent_zeroed_get(extent)) { void *addr = extent_base_get(extent); size_t size = extent_size_get(extent); - if (pages_purge_forced(addr, size)) { + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { memset(addr, 0, size); } } From 225d89998bae562b13b681f74019697b66e07f02 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Jan 2019 07:10:39 -0800 Subject: [PATCH 1242/2608] Revert "Remove --branch=${CIRRUS_BASE_BRANCH} in git clone command." This reverts commit fc13a7f1fa7d1cfc1d393d7a448e68d0f433d840. 
--- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 5e6756a3..8b1b38dd 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -12,7 +12,7 @@ task: - pkg upgrade -y - pkg install -y autoconf git gmake clone_script: - - git clone --tags https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} + - git clone --tags --branch=${CIRRUS_BASE_BRANCH} https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} - git fetch origin ${CIRRUS_BRANCH}/head:${CIRRUS_BRANCH} - git checkout ${CIRRUS_BRANCH} script: From b6f1f2669a0961fa463afede7d4b190d79647c90 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 14 Jan 2019 07:11:04 -0800 Subject: [PATCH 1243/2608] Revert "Customize cloning to include tags so that VERSION is valid." This reverts commit 646af596d8c4ffefc1f7edf432aa2b4e669bcc78. --- .cirrus.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 8b1b38dd..019d2c38 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -10,11 +10,7 @@ task: install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y - - pkg install -y autoconf git gmake - clone_script: - - git clone --tags --branch=${CIRRUS_BASE_BRANCH} https://x-access-token:${CIRRUS_REPO_CLONE_TOKEN}@github.com/${CIRRUS_REPO_FULL_NAME}.git ${CIRRUS_WORKING_DIR} - - git fetch origin ${CIRRUS_BRANCH}/head:${CIRRUS_BRANCH} - - git checkout ${CIRRUS_BRANCH} + - pkg install -y autoconf gmake script: - autoconf #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS From bbe8e6a9097203c7b29140b5410c787a6e204593 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Jan 2019 14:16:09 -0800 Subject: [PATCH 1244/2608] Avoid creating bg thds for huge arena alone. 
For low arena count settings, the huge threshold feature may trigger an unwanted bg thd creation. Given that the huge arena does eager purging by default, bypass bg thd creation when initializing the huge arena. --- include/jemalloc/internal/arena_externs.h | 1 + src/arena.c | 8 ++++++++ src/background_thread.c | 20 ++++++++++++++++---- src/ctl.c | 11 +++++++++++ src/jemalloc.c | 18 +++++++----------- 5 files changed, 43 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 04d99545..bcc016e8 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -85,6 +85,7 @@ void arena_nthreads_dec(arena_t *arena, bool internal); size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); +bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); diff --git a/src/arena.c b/src/arena.c index d34de859..552a0f3b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2127,6 +2127,14 @@ arena_init_huge(void) { return huge_enabled; } +bool +arena_is_huge(unsigned arena_ind) { + if (huge_arena_ind == 0) { + return false; + } + return (arena_ind == huge_arena_ind); +} + void arena_boot(sc_data_t *sc_data) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); diff --git a/src/background_thread.c b/src/background_thread.c index 813867ef..acf8083b 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -535,9 +535,8 @@ background_thread_init(tsd_t *tsd, background_thread_info_t *info) { n_background_threads++; } -/* Create a new background thread if needed. 
*/ -bool -background_thread_create(tsd_t *tsd, unsigned arena_ind) { +static bool +background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { assert(have_background_thread); malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); @@ -590,6 +589,19 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { return false; } +/* Create a new background thread if needed. */ +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) { + assert(have_background_thread); + + bool ret; + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + ret = background_thread_create_locked(tsd, arena_ind); + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + + return ret; +} + bool background_threads_enable(tsd_t *tsd) { assert(n_background_threads == 0); @@ -623,7 +635,7 @@ background_threads_enable(tsd_t *tsd) { } } - return background_thread_create(tsd, 0); + return background_thread_create_locked(tsd, 0); } bool diff --git a/src/ctl.c b/src/ctl.c index a1508910..81e8fbe9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2276,6 +2276,17 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } + if (arena_is_huge(arena_ind) && *(ssize_t *)newp > 0) { + /* + * By default the huge arena purges eagerly. If it is + * set to non-zero decay time afterwards, background + * thread might be needed. + */ + if (background_thread_create(tsd, arena_ind)) { + ret = EFAULT; + goto label_return; + } + } if (dirty ? 
arena_dirty_decay_ms_set(tsd_tsdn(tsd), arena, *(ssize_t *)newp) : arena_muzzy_decay_ms_set(tsd_tsdn(tsd), arena, *(ssize_t *)newp)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 1620d0d3..2a47dcb5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -344,12 +344,12 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (ind == 0) { return; } - if (have_background_thread) { - bool err; - malloc_mutex_lock(tsdn, &background_thread_lock); - err = background_thread_create(tsdn_tsd(tsdn), ind); - malloc_mutex_unlock(tsdn, &background_thread_lock); - if (err) { + /* + * Avoid creating a new background thread just for the huge arena, which + * purges eagerly by default. + */ + if (have_background_thread && !arena_is_huge(ind)) { + if (background_thread_create(tsdn_tsd(tsdn), ind)) { malloc_printf(": error in background thread " "creation for arena %u. Abort.\n", ind); abort(); @@ -1719,11 +1719,7 @@ malloc_init_hard(void) { * sets isthreaded) needs to be called without holding any lock. */ background_thread_ctl_init(tsd_tsdn(tsd)); - - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); - bool err = background_thread_create(tsd, 0); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - if (err) { + if (background_thread_create(tsd, 0)) { return true; } } From 7a815c1b7c796ef35e7ede60cb2dd44aba9626b4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 15 Jan 2019 16:14:18 -0800 Subject: [PATCH 1245/2608] Un-experimental the huge_threshold feature. 
--- src/ctl.c | 2 +- src/jemalloc.c | 3 +-- src/stats.c | 2 +- test/unit/huge.c | 2 +- test/unit/mallctl.c | 2 +- 5 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 81e8fbe9..0ec92249 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -300,7 +300,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, - {NAME("experimental_huge_threshold"), CTL(opt_huge_threshold)}, + {NAME("huge_threshold"), CTL(opt_huge_threshold)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, diff --git a/src/jemalloc.c b/src/jemalloc.c index 2a47dcb5..6745df6a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1241,8 +1241,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { /* Experimental feature. Will be documented later.*/ CONF_HANDLE_SIZE_T(opt_huge_threshold, - "experimental_huge_threshold", - SC_LARGE_MINCLASS, + "huge_threshold", SC_LARGE_MINCLASS, SC_LARGE_MAXCLASS, yes, yes, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, diff --git a/src/stats.c b/src/stats.c index f105e260..87948809 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1022,7 +1022,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") OPT_WRITE_CHAR_P("percpu_arena") - OPT_WRITE_SIZE_T("experimental_huge_threshold") + OPT_WRITE_SIZE_T("huge_threshold") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/huge.c b/test/unit/huge.c index f371198f..7e54d076 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" /* Threshold: 2 << 20 = 2097152. 
*/ -const char *malloc_conf = "experimental_huge_threshold:2097152"; +const char *malloc_conf = "huge_threshold:2097152"; #define HUGE_SZ (2 << 20) #define SMALL_SZ (8) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 039a8810..b8b93405 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,7 +164,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); - TEST_MALLCTL_OPT(size_t, experimental_huge_threshold, always); + TEST_MALLCTL_OPT(size_t, huge_threshold, always); TEST_MALLCTL_OPT(bool, background_thread, always); TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); From 8c9571376e65c8099ea315261c24e940410386c8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 18 Jan 2019 15:22:44 -0800 Subject: [PATCH 1246/2608] Fix stats output (rate for total # of requests). The rate calculation for the total row was missing. 
--- src/stats.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/stats.c b/src/stats.c index 87948809..986f51bd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -876,6 +876,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; col_count_nrequests.uint64_val = small_nrequests + large_nrequests; + col_count_nmalloc_ps.uint64_val = + rate_per_second(col_count_nmalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = + rate_per_second(col_count_ndalloc.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = + rate_per_second(col_count_nrequests.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; From 522d1e7b4b603d9ddc11c684c16d37113a9c0c12 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 18 Jan 2019 15:51:01 -0800 Subject: [PATCH 1247/2608] Tweak the spacing for nrequests in stats output. 
--- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 986f51bd..75ccf3b8 100644 --- a/src/stats.c +++ b/src/stats.c @@ -289,7 +289,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti COL_HDR(row, ndalloc, NULL, right, 13, uint64) COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, nrequests, NULL, right, 13, uint64) - COL_HDR(row, nrequests_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64) COL_HDR(row, nshards, NULL, right, 9, unsigned) COL_HDR(row, curregs, NULL, right, 13, size) COL_HDR(row, curslabs, NULL, right, 13, size) @@ -817,7 +817,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, COL(alloc_count_row, count_nrequests, right, 16, title); col_count_nrequests.str_val = "nrequests"; - COL(alloc_count_row, count_nrequests_ps, right, 8, title); + COL(alloc_count_row, count_nrequests_ps, right, 10, title); col_count_nrequests_ps.str_val = "(#/sec)"; emitter_table_row(emitter, &alloc_count_row); From a7b0a124c3ebe505cfd8c2d5cc797b8f0c96fbc6 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Fri, 30 Nov 2018 13:57:49 +0000 Subject: [PATCH 1248/2608] Mention different mmap(2) behaviour with retain:true. --- doc/jemalloc.xml.in | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 058e9db9..a73f0ad7 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -944,6 +944,9 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", munmap 2 or equivalent (see stats.retained for related details). + It also makes jemalloc use + mmap2 + in a more greedy way, mapping larger chunks in one go. This option is disabled by default unless discarding virtual memory is known to trigger platform-specific performance problems, e.g. 
for [64-bit] Linux, which From d3145014a00d6420824a45bb24fa9237a553d8dc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 18 Jan 2019 14:20:07 -0800 Subject: [PATCH 1249/2608] Explicitly use arena 0 in alignment and OOM tests. This helps us avoid issues with size based routing (i.e. the huge_threshold feature). --- test/integration/mallocx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index ce5069a7..645d4db4 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -126,7 +126,7 @@ TEST_BEGIN(test_oom) { largemax = get_large_size(get_nlarge()-1); oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { - ptrs[i] = mallocx(largemax, 0); + ptrs[i] = mallocx(largemax, MALLOCX_ARENA(0)); if (ptrs[i] == NULL) { oom = true; } @@ -223,12 +223,12 @@ TEST_BEGIN(test_alignment_and_size) { sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { for (i = 0; i < NITER; i++) { nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + MALLOCX_ZERO | MALLOCX_ARENA(0)); assert_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + MALLOCX_ZERO | MALLOCX_ARENA(0)); assert_ptr_not_null(ps[i], "mallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); From 350809dc5d43ea994de04f7a970b6978a8fec6d2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 16 Jan 2019 12:25:24 -0800 Subject: [PATCH 1250/2608] Set huge_threshold to 8M by default. This feature uses a dedicated arena to handle huge requests, which significantly improves VM fragmentation. In production workload we tested it often reduces VM size by >30%. 
--- include/jemalloc/internal/arena_types.h | 2 +- src/jemalloc.c | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index c40ae6fd..cf07cc02 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -46,6 +46,6 @@ typedef enum { * When allocation_size >= huge_threshold, use the dedicated huge arena (unless * have explicitly spicified arena index). 0 disables the feature. */ -#define HUGE_THRESHOLD_DEFAULT 0 +#define HUGE_THRESHOLD_DEFAULT (8 << 20) #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 6745df6a..6bfc6133 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1239,7 +1239,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) - /* Experimental feature. Will be documented later.*/ + /* + * The runtime option of huge_threshold remains + * undocumented. It may be tweaked in the next major + * release (6.0). The default value 8M is rather + * conservative / safe. Tuning it further down may + * improve fragmentation a bit more, but may also cause + * contention on the huge arena. + */ CONF_HANDLE_SIZE_T(opt_huge_threshold, "huge_threshold", SC_LARGE_MINCLASS, SC_LARGE_MAXCLASS, yes, yes, false) From e3db480f6f3c147a8630c0ec45fde1da5764270b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 Jan 2019 16:15:04 -0800 Subject: [PATCH 1251/2608] Rename huge_threshold to oversize_threshold. The keyword huge tends to remind people of huge pages which is not relevant to the feature. 
--- doc/jemalloc.xml.in | 4 +--- include/jemalloc/internal/arena_externs.h | 4 ++-- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/arena_types.h | 6 +++--- src/arena.c | 14 +++++++------- src/ctl.c | 6 +++--- src/jemalloc.c | 6 +++--- src/stats.c | 2 +- test/unit/huge.c | 2 +- test/unit/mallctl.c | 4 ++-- 10 files changed, 24 insertions(+), 26 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index a73f0ad7..fe322e1d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1059,9 +1059,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms - for a description of muzzy pages. Note that when the huge_threshold - feature is enabled, the special auto arenas may use its own decay - settings. + for a description of muzzy pages. diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index bcc016e8..2bdddb77 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,8 +16,8 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; -extern size_t opt_huge_threshold; -extern size_t huge_threshold; +extern size_t opt_oversize_threshold; +extern size_t oversize_threshold; void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c7d35b74..b7cdcea0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -24,7 +24,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { * 1) is using auto arena selection (i.e. arena == NULL), and 2) the * thread is not assigned to a manual arena. 
*/ - if (unlikely(size >= huge_threshold)) { + if (unlikely(size >= oversize_threshold)) { arena_t *tsd_arena = tsd_arena_get(tsd); if (tsd_arena == NULL || arena_is_auto(tsd_arena)) { return arena_choose_huge(tsd); diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index cf07cc02..8917ea3a 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -43,9 +43,9 @@ typedef enum { #define PERCPU_ARENA_DEFAULT percpu_arena_disabled /* - * When allocation_size >= huge_threshold, use the dedicated huge arena (unless - * have explicitly spicified arena index). 0 disables the feature. + * When allocation_size >= oversize_threshold, use the dedicated huge arena + * (unless have explicitly spicified arena index). 0 disables the feature. */ -#define HUGE_THRESHOLD_DEFAULT (8 << 20) +#define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/src/arena.c b/src/arena.c index 552a0f3b..60eac232 100644 --- a/src/arena.c +++ b/src/arena.c @@ -43,8 +43,8 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { static div_info_t arena_binind_div_info[SC_NBINS]; -size_t opt_huge_threshold = HUGE_THRESHOLD_DEFAULT; -size_t huge_threshold = HUGE_THRESHOLD_DEFAULT; +size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; +size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; static unsigned huge_arena_ind; /******************************************************************************/ @@ -2112,15 +2112,15 @@ arena_init_huge(void) { bool huge_enabled; /* The threshold should be large size class. 
*/ - if (opt_huge_threshold > SC_LARGE_MAXCLASS || - opt_huge_threshold < SC_LARGE_MINCLASS) { - opt_huge_threshold = 0; - huge_threshold = SC_LARGE_MAXCLASS + PAGE; + if (opt_oversize_threshold > SC_LARGE_MAXCLASS || + opt_oversize_threshold < SC_LARGE_MINCLASS) { + opt_oversize_threshold = 0; + oversize_threshold = SC_LARGE_MAXCLASS + PAGE; huge_enabled = false; } else { /* Reserve the index for the huge arena. */ huge_arena_ind = narenas_total_get(); - huge_threshold = opt_huge_threshold; + oversize_threshold = opt_oversize_threshold; huge_enabled = true; } diff --git a/src/ctl.c b/src/ctl.c index 0ec92249..09310a9d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -85,7 +85,7 @@ CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) -CTL_PROTO(opt_huge_threshold) +CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_dirty_decay_ms) @@ -300,7 +300,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, - {NAME("huge_threshold"), CTL(opt_huge_threshold)}, + {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, @@ -1716,7 +1716,7 @@ CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) -CTL_RO_NL_GEN(opt_huge_threshold, opt_huge_threshold, size_t) +CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) diff --git a/src/jemalloc.c 
b/src/jemalloc.c index 6bfc6133..855a98b4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1240,15 +1240,15 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { -1, (sizeof(size_t) << 3) - 1) /* - * The runtime option of huge_threshold remains + * The runtime option of oversize_threshold remains * undocumented. It may be tweaked in the next major * release (6.0). The default value 8M is rather * conservative / safe. Tuning it further down may * improve fragmentation a bit more, but may also cause * contention on the huge arena. */ - CONF_HANDLE_SIZE_T(opt_huge_threshold, - "huge_threshold", SC_LARGE_MINCLASS, + CONF_HANDLE_SIZE_T(opt_oversize_threshold, + "oversize_threshold", SC_LARGE_MINCLASS, SC_LARGE_MAXCLASS, yes, yes, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, diff --git a/src/stats.c b/src/stats.c index 75ccf3b8..eb210758 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1028,7 +1028,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") OPT_WRITE_CHAR_P("percpu_arena") - OPT_WRITE_SIZE_T("huge_threshold") + OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/huge.c b/test/unit/huge.c index 7e54d076..ab72cf00 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" /* Threshold: 2 << 20 = 2097152. 
*/ -const char *malloc_conf = "huge_threshold:2097152"; +const char *malloc_conf = "oversize_threshold:2097152"; #define HUGE_SZ (2 << 20) #define SMALL_SZ (8) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index b8b93405..498f9e06 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,7 +164,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); - TEST_MALLCTL_OPT(size_t, huge_threshold, always); + TEST_MALLCTL_OPT(size_t, oversize_threshold, always); TEST_MALLCTL_OPT(bool, background_thread, always); TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); @@ -342,7 +342,7 @@ TEST_BEGIN(test_thread_arena) { sz = sizeof(unsigned); assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - if (opt_huge_threshold != 0) { + if (opt_oversize_threshold != 0) { narenas--; } assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); From 374dc30d3dc6c5b664fda9b1fa0510559e568b6a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 Jan 2019 16:18:30 -0800 Subject: [PATCH 1252/2608] Update copyright dates. --- COPYING | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/COPYING b/COPYING index 98458d97..3b7fd358 100644 --- a/COPYING +++ b/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2018 Jason Evans . +Copyright (C) 2002-present Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2018 Facebook, Inc. All rights reserved. +Copyright (C) 2009-present Facebook, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: From b33eb26dee1c161572b209a8fe3f58419ce4874f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 28 Jan 2019 14:05:20 -0800 Subject: [PATCH 1253/2608] Tweak the spacing for the total_wait_time per second. --- src/stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stats.c b/src/stats.c index eb210758..4c427e0d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -130,6 +130,7 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, #undef OP #undef WIDTH_uint32_t #undef WIDTH_uint64_t + col_uint64_t[mutex_counter_total_wait_time_ps].width = 10; } static void From e13400c919e6b6730284ff011875bbcdd6821f1c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 22 Jan 2019 13:59:23 -0800 Subject: [PATCH 1254/2608] Sanity check szind on tcache flush. This adds some overhead to the tcache flush path (which is one of the popular paths). Guard it behind a config option. 
--- configure.ac | 16 +++++++ .../internal/jemalloc_internal_defs.h.in | 3 ++ src/tcache.c | 42 ++++++++++++++++++- 3 files changed, 59 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index c0911db1..8049ded3 100644 --- a/configure.ac +++ b/configure.ac @@ -1403,6 +1403,22 @@ if test "x$enable_readlinkat" = "x1" ; then fi AC_SUBST([enable_readlinkat]) +dnl Avoid the extra size checking by default +AC_ARG_ENABLE([extra-size-check], + [AS_HELP_STRING([--enable-extra-size-check], + [Perform additonal size related sanity checks])], +[if test "x$enable_extra_size_check" = "xno" ; then + enable_extra_size_check="0" +else + enable_extra_size_check="1" +fi +], +[enable_extra_size_check=="0"] +) +if test "x$enable_extra_size_check" = "x1" ; then + AC_DEFINE([JEMALLOC_EXTRA_SIZE_CHECK], [ ]) +fi +AC_SUBST([enable_extra_size_check]) JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3e94c023..4f0359a8 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -372,4 +372,7 @@ */ #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE +/* Performs additional size-matching sanity checks when defined. */ +#undef JEMALLOC_EXTRA_SIZE_CHECK + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/tcache.c b/src/tcache.c index 9125179a..be4fb878 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -100,6 +100,34 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } +/* Enabled with --enable-extra-size-check. 
*/ +#ifdef JEMALLOC_EXTRA_SIZE_CHECK +static void +tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, + size_t nflush, extent_t **extents){ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + /* + * Verify that the items in the tcache all have the correct size; this + * is useful for catching sized deallocation bugs, also to fail early + * instead of corrupting metadata. Since this can be turned on for opt + * builds, avoid the branch in the loop. + */ + szind_t szind; + size_t sz_sum = binind * nflush; + for (unsigned i = 0 ; i < nflush; i++) { + rtree_extent_szind_read(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)*(tbin->avail - 1 - i), true, + &extents[i], &szind); + sz_sum -= szind; + } + if (sz_sum != 0) { + abort(); + } +} +#endif + void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { @@ -112,11 +140,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, assert(arena != NULL); unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + +#ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. */ for (unsigned i = 0 ; i < nflush; i++) { item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } - +#else + tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, + item_extent); +#endif while (nflush > 0) { /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; @@ -202,11 +235,16 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, assert(tcache_arena != NULL); unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + +#ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. 
*/ for (unsigned i = 0 ; i < nflush; i++) { item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); } - +#else + tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, + item_extent); +#endif while (nflush > 0) { /* Lock the arena associated with the first object. */ extent_t *extent = item_extent[0]; From 8e9a613122251d4c519059f8e1e11f27f6572b4c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 28 Jan 2019 15:25:30 -0800 Subject: [PATCH 1255/2608] Disable muzzy decay by default. --- include/jemalloc/internal/arena_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 8917ea3a..624937e4 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -9,7 +9,7 @@ /* Default decay times in milliseconds. */ #define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) -#define MUZZY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ #define DECAY_NTICKS_PER_UPDATE 1000 From 1f55a15467357bb559701687dbef1be84047ddfe Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 7 Jun 2018 12:27:19 -0700 Subject: [PATCH 1256/2608] Add configure option --disable-libdl. This makes it possible to build a full static binary. --- INSTALL.md | 5 +++++ configure.ac | 37 +++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 18cf2883..b8f729b0 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -269,6 +269,11 @@ any of the following arguments (not a definitive list) to 'configure': in the same process, which will almost certainly result in confusing runtime crashes if pointers leak from one implementation to the other. +* `--disable-libdl` + + Disable the usage of libdl, namely dlsym(3) which is required by the lazy + lock option. This can allow building static binaries. 
+ The following environment variables (not a definitive list) impact configure's behavior: diff --git a/configure.ac b/configure.ac index 8049ded3..fd468dff 100644 --- a/configure.ac +++ b/configure.ac @@ -1587,6 +1587,21 @@ if test "x${LG_PAGE}" != "xundefined" -a \ fi AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) +dnl ============================================================================ +dnl Enable libdl by default. +AC_ARG_ENABLE([libdl], + [AS_HELP_STRING([--disable-libdl], + [Do not use libdl])], +[if test "x$enable_libdl" = "xno" ; then + enable_libdl="0" +else + enable_libdl="1" +fi +], +[enable_libdl="1"] +) +AC_SUBST([libdl]) + dnl ============================================================================ dnl Configure pthreads. @@ -1600,15 +1615,21 @@ if test "x$abi" != "xpecoff" ; then AC_MSG_ERROR([libpthread is missing]))]) wrap_syms="${wrap_syms} pthread_create" have_pthread="1" - dnl Check if we have dlsym support. - have_dlsym="1" - AC_CHECK_HEADERS([dlfcn.h], - AC_CHECK_FUNC([dlsym], [], - [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]), - [have_dlsym="0"]) - if test "x$have_dlsym" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ]) + +dnl Check if we have dlsym support. + if test "x$enable_libdl" = "x1" ; then + have_dlsym="1" + AC_CHECK_HEADERS([dlfcn.h], + AC_CHECK_FUNC([dlsym], [], + [AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]), + [have_dlsym="0"]) + if test "x$have_dlsym" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ]) + fi + else + have_dlsym="0" fi + JE_COMPILABLE([pthread_atfork(3)], [ #include ], [ From 2db2d2ef5e1cf2eb2c0de362c916d0f7a2f1a9ef Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 7 Jun 2018 12:28:40 -0700 Subject: [PATCH 1257/2608] Make background_thread not dependent on libdl. When not using libdl, still allows background_thread to be enabled. 
--- configure.ac | 3 +-- src/background_thread.c | 9 ++++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index fd468dff..a668e670 100644 --- a/configure.ac +++ b/configure.ac @@ -2130,8 +2130,7 @@ fi dnl ============================================================================ dnl Enable background threads if possible. -if test "x${have_pthread}" = "x1" -a "x${have_dlsym}" = "x1" \ - -a "x${je_cv_os_unfair_lock}" != "xyes" ; then +if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" ; then AC_DEFINE([JEMALLOC_BACKGROUND_THREAD]) fi diff --git a/src/background_thread.c b/src/background_thread.c index acf8083b..5ed6c1c9 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -27,7 +27,6 @@ background_thread_info_t *background_thread_info; /******************************************************************************/ #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER -#include static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); @@ -820,6 +819,10 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS +#ifdef JEMALLOC_HAVE_DLSYM +#include +#endif + static bool pthread_create_fptr_init(void) { if (pthread_create_fptr != NULL) { @@ -830,7 +833,11 @@ pthread_create_fptr_init(void) { * wrapper for pthread_create; and 2) application may define its own * wrapper as well (and can call malloc within the wrapper). */ +#ifdef JEMALLOC_HAVE_DLSYM pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); +#else + pthread_create_fptr = NULL; +#endif if (pthread_create_fptr == NULL) { if (config_lazy_lock) { malloc_write(": Error in dlsym(RTLD_NEXT, " From 23b15e764b3d87c8e69a348d60d13e7e44f137b5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 6 Feb 2019 13:36:56 -0800 Subject: [PATCH 1258/2608] Add --disable-libdl to travis. 
--- .travis.yml | 23 +++++++++++++++++++++++ scripts/gen_travis.py | 1 + 2 files changed, 24 insertions(+) diff --git a/.travis.yml b/.travis.yml index 38e66551..40b2eb5f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,6 +21,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -37,6 +39,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -48,6 +52,8 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -65,6 +71,9 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib @@ -81,6 +90,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -91,6 +102,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -99,6 +112,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -107,6 +122,14 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index e92660f7..65b0b67c 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -45,6 +45,7 @@ configure_flag_unusuals = [ '--enable-debug', '--enable-prof', '--disable-stats', + '--disable-libdl', ] malloc_conf_unusuals = [ From 9015deb126d7b2b90ef822cf0183f96abb9b97f9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 7 Feb 2019 18:47:49 -0800 Subject: [PATCH 1259/2608] Add build_doc by default. 
However, skip building the docs (and output warnings) if XML support is missing. This allows `make install` to succeed w/o `make dist`. --- Makefile.in | 17 ++++++++++++++++- configure.ac | 3 +++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 2d59e595..0777f6a8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -47,6 +47,7 @@ REV := @rev@ install_suffix := @install_suffix@ ABI := @abi@ XSLTPROC := @XSLTPROC@ +XSLROOT := @XSLROOT@ AUTOCONF := @AUTOCONF@ _RPATH = @RPATH@ RPATH = $(if $(1),$(call _RPATH,$(1))) @@ -294,10 +295,24 @@ all: build_lib dist: build_doc $(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< +else +ifeq ($(wildcard $(DOCS_HTML)),) + @echo "

Missing xsltproc. Doc not built.

" > $@ +endif + @echo "Missing xsltproc. "$@" not (re)built." +endif $(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< +else +ifeq ($(wildcard $(DOCS_MAN3)),) + @echo "Missing xsltproc. Doc not built." > $@ +endif + @echo "Missing xsltproc. "$@" not (re)built." +endif build_doc_html: $(DOCS_HTML) build_doc_man: $(DOCS_MAN3) @@ -496,7 +511,7 @@ install_doc_man: $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done -install_doc: install_doc_html install_doc_man +install_doc: build_doc install_doc_html install_doc_man install: install_bin install_include install_lib install_doc diff --git a/configure.ac b/configure.ac index a668e670..9b00bbfc 100644 --- a/configure.ac +++ b/configure.ac @@ -175,6 +175,9 @@ fi ], XSLROOT="${DEFAULT_XSLROOT}" ) +if test "x$XSLTPROC" = "xfalse" ; then + XSLROOT="" +fi AC_SUBST([XSLROOT]) dnl If CFLAGS isn't defined, set CFLAGS to something reasonable. Otherwise, From dca7060d5e49b8a07179a1f13bf39f6d30e709c8 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Wed, 20 Feb 2019 18:45:23 -0800 Subject: [PATCH 1260/2608] Avoid redefining tsd_t. This fixes a build failure when integrating with FreeBSD's libc. This regression was introduced by d1e11d48d4c706e17ef3508e2ddb910f109b779f (Move tsd link and in_hook after tcache.). --- include/jemalloc/internal/tcache_structs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 27087031..172ef904 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -5,9 +5,9 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/tsd_types.h" /* Various uses of this struct need it to be a named type. 
*/ -typedef struct tsd_s tsd_t; typedef ql_elm(tsd_t) tsd_link_t; struct tcache_s { From 18450d0abe36757fe6e4eb08f6b15f8ce943f9cb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 19 Feb 2019 15:58:13 -0800 Subject: [PATCH 1261/2608] Guard libgcc unwind init with opt_prof. Only triggers libgcc unwind init when prof is enabled. This helps work around some bootstrapping issues. --- src/prof.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/prof.c b/src/prof.c index 71de2d34..296de527 100644 --- a/src/prof.c +++ b/src/prof.c @@ -3067,16 +3067,14 @@ prof_boot2(tsd_t *tsd) { return true; } } - } - #ifdef JEMALLOC_PROF_LIBGCC - /* - * Cause the backtracing machinery to allocate its internal state - * before enabling profiling. - */ - _Unwind_Backtrace(prof_unwind_init_callback, NULL); + /* + * Cause the backtracing machinery to allocate its internal + * state before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); #endif - + } prof_booted = true; return false; From cbdb1807cea6828d0f61e1a0516613efc3e7189e Mon Sep 17 00:00:00 2001 From: Dave Rigby Date: Fri, 22 Feb 2019 19:00:46 +0000 Subject: [PATCH 1262/2608] Stringify tls_callback linker directive Proposed fix for #1444 - ensure that `tls_callback` in the `#pragma comment(linker)` directive gets the same prefix added as it does in the C declaration. 
--- src/tsd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tsd.c b/src/tsd.c index f317d486..2eceed90 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -472,7 +472,7 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { # pragma comment(linker, "/INCLUDE:_tls_callback") # else # pragma comment(linker, "/INCLUDE:_tls_used") -# pragma comment(linker, "/INCLUDE:tls_callback") +# pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) # endif # pragma section(".CRT$XLY",long,read) #endif From 775fe302a75c4770edd9708e7348e626c96dfe58 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Fri, 22 Feb 2019 11:10:11 -0800 Subject: [PATCH 1263/2608] Remove JE_FORCE_SYNC_COMPARE_AND_SWAP_[48]. These macros have been unused since d4ac7582f32f506d5203bea2f0115076202add38 (Introduce a backport of C11 atomics). --- configure.ac | 34 ------------------- .../internal/jemalloc_internal_defs.h.in | 16 --------- 2 files changed, 50 deletions(-) diff --git a/configure.ac b/configure.ac index 9b00bbfc..4dafed58 100644 --- a/configure.ac +++ b/configure.ac @@ -2005,40 +2005,6 @@ case "${host_cpu}" in esac fi -dnl ============================================================================ -dnl Check whether __sync_{add,sub}_and_fetch() are available despite -dnl __GCC_HAVE_SYNC_COMPARE_AND_SWAP_n macros being undefined. 
- -AC_DEFUN([JE_SYNC_COMPARE_AND_SWAP_CHECK],[ - AC_CACHE_CHECK([whether to force $1-bit __sync_{add,sub}_and_fetch()], - [je_cv_sync_compare_and_swap_$2], - [AC_LINK_IFELSE([AC_LANG_PROGRAM([ - #include - ], - [ - #ifndef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 - { - uint$1_t x$1 = 0; - __sync_add_and_fetch(&x$1, 42); - __sync_sub_and_fetch(&x$1, 1); - } - #else - #error __GCC_HAVE_SYNC_COMPARE_AND_SWAP_$2 is defined, no need to force - #endif - ])], - [je_cv_sync_compare_and_swap_$2=yes], - [je_cv_sync_compare_and_swap_$2=no])]) - - if test "x${je_cv_sync_compare_and_swap_$2}" = "xyes" ; then - AC_DEFINE([JE_FORCE_SYNC_COMPARE_AND_SWAP_$2], [ ]) - fi -]) - -if test "x${je_cv_atomic9}" != "xyes" -a "x${je_cv_osatomic}" != "xyes" ; then - JE_SYNC_COMPARE_AND_SWAP_CHECK(32, 4) - JE_SYNC_COMPARE_AND_SWAP_CHECK(64, 8) -fi - dnl ============================================================================ dnl Check for __builtin_clz() and __builtin_clzl(). diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 4f0359a8..7914b2f6 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -52,22 +52,6 @@ /* Defined if GCC __sync atomics are available. */ #undef JEMALLOC_GCC_SYNC_ATOMICS -/* - * Defined if __sync_add_and_fetch(uint32_t *, uint32_t) and - * __sync_sub_and_fetch(uint32_t *, uint32_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 not being defined (which means the - * functions are defined in libgcc instead of being inlines). - */ -#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_4 - -/* - * Defined if __sync_add_and_fetch(uint64_t *, uint64_t) and - * __sync_sub_and_fetch(uint64_t *, uint64_t) are available, despite - * __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 not being defined (which means the - * functions are defined in libgcc instead of being inlines). 
- */ -#undef JE_FORCE_SYNC_COMPARE_AND_SWAP_8 - /* * Defined if __builtin_clz() and __builtin_clzl() are available. */ From ac24ffb21e28ba1ed86250fa6a6dcaf02b43f7da Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 22 Feb 2019 13:00:14 -0800 Subject: [PATCH 1264/2608] Fix a syntax error in configure.ac Introduced in e13400c919e6b6730284ff011875bbcdd6821f1c. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4dafed58..afaaf5de 100644 --- a/configure.ac +++ b/configure.ac @@ -1416,7 +1416,7 @@ else enable_extra_size_check="1" fi ], -[enable_extra_size_check=="0"] +[enable_extra_size_check="0"] ) if test "x$enable_extra_size_check" = "x1" ; then AC_DEFINE([JEMALLOC_EXTRA_SIZE_CHECK], [ ]) From 14d3686c9f3ed28f1ef4c9ec5f7bde945473194b Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Sat, 9 Mar 2019 10:51:03 -0800 Subject: [PATCH 1265/2608] Do not use #pragma GCC diagnostic with gcc < 4.6. This regression was introduced by 3d29d11ac2c1583b9959f73c0548545018d31c8a (Clean compilation -Wextra). --- .../internal/jemalloc_internal_macros.h | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index ec8782e6..d8ea06f6 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -30,7 +30,7 @@ # define restrict #endif -/* Various function pointers are statick and immutable except during testing. */ +/* Various function pointers are static and immutable except during testing. 
*/ #ifdef JEMALLOC_JET # define JET_MUTABLE #else @@ -47,7 +47,6 @@ #define JEMALLOC_FALLTHROUGH /* falls through */ #endif - /* Diagnostic suppression macros */ #if defined(_MSC_VER) && !defined(__clang__) # define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) @@ -57,7 +56,9 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS -#elif defined(__GNUC__) || defined(__clang__) +/* #pragma GCC diagnostic first appeared in gcc 4.6. */ +#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \ + (__GNUC_MINOR__ > 5)))) || defined(__clang__) /* * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang * diagnostic suppression macros and should not be used anywhere else. @@ -65,14 +66,16 @@ # define JEMALLOC_PRAGMA__(X) _Pragma(#X) # define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) # define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) JEMALLOC_PRAGMA__(GCC diagnostic ignored W) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \ + JEMALLOC_PRAGMA__(GCC diagnostic ignored W) /* * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and - * all clang versions up to version 7 (currently trunk, unreleased). - * This macro suppresses the warning for the affected compiler versions only. + * all clang versions up to version 7 (currently trunk, unreleased). This macro + * suppresses the warning for the affected compiler versions only. 
*/ -# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || defined(__clang__) +# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \ + defined(__clang__) # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") # else @@ -103,9 +106,8 @@ #endif /* - * Disables spurious diagnostics for all headers - * Since these headers are not included by users directly, - * it does not affect their diagnostic settings. + * Disables spurious diagnostics for all headers. Since these headers are not + * included by users directly, it does not affect their diagnostic settings. */ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS From 06f0850427e26cb24950de60bbe70bc192ffce6a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 7 Mar 2019 15:58:26 -0800 Subject: [PATCH 1266/2608] Detect if 8-bit atomics are available. In some rare cases (older compiler, e.g. gcc 4.2 w/ MIPS), 8-bit atomics might be unavailable. Detect such cases so that we can workaround. 
--- configure.ac | 25 +++++++++++++++++++ include/jemalloc/internal/atomic.h | 7 ++++++ .../internal/jemalloc_internal_defs.h.in | 7 ++++++ 3 files changed, 39 insertions(+) diff --git a/configure.ac b/configure.ac index afaaf5de..96f76d35 100644 --- a/configure.ac +++ b/configure.ac @@ -1896,6 +1896,19 @@ JE_COMPILABLE([GCC __atomic atomics], [ ], [je_cv_gcc_atomic_atomics]) if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS]) + + dnl check for 8-bit atomic support + JE_COMPILABLE([GCC 8-bit __atomic atomics], [ + ], [ + unsigned char x = 0; + int val = 1; + int y = __atomic_fetch_add(&x, val, __ATOMIC_RELAXED); + int after_add = (int)x; + return after_add == 1; + ], [je_cv_gcc_u8_atomic_atomics]) + if test "x${je_cv_gcc_u8_atomic_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS]) + fi fi dnl ============================================================================ @@ -1910,6 +1923,18 @@ JE_COMPILABLE([GCC __sync atomics], [ ], [je_cv_gcc_sync_atomics]) if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS]) + + dnl check for 8-bit atomic support + JE_COMPILABLE([GCC 8-bit __sync atomics], [ + ], [ + unsigned char x = 0; + int before_add = __sync_fetch_and_add(&x, 1); + int after_add = (int)x; + return (before_add == 0) && (after_add == 1); + ], [je_cv_gcc_u8_sync_atomics]) + if test "x${je_cv_gcc_u8_sync_atomics}" = "xyes" ; then + AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS]) + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index bb751cfc..a76f54ce 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -3,10 +3,17 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE +#define JEMALLOC_U8_ATOMICS #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) # include "jemalloc/internal/atomic_gcc_atomic.h" +# if 
!defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(JEMALLOC_GCC_SYNC_ATOMICS) # include "jemalloc/internal/atomic_gcc_sync.h" +# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(_MSC_VER) # include "jemalloc/internal/atomic_msvc.h" #elif defined(JEMALLOC_C11_ATOMICS) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7914b2f6..05016b1e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -48,9 +48,16 @@ /* Defined if GCC __atomic atomics are available. */ #undef JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#undef JEMALLOC_GCC_U8_ATOMIC_ATOMICS /* Defined if GCC __sync atomics are available. */ #undef JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#undef JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* Defined if 8-bit atomics are supported. */ + /* * Defined if __builtin_clz() and __builtin_clzl() are available. From b804d0f019df87d8cc96e3c812e98793256cb418 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 7 Mar 2019 16:01:55 -0800 Subject: [PATCH 1267/2608] Fallback to 32-bit when 8-bit atomics are missing for TSD. When it happens, this might cause a slowdown on the fast path operations. However such case is very rare. 
--- include/jemalloc/internal/tsd.h | 19 +++++++++++++++++-- src/tsd.c | 13 +++++++------ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 00a9500b..9ba26004 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -169,6 +169,18 @@ enum { */ #define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n +#ifdef JEMALLOC_U8_ATOMICS +# define tsd_state_t atomic_u8_t +# define tsd_atomic_load atomic_load_u8 +# define tsd_atomic_store atomic_store_u8 +# define tsd_atomic_exchange atomic_exchange_u8 +#else +# define tsd_state_t atomic_u32_t +# define tsd_atomic_load atomic_load_u32 +# define tsd_atomic_store atomic_store_u32 +# define tsd_atomic_exchange atomic_exchange_u32 +#endif + /* The actual tsd. */ struct tsd_s { /* @@ -177,8 +189,11 @@ struct tsd_s { * setters below. */ - /* We manually limit the state to just a single byte. */ - atomic_u8_t state; + /* + * We manually limit the state to just a single byte. Unless the 8-bit + * atomics are unavailable (which is rare). 
+ */ + tsd_state_t state; #define O(n, t, nt) \ t TSD_MANGLE(n); MALLOC_TSD diff --git a/src/tsd.c b/src/tsd.c index 2eceed90..d5fb4d6f 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -113,9 +113,9 @@ tsd_force_recompute(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); tsd_t *remote_tsd; ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { - assert(atomic_load_u8(&remote_tsd->state, ATOMIC_RELAXED) + assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); - atomic_store_u8(&remote_tsd->state, tsd_state_nominal_recompute, + tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, ATOMIC_RELAXED); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); @@ -172,7 +172,7 @@ tsd_slow_update(tsd_t *tsd) { uint8_t old_state; do { uint8_t new_state = tsd_state_compute(tsd); - old_state = atomic_exchange_u8(&tsd->state, new_state, + old_state = tsd_atomic_exchange(&tsd->state, new_state, ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); } @@ -181,14 +181,14 @@ void tsd_state_set(tsd_t *tsd, uint8_t new_state) { /* Only the tsd module can change the state *to* recompute. */ assert(new_state != tsd_state_nominal_recompute); - uint8_t old_state = atomic_load_u8(&tsd->state, ATOMIC_RELAXED); + uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED); if (old_state > tsd_state_nominal_max) { /* * Not currently in the nominal list, but it might need to be * inserted there. 
*/ assert(!tsd_in_nominal_list(tsd)); - atomic_store_u8(&tsd->state, new_state, ATOMIC_RELAXED); + tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED); if (new_state <= tsd_state_nominal_max) { tsd_add_nominal(tsd); } @@ -201,7 +201,8 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { assert(tsd_in_nominal_list(tsd)); if (new_state > tsd_state_nominal_max) { tsd_remove_nominal(tsd); - atomic_store_u8(&tsd->state, new_state, ATOMIC_RELAXED); + tsd_atomic_store(&tsd->state, new_state, + ATOMIC_RELAXED); } else { /* * This is the tricky case. We're transitioning from From f6c30cbafab1a841dd08f00541ed9651054bbe4a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Mar 2019 13:17:20 -0700 Subject: [PATCH 1268/2608] Remove some unused comments. --- include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 05016b1e..21b65147 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -56,9 +56,6 @@ /* and the 8-bit variant support. */ #undef JEMALLOC_GCC_U8_SYNC_ATOMICS -/* Defined if 8-bit atomics are supported. */ - - /* * Defined if __builtin_clz() and __builtin_clzl() are available. */ From fb56766ca9b398d07e2def5ead75a021fc08da03 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 12 Mar 2019 15:02:41 -0700 Subject: [PATCH 1269/2608] Eagerly purge oversized merged extents. This change improves memory usage slightly, at virtually no CPU cost. 
--- include/jemalloc/internal/arena_inlines_b.h | 20 ++++++++++++++++++++ src/extent.c | 7 +++++++ test/unit/decay.c | 12 +++++++++--- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b7cdcea0..614deddd 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -135,6 +135,26 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { arena_decay_ticks(tsdn, arena, 1); } +/* Purge a single extent to retained / unmapped directly. */ +JEMALLOC_ALWAYS_INLINE void +arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t **r_extent_hooks, + extent_t *extent) { + size_t extent_size = extent_size_get(extent); + extent_dalloc_wrapper(tsdn, arena, + r_extent_hooks, extent); + if (config_stats) { + /* Update stats accordingly. */ + arena_stats_lock(tsdn, &arena->stats); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->decay_dirty.stats->nmadvise, 1); + arena_stats_add_u64(tsdn, &arena->stats, + &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE); + arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, + extent_size); + arena_stats_unlock(tsdn, &arena->stats); + } +} + JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { diff --git a/src/extent.c b/src/extent.c index fd6c837f..3396a9d6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1708,6 +1708,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { + assert(extents == &arena->extents_dirty); /* Always coalesce large extents eagerly. 
*/ bool coalesced; do { @@ -1716,6 +1717,12 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, r_extent_hooks, rtree_ctx, extents, extent, &coalesced, growing_retained); } while (coalesced); + if (extent_size_get(extent) >= oversize_threshold) { + /* Shortcut to purge the oversize extent eagerly. */ + malloc_mutex_unlock(tsdn, &extents->mtx); + arena_decay_extent(tsdn, arena, r_extent_hooks, extent); + return; + } } extent_deactivate_locked(tsdn, arena, extents, extent); diff --git a/test/unit/decay.c b/test/unit/decay.c index f727bf93..cf3c0796 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -121,6 +121,12 @@ get_arena_dirty_npurge(unsigned arena_ind) { return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); } +static uint64_t +get_arena_dirty_purged(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); +} + static uint64_t get_arena_muzzy_npurge(unsigned arena_ind) { do_epoch(); @@ -559,7 +565,7 @@ TEST_BEGIN(test_decay_now) { TEST_END TEST_BEGIN(test_decay_never) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(check_background_thread_enabled() || !config_stats); unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; @@ -579,8 +585,8 @@ TEST_BEGIN(test_decay_never) { dallocx(ptrs[i], flags); size_t pdirty = get_arena_pdirty(arena_ind); size_t pmuzzy = get_arena_pmuzzy(arena_ind); - assert_zu_gt(pdirty, pdirty_prev, - "Expected dirty pages to increase."); + assert_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), + pdirty_prev, "Expected dirty pages to increase."); assert_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); pdirty_prev = pdirty; } From a4d017f5e5aea12b745e67679ba40753f6d7a778 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 21 Mar 2019 22:21:43 -0700 Subject: [PATCH 1270/2608] Output message before aborting on tcache size-matching check. 
--- src/tcache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/tcache.c b/src/tcache.c index be4fb878..e7b970d9 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -123,6 +123,9 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, sz_sum -= szind; } if (sz_sum != 0) { + malloc_printf("<jemalloc>: size mismatch in thread cache " + "detected, likely caused by sized deallocation bugs by " + "application. Abort.\n"); abort(); } } From 788a657cee745c1f827ddf1db50d580bd5e4347b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 27 Mar 2019 21:47:20 -0700 Subject: [PATCH 1271/2608] Allow low values of oversize_threshold to disable the feature. We should allow a way to easily disable the feature (e.g. not reserving the arena id at all). --- src/jemalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 855a98b4..bb703957 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1248,8 +1248,8 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { * contention on the huge arena. */ CONF_HANDLE_SIZE_T(opt_oversize_threshold, - "oversize_threshold", SC_LARGE_MINCLASS, - SC_LARGE_MAXCLASS, yes, yes, false) + "oversize_threshold", 0, SC_LARGE_MAXCLASS, no, yes, + false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, (sizeof(size_t) << 3), yes, yes, false) From ce03e4c7b8ddeaec5e72c8fb160e378f418ed651 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Mar 2019 20:30:56 -0700 Subject: [PATCH 1272/2608] Document opt.oversize_threshold. --- doc/jemalloc.xml.in | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fe322e1d..4acc2123 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -992,6 +992,24 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", number of CPUs, or one if there is a single CPU. 
+ + + opt.oversize_threshold + (size_t) + r- + + The threshold in bytes of which requests are considered + oversize. Allocation requests with greater sizes are fulfilled from a + dedicated arena (automatically managed, however not within + narenas), in order to reduce fragmentation by not + mixing huge allocations with small ones. In addition, the reserved + special arena may have its own default decay settings. Note that + requests with arena index specified via + MALLOCX_ARENA, or threads associated with explicit + arenas will not be considered. The default threshold is 8MiB. Values + not within large size classes disables this feature. + + opt.percpu_arena @@ -1013,7 +1031,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", opt.background_thread - (const bool) + (bool) r- Internal background worker threads enabled/disabled. @@ -1028,7 +1046,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", opt.max_background_threads - (const size_t) + (size_t) r- Maximum number of background threads that will be created @@ -1059,7 +1077,11 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms - for a description of muzzy pages. + for a description of muzzy pages. Note + that when the oversize_threshold + feature is enabled, the arenas reserved for oversize requests may have + its own default decay settings. From 59d98919482b2a101c4092428a4c0092abb797a1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 29 Mar 2019 13:27:20 -0700 Subject: [PATCH 1273/2608] Add the missing unlock in the error path of extent_register. 
--- src/extent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extent.c b/src/extent.c index 3396a9d6..62086c7d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -796,6 +796,7 @@ extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, &elm_a, &elm_b)) { + extent_unlock(tsdn, extent); return true; } From 0101d5ebef7230ef5aa1597be425e2a60e92f348 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 29 Mar 2019 13:31:02 -0700 Subject: [PATCH 1274/2608] Avoid check_min for opt_lg_extent_max_active_fit. This fixes a compiler warning. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index bb703957..c8afa9c4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1252,7 +1252,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, - (sizeof(size_t) << 3), yes, yes, false) + (sizeof(size_t) << 3), no, yes, false) if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; From 064d6e570e7073096471413f6a5159541478eb01 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sun, 31 Mar 2019 17:45:22 -0700 Subject: [PATCH 1275/2608] Tweak the wording about oversize_threshold. --- doc/jemalloc.xml.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4acc2123..fd0edb30 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1002,9 +1002,9 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", oversize. Allocation requests with greater sizes are fulfilled from a dedicated arena (automatically managed, however not within narenas), in order to reduce fragmentation by not - mixing huge allocations with small ones. In addition, the reserved - special arena may have its own default decay settings. 
Note that - requests with arena index specified via + mixing huge allocations with small ones. In addition, the decay API + guarantees on the extents greater than the specified threshold may be + overridden. Note that requests with arena index specified via MALLOCX_ARENA, or threads associated with explicit arenas will not be considered. The default threshold is 8MiB. Values not within large size classes disables this feature. From 6fe11633b066d74bdbb0f037a373af6e12a8b6c2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 2 Apr 2019 13:02:56 -0700 Subject: [PATCH 1276/2608] Fix the binshard unit test. The test attempts to trigger usage of multiple sharded bins, which percpu_arena makes it less reliable. --- test/unit/binshard.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 406c46ca..d7a8df8f 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -82,6 +82,9 @@ thd_start(void *varg) { } TEST_BEGIN(test_bin_shard_mt) { + test_skip_if(have_percpu_arena && + PERCPU_ARENA_ENABLED(opt_percpu_arena)); + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { From 978a7a21ae5fe8e5367732b2dba9f92742aef9f1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 2 Apr 2019 13:34:50 -0700 Subject: [PATCH 1277/2608] Use iallocztm instead of ialloc in prof_log functions. Explicitly use iallocztm for internal allocations. ialloc could trigger arena creation, which may cause lock order reversal (narenas_mtx and log_mtx). 
--- src/prof.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/prof.c b/src/prof.c index 296de527..4d7d65db 100644 --- a/src/prof.c +++ b/src/prof.c @@ -376,7 +376,8 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { size_t sz = offsetof(prof_bt_node_t, vec) + (bt->len * sizeof(void *)); prof_bt_node_t *new_node = (prof_bt_node_t *) - ialloc(tsd, sz, sz_size2index(sz), false, true); + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); if (log_bt_first == NULL) { log_bt_first = new_node; log_bt_last = new_node; @@ -416,7 +417,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { (void **)(&node), NULL)) { size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; prof_thr_node_t *new_node = (prof_thr_node_t *) - ialloc(tsd, sz, sz_size2index(sz), false, true); + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); if (log_thr_first == NULL) { log_thr_first = new_node; log_thr_last = new_node; @@ -474,10 +476,11 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); + size_t sz = sizeof(prof_alloc_node_t); prof_alloc_node_t *new_node = (prof_alloc_node_t *) - ialloc(tsd, sizeof(prof_alloc_node_t), - sz_size2index(sizeof(prof_alloc_node_t)), false, true); - + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? "" : tctx->tdata->thread_name; const char *cons_thr_name = prof_thread_name_get(tsd); From f7489dc8f1fac233b0cd4e40331de8b738b1f2e2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 19 Mar 2019 16:57:55 -0700 Subject: [PATCH 1278/2608] Update Changelog for 5.2.0. 
--- ChangeLog | 105 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 29a00fb7..7c73a8f2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,7 +4,110 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc -* 5.1.0 (May 4th, 2018) +* 5.2.0 (April 2, 2019) + + This release includes a few notable improvements, which are summarized below: + 1) improved fast-path performance from the optimizations by @djwatson; 2) + reduced virtual memory fragmentation and metadata usage; and 3) bug fixes on + setting the number of background threads. In addition, peak / spike memory + usage is improved with certain allocation patterns. As usual, the release and + prior dev versions have gone through large-scale production testing. + + New features: + - Implement oversize_threshold, which uses a dedicated arena for allocations + crossing the specified threshold to reduce fragmentation. (@interwq) + - Add extents usage information to stats. (@tyleretzel) + - Log time information for sampled allocations. (@tyleretzel) + - Support 0 size in sdallocx. (@djwatson) + - Output rate for certain counters in malloc_stats. (@zinoale) + - Add configure option --enable-readlinkat, which allows the use of readlinkat + over readlink. (@davidtgoldblatt) + - Add configure options --{enable,disable}-{static,shared} to allow not + building unwanted libraries. (@Ericson2314) + - Add configure option --disable-libdl to enable fully static builds. + (@interwq) + - Add mallctl interfaces: + + opt.oversize_threshold (@interwq) + + stats.arenas..extent_avail (@tyleretzel) + + stats.arenas..extents..n{dirty,muzzy,retained} (@tyleretzel) + + stats.arenas..extents..{dirty,muzzy,retained}_bytes + (@tyleretzel) + + Portability improvements: + - Update MSVC builds. (@maksqwe, @rustyx) + - Workaround a compiler optimizer bug on s390x. 
(@rkmisra) + - Make use of pthread_set_name_np(3) on FreeBSD. (@trasz) + - Implement malloc_getcpu() to enable percpu_arena for windows. (@santagada) + - Link against -pthread instead of -lpthread. (@paravoid) + - Make background_thread not dependent on libdl. (@interwq) + - Add stringify to fix a linker directive issue on MSVC. (@daverigby) + - Detect and fall back when 8-bit atomics are unavailable. (@interwq) + - Fall back to the default pthread_create if dlsym(3) fails. (@interwq) + + Optimizations and refactors: + - Refactor the TSD module. (@davidtgoldblatt) + - Avoid taking extents_muzzy mutex when muzzy is disabled. (@interwq) + - Avoid taking large_mtx for auto arenas on the tcache flush path. (@interwq) + - Optimize ixalloc by avoiding a size lookup. (@interwq) + - Implement opt.oversize_threshold which uses a dedicated arena for requests + crossing the threshold, also eagerly purges the oversize extents. Default + the threshold to 8 MiB. (@interwq) + - Clean compilation with -Wextra. (@gnzlbg, @jasone) + - Refactor the size class module. (@davidtgoldblatt) + - Refactor the stats emitter. (@tyleretzel) + - Optimize pow2_ceil. (@rkmisra) + - Avoid runtime detection of lazy purging on FreeBSD. (@trasz) + - Optimize mmap(2) alignment handling on FreeBSD. (@trasz) + - Improve error handling for THP state initialization. (@jsteemann) + - Rework the malloc() fast path. (@djwatson) + - Rework the free() fast path. (@djwatson) + - Refactor and optimize the tcache fill / flush paths. (@djwatson) + - Optimize sync / lwsync on PowerPC. (@chmeeedalf) + - Bypass extent_dalloc() when retain is enabled. (@interwq) + - Optimize the locking on large deallocation. (@interwq) + - Reduce the number of pages committed from sanity checking in debug build. + (@trasz, @interwq) + - Deprecate OSSpinLock. (@interwq) + - Lower the default number of background threads to 4 (when the feature + is enabled). (@interwq) + - Optimize the trylock spin wait. 
(@djwatson) + - Use arena index for arena-matching checks. (@interwq) + - Avoid forced decay on thread termination when using background threads. + (@interwq) + - Disable muzzy decay by default. (@djwatson, @interwq) + - Only initialize libgcc unwinder when profiling is enabled. (@paravoid, + @interwq) + + Bug fixes (all only relevant to jemalloc 5.x): + - Fix background thread index issues with max_background_threads. (@djwatson, + @interwq) + - Fix stats output for opt.lg_extent_max_active_fit. (@interwq) + - Fix opt.prof_prefix initialization. (@davidtgoldblatt) + - Properly trigger decay on tcache destroy. (@interwq, @amosbird) + - Fix tcache.flush. (@interwq) + - Detect whether explicit extent zero out is necessary with huge pages or + custom extent hooks, which may change the purge semantics. (@interwq) + - Fix a side effect caused by extent_max_active_fit combined with decay-based + purging, where freed extents can accumulate and not be reused for an + extended period of time. (@interwq, @mpghf) + - Fix a missing unlock on extent register error handling. (@zoulasc) + + Testing: + - Simplify the Travis script output. (@gnzlbg) + - Update the test scripts for FreeBSD. (@devnexen) + - Add unit tests for the producer-consumer pattern. (@interwq) + - Add Cirrus-CI config for FreeBSD builds. (@jasone) + - Add size-matching sanity checks on tcache flush. (@davidtgoldblatt, + @interwq) + + Incompatible changes: + - Remove --with-lg-page-sizes. (@davidtgoldblatt) + + Documentation: + - Attempt to build docs by default, however skip doc building when xsltproc + is missing. (@interwq, @cmuellner) + +* 5.1.0 (May 4, 2018) This release is primarily about fine-tuning, ranging from several new features to numerous notable performance and portability enhancements. 
The release and From 9aab3f2be041b09f42375d3bf173d1a8795a1ee9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 15 Mar 2019 11:01:45 -0700 Subject: [PATCH 1279/2608] Add memory utilization analytics to mallctl The analytics tool is put under experimental.utilization namespace in mallctl. Input is one pointer or an array of pointers and the output is a list of memory utilization statistics. --- include/jemalloc/internal/extent_externs.h | 6 + include/jemalloc/internal/extent_structs.h | 21 ++ include/jemalloc/internal/extent_types.h | 3 + src/ctl.c | 236 ++++++++++++++++++++- src/extent.c | 69 ++++++ test/unit/mallctl.c | 196 ++++++++++++++++- 6 files changed, 526 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 8680251a..5d53aad1 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -74,4 +74,10 @@ bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, bool extent_boot(void); +void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size); +void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + #endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index ceb18979..ad6710e7 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -228,4 +228,25 @@ struct extents_s { bool delay_coalesce; }; +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. 
+ */ + +struct extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +struct extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + #endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index acbcf27b..865f8a10 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -4,6 +4,9 @@ typedef struct extent_s extent_t; typedef struct extents_s extents_t; +typedef struct extent_util_stats_s extent_util_stats_t; +typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; + #define EXTENT_HOOKS_INITIALIZER NULL /* diff --git a/src/ctl.c b/src/ctl.c index 09310a9d..dd7e4672 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -216,6 +216,8 @@ CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_utilization_query) +CTL_PROTO(experimental_utilization_batch_query) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -574,11 +576,17 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, - {NAME("remove"), CTL(experimental_hooks_remove)}, + {NAME("remove"), CTL(experimental_hooks_remove)} +}; + +static const ctl_named_node_t utilization_node[] = { + {NAME("query"), CTL(experimental_utilization_query)}, + {NAME("batch_query"), CTL(experimental_utilization_batch_query)} }; static const ctl_named_node_t experimental_node[] = { - {NAME("hooks"), CHILD(named, hooks)} + {NAME("hooks"), CHILD(named, hooks)}, + {NAME("utilization"), CHILD(named, utilization)} }; static const ctl_named_node_t root_node[] = { @@ -2714,7 +2722,7 @@ static int prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, 
void *newp, size_t newlen) { int ret; - + const char *filename = NULL; if (!config_prof) { @@ -2726,7 +2734,7 @@ prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, if (prof_log_start(tsd_tsdn(tsd), filename)) { ret = EFAULT; - goto label_return; + goto label_return; } ret = 0; @@ -3083,3 +3091,223 @@ experimental_hooks_remove_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, label_return: return ret; } + +/* + * Output six memory utilization entries for an input pointer, the first one of + * type (void *) and the remaining five of type size_t, describing the following + * (in the same order): + * + * (a) memory address of the extent a potential reallocation would go into, + * == the five fields below describe about the extent the pointer resides in == + * (b) number of free regions in the extent, + * (c) number of regions in the extent, + * (d) size of the extent in terms of bytes, + * (e) total number of free regions in the bin the extent belongs to, and + * (f) total number of regions in the bin the extent belongs to. + * + * Note that "(e)" and "(f)" are only available when stats are enabled; + * otherwise both are set zero. + * + * This API is mainly intended for small class allocations, where extents are + * used as slab. + * + * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)" + * will be zero. The other three fields will be properly set though the values + * are trivial: "(b)" will be 0, "(c)" will be 1, and "(d)" will be the usable + * size. + * + * The input pointer and size are respectively passed in by newp and newlen, + * and the output fields and size are respectively oldp and *oldlenp. 
+ * + * It can be beneficial to define the following macros to make it easier to + * access the output: + * + * #define SLABCUR_READ(out) (*(void **)out) + * #define COUNTS(out) ((size_t *)((void **)out + 1)) + * #define NFREE_READ(out) COUNTS(out)[0] + * #define NREGS_READ(out) COUNTS(out)[1] + * #define SIZE_READ(out) COUNTS(out)[2] + * #define BIN_NFREE_READ(out) COUNTS(out)[3] + * #define BIN_NREGS_READ(out) COUNTS(out)[4] + * + * and then write e.g. NFREE_READ(oldp) to fetch the output. See the unit test + * test_utilization_query in test/unit/mallctl.c for an example. + * + * For a typical defragmentation workflow making use of this API for + * understanding the fragmentation level, please refer to the comment for + * experimental_utilization_batch_query_ctl. + * + * It's up to the application how to determine the significance of + * fragmentation relying on the outputs returned. Possible choices are: + * + * (a) if extent utilization ratio is below certain threshold, + * (b) if extent memory consumption is above certain threshold, + * (c) if extent utilization ratio is significantly below bin utilization ratio, + * (d) if input pointer deviates a lot from potential reallocation address, or + * (e) some selection/combination of the above. + * + * The caller needs to make sure that the input/output arguments are valid, + * in particular, that the size of the output is correct, i.e.: + * + * *oldlenp = sizeof(void *) + sizeof(size_t) * 5 + * + * Otherwise, the function immediately returns EINVAL without touching anything. + * + * In the rare case where there's no associated extent found for the input + * pointer, the function zeros out all output fields and return. Please refer + * to the comment for experimental_utilization_batch_query_ctl to understand the + * motivation from C++. 
+ */ +static int +experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + assert(sizeof(extent_util_stats_verbose_t) + == sizeof(void *) + sizeof(size_t) * 5); + + if (oldp == NULL || oldlenp == NULL + || *oldlenp != sizeof(extent_util_stats_verbose_t) + || newp == NULL) { + ret = EINVAL; + goto label_return; + } + + void *ptr = NULL; + WRITE(ptr, void *); + extent_util_stats_verbose_t *util_stats + = (extent_util_stats_verbose_t *)oldp; + extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, + &util_stats->nfree, &util_stats->nregs, &util_stats->size, + &util_stats->bin_nfree, &util_stats->bin_nregs, + &util_stats->slabcur_addr); + ret = 0; + +label_return: + return ret; +} + +/* + * Given an input array of pointers, output three memory utilization entries of + * type size_t for each input pointer about the extent it resides in: + * + * (a) number of free regions in the extent, + * (b) number of regions in the extent, and + * (c) size of the extent in terms of bytes. + * + * This API is mainly intended for small class allocations, where extents are + * used as slab. In case of large class allocations, the outputs are trivial: + * "(a)" will be 0, "(b)" will be 1, and "(c)" will be the usable size. + * + * Note that multiple input pointers may reside on a same extent so the output + * fields may contain duplicates. + * + * The format of the input/output looks like: + * + * input[0]: 1st_pointer_to_query | output[0]: 1st_extent_n_free_regions + * | output[1]: 1st_extent_n_regions + * | output[2]: 1st_extent_size + * input[1]: 2nd_pointer_to_query | output[3]: 2nd_extent_n_free_regions + * | output[4]: 2nd_extent_n_regions + * | output[5]: 2nd_extent_size + * ... | ... + * + * The input array and size are respectively passed in by newp and newlen, and + * the output array and size are respectively oldp and *oldlenp. 
+ * + * It can be beneficial to define the following macros to make it easier to + * access the output: + * + * #define NFREE_READ(out, i) out[(i) * 3] + * #define NREGS_READ(out, i) out[(i) * 3 + 1] + * #define SIZE_READ(out, i) out[(i) * 3 + 2] + * + * and then write e.g. NFREE_READ(oldp, i) to fetch the output. See the unit + * test test_utilization_batch in test/unit/mallctl.c for a concrete example. + * + * A typical workflow would be composed of the following steps: + * + * (1) flush tcache: mallctl("thread.tcache.flush", ...) + * (2) initialize input array of pointers to query fragmentation + * (3) allocate output array to hold utilization statistics + * (4) query utilization: mallctl("experimental.utilization.batch_query", ...) + * (5) (optional) decide if it's worthwhile to defragment; otherwise stop here + * (6) disable tcache: mallctl("thread.tcache.enabled", ...) + * (7) defragment allocations with significant fragmentation, e.g.: + * for each allocation { + * if it's fragmented { + * malloc(...); + * memcpy(...); + * free(...); + * } + * } + * (8) enable tcache: mallctl("thread.tcache.enabled", ...) + * + * The application can determine the significance of fragmentation themselves + * relying on the statistics returned, both at the overall level i.e. step "(5)" + * and at individual allocation level i.e. within step "(7)". Possible choices + * are: + * + * (a) whether memory utilization ratio is below certain threshold, + * (b) whether memory consumption is above certain threshold, or + * (c) some combination of the two. + * + * The caller needs to make sure that the input/output arrays are valid and + * their sizes are proper as well as matched, meaning: + * + * (a) newlen = n_pointers * sizeof(const void *) + * (b) *oldlenp = n_pointers * sizeof(size_t) * 3 + * (c) n_pointers > 0 + * + * Otherwise, the function immediately returns EINVAL without touching anything. 
+ * + * In the rare case where there's no associated extent found for some pointers, + * rather than immediately terminating the computation and raising an error, + * the function simply zeros out the corresponding output fields and continues + * the computation until all input pointers are handled. The motivations of + * such a design are as follows: + * + * (a) The function always either processes nothing or processes everything, and + * never leaves the output half touched and half untouched. + * + * (b) It facilitates usage needs especially common in C++. A vast variety of + * C++ objects are instantiated with multiple dynamic memory allocations. For + * example, std::string and std::vector typically use at least two allocations, + * one for the metadata and one for the actual content. Other types may use + * even more allocations. When inquiring about utilization statistics, the + * caller often wants to examine into all such allocations, especially internal + * one(s), rather than just the topmost one. The issue comes when some + * implementations do certain optimizations to reduce/aggregate some internal + * allocations, e.g. putting short strings directly into the metadata, and such + * decisions are not known to the caller. Therefore, we permit pointers to + * memory usages that may not be returned by previous malloc calls, and we + * provide the caller a convenient way to identify such cases. 
+ */ +static int +experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + assert(sizeof(extent_util_stats_t) == sizeof(size_t) * 3); + + const size_t len = newlen / sizeof(const void *); + if (oldp == NULL || oldlenp == NULL || newp == NULL || newlen == 0 + || newlen != len * sizeof(const void *) + || *oldlenp != len * sizeof(extent_util_stats_t)) { + ret = EINVAL; + goto label_return; + } + + void **ptrs = (void **)newp; + extent_util_stats_t *util_stats = (extent_util_stats_t *)oldp; + size_t i; + for (i = 0; i < len; ++i) { + extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], + &util_stats[i].nfree, &util_stats[i].nregs, + &util_stats[i].size); + } + ret = 0; + +label_return: + return ret; +} diff --git a/src/extent.c b/src/extent.c index 62086c7d..814f0a32 100644 --- a/src/extent.c +++ b/src/extent.c @@ -2280,3 +2280,72 @@ extent_boot(void) { return false; } + +void +extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = 
*size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + + const arena_t *arena = extent_arena_get(extent); + assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + *slabcur_addr = extent_addr_get(bin->slabcur); + assert(*slabcur_addr != NULL); + malloc_mutex_unlock(tsdn, &bin->lock); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 498f9e06..ef00a3df 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -853,6 +853,198 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END +#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ + assert_d_eq(mallctl("experimental.utilization." node, \ + a, b, c, d), EINVAL, "Should fail when " why_inval); \ + assert_zu_eq(out_sz, out_sz_ref, \ + "Output size touched when given invalid arguments"); \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content touched when given invalid arguments"); \ +} while (0) + +#define TEST_UTIL_VALID(node) do { \ + assert_d_eq(mallctl("experimental.utilization." 
node, \ + out, &out_sz, in, in_sz), 0, \ + "Should return 0 on correct arguments"); \ + assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content should be changed"); \ +} while (0) + +TEST_BEGIN(test_utilization_query) { + void *p = mallocx(1, 0); + void **in = &p; + size_t in_sz = sizeof(const void *); + size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(out, "test output allocation failed"); + assert_ptr_not_null(out_ref, "test reference output allocation failed"); + +#define SLABCUR_READ(out) (*(void **)out) +#define COUNTS(out) ((size_t *)((void **)out + 1)) +#define NFREE_READ(out) COUNTS(out)[0] +#define NREGS_READ(out) COUNTS(out)[1] +#define SIZE_READ(out) COUNTS(out)[2] +#define BIN_NFREE_READ(out) COUNTS(out)[3] +#define BIN_NREGS_READ(out) COUNTS(out)[4] + + SLABCUR_READ(out) = NULL; + NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; + BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; + memcpy(out_ref, out, out_sz); + + /* Test invalid argument(s) errors */ +#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) + + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero"); + in_sz -= 1; + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid newlen"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid *oldlenp"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + +#undef TEST_UTIL_QUERY_EINVAL + + /* Examine output for valid call */ + TEST_UTIL_VALID("query"); + 
assert_zu_le(NFREE_READ(out), NREGS_READ(out), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out), SIZE_READ(out), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out), 0, + "Extent region count must be positive"); + assert_zu_ne(SIZE_READ(out), 0, "Extent size must be positive"); + if (config_stats) { + assert_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), + "Bin free count exceeded region count"); + assert_zu_ne(BIN_NREGS_READ(out), 0, + "Bin region count must be positive"); + assert_zu_le(NFREE_READ(out), BIN_NFREE_READ(out), + "Extent free count exceeded bin free count"); + assert_zu_le(NREGS_READ(out), BIN_NREGS_READ(out), + "Extent region count exceeded bin region count"); + assert_zu_eq(BIN_NREGS_READ(out) % NREGS_READ(out), 0, + "Bin region count isn't a multiple of extent region count"); + assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - BIN_NFREE_READ(out), + "Extent utilized count exceeded bin utilized count"); + } else { + assert_zu_eq(BIN_NFREE_READ(out), 0, + "Bin free count should be zero when stats are disabled"); + assert_zu_eq(BIN_NREGS_READ(out), 0, + "Bin region count should be zero when stats are disabled"); + } + assert_ptr_not_null(SLABCUR_READ(out), "Current slab is null"); + assert_true(NFREE_READ(out) == 0 || SLABCUR_READ(out) <= p, + "Allocation should follow first fit principle"); + +#undef BIN_NREGS_READ +#undef BIN_NFREE_READ +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ +#undef COUNTS +#undef SLABCUR_READ + + free(out_ref); + free(out); + free(p); +} +TEST_END + +TEST_BEGIN(test_utilization_batch_query) { + void *p = mallocx(1, 0); + void *q = mallocx(1, 0); + void *in[] = {p, q}; + size_t in_sz = sizeof(const void *) * 2; + size_t out[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz = sizeof(size_t) * 6; + size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, "test pointer allocation failed"); + 
assert_ptr_not_null(q, "test pointer allocation failed"); + + /* Test invalid argument(s) errors */ +#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) + + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero"); + in_sz -= 1; + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "newlen is not an exact multiple"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp is not an exact multiple"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + in_sz -= sizeof(const void *); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp and newlen do not match"); + in_sz += sizeof(const void *); + +#undef TEST_UTIL_BATCH_EINVAL + + /* Examine output for valid calls */ +#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") +#define TEST_EQUAL_REF(i, message) \ + assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + +#define NFREE_READ(out, i) out[(i) * 3] +#define NREGS_READ(out, i) out[(i) * 3 + 1] +#define SIZE_READ(out, i) out[(i) * 3 + 2] + + out_sz_ref = out_sz /= 2; + in_sz /= 2; + TEST_UTIL_BATCH_VALID; + assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out, 0), 0, + "Extent region count must be positive"); + assert_zu_ne(SIZE_READ(out, 0), 0, "Extent size must be positive"); + TEST_EQUAL_REF(1, "Should not overwrite content beyond what's needed"); + in_sz *= 2; + out_sz_ref = out_sz *= 2; + + memcpy(out_ref, out, 3 * sizeof(size_t)); + TEST_UTIL_BATCH_VALID; + TEST_EQUAL_REF(0, "Statistics should be stable across calls"); + 
assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + "Extent free count exceeded region count"); + assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + "Extent region count should be same for same region size"); + assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + "Extent size should be same for same region size"); + +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ + +#undef TEST_EQUAL_REF +#undef TEST_UTIL_BATCH_VALID + + free(q); + free(p); +} +TEST_END + +#undef TEST_UTIL_VALID +#undef TEST_UTIL_EINVAL + int main(void) { return test( @@ -883,5 +1075,7 @@ main(void) { test_arenas_lookup, test_stats_arenas, test_hooks, - test_hooks_exhaustion); + test_hooks_exhaustion, + test_utilization_query, + test_utilization_batch_query); } From 93084cdc8960935d0acc93424dddd3a79a86e2da Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Mar 2019 20:42:40 -0700 Subject: [PATCH 1280/2608] Ensure page alignment on extent_alloc. This is discovered and suggested by @jasone in #1468. When custom extent hooks are in use, we should ensure page alignment on the extent alloc path, instead of relying on the user hooks to do so. --- src/extent.c | 7 ++++--- src/extent_dss.c | 2 +- src/extent_mmap.c | 4 ++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/extent.c b/src/extent.c index 814f0a32..66cbf05b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1256,7 +1256,7 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, assert(arena != NULL); return extent_alloc_default_impl(tsdn, arena, new_addr, size, - alignment, zero, commit); + ALIGNMENT_CEILING(alignment, PAGE), zero, commit); } static void @@ -1493,14 +1493,15 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } void *addr; + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); if (*r_extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, - alignment, zero, commit); + palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, - esize, alignment, zero, commit, arena_ind_get(arena)); + esize, palignment, zero, commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } if (addr == NULL) { diff --git a/src/extent_dss.c b/src/extent_dss.c index 6c56cf65..69a7bee8 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -113,7 +113,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, cassert(have_dss); assert(size > 0); - assert(alignment > 0); + assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); /* * sbrk() uses a signed increment argument, so take care not to diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 8d607dc8..17fd1c8f 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -21,8 +21,8 @@ bool opt_retain = void * extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret = pages_map(new_addr, size, ALIGNMENT_CEILING(alignment, - PAGE), commit); + assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); + void *ret = pages_map(new_addr, size, alignment, commit); if (ret == NULL) { return NULL; } From c2a3a7cd3f3cbc177d677101be85a31a39c26bd0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 3 Apr 2019 16:19:00 -0700 Subject: [PATCH 1281/2608] Fix test/unit/prof_log Compiler optimizations may produce traces more than expected. Instead verify the lower bound only. 
--- test/unit/prof_log.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 6a3464b4..92fbd7ce 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -125,12 +125,14 @@ TEST_BEGIN(test_prof_log_many_traces) { assert_rep(); } /* - * There should be 8 total backtraces: two for malloc/free in f1(), - * two for malloc/free in f2(), two for malloc/free in f3(), and then - * two for malloc/free in f1()'s call to f3(). + * There should be 8 total backtraces: two for malloc/free in f1(), two + * for malloc/free in f2(), two for malloc/free in f3(), and then two + * for malloc/free in f1()'s call to f3(). However compiler + * optimizations such as loop unrolling might generate more call sites. + * So >= 8 traces are expected. */ - assert_zu_eq(prof_log_bt_count(), 8, - "Wrong number of backtraces given sample workload"); + assert_zu_ge(prof_log_bt_count(), 8, + "Expect at least 8 backtraces given sample workload"); assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } From d3d7a8ef09b6fa79109e8930aaba7a677f8b24ac Mon Sep 17 00:00:00 2001 From: mgrice Date: Fri, 8 Mar 2019 11:50:30 -0800 Subject: [PATCH 1282/2608] remove compare and branch in fast path for c++ operator delete[] Summary: sdallocx is checking a flag that will never be set (at least in the provided C++ destructor implementation). This branch will probably only rarely be mispredicted however it removes two instructions in sdallocx and one at the callsite (to zero out flags). 
--- bin/jeprof.in | 1 + .../jemalloc/internal/jemalloc_internal_externs.h | 1 + src/jemalloc.c | 14 +++++++++++++- src/jemalloc_cpp.cpp | 4 ++-- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index 588c6b43..16a76c6c 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2909,6 +2909,7 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@xallocx', '@JEMALLOC_PREFIX@dallocx', '@JEMALLOC_PREFIX@sdallocx', + '@JEMALLOC_PREFIX@sdallocx_noflags', 'tc_calloc', 'tc_cfree', 'tc_malloc', diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b7843623..cdbc33a2 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -51,5 +51,6 @@ void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); bool malloc_initialized(void); +void je_sdallocx_noflags(void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index c8afa9c4..7bc7b957 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2732,7 +2732,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { tcache_t *tcache = tsd_tcachep_get(tsd); alloc_ctx_t alloc_ctx; - /* + /* * If !config_cache_oblivious, we can check PAGE alignment to * detect sampled objects. Otherwise addresses are * randomized, and we have to look it up in the rtree anyway. 
@@ -3522,6 +3522,18 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.exit", ""); } +void JEMALLOC_NOTHROW +je_sdallocx_noflags(void *ptr, size_t size) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, + size); + + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } + + LOG("core.sdallocx.exit", ""); +} + JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index f0ceddae..da0441a7 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -128,14 +128,14 @@ operator delete(void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, /*flags=*/0); + je_sdallocx_noflags(ptr, size); } void operator delete[](void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, /*flags=*/0); + je_sdallocx_noflags(ptr, size); } #endif // __cpp_sized_deallocation From 7ee3897740aabdccb2381b7b6ab68fff0aac3ec4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 9 Apr 2019 11:01:26 -0700 Subject: [PATCH 1283/2608] Separate tests for extent utilization API As title. 
--- Makefile.in | 1 + src/ctl.c | 4 +- test/unit/extent_util.c | 190 ++++++++++++++++++++++++++++++++++++++ test/unit/mallctl.c | 196 +--------------------------------------- 4 files changed, 194 insertions(+), 197 deletions(-) create mode 100644 test/unit/extent_util.c diff --git a/Makefile.in b/Makefile.in index 0777f6a8..3a09442c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -178,6 +178,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ + $(srcroot)test/unit/extent_util.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ diff --git a/src/ctl.c b/src/ctl.c index dd7e4672..193d2b00 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3131,7 +3131,7 @@ label_return: * #define BIN_NREGS_READ(out) COUNTS(out)[4] * * and then write e.g. NFREE_READ(oldp) to fetch the output. See the unit test - * test_utilization_query in test/unit/mallctl.c for an example. + * test_query in test/unit/extent_util.c for an example. * * For a typical defragmentation workflow making use of this API for * understanding the fragmentation level, please refer to the comment for @@ -3223,7 +3223,7 @@ label_return: * #define SIZE_READ(out, i) out[(i) * 3 + 2] * * and then write e.g. NFREE_READ(oldp, i) to fetch the output. See the unit - * test test_utilization_batch in test/unit/mallctl.c for a concrete example. + * test test_batch in test/unit/extent_util.c for a concrete example. * * A typical workflow would be composed of the following steps: * diff --git a/test/unit/extent_util.c b/test/unit/extent_util.c new file mode 100644 index 00000000..6995325f --- /dev/null +++ b/test/unit/extent_util.c @@ -0,0 +1,190 @@ +#include "test/jemalloc_test.h" + +#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ + assert_d_eq(mallctl("experimental.utilization." 
node, \ + a, b, c, d), EINVAL, "Should fail when " why_inval); \ + assert_zu_eq(out_sz, out_sz_ref, \ + "Output size touched when given invalid arguments"); \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content touched when given invalid arguments"); \ +} while (0) + +#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) +#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ + TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) + +#define TEST_UTIL_VALID(node) do { \ + assert_d_eq(mallctl("experimental.utilization." node, \ + out, &out_sz, in, in_sz), 0, \ + "Should return 0 on correct arguments"); \ + assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content should be changed"); \ +} while (0) + +#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") + +TEST_BEGIN(test_query) { + void *p = mallocx(1, 0); + void **in = &p; + size_t in_sz = sizeof(const void *); + size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(out, "test output allocation failed"); + assert_ptr_not_null(out_ref, "test reference output allocation failed"); + +#define SLABCUR_READ(out) (*(void **)out) +#define COUNTS(out) ((size_t *)((void **)out + 1)) +#define NFREE_READ(out) COUNTS(out)[0] +#define NREGS_READ(out) COUNTS(out)[1] +#define SIZE_READ(out) COUNTS(out)[2] +#define BIN_NFREE_READ(out) COUNTS(out)[3] +#define BIN_NREGS_READ(out) COUNTS(out)[4] + + SLABCUR_READ(out) = NULL; + NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; + BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; + memcpy(out_ref, out, out_sz); + + /* Test invalid argument(s) errors */ + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, 
NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero"); + in_sz -= 1; + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid newlen"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid *oldlenp"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + + /* Examine output for valid call */ + TEST_UTIL_VALID("query"); + assert_zu_le(NFREE_READ(out), NREGS_READ(out), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out), SIZE_READ(out), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out), 0, + "Extent region count must be positive"); + assert_zu_ne(SIZE_READ(out), 0, "Extent size must be positive"); + if (config_stats) { + assert_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), + "Bin free count exceeded region count"); + assert_zu_ne(BIN_NREGS_READ(out), 0, + "Bin region count must be positive"); + assert_zu_le(NFREE_READ(out), BIN_NFREE_READ(out), + "Extent free count exceeded bin free count"); + assert_zu_le(NREGS_READ(out), BIN_NREGS_READ(out), + "Extent region count exceeded bin region count"); + assert_zu_eq(BIN_NREGS_READ(out) % NREGS_READ(out), 0, + "Bin region count isn't a multiple of extent region count"); + assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - BIN_NFREE_READ(out), + "Extent utilized count exceeded bin utilized count"); + } else { + assert_zu_eq(BIN_NFREE_READ(out), 0, + "Bin free count should be zero when stats are disabled"); + assert_zu_eq(BIN_NREGS_READ(out), 0, + "Bin region count should be zero when stats are disabled"); + } + assert_ptr_not_null(SLABCUR_READ(out), "Current slab is null"); + assert_true(NFREE_READ(out) == 0 || SLABCUR_READ(out) <= p, + "Allocation should follow first fit principle"); + +#undef BIN_NREGS_READ +#undef BIN_NFREE_READ +#undef SIZE_READ +#undef NREGS_READ +#undef 
NFREE_READ +#undef COUNTS +#undef SLABCUR_READ + + free(out_ref); + free(out); + free(p); +} +TEST_END + +TEST_BEGIN(test_batch) { + void *p = mallocx(1, 0); + void *q = mallocx(1, 0); + void *in[] = {p, q}; + size_t in_sz = sizeof(const void *) * 2; + size_t out[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz = sizeof(size_t) * 6; + size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(q, "test pointer allocation failed"); + + /* Test invalid argument(s) errors */ + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero"); + in_sz -= 1; + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "newlen is not an exact multiple"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp is not an exact multiple"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + in_sz -= sizeof(const void *); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp and newlen do not match"); + in_sz += sizeof(const void *); + + /* Examine output for valid calls */ +#define TEST_EQUAL_REF(i, message) \ + assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + +#define NFREE_READ(out, i) out[(i) * 3] +#define NREGS_READ(out, i) out[(i) * 3 + 1] +#define SIZE_READ(out, i) out[(i) * 3 + 2] + + out_sz_ref = out_sz /= 2; + in_sz /= 2; + TEST_UTIL_BATCH_VALID; + assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out, 0), 0, + "Extent region count must be positive"); + assert_zu_ne(SIZE_READ(out, 0), 0, "Extent size must be positive"); + 
TEST_EQUAL_REF(1, "Should not overwrite content beyond what's needed"); + in_sz *= 2; + out_sz_ref = out_sz *= 2; + + memcpy(out_ref, out, 3 * sizeof(size_t)); + TEST_UTIL_BATCH_VALID; + TEST_EQUAL_REF(0, "Statistics should be stable across calls"); + assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + "Extent free count exceeded region count"); + assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + "Extent region count should be same for same region size"); + assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + "Extent size should be same for same region size"); + +#undef SIZE_READ +#undef NREGS_READ +#undef NFREE_READ + +#undef TEST_EQUAL_REF + + free(q); + free(p); +} +TEST_END + +int +main(void) { + return test(test_query, test_batch); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ef00a3df..498f9e06 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -853,198 +853,6 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END -#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ - a, b, c, d), EINVAL, "Should fail when " why_inval); \ - assert_zu_eq(out_sz, out_sz_ref, \ - "Output size touched when given invalid arguments"); \ - assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content touched when given invalid arguments"); \ -} while (0) - -#define TEST_UTIL_VALID(node) do { \ - assert_d_eq(mallctl("experimental.utilization." 
node, \ - out, &out_sz, in, in_sz), 0, \ - "Should return 0 on correct arguments"); \ - assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ - assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content should be changed"); \ -} while (0) - -TEST_BEGIN(test_utilization_query) { - void *p = mallocx(1, 0); - void **in = &p; - size_t in_sz = sizeof(const void *); - size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; - void *out = mallocx(out_sz, 0); - void *out_ref = mallocx(out_sz, 0); - size_t out_sz_ref = out_sz; - - assert_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(out, "test output allocation failed"); - assert_ptr_not_null(out_ref, "test reference output allocation failed"); - -#define SLABCUR_READ(out) (*(void **)out) -#define COUNTS(out) ((size_t *)((void **)out + 1)) -#define NFREE_READ(out) COUNTS(out)[0] -#define NREGS_READ(out) COUNTS(out)[1] -#define SIZE_READ(out) COUNTS(out)[2] -#define BIN_NFREE_READ(out) COUNTS(out)[3] -#define BIN_NREGS_READ(out) COUNTS(out)[4] - - SLABCUR_READ(out) = NULL; - NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; - BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; - memcpy(out_ref, out, out_sz); - - /* Test invalid argument(s) errors */ -#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ - TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) - - TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); - TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero"); - in_sz -= 1; - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid newlen"); - in_sz += 1; - out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid *oldlenp"); - out_sz_ref = out_sz += 2 * sizeof(size_t); - -#undef TEST_UTIL_QUERY_EINVAL - - /* Examine output for valid call */ - TEST_UTIL_VALID("query"); - 
assert_zu_le(NFREE_READ(out), NREGS_READ(out), - "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out), SIZE_READ(out), - "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out), 0, - "Extent region count must be positive"); - assert_zu_ne(SIZE_READ(out), 0, "Extent size must be positive"); - if (config_stats) { - assert_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), - "Bin free count exceeded region count"); - assert_zu_ne(BIN_NREGS_READ(out), 0, - "Bin region count must be positive"); - assert_zu_le(NFREE_READ(out), BIN_NFREE_READ(out), - "Extent free count exceeded bin free count"); - assert_zu_le(NREGS_READ(out), BIN_NREGS_READ(out), - "Extent region count exceeded bin region count"); - assert_zu_eq(BIN_NREGS_READ(out) % NREGS_READ(out), 0, - "Bin region count isn't a multiple of extent region count"); - assert_zu_le(NREGS_READ(out) - NFREE_READ(out), - BIN_NREGS_READ(out) - BIN_NFREE_READ(out), - "Extent utilized count exceeded bin utilized count"); - } else { - assert_zu_eq(BIN_NFREE_READ(out), 0, - "Bin free count should be zero when stats are disabled"); - assert_zu_eq(BIN_NREGS_READ(out), 0, - "Bin region count should be zero when stats are disabled"); - } - assert_ptr_not_null(SLABCUR_READ(out), "Current slab is null"); - assert_true(NFREE_READ(out) == 0 || SLABCUR_READ(out) <= p, - "Allocation should follow first fit principle"); - -#undef BIN_NREGS_READ -#undef BIN_NFREE_READ -#undef SIZE_READ -#undef NREGS_READ -#undef NFREE_READ -#undef COUNTS -#undef SLABCUR_READ - - free(out_ref); - free(out); - free(p); -} -TEST_END - -TEST_BEGIN(test_utilization_batch_query) { - void *p = mallocx(1, 0); - void *q = mallocx(1, 0); - void *in[] = {p, q}; - size_t in_sz = sizeof(const void *) * 2; - size_t out[] = {-1, -1, -1, -1, -1, -1}; - size_t out_sz = sizeof(size_t) * 6; - size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; - size_t out_sz_ref = out_sz; - - assert_ptr_not_null(p, "test pointer allocation failed"); - 
assert_ptr_not_null(q, "test pointer allocation failed"); - - /* Test invalid argument(s) errors */ -#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ - TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) - - TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); - TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero"); - in_sz -= 1; - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "newlen is not an exact multiple"); - in_sz += 1; - out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "*oldlenp is not an exact multiple"); - out_sz_ref = out_sz += 2 * sizeof(size_t); - in_sz -= sizeof(const void *); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "*oldlenp and newlen do not match"); - in_sz += sizeof(const void *); - -#undef TEST_UTIL_BATCH_EINVAL - - /* Examine output for valid calls */ -#define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") -#define TEST_EQUAL_REF(i, message) \ - assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) - -#define NFREE_READ(out, i) out[(i) * 3] -#define NREGS_READ(out, i) out[(i) * 3 + 1] -#define SIZE_READ(out, i) out[(i) * 3 + 2] - - out_sz_ref = out_sz /= 2; - in_sz /= 2; - TEST_UTIL_BATCH_VALID; - assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), - "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), - "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out, 0), 0, - "Extent region count must be positive"); - assert_zu_ne(SIZE_READ(out, 0), 0, "Extent size must be positive"); - TEST_EQUAL_REF(1, "Should not overwrite content beyond what's needed"); - in_sz *= 2; - out_sz_ref = out_sz *= 2; - - memcpy(out_ref, out, 3 * sizeof(size_t)); - TEST_UTIL_BATCH_VALID; - TEST_EQUAL_REF(0, "Statistics should be stable across calls"); - 
assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), - "Extent free count exceeded region count"); - assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), - "Extent region count should be same for same region size"); - assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), - "Extent size should be same for same region size"); - -#undef SIZE_READ -#undef NREGS_READ -#undef NFREE_READ - -#undef TEST_EQUAL_REF -#undef TEST_UTIL_BATCH_VALID - - free(q); - free(p); -} -TEST_END - -#undef TEST_UTIL_VALID -#undef TEST_UTIL_EINVAL - int main(void) { return test( @@ -1075,7 +883,5 @@ main(void) { test_arenas_lookup, test_stats_arenas, test_hooks, - test_hooks_exhaustion, - test_utilization_query, - test_utilization_batch_query); + test_hooks_exhaustion); } From 020b5dc7ac5138a347e5462508b2b5e4ecd6bc52 Mon Sep 17 00:00:00 2001 From: zoulasc Date: Fri, 15 Mar 2019 12:56:03 -0400 Subject: [PATCH 1284/2608] Convert the format generator function to an annotated format function, so that the generated formats can be checked by the compiler. --- include/jemalloc/internal/emitter.h | 33 ++++++++++++++++------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 0a8bc2c0..981dbe0c 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -86,10 +86,11 @@ emitter_printf(emitter_t *emitter, const char *format, ...) { va_end(ap); } -static inline void +static inline const char * __attribute__((__format_arg__(3))) emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, emitter_justify_t justify, int width) { size_t written; + fmt_specifier++; if (justify == emitter_justify_none) { written = malloc_snprintf(out_fmt, out_size, "%%%s", fmt_specifier); @@ -102,6 +103,7 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, } /* Only happens in case of bad format string, which *we* choose. 
*/ assert(written < out_size); + return out_fmt; } /* @@ -127,26 +129,27 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, char buf[BUF_SIZE]; #define EMIT_SIMPLE(type, format) \ - emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \ - emitter_printf(emitter, fmt, *(const type *)value); \ + emitter_printf(emitter, \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ + *(const type *)value); switch (value_type) { case emitter_type_bool: - emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); - emitter_printf(emitter, fmt, *(const bool *)value ? - "true" : "false"); + emitter_printf(emitter, + emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), + *(const bool *)value ? "true" : "false"); break; case emitter_type_int: - EMIT_SIMPLE(int, "d") + EMIT_SIMPLE(int, "%d") break; case emitter_type_unsigned: - EMIT_SIMPLE(unsigned, "u") + EMIT_SIMPLE(unsigned, "%u") break; case emitter_type_ssize: - EMIT_SIMPLE(ssize_t, "zd") + EMIT_SIMPLE(ssize_t, "%zd") break; case emitter_type_size: - EMIT_SIMPLE(size_t, "zu") + EMIT_SIMPLE(size_t, "%zu") break; case emitter_type_string: str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", @@ -156,17 +159,17 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * anywhere near the fmt size. 
*/ assert(str_written < BUF_SIZE); - emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); - emitter_printf(emitter, fmt, buf); + emitter_printf(emitter, + emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); break; case emitter_type_uint32: - EMIT_SIMPLE(uint32_t, FMTu32) + EMIT_SIMPLE(uint32_t, "%" FMTu32) break; case emitter_type_uint64: - EMIT_SIMPLE(uint64_t, FMTu64) + EMIT_SIMPLE(uint64_t, "%" FMTu64) break; case emitter_type_title: - EMIT_SIMPLE(char *const, "s"); + EMIT_SIMPLE(char *const, "%s"); break; default: unreachable(); From 14e4176758379875c4ef486d6c57327ed07edd86 Mon Sep 17 00:00:00 2001 From: zoulasc Date: Fri, 15 Mar 2019 12:59:56 -0400 Subject: [PATCH 1285/2608] Fix incorrect macro use. Compiling with warnings produces missing prototype warnings. --- include/jemalloc/internal/extent_externs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 5d53aad1..8aba5763 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -24,7 +24,7 @@ size_t extent_size_quantize_floor(size_t size); size_t extent_size_quantize_ceil(size_t size); #endif -rb_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, From 7f7935cf7805036d42fb510592ab8b40bcfb0690 Mon Sep 17 00:00:00 2001 From: zoulasc Date: Fri, 15 Mar 2019 20:19:16 -0400 Subject: [PATCH 1286/2608] Add an autoconf feature test for format_arg and a jemalloc-specific macro for it. 
--- configure.ac | 12 ++++++++++++ include/jemalloc/internal/emitter.h | 2 +- include/jemalloc/jemalloc_defs.h.in | 3 +++ include/jemalloc/jemalloc_macros.h.in | 6 ++++++ 4 files changed, 22 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 96f76d35..9cc2a6b5 100644 --- a/configure.ac +++ b/configure.ac @@ -851,6 +851,18 @@ if test "x${je_cv_format_printf}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ]) fi +dnl Check for format_arg(...) attribute support. +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) +JE_COMPILABLE([format(printf, ...) attribute], [#include ], + [const char * __attribute__((__format_arg__(1))) foo(const char *format);], + [je_cv_format_arg]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_format_arg}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 981dbe0c..542bc79c 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -86,7 +86,7 @@ emitter_printf(emitter_t *emitter, const char *format, ...) { va_end(ap); } -static inline const char * __attribute__((__format_arg__(3))) +static inline const char * JEMALLOC_FORMAT_ARG(3) emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, emitter_justify_t justify, int width) { size_t written; diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 6d89435c..11c39181 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -4,6 +4,9 @@ /* Defined if alloc_size attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_ALLOC_SIZE +/* Defined if format_arg(...) attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_FORMAT_ARG + /* Defined if format(gnu_printf, ...) 
attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index a00ce11a..59e29558 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -69,6 +69,7 @@ # define JEMALLOC_EXPORT __declspec(dllimport) # endif # endif +# define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus @@ -96,6 +97,11 @@ # ifndef JEMALLOC_EXPORT # define JEMALLOC_EXPORT JEMALLOC_ATTR(visibility("default")) # endif +# ifdef JEMALLOC_HAVE_ATTR_FORMAT_ARG +# define JEMALLOC_FORMAT_ARG(i) JEMALLOC_ATTR(__format_arg__(3)) +# else +# define JEMALLOC_FORMAT_ARG(i) +# endif # ifdef JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF # define JEMALLOC_FORMAT_PRINTF(s, i) JEMALLOC_ATTR(format(gnu_printf, s, i)) # elif defined(JEMALLOC_HAVE_ATTR_FORMAT_PRINTF) From f4d24f05e1f270c43bc4129c0d18d673b8ac85b8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Mar 2019 16:04:35 -0700 Subject: [PATCH 1287/2608] Move extra size checks behind a config flag. This will let us turn that flag into a generic "turn on runtime checks" flag that guards other functionality we have planned. 
--- configure.ac | 22 +++++++++---------- .../internal/jemalloc_internal_defs.h.in | 4 ++-- .../jemalloc/internal/jemalloc_preamble.h.in | 19 ++++++++++++++++ src/tcache.c | 17 +++++++------- 4 files changed, 40 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 9cc2a6b5..7a83a1a4 100644 --- a/configure.ac +++ b/configure.ac @@ -1418,22 +1418,22 @@ if test "x$enable_readlinkat" = "x1" ; then fi AC_SUBST([enable_readlinkat]) -dnl Avoid the extra size checking by default -AC_ARG_ENABLE([extra-size-check], - [AS_HELP_STRING([--enable-extra-size-check], - [Perform additonal size related sanity checks])], -[if test "x$enable_extra_size_check" = "xno" ; then - enable_extra_size_check="0" +dnl Avoid extra safety checks by default +AC_ARG_ENABLE([opt-safety-checks], + [AS_HELP_STRING([--enable-opt-safety-checks], + [Perform certain low-overhead checks, even in opt mode])], +[if test "x$enable_opt_safety_checks" = "xno" ; then + enable_opt_safety_checks="0" else - enable_extra_size_check="1" + enable_opt_safety_checks="1" fi ], -[enable_extra_size_check="0"] +[enable_opt_safety_checks="0"] ) -if test "x$enable_extra_size_check" = "x1" ; then - AC_DEFINE([JEMALLOC_EXTRA_SIZE_CHECK], [ ]) +if test "x$enable_opt_safety_checks" = "x1" ; then + AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ]) fi -AC_SUBST([enable_extra_size_check]) +AC_SUBST([enable_opt_safety_checks]) JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 21b65147..c442a219 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -360,7 +360,7 @@ */ #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE -/* Performs additional size-matching sanity checks when defined. */ -#undef JEMALLOC_EXTRA_SIZE_CHECK +/* Performs additional safety checks when defined. 
*/ +#undef JEMALLOC_OPT_SAFETY_CHECKS #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 4bfdb32c..9fd2a7f6 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -161,6 +161,25 @@ static const bool config_log = false #endif ; +/* + * Are extra safety checks enabled; things like checking the size of sized + * deallocations, double-frees, etc. + */ +static const bool config_opt_safety_checks = +#if defined(JEMALLOC_EXTRA_SAFETY_CHECKS) + true +#elif defined(JEMALLOC_DEBUG) + /* + * This lets us only guard safety checks by one flag instead of two; fast + * checks can guard solely by config_opt_safety_checks and run in debug mode + * too. + */ + true +#else + false +#endif + ; + #if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA diff --git a/src/tcache.c b/src/tcache.c index e7b970d9..160b0b77 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -101,7 +101,6 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } /* Enabled with --enable-extra-size-check. */ -#ifdef JEMALLOC_EXTRA_SIZE_CHECK static void tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, size_t nflush, extent_t **extents){ @@ -129,7 +128,6 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, abort(); } } -#endif void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, @@ -144,15 +142,16 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, unsigned nflush = tbin->ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); -#ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. 
*/ - for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + if (config_opt_safety_checks) { + tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, + nflush, item_extent); + } else { + for (unsigned i = 0 ; i < nflush; i++) { + item_extent[i] = iealloc(tsd_tsdn(tsd), + *(tbin->avail - 1 - i)); + } } -#else - tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, - item_extent); -#endif while (nflush > 0) { /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; From f95a88fcd92e8ead1a6c5c8b2ca8c401c6eba162 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 22 Mar 2019 17:13:45 -0700 Subject: [PATCH 1288/2608] Safety checks: Expose config value via mallctl and stats. --- src/ctl.c | 3 +++ src/stats.c | 1 + 2 files changed, 4 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 193d2b00..c113bf24 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -72,6 +72,7 @@ CTL_PROTO(config_debug) CTL_PROTO(config_fill) CTL_PROTO(config_lazy_lock) CTL_PROTO(config_malloc_conf) +CTL_PROTO(config_opt_safety_checks) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) @@ -286,6 +287,7 @@ static const ctl_named_node_t config_node[] = { {NAME("fill"), CTL(config_fill)}, {NAME("lazy_lock"), CTL(config_lazy_lock)}, {NAME("malloc_conf"), CTL(config_malloc_conf)}, + {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, @@ -1706,6 +1708,7 @@ CTL_RO_CONFIG_GEN(config_debug, bool) CTL_RO_CONFIG_GEN(config_fill, bool) CTL_RO_CONFIG_GEN(config_lazy_lock, bool) CTL_RO_CONFIG_GEN(config_malloc_conf, const char *) +CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) diff --git a/src/stats.c 
b/src/stats.c index 4c427e0d..2be9a7e2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -976,6 +976,7 @@ stats_general_print(emitter_t *emitter) { emitter_kv(emitter, "malloc_conf", "config.malloc_conf", emitter_type_string, &config_malloc_conf); + CONFIG_WRITE_BOOL(opt_safety_checks); CONFIG_WRITE_BOOL(prof); CONFIG_WRITE_BOOL(prof_libgcc); CONFIG_WRITE_BOOL(prof_libunwind); From b92c9a1a81f3f68da87afe5887d8450fef0700d3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 20 Mar 2019 13:06:53 -0700 Subject: [PATCH 1289/2608] Safety checks: Indirect through a function. This will let us share code on failure pathways.pathways --- Makefile.in | 1 + include/jemalloc/internal/safety_check.h | 6 ++++++ src/safety_check.c | 11 +++++++++++ src/tcache.c | 3 ++- 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/safety_check.h create mode 100644 src/safety_check.c diff --git a/Makefile.in b/Makefile.in index 3a09442c..8b4a98fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/rtree.c \ + $(srcroot)src/safety_check.c \ $(srcroot)src/stats.c \ $(srcroot)src/sc.c \ $(srcroot)src/sz.c \ diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h new file mode 100644 index 00000000..52157d16 --- /dev/null +++ b/include/jemalloc/internal/safety_check.h @@ -0,0 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H +#define JEMALLOC_INTERNAL_SAFETY_CHECK_H + +void safety_check_fail(const char *format, ...); + +#endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/src/safety_check.c b/src/safety_check.c new file mode 100644 index 00000000..cbec1907 --- /dev/null +++ b/src/safety_check.c @@ -0,0 +1,11 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +void safety_check_fail(const char *format, ...) 
{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(NULL, NULL, format, ap); + va_end(ap); + abort(); +} diff --git a/src/tcache.c b/src/tcache.c index 160b0b77..034c69a0 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" /******************************************************************************/ @@ -122,7 +123,7 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, sz_sum -= szind; } if (sz_sum != 0) { - malloc_printf(": size mismatch in thread cache " + safety_check_fail(": size mismatch in thread cache " "detected, likely caused by sized deallocation bugs by " "application. Abort.\n"); abort(); From 33e1dad6803ea3e20971b46baa299045f736d22a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 22 Mar 2019 12:53:11 -0700 Subject: [PATCH 1290/2608] Safety checks: Add a redzoning feature. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_b.h | 2 +- .../jemalloc/internal/jemalloc_preamble.h.in | 2 +- include/jemalloc/internal/prof_inlines_b.h | 3 +- include/jemalloc/internal/safety_check.h | 20 +++ src/arena.c | 22 ++- src/jemalloc.c | 1 + src/prof.c | 16 +- src/safety_check.c | 25 ++- test/unit/safety_check.c | 156 ++++++++++++++++++ test/unit/safety_check.sh | 5 + 12 files changed, 233 insertions(+), 22 deletions(-) create mode 100644 test/unit/safety_check.c create mode 100644 test/unit/safety_check.sh diff --git a/Makefile.in b/Makefile.in index 8b4a98fd..38722ff9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -210,6 +210,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rb.c \ $(srcroot)test/unit/retained.c \ $(srcroot)test/unit/rtree.c \ + $(srcroot)test/unit/safety_check.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/sc.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 2bdddb77..a4523ae0 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -60,7 +60,7 @@ void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize); +void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 614deddd..7e61a44c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -90,7 +90,7 @@ arena_prof_alloc_time_get(tsdn_t *tsdn, const void 
*ptr, assert(ptr != NULL); extent_t *extent = iealloc(tsdn, ptr); - /* + /* * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're * sure we have a sampled allocation. */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 9fd2a7f6..3418cbfa 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -166,7 +166,7 @@ static const bool config_log = * deallocations, double-frees, etc. */ static const bool config_opt_safety_checks = -#if defined(JEMALLOC_EXTRA_SAFETY_CHECKS) +#ifdef JEMALLOC_OPT_SAFETY_CHECKS true #elif defined(JEMALLOC_DEBUG) /* diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 8358bffb..8ba8a1e1 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H #define JEMALLOC_INTERNAL_PROF_INLINES_B_H +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" JEMALLOC_ALWAYS_INLINE bool @@ -71,7 +72,7 @@ prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { JEMALLOC_ALWAYS_INLINE void prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, - nstime_t t) { + nstime_t t) { cassert(config_prof); assert(ptr != NULL); diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 52157d16..1b53fc4c 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -2,5 +2,25 @@ #define JEMALLOC_INTERNAL_SAFETY_CHECK_H void safety_check_fail(const char *format, ...); +/* Can set to NULL for a default. 
*/ +void safety_check_set_abort(void (*abort_fn)()); + +JEMALLOC_ALWAYS_INLINE void +safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { + assert(usize < bumped_usize); + for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { + *((unsigned char *)ptr + usize) = 0xBC; + } +} + +JEMALLOC_ALWAYS_INLINE void +safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) +{ + for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { + if (unlikely(*((unsigned char *)ptr + usize) != 0xBC)) { + safety_check_fail("Use after free error\n"); + } + } +} #endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/src/arena.c b/src/arena.c index 60eac232..084df855 100644 --- a/src/arena.c +++ b/src/arena.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/util.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -1531,12 +1532,16 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } void -arena_prof_promote(tsdn_t *tsdn, const void *ptr, size_t usize) { +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { cassert(config_prof); assert(ptr != NULL); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); assert(usize <= SC_SMALL_MAXCLASS); + if (config_opt_safety_checks) { + safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); + } + rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1577,10 +1582,19 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(opt_prof); extent_t *extent = iealloc(tsdn, ptr); - size_t usize = arena_prof_demote(tsdn, extent, ptr); - if (usize <= tcache_maxclass) { + size_t usize = extent_usize_get(extent); + size_t bumped_usize = arena_prof_demote(tsdn, extent, ptr); + if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { + /* + * Currently, we 
only do redzoning for small sampled + * allocations. + */ + assert(bumped_usize == SC_LARGE_MINCLASS); + safety_check_verify_redzone(ptr, usize, bumped_usize); + } + if (bumped_usize <= tcache_maxclass) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(usize), slow_path); + sz_size2index(bumped_usize), slow_path); } else { large_dalloc(tsdn, extent); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 7bc7b957..818ce3aa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -13,6 +13,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" diff --git a/src/prof.c b/src/prof.c index 4d7d65db..a4e30f42 100644 --- a/src/prof.c +++ b/src/prof.c @@ -125,7 +125,7 @@ struct prof_thr_node_s { uint64_t thr_uid; /* Variable size based on thr_name_sz. */ char name[1]; -}; +}; typedef struct prof_alloc_node_s prof_alloc_node_t; @@ -388,7 +388,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { new_node->next = NULL; new_node->index = log_bt_index; - /* + /* * Copy the backtrace: bt is inside a tdata or gctx, which * might die before prof_log_stop is called. */ @@ -402,7 +402,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { } else { return node->index; } -} +} static size_t prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { assert(prof_logging_state == prof_logging_state_started); @@ -452,7 +452,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { * it's being destroyed). 
*/ return; - } + } malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); @@ -514,11 +514,11 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { } label_done: - malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, +prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); @@ -2604,8 +2604,8 @@ static void prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { emitter_json_array_kv_begin(emitter, "stack_traces"); prof_bt_node_t *bt_node = log_bt_first; - prof_bt_node_t *bt_old_node; - /* + prof_bt_node_t *bt_old_node; + /* * Calculate how many hex digits we need: twice number of bytes, two for * "0x", and then one more for terminating '\0'. */ diff --git a/src/safety_check.c b/src/safety_check.c index cbec1907..804155dc 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -1,11 +1,24 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -void safety_check_fail(const char *format, ...) { - va_list ap; +static void (*safety_check_abort)(const char *message); - va_start(ap, format); - malloc_vcprintf(NULL, NULL, format, ap); - va_end(ap); - abort(); +void safety_check_set_abort(void (*abort_fn)(const char *)) { + safety_check_abort = abort_fn; +} + +void safety_check_fail(const char *format, ...) 
{ + char buf[MALLOC_PRINTF_BUFSIZE]; + + va_list ap; + va_start(ap, format); + malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); + va_end(ap); + + if (safety_check_abort == NULL) { + malloc_write(buf); + abort(); + } else { + safety_check_abort(buf); + } } diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c new file mode 100644 index 00000000..bf4bd86d --- /dev/null +++ b/test/unit/safety_check.c @@ -0,0 +1,156 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/safety_check.h" + +/* + * Note that we get called through safety_check.sh, which turns on sampling for + * everything. + */ + +bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +TEST_BEGIN(test_malloc_free_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! */ + char* ptr = malloc(128); + ptr[128] = 0; + free(ptr); + safety_check_set_abort(NULL); + + assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + fake_abort_called = false; +} +TEST_END + +TEST_BEGIN(test_mallocx_dallocx_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! */ + char* ptr = mallocx(128, 0); + ptr[128] = 0; + dallocx(ptr, 0); + safety_check_set_abort(NULL); + + assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + fake_abort_called = false; +} +TEST_END + +TEST_BEGIN(test_malloc_sdallocx_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! 
*/ + char* ptr = malloc(128); + ptr[128] = 0; + sdallocx(ptr, 128, 0); + safety_check_set_abort(NULL); + + assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + fake_abort_called = false; +} +TEST_END + +TEST_BEGIN(test_realloc_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! */ + char* ptr = malloc(128); + ptr[128] = 0; + ptr = realloc(ptr, 129); + safety_check_set_abort(NULL); + free(ptr); + + assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + fake_abort_called = false; +} +TEST_END + +TEST_BEGIN(test_rallocx_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! */ + char* ptr = malloc(128); + ptr[128] = 0; + ptr = rallocx(ptr, 129, 0); + safety_check_set_abort(NULL); + free(ptr); + + assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + fake_abort_called = false; +} +TEST_END + +TEST_BEGIN(test_xallocx_overflow) { + test_skip_if(!config_prof); + test_skip_if(!config_opt_safety_checks); + + safety_check_set_abort(&fake_abort); + /* Buffer overflow! 
*/ + char* ptr = malloc(128); + ptr[128] = 0; + size_t result = xallocx(ptr, 129, 0, 0); + assert_zu_eq(result, 128, ""); + free(ptr); + assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + fake_abort_called = false; + safety_check_set_abort(NULL); +} +TEST_END + +TEST_BEGIN(test_realloc_no_overflow) { + char* ptr = malloc(128); + ptr = realloc(ptr, 256); + ptr[128] = 0; + ptr[255] = 0; + free(ptr); + + ptr = malloc(128); + ptr = realloc(ptr, 64); + ptr[63] = 0; + ptr[0] = 0; + free(ptr); +} +TEST_END + +TEST_BEGIN(test_rallocx_no_overflow) { + char* ptr = malloc(128); + ptr = rallocx(ptr, 256, 0); + ptr[128] = 0; + ptr[255] = 0; + free(ptr); + + ptr = malloc(128); + ptr = rallocx(ptr, 64, 0); + ptr[63] = 0; + ptr[0] = 0; + free(ptr); +} +TEST_END + +int +main(void) { + return test( + test_malloc_free_overflow, + test_mallocx_dallocx_overflow, + test_malloc_sdallocx_overflow, + test_realloc_overflow, + test_rallocx_overflow, + test_xallocx_overflow, + test_realloc_no_overflow, + test_rallocx_no_overflow); +} diff --git a/test/unit/safety_check.sh b/test/unit/safety_check.sh new file mode 100644 index 00000000..8fcc7d8a --- /dev/null +++ b/test/unit/safety_check.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi From 21cfe59ff7b10a61dabe26cd3dbfb7a255e1f5e8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 22 Mar 2019 17:08:53 -0700 Subject: [PATCH 1291/2608] Safety checks: Run tests by default --- .travis.yml | 25 +++++++++++++++++++++++++ scripts/gen_run_tests.py | 1 + scripts/gen_travis.py | 1 + 3 files changed, 27 insertions(+) diff --git a/.travis.yml b/.travis.yml index 40b2eb5f..2da5da8e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -23,6 +23,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -41,6 +43,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -54,6 +58,8 @@ matrix: env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -74,6 +80,9 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib @@ -92,6 +101,8 @@ 
matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -104,6 +115,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -114,6 +127,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -122,6 +137,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -130,6 +147,14 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 5052b3e0..a414f812 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -40,6 +40,7 @@ possible_config_opts = [ '--enable-debug', '--enable-prof', 
'--disable-stats', + '--enable-opt-safety-checks', ] if bits_64: possible_config_opts.append('--with-lg-vaddr=56') diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 65b0b67c..f1478c62 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -46,6 +46,7 @@ configure_flag_unusuals = [ '--enable-prof', '--disable-stats', '--disable-libdl', + '--enable-opt-safety-checks', ] malloc_conf_unusuals = [ From 1aabab5fdca1cd76be3900e9272ef83549006ac0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Apr 2019 15:36:31 -0700 Subject: [PATCH 1292/2608] Enforce TLS_MODEL attribute. Caught by @zoulasc in #1460. The attribute needs to be added in the headers as well. --- include/jemalloc/internal/tsd_malloc_thread_cleanup.h | 6 ++++-- include/jemalloc/internal/tsd_tls.h | 4 +++- src/tsd.c | 6 +++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index bf8801ef..65852d5c 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,8 +3,10 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H -extern __thread tsd_t tsd_tls; -extern __thread bool tsd_initialized; +#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; +extern JEMALLOC_TSD_TYPE_ATTR(bool) tsd_initialized; extern bool tsd_booted; /* Initialization/cleanup. 
*/ diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index f4f165c7..7d6c805b 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H -extern __thread tsd_t tsd_tls; +#define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; extern pthread_key_t tsd_tsd; extern bool tsd_booted; diff --git a/src/tsd.c b/src/tsd.c index d5fb4d6f..a31f6b96 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -17,11 +17,11 @@ JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; -__thread bool JEMALLOC_TLS_MODEL tsd_initialized = false; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false; bool tsd_booted = false; #elif (defined(JEMALLOC_TLS)) -__thread tsd_t JEMALLOC_TLS_MODEL tsd_tls = TSD_INITIALIZER; +JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; bool tsd_booted = false; #elif (defined(_WIN32)) From 498f47e1ec83431426cdff256c23eceade41b4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=A3=8E?= Date: Mon, 22 Apr 2019 14:21:12 +0800 Subject: [PATCH 1293/2608] Fix typo derived from tcmalloc's pprof The same pr is submitted into gperftools: https://github.com/gperftools/gperftools/pull/1105 --- bin/jeprof.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index 16a76c6c..3ed408c9 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -5367,7 +5367,7 @@ sub GetProcedureBoundaries { my $demangle_flag = ""; my $cppfilt_flag = ""; my $to_devnull = ">$dev_null 2>&1"; - if (system(ShellEscape($nm, "--demangle", "image") . $to_devnull) == 0) { + if (system(ShellEscape($nm, "--demangle", $image) . 
$to_devnull) == 0) { # In this mode, we do "nm --demangle " $demangle_flag = "--demangle"; $cppfilt_flag = ""; From 702d76dbd03e4fe7347399e1e322c80102c95544 Mon Sep 17 00:00:00 2001 From: Fabrice Fontaine Date: Fri, 19 Apr 2019 13:44:18 +0200 Subject: [PATCH 1294/2608] configure.ac: Add an option to disable doc Signed-off-by: Fabrice Fontaine --- Makefile.in | 7 ++++++- configure.ac | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 38722ff9..7128b007 100644 --- a/Makefile.in +++ b/Makefile.in @@ -56,6 +56,7 @@ cfghdrs_out := @cfghdrs_out@ cfgoutputs_in := $(addprefix $(srcroot),@cfgoutputs_in@) cfgoutputs_out := @cfgoutputs_out@ enable_autogen := @enable_autogen@ +enable_doc := @enable_doc@ enable_shared := @enable_shared@ enable_static := @enable_static@ enable_prof := @enable_prof@ @@ -516,7 +517,11 @@ done install_doc: build_doc install_doc_html install_doc_man -install: install_bin install_include install_lib install_doc +install: install_bin install_include install_lib + +ifeq ($(enable_doc), 1) +install: install_doc +endif tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) diff --git a/configure.ac b/configure.ac index 7a83a1a4..39a540fd 100644 --- a/configure.ac +++ b/configure.ac @@ -893,6 +893,19 @@ AC_PROG_RANLIB AC_PATH_PROG([LD], [ld], [false], [$PATH]) AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) +dnl Enable documentation +AC_ARG_ENABLE([doc], + [AS_HELP_STRING([--enable-documentation], [Build documentation])], +if test "x$enable_doc" = "xno" ; then + enable_doc="0" +else + enable_doc="1" +fi +, +enable_doc="1" +) +AC_SUBST([enable_doc]) + dnl Enable shared libs AC_ARG_ENABLE([shared], [AS_HELP_STRING([--enable-shared], [Build shared libaries])], @@ -2369,6 +2382,7 @@ AC_MSG_RESULT([JEMALLOC_PRIVATE_NAMESPACE]) AC_MSG_RESULT([ : 
${JEMALLOC_PRIVATE_NAMESPACE}]) AC_MSG_RESULT([install_suffix : ${install_suffix}]) AC_MSG_RESULT([malloc_conf : ${config_malloc_conf}]) +AC_MSG_RESULT([documentation : ${enable_doc}]) AC_MSG_RESULT([shared libs : ${enable_shared}]) AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) From ae124b86849bb5464940db6731183dede6a70873 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 18 Apr 2019 15:11:07 -0700 Subject: [PATCH 1295/2608] Improve size class header Mainly fixing typos. The only non-trivial change is in the computation for SC_NPSIZES, though the result wouldn't be any different when SC_NGROUP = 4 as is always the case at the moment. --- include/jemalloc/internal/sc.h | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index ef0a4512..9a099d8b 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -18,7 +18,7 @@ * each one covers allocations for base / SC_NGROUP possible allocation sizes. * We call that value (base / SC_NGROUP) the delta of the group. Each size class * is delta larger than the one before it (including the initial size class in a - * group, which is delta large than 2**base, the largest size class in the + * group, which is delta larger than base, the largest size class in the * previous group). * To make the math all work out nicely, we require that SC_NGROUP is a power of * two, and define it in terms of SC_LG_NGROUP. We'll often talk in terms of @@ -53,10 +53,11 @@ * classes; one per power of two, up until we hit the quantum size. There are * therefore LG_QUANTUM - SC_LG_TINY_MIN such size classes. * - * Next, we have a size class of size LG_QUANTUM. 
This can't be the start of a - * group in the sense we described above (covering a power of two range) since, - * if we divided into it to pick a value of delta, we'd get a delta smaller than - * (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which is against the rules. + * Next, we have a size class of size (1 << LG_QUANTUM). This can't be the + * start of a group in the sense we described above (covering a power of two + * range) since, if we divided into it to pick a value of delta, we'd get a + * delta smaller than (1 << LG_QUANTUM) for sizes >= (1 << LG_QUANTUM), which + * is against the rules. * * The first base we can divide by SC_NGROUP while still being at least * (1 << LG_QUANTUM) is SC_NGROUP * (1 << LG_QUANTUM). We can get there by @@ -196,7 +197,7 @@ (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) - /* The number of size classes that are a multiple of the page size. */ +/* The number of size classes that are a multiple of the page size. */ #define SC_NPSIZES ( \ /* Start with all the size classes. */ \ SC_NSIZES \ @@ -206,8 +207,20 @@ - SC_NPSEUDO \ /* And the tiny group. */ \ - SC_NTINY \ - /* Groups where ndelta*delta is not a multiple of the page size. */ \ - - (2 * (SC_NGROUP))) + /* Sizes where ndelta*delta is not a multiple of the page size. */ \ + - (SC_LG_NGROUP * SC_NGROUP)) +/* + * Note that the last line is computed as the sum of the second column in the + * following table: + * lg(base) | count of sizes to exclude + * ------------------------------|----------------------------- + * LG_PAGE - 1 | SC_NGROUP - 1 + * LG_PAGE | SC_NGROUP - 1 + * LG_PAGE + 1 | SC_NGROUP - 2 + * LG_PAGE + 2 | SC_NGROUP - 4 + * ... | ... + * LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2) + */ /* * We declare a size class is binnable if size < page size * group. 
Or, in other From 7fc4f2a32c74701e40e98c8ac05aa7cf12d876c9 Mon Sep 17 00:00:00 2001 From: Doron Roberts-Kedes Date: Fri, 12 Apr 2019 07:08:50 -0400 Subject: [PATCH 1296/2608] Add nonfull_slabs to bin_stats_t. When config_stats is enabled track the size of bin->slabs_nonfull in the new nonfull_slabs counter in bin_stats_t. This metric should be useful for establishing an upper ceiling on the savings possible by meshing. --- doc/jemalloc.xml.in | 11 +++++++++++ include/jemalloc/internal/bin.h | 1 + include/jemalloc/internal/bin_stats.h | 3 +++ src/arena.c | 7 +++++++ src/ctl.c | 7 +++++++ src/stats.c | 7 +++++++ test/unit/stats.c | 7 ++++++- 7 files changed, 42 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fd0edb30..2bdbe978 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2947,6 +2947,17 @@ struct extent_hooks_s { Current number of slabs. + + + + stats.arenas.<i>.bins.<j>.nonfull_slabs + (size_t) + r- + [] + + Current number of nonfull slabs. + + stats.arenas.<i>.bins.<j>.mutex.{counter} diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index f542c882..8547e893 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -116,6 +116,7 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { dst_bin_stats->nslabs += bin->stats.nslabs; dst_bin_stats->reslabs += bin->stats.reslabs; dst_bin_stats->curslabs += bin->stats.curslabs; + dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index 86e673ec..d04519c8 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -45,6 +45,9 @@ struct bin_stats_s { /* Current number of slabs in this bin. */ size_t curslabs; + /* Current size of nonfull slabs heap in this bin. 
*/ + size_t nonfull_slabs; + mutex_prof_data_t mutex_data; }; diff --git a/src/arena.c b/src/arena.c index 084df855..a0804f61 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1002,11 +1002,17 @@ static void arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs++; + } } static void arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs--; + } } static extent_t * @@ -1017,6 +1023,7 @@ arena_bin_slabs_nonfull_tryget(bin_t *bin) { } if (config_stats) { bin->stats.reslabs++; + bin->stats.nonfull_slabs--; } return slab; } diff --git a/src/ctl.c b/src/ctl.c index c113bf24..d258b8eb 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -169,6 +169,7 @@ CTL_PROTO(stats_arenas_i_bins_j_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) +CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -454,6 +455,7 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, + {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; @@ -907,8 +909,11 @@ MUTEX_PROF_ARENA_MUTEXES if (!destroyed) { sdstats->bstats[i].curslabs += astats->bstats[i].curslabs; + sdstats->bstats[i].nonfull_slabs += + astats->bstats[i].nonfull_slabs; } else { assert(astats->bstats[i].curslabs == 0); + assert(astats->bstats[i].nonfull_slabs == 0); } malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); @@ -2966,6 
+2971,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, + arenas_i(mib[2])->astats->bstats[mib[4]].nonfull_slabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/stats.c b/src/stats.c index 2be9a7e2..d196666f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -294,6 +294,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti COL_HDR(row, nshards, NULL, right, 9, unsigned) COL_HDR(row, curregs, NULL, right, 13, size) COL_HDR(row, curslabs, NULL, right, 13, size) + COL_HDR(row, nonfull_slabs, NULL, right, 15, size) COL_HDR(row, regs, NULL, right, 5, unsigned) COL_HDR(row, pgs, NULL, right, 4, size) /* To buffer a right- and left-justified column. 
*/ @@ -337,6 +338,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint64_t nslabs; size_t reg_size, slab_size, curregs; size_t curslabs; + size_t nonfull_slabs; uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; @@ -372,6 +374,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint64_t); CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); + CTL_M2_M4_GET("stats.arenas.0.bins.0.nonfull_slabs", i, j, &nonfull_slabs, + size_t); if (mutex) { mutex_stats_read_arena_bin(i, j, col_mutex64, @@ -395,6 +399,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti &nreslabs); emitter_json_kv(emitter, "curslabs", emitter_type_size, &curslabs); + emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, + &nonfull_slabs); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit(emitter, NULL, col_mutex64, @@ -434,6 +440,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti col_nshards.unsigned_val = nshards; col_curregs.size_val = curregs; col_curslabs.size_val = curslabs; + col_nonfull_slabs.size_val = nonfull_slabs; col_regs.unsigned_val = nregs; col_pgs.size_val = slab_size / page; col_util.str_val = util; diff --git a/test/unit/stats.c b/test/unit/stats.c index 4323bfa3..646768e8 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -228,7 +228,7 @@ gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) { TEST_BEGIN(test_stats_arenas_bins) { void *p; - size_t sz, curslabs, curregs; + size_t sz, curslabs, curregs, nonfull_slabs; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nslabs, nreslabs; int expected = config_stats ? 
0 : ENOENT; @@ -289,6 +289,9 @@ TEST_BEGIN(test_stats_arenas_bins) { gen_mallctl_str(cmd, "curslabs", arena_ind); assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + gen_mallctl_str(cmd, "nonfull_slabs", arena_ind); + assert_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { assert_u64_gt(nmalloc, 0, @@ -309,6 +312,8 @@ TEST_BEGIN(test_stats_arenas_bins) { "At least one slab should have been allocated"); assert_zu_gt(curslabs, 0, "At least one slab should be currently allocated"); + assert_zu_eq(nonfull_slabs, 0, + "slabs_nonfull should be empty"); } dallocx(p, 0); From b62d126df894dac00772eb5f3d170a1c1d3d1614 Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Mon, 8 Apr 2019 09:37:58 -0700 Subject: [PATCH 1297/2608] Add max_active_fit to first_fit The max_active_fit check is currently only on the best_fit path, add it to the first_fit path also. --- src/extent.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index 66cbf05b..c8d1dd5f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -483,7 +483,16 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { + bool size_ok = true; + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. 
+ */ + if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + size_ok = false; + } + if (size_ok && + (ret == NULL || extent_snad_comp(extent, ret) < 0)) { ret = extent; } if (i == SC_NPSIZES) { From 56797512083fe1457163170dfa44ee5ec12abe5f Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Thu, 7 Mar 2019 11:14:31 -0800 Subject: [PATCH 1298/2608] Remove best fit This option saves a few CPU cycles, but potentially adds a lot of fragmentation - so much so that there are workarounds like max_active. Instead, let's just drop it entirely. It only made a difference in one service I tested (.3% cpu regression), while many services saw a memory win (also small, less than 1% mem P99) --- src/extent.c | 40 ++++++++-------------------------------- 1 file changed, 8 insertions(+), 32 deletions(-) diff --git a/src/extent.c b/src/extent.c index c8d1dd5f..e83d9c8c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -441,30 +441,6 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, return NULL; } -/* Do any-best-fit extent selection, i.e. select any extent that best fits. */ -static extent_t * -extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); - pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, - (size_t)pind); - if (i < SC_NPSIZES + 1) { - /* - * In order to reduce fragmentation, avoid reusing and splitting - * large extents for much smaller sizes. - */ - if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { - return NULL; - } - assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_first(&extents->heaps[i]); - assert(extent_size_get(extent) >= size); - return extent; - } - - return NULL; -} - /* * Do first-fit extent selection, i.e. select the oldest/lowest extent that is * large enough. 
@@ -487,12 +463,15 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, /* * In order to reduce fragmentation, avoid reusing and splitting * large extents for much smaller sizes. + * + * Only do check for dirty extents (delay_coalesce). */ - if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + if (extents->delay_coalesce && + (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { size_ok = false; } if (size_ok && - (ret == NULL || extent_snad_comp(extent, ret) < 0)) { + (ret == NULL || extent_snad_comp(extent, ret) < 0)) { ret = extent; } if (i == SC_NPSIZES) { @@ -505,10 +484,8 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, } /* - * Do {best,first}-fit extent selection, where the selection policy choice is - * based on extents->delay_coalesce. Best-fit selection requires less - * searching, but its layout policy is less stable and may cause higher virtual - * memory fragmentation as a side effect. + * Do first-fit extent selection, where the selection policy choice is + * based on extents->delay_coalesce. */ static extent_t * extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, @@ -521,8 +498,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, return NULL; } - extent_t *extent = extents->delay_coalesce ? - extents_best_fit_locked(tsdn, arena, extents, max_size) : + extent_t *extent = extents_first_fit_locked(tsdn, arena, extents, max_size); if (alignment > PAGE && extent == NULL) { From 259b15dec5bff8b67b331b63703aa8511c759077 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 May 2019 16:22:10 -0700 Subject: [PATCH 1299/2608] Improve macro readability in malloc_conf_init Define more readable macros than yes and no. 
--- src/jemalloc.c | 44 ++++++++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 818ce3aa..04ebe51f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1041,10 +1041,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -#define CONF_MIN_no(um, min) false -#define CONF_MIN_yes(um, min) ((um) < (min)) -#define CONF_MAX_no(um, max) false -#define CONF_MAX_yes(um, max) ((um) > (max)) +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) #define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ uintmax_t um; \ @@ -1058,21 +1058,17 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { "Invalid conf value", \ k, klen, v, vlen); \ } else if (clip) { \ - if (CONF_MIN_##check_min(um, \ - (t)(min))) { \ + if (check_min(um, (t)(min))) { \ o = (t)(min); \ } else if ( \ - CONF_MAX_##check_max(um, \ - (t)(max))) { \ + check_max(um, (t)(max))) { \ o = (t)(max); \ } else { \ o = (t)um; \ } \ } else { \ - if (CONF_MIN_##check_min(um, \ - (t)(min)) || \ - CONF_MAX_##check_max(um, \ - (t)(max))) { \ + if (check_min(um, (t)(min)) || \ + check_max(um, (t)(max))) { \ malloc_conf_error( \ "Out-of-range " \ "conf value", \ @@ -1167,7 +1163,8 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { continue; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, yes, no, false) + UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + false) if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; size_t vlen_left = vlen; @@ -1249,11 +1246,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { * contention on the huge arena. 
*/ CONF_HANDLE_SIZE_T(opt_oversize_threshold, - "oversize_threshold", 0, SC_LARGE_MAXCLASS, no, yes, - false) + "oversize_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, "lg_extent_max_active_fit", 0, - (sizeof(size_t) << 3), no, yes, false) + (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, false) if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; @@ -1281,7 +1279,8 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { "background_thread"); CONF_HANDLE_SIZE_T(opt_max_background_threads, "max_background_threads", 1, - opt_max_background_threads, yes, yes, + opt_max_background_threads, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { bool err; @@ -1317,7 +1316,8 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - - 1, no, yes, true) + - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, @@ -1363,10 +1363,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { #undef CONF_MATCH #undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL -#undef CONF_MIN_no -#undef CONF_MIN_yes -#undef CONF_MAX_no -#undef CONF_MAX_yes +#undef CONF_DONT_CHECK_MIN +#undef CONF_CHECK_MIN +#undef CONF_DONT_CHECK_MAX +#undef CONF_CHECK_MAX #undef CONF_HANDLE_T_U #undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T From 13e88ae9700416b43bf88c596ea15c85bdb9f9e7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 May 2019 07:50:10 -0700 Subject: [PATCH 1300/2608] Fix assert in free fastpath rtree_szind_slab_read_fast() may have not initialized alloc_ctx.szind, unless after confirming the return is true. 
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 04ebe51f..ec6b400a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2744,12 +2744,12 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != SC_NSIZES); /* Note: profiled objects will have alloc_ctx.slab set */ if (!res || !alloc_ctx.slab) { return false; } + assert(alloc_ctx.szind != SC_NSIZES); } else { /* * Check for both sizes that are too large, and for sampled objects. From 07c44847c24634d0d11f9ceab7318400ffc1a16e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 6 May 2019 16:36:55 -0700 Subject: [PATCH 1301/2608] Track nfills and nflushes for arenas.i.small / large. Small is added purely for convenience. Large flushes wasn't tracked before and can be useful in analysis. Large fill simply reports nmalloc, since there is no batch fill for large currently. --- doc/jemalloc.xml.in | 44 +++++++++++++++++++++++++ include/jemalloc/internal/arena_stats.h | 16 +++++++-- include/jemalloc/internal/ctl.h | 2 ++ src/arena.c | 9 +++++ src/ctl.c | 39 ++++++++++++++++++---- src/stats.c | 36 ++++++++++++++++++-- src/tcache.c | 8 ++--- 7 files changed, 139 insertions(+), 15 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2bdbe978..04a47648 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2798,6 +2798,28 @@ struct extent_hooks_s { all bin size classes. + + + stats.arenas.<i>.small.nfills + (uint64_t) + r- + [] + + Cumulative number of tcache fills by all small size + classes. + + + + + stats.arenas.<i>.small.nflushes + (uint64_t) + r- + [] + + Cumulative number of tcache flushes by all small size + classes. + + stats.arenas.<i>.large.allocated @@ -2848,6 +2870,28 @@ struct extent_hooks_s { all large size classes. 
+ + + stats.arenas.<i>.large.nfills + (uint64_t) + r- + [] + + Cumulative number of tcache fills by all large size + classes. + + + + + stats.arenas.<i>.large.nflushes + (uint64_t) + r- + [] + + Cumulative number of tcache flushes by all large size + classes. + + stats.arenas.<i>.bins.<j>.nmalloc diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index ef1e25b3..3ffe9c78 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -35,6 +35,13 @@ struct arena_stats_large_s { * periodically merges into this counter. */ arena_stats_u64_t nrequests; /* Partially derived. */ + /* + * Number of tcache fills / flushes for large (similarly, periodically + * merged). Note that there is no large tcache batch-fill currently + * (i.e. only fill 1 at a time); however flush may be batched. + */ + arena_stats_u64_t nfills; /* Partially derived. */ + arena_stats_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. */ @@ -101,6 +108,8 @@ struct arena_stats_s { atomic_zu_t allocated_large; /* Derived. */ arena_stats_u64_t nmalloc_large; /* Derived. */ arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nfills_large; /* Derived. */ + arena_stats_u64_t nflushes_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. */ /* Number of bytes cached in tcache associated with this arena. 
*/ @@ -240,11 +249,12 @@ arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { } static inline void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, +arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - SC_NBINS].nrequests, nrequests); + arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; + arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests); + arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1); arena_stats_unlock(tsdn, arena_stats); } diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 775fdec0..1d1aacc6 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -39,6 +39,8 @@ typedef struct ctl_arena_stats_s { uint64_t nmalloc_small; uint64_t ndalloc_small; uint64_t nrequests_small; + uint64_t nfills_small; + uint64_t nflushes_small; bin_stats_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; diff --git a/src/arena.c b/src/arena.c index a0804f61..f9336fee 100644 --- a/src/arena.c +++ b/src/arena.c @@ -151,6 +151,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_u64(&astats->nrequests_large, nmalloc + nrequests); + /* nfill == nmalloc for large currently. 
*/ + arena_stats_accum_u64(&lstats[i].nfills, nmalloc); + arena_stats_accum_u64(&astats->nfills_large, nmalloc); + + uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + &arena->stats.lstats[i].nflushes); + arena_stats_accum_u64(&lstats[i].nflushes, nflush); + arena_stats_accum_u64(&astats->nflushes_large, nflush); + assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); diff --git a/src/ctl.c b/src/ctl.c index d258b8eb..f0d51df9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -156,10 +156,14 @@ CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) CTL_PROTO(stats_arenas_i_small_nrequests) +CTL_PROTO(stats_arenas_i_small_nfills) +CTL_PROTO(stats_arenas_i_small_nflushes) CTL_PROTO(stats_arenas_i_large_allocated) CTL_PROTO(stats_arenas_i_large_nmalloc) CTL_PROTO(stats_arenas_i_large_ndalloc) CTL_PROTO(stats_arenas_i_large_nrequests) +CTL_PROTO(stats_arenas_i_large_nfills) +CTL_PROTO(stats_arenas_i_large_nflushes) CTL_PROTO(stats_arenas_i_bins_j_nmalloc) CTL_PROTO(stats_arenas_i_bins_j_ndalloc) CTL_PROTO(stats_arenas_i_bins_j_nrequests) @@ -414,14 +418,18 @@ static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)} + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)} }; static const ctl_named_node_t stats_arenas_i_large_node[] = { {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)} + {NAME("nrequests"), 
CTL(stats_arenas_i_large_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)} }; #define MUTEX_PROF_DATA_NODE(prefix) \ @@ -754,6 +762,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nmalloc_small = 0; ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; + ctl_arena->astats->nfills_small = 0; + ctl_arena->astats->nflushes_small = 0; memset(ctl_arena->astats->bstats, 0, SC_NBINS * sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * @@ -785,6 +795,10 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->bstats[i].ndalloc; ctl_arena->astats->nrequests_small += ctl_arena->astats->bstats[i].nrequests; + ctl_arena->astats->nfills_small += + ctl_arena->astats->bstats[i].nfills; + ctl_arena->astats->nflushes_small += + ctl_arena->astats->bstats[i].nflushes; } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, @@ -867,6 +881,8 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nmalloc_small += astats->nmalloc_small; sdstats->ndalloc_small += astats->ndalloc_small; sdstats->nrequests_small += astats->nrequests_small; + sdstats->nfills_small += astats->nfills_small; + sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { accum_atomic_zu(&sdstats->astats.allocated_large, @@ -2847,6 +2863,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_ndalloc, arenas_i(mib[2])->astats->ndalloc_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nrequests, arenas_i(mib[2])->astats->nrequests_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills, + arenas_i(mib[2])->astats->nfills_small, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes, + arenas_i(mib[2])->astats->nflushes_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) 
@@ -2856,12 +2876,19 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) -/* - * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. - */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, ctl_arena_stats_read_u64( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ + &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) +/* + * Note: "nmalloc_large" here instead of "nfills" in the read. This is + * intentional (large has no batch fill). + */ +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) /* Lock profiling related APIs below. */ #define RO_MUTEX_CTL_GEN(n, l) \ diff --git a/src/stats.c b/src/stats.c index d196666f..55a59994 100644 --- a/src/stats.c +++ b/src/stats.c @@ -668,9 +668,11 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; - uint64_t small_nmalloc, small_ndalloc, small_nrequests; + uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills, + small_nflushes; size_t large_allocated; - uint64_t large_nmalloc, large_ndalloc, large_nrequests; + uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, + large_nflushes; size_t tcache_bytes; uint64_t uptime; @@ -828,11 +830,23 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, COL(alloc_count_row, count_nrequests_ps, right, 10, title); col_count_nrequests_ps.str_val = "(#/sec)"; + COL(alloc_count_row, count_nfills, right, 16, title); + 
col_count_nfills.str_val = "nfill"; + COL(alloc_count_row, count_nfills_ps, right, 10, title); + col_count_nfills_ps.str_val = "(#/sec)"; + + COL(alloc_count_row, count_nflushes, right, 16, title); + col_count_nflushes.str_val = "nflush"; + COL(alloc_count_row, count_nflushes_ps, right, 10, title); + col_count_nflushes_ps.str_val = "(#/sec)"; + emitter_table_row(emitter, &alloc_count_row); col_count_nmalloc_ps.type = emitter_type_uint64; col_count_ndalloc_ps.type = emitter_type_uint64; col_count_nrequests_ps.type = emitter_type_uint64; + col_count_nfills_ps.type = emitter_type_uint64; + col_count_nflushes_ps.type = emitter_type_uint64; #define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ @@ -855,6 +869,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "small". 
*/ @@ -872,6 +892,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64) + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64) + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "large". */ @@ -884,12 +910,18 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; col_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; col_count_nrequests.uint64_val = small_nrequests + large_nrequests; + col_count_nfills.uint64_val = small_nfills + large_nfills; + col_count_nflushes.uint64_val = small_nflushes + large_nflushes; col_count_nmalloc_ps.uint64_val = rate_per_second(col_count_nmalloc.uint64_val, uptime); col_count_ndalloc_ps.uint64_val = rate_per_second(col_count_ndalloc.uint64_val, uptime); col_count_nrequests_ps.uint64_val = rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nfills_ps.uint64_val = + rate_per_second(col_count_nfills.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = + rate_per_second(col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; diff --git a/src/tcache.c b/src/tcache.c index 034c69a0..50099a9f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -282,8 +282,8 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, } if (config_stats) { merged_stats = true; - arena_stats_large_nrequests_add(tsd_tsdn(tsd), - &tcache_arena->stats, binind, + arena_stats_large_flush_nrequests_add( + tsd_tsdn(tsd), &tcache_arena->stats, binind, 
tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -324,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_stats_large_nrequests_add(tsd_tsdn(tsd), + arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd), &tcache_arena->stats, binind, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } @@ -615,7 +615,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { for (; i < nhbins; i++) { cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - arena_stats_large_nrequests_add(tsdn, &arena->stats, i, + arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; } From 2d6d099fed05b1509e81e54458516528bfbbf38d Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Tue, 21 May 2019 12:06:16 +0530 Subject: [PATCH 1302/2608] Fix GCC-9.1 warning with macro GET_ARG_NUMERIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC-9.1 reports following error when trying to compile file src/malloc_io.c and with CFLAGS='-Werror' : src/malloc_io.c: In function ‘malloc_vsnprintf’: src/malloc_io.c:369:2: error: case label value exceeds maximum value for type [-Werror] 369 | case '?' | 0x80: \ | ^~~~ src/malloc_io.c:581:5: note: in expansion of macro ‘GET_ARG_NUMERIC’ 581 | GET_ARG_NUMERIC(val, 'p'); | ^~~~~~~~~~~~~~~ ... cc1: all warnings being treated as errors make: *** [Makefile:388: src/malloc_io.sym.o] Error 1 The warning is reported as by default the type 'char' is 'signed char' and or-ing 0x80 will turn the case label char negative which will be beyond the printable ascii range (0 - 127). The patch fixes this by explicitly casting the 'len' variable as unsigned char' inside the 'switch' statement so that value of expression " '?' | 0x80 " falls within the legal values of the variable 'len'. 
--- src/malloc_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/malloc_io.c b/src/malloc_io.c index 7bdc13f9..dd882651 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -362,7 +362,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { } \ } while (0) #define GET_ARG_NUMERIC(val, len) do { \ - switch (len) { \ + switch ((unsigned char)len) { \ case '?': \ val = va_arg(ap, int); \ break; \ From 4c63b0e76a693b0cfdf209cb4f8fbd1ed74453b0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 May 2019 14:59:33 -0700 Subject: [PATCH 1303/2608] Improve memory utilization tests Added tests for large size classes and expanded the tests to cover wider range of allocation sizes. --- src/ctl.c | 8 +- test/unit/extent_util.c | 303 +++++++++++++++++++++++++--------------- 2 files changed, 195 insertions(+), 116 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f0d51df9..176cb65f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3143,15 +3143,15 @@ label_return: * (f) total number of regions in the bin the extent belongs to. * * Note that "(e)" and "(f)" are only available when stats are enabled; - * otherwise both are set zero. + * otherwise their values are undefined. * * This API is mainly intended for small class allocations, where extents are * used as slab. * * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)" - * will be zero. The other three fields will be properly set though the values - * are trivial: "(b)" will be 0, "(c)" will be 1, and "(d)" will be the usable - * size. + * will be zero (if stats are enabled; otherwise undefined). The other three + * fields will be properly set though the values are trivial: "(b)" will be 0, + * "(c)" will be 1, and "(d)" will be the usable size. * * The input pointer and size are respectively passed in by newp and newlen, * and the output fields and size are respectively oldp and *oldlenp. 
diff --git a/test/unit/extent_util.c b/test/unit/extent_util.c index 6995325f..97e55f0f 100644 --- a/test/unit/extent_util.c +++ b/test/unit/extent_util.c @@ -25,18 +25,30 @@ #define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") -TEST_BEGIN(test_query) { - void *p = mallocx(1, 0); - void **in = &p; - size_t in_sz = sizeof(const void *); - size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; - void *out = mallocx(out_sz, 0); - void *out_ref = mallocx(out_sz, 0); - size_t out_sz_ref = out_sz; +#define TEST_MAX_SIZE (1 << 20) - assert_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(out, "test output allocation failed"); - assert_ptr_not_null(out_ref, "test reference output allocation failed"); +TEST_BEGIN(test_query) { + size_t sz; + /* + * Select some sizes that can span both small and large sizes, and are + * numerically unrelated to any size boundaries. + */ + for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; + sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { + void *p = mallocx(sz, 0); + void **in = &p; + size_t in_sz = sizeof(const void *); + size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); + size_t out_sz_ref = out_sz; + + assert_ptr_not_null(p, + "test pointer allocation failed"); + assert_ptr_not_null(out, + "test output allocation failed"); + assert_ptr_not_null(out_ref, + "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) #define COUNTS(out) ((size_t *)((void **)out + 1)) @@ -46,55 +58,91 @@ TEST_BEGIN(test_query) { #define BIN_NFREE_READ(out) COUNTS(out)[3] #define BIN_NREGS_READ(out) COUNTS(out)[4] - SLABCUR_READ(out) = NULL; - NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; - BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; - memcpy(out_ref, out, out_sz); + SLABCUR_READ(out) = NULL; + NFREE_READ(out) = NREGS_READ(out) = SIZE_READ(out) = -1; + BIN_NFREE_READ(out) = BIN_NREGS_READ(out) = -1; + 
memcpy(out_ref, out, out_sz); - /* Test invalid argument(s) errors */ - TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); - TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero"); - in_sz -= 1; - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid newlen"); - in_sz += 1; - out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, "invalid *oldlenp"); - out_sz_ref = out_sz += 2 * sizeof(size_t); + /* Test invalid argument(s) errors */ + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, + "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, + "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, + "newp is NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, + "newlen is zero"); + in_sz -= 1; + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, + "invalid newlen"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, + "invalid *oldlenp"); + out_sz_ref = out_sz += 2 * sizeof(size_t); - /* Examine output for valid call */ - TEST_UTIL_VALID("query"); - assert_zu_le(NFREE_READ(out), NREGS_READ(out), - "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out), SIZE_READ(out), - "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out), 0, - "Extent region count must be positive"); - assert_zu_ne(SIZE_READ(out), 0, "Extent size must be positive"); - if (config_stats) { - assert_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), - "Bin free count exceeded region count"); - assert_zu_ne(BIN_NREGS_READ(out), 0, - "Bin region count must be positive"); - assert_zu_le(NFREE_READ(out), BIN_NFREE_READ(out), - "Extent free count exceeded bin free count"); - assert_zu_le(NREGS_READ(out), BIN_NREGS_READ(out), - "Extent region count exceeded bin region count"); - 
assert_zu_eq(BIN_NREGS_READ(out) % NREGS_READ(out), 0, - "Bin region count isn't a multiple of extent region count"); - assert_zu_le(NREGS_READ(out) - NFREE_READ(out), - BIN_NREGS_READ(out) - BIN_NFREE_READ(out), - "Extent utilized count exceeded bin utilized count"); - } else { - assert_zu_eq(BIN_NFREE_READ(out), 0, - "Bin free count should be zero when stats are disabled"); - assert_zu_eq(BIN_NREGS_READ(out), 0, - "Bin region count should be zero when stats are disabled"); - } - assert_ptr_not_null(SLABCUR_READ(out), "Current slab is null"); - assert_true(NFREE_READ(out) == 0 || SLABCUR_READ(out) <= p, - "Allocation should follow first fit principle"); + /* Examine output for valid call */ + TEST_UTIL_VALID("query"); + assert_zu_le(sz, SIZE_READ(out), + "Extent size should be at least allocation size"); + assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, + "Extent size should be a multiple of page size"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out), NREGS_READ(out), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out), SIZE_READ(out), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out), 0, + "Extent region count must be positive"); + assert_ptr_not_null(SLABCUR_READ(out), + "Current slab is null"); + assert_true(NFREE_READ(out) == 0 + || SLABCUR_READ(out) <= p, + "Allocation should follow first fit principle"); + if (config_stats) { + assert_zu_le(BIN_NFREE_READ(out), + BIN_NREGS_READ(out), + "Bin free count exceeded region count"); + assert_zu_ne(BIN_NREGS_READ(out), 0, + "Bin region count must be positive"); + assert_zu_le(NFREE_READ(out), + BIN_NFREE_READ(out), + "Extent free count exceeded bin free count"); + assert_zu_le(NREGS_READ(out), + BIN_NREGS_READ(out), + "Extent region count exceeded " + "bin region count"); + assert_zu_eq(BIN_NREGS_READ(out) + % NREGS_READ(out), 0, + "Bin region count isn't a multiple of " + "extent region count"); + assert_zu_le( + BIN_NFREE_READ(out) - NFREE_READ(out), + 
BIN_NREGS_READ(out) - NREGS_READ(out), + "Free count in other extents in the bin " + "exceeded region count in other extents " + "in the bin"); + assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + BIN_NREGS_READ(out) - BIN_NFREE_READ(out), + "Extent utilized count exceeded " + "bin utilized count"); + } + } else { + assert_zu_eq(NFREE_READ(out), 0, + "Extent free count should be zero"); + assert_zu_eq(NREGS_READ(out), 1, + "Extent region count should be one"); + assert_ptr_null(SLABCUR_READ(out), + "Current slab must be null for large size classes"); + if (config_stats) { + assert_zu_eq(BIN_NFREE_READ(out), 0, + "Bin free count must be zero for " + "large sizes"); + assert_zu_eq(BIN_NREGS_READ(out), 0, + "Bin region count must be zero for " + "large sizes"); + } + } #undef BIN_NREGS_READ #undef BIN_NFREE_READ @@ -104,42 +152,54 @@ TEST_BEGIN(test_query) { #undef COUNTS #undef SLABCUR_READ - free(out_ref); - free(out); - free(p); + free(out_ref); + free(out); + free(p); + } } TEST_END TEST_BEGIN(test_batch) { - void *p = mallocx(1, 0); - void *q = mallocx(1, 0); - void *in[] = {p, q}; - size_t in_sz = sizeof(const void *) * 2; - size_t out[] = {-1, -1, -1, -1, -1, -1}; - size_t out_sz = sizeof(size_t) * 6; - size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; - size_t out_sz_ref = out_sz; + size_t sz; + /* + * Select some sizes that can span both small and large sizes, and are + * numerically unrelated to any size boundaries. + */ + for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; + sz += (sz <= SC_SMALL_MAXCLASS ? 
1019 : 99991)) { + void *p = mallocx(sz, 0); + void *q = mallocx(sz, 0); + void *in[] = {p, q}; + size_t in_sz = sizeof(const void *) * 2; + size_t out[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz = sizeof(size_t) * 6; + size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; + size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(q, "test pointer allocation failed"); + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(q, "test pointer allocation failed"); - /* Test invalid argument(s) errors */ - TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); - TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, "newp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero"); - in_sz -= 1; - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "newlen is not an exact multiple"); - in_sz += 1; - out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "*oldlenp is not an exact multiple"); - out_sz_ref = out_sz += 2 * sizeof(size_t); - in_sz -= sizeof(const void *); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "*oldlenp and newlen do not match"); - in_sz += sizeof(const void *); + /* Test invalid argument(s) errors */ + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, + "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, + "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, + "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, + "newlen is zero"); + in_sz -= 1; + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "newlen is not an exact multiple"); + in_sz += 1; + out_sz_ref = out_sz -= 2 * sizeof(size_t); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + "*oldlenp is not an exact multiple"); + out_sz_ref = out_sz += 2 * sizeof(size_t); + in_sz -= sizeof(const void *); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, + 
"*oldlenp and newlen do not match"); + in_sz += sizeof(const void *); /* Examine output for valid calls */ #define TEST_EQUAL_REF(i, message) \ @@ -149,29 +209,45 @@ TEST_BEGIN(test_batch) { #define NREGS_READ(out, i) out[(i) * 3 + 1] #define SIZE_READ(out, i) out[(i) * 3 + 2] - out_sz_ref = out_sz /= 2; - in_sz /= 2; - TEST_UTIL_BATCH_VALID; - assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), - "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), - "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out, 0), 0, - "Extent region count must be positive"); - assert_zu_ne(SIZE_READ(out, 0), 0, "Extent size must be positive"); - TEST_EQUAL_REF(1, "Should not overwrite content beyond what's needed"); - in_sz *= 2; - out_sz_ref = out_sz *= 2; + out_sz_ref = out_sz /= 2; + in_sz /= 2; + TEST_UTIL_BATCH_VALID; + assert_zu_le(sz, SIZE_READ(out, 0), + "Extent size should be at least allocation size"); + assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, + "Extent size should be a multiple of page size"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + "Extent free count exceeded region count"); + assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + "Extent region count exceeded size"); + assert_zu_ne(NREGS_READ(out, 0), 0, + "Extent region count must be positive"); + } else { + assert_zu_eq(NFREE_READ(out, 0), 0, + "Extent free count should be zero"); + assert_zu_eq(NREGS_READ(out, 0), 1, + "Extent region count should be one"); + } + TEST_EQUAL_REF(1, + "Should not overwrite content beyond what's needed"); + in_sz *= 2; + out_sz_ref = out_sz *= 2; - memcpy(out_ref, out, 3 * sizeof(size_t)); - TEST_UTIL_BATCH_VALID; - TEST_EQUAL_REF(0, "Statistics should be stable across calls"); - assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), - "Extent free count exceeded region count"); - assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), - "Extent region count should be same for 
same region size"); - assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), - "Extent size should be same for same region size"); + memcpy(out_ref, out, 3 * sizeof(size_t)); + TEST_UTIL_BATCH_VALID; + TEST_EQUAL_REF(0, "Statistics should be stable across calls"); + if (sz <= SC_SMALL_MAXCLASS) { + assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + "Extent free count exceeded region count"); + } else { + assert_zu_eq(NFREE_READ(out, 0), 0, + "Extent free count should be zero"); + } + assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + "Extent region count should be same for same region size"); + assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + "Extent size should be same for same region size"); #undef SIZE_READ #undef NREGS_READ @@ -179,12 +255,15 @@ TEST_BEGIN(test_batch) { #undef TEST_EQUAL_REF - free(q); - free(p); + free(q); + free(p); + } } TEST_END int main(void) { + assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + "Test case cannot cover large classes"); return test(test_query, test_batch); } From c92ac306013bc95cd5f34de421b1aa5eb1f28971 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 30 Apr 2019 13:54:00 -0700 Subject: [PATCH 1304/2608] Add confirm_conf option If the confirm_conf option is set, when the program starts, each of the four malloc_conf strings will be printed, and each option will be printed when being set. --- doc/jemalloc.xml.in | 17 + .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 3 + src/jemalloc.c | 296 +++++++++++------- src/stats.c | 1 + test/unit/mallctl.c | 1 + 6 files changed, 204 insertions(+), 115 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 04a47648..194f1efc 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -904,6 +904,23 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.confirm_conf + (bool) + r- + + Confirm-runtime-options-when-program-starts + enabled/disabled. 
If true, the string specified via + , the string pointed to by the + global variable malloc_conf, the name + of the file referenced by the symbolic link named + /etc/malloc.conf, and the value of + the environment variable MALLOC_CONF, will be printed in + order. Then, each option being set will be individually printed. This + option is disabled by default. + + opt.abort_conf diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index cdbc33a2..d291170b 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -10,6 +10,7 @@ extern bool malloc_slow; /* Run-time options. */ extern bool opt_abort; extern bool opt_abort_conf; +extern bool opt_confirm_conf; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/src/ctl.c b/src/ctl.c index 176cb65f..271881e8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -81,6 +81,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -304,6 +305,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1741,6 +1743,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index ec6b400a..1e99a591 100644 --- a/src/jemalloc.c +++ 
b/src/jemalloc.c @@ -43,6 +43,8 @@ bool opt_abort_conf = false #endif ; +/* Intentionally default off, even with debug builds. */ +bool opt_confirm_conf = false; const char *opt_junk = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) "true" @@ -929,93 +931,140 @@ malloc_slow_flag_init(void) { malloc_slow = (malloc_slow_flags != 0); } +/* Number of sources for initializing malloc_conf */ +#define MALLOC_CONF_NSOURCES 4 + +static const char * +obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { + if (config_debug) { + static unsigned read_source = 0; + /* + * Each source should only be read once, to minimize # of + * syscalls on init. + */ + assert(read_source++ == which_source); + } + assert(which_source < MALLOC_CONF_NSOURCES); + + const char *ret; + switch (which_source) { + case 0: + ret = config_malloc_conf; + break; + case 1: + if (je_malloc_conf != NULL) { + /* Use options that were compiled into the program. */ + ret = je_malloc_conf; + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + case 2: { + ssize_t linklen = 0; +#ifndef _WIN32 + int saved_errno = errno; + const char *linkname = +# ifdef JEMALLOC_PREFIX + "/etc/"JEMALLOC_PREFIX"malloc.conf" +# else + "/etc/malloc.conf" +# endif + ; + + /* + * Try to use the contents of the "/etc/malloc.conf" symbolic + * link's name. + */ +#ifndef JEMALLOC_READLINKAT + linklen = readlink(linkname, buf, PATH_MAX); +#else + linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX); +#endif + if (linklen == -1) { + /* No configuration specified. */ + linklen = 0; + /* Restore errno. */ + set_errno(saved_errno); + } +#endif + buf[linklen] = '\0'; + ret = buf; + break; + } case 3: { + const char *envname = +#ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +#else + "MALLOC_CONF" +#endif + ; + + if ((ret = jemalloc_secure_getenv(envname)) != NULL) { + /* + * Do nothing; opts is already initialized to the value + * of the MALLOC_CONF environment variable. 
+ */ + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + } default: + not_reached(); + ret = NULL; + } + return ret; +} + static void -malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { +malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], + char buf[PATH_MAX + 1]) { + static const char *opts_explain[MALLOC_CONF_NSOURCES] = { + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF" + }; unsigned i; - char buf[PATH_MAX + 1]; const char *opts, *k, *v; size_t klen, vlen; - for (i = 0; i < 4; i++) { + for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { /* Get runtime configuration. */ - switch (i) { - case 0: - opts = config_malloc_conf; - break; - case 1: - if (je_malloc_conf != NULL) { - /* - * Use options that were compiled into the - * program. - */ - opts = je_malloc_conf; - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - case 2: { - ssize_t linklen = 0; -#ifndef _WIN32 - int saved_errno = errno; - const char *linkname = -# ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" -# else - "/etc/malloc.conf" -# endif - ; - - /* - * Try to use the contents of the "/etc/malloc.conf" - * symbolic link's name. - */ -#ifndef JEMALLOC_READLINKAT - linklen = readlink(linkname, buf, sizeof(buf) - 1); -#else - linklen = readlinkat(AT_FDCWD, linkname, buf, - sizeof(buf) - 1); -#endif - if (linklen == -1) { - /* No configuration specified. */ - linklen = 0; - /* Restore errno. 
*/ - set_errno(saved_errno); - } -#endif - buf[linklen] = '\0'; - opts = buf; - break; - } case 3: { - const char *envname = -#ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -#else - "MALLOC_CONF" -#endif - ; - - if ((opts = jemalloc_secure_getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to - * the value of the MALLOC_CONF environment - * variable. - */ - } else { - /* No configuration specified. */ - buf[0] = '\0'; - opts = buf; - } - break; - } default: - not_reached(); - buf[0] = '\0'; - opts = buf; + if (initial_call) { + opts_cache[i] = obtain_malloc_conf(i, buf); + } + opts = opts_cache[i]; + if (!initial_call && opt_confirm_conf) { + malloc_printf( + ": malloc_conf #%u (%s): \"%s\"\n", + i + 1, opts_explain[i], opts != NULL ? opts : ""); + } + if (opts == NULL) { + continue; } while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { + +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + malloc_conf_error( \ + msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE { \ + if (!initial_call && opt_confirm_conf \ + && cur_opt_valid) { \ + malloc_printf(": Set "\ + "conf value: %.*s:%.*s\n", \ + (int)klen, k, (int)vlen, v);\ + } \ + continue; \ + } #define CONF_MATCH(n) \ (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) #define CONF_MATCH_VALUE(n) \ @@ -1027,11 +1076,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } else if (CONF_MATCH_VALUE("false")) { \ o = false; \ } else { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } \ - continue; \ + CONF_CONTINUE; \ } /* * One of the CONF_MIN macros below expands, in one of the use points, @@ -1054,8 +1102,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { um = malloc_strtoumax(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf 
value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ if (check_min(um, (t)(min))) { \ @@ -1069,7 +1116,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } else { \ if (check_min(um, (t)(min)) || \ check_max(um, (t)(max))) { \ - malloc_conf_error( \ + CONF_ERROR( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ @@ -1077,7 +1124,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { o = (t)um; \ } \ } \ - continue; \ + CONF_CONTINUE; \ } #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ @@ -1095,18 +1142,17 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { l = strtol(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ - malloc_conf_error( \ - "Invalid conf value", \ + CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (l < (ssize_t)(min) || l > \ (ssize_t)(max)) { \ - malloc_conf_error( \ + CONF_ERROR( \ "Out-of-range conf value", \ k, klen, v, vlen); \ } else { \ o = l; \ } \ - continue; \ + CONF_CONTINUE; \ } #define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ @@ -1115,7 +1161,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { sizeof(o)-1; \ strncpy(o, v, cpylen); \ o[cpylen] = '\0'; \ - continue; \ + CONF_CONTINUE; \ + } + + bool cur_opt_valid = true; + + CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") + if (initial_call) { + continue; } CONF_HANDLE_BOOL(opt_abort, "abort") @@ -1132,10 +1185,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { @@ -1145,7 +1198,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (strncmp(dss_prec_names[i], v, vlen) == 0) 
{ if (extent_dss_prec_set(i)) { - malloc_conf_error( + CONF_ERROR( "Error setting dss", k, klen, v, vlen); } else { @@ -1157,10 +1210,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, @@ -1178,14 +1231,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (err || bin_update_shard_size( bin_shard_sizes, size_start, size_end, nshards)) { - malloc_conf_error( + CONF_ERROR( "Invalid settings for " "bin_shards", k, klen, v, vlen); break; } } while (vlen_left > 0); - continue; + CONF_CONTINUE; } CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < @@ -1198,7 +1251,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { init_opt_stats_print_opts(v, vlen); - continue; + CONF_CONTINUE; } if (config_fill) { if (CONF_MATCH("junk")) { @@ -1219,11 +1272,11 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { opt_junk_alloc = false; opt_junk_free = true; } else { - malloc_conf_error( - "Invalid conf value", k, - klen, v, vlen); + CONF_ERROR( + "Invalid conf value", + k, klen, v, vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_zero, "zero") } @@ -1260,7 +1313,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { if (!have_percpu_arena) { - malloc_conf_error( + CONF_ERROR( "No getcpu support", k, klen, v, vlen); } @@ -1270,10 +1323,10 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, 
vlen); } - continue; + CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); @@ -1299,13 +1352,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { sc_data, slab_start, slab_end, (int)pgs); } else { - malloc_conf_error( - "Invalid settings for " - "slab_sizes", k, klen, v, - vlen); + CONF_ERROR("Invalid settings " + "for slab_sizes", + k, klen, v, vlen); } } while (!err && vlen_left > 0); - continue; + CONF_CONTINUE; } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") @@ -1334,7 +1386,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { vlen : sizeof(log_var_names) - 1); strncpy(log_var_names, v, cpylen); log_var_names[cpylen] = '\0'; - continue; + CONF_CONTINUE; } } if (CONF_MATCH("thp")) { @@ -1343,7 +1395,7 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { if (strncmp(thp_mode_names[i],v, vlen) == 0) { if (!have_madvise_huge) { - malloc_conf_error( + CONF_ERROR( "No THP support", k, klen, v, vlen); } @@ -1353,13 +1405,14 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { } } if (!match) { - malloc_conf_error("Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } - continue; + CONF_CONTINUE; } - malloc_conf_error("Invalid conf pair", k, klen, v, - vlen); + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); +#undef CONF_ERROR +#undef CONF_CONTINUE #undef CONF_MATCH #undef CONF_MATCH_VALUE #undef CONF_HANDLE_BOOL @@ -1382,6 +1435,19 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } +static void +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { + const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL}; + char buf[PATH_MAX + 1]; + + /* The first call only set the confirm_conf option and opts_cache */ + malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); + 
malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, + NULL); +} + +#undef MALLOC_CONF_NSOURCES + static bool malloc_init_hard_needed(void) { if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == diff --git a/src/stats.c b/src/stats.c index 55a59994..bce9f458 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1065,6 +1065,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 498f9e06..3a75ac04 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -159,6 +159,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); From e13cf65a5f37bbd9b44badb198ccc138cbacc219 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 20 May 2019 13:41:43 -0700 Subject: [PATCH 1305/2608] Add experimental.arenas.i.pactivep. The new experimental mallctl exposes the arena pactive counter to applications, which allows fast read w/o going through the mallctl / epoch steps. This is particularly useful when frequent balancing is required, e.g. when having multiple manual arenas, and threads are multiplexed to them based on usage. 
--- src/ctl.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 271881e8..1d830874 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -225,6 +225,8 @@ CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) +CTL_PROTO(experimental_arenas_i_pactivep) +INDEX_PROTO(experimental_arenas_i) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -588,19 +590,31 @@ static const ctl_named_node_t stats_node[] = { {NAME("arenas"), CHILD(indexed, stats_arenas)} }; -static const ctl_named_node_t hooks_node[] = { +static const ctl_named_node_t experimental_hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, {NAME("remove"), CTL(experimental_hooks_remove)} }; -static const ctl_named_node_t utilization_node[] = { +static const ctl_named_node_t experimental_utilization_node[] = { {NAME("query"), CTL(experimental_utilization_query)}, {NAME("batch_query"), CTL(experimental_utilization_batch_query)} }; +static const ctl_named_node_t experimental_arenas_i_node[] = { + {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)} +}; +static const ctl_named_node_t super_experimental_arenas_i_node[] = { + {NAME(""), CHILD(named, experimental_arenas_i)} +}; + +static const ctl_indexed_node_t experimental_arenas_node[] = { + {INDEX(experimental_arenas_i)} +}; + static const ctl_named_node_t experimental_node[] = { - {NAME("hooks"), CHILD(named, hooks)}, - {NAME("utilization"), CHILD(named, utilization)} + {NAME("hooks"), CHILD(named, experimental_hooks)}, + {NAME("utilization"), CHILD(named, experimental_utilization)}, + {NAME("arenas"), CHILD(indexed, experimental_arenas)} }; static const ctl_named_node_t root_node[] = { @@ -3068,15 +3082,23 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +static 
bool +ctl_arenas_i_verify(size_t i) { + size_t a = arenas_i2a_impl(i, true, true); + if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { + return true; + } + + return false; +} + static const ctl_named_node_t * stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; - size_t a; malloc_mutex_lock(tsdn, &ctl_mtx); - a = arenas_i2a_impl(i, true, true); - if (a == UINT_MAX || !ctl_arenas->arenas[a]->initialized) { + if (ctl_arenas_i_verify(i)) { ret = NULL; goto label_return; } @@ -3351,3 +3373,56 @@ experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +static const ctl_named_node_t * +experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t i) { + const ctl_named_node_t *ret; + + malloc_mutex_lock(tsdn, &ctl_mtx); + if (ctl_arenas_i_verify(i)) { + ret = NULL; + goto label_return; + } + ret = super_experimental_arenas_i_node; +label_return: + malloc_mutex_unlock(tsdn, &ctl_mtx); + return ret; +} + +static int +experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + if (!config_stats) { + return ENOENT; + } + if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(size_t *)) { + return EINVAL; + } + + unsigned arena_ind; + arena_t *arena; + int ret; + size_t *pactivep; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + READONLY(); + MIB_UNSIGNED(arena_ind, 2); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ + defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) + /* Expose the underlying counter for fast read. 
*/ + pactivep = (size_t *)&(arena->nactive.repr); + READ(pactivep, size_t *); + ret = 0; +#else + ret = EFAULT; +#endif + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} From 1a71533511027dbe3f9d989659efeec446915d6b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 22 May 2019 10:21:53 -0700 Subject: [PATCH 1306/2608] Avoid blocking on background thread lock for stats. Background threads may run for a long time, especially when the # of dirty pages is high. Avoid blocking stats calls because of this (which may cause latency spikes). --- src/background_thread.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index 5ed6c1c9..57b9b256 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -799,7 +799,13 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { nstime_init(&stats->run_interval, 0); for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsdn, &info->mtx); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Each background thread run may take a long time; + * avoid waiting on the stats if the thread is active. + */ + continue; + } if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); From 40a3435b8dc225ad61329aca89d9c8d0dfbc03ab Mon Sep 17 00:00:00 2001 From: frederik-h Date: Fri, 24 May 2019 11:36:21 +0200 Subject: [PATCH 1307/2608] Add missing safety_check.c to MSBuild projects The file is included in the list of source files in Makefile.in, but it is missing from the project files. This causes the build to fail due to unresolved symbols. 
--- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 3 ++- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 5 ++++- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 3 ++- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 5 ++++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index ddc6781c..228e8be0 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -66,6 +66,7 @@ + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} @@ -346,4 +347,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 1dcf4ed5..d839515b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -100,5 +100,8 @@ Source Files + + Source Files + - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 21481d5e..edcceede 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -67,6 +67,7 @@ + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} @@ -346,4 +347,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 466dc63f..6df72601 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -103,5 +103,8 @@ Source Files + + Source Files + - \ No newline at end of file + From 7720b6e3851d200449914448c7163f7af92cd63f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 3 Jul 2019 16:48:47 -0700 Subject: [PATCH 1308/2608] Fix redzone setting and checking --- include/jemalloc/internal/safety_check.h | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 1b53fc4c..53339ac1 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -9,7 +9,7 @@ JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { assert(usize < bumped_usize); for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - *((unsigned char *)ptr + usize) = 0xBC; + *((unsigned char *)ptr + i) = 0xBC; } } @@ -17,7 +17,7 @@ JEMALLOC_ALWAYS_INLINE void safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) { for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - if (unlikely(*((unsigned char *)ptr + usize) != 0xBC)) { + if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) { safety_check_fail("Use after free error\n"); } } From 34e75630cc512423b4f227338056a2f5d7e81740 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Sat, 13 Jul 2019 20:27:15 -0700 Subject: [PATCH 1309/2608] Reorder the configs for AppVeyor. Enable-debug and 64-bit runs tend to be more relevant. Run them first. 
--- .appveyor.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 9a7d00a9..90b03688 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,27 +5,27 @@ environment: - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 + CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW64 + CPU: x86_64 + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - - MSYSTEM: MINGW64 - CPU: x86_64 + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW32 CPU: i686 + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-debug + - MSYSTEM: MINGW64 + CPU: x86_64 - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-debug - - MSYSTEM: MINGW64 - CPU: x86_64 - CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-debug install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% From d26636d566167a439ea18da7a234f9040668023b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Jun 2019 11:13:00 -0700 Subject: [PATCH 1310/2608] Fix logic in printing `cbopaque` can now be overriden without overriding `write_cb` in the first place. (Otherwise there would be no need to have the `cbopaque` parameter in `malloc_message`.) --- doc/jemalloc.xml.in | 2 +- include/jemalloc/internal/malloc_io.h | 2 +- src/malloc_io.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 194f1efc..e23ccb7f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -424,7 +424,7 @@ for (i = 0; i < nbins; i++) { called repeatedly. General information that never changes during execution can be omitted by specifying g as a character within the opts string. Note that - malloc_message() uses the + malloc_stats_print() uses the mallctl*() functions internally, so inconsistent statistics can be reported if multiple threads use these functions simultaneously. 
If is specified during diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index bfe556b5..1d1a414e 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -54,7 +54,7 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format, size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); /* - * The caller can set write_cb and cbopaque to null to choose to print with the + * The caller can set write_cb to null to choose to print with the * je_malloc_message hook. */ void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, diff --git a/src/malloc_io.c b/src/malloc_io.c index dd882651..d7cb0f52 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -632,7 +632,6 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, */ write_cb = (je_malloc_message != NULL) ? je_malloc_message : wrtmessage; - cbopaque = NULL; } malloc_vsnprintf(buf, sizeof(buf), format, ap); From e0a0c8d4bf512283e8c85fb4a51761fce5e0c08f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Jun 2019 16:34:29 -0700 Subject: [PATCH 1311/2608] Fix a bug in prof_dump_write The original logic can be disastrous if `PROF_DUMP_BUFSIZE` is less than `slen` -- `prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE` would always be `false`, so `memcpy` would always try to copy `PROF_DUMP_BUFSIZE - prof_dump_buf_end` chars, which can be dangerous: in the last round of the `while` loop it would not only illegally read the memory beyond `s` (which might not always be disastrous), but it would also illegally overwrite the memory beyond `prof_dump_buf` (which can be pretty disastrous). `slen` probably has never gone beyond `PROF_DUMP_BUFSIZE` so we were just lucky. 
--- src/prof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index a4e30f42..4ebe2799 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1292,7 +1292,7 @@ prof_dump_write(bool propagate_err, const char *s) { } } - if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) { + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { /* Finish writing. */ n = slen - i; } else { From a2a693e722d3ec0f0fb7dfcac54e775b1837efda Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 5 Jun 2019 15:26:08 -0700 Subject: [PATCH 1312/2608] Remove prof_accumbytes in arena `prof_accumbytes` was supposed to be replaced by `prof_accum` in https://github.com/jemalloc/jemalloc/pull/623. --- include/jemalloc/internal/arena_structs_b.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs_b.h index 950bd13c..eeab57fd 100644 --- a/include/jemalloc/internal/arena_structs_b.h +++ b/include/jemalloc/internal/arena_structs_b.h @@ -116,7 +116,6 @@ struct arena_s { /* Synchronization: internal. */ prof_accum_t prof_accum; - uint64_t prof_accumbytes; /* * PRNG state for cache index randomization of large allocation base From f32f23d6cc3ac9e663983ae62371acd47405c886 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Jul 2019 14:35:53 -0700 Subject: [PATCH 1313/2608] Fix posix_memalign with input size 0. Return a valid pointer instead of failed assertion. --- src/jemalloc.c | 22 +++++++++++++++++----- test/integration/posix_memalign.c | 5 +++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1e99a591..b6c8d992 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1816,6 +1816,11 @@ struct static_opts_s { /* Whether or not allocation size may overflow. */ bool may_overflow; + /* + * Whether or not allocations (with alignment) of size 0 should be + * treated as size 1. 
+ */ + bool bump_empty_aligned_alloc; /* * Whether to assert that allocations are not of size 0 (after any * bumping). @@ -1857,6 +1862,7 @@ struct static_opts_s { JEMALLOC_ALWAYS_INLINE void static_opts_init(static_opts_t *static_opts) { static_opts->may_overflow = false; + static_opts->bump_empty_aligned_alloc = false; static_opts->assert_nonempty_alloc = false; static_opts->null_out_result_on_error = false; static_opts->set_errno_on_error = false; @@ -2044,11 +2050,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { goto label_oom; } - /* Validate the user input. */ - if (sopts->assert_nonempty_alloc) { - assert (size != 0); - } - if (unlikely(dopts->alignment < sopts->min_alignment || (dopts->alignment & (dopts->alignment - 1)) != 0)) { goto label_invalid_alignment; @@ -2068,6 +2069,11 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { <= SC_LARGE_MAXCLASS); } } else { + if (sopts->bump_empty_aligned_alloc) { + if (unlikely(size == 0)) { + size = 1; + } + } usize = sz_sa2u(size, dopts->alignment); dopts->usize = usize; if (unlikely(usize == 0 @@ -2075,6 +2081,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { goto label_oom; } } + /* Validate the user input. 
*/ + if (sopts->assert_nonempty_alloc) { + assert (size != 0); + } check_entry_exit_locking(tsd_tsdn(tsd)); @@ -2390,6 +2400,7 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); + sopts.bump_empty_aligned_alloc = true; sopts.min_alignment = sizeof(void *); sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; @@ -2430,6 +2441,7 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); + sopts.bump_empty_aligned_alloc = true; sopts.null_out_result_on_error = true; sopts.set_errno_on_error = true; sopts.min_alignment = 1; diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 2c2726de..d992260a 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -85,9 +85,10 @@ TEST_BEGIN(test_alignment_and_size) { alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (size = 1; + for (size = 0; size < 3 * alignment && size < (1U << 31); - size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + size += ((size == 0) ? 1 : + (alignment >> (LG_SIZEOF_PTR-1)) - 1)) { for (i = 0; i < NITER; i++) { err = posix_memalign(&ps[i], alignment, size); From 9a86c65abc2cf242efe9354c9ce16901673eeb0c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Jul 2019 16:20:23 -0700 Subject: [PATCH 1314/2608] Implement retain on Windows. The VirtualAlloc and VirtualFree APIs are different because MEM_DECOMMIT cannot be used across multiple VirtualAlloc regions. To properly support decommit, only allow merge / split within the same region -- this is done by tracking the "is_head" state of extents and not merging cross-region. Add a new state is_head (only relevant for retain && !maps_coalesce), which is true for the first extent in each VirtualAlloc region. Determine if two extents can be merged based on the head state, and use serial numbers for sanity checks. 
--- include/jemalloc/internal/extent_inlines.h | 26 +++++++- include/jemalloc/internal/extent_structs.h | 6 +- include/jemalloc/internal/extent_types.h | 5 ++ src/extent.c | 76 +++++++++++++++++----- src/extent_dss.c | 5 +- test/unit/arena_reset.c | 9 ++- test/unit/rtree.c | 8 +-- test/unit/slab.c | 3 +- 8 files changed, 110 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 63b710dc..77fa4c4a 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -343,10 +343,30 @@ extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { nstime_copy(&extent->e_alloc_time, &t); } +static inline bool +extent_is_head_get(extent_t *extent) { + if (maps_coalesce) { + not_reached(); + } + + return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> + EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_is_head_set(extent_t *extent, bool is_head) { + if (maps_coalesce) { + not_reached(); + } + + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); +} + static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool dumpable) { + bool committed, bool dumpable, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -360,6 +380,10 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_committed_set(extent, committed); extent_dumpable_set(extent, dumpable); ql_elm_new(extent, ql_link); + if (!maps_coalesce) { + extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : + false); + } if (config_prof) { extent_prof_tctx_set(extent, NULL); } diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index ad6710e7..767cd893 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -128,7 +128,11 @@ struct extent_s { #define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) #define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) +#define EXTENT_BITS_IS_HEAD_WIDTH 1 +#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) +#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) #define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 865f8a10..96925cf9 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -15,4 +15,9 @@ typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; */ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 +typedef enum { + EXTENT_NOT_HEAD, + EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. 
*/ +} extent_head_state_t; + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/src/extent.c b/src/extent.c index e83d9c8c..416d68fb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -50,20 +50,16 @@ static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -#ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); -#endif static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -#ifdef JEMALLOC_MAPS_COALESCE static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -#endif static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, bool growing_retained); @@ -88,11 +84,9 @@ const extent_hooks_t extent_hooks_default = { , NULL #endif -#ifdef JEMALLOC_MAPS_COALESCE , extent_split_default, extent_merge_default -#endif }; /* Used exclusively for gdump triggering. 
*/ @@ -1323,7 +1317,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_init(extent, arena, ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true); + committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); goto label_err; @@ -1495,7 +1489,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } extent_init(extent, arena, addr, esize, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, - true); + true, EXTENT_NOT_HEAD); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -2045,13 +2039,20 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, offset, length, false); } -#ifdef JEMALLOC_MAPS_COALESCE static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - return !maps_coalesce; + if (!maps_coalesce) { + /* + * Without retain, only whole regions can be purged (required by + * MEM_RELEASE on Windows) -- therefore disallow splitting. See + * comments in extent_head_no_merge(). 
+ */ + return !opt_retain; + } + + return false; } -#endif /* * Accepts the extent to split, and the characteristics of each side of the @@ -2083,7 +2084,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent), + EXTENT_NOT_HEAD); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -2094,7 +2096,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(&lead, arena, extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent), + EXTENT_NOT_HEAD); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b); @@ -2152,7 +2155,7 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, static bool extent_merge_default_impl(void *addr_a, void *addr_b) { - if (!maps_coalesce) { + if (!maps_coalesce && !opt_retain) { return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { @@ -2162,13 +2165,51 @@ extent_merge_default_impl(void *addr_a, void *addr_b) { return false; } -#ifdef JEMALLOC_MAPS_COALESCE +/* + * Returns true if the given extents can't be merged because of their head bit + * settings. Assumes the second extent has the higher address. + */ +static bool +extent_head_no_merge(extent_t *a, extent_t *b) { + assert(extent_base_get(a) < extent_base_get(b)); + /* + * When coalesce is not always allowed (Windows), only merge extents + * from the same VirtualAlloc region under opt.retain (in which case + * MEM_DECOMMIT is utilized for purging). 
+ */ + if (maps_coalesce) { + return false; + } + if (!opt_retain) { + return true; + } + /* If b is a head extent, disallow the cross-region merge. */ + if (extent_is_head_get(b)) { + /* + * Additionally, sn should not overflow with retain; sanity + * check that different regions have unique sn. + */ + assert(extent_sn_comp(a, b) != 0); + return true; + } + assert(extent_sn_comp(a, b) == 0); + + return false; +} + static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + if (!maps_coalesce) { + tsdn_t *tsdn = tsdn_fetch(); + extent_t *a = iealloc(tsdn, addr_a); + extent_t *b = iealloc(tsdn, addr_b); + if (extent_head_no_merge(a, b)) { + return true; + } + } return extent_merge_default_impl(addr_a, addr_b); } -#endif static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, @@ -2176,10 +2217,11 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + assert(extent_base_get(a) < extent_base_get(b)); extent_hooks_assure_initialized(arena, r_extent_hooks); - if ((*r_extent_hooks)->merge == NULL) { + if ((*r_extent_hooks)->merge == NULL || extent_head_no_merge(a, b)) { return true; } diff --git a/src/extent_dss.c b/src/extent_dss.c index 69a7bee8..85817891 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -156,7 +156,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(gap, arena, gap_addr_page, gap_size_page, false, SC_NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true, true); + extent_state_active, false, true, true, + EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired @@ -200,7 +201,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(&extent, arena, ret, size, size, false, SC_NSIZES, extent_state_active, false, true, - true); + true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 96b042dd..b182f31a 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -279,8 +279,11 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, if (!try_dalloc) { return true; } - pages_unmap(addr, size); did_dalloc = true; + if (!maps_coalesce && opt_retain) { + return true; + } + pages_unmap(addr, size); return false; } @@ -304,7 +307,9 @@ TEST_BEGIN(test_arena_destroy_hooks_unmap) { unsigned nptrs; extent_hooks_prep(); - try_decommit = false; + if (maps_coalesce) { + try_decommit = false; + } memcpy(&hooks_orig, &hooks, sizeof(extent_hooks_t)); memcpy(&hooks, &hooks_unmap, sizeof(extent_hooks_t)); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b017bc03..90adca13 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -87,9 +87,9 @@ TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; extent_init(&extent_a, NULL, 
NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, true); + extent_state_active, false, false, true, EXTENT_NOT_HEAD); extent_init(&extent_b, NULL, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true); + extent_state_active, false, false, true, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -126,7 +126,7 @@ TEST_BEGIN(test_rtree_bits) { extent_t extent; extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true); + extent_state_active, false, false, true, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -167,7 +167,7 @@ TEST_BEGIN(test_rtree_random) { extent_t extent; extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true); + extent_state_active, false, false, true, EXTENT_NOT_HEAD); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index ef718821..c56af25f 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -9,7 +9,8 @@ TEST_BEGIN(test_arena_slab_regind) { const bin_info_t *bin_info = &bin_infos[binind]; extent_init(&slab, NULL, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, true); + binind, 0, extent_state_active, false, true, true, + EXTENT_NOT_HEAD); assert_ptr_not_null(extent_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { From badf8d95f11cf8ead0f8b7192663002d1d4dc4b2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Jul 2019 12:09:41 -0700 Subject: [PATCH 1315/2608] Enable opt.retain by default on Windows. 
--- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 39a540fd..261d81c0 100644 --- a/configure.ac +++ b/configure.ac @@ -738,6 +738,9 @@ case "${host}" in libprefix="" SOREV="${so}" PIC_CFLAGS="" + if test "${LG_SIZEOF_PTR}" = "3"; then + default_retain="1" + fi ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) From 57dbab5d6bc764a8b971334ec80977d6333688af Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Jul 2019 12:16:02 -0700 Subject: [PATCH 1316/2608] Avoid leaking extents / VM when split is not supported. This can only happen on Windows and with opt.retain disabled (which isn't the default). The solution is suboptimal, however not a common case as retain is the long term plan for all platforms anyway. --- src/extent.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/extent.c b/src/extent.c index 416d68fb..c6d402b0 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1052,6 +1052,17 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); + if (!maps_coalesce && result != extent_split_interior_ok + && !opt_retain) { + /* + * Split isn't supported (implies Windows w/o retain). Avoid + * leaking the extents. + */ + assert(to_leak != NULL && lead == NULL && trail == NULL); + extent_deactivate(tsdn, arena, extents, to_leak); + return NULL; + } + if (result == extent_split_interior_ok) { if (lead != NULL) { extent_deactivate(tsdn, arena, extents, lead); From 42807fcd9ed68c78f660c6dd85bcf9d82e134244 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Jul 2019 14:55:43 -0700 Subject: [PATCH 1317/2608] extent_dalloc instead of leak when register fails. extent_register may only fail if the underlying extent and region got stolen / coalesced before we lock. Avoid doing extent_leak (which purges the region) since we don't really own the region. 
--- src/extent.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/extent.c b/src/extent.c index c6d402b0..c2637d27 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1335,8 +1335,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } if (extent_register_no_gdump_add(tsdn, extent)) { - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); + extent_dalloc(tsdn, arena, extent); goto label_err; } @@ -1505,8 +1504,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_addr_randomize(tsdn, extent, alignment); } if (extent_register(tsdn, extent)) { - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, false); + extent_dalloc(tsdn, arena, extent); return NULL; } @@ -1729,8 +1727,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { WITNESS_RANK_CORE, 0); if (extent_register(tsdn, extent)) { - extents_leak(tsdn, arena, &extent_hooks, - &arena->extents_retained, extent, false); + extent_dalloc(tsdn, arena, extent); return; } extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); From 4e36ce34c1e6a6f470a9355b90b0a757c6fdb0b5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Jul 2019 15:56:05 -0700 Subject: [PATCH 1318/2608] Track the leaked VM space via the abandoned_vm counter. The counter is 0 unless metadata allocation failed (indicates OOM), and is mainly for sanity checking. --- include/jemalloc/internal/arena_stats.h | 3 +++ src/arena.c | 2 ++ src/ctl.c | 7 +++++++ src/extent.c | 16 ++++++++++++---- src/stats.c | 3 ++- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3ffe9c78..23949ed9 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -112,6 +112,9 @@ struct arena_stats_s { arena_stats_u64_t nflushes_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. 
*/ + /* VM space had to be leaked (undocumented). Normally 0. */ + atomic_zu_t abandoned_vm; + /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ diff --git a/src/arena.c b/src/arena.c index f9336fee..a44d0da3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -132,6 +132,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); + arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( + &arena->stats.abandoned_vm, ATOMIC_RELAXED)); for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, diff --git a/src/ctl.c b/src/ctl.c index 1d830874..48afaa61 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -210,6 +210,7 @@ CTL_PROTO(stats_arenas_i_internal) CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) +CTL_PROTO(stats_arenas_i_abandoned_vm) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -543,6 +544,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, + {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -913,6 +915,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.ndalloc_large); ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); + accum_atomic_zu(&sdstats->astats.abandoned_vm, + &astats->astats.abandoned_vm); accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); @@ -2871,6 +2875,9 @@ 
CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, CTL_RO_CGEN(config_stats, stats_arenas_i_resident, atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.abandoned_vm, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) diff --git a/src/extent.c b/src/extent.c index c2637d27..6fdb7b02 100644 --- a/src/extent.c +++ b/src/extent.c @@ -619,16 +619,24 @@ label_return: return extent; } +/* + * This can only happen when we fail to allocate a new extent struct (which + * indicates OOM), e.g. when trying to split an existing extent. + */ static void -extents_leak(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { + size_t sz = extent_size_get(extent); + if (config_stats) { + arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + } /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. 
*/ if (extents_state_get(extents) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, - extent, 0, extent_size_get(extent), growing_retained)) { + extent, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent), growing_retained); @@ -1083,7 +1091,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_leak(tsdn, arena, r_extent_hooks, extents, + extents_abandon_vm(tsdn, arena, r_extent_hooks, extents, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -1382,7 +1390,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_leak(tsdn, arena, r_extent_hooks, + extents_abandon_vm(tsdn, arena, r_extent_hooks, &arena->extents_retained, to_leak, true); } goto label_err; diff --git a/src/stats.c b/src/stats.c index bce9f458..118e05d2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -673,7 +673,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, large_nflushes; - size_t tcache_bytes; + size_t tcache_bytes, abandoned_vm; uint64_t uptime; CTL_GET("arenas.page", &page, size_t); @@ -963,6 +963,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(metadata_thp) GET_AND_EMIT_MEM_STAT(tcache_bytes) GET_AND_EMIT_MEM_STAT(resident) + GET_AND_EMIT_MEM_STAT(abandoned_vm) GET_AND_EMIT_MEM_STAT(extent_avail) #undef GET_AND_EMIT_MEM_STAT From 1d148f353a2c71bc12fd066e467649fd17df3c95 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 23 Jul 2019 12:49:17 -0700 Subject: [PATCH 1319/2608] Optimize max_active_fit in first_fit. Stop scanning once reached the first max_active_fit size. 
--- src/extent.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index 6fdb7b02..a2dbde12 100644 --- a/src/extent.c +++ b/src/extent.c @@ -453,7 +453,6 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, assert(!extent_heap_empty(&extents->heaps[i])); extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); - bool size_ok = true; /* * In order to reduce fragmentation, avoid reusing and splitting * large extents for much smaller sizes. @@ -462,10 +461,9 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, */ if (extents->delay_coalesce && (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { - size_ok = false; + break; } - if (size_ok && - (ret == NULL || extent_snad_comp(extent, ret) < 0)) { + if (ret == NULL || extent_snad_comp(extent, ret) < 0) { ret = extent; } if (i == SC_NPSIZES) { From bc0998a9052957584b6944b6f43fffe0648f603e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 24 Jul 2019 16:12:06 -0700 Subject: [PATCH 1320/2608] Invoke arena_dalloc_promoted() properly w/o tcache. When tcache was disabled, the dalloc promoted case was missing. 
--- include/jemalloc/internal/arena_inlines_b.h | 16 ++++++++++++---- src/arena.c | 2 +- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7e61a44c..8b657abe 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -228,6 +228,16 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return sz_index2size(szind); } +static inline void +arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { + if (config_prof && unlikely(szind < SC_NBINS)) { + arena_dalloc_promoted(tsdn, ptr, NULL, true); + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); @@ -252,8 +262,7 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { /* Small allocation. */ arena_dalloc_small(tsdn, ptr); } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + arena_dalloc_large_no_tcache(tsdn, ptr, szind); } } @@ -349,8 +358,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + arena_dalloc_large_no_tcache(tsdn, ptr, szind); } } diff --git a/src/arena.c b/src/arena.c index a44d0da3..ba50e410 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1610,7 +1610,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(bumped_usize == SC_LARGE_MINCLASS); safety_check_verify_redzone(ptr, usize, bumped_usize); } - if (bumped_usize <= tcache_maxclass) { + if (bumped_usize <= tcache_maxclass && tcache != NULL) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { From a3fa597921987709eb0aa2258f1b35cc433ae5d4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 24 Jul 2019 16:27:30 -0700 Subject: [PATCH 1321/2608] Refactor arena_dalloc() / _sdalloc(). --- include/jemalloc/internal/arena_inlines_b.h | 42 +++++++++------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8b657abe..dd926575 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -266,6 +266,22 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { } } +JEMALLOC_ALWAYS_INLINE void +arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, + bool slow_path) { + if (szind < nhbins) { + if (config_prof && unlikely(szind < SC_NBINS)) { + arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); + } else { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, + slow_path); + } + } else { + extent_t *extent = iealloc(tsdn, ptr); + large_dalloc(tsdn, extent); + } +} + JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, bool slow_path) { @@ -304,18 +320,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { 
- if (szind < nhbins) { - if (config_prof && unlikely(szind < SC_NBINS)) { - arena_dalloc_promoted(tsdn, ptr, tcache, - slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - szind, slow_path); - } - } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); - } + arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); } } @@ -415,18 +420,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { - if (szind < nhbins) { - if (config_prof && unlikely(szind < SC_NBINS)) { - arena_dalloc_promoted(tsdn, ptr, tcache, - slow_path); - } else { - tcache_dalloc_large(tsdn_tsd(tsdn), - tcache, ptr, szind, slow_path); - } - } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); - } + arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); } } From 10fcff6c38c08bc2b1a672ff92701012944d843a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Jul 2019 11:15:08 -0700 Subject: [PATCH 1322/2608] Lower nthreads in test/unit/retained on 32-bit to avoid OOM. --- test/unit/retained.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/retained.c b/test/unit/retained.c index d51a5981..7993fd3d 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -107,6 +107,9 @@ TEST_BEGIN(test_retained) { atomic_store_u(&epoch, 0, ATOMIC_RELAXED); unsigned nthreads = ncpus * 2; + if (LG_SIZEOF_PTR < 3 && nthreads > 16) { + nthreads = 16; /* 32-bit platform could run out of vaddr. */ + } VARIABLE_ARRAY(thd_t, threads, nthreads); for (unsigned i = 0; i < nthreads; i++) { thd_create(&threads[i], thd_start, NULL); From 9f6a9f4c1f78fd61297e01ae1521af9696d2023b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Jul 2019 13:43:59 -0700 Subject: [PATCH 1323/2608] Update manual for opt.retain (new default on Windows). 
--- doc/jemalloc.xml.in | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e23ccb7f..7fecda7c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -963,17 +963,17 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="stats.retained">stats.retained for related details). It also makes jemalloc use mmap2 - in a more greedy way, mapping larger chunks in one go. - This option is disabled by default unless discarding virtual memory is - known to trigger - platform-specific performance problems, e.g. for [64-bit] Linux, which - has a quirk in its virtual memory allocation algorithm that causes - semi-permanent VM map holes under normal jemalloc operation. Although - munmap - 2 causes issues on 32-bit Linux as - well, retaining virtual memory for 32-bit Linux is disabled by default - due to the practical possibility of address space exhaustion. - + or equivalent in a more greedy way, mapping larger + chunks in one go. This option is disabled by default unless discarding + virtual memory is known to trigger platform-specific performance + problems, namely 1) for [64-bit] Linux, which has a quirk in its virtual + memory allocation algorithm that causes semi-permanent VM map holes + under normal jemalloc operation; and 2) for [64-bit] Windows, which + disallows split / merged regions with + MEM_RELEASE. Although the + same issues may present on 32-bit platforms as well, retaining virtual + memory for 32-bit Linux and Windows is disabled by default due to the + practical possibility of address space exhaustion. From 85f0cb2d0c0a05e9fc926544c65ca784c03ab239 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Jul 2019 14:16:56 -0700 Subject: [PATCH 1324/2608] Add indent to individual options for confirm_conf. 
--- src/jemalloc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b6c8d992..ed13718d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1059,9 +1059,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_CONTINUE { \ if (!initial_call && opt_confirm_conf \ && cur_opt_valid) { \ - malloc_printf(": Set "\ - "conf value: %.*s:%.*s\n", \ - (int)klen, k, (int)vlen, v);\ + malloc_printf(": -- " \ + "Set conf value: %.*s:%.*s" \ + "\n", (int)klen, k, \ + (int)vlen, v); \ } \ continue; \ } From 7618b0b8e458d9c0db6e4b05ccbe6c6308952890 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 12 Jul 2019 16:37:37 -0700 Subject: [PATCH 1325/2608] Refactor prof log `prof.c` is growing too long, so trying to modularize it. There are a few internal functions that had to be exposed but I think it is a fair trade-off. --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 8 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/prof.c | 682 +---------------- src/prof_log.c | 698 ++++++++++++++++++ 8 files changed, 720 insertions(+), 677 deletions(-) create mode 100644 src/prof_log.c diff --git a/Makefile.in b/Makefile.in index 7128b007..1cd973d7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ + $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/stats.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 094f3e17..e94ac3b2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -43,6 +43,8 @@ extern uint64_t prof_interval; */ extern size_t 
lg_prof_sample; +extern bool prof_booted; + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -64,10 +66,14 @@ extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif +int prof_getpid(void); bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); + +void prof_bt_hash(const void *key, size_t r_hash[2]); +bool prof_bt_keycomp(const void *k1, const void *k2); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); @@ -91,8 +97,10 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); +bool prof_log_init(tsd_t *tsdn); #ifdef JEMALLOC_JET size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 228e8be0..d93d9099 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d839515b..7b09d4e6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index edcceede..28bd3cd6 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 6df72601..a66c209b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 4ebe2799..9d1edb32 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,7 +7,6 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/emitter.h" /******************************************************************************/ @@ -39,7 +38,6 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; -bool opt_prof_log = false; char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -72,100 +70,6 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -typedef enum prof_logging_state_e prof_logging_state_t; -enum prof_logging_state_e { - prof_logging_state_stopped, - prof_logging_state_started, - prof_logging_state_dumping -}; - -/* - * - stopped: log_start never called, or previous log_stop has completed. - * - started: log_start called, log_stop not called yet. Allocations are logged. - * - dumping: log_stop called but not finished; samples are not logged anymore. - */ -prof_logging_state_t prof_logging_state = prof_logging_state_stopped; - -#ifdef JEMALLOC_JET -static bool prof_log_dummy = false; -#endif - -/* Incremented for every log file that is output. */ -static uint64_t log_seq = 0; -static char log_filename[ - /* Minimize memory bloat for non-prof builds. 
*/ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Timestamp for most recent call to log_start(). */ -static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; - -/* Increment these when adding to the log_bt and log_thr linked lists. */ -static size_t log_bt_index = 0; -static size_t log_thr_index = 0; - -/* Linked list node definitions. These are only used in prof.c. */ -typedef struct prof_bt_node_s prof_bt_node_t; - -struct prof_bt_node_s { - prof_bt_node_t *next; - size_t index; - prof_bt_t bt; - /* Variable size backtrace vector pointed to by bt. */ - void *vec[1]; -}; - -typedef struct prof_thr_node_s prof_thr_node_t; - -struct prof_thr_node_s { - prof_thr_node_t *next; - size_t index; - uint64_t thr_uid; - /* Variable size based on thr_name_sz. */ - char name[1]; -}; - -typedef struct prof_alloc_node_s prof_alloc_node_t; - -/* This is output when logging sampled allocations. */ -struct prof_alloc_node_s { - prof_alloc_node_t *next; - /* Indices into an array of thread data. */ - size_t alloc_thr_ind; - size_t free_thr_ind; - - /* Indices into an array of backtraces. */ - size_t alloc_bt_ind; - size_t free_bt_ind; - - uint64_t alloc_time_ns; - uint64_t free_time_ns; - - size_t usize; -}; - -/* - * Created on the first call to prof_log_start and deleted on prof_log_stop. - * These are the backtraces and threads that have already been logged by an - * allocation. - */ -static bool log_tables_initialized = false; -static ckh_t log_bt_node_set; -static ckh_t log_thr_node_set; - -/* Store linked lists for logged data. */ -static prof_bt_node_t *log_bt_first = NULL; -static prof_bt_node_t *log_bt_last = NULL; -static prof_thr_node_t *log_thr_first = NULL; -static prof_thr_node_t *log_thr_last = NULL; -static prof_alloc_node_t *log_alloc_first = NULL; -static prof_alloc_node_t *log_alloc_last = NULL; - -/* Protects the prof_logging_state and any log_{...} variable. */ -static malloc_mutex_t log_mtx; - /* * Table of mutexes that are shared among gctx's. 
These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. @@ -225,7 +129,7 @@ static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ -static bool prof_booted = false; +bool prof_booted = false; /******************************************************************************/ /* @@ -241,12 +145,6 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); -/* Hashtable functions for log_bt_node_set and log_thr_node_set. */ -static void prof_thr_node_hash(const void *key, size_t r_hash[2]); -static bool prof_thr_node_keycomp(const void *k1, const void *k2); -static void prof_bt_node_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_node_keycomp(const void *k1, const void *k2); - /******************************************************************************/ /* Red-black trees. */ @@ -361,162 +259,6 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, malloc_mutex_unlock(tsdn, tctx->tdata->lock); } -static size_t -prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_bt_node_t dummy_node; - dummy_node.bt = *bt; - prof_bt_node_t *node; - - /* See if this backtrace is already cached in the table. 
*/ - if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_bt_node_t, vec) + - (bt->len * sizeof(void *)); - prof_bt_node_t *new_node = (prof_bt_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_bt_first == NULL) { - log_bt_first = new_node; - log_bt_last = new_node; - } else { - log_bt_last->next = new_node; - log_bt_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_bt_index; - /* - * Copy the backtrace: bt is inside a tdata or gctx, which - * might die before prof_log_stop is called. - */ - new_node->bt.len = bt->len; - memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); - new_node->bt.vec = new_node->vec; - - log_bt_index++; - ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} -static size_t -prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_thr_node_t dummy_node; - dummy_node.thr_uid = thr_uid; - prof_thr_node_t *node; - - /* See if this thread is already cached in the table. 
*/ - if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; - prof_thr_node_t *new_node = (prof_thr_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_thr_first == NULL) { - log_thr_first = new_node; - log_thr_last = new_node; - } else { - log_thr_last->next = new_node; - log_thr_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_thr_index; - new_node->thr_uid = thr_uid; - strcpy(new_node->name, name); - - log_thr_index++; - ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} - -static void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); - if (cons_tdata == NULL) { - /* - * We decide not to log these allocations. cons_tdata will be - * NULL only when the current thread is in a weird state (e.g. - * it's being destroyed). 
- */ - return; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - goto label_done; - } - - if (!log_tables_initialized) { - bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp); - bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp); - if (err1 || err2) { - goto label_done; - } - log_tables_initialized = true; - } - - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, - (alloc_ctx_t *)NULL); - nstime_t free_time = NSTIME_ZERO_INITIALIZER; - nstime_update(&free_time); - - size_t sz = sizeof(prof_alloc_node_t); - prof_alloc_node_t *new_node = (prof_alloc_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - - const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? - "" : tctx->tdata->thread_name; - const char *cons_thr_name = prof_thread_name_get(tsd); - - prof_bt_t bt; - /* Initialize the backtrace, using the buffer in tdata to store it. */ - bt_init(&bt, cons_tdata->vec); - prof_backtrace(&bt); - prof_bt_t *cons_bt = &bt; - - /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ - prof_bt_t *prod_bt = &tctx->gctx->bt; - - new_node->next = NULL; - new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, - prod_thr_name); - new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, - cons_thr_name); - new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); - new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); - new_node->alloc_time_ns = nstime_ns(&alloc_time); - new_node->free_time_ns = nstime_ns(&free_time); - new_node->usize = usize; - - if (log_alloc_first == NULL) { - log_alloc_first = new_node; - log_alloc_last = new_node; - } else { - log_alloc_last->next = new_node; - log_alloc_last = new_node; - } - -label_done: - malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); -} - void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { @@ -1693,7 +1435,7 @@ prof_open_maps(const char *format, ...) { } #endif -static int +int prof_getpid(void) { #ifdef _WIN32 return GetCurrentProcessId(); @@ -2135,7 +1877,7 @@ prof_gdump(tsdn_t *tsdn) { } } -static void +void prof_bt_hash(const void *key, size_t r_hash[2]) { prof_bt_t *bt = (prof_bt_t *)key; @@ -2144,7 +1886,7 @@ prof_bt_hash(const void *key, size_t r_hash[2]) { hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); } -static bool +bool prof_bt_keycomp(const void *k1, const void *k2) { const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; @@ -2157,33 +1899,6 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static void -prof_bt_node_hash(const void *key, size_t r_hash[2]) { - const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; - prof_bt_hash((void *)(&bt_node->bt), r_hash); -} - -static bool -prof_bt_node_keycomp(const void *k1, const void *k2) { - const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; - const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; - return prof_bt_keycomp((void *)(&bt_node1->bt), - (void 
*)(&bt_node2->bt)); -} - -static void -prof_thr_node_hash(const void *key, size_t r_hash[2]) { - const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; - hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); -} - -static bool -prof_thr_node_keycomp(const void *k1, const void *k2) { - const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; - const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; - return thr_node1->thr_uid == thr_node2->thr_uid; -} - static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -2416,368 +2131,6 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } -#ifdef JEMALLOC_JET -size_t -prof_log_bt_count(void) { - size_t cnt = 0; - prof_bt_node_t *node = log_bt_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_alloc_count(void) { - size_t cnt = 0; - prof_alloc_node_t *node = log_alloc_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_thr_count(void) { - size_t cnt = 0; - prof_thr_node_t *node = log_thr_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -bool -prof_log_is_logging(void) { - return prof_logging_state == prof_logging_state_started; -} - -bool -prof_log_rep_check(void) { - if (prof_logging_state == prof_logging_state_stopped - && log_tables_initialized) { - return true; - } - - if (log_bt_last != NULL && log_bt_last->next != NULL) { - return true; - } - if (log_thr_last != NULL && log_thr_last->next != NULL) { - return true; - } - if (log_alloc_last != NULL && log_alloc_last->next != NULL) { - return true; - } - - size_t bt_count = prof_log_bt_count(); - size_t thr_count = prof_log_thr_count(); - size_t alloc_count = prof_log_alloc_count(); - - - if (prof_logging_state == prof_logging_state_stopped) { - if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { - return true; - } - } - - prof_alloc_node_t *node = log_alloc_first; - while (node 
!= NULL) { - if (node->alloc_bt_ind >= bt_count) { - return true; - } - if (node->free_bt_ind >= bt_count) { - return true; - } - if (node->alloc_thr_ind >= thr_count) { - return true; - } - if (node->free_thr_ind >= thr_count) { - return true; - } - if (node->alloc_time_ns > node->free_time_ns) { - return true; - } - node = node->next; - } - - return false; -} - -void -prof_log_dummy_set(bool new_value) { - prof_log_dummy = new_value; -} -#endif - -bool -prof_log_start(tsdn_t *tsdn, const char *filename) { - if (!opt_prof || !prof_booted) { - return true; - } - - bool ret = false; - size_t buf_size = PATH_MAX + 1; - - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_stopped) { - ret = true; - } else if (filename == NULL) { - /* Make default name. */ - malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", - opt_prof_prefix, prof_getpid(), log_seq); - log_seq++; - prof_logging_state = prof_logging_state_started; - } else if (strlen(filename) >= buf_size) { - ret = true; - } else { - strcpy(log_filename, filename); - prof_logging_state = prof_logging_state_started; - } - - if (!ret) { - nstime_update(&log_start_timestamp); - } - - malloc_mutex_unlock(tsdn, &log_mtx); - - return ret; -} - -/* Used as an atexit function to stop logging on exit. */ -static void -prof_log_stop_final(void) { - tsd_t *tsd = tsd_fetch(); - prof_log_stop(tsd_tsdn(tsd)); -} - -struct prof_emitter_cb_arg_s { - int fd; - ssize_t ret; -}; - -static void -prof_emitter_write_cb(void *opaque, const char *to_write) { - struct prof_emitter_cb_arg_s *arg = - (struct prof_emitter_cb_arg_s *)opaque; - size_t bytes = strlen(to_write); -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return; - } -#endif - arg->ret = write(arg->fd, (void *)to_write, bytes); -} - -/* - * prof_log_emit_{...} goes through the appropriate linked list, emitting each - * node to the json and deallocating it. 
- */ -static void -prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "threads"); - prof_thr_node_t *thr_node = log_thr_first; - prof_thr_node_t *thr_old_node; - while (thr_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, - &thr_node->thr_uid); - - char *thr_name = thr_node->name; - - emitter_json_kv(emitter, "thr_name", emitter_type_string, - &thr_name); - - emitter_json_object_end(emitter); - thr_old_node = thr_node; - thr_node = thr_node->next; - idalloc(tsd, thr_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "stack_traces"); - prof_bt_node_t *bt_node = log_bt_first; - prof_bt_node_t *bt_old_node; - /* - * Calculate how many hex digits we need: twice number of bytes, two for - * "0x", and then one more for terminating '\0'. - */ - char buf[2 * sizeof(intptr_t) + 3]; - size_t buf_sz = sizeof(buf); - while (bt_node != NULL) { - emitter_json_array_begin(emitter); - size_t i; - for (i = 0; i < bt_node->bt.len; i++) { - malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); - char *trace_str = buf; - emitter_json_value(emitter, emitter_type_string, - &trace_str); - } - emitter_json_array_end(emitter); - - bt_old_node = bt_node; - bt_node = bt_node->next; - idalloc(tsd, bt_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "allocations"); - prof_alloc_node_t *alloc_node = log_alloc_first; - prof_alloc_node_t *alloc_old_node; - while (alloc_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "alloc_thread", emitter_type_size, - &alloc_node->alloc_thr_ind); - - emitter_json_kv(emitter, "free_thread", emitter_type_size, - &alloc_node->free_thr_ind); - - emitter_json_kv(emitter, "alloc_trace", 
emitter_type_size, - &alloc_node->alloc_bt_ind); - - emitter_json_kv(emitter, "free_trace", emitter_type_size, - &alloc_node->free_bt_ind); - - emitter_json_kv(emitter, "alloc_timestamp", - emitter_type_uint64, &alloc_node->alloc_time_ns); - - emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, - &alloc_node->free_time_ns); - - emitter_json_kv(emitter, "usize", emitter_type_uint64, - &alloc_node->usize); - - emitter_json_object_end(emitter); - - alloc_old_node = alloc_node; - alloc_node = alloc_node->next; - idalloc(tsd, alloc_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_metadata(emitter_t *emitter) { - emitter_json_object_kv_begin(emitter, "info"); - - nstime_t now = NSTIME_ZERO_INITIALIZER; - - nstime_update(&now); - uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); - emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); - - char *vers = JEMALLOC_VERSION; - emitter_json_kv(emitter, "version", - emitter_type_string, &vers); - - emitter_json_kv(emitter, "lg_sample_rate", - emitter_type_int, &lg_prof_sample); - - int pid = prof_getpid(); - emitter_json_kv(emitter, "pid", emitter_type_int, &pid); - - emitter_json_object_end(emitter); -} - - -bool -prof_log_stop(tsdn_t *tsdn) { - if (!opt_prof || !prof_booted) { - return true; - } - - tsd_t *tsd = tsdn_tsd(tsdn); - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - malloc_mutex_unlock(tsdn, &log_mtx); - return true; - } - - /* - * Set the state to dumping. We'll set it to stopped when we're done. - * Since other threads won't be able to start/stop/log when the state is - * dumping, we don't have to hold the lock during the whole method. - */ - prof_logging_state = prof_logging_state_dumping; - malloc_mutex_unlock(tsdn, &log_mtx); - - - emitter_t emitter; - - /* Create a file. 
*/ - - int fd; -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - fd = 0; - } else { - fd = creat(log_filename, 0644); - } -#else - fd = creat(log_filename, 0644); -#endif - - if (fd == -1) { - malloc_printf(": creat() for log file \"%s\" " - " failed with %d\n", log_filename, errno); - if (opt_abort) { - abort(); - } - return true; - } - - /* Emit to json. */ - struct prof_emitter_cb_arg_s arg; - arg.fd = fd; - emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, - (void *)(&arg)); - - emitter_json_object_begin(&emitter); - prof_log_emit_metadata(&emitter); - prof_log_emit_threads(tsd, &emitter); - prof_log_emit_traces(tsd, &emitter); - prof_log_emit_allocs(tsd, &emitter); - emitter_json_object_end(&emitter); - - /* Reset global state. */ - if (log_tables_initialized) { - ckh_delete(tsd, &log_bt_node_set); - ckh_delete(tsd, &log_thr_node_set); - } - log_tables_initialized = false; - log_bt_index = 0; - log_thr_index = 0; - log_bt_first = NULL; - log_bt_last = NULL; - log_thr_first = NULL; - log_thr_last = NULL; - log_alloc_first = NULL; - log_alloc_last = NULL; - - malloc_mutex_lock(tsdn, &log_mtx); - prof_logging_state = prof_logging_state_stopped; - malloc_mutex_unlock(tsdn, &log_mtx); - -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return false; - } -#endif - return close(fd); -} - const char * prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; @@ -3014,35 +2367,10 @@ prof_boot2(tsd_t *tsd) { } } - if (opt_prof_log) { - prof_log_start(tsd_tsdn(tsd), NULL); - } - - if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " - "for logging\n"); - if (opt_abort) { - abort(); - } - } - - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + if (prof_log_init(tsd)) { return true; } - if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp)) { - return true; - } - - if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, 
prof_thr_node_keycomp)) { - return true; - } - - log_tables_initialized = true; - gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); diff --git a/src/prof_log.c b/src/prof_log.c new file mode 100644 index 00000000..25a6abee --- /dev/null +++ b/src/prof_log.c @@ -0,0 +1,698 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/emitter.h" + +bool opt_prof_log = false; +typedef enum prof_logging_state_e prof_logging_state_t; +enum prof_logging_state_e { + prof_logging_state_stopped, + prof_logging_state_started, + prof_logging_state_dumping +}; + +/* + * - stopped: log_start never called, or previous log_stop has completed. + * - started: log_start called, log_stop not called yet. Allocations are logged. + * - dumping: log_stop called but not finished; samples are not logged anymore. + */ +prof_logging_state_t prof_logging_state = prof_logging_state_stopped; + +#ifdef JEMALLOC_JET +static bool prof_log_dummy = false; +#endif + +/* Incremented for every log file that is output. */ +static uint64_t log_seq = 0; +static char log_filename[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Timestamp for most recent call to log_start(). */ +static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; + +/* Increment these when adding to the log_bt and log_thr linked lists. */ +static size_t log_bt_index = 0; +static size_t log_thr_index = 0; + +/* Linked list node definitions. These are only used in this file. 
*/ +typedef struct prof_bt_node_s prof_bt_node_t; + +struct prof_bt_node_s { + prof_bt_node_t *next; + size_t index; + prof_bt_t bt; + /* Variable size backtrace vector pointed to by bt. */ + void *vec[1]; +}; + +typedef struct prof_thr_node_s prof_thr_node_t; + +struct prof_thr_node_s { + prof_thr_node_t *next; + size_t index; + uint64_t thr_uid; + /* Variable size based on thr_name_sz. */ + char name[1]; +}; + +typedef struct prof_alloc_node_s prof_alloc_node_t; + +/* This is output when logging sampled allocations. */ +struct prof_alloc_node_s { + prof_alloc_node_t *next; + /* Indices into an array of thread data. */ + size_t alloc_thr_ind; + size_t free_thr_ind; + + /* Indices into an array of backtraces. */ + size_t alloc_bt_ind; + size_t free_bt_ind; + + uint64_t alloc_time_ns; + uint64_t free_time_ns; + + size_t usize; +}; + +/* + * Created on the first call to prof_log_start and deleted on prof_log_stop. + * These are the backtraces and threads that have already been logged by an + * allocation. + */ +static bool log_tables_initialized = false; +static ckh_t log_bt_node_set; +static ckh_t log_thr_node_set; + +/* Store linked lists for logged data. */ +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; +static prof_alloc_node_t *log_alloc_first = NULL; +static prof_alloc_node_t *log_alloc_last = NULL; + +/* Protects the prof_logging_state and any log_{...} variable. */ +static malloc_mutex_t log_mtx; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +/* Hashtable functions for log_bt_node_set and log_thr_node_set. 
*/ +static void prof_thr_node_hash(const void *key, size_t r_hash[2]); +static bool prof_thr_node_keycomp(const void *k1, const void *k2); +static void prof_bt_node_hash(const void *key, size_t r_hash[2]); +static bool prof_bt_node_keycomp(const void *k1, const void *k2); + +/******************************************************************************/ + +static size_t +prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_bt_node_t dummy_node; + dummy_node.bt = *bt; + prof_bt_node_t *node; + + /* See if this backtrace is already cached in the table. */ + if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_bt_first == NULL) { + log_bt_first = new_node; + log_bt_last = new_node; + } else { + log_bt_last->next = new_node; + log_bt_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_bt_index; + /* + * Copy the backtrace: bt is inside a tdata or gctx, which + * might die before prof_log_stop is called. + */ + new_node->bt.len = bt->len; + memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); + new_node->bt.vec = new_node->vec; + + log_bt_index++; + ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} +static size_t +prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_thr_node_t dummy_node; + dummy_node.thr_uid = thr_uid; + prof_thr_node_t *node; + + /* See if this thread is already cached in the table. 
*/ + if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; + prof_thr_node_t *new_node = (prof_thr_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_thr_first == NULL) { + log_thr_first = new_node; + log_thr_last = new_node; + } else { + log_thr_last->next = new_node; + log_thr_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_thr_index; + new_node->thr_uid = thr_uid; + strcpy(new_node->name, name); + + log_thr_index++; + ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} + +void +prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); + if (cons_tdata == NULL) { + /* + * We decide not to log these allocations. cons_tdata will be + * NULL only when the current thread is in a weird state (e.g. + * it's being destroyed). 
+ */ + return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + goto label_done; + } + + if (!log_tables_initialized) { + bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp); + bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp); + if (err1 || err2) { + goto label_done; + } + log_tables_initialized = true; + } + + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, + (alloc_ctx_t *)NULL); + nstime_t free_time = NSTIME_ZERO_INITIALIZER; + nstime_update(&free_time); + + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? + "" : tctx->tdata->thread_name; + const char *cons_thr_name = prof_thread_name_get(tsd); + + prof_bt_t bt; + /* Initialize the backtrace, using the buffer in tdata to store it. */ + bt_init(&bt, cons_tdata->vec); + prof_backtrace(&bt); + prof_bt_t *cons_bt = &bt; + + /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ + prof_bt_t *prod_bt = &tctx->gctx->bt; + + new_node->next = NULL; + new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, + prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, + cons_thr_name); + new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); + new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); + new_node->alloc_time_ns = nstime_ns(&alloc_time); + new_node->free_time_ns = nstime_ns(&free_time); + new_node->usize = usize; + + if (log_alloc_first == NULL) { + log_alloc_first = new_node; + log_alloc_last = new_node; + } else { + log_alloc_last->next = new_node; + log_alloc_last = new_node; + } + +label_done: + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); +} + +static void +prof_bt_node_hash(const void *key, size_t r_hash[2]) { + const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; + prof_bt_hash((void *)(&bt_node->bt), r_hash); +} + +static bool +prof_bt_node_keycomp(const void *k1, const void *k2) { + const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; + const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; + return prof_bt_keycomp((void *)(&bt_node1->bt), + (void *)(&bt_node2->bt)); +} + +static void +prof_thr_node_hash(const void *key, size_t r_hash[2]) { + const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; + hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); +} + +static bool +prof_thr_node_keycomp(const void *k1, const void *k2) { + const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; + const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; + return thr_node1->thr_uid == thr_node2->thr_uid; +} + +#ifdef JEMALLOC_JET +size_t +prof_log_bt_count(void) { + size_t cnt = 0; + prof_bt_node_t *node = log_bt_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_alloc_count(void) { + size_t cnt = 0; + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; 
+}
+
+size_t
+prof_log_thr_count(void) {
+	size_t cnt = 0;
+	prof_thr_node_t *node = log_thr_first;
+	while (node != NULL) {
+		cnt++;
+		node = node->next;
+	}
+	return cnt;
+}
+
+bool
+prof_log_is_logging(void) {
+	return prof_logging_state == prof_logging_state_started;
+}
+
+bool
+prof_log_rep_check(void) {
+	if (prof_logging_state == prof_logging_state_stopped
+	    && log_tables_initialized) {
+		return true;
+	}
+
+	if (log_bt_last != NULL && log_bt_last->next != NULL) {
+		return true;
+	}
+	if (log_thr_last != NULL && log_thr_last->next != NULL) {
+		return true;
+	}
+	if (log_alloc_last != NULL && log_alloc_last->next != NULL) {
+		return true;
+	}
+
+	size_t bt_count = prof_log_bt_count();
+	size_t thr_count = prof_log_thr_count();
+	size_t alloc_count = prof_log_alloc_count();
+
+
+	if (prof_logging_state == prof_logging_state_stopped) {
+		if (bt_count != 0 || thr_count != 0 || alloc_count != 0) {
+			return true;
+		}
+	}
+
+	prof_alloc_node_t *node = log_alloc_first;
+	while (node != NULL) {
+		if (node->alloc_bt_ind >= bt_count) {
+			return true;
+		}
+		if (node->free_bt_ind >= bt_count) {
+			return true;
+		}
+		if (node->alloc_thr_ind >= thr_count) {
+			return true;
+		}
+		if (node->free_thr_ind >= thr_count) {
+			return true;
+		}
+		if (node->alloc_time_ns > node->free_time_ns) {
+			return true;
+		}
+		node = node->next;
+	}
+
+	return false;
+}
+
+void
+prof_log_dummy_set(bool new_value) {
+	prof_log_dummy = new_value;
+}
+#endif
+
+bool
+prof_log_start(tsdn_t *tsdn, const char *filename) {
+	if (!opt_prof || !prof_booted) {
+		return true;
+	}
+
+	bool ret = false;
+	size_t buf_size = PATH_MAX + 1;
+
+	malloc_mutex_lock(tsdn, &log_mtx);
+
+	if (prof_logging_state != prof_logging_state_stopped) {
+		ret = true;
+	} else if (filename == NULL) {
+		/* Make default name. 
*/ + malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", + opt_prof_prefix, prof_getpid(), log_seq); + log_seq++; + prof_logging_state = prof_logging_state_started; + } else if (strlen(filename) >= buf_size) { + ret = true; + } else { + strcpy(log_filename, filename); + prof_logging_state = prof_logging_state_started; + } + + if (!ret) { + nstime_update(&log_start_timestamp); + } + + malloc_mutex_unlock(tsdn, &log_mtx); + + return ret; +} + +/* Used as an atexit function to stop logging on exit. */ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + +struct prof_emitter_cb_arg_s { + int fd; + ssize_t ret; +}; + +static void +prof_emitter_write_cb(void *opaque, const char *to_write) { + struct prof_emitter_cb_arg_s *arg = + (struct prof_emitter_cb_arg_s *)opaque; + size_t bytes = strlen(to_write); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return; + } +#endif + arg->ret = write(arg->fd, (void *)to_write, bytes); +} + +/* + * prof_log_emit_{...} goes through the appropriate linked list, emitting each + * node to the json and deallocating it. 
+ */ +static void +prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "threads"); + prof_thr_node_t *thr_node = log_thr_first; + prof_thr_node_t *thr_old_node; + while (thr_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, + &thr_node->thr_uid); + + char *thr_name = thr_node->name; + + emitter_json_kv(emitter, "thr_name", emitter_type_string, + &thr_name); + + emitter_json_object_end(emitter); + thr_old_node = thr_node; + thr_node = thr_node->next; + idalloc(tsd, thr_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "stack_traces"); + prof_bt_node_t *bt_node = log_bt_first; + prof_bt_node_t *bt_old_node; + /* + * Calculate how many hex digits we need: twice number of bytes, two for + * "0x", and then one more for terminating '\0'. + */ + char buf[2 * sizeof(intptr_t) + 3]; + size_t buf_sz = sizeof(buf); + while (bt_node != NULL) { + emitter_json_array_begin(emitter); + size_t i; + for (i = 0; i < bt_node->bt.len; i++) { + malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); + char *trace_str = buf; + emitter_json_value(emitter, emitter_type_string, + &trace_str); + } + emitter_json_array_end(emitter); + + bt_old_node = bt_node; + bt_node = bt_node->next; + idalloc(tsd, bt_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "allocations"); + prof_alloc_node_t *alloc_node = log_alloc_first; + prof_alloc_node_t *alloc_old_node; + while (alloc_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "alloc_thread", emitter_type_size, + &alloc_node->alloc_thr_ind); + + emitter_json_kv(emitter, "free_thread", emitter_type_size, + &alloc_node->free_thr_ind); + + emitter_json_kv(emitter, "alloc_trace", 
emitter_type_size, + &alloc_node->alloc_bt_ind); + + emitter_json_kv(emitter, "free_trace", emitter_type_size, + &alloc_node->free_bt_ind); + + emitter_json_kv(emitter, "alloc_timestamp", + emitter_type_uint64, &alloc_node->alloc_time_ns); + + emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, + &alloc_node->free_time_ns); + + emitter_json_kv(emitter, "usize", emitter_type_uint64, + &alloc_node->usize); + + emitter_json_object_end(emitter); + + alloc_old_node = alloc_node; + alloc_node = alloc_node->next; + idalloc(tsd, alloc_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_metadata(emitter_t *emitter) { + emitter_json_object_kv_begin(emitter, "info"); + + nstime_t now = NSTIME_ZERO_INITIALIZER; + + nstime_update(&now); + uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); + emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); + + char *vers = JEMALLOC_VERSION; + emitter_json_kv(emitter, "version", + emitter_type_string, &vers); + + emitter_json_kv(emitter, "lg_sample_rate", + emitter_type_int, &lg_prof_sample); + + int pid = prof_getpid(); + emitter_json_kv(emitter, "pid", emitter_type_int, &pid); + + emitter_json_object_end(emitter); +} + + +bool +prof_log_stop(tsdn_t *tsdn) { + if (!opt_prof || !prof_booted) { + return true; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + malloc_mutex_unlock(tsdn, &log_mtx); + return true; + } + + /* + * Set the state to dumping. We'll set it to stopped when we're done. + * Since other threads won't be able to start/stop/log when the state is + * dumping, we don't have to hold the lock during the whole method. + */ + prof_logging_state = prof_logging_state_dumping; + malloc_mutex_unlock(tsdn, &log_mtx); + + + emitter_t emitter; + + /* Create a file. 
*/ + + int fd; +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + fd = 0; + } else { + fd = creat(log_filename, 0644); + } +#else + fd = creat(log_filename, 0644); +#endif + + if (fd == -1) { + malloc_printf(": creat() for log file \"%s\" " + " failed with %d\n", log_filename, errno); + if (opt_abort) { + abort(); + } + return true; + } + + /* Emit to json. */ + struct prof_emitter_cb_arg_s arg; + arg.fd = fd; + emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, + (void *)(&arg)); + + emitter_json_object_begin(&emitter); + prof_log_emit_metadata(&emitter); + prof_log_emit_threads(tsd, &emitter); + prof_log_emit_traces(tsd, &emitter); + prof_log_emit_allocs(tsd, &emitter); + emitter_json_object_end(&emitter); + + /* Reset global state. */ + if (log_tables_initialized) { + ckh_delete(tsd, &log_bt_node_set); + ckh_delete(tsd, &log_thr_node_set); + } + log_tables_initialized = false; + log_bt_index = 0; + log_thr_index = 0; + log_bt_first = NULL; + log_bt_last = NULL; + log_thr_first = NULL; + log_thr_last = NULL; + log_alloc_first = NULL; + log_alloc_last = NULL; + + malloc_mutex_lock(tsdn, &log_mtx); + prof_logging_state = prof_logging_state_stopped; + malloc_mutex_unlock(tsdn, &log_mtx); + +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return false; + } +#endif + return close(fd); +} + +bool prof_log_init(tsd_t *tsd) { + if (opt_prof_log) { + prof_log_start(tsd_tsdn(tsd), NULL); + } + + if (atexit(prof_log_stop_final) != 0) { + malloc_write(": Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + } + + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + return true; + } + + if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp)) { + return true; + } + + if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp)) { + return true; + } + + log_tables_initialized = true; + return false; +} + 
+/******************************************************************************/ From 0b462407ae84a62b3c097f0e9f18df487a47d9a7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jul 2019 15:52:50 -0700 Subject: [PATCH 1326/2608] Refactor profiling Refactored core profiling codebase into two logical parts: (a) `prof_data.c`: core internal data structure managing & dumping; (b) `prof.c`: mutexes & outward-facing APIs. Some internal functions had to be exposed out, but there are not that many of them if the modularization is (hopefully) clean enough. --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 14 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/prof.c | 1490 +---------------- src/prof_data.c | 1440 ++++++++++++++++ 8 files changed, 1502 insertions(+), 1451 deletions(-) create mode 100644 src/prof_data.c diff --git a/Makefile.in b/Makefile.in index 1cd973d7..40daf115 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ + $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e94ac3b2..8fc45cf7 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -4,6 +4,11 @@ #include "jemalloc/internal/mutex.h" extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; + +malloc_mutex_t *prof_gctx_mutex_choose(void); +malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); extern bool opt_prof; extern bool opt_prof_active; @@ -110,4 +115,13 @@ bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); #endif +/* 
Functions in prof_data.c only accessed in prof.c */ +bool prof_data_init(tsd_t *tsd); +bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck); +prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, + uint64_t thr_discrim, char *thread_name, bool active); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d93d9099..387f14be 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 7b09d4e6..030d8266 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 28bd3cd6..1606a3ab 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index a66c209b..622b93f1 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 9d1edb32..79a0ffc8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -3,11 +3,14 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/hash.h" -#include 
"jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +/* + * This file implements the profiling "APIs" needed by other parts of jemalloc, + * and also manages the relevant "operational" data, mainly options and mutexes; + * the core profiling data structures are encapsulated in prof_data.c. + */ + /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND @@ -88,20 +91,10 @@ static atomic_u_t cum_gctxs; /* Atomic counter. */ */ static malloc_mutex_t *tdata_locks; -/* - * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data - * structure that knows about all backtraces currently captured. - */ -static ckh_t bt2gctx; /* Non static to enable profiling. */ malloc_mutex_t bt2gctx_mtx; -/* - * Tree of all extant prof_tdata_t structures, regardless of state, - * {attached,detached,expired}. - */ -static prof_tdata_tree_t tdatas; -static malloc_mutex_t tdatas_mtx; +malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -112,101 +105,29 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static malloc_mutex_t prof_dump_mtx; -static char prof_dump_buf[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PROF_DUMP_BUFSIZE -#else - 1 -#endif -]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +malloc_mutex_t prof_dump_mtx; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; /******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. 
- */ -static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); -static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached); -static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached); -static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +static bool +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); -/******************************************************************************/ -/* Red-black trees. */ - -static int -prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - uint64_t a_thr_uid = a->thr_uid; - uint64_t b_thr_uid = b->thr_uid; - int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); - if (ret == 0) { - uint64_t a_thr_discrim = a->thr_discrim; - uint64_t b_thr_discrim = b->thr_discrim; - ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < - b_thr_discrim); - if (ret == 0) { - uint64_t a_tctx_uid = a->tctx_uid; - uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < - b_tctx_uid); - } + if (opt_prof_accum) { + return false; } - return ret; -} - -rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, - tctx_link, prof_tctx_comp) - -static int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { - unsigned a_len = a->bt.len; - unsigned b_len = b->bt.len; - unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; - int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); - if (ret == 0) { - ret = (a_len > b_len) - (a_len < b_len); + if (tctx->cnts.curobjs != 0) { + return false; } - return ret; -} - -rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, - prof_gctx_comp) - -static int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { - int ret; - uint64_t a_uid = a->thr_uid; - uint64_t b_uid = b->thr_uid; - - ret = ((a_uid > b_uid) - (a_uid < b_uid)); - if (ret == 0) { - uint64_t a_discrim = a->thr_discrim; - uint64_t b_discrim = b->thr_discrim; - - ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + if (tctx->prepared) { + return false; } - return ret; + return true; } -rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, - prof_tdata_comp) - -/******************************************************************************/ - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { prof_tdata_t *tdata; @@ -286,45 +207,6 @@ bt_init(prof_bt_t *bt, void **vec) { bt->len = 0; } -static void -prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - if (tdata != NULL) { - assert(!tdata->enq); - tdata->enq = true; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); -} - -static void -prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - if (tdata != NULL) { - bool idump, gdump; - - assert(tdata->enq); - tdata->enq = false; - idump = tdata->enq_idump; - tdata->enq_idump = false; - gdump = tdata->enq_gdump; - tdata->enq_gdump = false; - - if (idump) { - prof_idump(tsd_tsdn(tsd)); - } - if (gdump) { - prof_gdump(tsd_tsdn(tsd)); - } - } -} - #ifdef JEMALLOC_PROF_LIBUNWIND void prof_backtrace(prof_bt_t *bt) { @@ -547,324 +429,18 @@ prof_backtrace(prof_bt_t *bt) { } #endif -static 
malloc_mutex_t * +malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; } -static malloc_mutex_t * +malloc_mutex_t * prof_tdata_mutex_choose(uint64_t thr_uid) { return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; } -static prof_gctx_t * -prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { - /* - * Create a single allocation that has space for vec of length bt->len. - */ - size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); - if (gctx == NULL) { - return NULL; - } - gctx->lock = prof_gctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - gctx->nlimbo = 1; - tctx_tree_new(&gctx->tctxs); - /* Duplicate bt. */ - memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); - gctx->bt.vec = gctx->vec; - gctx->bt.len = bt->len; - return gctx; -} - -static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) { - cassert(config_prof); - - /* - * Check that gctx is still unused by any thread cache before destroying - * it. prof_lookup() increments gctx->nlimbo in order to avoid a race - * condition with this function, as does prof_tctx_destroy() in order to - * avoid a race between the main body of prof_tctx_destroy() and entry - * into this function. - */ - prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - assert(gctx->nlimbo != 0); - if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { - /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { - not_reached(); - } - prof_leave(tsd, tdata_self); - /* Destroy gctx. 
*/ - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); - } else { - /* - * Compensate for increment in prof_tctx_destroy() or - * prof_lookup(). - */ - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_leave(tsd, tdata_self); - } -} - -static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - if (opt_prof_accum) { - return false; - } - if (tctx->cnts.curobjs != 0) { - return false; - } - if (tctx->prepared) { - return false; - } - return true; -} - -static bool -prof_gctx_should_destroy(prof_gctx_t *gctx) { - if (opt_prof_accum) { - return false; - } - if (!tctx_tree_empty(&gctx->tctxs)) { - return false; - } - if (gctx->nlimbo != 0) { - return false; - } - return true; -} - -static void -prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { - prof_tdata_t *tdata = tctx->tdata; - prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_tctx, destroy_gctx; - - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - assert(tctx->cnts.curobjs == 0); - assert(tctx->cnts.curbytes == 0); - assert(!opt_prof_accum); - assert(tctx->cnts.accumobjs == 0); - assert(tctx->cnts.accumbytes == 0); - - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - switch (tctx->state) { - case prof_tctx_state_nominal: - tctx_tree_remove(&gctx->tctxs, tctx); - destroy_tctx = true; - if (prof_gctx_should_destroy(gctx)) { - /* - * Increment gctx->nlimbo in order to keep another - * thread from winning the race to destroy gctx while - * this one has gctx->lock dropped. Without this, it - * would be possible for another thread to: - * - * 1) Sample an allocation associated with gctx. - * 2) Deallocate the sampled object. 
- * 3) Successfully prof_gctx_try_destroy(gctx). - * - * The result would be that gctx no longer exists by the - * time this thread accesses it in - * prof_gctx_try_destroy(). - */ - gctx->nlimbo++; - destroy_gctx = true; - } else { - destroy_gctx = false; - } - break; - case prof_tctx_state_dumping: - /* - * A dumping thread needs tctx to remain valid until dumping - * has finished. Change state such that the dumping thread will - * complete destruction during a late dump iteration phase. - */ - tctx->state = prof_tctx_state_purgatory; - destroy_tctx = false; - destroy_gctx = false; - break; - default: - not_reached(); - destroy_tctx = false; - destroy_gctx = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - if (destroy_gctx) { - prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, - tdata); - } - - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, false); - } - - if (destroy_tctx) { - idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); - } -} - -static bool -prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, - void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { - union { - prof_gctx_t *p; - void *v; - } gctx, tgctx; - union { - prof_bt_t *p; - void *v; - } btkey; - bool new_gctx; - - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - /* bt has never been seen before. Insert it. */ - prof_leave(tsd, tdata); - tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); - if (tgctx.v == NULL) { - return true; - } - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - gctx.p = tgctx.p; - btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { - /* OOM. 
*/ - prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, - true, true); - return true; - } - new_gctx = true; - } else { - new_gctx = false; - } - } else { - tgctx.v = NULL; - new_gctx = false; - } - - if (!new_gctx) { - /* - * Increment nlimbo, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); - gctx.p->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); - new_gctx = false; - - if (tgctx.v != NULL) { - /* Lost race to insert. */ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, - true); - } - } - prof_leave(tsd, tdata); - - *p_btkey = btkey.v; - *p_gctx = gctx.p; - *p_new_gctx = new_gctx; - return false; -} - -prof_tctx_t * -prof_lookup(tsd_t *tsd, prof_bt_t *bt) { - union { - prof_tctx_t *p; - void *v; - } ret; - prof_tdata_t *tdata; - bool not_found; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return NULL; - } - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); - if (!not_found) { /* Note double negative! */ - ret.p->prepared = true; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (not_found) { - void *btkey; - prof_gctx_t *gctx; - bool new_gctx, error; - - /* - * This thread's cache lacks bt. Look for it in the global - * cache. - */ - if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) { - return NULL; - } - - /* Link a prof_tctx_t into gctx for this thread. 
*/ - ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), - sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); - if (ret.p == NULL) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - return NULL; - } - ret.p->tdata = tdata; - ret.p->thr_uid = tdata->thr_uid; - ret.p->thr_discrim = tdata->thr_discrim; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - ret.p->gctx = gctx; - ret.p->tctx_uid = tdata->tctx_uid_next++; - ret.p->prepared = true; - ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (error) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); - return NULL; - } - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - ret.p->state = prof_tctx_state_nominal; - tctx_tree_insert(&gctx->tctxs, ret.p); - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - - return ret.p; -} - /* * The bodies of this function and prof_leakcheck() are compiled out unless heap * profiling is enabled, so that it is possible to compile jemalloc with @@ -921,520 +497,6 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { #endif } -#ifdef JEMALLOC_JET -static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - size_t *tdata_count = (size_t *)arg; - - (*tdata_count)++; - - return NULL; -} - -size_t -prof_tdata_count(void) { - size_t tdata_count = 0; - tsdn_t *tsdn; - - tsdn = tsdn_fetch(); - malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, - (void *)&tdata_count); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - - return tdata_count; -} - -size_t -prof_bt_count(void) { - size_t bt_count; - tsd_t *tsd; - prof_tdata_t *tdata; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if 
(tdata == NULL) { - return 0; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); - bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - return bt_count; -} -#endif - -static int -prof_dump_open_impl(bool propagate_err, const char *filename) { - int fd; - - fd = creat(filename, 0644); - if (fd == -1 && !propagate_err) { - malloc_printf(": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) { - abort(); - } - } - - return fd; -} -prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; - -static bool -prof_dump_flush(bool propagate_err) { - bool ret = false; - ssize_t err; - - cassert(config_prof); - - err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (!propagate_err) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) { - abort(); - } - } - ret = true; - } - prof_dump_buf_end = 0; - - return ret; -} - -static bool -prof_dump_close(bool propagate_err) { - bool ret; - - assert(prof_dump_fd != -1); - ret = prof_dump_flush(propagate_err); - close(prof_dump_fd); - prof_dump_fd = -1; - - return ret; -} - -static bool -prof_dump_write(bool propagate_err, const char *s) { - size_t i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - if (prof_dump_flush(propagate_err) && propagate_err) { - return true; - } - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - - return false; -} - -JEMALLOC_FORMAT_PRINTF(2, 3) -static bool -prof_dump_printf(bool propagate_err, const char *format, ...) 
{ - bool ret; - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - ret = prof_dump_write(propagate_err, buf); - - return ret; -} - -static void -prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - malloc_mutex_lock(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - return; - case prof_tctx_state_nominal: - tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - - memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - - tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - tdata->cnt_summed.accumobjs += - tctx->dump_cnts.accumobjs; - tdata->cnt_summed.accumbytes += - tctx->dump_cnts.accumbytes; - } - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - not_reached(); - } -} - -static void -prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsdn, gctx->lock); - - gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; - } -} - -static prof_tctx_t * -prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. 
*/ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); - break; - default: - not_reached(); - } - - return NULL; -} - -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - case prof_tctx_state_nominal: - /* Not captured by this dump. */ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - if (prof_dump_printf(arg->propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " - "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) { - return tctx; - } - break; - default: - not_reached(); - } - return NULL; -} - -static prof_tctx_t * -prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - prof_tctx_t *ret; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. */ - break; - case prof_tctx_state_dumping: - tctx->state = prof_tctx_state_nominal; - break; - case prof_tctx_state_purgatory: - ret = tctx; - goto label_return; - default: - not_reached(); - } - - ret = NULL; -label_return: - return ret; -} - -static void -prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { - cassert(config_prof); - - malloc_mutex_lock(tsdn, gctx->lock); - - /* - * Increment nlimbo so that gctx won't go away before dump. - * Additionally, link gctx into the dump list so that it is included in - * prof_dump()'s second pass. 
- */ - gctx->nlimbo++; - gctx_tree_insert(gctxs, gctx); - - memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - - malloc_mutex_unlock(tsdn, gctx->lock); -} - -struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; -}; - -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); - if (gctx->cnt_summed.curobjs != 0) { - arg->leak_ngctx++; - } - malloc_mutex_unlock(arg->tsdn, gctx->lock); - - return NULL; -} - -static void -prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { - prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_gctx_t *gctx; - - /* - * Standard tree iteration won't work here, because as soon as we - * decrement gctx->nlimbo and unlock gctx, another thread can - * concurrently destroy it, which will corrupt the tree. Therefore, - * tear down the tree one node at a time during iteration. 
- */ - while ((gctx = gctx_tree_first(gctxs)) != NULL) { - gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - { - prof_tctx_t *next; - - next = NULL; - do { - prof_tctx_t *to_destroy = - tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, - (void *)tsd_tsdn(tsd)); - if (to_destroy != NULL) { - next = tctx_tree_next(&gctx->tctxs, - to_destroy); - tctx_tree_remove(&gctx->tctxs, - to_destroy); - idalloctm(tsd_tsdn(tsd), to_destroy, - NULL, NULL, true, true); - } else { - next = NULL; - } - } while (next != NULL); - } - gctx->nlimbo--; - if (prof_gctx_should_destroy(gctx)) { - gctx->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - } -} - -struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; -}; - -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *opaque) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, tdata->lock); - if (!tdata->expired) { - size_t tabind; - union { - prof_tctx_t *p; - void *v; - } tctx; - - tdata->dumping = true; - memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) { - prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); - } - - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; - if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; - } - } else { - tdata->dumping = false; - } - malloc_mutex_unlock(arg->tsdn, tdata->lock); - - return NULL; -} - -static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - bool propagate_err = *(bool *)arg; - - if (!tdata->dumping) { - return NULL; - } - 
- if (prof_dump_printf(propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", - tdata->thr_uid, tdata->cnt_summed.curobjs, - tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, - tdata->cnt_summed.accumbytes, - (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) { - return tdata; - } - return NULL; -} - -static bool -prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { - bool ret; - - if (prof_dump_printf(propagate_err, - "heap_v2/%"FMTu64"\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return true; - } - - malloc_mutex_lock(tsdn, &tdatas_mtx); - ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, - (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - return ret; -} -prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; - -static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { - bool ret; - unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; - - cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); - - /* Avoid dumping such gctx's that have no useful data. 
*/ - if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { - assert(gctx->cnt_summed.curobjs == 0); - assert(gctx->cnt_summed.curbytes == 0); - assert(gctx->cnt_summed.accumobjs == 0); - assert(gctx->cnt_summed.accumbytes == 0); - ret = false; - goto label_return; - } - - if (prof_dump_printf(propagate_err, "@")) { - ret = true; - goto label_return; - } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"FMTxPTR, - (uintptr_t)bt->vec[i])) { - ret = true; - goto label_return; - } - } - - if (prof_dump_printf(propagate_err, - "\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { - ret = true; - goto label_return; - } - - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; - if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { - ret = true; - goto label_return; - } - - ret = false; -label_return: - return ret; -} - -#ifndef _WIN32 -JEMALLOC_FORMAT_PRINTF(1, 2) -static int -prof_open_maps(const char *format, ...) 
{ - int mfd; - va_list ap; - char filename[PATH_MAX + 1]; - - va_start(ap, format); - malloc_vsnprintf(filename, sizeof(filename), format, ap); - va_end(ap); - -#if defined(O_CLOEXEC) - mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); - } -#endif - - return mfd; -} -#endif - int prof_getpid(void) { #ifdef _WIN32 @@ -1444,291 +506,6 @@ prof_getpid(void) { #endif } -static bool -prof_dump_maps(bool propagate_err) { - bool ret; - int mfd; - - cassert(config_prof); -#ifdef __FreeBSD__ - mfd = prof_open_maps("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented -#else - { - int pid = prof_getpid(); - - mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps("/proc/%d/maps", pid); - } - } -#endif - if (mfd != -1) { - ssize_t nread; - - if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) { - ret = true; - goto label_return; - } - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). */ - if (prof_dump_flush(propagate_err) && - propagate_err) { - ret = true; - goto label_return; - } - } - nread = malloc_read_fd(mfd, - &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - - prof_dump_buf_end); - } while (nread > 0); - } else { - ret = true; - goto label_return; - } - - ret = false; -label_return: - if (mfd != -1) { - close(mfd); - } - return ret; -} - -/* - * See prof_sample_threshold_update() comment for why the body of this function - * is conditionally compiled. 
- */ -static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) { -#ifdef JEMALLOC_PROF - /* - * Scaling is equivalent AdjustSamples() in jeprof, but the result may - * differ slightly from what jeprof reports, because here we scale the - * summary values, whereas jeprof scales each context individually and - * reports the sums of the scaled values. - */ - if (cnt_all->curbytes != 0) { - double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); - - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); - malloc_printf( - ": Run jeprof on \"%s\" for leak detail\n", - filename); - } -#endif -} - -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { - ret = gctx; - goto label_return; - } - - ret = NULL; -label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); - return ret; -} - -static void -prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) { - size_t tabind; - union { - prof_gctx_t *p; - void *v; - } gctx; - - prof_enter(tsd, tdata); - - /* - * Put gctx's in limbo and clear their counters in preparation for - * summing. - */ - gctx_tree_new(gctxs); - for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { - prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); - } - - /* - * Iterate over tdatas, and for the non-expired ones snapshot their tctx - * stats and merge them into the associated gctx's. - */ - prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - /* Merge tctx stats into gctx's. 
*/ - prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg->leak_ngctx = 0; - gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - (void *)prof_gctx_merge_iter_arg); - - prof_leave(tsd, tdata); -} - -static bool -prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, - prof_gctx_tree_t *gctxs) { - /* Create dump file. */ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { - return true; - } - - /* Dump profile header. */ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg->cnt_all)) { - goto label_write_error; - } - - /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg->propagate_err = propagate_err; - if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, - (void *)prof_gctx_dump_iter_arg) != NULL) { - goto label_write_error; - } - - /* Dump /proc//maps if possible. 
*/ - if (prof_dump_maps(propagate_err)) { - goto label_write_error; - } - - if (prof_dump_close(propagate_err)) { - return true; - } - - return false; -label_write_error: - prof_dump_close(propagate_err); - return true; -} - -static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck) { - cassert(config_prof); - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t * tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return true; - } - - pre_reentrancy(tsd, NULL); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - - prof_gctx_tree_t gctxs; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, - &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, - &prof_gctx_dump_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - post_reentrancy(tsd); - - if (err) { - return true; - } - - if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); - } - return false; -} - -#ifdef JEMALLOC_JET -void -prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) { - tsd_t *tsd; - prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - prof_gctx_tree_t gctxs; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - if (curobjs != NULL) { - *curobjs = 0; - } - if (curbytes != NULL) { - *curbytes = 0; - } - if (accumobjs != NULL) { - *accumobjs = 0; - } - if (accumbytes != NULL) { - *accumbytes = 0; - } - return; - } - - prof_dump_prep(tsd, tdata, 
&prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - if (curobjs != NULL) { - *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; - } - if (curbytes != NULL) { - *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; - } - if (accumobjs != NULL) { - *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; - } - if (accumbytes != NULL) { - *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; - } -} -#endif - #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void @@ -1877,28 +654,6 @@ prof_gdump(tsdn_t *tsdn) { } } -void -prof_bt_hash(const void *key, size_t r_hash[2]) { - prof_bt_t *bt = (prof_bt_t *)key; - - cassert(config_prof); - - hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); -} - -bool -prof_bt_keycomp(const void *k1, const void *k2) { - const prof_bt_t *bt1 = (prof_bt_t *)k1; - const prof_bt_t *bt2 = (prof_bt_t *)k2; - - cassert(config_prof); - - if (bt1->len != bt2->len) { - return false; - } - return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); -} - static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -1911,124 +666,33 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { return thr_uid; } -static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { - prof_tdata_t *tdata; - - cassert(config_prof); - - /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (tdata == NULL) { - return NULL; - } - - tdata->lock = prof_tdata_mutex_choose(thr_uid); - tdata->thr_uid = thr_uid; - tdata->thr_discrim = thr_discrim; - tdata->thread_name = thread_name; - tdata->attached = true; - tdata->expired = false; - tdata->tctx_uid_next = 0; - - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); - return NULL; - } - - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); - - tdata->enq = false; - tdata->enq_idump = false; - tdata->enq_gdump = false; - - tdata->dumping = false; - tdata->active = active; - - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - return tdata; -} - prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } -static bool -prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { - if (tdata->attached && !even_if_attached) { - return false; +static char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; } - if (ckh_count(&tdata->bt2tctx) != 0) { - return false; + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; } - return true; -} -static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, tdata->lock); - - return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); -} - -static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) { - 
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); - - tdata_tree_remove(&tdatas, tdata); - - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - } - ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); -} - -static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); -} - -static void -prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); - /* - * Only detach if !destroy_tdata, because detaching would allow - * another thread to win the race to destroy tdata. - */ - if (!destroy_tdata) { - tdata->attached = false; - } - tsd_prof_tdata_set(tsd, NULL); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, true); + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; } + memcpy(ret, thread_name, size); + return ret; } prof_tdata_t * @@ -2044,58 +708,6 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { active); } -static bool -prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsdn, tdata->lock); - if (!tdata->expired) { - tdata->expired = true; - destroy_tdata = tdata->attached ? 
false : - prof_tdata_should_destroy(tsdn, tdata, false); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsdn, tdata->lock); - - return destroy_tdata; -} - -static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); -} - -void -prof_reset(tsd_t *tsd, size_t lg_sample) { - prof_tdata_t *next; - - assert(lg_sample < (sizeof(uint64_t) << 3)); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - - lg_prof_sample = lg_sample; - - next = NULL; - do { - prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); - if (to_destroy != NULL) { - next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsd, to_destroy, false); - } else { - next = NULL; - } - } while (next != NULL); - - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); -} - void prof_tdata_cleanup(tsd_t *tsd) { prof_tdata_t *tdata; @@ -2142,29 +754,6 @@ prof_thread_name_get(tsd_t *tsd) { return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, size); - return ret; -} - int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; @@ -2329,16 +918,15 @@ prof_boot2(tsd_t *tsd) { return true; } - if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { + if (prof_data_init(tsd)) { return true; } + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } - tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; diff --git a/src/prof_data.c b/src/prof_data.c new file mode 100644 index 00000000..a4cb749f --- /dev/null +++ b/src/prof_data.c @@ -0,0 +1,1440 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" + +/* + * This file defines and manages the core profiling data structures. + * + * Conceptually, profiling data can be imagined as a table with three columns: + * thread, stack trace, and current allocation size. (When prof_accum is on, + * there's one additional column which is the cumulative allocation size.) + * + * Implementation wise, each thread maintains a hash recording the stack trace + * to allocation size correspondences, which are basically the individual rows + * in the table. 
In addition, two global "indices" are built to make data + * aggregation efficient (for dumping): bt2gctx and tdatas, which are basically + * the "grouped by stack trace" and "grouped by thread" views of the same table, + * respectively. Note that the allocation size is only aggregated to the two + * indices at dumping time, so as to optimize for performance. + */ + +/******************************************************************************/ + +/* + * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2gctx; + +/* + * Tree of all extant prof_tdata_t structures, regardless of state, + * {attached,detached,expired}. + */ +static prof_tdata_tree_t tdatas; + +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; +static size_t prof_dump_buf_end; +static int prof_dump_fd; + +/******************************************************************************/ +/* Red-black trees. 
*/ + +static int +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { + uint64_t a_thr_uid = a->thr_uid; + uint64_t b_thr_uid = b->thr_uid; + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + if (ret == 0) { + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } + } + return ret; +} + +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, + tctx_link, prof_tctx_comp) + +static int +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { + unsigned a_len = a->bt.len; + unsigned b_len = b->bt.len; + unsigned comp_len = (a_len < b_len) ? a_len : b_len; + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + if (ret == 0) { + ret = (a_len > b_len) - (a_len < b_len); + } + return ret; +} + +rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, + prof_gctx_comp) + +static int +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; + + ret = ((a_uid > b_uid) - (a_uid < b_uid)); + if (ret == 0) { + uint64_t a_discrim = a->thr_discrim; + uint64_t b_discrim = b->thr_discrim; + + ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + } + return ret; +} + +rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, + prof_tdata_comp) + +/******************************************************************************/ + +bool +prof_data_init(tsd_t *tsd) { + tdata_tree_new(&tdatas); + return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp); +} + +static void +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + if (tdata != NULL) { + 
assert(!tdata->enq); + tdata->enq = true; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); +} + +static void +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + if (tdata != NULL) { + bool idump, gdump; + + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; + + if (idump) { + prof_idump(tsd_tsdn(tsd)); + } + if (gdump) { + prof_gdump(tsd_tsdn(tsd)); + } + } +} + +static prof_gctx_t * +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { + /* + * Create a single allocation that has space for vec of length bt->len. + */ + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), + true); + if (gctx == NULL) { + return NULL; + } + gctx->lock = prof_gctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). + */ + gctx->nlimbo = 1; + tctx_tree_new(&gctx->tctxs); + /* Duplicate bt. */ + memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); + gctx->bt.vec = gctx->vec; + gctx->bt.len = bt->len; + return gctx; +} + +static void +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, + prof_tdata_t *tdata) { + cassert(config_prof); + + /* + * Check that gctx is still unused by any thread cache before destroying + * it. prof_lookup() increments gctx->nlimbo in order to avoid a race + * condition with this function, as does prof_tctx_destroy() in order to + * avoid a race between the main body of prof_tctx_destroy() and entry + * into this function. 
+ */ + prof_enter(tsd, tdata_self); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + assert(gctx->nlimbo != 0); + if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { + /* Remove gctx from bt2gctx. */ + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { + not_reached(); + } + prof_leave(tsd, tdata_self); + /* Destroy gctx. */ + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); + } else { + /* + * Compensate for increment in prof_tctx_destroy() or + * prof_lookup(). + */ + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_leave(tsd, tdata_self); + } +} + +static bool +prof_gctx_should_destroy(prof_gctx_t *gctx) { + if (opt_prof_accum) { + return false; + } + if (!tctx_tree_empty(&gctx->tctxs)) { + return false; + } + if (gctx->nlimbo != 0) { + return false; + } + return true; +} + +static bool +prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { + union { + prof_gctx_t *p; + void *v; + } gctx, tgctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_gctx; + + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + /* bt has never been seen before. Insert it. */ + prof_leave(tsd, tdata); + tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); + if (tgctx.v == NULL) { + return true; + } + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + gctx.p = tgctx.p; + btkey.p = &gctx.p->bt; + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + /* OOM. */ + prof_leave(tsd, tdata); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, + true, true); + return true; + } + new_gctx = true; + } else { + new_gctx = false; + } + } else { + tgctx.v = NULL; + new_gctx = false; + } + + if (!new_gctx) { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). 
+ */ + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); + gctx.p->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); + new_gctx = false; + + if (tgctx.v != NULL) { + /* Lost race to insert. */ + idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, + true); + } + } + prof_leave(tsd, tdata); + + *p_btkey = btkey.v; + *p_gctx = gctx.p; + *p_new_gctx = new_gctx; + return false; +} + +prof_tctx_t * +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { + union { + prof_tctx_t *p; + void *v; + } ret; + prof_tdata_t *tdata; + bool not_found; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return NULL; + } + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) { /* Note double negative! */ + ret.p->prepared = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (not_found) { + void *btkey; + prof_gctx_t *gctx; + bool new_gctx, error; + + /* + * This thread's cache lacks bt. Look for it in the global + * cache. + */ + if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, + &new_gctx)) { + return NULL; + } + + /* Link a prof_tctx_t into gctx for this thread. 
*/ + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), + sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_ichoose(tsd, NULL), true); + if (ret.p == NULL) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + return NULL; + } + ret.p->tdata = tdata; + ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + ret.p->gctx = gctx; + ret.p->tctx_uid = tdata->tctx_uid_next++; + ret.p->prepared = true; + ret.p->state = prof_tctx_state_initializing; + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (error) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); + return NULL; + } + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + ret.p->state = prof_tctx_state_nominal; + tctx_tree_insert(&gctx->tctxs, ret.p); + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + + return ret.p; +} + +#ifdef JEMALLOC_JET +static prof_tdata_t * +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + size_t *tdata_count = (size_t *)arg; + + (*tdata_count)++; + + return NULL; +} + +size_t +prof_tdata_count(void) { + size_t tdata_count = 0; + tsdn_t *tsdn; + + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, + (void *)&tdata_count); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + + return tdata_count; +} + +size_t +prof_bt_count(void) { + size_t bt_count; + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return 0; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); + bt_count = ckh_count(&bt2gctx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + return bt_count; +} +#endif + +static int 
+prof_dump_open_impl(bool propagate_err, const char *filename) { + int fd; + + fd = creat(filename, 0644); + if (fd == -1 && !propagate_err) { + malloc_printf(": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) { + abort(); + } + } + + return fd; +} +prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; + +static bool +prof_dump_flush(bool propagate_err) { + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (!propagate_err) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) { + abort(); + } + } + ret = true; + } + prof_dump_buf_end = 0; + + return ret; +} + +static bool +prof_dump_close(bool propagate_err) { + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return ret; +} + +static bool +prof_dump_write(bool propagate_err, const char *s) { + size_t i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + if (prof_dump_flush(propagate_err) && propagate_err) { + return true; + } + } + + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + + return false; +} + +JEMALLOC_FORMAT_PRINTF(2, 3) +static bool +prof_dump_printf(bool propagate_err, const char *format, ...) 
{ + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_dump_write(propagate_err, buf); + + return ret; +} + +static void +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + + malloc_mutex_lock(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + return; + case prof_tctx_state_nominal: + tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); + + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + tdata->cnt_summed.accumobjs += + tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += + tctx->dump_cnts.accumbytes; + } + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + not_reached(); + } +} + +static void +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsdn, gctx->lock); + + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + } +} + +static prof_tctx_t * +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. 
*/ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); + break; + default: + not_reached(); + } + + return NULL; +} + +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_tctx_t * +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_nominal: + /* Not captured by this dump. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + if (prof_dump_printf(arg->propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " + "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) { + return tctx; + } + break; + default: + not_reached(); + } + return NULL; +} + +static prof_tctx_t * +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + prof_tctx_t *ret; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + tctx->state = prof_tctx_state_nominal; + break; + case prof_tctx_state_purgatory: + ret = tctx; + goto label_return; + default: + not_reached(); + } + + ret = NULL; +label_return: + return ret; +} + +static void +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { + cassert(config_prof); + + malloc_mutex_lock(tsdn, gctx->lock); + + /* + * Increment nlimbo so that gctx won't go away before dump. + * Additionally, link gctx into the dump list so that it is included in + * prof_dump()'s second pass. 
+ */ + gctx->nlimbo++; + gctx_tree_insert(gctxs, gctx); + + memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); + + malloc_mutex_unlock(tsdn, gctx->lock); +} + +struct prof_gctx_merge_iter_arg_s { + tsdn_t *tsdn; + size_t leak_ngctx; +}; + +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsdn); + if (gctx->cnt_summed.curobjs != 0) { + arg->leak_ngctx++; + } + malloc_mutex_unlock(arg->tsdn, gctx->lock); + + return NULL; +} + +static void +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + prof_gctx_t *gctx; + + /* + * Standard tree iteration won't work here, because as soon as we + * decrement gctx->nlimbo and unlock gctx, another thread can + * concurrently destroy it, which will corrupt the tree. Therefore, + * tear down the tree one node at a time during iteration. 
+ */ + while ((gctx = gctx_tree_first(gctxs)) != NULL) { + gctx_tree_remove(gctxs, gctx); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + { + prof_tctx_t *next; + + next = NULL; + do { + prof_tctx_t *to_destroy = + tctx_tree_iter(&gctx->tctxs, next, + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); + if (to_destroy != NULL) { + next = tctx_tree_next(&gctx->tctxs, + to_destroy); + tctx_tree_remove(&gctx->tctxs, + to_destroy); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, NULL, true, true); + } else { + next = NULL; + } + } while (next != NULL); + } + gctx->nlimbo--; + if (prof_gctx_should_destroy(gctx)) { + gctx->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } else { + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + } +} + +struct prof_tdata_merge_iter_arg_s { + tsdn_t *tsdn; + prof_cnt_t cnt_all; +}; + +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) { + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, tdata->lock); + if (!tdata->expired) { + size_t tabind; + union { + prof_tctx_t *p; + void *v; + } tctx; + + tdata->dumping = true; + memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); + for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v);) { + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); + } + + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + if (opt_prof_accum) { + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; + } + } else { + tdata->dumping = false; + } + malloc_mutex_unlock(arg->tsdn, tdata->lock); + + return NULL; +} + +static prof_tdata_t * +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + bool propagate_err = *(bool *)arg; + + if (!tdata->dumping) { + return NULL; + } + 
+ if (prof_dump_printf(propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", + tdata->thr_uid, tdata->cnt_summed.curobjs, + tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, + tdata->cnt_summed.accumbytes, + (tdata->thread_name != NULL) ? " " : "", + (tdata->thread_name != NULL) ? tdata->thread_name : "")) { + return tdata; + } + return NULL; +} + +static bool +prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, + const prof_cnt_t *cnt_all) { + bool ret; + + if (prof_dump_printf(propagate_err, + "heap_v2/%"FMTu64"\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { + return true; + } + + malloc_mutex_lock(tsdn, &tdatas_mtx); + ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, + (void *)&propagate_err) != NULL); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + return ret; +} +prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; + +static bool +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { + bool ret; + unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; + + cassert(config_prof); + malloc_mutex_assert_owner(tsdn, gctx->lock); + + /* Avoid dumping such gctx's that have no useful data. 
*/ + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + assert(gctx->cnt_summed.curobjs == 0); + assert(gctx->cnt_summed.curbytes == 0); + assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumbytes == 0); + ret = false; + goto label_return; + } + + if (prof_dump_printf(propagate_err, "@")) { + ret = true; + goto label_return; + } + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"FMTxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } + + if (prof_dump_printf(propagate_err, + "\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + prof_tctx_dump_iter_arg.tsdn = tsdn; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; + if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)&prof_tctx_dump_iter_arg) != NULL) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + return ret; +} + +#ifndef _WIN32 +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps(const char *format, ...) 
{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + +#if defined(O_CLOEXEC) + mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif + + return mfd; +} +#endif + +static bool +prof_dump_maps(bool propagate_err) { + bool ret; + int mfd; + + cassert(config_prof); +#ifdef __FreeBSD__ + mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented +#else + { + int pid = prof_getpid(); + + mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps("/proc/%d/maps", pid); + } + } +#endif + if (mfd != -1) { + ssize_t nread; + + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) { + ret = true; + goto label_return; + } + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + if (prof_dump_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } + } + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); + } while (nread > 0); + } else { + ret = true; + goto label_return; + } + + ret = false; +label_return: + if (mfd != -1) { + close(mfd); + } + return ret; +} + +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. + */ +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, + const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. 
+ */ + if (cnt_all->curbytes != 0) { + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + malloc_printf( + ": Run jeprof on \"%s\" for leak detail\n", + filename); + } +#endif +} + +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_gctx_t * +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + prof_gctx_t *ret; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { + ret = gctx; + goto label_return; + } + + ret = NULL; +label_return: + malloc_mutex_unlock(arg->tsdn, gctx->lock); + return ret; +} + +static void +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_gctx_tree_t *gctxs) { + size_t tabind; + union { + prof_gctx_t *p; + void *v; + } gctx; + + prof_enter(tsd, tdata); + + /* + * Put gctx's in limbo and clear their counters in preparation for + * summing. + */ + gctx_tree_new(gctxs); + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); + } + + /* + * Iterate over tdatas, and for the non-expired ones snapshot their tctx + * stats and merge them into the associated gctx's. 
+ */ + prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); + memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + /* Merge tctx stats into gctx's. */ + prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_merge_iter_arg->leak_ngctx = 0; + gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, + (void *)prof_gctx_merge_iter_arg); + + prof_leave(tsd, tdata); +} + +static bool +prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + prof_gctx_tree_t *gctxs) { + /* Create dump file. */ + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { + return true; + } + + /* Dump profile header. */ + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, + &prof_tdata_merge_iter_arg->cnt_all)) { + goto label_write_error; + } + + /* Dump per gctx profile stats. */ + prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_dump_iter_arg->propagate_err = propagate_err; + if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, + (void *)prof_gctx_dump_iter_arg) != NULL) { + goto label_write_error; + } + + /* Dump /proc//maps if possible. 
*/ + if (prof_dump_maps(propagate_err)) { + goto label_write_error; + } + + if (prof_dump_close(propagate_err)) { + return true; + } + + return false; +label_write_error: + prof_dump_close(propagate_err); + return true; +} + +bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { + cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t * tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + pre_reentrancy(tsd, NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_gctx_tree_t gctxs; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, + &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, + &prof_gctx_dump_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); + + if (err) { + return true; + } + + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } + return false; +} + +#ifdef JEMALLOC_JET +void +prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes) { + tsd_t *tsd; + prof_tdata_t *tdata; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_gctx_tree_t gctxs; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + if (curobjs != NULL) { + *curobjs = 0; + } + if (curbytes != NULL) { + *curbytes = 0; + } + if (accumobjs != NULL) { + *accumobjs = 0; + } + if (accumbytes != NULL) { + *accumbytes = 0; + } + return; + } + + prof_dump_prep(tsd, tdata, 
&prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + if (curobjs != NULL) { + *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + } + if (curbytes != NULL) { + *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + } + if (accumobjs != NULL) { + *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + } + if (accumbytes != NULL) { + *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + } +} +#endif + +void +prof_bt_hash(const void *key, size_t r_hash[2]) { + prof_bt_t *bt = (prof_bt_t *)key; + + cassert(config_prof); + + hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); +} + +bool +prof_bt_keycomp(const void *k1, const void *k2) { + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + cassert(config_prof); + + if (bt1->len != bt2->len) { + return false; + } + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + +prof_tdata_t * +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, + char *thread_name, bool active) { + prof_tdata_t *tdata; + + cassert(config_prof); + + /* Initialize an empty cache for this thread. 
*/ + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), + sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (tdata == NULL) { + return NULL; + } + + tdata->lock = prof_tdata_mutex_choose(thr_uid); + tdata->thr_uid = thr_uid; + tdata->thr_discrim = thr_discrim; + tdata->thread_name = thread_name; + tdata->attached = true; + tdata->expired = false; + tdata->tctx_uid_next = 0; + + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); + return NULL; + } + + tdata->prng_state = (uint64_t)(uintptr_t)tdata; + prof_sample_threshold_update(tdata); + + tdata->enq = false; + tdata->enq_idump = false; + tdata->enq_gdump = false; + + tdata->dumping = false; + tdata->active = active; + + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_insert(&tdatas, tdata); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + return tdata; +} + +static bool +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { + if (tdata->attached && !even_if_attached) { + return false; + } + if (ckh_count(&tdata->bt2tctx) != 0) { + return false; + } + return true; +} + +static bool +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsdn, tdata->lock); + + return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); +} + +static void +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + + tdata_tree_remove(&tdatas, tdata); + + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + } + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); +} + +static void +prof_tdata_destroy(tsd_t 
*tsd, prof_tdata_t *tdata, bool even_if_attached) { + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); +} + +void +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + if (tdata->attached) { + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); + /* + * Only detach if !destroy_tdata, because detaching would allow + * another thread to win the race to destroy tdata. + */ + if (!destroy_tdata) { + tdata->attached = false; + } + tsd_prof_tdata_set(tsd, NULL); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, true); + } +} + +static bool +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsdn, tdata->lock); + if (!tdata->expired) { + tdata->expired = true; + destroy_tdata = tdata->attached ? false : + prof_tdata_should_destroy(tsdn, tdata, false); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsdn, tdata->lock); + + return destroy_tdata; +} + +static prof_tdata_t * +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); +} + +void +prof_reset(tsd_t *tsd, size_t lg_sample) { + prof_tdata_t *next; + + assert(lg_sample < (sizeof(uint64_t) << 3)); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + + lg_prof_sample = lg_sample; + + next = NULL; + do { + prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, + prof_tdata_reset_iter, (void *)tsd); + if (to_destroy != NULL) { + next = tdata_tree_next(&tdatas, to_destroy); + prof_tdata_destroy_locked(tsd, to_destroy, false); + } else { + next = NULL; + } + } while (next != NULL); + + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); +} + +void +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + prof_tdata_t *tdata = tctx->tdata; + prof_gctx_t *gctx = tctx->gctx; + bool destroy_tdata, destroy_tctx, destroy_gctx; + + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + assert(tctx->cnts.curobjs == 0); + assert(tctx->cnts.curbytes == 0); + assert(!opt_prof_accum); + assert(tctx->cnts.accumobjs == 0); + assert(tctx->cnts.accumbytes == 0); + + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + switch (tctx->state) { + case prof_tctx_state_nominal: + tctx_tree_remove(&gctx->tctxs, tctx); + destroy_tctx = true; + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another + * thread from winning the race to destroy gctx while + * this one has gctx->lock dropped. Without this, it + * would be possible for another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_gctx_try_destroy(gctx). + * + * The result would be that gctx no longer exists by the + * time this thread accesses it in + * prof_gctx_try_destroy(). 
+ */ + gctx->nlimbo++; + destroy_gctx = true; + } else { + destroy_gctx = false; + } + break; + case prof_tctx_state_dumping: + /* + * A dumping thread needs tctx to remain valid until dumping + * has finished. Change state such that the dumping thread will + * complete destruction during a late dump iteration phase. + */ + tctx->state = prof_tctx_state_purgatory; + destroy_tctx = false; + destroy_gctx = false; + break; + default: + not_reached(); + destroy_tctx = false; + destroy_gctx = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + if (destroy_gctx) { + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, + tdata); + } + + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, false); + } + + if (destroy_tctx) { + idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); + } +} + +/******************************************************************************/ From 1a0503367be5950a8da648996ba7ae2620e39393 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 Jul 2019 14:09:20 -0700 Subject: [PATCH 1327/2608] Revert "Refactor profiling" This reverts commit 0b462407ae84a62b3c097f0e9f18df487a47d9a7. 
--- Makefile.in | 1 - include/jemalloc/internal/prof_externs.h | 14 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - src/prof.c | 1494 ++++++++++++++++- src/prof_data.c | 1440 ---------------- 8 files changed, 1453 insertions(+), 1504 deletions(-) delete mode 100644 src/prof_data.c diff --git a/Makefile.in b/Makefile.in index 40daf115..1cd973d7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,7 +117,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ - $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 8fc45cf7..e94ac3b2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -4,11 +4,6 @@ #include "jemalloc/internal/mutex.h" extern malloc_mutex_t bt2gctx_mtx; -extern malloc_mutex_t tdatas_mtx; -extern malloc_mutex_t prof_dump_mtx; - -malloc_mutex_t *prof_gctx_mutex_choose(void); -malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); extern bool opt_prof; extern bool opt_prof_active; @@ -115,13 +110,4 @@ bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); #endif -/* Functions in prof_data.c only accessed in prof.c */ -bool prof_data_init(tsd_t *tsd); -bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck); -prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active); -void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); - #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 387f14be..d93d9099 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,7 +58,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 030d8266..7b09d4e6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 1606a3ab..28bd3cd6 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,7 +58,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 622b93f1..a66c209b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/src/prof.c b/src/prof.c index 79a0ffc8..9d1edb32 100644 --- a/src/prof.c +++ b/src/prof.c @@ -3,14 +3,11 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -/* - * This file implements the profiling "APIs" needed by other parts of jemalloc, - * and also manages the relevant "operational" data, mainly options and mutexes; - * the core profiling data structures are encapsulated in prof_data.c. - */ - /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND @@ -91,10 +88,20 @@ static atomic_u_t cum_gctxs; /* Atomic counter. 
*/ */ static malloc_mutex_t *tdata_locks; +/* + * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2gctx; /* Non static to enable profiling. */ malloc_mutex_t bt2gctx_mtx; -malloc_mutex_t tdatas_mtx; +/* + * Tree of all extant prof_tdata_t structures, regardless of state, + * {attached,detached,expired}. + */ +static prof_tdata_tree_t tdatas; +static malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -105,29 +112,101 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; -malloc_mutex_t prof_dump_mtx; +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static malloc_mutex_t prof_dump_mtx; +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; +static size_t prof_dump_buf_end; +static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; /******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ -static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); +static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); +static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); +static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, + bool even_if_attached); +static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached); +static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); - if (opt_prof_accum) { - return false; +/******************************************************************************/ +/* Red-black trees. */ + +static int +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { + uint64_t a_thr_uid = a->thr_uid; + uint64_t b_thr_uid = b->thr_uid; + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + if (ret == 0) { + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } } - if (tctx->cnts.curobjs != 0) { - return false; - } - if (tctx->prepared) { - return false; - } - return true; + return ret; } +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, + tctx_link, prof_tctx_comp) + +static int +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { + unsigned a_len = a->bt.len; + unsigned b_len = b->bt.len; + unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + if (ret == 0) { + ret = (a_len > b_len) - (a_len < b_len); + } + return ret; +} + +rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, + prof_gctx_comp) + +static int +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; + + ret = ((a_uid > b_uid) - (a_uid < b_uid)); + if (ret == 0) { + uint64_t a_discrim = a->thr_discrim; + uint64_t b_discrim = b->thr_discrim; + + ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + } + return ret; +} + +rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, + prof_tdata_comp) + +/******************************************************************************/ + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { prof_tdata_t *tdata; @@ -207,6 +286,45 @@ bt_init(prof_bt_t *bt, void **vec) { bt->len = 0; } +static void +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + if (tdata != NULL) { + assert(!tdata->enq); + tdata->enq = true; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); +} + +static void +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + if (tdata != NULL) { + bool idump, gdump; + + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; + + if (idump) { + prof_idump(tsd_tsdn(tsd)); + } + if (gdump) { + prof_gdump(tsd_tsdn(tsd)); + } + } +} + #ifdef JEMALLOC_PROF_LIBUNWIND void prof_backtrace(prof_bt_t *bt) { @@ -429,18 +547,324 @@ prof_backtrace(prof_bt_t *bt) { } #endif -malloc_mutex_t * +static malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = 
atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; } -malloc_mutex_t * +static malloc_mutex_t * prof_tdata_mutex_choose(uint64_t thr_uid) { return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; } +static prof_gctx_t * +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { + /* + * Create a single allocation that has space for vec of length bt->len. + */ + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), + true); + if (gctx == NULL) { + return NULL; + } + gctx->lock = prof_gctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). + */ + gctx->nlimbo = 1; + tctx_tree_new(&gctx->tctxs); + /* Duplicate bt. */ + memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); + gctx->bt.vec = gctx->vec; + gctx->bt.len = bt->len; + return gctx; +} + +static void +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, + prof_tdata_t *tdata) { + cassert(config_prof); + + /* + * Check that gctx is still unused by any thread cache before destroying + * it. prof_lookup() increments gctx->nlimbo in order to avoid a race + * condition with this function, as does prof_tctx_destroy() in order to + * avoid a race between the main body of prof_tctx_destroy() and entry + * into this function. + */ + prof_enter(tsd, tdata_self); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + assert(gctx->nlimbo != 0); + if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { + /* Remove gctx from bt2gctx. */ + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { + not_reached(); + } + prof_leave(tsd, tdata_self); + /* Destroy gctx. 
*/ + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); + } else { + /* + * Compensate for increment in prof_tctx_destroy() or + * prof_lookup(). + */ + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_leave(tsd, tdata_self); + } +} + +static bool +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + + if (opt_prof_accum) { + return false; + } + if (tctx->cnts.curobjs != 0) { + return false; + } + if (tctx->prepared) { + return false; + } + return true; +} + +static bool +prof_gctx_should_destroy(prof_gctx_t *gctx) { + if (opt_prof_accum) { + return false; + } + if (!tctx_tree_empty(&gctx->tctxs)) { + return false; + } + if (gctx->nlimbo != 0) { + return false; + } + return true; +} + +static void +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + prof_tdata_t *tdata = tctx->tdata; + prof_gctx_t *gctx = tctx->gctx; + bool destroy_tdata, destroy_tctx, destroy_gctx; + + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + assert(tctx->cnts.curobjs == 0); + assert(tctx->cnts.curbytes == 0); + assert(!opt_prof_accum); + assert(tctx->cnts.accumobjs == 0); + assert(tctx->cnts.accumbytes == 0); + + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + switch (tctx->state) { + case prof_tctx_state_nominal: + tctx_tree_remove(&gctx->tctxs, tctx); + destroy_tctx = true; + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another + * thread from winning the race to destroy gctx while + * this one has gctx->lock dropped. Without this, it + * would be possible for another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. 
+ * 3) Successfully prof_gctx_try_destroy(gctx). + * + * The result would be that gctx no longer exists by the + * time this thread accesses it in + * prof_gctx_try_destroy(). + */ + gctx->nlimbo++; + destroy_gctx = true; + } else { + destroy_gctx = false; + } + break; + case prof_tctx_state_dumping: + /* + * A dumping thread needs tctx to remain valid until dumping + * has finished. Change state such that the dumping thread will + * complete destruction during a late dump iteration phase. + */ + tctx->state = prof_tctx_state_purgatory; + destroy_tctx = false; + destroy_gctx = false; + break; + default: + not_reached(); + destroy_tctx = false; + destroy_gctx = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + if (destroy_gctx) { + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, + tdata); + } + + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, false); + } + + if (destroy_tctx) { + idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); + } +} + +static bool +prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { + union { + prof_gctx_t *p; + void *v; + } gctx, tgctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_gctx; + + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + /* bt has never been seen before. Insert it. */ + prof_leave(tsd, tdata); + tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); + if (tgctx.v == NULL) { + return true; + } + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + gctx.p = tgctx.p; + btkey.p = &gctx.p->bt; + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + /* OOM. 
*/ + prof_leave(tsd, tdata); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, + true, true); + return true; + } + new_gctx = true; + } else { + new_gctx = false; + } + } else { + tgctx.v = NULL; + new_gctx = false; + } + + if (!new_gctx) { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). + */ + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); + gctx.p->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); + new_gctx = false; + + if (tgctx.v != NULL) { + /* Lost race to insert. */ + idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, + true); + } + } + prof_leave(tsd, tdata); + + *p_btkey = btkey.v; + *p_gctx = gctx.p; + *p_new_gctx = new_gctx; + return false; +} + +prof_tctx_t * +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { + union { + prof_tctx_t *p; + void *v; + } ret; + prof_tdata_t *tdata; + bool not_found; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return NULL; + } + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) { /* Note double negative! */ + ret.p->prepared = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (not_found) { + void *btkey; + prof_gctx_t *gctx; + bool new_gctx, error; + + /* + * This thread's cache lacks bt. Look for it in the global + * cache. + */ + if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, + &new_gctx)) { + return NULL; + } + + /* Link a prof_tctx_t into gctx for this thread. 
*/ + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), + sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_ichoose(tsd, NULL), true); + if (ret.p == NULL) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + return NULL; + } + ret.p->tdata = tdata; + ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + ret.p->gctx = gctx; + ret.p->tctx_uid = tdata->tctx_uid_next++; + ret.p->prepared = true; + ret.p->state = prof_tctx_state_initializing; + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (error) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); + return NULL; + } + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + ret.p->state = prof_tctx_state_nominal; + tctx_tree_insert(&gctx->tctxs, ret.p); + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + + return ret.p; +} + /* * The bodies of this function and prof_leakcheck() are compiled out unless heap * profiling is enabled, so that it is possible to compile jemalloc with @@ -497,6 +921,520 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { #endif } +#ifdef JEMALLOC_JET +static prof_tdata_t * +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + size_t *tdata_count = (size_t *)arg; + + (*tdata_count)++; + + return NULL; +} + +size_t +prof_tdata_count(void) { + size_t tdata_count = 0; + tsdn_t *tsdn; + + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, + (void *)&tdata_count); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + + return tdata_count; +} + +size_t +prof_bt_count(void) { + size_t bt_count; + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if 
(tdata == NULL) { + return 0; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); + bt_count = ckh_count(&bt2gctx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + return bt_count; +} +#endif + +static int +prof_dump_open_impl(bool propagate_err, const char *filename) { + int fd; + + fd = creat(filename, 0644); + if (fd == -1 && !propagate_err) { + malloc_printf(": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) { + abort(); + } + } + + return fd; +} +prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; + +static bool +prof_dump_flush(bool propagate_err) { + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (!propagate_err) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) { + abort(); + } + } + ret = true; + } + prof_dump_buf_end = 0; + + return ret; +} + +static bool +prof_dump_close(bool propagate_err) { + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return ret; +} + +static bool +prof_dump_write(bool propagate_err, const char *s) { + size_t i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + if (prof_dump_flush(propagate_err) && propagate_err) { + return true; + } + } + + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + + return false; +} + +JEMALLOC_FORMAT_PRINTF(2, 3) +static bool +prof_dump_printf(bool propagate_err, const char *format, ...) 
{ + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_dump_write(propagate_err, buf); + + return ret; +} + +static void +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + + malloc_mutex_lock(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + return; + case prof_tctx_state_nominal: + tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); + + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + tdata->cnt_summed.accumobjs += + tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += + tctx->dump_cnts.accumbytes; + } + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + not_reached(); + } +} + +static void +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsdn, gctx->lock); + + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + } +} + +static prof_tctx_t * +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. 
*/ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); + break; + default: + not_reached(); + } + + return NULL; +} + +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_tctx_t * +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_nominal: + /* Not captured by this dump. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + if (prof_dump_printf(arg->propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " + "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) { + return tctx; + } + break; + default: + not_reached(); + } + return NULL; +} + +static prof_tctx_t * +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + prof_tctx_t *ret; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + tctx->state = prof_tctx_state_nominal; + break; + case prof_tctx_state_purgatory: + ret = tctx; + goto label_return; + default: + not_reached(); + } + + ret = NULL; +label_return: + return ret; +} + +static void +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { + cassert(config_prof); + + malloc_mutex_lock(tsdn, gctx->lock); + + /* + * Increment nlimbo so that gctx won't go away before dump. + * Additionally, link gctx into the dump list so that it is included in + * prof_dump()'s second pass. 
+ */ + gctx->nlimbo++; + gctx_tree_insert(gctxs, gctx); + + memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); + + malloc_mutex_unlock(tsdn, gctx->lock); +} + +struct prof_gctx_merge_iter_arg_s { + tsdn_t *tsdn; + size_t leak_ngctx; +}; + +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsdn); + if (gctx->cnt_summed.curobjs != 0) { + arg->leak_ngctx++; + } + malloc_mutex_unlock(arg->tsdn, gctx->lock); + + return NULL; +} + +static void +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + prof_gctx_t *gctx; + + /* + * Standard tree iteration won't work here, because as soon as we + * decrement gctx->nlimbo and unlock gctx, another thread can + * concurrently destroy it, which will corrupt the tree. Therefore, + * tear down the tree one node at a time during iteration. 
+ */ + while ((gctx = gctx_tree_first(gctxs)) != NULL) { + gctx_tree_remove(gctxs, gctx); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + { + prof_tctx_t *next; + + next = NULL; + do { + prof_tctx_t *to_destroy = + tctx_tree_iter(&gctx->tctxs, next, + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); + if (to_destroy != NULL) { + next = tctx_tree_next(&gctx->tctxs, + to_destroy); + tctx_tree_remove(&gctx->tctxs, + to_destroy); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, NULL, true, true); + } else { + next = NULL; + } + } while (next != NULL); + } + gctx->nlimbo--; + if (prof_gctx_should_destroy(gctx)) { + gctx->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } else { + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + } +} + +struct prof_tdata_merge_iter_arg_s { + tsdn_t *tsdn; + prof_cnt_t cnt_all; +}; + +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) { + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, tdata->lock); + if (!tdata->expired) { + size_t tabind; + union { + prof_tctx_t *p; + void *v; + } tctx; + + tdata->dumping = true; + memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); + for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v);) { + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); + } + + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + if (opt_prof_accum) { + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; + } + } else { + tdata->dumping = false; + } + malloc_mutex_unlock(arg->tsdn, tdata->lock); + + return NULL; +} + +static prof_tdata_t * +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + bool propagate_err = *(bool *)arg; + + if (!tdata->dumping) { + return NULL; + } + 
+ if (prof_dump_printf(propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", + tdata->thr_uid, tdata->cnt_summed.curobjs, + tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, + tdata->cnt_summed.accumbytes, + (tdata->thread_name != NULL) ? " " : "", + (tdata->thread_name != NULL) ? tdata->thread_name : "")) { + return tdata; + } + return NULL; +} + +static bool +prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, + const prof_cnt_t *cnt_all) { + bool ret; + + if (prof_dump_printf(propagate_err, + "heap_v2/%"FMTu64"\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { + return true; + } + + malloc_mutex_lock(tsdn, &tdatas_mtx); + ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, + (void *)&propagate_err) != NULL); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + return ret; +} +prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; + +static bool +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { + bool ret; + unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; + + cassert(config_prof); + malloc_mutex_assert_owner(tsdn, gctx->lock); + + /* Avoid dumping such gctx's that have no useful data. 
*/ + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + assert(gctx->cnt_summed.curobjs == 0); + assert(gctx->cnt_summed.curbytes == 0); + assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumbytes == 0); + ret = false; + goto label_return; + } + + if (prof_dump_printf(propagate_err, "@")) { + ret = true; + goto label_return; + } + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"FMTxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } + + if (prof_dump_printf(propagate_err, + "\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + prof_tctx_dump_iter_arg.tsdn = tsdn; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; + if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)&prof_tctx_dump_iter_arg) != NULL) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + return ret; +} + +#ifndef _WIN32 +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps(const char *format, ...) 
{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + +#if defined(O_CLOEXEC) + mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif + + return mfd; +} +#endif + int prof_getpid(void) { #ifdef _WIN32 @@ -506,6 +1444,291 @@ prof_getpid(void) { #endif } +static bool +prof_dump_maps(bool propagate_err) { + bool ret; + int mfd; + + cassert(config_prof); +#ifdef __FreeBSD__ + mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented +#else + { + int pid = prof_getpid(); + + mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps("/proc/%d/maps", pid); + } + } +#endif + if (mfd != -1) { + ssize_t nread; + + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) { + ret = true; + goto label_return; + } + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + if (prof_dump_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } + } + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); + } while (nread > 0); + } else { + ret = true; + goto label_return; + } + + ret = false; +label_return: + if (mfd != -1) { + close(mfd); + } + return ret; +} + +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. 
+ */ +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, + const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. + */ + if (cnt_all->curbytes != 0) { + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); + malloc_printf( + ": Run jeprof on \"%s\" for leak detail\n", + filename); + } +#endif +} + +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_gctx_t * +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + prof_gctx_t *ret; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { + ret = gctx; + goto label_return; + } + + ret = NULL; +label_return: + malloc_mutex_unlock(arg->tsdn, gctx->lock); + return ret; +} + +static void +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_gctx_tree_t *gctxs) { + size_t tabind; + union { + prof_gctx_t *p; + void *v; + } gctx; + + prof_enter(tsd, tdata); + + /* + * Put gctx's in limbo and clear their counters in preparation for + * summing. + */ + gctx_tree_new(gctxs); + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); + } + + /* + * Iterate over tdatas, and for the non-expired ones snapshot their tctx + * stats and merge them into the associated gctx's. + */ + prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); + memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + /* Merge tctx stats into gctx's. 
*/ + prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_merge_iter_arg->leak_ngctx = 0; + gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, + (void *)prof_gctx_merge_iter_arg); + + prof_leave(tsd, tdata); +} + +static bool +prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + prof_gctx_tree_t *gctxs) { + /* Create dump file. */ + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { + return true; + } + + /* Dump profile header. */ + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, + &prof_tdata_merge_iter_arg->cnt_all)) { + goto label_write_error; + } + + /* Dump per gctx profile stats. */ + prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_dump_iter_arg->propagate_err = propagate_err; + if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, + (void *)prof_gctx_dump_iter_arg) != NULL) { + goto label_write_error; + } + + /* Dump /proc//maps if possible. 
*/ + if (prof_dump_maps(propagate_err)) { + goto label_write_error; + } + + if (prof_dump_close(propagate_err)) { + return true; + } + + return false; +label_write_error: + prof_dump_close(propagate_err); + return true; +} + +static bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { + cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t * tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + pre_reentrancy(tsd, NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_gctx_tree_t gctxs; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, + &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, + &prof_gctx_dump_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); + + if (err) { + return true; + } + + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } + return false; +} + +#ifdef JEMALLOC_JET +void +prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes) { + tsd_t *tsd; + prof_tdata_t *tdata; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_gctx_tree_t gctxs; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + if (curobjs != NULL) { + *curobjs = 0; + } + if (curbytes != NULL) { + *curbytes = 0; + } + if (accumobjs != NULL) { + *accumobjs = 0; + } + if (accumbytes != NULL) { + *accumbytes = 0; + } + return; + } + + prof_dump_prep(tsd, tdata, 
&prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + if (curobjs != NULL) { + *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + } + if (curbytes != NULL) { + *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + } + if (accumobjs != NULL) { + *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + } + if (accumbytes != NULL) { + *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + } +} +#endif + #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void @@ -654,6 +1877,28 @@ prof_gdump(tsdn_t *tsdn) { } } +void +prof_bt_hash(const void *key, size_t r_hash[2]) { + prof_bt_t *bt = (prof_bt_t *)key; + + cassert(config_prof); + + hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); +} + +bool +prof_bt_keycomp(const void *k1, const void *k2) { + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + cassert(config_prof); + + if (bt1->len != bt2->len) { + return false; + } + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -666,33 +1911,124 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { return thr_uid; } +static prof_tdata_t * +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, + char *thread_name, bool active) { + prof_tdata_t *tdata; + + cassert(config_prof); + + /* Initialize an empty cache for this thread. 
*/ + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), + sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (tdata == NULL) { + return NULL; + } + + tdata->lock = prof_tdata_mutex_choose(thr_uid); + tdata->thr_uid = thr_uid; + tdata->thr_discrim = thr_discrim; + tdata->thread_name = thread_name; + tdata->attached = true; + tdata->expired = false; + tdata->tctx_uid_next = 0; + + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); + return NULL; + } + + tdata->prng_state = (uint64_t)(uintptr_t)tdata; + prof_sample_threshold_update(tdata); + + tdata->enq = false; + tdata->enq_idump = false; + tdata->enq_gdump = false; + + tdata->dumping = false; + tdata->active = active; + + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_insert(&tdatas, tdata); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + return tdata; +} + prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; +static bool +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { + if (tdata->attached && !even_if_attached) { + return false; } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; + if (ckh_count(&tdata->bt2tctx) != 0) { + return false; } + return true; +} - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; +static bool +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsdn, tdata->lock); + + return prof_tdata_should_destroy_unlocked(tdata, 
even_if_attached); +} + +static void +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + + tdata_tree_remove(&tdatas, tdata); + + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + } + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); +} + +static void +prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); +} + +static void +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + if (tdata->attached) { + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); + /* + * Only detach if !destroy_tdata, because detaching would allow + * another thread to win the race to destroy tdata. + */ + if (!destroy_tdata) { + tdata->attached = false; + } + tsd_prof_tdata_set(tsd, NULL); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, true); } - memcpy(ret, thread_name, size); - return ret; } prof_tdata_t * @@ -708,6 +2044,58 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { active); } +static bool +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsdn, tdata->lock); + if (!tdata->expired) { + tdata->expired = true; + destroy_tdata = tdata->attached ? 
false : + prof_tdata_should_destroy(tsdn, tdata, false); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsdn, tdata->lock); + + return destroy_tdata; +} + +static prof_tdata_t * +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); +} + +void +prof_reset(tsd_t *tsd, size_t lg_sample) { + prof_tdata_t *next; + + assert(lg_sample < (sizeof(uint64_t) << 3)); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + + lg_prof_sample = lg_sample; + + next = NULL; + do { + prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, + prof_tdata_reset_iter, (void *)tsd); + if (to_destroy != NULL) { + next = tdata_tree_next(&tdatas, to_destroy); + prof_tdata_destroy_locked(tsd, to_destroy, false); + } else { + next = NULL; + } + } while (next != NULL); + + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); +} + void prof_tdata_cleanup(tsd_t *tsd) { prof_tdata_t *tdata; @@ -754,6 +2142,29 @@ prof_thread_name_get(tsd_t *tsd) { return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } +static char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; + } + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; + } + + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; + } + memcpy(ret, thread_name, size); + return ret; +} + int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; @@ -918,15 +2329,16 @@ prof_boot2(tsd_t *tsd) { return true; } - if (prof_data_init(tsd)) { + if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { return true; } - if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } + tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; diff --git a/src/prof_data.c b/src/prof_data.c deleted file mode 100644 index a4cb749f..00000000 --- a/src/prof_data.c +++ /dev/null @@ -1,1440 +0,0 @@ -#define JEMALLOC_PROF_C_ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/malloc_io.h" - -/* - * This file defines and manages the core profiling data structures. - * - * Conceptually, profiling data can be imagined as a table with three columns: - * thread, stack trace, and current allocation size. (When prof_accum is on, - * there's one additional column which is the cumulative allocation size.) - * - * Implementation wise, each thread maintains a hash recording the stack trace - * to allocation size correspondences, which are basically the individual rows - * in the table. 
In addition, two global "indices" are built to make data - * aggregation efficient (for dumping): bt2gctx and tdatas, which are basically - * the "grouped by stack trace" and "grouped by thread" views of the same table, - * respectively. Note that the allocation size is only aggregated to the two - * indices at dumping time, so as to optimize for performance. - */ - -/******************************************************************************/ - -/* - * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data - * structure that knows about all backtraces currently captured. - */ -static ckh_t bt2gctx; - -/* - * Tree of all extant prof_tdata_t structures, regardless of state, - * {attached,detached,expired}. - */ -static prof_tdata_tree_t tdatas; - -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static char prof_dump_buf[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PROF_DUMP_BUFSIZE -#else - 1 -#endif -]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; - -/******************************************************************************/ -/* Red-black trees. 
*/ - -static int -prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - uint64_t a_thr_uid = a->thr_uid; - uint64_t b_thr_uid = b->thr_uid; - int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); - if (ret == 0) { - uint64_t a_thr_discrim = a->thr_discrim; - uint64_t b_thr_discrim = b->thr_discrim; - ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < - b_thr_discrim); - if (ret == 0) { - uint64_t a_tctx_uid = a->tctx_uid; - uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < - b_tctx_uid); - } - } - return ret; -} - -rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, - tctx_link, prof_tctx_comp) - -static int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { - unsigned a_len = a->bt.len; - unsigned b_len = b->bt.len; - unsigned comp_len = (a_len < b_len) ? a_len : b_len; - int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); - if (ret == 0) { - ret = (a_len > b_len) - (a_len < b_len); - } - return ret; -} - -rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, - prof_gctx_comp) - -static int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { - int ret; - uint64_t a_uid = a->thr_uid; - uint64_t b_uid = b->thr_uid; - - ret = ((a_uid > b_uid) - (a_uid < b_uid)); - if (ret == 0) { - uint64_t a_discrim = a->thr_discrim; - uint64_t b_discrim = b->thr_discrim; - - ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); - } - return ret; -} - -rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, - prof_tdata_comp) - -/******************************************************************************/ - -bool -prof_data_init(tsd_t *tsd) { - tdata_tree_new(&tdatas); - return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp); -} - -static void -prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - if (tdata != NULL) { - 
assert(!tdata->enq); - tdata->enq = true; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); -} - -static void -prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - if (tdata != NULL) { - bool idump, gdump; - - assert(tdata->enq); - tdata->enq = false; - idump = tdata->enq_idump; - tdata->enq_idump = false; - gdump = tdata->enq_gdump; - tdata->enq_gdump = false; - - if (idump) { - prof_idump(tsd_tsdn(tsd)); - } - if (gdump) { - prof_gdump(tsd_tsdn(tsd)); - } - } -} - -static prof_gctx_t * -prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { - /* - * Create a single allocation that has space for vec of length bt->len. - */ - size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); - if (gctx == NULL) { - return NULL; - } - gctx->lock = prof_gctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - gctx->nlimbo = 1; - tctx_tree_new(&gctx->tctxs); - /* Duplicate bt. */ - memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); - gctx->bt.vec = gctx->vec; - gctx->bt.len = bt->len; - return gctx; -} - -static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) { - cassert(config_prof); - - /* - * Check that gctx is still unused by any thread cache before destroying - * it. prof_lookup() increments gctx->nlimbo in order to avoid a race - * condition with this function, as does prof_tctx_destroy() in order to - * avoid a race between the main body of prof_tctx_destroy() and entry - * into this function. 
- */ - prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - assert(gctx->nlimbo != 0); - if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { - /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { - not_reached(); - } - prof_leave(tsd, tdata_self); - /* Destroy gctx. */ - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); - } else { - /* - * Compensate for increment in prof_tctx_destroy() or - * prof_lookup(). - */ - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_leave(tsd, tdata_self); - } -} - -static bool -prof_gctx_should_destroy(prof_gctx_t *gctx) { - if (opt_prof_accum) { - return false; - } - if (!tctx_tree_empty(&gctx->tctxs)) { - return false; - } - if (gctx->nlimbo != 0) { - return false; - } - return true; -} - -static bool -prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, - void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { - union { - prof_gctx_t *p; - void *v; - } gctx, tgctx; - union { - prof_bt_t *p; - void *v; - } btkey; - bool new_gctx; - - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - /* bt has never been seen before. Insert it. */ - prof_leave(tsd, tdata); - tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); - if (tgctx.v == NULL) { - return true; - } - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - gctx.p = tgctx.p; - btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { - /* OOM. */ - prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, - true, true); - return true; - } - new_gctx = true; - } else { - new_gctx = false; - } - } else { - tgctx.v = NULL; - new_gctx = false; - } - - if (!new_gctx) { - /* - * Increment nlimbo, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). 
- */ - malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); - gctx.p->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); - new_gctx = false; - - if (tgctx.v != NULL) { - /* Lost race to insert. */ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, - true); - } - } - prof_leave(tsd, tdata); - - *p_btkey = btkey.v; - *p_gctx = gctx.p; - *p_new_gctx = new_gctx; - return false; -} - -prof_tctx_t * -prof_lookup(tsd_t *tsd, prof_bt_t *bt) { - union { - prof_tctx_t *p; - void *v; - } ret; - prof_tdata_t *tdata; - bool not_found; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return NULL; - } - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); - if (!not_found) { /* Note double negative! */ - ret.p->prepared = true; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (not_found) { - void *btkey; - prof_gctx_t *gctx; - bool new_gctx, error; - - /* - * This thread's cache lacks bt. Look for it in the global - * cache. - */ - if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) { - return NULL; - } - - /* Link a prof_tctx_t into gctx for this thread. 
*/ - ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), - sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); - if (ret.p == NULL) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - return NULL; - } - ret.p->tdata = tdata; - ret.p->thr_uid = tdata->thr_uid; - ret.p->thr_discrim = tdata->thr_discrim; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - ret.p->gctx = gctx; - ret.p->tctx_uid = tdata->tctx_uid_next++; - ret.p->prepared = true; - ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (error) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); - return NULL; - } - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - ret.p->state = prof_tctx_state_nominal; - tctx_tree_insert(&gctx->tctxs, ret.p); - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - - return ret.p; -} - -#ifdef JEMALLOC_JET -static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - size_t *tdata_count = (size_t *)arg; - - (*tdata_count)++; - - return NULL; -} - -size_t -prof_tdata_count(void) { - size_t tdata_count = 0; - tsdn_t *tsdn; - - tsdn = tsdn_fetch(); - malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, - (void *)&tdata_count); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - - return tdata_count; -} - -size_t -prof_bt_count(void) { - size_t bt_count; - tsd_t *tsd; - prof_tdata_t *tdata; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return 0; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); - bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - return bt_count; -} -#endif - -static int 
-prof_dump_open_impl(bool propagate_err, const char *filename) { - int fd; - - fd = creat(filename, 0644); - if (fd == -1 && !propagate_err) { - malloc_printf(": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) { - abort(); - } - } - - return fd; -} -prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; - -static bool -prof_dump_flush(bool propagate_err) { - bool ret = false; - ssize_t err; - - cassert(config_prof); - - err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (!propagate_err) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) { - abort(); - } - } - ret = true; - } - prof_dump_buf_end = 0; - - return ret; -} - -static bool -prof_dump_close(bool propagate_err) { - bool ret; - - assert(prof_dump_fd != -1); - ret = prof_dump_flush(propagate_err); - close(prof_dump_fd); - prof_dump_fd = -1; - - return ret; -} - -static bool -prof_dump_write(bool propagate_err, const char *s) { - size_t i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - if (prof_dump_flush(propagate_err) && propagate_err) { - return true; - } - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - - return false; -} - -JEMALLOC_FORMAT_PRINTF(2, 3) -static bool -prof_dump_printf(bool propagate_err, const char *format, ...) 
{ - bool ret; - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - ret = prof_dump_write(propagate_err, buf); - - return ret; -} - -static void -prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - malloc_mutex_lock(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - return; - case prof_tctx_state_nominal: - tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - - memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - - tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - tdata->cnt_summed.accumobjs += - tctx->dump_cnts.accumobjs; - tdata->cnt_summed.accumbytes += - tctx->dump_cnts.accumbytes; - } - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - not_reached(); - } -} - -static void -prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsdn, gctx->lock); - - gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; - } -} - -static prof_tctx_t * -prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. 
*/ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); - break; - default: - not_reached(); - } - - return NULL; -} - -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - case prof_tctx_state_nominal: - /* Not captured by this dump. */ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - if (prof_dump_printf(arg->propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " - "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) { - return tctx; - } - break; - default: - not_reached(); - } - return NULL; -} - -static prof_tctx_t * -prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - prof_tctx_t *ret; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. */ - break; - case prof_tctx_state_dumping: - tctx->state = prof_tctx_state_nominal; - break; - case prof_tctx_state_purgatory: - ret = tctx; - goto label_return; - default: - not_reached(); - } - - ret = NULL; -label_return: - return ret; -} - -static void -prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { - cassert(config_prof); - - malloc_mutex_lock(tsdn, gctx->lock); - - /* - * Increment nlimbo so that gctx won't go away before dump. - * Additionally, link gctx into the dump list so that it is included in - * prof_dump()'s second pass. 
- */ - gctx->nlimbo++; - gctx_tree_insert(gctxs, gctx); - - memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - - malloc_mutex_unlock(tsdn, gctx->lock); -} - -struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; -}; - -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); - if (gctx->cnt_summed.curobjs != 0) { - arg->leak_ngctx++; - } - malloc_mutex_unlock(arg->tsdn, gctx->lock); - - return NULL; -} - -static void -prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { - prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_gctx_t *gctx; - - /* - * Standard tree iteration won't work here, because as soon as we - * decrement gctx->nlimbo and unlock gctx, another thread can - * concurrently destroy it, which will corrupt the tree. Therefore, - * tear down the tree one node at a time during iteration. 
- */ - while ((gctx = gctx_tree_first(gctxs)) != NULL) { - gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - { - prof_tctx_t *next; - - next = NULL; - do { - prof_tctx_t *to_destroy = - tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, - (void *)tsd_tsdn(tsd)); - if (to_destroy != NULL) { - next = tctx_tree_next(&gctx->tctxs, - to_destroy); - tctx_tree_remove(&gctx->tctxs, - to_destroy); - idalloctm(tsd_tsdn(tsd), to_destroy, - NULL, NULL, true, true); - } else { - next = NULL; - } - } while (next != NULL); - } - gctx->nlimbo--; - if (prof_gctx_should_destroy(gctx)) { - gctx->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - } -} - -struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; -}; - -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *opaque) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, tdata->lock); - if (!tdata->expired) { - size_t tabind; - union { - prof_tctx_t *p; - void *v; - } tctx; - - tdata->dumping = true; - memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) { - prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); - } - - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; - if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; - } - } else { - tdata->dumping = false; - } - malloc_mutex_unlock(arg->tsdn, tdata->lock); - - return NULL; -} - -static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - bool propagate_err = *(bool *)arg; - - if (!tdata->dumping) { - return NULL; - } - 
- if (prof_dump_printf(propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", - tdata->thr_uid, tdata->cnt_summed.curobjs, - tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, - tdata->cnt_summed.accumbytes, - (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) { - return tdata; - } - return NULL; -} - -static bool -prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { - bool ret; - - if (prof_dump_printf(propagate_err, - "heap_v2/%"FMTu64"\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return true; - } - - malloc_mutex_lock(tsdn, &tdatas_mtx); - ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, - (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - return ret; -} -prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; - -static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { - bool ret; - unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; - - cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); - - /* Avoid dumping such gctx's that have no useful data. 
*/ - if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { - assert(gctx->cnt_summed.curobjs == 0); - assert(gctx->cnt_summed.curbytes == 0); - assert(gctx->cnt_summed.accumobjs == 0); - assert(gctx->cnt_summed.accumbytes == 0); - ret = false; - goto label_return; - } - - if (prof_dump_printf(propagate_err, "@")) { - ret = true; - goto label_return; - } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"FMTxPTR, - (uintptr_t)bt->vec[i])) { - ret = true; - goto label_return; - } - } - - if (prof_dump_printf(propagate_err, - "\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { - ret = true; - goto label_return; - } - - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; - if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { - ret = true; - goto label_return; - } - - ret = false; -label_return: - return ret; -} - -#ifndef _WIN32 -JEMALLOC_FORMAT_PRINTF(1, 2) -static int -prof_open_maps(const char *format, ...) 
{ - int mfd; - va_list ap; - char filename[PATH_MAX + 1]; - - va_start(ap, format); - malloc_vsnprintf(filename, sizeof(filename), format, ap); - va_end(ap); - -#if defined(O_CLOEXEC) - mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); - } -#endif - - return mfd; -} -#endif - -static bool -prof_dump_maps(bool propagate_err) { - bool ret; - int mfd; - - cassert(config_prof); -#ifdef __FreeBSD__ - mfd = prof_open_maps("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented -#else - { - int pid = prof_getpid(); - - mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps("/proc/%d/maps", pid); - } - } -#endif - if (mfd != -1) { - ssize_t nread; - - if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) { - ret = true; - goto label_return; - } - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). */ - if (prof_dump_flush(propagate_err) && - propagate_err) { - ret = true; - goto label_return; - } - } - nread = malloc_read_fd(mfd, - &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - - prof_dump_buf_end); - } while (nread > 0); - } else { - ret = true; - goto label_return; - } - - ret = false; -label_return: - if (mfd != -1) { - close(mfd); - } - return ret; -} - -/* - * See prof_sample_threshold_update() comment for why the body of this function - * is conditionally compiled. - */ -static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) { -#ifdef JEMALLOC_PROF - /* - * Scaling is equivalent AdjustSamples() in jeprof, but the result may - * differ slightly from what jeprof reports, because here we scale the - * summary values, whereas jeprof scales each context individually and - * reports the sums of the scaled values. 
- */ - if (cnt_all->curbytes != 0) { - double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); - - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); - malloc_printf( - ": Run jeprof on \"%s\" for leak detail\n", - filename); - } -#endif -} - -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { - ret = gctx; - goto label_return; - } - - ret = NULL; -label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); - return ret; -} - -static void -prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) { - size_t tabind; - union { - prof_gctx_t *p; - void *v; - } gctx; - - prof_enter(tsd, tdata); - - /* - * Put gctx's in limbo and clear their counters in preparation for - * summing. - */ - gctx_tree_new(gctxs); - for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { - prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); - } - - /* - * Iterate over tdatas, and for the non-expired ones snapshot their tctx - * stats and merge them into the associated gctx's. 
- */ - prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - /* Merge tctx stats into gctx's. */ - prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg->leak_ngctx = 0; - gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - (void *)prof_gctx_merge_iter_arg); - - prof_leave(tsd, tdata); -} - -static bool -prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, - prof_gctx_tree_t *gctxs) { - /* Create dump file. */ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { - return true; - } - - /* Dump profile header. */ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg->cnt_all)) { - goto label_write_error; - } - - /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg->propagate_err = propagate_err; - if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, - (void *)prof_gctx_dump_iter_arg) != NULL) { - goto label_write_error; - } - - /* Dump /proc//maps if possible. 
*/ - if (prof_dump_maps(propagate_err)) { - goto label_write_error; - } - - if (prof_dump_close(propagate_err)) { - return true; - } - - return false; -label_write_error: - prof_dump_close(propagate_err); - return true; -} - -bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck) { - cassert(config_prof); - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t * tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return true; - } - - pre_reentrancy(tsd, NULL); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - - prof_gctx_tree_t gctxs; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, - &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, - &prof_gctx_dump_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - post_reentrancy(tsd); - - if (err) { - return true; - } - - if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); - } - return false; -} - -#ifdef JEMALLOC_JET -void -prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) { - tsd_t *tsd; - prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - prof_gctx_tree_t gctxs; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - if (curobjs != NULL) { - *curobjs = 0; - } - if (curbytes != NULL) { - *curbytes = 0; - } - if (accumobjs != NULL) { - *accumobjs = 0; - } - if (accumbytes != NULL) { - *accumbytes = 0; - } - return; - } - - prof_dump_prep(tsd, tdata, 
&prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - if (curobjs != NULL) { - *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; - } - if (curbytes != NULL) { - *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; - } - if (accumobjs != NULL) { - *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; - } - if (accumbytes != NULL) { - *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; - } -} -#endif - -void -prof_bt_hash(const void *key, size_t r_hash[2]) { - prof_bt_t *bt = (prof_bt_t *)key; - - cassert(config_prof); - - hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); -} - -bool -prof_bt_keycomp(const void *k1, const void *k2) { - const prof_bt_t *bt1 = (prof_bt_t *)k1; - const prof_bt_t *bt2 = (prof_bt_t *)k2; - - cassert(config_prof); - - if (bt1->len != bt2->len) { - return false; - } - return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); -} - -prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { - prof_tdata_t *tdata; - - cassert(config_prof); - - /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (tdata == NULL) { - return NULL; - } - - tdata->lock = prof_tdata_mutex_choose(thr_uid); - tdata->thr_uid = thr_uid; - tdata->thr_discrim = thr_discrim; - tdata->thread_name = thread_name; - tdata->attached = true; - tdata->expired = false; - tdata->tctx_uid_next = 0; - - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); - return NULL; - } - - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); - - tdata->enq = false; - tdata->enq_idump = false; - tdata->enq_gdump = false; - - tdata->dumping = false; - tdata->active = active; - - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - return tdata; -} - -static bool -prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { - if (tdata->attached && !even_if_attached) { - return false; - } - if (ckh_count(&tdata->bt2tctx) != 0) { - return false; - } - return true; -} - -static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, tdata->lock); - - return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); -} - -static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); - - tdata_tree_remove(&tdatas, tdata); - - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - } - ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); -} - -static void -prof_tdata_destroy(tsd_t 
*tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); -} - -void -prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); - /* - * Only detach if !destroy_tdata, because detaching would allow - * another thread to win the race to destroy tdata. - */ - if (!destroy_tdata) { - tdata->attached = false; - } - tsd_prof_tdata_set(tsd, NULL); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, true); - } -} - -static bool -prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsdn, tdata->lock); - if (!tdata->expired) { - tdata->expired = true; - destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tsdn, tdata, false); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsdn, tdata->lock); - - return destroy_tdata; -} - -static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); -} - -void -prof_reset(tsd_t *tsd, size_t lg_sample) { - prof_tdata_t *next; - - assert(lg_sample < (sizeof(uint64_t) << 3)); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - - lg_prof_sample = lg_sample; - - next = NULL; - do { - prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); - if (to_destroy != NULL) { - next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsd, to_destroy, false); - } else { - next = NULL; - } - } while (next != NULL); - - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); -} - -void -prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { - prof_tdata_t *tdata = tctx->tdata; - prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_tctx, destroy_gctx; - - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - assert(tctx->cnts.curobjs == 0); - assert(tctx->cnts.curbytes == 0); - assert(!opt_prof_accum); - assert(tctx->cnts.accumobjs == 0); - assert(tctx->cnts.accumbytes == 0); - - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - switch (tctx->state) { - case prof_tctx_state_nominal: - tctx_tree_remove(&gctx->tctxs, tctx); - destroy_tctx = true; - if (prof_gctx_should_destroy(gctx)) { - /* - * Increment gctx->nlimbo in order to keep another - * thread from winning the race to destroy gctx while - * this one has gctx->lock dropped. Without this, it - * would be possible for another thread to: - * - * 1) Sample an allocation associated with gctx. - * 2) Deallocate the sampled object. - * 3) Successfully prof_gctx_try_destroy(gctx). - * - * The result would be that gctx no longer exists by the - * time this thread accesses it in - * prof_gctx_try_destroy(). 
- */ - gctx->nlimbo++; - destroy_gctx = true; - } else { - destroy_gctx = false; - } - break; - case prof_tctx_state_dumping: - /* - * A dumping thread needs tctx to remain valid until dumping - * has finished. Change state such that the dumping thread will - * complete destruction during a late dump iteration phase. - */ - tctx->state = prof_tctx_state_purgatory; - destroy_tctx = false; - destroy_gctx = false; - break; - default: - not_reached(); - destroy_tctx = false; - destroy_gctx = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - if (destroy_gctx) { - prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, - tdata); - } - - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, false); - } - - if (destroy_tctx) { - idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); - } -} - -/******************************************************************************/ From 5742473cc87558b4655064ebacfd837119673928 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 Jul 2019 14:09:20 -0700 Subject: [PATCH 1328/2608] Revert "Refactor prof log" This reverts commit 7618b0b8e458d9c0db6e4b05ccbe6c6308952890. 
--- Makefile.in | 1 - include/jemalloc/internal/prof_externs.h | 8 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - src/prof.c | 682 ++++++++++++++++- src/prof_log.c | 698 ------------------ 8 files changed, 677 insertions(+), 720 deletions(-) delete mode 100644 src/prof_log.c diff --git a/Makefile.in b/Makefile.in index 1cd973d7..7128b007 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,7 +117,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ - $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/stats.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e94ac3b2..094f3e17 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -43,8 +43,6 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; -extern bool prof_booted; - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -66,14 +64,10 @@ extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif -int prof_getpid(void); bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); - -void prof_bt_hash(const void *key, size_t r_hash[2]); -bool prof_bt_keycomp(const void *k1, const void *k2); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); @@ -97,10 +91,8 @@ void prof_postfork_parent(tsdn_t *tsdn); void 
prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); -void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); -bool prof_log_init(tsd_t *tsdn); #ifdef JEMALLOC_JET size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d93d9099..228e8be0 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,7 +58,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 7b09d4e6..d839515b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 28bd3cd6..edcceede 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,7 +58,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index a66c209b..6df72601 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/src/prof.c b/src/prof.c index 9d1edb32..4ebe2799 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/emitter.h" /******************************************************************************/ @@ -38,6 +39,7 @@ bool opt_prof_gdump = 
false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; +bool opt_prof_log = false; char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -70,6 +72,100 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; +typedef enum prof_logging_state_e prof_logging_state_t; +enum prof_logging_state_e { + prof_logging_state_stopped, + prof_logging_state_started, + prof_logging_state_dumping +}; + +/* + * - stopped: log_start never called, or previous log_stop has completed. + * - started: log_start called, log_stop not called yet. Allocations are logged. + * - dumping: log_stop called but not finished; samples are not logged anymore. + */ +prof_logging_state_t prof_logging_state = prof_logging_state_stopped; + +#ifdef JEMALLOC_JET +static bool prof_log_dummy = false; +#endif + +/* Incremented for every log file that is output. */ +static uint64_t log_seq = 0; +static char log_filename[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Timestamp for most recent call to log_start(). */ +static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; + +/* Increment these when adding to the log_bt and log_thr linked lists. */ +static size_t log_bt_index = 0; +static size_t log_thr_index = 0; + +/* Linked list node definitions. These are only used in prof.c. */ +typedef struct prof_bt_node_s prof_bt_node_t; + +struct prof_bt_node_s { + prof_bt_node_t *next; + size_t index; + prof_bt_t bt; + /* Variable size backtrace vector pointed to by bt. */ + void *vec[1]; +}; + +typedef struct prof_thr_node_s prof_thr_node_t; + +struct prof_thr_node_s { + prof_thr_node_t *next; + size_t index; + uint64_t thr_uid; + /* Variable size based on thr_name_sz. */ + char name[1]; +}; + +typedef struct prof_alloc_node_s prof_alloc_node_t; + +/* This is output when logging sampled allocations. 
*/ +struct prof_alloc_node_s { + prof_alloc_node_t *next; + /* Indices into an array of thread data. */ + size_t alloc_thr_ind; + size_t free_thr_ind; + + /* Indices into an array of backtraces. */ + size_t alloc_bt_ind; + size_t free_bt_ind; + + uint64_t alloc_time_ns; + uint64_t free_time_ns; + + size_t usize; +}; + +/* + * Created on the first call to prof_log_start and deleted on prof_log_stop. + * These are the backtraces and threads that have already been logged by an + * allocation. + */ +static bool log_tables_initialized = false; +static ckh_t log_bt_node_set; +static ckh_t log_thr_node_set; + +/* Store linked lists for logged data. */ +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; +static prof_alloc_node_t *log_alloc_first = NULL; +static prof_alloc_node_t *log_alloc_last = NULL; + +/* Protects the prof_logging_state and any log_{...} variable. */ +static malloc_mutex_t log_mtx; + /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. @@ -129,7 +225,7 @@ static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. */ -bool prof_booted = false; +static bool prof_booted = false; /******************************************************************************/ /* @@ -145,6 +241,12 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +/* Hashtable functions for log_bt_node_set and log_thr_node_set. 
*/ +static void prof_thr_node_hash(const void *key, size_t r_hash[2]); +static bool prof_thr_node_keycomp(const void *k1, const void *k2); +static void prof_bt_node_hash(const void *key, size_t r_hash[2]); +static bool prof_bt_node_keycomp(const void *k1, const void *k2); + /******************************************************************************/ /* Red-black trees. */ @@ -259,6 +361,162 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, malloc_mutex_unlock(tsdn, tctx->tdata->lock); } +static size_t +prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_bt_node_t dummy_node; + dummy_node.bt = *bt; + prof_bt_node_t *node; + + /* See if this backtrace is already cached in the table. */ + if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_bt_first == NULL) { + log_bt_first = new_node; + log_bt_last = new_node; + } else { + log_bt_last->next = new_node; + log_bt_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_bt_index; + /* + * Copy the backtrace: bt is inside a tdata or gctx, which + * might die before prof_log_stop is called. 
+ */ + new_node->bt.len = bt->len; + memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); + new_node->bt.vec = new_node->vec; + + log_bt_index++; + ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} +static size_t +prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_thr_node_t dummy_node; + dummy_node.thr_uid = thr_uid; + prof_thr_node_t *node; + + /* See if this thread is already cached in the table. */ + if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; + prof_thr_node_t *new_node = (prof_thr_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_thr_first == NULL) { + log_thr_first = new_node; + log_thr_last = new_node; + } else { + log_thr_last->next = new_node; + log_thr_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_thr_index; + new_node->thr_uid = thr_uid; + strcpy(new_node->name, name); + + log_thr_index++; + ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} + +static void +prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); + if (cons_tdata == NULL) { + /* + * We decide not to log these allocations. cons_tdata will be + * NULL only when the current thread is in a weird state (e.g. + * it's being destroyed). 
+ */ + return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + goto label_done; + } + + if (!log_tables_initialized) { + bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp); + bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp); + if (err1 || err2) { + goto label_done; + } + log_tables_initialized = true; + } + + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, + (alloc_ctx_t *)NULL); + nstime_t free_time = NSTIME_ZERO_INITIALIZER; + nstime_update(&free_time); + + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? + "" : tctx->tdata->thread_name; + const char *cons_thr_name = prof_thread_name_get(tsd); + + prof_bt_t bt; + /* Initialize the backtrace, using the buffer in tdata to store it. */ + bt_init(&bt, cons_tdata->vec); + prof_backtrace(&bt); + prof_bt_t *cons_bt = &bt; + + /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ + prof_bt_t *prod_bt = &tctx->gctx->bt; + + new_node->next = NULL; + new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, + prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, + cons_thr_name); + new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); + new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); + new_node->alloc_time_ns = nstime_ns(&alloc_time); + new_node->free_time_ns = nstime_ns(&free_time); + new_node->usize = usize; + + if (log_alloc_first == NULL) { + log_alloc_first = new_node; + log_alloc_last = new_node; + } else { + log_alloc_last->next = new_node; + log_alloc_last = new_node; + } + +label_done: + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); +} + void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { @@ -1435,7 +1693,7 @@ prof_open_maps(const char *format, ...) { } #endif -int +static int prof_getpid(void) { #ifdef _WIN32 return GetCurrentProcessId(); @@ -1877,7 +2135,7 @@ prof_gdump(tsdn_t *tsdn) { } } -void +static void prof_bt_hash(const void *key, size_t r_hash[2]) { prof_bt_t *bt = (prof_bt_t *)key; @@ -1886,7 +2144,7 @@ prof_bt_hash(const void *key, size_t r_hash[2]) { hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); } -bool +static bool prof_bt_keycomp(const void *k1, const void *k2) { const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; @@ -1899,6 +2157,33 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } +static void +prof_bt_node_hash(const void *key, size_t r_hash[2]) { + const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; + prof_bt_hash((void *)(&bt_node->bt), r_hash); +} + +static bool +prof_bt_node_keycomp(const void *k1, const void *k2) { + const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; + const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; + return prof_bt_keycomp((void *)(&bt_node1->bt), + (void 
*)(&bt_node2->bt)); +} + +static void +prof_thr_node_hash(const void *key, size_t r_hash[2]) { + const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; + hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); +} + +static bool +prof_thr_node_keycomp(const void *k1, const void *k2) { + const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; + const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; + return thr_node1->thr_uid == thr_node2->thr_uid; +} + static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -2131,6 +2416,368 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } +#ifdef JEMALLOC_JET +size_t +prof_log_bt_count(void) { + size_t cnt = 0; + prof_bt_node_t *node = log_bt_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_alloc_count(void) { + size_t cnt = 0; + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_thr_count(void) { + size_t cnt = 0; + prof_thr_node_t *node = log_thr_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +bool +prof_log_is_logging(void) { + return prof_logging_state == prof_logging_state_started; +} + +bool +prof_log_rep_check(void) { + if (prof_logging_state == prof_logging_state_stopped + && log_tables_initialized) { + return true; + } + + if (log_bt_last != NULL && log_bt_last->next != NULL) { + return true; + } + if (log_thr_last != NULL && log_thr_last->next != NULL) { + return true; + } + if (log_alloc_last != NULL && log_alloc_last->next != NULL) { + return true; + } + + size_t bt_count = prof_log_bt_count(); + size_t thr_count = prof_log_thr_count(); + size_t alloc_count = prof_log_alloc_count(); + + + if (prof_logging_state == prof_logging_state_stopped) { + if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { + return true; + } + } + + prof_alloc_node_t *node = log_alloc_first; + while (node 
!= NULL) { + if (node->alloc_bt_ind >= bt_count) { + return true; + } + if (node->free_bt_ind >= bt_count) { + return true; + } + if (node->alloc_thr_ind >= thr_count) { + return true; + } + if (node->free_thr_ind >= thr_count) { + return true; + } + if (node->alloc_time_ns > node->free_time_ns) { + return true; + } + node = node->next; + } + + return false; +} + +void +prof_log_dummy_set(bool new_value) { + prof_log_dummy = new_value; +} +#endif + +bool +prof_log_start(tsdn_t *tsdn, const char *filename) { + if (!opt_prof || !prof_booted) { + return true; + } + + bool ret = false; + size_t buf_size = PATH_MAX + 1; + + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_stopped) { + ret = true; + } else if (filename == NULL) { + /* Make default name. */ + malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", + opt_prof_prefix, prof_getpid(), log_seq); + log_seq++; + prof_logging_state = prof_logging_state_started; + } else if (strlen(filename) >= buf_size) { + ret = true; + } else { + strcpy(log_filename, filename); + prof_logging_state = prof_logging_state_started; + } + + if (!ret) { + nstime_update(&log_start_timestamp); + } + + malloc_mutex_unlock(tsdn, &log_mtx); + + return ret; +} + +/* Used as an atexit function to stop logging on exit. */ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + +struct prof_emitter_cb_arg_s { + int fd; + ssize_t ret; +}; + +static void +prof_emitter_write_cb(void *opaque, const char *to_write) { + struct prof_emitter_cb_arg_s *arg = + (struct prof_emitter_cb_arg_s *)opaque; + size_t bytes = strlen(to_write); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return; + } +#endif + arg->ret = write(arg->fd, (void *)to_write, bytes); +} + +/* + * prof_log_emit_{...} goes through the appropriate linked list, emitting each + * node to the json and deallocating it. 
+ */ +static void +prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "threads"); + prof_thr_node_t *thr_node = log_thr_first; + prof_thr_node_t *thr_old_node; + while (thr_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, + &thr_node->thr_uid); + + char *thr_name = thr_node->name; + + emitter_json_kv(emitter, "thr_name", emitter_type_string, + &thr_name); + + emitter_json_object_end(emitter); + thr_old_node = thr_node; + thr_node = thr_node->next; + idalloc(tsd, thr_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "stack_traces"); + prof_bt_node_t *bt_node = log_bt_first; + prof_bt_node_t *bt_old_node; + /* + * Calculate how many hex digits we need: twice number of bytes, two for + * "0x", and then one more for terminating '\0'. + */ + char buf[2 * sizeof(intptr_t) + 3]; + size_t buf_sz = sizeof(buf); + while (bt_node != NULL) { + emitter_json_array_begin(emitter); + size_t i; + for (i = 0; i < bt_node->bt.len; i++) { + malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); + char *trace_str = buf; + emitter_json_value(emitter, emitter_type_string, + &trace_str); + } + emitter_json_array_end(emitter); + + bt_old_node = bt_node; + bt_node = bt_node->next; + idalloc(tsd, bt_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "allocations"); + prof_alloc_node_t *alloc_node = log_alloc_first; + prof_alloc_node_t *alloc_old_node; + while (alloc_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "alloc_thread", emitter_type_size, + &alloc_node->alloc_thr_ind); + + emitter_json_kv(emitter, "free_thread", emitter_type_size, + &alloc_node->free_thr_ind); + + emitter_json_kv(emitter, "alloc_trace", 
emitter_type_size, + &alloc_node->alloc_bt_ind); + + emitter_json_kv(emitter, "free_trace", emitter_type_size, + &alloc_node->free_bt_ind); + + emitter_json_kv(emitter, "alloc_timestamp", + emitter_type_uint64, &alloc_node->alloc_time_ns); + + emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, + &alloc_node->free_time_ns); + + emitter_json_kv(emitter, "usize", emitter_type_uint64, + &alloc_node->usize); + + emitter_json_object_end(emitter); + + alloc_old_node = alloc_node; + alloc_node = alloc_node->next; + idalloc(tsd, alloc_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_metadata(emitter_t *emitter) { + emitter_json_object_kv_begin(emitter, "info"); + + nstime_t now = NSTIME_ZERO_INITIALIZER; + + nstime_update(&now); + uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); + emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); + + char *vers = JEMALLOC_VERSION; + emitter_json_kv(emitter, "version", + emitter_type_string, &vers); + + emitter_json_kv(emitter, "lg_sample_rate", + emitter_type_int, &lg_prof_sample); + + int pid = prof_getpid(); + emitter_json_kv(emitter, "pid", emitter_type_int, &pid); + + emitter_json_object_end(emitter); +} + + +bool +prof_log_stop(tsdn_t *tsdn) { + if (!opt_prof || !prof_booted) { + return true; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + malloc_mutex_unlock(tsdn, &log_mtx); + return true; + } + + /* + * Set the state to dumping. We'll set it to stopped when we're done. + * Since other threads won't be able to start/stop/log when the state is + * dumping, we don't have to hold the lock during the whole method. + */ + prof_logging_state = prof_logging_state_dumping; + malloc_mutex_unlock(tsdn, &log_mtx); + + + emitter_t emitter; + + /* Create a file. 
*/ + + int fd; +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + fd = 0; + } else { + fd = creat(log_filename, 0644); + } +#else + fd = creat(log_filename, 0644); +#endif + + if (fd == -1) { + malloc_printf(": creat() for log file \"%s\" " + " failed with %d\n", log_filename, errno); + if (opt_abort) { + abort(); + } + return true; + } + + /* Emit to json. */ + struct prof_emitter_cb_arg_s arg; + arg.fd = fd; + emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, + (void *)(&arg)); + + emitter_json_object_begin(&emitter); + prof_log_emit_metadata(&emitter); + prof_log_emit_threads(tsd, &emitter); + prof_log_emit_traces(tsd, &emitter); + prof_log_emit_allocs(tsd, &emitter); + emitter_json_object_end(&emitter); + + /* Reset global state. */ + if (log_tables_initialized) { + ckh_delete(tsd, &log_bt_node_set); + ckh_delete(tsd, &log_thr_node_set); + } + log_tables_initialized = false; + log_bt_index = 0; + log_thr_index = 0; + log_bt_first = NULL; + log_bt_last = NULL; + log_thr_first = NULL; + log_thr_last = NULL; + log_alloc_first = NULL; + log_alloc_last = NULL; + + malloc_mutex_lock(tsdn, &log_mtx); + prof_logging_state = prof_logging_state_stopped; + malloc_mutex_unlock(tsdn, &log_mtx); + +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return false; + } +#endif + return close(fd); +} + const char * prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; @@ -2367,10 +3014,35 @@ prof_boot2(tsd_t *tsd) { } } - if (prof_log_init(tsd)) { + if (opt_prof_log) { + prof_log_start(tsd_tsdn(tsd), NULL); + } + + if (atexit(prof_log_stop_final) != 0) { + malloc_write(": Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + } + + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { return true; } + if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp)) { + return true; + } + + if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, 
prof_thr_node_keycomp)) { + return true; + } + + log_tables_initialized = true; + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); diff --git a/src/prof_log.c b/src/prof_log.c deleted file mode 100644 index 25a6abee..00000000 --- a/src/prof_log.c +++ /dev/null @@ -1,698 +0,0 @@ -#define JEMALLOC_PROF_C_ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/malloc_io.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/emitter.h" - -bool opt_prof_log = false; -typedef enum prof_logging_state_e prof_logging_state_t; -enum prof_logging_state_e { - prof_logging_state_stopped, - prof_logging_state_started, - prof_logging_state_dumping -}; - -/* - * - stopped: log_start never called, or previous log_stop has completed. - * - started: log_start called, log_stop not called yet. Allocations are logged. - * - dumping: log_stop called but not finished; samples are not logged anymore. - */ -prof_logging_state_t prof_logging_state = prof_logging_state_stopped; - -#ifdef JEMALLOC_JET -static bool prof_log_dummy = false; -#endif - -/* Incremented for every log file that is output. */ -static uint64_t log_seq = 0; -static char log_filename[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Timestamp for most recent call to log_start(). */ -static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; - -/* Increment these when adding to the log_bt and log_thr linked lists. */ -static size_t log_bt_index = 0; -static size_t log_thr_index = 0; - -/* Linked list node definitions. These are only used in this file. 
*/ -typedef struct prof_bt_node_s prof_bt_node_t; - -struct prof_bt_node_s { - prof_bt_node_t *next; - size_t index; - prof_bt_t bt; - /* Variable size backtrace vector pointed to by bt. */ - void *vec[1]; -}; - -typedef struct prof_thr_node_s prof_thr_node_t; - -struct prof_thr_node_s { - prof_thr_node_t *next; - size_t index; - uint64_t thr_uid; - /* Variable size based on thr_name_sz. */ - char name[1]; -}; - -typedef struct prof_alloc_node_s prof_alloc_node_t; - -/* This is output when logging sampled allocations. */ -struct prof_alloc_node_s { - prof_alloc_node_t *next; - /* Indices into an array of thread data. */ - size_t alloc_thr_ind; - size_t free_thr_ind; - - /* Indices into an array of backtraces. */ - size_t alloc_bt_ind; - size_t free_bt_ind; - - uint64_t alloc_time_ns; - uint64_t free_time_ns; - - size_t usize; -}; - -/* - * Created on the first call to prof_log_start and deleted on prof_log_stop. - * These are the backtraces and threads that have already been logged by an - * allocation. - */ -static bool log_tables_initialized = false; -static ckh_t log_bt_node_set; -static ckh_t log_thr_node_set; - -/* Store linked lists for logged data. */ -static prof_bt_node_t *log_bt_first = NULL; -static prof_bt_node_t *log_bt_last = NULL; -static prof_thr_node_t *log_thr_first = NULL; -static prof_thr_node_t *log_thr_last = NULL; -static prof_alloc_node_t *log_alloc_first = NULL; -static prof_alloc_node_t *log_alloc_last = NULL; - -/* Protects the prof_logging_state and any log_{...} variable. */ -static malloc_mutex_t log_mtx; - -/******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. - */ - -/* Hashtable functions for log_bt_node_set and log_thr_node_set. 
*/ -static void prof_thr_node_hash(const void *key, size_t r_hash[2]); -static bool prof_thr_node_keycomp(const void *k1, const void *k2); -static void prof_bt_node_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_node_keycomp(const void *k1, const void *k2); - -/******************************************************************************/ - -static size_t -prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_bt_node_t dummy_node; - dummy_node.bt = *bt; - prof_bt_node_t *node; - - /* See if this backtrace is already cached in the table. */ - if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_bt_node_t, vec) + - (bt->len * sizeof(void *)); - prof_bt_node_t *new_node = (prof_bt_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_bt_first == NULL) { - log_bt_first = new_node; - log_bt_last = new_node; - } else { - log_bt_last->next = new_node; - log_bt_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_bt_index; - /* - * Copy the backtrace: bt is inside a tdata or gctx, which - * might die before prof_log_stop is called. - */ - new_node->bt.len = bt->len; - memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); - new_node->bt.vec = new_node->vec; - - log_bt_index++; - ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} -static size_t -prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_thr_node_t dummy_node; - dummy_node.thr_uid = thr_uid; - prof_thr_node_t *node; - - /* See if this thread is already cached in the table. 
*/ - if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; - prof_thr_node_t *new_node = (prof_thr_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_thr_first == NULL) { - log_thr_first = new_node; - log_thr_last = new_node; - } else { - log_thr_last->next = new_node; - log_thr_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_thr_index; - new_node->thr_uid = thr_uid; - strcpy(new_node->name, name); - - log_thr_index++; - ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} - -void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); - if (cons_tdata == NULL) { - /* - * We decide not to log these allocations. cons_tdata will be - * NULL only when the current thread is in a weird state (e.g. - * it's being destroyed). 
- */ - return; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - goto label_done; - } - - if (!log_tables_initialized) { - bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp); - bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp); - if (err1 || err2) { - goto label_done; - } - log_tables_initialized = true; - } - - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, - (alloc_ctx_t *)NULL); - nstime_t free_time = NSTIME_ZERO_INITIALIZER; - nstime_update(&free_time); - - size_t sz = sizeof(prof_alloc_node_t); - prof_alloc_node_t *new_node = (prof_alloc_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - - const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? - "" : tctx->tdata->thread_name; - const char *cons_thr_name = prof_thread_name_get(tsd); - - prof_bt_t bt; - /* Initialize the backtrace, using the buffer in tdata to store it. */ - bt_init(&bt, cons_tdata->vec); - prof_backtrace(&bt); - prof_bt_t *cons_bt = &bt; - - /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ - prof_bt_t *prod_bt = &tctx->gctx->bt; - - new_node->next = NULL; - new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, - prod_thr_name); - new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, - cons_thr_name); - new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); - new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); - new_node->alloc_time_ns = nstime_ns(&alloc_time); - new_node->free_time_ns = nstime_ns(&free_time); - new_node->usize = usize; - - if (log_alloc_first == NULL) { - log_alloc_first = new_node; - log_alloc_last = new_node; - } else { - log_alloc_last->next = new_node; - log_alloc_last = new_node; - } - -label_done: - malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); -} - -static void -prof_bt_node_hash(const void *key, size_t r_hash[2]) { - const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; - prof_bt_hash((void *)(&bt_node->bt), r_hash); -} - -static bool -prof_bt_node_keycomp(const void *k1, const void *k2) { - const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; - const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; - return prof_bt_keycomp((void *)(&bt_node1->bt), - (void *)(&bt_node2->bt)); -} - -static void -prof_thr_node_hash(const void *key, size_t r_hash[2]) { - const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; - hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); -} - -static bool -prof_thr_node_keycomp(const void *k1, const void *k2) { - const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; - const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; - return thr_node1->thr_uid == thr_node2->thr_uid; -} - -#ifdef JEMALLOC_JET -size_t -prof_log_bt_count(void) { - size_t cnt = 0; - prof_bt_node_t *node = log_bt_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_alloc_count(void) { - size_t cnt = 0; - prof_alloc_node_t *node = log_alloc_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; 
-} - -size_t -prof_log_thr_count(void) { - size_t cnt = 0; - prof_thr_node_t *node = log_thr_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -bool -prof_log_is_logging(void) { - return prof_logging_state == prof_logging_state_started; -} - -bool -prof_log_rep_check(void) { - if (prof_logging_state == prof_logging_state_stopped - && log_tables_initialized) { - return true; - } - - if (log_bt_last != NULL && log_bt_last->next != NULL) { - return true; - } - if (log_thr_last != NULL && log_thr_last->next != NULL) { - return true; - } - if (log_alloc_last != NULL && log_alloc_last->next != NULL) { - return true; - } - - size_t bt_count = prof_log_bt_count(); - size_t thr_count = prof_log_thr_count(); - size_t alloc_count = prof_log_alloc_count(); - - - if (prof_logging_state == prof_logging_state_stopped) { - if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { - return true; - } - } - - prof_alloc_node_t *node = log_alloc_first; - while (node != NULL) { - if (node->alloc_bt_ind >= bt_count) { - return true; - } - if (node->free_bt_ind >= bt_count) { - return true; - } - if (node->alloc_thr_ind >= thr_count) { - return true; - } - if (node->free_thr_ind >= thr_count) { - return true; - } - if (node->alloc_time_ns > node->free_time_ns) { - return true; - } - node = node->next; - } - - return false; -} - -void -prof_log_dummy_set(bool new_value) { - prof_log_dummy = new_value; -} -#endif - -bool -prof_log_start(tsdn_t *tsdn, const char *filename) { - if (!opt_prof || !prof_booted) { - return true; - } - - bool ret = false; - size_t buf_size = PATH_MAX + 1; - - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_stopped) { - ret = true; - } else if (filename == NULL) { - /* Make default name. 
*/ - malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", - opt_prof_prefix, prof_getpid(), log_seq); - log_seq++; - prof_logging_state = prof_logging_state_started; - } else if (strlen(filename) >= buf_size) { - ret = true; - } else { - strcpy(log_filename, filename); - prof_logging_state = prof_logging_state_started; - } - - if (!ret) { - nstime_update(&log_start_timestamp); - } - - malloc_mutex_unlock(tsdn, &log_mtx); - - return ret; -} - -/* Used as an atexit function to stop logging on exit. */ -static void -prof_log_stop_final(void) { - tsd_t *tsd = tsd_fetch(); - prof_log_stop(tsd_tsdn(tsd)); -} - -struct prof_emitter_cb_arg_s { - int fd; - ssize_t ret; -}; - -static void -prof_emitter_write_cb(void *opaque, const char *to_write) { - struct prof_emitter_cb_arg_s *arg = - (struct prof_emitter_cb_arg_s *)opaque; - size_t bytes = strlen(to_write); -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return; - } -#endif - arg->ret = write(arg->fd, (void *)to_write, bytes); -} - -/* - * prof_log_emit_{...} goes through the appropriate linked list, emitting each - * node to the json and deallocating it. 
- */ -static void -prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "threads"); - prof_thr_node_t *thr_node = log_thr_first; - prof_thr_node_t *thr_old_node; - while (thr_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, - &thr_node->thr_uid); - - char *thr_name = thr_node->name; - - emitter_json_kv(emitter, "thr_name", emitter_type_string, - &thr_name); - - emitter_json_object_end(emitter); - thr_old_node = thr_node; - thr_node = thr_node->next; - idalloc(tsd, thr_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "stack_traces"); - prof_bt_node_t *bt_node = log_bt_first; - prof_bt_node_t *bt_old_node; - /* - * Calculate how many hex digits we need: twice number of bytes, two for - * "0x", and then one more for terminating '\0'. - */ - char buf[2 * sizeof(intptr_t) + 3]; - size_t buf_sz = sizeof(buf); - while (bt_node != NULL) { - emitter_json_array_begin(emitter); - size_t i; - for (i = 0; i < bt_node->bt.len; i++) { - malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); - char *trace_str = buf; - emitter_json_value(emitter, emitter_type_string, - &trace_str); - } - emitter_json_array_end(emitter); - - bt_old_node = bt_node; - bt_node = bt_node->next; - idalloc(tsd, bt_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "allocations"); - prof_alloc_node_t *alloc_node = log_alloc_first; - prof_alloc_node_t *alloc_old_node; - while (alloc_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "alloc_thread", emitter_type_size, - &alloc_node->alloc_thr_ind); - - emitter_json_kv(emitter, "free_thread", emitter_type_size, - &alloc_node->free_thr_ind); - - emitter_json_kv(emitter, "alloc_trace", 
emitter_type_size, - &alloc_node->alloc_bt_ind); - - emitter_json_kv(emitter, "free_trace", emitter_type_size, - &alloc_node->free_bt_ind); - - emitter_json_kv(emitter, "alloc_timestamp", - emitter_type_uint64, &alloc_node->alloc_time_ns); - - emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, - &alloc_node->free_time_ns); - - emitter_json_kv(emitter, "usize", emitter_type_uint64, - &alloc_node->usize); - - emitter_json_object_end(emitter); - - alloc_old_node = alloc_node; - alloc_node = alloc_node->next; - idalloc(tsd, alloc_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_metadata(emitter_t *emitter) { - emitter_json_object_kv_begin(emitter, "info"); - - nstime_t now = NSTIME_ZERO_INITIALIZER; - - nstime_update(&now); - uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); - emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); - - char *vers = JEMALLOC_VERSION; - emitter_json_kv(emitter, "version", - emitter_type_string, &vers); - - emitter_json_kv(emitter, "lg_sample_rate", - emitter_type_int, &lg_prof_sample); - - int pid = prof_getpid(); - emitter_json_kv(emitter, "pid", emitter_type_int, &pid); - - emitter_json_object_end(emitter); -} - - -bool -prof_log_stop(tsdn_t *tsdn) { - if (!opt_prof || !prof_booted) { - return true; - } - - tsd_t *tsd = tsdn_tsd(tsdn); - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - malloc_mutex_unlock(tsdn, &log_mtx); - return true; - } - - /* - * Set the state to dumping. We'll set it to stopped when we're done. - * Since other threads won't be able to start/stop/log when the state is - * dumping, we don't have to hold the lock during the whole method. - */ - prof_logging_state = prof_logging_state_dumping; - malloc_mutex_unlock(tsdn, &log_mtx); - - - emitter_t emitter; - - /* Create a file. 
*/ - - int fd; -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - fd = 0; - } else { - fd = creat(log_filename, 0644); - } -#else - fd = creat(log_filename, 0644); -#endif - - if (fd == -1) { - malloc_printf("<jemalloc>: creat() for log file \"%s\" " - " failed with %d\n", log_filename, errno); - if (opt_abort) { - abort(); - } - return true; - } - - /* Emit to json. */ - struct prof_emitter_cb_arg_s arg; - arg.fd = fd; - emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, - (void *)(&arg)); - - emitter_json_object_begin(&emitter); - prof_log_emit_metadata(&emitter); - prof_log_emit_threads(tsd, &emitter); - prof_log_emit_traces(tsd, &emitter); - prof_log_emit_allocs(tsd, &emitter); - emitter_json_object_end(&emitter); - - /* Reset global state. */ - if (log_tables_initialized) { - ckh_delete(tsd, &log_bt_node_set); - ckh_delete(tsd, &log_thr_node_set); - } - log_tables_initialized = false; - log_bt_index = 0; - log_thr_index = 0; - log_bt_first = NULL; - log_bt_last = NULL; - log_thr_first = NULL; - log_thr_last = NULL; - log_alloc_first = NULL; - log_alloc_last = NULL; - - malloc_mutex_lock(tsdn, &log_mtx); - prof_logging_state = prof_logging_state_stopped; - malloc_mutex_unlock(tsdn, &log_mtx); - -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return false; - } -#endif - return close(fd); -} - -bool prof_log_init(tsd_t *tsd) { - if (opt_prof_log) { - prof_log_start(tsd_tsdn(tsd), NULL); - } - - if (atexit(prof_log_stop_final) != 0) { - malloc_write("<jemalloc>: Error in atexit() " - "for logging\n"); - if (opt_abort) { - abort(); - } - } - - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { - return true; - } - - if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp)) { - return true; - } - - if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp)) { - return true; - } - - log_tables_initialized = true; - return false; -} - 
-/******************************************************************************/ From c9cdc1b27f8aa9c1e81e733e60d470c04be960b3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 Jul 2019 11:43:08 -0700 Subject: [PATCH 1329/2608] Limit to exact fit on Windows with retain off. W/o retain, split and merge are disallowed on Windows. Avoid doing first-fit which needs splitting almost always. Instead, try exact fit only and bail out early. --- src/extent.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/extent.c b/src/extent.c index a2dbde12..9237f903 100644 --- a/src/extent.c +++ b/src/extent.c @@ -445,6 +445,16 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *ret = NULL; pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); + + if (!maps_coalesce && !opt_retain) { + /* + * No split / merge allowed (Windows w/o retain). Try exact fit + * only. + */ + return extent_heap_empty(&extents->heaps[pind]) ? NULL : + extent_heap_first(&extents->heaps[pind]); + } + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); i < SC_NPSIZES + 1; From 9344d25488b626739c9080eb471d1bd15eeb046b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 26 Jul 2019 17:00:24 -0700 Subject: [PATCH 1330/2608] Workaround to address g++ unused variable warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit g++ 5.5.0+ complained `parameter ‘expected’ set but not used [-Werror=unused-but-set-parameter]` (despite that `expected` is in fact used). 
--- include/jemalloc/internal/atomic_gcc_atomic.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h index 6b73a14f..471515e8 100644 --- a/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -67,7 +67,8 @@ atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ \ ATOMIC_INLINE bool \ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ + UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ atomic_memory_order_t failure_mo) { \ return __atomic_compare_exchange(&a->repr, expected, &desired, \ true, atomic_enum_to_builtin(success_mo), \ @@ -76,7 +77,8 @@ atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ \ ATOMIC_INLINE bool \ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ + UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ atomic_memory_order_t failure_mo) { \ return __atomic_compare_exchange(&a->repr, expected, &desired, \ false, \ From 82b8aaaeb68ccb65ca52532f4806a43fbdb26b7a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 30 Jul 2019 11:26:13 -0700 Subject: [PATCH 1331/2608] Quick fix for prof log printing The emitter APIs used were incorrect, a side effect of which was extra lines being printed. 
--- src/prof.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/prof.c b/src/prof.c index 4ebe2799..fcf9c6f6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2744,12 +2744,12 @@ prof_log_stop(tsdn_t *tsdn) { emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, (void *)(&arg)); - emitter_json_object_begin(&emitter); + emitter_begin(&emitter); prof_log_emit_metadata(&emitter); prof_log_emit_threads(tsd, &emitter); prof_log_emit_traces(tsd, &emitter); prof_log_emit_allocs(tsd, &emitter); - emitter_json_object_end(&emitter); + emitter_end(&emitter); /* Reset global state. */ if (log_tables_initialized) { From 8a94ac25d597e439b05b38c013e4cb2d1169c681 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 1 Aug 2019 16:16:44 -0700 Subject: [PATCH 1332/2608] Sanity check on prof dump buffer size. --- src/prof.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/prof.c b/src/prof.c index fcf9c6f6..13334cb4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1303,6 +1303,7 @@ prof_dump_write(bool propagate_err, const char *s) { prof_dump_buf_end += n; i += n; } + assert(i == slen); return false; } From 0cfa36a58a91b30996b30c948d67e1daf184c663 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 Jul 2019 11:30:30 -0700 Subject: [PATCH 1333/2608] Update Changelog for 5.2.1. --- ChangeLog | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/ChangeLog b/ChangeLog index 7c73a8f2..e55813b7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,39 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.2.1 (August 5, 2019) + + This release is primarily about Windows. A critical virtual memory leak is + resolved on all Windows platforms. The regression was present in all releases + since 5.0.0. + + Bug fixes: + - Fix a severe virtual memory leak on Windows. This regression was first + released in 5.0.0. 
(@Ignition, @j0t, @frederik-h, @davidtgoldblatt, + @interwq) + - Fix size 0 handling in posix_memalign(). This regression was first released + in 5.2.0. (@interwq) + - Fix the prof_log unit test which may observe unexpected backtraces from + compiler optimizations. The test was first added in 5.2.0. (@marxin, + @gnzlbg, @interwq) + - Fix the declaration of the extent_avail tree. This regression was first + released in 5.1.0. (@zoulasc) + - Fix an incorrect reference in jeprof. This functionality was first released + in 3.0.0. (@prehistoric-penguin) + - Fix an assertion on the deallocation fast-path. This regression was first + released in 5.2.0. (@yinan1048576) + - Fix the TLS_MODEL attribute in headers. This regression was first released + in 5.0.0. (@zoulasc, @interwq) + + Optimizations and refactors: + - Implement opt.retain on Windows and enable by default on 64-bit. (@interwq, + @davidtgoldblatt) + - Optimize away a branch on the operator delete[] path. (@mgrice) + - Add format annotation to the format generator function. (@zoulasc) + - Refactor and improve the size class header generation. (@yinan1048576) + - Remove best fit. (@djwatson) + - Avoid blocking on background thread locks for stats. (@oranagra, @interwq) + * 5.2.0 (April 2, 2019) This release includes a few notable improvements, which are summarized below: From 56c8ecffc1f84f630e10f775bc29fcf4c743a3c9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Jun 2019 09:22:10 -0700 Subject: [PATCH 1334/2608] Correct tsd layout graph Augmented the tsd layout graph so that the two recently added fields, `offset_state` and `bytes_until_sample`, are properly reflected. As is shown, the cache footprint is 16 bytes larger than before. 
--- include/jemalloc/internal/tsd.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 9ba26004..18b2476b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -20,6 +20,7 @@ * e: tcache_enabled * m: thread_allocated (config_stats) * f: thread_deallocated (config_stats) + * b: bytes_until_sample (config_prof) * p: prof_tdata (config_prof) * c: rtree_ctx (rtree cache accessed on deallocation) * t: tcache @@ -27,6 +28,7 @@ * d: arenas_tdata_bypass * r: reentrancy_level * x: narenas_tdata + * v: offset_state * i: iarena * a: arena * o: arenas_tdata @@ -35,11 +37,13 @@ * Use a compact layout to reduce cache footprint. * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] | + * | sedrxxxx vvvvvvvv mmmmmmmm ffffffff bbbbbbbb pppppppp [c * 16 .......] | * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... | + * | [c * 48 ........ ........ ........ ........ .......] iiiiiiii aaaaaaaa | + * +---------------------------- 4th cacheline ----------------------------+ + * | oooooooo [t...... ........ ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. 
* From 56126d0d2d0730acde6416cf02efdb9ed19d578b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 12 Jul 2019 16:37:37 -0700 Subject: [PATCH 1335/2608] Refactor prof log Prof logging is conceptually seperate from core profiling, so split it out as a module of its own. There are a few internal functions that had to be exposed but I think it is a fair trade-off. --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 8 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/prof.c | 682 +---------------- src/prof_log.c | 698 ++++++++++++++++++ 8 files changed, 720 insertions(+), 677 deletions(-) create mode 100644 src/prof_log.c diff --git a/Makefile.in b/Makefile.in index 7128b007..1cd973d7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ + $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/stats.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 094f3e17..e94ac3b2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -43,6 +43,8 @@ extern uint64_t prof_interval; */ extern size_t lg_prof_sample; +extern bool prof_booted; + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -64,10 +66,14 @@ extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif +int prof_getpid(void); bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); 
void prof_gdump(tsdn_t *tsdn); + +void prof_bt_hash(const void *key, size_t r_hash[2]); +bool prof_bt_keycomp(const void *k1, const void *k2); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); @@ -91,8 +97,10 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); +bool prof_log_init(tsd_t *tsdn); #ifdef JEMALLOC_JET size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 228e8be0..d93d9099 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d839515b..7b09d4e6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index edcceede..28bd3cd6 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 6df72601..a66c209b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c 
b/src/prof.c index 13334cb4..7efa20db 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,7 +7,6 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/emitter.h" /******************************************************************************/ @@ -39,7 +38,6 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; -bool opt_prof_log = false; char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF @@ -72,100 +70,6 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -typedef enum prof_logging_state_e prof_logging_state_t; -enum prof_logging_state_e { - prof_logging_state_stopped, - prof_logging_state_started, - prof_logging_state_dumping -}; - -/* - * - stopped: log_start never called, or previous log_stop has completed. - * - started: log_start called, log_stop not called yet. Allocations are logged. - * - dumping: log_stop called but not finished; samples are not logged anymore. - */ -prof_logging_state_t prof_logging_state = prof_logging_state_stopped; - -#ifdef JEMALLOC_JET -static bool prof_log_dummy = false; -#endif - -/* Incremented for every log file that is output. */ -static uint64_t log_seq = 0; -static char log_filename[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; - -/* Timestamp for most recent call to log_start(). */ -static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; - -/* Increment these when adding to the log_bt and log_thr linked lists. */ -static size_t log_bt_index = 0; -static size_t log_thr_index = 0; - -/* Linked list node definitions. These are only used in prof.c. */ -typedef struct prof_bt_node_s prof_bt_node_t; - -struct prof_bt_node_s { - prof_bt_node_t *next; - size_t index; - prof_bt_t bt; - /* Variable size backtrace vector pointed to by bt. 
*/ - void *vec[1]; -}; - -typedef struct prof_thr_node_s prof_thr_node_t; - -struct prof_thr_node_s { - prof_thr_node_t *next; - size_t index; - uint64_t thr_uid; - /* Variable size based on thr_name_sz. */ - char name[1]; -}; - -typedef struct prof_alloc_node_s prof_alloc_node_t; - -/* This is output when logging sampled allocations. */ -struct prof_alloc_node_s { - prof_alloc_node_t *next; - /* Indices into an array of thread data. */ - size_t alloc_thr_ind; - size_t free_thr_ind; - - /* Indices into an array of backtraces. */ - size_t alloc_bt_ind; - size_t free_bt_ind; - - uint64_t alloc_time_ns; - uint64_t free_time_ns; - - size_t usize; -}; - -/* - * Created on the first call to prof_log_start and deleted on prof_log_stop. - * These are the backtraces and threads that have already been logged by an - * allocation. - */ -static bool log_tables_initialized = false; -static ckh_t log_bt_node_set; -static ckh_t log_thr_node_set; - -/* Store linked lists for logged data. */ -static prof_bt_node_t *log_bt_first = NULL; -static prof_bt_node_t *log_bt_last = NULL; -static prof_thr_node_t *log_thr_first = NULL; -static prof_thr_node_t *log_thr_last = NULL; -static prof_alloc_node_t *log_alloc_first = NULL; -static prof_alloc_node_t *log_alloc_last = NULL; - -/* Protects the prof_logging_state and any log_{...} variable. */ -static malloc_mutex_t log_mtx; - /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. @@ -225,7 +129,7 @@ static size_t prof_dump_buf_end; static int prof_dump_fd; /* Do not dump any profiles until bootstrapping is complete. 
*/ -static bool prof_booted = false; +bool prof_booted = false; /******************************************************************************/ /* @@ -241,12 +145,6 @@ static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached); static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); -/* Hashtable functions for log_bt_node_set and log_thr_node_set. */ -static void prof_thr_node_hash(const void *key, size_t r_hash[2]); -static bool prof_thr_node_keycomp(const void *k1, const void *k2); -static void prof_bt_node_hash(const void *key, size_t r_hash[2]); -static bool prof_bt_node_keycomp(const void *k1, const void *k2); - /******************************************************************************/ /* Red-black trees. */ @@ -361,162 +259,6 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, malloc_mutex_unlock(tsdn, tctx->tdata->lock); } -static size_t -prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_bt_node_t dummy_node; - dummy_node.bt = *bt; - prof_bt_node_t *node; - - /* See if this backtrace is already cached in the table. */ - if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_bt_node_t, vec) + - (bt->len * sizeof(void *)); - prof_bt_node_t *new_node = (prof_bt_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_bt_first == NULL) { - log_bt_first = new_node; - log_bt_last = new_node; - } else { - log_bt_last->next = new_node; - log_bt_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_bt_index; - /* - * Copy the backtrace: bt is inside a tdata or gctx, which - * might die before prof_log_stop is called. 
- */ - new_node->bt.len = bt->len; - memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); - new_node->bt.vec = new_node->vec; - - log_bt_index++; - ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} -static size_t -prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { - assert(prof_logging_state == prof_logging_state_started); - malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); - - prof_thr_node_t dummy_node; - dummy_node.thr_uid = thr_uid; - prof_thr_node_t *node; - - /* See if this thread is already cached in the table. */ - if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; - prof_thr_node_t *new_node = (prof_thr_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (log_thr_first == NULL) { - log_thr_first = new_node; - log_thr_last = new_node; - } else { - log_thr_last->next = new_node; - log_thr_last = new_node; - } - - new_node->next = NULL; - new_node->index = log_thr_index; - new_node->thr_uid = thr_uid; - strcpy(new_node->name, name); - - log_thr_index++; - ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); - return new_node->index; - } else { - return node->index; - } -} - -static void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); - if (cons_tdata == NULL) { - /* - * We decide not to log these allocations. cons_tdata will be - * NULL only when the current thread is in a weird state (e.g. - * it's being destroyed). 
- */ - return; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - goto label_done; - } - - if (!log_tables_initialized) { - bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp); - bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp); - if (err1 || err2) { - goto label_done; - } - log_tables_initialized = true; - } - - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, - (alloc_ctx_t *)NULL); - nstime_t free_time = NSTIME_ZERO_INITIALIZER; - nstime_update(&free_time); - - size_t sz = sizeof(prof_alloc_node_t); - prof_alloc_node_t *new_node = (prof_alloc_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - - const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? - "" : tctx->tdata->thread_name; - const char *cons_thr_name = prof_thread_name_get(tsd); - - prof_bt_t bt; - /* Initialize the backtrace, using the buffer in tdata to store it. */ - bt_init(&bt, cons_tdata->vec); - prof_backtrace(&bt); - prof_bt_t *cons_bt = &bt; - - /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ - prof_bt_t *prod_bt = &tctx->gctx->bt; - - new_node->next = NULL; - new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, - prod_thr_name); - new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, - cons_thr_name); - new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); - new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); - new_node->alloc_time_ns = nstime_ns(&alloc_time); - new_node->free_time_ns = nstime_ns(&free_time); - new_node->usize = usize; - - if (log_alloc_first == NULL) { - log_alloc_first = new_node; - log_alloc_last = new_node; - } else { - log_alloc_last->next = new_node; - log_alloc_last = new_node; - } - -label_done: - malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); -} - void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { @@ -1694,7 +1436,7 @@ prof_open_maps(const char *format, ...) { } #endif -static int +int prof_getpid(void) { #ifdef _WIN32 return GetCurrentProcessId(); @@ -2136,7 +1878,7 @@ prof_gdump(tsdn_t *tsdn) { } } -static void +void prof_bt_hash(const void *key, size_t r_hash[2]) { prof_bt_t *bt = (prof_bt_t *)key; @@ -2145,7 +1887,7 @@ prof_bt_hash(const void *key, size_t r_hash[2]) { hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); } -static bool +bool prof_bt_keycomp(const void *k1, const void *k2) { const prof_bt_t *bt1 = (prof_bt_t *)k1; const prof_bt_t *bt2 = (prof_bt_t *)k2; @@ -2158,33 +1900,6 @@ prof_bt_keycomp(const void *k1, const void *k2) { return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); } -static void -prof_bt_node_hash(const void *key, size_t r_hash[2]) { - const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; - prof_bt_hash((void *)(&bt_node->bt), r_hash); -} - -static bool -prof_bt_node_keycomp(const void *k1, const void *k2) { - const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; - const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; - return prof_bt_keycomp((void *)(&bt_node1->bt), - (void 
*)(&bt_node2->bt)); -} - -static void -prof_thr_node_hash(const void *key, size_t r_hash[2]) { - const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; - hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); -} - -static bool -prof_thr_node_keycomp(const void *k1, const void *k2) { - const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; - const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; - return thr_node1->thr_uid == thr_node2->thr_uid; -} - static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -2417,368 +2132,6 @@ prof_active_set(tsdn_t *tsdn, bool active) { return prof_active_old; } -#ifdef JEMALLOC_JET -size_t -prof_log_bt_count(void) { - size_t cnt = 0; - prof_bt_node_t *node = log_bt_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_alloc_count(void) { - size_t cnt = 0; - prof_alloc_node_t *node = log_alloc_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -size_t -prof_log_thr_count(void) { - size_t cnt = 0; - prof_thr_node_t *node = log_thr_first; - while (node != NULL) { - cnt++; - node = node->next; - } - return cnt; -} - -bool -prof_log_is_logging(void) { - return prof_logging_state == prof_logging_state_started; -} - -bool -prof_log_rep_check(void) { - if (prof_logging_state == prof_logging_state_stopped - && log_tables_initialized) { - return true; - } - - if (log_bt_last != NULL && log_bt_last->next != NULL) { - return true; - } - if (log_thr_last != NULL && log_thr_last->next != NULL) { - return true; - } - if (log_alloc_last != NULL && log_alloc_last->next != NULL) { - return true; - } - - size_t bt_count = prof_log_bt_count(); - size_t thr_count = prof_log_thr_count(); - size_t alloc_count = prof_log_alloc_count(); - - - if (prof_logging_state == prof_logging_state_stopped) { - if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { - return true; - } - } - - prof_alloc_node_t *node = log_alloc_first; - while (node 
!= NULL) { - if (node->alloc_bt_ind >= bt_count) { - return true; - } - if (node->free_bt_ind >= bt_count) { - return true; - } - if (node->alloc_thr_ind >= thr_count) { - return true; - } - if (node->free_thr_ind >= thr_count) { - return true; - } - if (node->alloc_time_ns > node->free_time_ns) { - return true; - } - node = node->next; - } - - return false; -} - -void -prof_log_dummy_set(bool new_value) { - prof_log_dummy = new_value; -} -#endif - -bool -prof_log_start(tsdn_t *tsdn, const char *filename) { - if (!opt_prof || !prof_booted) { - return true; - } - - bool ret = false; - size_t buf_size = PATH_MAX + 1; - - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_stopped) { - ret = true; - } else if (filename == NULL) { - /* Make default name. */ - malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", - opt_prof_prefix, prof_getpid(), log_seq); - log_seq++; - prof_logging_state = prof_logging_state_started; - } else if (strlen(filename) >= buf_size) { - ret = true; - } else { - strcpy(log_filename, filename); - prof_logging_state = prof_logging_state_started; - } - - if (!ret) { - nstime_update(&log_start_timestamp); - } - - malloc_mutex_unlock(tsdn, &log_mtx); - - return ret; -} - -/* Used as an atexit function to stop logging on exit. */ -static void -prof_log_stop_final(void) { - tsd_t *tsd = tsd_fetch(); - prof_log_stop(tsd_tsdn(tsd)); -} - -struct prof_emitter_cb_arg_s { - int fd; - ssize_t ret; -}; - -static void -prof_emitter_write_cb(void *opaque, const char *to_write) { - struct prof_emitter_cb_arg_s *arg = - (struct prof_emitter_cb_arg_s *)opaque; - size_t bytes = strlen(to_write); -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return; - } -#endif - arg->ret = write(arg->fd, (void *)to_write, bytes); -} - -/* - * prof_log_emit_{...} goes through the appropriate linked list, emitting each - * node to the json and deallocating it. 
- */ -static void -prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "threads"); - prof_thr_node_t *thr_node = log_thr_first; - prof_thr_node_t *thr_old_node; - while (thr_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, - &thr_node->thr_uid); - - char *thr_name = thr_node->name; - - emitter_json_kv(emitter, "thr_name", emitter_type_string, - &thr_name); - - emitter_json_object_end(emitter); - thr_old_node = thr_node; - thr_node = thr_node->next; - idalloc(tsd, thr_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "stack_traces"); - prof_bt_node_t *bt_node = log_bt_first; - prof_bt_node_t *bt_old_node; - /* - * Calculate how many hex digits we need: twice number of bytes, two for - * "0x", and then one more for terminating '\0'. - */ - char buf[2 * sizeof(intptr_t) + 3]; - size_t buf_sz = sizeof(buf); - while (bt_node != NULL) { - emitter_json_array_begin(emitter); - size_t i; - for (i = 0; i < bt_node->bt.len; i++) { - malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); - char *trace_str = buf; - emitter_json_value(emitter, emitter_type_string, - &trace_str); - } - emitter_json_array_end(emitter); - - bt_old_node = bt_node; - bt_node = bt_node->next; - idalloc(tsd, bt_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { - emitter_json_array_kv_begin(emitter, "allocations"); - prof_alloc_node_t *alloc_node = log_alloc_first; - prof_alloc_node_t *alloc_old_node; - while (alloc_node != NULL) { - emitter_json_object_begin(emitter); - - emitter_json_kv(emitter, "alloc_thread", emitter_type_size, - &alloc_node->alloc_thr_ind); - - emitter_json_kv(emitter, "free_thread", emitter_type_size, - &alloc_node->free_thr_ind); - - emitter_json_kv(emitter, "alloc_trace", 
emitter_type_size, - &alloc_node->alloc_bt_ind); - - emitter_json_kv(emitter, "free_trace", emitter_type_size, - &alloc_node->free_bt_ind); - - emitter_json_kv(emitter, "alloc_timestamp", - emitter_type_uint64, &alloc_node->alloc_time_ns); - - emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, - &alloc_node->free_time_ns); - - emitter_json_kv(emitter, "usize", emitter_type_uint64, - &alloc_node->usize); - - emitter_json_object_end(emitter); - - alloc_old_node = alloc_node; - alloc_node = alloc_node->next; - idalloc(tsd, alloc_old_node); - } - emitter_json_array_end(emitter); -} - -static void -prof_log_emit_metadata(emitter_t *emitter) { - emitter_json_object_kv_begin(emitter, "info"); - - nstime_t now = NSTIME_ZERO_INITIALIZER; - - nstime_update(&now); - uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); - emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); - - char *vers = JEMALLOC_VERSION; - emitter_json_kv(emitter, "version", - emitter_type_string, &vers); - - emitter_json_kv(emitter, "lg_sample_rate", - emitter_type_int, &lg_prof_sample); - - int pid = prof_getpid(); - emitter_json_kv(emitter, "pid", emitter_type_int, &pid); - - emitter_json_object_end(emitter); -} - - -bool -prof_log_stop(tsdn_t *tsdn) { - if (!opt_prof || !prof_booted) { - return true; - } - - tsd_t *tsd = tsdn_tsd(tsdn); - malloc_mutex_lock(tsdn, &log_mtx); - - if (prof_logging_state != prof_logging_state_started) { - malloc_mutex_unlock(tsdn, &log_mtx); - return true; - } - - /* - * Set the state to dumping. We'll set it to stopped when we're done. - * Since other threads won't be able to start/stop/log when the state is - * dumping, we don't have to hold the lock during the whole method. - */ - prof_logging_state = prof_logging_state_dumping; - malloc_mutex_unlock(tsdn, &log_mtx); - - - emitter_t emitter; - - /* Create a file. 
*/ - - int fd; -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - fd = 0; - } else { - fd = creat(log_filename, 0644); - } -#else - fd = creat(log_filename, 0644); -#endif - - if (fd == -1) { - malloc_printf(": creat() for log file \"%s\" " - " failed with %d\n", log_filename, errno); - if (opt_abort) { - abort(); - } - return true; - } - - /* Emit to json. */ - struct prof_emitter_cb_arg_s arg; - arg.fd = fd; - emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, - (void *)(&arg)); - - emitter_begin(&emitter); - prof_log_emit_metadata(&emitter); - prof_log_emit_threads(tsd, &emitter); - prof_log_emit_traces(tsd, &emitter); - prof_log_emit_allocs(tsd, &emitter); - emitter_end(&emitter); - - /* Reset global state. */ - if (log_tables_initialized) { - ckh_delete(tsd, &log_bt_node_set); - ckh_delete(tsd, &log_thr_node_set); - } - log_tables_initialized = false; - log_bt_index = 0; - log_thr_index = 0; - log_bt_first = NULL; - log_bt_last = NULL; - log_thr_first = NULL; - log_thr_last = NULL; - log_alloc_first = NULL; - log_alloc_last = NULL; - - malloc_mutex_lock(tsdn, &log_mtx); - prof_logging_state = prof_logging_state_stopped; - malloc_mutex_unlock(tsdn, &log_mtx); - -#ifdef JEMALLOC_JET - if (prof_log_dummy) { - return false; - } -#endif - return close(fd); -} - const char * prof_thread_name_get(tsd_t *tsd) { prof_tdata_t *tdata; @@ -3015,35 +2368,10 @@ prof_boot2(tsd_t *tsd) { } } - if (opt_prof_log) { - prof_log_start(tsd_tsdn(tsd), NULL); - } - - if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " - "for logging\n"); - if (opt_abort) { - abort(); - } - } - - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + if (prof_log_init(tsd)) { return true; } - if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp)) { - return true; - } - - if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp)) { - 
return true; - } - - log_tables_initialized = true; - gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); diff --git a/src/prof_log.c b/src/prof_log.c new file mode 100644 index 00000000..56d4e035 --- /dev/null +++ b/src/prof_log.c @@ -0,0 +1,698 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/emitter.h" + +bool opt_prof_log = false; +typedef enum prof_logging_state_e prof_logging_state_t; +enum prof_logging_state_e { + prof_logging_state_stopped, + prof_logging_state_started, + prof_logging_state_dumping +}; + +/* + * - stopped: log_start never called, or previous log_stop has completed. + * - started: log_start called, log_stop not called yet. Allocations are logged. + * - dumping: log_stop called but not finished; samples are not logged anymore. + */ +prof_logging_state_t prof_logging_state = prof_logging_state_stopped; + +#ifdef JEMALLOC_JET +static bool prof_log_dummy = false; +#endif + +/* Incremented for every log file that is output. */ +static uint64_t log_seq = 0; +static char log_filename[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PATH_MAX + +#endif + 1]; + +/* Timestamp for most recent call to log_start(). */ +static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; + +/* Increment these when adding to the log_bt and log_thr linked lists. */ +static size_t log_bt_index = 0; +static size_t log_thr_index = 0; + +/* Linked list node definitions. These are only used in this file. 
*/ +typedef struct prof_bt_node_s prof_bt_node_t; + +struct prof_bt_node_s { + prof_bt_node_t *next; + size_t index; + prof_bt_t bt; + /* Variable size backtrace vector pointed to by bt. */ + void *vec[1]; +}; + +typedef struct prof_thr_node_s prof_thr_node_t; + +struct prof_thr_node_s { + prof_thr_node_t *next; + size_t index; + uint64_t thr_uid; + /* Variable size based on thr_name_sz. */ + char name[1]; +}; + +typedef struct prof_alloc_node_s prof_alloc_node_t; + +/* This is output when logging sampled allocations. */ +struct prof_alloc_node_s { + prof_alloc_node_t *next; + /* Indices into an array of thread data. */ + size_t alloc_thr_ind; + size_t free_thr_ind; + + /* Indices into an array of backtraces. */ + size_t alloc_bt_ind; + size_t free_bt_ind; + + uint64_t alloc_time_ns; + uint64_t free_time_ns; + + size_t usize; +}; + +/* + * Created on the first call to prof_log_start and deleted on prof_log_stop. + * These are the backtraces and threads that have already been logged by an + * allocation. + */ +static bool log_tables_initialized = false; +static ckh_t log_bt_node_set; +static ckh_t log_thr_node_set; + +/* Store linked lists for logged data. */ +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; +static prof_alloc_node_t *log_alloc_first = NULL; +static prof_alloc_node_t *log_alloc_last = NULL; + +/* Protects the prof_logging_state and any log_{...} variable. */ +static malloc_mutex_t log_mtx; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. + */ + +/* Hashtable functions for log_bt_node_set and log_thr_node_set. 
*/ +static void prof_thr_node_hash(const void *key, size_t r_hash[2]); +static bool prof_thr_node_keycomp(const void *k1, const void *k2); +static void prof_bt_node_hash(const void *key, size_t r_hash[2]); +static bool prof_bt_node_keycomp(const void *k1, const void *k2); + +/******************************************************************************/ + +static size_t +prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_bt_node_t dummy_node; + dummy_node.bt = *bt; + prof_bt_node_t *node; + + /* See if this backtrace is already cached in the table. */ + if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_bt_first == NULL) { + log_bt_first = new_node; + log_bt_last = new_node; + } else { + log_bt_last->next = new_node; + log_bt_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_bt_index; + /* + * Copy the backtrace: bt is inside a tdata or gctx, which + * might die before prof_log_stop is called. + */ + new_node->bt.len = bt->len; + memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *)); + new_node->bt.vec = new_node->vec; + + log_bt_index++; + ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} +static size_t +prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { + assert(prof_logging_state == prof_logging_state_started); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx); + + prof_thr_node_t dummy_node; + dummy_node.thr_uid = thr_uid; + prof_thr_node_t *node; + + /* See if this thread is already cached in the table. 
*/ + if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; + prof_thr_node_t *new_node = (prof_thr_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); + if (log_thr_first == NULL) { + log_thr_first = new_node; + log_thr_last = new_node; + } else { + log_thr_last->next = new_node; + log_thr_last = new_node; + } + + new_node->next = NULL; + new_node->index = log_thr_index; + new_node->thr_uid = thr_uid; + strcpy(new_node->name, name); + + log_thr_index++; + ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL); + return new_node->index; + } else { + return node->index; + } +} + +void +prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); + if (cons_tdata == NULL) { + /* + * We decide not to log these allocations. cons_tdata will be + * NULL only when the current thread is in a weird state (e.g. + * it's being destroyed). 
+ */ + return; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + goto label_done; + } + + if (!log_tables_initialized) { + bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp); + bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp); + if (err1 || err2) { + goto label_done; + } + log_tables_initialized = true; + } + + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, + (alloc_ctx_t *)NULL); + nstime_t free_time = NSTIME_ZERO_INITIALIZER; + nstime_update(&free_time); + + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *) + iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + + const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? + "" : tctx->tdata->thread_name; + const char *cons_thr_name = prof_thread_name_get(tsd); + + prof_bt_t bt; + /* Initialize the backtrace, using the buffer in tdata to store it. */ + bt_init(&bt, cons_tdata->vec); + prof_backtrace(&bt); + prof_bt_t *cons_bt = &bt; + + /* We haven't destroyed tctx yet, so gctx should be good to read. 
*/ + prof_bt_t *prod_bt = &tctx->gctx->bt; + + new_node->next = NULL; + new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, + prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, + cons_thr_name); + new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); + new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); + new_node->alloc_time_ns = nstime_ns(&alloc_time); + new_node->free_time_ns = nstime_ns(&free_time); + new_node->usize = usize; + + if (log_alloc_first == NULL) { + log_alloc_first = new_node; + log_alloc_last = new_node; + } else { + log_alloc_last->next = new_node; + log_alloc_last = new_node; + } + +label_done: + malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx); +} + +static void +prof_bt_node_hash(const void *key, size_t r_hash[2]) { + const prof_bt_node_t *bt_node = (prof_bt_node_t *)key; + prof_bt_hash((void *)(&bt_node->bt), r_hash); +} + +static bool +prof_bt_node_keycomp(const void *k1, const void *k2) { + const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; + const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; + return prof_bt_keycomp((void *)(&bt_node1->bt), + (void *)(&bt_node2->bt)); +} + +static void +prof_thr_node_hash(const void *key, size_t r_hash[2]) { + const prof_thr_node_t *thr_node = (prof_thr_node_t *)key; + hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash); +} + +static bool +prof_thr_node_keycomp(const void *k1, const void *k2) { + const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1; + const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2; + return thr_node1->thr_uid == thr_node2->thr_uid; +} + +#ifdef JEMALLOC_JET +size_t +prof_log_bt_count(void) { + size_t cnt = 0; + prof_bt_node_t *node = log_bt_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +size_t +prof_log_alloc_count(void) { + size_t cnt = 0; + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; 
+} + +size_t +prof_log_thr_count(void) { + size_t cnt = 0; + prof_thr_node_t *node = log_thr_first; + while (node != NULL) { + cnt++; + node = node->next; + } + return cnt; +} + +bool +prof_log_is_logging(void) { + return prof_logging_state == prof_logging_state_started; +} + +bool +prof_log_rep_check(void) { + if (prof_logging_state == prof_logging_state_stopped + && log_tables_initialized) { + return true; + } + + if (log_bt_last != NULL && log_bt_last->next != NULL) { + return true; + } + if (log_thr_last != NULL && log_thr_last->next != NULL) { + return true; + } + if (log_alloc_last != NULL && log_alloc_last->next != NULL) { + return true; + } + + size_t bt_count = prof_log_bt_count(); + size_t thr_count = prof_log_thr_count(); + size_t alloc_count = prof_log_alloc_count(); + + + if (prof_logging_state == prof_logging_state_stopped) { + if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { + return true; + } + } + + prof_alloc_node_t *node = log_alloc_first; + while (node != NULL) { + if (node->alloc_bt_ind >= bt_count) { + return true; + } + if (node->free_bt_ind >= bt_count) { + return true; + } + if (node->alloc_thr_ind >= thr_count) { + return true; + } + if (node->free_thr_ind >= thr_count) { + return true; + } + if (node->alloc_time_ns > node->free_time_ns) { + return true; + } + node = node->next; + } + + return false; +} + +void +prof_log_dummy_set(bool new_value) { + prof_log_dummy = new_value; +} +#endif + +bool +prof_log_start(tsdn_t *tsdn, const char *filename) { + if (!opt_prof || !prof_booted) { + return true; + } + + bool ret = false; + size_t buf_size = PATH_MAX + 1; + + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_stopped) { + ret = true; + } else if (filename == NULL) { + /* Make default name. 
*/ + malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", + opt_prof_prefix, prof_getpid(), log_seq); + log_seq++; + prof_logging_state = prof_logging_state_started; + } else if (strlen(filename) >= buf_size) { + ret = true; + } else { + strcpy(log_filename, filename); + prof_logging_state = prof_logging_state_started; + } + + if (!ret) { + nstime_update(&log_start_timestamp); + } + + malloc_mutex_unlock(tsdn, &log_mtx); + + return ret; +} + +/* Used as an atexit function to stop logging on exit. */ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + +struct prof_emitter_cb_arg_s { + int fd; + ssize_t ret; +}; + +static void +prof_emitter_write_cb(void *opaque, const char *to_write) { + struct prof_emitter_cb_arg_s *arg = + (struct prof_emitter_cb_arg_s *)opaque; + size_t bytes = strlen(to_write); +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return; + } +#endif + arg->ret = write(arg->fd, (void *)to_write, bytes); +} + +/* + * prof_log_emit_{...} goes through the appropriate linked list, emitting each + * node to the json and deallocating it. 
+ */ +static void +prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "threads"); + prof_thr_node_t *thr_node = log_thr_first; + prof_thr_node_t *thr_old_node; + while (thr_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "thr_uid", emitter_type_uint64, + &thr_node->thr_uid); + + char *thr_name = thr_node->name; + + emitter_json_kv(emitter, "thr_name", emitter_type_string, + &thr_name); + + emitter_json_object_end(emitter); + thr_old_node = thr_node; + thr_node = thr_node->next; + idalloc(tsd, thr_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "stack_traces"); + prof_bt_node_t *bt_node = log_bt_first; + prof_bt_node_t *bt_old_node; + /* + * Calculate how many hex digits we need: twice number of bytes, two for + * "0x", and then one more for terminating '\0'. + */ + char buf[2 * sizeof(intptr_t) + 3]; + size_t buf_sz = sizeof(buf); + while (bt_node != NULL) { + emitter_json_array_begin(emitter); + size_t i; + for (i = 0; i < bt_node->bt.len; i++) { + malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); + char *trace_str = buf; + emitter_json_value(emitter, emitter_type_string, + &trace_str); + } + emitter_json_array_end(emitter); + + bt_old_node = bt_node; + bt_node = bt_node->next; + idalloc(tsd, bt_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { + emitter_json_array_kv_begin(emitter, "allocations"); + prof_alloc_node_t *alloc_node = log_alloc_first; + prof_alloc_node_t *alloc_old_node; + while (alloc_node != NULL) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "alloc_thread", emitter_type_size, + &alloc_node->alloc_thr_ind); + + emitter_json_kv(emitter, "free_thread", emitter_type_size, + &alloc_node->free_thr_ind); + + emitter_json_kv(emitter, "alloc_trace", 
emitter_type_size, + &alloc_node->alloc_bt_ind); + + emitter_json_kv(emitter, "free_trace", emitter_type_size, + &alloc_node->free_bt_ind); + + emitter_json_kv(emitter, "alloc_timestamp", + emitter_type_uint64, &alloc_node->alloc_time_ns); + + emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, + &alloc_node->free_time_ns); + + emitter_json_kv(emitter, "usize", emitter_type_uint64, + &alloc_node->usize); + + emitter_json_object_end(emitter); + + alloc_old_node = alloc_node; + alloc_node = alloc_node->next; + idalloc(tsd, alloc_old_node); + } + emitter_json_array_end(emitter); +} + +static void +prof_log_emit_metadata(emitter_t *emitter) { + emitter_json_object_kv_begin(emitter, "info"); + + nstime_t now = NSTIME_ZERO_INITIALIZER; + + nstime_update(&now); + uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); + emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); + + char *vers = JEMALLOC_VERSION; + emitter_json_kv(emitter, "version", + emitter_type_string, &vers); + + emitter_json_kv(emitter, "lg_sample_rate", + emitter_type_int, &lg_prof_sample); + + int pid = prof_getpid(); + emitter_json_kv(emitter, "pid", emitter_type_int, &pid); + + emitter_json_object_end(emitter); +} + + +bool +prof_log_stop(tsdn_t *tsdn) { + if (!opt_prof || !prof_booted) { + return true; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + malloc_mutex_lock(tsdn, &log_mtx); + + if (prof_logging_state != prof_logging_state_started) { + malloc_mutex_unlock(tsdn, &log_mtx); + return true; + } + + /* + * Set the state to dumping. We'll set it to stopped when we're done. + * Since other threads won't be able to start/stop/log when the state is + * dumping, we don't have to hold the lock during the whole method. + */ + prof_logging_state = prof_logging_state_dumping; + malloc_mutex_unlock(tsdn, &log_mtx); + + + emitter_t emitter; + + /* Create a file. 
*/ + + int fd; +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + fd = 0; + } else { + fd = creat(log_filename, 0644); + } +#else + fd = creat(log_filename, 0644); +#endif + + if (fd == -1) { + malloc_printf(": creat() for log file \"%s\" " + " failed with %d\n", log_filename, errno); + if (opt_abort) { + abort(); + } + return true; + } + + /* Emit to json. */ + struct prof_emitter_cb_arg_s arg; + arg.fd = fd; + emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, + (void *)(&arg)); + + emitter_begin(&emitter); + prof_log_emit_metadata(&emitter); + prof_log_emit_threads(tsd, &emitter); + prof_log_emit_traces(tsd, &emitter); + prof_log_emit_allocs(tsd, &emitter); + emitter_end(&emitter); + + /* Reset global state. */ + if (log_tables_initialized) { + ckh_delete(tsd, &log_bt_node_set); + ckh_delete(tsd, &log_thr_node_set); + } + log_tables_initialized = false; + log_bt_index = 0; + log_thr_index = 0; + log_bt_first = NULL; + log_bt_last = NULL; + log_thr_first = NULL; + log_thr_last = NULL; + log_alloc_first = NULL; + log_alloc_last = NULL; + + malloc_mutex_lock(tsdn, &log_mtx); + prof_logging_state = prof_logging_state_stopped; + malloc_mutex_unlock(tsdn, &log_mtx); + +#ifdef JEMALLOC_JET + if (prof_log_dummy) { + return false; + } +#endif + return close(fd); +} + +bool prof_log_init(tsd_t *tsd) { + if (opt_prof_log) { + prof_log_start(tsd_tsdn(tsd), NULL); + } + + if (atexit(prof_log_stop_final) != 0) { + malloc_write(": Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + } + + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + return true; + } + + if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, + prof_bt_node_hash, prof_bt_node_keycomp)) { + return true; + } + + if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, + prof_thr_node_hash, prof_thr_node_keycomp)) { + return true; + } + + log_tables_initialized = true; + return false; +} + 
+/******************************************************************************/ From 07ce2434bf45420ff9d9d22590f68540c6dd7b78 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jul 2019 15:52:50 -0700 Subject: [PATCH 1336/2608] Refactor profiling Refactored core profiling codebase into two logical parts: (a) `prof_data.c`: core internal data structure managing & dumping; (b) `prof.c`: mutexes & outward-facing APIs. Some internal functions had to be exposed out, but there are not that many of them if the modularization is (hopefully) clean enough. --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 14 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/prof.c | 1491 +---------------- src/prof_data.c | 1441 ++++++++++++++++ 8 files changed, 1503 insertions(+), 1452 deletions(-) create mode 100644 src/prof_data.c diff --git a/Makefile.in b/Makefile.in index 1cd973d7..40daf115 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ + $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e94ac3b2..8fc45cf7 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -4,6 +4,11 @@ #include "jemalloc/internal/mutex.h" extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; + +malloc_mutex_t *prof_gctx_mutex_choose(void); +malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); extern bool opt_prof; extern bool opt_prof_active; @@ -110,4 +115,13 @@ bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); #endif +/* 
Functions in prof_data.c only accessed in prof.c */ +bool prof_data_init(tsd_t *tsd); +bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck); +prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, + uint64_t thr_discrim, char *thread_name, bool active); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d93d9099..387f14be 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 7b09d4e6..030d8266 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 28bd3cd6..1606a3ab 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index a66c209b..622b93f1 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 7efa20db..79a0ffc8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -3,11 +3,14 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/ckh.h" -#include "jemalloc/internal/hash.h" -#include 
"jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +/* + * This file implements the profiling "APIs" needed by other parts of jemalloc, + * and also manages the relevant "operational" data, mainly options and mutexes; + * the core profiling data structures are encapsulated in prof_data.c. + */ + /******************************************************************************/ #ifdef JEMALLOC_PROF_LIBUNWIND @@ -88,20 +91,10 @@ static atomic_u_t cum_gctxs; /* Atomic counter. */ */ static malloc_mutex_t *tdata_locks; -/* - * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data - * structure that knows about all backtraces currently captured. - */ -static ckh_t bt2gctx; /* Non static to enable profiling. */ malloc_mutex_t bt2gctx_mtx; -/* - * Tree of all extant prof_tdata_t structures, regardless of state, - * {attached,detached,expired}. - */ -static prof_tdata_tree_t tdatas; -static malloc_mutex_t tdatas_mtx; +malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -112,101 +105,29 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static malloc_mutex_t prof_dump_mtx; -static char prof_dump_buf[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PROF_DUMP_BUFSIZE -#else - 1 -#endif -]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +malloc_mutex_t prof_dump_mtx; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; /******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. 
- */ -static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx); -static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); -static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached); -static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached); -static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +static bool +prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); -/******************************************************************************/ -/* Red-black trees. */ - -static int -prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { - uint64_t a_thr_uid = a->thr_uid; - uint64_t b_thr_uid = b->thr_uid; - int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); - if (ret == 0) { - uint64_t a_thr_discrim = a->thr_discrim; - uint64_t b_thr_discrim = b->thr_discrim; - ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < - b_thr_discrim); - if (ret == 0) { - uint64_t a_tctx_uid = a->tctx_uid; - uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < - b_tctx_uid); - } + if (opt_prof_accum) { + return false; } - return ret; -} - -rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, - tctx_link, prof_tctx_comp) - -static int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { - unsigned a_len = a->bt.len; - unsigned b_len = b->bt.len; - unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; - int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); - if (ret == 0) { - ret = (a_len > b_len) - (a_len < b_len); + if (tctx->cnts.curobjs != 0) { + return false; } - return ret; -} - -rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, - prof_gctx_comp) - -static int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { - int ret; - uint64_t a_uid = a->thr_uid; - uint64_t b_uid = b->thr_uid; - - ret = ((a_uid > b_uid) - (a_uid < b_uid)); - if (ret == 0) { - uint64_t a_discrim = a->thr_discrim; - uint64_t b_discrim = b->thr_discrim; - - ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + if (tctx->prepared) { + return false; } - return ret; + return true; } -rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, - prof_tdata_comp) - -/******************************************************************************/ - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { prof_tdata_t *tdata; @@ -286,45 +207,6 @@ bt_init(prof_bt_t *bt, void **vec) { bt->len = 0; } -static void -prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - if (tdata != NULL) { - assert(!tdata->enq); - tdata->enq = true; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); -} - -static void -prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { - cassert(config_prof); - assert(tdata == prof_tdata_get(tsd, false)); - - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - if (tdata != NULL) { - bool idump, gdump; - - assert(tdata->enq); - tdata->enq = false; - idump = tdata->enq_idump; - tdata->enq_idump = false; - gdump = tdata->enq_gdump; - tdata->enq_gdump = false; - - if (idump) { - prof_idump(tsd_tsdn(tsd)); - } - if (gdump) { - prof_gdump(tsd_tsdn(tsd)); - } - } -} - #ifdef JEMALLOC_PROF_LIBUNWIND void prof_backtrace(prof_bt_t *bt) { @@ -547,324 +429,18 @@ prof_backtrace(prof_bt_t *bt) { } #endif -static 
malloc_mutex_t * +malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; } -static malloc_mutex_t * +malloc_mutex_t * prof_tdata_mutex_choose(uint64_t thr_uid) { return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; } -static prof_gctx_t * -prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { - /* - * Create a single allocation that has space for vec of length bt->len. - */ - size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); - prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); - if (gctx == NULL) { - return NULL; - } - gctx->lock = prof_gctx_mutex_choose(); - /* - * Set nlimbo to 1, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - gctx->nlimbo = 1; - tctx_tree_new(&gctx->tctxs); - /* Duplicate bt. */ - memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); - gctx->bt.vec = gctx->vec; - gctx->bt.len = bt->len; - return gctx; -} - -static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) { - cassert(config_prof); - - /* - * Check that gctx is still unused by any thread cache before destroying - * it. prof_lookup() increments gctx->nlimbo in order to avoid a race - * condition with this function, as does prof_tctx_destroy() in order to - * avoid a race between the main body of prof_tctx_destroy() and entry - * into this function. - */ - prof_enter(tsd, tdata_self); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - assert(gctx->nlimbo != 0); - if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { - /* Remove gctx from bt2gctx. */ - if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { - not_reached(); - } - prof_leave(tsd, tdata_self); - /* Destroy gctx. 
*/ - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); - } else { - /* - * Compensate for increment in prof_tctx_destroy() or - * prof_lookup(). - */ - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_leave(tsd, tdata_self); - } -} - -static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - if (opt_prof_accum) { - return false; - } - if (tctx->cnts.curobjs != 0) { - return false; - } - if (tctx->prepared) { - return false; - } - return true; -} - -static bool -prof_gctx_should_destroy(prof_gctx_t *gctx) { - if (opt_prof_accum) { - return false; - } - if (!tctx_tree_empty(&gctx->tctxs)) { - return false; - } - if (gctx->nlimbo != 0) { - return false; - } - return true; -} - -static void -prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { - prof_tdata_t *tdata = tctx->tdata; - prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_tctx, destroy_gctx; - - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - assert(tctx->cnts.curobjs == 0); - assert(tctx->cnts.curbytes == 0); - assert(!opt_prof_accum); - assert(tctx->cnts.accumobjs == 0); - assert(tctx->cnts.accumbytes == 0); - - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - switch (tctx->state) { - case prof_tctx_state_nominal: - tctx_tree_remove(&gctx->tctxs, tctx); - destroy_tctx = true; - if (prof_gctx_should_destroy(gctx)) { - /* - * Increment gctx->nlimbo in order to keep another - * thread from winning the race to destroy gctx while - * this one has gctx->lock dropped. Without this, it - * would be possible for another thread to: - * - * 1) Sample an allocation associated with gctx. - * 2) Deallocate the sampled object. 
- * 3) Successfully prof_gctx_try_destroy(gctx). - * - * The result would be that gctx no longer exists by the - * time this thread accesses it in - * prof_gctx_try_destroy(). - */ - gctx->nlimbo++; - destroy_gctx = true; - } else { - destroy_gctx = false; - } - break; - case prof_tctx_state_dumping: - /* - * A dumping thread needs tctx to remain valid until dumping - * has finished. Change state such that the dumping thread will - * complete destruction during a late dump iteration phase. - */ - tctx->state = prof_tctx_state_purgatory; - destroy_tctx = false; - destroy_gctx = false; - break; - default: - not_reached(); - destroy_tctx = false; - destroy_gctx = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - if (destroy_gctx) { - prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, - tdata); - } - - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, false); - } - - if (destroy_tctx) { - idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); - } -} - -static bool -prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, - void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { - union { - prof_gctx_t *p; - void *v; - } gctx, tgctx; - union { - prof_bt_t *p; - void *v; - } btkey; - bool new_gctx; - - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - /* bt has never been seen before. Insert it. */ - prof_leave(tsd, tdata); - tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); - if (tgctx.v == NULL) { - return true; - } - prof_enter(tsd, tdata); - if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { - gctx.p = tgctx.p; - btkey.p = &gctx.p->bt; - if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { - /* OOM. 
*/ - prof_leave(tsd, tdata); - idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, - true, true); - return true; - } - new_gctx = true; - } else { - new_gctx = false; - } - } else { - tgctx.v = NULL; - new_gctx = false; - } - - if (!new_gctx) { - /* - * Increment nlimbo, in order to avoid a race condition with - * prof_tctx_destroy()/prof_gctx_try_destroy(). - */ - malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); - gctx.p->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); - new_gctx = false; - - if (tgctx.v != NULL) { - /* Lost race to insert. */ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, - true); - } - } - prof_leave(tsd, tdata); - - *p_btkey = btkey.v; - *p_gctx = gctx.p; - *p_new_gctx = new_gctx; - return false; -} - -prof_tctx_t * -prof_lookup(tsd_t *tsd, prof_bt_t *bt) { - union { - prof_tctx_t *p; - void *v; - } ret; - prof_tdata_t *tdata; - bool not_found; - - cassert(config_prof); - - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return NULL; - } - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); - if (!not_found) { /* Note double negative! */ - ret.p->prepared = true; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (not_found) { - void *btkey; - prof_gctx_t *gctx; - bool new_gctx, error; - - /* - * This thread's cache lacks bt. Look for it in the global - * cache. - */ - if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) { - return NULL; - } - - /* Link a prof_tctx_t into gctx for this thread. 
*/ - ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), - sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, - arena_ichoose(tsd, NULL), true); - if (ret.p == NULL) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - return NULL; - } - ret.p->tdata = tdata; - ret.p->thr_uid = tdata->thr_uid; - ret.p->thr_discrim = tdata->thr_discrim; - memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); - ret.p->gctx = gctx; - ret.p->tctx_uid = tdata->tctx_uid_next++; - ret.p->prepared = true; - ret.p->state = prof_tctx_state_initializing; - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (error) { - if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } - idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); - return NULL; - } - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - ret.p->state = prof_tctx_state_nominal; - tctx_tree_insert(&gctx->tctxs, ret.p); - gctx->nlimbo--; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - - return ret.p; -} - /* * The bodies of this function and prof_leakcheck() are compiled out unless heap * profiling is enabled, so that it is possible to compile jemalloc with @@ -921,521 +497,6 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { #endif } -#ifdef JEMALLOC_JET -static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - size_t *tdata_count = (size_t *)arg; - - (*tdata_count)++; - - return NULL; -} - -size_t -prof_tdata_count(void) { - size_t tdata_count = 0; - tsdn_t *tsdn; - - tsdn = tsdn_fetch(); - malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, - (void *)&tdata_count); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - - return tdata_count; -} - -size_t -prof_bt_count(void) { - size_t bt_count; - tsd_t *tsd; - prof_tdata_t *tdata; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if 
(tdata == NULL) { - return 0; - } - - malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); - bt_count = ckh_count(&bt2gctx); - malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); - - return bt_count; -} -#endif - -static int -prof_dump_open_impl(bool propagate_err, const char *filename) { - int fd; - - fd = creat(filename, 0644); - if (fd == -1 && !propagate_err) { - malloc_printf(": creat(\"%s\"), 0644) failed\n", - filename); - if (opt_abort) { - abort(); - } - } - - return fd; -} -prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; - -static bool -prof_dump_flush(bool propagate_err) { - bool ret = false; - ssize_t err; - - cassert(config_prof); - - err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); - if (err == -1) { - if (!propagate_err) { - malloc_write(": write() failed during heap " - "profile flush\n"); - if (opt_abort) { - abort(); - } - } - ret = true; - } - prof_dump_buf_end = 0; - - return ret; -} - -static bool -prof_dump_close(bool propagate_err) { - bool ret; - - assert(prof_dump_fd != -1); - ret = prof_dump_flush(propagate_err); - close(prof_dump_fd); - prof_dump_fd = -1; - - return ret; -} - -static bool -prof_dump_write(bool propagate_err, const char *s) { - size_t i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - if (prof_dump_flush(propagate_err) && propagate_err) { - return true; - } - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - assert(i == slen); - - return false; -} - -JEMALLOC_FORMAT_PRINTF(2, 3) -static bool -prof_dump_printf(bool propagate_err, const char *format, ...) 
{ - bool ret; - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - ret = prof_dump_write(propagate_err, buf); - - return ret; -} - -static void -prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - malloc_mutex_lock(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - return; - case prof_tctx_state_nominal: - tctx->state = prof_tctx_state_dumping; - malloc_mutex_unlock(tsdn, tctx->gctx->lock); - - memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); - - tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - tdata->cnt_summed.accumobjs += - tctx->dump_cnts.accumobjs; - tdata->cnt_summed.accumbytes += - tctx->dump_cnts.accumbytes; - } - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - not_reached(); - } -} - -static void -prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { - malloc_mutex_assert_owner(tsdn, gctx->lock); - - gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - if (opt_prof_accum) { - gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; - } -} - -static prof_tctx_t * -prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. 
*/ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); - break; - default: - not_reached(); - } - - return NULL; -} - -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_initializing: - case prof_tctx_state_nominal: - /* Not captured by this dump. */ - break; - case prof_tctx_state_dumping: - case prof_tctx_state_purgatory: - if (prof_dump_printf(arg->propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " - "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) { - return tctx; - } - break; - default: - not_reached(); - } - return NULL; -} - -static prof_tctx_t * -prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - prof_tctx_t *ret; - - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); - - switch (tctx->state) { - case prof_tctx_state_nominal: - /* New since dumping started; ignore. */ - break; - case prof_tctx_state_dumping: - tctx->state = prof_tctx_state_nominal; - break; - case prof_tctx_state_purgatory: - ret = tctx; - goto label_return; - default: - not_reached(); - } - - ret = NULL; -label_return: - return ret; -} - -static void -prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { - cassert(config_prof); - - malloc_mutex_lock(tsdn, gctx->lock); - - /* - * Increment nlimbo so that gctx won't go away before dump. - * Additionally, link gctx into the dump list so that it is included in - * prof_dump()'s second pass. 
- */ - gctx->nlimbo++; - gctx_tree_insert(gctxs, gctx); - - memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); - - malloc_mutex_unlock(tsdn, gctx->lock); -} - -struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; -}; - -static prof_gctx_t * -prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); - if (gctx->cnt_summed.curobjs != 0) { - arg->leak_ngctx++; - } - malloc_mutex_unlock(arg->tsdn, gctx->lock); - - return NULL; -} - -static void -prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { - prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_gctx_t *gctx; - - /* - * Standard tree iteration won't work here, because as soon as we - * decrement gctx->nlimbo and unlock gctx, another thread can - * concurrently destroy it, which will corrupt the tree. Therefore, - * tear down the tree one node at a time during iteration. 
- */ - while ((gctx = gctx_tree_first(gctxs)) != NULL) { - gctx_tree_remove(gctxs, gctx); - malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); - { - prof_tctx_t *next; - - next = NULL; - do { - prof_tctx_t *to_destroy = - tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, - (void *)tsd_tsdn(tsd)); - if (to_destroy != NULL) { - next = tctx_tree_next(&gctx->tctxs, - to_destroy); - tctx_tree_remove(&gctx->tctxs, - to_destroy); - idalloctm(tsd_tsdn(tsd), to_destroy, - NULL, NULL, true, true); - } else { - next = NULL; - } - } while (next != NULL); - } - gctx->nlimbo--; - if (prof_gctx_should_destroy(gctx)) { - gctx->nlimbo++; - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - } - } -} - -struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; -}; - -static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *opaque) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, tdata->lock); - if (!tdata->expired) { - size_t tabind; - union { - prof_tctx_t *p; - void *v; - } tctx; - - tdata->dumping = true; - memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) { - prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); - } - - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; - if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; - } - } else { - tdata->dumping = false; - } - malloc_mutex_unlock(arg->tsdn, tdata->lock); - - return NULL; -} - -static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - bool propagate_err = *(bool *)arg; - - if (!tdata->dumping) { - return NULL; - } - 
- if (prof_dump_printf(propagate_err, - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", - tdata->thr_uid, tdata->cnt_summed.curobjs, - tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, - tdata->cnt_summed.accumbytes, - (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) { - return tdata; - } - return NULL; -} - -static bool -prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { - bool ret; - - if (prof_dump_printf(propagate_err, - "heap_v2/%"FMTu64"\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return true; - } - - malloc_mutex_lock(tsdn, &tdatas_mtx); - ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, - (void *)&propagate_err) != NULL); - malloc_mutex_unlock(tsdn, &tdatas_mtx); - return ret; -} -prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; - -static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { - bool ret; - unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; - - cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); - - /* Avoid dumping such gctx's that have no useful data. 
*/ - if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { - assert(gctx->cnt_summed.curobjs == 0); - assert(gctx->cnt_summed.curbytes == 0); - assert(gctx->cnt_summed.accumobjs == 0); - assert(gctx->cnt_summed.accumbytes == 0); - ret = false; - goto label_return; - } - - if (prof_dump_printf(propagate_err, "@")) { - ret = true; - goto label_return; - } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"FMTxPTR, - (uintptr_t)bt->vec[i])) { - ret = true; - goto label_return; - } - } - - if (prof_dump_printf(propagate_err, - "\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { - ret = true; - goto label_return; - } - - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; - if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { - ret = true; - goto label_return; - } - - ret = false; -label_return: - return ret; -} - -#ifndef _WIN32 -JEMALLOC_FORMAT_PRINTF(1, 2) -static int -prof_open_maps(const char *format, ...) 
{ - int mfd; - va_list ap; - char filename[PATH_MAX + 1]; - - va_start(ap, format); - malloc_vsnprintf(filename, sizeof(filename), format, ap); - va_end(ap); - -#if defined(O_CLOEXEC) - mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); - } -#endif - - return mfd; -} -#endif - int prof_getpid(void) { #ifdef _WIN32 @@ -1445,291 +506,6 @@ prof_getpid(void) { #endif } -static bool -prof_dump_maps(bool propagate_err) { - bool ret; - int mfd; - - cassert(config_prof); -#ifdef __FreeBSD__ - mfd = prof_open_maps("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented -#else - { - int pid = prof_getpid(); - - mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps("/proc/%d/maps", pid); - } - } -#endif - if (mfd != -1) { - ssize_t nread; - - if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) { - ret = true; - goto label_return; - } - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). */ - if (prof_dump_flush(propagate_err) && - propagate_err) { - ret = true; - goto label_return; - } - } - nread = malloc_read_fd(mfd, - &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - - prof_dump_buf_end); - } while (nread > 0); - } else { - ret = true; - goto label_return; - } - - ret = false; -label_return: - if (mfd != -1) { - close(mfd); - } - return ret; -} - -/* - * See prof_sample_threshold_update() comment for why the body of this function - * is conditionally compiled. 
- */ -static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) { -#ifdef JEMALLOC_PROF - /* - * Scaling is equivalent AdjustSamples() in jeprof, but the result may - * differ slightly from what jeprof reports, because here we scale the - * summary values, whereas jeprof scales each context individually and - * reports the sums of the scaled values. - */ - if (cnt_all->curbytes != 0) { - double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); - - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? 
"s" : ""); - malloc_printf( - ": Run jeprof on \"%s\" for leak detail\n", - filename); - } -#endif -} - -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - -static prof_gctx_t * -prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { - ret = gctx; - goto label_return; - } - - ret = NULL; -label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); - return ret; -} - -static void -prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) { - size_t tabind; - union { - prof_gctx_t *p; - void *v; - } gctx; - - prof_enter(tsd, tdata); - - /* - * Put gctx's in limbo and clear their counters in preparation for - * summing. - */ - gctx_tree_new(gctxs); - for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { - prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); - } - - /* - * Iterate over tdatas, and for the non-expired ones snapshot their tctx - * stats and merge them into the associated gctx's. - */ - prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)prof_tdata_merge_iter_arg); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - /* Merge tctx stats into gctx's. 
*/ - prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg->leak_ngctx = 0; - gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - (void *)prof_gctx_merge_iter_arg); - - prof_leave(tsd, tdata); -} - -static bool -prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, - prof_gctx_tree_t *gctxs) { - /* Create dump file. */ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { - return true; - } - - /* Dump profile header. */ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg->cnt_all)) { - goto label_write_error; - } - - /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg->propagate_err = propagate_err; - if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, - (void *)prof_gctx_dump_iter_arg) != NULL) { - goto label_write_error; - } - - /* Dump /proc//maps if possible. 
*/ - if (prof_dump_maps(propagate_err)) { - goto label_write_error; - } - - if (prof_dump_close(propagate_err)) { - return true; - } - - return false; -label_write_error: - prof_dump_close(propagate_err); - return true; -} - -static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck) { - cassert(config_prof); - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t * tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return true; - } - - pre_reentrancy(tsd, NULL); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - - prof_gctx_tree_t gctxs; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, - &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, - &prof_gctx_dump_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - post_reentrancy(tsd); - - if (err) { - return true; - } - - if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); - } - return false; -} - -#ifdef JEMALLOC_JET -void -prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) { - tsd_t *tsd; - prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - prof_gctx_tree_t gctxs; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - if (curobjs != NULL) { - *curobjs = 0; - } - if (curbytes != NULL) { - *curbytes = 0; - } - if (accumobjs != NULL) { - *accumobjs = 0; - } - if (accumbytes != NULL) { - *accumbytes = 0; - } - return; - } - - prof_dump_prep(tsd, tdata, 
&prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - if (curobjs != NULL) { - *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; - } - if (curbytes != NULL) { - *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; - } - if (accumobjs != NULL) { - *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; - } - if (accumbytes != NULL) { - *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; - } -} -#endif - #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void @@ -1878,28 +654,6 @@ prof_gdump(tsdn_t *tsdn) { } } -void -prof_bt_hash(const void *key, size_t r_hash[2]) { - prof_bt_t *bt = (prof_bt_t *)key; - - cassert(config_prof); - - hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); -} - -bool -prof_bt_keycomp(const void *k1, const void *k2) { - const prof_bt_t *bt1 = (prof_bt_t *)k1; - const prof_bt_t *bt2 = (prof_bt_t *)k2; - - cassert(config_prof); - - if (bt1->len != bt2->len) { - return false; - } - return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); -} - static uint64_t prof_thr_uid_alloc(tsdn_t *tsdn) { uint64_t thr_uid; @@ -1912,124 +666,33 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { return thr_uid; } -static prof_tdata_t * -prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { - prof_tdata_t *tdata; - - cassert(config_prof); - - /* Initialize an empty cache for this thread. 
*/ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (tdata == NULL) { - return NULL; - } - - tdata->lock = prof_tdata_mutex_choose(thr_uid); - tdata->thr_uid = thr_uid; - tdata->thr_discrim = thr_discrim; - tdata->thread_name = thread_name; - tdata->attached = true; - tdata->expired = false; - tdata->tctx_uid_next = 0; - - if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); - return NULL; - } - - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); - - tdata->enq = false; - tdata->enq_idump = false; - tdata->enq_gdump = false; - - tdata->dumping = false; - tdata->active = active; - - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_insert(&tdatas, tdata); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - - return tdata; -} - prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } -static bool -prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { - if (tdata->attached && !even_if_attached) { - return false; +static char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; } - if (ckh_count(&tdata->bt2tctx) != 0) { - return false; + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; } - return true; -} -static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) { - malloc_mutex_assert_owner(tsdn, tdata->lock); - - return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); -} - -static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) { - 
malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); - - tdata_tree_remove(&tdatas, tdata); - - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - } - ckh_delete(tsd, &tdata->bt2tctx); - idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); -} - -static void -prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - prof_tdata_destroy_locked(tsd, tdata, even_if_attached); - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); -} - -static void -prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); - if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); - /* - * Only detach if !destroy_tdata, because detaching would allow - * another thread to win the race to destroy tdata. - */ - if (!destroy_tdata) { - tdata->attached = false; - } - tsd_prof_tdata_set(tsd, NULL); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, true); + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; } + memcpy(ret, thread_name, size); + return ret; } prof_tdata_t * @@ -2045,58 +708,6 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { active); } -static bool -prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { - bool destroy_tdata; - - malloc_mutex_lock(tsdn, tdata->lock); - if (!tdata->expired) { - tdata->expired = true; - destroy_tdata = tdata->attached ? 
false : - prof_tdata_should_destroy(tsdn, tdata, false); - } else { - destroy_tdata = false; - } - malloc_mutex_unlock(tsdn, tdata->lock); - - return destroy_tdata; -} - -static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - - return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); -} - -void -prof_reset(tsd_t *tsd, size_t lg_sample) { - prof_tdata_t *next; - - assert(lg_sample < (sizeof(uint64_t) << 3)); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - - lg_prof_sample = lg_sample; - - next = NULL; - do { - prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); - if (to_destroy != NULL) { - next = tdata_tree_next(&tdatas, to_destroy); - prof_tdata_destroy_locked(tsd, to_destroy, false); - } else { - next = NULL; - } - } while (next != NULL); - - malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); -} - void prof_tdata_cleanup(tsd_t *tsd) { prof_tdata_t *tdata; @@ -2143,29 +754,6 @@ prof_thread_name_get(tsd_t *tsd) { return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, size); - return ret; -} - int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { prof_tdata_t *tdata; @@ -2330,16 +918,15 @@ prof_boot2(tsd_t *tsd) { return true; } - if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { + if (prof_data_init(tsd)) { return true; } + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } - tdata_tree_new(&tdatas); if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; diff --git a/src/prof_data.c b/src/prof_data.c new file mode 100644 index 00000000..bab8e5c0 --- /dev/null +++ b/src/prof_data.c @@ -0,0 +1,1441 @@ +#define JEMALLOC_PROF_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/hash.h" +#include "jemalloc/internal/malloc_io.h" + +/* + * This file defines and manages the core profiling data structures. + * + * Conceptually, profiling data can be imagined as a table with three columns: + * thread, stack trace, and current allocation size. (When prof_accum is on, + * there's one additional column which is the cumulative allocation size.) + * + * Implementation wise, each thread maintains a hash recording the stack trace + * to allocation size correspondences, which are basically the individual rows + * in the table. 
In addition, two global "indices" are built to make data + * aggregation efficient (for dumping): bt2gctx and tdatas, which are basically + * the "grouped by stack trace" and "grouped by thread" views of the same table, + * respectively. Note that the allocation size is only aggregated to the two + * indices at dumping time, so as to optimize for performance. + */ + +/******************************************************************************/ + +/* + * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data + * structure that knows about all backtraces currently captured. + */ +static ckh_t bt2gctx; + +/* + * Tree of all extant prof_tdata_t structures, regardless of state, + * {attached,detached,expired}. + */ +static prof_tdata_tree_t tdatas; + +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static char prof_dump_buf[ + /* Minimize memory bloat for non-prof builds. */ +#ifdef JEMALLOC_PROF + PROF_DUMP_BUFSIZE +#else + 1 +#endif +]; +static size_t prof_dump_buf_end; +static int prof_dump_fd; + +/******************************************************************************/ +/* Red-black trees. 
*/ + +static int +prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { + uint64_t a_thr_uid = a->thr_uid; + uint64_t b_thr_uid = b->thr_uid; + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + if (ret == 0) { + uint64_t a_thr_discrim = a->thr_discrim; + uint64_t b_thr_discrim = b->thr_discrim; + ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < + b_thr_discrim); + if (ret == 0) { + uint64_t a_tctx_uid = a->tctx_uid; + uint64_t b_tctx_uid = b->tctx_uid; + ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < + b_tctx_uid); + } + } + return ret; +} + +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, + tctx_link, prof_tctx_comp) + +static int +prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { + unsigned a_len = a->bt.len; + unsigned b_len = b->bt.len; + unsigned comp_len = (a_len < b_len) ? a_len : b_len; + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + if (ret == 0) { + ret = (a_len > b_len) - (a_len < b_len); + } + return ret; +} + +rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, + prof_gctx_comp) + +static int +prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; + uint64_t a_uid = a->thr_uid; + uint64_t b_uid = b->thr_uid; + + ret = ((a_uid > b_uid) - (a_uid < b_uid)); + if (ret == 0) { + uint64_t a_discrim = a->thr_discrim; + uint64_t b_discrim = b->thr_discrim; + + ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim)); + } + return ret; +} + +rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, + prof_tdata_comp) + +/******************************************************************************/ + +bool +prof_data_init(tsd_t *tsd) { + tdata_tree_new(&tdatas); + return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, + prof_bt_hash, prof_bt_keycomp); +} + +static void +prof_enter(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + if (tdata != NULL) { + 
assert(!tdata->enq); + tdata->enq = true; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); +} + +static void +prof_leave(tsd_t *tsd, prof_tdata_t *tdata) { + cassert(config_prof); + assert(tdata == prof_tdata_get(tsd, false)); + + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + if (tdata != NULL) { + bool idump, gdump; + + assert(tdata->enq); + tdata->enq = false; + idump = tdata->enq_idump; + tdata->enq_idump = false; + gdump = tdata->enq_gdump; + tdata->enq_gdump = false; + + if (idump) { + prof_idump(tsd_tsdn(tsd)); + } + if (gdump) { + prof_gdump(tsd_tsdn(tsd)); + } + } +} + +static prof_gctx_t * +prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { + /* + * Create a single allocation that has space for vec of length bt->len. + */ + size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); + prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, + sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), + true); + if (gctx == NULL) { + return NULL; + } + gctx->lock = prof_gctx_mutex_choose(); + /* + * Set nlimbo to 1, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). + */ + gctx->nlimbo = 1; + tctx_tree_new(&gctx->tctxs); + /* Duplicate bt. */ + memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *)); + gctx->bt.vec = gctx->vec; + gctx->bt.len = bt->len; + return gctx; +} + +static void +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, + prof_tdata_t *tdata) { + cassert(config_prof); + + /* + * Check that gctx is still unused by any thread cache before destroying + * it. prof_lookup() increments gctx->nlimbo in order to avoid a race + * condition with this function, as does prof_tctx_destroy() in order to + * avoid a race between the main body of prof_tctx_destroy() and entry + * into this function. 
+ */ + prof_enter(tsd, tdata_self); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + assert(gctx->nlimbo != 0); + if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) { + /* Remove gctx from bt2gctx. */ + if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) { + not_reached(); + } + prof_leave(tsd, tdata_self); + /* Destroy gctx. */ + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true); + } else { + /* + * Compensate for increment in prof_tctx_destroy() or + * prof_lookup(). + */ + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_leave(tsd, tdata_self); + } +} + +static bool +prof_gctx_should_destroy(prof_gctx_t *gctx) { + if (opt_prof_accum) { + return false; + } + if (!tctx_tree_empty(&gctx->tctxs)) { + return false; + } + if (gctx->nlimbo != 0) { + return false; + } + return true; +} + +static bool +prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, + void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { + union { + prof_gctx_t *p; + void *v; + } gctx, tgctx; + union { + prof_bt_t *p; + void *v; + } btkey; + bool new_gctx; + + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + /* bt has never been seen before. Insert it. */ + prof_leave(tsd, tdata); + tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt); + if (tgctx.v == NULL) { + return true; + } + prof_enter(tsd, tdata); + if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) { + gctx.p = tgctx.p; + btkey.p = &gctx.p->bt; + if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) { + /* OOM. */ + prof_leave(tsd, tdata); + idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL, + true, true); + return true; + } + new_gctx = true; + } else { + new_gctx = false; + } + } else { + tgctx.v = NULL; + new_gctx = false; + } + + if (!new_gctx) { + /* + * Increment nlimbo, in order to avoid a race condition with + * prof_tctx_destroy()/prof_gctx_try_destroy(). 
+ */ + malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock); + gctx.p->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock); + new_gctx = false; + + if (tgctx.v != NULL) { + /* Lost race to insert. */ + idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, + true); + } + } + prof_leave(tsd, tdata); + + *p_btkey = btkey.v; + *p_gctx = gctx.p; + *p_new_gctx = new_gctx; + return false; +} + +prof_tctx_t * +prof_lookup(tsd_t *tsd, prof_bt_t *bt) { + union { + prof_tctx_t *p; + void *v; + } ret; + prof_tdata_t *tdata; + bool not_found; + + cassert(config_prof); + + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return NULL; + } + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); + if (!not_found) { /* Note double negative! */ + ret.p->prepared = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (not_found) { + void *btkey; + prof_gctx_t *gctx; + bool new_gctx, error; + + /* + * This thread's cache lacks bt. Look for it in the global + * cache. + */ + if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, + &new_gctx)) { + return NULL; + } + + /* Link a prof_tctx_t into gctx for this thread. 
*/ + ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t), + sz_size2index(sizeof(prof_tctx_t)), false, NULL, true, + arena_ichoose(tsd, NULL), true); + if (ret.p == NULL) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + return NULL; + } + ret.p->tdata = tdata; + ret.p->thr_uid = tdata->thr_uid; + ret.p->thr_discrim = tdata->thr_discrim; + memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); + ret.p->gctx = gctx; + ret.p->tctx_uid = tdata->tctx_uid_next++; + ret.p->prepared = true; + ret.p->state = prof_tctx_state_initializing; + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (error) { + if (new_gctx) { + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } + idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); + return NULL; + } + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + ret.p->state = prof_tctx_state_nominal; + tctx_tree_insert(&gctx->tctxs, ret.p); + gctx->nlimbo--; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + + return ret.p; +} + +#ifdef JEMALLOC_JET +static prof_tdata_t * +prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + size_t *tdata_count = (size_t *)arg; + + (*tdata_count)++; + + return NULL; +} + +size_t +prof_tdata_count(void) { + size_t tdata_count = 0; + tsdn_t *tsdn; + + tsdn = tsdn_fetch(); + malloc_mutex_lock(tsdn, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, + (void *)&tdata_count); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + + return tdata_count; +} + +size_t +prof_bt_count(void) { + size_t bt_count; + tsd_t *tsd; + prof_tdata_t *tdata; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + return 0; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx); + bt_count = ckh_count(&bt2gctx); + malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx); + + return bt_count; +} +#endif + +static int 
+prof_dump_open_impl(bool propagate_err, const char *filename) { + int fd; + + fd = creat(filename, 0644); + if (fd == -1 && !propagate_err) { + malloc_printf(": creat(\"%s\"), 0644) failed\n", + filename); + if (opt_abort) { + abort(); + } + } + + return fd; +} +prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; + +static bool +prof_dump_flush(bool propagate_err) { + bool ret = false; + ssize_t err; + + cassert(config_prof); + + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + if (err == -1) { + if (!propagate_err) { + malloc_write(": write() failed during heap " + "profile flush\n"); + if (opt_abort) { + abort(); + } + } + ret = true; + } + prof_dump_buf_end = 0; + + return ret; +} + +static bool +prof_dump_close(bool propagate_err) { + bool ret; + + assert(prof_dump_fd != -1); + ret = prof_dump_flush(propagate_err); + close(prof_dump_fd); + prof_dump_fd = -1; + + return ret; +} + +static bool +prof_dump_write(bool propagate_err, const char *s) { + size_t i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + if (prof_dump_flush(propagate_err) && propagate_err) { + return true; + } + } + + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + assert(i == slen); + + return false; +} + +JEMALLOC_FORMAT_PRINTF(2, 3) +static bool +prof_dump_printf(bool propagate_err, const char *format, ...) 
{ + bool ret; + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + ret = prof_dump_write(propagate_err, buf); + + return ret; +} + +static void +prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { + malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); + + malloc_mutex_lock(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + return; + case prof_tctx_state_nominal: + tctx->state = prof_tctx_state_dumping; + malloc_mutex_unlock(tsdn, tctx->gctx->lock); + + memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); + + tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + tdata->cnt_summed.accumobjs += + tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumbytes += + tctx->dump_cnts.accumbytes; + } + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + not_reached(); + } +} + +static void +prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { + malloc_mutex_assert_owner(tsdn, gctx->lock); + + gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + if (opt_prof_accum) { + gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + } +} + +static prof_tctx_t * +prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. 
*/ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); + break; + default: + not_reached(); + } + + return NULL; +} + +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_tctx_t * +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { + struct prof_tctx_dump_iter_arg_s *arg = + (struct prof_tctx_dump_iter_arg_s *)opaque; + + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_initializing: + case prof_tctx_state_nominal: + /* Not captured by this dump. */ + break; + case prof_tctx_state_dumping: + case prof_tctx_state_purgatory: + if (prof_dump_printf(arg->propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " + "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, + tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, + tctx->dump_cnts.accumbytes)) { + return tctx; + } + break; + default: + not_reached(); + } + return NULL; +} + +static prof_tctx_t * +prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + prof_tctx_t *ret; + + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); + + switch (tctx->state) { + case prof_tctx_state_nominal: + /* New since dumping started; ignore. */ + break; + case prof_tctx_state_dumping: + tctx->state = prof_tctx_state_nominal; + break; + case prof_tctx_state_purgatory: + ret = tctx; + goto label_return; + default: + not_reached(); + } + + ret = NULL; +label_return: + return ret; +} + +static void +prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { + cassert(config_prof); + + malloc_mutex_lock(tsdn, gctx->lock); + + /* + * Increment nlimbo so that gctx won't go away before dump. + * Additionally, link gctx into the dump list so that it is included in + * prof_dump()'s second pass. 
+ */ + gctx->nlimbo++; + gctx_tree_insert(gctxs, gctx); + + memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t)); + + malloc_mutex_unlock(tsdn, gctx->lock); +} + +struct prof_gctx_merge_iter_arg_s { + tsdn_t *tsdn; + size_t leak_ngctx; +}; + +static prof_gctx_t * +prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + struct prof_gctx_merge_iter_arg_s *arg = + (struct prof_gctx_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, + (void *)arg->tsdn); + if (gctx->cnt_summed.curobjs != 0) { + arg->leak_ngctx++; + } + malloc_mutex_unlock(arg->tsdn, gctx->lock); + + return NULL; +} + +static void +prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + prof_gctx_t *gctx; + + /* + * Standard tree iteration won't work here, because as soon as we + * decrement gctx->nlimbo and unlock gctx, another thread can + * concurrently destroy it, which will corrupt the tree. Therefore, + * tear down the tree one node at a time during iteration. 
+ */ + while ((gctx = gctx_tree_first(gctxs)) != NULL) { + gctx_tree_remove(gctxs, gctx); + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + { + prof_tctx_t *next; + + next = NULL; + do { + prof_tctx_t *to_destroy = + tctx_tree_iter(&gctx->tctxs, next, + prof_tctx_finish_iter, + (void *)tsd_tsdn(tsd)); + if (to_destroy != NULL) { + next = tctx_tree_next(&gctx->tctxs, + to_destroy); + tctx_tree_remove(&gctx->tctxs, + to_destroy); + idalloctm(tsd_tsdn(tsd), to_destroy, + NULL, NULL, true, true); + } else { + next = NULL; + } + } while (next != NULL); + } + gctx->nlimbo--; + if (prof_gctx_should_destroy(gctx)) { + gctx->nlimbo++; + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + } else { + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + } + } +} + +struct prof_tdata_merge_iter_arg_s { + tsdn_t *tsdn; + prof_cnt_t cnt_all; +}; + +static prof_tdata_t * +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *opaque) { + struct prof_tdata_merge_iter_arg_s *arg = + (struct prof_tdata_merge_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, tdata->lock); + if (!tdata->expired) { + size_t tabind; + union { + prof_tctx_t *p; + void *v; + } tctx; + + tdata->dumping = true; + memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); + for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, + &tctx.v);) { + prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); + } + + arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + if (opt_prof_accum) { + arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; + } + } else { + tdata->dumping = false; + } + malloc_mutex_unlock(arg->tsdn, tdata->lock); + + return NULL; +} + +static prof_tdata_t * +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + bool propagate_err = *(bool *)arg; + + if (!tdata->dumping) { + return NULL; + } + 
+ if (prof_dump_printf(propagate_err, + " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", + tdata->thr_uid, tdata->cnt_summed.curobjs, + tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, + tdata->cnt_summed.accumbytes, + (tdata->thread_name != NULL) ? " " : "", + (tdata->thread_name != NULL) ? tdata->thread_name : "")) { + return tdata; + } + return NULL; +} + +static bool +prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, + const prof_cnt_t *cnt_all) { + bool ret; + + if (prof_dump_printf(propagate_err, + "heap_v2/%"FMTu64"\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { + return true; + } + + malloc_mutex_lock(tsdn, &tdatas_mtx); + ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, + (void *)&propagate_err) != NULL); + malloc_mutex_unlock(tsdn, &tdatas_mtx); + return ret; +} +prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; + +static bool +prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { + bool ret; + unsigned i; + struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; + + cassert(config_prof); + malloc_mutex_assert_owner(tsdn, gctx->lock); + + /* Avoid dumping such gctx's that have no useful data. 
*/ + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || + (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + assert(gctx->cnt_summed.curobjs == 0); + assert(gctx->cnt_summed.curbytes == 0); + assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumbytes == 0); + ret = false; + goto label_return; + } + + if (prof_dump_printf(propagate_err, "@")) { + ret = true; + goto label_return; + } + for (i = 0; i < bt->len; i++) { + if (prof_dump_printf(propagate_err, " %#"FMTxPTR, + (uintptr_t)bt->vec[i])) { + ret = true; + goto label_return; + } + } + + if (prof_dump_printf(propagate_err, + "\n" + " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { + ret = true; + goto label_return; + } + + prof_tctx_dump_iter_arg.tsdn = tsdn; + prof_tctx_dump_iter_arg.propagate_err = propagate_err; + if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)&prof_tctx_dump_iter_arg) != NULL) { + ret = true; + goto label_return; + } + + ret = false; +label_return: + return ret; +} + +#ifndef _WIN32 +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps(const char *format, ...) 
{ + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + +#if defined(O_CLOEXEC) + mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif + + return mfd; +} +#endif + +static bool +prof_dump_maps(bool propagate_err) { + bool ret; + int mfd; + + cassert(config_prof); +#ifdef __FreeBSD__ + mfd = prof_open_maps("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented +#else + { + int pid = prof_getpid(); + + mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps("/proc/%d/maps", pid); + } + } +#endif + if (mfd != -1) { + ssize_t nread; + + if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && + propagate_err) { + ret = true; + goto label_return; + } + nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + if (prof_dump_flush(propagate_err) && + propagate_err) { + ret = true; + goto label_return; + } + } + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); + } while (nread > 0); + } else { + ret = true; + goto label_return; + } + + ret = false; +label_return: + if (mfd != -1) { + close(mfd); + } + return ret; +} + +/* + * See prof_sample_threshold_update() comment for why the body of this function + * is conditionally compiled. + */ +static void +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, + const char *filename) { +#ifdef JEMALLOC_PROF + /* + * Scaling is equivalent AdjustSamples() in jeprof, but the result may + * differ slightly from what jeprof reports, because here we scale the + * summary values, whereas jeprof scales each context individually and + * reports the sums of the scaled values. 
+ */ + if (cnt_all->curbytes != 0) { + double sample_period = (double)((uint64_t)1 << lg_prof_sample); + double ratio = (((double)cnt_all->curbytes) / + (double)cnt_all->curobjs) / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) + * scale_factor); + uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * + scale_factor); + + malloc_printf(": Leak approximation summary: ~%"FMTu64 + " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != + 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + malloc_printf( + ": Run jeprof on \"%s\" for leak detail\n", + filename); + } +#endif +} + +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + bool propagate_err; +}; + +static prof_gctx_t * +prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { + prof_gctx_t *ret; + struct prof_gctx_dump_iter_arg_s *arg = + (struct prof_gctx_dump_iter_arg_s *)opaque; + + malloc_mutex_lock(arg->tsdn, gctx->lock); + + if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, + gctxs)) { + ret = gctx; + goto label_return; + } + + ret = NULL; +label_return: + malloc_mutex_unlock(arg->tsdn, gctx->lock); + return ret; +} + +static void +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_gctx_tree_t *gctxs) { + size_t tabind; + union { + prof_gctx_t *p; + void *v; + } gctx; + + prof_enter(tsd, tdata); + + /* + * Put gctx's in limbo and clear their counters in preparation for + * summing. + */ + gctx_tree_new(gctxs); + for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) { + prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs); + } + + /* + * Iterate over tdatas, and for the non-expired ones snapshot their tctx + * stats and merge them into the associated gctx's. 
+ */ + prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); + memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, + (void *)prof_tdata_merge_iter_arg); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + /* Merge tctx stats into gctx's. */ + prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_merge_iter_arg->leak_ngctx = 0; + gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, + (void *)prof_gctx_merge_iter_arg); + + prof_leave(tsd, tdata); +} + +static bool +prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck, prof_tdata_t *tdata, + struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, + struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + prof_gctx_tree_t *gctxs) { + /* Create dump file. */ + if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { + return true; + } + + /* Dump profile header. */ + if (prof_dump_header(tsd_tsdn(tsd), propagate_err, + &prof_tdata_merge_iter_arg->cnt_all)) { + goto label_write_error; + } + + /* Dump per gctx profile stats. */ + prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); + prof_gctx_dump_iter_arg->propagate_err = propagate_err; + if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, + (void *)prof_gctx_dump_iter_arg) != NULL) { + goto label_write_error; + } + + /* Dump /proc//maps if possible. 
*/ + if (prof_dump_maps(propagate_err)) { + goto label_write_error; + } + + if (prof_dump_close(propagate_err)) { + return true; + } + + return false; +label_write_error: + prof_dump_close(propagate_err); + return true; +} + +bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { + cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t * tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + pre_reentrancy(tsd, NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_gctx_tree_t gctxs; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; + prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, + &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, + &prof_gctx_dump_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); + + if (err) { + return true; + } + + if (leakcheck) { + prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, + prof_gctx_merge_iter_arg.leak_ngctx, filename); + } + return false; +} + +#ifdef JEMALLOC_JET +void +prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes) { + tsd_t *tsd; + prof_tdata_t *tdata; + struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; + struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_gctx_tree_t gctxs; + + tsd = tsd_fetch(); + tdata = prof_tdata_get(tsd, false); + if (tdata == NULL) { + if (curobjs != NULL) { + *curobjs = 0; + } + if (curbytes != NULL) { + *curbytes = 0; + } + if (accumobjs != NULL) { + *accumobjs = 0; + } + if (accumbytes != NULL) { + *accumbytes = 0; + } + return; + } + + prof_dump_prep(tsd, tdata, 
&prof_tdata_merge_iter_arg, + &prof_gctx_merge_iter_arg, &gctxs); + prof_gctx_finish(tsd, &gctxs); + + if (curobjs != NULL) { + *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + } + if (curbytes != NULL) { + *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + } + if (accumobjs != NULL) { + *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + } + if (accumbytes != NULL) { + *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + } +} +#endif + +void +prof_bt_hash(const void *key, size_t r_hash[2]) { + prof_bt_t *bt = (prof_bt_t *)key; + + cassert(config_prof); + + hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash); +} + +bool +prof_bt_keycomp(const void *k1, const void *k2) { + const prof_bt_t *bt1 = (prof_bt_t *)k1; + const prof_bt_t *bt2 = (prof_bt_t *)k2; + + cassert(config_prof); + + if (bt1->len != bt2->len) { + return false; + } + return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0); +} + +prof_tdata_t * +prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, + char *thread_name, bool active) { + prof_tdata_t *tdata; + + cassert(config_prof); + + /* Initialize an empty cache for this thread. 
*/ + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), + sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (tdata == NULL) { + return NULL; + } + + tdata->lock = prof_tdata_mutex_choose(thr_uid); + tdata->thr_uid = thr_uid; + tdata->thr_discrim = thr_discrim; + tdata->thread_name = thread_name; + tdata->attached = true; + tdata->expired = false; + tdata->tctx_uid_next = 0; + + if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, + prof_bt_keycomp)) { + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); + return NULL; + } + + tdata->prng_state = (uint64_t)(uintptr_t)tdata; + prof_sample_threshold_update(tdata); + + tdata->enq = false; + tdata->enq_idump = false; + tdata->enq_gdump = false; + + tdata->dumping = false; + tdata->active = active; + + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + tdata_tree_insert(&tdatas, tdata); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + + return tdata; +} + +static bool +prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { + if (tdata->attached && !even_if_attached) { + return false; + } + if (ckh_count(&tdata->bt2tctx) != 0) { + return false; + } + return true; +} + +static bool +prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsdn, tdata->lock); + + return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); +} + +static void +prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, + bool even_if_attached) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + + tdata_tree_remove(&tdatas, tdata); + + assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + } + ckh_delete(tsd, &tdata->bt2tctx); + idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); +} + +static void +prof_tdata_destroy(tsd_t 
*tsd, prof_tdata_t *tdata, bool even_if_attached) { + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + prof_tdata_destroy_locked(tsd, tdata, even_if_attached); + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); +} + +void +prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); + if (tdata->attached) { + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, + true); + /* + * Only detach if !destroy_tdata, because detaching would allow + * another thread to win the race to destroy tdata. + */ + if (!destroy_tdata) { + tdata->attached = false; + } + tsd_prof_tdata_set(tsd, NULL); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, true); + } +} + +static bool +prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { + bool destroy_tdata; + + malloc_mutex_lock(tsdn, tdata->lock); + if (!tdata->expired) { + tdata->expired = true; + destroy_tdata = tdata->attached ? false : + prof_tdata_should_destroy(tsdn, tdata, false); + } else { + destroy_tdata = false; + } + malloc_mutex_unlock(tsdn, tdata->lock); + + return destroy_tdata; +} + +static prof_tdata_t * +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, + void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + + return (prof_tdata_expire(tsdn, tdata) ? 
tdata : NULL); +} + +void +prof_reset(tsd_t *tsd, size_t lg_sample) { + prof_tdata_t *next; + + assert(lg_sample < (sizeof(uint64_t) << 3)); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); + + lg_prof_sample = lg_sample; + + next = NULL; + do { + prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, + prof_tdata_reset_iter, (void *)tsd); + if (to_destroy != NULL) { + next = tdata_tree_next(&tdatas, to_destroy); + prof_tdata_destroy_locked(tsd, to_destroy, false); + } else { + next = NULL; + } + } while (next != NULL); + + malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); +} + +void +prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + prof_tdata_t *tdata = tctx->tdata; + prof_gctx_t *gctx = tctx->gctx; + bool destroy_tdata, destroy_tctx, destroy_gctx; + + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + assert(tctx->cnts.curobjs == 0); + assert(tctx->cnts.curbytes == 0); + assert(!opt_prof_accum); + assert(tctx->cnts.accumobjs == 0); + assert(tctx->cnts.accumbytes == 0); + + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + + malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); + switch (tctx->state) { + case prof_tctx_state_nominal: + tctx_tree_remove(&gctx->tctxs, tctx); + destroy_tctx = true; + if (prof_gctx_should_destroy(gctx)) { + /* + * Increment gctx->nlimbo in order to keep another + * thread from winning the race to destroy gctx while + * this one has gctx->lock dropped. Without this, it + * would be possible for another thread to: + * + * 1) Sample an allocation associated with gctx. + * 2) Deallocate the sampled object. + * 3) Successfully prof_gctx_try_destroy(gctx). + * + * The result would be that gctx no longer exists by the + * time this thread accesses it in + * prof_gctx_try_destroy(). 
+ */ + gctx->nlimbo++; + destroy_gctx = true; + } else { + destroy_gctx = false; + } + break; + case prof_tctx_state_dumping: + /* + * A dumping thread needs tctx to remain valid until dumping + * has finished. Change state such that the dumping thread will + * complete destruction during a late dump iteration phase. + */ + tctx->state = prof_tctx_state_purgatory; + destroy_tctx = false; + destroy_gctx = false; + break; + default: + not_reached(); + destroy_tctx = false; + destroy_gctx = false; + } + malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); + if (destroy_gctx) { + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, + tdata); + } + + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, false); + } + + if (destroy_tctx) { + idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); + } +} + +/******************************************************************************/ From 87e2400cbb8b5a49f910b3c72b10297fcc9df839 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 7 Aug 2019 20:12:25 -0700 Subject: [PATCH 1337/2608] Fix tcaches mutex pre- / post-fork handling. 
--- src/tcache.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 50099a9f..01c61609 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -778,21 +778,15 @@ tcache_boot(tsdn_t *tsdn) { void tcache_prefork(tsdn_t *tsdn) { - if (!config_prof && opt_tcache) { - malloc_mutex_prefork(tsdn, &tcaches_mtx); - } + malloc_mutex_prefork(tsdn, &tcaches_mtx); } void tcache_postfork_parent(tsdn_t *tsdn) { - if (!config_prof && opt_tcache) { - malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); - } + malloc_mutex_postfork_parent(tsdn, &tcaches_mtx); } void tcache_postfork_child(tsdn_t *tsdn) { - if (!config_prof && opt_tcache) { - malloc_mutex_postfork_child(tsdn, &tcaches_mtx); - } + malloc_mutex_postfork_child(tsdn, &tcaches_mtx); } From 39343555d6ac84a105a2d5e8ba0059115eb20f93 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 2 Aug 2019 09:41:35 -0700 Subject: [PATCH 1338/2608] Report stats for tdatas_mtx and prof_dump_mtx --- doc/jemalloc.xml.in | 24 ++++++++++++++++++++++++ include/jemalloc/internal/mutex_prof.h | 4 +++- src/ctl.c | 10 ++++++++-- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 7fecda7c..5636fb90 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2509,6 +2509,30 @@ struct extent_hooks_s { counters. + + + stats.mutexes.prof_thds_data.{counter} + (counter specific type) r- + [] + + Statistics on prof threads data mutex + (global scope; profiling related). {counter} is one + of the counters in mutex profiling + counters. + + + + + stats.mutexes.prof_dump.{counter} + (counter specific type) r- + [] + + Statistics on prof dumping mutex + (global scope; profiling related). {counter} is one + of the counters in mutex profiling + counters. 
+ + stats.mutexes.reset diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 2cb8fb0c..6288ede5 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -8,7 +8,9 @@ #define MUTEX_PROF_GLOBAL_MUTEXES \ OP(background_thread) \ OP(ctl) \ - OP(prof) + OP(prof) \ + OP(prof_thds_data) \ + OP(prof_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index 48afaa61..a89a7096 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1093,8 +1093,12 @@ ctl_refresh(tsdn_t *tsdn) { malloc_mutex_unlock(tsdn, &mtx); if (config_prof && opt_prof) { - READ_GLOBAL_MUTEX_PROF_DATA(global_prof_mutex_prof, - bt2gctx_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof, bt2gctx_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_thds_data, tdatas_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_dump, prof_dump_mtx); } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -2972,6 +2976,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } if (config_prof && opt_prof) { MUTEX_PROF_RESET(bt2gctx_mtx); + MUTEX_PROF_RESET(tdatas_mtx); + MUTEX_PROF_RESET(prof_dump_mtx); } From 7fc6b1b259fd1c38a59341ad555a47790da6f773 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 7 Jun 2019 14:04:59 -0700 Subject: [PATCH 1339/2608] Add buffered writer The buffered writer adopts a signature identical to `write_cb`, so that it can be plugged into anywhere `write_cb` appears. 
--- Makefile.in | 1 + include/jemalloc/internal/malloc_io.h | 25 +++++++++++ src/malloc_io.c | 30 +++++++++++++ test/unit/buf_writer.c | 64 +++++++++++++++++++++++++++ 4 files changed, 120 insertions(+) create mode 100644 test/unit/buf_writer.c diff --git a/Makefile.in b/Makefile.in index 40daf115..ef75d8ac 100644 --- a/Makefile.in +++ b/Makefile.in @@ -177,6 +177,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ $(srcroot)test/unit/binshard.c \ + $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 1d1a414e..f5d16a5e 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -99,4 +99,29 @@ malloc_read_fd(int fd, void *buf, size_t count) { return (ssize_t)result; } +/******************************************************************************/ + +/* + * The rest is buffered writing utility. + * + * The only difference when using the buffered writer is that cbopaque is + * passed to write_cb only when the buffer is flushed. It would make a + * difference if cbopaque points to something that's changing for each write_cb + * call, or something that affects write_cb in a way dependent on the content + * of the output string. However, the most typical usage case in practice is + * that cbopaque points to some "option like" content for the write_cb, so it + * doesn't matter. 
+ */ + +typedef struct { + void (*write_cb)(void *, const char *); + void *cbopaque; + char *buf; + size_t buf_size; /* must be one less than the capacity of buf array */ + size_t buf_end; +} buf_writer_arg_t; + +void buf_writer_flush(buf_writer_arg_t *arg); +void buffered_write_cb(void *buf_writer_arg, const char *s); + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/malloc_io.c b/src/malloc_io.c index d7cb0f52..2fae7570 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -664,6 +664,36 @@ malloc_printf(const char *format, ...) { va_end(ap); } +void +buf_writer_flush(buf_writer_arg_t *arg) { + assert(arg->buf_end <= arg->buf_size); + arg->buf[arg->buf_end] = '\0'; + if (arg->write_cb == NULL) { + arg->write_cb = je_malloc_message != NULL ? + je_malloc_message : wrtmessage; + } + arg->write_cb(arg->cbopaque, arg->buf); + arg->buf_end = 0; +} + +void +buffered_write_cb(void *buf_writer_arg, const char *s) { + buf_writer_arg_t *arg = (buf_writer_arg_t *)buf_writer_arg; + size_t i, slen, n, s_remain, buf_remain; + assert(arg->buf_end <= arg->buf_size); + for (i = 0, slen = strlen(s); i < slen; i += n) { + if (arg->buf_end == arg->buf_size) { + buf_writer_flush(arg); + } + s_remain = slen - i; + buf_remain = arg->buf_size - arg->buf_end; + n = s_remain < buf_remain ? s_remain : buf_remain; + memcpy(arg->buf + arg->buf_end, s + i, n); + arg->buf_end += n; + } + assert(i == slen); +} + /* * Restore normal assertion macros, in order to make it possible to compile all * C files as a single concatenation. 
diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c new file mode 100644 index 00000000..4d8ae99b --- /dev/null +++ b/test/unit/buf_writer.c @@ -0,0 +1,64 @@ +#include "test/jemalloc_test.h" + +#define TEST_BUF_SIZE 16 +#define UNIT_MAX (TEST_BUF_SIZE * 3) + +static size_t test_write_len; +static char test_buf[TEST_BUF_SIZE]; +static uint64_t arg_store; + +static void test_write_cb(void *cbopaque, const char *s) { + size_t prev_test_write_len = test_write_len; + test_write_len += strlen(s); /* only increase the length */ + arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ + assert_zu_le(prev_test_write_len, test_write_len, + "Test write overflowed"); +} + +TEST_BEGIN(test_buf_write) { + char s[UNIT_MAX + 1]; + size_t n_unit, remain, i; + ssize_t unit; + uint64_t arg = 4; /* Starting value of random argument. */ + buf_writer_arg_t test_buf_arg = + {test_write_cb, &arg, test_buf, TEST_BUF_SIZE - 1, 0}; + + memset(s, 'a', UNIT_MAX); + arg_store = arg; + for (unit = UNIT_MAX; unit >= 0; --unit) { + /* unit keeps decreasing, so strlen(s) is always unit. */ + s[unit] = '\0'; + for (n_unit = 1; n_unit <= 3; ++n_unit) { + test_write_len = 0; + remain = 0; + for (i = 1; i <= n_unit; ++i) { + arg = prng_lg_range_u64(&arg, 64); + buffered_write_cb(&test_buf_arg, s); + remain += unit; + if (remain > test_buf_arg.buf_size) { + /* Flushes should have happened. */ + assert_u64_eq(arg_store, arg, "Call " + "back argument didn't get through"); + remain %= test_buf_arg.buf_size; + if (remain == 0) { + /* Last flush should be lazy. 
*/ + remain += test_buf_arg.buf_size; + } + } + assert_zu_eq(test_write_len + remain, i * unit, + "Incorrect length after writing %zu strings" + " of length %zu", i, unit); + } + buf_writer_flush(&test_buf_arg); + assert_zu_eq(test_write_len, n_unit * unit, + "Incorrect length after flushing at the end of" + " writing %zu strings of length %zu", n_unit, unit); + } + } +} +TEST_END + +int +main(void) { + return test(test_buf_write); +} From 8c8466fa6e413b08ce83c6f5ac96d2b1454e3afe Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 30 Jul 2019 11:07:24 -0700 Subject: [PATCH 1340/2608] Add compact json option for emitter JSON format is largely meant for machine-machine communication, so adding the option to the emitter. According to local testing, the savings in terms of bytes outputted is around 50% for stats printing and around 25% for prof log printing. --- include/jemalloc/internal/emitter.h | 66 +++++++---- test/unit/emitter.c | 170 +++++++++++++++++++--------- 2 files changed, 160 insertions(+), 76 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 542bc79c..009bf9ac 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -6,6 +6,7 @@ typedef enum emitter_output_e emitter_output_t; enum emitter_output_e { emitter_output_json, + emitter_output_json_compact, emitter_output_table }; @@ -75,6 +76,12 @@ struct emitter_s { bool emitted_key; }; +static inline bool +emitter_outputs_json(emitter_t *emitter) { + return emitter->output == emitter_output_json || + emitter->output == emitter_output_json_compact; +} + /* Internal convenience function. Write to the emitter the given string. 
*/ JEMALLOC_FORMAT_PRINTF(2, 3) static inline void @@ -135,7 +142,7 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, switch (value_type) { case emitter_type_bool: - emitter_printf(emitter, + emitter_printf(emitter, emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), *(const bool *)value ? "true" : "false"); break; @@ -159,7 +166,7 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * anywhere near the fmt size. */ assert(str_written < BUF_SIZE); - emitter_printf(emitter, + emitter_printf(emitter, emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); break; case emitter_type_uint32: @@ -196,6 +203,7 @@ static inline void emitter_indent(emitter_t *emitter) { int amount = emitter->nesting_depth; const char *indent_str; + assert(emitter->output != emitter_output_json_compact); if (emitter->output == emitter_output_json) { indent_str = "\t"; } else { @@ -209,12 +217,18 @@ emitter_indent(emitter_t *emitter) { static inline void emitter_json_key_prefix(emitter_t *emitter) { + assert(emitter_outputs_json(emitter)); if (emitter->emitted_key) { emitter->emitted_key = false; return; } - emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); - emitter_indent(emitter); + if (emitter->item_at_depth) { + emitter_printf(emitter, ","); + } + if (emitter->output != emitter_output_json_compact) { + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + } } /******************************************************************************/ @@ -227,22 +241,23 @@ emitter_init(emitter_t *emitter, emitter_output_t emitter_output, emitter->write_cb = write_cb; emitter->cbopaque = cbopaque; emitter->item_at_depth = false; - emitter->emitted_key = false; + emitter->emitted_key = false; emitter->nesting_depth = 0; } /******************************************************************************/ /* JSON public API. */ -/* +/* * Emits a key (e.g. as appears in an object). 
The next json entity emitted will * be the corresponding value. */ static inline void emitter_json_key(emitter_t *emitter, const char *json_key) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); - emitter_printf(emitter, "\"%s\": ", json_key); + emitter_printf(emitter, "\"%s\":%s", json_key, + emitter->output == emitter_output_json_compact ? "" : " "); emitter->emitted_key = true; } } @@ -250,7 +265,7 @@ emitter_json_key(emitter_t *emitter, const char *json_key) { static inline void emitter_json_value(emitter_t *emitter, emitter_type_t value_type, const void *value) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); emitter_print_value(emitter, emitter_justify_none, -1, value_type, value); @@ -268,7 +283,7 @@ emitter_json_kv(emitter_t *emitter, const char *json_key, static inline void emitter_json_array_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); emitter_printf(emitter, "["); emitter_nest_inc(emitter); @@ -284,18 +299,20 @@ emitter_json_array_kv_begin(emitter_t *emitter, const char *json_key) { static inline void emitter_json_array_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth > 0); emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); + if (emitter->output != emitter_output_json_compact) { + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + } emitter_printf(emitter, "]"); } } static inline void emitter_json_object_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); emitter_printf(emitter, "{"); emitter_nest_inc(emitter); @@ -311,11 +328,13 @@ emitter_json_object_kv_begin(emitter_t 
*emitter, const char *json_key) { static inline void emitter_json_object_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth > 0); emitter_nest_dec(emitter); - emitter_printf(emitter, "\n"); - emitter_indent(emitter); + if (emitter->output != emitter_output_json_compact) { + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + } emitter_printf(emitter, "}"); } } @@ -420,7 +439,7 @@ emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, emitter_type_t value_type, const void *value, const char *table_note_key, emitter_type_t table_note_value_type, const void *table_note_value) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_value(emitter, value_type, value); } else { @@ -440,7 +459,7 @@ emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, static inline void emitter_dict_begin(emitter_t *emitter, const char *json_key, const char *table_header) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_object_begin(emitter); } else { @@ -450,7 +469,7 @@ emitter_dict_begin(emitter_t *emitter, const char *json_key, static inline void emitter_dict_end(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_object_end(emitter); } else { emitter_table_dict_end(emitter); @@ -459,7 +478,7 @@ emitter_dict_end(emitter_t *emitter) { static inline void emitter_begin(emitter_t *emitter) { - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth == 0); emitter_printf(emitter, "{"); emitter_nest_inc(emitter); @@ -476,10 +495,11 @@ emitter_begin(emitter_t *emitter) { static inline void emitter_end(emitter_t *emitter) { - if (emitter->output == 
emitter_output_json) { + if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth == 1); emitter_nest_dec(emitter); - emitter_printf(emitter, "\n}\n"); + emitter_printf(emitter, "%s", emitter->output == + emitter_output_json_compact ? "}" : "\n}\n"); } } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index b4a693f4..712c9e10 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -66,7 +66,9 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { static void assert_emit_output(void (*emit_fn)(emitter_t *), - const char *expected_json_output, const char *expected_table_output) { + const char *expected_json_output, + const char *expected_json_compact_output, + const char *expected_table_output) { emitter_t emitter; char buf[MALLOC_PRINTF_BUFSIZE]; buf_descriptor_t buf_descriptor; @@ -84,6 +86,16 @@ assert_emit_output(void (*emit_fn)(emitter_t *), buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; buf_descriptor.mid_quote = false; + emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb, + &buf_descriptor); + (*emit_fn)(&emitter); + assert_str_eq(expected_json_compact_output, buf, + "compact json output failure"); + + buf_descriptor.buf = buf; + buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; + buf_descriptor.mid_quote = false; + emitter_init(&emitter, emitter_output_table, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); @@ -108,6 +120,7 @@ emit_dict(emitter_t *emitter) { emitter_dict_end(emitter); emitter_end(emitter); } + static const char *dict_json = "{\n" "\t\"foo\": {\n" @@ -117,6 +130,15 @@ static const char *dict_json = "\t\t\"jkl\": \"a string\"\n" "\t}\n" "}\n"; +static const char *dict_json_compact = +"{" + "\"foo\":{" + "\"abc\":false," + "\"def\":true," + "\"ghi\":123," + "\"jkl\":\"a string\"" + "}" +"}"; static const char *dict_table = "This is the foo table:\n" " ABC: false\n" @@ -124,11 +146,6 @@ static const char *dict_table = " GHI: 123 (note_key1: \"a string\")\n" " JKL: \"a string\" (note_key2: false)\n"; 
-TEST_BEGIN(test_dict) { - assert_emit_output(&emit_dict, dict_json, dict_table); -} -TEST_END - static void emit_table_printf(emitter_t *emitter) { emitter_begin(emitter); @@ -141,17 +158,11 @@ emit_table_printf(emitter_t *emitter) { static const char *table_printf_json = "{\n" "}\n"; - +static const char *table_printf_json_compact = "{}"; static const char *table_printf_table = "Table note 1\n" "Table note 2 with format string\n"; -TEST_BEGIN(test_table_printf) { - assert_emit_output(&emit_table_printf, table_printf_json, - table_printf_table); -} -TEST_END - static void emit_nested_dict(emitter_t *emitter) { int val = 123; emitter_begin(emitter); @@ -169,7 +180,7 @@ static void emit_nested_dict(emitter_t *emitter) { emitter_end(emitter); } -static const char *nested_object_json = +static const char *nested_dict_json = "{\n" "\t\"json1\": {\n" "\t\t\"json2\": {\n" @@ -182,8 +193,20 @@ static const char *nested_object_json = "\t\t\"primitive\": 123\n" "\t}\n" "}\n"; - -static const char *nested_object_table = +static const char *nested_dict_json_compact = +"{" + "\"json1\":{" + "\"json2\":{" + "\"primitive\":123" + "}," + "\"json3\":{" + "}" + "}," + "\"json4\":{" + "\"primitive\":123" + "}" +"}"; +static const char *nested_dict_table = "Dict 1\n" " Dict 2\n" " A primitive: 123\n" @@ -191,12 +214,6 @@ static const char *nested_object_table = "Dict 4\n" " Another primitive: 123\n"; -TEST_BEGIN(test_nested_dict) { - assert_emit_output(&emit_nested_dict, nested_object_json, - nested_object_table); -} -TEST_END - static void emit_types(emitter_t *emitter) { bool b = false; @@ -235,7 +252,17 @@ static const char *types_json = "\t\"k7\": 789,\n" "\t\"k8\": 10000000000\n" "}\n"; - +static const char *types_json_compact = +"{" + "\"k1\":false," + "\"k2\":-123," + "\"k3\":123," + "\"k4\":-456," + "\"k5\":456," + "\"k6\":\"string\"," + "\"k7\":789," + "\"k8\":10000000000" +"}"; static const char *types_table = "K1: false\n" "K2: -123\n" @@ -246,11 +273,6 @@ static const 
char *types_table = "K7: 789\n" "K8: 10000000000\n"; -TEST_BEGIN(test_types) { - assert_emit_output(&emit_types, types_json, types_table); -} -TEST_END - static void emit_modal(emitter_t *emitter) { int val = 123; @@ -283,7 +305,18 @@ const char *modal_json = "\t\t\"i6\": 123\n" "\t}\n" "}\n"; - +const char *modal_json_compact = +"{" + "\"j0\":{" + "\"j1\":{" + "\"i1\":123," + "\"i2\":123," + "\"i4\":123" + "}," + "\"i5\":123," + "\"i6\":123" + "}" +"}"; const char *modal_table = "T0\n" " I1: 123\n" @@ -293,13 +326,8 @@ const char *modal_table = " I5: 123\n" " I6: 123\n"; -TEST_BEGIN(test_modal) { - assert_emit_output(&emit_modal, modal_json, modal_table); -} -TEST_END - static void -emit_json_arr(emitter_t *emitter) { +emit_json_array(emitter_t *emitter) { int ival = 123; emitter_begin(emitter); @@ -338,14 +366,24 @@ static const char *json_array_json = "\t\t]\n" "\t}\n" "}\n"; - +static const char *json_array_json_compact = +"{" + "\"dict\":{" + "\"arr\":[" + "{" + "\"foo\":123" + "}," + "123," + "123," + "{" + "\"bar\":123," + "\"baz\":123" + "}" + "]" + "}" +"}"; static const char *json_array_table = ""; -TEST_BEGIN(test_json_arr) { - assert_emit_output(&emit_json_arr, json_array_json, json_array_table); -} -TEST_END - static void emit_json_nested_array(emitter_t *emitter) { int ival = 123; @@ -391,12 +429,27 @@ static const char *json_nested_array_json = "\t\t]\n" "\t]\n" "}\n"; - -TEST_BEGIN(test_json_nested_arr) { - assert_emit_output(&emit_json_nested_array, json_nested_array_json, - json_array_table); -} -TEST_END +static const char *json_nested_array_json_compact = +"{" + "[" + "[" + "123," + "\"foo\"," + "123," + "\"foo\"" + "]," + "[" + "123" + "]," + "[" + "\"foo\"," + "123" + "]," + "[" + "]" + "]" +"}"; +static const char *json_nested_array_table = ""; static void emit_table_row(emitter_t *emitter) { @@ -443,18 +496,29 @@ emit_table_row(emitter_t *emitter) { static const char *table_row_json = "{\n" "}\n"; - +static const char *table_row_json_compact 
= "{}"; static const char *table_row_table = "ABC title DEF title GHI\n" "123 true 456\n" "789 false 1011\n" "\"a string\" false ghi\n"; -TEST_BEGIN(test_table_row) { - assert_emit_output(&emit_table_row, table_row_json, table_row_table); -} +#define GENERATE_TEST(feature) \ +TEST_BEGIN(test_##feature) { \ + assert_emit_output(emit_##feature, feature##_json, \ + feature##_json_compact, feature##_table); \ +} \ TEST_END +GENERATE_TEST(dict) +GENERATE_TEST(table_printf) +GENERATE_TEST(nested_dict) +GENERATE_TEST(types) +GENERATE_TEST(modal) +GENERATE_TEST(json_array) +GENERATE_TEST(json_nested_array) +GENERATE_TEST(table_row) + int main(void) { return test_no_reentrancy( @@ -463,7 +527,7 @@ main(void) { test_nested_dict, test_types, test_modal, - test_json_arr, - test_json_nested_arr, + test_json_array, + test_json_nested_array, test_table_row); } From 22746d3c9fddd5486e9ec5c0c6b2e25230db9a8e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 8 Aug 2019 12:46:22 -0700 Subject: [PATCH 1341/2608] Properly dalloc prof nodes with idalloctm. The prof_alloc_node is allocated through ialloc as internal. Switch to idalloctm with tcache and is_internal properly set. 
--- src/prof_log.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index 56d4e035..ad1cb381 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -480,7 +480,7 @@ prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { emitter_json_object_end(emitter); thr_old_node = thr_node; thr_node = thr_node->next; - idalloc(tsd, thr_old_node); + idalloctm(tsd_tsdn(tsd), thr_old_node, NULL, NULL, true, true); } emitter_json_array_end(emitter); } @@ -509,7 +509,7 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { bt_old_node = bt_node; bt_node = bt_node->next; - idalloc(tsd, bt_old_node); + idalloctm(tsd_tsdn(tsd), bt_old_node, NULL, NULL, true, true); } emitter_json_array_end(emitter); } @@ -547,7 +547,8 @@ prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { alloc_old_node = alloc_node; alloc_node = alloc_node->next; - idalloc(tsd, alloc_old_node); + idalloctm(tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, + true); } emitter_json_array_end(emitter); } From 593484661261c20f75557279931eb2d9ca165185 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Aug 2019 22:15:42 -0700 Subject: [PATCH 1342/2608] Fix large bin index accessed through cache bin descriptor. 
--- src/arena.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index ba50e410..e956c394 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,13 +199,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - szind_t i = 0; - for (; i < SC_NBINS; i++) { + for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } - for (; i < nhbins; i++) { + for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); From ad3f7dbfa0f6b510d6e1e0dbaf859506d5ad2a96 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 7 Aug 2019 14:34:34 -0700 Subject: [PATCH 1343/2608] Buffer prof_log_stop Make use of the new buffered writer for the output of `prof_log_stop`. --- src/prof_log.c | 22 +++++++++++++++++----- test/unit/prof_log.c | 2 +- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index ad1cb381..a659f87f 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -160,6 +160,7 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { return node->index; } } + static size_t prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { assert(prof_logging_state == prof_logging_state_started); @@ -576,7 +577,7 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_object_end(emitter); } - +#define PROF_LOG_STOP_BUFSIZE PROF_DUMP_BUFSIZE bool prof_log_stop(tsdn_t *tsdn) { if (!opt_prof || !prof_booted) { @@ -624,11 +625,18 @@ prof_log_stop(tsdn_t *tsdn) { return true; } - /* Emit to json. 
*/ struct prof_emitter_cb_arg_s arg; arg.fd = fd; - emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb, - (void *)(&arg)); + + char *prof_log_stop_buf = (char *)iallocztm(tsdn, + PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); + buf_writer_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, + prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; + + /* Emit to json. */ + emitter_init(&emitter, emitter_output_json, buffered_write_cb, + &prof_log_stop_buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -637,6 +645,9 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); + buf_writer_flush(&prof_log_stop_buf_arg); + idalloctm(tsdn, prof_log_stop_buf, NULL, NULL, true, true); + /* Reset global state. */ if (log_tables_initialized) { ckh_delete(tsd, &log_bt_node_set); @@ -661,8 +672,9 @@ prof_log_stop(tsdn_t *tsdn) { return false; } #endif - return close(fd); + return close(fd) || arg.ret == -1; } +#undef PROF_LOG_STOP_BUFSIZE bool prof_log_init(tsd_t *tsd) { if (opt_prof_log) { diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 92fbd7ce..9336ebca 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -61,7 +61,7 @@ static void *f_thread(void *unused) { int i; for (i = 0; i < N_PARAM; i++) { void *p = malloc(100); - memset(p, 100, sizeof(char)); + memset(p, 100, 1); free(p); } From a219cfcda34e9916c14ff9f9e198b18b41b71fbc Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 12 Jun 2019 17:24:30 -0700 Subject: [PATCH 1344/2608] Clear tcache prof_accumbytes in tcache_flush_cache `tcache->prof_accumbytes` should always be cleared after being transferred to arena; otherwise the allocations would be double counted, leading to excessive prof dumps. 
--- src/tcache.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 01c61609..3e1b55cc 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -529,10 +529,12 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } - if (config_prof && tcache->prof_accumbytes > 0 && - arena_prof_accum(tsd_tsdn(tsd), tcache->arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); + if (config_prof && tcache->prof_accumbytes > 0) { + if (arena_prof_accum(tsd_tsdn(tsd), tcache->arena, + tcache->prof_accumbytes)) { + prof_idump(tsd_tsdn(tsd)); + } + tcache->prof_accumbytes = 0; } } From eb70fef8ca86363a036a962852808675ed1598c1 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 9 Aug 2019 10:19:51 -0700 Subject: [PATCH 1345/2608] Make compact json format as default Saves 20-50% of the output size. --- src/prof_log.c | 4 ++-- src/stats.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index a659f87f..39976564 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -635,8 +635,8 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; /* Emit to json. */ - emitter_init(&emitter, emitter_output_json, buffered_write_cb, - &prof_log_stop_buf_arg); + emitter_init(&emitter, emitter_output_json_compact, + buffered_write_cb, &prof_log_stop_buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); diff --git a/src/stats.c b/src/stats.c index 118e05d2..cf75810a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1181,7 +1181,7 @@ stats_general_print(emitter_t *emitter) { * We do enough mallctls in a loop that we actually want to omit them * (not just omit the printing). 
*/ - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "bin"); for (unsigned i = 0; i < nbins; i++) { emitter_json_object_begin(emitter); @@ -1212,7 +1212,7 @@ stats_general_print(emitter_t *emitter) { emitter_kv(emitter, "nlextents", "Number of large size classes", emitter_type_unsigned, &nlextents); - if (emitter->output == emitter_output_json) { + if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "lextent"); for (unsigned i = 0; i < nlextents; i++) { emitter_json_object_begin(emitter); @@ -1437,8 +1437,8 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, emitter_t emitter; emitter_init(&emitter, - json ? emitter_output_json : emitter_output_table, write_cb, - cbopaque); + json ? emitter_output_json_compact : emitter_output_table, + write_cb, cbopaque); emitter_begin(&emitter); emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); emitter_json_object_kv_begin(&emitter, "jemalloc"); From 28ed9b9a5198ed866750361fe2c36f83742900ac Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 18 Jul 2019 10:10:45 -0700 Subject: [PATCH 1346/2608] Buffer stats printing Without buffering `malloc_stats_print` would invoke the write back call (which could mean an expensive `malloc_write_fd` call) for every single `printf` (including printing each line break and each leading tab/space for indentation). 
--- src/jemalloc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index ed13718d..dec987c5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3704,6 +3704,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, return ret; } +#define STATS_PRINT_BUFSIZE 65536 JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { @@ -3713,10 +3714,24 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); - stats_print(write_cb, cbopaque, opts); + + if (config_debug) { + stats_print(write_cb, cbopaque, opts); + } else { + char *stats_print_buf = (char *)iallocztm(tsdn, + STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), + false, NULL, true, arena_get(TSDN_NULL, 0, true), true); + buf_writer_arg_t stats_print_buf_arg = {write_cb, cbopaque, + stats_print_buf, STATS_PRINT_BUFSIZE - 1, 0}; + stats_print(buffered_write_cb, &stats_print_buf_arg, opts); + buf_writer_flush(&stats_print_buf_arg); + idalloctm(tsdn, stats_print_buf, NULL, NULL, true, true); + } + check_entry_exit_locking(tsdn); LOG("core.malloc_stats_print.exit", ""); } +#undef STATS_PRINT_BUFSIZE JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { From 9c5c2a2c86d473a63806e534c39fb74a882fa558 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Aug 2019 11:08:39 -0700 Subject: [PATCH 1347/2608] Unify the signature of tcache_flush small and large. 
--- include/jemalloc/internal/tcache_externs.h | 4 ++-- include/jemalloc/internal/tcache_inlines.h | 4 ++-- src/tcache.c | 11 ++++++----- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index d63eafde..266f246b 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -31,8 +31,8 @@ void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 5eca20e8..46b9af42 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -202,8 +202,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); bin_info = &tcache_bin_info[binind]; if (unlikely(bin->ncached == bin_info->ncached_max)) { - tcache_bin_flush_large(tsd, bin, binind, - (bin_info->ncached_max >> 1), tcache); + tcache_bin_flush_large(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); } assert(bin->ncached < bin_info->ncached_max); bin->ncached++; diff --git a/src/tcache.c b/src/tcache.c index 3e1b55cc..c5fe67a9 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -65,8 +65,9 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache->lg_fill_div[binind]++; } } else { - tcache_bin_flush_large(tsd, tbin, binind, 
tbin->ncached - - tbin->low_water + (tbin->low_water >> 2), tcache); + tcache_bin_flush_large(tsd, tcache, tbin, binind, + tbin->ncached - tbin->low_water + (tbin->low_water + >> 2)); } } else if (tbin->low_water < 0) { /* @@ -227,8 +228,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } void -tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, - unsigned rem, tcache_t *tcache) { +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, + unsigned rem) { bool merged_stats = false; assert(binind < nhbins); @@ -522,7 +523,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } for (unsigned i = SC_NBINS; i < nhbins; i++) { cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - tcache_bin_flush_large(tsd, tbin, i, 0, tcache); + tcache_bin_flush_large(tsd, tcache, tbin, i, 0); if (config_stats) { assert(tbin->tstats.nrequests == 0); From e2c7584361718ccb12c932d2236a16ec3a31f1a7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Aug 2019 11:11:01 -0700 Subject: [PATCH 1348/2608] Simplify / refactor tcache_dalloc_large. 
--- include/jemalloc/internal/tcache_inlines.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 46b9af42..4815774b 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -201,13 +201,12 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); bin_info = &tcache_bin_info[binind]; - if (unlikely(bin->ncached == bin_info->ncached_max)) { + if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { tcache_bin_flush_large(tsd, tcache, bin, binind, (bin_info->ncached_max >> 1)); + bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + assert(ret); } - assert(bin->ncached < bin_info->ncached_max); - bin->ncached++; - *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } From d6b7995c1629768590366a6ff2170d65c4cc6d9b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Aug 2019 22:33:34 -0700 Subject: [PATCH 1349/2608] Update INSTALL.md about the default doc build. --- INSTALL.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index b8f729b0..eb55acfd 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -9,14 +9,11 @@ If building from unpackaged developer sources, the simplest command sequence that might work is: ./autogen.sh - make dist make make install -Note that documentation is not built by the default target because doing so -would create a dependency on xsltproc in packaged releases, hence the -requirement to either run 'make dist' or avoid installing docs via the various -install_* targets documented below. +Note that documentation is built by the default target only when xsltproc is +available. Build will warn but not stop if the dependency is missing. 
## Advanced configuration From d2dddfb82aac9f2212922eb90324e84790704bfe Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Aug 2019 22:11:21 -0700 Subject: [PATCH 1350/2608] Add hint in the bogus version string. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 261d81c0..bca422af 100644 --- a/configure.ac +++ b/configure.ac @@ -583,7 +583,7 @@ if test ! -e "${objroot}VERSION" ; then if test ! -e "${srcroot}VERSION" ; then AC_MSG_RESULT( [Missing VERSION file, and unable to generate it; creating bogus VERSION]) - echo "0.0.0-0-g0000000000000000000000000000000000000000" > "${objroot}VERSION" + echo "0.0.0-0-g000000missing_version_try_git_fetch_tags" > "${objroot}VERSION" else cp ${srcroot}VERSION ${objroot}VERSION fi From 7599c82d48ffaa07ce934320f7256b56b200dace Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 9 Aug 2019 22:12:47 -0700 Subject: [PATCH 1351/2608] Redesign the cache bin metadata for fast path. Implement the pointer-based metadata for tcache bins -- - 3 pointers are maintained to represent each bin; - 2 of the pointers are compressed on 64-bit; - is_full / is_empty done through pointer comparison; Comparing to the previous counter based design -- - fast-path speed up ~15% in benchmarks - direct pointer comparison and de-reference - no need to access tcache_bin_info in common case --- Makefile.in | 1 + include/jemalloc/internal/cache_bin.h | 160 ++++++++++++--- .../internal/jemalloc_internal_inlines_a.h | 4 +- include/jemalloc/internal/tcache_externs.h | 2 - include/jemalloc/internal/tcache_inlines.h | 20 +- src/arena.c | 20 +- src/jemalloc.c | 5 +- src/tcache.c | 186 ++++++++++++------ test/unit/cache_bin.c | 64 ++++++ 9 files changed, 340 insertions(+), 122 deletions(-) create mode 100644 test/unit/cache_bin.c diff --git a/Makefile.in b/Makefile.in index ef75d8ac..7584f598 100644 --- a/Makefile.in +++ b/Makefile.in @@ -178,6 +178,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/bit_util.c \ 
$(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/buf_writer.c \ + $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index d14556a3..67180cfa 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -13,7 +13,6 @@ * of the tcache at all. */ - /* * The count of the number of cached allocations in a bin. We make this signed * so that negative numbers can encode "invalid" states (e.g. a low water mark @@ -39,29 +38,67 @@ struct cache_bin_info_s { /* Upper limit on ncached. */ cache_bin_sz_t ncached_max; }; +extern cache_bin_info_t *tcache_bin_info; typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { - /* Min # cached since last GC. */ - cache_bin_sz_t low_water; - /* # of cached objects. */ - cache_bin_sz_t ncached; /* - * ncached and stats are both modified frequently. Let's keep them + * The cache bin stack is represented using 3 pointers: cur_ptr, + * low_water and full, optimized for the fast path efficiency. + * + * low addr ==> high addr + * |----|----|----|item1|item2|.....................|itemN| + * full cur empty + * (ncached == N; full + ncached_max == empty) + * + * Data directly stored: + * 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr. + * 2) full points to the top of the stack (i.e. ncached == ncached_max), + * which is compared against on free_fastpath to check "is_full". + * 3) low_water indicates a low water mark of ncached. + * Range of low_water is [cur, empty + 1], i.e. values of [ncached, -1]. + * + * The empty position (ncached == 0) is derived via full + ncached_max + * and not accessed in the common case (guarded behind low_water). + * + * On 64-bit, 2 of the 3 pointers (full and low water) are compressed by + * omitting the high 32 bits. 
Overflow of the half pointers is avoided + * when allocating / initializing the stack space. As a result, + * cur_ptr.lowbits can be safely used for pointer comparisons. + */ + union { + void **ptr; + struct { + /* highbits never accessed directly. */ +#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN)) + uint32_t __highbits; +#endif + uint32_t lowbits; +#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) + uint32_t __highbits; +#endif + }; + } cur_ptr; + /* + * cur_ptr and stats are both modified frequently. Let's keep them * close so that they have a higher chance of being on the same * cacheline, thus less write-backs. */ cache_bin_stats_t tstats; /* - * Stack of available objects. + * Points to the first item that hasn't been used since last GC, to + * track the low water mark (min # of cached). It may point to + * empty_position + 1, which indicates the cache has been depleted and + * refilled (low_water == -1). + */ + uint32_t low_water_position; + /* + * Points to the position when the cache is full. * * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. + * stack goes to higher address for newer allocations (i.e. cur_ptr++). */ - void **avail; + uint32_t full_position; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -76,6 +113,67 @@ struct cache_bin_array_descriptor_s { cache_bin_t *bins_large; }; +/* + * None of the cache_bin_*_get / _set functions is used on the fast path, which + * relies on pointer comparisons to determine if the cache is full / empty. 
+ */ +static inline cache_bin_sz_t +cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { + cache_bin_sz_t n = tcache_bin_info[ind].ncached_max - + (bin->cur_ptr.lowbits - bin->full_position) / sizeof(void *); + assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); + + return n; +} + +static inline void ** +cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { + void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind); + /* Low bits overflow disallowed when allocating the space. */ + assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); + assert(bin->full_position + tcache_bin_info[ind].ncached_max * + sizeof(void *) > bin->full_position); + + /* Can also be computed via (full_position + ncached_max) | highbits. */ + assert(ret == (void **)((uintptr_t)(bin->full_position + + tcache_bin_info[ind].ncached_max * sizeof(void *)) | + (uintptr_t)((uintptr_t)bin->cur_ptr.ptr & + ~(((uint64_t)1 << 32) - 1)))); + + return ret; +} + +/* Returns the position of the bottom item on the stack; for convenience. */ +static inline void ** +cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { + void **bottom = cache_bin_empty_position_get(bin, ind) - 1; + assert(cache_bin_ncached_get(bin, ind) == 0 || *bottom != NULL); + + return bottom; +} + +/* Returns the numeric value of low water in [-1, ncached]. 
*/ +static inline cache_bin_sz_t +cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { + cache_bin_sz_t low_water = tcache_bin_info[ind].ncached_max - + (bin->low_water_position - bin->full_position) / sizeof(void *); + assert(low_water >= -1 && low_water <= + tcache_bin_info[ind].ncached_max); + assert(low_water <= cache_bin_ncached_get(bin, ind)); + assert(bin->low_water_position >= bin->cur_ptr.lowbits); + + return low_water; +} + +static inline void +cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { + bin->cur_ptr.lowbits = bin->full_position + + (tcache_bin_info[ind].ncached_max - n) * sizeof(void *); + assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + assert(n == 0 || *bin->cur_ptr.ptr != NULL); +} + static inline void cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, cache_bin_t *bins_small, cache_bin_t *bins_large) { @@ -85,19 +183,24 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { - void *ret; - - bin->ncached--; - +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { /* - * Check for both bin->ncached == 0 and ncached < low_water - * in a single branch. + * This may read from the empty position; however the loaded value won't + * be used. It's safe because the stack has one more slot reserved. */ - if (unlikely(bin->ncached <= bin->low_water)) { - bin->low_water = bin->ncached; - if (bin->ncached == -1) { - bin->ncached = 0; + void *ret = *(bin->cur_ptr.ptr++); + /* + * Check for both bin->ncached == 0 and ncached < low_water in a single + * branch. This also avoids accessing tcache_bin_info (which is on a + * separate cacheline / page) in the common case. 
+ */ + if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) { + bin->low_water_position = bin->cur_ptr.lowbits; + uint32_t empty_position = bin->full_position + + tcache_bin_info[ind].ncached_max * sizeof(void *); + if (bin->cur_ptr.lowbits > empty_position) { + bin->cur_ptr.ptr--; + assert(bin->cur_ptr.lowbits == empty_position); *success = false; return NULL; } @@ -111,19 +214,18 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { * cacheline). */ *success = true; - ret = *(bin->avail - (bin->ncached + 1)); return ret; } JEMALLOC_ALWAYS_INLINE bool -cache_bin_dalloc_easy(cache_bin_t *bin, cache_bin_info_t *bin_info, void *ptr) { - if (unlikely(bin->ncached == bin_info->ncached_max)) { +cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { + if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { return false; } - assert(bin->ncached < bin_info->ncached_max); - bin->ncached++; - *(bin->avail - bin->ncached) = ptr; + + *(--bin->cur_ptr.ptr) = ptr; + assert(bin->cur_ptr.lowbits >= bin->full_position); return true; } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index ddde9b4e..fedbd862 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -130,8 +130,8 @@ tcache_available(tsd_t *tsd) { if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. 
*/ assert(tsd_tcachep_get(tsd)->arena == NULL || - tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->avail != - NULL); + tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->cur_ptr.ptr + != NULL); return true; } diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 266f246b..2060bb19 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -4,8 +4,6 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern cache_bin_info_t *tcache_bin_info; - /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more * large-object bins. diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4815774b..4f7e02a8 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -48,7 +48,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -109,7 +109,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -164,7 +164,6 @@ JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { cache_bin_t *bin; - cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); @@ -174,11 +173,10 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } bin = 
tcache_small_bin_get(tcache, binind); - bin_info = &tcache_bin_info[binind]; - if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { + if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { tcache_bin_flush_small(tsd, tcache, bin, binind, - (bin_info->ncached_max >> 1)); - bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + tcache_bin_info[binind].ncached_max >> 1); + bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } @@ -189,7 +187,6 @@ JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { cache_bin_t *bin; - cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); @@ -200,11 +197,10 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } bin = tcache_large_bin_get(tcache, binind); - bin_info = &tcache_bin_info[binind]; - if (unlikely(!cache_bin_dalloc_easy(bin, bin_info, ptr))) { + if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { tcache_bin_flush_large(tsd, tcache, bin, binind, - (bin_info->ncached_max >> 1)); - bool ret = cache_bin_dalloc_easy(bin, bin_info, ptr); + tcache_bin_info[binind].ncached_max >> 1); + bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } diff --git a/src/arena.c b/src/arena.c index e956c394..23d0294b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -202,12 +202,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * sz_index2size(i)); + cache_bin_ncached_get(tbin, i) * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, - tbin->ncached * sz_index2size(i)); + cache_bin_ncached_get(tbin, i + SC_NBINS) * + sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -1381,7 +1382,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, 
arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill, cnt; - assert(tbin->ncached == 0); + assert(cache_bin_ncached_get(tbin, binind) == 0); if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); @@ -1390,6 +1391,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, unsigned binshard; bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + void **empty_position = cache_bin_empty_position_get(tbin, binind); for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; @@ -1400,7 +1402,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tofill : extent_nfree_get(slab); arena_slab_reg_alloc_batch( slab, &bin_infos[binind], cnt, - tbin->avail - nfill + i); + empty_position - nfill + i); } else { cnt = 1; void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, @@ -1412,18 +1414,18 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, */ if (ptr == NULL) { if (i > 0) { - memmove(tbin->avail - i, - tbin->avail - nfill, + memmove(empty_position - i, + empty_position - nfill, i * sizeof(void *)); } break; } /* Insert such that low regions get used first. 
*/ - *(tbin->avail - nfill + i) = ptr; + *(empty_position - nfill + i) = ptr; } if (config_fill && unlikely(opt_junk_alloc)) { for (unsigned j = 0; j < cnt; j++) { - void* ptr = *(tbin->avail - nfill + i + j); + void* ptr = *(empty_position - nfill + i + j); arena_alloc_junk_small(ptr, &bin_infos[binind], true); } @@ -1437,7 +1439,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tbin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); - tbin->ncached = i; + cache_bin_ncached_set(tbin, binind, i); arena_decay_tick(tsdn, arena); } diff --git a/src/jemalloc.c b/src/jemalloc.c index dec987c5..75a40277 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2368,7 +2368,7 @@ je_malloc(size_t size) { cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void* ret = cache_bin_alloc_easy(bin, &tcache_success); + void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind); if (tcache_success) { if (config_stats) { @@ -2846,8 +2846,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); - cache_bin_info_t *bin_info = &tcache_bin_info[alloc_ctx.szind]; - if (!cache_bin_dalloc_easy(bin, bin_info, ptr)) { + if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } diff --git a/src/tcache.c b/src/tcache.c index c5fe67a9..d282e1fa 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -14,7 +14,16 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; cache_bin_info_t *tcache_bin_info; -static unsigned stack_nelms; /* Total stack elms per tcache. */ +/* + * For the total bin stack region (per tcache), reserve 2 more slots so that 1) + * the empty position can be safely read on the fast path before checking + * "is_empty"; and 2) the low_water == -1 case can go beyond the empty position + * by 1 step safely (i.e. no overflow). 
+ */ +static const unsigned total_stack_padding = sizeof(void *) * 2; + +/* Total stack size required (per tcache). Include the padding above. */ +static uint32_t total_stack_bytes; unsigned nhbins; size_t tcache_maxclass; @@ -47,14 +56,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } else { tbin = tcache_large_bin_get(tcache, binind); } - if (tbin->low_water > 0) { + + cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + if (low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ if (binind < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, tbin, binind, - tbin->ncached - tbin->low_water + (tbin->low_water - >> 2)); + ncached - low_water + (low_water >> 2)); /* * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. @@ -66,10 +77,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } } else { tcache_bin_flush_large(tsd, tcache, tbin, binind, - tbin->ncached - tbin->low_water + (tbin->low_water - >> 2)); + ncached - low_water + (low_water >> 2)); } - } else if (tbin->low_water < 0) { + } else if (low_water < 0) { + assert(low_water == -1); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. 
@@ -78,7 +89,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache->lg_fill_div[binind]--; } } - tbin->low_water = tbin->ncached; + tbin->low_water_position = tbin->cur_ptr.lowbits; tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) { @@ -97,7 +108,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = cache_bin_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success, binind); return ret; } @@ -117,9 +128,10 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, */ szind_t szind; size_t sz_sum = binind * nflush; + void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { rtree_extent_szind_read(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)*(tbin->avail - 1 - i), true, + rtree_ctx, (uintptr_t)*(bottom_item - i), true, &extents[i], &szind); sz_sum -= szind; } @@ -137,13 +149,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, bool merged_stats = false; assert(binind < SC_NBINS); - assert((cache_bin_sz_t)rem <= tbin->ncached); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + assert((cache_bin_sz_t)rem <= ncached); arena_t *arena = tcache->arena; assert(arena != NULL); - unsigned nflush = tbin->ncached - rem; + unsigned nflush = ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind); /* Look up extent once per item. 
*/ if (config_opt_safety_checks) { tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, @@ -151,7 +165,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { for (unsigned i = 0 ; i < nflush; i++) { item_extent[i] = iealloc(tsd_tsdn(tsd), - *(tbin->avail - 1 - i)); + *(bottom_item - i)); } } while (nflush > 0) { @@ -181,7 +195,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(tbin->avail - 1 - i); + void *ptr = *(bottom_item - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -196,7 +210,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * locked. Stash the object, so that it can be * handled in a future pass. */ - *(tbin->avail - 1 - ndeferred) = ptr; + *(bottom_item - ndeferred) = ptr; item_extent[ndeferred] = extent; ndeferred++; } @@ -219,11 +233,11 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - tbin->ncached = rem; - if (tbin->ncached < tbin->low_water) { - tbin->low_water = tbin->ncached; + cache_bin_ncached_set(tbin, binind, rem); + if (tbin->cur_ptr.lowbits > tbin->low_water_position) { + tbin->low_water_position = tbin->cur_ptr.lowbits; } } @@ -233,17 +247,19 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t bool merged_stats = false; assert(binind < nhbins); - assert((cache_bin_sz_t)rem <= tbin->ncached); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); - unsigned nflush = tbin->ncached - rem; + unsigned nflush = ncached - rem; VARIABLE_ARRAY(extent_t *, item_extent, nflush); + 
void **bottom_item = cache_bin_bottom_item_get(tbin, binind); #ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up extent once per item. */ for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i)); + item_extent[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); } #else tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, @@ -266,7 +282,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); } for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(tbin->avail - 1 - i); + void *ptr = *(bottom_item - i); assert(ptr != NULL); extent = item_extent[i]; if (extent_arena_ind_get(extent) == locked_arena_ind) { @@ -295,7 +311,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(tbin->avail - 1 - i); + void *ptr = *(bottom_item - i); extent = item_extent[i]; assert(ptr != NULL && extent != NULL); @@ -308,7 +324,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t * Stash the object, so that it can be handled * in a future pass. 
*/ - *(tbin->avail - 1 - ndeferred) = ptr; + *(bottom_item - ndeferred) = ptr; item_extent[ndeferred] = extent; ndeferred++; } @@ -330,11 +346,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t tbin->tstats.nrequests = 0; } - memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * + memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - tbin->ncached = rem; - if (tbin->ncached < tbin->low_water) { - tbin->low_water = tbin->ncached; + cache_bin_ncached_set(tbin, binind, rem); + if (tbin->cur_ptr.lowbits > tbin->low_water_position) { + tbin->low_water_position = tbin->cur_ptr.lowbits; } } @@ -406,9 +422,43 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { return false; } -/* Initialize auto tcache (embedded in TSD). */ +static bool +tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { + cassert(sizeof(bin->cur_ptr) == sizeof(void *)); + /* + * The full_position points to the lowest available space. Allocations + * will access the slots toward higher addresses (for the benefit of + * adjacent prefetch). + */ + void *full_position = (void *)*stack_cur; + uint32_t bin_stack_size = tcache_bin_info[ind].ncached_max * + sizeof(void *); + + *stack_cur += bin_stack_size; + void *empty_position = (void *)*stack_cur; + + /* Init to the empty position. */ + bin->cur_ptr.ptr = empty_position; + bin->low_water_position = bin->cur_ptr.lowbits; + bin->full_position = (uint32_t)(uintptr_t)full_position; + assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); + assert(cache_bin_ncached_get(bin, ind) == 0); + assert(cache_bin_empty_position_get(bin, ind) == empty_position); + + return false; +} + +/* Sanity check only. 
*/ +static bool +tcache_bin_lowbits_overflowable(void *ptr) { + uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes); + return lowbits < (uint32_t)(uintptr_t)ptr; +} + static void tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { + assert(!tcache_bin_lowbits_overflowable(avail_stack)); + memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); tcache->prof_accumbytes = 0; tcache->next_gc_bin = 0; @@ -416,41 +466,43 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); - size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); + unsigned i = 0; + uintptr_t stack_cur = (uintptr_t)avail_stack; for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - /* - * avail points past the available space. Allocations will - * access the slots toward higher addresses (for the benefit of - * prefetch). - */ - tcache_small_bin_get(tcache, i)->avail = - (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); + cache_bin_t *bin = tcache_small_bin_get(tcache, i); + tcache_bin_init(bin, i, &stack_cur); } for (; i < nhbins; i++) { - stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *); - tcache_large_bin_get(tcache, i)->avail = - (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset); + cache_bin_t *bin = tcache_large_bin_get(tcache, i); + tcache_bin_init(bin, i, &stack_cur); } - assert(stack_offset == stack_nelms * sizeof(void *)); + + /* Sanity check that the whole stack is used. */ + size_t stack_offset = stack_cur - (uintptr_t)avail_stack; + assert(stack_offset + total_stack_padding == total_stack_bytes); +} + +static size_t +tcache_bin_stack_alignment (size_t size) { + /* Align pow2 to avoid overflow the cache bin compressed pointers. 
*/ + return (LG_SIZEOF_PTR == 3) ? pow2_ceil_zu(size) : CACHELINE; } /* Initialize auto tcache (embedded in TSD). */ bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); - assert(tcache_small_bin_get(tcache, 0)->avail == NULL); - size_t size = stack_nelms * sizeof(void *); + assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); /* Avoid false cacheline sharing. */ - size = sz_sa2u(size, CACHELINE); - - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, - NULL, true, arena_get(TSDN_NULL, 0, true)); + size_t size = sz_sa2u(total_stack_bytes, CACHELINE); + void *avail_array = ipallocztm(tsd_tsdn(tsd), size, + tcache_bin_stack_alignment(size), true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (avail_array == NULL) { return true; } @@ -485,25 +537,24 @@ tsd_tcache_data_init(tsd_t *tsd) { /* Created manual tcache for tcache.create mallctl. */ tcache_t * tcache_create_explicit(tsd_t *tsd) { - tcache_t *tcache; - size_t size, stack_offset; - - size = sizeof(tcache_t); + size_t size = sizeof(tcache_t); /* Naturally align the pointer stacks. */ size = PTR_CEILING(size); - stack_offset = size; - size += stack_nelms * sizeof(void *); + size_t stack_offset = size; + size += total_stack_bytes; /* Avoid false cacheline sharing. 
*/ size = sz_sa2u(size, CACHELINE); - tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true, + tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, + tcache_bin_stack_alignment(size), true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) { return NULL; } - tcache_init(tsd, tcache, - (void *)((uintptr_t)tcache + (uintptr_t)stack_offset)); + void *avail_array = (void *)((uintptr_t)tcache + + (uintptr_t)stack_offset); + tcache_init(tsd, tcache, avail_array); tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); return tcache; @@ -553,9 +604,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. */ - void *avail_array = - (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail - - (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *)); + cache_bin_t *bin = tcache_small_bin_get(tcache, 0); + assert(cache_bin_ncached_get(bin, 0) == 0); + assert(cache_bin_empty_position_get(bin, 0) == + bin->cur_ptr.ptr); + void *avail_array = bin->cur_ptr.ptr - + tcache_bin_info[0].ncached_max; idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { /* Release both the tcache struct and avail array. 
*/ @@ -587,16 +641,17 @@ tcache_cleanup(tsd_t *tsd) { if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); if (config_debug) { - assert(tcache_small_bin_get(tcache, 0)->avail == NULL); + assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr + == NULL); } return; } assert(tsd_tcache_enabled_get(tsd)); - assert(tcache_small_bin_get(tcache, 0)->avail != NULL); + assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL); tcache_destroy(tsd, tcache, true); if (config_debug) { - tcache_small_bin_get(tcache, 0)->avail = NULL; + tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL; } } @@ -755,8 +810,8 @@ tcache_boot(tsdn_t *tsdn) { if (tcache_bin_info == NULL) { return true; } + unsigned i, stack_nelms; stack_nelms = 0; - unsigned i; for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = @@ -775,6 +830,7 @@ tcache_boot(tsdn_t *tsdn) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; stack_nelms += tcache_bin_info[i].ncached_max; } + total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding; return false; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c new file mode 100644 index 00000000..74cf24cb --- /dev/null +++ b/test/unit/cache_bin.c @@ -0,0 +1,64 @@ +#include "test/jemalloc_test.h" + +cache_bin_t test_bin; + +TEST_BEGIN(test_cache_bin) { + cache_bin_t *bin = &test_bin; + cassert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); + /* Page aligned to make sure lowbits not overflowable. */ + void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); + + assert_ptr_not_null(stack, "Unexpected mallocx failure"); + /* Initialize to empty; bin 0. 
*/ + cache_bin_sz_t ncached_max = tcache_bin_info[0].ncached_max; + void **empty_position = stack + ncached_max; + bin->cur_ptr.ptr = empty_position; + bin->low_water_position = bin->cur_ptr.lowbits; + bin->full_position = (uint32_t)(uintptr_t)stack; + assert_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, + "Incorrect empty position"); + /* Not using assert_zu etc on cache_bin_sz_t since it may change. */ + assert_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); + + bool success; + void *ret = cache_bin_alloc_easy(bin, &success, 0); + assert_false(success, "Empty cache bin should not alloc"); + assert_true(cache_bin_low_water_get(bin, 0) == - 1, + "Incorrect low water mark"); + + cache_bin_ncached_set(bin, 0, 0); + assert_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); + for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { + success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); + assert_true(success && cache_bin_ncached_get(bin, 0) == i, + "Bin dalloc failure"); + } + success = cache_bin_dalloc_easy(bin, (void *)1); + assert_false(success, "Bin should be full"); + assert_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); + + cache_bin_ncached_set(bin, 0, ncached_max); + assert_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); + /* Emulate low water after refill. 
*/ + bin->low_water_position = bin->full_position; + for (cache_bin_sz_t i = ncached_max; i > 0; i--) { + ret = cache_bin_alloc_easy(bin, &success, 0); + cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0); + assert_true(success && ncached == i - 1, + "Cache bin alloc failure"); + assert_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); + assert_true(cache_bin_low_water_get(bin, 0) == ncached, + "Incorrect low water mark"); + } + + ret = cache_bin_alloc_easy(bin, &success, 0); + assert_false(success, "Empty cache bin should not alloc."); + assert_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, + "Bin should be empty"); +} +TEST_END + +int +main(void) { + return test(test_cache_bin); +} From 937ca1db9fa1f3c5c54e189049e181b6de5e7133 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Aug 2019 13:08:06 -0700 Subject: [PATCH 1352/2608] Store ncached_max * ptr_size in tcache_bin_info. With the cache bin metadata switched to pointers, ncached_max is usually accessed and timed by sizeof(ptr). Store the results in tcache_bin_info for direct access, and add a helper function for the ncached_max value. --- include/jemalloc/internal/cache_bin.h | 40 +++++++++++++--------- include/jemalloc/internal/tcache_inlines.h | 8 ++--- src/arena.c | 2 +- src/tcache.c | 34 +++++++++--------- test/unit/cache_bin.c | 2 +- 5 files changed, 45 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 67180cfa..775eb3fa 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -35,8 +35,8 @@ struct cache_bin_stats_s { */ typedef struct cache_bin_info_s cache_bin_info_t; struct cache_bin_info_s { - /* Upper limit on ncached. */ - cache_bin_sz_t ncached_max; + /* The size of the bin stack, i.e. ncached_max * sizeof(ptr). 
*/ + cache_bin_sz_t stack_size; }; extern cache_bin_info_t *tcache_bin_info; @@ -117,11 +117,18 @@ struct cache_bin_array_descriptor_s { * None of the cache_bin_*_get / _set functions is used on the fast path, which * relies on pointer comparisons to determine if the cache is full / empty. */ + +/* Returns ncached_max: Upper limit on ncached. */ +static inline cache_bin_sz_t +cache_bin_ncached_max_get(szind_t ind) { + return tcache_bin_info[ind].stack_size / sizeof(void *); +} + static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t n = tcache_bin_info[ind].ncached_max - - (bin->cur_ptr.lowbits - bin->full_position) / sizeof(void *); - assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + cache_bin_sz_t n = (tcache_bin_info[ind].stack_size + + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *); + assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; @@ -132,14 +139,13 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind); /* Low bits overflow disallowed when allocating the space. */ assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); - assert(bin->full_position + tcache_bin_info[ind].ncached_max * - sizeof(void *) > bin->full_position); /* Can also be computed via (full_position + ncached_max) | highbits. */ - assert(ret == (void **)((uintptr_t)(bin->full_position + - tcache_bin_info[ind].ncached_max * sizeof(void *)) | - (uintptr_t)((uintptr_t)bin->cur_ptr.ptr & - ~(((uint64_t)1 << 32) - 1)))); + uintptr_t lowbits = bin->full_position + + tcache_bin_info[ind].stack_size; + uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & + ~(((uint64_t)1 << 32) - 1); + assert(ret == (void **)(lowbits | highbits)); return ret; } @@ -156,10 +162,10 @@ cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { /* Returns the numeric value of low water in [-1, ncached]. 
*/ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t low_water = tcache_bin_info[ind].ncached_max - + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); + cache_bin_sz_t low_water = ncached_max - (bin->low_water_position - bin->full_position) / sizeof(void *); - assert(low_water >= -1 && low_water <= - tcache_bin_info[ind].ncached_max); + assert(low_water >= -1 && low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); @@ -169,8 +175,8 @@ cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { static inline void cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { bin->cur_ptr.lowbits = bin->full_position + - (tcache_bin_info[ind].ncached_max - n) * sizeof(void *); - assert(n >= 0 && n <= tcache_bin_info[ind].ncached_max); + tcache_bin_info[ind].stack_size - n * sizeof(void *); + assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } @@ -197,7 +203,7 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) { bin->low_water_position = bin->cur_ptr.lowbits; uint32_t empty_position = bin->full_position + - tcache_bin_info[ind].ncached_max * sizeof(void *); + tcache_bin_info[ind].stack_size; if (bin->cur_ptr.lowbits > empty_position) { bin->cur_ptr.ptr--; assert(bin->cur_ptr.lowbits == empty_position); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4f7e02a8..8988ae9d 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -174,8 +174,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - tcache_bin_flush_small(tsd, tcache, bin, binind, - 
tcache_bin_info[binind].ncached_max >> 1); + unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } @@ -198,8 +198,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - tcache_bin_flush_large(tsd, tcache, bin, binind, - tcache_bin_info[binind].ncached_max >> 1); + unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } diff --git a/src/arena.c b/src/arena.c index 23d0294b..b383befe 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1392,7 +1392,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); void **empty_position = cache_bin_empty_position_get(tbin, binind); - for (i = 0, nfill = (tcache_bin_info[binind].ncached_max >> + for (i = 0, nfill = (cache_bin_ncached_max_get(binind) >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { extent_t *slab; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > diff --git a/src/tcache.c b/src/tcache.c index d282e1fa..2594a029 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -70,8 +70,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; - if ((tbin_info->ncached_max >> + if ((cache_bin_ncached_max_get(binind) >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; } @@ -431,8 +430,7 @@ tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { * adjacent prefetch). 
*/ void *full_position = (void *)*stack_cur; - uint32_t bin_stack_size = tcache_bin_info[ind].ncached_max * - sizeof(void *); + uint32_t bin_stack_size = tcache_bin_info[ind].stack_size; *stack_cur += bin_stack_size; void *empty_position = (void *)*stack_cur; @@ -608,8 +606,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { assert(cache_bin_ncached_get(bin, 0) == 0); assert(cache_bin_empty_position_get(bin, 0) == bin->cur_ptr.ptr); - void *avail_array = bin->cur_ptr.ptr - - tcache_bin_info[0].ncached_max; + void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - + tcache_bin_info[0].stack_size); idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { /* Release both the tcache struct and avail array. */ @@ -810,27 +808,27 @@ tcache_boot(tsdn_t *tsdn) { if (tcache_bin_info == NULL) { return true; } - unsigned i, stack_nelms; - stack_nelms = 0; + unsigned i, ncached_max; + total_stack_bytes = 0; for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MIN; + ncached_max = TCACHE_NSLOTS_SMALL_MIN; } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { - tcache_bin_info[i].ncached_max = - (bin_infos[i].nregs << 1); + ncached_max = bin_infos[i].nregs << 1; } else { - tcache_bin_info[i].ncached_max = - TCACHE_NSLOTS_SMALL_MAX; + ncached_max = TCACHE_NSLOTS_SMALL_MAX; } - stack_nelms += tcache_bin_info[i].ncached_max; + unsigned stack_size = ncached_max * sizeof(void *); + tcache_bin_info[i].stack_size = stack_size; + total_stack_bytes += stack_size; } for (; i < nhbins; i++) { - tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE; - stack_nelms += tcache_bin_info[i].ncached_max; + unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *); + tcache_bin_info[i].stack_size = stack_size; + total_stack_bytes += stack_size; } - total_stack_bytes = stack_nelms * sizeof(void *) + total_stack_padding; + total_stack_bytes += 
total_stack_padding; return false; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 74cf24cb..d8900417 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -10,7 +10,7 @@ TEST_BEGIN(test_cache_bin) { assert_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ - cache_bin_sz_t ncached_max = tcache_bin_info[0].ncached_max; + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; From 0043e68d4c54a305d84ead95cae27a730540451b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 20 Aug 2019 18:14:18 -0700 Subject: [PATCH 1353/2608] Track low_water == -1 case explicitly. The -1 value of low_water indicates if the cache has been depleted and refilled. Track the status explicitly in the tcache struct. This allows the fast path to check if (cur_ptr > low_water), instead of >=, which avoids reaching slow path when the last item is allocated. --- include/jemalloc/internal/cache_bin.h | 15 +++++++-------- include/jemalloc/internal/tcache_structs.h | 2 ++ src/arena.c | 2 +- src/tcache.c | 19 ++++++++++++------- test/unit/cache_bin.c | 2 +- 5 files changed, 23 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 775eb3fa..7ec1ccbf 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -56,7 +56,7 @@ struct cache_bin_s { * 2) full points to the top of the stack (i.e. ncached == ncached_max), * which is compared against on free_fastpath to check "is_full". * 3) low_water indicates a low water mark of ncached. - * Range of low_water is [cur, empty + 1], i.e. values of [ncached, -1]. + * Range of low_water is [cur, empty], i.e. values of [ncached, 0]. 
* * The empty position (ncached == 0) is derived via full + ncached_max * and not accessed in the common case (guarded behind low_water). @@ -87,9 +87,7 @@ struct cache_bin_s { cache_bin_stats_t tstats; /* * Points to the first item that hasn't been used since last GC, to - * track the low water mark (min # of cached). It may point to - * empty_position + 1, which indicates the cache has been depleted and - * refilled (low_water == -1). + * track the low water mark (min # of cached). */ uint32_t low_water_position; /* @@ -165,7 +163,7 @@ cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); cache_bin_sz_t low_water = ncached_max - (bin->low_water_position - bin->full_position) / sizeof(void *); - assert(low_water >= -1 && low_water <= ncached_max); + assert(low_water >= 0 && low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); @@ -200,16 +198,17 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { * branch. This also avoids accessing tcache_bin_info (which is on a * separate cacheline / page) in the common case. */ - if (unlikely(bin->cur_ptr.lowbits >= bin->low_water_position)) { - bin->low_water_position = bin->cur_ptr.lowbits; + if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { uint32_t empty_position = bin->full_position + tcache_bin_info[ind].stack_size; - if (bin->cur_ptr.lowbits > empty_position) { + if (unlikely(bin->cur_ptr.lowbits > empty_position)) { + /* Over-allocated; revert. 
*/ bin->cur_ptr.ptr--; assert(bin->cur_ptr.lowbits == empty_position); *success = false; return NULL; } + bin->low_water_position = bin->cur_ptr.lowbits; } /* diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 172ef904..008b1f73 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -51,6 +51,8 @@ struct tcache_s { szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[SC_NBINS]; + /* For small bins, whether has been refilled since last GC. */ + bool bin_refilled[SC_NBINS]; /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. This might end up diff --git a/src/arena.c b/src/arena.c index b383befe..aa707f43 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1383,10 +1383,10 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, unsigned i, nfill, cnt; assert(cache_bin_ncached_get(tbin, binind) == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { prof_idump(tsdn); } + tcache->bin_refilled[binind] = true; unsigned binshard; bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); diff --git a/src/tcache.c b/src/tcache.c index 2594a029..8f89c55f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -17,8 +17,8 @@ cache_bin_info_t *tcache_bin_info; /* * For the total bin stack region (per tcache), reserve 2 more slots so that 1) * the empty position can be safely read on the fast path before checking - * "is_empty"; and 2) the low_water == -1 case can go beyond the empty position - * by 1 step safely (i.e. no overflow). + * "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step + * safely on the fast path (i.e. no overflow). 
*/ static const unsigned total_stack_padding = sizeof(void *) * 2; @@ -49,12 +49,14 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - cache_bin_t *tbin; + bool is_small; if (binind < SC_NBINS) { tbin = tcache_small_bin_get(tcache, binind); + is_small = true; } else { tbin = tcache_large_bin_get(tcache, binind); + is_small = false; } cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind); @@ -63,7 +65,8 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. */ - if (binind < SC_NBINS) { + if (is_small) { + assert(!tcache->bin_refilled[binind]); tcache_bin_flush_small(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); /* @@ -78,15 +81,16 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { tcache_bin_flush_large(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); } - } else if (low_water < 0) { - assert(low_water == -1); + } else if (is_small && tcache->bin_refilled[binind]) { + assert(low_water == 0); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. 
*/ - if (binind < SC_NBINS && tcache->lg_fill_div[binind] > 1) { + if (tcache->lg_fill_div[binind] > 1) { tcache->lg_fill_div[binind]--; } + tcache->bin_refilled[binind] = false; } tbin->low_water_position = tbin->cur_ptr.lowbits; @@ -472,6 +476,7 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { uintptr_t stack_cur = (uintptr_t)avail_stack; for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; + tcache->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); tcache_bin_init(bin, i, &stack_cur); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index d8900417..f469b8da 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -23,7 +23,7 @@ TEST_BEGIN(test_cache_bin) { bool success; void *ret = cache_bin_alloc_easy(bin, &success, 0); assert_false(success, "Empty cache bin should not alloc"); - assert_true(cache_bin_low_water_get(bin, 0) == - 1, + assert_true(cache_bin_low_water_get(bin, 0) == 0, "Incorrect low water mark"); cache_bin_ncached_set(bin, 0, 0); From 9e031c1d1128af879589f5e5c37960edd87238c6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 21 Aug 2019 16:38:44 -0700 Subject: [PATCH 1354/2608] Bug fix for prof_active switch The bug is subtle but critical: if application performs the following three actions in sequence: (a) turn `prof_active` off, (b) make at least one allocation that triggers the malloc slow path via the `if (unlikely(bytes_until_sample < 0))` path, and (c) turn `prof_active` back on, then the application would never get another sample (until a very very long time later). The fix is to properly reset `bytes_until_sample` rather than throwing it all the way to `SSIZE_MAX`. A side minor change is to call `prof_active_get_unlocked()` rather than directly grabbing the `prof_active` variable - it is the very reason why we defined the `prof_active_get_unlocked()` function. 
--- src/jemalloc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 75a40277..dd206884 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2356,13 +2356,15 @@ je_malloc(size_t size) { /* * Avoid a prof_active check on the fastpath. * If prof_active is false, set bytes_until_sample to - * a large value. If prof_active is set to true, + * sampling interval. If prof_active is set to true, * bytes_until_sample will be reset. */ - if (!prof_active) { - tsd_bytes_until_sample_set(tsd, SSIZE_MAX); + if (!prof_active_get_unlocked()) { + tsd_bytes_until_sample_set(tsd, + ((uint64_t)1U << lg_prof_sample)); + } else { + return malloc_default(size); } - return malloc_default(size); } } From 57b81c078e24cf05025f51dddc7c1b9353999390 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 12 Aug 2019 11:03:36 -0700 Subject: [PATCH 1355/2608] Pull thread_(de)allocated out of config_stats --- include/jemalloc/internal/tsd.h | 4 ++-- src/ctl.c | 31 ++++--------------------------- 2 files changed, 6 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 18b2476b..e2cc7747 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -18,8 +18,8 @@ * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- * s: state * e: tcache_enabled - * m: thread_allocated (config_stats) - * f: thread_deallocated (config_stats) + * m: thread_allocated + * f: thread_deallocated * b: bytes_until_sample (config_prof) * p: prof_tdata (config_prof) * c: rtree_ctx (rtree cache accessed on deallocation) diff --git a/src/ctl.c b/src/ctl.c index a89a7096..4bc09a34 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1577,25 +1577,6 @@ label_return: \ return ret; \ } -#define CTL_TSD_RO_NL_CGEN(c, n, m, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - 
t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - READONLY(); \ - oldval = (m(tsd)); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} - #define CTL_RO_CONFIG_GEN(n, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ @@ -1859,14 +1840,10 @@ label_return: return ret; } -CTL_TSD_RO_NL_CGEN(config_stats, thread_allocated, tsd_thread_allocated_get, - uint64_t) -CTL_TSD_RO_NL_CGEN(config_stats, thread_allocatedp, tsd_thread_allocatedp_get, - uint64_t *) -CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocated, tsd_thread_deallocated_get, - uint64_t) -CTL_TSD_RO_NL_CGEN(config_stats, thread_deallocatedp, - tsd_thread_deallocatedp_get, uint64_t *) +CTL_RO_NL_GEN(thread_allocated, tsd_thread_allocated_get(tsd), uint64_t) +CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *) +CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t) +CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *) static int thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, From 49e6fbce78ee2541e41f9d587ae5f31110433ce7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Aug 2019 15:56:47 -0700 Subject: [PATCH 1356/2608] Always adjust thread_(de)allocated --- src/jemalloc.c | 119 ++++++++++++++++++------------------------------- 1 file changed, 43 insertions(+), 76 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index dd206884..3961984b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2031,16 +2031,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* Filled in by compute_size_with_overflow below. */ size_t size = 0; /* - * For unaligned allocations, we need only ind. For aligned - * allocations, or in case of stats or profiling we need usize. - * - * These are actually dead stores, in that their values are reset before - * any branch on their value is taken. 
Sometimes though, it's - * convenient to pass them as arguments before this point. To avoid - * undefined behavior then, we initialize them with dummy stores. + * The zero initialization for ind is actually dead store, in that its + * value is reset before any branch on its value is taken. Sometimes + * though, it's convenient to pass it as arguments before this point. + * To avoid undefined behavior then, we initialize it with dummy stores. */ szind_t ind = 0; - size_t usize = 0; + /* usize will always be properly initialized. */ + size_t usize; /* Reentrancy is only checked on slow path. */ int8_t reentrancy_level; @@ -2063,12 +2061,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (unlikely(ind >= SC_NSIZES)) { goto label_oom; } - if (config_stats || (config_prof && opt_prof) || sopts->usize) { - usize = sz_index2size(ind); - dopts->usize = usize; - assert(usize > 0 && usize - <= SC_LARGE_MAXCLASS); - } + usize = sz_index2size(ind); + assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); + dopts->usize = usize; } else { if (sopts->bump_empty_aligned_alloc) { if (unlikely(size == 0)) { @@ -2077,8 +2072,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } usize = sz_sa2u(size, dopts->alignment); dopts->usize = usize; - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } } @@ -2107,26 +2101,23 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } + /* + * If dopts->alignment > 0, then ind is still 0, but usize was computed + * in the previous if statement. Down the positive alignment path, + * imalloc_no_sample and imalloc_sample will ignore ind. + */ + /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { - /* - * Note that if we're going down this path, usize must have been - * initialized in the previous if statement. 
- */ prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize - <= SC_SMALL_MAXCLASS); + alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind); } else if ((uintptr_t)tctx > (uintptr_t)1U) { - /* - * Note that ind might still be 0 here. This is fine; - * imalloc_sample ignores ind if dopts->alignment > 0. - */ allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); alloc_ctx.slab = false; @@ -2140,12 +2131,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { - /* - * If dopts->alignment > 0, then ind is still 0, but usize was - * computed in the previous if statement. Down the positive - * alignment path, imalloc_no_sample ignores ind and size - * (relying only on usize). - */ allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { @@ -2160,10 +2145,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { assert(dopts->alignment == 0 || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); - if (config_stats) { - assert(usize == isalloc(tsd_tsdn(tsd), allocation)); - *tsd_thread_allocatedp_get(tsd) += usize; - } + assert(usize == isalloc(tsd_tsdn(tsd), allocation)); + *tsd_thread_allocatedp_get(tsd) += usize; if (sopts->slow) { UTRACE(0, size, allocation); @@ -2339,11 +2322,12 @@ je_malloc(size_t size) { } szind_t ind = sz_size2index_lookup(size); - size_t usize; - if (config_stats || config_prof) { - usize = sz_index2size(ind); - } - /* Fast path relies on size being a bin. I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS */ + /* usize is always needed to increment thread_allocated. */ + size_t usize = sz_index2size(ind); + /* + * Fast path relies on size being a bin. + * I.e. 
SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS + */ assert(ind < SC_NBINS); assert(size <= SC_SMALL_MAXCLASS); @@ -2373,8 +2357,8 @@ je_malloc(size_t size) { void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind); if (tcache_success) { + *tsd_thread_allocatedp_get(tsd) += usize; if (config_stats) { - *tsd_thread_allocatedp_get(tsd) += usize; bin->tstats.nrequests++; } if (config_prof) { @@ -2573,16 +2557,11 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); - size_t usize; + size_t usize = sz_index2size(alloc_ctx.szind); if (config_prof && opt_prof) { - usize = sz_index2size(alloc_ctx.szind); prof_free(tsd, ptr, usize, &alloc_ctx); - } else if (config_stats) { - usize = sz_index2size(alloc_ctx.szind); - } - if (config_stats) { - *tsd_thread_deallocatedp_get(tsd) += usize; } + *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, @@ -2638,9 +2617,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, ctx); } - if (config_stats) { - *tsd_thread_deallocatedp_get(tsd) += usize; - } + + *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); @@ -2701,19 +2679,15 @@ je_realloc(void *ptr, size_t arg_size) { assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); + usize = sz_s2u(size); if (config_prof && opt_prof) { - usize = sz_s2u(size); - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { ret = NULL; } else { ret = irealloc_prof(tsd, ptr, old_usize, usize, &alloc_ctx, &hook_args); } } else { - if (config_stats) { - usize = sz_s2u(size); - } ret = iralloc(tsd, ptr, old_usize, size, 0, false, 
&hook_args); } @@ -2753,7 +2727,7 @@ je_realloc(void *ptr, size_t arg_size) { } set_errno(ENOMEM); } - if (config_stats && likely(ret != NULL)) { + if (likely(ret != NULL)) { tsd_t *tsd; assert(usize == isalloc(tsdn, ret)); @@ -2852,10 +2826,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - if (config_stats) { - size_t usize = sz_index2size(alloc_ctx.szind); - *tsd_thread_deallocatedp_get(tsd) += usize; - } + size_t usize = sz_index2size(alloc_ctx.szind); + *tsd_thread_deallocatedp_get(tsd) += usize; return true; } @@ -3267,8 +3239,7 @@ je_rallocx(void *ptr, size_t size, int flags) { if (config_prof && opt_prof) { usize = (alignment == 0) ? sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, @@ -3282,16 +3253,13 @@ je_rallocx(void *ptr, size_t size, int flags) { if (unlikely(p == NULL)) { goto label_oom; } - if (config_stats) { - usize = isalloc(tsd_tsdn(tsd), p); - } + usize = isalloc(tsd_tsdn(tsd), p); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - if (config_stats) { - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; + UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3439,10 +3407,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { goto label_not_resized; } - if (config_stats) { - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } + *tsd_thread_allocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) += old_usize; + label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; From adce29c88597c97f46fd02e28ce2689872ac1b0a Mon Sep 17 00:00:00 2001 
From: Yinan Zhang Date: Mon, 26 Aug 2019 14:41:32 -0700 Subject: [PATCH 1357/2608] Optimize for prof_active off Move the handling of `prof_active` off case completely to slow path, so as to reduce register pressure on malloc fast path. --- src/jemalloc.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3961984b..753fcbea 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2109,6 +2109,20 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { + /* + * The fast path modifies bytes_until_sample regardless of + * prof_active. We reset it to be the sample interval, so that + * there won't be excessive routings to the slow path, and that + * when prof_active is turned on later, the counting for + * sampling can immediately resume as normal (though the very + * first sampling interval is not randomized). + */ + if (unlikely(tsd_bytes_until_sample_get(tsd) < 0) && + !prof_active_get_unlocked()) { + tsd_bytes_until_sample_set(tsd, + (ssize_t)(1 << lg_prof_sample)); + } + prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); @@ -2131,6 +2145,16 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { + assert(!opt_prof); + /* + * The fast path modifies bytes_until_sample regardless of + * opt_prof. We reset it to a huge value here, so as to + * minimize the triggering for slow path. + */ + if (config_prof && + unlikely(tsd_bytes_until_sample_get(tsd) < 0)) { + tsd_bytes_until_sample_set(tsd, SSIZE_MAX); + } allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { @@ -2339,16 +2363,10 @@ je_malloc(size_t size) { if (unlikely(bytes_until_sample < 0)) { /* * Avoid a prof_active check on the fastpath. 
- * If prof_active is false, set bytes_until_sample to - * sampling interval. If prof_active is set to true, - * bytes_until_sample will be reset. + * If prof_active is false, bytes_until_sample will be + * reset in slow path. */ - if (!prof_active_get_unlocked()) { - tsd_bytes_until_sample_set(tsd, - ((uint64_t)1U << lg_prof_sample)); - } else { - return malloc_default(size); - } + return malloc_default(size); } } From 719583f14acc3dc0d24287e18a80b280e46aebb3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Aug 2019 13:18:50 -0700 Subject: [PATCH 1358/2608] Fix large.nflushes in the merged stats. --- src/ctl.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 4bc09a34..d6f803cd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -915,6 +915,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.ndalloc_large); ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); + ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large, + &astats->astats.nflushes_large); accum_atomic_zu(&sdstats->astats.abandoned_vm, &astats->astats.abandoned_vm); From 2abb02ecd74e7e65d3992a542ffb43abe91a8a7f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Aug 2019 14:15:54 -0700 Subject: [PATCH 1359/2608] Fix MSVC 2015 build, as proposed by @christianaguilera-foundry. 
--- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ 2 files changed, 4 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 387f14be..ed0e7b9f 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -65,6 +65,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 030d8266..bc40883b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files From 23dc7a7fba904d3893c0f335dfc2d16439b7109c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 30 Aug 2019 11:54:35 -0700 Subject: [PATCH 1360/2608] Fix index type for cache_bin_alloc_easy. --- include/jemalloc/internal/cache_bin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 7ec1ccbf..0ce3cab3 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -187,7 +187,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_sz_t ind) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. From 785b84e60382515f1bf1a63457da7a7ab5d0a96b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 30 Aug 2019 11:52:15 -0700 Subject: [PATCH 1361/2608] Make cache_bin_sz_t unsigned. The bin size type was made signed only because the low_water could go -1, which was already removed. 
--- include/jemalloc/internal/cache_bin.h | 16 ++++++---------- src/tcache.c | 2 ++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0ce3cab3..5396c2d9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -13,12 +13,8 @@ * of the tcache at all. */ -/* - * The count of the number of cached allocations in a bin. We make this signed - * so that negative numbers can encode "invalid" states (e.g. a low water mark - * of -1 for a cache that has been depleted). - */ -typedef int32_t cache_bin_sz_t; +/* The size in bytes of each cache bin stack. */ +typedef uint16_t cache_bin_sz_t; typedef struct cache_bin_stats_s cache_bin_stats_t; struct cache_bin_stats_s { @@ -126,7 +122,7 @@ static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t n = (tcache_bin_info[ind].stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *); - assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); + assert(n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; @@ -157,13 +153,13 @@ cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { return bottom; } -/* Returns the numeric value of low water in [-1, ncached]. */ +/* Returns the numeric value of low water in [0, ncached]. 
*/ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); cache_bin_sz_t low_water = ncached_max - (bin->low_water_position - bin->full_position) / sizeof(void *); - assert(low_water >= 0 && low_water <= ncached_max); + assert(low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); @@ -174,7 +170,7 @@ static inline void cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { bin->cur_ptr.lowbits = bin->full_position + tcache_bin_info[ind].stack_size - n * sizeof(void *); - assert(n >= 0 && n <= cache_bin_ncached_max_get(ind)); + assert(n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } diff --git a/src/tcache.c b/src/tcache.c index 8f89c55f..5dc2b0ad 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -825,6 +825,8 @@ tcache_boot(tsdn_t *tsdn) { ncached_max = TCACHE_NSLOTS_SMALL_MAX; } unsigned stack_size = ncached_max * sizeof(void *); + assert(stack_size < ((uint64_t)1 << + (sizeof(cache_bin_sz_t) * 8))); tcache_bin_info[i].stack_size = stack_size; total_stack_bytes += stack_size; } From 671f120e2669f9574449d4ddad06e561ac8553c3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 3 Sep 2019 17:11:06 -0700 Subject: [PATCH 1362/2608] Fix prof_backtrace() reentrancy level --- src/prof.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/prof.c b/src/prof.c index 79a0ffc8..f7311c38 100644 --- a/src/prof.c +++ b/src/prof.c @@ -208,8 +208,8 @@ bt_init(prof_bt_t *bt, void **vec) { } #ifdef JEMALLOC_PROF_LIBUNWIND -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { int nframes; cassert(config_prof); @@ -250,8 +250,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { return _URC_NO_REASON; } -void -prof_backtrace(prof_bt_t *bt) { +static void 
+prof_backtrace_impl(prof_bt_t *bt) { prof_unwind_data_t data = {bt, PROF_BT_MAX}; cassert(config_prof); @@ -259,8 +259,8 @@ prof_backtrace(prof_bt_t *bt) { _Unwind_Backtrace(prof_unwind_callback, &data); } #elif (defined(JEMALLOC_PROF_GCC)) -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { #define BT_FRAME(i) \ if ((i) < PROF_BT_MAX) { \ void *p; \ @@ -422,13 +422,22 @@ prof_backtrace(prof_bt_t *bt) { #undef BT_FRAME } #else -void -prof_backtrace(prof_bt_t *bt) { +static void +prof_backtrace_impl(prof_bt_t *bt) { cassert(config_prof); not_reached(); } #endif +void +prof_backtrace(prof_bt_t *bt) { + cassert(config_prof); + tsd_t *tsd = tsd_fetch(); + pre_reentrancy(tsd, NULL); + prof_backtrace_impl(bt); + post_reentrancy(tsd); +} + malloc_mutex_t * prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); From 93d61518005d868c08b597a2d39bdd1775b2a211 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 4 Sep 2019 09:24:34 -0700 Subject: [PATCH 1363/2608] Pass tsd down to prof_backtrace() --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 2 +- src/prof.c | 3 +-- src/prof_log.c | 2 +- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 8fc45cf7..c0471f52 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -56,7 +56,7 @@ void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(prof_bt_t *bt); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); diff --git a/include/jemalloc/internal/prof_inlines_b.h 
b/include/jemalloc/internal/prof_inlines_b.h index 8ba8a1e1..860dfbee 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -155,7 +155,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { bt_init(&bt, tdata->vec); - prof_backtrace(&bt); + prof_backtrace(tsd, &bt); ret = prof_lookup(tsd, &bt); } diff --git a/src/prof.c b/src/prof.c index f7311c38..6a0a9dee 100644 --- a/src/prof.c +++ b/src/prof.c @@ -430,9 +430,8 @@ prof_backtrace_impl(prof_bt_t *bt) { #endif void -prof_backtrace(prof_bt_t *bt) { +prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); - tsd_t *tsd = tsd_fetch(); pre_reentrancy(tsd, NULL); prof_backtrace_impl(bt); post_reentrancy(tsd); diff --git a/src/prof_log.c b/src/prof_log.c index 39976564..8274cfcf 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -246,7 +246,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { prof_bt_t bt; /* Initialize the backtrace, using the buffer in tdata to store it. */ bt_init(&bt, cons_tdata->vec); - prof_backtrace(&bt); + prof_backtrace(tsd, &bt); prof_bt_t *cons_bt = &bt; /* We haven't destroyed tctx yet, so gctx should be good to read. */ From 22bc75ee3e98fb45058fbee45210ed3ab65da6f4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 27 Aug 2019 13:44:41 -0700 Subject: [PATCH 1364/2608] Workaround the stringop-overflow check false positives. 
--- test/include/test/test.h | 2 ++ test/src/test.c | 13 +++++++++++++ test/unit/log.c | 17 +++++++++++------ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index fd0e5265..07f58a47 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -336,3 +336,5 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); + +void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index f97ce4d1..4583e55a 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -232,3 +232,16 @@ p_test_fail(const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } + +void +strncpy_cond(void *dst, const char *src, bool cond) { + if (cond) { + /* + * Avoid strcpy and explicitly set length to 0 because the + * `stringop-overflow` check may warn even if the specific test + * is unreachable. + */ + size_t n = cond ? 
strlen(src) + 1 : 0; + strncpy(dst, src, n); + } +} diff --git a/test/unit/log.c b/test/unit/log.c index a52bd737..10f45bc1 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -2,13 +2,18 @@ #include "jemalloc/internal/log.h" +static void +update_log_var_names(const char *names) { + strncpy_cond(log_var_names, names, config_log); +} + static void expect_no_logging(const char *names) { log_var_t log_l1 = LOG_VAR_INIT("l1"); log_var_t log_l2 = LOG_VAR_INIT("l2"); log_var_t log_l2_a = LOG_VAR_INIT("l2.a"); - strcpy(log_var_names, names); + update_log_var_names(names); int count = 0; @@ -50,7 +55,7 @@ TEST_BEGIN(test_log_enabled_direct) { int count; count = 0; - strcpy(log_var_names, "l1"); + update_log_var_names("l1"); for (int i = 0; i < 10; i++) { log_do_begin(log_l1) count++; @@ -59,7 +64,7 @@ TEST_BEGIN(test_log_enabled_direct) { assert_d_eq(count, 10, "Mis-logged!"); count = 0; - strcpy(log_var_names, "l1.a"); + update_log_var_names("l1.a"); for (int i = 0; i < 10; i++) { log_do_begin(log_l1_a) count++; @@ -68,7 +73,7 @@ TEST_BEGIN(test_log_enabled_direct) { assert_d_eq(count, 10, "Mis-logged!"); count = 0; - strcpy(log_var_names, "l1.a|abc|l2|def"); + update_log_var_names("l1.a|abc|l2|def"); for (int i = 0; i < 10; i++) { log_do_begin(log_l1_a) count++; @@ -85,7 +90,7 @@ TEST_END TEST_BEGIN(test_log_enabled_indirect) { test_skip_if(!config_log); atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); - strcpy(log_var_names, "l0|l1|abc|l2.b|def"); + update_log_var_names("l0|l1|abc|l2.b|def"); /* On. 
*/ log_var_t log_l1 = LOG_VAR_INIT("l1"); @@ -135,7 +140,7 @@ TEST_END TEST_BEGIN(test_log_enabled_global) { test_skip_if(!config_log); atomic_store_b(&log_init_done, true, ATOMIC_RELAXED); - strcpy(log_var_names, "abc|.|def"); + update_log_var_names("abc|.|def"); log_var_t log_l1 = LOG_VAR_INIT("l1"); log_var_t log_l2_a_a = LOG_VAR_INIT("l2.a.a"); From e06658cb24e9f880570c5a44a5ad6b11b620efc5 Mon Sep 17 00:00:00 2001 From: Giridhar Prasath R Date: Thu, 12 Sep 2019 07:35:32 +0530 Subject: [PATCH 1365/2608] check GNU make exists in path Signed-off-by: Giridhar Prasath R --- scripts/gen_run_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index a414f812..6875a495 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -14,7 +14,7 @@ nparallel = cpu_count() * 2 uname = uname()[0] -if "BSD" in uname: +if call("command -v gmake", shell=True) == 0: make_cmd = 'gmake' else: make_cmd = 'make' From 242af439b81044b2604a515ad5d3a8c2d6fbbdfd Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Mon, 9 Sep 2019 20:04:18 -0700 Subject: [PATCH 1366/2608] Rename "prof_dump_seq_mtx" to "prof_dump_filename_mtx". 
--- include/jemalloc/internal/witness.h | 2 +- src/prof.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index fff9e98c..d76b7908 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -57,7 +57,7 @@ #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_SEQ WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF diff --git a/src/prof.c b/src/prof.c index 6a0a9dee..c7c91ef4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -99,7 +99,7 @@ malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; -static malloc_mutex_t prof_dump_seq_mtx; +static malloc_mutex_t prof_dump_filename_mtx; static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; @@ -549,9 +549,9 @@ prof_fdump(void) { tsd = tsd_fetch(); assert(tsd_reentrancy_level_get(tsd) == 0); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump(tsd, false, filename, opt_prof_leak); } @@ -597,10 +597,10 @@ prof_idump(tsdn_t *tsdn) { if (opt_prof_prefix[0] != '\0') { char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(filename, 'i', prof_dump_iseq); prof_dump_iseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + 
malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump(tsd, false, filename, false); } } @@ -619,10 +619,10 @@ prof_mdump(tsd_t *tsd, const char *filename) { if (opt_prof_prefix[0] == '\0') { return true; } - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); filename = filename_buf; } return prof_dump(tsd, true, filename, false); @@ -654,10 +654,10 @@ prof_gdump(tsdn_t *tsdn) { if (opt_prof_prefix[0] != '\0') { char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsdn, &prof_dump_seq_mtx); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); prof_dump_filename(filename, 'u', prof_dump_useq); prof_dump_useq++; - malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); prof_dump(tsd, false, filename, false); } } @@ -946,8 +946,8 @@ prof_boot2(tsd_t *tsd) { return true; } - if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq", - WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", + WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", @@ -1028,7 +1028,7 @@ void prof_prefork1(tsdn_t *tsdn) { if (config_prof && opt_prof) { malloc_mutex_prefork(tsdn, &prof_active_mtx); - malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx); + malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); @@ -1044,7 +1044,7 @@ prof_postfork_parent(tsdn_t *tsdn) { &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_parent(tsdn, 
&prof_gdump_mtx); - malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); @@ -1066,7 +1066,7 @@ prof_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); - malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx); + malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); From 4b76c684bb8d7f0b7960bfac84391e9fd51a234e Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Mon, 9 Sep 2019 20:18:41 -0700 Subject: [PATCH 1367/2608] Add "prof.dump_prefix" to override filename prefixes for dumps. --- doc/jemalloc.xml.in | 42 +++++++-- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/prof_externs.h | 2 + include/jemalloc/internal/prof_types.h | 7 ++ src/ctl.c | 27 ++++++ src/prof.c | 106 +++++++++++++++++------ src/prof_log.c | 6 +- test/unit/prof_idump.c | 13 +++ 8 files changed, 166 insertions(+), 38 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5636fb90..e83bfbff 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1344,7 +1344,10 @@ malloc_conf = "xmalloc:true";]]> set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is - jeprof. + jeprof. This prefix value can be overriden by + prof.dump_prefix. + @@ -1423,8 +1426,10 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.i<iseq>.heap, where <prefix> is controlled by the opt.prof_prefix - option. 
By default, interval-triggered profile dumping is disabled + linkend="opt.prof_prefix">opt.prof_prefix and + prof.dump_prefix + options. By default, interval-triggered profile dumping is disabled (encoded as -1). @@ -1456,8 +1461,10 @@ malloc_conf = "xmalloc:true";]]> usage to a file named according to the pattern <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix - option. Note that atexit() may allocate + linkend="opt.prof_prefix">opt.prof_prefix and + prof.dump_prefix + options. Note that atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit(), so this option is not universally usable (though the application can @@ -2224,8 +2231,25 @@ struct extent_hooks_s { <prefix>.<pid>.<seq>.m<mseq>.heap, where <prefix> is controlled by the opt.prof_prefix and + prof.dump_prefix + options. + + + + + prof.dump_prefix + (const char *) + -w + [] + + Set the filename prefix for profile dumps. See + opt.prof_prefix - option. + for the default setting. This can be useful to differentiate profile + dumps such as from forked processes. + @@ -2240,8 +2264,10 @@ struct extent_hooks_s { dumped to files named according to the pattern <prefix>.<pid>.<seq>.u<useq>.heap, where <prefix> is controlled by the opt.prof_prefix - option. + linkend="opt.prof_prefix">opt.prof_prefix and + prof.dump_prefix + options. 
diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1d1aacc6..8ddf7f86 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -103,6 +103,7 @@ bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); void ctl_postfork_child(tsdn_t *tsdn); +void ctl_mtx_assert_held(tsdn_t *tsdn); #define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index c0471f52..7befad64 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -72,10 +72,12 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); #endif int prof_getpid(void); +void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); +bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 1eff995e..a50653bb 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -53,4 +53,11 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) #define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY +/* Minimize memory bloat for non-prof builds. 
*/ +#ifdef JEMALLOC_PROF +#define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +#else +#define PROF_DUMP_FILENAME_LEN 1 +#endif + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index d6f803cd..0beef6e0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -148,6 +148,7 @@ CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_gdump) +CTL_PROTO(prof_dump_prefix) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) @@ -413,6 +414,7 @@ static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("gdump"), CTL(prof_gdump)}, + {NAME("dump_prefix"), CTL(prof_dump_prefix)}, {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)}, @@ -1416,6 +1418,11 @@ ctl_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &ctl_mtx); } +void +ctl_mtx_assert_held(tsdn_t *tsdn) { + malloc_mutex_assert_owner(tsdn, &ctl_mtx); +} + /******************************************************************************/ /* *_ctl() functions. */ @@ -2720,6 +2727,26 @@ label_return: return ret; } +static int +prof_dump_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + const char *prefix = NULL; + + if (!config_prof) { + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + WRITEONLY(); + WRITE(prefix, const char *); + + ret = prof_dump_prefix_set(tsd_tsdn(tsd), prefix) ? 
EFAULT : 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static int prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { diff --git a/src/prof.c b/src/prof.c index c7c91ef4..9ea4eda4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" @@ -41,12 +42,7 @@ bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; -char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ -#ifdef JEMALLOC_PROF - PATH_MAX + -#endif - 1]; +char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; /* * Initialized as opt_prof_active, and accessed via @@ -106,6 +102,7 @@ static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; malloc_mutex_t prof_dump_mtx; +static char *prof_dump_prefix = NULL; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; @@ -514,26 +511,53 @@ prof_getpid(void) { #endif } +static const char * +prof_dump_prefix_get(tsdn_t* tsdn) { + malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); + + return prof_dump_prefix == NULL ? 
opt_prof_prefix : prof_dump_prefix; +} + +static bool +prof_dump_prefix_is_empty(tsdn_t *tsdn) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + return ret; +} + #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) #define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void -prof_dump_filename(char *filename, char v, uint64_t vseq) { +prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); + if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c%"FMTu64".heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, "%s.%d.%"FMTu64".%c.heap", - opt_prof_prefix, prof_getpid(), prof_dump_seq, v); + prof_prefix, prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } +void +prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), + ind); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); +} + static void prof_fdump(void) { tsd_t *tsd; @@ -541,16 +565,16 @@ prof_fdump(void) { cassert(config_prof); assert(opt_prof_final); - assert(opt_prof_prefix[0] != '\0'); if (!prof_booted) { return; } tsd = tsd_fetch(); assert(tsd_reentrancy_level_get(tsd) == 0); + assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(filename, 'f', VSEQ_INVALID); + prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump(tsd, 
false, filename, opt_prof_leak); } @@ -571,6 +595,31 @@ prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { return false; } +bool +prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { + cassert(config_prof); + ctl_mtx_assert_held(tsdn); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix == NULL) { + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + /* Everything is still guarded by ctl_mtx. */ + char *buffer = base_alloc(tsdn, b0get(), PROF_DUMP_FILENAME_LEN, + QUANTUM); + if (buffer == NULL) { + return true; + } + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + prof_dump_prefix = buffer; + } + assert(prof_dump_prefix != NULL); + + strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + + return false; +} + void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; @@ -595,14 +644,16 @@ prof_idump(tsdn_t *tsdn) { return; } - if (opt_prof_prefix[0] != '\0') { - char filename[PATH_MAX + 1]; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(filename, 'i', prof_dump_iseq); - prof_dump_iseq++; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + return; } + char filename[PATH_MAX + 1]; + prof_dump_filename(tsd, filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); } bool @@ -616,11 +667,12 @@ prof_mdump(tsd_t *tsd, const char *filename) { char filename_buf[DUMP_FILENAME_BUFSIZE]; if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ - if (opt_prof_prefix[0] == '\0') { + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); return true; } - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(filename_buf, 'm', prof_dump_mseq); + prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); prof_dump_mseq++; malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); filename = filename_buf; @@ -652,14 +704,16 @@ prof_gdump(tsdn_t *tsdn) { return; } - if (opt_prof_prefix[0] != '\0') { - char filename[DUMP_FILENAME_BUFSIZE]; - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - prof_dump_filename(filename, 'u', prof_dump_useq); - prof_dump_useq++; + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsdn)[0] == '\0') { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + return; } + char filename[DUMP_FILENAME_BUFSIZE]; + prof_dump_filename(tsd, filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); } static uint64_t diff --git a/src/prof_log.c b/src/prof_log.c index 8274cfcf..af91af7d 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -405,7 +405,6 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { } bool ret = false; - size_t buf_size = PATH_MAX + 1; malloc_mutex_lock(tsdn, &log_mtx); @@ -413,11 +412,10 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { ret = true; } else if (filename == NULL) { /* Make default name. 
*/ - malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json", - opt_prof_prefix, prof_getpid(), log_seq); + prof_get_default_filename(tsdn, log_filename, log_seq); log_seq++; prof_logging_state = prof_logging_state_started; - } else if (strlen(filename) >= buf_size) { + } else if (strlen(filename) >= PROF_DUMP_FILENAME_LEN) { ret = true; } else { strcpy(log_filename, filename); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 1cc6c98c..7a9b2882 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#define TEST_PREFIX "test_prefix" + static bool did_prof_dump_open; static int @@ -8,6 +10,10 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { did_prof_dump_open = true; + const char filename_prefix[] = TEST_PREFIX "."; + assert_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) + - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); + fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); @@ -18,9 +24,16 @@ TEST_BEGIN(test_idump) { bool active; void *p; + const char *dump_prefix = TEST_PREFIX; + test_skip_if(!config_prof); active = true; + + assert_d_eq(mallctl("prof.dump_prefix", NULL, NULL, + (void *)&dump_prefix, sizeof(dump_prefix)), 0, + "Unexpected mallctl failure while overwriting dump prefix"); + assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); From b7c7df24ba7c3b76b4985084de6e20356b26547e Mon Sep 17 00:00:00 2001 From: zhxchen17 Date: Wed, 14 Aug 2019 16:10:09 -0700 Subject: [PATCH 1368/2608] Add max_per_bg_thd stats for per background thread mutexes. Added a new stats row to aggregate the maximum value of mutex counters for each background threads. Given that the per bg thd mutex is not expected to be contended, this counter is mainly for sanity check / debugging. 
--- .../internal/background_thread_structs.h | 1 + include/jemalloc/internal/mutex.h | 53 +++++++++++++++---- include/jemalloc/internal/mutex_prof.h | 1 + src/background_thread.c | 8 ++- src/ctl.c | 3 ++ 5 files changed, 55 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index c02aa434..249115c3 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -48,6 +48,7 @@ struct background_thread_stats_s { size_t num_threads; uint64_t num_runs; nstime_t run_interval; + mutex_prof_data_t max_counter_per_bg_thd; }; typedef struct background_thread_stats_s background_thread_stats_t; diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 7c24f072..f5b1163a 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -245,22 +245,25 @@ malloc_mutex_assert_not_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); } -/* Copy the prof data from mutex for processing. */ static inline void -malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { - mutex_prof_data_t *source = &mutex->prof_data; - /* Can only read holding the mutex. */ - malloc_mutex_assert_owner(tsdn, mutex); - +malloc_mutex_prof_copy(mutex_prof_data_t *dst, mutex_prof_data_t *source) { /* * Not *really* allowed (we shouldn't be doing non-atomic loads of * atomic data), but the mutex protection makes this safe, and writing * a member-for-member copy is tedious for this situation. */ - *data = *source; + *dst = *source; /* n_wait_thds is not reported (modified w/o locking). */ - atomic_store_u32(&data->n_waiting_thds, 0, ATOMIC_RELAXED); + atomic_store_u32(&dst->n_waiting_thds, 0, ATOMIC_RELAXED); +} + +/* Copy the prof data from mutex for processing. 
*/ +static inline void +malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + malloc_mutex_prof_copy(data, &mutex->prof_data); } static inline void @@ -285,4 +288,36 @@ malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, data->n_lock_ops += source->n_lock_ops; } +/* Compare the prof data and update to the maximum. */ +static inline void +malloc_mutex_prof_max_update(tsdn_t *tsdn, mutex_prof_data_t *data, + malloc_mutex_t *mutex) { + mutex_prof_data_t *source = &mutex->prof_data; + /* Can only read holding the mutex. */ + malloc_mutex_assert_owner(tsdn, mutex); + + if (nstime_compare(&source->tot_wait_time, &data->tot_wait_time) > 0) { + nstime_copy(&data->tot_wait_time, &source->tot_wait_time); + } + if (nstime_compare(&source->max_wait_time, &data->max_wait_time) > 0) { + nstime_copy(&data->max_wait_time, &source->max_wait_time); + } + if (source->n_wait_times > data->n_wait_times) { + data->n_wait_times = source->n_wait_times; + } + if (source->n_spin_acquired > data->n_spin_acquired) { + data->n_spin_acquired = source->n_spin_acquired; + } + if (source->max_n_thds > data->max_n_thds) { + data->max_n_thds = source->max_n_thds; + } + if (source->n_owner_switches > data->n_owner_switches) { + data->n_owner_switches = source->n_owner_switches; + } + if (source->n_lock_ops > data->n_lock_ops) { + data->n_lock_ops = source->n_lock_ops; + } + /* n_wait_thds is not reported. 
*/ +} + #endif /* JEMALLOC_INTERNAL_MUTEX_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 6288ede5..190402e6 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -7,6 +7,7 @@ #define MUTEX_PROF_GLOBAL_MUTEXES \ OP(background_thread) \ + OP(max_per_bg_thd) \ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ diff --git a/src/background_thread.c b/src/background_thread.c index 57b9b256..bea445f2 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -794,9 +794,11 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { return true; } - stats->num_threads = n_background_threads; - uint64_t num_runs = 0; nstime_init(&stats->run_interval, 0); + memset(&stats->max_counter_per_bg_thd, 0, sizeof(mutex_prof_data_t)); + + uint64_t num_runs = 0; + stats->num_threads = n_background_threads; for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; if (malloc_mutex_trylock(tsdn, &info->mtx)) { @@ -809,6 +811,8 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); + malloc_mutex_prof_max_update(tsdn, + &stats->max_counter_per_bg_thd, &info->mtx); } malloc_mutex_unlock(tsdn, &info->mtx); } diff --git a/src/ctl.c b/src/ctl.c index 0beef6e0..3ec6ca24 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1042,6 +1042,9 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) { memset(stats, 0, sizeof(background_thread_stats_t)); nstime_init(&stats->run_interval, 0); } + malloc_mutex_prof_copy( + &ctl_stats->mutex_prof_data[global_prof_mutex_max_per_bg_thd], + &stats->max_counter_per_bg_thd); } static void From ac5185f73e4dc6b8d9a48b7080d07b11ef231765 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 23 Aug 2019 16:06:50 -0700 Subject: [PATCH 
1369/2608] Fix tcache bin stack alignment. Set the proper alignment when allocating space for the tcache bin stack. --- src/tcache.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 5dc2b0ad..e17b67a3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -492,8 +492,16 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { static size_t tcache_bin_stack_alignment (size_t size) { + /* + * 1) Align to at least PAGE, to minimize the # of TLBs needed by the + * smaller sizes; also helps if the larger sizes don't get used at all. + * 2) On 32-bit the pointers won't be compressed; use minimal alignment. + */ + if (LG_SIZEOF_PTR < 3 || size < PAGE) { + return PAGE; + } /* Align pow2 to avoid overflow the cache bin compressed pointers. */ - return (LG_SIZEOF_PTR == 3) ? pow2_ceil_zu(size) : CACHELINE; + return pow2_ceil_zu(size); } /* Initialize auto tcache (embedded in TSD). */ @@ -501,11 +509,11 @@ bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); - /* Avoid false cacheline sharing. */ - size_t size = sz_sa2u(total_stack_bytes, CACHELINE); - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, - tcache_bin_stack_alignment(size), true, NULL, true, - arena_get(TSDN_NULL, 0, true)); + size_t alignment = tcache_bin_stack_alignment(total_stack_bytes); + size_t size = sz_sa2u(total_stack_bytes, alignment); + + void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true, + NULL, true, arena_get(TSDN_NULL, 0, true)); if (avail_array == NULL) { return true; } @@ -545,12 +553,11 @@ tcache_create_explicit(tsd_t *tsd) { size = PTR_CEILING(size); size_t stack_offset = size; size += total_stack_bytes; - /* Avoid false cacheline sharing. 
*/ - size = sz_sa2u(size, CACHELINE); + size_t alignment = tcache_bin_stack_alignment(size); + size = sz_sa2u(size, alignment); - tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, - tcache_bin_stack_alignment(size), true, NULL, true, - arena_get(TSDN_NULL, 0, true)); + tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true, + NULL, true, arena_get(TSDN_NULL, 0, true)); if (tcache == NULL) { return NULL; } From d1be488cd8ceab285b93265ae70a258779ab8310 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Sep 2019 10:31:30 -0700 Subject: [PATCH 1370/2608] Add --with-lg-page=16 to CI. --- .travis.yml | 27 +++++++++++++++++++++++++++ scripts/gen_run_tests.py | 1 + scripts/gen_travis.py | 1 + 3 files changed, 29 insertions(+) diff --git a/.travis.yml b/.travis.yml index 2da5da8e..777aa3ec 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -45,6 +47,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -60,6 +64,8 @@ matrix: env: 
CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -83,6 +89,9 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + addons: *gcc_multilib - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" addons: *gcc_multilib @@ -103,6 +112,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -117,6 +128,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: 
linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -129,6 +142,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -139,6 +154,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -147,6 +164,8 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux @@ -155,6 +174,14 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 6875a495..77c2ce53 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -41,6 +41,7 @@ possible_config_opts = [ '--enable-prof', '--disable-stats', '--enable-opt-safety-checks', + '--with-lg-page=16', ] if bits_64: possible_config_opts.append('--with-lg-vaddr=56') diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index f1478c62..b46bd001 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -47,6 
+47,7 @@ configure_flag_unusuals = [ '--disable-stats', '--disable-libdl', '--enable-opt-safety-checks', + '--with-lg-page=16', ] malloc_conf_unusuals = [ From e7cf84a8dd19af5957f2542934180fe95fdb0885 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 16:36:42 -0700 Subject: [PATCH 1371/2608] Rearrange slab data and constants The constants logically belong in the sc module. The slab data bitmap isn't really scoped to an arena; move it to its own module. --- include/jemalloc/internal/arena_structs_a.h | 11 ----------- include/jemalloc/internal/arena_types.h | 5 ----- include/jemalloc/internal/bitmap.h | 5 ++--- include/jemalloc/internal/extent_inlines.h | 4 ++-- include/jemalloc/internal/extent_structs.h | 5 +++-- .../jemalloc/internal/jemalloc_internal_includes.h | 1 - include/jemalloc/internal/sc.h | 5 +++++ include/jemalloc/internal/slab_data.h | 12 ++++++++++++ src/arena.c | 10 +++++----- 9 files changed, 29 insertions(+), 29 deletions(-) delete mode 100644 include/jemalloc/internal/arena_structs_a.h create mode 100644 include/jemalloc/internal/slab_data.h diff --git a/include/jemalloc/internal/arena_structs_a.h b/include/jemalloc/internal/arena_structs_a.h deleted file mode 100644 index 46aa77c8..00000000 --- a/include/jemalloc/internal/arena_structs_a.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H - -#include "jemalloc/internal/bitmap.h" - -struct arena_slab_data_s { - /* Per region allocated/deallocated bitmap. */ - bitmap_t bitmap[BITMAP_GROUPS_MAX]; -}; - -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_A_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 624937e4..369dff06 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -3,17 +3,12 @@ #include "jemalloc/internal/sc.h" -/* Maximum number of regions in one slab. 
*/ -#define LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) -#define SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) - /* Default decay times in milliseconds. */ #define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) #define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ #define DECAY_NTICKS_PER_UPDATE 1000 -typedef struct arena_slab_data_s arena_slab_data_t; typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index c3f9cb49..f7152a6a 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_BITMAP_H #define JEMALLOC_INTERNAL_BITMAP_H -#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" @@ -9,9 +8,9 @@ typedef unsigned long bitmap_t; #define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ -#if LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) +#if SC_LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) /* Maximum bitmap bit count is determined by maximum regions per slab. */ -# define LG_BITMAP_MAXBITS LG_SLAB_MAXREGS +# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS #else /* Maximum bitmap bit count is determined by number of extent size classes. 
*/ # define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 77fa4c4a..97dca048 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -168,13 +168,13 @@ extent_past_get(const extent_t *extent) { extent_size_get(extent)); } -static inline arena_slab_data_t * +static inline slab_data_t * extent_slab_data_get(extent_t *extent) { assert(extent_slab_get(extent)); return &extent->e_slab_data; } -static inline const arena_slab_data_t * +static inline const slab_data_t * extent_slab_data_get_const(const extent_t *extent) { assert(extent_slab_get(extent)); return &extent->e_slab_data; diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 767cd893..827bd3b4 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -8,6 +8,7 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/slab_data.h" typedef enum { extent_state_active = 0, @@ -120,7 +121,7 @@ struct extent_s { #define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) #define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) #define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) #define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) @@ -170,7 +171,7 @@ struct extent_s { union { /* Small region slab metadata. */ - arena_slab_data_t e_slab_data; + slab_data_t e_slab_data; /* Profiling data, used for large objects. 
*/ struct { diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 437eaa40..cb76a5e8 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -50,7 +50,6 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/arena_structs_a.h" #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 9a099d8b..a6341a3d 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -264,6 +264,11 @@ /* The largest size class supported. */ #define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) +/* Maximum number of regions in one slab. */ +#define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) +#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) + + typedef struct sc_s sc_t; struct sc_s { /* Size class index, or -1 if not a valid size class. */ diff --git a/include/jemalloc/internal/slab_data.h b/include/jemalloc/internal/slab_data.h new file mode 100644 index 00000000..e821863d --- /dev/null +++ b/include/jemalloc/internal/slab_data.h @@ -0,0 +1,12 @@ +#ifndef JEMALLOC_INTERNAL_SLAB_DATA_H +#define JEMALLOC_INTERNAL_SLAB_DATA_H + +#include "jemalloc/internal/bitmap.h" + +typedef struct slab_data_s slab_data_t; +struct slab_data_s { + /* Per region allocated/deallocated bitmap. 
*/ + bitmap_t bitmap[BITMAP_GROUPS_MAX]; +}; + +#endif /* JEMALLOC_INTERNAL_SLAB_DATA_H */ diff --git a/src/arena.c b/src/arena.c index aa707f43..1a3cf7b9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -270,7 +270,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, static void * arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { void *ret; - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; assert(extent_nfree_get(slab) > 0); @@ -286,7 +286,7 @@ arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { static void arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, unsigned cnt, void** ptrs) { - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); assert(extent_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); @@ -356,7 +356,7 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { } static void -arena_slab_reg_dalloc(extent_t *slab, arena_slab_data_t *slab_data, void *ptr) { +arena_slab_reg_dalloc(extent_t *slab, slab_data_t *slab_data, void *ptr) { szind_t binind = extent_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); @@ -1253,7 +1253,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard assert(extent_slab_get(slab)); /* Initialize slab internals. 
*/ - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); extent_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); @@ -1686,7 +1686,7 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, extent_t *slab, void *ptr, bool junked) { - arena_slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = extent_slab_data_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { From 529cfe2abc7d10272c218a2b9047a85a49a9cd2a Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 16:43:54 -0700 Subject: [PATCH 1372/2608] Arena: rename arena_structs_b.h -> arena_structs.h arena_structs_a.h was removed in the previous commit. --- .../jemalloc/internal/{arena_structs_b.h => arena_structs.h} | 0 include/jemalloc/internal/jemalloc_internal_includes.h | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename include/jemalloc/internal/{arena_structs_b.h => arena_structs.h} (100%) diff --git a/include/jemalloc/internal/arena_structs_b.h b/include/jemalloc/internal/arena_structs.h similarity index 100% rename from include/jemalloc/internal/arena_structs_b.h rename to include/jemalloc/internal/arena_structs.h diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index cb76a5e8..55fcf3eb 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -53,7 +53,7 @@ #include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" -#include "jemalloc/internal/arena_structs_b.h" +#include "jemalloc/internal/arena_structs.h" #include 
"jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" From 41187bdfb024dcadcb0c279572dd6440084655f3 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 18:20:22 -0700 Subject: [PATCH 1373/2608] Extents: Break extent-struct/arena interactions Specifically, the extent_arena_[g|s]et functions and the address randomization. These are the only things that tie the extent struct itself to the arena code. --- include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/extent_inlines.h | 43 ++---------- src/arena.c | 10 ++- src/ctl.c | 3 +- src/extent.c | 72 +++++++++++++++------ src/extent_dss.c | 9 +-- src/large.c | 30 ++++++--- test/unit/rtree.c | 12 ++-- test/unit/slab.c | 7 +- 9 files changed, 106 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index dd926575..917a4916 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -178,7 +178,8 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return extent_arena_get(iealloc(tsdn, ptr)); + return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get( + iealloc(tsdn, ptr))], ATOMIC_RELAXED); } JEMALLOC_ALWAYS_INLINE size_t diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 97dca048..95be0848 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -44,13 +44,6 @@ extent_arena_ind_get(const extent_t *extent) { return arena_ind; } -static inline arena_t * -extent_arena_get(const extent_t *extent) { - unsigned arena_ind = extent_arena_ind_get(extent); - - return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_ACQUIRE); -} - static inline szind_t extent_szind_get_maybe_invalid(const extent_t 
*extent) { szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> @@ -192,9 +185,7 @@ extent_prof_alloc_time_get(const extent_t *extent) { } static inline void -extent_arena_set(extent_t *extent, arena_t *arena) { - unsigned arena_ind = (arena != NULL) ? arena_ind_get(arena) : ((1U << - MALLOCX_ARENA_BITS) - 1); +extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); } @@ -212,32 +203,6 @@ extent_addr_set(extent_t *extent, void *addr) { extent->e_addr = addr; } -static inline void -extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_offset_statep_get(tsd), lg_range); - } else { - r = prng_lg_range_zu( - &extent_arena_get(extent)->offset_state, - lg_range, true); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); - } -} - static inline void extent_size_set(extent_t *extent, size_t size) { assert((size & ~EXTENT_SIZE_MASK) == 0); @@ -364,12 +329,12 @@ extent_is_head_set(extent_t *extent, bool is_head) { } static inline void -extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, +extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, bool committed, bool dumpable, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); - extent_arena_set(extent, arena); + extent_arena_ind_set(extent, arena_ind); extent_addr_set(extent, addr); 
extent_size_set(extent, size); extent_slab_set(extent, slab); @@ -391,7 +356,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, static inline void extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_set(extent, NULL); + extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); extent_addr_set(extent, addr); extent_bsize_set(extent, bsize); extent_slab_set(extent, false); diff --git a/src/arena.c b/src/arena.c index 1a3cf7b9..231d6681 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1566,7 +1566,8 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); @@ -1731,7 +1732,8 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); @@ -1767,7 +1769,9 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_t *arena = atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + arena_decay_tick(tsdn, arena); ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { diff --git a/src/ctl.c b/src/ctl.c index 3ec6ca24..2be2f328 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2612,7 +2612,8 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, if (extent == NULL) goto label_return; - arena = extent_arena_get(extent); + arena = 
atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); if (arena == NULL) goto label_return; diff --git a/src/extent.c b/src/extent.c index 9237f903..aac54556 100644 --- a/src/extent.c +++ b/src/extent.c @@ -176,6 +176,32 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, return ret; } +static void +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t alignment) { + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu(&arena->offset_state, lg_range, + true); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + extent_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); @@ -671,7 +697,7 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *extent) { - assert(extent_arena_get(extent) == arena); + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); @@ -689,7 +715,7 @@ extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, extent_t *extent) { - assert(extent_arena_get(extent) == arena); + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extents_state_get(extents)); extents_remove_locked(tsdn, extents, extent); @@ -927,7 +953,8 @@ 
extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, */ extent_t *unlock_extent = extent; assert(extent_base_get(extent) == new_addr); - if (extent_arena_get(extent) != arena || + if (extent_arena_ind_get(extent) + != arena_ind_get(arena) || extent_size_get(extent) < esize || extent_state_get(extent) != extents_state_get(extents)) { @@ -1172,7 +1199,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } if (pad != 0) { - extent_addr_randomize(tsdn, extent, alignment); + extent_addr_randomize(tsdn, arena, extent, alignment); } assert(extent_state_get(extent) == extent_state_active); if (slab) { @@ -1342,8 +1369,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_hook_post_reentrancy(tsdn); } - extent_init(extent, arena, ptr, alloc_size, false, SC_NSIZES, - arena_extent_sn_next(arena), extent_state_active, zeroed, + extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); @@ -1434,7 +1461,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } if (pad != 0) { - extent_addr_randomize(tsdn, extent, alignment); + extent_addr_randomize(tsdn, arena, extent, alignment); } if (slab) { rtree_ctx_t rtree_ctx_fallback; @@ -1513,11 +1540,11 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_dalloc(tsdn, arena, extent); return NULL; } - extent_init(extent, arena, addr, esize, slab, szind, + extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (pad != 0) { - extent_addr_randomize(tsdn, extent, alignment); + extent_addr_randomize(tsdn, arena, extent, alignment); } if (extent_register(tsdn, extent)) { extent_dalloc(tsdn, arena, extent); @@ -1559,8 +1586,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, static bool 
extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, const extent_t *outer) { - assert(extent_arena_get(inner) == arena); - if (extent_arena_get(outer) != arena) { + assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); + if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { return false; } @@ -2105,11 +2132,11 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, goto label_error_a; } - extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + - size_a), size_b, slab_b, szind_b, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent), - EXTENT_NOT_HEAD); + extent_init(trail, arena_ind_get(arena), + (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, + slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_dumpable_get(extent), EXTENT_NOT_HEAD); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -2117,7 +2144,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, { extent_t lead; - extent_init(&lead, arena, extent_addr_get(extent), size_a, + extent_init(&lead, arena_ind_get(arena), + extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), extent_committed_get(extent), extent_dumpable_get(extent), @@ -2304,7 +2332,12 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock2(tsdn, a, b); - extent_dalloc(tsdn, extent_arena_get(b), b); + /* + * If we got here, we merged the extents; so they must be from the same + * arena (i.e. this one). 
+ */ + assert(extent_arena_ind_get(b) == arena_ind_get(arena)); + extent_dalloc(tsdn, arena, b); return false; } @@ -2384,7 +2417,8 @@ extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(*nfree <= *nregs); assert(*nfree * extent_usize_get(extent) <= *size); - const arena_t *arena = extent_arena_get(extent); + const arena_t *arena = (arena_t *)atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); assert(arena != NULL); const unsigned binshard = extent_binshard_get(extent); bin_t *bin = &arena->bins[szind].bin_shards[binshard]; diff --git a/src/extent_dss.c b/src/extent_dss.c index 85817891..eb074800 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -153,9 +153,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t gap_size_page = (uintptr_t)ret - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { - extent_init(gap, arena, gap_addr_page, - gap_size_page, false, SC_NSIZES, - arena_extent_sn_next(arena), + extent_init(gap, arena_ind_get(arena), + gap_addr_page, gap_size_page, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, false, true, true, EXTENT_NOT_HEAD); } @@ -198,7 +198,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, EXTENT_HOOKS_INITIALIZER; extent_t extent; - extent_init(&extent, arena, ret, size, + extent_init(&extent, + arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, true, EXTENT_NOT_HEAD); diff --git a/src/large.c b/src/large.c index 8e7a781d..a5c2f9ab 100644 --- a/src/large.c +++ b/src/large.c @@ -94,7 +94,8 @@ large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = 
extent_hooks_get(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); @@ -130,7 +131,8 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { static bool large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t trailsize = usize - oldusize; @@ -230,14 +232,18 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* Attempt to expand the allocation in-place. */ if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, + atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED)); return false; } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > oldusize && large_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED)); return false; } } @@ -247,14 +253,17 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * the new size. */ if (oldusize >= usize_min && oldusize <= usize_max) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED)); return false; } /* Attempt to shrink the allocation in-place. 
*/ if (oldusize > usize_max) { if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { - arena_decay_tick(tsdn, extent_arena_get(extent)); + arena_decay_tick(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED)); return false; } } @@ -348,17 +357,20 @@ large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_prep_impl(tsdn, extent_arena_get(extent), extent, true); + large_dalloc_prep_impl(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent, true); } void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_finish_impl(tsdn, extent_arena_get(extent), extent); + large_dalloc_finish_impl(tsdn, atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent); } void large_dalloc(tsdn_t *tsdn, extent_t *extent) { - arena_t *arena = extent_arena_get(extent); + arena_t *arena = atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); large_dalloc_prep_impl(tsdn, arena, extent, false); large_dalloc_finish_impl(tsdn, arena, extent); arena_decay_tick(tsdn, arena); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 90adca13..9105e3e5 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/rtree.h" +#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) + rtree_node_alloc_t *rtree_node_alloc_orig; rtree_node_dalloc_t *rtree_node_dalloc_orig; rtree_leaf_alloc_t *rtree_leaf_alloc_orig; @@ -85,10 +87,10 @@ TEST_END TEST_BEGIN(test_rtree_extrema) { extent_t extent_a, extent_b; - extent_init(&extent_a, NULL, NULL, SC_LARGE_MINCLASS, false, - sz_size2index(SC_LARGE_MINCLASS), 0, + extent_init(&extent_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, + false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - extent_init(&extent_b, NULL, NULL, 0, false, SC_NSIZES, 0, + 
extent_init(&extent_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -125,7 +127,7 @@ TEST_BEGIN(test_rtree_bits) { PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; extent_t extent; - extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, + extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; @@ -166,7 +168,7 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_data_init(&rtree_ctx); extent_t extent; - extent_init(&extent, NULL, NULL, 0, false, SC_NSIZES, 0, + extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index c56af25f..bcc752e4 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) + TEST_BEGIN(test_arena_slab_regind) { szind_t binind; @@ -7,8 +9,9 @@ TEST_BEGIN(test_arena_slab_regind) { size_t regind; extent_t slab; const bin_info_t *bin_info = &bin_infos[binind]; - extent_init(&slab, NULL, mallocx(bin_info->slab_size, - MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, + extent_init(&slab, INVALID_ARENA_IND, + mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), + bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true, EXTENT_NOT_HEAD); assert_ptr_not_null(extent_addr_get(&slab), From 723ccc6c2757974112d31d254bcf74bf2beac6ec Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 16:18:41 -0700 Subject: [PATCH 1374/2608] Extents: Split out extent struct. 
--- Makefile.in | 1 + include/jemalloc/internal/base_structs.h | 1 + include/jemalloc/internal/bin.h | 45 +- include/jemalloc/internal/bin_info.h | 50 ++ include/jemalloc/internal/extent.h | 626 ++++++++++++++++++ include/jemalloc/internal/extent_inlines.h | 428 ------------ include/jemalloc/internal/extent_structs.h | 177 ----- include/jemalloc/internal/extent_types.h | 6 - .../internal/jemalloc_internal_includes.h | 3 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/bin.c | 26 - src/bin_info.c | 30 + src/jemalloc.c | 6 +- 14 files changed, 715 insertions(+), 686 deletions(-) create mode 100644 include/jemalloc/internal/bin_info.h create mode 100644 include/jemalloc/internal/extent.h create mode 100644 src/bin_info.c diff --git a/Makefile.in b/Makefile.in index 7584f598..62ae71f0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -99,6 +99,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ $(srcroot)src/bin.c \ + $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 07f214eb..cc0f9a57 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H #define JEMALLOC_INTERNAL_BASE_STRUCTS_H +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 8547e893..70250a40 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/extent_types.h" #include 
"jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" @@ -12,49 +13,6 @@ * A bin contains a set of extents that are currently being used for slab * allocations. */ - -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. - * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -typedef struct bin_info_s bin_info_t; -struct bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* Number of sharded bins in each arena for this size class. */ - uint32_t n_shards; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - -extern bin_info_t bin_infos[SC_NBINS]; - typedef struct bin_s bin_t; struct bin_s { /* All operations on bin_t fields require lock ownership. */ @@ -92,7 +50,6 @@ struct bins_s { void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]); bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); -void bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); /* Initializes a bin to empty. Returns true on error. 
*/ bool bin_init(bin_t *bin); diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h new file mode 100644 index 00000000..7fe65c86 --- /dev/null +++ b/include/jemalloc/internal/bin_info.h @@ -0,0 +1,50 @@ +#ifndef JEMALLOC_INTERNAL_BIN_INFO_H +#define JEMALLOC_INTERNAL_BIN_INFO_H + +#include "jemalloc/internal/bitmap.h" + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* Number of sharded bins in each arena for this size class. */ + uint32_t n_shards; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. 
+ */ + bitmap_info_t bitmap_info; +}; + +extern bin_info_t bin_infos[SC_NBINS]; + +void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); + +#endif /* JEMALLOC_INTERNAL_BIN_INFO_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h new file mode 100644 index 00000000..92c34aec --- /dev/null +++ b/include/jemalloc/internal/extent.h @@ -0,0 +1,626 @@ +#ifndef JEMALLOC_INTERNAL_EXTENT_H +#define JEMALLOC_INTERNAL_EXTENT_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/slab_data.h" +#include "jemalloc/internal/sz.h" + +enum extent_state_e { + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, + extent_state_retained = 3 +}; +typedef enum extent_state_e extent_state_t; + +enum extent_head_state_e { + EXTENT_NOT_HEAD, + EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ +}; +typedef enum extent_head_state_e extent_head_state_t; + +/* Extent (span of pages). Use accessor functions for e_* fields. */ +typedef struct extent_s extent_t; +typedef ql_head(extent_t) extent_list_t; +typedef ph(extent_t) extent_tree_t; +typedef ph(extent_t) extent_heap_t; +struct extent_s { + /* + * Bitfield containing several fields: + * + * a: arena_ind + * b: slab + * c: committed + * d: dumpable + * z: zeroed + * t: state + * i: szind + * f: nfree + * s: bin_shard + * n: sn + * + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * + * arena_ind: Arena from which this extent came, or all 1 bits if + * unassociated. + * + * slab: The slab flag indicates whether the extent is used for a slab + * of small regions. This helps differentiate small size classes, + * and it indicates whether interior pointers can be looked up via + * iealloc(). 
+ * + * committed: The committed flag indicates whether physical memory is + * committed to the extent, whether explicitly or implicitly + * as on a system that overcommits and satisfies physical + * memory needs on demand via soft page faults. + * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * + * zeroed: The zeroed flag is used by extent recycling code to track + * whether memory is zero-filled. + * + * state: The state flag is an extent_state_t. + * + * szind: The szind flag indicates usable size class index for + * allocations residing in this extent, regardless of whether the + * extent is a slab. Extent size and usable size often differ + * even for non-slabs, either due to sz_large_pad or promotion of + * sampled small regions. + * + * nfree: Number of free regions in slab. + * + * bin_shard: the shard of the bin from which this extent came. + * + * sn: Serial number (potentially non-unique). + * + * Serial numbers may wrap around if !opt_retain, but as long as + * comparison functions fall back on address comparison for equal + * serial numbers, stable (if imperfect) ordering is maintained. + * + * Serial numbers may not be unique even in the absence of + * wrap-around, e.g. 
when splitting an extent and assigning the same + * serial number to both resulting adjacent extents. + */ + uint64_t e_bits; +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) + +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) + +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) + +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) + +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) + +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) + +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) + +#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define 
EXTENT_BITS_BINSHARD_WIDTH 6 +#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) + +#define EXTENT_BITS_IS_HEAD_WIDTH 1 +#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) +#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) + + /* Pointer to the extent that this structure is responsible for. */ + void *e_addr; + + union { + /* + * Extent size and serial number associated with the extent + * structure (different than the serial number for the extent at + * e_addr). + * + * ssssssss [...] ssssssss ssssnnnn nnnnnnnn + */ + size_t e_size_esn; + #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) + #define EXTENT_ESN_MASK ((size_t)PAGE-1) + /* Base extent size, which may not be a multiple of PAGE. */ + size_t e_bsize; + }; + + /* + * List linkage, used by a variety of lists: + * - bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; + + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail + */ + phn(extent_t) ph_link; + + union { + /* Small region slab metadata. */ + slab_data_t e_slab_data; + + /* Profiling data, used for large objects. */ + struct { + /* Time when this was allocated. */ + nstime_t e_alloc_time; + /* Points to a prof_tctx_t. 
*/ + atomic_p_t e_prof_tctx; + }; + }; +}; + +static inline unsigned +extent_arena_ind_get(const extent_t *extent) { + unsigned arena_ind = (unsigned)((extent->e_bits & + EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); + assert(arena_ind < MALLOCX_ARENA_LIMIT); + + return arena_ind; +} + +static inline szind_t +extent_szind_get_maybe_invalid(const extent_t *extent) { + szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> + EXTENT_BITS_SZIND_SHIFT); + assert(szind <= SC_NSIZES); + return szind; +} + +static inline szind_t +extent_szind_get(const extent_t *extent) { + szind_t szind = extent_szind_get_maybe_invalid(extent); + assert(szind < SC_NSIZES); /* Never call when "invalid". */ + return szind; +} + +static inline size_t +extent_usize_get(const extent_t *extent) { + return sz_index2size(extent_szind_get(extent)); +} + +static inline unsigned +extent_binshard_get(const extent_t *extent) { + unsigned binshard = (unsigned)((extent->e_bits & + EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + return binshard; +} + +static inline size_t +extent_sn_get(const extent_t *extent) { + return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> + EXTENT_BITS_SN_SHIFT); +} + +static inline extent_state_t +extent_state_get(const extent_t *extent) { + return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> + EXTENT_BITS_STATE_SHIFT); +} + +static inline bool +extent_zeroed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> + EXTENT_BITS_ZEROED_SHIFT); +} + +static inline bool +extent_committed_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> + EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline bool +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline bool 
+extent_slab_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> + EXTENT_BITS_SLAB_SHIFT); +} + +static inline unsigned +extent_nfree_get(const extent_t *extent) { + assert(extent_slab_get(extent)); + return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> + EXTENT_BITS_NFREE_SHIFT); +} + +static inline void * +extent_base_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return PAGE_ADDR2BASE(extent->e_addr); +} + +static inline void * +extent_addr_get(const extent_t *extent) { + assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || + !extent_slab_get(extent)); + return extent->e_addr; +} + +static inline size_t +extent_size_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_SIZE_MASK); +} + +static inline size_t +extent_esn_get(const extent_t *extent) { + return (extent->e_size_esn & EXTENT_ESN_MASK); +} + +static inline size_t +extent_bsize_get(const extent_t *extent) { + return extent->e_bsize; +} + +static inline void * +extent_before_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +} + +static inline void * +extent_last_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent) - PAGE); +} + +static inline void * +extent_past_get(const extent_t *extent) { + return (void *)((uintptr_t)extent_base_get(extent) + + extent_size_get(extent)); +} + +static inline slab_data_t * +extent_slab_data_get(extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline const slab_data_t * +extent_slab_data_get_const(const extent_t *extent) { + assert(extent_slab_get(extent)); + return &extent->e_slab_data; +} + +static inline prof_tctx_t * +extent_prof_tctx_get(const extent_t *extent) { + return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, + ATOMIC_ACQUIRE); +} + +static inline nstime_t 
+extent_prof_alloc_time_get(const extent_t *extent) { + return extent->e_alloc_time; +} + +static inline void +extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); +} + +static inline void +extent_binshard_set(extent_t *extent, unsigned binshard) { + /* The assertion assumes szind is set already. */ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); +} + +static inline void +extent_addr_set(extent_t *extent, void *addr) { + extent->e_addr = addr; +} + +static inline void +extent_size_set(extent_t *extent, size_t size) { + assert((size & ~EXTENT_SIZE_MASK) == 0); + extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +} + +static inline void +extent_esn_set(extent_t *extent, size_t esn) { + extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & + EXTENT_ESN_MASK); +} + +static inline void +extent_bsize_set(extent_t *extent, size_t bsize) { + extent->e_bsize = bsize; +} + +static inline void +extent_szind_set(extent_t *extent, szind_t szind) { + assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | + ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); +} + +static inline void +extent_nfree_set(extent_t *extent, unsigned nfree) { + assert(extent_slab_get(extent)); + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { + /* The assertion assumes szind is set already. 
*/ + assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); + extent->e_bits = (extent->e_bits & + (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_inc(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_dec(extent_t *extent) { + assert(extent_slab_get(extent)); + extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_nfree_sub(extent_t *extent, uint64_t n) { + assert(extent_slab_get(extent)); + extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +} + +static inline void +extent_sn_set(extent_t *extent, size_t sn) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | + ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +} + +static inline void +extent_state_set(extent_t *extent, extent_state_t state) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | + ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +} + +static inline void +extent_zeroed_set(extent_t *extent, bool zeroed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +} + +static inline void +extent_committed_set(extent_t *extent, bool committed) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +} + +static inline void +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + +static inline void +extent_slab_set(extent_t *extent, bool slab) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | + ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +} + +static inline void +extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { + 
atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +} + +static inline void +extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { + nstime_copy(&extent->e_alloc_time, &t); +} + +static inline bool +extent_is_head_get(extent_t *extent) { + if (maps_coalesce) { + not_reached(); + } + + return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> + EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_is_head_set(extent_t *extent, bool is_head) { + if (maps_coalesce) { + not_reached(); + } + + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); +} + +static inline void +extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, + bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool committed, bool dumpable, extent_head_state_t is_head) { + assert(addr == PAGE_ADDR2BASE(addr) || !slab); + + extent_arena_ind_set(extent, arena_ind); + extent_addr_set(extent, addr); + extent_size_set(extent, size); + extent_slab_set(extent, slab); + extent_szind_set(extent, szind); + extent_sn_set(extent, sn); + extent_state_set(extent, state); + extent_zeroed_set(extent, zeroed); + extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); + ql_elm_new(extent, ql_link); + if (!maps_coalesce) { + extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : + false); + } + if (config_prof) { + extent_prof_tctx_set(extent, NULL); + } +} + +static inline void +extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { + extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); + extent_addr_set(extent, addr); + extent_bsize_set(extent, bsize); + extent_slab_set(extent, false); + extent_szind_set(extent, SC_NSIZES); + extent_sn_set(extent, sn); + extent_state_set(extent, extent_state_active); + extent_zeroed_set(extent, true); + extent_committed_set(extent, true); + extent_dumpable_set(extent, true); +} + +static inline void +extent_list_init(extent_list_t *list) { + ql_new(list); +} + +static inline extent_t * +extent_list_first(const extent_list_t *list) { + return ql_first(list); +} + +static inline extent_t * +extent_list_last(const extent_list_t *list) { + return ql_last(list, ql_link); +} + +static inline void +extent_list_append(extent_list_t *list, extent_t *extent) { + ql_tail_insert(list, extent, ql_link); +} + +static inline void +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + +static inline void +extent_list_replace(extent_list_t *list, extent_t *to_remove, + extent_t *to_insert) { + ql_after_insert(to_remove, to_insert, ql_link); + ql_remove(list, to_remove, ql_link); +} + +static inline void +extent_list_remove(extent_list_t *list, extent_t *extent) { + ql_remove(list, extent, ql_link); +} + +static inline int +extent_sn_comp(const extent_t *a, const extent_t *b) { + size_t a_sn = extent_sn_get(a); + size_t b_sn = extent_sn_get(b); + + return (a_sn > b_sn) - (a_sn < b_sn); +} + +static inline int +extent_esn_comp(const extent_t *a, const extent_t *b) { + size_t a_esn = extent_esn_get(a); + size_t b_esn = extent_esn_get(b); + + return (a_esn > b_esn) - (a_esn < b_esn); +} + +static inline int +extent_ad_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_addr = (uintptr_t)extent_addr_get(a); + uintptr_t b_addr = 
(uintptr_t)extent_addr_get(b); + + return (a_addr > b_addr) - (a_addr < b_addr); +} + +static inline int +extent_ead_comp(const extent_t *a, const extent_t *b) { + uintptr_t a_eaddr = (uintptr_t)a; + uintptr_t b_eaddr = (uintptr_t)b; + + return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); +} + +static inline int +extent_snad_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_sn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ad_comp(a, b); + return ret; +} + +static inline int +extent_esnead_comp(const extent_t *a, const extent_t *b) { + int ret; + + ret = extent_esn_comp(a, b); + if (ret != 0) { + return ret; + } + + ret = extent_ead_comp(a, b); + return ret; +} + +#endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h index 95be0848..2647df8a 100644 --- a/include/jemalloc/internal/extent_inlines.h +++ b/include/jemalloc/internal/extent_inlines.h @@ -35,432 +35,4 @@ extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { (uintptr_t)extent2); } -static inline unsigned -extent_arena_ind_get(const extent_t *extent) { - unsigned arena_ind = (unsigned)((extent->e_bits & - EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); - assert(arena_ind < MALLOCX_ARENA_LIMIT); - - return arena_ind; -} - -static inline szind_t -extent_szind_get_maybe_invalid(const extent_t *extent) { - szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> - EXTENT_BITS_SZIND_SHIFT); - assert(szind <= SC_NSIZES); - return szind; -} - -static inline szind_t -extent_szind_get(const extent_t *extent) { - szind_t szind = extent_szind_get_maybe_invalid(extent); - assert(szind < SC_NSIZES); /* Never call when "invalid". 
*/ - return szind; -} - -static inline size_t -extent_usize_get(const extent_t *extent) { - return sz_index2size(extent_szind_get(extent)); -} - -static inline unsigned -extent_binshard_get(const extent_t *extent) { - unsigned binshard = (unsigned)((extent->e_bits & - EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - return binshard; -} - -static inline size_t -extent_sn_get(const extent_t *extent) { - return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> - EXTENT_BITS_SN_SHIFT); -} - -static inline extent_state_t -extent_state_get(const extent_t *extent) { - return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> - EXTENT_BITS_STATE_SHIFT); -} - -static inline bool -extent_zeroed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> - EXTENT_BITS_ZEROED_SHIFT); -} - -static inline bool -extent_committed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> - EXTENT_BITS_COMMITTED_SHIFT); -} - -static inline bool -extent_dumpable_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> - EXTENT_BITS_DUMPABLE_SHIFT); -} - -static inline bool -extent_slab_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> - EXTENT_BITS_SLAB_SHIFT); -} - -static inline unsigned -extent_nfree_get(const extent_t *extent) { - assert(extent_slab_get(extent)); - return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> - EXTENT_BITS_NFREE_SHIFT); -} - -static inline void * -extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return PAGE_ADDR2BASE(extent->e_addr); -} - -static inline void * -extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return extent->e_addr; -} - -static 
inline size_t -extent_size_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_SIZE_MASK); -} - -static inline size_t -extent_esn_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_ESN_MASK); -} - -static inline size_t -extent_bsize_get(const extent_t *extent) { - return extent->e_bsize; -} - -static inline void * -extent_before_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) - PAGE); -} - -static inline void * -extent_last_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE); -} - -static inline void * -extent_past_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent)); -} - -static inline slab_data_t * -extent_slab_data_get(extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; -} - -static inline const slab_data_t * -extent_slab_data_get_const(const extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; -} - -static inline prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) { - return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, - ATOMIC_ACQUIRE); -} - -static inline nstime_t -extent_prof_alloc_time_get(const extent_t *extent) { - return extent->e_alloc_time; -} - -static inline void -extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); -} - -static inline void -extent_binshard_set(extent_t *extent, unsigned binshard) { - /* The assertion assumes szind is set already. 
*/ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); -} - -static inline void -extent_addr_set(extent_t *extent, void *addr) { - extent->e_addr = addr; -} - -static inline void -extent_size_set(extent_t *extent, size_t size) { - assert((size & ~EXTENT_SIZE_MASK) == 0); - extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); -} - -static inline void -extent_esn_set(extent_t *extent, size_t esn) { - extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & - EXTENT_ESN_MASK); -} - -static inline void -extent_bsize_set(extent_t *extent, size_t bsize) { - extent->e_bsize = bsize; -} - -static inline void -extent_szind_set(extent_t *extent, szind_t szind) { - assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | - ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); -} - -static inline void -extent_nfree_set(extent_t *extent, unsigned nfree) { - assert(extent_slab_get(extent)); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { - /* The assertion assumes szind is set already. 
*/ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & - (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_inc(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_dec(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_nfree_sub(extent_t *extent, uint64_t n) { - assert(extent_slab_get(extent)); - extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); -} - -static inline void -extent_sn_set(extent_t *extent, size_t sn) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | - ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); -} - -static inline void -extent_state_set(extent_t *extent, extent_state_t state) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | - ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); -} - -static inline void -extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); -} - -static inline void -extent_committed_set(extent_t *extent, bool committed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); -} - -static inline void -extent_dumpable_set(extent_t *extent, bool dumpable) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); -} - -static inline void -extent_slab_set(extent_t *extent, bool slab) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | - ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); -} - -static inline void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - 
atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); -} - -static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { - nstime_copy(&extent->e_alloc_time, &t); -} - -static inline bool -extent_is_head_get(extent_t *extent) { - if (maps_coalesce) { - not_reached(); - } - - return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> - EXTENT_BITS_IS_HEAD_SHIFT); -} - -static inline void -extent_is_head_set(extent_t *extent, bool is_head) { - if (maps_coalesce) { - not_reached(); - } - - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); -} - -static inline void -extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, - bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool dumpable, extent_head_state_t is_head) { - assert(addr == PAGE_ADDR2BASE(addr) || !slab); - - extent_arena_ind_set(extent, arena_ind); - extent_addr_set(extent, addr); - extent_size_set(extent, size); - extent_slab_set(extent, slab); - extent_szind_set(extent, szind); - extent_sn_set(extent, sn); - extent_state_set(extent, state); - extent_zeroed_set(extent, zeroed); - extent_committed_set(extent, committed); - extent_dumpable_set(extent, dumpable); - ql_elm_new(extent, ql_link); - if (!maps_coalesce) { - extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? 
true : - false); - } - if (config_prof) { - extent_prof_tctx_set(extent, NULL); - } -} - -static inline void -extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); - extent_addr_set(extent, addr); - extent_bsize_set(extent, bsize); - extent_slab_set(extent, false); - extent_szind_set(extent, SC_NSIZES); - extent_sn_set(extent, sn); - extent_state_set(extent, extent_state_active); - extent_zeroed_set(extent, true); - extent_committed_set(extent, true); - extent_dumpable_set(extent, true); -} - -static inline void -extent_list_init(extent_list_t *list) { - ql_new(list); -} - -static inline extent_t * -extent_list_first(const extent_list_t *list) { - return ql_first(list); -} - -static inline extent_t * -extent_list_last(const extent_list_t *list) { - return ql_last(list, ql_link); -} - -static inline void -extent_list_append(extent_list_t *list, extent_t *extent) { - ql_tail_insert(list, extent, ql_link); -} - -static inline void -extent_list_prepend(extent_list_t *list, extent_t *extent) { - ql_head_insert(list, extent, ql_link); -} - -static inline void -extent_list_replace(extent_list_t *list, extent_t *to_remove, - extent_t *to_insert) { - ql_after_insert(to_remove, to_insert, ql_link); - ql_remove(list, to_remove, ql_link); -} - -static inline void -extent_list_remove(extent_list_t *list, extent_t *extent) { - ql_remove(list, extent, ql_link); -} - -static inline int -extent_sn_comp(const extent_t *a, const extent_t *b) { - size_t a_sn = extent_sn_get(a); - size_t b_sn = extent_sn_get(b); - - return (a_sn > b_sn) - (a_sn < b_sn); -} - -static inline int -extent_esn_comp(const extent_t *a, const extent_t *b) { - size_t a_esn = extent_esn_get(a); - size_t b_esn = extent_esn_get(b); - - return (a_esn > b_esn) - (a_esn < b_esn); -} - -static inline int -extent_ad_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_addr = (uintptr_t)extent_addr_get(a); - uintptr_t b_addr = 
(uintptr_t)extent_addr_get(b); - - return (a_addr > b_addr) - (a_addr < b_addr); -} - -static inline int -extent_ead_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_eaddr = (uintptr_t)a; - uintptr_t b_eaddr = (uintptr_t)b; - - return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); -} - -static inline int -extent_snad_comp(const extent_t *a, const extent_t *b) { - int ret; - - ret = extent_sn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = extent_ad_comp(a, b); - return ret; -} - -static inline int -extent_esnead_comp(const extent_t *a, const extent_t *b) { - int ret; - - ret = extent_esn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = extent_ead_comp(a, b); - return ret; -} - #endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 827bd3b4..108ac401 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" @@ -10,182 +9,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" -typedef enum { - extent_state_active = 0, - extent_state_dirty = 1, - extent_state_muzzy = 2, - extent_state_retained = 3 -} extent_state_t; - -/* Extent (span of pages). Use accessor functions for e_* fields. */ -struct extent_s { - /* - * Bitfield containing several fields: - * - * a: arena_ind - * b: slab - * c: committed - * d: dumpable - * z: zeroed - * t: state - * i: szind - * f: nfree - * s: bin_shard - * n: sn - * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa - * - * arena_ind: Arena from which this extent came, or all 1 bits if - * unassociated. 
- * - * slab: The slab flag indicates whether the extent is used for a slab - * of small regions. This helps differentiate small size classes, - * and it indicates whether interior pointers can be looked up via - * iealloc(). - * - * committed: The committed flag indicates whether physical memory is - * committed to the extent, whether explicitly or implicitly - * as on a system that overcommits and satisfies physical - * memory needs on demand via soft page faults. - * - * dumpable: The dumpable flag indicates whether or not we've set the - * memory in question to be dumpable. Note that this - * interacts somewhat subtly with user-specified extent hooks, - * since we don't know if *they* are fiddling with - * dumpability (in which case, we don't want to undo whatever - * they're doing). To deal with this scenario, we: - * - Make dumpable false only for memory allocated with the - * default hooks. - * - Only allow memory to go from non-dumpable to dumpable, - * and only once. - * - Never make the OS call to allow dumping when the - * dumpable bit is already set. - * These three constraints mean that we will never - * accidentally dump user memory that the user meant to set - * nondumpable with their extent hooks. - * - * - * zeroed: The zeroed flag is used by extent recycling code to track - * whether memory is zero-filled. - * - * state: The state flag is an extent_state_t. - * - * szind: The szind flag indicates usable size class index for - * allocations residing in this extent, regardless of whether the - * extent is a slab. Extent size and usable size often differ - * even for non-slabs, either due to sz_large_pad or promotion of - * sampled small regions. - * - * nfree: Number of free regions in slab. - * - * bin_shard: the shard of the bin from which this extent came. - * - * sn: Serial number (potentially non-unique). 
- * - * Serial numbers may wrap around if !opt_retain, but as long as - * comparison functions fall back on address comparison for equal - * serial numbers, stable (if imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of - * wrap-around, e.g. when splitting an extent and assigning the same - * serial number to both resulting adjacent extents. - */ - uint64_t e_bits; -#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) - -#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) - -#define EXTENT_BITS_SLAB_WIDTH 1 -#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) - -#define EXTENT_BITS_COMMITTED_WIDTH 1 -#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) - -#define EXTENT_BITS_DUMPABLE_WIDTH 1 -#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) - -#define EXTENT_BITS_ZEROED_WIDTH 1 -#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) - -#define EXTENT_BITS_STATE_WIDTH 2 -#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) - -#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_MASK 
MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) - -#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) - -#define EXTENT_BITS_BINSHARD_WIDTH 6 -#define EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) - -#define EXTENT_BITS_IS_HEAD_WIDTH 1 -#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) - -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) - - /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; - - union { - /* - * Extent size and serial number associated with the extent - * structure (different than the serial number for the extent at - * e_addr). - * - * ssssssss [...] ssssssss ssssnnnn nnnnnnnn - */ - size_t e_size_esn; - #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) - #define EXTENT_ESN_MASK ((size_t)PAGE-1) - /* Base extent size, which may not be a multiple of PAGE. */ - size_t e_bsize; - }; - - /* - * List linkage, used by a variety of lists: - * - bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - - /* - * Linkage for per size class sn/address-ordered heaps, and - * for extent_avail - */ - phn(extent_t) ph_link; - - union { - /* Small region slab metadata. */ - slab_data_t e_slab_data; - - /* Profiling data, used for large objects. */ - struct { - /* Time when this was allocated. */ - nstime_t e_alloc_time; - /* Points to a prof_tctx_t. 
*/ - atomic_p_t e_prof_tctx; - }; - }; -}; -typedef ql_head(extent_t) extent_list_t; -typedef ph(extent_t) extent_tree_t; -typedef ph(extent_t) extent_heap_t; - /* Quantized collection of extents, with built-in LRU queue. */ struct extents_s { malloc_mutex_t mtx; diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 96925cf9..a56410af 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H #define JEMALLOC_INTERNAL_EXTENT_TYPES_H -typedef struct extent_s extent_t; typedef struct extents_s extents_t; typedef struct extent_util_stats_s extent_util_stats_t; @@ -15,9 +14,4 @@ typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; */ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 -typedef enum { - EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ -} extent_head_state_t; - #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 55fcf3eb..6755b43e 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -50,10 +50,9 @@ /* STRUCTS */ /******************************************************************************/ -#include "jemalloc/internal/extent_structs.h" -#include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index ed0e7b9f..b6b83395 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -39,6 
+39,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 1606a3ab..f405ea3d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -39,6 +39,7 @@ + diff --git a/src/bin.c b/src/bin.c index bca6b12c..d7cbfb56 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,26 +6,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -bin_info_t bin_infos[SC_NBINS]; - -static void -bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - bin_info_t bin_infos[SC_NBINS]) { - for (unsigned i = 0; i < SC_NBINS; i++) { - bin_info_t *bin_info = &bin_infos[i]; - sc_t *sc = &sc_data->sc[i]; - bin_info->reg_size = ((size_t)1U << sc->lg_base) - + ((size_t)sc->ndelta << sc->lg_delta); - bin_info->slab_size = (sc->pgs << LG_PAGE); - bin_info->nregs = - (uint32_t)(bin_info->slab_size / bin_info->reg_size); - bin_info->n_shards = bin_shard_sizes[i]; - bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( - bin_info->nregs); - bin_info->bitmap_info = bitmap_info; - } -} - bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -58,12 +38,6 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } } -void -bin_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { - assert(sc_data->initialized); - bin_infos_init(sc_data, bin_shard_sizes, bin_infos); -} - bool bin_init(bin_t *bin) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, diff --git a/src/bin_info.c b/src/bin_info.c new file mode 100644 index 00000000..20b93ea4 --- /dev/null +++ b/src/bin_info.c @@ -0,0 +1,30 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/bin_info.h" + +bin_info_t bin_infos[SC_NBINS]; + +static void +bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bin_info_t 
bin_infos[SC_NBINS]) { + for (unsigned i = 0; i < SC_NBINS; i++) { + bin_info_t *bin_info = &bin_infos[i]; + sc_t *sc = &sc_data->sc[i]; + bin_info->reg_size = ((size_t)1U << sc->lg_base) + + ((size_t)sc->ndelta << sc->lg_delta); + bin_info->slab_size = (sc->pgs << LG_PAGE); + bin_info->nregs = + (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bin_info->n_shards = bin_shard_sizes[i]; + bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( + bin_info->nregs); + bin_info->bitmap_info = bitmap_info; + } +} + +void +bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { + assert(sc_data->initialized); + bin_infos_init(sc_data, bin_shard_sizes, bin_infos); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 753fcbea..fc7d2891 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1488,8 +1488,8 @@ malloc_init_hard_a0_locked() { * Ordering here is somewhat tricky; we need sc_boot() first, since that * determines what the size classes will be, and then * malloc_conf_init(), since any slab size tweaking will need to be done - * before sz_boot and bin_boot, which assume that the values they read - * out of sc_data_global are final. + * before sz_boot and bin_info_boot, which assume that the values they + * read out of sc_data_global are final. */ sc_boot(&sc_data); unsigned bin_shard_sizes[SC_NBINS]; @@ -1504,7 +1504,7 @@ malloc_init_hard_a0_locked() { } malloc_conf_init(&sc_data, bin_shard_sizes); sz_boot(&sc_data); - bin_boot(&sc_data, bin_shard_sizes); + bin_info_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { /* Print statistics at exit. */ From 4e5e43f22eead4d1e3fcb4422410e0100b9d8448 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 19:59:55 -0700 Subject: [PATCH 1375/2608] Rename extents_t -> eset_t. 
--- include/jemalloc/internal/arena_structs.h | 7 +- include/jemalloc/internal/eset.h | 60 ++++ include/jemalloc/internal/extent_externs.h | 22 +- include/jemalloc/internal/extent_structs.h | 47 --- include/jemalloc/internal/extent_types.h | 2 - src/arena.c | 50 ++-- src/background_thread.c | 4 +- src/extent.c | 332 ++++++++++----------- 8 files changed, 268 insertions(+), 256 deletions(-) create mode 100644 include/jemalloc/internal/eset.h diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index eeab57fd..9563c3d6 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/eset.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -161,9 +162,9 @@ struct arena_s { * * Synchronization: internal. */ - extents_t extents_dirty; - extents_t extents_muzzy; - extents_t extents_retained; + eset_t extents_dirty; + eset_t extents_muzzy; + eset_t extents_retained; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h new file mode 100644 index 00000000..1c18f4ee --- /dev/null +++ b/include/jemalloc/internal/eset.h @@ -0,0 +1,60 @@ +#ifndef JEMALLOC_INTERNAL_ESET_H +#define JEMALLOC_INTERNAL_ESET_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/mutex.h" + +/* + * An eset ("extent set") is a quantized collection of extents, with built-in + * LRU queue. + */ +typedef struct eset_s eset_t; +struct eset_s { + malloc_mutex_t mtx; + + /* + * Quantized per size class heaps of extents. + * + * Synchronization: mtx. 
+ */ + extent_heap_t heaps[SC_NPSIZES + 1]; + atomic_zu_t nextents[SC_NPSIZES + 1]; + atomic_zu_t nbytes[SC_NPSIZES + 1]; + + /* + * Bitmap for which set bits correspond to non-empty heaps. + * + * Synchronization: mtx. + */ + bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; + + /* + * LRU of all extents in heaps. + * + * Synchronization: mtx. + */ + extent_list_t lru; + + /* + * Page sum for all extents in heaps. + * + * The synchronization here is a little tricky. Modifications to npages + * must hold mtx, but reads need not (though, a reader who sees npages + * without holding the mutex can't assume anything about the rest of the + * state of the eset_t). + */ + atomic_zu_t npages; + + /* All stored extents must be in the same state. */ + extent_state_t state; + + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. + */ + bool delay_coalesce; +}; + +#endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 8aba5763..45271d7c 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,25 +27,25 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -bool extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, +bool extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce); -extent_state_t extents_state_get(const extents_t *extents); -size_t extents_npages_get(extents_t *extents); +extent_state_t extents_state_get(const eset_t *eset); +size_t extents_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. */ -size_t extents_nextents_get(extents_t *extents, pszind_t ind); +size_t extents_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. 
*/ -size_t extents_nbytes_get(extents_t *extents, pszind_t ind); +size_t extents_nbytes_get(eset_t *eset, pszind_t ind); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent); + extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_min); -void extents_prefork(tsdn_t *tsdn, extents_t *extents); -void extents_postfork_parent(tsdn_t *tsdn, extents_t *extents); -void extents_postfork_child(tsdn_t *tsdn, extents_t *extents); + extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_min); +void extents_prefork(tsdn_t *tsdn, eset_t *eset); +void extents_postfork_parent(tsdn_t *tsdn, eset_t *eset); +void extents_postfork_child(tsdn_t *tsdn, eset_t *eset); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h index 108ac401..4e6e085c 100644 --- a/include/jemalloc/internal/extent_structs.h +++ b/include/jemalloc/internal/extent_structs.h @@ -9,53 +9,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" -/* Quantized collection of extents, with built-in LRU queue. */ -struct extents_s { - malloc_mutex_t mtx; - - /* - * Quantized per size class heaps of extents. - * - * Synchronization: mtx. 
- */ - extent_heap_t heaps[SC_NPSIZES + 1]; - atomic_zu_t nextents[SC_NPSIZES + 1]; - atomic_zu_t nbytes[SC_NPSIZES + 1]; - - /* - * Bitmap for which set bits correspond to non-empty heaps. - * - * Synchronization: mtx. - */ - bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; - - /* - * LRU of all extents in heaps. - * - * Synchronization: mtx. - */ - extent_list_t lru; - - /* - * Page sum for all extents in heaps. - * - * The synchronization here is a little tricky. Modifications to npages - * must hold mtx, but reads need not (though, a reader who sees npages - * without holding the mutex can't assume anything about the rest of the - * state of the extents_t). - */ - atomic_zu_t npages; - - /* All stored extents must be in the same state. */ - extent_state_t state; - - /* - * If true, delay coalescing until eviction; otherwise coalesce during - * deallocation. - */ - bool delay_coalesce; -}; - /* * The following two structs are for experimental purposes. See * experimental_utilization_query_ctl and diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index a56410af..02d7b2cd 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -1,8 +1,6 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H #define JEMALLOC_INTERNAL_EXTENT_TYPES_H -typedef struct extents_s extents_t; - typedef struct extent_util_stats_s extent_util_stats_t; typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; diff --git a/src/arena.c b/src/arena.c index 231d6681..5380deed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,7 +55,7 @@ static unsigned huge_arena_ind; */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, extents_t *extents, bool all, size_t npages_limit, + arena_decay_t *decay, eset_t *eset, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool 
is_background_thread, bool all); @@ -609,10 +609,10 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, size_t current_npages, size_t npages_limit, + eset_t *eset, size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, + arena_decay_to_limit(tsdn, arena, decay, eset, false, npages_limit, current_npages - npages_limit, is_background_thread); } @@ -644,8 +644,8 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, const nstime_t *time, bool is_background_thread) { - size_t current_npages = extents_npages_get(extents); + eset_t *eset, const nstime_t *time, bool is_background_thread) { + size_t current_npages = extents_npages_get(eset); arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); @@ -654,7 +654,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, current_npages; if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents, + arena_decay_try_purge(tsdn, arena, decay, eset, current_npages, npages_limit, is_background_thread); } } @@ -712,15 +712,15 @@ arena_decay_ms_valid(ssize_t decay_ms) { static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool is_background_thread) { + eset_t *eset, bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ ssize_t decay_ms = arena_decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, extents_npages_get(extents), + arena_decay_to_limit(tsdn, arena, decay, eset, false, + 0, extents_npages_get(eset), is_background_thread); } return false; @@ -756,11 +756,11 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, extents, &time, + arena_decay_epoch_advance(tsdn, arena, decay, eset, &time, is_background_thread); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, extents, - extents_npages_get(extents), + arena_decay_try_purge(tsdn, arena, decay, eset, + extents_npages_get(eset), arena_decay_backlog_npages_limit(decay), is_background_thread); } @@ -785,7 +785,7 @@ arena_muzzy_decay_ms_get(arena_t *arena) { static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, ssize_t decay_ms) { + eset_t *eset, ssize_t decay_ms) { if (!arena_decay_ms_valid(decay_ms)) { return true; } @@ -800,7 +800,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. 
*/ arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, extents, false); + arena_maybe_decay(tsdn, arena, decay, eset, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -822,7 +822,7 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, + extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_limit, size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -831,7 +831,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; extent_t *extent; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + (extent = extents_evict(tsdn, arena, r_extent_hooks, eset, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -841,7 +841,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, arena_decay_t *decay, extents_t *extents, + extent_hooks_t **r_extent_hooks, arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -861,7 +861,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, size_t npages = extent_size_get(extent) >> LG_PAGE; npurged += npages; extent_list_remove(decay_extents, extent); - switch (extents_state_get(extents)) { + switch (extents_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -914,7 +914,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, + eset_t *eset, bool 
all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -931,11 +931,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_t decay_extents; extent_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, + size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, eset, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, - &extent_hooks, decay, extents, all, &decay_extents, + &extent_hooks, decay, eset, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -946,11 +946,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool is_background_thread, bool all) { + eset_t *eset, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - extents_npages_get(extents), is_background_thread); + arena_decay_to_limit(tsdn, arena, decay, eset, all, 0, + extents_npages_get(eset), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -961,7 +961,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, return true; } - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, eset, is_background_thread); size_t npages_new; if (epoch_advanced) { diff --git a/src/background_thread.c b/src/background_thread.c index bea445f2..f4b9ceff 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -114,7 +114,7 @@ decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { static uint64_t arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t 
*decay, - extents_t *extents) { + eset_t *eset) { if (malloc_mutex_trylock(tsdn, &decay->mtx)) { /* Use minimal interval if decay is contended. */ return BACKGROUND_THREAD_MIN_INTERVAL_NS; @@ -130,7 +130,7 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); - size_t npages = extents_npages_get(extents); + size_t npages = extents_npages_get(eset); if (npages == 0) { unsigned i; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { diff --git a/src/extent.c b/src/extent.c index aac54556..d5350142 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,7 +19,7 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static const bitmap_info_t extents_bitmap_info = +static const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, @@ -101,14 +101,14 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, extent_t *extent); static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, + extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, + extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained); /******************************************************************************/ 
@@ -309,118 +309,118 @@ extent_size_quantize_ceil(size_t size) { ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) bool -extents_init(tsdn_t *tsdn, extents_t *extents, extent_state_t state, +extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce) { - if (malloc_mutex_init(&extents->mtx, "extents", WITNESS_RANK_EXTENTS, + if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, malloc_mutex_rank_exclusive)) { return true; } for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { - extent_heap_new(&extents->heaps[i]); + extent_heap_new(&eset->heaps[i]); } - bitmap_init(extents->bitmap, &extents_bitmap_info, true); - extent_list_init(&extents->lru); - atomic_store_zu(&extents->npages, 0, ATOMIC_RELAXED); - extents->state = state; - extents->delay_coalesce = delay_coalesce; + bitmap_init(eset->bitmap, &eset_bitmap_info, true); + extent_list_init(&eset->lru); + atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); + eset->state = state; + eset->delay_coalesce = delay_coalesce; return false; } extent_state_t -extents_state_get(const extents_t *extents) { - return extents->state; +extents_state_get(const eset_t *eset) { + return eset->state; } size_t -extents_npages_get(extents_t *extents) { - return atomic_load_zu(&extents->npages, ATOMIC_RELAXED); +extents_npages_get(eset_t *eset) { + return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); } size_t -extents_nextents_get(extents_t *extents, pszind_t pind) { - return atomic_load_zu(&extents->nextents[pind], ATOMIC_RELAXED); +extents_nextents_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); } size_t -extents_nbytes_get(extents_t *extents, pszind_t pind) { - return atomic_load_zu(&extents->nbytes[pind], ATOMIC_RELAXED); +extents_nbytes_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); } static void -extents_stats_add(extents_t *extent, pszind_t pind, size_t sz) { - size_t cur = 
atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nextents[pind], cur + 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nbytes[pind], cur + sz, ATOMIC_RELAXED); +extents_stats_add(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); } static void -extents_stats_sub(extents_t *extent, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&extent->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nextents[pind], cur - 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&extent->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&extent->nbytes[pind], cur - sz, ATOMIC_RELAXED); +extents_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); } static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); - assert(extent_state_get(extent) == extents->state); +extents_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - if (extent_heap_empty(&extents->heaps[pind])) { - bitmap_unset(extents->bitmap, &extents_bitmap_info, + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_unset(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - 
extent_heap_insert(&extents->heaps[pind], extent); + extent_heap_insert(&eset->heaps[pind], extent); if (config_stats) { - extents_stats_add(extents, pind, size); + extents_stats_add(eset, pind, size); } - extent_list_append(&extents->lru, extent); + extent_list_append(&eset->lru, extent); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we * don't need an atomic fetch-add; we can get by with a load followed by * a store. */ - size_t cur_extents_npages = - atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - atomic_store_zu(&extents->npages, cur_extents_npages + npages, + size_t cur_eset_npages = + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + atomic_store_zu(&eset->npages, cur_eset_npages + npages, ATOMIC_RELAXED); } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); - assert(extent_state_get(extent) == extents->state); +extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); size_t psz = extent_size_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - extent_heap_remove(&extents->heaps[pind], extent); + extent_heap_remove(&eset->heaps[pind], extent); if (config_stats) { - extents_stats_sub(extents, pind, size); + extents_stats_sub(eset, pind, size); } - if (extent_heap_empty(&extents->heaps[pind])) { - bitmap_set(extents->bitmap, &extents_bitmap_info, + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_set(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_list_remove(&extents->lru, extent); + extent_list_remove(&eset->lru, extent); size_t npages = size >> LG_PAGE; /* - * As in extents_insert_locked, we hold extents->mtx and so don't need - * atomic operations for updating extents->npages. 
+ * As in extents_insert_locked, we hold eset->mtx and so don't need + * atomic operations for updating eset->npages. */ size_t cur_extents_npages = - atomic_load_zu(&extents->npages, ATOMIC_RELAXED); + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); - atomic_store_zu(&extents->npages, + atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } @@ -429,18 +429,18 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { * requirement. For each size, try only the first extent in the heap. */ static extent_t * -extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, +extents_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); - for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, - &extents_bitmap_info, (size_t)pind); i < pind_max; i = - (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { assert(i < SC_NPSIZES); - assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_first(&extents->heaps[i]); + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); uintptr_t base = (uintptr_t)extent_base_get(extent); size_t candidate_size = extent_size_get(extent); assert(candidate_size >= min_size); @@ -466,7 +466,7 @@ extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, * large enough. 
*/ static extent_t * -extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, size_t size) { extent_t *ret = NULL; @@ -477,25 +477,25 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, * No split / merge allowed (Windows w/o retain). Try exact fit * only. */ - return extent_heap_empty(&extents->heaps[pind]) ? NULL : - extent_heap_first(&extents->heaps[pind]); + return extent_heap_empty(&eset->heaps[pind]) ? NULL : + extent_heap_first(&eset->heaps[pind]); } - for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, - &extents_bitmap_info, (size_t)pind); + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); i < SC_NPSIZES + 1; - i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { - assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_first(&extents->heaps[i]); + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); assert(extent_size_get(extent) >= size); /* * In order to reduce fragmentation, avoid reusing and splitting - * large extents for much smaller sizes. + * large eset for much smaller sizes. * - * Only do check for dirty extents (delay_coalesce). + * Only do check for dirty eset (delay_coalesce). */ - if (extents->delay_coalesce && + if (eset->delay_coalesce && (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { break; } @@ -513,12 +513,12 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, /* * Do first-fit extent selection, where the selection policy choice is - * based on extents->delay_coalesce. + * based on eset->delay_coalesce. 
*/ static extent_t * -extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extents_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, size_t esize, size_t alignment) { - malloc_mutex_assert_owner(tsdn, &extents->mtx); + malloc_mutex_assert_owner(tsdn, &eset->mtx); size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -527,7 +527,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, } extent_t *extent = - extents_first_fit_locked(tsdn, arena, extents, max_size); + extents_first_fit_locked(tsdn, arena, eset, max_size); if (alignment > PAGE && extent == NULL) { /* @@ -535,7 +535,7 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, * pessimistic. Next we try to satisfy the aligned allocation * with sizes in [esize, max_size). */ - extent = extents_fit_alignment(extents, esize, max_size, + extent = extents_fit_alignment(eset, esize, max_size, alignment); } @@ -544,31 +544,31 @@ extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { extent_state_set(extent, extent_state_active); bool coalesced; extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, &coalesced, false); - extent_state_set(extent, extents_state_get(extents)); + eset, extent, &coalesced, false); + extent_state_set(extent, extents_state_get(eset)); if (!coalesced) { return true; } - extents_insert_locked(tsdn, extents, extent); + extents_insert_locked(tsdn, eset, extent); return false; } extent_t * extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, void *new_addr, size_t size, size_t pad, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, 
szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, eset, new_addr, size, pad, alignment, slab, szind, zero, commit, false); assert(extent == NULL || extent_dumpable_get(extent)); return extent; @@ -576,7 +576,7 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent) { + eset_t *eset, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); assert(extent_dumpable_get(extent)); @@ -586,16 +586,16 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, r_extent_hooks, extents, extent, false); + extent_record(tsdn, arena, r_extent_hooks, eset, extent, false); } extent_t * extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, size_t npages_min) { + eset_t *eset, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); /* * Get the LRU coalesced extent, if any. If coalescing was delayed, @@ -604,24 +604,24 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent; while (true) { /* Get the LRU extent, if any. */ - extent = extent_list_first(&extents->lru); + extent = extent_list_first(&eset->lru); if (extent == NULL) { goto label_return; } /* Check the eviction limit. 
*/ - size_t extents_npages = atomic_load_zu(&extents->npages, + size_t extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); if (extents_npages <= npages_min) { extent = NULL; goto label_return; } - extents_remove_locked(tsdn, extents, extent); - if (!extents->delay_coalesce) { + extents_remove_locked(tsdn, eset, extent); + if (!eset->delay_coalesce) { break; } /* Try to coalesce. */ if (extent_try_delayed_coalesce(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, extent)) { + rtree_ctx, eset, extent)) { break; } /* @@ -634,7 +634,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * Either mark the extent active or deregister it to protect against * concurrent operations. */ - switch (extents_state_get(extents)) { + switch (extents_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -649,7 +649,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } label_return: - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); return extent; } @@ -659,7 +659,7 @@ label_return: */ static void extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent, bool growing_retained) { + eset_t *eset, extent_t *extent, bool growing_retained) { size_t sz = extent_size_get(extent); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); @@ -668,7 +668,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. 
*/ - if (extents_state_get(extents) == extent_state_dirty) { + if (extents_state_get(eset) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, r_extent_hooks, @@ -680,45 +680,45 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks } void -extents_prefork(tsdn_t *tsdn, extents_t *extents) { - malloc_mutex_prefork(tsdn, &extents->mtx); +extents_prefork(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_prefork(tsdn, &eset->mtx); } void -extents_postfork_parent(tsdn_t *tsdn, extents_t *extents) { - malloc_mutex_postfork_parent(tsdn, &extents->mtx); +extents_postfork_parent(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_parent(tsdn, &eset->mtx); } void -extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { - malloc_mutex_postfork_child(tsdn, &extents->mtx); +extents_postfork_child(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_child(tsdn, &eset->mtx); } static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extent_state_active); - extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent); + extent_state_set(extent, extents_state_get(eset)); + extents_insert_locked(tsdn, eset, extent); } static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, +extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { - malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent); - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); + extent_deactivate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); } static void -extent_activate_locked(tsdn_t 
*tsdn, arena_t *arena, extents_t *extents, +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extents_state_get(extents)); + assert(extent_state_get(extent) == extents_state_get(eset)); - extents_remove_locked(tsdn, extents, extent); + extents_remove_locked(tsdn, eset, extent); extent_state_set(extent, extent_state_active); } @@ -911,12 +911,12 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { } /* - * Tries to find and remove an extent from extents that can be used for the + * Tries to find and remove an extent from eset that can be used for the * given allocation request. */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -940,7 +940,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } size_t esize = size + pad; - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { @@ -957,22 +957,22 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, != arena_ind_get(arena) || extent_size_get(extent) < esize || extent_state_get(extent) != - extents_state_get(extents)) { + extents_state_get(eset)) { extent = NULL; } extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, extents, esize, + extent = extents_fit_locked(tsdn, arena, eset, esize, alignment); } if (extent == NULL) { - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); return NULL; } - extent_activate_locked(tsdn, arena, extents, extent); - 
malloc_mutex_unlock(tsdn, &extents->mtx); + extent_activate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); return extent; } @@ -981,7 +981,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, * Given an allocation request and an extent guaranteed to be able to satisfy * it, this splits off lead and trail extents, leaving extent pointing to an * extent satisfying the allocation. - * This function doesn't put lead or trail into any extents_t; it's the caller's + * This function doesn't put lead or trail into any eset_t; it's the caller's * job to ensure that they can be reused. */ typedef enum { @@ -1078,11 +1078,11 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, * This fulfills the indicated allocation request out of the given extent (which * the caller should have ensured was big enough). If there's any unused space * before or after the resulting allocation, that space is given its own extent - * and put back into extents. + * and put back into eset. */ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, bool growing_retained) { extent_t *lead; @@ -1099,19 +1099,19 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, && !opt_retain) { /* * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the extents. + * leaking the eset. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, extents, to_leak); + extent_deactivate(tsdn, arena, eset, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, extents, lead); + extent_deactivate(tsdn, arena, eset, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, extents, trail); + extent_deactivate(tsdn, arena, eset, trail); } return extent; } else { @@ -1126,7 +1126,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, r_extent_hooks, extents, + extents_abandon_vm(tsdn, arena, r_extent_hooks, eset, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -1149,11 +1149,11 @@ extent_need_manual_zero(arena_t *arena) { /* * Tries to satisfy the given allocation request by reusing one of the extents - * in the given extents_t. + * in the given eset_t. 
*/ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, void *new_addr, size_t size, size_t pad, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1166,14 +1166,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, + rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); if (extent == NULL) { return NULL; } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, new_addr, size, pad, alignment, slab, szind, extent, + eset, new_addr, size, pad, alignment, slab, szind, extent, growing_retained); if (extent == NULL) { return NULL; @@ -1182,7 +1182,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (*commit && !extent_committed_get(extent)) { if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, r_extent_hooks, extents, + extent_record(tsdn, arena, r_extent_hooks, eset, extent, growing_retained); return NULL; } @@ -1584,7 +1584,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, } static bool -extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, +extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, const extent_t *outer) { assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { @@ -1592,7 +1592,7 @@ extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, } assert(extent_state_get(inner) == extent_state_active); - if 
(extent_state_get(outer) != extents->state) { + if (extent_state_get(outer) != eset->state) { return false; } @@ -1605,19 +1605,19 @@ extent_can_coalesce(arena_t *arena, extents_t *extents, const extent_t *inner, static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *inner, extent_t *outer, bool forward, + eset_t *eset, extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, extents, inner, outer)); + assert(extent_can_coalesce(arena, eset, inner, outer)); - extent_activate_locked(tsdn, arena, extents, outer); + extent_activate_locked(tsdn, arena, eset, outer); - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, extents, outer); + extent_deactivate_locked(tsdn, arena, eset, outer); } return err; @@ -1625,7 +1625,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained, bool inactive_only) { /* @@ -1646,19 +1646,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_past_get(extent), inactive_only); if (next != NULL) { /* - * extents->mtx only protects against races for - * like-state extents, so call extent_can_coalesce() + * eset->mtx only protects against races for + * like-state extents, so call extent_can_coalesce() * before releasing next's pool lock. 
*/ - bool can_coalesce = extent_can_coalesce(arena, extents, + bool can_coalesce = extent_can_coalesce(arena, eset, extent, next); extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, next, true, + r_extent_hooks, eset, extent, next, true, growing_retained)) { - if (extents->delay_coalesce) { + if (eset->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return extent; @@ -1671,15 +1671,15 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, extent_before_get(extent), inactive_only); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, extents, + bool can_coalesce = extent_can_coalesce(arena, eset, extent, prev); extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, extents, extent, prev, false, + r_extent_hooks, eset, extent, prev, false, growing_retained)) { extent = prev; - if (extents->delay_coalesce) { + if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; return extent; @@ -1689,7 +1689,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, } } while (again); - if (extents->delay_coalesce) { + if (eset->delay_coalesce) { *coalesced = false; } return extent; @@ -1697,35 +1697,35 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, coalesced, growing_retained, false); + eset, extent, coalesced, growing_retained, false); } static extent_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, - extents, extent, coalesced, growing_retained, true); + eset, extent, coalesced, growing_retained, true); } /* * Does the metadata management portions of putting an unused extent into the - * given extents_t (coalesces, deregisters slab interiors, the heap operations). + * given eset_t (coalesces, deregisters slab interiors, the heap operations). 
*/ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, - extents_t *extents, extent_t *extent, bool growing_retained) { + eset_t *eset, extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((extents_state_get(extents) != extent_state_dirty && - extents_state_get(extents) != extent_state_muzzy) || + assert((extents_state_get(eset) != extent_state_dirty && + extents_state_get(eset) != extent_state_muzzy) || !extent_zeroed_get(extent)); - malloc_mutex_lock(tsdn, &extents->mtx); + malloc_mutex_lock(tsdn, &eset->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_szind_set(extent, SC_NSIZES); @@ -1737,29 +1737,29 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)extent_base_get(extent), true) == extent); - if (!extents->delay_coalesce) { + if (!eset->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, extent, NULL, growing_retained); + rtree_ctx, eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { - assert(extents == &arena->extents_dirty); - /* Always coalesce large extents eagerly. */ + assert(eset == &arena->extents_dirty); + /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(extent_state_get(extent) == extent_state_active); extent = extent_try_coalesce_large(tsdn, arena, - r_extent_hooks, rtree_ctx, extents, extent, + r_extent_hooks, rtree_ctx, eset, extent, &coalesced, growing_retained); } while (coalesced); if (extent_size_get(extent) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); arena_decay_extent(tsdn, arena, r_extent_hooks, extent); return; } } - extent_deactivate_locked(tsdn, arena, extents, extent); + extent_deactivate_locked(tsdn, arena, eset, extent); - malloc_mutex_unlock(tsdn, &extents->mtx); + malloc_mutex_unlock(tsdn, &eset->mtx); } void From e6180fe1b485c6128de4169e86c178f3118dcde4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:17:23 -0700 Subject: [PATCH 1376/2608] Eset: Add a source file. This will let us move extents_* functions over one by one. --- Makefile.in | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/eset.c | 4 ++++ 4 files changed, 7 insertions(+) create mode 100644 src/eset.c diff --git a/Makefile.in b/Makefile.in index 62ae71f0..21a10532 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index b6b83395..a9683384 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index f405ea3d..72a57e56 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/src/eset.c b/src/eset.c new file mode 100644 index 00000000..3b8d1cbc --- /dev/null +++ b/src/eset.c @@ -0,0 +1,4 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/eset.h" From 
b416b96a397a2234d943d1e7e37e1dc208c971bc Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:37:15 -0700 Subject: [PATCH 1377/2608] Extents -> Eset: rename/move extents_init. --- include/jemalloc/internal/eset.h | 6 ++++++ include/jemalloc/internal/extent_externs.h | 2 -- src/arena.c | 8 +++----- src/eset.c | 21 +++++++++++++++++++++ src/extent.c | 21 --------------------- 5 files changed, 30 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 1c18f4ee..55db75e1 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -6,6 +6,9 @@ #include "jemalloc/internal/extent.h" #include "jemalloc/internal/mutex.h" +/* This is a transitional declaration, while we move extent.c into eset.c. */ +extern const bitmap_info_t eset_bitmap_info; + /* * An eset ("extent set") is a quantized collection of extents, with built-in * LRU queue. @@ -57,4 +60,7 @@ struct eset_s { bool delay_coalesce; }; +bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, + bool delay_coalesce); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 45271d7c..7a223840 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,8 +27,6 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -bool extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, - bool delay_coalesce); extent_state_t extents_state_get(const eset_t *eset); size_t extents_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. 
*/ diff --git a/src/arena.c b/src/arena.c index 5380deed..1d269dc7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2022,16 +2022,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (extents_init(tsdn, &arena->extents_dirty, extent_state_dirty, - true)) { + if (eset_init(tsdn, &arena->extents_dirty, extent_state_dirty, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (extents_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, - false)) { + if (eset_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, false)) { goto label_error; } /* @@ -2040,7 +2038,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (extents_init(tsdn, &arena->extents_retained, extent_state_retained, + if (eset_init(tsdn, &arena->extents_retained, extent_state_retained, false)) { goto label_error; } diff --git a/src/eset.c b/src/eset.c index 3b8d1cbc..09148d0c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -2,3 +2,24 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" + +const bitmap_info_t eset_bitmap_info = + BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); + +bool +eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, + bool delay_coalesce) { + if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, + malloc_mutex_rank_exclusive)) { + return true; + } + for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { + extent_heap_new(&eset->heaps[i]); + } + bitmap_init(eset->bitmap, &eset_bitmap_info, true); + extent_list_init(&eset->lru); + atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); + eset->state = state; + eset->delay_coalesce = delay_coalesce; + return false; +} 
diff --git a/src/extent.c b/src/extent.c index d5350142..51a145df 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,9 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static const bitmap_info_t eset_bitmap_info = - BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); - static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); @@ -308,24 +305,6 @@ extent_size_quantize_ceil(size_t size) { /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -bool -extents_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, - bool delay_coalesce) { - if (malloc_mutex_init(&eset->mtx, "extents", WITNESS_RANK_EXTENTS, - malloc_mutex_rank_exclusive)) { - return true; - } - for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { - extent_heap_new(&eset->heaps[i]); - } - bitmap_init(eset->bitmap, &eset_bitmap_info, true); - extent_list_init(&eset->lru); - atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); - eset->state = state; - eset->delay_coalesce = delay_coalesce; - return false; -} - extent_state_t extents_state_get(const eset_t *eset) { return eset->state; From 63d1b7a7a76b7294a7dd85599c24cd9b555ccf4e Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:45:16 -0700 Subject: [PATCH 1378/2608] Extents -> Eset: move extents_state_get. 
--- include/jemalloc/internal/eset.h | 1 + include/jemalloc/internal/extent_externs.h | 1 - src/arena.c | 2 +- src/eset.c | 5 +++++ src/extent.c | 21 ++++++++------------- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 55db75e1..abd37cad 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -62,5 +62,6 @@ struct eset_s { bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce); +extent_state_t eset_state_get(const eset_t *eset); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 7a223840..2e196dd0 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,7 +27,6 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -extent_state_t extents_state_get(const eset_t *eset); size_t extents_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. 
*/ size_t extents_nextents_get(eset_t *eset, pszind_t ind); diff --git a/src/arena.c b/src/arena.c index 1d269dc7..3eae7e35 100644 --- a/src/arena.c +++ b/src/arena.c @@ -861,7 +861,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, size_t npages = extent_size_get(extent) >> LG_PAGE; npurged += npages; extent_list_remove(decay_extents, extent); - switch (extents_state_get(eset)) { + switch (eset_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: diff --git a/src/eset.c b/src/eset.c index 09148d0c..d0b55941 100644 --- a/src/eset.c +++ b/src/eset.c @@ -23,3 +23,8 @@ eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, eset->delay_coalesce = delay_coalesce; return false; } + +extent_state_t +eset_state_get(const eset_t *eset) { + return eset->state; +} diff --git a/src/extent.c b/src/extent.c index 51a145df..cdbf9098 100644 --- a/src/extent.c +++ b/src/extent.c @@ -305,11 +305,6 @@ extent_size_quantize_ceil(size_t size) { /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -extent_state_t -extents_state_get(const eset_t *eset) { - return eset->state; -} - size_t extents_npages_get(eset_t *eset) { return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); @@ -529,7 +524,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, bool coalesced; extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, eset, extent, &coalesced, false); - extent_state_set(extent, extents_state_get(eset)); + extent_state_set(extent, eset_state_get(eset)); if (!coalesced) { return true; @@ -613,7 +608,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, * Either mark the extent active or deregister it to protect against * concurrent operations. 
*/ - switch (extents_state_get(eset)) { + switch (eset_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -647,7 +642,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ - if (extents_state_get(eset) == extent_state_dirty) { + if (eset_state_get(eset) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, r_extent_hooks, @@ -679,7 +674,7 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == extent_state_active); - extent_state_set(extent, extents_state_get(eset)); + extent_state_set(extent, eset_state_get(eset)); extents_insert_locked(tsdn, eset, extent); } @@ -695,7 +690,7 @@ static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extents_state_get(eset)); + assert(extent_state_get(extent) == eset_state_get(eset)); extents_remove_locked(tsdn, eset, extent); extent_state_set(extent, extent_state_active); @@ -936,7 +931,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, != arena_ind_get(arena) || extent_size_get(extent) < esize || extent_state_get(extent) != - extents_state_get(eset)) { + eset_state_get(eset)) { extent = NULL; } extent_unlock(tsdn, unlock_extent); @@ -1700,8 +1695,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((extents_state_get(eset) != extent_state_dirty && - extents_state_get(eset) != extent_state_muzzy) || + assert((eset_state_get(eset) != extent_state_dirty && + 
eset_state_get(eset) != extent_state_muzzy) || !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &eset->mtx); From 820f070c6b5b7ff44902ddb45b4b8894075a5c96 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 23:54:57 -0700 Subject: [PATCH 1379/2608] Move page quantization to sz module. --- include/jemalloc/internal/sz.h | 3 ++ src/extent.c | 63 +++------------------------------- src/sz.c | 48 ++++++++++++++++++++++++++ test/unit/extent_quantize.c | 16 ++++----- 4 files changed, 64 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 68e558ab..6df541f6 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -315,4 +315,7 @@ sz_sa2u(size_t size, size_t alignment) { return usize; } +size_t sz_psz_quantize_floor(size_t size); +size_t sz_psz_quantize_ceil(size_t size); + #endif /* JEMALLOC_INTERNAL_SIZE_H */ diff --git a/src/extent.c b/src/extent.c index cdbf9098..af23ca29 100644 --- a/src/extent.c +++ b/src/extent.c @@ -249,59 +249,6 @@ extent_hooks_assure_initialized(arena_t *arena, } } -#ifndef JEMALLOC_JET -static -#endif -size_t -extent_size_quantize_floor(size_t size) { - size_t ret; - pszind_t pind; - - assert(size > 0); - assert((size & PAGE_MASK) == 0); - - pind = sz_psz2ind(size - sz_large_pad + 1); - if (pind == 0) { - /* - * Avoid underflow. This short-circuit would also do the right - * thing for all sizes in the range for which there are - * PAGE-spaced size classes, but it's simplest to just handle - * the one case that would cause erroneous results. 
- */ - return size; - } - ret = sz_pind2sz(pind - 1) + sz_large_pad; - assert(ret <= size); - return ret; -} - -#ifndef JEMALLOC_JET -static -#endif -size_t -extent_size_quantize_ceil(size_t size) { - size_t ret; - - assert(size > 0); - assert(size - sz_large_pad <= SC_LARGE_MAXCLASS); - assert((size & PAGE_MASK) == 0); - - ret = extent_size_quantize_floor(size); - if (ret < size) { - /* - * Skip a quantization that may have an adequately large extent, - * because under-sized extents may be mixed in. This only - * happens when an unusual size is requested, i.e. for aligned - * allocation, and is just one of several places where linear - * search would potentially find sufficiently aligned available - * memory somewhere lower. - */ - ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + - sz_large_pad; - } - return ret; -} - /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) @@ -342,7 +289,7 @@ extents_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); - size_t psz = extent_size_quantize_floor(size); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); if (extent_heap_empty(&eset->heaps[pind])) { bitmap_unset(eset->bitmap, &eset_bitmap_info, @@ -373,7 +320,7 @@ extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { assert(extent_state_get(extent) == eset->state); size_t size = extent_size_get(extent); - size_t psz = extent_size_quantize_floor(size); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); extent_heap_remove(&eset->heaps[pind], extent); @@ -405,8 +352,8 @@ extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { static extent_t * extents_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); - pszind_t pind_max = 
sz_psz2ind(extent_size_quantize_ceil(max_size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)pind); i < pind_max; i = @@ -444,7 +391,7 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, size_t size) { extent_t *ret = NULL; - pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (!maps_coalesce && !opt_retain) { /* diff --git a/src/sz.c b/src/sz.c index 8633fb05..7734f394 100644 --- a/src/sz.c +++ b/src/sz.c @@ -4,6 +4,54 @@ JEMALLOC_ALIGNED(CACHELINE) size_t sz_pind2sz_tab[SC_NPSIZES+1]; + +size_t +sz_psz_quantize_floor(size_t size) { + size_t ret; + pszind_t pind; + + assert(size > 0); + assert((size & PAGE_MASK) == 0); + + pind = sz_psz2ind(size - sz_large_pad + 1); + if (pind == 0) { + /* + * Avoid underflow. This short-circuit would also do the right + * thing for all sizes in the range for which there are + * PAGE-spaced size classes, but it's simplest to just handle + * the one case that would cause erroneous results. + */ + return size; + } + ret = sz_pind2sz(pind - 1) + sz_large_pad; + assert(ret <= size); + return ret; +} + +size_t +sz_psz_quantize_ceil(size_t size) { + size_t ret; + + assert(size > 0); + assert(size - sz_large_pad <= SC_LARGE_MAXCLASS); + assert((size & PAGE_MASK) == 0); + + ret = sz_psz_quantize_floor(size); + if (ret < size) { + /* + * Skip a quantization that may have an adequately large extent, + * because under-sized extents may be mixed in. This only + * happens when an unusual size is requested, i.e. for aligned + * allocation, and is just one of several places where linear + * search would potentially find sufficiently aligned available + * memory somewhere lower. 
+ */ + ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + + sz_large_pad; + } + return ret; +} + static void sz_boot_pind2sz_tab(const sc_data_t *sc_data) { int pind = 0; diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 0ca7a75d..64b3baa7 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -23,11 +23,11 @@ TEST_BEGIN(test_small_extent_size) { assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); assert_zu_eq(extent_size, - extent_size_quantize_floor(extent_size), + sz_psz_quantize_floor(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); assert_zu_eq(extent_size, - extent_size_quantize_ceil(extent_size), + sz_psz_quantize_ceil(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); } @@ -65,8 +65,8 @@ TEST_BEGIN(test_large_extent_size) { &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); extent_size = cache_oblivious ? 
lextent_size + PAGE : lextent_size; - floor = extent_size_quantize_floor(extent_size); - ceil = extent_size_quantize_ceil(extent_size); + floor = sz_psz_quantize_floor(extent_size); + ceil = sz_psz_quantize_ceil(extent_size); assert_zu_eq(extent_size, floor, "Extent quantization should be a no-op for precise size " @@ -79,7 +79,7 @@ TEST_BEGIN(test_large_extent_size) { if (i > 0) { assert_zu_eq(extent_size_prev, - extent_size_quantize_floor(extent_size - PAGE), + sz_psz_quantize_floor(extent_size - PAGE), "Floor should be a precise size"); if (extent_size_prev < ceil_prev) { assert_zu_eq(ceil_prev, extent_size, @@ -91,7 +91,7 @@ TEST_BEGIN(test_large_extent_size) { } if (i + 1 < nlextents) { extent_size_prev = floor; - ceil_prev = extent_size_quantize_ceil(extent_size + + ceil_prev = sz_psz_quantize_ceil(extent_size + PAGE); } } @@ -109,8 +109,8 @@ TEST_BEGIN(test_monotonic) { size_t extent_size, floor, ceil; extent_size = i << LG_PAGE; - floor = extent_size_quantize_floor(extent_size); - ceil = extent_size_quantize_ceil(extent_size); + floor = sz_psz_quantize_floor(extent_size); + ceil = sz_psz_quantize_ceil(extent_size); assert_zu_le(floor, extent_size, "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)", From a42861540e3a257259eb1c303c7750229ac62b71 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 20:52:13 -0700 Subject: [PATCH 1380/2608] Extents -> Eset: Convert some stats getters. 
--- include/jemalloc/internal/arena_structs.h | 4 +-- include/jemalloc/internal/eset.h | 6 ++++ include/jemalloc/internal/extent_externs.h | 5 --- src/arena.c | 37 +++++++++++----------- src/background_thread.c | 6 ++-- src/eset.c | 15 +++++++++ src/extent.c | 15 --------- 7 files changed, 44 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 9563c3d6..6e8b8291 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -52,8 +52,8 @@ struct arena_decay_s { /* * Number of unpurged pages at beginning of current epoch. During epoch * advancement we use the delta between arena->decay_*.nunpurged and - * extents_npages_get(&arena->extents_*) to determine how many dirty - * pages, if any, were generated. + * eset_npages_get(&arena->extents_*) to determine how many dirty pages, + * if any, were generated. */ size_t nunpurged; /* diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index abd37cad..1e055397 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -64,4 +64,10 @@ bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, bool delay_coalesce); extent_state_t eset_state_get(const eset_t *eset); +size_t eset_npages_get(eset_t *eset); +/* Get the number of extents in the given page size index. */ +size_t eset_nextents_get(eset_t *eset, pszind_t ind); +/* Get the sum total bytes of the extents in the given page size index. 
*/ +size_t eset_nbytes_get(eset_t *eset, pszind_t ind); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 2e196dd0..1c930275 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -27,11 +27,6 @@ size_t extent_size_quantize_ceil(size_t size); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -size_t extents_npages_get(eset_t *eset); -/* Get the number of extents in the given page size index. */ -size_t extents_nextents_get(eset_t *eset, pszind_t ind); -/* Get the sum total bytes of the extents in the given page size index. */ -size_t extents_nbytes_get(eset_t *eset, pszind_t ind); extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, diff --git a/src/arena.c b/src/arena.c index 3eae7e35..9dba4e73 100644 --- a/src/arena.c +++ b/src/arena.c @@ -75,8 +75,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += extents_npages_get(&arena->extents_dirty); - *nmuzzy += extents_npages_get(&arena->extents_muzzy); + *ndirty += eset_npages_get(&arena->extents_dirty); + *nmuzzy += eset_npages_get(&arena->extents_muzzy); } void @@ -99,7 +99,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - extents_npages_get(&arena->extents_retained) << LG_PAGE); + eset_npages_get(&arena->extents_retained) << LG_PAGE); atomic_store_zu(&astats->extent_avail, 
atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), @@ -130,8 +130,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - extents_npages_get(&arena->extents_dirty) + - extents_npages_get(&arena->extents_muzzy)) << LG_PAGE))); + eset_npages_get(&arena->extents_dirty) + + eset_npages_get(&arena->extents_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -173,13 +173,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = extents_nextents_get(&arena->extents_dirty, i); - muzzy = extents_nextents_get(&arena->extents_muzzy, i); - retained = extents_nextents_get(&arena->extents_retained, i); - dirty_bytes = extents_nbytes_get(&arena->extents_dirty, i); - muzzy_bytes = extents_nbytes_get(&arena->extents_muzzy, i); - retained_bytes = - extents_nbytes_get(&arena->extents_retained, i); + dirty = eset_nextents_get(&arena->extents_dirty, i); + muzzy = eset_nextents_get(&arena->extents_muzzy, i); + retained = eset_nextents_get(&arena->extents_retained, i); + dirty_bytes = eset_nbytes_get(&arena->extents_dirty, i); + muzzy_bytes = eset_nbytes_get(&arena->extents_muzzy, i); + retained_bytes = eset_nbytes_get(&arena->extents_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -645,7 +644,7 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, eset_t *eset, const nstime_t *time, bool is_background_thread) { - size_t current_npages = extents_npages_get(eset); + size_t 
current_npages = eset_npages_get(eset); arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); @@ -720,7 +719,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, eset, false, - 0, extents_npages_get(eset), + 0, eset_npages_get(eset), is_background_thread); } return false; @@ -760,7 +759,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, is_background_thread); } else if (is_background_thread) { arena_decay_try_purge(tsdn, arena, decay, eset, - extents_npages_get(eset), + eset_npages_get(eset), arena_decay_backlog_npages_limit(decay), is_background_thread); } @@ -907,7 +906,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, /* * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * invariant: (eset_npages_get(extents) >= npages_limit). We need an upper * bound on number of pages in order to prevent unbounded growth (namely in * stashed), otherwise unbounded new pages could be added to extents during the * current decay run, so that the purging thread never finishes. @@ -950,7 +949,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, eset, all, 0, - extents_npages_get(eset), is_background_thread); + eset_npages_get(eset), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -1177,8 +1176,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. 
*/ - assert(extents_npages_get(&arena->extents_dirty) == 0); - assert(extents_npages_get(&arena->extents_muzzy) == 0); + assert(eset_npages_get(&arena->extents_dirty) == 0); + assert(eset_npages_get(&arena->extents_muzzy) == 0); /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); diff --git a/src/background_thread.c b/src/background_thread.c index f4b9ceff..9476a12f 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -130,7 +130,7 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); - size_t npages = extents_npages_get(eset); + size_t npages = eset_npages_get(eset); if (npages == 0) { unsigned i; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { @@ -718,8 +718,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (extents_npages_get(&arena->extents_dirty) > 0 || - extents_npages_get(&arena->extents_muzzy) > 0 || + (eset_npages_get(&arena->extents_dirty) > 0 || + eset_npages_get(&arena->extents_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/eset.c b/src/eset.c index d0b55941..d9457ee9 100644 --- a/src/eset.c +++ b/src/eset.c @@ -28,3 +28,18 @@ extent_state_t eset_state_get(const eset_t *eset) { return eset->state; } + +size_t +eset_npages_get(eset_t *eset) { + return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); +} + +size_t +eset_nextents_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); +} + +size_t +eset_nbytes_get(eset_t *eset, pszind_t pind) { + return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); +} diff --git a/src/extent.c b/src/extent.c index af23ca29..81ce308a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -252,21 +252,6 @@ 
extent_hooks_assure_initialized(arena_t *arena, /* Generate pairing heap functions. */ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -size_t -extents_npages_get(eset_t *eset) { - return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); -} - -size_t -extents_nextents_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); -} - -size_t -extents_nbytes_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); -} - static void extents_stats_add(eset_t *eset, pszind_t pind, size_t sz) { size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); From 1210af9a4e26994c6f340085554f3519994ae682 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Fri, 20 Sep 2019 23:51:13 -0700 Subject: [PATCH 1381/2608] Extent -> Eset: Move insertion and removal. --- include/jemalloc/internal/eset.h | 3 ++ src/eset.c | 78 ++++++++++++++++++++++++++++ src/extent.c | 87 ++------------------------------ 3 files changed, 85 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 1e055397..400316ed 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -70,4 +70,7 @@ size_t eset_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. 
*/ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); +void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); +void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index d9457ee9..21dcccad 100644 --- a/src/eset.c +++ b/src/eset.c @@ -43,3 +43,81 @@ size_t eset_nbytes_get(eset_t *eset, pszind_t pind) { return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); } + +static void +eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); +} + +static void +eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { + size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); +} + +void +eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); + + size_t size = extent_size_get(extent); + size_t psz = sz_psz_quantize_floor(size); + pszind_t pind = sz_psz2ind(psz); + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_unset(eset->bitmap, &eset_bitmap_info, + (size_t)pind); + } + extent_heap_insert(&eset->heaps[pind], extent); + + if (config_stats) { + eset_stats_add(eset, pind, size); + } + + extent_list_append(&eset->lru, extent); + size_t npages = size >> LG_PAGE; + /* + * All modifications to npages hold the mutex (as asserted above), so we + * don't need an atomic fetch-add; we can get by with a load followed by + * a store. 
+ */ + size_t cur_eset_npages = + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + atomic_store_zu(&eset->npages, cur_eset_npages + npages, + ATOMIC_RELAXED); +} + +void +eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + assert(extent_state_get(extent) == eset->state); + + size_t size = extent_size_get(extent); + size_t psz = sz_psz_quantize_floor(size); + pszind_t pind = sz_psz2ind(psz); + extent_heap_remove(&eset->heaps[pind], extent); + + if (config_stats) { + eset_stats_sub(eset, pind, size); + } + + if (extent_heap_empty(&eset->heaps[pind])) { + bitmap_set(eset->bitmap, &eset_bitmap_info, + (size_t)pind); + } + extent_list_remove(&eset->lru, extent); + size_t npages = size >> LG_PAGE; + /* + * As in eset_insert_locked, we hold eset->mtx and so don't need atomic + * operations for updating eset->npages. + */ + size_t cur_extents_npages = + atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + assert(cur_extents_npages >= npages); + atomic_store_zu(&eset->npages, + cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); +} diff --git a/src/extent.c b/src/extent.c index 81ce308a..069899c2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -249,87 +249,8 @@ extent_hooks_assure_initialized(arena_t *arena, } } -/* Generate pairing heap functions. 
*/ ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -static void -extents_stats_add(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); -} - -static void -extents_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); -} - -static void -extents_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); - - size_t size = extent_size_get(extent); - size_t psz = sz_psz_quantize_floor(size); - pszind_t pind = sz_psz2ind(psz); - if (extent_heap_empty(&eset->heaps[pind])) { - bitmap_unset(eset->bitmap, &eset_bitmap_info, - (size_t)pind); - } - extent_heap_insert(&eset->heaps[pind], extent); - - if (config_stats) { - extents_stats_add(eset, pind, size); - } - - extent_list_append(&eset->lru, extent); - size_t npages = size >> LG_PAGE; - /* - * All modifications to npages hold the mutex (as asserted above), so we - * don't need an atomic fetch-add; we can get by with a load followed by - * a store. 
- */ - size_t cur_eset_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); - atomic_store_zu(&eset->npages, cur_eset_npages + npages, - ATOMIC_RELAXED); -} - -static void -extents_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); - - size_t size = extent_size_get(extent); - size_t psz = sz_psz_quantize_floor(size); - pszind_t pind = sz_psz2ind(psz); - extent_heap_remove(&eset->heaps[pind], extent); - - if (config_stats) { - extents_stats_sub(eset, pind, size); - } - - if (extent_heap_empty(&eset->heaps[pind])) { - bitmap_set(eset->bitmap, &eset_bitmap_info, - (size_t)pind); - } - extent_list_remove(&eset->lru, extent); - size_t npages = size >> LG_PAGE; - /* - * As in extents_insert_locked, we hold eset->mtx and so don't need - * atomic operations for updating eset->npages. - */ - size_t cur_extents_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); - assert(cur_extents_npages >= npages); - atomic_store_zu(&eset->npages, - cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); -} - /* * Find an extent with size [min_size, max_size) to satisfy the alignment * requirement. For each size, try only the first extent in the heap. 
@@ -461,7 +382,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, if (!coalesced) { return true; } - extents_insert_locked(tsdn, eset, extent); + eset_insert_locked(tsdn, eset, extent); return false; } @@ -521,7 +442,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = NULL; goto label_return; } - extents_remove_locked(tsdn, eset, extent); + eset_remove_locked(tsdn, eset, extent); if (!eset->delay_coalesce) { break; } @@ -607,7 +528,7 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, eset_state_get(eset)); - extents_insert_locked(tsdn, eset, extent); + eset_insert_locked(tsdn, eset, extent); } static void @@ -624,7 +545,7 @@ extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); assert(extent_state_get(extent) == eset_state_get(eset)); - extents_remove_locked(tsdn, eset, extent); + eset_remove_locked(tsdn, eset, extent); extent_state_set(extent, extent_state_active); } From 77bbb35a92821858b9054aa88f2c3bc76b29cbdc Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 09:36:22 -0700 Subject: [PATCH 1382/2608] Extent -> Eset: Move extent fit functions. --- include/jemalloc/internal/eset.h | 6 ++ src/eset.c | 112 ++++++++++++++++++++++++++++ src/extent.c | 121 +------------------------------ 3 files changed, 119 insertions(+), 120 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 400316ed..77a55e9b 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -72,5 +72,11 @@ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); +/* + * Select an extent from this eset of the given size and alignment. 
Returns + * null if no such item could be found. + */ +extent_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, + size_t alignment); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index 21dcccad..68ec7e46 100644 --- a/src/eset.c +++ b/src/eset.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" +/* For opt_retain */ +#include "jemalloc/internal/extent_mmap.h" const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); @@ -121,3 +123,113 @@ eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } + +/* + * Find an extent with size [min_size, max_size) to satisfy the alignment + * requirement. For each size, try only the first extent in the heap. + */ +static extent_t * +eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, + size_t alignment) { + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, + (size_t)i+1)) { + assert(i < SC_NPSIZES); + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); + uintptr_t base = (uintptr_t)extent_base_get(extent); + size_t candidate_size = extent_size_get(extent); + assert(candidate_size >= min_size); + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= min_size) { + return extent; + } + } + + return NULL; +} + +/* + * Do first-fit extent selection, i.e. 
select the oldest/lowest extent that is + * large enough. + */ +static extent_t * +eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { + extent_t *ret = NULL; + + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + + if (!maps_coalesce && !opt_retain) { + /* + * No split / merge allowed (Windows w/o retain). Try exact fit + * only. + */ + return extent_heap_empty(&eset->heaps[pind]) ? NULL : + extent_heap_first(&eset->heaps[pind]); + } + + for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, + &eset_bitmap_info, (size_t)pind); + i < SC_NPSIZES + 1; + i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, + (size_t)i+1)) { + assert(!extent_heap_empty(&eset->heaps[i])); + extent_t *extent = extent_heap_first(&eset->heaps[i]); + assert(extent_size_get(extent) >= size); + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large eset for much smaller sizes. + * + * Only do check for dirty eset (delay_coalesce). + */ + if (eset->delay_coalesce && + (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + break; + } + if (ret == NULL || extent_snad_comp(extent, ret) < 0) { + ret = extent; + } + if (i == SC_NPSIZES) { + break; + } + assert(i < SC_NPSIZES); + } + + return ret; +} + +extent_t * +eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { + malloc_mutex_assert_owner(tsdn, &eset->mtx); + + size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (max_size < esize) { + return NULL; + } + + extent_t *extent = eset_first_fit_locked(tsdn, eset, max_size); + + if (alignment > PAGE && extent == NULL) { + /* + * max_size guarantees the alignment requirement but is rather + * pessimistic. Next we try to satisfy the aligned allocation + * with sizes in [esize, max_size). 
+ */ + extent = eset_fit_alignment(eset, esize, max_size, alignment); + } + + return extent; +} diff --git a/src/extent.c b/src/extent.c index 069899c2..a5f0048c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -251,124 +251,6 @@ extent_hooks_assure_initialized(arena_t *arena, ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) -/* - * Find an extent with size [min_size, max_size) to satisfy the alignment - * requirement. For each size, try only the first extent in the heap. - */ -static extent_t * -extents_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, - size_t alignment) { - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); - - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); i < pind_max; i = - (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { - assert(i < SC_NPSIZES); - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - uintptr_t base = (uintptr_t)extent_base_get(extent); - size_t candidate_size = extent_size_get(extent); - assert(candidate_size >= min_size); - - uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, - PAGE_CEILING(alignment)); - if (base > next_align || base + candidate_size <= next_align) { - /* Overflow or not crossing the next alignment. */ - continue; - } - - size_t leadsize = next_align - base; - if (candidate_size - leadsize >= min_size) { - return extent; - } - } - - return NULL; -} - -/* - * Do first-fit extent selection, i.e. select the oldest/lowest extent that is - * large enough. - */ -static extent_t * -extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - size_t size) { - extent_t *ret = NULL; - - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - - if (!maps_coalesce && !opt_retain) { - /* - * No split / merge allowed (Windows w/o retain). Try exact fit - * only. 
- */ - return extent_heap_empty(&eset->heaps[pind]) ? NULL : - extent_heap_first(&eset->heaps[pind]); - } - - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); - i < SC_NPSIZES + 1; - i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - assert(extent_size_get(extent) >= size); - /* - * In order to reduce fragmentation, avoid reusing and splitting - * large eset for much smaller sizes. - * - * Only do check for dirty eset (delay_coalesce). - */ - if (eset->delay_coalesce && - (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { - break; - } - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { - ret = extent; - } - if (i == SC_NPSIZES) { - break; - } - assert(i < SC_NPSIZES); - } - - return ret; -} - -/* - * Do first-fit extent selection, where the selection policy choice is - * based on eset->delay_coalesce. - */ -static extent_t * -extents_fit_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - size_t esize, size_t alignment) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - - size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (max_size < esize) { - return NULL; - } - - extent_t *extent = - extents_first_fit_locked(tsdn, arena, eset, max_size); - - if (alignment > PAGE && extent == NULL) { - /* - * max_size guarantees the alignment requirement but is rather - * pessimistic. Next we try to satisfy the aligned allocation - * with sizes in [esize, max_size). 
- */ - extent = extents_fit_alignment(eset, esize, max_size, - alignment); - } - - return extent; -} - static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, @@ -790,8 +672,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, eset, esize, - alignment); + extent = eset_fit_locked(tsdn, eset, esize, alignment); } if (extent == NULL) { malloc_mutex_unlock(tsdn, &eset->mtx); From e144b21e4be9a6353ff9fee1b10c90e4b1030879 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 10:23:12 -0700 Subject: [PATCH 1383/2608] Extent -> Eset: Move fork handling. --- include/jemalloc/internal/eset.h | 4 ++++ include/jemalloc/internal/extent_externs.h | 3 --- src/arena.c | 18 +++++++++--------- src/eset.c | 15 +++++++++++++++ src/extent.c | 15 --------------- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 77a55e9b..5b479d59 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -79,4 +79,8 @@ void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); extent_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment); +void eset_prefork(tsdn_t *tsdn, eset_t *eset); +void eset_postfork_parent(tsdn_t *tsdn, eset_t *eset); +void eset_postfork_child(tsdn_t *tsdn, eset_t *eset); + #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 1c930275..cbfb2c72 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -35,9 +35,6 @@ void extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, 
extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_min); -void extents_prefork(tsdn_t *tsdn, eset_t *eset); -void extents_postfork_parent(tsdn_t *tsdn, eset_t *eset); -void extents_postfork_child(tsdn_t *tsdn, eset_t *eset); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/src/arena.c b/src/arena.c index 9dba4e73..f9d7dcdb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2200,9 +2200,9 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - extents_prefork(tsdn, &arena->extents_dirty); - extents_prefork(tsdn, &arena->extents_muzzy); - extents_prefork(tsdn, &arena->extents_retained); + eset_prefork(tsdn, &arena->extents_dirty); + eset_prefork(tsdn, &arena->extents_muzzy); + eset_prefork(tsdn, &arena->extents_retained); } void @@ -2242,9 +2242,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); - extents_postfork_parent(tsdn, &arena->extents_dirty); - extents_postfork_parent(tsdn, &arena->extents_muzzy); - extents_postfork_parent(tsdn, &arena->extents_retained); + eset_postfork_parent(tsdn, &arena->extents_dirty); + eset_postfork_parent(tsdn, &arena->extents_muzzy); + eset_postfork_parent(tsdn, &arena->extents_retained); malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); @@ -2288,9 +2288,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); - extents_postfork_child(tsdn, &arena->extents_dirty); - 
extents_postfork_child(tsdn, &arena->extents_muzzy); - extents_postfork_child(tsdn, &arena->extents_retained); + eset_postfork_child(tsdn, &arena->extents_dirty); + eset_postfork_child(tsdn, &arena->extents_muzzy); + eset_postfork_child(tsdn, &arena->extents_retained); malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); diff --git a/src/eset.c b/src/eset.c index 68ec7e46..9cc8ceea 100644 --- a/src/eset.c +++ b/src/eset.c @@ -233,3 +233,18 @@ eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { return extent; } + +void +eset_prefork(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_prefork(tsdn, &eset->mtx); +} + +void +eset_postfork_parent(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_parent(tsdn, &eset->mtx); +} + +void +eset_postfork_child(tsdn_t *tsdn, eset_t *eset) { + malloc_mutex_postfork_child(tsdn, &eset->mtx); +} diff --git a/src/extent.c b/src/extent.c index a5f0048c..b66afdbc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -388,21 +388,6 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks extent_dalloc(tsdn, arena, extent); } -void -extents_prefork(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_prefork(tsdn, &eset->mtx); -} - -void -extents_postfork_parent(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_parent(tsdn, &eset->mtx); -} - -void -extents_postfork_child(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_child(tsdn, &eset->mtx); -} - static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, extent_t *extent) { From 821dd53a1d46f07cc8252bea4b229a77caa4ca83 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 10:35:47 -0700 Subject: [PATCH 1384/2608] Extent -> Eset: Rename arena members. 
--- include/jemalloc/internal/arena_structs.h | 6 +- src/arena.c | 78 +++++++++++------------ src/background_thread.c | 8 +-- src/ctl.c | 6 +- src/extent.c | 16 ++--- src/large.c | 4 +- 6 files changed, 59 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 6e8b8291..54889dc8 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -162,9 +162,9 @@ struct arena_s { * * Synchronization: internal. */ - eset_t extents_dirty; - eset_t extents_muzzy; - eset_t extents_retained; + eset_t eset_dirty; + eset_t eset_muzzy; + eset_t eset_retained; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/src/arena.c b/src/arena.c index f9d7dcdb..37f4b556 100644 --- a/src/arena.c +++ b/src/arena.c @@ -75,8 +75,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += eset_npages_get(&arena->extents_dirty); - *nmuzzy += eset_npages_get(&arena->extents_muzzy); + *ndirty += eset_npages_get(&arena->eset_dirty); + *nmuzzy += eset_npages_get(&arena->eset_muzzy); } void @@ -99,7 +99,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - eset_npages_get(&arena->extents_retained) << LG_PAGE); + eset_npages_get(&arena->eset_retained) << LG_PAGE); atomic_store_zu(&astats->extent_avail, atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), @@ -130,8 +130,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + 
(((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - eset_npages_get(&arena->extents_dirty) + - eset_npages_get(&arena->extents_muzzy)) << LG_PAGE))); + eset_npages_get(&arena->eset_dirty) + + eset_npages_get(&arena->eset_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -173,12 +173,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = eset_nextents_get(&arena->extents_dirty, i); - muzzy = eset_nextents_get(&arena->extents_muzzy, i); - retained = eset_nextents_get(&arena->extents_retained, i); - dirty_bytes = eset_nbytes_get(&arena->extents_dirty, i); - muzzy_bytes = eset_nbytes_get(&arena->extents_muzzy, i); - retained_bytes = eset_nbytes_get(&arena->extents_retained, i); + dirty = eset_nextents_get(&arena->eset_dirty, i); + muzzy = eset_nextents_get(&arena->eset_muzzy, i); + retained = eset_nextents_get(&arena->eset_retained, i); + dirty_bytes = eset_nbytes_get(&arena->eset_dirty, i); + muzzy_bytes = eset_nbytes_get(&arena->eset_muzzy, i); + retained_bytes = eset_nbytes_get(&arena->eset_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -225,11 +225,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx, arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(extents_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(extents_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(eset_muzzy.mtx, arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(extents_retained.mtx, + READ_ARENA_MUTEX_PROF_DATA(eset_retained.mtx, arena_prof_mutex_extents_retained) 
READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, arena_prof_mutex_decay_dirty) @@ -257,7 +257,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, r_extent_hooks, &arena->extents_dirty, + extents_dalloc(tsdn, arena, r_extent_hooks, &arena->eset_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); @@ -435,11 +435,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t mapped_add; bool commit = true; extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, usize, sz_large_pad, alignment, false, + &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL && arena_may_have_muzzy(arena)) { extent = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, usize, sz_large_pad, alignment, + &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; @@ -809,14 +809,14 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, decay_ms); + &arena->eset_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, decay_ms); + &arena->eset_muzzy, decay_ms); } static size_t @@ -869,7 +869,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, r_extent_hooks, extent, 0, extent_size_get(extent))) { extents_dalloc(tsdn, arena, r_extent_hooks, - &arena->extents_muzzy, extent); + &arena->eset_muzzy, extent); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; @@ -982,14 +982,14 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, 
bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->extents_dirty, is_background_thread, all); + &arena->eset_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->extents_muzzy, is_background_thread, all); + &arena->eset_muzzy, is_background_thread, all); } void @@ -1160,7 +1160,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { extent_hooks_t *extent_hooks = extent_hooks_get(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, - &arena->extents_retained, 0)) != NULL) { + &arena->eset_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); } } @@ -1176,8 +1176,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(eset_npages_get(&arena->extents_dirty) == 0); - assert(eset_npages_get(&arena->extents_muzzy) == 0); + assert(eset_npages_get(&arena->eset_dirty) == 0); + assert(eset_npages_get(&arena->eset_muzzy) == 0); /* Deallocate retained memory. 
*/ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1235,11 +1235,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard bool zero = false; bool commit = true; extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, NULL, bin_info->slab_size, 0, PAGE, true, + &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, NULL, bin_info->slab_size, 0, PAGE, + &arena->eset_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } if (slab == NULL) { @@ -2021,14 +2021,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (eset_init(tsdn, &arena->extents_dirty, extent_state_dirty, true)) { + if (eset_init(tsdn, &arena->eset_dirty, extent_state_dirty, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (eset_init(tsdn, &arena->extents_muzzy, extent_state_muzzy, false)) { + if (eset_init(tsdn, &arena->eset_muzzy, extent_state_muzzy, false)) { goto label_error; } /* @@ -2037,7 +2037,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. 
*/ - if (eset_init(tsdn, &arena->extents_retained, extent_state_retained, + if (eset_init(tsdn, &arena->eset_retained, extent_state_retained, false)) { goto label_error; } @@ -2200,9 +2200,9 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - eset_prefork(tsdn, &arena->extents_dirty); - eset_prefork(tsdn, &arena->extents_muzzy); - eset_prefork(tsdn, &arena->extents_retained); + eset_prefork(tsdn, &arena->eset_dirty); + eset_prefork(tsdn, &arena->eset_muzzy); + eset_prefork(tsdn, &arena->eset_retained); } void @@ -2242,9 +2242,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); - eset_postfork_parent(tsdn, &arena->extents_dirty); - eset_postfork_parent(tsdn, &arena->extents_muzzy); - eset_postfork_parent(tsdn, &arena->extents_retained); + eset_postfork_parent(tsdn, &arena->eset_dirty); + eset_postfork_parent(tsdn, &arena->eset_muzzy); + eset_postfork_parent(tsdn, &arena->eset_retained); malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); @@ -2288,9 +2288,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); - eset_postfork_child(tsdn, &arena->extents_dirty); - eset_postfork_child(tsdn, &arena->extents_muzzy); - eset_postfork_child(tsdn, &arena->extents_retained); + eset_postfork_child(tsdn, &arena->eset_dirty); + eset_postfork_child(tsdn, &arena->eset_muzzy); + eset_postfork_child(tsdn, &arena->eset_retained); malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, 
&arena->decay_muzzy.mtx); diff --git a/src/background_thread.c b/src/background_thread.c index 9476a12f..4a74edbf 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -202,12 +202,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->extents_dirty); + &arena->eset_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->extents_muzzy); + &arena->eset_muzzy); return i1 < i2 ? i1 : i2; } @@ -718,8 +718,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (eset_npages_get(&arena->extents_dirty) > 0 || - eset_npages_get(&arena->extents_muzzy) > 0 || + (eset_npages_get(&arena->eset_dirty) > 0 || + eset_npages_get(&arena->eset_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index 2be2f328..a29be194 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3001,9 +3001,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->extent_avail_mtx); - MUTEX_PROF_RESET(arena->extents_dirty.mtx); - MUTEX_PROF_RESET(arena->extents_muzzy.mtx); - MUTEX_PROF_RESET(arena->extents_retained.mtx); + MUTEX_PROF_RESET(arena->eset_dirty.mtx); + MUTEX_PROF_RESET(arena->eset_muzzy.mtx); + MUTEX_PROF_RESET(arena->eset_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/extent.c b/src/extent.c index b66afdbc..a015f9b3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1097,11 +1097,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (result == 
extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); + &arena->eset_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); + &arena->eset_retained, trail, true); } } else { /* @@ -1114,12 +1114,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, to_salvage, true); + &arena->eset_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, r_extent_hooks, - &arena->extents_retained, to_leak, true); + &arena->eset_retained, to_leak, true); } goto label_err; } @@ -1128,7 +1128,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, extent_size_get(extent), true)) { extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); + &arena->eset_retained, extent, true); goto label_err; } if (!extent_need_manual_zero(arena)) { @@ -1189,7 +1189,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, - &arena->extents_retained, new_addr, size, pad, alignment, slab, + &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (extent != NULL) { malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ -1434,7 +1434,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { - assert(eset == &arena->extents_dirty); + assert(eset == &arena->eset_dirty); /* Always coalesce large eset eagerly. 
*/ bool coalesced; do { @@ -1577,7 +1577,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, + extent_record(tsdn, arena, r_extent_hooks, &arena->eset_retained, extent, false); } diff --git a/src/large.c b/src/large.c index a5c2f9ab..40afa623 100644 --- a/src/large.c +++ b/src/large.c @@ -155,10 +155,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, extent_t *trail; bool new_mapping; if ((trail = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_dirty, extent_past_get(extent), trailsize, 0, + &arena->eset_dirty, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, &extent_hooks, - &arena->extents_muzzy, extent_past_get(extent), trailsize, 0, + &arena->eset_muzzy, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; From ce5b128f1006cb8bde04b633bfc43a4881e76490 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 10:40:39 -0700 Subject: [PATCH 1385/2608] Remove the undefined extent_size_quantize declarations. 
--- include/jemalloc/internal/extent_externs.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index cbfb2c72..6963b477 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -19,11 +19,6 @@ extent_hooks_t *extent_hooks_get(arena_t *arena); extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); -#ifdef JEMALLOC_JET -size_t extent_size_quantize_floor(size_t size); -size_t extent_size_quantize_ceil(size_t size); -#endif - ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) From c97d255752e3dd53dbfcb5c3fdf9d972da2b47f1 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 21 Sep 2019 11:01:39 -0700 Subject: [PATCH 1386/2608] Eset: Remove temporary declaration. --- include/jemalloc/internal/eset.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 5b479d59..fae64c8c 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -6,9 +6,6 @@ #include "jemalloc/internal/extent.h" #include "jemalloc/internal/mutex.h" -/* This is a transitional declarion, while we move extent.c into eset.c. */ -extern const bitmap_info_t eset_bitmap_info; - /* * An eset ("extent set") is a quantized collection of extents, with built-in * LRU queue. From 3d84bd57f4954a17059bd31330ec87d3c1876411 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 23 Sep 2019 18:05:57 -0700 Subject: [PATCH 1387/2608] Arena: Add helper function arena_get_from_extent. 
--- include/jemalloc/internal/arena_inlines_b.h | 6 ++++ src/arena.c | 9 ++---- src/ctl.c | 3 +- src/large.c | 31 +++++++-------------- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 917a4916..a6135ee7 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -8,6 +8,12 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +static inline arena_t * +arena_get_from_extent(extent_t *extent) { + return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get(extent)], + ATOMIC_RELAXED); +} + JEMALLOC_ALWAYS_INLINE bool arena_has_default_hooks(arena_t *arena) { return (extent_hooks_get(arena) == &extent_hooks_default); diff --git a/src/arena.c b/src/arena.c index 37f4b556..e096f3a6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1565,8 +1565,7 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); @@ -1731,8 +1730,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); arena_dalloc_bin(tsdn, arena, extent, ptr); arena_decay_tick(tsdn, arena); @@ -1768,8 +1766,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_t *arena = atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); arena_decay_tick(tsdn, arena); ret = 
false; } else if (oldsize >= SC_LARGE_MINCLASS diff --git a/src/ctl.c b/src/ctl.c index a29be194..6bd534a7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2612,8 +2612,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, if (extent == NULL) goto label_return; - arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena = arena_get_from_extent(extent); if (arena == NULL) goto label_return; diff --git a/src/large.c b/src/large.c index 40afa623..13d8e56c 100644 --- a/src/large.c +++ b/src/large.c @@ -94,8 +94,7 @@ large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); @@ -131,8 +130,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { static bool large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { - arena_t *arena = atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); extent_hooks_t *extent_hooks = extent_hooks_get(arena); size_t trailsize = usize - oldusize; @@ -232,18 +230,14 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, /* Attempt to expand the allocation in-place. */ if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, zero)) { - arena_decay_tick(tsdn, - atomic_load_p(&arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && usize_min > oldusize && large_ralloc_no_move_expand(tsdn, extent, usize_min, zero)) { - arena_decay_tick(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } } @@ -253,17 +247,14 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * the new size. */ if (oldusize >= usize_min && oldusize <= usize_max) { - arena_decay_tick(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } /* Attempt to shrink the allocation in-place. */ if (oldusize > usize_max) { if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { - arena_decay_tick(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], - ATOMIC_RELAXED)); + arena_decay_tick(tsdn, arena_get_from_extent(extent)); return false; } } @@ -357,20 +348,18 @@ large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_prep_impl(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent, true); + large_dalloc_prep_impl(tsdn, arena_get_from_extent(extent), extent, + true); } void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_finish_impl(tsdn, atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED), extent); + large_dalloc_finish_impl(tsdn, arena_get_from_extent(extent), extent); } void large_dalloc(tsdn_t *tsdn, extent_t *extent) { - arena_t *arena = atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + arena_t *arena = arena_get_from_extent(extent); large_dalloc_prep_impl(tsdn, arena, extent, false); large_dalloc_finish_impl(tsdn, arena, extent); arena_decay_tick(tsdn, arena); From 1df9dd35154ca460facbd74f779a13dcece78dac Mon Sep 17 00:00:00 2001 From: Gareth Lloyd Date: Tue, 24 Sep 2019 
16:09:07 +0100 Subject: [PATCH 1388/2608] Fix je_ prefix issue in test --- msvc/test_threads/test_threads.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/msvc/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp index 92e31624..6eed028d 100644 --- a/msvc/test_threads/test_threads.cpp +++ b/msvc/test_threads/test_threads.cpp @@ -9,6 +9,7 @@ #include #include #include +#define JEMALLOC_NO_DEMANGLE #include using std::vector; From beb7c16e946d5a48ac6c3e7318aa24be4e787c0c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 27 Aug 2019 14:42:14 -0700 Subject: [PATCH 1389/2608] Guard prof_active reset by opt_prof Set `prof_active` to read-only when `opt_prof` is turned off. --- include/jemalloc/internal/prof_inlines_a.h | 11 +++++++++ src/ctl.c | 10 ++++++-- src/prof.c | 3 +++ test/unit/mallctl.c | 27 ++++++++++++++++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 471d9853..6716d2f4 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -71,8 +71,19 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, #endif } +JEMALLOC_ALWAYS_INLINE void +prof_active_assert() { + cassert(config_prof); + /* + * If opt_prof is off, then prof_active must always be off, regardless + * of whether prof_active_mtx is in effect or not. + */ + assert(opt_prof || !prof_active); +} + JEMALLOC_ALWAYS_INLINE bool prof_active_get_unlocked(void) { + prof_active_assert(); /* * Even if opt_prof is true, sampling can be temporarily disabled by * setting prof_active to false. 
No locking is used when reading diff --git a/src/ctl.c b/src/ctl.c index 6bd534a7..fd05c08b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2662,7 +2662,8 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, bool oldval; if (!config_prof) { - return ENOENT; + ret = ENOENT; + goto label_return; } if (newp != NULL) { @@ -2670,7 +2671,12 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - oldval = prof_active_set(tsd_tsdn(tsd), *(bool *)newp); + bool val = *(bool *)newp; + if (!opt_prof && val) { + ret = ENOENT; + goto label_return; + } + oldval = prof_active_set(tsd_tsdn(tsd), val); } else { oldval = prof_active_get(tsd_tsdn(tsd)); } diff --git a/src/prof.c b/src/prof.c index 9ea4eda4..e00151d3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -788,6 +788,7 @@ bool prof_active_get(tsdn_t *tsdn) { bool prof_active_current; + prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_current = prof_active; malloc_mutex_unlock(tsdn, &prof_active_mtx); @@ -798,10 +799,12 @@ bool prof_active_set(tsdn_t *tsdn, bool active) { bool prof_active_old; + prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); prof_active_old = prof_active; prof_active = active; malloc_mutex_unlock(tsdn, &prof_active_mtx); + prof_active_assert(); return prof_active_old; } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3a75ac04..0e88f314 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -762,6 +762,32 @@ TEST_BEGIN(test_arenas_lookup) { } TEST_END +TEST_BEGIN(test_prof_active) { + /* + * If config_prof is off, then the test for prof_active in + * test_mallctl_opt was already enough. 
+ */ + test_skip_if(!config_prof); + + bool active, old; + size_t len = sizeof(bool); + + active = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), ENOENT, + "Setting prof_active to true should fail when opt_prof is off"); + old = true; + assert_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, + "Setting prof_active to true should fail when opt_prof is off"); + assert_true(old, "old valud should not be touched when mallctl fails"); + active = false; + assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, + "Setting prof_active to false should succeed when opt_prof is off"); + assert_d_eq(mallctl("prof.active", &old, &len, &active, len), 0, + "Setting prof_active to false should succeed when opt_prof is off"); + assert_false(old, "prof_active should be false when opt_prof is off"); +} +TEST_END + TEST_BEGIN(test_stats_arenas) { #define TEST_STATS_ARENAS(t, name) do { \ t name; \ @@ -882,6 +908,7 @@ main(void) { test_arenas_lextent_constants, test_arenas_create, test_arenas_lookup, + test_prof_active, test_stats_arenas, test_hooks, test_hooks_exhaustion); From 66e07f986d77e0b16fd236bbe3518790717d1a4d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 3 Oct 2019 13:01:12 -0700 Subject: [PATCH 1390/2608] Suppress tdata creation in reentrancy This change suppresses tdata initialization and prof sample threshold update in interrupting malloc calls. Interrupting calls have no need for tdata. Delaying tdata creation aligns better with our lazy tdata creation principle, and it also helps us gain control back from interrupting calls more quickly and reduces any risk of delegating tdata creation to an interrupting call. 
--- include/jemalloc/internal/prof_inlines_b.h | 10 ++++++---- src/prof.c | 17 +++++++++++++++-- src/prof_data.c | 2 ++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 860dfbee..c750a25b 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -22,6 +22,7 @@ prof_tdata_get(tsd_t *tsd, bool create) { tdata = tsd_prof_tdata_get(tsd); if (create) { + assert(tsd_reentrancy_level_get(tsd) == 0); if (unlikely(tdata == NULL)) { if (tsd_nominal(tsd)) { tdata = prof_tdata_init(tsd); @@ -109,7 +110,11 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - bool booted = tsd_prof_tdata_get(tsd); + if (tsd_reentrancy_level_get(tsd) > 0) { + return true; + } + + bool booted = prof_tdata_get(tsd, false); tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; @@ -132,9 +137,6 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - if (tsd_reentrancy_level_get(tsd) > 0) { - return true; - } /* Compute new sample threshold. */ if (update) { prof_sample_threshold_update(tdata); diff --git a/src/prof.c b/src/prof.c index e00151d3..a702cc2b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -127,10 +127,15 @@ prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { - prof_tdata_t *tdata; - cassert(config_prof); + if (tsd_reentrancy_level_get(tsd) > 0) { + assert((uintptr_t)tctx == (uintptr_t)1U); + return; + } + + prof_tdata_t *tdata; + if (updated) { /* * Compute a new sample threshold. 
This isn't very important in @@ -810,6 +815,8 @@ prof_active_set(tsdn_t *tsdn, bool active) { const char * prof_thread_name_get(tsd_t *tsd) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); @@ -821,6 +828,8 @@ prof_thread_name_get(tsd_t *tsd) { int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; unsigned i; char *s; @@ -859,6 +868,8 @@ prof_thread_name_set(tsd_t *tsd, const char *thread_name) { bool prof_thread_active_get(tsd_t *tsd) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); @@ -870,6 +881,8 @@ prof_thread_active_get(tsd_t *tsd) { bool prof_thread_active_set(tsd_t *tsd, bool active) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; tdata = prof_tdata_get(tsd, true); diff --git a/src/prof_data.c b/src/prof_data.c index bab8e5c0..cd92ee61 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1199,6 +1199,8 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active) { + assert(tsd_reentrancy_level_get(tsd) == 0); + prof_tdata_t *tdata; cassert(config_prof); From 4094b7c03fb5e814f6f4c85ff7e93b3228dc4d29 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Sep 2019 10:21:46 -0700 Subject: [PATCH 1391/2608] Limit # of iters of test_bitmap_xfu. Otherwise the test is too slow for higher page sizes such as 64k. 
--- test/unit/bitmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index cafb2039..182f2f60 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -403,9 +403,11 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { } TEST_BEGIN(test_bitmap_xfu) { - size_t nbits; + size_t nbits, nbits_max; - for (nbits = 1; nbits <= BITMAP_MAXBITS; nbits++) { + /* The test is O(n^2); large page sizes may slow down too much. */ + nbits_max = BITMAP_MAXBITS > 512 ? 512 : BITMAP_MAXBITS; + for (nbits = 1; nbits <= nbits_max; nbits++) { bitmap_info_t binfo; bitmap_info_init(&binfo, nbits); test_bitmap_xfu_body(&binfo, nbits); From 4fbbc817c1130d3d6c066f132fb5a2b23803be89 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 15 Jul 2019 10:37:09 -0700 Subject: [PATCH 1392/2608] Simplify time setting and getting for prof log --- include/jemalloc/internal/arena_inlines_b.h | 6 ++---- include/jemalloc/internal/prof_inlines_b.h | 9 ++++----- src/prof.c | 2 +- src/prof_log.c | 3 +-- 4 files changed, 8 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a6135ee7..7ac2f942 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -90,8 +90,7 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE nstime_t -arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, - alloc_ctx_t *alloc_ctx) { +arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); @@ -105,8 +104,7 @@ arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, - nstime_t t) { +arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); diff --git 
a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index c750a25b..6b10f5bf 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -64,20 +64,19 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE nstime_t -prof_alloc_time_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - return arena_prof_alloc_time_get(tsdn, ptr, alloc_ctx); + return arena_prof_alloc_time_get(tsdn, ptr); } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, - nstime_t t) { +prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); - arena_prof_alloc_time_set(tsdn, ptr, alloc_ctx, t); + arena_prof_alloc_time_set(tsdn, ptr, t); } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/prof.c b/src/prof.c index a702cc2b..fc0c7d8a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -169,7 +169,7 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, * when free() is called. 
*/ nstime_t t = NSTIME_ZERO_INITIALIZER; nstime_update(&t); - prof_alloc_time_set(tsdn, ptr, NULL, t); + prof_alloc_time_set(tsdn, ptr, t); malloc_mutex_lock(tsdn, tctx->tdata->lock); tctx->cnts.curobjs++; diff --git a/src/prof_log.c b/src/prof_log.c index af91af7d..c95f29e4 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -229,8 +229,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { log_tables_initialized = true; } - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr, - (alloc_ctx_t *)NULL); + nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr); nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); From 4fe50bc7d05083d822a34068bdd75e34f067e5e4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 17 Oct 2019 16:46:45 -0700 Subject: [PATCH 1393/2608] Fix amd64 MSVC warning --- include/jemalloc/internal/cache_bin.h | 7 +-- include/jemalloc/internal/safety_check.h | 2 +- src/prof_log.c | 2 +- src/stats.c | 56 ++++++++++++++---------- 4 files changed, 40 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 5396c2d9..74ebbf7b 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -120,8 +120,8 @@ cache_bin_ncached_max_get(szind_t ind) { static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t n = (tcache_bin_info[ind].stack_size + - bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *); + cache_bin_sz_t n = (cache_bin_sz_t)((tcache_bin_info[ind].stack_size + + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); assert(n <= cache_bin_ncached_max_get(ind)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); @@ -158,7 +158,8 @@ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); cache_bin_sz_t low_water = ncached_max - - 
(bin->low_water_position - bin->full_position) / sizeof(void *); + (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / + sizeof(void *)); assert(low_water <= ncached_max); assert(low_water <= cache_bin_ncached_get(bin, ind)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 53339ac1..ec4b3369 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -3,7 +3,7 @@ void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. */ -void safety_check_set_abort(void (*abort_fn)()); +void safety_check_set_abort(void (*abort_fn)(const char *)); JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { diff --git a/src/prof_log.c b/src/prof_log.c index c95f29e4..73ca7417 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -452,7 +452,7 @@ prof_emitter_write_cb(void *opaque, const char *to_write) { return; } #endif - arg->ret = write(arg->fd, (void *)to_write, bytes); + arg->ret = malloc_write_fd(arg->fd, to_write, bytes); } /* diff --git a/src/stats.c b/src/stats.c index cf75810a..1718b618 100644 --- a/src/stats.c +++ b/src/stats.c @@ -118,7 +118,7 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, #define WIDTH_uint32_t 12 #define WIDTH_uint64_t 16 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ col = &col_##counter_type[k_##counter_type]; \ ++k_##counter_type; \ emitter_col_init(col, row); \ @@ -145,16 +145,20 @@ mutex_stats_read_global(const char *name, emitter_col_t *col_name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = 
&col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ "mutexes", name, #counter); \ - CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, \ + counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -175,16 +179,21 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ + if (!derived) { \ gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ - CTL_M2_GET(cmd, arena_ind, (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], \ + #counter); \ + CTL_M2_GET(cmd, arena_ind, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -202,17 +211,20 @@ 
mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ +#define OP(counter, counter_type, human, derived, base_counter) \ dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #counter); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = rate_per_second(base->counter_type##_val, uptime); \ + if (!derived) { \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP From 05681e387a3202567ff95528dbc460e92e031a3c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 8 Oct 2019 11:33:55 -0700 Subject: [PATCH 1394/2608] Optimize cache_bin_alloc_easy for malloc fast path `tcache_bin_info` is not accessed on malloc fast path but the compiler reserves a register for it, as well as an additional register for `tcache_bin_info[ind].stack_size`. The optimization gets rid of the need for the two registers. 
--- include/jemalloc/internal/cache_bin.h | 44 +++++++++++++++++++++------ src/jemalloc.c | 2 +- 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 74ebbf7b..38b8e328 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -183,8 +183,11 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, descriptor->bins_large = bins_large; } +#define INVALID_SZIND ((szind_t)(unsigned)-1) + JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, + const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -192,20 +195,30 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { void *ret = *(bin->cur_ptr.ptr++); /* * Check for both bin->ncached == 0 and ncached < low_water in a single - * branch. This also avoids accessing tcache_bin_info (which is on a - * separate cacheline / page) in the common case. + * branch. When adjust_low_water is true, this also avoids accessing + * tcache_bin_info (which is on a separate cacheline / page) in the + * common case. */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { - uint32_t empty_position = bin->full_position + - tcache_bin_info[ind].stack_size; - if (unlikely(bin->cur_ptr.lowbits > empty_position)) { - /* Over-allocated; revert. */ + if (adjust_low_water) { + assert(ind != INVALID_SZIND); + uint32_t empty_position = bin->full_position + + tcache_bin_info[ind].stack_size; + if (unlikely(bin->cur_ptr.lowbits > empty_position)) { + /* Over-allocated; revert. 
*/ + bin->cur_ptr.ptr--; + assert(bin->cur_ptr.lowbits == empty_position); + *success = false; + return NULL; + } + bin->low_water_position = bin->cur_ptr.lowbits; + } else { + assert(ind == INVALID_SZIND); bin->cur_ptr.ptr--; - assert(bin->cur_ptr.lowbits == empty_position); + assert(bin->cur_ptr.lowbits == bin->low_water_position); *success = false; return NULL; } - bin->low_water_position = bin->cur_ptr.lowbits; } /* @@ -220,6 +233,19 @@ cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { return ret; } +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { + /* The szind parameter won't be used. */ + return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, false); +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { + return cache_bin_alloc_easy_impl(bin, success, ind, true); +} + +#undef INVALID_SZIND + JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index fc7d2891..7745e348 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2372,7 +2372,7 @@ je_malloc(size_t size) { cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void *ret = cache_bin_alloc_easy(bin, &tcache_success, ind); + void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); if (tcache_success) { *tsd_thread_allocatedp_get(tsd) += usize; From 4786099a3ad11dbf4027f453b8c6de1c1e8777db Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 24 Oct 2019 13:16:09 -0700 Subject: [PATCH 1395/2608] Increase column width for global malloc/free rate --- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1718b618..2b744e19 100644 --- a/src/stats.c +++ b/src/stats.c @@ -829,12 +829,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, 
COL(alloc_count_row, count_nmalloc, right, 16, title); col_count_nmalloc.str_val = "nmalloc"; - COL(alloc_count_row, count_nmalloc_ps, right, 8, title); + COL(alloc_count_row, count_nmalloc_ps, right, 10, title); col_count_nmalloc_ps.str_val = "(#/sec)"; COL(alloc_count_row, count_ndalloc, right, 16, title); col_count_ndalloc.str_val = "ndalloc"; - COL(alloc_count_row, count_ndalloc_ps, right, 8, title); + COL(alloc_count_row, count_ndalloc_ps, right, 10, title); col_count_ndalloc_ps.str_val = "(#/sec)"; COL(alloc_count_row, count_nrequests, right, 16, title); From bd6e28d6a3d0468e36d7da032966e0d786020bcc Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 28 Oct 2019 09:24:42 -0700 Subject: [PATCH 1396/2608] Guard slabcur fetching in extent_util --- src/ctl.c | 3 ++- src/extent.c | 9 +++++++-- test/unit/extent_util.c | 6 ++---- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index fd05c08b..206af4cc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3199,7 +3199,8 @@ label_return: * otherwise their values are undefined. * * This API is mainly intended for small class allocations, where extents are - * used as slab. + * used as slab. Note that if the bin the extent belongs to is completely + * full, "(a)" will be NULL. * * In case of large class allocations, "(a)" will be NULL, and "(e)" and "(f)" * will be zero (if stats are enabled; otherwise undefined). The other three diff --git a/src/extent.c b/src/extent.c index a015f9b3..4bb358d4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -2124,7 +2124,12 @@ extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, } else { *bin_nfree = *bin_nregs = 0; } - *slabcur_addr = extent_addr_get(bin->slabcur); - assert(*slabcur_addr != NULL); + extent_t *slab; + if (bin->slabcur != NULL) { + slab = bin->slabcur; + } else { + slab = extent_heap_first(&bin->slabs_nonfull); + } + *slabcur_addr = slab != NULL ? 
extent_addr_get(slab) : NULL; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/test/unit/extent_util.c b/test/unit/extent_util.c index 97e55f0f..4de0b043 100644 --- a/test/unit/extent_util.c +++ b/test/unit/extent_util.c @@ -94,10 +94,8 @@ TEST_BEGIN(test_query) { "Extent region count exceeded size"); assert_zu_ne(NREGS_READ(out), 0, "Extent region count must be positive"); - assert_ptr_not_null(SLABCUR_READ(out), - "Current slab is null"); - assert_true(NFREE_READ(out) == 0 - || SLABCUR_READ(out) <= p, + assert_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) + != NULL && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); if (config_stats) { assert_zu_le(BIN_NFREE_READ(out), From ee961c23100ebbe1e6eb7390a03be5456bc8814c Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 21 Oct 2019 18:44:42 -0700 Subject: [PATCH 1397/2608] Merge realloc and rallocx pathways. --- src/jemalloc.c | 253 +++++++++++++++---------------------------------- 1 file changed, 76 insertions(+), 177 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7745e348..8dd81bd8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2506,56 +2506,6 @@ je_calloc(size_t num, size_t size) { return ret; } -static void * -irealloc_prof_sample(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - prof_tctx_t *tctx, hook_ralloc_args_t *hook_args) { - void *p; - - if (tctx == NULL) { - return NULL; - } - if (usize <= SC_SMALL_MAXCLASS) { - p = iralloc(tsd, old_ptr, old_usize, - SC_LARGE_MINCLASS, 0, false, hook_args); - if (p == NULL) { - return NULL; - } - arena_prof_promote(tsd_tsdn(tsd), p, usize); - } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, - hook_args); - } - - return p; -} - -JEMALLOC_ALWAYS_INLINE void * -irealloc_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t usize, - alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { - void *p; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = 
prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); - tctx = prof_alloc_prep(tsd, usize, prof_active, true); - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { - p = irealloc_prof_sample(tsd, old_ptr, old_usize, usize, tctx, - hook_args); - } else { - p = iralloc(tsd, old_ptr, old_usize, usize, 0, false, - hook_args); - } - if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, true); - return NULL; - } - prof_realloc(tsd, p, usize, tctx, prof_active, true, old_ptr, old_usize, - old_tctx); - - return p; -} - JEMALLOC_ALWAYS_INLINE void ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { if (!slow_path) { @@ -2645,121 +2595,6 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { } } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t arg_size) { - void *ret; - tsdn_t *tsdn JEMALLOC_CC_SILENCE_INIT(NULL); - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - size_t old_usize = 0; - size_t size = arg_size; - - LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); - - if (unlikely(size == 0)) { - if (ptr != NULL) { - /* realloc(ptr, 0) is equivalent to free(ptr). 
*/ - UTRACE(ptr, 0, 0); - tcache_t *tcache; - tsd_t *tsd = tsd_fetch(); - if (tsd_reentrancy_level_get(tsd) == 0) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - - uintptr_t args[3] = {(uintptr_t)ptr, size}; - hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); - - ifree(tsd, ptr, tcache, true); - - LOG("core.realloc.exit", "result: %p", NULL); - return NULL; - } - size = 1; - } - - if (likely(ptr != NULL)) { - assert(malloc_initialized() || IS_INITIALIZER); - tsd_t *tsd = tsd_fetch(); - - check_entry_exit_locking(tsd_tsdn(tsd)); - - - hook_ralloc_args_t hook_args = {true, {(uintptr_t)ptr, - (uintptr_t)arg_size, 0, 0}}; - - alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); - assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); - usize = sz_s2u(size); - if (config_prof && opt_prof) { - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { - ret = NULL; - } else { - ret = irealloc_prof(tsd, ptr, old_usize, usize, - &alloc_ctx, &hook_args); - } - } else { - ret = iralloc(tsd, ptr, old_usize, size, 0, false, - &hook_args); - } - tsdn = tsd_tsdn(tsd); - } else { - /* realloc(NULL, size) is equivalent to malloc(size). 
*/ - static_opts_t sopts; - dynamic_opts_t dopts; - - static_opts_init(&sopts); - dynamic_opts_init(&dopts); - - sopts.null_out_result_on_error = true; - sopts.set_errno_on_error = true; - sopts.oom_string = - ": Error in realloc(): out of memory\n"; - - dopts.result = &ret; - dopts.num_items = 1; - dopts.item_size = size; - - imalloc(&sopts, &dopts); - if (sopts.slow) { - uintptr_t args[3] = {(uintptr_t)ptr, arg_size}; - hook_invoke_alloc(hook_alloc_realloc, ret, - (uintptr_t)ret, args); - } - - return ret; - } - - if (unlikely(ret == NULL)) { - if (config_xmalloc && unlikely(opt_xmalloc)) { - malloc_write(": Error in realloc(): " - "out of memory\n"); - abort(); - } - set_errno(ENOMEM); - } - if (likely(ret != NULL)) { - tsd_t *tsd; - - assert(usize == isalloc(tsdn, ret)); - tsd = tsdn_tsd(tsdn); - *tsd_thread_allocatedp_get(tsd) += usize; - *tsd_thread_deallocatedp_get(tsd) += old_usize; - } - UTRACE(ptr, size, ret); - check_entry_exit_locking(tsdn); - - LOG("core.realloc.exit", "result: %p", ret); - return ret; -} - JEMALLOC_NOINLINE void free_default(void *ptr) { @@ -3201,10 +3036,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return p; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_rallocx(void *ptr, size_t size, int flags) { +JEMALLOC_ALWAYS_INLINE void * +do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { void *p; tsd_t *tsd; size_t usize; @@ -3214,10 +3047,6 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; - LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); - - assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -3252,8 +3081,8 @@ je_rallocx(void *ptr, size_t size, int flags) { old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); - hook_ralloc_args_t hook_args = {false, {(uintptr_t)ptr, size, flags, - 
0}}; + hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, + flags, 0}}; if (config_prof && opt_prof) { usize = (alignment == 0) ? sz_s2u(size) : sz_sa2u(size, alignment); @@ -3281,7 +3110,6 @@ je_rallocx(void *ptr, size_t size, int flags) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -3291,10 +3119,81 @@ label_oom: UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); - LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) +je_rallocx(void *ptr, size_t size, int flags) { + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + void *ret = do_rallocx(ptr, size, flags, false); + LOG("core.rallocx.exit", "result: %p", ret); + return ret; +} + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) +je_realloc(void *ptr, size_t size) { + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + + if (likely(ptr != NULL && size != 0)) { + void *ret = do_rallocx(ptr, size, 0, true); + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } else if (ptr != NULL && size == 0) { + /* realloc(ptr, 0) is equivalent to free(ptr). */ + UTRACE(ptr, 0, 0); + tcache_t *tcache; + tsd_t *tsd = tsd_fetch(); + check_entry_exit_locking(tsd_tsdn(tsd)); + + if (tsd_reentrancy_level_get(tsd) == 0) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + + uintptr_t args[3] = {(uintptr_t)ptr, size}; + hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); + + ifree(tsd, ptr, tcache, true); + + check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.realloc.exit", "result: %p", NULL); + return NULL; + } else { + /* realloc(NULL, size) is equivalent to malloc(size). 
*/ + void *ret; + + static_opts_t sopts; + dynamic_opts_t dopts; + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.null_out_result_on_error = true; + sopts.set_errno_on_error = true; + sopts.oom_string = + ": Error in realloc(): out of memory\n"; + + dopts.result = &ret; + dopts.num_items = 1; + dopts.item_size = size; + + imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {(uintptr_t)ptr, size}; + hook_invoke_alloc(hook_alloc_realloc, ret, + (uintptr_t)ret, args); + } + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } +} + JEMALLOC_ALWAYS_INLINE size_t ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { From 9cfa8059475745c31c9c646144432174a2165ca4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 23 Sep 2019 17:56:19 -0700 Subject: [PATCH 1398/2608] Realloc: Make behavior of realloc(ptr, 0) configurable. --- Makefile.in | 5 +- doc/jemalloc.xml.in | 27 +++++++ .../internal/jemalloc_internal_externs.h | 2 + .../internal/jemalloc_internal_types.h | 11 +++ src/ctl.c | 6 +- src/jemalloc.c | 77 +++++++++++++++---- src/stats.c | 1 + test/unit/hook.c | 24 +++--- test/unit/mallctl.c | 13 +++- test/unit/zero_realloc_abort.c | 26 +++++++ test/unit/zero_realloc_abort.sh | 3 + test/unit/zero_realloc_free.c | 33 ++++++++ test/unit/zero_realloc_free.sh | 3 + test/unit/zero_realloc_strict.c | 48 ++++++++++++ test/unit/zero_realloc_strict.sh | 3 + 15 files changed, 256 insertions(+), 26 deletions(-) create mode 100644 test/unit/zero_realloc_abort.c create mode 100644 test/unit/zero_realloc_abort.sh create mode 100644 test/unit/zero_realloc_free.c create mode 100644 test/unit/zero_realloc_free.sh create mode 100644 test/unit/zero_realloc_strict.c create mode 100644 test/unit/zero_realloc_strict.sh diff --git a/Makefile.in b/Makefile.in index 21a10532..e4d21805 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,7 +232,10 @@ TESTS_UNIT := \ 
$(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/witness.c \ - $(srcroot)test/unit/zero.c + $(srcroot)test/unit/zero.c \ + $(srcroot)test/unit/zero_realloc_abort.c \ + $(srcroot)test/unit/zero_realloc_free.c \ + $(srcroot)test/unit/zero_realloc_strict.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ $(srcroot)test/unit/arena_reset_prof.c diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e83bfbff..746c6bdb 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1489,6 +1489,33 @@ malloc_conf = "xmalloc:true";]]> by default. + + + opt.zero_realloc + (const char *) + r- + + Determines the behavior of + realloc() when passed a value of zero for the new + size. strict treats this as an allocation of size zero + (and returns a non-null result except in case of resource exhaustion). + free treats this as a deallocation of the pointer, and + returns NULL without setting + errno. abort aborts the process if + zero is passed. The default is strict. + + There is considerable divergence of behaviors across + implementations in handling this case. Many have the behavior of + free. This can introduce security vulnerabilities, since + a NULL return value indicates failure, and the + continued validity of the passed-in pointer (per POSIX and C11). + strict is safe, but can cause leaks in programs that + expect the common behavior. Programs intended to be portable and + leak-free cannot assume either behavior, and must therefore never call + realloc with a size of 0. The abort option enables these + testing this behavior. 
+ + thread.arena diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index d291170b..dae77b42 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -18,6 +18,8 @@ extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; extern unsigned opt_narenas; +extern zero_realloc_action_t opt_zero_realloc_action; +extern const char *zero_realloc_mode_names[]; /* Number of CPUs. */ extern unsigned ncpus; diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index e296c5a7..324a4b13 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -12,6 +12,17 @@ typedef unsigned szind_t; /* Processor / core id type. */ typedef int malloc_cpuid_t; +/* When realloc(non-null-ptr, 0) is called, what happens? */ +enum zero_realloc_action_e { + /* Realloc(ptr, 0) is free(ptr); return malloc(0); */ + zero_realloc_action_strict = 0, + /* Realloc(ptr, 0) is free(ptr); */ + zero_realloc_action_free = 1, + /* Realloc(ptr, 0) aborts. 
*/ + zero_realloc_action_abort = 2 +}; +typedef enum zero_realloc_action_e zero_realloc_action_t; + /* * Flags bits: * diff --git a/src/ctl.c b/src/ctl.c index 206af4cc..b51207f8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -112,6 +112,7 @@ CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) @@ -339,7 +340,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_accum"), CTL(opt_prof_accum)} + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; static const ctl_named_node_t tcache_node[] = { @@ -1793,6 +1795,8 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_GEN(opt_zero_realloc, + zero_realloc_mode_names[opt_zero_realloc_action], const char *) /******************************************************************************/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 8dd81bd8..35a9e7b5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -67,6 +67,15 @@ bool opt_junk_free = #endif ; +zero_realloc_action_t opt_zero_realloc_action = + zero_realloc_action_strict; + +const char *zero_realloc_mode_names[] = { + "strict", + "free", + "abort", +}; + bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; @@ -1411,6 +1420,22 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + if (CONF_MATCH("zero_realloc")) { + if (CONF_MATCH_VALUE("strict")) { + opt_zero_realloc_action + = zero_realloc_action_strict; + } else if (CONF_MATCH_VALUE("free")) 
{ + opt_zero_realloc_action + = zero_realloc_action_free; + } else if (CONF_MATCH_VALUE("abort")) { + opt_zero_realloc_action + = zero_realloc_action_abort; + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE @@ -3133,18 +3158,17 @@ je_rallocx(void *ptr, size_t size, int flags) { return ret; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) { - LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); - - if (likely(ptr != NULL && size != 0)) { - void *ret = do_rallocx(ptr, size, 0, true); - LOG("core.realloc.exit", "result: %p", ret); - return ret; - } else if (ptr != NULL && size == 0) { - /* realloc(ptr, 0) is equivalent to free(ptr). */ +static void * +do_realloc_nonnull_zero(void *ptr) { + if (opt_zero_realloc_action == zero_realloc_action_strict) { + /* + * The user might have gotten a strict setting while expecting a + * free setting. If that's the case, we at least try to + * reduce the harm, and turn off the tcache while allocating, so + * that we'll get a true first fit. + */ + return do_rallocx(ptr, 1, MALLOCX_TCACHE_NONE, true); + } else if (opt_zero_realloc_action == zero_realloc_action_free) { UTRACE(ptr, 0, 0); tcache_t *tcache; tsd_t *tsd = tsd_fetch(); @@ -3156,14 +3180,39 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } - uintptr_t args[3] = {(uintptr_t)ptr, size}; + uintptr_t args[3] = {(uintptr_t)ptr, 0}; hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); ifree(tsd, ptr, tcache, true); check_entry_exit_locking(tsd_tsdn(tsd)); - LOG("core.realloc.exit", "result: %p", NULL); return NULL; + } else { + safety_check_fail("Called realloc(non-null-ptr, 0) with " + "zero_realloc:abort set\n"); + /* In real code, this will never run; the safety check failure + * will call abort. 
In the unit test, we just want to bail out + * without corrupting internal state that the test needs to + * finish. + */ + return NULL; + } +} + +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) +je_realloc(void *ptr, size_t size) { + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + + if (likely(ptr != NULL && size != 0)) { + void *ret = do_rallocx(ptr, size, 0, true); + LOG("core.realloc.exit", "result: %p", ret); + return ret; + } else if (ptr != NULL && size == 0) { + void *ret = do_realloc_nonnull_zero(ptr); + LOG("core.realloc.exit", "result: %p", ret); + return ret; } else { /* realloc(NULL, size) is equivalent to malloc(size). */ void *ret; diff --git a/src/stats.c b/src/stats.c index 2b744e19..c9bab4f7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1109,6 +1109,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("prof_leak") OPT_WRITE_BOOL("stats_print") OPT_WRITE_CHAR_P("stats_print_opts") + OPT_WRITE_CHAR_P("zero_realloc") emitter_dict_end(emitter); diff --git a/test/unit/hook.c b/test/unit/hook.c index 72fcc433..36dcb89d 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -428,15 +428,21 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { free(ptr); /* realloc(ptr, 0) as free */ - ptr = malloc(1); - reset(); - realloc(ptr, 0); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); + if (opt_zero_realloc_action == zero_realloc_action_free) { + ptr = malloc(1); + reset(); + realloc(ptr, 0); + assert_d_eq(call_count, 1, "Hook not called"); + assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + assert_d_eq(arg_type, (int)hook_dalloc_realloc, + "Wrong hook 
type"); + assert_ptr_eq(ptr, arg_address, + "Wrong pointer freed"); + assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], + "Wrong raw arg"); + assert_u64_eq((uintptr_t)0, arg_args_raw[1], + "Wrong raw arg"); + } /* realloc(NULL, 0) as malloc(0) */ reset(); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 0e88f314..4c0830f2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -880,6 +880,16 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END +TEST_BEGIN(test_zero_realloc) { + const char *val; + size_t sz = sizeof(val); + int err = mallctl("opt.zero_realloc", &val, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl result"); + assert_str_eq(val, "strict", + "Unexpected default zero_realloc_beahvior"); +} +TEST_END + int main(void) { return test( @@ -911,5 +921,6 @@ main(void) { test_prof_active, test_stats_arenas, test_hooks, - test_hooks_exhaustion); + test_hooks_exhaustion, + test_zero_realloc); } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c new file mode 100644 index 00000000..2f49392b --- /dev/null +++ b/test/unit/zero_realloc_abort.c @@ -0,0 +1,26 @@ +#include "test/jemalloc_test.h" + +#include + +static bool abort_called = false; + +void set_abort_called() { + abort_called = true; +}; + +TEST_BEGIN(test_realloc_abort) { + abort_called = false; + safety_check_set_abort(&set_abort_called); + void *ptr = mallocx(42, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + ptr = realloc(ptr, 0); + assert_true(abort_called, "Realloc with zero size didn't abort"); +} +TEST_END + +int +main(void) { + return test( + test_realloc_abort); +} + diff --git a/test/unit/zero_realloc_abort.sh b/test/unit/zero_realloc_abort.sh new file mode 100644 index 00000000..37daeeaa --- /dev/null +++ b/test/unit/zero_realloc_abort.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:abort" diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c new file mode 100644 index 00000000..a0736881 --- 
/dev/null +++ b/test/unit/zero_realloc_free.c @@ -0,0 +1,33 @@ +#include "test/jemalloc_test.h" + +static uint64_t +deallocated() { + if (!config_stats) { + return 0; + } + uint64_t deallocated; + size_t sz = sizeof(deallocated); + assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return deallocated; +} + +TEST_BEGIN(test_realloc_free) { + void *ptr = mallocx(42, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + uint64_t deallocated_before = deallocated(); + ptr = realloc(ptr, 0); + uint64_t deallocated_after = deallocated(); + assert_ptr_null(ptr, "Realloc didn't free"); + if (config_stats) { + assert_u64_gt(deallocated_after, deallocated_before, + "Realloc didn't free"); + } +} +TEST_END + +int +main(void) { + return test( + test_realloc_free); +} diff --git a/test/unit/zero_realloc_free.sh b/test/unit/zero_realloc_free.sh new file mode 100644 index 00000000..51b01c91 --- /dev/null +++ b/test/unit/zero_realloc_free.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:free" diff --git a/test/unit/zero_realloc_strict.c b/test/unit/zero_realloc_strict.c new file mode 100644 index 00000000..b7099517 --- /dev/null +++ b/test/unit/zero_realloc_strict.c @@ -0,0 +1,48 @@ +#include "test/jemalloc_test.h" + +static uint64_t +allocated() { + if (!config_stats) { + return 0; + } + uint64_t allocated; + size_t sz = sizeof(allocated); + assert_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + return allocated; +} + +static uint64_t +deallocated() { + if (!config_stats) { + return 0; + } + uint64_t deallocated; + size_t sz = sizeof(deallocated); + assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return deallocated; +} + +TEST_BEGIN(test_realloc_strict) { + void *ptr = mallocx(1, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + uint64_t allocated_before 
= allocated(); + uint64_t deallocated_before = deallocated(); + ptr = realloc(ptr, 0); + uint64_t allocated_after = allocated(); + uint64_t deallocated_after = deallocated(); + if (config_stats) { + assert_u64_lt(allocated_before, allocated_after, + "Unexpected stats change"); + assert_u64_lt(deallocated_before, deallocated_after, + "Unexpected stats change"); + } + dallocx(ptr, 0); +} +TEST_END +int +main(void) { + return test( + test_realloc_strict); +} diff --git a/test/unit/zero_realloc_strict.sh b/test/unit/zero_realloc_strict.sh new file mode 100644 index 00000000..314dcd0a --- /dev/null +++ b/test/unit/zero_realloc_strict.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:strict" From de81a4eadabb85b4c911fc6301b69f093ad47b53 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sat, 26 Oct 2019 11:04:46 -0700 Subject: [PATCH 1399/2608] Add stats counters for number of zero reallocs --- Makefile.in | 3 +- doc/jemalloc.xml.in | 15 +++++++ .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 7 +++- src/jemalloc.c | 5 +++ src/stats.c | 9 +++++ test/unit/zero_reallocs.c | 40 +++++++++++++++++++ test/unit/zero_reallocs.sh | 3 ++ 8 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 test/unit/zero_reallocs.c create mode 100644 test/unit/zero_reallocs.sh diff --git a/Makefile.in b/Makefile.in index e4d21805..fede961d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -235,7 +235,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero.c \ $(srcroot)test/unit/zero_realloc_abort.c \ $(srcroot)test/unit/zero_realloc_free.c \ - $(srcroot)test/unit/zero_realloc_strict.c + $(srcroot)test/unit/zero_realloc_strict.c \ + $(srcroot)test/unit/zero_reallocs.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ $(srcroot)test/unit/arena_reset_prof.c diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 746c6bdb..77afb00c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2451,6 +2451,21 @@ struct extent_hooks_s { + + + stats.zero_reallocs + 
(size_t) + r- + [] + + Number of times that the realloc() + was called with a non-NULL pointer argument and a + 0 size argument. This is a fundamentally unsafe + pattern in portable programs; see + opt.zero_realloc for details. + + + stats.background_thread.num_threads diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index dae77b42..e9dbde80 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -20,6 +20,7 @@ extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; extern const char *zero_realloc_mode_names[]; +extern atomic_zu_t zero_realloc_count; /* Number of CPUs. */ extern unsigned ncpus; diff --git a/src/ctl.c b/src/ctl.c index b51207f8..abb82b57 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -224,6 +224,7 @@ CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) +CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_utilization_query) @@ -593,7 +594,8 @@ static const ctl_named_node_t stats_node[] = { {NAME("background_thread"), CHILD(named, stats_background_thread)}, {NAME("mutexes"), CHILD(named, stats_mutexes)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)} + {NAME("arenas"), CHILD(indexed, stats_arenas)}, + {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, }; static const ctl_named_node_t experimental_hooks_node[] = { @@ -2841,6 +2843,9 @@ CTL_RO_CGEN(config_stats, stats_background_thread_num_runs, CTL_RO_CGEN(config_stats, stats_background_thread_run_interval, nstime_ns(&ctl_stats->background_thread.run_interval), uint64_t) +CTL_RO_CGEN(config_stats, stats_zero_reallocs, + atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t) + CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) 
CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 35a9e7b5..88064df4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -70,6 +70,8 @@ bool opt_junk_free = zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; +atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); + const char *zero_realloc_mode_names[] = { "strict", "free", @@ -3160,6 +3162,9 @@ je_rallocx(void *ptr, size_t size, int flags) { static void * do_realloc_nonnull_zero(void *ptr) { + if (config_stats) { + atomic_fetch_add_zu(&zero_realloc_count, 1, ATOMIC_RELAXED); + } if (opt_zero_realloc_action == zero_realloc_action_strict) { /* * The user might have gotten a strict setting while expecting a diff --git a/src/stats.c b/src/stats.c index c9bab4f7..41b990e2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1252,6 +1252,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t allocated, active, metadata, metadata_thp, resident, mapped, retained; size_t num_background_threads; + size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); @@ -1262,6 +1263,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); + CTL_GET("stats.zero_reallocs", &zero_reallocs, size_t); + if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", &num_background_threads, size_t); @@ -1285,12 +1288,18 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "resident", emitter_type_size, &resident); emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + emitter_json_kv(emitter, "zero_reallocs", emitter_type_size, + &zero_reallocs); emitter_table_printf(emitter, "Allocated: %zu, active: %zu, 
" "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " "retained: %zu\n", allocated, active, metadata, metadata_thp, resident, mapped, retained); + /* Strange behaviors */ + emitter_table_printf(emitter, + "Count of realloc(non-null-ptr, 0) calls: %zu\n", zero_reallocs); + /* Background thread stats. */ emitter_json_object_kv_begin(emitter, "background_thread"); emitter_json_kv(emitter, "num_threads", emitter_type_size, diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c new file mode 100644 index 00000000..fd33aaf6 --- /dev/null +++ b/test/unit/zero_reallocs.c @@ -0,0 +1,40 @@ +#include "test/jemalloc_test.h" + +static size_t +zero_reallocs() { + if (!config_stats) { + return 0; + } + size_t count = 12345; + size_t sz = sizeof(count); + + assert_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + return count; +} + +TEST_BEGIN(test_zero_reallocs) { + test_skip_if(!config_stats); + + for (size_t i = 0; i < 100; ++i) { + void *ptr = mallocx(i * i + 1, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx error"); + size_t count = zero_reallocs(); + assert_zu_eq(i, count, "Incorrect zero realloc count"); + ptr = realloc(ptr, 0); + assert_ptr_null(ptr, "Realloc didn't free"); + count = zero_reallocs(); + assert_zu_eq(i + 1, count, "Realloc didn't adjust count"); + } +} +TEST_END + +int +main(void) { + /* + * We expect explicit counts; reentrant tests run multiple times, so + * counts leak across runs. 
+ */ + return test_no_reentrancy( + test_zero_reallocs); +} diff --git a/test/unit/zero_reallocs.sh b/test/unit/zero_reallocs.sh new file mode 100644 index 00000000..51b01c91 --- /dev/null +++ b/test/unit/zero_reallocs.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:free" From 6924f83cb21f75e1c892d8f469500e12f1a3f5a7 Mon Sep 17 00:00:00 2001 From: RingsC Date: Sun, 13 Oct 2019 23:11:23 +0800 Subject: [PATCH 1400/2608] use SYS_openat when available some architecture like AArch64 may not have the open syscall, but have openat syscall. so check and use SYS_openat if SYS_openat available if SYS_open is not supported at init_thp_state. --- src/pages.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/pages.c b/src/pages.c index 13de27a0..75c8dd9d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -563,6 +563,9 @@ init_thp_state(void) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) int fd = (int)syscall(SYS_open, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) + int fd = (int)syscall(SYS_openat, + AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); #else int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); #endif From 152c0ef954f19fc2bbe53fead9c62c9824f06109 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 3 Sep 2019 15:04:48 -0700 Subject: [PATCH 1401/2608] Build a general purpose thread event handler --- Makefile.in | 10 +- include/jemalloc/internal/prof_inlines_b.h | 62 +++-- include/jemalloc/internal/thread_event.h | 139 ++++++++++ include/jemalloc/internal/tsd.h | 76 ++++-- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 27 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 33 +-- src/jemalloc.c | 78 +++--- src/prof.c | 21 +- src/thread_event.c | 255 ++++++++++++++++++ test/unit/thread_event.c | 57 ++++ 
test/unit/thread_event.sh | 5 + 13 files changed, 630 insertions(+), 139 deletions(-) create mode 100644 include/jemalloc/internal/thread_event.h create mode 100644 src/thread_event.c create mode 100644 test/unit/thread_event.c create mode 100644 test/unit/thread_event.sh diff --git a/Makefile.in b/Makefile.in index fede961d..7eba7742 100644 --- a/Makefile.in +++ b/Makefile.in @@ -123,11 +123,12 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ - $(srcroot)src/stats.c \ $(srcroot)src/sc.c \ + $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ + $(srcroot)src/thread_event.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c @@ -176,9 +177,9 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ + $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ - $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ @@ -200,6 +201,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ + $(srcroot)test/unit/nstime.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ @@ -218,9 +220,9 @@ TESTS_UNIT := \ $(srcroot)test/unit/retained.c \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/safety_check.c \ + $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ - $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ @@ -228,8 +230,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/test_hooks.c \ + $(srcroot)test/unit/thread_event.c \ $(srcroot)test/unit/ticker.c \ - $(srcroot)test/unit/nstime.c \ 
$(srcroot)test/unit/tsd.c \ $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c \ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 6b10f5bf..b4e65c05 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event.h" JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { @@ -79,24 +80,6 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { arena_prof_alloc_time_set(tsdn, ptr, t); } -JEMALLOC_ALWAYS_INLINE bool -prof_sample_check(tsd_t *tsd, size_t usize, bool update) { - ssize_t check = update ? 0 : usize; - - int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); - if (update) { - bytes_until_sample -= usize; - if (tsd_nominal(tsd)) { - tsd_bytes_until_sample_set(tsd, bytes_until_sample); - } - } - if (likely(bytes_until_sample >= check)) { - return true; - } - - return false; -} - JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t **tdata_out) { @@ -105,7 +88,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, cassert(config_prof); /* Fastpath: no need to load tdata */ - if (likely(prof_sample_check(tsd, usize, update))) { + if (likely(prof_sample_event_wait_get(tsd) > 0)) { return true; } @@ -127,13 +110,40 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - /* - * If this was the first creation of tdata, then - * prof_tdata_get() reset bytes_until_sample, so decrement and - * check it again - */ - if (!booted && prof_sample_check(tsd, usize, update)) { - return true; + if (!booted) { + /* + * If this was the first creation of tdata, then it means that + * the previous thread_event() relied on the wrong prof_sample + * wait time, and that it should have relied on the new + * prof_sample wait time 
just set by prof_tdata_get(), so we + * now manually check again. + * + * If the check fails, then even though we relied on the wrong + * prof_sample wait time, we're now actually in perfect shape, + * in the sense that we can pretend that we have used the right + * prof_sample wait time. + * + * If the check succeeds, then we are now in a tougher + * situation, in the sense that we cannot pretend that we have + * used the right prof_sample wait time. A straightforward + * solution would be to fully roll back thread_event(), set the + * right prof_sample wait time, and then redo thread_event(). + * A simpler way, which is implemented below, is to just set a + * new prof_sample wait time that is usize less, and do nothing + * else. Strictly speaking, the thread event handler may end + * up in a wrong state, since it has still recorded an event + * whereas in reality there may be no event. However, the + * difference in the wait time offsets the wrongly recorded + * event, so that, functionally, the countdown to the next + * event will behave exactly as if we have used the right + * prof_sample wait time in the first place. + */ + uint64_t wait = prof_sample_event_wait_get(tsd); + assert(wait > 0); + if (usize < wait) { + thread_prof_sample_event_update(tsd, wait - usize); + return true; + } } /* Compute new sample threshold. */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h new file mode 100644 index 00000000..08678b74 --- /dev/null +++ b/include/jemalloc/internal/thread_event.h @@ -0,0 +1,139 @@ +#ifndef JEMALLOC_INTERNAL_THREAD_EVENT_H +#define JEMALLOC_INTERNAL_THREAD_EVENT_H + +#include "jemalloc/internal/tsd.h" + +/* + * Maximum threshold on thread_allocated_next_event_fast, so that there is no + * need to check overflow in malloc fast path. (The allocation size in malloc + * fast path never exceeds SC_LOOKUP_MAXCLASS.) 
+ */ +#define THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX \ + (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) + +/* + * The max interval helps make sure that malloc stays on the fast path in the + * common case, i.e. thread_allocated < thread_allocated_next_event_fast. + * When thread_allocated is within an event's distance to + * THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX above, thread_allocated_next_event_fast + * is wrapped around and we fall back to the medium-fast path. The max interval + * makes sure that we're not staying on the fallback case for too long, even if + * there's no active event or if all active events have long wait times. + */ +#define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) + +void thread_event_assert_invariants_debug(tsd_t *tsd); +void thread_event_trigger(tsd_t *tsd, bool delay_event); +void thread_event_rollback(tsd_t *tsd, size_t diff); +void thread_event_update(tsd_t *tsd); +void thread_event_boot(); + +/* + * List of all events, in the following format: + * E(event, (condition)) + */ +#define ITERATE_OVER_ALL_EVENTS \ + E(prof_sample, (config_prof && opt_prof)) + +#define E(event, condition) \ + C(event##_event_wait) + +/* List of all thread event counters. */ +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ + C(thread_allocated_next_event_fast) \ + C(thread_allocated_last_event) \ + C(thread_allocated_next_event) \ + ITERATE_OVER_ALL_EVENTS + +/* Getters directly wrap TSD getters. */ +#define C(counter) \ +JEMALLOC_ALWAYS_INLINE uint64_t \ +counter##_get(tsd_t *tsd) { \ + return tsd_##counter##_get(tsd); \ +} + +ITERATE_OVER_ALL_COUNTERS +#undef C + +/* + * Setters call the TSD pointer getters rather than the TSD setters, so that + * the counters can be modified even when TSD state is reincarnated or + * minimal_initialized: if an event is triggered in such cases, we will + * temporarily delay the event and let it be immediately triggered at the next + * allocation call. 
+ */ +#define C(counter) \ +JEMALLOC_ALWAYS_INLINE void \ +counter##_set(tsd_t *tsd, uint64_t v) { \ + *tsd_##counter##p_get(tsd) = v; \ +} + +ITERATE_OVER_ALL_COUNTERS +#undef C + +/* + * For generating _event_wait getter / setter functions for each individual + * event. + */ +#undef E + +/* + * The function checks in debug mode whether the thread event counters are in + * a consistent state, which forms the invariants before and after each round + * of thread event handling that we can rely on and need to promise. + * The invariants are only temporarily violated in the middle of: + * (a) thread_event() if an event is triggered (the thread_event_trigger() call + * at the end will restore the invariants), + * (b) thread_##event##_event_update() (the thread_event_update() call at the + * end will restore the invariants), or + * (c) thread_event_rollback() if the rollback falls below the last_event (the + * thread_event_update() call at the end will restore the invariants). + */ +JEMALLOC_ALWAYS_INLINE void +thread_event_assert_invariants(tsd_t *tsd) { + if (config_debug) { + thread_event_assert_invariants_debug(tsd); + } +} + +JEMALLOC_ALWAYS_INLINE void +thread_event(tsd_t *tsd, size_t usize) { + thread_event_assert_invariants(tsd); + + uint64_t thread_allocated_before = thread_allocated_get(tsd); + thread_allocated_set(tsd, thread_allocated_before + usize); + + /* The subtraction is intentionally susceptible to underflow. 
*/ + if (likely(usize < thread_allocated_next_event_get(tsd) - + thread_allocated_before)) { + thread_event_assert_invariants(tsd); + } else { + thread_event_trigger(tsd, false); + } +} + +#define E(event, condition) \ +JEMALLOC_ALWAYS_INLINE void \ +thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + thread_event_assert_invariants(tsd); \ + assert(condition); \ + assert(tsd_nominal(tsd)); \ + assert(tsd_reentrancy_level_get(tsd) == 0); \ + assert(event_wait > 0U); \ + if (THREAD_EVENT_MIN_START_WAIT > 1U && \ + unlikely(event_wait < THREAD_EVENT_MIN_START_WAIT)) { \ + event_wait = THREAD_EVENT_MIN_START_WAIT; \ + } \ + if (THREAD_EVENT_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > THREAD_EVENT_MAX_START_WAIT)) { \ + event_wait = THREAD_EVENT_MAX_START_WAIT; \ + } \ + event##_event_wait_set(tsd, event_wait); \ + thread_event_update(tsd); \ +} + +ITERATE_OVER_ALL_EVENTS +#undef E + +#endif /* JEMALLOC_INTERNAL_THREAD_EVENT_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index e2cc7747..14ad53d7 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -15,39 +15,45 @@ /* * Thread-Specific-Data layout - * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof --- + * --- data accessed on tcache fast path: state, rtree_ctx, stats --- * s: state * e: tcache_enabled * m: thread_allocated + * k: thread_allocated_next_event_fast * f: thread_deallocated - * b: bytes_until_sample (config_prof) - * p: prof_tdata (config_prof) * c: rtree_ctx (rtree cache accessed on deallocation) * t: tcache * --- data not accessed on tcache fast path: arena-related fields --- * d: arenas_tdata_bypass * r: reentrancy_level * x: narenas_tdata + * l: thread_allocated_last_event + * j: thread_allocated_next_event + * w: prof_sample_event_wait (config_prof) + * p: prof_tdata (config_prof) * v: offset_state * i: iarena * a: arena * o: arenas_tdata + * b: binshards * Loading TSD data is on the 
critical path of basically all malloc operations. * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. * Use a compact layout to reduce cache footprint. * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx vvvvvvvv mmmmmmmm ffffffff bbbbbbbb pppppppp [c * 16 .......] | + * | sedrxxxx mmmmmmmm kkkkkkkk ffffffff [c * 32 ........ ........ .......] | * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 48 ........ ........ ........ ........ .......] iiiiiiii aaaaaaaa | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp | * +---------------------------- 4th cacheline ----------------------------+ - * | oooooooo [t...... ........ ........ ........ ........ ........ ........ | + * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ ........ | + * +---------------------------- 5th cacheline ----------------------------+ + * | ..b][t.. ........ ........ ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * - * The last 3 members (i, a and o) before tcache isn't really needed on tcache + * The elements after rtree_ctx and before tcache aren't really needed on tcache * fast path. However we have a number of unused tcache bins and witnesses * (never touched unless config_debug) at the end of tcache, so we place them * there to avoid breaking the cachelines and possibly paging in an extra page. 
@@ -64,18 +70,21 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TEST_TSD_INITIALIZER #endif -/* O(name, type, nullable type */ +/* O(name, type, nullable type) */ #define MALLOC_TSD \ O(tcache_enabled, bool, bool) \ O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ - O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ - O(bytes_until_sample, int64_t, int64_t) \ - O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(thread_allocated_last_event, uint64_t, uint64_t) \ + O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(offset_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ @@ -84,25 +93,34 @@ typedef void (*test_callback_t)(int *); O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ MALLOC_TEST_TSD +/* + * THREAD_EVENT_MIN_START_WAIT should not exceed the minimal allocation usize. 
+ */ +#define THREAD_EVENT_MIN_START_WAIT ((uint64_t)1U) +#define THREAD_EVENT_MAX_START_WAIT UINT64_MAX + #define TSD_INITIALIZER { \ - ATOMIC_INIT(tsd_state_uninitialized), \ - TCACHE_ENABLED_ZERO_INITIALIZER, \ - false, \ - 0, \ - 0, \ - 0, \ - 0, \ - 0, \ - 0, \ - NULL, \ - RTREE_CTX_ZERO_INITIALIZER, \ - NULL, \ - NULL, \ - NULL, \ - TSD_BINSHARDS_ZERO_INITIALIZER, \ - TCACHE_ZERO_INITIALIZER, \ - WITNESS_TSD_INITIALIZER \ - MALLOC_TEST_TSD_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ + /* arenas_tdata_bypass */ false, \ + /* reentrancy_level */ 0, \ + /* narenas_tdata */ 0, \ + /* thread_allocated */ 0, \ + /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated */ 0, \ + /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ + /* thread_allocated_last_event */ 0, \ + /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_tdata */ NULL, \ + /* offset_state */ 0, \ + /* iarena */ NULL, \ + /* arena */ NULL, \ + /* arenas_tdata */ NULL, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, \ + /* witness */ WITNESS_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER \ } void *malloc_tsd_malloc(size_t size); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index a9683384..5838e933 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -63,15 +63,16 @@ + + - {8D6BB292-9E1C-413D-9F98-4864BDC1514A} diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index bc40883b..3551ba5e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + 
Source Files + Source Files @@ -25,6 +28,9 @@ Source Files + + Source Files + Source Files @@ -46,6 +52,9 @@ Source Files + + Source Files + Source Files @@ -76,6 +85,9 @@ Source Files + + Source Files + Source Files @@ -91,6 +103,9 @@ Source Files + + Source Files + Source Files @@ -100,17 +115,5 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 72a57e56..b9d4f681 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -63,15 +63,16 @@ + + - {8D6BB292-9E1C-413D-9F98-4864BDC1514A} diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 622b93f1..3551ba5e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -25,6 +28,9 @@ Source Files + + Source Files + Source Files @@ -46,6 +52,9 @@ Source Files + + Source Files + Source Files @@ -76,6 +85,9 @@ Source Files + + Source Files + Source Files @@ -88,6 +100,12 @@ Source Files + + Source Files + + + Source Files + Source Files @@ -97,20 +115,5 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - diff --git a/src/jemalloc.c b/src/jemalloc.c index 88064df4..63a1e302 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -18,6 +18,7 @@ #include "jemalloc/internal/spin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" +#include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/util.h" /******************************************************************************/ @@ -1530,6 +1531,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); + thread_event_boot(); 
sz_boot(&sc_data); bin_info_boot(&sc_data, bin_shard_sizes); @@ -2128,6 +2130,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } + thread_event(tsd, usize); + /* * If dopts->alignment > 0, then ind is still 0, but usize was computed * in the previous if statement. Down the positive alignment path, @@ -2136,20 +2140,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { - /* - * The fast path modifies bytes_until_sample regardless of - * prof_active. We reset it to be the sample interval, so that - * there won't be excessive routings to the slow path, and that - * when prof_active is turned on later, the counting for - * sampling can immediately resume as normal (though the very - * first sampling interval is not randomized). - */ - if (unlikely(tsd_bytes_until_sample_get(tsd) < 0) && - !prof_active_get_unlocked()) { - tsd_bytes_until_sample_set(tsd, - (ssize_t)(1 << lg_prof_sample)); - } - prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); @@ -2167,24 +2157,17 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { + thread_event_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); } else { assert(!opt_prof); - /* - * The fast path modifies bytes_until_sample regardless of - * opt_prof. We reset it to a huge value here, so as to - * minimize the triggering for slow path. 
- */ - if (config_prof && - unlikely(tsd_bytes_until_sample_get(tsd) < 0)) { - tsd_bytes_until_sample_set(tsd, SSIZE_MAX); - } allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { + thread_event_rollback(tsd, usize); goto label_oom; } } @@ -2197,7 +2180,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); assert(usize == isalloc(tsd_tsdn(tsd), allocation)); - *tsd_thread_allocatedp_get(tsd) += usize; if (sopts->slow) { UTRACE(0, size, allocation); @@ -2373,7 +2355,12 @@ je_malloc(size_t size) { } szind_t ind = sz_size2index_lookup(size); - /* usize is always needed to increment thread_allocated. */ + /* + * The thread_allocated counter in tsd serves as a general purpose + * accumulator for bytes of allocation to trigger different types of + * events. usize is always needed to advance thread_allocated, though + * it's not always needed in the core allocation logic. + */ size_t usize = sz_index2size(ind); /* * Fast path relies on size being a bin. @@ -2382,19 +2369,12 @@ je_malloc(size_t size) { assert(ind < SC_NBINS); assert(size <= SC_SMALL_MAXCLASS); - if (config_prof) { - int64_t bytes_until_sample = tsd_bytes_until_sample_get(tsd); - bytes_until_sample -= usize; - tsd_bytes_until_sample_set(tsd, bytes_until_sample); - - if (unlikely(bytes_until_sample < 0)) { - /* - * Avoid a prof_active check on the fastpath. - * If prof_active is false, bytes_until_sample will be - * reset in slow path. 
- */ - return malloc_default(size); - } + uint64_t thread_allocated_after = thread_allocated_get(tsd) + usize; + assert(thread_allocated_next_event_fast_get(tsd) <= + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + if (unlikely(thread_allocated_after >= + thread_allocated_next_event_fast_get(tsd))) { + return malloc_default(size); } cache_bin_t *bin = tcache_small_bin_get(tcache, ind); @@ -2402,7 +2382,7 @@ je_malloc(size_t size) { void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); if (tcache_success) { - *tsd_thread_allocatedp_get(tsd) += usize; + thread_allocated_set(tsd, thread_allocated_after); if (config_stats) { bin->tstats.nrequests++; } @@ -3116,9 +3096,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } + thread_event(tsd, usize); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { + thread_event_rollback(tsd, usize); goto label_oom; } } else { @@ -3128,10 +3110,10 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } usize = isalloc(tsd_tsdn(tsd), p); + thread_event(tsd, usize); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; UTRACE(ptr, size, p); @@ -3307,6 +3289,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } } + thread_event(tsd, usize_max); tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { @@ -3316,6 +3299,18 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); } + if (usize <= usize_max) { + thread_event_rollback(tsd, usize_max - usize); + } else { + /* + * For downsizing request, usize_max can be less than 
usize. + * We here further increase thread event counters so as to + * record the true usize, and then when the execution goes back + * to xallocx(), the entire usize will be rolled back if it's + * equal to the old usize. + */ + thread_event(tsd, usize - usize_max); + } if (usize == old_usize) { prof_alloc_rollback(tsd, tctx, false); return usize; @@ -3373,12 +3368,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); + thread_event(tsd, usize); } if (unlikely(usize == old_usize)) { + thread_event_rollback(tsd, usize); goto label_not_resized; } - *tsd_thread_allocatedp_get(tsd) += usize; *tsd_thread_deallocatedp_get(tsd) += old_usize; label_not_resized: diff --git a/src/prof.c b/src/prof.c index fc0c7d8a..7e219dc3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/thread_event.h" /* * This file implements the profiling "APIs" needed by other parts of jemalloc, @@ -471,8 +472,11 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { return; } + tsd_t *tsd = tsd_fetch(); + if (lg_prof_sample == 0) { - tsd_bytes_until_sample_set(tsd_fetch(), 0); + thread_prof_sample_event_update(tsd, + THREAD_EVENT_MIN_START_WAIT); return; } @@ -480,11 +484,11 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * Compute sample interval as a geometrically distributed random * variable with mean (2^lg_prof_sample). 
* - * __ __ - * | log(u) | 1 - * tdata->bytes_until_sample = | -------- |, where p = --------------- - * | log(1-p) | lg_prof_sample - * 2 + * __ __ + * | log(u) | 1 + * bytes_until_sample = | -------- |, where p = --------------- + * | log(1-p) | lg_prof_sample + * 2 * * For more information on the math, see: * @@ -499,10 +503,7 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - if (bytes_until_sample > SSIZE_MAX) { - bytes_until_sample = SSIZE_MAX; - } - tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample); + thread_prof_sample_event_update(tsd, bytes_until_sample); #endif } diff --git a/src/thread_event.c b/src/thread_event.c new file mode 100644 index 00000000..c6542f46 --- /dev/null +++ b/src/thread_event.c @@ -0,0 +1,255 @@ +#define JEMALLOC_THREAD_EVENT_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/thread_event.h" + +/* + * There's no lock for thread_event_active because write is only done in + * malloc_init(), where init_lock there serves as the guard, and ever since + * then thread_event_active becomes read only. + */ +static bool thread_event_active = false; + +/* Event handler function signatures. 
*/ +#define E(event, condition) \ +static void thread_##event##_event_handler(tsd_t *tsd); + +ITERATE_OVER_ALL_EVENTS +#undef E + +static uint64_t +thread_allocated_next_event_compute(tsd_t *tsd) { + uint64_t wait = THREAD_EVENT_MAX_START_WAIT; + bool no_event_on = true; + +#define E(event, condition) \ + if (condition) { \ + no_event_on = false; \ + uint64_t event_wait = \ + event##_event_wait_get(tsd); \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + if (event_wait > 0U && event_wait < wait) { \ + wait = event_wait; \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + assert(no_event_on == !thread_event_active); + assert(wait <= THREAD_EVENT_MAX_START_WAIT); + return wait; +} + +void +thread_event_assert_invariants_debug(tsd_t *tsd) { + uint64_t thread_allocated = thread_allocated_get(tsd); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t next_event = thread_allocated_next_event_get(tsd); + uint64_t next_event_fast = thread_allocated_next_event_fast_get(tsd); + + assert(last_event != next_event); + if (next_event <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) { + assert(next_event_fast == next_event); + } else { + assert(next_event_fast == 0U); + } + + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t interval = next_event - last_event; + + /* The subtraction is intentionally susceptible to underflow. */ + assert(thread_allocated - last_event < interval); + + uint64_t min_wait = thread_allocated_next_event_compute(tsd); + + /* + * next_event should have been pushed up only except when no event is + * on and the TSD is just initialized. The last_event == 0U guard + * below is stronger than needed, but having an exactly accurate guard + * is more complicated to implement. 
+ */ + assert((!thread_event_active && last_event == 0U) || + interval == min_wait || + (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); +} + +static void +thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { + assert(wait <= THREAD_EVENT_MAX_START_WAIT); + uint64_t next_event = thread_allocated_last_event_get(tsd) + (wait <= + THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); + thread_allocated_next_event_set(tsd, next_event); + uint64_t next_event_fast = (next_event <= + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? next_event : 0U; + thread_allocated_next_event_fast_set(tsd, next_event_fast); +} + +static void +thread_prof_sample_event_handler(tsd_t *tsd) { + assert(config_prof && opt_prof); + assert(prof_sample_event_wait_get(tsd) == 0U); + if (!prof_active_get_unlocked()) { + /* + * If prof_active is off, we reset prof_sample_event_wait to be + * the sample interval when it drops to 0, so that there won't + * be excessive routings to the slow path, and that when + * prof_active is turned on later, the counting for sampling + * can immediately resume as normal. 
+ */ + thread_prof_sample_event_update(tsd, + (uint64_t)(1 << lg_prof_sample)); + } +} + +static uint64_t +thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, + bool allow_event_trigger) { + uint64_t wait = THREAD_EVENT_MAX_START_WAIT; + +#define E(event, condition) \ + if (condition) { \ + uint64_t event_wait = event##_event_wait_get(tsd); \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + if (event_wait > accumbytes) { \ + event_wait -= accumbytes; \ + } else { \ + event_wait = 0U; \ + if (!allow_event_trigger) { \ + event_wait = \ + THREAD_EVENT_MIN_START_WAIT; \ + } \ + } \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + event##_event_wait_set(tsd, event_wait); \ + /* \ + * If there is a single event, then the remaining wait \ + * time may become zero, and we rely on either the \ + * event handler or a thread_event_update() call later \ + * to properly set next_event; if there are multiple \ + * events, then here we can get the minimum remaining \ + * wait time to the next already set event. \ + */ \ + if (event_wait > 0U && event_wait < wait) { \ + wait = event_wait; \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + assert(wait <= THREAD_EVENT_MAX_START_WAIT); + return wait; +} + +void +thread_event_trigger(tsd_t *tsd, bool delay_event) { + /* usize has already been added to thread_allocated. */ + uint64_t thread_allocated_after = thread_allocated_get(tsd); + + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t accumbytes = thread_allocated_after - + thread_allocated_last_event_get(tsd); + + /* Make sure that accumbytes cannot overflow uint64_t. 
*/ + cassert(THREAD_EVENT_MAX_INTERVAL <= + UINT64_MAX - SC_LARGE_MAXCLASS + 1); + + thread_allocated_last_event_set(tsd, thread_allocated_after); + bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && + tsd_reentrancy_level_get(tsd) == 0; + uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, + allow_event_trigger); + thread_event_adjust_thresholds_helper(tsd, wait); + + thread_event_assert_invariants(tsd); + +#define E(event, condition) \ + if (condition && event##_event_wait_get(tsd) == 0U) { \ + assert(allow_event_trigger); \ + thread_##event##_event_handler(tsd); \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + thread_event_assert_invariants(tsd); +} + +void +thread_event_rollback(tsd_t *tsd, size_t diff) { + thread_event_assert_invariants(tsd); + + if (diff == 0U) { + return; + } + + uint64_t thread_allocated = thread_allocated_get(tsd); + /* The subtraction is intentionally susceptible to underflow. */ + uint64_t thread_allocated_rollback = thread_allocated - diff; + thread_allocated_set(tsd, thread_allocated_rollback); + + uint64_t last_event = thread_allocated_last_event_get(tsd); + /* Both subtractions are intentionally susceptible to underflow. */ + if (thread_allocated_rollback - last_event <= + thread_allocated - last_event) { + thread_event_assert_invariants(tsd); + return; + } + + thread_allocated_last_event_set(tsd, thread_allocated_rollback); + + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t wait_diff = last_event - thread_allocated_rollback; + assert(wait_diff <= diff); + +#define E(event, condition) \ + if (condition) { \ + uint64_t event_wait = event##_event_wait_get(tsd); \ + assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + if (event_wait > 0U) { \ + if (wait_diff > \ + THREAD_EVENT_MAX_START_WAIT - event_wait) { \ + event_wait = \ + THREAD_EVENT_MAX_START_WAIT; \ + } else { \ + event_wait += wait_diff; \ + } \ + assert(event_wait <= \ + THREAD_EVENT_MAX_START_WAIT); \ + event##_event_wait_set(tsd, event_wait); \ + } \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E + + thread_event_update(tsd); +} + +void +thread_event_update(tsd_t *tsd) { + uint64_t wait = thread_allocated_next_event_compute(tsd); + thread_event_adjust_thresholds_helper(tsd, wait); + + uint64_t last_event = thread_allocated_last_event_get(tsd); + + /* Both subtractions are intentionally susceptible to underflow. */ + if (thread_allocated_get(tsd) - last_event >= + thread_allocated_next_event_get(tsd) - last_event) { + thread_event_trigger(tsd, true); + } else { + thread_event_assert_invariants(tsd); + } +} + +void thread_event_boot() { +#define E(event, condition) \ + if (condition) { \ + thread_event_active = true; \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E +} diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c new file mode 100644 index 00000000..6817262b --- /dev/null +++ b/test/unit/thread_event.c @@ -0,0 +1,57 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_next_event_fast_roll_back) { + tsd_t *tsd = tsd_fetch(); + thread_allocated_last_event_set(tsd, 0); + thread_allocated_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX - 8U); + thread_allocated_next_event_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + thread_allocated_next_event_fast_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + prof_sample_event_wait_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + void *p = malloc(16U); + assert_ptr_not_null(p, "malloc() failed"); + 
free(p); +} +TEST_END + +TEST_BEGIN(test_next_event_fast_resume) { + tsd_t *tsd = tsd_fetch(); + thread_allocated_last_event_set(tsd, 0); + thread_allocated_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 8U); + thread_allocated_next_event_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + thread_allocated_next_event_fast_set(tsd, 0); + prof_sample_event_wait_set(tsd, + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + void *p = malloc(SC_LOOKUP_MAXCLASS); + assert_ptr_not_null(p, "malloc() failed"); + free(p); +} +TEST_END + +TEST_BEGIN(test_event_rollback) { + tsd_t *tsd = tsd_fetch(); + const uint64_t diff = THREAD_EVENT_MAX_INTERVAL >> 2; + size_t count = 10; + uint64_t thread_allocated = thread_allocated_get(tsd); + while (count-- != 0) { + thread_event_rollback(tsd, diff); + uint64_t thread_allocated_after = thread_allocated_get(tsd); + assert_u64_eq(thread_allocated - thread_allocated_after, diff, + "thread event counters are not properly rolled back"); + thread_allocated = thread_allocated_after; + } +} +TEST_END + +int +main(void) { + return test( + test_next_event_fast_roll_back, + test_next_event_fast_resume, + test_event_rollback); +} diff --git a/test/unit/thread_event.sh b/test/unit/thread_event.sh new file mode 100644 index 00000000..8fcc7d8a --- /dev/null +++ b/test/unit/thread_event.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi From 198f02e7972023d10c9e4c4c6ab162738d103707 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 14 Oct 2019 09:35:51 -0700 Subject: [PATCH 1402/2608] Pull prof_accumbytes into thread event handler --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_a.h | 11 --- .../internal/jemalloc_internal_inlines_b.h | 2 +- include/jemalloc/internal/prof_externs.h | 14 ++- include/jemalloc/internal/prof_inlines_a.h | 90 +++++-------------- include/jemalloc/internal/tcache_inlines.h | 6 -- 
include/jemalloc/internal/tcache_structs.h | 3 +- include/jemalloc/internal/tcache_types.h | 4 +- include/jemalloc/internal/thread_event.h | 3 +- include/jemalloc/internal/tsd.h | 11 ++- src/arena.c | 14 +-- src/jemalloc.c | 3 - src/large.c | 3 - src/prof.c | 83 +++++++++++++++-- src/tcache.c | 36 +------- src/thread_event.c | 40 +++++---- 16 files changed, 148 insertions(+), 177 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a4523ae0..a71f9446 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -49,7 +49,7 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); + cache_bin_t *tbin, szind_t binind); void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 9abf7f6a..27434c30 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -21,17 +21,6 @@ arena_internal_get(arena_t *arena) { return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } -static inline bool -arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { - cassert(config_prof); - - if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { - return false; - } - - return prof_accum_add(tsdn, &arena->prof_accum, accumbytes); -} - static inline void percpu_arena_update(tsd_t *tsd, unsigned cpu) { assert(have_percpu_arena); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 70d6e578..f0b73d02 100644 --- 
a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -24,7 +24,7 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { if (tcache_available(tsd)) { tcache_t *tcache = tcache_get(tsd); if (tcache->arena != NULL) { - /* See comments in tcache_data_init().*/ + /* See comments in tsd_tcache_data_init().*/ assert(tcache->arena == arena_get(tsd_tsdn(tsd), 0, false)); if (tcache->arena != ret) { diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 7befad64..94fbd752 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -33,13 +33,7 @@ extern bool prof_active; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ extern bool prof_gdump_val; -/* - * Profile dump interval, measured in bytes allocated. Each arena triggers a - * profile dump when it reaches this threshold. The effect is that the - * interval between profile dumps averages prof_interval, though the actual - * interval between dumps will tend to be sporadic, and the interval will be a - * maximum of approximately (prof_interval * narenas). - */ +/* Profile dump interval, measured in bytes allocated. 
*/ extern uint64_t prof_interval; /* @@ -50,6 +44,10 @@ extern size_t lg_prof_sample; extern bool prof_booted; +/* Functions only accessed in prof_inlines_a.h */ +bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); +void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); @@ -73,7 +71,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum); +bool prof_accum_init(tsdn_t *tsdn); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 6716d2f4..61773a2b 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -3,74 +3,6 @@ #include "jemalloc/internal/mutex.h" -static inline bool -prof_accum_add(tsdn_t *tsdn, prof_accum_t *prof_accum, - uint64_t accumbytes) { - cassert(config_prof); - - bool overflow; - uint64_t a0, a1; - - /* - * If the application allocates fast enough (and/or if idump is slow - * enough), extreme overflow here (a1 >= prof_interval * 2) can cause - * idump trigger coalescing. This is an intentional mechanism that - * avoids rate-limiting allocation. 
- */ -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, - a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_accum->mtx); - a0 = prof_accum->accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - prof_accum->accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_accum->mtx); -#endif - return overflow; -} - -static inline void -prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, - size_t usize) { - cassert(config_prof); - - /* - * Cancel out as much of the excessive prof_accumbytes increase as - * possible without underflowing. Interval-triggered dumps occur - * slightly more often than intended as a result of incomplete - * canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_accum->accumbytes, ATOMIC_RELAXED); - do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - } while (!atomic_compare_exchange_weak_u64(&prof_accum->accumbytes, &a0, - a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_accum->mtx); - a0 = prof_accum->accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; - prof_accum->accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_accum->mtx); -#endif -} - JEMALLOC_ALWAYS_INLINE void prof_active_assert() { cassert(config_prof); @@ -93,4 +25,26 @@ prof_active_get_unlocked(void) { return prof_active; } +JEMALLOC_ALWAYS_INLINE bool +prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { + cassert(config_prof); + + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return false; + } + + return prof_idump_accum_impl(tsdn, accumbytes); +} + +JEMALLOC_ALWAYS_INLINE void +prof_idump_rollback(tsdn_t *tsdn, size_t usize) { + cassert(config_prof); + + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + + prof_idump_rollback_impl(tsdn, usize); +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 8988ae9d..85c6cc46 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -93,9 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } tcache_event(tsd, tcache); return ret; } @@ -151,9 +148,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } } tcache_event(tsd, tcache); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 008b1f73..98d3ef70 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -16,10 +16,9 @@ struct tcache_s { * together at the start of this struct. */ - /* Cleared after arena_prof_accum(). */ - uint64_t prof_accumbytes; /* Drives incremental GC. 
*/ ticker_t gc_ticker; + /* * The pointer stacks associated with bins follow as a contiguous array. * During tcache initialization, the avail pointer in each element of diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index dce69382..60261fc0 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -47,8 +47,8 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_GC_INCR \ ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) -/* Used in TSD static initializer only. Real init in tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER {0} +/* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ +#define TCACHE_ZERO_INITIALIZER {{0}} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 08678b74..6aa334fc 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -44,7 +44,8 @@ void thread_event_boot(); C(thread_allocated_next_event_fast) \ C(thread_allocated_last_event) \ C(thread_allocated_next_event) \ - ITERATE_OVER_ALL_EVENTS + ITERATE_OVER_ALL_EVENTS \ + C(prof_sample_last_event) /* Getters directly wrap TSD getters. */ #define C(counter) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 14ad53d7..60500df7 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -30,6 +30,7 @@ * l: thread_allocated_last_event * j: thread_allocated_next_event * w: prof_sample_event_wait (config_prof) + * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) * v: offset_state * i: iarena @@ -45,11 +46,11 @@ * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ ........ ........ ........ ........ ........ 
.......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww pppppppp | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx | * +---------------------------- 4th cacheline ----------------------------+ - * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ ........ | + * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ | * +---------------------------- 5th cacheline ----------------------------+ - * | ..b][t.. ........ ........ ........ ........ ........ ........ ........ | + * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * @@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *); O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(offset_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -109,9 +111,10 @@ typedef void (*test_callback_t)(int *); /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ /* thread_deallocated */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ - /* thread_allocated_last_event */ 0, \ + /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ /* offset_state */ 0, \ /* iarena */ NULL, \ diff --git a/src/arena.c b/src/arena.c index e096f3a6..a60a6843 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1378,13 +1378,10 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void 
arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind) { unsigned i, nfill, cnt; assert(cache_bin_ncached_get(tbin, binind) == 0); - if (config_prof && arena_prof_accum(tsdn, arena, prof_accumbytes)) { - prof_idump(tsdn); - } tcache->bin_refilled[binind] = true; unsigned binshard; @@ -1484,10 +1481,8 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { bin->stats.nrequests++; bin->stats.curregs++; } + malloc_mutex_unlock(tsdn, &bin->lock); - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } if (!zero) { if (config_fill) { @@ -1565,14 +1560,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - arena_t *arena = arena_get_from_extent(extent); szind_t szind = sz_size2index(usize); extent_szind_set(extent, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); - prof_accum_cancel(tsdn, &arena->prof_accum, usize); + prof_idump_rollback(tsdn, usize); assert(isalloc(tsdn, ptr) == usize); } @@ -1982,7 +1976,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - if (prof_accum_init(tsdn, &arena->prof_accum)) { + if (prof_accum_init(tsdn)) { goto label_error; } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63a1e302..264b3f3f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2386,9 +2386,6 @@ je_malloc(size_t size) { if (config_stats) { bin->tstats.nrequests++; } - if (config_prof) { - tcache->prof_accumbytes += usize; - } LOG("core.malloc.exit", "result: %p", ret); diff --git a/src/large.c b/src/large.c index 13d8e56c..8aaa3ce2 100644 --- a/src/large.c +++ b/src/large.c @@ -56,9 +56,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, extent_list_append(&arena->large, 
extent); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - if (config_prof && arena_prof_accum(tsdn, arena, usize)) { - prof_idump(tsdn); - } if (zero) { assert(is_zeroed); diff --git a/src/prof.c b/src/prof.c index 7e219dc3..5360662b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -45,6 +45,9 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +/* Accessed via prof_idump_[accum/rollback](). */ +static prof_accum_t prof_idump_accumulated; + /* * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). @@ -586,21 +589,91 @@ prof_fdump(void) { } bool -prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) { +prof_accum_init(tsdn_t *tsdn) { cassert(config_prof); #ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&prof_accum->mtx, "prof_accum", + if (malloc_mutex_init(&prof_idump_accumulated.mtx, "prof_accum", WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { return true; } - prof_accum->accumbytes = 0; + prof_idump_accumulated.accumbytes = 0; #else - atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED); + atomic_store_u64(&prof_idump_accumulated.accumbytes, 0, + ATOMIC_RELAXED); #endif return false; } +bool +prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { + cassert(config_prof); + + bool overflow; + uint64_t a0, a1; + + /* + * If the application allocates fast enough (and/or if idump is slow + * enough), extreme overflow here (a1 >= prof_interval * 2) can cause + * idump trigger coalescing. This is an intentional mechanism that + * avoids rate-limiting allocation. 
+ */ +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, + ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + } while (!atomic_compare_exchange_weak_u64( + &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); + a0 = prof_idump_accumulated.accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= prof_interval); + if (overflow) { + a1 %= prof_interval; + } + prof_idump_accumulated.accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); +#endif + return overflow; +} + +void +prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { + cassert(config_prof); + + /* + * Cancel out as much of the excessive accumbytes increase as possible + * without underflowing. Interval-triggered dumps occur slightly more + * often than intended as a result of incomplete canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, + ATOMIC_RELAXED); + do { + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + } while (!atomic_compare_exchange_weak_u64( + &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); + a0 = prof_idump_accumulated.accumbytes; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; + prof_idump_accumulated.accumbytes = a1; + malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); +#endif +} + bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); @@ -641,7 +714,7 @@ prof_idump(tsdn_t *tsdn) { return; } - tdata = prof_tdata_get(tsd, false); + tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { return; } diff --git a/src/tcache.c b/src/tcache.c index e17b67a3..7758c4f2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -106,11 +106,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ret; assert(tcache->arena != NULL); - arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind, - config_prof ? tcache->prof_accumbytes : 0); - if (config_prof) { - tcache->prof_accumbytes = 0; - } + arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); ret = cache_bin_alloc_easy(tbin, tcache_success, binind); return ret; @@ -181,14 +177,6 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - if (config_prof && bin_arena == arena) { - if (arena_prof_accum(tsd_tsdn(tsd), arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); - } - tcache->prof_accumbytes = 0; - } - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; @@ -274,11 +262,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned locked_arena_ind = extent_arena_ind_get(extent); arena_t *locked_arena = arena_get(tsd_tsdn(tsd), locked_arena_ind, false); - bool idump; - - if (config_prof) { - idump = false; - } bool lock_large = !arena_is_auto(locked_arena); if (lock_large) { @@ -295,11 +278,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t } if ((config_prof || config_stats) && (locked_arena == tcache_arena)) { - if 
(config_prof) { - idump = arena_prof_accum(tsd_tsdn(tsd), - tcache_arena, tcache->prof_accumbytes); - tcache->prof_accumbytes = 0; - } if (config_stats) { merged_stats = true; arena_stats_large_flush_nrequests_add( @@ -332,9 +310,6 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t ndeferred++; } } - if (config_prof && idump) { - prof_idump(tsd_tsdn(tsd)); - } arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred); nflush = ndeferred; @@ -462,7 +437,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { assert(!tcache_bin_lowbits_overflowable(avail_stack)); memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); - tcache->prof_accumbytes = 0; tcache->next_gc_bin = 0; tcache->arena = NULL; @@ -590,14 +564,6 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tbin->tstats.nrequests == 0); } } - - if (config_prof && tcache->prof_accumbytes > 0) { - if (arena_prof_accum(tsd_tsdn(tsd), tcache->arena, - tcache->prof_accumbytes)) { - prof_idump(tsd_tsdn(tsd)); - } - tcache->prof_accumbytes = 0; - } } void diff --git a/src/thread_event.c b/src/thread_event.c index c6542f46..312dff26 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -18,6 +18,29 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +static void +thread_prof_sample_event_handler(tsd_t *tsd) { + assert(config_prof && opt_prof); + assert(prof_sample_event_wait_get(tsd) == 0U); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = prof_sample_last_event_get(tsd); + prof_sample_last_event_set(tsd, last_event); + if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { + prof_idump(tsd_tsdn(tsd)); + } + if (!prof_active_get_unlocked()) { + /* + * If prof_active is off, we reset prof_sample_event_wait to be + * the sample interval when it drops to 0, so that there won't + * be excessive routings to the slow path, and that when + * prof_active is turned 
on later, the counting for sampling + * can immediately resume as normal. + */ + thread_prof_sample_event_update(tsd, + (uint64_t)(1 << lg_prof_sample)); + } +} + static uint64_t thread_allocated_next_event_compute(tsd_t *tsd) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; @@ -86,23 +109,6 @@ thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { thread_allocated_next_event_fast_set(tsd, next_event_fast); } -static void -thread_prof_sample_event_handler(tsd_t *tsd) { - assert(config_prof && opt_prof); - assert(prof_sample_event_wait_get(tsd) == 0U); - if (!prof_active_get_unlocked()) { - /* - * If prof_active is off, we reset prof_sample_event_wait to be - * the sample interval when it drops to 0, so that there won't - * be excessive routings to the slow path, and that when - * prof_active is turned on later, the counting for sampling - * can immediately resume as normal. - */ - thread_prof_sample_event_update(tsd, - (uint64_t)(1 << lg_prof_sample)); - } -} - static uint64_t thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, bool allow_event_trigger) { From 97f93fa0f2d7343d308bbcd5cf551492d5652d0a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 24 Oct 2019 16:41:45 -0700 Subject: [PATCH 1403/2608] Pull tcache GC events into thread event handler --- include/jemalloc/internal/tcache_inlines.h | 2 -- include/jemalloc/internal/tcache_types.h | 5 ++++- include/jemalloc/internal/thread_event.h | 1 + include/jemalloc/internal/tsd.h | 9 ++++++--- src/jemalloc.c | 4 ---- src/thread_event.c | 11 +++++++++++ src/tsd.c | 4 ++++ test/unit/thread_event.c | 10 ++++++++-- 8 files changed, 34 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 85c6cc46..40c4286c 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -93,7 +93,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (config_stats) 
{ bin->tstats.nrequests++; } - tcache_event(tsd, tcache); return ret; } @@ -150,7 +149,6 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } } - tcache_event(tsd, tcache); return ret; } diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 60261fc0..9fd39263 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -43,10 +43,13 @@ typedef struct tcaches_s tcaches_t; */ #define TCACHE_GC_SWEEP 8192 -/* Number of tcache allocation/deallocation events between incremental GCs. */ +/* Number of tcache deallocation events between incremental GCs. */ #define TCACHE_GC_INCR \ ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) +/* Number of allocation bytes between tcache incremental GCs. */ +#define TCACHE_GC_INCR_BYTES 65536U + /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {{0}} diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 6aa334fc..3da9f0a6 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -33,6 +33,7 @@ void thread_event_boot(); * E(event, (condition)) */ #define ITERATE_OVER_ALL_EVENTS \ + E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \ E(prof_sample, (config_prof && opt_prof)) #define E(event, condition) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 60500df7..17bfc886 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -29,6 +29,7 @@ * x: narenas_tdata * l: thread_allocated_last_event * j: thread_allocated_next_event + * g: tcache_gc_event_wait * w: prof_sample_event_wait (config_prof) * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) @@ -46,11 +47,11 @@ * |---------------------------- 2nd cacheline ----------------------------| * | [c * 64 ........ 
........ ........ ........ ........ ........ .......] | * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] llllllll jjjjjjjj wwwwwwww xxxxxxxx | + * | [c * 32 ........ ........ .......] llllllll jjjjjjjj gggggggg wwwwwwww | * +---------------------------- 4th cacheline ----------------------------+ - * | pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ ........ | + * | xxxxxxxx pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ | * +---------------------------- 5th cacheline ----------------------------+ - * | ........ ..b][t.. ........ ........ ........ ........ ........ ........ | + * | ........ ........ ..b][t.. ........ ........ ........ ........ ........ | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * @@ -83,6 +84,7 @@ typedef void (*test_callback_t)(int *); O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(tcache_gc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ @@ -113,6 +115,7 @@ typedef void (*test_callback_t)(int *); /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 264b3f3f..10735121 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2350,10 +2350,6 @@ je_malloc(size_t size) { tcache_t *tcache = tsd_tcachep_get(tsd); - if (unlikely(ticker_trytick(&tcache->gc_ticker))) { - return malloc_default(size); 
- } - szind_t ind = sz_size2index_lookup(size); /* * The thread_allocated counter in tsd serves as a general purpose diff --git a/src/thread_event.c b/src/thread_event.c index 312dff26..33d669aa 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -18,6 +18,17 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +static void +thread_tcache_gc_event_handler(tsd_t *tsd) { + assert(TCACHE_GC_INCR_BYTES > 0); + assert(tcache_gc_event_wait_get(tsd) == 0U); + thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + tcache_t *tcache = tcache_get(tsd); + if (tcache != NULL) { + tcache_event_hard(tsd, tcache); + } +} + static void thread_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); diff --git a/src/tsd.c b/src/tsd.c index a31f6b96..3fa43d30 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,6 +233,10 @@ tsd_data_init(tsd_t *tsd) { *tsd_offset_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; + if (TCACHE_GC_INCR_BYTES > 0) { + thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + } + return tsd_tcache_enabled_data_init(tsd); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 6817262b..cf5b2e59 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -9,8 +9,11 @@ TEST_BEGIN(test_next_event_fast_roll_back) { THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); thread_allocated_next_event_fast_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - prof_sample_event_wait_set(tsd, +#define E(event, condition) \ + event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + ITERATE_OVER_ALL_EVENTS +#undef E void *p = malloc(16U); assert_ptr_not_null(p, "malloc() failed"); free(p); @@ -25,8 +28,11 @@ TEST_BEGIN(test_next_event_fast_resume) { thread_allocated_next_event_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); thread_allocated_next_event_fast_set(tsd, 0); - prof_sample_event_wait_set(tsd, +#define E(event, condition) \ + event##_event_wait_set(tsd, \ 
THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + ITERATE_OVER_ALL_EVENTS +#undef E void *p = malloc(SC_LOOKUP_MAXCLASS); assert_ptr_not_null(p, "malloc() failed"); free(p); From 43f0ce92d881f945da54a498cadc654ddb9403a1 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 1 Nov 2019 14:11:59 -0700 Subject: [PATCH 1404/2608] Define general purpose tsd_thread_event_init() --- include/jemalloc/internal/thread_event.h | 1 + src/thread_event.c | 29 ++++++++++++++++++++++++ src/tsd.c | 4 +--- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 3da9f0a6..8a05eaed 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -27,6 +27,7 @@ void thread_event_trigger(tsd_t *tsd, bool delay_event); void thread_event_rollback(tsd_t *tsd, size_t diff); void thread_event_update(tsd_t *tsd); void thread_event_boot(); +void tsd_thread_event_init(tsd_t *tsd); /* * List of all events, in the following format: diff --git a/src/thread_event.c b/src/thread_event.c index 33d669aa..f27a37aa 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -11,6 +11,13 @@ */ static bool thread_event_active = false; +/* TSD event init function signatures. */ +#define E(event, condition) \ +static void tsd_thread_##event##_event_init(tsd_t *tsd); + +ITERATE_OVER_ALL_EVENTS +#undef E + /* Event handler function signatures. */ #define E(event, condition) \ static void thread_##event##_event_handler(tsd_t *tsd); @@ -18,6 +25,18 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +static void +tsd_thread_tcache_gc_event_init(tsd_t *tsd) { + assert(TCACHE_GC_INCR_BYTES > 0); + thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); +} + +static void +tsd_thread_prof_sample_event_init(tsd_t *tsd) { + assert(config_prof && opt_prof); + /* Do not set sample interval until the first allocation. 
*/ +} + static void thread_tcache_gc_event_handler(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); @@ -270,3 +289,13 @@ void thread_event_boot() { ITERATE_OVER_ALL_EVENTS #undef E } + +void tsd_thread_event_init(tsd_t *tsd) { +#define E(event, condition) \ + if (condition) { \ + tsd_thread_##event##_event_init(tsd); \ + } + + ITERATE_OVER_ALL_EVENTS +#undef E +} diff --git a/src/tsd.c b/src/tsd.c index 3fa43d30..bb40af14 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,9 +233,7 @@ tsd_data_init(tsd_t *tsd) { *tsd_offset_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; - if (TCACHE_GC_INCR_BYTES > 0) { - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); - } + tsd_thread_event_init(tsd); return tsd_tcache_enabled_data_init(tsd); } From a8b578d538adced7506aec1179379eb541c0198d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 5 Nov 2019 06:46:52 -0800 Subject: [PATCH 1405/2608] Remove mallctl test for zero_realloc --- test/unit/mallctl.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4c0830f2..ebbaed7d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -178,6 +178,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); TEST_MALLCTL_OPT(const char *, thp, always); + TEST_MALLCTL_OPT(const char *, zero_realloc, always); TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); @@ -880,16 +881,6 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END -TEST_BEGIN(test_zero_realloc) { - const char *val; - size_t sz = sizeof(val); - int err = mallctl("opt.zero_realloc", &val, &sz, NULL, 0); - assert_d_eq(err, 0, "Unexpected mallctl result"); - assert_str_eq(val, "strict", - "Unexpected default zero_realloc_beahvior"); -} -TEST_END - int main(void) { return test( @@ -921,6 +912,5 @@ main(void) { 
test_prof_active, test_stats_arenas, test_hooks, - test_hooks_exhaustion, - test_zero_realloc); + test_hooks_exhaustion); } From d01b425e5d1e1ed3d7f7c5571002681469acf601 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Tue, 29 Oct 2019 13:03:41 -0700 Subject: [PATCH 1406/2608] Add -Wimplicit-fallthrough checks if supported Clang since r369414 (clang-10) can now check -Wimplicit-fallthrough for C code, and use the GNU C style attribute to denote fallthrough. Move the test from header only to autoconf. The previous test used brittle version detection which did not work for newer clang that supported this feature. The attribute has to be its own statement, hence the added `;`. It also can only precede case statements, so the final cases should be explicitly terminated with break statements. Fixes commit 3d29d11ac2c1 ("Clean compilation -Wextra") Link: https://github.com/llvm/llvm-project/commit/1e0affb6e564b7361b0aadb38805f26deff4ecfc Signed-off-by: Nick Desaulniers --- configure.ac | 20 ++++++ include/jemalloc/internal/hash.h | 65 ++++++++++--------- .../internal/jemalloc_internal_macros.h | 7 -- include/jemalloc/jemalloc_defs.h.in | 3 + include/jemalloc/jemalloc_macros.h.in | 7 ++ src/arena.c | 2 +- src/malloc_io.c | 6 +- src/tsd.c | 2 +- 8 files changed, 68 insertions(+), 44 deletions(-) diff --git a/configure.ac b/configure.ac index bca422af..c3f53f70 100644 --- a/configure.ac +++ b/configure.ac @@ -866,6 +866,26 @@ if test "x${je_cv_format_arg}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ]) fi +dnl Check for fallthrough attribute support. 
+JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Wimplicit-fallthrough]) +JE_COMPILABLE([fallthrough attribute], + [#if !__has_attribute(fallthrough) + #error "foo" + #endif], + [int x = 0; + switch (x) { + case 0: __attribute__((__fallthrough__)); + case 1: return 1; + }], + [je_cv_fallthrough]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_fallthrough}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ]) + JE_CFLAGS_ADD([-Wimplicit-fallthrough]) + JE_CXXFLAGS_ADD([-Wimplicit-fallthrough]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 0270034e..9132b60c 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -104,8 +104,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) { uint32_t k1 = 0; switch (len & 3) { - case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH - case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH + case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH; + case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; } @@ -177,29 +177,29 @@ hash_x86_128(const void *key, const int len, uint32_t seed, uint32_t k4 = 0; switch (len & 15) { - case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH - case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH + case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH; + case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH; case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; - JEMALLOC_FALLTHROUGH - case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH - case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH - case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH + JEMALLOC_FALLTHROUGH; + case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH; + case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH; + case 10: k3 ^= tail[ 9] << 
8; JEMALLOC_FALLTHROUGH; case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; - JEMALLOC_FALLTHROUGH - case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH - case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH - case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH + k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + JEMALLOC_FALLTHROUGH; + case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH; + case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH; + case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH; case 5: k2 ^= tail[ 4] << 0; k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; - JEMALLOC_FALLTHROUGH - case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH - case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH - case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH + JEMALLOC_FALLTHROUGH; + case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH; + case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH; + case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= tail[ 0] << 0; k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; - JEMALLOC_FALLTHROUGH + break; } } @@ -261,24 +261,25 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH; + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH; + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH; + case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH; + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH; + case 10: 
k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH; case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - JEMALLOC_FALLTHROUGH - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH + JEMALLOC_FALLTHROUGH; + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH; + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH; + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH; + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH; + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH; + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH; + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + break; } } diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index d8ea06f6..ece3b872 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -40,13 +40,6 @@ #define JEMALLOC_VA_ARGS_HEAD(head, ...) head #define JEMALLOC_VA_ARGS_TAIL(head, ...) 
__VA_ARGS__ -#if (defined(__GNUC__) || defined(__GNUG__)) && !defined(__clang__) \ - && defined(JEMALLOC_HAVE_ATTR) && (__GNUC__ >= 7) -#define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough); -#else -#define JEMALLOC_FALLTHROUGH /* falls through */ -#endif - /* Diagnostic suppression macros */ #if defined(_MSC_VER) && !defined(__clang__) # define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 11c39181..032fba4d 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -13,6 +13,9 @@ /* Defined if format(printf, ...) attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_FORMAT_PRINTF +/* Defined if fallthrough attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_FALLTHROUGH + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 59e29558..b4469d8e 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -71,6 +71,7 @@ # endif # define JEMALLOC_FORMAT_ARG(i) # define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_FALLTHROUGH # define JEMALLOC_NOINLINE __declspec(noinline) # ifdef __cplusplus # define JEMALLOC_NOTHROW __declspec(nothrow) @@ -109,6 +110,11 @@ # else # define JEMALLOC_FORMAT_PRINTF(s, i) # endif +# ifdef JEMALLOC_HAVE_ATTR_FALLTHROUGH +# define JEMALLOC_FALLTHROUGH JEMALLOC_ATTR(fallthrough) +# else +# define JEMALLOC_FALLTHROUGH +# endif # define JEMALLOC_NOINLINE JEMALLOC_ATTR(noinline) # define JEMALLOC_NOTHROW JEMALLOC_ATTR(nothrow) # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) @@ -121,6 +127,7 @@ # define JEMALLOC_ALLOC_SIZE2(s1, s2) # define JEMALLOC_EXPORT # define JEMALLOC_FORMAT_PRINTF(s, i) +# define JEMALLOC_FALLTHROUGH # define JEMALLOC_NOINLINE # define JEMALLOC_NOTHROW # define JEMALLOC_SECTION(s) diff --git 
a/src/arena.c b/src/arena.c index a60a6843..e4dd4770 100644 --- a/src/arena.c +++ b/src/arena.c @@ -874,7 +874,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, arena, is_background_thread); break; } - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case extent_state_muzzy: extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, extent); diff --git a/src/malloc_io.c b/src/malloc_io.c index 2fae7570..fc7ff726 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -135,10 +135,10 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { break; case '-': neg = true; - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case '+': p++; - /* Fall through. */ + JEMALLOC_FALLTHROUGH; default: goto label_prefix; } @@ -289,7 +289,7 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) { if (!neg) { break; } - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case ' ': case '+': s--; diff --git a/src/tsd.c b/src/tsd.c index bb40af14..6c90adec 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -389,7 +389,7 @@ tsd_cleanup(void *arg) { * is still called for testing and completeness. */ assert_tsd_data_cleanup_done(tsd); - /* Fall through. */ + JEMALLOC_FALLTHROUGH; case tsd_state_nominal: case tsd_state_nominal_slow: tsd_do_data_cleanup(tsd); From 19a51abf337d35b3bdbbac22d8c513f4fd8b6c57 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 16:44:37 -0800 Subject: [PATCH 1407/2608] Avoid arena->offset_state when tsd not available for prng. Use stack locals and remove the offset_state in arena. --- include/jemalloc/internal/arena_structs.h | 8 -------- src/arena.c | 12 ------------ src/extent.c | 4 ++-- 3 files changed, 2 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 54889dc8..bc8c0394 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -118,14 +118,6 @@ struct arena_s { /* Synchronization: internal. 
*/ prof_accum_t prof_accum; - /* - * PRNG state for cache index randomization of large allocation base - * pointers. - * - * Synchronization: atomic. - */ - atomic_zu_t offset_state; - /* * Extent serial number generator state. * diff --git a/src/arena.c b/src/arena.c index e4dd4770..fa18d144 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1981,18 +1981,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - if (config_cache_oblivious) { - /* - * A nondeterministic seed based on the address of arena reduces - * the likelihood of lockstep non-uniform cache index - * utilization among identical concurrent processes, but at the - * cost of test repeatability. For debug builds, instead use a - * deterministic seed. - */ - atomic_store_zu(&arena->offset_state, config_debug ? ind : - (size_t)(uintptr_t)arena, ATOMIC_RELAXED); - } - atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED); atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), diff --git a/src/extent.c b/src/extent.c index 4bb358d4..50a81055 100644 --- a/src/extent.c +++ b/src/extent.c @@ -187,8 +187,8 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, r = (size_t)prng_lg_range_u64( tsd_offset_statep_get(tsd), lg_range); } else { - r = prng_lg_range_zu(&arena->offset_state, lg_range, - true); + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); From bc774a3519788bec8b18f0a5988767fc11d034fa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 16:48:12 -0800 Subject: [PATCH 1408/2608] Rename tsd->offset_state to tsd->prng_state. 
--- include/jemalloc/internal/tsd.h | 6 +++--- src/extent.c | 2 +- src/tsd.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 17bfc886..6332a003 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -33,7 +33,7 @@ * w: prof_sample_event_wait (config_prof) * x: prof_sample_last_event (config_prof) * p: prof_tdata (config_prof) - * v: offset_state + * v: prng_state * i: iarena * a: arena * o: arenas_tdata @@ -88,7 +88,7 @@ typedef void (*test_callback_t)(int *); O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ - O(offset_state, uint64_t, uint64_t) \ + O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ @@ -119,7 +119,7 @@ typedef void (*test_callback_t)(int *); /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ /* prof_tdata */ NULL, \ - /* offset_state */ 0, \ + /* prng_state */ 0, \ /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ diff --git a/src/extent.c b/src/extent.c index 50a81055..d9eff764 100644 --- a/src/extent.c +++ b/src/extent.c @@ -185,7 +185,7 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, if (!tsdn_null(tsdn)) { tsd_t *tsd = tsdn_tsd(tsdn); r = (size_t)prng_lg_range_u64( - tsd_offset_statep_get(tsd), lg_range); + tsd_prng_statep_get(tsd), lg_range); } else { uint64_t stack_value = (uint64_t)(uintptr_t)&r; r = (size_t)prng_lg_range_u64(&stack_value, lg_range); diff --git a/src/tsd.c b/src/tsd.c index 6c90adec..5053f12f 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -230,7 +230,7 @@ tsd_data_init(tsd_t *tsd) { * cost of test repeatability. For debug builds, instead use a * deterministic seed. */ - *tsd_offset_statep_get(tsd) = config_debug ? 
0 : + *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; tsd_thread_event_init(tsd); From da50d8ce87cb21963596825ebc5faf6d8abd4d2c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 17:22:25 -0800 Subject: [PATCH 1409/2608] Refactor and optimize prof sampling initialization. Makes the prof sample prng use the tsd prng_state. This allows us to properly initialize the sample interval event, without having to create tdata. As a result, tdata will be created on demand (when a thread reaches the sample interval bytes allocated), instead of on the first allocation. --- include/jemalloc/internal/prof_externs.h | 4 +- include/jemalloc/internal/prof_inlines_b.h | 45 ++-------------------- include/jemalloc/internal/prof_structs.h | 3 -- src/prof.c | 13 +++---- src/prof_data.c | 7 ++-- src/thread_event.c | 2 +- src/tsd.c | 1 + 7 files changed, 16 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 94fbd752..fd18ac48 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -100,7 +100,7 @@ void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(prof_tdata_t *tdata); +void prof_sample_threshold_update(tsd_t *tsd); void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); bool prof_log_start(tsdn_t *tsdn, const char *filename); @@ -120,7 +120,7 @@ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active); + uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); 
diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index b4e65c05..388537e6 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -82,9 +82,7 @@ prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { JEMALLOC_ALWAYS_INLINE bool prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { - prof_tdata_t *tdata; - + prof_tdata_t **tdata_out) { cassert(config_prof); /* Fastpath: no need to load tdata */ @@ -96,8 +94,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - bool booted = prof_tdata_get(tsd, false); - tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { tdata = NULL; } @@ -110,45 +107,9 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, return true; } - if (!booted) { - /* - * If this was the first creation of tdata, then it means that - * the previous thread_event() relied on the wrong prof_sample - * wait time, and that it should have relied on the new - * prof_sample wait time just set by prof_tdata_get(), so we - * now manually check again. - * - * If the check fails, then even though we relied on the wrong - * prof_sample wait time, we're now actually in perfect shape, - * in the sense that we can pretend that we have used the right - * prof_sample wait time. - * - * If the check succeeds, then we are now in a tougher - * situation, in the sense that we cannot pretend that we have - * used the right prof_sample wait time. A straightforward - * solution would be to fully roll back thread_event(), set the - * right prof_sample wait time, and then redo thread_event(). - * A simpler way, which is implemented below, is to just set a - * new prof_sample wait time that is usize less, and do nothing - * else. 
Strictly speaking, the thread event handler may end - * up in a wrong state, since it has still recorded an event - * whereas in reality there may be no event. However, the - * difference in the wait time offsets the wrongly recorded - * event, so that, functionally, the countdown to the next - * event will behave exactly as if we have used the right - * prof_sample wait time in the first place. - */ - uint64_t wait = prof_sample_event_wait_get(tsd); - assert(wait > 0); - if (usize < wait) { - thread_prof_sample_event_update(tsd, wait - usize); - return true; - } - } - /* Compute new sample threshold. */ if (update) { - prof_sample_threshold_update(tdata); + prof_sample_threshold_update(tsd); } return !tdata->active; } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 34ed4822..9a00a189 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -167,9 +167,6 @@ struct prof_tdata_s { */ ckh_t bt2tctx; - /* Sampling state. */ - uint64_t prng_state; - /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; diff --git a/src/prof.c b/src/prof.c index 5360662b..0590482c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -149,7 +149,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { */ tdata = prof_tdata_get(tsd, true); if (tdata != NULL) { - prof_sample_threshold_update(tdata); + prof_sample_threshold_update(tsd); } } @@ -469,14 +469,12 @@ prof_tdata_mutex_choose(uint64_t thr_uid) { * -mno-sse) in order for the workaround to be complete. 
*/ void -prof_sample_threshold_update(prof_tdata_t *tdata) { +prof_sample_threshold_update(tsd_t *tsd) { #ifdef JEMALLOC_PROF if (!config_prof) { return; } - tsd_t *tsd = tsd_fetch(); - if (lg_prof_sample == 0) { thread_prof_sample_event_update(tsd, THREAD_EVENT_MIN_START_WAIT); @@ -501,13 +499,12 @@ prof_sample_threshold_update(prof_tdata_t *tdata) { * pp 500 * (http://luc.devroye.org/rnbookindex.html) */ - uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53); + uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); double u = (double)r * (1.0/9007199254740992.0L); uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; thread_prof_sample_event_update(tsd, bytes_until_sample); - #endif } @@ -810,7 +807,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); + NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); } static char * @@ -846,7 +843,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { prof_tdata_detach(tsd, tdata); return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active); + active, true); } void diff --git a/src/prof_data.c b/src/prof_data.c index cd92ee61..2f8bd2de 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1198,7 +1198,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active) { + char *thread_name, bool active, bool reset_interval) { assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata; @@ -1227,8 +1227,9 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - tdata->prng_state = (uint64_t)(uintptr_t)tdata; - prof_sample_threshold_update(tdata); + if (reset_interval) { + prof_sample_threshold_update(tsd); + } 
tdata->enq = false; tdata->enq_idump = false; diff --git a/src/thread_event.c b/src/thread_event.c index f27a37aa..9f6c9271 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -34,7 +34,7 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) { static void tsd_thread_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); - /* Do not set sample interval until the first allocation. */ + prof_sample_threshold_update(tsd); } static void diff --git a/src/tsd.c b/src/tsd.c index 5053f12f..6e0ee93c 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -233,6 +233,7 @@ tsd_data_init(tsd_t *tsd) { *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; + /* event_init may use the prng state above. */ tsd_thread_event_init(tsd); return tsd_tcache_enabled_data_init(tsd); From 9c59abe42afd044b742bd5c2ec8c1e01a4a8c1ca Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Nov 2019 12:13:48 -0800 Subject: [PATCH 1410/2608] Fix a typo in Makefile. --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 7eba7742..0bbf106d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -418,7 +418,7 @@ $(objroot)include/jemalloc/internal/private_namespace_jet.gen.h: $(C_JET_SYMS) $(SHELL) $(srcroot)include/jemalloc/internal/private_namespace.sh $^ > $@ %.h: %.gen.h - @if ! `cmp -s $< $@` ; then echo "cp $< $<"; cp $< $@ ; fi + @if ! `cmp -s $< $@` ; then echo "cp $< $@"; cp $< $@ ; fi $(CPP_OBJS) $(CPP_PIC_OBJS) $(TESTS_CPP_OBJS): %.$(O): @mkdir -p $(@D) From 836d7a7e69011321ba75620279a31d43a05bf0d6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 4 Nov 2019 18:24:39 -0800 Subject: [PATCH 1411/2608] Check for large size first in the uncommon case of malloc. Larger sizes are not that uncommon comparing to !tsd_fast. 
--- src/jemalloc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 10735121..239494df 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2344,12 +2344,10 @@ je_malloc(size_t size) { } tsd_t *tsd = tsd_get(false); - if (unlikely(!tsd || !tsd_fast(tsd) || (size > SC_LOOKUP_MAXCLASS))) { + if (unlikely((size > SC_LOOKUP_MAXCLASS) || !tsd || !tsd_fast(tsd))) { return malloc_default(size); } - tcache_t *tcache = tsd_tcachep_get(tsd); - szind_t ind = sz_size2index_lookup(size); /* * The thread_allocated counter in tsd serves as a general purpose @@ -2373,6 +2371,7 @@ je_malloc(size_t size) { return malloc_default(size); } + tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); From c462753cc8e1d70318b6fcc4ffa0b8498588205c Mon Sep 17 00:00:00 2001 From: Leonardo Santagada Date: Wed, 23 Oct 2019 15:00:49 +0200 Subject: [PATCH 1412/2608] Use __forceinline for JEMALLOC_ALWAYS_INLINE on msvc --- include/jemalloc/internal/jemalloc_internal_macros.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index ece3b872..e97b5f90 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -4,7 +4,11 @@ #ifdef JEMALLOC_DEBUG # define JEMALLOC_ALWAYS_INLINE static inline #else -# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline +# ifdef _MSC_VER +# define JEMALLOC_ALWAYS_INLINE static __forceinline +# else +# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline +# endif #endif #ifdef _MSC_VER # define inline _inline From e4c36a6f30d5b393f05daa2850e2c03406c5c4c2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 12 Nov 2019 23:44:01 -0800 Subject: [PATCH 1413/2608] 
Emphasize no modification through thread.allocatedp allowed. --- doc/jemalloc.xml.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 77afb00c..76edab81 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1554,7 +1554,8 @@ malloc_conf = "xmalloc:true";]]> thread.allocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls. + mallctl*() calls. Note that the underlying counter + should not be modified by the application. @@ -1581,7 +1582,8 @@ malloc_conf = "xmalloc:true";]]> thread.deallocated mallctl. This is useful for avoiding the overhead of repeated - mallctl*() calls. + mallctl*() calls. Note that the underlying counter + should not be modified by the application. From 3b5eecf102dcc3eb9a4a50346cdfa96917683e0a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 15 Nov 2019 11:43:25 -0800 Subject: [PATCH 1414/2608] Fix bug in prof_realloc We should pass in `old_ptr` rather than the new `ptr` to `prof_free_sampled_object()` when `old_ptr` points to a sampled allocation. --- include/jemalloc/internal/prof_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 388537e6..3465397b 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -203,7 +203,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, old_ptr, old_usize, old_tctx); } } From 73510dfd150d0c28d48b15f28f8329a108c53af0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 15 Nov 2019 14:20:31 -0800 Subject: [PATCH 1415/2608] Revert "Fix bug in prof_realloc" This reverts commit 3b5eecf102dcc3eb9a4a50346cdfa96917683e0a. 
--- include/jemalloc/internal/prof_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 3465397b..388537e6 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -203,7 +203,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); } } From 04cb7d4d6b8cd2fb1c615aeb049e00a51c66083e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 6 Nov 2019 23:09:20 -0800 Subject: [PATCH 1416/2608] Bail out early for muzzy decay. This avoids taking the muzzy decay mutex with the default setting. --- src/arena.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index fa18d144..a8cfceea 100644 --- a/src/arena.c +++ b/src/arena.c @@ -919,7 +919,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); - if (decay->purging) { + if (decay->purging || npages_decay_max == 0) { return; } decay->purging = true; @@ -988,6 +988,10 @@ arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { + if (eset_npages_get(&arena->eset_muzzy) == 0 && + arena_muzzy_decay_ms_get(arena) <= 0) { + return false; + } return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, &arena->eset_muzzy, is_background_thread, all); } From a787d2f5b35f8a28738e19efeea626c2a3999104 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 8 Nov 2019 13:05:43 -0800 Subject: [PATCH 1417/2608] Prefer getaffinity() to detect number of CPUs. 
--- src/jemalloc.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 239494df..0e379d42 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -742,18 +742,28 @@ malloc_ncpus(void) { SYSTEM_INFO si; GetSystemInfo(&si); result = si.dwNumberOfProcessors; -#elif defined(JEMALLOC_GLIBC_MALLOC_HOOK) && defined(CPU_COUNT) +#elif defined(CPU_COUNT) /* * glibc >= 2.6 has the CPU_COUNT macro. * * glibc's sysconf() uses isspace(). glibc allocates for the first time * *before* setting up the isspace tables. Therefore we need a * different method to get the number of CPUs. + * + * The getaffinity approach is also preferred when only a subset of CPUs + * is available, to avoid using more arenas than necessary. */ { +# if defined(__FreeBSD__) + cpuset_t set; +# else cpu_set_t set; - +# endif +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + sched_getaffinity(0, sizeof(set), &set); +# else pthread_getaffinity_np(pthread_self(), sizeof(set), &set); +# endif result = CPU_COUNT(&set); } #else From 7160617107af5f566902ea3d1281b3a3c3cb6eea Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 15 Nov 2019 22:47:49 -0800 Subject: [PATCH 1418/2608] Add branch hints to free_fastpath. Explicityly mark the non-slab case unlikely. Previously there were jumps in the common case. 
--- src/jemalloc.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0e379d42..a5d66773 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2647,8 +2647,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - tcache_t *tcache = tsd_tcachep_get(tsd); - alloc_ctx_t alloc_ctx; /* * If !config_cache_oblivious, we can check PAGE alignment to @@ -2658,27 +2656,29 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { */ if (!size_hint || config_cache_oblivious) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &extents_rtree, - rtree_ctx, (uintptr_t)ptr, - &alloc_ctx.szind, &alloc_ctx.slab); + bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), + &extents_rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx.szind, + &alloc_ctx.slab); /* Note: profiled objects will have alloc_ctx.slab set */ - if (!res || !alloc_ctx.slab) { + if (unlikely(!res || !alloc_ctx.slab)) { return false; } assert(alloc_ctx.szind != SC_NSIZES); } else { /* - * Check for both sizes that are too large, and for sampled objects. - * Sampled objects are always page-aligned. The sampled object check - * will also check for null ptr. + * Check for both sizes that are too large, and for sampled + * objects. Sampled objects are always page-aligned. The + * sampled object check will also check for null ptr. 
*/ - if (size > SC_LOOKUP_MAXCLASS || (((uintptr_t)ptr & PAGE_MASK) == 0)) { + if (unlikely(size > SC_LOOKUP_MAXCLASS || + (((uintptr_t)ptr & PAGE_MASK) == 0))) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); } + tcache_t *tcache = tsd_tcachep_get(tsd); if (unlikely(ticker_trytick(&tcache->gc_ticker))) { return false; } @@ -3532,7 +3532,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); - if (flags !=0 || !free_fastpath(ptr, size, true)) { + if (flags != 0 || !free_fastpath(ptr, size, true)) { sdallocx_default(ptr, size, flags); } From cb1a1f4adadc85366e51afcf1a53b359828fba67 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 15 Nov 2019 22:54:15 -0800 Subject: [PATCH 1419/2608] Remove the unnecessary alloc_ctx on free_fastpath. --- src/jemalloc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index a5d66773..e8ac2fc9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2647,7 +2647,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - alloc_ctx_t alloc_ctx; + szind_t szind; /* * If !config_cache_oblivious, we can check PAGE alignment to * detect sampled objects. Otherwise addresses are @@ -2655,16 +2655,16 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { * See also isfree(). 
*/ if (!size_hint || config_cache_oblivious) { + bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), - &extents_rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx.szind, - &alloc_ctx.slab); + &extents_rtree, rtree_ctx, (uintptr_t)ptr, &szind, &slab); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(!res || !alloc_ctx.slab)) { + if (unlikely(!res || !slab)) { return false; } - assert(alloc_ctx.szind != SC_NSIZES); + assert(szind != SC_NSIZES); } else { /* * Check for both sizes that are too large, and for sampled @@ -2675,7 +2675,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { (((uintptr_t)ptr & PAGE_MASK) == 0))) { return false; } - alloc_ctx.szind = sz_size2index_lookup(size); + szind = sz_size2index_lookup(size); } tcache_t *tcache = tsd_tcachep_get(tsd); @@ -2683,12 +2683,12 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + cache_bin_t *bin = tcache_small_bin_get(tcache, szind); if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } - size_t usize = sz_index2size(alloc_ctx.szind); + size_t usize = sz_index2size(szind); *tsd_thread_deallocatedp_get(tsd) += usize; return true; From 9a7ae3c97fd4753981d3a14a4b6a72b2d2a83f44 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 5 Nov 2019 20:43:59 -0800 Subject: [PATCH 1420/2608] Reduce footprint of bin_t. Avoid storing mutex_prof_data_t in bin_t. Added bin_stats_data_t which is used for reporting bin stats. 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/bin.h | 23 +++---- include/jemalloc/internal/bin_stats.h | 7 ++- include/jemalloc/internal/ctl.h | 2 +- src/arena.c | 2 +- src/ctl.c | 75 ++++++++++------------- 6 files changed, 54 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a71f9446..5178e238 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -25,7 +25,7 @@ void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats, + bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 70250a40..0d6aff8b 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -61,19 +61,20 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. 
*/ static inline void -bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { +bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { malloc_mutex_lock(tsdn, &bin->lock); malloc_mutex_prof_accum(tsdn, &dst_bin_stats->mutex_data, &bin->lock); - dst_bin_stats->nmalloc += bin->stats.nmalloc; - dst_bin_stats->ndalloc += bin->stats.ndalloc; - dst_bin_stats->nrequests += bin->stats.nrequests; - dst_bin_stats->curregs += bin->stats.curregs; - dst_bin_stats->nfills += bin->stats.nfills; - dst_bin_stats->nflushes += bin->stats.nflushes; - dst_bin_stats->nslabs += bin->stats.nslabs; - dst_bin_stats->reslabs += bin->stats.reslabs; - dst_bin_stats->curslabs += bin->stats.curslabs; - dst_bin_stats->nonfull_slabs += bin->stats.nonfull_slabs; + bin_stats_t *stats = &dst_bin_stats->stats_data; + stats->nmalloc += bin->stats.nmalloc; + stats->ndalloc += bin->stats.ndalloc; + stats->nrequests += bin->stats.nrequests; + stats->curregs += bin->stats.curregs; + stats->nfills += bin->stats.nfills; + stats->nflushes += bin->stats.nflushes; + stats->nslabs += bin->stats.nslabs; + stats->reslabs += bin->stats.reslabs; + stats->curslabs += bin->stats.curslabs; + stats->nonfull_slabs += bin->stats.nonfull_slabs; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index d04519c8..0b99297c 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -47,8 +47,11 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. 
*/ size_t nonfull_slabs; - - mutex_prof_data_t mutex_data; }; +typedef struct bin_stats_data_s bin_stats_data_t; +struct bin_stats_data_s { + bin_stats_t stats_data; + mutex_prof_data_t mutex_data; +}; #endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 8ddf7f86..55a8ff48 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -42,7 +42,7 @@ typedef struct ctl_arena_stats_s { uint64_t nfills_small; uint64_t nflushes_small; - bin_stats_t bstats[SC_NBINS]; + bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; arena_stats_extents_t estats[SC_NPSIZES]; } ctl_arena_stats_t; diff --git a/src/arena.c b/src/arena.c index a8cfceea..f6e9402d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -83,7 +83,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_t *bstats, arena_stats_large_t *lstats, + bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats) { cassert(config_stats); diff --git a/src/ctl.c b/src/ctl.c index abb82b57..e2cdc29d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -789,7 +789,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->nfills_small = 0; ctl_arena->astats->nflushes_small = 0; memset(ctl_arena->astats->bstats, 0, SC_NBINS * - sizeof(bin_stats_t)); + sizeof(bin_stats_data_t)); memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * @@ -810,19 +810,15 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { ctl_arena->astats->lstats, ctl_arena->astats->estats); for (i = 0; i < SC_NBINS; i++) { - ctl_arena->astats->allocated_small += - ctl_arena->astats->bstats[i].curregs * + bin_stats_t *bstats = + 
&ctl_arena->astats->bstats[i].stats_data; + ctl_arena->astats->allocated_small += bstats->curregs * sz_index2size(i); - ctl_arena->astats->nmalloc_small += - ctl_arena->astats->bstats[i].nmalloc; - ctl_arena->astats->ndalloc_small += - ctl_arena->astats->bstats[i].ndalloc; - ctl_arena->astats->nrequests_small += - ctl_arena->astats->bstats[i].nrequests; - ctl_arena->astats->nfills_small += - ctl_arena->astats->bstats[i].nfills; - ctl_arena->astats->nflushes_small += - ctl_arena->astats->bstats[i].nflushes; + ctl_arena->astats->nmalloc_small += bstats->nmalloc; + ctl_arena->astats->ndalloc_small += bstats->ndalloc; + ctl_arena->astats->nrequests_small += bstats->nrequests; + ctl_arena->astats->nfills_small += bstats->nfills; + ctl_arena->astats->nflushes_small += bstats->nflushes; } } else { arena_basic_stats_merge(tsdn, arena, &ctl_arena->nthreads, @@ -935,29 +931,26 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge bin stats. */ for (i = 0; i < SC_NBINS; i++) { - sdstats->bstats[i].nmalloc += astats->bstats[i].nmalloc; - sdstats->bstats[i].ndalloc += astats->bstats[i].ndalloc; - sdstats->bstats[i].nrequests += - astats->bstats[i].nrequests; + bin_stats_t *bstats = &astats->bstats[i].stats_data; + bin_stats_t *merged = &sdstats->bstats[i].stats_data; + merged->nmalloc += bstats->nmalloc; + merged->ndalloc += bstats->ndalloc; + merged->nrequests += bstats->nrequests; if (!destroyed) { - sdstats->bstats[i].curregs += - astats->bstats[i].curregs; + merged->curregs += bstats->curregs; } else { - assert(astats->bstats[i].curregs == 0); + assert(bstats->curregs == 0); } - sdstats->bstats[i].nfills += astats->bstats[i].nfills; - sdstats->bstats[i].nflushes += - astats->bstats[i].nflushes; - sdstats->bstats[i].nslabs += astats->bstats[i].nslabs; - sdstats->bstats[i].reslabs += astats->bstats[i].reslabs; + merged->nfills += bstats->nfills; + merged->nflushes += bstats->nflushes; + merged->nslabs += bstats->nslabs; + merged->reslabs += bstats->reslabs; if (!destroyed) { - 
sdstats->bstats[i].curslabs += - astats->bstats[i].curslabs; - sdstats->bstats[i].nonfull_slabs += - astats->bstats[i].nonfull_slabs; + merged->curslabs += bstats->curslabs; + merged->nonfull_slabs += bstats->nonfull_slabs; } else { - assert(astats->bstats[i].curslabs == 0); - assert(astats->bstats[i].nonfull_slabs == 0); + assert(bstats->curslabs == 0); + assert(bstats->nonfull_slabs == 0); } malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); @@ -3035,25 +3028,25 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nmalloc, - arenas_i(mib[2])->astats->bstats[mib[4]].nmalloc, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nmalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_ndalloc, - arenas_i(mib[2])->astats->bstats[mib[4]].ndalloc, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.ndalloc, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nrequests, - arenas_i(mib[2])->astats->bstats[mib[4]].nrequests, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nrequests, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curregs, - arenas_i(mib[2])->astats->bstats[mib[4]].curregs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curregs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nfills, - arenas_i(mib[2])->astats->bstats[mib[4]].nfills, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nfills, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nflushes, - arenas_i(mib[2])->astats->bstats[mib[4]].nflushes, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nflushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nslabs, - arenas_i(mib[2])->astats->bstats[mib[4]].nslabs, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nreslabs, - 
arenas_i(mib[2])->astats->bstats[mib[4]].reslabs, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.reslabs, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, - arenas_i(mib[2])->astats->bstats[mib[4]].curslabs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, - arenas_i(mib[2])->astats->bstats[mib[4]].nonfull_slabs, size_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, From 9a3c73800991d3508516208127994a1fc3837de5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 5 Nov 2019 13:22:54 -0800 Subject: [PATCH 1421/2608] Refactor arena_bin_malloc_hard(). --- src/arena.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/arena.c b/src/arena.c index f6e9402d..5537e66f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1311,21 +1311,21 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, static void * arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, unsigned binshard) { - const bin_info_t *bin_info; - extent_t *slab; - bin_info = &bin_infos[binind]; - if (!arena_is_auto(arena) && bin->slabcur != NULL) { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - bin->slabcur = NULL; - } - slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); if (bin->slabcur != NULL) { - /* - * Another thread updated slabcur while this one ran without the - * bin lock in arena_bin_nonfull_slab_get(). - */ + /* Only attempted when current slab is full. 
*/ + assert(extent_nfree_get(bin->slabcur) == 0); + } + + const bin_info_t *bin_info = &bin_infos[binind]; + extent_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, + binshard); + if (bin->slabcur != NULL) { if (extent_nfree_get(bin->slabcur) > 0) { + /* + * Another thread updated slabcur while this one ran + * without the bin lock in arena_bin_nonfull_slab_get(). + */ void *ret = arena_slab_reg_alloc(bin->slabcur, bin_info); if (slab != NULL) { @@ -1357,7 +1357,6 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, return NULL; } bin->slabcur = slab; - assert(extent_nfree_get(bin->slabcur) > 0); return arena_slab_reg_alloc(slab, bin_info); From 8b2c2a596da9bed11432ac703a6c0b0a76ec4dfd Mon Sep 17 00:00:00 2001 From: Mark Santaniello Date: Sat, 26 Oct 2019 23:28:42 -0700 Subject: [PATCH 1422/2608] Support C++17 over-aligned allocation Summary: Add support for C++17 over-aligned allocation: http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0035r4.html Supporting all 10 operators means we avoid thunking thru libstdc++-v3/libsupc++ and just call jemalloc directly. It's also worth noting that there is now an aligned *and sized* operator delete: ``` void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept; ``` If JeMalloc did not provide this, the default implementation would ignore the size parameter entirely: https://github.com/gcc-mirror/gcc/blob/master/libstdc%2B%2B-v3/libsupc%2B%2B/del_opsa.cc#L30-L33 (I must also update ax_cxx_compile_stdcxx.m4 to a newer version with C++17 support.) Test Plan: Wrote a simple test that allocates and then deletes an over-aligned type: ``` struct alignas(32) Foo {}; Foo *f; int main() { f = new Foo; delete f; } ``` Before this change, both new and delete go thru PLT, and we end up calling regular old free: ``` (gdb) disassemble Dump of assembler code for function main(): ... 0x00000000004029b7 <+55>: call 0x4022d0 <_ZnwmSt11align_val_t@plt> ... 
0x00000000004029d5 <+85>: call 0x4022e0 <_ZdlPvmSt11align_val_t@plt> ... (gdb) s free (ptr=0x7ffff6408020) at /home/engshare/third-party2/jemalloc/master/src/jemalloc.git-trunk/src/jemalloc.c:2842 2842 if (!free_fastpath(ptr, 0, false)) { ``` After this change, we directly call new/delete and ultimately call sdallocx: ``` (gdb) disassemble Dump of assembler code for function main(): ... 0x0000000000402b77 <+55>: call 0x496ca0 ... 0x0000000000402b95 <+85>: call 0x496e60 ... (gdb) s 116 je_sdallocx_noflags(ptr, size); ``` --- configure.ac | 7 +- .../internal/jemalloc_internal_decls.h | 1 + m4/ax_cxx_compile_stdcxx.m4 | 449 ++++++++++++++++-- src/jemalloc_cpp.cpp | 121 ++++- 4 files changed, 536 insertions(+), 42 deletions(-) diff --git a/configure.ac b/configure.ac index c3f53f70..5e56e16b 100644 --- a/configure.ac +++ b/configure.ac @@ -290,8 +290,11 @@ if test "x$enable_cxx" = "x1" ; then dnl Require at least c++14, which is the first version to support sized dnl deallocation. C++ support is not compiled otherwise. 
m4_include([m4/ax_cxx_compile_stdcxx.m4]) - AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) - if test "x${HAVE_CXX14}" = "x1" ; then + AX_CXX_COMPILE_STDCXX([17], [noext], [optional]) + if test "x${HAVE_CXX17}" != "x1"; then + AX_CXX_COMPILE_STDCXX([14], [noext], [optional]) + fi + if test "x${HAVE_CXX14}" = "x1" -o "x${HAVE_CXX17}" = "x1"; then JE_CXXFLAGS_ADD([-Wall]) JE_CXXFLAGS_ADD([-Wextra]) JE_CXXFLAGS_ADD([-g3]) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 7d6053e2..042a1fa4 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -5,6 +5,7 @@ #ifdef _WIN32 # include # include "msvc_compat/windows_extra.h" +# include "msvc_compat/strings.h" # ifdef _WIN64 # if LG_VADDR <= 32 # error Generate the headers using x64 vcargs diff --git a/m4/ax_cxx_compile_stdcxx.m4 b/m4/ax_cxx_compile_stdcxx.m4 index 2c18e49c..43087b2e 100644 --- a/m4/ax_cxx_compile_stdcxx.m4 +++ b/m4/ax_cxx_compile_stdcxx.m4 @@ -1,5 +1,5 @@ # =========================================================================== -# http://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html +# https://www.gnu.org/software/autoconf-archive/ax_cxx_compile_stdcxx.html # =========================================================================== # # SYNOPSIS @@ -33,21 +33,23 @@ # Copyright (c) 2014, 2015 Google Inc.; contributed by Alexey Sokolov # Copyright (c) 2015 Paul Norman # Copyright (c) 2015 Moritz Klammler +# Copyright (c) 2016, 2018 Krzesimir Nowak +# Copyright (c) 2019 Enji Cooper # # Copying and distribution of this file, with or without modification, are # permitted in any medium without royalty provided the copyright notice # and this notice are preserved. This file is offered as-is, without any # warranty. -#serial 4 +#serial 11 dnl This macro is based on the code from the AX_CXX_COMPILE_STDCXX_11 macro dnl (serial version number 13). 
AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl - m4_if([$1], [11], [], - [$1], [14], [], - [$1], [17], [m4_fatal([support for C++17 not yet implemented in AX_CXX_COMPILE_STDCXX])], + m4_if([$1], [11], [ax_cxx_compile_alternatives="11 0x"], + [$1], [14], [ax_cxx_compile_alternatives="14 1y"], + [$1], [17], [ax_cxx_compile_alternatives="17 1z"], [m4_fatal([invalid first argument `$1' to AX_CXX_COMPILE_STDCXX])])dnl m4_if([$2], [], [], [$2], [ext], [], @@ -59,18 +61,11 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl [m4_fatal([invalid third argument `$3' to AX_CXX_COMPILE_STDCXX])]) AC_LANG_PUSH([C++])dnl ac_success=no - AC_CACHE_CHECK(whether $CXX supports C++$1 features by default, - ax_cv_cxx_compile_cxx$1, - [AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [ax_cv_cxx_compile_cxx$1=yes], - [ax_cv_cxx_compile_cxx$1=no])]) - if test x$ax_cv_cxx_compile_cxx$1 = xyes; then - ac_success=yes - fi m4_if([$2], [noext], [], [dnl if test x$ac_success = xno; then - for switch in -std=gnu++$1 -std=gnu++0x; do + for alternative in ${ax_cxx_compile_alternatives}; do + switch="-std=gnu++${alternative}" cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, $cachevar, @@ -96,22 +91,27 @@ AC_DEFUN([AX_CXX_COMPILE_STDCXX], [dnl dnl HP's aCC needs +std=c++11 according to: dnl http://h21007.www2.hp.com/portal/download/files/unprot/aCxx/PDF_Release_Notes/769149-001.pdf dnl Cray's crayCC needs "-h std=c++11" - for switch in -std=c++$1 -std=c++0x +std=c++$1 "-h std=c++$1"; do - cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) - AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, - $cachevar, - [ac_save_CXX="$CXX" - CXX="$CXX $switch" - AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], - [eval $cachevar=yes], - [eval $cachevar=no]) - CXX="$ac_save_CXX"]) - if eval test x\$$cachevar = xyes; then - CXX="$CXX $switch" - if test -n "$CXXCPP" ; then - CXXCPP="$CXXCPP $switch" 
+ for alternative in ${ax_cxx_compile_alternatives}; do + for switch in -std=c++${alternative} +std=c++${alternative} "-h std=c++${alternative}"; do + cachevar=AS_TR_SH([ax_cv_cxx_compile_cxx$1_$switch]) + AC_CACHE_CHECK(whether $CXX supports C++$1 features with $switch, + $cachevar, + [ac_save_CXX="$CXX" + CXX="$CXX $switch" + AC_COMPILE_IFELSE([AC_LANG_SOURCE([_AX_CXX_COMPILE_STDCXX_testbody_$1])], + [eval $cachevar=yes], + [eval $cachevar=no]) + CXX="$ac_save_CXX"]) + if eval test x\$$cachevar = xyes; then + CXX="$CXX $switch" + if test -n "$CXXCPP" ; then + CXXCPP="$CXXCPP $switch" + fi + ac_success=yes + break fi - ac_success=yes + done + if test x$ac_success = xyes; then break fi done @@ -148,6 +148,11 @@ m4_define([_AX_CXX_COMPILE_STDCXX_testbody_14], _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 ) +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_17], + _AX_CXX_COMPILE_STDCXX_testbody_new_in_11 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_14 + _AX_CXX_COMPILE_STDCXX_testbody_new_in_17 +) dnl Tests for new features in C++11 @@ -185,11 +190,13 @@ namespace cxx11 struct Base { + virtual ~Base() {} virtual void f() {} }; struct Derived : public Base { + virtual ~Derived() override {} virtual void f() override {} }; @@ -518,7 +525,7 @@ namespace cxx14 } - namespace test_digit_seperators + namespace test_digit_separators { constexpr auto ten_million = 100'000'000; @@ -560,3 +567,385 @@ namespace cxx14 #endif // __cplusplus >= 201402L ]]) + + +dnl Tests for new features in C++17 + +m4_define([_AX_CXX_COMPILE_STDCXX_testbody_new_in_17], [[ + +// If the compiler admits that it is not ready for C++17, why torture it? +// Hopefully, this will speed up the test. 
+ +#ifndef __cplusplus + +#error "This is not a C++ compiler" + +#elif __cplusplus < 201703L + +#error "This is not a C++17 compiler" + +#else + +#include +#include +#include + +namespace cxx17 +{ + + namespace test_constexpr_lambdas + { + + constexpr int foo = [](){return 42;}(); + + } + + namespace test::nested_namespace::definitions + { + + } + + namespace test_fold_expression + { + + template + int multiply(Args... args) + { + return (args * ... * 1); + } + + template + bool all(Args... args) + { + return (args && ...); + } + + } + + namespace test_extended_static_assert + { + + static_assert (true); + + } + + namespace test_auto_brace_init_list + { + + auto foo = {5}; + auto bar {5}; + + static_assert(std::is_same, decltype(foo)>::value); + static_assert(std::is_same::value); + } + + namespace test_typename_in_template_template_parameter + { + + template typename X> struct D; + + } + + namespace test_fallthrough_nodiscard_maybe_unused_attributes + { + + int f1() + { + return 42; + } + + [[nodiscard]] int f2() + { + [[maybe_unused]] auto unused = f1(); + + switch (f1()) + { + case 17: + f1(); + [[fallthrough]]; + case 42: + f1(); + } + return f1(); + } + + } + + namespace test_extended_aggregate_initialization + { + + struct base1 + { + int b1, b2 = 42; + }; + + struct base2 + { + base2() { + b3 = 42; + } + int b3; + }; + + struct derived : base1, base2 + { + int d; + }; + + derived d1 {{1, 2}, {}, 4}; // full initialization + derived d2 {{}, {}, 4}; // value-initialized bases + + } + + namespace test_general_range_based_for_loop + { + + struct iter + { + int i; + + int& operator* () + { + return i; + } + + const int& operator* () const + { + return i; + } + + iter& operator++() + { + ++i; + return *this; + } + }; + + struct sentinel + { + int i; + }; + + bool operator== (const iter& i, const sentinel& s) + { + return i.i == s.i; + } + + bool operator!= (const iter& i, const sentinel& s) + { + return !(i == s); + } + + struct range + { + iter begin() const + { 
+ return {0}; + } + + sentinel end() const + { + return {5}; + } + }; + + void f() + { + range r {}; + + for (auto i : r) + { + [[maybe_unused]] auto v = i; + } + } + + } + + namespace test_lambda_capture_asterisk_this_by_value + { + + struct t + { + int i; + int foo() + { + return [*this]() + { + return i; + }(); + } + }; + + } + + namespace test_enum_class_construction + { + + enum class byte : unsigned char + {}; + + byte foo {42}; + + } + + namespace test_constexpr_if + { + + template + int f () + { + if constexpr(cond) + { + return 13; + } + else + { + return 42; + } + } + + } + + namespace test_selection_statement_with_initializer + { + + int f() + { + return 13; + } + + int f2() + { + if (auto i = f(); i > 0) + { + return 3; + } + + switch (auto i = f(); i + 4) + { + case 17: + return 2; + + default: + return 1; + } + } + + } + + namespace test_template_argument_deduction_for_class_templates + { + + template + struct pair + { + pair (T1 p1, T2 p2) + : m1 {p1}, + m2 {p2} + {} + + T1 m1; + T2 m2; + }; + + void f() + { + [[maybe_unused]] auto p = pair{13, 42u}; + } + + } + + namespace test_non_type_auto_template_parameters + { + + template + struct B + {}; + + B<5> b1; + B<'a'> b2; + + } + + namespace test_structured_bindings + { + + int arr[2] = { 1, 2 }; + std::pair pr = { 1, 2 }; + + auto f1() -> int(&)[2] + { + return arr; + } + + auto f2() -> std::pair& + { + return pr; + } + + struct S + { + int x1 : 2; + volatile double y1; + }; + + S f3() + { + return {}; + } + + auto [ x1, y1 ] = f1(); + auto& [ xr1, yr1 ] = f1(); + auto [ x2, y2 ] = f2(); + auto& [ xr2, yr2 ] = f2(); + const auto [ x3, y3 ] = f3(); + + } + + namespace test_exception_spec_type_system + { + + struct Good {}; + struct Bad {}; + + void g1() noexcept; + void g2(); + + template + Bad + f(T*, T*); + + template + Good + f(T1*, T2*); + + static_assert (std::is_same_v); + + } + + namespace test_inline_variables + { + + template void f(T) + {} + + template inline T g(T) + { + return T{}; + } + + 
template<> inline void f<>(int) + {} + + template<> int g<>(int) + { + return 5; + } + + } + +} // namespace cxx17 + +#endif // __cplusplus < 201703L + +]]) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index da0441a7..f10970ac 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -39,6 +39,20 @@ void operator delete(void *ptr, std::size_t size) noexcept; void operator delete[](void *ptr, std::size_t size) noexcept; #endif +#if __cpp_aligned_new >= 201606 +/* C++17's over-aligned operators. */ +void *operator new(std::size_t size, std::align_val_t); +void *operator new(std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, std::align_val_t); +void *operator new[](std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void* ptr, std::align_val_t) noexcept; +void operator delete(void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept; +void operator delete[](void* ptr, std::align_val_t) noexcept; +void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcept; +#endif + JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { @@ -76,12 +90,46 @@ JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = je_malloc(size); - if (likely(ptr != nullptr)) + if (likely(ptr != nullptr)) { return ptr; + } return handleOOM(size, IsNoExcept); } +template +JEMALLOC_ALWAYS_INLINE +void * +alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { + void *ptr = je_aligned_alloc(static_cast(alignment), size); + if (likely(ptr != nullptr)) { + return ptr; + } + + return handleOOM(size, IsNoExcept); +} + +JEMALLOC_ALWAYS_INLINE +void +sizedDeleteImpl(void* ptr, std::size_t size) noexcept { + if (unlikely(ptr 
== nullptr)) { + return; + } + je_sdallocx_noflags(ptr, size); +} + +JEMALLOC_ALWAYS_INLINE +void +alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + if (config_debug) { + assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); + } + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); +} + void * operator new(std::size_t size) { return newImpl(size); @@ -102,6 +150,30 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept { return newImpl(size); } +#if __cpp_aligned_new >= 201606 + +void * +operator new(std::size_t size, std::align_val_t alignment) { + return alignedNewImpl(size, alignment); +} + +void * +operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { + return alignedNewImpl(size, alignment); +} + +void * +operator new[](std::size_t size, std::align_val_t alignment) { + return alignedNewImpl(size, alignment); +} + +void * +operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { + return alignedNewImpl(size, alignment); +} + +#endif // __cpp_aligned_new + void operator delete(void *ptr) noexcept { je_free(ptr); @@ -125,17 +197,46 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept { void operator delete(void *ptr, std::size_t size) noexcept { - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx_noflags(ptr, size); + sizedDeleteImpl(ptr, size); } -void operator delete[](void *ptr, std::size_t size) noexcept { - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx_noflags(ptr, size); +void +operator delete[](void *ptr, std::size_t size) noexcept { + sizedDeleteImpl(ptr, size); } #endif // __cpp_sized_deallocation + +#if __cpp_aligned_new >= 201606 + +void +operator delete(void* ptr, std::align_val_t) noexcept { + je_free(ptr); +} + +void +operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + je_free(ptr); +} + +void 
+operator delete[](void* ptr, std::align_val_t) noexcept { + je_free(ptr); +} + +void +operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + je_free(ptr); +} + +void +operator delete(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + alignedSizedDeleteImpl(ptr, size, alignment); +} + +void +operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + alignedSizedDeleteImpl(ptr, size, alignment); +} + +#endif // __cpp_aligned_new From b55419f9b99ab416f035179593370401af8d213f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 19 Nov 2019 16:24:57 -0800 Subject: [PATCH 1423/2608] Restructure profiling Develop new data structure and code logic for holding profiling related information stored in the extent that may be needed after the extent is released, which in particular is the case for the reallocation code path (e.g. in `rallocx()` and `xallocx()`). The data structure is a generalization of `prof_tctx_t`: we previously only copy out the `prof_tctx` before the extent is released, but we may be in need of additional fields. Currently the only additional field is the allocation time field, but there may be more fields in the future. The restructuring also resolved a bug: `prof_realloc()` mistakenly passed the new `ptr` to `prof_free_sampled_object()`, but passing in the `old_ptr` would crash because it's already been released. Now the essential profiling information is collectively copied out early and safely passed to `prof_free_sampled_object()` after the extent is released. 
--- include/jemalloc/internal/arena_inlines_b.h | 44 +++++++++------------ include/jemalloc/internal/extent.h | 15 +++---- include/jemalloc/internal/large_externs.h | 5 +-- include/jemalloc/internal/prof_externs.h | 5 +-- include/jemalloc/internal/prof_inlines_b.h | 34 +++++++--------- include/jemalloc/internal/prof_structs.h | 7 ++++ include/jemalloc/internal/prof_types.h | 1 + src/jemalloc.c | 27 ++++++------- src/large.c | 12 ++---- src/prof.c | 9 +++-- src/prof_log.c | 5 ++- test/unit/prof_tctx.c | 10 ++--- 12 files changed, 82 insertions(+), 92 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7ac2f942..dd743cea 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -40,23 +40,31 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { return arena_choose(tsd, NULL); } -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +JEMALLOC_ALWAYS_INLINE void +arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); + assert(prof_info != NULL); + + const extent_t *extent; + bool is_slab; /* Static check. */ if (alloc_ctx == NULL) { - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(!extent_slab_get(extent))) { - return large_prof_tctx_get(tsdn, extent); - } - } else { - if (unlikely(!alloc_ctx->slab)) { - return large_prof_tctx_get(tsdn, iealloc(tsdn, ptr)); - } + extent = iealloc(tsdn, ptr); + is_slab = extent_slab_get(extent); + } else if (!unlikely(is_slab = alloc_ctx->slab)) { + extent = iealloc(tsdn, ptr); + } + + if (unlikely(!is_slab)) { + /* extent must have been initialized at this point. 
*/ + large_prof_info_get(tsdn, extent, prof_info); + } else { + memset(prof_info, 0, sizeof(prof_info_t)); + prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; } - return (prof_tctx_t *)(uintptr_t)1U; } JEMALLOC_ALWAYS_INLINE void @@ -89,20 +97,6 @@ arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { large_prof_tctx_reset(tsdn, extent); } -JEMALLOC_ALWAYS_INLINE nstime_t -arena_prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - extent_t *extent = iealloc(tsdn, ptr); - /* - * Unlike arena_prof_prof_tctx_{get, set}, we only call this once we're - * sure we have a sampled allocation. - */ - assert(!extent_slab_get(extent)); - return large_prof_alloc_time_get(extent); -} - JEMALLOC_ALWAYS_INLINE void arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 92c34aec..c47beafd 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -333,15 +333,12 @@ extent_slab_data_get_const(const extent_t *extent) { return &extent->e_slab_data; } -static inline prof_tctx_t * -extent_prof_tctx_get(const extent_t *extent) { - return (prof_tctx_t *)atomic_load_p(&extent->e_prof_tctx, - ATOMIC_ACQUIRE); -} - -static inline nstime_t -extent_prof_alloc_time_get(const extent_t *extent) { - return extent->e_alloc_time; +static inline void +extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_info->prof_tctx = (prof_tctx_t *)atomic_load_p( + &extent->e_prof_tctx, ATOMIC_ACQUIRE); + prof_info->alloc_time = extent->e_alloc_time; } static inline void diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index a05019e8..9a1ff160 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,11 +22,10 @@ void 
large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -prof_tctx_t *large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent); +void large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, + prof_info_t *prof_info); void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); - -nstime_t large_prof_alloc_time_get(const extent_t *extent); void large_prof_alloc_time_set(extent_t *extent, nstime_t time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index fd18ac48..47e47ba6 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -51,8 +51,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx); +void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); @@ -102,7 +101,7 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(tsd_t *tsd); -void prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); +void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); bool prof_log_init(tsd_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_b.h 
b/include/jemalloc/internal/prof_inlines_b.h index 388537e6..5acb4ca1 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -39,12 +39,14 @@ prof_tdata_get(tsd_t *tsd, bool create) { return tdata; } -JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { +JEMALLOC_ALWAYS_INLINE void +prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); + assert(prof_info != NULL); - return arena_prof_tctx_get(tsdn, ptr, alloc_ctx); + arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info); } JEMALLOC_ALWAYS_INLINE void @@ -64,14 +66,6 @@ prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { arena_prof_tctx_reset(tsdn, ptr, tctx); } -JEMALLOC_ALWAYS_INLINE nstime_t -prof_alloc_time_get(tsdn_t *tsdn, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - - return arena_prof_alloc_time_get(tsdn, ptr); -} - JEMALLOC_ALWAYS_INLINE void prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { cassert(config_prof); @@ -152,7 +146,7 @@ prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, bool prof_active, bool updated, const void *old_ptr, size_t old_usize, - prof_tctx_t *old_tctx) { + prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -174,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_prof_info->prof_tctx > (uintptr_t)1U); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -191,8 +185,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, */ prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); } else { - 
assert((uintptr_t)prof_tctx_get(tsd_tsdn(tsd), ptr, NULL) == - (uintptr_t)1U); + prof_info_t prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, NULL, &prof_info); + assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); } /* @@ -203,19 +198,20 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, ptr, old_usize, old_tctx); + prof_free_sampled_object(tsd, old_usize, old_prof_info); } } JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { - prof_tctx_t *tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + prof_info_t prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_free_sampled_object(tsd, ptr, usize, tctx); + if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) { + prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 9a00a189..17a56508 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -96,6 +96,13 @@ struct prof_tctx_s { }; typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; +struct prof_info_s { + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *prof_tctx; + /* Time when the allocation was made. */ + nstime_t alloc_time; +}; + struct prof_gctx_s { /* Protects nlimbo, cnt_summed, and tctxs. 
*/ malloc_mutex_t *lock; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index a50653bb..7a34385b 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -5,6 +5,7 @@ typedef struct prof_bt_s prof_bt_t; typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; diff --git a/src/jemalloc.c b/src/jemalloc.c index e8ac2fc9..17709923 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3009,13 +3009,11 @@ JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { + prof_info_t old_prof_info; + prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), old_ptr, alloc_ctx); - tctx = prof_alloc_prep(tsd, *usize, prof_active, false); if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, *usize, alignment, zero, tcache, arena, tctx, hook_args); @@ -3040,7 +3038,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, *usize = isalloc(tsd_tsdn(tsd), p); } prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, - old_usize, old_tctx); + old_usize, &old_prof_info); return p; } @@ -3262,18 +3260,15 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t 
extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { - size_t usize_max, usize; - bool prof_active; - prof_tctx_t *old_tctx, *tctx; - - prof_active = prof_active_get_unlocked(); - old_tctx = prof_tctx_get(tsd_tsdn(tsd), ptr, alloc_ctx); + prof_info_t old_prof_info; + prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in * prof_alloc_prep() to decide whether to capture a backtrace. * prof_realloc() will use the actual usize to decide whether to sample. */ + size_t usize_max; if (alignment == 0) { usize_max = sz_s2u(size+extra); assert(usize_max > 0 @@ -3292,8 +3287,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } } thread_event(tsd, usize_max); - tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero, tctx); @@ -3318,7 +3315,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, return usize; } prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, - old_tctx); + &old_prof_info); return usize; } diff --git a/src/large.c b/src/large.c index 8aaa3ce2..6eeb7f49 100644 --- a/src/large.c +++ b/src/large.c @@ -367,9 +367,10 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) { return extent_usize_get(extent); } -prof_tctx_t * -large_prof_tctx_get(tsdn_t *tsdn, const extent_t *extent) { - return extent_prof_tctx_get(extent); +void +large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, + prof_info_t *prof_info) { + extent_prof_info_get(extent, prof_info); } void @@ -382,11 +383,6 @@ large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { large_prof_tctx_set(tsdn, extent, 
(prof_tctx_t *)(uintptr_t)1U); } -nstime_t -large_prof_alloc_time_get(const extent_t *extent) { - return extent_prof_alloc_time_get(extent); -} - void large_prof_alloc_time_set(extent_t *extent, nstime_t t) { extent_prof_alloc_time_set(extent, t); diff --git a/src/prof.c b/src/prof.c index 0590482c..ccac3c0f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -187,8 +187,11 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + assert(prof_info != NULL); + prof_tctx_t *tctx = prof_info->prof_tctx; + assert((uintptr_t)tctx > (uintptr_t)1U); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); @@ -196,7 +199,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.curobjs--; tctx->cnts.curbytes -= usize; - prof_try_log(tsd, ptr, usize, tctx); + prof_try_log(tsd, usize, prof_info); if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { prof_tctx_destroy(tsd, tctx); diff --git a/src/prof_log.c b/src/prof_log.c index 73ca7417..5747c8db 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -199,7 +199,8 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { } void -prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { +prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + prof_tctx_t *tctx = prof_info->prof_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); @@ -229,7 +230,7 @@ prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { log_tables_initialized = true; } - nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr); + nstime_t alloc_time = prof_info->alloc_time; nstime_t free_time = NSTIME_ZERO_INITIALIZER; nstime_update(&free_time); diff --git a/test/unit/prof_tctx.c 
b/test/unit/prof_tctx.c index ff3b2b0c..30df71b2 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_prof_realloc) { tsdn_t *tsdn; int flags; void *p, *q; - prof_tctx_t *tctx_p, *tctx_q; + prof_info_t prof_info_p, prof_info_q; uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; test_skip_if(!config_prof); @@ -15,8 +15,8 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - tctx_p = prof_tctx_get(tsdn, p, NULL); - assert_ptr_ne(tctx_p, (prof_tctx_t *)(uintptr_t)1U, + prof_info_get(tsdn, p, NULL, &prof_info_p); + assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); assert_u64_eq(curobjs_0 + 1, curobjs_1, @@ -25,8 +25,8 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - tctx_q = prof_tctx_get(tsdn, q, NULL); - assert_ptr_ne(tctx_q, (prof_tctx_t *)(uintptr_t)1U, + prof_info_get(tsdn, q, NULL, &prof_info_q); + assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); assert_u64_eq(curobjs_1, curobjs_2, From 694537177851b52851b89bf59f1692d2b9e348aa Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 22 Nov 2019 11:42:01 -0800 Subject: [PATCH 1424/2608] Change tsdn to tsd for profiling code path --- include/jemalloc/internal/arena_inlines_b.h | 26 ++++++++-------- include/jemalloc/internal/large_externs.h | 7 ++--- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 34 ++++++++++----------- src/jemalloc.c | 6 ++-- src/large.c | 9 +++--- src/prof.c | 10 +++--- test/unit/prof_tctx.c | 8 ++--- 8 files changed, 50 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h 
b/include/jemalloc/internal/arena_inlines_b.h index dd743cea..6ec1a123 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -41,7 +41,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, +arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); @@ -52,15 +52,15 @@ arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - extent = iealloc(tsdn, ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); is_slab = extent_slab_get(extent); } else if (!unlikely(is_slab = alloc_ctx->slab)) { - extent = iealloc(tsdn, ptr); + extent = iealloc(tsd_tsdn(tsd), ptr); } if (unlikely(!is_slab)) { /* extent must have been initialized at this point. */ - large_prof_info_get(tsdn, extent, prof_info); + large_prof_info_get(extent, prof_info); } else { memset(prof_info, 0, sizeof(prof_info_t)); prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; @@ -68,41 +68,41 @@ arena_prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); /* Static check. 
*/ if (alloc_ctx == NULL) { - extent_t *extent = iealloc(tsdn, ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); if (unlikely(!extent_slab_get(extent))) { - large_prof_tctx_set(tsdn, extent, tctx); + large_prof_tctx_set(extent, tctx); } } else { if (unlikely(!alloc_ctx->slab)) { - large_prof_tctx_set(tsdn, iealloc(tsdn, ptr), tctx); + large_prof_tctx_set(iealloc(tsd_tsdn(tsd), ptr), tctx); } } } static inline void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsdn, ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); assert(!extent_slab_get(extent)); - large_prof_tctx_reset(tsdn, extent); + large_prof_tctx_reset(extent); } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { +arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsdn, ptr); + extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); assert(!extent_slab_get(extent)); large_prof_alloc_time_set(extent, t); } diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 9a1ff160..85786bb2 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,10 +22,9 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -void large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, - prof_info_t *prof_info); -void large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx); -void large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent); +void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); +void 
large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); +void large_prof_tctx_reset(extent_t *extent); void large_prof_alloc_time_set(extent_t *extent, nstime_t time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 47e47ba6..6e020be1 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -49,7 +49,7 @@ bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, +void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 5acb4ca1..827476d1 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -40,38 +40,38 @@ prof_tdata_get(tsd_t *tsd, bool create) { } JEMALLOC_ALWAYS_INLINE void -prof_info_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx, +prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); assert(prof_info != NULL); - arena_prof_info_get(tsdn, ptr, alloc_ctx, prof_info); + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsdn, ptr, usize, alloc_ctx, tctx); + arena_prof_tctx_set(tsd, ptr, usize, alloc_ctx, tctx); } JEMALLOC_ALWAYS_INLINE void 
-prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { +prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsdn, ptr, tctx); + arena_prof_tctx_reset(tsd, ptr, tctx); } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsdn_t *tsdn, const void *ptr, nstime_t t) { +prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { cassert(config_prof); assert(ptr != NULL); - arena_prof_alloc_time_set(tsdn, ptr, t); + arena_prof_alloc_time_set(tsd, ptr, t); } JEMALLOC_ALWAYS_INLINE bool @@ -129,16 +129,16 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsdn_t *tsdn, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, +prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - assert(usize == isalloc(tsdn, ptr)); + assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_malloc_sample_object(tsdn, ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); } else { - prof_tctx_set(tsdn, ptr, usize, alloc_ctx, + prof_tctx_set(tsd, ptr, usize, alloc_ctx, (prof_tctx_t *)(uintptr_t)1U); } } @@ -172,9 +172,9 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, moved = (ptr != old_ptr); if (unlikely(sampled)) { - prof_malloc_sample_object(tsd_tsdn(tsd), ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd_tsdn(tsd), ptr, usize, NULL, + prof_tctx_set(tsd, ptr, usize, NULL, (prof_tctx_t *)(uintptr_t)1U); } else if (unlikely(old_sampled)) { /* @@ -183,10 +183,10 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, * to do here in the presence of explicit knowledge re: moved * state. 
*/ - prof_tctx_reset(tsd_tsdn(tsd), ptr, tctx); + prof_tctx_reset(tsd, ptr, tctx); } else { prof_info_t prof_info; - prof_info_get(tsd_tsdn(tsd), ptr, NULL, &prof_info); + prof_info_get(tsd, ptr, NULL, &prof_info); assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); } @@ -205,7 +205,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; - prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &prof_info); + prof_info_get(tsd, ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); diff --git a/src/jemalloc.c b/src/jemalloc.c index 17709923..13bf8d7e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2171,7 +2171,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { prof_alloc_rollback(tsd, tctx, true); goto label_oom; } - prof_malloc(tsd_tsdn(tsd), allocation, usize, &alloc_ctx, tctx); + prof_malloc(tsd, allocation, usize, &alloc_ctx, tctx); } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, @@ -3010,7 +3010,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; - prof_info_get(tsd_tsdn(tsd), old_ptr, alloc_ctx, &old_prof_info); + prof_info_get(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; @@ -3261,7 +3261,7 @@ JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { prof_info_t old_prof_info; - prof_info_get(tsd_tsdn(tsd), ptr, alloc_ctx, &old_prof_info); + prof_info_get(tsd, ptr, alloc_ctx, 
&old_prof_info); /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in diff --git a/src/large.c b/src/large.c index 6eeb7f49..4d1257f6 100644 --- a/src/large.c +++ b/src/large.c @@ -368,19 +368,18 @@ large_salloc(tsdn_t *tsdn, const extent_t *extent) { } void -large_prof_info_get(tsdn_t *tsdn, const extent_t *extent, - prof_info_t *prof_info) { +large_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { extent_prof_info_get(extent, prof_info); } void -large_prof_tctx_set(tsdn_t *tsdn, extent_t *extent, prof_tctx_t *tctx) { +large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { extent_prof_tctx_set(extent, tctx); } void -large_prof_tctx_reset(tsdn_t *tsdn, extent_t *extent) { - large_prof_tctx_set(tsdn, extent, (prof_tctx_t *)(uintptr_t)1U); +large_prof_tctx_reset(extent_t *extent) { + large_prof_tctx_set(extent, (prof_tctx_t *)(uintptr_t)1U); } void diff --git a/src/prof.c b/src/prof.c index ccac3c0f..36945bdb 100644 --- a/src/prof.c +++ b/src/prof.c @@ -165,17 +165,17 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { } void -prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, +prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsdn, ptr, usize, NULL, tctx); + prof_tctx_set(tsd, ptr, usize, NULL, tctx); /* Get the current time and set this in the extent_t. We'll read this * when free() is called. 
*/ nstime_t t = NSTIME_ZERO_INITIALIZER; nstime_update(&t); - prof_alloc_time_set(tsdn, ptr, t); + prof_alloc_time_set(tsd, ptr, t); - malloc_mutex_lock(tsdn, tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; tctx->cnts.curbytes += usize; if (opt_prof_accum) { @@ -183,7 +183,7 @@ prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize, tctx->cnts.accumbytes += usize; } tctx->prepared = false; - malloc_mutex_unlock(tsdn, tctx->tdata->lock); + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); } void diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 30df71b2..4e775452 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" TEST_BEGIN(test_prof_realloc) { - tsdn_t *tsdn; + tsd_t *tsd; int flags; void *p, *q; prof_info_t prof_info_p, prof_info_q; @@ -9,13 +9,13 @@ TEST_BEGIN(test_prof_realloc) { test_skip_if(!config_prof); - tsdn = tsdn_fetch(); + tsd = tsd_fetch(); flags = MALLOCX_TCACHE_NONE; prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); - prof_info_get(tsdn, p, NULL, &prof_info_p); + prof_info_get(tsd, p, NULL, &prof_info_p); assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); @@ -25,7 +25,7 @@ TEST_BEGIN(test_prof_realloc) { q = rallocx(p, 2048, flags); assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); - prof_info_get(tsdn, q, NULL, &prof_info_q); + prof_info_get(tsd, q, NULL, &prof_info_q); assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); From 5c47a3022775080866fd37d74c0143d7ffec3915 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 25 Nov 2019 15:27:52 -0800 Subject: [PATCH 1425/2608] Guard C++ aligned APIs --- src/jemalloc_cpp.cpp | 4 
++++ 1 file changed, 4 insertions(+) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index f10970ac..c2110a11 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -97,6 +97,7 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { return handleOOM(size, IsNoExcept); } +#if __cpp_aligned_new >= 201606 template JEMALLOC_ALWAYS_INLINE void * @@ -108,6 +109,7 @@ alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept return handleOOM(size, IsNoExcept); } +#endif // __cpp_aligned_new JEMALLOC_ALWAYS_INLINE void @@ -118,6 +120,7 @@ sizedDeleteImpl(void* ptr, std::size_t size) noexcept { je_sdallocx_noflags(ptr, size); } +#if __cpp_aligned_new >= 201606 JEMALLOC_ALWAYS_INLINE void alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { @@ -129,6 +132,7 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) } je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); } +#endif // __cpp_aligned_new void * operator new(std::size_t size) { From a70909b130ab37a0e87627122f1f637f08173431 Mon Sep 17 00:00:00 2001 From: Li-Wen Hsu Date: Tue, 3 Dec 2019 02:18:27 +0800 Subject: [PATCH 1426/2608] Test on all supported release of FreeBSD Keep 11.2 because 11.3 is temporarily not available for now. 
--- .cirrus.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.cirrus.yml b/.cirrus.yml index 019d2c38..a9de9534 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -5,6 +5,7 @@ env: task: freebsd_instance: matrix: + image: freebsd-12-1-release-amd64 image: freebsd-12-0-release-amd64 image: freebsd-11-2-release-amd64 install_script: From 1b1e76acfe281e5b27a2ce0e28342cbc04c01b37 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 10:16:44 -0800 Subject: [PATCH 1427/2608] Disable some spuriously-triggering warnings --- configure.ac | 5 +++++ include/jemalloc/internal/tcache_types.h | 2 +- src/prof.c | 17 ++++++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 5e56e16b..6ccd009a 100644 --- a/configure.ac +++ b/configure.ac @@ -250,6 +250,11 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) JE_CFLAGS_ADD([-Wno-format-zero-length]) + dnl This warning triggers on the use of the universal zero initializer, which + dnl is a very handy idiom for things like the tcache static initializer (which + dnl has lots of nested structs). See the discussion at. + dnl https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 + JE_CFLAGS_ADD([-Wno-missing-braces]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 9fd39263..c30a5339 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -51,7 +51,7 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_GC_INCR_BYTES 65536U /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ -#define TCACHE_ZERO_INITIALIZER {{0}} +#define TCACHE_ZERO_INITIALIZER {0} /* Used in TSD static initializer only. Will be initialized to opt_tcache. 
*/ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/src/prof.c b/src/prof.c index 36945bdb..9c2357c8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -113,6 +113,21 @@ bool prof_booted = false; /******************************************************************************/ +/* + * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up + * calling strncpy with a size of 0, which triggers a -Wstringop-truncation + * warning (strncpy can never actually be called in this case, since we bail out + * much earlier when config_prof is false). This function works around the + * warning to let us leave the warning on. + */ +static inline void +prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { + cassert(config_prof); +#ifdef JEMALLOC_PROF + strncpy(dest, src, size); +#endif +} + static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); @@ -692,7 +707,7 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { } assert(prof_dump_prefix != NULL); - strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); From 5e0b090992ba4399b65c177cd30d56cc69c96646 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 5 Dec 2019 15:15:36 -0800 Subject: [PATCH 1428/2608] No need to pass usize to prof_tctx_set() --- include/jemalloc/internal/arena_inlines_b.h | 4 ++-- include/jemalloc/internal/prof_inlines_b.h | 11 +++++------ src/prof.c | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 6ec1a123..fb25c8f8 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -68,8 +68,8 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t 
*alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { +arena_prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 827476d1..06689c8a 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -50,12 +50,12 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsd_t *tsd, const void *ptr, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { +prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, + prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsd, ptr, usize, alloc_ctx, tctx); + arena_prof_tctx_set(tsd, ptr, alloc_ctx, tctx); } JEMALLOC_ALWAYS_INLINE void @@ -138,7 +138,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else { - prof_tctx_set(tsd, ptr, usize, alloc_ctx, + prof_tctx_set(tsd, ptr, alloc_ctx, (prof_tctx_t *)(uintptr_t)1U); } } @@ -174,8 +174,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely(sampled)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd, ptr, usize, NULL, - (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_set(tsd, ptr, NULL, (prof_tctx_t *)(uintptr_t)1U); } else if (unlikely(old_sampled)) { /* * prof_tctx_set() would work for the !moved case as well, but diff --git a/src/prof.c b/src/prof.c index 9c2357c8..d0c06a8a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -182,7 +182,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, 
const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsd, ptr, usize, NULL, tctx); + prof_tctx_set(tsd, ptr, NULL, tctx); /* Get the current time and set this in the extent_t. We'll read this * when free() is called. */ From aa1d71fb7ab34ce96743753f08a761747b5449c8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 5 Dec 2019 15:35:12 -0800 Subject: [PATCH 1429/2608] Rename prof_tctx to alloc_tctx in prof_info_t --- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 6 +++--- include/jemalloc/internal/prof_structs.h | 2 +- src/prof.c | 2 +- src/prof_log.c | 2 +- test/unit/prof_tctx.c | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fb25c8f8..930daba0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -63,7 +63,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, large_prof_info_get(extent, prof_info); } else { memset(prof_info, 0, sizeof(prof_info_t)); - prof_info->prof_tctx = (prof_tctx_t *)(uintptr_t)1U; + prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; } } diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index c47beafd..3a20540d 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -336,7 +336,7 @@ extent_slab_data_get_const(const extent_t *extent) { static inline void extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { assert(prof_info != NULL); - prof_info->prof_tctx = (prof_tctx_t *)atomic_load_p( + prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( &extent->e_prof_tctx, ATOMIC_ACQUIRE); prof_info->alloc_time = extent->e_alloc_time; } diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 
06689c8a..3c0594ef 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -168,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_prof_info->prof_tctx > (uintptr_t)1U); + old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -186,7 +186,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, } else { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); - assert((uintptr_t)prof_info.prof_tctx == (uintptr_t)1U); + assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U); } /* @@ -209,7 +209,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)prof_info.prof_tctx > (uintptr_t)1U)) { + if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 17a56508..6223adc8 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -98,7 +98,7 @@ typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; struct prof_info_s { /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *prof_tctx; + prof_tctx_t *alloc_tctx; /* Time when the allocation was made. 
*/ nstime_t alloc_time; }; diff --git a/src/prof.c b/src/prof.c index d0c06a8a..3be461bc 100644 --- a/src/prof.c +++ b/src/prof.c @@ -204,7 +204,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { assert(prof_info != NULL); - prof_tctx_t *tctx = prof_info->prof_tctx; + prof_tctx_t *tctx = prof_info->alloc_tctx; assert((uintptr_t)tctx > (uintptr_t)1U); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/prof_log.c b/src/prof_log.c index 5747c8db..b5879348 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -200,7 +200,7 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { - prof_tctx_t *tctx = prof_info->prof_tctx; + prof_tctx_t *tctx = prof_info->alloc_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false); diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 4e775452..4dde0ab2 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -16,7 +16,7 @@ TEST_BEGIN(test_prof_realloc) { p = mallocx(1024, flags); assert_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - assert_ptr_ne(prof_info_p.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, + assert_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); assert_u64_eq(curobjs_0 + 1, curobjs_1, @@ -26,7 +26,7 @@ TEST_BEGIN(test_prof_realloc) { assert_ptr_ne(p, q, "Expected move"); assert_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - assert_ptr_ne(prof_info_q.prof_tctx, (prof_tctx_t *)(uintptr_t)1U, + assert_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); assert_u64_eq(curobjs_1, curobjs_2, From 
dfdd46f6c1e136b57cc943a8569f7f95312f88c6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 5 Dec 2019 15:52:54 -0800 Subject: [PATCH 1430/2608] Refactor prof_tctx_t creation --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 23 +++++----------------- src/prof_data.c | 16 +++++++++++---- 3 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 6e020be1..86f4193a 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -54,7 +54,7 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); -prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); +prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); size_t prof_bt_count(void); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 3c0594ef..2aebb3de 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -75,8 +75,7 @@ prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, - prof_tdata_t **tdata_out) { +prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { cassert(config_prof); /* Fastpath: no need to load tdata */ @@ -90,14 +89,6 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update, prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { - tdata = NULL; - } - - if (tdata_out != NULL) { - *tdata_out = tdata; - } - - if (unlikely(tdata == NULL)) { return true; } @@ -111,18 +102,14 @@ prof_sample_accum_update(tsd_t *tsd, 
size_t usize, bool update, JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { prof_tctx_t *ret; - prof_tdata_t *tdata; - prof_bt_t bt; assert(usize == sz_s2u(usize)); - if (!prof_active || likely(prof_sample_accum_update(tsd, usize, update, - &tdata))) { + if (!prof_active || + likely(prof_sample_accum_update(tsd, usize, update))) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { - bt_init(&bt, tdata->vec); - prof_backtrace(tsd, &bt); - ret = prof_lookup(tsd, &bt); + ret = prof_tctx_create(tsd); } return ret; @@ -154,7 +141,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (prof_active && !updated && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_accum_update(tsd, usize, true, NULL)) { + if (prof_sample_accum_update(tsd, usize, true)) { /* * Don't sample. The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a diff --git a/src/prof_data.c b/src/prof_data.c index 2f8bd2de..1b321528 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -300,7 +300,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return false; } -prof_tctx_t * +static prof_tctx_t * prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { prof_tctx_t *p; @@ -312,9 +312,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); tdata = prof_tdata_get(tsd, false); - if (tdata == NULL) { - return NULL; - } + assert(tdata != NULL); malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v); @@ -374,6 +372,16 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { return ret.p; } +prof_tctx_t * +prof_tctx_create(tsd_t *tsd) { + prof_tdata_t *tdata = prof_tdata_get(tsd, false); + assert(tdata != NULL); + prof_bt_t bt; + bt_init(&bt, tdata->vec); + prof_backtrace(tsd, &bt); + return prof_lookup(tsd, &bt); +} + #ifdef JEMALLOC_JET static prof_tdata_t * prof_tdata_count_iter(prof_tdata_tree_t 
*tdatas, prof_tdata_t *tdata, From 7e3671911f9343a40702801fcbb3833bd98d0c46 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 6 Dec 2019 09:45:40 -0800 Subject: [PATCH 1431/2608] Get rid of old indentation style for prof --- include/jemalloc/internal/prof_externs.h | 38 ++++++------- src/prof.c | 72 ++++++++++++------------ src/prof_data.c | 10 ++-- 3 files changed, 60 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 86f4193a..6d296920 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -3,24 +3,24 @@ #include "jemalloc/internal/mutex.h" -extern malloc_mutex_t bt2gctx_mtx; -extern malloc_mutex_t tdatas_mtx; -extern malloc_mutex_t prof_dump_mtx; +extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; malloc_mutex_t *prof_gctx_mutex_choose(void); malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. 
*/ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF PATH_MAX + @@ -28,21 +28,21 @@ extern char opt_prof_prefix[ 1]; /* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; +extern bool prof_active; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ -extern bool prof_gdump_val; +extern bool prof_gdump_val; /* Profile dump interval, measured in bytes allocated. */ -extern uint64_t prof_interval; +extern uint64_t prof_interval; /* * Initialized as opt_lg_prof_sample, and potentially modified during profiling * resets. */ -extern size_t lg_prof_sample; +extern size_t lg_prof_sample; -extern bool prof_booted; +extern bool prof_booted; /* Functions only accessed in prof_inlines_a.h */ bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); diff --git a/src/prof.c b/src/prof.c index 3be461bc..a9849b05 100644 --- a/src/prof.c +++ b/src/prof.c @@ -34,44 +34,44 @@ /******************************************************************************/ /* Data. */ -bool opt_prof = false; -bool opt_prof_active = true; -bool opt_prof_thread_active_init = true; -size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; -ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_gdump = false; -bool opt_prof_final = false; -bool opt_prof_leak = false; -bool opt_prof_accum = false; -char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +bool opt_prof = false; +bool opt_prof_active = true; +bool opt_prof_thread_active_init = true; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_final = false; +bool opt_prof_leak = false; +bool opt_prof_accum = false; +char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; /* Accessed via prof_idump_[accum/rollback](). 
*/ -static prof_accum_t prof_idump_accumulated; +static prof_accum_t prof_idump_accumulated; /* * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). */ -bool prof_active; -static malloc_mutex_t prof_active_mtx; +bool prof_active; +static malloc_mutex_t prof_active_mtx; /* * Initialized as opt_prof_thread_active_init, and accessed via * prof_thread_active_init_[gs]et(). */ -static bool prof_thread_active_init; -static malloc_mutex_t prof_thread_active_init_mtx; +static bool prof_thread_active_init; +static malloc_mutex_t prof_thread_active_init_mtx; /* * Initialized as opt_prof_gdump, and accessed via * prof_gdump_[gs]et{_unlocked,}(). */ -bool prof_gdump_val; -static malloc_mutex_t prof_gdump_mtx; +bool prof_gdump_val; +static malloc_mutex_t prof_gdump_mtx; -uint64_t prof_interval = 0; +uint64_t prof_interval = 0; -size_t lg_prof_sample; +size_t lg_prof_sample; /* * Table of mutexes that are shared among gctx's. These are leaf locks, so @@ -80,8 +80,8 @@ size_t lg_prof_sample; * and destroying mutexes causes complications for systems that allocate when * creating/destroying mutexes. */ -static malloc_mutex_t *gctx_locks; -static atomic_u_t cum_gctxs; /* Atomic counter. */ +static malloc_mutex_t *gctx_locks; +static atomic_u_t cum_gctxs; /* Atomic counter. */ /* * Table of mutexes that are shared among tdata's. No operations require @@ -89,27 +89,27 @@ static atomic_u_t cum_gctxs; /* Atomic counter. */ * than one tdata at the same time, even though a gctx lock may be acquired * while holding a tdata lock. */ -static malloc_mutex_t *tdata_locks; +static malloc_mutex_t *tdata_locks; /* Non static to enable profiling. 
*/ -malloc_mutex_t bt2gctx_mtx; +malloc_mutex_t bt2gctx_mtx; -malloc_mutex_t tdatas_mtx; +malloc_mutex_t tdatas_mtx; -static uint64_t next_thr_uid; -static malloc_mutex_t next_thr_uid_mtx; +static uint64_t next_thr_uid; +static malloc_mutex_t next_thr_uid_mtx; -static malloc_mutex_t prof_dump_filename_mtx; -static uint64_t prof_dump_seq; -static uint64_t prof_dump_iseq; -static uint64_t prof_dump_mseq; -static uint64_t prof_dump_useq; +static malloc_mutex_t prof_dump_filename_mtx; +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; -malloc_mutex_t prof_dump_mtx; -static char *prof_dump_prefix = NULL; +malloc_mutex_t prof_dump_mtx; +static char *prof_dump_prefix = NULL; /* Do not dump any profiles until bootstrapping is complete. */ -bool prof_booted = false; +bool prof_booted = false; /******************************************************************************/ @@ -550,8 +550,8 @@ prof_dump_prefix_is_empty(tsdn_t *tsdn) { return ret; } -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) static void prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); diff --git a/src/prof_data.c b/src/prof_data.c index 1b321528..ecabed3e 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -29,19 +29,19 @@ * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data * structure that knows about all backtraces currently captured. */ -static ckh_t bt2gctx; +static ckh_t bt2gctx; /* * Tree of all extant prof_tdata_t structures, regardless of state, * {attached,detached,expired}. */ -static prof_tdata_tree_t tdatas; +static prof_tdata_tree_t tdatas; /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. 
*/ -static char prof_dump_buf[ +static char prof_dump_buf[ /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF PROF_DUMP_BUFSIZE @@ -49,8 +49,8 @@ static char prof_dump_buf[ 1 #endif ]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +static size_t prof_dump_buf_end; +static int prof_dump_fd; /******************************************************************************/ /* Red-black trees. */ From 055478cca8ca8d00e74119ef6210ac64713b0ffb Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Dec 2019 10:03:54 -0800 Subject: [PATCH 1432/2608] Threshold is no longer updated before prof_realloc() --- include/jemalloc/internal/prof_inlines_b.h | 4 ++-- src/jemalloc.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 2aebb3de..c6f12ca5 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -132,14 +132,14 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, bool updated, const void *old_ptr, size_t old_usize, + bool prof_active, const void *old_ptr, size_t old_usize, prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; cassert(config_prof); assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); - if (prof_active && !updated && ptr != NULL) { + if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (prof_sample_accum_update(tsd, usize, true)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 13bf8d7e..e25e064b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3037,8 +3037,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, */ *usize = isalloc(tsd_tsdn(tsd), p); } - prof_realloc(tsd, p, *usize, tctx, prof_active, false, old_ptr, - old_usize, 
&old_prof_info); + prof_realloc(tsd, p, *usize, tctx, prof_active, old_ptr, old_usize, + &old_prof_info); return p; } @@ -3314,7 +3314,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx, false); return usize; } - prof_realloc(tsd, ptr, usize, tctx, prof_active, false, ptr, old_usize, + prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, old_usize, &old_prof_info); return usize; From 7d2bac5a384a2fded203298c36ce91b24cbbd497 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Dec 2019 10:46:31 -0800 Subject: [PATCH 1433/2608] Refactor destroy code path for prof_tctx --- include/jemalloc/internal/prof_externs.h | 4 ++-- src/prof.c | 28 ++--------------------- src/prof_data.c | 29 +++++++++++++++++++++++- 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 6d296920..bd73a296 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -114,13 +114,13 @@ bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); #endif -/* Functions in prof_data.c only accessed in prof.c */ +/* Functions in prof_data.c only used in profiling code. 
*/ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx); +void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index a9849b05..0d6da21c 100644 --- a/src/prof.c +++ b/src/prof.c @@ -128,22 +128,6 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { #endif } -static bool -prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) { - malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); - - if (opt_prof_accum) { - return false; - } - if (tctx->cnts.curobjs != 0) { - return false; - } - if (tctx->prepared) { - return false; - } - return true; -} - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { cassert(config_prof); @@ -171,11 +155,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { if ((uintptr_t)tctx > (uintptr_t)1U) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { - prof_tctx_destroy(tsd, tctx); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); - } + prof_tctx_try_destroy(tsd, tctx); } } @@ -216,11 +196,7 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_try_log(tsd, usize, prof_info); - if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) { - prof_tctx_destroy(tsd, tctx); - } else { - malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); - } + prof_tctx_try_destroy(tsd, tctx); } void diff --git a/src/prof_data.c b/src/prof_data.c index ecabed3e..8a2cc845 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1373,7 +1373,23 @@ prof_reset(tsd_t *tsd, size_t 
lg_sample) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); } -void +static bool +prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + + if (opt_prof_accum) { + return false; + } + if (tctx->cnts.curobjs != 0) { + return false; + } + if (tctx->prepared) { + return false; + } + return true; +} + +static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; prof_gctx_t *gctx = tctx->gctx; @@ -1449,4 +1465,15 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { } } +void +prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + if (prof_tctx_should_destroy(tsd, tctx)) { + /* tctx->tdata->lock will be released in prof_tctx_destroy(). */ + prof_tctx_destroy(tsd, tctx); + } else { + malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + } +} + /******************************************************************************/ From 45836d7fd3edca6e71031bce2291b48c4bb3cf76 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Dec 2019 17:07:41 -0800 Subject: [PATCH 1434/2608] Pass nstime_t pointer for profiling --- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/extent.h | 4 ++-- include/jemalloc/internal/large_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 2 +- src/large.c | 2 +- src/prof.c | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 930daba0..fbb8fa18 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -98,7 +98,7 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { +arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { cassert(config_prof); 
assert(ptr != NULL); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 3a20540d..fa7d1260 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -461,8 +461,8 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { } static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t t) { - nstime_copy(&extent->e_alloc_time, &t); +extent_prof_alloc_time_set(extent_t *extent, nstime_t *t) { + nstime_copy(&extent->e_alloc_time, t); } static inline bool diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 85786bb2..a0f48b81 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -25,6 +25,6 @@ size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(extent_t *extent); -void large_prof_alloc_time_set(extent_t *extent, nstime_t time); +void large_prof_alloc_time_set(extent_t *extent, nstime_t *time); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index c6f12ca5..657e1168 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -67,7 +67,7 @@ prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t t) { +prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { cassert(config_prof); assert(ptr != NULL); diff --git a/src/large.c b/src/large.c index 4d1257f6..f10b0d15 100644 --- a/src/large.c +++ b/src/large.c @@ -383,6 +383,6 @@ large_prof_tctx_reset(extent_t *extent) { } void -large_prof_alloc_time_set(extent_t *extent, nstime_t t) { 
+large_prof_alloc_time_set(extent_t *extent, nstime_t *t) { extent_prof_alloc_time_set(extent, t); } diff --git a/src/prof.c b/src/prof.c index 0d6da21c..4d3a8006 100644 --- a/src/prof.c +++ b/src/prof.c @@ -168,7 +168,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, * when free() is called. */ nstime_t t = NSTIME_ZERO_INITIALIZER; nstime_update(&t); - prof_alloc_time_set(tsd, ptr, t); + prof_alloc_time_set(tsd, ptr, &t); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; From 1decf958d1dabc1d1d217889cdcea7edb2eefd3e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 21 Nov 2019 14:10:03 -0800 Subject: [PATCH 1435/2608] Fix incorrect usage of cassert. --- src/tcache.c | 2 +- src/thread_event.c | 3 +-- test/unit/cache_bin.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 7758c4f2..7922e59e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -402,7 +402,7 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { static bool tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { - cassert(sizeof(bin->cur_ptr) == sizeof(void *)); + assert(sizeof(bin->cur_ptr) == sizeof(void *)); /* * The full_position points to the lowest available space. Allocations * will access the slots toward higher addresses (for the benefit of diff --git a/src/thread_event.c b/src/thread_event.c index 9f6c9271..9a1d0f9b 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -189,8 +189,7 @@ thread_event_trigger(tsd_t *tsd, bool delay_event) { thread_allocated_last_event_get(tsd); /* Make sure that accumbytes cannot overflow uint64_t. 
*/ - cassert(THREAD_EVENT_MAX_INTERVAL <= - UINT64_MAX - SC_LARGE_MAXCLASS + 1); + assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); thread_allocated_last_event_set(tsd, thread_allocated_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index f469b8da..12201a22 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -4,7 +4,7 @@ cache_bin_t test_bin; TEST_BEGIN(test_cache_bin) { cache_bin_t *bin = &test_bin; - cassert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); + assert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); /* Page aligned to make sure lowbits not overflowable. */ void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); From dd649c94859e2cdbe7b527cfb743b549c8d8bf50 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 11 Nov 2019 16:34:48 -0800 Subject: [PATCH 1436/2608] Optimize away the tsd_fast() check on fastpath. Fold the tsd_state check onto the event threshold check. The fast threshold is set to 0 when tsd switch to non-nominal. The fast_threshold can be reset by remote threads, to refect the non nominal tsd state change. 
--- include/jemalloc/internal/thread_event.h | 57 +++++++++++++++++- include/jemalloc/internal/tsd.h | 2 +- src/jemalloc.c | 18 +++--- src/thread_event.c | 75 ++++++++++++++++++++++-- src/tsd.c | 10 +++- test/unit/thread_event.c | 3 - 6 files changed, 144 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 8a05eaed..3ceb4702 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -27,6 +27,7 @@ void thread_event_trigger(tsd_t *tsd, bool delay_event); void thread_event_rollback(tsd_t *tsd, size_t diff); void thread_event_update(tsd_t *tsd); void thread_event_boot(); +void thread_event_recompute_fast_threshold(tsd_t *tsd); void tsd_thread_event_init(tsd_t *tsd); /* @@ -43,9 +44,7 @@ void tsd_thread_event_init(tsd_t *tsd); /* List of all thread event counters. */ #define ITERATE_OVER_ALL_COUNTERS \ C(thread_allocated) \ - C(thread_allocated_next_event_fast) \ C(thread_allocated_last_event) \ - C(thread_allocated_next_event) \ ITERATE_OVER_ALL_EVENTS \ C(prof_sample_last_event) @@ -81,6 +80,60 @@ ITERATE_OVER_ALL_COUNTERS */ #undef E +/* + * Two malloc fastpath getters -- use the unsafe getters since tsd may be + * non-nominal, in which case the fast_threshold will be set to 0. This allows + * checking for events and tsd non-nominal in a single branch. + * + * Note that these can only be used on the fastpath. + */ +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_malloc_fastpath(tsd_t *tsd) { + return *tsd_thread_allocatedp_get_unsafe(tsd); +} + +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { + uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); + assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + return v; +} + +/* Below 3 for next_event_fast. 
*/ +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_next_event_fast_get(tsd_t *tsd) { + uint64_t v = tsd_thread_allocated_next_event_fast_get(tsd); + assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + return v; +} + +JEMALLOC_ALWAYS_INLINE void +thread_allocated_next_event_fast_set(tsd_t *tsd, uint64_t v) { + assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + *tsd_thread_allocated_next_event_fastp_get(tsd) = v; +} + +JEMALLOC_ALWAYS_INLINE void +thread_allocated_next_event_fast_set_non_nominal(tsd_t *tsd) { + /* + * Set the fast threshold to zero when tsd is non-nominal. Use the + * unsafe getter as this may get called during tsd init and clean up. + */ + *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd) = 0; +} + +/* For next_event. Setter also updates the fast threshold. */ +JEMALLOC_ALWAYS_INLINE uint64_t +thread_allocated_next_event_get(tsd_t *tsd) { + return tsd_thread_allocated_next_event_get(tsd); +} + +JEMALLOC_ALWAYS_INLINE void +thread_allocated_next_event_set(tsd_t *tsd, uint64_t v) { + *tsd_thread_allocated_next_eventp_get(tsd) = v; + thread_event_recompute_fast_threshold(tsd); +} + /* * The function checks in debug mode whether the thread event counters are in * a consistent state, which forms the invariants before and after each round diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 6332a003..961fc1f5 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -110,7 +110,7 @@ typedef void (*test_callback_t)(int *); /* reentrancy_level */ 0, \ /* narenas_tdata */ 0, \ /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_allocated_next_event_fast */ 0, \ /* thread_deallocated */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index e25e064b..af72d41a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2354,7 +2354,7 @@ je_malloc(size_t 
size) { } tsd_t *tsd = tsd_get(false); - if (unlikely((size > SC_LOOKUP_MAXCLASS) || !tsd || !tsd_fast(tsd))) { + if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { return malloc_default(size); } @@ -2373,13 +2373,17 @@ je_malloc(size_t size) { assert(ind < SC_NBINS); assert(size <= SC_SMALL_MAXCLASS); - uint64_t thread_allocated_after = thread_allocated_get(tsd) + usize; - assert(thread_allocated_next_event_fast_get(tsd) <= - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - if (unlikely(thread_allocated_after >= - thread_allocated_next_event_fast_get(tsd))) { + uint64_t allocated = thread_allocated_malloc_fastpath(tsd); + uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. + */ + uint64_t allocated_after = allocated + usize; + if (unlikely(allocated_after >= threshold)) { return malloc_default(size); } + assert(tsd_fast(tsd)); tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); @@ -2387,7 +2391,7 @@ je_malloc(size_t size) { void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); if (tcache_success) { - thread_allocated_set(tsd, thread_allocated_after); + thread_allocated_set(tsd, allocated_after); if (config_stats) { bin->tstats.nrequests++; } diff --git a/src/thread_event.c b/src/thread_event.c index 9a1d0f9b..0657c841 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -103,10 +103,11 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { uint64_t next_event_fast = thread_allocated_next_event_fast_get(tsd); assert(last_event != next_event); - if (next_event <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) { - assert(next_event_fast == next_event); - } else { + if (next_event > THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX || + !tsd_fast(tsd)) { assert(next_event_fast == 0U); + } else { + assert(next_event_fast == next_event); } /* The subtraction is intentionally susceptible to underflow. 
*/ @@ -128,15 +129,77 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); } +/* + * Synchronization around the fast threshold in tsd -- + * There are two threads to consider in the synchronization here: + * - The owner of the tsd being updated by a slow path change + * - The remote thread, doing that slow path change. + * + * As a design constraint, we want to ensure that a slow-path transition cannot + * be ignored for arbitrarily long, and that if the remote thread causes a + * slow-path transition and then communicates with the owner thread that it has + * occurred, then the owner will go down the slow path on the next allocator + * operation (so that we don't want to just wait until the owner hits its slow + * path reset condition on its own). + * + * Here's our strategy to do that: + * + * The remote thread will update the slow-path stores to TSD variables, issue a + * SEQ_CST fence, and then update the TSD next_event_fast counter. The owner + * thread will update next_event_fast, issue an SEQ_CST fence, and then check + * its TSD to see if it's on the slow path. + + * This is fairly straightforward when 64-bit atomics are supported. Assume that + * the remote fence is sandwiched between two owner fences in the reset pathway. + * The case where there is no preceding or trailing owner fence (i.e. because + * the owner thread is near the beginning or end of its life) can be analyzed + * similarly. The owner store to next_event_fast preceding the earlier owner + * fence will be earlier in coherence order than the remote store to it, so that + * the owner thread will go down the slow path once the store becomes visible to + * it, which is no later than the time of the second fence. + + * The case where we don't support 64-bit atomics is trickier, since word + * tearing is possible. We'll repeat the same analysis, and look at the two + * owner fences sandwiching the remote fence. 
The next_event_fast stores done + * alongside the earlier owner fence cannot overwrite any of the remote stores + * (since they precede the earlier owner fence in sb, which precedes the remote + * fence in sc, which precedes the remote stores in sb). After the second owner + * fence there will be a re-check of the slow-path variables anyways, so the + * "owner will notice that it's on the slow path eventually" guarantee is + * satisfied. To make sure that the out-of-band-messaging constraint is as well, + * note that either the message passing is sequenced before the second owner + * fence (in which case the remote stores happen before the second set of owner + * stores, so malloc sees a value of zero for next_event_fast and goes down the + * slow path), or it is not (in which case the owner sees the tsd slow-path + * writes on its previous update). This leaves open the possibility that the + * remote thread will (at some arbitrary point in the future) zero out one half + * of the owner thread's next_event_fast, but that's always safe (it just sends + * it down the slow path earlier). + */ +void +thread_event_recompute_fast_threshold(tsd_t *tsd) { + if (tsd_state_get(tsd) != tsd_state_nominal) { + /* Check first because this is also called on purgatory. */ + thread_allocated_next_event_fast_set_non_nominal(tsd); + return; + } + uint64_t next_event = thread_allocated_next_event_get(tsd); + uint64_t next_event_fast = (next_event <= + THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? next_event : 0U; + thread_allocated_next_event_fast_set(tsd, next_event_fast); + + atomic_fence(ATOMIC_SEQ_CST); + if (tsd_state_get(tsd) != tsd_state_nominal) { + thread_allocated_next_event_fast_set_non_nominal(tsd); + } +} + static void thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { assert(wait <= THREAD_EVENT_MAX_START_WAIT); uint64_t next_event = thread_allocated_last_event_get(tsd) + (wait <= THREAD_EVENT_MAX_INTERVAL ? 
wait : THREAD_EVENT_MAX_INTERVAL); thread_allocated_next_event_set(tsd, next_event); - uint64_t next_event_fast = (next_event <= - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? next_event : 0U; - thread_allocated_next_event_fast_set(tsd, next_event_fast); } static uint64_t diff --git a/src/tsd.c b/src/tsd.c index 6e0ee93c..17e9eed2 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -115,8 +115,11 @@ tsd_force_recompute(tsdn_t *tsdn) { ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); - tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, - ATOMIC_RELAXED); + tsd_atomic_store(&remote_tsd->state, + tsd_state_nominal_recompute, ATOMIC_RELAXED); + /* See comments in thread_event_recompute_fast_threshold(). */ + atomic_fence(ATOMIC_SEQ_CST); + thread_allocated_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } @@ -175,6 +178,8 @@ tsd_slow_update(tsd_t *tsd) { old_state = tsd_atomic_exchange(&tsd->state, new_state, ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); + + thread_event_recompute_fast_threshold(tsd); } void @@ -213,6 +218,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { tsd_slow_update(tsd); } } + thread_event_recompute_fast_threshold(tsd); } static bool diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index cf5b2e59..f016cc5d 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -7,8 +7,6 @@ TEST_BEGIN(test_next_event_fast_roll_back) { THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX - 8U); thread_allocated_next_event_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - thread_allocated_next_event_fast_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); #define E(event, condition) \ event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); @@ -27,7 +25,6 @@ TEST_BEGIN(test_next_event_fast_resume) { THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 8U); 
thread_allocated_next_event_set(tsd, THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); - thread_allocated_next_event_fast_set(tsd, 0); #define E(event, condition) \ event##_event_wait_set(tsd, \ THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); From 1d01e4c770c3229041f1010037da2533568fef05 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 16 Dec 2019 12:41:06 -0800 Subject: [PATCH 1437/2608] Initialization utilities for nstime --- include/jemalloc/internal/nstime.h | 8 ++++++++ src/arena.c | 9 +++------ src/background_thread.c | 7 +++---- src/ctl.c | 2 +- src/mutex.c | 8 ++++---- src/nstime.c | 6 ++++++ src/prof.c | 4 ++-- src/prof_log.c | 12 +++++++----- test/src/timer.c | 3 +-- test/unit/background_thread.c | 9 ++++----- test/unit/decay.c | 9 +++------ test/unit/nstime.c | 10 ++++------ 12 files changed, 46 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 17c177c7..a3766ff2 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -31,4 +31,12 @@ extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; typedef bool (nstime_update_t)(nstime_t *); extern nstime_update_t *JET_MUTABLE nstime_update; +bool nstime_init_update(nstime_t *time); + +JEMALLOC_ALWAYS_INLINE void +nstime_init_zero(nstime_t *time) { + static const nstime_t zero = NSTIME_ZERO_INITIALIZER; + nstime_copy(time, &zero); +} + #endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/src/arena.c b/src/arena.c index 5537e66f..05c4021c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -667,8 +667,7 @@ arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } - nstime_init(&decay->epoch, 0); - nstime_update(&decay->epoch); + nstime_init_update(&decay->epoch); decay->jitter_state = (uint64_t)(uintptr_t)decay; arena_decay_deadline_init(decay); decay->nunpurged = 0; @@ -726,8 +725,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } 
nstime_t time; - nstime_init(&time, 0); - nstime_update(&time); + nstime_init_update(&time); if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time) > 0)) { /* @@ -2066,8 +2064,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Set arena before creating background threads. */ arena_set(ind, arena); - nstime_init(&arena->create_time, 0); - nstime_update(&arena->create_time); + nstime_init_update(&arena->create_time); /* We don't support reentrancy for arena 0 bootstrapping. */ if (ind != 0) { diff --git a/src/background_thread.c b/src/background_thread.c index 4a74edbf..400dae53 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -74,7 +74,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { info->npages_to_purge_new = 0; if (config_stats) { info->tot_n_runs = 0; - nstime_init(&info->tot_sleep_time, 0); + nstime_init_zero(&info->tot_sleep_time); } } @@ -236,8 +236,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); /* We need malloc clock (can be different from tv). 
*/ nstime_t next_wakeup; - nstime_init(&next_wakeup, 0); - nstime_update(&next_wakeup); + nstime_init_update(&next_wakeup); nstime_iadd(&next_wakeup, interval); assert(nstime_ns(&next_wakeup) < BACKGROUND_THREAD_INDEFINITE_SLEEP); @@ -794,7 +793,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { return true; } - nstime_init(&stats->run_interval, 0); + nstime_init_zero(&stats->run_interval); memset(&stats->max_counter_per_bg_thd, 0, sizeof(mutex_prof_data_t)); uint64_t num_runs = 0; diff --git a/src/ctl.c b/src/ctl.c index e2cdc29d..24f530f0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1037,7 +1037,7 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) { if (!have_background_thread || background_thread_stats_read(tsdn, stats)) { memset(stats, 0, sizeof(background_thread_stats_t)); - nstime_init(&stats->run_interval, 0); + nstime_init_zero(&stats->run_interval); } malloc_mutex_prof_copy( &ctl_stats->mutex_prof_data[global_prof_mutex_max_per_bg_thd], diff --git a/src/mutex.c b/src/mutex.c index 3f920f5b..bffcfb5f 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -46,7 +46,7 @@ JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - nstime_t before = NSTIME_ZERO_INITIALIZER; + nstime_t before; if (ncpus == 1) { goto label_spin_done; @@ -68,7 +68,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { return; } label_spin_done: - nstime_update(&before); + nstime_init_update(&before); /* Copy before to after to avoid clock skews. 
*/ nstime_t after; nstime_copy(&after, &before); @@ -104,8 +104,8 @@ label_spin_done: static void mutex_prof_data_init(mutex_prof_data_t *data) { memset(data, 0, sizeof(mutex_prof_data_t)); - nstime_init(&data->max_wait_time, 0); - nstime_init(&data->tot_wait_time, 0); + nstime_init_zero(&data->max_wait_time); + nstime_init_zero(&data->tot_wait_time); data->prev_owner = NULL; } diff --git a/src/nstime.c b/src/nstime.c index 71db3539..eb8f6c0a 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -168,3 +168,9 @@ nstime_update_impl(nstime_t *time) { return false; } nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl; + +bool +nstime_init_update(nstime_t *time) { + nstime_init_zero(time); + return nstime_update(time); +} diff --git a/src/prof.c b/src/prof.c index 4d3a8006..d1d46e2e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -166,8 +166,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, /* Get the current time and set this in the extent_t. We'll read this * when free() is called. */ - nstime_t t = NSTIME_ZERO_INITIALIZER; - nstime_update(&t); + nstime_t t; + nstime_init_update(&t); prof_alloc_time_set(tsd, ptr, &t); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/prof_log.c b/src/prof_log.c index b5879348..2904f0c6 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -38,7 +38,7 @@ static char log_filename[ 1]; /* Timestamp for most recent call to log_start(). */ -static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER; +static nstime_t log_start_timestamp; /* Increment these when adding to the log_bt and log_thr linked lists. 
*/ static size_t log_bt_index = 0; @@ -231,8 +231,8 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { } nstime_t alloc_time = prof_info->alloc_time; - nstime_t free_time = NSTIME_ZERO_INITIALIZER; - nstime_update(&free_time); + nstime_t free_time; + nstime_init_update(&free_time); size_t sz = sizeof(prof_alloc_node_t); prof_alloc_node_t *new_node = (prof_alloc_node_t *) @@ -556,9 +556,9 @@ static void prof_log_emit_metadata(emitter_t *emitter) { emitter_json_object_kv_begin(emitter, "info"); - nstime_t now = NSTIME_ZERO_INITIALIZER; + nstime_t now; - nstime_update(&now); + nstime_init_update(&now); uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); @@ -702,6 +702,8 @@ bool prof_log_init(tsd_t *tsd) { return true; } + nstime_init_zero(&log_start_timestamp); + log_tables_initialized = true; return false; } diff --git a/test/src/timer.c b/test/src/timer.c index c451c639..6e8b8edb 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -2,8 +2,7 @@ void timer_start(timedelta_t *timer) { - nstime_init(&timer->t0, 0); - nstime_update(&timer->t0); + nstime_init_update(&timer->t0); } void diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index f7bd37c4..f597285a 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -83,9 +83,8 @@ TEST_BEGIN(test_background_thread_running) { assert_b_eq(info->state, background_thread_started, "Background_thread did not start.\n"); - nstime_t start, now; - nstime_init(&start, 0); - nstime_update(&start); + nstime_t start; + nstime_init_update(&start); bool ran = false; while (true) { @@ -98,8 +97,8 @@ TEST_BEGIN(test_background_thread_running) { break; } - nstime_init(&now, 0); - nstime_update(&now); + nstime_t now; + nstime_init_update(&now); nstime_subtract(&now, &start); assert_u64_lt(nstime_sec(&now), 1000, "Background threads did not run for 1000 seconds."); diff --git a/test/unit/decay.c 
b/test/unit/decay.c index cf3c0796..59936db3 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -384,8 +384,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, #define NINTERVALS 101 nstime_t time, update_interval, decay_ms, deadline; - nstime_init(&time, 0); - nstime_update(&time); + nstime_init_update(&time); nstime_init2(&decay_ms, dt, 0); nstime_copy(&deadline, &time); @@ -456,8 +455,7 @@ TEST_BEGIN(test_decay_ticker) { } nupdates_mock = 0; - nstime_init(&time_mock, 0); - nstime_update(&time_mock); + nstime_init_update(&time_mock); monotonic_mock = true; nstime_monotonic_orig = nstime_monotonic; @@ -507,8 +505,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { npurge0 = get_arena_npurge(0); nupdates_mock = 0; - nstime_init(&time_mock, 0); - nstime_update(&time_mock); + nstime_init_update(&time_mock); monotonic_mock = false; nstime_monotonic_orig = nstime_monotonic; diff --git a/test/unit/nstime.c b/test/unit/nstime.c index f3137805..5a736bba 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -25,7 +25,7 @@ TEST_BEGIN(test_nstime_copy) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); - nstime_init(&nstb, 0); + nstime_init_zero(&nstb); nstime_copy(&nstb, &nsta); assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); @@ -108,7 +108,7 @@ TEST_BEGIN(test_nstime_subtract) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_subtract(&nsta, &nstb); - nstime_init(&nstb, 0); + nstime_init_zero(&nstb); assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); @@ -126,7 +126,7 @@ TEST_BEGIN(test_nstime_isubtract) { nstime_init2(&nsta, 42, 43); nstime_isubtract(&nsta, 42*BILLION + 43); - nstime_init(&nstb, 0); + nstime_init_zero(&nstb); assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); @@ -209,9 +209,7 @@ TEST_END TEST_BEGIN(test_nstime_update) { nstime_t nst; - nstime_init(&nst, 0); - - 
assert_false(nstime_update(&nst), "Basic time update failed."); + assert_false(nstime_init_update(&nst), "Basic time update failed."); /* Only Rip Van Winkle sleeps this long. */ { From 4afd709d1f3ae7a727f144a96d8b834157d31e17 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Dec 2019 16:48:03 -0800 Subject: [PATCH 1438/2608] Restructure setters for profiling info Explicitly define three setters: - `prof_tctx_reset()`: set `prof_tctx` to `1U`, if we don't know in advance whether the allocation is large or not; - `prof_tctx_reset_sampled()`: set `prof_tctx` to `1U`, if we already know in advance that the allocation is large; - `prof_info_set()`: set a real `prof_tctx`, and also set other profiling info e.g. the allocation time. Code structure wise, the prof level is kept as a thin wrapper, the large level only provides low level setter APIs, and the arena level carries out the main logic. --- include/jemalloc/internal/arena_inlines_b.h | 15 +++++------ include/jemalloc/internal/large_externs.h | 3 +-- include/jemalloc/internal/prof_inlines_b.h | 29 ++++++++++----------- src/large.c | 9 ++++--- src/prof.c | 8 +----- 5 files changed, 29 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fbb8fa18..23b3455b 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -68,8 +68,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, - prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); @@ -77,17 +76,17 @@ arena_prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, if (alloc_ctx == NULL) { extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); if (unlikely(!extent_slab_get(extent))) { - 
large_prof_tctx_set(extent, tctx); + large_prof_tctx_reset(extent); } } else { if (unlikely(!alloc_ctx->slab)) { - large_prof_tctx_set(iealloc(tsd_tsdn(tsd), ptr), tctx); + large_prof_tctx_reset(iealloc(tsd_tsdn(tsd), ptr)); } } } -static inline void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +JEMALLOC_ALWAYS_INLINE void +arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); @@ -98,13 +97,13 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { +arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); assert(!extent_slab_get(extent)); - large_prof_alloc_time_set(extent, t); + large_prof_info_set(extent, tctx); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index a0f48b81..2299920d 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -23,8 +23,7 @@ void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); void large_dalloc(tsdn_t *tsdn, extent_t *extent); size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); -void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx); void large_prof_tctx_reset(extent_t *extent); -void large_prof_alloc_time_set(extent_t *extent, nstime_t *time); +void large_prof_info_set(extent_t *extent, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 657e1168..193ede71 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -50,28 +50,28 @@ prof_info_get(tsd_t 
*tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_set(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, - prof_tctx_t *tctx) { +prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_set(tsd, ptr, alloc_ctx, tctx); + arena_prof_tctx_reset(tsd, ptr, alloc_ctx); } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - arena_prof_tctx_reset(tsd, ptr, tctx); + arena_prof_tctx_reset_sampled(tsd, ptr); } JEMALLOC_ALWAYS_INLINE void -prof_alloc_time_set(tsd_t *tsd, const void *ptr, nstime_t *t) { +prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); + assert((uintptr_t)tctx > (uintptr_t)1U); - arena_prof_alloc_time_set(tsd, ptr, t); + arena_prof_info_set(tsd, ptr, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -125,8 +125,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else { - prof_tctx_set(tsd, ptr, alloc_ctx, - (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_reset(tsd, ptr, alloc_ctx); } } @@ -161,15 +160,15 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, if (unlikely(sampled)) { prof_malloc_sample_object(tsd, ptr, usize, tctx); } else if (moved) { - prof_tctx_set(tsd, ptr, NULL, (prof_tctx_t *)(uintptr_t)1U); + prof_tctx_reset(tsd, ptr, NULL); } else if (unlikely(old_sampled)) { /* - * prof_tctx_set() would work for the !moved case as well, but - * prof_tctx_reset() is slightly cheaper, and the proper thing - * to do here in the presence of explicit knowledge re: moved - * state. 
+ * prof_tctx_reset() would work for the !moved case as well, + * but prof_tctx_reset_sampled() is slightly cheaper, and the + * proper thing to do here in the presence of explicit + * knowledge re: moved state. */ - prof_tctx_reset(tsd, ptr, tctx); + prof_tctx_reset_sampled(tsd, ptr); } else { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); diff --git a/src/large.c b/src/large.c index f10b0d15..1a1e82b6 100644 --- a/src/large.c +++ b/src/large.c @@ -372,7 +372,7 @@ large_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { extent_prof_info_get(extent, prof_info); } -void +static void large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { extent_prof_tctx_set(extent, tctx); } @@ -383,6 +383,9 @@ large_prof_tctx_reset(extent_t *extent) { } void -large_prof_alloc_time_set(extent_t *extent, nstime_t *t) { - extent_prof_alloc_time_set(extent, t); +large_prof_info_set(extent_t *extent, prof_tctx_t *tctx) { + large_prof_tctx_set(extent, tctx); + nstime_t t; + nstime_init_update(&t); + extent_prof_alloc_time_set(extent, &t); } diff --git a/src/prof.c b/src/prof.c index d1d46e2e..3a72e9c4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -162,13 +162,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_tctx_set(tsd, ptr, NULL, tctx); - - /* Get the current time and set this in the extent_t. We'll read this - * when free() is called. */ - nstime_t t; - nstime_init_update(&t); - prof_alloc_time_set(tsd, ptr, &t); + prof_info_set(tsd, ptr, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; From d5031ea82441301693a30cad50e0d32d45997bc3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 17 Dec 2019 11:57:08 -0800 Subject: [PATCH 1439/2608] Allow dallocx and sdallocx after tsd destruction. After a thread turns into purgatory / reincarnated state, still allow dallocx and sdallocx to function normally. 
--- include/jemalloc/internal/tsd.h | 6 ++++++ src/jemalloc.c | 10 ++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 961fc1f5..b7ce7ca2 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -440,4 +440,10 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { return tsd_rtree_ctx(tsdn_tsd(tsdn)); } +static inline bool +tsd_state_nocleanup(tsd_t *tsd) { + return tsd_state_get(tsd) == tsd_state_reincarnated || + tsd_state_get(tsd) == tsd_state_minimal_initialized; +} + #endif /* JEMALLOC_INTERNAL_TSD_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index af72d41a..4fc1a5ec 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3428,14 +3428,15 @@ je_dallocx(void *ptr, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. */ - assert(tsd_reentrancy_level_get(tsd) == 0); + assert(tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { @@ -3487,7 +3488,7 @@ sdallocx_default(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3496,7 +3497,8 @@ sdallocx_default(void *ptr, size_t size, int flags) { tcache_t *tcache; if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { /* Not allowed to be reentrant and specify a custom tcache. 
*/ - assert(tsd_reentrancy_level_get(tsd) == 0); + assert(tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { tcache = NULL; } else { From 9226e1f0d8ad691ef140bc0bf9340efadb96e5fe Mon Sep 17 00:00:00 2001 From: Wenbo Zhang Date: Sun, 15 Dec 2019 07:26:45 -0500 Subject: [PATCH 1440/2608] fix opt.thp:never still use THP with base_new --- src/base.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/base.c b/src/base.c index f3c61661..9a55ed2e 100644 --- a/src/base.c +++ b/src/base.c @@ -39,6 +39,9 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); + if (have_madvise_huge && addr) { + pages_set_thp_state(addr, size); + } } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); From 4278f846038b2299938be8479c8ccd3617eed217 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Nov 2019 12:59:34 -0800 Subject: [PATCH 1441/2608] Move extent hook getters/setters to arena.c This is where they're logically scoped; they access arena data. 
--- include/jemalloc/internal/arena_externs.h | 3 +++ include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/extent_externs.h | 4 ---- src/arena.c | 26 +++++++++++++++++++-- src/ctl.c | 7 +++--- src/extent.c | 22 +---------------- src/large.c | 4 ++-- 7 files changed, 35 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 5178e238..93a6302e 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -72,6 +72,9 @@ void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); +extent_hooks_t *arena_get_extent_hooks(arena_t *arena); +extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 23b3455b..8f2d3968 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -16,7 +16,7 @@ arena_get_from_extent(extent_t *extent) { JEMALLOC_ALWAYS_INLINE bool arena_has_default_hooks(arena_t *arena) { - return (extent_hooks_get(arena) == &extent_hooks_default); + return (arena_get_extent_hooks(arena) == &extent_hooks_default); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 6963b477..edf3c65c 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -15,10 +15,6 @@ extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void 
extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -extent_hooks_t *extent_hooks_get(arena_t *arena); -extent_hooks_t *extent_hooks_set(tsd_t *tsd, arena_t *arena, - extent_hooks_t *extent_hooks); - ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) diff --git a/src/arena.c b/src/arena.c index 05c4021c..043f8069 100644 --- a/src/arena.c +++ b/src/arena.c @@ -923,7 +923,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); extent_list_t decay_extents; extent_list_init(&decay_extents); @@ -1159,7 +1159,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * destroyed, or provide custom extent hooks that track retained * dss-based extents for later reuse. */ - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); extent_t *extent; while ((extent = extents_evict(tsdn, arena, &extent_hooks, &arena->eset_retained, 0)) != NULL) { @@ -1846,6 +1846,28 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return ret; } +extent_hooks_t * +arena_get_extent_hooks(arena_t *arena) { + return base_extent_hooks_get(arena->base); +} + +extent_hooks_t * +arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, + extent_hooks_t *extent_hooks) { + background_thread_info_t *info; + if (have_background_thread) { + info = arena_background_thread_info_get(arena); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + } + extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); + if (have_background_thread) { + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + } + + return ret; +} + + dss_prec_t arena_dss_prec_get(arena_t *arena) { return (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE); diff --git a/src/ctl.c 
b/src/ctl.c index 24f530f0..c2f12704 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2396,11 +2396,12 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(tsd, arena, - new_extent_hooks); + old_extent_hooks = arena_set_extent_hooks(tsd, + arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { - old_extent_hooks = extent_hooks_get(arena); + old_extent_hooks = arena_get_extent_hooks( + arena); READ(old_extent_hooks, extent_hooks_t *); } } diff --git a/src/extent.c b/src/extent.c index d9eff764..60830a67 100644 --- a/src/extent.c +++ b/src/extent.c @@ -221,31 +221,11 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); } -extent_hooks_t * -extent_hooks_get(arena_t *arena) { - return base_extent_hooks_get(arena->base); -} - -extent_hooks_t * -extent_hooks_set(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) { - background_thread_info_t *info; - if (have_background_thread) { - info = arena_background_thread_info_get(arena); - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - } - extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); - if (have_background_thread) { - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - } - - return ret; -} - static void extent_hooks_assure_initialized(arena_t *arena, extent_hooks_t **r_extent_hooks) { if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) { - *r_extent_hooks = extent_hooks_get(arena); + *r_extent_hooks = arena_get_extent_hooks(arena); } } diff --git a/src/large.c b/src/large.c index 1a1e82b6..fb216ede 100644 --- a/src/large.c +++ b/src/large.c @@ -93,7 +93,7 @@ static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t 
*extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); assert(oldusize > usize); @@ -129,7 +129,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = extent_hooks_get(arena); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); size_t trailsize = usize - oldusize; if (extent_hooks->merge == NULL) { From 9f6eb09585239c10bde86d68ed48f6fe113ef8f7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Nov 2019 14:03:22 -0800 Subject: [PATCH 1442/2608] Extents: Eagerly initialize extent hooks. When deferred initialization was added, initializing required copying sizeof(extent_hooks_t) bytes after a pointer chase. Today, it's just a single pointer loaded from the base_t. In subsequent diffs, we'll get rid of even that. 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_b.h | 5 +- include/jemalloc/internal/extent_externs.h | 24 +- include/jemalloc/internal/extent_types.h | 2 - src/arena.c | 48 ++-- src/extent.c | 271 +++++++++----------- src/extent_dss.c | 4 +- src/large.c | 18 +- 8 files changed, 173 insertions(+), 201 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 93a6302e..c13d8289 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8f2d3968..9ccfaa90 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -134,11 +134,10 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { /* Purge a single extent to retained / unmapped directly. */ JEMALLOC_ALWAYS_INLINE void -arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t **r_extent_hooks, +arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t *extent_hooks, extent_t *extent) { size_t extent_size = extent_size_get(extent); - extent_dalloc_wrapper(tsdn, arena, - r_extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); if (config_stats) { /* Update stats accordingly. 
*/ arena_stats_lock(tsdn, &arena->stats); diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index edf3c65c..218ca94f 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -19,38 +19,38 @@ ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, + extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent); + extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent); extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_min); + extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_min); extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent); + extent_hooks_t *extent_hooks, extent_t *extent); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - 
extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length); extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b); + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); bool extent_boot(void); diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h index 02d7b2cd..25b360eb 100644 --- a/include/jemalloc/internal/extent_types.h +++ b/include/jemalloc/internal/extent_types.h @@ -4,8 +4,6 @@ typedef struct extent_util_stats_s extent_util_stats_t; typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; -#define EXTENT_HOOKS_INITIALIZER NULL - /* * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) * is the max ratio between the size of the active extent and the new extent. 
diff --git a/src/arena.c b/src/arena.c index 043f8069..a272438a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -253,11 +253,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, r_extent_hooks, &arena->eset_dirty, + extents_dalloc(tsdn, arena, extent_hooks, &arena->eset_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); @@ -426,7 +426,7 @@ arena_may_have_muzzy(arena_t *arena) { extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -434,17 +434,17 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, &extent_hooks, + extent_t *extent = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, &extent_hooks, + extent = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, &extent_hooks, NULL, + extent = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { @@ -819,7 +819,7 @@ 
arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, size_t npages_limit, + extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_limit, size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -828,7 +828,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; extent_t *extent; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, r_extent_hooks, eset, + (extent = extents_evict(tsdn, arena, extent_hooks, eset, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -838,7 +838,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, arena_decay_t *decay, eset_t *eset, + extent_hooks_t *extent_hooks, arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -864,9 +864,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, - r_extent_hooks, extent, 0, + extent_hooks, extent, 0, extent_size_get(extent))) { - extents_dalloc(tsdn, arena, r_extent_hooks, + extents_dalloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, extent); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -874,7 +874,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, r_extent_hooks, + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); if (config_stats) { nunmapped += npages; @@ -928,11 +928,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_t 
decay_extents; extent_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, eset, + size_t npurge = arena_stash_decayed(tsdn, arena, extent_hooks, eset, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, - &extent_hooks, decay, eset, all, &decay_extents, + extent_hooks, decay, eset, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -1006,8 +1006,8 @@ static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, slab); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, slab); } static void @@ -1161,9 +1161,9 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { */ extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); extent_t *extent; - while ((extent = extents_evict(tsdn, arena, &extent_hooks, + while ((extent = extents_evict(tsdn, arena, extent_hooks, &arena->eset_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, &extent_hooks, extent); + extent_destroy_wrapper(tsdn, arena, extent_hooks, extent); } } @@ -1205,7 +1205,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, + extent_hooks_t *extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1215,7 +1215,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; - slab = extent_alloc_wrapper(tsdn, arena, r_extent_hooks, NULL, + slab = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { @@ -1232,20 +1232,20 @@ 
arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, &extent_hooks, + extent_t *slab = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, &extent_hooks, + slab = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, &extent_hooks, + slab = arena_slab_alloc_hard(tsdn, arena, extent_hooks, bin_info, szind); if (slab == NULL) { return NULL; diff --git a/src/extent.c b/src/extent.c index 60830a67..d21a1e81 100644 --- a/src/extent.c +++ b/src/extent.c @@ -29,7 +29,7 @@ static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); @@ -38,27 +38,27 @@ static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t 
**r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); #ifdef PAGES_CAN_PURGE_FORCED static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, bool growing_retained); const extent_hooks_t extent_hooks_default = { @@ -98,14 +98,14 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, extent_t *extent); static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, void *new_addr, + extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + 
extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, eset_t *eset, extent_t *extent, + extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained); /******************************************************************************/ @@ -221,23 +221,15 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); } -static void -extent_hooks_assure_initialized(arena_t *arena, - extent_hooks_t **r_extent_hooks) { - if (*r_extent_hooks == EXTENT_HOOKS_INITIALIZER) { - *r_extent_hooks = arena_get_extent_hooks(arena); - } -} - ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { extent_state_set(extent, extent_state_active); bool coalesced; - extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, + extent = extent_try_coalesce(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, &coalesced, false); extent_state_set(extent, eset_state_get(eset)); @@ -249,7 +241,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, } extent_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -257,14 +249,14 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, 
r_extent_hooks, eset, + extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, eset, new_addr, size, pad, alignment, slab, szind, zero, commit, false); assert(extent == NULL || extent_dumpable_get(extent)); return extent; } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); @@ -275,11 +267,11 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, r_extent_hooks, eset, extent, false); + extent_record(tsdn, arena, extent_hooks, eset, extent, false); } extent_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -309,7 +301,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, arena, r_extent_hooks, + if (extent_try_delayed_coalesce(tsdn, arena, extent_hooks, rtree_ctx, eset, extent)) { break; } @@ -347,7 +339,7 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained) { size_t sz = extent_size_get(extent); if (config_stats) { @@ -358,9 +350,9 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks * that this is only a virtual memory leak. 
*/ if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, + if (extent_purge_lazy_impl(tsdn, arena, extent_hooks, extent, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, r_extent_hooks, + extent_purge_forced_impl(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent), growing_retained); } @@ -590,7 +582,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -615,7 +607,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, size_t esize = size + pad; malloc_mutex_lock(tsdn, &eset->mtx); - extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; if (new_addr != NULL) { extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, @@ -678,7 +669,7 @@ typedef enum { static extent_split_interior_result_t extent_split_interior(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ extent_t **extent, extent_t **lead, extent_t **trail, /* The mess to clean up, in case of error. */ @@ -702,7 +693,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the lead. */ if (leadsize != 0) { *lead = *extent; - *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *extent = extent_split_impl(tsdn, arena, extent_hooks, *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, growing_retained); if (*extent == NULL) { @@ -714,7 +705,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + *trail = extent_split_impl(tsdn, arena, extent_hooks, *extent, esize, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { @@ -755,7 +746,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, */ static extent_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, bool growing_retained) { extent_t *lead; @@ -764,7 +755,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, extent_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); @@ -799,7 +790,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, r_extent_hooks, eset, + extents_abandon_vm(tsdn, arena, extent_hooks, eset, to_leak, growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -825,7 +816,7 @@ extent_need_manual_zero(arena_t *arena) { * in the given eset_t. 
*/ static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { @@ -838,14 +829,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, + extent_t *extent = extent_recycle_extract(tsdn, arena, extent_hooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); if (extent == NULL) { return NULL; } - extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, + extent = extent_recycle_split(tsdn, arena, extent_hooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, szind, extent, growing_retained); if (extent == NULL) { @@ -853,9 +844,9 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, + if (extent_commit_impl(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, r_extent_hooks, eset, + extent_record(tsdn, arena, extent_hooks, eset, extent, growing_retained); return NULL; } @@ -995,7 +986,7 @@ extent_hook_post_reentrancy(tsdn_t *tsdn) { */ static extent_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, size_t size, size_t pad, size_t alignment, + extent_hooks_t *extent_hooks, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); assert(pad == 0 || !slab); @@ -1031,12 +1022,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, bool 
committed = false; void *ptr; - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { ptr = extent_alloc_default_impl(tsdn, arena, NULL, alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); - ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, + ptr = extent_hooks->alloc(extent_hooks, NULL, alloc_size, PAGE, &zeroed, &committed, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); @@ -1070,17 +1061,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_t *to_leak; extent_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, lead, true); } if (trail != NULL) { - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, trail, true); } } else { @@ -1093,21 +1084,21 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, r_extent_hooks, + extents_abandon_vm(tsdn, arena, extent_hooks, &arena->eset_retained, to_leak, true); } goto label_err; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, r_extent_hooks, extent, 0, + if (extent_commit_impl(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent), true)) { - extent_record(tsdn, arena, r_extent_hooks, + extent_record(tsdn, arena, extent_hooks, 
&arena->eset_retained, extent, true); goto label_err; } @@ -1161,14 +1152,14 @@ label_err: static extent_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, + extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (extent != NULL) { @@ -1177,7 +1168,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, r_extent_hooks, size, + extent = extent_grow_retained(tsdn, arena, extent_hooks, size, pad, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { @@ -1190,7 +1181,7 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; extent_t *extent = extent_alloc(tsdn, arena); @@ -1199,13 +1190,13 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } void *addr; size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); - addr = (*r_extent_hooks)->alloc(*r_extent_hooks, new_addr, + addr = extent_hooks->alloc(extent_hooks, new_addr, esize, palignment, zero, commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1229,14 +1220,12 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, extent_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, void *new_addr, size_t size, size_t pad, + extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, + extent_t *extent = extent_alloc_retained(tsdn, arena, extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { if (opt_retain && new_addr != NULL) { @@ -1248,7 +1237,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, */ return NULL; } - extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, + extent = extent_alloc_wrapper_hard(tsdn, arena, extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } @@ -1277,7 +1266,7 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(arena, eset, inner, outer)); @@ -1285,7 +1274,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_activate_locked(tsdn, arena, eset, outer); malloc_mutex_unlock(tsdn, &eset->mtx); - 
bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, + bool err = extent_merge_impl(tsdn, arena, extent_hooks, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &eset->mtx); @@ -1298,7 +1287,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, static extent_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained, bool inactive_only) { /* @@ -1329,7 +1318,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, eset, extent, next, true, + extent_hooks, eset, extent, next, true, growing_retained)) { if (eset->delay_coalesce) { /* Do minimal coalescing. */ @@ -1349,7 +1338,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - r_extent_hooks, eset, extent, prev, false, + extent_hooks, eset, extent, prev, false, growing_retained)) { extent = prev; if (eset->delay_coalesce) { @@ -1370,17 +1359,17 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, static extent_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, coalesced, growing_retained, false); } static extent_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, + extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, 
eset_t *eset, extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, r_extent_hooks, rtree_ctx, + return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, coalesced, growing_retained, true); } @@ -1389,7 +1378,7 @@ extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, * given eset_t (coalesces, deregisters slab interiors, the heap operations). */ static void -extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, +extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1399,7 +1388,6 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, !extent_zeroed_get(extent)); malloc_mutex_lock(tsdn, &eset->mtx); - extent_hooks_assure_initialized(arena, r_extent_hooks); extent_szind_set(extent, SC_NSIZES); if (extent_slab_get(extent)) { @@ -1411,7 +1399,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, (uintptr_t)extent_base_get(extent), true) == extent); if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, + extent = extent_try_coalesce(tsdn, arena, extent_hooks, rtree_ctx, eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { assert(eset == &arena->eset_dirty); @@ -1420,13 +1408,13 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, do { assert(extent_state_get(extent) == extent_state_active); extent = extent_try_coalesce_large(tsdn, arena, - r_extent_hooks, rtree_ctx, eset, extent, + extent_hooks, rtree_ctx, eset, extent, &coalesced, growing_retained); } while (coalesced); if (extent_size_get(extent) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, r_extent_hooks, extent); + arena_decay_extent(tsdn, arena, extent_hooks, extent); return; } } @@ -1437,7 +1425,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1446,7 +1434,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_dalloc(tsdn, arena, extent); return; } - extent_dalloc_wrapper(tsdn, arena, &extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); } static bool @@ -1471,7 +1459,7 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { bool err; assert(extent_base_get(extent) != NULL); @@ -1481,16 +1469,15 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); - extent_hooks_assure_initialized(arena, r_extent_hooks); /* Try to deallocate. */ - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. 
*/ err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = ((*r_extent_hooks)->dalloc == NULL || - (*r_extent_hooks)->dalloc(*r_extent_hooks, + err = (extent_hooks->dalloc == NULL || + extent_hooks->dalloc(extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena))); extent_hook_post_reentrancy(tsdn); @@ -1505,50 +1492,50 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ - if (*r_extent_hooks != &extent_hooks_default || extent_may_dalloc()) { + if (extent_hooks != &extent_hooks_default || extent_may_dalloc()) { /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, r_extent_hooks, + if (!extent_dalloc_wrapper_try(tsdn, arena, extent_hooks, extent)) { return; } extent_reregister(tsdn, extent); } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } /* Try to decommit; purge if that fails. 
*/ bool zeroed; if (!extent_committed_get(extent)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, r_extent_hooks, extent, + } else if (!extent_decommit_wrapper(tsdn, arena, extent_hooks, extent, 0, extent_size_get(extent))) { zeroed = true; - } else if ((*r_extent_hooks)->purge_forced != NULL && - !(*r_extent_hooks)->purge_forced(*r_extent_hooks, + } else if (extent_hooks->purge_forced != NULL && + !extent_hooks->purge_forced(extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = true; } else if (extent_state_get(extent) == extent_state_muzzy || - ((*r_extent_hooks)->purge_lazy != NULL && - !(*r_extent_hooks)->purge_lazy(*r_extent_hooks, + (extent_hooks->purge_lazy != NULL && + !extent_hooks->purge_lazy(extent_hooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena)))) { zeroed = false; } else { zeroed = false; } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } extent_zeroed_set(extent, zeroed); @@ -1557,7 +1544,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, r_extent_hooks, &arena->eset_retained, + extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, extent, false); } @@ -1576,7 +1563,7 @@ extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent) { + extent_hooks_t *extent_hooks, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1587,15 +1574,14 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); - extent_hooks_assure_initialized(arena, r_extent_hooks); /* Try to 
destroy; silently fail otherwise. */ - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ extent_destroy_default_impl(extent_base_get(extent), extent_size_get(extent)); - } else if ((*r_extent_hooks)->destroy != NULL) { + } else if (extent_hooks->destroy != NULL) { extent_hook_pre_reentrancy(tsdn, arena); - (*r_extent_hooks)->destroy(*r_extent_hooks, + extent_hooks->destroy(extent_hooks, extent_base_get(extent), extent_size_get(extent), extent_committed_get(extent), arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); @@ -1613,19 +1599,18 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = ((*r_extent_hooks)->commit == NULL || - (*r_extent_hooks)->commit(*r_extent_hooks, extent_base_get(extent), + bool err = (extent_hooks->commit == NULL || + extent_hooks->commit(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena))); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) || !err); @@ -1634,9 +1619,9 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - return extent_commit_impl(tsdn, arena, r_extent_hooks, extent, offset, + return extent_commit_impl(tsdn, arena, extent_hooks, extent, offset, length, false); } @@ -1649,21 +1634,19 @@ extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = ((*r_extent_hooks)->decommit == NULL || - (*r_extent_hooks)->decommit(*r_extent_hooks, + bool err = (extent_hooks->decommit == NULL || + extent_hooks->decommit(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, 
arena_ind_get(arena))); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) && err); @@ -1686,23 +1669,21 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->purge_lazy == NULL) { + if (extent_hooks->purge_lazy == NULL) { return true; } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (*r_extent_hooks)->purge_lazy(*r_extent_hooks, + bool err = extent_hooks->purge_lazy(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } @@ -1711,9 +1692,9 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, r_extent_hooks, extent, + return extent_purge_lazy_impl(tsdn, arena, extent_hooks, extent, offset, length, false); } @@ -1733,23 +1714,21 @@ extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + 
extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->purge_forced == NULL) { + if (extent_hooks->purge_forced == NULL) { return true; } - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (*r_extent_hooks)->purge_forced(*r_extent_hooks, + bool err = extent_hooks->purge_forced(extent_hooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } return err; @@ -1757,9 +1736,9 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t offset, + extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, r_extent_hooks, extent, + return extent_purge_forced_impl(tsdn, arena, extent_hooks, extent, offset, length, false); } @@ -1787,16 +1766,14 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->split == NULL) { + if (extent_hooks->split == NULL) { return NULL; } @@ -1838,13 +1815,13 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_lock2(tsdn, extent, trail); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (*r_extent_hooks)->split(*r_extent_hooks, extent_base_get(extent), + bool err = extent_hooks->split(extent_hooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); - if (*r_extent_hooks != &extent_hooks_default) { + if (extent_hooks != &extent_hooks_default) { extent_hook_post_reentrancy(tsdn); } if (err) { @@ -1872,9 +1849,9 @@ label_error_a: extent_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, + extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, r_extent_hooks, extent, size_a, + return extent_split_impl(tsdn, arena, extent_hooks, extent, size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); } @@ -1938,26 +1915,24 @@ extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b, + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(extent_base_get(a) < extent_base_get(b)); - extent_hooks_assure_initialized(arena, r_extent_hooks); - - if ((*r_extent_hooks)->merge == NULL || extent_head_no_merge(a, b)) { + if (extent_hooks->merge == NULL || extent_head_no_merge(a, b)) { return true; } bool err; - if (*r_extent_hooks == &extent_hooks_default) { + if (extent_hooks == &extent_hooks_default) { /* Call directly to propagate tsdn. */ err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = (*r_extent_hooks)->merge(*r_extent_hooks, + err = extent_hooks->merge(extent_hooks, extent_base_get(a), extent_size_get(a), extent_base_get(b), extent_size_get(b), extent_committed_get(a), arena_ind_get(arena)); @@ -2017,8 +1992,8 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, extent_t *a, extent_t *b) { - return extent_merge_impl(tsdn, arena, r_extent_hooks, a, b, false); + extent_hooks_t *extent_hooks, extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, extent_hooks, a, b, false); } bool diff --git a/src/extent_dss.c b/src/extent_dss.c index eb074800..dd80a196 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -195,7 +195,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } if (*zero && *commit) { extent_hooks_t *extent_hooks = - EXTENT_HOOKS_INITIALIZER; + arena_get_extent_hooks(arena); extent_t extent; extent_init(&extent, @@ -204,7 +204,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, &extent_hooks, &extent, 0, + arena, extent_hooks, &extent, 0, size)) { memset(ret, 0, size); } diff --git a/src/large.c b/src/large.c index fb216ede..6de1c570 100644 --- a/src/large.c +++ b/src/large.c @@ -105,7 +105,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, 
extent_t *extent, size_t usize) { /* Split excess pages. */ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - &extent_hooks, extent, usize + sz_large_pad, + extent_hooks, extent, usize + sz_large_pad, sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { return true; @@ -116,7 +116,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { extent_size_get(trail)); } - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, trail); + arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, trail); } arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); @@ -149,17 +149,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool commit = true; extent_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, &extent_hooks, + if ((trail = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_dirty, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, &extent_hooks, + || (trail = extents_alloc(tsdn, arena, extent_hooks, &arena->eset_muzzy, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = extent_alloc_wrapper(tsdn, arena, &extent_hooks, + if ((trail = extent_alloc_wrapper(tsdn, arena, extent_hooks, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; @@ -169,8 +169,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, &extent_hooks, extent, trail)) { - extent_dalloc_wrapper(tsdn, arena, &extent_hooks, trail); + if (extent_merge_wrapper(tsdn, arena, extent_hooks, extent, trail)) { + extent_dalloc_wrapper(tsdn, arena, extent_hooks, trail); return true; } rtree_ctx_t rtree_ctx_fallback; @@ -339,8 +339,8 @@ 
large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = EXTENT_HOOKS_INITIALIZER; - arena_extents_dirty_dalloc(tsdn, arena, &extent_hooks, extent); + extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, extent); } void From 837119a9489992e1c4326015ae21e16c246ed094 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 18 Nov 2019 14:43:48 -0800 Subject: [PATCH 1443/2608] base_structs.h: Remove some mid-line tabs. --- include/jemalloc/internal/base_structs.h | 30 ++++++++++++------------ 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index cc0f9a57..a3429d65 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -9,52 +9,52 @@ /* Embedded at the beginning of every block of base-managed virtual memory. */ struct base_block_s { /* Total size of block's virtual memory mapping. */ - size_t size; + size_t size; /* Next block in list of base's blocks. */ - base_block_t *next; + base_block_t *next; /* Tracks unused trailing space. */ - extent_t extent; + extent_t extent; }; struct base_s { /* Associated arena's index within the arenas array. */ - unsigned ind; + unsigned ind; /* * User-configurable extent hook functions. Points to an * extent_hooks_t. */ - atomic_p_t extent_hooks; + atomic_p_t extent_hooks; /* Protects base_alloc() and base_stats_get() operations. */ - malloc_mutex_t mtx; + malloc_mutex_t mtx; /* Using THP when true (metadata_thp auto mode). */ - bool auto_thp_switched; + bool auto_thp_switched; /* * Most recent size class in the series of increasingly large base * extents. Logarithmic spacing between subsequent allocations ensures * that the total number of distinct mappings remains small. 
*/ - pszind_t pind_last; + pszind_t pind_last; /* Serial number generation state. */ - size_t extent_sn_next; + size_t extent_sn_next; /* Chain of all blocks associated with base. */ - base_block_t *blocks; + base_block_t *blocks; /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[SC_NSIZES]; + extent_heap_t avail[SC_NSIZES]; /* Stats, only maintained if config_stats. */ - size_t allocated; - size_t resident; - size_t mapped; + size_t allocated; + size_t resident; + size_t mapped; /* Number of THP regions touched. */ - size_t n_thp; + size_t n_thp; }; #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ From ba8b9ecbcbda3b975711e4bced4647afaa50c71e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 10:44:09 -0800 Subject: [PATCH 1444/2608] Add ehooks module --- Makefile.in | 1 + include/jemalloc/internal/ehooks.h | 4 ++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/ehooks.c | 3 +++ 5 files changed, 10 insertions(+) create mode 100644 include/jemalloc/internal/ehooks.h create mode 100644 src/ehooks.c diff --git a/Makefile.in b/Makefile.in index 0bbf106d..a735e0e6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h new file mode 100644 index 00000000..695859d5 --- /dev/null +++ b/include/jemalloc/internal/ehooks.h @@ -0,0 +1,4 @@ +#ifndef JEMALLOC_INTERNAL_EHOOKS_H +#define JEMALLOC_INTERNAL_EHOOKS_H + +#endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 5838e933..e6803120 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b9d4f681..ce51930a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/src/ehooks.c b/src/ehooks.c new file mode 100644 index 00000000..454cb475 --- /dev/null +++ b/src/ehooks.c @@ -0,0 +1,3 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + From ae0d8e8591f749ee8fbe1d732984a63f900aaea3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 14:19:22 -0800 Subject: [PATCH 1445/2608] Move extent ehook calls into ehooks --- include/jemalloc/internal/arena_externs.h | 4 +- include/jemalloc/internal/arena_inlines_b.h | 9 +- include/jemalloc/internal/base_externs.h | 2 +- include/jemalloc/internal/base_structs.h | 6 +- include/jemalloc/internal/ehooks.h | 147 ++++++ include/jemalloc/internal/extent_externs.h | 61 ++- src/arena.c | 93 ++-- src/base.c | 69 +-- src/ctl.c | 5 +- src/ehooks.c | 5 + src/extent.c | 471 +++++++++----------- src/extent_dss.c | 7 +- src/large.c | 36 +- 13 files changed, 511 insertions(+), 404 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c13d8289..b6b33ce3 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); + ehooks_t *ehooks, extent_t *extent); #ifdef JEMALLOC_JET size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); #endif @@ -72,7 +72,7 @@ void *arena_ralloc(tsdn_t 
*tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); -extent_hooks_t *arena_get_extent_hooks(arena_t *arena); +ehooks_t *arena_get_ehooks(arena_t *arena); extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 9ccfaa90..16da67e2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -14,11 +14,6 @@ arena_get_from_extent(extent_t *extent) { ATOMIC_RELAXED); } -JEMALLOC_ALWAYS_INLINE bool -arena_has_default_hooks(arena_t *arena) { - return (arena_get_extent_hooks(arena) == &extent_hooks_default); -} - JEMALLOC_ALWAYS_INLINE arena_t * arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { if (arena != NULL) { @@ -134,10 +129,10 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { /* Purge a single extent to retained / unmapped directly. */ JEMALLOC_ALWAYS_INLINE void -arena_decay_extent(tsdn_t *tsdn,arena_t *arena, extent_hooks_t *extent_hooks, +arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, extent_t *extent) { size_t extent_size = extent_size_get(extent); - extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); if (config_stats) { /* Update stats accordingly. 
*/ arena_stats_lock(tsdn, &arena->stats); diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 7b705c9b..35734c3c 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -7,7 +7,7 @@ extern const char *metadata_thp_mode_names[]; base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); -extent_hooks_t *base_extent_hooks_get(base_t *base); +ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index a3429d65..68e7896e 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H #define JEMALLOC_INTERNAL_BASE_STRUCTS_H +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -23,10 +24,9 @@ struct base_s { unsigned ind; /* - * User-configurable extent hook functions. Points to an - * extent_hooks_t. + * User-configurable extent hook functions. */ - atomic_p_t extent_hooks; + ehooks_t ehooks; /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 695859d5..c79ea24b 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,4 +1,151 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +#include "jemalloc/internal/atomic.h" + +extern const extent_hooks_t extent_hooks_default; + +typedef struct ehooks_s ehooks_t; +struct ehooks_s { + /* Logically an extent_hooks_t *. 
*/ + atomic_p_t ptr; +}; + +void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); + +static inline void +ehooks_set_extent_hooks_ptr(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { + atomic_store_p(&ehooks->ptr, extent_hooks, ATOMIC_RELEASE); +} + +static inline extent_hooks_t * +ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) { + return (extent_hooks_t *)atomic_load_p(&ehooks->ptr, ATOMIC_ACQUIRE); +} + +static inline bool +ehooks_are_default(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks) == &extent_hooks_default; +} + +static inline bool +ehooks_destroy_is_noop(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->destroy == NULL; +} + +static inline bool +ehooks_purge_lazy_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->purge_lazy == NULL; +} + +static inline bool +ehooks_purge_forced_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->purge_forced == NULL; +} + +static inline bool +ehooks_split_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL; +} + +static inline bool +ehooks_merge_will_fail(ehooks_t *ehooks) { + return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL; +} + +static inline void * +ehooks_alloc(ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, + bool *zero, bool *commit, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + return extent_hooks->alloc(extent_hooks, new_addr, size, alignment, + zero, commit, arena_ind); +} + +static inline bool +ehooks_dalloc(ehooks_t *ehooks, void *addr, size_t size, bool committed, + unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->dalloc == NULL) { + return true; + } + return extent_hooks->dalloc(extent_hooks, addr, size, committed, + arena_ind); +} + +static inline void +ehooks_destroy(ehooks_t *ehooks, void *addr, size_t size, bool committed, + unsigned 
arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->destroy == NULL) { + return; + } + extent_hooks->destroy(extent_hooks, addr, size, committed, arena_ind); +} + +static inline bool +ehooks_commit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->commit == NULL) { + return true; + } + return extent_hooks->commit(extent_hooks, addr, size, offset, length, + arena_ind); +} + +static inline bool +ehooks_decommit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->decommit == NULL) { + return true; + } + return extent_hooks->decommit(extent_hooks, addr, size, offset, length, + arena_ind); +} + +static inline bool +ehooks_purge_lazy(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->purge_lazy == NULL) { + return true; + } + return extent_hooks->purge_lazy(extent_hooks, addr, size, offset, + length, arena_ind); +} + +static inline bool +ehooks_purge_forced(ehooks_t *ehooks, void *addr, size_t size, size_t offset, + size_t length, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->purge_forced == NULL) { + return true; + } + return extent_hooks->purge_forced(extent_hooks, addr, size, offset, + length, arena_ind); +} + +static inline bool +ehooks_split(ehooks_t *ehooks, void *addr, size_t size, size_t size_a, + size_t size_b, bool committed, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->split == NULL) { + return true; + } + return extent_hooks->split(extent_hooks, addr, size, size_a, size_b, + 
committed, arena_ind); +} + +static inline bool +ehooks_merge(ehooks_t *ehooks, void *addr_a, size_t size_a, void *addr_b, + size_t size_b, bool committed, unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks->merge == NULL) { + return true; + } + return extent_hooks->merge(extent_hooks, addr_a, size_a, addr_b, size_b, + committed, arena_ind); +} + #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 218ca94f..26828ba5 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" @@ -9,7 +10,6 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); @@ -18,39 +18,34 @@ void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); ph_proto(, extent_avail_, extent_tree_t, extent_t) ph_proto(, extent_heap_, extent_heap_t, extent_t) -extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, void *new_addr, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit); -void extents_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent); -extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_min); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool 
*zero, bool *commit); +extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); +void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent); +extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, size_t npages_min); +extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); -void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b); +void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent); +void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + 
extent_t *extent); +bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length); +extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b); bool extent_boot(void); diff --git a/src/arena.c b/src/arena.c index a272438a..214a97c2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/div.h" +#include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" @@ -252,13 +253,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } void -arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, extent_hooks, &arena->eset_dirty, - extent); + extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, extent); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -426,7 +426,7 @@ arena_may_have_muzzy(arena_t *arena) { extent_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, 
bool *zero) { - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -434,19 +434,18 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, extent_hooks, + extent_t *extent = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, - false, szind, zero, &commit); + extent = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + NULL, usize, sz_large_pad, alignment, false, szind, zero, + &commit); } size_t size = usize + sz_large_pad; if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, - usize, sz_large_pad, alignment, false, szind, zero, - &commit); + extent = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, + sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { /* * extent may be NULL on OOM, but in that case @@ -819,8 +818,8 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, size_t npages_limit, - size_t npages_decay_max, extent_list_t *decay_extents) { + ehooks_t *ehooks, eset_t *eset, size_t npages_limit, + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -828,8 +827,8 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; extent_t *extent; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, extent_hooks, eset, - npages_limit)) != NULL) { + 
(extent = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) + != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; } @@ -837,9 +836,9 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, } static size_t -arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, arena_decay_t *decay, eset_t *eset, - bool all, extent_list_t *decay_extents, bool is_background_thread) { +arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, + bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -864,9 +863,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, - extent_hooks, extent, 0, + ehooks, extent, 0, extent_size_get(extent))) { - extents_dalloc(tsdn, arena, extent_hooks, + extents_dalloc(tsdn, arena, ehooks, &arena->eset_muzzy, extent); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -874,8 +873,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, extent_hooks, - extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); if (config_stats) { nunmapped += npages; } @@ -923,17 +921,16 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); extent_list_t decay_extents; extent_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, extent_hooks, eset, + size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, eset, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { - size_t npurged = arena_decay_stashed(tsdn, arena, - extent_hooks, decay, eset, 
all, &decay_extents, - is_background_thread); + size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, + eset, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -1006,8 +1003,8 @@ static void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, slab); + ehooks_t *ehooks = arena_get_ehooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, slab); } static void @@ -1159,11 +1156,11 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * destroyed, or provide custom extent hooks that track retained * dss-based extents for later reuse. */ - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); extent_t *extent; - while ((extent = extents_evict(tsdn, arena, extent_hooks, + while ((extent = extents_evict(tsdn, arena, ehooks, &arena->eset_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, extent_hooks, extent); + extent_destroy_wrapper(tsdn, arena, ehooks, extent); } } @@ -1204,9 +1201,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { } static extent_t * -arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, const bin_info_t *bin_info, - szind_t szind) { +arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1215,7 +1211,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, zero = false; commit = true; - slab = extent_alloc_wrapper(tsdn, arena, extent_hooks, NULL, + slab = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { @@ -1232,21 +1228,20 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard 
witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, - binind, &zero, &commit); + extent_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_muzzy, NULL, bin_info->slab_size, 0, PAGE, - true, binind, &zero, &commit); + slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, + &commit); } if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, extent_hooks, - bin_info, szind); + slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, + szind); if (slab == NULL) { return NULL; } @@ -1846,9 +1841,9 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, return ret; } -extent_hooks_t * -arena_get_extent_hooks(arena_t *arena) { - return base_extent_hooks_get(arena->base); +ehooks_t * +arena_get_ehooks(arena_t *arena) { + return base_ehooks_get(arena->base); } extent_hooks_t * diff --git a/src/base.c b/src/base.c index 9a55ed2e..92dfca8c 100644 --- a/src/base.c +++ b/src/base.c @@ -29,7 +29,7 @@ metadata_thp_madvise(void) { } static void * -base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { +base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; @@ -37,7 +37,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) /* Use huge page sizes and alignment regardless of opt_metadata_thp. 
*/ assert(size == HUGEPAGE_CEILING(size)); size_t alignment = HUGEPAGE; - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); if (have_madvise_huge && addr) { pages_set_thp_state(addr, size); @@ -46,8 +46,8 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, - &zero, &commit, ind); + addr = ehooks_alloc(ehooks, NULL, size, alignment, &zero, + &commit, ind); post_reentrancy(tsd); } @@ -55,7 +55,7 @@ base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) } static void -base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, +base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, size_t size) { /* * Cascade through dalloc, decommit, purge_forced, and purge_lazy, @@ -67,7 +67,7 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, * may in fact want the end state of all associated virtual memory to be * in some consistent-but-allocated state. */ - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { if (!extent_dalloc_mmap(addr, size)) { goto label_done; } @@ -85,24 +85,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, } else { tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - if (extent_hooks->dalloc != NULL && - !extent_hooks->dalloc(extent_hooks, addr, size, true, - ind)) { + if (!ehooks_dalloc(ehooks, addr, size, true, ind)) { goto label_post_reentrancy; } - if (extent_hooks->decommit != NULL && - !extent_hooks->decommit(extent_hooks, addr, size, 0, size, - ind)) { + if (!ehooks_decommit(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (extent_hooks->purge_forced != NULL && - !extent_hooks->purge_forced(extent_hooks, addr, size, 0, - size, ind)) { + if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (extent_hooks->purge_lazy != NULL && - !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, - ind)) { + if (!ehooks_purge_lazy(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ @@ -248,8 +240,8 @@ base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, * On success a pointer to the initialized base_block_t header is returned. */ static base_block_t * -base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, - unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, + pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -270,7 +262,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? 
min_block_size : next_block_size; - base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind, + base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind, block_size); if (block == NULL) { return NULL; @@ -311,13 +303,13 @@ static extent_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); - extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + ehooks_t *ehooks = base_ehooks_get(base); /* * Drop mutex during base_block_alloc(), because an extent hook will be * called. */ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, ehooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -353,7 +345,16 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, + + /* + * The base will contain the ehooks eventually, but it itself is + * allocated using them. So we use some stack ehooks to bootstrap its + * memory, and then initialize the ehooks within the base_t. 
+ */ + ehooks_t fake_ehooks; + ehooks_init(&fake_ehooks, extent_hooks); + + base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; @@ -365,10 +366,10 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent, &gap_size, base_size, base_alignment); base->ind = ind; - atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED); + ehooks_init(&base->ehooks, extent_hooks); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { - base_unmap(tsdn, extent_hooks, ind, block, block->size); + base_unmap(tsdn, &fake_ehooks, ind, block, block->size); return NULL; } base->pind_last = pind_last; @@ -397,26 +398,26 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { void base_delete(tsdn_t *tsdn, base_t *base) { - extent_hooks_t *extent_hooks = base_extent_hooks_get(base); + ehooks_t *ehooks = base_ehooks_get(base); base_block_t *next = base->blocks; do { base_block_t *block = next; next = block->next; - base_unmap(tsdn, extent_hooks, base_ind_get(base), block, + base_unmap(tsdn, ehooks, base_ind_get(base), block, block->size); } while (next != NULL); } -extent_hooks_t * -base_extent_hooks_get(base_t *base) { - return (extent_hooks_t *)atomic_load_p(&base->extent_hooks, - ATOMIC_ACQUIRE); +ehooks_t * +base_ehooks_get(base_t *base) { + return &base->ehooks; } extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { - extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base); - atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE); + extent_hooks_t *old_extent_hooks = + ehooks_get_extent_hooks_ptr(&base->ehooks); + ehooks_init(&base->ehooks, extent_hooks); return old_extent_hooks; } diff --git a/src/ctl.c b/src/ctl.c index c2f12704..9b88f403 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2400,8 
+2400,9 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { - old_extent_hooks = arena_get_extent_hooks( - arena); + old_extent_hooks = + ehooks_get_extent_hooks_ptr( + arena_get_ehooks(arena)); READ(old_extent_hooks, extent_hooks_t *); } } diff --git a/src/ehooks.c b/src/ehooks.c index 454cb475..0f59f339 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -1,3 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/ehooks.h" + +void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { + ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); +} diff --git a/src/extent.c b/src/extent.c index d21a1e81..23194e1d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -28,40 +28,38 @@ static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained); -static bool extent_decommit_default(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained); +static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); #ifdef PAGES_CAN_PURGE_LAZY static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - 
extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained); + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); #ifdef PAGES_CAN_PURGE_FORCED static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained); + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, - bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b, bool growing_retained); -const extent_hooks_t extent_hooks_default = { +const extent_hooks_t extent_hooks_default = { extent_alloc_default, extent_dalloc_default, extent_destroy_default, @@ -97,16 +95,14 @@ static atomic_zu_t highpages; */ static void extent_deregister(tsdn_t *tsdn, extent_t *extent); -static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, void 
*new_addr, - size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit, bool growing_retained); +static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, eset_t *eset, extent_t *extent, - bool growing_retained); + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, + bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent, bool growing_retained); /******************************************************************************/ @@ -224,13 +220,12 @@ extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent) { +extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { extent_state_set(extent, extent_state_active); bool coalesced; - extent = extent_try_coalesce(tsdn, arena, extent_hooks, rtree_ctx, - eset, extent, &coalesced, false); + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + extent, &coalesced, false); extent_state_set(extent, eset_state_get(eset)); if (!coalesced) { @@ -241,23 +236,23 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, } extent_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t 
*extent_hooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, eset, - new_addr, size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + size, pad, alignment, slab, szind, zero, commit, false); assert(extent == NULL || extent_dumpable_get(extent)); return extent; } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *extent) { +extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); assert(extent_dumpable_get(extent)); @@ -267,12 +262,12 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, extent_addr_set(extent, extent_base_get(extent)); extent_zeroed_set(extent, false); - extent_record(tsdn, arena, extent_hooks, eset, extent, false); + extent_record(tsdn, arena, ehooks, eset, extent, false); } extent_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, size_t npages_min) { +extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -301,8 +296,8 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, break; } /* Try to coalesce. 
*/ - if (extent_try_delayed_coalesce(tsdn, arena, extent_hooks, - rtree_ctx, eset, extent)) { + if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent)) { break; } /* @@ -339,8 +334,8 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *extent, bool growing_retained) { +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { size_t sz = extent_size_get(extent); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); @@ -350,11 +345,10 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, * that this is only a virtual memory leak. */ if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, extent_hooks, - extent, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, extent_hooks, - extent, 0, extent_size_get(extent), - growing_retained); + if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, + growing_retained)) { + extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained); } } extent_dalloc(tsdn, arena, extent); @@ -581,10 +575,9 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { * given allocation request. */ static extent_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool growing_retained) { +extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(alignment > 0); @@ -668,8 +661,8 @@ typedef enum { } extent_split_interior_result_t; static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, +extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ extent_t **extent, extent_t **lead, extent_t **trail, /* The mess to clean up, in case of error. */ @@ -693,9 +686,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the lead. */ if (leadsize != 0) { *lead = *extent; - *extent = extent_split_impl(tsdn, arena, extent_hooks, - *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, - slab, growing_retained); + *extent = extent_split_impl(tsdn, arena, ehooks, *lead, + leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, + growing_retained); if (*extent == NULL) { *to_leak = *lead; *lead = NULL; @@ -705,9 +698,8 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, extent_hooks, *extent, - esize, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); + *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, + szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *extent; *to_salvage = *lead; @@ -745,18 +737,18 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, * and put back into eset. 
*/ static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent, bool growing_retained) { +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, + bool growing_retained) { extent_t *lead; extent_t *trail; extent_t *to_leak; extent_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, - &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok @@ -790,8 +782,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, if (to_leak != NULL) { void *leak = extent_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, extent_hooks, eset, - to_leak, growing_retained); + extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, + growing_retained); assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, false) == NULL); } @@ -807,7 +799,7 @@ extent_need_manual_zero(arena_t *arena) { * default extent hooks installed (in which case the purge semantics may * change); or 2) transparent huge pages enabled. */ - return (!arena_has_default_hooks(arena) || + return (!ehooks_are_default(arena_get_ehooks(arena)) || (opt_thp == thp_mode_always)); } @@ -816,10 +808,9 @@ extent_need_manual_zero(arena_t *arena) { * in the given eset_t. 
*/ static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, - bool growing_retained) { +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(new_addr == NULL || !slab); @@ -829,25 +820,25 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = extent_recycle_extract(tsdn, arena, extent_hooks, + extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); if (extent == NULL) { return NULL; } - extent = extent_recycle_split(tsdn, arena, extent_hooks, rtree_ctx, - eset, new_addr, size, pad, alignment, slab, szind, extent, + extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + new_addr, size, pad, alignment, slab, szind, extent, growing_retained); if (extent == NULL) { return NULL; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, extent_hooks, extent, - 0, extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, extent_hooks, eset, - extent, growing_retained); + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained)) { + extent_record(tsdn, arena, ehooks, eset, extent, + growing_retained); return NULL; } if (!extent_need_manual_zero(arena)) { @@ -985,9 +976,9 @@ extent_hook_post_reentrancy(tsdn_t *tsdn) { * virtual memory ranges retained by each arena. 
*/ static extent_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit) { +extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); assert(pad == 0 || !slab); assert(!*zero || !slab); @@ -1022,14 +1013,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, bool committed = false; void *ptr; - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { ptr = extent_alloc_default_impl(tsdn, arena, NULL, alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); - ptr = extent_hooks->alloc(extent_hooks, NULL, - alloc_size, PAGE, &zeroed, &committed, - arena_ind_get(arena)); + ptr = ehooks_alloc(ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1060,18 +1050,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_t *trail; extent_t *to_leak; extent_t *to_salvage; - extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, extent_hooks, rtree_ctx, &extent, &lead, &trail, - &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, - true); + extent_split_interior_result_t result = extent_split_interior(tsdn, + arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, lead, true); } if (trail != NULL) { - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, trail, true); } } else { @@ -1084,21 +1073,21 @@ extent_grow_retained(tsdn_t *tsdn, arena_t 
*arena, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, extent_hooks, + extents_abandon_vm(tsdn, arena, ehooks, &arena->eset_retained, to_leak, true); } goto label_err; } if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, extent_hooks, extent, 0, + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, extent_size_get(extent), true)) { - extent_record(tsdn, arena, extent_hooks, + extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, true); goto label_err; } @@ -1151,15 +1140,15 @@ label_err: } static extent_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - extent_t *extent = extent_recycle(tsdn, arena, extent_hooks, + extent_t *extent = extent_recycle(tsdn, arena, ehooks, &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (extent != NULL) { @@ -1168,8 +1157,8 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, extent_gdump_add(tsdn, extent); } } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, extent_hooks, size, - pad, alignment, slab, szind, zero, commit); + extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, + alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ -1180,9 +1169,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, } static extent_t * -extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; extent_t *extent = extent_alloc(tsdn, arena); if (extent == NULL) { @@ -1190,14 +1179,14 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } void *addr; size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. */ addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, palignment, zero, commit); } else { extent_hook_pre_reentrancy(tsdn, arena); - addr = extent_hooks->alloc(extent_hooks, new_addr, - esize, palignment, zero, commit, arena_ind_get(arena)); + addr = ehooks_alloc(ehooks, new_addr, esize, palignment, zero, + commit, arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } if (addr == NULL) { @@ -1219,14 +1208,14 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, } extent_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t pad, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_alloc_retained(tsdn, arena, extent_hooks, - new_addr, size, pad, alignment, slab, 
szind, zero, commit); + extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -1237,7 +1226,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, */ return NULL; } - extent = extent_alloc_wrapper_hard(tsdn, arena, extent_hooks, + extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } @@ -1266,15 +1255,14 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *inner, extent_t *outer, bool forward, - bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(arena, eset, inner, outer)); extent_activate_locked(tsdn, arena, eset, outer); malloc_mutex_unlock(tsdn, &eset->mtx); - bool err = extent_merge_impl(tsdn, arena, extent_hooks, + bool err = extent_merge_impl(tsdn, arena, ehooks, forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &eset->mtx); @@ -1286,10 +1274,9 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, } static extent_t * -extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained, - bool inactive_only) { +extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -1318,7 +1305,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - extent_hooks, eset, extent, next, true, + ehooks, eset, extent, next, true, growing_retained)) { if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -1338,7 +1325,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - extent_hooks, eset, extent, prev, false, + ehooks, eset, extent, prev, false, growing_retained)) { extent = prev; if (eset->delay_coalesce) { @@ -1358,19 +1345,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, } static extent_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, - eset, extent, coalesced, growing_retained, false); +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, false); } static extent_t * -extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, rtree_ctx_t *rtree_ctx, eset_t *eset, - extent_t *extent, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, extent_hooks, rtree_ctx, - eset, extent, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, true); } /* @@ -1378,8 +1365,8 @@ extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, * given eset_t (coalesces, deregisters slab interiors, the heap operations). 
*/ static void -extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, - eset_t *eset, extent_t *extent, bool growing_retained) { +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1399,22 +1386,22 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, (uintptr_t)extent_base_get(extent), true) == extent); if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, extent_hooks, - rtree_ctx, eset, extent, NULL, growing_retained); + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent, NULL, growing_retained); } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { assert(eset == &arena->eset_dirty); /* Always coalesce large eset eagerly. */ bool coalesced; do { assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce_large(tsdn, arena, - extent_hooks, rtree_ctx, eset, extent, - &coalesced, growing_retained); + extent = extent_try_coalesce_large(tsdn, arena, ehooks, + rtree_ctx, eset, extent, &coalesced, + growing_retained); } while (coalesced); if (extent_size_get(extent) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, extent_hooks, extent); + arena_decay_extent(tsdn, arena, ehooks, extent); return; } } @@ -1425,7 +1412,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t *extent_hooks, void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1434,7 +1421,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { extent_dalloc(tsdn, arena, extent); return; } - extent_dalloc_wrapper(tsdn, arena, extent_hooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); } static bool @@ -1458,8 +1445,8 @@ extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { bool err; assert(extent_base_get(extent) != NULL); @@ -1470,16 +1457,15 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); /* Try to deallocate. */ - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. 
*/ err = extent_dalloc_default_impl(extent_base_get(extent), extent_size_get(extent)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = (extent_hooks->dalloc == NULL || - extent_hooks->dalloc(extent_hooks, - extent_base_get(extent), extent_size_get(extent), - extent_committed_get(extent), arena_ind_get(arena))); + err = ehooks_dalloc(ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1491,51 +1477,48 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, } void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ - if (extent_hooks != &extent_hooks_default || extent_may_dalloc()) { + if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, extent_hooks, - extent)) { + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { return; } extent_reregister(tsdn, extent); } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } /* Try to decommit; purge if that fails. 
*/ bool zeroed; if (!extent_committed_get(extent)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, extent_hooks, extent, - 0, extent_size_get(extent))) { + } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent))) { zeroed = true; - } else if (extent_hooks->purge_forced != NULL && - !extent_hooks->purge_forced(extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena))) { + } else if (!ehooks_purge_forced(ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { zeroed = true; } else if (extent_state_get(extent) == extent_state_muzzy || - (extent_hooks->purge_lazy != NULL && - !extent_hooks->purge_lazy(extent_hooks, - extent_base_get(extent), extent_size_get(extent), 0, - extent_size_get(extent), arena_ind_get(arena)))) { + !ehooks_purge_lazy(ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { zeroed = false; } else { zeroed = false; } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } extent_zeroed_set(extent, zeroed); @@ -1544,8 +1527,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_gdump_sub(tsdn, extent); } - extent_record(tsdn, arena, extent_hooks, &arena->eset_retained, - extent, false); + extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, + false); } static void @@ -1562,8 +1545,8 @@ extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent) { +extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1575,15 +1558,15 @@ 
extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, extent_addr_set(extent, extent_base_get(extent)); /* Try to destroy; silently fail otherwise. */ - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. */ extent_destroy_default_impl(extent_base_get(extent), extent_size_get(extent)); - } else if (extent_hooks->destroy != NULL) { + } else if (!ehooks_destroy_is_noop(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); - extent_hooks->destroy(extent_hooks, - extent_base_get(extent), extent_size_get(extent), - extent_committed_get(extent), arena_ind_get(arena)); + ehooks_destroy(ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1598,19 +1581,17 @@ extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } static bool -extent_commit_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained) { +extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (extent_hooks->commit == NULL || - extent_hooks->commit(extent_hooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena))); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_commit(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) || !err); @@ -1618,11 +1599,11 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, } bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, +extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { - return extent_commit_impl(tsdn, arena, extent_hooks, extent, offset, - length, false); + return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, + false); } static bool @@ -1633,20 +1614,17 @@ extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } bool -extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) { +extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = (extent_hooks->decommit == NULL || - extent_hooks->decommit(extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena))); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_decommit(ehooks, 
extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } extent_committed_set(extent, extent_committed_get(extent) && err); @@ -1668,22 +1646,20 @@ extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, #endif static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained) { +extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - if (extent_hooks->purge_lazy == NULL) { + if (ehooks_purge_lazy_will_fail(ehooks)) { return true; } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = extent_hooks->purge_lazy(extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena)); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_purge_lazy(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } @@ -1691,11 +1667,10 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, } bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) { - return extent_purge_lazy_impl(tsdn, arena, extent_hooks, extent, - offset, length, false); +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, + length, false); } #ifdef PAGES_CAN_PURGE_FORCED @@ -1713,32 +1688,29 @@ 
extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, #endif static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length, bool growing_retained) { +extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - if (extent_hooks->purge_forced == NULL) { + if (ehooks_purge_forced_will_fail(ehooks)) { return true; } - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = extent_hooks->purge_forced(extent_hooks, - extent_base_get(extent), extent_size_get(extent), offset, length, - arena_ind_get(arena)); - if (extent_hooks != &extent_hooks_default) { + bool err = ehooks_purge_forced(ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } return err; } bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t offset, - size_t length) { - return extent_purge_forced_impl(tsdn, arena, extent_hooks, extent, +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, extent, offset, length, false); } @@ -1765,15 +1737,14 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, * and returns the trail (except in case of error). 
*/ static extent_t * -extent_split_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained) { +extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { assert(extent_size_get(extent) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - if (extent_hooks->split == NULL) { + if (ehooks_split_will_fail(ehooks)) { return NULL; } @@ -1815,13 +1786,13 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_lock2(tsdn, extent, trail); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_pre_reentrancy(tsdn, arena); } - bool err = extent_hooks->split(extent_hooks, extent_base_get(extent), + bool err = ehooks_split(ehooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); - if (extent_hooks != &extent_hooks_default) { + if (!ehooks_are_default(ehooks)) { extent_hook_post_reentrancy(tsdn); } if (err) { @@ -1848,11 +1819,11 @@ label_error_a: } extent_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *extent, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, extent_hooks, extent, size_a, - szind_a, slab_a, size_b, szind_b, slab_b, false); +extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, + slab_a, size_b, szind_b, slab_b, false); } static bool @@ -1914,28 +1885,26 @@ 
extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b, - bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, + extent_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(extent_base_get(a) < extent_base_get(b)); - if (extent_hooks->merge == NULL || extent_head_no_merge(a, b)) { + if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { return true; } bool err; - if (extent_hooks == &extent_hooks_default) { + if (ehooks_are_default(ehooks)) { /* Call directly to propagate tsdn. */ err = extent_merge_default_impl(extent_base_get(a), extent_base_get(b)); } else { extent_hook_pre_reentrancy(tsdn, arena); - err = extent_hooks->merge(extent_hooks, - extent_base_get(a), extent_size_get(a), extent_base_get(b), - extent_size_get(b), extent_committed_get(a), - arena_ind_get(arena)); + err = ehooks_merge(ehooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena_ind_get(arena)); extent_hook_post_reentrancy(tsdn); } @@ -1991,9 +1960,9 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, } bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t *extent_hooks, extent_t *a, extent_t *b) { - return extent_merge_impl(tsdn, arena, extent_hooks, a, b, false); +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, ehooks, a, b, false); } bool diff --git a/src/extent_dss.c b/src/extent_dss.c index dd80a196..59e7e7d6 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -194,9 +194,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - 
extent_hooks_t *extent_hooks = - arena_get_extent_hooks(arena); extent_t extent; + ehooks_t *ehooks = arena_get_ehooks( + arena); extent_init(&extent, arena_ind_get(arena), ret, size, @@ -204,8 +204,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, extent_hooks, &extent, 0, - size)) { + arena, ehooks, &extent, 0, size)) { memset(ret, 0, size); } } diff --git a/src/large.c b/src/large.c index 6de1c570..6fd21bea 100644 --- a/src/large.c +++ b/src/large.c @@ -93,20 +93,20 @@ static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); size_t diff = extent_size_get(extent) - (usize + sz_large_pad); assert(oldusize > usize); - if (extent_hooks->split == NULL) { + if (ehooks_split_will_fail(ehooks)) { return true; } /* Split excess pages. 
*/ if (diff != 0) { extent_t *trail = extent_split_wrapper(tsdn, arena, - extent_hooks, extent, usize + sz_large_pad, - sz_size2index(usize), false, diff, SC_NSIZES, false); + ehooks, extent, usize + sz_large_pad, sz_size2index(usize), + false, diff, SC_NSIZES, false); if (trail == NULL) { return true; } @@ -116,7 +116,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { extent_size_get(trail)); } - arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, trail); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); } arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); @@ -129,10 +129,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool zero) { arena_t *arena = arena_get_from_extent(extent); size_t oldusize = extent_usize_get(extent); - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); + ehooks_t *ehooks = arena_get_ehooks(arena); size_t trailsize = usize - oldusize; - if (extent_hooks->merge == NULL) { + if (ehooks_merge_will_fail(ehooks)) { return true; } @@ -149,17 +149,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, bool commit = true; extent_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_dirty, extent_past_get(extent), trailsize, 0, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, extent_hooks, - &arena->eset_muzzy, extent_past_get(extent), trailsize, 0, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { + if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + &is_zeroed_trail, &commit)) != NULL + || (trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if 
((trail = extent_alloc_wrapper(tsdn, arena, extent_hooks, + if ((trail = extent_alloc_wrapper(tsdn, arena, ehooks, extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; @@ -169,8 +169,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, extent_hooks, extent, trail)) { - extent_dalloc_wrapper(tsdn, arena, extent_hooks, trail); + if (extent_merge_wrapper(tsdn, arena, ehooks, extent, trail)) { + extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } rtree_ctx_t rtree_ctx_fallback; @@ -339,8 +339,8 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - extent_hooks_t *extent_hooks = arena_get_extent_hooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, extent_hooks, extent); + ehooks_t *ehooks = arena_get_ehooks(arena); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, extent); } void From 703fbc0ff584e00899b5b30aa927c55ecc89dabf Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 16:45:40 -0800 Subject: [PATCH 1446/2608] Introduce unsafe reentrancy guards. We have to work to circumvent the safety checks in pre_reentrancy when going down extent hook pathways. Instead, let's explicitly have checked and unchecked guards. --- .../internal/jemalloc_internal_inlines_a.h | 16 ++---------- include/jemalloc/internal/tsd.h | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index fedbd862..98a64780 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -151,24 +151,12 @@ static inline void pre_reentrancy(tsd_t *tsd, arena_t *arena) { /* arena is the current context. 
Reentry from a0 is not allowed. */ assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); - - bool fast = tsd_fast(tsd); - assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); - ++*tsd_reentrancy_levelp_get(tsd); - if (fast) { - /* Prepare slow path for reentrancy. */ - tsd_slow_update(tsd); - assert(tsd_state_get(tsd) == tsd_state_nominal_slow); - } + tsd_pre_reentrancy_raw(tsd); } static inline void post_reentrancy(tsd_t *tsd) { - int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); - assert(*reentrancy_level > 0); - if (--*reentrancy_level == 0) { - tsd_slow_update(tsd); - } + tsd_post_reentrancy_raw(tsd); } #endif /* JEMALLOC_INTERNAL_INLINES_A_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index b7ce7ca2..3465a2d4 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -446,4 +446,30 @@ tsd_state_nocleanup(tsd_t *tsd) { tsd_state_get(tsd) == tsd_state_minimal_initialized; } +/* + * These "raw" tsd reentrancy functions don't have any debug checking to make + * sure that we're not touching arena 0. Better is to call pre_reentrancy and + * post_reentrancy if this is possible. + */ +static inline void +tsd_pre_reentrancy_raw(tsd_t *tsd) { + bool fast = tsd_fast(tsd); + assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); + ++*tsd_reentrancy_levelp_get(tsd); + if (fast) { + /* Prepare slow path for reentrancy. */ + tsd_slow_update(tsd); + assert(tsd_state_get(tsd) == tsd_state_nominal_slow); + } +} + +static inline void +tsd_post_reentrancy_raw(tsd_t *tsd) { + int8_t *reentrancy_level = tsd_reentrancy_levelp_get(tsd); + assert(*reentrancy_level > 0); + if (--*reentrancy_level == 0) { + tsd_slow_update(tsd); + } +} + #endif /* JEMALLOC_INTERNAL_TSD_H */ From dc8b4e6e13fd2a0497f3ab5c0ba9edb92a64f470 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Dec 2019 16:42:44 -0800 Subject: [PATCH 1447/2608] Extent -> Ehooks: Move alloc hook. 
--- include/jemalloc/internal/ehooks.h | 33 +++++++++- src/base.c | 6 +- src/ehooks.c | 69 +++++++++++++++++++++ src/extent.c | 96 ++---------------------------- 4 files changed, 105 insertions(+), 99 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c79ea24b..37087cac 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -11,6 +11,26 @@ struct ehooks_s { atomic_p_t ptr; }; +/* NOT PUBLIC. */ +void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind); +void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind); + +static inline void +ehooks_pre_reentrancy(tsdn_t *tsdn) { + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + tsd_pre_reentrancy_raw(tsd); +} + +static inline void +ehooks_post_reentrancy(tsdn_t *tsdn) { + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + tsd_post_reentrancy_raw(tsd); +} + +/* PUBLIC. 
*/ void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); static inline void @@ -54,11 +74,18 @@ ehooks_merge_will_fail(ehooks_t *ehooks) { } static inline void * -ehooks_alloc(ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit, unsigned arena_ind) { +ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - return extent_hooks->alloc(extent_hooks, new_addr, size, alignment, + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_alloc_impl(tsdn, new_addr, size, + alignment, zero, commit, arena_ind); + } + ehooks_pre_reentrancy(tsdn); + void *ret = extent_hooks->alloc(extent_hooks, new_addr, size, alignment, zero, commit, arena_ind); + ehooks_post_reentrancy(tsdn); + return ret; } static inline bool diff --git a/src/base.c b/src/base.c index 92dfca8c..4f47438f 100644 --- a/src/base.c +++ b/src/base.c @@ -43,12 +43,8 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { pages_set_thp_state(addr, size); } } else { - /* No arena context as we are creating new arenas. */ - tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, NULL); - addr = ehooks_alloc(ehooks, NULL, size, alignment, &zero, + addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, &commit, ind); - post_reentrancy(tsd); } return addr; diff --git a/src/ehooks.c b/src/ehooks.c index 0f59f339..ba62b8da 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -2,7 +2,76 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/extent_mmap.h" void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); } + +/* + * If the caller specifies (!*zero), it is still possible to receive zeroed + * memory, in which case *zero is toggled to true. arena_extent_alloc() takes + * advantage of this to avoid demanding zeroed extents, but taking advantage of + * them if they are returned. + */ +static void * +extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { + void *ret; + + assert(size != 0); + assert(alignment != 0); + + /* "primary" dss. */ + if (have_dss && dss_prec == dss_prec_primary && (ret = + extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) { + return ret; + } + /* mmap. */ + if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) + != NULL) { + return ret; + } + /* "secondary" dss. */ + if (have_dss && dss_prec == dss_prec_secondary && (ret = + extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, + commit)) != NULL) { + return ret; + } + + /* All strategies for allocation failed. 
*/ + return NULL; +} + +void * +ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + arena_t *arena = arena_get(tsdn, arena_ind, false); + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, + ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } + return ret; +} + +void * +ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + tsdn_t *tsdn; + arena_t *arena; + + tsdn = tsdn_fetch(); + arena = arena_get(tsdn, arena_ind, false); + /* + * The arena we're allocating on behalf of must have been initialized + * already. + */ + assert(arena != NULL); + + return ehooks_default_alloc_impl(tsdn, new_addr, size, + ALIGNMENT_CEILING(alignment, PAGE), zero, commit, + arena_ind_get(arena)); +} diff --git a/src/extent.c b/src/extent.c index 23194e1d..96547a5e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,9 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static void *extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, - unsigned arena_ind); static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, @@ -60,7 +57,7 @@ static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained); const extent_hooks_t extent_hooks_default = { - extent_alloc_default, + ehooks_default_alloc, extent_dalloc_default, extent_destroy_default, extent_commit_default, @@ -881,72 +878,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, return 
extent; } -/* - * If the caller specifies (!*zero), it is still possible to receive zeroed - * memory, in which case *zero is toggled to true. arena_extent_alloc() takes - * advantage of this to avoid demanding zeroed extents, but taking advantage of - * them if they are returned. - */ -static void * -extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, dss_prec_t dss_prec) { - void *ret; - - assert(size != 0); - assert(alignment != 0); - - /* "primary" dss. */ - if (have_dss && dss_prec == dss_prec_primary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { - return ret; - } - /* mmap. */ - if ((ret = extent_alloc_mmap(new_addr, size, alignment, zero, commit)) - != NULL) { - return ret; - } - /* "secondary" dss. */ - if (have_dss && dss_prec == dss_prec_secondary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { - return ret; - } - - /* All strategies for allocation failed. */ - return NULL; -} - -static void * -extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, - ATOMIC_RELAXED)); - if (have_madvise_huge && ret) { - pages_set_thp_state(ret, size); - } - return ret; -} - -static void * -extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { - tsdn_t *tsdn; - arena_t *arena; - - tsdn = tsdn_fetch(); - arena = arena_get(tsdn, arena_ind, false); - /* - * The arena we're allocating on behalf of must have been initialized - * already. 
- */ - assert(arena != NULL); - - return extent_alloc_default_impl(tsdn, arena, new_addr, size, - ALIGNMENT_CEILING(alignment, PAGE), zero, commit); -} - static void extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); @@ -1012,16 +943,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool zeroed = false; bool committed = false; - void *ptr; - if (ehooks_are_default(ehooks)) { - ptr = extent_alloc_default_impl(tsdn, arena, NULL, - alloc_size, PAGE, &zeroed, &committed); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - ptr = ehooks_alloc(ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed, arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed, arena_ind_get(arena)); extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, @@ -1177,18 +1100,9 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (extent == NULL) { return NULL; } - void *addr; size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - addr = extent_alloc_default_impl(tsdn, arena, new_addr, esize, - palignment, zero, commit); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - addr = ehooks_alloc(ehooks, new_addr, esize, palignment, zero, - commit, arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + zero, commit, arena_ind_get(arena)); if (addr == NULL) { extent_dalloc(tsdn, arena, extent); return NULL; From bac8e2e5a65a361dec4598419dd10d2b119e8d24 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 10:47:28 -0800 Subject: [PATCH 1448/2608] Extent -> Ehooks: Move dalloc hook. 
--- include/jemalloc/internal/ehooks.h | 20 ++++++++++++++----- src/base.c | 2 +- src/ehooks.c | 14 +++++++++++++ src/extent.c | 32 ++++-------------------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 37087cac..dc03021a 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -18,6 +18,10 @@ void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); +bool ehooks_default_dalloc_impl(void *addr, size_t size); +bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); + static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); @@ -89,14 +93,20 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, } static inline bool -ehooks_dalloc(ehooks_t *ehooks, void *addr, size_t size, bool committed, - unsigned arena_ind) { +ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->dalloc == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_dalloc_impl(addr, size); + } else if (extent_hooks->dalloc == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->dalloc(extent_hooks, addr, size, + committed, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->dalloc(extent_hooks, addr, size, committed, - arena_ind); } static inline void diff --git a/src/base.c b/src/base.c index 4f47438f..52699c50 100644 --- a/src/base.c +++ b/src/base.c @@ -81,7 +81,7 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, } else { tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - if (!ehooks_dalloc(ehooks, addr, size, true, ind)) { + if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { goto label_post_reentrancy; } if (!ehooks_decommit(ehooks, addr, size, 0, size, ind)) { diff --git a/src/ehooks.c b/src/ehooks.c index ba62b8da..9a266ef0 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -75,3 +75,17 @@ ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, ALIGNMENT_CEILING(alignment, PAGE), zero, commit, arena_ind_get(arena)); } + +bool +ehooks_default_dalloc_impl(void *addr, size_t size) { + if (!have_dss || !extent_in_dss(addr)) { + return extent_dalloc_mmap(addr, size); + } + return true; +} + +bool +ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + return ehooks_default_dalloc_impl(addr, size); +} diff --git a/src/extent.c b/src/extent.c index 96547a5e..676d7ac0 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,8 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static bool extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, @@ -58,7 +56,7 @@ static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const extent_hooks_t extent_hooks_default = { ehooks_default_alloc, - extent_dalloc_default, + ehooks_default_dalloc, extent_destroy_default, extent_commit_default, extent_decommit_default @@ -1344,20 +1342,6 @@ extent_may_dalloc(void) { return !opt_retain; } -static bool -extent_dalloc_default_impl(void *addr, size_t size) { - if (!have_dss || !extent_in_dss(addr)) { - return extent_dalloc_mmap(addr, size); - } - return true; -} - -static 
bool -extent_dalloc_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { - return extent_dalloc_default_impl(addr, size); -} - static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent) { @@ -1371,17 +1355,9 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_set(extent, extent_base_get(extent)); /* Try to deallocate. */ - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - err = extent_dalloc_default_impl(extent_base_get(extent), - extent_size_get(extent)); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - err = ehooks_dalloc(ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); if (!err) { extent_dalloc(tsdn, arena, extent); From 5459ec9daeea3144e71abb3b0eb9417a56e7ae95 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:05:18 -0800 Subject: [PATCH 1449/2608] Extent -> Ehooks: Move destroy hook. 
--- include/jemalloc/internal/ehooks.h | 18 ++++++++++++----- src/ehooks.c | 14 ++++++++++++++ src/extent.c | 31 ++++-------------------------- 3 files changed, 31 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index dc03021a..07094d91 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -17,10 +17,12 @@ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); - bool ehooks_default_dalloc_impl(void *addr, size_t size); bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); +void ehooks_default_destroy_impl(void *addr, size_t size); +void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -110,13 +112,19 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline void -ehooks_destroy(ehooks_t *ehooks, void *addr, size_t size, bool committed, - unsigned arena_ind) { +ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->destroy == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_destroy_impl(addr, size); + } else if (extent_hooks->destroy == NULL) { return; + } else { + ehooks_pre_reentrancy(tsdn); + extent_hooks->destroy(extent_hooks, addr, size, committed, + arena_ind); + ehooks_post_reentrancy(tsdn); } - extent_hooks->destroy(extent_hooks, addr, size, committed, arena_ind); } static inline bool diff --git a/src/ehooks.c b/src/ehooks.c index 9a266ef0..ad6fd24e 100644 --- a/src/ehooks.c +++ b/src/ehooks.c 
@@ -89,3 +89,17 @@ ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { return ehooks_default_dalloc_impl(addr, size); } + +void +ehooks_default_destroy_impl(void *addr, size_t size) { + if (!have_dss || !extent_in_dss(addr)) { + pages_unmap(addr, size); + } +} + +void +ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, + bool committed, unsigned arena_ind) { + ehooks_default_destroy_impl(addr, size); +} + diff --git a/src/extent.c b/src/extent.c index 676d7ac0..271fe4a3 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,8 +19,6 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static void extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, @@ -57,7 +55,7 @@ static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const extent_hooks_t extent_hooks_default = { ehooks_default_alloc, ehooks_default_dalloc, - extent_destroy_default, + ehooks_default_destroy, extent_commit_default, extent_decommit_default #ifdef PAGES_CAN_PURGE_LAZY @@ -1421,19 +1419,6 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, false); } -static void -extent_destroy_default_impl(void *addr, size_t size) { - if (!have_dss || !extent_in_dss(addr)) { - pages_unmap(addr, size); - } -} - -static void -extent_destroy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { - extent_destroy_default_impl(addr, size); -} - void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent) { @@ -1448,17 +1433,9 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, 
ehooks_t *ehooks, extent_addr_set(extent, extent_base_get(extent)); /* Try to destroy; silently fail otherwise. */ - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - extent_destroy_default_impl(extent_base_get(extent), - extent_size_get(extent)); - } else if (!ehooks_destroy_is_noop(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - ehooks_destroy(ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + ehooks_destroy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); extent_dalloc(tsdn, arena, extent); } From d78fe241acb79ab4b0b7cb5b48d07be8582fc60a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:25:00 -0800 Subject: [PATCH 1450/2608] Extent -> Ehooks: Move commit and decommit hooks. --- include/jemalloc/internal/ehooks.h | 38 ++++++++++++++++++++-------- src/base.c | 2 +- src/ehooks.c | 23 +++++++++++++++++ src/extent.c | 40 +++--------------------------- 4 files changed, 56 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 07094d91..e9bdca37 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -23,6 +23,12 @@ bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, void ehooks_default_destroy_impl(void *addr, size_t size); void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind); +bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind); +bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t 
offset, size_t length, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -128,25 +134,37 @@ ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_commit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->commit == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_commit_impl(addr, offset, length); + } else if (extent_hooks->commit == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->commit(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->commit(extent_hooks, addr, size, offset, length, - arena_ind); } static inline bool -ehooks_decommit(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->decommit == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_decommit_impl(addr, offset, length); + } else if (extent_hooks->decommit == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->decommit(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->decommit(extent_hooks, addr, size, offset, length, - arena_ind); } static inline bool diff --git a/src/base.c b/src/base.c index 52699c50..6b88b238 100644 --- a/src/base.c +++ b/src/base.c @@ -84,7 +84,7 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, 
void *addr, if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { goto label_post_reentrancy; } - if (!ehooks_decommit(ehooks, addr, size, 0, size, ind)) { + if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { diff --git a/src/ehooks.c b/src/ehooks.c index ad6fd24e..cb02377c 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -103,3 +103,26 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, ehooks_default_destroy_impl(addr, size); } +bool +ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { + return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), + length); +} + +bool +ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + return ehooks_default_commit_impl(addr, offset, length); +} + +bool +ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { + return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), + length); +} + +bool +ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + return ehooks_default_decommit_impl(addr, offset, length); +} diff --git a/src/extent.c b/src/extent.c index 271fe4a3..3eb4961b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,12 +19,8 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static bool extent_commit_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -static bool extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); #ifdef 
PAGES_CAN_PURGE_LAZY static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); @@ -56,8 +52,8 @@ const extent_hooks_t extent_hooks_default = { ehooks_default_alloc, ehooks_default_dalloc, ehooks_default_destroy, - extent_commit_default, - extent_decommit_default + ehooks_default_commit, + ehooks_default_decommit #ifdef PAGES_CAN_PURGE_LAZY , extent_purge_lazy_default @@ -1440,27 +1436,13 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_dalloc(tsdn, arena, extent); } -static bool -extent_commit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { - return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), - length); -} - static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_commit(ehooks, extent_base_get(extent), + bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } extent_committed_set(extent, extent_committed_get(extent) || !err); return err; } @@ -1473,27 +1455,13 @@ extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, false); } -static bool -extent_decommit_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { - return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), - length); -} - bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_decommit(ehooks, extent_base_get(extent), + bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } extent_committed_set(extent, extent_committed_get(extent) && err); return err; } From f83fdf5336b6705bac027cb3f70b6ca4485cb0c1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:26:45 -0800 Subject: [PATCH 1451/2608] Extent: Clean up a comma --- src/extent.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/extent.c b/src/extent.c index 3eb4961b..cb010643 100644 --- a/src/extent.c +++ b/src/extent.c @@ -53,22 +53,17 @@ const extent_hooks_t extent_hooks_default = { ehooks_default_dalloc, ehooks_default_destroy, ehooks_default_commit, - ehooks_default_decommit + ehooks_default_decommit, 
#ifdef PAGES_CAN_PURGE_LAZY - , - extent_purge_lazy_default + extent_purge_lazy_default, #else - , - NULL + NULL, #endif #ifdef PAGES_CAN_PURGE_FORCED - , - extent_purge_forced_default + extent_purge_forced_default, #else - , - NULL + NULL, #endif - , extent_split_default, extent_merge_default }; From 368baa42ef76f1dd44950b5929dc5697c0ac7add Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 12:59:46 -0800 Subject: [PATCH 1452/2608] Extent -> Ehooks: Move purge_lazy hook. --- include/jemalloc/internal/ehooks.h | 22 +++++++++++++++---- src/base.c | 3 ++- src/ehooks.c | 18 +++++++++++++++ src/extent.c | 35 +++--------------------------- 4 files changed, 41 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index e9bdca37..c234ccdb 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -29,6 +29,11 @@ bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); +#ifdef PAGES_CAN_PURGE_LAZY +bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind); +#endif static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -168,14 +173,23 @@ ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_purge_lazy(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); +#ifdef PAGES_CAN_PURGE_LAZY + 
if (extent_hooks == &extent_hooks_default) { + return ehooks_default_purge_lazy_impl(addr, offset, length); + } +#endif if (extent_hooks->purge_lazy == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->purge_lazy(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->purge_lazy(extent_hooks, addr, size, offset, - length, arena_ind); } static inline bool diff --git a/src/base.c b/src/base.c index 6b88b238..48a8c6ab 100644 --- a/src/base.c +++ b/src/base.c @@ -90,7 +90,8 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (!ehooks_purge_lazy(ehooks, addr, size, 0, size, ind)) { + if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, + ind)) { goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ diff --git a/src/ehooks.c b/src/ehooks.c index cb02377c..ae0e980c 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -126,3 +126,21 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { return ehooks_default_decommit_impl(addr, offset, length); } + +#ifdef PAGES_CAN_PURGE_LAZY +bool +ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { + return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), + length); +} + +bool +ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { + assert(addr != NULL); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + return ehooks_default_purge_lazy_impl(addr, offset, length); +} +#endif diff --git a/src/extent.c b/src/extent.c index cb010643..f3fbe95f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -21,10 +21,6 @@ size_t opt_lg_extent_max_active_fit = 
LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -#ifdef PAGES_CAN_PURGE_LAZY -static bool extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -#endif static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); @@ -55,7 +51,7 @@ const extent_hooks_t extent_hooks_default = { ehooks_default_commit, ehooks_default_decommit, #ifdef PAGES_CAN_PURGE_LAZY - extent_purge_lazy_default, + ehooks_default_purge_lazy, #else NULL, #endif @@ -1390,7 +1386,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, arena_ind_get(arena))) { zeroed = true; } else if (extent_state_get(extent) == extent_state_muzzy || - !ehooks_purge_lazy(ehooks, extent_base_get(extent), + !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = false; @@ -1461,38 +1457,13 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return err; } -#ifdef PAGES_CAN_PURGE_LAZY -static bool -extent_purge_lazy_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { - assert(addr != NULL); - assert((offset & PAGE_MASK) == 0); - assert(length != 0); - assert((length & PAGE_MASK) == 0); - - return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), - length); -} -#endif - static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - - if (ehooks_purge_lazy_will_fail(ehooks)) { - return true; - } - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_purge_lazy(ehooks, extent_base_get(extent), + bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } - return err; } From a5b42a1a10048d9562d59e494c9e2cf3ab6943ba Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 13:11:54 -0800 Subject: [PATCH 1453/2608] Extent -> Ehooks: Move purge_forced hook. --- include/jemalloc/internal/ehooks.h | 22 +++++++++++++++---- src/base.c | 3 ++- src/ehooks.c | 18 ++++++++++++++++ src/extent.c | 34 +++--------------------------- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c234ccdb..ae5ef663 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -34,6 +34,11 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif +#ifdef PAGES_CAN_PURGE_FORCED +bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +#endif static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -193,14 +198,23 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_purge_forced(ehooks_t *ehooks, void *addr, size_t size, size_t offset, - size_t length, unsigned arena_ind) { +ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t offset, size_t length, unsigned arena_ind) { 
extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); +#ifdef PAGES_CAN_PURGE_FORCED + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_purge_forced_impl(addr, offset, length); + } +#endif if (extent_hooks->purge_forced == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->purge_forced(extent_hooks, addr, size, + offset, length, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->purge_forced(extent_hooks, addr, size, offset, - length, arena_ind); } static inline bool diff --git a/src/base.c b/src/base.c index 48a8c6ab..92d9bc1e 100644 --- a/src/base.c +++ b/src/base.c @@ -87,7 +87,8 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { goto label_post_reentrancy; } - if (!ehooks_purge_forced(ehooks, addr, size, 0, size, ind)) { + if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size, + ind)) { goto label_post_reentrancy; } if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, diff --git a/src/ehooks.c b/src/ehooks.c index ae0e980c..67ca2381 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -144,3 +144,21 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, return ehooks_default_purge_lazy_impl(addr, offset, length); } #endif + +#ifdef PAGES_CAN_PURGE_FORCED +bool +ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { + return pages_purge_forced((void *)((uintptr_t)addr + + (uintptr_t)offset), length); +} + +bool +ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind) { + assert(addr != NULL); + assert((offset & PAGE_MASK) == 0); + assert(length != 0); + assert((length & PAGE_MASK) == 0); + return ehooks_default_purge_forced_impl(addr, offset, length); +} +#endif diff --git a/src/extent.c b/src/extent.c index f3fbe95f..f4f37976 100644 --- 
a/src/extent.c +++ b/src/extent.c @@ -24,10 +24,6 @@ static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -#ifdef PAGES_CAN_PURGE_FORCED -static bool extent_purge_forced_default(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -#endif static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); @@ -56,7 +52,7 @@ const extent_hooks_t extent_hooks_default = { NULL, #endif #ifdef PAGES_CAN_PURGE_FORCED - extent_purge_forced_default, + ehooks_default_purge_forced, #else NULL, #endif @@ -1381,7 +1377,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, extent_size_get(extent))) { zeroed = true; - } else if (!ehooks_purge_forced(ehooks, extent_base_get(extent), + } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), 0, extent_size_get(extent), arena_ind_get(arena))) { zeroed = true; @@ -1474,37 +1470,13 @@ extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, length, false); } -#ifdef PAGES_CAN_PURGE_FORCED -static bool -extent_purge_forced_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind) { - assert(addr != NULL); - assert((offset & PAGE_MASK) == 0); - assert(length != 0); - assert((length & PAGE_MASK) == 0); - - return pages_purge_forced((void *)((uintptr_t)addr + - (uintptr_t)offset), length); -} -#endif - static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), 
WITNESS_RANK_CORE, growing_retained ? 1 : 0); - - if (ehooks_purge_forced_will_fail(ehooks)) { - return true; - } - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_purge_forced(ehooks, extent_base_get(extent), + bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), extent_size_get(extent), offset, length, arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } return err; } From 1fff4d2ee3f5ab9d288a2b56544c1c8c4d8736da Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 13:48:18 -0800 Subject: [PATCH 1454/2608] Extent -> Ehooks: Move split hook. --- include/jemalloc/internal/ehooks.h | 19 ++++++++++++++----- src/ehooks.c | 20 ++++++++++++++++++++ src/extent.c | 29 +++-------------------------- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index ae5ef663..e84222f6 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -39,6 +39,9 @@ bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); #endif +bool ehooks_default_split_impl(); +bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -218,14 +221,20 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_split(ehooks_t *ehooks, void *addr, size_t size, size_t size_a, - size_t size_b, bool committed, unsigned arena_ind) { +ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = 
ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->split == NULL) { + if (ehooks_are_default(ehooks)) { + return ehooks_default_split_impl(); + } else if (extent_hooks->split == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->split(extent_hooks, addr, size, size_a, + size_b, committed, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->split(extent_hooks, addr, size, size_a, size_b, - committed, arena_ind); } static inline bool diff --git a/src/ehooks.c b/src/ehooks.c index 67ca2381..8bd95500 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -162,3 +162,23 @@ ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, return ehooks_default_purge_forced_impl(addr, offset, length); } #endif + +bool +ehooks_default_split_impl() { + if (!maps_coalesce) { + /* + * Without retain, only whole regions can be purged (required by + * MEM_RELEASE on Windows) -- therefore disallow splitting. See + * comments in extent_head_no_merge(). 
+ */ + return !opt_retain; + } + + return false; +} + +bool +ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + return ehooks_default_split_impl(); +} diff --git a/src/extent.c b/src/extent.c index f4f37976..521c0b96 100644 --- a/src/extent.c +++ b/src/extent.c @@ -27,9 +27,6 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, bool growing_retained); -static bool extent_split_default(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t size_a, size_t size_b, bool committed, - unsigned arena_ind); static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, @@ -56,7 +53,7 @@ const extent_hooks_t extent_hooks_default = { #else NULL, #endif - extent_split_default, + ehooks_default_split, extent_merge_default }; @@ -1487,21 +1484,6 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, offset, length, false); } -static bool -extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { - if (!maps_coalesce) { - /* - * Without retain, only whole regions can be purged (required by - * MEM_RELEASE on Windows) -- therefore disallow splitting. See - * comments in extent_head_no_merge(). - */ - return !opt_retain; - } - - return false; -} - /* * Accepts the extent to split, and the characteristics of each side of the * split. 
The 'a' parameters go with the 'lead' of the resulting pair of @@ -1559,15 +1541,10 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_lock2(tsdn, extent, trail); - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } - bool err = ehooks_split(ehooks, extent_base_get(extent), + bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), size_a + size_b, size_a, size_b, extent_committed_get(extent), arena_ind_get(arena)); - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } + if (err) { goto label_error_c; } From 2fe5108263d013b07572f5aa597ba6ace86ed342 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 14:03:38 -0800 Subject: [PATCH 1455/2608] Extent -> Ehooks: Move merge hook. --- include/jemalloc/internal/ehooks.h | 19 ++++++--- include/jemalloc/internal/extent_externs.h | 1 + src/ehooks.c | 26 ++++++++++++ src/extent.c | 48 +++------------------- 4 files changed, 46 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index e84222f6..48d13fc6 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -42,6 +42,9 @@ bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, bool ehooks_default_split_impl(); bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind); +bool ehooks_default_merge_impl(void *addr_a, void *addr_b); +bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind); static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { @@ -238,14 +241,20 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline bool -ehooks_merge(ehooks_t *ehooks, void *addr_a, size_t size_a, void *addr_b, - size_t size_b, bool committed, unsigned arena_ind) 
{ +ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks->merge == NULL) { + if (extent_hooks == &extent_hooks_default) { + return ehooks_default_merge_impl(addr_a, addr_b); + } else if (extent_hooks->merge == NULL) { return true; + } else { + ehooks_pre_reentrancy(tsdn); + bool err = extent_hooks->merge(extent_hooks, addr_a, size_a, + addr_b, size_b, committed, arena_ind); + ehooks_post_reentrancy(tsdn); + return err; } - return extent_hooks->merge(extent_hooks, addr_a, size_a, addr_b, size_b, - committed, arena_ind); } #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent_externs.h index 26828ba5..4e3803c6 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent_externs.h @@ -46,6 +46,7 @@ extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b); +bool extent_head_no_merge(extent_t *a, extent_t *b); bool extent_boot(void); diff --git a/src/ehooks.c b/src/ehooks.c index 8bd95500..bb328546 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -182,3 +182,29 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { return ehooks_default_split_impl(); } + +bool +ehooks_default_merge_impl(void *addr_a, void *addr_b) { + if (!maps_coalesce && !opt_retain) { + return true; + } + if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { + return true; + } + + return false; +} + +bool +ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + if 
(!maps_coalesce) { + tsdn_t *tsdn = tsdn_fetch(); + extent_t *a = iealloc(tsdn, addr_a); + extent_t *b = iealloc(tsdn, addr_b); + if (extent_head_no_merge(a, b)) { + return true; + } + } + return ehooks_default_merge_impl(addr_a, addr_b); +} diff --git a/src/extent.c b/src/extent.c index 521c0b96..3e78e965 100644 --- a/src/extent.c +++ b/src/extent.c @@ -31,9 +31,6 @@ static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -static bool extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, - size_t size_a, void *addr_b, size_t size_b, bool committed, - unsigned arena_ind); static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained); @@ -54,7 +51,7 @@ const extent_hooks_t extent_hooks_default = { NULL, #endif ehooks_default_split, - extent_merge_default + ehooks_default_merge }; /* Used exclusively for gdump triggering. */ @@ -1576,23 +1573,11 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, slab_a, size_b, szind_b, slab_b, false); } -static bool -extent_merge_default_impl(void *addr_a, void *addr_b) { - if (!maps_coalesce && !opt_retain) { - return true; - } - if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { - return true; - } - - return false; -} - /* * Returns true if the given extents can't be merged because of their head bit * settings. Assumes the second extent has the higher address. 
*/ -static bool +bool extent_head_no_merge(extent_t *a, extent_t *b) { assert(extent_base_get(a) < extent_base_get(b)); /* @@ -1620,20 +1605,6 @@ extent_head_no_merge(extent_t *a, extent_t *b) { return false; } -static bool -extent_merge_default(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - if (!maps_coalesce) { - tsdn_t *tsdn = tsdn_fetch(); - extent_t *a = iealloc(tsdn, addr_a); - extent_t *b = iealloc(tsdn, addr_b); - if (extent_head_no_merge(a, b)) { - return true; - } - } - return extent_merge_default_impl(addr_a, addr_b); -} - static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained) { @@ -1645,18 +1616,9 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, return true; } - bool err; - if (ehooks_are_default(ehooks)) { - /* Call directly to propagate tsdn. */ - err = extent_merge_default_impl(extent_base_get(a), - extent_base_get(b)); - } else { - extent_hook_pre_reentrancy(tsdn, arena); - err = ehooks_merge(ehooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena_ind_get(arena)); - extent_hook_post_reentrancy(tsdn); - } + bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena_ind_get(arena)); if (err) { return true; From c8dae890c88162748c22acbc7885c9ebf8012e10 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 14:23:40 -0800 Subject: [PATCH 1456/2608] Extent -> Ehooks: Move over default hooks. 
--- include/jemalloc/internal/ehooks.h | 23 +++++++++++-------- .../internal/jemalloc_internal_inlines_a.h | 2 +- src/base.c | 2 +- src/ctl.c | 4 ++-- src/ehooks.c | 20 ++++++++++++++++ src/extent.c | 20 ---------------- src/jemalloc.c | 7 +++--- test/unit/base.c | 3 ++- 8 files changed, 43 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 48d13fc6..fbb37137 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/atomic.h" -extern const extent_hooks_t extent_hooks_default; +extern const extent_hooks_t ehooks_default_extent_hooks; typedef struct ehooks_s ehooks_t; struct ehooks_s { @@ -11,6 +11,8 @@ struct ehooks_s { atomic_p_t ptr; }; +extern const extent_hooks_t ehooks_default_extent_hooks; + /* NOT PUBLIC. */ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); @@ -73,7 +75,8 @@ ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) { static inline bool ehooks_are_default(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks) == &extent_hooks_default; + return ehooks_get_extent_hooks_ptr(ehooks) == + &ehooks_default_extent_hooks; } static inline bool @@ -105,7 +108,7 @@ static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_alloc_impl(tsdn, new_addr, size, alignment, zero, commit, arena_ind); } @@ -120,7 +123,7 @@ static inline bool ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == 
&extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_dalloc_impl(addr, size); } else if (extent_hooks->dalloc == NULL) { return true; @@ -137,7 +140,7 @@ static inline void ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_destroy_impl(addr, size); } else if (extent_hooks->destroy == NULL) { return; @@ -153,7 +156,7 @@ static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_commit_impl(addr, offset, length); } else if (extent_hooks->commit == NULL) { return true; @@ -170,7 +173,7 @@ static inline bool ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_decommit_impl(addr, offset, length); } else if (extent_hooks->decommit == NULL) { return true; @@ -188,7 +191,7 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); #ifdef PAGES_CAN_PURGE_LAZY - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_purge_lazy_impl(addr, offset, length); } #endif @@ -208,7 +211,7 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t 
size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); #ifdef PAGES_CAN_PURGE_FORCED - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_purge_forced_impl(addr, offset, length); } #endif @@ -244,7 +247,7 @@ static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &extent_hooks_default) { + if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_merge_impl(addr_a, addr_b); } else if (extent_hooks->merge == NULL) { return true; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 98a64780..f079e853 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -91,7 +91,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { if (unlikely(ret == NULL)) { if (init_if_missing) { ret = arena_init(tsdn, ind, - (extent_hooks_t *)&extent_hooks_default); + (extent_hooks_t *)&ehooks_default_extent_hooks); } } return ret; diff --git a/src/base.c b/src/base.c index 92d9bc1e..a1b45d06 100644 --- a/src/base.c +++ b/src/base.c @@ -511,6 +511,6 @@ base_postfork_child(tsdn_t *tsdn, base_t *base) { bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); + b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks); return (b0 == NULL); } diff --git a/src/ctl.c b/src/ctl.c index 9b88f403..a9982ca3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2377,7 +2377,7 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } old_extent_hooks = - (extent_hooks_t *)&extent_hooks_default; + (extent_hooks_t 
*)&ehooks_default_extent_hooks; READ(old_extent_hooks, extent_hooks_t *); if (newp != NULL) { /* Initialize a new arena as a side effect. */ @@ -2581,7 +2581,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - extent_hooks = (extent_hooks_t *)&extent_hooks_default; + extent_hooks = (extent_hooks_t *)&ehooks_default_extent_hooks; WRITE(extent_hooks, extent_hooks_t *); if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { ret = EAGAIN; diff --git a/src/ehooks.c b/src/ehooks.c index bb328546..728783ee 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -208,3 +208,23 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } return ehooks_default_merge_impl(addr_a, addr_b); } + +const extent_hooks_t ehooks_default_extent_hooks = { + ehooks_default_alloc, + ehooks_default_dalloc, + ehooks_default_destroy, + ehooks_default_commit, + ehooks_default_decommit, +#ifdef PAGES_CAN_PURGE_LAZY + ehooks_default_purge_lazy, +#else + NULL, +#endif +#ifdef PAGES_CAN_PURGE_FORCED + ehooks_default_purge_forced, +#else + NULL, +#endif + ehooks_default_split, + ehooks_default_merge +}; diff --git a/src/extent.c b/src/extent.c index 3e78e965..e7e4712e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -34,26 +34,6 @@ static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_t *b, bool growing_retained); -const extent_hooks_t extent_hooks_default = { - ehooks_default_alloc, - ehooks_default_dalloc, - ehooks_default_destroy, - ehooks_default_commit, - ehooks_default_decommit, -#ifdef PAGES_CAN_PURGE_LAZY - ehooks_default_purge_lazy, -#else - NULL, -#endif -#ifdef PAGES_CAN_PURGE_FORCED - ehooks_default_purge_forced, -#else - NULL, -#endif - ehooks_default_split, - ehooks_default_merge -}; - /* Used exclusively for gdump triggering. 
*/ static atomic_zu_t curpages; static atomic_zu_t highpages; diff --git a/src/jemalloc.c b/src/jemalloc.c index 4fc1a5ec..825a8ed0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -589,7 +589,8 @@ arena_choose_hard(tsd_t *tsd, bool internal) { choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), choose[j], - (extent_hooks_t *)&extent_hooks_default); + (extent_hooks_t *) + &ehooks_default_extent_hooks); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); @@ -1589,8 +1590,8 @@ malloc_init_hard_a0_locked() { * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). */ - if (arena_init(TSDN_NULL, 0, (extent_hooks_t *)&extent_hooks_default) - == NULL) { + if (arena_init(TSDN_NULL, 0, + (extent_hooks_t *)&ehooks_default_extent_hooks) == NULL) { return true; } a0 = arena_get(TSDN_NULL, 0, false); diff --git a/test/unit/base.c b/test/unit/base.c index 6b792cf2..7ced15f7 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -31,7 +31,8 @@ TEST_BEGIN(test_base_hooks_default) { size_t allocated0, allocated1, resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - base = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default); + base = base_new(tsdn, 0, + (extent_hooks_t *)&ehooks_default_extent_hooks); if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, From 39fdc690a0d3a49c1e36d79f625350426480b18f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 17:03:31 -0800 Subject: [PATCH 1457/2608] Ehooks comments and cleanup. 
--- include/jemalloc/internal/ehooks.h | 73 +++++++++++++++--------------- src/ehooks.c | 18 ++++---- 2 files changed, 46 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index fbb37137..97c3f442 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,6 +1,13 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +/* + * This module is the internal interface to the extent hooks (both + * user-specified and external). Eventually, this will give us the flexibility + * to use multiple different versions of user-visible extent-hook APIs under a + * single user interface. + */ + #include "jemalloc/internal/atomic.h" extern const extent_hooks_t ehooks_default_extent_hooks; @@ -13,41 +20,45 @@ struct ehooks_s { extern const extent_hooks_t ehooks_default_extent_hooks; -/* NOT PUBLIC. */ +/* + * These are not really part of the public API. Each hook has a fast-path for + * the default-hooks case that can avoid various small inefficiencies: + * - Forgetting tsd and then calling tsd_get within the hook. + * - Getting more state than necessary out of the extent_t. + * - Doing arena_ind -> arena -> arena_ind lookups. + * By making the calls to these functions visible to the compiler, it can move + * those extra bits of computation down below the fast-paths where they get ignored. 
+ */ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -void *ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit, - unsigned arena_ind); bool ehooks_default_dalloc_impl(void *addr, size_t size); -bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); void ehooks_default_destroy_impl(void *addr, size_t size); -void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind); bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind); #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind); #endif #ifdef PAGES_CAN_PURGE_FORCED bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); #endif bool ehooks_default_split_impl(); -bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind); bool ehooks_default_merge_impl(void *addr_a, void *addr_b); -bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned 
arena_ind); +/* + * We don't officially support reentrancy from wtihin the extent hooks. But + * various people who sit within throwing distance of the jemalloc team want + * that functionality in certain limited cases. The default reentrancy guards + * assert that we're not reentrant from a0 (since it's the bootstrap arena, + * where reentrant allocations would be redirected), which we would incorrectly + * trigger in cases where a0 has extent hooks (those hooks themselves can't be + * reentrant, then, but there are reasonable uses for such functionality, like + * putting internal metadata on hugepages). Therefore, we use the raw + * reentrancy guards. + * + * Eventually, we need to think more carefully about whether and where we + * support allocating from within extent hooks (and what that means for things + * like profiling, stats collection, etc.), and document what the guarantee is. + */ static inline void ehooks_pre_reentrancy(tsdn_t *tsdn) { tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); @@ -60,7 +71,7 @@ ehooks_post_reentrancy(tsdn_t *tsdn) { tsd_post_reentrancy_raw(tsd); } -/* PUBLIC. */ +/* Beginning of the public API. */ void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); static inline void @@ -79,21 +90,11 @@ ehooks_are_default(ehooks_t *ehooks) { &ehooks_default_extent_hooks; } -static inline bool -ehooks_destroy_is_noop(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks)->destroy == NULL; -} - -static inline bool -ehooks_purge_lazy_will_fail(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks)->purge_lazy == NULL; -} - -static inline bool -ehooks_purge_forced_will_fail(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks)->purge_forced == NULL; -} - +/* + * In some cases, a caller needs to allocate resources before attempting to call + * a hook. If that hook is doomed to fail, this is wasteful. We therefore + * include some checks for such cases. 
+ */ static inline bool ehooks_split_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL; diff --git a/src/ehooks.c b/src/ehooks.c index 728783ee..d7d1613f 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -57,7 +57,7 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, return ret; } -void * +static void * ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { tsdn_t *tsdn; @@ -84,7 +84,7 @@ ehooks_default_dalloc_impl(void *addr, size_t size) { return true; } -bool +static bool ehooks_default_dalloc(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { return ehooks_default_dalloc_impl(addr, size); @@ -97,7 +97,7 @@ ehooks_default_destroy_impl(void *addr, size_t size) { } } -void +static void ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { ehooks_default_destroy_impl(addr, size); @@ -109,7 +109,7 @@ ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { length); } -bool +static bool ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { return ehooks_default_commit_impl(addr, offset, length); @@ -121,7 +121,7 @@ ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { length); } -bool +static bool ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { return ehooks_default_decommit_impl(addr, offset, length); @@ -134,7 +134,7 @@ ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { length); } -bool +static bool ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(addr != NULL); @@ -152,7 +152,7 @@ ehooks_default_purge_forced_impl(void 
*addr, size_t offset, size_t length) { (uintptr_t)offset), length); } -bool +static bool ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { assert(addr != NULL); @@ -177,7 +177,7 @@ ehooks_default_split_impl() { return false; } -bool +static bool ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { return ehooks_default_split_impl(); @@ -195,7 +195,7 @@ ehooks_default_merge_impl(void *addr_a, void *addr_b) { return false; } -bool +static bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { if (!maps_coalesce) { From e08c581cf1ae5fe8a6735f7b92b7780527125287 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 17:43:53 -0800 Subject: [PATCH 1458/2608] Extent: Get rid of extent-specific pre/post reentrancy calls. These are taken care of by the ehook module; the extra increments and decrements are safe but unnecessary. --- src/extent.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/src/extent.c b/src/extent.c index e7e4712e..ea7b8f2c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -831,29 +831,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, return extent; } -static void -extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { - tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - if (arena == arena_get(tsd_tsdn(tsd), 0, false)) { - /* - * The only legitimate case of customized extent hooks for a0 is - * hooks with no allocation activities. One such example is to - * place metadata on pre-allocated resources such as huge pages. - * In that case, rely on reentrancy_level checks to catch - * infinite recursions. 
- */ - pre_reentrancy(tsd, NULL); - } else { - pre_reentrancy(tsd, arena); - } -} - -static void -extent_hook_post_reentrancy(tsdn_t *tsdn) { - tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - post_reentrancy(tsd); -} - /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint @@ -1341,9 +1318,6 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_reregister(tsdn, extent); } - if (!ehooks_are_default(ehooks)) { - extent_hook_pre_reentrancy(tsdn, arena); - } /* Try to decommit; purge if that fails. */ bool zeroed; if (!extent_committed_get(extent)) { @@ -1363,9 +1337,6 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } else { zeroed = false; } - if (!ehooks_are_default(ehooks)) { - extent_hook_post_reentrancy(tsdn); - } extent_zeroed_set(extent, zeroed); if (config_prof) { From 92a511d385d1a256a42c6bf8cfc3dd9adb1f5217 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Dec 2019 18:31:47 -0800 Subject: [PATCH 1459/2608] Make extent module hermetic. In the form of extent2.h. The naming leaves something to be desired, but I'll leave that for a later diff. 
--- Makefile.in | 1 + include/jemalloc/internal/bin.h | 2 - include/jemalloc/internal/extent.h | 3 + .../internal/{extent_externs.h => extent2.h} | 51 +- include/jemalloc/internal/extent_inlines.h | 38 - include/jemalloc/internal/extent_structs.h | 33 - include/jemalloc/internal/extent_types.h | 13 - .../internal/jemalloc_internal_includes.h | 3 - .../internal/jemalloc_internal_inlines_b.h | 1 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/extent.c | 1718 +--------------- src/extent2.c | 1738 +++++++++++++++++ 13 files changed, 1788 insertions(+), 1815 deletions(-) rename include/jemalloc/internal/{extent_externs.h => extent2.h} (62%) delete mode 100644 include/jemalloc/internal/extent_inlines.h delete mode 100644 include/jemalloc/internal/extent_structs.h delete mode 100644 include/jemalloc/internal/extent_types.h create mode 100644 src/extent2.c diff --git a/Makefile.in b/Makefile.in index a735e0e6..29977bc0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -107,6 +107,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ + $(srcroot)src/extent2.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 0d6aff8b..92e8122d 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -4,8 +4,6 @@ #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/extent_types.h" -#include "jemalloc/internal/extent_structs.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index fa7d1260..2fd6e906 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -620,4 +620,7 @@ extent_esnead_comp(const 
extent_t *a, const extent_t *b) { return ret; } +ph_proto(, extent_avail_, extent_tree_t, extent_t) +ph_proto(, extent_heap_, extent_heap_t, extent_t) + #endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/extent_externs.h b/include/jemalloc/internal/extent2.h similarity index 62% rename from include/jemalloc/internal/extent_externs.h rename to include/jemalloc/internal/extent2.h index 4e3803c6..22035bba 100644 --- a/include/jemalloc/internal/extent_externs.h +++ b/include/jemalloc/internal/extent2.h @@ -1,23 +1,56 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_EXTERNS_H -#define JEMALLOC_INTERNAL_EXTENT_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_EXTENT2_H +#define JEMALLOC_INTERNAL_EXTENT2_H #include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/eset.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" +/* + * This module contains the page-level allocator. It chooses the addresses that + * allocations requested by other modules will inhabit, and updates the global + * metadata to reflect allocation/deallocation/purging decisions. + * + * The naming ("extent2" for the module, and "extent_" or "extents_" for most of + * the functions) is historical. Eventually, the naming should be updated to + * reflect the functionality. Similarly, the utilization stats live here for no + * particular reason. This will also be changed, but much more immediately. + */ + +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. 
+ */ +typedef struct extent_util_stats_s extent_util_stats_t; +struct extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; +struct extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + +/* + * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. + */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); -ph_proto(, extent_avail_, extent_tree_t, extent_t) -ph_proto(, extent_heap_, extent_heap_t, extent_t) - extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); @@ -56,4 +89,4 @@ void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size, size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); -#endif /* JEMALLOC_INTERNAL_EXTENT_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_EXTENT2_H */ diff --git a/include/jemalloc/internal/extent_inlines.h b/include/jemalloc/internal/extent_inlines.h deleted file mode 100644 index 2647df8a..00000000 --- a/include/jemalloc/internal/extent_inlines.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_INLINES_H -#define JEMALLOC_INTERNAL_EXTENT_INLINES_H - -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" -#include "jemalloc/internal/pages.h" -#include "jemalloc/internal/prng.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/sz.h" - -static 
inline void -extent_lock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); -} - -static inline void -extent_unlock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); -} - -static inline void -extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); -} - -static inline void -extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); -} - -#endif /* JEMALLOC_INTERNAL_EXTENT_INLINES_H */ diff --git a/include/jemalloc/internal/extent_structs.h b/include/jemalloc/internal/extent_structs.h deleted file mode 100644 index 4e6e085c..00000000 --- a/include/jemalloc/internal/extent_structs.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_STRUCTS_H -#define JEMALLOC_INTERNAL_EXTENT_STRUCTS_H - -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/sc.h" -#include "jemalloc/internal/slab_data.h" - -/* - * The following two structs are for experimental purposes. See - * experimental_utilization_query_ctl and - * experimental_utilization_batch_query_ctl in src/ctl.c. 
- */ - -struct extent_util_stats_s { - size_t nfree; - size_t nregs; - size_t size; -}; - -struct extent_util_stats_verbose_s { - void *slabcur_addr; - size_t nfree; - size_t nregs; - size_t size; - size_t bin_nfree; - size_t bin_nregs; -}; - -#endif /* JEMALLOC_INTERNAL_EXTENT_STRUCTS_H */ diff --git a/include/jemalloc/internal/extent_types.h b/include/jemalloc/internal/extent_types.h deleted file mode 100644 index 25b360eb..00000000 --- a/include/jemalloc/internal/extent_types.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_TYPES_H -#define JEMALLOC_INTERNAL_EXTENT_TYPES_H - -typedef struct extent_util_stats_s extent_util_stats_t; -typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; - -/* - * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) - * is the max ratio between the size of the active extent and the new extent. - */ -#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 - -#endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 6755b43e..75a94d3a 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/extent_types.h" #include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tcache_types.h" @@ -61,7 +60,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/extent_externs.h" #include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large_externs.h" @@ -81,7 +79,6 @@ */ #include "jemalloc/internal/prof_inlines_a.h" #include 
"jemalloc/internal/arena_inlines_a.h" -#include "jemalloc/internal/extent_inlines.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/arena_inlines_b.h" diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index f0b73d02..d4cb04c2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/extent2.h" #include "jemalloc/internal/rtree.h" /* Choose an arena based on a per-thread value. */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index e6803120..4118b911 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -47,6 +47,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ce51930a..ed3b5248 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -47,6 +47,7 @@ + diff --git a/src/extent.c b/src/extent.c index ea7b8f2c..1a5a1fa6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1,1722 +1,6 @@ -#define JEMALLOC_EXTENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/extent_dss.h" -#include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rtree.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" - -/******************************************************************************/ -/* Data. */ - -rtree_t extents_rtree; -/* Keyed by the address of the extent_t being protected. 
*/ -mutex_pool_t extent_mutex_pool; - -size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; - -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained); -static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, - bool growing_retained); -static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, - bool growing_retained); -static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b, bool growing_retained); - -/* Used exclusively for gdump triggering. */ -static atomic_zu_t curpages; -static atomic_zu_t highpages; - -/******************************************************************************/ -/* - * Function prototypes for static functions that are referenced prior to - * definition. 
- */ - -static void extent_deregister(tsdn_t *tsdn, extent_t *extent); -static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, - bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent, bool growing_retained); - -/******************************************************************************/ - -#define ATTR_NONE /* does nothing */ - -ph_gen(ATTR_NONE, extent_avail_, extent_tree_t, extent_t, ph_link, +ph_gen(, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) - -#undef ATTR_NONE - -typedef enum { - lock_result_success, - lock_result_failure, - lock_result_no_extent -} lock_result_t; - -static lock_result_t -extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - extent_t **result, bool inactive_only) { - extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, - elm, true); - - /* Slab implies active extents and should be skipped. */ - if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &extents_rtree, elm, true))) { - return lock_result_no_extent; - } - - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->extent mapping. We have to recheck while holding the lock. 
- */ - extent_lock(tsdn, extent1); - extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, elm, true); - - if (extent1 == extent2) { - *result = extent1; - return lock_result_success; - } else { - extent_unlock(tsdn, extent1); - return lock_result_failure; - } -} - -/* - * Returns a pool-locked extent_t * if there's one associated with the given - * address, and NULL otherwise. - */ -static extent_t * -extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, - bool inactive_only) { - extent_t *ret = NULL; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - lock_result_t lock_result; - do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, - inactive_only); - } while (lock_result == lock_result_failure); - return ret; -} - -static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, - size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_prng_statep_get(tsd), lg_range); - } else { - uint64_t stack_value = (uint64_t)(uintptr_t)&r; - r = (size_t)prng_lg_range_u64(&stack_value, lg_range); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); - } -} - -extent_t * -extent_alloc(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_t *extent = extent_avail_first(&arena->extent_avail); - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return base_alloc_extent(tsdn, arena->base); - } - extent_avail_remove(&arena->extent_avail, 
extent); - atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return extent; -} - -void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_avail_insert(&arena->extent_avail, extent); - atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); -} - ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) - -static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { - extent_state_set(extent, extent_state_active); - bool coalesced; - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, - extent, &coalesced, false); - extent_state_set(extent, eset_state_get(eset)); - - if (!coalesced) { - return true; - } - eset_insert_locked(tsdn, eset, extent); - return false; -} - -extent_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - assert(size + pad != 0); - assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; -} - -void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - assert(extent_dumpable_get(extent)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_addr_set(extent, extent_base_get(extent)); - extent_zeroed_set(extent, false); - - extent_record(tsdn, arena, 
ehooks, eset, extent, false); -} - -extent_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - size_t npages_min) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - malloc_mutex_lock(tsdn, &eset->mtx); - - /* - * Get the LRU coalesced extent, if any. If coalescing was delayed, - * the loop will iterate until the LRU extent is fully coalesced. - */ - extent_t *extent; - while (true) { - /* Get the LRU extent, if any. */ - extent = extent_list_first(&eset->lru); - if (extent == NULL) { - goto label_return; - } - /* Check the eviction limit. */ - size_t extents_npages = atomic_load_zu(&eset->npages, - ATOMIC_RELAXED); - if (extents_npages <= npages_min) { - extent = NULL; - goto label_return; - } - eset_remove_locked(tsdn, eset, extent); - if (!eset->delay_coalesce) { - break; - } - /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent)) { - break; - } - /* - * The LRU extent was just coalesced and the result placed in - * the LRU at its neighbor's position. Start over. - */ - } - - /* - * Either mark the extent active or deregister it to protect against - * concurrent operations. - */ - switch (eset_state_get(eset)) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - case extent_state_muzzy: - extent_state_set(extent, extent_state_active); - break; - case extent_state_retained: - extent_deregister(tsdn, extent); - break; - default: - not_reached(); - } - -label_return: - malloc_mutex_unlock(tsdn, &eset->mtx); - return extent; -} - -/* - * This can only happen when we fail to allocate a new extent struct (which - * indicates OOM), e.g. when trying to split an existing extent. 
- */ -static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { - size_t sz = extent_size_get(extent); - if (config_stats) { - arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); - } - /* - * Leak extent after making sure its pages have already been purged, so - * that this is only a virtual memory leak. - */ - if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, - growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained); - } - } - extent_dalloc(tsdn, arena, extent); -} - -static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extent_state_active); - - extent_state_set(extent, eset_state_get(eset)); - eset_insert_locked(tsdn, eset, extent); -} - -static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - malloc_mutex_lock(tsdn, &eset->mtx); - extent_deactivate_locked(tsdn, arena, eset, extent); - malloc_mutex_unlock(tsdn, &eset->mtx); -} - -static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == eset_state_get(eset)); - - eset_remove_locked(tsdn, eset, extent); - extent_state_set(extent, extent_state_active); -} - -static bool -extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent, bool dependent, bool init_missing, - rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { - *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) { - return true; - } - assert(*r_elm_a != 
NULL); - - *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_last_get(extent), dependent, init_missing); - if (!dependent && *r_elm_b == NULL) { - return true; - } - assert(*r_elm_b != NULL); - - return false; -} - -static void -extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); - if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, - slab); - } -} - -static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, - szind_t szind) { - assert(extent_slab_get(extent)); - - /* Register interior. */ - for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE), extent, szind, true); - } -} - -static void -extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { - cassert(config_prof); - /* prof_gdump() requirement. */ - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nadd = extent_size_get(extent) >> LG_PAGE; - size_t cur = atomic_fetch_add_zu(&curpages, nadd, - ATOMIC_RELAXED) + nadd; - size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); - while (cur > high && !atomic_compare_exchange_weak_zu( - &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { - /* - * Don't refresh cur, because it may have decreased - * since this thread lost the highpages update race. - * Note that high is updated in case of CAS failure. 
- */ - } - if (cur > high && prof_gdump_get_unlocked()) { - prof_gdump(tsdn); - } - } -} - -static void -extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { - cassert(config_prof); - - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nsub = extent_size_get(extent) >> LG_PAGE; - assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); - atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); - } -} - -static bool -extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - - /* - * We need to hold the lock to protect against a concurrent coalesce - * operation that sees us in a partial state. - */ - extent_lock(tsdn, extent); - - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, - &elm_a, &elm_b)) { - extent_unlock(tsdn, extent); - return true; - } - - szind_t szind = extent_szind_get_maybe_invalid(extent); - bool slab = extent_slab_get(extent); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); - if (slab) { - extent_interior_register(tsdn, rtree_ctx, extent, szind); - } - - extent_unlock(tsdn, extent); - - if (config_prof && gdump_add) { - extent_gdump_add(tsdn, extent); - } - - return false; -} - -static bool -extent_register(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, true); -} - -static bool -extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, false); -} - -static void -extent_reregister(tsdn_t *tsdn, extent_t *extent) { - bool err = extent_register(tsdn, extent); - assert(!err); -} - -/* - * Removes all pointers to the given extent from the global rtree indices for - * its interior. This is relevant for slab extents, for which we need to do - * metadata lookups at places other than the head of the extent. 
We deregister - * on the interior, then, when an extent moves from being an active slab to an - * inactive state. - */ -static void -extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - extent_t *extent) { - size_t i; - - assert(extent_slab_get(extent)); - - for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE)); - } -} - -/* - * Removes all pointers to the given extent from the global rtree. - */ -static void -extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, - &elm_a, &elm_b); - - extent_lock(tsdn, extent); - - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); - } - - extent_unlock(tsdn, extent); - - if (config_prof && gdump) { - extent_gdump_sub(tsdn, extent); - } -} - -static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, true); -} - -static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, false); -} - -/* - * Tries to find and remove an extent from eset that can be used for the - * given allocation request. - */ -static extent_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - assert(alignment > 0); - if (config_debug && new_addr != NULL) { - /* - * Non-NULL new_addr has two use cases: - * - * 1) Recycle a known-extant extent, e.g. during purging. - * 2) Perform in-place expanding reallocation. - * - * Regardless of use case, new_addr must either refer to a - * non-existing extent, or to the base of an extant extent, - * since only active slabs support interior lookups (which of - * course cannot be recycled). - */ - assert(PAGE_ADDR2BASE(new_addr) == new_addr); - assert(pad == 0); - assert(alignment <= PAGE); - } - - size_t esize = size + pad; - malloc_mutex_lock(tsdn, &eset->mtx); - extent_t *extent; - if (new_addr != NULL) { - extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, - false); - if (extent != NULL) { - /* - * We might null-out extent to report an error, but we - * still need to unlock the associated mutex after. - */ - extent_t *unlock_extent = extent; - assert(extent_base_get(extent) == new_addr); - if (extent_arena_ind_get(extent) - != arena_ind_get(arena) || - extent_size_get(extent) < esize || - extent_state_get(extent) != - eset_state_get(eset)) { - extent = NULL; - } - extent_unlock(tsdn, unlock_extent); - } - } else { - extent = eset_fit_locked(tsdn, eset, esize, alignment); - } - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &eset->mtx); - return NULL; - } - - extent_activate_locked(tsdn, arena, eset, extent); - malloc_mutex_unlock(tsdn, &eset->mtx); - - return extent; -} - -/* - * Given an allocation request and an extent guaranteed to be able to satisfy - * it, this splits off lead and trail extents, leaving extent pointing to an - * extent satisfying the allocation. - * This function doesn't put lead or trail into any eset_t; it's the caller's - * job to ensure that they can be reused. - */ -typedef enum { - /* - * Split successfully. lead, extent, and trail, are modified to extents - * describing the ranges before, in, and after the given allocation. 
- */ - extent_split_interior_ok, - /* - * The extent can't satisfy the given allocation request. None of the - * input extent_t *s are touched. - */ - extent_split_interior_cant_alloc, - /* - * In a potentially invalid state. Must leak (if *to_leak is non-NULL), - * and salvage what's still salvageable (if *to_salvage is non-NULL). - * None of lead, extent, or trail are valid. - */ - extent_split_interior_error -} extent_split_interior_result_t; - -static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, - /* The result of splitting, in case of success. */ - extent_t **extent, extent_t **lead, extent_t **trail, - /* The mess to clean up, in case of error. */ - extent_t **to_leak, extent_t **to_salvage, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool growing_retained) { - size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); - assert(new_addr == NULL || leadsize == 0); - if (extent_size_get(*extent) < leadsize + esize) { - return extent_split_interior_cant_alloc; - } - size_t trailsize = extent_size_get(*extent) - leadsize - esize; - - *lead = NULL; - *trail = NULL; - *to_leak = NULL; - *to_salvage = NULL; - - /* Split the lead. */ - if (leadsize != 0) { - *lead = *extent; - *extent = extent_split_impl(tsdn, arena, ehooks, *lead, - leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, - growing_retained); - if (*extent == NULL) { - *to_leak = *lead; - *lead = NULL; - return extent_split_interior_error; - } - } - - /* Split the trail. 
*/ - if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, - szind, slab, trailsize, SC_NSIZES, false, growing_retained); - if (*trail == NULL) { - *to_leak = *extent; - *to_salvage = *lead; - *lead = NULL; - *extent = NULL; - return extent_split_interior_error; - } - } - - if (leadsize == 0 && trailsize == 0) { - /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. - */ - extent_szind_set(*extent, szind); - if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(*extent), szind, slab); - if (slab && extent_size_get(*extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(*extent) - - (uintptr_t)PAGE, szind, slab); - } - } - } - - return extent_split_interior_ok; -} - -/* - * This fulfills the indicated allocation request out of the given extent (which - * the caller should have ensured was big enough). If there's any unused space - * before or after the resulting allocation, that space is given its own extent - * and put back into eset. - */ -static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, - bool growing_retained) { - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; - - extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, - &to_salvage, new_addr, size, pad, alignment, slab, szind, - growing_retained); - - if (!maps_coalesce && result != extent_split_interior_ok - && !opt_retain) { - /* - * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the eset. 
- */ - assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, eset, to_leak); - return NULL; - } - - if (result == extent_split_interior_ok) { - if (lead != NULL) { - extent_deactivate(tsdn, arena, eset, lead); - } - if (trail != NULL) { - extent_deactivate(tsdn, arena, eset, trail); - } - return extent; - } else { - /* - * We should have picked an extent that was large enough to - * fulfill our allocation request. - */ - assert(result == extent_split_interior_error); - if (to_salvage != NULL) { - extent_deregister(tsdn, to_salvage); - } - if (to_leak != NULL) { - void *leak = extent_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, - growing_retained); - assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, - false) == NULL); - } - return NULL; - } - unreachable(); -} - -static bool -extent_need_manual_zero(arena_t *arena) { - /* - * Need to manually zero the extent on repopulating if either; 1) non - * default extent hooks installed (in which case the purge semantics may - * change); or 2) transparent huge pages enabled. - */ - return (!ehooks_are_default(arena_get_ehooks(arena)) || - (opt_thp == thp_mode_always)); -} - -/* - * Tries to satisfy the given allocation request by reusing one of the extents - * in the given eset_t. - */ -static extent_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); - assert(!*zero || !slab); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, eset, new_addr, size, pad, alignment, slab, - growing_retained); - if (extent == NULL) { - return NULL; - } - - extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, - new_addr, size, pad, alignment, slab, szind, extent, - growing_retained); - if (extent == NULL) { - return NULL; - } - - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, ehooks, eset, extent, - growing_retained); - return NULL; - } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); - } - } - - if (extent_committed_get(extent)) { - *commit = true; - } - if (extent_zeroed_get(extent)) { - *zero = true; - } - - if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); - } - assert(extent_state_get(extent) == extent_state_active); - if (slab) { - extent_slab_set(extent, slab); - extent_interior_register(tsdn, rtree_ctx, extent, szind); - } - - if (*zero) { - void *addr = extent_base_get(extent); - if (!extent_zeroed_get(extent)) { - size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } - } else if (config_debug) { - size_t *p = (size_t *)(uintptr_t)addr; - /* Check the first page only. */ - for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { - assert(p[i] == 0); - } - } - } - return extent; -} - -/* - * If virtual memory is retained, create increasingly larger extents from which - * to split requested extents in order to limit the total number of disjoint - * virtual memory ranges retained by each arena. 
- */ -static extent_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); - assert(pad == 0 || !slab); - assert(!*zero || !slab); - - size_t esize = size + pad; - size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. */ - if (alloc_size_min < esize) { - goto label_err; - } - /* - * Find the next extent size in the series that would be large enough to - * satisfy this request. - */ - pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); - while (alloc_size < alloc_size_min) { - egn_skip++; - if (arena->extent_grow_next + egn_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { - /* Outside legal range. */ - goto label_err; - } - alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); - } - - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { - goto label_err; - } - bool zeroed = false; - bool committed = false; - - void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed, arena_ind_get(arena)); - - extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); - if (ptr == NULL) { - extent_dalloc(tsdn, arena, extent); - goto label_err; - } - - if (extent_register_no_gdump_add(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); - goto label_err; - } - - if (extent_zeroed_get(extent) && extent_committed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; - extent_split_interior_result_t result = extent_split_interior(tsdn, - 
arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, - &to_salvage, NULL, size, pad, alignment, slab, szind, true); - - if (result == extent_split_interior_ok) { - if (lead != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, lead, true); - } - if (trail != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, trail, true); - } - } else { - /* - * We should have allocated a sufficiently large extent; the - * cant_alloc case should not occur. - */ - assert(result == extent_split_interior_error); - if (to_salvage != NULL) { - if (config_prof) { - extent_gdump_add(tsdn, to_salvage); - } - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, to_salvage, true); - } - if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, - &arena->eset_retained, to_leak, true); - } - goto label_err; - } - - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), true)) { - extent_record(tsdn, arena, ehooks, - &arena->eset_retained, extent, true); - goto label_err; - } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); - } - } - - /* - * Increment extent_grow_next if doing so wouldn't exceed the allowed - * range. - */ - if (arena->extent_grow_next + egn_skip + 1 <= - arena->retain_grow_limit) { - arena->extent_grow_next += egn_skip + 1; - } else { - arena->extent_grow_next = arena->retain_grow_limit; - } - /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - - if (config_prof) { - /* Adjust gdump stats now that extent is final size. 
*/ - extent_gdump_add(tsdn, extent); - } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); - } - if (slab) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_slab_set(extent, true); - extent_interior_register(tsdn, rtree_ctx, extent, szind); - } - if (*zero && !extent_zeroed_get(extent)) { - void *addr = extent_base_get(extent); - size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } - } - - return extent; -label_err: - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - return NULL; -} - -static extent_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - assert(size != 0); - assert(alignment != 0); - - malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - - extent_t *extent = extent_recycle(tsdn, arena, ehooks, - &arena->eset_retained, new_addr, size, pad, alignment, slab, - szind, zero, commit, true); - if (extent != NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - if (config_prof) { - extent_gdump_add(tsdn, extent); - } - } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, - alignment, slab, szind, zero, commit); - /* extent_grow_retained() always releases extent_grow_mtx. 
*/ - } else { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); - } - malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); - - return extent; -} - -static extent_t * -extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - size_t esize = size + pad; - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { - return NULL; - } - size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, - zero, commit, arena_ind_get(arena)); - if (addr == NULL) { - extent_dalloc(tsdn, arena, extent); - return NULL; - } - extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, *zero, *commit, - true, EXTENT_NOT_HEAD); - if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); - } - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); - return NULL; - } - - return extent; -} - -extent_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); - if (extent == NULL) { - if (opt_retain && new_addr != NULL) { - /* - * When retain is enabled and new_addr is set, we do not - * attempt extent_alloc_wrapper_hard which does mmap - * that is very unlikely to succeed (unless it happens - * to be at the end). 
- */ - return NULL; - } - extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); - } - - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; -} - -static bool -extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, - const extent_t *outer) { - assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); - if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { - return false; - } - - assert(extent_state_get(inner) == extent_state_active); - if (extent_state_get(outer) != eset->state) { - return false; - } - - if (extent_committed_get(inner) != extent_committed_get(outer)) { - return false; - } - - return true; -} - -static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, eset, inner, outer)); - - extent_activate_locked(tsdn, arena, eset, outer); - - malloc_mutex_unlock(tsdn, &eset->mtx); - bool err = extent_merge_impl(tsdn, arena, ehooks, - forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &eset->mtx); - - if (err) { - extent_deactivate_locked(tsdn, arena, eset, outer); - } - - return err; -} - -static extent_t * -extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, - bool growing_retained, bool inactive_only) { - /* - * We avoid checking / locking inactive neighbors for large size - * classes, since they are eagerly coalesced on deallocation which can - * cause lock contention. - */ - /* - * Continue attempting to coalesce until failure, to protect against - * races with other threads that are thwarted by this one. - */ - bool again; - do { - again = false; - - /* Try to coalesce forward. 
*/ - extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, - extent_past_get(extent), inactive_only); - if (next != NULL) { - /* - * eset->mtx only protects against races for - * like-state eset, so call extent_can_coalesce() - * before releasing next's pool lock. - */ - bool can_coalesce = extent_can_coalesce(arena, eset, - extent, next); - - extent_unlock(tsdn, next); - - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, next, true, - growing_retained)) { - if (eset->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return extent; - } - again = true; - } - } - - /* Try to coalesce backward. */ - extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, - extent_before_get(extent), inactive_only); - if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, eset, - extent, prev); - extent_unlock(tsdn, prev); - - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, prev, false, - growing_retained)) { - extent = prev; - if (eset->delay_coalesce) { - /* Do minimal coalescing. 
*/ - *coalesced = true; - return extent; - } - again = true; - } - } - } while (again); - - if (eset->delay_coalesce) { - *coalesced = false; - } - return extent; -} - -static extent_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, false); -} - -static extent_t * -extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, true); -} - -/* - * Does the metadata management portions of putting an unused extent into the - * given eset_t (coalesces, deregisters slab interiors, the heap operations). - */ -static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - assert((eset_state_get(eset) != extent_state_dirty && - eset_state_get(eset) != extent_state_muzzy) || - !extent_zeroed_get(extent)); - - malloc_mutex_lock(tsdn, &eset->mtx); - - extent_szind_set(extent, SC_NSIZES); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); - } - - assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), true) == extent); - - if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent, NULL, growing_retained); - } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { - assert(eset == &arena->eset_dirty); - /* Always coalesce large eset eagerly. 
*/ - bool coalesced; - do { - assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, eset, extent, &coalesced, - growing_retained); - } while (coalesced); - if (extent_size_get(extent) >= oversize_threshold) { - /* Shortcut to purge the oversize extent eagerly. */ - malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, ehooks, extent); - return; - } - } - extent_deactivate_locked(tsdn, arena, eset, extent); - - malloc_mutex_unlock(tsdn, &eset->mtx); -} - -void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - ehooks_t *ehooks = arena_get_ehooks(arena); - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); - return; - } - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); -} - -static bool -extent_may_dalloc(void) { - /* With retain enabled, the default dalloc always fails. */ - return !opt_retain; -} - -static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - bool err; - - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - extent_addr_set(extent, extent_base_get(extent)); - - /* Try to deallocate. */ - err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - - if (!err) { - extent_dalloc(tsdn, arena, extent); - } - - return err; -} - -void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_dumpable_get(extent)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Avoid calling the default extent_dalloc unless have to. 
*/ - if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { - /* - * Deregister first to avoid a race with other allocating - * threads, and reregister if deallocation fails. - */ - extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { - return; - } - extent_reregister(tsdn, extent); - } - - /* Try to decommit; purge if that fails. */ - bool zeroed; - if (!extent_committed_get(extent)) { - zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent))) { - zeroed = true; - } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), - arena_ind_get(arena))) { - zeroed = true; - } else if (extent_state_get(extent) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), - arena_ind_get(arena))) { - zeroed = false; - } else { - zeroed = false; - } - extent_zeroed_set(extent, zeroed); - - if (config_prof) { - extent_gdump_sub(tsdn, extent); - } - - extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, - false); -} - -void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, extent); - - extent_addr_set(extent, extent_base_get(extent)); - - /* Try to destroy; silently fail otherwise. 
*/ - ehooks_destroy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), - arena_ind_get(arena)); - - extent_dalloc(tsdn, arena, extent); -} - -static bool -extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) || !err); - return err; -} - -bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, - size_t length) { - return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, - false); -} - -bool -extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) && err); - return err; -} - -static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - return err; -} - -bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, - length, false); -} - -static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - return err; -} - -bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, extent, - offset, length, false); -} - -/* - * Accepts the extent to split, and the characteristics of each side of the - * split. The 'a' parameters go with the 'lead' of the resulting pair of - * extents (the lower addressed portion of the split), and the 'b' parameters go - * with the trail (the higher addressed portion). This makes 'extent' the lead, - * and returns the trail (except in case of error). - */ -static extent_t * -extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { - assert(extent_size_get(extent) == size_a + size_b); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - - if (ehooks_split_will_fail(ehooks)) { - return NULL; - } - - extent_t *trail = extent_alloc(tsdn, arena); - if (trail == NULL) { - goto label_error_a; - } - - extent_init(trail, arena_ind_get(arena), - (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, - slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_dumpable_get(extent), EXTENT_NOT_HEAD); - - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; - { - extent_t lead; - - extent_init(&lead, arena_ind_get(arena), - extent_addr_get(extent), size_a, - slab_a, szind_a, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent), - EXTENT_NOT_HEAD); - - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, - true, &lead_elm_a, &lead_elm_b); - } - rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b); - - if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL - || trail_elm_b == NULL) { - goto label_error_b; - } - - extent_lock2(tsdn, extent, trail); - - bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), - size_a + size_b, size_a, size_b, extent_committed_get(extent), - arena_ind_get(arena)); - - if (err) { - goto label_error_c; - } - - extent_size_set(extent, size_a); - extent_szind_set(extent, szind_a); - - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, - szind_a, slab_a); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, - szind_b, slab_b); - - extent_unlock2(tsdn, extent, trail); - - return trail; -label_error_c: - extent_unlock2(tsdn, extent, trail); -label_error_b: - extent_dalloc(tsdn, arena, trail); -label_error_a: - return NULL; -} - -extent_t * -extent_split_wrapper(tsdn_t 
*tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, - slab_a, size_b, szind_b, slab_b, false); -} - -/* - * Returns true if the given extents can't be merged because of their head bit - * settings. Assumes the second extent has the higher address. - */ -bool -extent_head_no_merge(extent_t *a, extent_t *b) { - assert(extent_base_get(a) < extent_base_get(b)); - /* - * When coalesce is not always allowed (Windows), only merge extents - * from the same VirtualAlloc region under opt.retain (in which case - * MEM_DECOMMIT is utilized for purging). - */ - if (maps_coalesce) { - return false; - } - if (!opt_retain) { - return true; - } - /* If b is a head extent, disallow the cross-region merge. */ - if (extent_is_head_get(b)) { - /* - * Additionally, sn should not overflow with retain; sanity - * check that different regions have unique sn. - */ - assert(extent_sn_comp(a, b) != 0); - return true; - } - assert(extent_sn_comp(a, b) == 0); - - return false; -} - -static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, - extent_t *b, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(extent_base_get(a) < extent_base_get(b)); - - if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { - return true; - } - - bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena_ind_get(arena)); - - if (err) { - return true; - } - - /* - * The rtree writes must happen while all the relevant elements are - * owned, so the following code uses decomposed helper functions rather - * than extent_{,de}register() to do things in the right order. 
- */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, - &a_elm_b); - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, - &b_elm_b); - - extent_lock2(tsdn, a, b); - - if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, - SC_NSIZES, false); - } - if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, - SC_NSIZES, false); - } else { - b_elm_b = b_elm_a; - } - - extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_szind_set(a, SC_NSIZES); - extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? - extent_sn_get(a) : extent_sn_get(b)); - extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); - - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, - false); - - extent_unlock2(tsdn, a, b); - - /* - * If we got here, we merged the extents; so they must be from the same - * arena (i.e. this one). 
- */ - assert(extent_arena_ind_get(b) == arena_ind_get(arena)); - extent_dalloc(tsdn, arena, b); - - return false; -} - -bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b) { - return extent_merge_impl(tsdn, arena, ehooks, a, b, false); -} - -bool -extent_boot(void) { - if (rtree_new(&extents_rtree, true)) { - return true; - } - - if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", - WITNESS_RANK_EXTENT_POOL)) { - return true; - } - - if (have_dss) { - extent_dss_boot(); - } - - return false; -} - -void -extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = 0; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = 0; - *nregs = 1; - } else { - *nfree = extent_nfree_get(extent); - *nregs = bin_infos[extent_szind_get(extent)].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - } -} - -void -extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL - && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; - *slabcur_addr = NULL; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = *bin_nfree = *bin_nregs = 0; - *nregs = 1; - *slabcur_addr = NULL; - return; - } - - *nfree = extent_nfree_get(extent); - const szind_t szind = extent_szind_get(extent); - *nregs = bin_infos[szind].nregs; - assert(*nfree <= *nregs); - assert(*nfree * 
extent_usize_get(extent) <= *size); - - const arena_t *arena = (arena_t *)atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); - assert(arena != NULL); - const unsigned binshard = extent_binshard_get(extent); - bin_t *bin = &arena->bins[szind].bin_shards[binshard]; - - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - *bin_nregs = *nregs * bin->stats.curslabs; - assert(*bin_nregs >= bin->stats.curregs); - *bin_nfree = *bin_nregs - bin->stats.curregs; - } else { - *bin_nfree = *bin_nregs = 0; - } - extent_t *slab; - if (bin->slabcur != NULL) { - slab = bin->slabcur; - } else { - slab = extent_heap_first(&bin->slabs_nonfull); - } - *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; - malloc_mutex_unlock(tsdn, &bin->lock); -} diff --git a/src/extent2.c b/src/extent2.c new file mode 100644 index 00000000..4865beb1 --- /dev/null +++ b/src/extent2.c @@ -0,0 +1,1738 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_pool.h" + +/******************************************************************************/ +/* Data. */ + +rtree_t extents_rtree; +/* Keyed by the address of the extent_t being protected. 
*/ +mutex_pool_t extent_mutex_pool; + +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + +static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained); +static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); +static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + bool growing_retained); +static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b, bool growing_retained); + +/* Used exclusively for gdump triggering. */ +static atomic_zu_t curpages; +static atomic_zu_t highpages; + +/******************************************************************************/ +/* + * Function prototypes for static functions that are referenced prior to + * definition. 
+ */ + +static void extent_deregister(tsdn_t *tsdn, extent_t *extent); +static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); +static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, + bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, extent_t *extent, bool growing_retained); + +/******************************************************************************/ + +typedef enum { + lock_result_success, + lock_result_failure, + lock_result_no_extent +} lock_result_t; + +static inline void +extent_lock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_unlock(tsdn_t *tsdn, extent_t *extent) { + assert(extent != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +} + +static inline void +extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static inline void +extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { + assert(extent1 != NULL && extent2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, + (uintptr_t)extent2); +} + +static lock_result_t +extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, + extent_t **result, bool inactive_only) { + extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, + elm, true); + + /* Slab implies active extents and should be skipped. 
*/ + if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &extents_rtree, elm, true))) { + return lock_result_no_extent; + } + + /* + * It's possible that the extent changed out from under us, and with it + * the leaf->extent mapping. We have to recheck while holding the lock. + */ + extent_lock(tsdn, extent1); + extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, + &extents_rtree, elm, true); + + if (extent1 == extent2) { + *result = extent1; + return lock_result_success; + } else { + extent_unlock(tsdn, extent1); + return lock_result_failure; + } +} + +/* + * Returns a pool-locked extent_t * if there's one associated with the given + * address, and NULL otherwise. + */ +static extent_t * +extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, + bool inactive_only) { + extent_t *ret = NULL; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, + rtree_ctx, (uintptr_t)addr, false, false); + if (elm == NULL) { + return NULL; + } + lock_result_t lock_result; + do { + lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, + inactive_only); + } while (lock_result == lock_result_failure); + return ret; +} + +static void +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, + size_t alignment) { + assert(extent_base_get(extent) == extent_addr_get(extent)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_prng_statep_get(tsd), lg_range); + } else { + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + extent->e_addr = (void *)((uintptr_t)extent->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == + extent->e_addr); + } +} + +extent_t * +extent_alloc(tsdn_t *tsdn, arena_t 
*arena) { + malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); + extent_t *extent = extent_avail_first(&arena->extent_avail); + if (extent == NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); + return base_alloc_extent(tsdn, arena->base); + } + extent_avail_remove(&arena->extent_avail, extent); + atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); + return extent; +} + +void +extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { + malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); + extent_avail_insert(&arena->extent_avail, extent); + atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); +} + +static bool +extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { + extent_state_set(extent, extent_state_active); + bool coalesced; + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + extent, &coalesced, false); + extent_state_set(extent, eset_state_get(eset)); + + if (!coalesced) { + return true; + } + eset_insert_locked(tsdn, eset, extent); + return false; +} + +extent_t * +extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; +} + +void +extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent) { + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + 
assert(extent_dumpable_get(extent)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_addr_set(extent, extent_base_get(extent)); + extent_zeroed_set(extent, false); + + extent_record(tsdn, arena, ehooks, eset, extent, false); +} + +extent_t * +extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + size_t npages_min) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + malloc_mutex_lock(tsdn, &eset->mtx); + + /* + * Get the LRU coalesced extent, if any. If coalescing was delayed, + * the loop will iterate until the LRU extent is fully coalesced. + */ + extent_t *extent; + while (true) { + /* Get the LRU extent, if any. */ + extent = extent_list_first(&eset->lru); + if (extent == NULL) { + goto label_return; + } + /* Check the eviction limit. */ + size_t extents_npages = atomic_load_zu(&eset->npages, + ATOMIC_RELAXED); + if (extents_npages <= npages_min) { + extent = NULL; + goto label_return; + } + eset_remove_locked(tsdn, eset, extent); + if (!eset->delay_coalesce) { + break; + } + /* Try to coalesce. */ + if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent)) { + break; + } + /* + * The LRU extent was just coalesced and the result placed in + * the LRU at its neighbor's position. Start over. + */ + } + + /* + * Either mark the extent active or deregister it to protect against + * concurrent operations. + */ + switch (eset_state_get(eset)) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + case extent_state_muzzy: + extent_state_set(extent, extent_state_active); + break; + case extent_state_retained: + extent_deregister(tsdn, extent); + break; + default: + not_reached(); + } + +label_return: + malloc_mutex_unlock(tsdn, &eset->mtx); + return extent; +} + +/* + * This can only happen when we fail to allocate a new extent struct (which + * indicates OOM), e.g. 
when trying to split an existing extent. + */ +static void +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { + size_t sz = extent_size_get(extent); + if (config_stats) { + arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + } + /* + * Leak extent after making sure its pages have already been purged, so + * that this is only a virtual memory leak. + */ + if (eset_state_get(eset) == extent_state_dirty) { + if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, + growing_retained)) { + extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained); + } + } + extent_dalloc(tsdn, arena, extent); +} + +static void +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, + extent_t *extent) { + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); + assert(extent_state_get(extent) == extent_state_active); + + extent_state_set(extent, eset_state_get(eset)); + eset_insert_locked(tsdn, eset, extent); +} + +static void +extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, + extent_t *extent) { + malloc_mutex_lock(tsdn, &eset->mtx); + extent_deactivate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); +} + +static void +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, + extent_t *extent) { + assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); + assert(extent_state_get(extent) == eset_state_get(eset)); + + eset_remove_locked(tsdn, eset, extent); + extent_state_set(extent, extent_state_active); +} + +static bool +extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + const extent_t *extent, bool dependent, bool init_missing, + rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { + *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) { + 
return true; + } + assert(*r_elm_a != NULL); + + *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_last_get(extent), dependent, init_missing); + if (!dependent && *r_elm_b == NULL) { + return true; + } + assert(*r_elm_b != NULL); + + return false; +} + +static void +extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); + if (elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, + slab); + } +} + +static void +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, + szind_t szind) { + assert(extent_slab_get(extent)); + + /* Register interior. */ + for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + LG_PAGE), extent, szind, true); + } +} + +static void +extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { + cassert(config_prof); + /* prof_gdump() requirement. */ + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (opt_prof && extent_state_get(extent) == extent_state_active) { + size_t nadd = extent_size_get(extent) >> LG_PAGE; + size_t cur = atomic_fetch_add_zu(&curpages, nadd, + ATOMIC_RELAXED) + nadd; + size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); + while (cur > high && !atomic_compare_exchange_weak_zu( + &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { + /* + * Don't refresh cur, because it may have decreased + * since this thread lost the highpages update race. + * Note that high is updated in case of CAS failure. 
+ */ + } + if (cur > high && prof_gdump_get_unlocked()) { + prof_gdump(tsdn); + } + } +} + +static void +extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { + cassert(config_prof); + + if (opt_prof && extent_state_get(extent) == extent_state_active) { + size_t nsub = extent_size_get(extent) >> LG_PAGE; + assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); + atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); + } +} + +static bool +extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *elm_a, *elm_b; + + /* + * We need to hold the lock to protect against a concurrent coalesce + * operation that sees us in a partial state. + */ + extent_lock(tsdn, extent); + + if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, + &elm_a, &elm_b)) { + extent_unlock(tsdn, extent); + return true; + } + + szind_t szind = extent_szind_get_maybe_invalid(extent); + bool slab = extent_slab_get(extent); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); + if (slab) { + extent_interior_register(tsdn, rtree_ctx, extent, szind); + } + + extent_unlock(tsdn, extent); + + if (config_prof && gdump_add) { + extent_gdump_add(tsdn, extent); + } + + return false; +} + +static bool +extent_register(tsdn_t *tsdn, extent_t *extent) { + return extent_register_impl(tsdn, extent, true); +} + +static bool +extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { + return extent_register_impl(tsdn, extent, false); +} + +static void +extent_reregister(tsdn_t *tsdn, extent_t *extent) { + bool err = extent_register(tsdn, extent); + assert(!err); +} + +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. 
We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ +static void +extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, + extent_t *extent) { + size_t i; + + assert(extent_slab_get(extent)); + + for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + LG_PAGE)); + } +} + +/* + * Removes all pointers to the given extent from the global rtree. + */ +static void +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *elm_a, *elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, + &elm_a, &elm_b); + + extent_lock(tsdn, extent); + + extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); + if (extent_slab_get(extent)) { + extent_interior_deregister(tsdn, rtree_ctx, extent); + extent_slab_set(extent, false); + } + + extent_unlock(tsdn, extent); + + if (config_prof && gdump) { + extent_gdump_sub(tsdn, extent); + } +} + +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + +/* + * Tries to find and remove an extent from eset that can be used for the + * given allocation request. + */ +static extent_t * +extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + assert(alignment > 0); + if (config_debug && new_addr != NULL) { + /* + * Non-NULL new_addr has two use cases: + * + * 1) Recycle a known-extant extent, e.g. during purging. + * 2) Perform in-place expanding reallocation. + * + * Regardless of use case, new_addr must either refer to a + * non-existing extent, or to the base of an extant extent, + * since only active slabs support interior lookups (which of + * course cannot be recycled). + */ + assert(PAGE_ADDR2BASE(new_addr) == new_addr); + assert(pad == 0); + assert(alignment <= PAGE); + } + + size_t esize = size + pad; + malloc_mutex_lock(tsdn, &eset->mtx); + extent_t *extent; + if (new_addr != NULL) { + extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, + false); + if (extent != NULL) { + /* + * We might null-out extent to report an error, but we + * still need to unlock the associated mutex after. + */ + extent_t *unlock_extent = extent; + assert(extent_base_get(extent) == new_addr); + if (extent_arena_ind_get(extent) + != arena_ind_get(arena) || + extent_size_get(extent) < esize || + extent_state_get(extent) != + eset_state_get(eset)) { + extent = NULL; + } + extent_unlock(tsdn, unlock_extent); + } + } else { + extent = eset_fit_locked(tsdn, eset, esize, alignment); + } + if (extent == NULL) { + malloc_mutex_unlock(tsdn, &eset->mtx); + return NULL; + } + + extent_activate_locked(tsdn, arena, eset, extent); + malloc_mutex_unlock(tsdn, &eset->mtx); + + return extent; +} + +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any eset_t; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. 
+ */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. + */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. */ + extent_t **to_leak, extent_t **to_salvage, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool growing_retained) { + size_t esize = size + pad; + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); + assert(new_addr == NULL || leadsize == 0); + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; + + /* Split the lead. */ + if (leadsize != 0) { + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, ehooks, *lead, + leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, + growing_retained); + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; + } + } + + /* Split the trail. 
*/ + if (trailsize != 0) { + *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, + szind, slab, trailsize, SC_NSIZES, false, growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; + } + } + + if (leadsize == 0 && trailsize == 0) { + /* + * Splitting causes szind to be set as a side effect, but no + * splitting occurred. + */ + extent_szind_set(*extent, szind); + if (szind != SC_NSIZES) { + rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { + rtree_szind_slab_update(tsdn, &extents_rtree, + rtree_ctx, + (uintptr_t)extent_past_get(*extent) - + (uintptr_t)PAGE, szind, slab); + } + } + } + + return extent_split_interior_ok; +} + +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into eset. + */ +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, + bool growing_retained) { + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (!maps_coalesce && result != extent_split_interior_ok + && !opt_retain) { + /* + * Split isn't supported (implies Windows w/o retain). Avoid + * leaking the eset. 
+ */ + assert(to_leak != NULL && lead == NULL && trail == NULL); + extent_deactivate(tsdn, arena, eset, to_leak); + return NULL; + } + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, eset, lead); + } + if (trail != NULL) { + extent_deactivate(tsdn, arena, eset, trail); + } + return extent; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, + growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, + false) == NULL); + } + return NULL; + } + unreachable(); +} + +static bool +extent_need_manual_zero(arena_t *arena) { + /* + * Need to manually zero the extent on repopulating if either; 1) non + * default extent hooks installed (in which case the purge semantics may + * change); or 2) transparent huge pages enabled. + */ + return (!ehooks_are_default(arena_get_ehooks(arena)) || + (opt_thp == thp_mode_always)); +} + +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given eset_t. + */ +static extent_t * +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + assert(new_addr == NULL || !slab); + assert(pad == 0 || !slab); + assert(!*zero || !slab); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, + rtree_ctx, eset, new_addr, size, pad, alignment, slab, + growing_retained); + if (extent == NULL) { + return NULL; + } + + extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + new_addr, size, pad, alignment, slab, szind, extent, + growing_retained); + if (extent == NULL) { + return NULL; + } + + if (*commit && !extent_committed_get(extent)) { + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), growing_retained)) { + extent_record(tsdn, arena, ehooks, eset, extent, + growing_retained); + return NULL; + } + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } + } + + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; + } + + if (pad != 0) { + extent_addr_randomize(tsdn, arena, extent, alignment); + } + assert(extent_state_get(extent) == extent_state_active); + if (slab) { + extent_slab_set(extent, slab); + extent_interior_register(tsdn, rtree_ctx, extent, szind); + } + + if (*zero) { + void *addr = extent_base_get(extent); + if (!extent_zeroed_get(extent)) { + size_t size = extent_size_get(extent); + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { + memset(addr, 0, size); + } + } else if (config_debug) { + size_t *p = (size_t *)(uintptr_t)addr; + /* Check the first page only. */ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } + } + } + return extent; +} + +/* + * If virtual memory is retained, create increasingly larger extents from which + * to split requested extents in order to limit the total number of disjoint + * virtual memory ranges retained by each arena. 
+ */ +static extent_t * +extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { + malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); + assert(pad == 0 || !slab); + assert(!*zero || !slab); + + size_t esize = size + pad; + size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (alloc_size_min < esize) { + goto label_err; + } + /* + * Find the next extent size in the series that would be large enough to + * satisfy this request. + */ + pszind_t egn_skip = 0; + size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + while (alloc_size < alloc_size_min) { + egn_skip++; + if (arena->extent_grow_next + egn_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { + /* Outside legal range. */ + goto label_err; + } + alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + } + + extent_t *extent = extent_alloc(tsdn, arena); + if (extent == NULL) { + goto label_err; + } + bool zeroed = false; + bool committed = false; + + void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, + &committed, arena_ind_get(arena)); + + extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, + committed, true, EXTENT_IS_HEAD); + if (ptr == NULL) { + extent_dalloc(tsdn, arena, extent); + goto label_err; + } + + if (extent_register_no_gdump_add(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + goto label_err; + } + + if (extent_zeroed_get(extent) && extent_committed_get(extent)) { + *zero = true; + } + if (extent_committed_get(extent)) { + *commit = true; + } + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior(tsdn, + 
arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + &to_salvage, NULL, size, pad, alignment, slab, szind, true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, lead, true); + } + if (trail != NULL) { + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, trail, true); + } + } else { + /* + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); + } + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, to_salvage, true); + } + if (to_leak != NULL) { + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_abandon_vm(tsdn, arena, ehooks, + &arena->eset_retained, to_leak, true); + } + goto label_err; + } + + if (*commit && !extent_committed_get(extent)) { + if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent), true)) { + extent_record(tsdn, arena, ehooks, + &arena->eset_retained, extent, true); + goto label_err; + } + if (!extent_need_manual_zero(arena)) { + extent_zeroed_set(extent, true); + } + } + + /* + * Increment extent_grow_next if doing so wouldn't exceed the allowed + * range. + */ + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { + arena->extent_grow_next += egn_skip + 1; + } else { + arena->extent_grow_next = arena->retain_grow_limit; + } + /* All opportunities for failure are past. */ + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + + if (config_prof) { + /* Adjust gdump stats now that extent is final size. 
*/ + extent_gdump_add(tsdn, extent); + } + if (pad != 0) { + extent_addr_randomize(tsdn, arena, extent, alignment); + } + if (slab) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, + &rtree_ctx_fallback); + + extent_slab_set(extent, true); + extent_interior_register(tsdn, rtree_ctx, extent, szind); + } + if (*zero && !extent_zeroed_get(extent)) { + void *addr = extent_base_get(extent); + size_t size = extent_size_get(extent); + if (extent_need_manual_zero(arena) || + pages_purge_forced(addr, size)) { + memset(addr, 0, size); + } + } + + return extent; +label_err: + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + return NULL; +} + +static extent_t * +extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + assert(size != 0); + assert(alignment != 0); + + malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); + + extent_t *extent = extent_recycle(tsdn, arena, ehooks, + &arena->eset_retained, new_addr, size, pad, alignment, slab, + szind, zero, commit, true); + if (extent != NULL) { + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + if (config_prof) { + extent_gdump_add(tsdn, extent); + } + } else if (opt_retain && new_addr == NULL) { + extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, + alignment, slab, szind, zero, commit); + /* extent_grow_retained() always releases extent_grow_mtx. 
*/ + } else { + malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + } + malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); + + return extent; +} + +static extent_t * +extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + size_t esize = size + pad; + extent_t *extent = extent_alloc(tsdn, arena); + if (extent == NULL) { + return NULL; + } + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + zero, commit, arena_ind_get(arena)); + if (addr == NULL) { + extent_dalloc(tsdn, arena, extent); + return NULL; + } + extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, + true, EXTENT_NOT_HEAD); + if (pad != 0) { + extent_addr_randomize(tsdn, arena, extent, alignment); + } + if (extent_register(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + return NULL; + } + + return extent; +} + +extent_t * +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, slab, szind, zero, commit); + if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). 
+ */ + return NULL; + } + extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, + new_addr, size, pad, alignment, slab, szind, zero, commit); + } + + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; +} + +static bool +extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, + const extent_t *outer) { + assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); + if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { + return false; + } + + assert(extent_state_get(inner) == extent_state_active); + if (extent_state_get(outer) != eset->state) { + return false; + } + + if (extent_committed_get(inner) != extent_committed_get(outer)) { + return false; + } + + return true; +} + +static bool +extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { + assert(extent_can_coalesce(arena, eset, inner, outer)); + + extent_activate_locked(tsdn, arena, eset, outer); + + malloc_mutex_unlock(tsdn, &eset->mtx); + bool err = extent_merge_impl(tsdn, arena, ehooks, + forward ? inner : outer, forward ? outer : inner, growing_retained); + malloc_mutex_lock(tsdn, &eset->mtx); + + if (err) { + extent_deactivate_locked(tsdn, arena, eset, outer); + } + + return err; +} + +static extent_t * +extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained, bool inactive_only) { + /* + * We avoid checking / locking inactive neighbors for large size + * classes, since they are eagerly coalesced on deallocation which can + * cause lock contention. + */ + /* + * Continue attempting to coalesce until failure, to protect against + * races with other threads that are thwarted by this one. + */ + bool again; + do { + again = false; + + /* Try to coalesce forward. 
*/ + extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, + extent_past_get(extent), inactive_only); + if (next != NULL) { + /* + * eset->mtx only protects against races for + * like-state eset, so call extent_can_coalesce() + * before releasing next's pool lock. + */ + bool can_coalesce = extent_can_coalesce(arena, eset, + extent, next); + + extent_unlock(tsdn, next); + + if (can_coalesce && !extent_coalesce(tsdn, arena, + ehooks, eset, extent, next, true, + growing_retained)) { + if (eset->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return extent; + } + again = true; + } + } + + /* Try to coalesce backward. */ + extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, + extent_before_get(extent), inactive_only); + if (prev != NULL) { + bool can_coalesce = extent_can_coalesce(arena, eset, + extent, prev); + extent_unlock(tsdn, prev); + + if (can_coalesce && !extent_coalesce(tsdn, arena, + ehooks, eset, extent, prev, false, + growing_retained)) { + extent = prev; + if (eset->delay_coalesce) { + /* Do minimal coalescing. 
*/ + *coalesced = true; + return extent; + } + again = true; + } + } + } while (again); + + if (eset->delay_coalesce) { + *coalesced = false; + } + return extent; +} + +static extent_t * +extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, false); +} + +static extent_t * +extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + extent, coalesced, growing_retained, true); +} + +/* + * Does the metadata management portions of putting an unused extent into the + * given eset_t (coalesces, deregisters slab interiors, the heap operations). + */ +static void +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, + extent_t *extent, bool growing_retained) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + assert((eset_state_get(eset) != extent_state_dirty && + eset_state_get(eset) != extent_state_muzzy) || + !extent_zeroed_get(extent)); + + malloc_mutex_lock(tsdn, &eset->mtx); + + extent_szind_set(extent, SC_NSIZES); + if (extent_slab_get(extent)) { + extent_interior_deregister(tsdn, rtree_ctx, extent); + extent_slab_set(extent, false); + } + + assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)extent_base_get(extent), true) == extent); + + if (!eset->delay_coalesce) { + extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, extent, NULL, growing_retained); + } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { + assert(eset == &arena->eset_dirty); + /* Always coalesce large eset eagerly. 
*/ + bool coalesced; + do { + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce_large(tsdn, arena, ehooks, + rtree_ctx, eset, extent, &coalesced, + growing_retained); + } while (coalesced); + if (extent_size_get(extent) >= oversize_threshold) { + /* Shortcut to purge the oversize extent eagerly. */ + malloc_mutex_unlock(tsdn, &eset->mtx); + arena_decay_extent(tsdn, arena, ehooks, extent); + return; + } + } + extent_deactivate_locked(tsdn, arena, eset, extent); + + malloc_mutex_unlock(tsdn, &eset->mtx); +} + +void +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { + ehooks_t *ehooks = arena_get_ehooks(arena); + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + if (extent_register(tsdn, extent)) { + extent_dalloc(tsdn, arena, extent); + return; + } + extent_dalloc_wrapper(tsdn, arena, ehooks, extent); +} + +static bool +extent_may_dalloc(void) { + /* With retain enabled, the default dalloc always fails. */ + return !opt_retain; +} + +static bool +extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { + bool err; + + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + extent_addr_set(extent, extent_base_get(extent)); + + /* Try to deallocate. */ + err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); + + if (!err) { + extent_dalloc(tsdn, arena, extent); + } + + return err; +} + +void +extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { + assert(extent_dumpable_get(extent)); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Avoid calling the default extent_dalloc unless have to. 
*/ + if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { + /* + * Deregister first to avoid a race with other allocating + * threads, and reregister if deallocation fails. + */ + extent_deregister(tsdn, extent); + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { + return; + } + extent_reregister(tsdn, extent); + } + + /* Try to decommit; purge if that fails. */ + bool zeroed; + if (!extent_committed_get(extent)) { + zeroed = true; + } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, + extent_size_get(extent))) { + zeroed = true; + } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { + zeroed = true; + } else if (extent_state_get(extent) == extent_state_muzzy || + !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), 0, extent_size_get(extent), + arena_ind_get(arena))) { + zeroed = false; + } else { + zeroed = false; + } + extent_zeroed_set(extent, zeroed); + + if (config_prof) { + extent_gdump_sub(tsdn, extent); + } + + extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, + false); +} + +void +extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent) { + assert(extent_base_get(extent) != NULL); + assert(extent_size_get(extent) != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Deregister first to avoid a race with other allocating threads. */ + extent_deregister(tsdn, extent); + + extent_addr_set(extent, extent_base_get(extent)); + + /* Try to destroy; silently fail otherwise. 
*/ + ehooks_destroy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), extent_committed_get(extent), + arena_ind_get(arena)); + + extent_dalloc(tsdn, arena, extent); +} + +static bool +extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + extent_committed_set(extent, extent_committed_get(extent) || !err); + return err; +} + +bool +extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, + size_t length) { + return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, + false); +} + +bool +extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + extent_committed_set(extent, extent_committed_get(extent) && err); + return err; +} + +static bool +extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + return err; +} + +bool +extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, + length, false); +} + +static bool +extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), + extent_size_get(extent), offset, length, arena_ind_get(arena)); + return err; +} + +bool +extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, extent, + offset, length, false); +} + +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). + */ +static extent_t * +extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { + assert(extent_size_get(extent) == size_a + size_b); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + + if (ehooks_split_will_fail(ehooks)) { + return NULL; + } + + extent_t *trail = extent_alloc(tsdn, arena); + if (trail == NULL) { + goto label_error_a; + } + + extent_init(trail, arena_ind_get(arena), + (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, + slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), + extent_zeroed_get(extent), extent_committed_get(extent), + extent_dumpable_get(extent), EXTENT_NOT_HEAD); + + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; + { + extent_t lead; + + extent_init(&lead, arena_ind_get(arena), + extent_addr_get(extent), size_a, + slab_a, szind_a, extent_sn_get(extent), + extent_state_get(extent), extent_zeroed_get(extent), + extent_committed_get(extent), extent_dumpable_get(extent), + EXTENT_NOT_HEAD); + + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, + true, &lead_elm_a, &lead_elm_b); + } + rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, + &trail_elm_a, &trail_elm_b); + + if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL + || trail_elm_b == NULL) { + goto label_error_b; + } + + extent_lock2(tsdn, extent, trail); + + bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), + size_a + size_b, size_a, size_b, extent_committed_get(extent), + arena_ind_get(arena)); + + if (err) { + goto label_error_c; + } + + extent_size_set(extent, size_a); + extent_szind_set(extent, szind_a); + + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, + szind_a, slab_a); + extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, + szind_b, slab_b); + + extent_unlock2(tsdn, extent, trail); + + return trail; +label_error_c: + extent_unlock2(tsdn, extent, trail); +label_error_b: + extent_dalloc(tsdn, arena, trail); +label_error_a: + return NULL; +} + +extent_t * +extent_split_wrapper(tsdn_t 
*tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, + slab_a, size_b, szind_b, slab_b, false); +} + +/* + * Returns true if the given extents can't be merged because of their head bit + * settings. Assumes the second extent has the higher address. + */ +bool +extent_head_no_merge(extent_t *a, extent_t *b) { + assert(extent_base_get(a) < extent_base_get(b)); + /* + * When coalesce is not always allowed (Windows), only merge extents + * from the same VirtualAlloc region under opt.retain (in which case + * MEM_DECOMMIT is utilized for purging). + */ + if (maps_coalesce) { + return false; + } + if (!opt_retain) { + return true; + } + /* If b is a head extent, disallow the cross-region merge. */ + if (extent_is_head_get(b)) { + /* + * Additionally, sn should not overflow with retain; sanity + * check that different regions have unique sn. + */ + assert(extent_sn_comp(a, b) != 0); + return true; + } + assert(extent_sn_comp(a, b) == 0); + + return false; +} + +static bool +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, + extent_t *b, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(extent_base_get(a) < extent_base_get(b)); + + if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { + return true; + } + + bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), + extent_size_get(a), extent_base_get(b), extent_size_get(b), + extent_committed_get(a), arena_ind_get(arena)); + + if (err) { + return true; + } + + /* + * The rtree writes must happen while all the relevant elements are + * owned, so the following code uses decomposed helper functions rather + * than extent_{,de}register() to do things in the right order. 
+ */ + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, + &a_elm_b); + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, + &b_elm_b); + + extent_lock2(tsdn, a, b); + + if (a_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, + SC_NSIZES, false); + } + if (b_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, + SC_NSIZES, false); + } else { + b_elm_b = b_elm_a; + } + + extent_size_set(a, extent_size_get(a) + extent_size_get(b)); + extent_szind_set(a, SC_NSIZES); + extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? + extent_sn_get(a) : extent_sn_get(b)); + extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + + extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, + false); + + extent_unlock2(tsdn, a, b); + + /* + * If we got here, we merged the extents; so they must be from the same + * arena (i.e. this one). 
+ */ + assert(extent_arena_ind_get(b) == arena_ind_get(arena)); + extent_dalloc(tsdn, arena, b); + + return false; +} + +bool +extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + extent_t *a, extent_t *b) { + return extent_merge_impl(tsdn, arena, ehooks, a, b, false); +} + +bool +extent_boot(void) { + if (rtree_new(&extents_rtree, true)) { + return true; + } + + if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", + WITNESS_RANK_EXTENT_POOL)) { + return true; + } + + if (have_dss) { + extent_dss_boot(); + } + + return false; +} + +void +extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * 
extent_usize_get(extent) <= *size); + + const arena_t *arena = (arena_t *)atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + extent_t *slab; + if (bin->slabcur != NULL) { + slab = bin->slabcur; + } else { + slab = extent_heap_first(&bin->slabs_nonfull); + } + *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; + malloc_mutex_unlock(tsdn, &bin->lock); +} From 403f2d1664acfae920e8e6ce51e2695d826a0628 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 09:44:59 -0800 Subject: [PATCH 1460/2608] Extents: Split out introspection functionality. This isn't really part of the core extent allocation facilities. Especially as this module grows, having it in its own place may come in handy. 
--- Makefile.in | 3 +- include/jemalloc/internal/extent2.h | 28 ------- include/jemalloc/internal/inspect.h | 40 ++++++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/ctl.c | 20 ++--- src/extent2.c | 75 ------------------ src/inspect.c | 77 +++++++++++++++++++ test/unit/{extent_util.c => inspect.c} | 0 9 files changed, 132 insertions(+), 113 deletions(-) create mode 100644 include/jemalloc/internal/inspect.h create mode 100644 src/inspect.c rename test/unit/{extent_util.c => inspect.c} (100%) diff --git a/Makefile.in b/Makefile.in index 29977bc0..cab4e1f1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -112,6 +112,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ $(srcroot)src/hook.c \ + $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ @@ -189,11 +190,11 @@ TESTS_UNIT := \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ - $(srcroot)test/unit/extent_util.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/huge.c \ + $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 22035bba..7a18a613 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -17,28 +17,6 @@ * particular reason. This will also be changed, but much more immediately. */ -/* - * The following two structs are for experimental purposes. See - * experimental_utilization_query_ctl and - * experimental_utilization_batch_query_ctl in src/ctl.c. 
- */ -typedef struct extent_util_stats_s extent_util_stats_t; -struct extent_util_stats_s { - size_t nfree; - size_t nregs; - size_t size; -}; - -typedef struct extent_util_stats_verbose_s extent_util_stats_verbose_t; -struct extent_util_stats_verbose_s { - void *slabcur_addr; - size_t nfree; - size_t nregs; - size_t size; - size_t bin_nfree; - size_t bin_nregs; -}; - /* * When reuse (and split) an active extent, (1U << opt_lg_extent_max_active_fit) * is the max ratio between the size of the active extent and the new extent. @@ -83,10 +61,4 @@ bool extent_head_no_merge(extent_t *a, extent_t *b); bool extent_boot(void); -void extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size); -void extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); - #endif /* JEMALLOC_INTERNAL_EXTENT2_H */ diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h new file mode 100644 index 00000000..65fef51d --- /dev/null +++ b/include/jemalloc/internal/inspect.h @@ -0,0 +1,40 @@ +#ifndef JEMALLOC_INTERNAL_INSPECT_H +#define JEMALLOC_INTERNAL_INSPECT_H + +/* + * This module contains the heap introspection capabilities. For now they are + * exposed purely through mallctl APIs in the experimental namespace, but this + * may change over time. + */ + +/* + * The following two structs are for experimental purposes. See + * experimental_utilization_query_ctl and + * experimental_utilization_batch_query_ctl in src/ctl.c. 
+ */ +typedef struct inspect_extent_util_stats_s inspect_extent_util_stats_t; +struct inspect_extent_util_stats_s { + size_t nfree; + size_t nregs; + size_t size; +}; + +typedef struct inspect_extent_util_stats_verbose_s + inspect_extent_util_stats_verbose_t; + +struct inspect_extent_util_stats_verbose_s { + void *slabcur_addr; + size_t nfree; + size_t nregs; + size_t size; + size_t bin_nfree; + size_t bin_nregs; +}; + +void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size); +void inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, + size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + +#endif /* JEMALLOC_INTERNAL_INSPECT_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 4118b911..f5069d38 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -52,6 +52,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ed3b5248..19e72d4f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -52,6 +52,7 @@ + diff --git a/src/ctl.c b/src/ctl.c index a9982ca3..4aa4af8f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/inspect.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/sc.h" @@ -3258,11 +3259,11 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - assert(sizeof(extent_util_stats_verbose_t) + assert(sizeof(inspect_extent_util_stats_verbose_t) == sizeof(void *) + sizeof(size_t) * 5); if 
(oldp == NULL || oldlenp == NULL - || *oldlenp != sizeof(extent_util_stats_verbose_t) + || *oldlenp != sizeof(inspect_extent_util_stats_verbose_t) || newp == NULL) { ret = EINVAL; goto label_return; @@ -3270,9 +3271,9 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, void *ptr = NULL; WRITE(ptr, void *); - extent_util_stats_verbose_t *util_stats - = (extent_util_stats_verbose_t *)oldp; - extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, + inspect_extent_util_stats_verbose_t *util_stats + = (inspect_extent_util_stats_verbose_t *)oldp; + inspect_extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, &util_stats->nfree, &util_stats->nregs, &util_stats->size, &util_stats->bin_nfree, &util_stats->bin_nregs, &util_stats->slabcur_addr); @@ -3383,21 +3384,22 @@ experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - assert(sizeof(extent_util_stats_t) == sizeof(size_t) * 3); + assert(sizeof(inspect_extent_util_stats_t) == sizeof(size_t) * 3); const size_t len = newlen / sizeof(const void *); if (oldp == NULL || oldlenp == NULL || newp == NULL || newlen == 0 || newlen != len * sizeof(const void *) - || *oldlenp != len * sizeof(extent_util_stats_t)) { + || *oldlenp != len * sizeof(inspect_extent_util_stats_t)) { ret = EINVAL; goto label_return; } void **ptrs = (void **)newp; - extent_util_stats_t *util_stats = (extent_util_stats_t *)oldp; + inspect_extent_util_stats_t *util_stats = + (inspect_extent_util_stats_t *)oldp; size_t i; for (i = 0; i < len; ++i) { - extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], + inspect_extent_util_stats_get(tsd_tsdn(tsd), ptrs[i], &util_stats[i].nfree, &util_stats[i].nregs, &util_stats[i].size); } diff --git a/src/extent2.c b/src/extent2.c index 4865beb1..0b097160 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1661,78 +1661,3 @@ extent_boot(void) { return false; } - -void -extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - 
size_t *nfree, size_t *nregs, size_t *size) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = 0; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = 0; - *nregs = 1; - } else { - *nfree = extent_nfree_get(extent); - *nregs = bin_infos[extent_szind_get(extent)].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - } -} - -void -extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr) { - assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL - && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { - *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; - *slabcur_addr = NULL; - return; - } - - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { - *nfree = *bin_nfree = *bin_nregs = 0; - *nregs = 1; - *slabcur_addr = NULL; - return; - } - - *nfree = extent_nfree_get(extent); - const szind_t szind = extent_szind_get(extent); - *nregs = bin_infos[szind].nregs; - assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); - - const arena_t *arena = (arena_t *)atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); - assert(arena != NULL); - const unsigned binshard = extent_binshard_get(extent); - bin_t *bin = &arena->bins[szind].bin_shards[binshard]; - - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - *bin_nregs = *nregs * bin->stats.curslabs; - assert(*bin_nregs >= bin->stats.curregs); - *bin_nfree = *bin_nregs - bin->stats.curregs; - } else { - *bin_nfree = *bin_nregs = 0; - } - extent_t *slab; - if (bin->slabcur != NULL) { - slab = bin->slabcur; - } else { - slab = 
extent_heap_first(&bin->slabs_nonfull); - } - *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; - malloc_mutex_unlock(tsdn, &bin->lock); -} diff --git a/src/inspect.c b/src/inspect.c new file mode 100644 index 00000000..435016e6 --- /dev/null +++ b/src/inspect.c @@ -0,0 +1,77 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +void +inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, + size_t *nregs, size_t *size) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = 0; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = 0; + *nregs = 1; + } else { + *nfree = extent_nfree_get(extent); + *nregs = bin_infos[extent_szind_get(extent)].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + } +} + +void +inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, + size_t *nfree, size_t *nregs, size_t *size, size_t *bin_nfree, + size_t *bin_nregs, void **slabcur_addr) { + assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL + && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); + + const extent_t *extent = iealloc(tsdn, ptr); + if (unlikely(extent == NULL)) { + *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; + *slabcur_addr = NULL; + return; + } + + *size = extent_size_get(extent); + if (!extent_slab_get(extent)) { + *nfree = *bin_nfree = *bin_nregs = 0; + *nregs = 1; + *slabcur_addr = NULL; + return; + } + + *nfree = extent_nfree_get(extent); + const szind_t szind = extent_szind_get(extent); + *nregs = bin_infos[szind].nregs; + assert(*nfree <= *nregs); + assert(*nfree * extent_usize_get(extent) <= *size); + + const arena_t *arena = (arena_t *)atomic_load_p( + &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + 
assert(arena != NULL); + const unsigned binshard = extent_binshard_get(extent); + bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + + malloc_mutex_lock(tsdn, &bin->lock); + if (config_stats) { + *bin_nregs = *nregs * bin->stats.curslabs; + assert(*bin_nregs >= bin->stats.curregs); + *bin_nfree = *bin_nregs - bin->stats.curregs; + } else { + *bin_nfree = *bin_nregs = 0; + } + extent_t *slab; + if (bin->slabcur != NULL) { + slab = bin->slabcur; + } else { + slab = extent_heap_first(&bin->slabs_nonfull); + } + *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; + malloc_mutex_unlock(tsdn, &bin->lock); +} diff --git a/test/unit/extent_util.c b/test/unit/inspect.c similarity index 100% rename from test/unit/extent_util.c rename to test/unit/inspect.c From ebbb973271e26175c832a6ec5dfc515e7473a9af Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 14:05:14 -0800 Subject: [PATCH 1461/2608] Base: Remove some unnecessary reentrancy guards. The ehooks module will now call these if necessary. --- src/base.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/base.c b/src/base.c index a1b45d06..79736cdc 100644 --- a/src/base.c +++ b/src/base.c @@ -79,25 +79,21 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, /* Nothing worked. This should never happen. */ not_reached(); } else { - tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, NULL); if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { - goto label_post_reentrancy; + goto label_done; } if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { - goto label_post_reentrancy; + goto label_done; } if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size, ind)) { - goto label_post_reentrancy; + goto label_done; } if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, ind)) { - goto label_post_reentrancy; + goto label_done; } /* Nothing worked. That's the application's problem. 
*/ - label_post_reentrancy: - post_reentrancy(tsd); } label_done: if (metadata_thp_madvise()) { From d0f187ad3b2ea2e457a05217da4be23db5d915a5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 14:42:10 -0800 Subject: [PATCH 1462/2608] Arena: Loosen arena_may_have_muzzy restrictions. If there are custom extent hooks, pages_can_purge_lazy is not necessarily the right guard. We could check ehooks_are_default too, but the case where purge_lazy is unsupported is rare and getting rarer. Just checking the decay interval captures most of the benefit. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 214a97c2..2d46b9e6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -420,7 +420,7 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, static bool arena_may_have_muzzy(arena_t *arena) { - return (pages_can_purge_lazy && (arena_muzzy_decay_ms_get(arena) != 0)); + return arena_muzzy_decay_ms_get(arena) != 0; } extent_t * From 4b2e5ee8b9989a84a5c3665bada0973ab351d3d9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Dec 2019 17:55:24 -0800 Subject: [PATCH 1463/2608] Ehooks: Add a "zero" ehook. This is the first API expansion. It lets the hooks pick where and how to purge within themselves. 
--- include/jemalloc/internal/ehooks.h | 18 ++++++++++++++ src/ehooks.c | 17 +++++++++++++ src/extent2.c | 38 ++++++++++-------------------- 3 files changed, 48 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 97c3f442..734cd181 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -43,6 +43,7 @@ bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif bool ehooks_default_split_impl(); bool ehooks_default_merge_impl(void *addr_a, void *addr_b); +void ehooks_default_zero_impl(void *addr, size_t size); /* * We don't officially support reentrancy from wtihin the extent hooks. But @@ -261,4 +262,21 @@ ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, } } +static inline void +ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, + unsigned arena_ind) { + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + if (extent_hooks == &ehooks_default_extent_hooks) { + ehooks_default_zero_impl(addr, size); + } else { + /* + * It would be correct to try using the user-provided purge + * hooks (since they are required to have zeroed the extent if + * they indicate success), but we don't necessarily know their + * cost. We'll be conservative and use memset. + */ + memset(addr, 0, size); + } +} + #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/src/ehooks.c b/src/ehooks.c index d7d1613f..25aef1c3 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -209,6 +209,23 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, return ehooks_default_merge_impl(addr_a, addr_b); } +void +ehooks_default_zero_impl(void *addr, size_t size) { + /* + * By default, we try to zero out memory using OS-provided demand-zeroed + * pages. 
If the user has specifically requested hugepages, though, we + * don't want to purge in the middle of a hugepage (which would break it + * up), so we act conservatively and use memset. + */ + bool needs_memset = true; + if (opt_thp != thp_mode_always) { + needs_memset = pages_purge_forced(addr, size); + } + if (needs_memset) { + memset(addr, 0, size); + } +} + const extent_hooks_t ehooks_default_extent_hooks = { ehooks_default_alloc, ehooks_default_dalloc, diff --git a/src/extent2.c b/src/extent2.c index 0b097160..55f72dff 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -758,17 +758,6 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, unreachable(); } -static bool -extent_need_manual_zero(arena_t *arena) { - /* - * Need to manually zero the extent on repopulating if either; 1) non - * default extent hooks installed (in which case the purge semantics may - * change); or 2) transparent huge pages enabled. - */ - return (!ehooks_are_default(arena_get_ehooks(arena)) || - (opt_thp == thp_mode_always)); -} - /* * Tries to satisfy the given allocation request by reusing one of the extents * in the given eset_t. @@ -807,9 +796,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, growing_retained); return NULL; } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); - } } if (extent_committed_get(extent)) { @@ -832,11 +818,10 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *addr = extent_base_get(extent); if (!extent_zeroed_get(extent)) { size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } - } else if (config_debug) { + ehooks_zero(tsdn, ehooks, addr, size, + arena_ind_get(arena)); + } + if (config_debug) { size_t *p = (size_t *)(uintptr_t)addr; /* Check the first page only. 
*/ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { @@ -960,8 +945,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &arena->eset_retained, extent, true); goto label_err; } - if (!extent_need_manual_zero(arena)) { - extent_zeroed_set(extent, true); + /* A successful commit should return zeroed memory. */ + if (config_debug) { + void *addr = extent_addr_get(extent); + size_t *p = (size_t *)(uintptr_t)addr; + /* Check the first page only. */ + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } } } @@ -996,10 +987,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (*zero && !extent_zeroed_get(extent)) { void *addr = extent_base_get(extent); size_t size = extent_size_get(extent); - if (extent_need_manual_zero(arena) || - pages_purge_forced(addr, size)) { - memset(addr, 0, size); - } + ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } return extent; From a738a66b5c43849eb90deef11b391641ce382aa0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 5 Dec 2019 17:09:44 -0800 Subject: [PATCH 1464/2608] Ehooks: Add some debug zero and addr checks. These help make sure that the ehooks return properly zeroed memory when required to. --- include/jemalloc/internal/ehooks.h | 75 ++++++++++++++++++++++++++---- src/extent2.c | 7 --- 2 files changed, 66 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 734cd181..c046cd13 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -106,18 +106,63 @@ ehooks_merge_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL; } +/* + * Some hooks are required to return zeroed memory in certain situations. In + * debug mode, we do some heuristic checks that they did what they were supposed + * to. + * + * This isn't really ehooks-specific (i.e. anyone can check for zeroed memory). 
+ * But incorrect zero information indicates an ehook bug. + */ +static inline void +ehooks_debug_zero_check(void *addr, size_t size) { + assert(((uintptr_t)addr & PAGE_MASK) == 0); + assert((size & PAGE_MASK) == 0); + assert(size > 0); + if (config_debug) { + /* Check the whole first page. */ + size_t *p = (size_t *)addr; + for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { + assert(p[i] == 0); + } + /* + * And 4 spots within. There's a tradeoff here; the larger + * this number, the more likely it is that we'll catch a bug + * where ehooks return a sparsely non-zero range. But + * increasing the number of checks also increases the number of + * page faults in debug mode. FreeBSD does much of their + * day-to-day development work in debug mode, so we don't want + * even the debug builds to be too slow. + */ + const size_t nchecks = 4; + assert(PAGE >= sizeof(size_t) * nchecks); + for (size_t i = 0; i < nchecks; ++i) { + assert(p[i * (size / sizeof(size_t) / nchecks)] == 0); + } + } +} + + static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + bool orig_zero = *zero; + void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_alloc_impl(tsdn, new_addr, size, + ret = ehooks_default_alloc_impl(tsdn, new_addr, size, alignment, zero, commit, arena_ind); + } else { + ehooks_pre_reentrancy(tsdn); + ret = extent_hooks->alloc(extent_hooks, new_addr, size, + alignment, zero, commit, arena_ind); + ehooks_post_reentrancy(tsdn); + } + assert(new_addr == NULL || ret == NULL || new_addr == ret); + assert(!orig_zero || *zero); + if (*zero && ret != NULL) { + ehooks_debug_zero_check(ret, size); } - ehooks_pre_reentrancy(tsdn); - void *ret = extent_hooks->alloc(extent_hooks, new_addr, size, alignment, - zero, commit, arena_ind); - ehooks_post_reentrancy(tsdn); return ret; } @@ 
-158,17 +203,21 @@ static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + bool err; if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_commit_impl(addr, offset, length); + err = ehooks_default_commit_impl(addr, offset, length); } else if (extent_hooks->commit == NULL) { - return true; + err = true; } else { ehooks_pre_reentrancy(tsdn); - bool err = extent_hooks->commit(extent_hooks, addr, size, + err = extent_hooks->commit(extent_hooks, addr, size, offset, length, arena_ind); ehooks_post_reentrancy(tsdn); - return err; } + if (!err) { + ehooks_debug_zero_check(addr, size); + } + return err; } static inline bool @@ -212,6 +261,14 @@ static inline bool ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + /* + * It would be correct to have a ehooks_debug_zero_check call at the end + * of this function; purge_forced is required to zero. But checking + * would touch the page in question, which may have performance + * consequences (imagine the hooks are using hugepages, with a global + * zero page off). Even in debug mode, it's usually a good idea to + * avoid cases that can dramatically increase memory consumption. + */ #ifdef PAGES_CAN_PURGE_FORCED if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_purge_forced_impl(addr, offset, length); diff --git a/src/extent2.c b/src/extent2.c index 55f72dff..4001d178 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -821,13 +821,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } - if (config_debug) { - size_t *p = (size_t *)(uintptr_t)addr; - /* Check the first page only. 
*/ - for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { - assert(p[i] == 0); - } - } } return extent; } From 865debda2276fee0257c90678bafd1bd2f73df6a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Dec 2019 10:41:25 -0800 Subject: [PATCH 1465/2608] Rename extent.h -> edata.h. This name is slightly pithier; a full-on rename will come shortly. --- Makefile.in | 2 +- include/jemalloc/internal/base_structs.h | 2 +- include/jemalloc/internal/bin.h | 2 +- include/jemalloc/internal/{extent.h => edata.h} | 0 include/jemalloc/internal/eset.h | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 +- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 2 +- src/{extent.c => edata.c} | 0 8 files changed, 6 insertions(+), 6 deletions(-) rename include/jemalloc/internal/{extent.h => edata.h} (100%) rename src/{extent.c => edata.c} (100%) diff --git a/Makefile.in b/Makefile.in index cab4e1f1..86a51ccb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,9 +104,9 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/edata.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ - $(srcroot)src/extent.c \ $(srcroot)src/extent2.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 68e7896e..10978928 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_BASE_STRUCTS_H #include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 92e8122d..8cc7fed0 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -3,7 +3,7 @@ #include 
"jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" -#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/edata.h similarity index 100% rename from include/jemalloc/internal/extent.h rename to include/jemalloc/internal/edata.h diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index fae64c8c..833f19c5 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bitmap.h" -#include "jemalloc/internal/extent.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" /* diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f5069d38..9dfc36d2 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,9 +44,9 @@ + - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 19e72d4f..0ec4d1ee 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,9 +44,9 @@ + - diff --git a/src/extent.c b/src/edata.c similarity index 100% rename from src/extent.c rename to src/edata.c From a7862df6169f27d9f347343ffef2bef3e167317c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Dec 2019 14:36:45 -0800 Subject: [PATCH 1466/2608] Rename extent_t to edata_t. This frees us up from the unfortunate extent/extent2 naming collision. 
--- include/jemalloc/internal/arena_externs.h | 14 +- include/jemalloc/internal/arena_inlines_b.h | 86 +- include/jemalloc/internal/arena_stats.h | 4 +- include/jemalloc/internal/arena_structs.h | 14 +- include/jemalloc/internal/base_externs.h | 2 +- include/jemalloc/internal/base_structs.h | 4 +- include/jemalloc/internal/bin.h | 6 +- include/jemalloc/internal/bin_types.h | 2 +- include/jemalloc/internal/edata.h | 469 +++++----- include/jemalloc/internal/eset.h | 10 +- include/jemalloc/internal/extent2.h | 34 +- .../internal/jemalloc_internal_inlines_b.h | 4 +- include/jemalloc/internal/large_externs.h | 16 +- include/jemalloc/internal/rtree.h | 72 +- include/jemalloc/internal/witness.h | 2 +- src/arena.c | 296 +++---- src/base.c | 76 +- src/bin.c | 4 +- src/ctl.c | 16 +- src/edata.c | 6 +- src/ehooks.c | 4 +- src/eset.c | 68 +- src/extent2.c | 835 +++++++++--------- src/extent_dss.c | 10 +- src/inspect.c | 38 +- src/large.c | 144 ++- src/tcache.c | 64 +- test/unit/arena_reset.c | 10 +- test/unit/base.c | 6 +- test/unit/binshard.c | 10 +- test/unit/rtree.c | 72 +- test/unit/slab.c | 10 +- 32 files changed, 1200 insertions(+), 1208 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index b6b33ce3..608dda72 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,18 +28,18 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent); + ehooks_t *ehooks, edata_t *edata); #ifdef JEMALLOC_JET -size_t arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr); +size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); #endif -extent_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, +edata_t *arena_extent_alloc_large(tsdn_t 
*tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, - extent_t *extent); + edata_t *edata); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); + edata_t *edata, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - extent_t *extent, size_t oldsize); + edata_t *edata, size_t oldsize); ssize_t arena_dirty_decay_ms_get(arena_t *arena); bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_get(arena_t *arena); @@ -64,7 +64,7 @@ void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *extent, void *ptr); + szind_t binind, edata_t *edata, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 16da67e2..6dacab33 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -9,8 +9,8 @@ #include "jemalloc/internal/ticker.h" static inline arena_t * -arena_get_from_extent(extent_t *extent) { - return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get(extent)], +arena_get_from_edata(edata_t *edata) { + return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); } @@ -42,20 +42,20 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, assert(ptr != NULL); assert(prof_info != NULL); - const extent_t *extent; + const edata_t *edata; bool is_slab; /* Static check. 
*/ if (alloc_ctx == NULL) { - extent = iealloc(tsd_tsdn(tsd), ptr); - is_slab = extent_slab_get(extent); + edata = iealloc(tsd_tsdn(tsd), ptr); + is_slab = edata_slab_get(edata); } else if (!unlikely(is_slab = alloc_ctx->slab)) { - extent = iealloc(tsd_tsdn(tsd), ptr); + edata = iealloc(tsd_tsdn(tsd), ptr); } if (unlikely(!is_slab)) { - /* extent must have been initialized at this point. */ - large_prof_info_get(extent, prof_info); + /* edata must have been initialized at this point. */ + large_prof_info_get(edata, prof_info); } else { memset(prof_info, 0, sizeof(prof_info_t)); prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; @@ -69,9 +69,9 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { /* Static check. */ if (alloc_ctx == NULL) { - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - if (unlikely(!extent_slab_get(extent))) { - large_prof_tctx_reset(extent); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + if (unlikely(!edata_slab_get(edata))) { + large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { @@ -85,10 +85,10 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - assert(!extent_slab_get(extent)); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + assert(!edata_slab_get(edata)); - large_prof_tctx_reset(extent); + large_prof_tctx_reset(edata); } JEMALLOC_ALWAYS_INLINE void @@ -96,9 +96,9 @@ arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); - extent_t *extent = iealloc(tsd_tsdn(tsd), ptr); - assert(!extent_slab_get(extent)); - large_prof_info_set(extent, tctx); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + assert(!edata_slab_get(edata)); + large_prof_info_set(edata, tctx); } JEMALLOC_ALWAYS_INLINE void @@ -130,9 +130,9 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { /* Purge a single extent to retained / unmapped directly. 
*/ JEMALLOC_ALWAYS_INLINE void arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - size_t extent_size = extent_size_get(extent); - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); + edata_t *edata) { + size_t extent_size = edata_size_get(edata); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ arena_stats_lock(tsdn, &arena->stats); @@ -169,7 +169,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (arena_t *)atomic_load_p(&arenas[extent_arena_ind_get( + return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get( iealloc(tsdn, ptr))], ATOMIC_RELAXED); } @@ -201,19 +201,19 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent; + edata_t *edata; szind_t szind; - if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, false, &extent, &szind)) { + if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &edata, &szind)) { return 0; } - if (extent == NULL) { + if (edata == NULL) { return 0; } - assert(extent_state_get(extent) == extent_state_active); + assert(edata_state_get(edata) == extent_state_active); /* Only slab members should be looked up via interior pointers. 
*/ - assert(extent_addr_get(extent) == ptr || extent_slab_get(extent)); + assert(edata_addr_get(edata) == ptr || edata_slab_get(edata)); assert(szind != SC_NSIZES); @@ -225,8 +225,8 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + edata_t *edata = iealloc(tsdn, ptr); + large_dalloc(tsdn, edata); } } @@ -243,11 +243,11 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { true, &szind, &slab); if (config_debug) { - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); + assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); - assert(slab == extent_slab_get(extent)); + assert(slab == edata_slab_get(edata)); } if (likely(slab)) { @@ -269,8 +269,8 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - extent_t *extent = iealloc(tsdn, ptr); - large_dalloc(tsdn, extent); + edata_t *edata = iealloc(tsdn, ptr); + large_dalloc(tsdn, edata); } } @@ -300,11 +300,11 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (config_debug) { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); + assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); - assert(slab == extent_slab_get(extent)); + assert(slab == edata_slab_get(edata)); } if (likely(slab)) { @@ -344,10 +344,10 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert((config_prof && opt_prof) || slab == (szind < SC_NBINS)); if (config_debug) { - extent_t *extent = rtree_extent_read(tsdn, + edata_t 
*edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); - assert(slab == extent_slab_get(extent)); + assert(szind == edata_szind_get(edata)); + assert(slab == edata_slab_get(edata)); } } @@ -401,10 +401,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); - extent_t *extent = rtree_extent_read(tsdn, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == extent_szind_get(extent)); - assert(slab == extent_slab_get(extent)); + assert(szind == edata_szind_get(edata)); + assert(slab == edata_slab_get(edata)); } if (likely(slab)) { diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 23949ed9..4166705e 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -94,8 +94,8 @@ struct arena_stats_s { */ atomic_zu_t retained; /* Derived. */ - /* Number of extent_t structs allocated by base, but not being used. */ - atomic_zu_t extent_avail; + /* Number of edata_t structs allocated by base, but not being used. */ + atomic_zu_t edata_avail; arena_stats_decay_t decay_dirty; arena_stats_decay_t decay_muzzy; diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index bc8c0394..aac620b9 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -144,7 +144,7 @@ struct arena_s { * * Synchronization: large_mtx. */ - extent_list_t large; + edata_list_t large; /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; @@ -185,14 +185,14 @@ struct arena_s { malloc_mutex_t extent_grow_mtx; /* - * Available extent structures that were allocated via - * base_alloc_extent(). 
+ * Available edata structures that were allocated via + * base_alloc_edata(). * - * Synchronization: extent_avail_mtx. + * Synchronization: edata_avail_mtx. */ - extent_tree_t extent_avail; - atomic_zu_t extent_avail_cnt; - malloc_mutex_t extent_avail_mtx; + edata_tree_t edata_avail; + atomic_zu_t edata_avail_cnt; + malloc_mutex_t edata_avail_mtx; /* * bins is used to store heaps of free regions. diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h index 35734c3c..2f241317 100644 --- a/include/jemalloc/internal/base_externs.h +++ b/include/jemalloc/internal/base_externs.h @@ -11,7 +11,7 @@ ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); +edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index 10978928..fb7e68a4 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -16,7 +16,7 @@ struct base_block_s { base_block_t *next; /* Tracks unused trailing space. */ - extent_t extent; + edata_t edata; }; struct base_s { @@ -47,7 +47,7 @@ struct base_s { base_block_t *blocks; /* Heap of extents that track unused trailing space within blocks. */ - extent_heap_t avail[SC_NSIZES]; + edata_heap_t avail[SC_NSIZES]; /* Stats, only maintained if config_stats. 
*/ size_t allocated; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 8cc7fed0..9a774e90 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -22,17 +22,17 @@ struct bin_s { * slabcur is reassigned, the previous slab must be deallocated or * inserted into slabs_{nonfull,full}. */ - extent_t *slabcur; + edata_t *slabcur; /* * Heap of non-full slabs. This heap is used to assure that new * allocations come from the non-full slab that is oldest/lowest in * memory. */ - extent_heap_t slabs_nonfull; + edata_heap_t slabs_nonfull; /* List used to track full slabs. */ - extent_list_t slabs_full; + edata_list_t slabs_full; /* Bin statistics. */ bin_stats_t stats; diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h index 3533606b..945e8326 100644 --- a/include/jemalloc/internal/bin_types.h +++ b/include/jemalloc/internal/bin_types.h @@ -3,7 +3,7 @@ #include "jemalloc/internal/sc.h" -#define BIN_SHARDS_MAX (1 << EXTENT_BITS_BINSHARD_WIDTH) +#define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) #define N_BIN_SHARDS_DEFAULT 1 /* Used in TSD static initializer only. Real init in arena_bind(). */ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2fd6e906..990c3256 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_EXTENT_H -#define JEMALLOC_INTERNAL_EXTENT_H +#ifndef JEMALLOC_INTERNAL_EDATA_H +#define JEMALLOC_INTERNAL_EDATA_H #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin_info.h" @@ -26,11 +26,11 @@ enum extent_head_state_e { typedef enum extent_head_state_e extent_head_state_t; /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ -typedef struct extent_s extent_t; -typedef ql_head(extent_t) extent_list_t; -typedef ph(extent_t) extent_tree_t; -typedef ph(extent_t) extent_heap_t; -struct extent_s { +typedef struct edata_s edata_t; +typedef ql_head(edata_t) edata_list_t; +typedef ph(edata_t) edata_tree_t; +typedef ph(edata_t) edata_heap_t; +struct edata_s { /* * Bitfield containing several fields: * @@ -105,48 +105,48 @@ struct extent_s { uint64_t e_bits; #define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) -#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) +#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EDATA_BITS_ARENA_SHIFT 0 +#define EDATA_BITS_ARENA_MASK MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_WIDTH 1 -#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) +#define EDATA_BITS_SLAB_WIDTH 1 +#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT) +#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_WIDTH 1 -#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_COMMITTED_WIDTH 1 +#define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) +#define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_WIDTH 1 -#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) +#define 
EDATA_BITS_DUMPABLE_WIDTH 1 +#define EDATA_BITS_DUMPABLE_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_DUMPABLE_MASK MASK(EDATA_BITS_DUMPABLE_WIDTH, EDATA_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_WIDTH 1 -#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) +#define EDATA_BITS_ZEROED_WIDTH 1 +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_DUMPABLE_WIDTH + EDATA_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_WIDTH 2 -#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) +#define EDATA_BITS_STATE_WIDTH 2 +#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) +#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT) +#define EDATA_BITS_SZIND_MASK MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) +#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT) +#define EDATA_BITS_NFREE_MASK MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_WIDTH 6 -#define 
EXTENT_BITS_BINSHARD_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) -#define EXTENT_BITS_BINSHARD_MASK MASK(EXTENT_BITS_BINSHARD_WIDTH, EXTENT_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_BINSHARD_WIDTH 6 +#define EDATA_BITS_BINSHARD_SHIFT (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT) +#define EDATA_BITS_BINSHARD_MASK MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_WIDTH 1 -#define EXTENT_BITS_IS_HEAD_SHIFT (EXTENT_BITS_BINSHARD_WIDTH + EXTENT_BITS_BINSHARD_SHIFT) -#define EXTENT_BITS_IS_HEAD_MASK MASK(EXTENT_BITS_IS_HEAD_WIDTH, EXTENT_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_IS_HEAD_WIDTH 1 +#define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_IS_HEAD_WIDTH + EXTENT_BITS_IS_HEAD_SHIFT) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) +#define EDATA_BITS_SN_SHIFT (EDATA_BITS_IS_HEAD_WIDTH + EDATA_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_SN_MASK (UINT64_MAX << EDATA_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -160,8 +160,8 @@ struct extent_s { * ssssssss [...] ssssssss ssssnnnn nnnnnnnn */ size_t e_size_esn; - #define EXTENT_SIZE_MASK ((size_t)~(PAGE-1)) - #define EXTENT_ESN_MASK ((size_t)PAGE-1) + #define EDATA_SIZE_MASK ((size_t)~(PAGE-1)) + #define EDATA_ESN_MASK ((size_t)PAGE-1) /* Base extent size, which may not be a multiple of PAGE. */ size_t e_bsize; }; @@ -173,13 +173,13 @@ struct extent_s { * - stashed dirty extents * - arena's large allocations */ - ql_elm(extent_t) ql_link; + ql_elm(edata_t) ql_link; /* * Linkage for per size class sn/address-ordered heaps, and * for extent_avail */ - phn(extent_t) ph_link; + phn(edata_t) ph_link; union { /* Small region slab metadata. 
*/ @@ -196,398 +196,397 @@ struct extent_s { }; static inline unsigned -extent_arena_ind_get(const extent_t *extent) { - unsigned arena_ind = (unsigned)((extent->e_bits & - EXTENT_BITS_ARENA_MASK) >> EXTENT_BITS_ARENA_SHIFT); +edata_arena_ind_get(const edata_t *edata) { + unsigned arena_ind = (unsigned)((edata->e_bits & + EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT); assert(arena_ind < MALLOCX_ARENA_LIMIT); return arena_ind; } static inline szind_t -extent_szind_get_maybe_invalid(const extent_t *extent) { - szind_t szind = (szind_t)((extent->e_bits & EXTENT_BITS_SZIND_MASK) >> - EXTENT_BITS_SZIND_SHIFT); +edata_szind_get_maybe_invalid(const edata_t *edata) { + szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) >> + EDATA_BITS_SZIND_SHIFT); assert(szind <= SC_NSIZES); return szind; } static inline szind_t -extent_szind_get(const extent_t *extent) { - szind_t szind = extent_szind_get_maybe_invalid(extent); +edata_szind_get(const edata_t *edata) { + szind_t szind = edata_szind_get_maybe_invalid(edata); assert(szind < SC_NSIZES); /* Never call when "invalid". 
*/ return szind; } static inline size_t -extent_usize_get(const extent_t *extent) { - return sz_index2size(extent_szind_get(extent)); +edata_usize_get(const edata_t *edata) { + return sz_index2size(edata_szind_get(edata)); } static inline unsigned -extent_binshard_get(const extent_t *extent) { - unsigned binshard = (unsigned)((extent->e_bits & - EXTENT_BITS_BINSHARD_MASK) >> EXTENT_BITS_BINSHARD_SHIFT); - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); +edata_binshard_get(const edata_t *edata) { + unsigned binshard = (unsigned)((edata->e_bits & + EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); return binshard; } static inline size_t -extent_sn_get(const extent_t *extent) { - return (size_t)((extent->e_bits & EXTENT_BITS_SN_MASK) >> - EXTENT_BITS_SN_SHIFT); +edata_sn_get(const edata_t *edata) { + return (size_t)((edata->e_bits & EDATA_BITS_SN_MASK) >> + EDATA_BITS_SN_SHIFT); } static inline extent_state_t -extent_state_get(const extent_t *extent) { - return (extent_state_t)((extent->e_bits & EXTENT_BITS_STATE_MASK) >> - EXTENT_BITS_STATE_SHIFT); +edata_state_get(const edata_t *edata) { + return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) >> + EDATA_BITS_STATE_SHIFT); } static inline bool -extent_zeroed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_ZEROED_MASK) >> - EXTENT_BITS_ZEROED_SHIFT); +edata_zeroed_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >> + EDATA_BITS_ZEROED_SHIFT); } static inline bool -extent_committed_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_COMMITTED_MASK) >> - EXTENT_BITS_COMMITTED_SHIFT); +edata_committed_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) >> + EDATA_BITS_COMMITTED_SHIFT); } static inline bool -extent_dumpable_get(const extent_t *extent) { - return (bool)((extent->e_bits & 
EXTENT_BITS_DUMPABLE_MASK) >> - EXTENT_BITS_DUMPABLE_SHIFT); +edata_dumpable_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_DUMPABLE_MASK) >> + EDATA_BITS_DUMPABLE_SHIFT); } static inline bool -extent_slab_get(const extent_t *extent) { - return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> - EXTENT_BITS_SLAB_SHIFT); +edata_slab_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> + EDATA_BITS_SLAB_SHIFT); } static inline unsigned -extent_nfree_get(const extent_t *extent) { - assert(extent_slab_get(extent)); - return (unsigned)((extent->e_bits & EXTENT_BITS_NFREE_MASK) >> - EXTENT_BITS_NFREE_SHIFT); +edata_nfree_get(const edata_t *edata) { + assert(edata_slab_get(edata)); + return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) >> + EDATA_BITS_NFREE_SHIFT); } static inline void * -extent_base_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return PAGE_ADDR2BASE(extent->e_addr); +edata_base_get(const edata_t *edata) { + assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || + !edata_slab_get(edata)); + return PAGE_ADDR2BASE(edata->e_addr); } static inline void * -extent_addr_get(const extent_t *extent) { - assert(extent->e_addr == PAGE_ADDR2BASE(extent->e_addr) || - !extent_slab_get(extent)); - return extent->e_addr; +edata_addr_get(const edata_t *edata) { + assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || + !edata_slab_get(edata)); + return edata->e_addr; } static inline size_t -extent_size_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_SIZE_MASK); +edata_size_get(const edata_t *edata) { + return (edata->e_size_esn & EDATA_SIZE_MASK); } static inline size_t -extent_esn_get(const extent_t *extent) { - return (extent->e_size_esn & EXTENT_ESN_MASK); +edata_esn_get(const edata_t *edata) { + return (edata->e_size_esn & EDATA_ESN_MASK); } static inline size_t -extent_bsize_get(const extent_t *extent) { - 
return extent->e_bsize; +edata_bsize_get(const edata_t *edata) { + return edata->e_bsize; } static inline void * -extent_before_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) - PAGE); +edata_before_get(const edata_t *edata) { + return (void *)((uintptr_t)edata_base_get(edata) - PAGE); } static inline void * -extent_last_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent) - PAGE); +edata_last_get(const edata_t *edata) { + return (void *)((uintptr_t)edata_base_get(edata) + + edata_size_get(edata) - PAGE); } static inline void * -extent_past_get(const extent_t *extent) { - return (void *)((uintptr_t)extent_base_get(extent) + - extent_size_get(extent)); +edata_past_get(const edata_t *edata) { + return (void *)((uintptr_t)edata_base_get(edata) + + edata_size_get(edata)); } static inline slab_data_t * -extent_slab_data_get(extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; +edata_slab_data_get(edata_t *edata) { + assert(edata_slab_get(edata)); + return &edata->e_slab_data; } static inline const slab_data_t * -extent_slab_data_get_const(const extent_t *extent) { - assert(extent_slab_get(extent)); - return &extent->e_slab_data; +edata_slab_data_get_const(const edata_t *edata) { + assert(edata_slab_get(edata)); + return &edata->e_slab_data; } static inline void -extent_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { +edata_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { assert(prof_info != NULL); prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( - &extent->e_prof_tctx, ATOMIC_ACQUIRE); - prof_info->alloc_time = extent->e_alloc_time; + &edata->e_prof_tctx, ATOMIC_ACQUIRE); + prof_info->alloc_time = edata->e_alloc_time; } static inline void -extent_arena_ind_set(extent_t *extent, unsigned arena_ind) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EXTENT_BITS_ARENA_SHIFT); 
+edata_arena_ind_set(edata_t *edata, unsigned arena_ind) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) | + ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT); } static inline void -extent_binshard_set(extent_t *extent, unsigned binshard) { +edata_binshard_set(edata_t *edata, unsigned binshard) { /* The assertion assumes szind is set already. */ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT); + assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) | + ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT); } static inline void -extent_addr_set(extent_t *extent, void *addr) { - extent->e_addr = addr; +edata_addr_set(edata_t *edata, void *addr) { + edata->e_addr = addr; } static inline void -extent_size_set(extent_t *extent, size_t size) { - assert((size & ~EXTENT_SIZE_MASK) == 0); - extent->e_size_esn = size | (extent->e_size_esn & ~EXTENT_SIZE_MASK); +edata_size_set(edata_t *edata, size_t size) { + assert((size & ~EDATA_SIZE_MASK) == 0); + edata->e_size_esn = size | (edata->e_size_esn & ~EDATA_SIZE_MASK); } static inline void -extent_esn_set(extent_t *extent, size_t esn) { - extent->e_size_esn = (extent->e_size_esn & ~EXTENT_ESN_MASK) | (esn & - EXTENT_ESN_MASK); +edata_esn_set(edata_t *edata, size_t esn) { + edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) | (esn & + EDATA_ESN_MASK); } static inline void -extent_bsize_set(extent_t *extent, size_t bsize) { - extent->e_bsize = bsize; +edata_bsize_set(edata_t *edata, size_t bsize) { + edata->e_bsize = bsize; } static inline void -extent_szind_set(extent_t *extent, szind_t szind) { +edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". 
*/ - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SZIND_MASK) | - ((uint64_t)szind << EXTENT_BITS_SZIND_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) | + ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT); } static inline void -extent_nfree_set(extent_t *extent, unsigned nfree) { - assert(extent_slab_get(extent)); - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_NFREE_MASK) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_set(edata_t *edata, unsigned nfree) { + assert(edata_slab_get(edata)); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) | + ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_binshard_set(extent_t *extent, unsigned nfree, unsigned binshard) { +edata_nfree_binshard_set(edata_t *edata, unsigned nfree, unsigned binshard) { /* The assertion assumes szind is set already. */ - assert(binshard < bin_infos[extent_szind_get(extent)].n_shards); - extent->e_bits = (extent->e_bits & - (~EXTENT_BITS_NFREE_MASK & ~EXTENT_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EXTENT_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EXTENT_BITS_NFREE_SHIFT); + assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); + edata->e_bits = (edata->e_bits & + (~EDATA_BITS_NFREE_MASK & ~EDATA_BITS_BINSHARD_MASK)) | + ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) | + ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_inc(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits += ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_inc(edata_t *edata) { + assert(edata_slab_get(edata)); + edata->e_bits += ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_nfree_dec(extent_t *extent) { - assert(extent_slab_get(extent)); - extent->e_bits -= ((uint64_t)1U << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_dec(edata_t *edata) { + assert(edata_slab_get(edata)); + edata->e_bits -= ((uint64_t)1U << EDATA_BITS_NFREE_SHIFT); } static inline void 
-extent_nfree_sub(extent_t *extent, uint64_t n) { - assert(extent_slab_get(extent)); - extent->e_bits -= (n << EXTENT_BITS_NFREE_SHIFT); +edata_nfree_sub(edata_t *edata, uint64_t n) { + assert(edata_slab_get(edata)); + edata->e_bits -= (n << EDATA_BITS_NFREE_SHIFT); } static inline void -extent_sn_set(extent_t *extent, size_t sn) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SN_MASK) | - ((uint64_t)sn << EXTENT_BITS_SN_SHIFT); +edata_sn_set(edata_t *edata, size_t sn) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SN_MASK) | + ((uint64_t)sn << EDATA_BITS_SN_SHIFT); } static inline void -extent_state_set(extent_t *extent, extent_state_t state) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_STATE_MASK) | - ((uint64_t)state << EXTENT_BITS_STATE_SHIFT); +edata_state_set(edata_t *edata, extent_state_t state) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) | + ((uint64_t)state << EDATA_BITS_STATE_SHIFT); } static inline void -extent_zeroed_set(extent_t *extent, bool zeroed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EXTENT_BITS_ZEROED_SHIFT); +edata_zeroed_set(edata_t *edata, bool zeroed) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) | + ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); } static inline void -extent_committed_set(extent_t *extent, bool committed) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); +edata_committed_set(edata_t *edata, bool committed) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) | + ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT); } static inline void -extent_dumpable_set(extent_t *extent, bool dumpable) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +edata_dumpable_set(edata_t *edata, bool dumpable) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable 
<< EDATA_BITS_DUMPABLE_SHIFT); } static inline void -extent_slab_set(extent_t *extent, bool slab) { - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | - ((uint64_t)slab << EXTENT_BITS_SLAB_SHIFT); +edata_slab_set(edata_t *edata, bool slab) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | + ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); } static inline void -extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - atomic_store_p(&extent->e_prof_tctx, tctx, ATOMIC_RELEASE); +edata_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { + atomic_store_p(&edata->e_prof_tctx, tctx, ATOMIC_RELEASE); } static inline void -extent_prof_alloc_time_set(extent_t *extent, nstime_t *t) { - nstime_copy(&extent->e_alloc_time, t); +edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { + nstime_copy(&edata->e_alloc_time, t); } static inline bool -extent_is_head_get(extent_t *extent) { +edata_is_head_get(edata_t *edata) { if (maps_coalesce) { not_reached(); } - return (bool)((extent->e_bits & EXTENT_BITS_IS_HEAD_MASK) >> - EXTENT_BITS_IS_HEAD_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >> + EDATA_BITS_IS_HEAD_SHIFT); } static inline void -extent_is_head_set(extent_t *extent, bool is_head) { +edata_is_head_set(edata_t *edata, bool is_head) { if (maps_coalesce) { not_reached(); } - extent->e_bits = (extent->e_bits & ~EXTENT_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EXTENT_BITS_IS_HEAD_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) | + ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } static inline void -extent_init(extent_t *extent, unsigned arena_ind, void *addr, size_t size, +edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, bool committed, bool dumpable, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); - extent_arena_ind_set(extent, arena_ind); - extent_addr_set(extent, addr); - 
extent_size_set(extent, size); - extent_slab_set(extent, slab); - extent_szind_set(extent, szind); - extent_sn_set(extent, sn); - extent_state_set(extent, state); - extent_zeroed_set(extent, zeroed); - extent_committed_set(extent, committed); - extent_dumpable_set(extent, dumpable); - ql_elm_new(extent, ql_link); + edata_arena_ind_set(edata, arena_ind); + edata_addr_set(edata, addr); + edata_size_set(edata, size); + edata_slab_set(edata, slab); + edata_szind_set(edata, szind); + edata_sn_set(edata, sn); + edata_state_set(edata, state); + edata_zeroed_set(edata, zeroed); + edata_committed_set(edata, committed); + edata_dumpable_set(edata, dumpable); + ql_elm_new(edata, ql_link); if (!maps_coalesce) { - extent_is_head_set(extent, (is_head == EXTENT_IS_HEAD) ? true : - false); + edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); } if (config_prof) { - extent_prof_tctx_set(extent, NULL); + edata_prof_tctx_set(edata, NULL); } } static inline void -extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { - extent_arena_ind_set(extent, (1U << MALLOCX_ARENA_BITS) - 1); - extent_addr_set(extent, addr); - extent_bsize_set(extent, bsize); - extent_slab_set(extent, false); - extent_szind_set(extent, SC_NSIZES); - extent_sn_set(extent, sn); - extent_state_set(extent, extent_state_active); - extent_zeroed_set(extent, true); - extent_committed_set(extent, true); - extent_dumpable_set(extent, true); +edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { + edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); + edata_addr_set(edata, addr); + edata_bsize_set(edata, bsize); + edata_slab_set(edata, false); + edata_szind_set(edata, SC_NSIZES); + edata_sn_set(edata, sn); + edata_state_set(edata, extent_state_active); + edata_zeroed_set(edata, true); + edata_committed_set(edata, true); + edata_dumpable_set(edata, true); } static inline void -extent_list_init(extent_list_t *list) { +edata_list_init(edata_list_t *list) { ql_new(list); } -static inline 
extent_t * -extent_list_first(const extent_list_t *list) { +static inline edata_t * +edata_list_first(const edata_list_t *list) { return ql_first(list); } -static inline extent_t * -extent_list_last(const extent_list_t *list) { +static inline edata_t * +edata_list_last(const edata_list_t *list) { return ql_last(list, ql_link); } static inline void -extent_list_append(extent_list_t *list, extent_t *extent) { - ql_tail_insert(list, extent, ql_link); +edata_list_append(edata_list_t *list, edata_t *edata) { + ql_tail_insert(list, edata, ql_link); } static inline void -extent_list_prepend(extent_list_t *list, extent_t *extent) { - ql_head_insert(list, extent, ql_link); +edata_list_prepend(edata_list_t *list, edata_t *edata) { + ql_head_insert(list, edata, ql_link); } static inline void -extent_list_replace(extent_list_t *list, extent_t *to_remove, - extent_t *to_insert) { +edata_list_replace(edata_list_t *list, edata_t *to_remove, + edata_t *to_insert) { ql_after_insert(to_remove, to_insert, ql_link); ql_remove(list, to_remove, ql_link); } static inline void -extent_list_remove(extent_list_t *list, extent_t *extent) { - ql_remove(list, extent, ql_link); +edata_list_remove(edata_list_t *list, edata_t *edata) { + ql_remove(list, edata, ql_link); } static inline int -extent_sn_comp(const extent_t *a, const extent_t *b) { - size_t a_sn = extent_sn_get(a); - size_t b_sn = extent_sn_get(b); +edata_sn_comp(const edata_t *a, const edata_t *b) { + size_t a_sn = edata_sn_get(a); + size_t b_sn = edata_sn_get(b); return (a_sn > b_sn) - (a_sn < b_sn); } static inline int -extent_esn_comp(const extent_t *a, const extent_t *b) { - size_t a_esn = extent_esn_get(a); - size_t b_esn = extent_esn_get(b); +edata_esn_comp(const edata_t *a, const edata_t *b) { + size_t a_esn = edata_esn_get(a); + size_t b_esn = edata_esn_get(b); return (a_esn > b_esn) - (a_esn < b_esn); } static inline int -extent_ad_comp(const extent_t *a, const extent_t *b) { - uintptr_t a_addr = 
(uintptr_t)extent_addr_get(a); - uintptr_t b_addr = (uintptr_t)extent_addr_get(b); +edata_ad_comp(const edata_t *a, const edata_t *b) { + uintptr_t a_addr = (uintptr_t)edata_addr_get(a); + uintptr_t b_addr = (uintptr_t)edata_addr_get(b); return (a_addr > b_addr) - (a_addr < b_addr); } static inline int -extent_ead_comp(const extent_t *a, const extent_t *b) { +edata_ead_comp(const edata_t *a, const edata_t *b) { uintptr_t a_eaddr = (uintptr_t)a; uintptr_t b_eaddr = (uintptr_t)b; @@ -595,32 +594,32 @@ extent_ead_comp(const extent_t *a, const extent_t *b) { } static inline int -extent_snad_comp(const extent_t *a, const extent_t *b) { +edata_snad_comp(const edata_t *a, const edata_t *b) { int ret; - ret = extent_sn_comp(a, b); + ret = edata_sn_comp(a, b); if (ret != 0) { return ret; } - ret = extent_ad_comp(a, b); + ret = edata_ad_comp(a, b); return ret; } static inline int -extent_esnead_comp(const extent_t *a, const extent_t *b) { +edata_esnead_comp(const edata_t *a, const edata_t *b) { int ret; - ret = extent_esn_comp(a, b); + ret = edata_esn_comp(a, b); if (ret != 0) { return ret; } - ret = extent_ead_comp(a, b); + ret = edata_ead_comp(a, b); return ret; } -ph_proto(, extent_avail_, extent_tree_t, extent_t) -ph_proto(, extent_heap_, extent_heap_t, extent_t) +ph_proto(, edata_avail_, edata_tree_t, edata_t) +ph_proto(, edata_heap_, edata_heap_t, edata_t) -#endif /* JEMALLOC_INTERNAL_EXTENT_H */ +#endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 833f19c5..e76257af 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -19,7 +19,7 @@ struct eset_s { * * Synchronization: mtx. */ - extent_heap_t heaps[SC_NPSIZES + 1]; + edata_heap_t heaps[SC_NPSIZES + 1]; atomic_zu_t nextents[SC_NPSIZES + 1]; atomic_zu_t nbytes[SC_NPSIZES + 1]; @@ -35,7 +35,7 @@ struct eset_s { * * Synchronization: mtx. 
*/ - extent_list_t lru; + edata_list_t lru; /* * Page sum for all extents in heaps. @@ -67,13 +67,13 @@ size_t eset_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. */ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); -void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); -void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent); +void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); +void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); /* * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. */ -extent_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, +edata_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment); void eset_prefork(tsdn_t *tsdn, eset_t *eset); diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 7a18a613..ef232677 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -26,38 +26,38 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +edata_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); +void extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata); -extent_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent); -extent_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + eset_t *eset, edata_t *edata); +edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, 
size_t npages_min); -extent_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent); + edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent); + edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); + edata_t *edata, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); + edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); + edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length); -extent_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + edata_t *edata, size_t offset, size_t length); +edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b); -bool extent_head_no_merge(extent_t *a, extent_t *b); + edata_t *a, edata_t *b); +bool extent_head_no_merge(edata_t *a, edata_t *b); bool extent_boot(void); diff --git 
a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index d4cb04c2..8367ee2b 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -76,12 +76,12 @@ arena_is_auto(arena_t *arena) { return (arena_ind_get(arena) < manual_arena_base); } -JEMALLOC_ALWAYS_INLINE extent_t * +JEMALLOC_ALWAYS_INLINE edata_t * iealloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + return rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); } diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 2299920d..fe5e606b 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -6,7 +6,7 @@ void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); -bool large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, +bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, size_t usize_max, bool zero); void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, @@ -18,12 +18,12 @@ extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; typedef void (large_dalloc_maybe_junk_t)(void *, size_t); extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; -void large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent); -void large_dalloc_finish(tsdn_t *tsdn, extent_t *extent); -void large_dalloc(tsdn_t *tsdn, extent_t *extent); -size_t large_salloc(tsdn_t *tsdn, const extent_t *extent); -void large_prof_info_get(const extent_t *extent, prof_info_t *prof_info); -void 
large_prof_tctx_reset(extent_t *extent); -void large_prof_info_set(extent_t *extent, prof_tctx_t *tctx); +void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); +void large_dalloc(tsdn_t *tsdn, edata_t *edata); +size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); +void large_prof_info_get(const edata_t *edata, prof_info_t *prof_info); +void large_prof_tctx_reset(edata_t *edata); +void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 16ccbebe..339c7e5e 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -48,18 +48,18 @@ struct rtree_leaf_elm_s { /* * Single pointer-width field containing all three leaf element fields. * For example, on a 64-bit x64 system with 48 significant virtual - * memory address bits, the index, extent, and slab fields are packed as + * memory address bits, the index, edata, and slab fields are packed as * such: * * x: index - * e: extent + * e: edata * b: slab * * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b */ atomic_p_t le_bits; #else - atomic_p_t le_extent; /* (extent_t *) */ + atomic_p_t le_edata; /* (edata_t *) */ atomic_u_t le_szind; /* (szind_t) */ atomic_b_t le_slab; /* (bool) */ #endif @@ -176,8 +176,8 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); } -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_bits_extent_get(uintptr_t bits) { +JEMALLOC_ALWAYS_INLINE edata_t * +rtree_leaf_elm_bits_edata_get(uintptr_t bits) { # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set @@ -187,10 +187,10 @@ rtree_leaf_elm_bits_extent_get(uintptr_t bits) { /* Mask off the slab bit. 
*/ uintptr_t low_bit_mask = ~(uintptr_t)1; uintptr_t mask = high_bit_mask & low_bit_mask; - return (extent_t *)(bits & mask); + return (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask slab bit. */ - return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> + return (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & ~((uintptr_t)0x1)); # endif } @@ -207,16 +207,16 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) { # endif -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, +JEMALLOC_ALWAYS_INLINE edata_t * +rtree_leaf_elm_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_extent_get(bits); + return rtree_leaf_elm_bits_edata_get(bits); #else - extent_t *extent = (extent_t *)atomic_load_p(&elm->le_extent, dependent + edata_t *edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); - return extent; + return edata; #endif } @@ -245,16 +245,16 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, } static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, extent_t *extent) { +rtree_leaf_elm_edata_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, edata_t *edata) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) + LG_VADDR) | ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - atomic_store_p(&elm->le_extent, extent, ATOMIC_RELEASE); + atomic_store_p(&elm->le_edata, edata, ATOMIC_RELEASE); #endif } @@ -267,7 +267,7 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); @@ -283,7 +283,7 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_extent_get(old_bits) & + LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else @@ -293,20 +293,20 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, static inline void 
rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, extent_t *extent, szind_t szind, bool slab) { + rtree_leaf_elm_t *elm, edata_t *edata, szind_t szind, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)extent & (((uintptr_t)0x1 << LG_VADDR) - 1)) | + ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); /* - * Write extent last, since the element is atomically considered valid - * as soon as the extent field is non-NULL. + * Write edata last, since the element is atomically considered valid + * as soon as the edata field is non-NULL. */ - rtree_leaf_elm_extent_write(tsdn, rtree, elm, extent); + rtree_leaf_elm_edata_write(tsdn, rtree, elm, edata); #endif } @@ -317,7 +317,7 @@ rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, /* * The caller implicitly assures that it is the only writer to the szind - * and slab fields, and that the extent field cannot currently change. + * and slab fields, and that the edata field cannot currently change. */ rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); @@ -384,9 +384,9 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, static inline bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - extent_t *extent, szind_t szind, bool slab) { - /* Use rtree_clear() to set the extent to NULL. */ - assert(extent != NULL); + edata_t *edata, szind_t szind, bool slab) { + /* Use rtree_clear() to set the edata to NULL. 
*/ + assert(edata != NULL); rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); @@ -394,8 +394,8 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return true; } - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, extent, szind, slab); + assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) == NULL); + rtree_leaf_elm_write(tsdn, rtree, elm, edata, szind, slab); return false; } @@ -412,15 +412,15 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return elm; } -JEMALLOC_ALWAYS_INLINE extent_t * -rtree_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, +JEMALLOC_ALWAYS_INLINE edata_t * +rtree_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { return NULL; } - return rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); } JEMALLOC_ALWAYS_INLINE szind_t @@ -440,14 +440,14 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, */ JEMALLOC_ALWAYS_INLINE bool -rtree_extent_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, extent_t **r_extent, szind_t *r_szind) { +rtree_edata_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, bool dependent, edata_t **r_edata, szind_t *r_szind) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { return true; } - *r_extent = rtree_leaf_elm_extent_read(tsdn, rtree, elm, dependent); + *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); return false; } @@ -520,7 +520,7 @@ static inline void rtree_clear(tsdn_t 
*tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - assert(rtree_leaf_elm_extent_read(tsdn, rtree, elm, false) != + assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) != NULL); rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false); } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index d76b7908..ddbcf9d2 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,7 +43,7 @@ #define WITNESS_RANK_TCACHE_QL 13U #define WITNESS_RANK_EXTENT_GROW 14U #define WITNESS_RANK_EXTENTS 15U -#define WITNESS_RANK_EXTENT_AVAIL 16U +#define WITNESS_RANK_EDATA_AVAIL 16U #define WITNESS_RANK_EXTENT_POOL 17U #define WITNESS_RANK_RTREE 18U diff --git a/src/arena.c b/src/arena.c index 2d46b9e6..f05a1d17 100644 --- a/src/arena.c +++ b/src/arena.c @@ -60,9 +60,9 @@ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); /******************************************************************************/ @@ -102,8 +102,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->retained, eset_npages_get(&arena->eset_retained) << LG_PAGE); - atomic_store_zu(&astats->extent_avail, - atomic_load_zu(&arena->extent_avail_cnt, ATOMIC_RELAXED), + atomic_store_zu(&astats->edata_avail, + atomic_load_zu(&arena->edata_avail_cnt, ATOMIC_RELAXED), ATOMIC_RELAXED); 
arena_stats_accum_u64(&astats->decay_dirty.npurge, @@ -224,7 +224,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(extent_avail_mtx, + READ_ARENA_MUTEX_PROF_DATA(edata_avail_mtx, arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, arena_prof_mutex_extents_dirty) @@ -254,11 +254,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { + edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, extent); + extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -267,34 +267,34 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static void * -arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { +arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { void *ret; - slab_data_t *slab_data = extent_slab_data_get(slab); + slab_data_t *slab_data = edata_slab_data_get(slab); size_t regind; - assert(extent_nfree_get(slab) > 0); + assert(edata_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)extent_addr_get(slab) + + ret = (void *)((uintptr_t)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); - extent_nfree_dec(slab); + edata_nfree_dec(slab); return ret; } static void -arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, +arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void** ptrs) { - slab_data_t *slab_data = 
extent_slab_data_get(slab); + slab_data_t *slab_data = edata_slab_data_get(slab); - assert(extent_nfree_get(slab) >= cnt); + assert(edata_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); #if (! defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) for (unsigned i = 0; i < cnt; i++) { size_t regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)extent_addr_get(slab) + + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); } #else @@ -315,7 +315,7 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, * Load from memory locations only once, outside the * hot loop below. */ - uintptr_t base = (uintptr_t)extent_addr_get(slab); + uintptr_t base = (uintptr_t)edata_addr_get(slab); uintptr_t regsize = (uintptr_t)bin_info->reg_size; while (pop--) { size_t bit = cfs_lu(&g); @@ -327,24 +327,24 @@ arena_slab_reg_alloc_batch(extent_t *slab, const bin_info_t *bin_info, slab_data->bitmap[group] = g; } #endif - extent_nfree_sub(slab, cnt); + edata_nfree_sub(slab, cnt); } #ifndef JEMALLOC_JET static #endif size_t -arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { +arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr) { size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)extent_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. 
*/ - assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % (uintptr_t)bin_infos[binind].reg_size == 0); - diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); /* Avoid doing division with a variable divisor. */ regind = div_compute(&arena_binind_div_info[binind], diff); @@ -355,17 +355,17 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { } static void -arena_slab_reg_dalloc(extent_t *slab, slab_data_t *slab_data, void *ptr) { - szind_t binind = extent_szind_get(slab); +arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) { + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(slab, binind, ptr); - assert(extent_nfree_get(slab) < bin_info->nregs); + assert(edata_nfree_get(slab) < bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. 
*/ assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - extent_nfree_inc(slab); + edata_nfree_inc(slab); } static void @@ -423,7 +423,7 @@ arena_may_have_muzzy(arena_t *arena) { return arena_muzzy_decay_ms_get(arena) != 0; } -extent_t * +edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { ehooks_t *ehooks = arena_get_ehooks(arena); @@ -434,23 +434,22 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - extent_t *extent = extents_alloc(tsdn, arena, ehooks, - &arena->eset_dirty, NULL, usize, sz_large_pad, alignment, false, - szind, zero, &commit); - if (extent == NULL && arena_may_have_muzzy(arena)) { - extent = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + edata_t *edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); + if (edata == NULL && arena_may_have_muzzy(arena)) { + edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; - if (extent == NULL) { - extent = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, + if (edata == NULL) { + edata = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { /* - * extent may be NULL on OOM, but in that case - * mapped_add isn't used below, so there's no need to - * conditionlly set it to 0 here. + * edata may be NULL on OOM, but in that case mapped_add + * isn't used below, so there's no need to conditionlly + * set it to 0 here. 
*/ mapped_add = size; } @@ -458,7 +457,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, mapped_add = 0; } - if (extent != NULL) { + if (edata != NULL) { if (config_stats) { arena_stats_lock(tsdn, &arena->stats); arena_large_malloc_stats_update(tsdn, arena, usize); @@ -471,24 +470,24 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_nactive_add(arena, size >> LG_PAGE); } - return extent; + return edata; } void -arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { arena_stats_lock(tsdn, &arena->stats); arena_large_dalloc_stats_update(tsdn, arena, - extent_usize_get(extent)); + edata_usize_get(edata)); arena_stats_unlock(tsdn, &arena->stats); } - arena_nactive_sub(arena, extent_size_get(extent) >> LG_PAGE); + arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE); } void -arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { - size_t usize = extent_usize_get(extent); + size_t usize = edata_usize_get(edata); size_t udiff = oldusize - usize; if (config_stats) { @@ -500,9 +499,9 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } void -arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { - size_t usize = extent_usize_get(extent); + size_t usize = edata_usize_get(edata); size_t udiff = usize - oldusize; if (config_stats) { @@ -819,25 +818,25 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, size_t npages_limit, - size_t npages_decay_max, extent_list_t *decay_extents) { + size_t npages_decay_max, 
edata_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. */ size_t nstashed = 0; - extent_t *extent; + edata_t *edata; while (nstashed < npages_decay_max && - (extent = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) + (edata = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) != NULL) { - extent_list_append(decay_extents, extent); - nstashed += extent_size_get(extent) >> LG_PAGE; + edata_list_append(decay_extents, edata); + nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; } static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - arena_decay_t *decay, eset_t *eset, bool all, extent_list_t *decay_extents, + arena_decay_t *decay, eset_t *eset, bool all, edata_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -849,31 +848,30 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, npurged = 0; ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - for (extent_t *extent = extent_list_first(decay_extents); extent != - NULL; extent = extent_list_first(decay_extents)) { + for (edata_t *edata = edata_list_first(decay_extents); edata != + NULL; edata = edata_list_first(decay_extents)) { if (config_stats) { nmadvise++; } - size_t npages = extent_size_get(extent) >> LG_PAGE; + size_t npages = edata_size_get(edata) >> LG_PAGE; npurged += npages; - extent_list_remove(decay_extents, extent); + edata_list_remove(decay_extents, edata); switch (eset_state_get(eset)) { case extent_state_active: not_reached(); case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, - ehooks, extent, 0, - extent_size_get(extent))) { + ehooks, edata, 0, edata_size_get(edata))) { extents_dalloc(tsdn, arena, ehooks, - &arena->eset_muzzy, extent); + &arena->eset_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, 
is_background_thread); break; } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { nunmapped += npages; } @@ -923,8 +921,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, ehooks_t *ehooks = arena_get_ehooks(arena); - extent_list_t decay_extents; - extent_list_init(&decay_extents); + edata_list_t decay_extents; + edata_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, eset, npages_limit, npages_decay_max, &decay_extents); @@ -1000,33 +998,33 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { } static void -arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { - arena_nactive_sub(arena, extent_size_get(slab) >> LG_PAGE); +arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { + arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); ehooks_t *ehooks = arena_get_ehooks(arena); arena_extents_dirty_dalloc(tsdn, arena, ehooks, slab); } static void -arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { - assert(extent_nfree_get(slab) > 0); - extent_heap_insert(&bin->slabs_nonfull, slab); +arena_bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) > 0); + edata_heap_insert(&bin->slabs_nonfull, slab); if (config_stats) { bin->stats.nonfull_slabs++; } } static void -arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { - extent_heap_remove(&bin->slabs_nonfull, slab); +arena_bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { + edata_heap_remove(&bin->slabs_nonfull, slab); if (config_stats) { bin->stats.nonfull_slabs--; } } -static extent_t * +static edata_t * arena_bin_slabs_nonfull_tryget(bin_t *bin) { - extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); + edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; } @@ -1038,30 +1036,30 @@ 
arena_bin_slabs_nonfull_tryget(bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { - assert(extent_nfree_get(slab) == 0); +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed - * for auto arenas. Bypass this step to avoid touching the extent + * for auto arenas. Bypass this step to avoid touching the edata * linkage (often results in cache misses) for auto arenas. */ if (arena_is_auto(arena)) { return; } - extent_list_append(&bin->slabs_full, slab); + edata_list_append(&bin->slabs_full, slab); } static void -arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - extent_list_remove(&bin->slabs_full, slab); + edata_list_remove(&bin->slabs_full, slab); } static void arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { - extent_t *slab; + edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { @@ -1071,13 +1069,13 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - while ((slab = extent_heap_remove_first(&bin->slabs_nonfull)) != NULL) { + while ((slab = edata_heap_remove_first(&bin->slabs_nonfull)) != NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - for (slab = extent_list_first(&bin->slabs_full); slab != NULL; - slab = extent_list_first(&bin->slabs_full)) { + for (slab = edata_list_first(&bin->slabs_full); slab != NULL; + slab = edata_list_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -1109,9 
+1107,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Large allocations. */ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - for (extent_t *extent = extent_list_first(&arena->large); extent != - NULL; extent = extent_list_first(&arena->large)) { - void *ptr = extent_base_get(extent); + for (edata_t *edata = edata_list_first(&arena->large); edata != + NULL; edata = edata_list_first(&arena->large)) { + void *ptr = edata_base_get(edata); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1129,7 +1127,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - large_dalloc(tsd_tsdn(tsd), extent); + large_dalloc(tsd_tsdn(tsd), edata); malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1157,10 +1155,10 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { * dss-based extents for later reuse. */ ehooks_t *ehooks = arena_get_ehooks(arena); - extent_t *extent; - while ((extent = extents_evict(tsdn, arena, ehooks, + edata_t *edata; + while ((edata = extents_evict(tsdn, arena, ehooks, &arena->eset_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, arena, ehooks, extent); + extent_destroy_wrapper(tsdn, arena, ehooks, edata); } } @@ -1200,10 +1198,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { base_delete(tsd_tsdn(tsd), arena->base); } -static extent_t * +static edata_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const bin_info_t *bin_info, szind_t szind) { - extent_t *slab; + edata_t *slab; bool zero, commit; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1222,7 +1220,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return slab; } -static extent_t * +static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1232,7 
+1230,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - extent_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, @@ -1246,22 +1244,22 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard return NULL; } } - assert(extent_slab_get(slab)); + assert(edata_slab_get(slab)); /* Initialize slab internals. */ - slab_data_t *slab_data = extent_slab_data_get(slab); - extent_nfree_binshard_set(slab, bin_info->nregs, binshard); + slab_data_t *slab_data = edata_slab_data_get(slab); + edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); - arena_nactive_add(arena, extent_size_get(slab) >> LG_PAGE); + arena_nactive_add(arena, edata_size_get(slab) >> LG_PAGE); return slab; } -static extent_t * +static edata_t * arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, unsigned binshard) { - extent_t *slab; + edata_t *slab; const bin_info_t *bin_info; /* Look for a usable slab. */ @@ -1307,14 +1305,14 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (bin->slabcur != NULL) { /* Only attempted when current slab is full. 
*/ - assert(extent_nfree_get(bin->slabcur) == 0); + assert(edata_nfree_get(bin->slabcur) == 0); } const bin_info_t *bin_info = &bin_infos[binind]; - extent_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, + edata_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, binshard); if (bin->slabcur != NULL) { - if (extent_nfree_get(bin->slabcur) > 0) { + if (edata_nfree_get(bin->slabcur) > 0) { /* * Another thread updated slabcur while this one ran * without the bin lock in arena_bin_nonfull_slab_get(). @@ -1331,7 +1329,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, * arena_bin_lower_slab() must be called, as if * a region were just deallocated from the slab. */ - if (extent_nfree_get(slab) == bin_info->nregs) { + if (edata_nfree_get(slab) == bin_info->nregs) { arena_dalloc_bin_slab(tsdn, arena, slab, bin); } else { @@ -1350,7 +1348,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, return NULL; } bin->slabcur = slab; - assert(extent_nfree_get(bin->slabcur) > 0); + assert(edata_nfree_get(bin->slabcur) > 0); return arena_slab_reg_alloc(slab, bin_info); } @@ -1386,12 +1384,12 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void **empty_position = cache_bin_empty_position_get(tbin, binind); for (i = 0, nfill = (cache_bin_ncached_max_get(binind) >> tcache->lg_fill_div[binind]); i < nfill; i += cnt) { - extent_t *slab; - if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > + edata_t *slab; + if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > 0) { unsigned tofill = nfill - i; - cnt = tofill < extent_nfree_get(slab) ? - tofill : extent_nfree_get(slab); + cnt = tofill < edata_nfree_get(slab) ? 
+ tofill : edata_nfree_get(slab); arena_slab_reg_alloc_batch( slab, &bin_infos[binind], cnt, empty_position - nfill + i); @@ -1454,14 +1452,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; bin_t *bin; size_t usize; - extent_t *slab; + edata_t *slab; assert(binind < SC_NBINS); usize = sz_index2size(binind); unsigned binshard; bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { + if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > 0) { ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); @@ -1554,11 +1552,11 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, + edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); szind_t szind = sz_size2index(usize); - extent_szind_set(extent, szind); + edata_szind_set(edata, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, szind, false); @@ -1568,11 +1566,11 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { } static size_t -arena_prof_demote(tsdn_t *tsdn, extent_t *extent, const void *ptr) { +arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - extent_szind_set(extent, SC_NBINS); + edata_szind_set(edata, SC_NBINS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, @@ -1589,9 +1587,9 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - extent_t *extent = iealloc(tsdn, ptr); - size_t usize = extent_usize_get(extent); - size_t 
bumped_usize = arena_prof_demote(tsdn, extent, ptr); + edata_t *edata = iealloc(tsdn, ptr); + size_t usize = edata_usize_get(edata); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { /* * Currently, we only do redzoning for small sampled @@ -1604,17 +1602,17 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { - large_dalloc(tsdn, extent); + large_dalloc(tsdn, edata); } } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { +arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { - szind_t binind = extent_szind_get(slab); + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; /* @@ -1631,7 +1629,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { } static void -arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { assert(slab != bin->slabcur); @@ -1646,9 +1644,9 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - assert(extent_nfree_get(slab) > 0); + assert(edata_nfree_get(slab) > 0); /* * Make sure that if bin->slabcur is non-NULL, it refers to the @@ -1656,9 +1654,9 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, * than proactively keeping it pointing at the oldest/lowest non-full * slab. */ - if (bin->slabcur != NULL && extent_snad_comp(bin->slabcur, slab) > 0) { + if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { /* Switch slabcur. 
*/ - if (extent_nfree_get(bin->slabcur) > 0) { + if (edata_nfree_get(bin->slabcur) > 0) { arena_bin_slabs_nonfull_insert(bin, bin->slabcur); } else { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); @@ -1674,8 +1672,8 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, static void arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *slab, void *ptr, bool junked) { - slab_data_t *slab_data = extent_slab_data_get(slab); + szind_t binind, edata_t *slab, void *ptr, bool junked) { + slab_data_t *slab_data = edata_slab_data_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { @@ -1683,7 +1681,7 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } arena_slab_reg_dalloc(slab, slab_data, ptr); - unsigned nfree = extent_nfree_get(slab); + unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); arena_dalloc_bin_slab(tsdn, arena, slab, bin); @@ -1700,29 +1698,29 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, extent_t *extent, void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + szind_t binind, edata_t *edata, void *ptr) { + arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, true); } static void -arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { - szind_t binind = extent_szind_get(extent); - unsigned binshard = extent_binshard_get(extent); +arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { + szind_t binind = edata_szind_get(edata); + unsigned binshard = edata_binshard_get(edata); bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, extent, ptr, + 
arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, false); malloc_mutex_unlock(tsdn, &bin->lock); } void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - extent_t *extent = iealloc(tsdn, ptr); - arena_t *arena = arena_get_from_extent(extent); + edata_t *edata = iealloc(tsdn, ptr); + arena_t *arena = arena_get_from_edata(edata); - arena_dalloc_bin(tsdn, arena, extent, ptr); + arena_dalloc_bin(tsdn, arena, edata, ptr); arena_decay_tick(tsdn, arena); } @@ -1733,7 +1731,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - extent_t *extent = iealloc(tsdn, ptr); + edata_t *edata = iealloc(tsdn, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1756,19 +1754,19 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, goto done; } - arena_t *arena = arena_get_from_extent(extent); + arena_t *arena = arena_get_from_edata(edata); arena_decay_tick(tsdn, arena); ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { - ret = large_ralloc_no_move(tsdn, extent, usize_min, usize_max, + ret = large_ralloc_no_move(tsdn, edata, usize_min, usize_max, zero); } else { ret = true; } done: - assert(extent == iealloc(tsdn, ptr)); - *newsize = extent_usize_get(extent); + assert(edata == iealloc(tsdn, ptr)); + *newsize = edata_usize_get(edata); return ret; } @@ -2006,7 +2004,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); - extent_list_init(&arena->large); + edata_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2055,9 +2053,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - extent_avail_new(&arena->extent_avail); - if 
(malloc_mutex_init(&arena->extent_avail_mtx, "extent_avail", - WITNESS_RANK_EXTENT_AVAIL, malloc_mutex_rank_exclusive)) { + edata_avail_new(&arena->edata_avail); + if (malloc_mutex_init(&arena->edata_avail_mtx, "edata_avail", + WITNESS_RANK_EDATA_AVAIL, malloc_mutex_rank_exclusive)) { goto label_error; } @@ -2203,7 +2201,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_avail_mtx); + malloc_mutex_prefork(tsdn, &arena->edata_avail_mtx); } void @@ -2237,7 +2235,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->extent_avail_mtx); + malloc_mutex_postfork_parent(tsdn, &arena->edata_avail_mtx); eset_postfork_parent(tsdn, &arena->eset_dirty); eset_postfork_parent(tsdn, &arena->eset_muzzy); eset_postfork_parent(tsdn, &arena->eset_retained); @@ -2283,7 +2281,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->extent_avail_mtx); + malloc_mutex_postfork_child(tsdn, &arena->edata_avail_mtx); eset_postfork_child(tsdn, &arena->eset_dirty); eset_postfork_child(tsdn, &arena->eset_muzzy); eset_postfork_child(tsdn, &arena->eset_retained); diff --git a/src/base.c b/src/base.c index 79736cdc..76d76557 100644 --- a/src/base.c +++ b/src/base.c @@ -105,14 +105,14 @@ label_done: } static void -base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, +base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, size_t size) { size_t sn; sn = *extent_sn_next; (*extent_sn_next)++; - extent_binit(extent, addr, size, sn); + edata_binit(edata, addr, size, sn); } static size_t @@ -158,7 +158,7 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { pages_huge(block, block->size); if 
(config_stats) { base->n_thp += HUGEPAGE_CEILING(block->size - - extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; + edata_bsize_get(&block->edata)) >> LG_HUGEPAGE; } block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); @@ -166,34 +166,34 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { } static void * -base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, +base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, size_t alignment) { void *ret; assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM)); assert(size == ALIGNMENT_CEILING(size, alignment)); - *gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent), - alignment) - (uintptr_t)extent_addr_get(extent); - ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size); - assert(extent_bsize_get(extent) >= *gap_size + size); - extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) + - *gap_size + size), extent_bsize_get(extent) - *gap_size - size, - extent_sn_get(extent)); + *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata), + alignment) - (uintptr_t)edata_addr_get(edata); + ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size); + assert(edata_bsize_get(edata) >= *gap_size + size); + edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) + + *gap_size + size), edata_bsize_get(edata) - *gap_size - size, + edata_sn_get(edata)); return ret; } static void -base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, +base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size, void *addr, size_t size) { - if (extent_bsize_get(extent) > 0) { + if (edata_bsize_get(edata) > 0) { /* * Compute the index for the largest size class that does not * exceed extent's size. 
*/ szind_t index_floor = - sz_size2index(extent_bsize_get(extent) + 1) - 1; - extent_heap_insert(&base->avail[index_floor], extent); + sz_size2index(edata_bsize_get(edata) + 1) - 1; + edata_heap_insert(&base->avail[index_floor], edata); } if (config_stats) { @@ -218,13 +218,13 @@ base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, } static void * -base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, +base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size, size_t alignment) { void *ret; size_t gap_size; - ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); - base_extent_bump_alloc_post(base, extent, gap_size, ret, size); + ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment); + base_extent_bump_alloc_post(base, edata, gap_size, ret, size); return ret; } @@ -284,7 +284,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, block->size = block_size; block->next = NULL; assert(block_size >= header_size); - base_extent_init(extent_sn_next, &block->extent, + base_edata_init(extent_sn_next, &block->edata, (void *)((uintptr_t)block + header_size), block_size - header_size); return block; } @@ -293,7 +293,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, * Allocate an extent that is at least as large as specified size, with * specified alignment. 
*/ -static extent_t * +static edata_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); @@ -327,7 +327,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - return &block->extent; + return &block->edata; } base_t * @@ -357,7 +357,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { size_t gap_size; size_t base_alignment = CACHELINE; size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); - base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent, + base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); base->ind = ind; ehooks_init(&base->ehooks, extent_hooks); @@ -371,7 +371,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->blocks = block; base->auto_thp_switched = false; for (szind_t i = 0; i < SC_NSIZES; i++) { - extent_heap_new(&base->avail[i]); + edata_heap_new(&base->avail[i]); } if (config_stats) { base->allocated = sizeof(base_block_t); @@ -384,7 +384,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(base, &block->extent, gap_size, base, + base_extent_bump_alloc_post(base, &block->edata, gap_size, base, base_size); return base; @@ -422,28 +422,28 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, size_t usize = ALIGNMENT_CEILING(size, alignment); size_t asize = usize + alignment - QUANTUM; - extent_t *extent = NULL; + edata_t *edata = NULL; malloc_mutex_lock(tsdn, &base->mtx); for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) { - extent = extent_heap_remove_first(&base->avail[i]); - if (extent != NULL) { + edata = 
edata_heap_remove_first(&base->avail[i]); + if (edata != NULL) { /* Use existing space. */ break; } } - if (extent == NULL) { + if (edata == NULL) { /* Try to allocate more space. */ - extent = base_extent_alloc(tsdn, base, usize, alignment); + edata = base_extent_alloc(tsdn, base, usize, alignment); } void *ret; - if (extent == NULL) { + if (edata == NULL) { ret = NULL; goto label_return; } - ret = base_extent_bump_alloc(base, extent, usize, alignment); + ret = base_extent_bump_alloc(base, edata, usize, alignment); if (esn != NULL) { - *esn = extent_sn_get(extent); + *esn = edata_sn_get(edata); } label_return: malloc_mutex_unlock(tsdn, &base->mtx); @@ -463,16 +463,16 @@ base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { return base_alloc_impl(tsdn, base, size, alignment, NULL); } -extent_t * -base_alloc_extent(tsdn_t *tsdn, base_t *base) { +edata_t * +base_alloc_edata(tsdn_t *tsdn, base_t *base) { size_t esn; - extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t), + edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), CACHELINE, &esn); - if (extent == NULL) { + if (edata == NULL) { return NULL; } - extent_esn_set(extent, esn); - return extent; + edata_esn_set(edata, esn); + return edata; } void diff --git a/src/bin.c b/src/bin.c index d7cbfb56..52de9ff3 100644 --- a/src/bin.c +++ b/src/bin.c @@ -45,8 +45,8 @@ bin_init(bin_t *bin) { return true; } bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); + edata_heap_new(&bin->slabs_nonfull); + edata_list_init(&bin->slabs_full); if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } diff --git a/src/ctl.c b/src/ctl.c index 4aa4af8f..1e72bf4c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -855,8 +855,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.mapped); accum_atomic_zu(&sdstats->astats.retained, &astats->astats.retained); - accum_atomic_zu(&sdstats->astats.extent_avail, - 
&astats->astats.extent_avail); + accum_atomic_zu(&sdstats->astats.edata_avail, + &astats->astats.edata_avail); } ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, @@ -2603,18 +2603,18 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, int ret; unsigned arena_ind; void *ptr; - extent_t *extent; + edata_t *edata; arena_t *arena; ptr = NULL; ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - extent = iealloc(tsd_tsdn(tsd), ptr); - if (extent == NULL) + edata = iealloc(tsd_tsdn(tsd), ptr); + if (edata == NULL) goto label_return; - arena = arena_get_from_extent(extent); + arena = arena_get_from_edata(edata); if (arena == NULL) goto label_return; @@ -2860,7 +2860,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.extent_avail, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail, ATOMIC_RELAXED), size_t) @@ -3010,7 +3010,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->extent_avail_mtx); + MUTEX_PROF_RESET(arena->edata_avail_mtx); MUTEX_PROF_RESET(arena->eset_dirty.mtx); MUTEX_PROF_RESET(arena->eset_muzzy.mtx); MUTEX_PROF_RESET(arena->eset_retained.mtx); diff --git a/src/edata.c b/src/edata.c index 1a5a1fa6..5e53e998 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, extent_avail_, extent_tree_t, extent_t, ph_link, - extent_esnead_comp) -ph_gen(, extent_heap_, extent_heap_t, extent_t, ph_link, extent_snad_comp) +ph_gen(, edata_avail_, edata_tree_t, edata_t, ph_link, + edata_esnead_comp) +ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) diff --git a/src/ehooks.c b/src/ehooks.c index 25aef1c3..a62586b9 
100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -200,8 +200,8 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { if (!maps_coalesce) { tsdn_t *tsdn = tsdn_fetch(); - extent_t *a = iealloc(tsdn, addr_a); - extent_t *b = iealloc(tsdn, addr_b); + edata_t *a = iealloc(tsdn, addr_a); + edata_t *b = iealloc(tsdn, addr_b); if (extent_head_no_merge(a, b)) { return true; } diff --git a/src/eset.c b/src/eset.c index 9cc8ceea..88b9c8c7 100644 --- a/src/eset.c +++ b/src/eset.c @@ -16,10 +16,10 @@ eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, return true; } for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { - extent_heap_new(&eset->heaps[i]); + edata_heap_new(&eset->heaps[i]); } bitmap_init(eset->bitmap, &eset_bitmap_info, true); - extent_list_init(&eset->lru); + edata_list_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; eset->delay_coalesce = delay_coalesce; @@ -63,24 +63,24 @@ eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { } void -eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { +eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); + assert(edata_state_get(edata) == eset->state); - size_t size = extent_size_get(extent); + size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - if (extent_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->heaps[pind])) { bitmap_unset(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_heap_insert(&eset->heaps[pind], extent); + edata_heap_insert(&eset->heaps[pind], edata); if (config_stats) { eset_stats_add(eset, pind, size); } - extent_list_append(&eset->lru, extent); + edata_list_append(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the 
mutex (as asserted above), so we @@ -94,24 +94,24 @@ eset_insert_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { } void -eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { +eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &eset->mtx); - assert(extent_state_get(extent) == eset->state); + assert(edata_state_get(edata) == eset->state); - size_t size = extent_size_get(extent); + size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - extent_heap_remove(&eset->heaps[pind], extent); + edata_heap_remove(&eset->heaps[pind], edata); if (config_stats) { eset_stats_sub(eset, pind, size); } - if (extent_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->heaps[pind])) { bitmap_set(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - extent_list_remove(&eset->lru, extent); + edata_list_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * As in eset_insert_locked, we hold eset->mtx and so don't need atomic @@ -128,7 +128,7 @@ eset_remove_locked(tsdn_t *tsdn, eset_t *eset, extent_t *extent) { * Find an extent with size [min_size, max_size) to satisfy the alignment * requirement. For each size, try only the first extent in the heap. 
*/ -static extent_t * +static edata_t * eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); @@ -139,10 +139,10 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { assert(i < SC_NPSIZES); - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - uintptr_t base = (uintptr_t)extent_base_get(extent); - size_t candidate_size = extent_size_get(extent); + assert(!edata_heap_empty(&eset->heaps[i])); + edata_t *edata = edata_heap_first(&eset->heaps[i]); + uintptr_t base = (uintptr_t)edata_base_get(edata); + size_t candidate_size = edata_size_get(edata); assert(candidate_size >= min_size); uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, @@ -154,7 +154,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t leadsize = next_align - base; if (candidate_size - leadsize >= min_size) { - return extent; + return edata; } } @@ -165,9 +165,9 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * Do first-fit extent selection, i.e. select the oldest/lowest extent that is * large enough. */ -static extent_t * +static edata_t * eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { - extent_t *ret = NULL; + edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -176,8 +176,8 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { * No split / merge allowed (Windows w/o retain). Try exact fit * only. */ - return extent_heap_empty(&eset->heaps[pind]) ? NULL : - extent_heap_first(&eset->heaps[pind]); + return edata_heap_empty(&eset->heaps[pind]) ? 
NULL : + edata_heap_first(&eset->heaps[pind]); } for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, @@ -185,9 +185,9 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { i < SC_NPSIZES + 1; i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, (size_t)i+1)) { - assert(!extent_heap_empty(&eset->heaps[i])); - extent_t *extent = extent_heap_first(&eset->heaps[i]); - assert(extent_size_get(extent) >= size); + assert(!edata_heap_empty(&eset->heaps[i])); + edata_t *edata = edata_heap_first(&eset->heaps[i]); + assert(edata_size_get(edata) >= size); /* * In order to reduce fragmentation, avoid reusing and splitting * large eset for much smaller sizes. @@ -198,8 +198,8 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { break; } - if (ret == NULL || extent_snad_comp(extent, ret) < 0) { - ret = extent; + if (ret == NULL || edata_snad_comp(edata, ret) < 0) { + ret = edata; } if (i == SC_NPSIZES) { break; @@ -210,7 +210,7 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { return ret; } -extent_t * +edata_t * eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { malloc_mutex_assert_owner(tsdn, &eset->mtx); @@ -220,18 +220,18 @@ eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { return NULL; } - extent_t *extent = eset_first_fit_locked(tsdn, eset, max_size); + edata_t *edata = eset_first_fit_locked(tsdn, eset, max_size); - if (alignment > PAGE && extent == NULL) { + if (alignment > PAGE && edata == NULL) { /* * max_size guarantees the alignment requirement but is rather * pessimistic. Next we try to satisfy the aligned allocation * with sizes in [esize, max_size). 
*/ - extent = eset_fit_alignment(eset, esize, max_size, alignment); + edata = eset_fit_alignment(eset, esize, max_size, alignment); } - return extent; + return edata; } void diff --git a/src/extent2.c b/src/extent2.c index 4001d178..5bacb8fe 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -13,25 +13,25 @@ /* Data. */ rtree_t extents_rtree; -/* Keyed by the address of the extent_t being protected. */ +/* Keyed by the address of the edata_t being protected. */ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained); + edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t offset, size_t length, + ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static extent_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, extent_t *extent, size_t size_a, szind_t szind_a, +static edata_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b, bool growing_retained); + edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -43,15 +43,15 @@ static atomic_zu_t highpages; * definition. 
*/ -static void extent_deregister(tsdn_t *tsdn, extent_t *extent); -static extent_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +static void extent_deregister(tsdn_t *tsdn, edata_t *edata); +static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static extent_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, +static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, extent_t *extent, bool growing_retained); + eset_t *eset, edata_t *edata, bool growing_retained); /******************************************************************************/ @@ -62,68 +62,68 @@ typedef enum { } lock_result_t; static inline void -extent_lock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +extent_lock_edata(tsdn_t *tsdn, edata_t *edata) { + assert(edata != NULL); + mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)edata); } static inline void -extent_unlock(tsdn_t *tsdn, extent_t *extent) { - assert(extent != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)extent); +extent_unlock_edata(tsdn_t *tsdn, edata_t *edata) { + assert(edata != NULL); + mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)edata); } static inline void -extent_lock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); +extent_lock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { + assert(edata1 != 
NULL && edata2 != NULL); + mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, + (uintptr_t)edata2); } static inline void -extent_unlock2(tsdn_t *tsdn, extent_t *extent1, extent_t *extent2) { - assert(extent1 != NULL && extent2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)extent1, - (uintptr_t)extent2); +extent_unlock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, + (uintptr_t)edata2); } static lock_result_t extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - extent_t **result, bool inactive_only) { - extent_t *extent1 = rtree_leaf_elm_extent_read(tsdn, &extents_rtree, + edata_t **result, bool inactive_only) { + edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, true); /* Slab implies active extents and should be skipped. */ - if (extent1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, &extents_rtree, elm, true))) { return lock_result_no_extent; } /* * It's possible that the extent changed out from under us, and with it - * the leaf->extent mapping. We have to recheck while holding the lock. + * the leaf->edata mapping. We have to recheck while holding the lock. 
*/ - extent_lock(tsdn, extent1); - extent_t *extent2 = rtree_leaf_elm_extent_read(tsdn, - &extents_rtree, elm, true); + extent_lock_edata(tsdn, edata1); + edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, + true); - if (extent1 == extent2) { - *result = extent1; + if (edata1 == edata2) { + *result = edata1; return lock_result_success; } else { - extent_unlock(tsdn, extent1); + extent_unlock_edata(tsdn, edata1); return lock_result_failure; } } /* - * Returns a pool-locked extent_t * if there's one associated with the given + * Returns a pool-locked edata_t * if there's one associated with the given * address, and NULL otherwise. */ -static extent_t * -extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, +static edata_t * +extent_lock_edata_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only) { - extent_t *ret = NULL; + edata_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)addr, false, false); if (elm == NULL) { @@ -138,9 +138,9 @@ extent_lock_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, } static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { - assert(extent_base_get(extent) == extent_addr_get(extent)); + assert(edata_base_get(edata) == edata_addr_get(edata)); if (alignment < PAGE) { unsigned lg_range = LG_PAGE - @@ -156,52 +156,52 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, extent_t *extent, } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); - extent->e_addr = (void *)((uintptr_t)extent->e_addr + + edata->e_addr = (void *)((uintptr_t)edata->e_addr + random_offset); - assert(ALIGNMENT_ADDR2BASE(extent->e_addr, alignment) == - extent->e_addr); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == + edata->e_addr); } } -extent_t * +edata_t * extent_alloc(tsdn_t *tsdn, arena_t *arena) { 
- malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_t *extent = extent_avail_first(&arena->extent_avail); - if (extent == NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return base_alloc_extent(tsdn, arena->base); + malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); + edata_t *edata = edata_avail_first(&arena->edata_avail); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); + return base_alloc_edata(tsdn, arena->base); } - extent_avail_remove(&arena->extent_avail, extent); - atomic_fetch_sub_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); - return extent; + edata_avail_remove(&arena->edata_avail, edata); + atomic_fetch_sub_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); + return edata; } void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { - malloc_mutex_lock(tsdn, &arena->extent_avail_mtx); - extent_avail_insert(&arena->extent_avail, extent); - atomic_fetch_add_zu(&arena->extent_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->extent_avail_mtx); +extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { + malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); + edata_avail_insert(&arena->edata_avail, edata); + atomic_fetch_add_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); } static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent) { - extent_state_set(extent, extent_state_active); + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata) { + edata_state_set(edata, extent_state_active); bool coalesced; - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, - extent, &coalesced, false); - extent_state_set(extent, eset_state_get(eset)); + edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + edata, 
&coalesced, false); + edata_state_set(edata, eset_state_get(eset)); if (!coalesced) { return true; } - eset_insert_locked(tsdn, eset, extent); + eset_insert_locked(tsdn, eset, edata); return false; } -extent_t * +edata_t * extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { @@ -210,28 +210,28 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + edata_t *edata = extent_recycle(tsdn, arena, ehooks, eset, new_addr, size, pad, alignment, slab, szind, zero, commit, false); - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; } void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); - assert(extent_dumpable_get(extent)); + edata_t *edata) { + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); + assert(edata_dumpable_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_addr_set(extent, extent_base_get(extent)); - extent_zeroed_set(extent, false); + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); - extent_record(tsdn, arena, ehooks, eset, extent, false); + extent_record(tsdn, arena, ehooks, eset, edata, false); } -extent_t * +edata_t * extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; @@ -243,27 +243,27 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * Get the LRU coalesced extent, if any. 
If coalescing was delayed, * the loop will iterate until the LRU extent is fully coalesced. */ - extent_t *extent; + edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - extent = extent_list_first(&eset->lru); - if (extent == NULL) { + edata = edata_list_first(&eset->lru); + if (edata == NULL) { goto label_return; } /* Check the eviction limit. */ size_t extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); if (extents_npages <= npages_min) { - extent = NULL; + edata = NULL; goto label_return; } - eset_remove_locked(tsdn, eset, extent); + eset_remove_locked(tsdn, eset, edata); if (!eset->delay_coalesce) { break; } /* Try to coalesce. */ if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent)) { + eset, edata)) { break; } /* @@ -281,10 +281,10 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, not_reached(); case extent_state_dirty: case extent_state_muzzy: - extent_state_set(extent, extent_state_active); + edata_state_set(edata, extent_state_active); break; case extent_state_retained: - extent_deregister(tsdn, extent); + extent_deregister(tsdn, edata); break; default: not_reached(); @@ -292,7 +292,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, label_return: malloc_mutex_unlock(tsdn, &eset->mtx); - return extent; + return edata; } /* @@ -301,8 +301,8 @@ label_return: */ static void extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { - size_t sz = extent_size_get(extent); + edata_t *edata, bool growing_retained) { + size_t sz = edata_size_get(edata); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); } @@ -311,56 +311,56 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * that this is only a virtual memory leak. 
*/ if (eset_state_get(eset) == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, extent, 0, sz, + if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained); + extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), growing_retained); } } - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); } static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == extent_state_active); + edata_t *edata) { + assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); + assert(edata_state_get(edata) == extent_state_active); - extent_state_set(extent, eset_state_get(eset)); - eset_insert_locked(tsdn, eset, extent); + edata_state_set(edata, eset_state_get(eset)); + eset_insert_locked(tsdn, eset, edata); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { + edata_t *edata) { malloc_mutex_lock(tsdn, &eset->mtx); - extent_deactivate_locked(tsdn, arena, eset, extent); + extent_deactivate_locked(tsdn, arena, eset, edata); malloc_mutex_unlock(tsdn, &eset->mtx); } static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, - extent_t *extent) { - assert(extent_arena_ind_get(extent) == arena_ind_get(arena)); - assert(extent_state_get(extent) == eset_state_get(eset)); + edata_t *edata) { + assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); + assert(edata_state_get(edata) == eset_state_get(eset)); - eset_remove_locked(tsdn, eset, extent); - extent_state_set(extent, extent_state_active); + eset_remove_locked(tsdn, eset, edata); + edata_state_set(edata, extent_state_active); } static bool extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const extent_t *extent, bool 
dependent, bool init_missing, + const edata_t *edata, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), dependent, init_missing); + (uintptr_t)edata_base_get(edata), dependent, init_missing); if (!dependent && *r_elm_a == NULL) { return true; } assert(*r_elm_a != NULL); *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_last_get(extent), dependent, init_missing); + (uintptr_t)edata_last_get(edata), dependent, init_missing); if (!dependent && *r_elm_b == NULL) { return true; } @@ -371,36 +371,36 @@ extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, extent_t *extent, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, extent, szind, slab); + rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, edata, szind, slab); if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, extent, szind, + rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, edata, szind, slab); } } static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent, +extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind) { - assert(extent_slab_get(extent)); + assert(edata_slab_get(edata)); /* Register interior. 
*/ - for (size_t i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { rtree_write(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << - LG_PAGE), extent, szind, true); + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE), edata, szind, true); } } static void -extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { +extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. */ witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nadd = extent_size_get(extent) >> LG_PAGE; + if (opt_prof && edata_state_get(edata) == extent_state_active) { + size_t nadd = edata_size_get(edata) >> LG_PAGE; size_t cur = atomic_fetch_add_zu(&curpages, nadd, ATOMIC_RELAXED) + nadd; size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); @@ -419,18 +419,18 @@ extent_gdump_add(tsdn_t *tsdn, const extent_t *extent) { } static void -extent_gdump_sub(tsdn_t *tsdn, const extent_t *extent) { +extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); - if (opt_prof && extent_state_get(extent) == extent_state_active) { - size_t nsub = extent_size_get(extent) >> LG_PAGE; + if (opt_prof && edata_state_get(edata) == extent_state_active) { + size_t nsub = edata_size_get(edata) >> LG_PAGE; assert(atomic_load_zu(&curpages, ATOMIC_RELAXED) >= nsub); atomic_fetch_sub_zu(&curpages, nsub, ATOMIC_RELAXED); } } static bool -extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -439,43 +439,43 @@ extent_register_impl(tsdn_t *tsdn, extent_t *extent, bool gdump_add) { * We need to hold the lock to 
protect against a concurrent coalesce * operation that sees us in a partial state. */ - extent_lock(tsdn, extent); + extent_lock_edata(tsdn, edata); - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, false, true, + if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, false, true, &elm_a, &elm_b)) { - extent_unlock(tsdn, extent); + extent_unlock_edata(tsdn, edata); return true; } - szind_t szind = extent_szind_get_maybe_invalid(extent); - bool slab = extent_slab_get(extent); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, extent, szind, slab); + szind_t szind = edata_szind_get_maybe_invalid(edata); + bool slab = edata_slab_get(edata); + extent_rtree_write_acquired(tsdn, elm_a, elm_b, edata, szind, slab); if (slab) { - extent_interior_register(tsdn, rtree_ctx, extent, szind); + extent_interior_register(tsdn, rtree_ctx, edata, szind); } - extent_unlock(tsdn, extent); + extent_unlock_edata(tsdn, edata); if (config_prof && gdump_add) { - extent_gdump_add(tsdn, extent); + extent_gdump_add(tsdn, edata); } return false; } static bool -extent_register(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, true); +extent_register(tsdn_t *tsdn, edata_t *edata) { + return extent_register_impl(tsdn, edata, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, extent_t *extent) { - return extent_register_impl(tsdn, extent, false); +extent_register_no_gdump_add(tsdn_t *tsdn, edata_t *edata) { + return extent_register_impl(tsdn, edata, false); } static void -extent_reregister(tsdn_t *tsdn, extent_t *extent) { - bool err = extent_register(tsdn, extent); +extent_reregister(tsdn_t *tsdn, edata_t *edata) { + bool err = extent_register(tsdn, edata); assert(!err); } @@ -488,14 +488,14 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - extent_t *extent) { + edata_t *edata) { size_t i; - assert(extent_slab_get(extent)); + assert(edata_slab_get(edata)); - 
for (i = 1; i < (extent_size_get(extent) >> LG_PAGE) - 1; i++) { + for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { rtree_clear(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent) + (uintptr_t)(i << + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE)); } } @@ -504,43 +504,43 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, * Removes all pointers to the given extent from the global rtree. */ static void -extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { +extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, extent, true, false, + extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, true, false, &elm_a, &elm_b); - extent_lock(tsdn, extent); + extent_lock_edata(tsdn, edata); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); + if (edata_slab_get(edata)) { + extent_interior_deregister(tsdn, rtree_ctx, edata); + edata_slab_set(edata, false); } - extent_unlock(tsdn, extent); + extent_unlock_edata(tsdn, edata); if (config_prof && gdump) { - extent_gdump_sub(tsdn, extent); + extent_gdump_sub(tsdn, edata); } } static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, true); +extent_deregister(tsdn_t *tsdn, edata_t *edata) { + extent_deregister_impl(tsdn, edata, true); } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { - extent_deregister_impl(tsdn, extent, false); +extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { + extent_deregister_impl(tsdn, edata, false); } /* * Tries to find and remove an extent from eset that can be used for the * given allocation request. 
*/ -static extent_t * +static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { @@ -566,62 +566,60 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t esize = size + pad; malloc_mutex_lock(tsdn, &eset->mtx); - extent_t *extent; + edata_t *edata; if (new_addr != NULL) { - extent = extent_lock_from_addr(tsdn, rtree_ctx, new_addr, + edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, false); - if (extent != NULL) { + if (edata != NULL) { /* - * We might null-out extent to report an error, but we + * We might null-out edata to report an error, but we * still need to unlock the associated mutex after. */ - extent_t *unlock_extent = extent; - assert(extent_base_get(extent) == new_addr); - if (extent_arena_ind_get(extent) - != arena_ind_get(arena) || - extent_size_get(extent) < esize || - extent_state_get(extent) != - eset_state_get(eset)) { - extent = NULL; + edata_t *unlock_edata = edata; + assert(edata_base_get(edata) == new_addr); + if (edata_arena_ind_get(edata) != arena_ind_get(arena) + || edata_size_get(edata) < esize + || edata_state_get(edata) != eset_state_get(eset)) { + edata = NULL; } - extent_unlock(tsdn, unlock_extent); + extent_unlock_edata(tsdn, unlock_edata); } } else { - extent = eset_fit_locked(tsdn, eset, esize, alignment); + edata = eset_fit_locked(tsdn, eset, esize, alignment); } - if (extent == NULL) { + if (edata == NULL) { malloc_mutex_unlock(tsdn, &eset->mtx); return NULL; } - extent_activate_locked(tsdn, arena, eset, extent); + extent_activate_locked(tsdn, arena, eset, edata); malloc_mutex_unlock(tsdn, &eset->mtx); - return extent; + return edata; } /* * Given an allocation request and an extent guaranteed to be able to satisfy - * it, this splits off lead and trail extents, leaving extent pointing to an + * it, this splits off lead and trail 
extents, leaving edata pointing to an * extent satisfying the allocation. * This function doesn't put lead or trail into any eset_t; it's the caller's * job to ensure that they can be reused. */ typedef enum { /* - * Split successfully. lead, extent, and trail, are modified to extents + * Split successfully. lead, edata, and trail, are modified to extents * describing the ranges before, in, and after the given allocation. */ extent_split_interior_ok, /* * The extent can't satisfy the given allocation request. None of the - * input extent_t *s are touched. + * input edata_t *s are touched. */ extent_split_interior_cant_alloc, /* * In a potentially invalid state. Must leak (if *to_leak is non-NULL), * and salvage what's still salvageable (if *to_salvage is non-NULL). - * None of lead, extent, or trail are valid. + * None of lead, edata, or trail are valid. */ extent_split_interior_error } extent_split_interior_result_t; @@ -630,19 +628,19 @@ static extent_split_interior_result_t extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ - extent_t **extent, extent_t **lead, extent_t **trail, + edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ - extent_t **to_leak, extent_t **to_salvage, + edata_t **to_leak, edata_t **to_salvage, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool growing_retained) { size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), + PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(new_addr == NULL || leadsize == 0); - if (extent_size_get(*extent) < leadsize + esize) { + if (edata_size_get(*edata) < leadsize + esize) { return extent_split_interior_cant_alloc; } - size_t trailsize = extent_size_get(*extent) - leadsize - esize; + size_t trailsize = edata_size_get(*edata) - leadsize - esize; *lead = NULL; *trail = NULL; @@ -651,11 +649,11 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { - *lead = *extent; - *extent = extent_split_impl(tsdn, arena, ehooks, *lead, + *lead = *edata; + *edata = extent_split_impl(tsdn, arena, ehooks, *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, growing_retained); - if (*extent == NULL) { + if (*edata == NULL) { *to_leak = *lead; *lead = NULL; return extent_split_interior_error; @@ -664,13 +662,13 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, ehooks, *extent, esize, + *trail = extent_split_impl(tsdn, arena, ehooks, *edata, esize, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { - *to_leak = *extent; + *to_leak = *edata; *to_salvage = *lead; *lead = NULL; - *extent = NULL; + *edata = NULL; return extent_split_interior_error; } } @@ -680,14 +678,14 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Splitting causes szind to be set as a side effect, but no * splitting occurred. */ - extent_szind_set(*extent, szind); + edata_szind_set(*edata, szind); if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(*extent), szind, slab); - if (slab && extent_size_get(*extent) > PAGE) { + (uintptr_t)edata_addr_get(*edata), szind, slab); + if (slab && edata_size_get(*edata) > PAGE) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(*extent) - + (uintptr_t)edata_past_get(*edata) - (uintptr_t)PAGE, szind, slab); } } @@ -702,18 +700,18 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * before or after the resulting allocation, that space is given its own extent * and put back into eset. 
*/ -static extent_t * +static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, extent_t *extent, + size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; + edata_t *lead; + edata_t *trail; + edata_t *to_leak; + edata_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + tsdn, arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, growing_retained); @@ -735,7 +733,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (trail != NULL) { extent_deactivate(tsdn, arena, eset, trail); } - return extent; + return edata; } else { /* * We should have picked an extent that was large enough to @@ -746,11 +744,11 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_deregister(tsdn, to_salvage); } if (to_leak != NULL) { - void *leak = extent_base_get(to_leak); + void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, growing_retained); - assert(extent_lock_from_addr(tsdn, rtree_ctx, leak, + assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, false) == NULL); } return NULL; @@ -762,7 +760,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Tries to satisfy the given allocation request by reusing one of the extents * in the given eset_t. 
*/ -static extent_t * +static edata_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { @@ -775,54 +773,54 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent = extent_recycle_extract(tsdn, arena, ehooks, + edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, rtree_ctx, eset, new_addr, size, pad, alignment, slab, growing_retained); - if (extent == NULL) { + if (edata == NULL) { return NULL; } - extent = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, - new_addr, size, pad, alignment, slab, szind, extent, + edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + new_addr, size, pad, alignment, slab, szind, edata, growing_retained); - if (extent == NULL) { + if (edata == NULL) { return NULL; } - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), growing_retained)) { - extent_record(tsdn, arena, ehooks, eset, extent, + if (*commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), growing_retained)) { + extent_record(tsdn, arena, ehooks, eset, edata, growing_retained); return NULL; } } - if (extent_committed_get(extent)) { + if (edata_committed_get(edata)) { *commit = true; } - if (extent_zeroed_get(extent)) { + if (edata_zeroed_get(edata)) { *zero = true; } if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); + extent_addr_randomize(tsdn, arena, edata, alignment); } - assert(extent_state_get(extent) == extent_state_active); + assert(edata_state_get(edata) == extent_state_active); if (slab) { - extent_slab_set(extent, slab); - extent_interior_register(tsdn, rtree_ctx, 
extent, szind); + edata_slab_set(edata, slab); + extent_interior_register(tsdn, rtree_ctx, edata, szind); } if (*zero) { - void *addr = extent_base_get(extent); - if (!extent_zeroed_get(extent)) { - size_t size = extent_size_get(extent); + void *addr = edata_base_get(edata); + if (!edata_zeroed_get(edata)) { + size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } } - return extent; + return edata; } /* @@ -830,7 +828,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * to split requested extents in order to limit the total number of disjoint * virtual memory ranges retained by each arena. */ -static extent_t * +static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { @@ -860,8 +858,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { + edata_t *edata = extent_alloc(tsdn, arena); + if (edata == NULL) { goto label_err; } bool zeroed = false; @@ -870,35 +868,35 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed, arena_ind_get(arena)); - extent_init(extent, arena_ind_get(arena), ptr, alloc_size, false, + edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); goto label_err; } - if (extent_register_no_gdump_add(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); + if (extent_register_no_gdump_add(tsdn, edata)) { + extent_dalloc(tsdn, arena, edata); goto label_err; } - if (extent_zeroed_get(extent) && 
extent_committed_get(extent)) { + if (edata_zeroed_get(edata) && edata_committed_get(edata)) { *zero = true; } - if (extent_committed_get(extent)) { + if (edata_committed_get(edata)) { *commit = true; } rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *lead; - extent_t *trail; - extent_t *to_leak; - extent_t *to_salvage; + edata_t *lead; + edata_t *trail; + edata_t *to_leak; + edata_t *to_salvage; extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, rtree_ctx, &extent, &lead, &trail, &to_leak, + arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { @@ -931,16 +929,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, goto label_err; } - if (*commit && !extent_committed_get(extent)) { - if (extent_commit_impl(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent), true)) { + if (*commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, extent, true); + &arena->eset_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ if (config_debug) { - void *addr = extent_addr_get(extent); + void *addr = edata_addr_get(edata); size_t *p = (size_t *)(uintptr_t)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { @@ -964,32 +962,32 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (config_prof) { /* Adjust gdump stats now that extent is final size. 
*/ - extent_gdump_add(tsdn, extent); + extent_gdump_add(tsdn, edata); } if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); + extent_addr_randomize(tsdn, arena, edata, alignment); } if (slab) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_slab_set(extent, true); - extent_interior_register(tsdn, rtree_ctx, extent, szind); + edata_slab_set(edata, true); + extent_interior_register(tsdn, rtree_ctx, edata, szind); } - if (*zero && !extent_zeroed_get(extent)) { - void *addr = extent_base_get(extent); - size_t size = extent_size_get(extent); + if (*zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); } - return extent; + return edata; label_err: malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); return NULL; } -static extent_t * +static edata_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { @@ -998,16 +996,16 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); - extent_t *extent = extent_recycle(tsdn, arena, ehooks, + edata_t *edata = extent_recycle(tsdn, arena, ehooks, &arena->eset_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); - if (extent != NULL) { + if (edata != NULL) { malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); if (config_prof) { - extent_gdump_add(tsdn, extent); + extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - extent = extent_grow_retained(tsdn, arena, ehooks, size, pad, + edata = extent_grow_retained(tsdn, arena, ehooks, size, pad, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { @@ -1015,49 +1013,49 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); - return extent; + return edata; } -static extent_t * +static edata_t * extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; - extent_t *extent = extent_alloc(tsdn, arena); - if (extent == NULL) { + edata_t *edata = extent_alloc(tsdn, arena); + if (edata == NULL) { return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, zero, commit, arena_ind_get(arena)); if (addr == NULL) { - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); return NULL; } - extent_init(extent, arena_ind_get(arena), addr, esize, slab, szind, + edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (pad != 0) { - extent_addr_randomize(tsdn, arena, extent, alignment); + extent_addr_randomize(tsdn, arena, edata, alignment); } - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); + if (extent_register(tsdn, edata)) { + extent_dalloc(tsdn, arena, edata); return NULL; } - return extent; + return edata; } -extent_t * +edata_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_t *extent = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, size, pad, alignment, slab, szind, zero, commit); - if (extent == NULL) { + if (edata == NULL) { if (opt_retain && 
new_addr != NULL) { /* * When retain is enabled and new_addr is set, we do not @@ -1067,28 +1065,28 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ return NULL; } - extent = extent_alloc_wrapper_hard(tsdn, arena, ehooks, + edata = extent_alloc_wrapper_hard(tsdn, arena, ehooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } - assert(extent == NULL || extent_dumpable_get(extent)); - return extent; + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; } static bool -extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, - const extent_t *outer) { - assert(extent_arena_ind_get(inner) == arena_ind_get(arena)); - if (extent_arena_ind_get(outer) != arena_ind_get(arena)) { +extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, + const edata_t *outer) { + assert(edata_arena_ind_get(inner) == arena_ind_get(arena)); + if (edata_arena_ind_get(outer) != arena_ind_get(arena)) { return false; } - assert(extent_state_get(inner) == extent_state_active); - if (extent_state_get(outer) != eset->state) { + assert(edata_state_get(inner) == extent_state_active); + if (edata_state_get(outer) != eset->state) { return false; } - if (extent_committed_get(inner) != extent_committed_get(outer)) { + if (edata_committed_get(inner) != edata_committed_get(outer)) { return false; } @@ -1097,7 +1095,7 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const extent_t *inner, static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *inner, extent_t *outer, bool forward, bool growing_retained) { + edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(arena, eset, inner, outer)); extent_activate_locked(tsdn, arena, eset, outer); @@ -1114,9 +1112,9 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, return err; } -static extent_t * +static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t 
*arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size @@ -1132,8 +1130,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, again = false; /* Try to coalesce forward. */ - extent_t *next = extent_lock_from_addr(tsdn, rtree_ctx, - extent_past_get(extent), inactive_only); + edata_t *next = extent_lock_edata_from_addr(tsdn, rtree_ctx, + edata_past_get(edata), inactive_only); if (next != NULL) { /* * eset->mtx only protects against races for @@ -1141,38 +1139,38 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * before releasing next's pool lock. */ bool can_coalesce = extent_can_coalesce(arena, eset, - extent, next); + edata, next); - extent_unlock(tsdn, next); + extent_unlock_edata(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, next, true, + ehooks, eset, edata, next, true, growing_retained)) { if (eset->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; - return extent; + return edata; } again = true; } } /* Try to coalesce backward. */ - extent_t *prev = extent_lock_from_addr(tsdn, rtree_ctx, - extent_before_get(extent), inactive_only); + edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, + edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(arena, eset, - extent, prev); - extent_unlock(tsdn, prev); + edata, prev); + extent_unlock_edata(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, extent, prev, false, + ehooks, eset, edata, prev, false, growing_retained)) { - extent = prev; + edata = prev; if (eset->delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; - return extent; + return edata; } again = true; } @@ -1182,23 +1180,23 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (eset->delay_coalesce) { *coalesced = false; } - return extent; + return edata; } -static extent_t * +static edata_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, false); + edata, coalesced, growing_retained, false); } -static extent_t * +static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, extent_t *extent, bool *coalesced, + rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, bool growing_retained) { return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, - extent, coalesced, growing_retained, true); + edata, coalesced, growing_retained, true); } /* @@ -1207,62 +1205,62 @@ extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - extent_t *extent, bool growing_retained) { + edata_t *edata, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); assert((eset_state_get(eset) != extent_state_dirty && eset_state_get(eset) != extent_state_muzzy) || - !extent_zeroed_get(extent)); + !edata_zeroed_get(edata)); malloc_mutex_lock(tsdn, &eset->mtx); - extent_szind_set(extent, SC_NSIZES); - if (extent_slab_get(extent)) { - extent_interior_deregister(tsdn, rtree_ctx, extent); - extent_slab_set(extent, false); + edata_szind_set(edata, SC_NSIZES); + if (edata_slab_get(edata)) { + extent_interior_deregister(tsdn, rtree_ctx, 
edata); + edata_slab_set(edata, false); } - assert(rtree_extent_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_base_get(extent), true) == extent); + assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), true) == edata); if (!eset->delay_coalesce) { - extent = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, extent, NULL, growing_retained); - } else if (extent_size_get(extent) >= SC_LARGE_MINCLASS) { + edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, + eset, edata, NULL, growing_retained); + } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(eset == &arena->eset_dirty); /* Always coalesce large eset eagerly. */ bool coalesced; do { - assert(extent_state_get(extent) == extent_state_active); - extent = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, eset, extent, &coalesced, + assert(edata_state_get(edata) == extent_state_active); + edata = extent_try_coalesce_large(tsdn, arena, ehooks, + rtree_ctx, eset, edata, &coalesced, growing_retained); } while (coalesced); - if (extent_size_get(extent) >= oversize_threshold) { + if (edata_size_get(edata) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &eset->mtx); - arena_decay_extent(tsdn, arena, ehooks, extent); + arena_decay_extent(tsdn, arena, ehooks, edata); return; } } - extent_deactivate_locked(tsdn, arena, eset, extent); + extent_deactivate_locked(tsdn, arena, eset, edata); malloc_mutex_unlock(tsdn, &eset->mtx); } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { ehooks_t *ehooks = arena_get_ehooks(arena); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_register(tsdn, extent)) { - extent_dalloc(tsdn, arena, extent); + if (extent_register(tsdn, edata)) { + extent_dalloc(tsdn, arena, edata); return; } - extent_dalloc_wrapper(tsdn, arena, ehooks, extent); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); } static bool @@ -1273,23 +1271,23 @@ extent_may_dalloc(void) { static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { + edata_t *edata) { bool err; - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extent_addr_set(extent, extent_base_get(extent)); + edata_addr_set(edata, edata_base_get(edata)); /* Try to deallocate. 
*/ - err = ehooks_dalloc(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), + err = ehooks_dalloc(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), edata_committed_get(edata), arena_ind_get(arena)); if (!err) { - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); } return err; @@ -1297,8 +1295,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_dumpable_get(extent)); + edata_t *edata) { + assert(edata_dumpable_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1308,124 +1306,123 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ - extent_deregister(tsdn, extent); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, extent)) { + extent_deregister(tsdn, edata); + if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, edata)) { return; } - extent_reregister(tsdn, extent); + extent_reregister(tsdn, edata); } /* Try to decommit; purge if that fails. 
*/ bool zeroed; - if (!extent_committed_get(extent)) { + if (!edata_committed_get(edata)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, extent, 0, - extent_size_get(extent))) { + } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, edata, 0, + edata_size_get(edata))) { zeroed = true; - } else if (!ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), + } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata), arena_ind_get(arena))) { zeroed = true; - } else if (extent_state_get(extent) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), 0, extent_size_get(extent), + } else if (edata_state_get(edata) == extent_state_muzzy || + !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata), arena_ind_get(arena))) { zeroed = false; } else { zeroed = false; } - extent_zeroed_set(extent, zeroed); + edata_zeroed_set(edata, zeroed); if (config_prof) { - extent_gdump_sub(tsdn, extent); + extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->eset_retained, extent, - false); + extent_record(tsdn, arena, ehooks, &arena->eset_retained, edata, false); } void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent) { - assert(extent_base_get(extent) != NULL); - assert(extent_size_get(extent) != 0); + edata_t *edata) { + assert(edata_base_get(edata) != NULL); + assert(edata_size_get(edata) != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, extent); + extent_deregister(tsdn, edata); - extent_addr_set(extent, extent_base_get(extent)); + edata_addr_set(edata, edata_base_get(edata)); /* Try to destroy; silently fail otherwise. 
*/ - ehooks_destroy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), extent_committed_get(extent), + ehooks_destroy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), edata_committed_get(edata), arena_ind_get(arena)); - extent_dalloc(tsdn, arena, extent); + extent_dalloc(tsdn, arena, edata); } static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { + edata_t *edata, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_commit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) || !err); + bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_committed_set(edata, edata_committed_get(edata) || !err); return err; } bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, + edata_t *edata, size_t offset, size_t length) { - return extent_commit_impl(tsdn, arena, ehooks, extent, offset, length, + return extent_commit_impl(tsdn, arena, ehooks, edata, offset, length, false); } bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { + edata_t *edata, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool err = ehooks_decommit(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); - extent_committed_set(extent, extent_committed_get(extent) && err); + bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); + 
edata_committed_set(edata, edata_committed_get(edata) && err); return err; } static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { + edata_t *edata, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - bool err = ehooks_purge_lazy(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); + bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); return err; } bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, extent, offset, + edata_t *edata, size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, arena, ehooks, edata, offset, length, false); } static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length, bool growing_retained) { + edata_t *edata, size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); - bool err = ehooks_purge_forced(tsdn, ehooks, extent_base_get(extent), - extent_size_get(extent), offset, length, arena_ind_get(arena)); + bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), offset, length, arena_ind_get(arena)); return err; } bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, extent, + edata_t *edata, size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, arena, ehooks, edata, offset, length, false); } @@ -1436,11 +1433,11 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * with the trail (the higher addressed portion). This makes 'extent' the lead, * and returns the trail (except in case of error). */ -static extent_t * +static edata_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { - assert(extent_size_get(extent) == size_a + size_b); + assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); @@ -1448,28 +1445,28 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - extent_t *trail = extent_alloc(tsdn, arena); + edata_t *trail = extent_alloc(tsdn, arena); if (trail == NULL) { goto label_error_a; } - extent_init(trail, arena_ind_get(arena), - (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, - slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), - extent_zeroed_get(extent), extent_committed_get(extent), - extent_dumpable_get(extent), EXTENT_NOT_HEAD); + edata_init(trail, arena_ind_get(arena), + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_dumpable_get(edata), EXTENT_NOT_HEAD); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; { - extent_t lead; + edata_t lead; - extent_init(&lead, arena_ind_get(arena), - extent_addr_get(extent), size_a, - slab_a, szind_a, extent_sn_get(extent), - extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent), extent_dumpable_get(extent), + edata_init(&lead, arena_ind_get(arena), + edata_addr_get(edata), size_a, + slab_a, szind_a, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), edata_dumpable_get(edata), EXTENT_NOT_HEAD); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, @@ -1484,40 +1481,40 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, goto label_error_b; } - extent_lock2(tsdn, extent, trail); + extent_lock_edata2(tsdn, edata, trail); - bool err = ehooks_split(tsdn, ehooks, extent_base_get(extent), - size_a + size_b, size_a, size_b, extent_committed_get(extent), + bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), + size_a + size_b, size_a, size_b, edata_committed_get(edata), arena_ind_get(arena)); if (err) 
{ goto label_error_c; } - extent_size_set(extent, size_a); - extent_szind_set(extent, szind_a); + edata_size_set(edata, size_a); + edata_szind_set(edata, szind_a); - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, extent, + extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, edata, szind_a, slab_a); extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, szind_b, slab_b); - extent_unlock2(tsdn, extent, trail); + extent_unlock_edata2(tsdn, edata, trail); return trail; label_error_c: - extent_unlock2(tsdn, extent, trail); + extent_unlock_edata2(tsdn, edata, trail); label_error_b: extent_dalloc(tsdn, arena, trail); label_error_a: return NULL; } -extent_t * +edata_t * extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *extent, size_t size_a, szind_t szind_a, bool slab_a, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, ehooks, extent, size_a, szind_a, + return extent_split_impl(tsdn, arena, ehooks, edata, size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); } @@ -1526,8 +1523,8 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * settings. Assumes the second extent has the higher address. */ bool -extent_head_no_merge(extent_t *a, extent_t *b) { - assert(extent_base_get(a) < extent_base_get(b)); +extent_head_no_merge(edata_t *a, edata_t *b) { + assert(edata_base_get(a) < edata_base_get(b)); /* * When coalesce is not always allowed (Windows), only merge extents * from the same VirtualAlloc region under opt.retain (in which case @@ -1540,33 +1537,33 @@ extent_head_no_merge(extent_t *a, extent_t *b) { return true; } /* If b is a head extent, disallow the cross-region merge. */ - if (extent_is_head_get(b)) { + if (edata_is_head_get(b)) { /* * Additionally, sn should not overflow with retain; sanity * check that different regions have unique sn. 
*/ - assert(extent_sn_comp(a, b) != 0); + assert(edata_sn_comp(a, b) != 0); return true; } - assert(extent_sn_comp(a, b) == 0); + assert(edata_sn_comp(a, b) == 0); return false; } static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, - extent_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, + edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(extent_base_get(a) < extent_base_get(b)); + assert(edata_base_get(a) < edata_base_get(b)); if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { return true; } - bool err = ehooks_merge(tsdn, ehooks, extent_base_get(a), - extent_size_get(a), extent_base_get(b), extent_size_get(b), - extent_committed_get(a), arena_ind_get(arena)); + bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), + edata_size_get(a), edata_base_get(b), edata_size_get(b), + edata_committed_get(a), arena_ind_get(arena)); if (err) { return true; @@ -1585,7 +1582,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, &b_elm_b); - extent_lock2(tsdn, a, b); + extent_lock_edata2(tsdn, a, b); if (a_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, @@ -1598,22 +1595,22 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, b_elm_b = b_elm_a; } - extent_size_set(a, extent_size_get(a) + extent_size_get(b)); - extent_szind_set(a, SC_NSIZES); - extent_sn_set(a, (extent_sn_get(a) < extent_sn_get(b)) ? - extent_sn_get(a) : extent_sn_get(b)); - extent_zeroed_set(a, extent_zeroed_get(a) && extent_zeroed_get(b)); + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + edata_szind_set(a, SC_NSIZES); + edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
+ edata_sn_get(a) : edata_sn_get(b)); + edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, false); - extent_unlock2(tsdn, a, b); + extent_unlock_edata2(tsdn, a, b); /* * If we got here, we merged the extents; so they must be from the same * arena (i.e. this one). */ - assert(extent_arena_ind_get(b) == arena_ind_get(arena)); + assert(edata_arena_ind_get(b) == arena_ind_get(arena)); extent_dalloc(tsdn, arena, b); return false; @@ -1621,7 +1618,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_t *a, bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - extent_t *a, extent_t *b) { + edata_t *a, edata_t *b) { return extent_merge_impl(tsdn, arena, ehooks, a, b, false); } diff --git a/src/extent_dss.c b/src/extent_dss.c index 59e7e7d6..a66afb68 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -109,7 +109,7 @@ extent_dss_max_update(void *new_addr) { void * extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - extent_t *gap; + edata_t *gap; cassert(have_dss); assert(size > 0); @@ -153,7 +153,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t gap_size_page = (uintptr_t)ret - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { - extent_init(gap, arena_ind_get(arena), + edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, false, true, true, @@ -194,17 +194,17 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - extent_t extent; + edata_t edata; ehooks_t *ehooks = arena_get_ehooks( arena); - extent_init(&extent, + edata_init(&edata, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if 
(extent_purge_forced_wrapper(tsdn, - arena, ehooks, &extent, 0, size)) { + arena, ehooks, &edata, 0, size)) { memset(ret, 0, size); } } diff --git a/src/inspect.c b/src/inspect.c index 435016e6..5ad23a0e 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,21 +6,21 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { + const edata_t *edata = iealloc(tsdn, ptr); + if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; } - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { + *size = edata_size_get(edata); + if (!edata_slab_get(edata)) { *nfree = 0; *nregs = 1; } else { - *nfree = extent_nfree_get(extent); - *nregs = bin_infos[extent_szind_get(extent)].nregs; + *nfree = edata_nfree_get(edata); + *nregs = bin_infos[edata_szind_get(edata)].nregs; assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); + assert(*nfree * edata_usize_get(edata) <= *size); } } @@ -31,31 +31,31 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const extent_t *extent = iealloc(tsdn, ptr); - if (unlikely(extent == NULL)) { + const edata_t *edata = iealloc(tsdn, ptr); + if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; return; } - *size = extent_size_get(extent); - if (!extent_slab_get(extent)) { + *size = edata_size_get(edata); + if (!edata_slab_get(edata)) { *nfree = *bin_nfree = *bin_nregs = 0; *nregs = 1; *slabcur_addr = NULL; return; } - *nfree = extent_nfree_get(extent); - const szind_t szind = extent_szind_get(extent); + *nfree = edata_nfree_get(edata); + const szind_t szind = edata_szind_get(edata); *nregs = 
bin_infos[szind].nregs; assert(*nfree <= *nregs); - assert(*nfree * extent_usize_get(extent) <= *size); + assert(*nfree * edata_usize_get(edata) <= *size); const arena_t *arena = (arena_t *)atomic_load_p( - &arenas[extent_arena_ind_get(extent)], ATOMIC_RELAXED); + &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); assert(arena != NULL); - const unsigned binshard = extent_binshard_get(extent); + const unsigned binshard = edata_binshard_get(edata); bin_t *bin = &arena->bins[szind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); @@ -66,12 +66,12 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, } else { *bin_nfree = *bin_nregs = 0; } - extent_t *slab; + edata_t *slab; if (bin->slabcur != NULL) { slab = bin->slabcur; } else { - slab = extent_heap_first(&bin->slabs_nonfull); + slab = edata_heap_first(&bin->slabs_nonfull); } - *slabcur_addr = slab != NULL ? extent_addr_get(slab) : NULL; + *slabcur_addr = slab != NULL ? edata_addr_get(slab) : NULL; malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/src/large.c b/src/large.c index 6fd21bea..67b47453 100644 --- a/src/large.c +++ b/src/large.c @@ -21,7 +21,7 @@ void * large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { size_t ausize; - extent_t *extent; + edata_t *edata; bool is_zeroed; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); @@ -44,28 +44,28 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } - if (unlikely(arena == NULL) || (extent = arena_extent_alloc_large(tsdn, + if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn, arena, usize, alignment, &is_zeroed)) == NULL) { return NULL; } /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { - /* Insert extent into large. */ + /* Insert edata into large. 
*/ malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_append(&arena->large, extent); + edata_list_append(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } if (zero) { assert(is_zeroed); } else if (config_fill && unlikely(opt_junk_alloc)) { - memset(extent_addr_get(extent), JEMALLOC_ALLOC_JUNK, - extent_usize_get(extent)); + memset(edata_addr_get(edata), JEMALLOC_ALLOC_JUNK, + edata_usize_get(edata)); } arena_decay_tick(tsdn, arena); - return extent_addr_get(extent); + return edata_addr_get(edata); } static void @@ -90,11 +90,11 @@ large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = large_dalloc_maybe_junk_impl; static bool -large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { - arena_t *arena = arena_get_from_extent(extent); - size_t oldusize = extent_usize_get(extent); +large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { + arena_t *arena = arena_get_from_edata(edata); + size_t oldusize = edata_usize_get(edata); ehooks_t *ehooks = arena_get_ehooks(arena); - size_t diff = extent_size_get(extent) - (usize + sz_large_pad); + size_t diff = edata_size_get(edata) - (usize + sz_large_pad); assert(oldusize > usize); @@ -104,31 +104,31 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, extent_t *extent, size_t usize) { /* Split excess pages. 
*/ if (diff != 0) { - extent_t *trail = extent_split_wrapper(tsdn, arena, - ehooks, extent, usize + sz_large_pad, sz_size2index(usize), + edata_t *trail = extent_split_wrapper(tsdn, arena, + ehooks, edata, usize + sz_large_pad, sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { return true; } if (config_fill && unlikely(opt_junk_free)) { - large_dalloc_maybe_junk(extent_addr_get(trail), - extent_size_get(trail)); + large_dalloc_maybe_junk(edata_addr_get(trail), + edata_size_get(trail)); } arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); } - arena_extent_ralloc_large_shrink(tsdn, arena, extent, oldusize); + arena_extent_ralloc_large_shrink(tsdn, arena, edata, oldusize); return false; } static bool -large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, +large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { - arena_t *arena = arena_get_from_extent(extent); - size_t oldusize = extent_usize_get(extent); + arena_t *arena = arena_get_from_edata(edata); + size_t oldusize = edata_usize_get(edata); ehooks_t *ehooks = arena_get_ehooks(arena); size_t trailsize = usize - oldusize; @@ -147,20 +147,20 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, */ bool is_zeroed_trail = zero; bool commit = true; - extent_t *trail; + edata_t *trail; bool new_mapping; if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, - extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL || (trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, - extent_past_get(extent), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = extent_alloc_wrapper(tsdn, arena, ehooks, - extent_past_get(extent), 
trailsize, 0, CACHELINE, false, + edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { return true; } @@ -169,16 +169,16 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, ehooks, extent, trail)) { + if (extent_merge_wrapper(tsdn, arena, ehooks, edata, trail)) { extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); szind_t szind = sz_size2index(usize); - extent_szind_set(extent, szind); + edata_szind_set(edata, szind); rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, false); + (uintptr_t)edata_addr_get(edata), szind, false); if (config_stats && new_mapping) { arena_stats_mapped_add(tsdn, &arena->stats, trailsize); @@ -194,7 +194,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, * of CACHELINE in [0 .. PAGE). 
*/ void *zbase = (void *) - ((uintptr_t)extent_addr_get(extent) + oldusize); + ((uintptr_t)edata_addr_get(edata) + oldusize); void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + PAGE)); size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; @@ -203,19 +203,19 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, extent_t *extent, size_t usize, } assert(is_zeroed_trail); } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)extent_addr_get(extent) + oldusize), + memset((void *)((uintptr_t)edata_addr_get(edata) + oldusize), JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_extent_ralloc_large_expand(tsdn, arena, extent, oldusize); + arena_extent_ralloc_large_expand(tsdn, arena, edata, oldusize); return false; } bool -large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, +large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, size_t usize_max, bool zero) { - size_t oldusize = extent_usize_get(extent); + size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS); @@ -225,16 +225,15 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ - if (!large_ralloc_no_move_expand(tsdn, extent, usize_max, + if (!large_ralloc_no_move_expand(tsdn, edata, usize_max, zero)) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } /* Try again, this time with usize_min. 
*/ if (usize_min < usize_max && usize_min > oldusize && - large_ralloc_no_move_expand(tsdn, extent, usize_min, - zero)) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + large_ralloc_no_move_expand(tsdn, edata, usize_min, zero)) { + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } } @@ -244,14 +243,14 @@ large_ralloc_no_move(tsdn_t *tsdn, extent_t *extent, size_t usize_min, * the new size. */ if (oldusize >= usize_min && oldusize <= usize_max) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } /* Attempt to shrink the allocation in-place. */ if (oldusize > usize_max) { - if (!large_ralloc_no_move_shrink(tsdn, extent, usize_max)) { - arena_decay_tick(tsdn, arena_get_from_extent(extent)); + if (!large_ralloc_no_move_shrink(tsdn, edata, usize_max)) { + arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } } @@ -271,9 +270,9 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - extent_t *extent = iealloc(tsdn, ptr); + edata_t *edata = iealloc(tsdn, ptr); - size_t oldusize = extent_usize_get(extent); + size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ @@ -281,11 +280,11 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, && usize >= SC_LARGE_MINCLASS); /* Try to avoid moving the allocation. */ - if (!large_ralloc_no_move(tsdn, extent, usize, usize, zero)) { + if (!large_ralloc_no_move(tsdn, edata, usize, usize, zero)) { hook_invoke_expand(hook_args->is_realloc ? 
hook_expand_realloc : hook_expand_rallocx, ptr, oldusize, usize, (uintptr_t)ptr, hook_args->args); - return extent_addr_get(extent); + return edata_addr_get(edata); } /* @@ -306,8 +305,8 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); size_t copysize = (usize < oldusize) ? usize : oldusize; - memcpy(ret, extent_addr_get(extent), copysize); - isdalloct(tsdn, extent_addr_get(extent), oldusize, tcache, NULL, true); + memcpy(ret, edata_addr_get(edata), copysize); + isdalloct(tsdn, edata_addr_get(edata), oldusize, tcache, NULL, true); return ret; } @@ -316,76 +315,75 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, * whether the arena's large_mtx is currently held. */ static void -large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent, +large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool junked_locked) { if (!junked_locked) { /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); + edata_list_remove(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - large_dalloc_maybe_junk(extent_addr_get(extent), - extent_usize_get(extent)); + large_dalloc_maybe_junk(edata_addr_get(edata), + edata_usize_get(edata)); } else { /* Only hold the large_mtx if necessary. 
*/ if (!arena_is_auto(arena)) { malloc_mutex_assert_owner(tsdn, &arena->large_mtx); - extent_list_remove(&arena->large, extent); + edata_list_remove(&arena->large, edata); } } - arena_extent_dalloc_large_prep(tsdn, arena, extent); + arena_extent_dalloc_large_prep(tsdn, arena, edata); } static void -large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, extent_t *extent) { +large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { ehooks_t *ehooks = arena_get_ehooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, ehooks, extent); + arena_extents_dirty_dalloc(tsdn, arena, ehooks, edata); } void -large_dalloc_prep_junked_locked(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_prep_impl(tsdn, arena_get_from_extent(extent), extent, - true); +large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata) { + large_dalloc_prep_impl(tsdn, arena_get_from_edata(edata), edata, true); } void -large_dalloc_finish(tsdn_t *tsdn, extent_t *extent) { - large_dalloc_finish_impl(tsdn, arena_get_from_extent(extent), extent); +large_dalloc_finish(tsdn_t *tsdn, edata_t *edata) { + large_dalloc_finish_impl(tsdn, arena_get_from_edata(edata), edata); } void -large_dalloc(tsdn_t *tsdn, extent_t *extent) { - arena_t *arena = arena_get_from_extent(extent); - large_dalloc_prep_impl(tsdn, arena, extent, false); - large_dalloc_finish_impl(tsdn, arena, extent); +large_dalloc(tsdn_t *tsdn, edata_t *edata) { + arena_t *arena = arena_get_from_edata(edata); + large_dalloc_prep_impl(tsdn, arena, edata, false); + large_dalloc_finish_impl(tsdn, arena, edata); arena_decay_tick(tsdn, arena); } size_t -large_salloc(tsdn_t *tsdn, const extent_t *extent) { - return extent_usize_get(extent); +large_salloc(tsdn_t *tsdn, const edata_t *edata) { + return edata_usize_get(edata); } void -large_prof_info_get(const extent_t *extent, prof_info_t *prof_info) { - extent_prof_info_get(extent, prof_info); +large_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { + edata_prof_info_get(edata, 
prof_info); } static void -large_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { - extent_prof_tctx_set(extent, tctx); +large_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { + edata_prof_tctx_set(edata, tctx); } void -large_prof_tctx_reset(extent_t *extent) { - large_prof_tctx_set(extent, (prof_tctx_t *)(uintptr_t)1U); +large_prof_tctx_reset(edata_t *edata) { + large_prof_tctx_set(edata, (prof_tctx_t *)(uintptr_t)1U); } void -large_prof_info_set(extent_t *extent, prof_tctx_t *tctx) { - large_prof_tctx_set(extent, tctx); +large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { + large_prof_tctx_set(edata, tctx); nstime_t t; nstime_init_update(&t); - extent_prof_alloc_time_set(extent, &t); + edata_prof_alloc_time_set(edata, &t); } diff --git a/src/tcache.c b/src/tcache.c index 7922e59e..0a511e2d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -114,8 +114,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, /* Enabled with --enable-extra-size-check. */ static void -tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, - size_t nflush, extent_t **extents){ +tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, + size_t nflush, edata_t **edatas){ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -129,9 +129,9 @@ tbin_extents_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, size_t sz_sum = binind * nflush; void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { - rtree_extent_szind_read(tsdn, &extents_rtree, + rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)*(bottom_item - i), true, - &extents[i], &szind); + &edatas[i], &szind); sz_sum -= szind; } if (sz_sum != 0) { @@ -154,26 +154,26 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = ncached - rem; - 
VARIABLE_ARRAY(extent_t *, item_extent, nflush); + VARIABLE_ARRAY(edata_t *, item_edata, nflush); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); - /* Look up extent once per item. */ + /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, - nflush, item_extent); + tbin_edatas_lookup_size_check(tsd_tsdn(tsd), tbin, binind, + nflush, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), + item_edata[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); } } while (nflush > 0) { /* Lock the arena bin associated with the first object. */ - extent_t *extent = item_extent[0]; - unsigned bin_arena_ind = extent_arena_ind_get(extent); + edata_t *edata = item_edata[0]; + unsigned bin_arena_ind = edata_arena_ind_get(edata); arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind, false); - unsigned binshard = extent_binshard_get(extent); + unsigned binshard = edata_binshard_get(edata); assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; @@ -187,13 +187,13 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); - extent = item_extent[i]; - assert(ptr != NULL && extent != NULL); + edata = item_edata[i]; + assert(ptr != NULL && edata != NULL); - if (extent_arena_ind_get(extent) == bin_arena_ind - && extent_binshard_get(extent) == binshard) { + if (edata_arena_ind_get(edata) == bin_arena_ind + && edata_binshard_get(edata) == binshard) { arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, bin, binind, extent, ptr); + bin_arena, bin, binind, edata, ptr); } else { /* * This object was allocated via a different @@ -202,7 +202,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * handled in a future pass. 
*/ *(bottom_item - ndeferred) = ptr; - item_extent[ndeferred] = extent; + item_edata[ndeferred] = edata; ndeferred++; } } @@ -244,22 +244,22 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(extent_t *, item_extent, nflush); + VARIABLE_ARRAY(edata_t *, item_edata, nflush); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); #ifndef JEMALLOC_EXTRA_SIZE_CHECK - /* Look up extent once per item. */ + /* Look up edata once per item. */ for (unsigned i = 0 ; i < nflush; i++) { - item_extent[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); + item_edata[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); } #else tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, - item_extent); + item_edata); #endif while (nflush > 0) { /* Lock the arena associated with the first object. */ - extent_t *extent = item_extent[0]; - unsigned locked_arena_ind = extent_arena_ind_get(extent); + edata_t *edata = item_edata[0]; + unsigned locked_arena_ind = edata_arena_ind_get(edata); arena_t *locked_arena = arena_get(tsd_tsdn(tsd), locked_arena_ind, false); @@ -270,10 +270,10 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); assert(ptr != NULL); - extent = item_extent[i]; - if (extent_arena_ind_get(extent) == locked_arena_ind) { + edata = item_edata[i]; + if (edata_arena_ind_get(edata) == locked_arena_ind) { large_dalloc_prep_junked_locked(tsd_tsdn(tsd), - extent); + edata); } } if ((config_prof || config_stats) && @@ -293,11 +293,11 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); - extent = item_extent[i]; - assert(ptr != NULL && extent != NULL); + edata = item_edata[i]; + assert(ptr != NULL 
&& edata != NULL); - if (extent_arena_ind_get(extent) == locked_arena_ind) { - large_dalloc_finish(tsd_tsdn(tsd), extent); + if (edata_arena_ind_get(edata) == locked_arena_ind) { + large_dalloc_finish(tsd_tsdn(tsd), edata); } else { /* * This object was allocated via a different @@ -306,7 +306,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t * in a future pass. */ *(bottom_item - ndeferred) = ptr; - item_extent[ndeferred] = extent; + item_edata[ndeferred] = edata; ndeferred++; } } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index b182f31a..854799da 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -63,17 +63,17 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - extent_t *extent; + edata_t *edata; szind_t szind; - if (rtree_extent_szind_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, false, &extent, &szind)) { + if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + (uintptr_t)ptr, false, &edata, &szind)) { return 0; } - if (extent == NULL) { + if (edata == NULL) { return 0; } - if (extent_state_get(extent) != extent_state_active) { + if (edata_state_get(edata) != extent_state_active) { return 0; } diff --git a/test/unit/base.c b/test/unit/base.c index 7ced15f7..3b848ca1 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -168,14 +168,14 @@ TEST_BEGIN(test_base_hooks_not_null) { * that the first block's remaining space is considered for subsequent * allocation. */ - assert_zu_ge(extent_bsize_get(&base->blocks->extent), QUANTUM, + assert_zu_ge(edata_bsize_get(&base->blocks->edata), QUANTUM, "Remainder insufficient for test"); /* Use up all but one quantum of block. 
*/ - while (extent_bsize_get(&base->blocks->extent) > QUANTUM) { + while (edata_bsize_get(&base->blocks->edata) > QUANTUM) { p = base_alloc(tsdn, base, QUANTUM, QUANTUM); assert_ptr_not_null(p, "Unexpected base_alloc() failure"); } - r_exp = extent_addr_get(&base->blocks->extent); + r_exp = edata_addr_get(&base->blocks->edata); assert_zu_eq(base->extent_sn_next, 1, "One extant block expected"); q = base_alloc(tsdn, base, QUANTUM + 1, QUANTUM); assert_ptr_not_null(q, "Unexpected base_alloc() failure"); diff --git a/test/unit/binshard.c b/test/unit/binshard.c index d7a8df8f..d9a0d599 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -53,7 +53,7 @@ TEST_END static void * thd_start(void *varg) { void *ptr, *ptr2; - extent_t *extent; + edata_t *edata; unsigned shard1, shard2; tsdn_t *tsdn = tsdn_fetch(); @@ -62,13 +62,13 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - extent = iealloc(tsdn, ptr); - shard1 = extent_binshard_get(extent); + edata = iealloc(tsdn, ptr); + shard1 = edata_binshard_get(edata); dallocx(ptr, 0); assert_u_lt(shard1, 16, "Unexpected bin shard used"); - extent = iealloc(tsdn, ptr2); - shard2 = extent_binshard_get(extent); + edata = iealloc(tsdn, ptr2); + shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); assert_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 9105e3e5..2477db03 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -75,8 +75,8 @@ TEST_BEGIN(test_rtree_read_empty) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, - false), "rtree_extent_read() should return NULL for empty tree"); + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, + false), "rtree_edata_read() should return NULL for empty tree"); rtree_delete(tsdn, rtree); } TEST_END @@ 
-86,11 +86,11 @@ TEST_END #undef SEED TEST_BEGIN(test_rtree_extrema) { - extent_t extent_a, extent_b; - extent_init(&extent_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, + edata_t edata_a, edata_b; + edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - extent_init(&extent_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -100,21 +100,21 @@ TEST_BEGIN(test_rtree_extrema) { rtree_ctx_data_init(&rtree_ctx); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &extent_a, - extent_szind_get(&extent_a), extent_slab_get(&extent_a)), + assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, + edata_szind_get(&edata_a), edata_slab_get(&edata_a)), "Unexpected rtree_write() failure"); rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, - extent_szind_get(&extent_a), extent_slab_get(&extent_a)); - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, PAGE, true), - &extent_a, - "rtree_extent_read() should return previously set value"); + edata_szind_get(&edata_a), edata_slab_get(&edata_a)); + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), + &edata_a, + "rtree_edata_read() should return previously set value"); assert_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), - &extent_b, extent_szind_get_maybe_invalid(&extent_b), - extent_slab_get(&extent_b)), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - ~((uintptr_t)0), true), &extent_b, - "rtree_extent_read() should return previously set value"); + &edata_b, edata_szind_get_maybe_invalid(&edata_b), + edata_slab_get(&edata_b)), "Unexpected rtree_write() failure"); + 
assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + ~((uintptr_t)0), true), &edata_b, + "rtree_edata_read() should return previously set value"); rtree_delete(tsdn, rtree); } @@ -126,8 +126,8 @@ TEST_BEGIN(test_rtree_bits) { uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; - extent_t extent; - extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t edata; + edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; @@ -137,17 +137,17 @@ TEST_BEGIN(test_rtree_bits) { for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - &extent, SC_NSIZES, false), + &edata, SC_NSIZES, false), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - keys[j], true), &extent, - "rtree_extent_read() should return previously set " + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + keys[j], true), &edata, + "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); @@ -167,8 +167,8 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - extent_t extent; - extent_init(&extent, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t edata; + edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); @@ -179,29 
+179,29 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); assert_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_write(tsdn, rtree, elm, &extent, SC_NSIZES, + rtree_leaf_elm_write(tsdn, rtree, elm, &edata, SC_NSIZES, false); - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &extent, - "rtree_extent_read() should return previously set value"); + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + keys[i], true), &edata, + "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_eq(rtree_extent_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &extent, - "rtree_extent_read() should return previously set value, " + assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + keys[i], true), &edata, + "rtree_edata_read() should return previously set value, " "i=%u", i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), - "rtree_extent_read() should return previously set value"); + "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_null(rtree_extent_read(tsdn, rtree, &rtree_ctx, + assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), - "rtree_extent_read() should return previously set value"); + "rtree_edata_read() should return previously set value"); } rtree_delete(tsdn, rtree); diff --git a/test/unit/slab.c b/test/unit/slab.c index bcc752e4..5d2b35fa 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -7,24 +7,24 @@ TEST_BEGIN(test_arena_slab_regind) { for (binind = 0; binind < SC_NBINS; binind++) { size_t regind; - extent_t slab; + edata_t slab; const bin_info_t *bin_info = &bin_infos[binind]; - extent_init(&slab, INVALID_ARENA_IND, + edata_init(&slab, INVALID_ARENA_IND, 
mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true, EXTENT_NOT_HEAD); - assert_ptr_not_null(extent_addr_get(&slab), + assert_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { - void *reg = (void *)((uintptr_t)extent_addr_get(&slab) + + void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); assert_zu_eq(arena_slab_regind(&slab, binind, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); } - free(extent_addr_get(&slab)); + free(edata_addr_get(&slab)); } } TEST_END From 78591841798fa548feba468d1bb7338592039180 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Dec 2019 11:17:19 -0800 Subject: [PATCH 1467/2608] Pull out edata_t caching into its own module. --- Makefile.in | 1 + include/jemalloc/internal/arena_structs.h | 12 ++--- include/jemalloc/internal/edata_cache.h | 25 +++++++++ include/jemalloc/internal/extent2.h | 3 -- include/jemalloc/internal/witness.h | 2 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/arena.c | 14 +++-- src/ctl.c | 2 +- src/edata_cache.c | 47 +++++++++++++++++ src/extent2.c | 51 ++++++------------- src/extent_dss.c | 7 +-- 12 files changed, 106 insertions(+), 60 deletions(-) create mode 100644 include/jemalloc/internal/edata_cache.h create mode 100644 src/edata_cache.c diff --git a/Makefile.in b/Makefile.in index 86a51ccb..f75ae4b6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -105,6 +105,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ $(srcroot)src/edata.c \ + $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent2.c \ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index aac620b9..38c8b27c 100644 --- 
a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/eset.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" @@ -184,15 +185,8 @@ struct arena_s { pszind_t retain_grow_limit; malloc_mutex_t extent_grow_mtx; - /* - * Available edata structures that were allocated via - * base_alloc_edata(). - * - * Synchronization: edata_avail_mtx. - */ - edata_tree_t edata_avail; - atomic_zu_t edata_avail_cnt; - malloc_mutex_t edata_avail_mtx; + /* The source of edata_t objects. */ + edata_cache_t edata_cache; /* * bins is used to store heaps of free regions. diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h new file mode 100644 index 00000000..fc184084 --- /dev/null +++ b/include/jemalloc/internal/edata_cache.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H +#define JEMALLOC_INTERNAL_EDATA_CACHE_H + +/* + * A cache of edata_t structures allocated via base_alloc_edata (as opposed to + * the underlying extents they describe). The contents of returned edata_t + * objects are garbage and cannot be relied upon. 
+ */ + +typedef struct edata_cache_s edata_cache_t; +struct edata_cache_s { + edata_tree_t avail; + atomic_zu_t count; + malloc_mutex_t mtx; +}; + +bool edata_cache_init(edata_cache_t *edata_cache); +edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, + base_t *base); +void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata); +void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); +void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); +void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); + +#endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index ef232677..629474ee 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -26,9 +26,6 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -edata_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); -void extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata); - edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index ddbcf9d2..985e0a33 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,7 +43,7 @@ #define WITNESS_RANK_TCACHE_QL 13U #define WITNESS_RANK_EXTENT_GROW 14U #define WITNESS_RANK_EXTENTS 15U -#define WITNESS_RANK_EDATA_AVAIL 16U +#define WITNESS_RANK_EDATA_CACHE 16U #define WITNESS_RANK_EXTENT_POOL 17U #define WITNESS_RANK_RTREE 18U diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9dfc36d2..23312d3b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -45,6 +45,7 @@ + diff --git 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 0ec4d1ee..76c16c5a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -45,6 +45,7 @@ + diff --git a/src/arena.c b/src/arena.c index f05a1d17..a23419a3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -103,7 +103,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, eset_npages_get(&arena->eset_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, - atomic_load_zu(&arena->edata_avail_cnt, ATOMIC_RELAXED), + atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); arena_stats_accum_u64(&astats->decay_dirty.npurge, @@ -224,7 +224,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(edata_avail_mtx, + READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, arena_prof_mutex_extents_dirty) @@ -2053,9 +2053,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - edata_avail_new(&arena->edata_avail); - if (malloc_mutex_init(&arena->edata_avail_mtx, "edata_avail", - WITNESS_RANK_EDATA_AVAIL, malloc_mutex_rank_exclusive)) { + if (edata_cache_init(&arena->edata_cache)) { goto label_error; } @@ -2201,7 +2199,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->edata_avail_mtx); + edata_cache_prefork(tsdn, &arena->edata_cache); } void @@ -2235,7 +2233,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - malloc_mutex_postfork_parent(tsdn, &arena->edata_avail_mtx); + edata_cache_postfork_parent(tsdn, &arena->edata_cache); 
eset_postfork_parent(tsdn, &arena->eset_dirty); eset_postfork_parent(tsdn, &arena->eset_muzzy); eset_postfork_parent(tsdn, &arena->eset_retained); @@ -2281,7 +2279,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - malloc_mutex_postfork_child(tsdn, &arena->edata_avail_mtx); + edata_cache_postfork_child(tsdn, &arena->edata_cache); eset_postfork_child(tsdn, &arena->eset_dirty); eset_postfork_child(tsdn, &arena->eset_muzzy); eset_postfork_child(tsdn, &arena->eset_retained); diff --git a/src/ctl.c b/src/ctl.c index 1e72bf4c..a58b22ba 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3010,7 +3010,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->edata_avail_mtx); + MUTEX_PROF_RESET(arena->edata_cache.mtx); MUTEX_PROF_RESET(arena->eset_dirty.mtx); MUTEX_PROF_RESET(arena->eset_muzzy.mtx); MUTEX_PROF_RESET(arena->eset_retained.mtx); diff --git a/src/edata_cache.c b/src/edata_cache.c new file mode 100644 index 00000000..4d026029 --- /dev/null +++ b/src/edata_cache.c @@ -0,0 +1,47 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +edata_cache_init(edata_cache_t *edata_cache) { + if (malloc_mutex_init(&edata_cache->mtx, "edata_cache", + WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { + return true; + } + + edata_avail_new(&edata_cache->avail); + return false; +} + +edata_t * +edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, base_t *base) { + malloc_mutex_lock(tsdn, &edata_cache->mtx); + edata_t *edata = edata_avail_first(&edata_cache->avail); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &edata_cache->mtx); + return base_alloc_edata(tsdn, base); + } + edata_avail_remove(&edata_cache->avail, edata); + atomic_fetch_sub_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, 
&edata_cache->mtx); + return edata; +} + +void +edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { + malloc_mutex_lock(tsdn, &edata_cache->mtx); + edata_avail_insert(&edata_cache->avail, edata); + atomic_fetch_add_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + malloc_mutex_unlock(tsdn, &edata_cache->mtx); +} + +void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) { + malloc_mutex_prefork(tsdn, &edata_cache->mtx); +} + +void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) { + malloc_mutex_postfork_parent(tsdn, &edata_cache->mtx); +} + +void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { + malloc_mutex_postfork_child(tsdn, &edata_cache->mtx); +} diff --git a/src/extent2.c b/src/extent2.c index 5bacb8fe..b77e4b89 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -163,28 +163,6 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } } -edata_t * -extent_alloc(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); - edata_t *edata = edata_avail_first(&arena->edata_avail); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); - return base_alloc_edata(tsdn, arena->base); - } - edata_avail_remove(&arena->edata_avail, edata); - atomic_fetch_sub_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); - return edata; -} - -void -extent_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - malloc_mutex_lock(tsdn, &arena->edata_avail_mtx); - edata_avail_insert(&arena->edata_avail, edata); - atomic_fetch_add_zu(&arena->edata_avail_cnt, 1, ATOMIC_RELAXED); - malloc_mutex_unlock(tsdn, &arena->edata_avail_mtx); -} - static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata) { @@ -317,7 +295,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, edata_size_get(edata), 
growing_retained); } } - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); } static void @@ -858,7 +836,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); } - edata_t *edata = extent_alloc(tsdn, arena); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, + arena->base); if (edata == NULL) { goto label_err; } @@ -872,12 +851,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; } if (extent_register_no_gdump_add(tsdn, edata)) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; } @@ -1021,7 +1000,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { size_t esize = size + pad; - edata_t *edata = extent_alloc(tsdn, arena); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, + arena->base); if (edata == NULL) { return NULL; } @@ -1029,7 +1009,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, zero, commit, arena_ind_get(arena)); if (addr == NULL) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; } edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, @@ -1039,7 +1019,7 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_randomize(tsdn, arena, edata, alignment); } if (extent_register(tsdn, edata)) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; 
} @@ -1257,7 +1237,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { WITNESS_RANK_CORE, 0); if (extent_register(tsdn, edata)) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); return; } extent_dalloc_wrapper(tsdn, arena, ehooks, edata); @@ -1287,7 +1267,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, arena_ind_get(arena)); if (!err) { - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); } return err; @@ -1359,7 +1339,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata), arena_ind_get(arena)); - extent_dalloc(tsdn, arena, edata); + edata_cache_put(tsdn, &arena->edata_cache, edata); } static bool @@ -1445,7 +1425,8 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - edata_t *trail = extent_alloc(tsdn, arena); + edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache, + arena->base); if (trail == NULL) { goto label_error_a; } @@ -1505,7 +1486,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, label_error_c: extent_unlock_edata2(tsdn, edata, trail); label_error_b: - extent_dalloc(tsdn, arena, trail); + edata_cache_put(tsdn, &arena->edata_cache, trail); label_error_a: return NULL; } @@ -1611,7 +1592,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, * arena (i.e. this one). 
*/ assert(edata_arena_ind_get(b) == arena_ind_get(arena)); - extent_dalloc(tsdn, arena, b); + edata_cache_put(tsdn, &arena->edata_cache, b); return false; } diff --git a/src/extent_dss.c b/src/extent_dss.c index a66afb68..25ba944b 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -123,7 +123,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - gap = extent_alloc(tsdn, arena); + gap = edata_cache_get(tsdn, &arena->edata_cache, arena->base); if (gap == NULL) { return NULL; } @@ -188,7 +188,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { extent_dalloc_gap(tsdn, arena, gap); } else { - extent_dalloc(tsdn, arena, gap); + edata_cache_put(tsdn, + &arena->edata_cache, gap); } if (!*commit) { *commit = pages_decommit(ret, size); @@ -224,7 +225,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } label_oom: extent_dss_extending_finish(); - extent_dalloc(tsdn, arena, gap); + edata_cache_put(tsdn, &arena->edata_cache, gap); return NULL; } From 09475bf8acfef36924df787deb0247a7b0456c66 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Dec 2019 13:35:43 -0800 Subject: [PATCH 1468/2608] extent_may_dalloc -> ehooks_dalloc_will_fail --- include/jemalloc/internal/ehooks.h | 10 ++++++++++ src/extent2.c | 8 +------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index c046cd13..711a534b 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -9,6 +9,7 @@ */ #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/extent_mmap.h" extern const extent_hooks_t ehooks_default_extent_hooks; @@ -96,6 +97,15 @@ ehooks_are_default(ehooks_t *ehooks) { * a hook. If that hook is doomed to fail, this is wasteful. We therefore * include some checks for such cases. 
*/ +static inline bool +ehooks_dalloc_will_fail(ehooks_t *ehooks) { + if (ehooks_are_default(ehooks)) { + return opt_retain; + } else { + return ehooks_get_extent_hooks_ptr(ehooks)->dalloc == NULL; + } +} + static inline bool ehooks_split_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->split == NULL; diff --git a/src/extent2.c b/src/extent2.c index b77e4b89..148c3283 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1243,12 +1243,6 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { extent_dalloc_wrapper(tsdn, arena, ehooks, edata); } -static bool -extent_may_dalloc(void) { - /* With retain enabled, the default dalloc always fails. */ - return !opt_retain; -} - static bool extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata) { @@ -1281,7 +1275,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ - if (!ehooks_are_default(ehooks) || extent_may_dalloc()) { + if (!ehooks_dalloc_will_fail(ehooks)) { /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. From 07045162459f1d5f529ca530f035157f97645b0d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Dec 2019 17:23:24 -0800 Subject: [PATCH 1469/2608] Ehooks: Add head tracking. 
--- include/jemalloc/internal/edata.h | 12 +----- include/jemalloc/internal/ehooks.h | 20 +++++++--- include/jemalloc/internal/extent2.h | 1 - src/ehooks.c | 60 ++++++++++++++++++++++++----- src/extent2.c | 41 ++------------------ 5 files changed, 68 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 990c3256..86f5ac57 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -467,20 +467,12 @@ edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { static inline bool edata_is_head_get(edata_t *edata) { - if (maps_coalesce) { - not_reached(); - } - return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >> EDATA_BITS_IS_HEAD_SHIFT); } static inline void edata_is_head_set(edata_t *edata, bool is_head) { - if (maps_coalesce) { - not_reached(); - } - edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) | ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } @@ -502,9 +494,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_committed_set(edata, committed); edata_dumpable_set(edata, dumpable); ql_elm_new(edata, ql_link); - if (!maps_coalesce) { - edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); - } + edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { edata_prof_tctx_set(edata, NULL); } diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 711a534b..6f4f950c 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,16 +1,21 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/extent_mmap.h" + /* * This module is the internal interface to the extent hooks (both * user-specified and external). Eventually, this will give us the flexibility * to use multiple different versions of user-visible extent-hook APIs under a * single user interface. 
+ * + * Current API expansions (not available to anyone but the default hooks yet): + * - Head state tracking. Hooks can decide whether or not to merge two + * extents based on whether or not one of them is the head (i.e. was + * allocated on its own). The later extent loses its "head" status. */ -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/extent_mmap.h" - extern const extent_hooks_t ehooks_default_extent_hooks; typedef struct ehooks_s ehooks_t; @@ -43,7 +48,8 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif bool ehooks_default_split_impl(); -bool ehooks_default_merge_impl(void *addr_a, void *addr_b); +bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, + void *addr_b, bool head_b); void ehooks_default_zero_impl(void *addr, size_t size); /* @@ -314,10 +320,12 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, - void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed, + unsigned arena_ind) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_merge_impl(addr_a, addr_b); + return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, + head_b); } else if (extent_hooks->merge == NULL) { return true; } else { diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 629474ee..08443366 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -54,7 +54,6 @@ edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, 
edata_t *a, edata_t *b); -bool extent_head_no_merge(edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/src/ehooks.c b/src/ehooks.c index a62586b9..51b1514a 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -183,8 +183,51 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, return ehooks_default_split_impl(); } +static inline bool +ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { + edata_t *a = iealloc(tsdn, addr_a); + edata_t *b = iealloc(tsdn, addr_b); + return edata_sn_comp(a, b) == 0; +} + +/* + * Returns true if the given extents can't be merged because of their head bit + * settings. Assumes the second extent has the higher address. + */ +static bool +ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, + bool head_b) { + /* + * When coalesce is not always allowed (Windows), only merge extents + * from the same VirtualAlloc region under opt.retain (in which case + * MEM_DECOMMIT is utilized for purging). + */ + if (maps_coalesce) { + return false; + } + if (!opt_retain) { + return true; + } + /* If b is a head extent, disallow the cross-region merge. */ + if (head_b) { + /* + * Additionally, sn should not overflow with retain; sanity + * check that different regions have unique sn. 
+ */ + assert(!ehooks_same_sn(tsdn, addr_a, addr_b)); + return true; + } + assert(ehooks_same_sn(tsdn, addr_a, addr_b)); + + return false; +} + bool -ehooks_default_merge_impl(void *addr_a, void *addr_b) { +ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, + bool head_b) { + if (ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, head_b)) { + return true; + } if (!maps_coalesce && !opt_retain) { return true; } @@ -198,15 +241,12 @@ ehooks_default_merge_impl(void *addr_a, void *addr_b) { static bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { - if (!maps_coalesce) { - tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = iealloc(tsdn, addr_a); - edata_t *b = iealloc(tsdn, addr_b); - if (extent_head_no_merge(a, b)) { - return true; - } - } - return ehooks_default_merge_impl(addr_a, addr_b); + tsdn_t *tsdn = tsdn_fetch(); + edata_t *a = iealloc(tsdn, addr_a); + bool head_a = edata_is_head_get(a); + edata_t *b = iealloc(tsdn, addr_b); + bool head_b = edata_is_head_get(b); + return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } void diff --git a/src/extent2.c b/src/extent2.c index 148c3283..21f9cdbd 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1493,38 +1493,6 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, slab_a, size_b, szind_b, slab_b, false); } -/* - * Returns true if the given extents can't be merged because of their head bit - * settings. Assumes the second extent has the higher address. - */ -bool -extent_head_no_merge(edata_t *a, edata_t *b) { - assert(edata_base_get(a) < edata_base_get(b)); - /* - * When coalesce is not always allowed (Windows), only merge extents - * from the same VirtualAlloc region under opt.retain (in which case - * MEM_DECOMMIT is utilized for purging). 
- */ - if (maps_coalesce) { - return false; - } - if (!opt_retain) { - return true; - } - /* If b is a head extent, disallow the cross-region merge. */ - if (edata_is_head_get(b)) { - /* - * Additionally, sn should not overflow with retain; sanity - * check that different regions have unique sn. - */ - assert(edata_sn_comp(a, b) != 0); - return true; - } - assert(edata_sn_comp(a, b) == 0); - - return false; -} - static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained) { @@ -1532,13 +1500,10 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); - if (ehooks_merge_will_fail(ehooks) || extent_head_no_merge(a, b)) { - return true; - } - bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), - edata_size_get(a), edata_base_get(b), edata_size_get(b), - edata_committed_get(a), arena_ind_get(arena)); + edata_size_get(a), edata_is_head_get(a), edata_base_get(b), + edata_size_get(b), edata_is_head_get(b), edata_committed_get(a), + arena_ind_get(arena)); if (err) { return true; From bb70df8e5babcf2779230d40b6a34fb04187c818 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 16:25:24 -0800 Subject: [PATCH 1470/2608] Extent refactor: Introduce ecache module. This will eventually completely wrap the eset, and handle concurrency, allocation, and deallocation. For now, we only pull out the mutex from the eset. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_structs.h | 28 +-- include/jemalloc/internal/ecache.h | 59 +++++ include/jemalloc/internal/eset.h | 47 +--- include/jemalloc/internal/extent2.h | 8 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + src/arena.c | 156 ++++++------ src/background_thread.c | 12 +- src/ctl.c | 6 +- src/ecache.c | 54 +++++ src/eset.c | 44 +--- src/extent2.c | 226 +++++++++--------- src/large.c | 4 +- test/unit/retained.c | 2 +- 15 files changed, 354 insertions(+), 295 deletions(-) create mode 100644 include/jemalloc/internal/ecache.h create mode 100644 src/ecache.c diff --git a/Makefile.in b/Makefile.in index f75ae4b6..71458487 100644 --- a/Makefile.in +++ b/Makefile.in @@ -104,6 +104,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ + $(srcroot)src/ecache.c \ $(srcroot)src/edata.c \ $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 38c8b27c..48d13b8c 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,8 +5,8 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" -#include "jemalloc/internal/eset.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -53,7 +53,7 @@ struct arena_decay_s { /* * Number of unpurged pages at beginning of current epoch. During epoch * advancement we use the delta between arena->decay_*.nunpurged and - * eset_npages_get(&arena->extents_*) to determine how many dirty pages, + * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, * if any, were generated. 
*/ size_t nunpurged; @@ -155,9 +155,9 @@ struct arena_s { * * Synchronization: internal. */ - eset_t eset_dirty; - eset_t eset_muzzy; - eset_t eset_retained; + ecache_t ecache_dirty; + ecache_t ecache_muzzy; + ecache_t ecache_retained; /* * Decay-based purging state, responsible for scheduling extent state @@ -168,22 +168,8 @@ struct arena_s { arena_decay_t decay_dirty; /* dirty --> muzzy */ arena_decay_t decay_muzzy; /* muzzy --> retained */ - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if opt_retain). This limits the - * number of disjoint virtual memory ranges so that extent merging can - * be effective even if multiple arenas' extent allocation requests are - * highly interleaved. - * - * retain_grow_limit is the max allowed size ind to expand (unless the - * required size is greater). Default is no limit, and controlled - * through mallctl only. - * - * Synchronization: extent_grow_mtx - */ - pszind_t extent_grow_next; - pszind_t retain_grow_limit; - malloc_mutex_t extent_grow_mtx; + /* The grow info for the retained ecache. */ + ecache_grow_t ecache_grow; /* The source of edata_t objects. */ edata_cache_t edata_cache; diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h new file mode 100644 index 00000000..70857203 --- /dev/null +++ b/include/jemalloc/internal/ecache.h @@ -0,0 +1,59 @@ +#ifndef JEMALLOC_INTERNAL_ECACHE_H +#define JEMALLOC_INTERNAL_ECACHE_H + +#include "jemalloc/internal/eset.h" +#include "jemalloc/internal/mutex.h" + +typedef struct ecache_s ecache_t; +struct ecache_s { + malloc_mutex_t mtx; + eset_t eset; +}; + +typedef struct ecache_grow_s ecache_grow_t; +struct ecache_grow_s { + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). 
This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * + * Synchronization: extent_grow_mtx + */ + pszind_t next; + pszind_t limit; + malloc_mutex_t mtx; +}; + +static inline size_t +ecache_npages_get(ecache_t *ecache) { + return eset_npages_get(&ecache->eset); +} +/* Get the number of extents in the given page size index. */ +static inline size_t +ecache_nextents_get(ecache_t *ecache, pszind_t ind) { + return eset_nextents_get(&ecache->eset, ind); +} +/* Get the sum total bytes of the extents in the given page size index. */ +static inline size_t +ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { + return eset_nbytes_get(&ecache->eset, ind); +} + +bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, + bool delay_coalesce); +void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache); +void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache); +void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache); + +bool ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow); +void ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow); +void ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow); +void ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow); + +#endif /* JEMALLOC_INTERNAL_ECACHE_H */ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index e76257af..bbc6b5cd 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -9,42 +9,25 @@ /* * An eset ("extent set") is a quantized collection of extents, with built-in * LRU queue. + * + * This class is not thread-safe; synchronization must be done externally if + * there are mutating operations. 
One exception is the stats counters, which + * may be read without any locking. */ typedef struct eset_s eset_t; struct eset_s { - malloc_mutex_t mtx; - - /* - * Quantized per size class heaps of extents. - * - * Synchronization: mtx. - */ + /* Quantized per size class heaps of extents. */ edata_heap_t heaps[SC_NPSIZES + 1]; atomic_zu_t nextents[SC_NPSIZES + 1]; atomic_zu_t nbytes[SC_NPSIZES + 1]; - /* - * Bitmap for which set bits correspond to non-empty heaps. - * - * Synchronization: mtx. - */ + /* Bitmap for which set bits correspond to non-empty heaps. */ bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; - /* - * LRU of all extents in heaps. - * - * Synchronization: mtx. - */ + /* LRU of all extents in heaps. */ edata_list_t lru; - /* - * Page sum for all extents in heaps. - * - * The synchronization here is a little tricky. Modifications to npages - * must hold mtx, but reads need not (though, a reader who sees npages - * without holding the mutex can't assume anything about the rest of the - * state of the eset_t). - */ + /* Page sum for all extents in heaps. */ atomic_zu_t npages; /* All stored extents must be in the same state. */ @@ -57,8 +40,7 @@ struct eset_s { bool delay_coalesce; }; -bool eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, - bool delay_coalesce); +void eset_init(eset_t *eset, extent_state_t state, bool delay_coalesce); extent_state_t eset_state_get(const eset_t *eset); size_t eset_npages_get(eset_t *eset); @@ -67,17 +49,12 @@ size_t eset_nextents_get(eset_t *eset, pszind_t ind); /* Get the sum total bytes of the extents in the given page size index. */ size_t eset_nbytes_get(eset_t *eset, pszind_t ind); -void eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); -void eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata); +void eset_insert(eset_t *eset, edata_t *edata); +void eset_remove(eset_t *eset, edata_t *edata); /* * Select an extent from this eset of the given size and alignment. 
Returns * null if no such item could be found. */ -edata_t *eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, - size_t alignment); - -void eset_prefork(tsdn_t *tsdn, eset_t *eset); -void eset_postfork_parent(tsdn_t *tsdn, eset_t *eset); -void eset_postfork_child(tsdn_t *tsdn, eset_t *eset); +edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 08443366..80e789e4 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -1,8 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT2_H #define JEMALLOC_INTERNAL_EXTENT2_H +#include "jemalloc/internal/ecache.h" #include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/eset.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" @@ -27,12 +27,12 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t size, size_t pad, size_t alignment, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, edata_t *edata); + ecache_t *ecache, edata_t *edata); edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, size_t npages_min); + ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 23312d3b..7b2e84a9 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff 
--git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 76c16c5a..338962b3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -44,6 +44,7 @@ + diff --git a/src/arena.c b/src/arena.c index a23419a3..2652207a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -56,7 +56,7 @@ static unsigned huge_arena_ind; */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, eset_t *eset, bool all, size_t npages_limit, + arena_decay_t *decay, ecache_t *ecache, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); @@ -76,8 +76,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += eset_npages_get(&arena->eset_dirty); - *nmuzzy += eset_npages_get(&arena->eset_muzzy); + *ndirty += ecache_npages_get(&arena->ecache_dirty); + *nmuzzy += ecache_npages_get(&arena->ecache_muzzy); } void @@ -100,7 +100,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - eset_npages_get(&arena->eset_retained) << LG_PAGE); + ecache_npages_get(&arena->ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), @@ -131,8 +131,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + - eset_npages_get(&arena->eset_dirty) + - 
eset_npages_get(&arena->eset_muzzy)) << LG_PAGE))); + ecache_npages_get(&arena->ecache_dirty) + + ecache_npages_get(&arena->ecache_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -174,12 +174,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = eset_nextents_get(&arena->eset_dirty, i); - muzzy = eset_nextents_get(&arena->eset_muzzy, i); - retained = eset_nextents_get(&arena->eset_retained, i); - dirty_bytes = eset_nbytes_get(&arena->eset_dirty, i); - muzzy_bytes = eset_nbytes_get(&arena->eset_muzzy, i); - retained_bytes = eset_nbytes_get(&arena->eset_retained, i); + dirty = ecache_nextents_get(&arena->ecache_dirty, i); + muzzy = ecache_nextents_get(&arena->ecache_muzzy, i); + retained = ecache_nextents_get(&arena->ecache_retained, i); + dirty_bytes = ecache_nbytes_get(&arena->ecache_dirty, i); + muzzy_bytes = ecache_nbytes_get(&arena->ecache_muzzy, i); + retained_bytes = ecache_nbytes_get(&arena->ecache_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -226,11 +226,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(eset_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(ecache_dirty.mtx, arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(eset_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(eset_retained.mtx, + READ_ARENA_MUTEX_PROF_DATA(ecache_retained.mtx, arena_prof_mutex_extents_retained) READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, arena_prof_mutex_decay_dirty) @@ -258,7 +258,7 @@ 
arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, ehooks, &arena->eset_dirty, edata); + extents_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -434,10 +434,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - edata_t *edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, - NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); + edata_t *edata = extents_alloc(tsdn, arena, ehooks, + &arena->ecache_dirty, NULL, usize, sz_large_pad, alignment, false, + szind, zero, &commit); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + edata = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } @@ -606,10 +607,10 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, size_t current_npages, size_t npages_limit, + ecache_t *ecache, size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, eset, false, + arena_decay_to_limit(tsdn, arena, decay, ecache, false, npages_limit, current_npages - npages_limit, is_background_thread); } @@ -641,8 +642,8 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, static void arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, const nstime_t *time, bool is_background_thread) { - size_t current_npages = eset_npages_get(eset); + ecache_t *ecache, const nstime_t 
*time, bool is_background_thread) { + size_t current_npages = ecache_npages_get(ecache); arena_decay_epoch_advance_helper(decay, time, current_npages); size_t npages_limit = arena_decay_backlog_npages_limit(decay); @@ -651,7 +652,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, current_npages; if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, eset, + arena_decay_try_purge(tsdn, arena, decay, ecache, current_npages, npages_limit, is_background_thread); } } @@ -708,15 +709,15 @@ arena_decay_ms_valid(ssize_t decay_ms) { static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, bool is_background_thread) { + ecache_t *ecache, bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ ssize_t decay_ms = arena_decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, eset, false, - 0, eset_npages_get(eset), + arena_decay_to_limit(tsdn, arena, decay, ecache, false, + 0, ecache_npages_get(ecache), is_background_thread); } return false; @@ -751,11 +752,11 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, eset, &time, + arena_decay_epoch_advance(tsdn, arena, decay, ecache, &time, is_background_thread); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, eset, - eset_npages_get(eset), + arena_decay_try_purge(tsdn, arena, decay, ecache, + ecache_npages_get(ecache), arena_decay_backlog_npages_limit(decay), is_background_thread); } @@ -780,7 +781,7 @@ arena_muzzy_decay_ms_get(arena_t *arena) { static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, ssize_t decay_ms) { + ecache_t *ecache, ssize_t decay_ms) { if 
(!arena_decay_ms_valid(decay_ms)) { return true; } @@ -795,7 +796,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. */ arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, eset, false); + arena_maybe_decay(tsdn, arena, decay, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -805,19 +806,19 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->eset_dirty, decay_ms); + &arena->ecache_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->eset_muzzy, decay_ms); + &arena->ecache_muzzy, decay_ms); } static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, eset_t *eset, size_t npages_limit, + ehooks_t *ehooks, ecache_t *ecache, size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -826,7 +827,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; edata_t *edata; while (nstashed < npages_decay_max && - (edata = extents_evict(tsdn, arena, ehooks, eset, npages_limit)) + (edata = extents_evict(tsdn, arena, ehooks, ecache, npages_limit)) != NULL) { edata_list_append(decay_extents, edata); nstashed += edata_size_get(edata) >> LG_PAGE; @@ -836,8 +837,8 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - arena_decay_t *decay, eset_t *eset, bool all, edata_list_t *decay_extents, - bool is_background_thread) { + arena_decay_t *decay, ecache_t *ecache, bool all, + edata_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -856,7 +857,7 @@ arena_decay_stashed(tsdn_t 
*tsdn, arena_t *arena, ehooks_t *ehooks, size_t npages = edata_size_get(edata) >> LG_PAGE; npurged += npages; edata_list_remove(decay_extents, edata); - switch (eset_state_get(eset)) { + switch (eset_state_get(&ecache->eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -864,7 +865,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, !extent_purge_lazy_wrapper(tsdn, arena, ehooks, edata, 0, edata_size_get(edata))) { extents_dalloc(tsdn, arena, ehooks, - &arena->eset_muzzy, edata); + &arena->ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; @@ -900,14 +901,14 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (eset_npages_get(extents) >= npages_limit). We need an upper + * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper * bound on number of pages in order to prevent unbounded growth (namely in * stashed), otherwise unbounded new pages could be added to extents during the * current decay run, so that the purging thread never finishes. 
*/ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, bool all, size_t npages_limit, size_t npages_decay_max, + ecache_t *ecache, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -924,11 +925,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, edata_list_t decay_extents; edata_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, eset, + size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - eset, all, &decay_extents, is_background_thread); + ecache, all, &decay_extents, is_background_thread); assert(npurged == npurge); } @@ -938,11 +939,11 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - eset_t *eset, bool is_background_thread, bool all) { + ecache_t *ecache, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, eset, all, 0, - eset_npages_get(eset), is_background_thread); + arena_decay_to_limit(tsdn, arena, decay, ecache, all, 0, + ecache_npages_get(ecache), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -953,7 +954,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, return true; } - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, eset, + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, ecache, is_background_thread); size_t npages_new; if (epoch_advanced) { @@ -975,18 +976,18 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - 
&arena->eset_dirty, is_background_thread, all); + &arena->ecache_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - if (eset_npages_get(&arena->eset_muzzy) == 0 && + if (ecache_npages_get(&arena->ecache_muzzy) == 0 && arena_muzzy_decay_ms_get(arena) <= 0) { return false; } return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->eset_muzzy, is_background_thread, all); + &arena->ecache_muzzy, is_background_thread, all); } void @@ -1157,7 +1158,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; while ((edata = extents_evict(tsdn, arena, ehooks, - &arena->eset_retained, 0)) != NULL) { + &arena->ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, ehooks, edata); } } @@ -1173,8 +1174,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. */ - assert(eset_npages_get(&arena->eset_dirty) == 0); - assert(eset_npages_get(&arena->eset_muzzy) == 0); + assert(ecache_npages_get(&arena->ecache_dirty) == 0); + assert(ecache_npages_get(&arena->ecache_muzzy) == 0); /* Deallocate retained memory. 
*/ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1230,10 +1231,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } @@ -1917,14 +1918,14 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, } } - malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->retain_grow_limit); + *old_limit = sz_pind2sz(arena->ecache_grow.limit); } if (new_limit != NULL) { - arena->retain_grow_limit = new_ind; + arena->ecache_grow.limit = new_ind; } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); return false; } @@ -2016,14 +2017,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (eset_init(tsdn, &arena->eset_dirty, extent_state_dirty, true)) { + if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. 
*/ - if (eset_init(tsdn, &arena->eset_muzzy, extent_state_muzzy, false)) { + if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, false)) { goto label_error; } /* @@ -2032,7 +2033,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (eset_init(tsdn, &arena->eset_retained, extent_state_retained, + if (ecache_init(tsdn, &arena->ecache_retained, extent_state_retained, false)) { goto label_error; } @@ -2046,10 +2047,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - arena->extent_grow_next = sz_psz2ind(HUGEPAGE); - arena->retain_grow_limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + if (ecache_grow_init(tsdn, &arena->ecache_grow)) { goto label_error; } @@ -2187,14 +2185,14 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->extent_grow_mtx); + ecache_grow_prefork(tsdn, &arena->ecache_grow); } void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - eset_prefork(tsdn, &arena->eset_dirty); - eset_prefork(tsdn, &arena->eset_muzzy); - eset_prefork(tsdn, &arena->eset_retained); + ecache_prefork(tsdn, &arena->ecache_dirty); + ecache_prefork(tsdn, &arena->ecache_muzzy); + ecache_prefork(tsdn, &arena->ecache_retained); } void @@ -2234,10 +2232,10 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); edata_cache_postfork_parent(tsdn, &arena->edata_cache); - eset_postfork_parent(tsdn, &arena->eset_dirty); - eset_postfork_parent(tsdn, &arena->eset_muzzy); - eset_postfork_parent(tsdn, &arena->eset_retained); - malloc_mutex_postfork_parent(tsdn, &arena->extent_grow_mtx); + ecache_postfork_parent(tsdn, 
&arena->ecache_dirty); + ecache_postfork_parent(tsdn, &arena->ecache_muzzy); + ecache_postfork_parent(tsdn, &arena->ecache_retained); + ecache_grow_postfork_parent(tsdn, &arena->ecache_grow); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { @@ -2280,10 +2278,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); edata_cache_postfork_child(tsdn, &arena->edata_cache); - eset_postfork_child(tsdn, &arena->eset_dirty); - eset_postfork_child(tsdn, &arena->eset_muzzy); - eset_postfork_child(tsdn, &arena->eset_retained); - malloc_mutex_postfork_child(tsdn, &arena->extent_grow_mtx); + ecache_postfork_child(tsdn, &arena->ecache_dirty); + ecache_postfork_child(tsdn, &arena->ecache_muzzy); + ecache_postfork_child(tsdn, &arena->ecache_retained); + ecache_grow_postfork_child(tsdn, &arena->ecache_grow); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { diff --git a/src/background_thread.c b/src/background_thread.c index 400dae53..90b027ed 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -114,7 +114,7 @@ decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { static uint64_t arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, - eset_t *eset) { + ecache_t *ecache) { if (malloc_mutex_trylock(tsdn, &decay->mtx)) { /* Use minimal interval if decay is contended. 
*/ return BACKGROUND_THREAD_MIN_INTERVAL_NS; @@ -130,7 +130,7 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, uint64_t decay_interval_ns = nstime_ns(&decay->interval); assert(decay_interval_ns > 0); - size_t npages = eset_npages_get(eset); + size_t npages = ecache_npages_get(ecache); if (npages == 0) { unsigned i; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { @@ -202,12 +202,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->eset_dirty); + &arena->ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->eset_muzzy); + &arena->ecache_muzzy); return i1 < i2 ? i1 : i2; } @@ -717,8 +717,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (eset_npages_get(&arena->eset_dirty) > 0 || - eset_npages_get(&arena->eset_muzzy) > 0 || + (ecache_npages_get(&arena->ecache_dirty) > 0 || + ecache_npages_get(&arena->ecache_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index a58b22ba..eee12770 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3011,9 +3011,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->edata_cache.mtx); - MUTEX_PROF_RESET(arena->eset_dirty.mtx); - MUTEX_PROF_RESET(arena->eset_muzzy.mtx); - MUTEX_PROF_RESET(arena->eset_retained.mtx); + MUTEX_PROF_RESET(arena->ecache_dirty.mtx); + MUTEX_PROF_RESET(arena->ecache_muzzy.mtx); + MUTEX_PROF_RESET(arena->ecache_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git 
a/src/ecache.c b/src/ecache.c new file mode 100644 index 00000000..7155de35 --- /dev/null +++ b/src/ecache.c @@ -0,0 +1,54 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, + bool delay_coalesce) { + if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS, + malloc_mutex_rank_exclusive)) { + return true; + } + eset_init(&ecache->eset, state, delay_coalesce); + return false; +} + +void +ecache_prefork(tsdn_t *tsdn, ecache_t *ecache) { + malloc_mutex_prefork(tsdn, &ecache->mtx); +} + +void +ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache) { + malloc_mutex_postfork_parent(tsdn, &ecache->mtx); +} + +void +ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache) { + malloc_mutex_postfork_child(tsdn, &ecache->mtx); +} + +bool +ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + ecache_grow->next = sz_psz2ind(HUGEPAGE); + ecache_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); + if (malloc_mutex_init(&ecache_grow->mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + return true; + } + return false; +} + +void +ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + malloc_mutex_prefork(tsdn, &ecache_grow->mtx); +} + +void +ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + malloc_mutex_postfork_parent(tsdn, &ecache_grow->mtx); +} + +void +ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { + malloc_mutex_postfork_child(tsdn, &ecache_grow->mtx); +} diff --git a/src/eset.c b/src/eset.c index 88b9c8c7..e71868c8 100644 --- a/src/eset.c +++ b/src/eset.c @@ -8,13 +8,9 @@ const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); -bool -eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, +void +eset_init(eset_t *eset, extent_state_t state, bool delay_coalesce) { - if (malloc_mutex_init(&eset->mtx, "extents", 
WITNESS_RANK_EXTENTS, - malloc_mutex_rank_exclusive)) { - return true; - } for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { edata_heap_new(&eset->heaps[i]); } @@ -23,7 +19,6 @@ eset_init(tsdn_t *tsdn, eset_t *eset, extent_state_t state, atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; eset->delay_coalesce = delay_coalesce; - return false; } extent_state_t @@ -63,8 +58,7 @@ eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { } void -eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); +eset_insert(eset_t *eset, edata_t *edata) { assert(edata_state_get(edata) == eset->state); size_t size = edata_size_get(edata); @@ -94,8 +88,7 @@ eset_insert_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { } void -eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); +eset_remove(eset_t *eset, edata_t *edata) { assert(edata_state_get(edata) == eset->state); size_t size = edata_size_get(edata); @@ -114,9 +107,13 @@ eset_remove_locked(tsdn_t *tsdn, eset_t *eset, edata_t *edata) { edata_list_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* - * As in eset_insert_locked, we hold eset->mtx and so don't need atomic + * As in eset_insert, we hold eset->mtx and so don't need atomic * operations for updating eset->npages. */ + /* + * This class is not thread-safe in general; we rely on external + * synchronization for all mutating operations. + */ size_t cur_extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); @@ -166,7 +163,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * large enough. 
*/ static edata_t * -eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { +eset_first_fit(eset_t *eset, size_t size) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -211,16 +208,14 @@ eset_first_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t size) { } edata_t * -eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { - malloc_mutex_assert_owner(tsdn, &eset->mtx); - +eset_fit(eset_t *eset, size_t esize, size_t alignment) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit_locked(tsdn, eset, max_size); + edata_t *edata = eset_first_fit(eset, max_size); if (alignment > PAGE && edata == NULL) { /* @@ -233,18 +228,3 @@ eset_fit_locked(tsdn_t *tsdn, eset_t *eset, size_t esize, size_t alignment) { return edata; } - -void -eset_prefork(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_prefork(tsdn, &eset->mtx); -} - -void -eset_postfork_parent(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_parent(tsdn, &eset->mtx); -} - -void -eset_postfork_child(tsdn_t *tsdn, eset_t *eset) { - malloc_mutex_postfork_child(tsdn, &eset->mtx); -} diff --git a/src/extent2.c b/src/extent2.c index 21f9cdbd..3b47e070 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -45,13 +45,13 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, void *new_addr, size_t usize, size_t pad, size_t alignment, + ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool 
growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - eset_t *eset, edata_t *edata, bool growing_retained); + ecache_t *ecache, edata_t *edata, bool growing_retained); /******************************************************************************/ @@ -165,22 +165,22 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata) { + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, eset, + edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, edata, &coalesced, false); - edata_state_set(edata, eset_state_get(eset)); + edata_state_set(edata, eset_state_get(&ecache->eset)); if (!coalesced) { return true; } - eset_insert_locked(tsdn, eset, edata); + eset_insert(&ecache->eset, edata); return false; } edata_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -188,14 +188,14 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = extent_recycle(tsdn, arena, ehooks, eset, new_addr, + edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, size, pad, alignment, slab, szind, zero, commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { 
assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -206,16 +206,16 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, arena, ehooks, eset, edata, false); + extent_record(tsdn, arena, ehooks, ecache, edata, false); } edata_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); /* * Get the LRU coalesced extent, if any. If coalescing was delayed, @@ -224,24 +224,23 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - edata = edata_list_first(&eset->lru); + edata = edata_list_first(&ecache->eset.lru); if (edata == NULL) { goto label_return; } /* Check the eviction limit. */ - size_t extents_npages = atomic_load_zu(&eset->npages, - ATOMIC_RELAXED); + size_t extents_npages = ecache_npages_get(ecache); if (extents_npages <= npages_min) { edata = NULL; goto label_return; } - eset_remove_locked(tsdn, eset, edata); - if (!eset->delay_coalesce) { + eset_remove(&ecache->eset, edata); + if (!ecache->eset.delay_coalesce) { break; } /* Try to coalesce. */ if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, edata)) { + ecache, edata)) { break; } /* @@ -254,7 +253,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * Either mark the extent active or deregister it to protect against * concurrent operations. 
*/ - switch (eset_state_get(eset)) { + switch (eset_state_get(&ecache->eset)) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -269,7 +268,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, } label_return: - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; } @@ -278,8 +277,8 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - edata_t *edata, bool growing_retained) { +extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); @@ -288,7 +287,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. 
*/ - if (eset_state_get(eset) == extent_state_dirty) { + if (eset_state_get(&ecache->eset) == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, @@ -299,30 +298,30 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, } static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, +extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); assert(edata_state_get(edata) == extent_state_active); - edata_state_set(edata, eset_state_get(eset)); - eset_insert_locked(tsdn, eset, edata); + edata_state_set(edata, eset_state_get(&ecache->eset)); + eset_insert(&ecache->eset, edata); } static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, eset_t *eset, +extent_deactivate(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { - malloc_mutex_lock(tsdn, &eset->mtx); - extent_deactivate_locked(tsdn, arena, eset, edata); - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); + extent_deactivate_locked(tsdn, arena, ecache, edata); + malloc_mutex_unlock(tsdn, &ecache->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, eset_t *eset, +extent_activate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); - assert(edata_state_get(edata) == eset_state_get(eset)); + assert(edata_state_get(edata) == eset_state_get(&ecache->eset)); - eset_remove_locked(tsdn, eset, edata); + eset_remove(&ecache->eset, edata); edata_state_set(edata, extent_state_active); } @@ -515,12 +514,12 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { } /* - * Tries to find and remove an extent from eset that can be used for the + * Tries to find and remove an extent from ecache that can be used for the * given 
allocation request. */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -543,7 +542,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } size_t esize = size + pad; - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, @@ -557,21 +556,22 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, assert(edata_base_get(edata) == new_addr); if (edata_arena_ind_get(edata) != arena_ind_get(arena) || edata_size_get(edata) < esize - || edata_state_get(edata) != eset_state_get(eset)) { + || edata_state_get(edata) + != eset_state_get(&ecache->eset)) { edata = NULL; } extent_unlock_edata(tsdn, unlock_edata); } } else { - edata = eset_fit_locked(tsdn, eset, esize, alignment); + edata = eset_fit(&ecache->eset, esize, alignment); } if (edata == NULL) { - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; } - extent_activate_locked(tsdn, arena, eset, edata); - malloc_mutex_unlock(tsdn, &eset->mtx); + extent_activate_locked(tsdn, arena, ecache, edata); + malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; } @@ -580,7 +580,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Given an allocation request and an extent guaranteed to be able to satisfy * it, this splits off lead and trail extents, leaving edata pointing to an * extent satisfying the allocation. 
- * This function doesn't put lead or trail into any eset_t; it's the caller's + * This function doesn't put lead or trail into any ecache; it's the caller's * job to ensure that they can be reused. */ typedef enum { @@ -676,11 +676,11 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * This fulfills the indicated allocation request out of the given extent (which * the caller should have ensured was big enough). If there's any unused space * before or after the resulting allocation, that space is given its own extent - * and put back into eset. + * and put back into ecache. */ static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, void *new_addr, size_t size, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; @@ -697,19 +697,19 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, && !opt_retain) { /* * Split isn't supported (implies Windows w/o retain). Avoid - * leaking the eset. + * leaking the extent. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, eset, to_leak); + extent_deactivate(tsdn, arena, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, eset, lead); + extent_deactivate(tsdn, arena, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, eset, trail); + extent_deactivate(tsdn, arena, ecache, trail); } return edata; } else { @@ -724,7 +724,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (to_leak != NULL) { void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, eset, to_leak, + extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, growing_retained); assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, false) == NULL); @@ -736,10 +736,10 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* * Tries to satisfy the given allocation request by reusing one of the extents - * in the given eset_t. + * in the given ecache_t. 
*/ static edata_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -752,13 +752,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, eset, new_addr, size, pad, alignment, slab, + rtree_ctx, ecache, new_addr, size, pad, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, eset, + edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, ecache, new_addr, size, pad, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { @@ -768,7 +768,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, arena, ehooks, eset, edata, + extent_record(tsdn, arena, ehooks, ecache, edata, growing_retained); return NULL; } @@ -810,7 +810,7 @@ static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->extent_grow_mtx); + malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); assert(pad == 0 || !slab); assert(!*zero || !slab); @@ -825,15 +825,15 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * satisfy this request. 
*/ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + size_t alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->extent_grow_next + egn_skip >= + if (arena->ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz(arena->extent_grow_next + egn_skip); + alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, @@ -881,11 +881,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, lead, true); + &arena->ecache_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, trail, true); + &arena->ecache_retained, trail, true); } } else { /* @@ -898,12 +898,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, arena, ehooks, - &arena->eset_retained, to_salvage, true); + &arena->ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, - &arena->eset_retained, to_leak, true); + &arena->ecache_retained, to_leak, true); } goto label_err; } @@ -912,7 +912,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, - &arena->eset_retained, edata, true); + &arena->ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -930,14 +930,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. 
*/ - if (arena->extent_grow_next + egn_skip + 1 <= - arena->retain_grow_limit) { - arena->extent_grow_next += egn_skip + 1; + if (arena->ecache_grow.next + egn_skip + 1 <= + arena->ecache_grow.limit) { + arena->ecache_grow.next += egn_skip + 1; } else { - arena->extent_grow_next = arena->retain_grow_limit; + arena->ecache_grow.next = arena->ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -962,7 +962,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); return NULL; } @@ -973,13 +973,13 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->eset_retained, new_addr, size, pad, alignment, slab, + &arena->ecache_retained, new_addr, size, pad, alignment, slab, szind, zero, commit, true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -988,9 +988,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); + malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &arena->extent_grow_mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->ecache_grow.mtx); return edata; } @@ -1054,7 +1054,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, +extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, const edata_t *outer) { assert(edata_arena_ind_get(inner) == arena_ind_get(arena)); if (edata_arena_ind_get(outer) != arena_ind_get(arena)) { @@ -1062,7 +1062,7 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, } assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) != eset->state) { + if (edata_state_get(outer) != ecache->eset.state) { return false; } @@ -1074,19 +1074,20 @@ extent_can_coalesce(arena_t *arena, eset_t *eset, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, - edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, eset, inner, outer)); +extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, + bool growing_retained) { + assert(extent_can_coalesce(arena, ecache, inner, outer)); - extent_activate_locked(tsdn, arena, eset, outer); + extent_activate_locked(tsdn, arena, ecache, outer); - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, arena, ehooks, forward ? inner : outer, forward ? 
outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, eset, outer); + extent_deactivate_locked(tsdn, arena, ecache, outer); } return err; @@ -1094,7 +1095,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size @@ -1114,19 +1115,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_past_get(edata), inactive_only); if (next != NULL) { /* - * eset->mtx only protects against races for - * like-state eset, so call extent_can_coalesce() + * ecache->mtx only protects against races for + * like-state extents, so call extent_can_coalesce() * before releasing next's pool lock. */ - bool can_coalesce = extent_can_coalesce(arena, eset, + bool can_coalesce = extent_can_coalesce(arena, ecache, edata, next); extent_unlock_edata(tsdn, next); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, edata, next, true, + ehooks, ecache, edata, next, true, growing_retained)) { - if (eset->delay_coalesce) { + if (ecache->eset.delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; return edata; @@ -1139,15 +1140,15 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, edata_before_get(edata), inactive_only); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, eset, + bool can_coalesce = extent_can_coalesce(arena, ecache, edata, prev); extent_unlock_edata(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, eset, edata, prev, false, + ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; - if (eset->delay_coalesce) { + if (ecache->eset.delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return edata; @@ -1157,7 +1158,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } } while (again); - if (eset->delay_coalesce) { + if (ecache->eset.delay_coalesce) { *coalesced = false; } return edata; @@ -1165,35 +1166,35 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, static edata_t * extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, edata, coalesced, growing_retained, false); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, eset_t *eset, edata_t *edata, bool *coalesced, + rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, eset, + return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, edata, coalesced, growing_retained, true); } /* * Does the metadata management portions of putting an unused extent 
into the - * given eset_t (coalesces, deregisters slab interiors, the heap operations). + * given ecache_t (coalesces, deregisters slab interiors, the heap operations). */ static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, +extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((eset_state_get(eset) != extent_state_dirty && - eset_state_get(eset) != extent_state_muzzy) || + assert((eset_state_get(&ecache->eset) != extent_state_dirty && + eset_state_get(&ecache->eset) != extent_state_muzzy) || !edata_zeroed_get(edata)); - malloc_mutex_lock(tsdn, &eset->mtx); + malloc_mutex_lock(tsdn, &ecache->mtx); edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { @@ -1204,29 +1205,29 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, eset_t *eset, assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)edata_base_get(edata), true) == edata); - if (!eset->delay_coalesce) { + if (!ecache->eset.delay_coalesce) { edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - eset, edata, NULL, growing_retained); + ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(eset == &arena->eset_dirty); - /* Always coalesce large eset eagerly. */ + assert(ecache == &arena->ecache_dirty); + /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, eset, edata, &coalesced, + rtree_ctx, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. 
*/ - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); arena_decay_extent(tsdn, arena, ehooks, edata); return; } } - extent_deactivate_locked(tsdn, arena, eset, edata); + extent_deactivate_locked(tsdn, arena, ecache, edata); - malloc_mutex_unlock(tsdn, &eset->mtx); + malloc_mutex_unlock(tsdn, &ecache->mtx); } void @@ -1312,7 +1313,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->eset_retained, edata, false); + extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, + false); } void diff --git a/src/large.c b/src/large.c index 67b47453..b8b70f4e 100644 --- a/src/large.c +++ b/src/large.c @@ -149,10 +149,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool commit = true; edata_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_dirty, + if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, ehooks, &arena->eset_muzzy, + || (trail = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { diff --git a/test/unit/retained.c b/test/unit/retained.c index 7993fd3d..e6b6f7b7 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->extent_grow_next; pind++) { + arena->ecache_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; From 98eb40e563bd2c42bfd5d7275584a4aa69a2b3b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 16:33:19 -0800 
Subject: [PATCH 1471/2608] Move delay_coalesce from the eset to the ecache. --- include/jemalloc/internal/ecache.h | 5 +++++ include/jemalloc/internal/eset.h | 11 +++-------- src/ecache.c | 3 ++- src/eset.c | 12 +++++------- src/extent2.c | 13 +++++++------ 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 70857203..05388b62 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -8,6 +8,11 @@ typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; eset_t eset; + /* + * If true, delay coalescing until eviction; otherwise coalesce during + * deallocation. + */ + bool delay_coalesce; }; typedef struct ecache_grow_s ecache_grow_t; diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index bbc6b5cd..7a1055c2 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -32,15 +32,9 @@ struct eset_s { /* All stored extents must be in the same state. */ extent_state_t state; - - /* - * If true, delay coalescing until eviction; otherwise coalesce during - * deallocation. - */ - bool delay_coalesce; }; -void eset_init(eset_t *eset, extent_state_t state, bool delay_coalesce); +void eset_init(eset_t *eset, extent_state_t state); extent_state_t eset_state_get(const eset_t *eset); size_t eset_npages_get(eset_t *eset); @@ -55,6 +49,7 @@ void eset_remove(eset_t *eset, edata_t *edata); * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. 
*/ -edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment); +edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, + bool delay_coalesce); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/ecache.c b/src/ecache.c index 7155de35..d5ed2356 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -8,7 +8,8 @@ ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } - eset_init(&ecache->eset, state, delay_coalesce); + ecache->delay_coalesce = delay_coalesce; + eset_init(&ecache->eset, state); return false; } diff --git a/src/eset.c b/src/eset.c index e71868c8..9d3dedae 100644 --- a/src/eset.c +++ b/src/eset.c @@ -9,8 +9,7 @@ const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); void -eset_init(eset_t *eset, extent_state_t state, - bool delay_coalesce) { +eset_init(eset_t *eset, extent_state_t state) { for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { edata_heap_new(&eset->heaps[i]); } @@ -18,7 +17,6 @@ eset_init(eset_t *eset, extent_state_t state, edata_list_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; - eset->delay_coalesce = delay_coalesce; } extent_state_t @@ -163,7 +161,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * large enough. */ static edata_t * -eset_first_fit(eset_t *eset, size_t size) { +eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -191,7 +189,7 @@ eset_first_fit(eset_t *eset, size_t size) { * * Only do check for dirty eset (delay_coalesce). 
*/ - if (eset->delay_coalesce && + if (delay_coalesce && (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { break; } @@ -208,14 +206,14 @@ eset_first_fit(eset_t *eset, size_t size) { } edata_t * -eset_fit(eset_t *eset, size_t esize, size_t alignment) { +eset_fit(eset_t *eset, size_t esize, size_t alignment, bool delay_coalesce) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit(eset, max_size); + edata_t *edata = eset_first_fit(eset, max_size, delay_coalesce); if (alignment > PAGE && edata == NULL) { /* diff --git a/src/extent2.c b/src/extent2.c index 3b47e070..96d4a112 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -235,7 +235,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, goto label_return; } eset_remove(&ecache->eset, edata); - if (!ecache->eset.delay_coalesce) { + if (!ecache->delay_coalesce) { break; } /* Try to coalesce. */ @@ -563,7 +563,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_unlock_edata(tsdn, unlock_edata); } } else { - edata = eset_fit(&ecache->eset, esize, alignment); + edata = eset_fit(&ecache->eset, esize, alignment, + ecache->delay_coalesce); } if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); @@ -1127,7 +1128,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (can_coalesce && !extent_coalesce(tsdn, arena, ehooks, ecache, edata, next, true, growing_retained)) { - if (ecache->eset.delay_coalesce) { + if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; return edata; @@ -1148,7 +1149,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; - if (ecache->eset.delay_coalesce) { + if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ *coalesced = true; return edata; @@ -1158,7 +1159,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } } while (again); - if (ecache->eset.delay_coalesce) { + if (ecache->delay_coalesce) { *coalesced = false; } return edata; @@ -1205,7 +1206,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)edata_base_get(edata), true) == edata); - if (!ecache->eset.delay_coalesce) { + if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { From d8b0b66c6c0818f83661f69a5eba05924efe0755 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 16:44:49 -0800 Subject: [PATCH 1472/2608] Put extent_state_t into ecache as well as eset. --- include/jemalloc/internal/ecache.h | 2 ++ include/jemalloc/internal/eset.h | 6 ++++-- src/arena.c | 2 +- src/ecache.c | 1 + src/eset.c | 5 ----- src/extent2.c | 18 +++++++++--------- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 05388b62..8532192c 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -8,6 +8,8 @@ typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; eset_t eset; + /* All stored extents must be in the same state. */ + extent_state_t state; /* * If true, delay coalescing until eviction; otherwise coalesce during * deallocation. diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 7a1055c2..5c1051a6 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -30,12 +30,14 @@ struct eset_s { /* Page sum for all extents in heaps. */ atomic_zu_t npages; - /* All stored extents must be in the same state. 
*/ + /* + * A duplication of the data in the containing ecache. We use this only + * for assertions on the states of the passed-in extents. + */ extent_state_t state; }; void eset_init(eset_t *eset, extent_state_t state); -extent_state_t eset_state_get(const eset_t *eset); size_t eset_npages_get(eset_t *eset); /* Get the number of extents in the given page size index. */ diff --git a/src/arena.c b/src/arena.c index 2652207a..5407ab90 100644 --- a/src/arena.c +++ b/src/arena.c @@ -857,7 +857,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t npages = edata_size_get(edata) >> LG_PAGE; npurged += npages; edata_list_remove(decay_extents, edata); - switch (eset_state_get(&ecache->eset)) { + switch (ecache->state) { case extent_state_active: not_reached(); case extent_state_dirty: diff --git a/src/ecache.c b/src/ecache.c index d5ed2356..a57a0a6f 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -8,6 +8,7 @@ ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, malloc_mutex_rank_exclusive)) { return true; } + ecache->state = state; ecache->delay_coalesce = delay_coalesce; eset_init(&ecache->eset, state); return false; diff --git a/src/eset.c b/src/eset.c index 9d3dedae..16ca72d1 100644 --- a/src/eset.c +++ b/src/eset.c @@ -19,11 +19,6 @@ eset_init(eset_t *eset, extent_state_t state) { eset->state = state; } -extent_state_t -eset_state_get(const eset_t *eset) { - return eset->state; -} - size_t eset_npages_get(eset_t *eset) { return atomic_load_zu(&eset->npages, ATOMIC_RELAXED); diff --git a/src/extent2.c b/src/extent2.c index 96d4a112..96c6ca68 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -170,7 +170,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool coalesced; edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, edata, &coalesced, false); - edata_state_set(edata, eset_state_get(&ecache->eset)); + edata_state_set(edata, ecache->state); if (!coalesced) { return true; @@ -253,7 
+253,7 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, * Either mark the extent active or deregister it to protect against * concurrent operations. */ - switch (eset_state_get(&ecache->eset)) { + switch (ecache->state) { case extent_state_active: not_reached(); case extent_state_dirty: @@ -287,7 +287,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ - if (eset_state_get(&ecache->eset) == extent_state_dirty) { + if (ecache->state == extent_state_dirty) { if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, @@ -303,7 +303,7 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); assert(edata_state_get(edata) == extent_state_active); - edata_state_set(edata, eset_state_get(&ecache->eset)); + edata_state_set(edata, ecache->state); eset_insert(&ecache->eset, edata); } @@ -319,7 +319,7 @@ static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); - assert(edata_state_get(edata) == eset_state_get(&ecache->eset)); + assert(edata_state_get(edata) == ecache->state); eset_remove(&ecache->eset, edata); edata_state_set(edata, extent_state_active); @@ -557,7 +557,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (edata_arena_ind_get(edata) != arena_ind_get(arena) || edata_size_get(edata) < esize || edata_state_get(edata) - != eset_state_get(&ecache->eset)) { + != ecache->state) { edata = NULL; } extent_unlock_edata(tsdn, unlock_edata); @@ -1063,7 +1063,7 @@ extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, } assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) 
!= ecache->eset.state) { + if (edata_state_get(outer) != ecache->state) { return false; } @@ -1191,8 +1191,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((eset_state_get(&ecache->eset) != extent_state_dirty && - eset_state_get(&ecache->eset) != extent_state_muzzy) || + assert((ecache->state != extent_state_dirty && + ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); malloc_mutex_lock(tsdn, &ecache->mtx); From ae23e5f42676bc7c851c8ea8036dfa87763be11b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 17:30:28 -0800 Subject: [PATCH 1473/2608] Unify extent_alloc_wrapper with the other wrappers. Previously, it was really more like extents_alloc (it looks in an ecache for an extent to reuse as its primary allocation pathway). Make that pathway more explciitly like extents_alloc, and rename extent_alloc_wrapper_hard accordingly. 
--- include/jemalloc/internal/extent2.h | 4 ++ src/arena.c | 9 ++-- src/extent2.c | 66 ++++++++++++++++------------- src/large.c | 7 +-- 4 files changed, 50 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 80e789e4..d74e2323 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -29,10 +29,14 @@ extern rtree_t extents_rtree; edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); +edata_t *extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); + edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); diff --git a/src/arena.c b/src/arena.c index 5407ab90..9edeaf6b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -444,8 +444,9 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } size_t size = usize + sz_large_pad; if (edata == NULL) { - edata = extent_alloc_wrapper(tsdn, arena, ehooks, NULL, usize, - sz_large_pad, alignment, false, szind, zero, &commit); + edata = extents_alloc_grow(tsdn, arena, ehooks, + &arena->ecache_retained, NULL, usize, sz_large_pad, + alignment, false, szind, zero, &commit); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add @@ -1210,8 +1211,8 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, zero = false; commit = true; - slab = 
extent_alloc_wrapper(tsdn, arena, ehooks, NULL, - bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); + slab = extents_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, + NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, diff --git a/src/extent2.c b/src/extent2.c index 96c6ca68..7ecf332a 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -52,6 +52,9 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); +static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, + ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit); /******************************************************************************/ @@ -194,6 +197,35 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, return edata; } +edata_t * +extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, bool *zero, bool *commit) { + assert(size + pad != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + size, pad, alignment, slab, szind, zero, commit); + if (edata == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper which does mmap that is + * very unlikely to succeed (unless it happens to be at + * the end). 
+ */ + return NULL; + } + edata = extent_alloc_wrapper(tsdn, arena, ehooks, + new_addr, size, pad, alignment, slab, szind, zero, commit); + } + + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; +} + void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { @@ -996,10 +1028,13 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; } -static edata_t * -extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t * +extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + size_t esize = size + pad; edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, arena->base); @@ -1027,33 +1062,6 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; } -edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); - if (edata == NULL) { - if (opt_retain && new_addr != NULL) { - /* - * When retain is enabled and new_addr is set, we do not - * attempt extent_alloc_wrapper_hard which does mmap - * that is very unlikely to succeed (unless it happens - * to be at the end). 
- */ - return NULL; - } - edata = extent_alloc_wrapper_hard(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); - } - - assert(edata == NULL || edata_dumpable_get(edata)); - return edata; -} - static bool extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, const edata_t *outer) { diff --git a/src/large.c b/src/large.c index b8b70f4e..4a3ad853 100644 --- a/src/large.c +++ b/src/large.c @@ -159,9 +159,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, new_mapping = false; } } else { - if ((trail = extent_alloc_wrapper(tsdn, arena, ehooks, - edata_past_get(edata), trailsize, 0, CACHELINE, false, - SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { + if ((trail = extents_alloc_grow(tsdn, arena, ehooks, + &arena->ecache_retained, edata_past_get(edata), trailsize, + 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) + == NULL) { return true; } if (config_stats) { From c792f3e4abd856933d4043a2b8f5fc2477c5d93d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Dec 2019 18:28:37 -0800 Subject: [PATCH 1474/2608] edata_cache: Remember the associated base_t. This will save us some trouble down the line when we stop passing arena pointers everywhere; we won't have to pass around a base_t pointer either. 
--- include/jemalloc/internal/edata_cache.h | 7 ++++--- src/arena.c | 2 +- src/edata_cache.c | 16 +++++++++++----- src/extent2.c | 9 +++------ src/extent_dss.c | 2 +- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index fc184084..9cb0d1c8 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -12,12 +12,13 @@ struct edata_cache_s { edata_tree_t avail; atomic_zu_t count; malloc_mutex_t mtx; + base_t *base; }; -bool edata_cache_init(edata_cache_t *edata_cache); -edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, - base_t *base); +bool edata_cache_init(edata_cache_t *edata_cache, base_t *base); +edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata); + void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); diff --git a/src/arena.c b/src/arena.c index 9edeaf6b..b5c8606f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2052,7 +2052,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (edata_cache_init(&arena->edata_cache)) { + if (edata_cache_init(&arena->edata_cache, base)) { goto label_error; } diff --git a/src/edata_cache.c b/src/edata_cache.c index 4d026029..1af7b96f 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -2,23 +2,29 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -edata_cache_init(edata_cache_t *edata_cache) { +edata_cache_init(edata_cache_t *edata_cache, base_t *base) { + edata_avail_new(&edata_cache->avail); + /* + * This is not strictly necessary, since the edata_cache_t is only + * created inside an arena, which is zeroed on creation. 
But this is + * handy as a safety measure. + */ + atomic_store_zu(&edata_cache->count, 0, ATOMIC_RELAXED); if (malloc_mutex_init(&edata_cache->mtx, "edata_cache", WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { return true; } - - edata_avail_new(&edata_cache->avail); + edata_cache->base = base; return false; } edata_t * -edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache, base_t *base) { +edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_lock(tsdn, &edata_cache->mtx); edata_t *edata = edata_avail_first(&edata_cache->avail); if (edata == NULL) { malloc_mutex_unlock(tsdn, &edata_cache->mtx); - return base_alloc_edata(tsdn, base); + return base_alloc_edata(tsdn, edata_cache->base); } edata_avail_remove(&edata_cache->avail, edata); atomic_fetch_sub_zu(&edata_cache->count, 1, ATOMIC_RELAXED); diff --git a/src/extent2.c b/src/extent2.c index 7ecf332a..ff98aa59 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -869,8 +869,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, - arena->base); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); if (edata == NULL) { goto label_err; } @@ -1036,8 +1035,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); size_t esize = size + pad; - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache, - arena->base); + edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); if (edata == NULL) { return NULL; } @@ -1430,8 +1428,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache, - arena->base); + edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache); if (trail == NULL) { goto label_error_a; } diff --git a/src/extent_dss.c b/src/extent_dss.c index 25ba944b..9cf098ea 100644 --- a/src/extent_dss.c +++ 
b/src/extent_dss.c @@ -123,7 +123,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - gap = edata_cache_get(tsdn, &arena->edata_cache, arena->base); + gap = edata_cache_get(tsdn, &arena->edata_cache); if (gap == NULL) { return NULL; } From 57fe99d4be118a1f34b45013be962f31f7786703 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 10:09:57 -0800 Subject: [PATCH 1475/2608] Move relevant index into the ehooks_t itself. It's always passed into the ehooks; keeping it colocated lets us avoid passing the arena everywhere. --- include/jemalloc/internal/base_inlines.h | 2 +- include/jemalloc/internal/base_structs.h | 3 --- include/jemalloc/internal/ehooks.h | 13 ++++++++++++- src/base.c | 7 +++---- src/ehooks.c | 4 +++- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h index aec0e2e1..221fca81 100644 --- a/include/jemalloc/internal/base_inlines.h +++ b/include/jemalloc/internal/base_inlines.h @@ -3,7 +3,7 @@ static inline unsigned base_ind_get(const base_t *base) { - return base->ind; + return ehooks_ind_get(&base->ehooks); } static inline bool diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index fb7e68a4..ff1fdfb3 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -20,9 +20,6 @@ struct base_block_s { }; struct base_s { - /* Associated arena's index within the arenas array. */ - unsigned ind; - /* * User-configurable extent hook functions. 
*/ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 6f4f950c..23ab29cd 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -20,6 +20,12 @@ extern const extent_hooks_t ehooks_default_extent_hooks; typedef struct ehooks_s ehooks_t; struct ehooks_s { + /* + * The user-visible id that goes with the ehooks (i.e. that of the base + * they're a part of, the associated arena's index within the arenas + * array). + */ + unsigned ind; /* Logically an extent_hooks_t *. */ atomic_p_t ptr; }; @@ -80,7 +86,12 @@ ehooks_post_reentrancy(tsdn_t *tsdn) { } /* Beginning of the public API. */ -void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks); +void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind); + +static inline unsigned +ehooks_ind_get(const ehooks_t *ehooks) { + return ehooks->ind; +} static inline void ehooks_set_extent_hooks_ptr(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { diff --git a/src/base.c b/src/base.c index 76d76557..ad3fe83c 100644 --- a/src/base.c +++ b/src/base.c @@ -346,7 +346,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * memory, and then initialize the ehooks within the base_t. 
*/ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, extent_hooks); + ehooks_init(&fake_ehooks, extent_hooks, ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -359,8 +359,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); - base->ind = ind; - ehooks_init(&base->ehooks, extent_hooks); + ehooks_init(&base->ehooks, extent_hooks, ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); @@ -411,7 +410,7 @@ extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { extent_hooks_t *old_extent_hooks = ehooks_get_extent_hooks_ptr(&base->ehooks); - ehooks_init(&base->ehooks, extent_hooks); + ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks)); return old_extent_hooks; } diff --git a/src/ehooks.c b/src/ehooks.c index 51b1514a..2fb2c4c4 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -4,7 +4,9 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_mmap.h" -void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks) { +void +ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind) { + ehooks->ind = ind; ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); } From 9cad5639ff7bca9f33b161363252ae868cec1d34 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 10:44:03 -0800 Subject: [PATCH 1476/2608] Ehooks: remove arena_ind parameter. This lives within the ehooks_t now, so that callers don't need to know it. 
--- include/jemalloc/internal/ehooks.h | 42 ++++++++++++++---------------- src/base.c | 12 ++++----- src/extent2.c | 35 ++++++++++--------------- 3 files changed, 39 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 23ab29cd..4d183e0b 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -172,17 +172,17 @@ ehooks_debug_zero_check(void *addr, size_t size) { static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, - size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + size_t alignment, bool *zero, bool *commit) { bool orig_zero = *zero; void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { ret = ehooks_default_alloc_impl(tsdn, new_addr, size, - alignment, zero, commit, arena_ind); + alignment, zero, commit, ehooks_ind_get(ehooks)); } else { ehooks_pre_reentrancy(tsdn); ret = extent_hooks->alloc(extent_hooks, new_addr, size, - alignment, zero, commit, arena_ind); + alignment, zero, commit, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } assert(new_addr == NULL || ret == NULL || new_addr == ret); @@ -195,7 +195,7 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, static inline bool ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { + bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_dalloc_impl(addr, size); @@ -204,7 +204,7 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->dalloc(extent_hooks, addr, size, - committed, arena_ind); + committed, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -212,7 +212,7 @@ 
ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline void ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed, unsigned arena_ind) { + bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_destroy_impl(addr, size); @@ -221,14 +221,14 @@ ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); extent_hooks->destroy(extent_hooks, addr, size, committed, - arena_ind); + ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } } static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); bool err; if (extent_hooks == &ehooks_default_extent_hooks) { @@ -238,7 +238,7 @@ ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); err = extent_hooks->commit(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } if (!err) { @@ -249,7 +249,7 @@ ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_decommit_impl(addr, offset, length); @@ -258,7 +258,7 @@ ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->decommit(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); 
ehooks_post_reentrancy(tsdn); return err; } @@ -266,7 +266,7 @@ ehooks_decommit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); #ifdef PAGES_CAN_PURGE_LAZY if (extent_hooks == &ehooks_default_extent_hooks) { @@ -278,7 +278,7 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->purge_lazy(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -286,7 +286,7 @@ ehooks_purge_lazy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t offset, size_t length, unsigned arena_ind) { + size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); /* * It would be correct to have a ehooks_debug_zero_check call at the end @@ -306,7 +306,7 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->purge_forced(extent_hooks, addr, size, - offset, length, arena_ind); + offset, length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -314,7 +314,7 @@ ehooks_purge_forced(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + size_t size_a, size_t size_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (ehooks_are_default(ehooks)) { return ehooks_default_split_impl(); @@ -323,7 +323,7 @@ 
ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->split(extent_hooks, addr, size, size_a, - size_b, committed, arena_ind); + size_b, committed, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } @@ -331,8 +331,7 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, - bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed, - unsigned arena_ind) { + bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, @@ -342,15 +341,14 @@ ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, } else { ehooks_pre_reentrancy(tsdn); bool err = extent_hooks->merge(extent_hooks, addr_a, size_a, - addr_b, size_b, committed, arena_ind); + addr_b, size_b, committed, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); return err; } } static inline void -ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - unsigned arena_ind) { +ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { ehooks_default_zero_impl(addr, size); diff --git a/src/base.c b/src/base.c index ad3fe83c..005b0c53 100644 --- a/src/base.c +++ b/src/base.c @@ -44,7 +44,7 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { } } else { addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, - &commit, ind); + &commit); } return addr; @@ -79,18 +79,16 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, /* Nothing worked. This should never happen. 
*/ not_reached(); } else { - if (!ehooks_dalloc(tsdn, ehooks, addr, size, true, ind)) { + if (!ehooks_dalloc(tsdn, ehooks, addr, size, true)) { goto label_done; } - if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size, ind)) { + if (!ehooks_decommit(tsdn, ehooks, addr, size, 0, size)) { goto label_done; } - if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size, - ind)) { + if (!ehooks_purge_forced(tsdn, ehooks, addr, size, 0, size)) { goto label_done; } - if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size, - ind)) { + if (!ehooks_purge_lazy(tsdn, ehooks, addr, size, 0, size)) { goto label_done; } /* Nothing worked. That's the application's problem. */ diff --git a/src/extent2.c b/src/extent2.c index ff98aa59..13b29207 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -827,8 +827,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *addr = edata_base_get(edata); if (!edata_zeroed_get(edata)) { size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size, - arena_ind_get(arena)); + ehooks_zero(tsdn, ehooks, addr, size); } } return edata; @@ -877,7 +876,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool committed = false; void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed, arena_ind_get(arena)); + &committed); edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, @@ -989,7 +988,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size, arena_ind_get(arena)); + ehooks_zero(tsdn, ehooks, addr, size); } return edata; @@ -1041,7 +1040,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); void *addr = ehooks_alloc(tsdn, ehooks, 
new_addr, esize, palignment, - zero, commit, arena_ind_get(arena)); + zero, commit); if (addr == NULL) { edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; @@ -1265,8 +1264,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Try to deallocate. */ err = ehooks_dalloc(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), edata_committed_get(edata), - arena_ind_get(arena)); + edata_size_get(edata), edata_committed_get(edata)); if (!err) { edata_cache_put(tsdn, &arena->edata_cache, edata); @@ -1303,13 +1301,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata))) { zeroed = true; } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata), - arena_ind_get(arena))) { + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = true; } else if (edata_state_get(edata) == extent_state_muzzy || !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata), - arena_ind_get(arena))) { + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = false; } else { zeroed = false; @@ -1339,8 +1335,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Try to destroy; silently fail otherwise. */ ehooks_destroy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), edata_committed_get(edata), - arena_ind_get(arena)); + edata_size_get(edata), edata_committed_get(edata)); edata_cache_put(tsdn, &arena->edata_cache, edata); } @@ -1351,7 +1346,7 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); edata_committed_set(edata, edata_committed_get(edata) || !err); return err; } @@ -1370,7 +1365,7 @@ extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); edata_committed_set(edata, edata_committed_get(edata) && err); return err; } @@ -1381,7 +1376,7 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); return err; } @@ -1398,7 +1393,7 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), offset, length, arena_ind_get(arena)); + edata_size_get(edata), offset, length); return err; } @@ -1467,8 +1462,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_lock_edata2(tsdn, edata, trail); bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), - size_a + size_b, size_a, size_b, edata_committed_get(edata), - arena_ind_get(arena)); + size_a + size_b, size_a, size_b, edata_committed_get(edata)); if (err) { goto label_error_c; @@ -1510,8 +1504,7 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), edata_size_get(a), edata_is_head_get(a), edata_base_get(b), - edata_size_get(b), edata_is_head_get(b), edata_committed_get(a), - arena_ind_get(arena)); + edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); if (err) { return true; From 439219be7e350113771a27c6fb19ce77f5d26e03 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 10:52:51 -0800 Subject: [PATCH 1477/2608] Remove extent_can_coalesce arena dependency. 
--- src/extent2.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/extent2.c b/src/extent2.c index 13b29207..0c816bcc 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1060,10 +1060,9 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_can_coalesce(arena_t *arena, ecache_t *ecache, const edata_t *inner, +extent_can_coalesce(ecache_t *ecache, const edata_t *inner, const edata_t *outer) { - assert(edata_arena_ind_get(inner) == arena_ind_get(arena)); - if (edata_arena_ind_get(outer) != arena_ind_get(arena)) { + if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { return false; } @@ -1083,7 +1082,7 @@ static bool extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(arena, ecache, inner, outer)); + assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, arena, ecache, outer); @@ -1125,7 +1124,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * like-state extents, so call extent_can_coalesce() * before releasing next's pool lock. */ - bool can_coalesce = extent_can_coalesce(arena, ecache, + bool can_coalesce = extent_can_coalesce(ecache, edata, next); extent_unlock_edata(tsdn, next); @@ -1146,8 +1145,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, edata_before_get(edata), inactive_only); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(arena, ecache, - edata, prev); + bool can_coalesce = extent_can_coalesce(ecache, edata, + prev); extent_unlock_edata(tsdn, prev); if (can_coalesce && !extent_coalesce(tsdn, arena, From 372042a082347dd4c036f5cfeff3853d5eac4b91 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 11:16:58 -0800 Subject: [PATCH 1478/2608] Remove merge dependence on the arena. 
--- include/jemalloc/internal/extent2.h | 4 ++-- src/extent2.c | 24 +++++++++++------------- src/large.c | 3 ++- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index d74e2323..d6854554 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -56,8 +56,8 @@ bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *a, edata_t *b); +bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/src/extent2.c b/src/extent2.c index 0c816bcc..1b70f202 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -30,8 +30,8 @@ static edata_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *a, edata_t *b, bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -1087,7 +1087,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_activate_locked(tsdn, arena, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, arena, ehooks, + bool err = extent_merge_impl(tsdn, ehooks, &arena->edata_cache, forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -1495,12 +1495,15 @@ extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, - edata_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, + edata_t *a, edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); + assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); + assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); + bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), edata_size_get(a), edata_is_head_get(a), edata_base_get(b), edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); @@ -1546,20 +1549,15 @@ extent_merge_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *a, extent_unlock_edata2(tsdn, a, b); - /* - * If we got here, we merged the extents; so they must be from the same - * arena (i.e. this one). 
- */ - assert(edata_arena_ind_get(b) == arena_ind_get(arena)); - edata_cache_put(tsdn, &arena->edata_cache, b); + edata_cache_put(tsdn, edata_cache, b); return false; } bool -extent_merge_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, arena, ehooks, a, b, false); + return extent_merge_impl(tsdn, ehooks, edata_cache, a, b, false); } bool diff --git a/src/large.c b/src/large.c index 4a3ad853..4af586db 100644 --- a/src/large.c +++ b/src/large.c @@ -170,7 +170,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } } - if (extent_merge_wrapper(tsdn, arena, ehooks, edata, trail)) { + if (extent_merge_wrapper(tsdn, ehooks, &arena->edata_cache, edata, + trail)) { extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } From 576d7047ab93baf37d851136f6ccd4fb38810ded Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 11:33:03 -0800 Subject: [PATCH 1479/2608] Ecache: Should know its arena_ind. What we call an arena_ind is really the index associated with some particular set of ehooks; the arena is just the user-visible portion of that. Making this explicit, and reframing checks in terms of that, makes the code simpler and cleaner, and helps us avoid passing the arena itself all throughout extent code. This lets us put back an arena-specific assert. --- include/jemalloc/internal/ecache.h | 9 ++++++++- src/arena.c | 8 +++++--- src/ecache.c | 3 ++- src/extent2.c | 2 ++ 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 8532192c..a11418c0 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -10,6 +10,8 @@ struct ecache_s { eset_t eset; /* All stored extents must be in the same state. 
*/ extent_state_t state; + /* The index of the ehooks the ecache is associated with. */ + unsigned ind; /* * If true, delay coalescing until eviction; otherwise coalesce during * deallocation. @@ -52,8 +54,13 @@ ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { return eset_nbytes_get(&ecache->eset, ind); } +static inline unsigned +ecache_ind_get(ecache_t *ecache) { + return ecache->ind; +} + bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, - bool delay_coalesce); + unsigned ind, bool delay_coalesce); void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache); diff --git a/src/arena.c b/src/arena.c index b5c8606f..e795acf2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2018,14 +2018,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. */ - if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, true)) { + if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, ind, + true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, false)) { + if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, ind, + false)) { goto label_error; } /* @@ -2035,7 +2037,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * in the critical path. 
*/ if (ecache_init(tsdn, &arena->ecache_retained, extent_state_retained, - false)) { + ind, false)) { goto label_error; } diff --git a/src/ecache.c b/src/ecache.c index a57a0a6f..301b7ca6 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -2,13 +2,14 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, +ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, bool delay_coalesce) { if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS, malloc_mutex_rank_exclusive)) { return true; } ecache->state = state; + ecache->ind = ind; ecache->delay_coalesce = delay_coalesce; eset_init(&ecache->eset, state); return false; diff --git a/src/extent2.c b/src/extent2.c index 1b70f202..1dbccf6e 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1062,6 +1062,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, static bool extent_can_coalesce(ecache_t *ecache, const edata_t *inner, const edata_t *outer) { + assert(edata_arena_ind_get(inner) == ecache_ind_get(ecache)); + if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { return false; } From 282a382326fc4271f77df207074d73016fe8dcb0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:34:35 -0800 Subject: [PATCH 1480/2608] Extent: Break [de]activation's arena dependence. 
--- src/extent2.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/extent2.c b/src/extent2.c index 1dbccf6e..30f0f029 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -330,9 +330,8 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static void -extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, - edata_t *edata) { - assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); +extent_deactivate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); edata_state_set(edata, ecache->state); @@ -340,17 +339,15 @@ extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, } static void -extent_deactivate(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, - edata_t *edata) { +extent_deactivate(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, arena, ecache, edata); + extent_deactivate_locked(tsdn, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, arena_t *arena, ecache_t *ecache, - edata_t *edata) { - assert(edata_arena_ind_get(edata) == arena_ind_get(arena)); +extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { + assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state); eset_remove(&ecache->eset, edata); @@ -603,7 +600,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - extent_activate_locked(tsdn, arena, ecache, edata); + extent_activate_locked(tsdn, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; @@ -733,16 +730,16 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * leaking the extent. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, arena, ecache, to_leak); + extent_deactivate(tsdn, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, arena, ecache, lead); + extent_deactivate(tsdn, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, arena, ecache, trail); + extent_deactivate(tsdn, ecache, trail); } return edata; } else { @@ -1086,7 +1083,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); - extent_activate_locked(tsdn, arena, ecache, outer); + extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, ehooks, &arena->edata_cache, @@ -1094,7 +1091,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); if (err) { - extent_deactivate_locked(tsdn, arena, ecache, outer); + extent_deactivate_locked(tsdn, ecache, outer); } return err; @@ -1232,7 +1229,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, return; } } - extent_deactivate_locked(tsdn, arena, ecache, edata); + extent_deactivate_locked(tsdn, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } From 48ec5d4355c66c20d9143214c83823875ea91579 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:37:00 -0800 Subject: [PATCH 1481/2608] Break extent_coalesce arena dependence --- src/extent2.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/extent2.c b/src/extent2.c index 30f0f029..6539146a 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -1078,7 +1078,7 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t 
*ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); @@ -1086,7 +1086,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, ehooks, &arena->edata_cache, + bool err = extent_merge_impl(tsdn, ehooks, edata_cache, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -1128,9 +1128,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_unlock_edata(tsdn, next); - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, ecache, edata, next, true, - growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, + &arena->edata_cache, ehooks, ecache, edata, next, + true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -1148,9 +1148,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, prev); extent_unlock_edata(tsdn, prev); - if (can_coalesce && !extent_coalesce(tsdn, arena, - ehooks, ecache, edata, prev, false, - growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, + &arena->edata_cache, ehooks, ecache, edata, prev, + false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ From 0aa9769fb0cc73e1df6c728af10b45dfb4d1bc71 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:46:25 -0800 Subject: [PATCH 1482/2608] Break commit functions' arena dependence --- include/jemalloc/internal/extent2.h | 8 +-- src/extent2.c | 84 ++++++++++++++--------------- 2 files changed, 46 insertions(+), 46 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index d6854554..eda31cd1 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -45,10 +45,10 @@ void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); -bool extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); +bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); +bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/extent2.c b/src/extent2.c index 6539146a..c1dfa99d 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -18,8 +18,8 @@ mutex_pool_t extent_mutex_pool; size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; -static bool extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained); +static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained); static bool 
extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); @@ -47,7 +47,7 @@ static void extent_deregister(tsdn_t *tsdn, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, +static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, @@ -167,12 +167,13 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata) { +extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, + edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, ecache, - edata, &coalesced, false); + edata = extent_try_coalesce(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, &coalesced, false); edata_state_set(edata, ecache->state); if (!coalesced) { @@ -271,8 +272,8 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, break; } /* Try to coalesce. 
*/ - if (extent_try_delayed_coalesce(tsdn, arena, ehooks, rtree_ctx, - ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, + ehooks, rtree_ctx, ecache, edata)) { break; } /* @@ -796,7 +797,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { extent_record(tsdn, arena, ehooks, ecache, edata, growing_retained); @@ -937,7 +938,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, arena, ehooks, edata, 0, + if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, true); @@ -1098,9 +1099,9 @@ extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained, bool inactive_only) { +extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -1128,9 +1129,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_unlock_edata(tsdn, next); - if (can_coalesce && !extent_coalesce(tsdn, - &arena->edata_cache, ehooks, ecache, edata, next, - true, growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, edata_cache, + ehooks, ecache, edata, next, true, + growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal 
coalescing. */ *coalesced = true; @@ -1148,9 +1149,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, prev); extent_unlock_edata(tsdn, prev); - if (can_coalesce && !extent_coalesce(tsdn, - &arena->edata_cache, ehooks, ecache, edata, prev, - false, growing_retained)) { + if (can_coalesce && !extent_coalesce(tsdn, edata_cache, + ehooks, ecache, edata, prev, false, + growing_retained)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -1169,19 +1170,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, - edata, coalesced, growing_retained, false); + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, arena, ehooks, rtree_ctx, ecache, - edata, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, + bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, + ecache, edata, coalesced, growing_retained, true); } /* @@ -1210,17 +1211,17 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, (uintptr_t)edata_base_get(edata), true) == edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, arena, ehooks, rtree_ctx, - ecache, 
edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, + rtree_ctx, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &arena->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, arena, ehooks, - rtree_ctx, ecache, edata, &coalesced, - growing_retained); + edata = extent_try_coalesce_large(tsdn, + &arena->edata_cache, ehooks, rtree_ctx, ecache, + edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold) { /* Shortcut to purge the oversize extent eagerly. */ @@ -1295,7 +1296,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool zeroed; if (!edata_committed_get(edata)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, arena, ehooks, edata, 0, + } else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0, edata_size_get(edata))) { zeroed = true; } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), @@ -1339,8 +1340,8 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } static bool -extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { +extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_commit(tsdn, ehooks, edata_base_get(edata), @@ -1350,16 +1351,15 @@ extent_commit_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } bool -extent_commit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, - size_t length) { - return extent_commit_impl(tsdn, arena, ehooks, edata, offset, length, +extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_commit_impl(tsdn, ehooks, edata, offset, length, false); } bool -extent_decommit_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { +extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), From 56cc56b69214bf3dbcd64ad83aa63fe22be20d62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Dec 2019 13:52:34 -0800 Subject: [PATCH 1483/2608] Break extent split dependence on arena. 
--- include/jemalloc/internal/extent2.h | 6 +++--- src/extent2.c | 29 +++++++++++++++-------------- src/large.c | 2 +- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index eda31cd1..9e1f0d62 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -53,9 +53,9 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b); +edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b); diff --git a/src/extent2.c b/src/extent2.c index c1dfa99d..e4218c59 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -26,7 +26,7 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, arena_t *arena, +static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); @@ -659,9 +659,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. 
*/ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, arena, ehooks, *lead, - leadsize, SC_NSIZES, false, esize + trailsize, szind, slab, - growing_retained); + *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, + *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, + slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -671,8 +671,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, arena, ehooks, *edata, esize, - szind, slab, trailsize, SC_NSIZES, false, growing_retained); + *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, + *edata, esize, szind, slab, trailsize, SC_NSIZES, false, + growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -1410,7 +1411,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * and returns the trail (except in case of error). 
*/ static edata_t * -extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); @@ -1421,12 +1422,12 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, &arena->edata_cache); + edata_t *trail = edata_cache_get(tsdn, edata_cache); if (trail == NULL) { goto label_error_a; } - edata_init(trail, arena_ind_get(arena), + edata_init(trail, ehooks_ind_get(ehooks), (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), @@ -1438,7 +1439,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, { edata_t lead; - edata_init(&lead, arena_ind_get(arena), + edata_init(&lead, ehooks_ind_get(ehooks), edata_addr_get(edata), size_a, slab_a, szind_a, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), @@ -1480,17 +1481,17 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, label_error_c: extent_unlock_edata2(tsdn, edata, trail); label_error_b: - edata_cache_put(tsdn, &arena->edata_cache, trail); + edata_cache_put(tsdn, edata_cache, trail); label_error_a: return NULL; } edata_t * -extent_split_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, arena, ehooks, edata, size_a, szind_a, - slab_a, size_b, szind_b, slab_b, false); + return extent_split_impl(tsdn, edata_cache, ehooks, edata, size_a, + szind_a, slab_a, size_b, szind_b, slab_b, false); } static bool diff 
--git a/src/large.c b/src/large.c index 4af586db..f91fb749 100644 --- a/src/large.c +++ b/src/large.c @@ -104,7 +104,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { /* Split excess pages. */ if (diff != 0) { - edata_t *trail = extent_split_wrapper(tsdn, arena, + edata_t *trail = extent_split_wrapper(tsdn, &arena->edata_cache, ehooks, edata, usize + sz_large_pad, sz_size2index(usize), false, diff, SC_NSIZES, false); if (trail == NULL) { From 2f4fa80414fc9e7374f0b784e0f925aa31d0e599 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 16 Dec 2019 11:01:34 -0800 Subject: [PATCH 1484/2608] Rename extents -> ecache. --- include/jemalloc/internal/extent2.h | 8 ++++---- src/arena.c | 23 +++++++++++------------ src/extent2.c | 8 ++++---- src/large.c | 6 +++--- 4 files changed, 22 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent2.h index 9e1f0d62..fff69bb2 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent2.h @@ -26,15 +26,15 @@ extern size_t opt_lg_extent_max_active_fit; extern rtree_t extents_rtree; -edata_t *extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -edata_t *extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -edata_t *extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, 
ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/arena.c b/src/arena.c index e795acf2..7e1a673e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -258,7 +258,7 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - extents_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); + ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -434,17 +434,16 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; bool commit = true; - edata_t *edata = extents_alloc(tsdn, arena, ehooks, - &arena->ecache_dirty, NULL, usize, sz_large_pad, alignment, false, - szind, zero, &commit); + edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, + NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, + edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); } size_t size = usize + sz_large_pad; if (edata == NULL) { - edata = extents_alloc_grow(tsdn, arena, ehooks, + edata = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); if (config_stats) { @@ -828,7 +827,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; edata_t *edata; while (nstashed < npages_decay_max && - (edata = extents_evict(tsdn, arena, ehooks, ecache, npages_limit)) + (edata = ecache_evict(tsdn, arena, ehooks, ecache, npages_limit)) != NULL) { edata_list_append(decay_extents, edata); nstashed += 
edata_size_get(edata) >> LG_PAGE; @@ -865,7 +864,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (!all && muzzy_decay_ms != 0 && !extent_purge_lazy_wrapper(tsdn, arena, ehooks, edata, 0, edata_size_get(edata))) { - extents_dalloc(tsdn, arena, ehooks, + ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -1158,7 +1157,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { */ ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; - while ((edata = extents_evict(tsdn, arena, ehooks, + while ((edata = ecache_evict(tsdn, arena, ehooks, &arena->ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, ehooks, edata); } @@ -1211,7 +1210,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, zero = false; commit = true; - slab = extents_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, + slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); if (config_stats && slab != NULL) { @@ -1232,10 +1231,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; bool commit = true; - edata_t *slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, + edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = extents_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, + slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); } diff --git a/src/extent2.c b/src/extent2.c index e4218c59..8d78f95f 100644 --- a/src/extent2.c +++ b/src/extent2.c @@ -184,7 +184,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, 
} edata_t * -extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, +ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -199,7 +199,7 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } edata_t * -extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size + pad != 0); @@ -228,7 +228,7 @@ extents_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } void -extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, +ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -243,7 +243,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } edata_t * -extents_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, +ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); diff --git a/src/large.c b/src/large.c index f91fb749..5ca09f68 100644 --- a/src/large.c +++ b/src/large.c @@ -149,17 +149,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool commit = true; edata_t *trail; bool new_mapping; - if ((trail = extents_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, + if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL - || (trail = extents_alloc(tsdn, arena, 
ehooks, &arena->ecache_muzzy, + || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = extents_alloc_grow(tsdn, arena, ehooks, + if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) == NULL) { From e210ccc57ed165cc4308a09a9637f5d6e49b0dbd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 16 Dec 2019 11:05:07 -0800 Subject: [PATCH 1485/2608] Move extent2 -> extent. Eventually, we may fully break off the extent module; but not for some time. If it's going to live on in a non-transitory state, it might as well have the nicer name. --- Makefile.in | 2 +- include/jemalloc/internal/{extent2.h => extent.h} | 11 +++-------- .../jemalloc/internal/jemalloc_internal_inlines_b.h | 2 +- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 2 +- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 2 +- src/{extent2.c => extent.c} | 0 6 files changed, 7 insertions(+), 12 deletions(-) rename include/jemalloc/internal/{extent2.h => extent.h} (85%) rename src/{extent2.c => extent.c} (100%) diff --git a/Makefile.in b/Makefile.in index 71458487..40ba7f26 100644 --- a/Makefile.in +++ b/Makefile.in @@ -109,7 +109,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/eset.c \ - $(srcroot)src/extent2.c \ + $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/hash.c \ diff --git a/include/jemalloc/internal/extent2.h b/include/jemalloc/internal/extent.h similarity index 85% rename from include/jemalloc/internal/extent2.h rename to include/jemalloc/internal/extent.h index fff69bb2..8fecee62 100644 --- a/include/jemalloc/internal/extent2.h +++ b/include/jemalloc/internal/extent.h @@ -1,5 +1,5 @@ 
-#ifndef JEMALLOC_INTERNAL_EXTENT2_H -#define JEMALLOC_INTERNAL_EXTENT2_H +#ifndef JEMALLOC_INTERNAL_EXTENT_H +#define JEMALLOC_INTERNAL_EXTENT_H #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/ehooks.h" @@ -10,11 +10,6 @@ * This module contains the page-level allocator. It chooses the addresses that * allocations requested by other modules will inhabit, and updates the global * metadata to reflect allocation/deallocation/purging decisions. - * - * The naming ("extent2" for the module, and "extent_" or "extents_" for most of - * the functions) is historical. Eventually, the naming should be updated to - * reflect the functionality. Similarly, the utilization stats live here for no - * particular reason. This will also be changed, but much more immediately. */ /* @@ -61,4 +56,4 @@ bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, bool extent_boot(void); -#endif /* JEMALLOC_INTERNAL_EXTENT2_H */ +#endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 8367ee2b..ebfb331b 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H -#include "jemalloc/internal/extent2.h" +#include "jemalloc/internal/extent.h" #include "jemalloc/internal/rtree.h" /* Choose an arena based on a per-thread value. 
*/ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 7b2e84a9..58790903 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -49,7 +49,7 @@ - + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 338962b3..631de575 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -49,7 +49,7 @@ - + diff --git a/src/extent2.c b/src/extent.c similarity index 100% rename from src/extent2.c rename to src/extent.c From f2f2084e79c3546b38fb635401588afdd0560392 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Dec 2019 17:15:57 -0800 Subject: [PATCH 1486/2608] Ehooks: Assert alloc isn't NULL --- src/ehooks.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ehooks.c b/src/ehooks.c index 2fb2c4c4..78c28340 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -6,6 +6,8 @@ void ehooks_init(ehooks_t *ehooks, extent_hooks_t *extent_hooks, unsigned ind) { + /* All other hooks are optional; this one is not. */ + assert(extent_hooks->alloc != NULL); ehooks->ind = ind; ehooks_set_extent_hooks_ptr(ehooks, extent_hooks); } From 6342da0970257187f5fcc9504301eba75f92ccca Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Dec 2019 17:53:52 -0800 Subject: [PATCH 1487/2608] Ehooks: Further optimize default merge case. This avoids the cost of an iealloc in cases where the user uses the default merge hook without using the default extent hooks. 
--- include/jemalloc/internal/ehooks.h | 19 ++++++++++++++++++- src/ehooks.c | 2 +- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 4d183e0b..1bd44cb8 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -54,6 +54,13 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif bool ehooks_default_split_impl(); +/* + * Merge is the only default extent hook we declare -- see the comment in + * ehooks_merge. + */ +bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, + unsigned arena_ind); bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b); void ehooks_default_zero_impl(void *addr, size_t size); @@ -333,7 +340,17 @@ static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - if (extent_hooks == &ehooks_default_extent_hooks) { + /* + * The definition of extent_hooks merge function doesn't know about + * extent head state, but the implementation does. As a result, it + * needs to call iealloc again and walk the rtree. Since the cost of an + * iealloc is large relative to the cost of the default merge hook + * (which on posix-likes is just "return false"), we go even further + * when we short-circuit; we don't just check if the extent hooks + * generally are default, we check if the merge hook specifically is. 
+ */ + if (extent_hooks == &ehooks_default_extent_hooks + || extent_hooks->merge == &ehooks_default_merge) { return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } else if (extent_hooks->merge == NULL) { diff --git a/src/ehooks.c b/src/ehooks.c index 78c28340..667bee84 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -242,7 +242,7 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, return false; } -static bool +bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); From ea42174d07c2cf496e407bfae74be866ee090b2f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 18 Dec 2019 15:15:31 -0800 Subject: [PATCH 1488/2608] Refactor profiling headers --- include/jemalloc/internal/prof_data_externs.h | 22 +++++++++++++ include/jemalloc/internal/prof_externs.h | 32 +++---------------- include/jemalloc/internal/prof_log_externs.h | 17 ++++++++++ src/prof.c | 32 ++----------------- src/prof_data.c | 31 ++++++++++++++++++ src/prof_log.c | 4 ++- test/unit/prof_log.c | 1 + 7 files changed, 80 insertions(+), 59 deletions(-) create mode 100644 include/jemalloc/internal/prof_data_externs.h create mode 100644 include/jemalloc/internal/prof_log_externs.h diff --git a/include/jemalloc/internal/prof_data_externs.h b/include/jemalloc/internal/prof_data_externs.h new file mode 100644 index 00000000..95dc6b0b --- /dev/null +++ b/include/jemalloc/internal/prof_data_externs.h @@ -0,0 +1,22 @@ +#ifndef JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +extern malloc_mutex_t *gctx_locks; +extern malloc_mutex_t *tdata_locks; + +void prof_bt_hash(const void *key, size_t r_hash[2]); +bool prof_bt_keycomp(const void *k1, const void *k2); + +bool prof_data_init(tsd_t *tsd); +bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool 
leakcheck); +prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, + uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); + +#endif /* JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index bd73a296..9ba363bf 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -7,9 +7,6 @@ extern malloc_mutex_t bt2gctx_mtx; extern malloc_mutex_t tdatas_mtx; extern malloc_mutex_t prof_dump_mtx; -malloc_mutex_t *prof_gctx_mutex_choose(void); -malloc_mutex_t *prof_tdata_mutex_choose(uint64_t thr_uid); - extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; @@ -48,12 +45,14 @@ extern bool prof_booted; bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); +/* Functions only accessed in prof_inlines_b.h */ +prof_tdata_t *prof_tdata_init(tsd_t *tsd); +prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET size_t prof_tdata_count(void); @@ -76,10 +75,6 @@ bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); -void prof_bt_hash(const void *key, size_t r_hash[2]); -bool prof_bt_keycomp(const void *k1, const void *k2); 
-prof_tdata_t *prof_tdata_init(tsd_t *tsd); -prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); @@ -101,26 +96,7 @@ void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); void prof_sample_threshold_update(tsd_t *tsd); -void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); -bool prof_log_init(tsd_t *tsdn); -#ifdef JEMALLOC_JET -size_t prof_log_bt_count(void); -size_t prof_log_alloc_count(void); -size_t prof_log_thr_count(void); -bool prof_log_is_logging(void); -bool prof_log_rep_check(void); -void prof_log_dummy_set(bool new_value); -#endif - -/* Functions in prof_data.c only used in profiling code. */ -bool prof_data_init(tsd_t *tsd); -bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck); -prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); -void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_log_externs.h b/include/jemalloc/internal/prof_log_externs.h new file mode 100644 index 00000000..cde651b5 --- /dev/null +++ b/include/jemalloc/internal/prof_log_externs.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H + +#include "jemalloc/internal/mutex.h" + +void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); +bool prof_log_init(tsd_t *tsdn); +#ifdef JEMALLOC_JET +size_t prof_log_bt_count(void); +size_t prof_log_alloc_count(void); +size_t prof_log_thr_count(void); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool 
new_value); +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 3a72e9c4..58839bc4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,6 +5,8 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_data_externs.h" +#include "jemalloc/internal/prof_log_externs.h" #include "jemalloc/internal/thread_event.h" /* @@ -73,24 +75,6 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -/* - * Table of mutexes that are shared among gctx's. These are leaf locks, so - * there is no problem with using them for more than one gctx at the same time. - * The primary motivation for this sharing though is that gctx's are ephemeral, - * and destroying mutexes causes complications for systems that allocate when - * creating/destroying mutexes. - */ -static malloc_mutex_t *gctx_locks; -static atomic_u_t cum_gctxs; /* Atomic counter. */ - -/* - * Table of mutexes that are shared among tdata's. No operations require - * holding multiple tdata locks, so there is no problem with using them for more - * than one tdata at the same time, even though a gctx lock may be acquired - * while holding a tdata lock. - */ -static malloc_mutex_t *tdata_locks; - /* Non static to enable profiling. 
*/ malloc_mutex_t bt2gctx_mtx; @@ -431,18 +415,6 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { post_reentrancy(tsd); } -malloc_mutex_t * -prof_gctx_mutex_choose(void) { - unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); - - return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; -} - -malloc_mutex_t * -prof_tdata_mutex_choose(uint64_t thr_uid) { - return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; -} - /* * The bodies of this function and prof_leakcheck() are compiled out unless heap * profiling is enabled, so that it is possible to compile jemalloc with diff --git a/src/prof_data.c b/src/prof_data.c index 8a2cc845..5c2b926b 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof_data_externs.h" /* * This file defines and manages the core profiling data structures. @@ -25,6 +26,24 @@ /******************************************************************************/ +/* + * Table of mutexes that are shared among gctx's. These are leaf locks, so + * there is no problem with using them for more than one gctx at the same time. + * The primary motivation for this sharing though is that gctx's are ephemeral, + * and destroying mutexes causes complications for systems that allocate when + * creating/destroying mutexes. + */ +malloc_mutex_t *gctx_locks; +static atomic_u_t cum_gctxs; /* Atomic counter. */ + +/* + * Table of mutexes that are shared among tdata's. No operations require + * holding multiple tdata locks, so there is no problem with using them for more + * than one tdata at the same time, even though a gctx lock may be acquired + * while holding a tdata lock. + */ +malloc_mutex_t *tdata_locks; + /* * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data * structure that knows about all backtraces currently captured. 
@@ -114,6 +133,18 @@ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, /******************************************************************************/ +static malloc_mutex_t * +prof_gctx_mutex_choose(void) { + unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); + + return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; +} + +static malloc_mutex_t * +prof_tdata_mutex_choose(uint64_t thr_uid) { + return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS]; +} + bool prof_data_init(tsd_t *tsd) { tdata_tree_new(&tdatas); diff --git a/src/prof_log.c b/src/prof_log.c index 2904f0c6..6ac81e07 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -4,10 +4,12 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/emitter.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/prof_data_externs.h" +#include "jemalloc/internal/prof_log_externs.h" bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 9336ebca..e816d4e6 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -1,4 +1,5 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_log_externs.h" #define N_PARAM 100 #define N_THREADS 10 From 112dc36dd5cf3fc24e1bd9beda61b48cb1d6e9e3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 20 Dec 2019 10:38:05 -0800 Subject: [PATCH 1489/2608] Handle log_mtx during forking --- include/jemalloc/internal/prof_log_externs.h | 2 ++ src/prof.c | 3 +++ src/prof_log.c | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_log_externs.h b/include/jemalloc/internal/prof_log_externs.h index cde651b5..c8cc5a3e 100644 --- a/include/jemalloc/internal/prof_log_externs.h +++ b/include/jemalloc/internal/prof_log_externs.h @@ -3,6 
+3,8 @@ #include "jemalloc/internal/mutex.h" +extern malloc_mutex_t log_mtx; + void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_init(tsd_t *tsdn); #ifdef JEMALLOC_JET diff --git a/src/prof.c b/src/prof.c index 58839bc4..f35bba99 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1119,6 +1119,7 @@ prof_prefork0(tsdn_t *tsdn) { for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_prefork(tsdn, &tdata_locks[i]); } + malloc_mutex_prefork(tsdn, &log_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_prefork(tsdn, &gctx_locks[i]); } @@ -1150,6 +1151,7 @@ prof_postfork_parent(tsdn_t *tsdn) { for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); } + malloc_mutex_postfork_parent(tsdn, &log_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]); } @@ -1172,6 +1174,7 @@ prof_postfork_child(tsdn_t *tsdn) { for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); } + malloc_mutex_postfork_child(tsdn, &log_mtx); for (i = 0; i < PROF_NTDATA_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &tdata_locks[i]); } diff --git a/src/prof_log.c b/src/prof_log.c index 6ac81e07..9411b98c 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -104,7 +104,7 @@ static prof_alloc_node_t *log_alloc_first = NULL; static prof_alloc_node_t *log_alloc_last = NULL; /* Protects the prof_logging_state and any log_{...} variable. 
*/ -static malloc_mutex_t log_mtx; +malloc_mutex_t log_mtx; /******************************************************************************/ /* From 3fa142cf394d39f36d4bf7564251071f13527e4f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Sun, 22 Dec 2019 20:02:28 -0800 Subject: [PATCH 1490/2608] Remove _externs from prof internal header names --- .../jemalloc/internal/{prof_data_externs.h => prof_data.h} | 0 include/jemalloc/internal/{prof_log_externs.h => prof_log.h} | 0 src/prof.c | 4 ++-- src/prof_data.c | 2 +- src/prof_log.c | 4 ++-- test/unit/prof_log.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) rename include/jemalloc/internal/{prof_data_externs.h => prof_data.h} (100%) rename include/jemalloc/internal/{prof_log_externs.h => prof_log.h} (100%) diff --git a/include/jemalloc/internal/prof_data_externs.h b/include/jemalloc/internal/prof_data.h similarity index 100% rename from include/jemalloc/internal/prof_data_externs.h rename to include/jemalloc/internal/prof_data.h diff --git a/include/jemalloc/internal/prof_log_externs.h b/include/jemalloc/internal/prof_log.h similarity index 100% rename from include/jemalloc/internal/prof_log_externs.h rename to include/jemalloc/internal/prof_log.h diff --git a/src/prof.c b/src/prof.c index f35bba99..33b68198 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,8 +5,8 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_data_externs.h" -#include "jemalloc/internal/prof_log_externs.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/thread_event.h" /* diff --git a/src/prof_data.c b/src/prof_data.c index 5c2b926b..690070e6 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" -#include "jemalloc/internal/prof_data_externs.h" +#include 
"jemalloc/internal/prof_data.h" /* * This file defines and manages the core profiling data structures. diff --git a/src/prof_log.c b/src/prof_log.c index 9411b98c..11de4363 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -8,8 +8,8 @@ #include "jemalloc/internal/hash.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/prof_data_externs.h" -#include "jemalloc/internal/prof_log_externs.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_log.h" bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index e816d4e6..4b14fd56 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/prof_log_externs.h" +#include "jemalloc/internal/prof_log.h" #define N_PARAM 100 #define N_THREADS 10 From e98ddf7987b8e9556c269ca0829f438151b124b7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 24 Dec 2019 11:30:23 -0800 Subject: [PATCH 1491/2608] Fix unlikely condition in arena_prof_info_get() --- include/jemalloc/internal/arena_inlines_b.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 6dacab33..28f2e97f 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -49,7 +49,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, if (alloc_ctx == NULL) { edata = iealloc(tsd_tsdn(tsd), ptr); is_slab = edata_slab_get(edata); - } else if (!unlikely(is_slab = alloc_ctx->slab)) { + } else if (unlikely(!(is_slab = alloc_ctx->slab))) { edata = iealloc(tsd_tsdn(tsd), ptr); } From 7a27a05940d8eb0afc6ddbe32b420ce9e1452b91 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 26 Dec 2019 15:28:04 -0800 Subject: [PATCH 1492/2608] Delete tdata states used for 
cleanup --- include/jemalloc/internal/prof_inlines_b.h | 2 +- include/jemalloc/internal/prof_types.h | 8 -------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 193ede71..186446bb 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -88,7 +88,7 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { } prof_tdata_t *tdata = prof_tdata_get(tsd, true); - if (unlikely((uintptr_t)tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)) { + if (unlikely(tdata == NULL)) { return true; } diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 7a34385b..ad095da3 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -46,14 +46,6 @@ typedef struct prof_tdata_s prof_tdata_t; */ #define PROF_NTDATA_LOCKS 256 -/* - * prof_tdata pointers close to NULL are used to encode state information that - * is used for cleaning up during thread shutdown. - */ -#define PROF_TDATA_STATE_REINCARNATED ((prof_tdata_t *)(uintptr_t)1) -#define PROF_TDATA_STATE_PURGATORY ((prof_tdata_t *)(uintptr_t)2) -#define PROF_TDATA_STATE_MAX PROF_TDATA_STATE_PURGATORY - /* Minimize memory bloat for non-prof builds. 
*/ #ifdef JEMALLOC_PROF #define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) From 9a60cf54ec4b825a692330a1c56932fa1b121e27 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 18 Dec 2019 13:38:14 -0800 Subject: [PATCH 1493/2608] Last-N profiling mode --- Makefile.in | 2 + include/jemalloc/internal/arena_inlines_b.h | 16 +- include/jemalloc/internal/edata.h | 53 +- include/jemalloc/internal/large_externs.h | 3 +- include/jemalloc/internal/nstime.h | 10 +- include/jemalloc/internal/prof_externs.h | 9 + include/jemalloc/internal/prof_inlines_b.h | 20 +- include/jemalloc/internal/prof_recent.h | 16 + include/jemalloc/internal/prof_structs.h | 22 +- include/jemalloc/internal/prof_types.h | 4 + include/jemalloc/internal/witness.h | 1 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 45 +- src/extent.c | 2 + src/jemalloc.c | 48 +- src/large.c | 20 +- src/prof.c | 16 +- src/prof_data.c | 15 +- src/prof_recent.c | 553 ++++++++++++++++++ test/unit/mallctl.c | 1 + test/unit/prof_recent.c | 391 +++++++++++++ test/unit/prof_recent.sh | 5 + test/unit/prof_reset.sh | 2 +- 26 files changed, 1218 insertions(+), 44 deletions(-) create mode 100644 include/jemalloc/internal/prof_recent.h create mode 100644 src/prof_recent.c create mode 100644 test/unit/prof_recent.c create mode 100644 test/unit/prof_recent.sh diff --git a/Makefile.in b/Makefile.in index 40ba7f26..ad54720e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -126,6 +126,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ + $(srcroot)src/prof_recent.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ @@ -216,6 +217,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_log.c \ + 
$(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 28f2e97f..a310eb29 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -37,12 +37,12 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { JEMALLOC_ALWAYS_INLINE void arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, - prof_info_t *prof_info) { + prof_info_t *prof_info, bool reset_recent) { cassert(config_prof); assert(ptr != NULL); assert(prof_info != NULL); - const edata_t *edata; + edata_t *edata = NULL; bool is_slab; /* Static check. */ @@ -55,10 +55,14 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ - large_prof_info_get(edata, prof_info); + assert(edata != NULL); + large_prof_info_get(tsd, edata, prof_info, reset_recent); } else { - memset(prof_info, 0, sizeof(prof_info_t)); prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; + /* + * No need to set other fields in prof_info; they will never be + * accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U. 
+ */ } } @@ -92,11 +96,9 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { cassert(config_prof); - assert(ptr != NULL); - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); assert(!edata_slab_get(edata)); large_prof_info_set(edata, tctx); } diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 86f5ac57..2a81bdc6 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -25,6 +25,20 @@ enum extent_head_state_e { }; typedef enum extent_head_state_e extent_head_state_t; +struct e_prof_info_s { + /* Time when this was allocated. */ + nstime_t e_prof_alloc_time; + /* Points to a prof_tctx_t. */ + atomic_p_t e_prof_tctx; + /* + * Points to a prof_recent_t for the allocation; NULL + * means the recent allocation record no longer exists. + * Protected by prof_recent_alloc_mtx. + */ + atomic_p_t e_prof_recent_alloc; +}; +typedef struct e_prof_info_s e_prof_info_t; + /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; typedef ql_head(edata_t) edata_list_t; @@ -186,12 +200,7 @@ struct edata_s { slab_data_t e_slab_data; /* Profiling data, used for large objects. */ - struct { - /* Time when this was allocated. */ - nstime_t e_alloc_time; - /* Points to a prof_tctx_t. 
*/ - atomic_p_t e_prof_tctx; - }; + e_prof_info_t e_prof_info; }; }; @@ -333,12 +342,21 @@ edata_slab_data_get_const(const edata_t *edata) { return &edata->e_slab_data; } -static inline void -edata_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { - assert(prof_info != NULL); - prof_info->alloc_tctx = (prof_tctx_t *)atomic_load_p( - &edata->e_prof_tctx, ATOMIC_ACQUIRE); - prof_info->alloc_time = edata->e_alloc_time; +static inline prof_tctx_t * +edata_prof_tctx_get(const edata_t *edata) { + return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx, + ATOMIC_ACQUIRE); +} + +static inline const nstime_t * +edata_prof_alloc_time_get(const edata_t *edata) { + return &edata->e_prof_info.e_prof_alloc_time; +} + +static inline prof_recent_t * +edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) { + return (prof_recent_t *)atomic_load_p( + &edata->e_prof_info.e_prof_recent_alloc, ATOMIC_RELAXED); } static inline void @@ -457,12 +475,19 @@ edata_slab_set(edata_t *edata, bool slab) { static inline void edata_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { - atomic_store_p(&edata->e_prof_tctx, tctx, ATOMIC_RELEASE); + atomic_store_p(&edata->e_prof_info.e_prof_tctx, tctx, ATOMIC_RELEASE); } static inline void edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { - nstime_copy(&edata->e_alloc_time, t); + nstime_copy(&edata->e_prof_info.e_prof_alloc_time, t); +} + +static inline void +edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata, + prof_recent_t *recent_alloc) { + atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc, + ATOMIC_RELAXED); } static inline bool diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index fe5e606b..05e6c442 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -22,7 +22,8 @@ void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata); void large_dalloc_finish(tsdn_t *tsdn, 
edata_t *edata); void large_dalloc(tsdn_t *tsdn, edata_t *edata); size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); -void large_prof_info_get(const edata_t *edata, prof_info_t *prof_info); +void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, + bool reset_recent); void large_prof_tctx_reset(edata_t *edata); void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index a3766ff2..c4bee24d 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -9,6 +9,8 @@ typedef struct { uint64_t ns; } nstime_t; +static const nstime_t zero = NSTIME_ZERO_INITIALIZER; + void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); @@ -35,8 +37,14 @@ bool nstime_init_update(nstime_t *time); JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { - static const nstime_t zero = NSTIME_ZERO_INITIALIZER; nstime_copy(time, &zero); } +JEMALLOC_ALWAYS_INLINE bool +nstime_equals_zero(nstime_t *time) { + int diff = nstime_compare(time, &zero); + assert(diff >= 0); + return diff == 0; +} + #endif /* JEMALLOC_INTERNAL_NSTIME_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 9ba363bf..a07fd22b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -24,6 +24,10 @@ extern char opt_prof_prefix[ #endif 1]; +/* For recording recent allocations */ +extern ssize_t opt_prof_recent_alloc_max; +extern malloc_mutex_t prof_recent_alloc_mtx; + /* Accessed via prof_active_[gs]et{_unlocked,}(). 
*/ extern bool prof_active; @@ -99,4 +103,9 @@ void prof_sample_threshold_update(tsd_t *tsd); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); +ssize_t prof_recent_alloc_max_ctl_read(); +ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); +void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), + void *cbopaque); + #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 186446bb..9ea0454c 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -46,7 +46,17 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, assert(ptr != NULL); assert(prof_info != NULL); - arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info); + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, false); +} + +JEMALLOC_ALWAYS_INLINE void +prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, + alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { + cassert(config_prof); + assert(ptr != NULL); + assert(prof_info != NULL); + + arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true); } JEMALLOC_ALWAYS_INLINE void @@ -66,12 +76,12 @@ prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_info_set(tsd_t *tsd, const void *ptr, prof_tctx_t *tctx) { +prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { cassert(config_prof); - assert(ptr != NULL); + assert(edata != NULL); assert((uintptr_t)tctx > (uintptr_t)1U); - arena_prof_info_set(tsd, ptr, tctx); + arena_prof_info_set(tsd, edata, tctx); } JEMALLOC_ALWAYS_INLINE bool @@ -190,7 +200,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; - prof_info_get(tsd, ptr, alloc_ctx, &prof_info); + 
prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h new file mode 100644 index 00000000..d0e9e1e1 --- /dev/null +++ b/include/jemalloc/internal/prof_recent.h @@ -0,0 +1,16 @@ +#ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H +#define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H + +bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); +void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize); +void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); +bool prof_recent_init(); +void edata_prof_recent_alloc_init(edata_t *edata); +#ifdef JEMALLOC_JET +prof_recent_t *prof_recent_alloc_begin(tsd_t *tsd); +prof_recent_t *prof_recent_alloc_end(tsd_t *tsd); +prof_recent_t *prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node); +prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata); +#endif + +#endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 6223adc8..59c0f4ff 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_STRUCTS_H #include "jemalloc/internal/ckh.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/rb.h" @@ -55,6 +56,12 @@ struct prof_tctx_s { uint64_t thr_uid; uint64_t thr_discrim; + /* + * Reference count of how many times this tctx object is referenced in + * recent allocation / deallocation records, protected by tdata->lock. + */ + uint64_t recent_count; + /* Profiling counters, protected by tdata->lock. 
*/ prof_cnt_t cnts; @@ -97,10 +104,10 @@ struct prof_tctx_s { typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; struct prof_info_s { - /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; /* Time when the allocation was made. */ nstime_t alloc_time; + /* Points to the prof_tctx_t corresponding to the allocation. */ + prof_tctx_t *alloc_tctx; }; struct prof_gctx_s { @@ -201,4 +208,15 @@ struct prof_tdata_s { }; typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; +struct prof_recent_s { + nstime_t alloc_time; + nstime_t dalloc_time; + + prof_recent_t *next; + size_t usize; + prof_tctx_t *alloc_tctx; + edata_t *alloc_edata; /* NULL means allocation has been freed. */ + prof_tctx_t *dalloc_tctx; +}; + #endif /* JEMALLOC_INTERNAL_PROF_STRUCTS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ad095da3..498962db 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -8,6 +8,7 @@ typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_info_s prof_info_t; typedef struct prof_gctx_s prof_gctx_t; typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_recent_s prof_recent_t; /* Option defaults. */ #ifdef JEMALLOC_PROF @@ -53,4 +54,7 @@ typedef struct prof_tdata_s prof_tdata_t; #define PROF_DUMP_FILENAME_LEN 1 #endif +/* Default number of recent allocations to record. 
*/ +#define PROF_RECENT_ALLOC_MAX_DEFAULT 0 + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 985e0a33..4ed787a2 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -61,6 +61,7 @@ #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF /******************************************************************************/ /* PER-WITNESS DATA */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 58790903..f9af3ddd 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -67,6 +67,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 3551ba5e..90f8831d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 631de575..4ca484ac 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -67,6 +67,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 3551ba5e..90f8831d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index eee12770..5a467d5a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,6 +113,7 @@ 
CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -232,6 +233,7 @@ CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) INDEX_PROTO(experimental_arenas_i) +CTL_PROTO(experimental_prof_recent_alloc_max) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -343,6 +345,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; @@ -620,10 +623,15 @@ static const ctl_indexed_node_t experimental_arenas_node[] = { {INDEX(experimental_arenas_i)} }; +static const ctl_named_node_t experimental_prof_recent_node[] = { + {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, +}; + static const ctl_named_node_t experimental_node[] = { {NAME("hooks"), CHILD(named, experimental_hooks)}, {NAME("utilization"), CHILD(named, experimental_utilization)}, - {NAME("arenas"), CHILD(indexed, experimental_arenas)} + {NAME("arenas"), CHILD(indexed, experimental_arenas)}, + {NAME("prof_recent"), CHILD(named, experimental_prof_recent)} }; static const ctl_named_node_t root_node[] = { @@ -1791,6 +1799,8 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, + opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) @@ -3461,3 +3471,36 @@ 
label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return ret; } + +static int +experimental_prof_recent_alloc_max_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (!(config_prof && opt_prof)) { + ret = ENOENT; + goto label_return; + } + + ssize_t old_max; + if (newp != NULL) { + if (newlen != sizeof(ssize_t)) { + ret = EINVAL; + goto label_return; + } + ssize_t max = *(ssize_t *)newp; + if (max < -1) { + ret = EINVAL; + goto label_return; + } + old_max = prof_recent_alloc_max_ctl_write(tsd, max); + } else { + old_max = prof_recent_alloc_max_ctl_read(); + } + READ(old_max, ssize_t); + + ret = 0; + +label_return: + return ret; +} diff --git a/src/extent.c b/src/extent.c index 8d78f95f..54f14995 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1562,6 +1562,8 @@ extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, bool extent_boot(void) { + assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t)); + if (rtree_new(&extents_rtree, true)) { return true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 825a8ed0..7184cbb0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1402,6 +1402,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, + "prof_recent_alloc_max", -1, SSIZE_MAX) } if (config_log) { if (CONF_MATCH("log")) { @@ -3015,7 +3017,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; - prof_info_get(tsd, old_ptr, alloc_ctx, &old_prof_info); + prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = 
prof_active_get_unlocked(); prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); void *p; @@ -3265,8 +3267,13 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { + /* + * old_prof_info is only used for asserting that the profiling info + * isn't changed by the ixalloc() call. + */ prof_info_t old_prof_info; prof_info_get(tsd, ptr, alloc_ctx, &old_prof_info); + /* * usize isn't knowable before ixalloc() returns when extra is non-zero. * Therefore, compute its maximum possible value and use that in @@ -3315,13 +3322,26 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, */ thread_event(tsd, usize - usize_max); } - if (usize == old_usize) { - prof_alloc_rollback(tsd, tctx, false); - return usize; - } - prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, old_usize, - &old_prof_info); + /* + * At this point we can still safely get the original profiling + * information associated with the ptr, because (a) the edata_t object + * associated with the ptr still lives and (b) the profiling info + * fields are not touched. "(a)" is asserted in the outer je_xallocx() + * function, and "(b)" is indirectly verified below by checking that + * the alloc_tctx field is unchanged. 
+ */ + prof_info_t prof_info; + if (usize == old_usize) { + prof_info_get(tsd, ptr, alloc_ctx, &prof_info); + prof_alloc_rollback(tsd, tctx, false); + } else { + prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, + old_usize, &prof_info); + } + + assert(old_prof_info.alloc_tctx == prof_info.alloc_tctx); return usize; } @@ -3342,6 +3362,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); + /* + * old_edata is only for verifying that xallocx() keeps the edata_t + * object associated with the ptr (though the content of the edata_t + * object can be changed). + */ + edata_t *old_edata = iealloc(tsd_tsdn(tsd), ptr); + alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, @@ -3374,6 +3401,13 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { extra, alignment, zero); thread_event(tsd, usize); } + + /* + * xallocx() should keep using the same edata_t object (though its + * content can be changed). 
+ */ + assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); + if (unlikely(usize == old_usize)) { thread_event_rollback(tsd, usize); goto label_not_resized; diff --git a/src/large.c b/src/large.c index 5ca09f68..ca35fc54 100644 --- a/src/large.c +++ b/src/large.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" @@ -368,8 +369,22 @@ large_salloc(tsdn_t *tsdn, const edata_t *edata) { } void -large_prof_info_get(const edata_t *edata, prof_info_t *prof_info) { - edata_prof_info_get(edata, prof_info); +large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, + bool reset_recent) { + assert(prof_info != NULL); + nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); + + prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); + prof_info->alloc_tctx = alloc_tctx; + + if (reset_recent && (uintptr_t)alloc_tctx > (uintptr_t)1U) { + /* + * This allocation was a prof sample. Reset the pointer on the + * recent allocation record, so that this allocation is + * recorded as released. 
+ */ + prof_recent_alloc_reset(tsd, edata); + } } static void @@ -388,4 +403,5 @@ large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { nstime_t t; nstime_init_update(&t); edata_prof_alloc_time_set(edata, &t); + edata_prof_recent_alloc_init(edata); } diff --git a/src/prof.c b/src/prof.c index 33b68198..159600e7 100644 --- a/src/prof.c +++ b/src/prof.c @@ -7,6 +7,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" +#include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/thread_event.h" /* @@ -146,7 +147,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) { - prof_info_set(tsd, ptr, tctx); + edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->cnts.curobjs++; @@ -155,8 +157,13 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, tctx->cnts.accumobjs++; tctx->cnts.accumbytes += usize; } + bool record_recent = prof_recent_alloc_prepare(tsd, tctx); tctx->prepared = false; malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); + if (record_recent) { + assert(tctx == edata_prof_tctx_get(edata)); + prof_recent_alloc(tsd, edata, usize); + } } void @@ -1068,6 +1075,10 @@ prof_boot2(tsd_t *tsd) { return true; } + if (prof_recent_init()) { + return true; + } + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); @@ -1134,6 +1145,7 @@ prof_prefork1(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &prof_gdump_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); + malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); } } @@ -1142,6 +1154,7 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; + malloc_mutex_postfork_parent(tsdn, 
&prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); @@ -1166,6 +1179,7 @@ prof_postfork_child(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; + malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); diff --git a/src/prof_data.c b/src/prof_data.c index 690070e6..dfc507f9 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -378,6 +378,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { ret.p->tdata = tdata; ret.p->thr_uid = tdata->thr_uid; ret.p->thr_discrim = tdata->thr_discrim; + ret.p->recent_count = 0; memset(&ret.p->cnts, 0, sizeof(prof_cnt_t)); ret.p->gctx = gctx; ret.p->tctx_uid = tdata->tctx_uid_next++; @@ -405,8 +406,15 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { prof_tctx_t * prof_tctx_create(tsd_t *tsd) { - prof_tdata_t *tdata = prof_tdata_get(tsd, false); - assert(tdata != NULL); + if (tsd_reentrancy_level_get(tsd) > 0) { + return NULL; + } + + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return NULL; + } + prof_bt_t bt; bt_init(&bt, tdata->vec); prof_backtrace(tsd, &bt); @@ -1417,6 +1425,9 @@ prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { if (tctx->prepared) { return false; } + if (tctx->recent_count != 0) { + return false; + } return true; } diff --git a/src/prof_recent.c b/src/prof_recent.c new file mode 100644 index 00000000..98349aca --- /dev/null +++ b/src/prof_recent.c @@ -0,0 +1,553 @@ +#define JEMALLOC_PROF_RECENT_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_recent.h" + +#ifndef JEMALLOC_JET +# define 
STATIC_INLINE_IF_NOT_TEST static inline +#else +# define STATIC_INLINE_IF_NOT_TEST +#endif + +ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; +malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ +static atomic_zd_t prof_recent_alloc_max; +static ssize_t prof_recent_alloc_count = 0; +static prof_recent_t *prof_recent_alloc_dummy = NULL; + +static void +prof_recent_alloc_max_init() { + atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, + ATOMIC_RELAXED); +} + +static inline ssize_t +prof_recent_alloc_max_get_no_lock() { + return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED); +} + +static inline ssize_t +prof_recent_alloc_max_get(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + return prof_recent_alloc_max_get_no_lock(); +} + +static inline ssize_t +prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + ssize_t old_max = prof_recent_alloc_max_get(tsd); + atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED); + return old_max; +} + +static inline void +increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + ++tctx->recent_count; + assert(tctx->recent_count > 0); +} + +bool +prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) { + assert(opt_prof && prof_booted); + malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* + * Check whether last-N mode is turned on without trying to acquire the + * lock, so as to optimize for the following two scenarios: + * (1) Last-N mode is switched off; + * (2) Dumping, during which last-N mode is temporarily turned off so + * as not to block sampled allocations. 
+ */ + if (prof_recent_alloc_max_get_no_lock() == 0) { + return false; + } + + /* + * Increment recent_count to hold the tctx so that it won't be gone + * even after tctx->tdata->lock is released. This acts as a + * "placeholder"; the real recording of the allocation requires a lock + * on prof_recent_alloc_mtx and is done in prof_recent_alloc (when + * tctx->tdata->lock has been released). + */ + increment_recent_count(tsd, tctx); + return true; +} + +static void +decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(tctx != NULL); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + assert(tctx->recent_count > 0); + --tctx->recent_count; + prof_tctx_try_destroy(tsd, tctx); +} + +void +edata_prof_recent_alloc_init(edata_t *edata) { + edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); +} + +static inline prof_recent_t * +edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { + return edata_prof_recent_alloc_get_dont_call_directly(edata); +} + +STATIC_INLINE_IF_NOT_TEST prof_recent_t * +edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_t *recent_alloc = + edata_prof_recent_alloc_get_no_lock(edata); + assert(recent_alloc == NULL || recent_alloc->alloc_edata == edata); + return recent_alloc; +} + +static prof_recent_t * +edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata, + prof_recent_t *recent_alloc) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_t *old_recent_alloc = + edata_prof_recent_alloc_get(tsd, edata); + edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc); + return old_recent_alloc; +} + +static void +edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, + prof_recent_t *recent_alloc) { + assert(recent_alloc != NULL); + prof_recent_t *old_recent_alloc = + 
edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc); + assert(old_recent_alloc == NULL); + recent_alloc->alloc_edata = edata; +} + +static void +edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata, + prof_recent_t *recent_alloc) { + assert(recent_alloc != NULL); + prof_recent_t *old_recent_alloc = + edata_prof_recent_alloc_update_internal(tsd, edata, NULL); + assert(old_recent_alloc == recent_alloc); + assert(edata == recent_alloc->alloc_edata); + recent_alloc->alloc_edata = NULL; +} + +/* + * This function should be called right before an allocation is released, so + * that the associated recent allocation record can contain the following + * information: + * (1) The allocation is released; + * (2) The time of the deallocation; and + * (3) The prof_tctx associated with the deallocation. + */ +void +prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { + /* + * Check whether the recent allocation record still exists without + * trying to acquire the lock. + */ + if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) { + return; + } + + prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd); + /* + * In case dalloc_tctx is NULL, e.g. due to OOM, we will not record the + * deallocation time / tctx, which is handled later, after we check + * again when holding the lock. + */ + + if (dalloc_tctx != NULL) { + malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock); + increment_recent_count(tsd, dalloc_tctx); + dalloc_tctx->prepared = false; + malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock); + } + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + /* Check again after acquiring the lock. 
*/ + prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata); + if (recent != NULL) { + edata_prof_recent_alloc_reset(tsd, edata, recent); + assert(nstime_equals_zero(&recent->dalloc_time)); + assert(recent->dalloc_tctx == NULL); + if (dalloc_tctx != NULL) { + nstime_update(&recent->dalloc_time); + recent->dalloc_tctx = dalloc_tctx; + } + } else if (dalloc_tctx != NULL) { + /* We lost the race - the allocation record was just gone. */ + decrement_recent_count(tsd, dalloc_tctx); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +} + +static void +prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + if (recent->alloc_edata != NULL) { + edata_prof_recent_alloc_reset(tsd, recent->alloc_edata, recent); + } +} + +STATIC_INLINE_IF_NOT_TEST prof_recent_t * +prof_recent_alloc_begin(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_dummy != NULL); + return prof_recent_alloc_dummy->next; +} + +STATIC_INLINE_IF_NOT_TEST prof_recent_t * +prof_recent_alloc_end(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_dummy != NULL); + return prof_recent_alloc_dummy; +} + +STATIC_INLINE_IF_NOT_TEST prof_recent_t * +prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_dummy != NULL); + assert(node != NULL && node != prof_recent_alloc_dummy); + return node->next; +} + +static bool +prof_recent_alloc_is_empty(tsd_t *tsd) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + if (prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)) { + assert(prof_recent_alloc_count == 0); + return true; + } else { + assert(prof_recent_alloc_count > 0); + return false; + } +} + +static void +prof_recent_alloc_assert_count(tsd_t *tsd) { + 
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + if (config_debug) { + ssize_t count = 0; + prof_recent_t *n = prof_recent_alloc_begin(tsd); + while (n != prof_recent_alloc_end(tsd)) { + ++count; + n = prof_recent_alloc_next(tsd, n); + } + assert(count == prof_recent_alloc_count); + assert(prof_recent_alloc_max_get(tsd) == -1 || + count <= prof_recent_alloc_max_get(tsd)); + } +} + +void +prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) { + assert(edata != NULL); + prof_tctx_t *tctx = edata_prof_tctx_get(edata); + + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + + /* + * Reserve a new prof_recent_t node if needed. If needed, we release + * the prof_recent_alloc_mtx lock and allocate. Then, rather than + * immediately checking for OOM, we regain the lock and try to make use + * of the reserve node if needed. There are six scenarios: + * + * \ now | no need | need but OOMed | need and allocated + * later \ | | | + * ------------------------------------------------------------ + * no need | (1) | (2) | (3) + * ------------------------------------------------------------ + * need | (4) | (5) | (6) + * + * First, "(4)" never happens, because we don't release the lock in the + * middle if there's no need for a new node; in such cases "(1)" always + * takes place, which is trivial. + * + * Out of the remaining four scenarios, "(6)" is the common case and is + * trivial. "(5)" is also trivial, in which case we'll rollback the + * effect of prof_recent_alloc_prepare() as expected. + * + * "(2)" / "(3)" occurs when the need for a new node is gone after we + * regain the lock. If the new node is successfully allocated, i.e. in + * the case of "(3)", we'll release it in the end; otherwise, i.e. in + * the case of "(2)", we do nothing - we're lucky that the OOM ends up + * doing no harm at all. 
+ * + * Therefore, the only performance cost of the "release lock" -> + * "allocate" -> "regain lock" design is the "(3)" case, but it happens + * very rarely, so the cost is relatively small compared to the gain of + * not having to have the lock order of prof_recent_alloc_mtx above all + * the allocation locks. + */ + prof_recent_t *reserve = NULL; + if (prof_recent_alloc_max_get(tsd) == -1 || + prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { + assert(prof_recent_alloc_max_get(tsd) != 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + reserve = (prof_recent_t *)iallocztm(tsd_tsdn(tsd), + sizeof(prof_recent_t), sz_size2index(sizeof(prof_recent_t)), + false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), + true); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + } + + if (prof_recent_alloc_max_get(tsd) == 0) { + assert(prof_recent_alloc_is_empty(tsd)); + goto label_rollback; + } + + assert(prof_recent_alloc_dummy != NULL); + { + /* Fill content into the dummy node. */ + prof_recent_t *node = prof_recent_alloc_dummy; + node->usize = usize; + nstime_copy(&node->alloc_time, + edata_prof_alloc_time_get(edata)); + node->alloc_tctx = tctx; + edata_prof_recent_alloc_set(tsd, edata, node); + nstime_init_zero(&node->dalloc_time); + node->dalloc_tctx = NULL; + } + + prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx; + if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) { + /* If upper limit is reached, simply shift the dummy. */ + assert(prof_recent_alloc_max_get(tsd) != -1); + assert(!prof_recent_alloc_is_empty(tsd)); + prof_recent_alloc_dummy = prof_recent_alloc_dummy->next; + old_alloc_tctx = prof_recent_alloc_dummy->alloc_tctx; + assert(old_alloc_tctx != NULL); + old_dalloc_tctx = prof_recent_alloc_dummy->dalloc_tctx; + prof_recent_alloc_evict_edata(tsd, prof_recent_alloc_dummy); + } else { + /* Otherwise use the new node as the dummy. 
*/ + assert(prof_recent_alloc_max_get(tsd) == -1 || + prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)); + if (reserve == NULL) { + goto label_rollback; + } + reserve->next = prof_recent_alloc_dummy->next; + prof_recent_alloc_dummy->next = reserve; + prof_recent_alloc_dummy = reserve; + reserve = NULL; + old_alloc_tctx = NULL; + old_dalloc_tctx = NULL; + ++prof_recent_alloc_count; + } + + assert(!prof_recent_alloc_is_empty(tsd)); + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + if (reserve != NULL) { + idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + } + + /* + * Asynchronously handle the tctx of the old node, so that there's no + * simultaneous holdings of prof_recent_alloc_mtx and tdata->lock. + * In the worst case this may delay the tctx release but it's better + * than holding prof_recent_alloc_mtx for longer. + */ + if (old_alloc_tctx != NULL) { + decrement_recent_count(tsd, old_alloc_tctx); + } + if (old_dalloc_tctx != NULL) { + decrement_recent_count(tsd, old_dalloc_tctx); + } + return; + +label_rollback: + assert(edata_prof_recent_alloc_get(tsd, edata) == NULL); + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + if (reserve != NULL) { + idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + } + decrement_recent_count(tsd, tctx); +} + +ssize_t +prof_recent_alloc_max_ctl_read() { + /* Don't bother to acquire the lock. */ + return prof_recent_alloc_max_get_no_lock(); +} + +ssize_t +prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { + assert(max >= -1); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + + const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); + + if (max == -1 || prof_recent_alloc_count <= max) { + /* Easy case - no need to alter the list. 
*/ + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + return old_max; + } + + prof_recent_t *begin = prof_recent_alloc_dummy->next; + /* For verification purpose only. */ + ssize_t count = prof_recent_alloc_count - max; + do { + assert(!prof_recent_alloc_is_empty(tsd)); + prof_recent_t *node = prof_recent_alloc_dummy->next; + assert(node != prof_recent_alloc_dummy); + prof_recent_alloc_evict_edata(tsd, node); + prof_recent_alloc_dummy->next = node->next; + --prof_recent_alloc_count; + } while (prof_recent_alloc_count > max); + prof_recent_t *end = prof_recent_alloc_dummy->next; + assert(begin != end); + + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* + * Asynchronously handle the tctx of the to-be-deleted nodes, so that + * there's no simultaneous holdings of prof_recent_alloc_mtx and + * tdata->lock. In the worst case there can be slightly extra space + * overhead taken by these nodes, but the total number of nodes at any + * time is bounded by (max + sum(decreases)), where "max" means the + * most recent prof_recent_alloc_max and "sum(decreases)" means the + * sum of the deltas of all decreases in prof_recent_alloc_max in the + * past. This (max + sum(decreases)) value is completely transparent + * to and controlled by application. 
+ */ + do { + prof_recent_t *node = begin; + decrement_recent_count(tsd, node->alloc_tctx); + if (node->dalloc_tctx != NULL) { + decrement_recent_count(tsd, node->dalloc_tctx); + } + begin = node->next; + idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true); + --count; + } while (begin != end); + assert(count == 0); + + return old_max; +} + +static void +dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { + char bt_buf[2 * sizeof(intptr_t) + 3]; + char *s = bt_buf; + assert(tctx != NULL); + prof_bt_t *bt = &tctx->gctx->bt; + for (size_t i = 0; i < bt->len; ++i) { + malloc_snprintf(bt_buf, sizeof(bt_buf), "%p", bt->vec[i]); + emitter_json_value(emitter, emitter_type_string, &s); + } +} + +#define PROF_RECENT_PRINT_BUFSIZE 4096 +void +prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), + void *cbopaque) { + char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, + sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, + arena_get(tsd_tsdn(tsd), 0, false), true); + buf_writer_arg_t buf_arg = {write_cb, cbopaque, buf, + PROF_RECENT_PRINT_BUFSIZE - 1, 0}; + emitter_t emitter; + emitter_init(&emitter, emitter_output_json_compact, buffered_write_cb, + &buf_arg); + emitter_begin(&emitter); + + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + + /* + * Set prof_recent_alloc_max to 0 so that dumping won't block sampled + * allocations: the allocations can complete but will not be recorded. 
+ */ + ssize_t max = prof_recent_alloc_max_update(tsd, 0); + + emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); + + emitter_json_array_kv_begin(&emitter, "recent_alloc"); + for (prof_recent_t *n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + emitter_json_object_begin(&emitter); + + emitter_json_kv(&emitter, "usize", emitter_type_size, + &n->usize); + bool released = n->alloc_edata == NULL; + emitter_json_kv(&emitter, "released", emitter_type_bool, + &released); + + emitter_json_kv(&emitter, "alloc_thread_uid", + emitter_type_uint64, &n->alloc_tctx->thr_uid); + uint64_t alloc_time_ns = nstime_ns(&n->alloc_time); + emitter_json_kv(&emitter, "alloc_time", emitter_type_uint64, + &alloc_time_ns); + emitter_json_array_kv_begin(&emitter, "alloc_trace"); + dump_bt(&emitter, n->alloc_tctx); + emitter_json_array_end(&emitter); + + if (n->dalloc_tctx != NULL) { + assert(released); + emitter_json_kv(&emitter, "dalloc_thread_uid", + emitter_type_uint64, &n->dalloc_tctx->thr_uid); + assert(!nstime_equals_zero(&n->dalloc_time)); + uint64_t dalloc_time_ns = nstime_ns(&n->dalloc_time); + emitter_json_kv(&emitter, "dalloc_time", + emitter_type_uint64, &dalloc_time_ns); + emitter_json_array_kv_begin(&emitter, "dalloc_trace"); + dump_bt(&emitter, n->dalloc_tctx); + emitter_json_array_end(&emitter); + } else { + assert(nstime_equals_zero(&n->dalloc_time)); + } + + emitter_json_object_end(&emitter); + } + emitter_json_array_end(&emitter); + + max = prof_recent_alloc_max_update(tsd, max); + assert(max == 0); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + emitter_end(&emitter); + buf_writer_flush(&buf_arg); + idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); +} +#undef PROF_RECENT_PRINT_BUFSIZE + +bool +prof_recent_init() { + prof_recent_alloc_max_init(); + + if (malloc_mutex_init(&prof_recent_alloc_mtx, + "prof_recent_alloc", WITNESS_RANK_PROF_RECENT_ALLOC, + 
malloc_mutex_rank_exclusive)) { + return true; + } + + assert(prof_recent_alloc_dummy == NULL); + prof_recent_alloc_dummy = (prof_recent_t *)iallocztm( + TSDN_NULL, sizeof(prof_recent_t), + sz_size2index(sizeof(prof_recent_t)), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (prof_recent_alloc_dummy == NULL) { + return true; + } + prof_recent_alloc_dummy->next = prof_recent_alloc_dummy; + + return false; +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ebbaed7d..d317b4af 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -188,6 +188,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_gdump, prof); TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); + TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c new file mode 100644 index 00000000..e10ac3fe --- /dev/null +++ b/test/unit/prof_recent.c @@ -0,0 +1,391 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/prof_recent.h" + +/* As specified in the shell script */ +#define OPT_ALLOC_MAX 3 + +/* Invariant before and after every test (when config_prof is on) */ +static void confirm_prof_setup(tsd_t *tsd) { + /* Options */ + assert_true(opt_prof, "opt_prof not on"); + assert_true(opt_prof_active, "opt_prof_active not on"); + assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, + "opt_prof_recent_alloc_max not set correctly"); + + /* Dynamics */ + assert_true(prof_active, "prof_active not on"); + assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + "prof_recent_alloc_max not set correctly"); +} + +TEST_BEGIN(test_confirm_setup) { + test_skip_if(!config_prof); + confirm_prof_setup(tsd_fetch()); +} +TEST_END + +TEST_BEGIN(test_prof_recent_off) { + test_skip_if(config_prof); + + const ssize_t past_ref = 0, future_ref = 0; + const size_t len_ref = sizeof(ssize_t); + + ssize_t past = past_ref, future = future_ref; + 
size_t len = len_ref; + +#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ + assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ + d), ENOENT, "Should return ENOENT when config_prof is off");\ + assert_zd_eq(past, past_ref, "output was touched"); \ + assert_zu_eq(len, len_ref, "output length was touched"); \ + assert_zd_eq(future, future_ref, "input was touched"); \ +} while (0) + + ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); + ASSERT_SHOULD_FAIL("alloc_max", &past, &len, NULL, 0); + ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, &future, len); + ASSERT_SHOULD_FAIL("alloc_max", &past, &len, &future, len); + +#undef ASSERT_SHOULD_FAIL +} +TEST_END + +TEST_BEGIN(test_prof_recent_on) { + test_skip_if(!config_prof); + + ssize_t past, future; + size_t len = sizeof(ssize_t); + + tsd_t *tsd = tsd_fetch(); + + confirm_prof_setup(tsd); + + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); + confirm_prof_setup(tsd); + + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, NULL, 0), 0, "Read error"); + assert_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); + future = OPT_ALLOC_MAX + 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, len), 0, "Write error"); + future = -1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len), 0, "Read/write error"); + assert_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); + future = -2; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len), EINVAL, + "Invalid write should return EINVAL"); + assert_zd_eq(past, OPT_ALLOC_MAX + 1, + "Output should not be touched given invalid write"); + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len), 0, "Read/write error"); + assert_zd_eq(past, -1, "Wrong read result"); + future = OPT_ALLOC_MAX + 2; + 
assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + &past, &len, &future, len * 2), EINVAL, + "Invalid write should return EINVAL"); + assert_zd_eq(past, -1, + "Output should not be touched given invalid write"); + + confirm_prof_setup(tsd); +} +TEST_END + +/* Reproducible sequence of request sizes */ +#define NTH_REQ_SIZE(n) ((n) * 97 + 101) + +static void confirm_malloc(tsd_t *tsd, void *p) { + assert_ptr_not_null(p, "malloc failed unexpectedly"); + edata_t *e = iealloc(TSDN_NULL, p); + assert_ptr_not_null(e, "NULL edata for living pointer"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); + assert_ptr_not_null(n, "Record in edata should not be NULL"); + assert_ptr_not_null(n->alloc_tctx, + "alloc_tctx in record should not be NULL"); + assert_ptr_eq(e, n->alloc_edata, + "edata pointer in record is not correct"); + assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +} + +static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_zu_eq(n->usize, sz_s2u(NTH_REQ_SIZE(kth)), + "Recorded allocation usize is wrong"); +} + +static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_ptr_not_null(n->alloc_tctx, + "alloc_tctx in record should not be NULL"); + assert_ptr_not_null(n->alloc_edata, + "Recorded edata should not be NULL for living pointer"); + assert_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), + "Record in edata is not correct"); + assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); +} + +static void confirm_record_released(tsd_t *tsd, prof_recent_t *n) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_ptr_not_null(n->alloc_tctx, + "alloc_tctx in record should 
not be NULL"); + assert_ptr_null(n->alloc_edata, + "Recorded edata should be NULL for released pointer"); + assert_ptr_not_null(n->dalloc_tctx, + "dalloc_tctx in record should not be NULL for released pointer"); +} + +TEST_BEGIN(test_prof_recent_alloc) { + test_skip_if(!config_prof); + + bool b; + unsigned i, c; + size_t req_size; + void *p; + prof_recent_t *n; + ssize_t future; + + tsd_t *tsd = tsd_fetch(); + + confirm_prof_setup(tsd); + + /* + * First batch of 2 * OPT_ALLOC_MAX allocations. After the + * (OPT_ALLOC_MAX - 1)'th allocation the recorded allocations should + * always be the last OPT_ALLOC_MAX allocations coming from here. + */ + for (i = 0; i < 2 * OPT_ALLOC_MAX; ++i) { + req_size = NTH_REQ_SIZE(i); + p = malloc(req_size); + confirm_malloc(tsd, p); + if (i < OPT_ALLOC_MAX - 1) { + malloc_mutex_lock(tsd_tsdn(tsd), + &prof_recent_alloc_mtx); + assert_ptr_ne(prof_recent_alloc_begin(tsd), + prof_recent_alloc_end(tsd), + "Empty recent allocation"); + malloc_mutex_unlock(tsd_tsdn(tsd), + &prof_recent_alloc_mtx); + free(p); + /* + * The recorded allocations may still include some + * other allocations before the test run started, + * so keep allocating without checking anything. + */ + continue; + } + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX); + if (c == OPT_ALLOC_MAX) { + confirm_record_living(tsd, n); + } else { + confirm_record_released(tsd, n); + } + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + free(p); + } + + confirm_prof_setup(tsd); + + b = false; + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + "mallctl for turning off prof_active failed"); + + /* + * Second batch of OPT_ALLOC_MAX allocations. 
Since prof_active is + * turned off, this batch shouldn't be recorded. + */ + for (; i < 3 * OPT_ALLOC_MAX; ++i) { + req_size = NTH_REQ_SIZE(i); + p = malloc(req_size); + assert_ptr_not_null(p, "malloc failed unexpectedly"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + confirm_record_size(tsd, n, c + OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + ++c; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + free(p); + } + + b = true; + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + "mallctl for turning on prof_active failed"); + + confirm_prof_setup(tsd); + + /* + * Third batch of OPT_ALLOC_MAX allocations. Since prof_active is + * turned back on, they should be recorded, and in the list of recorded + * allocations they should follow the first batch rather than the + * second batch. + */ + for (; i < 4 * OPT_ALLOC_MAX; ++i) { + req_size = NTH_REQ_SIZE(i); + p = malloc(req_size); + confirm_malloc(tsd, p); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, + /* Is the allocation from the third batch? */ + i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ? + /* If yes, then it's just recorded. */ + i + c - OPT_ALLOC_MAX : + /* + * Otherwise, it should come from the first batch + * instead of the second batch. 
+ */ + i + c - 2 * OPT_ALLOC_MAX); + if (c == OPT_ALLOC_MAX) { + confirm_record_living(tsd, n); + } else { + confirm_record_released(tsd, n); + } + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + free(p); + } + + /* Increasing the limit shouldn't alter the list of records. */ + future = OPT_ALLOC_MAX + 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + ++c; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + + /* + * Decreasing the limit shouldn't alter the list of records as long as + * the new limit is still no less than the length of the list. + */ + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + ++c; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX, + "Incorrect total number of allocations"); + + /* + * Decreasing the limit should shorten the list of records if the new + * limit is less than the length of the list. 
+ */ + future = OPT_ALLOC_MAX - 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX - 1, + "Incorrect total number of allocations"); + + /* Setting to unlimited shouldn't alter the list of records. */ + future = -1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + c = 0; + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + for (n = prof_recent_alloc_begin(tsd); + n != prof_recent_alloc_end(tsd); + n = prof_recent_alloc_next(tsd, n)) { + ++c; + confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(tsd, n); + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert_u_eq(c, OPT_ALLOC_MAX - 1, + "Incorrect total number of allocations"); + + /* Downshift to only one record. */ + future = 1; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + n = prof_recent_alloc_begin(tsd); + assert(n != prof_recent_alloc_end(tsd)); + confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); + confirm_record_released(tsd, n); + n = prof_recent_alloc_next(tsd, n); + assert(n == prof_recent_alloc_end(tsd)); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* Completely turn off. 
*/ + future = 0; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + /* Restore the settings. */ + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + confirm_prof_setup(tsd); +} +TEST_END + +#undef NTH_REQ_SIZE + +int +main(void) { + return test( + test_confirm_setup, + test_prof_recent_off, + test_prof_recent_on, + test_prof_recent_alloc); +} diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh new file mode 100644 index 00000000..59759a6a --- /dev/null +++ b/test/unit/prof_recent.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_recent_alloc_max:3" +fi diff --git a/test/unit/prof_reset.sh b/test/unit/prof_reset.sh index 43c516a0..daefeb70 100644 --- a/test/unit/prof_reset.sh +++ b/test/unit/prof_reset.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:false,lg_prof_sample:0,prof_recent_alloc_max:0" fi From 6b6b4709b34992940e112fbe5726472b37783ef2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 09:59:17 -0800 Subject: [PATCH 1494/2608] Unify buffered writer naming --- include/jemalloc/internal/malloc_io.h | 6 +++--- src/jemalloc.c | 6 +++--- src/malloc_io.c | 8 ++++---- src/prof_log.c | 8 ++++---- src/prof_recent.c | 6 +++--- test/unit/buf_writer.c | 8 ++++---- 6 files changed, 21 
insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index f5d16a5e..dabcb198 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -119,9 +119,9 @@ typedef struct { char *buf; size_t buf_size; /* must be one less than the capacity of buf array */ size_t buf_end; -} buf_writer_arg_t; +} buf_write_arg_t; -void buf_writer_flush(buf_writer_arg_t *arg); -void buffered_write_cb(void *buf_writer_arg, const char *s); +void buf_write_flush(buf_write_arg_t *arg); +void buf_write_cb(void *buf_write_arg, const char *s); #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 7184cbb0..0a95b3b1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3696,10 +3696,10 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, char *stats_print_buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_writer_arg_t stats_print_buf_arg = {write_cb, cbopaque, + buf_write_arg_t stats_print_buf_arg = {write_cb, cbopaque, stats_print_buf, STATS_PRINT_BUFSIZE - 1, 0}; - stats_print(buffered_write_cb, &stats_print_buf_arg, opts); - buf_writer_flush(&stats_print_buf_arg); + stats_print(buf_write_cb, &stats_print_buf_arg, opts); + buf_write_flush(&stats_print_buf_arg); idalloctm(tsdn, stats_print_buf, NULL, NULL, true, true); } diff --git a/src/malloc_io.c b/src/malloc_io.c index fc7ff726..cfefcacb 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -665,7 +665,7 @@ malloc_printf(const char *format, ...) 
{ } void -buf_writer_flush(buf_writer_arg_t *arg) { +buf_write_flush(buf_write_arg_t *arg) { assert(arg->buf_end <= arg->buf_size); arg->buf[arg->buf_end] = '\0'; if (arg->write_cb == NULL) { @@ -677,13 +677,13 @@ buf_writer_flush(buf_writer_arg_t *arg) { } void -buffered_write_cb(void *buf_writer_arg, const char *s) { - buf_writer_arg_t *arg = (buf_writer_arg_t *)buf_writer_arg; +buf_write_cb(void *buf_write_arg, const char *s) { + buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; size_t i, slen, n, s_remain, buf_remain; assert(arg->buf_end <= arg->buf_size); for (i = 0, slen = strlen(s); i < slen; i += n) { if (arg->buf_end == arg->buf_size) { - buf_writer_flush(arg); + buf_write_flush(arg); } s_remain = slen - i; buf_remain = arg->buf_size - arg->buf_end; diff --git a/src/prof_log.c b/src/prof_log.c index 11de4363..e3d21af2 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -631,12 +631,12 @@ prof_log_stop(tsdn_t *tsdn) { char *prof_log_stop_buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_writer_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, + buf_write_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; /* Emit to json. */ - emitter_init(&emitter, emitter_output_json_compact, - buffered_write_cb, &prof_log_stop_buf_arg); + emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, + &prof_log_stop_buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -645,7 +645,7 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - buf_writer_flush(&prof_log_stop_buf_arg); + buf_write_flush(&prof_log_stop_buf_arg); idalloctm(tsdn, prof_log_stop_buf, NULL, NULL, true, true); /* Reset global state. 
*/ diff --git a/src/prof_recent.c b/src/prof_recent.c index 98349aca..f4cad097 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -461,10 +461,10 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); - buf_writer_arg_t buf_arg = {write_cb, cbopaque, buf, + buf_write_arg_t buf_arg = {write_cb, cbopaque, buf, PROF_RECENT_PRINT_BUFSIZE - 1, 0}; emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, buffered_write_cb, + emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, &buf_arg); emitter_begin(&emitter); @@ -524,7 +524,7 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_end(&emitter); - buf_writer_flush(&buf_arg); + buf_write_flush(&buf_arg); idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); } #undef PROF_RECENT_PRINT_BUFSIZE diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 4d8ae99b..5051f76a 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -20,8 +20,8 @@ TEST_BEGIN(test_buf_write) { size_t n_unit, remain, i; ssize_t unit; uint64_t arg = 4; /* Starting value of random argument. */ - buf_writer_arg_t test_buf_arg = - {test_write_cb, &arg, test_buf, TEST_BUF_SIZE - 1, 0}; + buf_write_arg_t test_buf_arg = {test_write_cb, &arg, test_buf, + TEST_BUF_SIZE - 1, 0}; memset(s, 'a', UNIT_MAX); arg_store = arg; @@ -33,7 +33,7 @@ TEST_BEGIN(test_buf_write) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - buffered_write_cb(&test_buf_arg, s); + buf_write_cb(&test_buf_arg, s); remain += unit; if (remain > test_buf_arg.buf_size) { /* Flushes should have happened. 
*/ @@ -49,7 +49,7 @@ TEST_BEGIN(test_buf_write) { "Incorrect length after writing %zu strings" " of length %zu", i, unit); } - buf_writer_flush(&test_buf_arg); + buf_write_flush(&test_buf_arg); assert_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); From 6d8e6169028f50ef9904692a0d4ecc0f21054925 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 16:36:09 -0800 Subject: [PATCH 1495/2608] Make buffered writer an independent module --- Makefile.in | 1 + include/jemalloc/internal/buf_writer.h | 24 +++++++++++++ include/jemalloc/internal/malloc_io.h | 26 +------------- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/buf_writer.c | 36 +++++++++++++++++++ src/jemalloc.c | 1 + src/malloc_io.c | 33 +---------------- src/prof_log.c | 1 + src/prof_recent.c | 1 + test/unit/buf_writer.c | 2 ++ 13 files changed, 76 insertions(+), 57 deletions(-) create mode 100644 include/jemalloc/internal/buf_writer.h create mode 100644 src/buf_writer.c diff --git a/Makefile.in b/Makefile.in index ad54720e..d923d507 100644 --- a/Makefile.in +++ b/Makefile.in @@ -101,6 +101,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bin.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ + $(srcroot)src/buf_writer.c \ $(srcroot)src/ckh.c \ $(srcroot)src/ctl.c \ $(srcroot)src/div.c \ diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h new file mode 100644 index 00000000..013bbf5d --- /dev/null +++ b/include/jemalloc/internal/buf_writer.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_BUF_WRITER_H +#define JEMALLOC_INTERNAL_BUF_WRITER_H + +/* + * Note: when using the buffered writer, cbopaque is passed to write_cb only + * when the buffer is flushed. 
It would make a difference if cbopaque points + * to something that's changing for each write_cb call, or something that + * affects write_cb in a way dependent on the content of the output string. + * However, the most typical usage case in practice is that cbopaque points to + * some "option like" content for the write_cb, so it doesn't matter. + */ + +typedef struct { + void (*write_cb)(void *, const char *); + void *cbopaque; + char *buf; + size_t buf_size; /* must be one less than the capacity of buf array */ + size_t buf_end; +} buf_write_arg_t; + +void buf_write_flush(buf_write_arg_t *arg); +void buf_write_cb(void *buf_write_arg, const char *s); + +#endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index dabcb198..fac63612 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -40,6 +40,7 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 +void wrtmessage(void *cbopaque, const char *s); int buferror(int err, char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base); @@ -99,29 +100,4 @@ malloc_read_fd(int fd, void *buf, size_t count) { return (ssize_t)result; } -/******************************************************************************/ - -/* - * The rest is buffered writing utility. - * - * The only difference when using the buffered writer is that cbopaque is - * passed to write_cb only when the buffer is flushed. It would make a - * difference if cbopaque points to something that's changing for each write_cb - * call, or something that affects write_cb in a way dependent on the content - * of the output string. However, the most typical usage case in practice is - * that cbopaque points to some "option like" content for the write_cb, so it - * doesn't matter. 
- */ - -typedef struct { - void (*write_cb)(void *, const char *); - void *cbopaque; - char *buf; - size_t buf_size; /* must be one less than the capacity of buf array */ - size_t buf_end; -} buf_write_arg_t; - -void buf_write_flush(buf_write_arg_t *arg); -void buf_write_cb(void *buf_write_arg, const char *s); - #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f9af3ddd..4b25b856 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -41,6 +41,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 90f8831d..73ee8d1d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -22,6 +22,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 4ca484ac..ed6f618d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -41,6 +41,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 90f8831d..73ee8d1d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -22,6 +22,9 @@ Source Files + + Source Files + Source Files diff --git a/src/buf_writer.c b/src/buf_writer.c new file mode 100644 index 00000000..41065946 --- /dev/null +++ b/src/buf_writer.c @@ -0,0 +1,36 @@ +#define JEMALLOC_BUF_WRITER_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/buf_writer.h" +#include "jemalloc/internal/malloc_io.h" + +void +buf_write_flush(buf_write_arg_t *arg) 
{ + assert(arg->buf_end <= arg->buf_size); + arg->buf[arg->buf_end] = '\0'; + if (arg->write_cb == NULL) { + arg->write_cb = je_malloc_message != NULL ? + je_malloc_message : wrtmessage; + } + arg->write_cb(arg->cbopaque, arg->buf); + arg->buf_end = 0; +} + +void +buf_write_cb(void *buf_write_arg, const char *s) { + buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; + size_t i, slen, n, s_remain, buf_remain; + assert(arg->buf_end <= arg->buf_size); + for (i = 0, slen = strlen(s); i < slen; i += n) { + if (arg->buf_end == arg->buf_size) { + buf_write_flush(arg); + } + s_remain = slen - i; + buf_remain = arg->buf_size - arg->buf_end; + n = s_remain < buf_remain ? s_remain : buf_remain; + memcpy(arg->buf + arg->buf_end, s + i, n); + arg->buf_end += n; + } + assert(i == slen); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 0a95b3b1..e54c49b0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" diff --git a/src/malloc_io.c b/src/malloc_io.c index cfefcacb..4b7d2e4a 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -53,7 +53,6 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. */ -static void wrtmessage(void *cbopaque, const char *s); #define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p); @@ -68,7 +67,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /******************************************************************************/ /* malloc_message() setup. 
*/ -static void +void wrtmessage(void *cbopaque, const char *s) { malloc_write_fd(STDERR_FILENO, s, strlen(s)); } @@ -664,36 +663,6 @@ malloc_printf(const char *format, ...) { va_end(ap); } -void -buf_write_flush(buf_write_arg_t *arg) { - assert(arg->buf_end <= arg->buf_size); - arg->buf[arg->buf_end] = '\0'; - if (arg->write_cb == NULL) { - arg->write_cb = je_malloc_message != NULL ? - je_malloc_message : wrtmessage; - } - arg->write_cb(arg->cbopaque, arg->buf); - arg->buf_end = 0; -} - -void -buf_write_cb(void *buf_write_arg, const char *s) { - buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; - size_t i, slen, n, s_remain, buf_remain; - assert(arg->buf_end <= arg->buf_size); - for (i = 0, slen = strlen(s); i < slen; i += n) { - if (arg->buf_end == arg->buf_size) { - buf_write_flush(arg); - } - s_remain = slen - i; - buf_remain = arg->buf_size - arg->buf_end; - n = s_remain < buf_remain ? s_remain : buf_remain; - memcpy(arg->buf + arg->buf_end, s + i, n); - arg->buf_end += n; - } - assert(i == slen); -} - /* * Restore normal assertion macros, in order to make it possible to compile all * C files as a single concatenation. 
diff --git a/src/prof_log.c b/src/prof_log.c index e3d21af2..d0b798de 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/hash.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index f4cad097..a1f71ea1 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_recent.h" diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 5051f76a..bbdb657e 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/buf_writer.h" + #define TEST_BUF_SIZE 16 #define UNIT_MAX (TEST_BUF_SIZE * 3) From 40a391408c6edbabac4e408c1cdfdda64c0cd356 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 16:50:09 -0800 Subject: [PATCH 1496/2608] Define constructor for buffered writer argument --- include/jemalloc/internal/buf_writer.h | 12 +++++++++++- src/jemalloc.c | 17 +++++++++-------- src/prof_log.c | 17 +++++++++-------- src/prof_recent.c | 5 +++-- 4 files changed, 32 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 013bbf5d..1acda9a4 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -14,10 +14,20 @@ typedef struct { void (*write_cb)(void *, const char *); void *cbopaque; char *buf; - size_t buf_size; /* must be one less than the capacity of buf array */ + size_t buf_size; size_t buf_end; } buf_write_arg_t; +JEMALLOC_ALWAYS_INLINE void 
+buf_write_init(buf_write_arg_t *arg, void (*write_cb)(void *, const char *), + void *cbopaque, char *buf, size_t buf_len) { + arg->write_cb = write_cb; + arg->cbopaque = cbopaque; + arg->buf = buf; + arg->buf_size = buf_len - 1; /* Accommodating '\0' at the end. */ + arg->buf_end = 0; +} + void buf_write_flush(buf_write_arg_t *arg); void buf_write_cb(void *buf_write_arg, const char *s); diff --git a/src/jemalloc.c b/src/jemalloc.c index e54c49b0..5503fd00 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3694,14 +3694,15 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_debug) { stats_print(write_cb, cbopaque, opts); } else { - char *stats_print_buf = (char *)iallocztm(tsdn, - STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), - false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t stats_print_buf_arg = {write_cb, cbopaque, - stats_print_buf, STATS_PRINT_BUFSIZE - 1, 0}; - stats_print(buf_write_cb, &stats_print_buf_arg, opts); - buf_write_flush(&stats_print_buf_arg); - idalloctm(tsdn, stats_print_buf, NULL, NULL, true, true); + char *buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, + sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + STATS_PRINT_BUFSIZE); + stats_print(buf_write_cb, &buf_arg, opts); + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); } check_entry_exit_locking(tsdn); diff --git a/src/prof_log.c b/src/prof_log.c index d0b798de..9495cf7a 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -629,15 +629,16 @@ prof_log_stop(tsdn_t *tsdn) { struct prof_emitter_cb_arg_s arg; arg.fd = fd; - char *prof_log_stop_buf = (char *)iallocztm(tsdn, - PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), - false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t prof_log_stop_buf_arg = {prof_emitter_write_cb, &arg, - 
prof_log_stop_buf, PROF_LOG_STOP_BUFSIZE - 1, 0}; + char *buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, + sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, + PROF_LOG_STOP_BUFSIZE); /* Emit to json. */ emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, - &prof_log_stop_buf_arg); + &buf_arg); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -646,8 +647,8 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - buf_write_flush(&prof_log_stop_buf_arg); - idalloctm(tsdn, prof_log_stop_buf, NULL, NULL, true, true); + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); /* Reset global state. */ if (log_tables_initialized) { diff --git a/src/prof_recent.c b/src/prof_recent.c index a1f71ea1..ed4170e0 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -462,8 +462,9 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); - buf_write_arg_t buf_arg = {write_cb, cbopaque, buf, - PROF_RECENT_PRINT_BUFSIZE - 1, 0}; + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, &buf_arg); From 2b604a3016f2cbda9499e2533ebef43b6fa9b72e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 9 Jan 2020 10:20:34 -0800 Subject: [PATCH 1497/2608] Record request size in prof recent entries --- include/jemalloc/internal/prof_externs.h | 4 ++-- include/jemalloc/internal/prof_inlines_b.h | 12 ++++++------ include/jemalloc/internal/prof_recent.h | 2 +- include/jemalloc/internal/prof_structs.h | 2 +- src/jemalloc.c | 8 ++++---- src/prof.c 
| 6 +++--- src/prof_recent.c | 9 +++++---- test/unit/prof_recent.c | 4 ++-- 8 files changed, 24 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a07fd22b..df4f7cd8 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -54,8 +54,8 @@ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); -void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx); +void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, + size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 9ea0454c..d0cc48d0 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -126,22 +126,22 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { } JEMALLOC_ALWAYS_INLINE void -prof_malloc(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx, - prof_tctx_t *tctx) { +prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, + alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, size, usize, tctx); } else { prof_tctx_reset(tsd, ptr, alloc_ctx); } } JEMALLOC_ALWAYS_INLINE void -prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, - bool prof_active, const void *old_ptr, size_t old_usize, +prof_realloc(tsd_t *tsd, const void *ptr, size_t size, 
size_t usize, + prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize, prof_info_t *old_prof_info) { bool sampled, old_sampled, moved; @@ -168,7 +168,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx, moved = (ptr != old_ptr); if (unlikely(sampled)) { - prof_malloc_sample_object(tsd, ptr, usize, tctx); + prof_malloc_sample_object(tsd, ptr, size, usize, tctx); } else if (moved) { prof_tctx_reset(tsd, ptr, NULL); } else if (unlikely(old_sampled)) { diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index d0e9e1e1..b2973db4 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); -void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize); +void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 59c0f4ff..ee78643e 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -213,7 +213,7 @@ struct prof_recent_s { nstime_t dalloc_time; prof_recent_t *next; - size_t usize; + size_t size; prof_tctx_t *alloc_tctx; edata_t *alloc_edata; /* NULL means allocation has been freed. 
*/ prof_tctx_t *dalloc_tctx; diff --git a/src/jemalloc.c b/src/jemalloc.c index 5503fd00..e33d0326 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2175,7 +2175,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { prof_alloc_rollback(tsd, tctx, true); goto label_oom; } - prof_malloc(tsd, allocation, usize, &alloc_ctx, tctx); + prof_malloc(tsd, allocation, size, usize, &alloc_ctx, tctx); } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, @@ -3045,8 +3045,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, */ *usize = isalloc(tsd_tsdn(tsd), p); } - prof_realloc(tsd, p, *usize, tctx, prof_active, old_ptr, old_usize, - &old_prof_info); + prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, + old_usize, &old_prof_info); return p; } @@ -3338,7 +3338,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx, false); } else { prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); - prof_realloc(tsd, ptr, usize, tctx, prof_active, ptr, + prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, old_usize, &prof_info); } diff --git a/src/prof.c b/src/prof.c index 159600e7..791c362f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -145,8 +145,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { } void -prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_tctx_t *tctx) { +prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, + size_t usize, prof_tctx_t *tctx) { edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); prof_info_set(tsd, edata, tctx); @@ -162,7 +162,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t usize, malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); if (record_recent) { assert(tctx == edata_prof_tctx_get(edata)); - prof_recent_alloc(tsd, edata, usize); + prof_recent_alloc(tsd, edata, size); } } diff --git a/src/prof_recent.c 
b/src/prof_recent.c index ed4170e0..0ae449f5 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -249,7 +249,7 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { } void -prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) { +prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { assert(edata != NULL); prof_tctx_t *tctx = edata_prof_tctx_get(edata); @@ -312,7 +312,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t usize) { { /* Fill content into the dummy node. */ prof_recent_t *node = prof_recent_alloc_dummy; - node->usize = usize; + node->size = size; nstime_copy(&node->alloc_time, edata_prof_alloc_time_get(edata)); node->alloc_tctx = tctx; @@ -487,8 +487,9 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), n = prof_recent_alloc_next(tsd, n)) { emitter_json_object_begin(&emitter); - emitter_json_kv(&emitter, "usize", emitter_type_size, - &n->usize); + emitter_json_kv(&emitter, "size", emitter_type_size, &n->size); + size_t usize = sz_s2u(n->size); + emitter_json_kv(&emitter, "usize", emitter_type_size, &usize); bool released = n->alloc_edata == NULL; emitter_json_kv(&emitter, "released", emitter_type_bool, &released); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index e10ac3fe..0f140a85 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -116,8 +116,8 @@ static void confirm_malloc(tsd_t *tsd, void *p) { static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_zu_eq(n->usize, sz_s2u(NTH_REQ_SIZE(kth)), - "Recorded allocation usize is wrong"); + assert_zu_eq(n->size, NTH_REQ_SIZE(kth), + "Recorded allocation size is wrong"); } static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { From a5d3dd4059a19268e6c2916b4014e395442d5750 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 Jan 2020 11:19:14 -0800 Subject: [PATCH 1498/2608] Fix an assertion on extent head state with dss. 
--- src/ehooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ehooks.c b/src/ehooks.c index 667bee84..1e1cac9f 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -221,7 +221,8 @@ ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, assert(!ehooks_same_sn(tsdn, addr_a, addr_b)); return true; } - assert(ehooks_same_sn(tsdn, addr_a, addr_b)); + assert(ehooks_same_sn(tsdn, addr_a, addr_b) || (have_dss && + (extent_in_dss(addr_a) || extent_in_dss(addr_b)))); return false; } From ad3f3fc561d5829a0a998c1b0650f6e7c7474a74 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 11:12:18 -0800 Subject: [PATCH 1499/2608] Fetch time after tctx and only for samples --- src/large.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/large.c b/src/large.c index ca35fc54..9b946178 100644 --- a/src/large.c +++ b/src/large.c @@ -372,18 +372,20 @@ void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) { assert(prof_info != NULL); - nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); prof_info->alloc_tctx = alloc_tctx; - if (reset_recent && (uintptr_t)alloc_tctx > (uintptr_t)1U) { - /* - * This allocation was a prof sample. Reset the pointer on the - * recent allocation record, so that this allocation is - * recorded as released. - */ - prof_recent_alloc_reset(tsd, edata); + if ((uintptr_t)alloc_tctx > (uintptr_t)1U) { + nstime_copy(&prof_info->alloc_time, + edata_prof_alloc_time_get(edata)); + if (reset_recent) { + /* + * Reset the pointer on the recent allocation record, + * so that this allocation is recorded as released. + */ + prof_recent_alloc_reset(tsd, edata); + } } } From dab81bd315e3eee19552ab68d331f693b205866a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Jan 2020 23:28:09 -0800 Subject: [PATCH 1500/2608] Rework and fix the assertions on malloc fastpath. 
The first half of the malloc fastpath may execute before malloc_init. Make the assertions work in that case. --- include/jemalloc/internal/sz.h | 24 ++++++++++++++++++---- src/jemalloc.c | 37 +++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6df541f6..73fb0a4c 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -152,10 +152,15 @@ sz_size2index_compute(size_t size) { } JEMALLOC_ALWAYS_INLINE szind_t -sz_size2index_lookup(size_t size) { +sz_size2index_lookup_impl(size_t size) { assert(size <= SC_LOOKUP_MAXCLASS); - szind_t ret = (sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1) - >> SC_LG_TINY_MIN]); + return sz_size2index_tab[(size + (ZU(1) << SC_LG_TINY_MIN) - 1) + >> SC_LG_TINY_MIN]; +} + +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_lookup(size_t size) { + szind_t ret = sz_size2index_lookup_impl(size); assert(ret == sz_size2index_compute(size)); return ret; } @@ -194,9 +199,14 @@ sz_index2size_compute(szind_t index) { } } +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_lookup_impl(szind_t index) { + return sz_index2size_tab[index]; +} + JEMALLOC_ALWAYS_INLINE size_t sz_index2size_lookup(szind_t index) { - size_t ret = (size_t)sz_index2size_tab[index]; + size_t ret = sz_index2size_lookup_impl(index); assert(ret == sz_index2size_compute(index)); return ret; } @@ -207,6 +217,12 @@ sz_index2size(szind_t index) { return sz_index2size_lookup(index); } +JEMALLOC_ALWAYS_INLINE void +sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { + *ind = sz_size2index_lookup_impl(size); + *usize = sz_index2size_lookup_impl(*ind); +} + JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index e33d0326..64550613 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2361,29 +2361,48 @@ je_malloc(size_t size) { if 
(unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { return malloc_default(size); } - - szind_t ind = sz_size2index_lookup(size); + /* + * The code below till the branch checking the next_event threshold may + * execute before malloc_init(), in which case the threshold is 0 to + * trigger slow path and initialization. + * + * Note that when uninitialized, only the fast-path variants of the sz / + * tsd facilities may be called. + */ + szind_t ind; /* * The thread_allocated counter in tsd serves as a general purpose * accumulator for bytes of allocation to trigger different types of * events. usize is always needed to advance thread_allocated, though * it's not always needed in the core allocation logic. */ - size_t usize = sz_index2size(ind); - /* - * Fast path relies on size being a bin. - * I.e. SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS - */ + size_t usize; + + sz_size2index_usize_fastpath(size, &ind, &usize); + /* Fast path relies on size being a bin. */ assert(ind < SC_NBINS); - assert(size <= SC_SMALL_MAXCLASS); + assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && + (size <= SC_SMALL_MAXCLASS)); uint64_t allocated = thread_allocated_malloc_fastpath(tsd); uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + uint64_t allocated_after = allocated + usize; + /* + * The ind and usize might be uninitialized (or partially) before + * malloc_init(). The assertions check for: 1) full correctness (usize + * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0) + * when !initialized. + */ + if (!malloc_initialized()) { + assert(threshold == 0); + } else { + assert(ind == sz_size2index(size)); + assert(usize > 0 && usize == sz_index2size(ind)); + } /* * Check for events and tsd non-nominal (fast_threshold will be set to * 0) in a single branch. 
*/ - uint64_t allocated_after = allocated + usize; if (unlikely(allocated_after >= threshold)) { return malloc_default(size); } From b8df719d5c10f6b52263ca4e7bb800c2796b6767 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 11:23:32 -0800 Subject: [PATCH 1501/2608] No tdata creation for backtracing on dying thread --- src/prof_data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_data.c b/src/prof_data.c index dfc507f9..723e579c 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -406,7 +406,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { prof_tctx_t * prof_tctx_create(tsd_t *tsd) { - if (tsd_reentrancy_level_get(tsd) > 0) { + if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) { return NULL; } From bd3be8e0b169e8a3952cbed1a399cfffe9023862 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 16 Jan 2020 13:28:27 -0800 Subject: [PATCH 1502/2608] Remove commit parameter to ecache functions. No caller ever wants uncommitted memory. --- include/jemalloc/internal/extent.h | 4 ++-- src/arena.c | 19 +++++++------------ src/extent.c | 12 +++++++----- src/large.c | 7 +++---- 4 files changed, 19 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 8fecee62..b89708a4 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -23,10 +23,10 @@ extern rtree_t extents_rtree; edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); + bool slab, szind_t szind, bool *zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); + bool slab, szind_t szind, bool *zero); void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, 
ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/arena.c b/src/arena.c index 7e1a673e..d04712a5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -433,19 +433,17 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; - bool commit = true; edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, usize, sz_large_pad, alignment, false, szind, zero, &commit); + NULL, usize, sz_large_pad, alignment, false, szind, zero); if (edata == NULL && arena_may_have_muzzy(arena)) { edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, usize, sz_large_pad, alignment, false, szind, zero, - &commit); + NULL, usize, sz_large_pad, alignment, false, szind, zero); } size_t size = usize + sz_large_pad; if (edata == NULL) { edata = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, NULL, usize, sz_large_pad, - alignment, false, szind, zero, &commit); + alignment, false, szind, zero); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add @@ -1203,15 +1201,14 @@ static edata_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, const bin_info_t *bin_info, szind_t szind) { edata_t *slab; - bool zero, commit; + bool zero; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); zero = false; - commit = true; slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, - NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero, &commit); + NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1230,13 +1227,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - bool commit = true; edata_t *slab 
= ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, &commit); + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero, - &commit); + NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, diff --git a/src/extent.c b/src/extent.c index 54f14995..9779c38b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -186,14 +186,15 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t * ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { + szind_t szind, bool *zero) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + bool commit = true; edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + size, pad, alignment, slab, szind, zero, &commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } @@ -201,14 +202,15 @@ ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t * ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit) { + bool slab, szind_t szind, bool *zero) { assert(size + pad != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, commit); + size, 
pad, alignment, slab, szind, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -220,7 +222,7 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } edata = extent_alloc_wrapper(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, commit); + new_addr, size, pad, alignment, slab, szind, zero, &commit); } assert(edata == NULL || edata_dumpable_get(edata)); diff --git a/src/large.c b/src/large.c index 9b946178..e133e193 100644 --- a/src/large.c +++ b/src/large.c @@ -147,22 +147,21 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * false. */ bool is_zeroed_trail = zero; - bool commit = true; edata_t *trail; bool new_mapping; if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail, &commit)) != NULL + &is_zeroed_trail)) != NULL || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail, &commit)) != NULL) { + &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, edata_past_get(edata), trailsize, - 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail, &commit)) + 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { return true; } From 7b67ed0b5a90d5288c66c132f210883dece99181 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 16:10:23 -0800 Subject: [PATCH 1503/2608] Get rid of lock overlap in prof_recent_alloc_reset --- src/prof_recent.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 0ae449f5..a53f82c7 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -182,12 +182,15 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { if (dalloc_tctx != NULL) { 
nstime_update(&recent->dalloc_time); recent->dalloc_tctx = dalloc_tctx; + dalloc_tctx = NULL; } - } else if (dalloc_tctx != NULL) { + } + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + + if (dalloc_tctx != NULL) { /* We lost the rase - the allocation record was just gone. */ decrement_recent_count(tsd, dalloc_tctx); } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } static void From a72ea0db60bc475415c13f1057408389bccb40a4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Jan 2020 07:25:59 -0800 Subject: [PATCH 1504/2608] Restructure and correct sleep utility for testing --- Makefile.in | 2 +- test/include/test/jemalloc_test.h.in | 1 + test/include/test/mq.h | 4 ++-- test/include/test/sleep.h | 1 + test/src/{mq.c => sleep.c} | 4 ++-- 5 files changed, 7 insertions(+), 5 deletions(-) create mode 100644 test/include/test/sleep.h rename test/src/{mq.c => sleep.c} (89%) diff --git a/Makefile.in b/Makefile.in index d923d507..24ab5421 100644 --- a/Makefile.in +++ b/Makefile.in @@ -167,7 +167,7 @@ DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) DOCS := $(DOCS_HTML) $(DOCS_MAN3) C_TESTLIB_SRCS := $(srcroot)test/src/btalloc.c $(srcroot)test/src/btalloc_0.c \ $(srcroot)test/src/btalloc_1.c $(srcroot)test/src/math.c \ - $(srcroot)test/src/mtx.c $(srcroot)test/src/mq.c \ + $(srcroot)test/src/mtx.c $(srcroot)test/src/sleep.c \ $(srcroot)test/src/SFMT.c $(srcroot)test/src/test.c \ $(srcroot)test/src/thd.c $(srcroot)test/src/timer.c ifeq (1, $(link_whole_archive)) diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index c46af5d9..a59408fc 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -124,6 +124,7 @@ static const bool config_debug = #include "test/math.h" #include "test/mtx.h" #include "test/mq.h" +#include "test/sleep.h" #include "test/test.h" #include "test/timer.h" #include "test/thd.h" diff --git a/test/include/test/mq.h b/test/include/test/mq.h 
index af2c078d..5dc6486c 100644 --- a/test/include/test/mq.h +++ b/test/include/test/mq.h @@ -1,4 +1,4 @@ -void mq_nanosleep(unsigned ns); +#include "test/sleep.h" /* * Simple templated message queue implementation that relies on only mutexes for @@ -82,7 +82,7 @@ a_prefix##get(a_mq_type *mq) { \ \ ns = 1; \ while (true) { \ - mq_nanosleep(ns); \ + sleep_ns(ns); \ msg = a_prefix##tryget(mq); \ if (msg != NULL) { \ return msg; \ diff --git a/test/include/test/sleep.h b/test/include/test/sleep.h new file mode 100644 index 00000000..c232f633 --- /dev/null +++ b/test/include/test/sleep.h @@ -0,0 +1 @@ +void sleep_ns(unsigned ns); diff --git a/test/src/mq.c b/test/src/sleep.c similarity index 89% rename from test/src/mq.c rename to test/src/sleep.c index 9b5f672d..2234b4bc 100644 --- a/test/src/mq.c +++ b/test/src/sleep.c @@ -5,11 +5,11 @@ * time is guaranteed. */ void -mq_nanosleep(unsigned ns) { +sleep_ns(unsigned ns) { assert(ns <= 1000*1000*1000); #ifdef _WIN32 - Sleep(ns / 1000); + Sleep(ns / 1000 / 1000); #else { struct timespec timeout; From d3312085603ab84e13e820be19f55f05e75a46ea Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Jan 2020 20:43:45 -0800 Subject: [PATCH 1505/2608] Get rid of redundant logic in prof --- src/prof_data.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index 723e579c..2a25ec7e 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -218,8 +218,8 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { } static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx, - prof_tdata_t *tdata) { +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, + prof_gctx_t *gctx) { cassert(config_prof); /* @@ -371,7 +371,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { arena_ichoose(tsd, NULL), true); if (ret.p == NULL) { if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx); } return NULL; } @@ 
-389,7 +389,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (error) { if (new_gctx) { - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx); } idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true); return NULL; @@ -767,7 +767,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { if (prof_gctx_should_destroy(gctx)) { gctx->nlimbo++; malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); - prof_gctx_try_destroy(tsd, tdata, gctx, tdata); + prof_gctx_try_destroy(tsd, tdata, gctx); } else { malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); } @@ -1367,8 +1367,7 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { malloc_mutex_lock(tsdn, tdata->lock); if (!tdata->expired) { tdata->expired = true; - destroy_tdata = tdata->attached ? false : - prof_tdata_should_destroy(tsdn, tdata, false); + destroy_tdata = prof_tdata_should_destroy(tsdn, tdata, false); } else { destroy_tdata = false; } @@ -1492,8 +1491,7 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { } malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock); if (destroy_gctx) { - prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx, - tdata); + prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx); } malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); From 84b28c6a13d4d208e547bc50f7091107f5161957 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Jan 2020 20:38:46 -0800 Subject: [PATCH 1506/2608] Properly handle tdata deletion race --- src/prof_data.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index 2a25ec7e..9721cbe7 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1315,6 +1315,7 @@ static void prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock); 
tdata_tree_remove(&tdatas, tdata); @@ -1432,10 +1433,6 @@ prof_tctx_should_destroy(tsd_t *tsd, prof_tctx_t *tctx) { static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { - prof_tdata_t *tdata = tctx->tdata; - prof_gctx_t *gctx = tctx->gctx; - bool destroy_tdata, destroy_tctx, destroy_gctx; - malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs == 0); @@ -1444,9 +1441,21 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); - ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false); - malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + prof_gctx_t *gctx = tctx->gctx; + + { + prof_tdata_t *tdata = tctx->tdata; + tctx->tdata = NULL; + ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); + bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), + tdata, false); + malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); + if (destroy_tdata) { + prof_tdata_destroy(tsd, tdata, false); + } + } + + bool destroy_tctx, destroy_gctx; malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock); switch (tctx->state) { @@ -1493,13 +1502,6 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { if (destroy_gctx) { prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx); } - - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock); - - if (destroy_tdata) { - prof_tdata_destroy(tsd, tdata, false); - } - if (destroy_tctx) { idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true); } From cd6e908241900640864b59a4dae835e9cecfc0cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Jan 2020 16:01:45 -0800 Subject: [PATCH 1507/2608] Add stress test for last-N profiling mode --- test/unit/prof_recent.c | 117 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 2 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 0f140a85..c1324527 100644 --- 
a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/prof_recent.h" /* As specified in the shell script */ -#define OPT_ALLOC_MAX 3 +#define OPT_ALLOC_MAX 3 /* Invariant before and after every test (when config_prof is on) */ static void confirm_prof_setup(tsd_t *tsd) { @@ -381,11 +381,124 @@ TEST_END #undef NTH_REQ_SIZE +#define N_THREADS 16 +#define N_PTRS 512 +#define N_CTLS 8 +#define N_ITERS 2048 +#define STRESS_ALLOC_MAX 4096 + +typedef struct { + thd_t thd; + size_t id; + void *ptrs[N_PTRS]; + size_t count; +} thd_data_t; + +static thd_data_t thd_data[N_THREADS]; +static ssize_t test_max; + +static void test_write_cb(void *cbopaque, const char *str) { + sleep_ns(1000 * 1000); +} + +static void *f_thread(void *arg) { + const size_t thd_id = *(size_t *)arg; + thd_data_t *data_p = thd_data + thd_id; + assert(data_p->id == thd_id); + data_p->count = 0; + uint64_t rand = (uint64_t)thd_id; + tsd_t *tsd = tsd_fetch(); + assert(test_max > 1); + ssize_t last_max = -1; + for (int i = 0; i < N_ITERS; i++) { + rand = prng_range_u64(&rand, N_PTRS + N_CTLS * 5); + assert(data_p->count <= N_PTRS); + if (rand < data_p->count) { + assert(data_p->count > 0); + if (rand != data_p->count - 1) { + assert(data_p->count > 1); + void *temp = data_p->ptrs[rand]; + data_p->ptrs[rand] = + data_p->ptrs[data_p->count - 1]; + data_p->ptrs[data_p->count - 1] = temp; + } + free(data_p->ptrs[--data_p->count]); + } else if (rand < N_PTRS) { + assert(data_p->count < N_PTRS); + data_p->ptrs[data_p->count++] = malloc(1); + } else if (rand % 5 == 0) { + prof_recent_alloc_dump(tsd, test_write_cb, NULL); + } else if (rand % 5 == 1) { + last_max = prof_recent_alloc_max_ctl_read(tsd); + } else if (rand % 5 == 2) { + last_max = + prof_recent_alloc_max_ctl_write(tsd, test_max * 2); + } else if (rand % 5 == 3) { + last_max = + prof_recent_alloc_max_ctl_write(tsd, test_max); + } else { + assert(rand % 5 == 4); + last_max = + 
prof_recent_alloc_max_ctl_write(tsd, test_max / 2); + } + assert_zd_ge(last_max, -1, "Illegal last-N max"); + } + + while (data_p->count > 0) { + free(data_p->ptrs[--data_p->count]); + } + + return NULL; +} + +TEST_BEGIN(test_prof_recent_stress) { + test_skip_if(!config_prof); + + tsd_t *tsd = tsd_fetch(); + confirm_prof_setup(tsd); + + test_max = OPT_ALLOC_MAX; + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + data_p->id = i; + thd_create(&data_p->thd, &f_thread, &data_p->id); + } + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + thd_join(data_p->thd, NULL); + } + + test_max = STRESS_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + data_p->id = i; + thd_create(&data_p->thd, &f_thread, &data_p->id); + } + for (size_t i = 0; i < N_THREADS; i++) { + thd_data_t *data_p = thd_data + i; + thd_join(data_p->thd, NULL); + } + + test_max = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + confirm_prof_setup(tsd); +} +TEST_END + +#undef STRESS_ALLOC_MAX +#undef N_ITERS +#undef N_PTRS +#undef N_THREADS + int main(void) { return test( test_confirm_setup, test_prof_recent_off, test_prof_recent_on, - test_prof_recent_alloc); + test_prof_recent_alloc, + test_prof_recent_stress); } From f81341a48b15e9257d573b80e8e45589137397ec Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Jan 2020 14:23:10 -0800 Subject: [PATCH 1508/2608] Fallback to unbuffered printing if OOM --- include/jemalloc/internal/buf_writer.h | 2 ++ src/jemalloc.c | 16 ++++++++++------ src/prof_log.c | 21 +++++++++++++-------- src/prof_recent.c | 21 ++++++++++++++------- 4 files changed, 39 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h 
b/include/jemalloc/internal/buf_writer.h index 1acda9a4..60bd0108 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -23,7 +23,9 @@ buf_write_init(buf_write_arg_t *arg, void (*write_cb)(void *, const char *), void *cbopaque, char *buf, size_t buf_len) { arg->write_cb = write_cb; arg->cbopaque = cbopaque; + assert(buf != NULL); arg->buf = buf; + assert(buf_len >= 2); arg->buf_size = buf_len - 1; /* Accommodating '\0' at the end. */ arg->buf_end = 0; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 64550613..218e04a1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3716,12 +3716,16 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, char *buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, write_cb, cbopaque, buf, - STATS_PRINT_BUFSIZE); - stats_print(buf_write_cb, &buf_arg, opts); - buf_write_flush(&buf_arg); - idalloctm(tsdn, buf, NULL, NULL, true, true); + if (buf == NULL) { + stats_print(write_cb, cbopaque, opts); + } else { + buf_write_arg_t buf_arg; + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + STATS_PRINT_BUFSIZE); + stats_print(buf_write_cb, &buf_arg, opts); + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); + } } check_entry_exit_locking(tsdn); diff --git a/src/prof_log.c b/src/prof_log.c index 9495cf7a..a04c8e40 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -633,12 +633,15 @@ prof_log_stop(tsdn_t *tsdn) { sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, - PROF_LOG_STOP_BUFSIZE); - - /* Emit to json. 
*/ - emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, - &buf_arg); + if (buf == NULL) { + emitter_init(&emitter, emitter_output_json_compact, + prof_emitter_write_cb, &arg); + } else { + buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, + PROF_LOG_STOP_BUFSIZE); + emitter_init(&emitter, emitter_output_json_compact, + buf_write_cb, &buf_arg); + } emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -647,8 +650,10 @@ prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - buf_write_flush(&buf_arg); - idalloctm(tsdn, buf, NULL, NULL, true, true); + if (buf != NULL) { + buf_write_flush(&buf_arg); + idalloctm(tsdn, buf, NULL, NULL, true, true); + } /* Reset global state. */ if (log_tables_initialized) { diff --git a/src/prof_recent.c b/src/prof_recent.c index a53f82c7..66a9b406 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -465,12 +465,17 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); - buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, write_cb, cbopaque, buf, - PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, buf_write_cb, - &buf_arg); + buf_write_arg_t buf_arg; + if (buf == NULL) { + emitter_init(&emitter, emitter_output_json_compact, write_cb, + cbopaque); + } else { + buf_write_init(&buf_arg, write_cb, cbopaque, buf, + PROF_RECENT_PRINT_BUFSIZE); + emitter_init(&emitter, emitter_output_json_compact, + buf_write_cb, &buf_arg); + } emitter_begin(&emitter); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -530,8 +535,10 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_end(&emitter); - buf_write_flush(&buf_arg); - 
idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); + if (buf != NULL) { + buf_write_flush(&buf_arg); + idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); + } } #undef PROF_RECENT_PRINT_BUFSIZE From 6a622867cac04d7cdd4cf9cf19b7a367f9108fa5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jan 2020 11:13:26 -0800 Subject: [PATCH 1509/2608] Add "thread.idle" mallctl. This can encapsulate various internal cleaning logic, and can be used to free up resources before a long sleep. --- doc/jemalloc.xml.in | 22 ++++++++++++++ src/ctl.c | 45 +++++++++++++++++++++++++++- test/unit/mallctl.c | 72 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 137 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 76edab81..b0a3f6cf 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1654,6 +1654,28 @@ malloc_conf = "xmalloc:true";]]> default. + + + thread.idle + (void) + -- + + Hints to jemalloc that the calling thread will be idle + for some nontrivial period of time (say, on the order of seconds), and + that doing some cleanup operations may be beneficial. There are no + guarantees as to what specific operations will be performed; currently + this flushes the caller's tcache and may (according to some heuristic) + purge its associated arena. + This is not intended to be a general-purpose background activity + mechanism, and threads should not wake up multiple times solely to call + it. Rather, a thread waiting for a task should do a timed wait first, + call thread.idle if + no task appears in the timeout interval, and then do an untimed wait. + For such a background activity mechanism, see + background_thread. 
+ + + tcache.create diff --git a/src/ctl.c b/src/ctl.c index 5a467d5a..bbe962c8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -68,6 +68,7 @@ CTL_PROTO(thread_allocated) CTL_PROTO(thread_allocatedp) CTL_PROTO(thread_deallocated) CTL_PROTO(thread_deallocatedp) +CTL_PROTO(thread_idle) CTL_PROTO(config_cache_oblivious) CTL_PROTO(config_debug) CTL_PROTO(config_fill) @@ -293,7 +294,8 @@ static const ctl_named_node_t thread_node[] = { {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, {NAME("tcache"), CHILD(named, thread_tcache)}, - {NAME("prof"), CHILD(named, thread_prof)} + {NAME("prof"), CHILD(named, thread_prof)}, + {NAME("idle"), CTL(thread_idle)} }; static const ctl_named_node_t config_node[] = { @@ -1900,6 +1902,12 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } + /* + * Slightly counterintuitively, READONLY() really just requires that the + * call isn't trying to write, and WRITEONLY() just requires that it + * isn't trying to read; hence, adding both requires that the operation + * is neither a read nor a write. + */ READONLY(); WRITEONLY(); @@ -1971,6 +1979,41 @@ label_return: return ret; } +static int +thread_idle_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + + /* See the comment in thread_tcache_flush_ctl. */ + READONLY(); + WRITEONLY(); + + if (tcache_available(tsd)) { + tcache_flush(tsd); + } + /* + * This heuristic is perhaps not the most well-considered. But it + * matches the only idling policy we have experience with in the status + * quo. Over time we should investigate more principled approaches. + */ + if (opt_narenas > ncpus * 2) { + arena_t *arena = arena_choose(tsd, NULL); + if (arena != NULL) { + arena_decay(tsd_tsdn(tsd), arena, false, true); + } + /* + * The missing arena case is not actually an error; a thread + * might be idle before it associates itself to one. 
This is + * unusual, but not wrong. + */ + } + + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ static int diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d317b4af..da1716a3 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -882,6 +882,75 @@ TEST_BEGIN(test_hooks_exhaustion) { } TEST_END +TEST_BEGIN(test_thread_idle) { + /* + * We're cheating a little bit in this test, and inferring things about + * implementation internals (like tcache details). We have to; + * thread.idle has no guaranteed effects. We need stats to make these + * inferences. + */ + test_skip_if(!config_stats); + + int err; + size_t sz; + size_t miblen; + + bool tcache_enabled = false; + sz = sizeof(tcache_enabled); + err = mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + test_skip_if(!tcache_enabled); + + size_t tcache_max; + sz = sizeof(tcache_max); + err = mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + test_skip_if(tcache_max == 0); + + unsigned arena_ind; + sz = sizeof(arena_ind); + err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + + /* We're going to do an allocation of size 1, which we know is small. */ + size_t mib[5]; + miblen = sizeof(mib)/sizeof(mib[0]); + err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen); + assert_d_eq(err, 0, ""); + mib[2] = arena_ind; + + /* + * This alloc and dalloc should leave something in the tcache, in a + * small size's cache bin. 
+ */ + void *ptr = mallocx(1, 0); + dallocx(ptr, 0); + + uint64_t epoch; + err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); + assert_d_eq(err, 0, ""); + + uint64_t small_dalloc_pre_idle; + sz = sizeof(small_dalloc_pre_idle); + err = mallctlbymib(mib, miblen, &small_dalloc_pre_idle, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + + err = mallctl("thread.idle", NULL, NULL, NULL, 0); + assert_d_eq(err, 0, ""); + + err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); + assert_d_eq(err, 0, ""); + + uint64_t small_dalloc_post_idle; + sz = sizeof(small_dalloc_post_idle); + err = mallctlbymib(mib, miblen, &small_dalloc_post_idle, &sz, NULL, 0); + assert_d_eq(err, 0, ""); + + assert_u64_lt(small_dalloc_pre_idle, small_dalloc_post_idle, + "Purge didn't flush the tcache"); +} +TEST_END + int main(void) { return test( @@ -913,5 +982,6 @@ main(void) { test_prof_active, test_stats_arenas, test_hooks, - test_hooks_exhaustion); + test_hooks_exhaustion, + test_thread_idle); } From d92f0175c75b5c9d9fc2bccabd2af0e6ebce7757 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jan 2020 14:59:28 -0800 Subject: [PATCH 1510/2608] Introduce NEITHER_READ_NOR_WRITE in ctl. This is slightly clearer in meaning. A function that is both READONLY() and WRITEONLY() is in fact neither one. --- src/ctl.c | 32 +++++++++++++++----------------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index bbe962c8..d357b383 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1451,6 +1451,7 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +/* Can read or write, but not both. */ #define READ_XOR_WRITE() do { \ if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \ newlen != 0)) { \ @@ -1459,6 +1460,15 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +/* Can neither read nor write. 
*/ +#define NEITHER_READ_NOR_WRITE() do { \ + if (oldp != NULL || oldlenp != NULL || newp != NULL || \ + newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ +} while (0) + #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1902,14 +1912,7 @@ thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } - /* - * Slightly counterintuitively, READONLY() really just requires that the - * call isn't trying to write, and WRITEONLY() just requires that it - * isn't trying to read; hence, adding both requires that the operation - * is neither a read nor a write. - */ - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); tcache_flush(tsd); @@ -1985,9 +1988,7 @@ thread_idle_ctl(tsd_t *tsd, const size_t *mib, size_t newlen) { int ret; - /* See the comment in thread_tcache_flush_ctl. */ - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); if (tcache_available(tsd)) { tcache_flush(tsd); @@ -2151,8 +2152,7 @@ arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned arena_ind; - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); MIB_UNSIGNED(arena_ind, 1); arena_i_decay(tsd_tsdn(tsd), arena_ind, false); @@ -2167,8 +2167,7 @@ arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; unsigned arena_ind; - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); MIB_UNSIGNED(arena_ind, 1); arena_i_decay(tsd_tsdn(tsd), arena_ind, true); @@ -2183,8 +2182,7 @@ arena_i_reset_destroy_helper(tsd_t *tsd, const size_t *mib, size_t miblen, arena_t **arena) { int ret; - READONLY(); - WRITEONLY(); + NEITHER_READ_NOR_WRITE(); MIB_UNSIGNED(*arena_ind, 1); *arena = arena_get(tsd_tsdn(tsd), *arena_ind, false); From ea351a7b52430de88007bf16f354a132da311c5b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 23 Jan 2020 16:05:37 -0800 Subject: [PATCH 1511/2608] Fix syntax errors in doc for thread.idle. 
--- doc/jemalloc.xml.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b0a3f6cf..802c64ad 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1669,10 +1669,10 @@ malloc_conf = "xmalloc:true";]]> This is not intended to be a general-purpose background activity mechanism, and threads should not wake up multiple times solely to call it. Rather, a thread waiting for a task should do a timed wait first, - call thread.idle if - no task appears in the timeout interval, and then do an untimed wait. + call thread.idle + if no task appears in the timeout interval, and then do an untimed wait. For such a background activity mechanism, see - background_thread. + background_thread. From d71a145ec1bb8153c3d69be27eea5b076d59abfe Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 23 Jan 2020 13:18:04 -0800 Subject: [PATCH 1512/2608] Chagne prof_accum_t to counter_accum_t for general purpose. --- Makefile.in | 1 + include/jemalloc/internal/arena_structs.h | 3 +- include/jemalloc/internal/counter.h | 83 +++++++++++++++++++ include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_structs.h | 9 -- include/jemalloc/internal/prof_types.h | 1 - include/jemalloc/internal/witness.h | 2 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 2 +- src/counter.c | 22 +++++ src/prof.c | 76 ++--------------- 14 files changed, 125 insertions(+), 84 deletions(-) create mode 100644 include/jemalloc/internal/counter.h create mode 100644 src/counter.c diff --git a/Makefile.in b/Makefile.in index 24ab5421..37941ea1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -103,6 +103,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/buf_writer.c \ $(srcroot)src/ckh.c \ + $(srcroot)src/counter.c \ 
$(srcroot)src/ctl.c \ $(srcroot)src/div.c \ $(srcroot)src/ecache.c \ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 48d13b8c..2d5c5680 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/counter.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/extent_dss.h" @@ -117,7 +118,7 @@ struct arena_s { malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. */ - prof_accum_t prof_accum; + counter_accum_t prof_accum; /* * Extent serial number generator state. diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h new file mode 100644 index 00000000..302e3504 --- /dev/null +++ b/include/jemalloc/internal/counter.h @@ -0,0 +1,83 @@ +#ifndef JEMALLOC_INTERNAL_COUNTER_H +#define JEMALLOC_INTERNAL_COUNTER_H + +#include "jemalloc/internal/mutex.h" + +typedef struct counter_accum_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; + uint64_t accumbytes; +#else + atomic_u64_t accumbytes; +#endif + uint64_t interval; +} counter_accum_t; + +JEMALLOC_ALWAYS_INLINE bool +counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { + bool overflow; + uint64_t a0, a1; + + /* + * If the event moves fast enough (and/or if the event handling is slow + * enough), extreme overflow here (a1 >= interval * 2) can cause counter + * trigger coalescing. This is an intentional mechanism that avoids + * rate-limiting allocation. 
+ */ + uint64_t interval = counter->interval; + assert(interval > 0); +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&counter->accumbytes, ATOMIC_RELAXED); + do { + a1 = a0 + accumbytes; + assert(a1 >= a0); + overflow = (a1 >= interval); + if (overflow) { + a1 %= interval; + } + } while (!atomic_compare_exchange_weak_u64(&counter->accumbytes, &a0, a1, + ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &counter->mtx); + a0 = counter->accumbytes; + a1 = a0 + accumbytes; + overflow = (a1 >= interval); + if (overflow) { + a1 %= interval; + } + counter->accumbytes = a1; + malloc_mutex_unlock(tsdn, &counter->mtx); +#endif + return overflow; +} + +JEMALLOC_ALWAYS_INLINE void +counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, size_t usize) { + /* + * Cancel out as much of the excessive accumbytes increase as possible + * without underflowing. Interval-triggered events occur slightly more + * often than intended as a result of incomplete canceling. + */ + uint64_t a0, a1; +#ifdef JEMALLOC_ATOMIC_U64 + a0 = atomic_load_u64(&counter->accumbytes, + ATOMIC_RELAXED); + do { + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + } while (!atomic_compare_exchange_weak_u64( + &counter->accumbytes, &a0, a1, ATOMIC_RELAXED, + ATOMIC_RELAXED)); +#else + malloc_mutex_lock(tsdn, &counter->mtx); + a0 = counter->accumbytes; + a1 = (a0 >= SC_LARGE_MINCLASS - usize) + ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; + counter->accumbytes = a1; + malloc_mutex_unlock(tsdn, &counter->mtx); +#endif +} + +bool counter_accum_init(counter_accum_t *counter, uint64_t interval); + +#endif /* JEMALLOC_INTERNAL_COUNTER_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index df4f7cd8..36571c8c 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -73,7 +73,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(tsdn_t *tsdn); +bool prof_accum_init(void); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index ee78643e..977eb1c8 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -21,15 +21,6 @@ typedef struct { } prof_unwind_data_t; #endif -struct prof_accum_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; - uint64_t accumbytes; -#else - atomic_u64_t accumbytes; -#endif -}; - struct prof_cnt_s { /* Profiling counters. 
*/ uint64_t curobjs; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 498962db..4abe5b58 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_PROF_TYPES_H typedef struct prof_bt_s prof_bt_t; -typedef struct prof_accum_s prof_accum_t; typedef struct prof_cnt_s prof_cnt_t; typedef struct prof_tctx_s prof_tctx_t; typedef struct prof_info_s prof_info_t; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 4ed787a2..083bdcc9 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -54,9 +54,9 @@ #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF +#define WITNESS_RANK_COUNTER_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACCUM WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 4b25b856..d8b48986 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -43,6 +43,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 73ee8d1d..404adbe5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -28,6 +28,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ed6f618d..b0d32d93 100644 --- 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -43,6 +43,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 73ee8d1d..404adbe5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -28,6 +28,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index d04712a5..9558bb40 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1988,7 +1988,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (config_prof) { - if (prof_accum_init(tsdn)) { + if (prof_accum_init()) { goto label_error; } } diff --git a/src/counter.c b/src/counter.c new file mode 100644 index 00000000..1b8201e5 --- /dev/null +++ b/src/counter.c @@ -0,0 +1,22 @@ +#define JEMALLOC_COUNTER_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/counter.h" + +bool +counter_accum_init(counter_accum_t *counter, uint64_t interval) { +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&counter->mtx, "counter_accum", + WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { + return true; + } + counter->accumbytes = 0; +#else + atomic_store_u64(&counter->accumbytes, 0, + ATOMIC_RELAXED); +#endif + counter->interval = interval; + + return false; +} diff --git a/src/prof.c b/src/prof.c index 791c362f..649e9ca2 100644 --- a/src/prof.c +++ b/src/prof.c @@ -5,6 +5,7 @@ #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/counter.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" @@ -49,7 +50,7 @@ bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; /* Accessed via 
prof_idump_[accum/rollback](). */ -static prof_accum_t prof_idump_accumulated; +static counter_accum_t prof_idump_accumulated; /* * Initialized as opt_prof_active, and accessed via @@ -553,89 +554,24 @@ prof_fdump(void) { } bool -prof_accum_init(tsdn_t *tsdn) { +prof_accum_init(void) { cassert(config_prof); -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&prof_idump_accumulated.mtx, "prof_accum", - WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) { - return true; - } - prof_idump_accumulated.accumbytes = 0; -#else - atomic_store_u64(&prof_idump_accumulated.accumbytes, 0, - ATOMIC_RELAXED); -#endif - return false; + return counter_accum_init(&prof_idump_accumulated, prof_interval); } bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { cassert(config_prof); - bool overflow; - uint64_t a0, a1; - - /* - * If the application allocates fast enough (and/or if idump is slow - * enough), extreme overflow here (a1 >= prof_interval * 2) can cause - * idump trigger coalescing. This is an intentional mechanism that - * avoids rate-limiting allocation. 
- */ -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, - ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - } while (!atomic_compare_exchange_weak_u64( - &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, - ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); - a0 = prof_idump_accumulated.accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= prof_interval); - if (overflow) { - a1 %= prof_interval; - } - prof_idump_accumulated.accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); -#endif - return overflow; + return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); } void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { cassert(config_prof); - /* - * Cancel out as much of the excessive accumbytes increase as possible - * without underflowing. Interval-triggered dumps occur slightly more - * often than intended as a result of incomplete canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&prof_idump_accumulated.accumbytes, - ATOMIC_RELAXED); - do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - } while (!atomic_compare_exchange_weak_u64( - &prof_idump_accumulated.accumbytes, &a0, a1, ATOMIC_RELAXED, - ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &prof_idump_accumulated.mtx); - a0 = prof_idump_accumulated.accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; - prof_idump_accumulated.accumbytes = a1; - malloc_mutex_unlock(tsdn, &prof_idump_accumulated.mtx); -#endif + return counter_rollback(tsdn, &prof_idump_accumulated, usize); } bool From 88b0e03a4e081d3d9c1bdf369345679f9e23b983 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Jan 2020 22:29:17 -0800 Subject: [PATCH 1513/2608] Implement opt.stats_interval and the _opts options. 
Add options stats_interval and stats_interval_opts to allow interval based stats printing. This provides an easy way to collect stats without code changes, because opt.stats_print may not work (some binaries never exit). --- Makefile.in | 1 + doc/jemalloc.xml.in | 35 +++++++ include/jemalloc/internal/counter.h | 14 ++- include/jemalloc/internal/emitter.h | 4 + include/jemalloc/internal/stats.h | 18 ++++ include/jemalloc/internal/thread_event.h | 6 +- include/jemalloc/internal/tsd.h | 4 + src/ctl.c | 6 ++ src/jemalloc.c | 82 ++++++++------- src/prof.c | 5 +- src/stats.c | 52 ++++++++- src/thread_event.c | 26 ++++- test/unit/counter.c | 128 +++++++++++++++++++++++ test/unit/mallctl.c | 3 + 14 files changed, 334 insertions(+), 50 deletions(-) create mode 100644 test/unit/counter.c diff --git a/Makefile.in b/Makefile.in index 37941ea1..eda9c7a9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -191,6 +191,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/emitter.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 802c64ad..1baf1f6a 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1185,6 +1185,41 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", enabled. The default is .
+ + + opt.stats_interval + (int64_t) + r- + + Average interval between statistics outputs, as measured + in bytes of allocation activity. The actual interval may be sporadic + because decentralized event counters are used to avoid synchronization + bottlenecks. The output may be triggered on any thread, which then + calls malloc_stats_print(). opt.stats_interval_opts + can be combined to specify output options. By default, + interval-triggered stats output is disabled (encoded as + -1). + + + + + opt.stats_interval_opts + (const char *) + r- + + Options (the opts string) to pass + to the malloc_stats_print() for interval based + statistics printing (enabled + through opt.stats_interval). See + available options in malloc_stats_print(). + Has no effect unless opt.stats_interval is + enabled. The default is . + + opt.junk diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 302e3504..4aee23dd 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -6,11 +6,11 @@ typedef struct counter_accum_s { #ifndef JEMALLOC_ATOMIC_U64 malloc_mutex_t mtx; - uint64_t accumbytes; + uint64_t accumbytes; #else - atomic_u64_t accumbytes; + atomic_u64_t accumbytes; #endif - uint64_t interval; + uint64_t interval; } counter_accum_t; JEMALLOC_ALWAYS_INLINE bool @@ -52,7 +52,7 @@ counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { } JEMALLOC_ALWAYS_INLINE void -counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, size_t usize) { +counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) { /* * Cancel out as much of the excessive accumbytes increase as possible * without underflowing. Interval-triggered events occur slightly more @@ -63,16 +63,14 @@ counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, size_t usize) { a0 = atomic_load_u64(&counter->accumbytes, ATOMIC_RELAXED); do { - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? 
a0 - (SC_LARGE_MINCLASS - usize) : 0; + a1 = (a0 >= bytes) ? a0 - bytes : 0; } while (!atomic_compare_exchange_weak_u64( &counter->accumbytes, &a0, a1, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else malloc_mutex_lock(tsdn, &counter->mtx); a0 = counter->accumbytes; - a1 = (a0 >= SC_LARGE_MINCLASS - usize) - ? a0 - (SC_LARGE_MINCLASS - usize) : 0; + a1 = (a0 >= bytes) ? a0 - bytes : 0; counter->accumbytes = a1; malloc_mutex_unlock(tsdn, &counter->mtx); #endif diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 009bf9ac..c3f47b29 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -22,6 +22,7 @@ typedef enum emitter_type_e emitter_type_t; enum emitter_type_e { emitter_type_bool, emitter_type_int, + emitter_type_int64, emitter_type_unsigned, emitter_type_uint32, emitter_type_uint64, @@ -149,6 +150,9 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, case emitter_type_int: EMIT_SIMPLE(int, "%d") break; + case emitter_type_int64: + EMIT_SIMPLE(int64_t, "%" FMTd64) + break; case emitter_type_unsigned: EMIT_SIMPLE(unsigned, "%u") break; diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 3b9e0eac..d1f5eab7 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -24,8 +24,26 @@ enum { extern bool opt_stats_print; extern char opt_stats_print_opts[stats_print_tot_num_options+1]; +/* Utilities for stats_interval. */ +extern int64_t opt_stats_interval; +extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; + +#define STATS_INTERVAL_DEFAULT -1 +/* + * Batch-increment the counter to reduce synchronization overhead. Each thread + * merges after (interval >> LG_BATCH_SIZE) bytes of allocations; also limit the + * BATCH_MAX for accuracy when the interval is huge (which is expected). 
+ */ +#define STATS_INTERVAL_ACCUM_LG_BATCH_SIZE 6 +#define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20) + +uint64_t stats_interval_accum_batch_size(void); +bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); + /* Implements je_malloc_stats_print. */ void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); +bool stats_boot(void); + #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 3ceb4702..454c689b 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -36,7 +36,8 @@ void tsd_thread_event_init(tsd_t *tsd); */ #define ITERATE_OVER_ALL_EVENTS \ E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \ - E(prof_sample, (config_prof && opt_prof)) + E(prof_sample, (config_prof && opt_prof)) \ + E(stats_interval, (opt_stats_interval >= 0)) #define E(event, condition) \ C(event##_event_wait) @@ -46,7 +47,8 @@ void tsd_thread_event_init(tsd_t *tsd); C(thread_allocated) \ C(thread_allocated_last_event) \ ITERATE_OVER_ALL_EVENTS \ - C(prof_sample_last_event) + C(prof_sample_last_event) \ + C(stats_interval_last_event) /* Getters directly wrap TSD getters. 
*/ #define C(counter) \ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3465a2d4..576fa440 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -87,6 +87,8 @@ typedef void (*test_callback_t)(int *); O(tcache_gc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ + O(stats_interval_event_wait, uint64_t, uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -118,6 +120,8 @@ typedef void (*test_callback_t)(int *); /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ + /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ /* iarena */ NULL, \ diff --git a/src/ctl.c b/src/ctl.c index d357b383..78f5df25 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -96,6 +96,8 @@ CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) CTL_PROTO(opt_stats_print_opts) +CTL_PROTO(opt_stats_interval) +CTL_PROTO(opt_stats_interval_opts) CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) @@ -329,6 +331,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, + {NAME("stats_interval"), CTL(opt_stats_interval)}, + {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)}, {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, {NAME("utrace"), CTL(opt_utrace)}, @@ -1791,6 +1795,8 @@ CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, 
opt_stats_print, bool) CTL_RO_NL_GEN(opt_stats_print_opts, opt_stats_print_opts, const char *) +CTL_RO_NL_GEN(opt_stats_interval, opt_stats_interval, int64_t) +CTL_RO_NL_GEN(opt_stats_interval_opts, opt_stats_interval_opts, const char *) CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 218e04a1..19767911 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -775,8 +775,8 @@ malloc_ncpus(void) { } static void -init_opt_stats_print_opts(const char *v, size_t vlen) { - size_t opts_len = strlen(opt_stats_print_opts); +init_opt_stats_opts(const char *v, size_t vlen, char *dest) { + size_t opts_len = strlen(dest); assert(opts_len <= stats_print_tot_num_options); for (size_t i = 0; i < vlen; i++) { @@ -787,16 +787,16 @@ init_opt_stats_print_opts(const char *v, size_t vlen) { default: continue; } - if (strchr(opt_stats_print_opts, v[i]) != NULL) { + if (strchr(dest, v[i]) != NULL) { /* Ignore repeated. */ continue; } - opt_stats_print_opts[opts_len++] = v[i]; - opt_stats_print_opts[opts_len] = '\0'; + dest[opts_len++] = v[i]; + dest[opts_len] = '\0'; assert(opts_len <= stats_print_tot_num_options); } - assert(opts_len == strlen(opt_stats_print_opts)); + assert(opts_len == strlen(dest)); } /* Reads the next size pair in a multi-sized option. 
*/ @@ -1118,39 +1118,47 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_CHECK_MIN(um, min) ((um) < (min)) #define CONF_DONT_CHECK_MAX(um, max) false #define CONF_CHECK_MAX(um, max) ((um) > (max)) -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + +#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ - uintmax_t um; \ + max_t mv; \ char *end; \ \ set_errno(0); \ - um = malloc_strtoumax(v, &end, 0); \ + mv = (max_t)malloc_strtoumax(v, &end, 0); \ if (get_errno() != 0 || (uintptr_t)end -\ (uintptr_t)v != vlen) { \ CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ - if (check_min(um, (t)(min))) { \ + if (check_min(mv, (t)(min))) { \ o = (t)(min); \ } else if ( \ - check_max(um, (t)(max))) { \ + check_max(mv, (t)(max))) { \ o = (t)(max); \ } else { \ - o = (t)um; \ + o = (t)mv; \ } \ } else { \ - if (check_min(um, (t)(min)) || \ - check_max(um, (t)(max))) { \ + if (check_min(mv, (t)(min)) || \ + check_max(mv, (t)(max))) { \ CONF_ERROR( \ "Out-of-range " \ "conf value", \ k, klen, v, vlen); \ } else { \ - o = (t)um; \ + o = (t)mv; \ } \ } \ CONF_CONTINUE; \ } +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, \ + check_max, clip) +#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip)\ + CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, \ + check_max, clip) + #define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ clip) \ CONF_HANDLE_T_U(unsigned, o, n, min, max, \ @@ -1158,27 +1166,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ CONF_HANDLE_T_U(size_t, o, n, min, max, \ check_min, check_max, clip) +#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ + 
CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - if (CONF_MATCH(n)) { \ - long l; \ - char *end; \ - \ - set_errno(0); \ - l = strtol(v, &end, 0); \ - if (get_errno() != 0 || (uintptr_t)end -\ - (uintptr_t)v != vlen) { \ - CONF_ERROR("Invalid conf value",\ - k, klen, v, vlen); \ - } else if (l < (ssize_t)(min) || l > \ - (ssize_t)(max)) { \ - CONF_ERROR( \ - "Out-of-range conf value", \ - k, klen, v, vlen); \ - } else { \ - o = l; \ - } \ - CONF_CONTINUE; \ - } + CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \ + CONF_CHECK_MIN, CONF_CHECK_MAX, false) #define CONF_HANDLE_CHAR_P(o, n, d) \ if (CONF_MATCH(n)) { \ size_t cpylen = (vlen <= \ @@ -1275,7 +1268,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], SSIZE_MAX); CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { - init_opt_stats_print_opts(v, vlen); + init_opt_stats_opts(v, vlen, + opt_stats_print_opts); + CONF_CONTINUE; + } + CONF_HANDLE_INT64_T(opt_stats_interval, + "stats_interval", -1, INT64_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + if (CONF_MATCH("stats_interval_opts")) { + init_opt_stats_opts(v, vlen, + opt_stats_interval_opts); CONF_CONTINUE; } if (config_fill) { @@ -1463,7 +1465,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #undef CONF_CHECK_MIN #undef CONF_DONT_CHECK_MAX #undef CONF_CHECK_MAX +#undef CONF_HANDLE_T #undef CONF_HANDLE_T_U +#undef CONF_HANDLE_T_SIGNED #undef CONF_HANDLE_UNSIGNED #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T @@ -1545,7 +1549,6 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); - thread_event_boot(); sz_boot(&sc_data); bin_info_boot(&sc_data, bin_shard_sizes); @@ -1558,6 +1561,10 @@ malloc_init_hard_a0_locked() { } } } + + if (stats_boot()) { + return true; + } if (pages_boot()) { return true; } @@ -1573,6 +1580,7 @@ 
malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } + thread_event_boot(); arena_boot(&sc_data); if (tcache_boot(TSDN_NULL)) { return true; diff --git a/src/prof.c b/src/prof.c index 649e9ca2..0d29c681 100644 --- a/src/prof.c +++ b/src/prof.c @@ -571,7 +571,10 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { cassert(config_prof); - return counter_rollback(tsdn, &prof_idump_accumulated, usize); + /* Rollback is only done on arena_prof_promote of small sizes. */ + assert(SC_LARGE_MINCLASS > usize); + return counter_rollback(tsdn, &prof_idump_accumulated, + SC_LARGE_MINCLASS - usize); } bool diff --git a/src/stats.c b/src/stats.c index 41b990e2..dd31032d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -50,6 +50,13 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { bool opt_stats_print = false; char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; +int64_t opt_stats_interval = STATS_INTERVAL_DEFAULT; +char opt_stats_interval_opts[stats_print_tot_num_options+1] = ""; + +static counter_accum_t stats_interval_accumulated; +/* Per thread batch accum size for stats_interval. 
*/ +static uint64_t stats_interval_accum_batch; + /******************************************************************************/ static uint64_t @@ -1000,14 +1007,16 @@ stats_general_print(emitter_t *emitter) { unsigned uv; uint32_t u32v; uint64_t u64v; + int64_t i64v; ssize_t ssv, ssv2; - size_t sv, bsz, usz, ssz, sssz, cpsz; + size_t sv, bsz, usz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); + i64sz = sizeof(int64_t); CTL_GET("version", &cpv, const char *); emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); @@ -1063,6 +1072,9 @@ stats_general_print(emitter_t *emitter) { #define OPT_WRITE_UNSIGNED(name) \ OPT_WRITE(name, uv, usz, emitter_type_unsigned) +#define OPT_WRITE_INT64(name) \ + OPT_WRITE(name, i64v, i64sz, emitter_type_int64) + #define OPT_WRITE_SIZE_T(name) \ OPT_WRITE(name, sv, ssz, emitter_type_size) #define OPT_WRITE_SSIZE_T(name) \ @@ -1109,6 +1121,10 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("prof_leak") OPT_WRITE_BOOL("stats_print") OPT_WRITE_CHAR_P("stats_print_opts") + OPT_WRITE_BOOL("stats_print") + OPT_WRITE_CHAR_P("stats_print_opts") + OPT_WRITE_INT64("stats_interval") + OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") emitter_dict_end(emitter); @@ -1477,3 +1493,37 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); emitter_end(&emitter); } + +bool +stats_interval_accum(tsd_t *tsd, uint64_t bytes) { + return counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, bytes); +} + +uint64_t +stats_interval_accum_batch_size(void) { + return stats_interval_accum_batch; +} + +bool +stats_boot(void) { + uint64_t stats_interval; + if (opt_stats_interval < 0) { + assert(opt_stats_interval == -1); + stats_interval = 0; + stats_interval_accum_batch = 0; + } else{ + /* See comments in stats.h */ + 
stats_interval = (opt_stats_interval > 0) ? + opt_stats_interval : 1; + uint64_t batch = stats_interval >> + STATS_INTERVAL_ACCUM_LG_BATCH_SIZE; + if (batch > STATS_INTERVAL_ACCUM_BATCH_MAX) { + batch = STATS_INTERVAL_ACCUM_BATCH_MAX; + } else if (batch == 0) { + batch = 1; + } + stats_interval_accum_batch = batch; + } + + return counter_accum_init(&stats_interval_accumulated, stats_interval); +} diff --git a/src/thread_event.c b/src/thread_event.c index 0657c841..6aedf161 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -25,6 +25,7 @@ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E +/* (Re)Init functions. */ static void tsd_thread_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); @@ -37,11 +38,19 @@ tsd_thread_prof_sample_event_init(tsd_t *tsd) { prof_sample_threshold_update(tsd); } +static void +tsd_thread_stats_interval_event_init(tsd_t *tsd) { + assert(opt_stats_interval >= 0); + uint64_t interval = stats_interval_accum_batch_size(); + thread_stats_interval_event_update(tsd, interval); +} + +/* Handler functions. 
*/ static void thread_tcache_gc_event_handler(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); assert(tcache_gc_event_wait_get(tsd) == 0U); - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + tsd_thread_tcache_gc_event_init(tsd); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_event_hard(tsd, tcache); @@ -71,6 +80,21 @@ thread_prof_sample_event_handler(tsd_t *tsd) { } } +static void +thread_stats_interval_event_handler(tsd_t *tsd) { + assert(opt_stats_interval >= 0); + assert(stats_interval_event_wait_get(tsd) == 0U); + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_stats_event = stats_interval_last_event_get(tsd); + stats_interval_last_event_set(tsd, last_event); + + if (stats_interval_accum(tsd, last_event - last_stats_event)) { + je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); + } + tsd_thread_stats_interval_event_init(tsd); +} +/* Per event facilities done. */ + static uint64_t thread_allocated_next_event_compute(tsd_t *tsd) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; diff --git a/test/unit/counter.c b/test/unit/counter.c new file mode 100644 index 00000000..619510d3 --- /dev/null +++ b/test/unit/counter.c @@ -0,0 +1,128 @@ +#include "test/jemalloc_test.h" + +static const uint64_t interval = 1 << 20; + +TEST_BEGIN(test_counter_accum) { + uint64_t increment = interval >> 4; + unsigned n = interval / increment; + uint64_t accum = 0; + + counter_accum_t c; + counter_accum_init(&c, interval); + + tsd_t *tsd = tsd_fetch(); + bool trigger; + for (unsigned i = 0; i < n; i++) { + trigger = counter_accum(tsd_tsdn(tsd), &c, increment); + accum += increment; + if (accum < interval) { + assert_b_eq(trigger, false, "Should not trigger"); + } else { + assert_b_eq(trigger, true, "Should have triggered"); + } + } + assert_b_eq(trigger, true, "Should have triggered"); +} +TEST_END + +void +assert_counter_value(counter_accum_t *c, uint64_t v) { + uint64_t accum; +#ifdef JEMALLOC_ATOMIC_U64 + accum = 
atomic_load_u64(&(c->accumbytes), ATOMIC_RELAXED); +#else + accum = c->accumbytes; +#endif + assert_u64_eq(accum, v, "Counter value mismatch"); +} + +TEST_BEGIN(test_counter_rollback) { + uint64_t half_interval = interval / 2; + + counter_accum_t c; + counter_accum_init(&c, interval); + + tsd_t *tsd = tsd_fetch(); + counter_rollback(tsd_tsdn(tsd), &c, half_interval); + + bool trigger; + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + counter_rollback(tsd_tsdn(tsd), &c, half_interval + 1); + assert_counter_value(&c, 0); + + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + counter_rollback(tsd_tsdn(tsd), &c, half_interval - 1); + assert_counter_value(&c, 1); + + counter_rollback(tsd_tsdn(tsd), &c, 1); + assert_counter_value(&c, 0); + + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + counter_rollback(tsd_tsdn(tsd), &c, 1); + assert_counter_value(&c, half_interval - 1); + + trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); + assert_b_eq(trigger, false, "Should not trigger"); + assert_counter_value(&c, interval - 1); + + trigger = counter_accum(tsd_tsdn(tsd), &c, 1); + assert_b_eq(trigger, true, "Should have triggered"); + assert_counter_value(&c, 0); + + trigger = counter_accum(tsd_tsdn(tsd), &c, interval + 1); + assert_b_eq(trigger, true, "Should have triggered"); + assert_counter_value(&c, 1); +} +TEST_END + +#define N_THDS (16) +#define N_ITER_THD (1 << 12) +#define ITER_INCREMENT (interval >> 4) + +static void * +thd_start(void *varg) { + counter_accum_t *c = (counter_accum_t *)varg; + + tsd_t *tsd = tsd_fetch(); + bool trigger; + uintptr_t n_triggered = 0; + for (unsigned i = 0; i < N_ITER_THD; i++) { + trigger = counter_accum(tsd_tsdn(tsd), c, ITER_INCREMENT); + n_triggered += trigger ? 
1 : 0; + } + + return (void *)n_triggered; +} + + +TEST_BEGIN(test_counter_mt) { + counter_accum_t shared_c; + counter_accum_init(&shared_c, interval); + + thd_t thds[N_THDS]; + unsigned i; + for (i = 0; i < N_THDS; i++) { + thd_create(&thds[i], thd_start, (void *)&shared_c); + } + + uint64_t sum = 0; + for (i = 0; i < N_THDS; i++) { + void *ret; + thd_join(thds[i], &ret); + sum += (uintptr_t)ret; + } + assert_u64_eq(sum, N_THDS * N_ITER_THD / (interval / ITER_INCREMENT), + "Incorrect number of triggers"); +} +TEST_END + +int +main(void) { + return test( + test_counter_accum, + test_counter_rollback, + test_counter_mt); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index da1716a3..14c169b7 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -170,6 +170,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(ssize_t, dirty_decay_ms, always); TEST_MALLCTL_OPT(ssize_t, muzzy_decay_ms, always); TEST_MALLCTL_OPT(bool, stats_print, always); + TEST_MALLCTL_OPT(const char *, stats_print_opts, always); + TEST_MALLCTL_OPT(int64_t, stats_interval, always); + TEST_MALLCTL_OPT(const char *, stats_interval_opts, always); TEST_MALLCTL_OPT(const char *, junk, fill); TEST_MALLCTL_OPT(bool, zero, fill); TEST_MALLCTL_OPT(bool, utrace, utrace); From 38a48e5741faf51548f5b750c0ab6eba8eb67a0c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 16 Jan 2020 13:00:35 -0800 Subject: [PATCH 1514/2608] Set reentrancy to 1 for tsd_state_purgatory. Reentrancy is already set for other non-nominal tsd states (reincarnated and minimal_initialized). Add purgatory to be safe and consistent. 
--- include/jemalloc/internal/tsd.h | 5 ++++- src/tsd.c | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 576fa440..a62793aa 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -414,7 +414,10 @@ tsd_fetch(void) { static inline bool tsd_nominal(tsd_t *tsd) { - return (tsd_state_get(tsd) <= tsd_state_nominal_max); + bool nominal = tsd_state_get(tsd) <= tsd_state_nominal_max; + assert(nominal || tsd_reentrancy_level_get(tsd) > 0); + + return nominal; } JEMALLOC_ALWAYS_INLINE tsdn_t * diff --git a/src/tsd.c b/src/tsd.c index 17e9eed2..940ff7d2 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -376,6 +376,7 @@ tsd_do_data_cleanup(tsd_t *tsd) { arenas_tdata_cleanup(tsd); tcache_cleanup(tsd); witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd)); + *tsd_reentrancy_levelp_get(tsd) = 1; } void From 0f552ed673b26b733a290bcac4c4d8ff4d0344e1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Jan 2020 16:18:32 -0800 Subject: [PATCH 1515/2608] Don't purge huge extents when decay is off. 
--- include/jemalloc/internal/arena_inlines_b.h | 6 ++++++ src/extent.c | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a310eb29..844e045d 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -103,6 +103,12 @@ arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { large_prof_info_set(edata, tctx); } +JEMALLOC_ALWAYS_INLINE bool +arena_may_force_decay(arena_t *arena) { + return !(arena_dirty_decay_ms_get(arena) == -1 + || arena_muzzy_decay_ms_get(arena) == -1); +} + JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; diff --git a/src/extent.c b/src/extent.c index 9779c38b..07c0bd21 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1226,7 +1226,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, &arena->edata_cache, ehooks, rtree_ctx, ecache, edata, &coalesced, growing_retained); } while (coalesced); - if (edata_size_get(edata) >= oversize_threshold) { + if (edata_size_get(edata) >= oversize_threshold && + arena_may_force_decay(arena)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); arena_decay_extent(tsdn, arena, ehooks, edata); From 88d9eca8483f39ded261c897e95e7d4459775c28 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Jan 2020 17:32:45 -0800 Subject: [PATCH 1516/2608] Enforce page alignment for sampled allocations. This allows sampled allocations to be checked through alignment, therefore enable sized deallocation regardless of cache_oblivious. 
--- include/jemalloc/internal/prof_inlines_b.h | 17 +++++ src/jemalloc.c | 82 ++++++++++++---------- test/integration/extent.c | 2 +- 3 files changed, 62 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index d0cc48d0..c53dac50 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -197,6 +197,22 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } } +JEMALLOC_ALWAYS_INLINE size_t +prof_sample_align(size_t orig_align) { + /* + * Enforce page alignment, so that sampled allocations can be identified + * w/o metadata lookup. + */ + assert(opt_prof); + return (config_cache_oblivious && orig_align < PAGE) ? PAGE : + orig_align; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_aligned(const void *ptr) { + return ((uintptr_t)ptr & PAGE_MASK) == 0; +} + JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; @@ -206,6 +222,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { + assert(prof_sample_aligned(ptr)); prof_free_sampled_object(tsd, usize, &prof_info); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 19767911..bac050a4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2013,6 +2013,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, szind_t ind_large; size_t bumped_usize = usize; + dopts->alignment = prof_sample_align(dopts->alignment); if (usize <= SC_SMALL_MAXCLASS) { assert(((dopts->alignment == 0) ? 
sz_s2u(SC_LARGE_MINCLASS) : @@ -2029,6 +2030,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, } else { ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); } + assert(prof_sample_aligned(ret)); return ret; } @@ -2598,32 +2600,42 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { - /* - * When cache_oblivious is disabled and ptr is not page aligned, - * the allocation was not sampled -- usize can be used to - * determine szind directly. - */ - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = true; - ctx = &alloc_ctx; - if (config_debug) { - alloc_ctx_t dbg_ctx; + if (!config_prof) { + /* Means usize will be used to determine szind. */ + ctx = NULL; + } else { + if (likely(!prof_sample_aligned(ptr))) { + ctx = &alloc_ctx; + /* + * When the ptr is not page aligned, it was not sampled. + * usize can be trusted to determine szind and slab. + */ + ctx->szind = sz_size2index(usize); + if (config_cache_oblivious) { + ctx->slab = (ctx->szind < SC_NBINS); + } else { + /* Non page aligned must be slab allocated. 
*/ + ctx->slab = true; + } + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), + &extents_rtree, rtree_ctx, (uintptr_t)ptr, + true, &dbg_ctx.szind, &dbg_ctx.slab); + assert(dbg_ctx.szind == ctx->szind); + assert(dbg_ctx.slab == ctx->slab); + } + } else if (opt_prof) { + ctx = &alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, - rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, - &dbg_ctx.slab); - assert(dbg_ctx.szind == alloc_ctx.szind); - assert(dbg_ctx.slab == alloc_ctx.slab); + rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, + &ctx->slab); + assert(ctx->szind == sz_size2index(usize)); + } else { + ctx = NULL; } - } else if (config_prof && opt_prof) { - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); - assert(alloc_ctx.szind == sz_size2index(usize)); - ctx = &alloc_ctx; - } else { - ctx = NULL; } if (config_prof && opt_prof) { @@ -2683,13 +2695,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } szind_t szind; - /* - * If !config_cache_oblivious, we can check PAGE alignment to - * detect sampled objects. Otherwise addresses are - * randomized, and we have to look it up in the rtree anyway. - * See also isfree(). - */ - if (!size_hint || config_cache_oblivious) { + if (!size_hint) { bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), @@ -2707,7 +2713,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { * sampled object check will also check for null ptr. 
*/ if (unlikely(size > SC_LOOKUP_MAXCLASS || - (((uintptr_t)ptr & PAGE_MASK) == 0))) { + (config_prof && prof_sample_aligned(ptr)))) { return false; } szind = sz_size2index_lookup(size); @@ -3024,6 +3030,8 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, if (tctx == NULL) { return NULL; } + + alignment = prof_sample_align(alignment); if (usize <= SC_SMALL_MAXCLASS) { p = iralloct(tsdn, old_ptr, old_usize, SC_LARGE_MINCLASS, alignment, zero, tcache, @@ -3036,6 +3044,7 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, tcache, arena, hook_args); } + assert(prof_sample_aligned(p)); return p; } @@ -3281,15 +3290,13 @@ ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, static size_t ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero, prof_tctx_t *tctx) { - size_t usize; - - if (tctx == NULL) { + /* Sampled allocation needs to be page aligned. 
*/ + if (tctx == NULL || !prof_sample_aligned(ptr)) { return old_usize; } - usize = ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, - zero); - return usize; + return ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, + zero); } JEMALLOC_ALWAYS_INLINE size_t @@ -3590,7 +3597,6 @@ sdallocx_default(void *ptr, size_t size, int flags) { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); - } JEMALLOC_EXPORT void JEMALLOC_NOTHROW diff --git a/test/integration/extent.c b/test/integration/extent.c index b5db0876..a75ba03e 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -59,8 +59,8 @@ test_extent_body(unsigned arena_ind) { assert_true(called_decommit, "Expected decommit call"); assert_true(did_purge_lazy || did_purge_forced, "Expected purge"); + assert_true(called_split, "Expected split call"); } - assert_true(called_split, "Expected split call"); dallocx(p, flags); try_dalloc = true; From 974222c626b351256f071d18994c70b79d10a627 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Jan 2020 14:35:54 -0800 Subject: [PATCH 1517/2608] Add safety check on sdallocx slow / sampled path. --- include/jemalloc/internal/safety_check.h | 1 + src/jemalloc.c | 6 +++++- src/safety_check.c | 12 ++++++++++++ src/tcache.c | 5 +---- 4 files changed, 19 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index ec4b3369..a7a44338 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H +void safety_check_fail_sized_dealloc(bool current_dealloc); void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. 
*/ void safety_check_set_abort(void (*abort_fn)(const char *)); diff --git a/src/jemalloc.c b/src/jemalloc.c index bac050a4..5f11fc38 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2632,7 +2632,11 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, &ctx->slab); - assert(ctx->szind == sz_size2index(usize)); + /* Small alloc may have !slab (sampled). */ + bool sz_correct = (ctx->szind == sz_size2index(usize)); + if (config_opt_safety_checks && !sz_correct) { + safety_check_fail_sized_dealloc(true); + } } else { ctx = NULL; } diff --git a/src/safety_check.c b/src/safety_check.c index 804155dc..a83dca76 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -3,6 +3,18 @@ static void (*safety_check_abort)(const char *message); +void safety_check_fail_sized_dealloc(bool current_dealloc) { + assert(config_opt_safety_checks); + char *src = current_dealloc ? "the current pointer being freed" : + "in thread cache, possibly from previous deallocations"; + + safety_check_fail(": size mismatch detected, likely caused by" + " application sized deallocation bugs (source: %s). Suggest building" + "with --enable-debug or address sanitizer for debugging. Abort.\n", + src); + abort(); +} + void safety_check_set_abort(void (*abort_fn)(const char *)) { safety_check_abort = abort_fn; } diff --git a/src/tcache.c b/src/tcache.c index 0a511e2d..2f4ca5a4 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -135,10 +135,7 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, sz_sum -= szind; } if (sz_sum != 0) { - safety_check_fail(": size mismatch in thread cache " - "detected, likely caused by sized deallocation bugs by " - "application. 
Abort.\n"); - abort(); + safety_check_fail_sized_dealloc(false); } } From 536ea6858ecfcac49060c805231bd1722d84a0cf Mon Sep 17 00:00:00 2001 From: zoulasc Date: Mon, 3 Feb 2020 15:35:08 -0500 Subject: [PATCH 1518/2608] NetBSD specific changes: - NetBSD overcommits - When mapping pages, use the maximum of the alignment requested and the compiled-in PAGE constant which might be greater than the current kernel pagesize, since we compile binaries with the maximum page size supported by the architecture (so that they work with all kernels). --- src/pages.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/pages.c b/src/pages.c index 75c8dd9d..62e84f04 100644 --- a/src/pages.c +++ b/src/pages.c @@ -14,6 +14,9 @@ #include #endif #endif +#ifdef __NetBSD__ +#include /* ilog2 */ +#endif /******************************************************************************/ /* Data. */ @@ -74,6 +77,18 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { * of existing mappings, and we only want to create new mappings. */ { +#ifdef __NetBSD__ + /* + * On NetBSD PAGE for a platform is defined to the + * maximum page size of all machine architectures + * for that platform, so that we can use the same + * binaries across all machine architectures. + */ + if (alignment > os_page || PAGE > os_page) { + unsigned int a = ilog2(MAX(alignment, PAGE)); + mmap_flags |= MAP_ALIGNED(a); + } +#endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; ret = mmap(addr, size, prot, mmap_flags, -1, 0); @@ -622,6 +637,8 @@ pages_boot(void) { mmap_flags |= MAP_NORESERVE; } # endif +#elif defined(__NetBSD__) + os_overcommits = true; #else os_overcommits = false; #endif From 97dd79db6c4f9b93bb83182afb191d8dbef49806 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Jan 2020 21:12:06 -0800 Subject: [PATCH 1519/2608] Implement deallocation events. Make the event module to accept two event types, and pass around the event context. 
Use bytes-based events to trigger tcache GC on deallocation, and get rid of the tcache ticker. --- include/jemalloc/internal/tcache_inlines.h | 16 -- include/jemalloc/internal/tcache_structs.h | 3 - include/jemalloc/internal/thread_event.h | 176 +++++++++++++++------ include/jemalloc/internal/tsd.h | 8 + src/jemalloc.c | 65 ++++---- src/tcache.c | 2 - src/thread_event.c | 161 ++++++++++++------- src/tsd.c | 2 +- test/unit/thread_event.c | 44 ++++-- 9 files changed, 306 insertions(+), 171 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 40c4286c..d356181c 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" -#include "jemalloc/internal/ticker.h" #include "jemalloc/internal/util.h" static inline bool @@ -27,17 +26,6 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } -JEMALLOC_ALWAYS_INLINE void -tcache_event(tsd_t *tsd, tcache_t *tcache) { - if (TCACHE_GC_INCR == 0) { - return; - } - - if (unlikely(ticker_tick(&tcache->gc_ticker))) { - tcache_event_hard(tsd, tcache); - } -} - JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { @@ -171,8 +159,6 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } - - tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE void @@ -195,8 +181,6 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); } - - tcache_event(tsd, tcache); } JEMALLOC_ALWAYS_INLINE tcache_t * diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 98d3ef70..38a82fe4 100644 
--- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -16,9 +16,6 @@ struct tcache_s { * together at the start of this struct. */ - /* Drives incremental GC. */ - ticker_t gc_ticker; - /* * The pointer stacks associated with bins follow as a contiguous array. * During tcache initialization, the avail pointer in each element of diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 454c689b..33cbcbe7 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -4,42 +4,51 @@ #include "jemalloc/internal/tsd.h" /* - * Maximum threshold on thread_allocated_next_event_fast, so that there is no - * need to check overflow in malloc fast path. (The allocation size in malloc + * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is + * no need to check overflow in malloc fast path. (The allocation size in malloc * fast path never exceeds SC_LOOKUP_MAXCLASS.) */ -#define THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX \ +#define THREAD_NEXT_EVENT_FAST_MAX \ (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) /* * The max interval helps make sure that malloc stays on the fast path in the - * common case, i.e. thread_allocated < thread_allocated_next_event_fast. - * When thread_allocated is within an event's distance to - * THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX above, thread_allocated_next_event_fast - * is wrapped around and we fall back to the medium-fast path. The max interval - * makes sure that we're not staying on the fallback case for too long, even if - * there's no active event or if all active events have long wait times. + * common case, i.e. thread_allocated < thread_allocated_next_event_fast. When + * thread_allocated is within an event's distance to THREAD_NEXT_EVENT_FAST_MAX + * above, thread_allocated_next_event_fast is wrapped around and we fall back to + * the medium-fast path. 
The max interval makes sure that we're not staying on + * the fallback case for too long, even if there's no active event or if all + * active events have long wait times. */ #define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) +typedef struct event_ctx_s { + bool is_alloc; + uint64_t *current; + uint64_t *last_event; + uint64_t *next_event; + uint64_t *next_event_fast; +} event_ctx_t; + void thread_event_assert_invariants_debug(tsd_t *tsd); -void thread_event_trigger(tsd_t *tsd, bool delay_event); -void thread_event_rollback(tsd_t *tsd, size_t diff); -void thread_event_update(tsd_t *tsd); +void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); +void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); +void thread_event_update(tsd_t *tsd, bool alloc_event); void thread_event_boot(); void thread_event_recompute_fast_threshold(tsd_t *tsd); void tsd_thread_event_init(tsd_t *tsd); /* * List of all events, in the following format: - * E(event, (condition)) + * E(event, (condition), is_alloc_event) */ #define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0)) \ - E(prof_sample, (config_prof && opt_prof)) \ - E(stats_interval, (opt_stats_interval >= 0)) + E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0), true) \ + E(prof_sample, (config_prof && opt_prof), true) \ + E(stats_interval, (opt_stats_interval >= 0), true) \ + E(tcache_gc_dalloc, (TCACHE_GC_INCR_BYTES > 0), false) -#define E(event, condition) \ +#define E(event, condition_unused, is_alloc_event_unused) \ C(event##_event_wait) /* List of all thread event counters. */ @@ -83,9 +92,9 @@ ITERATE_OVER_ALL_COUNTERS #undef E /* - * Two malloc fastpath getters -- use the unsafe getters since tsd may be - * non-nominal, in which case the fast_threshold will be set to 0. This allows - * checking for events and tsd non-nominal in a single branch. 
+ * The malloc and free fastpath getters -- use the unsafe getters since tsd may + * be non-nominal, in which case the fast_threshold will be set to 0. This + * allows checking for events and tsd non-nominal in a single branch. * * Note that these can only be used on the fastpath. */ @@ -97,42 +106,83 @@ thread_allocated_malloc_fastpath(tsd_t *tsd) { JEMALLOC_ALWAYS_INLINE uint64_t thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); - assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + assert(v <= THREAD_NEXT_EVENT_FAST_MAX); return v; } +JEMALLOC_ALWAYS_INLINE void +thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, + uint64_t *threshold, bool size_hint) { + if (!size_hint) { + *deallocated = tsd_thread_deallocated_get(tsd); + *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); + } else { + /* Unsafe getters since this may happen before tsd_init. */ + *deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd); + *threshold = + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); + } + assert(*threshold <= THREAD_NEXT_EVENT_FAST_MAX); +} + +JEMALLOC_ALWAYS_INLINE bool +event_ctx_is_alloc(event_ctx_t *ctx) { + return ctx->is_alloc; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +event_ctx_current_bytes_get(event_ctx_t *ctx) { + return *ctx->current; +} + +JEMALLOC_ALWAYS_INLINE void +event_ctx_current_bytes_set(event_ctx_t *ctx, uint64_t v) { + *ctx->current = v; +} + +JEMALLOC_ALWAYS_INLINE uint64_t +event_ctx_last_event_get(event_ctx_t *ctx) { + return *ctx->last_event; +} + +JEMALLOC_ALWAYS_INLINE void +event_ctx_last_event_set(event_ctx_t *ctx, uint64_t v) { + *ctx->last_event = v; +} + /* Below 3 for next_event_fast. 
*/ JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_fast_get(tsd_t *tsd) { - uint64_t v = tsd_thread_allocated_next_event_fast_get(tsd); - assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); +event_ctx_next_event_fast_get(event_ctx_t *ctx) { + uint64_t v = *ctx->next_event_fast; + assert(v <= THREAD_NEXT_EVENT_FAST_MAX); return v; } JEMALLOC_ALWAYS_INLINE void -thread_allocated_next_event_fast_set(tsd_t *tsd, uint64_t v) { - assert(v <= THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); - *tsd_thread_allocated_next_event_fastp_get(tsd) = v; +event_ctx_next_event_fast_set(event_ctx_t *ctx, uint64_t v) { + assert(v <= THREAD_NEXT_EVENT_FAST_MAX); + *ctx->next_event_fast = v; } JEMALLOC_ALWAYS_INLINE void -thread_allocated_next_event_fast_set_non_nominal(tsd_t *tsd) { +thread_next_event_fast_set_non_nominal(tsd_t *tsd) { /* - * Set the fast threshold to zero when tsd is non-nominal. Use the + * Set the fast thresholds to zero when tsd is non-nominal. Use the * unsafe getter as this may get called during tsd init and clean up. */ *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd) = 0; + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) = 0; } /* For next_event. Setter also updates the fast threshold. 
*/ JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_get(tsd_t *tsd) { - return tsd_thread_allocated_next_event_get(tsd); +event_ctx_next_event_get(event_ctx_t *ctx) { + return *ctx->next_event; } JEMALLOC_ALWAYS_INLINE void -thread_allocated_next_event_set(tsd_t *tsd, uint64_t v) { - *tsd_thread_allocated_next_eventp_get(tsd) = v; +event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { + *ctx->next_event = v; thread_event_recompute_fast_threshold(tsd); } @@ -145,8 +195,8 @@ thread_allocated_next_event_set(tsd_t *tsd, uint64_t v) { * at the end will restore the invariants), * (b) thread_##event##_event_update() (the thread_event_update() call at the * end will restore the invariants), or - * (c) thread_event_rollback() if the rollback falls below the last_event (the - * thread_event_update() call at the end will restore the invariants). + * (c) thread_alloc_event_rollback() if the rollback falls below the last_event + * (the thread_event_update() call at the end will restore the invariants). */ JEMALLOC_ALWAYS_INLINE void thread_event_assert_invariants(tsd_t *tsd) { @@ -156,22 +206,52 @@ thread_event_assert_invariants(tsd_t *tsd) { } JEMALLOC_ALWAYS_INLINE void -thread_event(tsd_t *tsd, size_t usize) { - thread_event_assert_invariants(tsd); - - uint64_t thread_allocated_before = thread_allocated_get(tsd); - thread_allocated_set(tsd, thread_allocated_before + usize); - - /* The subtraction is intentionally susceptible to underflow. 
*/ - if (likely(usize < thread_allocated_next_event_get(tsd) - - thread_allocated_before)) { - thread_event_assert_invariants(tsd); +event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { + ctx->is_alloc = is_alloc; + if (is_alloc) { + ctx->current = tsd_thread_allocatedp_get(tsd); + ctx->last_event = tsd_thread_allocated_last_eventp_get(tsd); + ctx->next_event = tsd_thread_allocated_next_eventp_get(tsd); + ctx->next_event_fast = + tsd_thread_allocated_next_event_fastp_get(tsd); } else { - thread_event_trigger(tsd, false); + ctx->current = tsd_thread_deallocatedp_get(tsd); + ctx->last_event = tsd_thread_deallocated_last_eventp_get(tsd); + ctx->next_event = tsd_thread_deallocated_next_eventp_get(tsd); + ctx->next_event_fast = + tsd_thread_deallocated_next_event_fastp_get(tsd); } } -#define E(event, condition) \ +JEMALLOC_ALWAYS_INLINE void +thread_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { + thread_event_assert_invariants(tsd); + + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, is_alloc); + + uint64_t bytes_before = event_ctx_current_bytes_get(&ctx); + event_ctx_current_bytes_set(&ctx, bytes_before + usize); + + /* The subtraction is intentionally susceptible to underflow. 
*/ + if (likely(usize < event_ctx_next_event_get(&ctx) - bytes_before)) { + thread_event_assert_invariants(tsd); + } else { + thread_event_trigger(tsd, &ctx, false); + } +} + +JEMALLOC_ALWAYS_INLINE void +thread_dalloc_event(tsd_t *tsd, size_t usize) { + thread_event_advance(tsd, usize, false); +} + +JEMALLOC_ALWAYS_INLINE void +thread_alloc_event(tsd_t *tsd, size_t usize) { + thread_event_advance(tsd, usize, true); +} + +#define E(event, condition, is_alloc) \ JEMALLOC_ALWAYS_INLINE void \ thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ thread_event_assert_invariants(tsd); \ @@ -188,7 +268,7 @@ thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ event_wait = THREAD_EVENT_MAX_START_WAIT; \ } \ event##_event_wait_set(tsd, event_wait); \ - thread_event_update(tsd); \ + thread_event_update(tsd, is_alloc); \ } ITERATE_OVER_ALL_EVENTS diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index a62793aa..6868ce4b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -81,10 +81,14 @@ typedef void (*test_callback_t)(int *); O(thread_allocated, uint64_t, uint64_t) \ O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(thread_deallocated_last_event, uint64_t, uint64_t) \ + O(thread_deallocated_next_event, uint64_t, uint64_t) \ O(tcache_gc_event_wait, uint64_t, uint64_t) \ + O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ O(stats_interval_event_wait, uint64_t, uint64_t) \ @@ -114,10 +118,14 @@ typedef void (*test_callback_t)(int *); /* thread_allocated */ 0, \ /* thread_allocated_next_event_fast */ 0, \ /* thread_deallocated */ 0, 
\ + /* thread_deallocated_next_event_fast */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated_last_event */ 0, \ + /* thread_deallocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* tcache_gc_dalloc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 5f11fc38..60565df7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2154,7 +2154,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); /* * If dopts->alignment > 0, then ind is still 0, but usize was computed @@ -2181,7 +2181,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } @@ -2191,7 +2191,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); goto label_oom; } } @@ -2575,7 +2575,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - *tsd_thread_deallocatedp_get(tsd) += usize; if (likely(!slow_path)) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, @@ -2584,6 +2583,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } + 
thread_dalloc_event(tsd, usize); } JEMALLOC_ALWAYS_INLINE void @@ -2645,14 +2645,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, ctx); } - - *tsd_thread_deallocatedp_get(tsd) += usize; - if (likely(!slow_path)) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); } else { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true); } + thread_dalloc_event(tsd, usize); } JEMALLOC_NOINLINE @@ -2694,12 +2692,12 @@ free_default(void *ptr) { JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); - if (unlikely(!tsd || !tsd_fast(tsd))) { - return false; - } szind_t szind; if (!size_hint) { + if (unlikely(!tsd || !tsd_fast(tsd))) { + return false; + } bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), @@ -2711,6 +2709,15 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } assert(szind != SC_NSIZES); } else { + /* + * The size hinted fastpath does not involve rtree lookup, thus + * can tolerate an uninitialized tsd. This allows the tsd_fast + * check to be folded into the branch testing fast_threshold + * (set to 0 when !tsd_fast). + */ + if (unlikely(!tsd)) { + return false; + } /* * Check for both sizes that are too large, and for sampled * objects. Sampled objects are always page-aligned. The @@ -2722,19 +2729,26 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } szind = sz_size2index_lookup(size); } + uint64_t deallocated, threshold; + thread_event_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); - tcache_t *tcache = tsd_tcachep_get(tsd); - if (unlikely(ticker_trytick(&tcache->gc_ticker))) { + size_t usize = sz_index2size(szind); + uint64_t deallocated_after = deallocated + usize; + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. 
+ */ + if (unlikely(deallocated_after >= threshold)) { return false; } + tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, szind); if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } - size_t usize = sz_index2size(szind); - *tsd_thread_deallocatedp_get(tsd) += usize; + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; return true; } @@ -3144,11 +3158,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); goto label_oom; } } else { @@ -3158,11 +3172,10 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } usize = isalloc(tsd_tsdn(tsd), p); - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); - - *tsd_thread_deallocatedp_get(tsd) += old_usize; + thread_dalloc_event(tsd, old_usize); UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3337,7 +3350,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } } - thread_event(tsd, usize_max); + thread_alloc_event(tsd, usize_max); bool prof_active = prof_active_get_unlocked(); prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); @@ -3350,7 +3363,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, extra, alignment, zero); } if (usize <= usize_max) { - thread_event_rollback(tsd, usize_max - usize); + thread_alloc_event_rollback(tsd, usize_max - usize); } else { /* * For downsizing request, usize_max can be less than usize. 
@@ -3359,7 +3372,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * to xallocx(), the entire usize will be rolled back if it's * equal to the old usize. */ - thread_event(tsd, usize - usize_max); + thread_alloc_event(tsd, usize - usize_max); } /* @@ -3438,7 +3451,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); - thread_event(tsd, usize); + thread_alloc_event(tsd, usize); } /* @@ -3448,12 +3461,10 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); if (unlikely(usize == old_usize)) { - thread_event_rollback(tsd, usize); + thread_alloc_event_rollback(tsd, usize); goto label_not_resized; } - - *tsd_thread_deallocatedp_get(tsd) += old_usize; - + thread_dalloc_event(tsd, old_usize); label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; diff --git a/src/tcache.c b/src/tcache.c index 2f4ca5a4..3d965126 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -437,8 +437,6 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { tcache->next_gc_bin = 0; tcache->arena = NULL; - ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR); - assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); diff --git a/src/thread_event.c b/src/thread_event.c index 6aedf161..5bdc4aef 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -12,14 +12,14 @@ static bool thread_event_active = false; /* TSD event init function signatures. */ -#define E(event, condition) \ +#define E(event, condition_unused, is_alloc_event_unused) \ static void tsd_thread_##event##_event_init(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* Event handler function signatures. 
*/ -#define E(event, condition) \ +#define E(event, condition_unused, is_alloc_event_unused) \ static void thread_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS @@ -32,6 +32,12 @@ tsd_thread_tcache_gc_event_init(tsd_t *tsd) { thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); } +static void +tsd_thread_tcache_gc_dalloc_event_init(tsd_t *tsd) { + assert(TCACHE_GC_INCR_BYTES > 0); + thread_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); +} + static void tsd_thread_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); @@ -46,17 +52,30 @@ tsd_thread_stats_interval_event_init(tsd_t *tsd) { } /* Handler functions. */ + static void -thread_tcache_gc_event_handler(tsd_t *tsd) { +tcache_gc_event(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - assert(tcache_gc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_event_init(tsd); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_event_hard(tsd, tcache); } } +static void +thread_tcache_gc_event_handler(tsd_t *tsd) { + assert(tcache_gc_event_wait_get(tsd) == 0U); + tsd_thread_tcache_gc_event_init(tsd); + tcache_gc_event(tsd); +} + +static void +thread_tcache_gc_dalloc_event_handler(tsd_t *tsd) { + assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); + tsd_thread_tcache_gc_dalloc_event_init(tsd); + tcache_gc_event(tsd); +} + static void thread_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); @@ -96,12 +115,12 @@ thread_stats_interval_event_handler(tsd_t *tsd) { /* Per event facilities done. 
*/ static uint64_t -thread_allocated_next_event_compute(tsd_t *tsd) { +thread_next_event_compute(tsd_t *tsd, bool is_alloc) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; bool no_event_on = true; -#define E(event, condition) \ - if (condition) { \ +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition) { \ no_event_on = false; \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ @@ -119,15 +138,15 @@ thread_allocated_next_event_compute(tsd_t *tsd) { return wait; } -void -thread_event_assert_invariants_debug(tsd_t *tsd) { - uint64_t thread_allocated = thread_allocated_get(tsd); - uint64_t last_event = thread_allocated_last_event_get(tsd); - uint64_t next_event = thread_allocated_next_event_get(tsd); - uint64_t next_event_fast = thread_allocated_next_event_fast_get(tsd); +static void +thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { + uint64_t current_bytes = event_ctx_current_bytes_get(ctx); + uint64_t last_event = event_ctx_last_event_get(ctx); + uint64_t next_event = event_ctx_next_event_get(ctx); + uint64_t next_event_fast = event_ctx_next_event_fast_get(ctx); assert(last_event != next_event); - if (next_event > THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX || + if (next_event > THREAD_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) { assert(next_event_fast == 0U); } else { @@ -138,10 +157,9 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { uint64_t interval = next_event - last_event; /* The subtraction is intentionally susceptible to underflow. */ - assert(thread_allocated - last_event < interval); - - uint64_t min_wait = thread_allocated_next_event_compute(tsd); - + assert(current_bytes - last_event < interval); + uint64_t min_wait = thread_next_event_compute(tsd, + event_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is * on and the TSD is just initialized. 
The last_event == 0U guard @@ -153,6 +171,16 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); } +void +thread_event_assert_invariants_debug(tsd_t *tsd) { + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + thread_event_assert_invariants_impl(tsd, &ctx); + + event_ctx_get(tsd, &ctx, false); + thread_event_assert_invariants_impl(tsd, &ctx); +} + /* * Synchronization around the fast threshold in tsd -- * There are two threads to consider in the synchronization here: @@ -200,39 +228,50 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { * of the owner thread's next_event_fast, but that's always safe (it just sends * it down the slow path earlier). */ +static void +event_ctx_next_event_fast_update(event_ctx_t *ctx) { + uint64_t next_event = event_ctx_next_event_get(ctx); + uint64_t next_event_fast = (next_event <= + THREAD_NEXT_EVENT_FAST_MAX) ? next_event : 0U; + event_ctx_next_event_fast_set(ctx, next_event_fast); +} + void thread_event_recompute_fast_threshold(tsd_t *tsd) { if (tsd_state_get(tsd) != tsd_state_nominal) { /* Check first because this is also called on purgatory. */ - thread_allocated_next_event_fast_set_non_nominal(tsd); + thread_next_event_fast_set_non_nominal(tsd); return; } - uint64_t next_event = thread_allocated_next_event_get(tsd); - uint64_t next_event_fast = (next_event <= - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX) ? 
next_event : 0U; - thread_allocated_next_event_fast_set(tsd, next_event_fast); + + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + event_ctx_next_event_fast_update(&ctx); + event_ctx_get(tsd, &ctx, false); + event_ctx_next_event_fast_update(&ctx); atomic_fence(ATOMIC_SEQ_CST); if (tsd_state_get(tsd) != tsd_state_nominal) { - thread_allocated_next_event_fast_set_non_nominal(tsd); + thread_next_event_fast_set_non_nominal(tsd); } } static void -thread_event_adjust_thresholds_helper(tsd_t *tsd, uint64_t wait) { +thread_event_adjust_thresholds_helper(tsd_t *tsd, event_ctx_t *ctx, + uint64_t wait) { assert(wait <= THREAD_EVENT_MAX_START_WAIT); - uint64_t next_event = thread_allocated_last_event_get(tsd) + (wait <= + uint64_t next_event = event_ctx_last_event_get(ctx) + (wait <= THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); - thread_allocated_next_event_set(tsd, next_event); + event_ctx_next_event_set(tsd, ctx, next_event); } static uint64_t thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, - bool allow_event_trigger) { + bool is_alloc, bool allow_event_trigger) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; -#define E(event, condition) \ - if (condition) { \ +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ @@ -267,28 +306,30 @@ thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, } void -thread_event_trigger(tsd_t *tsd, bool delay_event) { +thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event) { /* usize has already been added to thread_allocated. */ - uint64_t thread_allocated_after = thread_allocated_get(tsd); + uint64_t bytes_after = event_ctx_current_bytes_get(ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t accumbytes = thread_allocated_after - - thread_allocated_last_event_get(tsd); + uint64_t accumbytes = bytes_after - event_ctx_last_event_get(ctx); /* Make sure that accumbytes cannot overflow uint64_t. */ assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); - thread_allocated_last_event_set(tsd, thread_allocated_after); + event_ctx_last_event_set(ctx, bytes_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; + + bool is_alloc = ctx->is_alloc; uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, - allow_event_trigger); - thread_event_adjust_thresholds_helper(tsd, wait); + is_alloc, allow_event_trigger); + thread_event_adjust_thresholds_helper(tsd, ctx, wait); thread_event_assert_invariants(tsd); -#define E(event, condition) \ - if (condition && event##_event_wait_get(tsd) == 0U) { \ +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition && \ + event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ thread_##event##_event_handler(tsd); \ } @@ -300,19 +341,23 @@ thread_event_trigger(tsd_t *tsd, bool delay_event) { } void -thread_event_rollback(tsd_t *tsd, size_t diff) { +thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { thread_event_assert_invariants(tsd); if (diff == 0U) { return; } - uint64_t thread_allocated = thread_allocated_get(tsd); + /* Rollback happens only on alloc events. */ + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + + uint64_t thread_allocated = event_ctx_current_bytes_get(&ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ uint64_t thread_allocated_rollback = thread_allocated - diff; - thread_allocated_set(tsd, thread_allocated_rollback); + event_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_event = event_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ if (thread_allocated_rollback - last_event <= thread_allocated - last_event) { @@ -320,14 +365,14 @@ thread_event_rollback(tsd_t *tsd, size_t diff) { return; } - thread_allocated_last_event_set(tsd, thread_allocated_rollback); + event_ctx_last_event_set(&ctx, thread_allocated_rollback); /* The subtraction is intentionally susceptible to underflow. */ uint64_t wait_diff = last_event - thread_allocated_rollback; assert(wait_diff <= diff); -#define E(event, condition) \ - if (condition) { \ +#define E(event, condition, alloc_event) \ + if (alloc_event == true && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ if (event_wait > 0U) { \ @@ -347,27 +392,29 @@ thread_event_rollback(tsd_t *tsd, size_t diff) { ITERATE_OVER_ALL_EVENTS #undef E - thread_event_update(tsd); + thread_event_update(tsd, true); } void -thread_event_update(tsd_t *tsd) { - uint64_t wait = thread_allocated_next_event_compute(tsd); - thread_event_adjust_thresholds_helper(tsd, wait); +thread_event_update(tsd_t *tsd, bool is_alloc) { + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, is_alloc); - uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t wait = thread_next_event_compute(tsd, is_alloc); + thread_event_adjust_thresholds_helper(tsd, &ctx, wait); + uint64_t last_event = event_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. 
*/ - if (thread_allocated_get(tsd) - last_event >= - thread_allocated_next_event_get(tsd) - last_event) { - thread_event_trigger(tsd, true); + if (event_ctx_current_bytes_get(&ctx) - last_event >= + event_ctx_next_event_get(&ctx) - last_event) { + thread_event_trigger(tsd, &ctx, true); } else { thread_event_assert_invariants(tsd); } } void thread_event_boot() { -#define E(event, condition) \ +#define E(event, condition, ignored) \ if (condition) { \ thread_event_active = true; \ } @@ -377,7 +424,7 @@ void thread_event_boot() { } void tsd_thread_event_init(tsd_t *tsd) { -#define E(event, condition) \ +#define E(event, condition, is_alloc_event_unused) \ if (condition) { \ tsd_thread_##event##_event_init(tsd); \ } diff --git a/src/tsd.c b/src/tsd.c index 940ff7d2..54e5b4af 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -119,7 +119,7 @@ tsd_force_recompute(tsdn_t *tsdn) { tsd_state_nominal_recompute, ATOMIC_RELAXED); /* See comments in thread_event_recompute_fast_threshold(). */ atomic_fence(ATOMIC_SEQ_CST); - thread_allocated_next_event_fast_set_non_nominal(remote_tsd); + thread_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index f016cc5d..db2d637e 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -2,14 +2,18 @@ TEST_BEGIN(test_next_event_fast_roll_back) { tsd_t *tsd = tsd_fetch(); - thread_allocated_last_event_set(tsd, 0); - thread_allocated_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX - 8U); - thread_allocated_next_event_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); -#define E(event, condition) \ - event##_event_wait_set(tsd, \ - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX); + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + + event_ctx_last_event_set(&ctx, 0); + event_ctx_current_bytes_set(&ctx, + THREAD_NEXT_EVENT_FAST_MAX - 8U); + event_ctx_next_event_set(tsd, &ctx, + THREAD_NEXT_EVENT_FAST_MAX); +#define E(event, condition, 
is_alloc) \ + if (is_alloc && condition) { \ + event##_event_wait_set(tsd, THREAD_NEXT_EVENT_FAST_MAX);\ + } ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(16U); @@ -20,14 +24,20 @@ TEST_END TEST_BEGIN(test_next_event_fast_resume) { tsd_t *tsd = tsd_fetch(); - thread_allocated_last_event_set(tsd, 0); - thread_allocated_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 8U); - thread_allocated_next_event_set(tsd, - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); -#define E(event, condition) \ - event##_event_wait_set(tsd, \ - THREAD_ALLOCATED_NEXT_EVENT_FAST_MAX + 16U); + + event_ctx_t ctx; + event_ctx_get(tsd, &ctx, true); + + event_ctx_last_event_set(&ctx, 0); + event_ctx_current_bytes_set(&ctx, + THREAD_NEXT_EVENT_FAST_MAX + 8U); + event_ctx_next_event_set(tsd, &ctx, + THREAD_NEXT_EVENT_FAST_MAX + 16U); +#define E(event, condition, is_alloc) \ + if (is_alloc && condition) { \ + event##_event_wait_set(tsd, \ + THREAD_NEXT_EVENT_FAST_MAX + 16U); \ + } ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(SC_LOOKUP_MAXCLASS); @@ -42,7 +52,7 @@ TEST_BEGIN(test_event_rollback) { size_t count = 10; uint64_t thread_allocated = thread_allocated_get(tsd); while (count-- != 0) { - thread_event_rollback(tsd, diff); + thread_alloc_event_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); From 5e500523a056d7330e2223627ecdfb565d88e070 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 29 Jan 2020 20:29:05 -0800 Subject: [PATCH 1520/2608] Remove thread_event_boot(). 
--- include/jemalloc/internal/thread_event.h | 1 - src/jemalloc.c | 1 - src/thread_event.c | 36 +++++++++--------------- 3 files changed, 13 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 33cbcbe7..383af30a 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -34,7 +34,6 @@ void thread_event_assert_invariants_debug(tsd_t *tsd); void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); void thread_event_update(tsd_t *tsd, bool alloc_event); -void thread_event_boot(); void thread_event_recompute_fast_threshold(tsd_t *tsd); void tsd_thread_event_init(tsd_t *tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 60565df7..e4ef7f3a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1580,7 +1580,6 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } - thread_event_boot(); arena_boot(&sc_data); if (tcache_boot(TSDN_NULL)) { return true; diff --git a/src/thread_event.c b/src/thread_event.c index 5bdc4aef..0fbdebee 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,13 +4,6 @@ #include "jemalloc/internal/thread_event.h" -/* - * There's no lock for thread_event_active because write is only done in - * malloc_init(), where init_lock there serves as the guard, and ever since - * then thread_event_active becomes read only. - */ -static bool thread_event_active = false; - /* TSD event init function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ static void tsd_thread_##event##_event_init(tsd_t *tsd); @@ -114,14 +107,23 @@ thread_stats_interval_event_handler(tsd_t *tsd) { } /* Per event facilities done. 
*/ +static bool +event_ctx_has_active_events(event_ctx_t *ctx) { + assert(config_debug); +#define E(event, condition, alloc_event) \ + if (condition && alloc_event == ctx->is_alloc) { \ + return true; \ + } + ITERATE_OVER_ALL_EVENTS +#undef E + return false; +} + static uint64_t thread_next_event_compute(tsd_t *tsd, bool is_alloc) { uint64_t wait = THREAD_EVENT_MAX_START_WAIT; - bool no_event_on = true; - #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ - no_event_on = false; \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ @@ -132,8 +134,6 @@ thread_next_event_compute(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - - assert(no_event_on == !thread_event_active); assert(wait <= THREAD_EVENT_MAX_START_WAIT); return wait; } @@ -166,7 +166,7 @@ thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. */ - assert((!thread_event_active && last_event == 0U) || + assert((!event_ctx_has_active_events(ctx) && last_event == 0U) || interval == min_wait || (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); } @@ -413,16 +413,6 @@ thread_event_update(tsd_t *tsd, bool is_alloc) { } } -void thread_event_boot() { -#define E(event, condition, ignored) \ - if (condition) { \ - thread_event_active = true; \ - } - - ITERATE_OVER_ALL_EVENTS -#undef E -} - void tsd_thread_event_init(tsd_t *tsd) { #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ From e8965226168cdcb359f6db39fdf4c216b47a60cf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Jan 2020 16:31:45 -0800 Subject: [PATCH 1521/2608] Abbreviate thread-event to te. 
--- include/jemalloc/internal/thread_event.h | 126 +++++++------ include/jemalloc/internal/tsd.h | 18 +- src/jemalloc.c | 17 +- src/prof.c | 5 +- src/thread_event.c | 217 +++++++++++------------ src/tsd.c | 10 +- test/unit/thread_event.c | 32 ++-- 7 files changed, 201 insertions(+), 224 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 383af30a..d528c051 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -3,39 +3,40 @@ #include "jemalloc/internal/tsd.h" +/* "te" is short for "thread_event" */ + /* * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is * no need to check overflow in malloc fast path. (The allocation size in malloc * fast path never exceeds SC_LOOKUP_MAXCLASS.) */ -#define THREAD_NEXT_EVENT_FAST_MAX \ - (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) +#define TE_NEXT_EVENT_FAST_MAX (UINT64_MAX - SC_LOOKUP_MAXCLASS + 1U) /* * The max interval helps make sure that malloc stays on the fast path in the * common case, i.e. thread_allocated < thread_allocated_next_event_fast. When - * thread_allocated is within an event's distance to THREAD_NEXT_EVENT_FAST_MAX + * thread_allocated is within an event's distance to TE_NEXT_EVENT_FAST_MAX * above, thread_allocated_next_event_fast is wrapped around and we fall back to * the medium-fast path. The max interval makes sure that we're not staying on * the fallback case for too long, even if there's no active event or if all * active events have long wait times. 
*/ -#define THREAD_EVENT_MAX_INTERVAL ((uint64_t)(4U << 20)) +#define TE_MAX_INTERVAL ((uint64_t)(4U << 20)) -typedef struct event_ctx_s { +typedef struct te_ctx_s { bool is_alloc; uint64_t *current; uint64_t *last_event; uint64_t *next_event; uint64_t *next_event_fast; -} event_ctx_t; +} te_ctx_t; -void thread_event_assert_invariants_debug(tsd_t *tsd); -void thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event); -void thread_alloc_event_rollback(tsd_t *tsd, size_t diff); -void thread_event_update(tsd_t *tsd, bool alloc_event); -void thread_event_recompute_fast_threshold(tsd_t *tsd); -void tsd_thread_event_init(tsd_t *tsd); +void te_assert_invariants_debug(tsd_t *tsd); +void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); +void te_alloc_rollback(tsd_t *tsd, size_t diff); +void te_event_update(tsd_t *tsd, bool alloc_event); +void te_recompute_fast_threshold(tsd_t *tsd); +void tsd_te_init(tsd_t *tsd); /* * List of all events, in the following format: @@ -97,21 +98,16 @@ ITERATE_OVER_ALL_COUNTERS * * Note that these can only be used on the fastpath. 
*/ -JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_malloc_fastpath(tsd_t *tsd) { - return *tsd_thread_allocatedp_get_unsafe(tsd); -} - -JEMALLOC_ALWAYS_INLINE uint64_t -thread_allocated_next_event_malloc_fastpath(tsd_t *tsd) { - uint64_t v = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); - return v; +JEMALLOC_ALWAYS_INLINE void +te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) { + *allocated = *tsd_thread_allocatedp_get_unsafe(tsd); + *threshold = *tsd_thread_allocated_next_event_fastp_get_unsafe(tsd); + assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } JEMALLOC_ALWAYS_INLINE void -thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, - uint64_t *threshold, bool size_hint) { +te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold, + bool size_hint) { if (!size_hint) { *deallocated = tsd_thread_deallocated_get(tsd); *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); @@ -121,50 +117,50 @@ thread_event_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, *threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); } - assert(*threshold <= THREAD_NEXT_EVENT_FAST_MAX); + assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } JEMALLOC_ALWAYS_INLINE bool -event_ctx_is_alloc(event_ctx_t *ctx) { +te_ctx_is_alloc(te_ctx_t *ctx) { return ctx->is_alloc; } JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_current_bytes_get(event_ctx_t *ctx) { +te_ctx_current_bytes_get(te_ctx_t *ctx) { return *ctx->current; } JEMALLOC_ALWAYS_INLINE void -event_ctx_current_bytes_set(event_ctx_t *ctx, uint64_t v) { +te_ctx_current_bytes_set(te_ctx_t *ctx, uint64_t v) { *ctx->current = v; } JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_last_event_get(event_ctx_t *ctx) { +te_ctx_last_event_get(te_ctx_t *ctx) { return *ctx->last_event; } JEMALLOC_ALWAYS_INLINE void -event_ctx_last_event_set(event_ctx_t *ctx, uint64_t v) { +te_ctx_last_event_set(te_ctx_t *ctx, uint64_t v) { 
*ctx->last_event = v; } /* Below 3 for next_event_fast. */ JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_next_event_fast_get(event_ctx_t *ctx) { +te_ctx_next_event_fast_get(te_ctx_t *ctx) { uint64_t v = *ctx->next_event_fast; - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); + assert(v <= TE_NEXT_EVENT_FAST_MAX); return v; } JEMALLOC_ALWAYS_INLINE void -event_ctx_next_event_fast_set(event_ctx_t *ctx, uint64_t v) { - assert(v <= THREAD_NEXT_EVENT_FAST_MAX); +te_ctx_next_event_fast_set(te_ctx_t *ctx, uint64_t v) { + assert(v <= TE_NEXT_EVENT_FAST_MAX); *ctx->next_event_fast = v; } JEMALLOC_ALWAYS_INLINE void -thread_next_event_fast_set_non_nominal(tsd_t *tsd) { +te_next_event_fast_set_non_nominal(tsd_t *tsd) { /* * Set the fast thresholds to zero when tsd is non-nominal. Use the * unsafe getter as this may get called during tsd init and clean up. @@ -175,14 +171,14 @@ thread_next_event_fast_set_non_nominal(tsd_t *tsd) { /* For next_event. Setter also updates the fast threshold. */ JEMALLOC_ALWAYS_INLINE uint64_t -event_ctx_next_event_get(event_ctx_t *ctx) { +te_ctx_next_event_get(te_ctx_t *ctx) { return *ctx->next_event; } JEMALLOC_ALWAYS_INLINE void -event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { +te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { *ctx->next_event = v; - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } /* @@ -190,22 +186,22 @@ event_ctx_next_event_set(tsd_t *tsd, event_ctx_t *ctx, uint64_t v) { * a consistent state, which forms the invariants before and after each round * of thread event handling that we can rely on and need to promise. 
* The invariants are only temporarily violated in the middle of: - * (a) thread_event() if an event is triggered (the thread_event_trigger() call + * (a) event_advance() if an event is triggered (the te_event_trigger() call * at the end will restore the invariants), - * (b) thread_##event##_event_update() (the thread_event_update() call at the + * (b) te_##event##_event_update() (the te_event_update() call at the * end will restore the invariants), or - * (c) thread_alloc_event_rollback() if the rollback falls below the last_event - * (the thread_event_update() call at the end will restore the invariants). + * (c) te_alloc_rollback() if the rollback falls below the last_event + * (the te_event_update() call at the end will restore the invariants). */ JEMALLOC_ALWAYS_INLINE void -thread_event_assert_invariants(tsd_t *tsd) { +te_assert_invariants(tsd_t *tsd) { if (config_debug) { - thread_event_assert_invariants_debug(tsd); + te_assert_invariants_debug(tsd); } } JEMALLOC_ALWAYS_INLINE void -event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { +te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { ctx->is_alloc = is_alloc; if (is_alloc) { ctx->current = tsd_thread_allocatedp_get(tsd); @@ -223,51 +219,51 @@ event_ctx_get(tsd_t *tsd, event_ctx_t *ctx, bool is_alloc) { } JEMALLOC_ALWAYS_INLINE void -thread_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { - thread_event_assert_invariants(tsd); +te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { + te_assert_invariants(tsd); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, is_alloc); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); - uint64_t bytes_before = event_ctx_current_bytes_get(&ctx); - event_ctx_current_bytes_set(&ctx, bytes_before + usize); + uint64_t bytes_before = te_ctx_current_bytes_get(&ctx); + te_ctx_current_bytes_set(&ctx, bytes_before + usize); /* The subtraction is intentionally susceptible to underflow. 
*/ - if (likely(usize < event_ctx_next_event_get(&ctx) - bytes_before)) { - thread_event_assert_invariants(tsd); + if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) { + te_assert_invariants(tsd); } else { - thread_event_trigger(tsd, &ctx, false); + te_event_trigger(tsd, &ctx, false); } } JEMALLOC_ALWAYS_INLINE void thread_dalloc_event(tsd_t *tsd, size_t usize) { - thread_event_advance(tsd, usize, false); + te_event_advance(tsd, usize, false); } JEMALLOC_ALWAYS_INLINE void thread_alloc_event(tsd_t *tsd, size_t usize) { - thread_event_advance(tsd, usize, true); + te_event_advance(tsd, usize, true); } #define E(event, condition, is_alloc) \ JEMALLOC_ALWAYS_INLINE void \ -thread_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - thread_event_assert_invariants(tsd); \ +te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + te_assert_invariants(tsd); \ assert(condition); \ assert(tsd_nominal(tsd)); \ assert(tsd_reentrancy_level_get(tsd) == 0); \ assert(event_wait > 0U); \ - if (THREAD_EVENT_MIN_START_WAIT > 1U && \ - unlikely(event_wait < THREAD_EVENT_MIN_START_WAIT)) { \ - event_wait = THREAD_EVENT_MIN_START_WAIT; \ + if (TE_MIN_START_WAIT > 1U && \ + unlikely(event_wait < TE_MIN_START_WAIT)) { \ + event_wait = TE_MIN_START_WAIT; \ } \ - if (THREAD_EVENT_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > THREAD_EVENT_MAX_START_WAIT)) { \ - event_wait = THREAD_EVENT_MAX_START_WAIT; \ + if (TE_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > TE_MAX_START_WAIT)) { \ + event_wait = TE_MAX_START_WAIT; \ } \ event##_event_wait_set(tsd, event_wait); \ - thread_event_update(tsd, is_alloc); \ + te_event_update(tsd, is_alloc); \ } ITERATE_OVER_ALL_EVENTS diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 6868ce4b..163ffc4b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -104,10 +104,10 @@ typedef void (*test_callback_t)(int *); MALLOC_TEST_TSD /* - * 
THREAD_EVENT_MIN_START_WAIT should not exceed the minimal allocation usize. + * TE_MIN_START_WAIT should not exceed the minimal allocation usize. */ -#define THREAD_EVENT_MIN_START_WAIT ((uint64_t)1U) -#define THREAD_EVENT_MAX_START_WAIT UINT64_MAX +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX #define TSD_INITIALIZER { \ /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ @@ -121,14 +121,14 @@ typedef void (*test_callback_t)(int *); /* thread_deallocated_next_event_fast */ 0, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ THREAD_EVENT_MIN_START_WAIT, \ - /* tcache_gc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ - /* tcache_gc_dalloc_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ - /* prof_sample_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* thread_deallocated_next_event */ TE_MIN_START_WAIT, \ + /* tcache_gc_event_wait */ TE_MIN_START_WAIT, \ + /* tcache_gc_dalloc_event_wait */ TE_MIN_START_WAIT, \ + /* prof_sample_event_wait */ TE_MIN_START_WAIT, \ /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ THREAD_EVENT_MIN_START_WAIT, \ + /* stats_interval_event_wait */ TE_MIN_START_WAIT, \ /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index e4ef7f3a..190b3a2a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2180,7 +2180,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); prof_alloc_rollback(tsd, tctx, true); goto label_oom; } @@ -2190,7 +2190,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, 
dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_oom; } } @@ -2386,15 +2386,14 @@ je_malloc(size_t size) { * it's not always needed in the core allocation logic. */ size_t usize; - sz_size2index_usize_fastpath(size, &ind, &usize); /* Fast path relies on size being a bin. */ assert(ind < SC_NBINS); assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && (size <= SC_SMALL_MAXCLASS)); - uint64_t allocated = thread_allocated_malloc_fastpath(tsd); - uint64_t threshold = thread_allocated_next_event_malloc_fastpath(tsd); + uint64_t allocated, threshold; + te_malloc_fastpath_ctx(tsd, &allocated, &threshold); uint64_t allocated_after = allocated + usize; /* * The ind and usize might be uninitialized (or partially) before @@ -2729,7 +2728,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { szind = sz_size2index_lookup(size); } uint64_t deallocated, threshold; - thread_event_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); + te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); size_t usize = sz_index2size(szind); uint64_t deallocated_after = deallocated + usize; @@ -3161,7 +3160,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_oom; } } else { @@ -3362,7 +3361,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, extra, alignment, zero); } if (usize <= usize_max) { - thread_alloc_event_rollback(tsd, usize_max - usize); + te_alloc_rollback(tsd, usize_max - usize); } else { /* * For downsizing request, usize_max can be less than usize. 
@@ -3460,7 +3459,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); if (unlikely(usize == old_usize)) { - thread_alloc_event_rollback(tsd, usize); + te_alloc_rollback(tsd, usize); goto label_not_resized; } thread_dalloc_event(tsd, old_usize); diff --git a/src/prof.c b/src/prof.c index 0d29c681..248532e8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -444,8 +444,7 @@ prof_sample_threshold_update(tsd_t *tsd) { } if (lg_prof_sample == 0) { - thread_prof_sample_event_update(tsd, - THREAD_EVENT_MIN_START_WAIT); + te_prof_sample_event_update(tsd, TE_MIN_START_WAIT); return; } @@ -472,7 +471,7 @@ prof_sample_threshold_update(tsd_t *tsd) { uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - thread_prof_sample_event_update(tsd, bytes_until_sample); + te_prof_sample_event_update(tsd, bytes_until_sample); #endif } diff --git a/src/thread_event.c b/src/thread_event.c index 0fbdebee..dadace38 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -6,46 +6,45 @@ /* TSD event init function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void tsd_thread_##event##_event_init(tsd_t *tsd); +static void te_tsd_##event##_event_init(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* Event handler function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void thread_##event##_event_handler(tsd_t *tsd); +static void te_##event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E /* (Re)Init functions. 
*/ static void -tsd_thread_tcache_gc_event_init(tsd_t *tsd) { +te_tsd_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - thread_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + te_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); } static void -tsd_thread_tcache_gc_dalloc_event_init(tsd_t *tsd) { +te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - thread_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); + te_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); } static void -tsd_thread_prof_sample_event_init(tsd_t *tsd) { +te_tsd_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); prof_sample_threshold_update(tsd); } static void -tsd_thread_stats_interval_event_init(tsd_t *tsd) { +te_tsd_stats_interval_event_init(tsd_t *tsd) { assert(opt_stats_interval >= 0); uint64_t interval = stats_interval_accum_batch_size(); - thread_stats_interval_event_update(tsd, interval); + te_stats_interval_event_update(tsd, interval); } /* Handler functions. 
*/ - static void tcache_gc_event(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); @@ -56,21 +55,21 @@ tcache_gc_event(tsd_t *tsd) { } static void -thread_tcache_gc_event_handler(tsd_t *tsd) { +te_tcache_gc_event_handler(tsd_t *tsd) { assert(tcache_gc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_event_init(tsd); + te_tsd_tcache_gc_event_init(tsd); tcache_gc_event(tsd); } static void -thread_tcache_gc_dalloc_event_handler(tsd_t *tsd) { +te_tcache_gc_dalloc_event_handler(tsd_t *tsd) { assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); - tsd_thread_tcache_gc_dalloc_event_init(tsd); + te_tsd_tcache_gc_dalloc_event_init(tsd); tcache_gc_event(tsd); } static void -thread_prof_sample_event_handler(tsd_t *tsd) { +te_prof_sample_event_handler(tsd_t *tsd) { assert(config_prof && opt_prof); assert(prof_sample_event_wait_get(tsd) == 0U); uint64_t last_event = thread_allocated_last_event_get(tsd); @@ -87,13 +86,13 @@ thread_prof_sample_event_handler(tsd_t *tsd) { * prof_active is turned on later, the counting for sampling * can immediately resume as normal. */ - thread_prof_sample_event_update(tsd, + te_prof_sample_event_update(tsd, (uint64_t)(1 << lg_prof_sample)); } } static void -thread_stats_interval_event_handler(tsd_t *tsd) { +te_stats_interval_event_handler(tsd_t *tsd) { assert(opt_stats_interval >= 0); assert(stats_interval_event_wait_get(tsd) == 0U); uint64_t last_event = thread_allocated_last_event_get(tsd); @@ -103,12 +102,12 @@ thread_stats_interval_event_handler(tsd_t *tsd) { if (stats_interval_accum(tsd, last_event - last_stats_event)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } - tsd_thread_stats_interval_event_init(tsd); + te_tsd_stats_interval_event_init(tsd); } /* Per event facilities done. 
*/ static bool -event_ctx_has_active_events(event_ctx_t *ctx) { +te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); #define E(event, condition, alloc_event) \ if (condition && alloc_event == ctx->is_alloc) { \ @@ -120,13 +119,13 @@ event_ctx_has_active_events(event_ctx_t *ctx) { } static uint64_t -thread_next_event_compute(tsd_t *tsd, bool is_alloc) { - uint64_t wait = THREAD_EVENT_MAX_START_WAIT; +te_next_event_compute(tsd_t *tsd, bool is_alloc) { + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = \ event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > 0U && event_wait < wait) { \ wait = event_wait; \ } \ @@ -134,20 +133,19 @@ thread_next_event_compute(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - assert(wait <= THREAD_EVENT_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); return wait; } static void -thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { - uint64_t current_bytes = event_ctx_current_bytes_get(ctx); - uint64_t last_event = event_ctx_last_event_get(ctx); - uint64_t next_event = event_ctx_next_event_get(ctx); - uint64_t next_event_fast = event_ctx_next_event_fast_get(ctx); +te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { + uint64_t current_bytes = te_ctx_current_bytes_get(ctx); + uint64_t last_event = te_ctx_last_event_get(ctx); + uint64_t next_event = te_ctx_next_event_get(ctx); + uint64_t next_event_fast = te_ctx_next_event_fast_get(ctx); assert(last_event != next_event); - if (next_event > THREAD_NEXT_EVENT_FAST_MAX || - !tsd_fast(tsd)) { + if (next_event > TE_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) { assert(next_event_fast == 0U); } else { assert(next_event_fast == next_event); @@ -158,27 +156,26 @@ thread_event_assert_invariants_impl(tsd_t *tsd, event_ctx_t *ctx) { /* The subtraction is intentionally 
susceptible to underflow. */ assert(current_bytes - last_event < interval); - uint64_t min_wait = thread_next_event_compute(tsd, - event_ctx_is_alloc(ctx)); + uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is * on and the TSD is just initialized. The last_event == 0U guard * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. */ - assert((!event_ctx_has_active_events(ctx) && last_event == 0U) || + assert((!te_ctx_has_active_events(ctx) && last_event == 0U) || interval == min_wait || - (interval < min_wait && interval == THREAD_EVENT_MAX_INTERVAL)); + (interval < min_wait && interval == TE_MAX_INTERVAL)); } void -thread_event_assert_invariants_debug(tsd_t *tsd) { - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); - thread_event_assert_invariants_impl(tsd, &ctx); +te_assert_invariants_debug(tsd_t *tsd) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); + te_assert_invariants_impl(tsd, &ctx); - event_ctx_get(tsd, &ctx, false); - thread_event_assert_invariants_impl(tsd, &ctx); + te_ctx_get(tsd, &ctx, false); + te_assert_invariants_impl(tsd, &ctx); } /* @@ -229,66 +226,65 @@ thread_event_assert_invariants_debug(tsd_t *tsd) { * it down the slow path earlier). */ static void -event_ctx_next_event_fast_update(event_ctx_t *ctx) { - uint64_t next_event = event_ctx_next_event_get(ctx); - uint64_t next_event_fast = (next_event <= - THREAD_NEXT_EVENT_FAST_MAX) ? next_event : 0U; - event_ctx_next_event_fast_set(ctx, next_event_fast); +te_ctx_next_event_fast_update(te_ctx_t *ctx) { + uint64_t next_event = te_ctx_next_event_get(ctx); + uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ? 
+ next_event : 0U; + te_ctx_next_event_fast_set(ctx, next_event_fast); } void -thread_event_recompute_fast_threshold(tsd_t *tsd) { +te_recompute_fast_threshold(tsd_t *tsd) { if (tsd_state_get(tsd) != tsd_state_nominal) { /* Check first because this is also called on purgatory. */ - thread_next_event_fast_set_non_nominal(tsd); + te_next_event_fast_set_non_nominal(tsd); return; } - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); - event_ctx_next_event_fast_update(&ctx); - event_ctx_get(tsd, &ctx, false); - event_ctx_next_event_fast_update(&ctx); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); + te_ctx_next_event_fast_update(&ctx); + te_ctx_get(tsd, &ctx, false); + te_ctx_next_event_fast_update(&ctx); atomic_fence(ATOMIC_SEQ_CST); if (tsd_state_get(tsd) != tsd_state_nominal) { - thread_next_event_fast_set_non_nominal(tsd); + te_next_event_fast_set_non_nominal(tsd); } } static void -thread_event_adjust_thresholds_helper(tsd_t *tsd, event_ctx_t *ctx, +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { - assert(wait <= THREAD_EVENT_MAX_START_WAIT); - uint64_t next_event = event_ctx_last_event_get(ctx) + (wait <= - THREAD_EVENT_MAX_INTERVAL ? wait : THREAD_EVENT_MAX_INTERVAL); - event_ctx_next_event_set(tsd, ctx, next_event); + assert(wait <= TE_MAX_START_WAIT); + uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= + TE_MAX_INTERVAL ? 
wait : TE_MAX_INTERVAL); + te_ctx_next_event_set(tsd, ctx, next_event); } static uint64_t -thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, - bool is_alloc, bool allow_event_trigger) { - uint64_t wait = THREAD_EVENT_MAX_START_WAIT; +te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, + bool allow_event_trigger) { + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ } else { \ event_wait = 0U; \ if (!allow_event_trigger) { \ - event_wait = \ - THREAD_EVENT_MIN_START_WAIT; \ + event_wait = TE_MIN_START_WAIT; \ } \ } \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ event##_event_wait_set(tsd, event_wait); \ /* \ * If there is a single event, then the remaining wait \ * time may become zero, and we rely on either the \ - * event handler or a thread_event_update() call later \ + * event handler or a te_event_update() call later \ * to properly set next_event; if there are multiple \ * events, then here we can get the minimum remaining \ * wait time to the next already set event. \ @@ -301,72 +297,64 @@ thread_event_trigger_batch_update(tsd_t *tsd, uint64_t accumbytes, ITERATE_OVER_ALL_EVENTS #undef E - assert(wait <= THREAD_EVENT_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); return wait; } void -thread_event_trigger(tsd_t *tsd, event_ctx_t *ctx, bool delay_event) { +te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { /* usize has already been added to thread_allocated. */ - uint64_t bytes_after = event_ctx_current_bytes_get(ctx); - + uint64_t bytes_after = te_ctx_current_bytes_get(ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t accumbytes = bytes_after - event_ctx_last_event_get(ctx); + uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); - /* Make sure that accumbytes cannot overflow uint64_t. */ - assert(THREAD_EVENT_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); - - event_ctx_last_event_set(ctx, bytes_after); + te_ctx_last_event_set(ctx, bytes_after); bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; bool is_alloc = ctx->is_alloc; - uint64_t wait = thread_event_trigger_batch_update(tsd, accumbytes, - is_alloc, allow_event_trigger); - thread_event_adjust_thresholds_helper(tsd, ctx, wait); - - thread_event_assert_invariants(tsd); + uint64_t wait = te_batch_accum(tsd, accumbytes, is_alloc, + allow_event_trigger); + te_adjust_thresholds_helper(tsd, ctx, wait); + te_assert_invariants(tsd); #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition && \ event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ - thread_##event##_event_handler(tsd); \ + te_##event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS #undef E - - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); } void -thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { - thread_event_assert_invariants(tsd); - +te_alloc_rollback(tsd_t *tsd, size_t diff) { + te_assert_invariants(tsd); if (diff == 0U) { return; } /* Rollback happens only on alloc events. */ - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - uint64_t thread_allocated = event_ctx_current_bytes_get(&ctx); + uint64_t thread_allocated = te_ctx_current_bytes_get(&ctx); /* The subtraction is intentionally susceptible to underflow. 
*/ uint64_t thread_allocated_rollback = thread_allocated - diff; - event_ctx_current_bytes_set(&ctx, thread_allocated_rollback); + te_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - uint64_t last_event = event_ctx_last_event_get(&ctx); + uint64_t last_event = te_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ if (thread_allocated_rollback - last_event <= thread_allocated - last_event) { - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); return; } - event_ctx_last_event_set(&ctx, thread_allocated_rollback); - + te_ctx_last_event_set(&ctx, thread_allocated_rollback); /* The subtraction is intentionally susceptible to underflow. */ uint64_t wait_diff = last_event - thread_allocated_rollback; assert(wait_diff <= diff); @@ -374,49 +362,48 @@ thread_alloc_event_rollback(tsd_t *tsd, size_t diff) { #define E(event, condition, alloc_event) \ if (alloc_event == true && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > 0U) { \ - if (wait_diff > \ - THREAD_EVENT_MAX_START_WAIT - event_wait) { \ - event_wait = \ - THREAD_EVENT_MAX_START_WAIT; \ + if (wait_diff > TE_MAX_START_WAIT - event_wait) {\ + event_wait = TE_MAX_START_WAIT; \ } else { \ event_wait += wait_diff; \ } \ - assert(event_wait <= \ - THREAD_EVENT_MAX_START_WAIT); \ + assert(event_wait <= TE_MAX_START_WAIT); \ event##_event_wait_set(tsd, event_wait); \ } \ } ITERATE_OVER_ALL_EVENTS #undef E - - thread_event_update(tsd, true); + te_event_update(tsd, true); } void -thread_event_update(tsd_t *tsd, bool is_alloc) { - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, is_alloc); +te_event_update(tsd_t *tsd, bool is_alloc) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); - uint64_t wait = thread_next_event_compute(tsd, is_alloc); - thread_event_adjust_thresholds_helper(tsd, &ctx, wait); + uint64_t wait = 
te_next_event_compute(tsd, is_alloc); + te_adjust_thresholds_helper(tsd, &ctx, wait); - uint64_t last_event = event_ctx_last_event_get(&ctx); + uint64_t last_event = te_ctx_last_event_get(&ctx); /* Both subtractions are intentionally susceptible to underflow. */ - if (event_ctx_current_bytes_get(&ctx) - last_event >= - event_ctx_next_event_get(&ctx) - last_event) { - thread_event_trigger(tsd, &ctx, true); + if (te_ctx_current_bytes_get(&ctx) - last_event >= + te_ctx_next_event_get(&ctx) - last_event) { + te_event_trigger(tsd, &ctx, true); } else { - thread_event_assert_invariants(tsd); + te_assert_invariants(tsd); } } -void tsd_thread_event_init(tsd_t *tsd) { +void tsd_te_init(tsd_t *tsd) { + /* Make sure no overflow for the bytes accumulated on event_trigger. */ + assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); + #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ - tsd_thread_##event##_event_init(tsd); \ + te_tsd_##event##_event_init(tsd); \ } ITERATE_OVER_ALL_EVENTS diff --git a/src/tsd.c b/src/tsd.c index 54e5b4af..38196c80 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -117,9 +117,9 @@ tsd_force_recompute(tsdn_t *tsdn) { <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, tsd_state_nominal_recompute, ATOMIC_RELAXED); - /* See comments in thread_event_recompute_fast_threshold(). */ + /* See comments in te_recompute_fast_threshold(). 
*/ atomic_fence(ATOMIC_SEQ_CST); - thread_next_event_fast_set_non_nominal(remote_tsd); + te_next_event_fast_set_non_nominal(remote_tsd); } malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock); } @@ -179,7 +179,7 @@ tsd_slow_update(tsd_t *tsd) { ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } void @@ -218,7 +218,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { tsd_slow_update(tsd); } } - thread_event_recompute_fast_threshold(tsd); + te_recompute_fast_threshold(tsd); } static bool @@ -240,7 +240,7 @@ tsd_data_init(tsd_t *tsd) { (uint64_t)(uintptr_t)tsd; /* event_init may use the prng state above. */ - tsd_thread_event_init(tsd); + tsd_te_init(tsd); return tsd_tcache_enabled_data_init(tsd); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index db2d637e..0855829c 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -2,17 +2,15 @@ TEST_BEGIN(test_next_event_fast_roll_back) { tsd_t *tsd = tsd_fetch(); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - event_ctx_last_event_set(&ctx, 0); - event_ctx_current_bytes_set(&ctx, - THREAD_NEXT_EVENT_FAST_MAX - 8U); - event_ctx_next_event_set(tsd, &ctx, - THREAD_NEXT_EVENT_FAST_MAX); + te_ctx_last_event_set(&ctx, 0); + te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); + te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); #define E(event, condition, is_alloc) \ if (is_alloc && condition) { \ - event##_event_wait_set(tsd, THREAD_NEXT_EVENT_FAST_MAX);\ + event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX); \ } ITERATE_OVER_ALL_EVENTS #undef E @@ -25,18 +23,16 @@ TEST_END TEST_BEGIN(test_next_event_fast_resume) { tsd_t *tsd = tsd_fetch(); - event_ctx_t ctx; - event_ctx_get(tsd, &ctx, true); + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, true); - event_ctx_last_event_set(&ctx, 0); - event_ctx_current_bytes_set(&ctx, - 
THREAD_NEXT_EVENT_FAST_MAX + 8U); - event_ctx_next_event_set(tsd, &ctx, - THREAD_NEXT_EVENT_FAST_MAX + 16U); + te_ctx_last_event_set(&ctx, 0); + te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX + 8U); + te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX + 16U); #define E(event, condition, is_alloc) \ if (is_alloc && condition) { \ event##_event_wait_set(tsd, \ - THREAD_NEXT_EVENT_FAST_MAX + 16U); \ + TE_NEXT_EVENT_FAST_MAX + 16U); \ } ITERATE_OVER_ALL_EVENTS #undef E @@ -48,11 +44,11 @@ TEST_END TEST_BEGIN(test_event_rollback) { tsd_t *tsd = tsd_fetch(); - const uint64_t diff = THREAD_EVENT_MAX_INTERVAL >> 2; + const uint64_t diff = TE_MAX_INTERVAL >> 2; size_t count = 10; uint64_t thread_allocated = thread_allocated_get(tsd); while (count-- != 0) { - thread_alloc_event_rollback(tsd, diff); + te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); From c6bfe55857230949ea2d6467c1dc3fce213fe9c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 3 Feb 2020 23:59:31 -0800 Subject: [PATCH 1522/2608] Update the tsd description. 
--- include/jemalloc/internal/tsd.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 163ffc4b..d88f3d12 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -17,21 +17,27 @@ * Thread-Specific-Data layout * --- data accessed on tcache fast path: state, rtree_ctx, stats --- * s: state - * e: tcache_enabled * m: thread_allocated * k: thread_allocated_next_event_fast * f: thread_deallocated + * h: thread_deallocated_next_event_fast * c: rtree_ctx (rtree cache accessed on deallocation) * t: tcache * --- data not accessed on tcache fast path: arena-related fields --- + * e: tcache_enabled * d: arenas_tdata_bypass * r: reentrancy_level - * x: narenas_tdata + * n: narenas_tdata * l: thread_allocated_last_event * j: thread_allocated_next_event + * q: thread_deallocated_last_event + * u: thread_deallocated_next_event * g: tcache_gc_event_wait + * y: tcache_gc_dalloc_event_wait * w: prof_sample_event_wait (config_prof) * x: prof_sample_last_event (config_prof) + * z: stats_interval_event_wait + * e: stats_interval_last_event * p: prof_tdata (config_prof) * v: prng_state * i: iarena @@ -43,15 +49,15 @@ * Use a compact layout to reduce cache footprint. * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ * |---------------------------- 1st cacheline ----------------------------| - * | sedrxxxx mmmmmmmm kkkkkkkk ffffffff [c * 32 ........ ........ .......] | + * | sedrnnnn mmmmmmmm kkkkkkkk ffffffff hhhhhhhh [c * 24 ........ ........]| * |---------------------------- 2nd cacheline ----------------------------| - * | [c * 64 ........ ........ ........ ........ ........ ........ .......] | + * | [c * 64 ........ ........ ........ ........ ........ ........ ........]| * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 32 ........ ........ .......] 
llllllll jjjjjjjj gggggggg wwwwwwww | + * | [c * 40 ........ ........ ........ .......] llllllll jjjjjjjj qqqqqqqq | * +---------------------------- 4th cacheline ----------------------------+ - * | xxxxxxxx pppppppp vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b...... ........ | - * +---------------------------- 5th cacheline ----------------------------+ - * | ........ ........ ..b][t.. ........ ........ ........ ........ ........ | + * | uuuuuuuu gggggggg yyyyyyyy wwwwwwww xxxxxxxx zzzzzzzz eeeeeeee pppppppp | + * +---------------------------- 5th and after ----------------------------+ + * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b * 40; then embedded tcache ..... | * +-------------------------------------------------------------------------+ * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * From bdc08b51581d422189e32ee87724e668f0fa5ef2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 23 Jan 2020 15:00:01 -0800 Subject: [PATCH 1523/2608] Better naming buffered writer --- include/jemalloc/internal/buf_writer.h | 20 ++++++++--------- src/buf_writer.c | 30 +++++++++++++------------- src/jemalloc.c | 8 +++---- src/prof_log.c | 8 +++---- src/prof_recent.c | 8 +++---- test/unit/buf_writer.c | 15 +++++++------ 6 files changed, 45 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 60bd0108..b2644a86 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -16,21 +16,21 @@ typedef struct { char *buf; size_t buf_size; size_t buf_end; -} buf_write_arg_t; +} buf_writer_t; JEMALLOC_ALWAYS_INLINE void -buf_write_init(buf_write_arg_t *arg, void (*write_cb)(void *, const char *), - void *cbopaque, char *buf, size_t buf_len) { - arg->write_cb = write_cb; - arg->cbopaque = cbopaque; +buf_writer_init(buf_writer_t *buf_writer, void (*write_cb)(void *, + const char *), void *cbopaque, char *buf, size_t buf_len) { + buf_writer->write_cb = 
write_cb; + buf_writer->cbopaque = cbopaque; assert(buf != NULL); - arg->buf = buf; + buf_writer->buf = buf; assert(buf_len >= 2); - arg->buf_size = buf_len - 1; /* Accommodating '\0' at the end. */ - arg->buf_end = 0; + buf_writer->buf_size = buf_len - 1; /* Allowing for '\0' at the end. */ + buf_writer->buf_end = 0; } -void buf_write_flush(buf_write_arg_t *arg); -void buf_write_cb(void *buf_write_arg, const char *s); +void buf_writer_flush(buf_writer_t *buf_writer); +void buf_writer_cb(void *buf_writer_arg, const char *s); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index 41065946..aed7d4a8 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -6,31 +6,31 @@ #include "jemalloc/internal/malloc_io.h" void -buf_write_flush(buf_write_arg_t *arg) { - assert(arg->buf_end <= arg->buf_size); - arg->buf[arg->buf_end] = '\0'; - if (arg->write_cb == NULL) { - arg->write_cb = je_malloc_message != NULL ? +buf_writer_flush(buf_writer_t *buf_writer) { + assert(buf_writer->buf_end <= buf_writer->buf_size); + buf_writer->buf[buf_writer->buf_end] = '\0'; + if (buf_writer->write_cb == NULL) { + buf_writer->write_cb = je_malloc_message != NULL ? 
je_malloc_message : wrtmessage; } - arg->write_cb(arg->cbopaque, arg->buf); - arg->buf_end = 0; + buf_writer->write_cb(buf_writer->cbopaque, buf_writer->buf); + buf_writer->buf_end = 0; } void -buf_write_cb(void *buf_write_arg, const char *s) { - buf_write_arg_t *arg = (buf_write_arg_t *)buf_write_arg; +buf_writer_cb(void *buf_writer_arg, const char *s) { + buf_writer_t *buf_writer = (buf_writer_t *)buf_writer_arg; size_t i, slen, n, s_remain, buf_remain; - assert(arg->buf_end <= arg->buf_size); + assert(buf_writer->buf_end <= buf_writer->buf_size); for (i = 0, slen = strlen(s); i < slen; i += n) { - if (arg->buf_end == arg->buf_size) { - buf_write_flush(arg); + if (buf_writer->buf_end == buf_writer->buf_size) { + buf_writer_flush(buf_writer); } s_remain = slen - i; - buf_remain = arg->buf_size - arg->buf_end; + buf_remain = buf_writer->buf_size - buf_writer->buf_end; n = s_remain < buf_remain ? s_remain : buf_remain; - memcpy(arg->buf + arg->buf_end, s + i, n); - arg->buf_end += n; + memcpy(buf_writer->buf + buf_writer->buf_end, s + i, n); + buf_writer->buf_end += n; } assert(i == slen); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 190b3a2a..35c490be 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3746,11 +3746,11 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (buf == NULL) { stats_print(write_cb, cbopaque, opts); } else { - buf_write_arg_t buf_arg; - buf_write_init(&buf_arg, write_cb, cbopaque, buf, + buf_writer_t buf_writer; + buf_writer_init(&buf_writer, write_cb, cbopaque, buf, STATS_PRINT_BUFSIZE); - stats_print(buf_write_cb, &buf_arg, opts); - buf_write_flush(&buf_arg); + stats_print(buf_writer_cb, &buf_writer, opts); + buf_writer_flush(&buf_writer); idalloctm(tsdn, buf, NULL, NULL, true, true); } } diff --git a/src/prof_log.c b/src/prof_log.c index a04c8e40..95cf246d 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -632,15 +632,15 @@ prof_log_stop(tsdn_t *tsdn) { char *buf = (char *)iallocztm(tsdn, 
PROF_LOG_STOP_BUFSIZE, sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - buf_write_arg_t buf_arg; + buf_writer_t buf_writer; if (buf == NULL) { emitter_init(&emitter, emitter_output_json_compact, prof_emitter_write_cb, &arg); } else { - buf_write_init(&buf_arg, prof_emitter_write_cb, &arg, buf, + buf_writer_init(&buf_writer, prof_emitter_write_cb, &arg, buf, PROF_LOG_STOP_BUFSIZE); emitter_init(&emitter, emitter_output_json_compact, - buf_write_cb, &buf_arg); + buf_writer_cb, &buf_writer); } emitter_begin(&emitter); @@ -651,7 +651,7 @@ prof_log_stop(tsdn_t *tsdn) { emitter_end(&emitter); if (buf != NULL) { - buf_write_flush(&buf_arg); + buf_writer_flush(&buf_writer); idalloctm(tsdn, buf, NULL, NULL, true, true); } diff --git a/src/prof_recent.c b/src/prof_recent.c index 66a9b406..dde029ce 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -466,15 +466,15 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), true); emitter_t emitter; - buf_write_arg_t buf_arg; + buf_writer_t buf_writer; if (buf == NULL) { emitter_init(&emitter, emitter_output_json_compact, write_cb, cbopaque); } else { - buf_write_init(&buf_arg, write_cb, cbopaque, buf, + buf_writer_init(&buf_writer, write_cb, cbopaque, buf, PROF_RECENT_PRINT_BUFSIZE); emitter_init(&emitter, emitter_output_json_compact, - buf_write_cb, &buf_arg); + buf_writer_cb, &buf_writer); } emitter_begin(&emitter); @@ -536,7 +536,7 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), emitter_end(&emitter); if (buf != NULL) { - buf_write_flush(&buf_arg); + buf_writer_flush(&buf_writer); idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); } } diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index bbdb657e..63fd0c67 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -22,8 +22,9 @@ 
TEST_BEGIN(test_buf_write) { size_t n_unit, remain, i; ssize_t unit; uint64_t arg = 4; /* Starting value of random argument. */ - buf_write_arg_t test_buf_arg = {test_write_cb, &arg, test_buf, - TEST_BUF_SIZE - 1, 0}; + buf_writer_t buf_writer; + buf_writer_init(&buf_writer, test_write_cb, &arg, test_buf, + TEST_BUF_SIZE); memset(s, 'a', UNIT_MAX); arg_store = arg; @@ -35,23 +36,23 @@ TEST_BEGIN(test_buf_write) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - buf_write_cb(&test_buf_arg, s); + buf_writer_cb(&buf_writer, s); remain += unit; - if (remain > test_buf_arg.buf_size) { + if (remain > buf_writer.buf_size) { /* Flushes should have happened. */ assert_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); - remain %= test_buf_arg.buf_size; + remain %= buf_writer.buf_size; if (remain == 0) { /* Last flush should be lazy. */ - remain += test_buf_arg.buf_size; + remain += buf_writer.buf_size; } } assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } - buf_write_flush(&test_buf_arg); + buf_writer_flush(&buf_writer); assert_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); From 9cac3fa8f588c828a0a94bdc911383d2952b40e0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 3 Feb 2020 15:56:13 -0800 Subject: [PATCH 1524/2608] Encapsulate buffer allocation in buffered writer --- include/jemalloc/internal/buf_writer.h | 27 +++--- src/buf_writer.c | 118 +++++++++++++++++++++++-- src/jemalloc.c | 19 ++-- src/prof_log.c | 22 ++--- src/prof_recent.c | 24 ++--- test/unit/buf_writer.c | 85 +++++++++++++++--- 6 files changed, 215 insertions(+), 80 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index b2644a86..c1e2a827 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ 
-10,27 +10,24 @@ * some "option like" content for the write_cb, so it doesn't matter. */ +typedef void (write_cb_t)(void *, const char *); + typedef struct { - void (*write_cb)(void *, const char *); - void *cbopaque; + write_cb_t *public_write_cb; + void *public_cbopaque; + write_cb_t *private_write_cb; + void *private_cbopaque; char *buf; size_t buf_size; size_t buf_end; + bool internal_buf; } buf_writer_t; -JEMALLOC_ALWAYS_INLINE void -buf_writer_init(buf_writer_t *buf_writer, void (*write_cb)(void *, - const char *), void *cbopaque, char *buf, size_t buf_len) { - buf_writer->write_cb = write_cb; - buf_writer->cbopaque = cbopaque; - assert(buf != NULL); - buf_writer->buf = buf; - assert(buf_len >= 2); - buf_writer->buf_size = buf_len - 1; /* Allowing for '\0' at the end. */ - buf_writer->buf_end = 0; -} - +bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, + write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); +write_cb_t *buf_writer_get_write_cb(buf_writer_t *buf_writer); +void *buf_writer_get_cbopaque(buf_writer_t *buf_writer); void buf_writer_flush(buf_writer_t *buf_writer); -void buf_writer_cb(void *buf_writer_arg, const char *s); +void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index aed7d4a8..bb8763b9 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -5,23 +5,114 @@ #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/malloc_io.h" -void -buf_writer_flush(buf_writer_t *buf_writer) { - assert(buf_writer->buf_end <= buf_writer->buf_size); - buf_writer->buf[buf_writer->buf_end] = '\0'; - if (buf_writer->write_cb == NULL) { - buf_writer->write_cb = je_malloc_message != NULL ? 
- je_malloc_message : wrtmessage; +static void * +buf_writer_allocate_internal_buf(tsdn_t *tsdn, size_t buf_len) { +#ifdef JEMALLOC_JET + if (buf_len > SC_LARGE_MAXCLASS) { + return NULL; + } +#else + assert(buf_len <= SC_LARGE_MAXCLASS); +#endif + return iallocztm(tsdn, buf_len, sz_size2index(buf_len), false, NULL, + true, arena_get(tsdn, 0, false), true); +} + +static void +buf_writer_free_internal_buf(tsdn_t *tsdn, void *buf) { + if (buf != NULL) { + idalloctm(tsdn, buf, NULL, NULL, true, true); + } +} + +static write_cb_t buf_writer_cb; + +static void +buf_writer_assert(buf_writer_t *buf_writer) { + if (buf_writer->buf != NULL) { + assert(buf_writer->public_write_cb == buf_writer_cb); + assert(buf_writer->public_cbopaque == buf_writer); + assert(buf_writer->private_write_cb != buf_writer_cb); + assert(buf_writer->private_cbopaque != buf_writer); + assert(buf_writer->buf_size > 0); + } else { + assert(buf_writer->public_write_cb != buf_writer_cb); + assert(buf_writer->public_cbopaque != buf_writer); + assert(buf_writer->private_write_cb == NULL); + assert(buf_writer->private_cbopaque == NULL); + assert(buf_writer->buf_size == 0); + } +} + +bool +buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, + void *cbopaque, char *buf, size_t buf_len) { + assert(buf_len >= 2); + if (buf != NULL) { + buf_writer->buf = buf; + buf_writer->internal_buf = false; + } else { + buf_writer->buf = buf_writer_allocate_internal_buf(tsdn, + buf_len); + buf_writer->internal_buf = true; } - buf_writer->write_cb(buf_writer->cbopaque, buf_writer->buf); buf_writer->buf_end = 0; + if (buf_writer->buf != NULL) { + buf_writer->public_write_cb = buf_writer_cb; + buf_writer->public_cbopaque = buf_writer; + buf_writer->private_write_cb = write_cb; + buf_writer->private_cbopaque = cbopaque; + buf_writer->buf_size = buf_len - 1; /* Allowing for '\0'. 
*/ + buf_writer_assert(buf_writer); + return false; + } else { + buf_writer->public_write_cb = write_cb; + buf_writer->public_cbopaque = cbopaque; + buf_writer->private_write_cb = NULL; + buf_writer->private_cbopaque = NULL; + buf_writer->buf_size = 0; + buf_writer_assert(buf_writer); + return true; + } +} + +write_cb_t * +buf_writer_get_write_cb(buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + return buf_writer->public_write_cb; +} + +void * +buf_writer_get_cbopaque(buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + return buf_writer->public_cbopaque; } void +buf_writer_flush(buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + if (buf_writer->buf == NULL) { + return; + } + assert(buf_writer->buf_end <= buf_writer->buf_size); + buf_writer->buf[buf_writer->buf_end] = '\0'; + if (buf_writer->private_write_cb == NULL) { + buf_writer->private_write_cb = je_malloc_message != NULL ? + je_malloc_message : wrtmessage; + } + assert(buf_writer->private_write_cb != NULL); + buf_writer->private_write_cb(buf_writer->private_cbopaque, + buf_writer->buf); + buf_writer->buf_end = 0; +} + +static void buf_writer_cb(void *buf_writer_arg, const char *s) { buf_writer_t *buf_writer = (buf_writer_t *)buf_writer_arg; - size_t i, slen, n, s_remain, buf_remain; + buf_writer_assert(buf_writer); + assert(buf_writer->buf != NULL); assert(buf_writer->buf_end <= buf_writer->buf_size); + size_t i, slen, n, s_remain, buf_remain; for (i = 0, slen = strlen(s); i < slen; i += n) { if (buf_writer->buf_end == buf_writer->buf_size) { buf_writer_flush(buf_writer); @@ -34,3 +125,12 @@ buf_writer_cb(void *buf_writer_arg, const char *s) { } assert(i == slen); } + +void +buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) { + buf_writer_assert(buf_writer); + buf_writer_flush(buf_writer); + if (buf_writer->internal_buf) { + buf_writer_free_internal_buf(tsdn, buf_writer->buf); + } +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 35c490be..ddb29e38 100644 --- 
a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3740,19 +3740,12 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, if (config_debug) { stats_print(write_cb, cbopaque, opts); } else { - char *buf = (char *)iallocztm(tsdn, STATS_PRINT_BUFSIZE, - sz_size2index(STATS_PRINT_BUFSIZE), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (buf == NULL) { - stats_print(write_cb, cbopaque, opts); - } else { - buf_writer_t buf_writer; - buf_writer_init(&buf_writer, write_cb, cbopaque, buf, - STATS_PRINT_BUFSIZE); - stats_print(buf_writer_cb, &buf_writer, opts); - buf_writer_flush(&buf_writer); - idalloctm(tsdn, buf, NULL, NULL, true, true); - } + buf_writer_t buf_writer; + buf_writer_init(tsdn, &buf_writer, write_cb, cbopaque, NULL, + STATS_PRINT_BUFSIZE); + stats_print(buf_writer_get_write_cb(&buf_writer), + buf_writer_get_cbopaque(&buf_writer), opts); + buf_writer_terminate(tsdn, &buf_writer); } check_entry_exit_locking(tsdn); diff --git a/src/prof_log.c b/src/prof_log.c index 95cf246d..c29fa350 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -629,19 +629,12 @@ prof_log_stop(tsdn_t *tsdn) { struct prof_emitter_cb_arg_s arg; arg.fd = fd; - char *buf = (char *)iallocztm(tsdn, PROF_LOG_STOP_BUFSIZE, - sz_size2index(PROF_LOG_STOP_BUFSIZE), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); buf_writer_t buf_writer; - if (buf == NULL) { - emitter_init(&emitter, emitter_output_json_compact, - prof_emitter_write_cb, &arg); - } else { - buf_writer_init(&buf_writer, prof_emitter_write_cb, &arg, buf, - PROF_LOG_STOP_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_cb, &buf_writer); - } + buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL, + PROF_LOG_STOP_BUFSIZE); + emitter_init(&emitter, emitter_output_json_compact, + buf_writer_get_write_cb(&buf_writer), + buf_writer_get_cbopaque(&buf_writer)); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -650,10 +643,7 @@ 
prof_log_stop(tsdn_t *tsdn) { prof_log_emit_allocs(tsd, &emitter); emitter_end(&emitter); - if (buf != NULL) { - buf_writer_flush(&buf_writer); - idalloctm(tsdn, buf, NULL, NULL, true, true); - } + buf_writer_terminate(tsdn, &buf_writer); /* Reset global state. */ if (log_tables_initialized) { diff --git a/src/prof_recent.c b/src/prof_recent.c index dde029ce..7a98cc58 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -462,20 +462,13 @@ dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), void *cbopaque) { - char *buf = (char *)iallocztm(tsd_tsdn(tsd), PROF_RECENT_PRINT_BUFSIZE, - sz_size2index(PROF_RECENT_PRINT_BUFSIZE), false, NULL, true, - arena_get(tsd_tsdn(tsd), 0, false), true); - emitter_t emitter; buf_writer_t buf_writer; - if (buf == NULL) { - emitter_init(&emitter, emitter_output_json_compact, write_cb, - cbopaque); - } else { - buf_writer_init(&buf_writer, write_cb, cbopaque, buf, - PROF_RECENT_PRINT_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_cb, &buf_writer); - } + buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, + PROF_RECENT_PRINT_BUFSIZE); + emitter_t emitter; + emitter_init(&emitter, emitter_output_json_compact, + buf_writer_get_write_cb(&buf_writer), + buf_writer_get_cbopaque(&buf_writer)); emitter_begin(&emitter); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -535,10 +528,7 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_end(&emitter); - if (buf != NULL) { - buf_writer_flush(&buf_writer); - idalloctm(tsd_tsdn(tsd), buf, NULL, NULL, true, true); - } + buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); } #undef PROF_RECENT_PRINT_BUFSIZE diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 63fd0c67..5171d618 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -7,6 +7,7 @@ 
static size_t test_write_len; static char test_buf[TEST_BUF_SIZE]; +static uint64_t arg; static uint64_t arg_store; static void test_write_cb(void *cbopaque, const char *s) { @@ -17,16 +18,16 @@ static void test_write_cb(void *cbopaque, const char *s) { "Test write overflowed"); } -TEST_BEGIN(test_buf_write) { +static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - uint64_t arg = 4; /* Starting value of random argument. */ - buf_writer_t buf_writer; - buf_writer_init(&buf_writer, test_write_cb, &arg, test_buf, - TEST_BUF_SIZE); + assert_ptr_not_null(buf_writer->buf, "Buffer is null"); + write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); + void *cbopaque = buf_writer_get_cbopaque(buf_writer); memset(s, 'a', UNIT_MAX); + arg = 4; /* Starting value of random argument. */ arg_store = arg; for (unit = UNIT_MAX; unit >= 0; --unit) { /* unit keeps decreasing, so strlen(s) is always unit. */ @@ -36,32 +37,96 @@ TEST_BEGIN(test_buf_write) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - buf_writer_cb(&buf_writer, s); + write_cb(cbopaque, s); remain += unit; - if (remain > buf_writer.buf_size) { + if (remain > buf_writer->buf_size) { /* Flushes should have happened. */ assert_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); - remain %= buf_writer.buf_size; + remain %= buf_writer->buf_size; if (remain == 0) { /* Last flush should be lazy. 
*/ - remain += buf_writer.buf_size; + remain += buf_writer->buf_size; } } assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } + buf_writer_flush(buf_writer); + assert_zu_eq(test_write_len, n_unit * unit, + "Incorrect length after flushing at the end of" + " writing %zu strings of length %zu", n_unit, unit); + } + } + buf_writer_terminate(tsdn, buf_writer); +} + +TEST_BEGIN(test_buf_write_static) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + test_buf, TEST_BUF_SIZE), + "buf_writer_init() should not encounter error on static buffer"); + test_buf_writer_body(tsdn, &buf_writer); +} +TEST_END + +TEST_BEGIN(test_buf_write_dynamic) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); + test_buf_writer_body(tsdn, &buf_writer); +} +TEST_END + +TEST_BEGIN(test_buf_write_oom) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + assert_ptr_null(buf_writer.buf, "Buffer should be null"); + write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); + assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); + void *cbopaque = buf_writer_get_cbopaque(&buf_writer); + assert_ptr_eq(cbopaque, &arg, "Should use arg"); + + char s[UNIT_MAX + 1]; + size_t n_unit, i; + ssize_t unit; + + memset(s, 'a', UNIT_MAX); + arg = 4; /* Starting value of random argument. */ + arg_store = arg; + for (unit = UNIT_MAX; unit >= 0; unit -= UNIT_MAX / 4) { + /* unit keeps decreasing, so strlen(s) is always unit. 
*/ + s[unit] = '\0'; + for (n_unit = 1; n_unit <= 3; ++n_unit) { + test_write_len = 0; + for (i = 1; i <= n_unit; ++i) { + arg = prng_lg_range_u64(&arg, 64); + write_cb(cbopaque, s); + assert_u64_eq(arg_store, arg, + "Call back argument didn't get through"); + assert_zu_eq(test_write_len, i * unit, + "Incorrect length after writing %zu strings" + " of length %zu", i, unit); + } buf_writer_flush(&buf_writer); assert_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); } } + buf_writer_terminate(tsdn, &buf_writer); } TEST_END int main(void) { - return test(test_buf_write); + return test( + test_buf_write_static, + test_buf_write_dynamic, + test_buf_write_oom); } From 2476889195e897912cc4b6a26bfeab1eee4c06df Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Feb 2020 15:00:37 -0800 Subject: [PATCH 1525/2608] Add inspect.c to MSVC filters --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ 2 files changed, 6 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 404adbe5..9b0445f6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -52,6 +52,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 404adbe5..9b0445f6 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -52,6 +52,9 @@ Source Files + + Source Files + Source Files From 7014f81e172290466e1a28118b622519bbbed2b0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Feb 2020 16:36:02 -0800 Subject: [PATCH 1526/2608] Add ASSURED_WRITE in mallctl --- src/ctl.c | 22 
++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 78f5df25..302cb9dc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1496,6 +1496,14 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +#define ASSURED_WRITE(v, t) do { \ + if (newp == NULL || newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ +} while (0) + #define MIB_UNSIGNED(v, i) do { \ if (mib[i] > UINT_MAX) { \ ret = EFAULT; \ @@ -2048,12 +2056,7 @@ tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; WRITEONLY(); - tcache_ind = UINT_MAX; - WRITE(tcache_ind, unsigned); - if (tcache_ind == UINT_MAX) { - ret = EFAULT; - goto label_return; - } + ASSURED_WRITE(tcache_ind, unsigned); tcaches_flush(tsd, tcache_ind); ret = 0; @@ -2068,12 +2071,7 @@ tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; WRITEONLY(); - tcache_ind = UINT_MAX; - WRITE(tcache_ind, unsigned); - if (tcache_ind == UINT_MAX) { - ret = EFAULT; - goto label_return; - } + ASSURED_WRITE(tcache_ind, unsigned); tcaches_destroy(tsd, tcache_ind); ret = 0; From ca1f08225134981eb74083e5143be4a9d544ff1a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 30 Dec 2019 17:14:44 -0800 Subject: [PATCH 1527/2608] Disallow merge across mmap regions to preserve SN / first-fit. Check the is_head state before merging two extents. Disallow the merge if it's crossing two separate mmap regions. This enforces first-fit (by not losing the SN) at a very small cost. 
--- src/ehooks.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/ehooks.c b/src/ehooks.c index 1e1cac9f..5ea73e3e 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -201,17 +201,6 @@ ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { static bool ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b) { - /* - * When coalesce is not always allowed (Windows), only merge extents - * from the same VirtualAlloc region under opt.retain (in which case - * MEM_DECOMMIT is utilized for purging). - */ - if (maps_coalesce) { - return false; - } - if (!opt_retain) { - return true; - } /* If b is a head extent, disallow the cross-region merge. */ if (head_b) { /* @@ -230,10 +219,27 @@ ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b) { - if (ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, head_b)) { + assert(addr_a < addr_b); + /* + * For non-DSS cases (first 2 branches) -- + * a) W/o maps_coalesce, merge is not always allowed (Windows): + * 1) w/o retain, never merge (first branch below). + * 2) with retain, only merge extents from the same VirtualAlloc + * region (in which case MEM_DECOMMIT is utilized for purging). + * + * b) With maps_coalesce, it's always possible to merge. + * 1) w/o retain, always allow merge (only about dirty / muzzy). + * 2) with retain, to preserve the SN / first-fit, merge is still + * disallowed if b is a head extent, i.e. no merging across + * different mmap regions. + * + * a2) and b2) share the implementation (the no_merge_heads branch). 
+ */ + if (!maps_coalesce && !opt_retain) { return true; } - if (!maps_coalesce && !opt_retain) { + if (opt_retain && ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, + head_b)) { return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { From 7fd22f7b2ea5ce2540563ece8e2d30a5316ac857 Mon Sep 17 00:00:00 2001 From: Kamil Rytarowski Date: Thu, 13 Feb 2020 14:49:32 +0100 Subject: [PATCH 1528/2608] Fix Undefined Behavior in hash.h hash.h:200:27, left shift of 250 by 24 places cannot be represented in type 'int' --- include/jemalloc/internal/hash.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 9132b60c..7f945679 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -182,19 +182,19 @@ hash_x86_128(const void *key, const int len, uint32_t seed, case 13: k4 ^= tail[12] << 0; k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; JEMALLOC_FALLTHROUGH; - case 12: k3 ^= tail[11] << 24; JEMALLOC_FALLTHROUGH; + case 12: k3 ^= (uint32_t) tail[11] << 24; JEMALLOC_FALLTHROUGH; case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH; case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH; case 9: k3 ^= tail[ 8] << 0; k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; JEMALLOC_FALLTHROUGH; - case 8: k2 ^= tail[ 7] << 24; JEMALLOC_FALLTHROUGH; + case 8: k2 ^= (uint32_t) tail[ 7] << 24; JEMALLOC_FALLTHROUGH; case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH; case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH; case 5: k2 ^= tail[ 4] << 0; k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; JEMALLOC_FALLTHROUGH; - case 4: k1 ^= tail[ 3] << 24; JEMALLOC_FALLTHROUGH; + case 4: k1 ^= (uint32_t) tail[ 3] << 24; JEMALLOC_FALLTHROUGH; case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH; case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH; case 1: k1 ^= tail[ 0] << 0; From ba0e35411cc39d57abb830c80eebde054b06241c Mon Sep 17 00:00:00 2001 From: 
Qi Wang Date: Fri, 7 Feb 2020 14:53:36 -0800 Subject: [PATCH 1529/2608] Rework the bin locking around tcache refill / flush. Previously, tcache fill/flush (as well as small alloc/dalloc on the arena) may potentially drop the bin lock for slab_alloc and slab_dalloc. This commit refactors the logic so that the slab calls happen in the same function / level as the bin lock / unlock. The main purpose is to be able to use flat combining without having to keep track of stack state. In the meantime, this change reduces the locking, especially for slab_dalloc calls, where nothing happens after the call. --- include/jemalloc/internal/arena_externs.h | 3 +- src/arena.c | 411 ++++++++++++---------- src/tcache.c | 39 +- 3 files changed, 259 insertions(+), 194 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 608dda72..1b92766d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,8 +63,9 @@ void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -void arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, +bool arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *edata, void *ptr); +void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/src/arena.c b/src/arena.c index 9558bb40..22348949 100644 --- a/src/arena.c +++ b/src/arena.c @@ -60,8 +60,6 @@ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool 
is_background_thread, bool all); -static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); @@ -996,7 +994,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } -static void +void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); @@ -1252,101 +1250,55 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard return slab; } -static edata_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, unsigned binshard) { - edata_t *slab; - const bin_info_t *bin_info; +/* + * Before attempting the _with_fresh_slab approaches below, the _no_fresh_slab + * variants (i.e. through slabcur and nonfull) must be tried first. + */ +static void +arena_bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, + bin_t *bin, szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after slabcur and nonfull both failed. */ + assert(bin->slabcur == NULL); + assert(edata_heap_first(&bin->slabs_nonfull) == NULL); + assert(fresh_slab != NULL); - /* Look for a usable slab. */ - slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) { - return slab; + /* A new slab from arena_slab_alloc() */ + assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); + if (config_stats) { + bin->stats.nslabs++; + bin->stats.curslabs++; } - /* No existing slabs have any space available. */ - - bin_info = &bin_infos[binind]; - - /* Allocate a new slab. 
*/ - malloc_mutex_unlock(tsdn, &bin->lock); - /******************************/ - slab = arena_slab_alloc(tsdn, arena, binind, binshard, bin_info); - /********************************/ - malloc_mutex_lock(tsdn, &bin->lock); - if (slab != NULL) { - if (config_stats) { - bin->stats.nslabs++; - bin->stats.curslabs++; - } - return slab; - } - - /* - * arena_slab_alloc() failed, but another thread may have made - * sufficient memory available while this one dropped bin->lock above, - * so search one more time. - */ - slab = arena_bin_slabs_nonfull_tryget(bin); - if (slab != NULL) { - return slab; - } - - return NULL; + bin->slabcur = fresh_slab; } -/* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). */ +/* Refill slabcur and then alloc using the fresh slab */ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, unsigned binshard) { +arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, bin, binind, + fresh_slab); + + return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + +static bool +arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, + bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after arena_slab_reg_alloc[_batch] failed. */ + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); if (bin->slabcur != NULL) { - /* Only attempted when current slab is full. */ - assert(edata_nfree_get(bin->slabcur) == 0); - } - - const bin_info_t *bin_info = &bin_infos[binind]; - edata_t *slab = arena_bin_nonfull_slab_get(tsdn, arena, bin, binind, - binshard); - if (bin->slabcur != NULL) { - if (edata_nfree_get(bin->slabcur) > 0) { - /* - * Another thread updated slabcur while this one ran - * without the bin lock in arena_bin_nonfull_slab_get(). 
- */ - void *ret = arena_slab_reg_alloc(bin->slabcur, - bin_info); - if (slab != NULL) { - /* - * arena_slab_alloc() may have allocated slab, - * or it may have been pulled from - * slabs_nonfull. Therefore it is unsafe to - * make any assumptions about how slab has - * previously been used, and - * arena_bin_lower_slab() must be called, as if - * a region were just deallocated from the slab. - */ - if (edata_nfree_get(slab) == bin_info->nregs) { - arena_dalloc_bin_slab(tsdn, arena, slab, - bin); - } else { - arena_bin_lower_slab(tsdn, arena, slab, - bin); - } - } - return ret; - } - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - bin->slabcur = NULL; } - if (slab == NULL) { - return NULL; - } - bin->slabcur = slab; - assert(edata_nfree_get(bin->slabcur) > 0); + /* Look for a usable slab. */ + bin->slabcur = arena_bin_slabs_nonfull_tryget(bin); + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(slab, bin_info); + return (bin->slabcur == NULL); } /* Choose a bin shard and return the locked bin. */ @@ -1369,63 +1321,139 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind) { - unsigned i, nfill, cnt; - assert(cache_bin_ncached_get(tbin, binind) == 0); tcache->bin_refilled[binind] = true; - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - + const bin_info_t *bin_info = &bin_infos[binind]; + const unsigned nfill = cache_bin_ncached_max_get(binind) >> + tcache->lg_fill_div[binind]; void **empty_position = cache_bin_empty_position_get(tbin, binind); - for (i = 0, nfill = (cache_bin_ncached_max_get(binind) >> - tcache->lg_fill_div[binind]); i < nfill; i += cnt) { - edata_t *slab; - if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > - 0) { - unsigned tofill = nfill - i; - cnt = tofill < edata_nfree_get(slab) ? 
- tofill : edata_nfree_get(slab); - arena_slab_reg_alloc_batch( - slab, &bin_infos[binind], cnt, - empty_position - nfill + i); - } else { - cnt = 1; - void *ptr = arena_bin_malloc_hard(tsdn, arena, bin, - binind, binshard); - /* - * OOM. tbin->avail isn't yet filled down to its first - * element, so the successful allocations (if any) must - * be moved just before tbin->avail before bailing out. - */ - if (ptr == NULL) { - if (i > 0) { - memmove(empty_position - i, - empty_position - nfill, - i * sizeof(void *)); - } - break; - } - /* Insert such that low regions get used first. */ - *(empty_position - nfill + i) = ptr; + + /* + * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull + * slabs. After both are exhausted, new slabs will be allocated through + * arena_slab_alloc(). + * + * Bin lock is only taken / released right before / after the while(...) + * refill loop, with new slab allocation (which has its own locking) + * kept outside of the loop. This setup facilitates flat combining, at + * the cost of the nested loop (through goto label_refill). + * + * To optimize for cases with contention and limited resources + * (e.g. hugepage-backed or non-overcommit arenas), each fill-iteration + * gets one chance of slab_alloc, and a retry of bin local resources + * after the slab allocation (regardless if slab_alloc failed, because + * the bin lock is dropped during the slab allocation). + * + * In other words, new slab allocation is allowed, as long as there was + * progress since the previous slab_alloc. This is tracked with + * made_progress below, initialized to true to jump start the first + * iteration. + * + * In other words (again), the loop will only terminate early (i.e. stop + * with filled < nfill) after going through the three steps: a) bin + * local exhausted, b) unlock and slab_alloc returns null, c) re-lock + * and bin local fails again. 
+ */ + bool made_progress = true; + edata_t *fresh_slab = NULL; + bool alloc_and_retry = false; + unsigned filled = 0; + + bin_t *bin; + unsigned binshard; +label_refill: + bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + while (filled < nfill) { + /* Try batch-fill from slabcur first. */ + edata_t *slabcur = bin->slabcur; + if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { + unsigned tofill = nfill - filled; + unsigned nfree = edata_nfree_get(slabcur); + unsigned cnt = tofill < nfree ? tofill : nfree; + + arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, + empty_position - tofill); + made_progress = true; + filled += cnt; + continue; } - if (config_fill && unlikely(opt_junk_alloc)) { - for (unsigned j = 0; j < cnt; j++) { - void* ptr = *(empty_position - nfill + i + j); - arena_alloc_junk_small(ptr, &bin_infos[binind], - true); - } + /* Next try refilling slabcur from nonfull slabs. */ + if (!arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + assert(bin->slabcur != NULL); + continue; } - } - if (config_stats) { - bin->stats.nmalloc += i; + + /* Then see if a new slab was reserved already. */ + if (fresh_slab != NULL) { + arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, + bin, binind, fresh_slab); + assert(bin->slabcur != NULL); + fresh_slab = NULL; + continue; + } + + /* Try slab_alloc if made progress (or never did slab_alloc). */ + if (made_progress) { + assert(bin->slabcur == NULL); + assert(fresh_slab == NULL); + alloc_and_retry = true; + /* Alloc a new slab then come back. */ + break; + } + + assert(fresh_slab == NULL); + /* + * OOM. tbin->avail isn't yet filled down to its first element, + * so the successful allocations (if any) must be moved just + * before tbin->avail before bailing out. + */ + if (filled > 0) { + memmove(empty_position - filled, empty_position - nfill, + filled * sizeof(void *)); + } + assert(!alloc_and_retry); + break; + } /* while (filled < nfill) loop. 
*/ + + if (config_stats && !alloc_and_retry) { + bin->stats.nmalloc += filled; bin->stats.nrequests += tbin->tstats.nrequests; - bin->stats.curregs += i; + bin->stats.curregs += filled; bin->stats.nfills++; tbin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); - cache_bin_ncached_set(tbin, binind, i); + + if (alloc_and_retry) { + assert(fresh_slab == NULL); + assert(filled < nfill); + assert(made_progress); + + fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, + bin_info); + /* fresh_slab NULL case handled in the for loop. */ + + alloc_and_retry = false; + made_progress = false; + goto label_refill; + } + assert(filled == nfill || (fresh_slab == NULL && !made_progress)); + + /* Release if allocated but not used. */ + if (fresh_slab != NULL) { + assert(edata_nfree_get(fresh_slab) == bin_info->nregs); + arena_slab_dalloc(tsdn, arena, fresh_slab); + fresh_slab = NULL; + } + + if (config_fill && unlikely(opt_junk_alloc)) { + for (unsigned i = 0; i < filled; i++) { + void *ptr = *(empty_position - nfill + filled + i); + arena_alloc_junk_small(ptr, bin_info, true); + } + } + cache_bin_ncached_set(tbin, binind, filled); arena_decay_tick(tsdn, arena); } @@ -1443,55 +1471,80 @@ arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = arena_dalloc_junk_small_impl; +/* + * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill + * bin->slabcur if necessary. 
+ */ +static void * +arena_bin_malloc_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { + if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + return NULL; + } + } + + assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); + return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { - void *ret; - bin_t *bin; - size_t usize; - edata_t *slab; - assert(binind < SC_NBINS); - usize = sz_index2size(binind); + const bin_info_t *bin_info = &bin_infos[binind]; + size_t usize = sz_index2size(binind); unsigned binshard; - bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); - - if ((slab = bin->slabcur) != NULL && edata_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); - } else { - ret = arena_bin_malloc_hard(tsdn, arena, bin, binind, binshard); - } + bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + edata_t *fresh_slab = NULL; + void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); - return NULL; + /******************************/ + fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, + bin_info); + /********************************/ + malloc_mutex_lock(tsdn, &bin->lock); + /* Retry since the lock was dropped. 
*/ + ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + if (ret == NULL) { + if (fresh_slab == NULL) { + /* OOM */ + malloc_mutex_unlock(tsdn, &bin->lock); + return NULL; + } + ret = arena_bin_malloc_with_fresh_slab(tsdn, arena, bin, + binind, fresh_slab); + fresh_slab = NULL; + } } - if (config_stats) { bin->stats.nmalloc++; bin->stats.nrequests++; bin->stats.curregs++; } - malloc_mutex_unlock(tsdn, &bin->lock); + if (fresh_slab != NULL) { + arena_slab_dalloc(tsdn, arena, fresh_slab); + } if (!zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &bin_infos[binind], false); + arena_alloc_junk_small(ret, bin_info, false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], - true); + arena_alloc_junk_small(ret, bin_info, true); } memset(ret, 0, usize); } - arena_decay_tick(tsdn, arena); + return ret; } @@ -1624,21 +1677,6 @@ arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { } } -static void -arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin) { - assert(slab != bin->slabcur); - - malloc_mutex_unlock(tsdn, &bin->lock); - /******************************/ - arena_slab_dalloc(tsdn, arena, slab); - /****************************/ - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats) { - bin->stats.curslabs--; - } -} - static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { @@ -1667,20 +1705,31 @@ arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, } static void +arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + + assert(slab != bin->slabcur); + if (config_stats) { + bin->stats.curslabs--; + } +} + +/* Returns true if arena_slab_dalloc must be called on slab */ +static bool arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, 
bin_t *bin, szind_t binind, edata_t *slab, void *ptr, bool junked) { - slab_data_t *slab_data = edata_slab_data_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; - if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); } + arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr); - arena_slab_reg_dalloc(slab, slab_data, ptr); + bool ret = false; unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab(tsdn, arena, slab, bin); + arena_dalloc_bin_slab_prepare(tsdn, slab, bin); + ret = true; } else if (nfree == 1 && slab != bin->slabcur) { arena_bin_slabs_full_remove(arena, bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); @@ -1690,13 +1739,15 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, bin->stats.ndalloc++; bin->stats.curregs--; } + + return ret; } -void +bool arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *edata, void *ptr) { - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, - true); + return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, + ptr, true); } static void @@ -1706,9 +1757,13 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, ptr, - false); + bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, + ptr, false); malloc_mutex_unlock(tsdn, &bin->lock); + + if (ret) { + arena_slab_dalloc(tsdn, arena, edata); + } } void diff --git a/src/tcache.c b/src/tcache.c index 3d965126..27ac5c2b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -142,8 +142,6 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, 
szind_t binind, unsigned rem) { - bool merged_stats = false; - assert(binind < SC_NBINS); cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); assert((cache_bin_sz_t)rem <= ncached); @@ -154,27 +152,30 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); + tsdn_t *tsdn = tsd_tsdn(tsd); /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsd_tsdn(tsd), tbin, binind, - nflush, item_edata); + tbin_edatas_lookup_size_check(tsdn, tbin, binind, nflush, + item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = iealloc(tsd_tsdn(tsd), - *(bottom_item - i)); + item_edata[i] = iealloc(tsdn, *(bottom_item - i)); } } + + bool merged_stats = false; + unsigned dalloc_count = 0; + VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); while (nflush > 0) { /* Lock the arena bin associated with the first object. 
*/ edata_t *edata = item_edata[0]; unsigned bin_arena_ind = edata_arena_ind_get(edata); - arena_t *bin_arena = arena_get(tsd_tsdn(tsd), bin_arena_ind, - false); + arena_t *bin_arena = arena_get(tsdn, bin_arena_ind, false); unsigned binshard = edata_binshard_get(edata); assert(binshard < bin_infos[binind].n_shards); bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_lock(tsdn, &bin->lock); if (config_stats && bin_arena == arena && !merged_stats) { merged_stats = true; bin->stats.nflushes++; @@ -189,8 +190,10 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, if (edata_arena_ind_get(edata) == bin_arena_ind && edata_binshard_get(edata) == binshard) { - arena_dalloc_bin_junked_locked(tsd_tsdn(tsd), - bin_arena, bin, binind, edata, ptr); + if (arena_dalloc_bin_junked_locked(tsdn, + bin_arena, bin, binind, edata, ptr)) { + dalloc_slabs[dalloc_count++] = edata; + } } else { /* * This object was allocated via a different @@ -203,22 +206,28 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, ndeferred++; } } - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); - arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred); + malloc_mutex_unlock(tsdn, &bin->lock); + arena_decay_ticks(tsdn, bin_arena, nflush - ndeferred); nflush = ndeferred; } + /* Handle all deferred slab dalloc. */ + for (unsigned i = 0; i < dalloc_count; i++) { + edata_t *slab = dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + if (config_stats && !merged_stats) { /* * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. 
*/ unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsd_tsdn(tsd), arena, binind, + bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); + malloc_mutex_unlock(tsdn, &bin->lock); } memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * From bc05ecebf66531ebed82ad630d096061087ea18d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 5 Feb 2020 15:33:31 -0800 Subject: [PATCH 1530/2608] Add const qualifier in assert_cmp() --- test/include/test/test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 07f58a47..9081716b 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,8 +1,8 @@ #define ASSERT_BUFSIZE 256 #define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ - t a_ = (a); \ - t b_ = (b); \ + const t a_ = (a); \ + const t b_ = (b); \ if (!(a_ cmp b_)) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ From 68e8ddcaffeee1f2a510e0fc00eb510001a4eff4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 4 Feb 2020 16:05:11 -0800 Subject: [PATCH 1531/2608] Add mallctl for dumping last-N profiling records --- src/ctl.c | 33 +++++++ test/unit/prof_recent.c | 192 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 302cb9dc..29909dfb 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -237,6 +237,7 @@ CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) INDEX_PROTO(experimental_arenas_i) CTL_PROTO(experimental_prof_recent_alloc_max) +CTL_PROTO(experimental_prof_recent_alloc_dump) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -631,6 +632,7 @@ static const ctl_indexed_node_t experimental_arenas_node[] = { static const ctl_named_node_t 
experimental_prof_recent_node[] = { {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, + {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)}, }; static const ctl_named_node_t experimental_node[] = { @@ -3549,3 +3551,34 @@ experimental_prof_recent_alloc_max_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +typedef struct write_cb_packet_s write_cb_packet_t; +struct write_cb_packet_s { + void (*write_cb)(void *, const char *); + void *cbopaque; +}; + +static int +experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (!(config_prof && opt_prof)) { + ret = ENOENT; + goto label_return; + } + + assert(sizeof(write_cb_packet_t) == sizeof(void *) * 2); + + WRITEONLY(); + write_cb_packet_t write_cb_packet; + ASSURED_WRITE(write_cb_packet, write_cb_packet_t); + + prof_recent_alloc_dump(tsd, write_cb_packet.write_cb, + write_cb_packet.cbopaque); + + ret = 0; + +label_return: + return ret; +} diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index c1324527..3c10618f 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -381,6 +381,197 @@ TEST_END #undef NTH_REQ_SIZE +#define DUMP_OUT_SIZE 4096 +static char dump_out[DUMP_OUT_SIZE]; +static size_t dump_out_len = 0; + +static void test_dump_write_cb(void *not_used, const char *str) { + size_t len = strlen(str); + assert(dump_out_len + len < DUMP_OUT_SIZE); + memcpy(dump_out + dump_out_len, str, len + 1); + dump_out_len += len; +} + +static void call_dump() { + static void *in[2] = {test_dump_write_cb, NULL}; + dump_out_len = 0; + assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", + NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); +} + +typedef struct { + size_t size; + bool released; +} confirm_record_t; + +#define DUMP_ERROR "Dump output is wrong" + +static void confirm_record(const char *template, + const confirm_record_t *records, 
const size_t n_records) { + static const char *types[2] = {"alloc", "dalloc"}; + static char buf[64]; + + /* + * The template string would be in the form of: + * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[]}", + * and dump_out would be in the form of: + * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". + * Using "- 2" serves to cut right before the ending "]}". + */ + assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, + DUMP_ERROR); + assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, + template + strlen(template) - 2, 2), 0, DUMP_ERROR); + + const char *start = dump_out + strlen(template) - 2; + const char *end = dump_out + strlen(dump_out) - 2; + const confirm_record_t *record; + for (record = records; record < records + n_records; ++record) { + +#define ASSERT_CHAR(c) do { \ + assert_true(start < end, DUMP_ERROR); \ + assert_c_eq(*start++, c, DUMP_ERROR); \ +} while (0) + +#define ASSERT_STR(s) do { \ + const size_t len = strlen(s); \ + assert_true(start + len <= end, DUMP_ERROR); \ + assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + start += len; \ +} while (0) + +#define ASSERT_FORMATTED_STR(s, ...) do { \ + malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__); \ + ASSERT_STR(buf); \ +} while (0) + + if (record != records) { + ASSERT_CHAR(','); + } + + ASSERT_CHAR('{'); + + ASSERT_STR("\"size\""); + ASSERT_CHAR(':'); + ASSERT_FORMATTED_STR("%zu", record->size); + ASSERT_CHAR(','); + + ASSERT_STR("\"usize\""); + ASSERT_CHAR(':'); + ASSERT_FORMATTED_STR("%zu", sz_s2u(record->size)); + ASSERT_CHAR(','); + + ASSERT_STR("\"released\""); + ASSERT_CHAR(':'); + ASSERT_STR(record->released ? 
"true" : "false"); + ASSERT_CHAR(','); + + const char **type = types; + while (true) { + ASSERT_FORMATTED_STR("\"%s_thread_uid\"", *type); + ASSERT_CHAR(':'); + while (isdigit(*start)) { + ++start; + } + ASSERT_CHAR(','); + + ASSERT_FORMATTED_STR("\"%s_time\"", *type); + ASSERT_CHAR(':'); + while (isdigit(*start)) { + ++start; + } + ASSERT_CHAR(','); + + ASSERT_FORMATTED_STR("\"%s_trace\"", *type); + ASSERT_CHAR(':'); + ASSERT_CHAR('['); + while (isdigit(*start) || *start == 'x' || + (*start >= 'a' && *start <= 'f') || + *start == '\"' || *start == ',') { + ++start; + } + ASSERT_CHAR(']'); + + if (strcmp(*type, "dalloc") == 0) { + break; + } + + assert(strcmp(*type, "alloc") == 0); + if (!record->released) { + break; + } + + ASSERT_CHAR(','); + ++type; + } + + ASSERT_CHAR('}'); + +#undef ASSERT_FORMATTED_STR +#undef ASSERT_STR +#undef ASSERT_CHAR + + } + assert_ptr_eq(record, records + n_records, DUMP_ERROR); + assert_ptr_eq(start, end, DUMP_ERROR); +} + +TEST_BEGIN(test_prof_recent_alloc_dump) { + test_skip_if(!config_prof); + + tsd_t *tsd = tsd_fetch(); + confirm_prof_setup(tsd); + + ssize_t future; + void *p, *q; + confirm_record_t records[2]; + + future = 0; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + call_dump(); + assert_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", + DUMP_ERROR); + + future = 2; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + call_dump(); + const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; + assert_str_eq(dump_out, template, DUMP_ERROR); + + p = malloc(7); + call_dump(); + records[0].size = 7; + records[0].released = false; + confirm_record(template, records, 1); + + q = malloc(17); + call_dump(); + records[1].size = 17; + records[1].released = false; + confirm_record(template, records, 2); + + free(q); + call_dump(); + records[1].released = true; + 
confirm_record(template, records, 2); + + free(p); + call_dump(); + records[0].released = true; + confirm_record(template, records, 2); + + future = OPT_ALLOC_MAX; + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + confirm_prof_setup(tsd); +} +TEST_END + +#undef DUMP_ERROR +#undef DUMP_OUT_SIZE + #define N_THREADS 16 #define N_PTRS 512 #define N_CTLS 8 @@ -500,5 +691,6 @@ main(void) { test_prof_recent_off, test_prof_recent_on, test_prof_recent_alloc, + test_prof_recent_alloc_dump, test_prof_recent_stress); } From 0f686e82a37e49af6caee2d469f2a2a88e1fbf7c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 13 Feb 2020 20:04:22 -0800 Subject: [PATCH 1532/2608] Avoid variable length array with length 0. --- src/tcache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 27ac5c2b..e8a4cc5f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -149,7 +149,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, arena_t *arena = tcache->arena; assert(arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(edata_t *, item_edata, nflush); + /* Variable length array must have > 0 length. */ + VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); tsdn_t *tsdn = tsd_tsdn(tsd); @@ -250,7 +251,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); unsigned nflush = ncached - rem; - VARIABLE_ARRAY(edata_t *, item_edata, nflush); + /* Variable length array must have > 0 length. 
*/ + VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); #ifndef JEMALLOC_EXTRA_SIZE_CHECK From 01f255161c97fac5a64517a0366d59eb8afdeae0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Jan 2020 13:55:46 -0800 Subject: [PATCH 1533/2608] Add emap, for tracking extent locking. --- Makefile.in | 1 + include/jemalloc/internal/arena_inlines_b.h | 26 +-- include/jemalloc/internal/emap.h | 33 +++ include/jemalloc/internal/extent.h | 2 - .../internal/jemalloc_internal_inlines_b.h | 3 +- include/jemalloc/internal/witness.h | 2 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 15 +- src/emap.c | 127 +++++++++++ src/extent.c | 211 ++++-------------- src/jemalloc.c | 20 +- src/large.c | 2 +- src/tcache.c | 2 +- test/unit/arena_reset.c | 2 +- 17 files changed, 257 insertions(+), 197 deletions(-) create mode 100644 include/jemalloc/internal/emap.h create mode 100644 src/emap.c diff --git a/Makefile.in b/Makefile.in index eda9c7a9..984bd724 100644 --- a/Makefile.in +++ b/Makefile.in @@ -110,6 +110,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/edata.c \ $(srcroot)src/edata_cache.c \ $(srcroot)src/ehooks.c \ + $(srcroot)src/emap.c \ $(srcroot)src/eset.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 844e045d..b39578c9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -188,7 +188,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - szind_t szind = rtree_szind_read(tsdn, &extents_rtree, rtree_ctx, + szind_t szind = rtree_szind_read(tsdn, &emap_global.rtree, 
rtree_ctx, (uintptr_t)ptr, true); assert(szind != SC_NSIZES); @@ -211,7 +211,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { edata_t *edata; szind_t szind; - if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, false, &edata, &szind)) { return 0; } @@ -247,11 +247,11 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { szind_t szind; bool slab; - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - true, &szind, &slab); + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, true, &szind, &slab); if (config_debug) { - edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); @@ -302,13 +302,13 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(szind != SC_NSIZES); } else { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); } if (config_debug) { rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, + edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(szind < SC_NSIZES); @@ -345,7 +345,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); assert(szind == sz_size2index(size)); @@ -353,7 +353,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { if (config_debug) { edata_t *edata = rtree_edata_read(tsdn, - &extents_rtree, rtree_ctx, 
(uintptr_t)ptr, true); + &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(slab == edata_slab_get(edata)); } @@ -388,8 +388,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &local_ctx.szind, + rtree_szind_slab_read(tsdn, &emap_global.rtree, + rtree_ctx, (uintptr_t)ptr, true, &local_ctx.szind, &local_ctx.slab); assert(local_ctx.szind == sz_size2index(size)); alloc_ctx = &local_ctx; @@ -407,10 +407,10 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (config_debug) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &szind, &slab); edata_t *edata = rtree_edata_read(tsdn, - &extents_rtree, rtree_ctx, (uintptr_t)ptr, true); + &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); assert(szind == edata_szind_get(edata)); assert(slab == edata_slab_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h new file mode 100644 index 00000000..9a37b943 --- /dev/null +++ b/include/jemalloc/internal/emap.h @@ -0,0 +1,33 @@ +#ifndef JEMALLOC_INTERNAL_EMAP_H +#define JEMALLOC_INTERNAL_EMAP_H + +#include "jemalloc/internal/mutex_pool.h" +#include "jemalloc/internal/rtree.h" + +typedef struct emap_s emap_t; +struct emap_s { + rtree_t rtree; + /* Keyed by the address of the edata_t being protected. 
*/ + mutex_pool_t mtx_pool; +}; + +extern emap_t emap_global; + +bool emap_init(emap_t *emap); + +void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); + +void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2); +void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2); + +edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only); + +bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, + bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b); + +#endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index b89708a4..d0ba70b8 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -19,8 +19,6 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; -extern rtree_t extents_rtree; - edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index ebfb331b..00fb6042 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent.h" #include "jemalloc/internal/rtree.h" @@ -81,7 +82,7 @@ iealloc(tsdn_t *tsdn, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_edata_read(tsdn, 
&extents_rtree, rtree_ctx, + return rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 083bdcc9..b5fa1c02 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -45,7 +45,7 @@ #define WITNESS_RANK_EXTENTS 15U #define WITNESS_RANK_EDATA_CACHE 16U -#define WITNESS_RANK_EXTENT_POOL 17U +#define WITNESS_RANK_EMAP 17U #define WITNESS_RANK_RTREE 18U #define WITNESS_RANK_BASE 19U #define WITNESS_RANK_ARENA_LARGE 20U diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d8b48986..d98bb858 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -50,6 +50,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 9b0445f6..fd3e11c0 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b0d32d93..b59d411f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -50,6 +50,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 9b0445f6..fd3e11c0 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index 22348949..3206a9a6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1112,8 +1112,9 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), 
&arena->large_mtx); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, + rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, + &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { @@ -1601,13 +1602,13 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - edata_t *edata = rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); szind_t szind = sz_size2index(usize); edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - szind, false); + rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, szind, false); prof_idump_rollback(tsdn, usize); @@ -1622,8 +1623,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { edata_szind_set(edata, SC_NBINS); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, (uintptr_t)ptr, - SC_NBINS, false); + rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, SC_NBINS, false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); diff --git a/src/emap.c b/src/emap.c new file mode 100644 index 00000000..ea3cce0f --- /dev/null +++ b/src/emap.c @@ -0,0 +1,127 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/emap.h" + +emap_t emap_global; + +enum emap_lock_result_e { + emap_lock_result_success, + emap_lock_result_failure, + emap_lock_result_no_extent +}; +typedef enum emap_lock_result_e 
emap_lock_result_t; + +bool +emap_init(emap_t *emap) { + bool err; + err = rtree_new(&emap->rtree, true); + if (err) { + return true; + } + err = mutex_pool_init(&emap->mtx_pool, "emap_mutex_pool", + WITNESS_RANK_EMAP); + if (err) { + return true; + } + return false; +} + +void +emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + assert(edata != NULL); + mutex_pool_lock(tsdn, &emap->mtx_pool, (uintptr_t)edata); +} + +void +emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + assert(edata != NULL); + mutex_pool_unlock(tsdn, &emap->mtx_pool, (uintptr_t)edata); +} + +void +emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_lock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, + (uintptr_t)edata2); +} + +void +emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, + edata_t *edata2) { + assert(edata1 != NULL && edata2 != NULL); + mutex_pool_unlock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, + (uintptr_t)edata2); +} + +static inline emap_lock_result_t +emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, + edata_t **result, bool inactive_only) { + edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, + elm, true); + + /* Slab implies active extents and should be skipped. */ + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, + &emap->rtree, elm, true))) { + return emap_lock_result_no_extent; + } + + /* + * It's possible that the extent changed out from under us, and with it + * the leaf->edata mapping. We have to recheck while holding the lock. 
+ */ + emap_lock_edata(tsdn, emap, edata1); + edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, elm, + true); + + if (edata1 == edata2) { + *result = edata1; + return emap_lock_result_success; + } else { + emap_unlock_edata(tsdn, emap, edata1); + return emap_lock_result_failure; + } +} + +/* + * Returns a pool-locked edata_t * if there's one associated with the given + * address, and NULL otherwise. + */ +edata_t * +emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + void *addr, bool inactive_only) { + edata_t *ret = NULL; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)addr, false, false); + if (elm == NULL) { + return NULL; + } + emap_lock_result_t lock_result; + do { + lock_result = emap_try_lock_rtree_leaf_elm(tsdn, emap, elm, + &ret, inactive_only); + } while (lock_result == emap_lock_result_failure); + return ret; +} + +bool +emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + const edata_t *edata, bool dependent, bool init_missing, + rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { + *r_elm_a = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), dependent, init_missing); + if (!dependent && *r_elm_a == NULL) { + return true; + } + assert(*r_elm_a != NULL); + + *r_elm_b = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), dependent, init_missing); + if (!dependent && *r_elm_b == NULL) { + return true; + } + assert(*r_elm_b != NULL); + + return false; +} diff --git a/src/extent.c b/src/extent.c index 07c0bd21..bbebf9ed 100644 --- a/src/extent.c +++ b/src/extent.c @@ -2,20 +2,15 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/ph.h" -#include 
"jemalloc/internal/rtree.h" #include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" /******************************************************************************/ /* Data. */ -rtree_t extents_rtree; -/* Keyed by the address of the edata_t being protected. */ -mutex_pool_t extent_mutex_pool; - size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -58,88 +53,6 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -typedef enum { - lock_result_success, - lock_result_failure, - lock_result_no_extent -} lock_result_t; - -static inline void -extent_lock_edata(tsdn_t *tsdn, edata_t *edata) { - assert(edata != NULL); - mutex_pool_lock(tsdn, &extent_mutex_pool, (uintptr_t)edata); -} - -static inline void -extent_unlock_edata(tsdn_t *tsdn, edata_t *edata) { - assert(edata != NULL); - mutex_pool_unlock(tsdn, &extent_mutex_pool, (uintptr_t)edata); -} - -static inline void -extent_lock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_lock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static inline void -extent_unlock_edata2(tsdn_t *tsdn, edata_t *edata1, edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_unlock2(tsdn, &extent_mutex_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static lock_result_t -extent_rtree_leaf_elm_try_lock(tsdn_t *tsdn, rtree_leaf_elm_t *elm, - edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, - elm, true); - - /* Slab implies active extents and should be skipped. 
*/ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &extents_rtree, elm, true))) { - return lock_result_no_extent; - } - - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->edata mapping. We have to recheck while holding the lock. - */ - extent_lock_edata(tsdn, edata1); - edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &extents_rtree, elm, - true); - - if (edata1 == edata2) { - *result = edata1; - return lock_result_success; - } else { - extent_unlock_edata(tsdn, edata1); - return lock_result_failure; - } -} - -/* - * Returns a pool-locked edata_t * if there's one associated with the given - * address, and NULL otherwise. - */ -static edata_t * -extent_lock_edata_from_addr(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, void *addr, - bool inactive_only) { - edata_t *ret = NULL; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &extents_rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - lock_result_t lock_result; - do { - lock_result = extent_rtree_leaf_elm_try_lock(tsdn, elm, &ret, - inactive_only); - } while (lock_result == lock_result_failure); - return ret; -} - static void extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { @@ -357,34 +270,14 @@ extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); } -static bool -extent_rtree_leaf_elms_lookup(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - const edata_t *edata, bool dependent, bool init_missing, - rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { - *r_elm_a = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), dependent, init_missing); - if (!dependent && *r_elm_a == NULL) { - return true; - } - assert(*r_elm_a != NULL); - - *r_elm_b = rtree_leaf_elm_lookup(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_last_get(edata), dependent, init_missing); - if (!dependent && 
*r_elm_b == NULL) { - return true; - } - assert(*r_elm_b != NULL); - - return false; -} - static void extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_a, edata, szind, slab); + rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_a, edata, szind, + slab); if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, elm_b, edata, szind, - slab); + rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_b, edata, + szind, slab); } } @@ -395,7 +288,7 @@ extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, /* Register interior. */ for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &extents_rtree, rtree_ctx, + rtree_write(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE), edata, szind, true); } @@ -448,11 +341,11 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - extent_lock_edata(tsdn, edata); + emap_lock_edata(tsdn, &emap_global, edata); - if (extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, false, true, - &elm_a, &elm_b)) { - extent_unlock_edata(tsdn, edata); + if (emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, + false, true, &elm_a, &elm_b)) { + emap_unlock_edata(tsdn, &emap_global, edata); return true; } @@ -463,7 +356,7 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { extent_interior_register(tsdn, rtree_ctx, edata, szind); } - extent_unlock_edata(tsdn, edata); + emap_unlock_edata(tsdn, &emap_global, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -503,7 +396,7 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, assert(edata_slab_get(edata)); for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &extents_rtree, rtree_ctx, + rtree_clear(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE)); } @@ -517,10 +410,10 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, edata, true, false, - &elm_a, &elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, + true, false, &elm_a, &elm_b); - extent_lock_edata(tsdn, edata); + emap_lock_edata(tsdn, &emap_global, edata); extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); if (edata_slab_get(edata)) { @@ -528,7 +421,7 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { edata_slab_set(edata, false); } - extent_unlock_edata(tsdn, edata); + emap_unlock_edata(tsdn, &emap_global, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -577,8 +470,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, 
&ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = extent_lock_edata_from_addr(tsdn, rtree_ctx, new_addr, - false); + edata = emap_lock_edata_from_addr(tsdn, &emap_global, rtree_ctx, + new_addr, false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -592,7 +485,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - extent_unlock_edata(tsdn, unlock_edata); + emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { edata = eset_fit(&ecache->eset, esize, alignment, @@ -692,11 +585,12 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ edata_szind_set(*edata, szind); if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)edata_addr_get(*edata), szind, slab); + rtree_szind_slab_update(tsdn, &emap_global.rtree, + rtree_ctx, (uintptr_t)edata_addr_get(*edata), szind, + slab); if (slab && edata_size_get(*edata) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, + rtree_szind_slab_update(tsdn, + &emap_global.rtree, rtree_ctx, (uintptr_t)edata_past_get(*edata) - (uintptr_t)PAGE, szind, slab); } @@ -760,8 +654,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, growing_retained); - assert(extent_lock_edata_from_addr(tsdn, rtree_ctx, leak, - false) == NULL); + assert(emap_lock_edata_from_addr(tsdn, &emap_global, + rtree_ctx, leak, false) == NULL); } return NULL; } @@ -1119,8 +1013,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, again = false; /* Try to coalesce forward. 
*/ - edata_t *next = extent_lock_edata_from_addr(tsdn, rtree_ctx, - edata_past_get(edata), inactive_only); + edata_t *next = emap_lock_edata_from_addr(tsdn, &emap_global, + rtree_ctx, edata_past_get(edata), inactive_only); if (next != NULL) { /* * ecache->mtx only protects against races for @@ -1130,7 +1024,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - extent_unlock_edata(tsdn, next); + emap_unlock_edata(tsdn, &emap_global, next); if (can_coalesce && !extent_coalesce(tsdn, edata_cache, ehooks, ecache, edata, next, true, @@ -1145,12 +1039,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, } /* Try to coalesce backward. */ - edata_t *prev = extent_lock_edata_from_addr(tsdn, rtree_ctx, - edata_before_get(edata), inactive_only); + edata_t *prev = emap_lock_edata_from_addr(tsdn, &emap_global, + rtree_ctx, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - extent_unlock_edata(tsdn, prev); + emap_unlock_edata(tsdn, &emap_global, prev); if (can_coalesce && !extent_coalesce(tsdn, edata_cache, ehooks, ecache, edata, prev, false, @@ -1210,7 +1104,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_slab_set(edata, false); } - assert(rtree_edata_read(tsdn, &extents_rtree, rtree_ctx, + assert(rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_base_get(edata), true) == edata); if (!ecache->delay_coalesce) { @@ -1449,19 +1343,19 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_committed_get(edata), edata_dumpable_get(edata), EXTENT_NOT_HEAD); - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, - true, &lead_elm_a, &lead_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, + &lead, false, true, &lead_elm_a, &lead_elm_b); } rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - 
extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, trail, false, true, - &trail_elm_a, &trail_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, trail, false, + true, &trail_elm_a, &trail_elm_b); if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL || trail_elm_b == NULL) { goto label_error_b; } - extent_lock_edata2(tsdn, edata, trail); + emap_lock_edata2(tsdn, &emap_global, edata, trail); bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1478,11 +1372,11 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, szind_b, slab_b); - extent_unlock_edata2(tsdn, edata, trail); + emap_unlock_edata2(tsdn, &emap_global, edata, trail); return trail; label_error_c: - extent_unlock_edata2(tsdn, edata, trail); + emap_unlock_edata2(tsdn, &emap_global, edata, trail); label_error_b: edata_cache_put(tsdn, edata_cache, trail); label_error_a: @@ -1523,19 +1417,19 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, a, true, false, &a_elm_a, - &a_elm_b); - extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, b, true, false, &b_elm_a, - &b_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, a, true, + false, &a_elm_a, &a_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, b, true, + false, &b_elm_a, &b_elm_b); - extent_lock_edata2(tsdn, a, b); + emap_lock_edata2(tsdn, &emap_global, a, b); if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, a_elm_b, NULL, + rtree_leaf_elm_write(tsdn, &emap_global.rtree, a_elm_b, NULL, SC_NSIZES, false); } if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &extents_rtree, b_elm_a, NULL, 
+ rtree_leaf_elm_write(tsdn, &emap_global.rtree, b_elm_a, NULL, SC_NSIZES, false); } else { b_elm_b = b_elm_a; @@ -1550,7 +1444,7 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, false); - extent_unlock_edata2(tsdn, a, b); + emap_unlock_edata2(tsdn, &emap_global, a, b); edata_cache_put(tsdn, edata_cache, b); @@ -1567,15 +1461,6 @@ bool extent_boot(void) { assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t)); - if (rtree_new(&extents_rtree, true)) { - return true; - } - - if (mutex_pool_init(&extent_mutex_pool, "extent_mutex_pool", - WITNESS_RANK_EXTENT_POOL)) { - return true; - } - if (have_dss) { extent_dss_boot(); } diff --git a/src/jemalloc.c b/src/jemalloc.c index ddb29e38..8f34989a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/hook.h" @@ -1571,6 +1572,9 @@ malloc_init_hard_a0_locked() { if (base_boot(TSDN_NULL)) { return true; } + if (emap_init(&emap_global)) { + return true; + } if (extent_boot()) { return true; } @@ -2565,7 +2569,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); @@ -2619,15 +2623,16 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { alloc_ctx_t dbg_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), - &extents_rtree, rtree_ctx, (uintptr_t)ptr, - true, &dbg_ctx.szind, &dbg_ctx.slab); + 
&emap_global.rtree, rtree_ctx, + (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); assert(dbg_ctx.szind == ctx->szind); assert(dbg_ctx.slab == ctx->slab); } } else if (opt_prof) { ctx = &alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, &ctx->slab); /* Small alloc may have !slab (sampled). */ @@ -2699,7 +2704,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { bool slab; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), - &extents_rtree, rtree_ctx, (uintptr_t)ptr, &szind, &slab); + &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, &szind, + &slab); /* Note: profiled objects will have alloc_ctx.slab set */ if (unlikely(!res || !slab)) { @@ -3142,7 +3148,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); @@ -3421,7 +3427,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, + rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); diff --git a/src/large.c b/src/large.c index e133e193..2e520981 100644 --- a/src/large.c +++ b/src/large.c @@ -179,7 +179,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, 
&rtree_ctx_fallback); szind_t szind = sz_size2index(usize); edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, + rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), szind, false); if (config_stats && new_mapping) { diff --git a/src/tcache.c b/src/tcache.c index e8a4cc5f..9146f244 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -129,7 +129,7 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, size_t sz_sum = binind * nflush; void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { - rtree_edata_szind_read(tsdn, &extents_rtree, + rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)*(bottom_item - i), true, &edatas[i], &szind); sz_sum -= szind; diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 854799da..a1f1d07c 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -65,7 +65,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { edata_t *edata; szind_t szind; - if (rtree_edata_szind_read(tsdn, &extents_rtree, rtree_ctx, + if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, false, &edata, &szind)) { return 0; } From ca21ce4071d14b3cbbb88697bfd76a30b9de7ac8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 12:31:19 -0800 Subject: [PATCH 1534/2608] Emap: Move in write_acquired from extent. 
--- include/jemalloc/internal/emap.h | 5 +++++ src/emap.c | 10 ++++++++++ src/extent.c | 29 ++++++++++------------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9a37b943..b9624d18 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -30,4 +30,9 @@ bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b); +/* Only temporarily public; this will be internal eventually. */ +void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, + rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, + szind_t szind, bool slab); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index ea3cce0f..4ed9ff11 100644 --- a/src/emap.c +++ b/src/emap.c @@ -125,3 +125,13 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, return false; } + +void +emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, + rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, edata, szind, slab); + if (elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, edata, szind, + slab); + } +} diff --git a/src/extent.c b/src/extent.c index bbebf9ed..fc449254 100644 --- a/src/extent.c +++ b/src/extent.c @@ -270,17 +270,6 @@ extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); } -static void -extent_rtree_write_acquired(tsdn_t *tsdn, rtree_leaf_elm_t *elm_a, - rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_a, edata, szind, - slab); - if (elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, elm_b, edata, - szind, slab); - } -} - static 
void extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind) { @@ -351,7 +340,8 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { szind_t szind = edata_szind_get_maybe_invalid(edata); bool slab = edata_slab_get(edata); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, edata, szind, slab); + emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, edata, + szind, slab); if (slab) { extent_interior_register(tsdn, rtree_ctx, edata, szind); } @@ -415,7 +405,8 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { emap_lock_edata(tsdn, &emap_global, edata); - extent_rtree_write_acquired(tsdn, elm_a, elm_b, NULL, SC_NSIZES, false); + emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, NULL, + SC_NSIZES, false); if (edata_slab_get(edata)) { extent_interior_deregister(tsdn, rtree_ctx, edata); edata_slab_set(edata, false); @@ -1367,10 +1358,10 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_size_set(edata, size_a); edata_szind_set(edata, szind_a); - extent_rtree_write_acquired(tsdn, lead_elm_a, lead_elm_b, edata, - szind_a, slab_a); - extent_rtree_write_acquired(tsdn, trail_elm_a, trail_elm_b, trail, - szind_b, slab_b); + emap_rtree_write_acquired(tsdn, &emap_global, lead_elm_a, lead_elm_b, + edata, szind_a, slab_a); + emap_rtree_write_acquired(tsdn, &emap_global, trail_elm_a, trail_elm_b, + trail, szind_b, slab_b); emap_unlock_edata2(tsdn, &emap_global, edata, trail); @@ -1441,8 +1432,8 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - extent_rtree_write_acquired(tsdn, a_elm_a, b_elm_b, a, SC_NSIZES, - false); + emap_rtree_write_acquired(tsdn, &emap_global, a_elm_a, b_elm_b, a, + SC_NSIZES, false); emap_unlock_edata2(tsdn, &emap_global, a, b); From d05b61db4a4ac9ba498d2a478f65035935d776ba Mon Sep 17 00:00:00 2001 From: 
David Goldblatt Date: Thu, 30 Jan 2020 12:40:07 -0800 Subject: [PATCH 1535/2608] Emap: Move extent boundary registration in. --- include/jemalloc/internal/emap.h | 8 ++++++++ src/emap.c | 13 +++++++++++++ src/extent.c | 12 +++++------- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b9624d18..93fa472a 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -35,4 +35,12 @@ void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab); +/* + * Associate the given edata with its beginning and end address, setting the + * szind and slab info appropriately. + * Returns true on error (i.e. resource exhaustion). + */ +bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind, bool slab); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 4ed9ff11..4371c4a2 100644 --- a/src/emap.c +++ b/src/emap.c @@ -135,3 +135,16 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, slab); } } + +bool +emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_t *elm_a, *elm_b; + bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, + false, true, &elm_a, &elm_b); + if (err) { + return true; + } + emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); + return false; +} diff --git a/src/extent.c b/src/extent.c index fc449254..4c4e16a2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -324,7 +324,6 @@ static bool extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; /* * We need to hold the lock to protect against a 
concurrent coalesce @@ -332,16 +331,15 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { */ emap_lock_edata(tsdn, &emap_global, edata); - if (emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, - false, true, &elm_a, &elm_b)) { + szind_t szind = edata_szind_get_maybe_invalid(edata); + bool slab = edata_slab_get(edata); + + if (emap_register_boundary(tsdn, &emap_global, rtree_ctx, edata, szind, + slab)) { emap_unlock_edata(tsdn, &emap_global, edata); return true; } - szind_t szind = edata_szind_get_maybe_invalid(edata); - bool slab = edata_slab_get(edata); - emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, edata, - szind, slab); if (slab) { extent_interior_register(tsdn, rtree_ctx, edata, szind); } From 9b5ca0b09df207de4abe02ccaedd018fc2deed77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 13:32:38 -0800 Subject: [PATCH 1536/2608] Emap: Move in slab interior registration. --- include/jemalloc/internal/emap.h | 22 ++++++++++++++++++++++ src/emap.c | 13 +++++++++++++ src/extent.c | 22 ++++++---------------- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 93fa472a..e8b422e7 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -43,4 +43,26 @@ void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind, bool slab); +/* + * Does the same thing, but with the interior of the range, for slab + * allocations. + * + * You might wonder why we don't just have a single emap_register function that + * does both depending on the value of 'slab'. The answer is twofold: + * - As a practical matter, in places like the extract->split->commit pathway, + * we defer the interior operation until we're sure that the commit won't fail + * (but we have to register the split boundaries there). 
+ * - In general, we're trying to move to a world where the page-specific + * allocator doesn't know as much about how the pages it allocates will be + * used, and passing a 'slab' parameter everywhere makes that more + * complicated. + * + * Unlike the boundary version, this function can't fail; this is because slabs + * can't get big enough to touch a new page that neither of the boundaries + * touched, so no allocation is necessary to fill the interior once the boundary + * has been touched. + */ +void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 4371c4a2..0a37d17e 100644 --- a/src/emap.c +++ b/src/emap.c @@ -148,3 +148,16 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); return false; } + +void +emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata, szind_t szind) { + assert(edata_slab_get(edata)); + + /* Register interior. */ + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_write(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE), edata, szind, true); + } +} diff --git a/src/extent.c b/src/extent.c index 4c4e16a2..9975dd25 100644 --- a/src/extent.c +++ b/src/extent.c @@ -270,19 +270,6 @@ extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); } -static void -extent_interior_register(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, edata_t *edata, - szind_t szind) { - assert(edata_slab_get(edata)); - - /* Register interior. 
*/ - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), edata, szind, true); - } -} - static void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); @@ -341,7 +328,8 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { } if (slab) { - extent_interior_register(tsdn, rtree_ctx, edata, szind); + emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, + szind); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -704,7 +692,8 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); - extent_interior_register(tsdn, rtree_ctx, edata, szind); + emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, + szind); } if (*zero) { @@ -867,7 +856,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &rtree_ctx_fallback); edata_slab_set(edata, true); - extent_interior_register(tsdn, rtree_ctx, edata, szind); + emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, + szind); } if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); From 6513d9d923d4e32775612614326ff1889807c840 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 14:55:36 -0800 Subject: [PATCH 1537/2608] Emap: Move over deregistration boundary functions. 
--- include/jemalloc/internal/emap.h | 3 +++ src/emap.c | 11 +++++++++++ src/extent.c | 8 +------- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index e8b422e7..eef33f27 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -65,4 +65,7 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata, szind_t szind); +void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, edata_t *edata); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 0a37d17e..d54cf7e9 100644 --- a/src/emap.c +++ b/src/emap.c @@ -161,3 +161,14 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, LG_PAGE), edata, szind, true); } } + +void +emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata) { + rtree_leaf_elm_t *elm_a, *elm_b; + + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, + true, false, &elm_a, &elm_b); + emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, + false); +} diff --git a/src/extent.c b/src/extent.c index 9975dd25..2b2ba7ea 100644 --- a/src/extent.c +++ b/src/extent.c @@ -385,19 +385,13 @@ static void extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *elm_a, *elm_b; - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, edata, - true, false, &elm_a, &elm_b); emap_lock_edata(tsdn, &emap_global, edata); - - emap_rtree_write_acquired(tsdn, &emap_global, elm_a, elm_b, NULL, - SC_NSIZES, false); + emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata); if (edata_slab_get(edata)) { extent_interior_deregister(tsdn, rtree_ctx, edata); edata_slab_set(edata, 
false); } - emap_unlock_edata(tsdn, &emap_global, edata); if (config_prof && gdump) { From 44f5f5360598b57b9d701f6b544f5cd2acd4df9c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 30 Jan 2020 15:02:52 -0800 Subject: [PATCH 1538/2608] Emap: Move over deregistration functions. --- include/jemalloc/internal/emap.h | 2 ++ src/emap.c | 11 +++++++++++ src/extent.c | 25 ++----------------------- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index eef33f27..d28a5f79 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -67,5 +67,7 @@ void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata); +void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, + rtree_ctx_t *rtree_ctx, edata_t *edata); #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index d54cf7e9..92814fcc 100644 --- a/src/emap.c +++ b/src/emap.c @@ -172,3 +172,14 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, false); } + +void +emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + edata_t *edata) { + assert(edata_slab_get(edata)); + for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_clear(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << + LG_PAGE)); + } +} diff --git a/src/extent.c b/src/extent.c index 2b2ba7ea..bb88c202 100644 --- a/src/extent.c +++ b/src/extent.c @@ -357,27 +357,6 @@ extent_reregister(tsdn_t *tsdn, edata_t *edata) { assert(!err); } -/* - * Removes all pointers to the given extent from the global rtree indices for - * its interior. 
This is relevant for slab extents, for which we need to do - * metadata lookups at places other than the head of the extent. We deregister - * on the interior, then, when an extent moves from being an active slab to an - * inactive state. - */ -static void -extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, - edata_t *edata) { - size_t i; - - assert(edata_slab_get(edata)); - - for (i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE)); - } -} - /* * Removes all pointers to the given extent from the global rtree. */ @@ -389,7 +368,7 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { emap_lock_edata(tsdn, &emap_global, edata); emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata); if (edata_slab_get(edata)) { - extent_interior_deregister(tsdn, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); edata_slab_set(edata, false); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -1073,7 +1052,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { - extent_interior_deregister(tsdn, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); edata_slab_set(edata, false); } From 7c7b7020640488f26fb81143ab2ca7c74377580b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 13:27:21 -0800 Subject: [PATCH 1539/2608] Emap: Move over metadata splitting logic. 
--- include/jemalloc/internal/emap.h | 17 ++++++++++ src/emap.c | 53 ++++++++++++++++++++++++++++++++ src/extent.c | 44 ++++++-------------------- 3 files changed, 79 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index d28a5f79..e5188d49 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -70,4 +70,21 @@ void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata); +typedef struct emap_split_prepare_s emap_split_prepare_t; +struct emap_split_prepare_s { + rtree_leaf_elm_t *lead_elm_a; + rtree_leaf_elm_t *lead_elm_b; + rtree_leaf_elm_t *trail_elm_a; + rtree_leaf_elm_t *trail_elm_b; +}; + +bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b, unsigned ind_b); +void emap_split_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b); + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 92814fcc..45673c23 100644 --- a/src/emap.c +++ b/src/emap.c @@ -183,3 +183,56 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, LG_PAGE)); } } + +bool +emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b, unsigned ind_b) { + /* + * Note that while the trail mostly inherits its attributes from the + * extent to be split, it maintains its own arena ind -- this allows + * cross-arena edata interactions, such as occur in the range 
ecache. + */ + edata_init(trail, ind_b, + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_dumpable_get(edata), EXTENT_NOT_HEAD); + + /* + * We use incorrect constants for things like arena ind, zero, dump, and + * commit state, and head status. This is a fake edata_t, used to + * facilitate a lookup. + */ + edata_t lead; + edata_init(&lead, 0U, edata_addr_get(edata), size_a, slab_a, szind_a, 0, + extent_state_active, false, false, false, EXTENT_NOT_HEAD); + + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, + &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, false, true, + &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); + + if (split_prepare->lead_elm_a == NULL + || split_prepare->lead_elm_b == NULL + || split_prepare->trail_elm_a == NULL + || split_prepare->trail_elm_b == NULL) { + return true; + } + return false; +} + +void +emap_split_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, + szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, + szind_t szind_b, bool slab_b) { + edata_size_set(lead, size_a); + edata_szind_set(lead, szind_a); + + emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, + split_prepare->lead_elm_b, lead, szind_a, slab_a); + emap_rtree_write_acquired(tsdn, emap, split_prepare->trail_elm_a, + split_prepare->trail_elm_b, trail, szind_b, slab_b); +} diff --git a/src/extent.c b/src/extent.c index bb88c202..fa9bc41b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1276,53 +1276,27 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_a; } - edata_init(trail, ehooks_ind_get(ehooks), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), 
edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_dumpable_get(edata), EXTENT_NOT_HEAD); - rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *lead_elm_a, *lead_elm_b; - { - edata_t lead; - - edata_init(&lead, ehooks_ind_get(ehooks), - edata_addr_get(edata), size_a, - slab_a, szind_a, edata_sn_get(edata), - edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), edata_dumpable_get(edata), - EXTENT_NOT_HEAD); - - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, - &lead, false, true, &lead_elm_a, &lead_elm_b); - } - rtree_leaf_elm_t *trail_elm_a, *trail_elm_b; - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, trail, false, - true, &trail_elm_a, &trail_elm_b); - - if (lead_elm_a == NULL || lead_elm_b == NULL || trail_elm_a == NULL - || trail_elm_b == NULL) { + emap_split_prepare_t split_prepare; + bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, + &split_prepare, edata, size_a, szind_a, slab_a, trail, size_b, + szind_b, slab_b, ehooks_ind_get(ehooks)); + if (err) { goto label_error_b; } emap_lock_edata2(tsdn, &emap_global, edata, trail); - bool err = ehooks_split(tsdn, ehooks, edata_base_get(edata), - size_a + size_b, size_a, size_b, edata_committed_get(edata)); + err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, + size_a, size_b, edata_committed_get(edata)); if (err) { goto label_error_c; } - edata_size_set(edata, size_a); - edata_szind_set(edata, szind_a); - - emap_rtree_write_acquired(tsdn, &emap_global, lead_elm_a, lead_elm_b, - edata, szind_a, slab_a); - emap_rtree_write_acquired(tsdn, &emap_global, trail_elm_a, trail_elm_b, - trail, szind_b, slab_b); + emap_split_commit(tsdn, &emap_global, &split_prepare, edata, size_a, + szind_a, slab_a, trail, size_b, szind_b, slab_b); emap_unlock_edata2(tsdn, &emap_global, edata, trail); From 040eac77ccca6d07b8457237cfe939b7e182474b Mon Sep 17 
00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 13:33:05 -0800 Subject: [PATCH 1540/2608] Tell edatas their creation arena immediately. This avoids having to pass it in anywhere else. --- include/jemalloc/internal/emap.h | 2 +- src/base.c | 1 + src/emap.c | 4 ++-- src/extent.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index e5188d49..fcc9fe4c 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -81,7 +81,7 @@ struct emap_split_prepare_s { bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b, unsigned ind_b); + szind_t szind_b, bool slab_b); void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, diff --git a/src/base.c b/src/base.c index 005b0c53..c006774e 100644 --- a/src/base.c +++ b/src/base.c @@ -468,6 +468,7 @@ base_alloc_edata(tsdn_t *tsdn, base_t *base) { if (edata == NULL) { return NULL; } + edata_arena_ind_set(edata, ehooks_ind_get(&base->ehooks)); edata_esn_set(edata, esn); return edata; } diff --git a/src/emap.c b/src/emap.c index 45673c23..b79b66af 100644 --- a/src/emap.c +++ b/src/emap.c @@ -188,13 +188,13 @@ bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b, unsigned ind_b) { + szind_t szind_b, bool slab_b) { /* * Note that while the trail mostly inherits its attributes from the * extent to be split, it maintains its own arena ind -- this allows * cross-arena edata interactions, such as occur in the range ecache. 
*/ - edata_init(trail, ind_b, + edata_init(trail, edata_arena_ind_get(trail), (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), diff --git a/src/extent.c b/src/extent.c index fa9bc41b..e8a12574 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1281,7 +1281,7 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, emap_split_prepare_t split_prepare; bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, &split_prepare, edata, size_a, szind_a, slab_a, trail, size_b, - szind_b, slab_b, ehooks_ind_get(ehooks)); + szind_b, slab_b); if (err) { goto label_error_b; } From 0586a56f39845433faa54cea5be56b80e14b2570 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 14:15:07 -0800 Subject: [PATCH 1541/2608] Emap: Move in merge functionality. --- include/jemalloc/internal/emap.h | 4 ++++ src/emap.c | 37 ++++++++++++++++++++++++++++++++ src/extent.c | 28 +++--------------------- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index fcc9fe4c..7835da96 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -86,5 +86,9 @@ void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); +void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); +void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index b79b66af..bde986f0 100644 --- a/src/emap.c +++ b/src/emap.c @@ -236,3 +236,40 @@ emap_split_commit(tsdn_t *tsdn, emap_t 
*emap, emap_rtree_write_acquired(tsdn, emap, split_prepare->trail_elm_a, split_prepare->trail_elm_b, trail, szind_b, slab_b); } + +void +emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false, + &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false, + &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); +} + +void +emap_merge_commit(tsdn_t *tsdn, emap_t *emap, + emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { + if (split_prepare->lead_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &emap->rtree, + split_prepare->lead_elm_b, NULL, SC_NSIZES, false); + } + + rtree_leaf_elm_t *merged_b; + if (split_prepare->trail_elm_b != NULL) { + rtree_leaf_elm_write(tsdn, &emap->rtree, + split_prepare->trail_elm_a, NULL, SC_NSIZES, false); + merged_b = split_prepare->trail_elm_b; + } else { + merged_b = split_prepare->trail_elm_a; + } + + edata_size_set(lead, edata_size_get(lead) + edata_size_get(trail)); + edata_szind_set(lead, SC_NSIZES); + edata_sn_set(lead, (edata_sn_get(lead) < edata_sn_get(trail)) ? 
+ edata_sn_get(lead) : edata_sn_get(trail)); + edata_zeroed_set(lead, edata_zeroed_get(lead) + && edata_zeroed_get(trail)); + + emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, + merged_b, lead, SC_NSIZES, false); +} diff --git a/src/extent.c b/src/extent.c index e8a12574..3db82a70 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1342,33 +1342,11 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, */ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_leaf_elm_t *a_elm_a, *a_elm_b, *b_elm_a, *b_elm_b; - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, a, true, - false, &a_elm_a, &a_elm_b); - emap_rtree_leaf_elms_lookup(tsdn, &emap_global, rtree_ctx, b, true, - false, &b_elm_a, &b_elm_b); - + emap_split_prepare_t split_prepare; + emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &split_prepare, a, b); emap_lock_edata2(tsdn, &emap_global, a, b); - if (a_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, a_elm_b, NULL, - SC_NSIZES, false); - } - if (b_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap_global.rtree, b_elm_a, NULL, - SC_NSIZES, false); - } else { - b_elm_b = b_elm_a; - } - - edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_szind_set(a, SC_NSIZES); - edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? - edata_sn_get(a) : edata_sn_get(b)); - edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - - emap_rtree_write_acquired(tsdn, &emap_global, a_elm_a, b_elm_b, a, - SC_NSIZES, false); + emap_merge_commit(tsdn, &emap_global, &split_prepare, a, b); emap_unlock_edata2(tsdn, &emap_global, a, b); From 231d1477e5d8dd591d2f51c1c884ac58fc7adb2c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Feb 2020 14:21:48 -0800 Subject: [PATCH 1542/2608] Rename emap_split_prepare_t -> emap_prepare_t. Both the split and merge functions use it. 
--- include/jemalloc/internal/emap.h | 22 ++++++------- src/emap.c | 56 +++++++++++++++----------------- src/extent.c | 20 +++++------- 3 files changed, 45 insertions(+), 53 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7835da96..3be9192f 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -70,8 +70,8 @@ void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, edata_t *edata); -typedef struct emap_split_prepare_s emap_split_prepare_t; -struct emap_split_prepare_s { +typedef struct emap_prepare_s emap_prepare_t; +struct emap_prepare_s { rtree_leaf_elm_t *lead_elm_a; rtree_leaf_elm_t *lead_elm_b; rtree_leaf_elm_t *trail_elm_a; @@ -79,16 +79,14 @@ struct emap_split_prepare_s { }; bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b); -void emap_split_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b); + emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); +void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b); void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); -void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail); + emap_prepare_t *prepare, edata_t *lead, edata_t 
*trail); +void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail); #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index bde986f0..1d41307b 100644 --- a/src/emap.c +++ b/src/emap.c @@ -186,9 +186,8 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b) { + emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { /* * Note that while the trail mostly inherits its attributes from the * extent to be split, it maintains its own arena ind -- this allows @@ -210,57 +209,54 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, extent_state_active, false, false, false, EXTENT_NOT_HEAD); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, - &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + &prepare->lead_elm_a, &prepare->lead_elm_b); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, false, true, - &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); + &prepare->trail_elm_a, &prepare->trail_elm_b); - if (split_prepare->lead_elm_a == NULL - || split_prepare->lead_elm_b == NULL - || split_prepare->trail_elm_a == NULL - || split_prepare->trail_elm_b == NULL) { + if (prepare->lead_elm_a == NULL || prepare->lead_elm_b == NULL + || prepare->trail_elm_a == NULL || prepare->trail_elm_b == NULL) { return true; } return false; } void -emap_split_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, size_t size_a, - szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, - szind_t szind_b, bool slab_b) { +emap_split_commit(tsdn_t *tsdn, emap_t *emap, 
emap_prepare_t *prepare, + edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b) { edata_size_set(lead, size_a); edata_szind_set(lead, szind_a); - emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, - split_prepare->lead_elm_b, lead, szind_a, slab_a); - emap_rtree_write_acquired(tsdn, emap, split_prepare->trail_elm_a, - split_prepare->trail_elm_b, trail, szind_b, slab_b); + emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, + prepare->lead_elm_b, lead, szind_a, slab_a); + emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a, + prepare->trail_elm_b, trail, szind_b, slab_b); } void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { + emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false, - &split_prepare->lead_elm_a, &split_prepare->lead_elm_b); + &prepare->lead_elm_a, &prepare->lead_elm_b); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false, - &split_prepare->trail_elm_a, &split_prepare->trail_elm_b); + &prepare->trail_elm_a, &prepare->trail_elm_b); } void -emap_merge_commit(tsdn_t *tsdn, emap_t *emap, - emap_split_prepare_t *split_prepare, edata_t *lead, edata_t *trail) { - if (split_prepare->lead_elm_b != NULL) { +emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail) { + if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - split_prepare->lead_elm_b, NULL, SC_NSIZES, false); + prepare->lead_elm_b, NULL, SC_NSIZES, false); } rtree_leaf_elm_t *merged_b; - if (split_prepare->trail_elm_b != NULL) { + if (prepare->trail_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - split_prepare->trail_elm_a, NULL, SC_NSIZES, false); - merged_b = split_prepare->trail_elm_b; + prepare->trail_elm_a, NULL, SC_NSIZES, false); + 
merged_b = prepare->trail_elm_b; } else { - merged_b = split_prepare->trail_elm_a; + merged_b = prepare->trail_elm_a; } edata_size_set(lead, edata_size_get(lead) + edata_size_get(trail)); @@ -270,6 +266,6 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, edata_zeroed_set(lead, edata_zeroed_get(lead) && edata_zeroed_get(trail)); - emap_rtree_write_acquired(tsdn, emap, split_prepare->lead_elm_a, - merged_b, lead, SC_NSIZES, false); + emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, + lead, SC_NSIZES, false); } diff --git a/src/extent.c b/src/extent.c index 3db82a70..2c8813d4 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1278,10 +1278,9 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - emap_split_prepare_t split_prepare; - bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, - &split_prepare, edata, size_a, szind_a, slab_a, trail, size_b, - szind_b, slab_b); + emap_prepare_t prepare; + bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, &prepare, + edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); if (err) { goto label_error_b; } @@ -1295,8 +1294,8 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_c; } - emap_split_commit(tsdn, &emap_global, &split_prepare, edata, size_a, - szind_a, slab_a, trail, size_b, szind_b, slab_b); + emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a, + slab_a, trail, size_b, szind_b, slab_b); emap_unlock_edata2(tsdn, &emap_global, edata, trail); @@ -1342,12 +1341,11 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, */ rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - emap_split_prepare_t split_prepare; - emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &split_prepare, a, b); + emap_prepare_t prepare; + 
emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &prepare, a, b); + emap_lock_edata2(tsdn, &emap_global, a, b); - - emap_merge_commit(tsdn, &emap_global, &split_prepare, a, b); - + emap_merge_commit(tsdn, &emap_global, &prepare, a, b); emap_unlock_edata2(tsdn, &emap_global, a, b); edata_cache_put(tsdn, edata_cache, b); From 08eb1e6c3164b90cebe0f28bb07c0586a74f3c9e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 12:16:30 -0800 Subject: [PATCH 1543/2608] Emap: Comments and cleanup Document some of the public interface, and hide the functions that are no longer used outside of the emap module. --- include/jemalloc/internal/emap.h | 33 +++++++++++++++++++++----------- src/emap.c | 4 ++-- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 3be9192f..99ebfd8d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -15,26 +15,21 @@ extern emap_t emap_global; bool emap_init(emap_t *emap); +/* + * Grab the lock or locks associated with the edata or edatas indicated (which + * is done just by simple address hashing). The hashing strategy means that + * it's never safe to grab locks incrementally -- you have to grab all the locks + * you'll need at once, and release them all at once. 
+ */ void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); - void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); - edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only); -bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, - bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b); - -/* Only temporarily public; this will be internal eventually. */ -void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, - rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, - szind_t szind, bool slab); - /* * Associate the given edata with its beginning and end address, setting the * szind and slab info appropriately. @@ -78,6 +73,22 @@ struct emap_prepare_s { rtree_leaf_elm_t *trail_elm_b; }; +/** + * These functions do some of the metadata management for merging and splitting + * extents. In particular, they set the mappings from addresses to edatas and + * fill in lead and trail. + * + * Each operation has a "prepare" and a "commit" portion. The prepare portion + * does the operations that can be done without exclusive access to the extent + * in question, while the commit variant requires exclusive access to maintain + * the emap invariants. The only function that can fail is emap_split_prepare, + * and it returns true on failure (at which point the caller shouldn't commit). + * + * In all cases, "lead" refers to the lower-addressed extent, and trail to the + * higher-addressed one. Trail can contain garbage (except for its arena_ind + * and esn values) data for the split variants, and can be reused for any + * purpose by its given arena after a merge or a failed split. 
+ */ bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/src/emap.c b/src/emap.c index 1d41307b..9ff51e3d 100644 --- a/src/emap.c +++ b/src/emap.c @@ -105,7 +105,7 @@ emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, return ret; } -bool +static bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, bool init_missing, rtree_leaf_elm_t **r_elm_a, rtree_leaf_elm_t **r_elm_b) { @@ -126,7 +126,7 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, return false; } -void +static void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, edata, szind, slab); From 1d449bd9a6aca25f3cdfc58545f4857f52f36b12 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 13:51:05 -0800 Subject: [PATCH 1544/2608] Emap: Internal rtree context setting. The only time sharing an rtree context saves across extent operations isn't a no-op is when tsd is unavailable. But this happens only in situations like thread death or initialization, and we don't care about shaving off every possible cycle in such scenarios. 
--- include/jemalloc/internal/emap.h | 48 ++++++---- src/emap.c | 73 ++++++++++++--- src/extent.c | 153 +++++++++++-------------------- 3 files changed, 139 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 99ebfd8d..70163942 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -27,16 +27,16 @@ void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, edata_t *edata2); -edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, void *addr, bool inactive_only); +edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, + bool inactive_only); /* * Associate the given edata with its beginning and end address, setting the * szind and slab info appropriately. * Returns true on error (i.e. resource exhaustion). */ -bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind, bool slab); +bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind, bool slab); /* * Does the same thing, but with the interior of the range, for slab @@ -57,13 +57,11 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, * touched, so no allocation is necessary to fill the interior once the boundary * has been touched. 
*/ -void emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind); +void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind); -void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, edata_t *edata); -void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, - rtree_ctx_t *rtree_ctx, edata_t *edata); +void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata); typedef struct emap_prepare_s emap_prepare_t; struct emap_prepare_s { @@ -74,9 +72,12 @@ struct emap_prepare_s { }; /** - * These functions do some of the metadata management for merging and splitting - * extents. In particular, they set the mappings from addresses to edatas and - * fill in lead and trail. + * These functions do some of the metadata management for merging, splitting, + * and reusing extents. In particular, they set the boundary mappings from + * addresses to edatas and fill in the szind, size, and slab values for the + * output edata (and, for splitting, *all* values for the trail). If the result + * is going to be used as a slab, you still need to call emap_register_interior + * on it, though. * * Each operation has a "prepare" and a "commit" portion. The prepare portion * does the operations that can be done without exclusive access to the extent @@ -89,15 +90,26 @@ struct emap_prepare_s { * and esn values) data for the split variants, and can be reused for any * purpose by its given arena after a merge or a failed split. 
*/ -bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, + szind_t szind, bool slab); +bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b); void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); -void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *lead, edata_t *trail); +void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail); void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail); +/* Assert that the emap's view of the given edata matches the edata's view. */ +void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +static inline void +emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + if (config_debug) { + emap_do_assert_mapped(tsdn, emap, edata); + } +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/emap.c b/src/emap.c index 9ff51e3d..5b7d4e1f 100644 --- a/src/emap.c +++ b/src/emap.c @@ -5,6 +5,15 @@ emap_t emap_global; +/* + * Note: Ends without at semicolon, so that + * EMAP_DECLARE_RTREE_CTX; + * in uses will avoid empty-statement warnings. 
+ */ +#define EMAP_DECLARE_RTREE_CTX \ + rtree_ctx_t rtree_ctx_fallback; \ + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) + enum emap_lock_result_e { emap_lock_result_success, emap_lock_result_failure, @@ -89,8 +98,9 @@ emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, * address, and NULL otherwise. */ edata_t * -emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - void *addr, bool inactive_only) { +emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, + bool inactive_only) { + EMAP_DECLARE_RTREE_CTX; edata_t *ret = NULL; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)addr, false, false); @@ -137,8 +147,10 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, } bool -emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind, bool slab) { +emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind, bool slab) { + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm_a, *elm_b; bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, false, true, &elm_a, &elm_b); @@ -150,8 +162,10 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } void -emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata, szind_t szind) { +emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + szind_t szind) { + EMAP_DECLARE_RTREE_CTX; + assert(edata_slab_get(edata)); /* Register interior. 
*/ @@ -163,8 +177,8 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } void -emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata) { +emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, @@ -174,8 +188,9 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } void -emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - edata_t *edata) { +emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + EMAP_DECLARE_RTREE_CTX; + assert(edata_slab_get(edata)); for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { rtree_clear(tsdn, &emap->rtree, rtree_ctx, @@ -184,10 +199,29 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, } } +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, + szind_t szind, bool slab) { + EMAP_DECLARE_RTREE_CTX; + + edata_szind_set(edata, szind); + if (szind != SC_NSIZES) { + rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_addr_get(edata), szind, slab); + if (slab && edata_size_get(edata) > PAGE) { + rtree_szind_slab_update(tsdn, + &emap->rtree, rtree_ctx, + (uintptr_t)edata_past_get(edata) - (uintptr_t)PAGE, + szind, slab); + } + } + +} + bool -emap_split_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { +emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, + size_t size_b, szind_t szind_b, bool slab_b) { + EMAP_DECLARE_RTREE_CTX; /* * Note that while the trail mostly inherits its attributes from the * extent to be split, it maintains its 
own arena ind -- this allows @@ -234,8 +268,9 @@ emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, } void -emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, - emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { +emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, + edata_t *lead, edata_t *trail) { + EMAP_DECLARE_RTREE_CTX; emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, lead, true, false, &prepare->lead_elm_a, &prepare->lead_elm_b); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, trail, true, false, @@ -269,3 +304,11 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false); } + +void +emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + EMAP_DECLARE_RTREE_CTX; + + assert(rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata), true) == edata); +} diff --git a/src/extent.c b/src/extent.c index 2c8813d4..7c79ced5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -43,8 +43,8 @@ static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained); + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, @@ -81,12 +81,11 @@ extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t 
*edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, - edata_t *edata) { + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, &coalesced, false); + edata = extent_try_coalesce(tsdn, edata_cache, ehooks, ecache, edata, + &coalesced, false); edata_state_set(edata, ecache->state); if (!coalesced) { @@ -160,9 +159,6 @@ ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t * ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - malloc_mutex_lock(tsdn, &ecache->mtx); /* @@ -188,7 +184,7 @@ ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } /* Try to coalesce. */ if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, - ehooks, rtree_ctx, ecache, edata)) { + ehooks, ecache, edata)) { break; } /* @@ -309,9 +305,6 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { static bool extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - /* * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
@@ -321,15 +314,13 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { szind_t szind = edata_szind_get_maybe_invalid(edata); bool slab = edata_slab_get(edata); - if (emap_register_boundary(tsdn, &emap_global, rtree_ctx, edata, szind, - slab)) { + if (emap_register_boundary(tsdn, &emap_global, edata, szind, slab)) { emap_unlock_edata(tsdn, &emap_global, edata); return true; } if (slab) { - emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, - szind); + emap_register_interior(tsdn, &emap_global, edata, szind); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -362,13 +353,10 @@ extent_reregister(tsdn_t *tsdn, edata_t *edata) { */ static void extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - emap_lock_edata(tsdn, &emap_global, edata); - emap_deregister_boundary(tsdn, &emap_global, rtree_ctx, edata); + emap_deregister_boundary(tsdn, &emap_global, edata); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, edata); edata_slab_set(edata, false); } emap_unlock_edata(tsdn, &emap_global, edata); @@ -394,8 +382,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(alignment > 0); @@ -420,8 +408,8 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, &emap_global, rtree_ctx, - new_addr, false); + edata = emap_lock_edata_from_addr(tsdn, &emap_global, new_addr, + false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -480,7 +468,6 @@ typedef enum { static extent_split_interior_result_t extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ @@ -529,22 +516,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (leadsize == 0 && trailsize == 0) { - /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. - */ - edata_szind_set(*edata, szind); - if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)edata_addr_get(*edata), szind, - slab); - if (slab && edata_size_get(*edata) > PAGE) { - rtree_szind_slab_update(tsdn, - &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_past_get(*edata) - - (uintptr_t)PAGE, szind, slab); - } - } + emap_remap(tsdn, &emap_global, *edata, size, szind, slab); } return extent_split_interior_ok; @@ -558,18 +530,16 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, void *new_addr, size_t size, - size_t pad, size_t alignment, bool slab, szind_t szind, edata_t *edata, - bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, + bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; edata_t 
*to_leak; edata_t *to_salvage; extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, - &to_salvage, new_addr, size, pad, alignment, slab, szind, - growing_retained); + tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, + new_addr, size, pad, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -605,7 +575,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, growing_retained); assert(emap_lock_edata_from_addr(tsdn, &emap_global, - rtree_ctx, leak, false) == NULL); + leak, false) == NULL); } return NULL; } @@ -626,19 +596,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(pad == 0 || !slab); assert(!*zero || !slab); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, - rtree_ctx, ecache, new_addr, size, pad, alignment, slab, - growing_retained); + edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, ecache, + new_addr, size, pad, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, arena, ehooks, rtree_ctx, ecache, - new_addr, size, pad, alignment, slab, szind, edata, - growing_retained); + edata = extent_recycle_split(tsdn, arena, ehooks, ecache, new_addr, + size, pad, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { return NULL; } @@ -665,8 +630,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); - emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, - szind); + emap_register_interior(tsdn, &emap_global, edata, szind); } if (*zero) { @@ -724,14 +688,15 @@ 
extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed); - edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); if (ptr == NULL) { edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; } + edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, + SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, + committed, true, EXTENT_IS_HEAD); + if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &arena->edata_cache, edata); goto label_err; @@ -744,15 +709,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, *commit = true; } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - edata_t *lead; edata_t *trail; edata_t *to_leak; edata_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, rtree_ctx, &edata, &lead, &trail, &to_leak, + arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, true); if (result == extent_split_interior_ok) { @@ -824,13 +787,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_addr_randomize(tsdn, arena, edata, alignment); } if (slab) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - edata_slab_set(edata, true); - emap_register_interior(tsdn, &emap_global, rtree_ctx, edata, - szind); + emap_register_interior(tsdn, &emap_global, edata, szind); } if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); @@ -949,8 +907,8 @@ extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t 
*rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained, bool inactive_only) { + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -966,7 +924,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, /* Try to coalesce forward. */ edata_t *next = emap_lock_edata_from_addr(tsdn, &emap_global, - rtree_ctx, edata_past_get(edata), inactive_only); + edata_past_get(edata), inactive_only); if (next != NULL) { /* * ecache->mtx only protects against races for @@ -992,7 +950,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, /* Try to coalesce backward. */ edata_t *prev = emap_lock_edata_from_addr(tsdn, &emap_global, - rtree_ctx, edata_before_get(edata), inactive_only); + edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); @@ -1020,18 +978,17 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, static edata_t * extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, coalesced, growing_retained, false); + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, + edata, coalesced, growing_retained, false); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, rtree_ctx_t *rtree_ctx, ecache_t *ecache, edata_t *edata, - bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, rtree_ctx, - ecache, edata, coalesced, 
growing_retained, true); + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, + edata, coalesced, growing_retained, true); } /* @@ -1041,9 +998,6 @@ extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -1052,16 +1006,15 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, rtree_ctx, edata); + emap_deregister_interior(tsdn, &emap_global, edata); edata_slab_set(edata, false); } - assert(rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), true) == edata); + emap_assert_mapped(tsdn, &emap_global, edata); if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, - rtree_ctx, ecache, edata, NULL, growing_retained); + ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &arena->ecache_dirty); /* Always coalesce large extents eagerly. 
*/ @@ -1069,8 +1022,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, - &arena->edata_cache, ehooks, rtree_ctx, ecache, - edata, &coalesced, growing_retained); + &arena->edata_cache, ehooks, ecache, edata, + &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && arena_may_force_decay(arena)) { @@ -1276,11 +1229,9 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_a; } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, &emap_global, rtree_ctx, &prepare, - edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); + bool err = emap_split_prepare(tsdn, &emap_global, &prepare, edata, + size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); if (err) { goto label_error_b; } @@ -1339,10 +1290,8 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, * owned, so the following code uses decomposed helper functions rather * than extent_{,de}register() to do things in the right order. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); emap_prepare_t prepare; - emap_merge_prepare(tsdn, &emap_global, rtree_ctx, &prepare, a, b); + emap_merge_prepare(tsdn, &emap_global, &prepare, a, b); emap_lock_edata2(tsdn, &emap_global, a, b); emap_merge_commit(tsdn, &emap_global, &prepare, a, b); From 9b5d105fc36e719869f3e113d0d2dc16cf24a60c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 14:50:34 -0800 Subject: [PATCH 1545/2608] Emap: Move in iealloc. This is logically scoped to the emap. 
--- include/jemalloc/internal/arena_inlines_b.h | 22 +++++++++++-------- include/jemalloc/internal/emap.h | 9 ++++++++ .../internal/jemalloc_internal_inlines_b.h | 11 ---------- src/arena.c | 8 +++---- src/ctl.c | 2 +- src/ehooks.c | 8 +++---- src/inspect.c | 4 ++-- src/jemalloc.c | 4 ++-- src/large.c | 2 +- src/prof.c | 2 +- src/tcache.c | 6 +++-- test/unit/binshard.c | 4 ++-- test/unit/prof_recent.c | 2 +- 13 files changed, 44 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b39578c9..79478136 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -47,10 +48,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - edata = iealloc(tsd_tsdn(tsd), ptr); + edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = iealloc(tsd_tsdn(tsd), ptr); + edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); } if (unlikely(!is_slab)) { @@ -73,13 +74,15 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { /* Static check. 
*/ if (alloc_ctx == NULL) { - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { - large_prof_tctx_reset(iealloc(tsd_tsdn(tsd), ptr)); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), + &emap_global, ptr); + large_prof_tctx_reset(edata); } } } @@ -89,7 +92,7 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); @@ -177,8 +180,9 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get( - iealloc(tsdn, ptr))], ATOMIC_RELAXED); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + unsigned arena_ind = edata_arena_ind_get(edata); + return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); } JEMALLOC_ALWAYS_INLINE size_t @@ -233,7 +237,7 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } @@ -277,7 +281,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 70163942..a6aadbc5 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -112,4 +112,13 @@ 
emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } +JEMALLOC_ALWAYS_INLINE edata_t * +emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, + true); +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 00fb6042..fc526c4b 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,9 +1,7 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define JEMALLOC_INTERNAL_INLINES_B_H -#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent.h" -#include "jemalloc/internal/rtree.h" /* Choose an arena based on a per-thread value. */ static inline arena_t * @@ -77,13 +75,4 @@ arena_is_auto(arena_t *arena) { return (arena_ind_get(arena) < manual_arena_base); } -JEMALLOC_ALWAYS_INLINE edata_t * -iealloc(tsdn_t *tsdn, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - return rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true); -} - #endif /* JEMALLOC_INTERNAL_INLINES_B_H */ diff --git a/src/arena.c b/src/arena.c index 3206a9a6..f7f3ee5c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1637,7 +1637,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); size_t usize = edata_usize_get(edata); size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { @@ -1769,7 +1769,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { void arena_dalloc_small(tsdn_t 
*tsdn, void *ptr) { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); arena_t *arena = arena_get_from_edata(edata); arena_dalloc_bin(tsdn, arena, edata, ptr); @@ -1783,7 +1783,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1817,7 +1817,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = true; } done: - assert(edata == iealloc(tsdn, ptr)); + assert(edata == emap_lookup(tsdn, &emap_global, ptr)); *newsize = edata_usize_get(edata); return ret; diff --git a/src/ctl.c b/src/ctl.c index 29909dfb..3f30ef0c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2667,7 +2667,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = iealloc(tsd_tsdn(tsd), ptr); + edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); if (edata == NULL) goto label_return; diff --git a/src/ehooks.c b/src/ehooks.c index 5ea73e3e..13d9ab0c 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -189,8 +189,8 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, static inline bool ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = iealloc(tsdn, addr_a); - edata_t *b = iealloc(tsdn, addr_b); + edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); + edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); return edata_sn_comp(a, b) == 0; } @@ -253,9 +253,9 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = iealloc(tsdn, addr_a); + edata_t *a = emap_lookup(tsdn, 
&emap_global, addr_a); bool head_a = edata_is_head_get(a); - edata_t *b = iealloc(tsdn, addr_b); + edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); bool head_b = edata_is_head_get(b); return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/inspect.c b/src/inspect.c index 5ad23a0e..1be3429a 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,7 +6,7 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const edata_t *edata = iealloc(tsdn, ptr); + const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; @@ -31,7 +31,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const edata_t *edata = iealloc(tsdn, ptr); + const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; diff --git a/src/jemalloc.c b/src/jemalloc.c index 8f34989a..2b4cd277 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3423,7 +3423,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). */ - edata_t *old_edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *old_edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); alloc_ctx_t alloc_ctx; rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); @@ -3462,7 +3462,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * xallocx() should keep using the same edata_t object (though its * content can be changed). 
*/ - assert(iealloc(tsd_tsdn(tsd), ptr) == old_edata); + assert(emap_lookup(tsd_tsdn(tsd), &emap_global, ptr) == old_edata); if (unlikely(usize == old_usize)) { te_alloc_rollback(tsd, usize); diff --git a/src/large.c b/src/large.c index 2e520981..d393c43c 100644 --- a/src/large.c +++ b/src/large.c @@ -272,7 +272,7 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - edata_t *edata = iealloc(tsdn, ptr); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ diff --git a/src/prof.c b/src/prof.c index 248532e8..7b57dd26 100644 --- a/src/prof.c +++ b/src/prof.c @@ -148,7 +148,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { - edata_t *edata = iealloc(tsd_tsdn(tsd), ptr); + edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/tcache.c b/src/tcache.c index 9146f244..e9331d03 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -160,7 +160,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = iealloc(tsdn, *(bottom_item - i)); + item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, + *(bottom_item - i)); } } @@ -258,7 +259,8 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t #ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up edata once per item. 
*/ for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = iealloc(tsd_tsdn(tsd), *(bottom_item - i)); + item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, + *(bottom_item - i)); } #else tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, diff --git a/test/unit/binshard.c b/test/unit/binshard.c index d9a0d599..d5f43df1 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -62,12 +62,12 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - edata = iealloc(tsdn, ptr); + edata = emap_lookup(tsdn, &emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); assert_u_lt(shard1, 16, "Unexpected bin shard used"); - edata = iealloc(tsdn, ptr2); + edata = emap_lookup(tsdn, &emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); assert_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 3c10618f..a8761ca9 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -101,7 +101,7 @@ TEST_END static void confirm_malloc(tsd_t *tsd, void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); - edata_t *e = iealloc(TSDN_NULL, p); + edata_t *e = emap_lookup(TSDN_NULL, &emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); From 65a54d771467df1d2144ae3da9ebf4ae2388bd4d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 17:17:15 -0800 Subject: [PATCH 1546/2608] Emap: Move in szind and slab modifications. 
--- include/jemalloc/internal/emap.h | 12 +++++++++--- src/arena.c | 16 +++------------- src/emap.c | 16 ++++++++++++++-- src/extent.c | 2 +- src/large.c | 9 +++------ 5 files changed, 30 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index a6aadbc5..9e3b415c 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -79,7 +79,13 @@ struct emap_prepare_s { * is going to be used as a slab, you still need to call emap_register_interior * on it, though. * - * Each operation has a "prepare" and a "commit" portion. The prepare portion + * Remap simply changes the szind and slab status of an extent's boundary + * mappings. If the extent is not a slab, it doesn't bother with updating the + * end mapping (since lookups only occur in the interior of an extent for + * slabs). Since the szind and slab status only make sense for active extents, + * this should only be called while activating or deactivating an extent. + * + * Split and merge have a "prepare" and a "commit" portion. The prepare portion * does the operations that can be done without exclusive access to the extent * in question, while the commit variant requires exclusive access to maintain * the emap invariants. The only function that can fail is emap_split_prepare, @@ -90,8 +96,8 @@ struct emap_prepare_s { * and esn values) data for the split variants, and can be reused for any * purpose by its given arena after a merge or a failed split. 
*/ -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, - szind_t szind, bool slab); +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, + bool slab); bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/src/arena.c b/src/arena.c index f7f3ee5c..2df7df6e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1599,16 +1599,10 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); szind_t szind = sz_size2index(usize); - edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, szind, false); + emap_remap(tsdn, &emap_global, edata, szind, false); prof_idump_rollback(tsdn, usize); @@ -1620,11 +1614,7 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_szind_set(edata, SC_NBINS); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, SC_NBINS, false); + emap_remap(tsdn, &emap_global, edata, SC_NBINS, false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); diff --git a/src/emap.c b/src/emap.c index 5b7d4e1f..ae0d3127 100644 --- a/src/emap.c +++ b/src/emap.c @@ -199,14 +199,26 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, size_t size, - szind_t szind, bool slab) { +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t 
*edata, szind_t szind, + bool slab) { EMAP_DECLARE_RTREE_CTX; edata_szind_set(edata, szind); if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), szind, slab); + /* + * Recall that this is called only for active->inactive and + * inactive->active transitions (since only active extents have + * meaningful values for szind and slab). Active, non-slab + * extents only need to handle lookups at their head (on + * deallocation), so we don't bother filling in the end + * boundary. + * + * For slab extents, we do the end-mapping change. This still + * leaves the interior unmodified; an emap_register_interior + * call is coming in those cases, though. + */ if (slab && edata_size_get(edata) > PAGE) { rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, diff --git a/src/extent.c b/src/extent.c index 7c79ced5..d06b8d68 100644 --- a/src/extent.c +++ b/src/extent.c @@ -516,7 +516,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (leadsize == 0 && trailsize == 0) { - emap_remap(tsdn, &emap_global, *edata, size, szind, slab); + emap_remap(tsdn, &emap_global, *edata, szind, slab); } return extent_split_interior_ok; diff --git a/src/large.c b/src/large.c index d393c43c..3965c5ec 100644 --- a/src/large.c +++ b/src/large.c @@ -3,10 +3,10 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_recent.h" -#include "jemalloc/internal/rtree.h" #include "jemalloc/internal/util.h" /******************************************************************************/ @@ -175,12 +175,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, 
&rtree_ctx_fallback); + szind_t szind = sz_size2index(usize); - edata_szind_set(edata, szind); - rtree_szind_slab_update(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)edata_addr_get(edata), szind, false); + emap_remap(tsdn, &emap_global, edata, szind, false); if (config_stats && new_mapping) { arena_stats_mapped_add(tsdn, &arena->stats, trailsize); From f7d9c6c42d51af2a06048e64b1a35a39c143eb4a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Feb 2020 18:58:19 -0800 Subject: [PATCH 1547/2608] Emap: Move in alloc_ctx lookup functionality. --- include/jemalloc/internal/arena_structs.h | 6 --- include/jemalloc/internal/arena_types.h | 1 - include/jemalloc/internal/emap.h | 18 ++++++++ src/jemalloc.c | 56 +++++++++++------------ 4 files changed, 44 insertions(+), 37 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 2d5c5680..fde540af 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -197,10 +197,4 @@ struct arena_tdata_s { ticker_t decay_ticker; }; -/* Used to pass rtree lookup context down the path. */ -struct alloc_ctx_s { - szind_t szind; - bool slab; -}; - #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 369dff06..b13d8a05 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -12,7 +12,6 @@ typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; -typedef struct alloc_ctx_s alloc_ctx_t; typedef enum { percpu_arena_mode_names_base = 0, /* Used for options processing. 
*/ diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9e3b415c..4588daf9 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -11,6 +11,13 @@ struct emap_s { mutex_pool_t mtx_pool; }; +/* Used to pass rtree lookup context down the path. */ +typedef struct alloc_ctx_t alloc_ctx_t; +struct alloc_ctx_t { + szind_t szind; + bool slab; +}; + extern emap_t emap_global; bool emap_init(emap_t *emap); @@ -127,4 +134,15 @@ emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { true); } +/* Fills in alloc_ctx with the info in the map. */ +JEMALLOC_ALWAYS_INLINE void +emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, + alloc_ctx_t *alloc_ctx) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + rtree_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, + true, &alloc_ctx->szind, &alloc_ctx->slab); +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 2b4cd277..d0af5da5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2568,9 +2568,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = sz_index2size(alloc_ctx.szind); @@ -2601,57 +2599,55 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - alloc_ctx_t alloc_ctx, *ctx; + alloc_ctx_t alloc_ctx; if (!config_prof) { - /* Means usize will be used to determine szind. 
*/ - ctx = NULL; + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else { if (likely(!prof_sample_aligned(ptr))) { - ctx = &alloc_ctx; /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - ctx->szind = sz_size2index(usize); + alloc_ctx.szind = sz_size2index(usize); if (config_cache_oblivious) { - ctx->slab = (ctx->szind < SC_NBINS); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else { /* Non page aligned must be slab allocated. */ - ctx->slab = true; + alloc_ctx.slab = true; } if (config_debug) { alloc_ctx_t dbg_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), - &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &dbg_ctx.szind, - &dbg_ctx.slab); - assert(dbg_ctx.szind == ctx->szind); - assert(dbg_ctx.slab == ctx->slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), + &emap_global, ptr, &dbg_ctx); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (opt_prof) { - ctx = &alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true, &ctx->szind, - &ctx->slab); - /* Small alloc may have !slab (sampled). */ - bool sz_correct = (ctx->szind == sz_size2index(usize)); - if (config_opt_safety_checks && !sz_correct) { - safety_check_fail_sized_dealloc(true); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + ptr, &alloc_ctx); + + if (config_opt_safety_checks) { + /* Small alloc may have !slab (sampled). 
*/ + if (alloc_ctx.szind != sz_size2index(usize)) { + safety_check_fail_sized_dealloc(true); + } } } else { - ctx = NULL; + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } } if (config_prof && opt_prof) { - prof_free(tsd, ptr, usize, ctx); + prof_free(tsd, ptr, usize, &alloc_ctx); } if (likely(!slow_path)) { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, false); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, + false); } else { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, ctx, true); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, + true); } thread_dalloc_event(tsd, usize); } From 06e42090f7ff42d944dbf318dd24eeac43e59255 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 Feb 2020 10:59:48 -0800 Subject: [PATCH 1548/2608] Make jemalloc.c use the emap interface. While we're here, we'll also clean up some style nits. --- include/jemalloc/internal/emap.h | 15 +++++++++++++++ src/jemalloc.c | 33 ++++++++++++++------------------ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 4588daf9..3a8182d3 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -145,4 +145,19 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, true, &alloc_ctx->szind, &alloc_ctx->slab); } +/* + * Fills in alloc_ctx, but only if it can be done easily (i.e. with a hit in the + * L1 rtree cache. + * + * Returns whether or not alloc_ctx was filled in. 
+ */ +JEMALLOC_ALWAYS_INLINE bool +emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, void *ptr, + alloc_ctx_t *alloc_ctx) { + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, + rtree_ctx, (uintptr_t)ptr, &alloc_ctx->szind, &alloc_ctx->slab); + return res; +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index d0af5da5..90a948c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2688,26 +2688,24 @@ free_default(void *ptr) { } } +/* Returns whether or not the free attempt was successful. */ JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); - szind_t szind; + alloc_ctx_t alloc_ctx; if (!size_hint) { - if (unlikely(!tsd || !tsd_fast(tsd))) { + if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool slab; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), - &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, &szind, - &slab); + bool res = emap_alloc_info_try_lookup_fast(tsd, &emap_global, + ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(!res || !slab)) { + if (unlikely(!res || !alloc_ctx.slab)) { return false; } - assert(szind != SC_NSIZES); + assert(alloc_ctx.szind != SC_NSIZES); } else { /* * The size hinted fastpath does not involve rtree lookup, thus @@ -2715,7 +2713,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { * check to be folded into the branch testing fast_threshold * (set to 0 when !tsd_fast). 
*/ - if (unlikely(!tsd)) { + if (unlikely(tsd == NULL)) { return false; } /* @@ -2727,12 +2725,13 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { (config_prof && prof_sample_aligned(ptr)))) { return false; } - szind = sz_size2index_lookup(size); + alloc_ctx.szind = sz_size2index_lookup(size); + alloc_ctx.slab = false; } uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); - size_t usize = sz_index2size(szind); + size_t usize = sz_index2size(alloc_ctx.szind); uint64_t deallocated_after = deallocated + usize; /* * Check for events and tsd non-nominal (fast_threshold will be set to @@ -2743,7 +2742,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } tcache_t *tcache = tsd_tcachep_get(tsd); - cache_bin_t *bin = tcache_small_bin_get(tcache, szind); + cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } @@ -3143,9 +3142,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3422,9 +3419,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { edata_t *old_edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), 
ptr)); From ac50c1e44b1a34b27ca72ada25a65d685253e2c2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 Feb 2020 13:16:07 -0800 Subject: [PATCH 1549/2608] Emap: Remove direct access to emap internals. In the process, we do a few local cleanups and optimizations. In particular, the size safety check on tcache flush no longer does a redundant load. --- include/jemalloc/internal/arena_inlines_b.h | 168 ++++++++------------ include/jemalloc/internal/emap.h | 39 ++++- include/jemalloc/internal/rtree.h | 13 +- include/jemalloc/internal/util.h | 7 + src/arena.c | 6 +- src/tcache.c | 24 +-- test/unit/arena_reset.c | 19 +-- 7 files changed, 144 insertions(+), 132 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 79478136..eb82e716 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -188,15 +188,11 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); + alloc_ctx_t alloc_ctx; + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + assert(alloc_ctx.szind != SC_NSIZES); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - szind_t szind = rtree_szind_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true); - assert(szind != SC_NSIZES); - - return sz_index2size(szind); + return sz_index2size(alloc_ctx.szind); } JEMALLOC_ALWAYS_INLINE size_t @@ -210,26 +206,24 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { * failure. 
*/ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata; - szind_t szind; - if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, false, &edata, &szind)) { + emap_full_alloc_ctx_t full_alloc_ctx; + bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, ptr, + &full_alloc_ctx); + if (missing) { return 0; } - if (edata == NULL) { + if (full_alloc_ctx.edata == NULL) { return 0; } - assert(edata_state_get(edata) == extent_state_active); + assert(edata_state_get(full_alloc_ctx.edata) == extent_state_active); /* Only slab members should be looked up via interior pointers. */ - assert(edata_addr_get(edata) == ptr || edata_slab_get(edata)); + assert(edata_addr_get(full_alloc_ctx.edata) == ptr + || edata_slab_get(full_alloc_ctx.edata)); - assert(szind != SC_NSIZES); + assert(full_alloc_ctx.szind != SC_NSIZES); - return sz_index2size(szind); + return sz_index2size(full_alloc_ctx.szind); } static inline void @@ -246,27 +240,21 @@ static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - szind_t szind; - bool slab; - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); + alloc_ctx_t alloc_ctx; + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(szind < SC_NSIZES); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); } } @@ -288,7 +276,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path) { + alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -297,34 +285,28 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, return; } - szind_t szind; - bool slab; - rtree_ctx_t *rtree_ctx; - if (alloc_ctx != NULL) { - szind = alloc_ctx->szind; - slab = alloc_ctx->slab; - assert(szind != SC_NSIZES); + alloc_ctx_t alloc_ctx; + if (caller_alloc_ctx != NULL) { + alloc_ctx = *caller_alloc_ctx; } else { - rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); + util_assume(!tsdn_null(tsdn)); + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); } if (config_debug) { - rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - edata_t *edata = rtree_edata_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(szind < SC_NSIZES); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.szind < SC_NSIZES); + assert(alloc_ctx.slab == edata_slab_get(edata)); } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + alloc_ctx.szind, slow_path); } else { - arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); + arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + slow_path); } } @@ -333,47 +315,41 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); - szind_t szind; - bool slab; + alloc_ctx_t alloc_ctx; if (!config_prof || !opt_prof) { /* * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - szind = sz_size2index(size); - slab = (szind < SC_NBINS); + alloc_ctx.szind = sz_size2index(size); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } if ((config_prof && opt_prof) || config_debug) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); + emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); - - assert(szind == sz_size2index(size)); - assert((config_prof && opt_prof) || slab == (szind < SC_NBINS)); + assert(alloc_ctx.szind == sz_size2index(size)); + assert((config_prof && opt_prof) + || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = rtree_edata_read(tsdn, - &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); } } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); } } JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path) { + alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); @@ -383,48 +359,38 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, return; } - szind_t szind; - bool slab; - alloc_ctx_t local_ctx; + alloc_ctx_t alloc_ctx; if (config_prof && opt_prof) { - if (alloc_ctx == NULL) { + if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. */ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true, &local_ctx.szind, - &local_ctx.slab); - assert(local_ctx.szind == sz_size2index(size)); - alloc_ctx = &local_ctx; + emap_alloc_info_lookup(tsdn, &emap_global, ptr, + &alloc_ctx); + assert(alloc_ctx.szind == sz_size2index(size)); + } else { + alloc_ctx = *caller_alloc_ctx; } - slab = alloc_ctx->slab; - szind = alloc_ctx->szind; } else { /* * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. 
*/ - szind = sz_size2index(size); - slab = (szind < SC_NBINS); + alloc_ctx.szind = sz_size2index(size); + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } if (config_debug) { - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsdn_tsd(tsdn)); - rtree_szind_slab_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, true, &szind, &slab); - edata_t *edata = rtree_edata_read(tsdn, - &emap_global.rtree, rtree_ctx, (uintptr_t)ptr, true); - assert(szind == edata_szind_get(edata)); - assert(slab == edata_slab_get(edata)); + edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + assert(alloc_ctx.szind == edata_szind_get(edata)); + assert(alloc_ctx.slab == edata_slab_get(edata)); } - if (likely(slab)) { + if (likely(alloc_ctx.slab)) { /* Small allocation. */ - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); + tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, + alloc_ctx.szind, slow_path); } else { - arena_dalloc_large(tsdn, ptr, tcache, szind, slow_path); + arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, + slow_path); } } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 3a8182d3..89bb9684 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -18,6 +18,13 @@ struct alloc_ctx_t { bool slab; }; +typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t; +struct emap_full_alloc_ctx_s { + szind_t szind; + bool slab; + edata_t *edata; +}; + extern emap_t emap_global; bool emap_init(emap_t *emap); @@ -136,7 +143,7 @@ emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { /* Fills in alloc_ctx with the info in the map. 
*/ JEMALLOC_ALWAYS_INLINE void -emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, +emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, alloc_ctx_t *alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -145,6 +152,34 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, true, &alloc_ctx->szind, &alloc_ctx->slab); } +/* The pointer must be mapped. */ +JEMALLOC_ALWAYS_INLINE void +emap_full_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, + emap_full_alloc_ctx_t *full_alloc_ctx) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr, true, &full_alloc_ctx->edata, + &full_alloc_ctx->szind, &full_alloc_ctx->slab); +} + +/* + * The pointer is allowed to not be mapped. + * + * Returns true when the pointer is not present. + */ +JEMALLOC_ALWAYS_INLINE bool +emap_full_alloc_info_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, + emap_full_alloc_ctx_t *full_alloc_ctx) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + return rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr, false, &full_alloc_ctx->edata, + &full_alloc_ctx->szind, &full_alloc_ctx->slab); +} + /* * Fills in alloc_ctx, but only if it can be done easily (i.e. with a hit in the * L1 rtree cache. @@ -152,7 +187,7 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, void *ptr, * Returns whether or not alloc_ctx was filled in. 
*/ JEMALLOC_ALWAYS_INLINE bool -emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, void *ptr, +emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, alloc_ctx_t *alloc_ctx) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 339c7e5e..11a52ed0 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -440,15 +440,24 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, */ JEMALLOC_ALWAYS_INLINE bool -rtree_edata_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, edata_t **r_edata, szind_t *r_szind) { +rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, edata_t **r_edata, + szind_t *r_szind, bool *r_slab) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, dependent); if (!dependent && elm == NULL) { return true; } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + *r_edata = rtree_leaf_elm_bits_edata_get(bits); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); +#endif return false; } diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 304cb545..cb751479 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -62,6 +62,13 @@ get_errno(void) { #endif } +JEMALLOC_ALWAYS_INLINE void +util_assume(bool b) { + if (!b) { + unreachable(); + } +} + #undef UTIL_INLINE #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/src/arena.c b/src/arena.c index 
2df7df6e..b2a0ac76 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1111,10 +1111,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); alloc_ctx_t alloc_ctx; - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - rtree_szind_slab_read(tsd_tsdn(tsd), &emap_global.rtree, - rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, - &alloc_ctx.slab); + emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { diff --git a/src/tcache.c b/src/tcache.c index e9331d03..b7c0a549 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -114,10 +114,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, /* Enabled with --enable-extra-size-check. */ static void -tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, - size_t nflush, edata_t **edatas){ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); +tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, + size_t nflush, edata_t **edatas) { + /* Avoids null-checking tsdn in the loop below. */ + util_assume(tsd != NULL); /* * Verify that the items in the tcache all have the correct size; this @@ -125,16 +125,16 @@ tbin_edatas_lookup_size_check(tsdn_t *tsdn, cache_bin_t *tbin, szind_t binind, * instead of corrupting metadata. Since this can be turned on for opt * builds, avoid the branch in the loop. 
*/ - szind_t szind; - size_t sz_sum = binind * nflush; + size_t szind_sum = binind * nflush; void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { - rtree_edata_szind_read(tsdn, &emap_global.rtree, - rtree_ctx, (uintptr_t)*(bottom_item - i), true, - &edatas[i], &szind); - sz_sum -= szind; + emap_full_alloc_ctx_t full_alloc_ctx; + emap_full_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + *(bottom_item - i), &full_alloc_ctx); + edatas[i] = full_alloc_ctx.edata; + szind_sum -= full_alloc_ctx.szind; } - if (sz_sum != 0) { + if (szind_sum != 0) { safety_check_fail_sized_dealloc(false); } } @@ -156,7 +156,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, tsdn_t *tsdn = tsd_tsdn(tsd); /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsdn, tbin, binind, nflush, + tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a1f1d07c..7fbde0b6 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -60,28 +60,25 @@ get_large_size(size_t ind) { /* Like ivsalloc(), but safe to call on discarded allocations. 
*/ static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - - edata_t *edata; - szind_t szind; - if (rtree_edata_szind_read(tsdn, &emap_global.rtree, rtree_ctx, - (uintptr_t)ptr, false, &edata, &szind)) { + emap_full_alloc_ctx_t full_alloc_ctx; + bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, + ptr, &full_alloc_ctx); + if (missing) { return 0; } - if (edata == NULL) { + if (full_alloc_ctx.edata == NULL) { return 0; } - if (edata_state_get(edata) != extent_state_active) { + if (edata_state_get(full_alloc_ctx.edata) != extent_state_active) { return 0; } - if (szind == SC_NSIZES) { + if (full_alloc_ctx.szind == SC_NSIZES) { return 0; } - return sz_index2size(szind); + return sz_index2size(full_alloc_ctx.szind); } static unsigned From 7e6c8a72869d00e641404e962a830d635a3cd825 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 Feb 2020 13:45:04 -0800 Subject: [PATCH 1550/2608] Emap: Standardize naming. Namespace everything under emap_, always specify what it is we're looking up (emap_lookup -> emap_edata_lookup), and use "ctx" over "info". 
--- include/jemalloc/internal/arena_inlines_b.h | 57 ++++++++++--------- include/jemalloc/internal/emap.h | 18 +++--- .../internal/jemalloc_internal_inlines_c.h | 6 +- include/jemalloc/internal/prof_inlines_b.h | 11 ++-- src/arena.c | 14 ++--- src/ctl.c | 2 +- src/ehooks.c | 8 +-- src/inspect.c | 4 +- src/jemalloc.c | 37 ++++++------ src/large.c | 2 +- src/prof.c | 2 +- src/tcache.c | 8 +-- test/unit/arena_reset.c | 4 +- test/unit/binshard.c | 4 +- test/unit/prof_recent.c | 2 +- 15 files changed, 93 insertions(+), 86 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index eb82e716..e7f7b858 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -37,7 +37,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, +arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info, bool reset_recent) { cassert(config_prof); assert(ptr != NULL); @@ -48,10 +48,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); } if (unlikely(!is_slab)) { @@ -68,19 +68,21 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { +arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, + emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); /* Static check. 
*/ if (alloc_ctx == NULL) { - edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, + ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { - edata_t *edata = emap_lookup(tsd_tsdn(tsd), + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); large_prof_tctx_reset(edata); } @@ -92,7 +94,7 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); @@ -180,7 +182,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); unsigned arena_ind = edata_arena_ind_get(edata); return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); } @@ -188,8 +190,8 @@ arena_aalloc(tsdn_t *tsdn, const void *ptr) { JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); return sz_index2size(alloc_ctx.szind); @@ -207,7 +209,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { */ emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, ptr, + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, &full_alloc_ctx); if (missing) { return 0; @@ -231,7 +233,7 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof 
&& unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } @@ -240,11 +242,11 @@ static inline void arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -269,14 +271,14 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); large_dalloc(tsdn, edata); } } JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); @@ -285,16 +287,16 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, return; } - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (caller_alloc_ctx != NULL) { alloc_ctx = *caller_alloc_ctx; } else { util_assume(!tsdn_null(tsdn)); - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); } if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -315,7 
+317,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (!config_prof || !opt_prof) { /* * There is no risk of being confused by a promoted sampled @@ -326,14 +328,15 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { } if ((config_prof && opt_prof) || config_debug) { - emap_alloc_info_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert((config_prof && opt_prof) || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } @@ -349,7 +352,7 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { JEMALLOC_ALWAYS_INLINE void arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *caller_alloc_ctx, bool slow_path) { + emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); assert(ptr != NULL); assert(size <= SC_LARGE_MAXCLASS); @@ -359,11 +362,11 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, return; } - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (config_prof && opt_prof) { if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. 
*/ - emap_alloc_info_lookup(tsdn, &emap_global, ptr, + emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); } else { @@ -379,7 +382,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } if (config_debug) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 89bb9684..c4b40142 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -12,8 +12,8 @@ struct emap_s { }; /* Used to pass rtree lookup context down the path. */ -typedef struct alloc_ctx_t alloc_ctx_t; -struct alloc_ctx_t { +typedef struct emap_alloc_ctx_t emap_alloc_ctx_t; +struct emap_alloc_ctx_t { szind_t szind; bool slab; }; @@ -133,7 +133,7 @@ emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } JEMALLOC_ALWAYS_INLINE edata_t * -emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { +emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -143,8 +143,8 @@ emap_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void -emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, - alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, + emap_alloc_ctx_t *alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -154,7 +154,7 @@ emap_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, /* The pointer must be mapped. 
*/ JEMALLOC_ALWAYS_INLINE void -emap_full_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, +emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -170,7 +170,7 @@ emap_full_alloc_info_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, * Returns true when the pointer is not present. */ JEMALLOC_ALWAYS_INLINE bool -emap_full_alloc_info_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, +emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -187,8 +187,8 @@ emap_full_alloc_info_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, * Returns whether or not alloc_ctx was filled in. */ JEMALLOC_ALWAYS_INLINE bool -emap_alloc_info_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, - alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, + emap_alloc_ctx_t *alloc_ctx) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, rtree_ctx, (uintptr_t)ptr, &alloc_ctx->szind, &alloc_ctx->slab); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index cdb10eb2..0a5ffba5 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -101,8 +101,8 @@ ivsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, - bool is_internal, bool slow_path) { +idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + emap_alloc_ctx_t *alloc_ctx, bool is_internal, bool slow_path) { assert(ptr != NULL); 
assert(!is_internal || tcache == NULL); assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); @@ -125,7 +125,7 @@ idalloc(tsd_t *tsd, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, - alloc_ctx_t *alloc_ctx, bool slow_path) { + emap_alloc_ctx_t *alloc_ctx, bool slow_path) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index c53dac50..7e28d836 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -40,7 +40,7 @@ prof_tdata_get(tsd_t *tsd, bool create) { } JEMALLOC_ALWAYS_INLINE void -prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, +prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); @@ -51,7 +51,7 @@ prof_info_get(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx, JEMALLOC_ALWAYS_INLINE void prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, - alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { + emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info) { cassert(config_prof); assert(ptr != NULL); assert(prof_info != NULL); @@ -60,7 +60,7 @@ prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, } JEMALLOC_ALWAYS_INLINE void -prof_tctx_reset(tsd_t *tsd, const void *ptr, alloc_ctx_t *alloc_ctx) { +prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); @@ -127,7 +127,7 @@ prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { JEMALLOC_ALWAYS_INLINE void prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, - alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { + emap_alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); 
assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -214,7 +214,8 @@ prof_sample_aligned(const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize, alloc_ctx_t *alloc_ctx) { +prof_free(tsd_t *tsd, const void *ptr, size_t usize, + emap_alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); diff --git a/src/arena.c b/src/arena.c index b2a0ac76..aa19e092 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1110,8 +1110,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); @@ -1597,7 +1597,7 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); szind_t szind = sz_size2index(usize); emap_remap(tsdn, &emap_global, edata, szind, false); @@ -1625,7 +1625,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); size_t usize = edata_usize_get(edata); size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { @@ -1757,7 +1757,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); arena_t *arena = arena_get_from_edata(edata); arena_dalloc_bin(tsdn, arena, edata, ptr); @@ -1771,7 +1771,7 @@ 
arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1805,7 +1805,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = true; } done: - assert(edata == emap_lookup(tsdn, &emap_global, ptr)); + assert(edata == emap_edata_lookup(tsdn, &emap_global, ptr)); *newsize = edata_usize_get(edata); return ret; diff --git a/src/ctl.c b/src/ctl.c index 3f30ef0c..3123ab84 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2667,7 +2667,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); if (edata == NULL) goto label_return; diff --git a/src/ehooks.c b/src/ehooks.c index 13d9ab0c..ff459dfb 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -189,8 +189,8 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, static inline bool ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); - edata_t *b = emap_lookup(tsdn, &emap_global, addr_b); + edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); + edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); return edata_sn_comp(a, b) == 0; } @@ -253,9 +253,9 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = emap_lookup(tsdn, &emap_global, addr_a); + edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); bool head_a = edata_is_head_get(a); - edata_t *b = 
emap_lookup(tsdn, &emap_global, addr_b); + edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); bool head_b = edata_is_head_get(b); return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/inspect.c b/src/inspect.c index 1be3429a..6c4dd8a7 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,7 +6,7 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; @@ -31,7 +31,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; diff --git a/src/jemalloc.c b/src/jemalloc.c index 90a948c3..907235af 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2170,7 +2170,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { prof_tctx_t *tctx = prof_alloc_prep( tsd, usize, prof_active_get_unlocked(), true); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); allocation = imalloc_no_sample( @@ -2567,8 +2567,8 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), 
&emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = sz_index2size(alloc_ctx.szind); @@ -2599,7 +2599,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (!config_prof) { alloc_ctx.szind = sz_size2index(usize); alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); @@ -2617,14 +2617,14 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { alloc_ctx.slab = true; } if (config_debug) { - alloc_ctx_t dbg_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &dbg_ctx); assert(dbg_ctx.szind == alloc_ctx.szind); assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (opt_prof) { - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { @@ -2693,12 +2693,12 @@ JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); - alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx; if (!size_hint) { if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool res = emap_alloc_info_try_lookup_fast(tsd, &emap_global, + bool res = emap_alloc_ctx_try_lookup_fast(tsd, &emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ @@ -3069,7 +3069,8 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, size_t alignment, size_t *usize, bool zero, tcache_t *tcache, - arena_t *arena, alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { + arena_t *arena, emap_alloc_ctx_t *alloc_ctx, + hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, 
&old_prof_info); bool prof_active = prof_active_get_unlocked(); @@ -3141,8 +3142,8 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { tcache = tcache_get(tsd); } - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3315,7 +3316,7 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, JEMALLOC_ALWAYS_INLINE size_t ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, - size_t extra, size_t alignment, bool zero, alloc_ctx_t *alloc_ctx) { + size_t extra, size_t alignment, bool zero, emap_alloc_ctx_t *alloc_ctx) { /* * old_prof_info is only used for asserting that the profiling info * isn't changed by the ixalloc() call. @@ -3416,10 +3417,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). */ - edata_t *old_edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, + ptr); - alloc_ctx_t alloc_ctx; - emap_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3453,7 +3455,8 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * xallocx() should keep using the same edata_t object (though its * content can be changed). 
*/ - assert(emap_lookup(tsd_tsdn(tsd), &emap_global, ptr) == old_edata); + assert(emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr) + == old_edata); if (unlikely(usize == old_usize)) { te_alloc_rollback(tsd, usize); diff --git a/src/large.c b/src/large.c index 3965c5ec..f13b1e5e 100644 --- a/src/large.c +++ b/src/large.c @@ -269,7 +269,7 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - edata_t *edata = emap_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. */ diff --git a/src/prof.c b/src/prof.c index 7b57dd26..49f5a0ea 100644 --- a/src/prof.c +++ b/src/prof.c @@ -148,7 +148,7 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { - edata_t *edata = emap_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/tcache.c b/src/tcache.c index b7c0a549..33d3cba6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -129,7 +129,7 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, void **bottom_item = cache_bin_bottom_item_get(tbin, binind); for (unsigned i = 0 ; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; - emap_full_alloc_info_lookup(tsd_tsdn(tsd), &emap_global, + emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, *(bottom_item - i), &full_alloc_ctx); edatas[i] = full_alloc_ctx.edata; szind_sum -= full_alloc_ctx.szind; @@ -160,8 +160,8 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = 
emap_lookup(tsd_tsdn(tsd), &emap_global, - *(bottom_item - i)); + item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), + &emap_global, *(bottom_item - i)); } } @@ -259,7 +259,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t #ifndef JEMALLOC_EXTRA_SIZE_CHECK /* Look up edata once per item. */ for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_lookup(tsd_tsdn(tsd), &emap_global, + item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, *(bottom_item - i)); } #else diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 7fbde0b6..64db058d 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -61,8 +61,8 @@ get_large_size(size_t ind) { static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_info_try_lookup(tsdn, &emap_global, - ptr, &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, + &full_alloc_ctx); if (missing) { return 0; } diff --git a/test/unit/binshard.c b/test/unit/binshard.c index d5f43df1..6e10d477 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -62,12 +62,12 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - edata = emap_lookup(tsdn, &emap_global, ptr); + edata = emap_edata_lookup(tsdn, &emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); assert_u_lt(shard1, 16, "Unexpected bin shard used"); - edata = emap_lookup(tsdn, &emap_global, ptr2); + edata = emap_edata_lookup(tsdn, &emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); assert_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index a8761ca9..962be74e 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -101,7 +101,7 @@ TEST_END static void confirm_malloc(tsd_t *tsd, void *p) { assert_ptr_not_null(p, "malloc failed 
unexpectedly"); - edata_t *e = emap_lookup(TSDN_NULL, &emap_global, p); + edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); From 34b7165fde9622afe75037a2c8862f53269f10bb Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 11:48:42 -0800 Subject: [PATCH 1551/2608] Put szind_t, pszind_t in sz.h. --- include/jemalloc/internal/cache_bin.h | 1 + include/jemalloc/internal/jemalloc_internal_types.h | 6 ------ include/jemalloc/internal/sz.h | 6 ++++++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 38b8e328..60feb15f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_CACHE_BIN_H #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/sz.h" /* * The cache_bins are the mechanism that the tcache and the arena use to diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 324a4b13..d8da4dee 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -3,12 +3,6 @@ #include "jemalloc/internal/quantum.h" -/* Page size index type. */ -typedef unsigned pszind_t; - -/* Size class index type. */ -typedef unsigned szind_t; - /* Processor / core id type. */ typedef int malloc_cpuid_t; diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 73fb0a4c..b0941169 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -22,6 +22,12 @@ * size that would result from such an allocation. */ +/* Page size index type. */ +typedef unsigned pszind_t; + +/* Size class index type. 
*/ +typedef unsigned szind_t; + /* * sz_pind2sz_tab encodes the same information as could be computed by * sz_pind2sz_compute(). From 182192f83c029a794ee3c32767f43e471a00bd26 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 12:00:57 -0800 Subject: [PATCH 1552/2608] Base: Pull into a single header. --- include/jemalloc/internal/base.h | 103 ++++++++++++++++++ include/jemalloc/internal/base_externs.h | 22 ---- include/jemalloc/internal/base_inlines.h | 13 --- include/jemalloc/internal/base_types.h | 33 ------ include/jemalloc/internal/edata_cache.h | 2 + .../internal/jemalloc_internal_includes.h | 4 - src/base.c | 9 ++ 7 files changed, 114 insertions(+), 72 deletions(-) create mode 100644 include/jemalloc/internal/base.h delete mode 100644 include/jemalloc/internal/base_externs.h delete mode 100644 include/jemalloc/internal/base_inlines.h delete mode 100644 include/jemalloc/internal/base_types.h diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h new file mode 100644 index 00000000..dcac3b6a --- /dev/null +++ b/include/jemalloc/internal/base.h @@ -0,0 +1,103 @@ +#ifndef JEMALLOC_INTERNAL_BASE_H +#define JEMALLOC_INTERNAL_BASE_H + +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/mutex.h" + +enum metadata_thp_mode_e { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. 
+ */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +}; +typedef enum metadata_thp_mode_e metadata_thp_mode_t; + +#define METADATA_THP_DEFAULT metadata_thp_disabled +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; + + +/* Embedded at the beginning of every block of base-managed virtual memory. */ +typedef struct base_block_s base_block_t; +struct base_block_s { + /* Total size of block's virtual memory mapping. */ + size_t size; + + /* Next block in list of base's blocks. */ + base_block_t *next; + + /* Tracks unused trailing space. */ + edata_t edata; +}; + +typedef struct base_s base_t; +struct base_s { + /* + * User-configurable extent hook functions. + */ + ehooks_t ehooks; + + /* Protects base_alloc() and base_stats_get() operations. */ + malloc_mutex_t mtx; + + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; + /* + * Most recent size class in the series of increasingly large base + * extents. Logarithmic spacing between subsequent allocations ensures + * that the total number of distinct mappings remains small. + */ + pszind_t pind_last; + + /* Serial number generation state. */ + size_t extent_sn_next; + + /* Chain of all blocks associated with base. */ + base_block_t *blocks; + + /* Heap of extents that track unused trailing space within blocks. */ + edata_heap_t avail[SC_NSIZES]; + + /* Stats, only maintained if config_stats. */ + size_t allocated; + size_t resident; + size_t mapped; + /* Number of THP regions touched. 
*/ + size_t n_thp; +}; + +static inline unsigned +base_ind_get(const base_t *base) { + return ehooks_ind_get(&base->ehooks); +} + +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} + +base_t *b0get(void); +base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +void base_delete(tsdn_t *tsdn, base_t *base); +ehooks_t *base_ehooks_get(base_t *base); +extent_hooks_t *base_extent_hooks_set(base_t *base, + extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *resident, size_t *mapped, size_t *n_thp); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); + +#endif /* JEMALLOC_INTERNAL_BASE_H */ diff --git a/include/jemalloc/internal/base_externs.h b/include/jemalloc/internal/base_externs.h deleted file mode 100644 index 2f241317..00000000 --- a/include/jemalloc/internal/base_externs.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H -#define JEMALLOC_INTERNAL_BASE_EXTERNS_H - -extern metadata_thp_mode_t opt_metadata_thp; -extern const char *metadata_thp_mode_names[]; - -base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -void base_delete(tsdn_t *tsdn, base_t *base); -ehooks_t *base_ehooks_get(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, - extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); -edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped, size_t *n_thp); -void base_prefork(tsdn_t *tsdn, base_t *base); -void 
base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); -bool base_boot(tsdn_t *tsdn); - -#endif /* JEMALLOC_INTERNAL_BASE_EXTERNS_H */ diff --git a/include/jemalloc/internal/base_inlines.h b/include/jemalloc/internal/base_inlines.h deleted file mode 100644 index 221fca81..00000000 --- a/include/jemalloc/internal/base_inlines.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_INLINES_H -#define JEMALLOC_INTERNAL_BASE_INLINES_H - -static inline unsigned -base_ind_get(const base_t *base) { - return ehooks_ind_get(&base->ehooks); -} - -static inline bool -metadata_thp_enabled(void) { - return (opt_metadata_thp != metadata_thp_disabled); -} -#endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/include/jemalloc/internal/base_types.h b/include/jemalloc/internal/base_types.h deleted file mode 100644 index b6db77df..00000000 --- a/include/jemalloc/internal/base_types.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_TYPES_H -#define JEMALLOC_INTERNAL_BASE_TYPES_H - -typedef struct base_block_s base_block_t; -typedef struct base_s base_t; - -#define METADATA_THP_DEFAULT metadata_thp_disabled - -/* - * In auto mode, arenas switch to huge pages for the base allocator on the - * second base block. a0 switches to thp on the 5th block (after 20 megabytes - * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. - */ - -#define BASE_AUTO_THP_THRESHOLD 2 -#define BASE_AUTO_THP_THRESHOLD_A0 5 - -typedef enum { - metadata_thp_disabled = 0, - /* - * Lazily enable hugepage for metadata. To avoid high RSS caused by THP - * + low usage arena (i.e. THP becomes a significant percentage), the - * "auto" option only starts using THP after a base allocator used up - * the first THP region. Starting from the second hugepage (in a single - * arena), "auto" behaves the same as "always", i.e. madvise hugepage - * right away. 
- */ - metadata_thp_auto = 1, - metadata_thp_always = 2, - metadata_thp_mode_limit = 3 -} metadata_thp_mode_t; - -#endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 9cb0d1c8..73ac7af8 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H #define JEMALLOC_INTERNAL_EDATA_CACHE_H +#include "jemalloc/internal/base.h" + /* * A cache of edata_t structures allocated via base_alloc_edata (as opposed to * the underlying extents they describe). The contents of returned edata_t diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 75a94d3a..72b5a72a 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -40,7 +40,6 @@ /* TYPES */ /******************************************************************************/ -#include "jemalloc/internal/base_types.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/prof_types.h" @@ -51,7 +50,6 @@ #include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/arena_structs.h" -#include "jemalloc/internal/base_structs.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/background_thread_structs.h" @@ -60,7 +58,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/base_externs.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/tcache_externs.h" @@ -72,7 +69,6 @@ /******************************************************************************/ #include "jemalloc/internal/jemalloc_internal_inlines_a.h" -#include 
"jemalloc/internal/base_inlines.h" /* * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. diff --git a/src/base.c b/src/base.c index c006774e..595b7710 100644 --- a/src/base.c +++ b/src/base.c @@ -7,6 +7,15 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sz.h" +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + /******************************************************************************/ /* Data. */ From 7013716aaab806dc6ed2de3437170cdfa2b15a4a Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 12:24:09 -0800 Subject: [PATCH 1553/2608] Emap: Take (and propagate) a zeroed parameter. Rtree needs this, and we should really treat them similarly. 
--- include/jemalloc/internal/emap.h | 2 +- src/emap.c | 4 ++-- src/jemalloc.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index c4b40142..b51a0c53 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -27,7 +27,7 @@ struct emap_full_alloc_ctx_s { extern emap_t emap_global; -bool emap_init(emap_t *emap); +bool emap_init(emap_t *emap, bool zeroed); /* * Grab the lock or locks associated with the edata or edatas indicated (which diff --git a/src/emap.c b/src/emap.c index ae0d3127..200a7828 100644 --- a/src/emap.c +++ b/src/emap.c @@ -22,9 +22,9 @@ enum emap_lock_result_e { typedef enum emap_lock_result_e emap_lock_result_t; bool -emap_init(emap_t *emap) { +emap_init(emap_t *emap, bool zeroed) { bool err; - err = rtree_new(&emap->rtree, true); + err = rtree_new(&emap->rtree, zeroed); if (err) { return true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 907235af..e2adffd2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1572,7 +1572,8 @@ malloc_init_hard_a0_locked() { if (base_boot(TSDN_NULL)) { return true; } - if (emap_init(&emap_global)) { + /* emap_global is static, hence zeroed. */ + if (emap_init(&emap_global, /* zeroed */ true)) { return true; } if (extent_boot()) { From a0c1f4ac57abe164cecc027efd697a7f1e0e2db4 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 13:11:10 -0800 Subject: [PATCH 1554/2608] Rtree: take the base allocator as a parameter. This facilitates better testing by avoiding mixing of the "real" base with the base used by the rtree under test. 
--- include/jemalloc/internal/base.h | 3 +- include/jemalloc/internal/emap.h | 3 +- include/jemalloc/internal/rtree.h | 17 +---- src/base.c | 6 +- src/emap.c | 4 +- src/jemalloc.c | 2 +- src/rtree.c | 74 +++------------------- test/unit/rtree.c | 101 ++++++++---------------------- 8 files changed, 45 insertions(+), 165 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index dcac3b6a..628e393b 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -86,7 +86,8 @@ metadata_thp_enabled(void) { } base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +base_t *base_new(tsdn_t *tsdn, unsigned ind, + const extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b51a0c53..b9f6bc06 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EMAP_H #define JEMALLOC_INTERNAL_EMAP_H +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/rtree.h" @@ -27,7 +28,7 @@ struct emap_full_alloc_ctx_s { extern emap_t emap_global; -bool emap_init(emap_t *emap, bool zeroed); +bool emap_init(emap_t *emap, base_t *base, bool zeroed); /* * Grab the lock or locks associated with the edata or edatas indicated (which diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 11a52ed0..094cc1ad 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -78,6 +78,7 @@ struct rtree_level_s { typedef struct rtree_s rtree_t; struct rtree_s { + base_t *base; malloc_mutex_t init_lock; /* Number of elements based on rtree_levels[0].bits. 
*/ #if RTREE_HEIGHT > 1 @@ -109,22 +110,8 @@ static const rtree_level_t rtree_levels[] = { #endif }; -bool rtree_new(rtree_t *rtree, bool zeroed); +bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed); -typedef rtree_node_elm_t *(rtree_node_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc; - -typedef rtree_leaf_elm_t *(rtree_leaf_alloc_t)(tsdn_t *, rtree_t *, size_t); -extern rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc; - -typedef void (rtree_node_dalloc_t)(tsdn_t *, rtree_t *, rtree_node_elm_t *); -extern rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc; - -typedef void (rtree_leaf_dalloc_t)(tsdn_t *, rtree_t *, rtree_leaf_elm_t *); -extern rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc; -#ifdef JEMALLOC_JET -void rtree_delete(tsdn_t *tsdn, rtree_t *rtree); -#endif rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); diff --git a/src/base.c b/src/base.c index 595b7710..ebb42da5 100644 --- a/src/base.c +++ b/src/base.c @@ -343,7 +343,7 @@ b0get(void) { } base_t * -base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; @@ -353,7 +353,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * memory, and then initialize the ehooks within the base_t. 
*/ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, extent_hooks, ind); + ehooks_init(&fake_ehooks, (extent_hooks_t *)extent_hooks, ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -366,7 +366,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); - ehooks_init(&base->ehooks, extent_hooks, ind); + ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); diff --git a/src/emap.c b/src/emap.c index 200a7828..723dfad2 100644 --- a/src/emap.c +++ b/src/emap.c @@ -22,9 +22,9 @@ enum emap_lock_result_e { typedef enum emap_lock_result_e emap_lock_result_t; bool -emap_init(emap_t *emap, bool zeroed) { +emap_init(emap_t *emap, base_t *base, bool zeroed) { bool err; - err = rtree_new(&emap->rtree, zeroed); + err = rtree_new(&emap->rtree, base, zeroed); if (err) { return true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index e2adffd2..6dc2e475 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1573,7 +1573,7 @@ malloc_init_hard_a0_locked() { return true; } /* emap_global is static, hence zeroed. */ - if (emap_init(&emap_global, /* zeroed */ true)) { + if (emap_init(&emap_global, b0get(), /* zeroed */ true)) { return true; } if (extent_boot()) { diff --git a/src/rtree.c b/src/rtree.c index 4ae41fe2..07a4e9ac 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -10,7 +10,7 @@ * used. */ bool -rtree_new(rtree_t *rtree, bool zeroed) { +rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { #ifdef JEMALLOC_JET if (!zeroed) { memset(rtree, 0, sizeof(rtree_t)); /* Clear root. 
*/ @@ -18,6 +18,7 @@ rtree_new(rtree_t *rtree, bool zeroed) { #else assert(zeroed); #endif + rtree->base = base; if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE, malloc_mutex_rank_exclusive)) { @@ -28,75 +29,16 @@ rtree_new(rtree_t *rtree, bool zeroed) { } static rtree_node_elm_t * -rtree_node_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_node_elm_t *)base_alloc(tsdn, b0get(), nelms * - sizeof(rtree_node_elm_t), CACHELINE); +rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { + return (rtree_node_elm_t *)base_alloc(tsdn, rtree->base, + nelms * sizeof(rtree_node_elm_t), CACHELINE); } -rtree_node_alloc_t *JET_MUTABLE rtree_node_alloc = rtree_node_alloc_impl; - -static void -rtree_node_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *node) { - /* Nodes are never deleted during normal operation. */ - not_reached(); -} -rtree_node_dalloc_t *JET_MUTABLE rtree_node_dalloc = - rtree_node_dalloc_impl; static rtree_leaf_elm_t * -rtree_leaf_alloc_impl(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_leaf_elm_t *)base_alloc(tsdn, b0get(), nelms * - sizeof(rtree_leaf_elm_t), CACHELINE); +rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { + return (rtree_leaf_elm_t *)base_alloc(tsdn, rtree->base, + nelms * sizeof(rtree_leaf_elm_t), CACHELINE); } -rtree_leaf_alloc_t *JET_MUTABLE rtree_leaf_alloc = rtree_leaf_alloc_impl; - -static void -rtree_leaf_dalloc_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *leaf) { - /* Leaves are never deleted during normal operation. 
*/ - not_reached(); -} -rtree_leaf_dalloc_t *JET_MUTABLE rtree_leaf_dalloc = - rtree_leaf_dalloc_impl; - -#ifdef JEMALLOC_JET -# if RTREE_HEIGHT > 1 -static void -rtree_delete_subtree(tsdn_t *tsdn, rtree_t *rtree, rtree_node_elm_t *subtree, - unsigned level) { - size_t nchildren = ZU(1) << rtree_levels[level].bits; - if (level + 2 < RTREE_HEIGHT) { - for (size_t i = 0; i < nchildren; i++) { - rtree_node_elm_t *node = - (rtree_node_elm_t *)atomic_load_p(&subtree[i].child, - ATOMIC_RELAXED); - if (node != NULL) { - rtree_delete_subtree(tsdn, rtree, node, level + - 1); - } - } - } else { - for (size_t i = 0; i < nchildren; i++) { - rtree_leaf_elm_t *leaf = - (rtree_leaf_elm_t *)atomic_load_p(&subtree[i].child, - ATOMIC_RELAXED); - if (leaf != NULL) { - rtree_leaf_dalloc(tsdn, rtree, leaf); - } - } - } - - if (subtree != rtree->root) { - rtree_node_dalloc(tsdn, rtree, subtree); - } -} -# endif - -void -rtree_delete(tsdn_t *tsdn, rtree_t *rtree) { -# if RTREE_HEIGHT > 1 - rtree_delete_subtree(tsdn, rtree, rtree->root, 0); -# endif -} -#endif static rtree_node_elm_t * rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 2477db03..b5ece82c 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -4,80 +4,26 @@ #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) -rtree_node_alloc_t *rtree_node_alloc_orig; -rtree_node_dalloc_t *rtree_node_dalloc_orig; -rtree_leaf_alloc_t *rtree_leaf_alloc_orig; -rtree_leaf_dalloc_t *rtree_leaf_dalloc_orig; - /* Potentially too large to safely place on the stack. 
*/ rtree_t test_rtree; -static rtree_node_elm_t * -rtree_node_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - rtree_node_elm_t *node; - - if (rtree != &test_rtree) { - return rtree_node_alloc_orig(tsdn, rtree, nelms); - } - - malloc_mutex_unlock(tsdn, &rtree->init_lock); - node = (rtree_node_elm_t *)calloc(nelms, sizeof(rtree_node_elm_t)); - assert_ptr_not_null(node, "Unexpected calloc() failure"); - malloc_mutex_lock(tsdn, &rtree->init_lock); - - return node; -} - -static void -rtree_node_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, - rtree_node_elm_t *node) { - if (rtree != &test_rtree) { - rtree_node_dalloc_orig(tsdn, rtree, node); - return; - } - - free(node); -} - -static rtree_leaf_elm_t * -rtree_leaf_alloc_intercept(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - rtree_leaf_elm_t *leaf; - - if (rtree != &test_rtree) { - return rtree_leaf_alloc_orig(tsdn, rtree, nelms); - } - - malloc_mutex_unlock(tsdn, &rtree->init_lock); - leaf = (rtree_leaf_elm_t *)calloc(nelms, sizeof(rtree_leaf_elm_t)); - assert_ptr_not_null(leaf, "Unexpected calloc() failure"); - malloc_mutex_lock(tsdn, &rtree->init_lock); - - return leaf; -} - -static void -rtree_leaf_dalloc_intercept(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *leaf) { - if (rtree != &test_rtree) { - rtree_leaf_dalloc_orig(tsdn, rtree, leaf); - return; - } - - free(leaf); -} - TEST_BEGIN(test_rtree_read_empty) { tsdn_t *tsdn; tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, false), "rtree_edata_read() should return NULL for empty tree"); - rtree_delete(tsdn, rtree); + + base_delete(tsdn, base); 
} TEST_END @@ -95,10 +41,14 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, edata_szind_get(&edata_a), edata_slab_get(&edata_a)), @@ -116,12 +66,14 @@ TEST_BEGIN(test_rtree_extrema) { ~((uintptr_t)0), true), &edata_b, "rtree_edata_read() should return previously set value"); - rtree_delete(tsdn, rtree); + base_delete(tsdn, base); } TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; @@ -133,7 +85,8 @@ TEST_BEGIN(test_rtree_bits) { rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], @@ -153,7 +106,7 @@ TEST_BEGIN(test_rtree_bits) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); } - rtree_delete(tsdn, rtree); + base_delete(tsdn, base); } TEST_END @@ -162,6 +115,10 @@ TEST_BEGIN(test_rtree_random) { #define SEED 42 sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); + + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + assert_ptr_not_null(base, "Unexpected base_new failure"); + uintptr_t keys[NSET]; rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -171,7 +128,8 
@@ TEST_BEGIN(test_rtree_random) { edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - assert_false(rtree_new(rtree, false), "Unexpected rtree_new() failure"); + assert_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); @@ -204,7 +162,7 @@ TEST_BEGIN(test_rtree_random) { "rtree_edata_read() should return previously set value"); } - rtree_delete(tsdn, rtree); + base_delete(tsdn, base); fini_gen_rand(sfmt); #undef NSET #undef SEED @@ -213,15 +171,6 @@ TEST_END int main(void) { - rtree_node_alloc_orig = rtree_node_alloc; - rtree_node_alloc = rtree_node_alloc_intercept; - rtree_node_dalloc_orig = rtree_node_dalloc; - rtree_node_dalloc = rtree_node_dalloc_intercept; - rtree_leaf_alloc_orig = rtree_leaf_alloc; - rtree_leaf_alloc = rtree_leaf_alloc_intercept; - rtree_leaf_dalloc_orig = rtree_leaf_dalloc; - rtree_leaf_dalloc = rtree_leaf_dalloc_intercept; - return test( test_rtree_read_empty, test_rtree_extrema, From 29436fa056169389f3d76c74aae1465604bdd799 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 14:09:29 -0800 Subject: [PATCH 1555/2608] Break prof and tcache knowledge of b0. 
--- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/tcache_externs.h | 4 ++-- src/ctl.c | 2 +- src/jemalloc.c | 4 ++-- src/prof.c | 21 ++++++++++++--------- src/tcache.c | 16 ++++++++-------- 6 files changed, 26 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 36571c8c..0b6fecd2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -93,7 +93,7 @@ bool prof_gdump_get(tsdn_t *tsdn); bool prof_gdump_set(tsdn_t *tsdn, bool active); void prof_boot0(void); void prof_boot1(void); -bool prof_boot2(tsd_t *tsd); +bool prof_boot2(tsd_t *tsd, base_t *base); void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 2060bb19..db6f98bf 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -36,10 +36,10 @@ void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, tcache_t *tcache_create_explicit(tsd_t *tsd); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, unsigned *r_ind); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn); +bool tcache_boot(tsdn_t *tsdn, base_t *base); void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); diff --git a/src/ctl.c b/src/ctl.c index 3123ab84..d149ce6d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2040,7 +2040,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; READONLY(); - if (tcaches_create(tsd, &tcache_ind)) { + if 
(tcaches_create(tsd, b0get(), &tcache_ind)) { ret = EFAULT; goto label_return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6dc2e475..4e1d3df8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1586,7 +1586,7 @@ malloc_init_hard_a0_locked() { prof_boot1(); } arena_boot(&sc_data); - if (tcache_boot(TSDN_NULL)) { + if (tcache_boot(TSDN_NULL, b0get())) { return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS, @@ -1823,7 +1823,7 @@ malloc_init_hard(void) { if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } - if (config_prof && prof_boot2(tsd)) { + if (config_prof && prof_boot2(tsd, b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } diff --git a/src/prof.c b/src/prof.c index 49f5a0ea..761cb95d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -91,6 +91,9 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; +/* The fallback allocator profiling functionality will use. */ +base_t *prof_base; + malloc_mutex_t prof_dump_mtx; static char *prof_dump_prefix = NULL; @@ -584,8 +587,8 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { if (prof_dump_prefix == NULL) { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); /* Everything is still guarded by ctl_mtx. 
*/ - char *buffer = base_alloc(tsdn, b0get(), PROF_DUMP_FILENAME_LEN, - QUANTUM); + char *buffer = base_alloc(tsdn, prof_base, + PROF_DUMP_FILENAME_LEN, QUANTUM); if (buffer == NULL) { return true; } @@ -944,7 +947,7 @@ prof_boot1(void) { } bool -prof_boot2(tsd_t *tsd) { +prof_boot2(tsd_t *tsd, base_t *base) { cassert(config_prof); if (opt_prof) { @@ -1017,9 +1020,10 @@ prof_boot2(tsd_t *tsd) { return true; } - gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), - CACHELINE); + prof_base = base; + + gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), base, + PROF_NCTX_LOCKS * sizeof(malloc_mutex_t), CACHELINE); if (gctx_locks == NULL) { return true; } @@ -1031,9 +1035,8 @@ prof_boot2(tsd_t *tsd) { } } - tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), - b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), - CACHELINE); + tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd), base, + PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t), CACHELINE); if (tdata_locks == NULL) { return true; } diff --git a/src/tcache.c b/src/tcache.c index 33d3cba6..782d8833 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -664,14 +664,14 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } static bool -tcaches_create_prep(tsd_t *tsd) { +tcaches_create_prep(tsd_t *tsd, base_t *base) { bool err; malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { - tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *) - * (MALLOCX_TCACHE_MAX+1), CACHELINE); + tcaches = base_alloc(tsd_tsdn(tsd), base, + sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); if (tcaches == NULL) { err = true; goto label_return; @@ -690,12 +690,12 @@ label_return: } bool -tcaches_create(tsd_t *tsd, unsigned *r_ind) { +tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0); bool err; - if (tcaches_create_prep(tsd)) { + if 
(tcaches_create_prep(tsd, base)) { err = true; goto label_return; } @@ -772,7 +772,7 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { } bool -tcache_boot(tsdn_t *tsdn) { +tcache_boot(tsdn_t *tsdn, base_t *base) { /* If necessary, clamp opt_lg_tcache_max. */ if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < SC_SMALL_MAXCLASS) { @@ -789,8 +789,8 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(cache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, + nhbins * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } From 162c2bcf319966b83e56a552b158d87a211bfcd1 Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Mon, 17 Feb 2020 14:13:38 -0800 Subject: [PATCH 1556/2608] Background thread: take base as a parameter. --- include/jemalloc/internal/background_thread_externs.h | 2 +- src/background_thread.c | 4 ++-- src/jemalloc.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 0f997e18..224e3700 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -27,6 +27,6 @@ extern int pthread_create_wrapper(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); #endif bool background_thread_boot0(void); -bool background_thread_boot1(tsdn_t *tsdn); +bool background_thread_boot1(tsdn_t *tsdn, base_t *base); #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H */ diff --git a/src/background_thread.c b/src/background_thread.c index 90b027ed..ca06be02 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -895,7 +895,7 @@ background_thread_boot0(void) { } bool -background_thread_boot1(tsdn_t *tsdn) { 
+background_thread_boot1(tsdn_t *tsdn, base_t *base) { #ifdef JEMALLOC_BACKGROUND_THREAD assert(have_background_thread); assert(narenas_total_get() > 0); @@ -914,7 +914,7 @@ background_thread_boot1(tsdn_t *tsdn) { } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - b0get(), opt_max_background_threads * + base, opt_max_background_threads * sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { return true; diff --git a/src/jemalloc.c b/src/jemalloc.c index 4e1d3df8..b29ae47e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1820,7 +1820,8 @@ malloc_init_hard(void) { /* Set reentrancy level to 1 during init. */ pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). */ - if (malloc_init_narenas() || background_thread_boot1(tsd_tsdn(tsd))) { + if (malloc_init_narenas() + || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } if (config_prof && prof_boot2(tsd, b0get())) { From 21dfa4300dd372c11c7e1392225f58ae92c35eeb Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 18 Feb 2020 14:39:06 -0800 Subject: [PATCH 1557/2608] Change assert_* to expect_* in tests ``` grep -Irl assert_ test/ | xargs sed -i \ 's/witness_assert/witness_do_not_replace/g'; grep -Irl assert_ test/ | xargs sed -i \ 's/malloc_mutex_assert_owner/malloc_mutex_do_not_replace_owner/g'; grep -Ir assert_ test/ | grep -o "[_a-zA-Z]*assert_[_a-zA-Z]*" | \ grep -v "^assert_"; # confirm no output grep -Irl assert_ test/ | xargs sed -i 's/assert_/expect_/g'; grep -Irl witness_do_not_replace test/ | xargs sed -i \ 's/witness_do_not_replace/witness_assert/g'; grep -Irl malloc_mutex_do_not_replace_owner test/ | xargs sed -i \ 's/malloc_mutex_do_not_replace_owner/malloc_mutex_assert_owner/g'; ``` --- test/include/test/btalloc.h | 2 +- test/include/test/extent_hooks.h | 40 +-- test/include/test/jemalloc_test.h.in | 4 +- test/include/test/test.h | 216 +++++++-------- 
test/integration/MALLOCX_ARENA.c | 8 +- test/integration/aligned_alloc.c | 12 +- test/integration/allocated.c | 20 +- test/integration/cpp/basic.cpp | 4 +- test/integration/extent.c | 90 +++--- test/integration/mallocx.c | 56 ++-- test/integration/overflow.c | 20 +- test/integration/posix_memalign.c | 12 +- test/integration/rallocx.c | 58 ++-- test/integration/slab_sizes.c | 22 +- test/integration/smallocx.c | 66 ++--- test/integration/thread_arena.c | 10 +- test/integration/thread_tcache_enabled.c | 38 +-- test/integration/xallocx.c | 110 ++++---- test/unit/SFMT.c | 32 +-- test/unit/a0.c | 2 +- test/unit/arena_reset.c | 56 ++-- test/unit/atomic.c | 44 +-- test/unit/background_thread.c | 28 +- test/unit/background_thread_enable.c | 28 +- test/unit/base.c | 66 ++--- test/unit/binshard.c | 28 +- test/unit/bit_util.c | 54 ++-- test/unit/bitmap.c | 96 +++---- test/unit/buf_writer.c | 28 +- test/unit/cache_bin.c | 32 +-- test/unit/ckh.c | 74 ++--- test/unit/counter.c | 38 +-- test/unit/decay.c | 142 +++++----- test/unit/div.c | 2 +- test/unit/emitter.c | 14 +- test/unit/extent_quantize.c | 34 +-- test/unit/fork.c | 8 +- test/unit/hash.c | 2 +- test/unit/hook.c | 332 +++++++++++------------ test/unit/huge.c | 58 ++-- test/unit/inspect.c | 84 +++--- test/unit/junk.c | 22 +- test/unit/log.c | 14 +- test/unit/mallctl.c | 322 +++++++++++----------- test/unit/malloc_io.c | 18 +- test/unit/math.c | 12 +- test/unit/mq.c | 18 +- test/unit/mtx.c | 6 +- test/unit/nstime.c | 70 ++--- test/unit/pack.c | 20 +- test/unit/pages.c | 6 +- test/unit/ph.c | 58 ++-- test/unit/prng.c | 36 +-- test/unit/prof_accum.c | 8 +- test/unit/prof_active.c | 14 +- test/unit/prof_gdump.c | 24 +- test/unit/prof_idump.c | 12 +- test/unit/prof_log.c | 58 ++-- test/unit/prof_recent.c | 140 +++++----- test/unit/prof_reset.c | 66 ++--- test/unit/prof_tctx.c | 16 +- test/unit/prof_thread_name.c | 14 +- test/unit/ql.c | 24 +- test/unit/qr.c | 32 +-- test/unit/rb.c | 60 ++-- test/unit/retained.c | 24 +- 
test/unit/rtree.c | 42 +-- test/unit/safety_check.c | 14 +- test/unit/sc.c | 6 +- test/unit/seq.c | 12 +- test/unit/size_classes.c | 88 +++--- test/unit/slab.c | 4 +- test/unit/smoothstep.c | 12 +- test/unit/stats.c | 168 ++++++------ test/unit/stats_print.c | 26 +- test/unit/test_hooks.c | 8 +- test/unit/thread_event.c | 6 +- test/unit/ticker.c | 36 +-- test/unit/tsd.c | 48 ++-- test/unit/witness.c | 32 +-- test/unit/zero.c | 10 +- test/unit/zero_realloc_abort.c | 4 +- test/unit/zero_realloc_free.c | 8 +- test/unit/zero_realloc_strict.c | 10 +- test/unit/zero_reallocs.c | 10 +- 85 files changed, 1854 insertions(+), 1854 deletions(-) diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index 5877ea77..8f345993 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -25,6 +25,6 @@ btalloc_##n(size_t size, unsigned bits) { \ } \ } \ /* Intentionally sabotage tail call optimization. */ \ - assert_ptr_not_null(p, "Unexpected mallocx() failure"); \ + expect_ptr_not_null(p, "Unexpected mallocx() failure"); \ return p; \ } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index 1f062015..aad0a46c 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -86,9 +86,9 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, new_addr, size, alignment, *zero ? "true" : "false", *commit ? 
"true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->alloc, extent_alloc_hook, + expect_ptr_eq(extent_hooks->alloc, extent_alloc_hook, "Wrong hook function"); called_alloc = true; if (!try_alloc) { @@ -108,9 +108,9 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, + expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { @@ -127,9 +127,9 @@ extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? 
"true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->destroy, extent_destroy_hook, + expect_ptr_eq(extent_hooks->destroy, extent_destroy_hook, "Wrong hook function"); called_destroy = true; if (!try_destroy) { @@ -147,9 +147,9 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->commit, extent_commit_hook, + expect_ptr_eq(extent_hooks->commit, extent_commit_hook, "Wrong hook function"); called_commit = true; if (!try_commit) { @@ -169,9 +169,9 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->decommit, extent_decommit_hook, + expect_ptr_eq(extent_hooks->decommit, extent_decommit_hook, "Wrong hook function"); called_decommit = true; if (!try_decommit) { @@ -191,9 +191,9 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, + 
expect_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, "Wrong hook function"); called_purge_lazy = true; if (!try_purge_lazy) { @@ -214,9 +214,9 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, offset, length, arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, + expect_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, "Wrong hook function"); called_purge_forced = true; if (!try_purge_forced) { @@ -238,9 +238,9 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr, size, size_a, size_b, committed ? "true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->split, extent_split_hook, + expect_ptr_eq(extent_hooks->split, extent_split_hook, "Wrong hook function"); called_split = true; if (!try_split) { @@ -262,11 +262,11 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, addr_a, size_a, addr_b, size_b, committed ? 
"true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->merge, extent_merge_hook, + expect_ptr_eq(extent_hooks->merge, extent_merge_hook, "Wrong hook function"); - assert_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, + expect_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, "Extents not mergeable"); called_merge = true; if (!try_merge) { @@ -284,6 +284,6 @@ extent_hooks_prep(void) { size_t sz; sz = sizeof(default_hooks); - assert_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, + expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, NULL, 0), 0, "Unexpected mallctl() error"); } diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index a59408fc..e5d63062 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -139,7 +139,7 @@ static const bool config_debug = #undef assert #undef not_reached #undef not_implemented -#undef assert_not_implemented +#undef expect_not_implemented #define assert(e) do { \ if (!(e)) { \ @@ -163,7 +163,7 @@ static const bool config_debug = abort(); \ } while (0) -#define assert_not_implemented(e) do { \ +#define expect_not_implemented(e) do { \ if (!(e)) { \ not_implemented(); \ } \ diff --git a/test/include/test/test.h b/test/include/test/test.h index 9081716b..cf6616b6 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,6 +1,6 @@ #define ASSERT_BUFSIZE 256 -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ +#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ const t a_ = (a); \ const t b_ = (b); \ if (!(a_ cmp b_)) { \ @@ -17,200 +17,200 @@ } \ } while (0) -#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ +#define expect_ptr_eq(a, b, ...) expect_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) -#define assert_ptr_ne(a, b, ...) 
assert_cmp(void *, a, b, !=, \ +#define expect_ptr_ne(a, b, ...) expect_cmp(void *, a, b, !=, \ ==, "p", __VA_ARGS__) -#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ +#define expect_ptr_null(a, ...) expect_cmp(void *, a, NULL, ==, \ !=, "p", __VA_ARGS__) -#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ +#define expect_ptr_not_null(a, ...) expect_cmp(void *, a, NULL, !=, \ ==, "p", __VA_ARGS__) -#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) +#define expect_c_eq(a, b, ...) expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) +#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define expect_x_ne(a, b, ...) 
expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) +#define expect_d_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) +#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define expect_u_lt(a, b, ...) 
expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ +#define expect_ld_eq(a, b, ...) expect_cmp(long, a, b, ==, \ !=, "ld", __VA_ARGS__) -#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ +#define expect_ld_ne(a, b, ...) expect_cmp(long, a, b, !=, \ ==, "ld", __VA_ARGS__) -#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ +#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, \ >=, "ld", __VA_ARGS__) -#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ +#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, \ >, "ld", __VA_ARGS__) -#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ +#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, \ <, "ld", __VA_ARGS__) -#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ +#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, \ <=, "ld", __VA_ARGS__) -#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_eq(a, b, ...) expect_cmp(unsigned long, \ a, b, ==, !=, "lu", __VA_ARGS__) -#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_ne(a, b, ...) expect_cmp(unsigned long, \ a, b, !=, ==, "lu", __VA_ARGS__) -#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_lt(a, b, ...) expect_cmp(unsigned long, \ a, b, <, >=, "lu", __VA_ARGS__) -#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_le(a, b, ...) expect_cmp(unsigned long, \ a, b, <=, >, "lu", __VA_ARGS__) -#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ +#define expect_lu_ge(a, b, ...) expect_cmp(unsigned long, \ a, b, >=, <, "lu", __VA_ARGS__) -#define assert_lu_gt(a, b, ...) 
assert_cmp(unsigned long, \ +#define expect_lu_gt(a, b, ...) expect_cmp(unsigned long, \ a, b, >, <=, "lu", __VA_ARGS__) -#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ +#define expect_qd_eq(a, b, ...) expect_cmp(long long, a, b, ==, \ !=, "qd", __VA_ARGS__) -#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ +#define expect_qd_ne(a, b, ...) expect_cmp(long long, a, b, !=, \ ==, "qd", __VA_ARGS__) -#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ +#define expect_qd_lt(a, b, ...) expect_cmp(long long, a, b, <, \ >=, "qd", __VA_ARGS__) -#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ +#define expect_qd_le(a, b, ...) expect_cmp(long long, a, b, <=, \ >, "qd", __VA_ARGS__) -#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ +#define expect_qd_ge(a, b, ...) expect_cmp(long long, a, b, >=, \ <, "qd", __VA_ARGS__) -#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ +#define expect_qd_gt(a, b, ...) expect_cmp(long long, a, b, >, \ <=, "qd", __VA_ARGS__) -#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_eq(a, b, ...) expect_cmp(unsigned long long, \ a, b, ==, !=, "qu", __VA_ARGS__) -#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_ne(a, b, ...) expect_cmp(unsigned long long, \ a, b, !=, ==, "qu", __VA_ARGS__) -#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_lt(a, b, ...) expect_cmp(unsigned long long, \ a, b, <, >=, "qu", __VA_ARGS__) -#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_le(a, b, ...) expect_cmp(unsigned long long, \ a, b, <=, >, "qu", __VA_ARGS__) -#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_ge(a, b, ...) expect_cmp(unsigned long long, \ a, b, >=, <, "qu", __VA_ARGS__) -#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ +#define expect_qu_gt(a, b, ...) 
expect_cmp(unsigned long long, \ a, b, >, <=, "qu", __VA_ARGS__) -#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ +#define expect_jd_eq(a, b, ...) expect_cmp(intmax_t, a, b, ==, \ !=, "jd", __VA_ARGS__) -#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ +#define expect_jd_ne(a, b, ...) expect_cmp(intmax_t, a, b, !=, \ ==, "jd", __VA_ARGS__) -#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ +#define expect_jd_lt(a, b, ...) expect_cmp(intmax_t, a, b, <, \ >=, "jd", __VA_ARGS__) -#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ +#define expect_jd_le(a, b, ...) expect_cmp(intmax_t, a, b, <=, \ >, "jd", __VA_ARGS__) -#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ +#define expect_jd_ge(a, b, ...) expect_cmp(intmax_t, a, b, >=, \ <, "jd", __VA_ARGS__) -#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ +#define expect_jd_gt(a, b, ...) expect_cmp(intmax_t, a, b, >, \ <=, "jd", __VA_ARGS__) -#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ +#define expect_ju_eq(a, b, ...) expect_cmp(uintmax_t, a, b, ==, \ !=, "ju", __VA_ARGS__) -#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ +#define expect_ju_ne(a, b, ...) expect_cmp(uintmax_t, a, b, !=, \ ==, "ju", __VA_ARGS__) -#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ +#define expect_ju_lt(a, b, ...) expect_cmp(uintmax_t, a, b, <, \ >=, "ju", __VA_ARGS__) -#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ +#define expect_ju_le(a, b, ...) expect_cmp(uintmax_t, a, b, <=, \ >, "ju", __VA_ARGS__) -#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ +#define expect_ju_ge(a, b, ...) expect_cmp(uintmax_t, a, b, >=, \ <, "ju", __VA_ARGS__) -#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ +#define expect_ju_gt(a, b, ...) expect_cmp(uintmax_t, a, b, >, \ <=, "ju", __VA_ARGS__) -#define assert_zd_eq(a, b, ...) 
assert_cmp(ssize_t, a, b, ==, \ +#define expect_zd_eq(a, b, ...) expect_cmp(ssize_t, a, b, ==, \ !=, "zd", __VA_ARGS__) -#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ +#define expect_zd_ne(a, b, ...) expect_cmp(ssize_t, a, b, !=, \ ==, "zd", __VA_ARGS__) -#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ +#define expect_zd_lt(a, b, ...) expect_cmp(ssize_t, a, b, <, \ >=, "zd", __VA_ARGS__) -#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ +#define expect_zd_le(a, b, ...) expect_cmp(ssize_t, a, b, <=, \ >, "zd", __VA_ARGS__) -#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ +#define expect_zd_ge(a, b, ...) expect_cmp(ssize_t, a, b, >=, \ <, "zd", __VA_ARGS__) -#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ +#define expect_zd_gt(a, b, ...) expect_cmp(ssize_t, a, b, >, \ <=, "zd", __VA_ARGS__) -#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ +#define expect_zu_eq(a, b, ...) expect_cmp(size_t, a, b, ==, \ !=, "zu", __VA_ARGS__) -#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ +#define expect_zu_ne(a, b, ...) expect_cmp(size_t, a, b, !=, \ ==, "zu", __VA_ARGS__) -#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ +#define expect_zu_lt(a, b, ...) expect_cmp(size_t, a, b, <, \ >=, "zu", __VA_ARGS__) -#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ +#define expect_zu_le(a, b, ...) expect_cmp(size_t, a, b, <=, \ >, "zu", __VA_ARGS__) -#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ +#define expect_zu_ge(a, b, ...) expect_cmp(size_t, a, b, >=, \ <, "zu", __VA_ARGS__) -#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ +#define expect_zu_gt(a, b, ...) expect_cmp(size_t, a, b, >, \ <=, "zu", __VA_ARGS__) -#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ +#define expect_d32_eq(a, b, ...) expect_cmp(int32_t, a, b, ==, \ !=, FMTd32, __VA_ARGS__) -#define assert_d32_ne(a, b, ...) 
assert_cmp(int32_t, a, b, !=, \ +#define expect_d32_ne(a, b, ...) expect_cmp(int32_t, a, b, !=, \ ==, FMTd32, __VA_ARGS__) -#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ +#define expect_d32_lt(a, b, ...) expect_cmp(int32_t, a, b, <, \ >=, FMTd32, __VA_ARGS__) -#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ +#define expect_d32_le(a, b, ...) expect_cmp(int32_t, a, b, <=, \ >, FMTd32, __VA_ARGS__) -#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ +#define expect_d32_ge(a, b, ...) expect_cmp(int32_t, a, b, >=, \ <, FMTd32, __VA_ARGS__) -#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ +#define expect_d32_gt(a, b, ...) expect_cmp(int32_t, a, b, >, \ <=, FMTd32, __VA_ARGS__) -#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ +#define expect_u32_eq(a, b, ...) expect_cmp(uint32_t, a, b, ==, \ !=, FMTu32, __VA_ARGS__) -#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \ +#define expect_u32_ne(a, b, ...) expect_cmp(uint32_t, a, b, !=, \ ==, FMTu32, __VA_ARGS__) -#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ +#define expect_u32_lt(a, b, ...) expect_cmp(uint32_t, a, b, <, \ >=, FMTu32, __VA_ARGS__) -#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ +#define expect_u32_le(a, b, ...) expect_cmp(uint32_t, a, b, <=, \ >, FMTu32, __VA_ARGS__) -#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ +#define expect_u32_ge(a, b, ...) expect_cmp(uint32_t, a, b, >=, \ <, FMTu32, __VA_ARGS__) -#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ +#define expect_u32_gt(a, b, ...) expect_cmp(uint32_t, a, b, >, \ <=, FMTu32, __VA_ARGS__) -#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ +#define expect_d64_eq(a, b, ...) expect_cmp(int64_t, a, b, ==, \ !=, FMTd64, __VA_ARGS__) -#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ +#define expect_d64_ne(a, b, ...) 
expect_cmp(int64_t, a, b, !=, \ ==, FMTd64, __VA_ARGS__) -#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ +#define expect_d64_lt(a, b, ...) expect_cmp(int64_t, a, b, <, \ >=, FMTd64, __VA_ARGS__) -#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ +#define expect_d64_le(a, b, ...) expect_cmp(int64_t, a, b, <=, \ >, FMTd64, __VA_ARGS__) -#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ +#define expect_d64_ge(a, b, ...) expect_cmp(int64_t, a, b, >=, \ <, FMTd64, __VA_ARGS__) -#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ +#define expect_d64_gt(a, b, ...) expect_cmp(int64_t, a, b, >, \ <=, FMTd64, __VA_ARGS__) -#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ +#define expect_u64_eq(a, b, ...) expect_cmp(uint64_t, a, b, ==, \ !=, FMTu64, __VA_ARGS__) -#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ +#define expect_u64_ne(a, b, ...) expect_cmp(uint64_t, a, b, !=, \ ==, FMTu64, __VA_ARGS__) -#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ +#define expect_u64_lt(a, b, ...) expect_cmp(uint64_t, a, b, <, \ >=, FMTu64, __VA_ARGS__) -#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ +#define expect_u64_le(a, b, ...) expect_cmp(uint64_t, a, b, <=, \ >, FMTu64, __VA_ARGS__) -#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ +#define expect_u64_ge(a, b, ...) expect_cmp(uint64_t, a, b, >=, \ <, FMTu64, __VA_ARGS__) -#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ +#define expect_u64_gt(a, b, ...) expect_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) do { \ +#define expect_b_eq(a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ @@ -226,7 +226,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_b_ne(a, b, ...) do { \ +#define expect_b_ne(a, b, ...) 
do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ @@ -242,10 +242,10 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) -#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) +#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) +#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define assert_str_eq(a, b, ...) do { \ +#define expect_str_eq(a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -258,7 +258,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define assert_str_ne(a, b, ...) do { \ +#define expect_str_ne(a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -272,7 +272,7 @@ } \ } while (0) -#define assert_not_reached(...) do { \ +#define expect_not_reached(...) do { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 222164d6..7e61df08 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -18,7 +18,7 @@ thd_start(void *arg) { size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); if (thread_ind % 4 != 3) { @@ -29,16 +29,16 @@ thd_start(void *arg) { (sizeof(dss_precs)/sizeof(char*)); const char *dss = dss_precs[prec_ind]; int expected_err = (have_dss || prec_ind == 0) ? 
0 : EFAULT; - assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Error in mallctlnametomib()"); mib[1] = arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, sizeof(const char *)), expected_err, "Error in mallctlbymib()"); } p = mallocx(1, MALLOCX_ARENA(arena_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, 0); return NULL; diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 4375b172..3f619e7e 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -9,7 +9,7 @@ */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -20,14 +20,14 @@ TEST_BEGIN(test_alignment_errors) { alignment = 0; set_errno(0); p = aligned_alloc(alignment, 1); - assert_false(p != NULL || get_errno() != EINVAL, + expect_false(p != NULL || get_errno() != EINVAL, "Expected error for invalid alignment %zu", alignment); for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { set_errno(0); p = aligned_alloc(alignment + 1, 1); - assert_false(p != NULL || get_errno() != EINVAL, + expect_false(p != NULL || get_errno() != EINVAL, "Expected error for invalid alignment %zu", alignment + 1); } @@ -58,7 +58,7 @@ TEST_BEGIN(test_oom_errors) { #endif set_errno(0); p = aligned_alloc(alignment, size); - assert_false(p != NULL || get_errno() != ENOMEM, + expect_false(p != NULL || get_errno() != ENOMEM, "Expected error for aligned_alloc(%zu, %zu)", alignment, size); @@ -71,7 +71,7 @@ TEST_BEGIN(test_oom_errors) { #endif set_errno(0); p = aligned_alloc(alignment, size); - assert_false(p != NULL || get_errno() != ENOMEM, + expect_false(p != NULL || get_errno() 
!= ENOMEM, "Expected error for aligned_alloc(%zu, %zu)", alignment, size); @@ -83,7 +83,7 @@ TEST_BEGIN(test_oom_errors) { #endif set_errno(0); p = aligned_alloc(alignment, size); - assert_false(p != NULL || get_errno() != ENOMEM, + expect_false(p != NULL || get_errno() != ENOMEM, "Expected error for aligned_alloc(&p, %zu, %zu)", alignment, size); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 1425fd0a..8f2f21d5 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -32,7 +32,7 @@ thd_start(void *arg) { test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } - assert_u64_eq(*ap0, a0, + expect_u64_eq(*ap0, a0, "\"thread.allocatedp\" should provide a pointer to internal " "storage"); @@ -53,25 +53,25 @@ thd_start(void *arg) { test_fail("%s(): Error in mallctl(): %s", __func__, strerror(err)); } - assert_u64_eq(*dp0, d0, + expect_u64_eq(*dp0, d0, "\"thread.deallocatedp\" should provide a pointer to internal " "storage"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() error"); + expect_ptr_not_null(p, "Unexpected malloc() error"); sz = sizeof(a1); mallctl("thread.allocated", (void *)&a1, &sz, NULL, 0); sz = sizeof(ap1); mallctl("thread.allocatedp", (void *)&ap1, &sz, NULL, 0); - assert_u64_eq(*ap1, a1, + expect_u64_eq(*ap1, a1, "Dereferenced \"thread.allocatedp\" value should equal " "\"thread.allocated\" value"); - assert_ptr_eq(ap0, ap1, + expect_ptr_eq(ap0, ap1, "Pointer returned by \"thread.allocatedp\" should not change"); usize = malloc_usable_size(p); - assert_u64_le(a0 + usize, a1, + expect_u64_le(a0 + usize, a1, "Allocated memory counter should increase by at least the amount " "explicitly allocated"); @@ -81,19 +81,19 @@ thd_start(void *arg) { mallctl("thread.deallocated", (void *)&d1, &sz, NULL, 0); sz = sizeof(dp1); mallctl("thread.deallocatedp", (void *)&dp1, &sz, NULL, 0); - assert_u64_eq(*dp1, d1, + expect_u64_eq(*dp1, d1, "Dereferenced \"thread.deallocatedp\" value 
should equal " "\"thread.deallocated\" value"); - assert_ptr_eq(dp0, dp1, + expect_ptr_eq(dp0, dp1, "Pointer returned by \"thread.deallocatedp\" should not change"); - assert_u64_le(d0 + usize, d1, + expect_u64_le(d0 + usize, d1, "Deallocated memory counter should increase by at least the amount " "explicitly deallocated"); return NULL; label_ENOENT: - assert_false(config_stats, + expect_false(config_stats, "ENOENT should only be returned if stats are disabled"); test_skip("\"thread.allocated\" mallctl not available"); return NULL; diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index 65890ecd..b48ec8aa 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -3,14 +3,14 @@ TEST_BEGIN(test_basic) { auto foo = new long(4); - assert_ptr_not_null(foo, "Unexpected new[] failure"); + expect_ptr_not_null(foo, "Unexpected new[] failure"); delete foo; // Test nullptr handling. foo = nullptr; delete foo; auto bar = new long; - assert_ptr_not_null(bar, "Unexpected new failure"); + expect_ptr_not_null(bar, "Unexpected new failure"); delete bar; // Test nullptr handling. bar = nullptr; diff --git a/test/integration/extent.c b/test/integration/extent.c index a75ba03e..ccc314d9 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -10,7 +10,7 @@ check_background_thread_enabled(void) { if (ret == ENOENT) { return false; } - assert_d_eq(ret, 0, "Unexpected mallctl error"); + expect_d_eq(ret, 0, "Unexpected mallctl error"); return enabled; } @@ -27,16 +27,16 @@ test_extent_body(unsigned arena_ind) { /* Get large size classes. 
*/ sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected arenas.lextent.0.size failure"); - assert_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, 0), 0, "Unexpected arenas.lextent.1.size failure"); - assert_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, 0), 0, "Unexpected arenas.lextent.2.size failure"); /* Test dalloc/decommit/purge cascade. */ purge_miblen = sizeof(purge_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), + expect_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), 0, "Unexpected mallctlnametomib() failure"); purge_mib[1] = (size_t)arena_ind; called_alloc = false; @@ -44,22 +44,22 @@ test_extent_body(unsigned arena_ind) { try_dalloc = false; try_decommit = false; p = mallocx(large0 * 2, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_true(called_alloc, "Expected alloc call"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); + expect_true(called_alloc, "Expected alloc call"); called_dalloc = false; called_decommit = false; did_purge_lazy = false; did_purge_forced = false; called_split = false; xallocx_success_a = (xallocx(p, large0, 0, flags) == large0); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + expect_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_a) { - assert_true(called_dalloc, "Expected dalloc call"); - assert_true(called_decommit, "Expected decommit call"); - assert_true(did_purge_lazy || did_purge_forced, + expect_true(called_dalloc, "Expected dalloc call"); + expect_true(called_decommit, "Expected decommit call"); 
+ expect_true(did_purge_lazy || did_purge_forced, "Expected purge"); - assert_true(called_split, "Expected split call"); + expect_true(called_split, "Expected split call"); } dallocx(p, flags); try_dalloc = true; @@ -68,25 +68,25 @@ test_extent_body(unsigned arena_ind) { try_dalloc = false; try_decommit = true; p = mallocx(large0 * 2, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); did_decommit = false; did_commit = false; called_split = false; did_split = false; did_merge = false; xallocx_success_b = (xallocx(p, large0, 0, flags) == large0); - assert_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), + expect_d_eq(mallctlbymib(purge_mib, purge_miblen, NULL, NULL, NULL, 0), 0, "Unexpected arena.%u.purge error", arena_ind); if (xallocx_success_b) { - assert_true(did_split, "Expected split"); + expect_true(did_split, "Expected split"); } xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2); if (did_split) { - assert_b_eq(did_decommit, did_commit, + expect_b_eq(did_decommit, did_commit, "Expected decommit/commit match"); } if (xallocx_success_b && xallocx_success_c) { - assert_true(did_merge, "Expected merge"); + expect_true(did_merge, "Expected merge"); } dallocx(p, flags); try_dalloc = true; @@ -94,7 +94,7 @@ test_extent_body(unsigned arena_ind) { /* Make sure non-large allocation succeeds. */ p = mallocx(42, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); dallocx(p, flags); } @@ -110,7 +110,7 @@ test_manual_hook_auto_arena(void) { sz = sizeof(unsigned); /* Get number of auto arenas. 
*/ - assert_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); if (narenas == 1) { return; @@ -118,18 +118,18 @@ test_manual_hook_auto_arena(void) { /* Install custom extent hooks on arena 1 (might not be initialized). */ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, + expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = 1; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, (void *)&new_hooks, new_size), 0, "Unexpected extent_hooks error"); static bool auto_arena_created = false; if (old_hooks != &hooks) { - assert_b_eq(auto_arena_created, false, + expect_b_eq(auto_arena_created, false, "Expected auto arena 1 created only once."); auto_arena_created = true; } @@ -146,35 +146,35 @@ test_manual_hook_body(void) { extent_hooks_prep(); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); /* Install custom extent hooks. 
*/ hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, + expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, (void *)&new_hooks, new_size), 0, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->alloc, extent_alloc_hook, + expect_ptr_ne(old_hooks->alloc, extent_alloc_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, + expect_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->commit, extent_commit_hook, + expect_ptr_ne(old_hooks->commit, extent_commit_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->decommit, extent_decommit_hook, + expect_ptr_ne(old_hooks->decommit, extent_decommit_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy_hook, + expect_ptr_ne(old_hooks->purge_lazy, extent_purge_lazy_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->purge_forced, extent_purge_forced_hook, + expect_ptr_ne(old_hooks->purge_forced, extent_purge_forced_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->split, extent_split_hook, + expect_ptr_ne(old_hooks->split, extent_split_hook, "Unexpected extent_hooks error"); - assert_ptr_ne(old_hooks->merge, extent_merge_hook, + expect_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); if (!check_background_thread_enabled()) { @@ -182,26 +182,26 @@ test_manual_hook_body(void) { } /* Restore extent hooks. 
*/ - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error"); - assert_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->alloc, default_hooks->alloc, + expect_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); + expect_ptr_eq(old_hooks->alloc, default_hooks->alloc, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, + expect_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->commit, default_hooks->commit, + expect_ptr_eq(old_hooks->commit, default_hooks->commit, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->decommit, default_hooks->decommit, + expect_ptr_eq(old_hooks->decommit, default_hooks->decommit, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->purge_lazy, default_hooks->purge_lazy, + expect_ptr_eq(old_hooks->purge_lazy, default_hooks->purge_lazy, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->purge_forced, default_hooks->purge_forced, + expect_ptr_eq(old_hooks->purge_forced, default_hooks->purge_forced, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->split, default_hooks->split, + expect_ptr_eq(old_hooks->split, default_hooks->split, "Unexpected extent_hooks error"); - assert_ptr_eq(old_hooks->merge, default_hooks->merge, + expect_ptr_eq(old_hooks->merge, default_hooks->merge, "Unexpected extent_hooks error"); } @@ -232,7 +232,7 @@ TEST_BEGIN(test_extent_auto_hook) { sz = sizeof(unsigned); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + 
expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); test_skip_if(check_background_thread_enabled()); diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index 645d4db4..fdf1e3f4 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -6,7 +6,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -25,11 +25,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; @@ -47,7 +47,7 @@ get_large_size(size_t ind) { */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -66,16 +66,16 @@ TEST_BEGIN(test_overflow) { largemax = get_large_size(get_nlarge()-1); - assert_ptr_null(mallocx(largemax+1, 0), + expect_ptr_null(mallocx(largemax+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", largemax+1); - assert_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), + expect_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - assert_ptr_null(mallocx(SIZE_T_MAX, 0), + expect_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); - assert_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); } @@ -85,11 +85,11 @@ static void * remote_alloc(void *arg) { unsigned arena; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), 0, "Unexpected mallctl failure"); void *ptr = mallocx(large_sz, MALLOCX_ARENA(arena) @@ -105,7 +105,7 @@ TEST_BEGIN(test_remote_free) { void *ret; thd_create(&thd, remote_alloc, (void *)&ret); thd_join(thd, NULL); - assert_ptr_not_null(ret, "Unexpected mallocx failure"); + expect_ptr_not_null(ret, "Unexpected mallocx failure"); /* Avoid TCACHE_NONE to explicitly test tcache_flush(). 
*/ dallocx(ret, 0); @@ -131,7 +131,7 @@ TEST_BEGIN(test_oom) { oom = true; } } - assert_true(oom, + expect_true(oom, "Expected OOM during series of calls to mallocx(size=%zu, 0)", largemax); for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { @@ -142,14 +142,14 @@ TEST_BEGIN(test_oom) { purge(); #if LG_SIZEOF_PTR == 3 - assert_ptr_null(mallocx(0x8000000000000000ULL, + expect_ptr_null(mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x8000000000000000ULL)), "Expected OOM for mallocx()"); - assert_ptr_null(mallocx(0x8000000000000000ULL, + expect_ptr_null(mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)), "Expected OOM for mallocx()"); #else - assert_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), + expect_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), "Expected OOM for mallocx()"); #endif } @@ -166,28 +166,28 @@ TEST_BEGIN(test_basic) { size_t nsz, rsz; void *p; nsz = nallocx(sz, 0); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); - assert_zu_ge(rsz, sz, "Real size smaller than expected"); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + expect_zu_ge(rsz, sz, "Real size smaller than expected"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, MALLOCX_ZERO); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(size=%zx, flags=MALLOCX_ZERO) error", nsz); rsz = sallocx(p, 0); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize 
mismatch"); dallocx(p, 0); purge(); } @@ -224,22 +224,22 @@ TEST_BEGIN(test_alignment_and_size) { for (i = 0; i < NITER; i++) { nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO | MALLOCX_ARENA(0)); - assert_zu_ne(nsz, 0, + expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO | MALLOCX_ARENA(0)); - assert_ptr_not_null(ps[i], + expect_ptr_not_null(ps[i], "mallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); rsz = sallocx(ps[i], 0); - assert_zu_ge(rsz, sz, + expect_zu_ge(rsz, sz, "Real size smaller than expected for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, rsz, + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)ps[i] & (alignment-1)), "%p inadequately aligned for" " alignment=%zu, size=%zu", ps[i], diff --git a/test/integration/overflow.c b/test/integration/overflow.c index 748ebb67..ce63327c 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -17,33 +17,33 @@ TEST_BEGIN(test_overflow) { void *p; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); mib[2] = nlextents - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, "Unexpected mallctlbymib() error"); - assert_ptr_null(malloc(max_size_class + 1), + expect_ptr_null(malloc(max_size_class + 1), "Expected OOM due to 
over-sized allocation request"); - assert_ptr_null(malloc(SIZE_T_MAX), + expect_ptr_null(malloc(SIZE_T_MAX), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(calloc(1, max_size_class + 1), + expect_ptr_null(calloc(1, max_size_class + 1), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(calloc(1, SIZE_T_MAX), + expect_ptr_null(calloc(1, SIZE_T_MAX), "Expected OOM due to over-sized allocation request"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() OOM"); - assert_ptr_null(realloc(p, max_size_class + 1), + expect_ptr_not_null(p, "Unexpected malloc() OOM"); + expect_ptr_null(realloc(p, max_size_class + 1), "Expected OOM due to over-sized allocation request"); - assert_ptr_null(realloc(p, SIZE_T_MAX), + expect_ptr_null(realloc(p, SIZE_T_MAX), "Expected OOM due to over-sized allocation request"); free(p); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index d992260a..6f8a1b03 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -9,7 +9,7 @@ */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -18,14 +18,14 @@ TEST_BEGIN(test_alignment_errors) { void *p; for (alignment = 0; alignment < sizeof(void *); alignment++) { - assert_d_eq(posix_memalign(&p, alignment, 1), EINVAL, + expect_d_eq(posix_memalign(&p, alignment, 1), EINVAL, "Expected error for invalid alignment %zu", alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; alignment <<= 1) { - assert_d_ne(posix_memalign(&p, alignment + 1, 1), 0, + expect_d_ne(posix_memalign(&p, alignment + 1, 1), 0, "Expected error for invalid alignment %zu", alignment + 1); } @@ -43,7 +43,7 @@ TEST_BEGIN(test_oom_errors) { alignment = 0x80000000LU; size = 0x80000000LU; #endif - assert_d_ne(posix_memalign(&p, alignment, size), 0, + 
expect_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); @@ -54,7 +54,7 @@ TEST_BEGIN(test_oom_errors) { alignment = 0x40000000LU; size = 0xc0000001LU; #endif - assert_d_ne(posix_memalign(&p, alignment, size), 0, + expect_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); @@ -64,7 +64,7 @@ TEST_BEGIN(test_oom_errors) { #else size = 0xfffffff0LU; #endif - assert_d_ne(posix_memalign(&p, alignment, size), 0, + expect_d_ne(posix_memalign(&p, alignment, size), 0, "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 08ed08d3..6cc4437d 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -6,7 +6,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -25,11 +25,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; @@ -50,28 +50,28 @@ TEST_BEGIN(test_grow_and_shrink) { #define MAXSZ ZU(12 * 1024 * 1024) p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); szs[0] = sallocx(p, 0); for (i = 0; i < NCYCLES; i++) { for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { q = rallocx(p, szs[j-1]+1, 0); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", szs[j-1], szs[j-1]+1); szs[j] = sallocx(q, 0); - assert_zu_ne(szs[j], szs[j-1]+1, + expect_zu_ne(szs[j], szs[j-1]+1, "Expected size to be at least: %zu", szs[j-1]+1); p = q; } for (j--; j > 0; j--) { q = rallocx(p, szs[j-1], 0); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", szs[j], szs[j-1]); tsz = sallocx(q, 0); - assert_zu_eq(tsz, szs[j-1], + expect_zu_eq(tsz, szs[j-1], "Expected size=%zu, got size=%zu", szs[j-1], tsz); p = q; } @@ -113,23 +113,23 @@ TEST_BEGIN(test_zero) { for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) { size_t start_size = start_sizes[i]; p = mallocx(start_size, MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); psz = sallocx(p, 0); - assert_false(validate_fill(p, 0, 0, psz), + expect_false(validate_fill(p, 0, 0, psz), "Expected zeroed memory"); memset(p, FILL_BYTE, psz); - assert_false(validate_fill(p, FILL_BYTE, 0, psz), + expect_false(validate_fill(p, FILL_BYTE, 0, psz), "Expected filled memory"); for (j = 1; j < RANGE; j++) { q = rallocx(p, start_size+j, MALLOCX_ZERO); - assert_ptr_not_null(q, "Unexpected rallocx() error"); + expect_ptr_not_null(q, "Unexpected rallocx() error"); qsz = sallocx(q, 0); if (q != p || qsz != psz) { - assert_false(validate_fill(q, FILL_BYTE, 0, + expect_false(validate_fill(q, FILL_BYTE, 0, psz), "Expected filled memory"); - assert_false(validate_fill(q, 0, psz, qsz-psz), + 
expect_false(validate_fill(q, 0, psz, qsz-psz), "Expected zeroed memory"); } if (psz != qsz) { @@ -139,7 +139,7 @@ TEST_BEGIN(test_zero) { } p = q; } - assert_false(validate_fill(p, FILL_BYTE, 0, psz), + expect_false(validate_fill(p, FILL_BYTE, 0, psz), "Expected filled memory"); dallocx(p, 0); } @@ -154,13 +154,13 @@ TEST_BEGIN(test_align) { align = ZU(1); p = mallocx(1, MALLOCX_ALIGN(align)); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); for (align <<= 1; align <= MAX_ALIGN; align <<= 1) { q = rallocx(p, 1, MALLOCX_ALIGN(align)); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for align=%zu", align); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)q & (align-1)), "%p inadequately aligned for align=%zu", q, align); @@ -180,23 +180,23 @@ TEST_BEGIN(test_lg_align_and_zero) { lg_align = 0; p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected rallocx() error for lg_align=%u", lg_align); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { - assert_false(validate_fill(q, 0, 0, sz), + expect_false(validate_fill(q, 0, 0, sz), "Expected zeroed memory"); } else { - assert_false(validate_fill(q, 0, 0, MAX_VALIDATE), + expect_false(validate_fill(q, 0, 0, MAX_VALIDATE), "Expected zeroed memory"); - assert_false(validate_fill( + expect_false(validate_fill( (void *)((uintptr_t)q+sz-MAX_VALIDATE), 0, 0, MAX_VALIDATE), "Expected zeroed memory"); } @@ -225,18 +225,18 @@ TEST_BEGIN(test_overflow) { largemax = get_large_size(get_nlarge()-1); p = 
mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_ptr_null(rallocx(p, largemax+1, 0), + expect_ptr_null(rallocx(p, largemax+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1); - assert_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), + expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - assert_ptr_null(rallocx(p, SIZE_T_MAX, 0), + expect_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); - assert_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); diff --git a/test/integration/slab_sizes.c b/test/integration/slab_sizes.c index af250c3f..f6a66f21 100644 --- a/test/integration/slab_sizes.c +++ b/test/integration/slab_sizes.c @@ -10,19 +10,19 @@ TEST_BEGIN(test_slab_sizes) { size_t len; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, "nbins mallctl failure"); len = sizeof(page); - assert_d_eq(mallctl("arenas.page", &page, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.page", &page, &len, NULL, 0), 0, "page mallctl failure"); len = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", sizemib, &len), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", sizemib, &len), 0, "bin size mallctlnametomib failure"); len = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", slabmib, &len), + expect_d_eq(mallctlnametomib("arenas.bin.0.slab_size", slabmib, &len), 0, "slab size mallctlnametomib failure"); size_t biggest_slab_seen = 0; @@ -33,11 +33,11 @@ TEST_BEGIN(test_slab_sizes) { len = sizeof(size_t); sizemib[2] = i; slabmib[2] = i; - assert_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, + 
expect_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, NULL, 0), 0, "bin size mallctlbymib failure"); len = sizeof(size_t); - assert_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, + expect_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, NULL, 0), 0, "slab size mallctlbymib failure"); if (bin_size < 100) { @@ -48,19 +48,19 @@ TEST_BEGIN(test_slab_sizes) { * should at least make sure that the number of pages * goes up. */ - assert_zu_ge(slab_size, biggest_slab_seen, + expect_zu_ge(slab_size, biggest_slab_seen, "Slab sizes should go up"); biggest_slab_seen = slab_size; } else if ( (100 <= bin_size && bin_size < 128) || (128 < bin_size && bin_size <= 200)) { - assert_zu_eq(slab_size, page, + expect_zu_eq(slab_size, page, "Forced-small slabs should be small"); } else if (bin_size == 128) { - assert_zu_eq(slab_size, 2 * page, + expect_zu_eq(slab_size, 2 * page, "Forced-2-page slab should be 2 pages"); } else if (200 < bin_size && bin_size <= 4096) { - assert_zu_ge(slab_size, biggest_slab_seen, + expect_zu_ge(slab_size, biggest_slab_seen, "Slab sizes should go up"); biggest_slab_seen = slab_size; } @@ -69,7 +69,7 @@ TEST_BEGIN(test_slab_sizes) { * For any reasonable configuration, 17 pages should be a valid slab * size for 4096-byte items. */ - assert_zu_eq(biggest_slab_seen, 17 * page, "Didn't hit page target"); + expect_zu_eq(biggest_slab_seen, 17 * page, "Didn't hit page target"); } TEST_END diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c index 2486752b..389319b7 100644 --- a/test/integration/smallocx.c +++ b/test/integration/smallocx.c @@ -26,7 +26,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) 
failure", cmd); return ret; @@ -45,11 +45,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; @@ -67,7 +67,7 @@ get_large_size(size_t ind) { */ static void purge(void) { - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl error"); } @@ -86,16 +86,16 @@ TEST_BEGIN(test_overflow) { largemax = get_large_size(get_nlarge()-1); - assert_ptr_null(smallocx(largemax+1, 0).ptr, + expect_ptr_null(smallocx(largemax+1, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", largemax+1); - assert_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, + expect_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); - assert_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, + expect_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", SIZE_T_MAX); - assert_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, + expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, "Expected OOM for smallocx(size=1, MALLOCX_ALIGN(%#zx))", ZU(PTRDIFF_MAX)+1); } @@ -105,17 +105,17 @@ static void * remote_alloc(void *arg) { unsigned arena; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, + 
expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), 0, "Unexpected mallctl failure"); smallocx_return_t r = smallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void *ptr = r.ptr; - assert_zu_eq(r.size, + expect_zu_eq(r.size, nallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE), "Expected smalloc(size,flags).size == nallocx(size,flags)"); void **ret = (void **)arg; @@ -129,7 +129,7 @@ TEST_BEGIN(test_remote_free) { void *ret; thd_create(&thd, remote_alloc, (void *)&ret); thd_join(thd, NULL); - assert_ptr_not_null(ret, "Unexpected smallocx failure"); + expect_ptr_not_null(ret, "Unexpected smallocx failure"); /* Avoid TCACHE_NONE to explicitly test tcache_flush(). */ dallocx(ret, 0); @@ -155,7 +155,7 @@ TEST_BEGIN(test_oom) { oom = true; } } - assert_true(oom, + expect_true(oom, "Expected OOM during series of calls to smallocx(size=%zu, 0)", largemax); for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { @@ -166,14 +166,14 @@ TEST_BEGIN(test_oom) { purge(); #if LG_SIZEOF_PTR == 3 - assert_ptr_null(smallocx(0x8000000000000000ULL, + expect_ptr_null(smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x8000000000000000ULL)).ptr, "Expected OOM for smallocx()"); - assert_ptr_null(smallocx(0x8000000000000000ULL, + expect_ptr_null(smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)).ptr, "Expected OOM for smallocx()"); #else - assert_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, + expect_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, "Expected OOM for smallocx()"); #endif } @@ -191,36 +191,36 @@ TEST_BEGIN(test_basic) { size_t nsz, rsz, smz; void *p; nsz = nallocx(sz, 0); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); - assert_zu_ge(rsz, sz, "Real size 
smaller than expected"); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); - assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + expect_zu_ge(rsz, sz, "Real size smaller than expected"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); + expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); dallocx(p, 0); ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); - assert_zu_ne(nsz, 0, "Unexpected nallocx() error"); - assert_zu_ne(smz, 0, "Unexpected smallocx() error"); + expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); + expect_zu_ne(smz, 0, "Unexpected smallocx() error"); ret = smallocx(sz, MALLOCX_ZERO); p = ret.ptr; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected smallocx(size=%zx, flags=MALLOCX_ZERO) error", nsz); rsz = sallocx(p, 0); - assert_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); - assert_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() rsize mismatch"); + expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch"); dallocx(p, 0); purge(); } @@ -257,27 +257,27 @@ TEST_BEGIN(test_alignment_and_size) { for (i = 0; i < NITER; i++) { nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); - assert_zu_ne(nsz, 0, + expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); smallocx_return_t ret = smallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); ps[i] = ret.ptr; - assert_ptr_not_null(ps[i], + expect_ptr_not_null(ps[i], "smallocx() error for alignment=%zu, " "size=%zu (%#zx)", alignment, sz, sz); rsz = sallocx(ps[i], 0); smz = ret.size; - assert_zu_ge(rsz, sz, + expect_zu_ge(rsz, sz, "Real size smaller than expected for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, rsz, + expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size 
mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_zu_eq(nsz, smz, + expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch for " "alignment=%zu, size=%zu", alignment, sz); - assert_ptr_null( + expect_ptr_null( (void *)((uintptr_t)ps[i] & (alignment-1)), "%p inadequately aligned for" " alignment=%zu, size=%zu", ps[i], diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 1e5ec05d..4a6abf64 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -11,7 +11,7 @@ thd_start(void *arg) { int err; p = malloc(1); - assert_ptr_not_null(p, "Error in malloc()"); + expect_ptr_not_null(p, "Error in malloc()"); free(p); size = sizeof(arena_ind); @@ -31,7 +31,7 @@ thd_start(void *arg) { buferror(err, buf, sizeof(buf)); test_fail("Error in mallctl(): %s", buf); } - assert_u_eq(arena_ind, main_arena_ind, + expect_u_eq(arena_ind, main_arena_ind, "Arena index should be same as for main thread"); return NULL; @@ -52,11 +52,11 @@ TEST_BEGIN(test_thread_arena) { unsigned i; p = malloc(1); - assert_ptr_not_null(p, "Error in malloc()"); + expect_ptr_not_null(p, "Error in malloc()"); unsigned arena_ind, old_arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Arena creation failure"); size_t size = sizeof(arena_ind); @@ -73,7 +73,7 @@ TEST_BEGIN(test_thread_arena) { for (i = 0; i < NTHREADS; i++) { intptr_t join_ret; thd_join(thds[i], (void *)&join_ret); - assert_zd_eq(join_ret, 0, "Unexpected thread join error"); + expect_zd_eq(join_ret, 0, "Unexpected thread join error"); } free(p); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index 95c9acc1..d44dbe90 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -4,59 +4,59 @@ void * thd_start(void *arg) { bool e0, e1; size_t 
sz = sizeof(bool); - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); if (e0) { e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); } e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - 
assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_true(e0, "tcache should be enabled"); + expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - assert_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), 0, "Unexpected mallctl() error"); - assert_false(e0, "tcache should be disabled"); + expect_false(e0, "tcache should be disabled"); free(malloc(1)); return NULL; diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index cd0ca048..13708548 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -11,7 +11,7 @@ arena_ind(void) { if (ind == 0) { size_t sz = sizeof(ind); - assert_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, + expect_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, 0), 0, "Unexpected mallctl failure creating arena"); } @@ -23,11 +23,11 @@ TEST_BEGIN(test_same_size) { size_t sz, tsz; p = mallocx(42, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); tsz = xallocx(p, sz, 0, 0); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); } @@ -38,11 +38,11 @@ TEST_BEGIN(test_extra_no_move) { size_t sz, tsz; p = mallocx(42, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); tsz = xallocx(p, sz, 
sz-42, 0); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); } @@ -53,11 +53,11 @@ TEST_BEGIN(test_no_move_fail) { size_t sz, tsz; p = mallocx(42, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); tsz = xallocx(p, sz + 5, 0, 0); - assert_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); + expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); } @@ -69,7 +69,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -93,11 +93,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; @@ -122,20 +122,20 @@ TEST_BEGIN(test_size) { largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test smallest supported size. */ - assert_zu_eq(xallocx(p, 1, 0, 0), small0, + expect_zu_eq(xallocx(p, 1, 0, 0), small0, "Unexpected xallocx() behavior"); /* Test largest supported size. */ - assert_zu_le(xallocx(p, largemax, 0, 0), largemax, + expect_zu_le(xallocx(p, largemax, 0, 0), largemax, "Unexpected xallocx() behavior"); /* Test size overflow. 
*/ - assert_zu_le(xallocx(p, largemax+1, 0, 0), largemax, + expect_zu_le(xallocx(p, largemax+1, 0, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -151,22 +151,22 @@ TEST_BEGIN(test_size_extra_overflow) { largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test overflows that can be resolved by clamping extra. */ - assert_zu_le(xallocx(p, largemax-1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax-1, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, largemax, 1, 0), largemax, + expect_zu_le(xallocx(p, largemax, 1, 0), largemax, "Unexpected xallocx() behavior"); /* Test overflow such that largemax-size underflows. */ - assert_zu_le(xallocx(p, largemax+1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax+1, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, largemax+2, 3, 0), largemax, + expect_zu_le(xallocx(p, largemax+2, 3, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -183,21 +183,21 @@ TEST_BEGIN(test_extra_small) { largemax = get_large_size(get_nlarge()-1); p = mallocx(small0, 0); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, small1, 0, 0), small0, + expect_zu_eq(xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, small1, 0, 0), small0, + expect_zu_eq(xallocx(p, small1, 0, 0), 
small0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, + expect_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, "Unexpected xallocx() behavior"); /* Test size+extra overflow. */ - assert_zu_eq(xallocx(p, small0, largemax - small0 + 1, 0), small0, + expect_zu_eq(xallocx(p, small0, largemax - small0 + 1, 0), small0, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0, + expect_zu_eq(xallocx(p, small0, SIZE_T_MAX - small0, 0), small0, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -217,56 +217,56 @@ TEST_BEGIN(test_extra_large) { largemax = get_large_size(get_nlarge()-1); p = mallocx(large3, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); + expect_ptr_not_null(p, "Unexpected mallocx() error"); - assert_zu_eq(xallocx(p, large3, 0, flags), large3, + expect_zu_eq(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); /* Test size decrease with zero extra. */ - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, smallmax, 0, flags), large1, + expect_zu_ge(xallocx(p, smallmax, 0, flags), large1, "Unexpected xallocx() behavior"); if (xallocx(p, large3, 0, flags) != large3) { p = rallocx(p, large3, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } /* Test size decrease with non-zero extra. 
*/ - assert_zu_eq(xallocx(p, large1, large3 - large1, flags), large3, + expect_zu_eq(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_eq(xallocx(p, large2, large3 - large2, flags), large3, + expect_zu_eq(xallocx(p, large2, large3 - large2, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, large2 - large1, flags), large2, + expect_zu_ge(xallocx(p, large1, large2 - large1, flags), large2, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1, + expect_zu_ge(xallocx(p, smallmax, large1 - smallmax, flags), large1, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with zero extra. */ - assert_zu_le(xallocx(p, large3, 0, flags), large3, + expect_zu_le(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_le(xallocx(p, largemax+1, 0, flags), large3, + expect_zu_le(xallocx(p, largemax+1, 0, flags), large3, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. */ - assert_zu_le(xallocx(p, large1, SIZE_T_MAX - large1, flags), largemax, + expect_zu_le(xallocx(p, large1, SIZE_T_MAX - large1, flags), largemax, "Unexpected xallocx() behavior"); - assert_zu_ge(xallocx(p, large1, 0, flags), large1, + expect_zu_ge(xallocx(p, large1, 0, flags), large1, "Unexpected xallocx() behavior"); /* Test size increase with non-zero extra. 
*/ - assert_zu_le(xallocx(p, large1, large3 - large1, flags), large3, + expect_zu_le(xallocx(p, large1, large3 - large1, flags), large3, "Unexpected xallocx() behavior"); if (xallocx(p, large3, 0, flags) != large3) { p = rallocx(p, large3, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } /* Test size+extra overflow. */ - assert_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax, + expect_zu_le(xallocx(p, large3, largemax - large3 + 1, flags), largemax, "Unexpected xallocx() behavior"); dallocx(p, flags); @@ -320,8 +320,8 @@ test_zero(size_t szmin, size_t szmax) { sz = szmax; p = mallocx(sz, flags); - assert_ptr_not_null(p, "Unexpected mallocx() error"); - assert_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", + expect_ptr_not_null(p, "Unexpected mallocx() error"); + expect_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); /* @@ -329,30 +329,30 @@ test_zero(size_t szmin, size_t szmax) { * errors. */ memset(p, FILL_BYTE, sz); - assert_false(validate_fill(p, FILL_BYTE, 0, sz), + expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); /* Shrink in place so that we can expect growing in place to succeed. 
*/ sz = szmin; if (xallocx(p, sz, 0, flags) != sz) { p = rallocx(p, sz, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } - assert_false(validate_fill(p, FILL_BYTE, 0, sz), + expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { nsz = nallocx(sz+1, flags); if (xallocx(p, sz+1, 0, flags) != nsz) { p = rallocx(p, sz+1, flags); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); } - assert_false(validate_fill(p, FILL_BYTE, 0, sz), + expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); - assert_false(validate_fill(p, 0x00, sz, nsz-sz), + expect_false(validate_fill(p, 0x00, sz, nsz-sz), "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz); memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz); - assert_false(validate_fill(p, FILL_BYTE, 0, nsz), + expect_false(validate_fill(p, FILL_BYTE, 0, nsz), "Memory not filled: nsz=%zu", nsz); } diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index 1fc8cf1b..b9f85dd9 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -1456,7 +1456,7 @@ TEST_BEGIN(test_gen_rand_32) { uint32_t r32; sfmt_t *ctx; - assert_d_le(get_min_array_size32(), BLOCK_SIZE, + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_gen_rand(1234); fill_array32(ctx, array32, BLOCK_SIZE); @@ -1466,16 +1466,16 @@ TEST_BEGIN(test_gen_rand_32) { ctx = init_gen_rand(1234); for (i = 0; i < BLOCK_SIZE; i++) { if (i < COUNT_1) { - assert_u32_eq(array32[i], init_gen_rand_32_expected[i], + expect_u32_eq(array32[i], init_gen_rand_32_expected[i], "Output mismatch for i=%d", i); } r32 = gen_rand32(ctx); - assert_u32_eq(r32, array32[i], + expect_u32_eq(r32, array32[i], "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); } for (i = 0; i < COUNT_2; i++) { r32 = gen_rand32(ctx); - assert_u32_eq(r32, 
array32_2[i], + expect_u32_eq(r32, array32_2[i], "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], r32); } @@ -1491,7 +1491,7 @@ TEST_BEGIN(test_by_array_32) { uint32_t r32; sfmt_t *ctx; - assert_d_le(get_min_array_size32(), BLOCK_SIZE, + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_by_array(ini, 4); fill_array32(ctx, array32, BLOCK_SIZE); @@ -1501,16 +1501,16 @@ TEST_BEGIN(test_by_array_32) { ctx = init_by_array(ini, 4); for (i = 0; i < BLOCK_SIZE; i++) { if (i < COUNT_1) { - assert_u32_eq(array32[i], init_by_array_32_expected[i], + expect_u32_eq(array32[i], init_by_array_32_expected[i], "Output mismatch for i=%d", i); } r32 = gen_rand32(ctx); - assert_u32_eq(r32, array32[i], + expect_u32_eq(r32, array32[i], "Mismatch at array32[%d]=%x, gen=%x", i, array32[i], r32); } for (i = 0; i < COUNT_2; i++) { r32 = gen_rand32(ctx); - assert_u32_eq(r32, array32_2[i], + expect_u32_eq(r32, array32_2[i], "Mismatch at array32_2[%d]=%x, gen=%x", i, array32_2[i], r32); } @@ -1525,7 +1525,7 @@ TEST_BEGIN(test_gen_rand_64) { uint64_t r; sfmt_t *ctx; - assert_d_le(get_min_array_size64(), BLOCK_SIZE64, + expect_d_le(get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_gen_rand(4321); fill_array64(ctx, array64, BLOCK_SIZE64); @@ -1535,17 +1535,17 @@ TEST_BEGIN(test_gen_rand_64) { ctx = init_gen_rand(4321); for (i = 0; i < BLOCK_SIZE64; i++) { if (i < COUNT_1) { - assert_u64_eq(array64[i], init_gen_rand_64_expected[i], + expect_u64_eq(array64[i], init_gen_rand_64_expected[i], "Output mismatch for i=%d", i); } r = gen_rand64(ctx); - assert_u64_eq(r, array64[i], + expect_u64_eq(r, array64[i], "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); - assert_u64_eq(r, array64_2[i], + expect_u64_eq(r, array64_2[i], "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i, array64_2[i], r); } @@ -1561,7 +1561,7 @@ TEST_BEGIN(test_by_array_64) { uint32_t ini[] = 
{5, 4, 3, 2, 1}; sfmt_t *ctx; - assert_d_le(get_min_array_size64(), BLOCK_SIZE64, + expect_d_le(get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_by_array(ini, 5); fill_array64(ctx, array64, BLOCK_SIZE64); @@ -1571,17 +1571,17 @@ TEST_BEGIN(test_by_array_64) { ctx = init_by_array(ini, 5); for (i = 0; i < BLOCK_SIZE64; i++) { if (i < COUNT_1) { - assert_u64_eq(array64[i], init_by_array_64_expected[i], + expect_u64_eq(array64[i], init_by_array_64_expected[i], "Output mismatch for i=%d", i); } r = gen_rand64(ctx); - assert_u64_eq(r, array64[i], + expect_u64_eq(r, array64[i], "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); - assert_u64_eq(r, array64_2[i], + expect_u64_eq(r, array64_2[i], "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i, array64_2[i], r); } diff --git a/test/unit/a0.c b/test/unit/a0.c index a27ab3f4..c1be79a6 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_a0) { void *p; p = a0malloc(1); - assert_ptr_not_null(p, "Unexpected a0malloc() error"); + expect_ptr_not_null(p, "Unexpected a0malloc() error"); a0dalloc(p); } TEST_END diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 64db058d..a7a23f74 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -13,7 +13,7 @@ get_nsizes_impl(const char *cmd) { size_t z; z = sizeof(unsigned); - assert_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, + expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctl(\"%s\", ...) failure", cmd); return ret; @@ -37,11 +37,11 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); mib[2] = ind; z = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; @@ -85,7 +85,7 @@ static unsigned do_arena_create(extent_hooks_t *h) { unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); return arena_ind; @@ -105,19 +105,19 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { nlarge = get_nlarge() > NLARGE ? NLARGE : get_nlarge(); *nptrs = nsmall + nlarge; *ptrs = (void **)malloc(*nptrs * sizeof(void *)); - assert_ptr_not_null(*ptrs, "Unexpected malloc() failure"); + expect_ptr_not_null(*ptrs, "Unexpected malloc() failure"); /* Allocate objects with a wide range of sizes. */ for (i = 0; i < nsmall; i++) { sz = get_small_size(i); (*ptrs)[i] = mallocx(sz, flags); - assert_ptr_not_null((*ptrs)[i], + expect_ptr_not_null((*ptrs)[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } for (i = 0; i < nlarge; i++) { sz = get_large_size(i); (*ptrs)[nsmall + i] = mallocx(sz, flags); - assert_ptr_not_null((*ptrs)[i], + expect_ptr_not_null((*ptrs)[i], "Unexpected mallocx(%zu, %#x) failure", sz, flags); } @@ -125,7 +125,7 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { /* Verify allocations. */ for (i = 0; i < *nptrs; i++) { - assert_zu_gt(ivsalloc(tsdn, (*ptrs)[i]), 0, + expect_zu_gt(ivsalloc(tsdn, (*ptrs)[i]), 0, "Allocation should have queryable size"); } } @@ -143,7 +143,7 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { } /* Verify allocations no longer exist. 
*/ for (i = 0; i < nptrs; i++) { - assert_zu_eq(vsalloc(tsdn, ptrs[i]), 0, + expect_zu_eq(vsalloc(tsdn, ptrs[i]), 0, "Allocation should no longer exist"); } if (have_background_thread) { @@ -160,10 +160,10 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib(name, mib, &miblen), 0, + expect_d_eq(mallctlnametomib(name, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -197,23 +197,23 @@ arena_i_initialized(unsigned arena_ind, bool refresh) { if (refresh) { uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; sz = sizeof(initialized); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); return initialized; } TEST_BEGIN(test_arena_destroy_initial) { - assert_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + expect_false(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should not be initialized"); } TEST_END @@ -226,9 +226,9 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - assert_false(arena_i_initialized(arena_ind, false), + expect_false(arena_i_initialized(arena_ind, false), "Arena stats should not be initialized"); - 
assert_true(arena_i_initialized(arena_ind, true), + expect_true(arena_i_initialized(arena_ind, true), "Arena stats should be initialized"); /* @@ -239,9 +239,9 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { do_arena_destroy(arena_ind); - assert_false(arena_i_initialized(arena_ind, true), + expect_false(arena_i_initialized(arena_ind, true), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + expect_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should be initialized"); do_arena_reset_post(ptrs, nptrs, arena_ind); @@ -249,7 +249,7 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { arena_ind_prev = arena_ind; arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - assert_u_eq(arena_ind, arena_ind_prev, + expect_u_eq(arena_ind, arena_ind_prev, "Arena index should have been recycled"); do_arena_destroy(arena_ind); do_arena_reset_post(ptrs, nptrs, arena_ind); @@ -268,9 +268,9 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? 
"true" : "false", arena_ind); - assert_ptr_eq(extent_hooks, &hooks, + expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - assert_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, + expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { @@ -314,20 +314,20 @@ TEST_BEGIN(test_arena_destroy_hooks_unmap) { arena_ind = do_arena_create(&hooks); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - assert_true(did_alloc, "Expected alloc"); + expect_true(did_alloc, "Expected alloc"); - assert_false(arena_i_initialized(arena_ind, false), + expect_false(arena_i_initialized(arena_ind, false), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(arena_ind, true), + expect_true(arena_i_initialized(arena_ind, true), "Arena stats should be initialized"); did_dalloc = false; do_arena_destroy(arena_ind); - assert_true(did_dalloc, "Expected dalloc"); + expect_true(did_dalloc, "Expected dalloc"); - assert_false(arena_i_initialized(arena_ind, true), + expect_false(arena_i_initialized(arena_ind, true), "Arena stats should not be initialized"); - assert_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), + expect_true(arena_i_initialized(MALLCTL_ARENAS_DESTROYED, false), "Destroyed arena stats should be initialized"); do_arena_reset_post(ptrs, nptrs, arena_ind); diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 572d8d23..1326a11c 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -6,7 +6,7 @@ * some places and "ptr" in others. In the long run it would be nice to unify * these, but in the short run we'll use this shim. */ -#define assert_p_eq assert_ptr_eq +#define expect_p_eq expect_ptr_eq /* * t: the non-atomic type, like "uint32_t". @@ -24,20 +24,20 @@ \ /* ATOMIC_INIT and load. 
*/ \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, "Load or init failed"); \ + expect_##ta##_eq(val1, val, "Load or init failed"); \ \ /* Store. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ atomic_store_##ta(&atom, val2, ATOMIC_RELAXED); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val2, val, "Store failed"); \ + expect_##ta##_eq(val2, val, "Store failed"); \ \ /* Exchange. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_exchange_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, "Exchange returned invalid value"); \ + expect_##ta##_eq(val1, val, "Exchange returned invalid value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val2, val, "Exchange store invalid value"); \ + expect_##ta##_eq(val2, val, "Exchange store invalid value"); \ \ /* \ * Weak CAS. Spurious failures are allowed, so we loop a few \ @@ -49,17 +49,17 @@ expected = val2; \ success = atomic_compare_exchange_weak_##ta(&atom, \ &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, expected, \ + expect_##ta##_eq(val1, expected, \ "CAS should update expected"); \ } \ - assert_b_eq(val1 == val2, success, \ + expect_b_eq(val1 == val2, success, \ "Weak CAS did the wrong state update"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ if (success) { \ - assert_##ta##_eq(val3, val, \ + expect_##ta##_eq(val3, val, \ "Successful CAS should update atomic"); \ } else { \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Unsuccessful CAS should not update atomic"); \ } \ \ @@ -68,14 +68,14 @@ expected = val2; \ success = atomic_compare_exchange_strong_##ta(&atom, &expected, \ val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ - assert_b_eq(val1 == val2, success, \ + expect_b_eq(val1 == val2, success, \ "Strong CAS did the wrong state update"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ if (success) { \ - assert_##ta##_eq(val3, val, \ + 
expect_##ta##_eq(val3, val, \ "Successful CAS should update atomic"); \ } else { \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Unsuccessful CAS should not update atomic"); \ } \ \ @@ -89,46 +89,46 @@ /* Fetch-add. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_add_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-add should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 + val2, val, \ + expect_##ta##_eq(val1 + val2, val, \ "Fetch-add should update atomic"); \ \ /* Fetch-sub. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_sub_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-sub should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 - val2, val, \ + expect_##ta##_eq(val1 - val2, val, \ "Fetch-sub should update atomic"); \ \ /* Fetch-and. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_and_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-and should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 & val2, val, \ + expect_##ta##_eq(val1 & val2, val, \ "Fetch-and should update atomic"); \ \ /* Fetch-or. */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_or_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-or should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 | val2, val, \ + expect_##ta##_eq(val1 | val2, val, \ "Fetch-or should update atomic"); \ \ /* Fetch-xor. 
*/ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ val = atomic_fetch_xor_##ta(&atom, val2, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1, val, \ + expect_##ta##_eq(val1, val, \ "Fetch-xor should return previous value"); \ val = atomic_load_##ta(&atom, ATOMIC_RELAXED); \ - assert_##ta##_eq(val1 ^ val2, val, \ + expect_##ta##_eq(val1 ^ val2, val, \ "Fetch-xor should update atomic"); \ } while (0) diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index f597285a..c60010a8 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -8,15 +8,15 @@ test_switch_background_thread_ctl(bool new_val) { size_t sz = sizeof(bool); e1 = new_val; - assert_d_eq(mallctl("background_thread", (void *)&e0, &sz, + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, "Unexpected mallctl() failure"); - assert_b_eq(e0, !e1, + expect_b_eq(e0, !e1, "background_thread should be %d before.\n", !e1); if (e1) { - assert_zu_gt(n_background_threads, 0, + expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); } else { - assert_zu_eq(n_background_threads, 0, + expect_zu_eq(n_background_threads, 0, "Number of background threads should be zero.\n"); } } @@ -27,15 +27,15 @@ test_repeat_background_thread_ctl(bool before) { size_t sz = sizeof(bool); e1 = before; - assert_d_eq(mallctl("background_thread", (void *)&e0, &sz, + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, "Unexpected mallctl() failure"); - assert_b_eq(e0, before, + expect_b_eq(e0, before, "background_thread should be %d.\n", before); if (e1) { - assert_zu_gt(n_background_threads, 0, + expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); } else { - assert_zu_eq(n_background_threads, 0, + expect_zu_eq(n_background_threads, 0, "Number of background threads should be zero.\n"); } } @@ -46,16 +46,16 @@ TEST_BEGIN(test_background_thread_ctl) { bool e0, e1; size_t sz = 
sizeof(bool); - assert_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, + expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("background_thread", (void *)&e1, &sz, + expect_d_eq(mallctl("background_thread", (void *)&e1, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_b_eq(e0, e1, + expect_b_eq(e0, e1, "Default and opt.background_thread does not match.\n"); if (e0) { test_switch_background_thread_ctl(false); } - assert_zu_eq(n_background_threads, 0, + expect_zu_eq(n_background_threads, 0, "Number of background threads should be 0.\n"); for (unsigned i = 0; i < 4; i++) { @@ -80,7 +80,7 @@ TEST_BEGIN(test_background_thread_running) { test_repeat_background_thread_ctl(false); test_switch_background_thread_ctl(true); - assert_b_eq(info->state, background_thread_started, + expect_b_eq(info->state, background_thread_started, "Background_thread did not start.\n"); nstime_t start; @@ -100,7 +100,7 @@ TEST_BEGIN(test_background_thread_running) { nstime_t now; nstime_init_update(&now); nstime_subtract(&now, &start); - assert_u64_lt(nstime_sec(&now), 1000, + expect_u64_lt(nstime_sec(&now), 1000, "Background threads did not run for 1000 seconds."); sleep(1); } diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index d894e937..46776f36 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -16,16 +16,16 @@ TEST_BEGIN(test_deferred) { * approximation. 
*/ for (unsigned i = 0; i < 10 * ncpus; i++) { - assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } bool enable = true; size_t sz_b = sizeof(bool); - assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); enable = false; - assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to disable background threads"); } TEST_END @@ -36,43 +36,43 @@ TEST_BEGIN(test_max_background_threads) { size_t max_n_thds; size_t opt_max_n_thds; size_t sz_m = sizeof(max_n_thds); - assert_d_eq(mallctl("opt.max_background_threads", + expect_d_eq(mallctl("opt.max_background_threads", &opt_max_n_thds, &sz_m, NULL, 0), 0, "Failed to get opt.max_background_threads"); - assert_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, + expect_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, 0), 0, "Failed to get max background threads"); - assert_zu_eq(opt_max_n_thds, max_n_thds, + expect_zu_eq(opt_max_n_thds, max_n_thds, "max_background_threads and " "opt.max_background_threads should match"); - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, + expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0, "Failed to set max background threads"); unsigned id; size_t sz_u = sizeof(unsigned); for (unsigned i = 0; i < 10 * ncpus; i++) { - assert_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } bool enable = true; size_t sz_b = sizeof(bool); - assert_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, + expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable 
background threads"); - assert_zu_eq(n_background_threads, max_n_thds, + expect_zu_eq(n_background_threads, max_n_thds, "Number of background threads should not change.\n"); size_t new_max_thds = max_n_thds - 1; if (new_max_thds > 0) { - assert_d_eq(mallctl("max_background_threads", NULL, NULL, + expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), 0, "Failed to set max background threads"); - assert_zu_eq(n_background_threads, new_max_thds, + expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should decrease by 1.\n"); } new_max_thds = 1; - assert_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, + expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), 0, "Failed to set max background threads"); - assert_zu_eq(n_background_threads, new_max_thds, + expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should be 1.\n"); } TEST_END diff --git a/test/unit/base.c b/test/unit/base.c index 3b848ca1..5e990b34 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -37,21 +37,21 @@ TEST_BEGIN(test_base_hooks_default) { if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, &n_thp); - assert_zu_ge(allocated0, sizeof(base_t), + expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - assert_zu_gt(n_thp, 0, + expect_zu_gt(n_thp, 0, "Base should have 1 THP at least."); } } - assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), + expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated1, &resident, &mapped, &n_thp); - assert_zu_ge(allocated1 - allocated0, 42, + expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -75,26 +75,26 @@ TEST_BEGIN(test_base_hooks_null) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, 
&hooks); - assert_ptr_not_null(base, "Unexpected base_new() failure"); + expect_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, &n_thp); - assert_zu_ge(allocated0, sizeof(base_t), + expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - assert_zu_gt(n_thp, 0, + expect_zu_gt(n_thp, 0, "Base should have 1 THP at least."); } } - assert_ptr_not_null(base_alloc(tsdn, base, 42, 1), + expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated1, &resident, &mapped, &n_thp); - assert_zu_ge(allocated1 - allocated0, 42, + expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -121,8 +121,8 @@ TEST_BEGIN(test_base_hooks_not_null) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); did_alloc = false; base = base_new(tsdn, 0, &hooks); - assert_ptr_not_null(base, "Unexpected base_new() failure"); - assert_true(did_alloc, "Expected alloc"); + expect_ptr_not_null(base, "Unexpected base_new() failure"); + expect_true(did_alloc, "Expected alloc"); /* * Check for tight packing at specified alignment under simple @@ -143,21 +143,21 @@ TEST_BEGIN(test_base_hooks_not_null) { size_t align_ceil = ALIGNMENT_CEILING(alignment, QUANTUM); p = base_alloc(tsdn, base, 1, alignment); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected base_alloc() failure"); - assert_ptr_eq(p, + expect_ptr_eq(p, (void *)(ALIGNMENT_CEILING((uintptr_t)p, alignment)), "Expected quantum alignment"); q = base_alloc(tsdn, base, alignment, alignment); - assert_ptr_not_null(q, + expect_ptr_not_null(q, "Unexpected base_alloc() failure"); - assert_ptr_eq((void *)((uintptr_t)p + align_ceil), q, + expect_ptr_eq((void *)((uintptr_t)p + align_ceil), q, "Minimal allocation should take up %zu bytes", align_ceil); r = base_alloc(tsdn, base, 1, 
alignment); - assert_ptr_not_null(r, + expect_ptr_not_null(r, "Unexpected base_alloc() failure"); - assert_ptr_eq((void *)((uintptr_t)q + align_ceil), r, + expect_ptr_eq((void *)((uintptr_t)q + align_ceil), r, "Minimal allocation should take up %zu bytes", align_ceil); } @@ -168,23 +168,23 @@ TEST_BEGIN(test_base_hooks_not_null) { * that the first block's remaining space is considered for subsequent * allocation. */ - assert_zu_ge(edata_bsize_get(&base->blocks->edata), QUANTUM, + expect_zu_ge(edata_bsize_get(&base->blocks->edata), QUANTUM, "Remainder insufficient for test"); /* Use up all but one quantum of block. */ while (edata_bsize_get(&base->blocks->edata) > QUANTUM) { p = base_alloc(tsdn, base, QUANTUM, QUANTUM); - assert_ptr_not_null(p, "Unexpected base_alloc() failure"); + expect_ptr_not_null(p, "Unexpected base_alloc() failure"); } r_exp = edata_addr_get(&base->blocks->edata); - assert_zu_eq(base->extent_sn_next, 1, "One extant block expected"); + expect_zu_eq(base->extent_sn_next, 1, "One extant block expected"); q = base_alloc(tsdn, base, QUANTUM + 1, QUANTUM); - assert_ptr_not_null(q, "Unexpected base_alloc() failure"); - assert_ptr_ne(q, r_exp, "Expected allocation from new block"); - assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); + expect_ptr_not_null(q, "Unexpected base_alloc() failure"); + expect_ptr_ne(q, r_exp, "Expected allocation from new block"); + expect_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); r = base_alloc(tsdn, base, QUANTUM, QUANTUM); - assert_ptr_not_null(r, "Unexpected base_alloc() failure"); - assert_ptr_eq(r, r_exp, "Expected allocation from first block"); - assert_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); + expect_ptr_not_null(r, "Unexpected base_alloc() failure"); + expect_ptr_eq(r, r_exp, "Expected allocation from first block"); + expect_zu_eq(base->extent_sn_next, 2, "Two extant blocks expected"); /* * Check for proper alignment support when normal blocks are too 
small. @@ -199,9 +199,9 @@ TEST_BEGIN(test_base_hooks_not_null) { for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { size_t alignment = alignments[i]; p = base_alloc(tsdn, base, QUANTUM, alignment); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected base_alloc() failure"); - assert_ptr_eq(p, + expect_ptr_eq(p, (void *)(ALIGNMENT_CEILING((uintptr_t)p, alignment)), "Expected %zu-byte alignment", alignment); @@ -211,11 +211,11 @@ TEST_BEGIN(test_base_hooks_not_null) { called_dalloc = called_destroy = called_decommit = called_purge_lazy = called_purge_forced = false; base_delete(tsdn, base); - assert_true(called_dalloc, "Expected dalloc call"); - assert_true(!called_destroy, "Unexpected destroy call"); - assert_true(called_decommit, "Expected decommit call"); - assert_true(called_purge_lazy, "Expected purge_lazy call"); - assert_true(called_purge_forced, "Expected purge_forced call"); + expect_true(called_dalloc, "Expected dalloc call"); + expect_true(!called_destroy, "Unexpected destroy call"); + expect_true(called_decommit, "Expected decommit call"); + expect_true(called_purge_lazy, "Expected purge_lazy call"); + expect_true(called_purge_forced, "Expected purge_forced call"); try_dalloc = true; try_destroy = true; diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 6e10d477..243a9b3a 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -13,7 +13,7 @@ thd_producer(void *varg) { sz = sizeof(arena); /* Remote arena. */ - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); for (i = 0; i < REMOTE_NALLOC / 2; i++) { mem[i] = mallocx(1, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena)); @@ -42,7 +42,7 @@ TEST_BEGIN(test_producer_consumer) { /* Remote deallocation by the current thread. 
*/ for (i = 0; i < NTHREADS; i++) { for (unsigned j = 0; j < REMOTE_NALLOC; j++) { - assert_ptr_not_null(mem[i][j], + expect_ptr_not_null(mem[i][j], "Unexpected remote allocation failure"); dallocx(mem[i][j], 0); } @@ -65,12 +65,12 @@ thd_start(void *varg) { edata = emap_edata_lookup(tsdn, &emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); - assert_u_lt(shard1, 16, "Unexpected bin shard used"); + expect_u_lt(shard1, 16, "Unexpected bin shard used"); edata = emap_edata_lookup(tsdn, &emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); - assert_u_lt(shard2, 4, "Unexpected bin shard used"); + expect_u_lt(shard2, 4, "Unexpected bin shard used"); if (shard1 > 0 || shard2 > 0) { /* Triggered sharded bin usage. */ @@ -98,7 +98,7 @@ TEST_BEGIN(test_bin_shard_mt) { sharded = true; } } - assert_b_eq(sharded, true, "Did not find sharded bins"); + expect_b_eq(sharded, true, "Did not find sharded bins"); } TEST_END @@ -108,14 +108,14 @@ TEST_BEGIN(test_bin_shard) { size_t miblen, miblen2, len; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.nshards", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.nshards", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); miblen2 = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib2, &miblen2), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib2, &miblen2), 0, "Unexpected mallctlnametomib() failure"); for (i = 0; i < nbins; i++) { @@ -124,22 +124,22 @@ TEST_BEGIN(test_bin_shard) { mib[2] = i; sz1 = sizeof(nshards); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, NULL, 0), 0, "Unexpected mallctlbymib() failure"); mib2[2] = i; sz2 = sizeof(size); - assert_d_eq(mallctlbymib(mib2, 
miblen2, (void *)&size, &sz2, + expect_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2, NULL, 0), 0, "Unexpected mallctlbymib() failure"); if (size >= 1 && size <= 128) { - assert_u_eq(nshards, 16, "Unexpected nshards"); + expect_u_eq(nshards, 16, "Unexpected nshards"); } else if (size == 256) { - assert_u_eq(nshards, 8, "Unexpected nshards"); + expect_u_eq(nshards, 8, "Unexpected nshards"); } else if (size > 128 && size <= 512) { - assert_u_eq(nshards, 4, "Unexpected nshards"); + expect_u_eq(nshards, 4, "Unexpected nshards"); } else { - assert_u_eq(nshards, 1, "Unexpected nshards"); + expect_u_eq(nshards, 1, "Unexpected nshards"); } } } diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index b747deb4..3eeb7a31 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -6,27 +6,27 @@ unsigned i, pow2; \ t x; \ \ - assert_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ \ for (i = 0; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ + expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ << i, "Unexpected result"); \ } \ \ for (i = 2; i < sizeof(t) * 8; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ ((t)1) << i, "Unexpected result"); \ } \ \ for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - assert_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ ((t)1) << (i+1), "Unexpected result"); \ } \ \ for (pow2 = 1; pow2 < 25; pow2++) { \ for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ x++) { \ - assert_##suf##_eq(pow2_ceil_##suf(x), \ + expect_##suf##_eq(pow2_ceil_##suf(x), \ ((t)1) << pow2, \ "Unexpected result, x=%"pri, x); \ } \ @@ -49,35 +49,35 @@ TEST_BEGIN(test_pow2_ceil_zu) { TEST_END void -assert_lg_ceil_range(size_t input, unsigned answer) { +expect_lg_ceil_range(size_t input, unsigned answer) { 
if (input == 1) { - assert_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); + expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); return; } - assert_zu_le(input, (ZU(1) << answer), + expect_zu_le(input, (ZU(1) << answer), "Got %u as lg_ceil of %zu", answer, input); - assert_zu_gt(input, (ZU(1) << (answer - 1)), + expect_zu_gt(input, (ZU(1) << (answer - 1)), "Got %u as lg_ceil of %zu", answer, input); } void -assert_lg_floor_range(size_t input, unsigned answer) { +expect_lg_floor_range(size_t input, unsigned answer) { if (input == 1) { - assert_u_eq(0, answer, "Got %u as lg_floor of 1", answer); + expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer); return; } - assert_zu_ge(input, (ZU(1) << answer), + expect_zu_ge(input, (ZU(1) << answer), "Got %u as lg_floor of %zu", answer, input); - assert_zu_lt(input, (ZU(1) << (answer + 1)), + expect_zu_lt(input, (ZU(1) << (answer + 1)), "Got %u as lg_floor of %zu", answer, input); } TEST_BEGIN(test_lg_ceil_floor) { for (size_t i = 1; i < 10 * 1000 * 1000; i++) { - assert_lg_ceil_range(i, lg_ceil(i)); - assert_lg_ceil_range(i, LG_CEIL(i)); - assert_lg_floor_range(i, lg_floor(i)); - assert_lg_floor_range(i, LG_FLOOR(i)); + expect_lg_ceil_range(i, lg_ceil(i)); + expect_lg_ceil_range(i, LG_CEIL(i)); + expect_lg_floor_range(i, lg_floor(i)); + expect_lg_floor_range(i, LG_FLOOR(i)); } for (int i = 10; i < 8 * (1 << LG_SIZEOF_PTR) - 5; i++) { for (size_t j = 0; j < (1 << 4); j++) { @@ -85,17 +85,17 @@ TEST_BEGIN(test_lg_ceil_floor) { - j * ((size_t)1 << (i - 4)); size_t num2 = ((size_t)1 << i) + j * ((size_t)1 << (i - 4)); - assert_zu_ne(num1, 0, "Invalid lg argument"); - assert_zu_ne(num2, 0, "Invalid lg argument"); - assert_lg_ceil_range(num1, lg_ceil(num1)); - assert_lg_ceil_range(num1, LG_CEIL(num1)); - assert_lg_ceil_range(num2, lg_ceil(num2)); - assert_lg_ceil_range(num2, LG_CEIL(num2)); + expect_zu_ne(num1, 0, "Invalid lg argument"); + expect_zu_ne(num2, 0, "Invalid lg argument"); + expect_lg_ceil_range(num1, 
lg_ceil(num1)); + expect_lg_ceil_range(num1, LG_CEIL(num1)); + expect_lg_ceil_range(num2, lg_ceil(num2)); + expect_lg_ceil_range(num2, LG_CEIL(num2)); - assert_lg_floor_range(num1, lg_floor(num1)); - assert_lg_floor_range(num1, LG_FLOOR(num1)); - assert_lg_floor_range(num2, lg_floor(num2)); - assert_lg_floor_range(num2, LG_FLOOR(num2)); + expect_lg_floor_range(num1, lg_floor(num1)); + expect_lg_floor_range(num1, LG_FLOOR(num1)); + expect_lg_floor_range(num2, lg_floor(num2)); + expect_lg_floor_range(num2, LG_FLOOR(num2)); } } } diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 182f2f60..6b0ea9ef 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -97,28 +97,28 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_info_t binfo_dyn; bitmap_info_init(&binfo_dyn, nbits); - assert_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), + expect_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); - assert_zu_eq(binfo->nbits, binfo_dyn.nbits, + expect_zu_eq(binfo->nbits, binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); #ifdef BITMAP_USE_TREE - assert_u_eq(binfo->nlevels, binfo_dyn.nlevels, + expect_u_eq(binfo->nlevels, binfo_dyn.nlevels, "Unexpected difference between static and dynamic initialization, " "nbits=%zu", nbits); { unsigned i; for (i = 0; i < binfo->nlevels; i++) { - assert_zu_eq(binfo->levels[i].group_offset, + expect_zu_eq(binfo->levels[i].group_offset, binfo_dyn.levels[i].group_offset, "Unexpected difference between static and dynamic " "initialization, nbits=%zu, level=%u", nbits, i); } } #else - assert_zu_eq(binfo->ngroups, binfo_dyn.ngroups, + expect_zu_eq(binfo->ngroups, binfo_dyn.ngroups, "Unexpected difference between static and dynamic initialization"); #endif } @@ -140,9 +140,9 @@ static size_t test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, size_t 
prev_size) { size_t size = bitmap_size(binfo); - assert_zu_ge(size, (nbits >> 3), + expect_zu_ge(size, (nbits >> 3), "Bitmap size is smaller than expected"); - assert_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); + expect_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); return size; } @@ -170,17 +170,17 @@ static void test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { - assert_false(bitmap_get(bitmap, binfo, i), + expect_false(bitmap_get(bitmap, binfo, i), "Bit should be unset"); } bitmap_init(bitmap, binfo, true); for (i = 0; i < nbits; i++) { - assert_true(bitmap_get(bitmap, binfo, i), "Bit should be set"); + expect_true(bitmap_get(bitmap, binfo, i), "Bit should be set"); } free(bitmap); @@ -207,13 +207,13 @@ static void test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); } @@ -238,20 +238,20 @@ static void test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) { size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); } - assert_true(bitmap_full(bitmap, binfo), "All bits should 
be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); for (i = 0; i < nbits; i++) { bitmap_unset(bitmap, binfo, i); } for (i = 0; i < nbits; i++) { bitmap_set(bitmap, binfo, i); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); free(bitmap); } @@ -275,25 +275,25 @@ TEST_END static void test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); - assert_ptr_not_null(bitmap, "Unexpected malloc() failure"); + expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); /* Iteratively set bits starting at the beginning. */ for (size_t i = 0; i < nbits; i++) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after previous first unset " "bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? 
i-1 : i), i, "First unset bit should be just after previous first unset " "bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should be just after previous first unset " "bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should be just after previous first unset " "bit"); } - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); /* * Iteratively unset bits starting at the end, and verify that @@ -301,17 +301,17 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { */ for (size_t i = nbits - 1; i < nbits; i--) { /* (nbits..0] */ bitmap_unset(bitmap, binfo, i); - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should the bit previously unset"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? 
i-1 : i), i, "First unset bit should the bit previously unset"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should the bit previously unset"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should the bit previously unset"); bitmap_unset(bitmap, binfo, i); } - assert_false(bitmap_get(bitmap, binfo, 0), "Bit should be unset"); + expect_false(bitmap_get(bitmap, binfo, 0), "Bit should be unset"); /* * Iteratively set bits starting at the beginning, and verify that @@ -319,29 +319,29 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { */ for (size_t i = 1; i < nbits; i++) { bitmap_set(bitmap, binfo, i - 1); - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after the bit previously " "set"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, "First unset bit should be just after the bit previously " "set"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should be just after the bit previously " "set"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "First unset bit should be just after the bit previously " "set"); bitmap_unset(bitmap, binfo, i); } - assert_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, + expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, "First unset bit should be the last bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1), + expect_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? 
nbits-2 : nbits-1), nbits - 1, "First unset bit should be the last bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, + expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, "First unset bit should be the last bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, + expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, "First unset bit should be the last bit"); - assert_true(bitmap_full(bitmap, binfo), "All bits should be set"); + expect_true(bitmap_full(bitmap, binfo), "All bits should be set"); /* * Bubble a "usu" pattern through the bitmap and verify that @@ -352,22 +352,22 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_unset(bitmap, binfo, i); bitmap_unset(bitmap, binfo, i+2); if (i > 0) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, "Unexpected first unset bit"); if (i + 3 < nbits) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+3), + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+3), nbits, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i+2, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i+2, "Unexpected first unset bit"); } } @@ -382,20 +382,20 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { for (size_t i = 0; i < nbits-1; i++) { bitmap_unset(bitmap, binfo, i); if (i > 0) { - assert_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, "Unexpected first unset bit"); } 
- assert_zu_eq(bitmap_ffu(bitmap, binfo, i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, "Unexpected first unset bit"); - assert_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), + expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), nbits-1, "Unexpected first unset bit"); - assert_zu_eq(bitmap_sfu(bitmap, binfo), i, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); } - assert_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, + expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, "Unexpected first unset bit"); } diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 5171d618..37314db2 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -14,7 +14,7 @@ static void test_write_cb(void *cbopaque, const char *s) { size_t prev_test_write_len = test_write_len; test_write_len += strlen(s); /* only increase the length */ arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ - assert_zu_le(prev_test_write_len, test_write_len, + expect_zu_le(prev_test_write_len, test_write_len, "Test write overflowed"); } @@ -22,7 +22,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - assert_ptr_not_null(buf_writer->buf, "Buffer is null"); + expect_ptr_not_null(buf_writer->buf, "Buffer is null"); write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); void *cbopaque = buf_writer_get_cbopaque(buf_writer); @@ -41,7 +41,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. 
*/ - assert_u64_eq(arg_store, arg, "Call " + expect_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); remain %= buf_writer->buf_size; if (remain == 0) { @@ -49,12 +49,12 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += buf_writer->buf_size; } } - assert_zu_eq(test_write_len + remain, i * unit, + expect_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } buf_writer_flush(buf_writer); - assert_zu_eq(test_write_len, n_unit * unit, + expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); } @@ -65,7 +65,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_static) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_body(tsdn, &buf_writer); @@ -75,7 +75,7 @@ TEST_END TEST_BEGIN(test_buf_write_dynamic) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); test_buf_writer_body(tsdn, &buf_writer); } @@ -84,13 +84,13 @@ TEST_END TEST_BEGIN(test_buf_write_oom) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + expect_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); - assert_ptr_null(buf_writer.buf, "Buffer should be null"); + expect_ptr_null(buf_writer.buf, "Buffer should be null"); write_cb_t *write_cb = 
buf_writer_get_write_cb(&buf_writer); - assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); + expect_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); void *cbopaque = buf_writer_get_cbopaque(&buf_writer); - assert_ptr_eq(cbopaque, &arg, "Should use arg"); + expect_ptr_eq(cbopaque, &arg, "Should use arg"); char s[UNIT_MAX + 1]; size_t n_unit, i; @@ -107,14 +107,14 @@ TEST_BEGIN(test_buf_write_oom) { for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); write_cb(cbopaque, s); - assert_u64_eq(arg_store, arg, + expect_u64_eq(arg_store, arg, "Call back argument didn't get through"); - assert_zu_eq(test_write_len, i * unit, + expect_zu_eq(test_write_len, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } buf_writer_flush(&buf_writer); - assert_zu_eq(test_write_len, n_unit * unit, + expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" " writing %zu strings of length %zu", n_unit, unit); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 12201a22..f98a92c3 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -8,52 +8,52 @@ TEST_BEGIN(test_cache_bin) { /* Page aligned to make sure lowbits not overflowable. */ void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); - assert_ptr_not_null(stack, "Unexpected mallocx failure"); + expect_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)stack; - assert_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, + expect_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, "Incorrect empty position"); - /* Not using assert_zu etc on cache_bin_sz_t since it may change. 
*/ - assert_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); + /* Not using expect_zu etc on cache_bin_sz_t since it may change. */ + expect_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); bool success; void *ret = cache_bin_alloc_easy(bin, &success, 0); - assert_false(success, "Empty cache bin should not alloc"); - assert_true(cache_bin_low_water_get(bin, 0) == 0, + expect_false(success, "Empty cache bin should not alloc"); + expect_true(cache_bin_low_water_get(bin, 0) == 0, "Incorrect low water mark"); cache_bin_ncached_set(bin, 0, 0); - assert_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); + expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - assert_true(success && cache_bin_ncached_get(bin, 0) == i, + expect_true(success && cache_bin_ncached_get(bin, 0) == i, "Bin dalloc failure"); } success = cache_bin_dalloc_easy(bin, (void *)1); - assert_false(success, "Bin should be full"); - assert_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); + expect_false(success, "Bin should be full"); + expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); cache_bin_ncached_set(bin, 0, ncached_max); - assert_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); + expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { ret = cache_bin_alloc_easy(bin, &success, 0); cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0); - assert_true(success && ncached == i - 1, + expect_true(success && ncached == i - 1, "Cache bin alloc failure"); - assert_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - assert_true(cache_bin_low_water_get(bin, 0) == ncached, + expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); + expect_true(cache_bin_low_water_get(bin, 0) == ncached, "Incorrect low water mark"); } ret = cache_bin_alloc_easy(bin, &success, 0); - assert_false(success, "Empty cache bin should not alloc."); - assert_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, + expect_false(success, "Empty cache bin should not alloc."); + expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); } TEST_END diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 707ea5f8..36142acd 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -6,11 +6,11 @@ TEST_BEGIN(test_new_delete) { tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); - assert_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, + expect_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); } @@ -30,16 +30,16 @@ TEST_BEGIN(test_count_insert_search_remove) { tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), "Unexpected ckh_new() error"); - assert_zu_eq(ckh_count(&ckh), 0, + expect_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. 
*/ for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { ckh_insert(tsd, &ckh, strs[i], strs[i]); - assert_zu_eq(ckh_count(&ckh), i+1, + expect_zu_eq(ckh_count(&ckh), i+1, "ckh_count() should return %zu, but it returned %zu", i+1, ckh_count(&ckh)); } @@ -57,17 +57,17 @@ TEST_BEGIN(test_count_insert_search_remove) { vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_search(&ckh, strs[i], kp, vp), + expect_false(ckh_search(&ckh, strs[i], kp, vp), "Unexpected ckh_search() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? strs[i] : (const char *)NULL; - assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", + expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); - assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", + expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); } - assert_true(ckh_search(&ckh, missing, NULL, NULL), + expect_true(ckh_search(&ckh, missing, NULL, NULL), "Unexpected ckh_search() success"); /* Remove. */ @@ -83,16 +83,16 @@ TEST_BEGIN(test_count_insert_search_remove) { vp = (i & 2) ? &v.p : NULL; k.p = NULL; v.p = NULL; - assert_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), + expect_false(ckh_remove(tsd, &ckh, strs[i], kp, vp), "Unexpected ckh_remove() error"); ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? 
strs[i] : (const char *)NULL; - assert_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", + expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); - assert_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", + expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); - assert_zu_eq(ckh_count(&ckh), + expect_zu_eq(ckh_count(&ckh), sizeof(strs)/sizeof(const char *) - i - 1, "ckh_count() should return %zu, but it returned %zu", sizeof(strs)/sizeof(const char *) - i - 1, @@ -113,40 +113,40 @@ TEST_BEGIN(test_insert_iter_remove) { tsd = tsd_fetch(); - assert_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, + expect_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { p[i] = mallocx(i+1, 0); - assert_ptr_not_null(p[i], "Unexpected mallocx() failure"); + expect_ptr_not_null(p[i], "Unexpected mallocx() failure"); } for (i = 0; i < NITEMS; i++) { size_t j; for (j = i; j < NITEMS; j++) { - assert_false(ckh_insert(tsd, &ckh, p[j], p[j]), + expect_false(ckh_insert(tsd, &ckh, p[j], p[j]), "Unexpected ckh_insert() failure"); - assert_false(ckh_search(&ckh, p[j], &q, &r), + expect_false(ckh_search(&ckh, p[j], &q, &r), "Unexpected ckh_search() failure"); - assert_ptr_eq(p[j], q, "Key pointer mismatch"); - assert_ptr_eq(p[j], r, "Value pointer mismatch"); + expect_ptr_eq(p[j], q, "Key pointer mismatch"); + expect_ptr_eq(p[j], r, "Value pointer mismatch"); } - assert_zu_eq(ckh_count(&ckh), NITEMS, + expect_zu_eq(ckh_count(&ckh), NITEMS, "ckh_count() should return %zu, but it returned %zu", NITEMS, ckh_count(&ckh)); for (j = i + 1; j < NITEMS; j++) { - assert_false(ckh_search(&ckh, p[j], NULL, NULL), + expect_false(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[j], &q, &r), + expect_false(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() failure"); - assert_ptr_eq(p[j], q, "Key pointer mismatch"); - 
assert_ptr_eq(p[j], r, "Value pointer mismatch"); - assert_true(ckh_search(&ckh, p[j], NULL, NULL), + expect_ptr_eq(p[j], q, "Key pointer mismatch"); + expect_ptr_eq(p[j], r, "Value pointer mismatch"); + expect_true(ckh_search(&ckh, p[j], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[j], &q, &r), + expect_true(ckh_remove(tsd, &ckh, p[j], &q, &r), "Unexpected ckh_remove() success"); } @@ -159,11 +159,11 @@ TEST_BEGIN(test_insert_iter_remove) { for (tabind = 0; !ckh_iter(&ckh, &tabind, &q, &r);) { size_t k; - assert_ptr_eq(q, r, "Key and val not equal"); + expect_ptr_eq(q, r, "Key and val not equal"); for (k = 0; k < NITEMS; k++) { if (p[k] == q) { - assert_false(seen[k], + expect_false(seen[k], "Item %zu already seen", k); seen[k] = true; break; @@ -172,29 +172,29 @@ TEST_BEGIN(test_insert_iter_remove) { } for (j = 0; j < i + 1; j++) { - assert_true(seen[j], "Item %zu not seen", j); + expect_true(seen[j], "Item %zu not seen", j); } for (; j < NITEMS; j++) { - assert_false(seen[j], "Item %zu seen", j); + expect_false(seen[j], "Item %zu seen", j); } } } for (i = 0; i < NITEMS; i++) { - assert_false(ckh_search(&ckh, p[i], NULL, NULL), + expect_false(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() failure"); - assert_false(ckh_remove(tsd, &ckh, p[i], &q, &r), + expect_false(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() failure"); - assert_ptr_eq(p[i], q, "Key pointer mismatch"); - assert_ptr_eq(p[i], r, "Value pointer mismatch"); - assert_true(ckh_search(&ckh, p[i], NULL, NULL), + expect_ptr_eq(p[i], q, "Key pointer mismatch"); + expect_ptr_eq(p[i], r, "Value pointer mismatch"); + expect_true(ckh_search(&ckh, p[i], NULL, NULL), "Unexpected ckh_search() success"); - assert_true(ckh_remove(tsd, &ckh, p[i], &q, &r), + expect_true(ckh_remove(tsd, &ckh, p[i], &q, &r), "Unexpected ckh_remove() success"); dallocx(p[i], 0); } - assert_zu_eq(ckh_count(&ckh), 0, + expect_zu_eq(ckh_count(&ckh), 0, "ckh_count() 
should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); ckh_delete(tsd, &ckh); diff --git a/test/unit/counter.c b/test/unit/counter.c index 619510d3..585cbc63 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -16,24 +16,24 @@ TEST_BEGIN(test_counter_accum) { trigger = counter_accum(tsd_tsdn(tsd), &c, increment); accum += increment; if (accum < interval) { - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); } else { - assert_b_eq(trigger, true, "Should have triggered"); + expect_b_eq(trigger, true, "Should have triggered"); } } - assert_b_eq(trigger, true, "Should have triggered"); + expect_b_eq(trigger, true, "Should have triggered"); } TEST_END void -assert_counter_value(counter_accum_t *c, uint64_t v) { +expect_counter_value(counter_accum_t *c, uint64_t v) { uint64_t accum; #ifdef JEMALLOC_ATOMIC_U64 accum = atomic_load_u64(&(c->accumbytes), ATOMIC_RELAXED); #else accum = c->accumbytes; #endif - assert_u64_eq(accum, v, "Counter value mismatch"); + expect_u64_eq(accum, v, "Counter value mismatch"); } TEST_BEGIN(test_counter_rollback) { @@ -47,34 +47,34 @@ TEST_BEGIN(test_counter_rollback) { bool trigger; trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); counter_rollback(tsd_tsdn(tsd), &c, half_interval + 1); - assert_counter_value(&c, 0); + expect_counter_value(&c, 0); trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); + expect_b_eq(trigger, false, "Should not trigger"); counter_rollback(tsd_tsdn(tsd), &c, half_interval - 1); - assert_counter_value(&c, 1); + expect_counter_value(&c, 1); counter_rollback(tsd_tsdn(tsd), &c, 1); - assert_counter_value(&c, 0); + expect_counter_value(&c, 0); trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); + 
expect_b_eq(trigger, false, "Should not trigger"); counter_rollback(tsd_tsdn(tsd), &c, 1); - assert_counter_value(&c, half_interval - 1); + expect_counter_value(&c, half_interval - 1); trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - assert_b_eq(trigger, false, "Should not trigger"); - assert_counter_value(&c, interval - 1); + expect_b_eq(trigger, false, "Should not trigger"); + expect_counter_value(&c, interval - 1); trigger = counter_accum(tsd_tsdn(tsd), &c, 1); - assert_b_eq(trigger, true, "Should have triggered"); - assert_counter_value(&c, 0); + expect_b_eq(trigger, true, "Should have triggered"); + expect_counter_value(&c, 0); trigger = counter_accum(tsd_tsdn(tsd), &c, interval + 1); - assert_b_eq(trigger, true, "Should have triggered"); - assert_counter_value(&c, 1); + expect_b_eq(trigger, true, "Should have triggered"); + expect_counter_value(&c, 1); } TEST_END @@ -114,7 +114,7 @@ TEST_BEGIN(test_counter_mt) { thd_join(thds[i], &ret); sum += (uintptr_t)ret; } - assert_u64_eq(sum, N_THDS * N_ITER_THD / (interval / ITER_INCREMENT), + expect_u64_eq(sum, N_THDS * N_ITER_THD / (interval / ITER_INCREMENT), "Incorrect number of triggers"); } TEST_END diff --git a/test/unit/decay.c b/test/unit/decay.c index 59936db3..7ed270f4 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -17,7 +17,7 @@ check_background_thread_enabled(void) { if (ret == ENOENT) { return false; } - assert_d_eq(ret, 0, "Unexpected mallctl error"); + expect_d_eq(ret, 0, "Unexpected mallctl error"); return enabled; } @@ -39,22 +39,22 @@ static unsigned do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), + 
expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, "Unexpected mallctlbymib() failure"); - assert_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, "Unexpected mallctlbymib() failure"); @@ -65,17 +65,17 @@ static void do_arena_destroy(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } void do_epoch(void) { uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } @@ -83,10 +83,10 @@ void do_purge(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -94,10 +94,10 @@ void 
do_decay(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -105,12 +105,12 @@ static uint64_t get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, + expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; uint64_t npurge = 0; size_t sz = sizeof(npurge); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); return npurge; } @@ -145,12 +145,12 @@ get_arena_pdirty(unsigned arena_ind) { do_epoch(); size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; size_t pdirty; size_t sz = sizeof(pdirty); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); return pdirty; } @@ -160,12 +160,12 @@ get_arena_pmuzzy(unsigned arena_ind) { do_epoch(); size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; size_t pmuzzy; size_t sz = sizeof(pmuzzy); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, "Unexpected mallctlbymib() failure"); return pmuzzy; } @@ -173,7 +173,7 @@ get_arena_pmuzzy(unsigned arena_ind) { static void * do_mallocx(size_t size, int flags) { void *p = mallocx(size, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); return p; } @@ -193,7 +193,7 @@ TEST_BEGIN(test_decay_ticks) { void *p; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); /* Set up a manually managed arena for test. */ @@ -202,11 +202,11 @@ TEST_BEGIN(test_decay_ticks) { /* Migrate to the new arena, and get the ticker. 
*/ unsigned old_arena_ind; size_t sz_arena_ind = sizeof(old_arena_ind); - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); - assert_ptr_not_null(decay_ticker, + expect_ptr_not_null(decay_ticker, "Unexpected failure getting decay ticker"); /* @@ -218,38 +218,38 @@ TEST_BEGIN(test_decay_ticks) { /* malloc(). */ tick0 = ticker_read(decay_ticker); p = malloc(large0); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); /* free(). */ tick0 = ticker_read(decay_ticker); free(p); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); /* calloc(). */ tick0 = ticker_read(decay_ticker); p = calloc(1, large0); - assert_ptr_not_null(p, "Unexpected calloc() failure"); + expect_ptr_not_null(p, "Unexpected calloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); free(p); /* posix_memalign(). */ tick0 = ticker_read(decay_ticker); - assert_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, + expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during posix_memalign()"); free(p); /* aligned_alloc(). 
*/ tick0 = ticker_read(decay_ticker); p = aligned_alloc(sizeof(size_t), large0); - assert_ptr_not_null(p, "Unexpected aligned_alloc() failure"); + expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during aligned_alloc()"); free(p); @@ -257,20 +257,20 @@ TEST_BEGIN(test_decay_ticks) { /* Allocate. */ tick0 = ticker_read(decay_ticker); p = realloc(NULL, large0); - assert_ptr_not_null(p, "Unexpected realloc() failure"); + expect_ptr_not_null(p, "Unexpected realloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Reallocate. */ tick0 = ticker_read(decay_ticker); p = realloc(p, large0); - assert_ptr_not_null(p, "Unexpected realloc() failure"); + expect_ptr_not_null(p, "Unexpected realloc() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Deallocate. */ tick0 = ticker_read(decay_ticker); realloc(p, 0); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); + expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* * Test the *allocx() APIs using large and small size classes, with @@ -288,40 +288,40 @@ TEST_BEGIN(test_decay_ticks) { /* mallocx(). */ tick0 = ticker_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during mallocx() (sz=%zu)", sz); /* rallocx(). 
*/ tick0 = ticker_read(decay_ticker); p = rallocx(p, sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected rallocx() failure"); + expect_ptr_not_null(p, "Unexpected rallocx() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during rallocx() (sz=%zu)", sz); /* xallocx(). */ tick0 = ticker_read(decay_ticker); xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during xallocx() (sz=%zu)", sz); /* dallocx(). */ tick0 = ticker_read(decay_ticker); dallocx(p, MALLOCX_TCACHE_NONE); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during dallocx() (sz=%zu)", sz); /* sdallocx(). */ p = mallocx(sz, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); tick0 = ticker_read(decay_ticker); sdallocx(p, sz, MALLOCX_TCACHE_NONE); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during sdallocx() " "(sz=%zu)", sz); } @@ -338,11 +338,11 @@ TEST_BEGIN(test_decay_ticks) { size_t tcache_max, sz_tcache_max; sz_tcache_max = sizeof(tcache_max); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, + expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { @@ -351,26 +351,26 @@ TEST_BEGIN(test_decay_ticks) { /* tcache fill. 
*/ tick0 = ticker_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); tick1 = ticker_read(decay_ticker); - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache fill " "(sz=%zu)", sz); /* tcache flush. */ dallocx(p, MALLOCX_TCACHE(tcache_ind)); tick0 = ticker_read(decay_ticker); - assert_d_eq(mallctl("tcache.flush", NULL, NULL, + expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); tick1 = ticker_read(decay_ticker); /* Will only tick if it's in tcache. */ if (sz <= tcache_max) { - assert_u32_ne(tick1, tick0, + expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache " "flush (sz=%zu)", sz); } else { - assert_u32_eq(tick1, tick0, + expect_u32_eq(tick1, tick0, "Unexpected ticker tick during tcache " "flush (sz=%zu)", sz); } @@ -417,7 +417,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, dallocx(p, flags); if (config_stats) { - assert_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + + expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + muzzy_npurge0, "Expected purging to occur"); } #undef NINTERVALS @@ -442,7 +442,7 @@ TEST_BEGIN(test_decay_ticker) { size_t tcache_max; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, + expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, 0), 0, "Unexpected mallctl failure"); large = nallocx(tcache_max + 1, flags); @@ -467,7 +467,7 @@ TEST_BEGIN(test_decay_ticker) { dallocx(ps[i], flags); unsigned nupdates0 = nupdates_mock; do_decay(arena_ind); - assert_u_gt(nupdates_mock, nupdates0, + expect_u_gt(nupdates_mock, nupdates0, "Expected nstime_update() to be called"); } @@ -495,10 +495,10 @@ TEST_BEGIN(test_decay_nonmonotonic) { unsigned i, nupdates0; sz = sizeof(size_t); - 
assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); do_epoch(); sz = sizeof(uint64_t); @@ -515,15 +515,15 @@ TEST_BEGIN(test_decay_nonmonotonic) { for (i = 0; i < NPS; i++) { ps[i] = mallocx(large0, flags); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure"); + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure"); } for (i = 0; i < NPS; i++) { dallocx(ps[i], flags); nupdates0 = nupdates_mock; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected arena.0.decay failure"); - assert_u_gt(nupdates_mock, nupdates0, + expect_u_gt(nupdates_mock, nupdates0, "Expected nstime_update() to be called"); } @@ -532,7 +532,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { npurge1 = get_arena_npurge(0); if (config_stats) { - assert_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); + expect_u64_eq(npurge0, npurge1, "Unexpected purging occurred"); } nstime_monotonic = nstime_monotonic_orig; @@ -545,16 +545,16 @@ TEST_BEGIN(test_decay_now) { test_skip_if(check_background_thread_enabled()); unsigned arena_ind = do_arena_create(0, 0); - assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; /* Verify that dirty/muzzy pages never linger after deallocation. 
*/ for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { size_t size = sizes[i]; generate_dirty(arena_ind, size); - assert_zu_eq(get_arena_pdirty(arena_ind), 0, + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); } do_arena_destroy(arena_ind); @@ -566,8 +566,8 @@ TEST_BEGIN(test_decay_never) { unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - assert_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); - assert_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); + expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; void *ptrs[sizeof(sizes)/sizeof(size_t)]; for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { @@ -576,15 +576,15 @@ TEST_BEGIN(test_decay_never) { /* Verify that each deallocation generates additional dirty pages. 
*/ size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); - assert_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); - assert_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); + expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { dallocx(ptrs[i], flags); size_t pdirty = get_arena_pdirty(arena_ind); size_t pmuzzy = get_arena_pmuzzy(arena_ind); - assert_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), + expect_zu_gt(pdirty + (size_t)get_arena_dirty_purged(arena_ind), pdirty_prev, "Expected dirty pages to increase."); - assert_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); + expect_zu_eq(pmuzzy, 0, "Unexpected muzzy pages"); pdirty_prev = pdirty; } do_arena_destroy(arena_ind); diff --git a/test/unit/div.c b/test/unit/div.c index b47f10b2..29aea665 100644 --- a/test/unit/div.c +++ b/test/unit/div.c @@ -14,7 +14,7 @@ TEST_BEGIN(test_div_exhaustive) { dividend += divisor) { size_t quotient = div_compute( &div_info, dividend); - assert_zu_eq(dividend, quotient * divisor, + expect_zu_eq(dividend, quotient * divisor, "With divisor = %zu, dividend = %zu, " "got quotient %zu", divisor, dividend, quotient); } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index 712c9e10..ef8f9ff5 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -58,14 +58,14 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { size_t written = malloc_snprintf(buf_descriptor->buf, buf_descriptor->len, "%s", str); - assert_zu_eq(written, strlen(str), "Buffer overflow!"); + expect_zu_eq(written, strlen(str), "Buffer overflow!"); buf_descriptor->buf += written; buf_descriptor->len -= written; - assert_zu_gt(buf_descriptor->len, 0, "Buffer out of space!"); + expect_zu_gt(buf_descriptor->len, 0, "Buffer out of space!"); } static void -assert_emit_output(void (*emit_fn)(emitter_t *), +expect_emit_output(void 
(*emit_fn)(emitter_t *), const char *expected_json_output, const char *expected_json_compact_output, const char *expected_table_output) { @@ -80,7 +80,7 @@ assert_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_json, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - assert_str_eq(expected_json_output, buf, "json output failure"); + expect_str_eq(expected_json_output, buf, "json output failure"); buf_descriptor.buf = buf; buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; @@ -89,7 +89,7 @@ assert_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - assert_str_eq(expected_json_compact_output, buf, + expect_str_eq(expected_json_compact_output, buf, "compact json output failure"); buf_descriptor.buf = buf; @@ -99,7 +99,7 @@ assert_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_table, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - assert_str_eq(expected_table_output, buf, "table output failure"); + expect_str_eq(expected_table_output, buf, "table output failure"); } static void @@ -505,7 +505,7 @@ static const char *table_row_table = #define GENERATE_TEST(feature) \ TEST_BEGIN(test_##feature) { \ - assert_emit_output(emit_##feature, feature##_json, \ + expect_emit_output(emit_##feature, feature##_json, \ feature##_json_compact, feature##_table); \ } \ TEST_END diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 64b3baa7..27a4a7ea 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -12,21 +12,21 @@ TEST_BEGIN(test_small_extent_size) { */ sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0, + 
expect_d_eq(mallctlnametomib("arenas.bin.0.slab_size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); for (i = 0; i < nbins; i++) { mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); - assert_zu_eq(extent_size, + expect_zu_eq(extent_size, sz_psz_quantize_floor(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); - assert_zu_eq(extent_size, + expect_zu_eq(extent_size, sz_psz_quantize_ceil(extent_size), "Small extent quantization should be a no-op " "(extent_size=%zu)", extent_size); @@ -47,42 +47,42 @@ TEST_BEGIN(test_large_extent_size) { */ sz = sizeof(bool); - assert_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + expect_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); for (i = 0; i < nlextents; i++) { size_t lextent_size, extent_size, floor, ceil; mib[2] = i; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); extent_size = cache_oblivious ? 
lextent_size + PAGE : lextent_size; floor = sz_psz_quantize_floor(extent_size); ceil = sz_psz_quantize_ceil(extent_size); - assert_zu_eq(extent_size, floor, + expect_zu_eq(extent_size, floor, "Extent quantization should be a no-op for precise size " "(lextent_size=%zu, extent_size=%zu)", lextent_size, extent_size); - assert_zu_eq(extent_size, ceil, + expect_zu_eq(extent_size, ceil, "Extent quantization should be a no-op for precise size " "(lextent_size=%zu, extent_size=%zu)", lextent_size, extent_size); if (i > 0) { - assert_zu_eq(extent_size_prev, + expect_zu_eq(extent_size_prev, sz_psz_quantize_floor(extent_size - PAGE), "Floor should be a precise size"); if (extent_size_prev < ceil_prev) { - assert_zu_eq(ceil_prev, extent_size, + expect_zu_eq(ceil_prev, extent_size, "Ceiling should be a precise size " "(extent_size_prev=%zu, ceil_prev=%zu, " "extent_size=%zu)", extent_size_prev, @@ -112,17 +112,17 @@ TEST_BEGIN(test_monotonic) { floor = sz_psz_quantize_floor(extent_size); ceil = sz_psz_quantize_ceil(extent_size); - assert_zu_le(floor, extent_size, + expect_zu_le(floor, extent_size, "Floor should be <= (floor=%zu, extent_size=%zu, ceil=%zu)", floor, extent_size, ceil); - assert_zu_ge(ceil, extent_size, + expect_zu_ge(ceil, extent_size, "Ceiling should be >= (floor=%zu, extent_size=%zu, " "ceil=%zu)", floor, extent_size, ceil); - assert_zu_le(floor_prev, floor, "Floor should be monotonic " + expect_zu_le(floor_prev, floor, "Floor should be monotonic " "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)", floor_prev, floor, extent_size, ceil); - assert_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + expect_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)", floor, extent_size, ceil_prev, ceil); diff --git a/test/unit/fork.c b/test/unit/fork.c index b1690750..4137423f 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -36,25 +36,25 @@ TEST_BEGIN(test_fork) { /* Set up a manually managed 
arena for test. */ unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); /* Migrate to the new arena. */ unsigned old_arena_ind; sz = sizeof(old_arena_ind); - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); pid = fork(); free(p); p = malloc(64); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); free(p); if (pid == -1) { diff --git a/test/unit/hash.c b/test/unit/hash.c index 7cc034f8..49f08238 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -131,7 +131,7 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { default: not_reached(); } - assert_u32_eq(computed, expected, + expect_u32_eq(computed, expected, "Hash mismatch for %s(): expected %#x but got %#x", hash_variant_string(variant), expected, computed); } diff --git a/test/unit/hook.c b/test/unit/hook.c index 36dcb89d..16a6f1b0 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -70,10 +70,10 @@ set_args_raw(uintptr_t *args_raw, int nargs) { } static void -assert_args_raw(uintptr_t *args_raw_expected, int nargs) { +expect_args_raw(uintptr_t *args_raw_expected, int nargs) { int cmp = memcmp(args_raw_expected, arg_args_raw, sizeof(uintptr_t) * nargs); - assert_d_eq(cmp, 0, "Raw args mismatch"); + expect_d_eq(cmp, 0, "Raw args mismatch"); } static void @@ -132,34 +132,34 @@ TEST_BEGIN(test_hooks_basic) { reset_args(); hook_invoke_alloc(hook_alloc_posix_memalign, (void *)222, 333, args_raw); - assert_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); - 
assert_d_eq((int)hook_alloc_posix_memalign, arg_type, + expect_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); + expect_d_eq((int)hook_alloc_posix_memalign, arg_type, "Passed wrong alloc type"); - assert_ptr_eq((void *)222, arg_result, "Passed wrong result address"); - assert_u64_eq(333, arg_result_raw, "Passed wrong result"); - assert_args_raw(args_raw, 3); + expect_ptr_eq((void *)222, arg_result, "Passed wrong result address"); + expect_u64_eq(333, arg_result_raw, "Passed wrong result"); + expect_args_raw(args_raw, 3); /* Dalloc */ reset_args(); hook_invoke_dalloc(hook_dalloc_sdallocx, (void *)222, args_raw); - assert_d_eq((int)hook_dalloc_sdallocx, arg_type, + expect_d_eq((int)hook_dalloc_sdallocx, arg_type, "Passed wrong dalloc type"); - assert_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); - assert_ptr_eq((void *)222, arg_address, "Passed wrong address"); - assert_args_raw(args_raw, 3); + expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); + expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); + expect_args_raw(args_raw, 3); /* Expand */ reset_args(); hook_invoke_expand(hook_expand_xallocx, (void *)222, 333, 444, 555, args_raw); - assert_d_eq((int)hook_expand_xallocx, arg_type, + expect_d_eq((int)hook_expand_xallocx, arg_type, "Passed wrong expand type"); - assert_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); - assert_ptr_eq((void *)222, arg_address, "Passed wrong address"); - assert_zu_eq(333, arg_old_usize, "Passed wrong old usize"); - assert_zu_eq(444, arg_new_usize, "Passed wrong new usize"); - assert_zu_eq(555, arg_result_raw, "Passed wrong result"); - assert_args_raw(args_raw, 4); + expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); + expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); + expect_zu_eq(333, arg_old_usize, "Passed wrong old usize"); + expect_zu_eq(444, arg_new_usize, "Passed wrong new usize"); + expect_zu_eq(555, arg_result_raw, "Passed wrong 
result"); + expect_args_raw(args_raw, 4); hook_remove(TSDN_NULL, handle); } @@ -177,24 +177,24 @@ TEST_BEGIN(test_hooks_null) { void *handle3 = hook_install(TSDN_NULL, &hooks3); void *handle4 = hook_install(TSDN_NULL, &hooks4); - assert_ptr_ne(handle1, NULL, "Hook installation failed"); - assert_ptr_ne(handle2, NULL, "Hook installation failed"); - assert_ptr_ne(handle3, NULL, "Hook installation failed"); - assert_ptr_ne(handle4, NULL, "Hook installation failed"); + expect_ptr_ne(handle1, NULL, "Hook installation failed"); + expect_ptr_ne(handle2, NULL, "Hook installation failed"); + expect_ptr_ne(handle3, NULL, "Hook installation failed"); + expect_ptr_ne(handle4, NULL, "Hook installation failed"); uintptr_t args_raw[4] = {10, 20, 30, 40}; call_count = 0; hook_invoke_alloc(hook_alloc_malloc, NULL, 0, args_raw); - assert_d_eq(call_count, 1, "Called wrong number of times"); + expect_d_eq(call_count, 1, "Called wrong number of times"); call_count = 0; hook_invoke_dalloc(hook_dalloc_free, NULL, args_raw); - assert_d_eq(call_count, 1, "Called wrong number of times"); + expect_d_eq(call_count, 1, "Called wrong number of times"); call_count = 0; hook_invoke_expand(hook_expand_realloc, NULL, 0, 0, 0, args_raw); - assert_d_eq(call_count, 1, "Called wrong number of times"); + expect_d_eq(call_count, 1, "Called wrong number of times"); hook_remove(TSDN_NULL, handle1); hook_remove(TSDN_NULL, handle2); @@ -206,16 +206,16 @@ TEST_END TEST_BEGIN(test_hooks_remove) { hooks_t hooks = {&test_alloc_hook, NULL, NULL, NULL}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); call_count = 0; uintptr_t args_raw[4] = {10, 20, 30, 40}; hook_invoke_alloc(hook_alloc_malloc, NULL, 0, args_raw); - assert_d_eq(call_count, 1, "Hook not invoked"); + expect_d_eq(call_count, 1, "Hook not invoked"); call_count = 0; hook_remove(TSDN_NULL, handle); hook_invoke_alloc(hook_alloc_malloc, 
NULL, 0, NULL); - assert_d_eq(call_count, 0, "Hook invoked after removal"); + expect_d_eq(call_count, 0, "Hook invoked after removal"); } TEST_END @@ -224,7 +224,7 @@ TEST_BEGIN(test_hooks_alloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ hooks_t hooks = {&test_alloc_hook, NULL, NULL, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); /* Stop malloc from being optimized away. */ volatile int err; @@ -233,69 +233,69 @@ TEST_BEGIN(test_hooks_alloc_simple) { /* malloc */ reset(); ptr = malloc(1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); /* posix_memalign */ reset(); err = posix_memalign((void **)&ptr, 1024, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_posix_memalign, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_posix_memalign, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw, + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)err, 
(uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); + expect_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); free(ptr); /* aligned_alloc */ reset(); ptr = aligned_alloc(1024, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_aligned_alloc, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); /* calloc */ reset(); ptr = calloc(11, 13); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, 
"Wrong raw result"); - assert_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); free(ptr); /* memalign */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN reset(); ptr = memalign(1024, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_MEMALIGN */ @@ -303,27 +303,27 @@ TEST_BEGIN(test_hooks_alloc_simple) { #ifdef JEMALLOC_OVERRIDE_VALLOC reset(); ptr = valloc(1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - 
assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_VALLOC */ /* mallocx */ reset(); ptr = mallocx(1, MALLOCX_LG_ALIGN(10)); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], "Wrong flags"); free(ptr); @@ -335,7 +335,7 @@ TEST_BEGIN(test_hooks_dalloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, &test_dalloc_hook, NULL, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -343,35 +343,35 @@ TEST_BEGIN(test_hooks_dalloc_simple) { reset(); ptr = malloc(1); free(ptr); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_free, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_free, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); /* dallocx() */ reset(); ptr = malloc(1); dallocx(ptr, MALLOCX_TCACHE_NONE); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], "Wrong raw arg"); /* sdallocx() */ reset(); ptr = malloc(1); sdallocx(ptr, 1, MALLOCX_TCACHE_NONE); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, 
(int)hook_dalloc_sdallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); - assert_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_sdallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); + expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], "Wrong raw arg"); hook_remove(TSDN_NULL, handle); @@ -382,7 +382,7 @@ TEST_BEGIN(test_hooks_expand_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ hooks_t hooks = {NULL, NULL, &test_expand_hook, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -390,17 +390,17 @@ TEST_BEGIN(test_hooks_expand_simple) { reset(); ptr = malloc(1); size_t new_usize = xallocx(ptr, 100, 200, MALLOCX_TCACHE_NONE); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_expand_xallocx, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong pointer expanded"); - assert_u64_eq(arg_old_usize, nallocx(1, 0), "Wrong old usize"); - assert_u64_eq(arg_new_usize, sallocx(ptr, 0), "Wrong new usize"); - assert_u64_eq(new_usize, arg_result_raw, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong arg"); - assert_u64_eq(100, arg_args_raw[1], "Wrong arg"); - assert_u64_eq(200, arg_args_raw[2], "Wrong arg"); - assert_u64_eq(MALLOCX_TCACHE_NONE, arg_args_raw[3], "Wrong arg"); + expect_d_eq(call_count, 1, 
"Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_expand_xallocx, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer expanded"); + expect_u64_eq(arg_old_usize, nallocx(1, 0), "Wrong old usize"); + expect_u64_eq(arg_new_usize, sallocx(ptr, 0), "Wrong new usize"); + expect_u64_eq(new_usize, arg_result_raw, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong arg"); + expect_u64_eq(100, arg_args_raw[1], "Wrong arg"); + expect_u64_eq(200, arg_args_raw[2], "Wrong arg"); + expect_u64_eq(MALLOCX_TCACHE_NONE, arg_args_raw[3], "Wrong arg"); hook_remove(TSDN_NULL, handle); } @@ -410,21 +410,21 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; /* realloc(NULL, size) as malloc */ reset(); ptr = realloc(NULL, 1); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); /* realloc(ptr, 0) as free */ @@ -432,29 
+432,29 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { ptr = malloc(1); reset(); realloc(ptr, 0); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_dalloc_realloc, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - assert_u64_eq((uintptr_t)0, arg_args_raw[1], + expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); } /* realloc(NULL, 0) as malloc(0) */ reset(); ptr = realloc(NULL, 0); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); - assert_ptr_eq(ptr, arg_result, "Wrong result"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); free(ptr); hook_remove(TSDN_NULL, handle); @@ -467,7 +467,7 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, (void *)123}; void *handle = hook_install(TSDN_NULL, &hooks); - assert_ptr_ne(handle, NULL, "Hook installation 
failed"); + expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; void *volatile ptr2; @@ -476,16 +476,16 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, ptr = malloc(129); reset(); ptr2 = ralloc(ptr, 130, flags); - assert_ptr_eq(ptr, ptr2, "Small realloc moved"); + expect_ptr_eq(ptr, ptr2, "Small realloc moved"); - assert_d_eq(call_count, 1, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, expand_type, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong address"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, expand_type, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong address"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); free(ptr); /* @@ -499,19 +499,19 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, ptr = ralloc(ptr2, 2 * 1024 * 1024, flags); /* ptr is the new address, ptr2 is the old address. 
*/ if (ptr == ptr2) { - assert_d_eq(call_count, 1, "Hook not called"); - assert_d_eq(arg_type, expand_type, "Wrong hook type"); + expect_d_eq(call_count, 1, "Hook not called"); + expect_d_eq(arg_type, expand_type, "Wrong hook type"); } else { - assert_d_eq(call_count, 2, "Wrong hooks called"); - assert_ptr_eq(ptr, arg_result, "Wrong address"); - assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); + expect_d_eq(call_count, 2, "Wrong hooks called"); + expect_ptr_eq(ptr, arg_result, "Wrong address"); + expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); } - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_ptr_eq(ptr2, arg_address, "Wrong address"); - assert_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_ptr_eq(ptr2, arg_address, "Wrong address"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], + expect_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -519,34 +519,34 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, ptr = malloc(8); reset(); ptr2 = ralloc(ptr, 128, flags); - assert_ptr_ne(ptr, ptr2, "Small realloc didn't move"); + expect_ptr_ne(ptr, ptr2, "Small realloc didn't move"); - assert_d_eq(call_count, 2, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong address"); - assert_ptr_eq(ptr2, arg_result, "Wrong address"); - assert_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 2, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong 
address"); + expect_ptr_eq(ptr2, arg_result, "Wrong address"); + expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); free(ptr2); /* Realloc with move, large. */ ptr = malloc(1); reset(); ptr2 = ralloc(ptr, 2 * 1024 * 1024, flags); - assert_ptr_ne(ptr, ptr2, "Large realloc didn't move"); + expect_ptr_ne(ptr, ptr2, "Large realloc didn't move"); - assert_d_eq(call_count, 2, "Hook not called"); - assert_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - assert_d_eq(arg_type, dalloc_type, "Wrong hook type"); - assert_ptr_eq(ptr, arg_address, "Wrong address"); - assert_ptr_eq(ptr2, arg_result, "Wrong address"); - assert_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, + expect_d_eq(call_count, 2, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong address"); + expect_ptr_eq(ptr2, arg_result, "Wrong address"); + expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); - assert_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - assert_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); + expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr2); diff --git a/test/unit/huge.c b/test/unit/huge.c index ab72cf00..ec64e500 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -11,37 +11,37 @@ TEST_BEGIN(huge_bind_thread) { size_t sz = sizeof(unsigned); /* Bind to a manual arena. 
*/ - assert_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, + expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, sizeof(arena1)), 0, "Fail to bind thread"); void *ptr = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(ptr, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena1, arena2, "Wrong arena used after binding"); + expect_u_eq(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); /* Switch back to arena 0. */ test_skip_if(have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); arena2 = 0; - assert_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, + expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, sizeof(arena2)), 0, "Fail to bind thread"); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena2, 0, "Wrong arena used after binding"); + expect_u_eq(arena2, 0, "Wrong arena used after binding"); dallocx(ptr, MALLOCX_TCACHE_NONE); /* Then huge allocation should use the huge arena. 
*/ ptr = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(ptr, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena2, 0, "Wrong arena used after binding"); - assert_u_ne(arena1, arena2, "Wrong arena used after binding"); + expect_u_ne(arena2, 0, "Wrong arena used after binding"); + expect_u_ne(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); } TEST_END @@ -50,22 +50,22 @@ TEST_BEGIN(huge_mallocx) { unsigned arena1, arena2; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); void *huge = mallocx(HUGE_SZ, MALLOCX_ARENA(arena1)); - assert_ptr_not_null(huge, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, + expect_ptr_not_null(huge, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, sizeof(huge)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena1, arena2, "Wrong arena used for mallocx"); + expect_u_eq(arena1, arena2, "Wrong arena used for mallocx"); dallocx(huge, MALLOCX_ARENA(arena1)); void *huge2 = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(huge, "Fail to allocate huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, + expect_ptr_not_null(huge, "Fail to allocate huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, sizeof(huge2)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena1, arena2, + expect_u_ne(arena1, arena2, "Huge allocation should not come from the manual arena."); - assert_u_ne(arena2, 0, + expect_u_ne(arena2, 0, "Huge allocation should not come from the arena 0."); dallocx(huge2, 0); } @@ -75,25 +75,25 @@ TEST_BEGIN(huge_allocation) { unsigned arena1, 
arena2; void *ptr = mallocx(HUGE_SZ, 0); - assert_ptr_not_null(ptr, "Fail to allocate huge size"); + expect_ptr_not_null(ptr, "Fail to allocate huge size"); size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + expect_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); + expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); dallocx(ptr, 0); ptr = mallocx(HUGE_SZ >> 1, 0); - assert_ptr_not_null(ptr, "Fail to allocate half huge size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate half huge size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena1, arena2, "Wrong arena used for half huge"); + expect_u_ne(arena1, arena2, "Wrong arena used for half huge"); dallocx(ptr, 0); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(ptr, "Fail to allocate small size"); - assert_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, + expect_ptr_not_null(ptr, "Fail to allocate small size"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_ne(arena1, arena2, + expect_u_ne(arena1, arena2, "Huge and small should be from different arenas"); dallocx(ptr, 0); } diff --git a/test/unit/inspect.c b/test/unit/inspect.c index 4de0b043..41ef6c29 100644 --- a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -1,11 +1,11 @@ #include "test/jemalloc_test.h" #define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ + expect_d_eq(mallctl("experimental.utilization." 
node, \ a, b, c, d), EINVAL, "Should fail when " why_inval); \ - assert_zu_eq(out_sz, out_sz_ref, \ + expect_zu_eq(out_sz, out_sz_ref, \ "Output size touched when given invalid arguments"); \ - assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + expect_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ "Output content touched when given invalid arguments"); \ } while (0) @@ -15,11 +15,11 @@ TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) #define TEST_UTIL_VALID(node) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ + expect_d_eq(mallctl("experimental.utilization." node, \ out, &out_sz, in, in_sz), 0, \ "Should return 0 on correct arguments"); \ - assert_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ - assert_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ "Output content should be changed"); \ } while (0) @@ -43,11 +43,11 @@ TEST_BEGIN(test_query) { void *out_ref = mallocx(out_sz, 0); size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, + expect_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(out, + expect_ptr_not_null(out, "test output allocation failed"); - assert_ptr_not_null(out_ref, + expect_ptr_not_null(out_ref, "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) @@ -83,60 +83,60 @@ TEST_BEGIN(test_query) { /* Examine output for valid call */ TEST_UTIL_VALID("query"); - assert_zu_le(sz, SIZE_READ(out), + expect_zu_le(sz, SIZE_READ(out), "Extent size should be at least allocation size"); - assert_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, + expect_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out), NREGS_READ(out), + expect_zu_le(NFREE_READ(out), NREGS_READ(out), "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out), SIZE_READ(out), + 
expect_zu_le(NREGS_READ(out), SIZE_READ(out), "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out), 0, + expect_zu_ne(NREGS_READ(out), 0, "Extent region count must be positive"); - assert_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) + expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) != NULL && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); if (config_stats) { - assert_zu_le(BIN_NFREE_READ(out), + expect_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), "Bin free count exceeded region count"); - assert_zu_ne(BIN_NREGS_READ(out), 0, + expect_zu_ne(BIN_NREGS_READ(out), 0, "Bin region count must be positive"); - assert_zu_le(NFREE_READ(out), + expect_zu_le(NFREE_READ(out), BIN_NFREE_READ(out), "Extent free count exceeded bin free count"); - assert_zu_le(NREGS_READ(out), + expect_zu_le(NREGS_READ(out), BIN_NREGS_READ(out), "Extent region count exceeded " "bin region count"); - assert_zu_eq(BIN_NREGS_READ(out) + expect_zu_eq(BIN_NREGS_READ(out) % NREGS_READ(out), 0, "Bin region count isn't a multiple of " "extent region count"); - assert_zu_le( + expect_zu_le( BIN_NFREE_READ(out) - NFREE_READ(out), BIN_NREGS_READ(out) - NREGS_READ(out), "Free count in other extents in the bin " "exceeded region count in other extents " "in the bin"); - assert_zu_le(NREGS_READ(out) - NFREE_READ(out), + expect_zu_le(NREGS_READ(out) - NFREE_READ(out), BIN_NREGS_READ(out) - BIN_NFREE_READ(out), "Extent utilized count exceeded " "bin utilized count"); } } else { - assert_zu_eq(NFREE_READ(out), 0, + expect_zu_eq(NFREE_READ(out), 0, "Extent free count should be zero"); - assert_zu_eq(NREGS_READ(out), 1, + expect_zu_eq(NREGS_READ(out), 1, "Extent region count should be one"); - assert_ptr_null(SLABCUR_READ(out), + expect_ptr_null(SLABCUR_READ(out), "Current slab must be null for large size classes"); if (config_stats) { - assert_zu_eq(BIN_NFREE_READ(out), 0, + expect_zu_eq(BIN_NFREE_READ(out), 0, "Bin free count must be zero for " "large sizes"); 
- assert_zu_eq(BIN_NREGS_READ(out), 0, + expect_zu_eq(BIN_NREGS_READ(out), 0, "Bin region count must be zero for " "large sizes"); } @@ -174,8 +174,8 @@ TEST_BEGIN(test_batch) { size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, "test pointer allocation failed"); - assert_ptr_not_null(q, "test pointer allocation failed"); + expect_ptr_not_null(p, "test pointer allocation failed"); + expect_ptr_not_null(q, "test pointer allocation failed"); /* Test invalid argument(s) errors */ TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, @@ -201,7 +201,7 @@ TEST_BEGIN(test_batch) { /* Examine output for valid calls */ #define TEST_EQUAL_REF(i, message) \ - assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + expect_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) #define NFREE_READ(out, i) out[(i) * 3] #define NREGS_READ(out, i) out[(i) * 3 + 1] @@ -210,21 +210,21 @@ TEST_BEGIN(test_batch) { out_sz_ref = out_sz /= 2; in_sz /= 2; TEST_UTIL_BATCH_VALID; - assert_zu_le(sz, SIZE_READ(out, 0), + expect_zu_le(sz, SIZE_READ(out, 0), "Extent size should be at least allocation size"); - assert_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, + expect_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), + expect_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), "Extent free count exceeded region count"); - assert_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), + expect_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), "Extent region count exceeded size"); - assert_zu_ne(NREGS_READ(out, 0), 0, + expect_zu_ne(NREGS_READ(out, 0), 0, "Extent region count must be positive"); } else { - assert_zu_eq(NFREE_READ(out, 0), 0, + expect_zu_eq(NFREE_READ(out, 0), 0, "Extent free count should be zero"); - assert_zu_eq(NREGS_READ(out, 0), 1, + expect_zu_eq(NREGS_READ(out, 0), 1, "Extent region count should be one"); } 
TEST_EQUAL_REF(1, @@ -236,15 +236,15 @@ TEST_BEGIN(test_batch) { TEST_UTIL_BATCH_VALID; TEST_EQUAL_REF(0, "Statistics should be stable across calls"); if (sz <= SC_SMALL_MAXCLASS) { - assert_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), + expect_zu_le(NFREE_READ(out, 1), NREGS_READ(out, 1), "Extent free count exceeded region count"); } else { - assert_zu_eq(NFREE_READ(out, 0), 0, + expect_zu_eq(NFREE_READ(out, 0), 0, "Extent free count should be zero"); } - assert_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), + expect_zu_eq(NREGS_READ(out, 0), NREGS_READ(out, 1), "Extent region count should be same for same region size"); - assert_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), + expect_zu_eq(SIZE_READ(out, 0), SIZE_READ(out, 1), "Extent size should be same for same region size"); #undef SIZE_READ @@ -261,7 +261,7 @@ TEST_END int main(void) { - assert_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + expect_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, "Test case cannot cover large classes"); return test(test_query, test_batch); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 57e3ad43..772a0b4e 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -20,7 +20,7 @@ arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) { arena_dalloc_junk_small_orig(ptr, bin_info); for (i = 0; i < bin_info->reg_size; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, bin_info->reg_size); } @@ -35,7 +35,7 @@ large_dalloc_junk_intercept(void *ptr, size_t usize) { large_dalloc_junk_orig(ptr, usize); for (i = 0; i < usize; i++) { - assert_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, + expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, "Missing junk fill for byte %zu/%zu of deallocated region", i, usize); } @@ -68,22 +68,22 @@ test_junk(size_t sz_min, size_t sz_max) { sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); - assert_ptr_not_null((void *)s, 
"Unexpected mallocx() failure"); + expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_u_eq(s[0], 'a', + expect_u_eq(s[0], 'a', "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - assert_u_eq(s[sz_prev-1], 'a', + expect_u_eq(s[sz_prev-1], 'a', "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { if (opt_junk_alloc) { - assert_u_eq(s[i], JEMALLOC_ALLOC_JUNK, + expect_u_eq(s[i], JEMALLOC_ALLOC_JUNK, "Newly allocated byte %zu/%zu isn't " "junk-filled", i, sz); } @@ -94,14 +94,14 @@ test_junk(size_t sz_min, size_t sz_max) { uint8_t *t; watch_junking(s); t = (uint8_t *)rallocx(s, sz+1, 0); - assert_ptr_not_null((void *)t, + expect_ptr_not_null((void *)t, "Unexpected rallocx() failure"); - assert_zu_ge(sallocx(t, 0), sz+1, + expect_zu_ge(sallocx(t, 0), sz+1, "Unexpectedly small rallocx() result"); if (!background_thread_enabled()) { - assert_ptr_ne(s, t, + expect_ptr_ne(s, t, "Unexpected in-place rallocx()"); - assert_true(!opt_junk_free || saw_junking, + expect_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be " "junk-filled", sz); } @@ -111,7 +111,7 @@ test_junk(size_t sz_min, size_t sz_max) { watch_junking(s); dallocx(s, 0); - assert_true(!opt_junk_free || saw_junking, + expect_true(!opt_junk_free || saw_junking, "Expected region of size %zu to be junk-filled", sz); if (opt_junk_free) { diff --git a/test/unit/log.c b/test/unit/log.c index 10f45bc1..02e6a6a6 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -30,7 +30,7 @@ expect_no_logging(const char *names) { count++; log_do_end(log_l2_a) } - assert_d_eq(count, 0, "Disabled logging not ignored!"); + expect_d_eq(count, 0, "Disabled logging not ignored!"); } TEST_BEGIN(test_log_disabled) { @@ -61,7 +61,7 @@ TEST_BEGIN(test_log_enabled_direct) { count++; log_do_end(log_l1) } - assert_d_eq(count, 10, "Mis-logged!"); 
+ expect_d_eq(count, 10, "Mis-logged!"); count = 0; update_log_var_names("l1.a"); @@ -70,7 +70,7 @@ TEST_BEGIN(test_log_enabled_direct) { count++; log_do_end(log_l1_a) } - assert_d_eq(count, 10, "Mis-logged!"); + expect_d_eq(count, 10, "Mis-logged!"); count = 0; update_log_var_names("l1.a|abc|l2|def"); @@ -83,7 +83,7 @@ TEST_BEGIN(test_log_enabled_direct) { count++; log_do_end(log_l2) } - assert_d_eq(count, 20, "Mis-logged!"); + expect_d_eq(count, 20, "Mis-logged!"); } TEST_END @@ -133,7 +133,7 @@ TEST_BEGIN(test_log_enabled_indirect) { log_do_end(log_l2_b_b) } - assert_d_eq(count, 40, "Mis-logged!"); + expect_d_eq(count, 40, "Mis-logged!"); } TEST_END @@ -155,7 +155,7 @@ TEST_BEGIN(test_log_enabled_global) { count++; log_do_end(log_l2_a_a) } - assert_d_eq(count, 20, "Mis-logged!"); + expect_d_eq(count, 20, "Mis-logged!"); } TEST_END @@ -171,7 +171,7 @@ TEST_BEGIN(test_logs_if_no_init) { count++; log_do_end(l) } - assert_d_eq(count, 0, "Logging shouldn't happen if not initialized."); + expect_d_eq(count, 0, "Logging shouldn't happen if not initialized."); } TEST_END diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 14c169b7..e38723f6 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -7,25 +7,25 @@ TEST_BEGIN(test_mallctl_errors) { uint64_t epoch; size_t sz; - assert_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, + expect_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, "mallctl() should return ENOENT for non-existent names"); - assert_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), + expect_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), EPERM, "mallctl() should return EPERM on attempt to write " "read-only value"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctl() should return EINVAL for input size mismatch"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + 
expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)+1), EINVAL, "mallctl() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, + expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, + expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } TEST_END @@ -35,7 +35,7 @@ TEST_BEGIN(test_mallctlnametomib_errors) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, + expect_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, "mallctlnametomib() should return ENOENT for non-existent names"); } TEST_END @@ -47,30 +47,30 @@ TEST_BEGIN(test_mallctlbymib_errors) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("version", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("version", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", strlen("0.0.0")), EPERM, "mallctl() should return EPERM on " "attempt to write read-only value"); miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)-1), EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, sizeof(epoch)+1), 
EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); sz = sizeof(epoch)-1; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); sz = sizeof(epoch)+1; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); } @@ -81,25 +81,25 @@ TEST_BEGIN(test_mallctl_read_write) { size_t sz = sizeof(old_epoch); /* Blind. */ - assert_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read. */ - assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, + expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ - assert_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, + expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, (void *)&new_epoch, sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); - assert_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); + expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -109,10 +109,10 @@ TEST_BEGIN(test_mallctlnametomib_short_mib) { miblen = 3; mib[3] = 42; - assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - assert_zu_eq(miblen, 3, "Unexpected mib output length"); - assert_zu_eq(mib[3], 42, + expect_zu_eq(miblen, 3, "Unexpected mib output length"); + expect_zu_eq(mib[3], 42, "mallctlnametomib() wrote past the end of the input mib"); } TEST_END @@ -121,10 +121,10 @@ TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ size_t sz = sizeof(oldval); \ - assert_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ + expect_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ NULL, 0), 0, "Unexpected mallctl() failure"); \ - assert_b_eq(oldval, config_##config, "Incorrect config value"); \ - assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + expect_b_eq(oldval, config_##config, "Incorrect config value"); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) TEST_MALLCTL_CONFIG(cache_oblivious, bool); @@ -152,9 +152,9 @@ TEST_BEGIN(test_mallctl_opt) { int expected = config_##config ? 
0 : ENOENT; \ int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ 0); \ - assert_d_eq(result, expected, \ + expect_d_eq(result, expected, \ "Unexpected mallctl() result for opt."#opt); \ - assert_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ } while (0) TEST_MALLCTL_OPT(bool, abort, always); @@ -203,18 +203,18 @@ TEST_BEGIN(test_manpage_example) { size_t len, miblen; len = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, "Unexpected mallctl() failure"); miblen = 4; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); for (i = 0; i < nbins; i++) { size_t bin_size; mib[2] = i; len = sizeof(bin_size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0), 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... */ } @@ -226,9 +226,9 @@ TEST_BEGIN(test_tcache_none) { /* Allocate p and q. */ void *p0 = mallocx(42, 0); - assert_ptr_not_null(p0, "Unexpected mallocx() failure"); + expect_ptr_not_null(p0, "Unexpected mallocx() failure"); void *q = mallocx(42, 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); /* Deallocate p and q, but bypass the tcache for q. */ dallocx(p0, 0); @@ -236,8 +236,8 @@ TEST_BEGIN(test_tcache_none) { /* Make sure that tcache-based allocation returns p, not q. */ void *p1 = mallocx(42, 0); - assert_ptr_not_null(p1, "Unexpected mallocx() failure"); - assert_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); + expect_ptr_not_null(p1, "Unexpected mallocx() failure"); + expect_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); /* Clean up. 
*/ dallocx(p1, MALLOCX_TCACHE_NONE); @@ -258,25 +258,25 @@ TEST_BEGIN(test_tcache) { /* Create tcaches. */ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + expect_d_eq(mallctl("tcache.destroy", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - assert_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, + expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], + expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -284,12 +284,12 @@ TEST_BEGIN(test_tcache) { /* Cache some allocations. 
*/ for (i = 0; i < NTCACHES; i++) { ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", i); dallocx(ps[i], MALLOCX_TCACHE(tis[i])); qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", + expect_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", i); dallocx(qs[i], MALLOCX_TCACHE(tis[i])); } @@ -298,9 +298,9 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *p0 = ps[i]; ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", + expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", i); - assert_ptr_eq(ps[i], p0, + expect_ptr_eq(ps[i], p0, "Expected mallocx() to allocate cached region, i=%u", i); } @@ -308,9 +308,9 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *q0 = qs[i]; qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); - assert_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", + expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", i); - assert_ptr_eq(qs[i], q0, + expect_ptr_eq(qs[i], q0, "Expected rallocx() to allocate cached region, i=%u", i); /* Avoid undefined behavior in case of test failure. */ if (qs[i] == NULL) { @@ -323,14 +323,14 @@ TEST_BEGIN(test_tcache) { /* Flush some non-empty tcaches. */ for (i = 0; i < NTCACHES/2; i++) { - assert_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], + expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. 
*/ for (i = 0; i < NTCACHES; i++) { - assert_d_eq(mallctl("tcache.destroy", NULL, NULL, + expect_d_eq(mallctl("tcache.destroy", NULL, NULL, (void *)&tis[i], sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", i); } @@ -342,32 +342,32 @@ TEST_BEGIN(test_thread_arena) { const char *opa; size_t sz = sizeof(opa); - assert_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, + expect_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); if (opt_oversize_threshold != 0) { narenas--; } - assert_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); + expect_u_eq(narenas, opt_narenas, "Number of arenas incorrect"); if (strcmp(opa, "disabled") == 0) { new_arena_ind = narenas - 1; - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), 0, "Unexpected mallctl() failure"); new_arena_ind = 0; - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), 0, "Unexpected mallctl() failure"); } else { - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1; if (old_arena_ind != new_arena_ind) { - assert_d_eq(mallctl("thread.arena", + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, sizeof(unsigned)), EPERM, "thread.arena ctl " "should not be allowed with percpu arena"); @@ -384,32 +384,32 @@ TEST_BEGIN(test_arena_i_initialized) { bool initialized; sz = 
sizeof(narenas); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); for (i = 0; i < narenas; i++) { mib[1] = i; sz = sizeof(initialized); - assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); } mib[1] = MALLCTL_ARENAS_ALL; sz = sizeof(initialized); - assert_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_true(initialized, + expect_true(initialized, "Merged arena statistics should always be initialized"); /* Equivalent to the above but using mallctl() directly. */ sz = sizeof(initialized); - assert_d_eq(mallctl( + expect_d_eq(mallctl( "arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", (void *)&initialized, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_true(initialized, + expect_true(initialized, "Merged arena statistics should always be initialized"); } TEST_END @@ -418,17 +418,17 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.dirty_decay_ms", + expect_d_eq(mallctl("arena.0.dirty_decay_ms", (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; - assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); @@ -437,10 +437,10 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { dirty_decay_ms++) { ssize_t old_dirty_decay_ms; - assert_d_eq(mallctl("arena.0.dirty_decay_ms", + expect_d_eq(mallctl("arena.0.dirty_decay_ms", (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, + expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arena.0.dirty_decay_ms"); } } @@ -450,17 +450,17 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, (void 
*)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); @@ -469,10 +469,10 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; - assert_d_eq(mallctl("arena.0.muzzy_decay_ms", + expect_d_eq(mallctl("arena.0.muzzy_decay_ms", (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, + expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arena.0.muzzy_decay_ms"); } } @@ -484,19 +484,19 @@ TEST_BEGIN(test_arena_i_purge) { size_t mib[3]; size_t miblen = 3; - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); mib[1] = MALLCTL_ARENAS_ALL; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } TEST_END @@ -507,19 +507,19 @@ TEST_BEGIN(test_arena_i_decay) { size_t mib[3]; size_t miblen = 3; - assert_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 
0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = narenas; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); mib[1] = MALLCTL_ARENAS_ALL; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } TEST_END @@ -531,40 +531,40 @@ TEST_BEGIN(test_arena_i_dss) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new 
= "disabled"; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected default for dss precedence"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_str_ne(dss_prec_old, "primary", + expect_str_ne(dss_prec_old, "primary", "Unexpected value for dss precedence"); } TEST_END @@ -576,43 +576,43 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { bool retain_enabled; size_t sz = sizeof(retain_enabled); - assert_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), + expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); test_skip_if(!retain_enabled); sz = sizeof(default_limit); miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), + expect_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); - assert_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &default_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(default_limit, SC_LARGE_MAXCLASS, + expect_zu_eq(default_limit, SC_LARGE_MAXCLASS, "Unexpected default for retain_grow_limit"); new_limit = PAGE - 1; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), EFAULT, "Unexpected 
mallctl() success"); new_limit = PAGE + 1; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(old_limit, PAGE, + expect_zu_eq(old_limit, PAGE, "Unexpected value for retain_grow_limit"); /* Expect grow less than psize class 10. */ new_limit = sz_pind2sz(10) - 1; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_zu_eq(old_limit, sz_pind2sz(9), + expect_zu_eq(old_limit, sz_pind2sz(9), "Unexpected value for retain_grow_limit"); /* Restore to default. 
*/ - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, sizeof(default_limit)), 0, "Unexpected mallctl() failure"); } TEST_END @@ -621,17 +621,17 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.dirty_decay_ms", + expect_d_eq(mallctl("arenas.dirty_decay_ms", (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; - assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Expected mallctl() failure"); @@ -640,10 +640,10 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { dirty_decay_ms++) { ssize_t old_dirty_decay_ms; - assert_d_eq(mallctl("arenas.dirty_decay_ms", + expect_d_eq(mallctl("arenas.dirty_decay_ms", (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, + expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arenas.dirty_decay_ms"); } } @@ -653,17 +653,17 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; size_t sz = sizeof(ssize_t); - assert_d_eq(mallctl("arenas.muzzy_decay_ms", + expect_d_eq(mallctl("arenas.muzzy_decay_ms", (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; - assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, "Unexpected 
mallctl() success"); muzzy_decay_ms = 0x7fffffff; - assert_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, + expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Expected mallctl() failure"); @@ -672,10 +672,10 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; - assert_d_eq(mallctl("arenas.muzzy_decay_ms", + expect_d_eq(mallctl("arenas.muzzy_decay_ms", (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); - assert_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, + expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arenas.muzzy_decay_ms"); } } @@ -685,9 +685,9 @@ TEST_BEGIN(test_arenas_constants) { #define TEST_ARENAS_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ + expect_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ 0), 0, "Unexpected mallctl() failure"); \ - assert_zu_eq(name, expected, "Incorrect "#name" size"); \ + expect_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); @@ -703,9 +703,9 @@ TEST_BEGIN(test_arenas_bin_constants) { #define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ + expect_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ NULL, 0), 0, "Unexpected mallctl() failure"); \ - assert_zu_eq(name, expected, "Incorrect "#name" size"); \ + expect_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size); @@ -722,9 +722,9 @@ TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ + 
expect_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ - assert_zu_eq(name, expected, "Incorrect "#name" size"); \ + expect_zu_eq(name, expected, "Incorrect "#name" size"); \ } while (0) TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, @@ -738,16 +738,16 @@ TEST_BEGIN(test_arenas_create) { unsigned narenas_before, arena, narenas_after; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, + expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_u_eq(narenas_before+1, narenas_after, + expect_u_eq(narenas_before+1, narenas_after, "Unexpected number of arenas before versus after extension"); - assert_u_eq(arena, narenas_after-1, "Unexpected arena index"); + expect_u_eq(arena, narenas_after-1, "Unexpected arena index"); } TEST_END @@ -756,13 +756,13 @@ TEST_BEGIN(test_arenas_lookup) { void *ptr; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); ptr = mallocx(42, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); - assert_ptr_not_null(ptr, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + expect_ptr_not_null(ptr, "Unexpected mallocx() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), 0, "Unexpected mallctl() failure"); - assert_u_eq(arena, arena1, "Unexpected arena index"); + expect_u_eq(arena, arena1, 
"Unexpected arena index"); dallocx(ptr, 0); } TEST_END @@ -778,18 +778,18 @@ TEST_BEGIN(test_prof_active) { size_t len = sizeof(bool); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), ENOENT, + expect_d_eq(mallctl("prof.active", NULL, NULL, &active, len), ENOENT, "Setting prof_active to true should fail when opt_prof is off"); old = true; - assert_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, + expect_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, "Setting prof_active to true should fail when opt_prof is off"); - assert_true(old, "old valud should not be touched when mallctl fails"); + expect_true(old, "old valud should not be touched when mallctl fails"); active = false; - assert_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, + expect_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, "Setting prof_active to false should succeed when opt_prof is off"); - assert_d_eq(mallctl("prof.active", &old, &len, &active, len), 0, + expect_d_eq(mallctl("prof.active", &old, &len, &active, len), 0, "Setting prof_active to false should succeed when opt_prof is off"); - assert_false(old, "prof_active should be false when opt_prof is off"); + expect_false(old, "prof_active should be false when opt_prof is off"); } TEST_END @@ -797,7 +797,7 @@ TEST_BEGIN(test_stats_arenas) { #define TEST_STATS_ARENAS(t, name) do { \ t name; \ size_t sz = sizeof(t); \ - assert_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ + expect_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ NULL, 0), 0, "Unexpected mallctl() failure"); \ } while (0) @@ -831,21 +831,21 @@ TEST_BEGIN(test_hooks) { size_t sz = sizeof(handle); int err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, 0, "Hook installation failed"); - assert_ptr_ne(handle, NULL, "Hook installation gave null handle"); + expect_d_eq(err, 0, "Hook installation failed"); + expect_ptr_ne(handle, NULL, "Hook 
installation gave null handle"); void *ptr = mallocx(1, 0); - assert_true(hook_called, "Alloc hook not called"); + expect_true(hook_called, "Alloc hook not called"); hook_called = false; free(ptr); - assert_true(hook_called, "Free hook not called"); + expect_true(hook_called, "Free hook not called"); err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); - assert_d_eq(err, 0, "Hook removal failed"); + expect_d_eq(err, 0, "Hook removal failed"); hook_called = false; ptr = mallocx(1, 0); free(ptr); - assert_false(hook_called, "Hook called after removal"); + expect_false(hook_called, "Hook called after removal"); } TEST_END @@ -861,27 +861,27 @@ TEST_BEGIN(test_hooks_exhaustion) { handle = NULL; err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, 0, "Error installation hooks"); - assert_ptr_ne(handle, NULL, "Got NULL handle"); + expect_d_eq(err, 0, "Error installation hooks"); + expect_ptr_ne(handle, NULL, "Got NULL handle"); handles[i] = handle; } err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, EAGAIN, "Should have failed hook installation"); + expect_d_eq(err, EAGAIN, "Should have failed hook installation"); for (int i = 0; i < HOOK_MAX; i++) { err = mallctl("experimental.hooks.remove", NULL, NULL, &handles[i], sizeof(handles[i])); - assert_d_eq(err, 0, "Hook removal failed"); + expect_d_eq(err, 0, "Hook removal failed"); } /* Insertion failed, but then we removed some; it should work now. 
*/ handle = NULL; err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); - assert_d_eq(err, 0, "Hook insertion failed"); - assert_ptr_ne(handle, NULL, "Got NULL handle"); + expect_d_eq(err, 0, "Hook insertion failed"); + expect_ptr_ne(handle, NULL, "Got NULL handle"); err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); - assert_d_eq(err, 0, "Hook removal failed"); + expect_d_eq(err, 0, "Hook removal failed"); } TEST_END @@ -901,25 +901,25 @@ TEST_BEGIN(test_thread_idle) { bool tcache_enabled = false; sz = sizeof(tcache_enabled); err = mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); test_skip_if(!tcache_enabled); size_t tcache_max; sz = sizeof(tcache_max); err = mallctl("arenas.tcache_max", &tcache_max, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); test_skip_if(tcache_max == 0); unsigned arena_ind; sz = sizeof(arena_ind); err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); /* We're going to do an allocation of size 1, which we know is small. 
*/ size_t mib[5]; miblen = sizeof(mib)/sizeof(mib[0]); err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); mib[2] = arena_ind; /* @@ -931,25 +931,25 @@ TEST_BEGIN(test_thread_idle) { uint64_t epoch; err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); uint64_t small_dalloc_pre_idle; sz = sizeof(small_dalloc_pre_idle); err = mallctlbymib(mib, miblen, &small_dalloc_pre_idle, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); err = mallctl("thread.idle", NULL, NULL, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); err = mallctl("epoch", NULL, NULL, &epoch, sizeof(epoch)); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); uint64_t small_dalloc_post_idle; sz = sizeof(small_dalloc_post_idle); err = mallctlbymib(mib, miblen, &small_dalloc_post_idle, &sz, NULL, 0); - assert_d_eq(err, 0, ""); + expect_d_eq(err, 0, ""); - assert_u64_lt(small_dalloc_pre_idle, small_dalloc_post_idle, + expect_u64_lt(small_dalloc_pre_idle, small_dalloc_post_idle, "Purge didn't flush the tcache"); } TEST_END diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index 79ba7fc5..1a6e5f63 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -4,9 +4,9 @@ TEST_BEGIN(test_malloc_strtoumax_no_endptr) { int err; set_errno(0); - assert_ju_eq(malloc_strtoumax("0", NULL, 0), 0, "Unexpected result"); + expect_ju_eq(malloc_strtoumax("0", NULL, 0), 0, "Unexpected result"); err = get_errno(); - assert_d_eq(err, 0, "Unexpected failure"); + expect_d_eq(err, 0, "Unexpected failure"); } TEST_END @@ -89,14 +89,14 @@ TEST_BEGIN(test_malloc_strtoumax) { set_errno(0); result = malloc_strtoumax(test->input, &remainder, test->base); err = get_errno(); - assert_d_eq(err, test->expected_errno, + expect_d_eq(err, test->expected_errno, "Expected errno %s for \"%s\", base %d", test->expected_errno_name, test->input, test->base); - 
assert_str_eq(remainder, test->expected_remainder, + expect_str_eq(remainder, test->expected_remainder, "Unexpected remainder for \"%s\", base %d", test->input, test->base); if (err == 0) { - assert_ju_eq(result, test->expected_x, + expect_ju_eq(result, test->expected_x, "Unexpected result for \"%s\", base %d", test->input, test->base); } @@ -111,10 +111,10 @@ TEST_BEGIN(test_malloc_snprintf_truncated) { size_t len; #define TEST(expected_str_untruncated, ...) do { \ result = malloc_snprintf(buf, len, __VA_ARGS__); \ - assert_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ + expect_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ "Unexpected string inequality (\"%s\" vs \"%s\")", \ buf, expected_str_untruncated); \ - assert_zu_eq(result, strlen(expected_str_untruncated), \ + expect_zu_eq(result, strlen(expected_str_untruncated), \ "Unexpected result"); \ } while (0) @@ -142,8 +142,8 @@ TEST_BEGIN(test_malloc_snprintf) { size_t result; #define TEST(expected_str, ...) do { \ result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ - assert_str_eq(buf, expected_str, "Unexpected output"); \ - assert_zu_eq(result, strlen(expected_str), "Unexpected result");\ + expect_str_eq(buf, expected_str, "Unexpected output"); \ + expect_zu_eq(result, strlen(expected_str), "Unexpected result");\ } while (0) TEST("hello", "hello"); diff --git a/test/unit/math.c b/test/unit/math.c index 09ef20c7..a32767c5 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -41,7 +41,7 @@ TEST_BEGIN(test_ln_gamma_factorial) { /* exp(ln_gamma(x)) == (x-1)! for integer x. 
*/ for (x = 1; x <= 21; x++) { - assert_true(double_eq_rel(exp(ln_gamma(x)), + expect_true(double_eq_rel(exp(ln_gamma(x)), (double)factorial(x-1), MAX_REL_ERR, MAX_ABS_ERR), "Incorrect factorial result for x=%u", x); } @@ -192,7 +192,7 @@ TEST_BEGIN(test_ln_gamma_misc) { for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) { double x = (double)i * 0.25; - assert_true(double_eq_rel(ln_gamma(x), + expect_true(double_eq_rel(ln_gamma(x), ln_gamma_misc_expected[i], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect ln_gamma result for i=%u", i); } @@ -242,7 +242,7 @@ TEST_BEGIN(test_pt_norm) { for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) { double p = (double)i * 0.01; - assert_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], + expect_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_norm result for i=%u", i); } @@ -295,7 +295,7 @@ TEST_BEGIN(test_pt_chi2) { double ln_gamma_df = ln_gamma(df * 0.5); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - assert_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), + expect_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_chi2 result for i=%u, j=%u", i, j); e++; @@ -356,7 +356,7 @@ TEST_BEGIN(test_pt_gamma_shape) { double ln_gamma_shape = ln_gamma(shape); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - assert_true(double_eq_rel(pt_gamma(p, shape, 1.0, + expect_true(double_eq_rel(pt_gamma(p, shape, 1.0, ln_gamma_shape), pt_gamma_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_gamma result for i=%u, j=%u", i, j); @@ -370,7 +370,7 @@ TEST_BEGIN(test_pt_gamma_scale) { double shape = 1.0; double ln_gamma_shape = ln_gamma(shape); - assert_true(double_eq_rel( + expect_true(double_eq_rel( pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, MAX_ABS_ERR), diff --git a/test/unit/mq.c b/test/unit/mq.c index 57a4d54e..f833f77c 100644 --- 
a/test/unit/mq.c +++ b/test/unit/mq.c @@ -13,17 +13,17 @@ TEST_BEGIN(test_mq_basic) { mq_t mq; mq_msg_t msg; - assert_false(mq_init(&mq), "Unexpected mq_init() failure"); - assert_u_eq(mq_count(&mq), 0, "mq should be empty"); - assert_ptr_null(mq_tryget(&mq), + expect_false(mq_init(&mq), "Unexpected mq_init() failure"); + expect_u_eq(mq_count(&mq), 0, "mq should be empty"); + expect_ptr_null(mq_tryget(&mq), "mq_tryget() should fail when the queue is empty"); mq_put(&mq, &msg); - assert_u_eq(mq_count(&mq), 1, "mq should contain one message"); - assert_ptr_eq(mq_tryget(&mq), &msg, "mq_tryget() should return msg"); + expect_u_eq(mq_count(&mq), 1, "mq should contain one message"); + expect_ptr_eq(mq_tryget(&mq), &msg, "mq_tryget() should return msg"); mq_put(&mq, &msg); - assert_ptr_eq(mq_get(&mq), &msg, "mq_get() should return msg"); + expect_ptr_eq(mq_get(&mq), &msg, "mq_get() should return msg"); mq_fini(&mq); } @@ -36,7 +36,7 @@ thd_receiver_start(void *arg) { for (i = 0; i < (NSENDERS * NMSGS); i++) { mq_msg_t *msg = mq_get(mq); - assert_ptr_not_null(msg, "mq_get() should never return NULL"); + expect_ptr_not_null(msg, "mq_get() should never return NULL"); dallocx(msg, 0); } return NULL; @@ -51,7 +51,7 @@ thd_sender_start(void *arg) { mq_msg_t *msg; void *p; p = mallocx(sizeof(mq_msg_t), 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); msg = (mq_msg_t *)p; mq_put(mq, msg); } @@ -64,7 +64,7 @@ TEST_BEGIN(test_mq_threaded) { thd_t senders[NSENDERS]; unsigned i; - assert_false(mq_init(&mq), "Unexpected mq_init() failure"); + expect_false(mq_init(&mq), "Unexpected mq_init() failure"); thd_create(&receiver, thd_receiver_start, (void *)&mq); for (i = 0; i < NSENDERS; i++) { diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 424587b0..4aeebc13 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_mtx_basic) { mtx_t mtx; - assert_false(mtx_init(&mtx), "Unexpected 
mtx_init() failure"); + expect_false(mtx_init(&mtx), "Unexpected mtx_init() failure"); mtx_lock(&mtx); mtx_unlock(&mtx); mtx_fini(&mtx); @@ -36,7 +36,7 @@ TEST_BEGIN(test_mtx_race) { thd_t thds[NTHREADS]; unsigned i; - assert_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); + expect_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); arg.x = 0; for (i = 0; i < NTHREADS; i++) { thd_create(&thds[i], thd_start, (void *)&arg); @@ -44,7 +44,7 @@ TEST_BEGIN(test_mtx_race) { for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } - assert_u_eq(arg.x, NTHREADS * NINCRS, + expect_u_eq(arg.x, NTHREADS * NINCRS, "Race-related counter corruption"); } TEST_END diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 5a736bba..bf875017 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -6,9 +6,9 @@ TEST_BEGIN(test_nstime_init) { nstime_t nst; nstime_init(&nst, 42000000043); - assert_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); - assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); - assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); + expect_u64_eq(nstime_ns(&nst), 42000000043, "ns incorrectly read"); + expect_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + expect_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); } TEST_END @@ -16,8 +16,8 @@ TEST_BEGIN(test_nstime_init2) { nstime_t nst; nstime_init2(&nst, 42, 43); - assert_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); - assert_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); + expect_u64_eq(nstime_sec(&nst), 42, "sec incorrectly read"); + expect_u64_eq(nstime_nsec(&nst), 43, "nsec incorrectly read"); } TEST_END @@ -27,8 +27,8 @@ TEST_BEGIN(test_nstime_copy) { nstime_init2(&nsta, 42, 43); nstime_init_zero(&nstb); nstime_copy(&nstb, &nsta); - assert_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly copied"); - assert_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); + expect_u64_eq(nstime_sec(&nstb), 42, "sec incorrectly 
copied"); + expect_u64_eq(nstime_nsec(&nstb), 43, "nsec incorrectly copied"); } TEST_END @@ -37,31 +37,31 @@ TEST_BEGIN(test_nstime_compare) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); - assert_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Times should be equal"); + expect_d_eq(nstime_compare(&nstb, &nsta), 0, "Times should be equal"); nstime_init2(&nstb, 42, 42); - assert_d_eq(nstime_compare(&nsta, &nstb), 1, + expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), -1, + expect_d_eq(nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 42, 44); - assert_d_eq(nstime_compare(&nsta, &nstb), -1, + expect_d_eq(nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), 1, + expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); nstime_init2(&nstb, 41, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 1, + expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), -1, + expect_d_eq(nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 43, 0); - assert_d_eq(nstime_compare(&nsta, &nstb), -1, + expect_d_eq(nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); - assert_d_eq(nstime_compare(&nstb, &nsta), 1, + expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); } TEST_END @@ -73,14 +73,14 @@ TEST_BEGIN(test_nstime_add) { nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 84, 86); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, BILLION - 1); nstime_copy(&nstb, &nsta); 
nstime_add(&nsta, &nstb); nstime_init2(&nstb, 85, BILLION - 2); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -91,13 +91,13 @@ TEST_BEGIN(test_nstime_iadd) { nstime_init2(&nsta, 42, BILLION - 1); nstime_iadd(&nsta, 1); nstime_init2(&nstb, 43, 0); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, 1); nstime_iadd(&nsta, BILLION + 1); nstime_init2(&nstb, 43, 2); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -109,14 +109,14 @@ TEST_BEGIN(test_nstime_subtract) { nstime_copy(&nstb, &nsta); nstime_subtract(&nsta, &nstb); nstime_init_zero(&nstb); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); nstime_init2(&nstb, 41, 44); nstime_subtract(&nsta, &nstb); nstime_init2(&nstb, 0, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -127,13 +127,13 @@ TEST_BEGIN(test_nstime_isubtract) { nstime_init2(&nsta, 42, 43); nstime_isubtract(&nsta, 42*BILLION + 43); nstime_init_zero(&nstb); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); nstime_isubtract(&nsta, 41*BILLION + 44); nstime_init2(&nstb, 0, BILLION - 1); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -144,13 +144,13 @@ TEST_BEGIN(test_nstime_imultiply) { nstime_init2(&nsta, 42, 43); nstime_imultiply(&nsta, 10); nstime_init2(&nstb, 420, 430); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect 
multiplication result"); nstime_init2(&nsta, 42, 666666666); nstime_imultiply(&nsta, 3); nstime_init2(&nstb, 127, 999999998); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); } TEST_END @@ -162,14 +162,14 @@ TEST_BEGIN(test_nstime_idivide) { nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_idivide(&nsta, 10); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); nstime_init2(&nsta, 42, 666666666); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 3); nstime_idivide(&nsta, 3); - assert_d_eq(nstime_compare(&nsta, &nstb), 0, + expect_d_eq(nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); } TEST_END @@ -180,7 +180,7 @@ TEST_BEGIN(test_nstime_divide) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); - assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + expect_u64_eq(nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); @@ -188,7 +188,7 @@ TEST_BEGIN(test_nstime_divide) { nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_add(&nsta, &nstc); - assert_u64_eq(nstime_divide(&nsta, &nstb), 10, + expect_u64_eq(nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); @@ -196,7 +196,7 @@ TEST_BEGIN(test_nstime_divide) { nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_subtract(&nsta, &nstc); - assert_u64_eq(nstime_divide(&nsta, &nstb), 9, + expect_u64_eq(nstime_divide(&nsta, &nstb), 9, "Incorrect division result"); } TEST_END @@ -209,7 +209,7 @@ TEST_END TEST_BEGIN(test_nstime_update) { nstime_t nst; - assert_false(nstime_init_update(&nst), "Basic time update failed."); + expect_false(nstime_init_update(&nst), "Basic time update failed."); /* Only Rip Van Winkle sleeps this long. 
*/ { @@ -220,9 +220,9 @@ TEST_BEGIN(test_nstime_update) { { nstime_t nst0; nstime_copy(&nst0, &nst); - assert_true(nstime_update(&nst), + expect_true(nstime_update(&nst), "Update should detect time roll-back."); - assert_d_eq(nstime_compare(&nst, &nst0), 0, + expect_d_eq(nstime_compare(&nst, &nst0), 0, "Time should not have been modified"); } } diff --git a/test/unit/pack.c b/test/unit/pack.c index fc188b00..e6392825 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -22,7 +22,7 @@ binind_compute(void) { unsigned nbins, i; sz = sizeof(nbins); - assert_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, + expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &sz, NULL, 0), 0, "Unexpected mallctl failure"); for (i = 0; i < nbins; i++) { @@ -30,12 +30,12 @@ binind_compute(void) { size_t miblen = sizeof(mib)/sizeof(size_t); size_t size; - assert_d_eq(mallctlnametomib("arenas.bin.0.size", mib, + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)i; sz = sizeof(size); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); if (size == SZ) { return i; @@ -54,11 +54,11 @@ nregs_per_run_compute(void) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(nregs); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); return nregs; } @@ -69,7 +69,7 @@ arenas_create_mallctl(void) { size_t sz; sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void 
*)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); return arena_ind; @@ -80,10 +80,10 @@ arena_reset_mallctl(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } @@ -105,7 +105,7 @@ TEST_BEGIN(test_pack) { for (j = 0; j < nregs_per_run; j++) { void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", SZ, arena_ind, i, j); @@ -148,7 +148,7 @@ TEST_BEGIN(test_pack) { } p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - assert_ptr_eq(p, ptrs[(i * nregs_per_run) + j], + expect_ptr_eq(p, ptrs[(i * nregs_per_run) + j], "Unexpected refill discrepancy, run=%zu, reg=%zu\n", i, j); } diff --git a/test/unit/pages.c b/test/unit/pages.c index ee729eec..8dfd1a72 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -8,13 +8,13 @@ TEST_BEGIN(test_pages_huge) { alloc_size = HUGEPAGE * 2 - PAGE; commit = true; pages = pages_map(NULL, alloc_size, PAGE, &commit); - assert_ptr_not_null(pages, "Unexpected pages_map() error"); + expect_ptr_not_null(pages, "Unexpected pages_map() error"); if (init_system_thp_mode == thp_mode_default) { hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - assert_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, + expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, "Unexpected pages_huge() result"); - assert_false(pages_nohuge(hugepage, HUGEPAGE), + expect_false(pages_nohuge(hugepage, HUGEPAGE), "Unexpected pages_nohuge() result"); } diff --git 
a/test/unit/ph.c b/test/unit/ph.c index 88bf56f8..0f7c991e 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -30,8 +30,8 @@ node_cmp(const node_t *a, const node_t *b) { static int node_cmp_magic(const node_t *a, const node_t *b) { - assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); - assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); return node_cmp(a, b); } @@ -74,7 +74,7 @@ heap_print(const heap_t *heap) { for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; auxelm = phn_next_get(node_t, link, auxelm)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, link, auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); node_print(auxelm, 0); @@ -90,7 +90,7 @@ node_validate(const node_t *node, const node_t *parent) { node_t *leftmost_child, *sibling; if (parent != NULL) { - assert_d_ge(node_cmp_magic(node, parent), 0, + expect_d_ge(node_cmp_magic(node, parent), 0, "Child is less than parent"); } @@ -98,13 +98,13 @@ node_validate(const node_t *node, const node_t *parent) { if (leftmost_child == NULL) { return nnodes; } - assert_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + expect_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), (void *)node, "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != NULL; sibling = phn_next_get(node_t, link, sibling)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, link, sibling)), sibling, "sibling's prev doesn't link to sibling"); nnodes += node_validate(sibling, node); @@ -125,7 +125,7 @@ heap_validate(const heap_t *heap) { for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; auxelm = phn_next_get(node_t, 
link, auxelm)) { - assert_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, + expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, link, auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); nnodes += node_validate(auxelm, NULL); @@ -142,9 +142,9 @@ TEST_BEGIN(test_ph_empty) { heap_t heap; heap_new(&heap); - assert_true(heap_empty(&heap), "Heap should be empty"); - assert_ptr_null(heap_first(&heap), "Unexpected node"); - assert_ptr_null(heap_any(&heap), "Unexpected node"); + expect_true(heap_empty(&heap), "Heap should be empty"); + expect_ptr_null(heap_first(&heap), "Unexpected node"); + expect_ptr_null(heap_any(&heap), "Unexpected node"); } TEST_END @@ -203,7 +203,7 @@ TEST_BEGIN(test_ph_random) { for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ heap_new(&heap); - assert_u_eq(heap_validate(&heap), 0, + expect_u_eq(heap_validate(&heap), 0, "Incorrect node count"); for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; @@ -214,34 +214,34 @@ TEST_BEGIN(test_ph_random) { for (k = 0; k < j; k++) { heap_insert(&heap, &nodes[k]); if (i % 13 == 12) { - assert_ptr_not_null(heap_any(&heap), + expect_ptr_not_null(heap_any(&heap), "Heap should not be empty"); /* Trigger merging. */ - assert_ptr_not_null(heap_first(&heap), + expect_ptr_not_null(heap_first(&heap), "Heap should not be empty"); } - assert_u_eq(heap_validate(&heap), k + 1, + expect_u_eq(heap_validate(&heap), k + 1, "Incorrect node count"); } - assert_false(heap_empty(&heap), + expect_false(heap_empty(&heap), "Heap should not be empty"); /* Remove nodes. 
*/ switch (i % 6) { case 0: for (k = 0; k < j; k++) { - assert_u_eq(heap_validate(&heap), j - k, + expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, &nodes[k]); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); } break; case 1: for (k = j; k > 0; k--) { node_remove(&heap, &nodes[k-1]); - assert_u_eq(heap_validate(&heap), k - 1, + expect_u_eq(heap_validate(&heap), k - 1, "Incorrect node count"); } break; @@ -249,10 +249,10 @@ TEST_BEGIN(test_ph_random) { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = node_remove_first(&heap); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); if (prev != NULL) { - assert_d_ge(node_cmp(node, + expect_d_ge(node_cmp(node, prev), 0, "Bad removal order"); } @@ -263,15 +263,15 @@ TEST_BEGIN(test_ph_random) { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = heap_first(&heap); - assert_u_eq(heap_validate(&heap), j - k, + expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); if (prev != NULL) { - assert_d_ge(node_cmp(node, + expect_d_ge(node_cmp(node, prev), 0, "Bad removal order"); } node_remove(&heap, node); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); prev = node; } @@ -279,17 +279,17 @@ TEST_BEGIN(test_ph_random) { } case 4: { for (k = 0; k < j; k++) { node_remove_any(&heap); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); } break; } case 5: { for (k = 0; k < j; k++) { node_t *node = heap_any(&heap); - assert_u_eq(heap_validate(&heap), j - k, + expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, node); - assert_u_eq(heap_validate(&heap), j - k + expect_u_eq(heap_validate(&heap), j - k - 1, "Incorrect node count"); } break; @@ -297,11 +297,11 @@ TEST_BEGIN(test_ph_random) { 
not_reached(); } - assert_ptr_null(heap_first(&heap), + expect_ptr_null(heap_first(&heap), "Heap should be empty"); - assert_ptr_null(heap_any(&heap), + expect_ptr_null(heap_any(&heap), "Heap should be empty"); - assert_true(heap_empty(&heap), "Heap should be empty"); + expect_true(heap_empty(&heap), "Heap should be empty"); } } fini_gen_rand(sfmt); diff --git a/test/unit/prng.c b/test/unit/prng.c index b5795c2f..915b3504 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -10,18 +10,18 @@ test_prng_lg_range_u32(bool atomic) { ra = prng_lg_range_u32(&sa, 32, atomic); atomic_store_u32(&sa, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sa, 32, atomic); - assert_u32_eq(ra, rb, + expect_u32_eq(ra, rb, "Repeated generation should produce repeated results"); atomic_store_u32(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sb, 32, atomic); - assert_u32_eq(ra, rb, + expect_u32_eq(ra, rb, "Equivalent generation should produce equivalent results"); atomic_store_u32(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_u32(&sa, 32, atomic); rb = prng_lg_range_u32(&sa, 32, atomic); - assert_u32_ne(ra, rb, + expect_u32_ne(ra, rb, "Full-width results must not immediately repeat"); atomic_store_u32(&sa, 42, ATOMIC_RELAXED); @@ -29,9 +29,9 @@ test_prng_lg_range_u32(bool atomic) { for (lg_range = 31; lg_range > 0; lg_range--) { atomic_store_u32(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_u32(&sb, lg_range, atomic); - assert_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), + expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_u32_eq(rb, (ra >> (32 - lg_range)), + expect_u32_eq(rb, (ra >> (32 - lg_range)), "Expected high order bits of full-width result, " "lg_range=%u", lg_range); } @@ -46,18 +46,18 @@ test_prng_lg_range_u64(void) { ra = prng_lg_range_u64(&sa, 64); sa = 42; rb = prng_lg_range_u64(&sa, 64); - assert_u64_eq(ra, rb, + expect_u64_eq(ra, rb, "Repeated generation should produce repeated results"); sb = 
42; rb = prng_lg_range_u64(&sb, 64); - assert_u64_eq(ra, rb, + expect_u64_eq(ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_u64(&sa, 64); rb = prng_lg_range_u64(&sa, 64); - assert_u64_ne(ra, rb, + expect_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; @@ -65,9 +65,9 @@ test_prng_lg_range_u64(void) { for (lg_range = 63; lg_range > 0; lg_range--) { sb = 42; rb = prng_lg_range_u64(&sb, lg_range); - assert_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), + expect_u64_eq((rb & (UINT64_C(0xffffffffffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); - assert_u64_eq(rb, (ra >> (64 - lg_range)), + expect_u64_eq(rb, (ra >> (64 - lg_range)), "Expected high order bits of full-width result, " "lg_range=%u", lg_range); } @@ -83,18 +83,18 @@ test_prng_lg_range_zu(bool atomic) { ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); atomic_store_zu(&sa, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_eq(ra, rb, + expect_zu_eq(ra, rb, "Repeated generation should produce repeated results"); atomic_store_zu(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_eq(ra, rb, + expect_zu_eq(ra, rb, "Equivalent generation should produce equivalent results"); atomic_store_zu(&sa, 42, ATOMIC_RELAXED); ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - assert_zu_ne(ra, rb, + expect_zu_ne(ra, rb, "Full-width results must not immediately repeat"); atomic_store_zu(&sa, 42, ATOMIC_RELAXED); @@ -103,9 +103,9 @@ test_prng_lg_range_zu(bool atomic) { lg_range--) { atomic_store_zu(&sb, 42, ATOMIC_RELAXED); rb = prng_lg_range_zu(&sb, lg_range, atomic); - assert_zu_eq((rb & (SIZE_T_MAX << lg_range)), + expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, "High order bits should be 0, lg_range=%u", 
lg_range); - assert_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - + expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range)), "Expected high order bits of full-width " "result, lg_range=%u", lg_range); } @@ -151,7 +151,7 @@ test_prng_range_u32(bool atomic) { for (rep = 0; rep < NREPS; rep++) { uint32_t r = prng_range_u32(&s, range, atomic); - assert_u32_lt(r, range, "Out of range"); + expect_u32_lt(r, range, "Out of range"); } } } @@ -171,7 +171,7 @@ test_prng_range_u64(void) { for (rep = 0; rep < NREPS; rep++) { uint64_t r = prng_range_u64(&s, range); - assert_u64_lt(r, range, "Out of range"); + expect_u64_lt(r, range, "Out of range"); } } } @@ -191,7 +191,7 @@ test_prng_range_zu(bool atomic) { for (rep = 0; rep < NREPS; rep++) { size_t r = prng_range_zu(&s, range, atomic); - assert_zu_lt(r, range, "Out of range"); + expect_zu_lt(r, range, "Out of range"); } } } diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 25220063..8dfa6780 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -10,7 +10,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -32,14 +32,14 @@ thd_start(void *varg) { void *p = alloc_from_permuted_backtrace(thd_ind, i); dallocx(p, 0); if (i % DUMP_INTERVAL == 0) { - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); } if (i % BT_COUNT_CHECK_INTERVAL == 0 || i+1 == NALLOCS_PER_THREAD) { bt_count = prof_bt_count(); - assert_zu_le(bt_count_prev+(i-i_prev), bt_count, + expect_zu_le(bt_count_prev+(i-i_prev), bt_count, "Expected larger backtrace count increase"); i_prev = i; bt_count_prev = bt_count; @@ -58,7 +58,7 @@ TEST_BEGIN(test_idump) { test_skip_if(!config_prof); active = true; - 
assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 850a24a7..41c0512d 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -6,9 +6,9 @@ mallctl_bool_get(const char *name, bool expected, const char *func, int line) { size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, + expect_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); - assert_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, + expect_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, name); } @@ -19,11 +19,11 @@ mallctl_bool_set(const char *name, bool old_expected, bool val_new, size_t sz; sz = sizeof(old); - assert_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, + expect_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, sizeof(val_new)), 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); - assert_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, + expect_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, line, name); } @@ -67,11 +67,11 @@ prof_sampling_probe_impl(bool expect_sample, const char *func, int line) { void *p; size_t expected_backtraces = expect_sample ? 
1 : 0; - assert_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, + expect_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, line); p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_zu_eq(prof_bt_count(), expected_backtraces, + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_zu_eq(prof_bt_count(), expected_backtraces, "%s():%d: Unexpected backtrace count", func, line); dallocx(p, 0); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index f7e0aac7..4c6afbde 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -9,7 +9,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { did_prof_dump_open = true; fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -22,7 +22,7 @@ TEST_BEGIN(test_gdump) { test_skip_if(!config_prof); active = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); @@ -30,35 +30,35 @@ TEST_BEGIN(test_gdump) { did_prof_dump_open = false; p = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_true(did_prof_dump_open, "Expected a profile dump"); did_prof_dump_open = false; q = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_true(did_prof_dump_open, "Expected a profile dump"); gdump = false; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + expect_d_eq(mallctl("prof.gdump", (void 
*)&gdump_old, &sz, (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; r = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); - assert_false(did_prof_dump_open, "Unexpected profile dump"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_false(did_prof_dump_open, "Unexpected profile dump"); gdump = true; sz = sizeof(gdump_old); - assert_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, + expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, (void *)&gdump, sizeof(gdump)), 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; s = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); - assert_ptr_not_null(q, "Unexpected mallocx() failure"); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_ptr_not_null(q, "Unexpected mallocx() failure"); + expect_true(did_prof_dump_open, "Expected a profile dump"); dallocx(p, 0); dallocx(q, 0); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 7a9b2882..dfcc0ff6 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -11,11 +11,11 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { did_prof_dump_open = true; const char filename_prefix[] = TEST_PREFIX "."; - assert_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) + expect_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -30,11 +30,11 @@ TEST_BEGIN(test_idump) { active = true; - assert_d_eq(mallctl("prof.dump_prefix", NULL, NULL, + expect_d_eq(mallctl("prof.dump_prefix", NULL, NULL, (void *)&dump_prefix, sizeof(dump_prefix)), 0, "Unexpected mallctl failure while 
overwriting dump prefix"); - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); @@ -42,9 +42,9 @@ TEST_BEGIN(test_idump) { did_prof_dump_open = false; p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); dallocx(p, 0); - assert_true(did_prof_dump_open, "Expected a profile dump"); + expect_true(did_prof_dump_open, "Expected a profile dump"); } TEST_END diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 4b14fd56..6b2336dc 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -4,16 +4,16 @@ #define N_PARAM 100 #define N_THREADS 10 -static void assert_rep() { - assert_b_eq(prof_log_rep_check(), false, "Rep check failed"); +static void expect_rep() { + expect_b_eq(prof_log_rep_check(), false, "Rep check failed"); } -static void assert_log_empty() { - assert_zu_eq(prof_log_bt_count(), 0, +static void expect_log_empty() { + expect_zu_eq(prof_log_bt_count(), 0, "The log has backtraces; it isn't empty"); - assert_zu_eq(prof_log_thr_count(), 0, + expect_zu_eq(prof_log_thr_count(), 0, "The log has threads; it isn't empty"); - assert_zu_eq(prof_log_alloc_count(), 0, + expect_zu_eq(prof_log_alloc_count(), 0, "The log has allocations; it isn't empty"); } @@ -35,22 +35,22 @@ TEST_BEGIN(test_prof_log_many_logs) { test_skip_if(!config_prof); for (i = 0; i < N_PARAM; i++) { - assert_b_eq(prof_log_is_logging(), false, + expect_b_eq(prof_log_is_logging(), false, "Logging shouldn't have started yet"); - assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when starting logging"); - assert_b_eq(prof_log_is_logging(), true, + expect_b_eq(prof_log_is_logging(), true, "Logging should be started by now"); - assert_log_empty(); - 
assert_rep(); + expect_log_empty(); + expect_rep(); f(); - assert_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); - assert_rep(); - assert_b_eq(prof_log_is_logging(), true, + expect_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); + expect_rep(); + expect_b_eq(prof_log_is_logging(), true, "Logging should still be on"); - assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); - assert_b_eq(prof_log_is_logging(), false, + expect_b_eq(prof_log_is_logging(), false, "Logging should have turned off"); } } @@ -74,7 +74,7 @@ TEST_BEGIN(test_prof_log_many_threads) { test_skip_if(!config_prof); int i; - assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when starting logging"); for (i = 0; i < N_THREADS; i++) { thd_create(&thr_buf[i], &f_thread, NULL); @@ -83,10 +83,10 @@ TEST_BEGIN(test_prof_log_many_threads) { for (i = 0; i < N_THREADS; i++) { thd_join(thr_buf[i], NULL); } - assert_zu_eq(prof_log_thr_count(), N_THREADS, + expect_zu_eq(prof_log_thr_count(), N_THREADS, "Wrong number of thread entries"); - assert_rep(); - assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + expect_rep(); + expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } TEST_END @@ -111,19 +111,19 @@ TEST_BEGIN(test_prof_log_many_traces) { test_skip_if(!config_prof); - assert_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when starting logging"); int i; - assert_rep(); - assert_log_empty(); + expect_rep(); + expect_log_empty(); for (i = 0; i < N_PARAM; i++) { - assert_rep(); + expect_rep(); f1(); - assert_rep(); + expect_rep(); f2(); - assert_rep(); + expect_rep(); f3(); - assert_rep(); + 
expect_rep(); } /* * There should be 8 total backtraces: two for malloc/free in f1(), two @@ -132,9 +132,9 @@ TEST_BEGIN(test_prof_log_many_traces) { * optimizations such as loop unrolling might generate more call sites. * So >= 8 traces are expected. */ - assert_zu_ge(prof_log_bt_count(), 8, + expect_zu_ge(prof_log_bt_count(), 8, "Expect at least 8 backtraces given sample workload"); - assert_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } TEST_END diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 962be74e..7400d6cf 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -8,14 +8,14 @@ /* Invariant before and after every test (when config_prof is on) */ static void confirm_prof_setup(tsd_t *tsd) { /* Options */ - assert_true(opt_prof, "opt_prof not on"); - assert_true(opt_prof_active, "opt_prof_active not on"); - assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, + expect_true(opt_prof, "opt_prof not on"); + expect_true(opt_prof_active, "opt_prof_active not on"); + expect_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, "opt_prof_recent_alloc_max not set correctly"); /* Dynamics */ - assert_true(prof_active, "prof_active not on"); - assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + expect_true(prof_active, "prof_active not on"); + expect_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } @@ -35,11 +35,11 @@ TEST_BEGIN(test_prof_recent_off) { size_t len = len_ref; #define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ - assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ + expect_d_eq(mallctl("experimental.prof_recent." 
opt, a, b, c, \ d), ENOENT, "Should return ENOENT when config_prof is off");\ - assert_zd_eq(past, past_ref, "output was touched"); \ - assert_zu_eq(len, len_ref, "output length was touched"); \ - assert_zd_eq(future, future_ref, "input was touched"); \ + expect_zd_eq(past, past_ref, "output was touched"); \ + expect_zu_eq(len, len_ref, "output length was touched"); \ + expect_zd_eq(future, future_ref, "input was touched"); \ } while (0) ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); @@ -61,35 +61,35 @@ TEST_BEGIN(test_prof_recent_on) { confirm_prof_setup(tsd); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); confirm_prof_setup(tsd); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), 0, "Read error"); - assert_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); + expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, len), 0, "Write error"); future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); - assert_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); + expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); future = -2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), EINVAL, "Invalid write should return EINVAL"); - assert_zd_eq(past, OPT_ALLOC_MAX + 1, + expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Output should not be touched given invalid write"); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + 
expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); - assert_zd_eq(past, -1, "Wrong read result"); + expect_zd_eq(past, -1, "Wrong read result"); future = OPT_ALLOC_MAX + 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len * 2), EINVAL, "Invalid write should return EINVAL"); - assert_zd_eq(past, -1, + expect_zd_eq(past, -1, "Output should not be touched given invalid write"); confirm_prof_setup(tsd); @@ -100,44 +100,44 @@ TEST_END #define NTH_REQ_SIZE(n) ((n) * 97 + 101) static void confirm_malloc(tsd_t *tsd, void *p) { - assert_ptr_not_null(p, "malloc failed unexpectedly"); + expect_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); - assert_ptr_not_null(e, "NULL edata for living pointer"); + expect_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); - assert_ptr_not_null(n, "Record in edata should not be NULL"); - assert_ptr_not_null(n->alloc_tctx, + expect_ptr_not_null(n, "Record in edata should not be NULL"); + expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_eq(e, n->alloc_edata, + expect_ptr_eq(e, n->alloc_edata, "edata pointer in record is not correct"); - assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); + expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_zu_eq(n->size, NTH_REQ_SIZE(kth), + expect_zu_eq(n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { 
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_not_null(n->alloc_tctx, + expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_not_null(n->alloc_edata, + expect_ptr_not_null(n->alloc_edata, "Recorded edata should not be NULL for living pointer"); - assert_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), "Record in edata is not correct"); - assert_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); + expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } static void confirm_record_released(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_not_null(n->alloc_tctx, + expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_null(n->alloc_edata, + expect_ptr_null(n->alloc_edata, "Recorded edata should be NULL for released pointer"); - assert_ptr_not_null(n->dalloc_tctx, + expect_ptr_not_null(n->dalloc_tctx, "dalloc_tctx in record should not be NULL for released pointer"); } @@ -167,7 +167,7 @@ TEST_BEGIN(test_prof_recent_alloc) { if (i < OPT_ALLOC_MAX - 1) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_ne(prof_recent_alloc_begin(tsd), + expect_ptr_ne(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), "Empty recent allocation"); malloc_mutex_unlock(tsd_tsdn(tsd), @@ -194,7 +194,7 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -202,7 +202,7 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_prof_setup(tsd); b = false; - assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning off prof_active 
failed"); /* @@ -212,7 +212,7 @@ TEST_BEGIN(test_prof_recent_alloc) { for (; i < 3 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - assert_ptr_not_null(p, "malloc failed unexpectedly"); + expect_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); for (n = prof_recent_alloc_begin(tsd); @@ -223,13 +223,13 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } b = true; - assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning on prof_active failed"); confirm_prof_setup(tsd); @@ -267,14 +267,14 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } /* Increasing the limit shouldn't alter the list of records. */ future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -286,7 +286,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -294,7 +294,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * the new limit is still no less than the length of the list. 
*/ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -306,7 +306,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX, + expect_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -314,7 +314,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * limit is less than the length of the list. */ future = OPT_ALLOC_MAX - 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -326,12 +326,12 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX - 1, + expect_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Setting to unlimited shouldn't alter the list of records. */ future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -343,12 +343,12 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_u_eq(c, OPT_ALLOC_MAX - 1, + expect_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Downshift to only one record. 
*/ future = 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); n = prof_recent_alloc_begin(tsd); @@ -361,7 +361,7 @@ TEST_BEGIN(test_prof_recent_alloc) { /* Completely turn off. */ future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); @@ -369,7 +369,7 @@ TEST_BEGIN(test_prof_recent_alloc) { /* Restore the settings. */ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); @@ -395,7 +395,7 @@ static void test_dump_write_cb(void *not_used, const char *str) { static void call_dump() { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", + expect_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); } @@ -418,9 +418,9 @@ static void confirm_record(const char *template, * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". 
*/ - assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, + expect_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR); - assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, + expect_d_eq(memcmp(dump_out + strlen(dump_out) - 2, template + strlen(template) - 2, 2), 0, DUMP_ERROR); const char *start = dump_out + strlen(template) - 2; @@ -429,14 +429,14 @@ static void confirm_record(const char *template, for (record = records; record < records + n_records; ++record) { #define ASSERT_CHAR(c) do { \ - assert_true(start < end, DUMP_ERROR); \ - assert_c_eq(*start++, c, DUMP_ERROR); \ + expect_true(start < end, DUMP_ERROR); \ + expect_c_eq(*start++, c, DUMP_ERROR); \ } while (0) #define ASSERT_STR(s) do { \ const size_t len = strlen(s); \ - assert_true(start + len <= end, DUMP_ERROR); \ - assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + expect_true(start + len <= end, DUMP_ERROR); \ + expect_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ start += len; \ } while (0) @@ -512,8 +512,8 @@ static void confirm_record(const char *template, #undef ASSERT_CHAR } - assert_ptr_eq(record, records + n_records, DUMP_ERROR); - assert_ptr_eq(start, end, DUMP_ERROR); + expect_ptr_eq(record, records + n_records, DUMP_ERROR); + expect_ptr_eq(start, end, DUMP_ERROR); } TEST_BEGIN(test_prof_recent_alloc_dump) { @@ -527,18 +527,18 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record_t records[2]; future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); - assert_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", + expect_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); future = 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); 
const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; - assert_str_eq(dump_out, template, DUMP_ERROR); + expect_str_eq(dump_out, template, DUMP_ERROR); p = malloc(7); call_dump(); @@ -563,7 +563,7 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record(template, records, 2); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } @@ -632,7 +632,7 @@ static void *f_thread(void *arg) { last_max = prof_recent_alloc_max_ctl_write(tsd, test_max / 2); } - assert_zd_ge(last_max, -1, "Illegal last-N max"); + expect_zd_ge(last_max, -1, "Illegal last-N max"); } while (data_p->count > 0) { @@ -660,7 +660,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = STRESS_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); for (size_t i = 0; i < N_THREADS; i++) { thd_data_t *data_p = thd_data + i; @@ -673,7 +673,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", + expect_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 7cce42d2..e643e546 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -5,14 +5,14 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { int fd; fd = open("/dev/null", O_WRONLY); - assert_d_ne(fd, -1, "Unexpected open() failure"); + expect_d_ne(fd, -1, "Unexpected open() failure"); return fd; } static void set_prof_active(bool active) { - assert_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, + expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, 
sizeof(active)), 0, "Unexpected mallctl failure"); } @@ -21,7 +21,7 @@ get_lg_prof_sample(void) { size_t lg_prof_sample; size_t sz = sizeof(size_t); - assert_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, + expect_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); return lg_prof_sample; @@ -29,10 +29,10 @@ get_lg_prof_sample(void) { static void do_prof_reset(size_t lg_prof_sample) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, + expect_d_eq(mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); - assert_zu_eq(lg_prof_sample, get_lg_prof_sample(), + expect_zu_eq(lg_prof_sample, get_lg_prof_sample(), "Expected profile sample rate change"); } @@ -44,22 +44,22 @@ TEST_BEGIN(test_prof_reset_basic) { test_skip_if(!config_prof); sz = sizeof(size_t); - assert_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, + expect_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); - assert_zu_eq(lg_prof_sample_orig, 0, + expect_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); /* Test simple resets. 
*/ for (i = 0; i < 2; i++) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure while resetting profile data"); lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected profile sample rate change"); } @@ -68,14 +68,14 @@ TEST_BEGIN(test_prof_reset_basic) { for (i = 0; i < 2; i++) { do_prof_reset(lg_prof_sample_next); lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample, lg_prof_sample_next, + expect_zu_eq(lg_prof_sample, lg_prof_sample_next, "Expected profile sample rate change"); lg_prof_sample_next = lg_prof_sample_orig; } /* Make sure the test code restored prof.lg_sample. */ lg_prof_sample = get_lg_prof_sample(); - assert_zu_eq(lg_prof_sample_orig, lg_prof_sample, + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); } @@ -100,31 +100,31 @@ TEST_BEGIN(test_prof_reset_cleanup) { set_prof_active(true); - assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); + expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); prof_dump_header_orig = prof_dump_header; prof_dump_header = prof_dump_header_intercept; - assert_false(prof_dump_header_intercepted, "Unexpected intercept"); + expect_false(prof_dump_header_intercepted, "Unexpected intercept"); - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); - assert_true(prof_dump_header_intercepted, "Expected intercept"); - assert_u64_eq(cnt_all_copy.curobjs, 1, 
"Expected 1 allocation"); + expect_true(prof_dump_header_intercepted, "Expected intercept"); + expect_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation"); - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); - assert_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); - assert_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); + expect_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); + expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); prof_dump_header = prof_dump_header_orig; dallocx(p, 0); - assert_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); + expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); set_prof_active(false); } @@ -145,13 +145,13 @@ thd_start(void *varg) { for (i = 0; i < NALLOCS_PER_THREAD; i++) { if (i % RESET_INTERVAL == 0) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile " "data"); } if (i % DUMP_INTERVAL == 0) { - assert_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), 0, "Unexpected error while dumping heap profile"); } @@ -162,7 +162,7 @@ thd_start(void *varg) { *pp = NULL; } *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i); - assert_ptr_not_null(*pp, + expect_ptr_not_null(*pp, "Unexpected btalloc() failure"); } } @@ -189,7 +189,7 @@ TEST_BEGIN(test_prof_reset) { test_skip_if(!config_prof); bt_count = prof_bt_count(); - assert_zu_eq(bt_count, 0, + expect_zu_eq(bt_count, 0, "Unexpected pre-existing tdata structures"); tdata_count = prof_tdata_count(); @@ -206,9 +206,9 @@ TEST_BEGIN(test_prof_reset) { thd_join(thds[i], NULL); } - 
assert_zu_eq(prof_bt_count(), bt_count, + expect_zu_eq(prof_bt_count(), bt_count, "Unexpected bactrace count change"); - assert_zu_eq(prof_tdata_count(), tdata_count, + expect_zu_eq(prof_tdata_count(), tdata_count, "Unexpected remaining tdata structures"); set_prof_active(false); @@ -246,19 +246,19 @@ TEST_BEGIN(test_xallocx) { /* Allocate small object (which will be promoted). */ p = ptrs[i] = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); /* Reset profiling. */ do_prof_reset(0); /* Perform successful xallocx(). */ sz = sallocx(p, 0); - assert_zu_eq(xallocx(p, sz, 0, 0), sz, + expect_zu_eq(xallocx(p, sz, 0, 0), sz, "Unexpected xallocx() failure"); /* Perform unsuccessful xallocx(). */ nsz = nallocx(sz+1, 0); - assert_zu_eq(xallocx(p, nsz, 0, 0), sz, + expect_zu_eq(xallocx(p, nsz, 0, 0), sz, "Unexpected xallocx() success"); } diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 4dde0ab2..4bc597b6 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -14,27 +14,27 @@ TEST_BEGIN(test_prof_realloc) { prof_cnt_all(&curobjs_0, NULL, NULL, NULL); p = mallocx(1024, flags); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - assert_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_1, NULL, NULL, NULL); - assert_u64_eq(curobjs_0 + 1, curobjs_1, + expect_u64_eq(curobjs_0 + 1, curobjs_1, "Allocation should have increased sample size"); q = rallocx(p, 2048, flags); - assert_ptr_ne(p, q, "Expected move"); - assert_ptr_not_null(p, "Unexpected rmallocx() failure"); + expect_ptr_ne(p, q, "Expected move"); + expect_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - 
assert_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); prof_cnt_all(&curobjs_2, NULL, NULL, NULL); - assert_u64_eq(curobjs_1, curobjs_2, + expect_u64_eq(curobjs_1, curobjs_2, "Reallocation should not have changed sample size"); dallocx(q, flags); prof_cnt_all(&curobjs_3, NULL, NULL, NULL); - assert_u64_eq(curobjs_0, curobjs_3, + expect_u64_eq(curobjs_0, curobjs_3, "Sample size should have returned to base level"); } TEST_END diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index c9c2a2b7..4a9d38a2 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -7,11 +7,11 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, NULL, 0), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); - assert_str_eq(thread_name_old, thread_name_expected, + expect_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); } #define mallctl_thread_name_get(a) \ @@ -20,7 +20,7 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, static void mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); @@ -39,14 +39,14 @@ TEST_BEGIN(test_prof_thread_name_validation) { /* NULL input shouldn't be allowed. 
*/ thread_name = NULL; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); /* '\n' shouldn't be allowed. */ thread_name = "hi\nthere"; - assert_d_eq(mallctl("thread.prof.name", NULL, NULL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), EFAULT, "Unexpected mallctl result writing \"%s\" to thread.prof.name", thread_name); @@ -57,7 +57,7 @@ TEST_BEGIN(test_prof_thread_name_validation) { size_t sz; sz = sizeof(thread_name_old); - assert_d_eq(mallctl("thread.prof.name", + expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, (void *)&thread_name, sizeof(thread_name)), EPERM, "Unexpected mallctl result writing \"%s\" to " @@ -82,7 +82,7 @@ thd_start(void *varg) { mallctl_thread_name_set(thread_name); for (i = 0; i < NRESET; i++) { - assert_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); mallctl_thread_name_get(thread_name); } diff --git a/test/unit/ql.c b/test/unit/ql.c index b76c24c4..04da35fe 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -18,21 +18,21 @@ test_empty_list(list_head_t *head) { list_t *t; unsigned i; - assert_ptr_null(ql_first(head), "Unexpected element for empty list"); - assert_ptr_null(ql_last(head, link), + expect_ptr_null(ql_first(head), "Unexpected element for empty list"); + expect_ptr_null(ql_last(head, link), "Unexpected element for empty list"); i = 0; ql_foreach(t, head, link) { i++; } - assert_u_eq(i, 0, "Unexpected element for empty list"); + expect_u_eq(i, 0, "Unexpected element for empty list"); i = 0; ql_reverse_foreach(t, head, link) { i++; } - assert_u_eq(i, 0, "Unexpected element for empty list"); + expect_u_eq(i, 0, "Unexpected element for empty list"); } 
TEST_BEGIN(test_ql_empty) { @@ -58,34 +58,34 @@ test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { list_t *t; unsigned i; - assert_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); - assert_c_eq(ql_last(head, link)->id, entries[nentries-1].id, + expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); + expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id, "Element id mismatch"); i = 0; ql_foreach(t, head, link) { - assert_c_eq(t->id, entries[i].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i].id, "Element id mismatch"); i++; } i = 0; ql_reverse_foreach(t, head, link) { - assert_c_eq(t->id, entries[nentries-i-1].id, + expect_c_eq(t->id, entries[nentries-i-1].id, "Element id mismatch"); i++; } for (i = 0; i < nentries-1; i++) { t = ql_next(head, &entries[i], link); - assert_c_eq(t->id, entries[i+1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i+1].id, "Element id mismatch"); } - assert_ptr_null(ql_next(head, &entries[nentries-1], link), + expect_ptr_null(ql_next(head, &entries[nentries-1], link), "Unexpected element"); - assert_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); + expect_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); for (i = 1; i < nentries; i++) { t = ql_prev(head, &entries[i], link); - assert_c_eq(t->id, entries[i-1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i-1].id, "Element id mismatch"); } } diff --git a/test/unit/qr.c b/test/unit/qr.c index 271a1095..95c16927 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -34,7 +34,7 @@ test_independent_entries(ring_t *entries) { qr_foreach(t, &entries[i], link) { j++; } - assert_u_eq(j, 1, + expect_u_eq(j, 1, "Iteration over single-element ring should visit precisely " "one element"); } @@ -43,19 +43,19 @@ test_independent_entries(ring_t *entries) { qr_reverse_foreach(t, &entries[i], link) { j++; } - assert_u_eq(j, 1, + expect_u_eq(j, 1, "Iteration over single-element ring 
should visit precisely " "one element"); } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - assert_ptr_eq(t, &entries[i], + expect_ptr_eq(t, &entries[i], "Next element in single-element ring should be same as " "current element"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - assert_ptr_eq(t, &entries[i], + expect_ptr_eq(t, &entries[i], "Previous element in single-element ring should be same as " "current element"); } @@ -77,7 +77,7 @@ test_entries_ring(ring_t *entries) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(i+j) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+j) % NENTRIES].id, "Element id mismatch"); j++; } @@ -85,19 +85,19 @@ test_entries_ring(ring_t *entries) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(NENTRIES+i-j-1) % + expect_c_eq(t->id, entries[(NENTRIES+i-j-1) % NENTRIES].id, "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - assert_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, "Element id mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - assert_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, "Element id mismatch"); } } @@ -127,13 +127,13 @@ TEST_BEGIN(test_qr_remove) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[i+j].id, + expect_c_eq(t->id, entries[i+j].id, "Element id mismatch"); j++; } j = 0; qr_reverse_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[NENTRIES - 1 - j].id, + expect_c_eq(t->id, entries[NENTRIES - 1 - j].id, "Element id mismatch"); j++; } @@ -155,7 +155,7 @@ TEST_BEGIN(test_qr_before_insert) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_foreach(t, &entries[i], link) { - assert_c_eq(t->id, 
entries[(NENTRIES+i-j) % + expect_c_eq(t->id, entries[(NENTRIES+i-j) % NENTRIES].id, "Element id mismatch"); j++; } @@ -163,19 +163,19 @@ TEST_BEGIN(test_qr_before_insert) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - assert_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - assert_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, "Element id mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - assert_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, "Element id mismatch"); } } @@ -190,11 +190,11 @@ test_split_entries(ring_t *entries) { j = 0; qr_foreach(t, &entries[i], link) { if (i < SPLIT_INDEX) { - assert_c_eq(t->id, + expect_c_eq(t->id, entries[(i+j) % SPLIT_INDEX].id, "Element id mismatch"); } else { - assert_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % + expect_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % (NENTRIES-SPLIT_INDEX) + SPLIT_INDEX].id, "Element id mismatch"); } diff --git a/test/unit/rb.c b/test/unit/rb.c index 65c04920..2509a6dd 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -26,8 +26,8 @@ static int node_cmp(const node_t *a, const node_t *b) { int ret; - assert_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); - assert_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); ret = (a->key > b->key) - (a->key < b->key); if (ret == 0) { @@ -50,21 +50,21 @@ TEST_BEGIN(test_rb_empty) { tree_new(&tree); - assert_true(tree_empty(&tree), "Tree should be empty"); - assert_ptr_null(tree_first(&tree), "Unexpected node"); - assert_ptr_null(tree_last(&tree), "Unexpected node"); + expect_true(tree_empty(&tree), "Tree should be empty"); + 
expect_ptr_null(tree_first(&tree), "Unexpected node"); + expect_ptr_null(tree_last(&tree), "Unexpected node"); key.key = 0; key.magic = NODE_MAGIC; - assert_ptr_null(tree_search(&tree, &key), "Unexpected node"); + expect_ptr_null(tree_search(&tree, &key), "Unexpected node"); key.key = 0; key.magic = NODE_MAGIC; - assert_ptr_null(tree_nsearch(&tree, &key), "Unexpected node"); + expect_ptr_null(tree_nsearch(&tree, &key), "Unexpected node"); key.key = 0; key.magic = NODE_MAGIC; - assert_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); + expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); } TEST_END @@ -88,17 +88,17 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { /* Red nodes must be interleaved with black nodes. */ if (rbtn_red_get(node_t, link, node)) { if (left_node != NULL) { - assert_false(rbtn_red_get(node_t, link, left_node), + expect_false(rbtn_red_get(node_t, link, left_node), "Node should be black"); } if (right_node != NULL) { - assert_false(rbtn_red_get(node_t, link, right_node), + expect_false(rbtn_red_get(node_t, link, right_node), "Node should be black"); } } /* Self. */ - assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Left subtree. */ if (left_node != NULL) { @@ -122,21 +122,21 @@ tree_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; node_t *search_node; - assert_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Test rb_search(). */ search_node = tree_search(tree, node); - assert_ptr_eq(search_node, node, + expect_ptr_eq(search_node, node, "tree_search() returned unexpected node"); /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); - assert_ptr_eq(search_node, node, + expect_ptr_eq(search_node, node, "tree_nsearch() returned unexpected node"); /* Test rb_psearch(). 
*/ search_node = tree_psearch(tree, node); - assert_ptr_eq(search_node, node, + expect_ptr_eq(search_node, node, "tree_psearch() returned unexpected node"); (*i)++; @@ -174,14 +174,14 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); if (search_node != NULL) { - assert_u64_ge(search_node->key, node->key, + expect_u64_ge(search_node->key, node->key, "Key ordering error"); } /* Test rb_psearch(). */ search_node = tree_psearch(tree, node); if (search_node != NULL) { - assert_u64_le(search_node->key, node->key, + expect_u64_le(search_node->key, node->key, "Key ordering error"); } @@ -189,10 +189,10 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { rbtn_black_height(node_t, link, tree, black_height); imbalances = tree_recurse(tree->rbt_root, black_height, 0); - assert_u_eq(imbalances, 0, "Tree is unbalanced"); - assert_u_eq(tree_iterate(tree), nnodes-1, + expect_u_eq(imbalances, 0, "Tree is unbalanced"); + expect_u_eq(tree_iterate(tree), nnodes-1, "Unexpected node iteration count"); - assert_u_eq(tree_iterate_reverse(tree), nnodes-1, + expect_u_eq(tree_iterate_reverse(tree), nnodes-1, "Unexpected node iteration count"); } @@ -220,7 +220,7 @@ static void destroy_cb(node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; - assert_u_gt(*nnodes, 0, "Destruction removed too many nodes"); + expect_u_gt(*nnodes, 0, "Destruction removed too many nodes"); (*nnodes)--; } @@ -271,19 +271,19 @@ TEST_BEGIN(test_rb_random) { black_height); imbalances = tree_recurse(tree.rbt_root, black_height, 0); - assert_u_eq(imbalances, 0, + expect_u_eq(imbalances, 0, "Tree is unbalanced"); - assert_u_eq(tree_iterate(&tree), k+1, + expect_u_eq(tree_iterate(&tree), k+1, "Unexpected node iteration count"); - assert_u_eq(tree_iterate_reverse(&tree), k+1, + expect_u_eq(tree_iterate_reverse(&tree), k+1, "Unexpected node iteration count"); - assert_false(tree_empty(&tree), + expect_false(tree_empty(&tree), 
"Tree should not be empty"); - assert_ptr_not_null(tree_first(&tree), + expect_ptr_not_null(tree_first(&tree), "Tree should not be empty"); - assert_ptr_not_null(tree_last(&tree), + expect_ptr_not_null(tree_last(&tree), "Tree should not be empty"); tree_next(&tree, &nodes[k]); @@ -312,7 +312,7 @@ TEST_BEGIN(test_rb_random) { remove_iterate_cb, (void *)&nnodes); nnodes--; } while (start != NULL); - assert_u_eq(nnodes, 0, + expect_u_eq(nnodes, 0, "Removal terminated early"); break; } case 3: { @@ -326,13 +326,13 @@ TEST_BEGIN(test_rb_random) { (void *)&nnodes); nnodes--; } while (start != NULL); - assert_u_eq(nnodes, 0, + expect_u_eq(nnodes, 0, "Removal terminated early"); break; } case 4: { unsigned nnodes = j; tree_destroy(&tree, destroy_cb, &nnodes); - assert_u_eq(nnodes, 0, + expect_u_eq(nnodes, 0, "Destruction terminated early"); break; } default: diff --git a/test/unit/retained.c b/test/unit/retained.c index e6b6f7b7..9ac83ef6 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -14,7 +14,7 @@ static unsigned do_arena_create(extent_hooks_t *h) { unsigned arena_ind; size_t sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); return arena_ind; @@ -26,17 +26,17 @@ do_arena_destroy(unsigned arena_ind) { size_t miblen; miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } static void do_refresh(void) { uint64_t epoch = 1; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } @@ -46,11 +46,11 @@ do_get_size_impl(const char *cmd, unsigned arena_ind) { size_t miblen = sizeof(mib) / sizeof(size_t); size_t z = sizeof(size_t); - assert_d_eq(mallctlnametomib(cmd, mib, &miblen), + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = arena_ind; size_t size; - assert_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), + expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd); return size; @@ -76,7 +76,7 @@ thd_start(void *arg) { next_epoch) { spin_adaptive(&spinner); } - assert_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); + expect_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); /* * Allocate. 
The main thread will reset the arena, so there's @@ -86,7 +86,7 @@ thd_start(void *arg) { void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE ); - assert_ptr_not_null(p, + expect_ptr_not_null(p, "Unexpected mallocx() failure\n"); } @@ -134,9 +134,9 @@ TEST_BEGIN(test_retained) { size_t allocated = esz * nthreads * PER_THD_NALLOCS; size_t active = do_get_active(arena_ind); - assert_zu_le(allocated, active, "Unexpected active memory"); + expect_zu_le(allocated, active, "Unexpected active memory"); size_t mapped = do_get_mapped(arena_ind); - assert_zu_le(active, mapped, "Unexpected mapped memory"); + expect_zu_le(active, mapped, "Unexpected mapped memory"); arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); size_t usable = 0; @@ -150,7 +150,7 @@ TEST_BEGIN(test_retained) { * Only consider size classes that wouldn't be skipped. */ if (psz_usable > 0) { - assert_zu_lt(usable, allocated, + expect_zu_lt(usable, allocated, "Excessive retained memory " "(%#zx[+%#zx] > %#zx)", usable, psz_usable, allocated); @@ -165,7 +165,7 @@ TEST_BEGIN(test_retained) { * (rather than retaining) during reset. 
*/ do_arena_destroy(arena_ind); - assert_u_eq(do_arena_create(NULL), arena_ind, + expect_u_eq(do_arena_create(NULL), arena_ind, "Unexpected arena index"); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index b5ece82c..3545cfc0 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -13,14 +13,14 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, false), "rtree_edata_read() should return NULL for empty tree"); base_delete(tsdn, base); @@ -42,27 +42,27 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, edata_szind_get(&edata_a), edata_slab_get(&edata_a)), "Unexpected rtree_write() failure"); rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, edata_szind_get(&edata_a), edata_slab_get(&edata_a)); - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), &edata_a, "rtree_edata_read() should return previously set 
value"); - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), &edata_b, edata_szind_get_maybe_invalid(&edata_b), edata_slab_get(&edata_b)), "Unexpected rtree_write() failure"); - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), true), &edata_b, "rtree_edata_read() should return previously set value"); @@ -73,7 +73,7 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; @@ -85,22 +85,22 @@ TEST_BEGIN(test_rtree_bits) { rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { - assert_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], &edata, SC_NSIZES, false), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[j], true), &edata, "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, (((uintptr_t)2) << LG_PAGE), false), "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); @@ -117,7 +117,7 @@ 
TEST_BEGIN(test_rtree_random) { tsdn_t *tsdn = tsdn_fetch(); base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); - assert_ptr_not_null(base, "Unexpected base_new failure"); + expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[NSET]; rtree_t *rtree = &test_rtree; @@ -128,23 +128,23 @@ TEST_BEGIN(test_rtree_random) { edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, true, EXTENT_NOT_HEAD); - assert_false(rtree_new(rtree, base, false), + expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, keys[i], false, true); - assert_ptr_not_null(elm, + expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); rtree_leaf_elm_write(tsdn, rtree, elm, &edata, SC_NSIZES, false); - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, "rtree_edata_read() should return previously set value, " "i=%u", i); @@ -152,12 +152,12 @@ TEST_BEGIN(test_rtree_random) { for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - assert_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, + expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), "rtree_edata_read() should return previously set value"); } diff --git a/test/unit/safety_check.c 
b/test/unit/safety_check.c index bf4bd86d..516a0969 100644 --- a/test/unit/safety_check.c +++ b/test/unit/safety_check.c @@ -24,7 +24,7 @@ TEST_BEGIN(test_malloc_free_overflow) { free(ptr); safety_check_set_abort(NULL); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -40,7 +40,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) { dallocx(ptr, 0); safety_check_set_abort(NULL); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -56,7 +56,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) { sdallocx(ptr, 128, 0); safety_check_set_abort(NULL); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -73,7 +73,7 @@ TEST_BEGIN(test_realloc_overflow) { safety_check_set_abort(NULL); free(ptr); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -90,7 +90,7 @@ TEST_BEGIN(test_rallocx_overflow) { safety_check_set_abort(NULL); free(ptr); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; } TEST_END @@ -104,9 +104,9 @@ TEST_BEGIN(test_xallocx_overflow) { char* ptr = malloc(128); ptr[128] = 0; size_t result = xallocx(ptr, 129, 0, 0); - assert_zu_eq(result, 128, ""); + expect_zu_eq(result, 128, ""); free(ptr); - assert_b_eq(fake_abort_called, true, "Redzone check didn't fire."); + expect_b_eq(fake_abort_called, true, "Redzone check didn't fire."); fake_abort_called = false; safety_check_set_abort(NULL); } diff --git a/test/unit/sc.c b/test/unit/sc.c index 
bf51d8e5..d207481c 100644 --- a/test/unit/sc.c +++ b/test/unit/sc.c @@ -9,7 +9,7 @@ TEST_BEGIN(test_update_slab_size) { + (ZU(tiny->ndelta) << tiny->lg_delta); size_t pgs_too_big = (tiny_size * BITMAP_MAXBITS + PAGE - 1) / PAGE + 1; sc_data_update_slab_size(&data, tiny_size, tiny_size, (int)pgs_too_big); - assert_zu_lt((size_t)tiny->pgs, pgs_too_big, "Allowed excessive pages"); + expect_zu_lt((size_t)tiny->pgs, pgs_too_big, "Allowed excessive pages"); sc_data_update_slab_size(&data, 1, 10 * PAGE, 1); for (int i = 0; i < data.nbins; i++) { @@ -17,9 +17,9 @@ TEST_BEGIN(test_update_slab_size) { size_t reg_size = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); if (reg_size <= PAGE) { - assert_d_eq(sc->pgs, 1, "Ignored valid page size hint"); + expect_d_eq(sc->pgs, 1, "Ignored valid page size hint"); } else { - assert_d_gt(sc->pgs, 1, + expect_d_gt(sc->pgs, 1, "Allowed invalid page size hint"); } } diff --git a/test/unit/seq.c b/test/unit/seq.c index 19613b0b..06ed6834 100644 --- a/test/unit/seq.c +++ b/test/unit/seq.c @@ -15,10 +15,10 @@ set_data(data_t *data, int num) { } static void -assert_data(data_t *data) { +expect_data(data_t *data) { int num = data->arr[0]; for (int i = 0; i < 10; i++) { - assert_d_eq(num, data->arr[i], "Data consistency error"); + expect_d_eq(num, data->arr[i], "Data consistency error"); } } @@ -37,8 +37,8 @@ seq_reader_thd(void *arg) { while (iter < 1000 * 1000 - 1) { bool success = seq_try_load_data(&local_data, &thd_data->data); if (success) { - assert_data(&local_data); - assert_d_le(iter, local_data.arr[0], + expect_data(&local_data); + expect_d_le(iter, local_data.arr[0], "Seq read went back in time."); iter = local_data.arr[0]; } @@ -82,8 +82,8 @@ TEST_BEGIN(test_seq_simple) { seq_store_data(&seq, &data); set_data(&data, 0); bool success = seq_try_load_data(&data, &seq); - assert_b_eq(success, true, "Failed non-racing read"); - assert_data(&data); + expect_b_eq(success, true, "Failed non-racing read"); + expect_data(&data); } 
} TEST_END diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 69473363..c70eb592 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -7,16 +7,16 @@ get_max_size_class(void) { size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, + expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); - assert_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, + expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); mib[2] = nlextents - 1; sz = sizeof(size_t); - assert_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, + expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, "Unexpected mallctlbymib() error"); return max_size_class; @@ -32,50 +32,50 @@ TEST_BEGIN(test_size_classes) { for (index = 0, size_class = sz_index2size(index); index < max_index || size_class < max_size_class; index++, size_class = sz_index2size(index)) { - assert_true(index < max_index, + expect_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - assert_true(size_class < max_size_class, + expect_true(size_class < max_size_class, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - assert_u_eq(index, sz_size2index(size_class), + expect_u_eq(index, sz_size2index(size_class), "sz_size2index() does not reverse sz_index2size(): index=%u" " --> size_class=%zu --> index=%u --> size_class=%zu", index, size_class, sz_size2index(size_class), sz_index2size(sz_size2index(size_class))); - assert_zu_eq(size_class, + expect_zu_eq(size_class, sz_index2size(sz_size2index(size_class)), "sz_index2size() does not reverse sz_size2index(): index=%u" " --> size_class=%zu --> 
index=%u --> size_class=%zu", index, size_class, sz_size2index(size_class), sz_index2size(sz_size2index(size_class))); - assert_u_eq(index+1, sz_size2index(size_class+1), + expect_u_eq(index+1, sz_size2index(size_class+1), "Next size_class does not round up properly"); - assert_zu_eq(size_class, (index > 0) ? + expect_zu_eq(size_class, (index > 0) ? sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class-1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class), + expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); - assert_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), + expect_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), "sz_s2u() does not round up to next size class"); } - assert_u_eq(index, sz_size2index(sz_index2size(index)), + expect_u_eq(index, sz_size2index(sz_index2size(index)), "sz_size2index() does not reverse sz_index2size()"); - assert_zu_eq(max_size_class, sz_index2size( + expect_zu_eq(max_size_class, sz_index2size( sz_size2index(max_size_class)), "sz_index2size() does not reverse sz_size2index()"); - assert_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), + expect_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class-1), "sz_s2u() does not round up to size class"); - assert_zu_eq(size_class, sz_s2u(size_class), + expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); } TEST_END @@ -90,53 +90,53 @@ TEST_BEGIN(test_psize_classes) { for (pind = 0, size_class = sz_pind2sz(pind); pind < max_pind || size_class < max_psz; pind++, size_class = sz_pind2sz(pind)) { - assert_true(pind < max_pind, + expect_true(pind < max_pind, "Loop conditionals should be equivalent; pind=%u, 
" "size_class=%zu (%#zx)", pind, size_class, size_class); - assert_true(size_class < max_psz, + expect_true(size_class < max_psz, "Loop conditionals should be equivalent; pind=%u, " "size_class=%zu (%#zx)", pind, size_class, size_class); - assert_u_eq(pind, sz_psz2ind(size_class), + expect_u_eq(pind, sz_psz2ind(size_class), "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->" " size_class=%zu --> pind=%u --> size_class=%zu", pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); - assert_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), + expect_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->" " size_class=%zu --> pind=%u --> size_class=%zu", pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); if (size_class == SC_LARGE_MAXCLASS) { - assert_u_eq(SC_NPSIZES, sz_psz2ind(size_class + 1), + expect_u_eq(SC_NPSIZES, sz_psz2ind(size_class + 1), "Next size_class does not round up properly"); } else { - assert_u_eq(pind + 1, sz_psz2ind(size_class + 1), + expect_u_eq(pind + 1, sz_psz2ind(size_class + 1), "Next size_class does not round up properly"); } - assert_zu_eq(size_class, (pind > 0) ? + expect_zu_eq(size_class, (pind > 0) ? 
sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class-1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class), + expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); - assert_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), + expect_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), "sz_psz2u() does not round up to next size class"); } - assert_u_eq(pind, sz_psz2ind(sz_pind2sz(pind)), + expect_u_eq(pind, sz_psz2ind(sz_pind2sz(pind)), "sz_psz2ind() does not reverse sz_pind2sz()"); - assert_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), + expect_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), "sz_pind2sz() does not reverse sz_psz2ind()"); - assert_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), + expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class-1), "sz_psz2u() does not round up to size class"); - assert_zu_eq(size_class, sz_psz2u(size_class), + expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); } TEST_END @@ -147,34 +147,34 @@ TEST_BEGIN(test_overflow) { max_size_class = get_max_size_class(); max_psz = max_size_class + PAGE; - assert_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, + expect_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, + expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, + expect_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - assert_zu_eq(sz_s2u(max_size_class+1), 
0, + expect_zu_eq(sz_s2u(max_size_class+1), 0, "sz_s2u() should return 0 for unsupported size"); - assert_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, + expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, "sz_s2u() should return 0 for unsupported size"); - assert_zu_eq(sz_s2u(SIZE_T_MAX), 0, + expect_zu_eq(sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow"); - assert_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, + expect_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - assert_zu_eq(sz_psz2u(max_size_class+1), max_psz, + expect_zu_eq(sz_psz2u(max_size_class+1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); - assert_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, + expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported " "size"); - assert_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, + expect_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) on overflow"); } TEST_END diff --git a/test/unit/slab.c b/test/unit/slab.c index 5d2b35fa..304a93d4 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -14,12 +14,12 @@ TEST_BEGIN(test_arena_slab_regind) { bin_info->slab_size, true, binind, 0, extent_state_active, false, true, true, EXTENT_NOT_HEAD); - assert_ptr_not_null(edata_addr_get(&slab), + expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); for (regind = 0; regind < bin_info->nregs; regind++) { void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); - assert_zu_eq(arena_slab_regind(&slab, binind, reg), + 
expect_zu_eq(arena_slab_regind(&slab, binind, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 7c5dbb7e..588c9f44 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -26,9 +26,9 @@ TEST_BEGIN(test_smoothstep_integral) { max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); min = max - SMOOTHSTEP_NSTEPS; - assert_u64_ge(sum, min, + expect_u64_ge(sum, min, "Integral too small, even accounting for truncation"); - assert_u64_le(sum, max, "Integral exceeds 1/2"); + expect_u64_le(sum, max, "Integral exceeds 1/2"); if (false) { malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", max - sum, SMOOTHSTEP_NSTEPS); @@ -49,10 +49,10 @@ TEST_BEGIN(test_smoothstep_monotonic) { prev_h = 0; for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { uint64_t h = smoothstep_tab[i]; - assert_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); + expect_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); prev_h = h; } - assert_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], + expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], (KQU(1) << SMOOTHSTEP_BFP), "Last step must equal 1"); } TEST_END @@ -72,7 +72,7 @@ TEST_BEGIN(test_smoothstep_slope) { for (i = 0; i < SMOOTHSTEP_NSTEPS / 2 + SMOOTHSTEP_NSTEPS % 2; i++) { uint64_t h = smoothstep_tab[i]; uint64_t delta = h - prev_h; - assert_u64_ge(delta, prev_delta, + expect_u64_ge(delta, prev_delta, "Slope must monotonically increase in 0.0 <= x <= 0.5, " "i=%u", i); prev_h = h; @@ -84,7 +84,7 @@ TEST_BEGIN(test_smoothstep_slope) { for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { uint64_t h = smoothstep_tab[i]; uint64_t delta = prev_h - h; - assert_u64_ge(delta, prev_delta, + expect_u64_ge(delta, prev_delta, "Slope must monotonically decrease in 0.5 <= x <= 1.0, " "i=%u", i); prev_h = h; diff --git a/test/unit/stats.c b/test/unit/stats.c index 646768e8..f4ac154d 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ 
-5,21 +5,21 @@ TEST_BEGIN(test_stats_summary) { int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); - assert_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, + expect_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), + expect_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), + expect_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), + expect_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_le(allocated, active, + expect_zu_le(allocated, active, "allocated should be no larger than active"); - assert_zu_lt(active, resident, + expect_zu_lt(active, resident, "active should be less than resident"); - assert_zu_lt(active, mapped, + expect_zu_lt(active, mapped, "active should be less than mapped"); } } @@ -34,30 +34,30 @@ TEST_BEGIN(test_stats_large) { int expected = config_stats ? 
0 : ENOENT; p = mallocx(SC_SMALL_MAXCLASS + 1, MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", + expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.nrequests", + expect_d_eq(mallctl("stats.arenas.0.large.nrequests", (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_gt(allocated, 0, + expect_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_le(nmalloc, nrequests, + expect_u64_le(nmalloc, nrequests, "nmalloc should no larger than nrequests"); } @@ -75,54 +75,54 @@ TEST_BEGIN(test_stats_arenas_summary) { uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); - assert_ptr_not_null(little, "Unexpected mallocx() failure"); + expect_ptr_not_null(little, "Unexpected mallocx() failure"); large = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); - assert_ptr_not_null(large, "Unexpected mallocx() failure"); + expect_ptr_not_null(large, 
"Unexpected mallocx() failure"); dallocx(little, 0); dallocx(large, 0); - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, + expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, + expect_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.dirty_npurge", + expect_d_eq(mallctl("stats.arenas.0.dirty_npurge", (void *)&dirty_npurge, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", + expect_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", (void *)&dirty_nmadvise, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.dirty_purged", + expect_d_eq(mallctl("stats.arenas.0.dirty_purged", (void *)&dirty_purged, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.muzzy_npurge", + expect_d_eq(mallctl("stats.arenas.0.muzzy_npurge", (void *)&muzzy_npurge, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", + expect_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", (void *)&muzzy_nmadvise, &sz, NULL, 0), expected, "Unexepected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.muzzy_purged", + expect_d_eq(mallctl("stats.arenas.0.muzzy_purged", (void *)&muzzy_purged, &sz, NULL, 0), expected, "Unexepected mallctl() result"); if 
(config_stats) { if (!background_thread_enabled()) { - assert_u64_gt(dirty_npurge + muzzy_npurge, 0, + expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); } - assert_u64_le(dirty_nmadvise, dirty_purged, + expect_u64_le(dirty_nmadvise, dirty_purged, "dirty_nmadvise should be no greater than dirty_purged"); - assert_u64_le(muzzy_nmadvise, muzzy_purged, + expect_u64_le(muzzy_nmadvise, muzzy_purged, "muzzy_nmadvise should be no greater than muzzy_purged"); } } @@ -150,35 +150,35 @@ TEST_BEGIN(test_stats_arenas_small) { no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ p = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.small.allocated", + expect_d_eq(mallctl("stats.arenas.0.small.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, + expect_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, + expect_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.small.nrequests", + expect_d_eq(mallctl("stats.arenas.0.small.nrequests", (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if 
(config_stats) { - assert_zu_gt(allocated, 0, + expect_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be no greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(nrequests, 0, + expect_u64_gt(nrequests, 0, "nrequests should be greater than zero"); } @@ -193,27 +193,27 @@ TEST_BEGIN(test_stats_arenas_large) { int expected = config_stats ? 0 : ENOENT; p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.large.allocated", + expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, + expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, + expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_zu_gt(allocated, 0, + expect_zu_gt(allocated, 0, "allocated should be greater than zero"); - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } @@ -234,85 +234,85 @@ TEST_BEGIN(test_stats_arenas_bins) { int expected = config_stats ? 
0 : ENOENT; /* Make sure allocation below isn't satisfied by tcache. */ - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 0 : EFAULT, "Unexpected mallctl() result"); unsigned arena_ind, old_arena_ind; sz = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Arena creation failure"); sz = sizeof(arena_ind); - assert_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, + expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); p = malloc(bin_infos[0].reg_size); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), opt_tcache ? 
0 : EFAULT, "Unexpected mallctl() result"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); char cmd[128]; sz = sizeof(uint64_t); gen_mallctl_str(cmd, "nmalloc", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "ndalloc", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nrequests", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nrequests, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); gen_mallctl_str(cmd, "curregs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&curregs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); gen_mallctl_str(cmd, "nfills", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nfills, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nflushes", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nflushes, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nslabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nreslabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&nreslabs, &sz, NULL, 
0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); gen_mallctl_str(cmd, "curslabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected, + expect_d_eq(mallctl(cmd, (void *)&curslabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); gen_mallctl_str(cmd, "nonfull_slabs", arena_ind); - assert_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0), + expect_d_eq(mallctl(cmd, (void *)&nonfull_slabs, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(nrequests, 0, + expect_u64_gt(nrequests, 0, "nrequests should be greater than zero"); - assert_zu_gt(curregs, 0, + expect_zu_gt(curregs, 0, "allocated should be greater than zero"); if (opt_tcache) { - assert_u64_gt(nfills, 0, + expect_u64_gt(nfills, 0, "At least one fill should have occurred"); - assert_u64_gt(nflushes, 0, + expect_u64_gt(nflushes, 0, "At least one flush should have occurred"); } - assert_u64_gt(nslabs, 0, + expect_u64_gt(nslabs, 0, "At least one slab should have been allocated"); - assert_zu_gt(curslabs, 0, + expect_zu_gt(curslabs, 0, "At least one slab should be currently allocated"); - assert_zu_eq(nonfull_slabs, 0, + expect_zu_eq(nonfull_slabs, 0, "slabs_nonfull should be empty"); } @@ -327,33 +327,33 @@ TEST_BEGIN(test_stats_arenas_lextents) { int expected = config_stats ? 
0 : ENOENT; sz = sizeof(size_t); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); p = mallocx(hsize, MALLOCX_ARENA(0)); - assert_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); sz = sizeof(uint64_t); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", + expect_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", (void *)&nmalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", + expect_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", (void *)&ndalloc, &sz, NULL, 0), expected, "Unexpected mallctl() result"); sz = sizeof(size_t); - assert_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", + expect_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", (void *)&curlextents, &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - assert_u64_gt(nmalloc, 0, + expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); - assert_u64_ge(nmalloc, ndalloc, + expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - assert_u64_gt(curlextents, 0, + expect_u64_gt(curlextents, 0, "At least one extent should be currently allocated"); } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 014d002f..3b317753 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -136,7 +136,7 @@ parser_tokenize(parser_t *parser) { size_t token_line JEMALLOC_CC_SILENCE_INIT(1); size_t token_col JEMALLOC_CC_SILENCE_INIT(0); - assert_zu_le(parser->pos, parser->len, + expect_zu_le(parser->pos, parser->len, "Position is past end of buffer"); while (state != 
STATE_ACCEPT) { @@ -686,7 +686,7 @@ parser_parse_value(parser_t *parser) { static bool parser_parse_pair(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, "Pair should start with string"); if (parser_tokenize(parser)) { return true; @@ -731,7 +731,7 @@ parser_parse_values(parser_t *parser) { static bool parser_parse_array(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACKET, "Array should start with ["); if (parser_tokenize(parser)) { return true; @@ -747,7 +747,7 @@ parser_parse_array(parser_t *parser) { static bool parser_parse_pairs(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_STRING, "Object should start with string"); if (parser_parse_pair(parser)) { return true; @@ -782,7 +782,7 @@ parser_parse_pairs(parser_t *parser) { static bool parser_parse_object(parser_t *parser) { - assert_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, + expect_d_eq(parser->token.token_type, TOKEN_TYPE_LBRACE, "Object should start with {"); if (parser_tokenize(parser)) { return true; @@ -899,9 +899,9 @@ TEST_BEGIN(test_json_parser) { const char *input = invalid_inputs[i]; parser_t parser; parser_init(&parser, false); - assert_false(parser_append(&parser, input), + expect_false(parser_append(&parser, input), "Unexpected input appending failure"); - assert_true(parser_parse(&parser), + expect_true(parser_parse(&parser), "Unexpected parse success for input: %s", input); parser_fini(&parser); } @@ -910,9 +910,9 @@ TEST_BEGIN(test_json_parser) { const char *input = valid_inputs[i]; parser_t parser; parser_init(&parser, true); - assert_false(parser_append(&parser, input), + expect_false(parser_append(&parser, input), "Unexpected input appending failure"); - assert_false(parser_parse(&parser), + expect_false(parser_parse(&parser), 
"Unexpected parse error for input: %s", input); parser_fini(&parser); } @@ -961,17 +961,17 @@ TEST_BEGIN(test_stats_print_json) { break; case 1: { size_t sz = sizeof(arena_ind); - assert_d_eq(mallctl("arenas.create", (void *)&arena_ind, + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl failure"); break; } case 2: { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); - assert_d_eq(mallctlnametomib("arena.0.destroy", + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); mib[1] = arena_ind; - assert_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib failure"); break; } default: @@ -983,7 +983,7 @@ TEST_BEGIN(test_stats_print_json) { parser_init(&parser, true); malloc_stats_print(write_cb, (void *)&parser, opts[j]); - assert_false(parser_parse(&parser), + expect_false(parser_parse(&parser), "Unexpected parse error, opts=\"%s\"", opts[j]); parser_fini(&parser); } diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index ded8698b..2a5b3d52 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -17,16 +17,16 @@ func_to_hook(int arg1, int arg2) { TEST_BEGIN(unhooked_call) { test_hooks_libc_hook = NULL; hook_called = false; - assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); - assert_false(hook_called, "Nulling out hook didn't take."); + expect_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); + expect_false(hook_called, "Nulling out hook didn't take."); } TEST_END TEST_BEGIN(hooked_call) { test_hooks_libc_hook = &hook; hook_called = false; - assert_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); - assert_true(hook_called, "Hook should have executed."); + expect_d_eq(3, func_to_hook(1, 2), "Hooking changed return value."); + expect_true(hook_called, "Hook should have executed."); } TEST_END diff --git 
a/test/unit/thread_event.c b/test/unit/thread_event.c index 0855829c..ef3b95ce 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -15,7 +15,7 @@ TEST_BEGIN(test_next_event_fast_roll_back) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(16U); - assert_ptr_not_null(p, "malloc() failed"); + expect_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -37,7 +37,7 @@ TEST_BEGIN(test_next_event_fast_resume) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(SC_LOOKUP_MAXCLASS); - assert_ptr_not_null(p, "malloc() failed"); + expect_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -50,7 +50,7 @@ TEST_BEGIN(test_event_rollback) { while (count-- != 0) { te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); - assert_u64_eq(thread_allocated - thread_allocated_after, diff, + expect_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); thread_allocated = thread_allocated_after; } diff --git a/test/unit/ticker.c b/test/unit/ticker.c index e5790a31..1cf10b0c 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -11,16 +11,16 @@ TEST_BEGIN(test_ticker_tick) { ticker_init(&ticker, NTICKS); for (i = 0; i < NREPS; i++) { for (j = 0; j < NTICKS; j++) { - assert_u_eq(ticker_read(&ticker), NTICKS - j, + expect_u_eq(ticker_read(&ticker), NTICKS - j, "Unexpected ticker value (i=%d, j=%d)", i, j); - assert_false(ticker_tick(&ticker), + expect_false(ticker_tick(&ticker), "Unexpected ticker fire (i=%d, j=%d)", i, j); } - assert_u32_eq(ticker_read(&ticker), 0, + expect_u32_eq(ticker_read(&ticker), 0, "Expected ticker depletion"); - assert_true(ticker_tick(&ticker), + expect_true(ticker_tick(&ticker), "Expected ticker fire (i=%d)", i); - assert_u32_eq(ticker_read(&ticker), NTICKS, + expect_u32_eq(ticker_read(&ticker), NTICKS, "Expected ticker reset"); } #undef NTICKS @@ -33,14 +33,14 @@ TEST_BEGIN(test_ticker_ticks) { ticker_init(&ticker, NTICKS); - 
assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - assert_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); - assert_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - assert_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + expect_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker fire"); + expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); + expect_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - assert_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); - assert_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); + expect_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); #undef NTICKS } TEST_END @@ -51,15 +51,15 @@ TEST_BEGIN(test_ticker_copy) { ticker_init(&ta, NTICKS); ticker_copy(&tb, &ta); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - assert_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + expect_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); ticker_tick(&ta); ticker_copy(&tb, &ta); - assert_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); - assert_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); - assert_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); + expect_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); + expect_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); #undef 
NTICKS } TEST_END diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 917884dc..3f3ca73d 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -10,7 +10,7 @@ static int data_cleanup_count; void data_cleanup(int *data) { if (data_cleanup_count == 0) { - assert_x_eq(*data, MALLOC_TSD_TEST_DATA_INIT, + expect_x_eq(*data, MALLOC_TSD_TEST_DATA_INIT, "Argument passed into cleanup function should match tsd " "value"); } @@ -38,7 +38,7 @@ data_cleanup(int *data) { if (reincarnate) { void *p = mallocx(1, 0); - assert_ptr_not_null(p, "Unexpeced mallocx() failure"); + expect_ptr_not_null(p, "Unexpeced mallocx() failure"); dallocx(p, 0); } } @@ -49,18 +49,18 @@ thd_start(void *arg) { void *p; tsd_t *tsd = tsd_fetch(); - assert_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, + expect_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, "Initial tsd get should return initialization value"); p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); tsd_test_data_set(tsd, d); - assert_x_eq(tsd_test_data_get(tsd), d, + expect_x_eq(tsd_test_data_get(tsd), d, "After tsd set, tsd get should return value that was set"); d = 0; - assert_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg, + expect_x_eq(tsd_test_data_get(tsd), (int)(uintptr_t)arg, "Resetting local data should have no effect on tsd"); tsd_test_callback_set(tsd, &data_cleanup); @@ -84,7 +84,7 @@ TEST_BEGIN(test_tsd_sub_thread) { * We reincarnate twice in the data cleanup, so it should execute at * least 3 times. */ - assert_x_ge(data_cleanup_count, 3, + expect_x_ge(data_cleanup_count, 3, "Cleanup function should have executed multiple times."); } TEST_END @@ -95,28 +95,28 @@ thd_start_reincarnated(void *arg) { assert(tsd); void *p = malloc(1); - assert_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_not_null(p, "Unexpected malloc() failure"); /* Manually trigger reincarnation. 
*/ - assert_ptr_not_null(tsd_arena_get(tsd), + expect_ptr_not_null(tsd_arena_get(tsd), "Should have tsd arena set."); tsd_cleanup((void *)tsd); - assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + expect_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared."); - assert_u_eq(tsd_state_get(tsd), tsd_state_purgatory, + expect_u_eq(tsd_state_get(tsd), tsd_state_purgatory, "TSD state should be purgatory\n"); free(p); - assert_u_eq(tsd_state_get(tsd), tsd_state_reincarnated, + expect_u_eq(tsd_state_get(tsd), tsd_state_reincarnated, "TSD state should be reincarnated\n"); p = mallocx(1, MALLOCX_TCACHE_NONE); - assert_ptr_not_null(p, "Unexpected malloc() failure"); - assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + expect_ptr_not_null(p, "Unexpected malloc() failure"); + expect_ptr_null(*tsd_arenap_get_unsafe(tsd), "Should not have tsd arena set after reincarnation."); free(p); tsd_cleanup((void *)tsd); - assert_ptr_null(*tsd_arenap_get_unsafe(tsd), + expect_ptr_null(*tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared after 2nd cleanup."); return NULL; @@ -206,46 +206,46 @@ TEST_BEGIN(test_tsd_global_slow) { * Spin-wait. */ } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); tsd_global_slow_inc(tsd_tsdn(tsd)); free(mallocx(1, 0)); - assert_false(tsd_fast(tsd), ""); + expect_false(tsd_fast(tsd), ""); atomic_store_u32(&data.phase, 2, ATOMIC_SEQ_CST); /* PHASE 3 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 3) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); /* Increase again, so that we can test multiple fast/slow changes. 
*/ tsd_global_slow_inc(tsd_tsdn(tsd)); atomic_store_u32(&data.phase, 4, ATOMIC_SEQ_CST); free(mallocx(1, 0)); - assert_false(tsd_fast(tsd), ""); + expect_false(tsd_fast(tsd), ""); /* PHASE 5 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 5) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); tsd_global_slow_dec(tsd_tsdn(tsd)); atomic_store_u32(&data.phase, 6, ATOMIC_SEQ_CST); /* We only decreased once; things should still be slow. */ free(mallocx(1, 0)); - assert_false(tsd_fast(tsd), ""); + expect_false(tsd_fast(tsd), ""); /* PHASE 7 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 7) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); tsd_global_slow_dec(tsd_tsdn(tsd)); atomic_store_u32(&data.phase, 8, ATOMIC_SEQ_CST); /* We incremented and then decremented twice; we should be fast now. */ free(mallocx(1, 0)); - assert_true(!originally_fast || tsd_fast(tsd), ""); + expect_true(!originally_fast || tsd_fast(tsd), ""); /* PHASE 9 */ while (atomic_load_u32(&data.phase, ATOMIC_SEQ_CST) != 9) { } - assert_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); + expect_false(atomic_load_b(&data.error, ATOMIC_SEQ_CST), ""); thd_join(thd, NULL); } diff --git a/test/unit/witness.c b/test/unit/witness.c index 5986da40..5a6c4482 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -34,7 +34,7 @@ witness_depth_error_intercept(const witness_list_t *witnesses, static int witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); assert(ob == (void *)b); @@ -45,7 +45,7 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { static int witness_comp_reverse(const witness_t *a, void *oa, const 
witness_t *b, void *ob) { - assert_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); + expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); assert(ob == (void *)b); @@ -121,9 +121,9 @@ TEST_BEGIN(test_witness_comp) { witness_init(&c, "c", 1, witness_comp_reverse, &c); witness_assert_not_owner(&witness_tsdn, &c); - assert_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(&witness_tsdn, &c); - assert_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_lock_error, "Expected witness lock error"); witness_unlock(&witness_tsdn, &c); witness_assert_depth(&witness_tsdn, 1); @@ -131,9 +131,9 @@ TEST_BEGIN(test_witness_comp) { witness_init(&d, "d", 1, NULL, NULL); witness_assert_not_owner(&witness_tsdn, &d); - assert_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(&witness_tsdn, &d); - assert_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_lock_error, "Expected witness lock error"); witness_unlock(&witness_tsdn, &d); witness_assert_depth(&witness_tsdn, 1); @@ -162,9 +162,9 @@ TEST_BEGIN(test_witness_reversal) { witness_lock(&witness_tsdn, &b); witness_assert_depth(&witness_tsdn, 1); - assert_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_lock_error, "Unexpected witness lock error"); witness_lock(&witness_tsdn, &a); - assert_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_lock_error, "Expected witness lock error"); witness_unlock(&witness_tsdn, &a); witness_assert_depth(&witness_tsdn, 1); @@ -195,11 +195,11 @@ TEST_BEGIN(test_witness_recursive) { witness_init(&a, "a", 1, NULL, NULL); witness_lock(&witness_tsdn, &a); - assert_false(saw_lock_error, "Unexpected witness lock error"); - assert_false(saw_not_owner_error, "Unexpected witness not owner error"); + 
expect_false(saw_lock_error, "Unexpected witness lock error"); + expect_false(saw_not_owner_error, "Unexpected witness not owner error"); witness_lock(&witness_tsdn, &a); - assert_true(saw_lock_error, "Expected witness lock error"); - assert_true(saw_not_owner_error, "Expected witness not owner error"); + expect_true(saw_lock_error, "Expected witness lock error"); + expect_true(saw_not_owner_error, "Expected witness not owner error"); witness_unlock(&witness_tsdn, &a); @@ -225,9 +225,9 @@ TEST_BEGIN(test_witness_unlock_not_owned) { witness_init(&a, "a", 1, NULL, NULL); - assert_false(saw_owner_error, "Unexpected owner error"); + expect_false(saw_owner_error, "Unexpected owner error"); witness_unlock(&witness_tsdn, &a); - assert_true(saw_owner_error, "Expected owner error"); + expect_true(saw_owner_error, "Expected owner error"); witness_assert_lockless(&witness_tsdn); @@ -250,14 +250,14 @@ TEST_BEGIN(test_witness_depth) { witness_init(&a, "a", 1, NULL, NULL); - assert_false(saw_depth_error, "Unexpected depth error"); + expect_false(saw_depth_error, "Unexpected depth error"); witness_assert_lockless(&witness_tsdn); witness_assert_depth(&witness_tsdn, 0); witness_lock(&witness_tsdn, &a); witness_assert_lockless(&witness_tsdn); witness_assert_depth(&witness_tsdn, 0); - assert_true(saw_depth_error, "Expected depth error"); + expect_true(saw_depth_error, "Expected depth error"); witness_unlock(&witness_tsdn, &a); diff --git a/test/unit/zero.c b/test/unit/zero.c index 271fd5cb..d3e81f1b 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -8,21 +8,21 @@ test_zero(size_t sz_min, size_t sz_max) { sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); - assert_ptr_not_null((void *)s, "Unexpected mallocx() failure"); + expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { - assert_u_eq(s[0], MAGIC, + expect_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", 
ZU(0), sz_prev); - assert_u_eq(s[sz_prev-1], MAGIC, + expect_u_eq(s[sz_prev-1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", sz_prev-1, sz_prev); } for (i = sz_prev; i < sz; i++) { - assert_u_eq(s[i], 0x0, + expect_u_eq(s[i], 0x0, "Newly allocated byte %zu/%zu isn't zero-filled", i, sz); s[i] = MAGIC; @@ -30,7 +30,7 @@ test_zero(size_t sz_min, size_t sz_max) { if (xallocx(s, sz+1, 0, 0) == sz) { s = (uint8_t *)rallocx(s, sz+1, 0); - assert_ptr_not_null((void *)s, + expect_ptr_not_null((void *)s, "Unexpected rallocx() failure"); } } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c index 2f49392b..a880d104 100644 --- a/test/unit/zero_realloc_abort.c +++ b/test/unit/zero_realloc_abort.c @@ -12,9 +12,9 @@ TEST_BEGIN(test_realloc_abort) { abort_called = false; safety_check_set_abort(&set_abort_called); void *ptr = mallocx(42, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); ptr = realloc(ptr, 0); - assert_true(abort_called, "Realloc with zero size didn't abort"); + expect_true(abort_called, "Realloc with zero size didn't abort"); } TEST_END diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c index a0736881..baed86c9 100644 --- a/test/unit/zero_realloc_free.c +++ b/test/unit/zero_realloc_free.c @@ -7,20 +7,20 @@ deallocated() { } uint64_t deallocated; size_t sz = sizeof(deallocated); - assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return deallocated; } TEST_BEGIN(test_realloc_free) { void *ptr = mallocx(42, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); uint64_t deallocated_before = deallocated(); ptr = realloc(ptr, 0); uint64_t deallocated_after = deallocated(); - assert_ptr_null(ptr, "Realloc didn't free"); + expect_ptr_null(ptr, "Realloc 
didn't free"); if (config_stats) { - assert_u64_gt(deallocated_after, deallocated_before, + expect_u64_gt(deallocated_after, deallocated_before, "Realloc didn't free"); } } diff --git a/test/unit/zero_realloc_strict.c b/test/unit/zero_realloc_strict.c index b7099517..249d838a 100644 --- a/test/unit/zero_realloc_strict.c +++ b/test/unit/zero_realloc_strict.c @@ -7,7 +7,7 @@ allocated() { } uint64_t allocated; size_t sz = sizeof(allocated); - assert_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, + expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return allocated; } @@ -19,23 +19,23 @@ deallocated() { } uint64_t deallocated; size_t sz = sizeof(deallocated); - assert_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, + expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return deallocated; } TEST_BEGIN(test_realloc_strict) { void *ptr = mallocx(1, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); uint64_t allocated_before = allocated(); uint64_t deallocated_before = deallocated(); ptr = realloc(ptr, 0); uint64_t allocated_after = allocated(); uint64_t deallocated_after = deallocated(); if (config_stats) { - assert_u64_lt(allocated_before, allocated_after, + expect_u64_lt(allocated_before, allocated_after, "Unexpected stats change"); - assert_u64_lt(deallocated_before, deallocated_after, + expect_u64_lt(deallocated_before, deallocated_after, "Unexpected stats change"); } dallocx(ptr, 0); diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c index fd33aaf6..66c7a404 100644 --- a/test/unit/zero_reallocs.c +++ b/test/unit/zero_reallocs.c @@ -8,7 +8,7 @@ zero_reallocs() { size_t count = 12345; size_t sz = sizeof(count); - assert_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, + expect_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, 
NULL, 0), 0, "Unexpected mallctl failure"); return count; } @@ -18,13 +18,13 @@ TEST_BEGIN(test_zero_reallocs) { for (size_t i = 0; i < 100; ++i) { void *ptr = mallocx(i * i + 1, 0); - assert_ptr_not_null(ptr, "Unexpected mallocx error"); + expect_ptr_not_null(ptr, "Unexpected mallocx error"); size_t count = zero_reallocs(); - assert_zu_eq(i, count, "Incorrect zero realloc count"); + expect_zu_eq(i, count, "Incorrect zero realloc count"); ptr = realloc(ptr, 0); - assert_ptr_null(ptr, "Realloc didn't free"); + expect_ptr_null(ptr, "Realloc didn't free"); count = zero_reallocs(); - assert_zu_eq(i + 1, count, "Realloc didn't adjust count"); + expect_zu_eq(i + 1, count, "Realloc didn't adjust count"); } } TEST_END From fa615793821219f8ad62e40aa23c848e5136aa5c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:10:10 -0800 Subject: [PATCH 1558/2608] Add assert_* functionality to tests --- test/include/test/test.h | 237 ++++++++++++++++++++++++++++++++++++++- test/src/test.c | 5 + 2 files changed, 240 insertions(+), 2 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index cf6616b6..a1b8ff32 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -245,7 +245,7 @@ #define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) #define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define expect_str_eq(a, b, ...) do { \ +#define expect_str_eq(a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -258,7 +258,7 @@ p_test_fail(prefix, message); \ } \ } while (0) -#define expect_str_ne(a, b, ...) do { \ +#define expect_str_ne(a, b, ...) do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -282,6 +282,238 @@ p_test_fail(prefix, message); \ } while (0) +#define p_abort_test_if_failed() do { \ + if (p_test_failed()) { \ + abort(); \ + } \ +} while (0) + +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) 
do { \ + expect_cmp(t, a, b, cmp, neg_cmp, pri, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ + !=, "p", __VA_ARGS__) +#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ + ==, "p", __VA_ARGS__) +#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ + !=, "p", __VA_ARGS__) +#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ + ==, "p", __VA_ARGS__) + +#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) + +#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) + +#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) + +#define assert_u_eq(a, b, ...) 
assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) + +#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ + !=, "ld", __VA_ARGS__) +#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ + ==, "ld", __VA_ARGS__) +#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, \ + >=, "ld", __VA_ARGS__) +#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ + >, "ld", __VA_ARGS__) +#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ + <, "ld", __VA_ARGS__) +#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ + <=, "ld", __VA_ARGS__) + +#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ + a, b, ==, !=, "lu", __VA_ARGS__) +#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ + a, b, !=, ==, "lu", __VA_ARGS__) +#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ + a, b, <, >=, "lu", __VA_ARGS__) +#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ + a, b, <=, >, "lu", __VA_ARGS__) +#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ + a, b, >=, <, "lu", __VA_ARGS__) +#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ + a, b, >, <=, "lu", __VA_ARGS__) + +#define assert_qd_eq(a, b, ...) assert_cmp(long long, a, b, ==, \ + !=, "qd", __VA_ARGS__) +#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ + ==, "qd", __VA_ARGS__) +#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ + >=, "qd", __VA_ARGS__) +#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ + >, "qd", __VA_ARGS__) +#define assert_qd_ge(a, b, ...) 
assert_cmp(long long, a, b, >=, \ + <, "qd", __VA_ARGS__) +#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ + <=, "qd", __VA_ARGS__) + +#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ + a, b, ==, !=, "qu", __VA_ARGS__) +#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ + a, b, !=, ==, "qu", __VA_ARGS__) +#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ + a, b, <, >=, "qu", __VA_ARGS__) +#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ + a, b, <=, >, "qu", __VA_ARGS__) +#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ + a, b, >=, <, "qu", __VA_ARGS__) +#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ + a, b, >, <=, "qu", __VA_ARGS__) + +#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ + !=, "jd", __VA_ARGS__) +#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ + ==, "jd", __VA_ARGS__) +#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ + >=, "jd", __VA_ARGS__) +#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ + >, "jd", __VA_ARGS__) +#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ + <, "jd", __VA_ARGS__) +#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ + <=, "jd", __VA_ARGS__) + +#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ + !=, "ju", __VA_ARGS__) +#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ + ==, "ju", __VA_ARGS__) +#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ + >=, "ju", __VA_ARGS__) +#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ + >, "ju", __VA_ARGS__) +#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ + <, "ju", __VA_ARGS__) +#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ + <=, "ju", __VA_ARGS__) + +#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ + !=, "zd", __VA_ARGS__) +#define assert_zd_ne(a, b, ...) 
assert_cmp(ssize_t, a, b, !=, \ + ==, "zd", __VA_ARGS__) +#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ + >=, "zd", __VA_ARGS__) +#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ + >, "zd", __VA_ARGS__) +#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ + <, "zd", __VA_ARGS__) +#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ + <=, "zd", __VA_ARGS__) + +#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ + !=, "zu", __VA_ARGS__) +#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ + ==, "zu", __VA_ARGS__) +#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ + >=, "zu", __VA_ARGS__) +#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ + >, "zu", __VA_ARGS__) +#define assert_zu_ge(a, b, ...) assert_cmp(size_t, a, b, >=, \ + <, "zu", __VA_ARGS__) +#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ + <=, "zu", __VA_ARGS__) + +#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ + !=, FMTd32, __VA_ARGS__) +#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ + ==, FMTd32, __VA_ARGS__) +#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ + >=, FMTd32, __VA_ARGS__) +#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ + >, FMTd32, __VA_ARGS__) +#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ + <, FMTd32, __VA_ARGS__) +#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ + <=, FMTd32, __VA_ARGS__) + +#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ + !=, FMTu32, __VA_ARGS__) +#define assert_u32_ne(a, b, ...) assert_cmp(uint32_t, a, b, !=, \ + ==, FMTu32, __VA_ARGS__) +#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ + >=, FMTu32, __VA_ARGS__) +#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ + >, FMTu32, __VA_ARGS__) +#define assert_u32_ge(a, b, ...) 
assert_cmp(uint32_t, a, b, >=, \ + <, FMTu32, __VA_ARGS__) +#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ + <=, FMTu32, __VA_ARGS__) + +#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ + !=, FMTd64, __VA_ARGS__) +#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ + ==, FMTd64, __VA_ARGS__) +#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ + >=, FMTd64, __VA_ARGS__) +#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ + >, FMTd64, __VA_ARGS__) +#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ + <, FMTd64, __VA_ARGS__) +#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ + <=, FMTd64, __VA_ARGS__) + +#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ + !=, FMTu64, __VA_ARGS__) +#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ + ==, FMTu64, __VA_ARGS__) +#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ + >=, FMTu64, __VA_ARGS__) +#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ + >, FMTu64, __VA_ARGS__) +#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ + <, FMTu64, __VA_ARGS__) +#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ + <=, FMTu64, __VA_ARGS__) + +#define assert_b_eq(a, b, ...) do { \ + expect_b_eq(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_b_ne(a, b, ...) do { \ + expect_b_ne(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) +#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) + +#define assert_str_eq(a, b, ...) do { \ + expect_str_eq(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_str_ne(a, b, ...) do { \ + expect_str_ne(a, b, __VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + +#define assert_not_reached(...) 
do { \ + expect_not_reached(__VA_ARGS__); \ + p_abort_test_if_failed(); \ +} while (0) + /* * If this enum changes, corresponding changes in test/test.sh.in are also * necessary. @@ -336,5 +568,6 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); +bool p_test_failed(void); void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index 4583e55a..b40fbc6d 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -233,6 +233,11 @@ p_test_fail(const char *prefix, const char *message) { test_status = test_status_fail; } +bool +p_test_failed() { + return test_status == test_status_fail; +} + void strncpy_cond(void *dst, const char *src, bool cond) { if (cond) { From 0ceb31184d145646ff30b03f566069307cd570d8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:39:42 -0800 Subject: [PATCH 1559/2608] Make use of assert_* in test/unit/buf_writer.c --- test/unit/buf_writer.c | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 37314db2..01f24119 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -10,19 +10,21 @@ static char test_buf[TEST_BUF_SIZE]; static uint64_t arg; static uint64_t arg_store; -static void test_write_cb(void *cbopaque, const char *s) { +static void +test_write_cb(void *cbopaque, const char *s) { size_t prev_test_write_len = test_write_len; test_write_len += strlen(s); /* only increase the length */ arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ - expect_zu_le(prev_test_write_len, test_write_len, + assert_zu_le(prev_test_write_len, test_write_len, "Test write overflowed"); } -static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { +static void +test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 
1]; size_t n_unit, remain, i; ssize_t unit; - expect_ptr_not_null(buf_writer->buf, "Buffer is null"); + assert(buf_writer->buf != NULL); write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); void *cbopaque = buf_writer_get_cbopaque(buf_writer); @@ -41,7 +43,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. */ - expect_u64_eq(arg_store, arg, "Call " + assert_u64_eq(arg_store, arg, "Call " "back argument didn't get through"); remain %= buf_writer->buf_size; if (remain == 0) { @@ -49,7 +51,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += buf_writer->buf_size; } } - expect_zu_eq(test_write_len + remain, i * unit, + assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } @@ -65,7 +67,7 @@ static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_static) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_body(tsdn, &buf_writer); @@ -75,7 +77,7 @@ TEST_END TEST_BEGIN(test_buf_write_dynamic) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - expect_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); test_buf_writer_body(tsdn, &buf_writer); } @@ -84,13 +86,13 @@ TEST_END TEST_BEGIN(test_buf_write_oom) { buf_writer_t buf_writer; tsdn_t *tsdn = tsdn_fetch(); - expect_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, 
SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); - expect_ptr_null(buf_writer.buf, "Buffer should be null"); + assert(buf_writer.buf == NULL); write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); - expect_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); + assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); void *cbopaque = buf_writer_get_cbopaque(&buf_writer); - expect_ptr_eq(cbopaque, &arg, "Should use arg"); + assert_ptr_eq(cbopaque, &arg, "Should use arg"); char s[UNIT_MAX + 1]; size_t n_unit, i; @@ -107,9 +109,9 @@ TEST_BEGIN(test_buf_write_oom) { for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); write_cb(cbopaque, s); - expect_u64_eq(arg_store, arg, + assert_u64_eq(arg_store, arg, "Call back argument didn't get through"); - expect_zu_eq(test_write_len, i * unit, + assert_zu_eq(test_write_len, i * unit, "Incorrect length after writing %zu strings" " of length %zu", i, unit); } From a88d22ea114b4db398aad021aa1dcd1b33b4038d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:27:09 -0800 Subject: [PATCH 1560/2608] Make use of assert_* in test/unit/inspect.c --- test/unit/inspect.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/test/unit/inspect.c b/test/unit/inspect.c index 41ef6c29..384b1ad4 100644 --- a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -1,11 +1,11 @@ #include "test/jemalloc_test.h" #define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - expect_d_eq(mallctl("experimental.utilization." node, \ + assert_d_eq(mallctl("experimental.utilization." 
node, \ a, b, c, d), EINVAL, "Should fail when " why_inval); \ - expect_zu_eq(out_sz, out_sz_ref, \ + assert_zu_eq(out_sz, out_sz_ref, \ "Output size touched when given invalid arguments"); \ - expect_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ "Output content touched when given invalid arguments"); \ } while (0) @@ -15,7 +15,7 @@ TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) #define TEST_UTIL_VALID(node) do { \ - expect_d_eq(mallctl("experimental.utilization." node, \ + assert_d_eq(mallctl("experimental.utilization." node, \ out, &out_sz, in, in_sz), 0, \ "Should return 0 on correct arguments"); \ expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ @@ -43,11 +43,11 @@ TEST_BEGIN(test_query) { void *out_ref = mallocx(out_sz, 0); size_t out_sz_ref = out_sz; - expect_ptr_not_null(p, + assert_ptr_not_null(p, "test pointer allocation failed"); - expect_ptr_not_null(out, + assert_ptr_not_null(out, "test output allocation failed"); - expect_ptr_not_null(out_ref, + assert_ptr_not_null(out_ref, "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) @@ -174,8 +174,8 @@ TEST_BEGIN(test_batch) { size_t out_ref[] = {-1, -1, -1, -1, -1, -1}; size_t out_sz_ref = out_sz; - expect_ptr_not_null(p, "test pointer allocation failed"); - expect_ptr_not_null(q, "test pointer allocation failed"); + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(q, "test pointer allocation failed"); /* Test invalid argument(s) errors */ TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, @@ -201,7 +201,7 @@ TEST_BEGIN(test_batch) { /* Examine output for valid calls */ #define TEST_EQUAL_REF(i, message) \ - expect_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) + assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) #define NFREE_READ(out, i) out[(i) * 3] #define NREGS_READ(out, i) out[(i) * 3 + 1] @@ -261,7 +261,7 @@ TEST_END int main(void) { - 
expect_zu_lt(SC_SMALL_MAXCLASS, TEST_MAX_SIZE, + assert_zu_lt(SC_SMALL_MAXCLASS + 100000, TEST_MAX_SIZE, "Test case cannot cover large classes"); return test(test_query, test_batch); } From 9d2cc3b0fa8365d69747bf0d04686fe41fe44d3e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 6 Feb 2020 10:55:19 -0800 Subject: [PATCH 1561/2608] Make use of assert_* in test/unit/prof_recent.c --- test/unit/prof_recent.c | 147 ++++++++++++++++++++++------------------ 1 file changed, 80 insertions(+), 67 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 7400d6cf..35a2333a 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -6,16 +6,17 @@ #define OPT_ALLOC_MAX 3 /* Invariant before and after every test (when config_prof is on) */ -static void confirm_prof_setup(tsd_t *tsd) { +static void +confirm_prof_setup(tsd_t *tsd) { /* Options */ - expect_true(opt_prof, "opt_prof not on"); - expect_true(opt_prof_active, "opt_prof_active not on"); - expect_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, + assert_true(opt_prof, "opt_prof not on"); + assert_true(opt_prof_active, "opt_prof_active not on"); + assert_zd_eq(opt_prof_recent_alloc_max, OPT_ALLOC_MAX, "opt_prof_recent_alloc_max not set correctly"); /* Dynamics */ - expect_true(prof_active, "prof_active not on"); - expect_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + assert_true(prof_active, "prof_active not on"); + assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } @@ -35,11 +36,11 @@ TEST_BEGIN(test_prof_recent_off) { size_t len = len_ref; #define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ - expect_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ + assert_d_eq(mallctl("experimental.prof_recent." 
opt, a, b, c, \ d), ENOENT, "Should return ENOENT when config_prof is off");\ - expect_zd_eq(past, past_ref, "output was touched"); \ - expect_zu_eq(len, len_ref, "output length was touched"); \ - expect_zd_eq(future, future_ref, "input was touched"); \ + assert_zd_eq(past, past_ref, "output was touched"); \ + assert_zu_eq(len, len_ref, "output length was touched"); \ + assert_zd_eq(future, future_ref, "input was touched"); \ } while (0) ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); @@ -61,32 +62,32 @@ TEST_BEGIN(test_prof_recent_on) { confirm_prof_setup(tsd); - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); confirm_prof_setup(tsd); - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), 0, "Read error"); expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); future = OPT_ALLOC_MAX + 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, len), 0, "Write error"); future = -1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); future = -2; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), EINVAL, "Invalid write should return EINVAL"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Output should not be touched given invalid write"); future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len), 0, "Read/write error"); expect_zd_eq(past, -1, "Wrong read result"); future = OPT_ALLOC_MAX 
+ 2; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, &future, len * 2), EINVAL, "Invalid write should return EINVAL"); expect_zd_eq(past, -1, @@ -99,13 +100,14 @@ TEST_END /* Reproducible sequence of request sizes */ #define NTH_REQ_SIZE(n) ((n) * 97 + 101) -static void confirm_malloc(tsd_t *tsd, void *p) { - expect_ptr_not_null(p, "malloc failed unexpectedly"); +static void +confirm_malloc(tsd_t *tsd, void *p) { + assert_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); - expect_ptr_not_null(e, "NULL edata for living pointer"); + assert_ptr_not_null(e, "NULL edata for living pointer"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); - expect_ptr_not_null(n, "Record in edata should not be NULL"); + assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_eq(e, n->alloc_edata, @@ -114,24 +116,27 @@ static void confirm_malloc(tsd_t *tsd, void *p) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } -static void confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { +static void +confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); expect_zu_eq(n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } -static void confirm_record_living(tsd_t *tsd, prof_recent_t *n) { +static void +confirm_record_living(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_not_null(n->alloc_edata, + assert_ptr_not_null(n->alloc_edata, "Recorded edata should not be NULL for living pointer"); expect_ptr_eq(n, edata_prof_recent_alloc_get(tsd, 
n->alloc_edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } -static void confirm_record_released(tsd_t *tsd, prof_recent_t *n) { +static void +confirm_record_released(tsd_t *tsd, prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); @@ -167,7 +172,7 @@ TEST_BEGIN(test_prof_recent_alloc) { if (i < OPT_ALLOC_MAX - 1) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_ptr_ne(prof_recent_alloc_begin(tsd), + assert_ptr_ne(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), "Empty recent allocation"); malloc_mutex_unlock(tsd_tsdn(tsd), @@ -194,7 +199,7 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -202,7 +207,7 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_prof_setup(tsd); b = false; - expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning off prof_active failed"); /* @@ -212,7 +217,7 @@ TEST_BEGIN(test_prof_recent_alloc) { for (; i < 3 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - expect_ptr_not_null(p, "malloc failed unexpectedly"); + assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); for (n = prof_recent_alloc_begin(tsd); @@ -223,13 +228,13 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } b = true; - expect_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, + assert_d_eq(mallctl("prof.active", NULL, NULL, &b, 
sizeof(bool)), 0, "mallctl for turning on prof_active failed"); confirm_prof_setup(tsd); @@ -267,14 +272,14 @@ TEST_BEGIN(test_prof_recent_alloc) { } } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } /* Increasing the limit shouldn't alter the list of records. */ future = OPT_ALLOC_MAX + 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -286,7 +291,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -294,7 +299,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * the new limit is still no less than the length of the list. */ future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -306,7 +311,7 @@ TEST_BEGIN(test_prof_recent_alloc) { ++c; } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX, + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* @@ -314,7 +319,7 @@ TEST_BEGIN(test_prof_recent_alloc) { * limit is less than the length of the list. 
*/ future = OPT_ALLOC_MAX - 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -326,12 +331,12 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX - 1, + assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Setting to unlimited shouldn't alter the list of records. */ future = -1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -343,36 +348,39 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(tsd, n); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - expect_u_eq(c, OPT_ALLOC_MAX - 1, + assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Downshift to only one record. */ future = 1; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); n = prof_recent_alloc_begin(tsd); - assert(n != prof_recent_alloc_end(tsd)); + assert_ptr_ne(n, prof_recent_alloc_end(tsd), "Recent list is empty"); confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); confirm_record_released(tsd, n); n = prof_recent_alloc_next(tsd, n); - assert(n == prof_recent_alloc_end(tsd)); + assert_ptr_eq(n, prof_recent_alloc_end(tsd), + "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Completely turn off. 
*/ future = 0; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Restore the settings. */ future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)); + assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); confirm_prof_setup(tsd); @@ -385,17 +393,19 @@ TEST_END static char dump_out[DUMP_OUT_SIZE]; static size_t dump_out_len = 0; -static void test_dump_write_cb(void *not_used, const char *str) { +static void +test_dump_write_cb(void *not_used, const char *str) { size_t len = strlen(str); assert(dump_out_len + len < DUMP_OUT_SIZE); memcpy(dump_out + dump_out_len, str, len + 1); dump_out_len += len; } -static void call_dump() { +static void +call_dump() { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; - expect_d_eq(mallctl("experimental.prof_recent.alloc_dump", + assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); } @@ -406,7 +416,8 @@ typedef struct { #define DUMP_ERROR "Dump output is wrong" -static void confirm_record(const char *template, +static void +confirm_record(const char *template, const confirm_record_t *records, const size_t n_records) { static const char *types[2] = 
{"alloc", "dalloc"}; static char buf[64]; @@ -418,9 +429,9 @@ static void confirm_record(const char *template, * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". */ - expect_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, + assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR); - expect_d_eq(memcmp(dump_out + strlen(dump_out) - 2, + assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, template + strlen(template) - 2, 2), 0, DUMP_ERROR); const char *start = dump_out + strlen(template) - 2; @@ -429,14 +440,14 @@ static void confirm_record(const char *template, for (record = records; record < records + n_records; ++record) { #define ASSERT_CHAR(c) do { \ - expect_true(start < end, DUMP_ERROR); \ - expect_c_eq(*start++, c, DUMP_ERROR); \ + assert_true(start < end, DUMP_ERROR); \ + assert_c_eq(*start++, c, DUMP_ERROR); \ } while (0) #define ASSERT_STR(s) do { \ const size_t len = strlen(s); \ - expect_true(start + len <= end, DUMP_ERROR); \ - expect_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + assert_true(start + len <= end, DUMP_ERROR); \ + assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ start += len; \ } while (0) @@ -512,8 +523,8 @@ static void confirm_record(const char *template, #undef ASSERT_CHAR } - expect_ptr_eq(record, records + n_records, DUMP_ERROR); - expect_ptr_eq(start, end, DUMP_ERROR); + assert_ptr_eq(record, records + n_records, DUMP_ERROR); + assert_ptr_eq(start, end, DUMP_ERROR); } TEST_BEGIN(test_prof_recent_alloc_dump) { @@ -527,14 +538,14 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record_t records[2]; future = 0; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); expect_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); future = 2; - 
expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; @@ -563,7 +574,7 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record(template, records, 2); future = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } @@ -588,11 +599,13 @@ typedef struct { static thd_data_t thd_data[N_THREADS]; static ssize_t test_max; -static void test_write_cb(void *cbopaque, const char *str) { +static void +test_write_cb(void *cbopaque, const char *str) { sleep_ns(1000 * 1000); } -static void *f_thread(void *arg) { +static void * +f_thread(void *arg) { const size_t thd_id = *(size_t *)arg; thd_data_t *data_p = thd_data + thd_id; assert(data_p->id == thd_id); @@ -632,7 +645,7 @@ static void *f_thread(void *arg) { last_max = prof_recent_alloc_max_ctl_write(tsd, test_max / 2); } - expect_zd_ge(last_max, -1, "Illegal last-N max"); + assert_zd_ge(last_max, -1, "Illegal last-N max"); } while (data_p->count > 0) { @@ -660,7 +673,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = STRESS_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); for (size_t i = 0; i < N_THREADS; i++) { thd_data_t *data_p = thd_data + i; @@ -673,7 +686,7 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = OPT_ALLOC_MAX; - expect_d_eq(mallctl("experimental.prof_recent.alloc_max", + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); confirm_prof_setup(tsd); } From 51bd147422d95bfcd3919f11a6a7dd7a574e05cd Mon Sep 17 00:00:00 2001 From: Yinan 
Zhang Date: Thu, 6 Feb 2020 10:57:47 -0800 Subject: [PATCH 1562/2608] Make use of assert_* in test/unit/thread_event.c --- test/unit/thread_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index ef3b95ce..0855829c 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -15,7 +15,7 @@ TEST_BEGIN(test_next_event_fast_roll_back) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(16U); - expect_ptr_not_null(p, "malloc() failed"); + assert_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -37,7 +37,7 @@ TEST_BEGIN(test_next_event_fast_resume) { ITERATE_OVER_ALL_EVENTS #undef E void *p = malloc(SC_LOOKUP_MAXCLASS); - expect_ptr_not_null(p, "malloc() failed"); + assert_ptr_not_null(p, "malloc() failed"); free(p); } TEST_END @@ -50,7 +50,7 @@ TEST_BEGIN(test_event_rollback) { while (count-- != 0) { te_alloc_rollback(tsd, diff); uint64_t thread_allocated_after = thread_allocated_get(tsd); - expect_u64_eq(thread_allocated - thread_allocated_after, diff, + assert_u64_eq(thread_allocated - thread_allocated_after, diff, "thread event counters are not properly rolled back"); thread_allocated = thread_allocated_after; } From bc31041edb183d739574d622888d818dbc1bfadf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Feb 2020 11:05:57 -0800 Subject: [PATCH 1563/2608] Cirrus-CI: test on new freebsd releases. --- .cirrus.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index a9de9534..d01954f1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -6,8 +6,6 @@ task: freebsd_instance: matrix: image: freebsd-12-1-release-amd64 - image: freebsd-12-0-release-amd64 - image: freebsd-11-2-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 9f4fc273892f130fd81d26e7cb9e561fb5a10679 Mon Sep 17 00:00:00 2001 From: "David T. 
Goldblatt" Date: Tue, 25 Feb 2020 07:47:04 -0800 Subject: [PATCH 1564/2608] Ehooks: Fix a build warning. We wrote `return some_void_func()` in a function returning void, which is confusing and triggers warnings on MSVC. --- include/jemalloc/internal/ehooks.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 1bd44cb8..bae468b3 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -222,9 +222,9 @@ ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - return ehooks_default_destroy_impl(addr, size); + ehooks_default_destroy_impl(addr, size); } else if (extent_hooks->destroy == NULL) { - return; + /* Do nothing. */ } else { ehooks_pre_reentrancy(tsdn); extent_hooks->destroy(extent_hooks, addr, size, committed, From 6c3491ad3105994f8b804fc6ddb1aa88024a4d4b Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Sun, 23 Feb 2020 20:33:04 -0800 Subject: [PATCH 1565/2608] Tcache: Unify bin flush logic. The small and large pathways share most of their logic, even if some of the individual operations are different. We pull out the common logic into a force-inlined function, and then specialize twice, once for each value of "small". 
--- .../internal/jemalloc_internal_decls.h | 9 + src/tcache.c | 310 +++++++++--------- 2 files changed, 172 insertions(+), 147 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 042a1fa4..32058ced 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -92,4 +92,13 @@ isblank(int c) { #endif #include +/* + * The Win32 midl compiler has #define small char; we don't use midl, but + * "small" is a nice identifier to have available when talking about size + * classes. + */ +#ifdef small +# undef small +#endif + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/src/tcache.c b/src/tcache.c index 782d8833..7ffa6fc3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -139,21 +139,44 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, } } -void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, - szind_t binind, unsigned rem) { - assert(binind < SC_NBINS); +JEMALLOC_ALWAYS_INLINE bool +tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, + unsigned cur_binshard, bool small) { + if (small) { + return edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard; + } else { + return edata_arena_ind_get(edata) == cur_arena_ind; + } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem, bool small) { + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. 
+ */ + tsdn_t *tsdn = tsd_tsdn(tsd); + + if (small) { + assert(binind < SC_NBINS); + } else { + assert(binind < nhbins); + } cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); assert((cache_bin_sz_t)rem <= ncached); + arena_t *tcache_arena = tcache->arena; + assert(tcache_arena != NULL); - arena_t *arena = tcache->arena; - assert(arena != NULL); unsigned nflush = ncached - rem; - /* Variable length array must have > 0 length. */ + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). + */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); - tsdn_t *tsdn = tsd_tsdn(tsd); + /* Look up edata once per item. */ if (config_opt_safety_checks) { tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, @@ -165,71 +188,154 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } } - bool merged_stats = false; + /* + * The slabs where we freed the last remaining object in the slab (and + * so need to free the slab itself). + * Used only if small == true. + */ unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - while (nflush > 0) { - /* Lock the arena bin associated with the first object. */ - edata_t *edata = item_edata[0]; - unsigned bin_arena_ind = edata_arena_ind_get(edata); - arena_t *bin_arena = arena_get(tsdn, bin_arena_ind, false); - unsigned binshard = edata_binshard_get(edata); - assert(binshard < bin_infos[binind].n_shards); - bin_t *bin = &bin_arena->bins[binind].bin_shards[binshard]; - malloc_mutex_lock(tsdn, &bin->lock); - if (config_stats && bin_arena == arena && !merged_stats) { - merged_stats = true; - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; + /* + * We're about to grab a bunch of locks. 
If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + bool merged_stats = false; + while (nflush > 0) { + /* Lock the arena, or bin, associated with the first object. */ + edata_t *edata = item_edata[0]; + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + /* + * These assignments are always overwritten when small is true, + * and their values are always ignored when small is false, but + * to avoid the technical UB when we pass them as parameters, we + * need to intialize them. + */ + unsigned cur_binshard = 0; + bin_t *cur_bin = NULL; + if (small) { + cur_binshard = edata_binshard_get(edata); + cur_bin = &cur_arena->bins[binind].bin_shards[ + cur_binshard]; + assert(cur_binshard < bin_infos[binind].n_shards); } + + if (small) { + malloc_mutex_lock(tsdn, &cur_bin->lock); + } + if (!small && !arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); + } + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + if (small) { + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += + tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + } else { + arena_stats_large_flush_nrequests_add(tsdn, + &tcache_arena->stats, binind, + tbin->tstats.nrequests); + tbin->tstats.nrequests = 0; + } + } + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. 
+ */ + if (!small) { + for (unsigned i = 0; i < nflush; i++) { + void *ptr = *(bottom_item - i); + edata = item_edata[i]; + assert(ptr != NULL && edata != NULL); + + if (tcache_bin_flush_match(edata, cur_arena_ind, + cur_binshard, small)) { + large_dalloc_prep_junked_locked(tsdn, + edata); + } + } + } + if (!small && !arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + } + + /* Deallocate whatever we can. */ unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = *(bottom_item - i); edata = item_edata[i]; assert(ptr != NULL && edata != NULL); - - if (edata_arena_ind_get(edata) == bin_arena_ind - && edata_binshard_get(edata) == binshard) { - if (arena_dalloc_bin_junked_locked(tsdn, - bin_arena, bin, binind, edata, ptr)) { - dalloc_slabs[dalloc_count++] = edata; - } - } else { + if (!tcache_bin_flush_match(edata, cur_arena_ind, + cur_binshard, small)) { /* - * This object was allocated via a different - * arena bin than the one that is currently - * locked. Stash the object, so that it can be - * handled in a future pass. + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. */ *(bottom_item - ndeferred) = ptr; item_edata[ndeferred] = edata; ndeferred++; + continue; + } + if (small) { + if (arena_dalloc_bin_junked_locked(tsdn, + cur_arena, cur_bin, binind, edata, ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; + } + } else { + large_dalloc_finish(tsdn, edata); } } - malloc_mutex_unlock(tsdn, &bin->lock); - arena_decay_ticks(tsdn, bin_arena, nflush - ndeferred); + + if (small) { + malloc_mutex_unlock(tsdn, &cur_bin->lock); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); nflush = ndeferred; } + /* Handle all deferred slab dalloc. 
*/ + assert(small || dalloc_count == 0); for (unsigned i = 0; i < dalloc_count; i++) { edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } if (config_stats && !merged_stats) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, - &binshard); - bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); + if (small) { + /* + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. + */ + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena, + binind, &binshard); + bin->stats.nflushes++; + bin->stats.nrequests += tbin->tstats.nrequests; + tbin->tstats.nrequests = 0; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + arena_stats_large_flush_nrequests_add(tsdn, + &tcache_arena->stats, binind, + tbin->tstats.nrequests); + tbin->tstats.nrequests = 0; + } } memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * @@ -241,105 +347,15 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } void -tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, - unsigned rem) { - bool merged_stats = false; +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem) { + tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true); +} - assert(binind < nhbins); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); - assert((cache_bin_sz_t)rem <= ncached); - - arena_t *tcache_arena = tcache->arena; - assert(tcache_arena != NULL); - unsigned nflush = ncached - rem; - /* Variable length array must have > 0 length. 
*/ - VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); -#ifndef JEMALLOC_EXTRA_SIZE_CHECK - /* Look up edata once per item. */ - for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, - *(bottom_item - i)); - } -#else - tbin_extents_lookup_size_check(tsd_tsdn(tsd), tbin, binind, nflush, - item_edata); -#endif - while (nflush > 0) { - /* Lock the arena associated with the first object. */ - edata_t *edata = item_edata[0]; - unsigned locked_arena_ind = edata_arena_ind_get(edata); - arena_t *locked_arena = arena_get(tsd_tsdn(tsd), - locked_arena_ind, false); - - bool lock_large = !arena_is_auto(locked_arena); - if (lock_large) { - malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx); - } - for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); - assert(ptr != NULL); - edata = item_edata[i]; - if (edata_arena_ind_get(edata) == locked_arena_ind) { - large_dalloc_prep_junked_locked(tsd_tsdn(tsd), - edata); - } - } - if ((config_prof || config_stats) && - (locked_arena == tcache_arena)) { - if (config_stats) { - merged_stats = true; - arena_stats_large_flush_nrequests_add( - tsd_tsdn(tsd), &tcache_arena->stats, binind, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; - } - } - if (lock_large) { - malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx); - } - - unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); - edata = item_edata[i]; - assert(ptr != NULL && edata != NULL); - - if (edata_arena_ind_get(edata) == locked_arena_ind) { - large_dalloc_finish(tsd_tsdn(tsd), edata); - } else { - /* - * This object was allocated via a different - * arena than the one that is currently locked. - * Stash the object, so that it can be handled - * in a future pass. 
- */ - *(bottom_item - ndeferred) = ptr; - item_edata[ndeferred] = edata; - ndeferred++; - } - } - arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - - ndeferred); - nflush = ndeferred; - } - if (config_stats && !merged_stats) { - /* - * The flush loop didn't happen to flush to this thread's - * arena, so the stats didn't get merged. Manually do so now. - */ - arena_stats_large_flush_nrequests_add(tsd_tsdn(tsd), - &tcache_arena->stats, binind, tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; - } - - memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * - sizeof(void *)); - cache_bin_ncached_set(tbin, binind, rem); - if (tbin->cur_ptr.lowbits > tbin->low_water_position) { - tbin->low_water_position = tbin->cur_ptr.lowbits; - } +void +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, + szind_t binind, unsigned rem) { + tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false); } void From 305b1f6d962c5b5a76b7ddb4b55b14d88bada9ba Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 4 Mar 2020 10:27:30 -0800 Subject: [PATCH 1566/2608] Correction on geometric sampling --- src/prof.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index 761cb95d..82f88a21 100644 --- a/src/prof.c +++ b/src/prof.c @@ -468,9 +468,16 @@ prof_sample_threshold_update(tsd_t *tsd) { * Springer-Verlag, New York, 1986 * pp 500 * (http://luc.devroye.org/rnbookindex.html) + * + * In the actual computation, there's a non-zero probability that our + * pseudo random number generator generates an exact 0, and to avoid + * log(0), we set u to 1.0 in case r is 0. Therefore u effectively is + * uniformly distributed in (0, 1] instead of [0, 1). Further, rather + * than taking the ceiling, we take the floor and then add 1, since + * otherwise bytes_until_sample would be 0 if u is exactly 1.0. 
*/ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); - double u = (double)r * (1.0/9007199254740992.0L); + double u = (r == 0U) ? 1.0 : (double)r * (1.0/9007199254740992.0L); uint64_t bytes_until_sample = (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; From 4a78c6d81b3f431070f362c29ab7b492ee0b9e70 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 6 Mar 2020 15:31:40 -0800 Subject: [PATCH 1567/2608] Correct thread event unit test --- test/unit/thread_event.c | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 0855829c..5501fa3a 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -TEST_BEGIN(test_next_event_fast_roll_back) { +TEST_BEGIN(test_next_event_fast) { tsd_t *tsd = tsd_fetch(); te_ctx_t ctx; te_ctx_get(tsd, &ctx, true); @@ -14,31 +14,16 @@ TEST_BEGIN(test_next_event_fast_roll_back) { } ITERATE_OVER_ALL_EVENTS #undef E + + /* Test next_event_fast rolling back to 0. */ void *p = malloc(16U); assert_ptr_not_null(p, "malloc() failed"); free(p); -} -TEST_END -TEST_BEGIN(test_next_event_fast_resume) { - tsd_t *tsd = tsd_fetch(); - - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, true); - - te_ctx_last_event_set(&ctx, 0); - te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX + 8U); - te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX + 16U); -#define E(event, condition, is_alloc) \ - if (is_alloc && condition) { \ - event##_event_wait_set(tsd, \ - TE_NEXT_EVENT_FAST_MAX + 16U); \ - } - ITERATE_OVER_ALL_EVENTS -#undef E - void *p = malloc(SC_LOOKUP_MAXCLASS); - assert_ptr_not_null(p, "malloc() failed"); - free(p); + /* Test next_event_fast resuming to be equal to next_event. 
*/ + void *q = malloc(SC_LOOKUP_MAXCLASS); + assert_ptr_not_null(q, "malloc() failed"); + free(q); } TEST_END @@ -60,7 +45,6 @@ TEST_END int main(void) { return test( - test_next_event_fast_roll_back, - test_next_event_fast_resume, + test_next_event_fast, test_event_rollback); } From 22657a5e65953c25531caf155d52ed43eb0c653f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 1 Mar 2020 09:36:09 -0800 Subject: [PATCH 1568/2608] Extents: Silence the "potentially unused" warning. --- src/extent.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/extent.c b/src/extent.c index d06b8d68..87dcec3f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -534,8 +534,8 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; - edata_t *to_leak; - edata_t *to_salvage; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior( tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, @@ -711,8 +711,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *lead; edata_t *trail; - edata_t *to_leak; - edata_t *to_salvage; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, From b428dceeaf87fb35a16c2337ac13105f7d18dfd3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 16:53:00 -0800 Subject: [PATCH 1569/2608] Config: Warn on void * pointer arithmetic. This is handy while developing, but not portable. 
--- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 6ccd009a..324656b9 100644 --- a/configure.ac +++ b/configure.ac @@ -250,6 +250,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wsign-compare]) JE_CFLAGS_ADD([-Wundef]) JE_CFLAGS_ADD([-Wno-format-zero-length]) + JE_CFLAGS_ADD([-Wpointer-arith]) dnl This warning triggers on the use of the universal zero initializer, which dnl is a very handy idiom for things like the tcache static initializer (which dnl has lots of nested structs). See the discussion at. From 79f1ee2fc0163d3666f38cfc59f8c1a8ab07f056 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 28 Feb 2020 11:37:39 -0800 Subject: [PATCH 1570/2608] Move junking out of arena/tcache code. This is debug only and we keep it off the fast path. Moving it here simplifies the internal logic. This never tries to junk on regions that were shrunk via xallocx. I think this is fine for two reasons: - The shrunk-with-xallocx case is rare. - We don't always do that anyway before this diff (it depends on the opt settings and extent hooks in effect). 
--- include/jemalloc/internal/arena_externs.h | 9 +- .../internal/jemalloc_internal_externs.h | 2 + include/jemalloc/internal/large_externs.h | 8 +- include/jemalloc/internal/tcache_inlines.h | 35 +-- src/arena.c | 46 +-- src/jemalloc.c | 62 ++++ src/large.c | 53 +--- src/tcache.c | 8 +- test/unit/junk.c | 274 +++++++++++------- 9 files changed, 249 insertions(+), 248 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 1b92766d..4ef8d8e8 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -50,11 +50,6 @@ void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind); -void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, - bool zero); - -typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); -extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); @@ -63,9 +58,9 @@ void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); -bool arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *edata, void *ptr); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); +bool arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + szind_t binind, edata_t *edata, void *ptr); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h 
index e9dbde80..338a590f 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -14,6 +14,8 @@ extern bool opt_confirm_conf; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; +extern void (*junk_free_callback)(void *ptr, size_t size); +extern void (*junk_alloc_callback)(void *ptr, size_t size); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_zero; diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 05e6c442..27979648 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -12,13 +12,7 @@ void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args); -typedef void (large_dalloc_junk_t)(void *, size_t); -extern large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk; - -typedef void (large_dalloc_maybe_junk_t)(void *, size_t); -extern large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk; - -void large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); void large_dalloc(tsdn_t *tsdn, edata_t *edata); size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index d356181c..ff06935d 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -61,23 +61,9 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, usize = sz_index2size(binind); assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); } - - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], - false); - } else if 
(unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { - if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &bin_infos[binind], true); - } + if (unlikely(zero)) { memset(ret, 0, usize); } - if (config_stats) { bin->tstats.nrequests++; } @@ -119,16 +105,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(usize <= tcache_maxclass); } - if (likely(!zero)) { - if (slow_path && config_fill) { - if (unlikely(opt_junk_alloc)) { - memset(ret, JEMALLOC_ALLOC_JUNK, - usize); - } else if (unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { + if (unlikely(zero)) { memset(ret, 0, usize); } @@ -148,10 +125,6 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); - if (slow_path && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, &bin_infos[binind]); - } - bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_ncached_max_get(binind) >> 1; @@ -170,10 +143,6 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - if (slow_path && config_fill && unlikely(opt_junk_free)) { - large_dalloc_junk(ptr, sz_index2size(binind)); - } - bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_ncached_max_get(binind) >> 1; diff --git a/src/arena.c b/src/arena.c index aa19e092..0a9e4a98 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1446,30 +1446,10 @@ label_refill: fresh_slab = NULL; } - if (config_fill && unlikely(opt_junk_alloc)) { - for (unsigned i = 0; i < filled; i++) { - void *ptr = *(empty_position - nfill + filled + i); - arena_alloc_junk_small(ptr, bin_info, true); - } - } cache_bin_ncached_set(tbin, binind, filled); 
arena_decay_tick(tsdn, arena); } -void -arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { - if (!zero) { - memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); - } -} - -static void -arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { - memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); -} -arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = - arena_dalloc_junk_small_impl; - /* * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill * bin->slabcur if necessary. @@ -1528,18 +1508,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (fresh_slab != NULL) { arena_slab_dalloc(tsdn, arena, fresh_slab); } - if (!zero) { - if (config_fill) { - if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, bin_info, false); - } else if (unlikely(opt_zero)) { - memset(ret, 0, usize); - } - } - } else { - if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, bin_info, true); - } + if (zero) { memset(ret, 0, usize); } arena_decay_tick(tsdn, arena); @@ -1706,11 +1675,8 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { /* Returns true if arena_slab_dalloc must be called on slab */ static bool arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *slab, void *ptr, bool junked) { + szind_t binind, edata_t *slab, void *ptr) { const bin_info_t *bin_info = &bin_infos[binind]; - if (!junked && config_fill && unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, bin_info); - } arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr); bool ret = false; @@ -1733,10 +1699,10 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } bool -arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *edata, void *ptr) { +arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, +szind_t binind, edata_t *edata, void *ptr) { 
return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr, true); + ptr); } static void @@ -1747,7 +1713,7 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr, false); + ptr); malloc_mutex_unlock(tsdn, &bin->lock); if (ret) { diff --git a/src/jemalloc.c b/src/jemalloc.c index b29ae47e..12b4f6c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -81,6 +81,24 @@ const char *zero_realloc_mode_names[] = { "abort", }; +/* + * These are the documented values for junk fill debugging facilities -- see the + * man page. + */ +static const uint8_t junk_alloc_byte = 0xa5; +static const uint8_t junk_free_byte = 0x5a; + +static void default_junk_alloc(void *ptr, size_t usize) { + memset(ptr, junk_alloc_byte, usize); +} + +static void default_junk_free(void *ptr, size_t usize) { + memset(ptr, junk_free_byte, usize); +} + +void (*junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; +void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free; + bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; @@ -2210,6 +2228,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { assert(usize == isalloc(tsd_tsdn(tsd), allocation)); + if (config_fill && sopts->slow && !dopts->zero) { + if (unlikely(opt_junk_alloc)) { + junk_alloc_callback(allocation, usize); + } else if (unlikely(opt_zero)) { + memset(allocation, 0, usize); + } + } + if (sopts->slow) { UTRACE(0, size, allocation); } @@ -2582,6 +2608,9 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false); } else { + if (config_fill && slow_path && opt_junk_free) { + junk_free_callback(ptr, usize); + } idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } @@ -2648,6 +2677,9 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t 
*tcache, bool slow_path) { isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, false); } else { + if (config_fill && slow_path && opt_junk_free) { + junk_free_callback(ptr, usize); + } isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, true); } @@ -2745,6 +2777,14 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + + /* + * If junking were enabled, this is where we would do it. It's not + * though, since we ensured above that we're on the fast path. Assert + * that to double-check. + */ + assert(!opt_junk_free); + if (!cache_bin_dalloc_easy(bin, ptr)) { return false; } @@ -3180,6 +3220,16 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + if (config_fill && malloc_slow && !zero && usize > old_usize) { + size_t excess_len = usize - old_usize; + void *excess_start = (void *)((uintptr_t)p + old_usize); + if (unlikely(opt_junk_alloc)) { + junk_alloc_callback(excess_start, excess_len); + } else if (unlikely(opt_zero)) { + memset(excess_start, 0, excess_len); + } + } + return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -3465,6 +3515,18 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { goto label_not_resized; } thread_dalloc_event(tsd, old_usize); + + if (config_fill && malloc_slow) { + if (usize > old_usize && !zero) { + size_t excess_len = usize - old_usize; + void *excess_start = (void *)((uintptr_t)ptr + old_usize); + if (unlikely(opt_junk_alloc)) { + junk_alloc_callback(excess_start, excess_len); + } else if (unlikely(opt_zero)) { + memset(excess_start, 0, excess_len); + } + } + } label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; diff --git a/src/large.c b/src/large.c index f13b1e5e..babb3071 100644 --- a/src/large.c +++ b/src/large.c @@ -38,8 +38,8 @@ 
large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } /* * Copy zero into is_zeroed and pass the copy when allocating the - * extent, so that it is possible to make correct junk/zero fill - * decisions below, even if is_zeroed ends up true when zero is false. + * extent, so that it is possible to make correct zero fill decisions + * below, even if is_zeroed ends up true when zero is false. */ is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { @@ -60,36 +60,12 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (zero) { assert(is_zeroed); - } else if (config_fill && unlikely(opt_junk_alloc)) { - memset(edata_addr_get(edata), JEMALLOC_ALLOC_JUNK, - edata_usize_get(edata)); } arena_decay_tick(tsdn, arena); return edata_addr_get(edata); } -static void -large_dalloc_junk_impl(void *ptr, size_t size) { - memset(ptr, JEMALLOC_FREE_JUNK, size); -} -large_dalloc_junk_t *JET_MUTABLE large_dalloc_junk = large_dalloc_junk_impl; - -static void -large_dalloc_maybe_junk_impl(void *ptr, size_t size) { - if (config_fill && have_dss && unlikely(opt_junk_free)) { - /* - * Only bother junk filling if the extent isn't about to be - * unmapped. 
- */ - if (opt_retain || (have_dss && extent_in_dss(ptr))) { - large_dalloc_junk(ptr, size); - } - } -} -large_dalloc_maybe_junk_t *JET_MUTABLE large_dalloc_maybe_junk = - large_dalloc_maybe_junk_impl; - static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { arena_t *arena = arena_get_from_edata(edata); @@ -112,11 +88,6 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { return true; } - if (config_fill && unlikely(opt_junk_free)) { - large_dalloc_maybe_junk(edata_addr_get(trail), - edata_size_get(trail)); - } - arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); } @@ -142,9 +113,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } /* * Copy zero into is_zeroed_trail and pass the copy when allocating the - * extent, so that it is possible to make correct junk/zero fill - * decisions below, even if is_zeroed_trail ends up true when zero is - * false. + * extent, so that it is possible to make correct zero fill decisions + * below, even if is_zeroed_trail ends up true when zero is false. */ bool is_zeroed_trail = zero; edata_t *trail; @@ -201,11 +171,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, memset(zbase, 0, nzero); } assert(is_zeroed_trail); - } else if (config_fill && unlikely(opt_junk_alloc)) { - memset((void *)((uintptr_t)edata_addr_get(edata) + oldusize), - JEMALLOC_ALLOC_JUNK, usize - oldusize); } - arena_extent_ralloc_large_expand(tsdn, arena, edata, oldusize); return false; @@ -310,21 +276,18 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, } /* - * junked_locked indicates whether the extent's data have been junk-filled, and - * whether the arena's large_mtx is currently held. + * locked indicates whether the arena's large_mtx is currently held. 
*/ static void large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - bool junked_locked) { - if (!junked_locked) { + bool locked) { + if (!locked) { /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); edata_list_remove(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } - large_dalloc_maybe_junk(edata_addr_get(edata), - edata_usize_get(edata)); } else { /* Only hold the large_mtx if necessary. */ if (!arena_is_auto(arena)) { @@ -342,7 +305,7 @@ large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { } void -large_dalloc_prep_junked_locked(tsdn_t *tsdn, edata_t *edata) { +large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata) { large_dalloc_prep_impl(tsdn, arena_get_from_edata(edata), edata, true); } diff --git a/src/tcache.c b/src/tcache.c index 7ffa6fc3..c736f565 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -176,7 +176,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); void **bottom_item = cache_bin_bottom_item_get(tbin, binind); - + /* Look up edata once per item. 
*/ if (config_opt_safety_checks) { tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, @@ -262,7 +262,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, if (tcache_bin_flush_match(edata, cur_arena_ind, cur_binshard, small)) { - large_dalloc_prep_junked_locked(tsdn, + large_dalloc_prep_locked(tsdn, edata); } } @@ -291,8 +291,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, continue; } if (small) { - if (arena_dalloc_bin_junked_locked(tsdn, - cur_arena, cur_bin, binind, edata, ptr)) { + if (arena_dalloc_bin_locked(tsdn, cur_arena, + cur_bin, binind, edata, ptr)) { dalloc_slabs[dalloc_count] = edata; dalloc_count++; } diff --git a/test/unit/junk.c b/test/unit/junk.c index 772a0b4e..5a74c3d7 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -1,141 +1,191 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/util.h" - -static arena_dalloc_junk_small_t *arena_dalloc_junk_small_orig; -static large_dalloc_junk_t *large_dalloc_junk_orig; -static large_dalloc_maybe_junk_t *large_dalloc_maybe_junk_orig; -static void *watch_for_junking; -static bool saw_junking; +#define arraylen(arr) (sizeof(arr)/sizeof(arr[0])) +static size_t ptr_ind; +static void *volatile ptrs[100]; +static void *last_junked_ptr; +static size_t last_junked_usize; static void -watch_junking(void *p) { - watch_for_junking = p; - saw_junking = false; +reset() { + ptr_ind = 0; + last_junked_ptr = NULL; + last_junked_usize = 0; } static void -arena_dalloc_junk_small_intercept(void *ptr, const bin_info_t *bin_info) { - size_t i; - - arena_dalloc_junk_small_orig(ptr, bin_info); - for (i = 0; i < bin_info->reg_size; i++) { - expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, - "Missing junk fill for byte %zu/%zu of deallocated region", - i, bin_info->reg_size); - } - if (ptr == watch_for_junking) { - saw_junking = true; - } +test_junk(void *ptr, size_t usize) { + last_junked_ptr = ptr; + last_junked_usize = usize; } static void 
-large_dalloc_junk_intercept(void *ptr, size_t usize) { - size_t i; - - large_dalloc_junk_orig(ptr, usize); - for (i = 0; i < usize; i++) { - expect_u_eq(((uint8_t *)ptr)[i], JEMALLOC_FREE_JUNK, - "Missing junk fill for byte %zu/%zu of deallocated region", - i, usize); +do_allocs(size_t size, bool zero, size_t lg_align) { +#define JUNK_ALLOC(...) \ + do { \ + assert(ptr_ind + 1 < arraylen(ptrs)); \ + void *ptr = __VA_ARGS__; \ + assert_ptr_not_null(ptr, ""); \ + ptrs[ptr_ind++] = ptr; \ + if (opt_junk_alloc && !zero) { \ + expect_ptr_eq(ptr, last_junked_ptr, ""); \ + expect_zu_eq(last_junked_usize, \ + malloc_usable_size(ptr), ""); \ + } \ + } while (0) + if (!zero && lg_align == 0) { + JUNK_ALLOC(malloc(size)); } - if (ptr == watch_for_junking) { - saw_junking = true; + if (!zero) { + JUNK_ALLOC(aligned_alloc(1 << lg_align, size)); + } +#ifdef JEMALLOC_OVERRIDE_MEMALIGN + if (!zero) { + JUNK_ALLOC(je_memalign(1 << lg_align, size)); + } +#endif +#ifdef JEMALLOC_OVERRIDE_VALLOC + if (!zero && lg_align == LG_PAGE) { + JUNK_ALLOC(je_valloc(size)); + } +#endif + int zero_flag = zero ? MALLOCX_ZERO : 0; + JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align))); + JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align) + | MALLOCX_TCACHE_NONE)); + if (lg_align >= LG_SIZEOF_PTR) { + void *memalign_result; + int err = posix_memalign(&memalign_result, (1 << lg_align), + size); + assert_d_eq(err, 0, ""); + JUNK_ALLOC(memalign_result); } } -static void -large_dalloc_maybe_junk_intercept(void *ptr, size_t usize) { - large_dalloc_maybe_junk_orig(ptr, usize); - if (ptr == watch_for_junking) { - saw_junking = true; - } -} +TEST_BEGIN(test_junk_alloc_free) { + bool zerovals[] = {false, true}; + size_t sizevals[] = { + 1, 8, 100, 1000, 100*1000 + /* + * Memory allocation failure is a real possibility in 32-bit mode. + * Rather than try to check in the face of resource exhaustion, we just + * rely more on the 64-bit tests. 
This is a little bit white-box-y in + * the sense that this is only a good test strategy if we know that the + * junk pathways don't touch interact with the allocation selection + * mechanisms; but this is in fact the case. + */ +#if LG_SIZEOF_PTR == 3 + , 10 * 1000 * 1000 +#endif + }; + size_t lg_alignvals[] = { + 0, 4, 10, 15, 16, LG_PAGE +#if LG_SIZEOF_PTR == 3 + , 20, 24 +#endif + }; -static void -test_junk(size_t sz_min, size_t sz_max) { - uint8_t *s; - size_t sz_prev, sz, i; +#define JUNK_FREE(...) \ + do { \ + do_allocs(size, zero, lg_align); \ + for (size_t n = 0; n < ptr_ind; n++) { \ + void *ptr = ptrs[n]; \ + __VA_ARGS__; \ + if (opt_junk_free) { \ + assert_ptr_eq(ptr, last_junked_ptr, \ + ""); \ + assert_zu_eq(usize, last_junked_usize, \ + ""); \ + } \ + reset(); \ + } \ + } while (0) + for (size_t i = 0; i < arraylen(zerovals); i++) { + for (size_t j = 0; j < arraylen(sizevals); j++) { + for (size_t k = 0; k < arraylen(lg_alignvals); k++) { + bool zero = zerovals[i]; + size_t size = sizevals[j]; + size_t lg_align = lg_alignvals[k]; + size_t usize = nallocx(size, + MALLOCX_LG_ALIGN(lg_align)); - if (opt_junk_free) { - arena_dalloc_junk_small_orig = arena_dalloc_junk_small; - arena_dalloc_junk_small = arena_dalloc_junk_small_intercept; - large_dalloc_junk_orig = large_dalloc_junk; - large_dalloc_junk = large_dalloc_junk_intercept; - large_dalloc_maybe_junk_orig = large_dalloc_maybe_junk; - large_dalloc_maybe_junk = large_dalloc_maybe_junk_intercept; - } - - sz_prev = 0; - s = (uint8_t *)mallocx(sz_min, 0); - expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); - - for (sz = sallocx(s, 0); sz <= sz_max; - sz_prev = sz, sz = sallocx(s, 0)) { - if (sz_prev > 0) { - expect_u_eq(s[0], 'a', - "Previously allocated byte %zu/%zu is corrupted", - ZU(0), sz_prev); - expect_u_eq(s[sz_prev-1], 'a', - "Previously allocated byte %zu/%zu is corrupted", - sz_prev-1, sz_prev); - } - - for (i = sz_prev; i < sz; i++) { - if (opt_junk_alloc) { - expect_u_eq(s[i], 
JEMALLOC_ALLOC_JUNK, - "Newly allocated byte %zu/%zu isn't " - "junk-filled", i, sz); + JUNK_FREE(free(ptr)); + JUNK_FREE(dallocx(ptr, 0)); + JUNK_FREE(dallocx(ptr, MALLOCX_TCACHE_NONE)); + JUNK_FREE(dallocx(ptr, MALLOCX_LG_ALIGN( + lg_align))); + JUNK_FREE(sdallocx(ptr, usize, MALLOCX_LG_ALIGN( + lg_align))); + JUNK_FREE(sdallocx(ptr, usize, + MALLOCX_TCACHE_NONE | MALLOCX_LG_ALIGN(lg_align))); + if (opt_zero_realloc_action + == zero_realloc_action_free) { + JUNK_FREE(realloc(ptr, 0)); + } } - s[i] = 'a'; - } - - if (xallocx(s, sz+1, 0, 0) == sz) { - uint8_t *t; - watch_junking(s); - t = (uint8_t *)rallocx(s, sz+1, 0); - expect_ptr_not_null((void *)t, - "Unexpected rallocx() failure"); - expect_zu_ge(sallocx(t, 0), sz+1, - "Unexpectedly small rallocx() result"); - if (!background_thread_enabled()) { - expect_ptr_ne(s, t, - "Unexpected in-place rallocx()"); - expect_true(!opt_junk_free || saw_junking, - "Expected region of size %zu to be " - "junk-filled", sz); - } - s = t; } } - - watch_junking(s); - dallocx(s, 0); - expect_true(!opt_junk_free || saw_junking, - "Expected region of size %zu to be junk-filled", sz); - - if (opt_junk_free) { - arena_dalloc_junk_small = arena_dalloc_junk_small_orig; - large_dalloc_junk = large_dalloc_junk_orig; - large_dalloc_maybe_junk = large_dalloc_maybe_junk_orig; - } -} - -TEST_BEGIN(test_junk_small) { - test_skip_if(!config_fill); - test_junk(1, SC_SMALL_MAXCLASS - 1); } TEST_END -TEST_BEGIN(test_junk_large) { - test_skip_if(!config_fill); - test_junk(SC_SMALL_MAXCLASS + 1, (1U << (SC_LG_LARGE_MINCLASS + 1))); +TEST_BEGIN(test_realloc_expand) { + char *volatile ptr; + char *volatile expanded; + + test_skip_if(!opt_junk_alloc); + + /* Realloc */ + ptr = malloc(SC_SMALL_MAXCLASS); + expanded = realloc(ptr, SC_LARGE_MINCLASS); + expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); + expect_zu_eq(last_junked_usize, + SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + free(expanded); + + /* rallocx(..., 0) */ + ptr = 
malloc(SC_SMALL_MAXCLASS); + expanded = rallocx(ptr, SC_LARGE_MINCLASS, 0); + expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); + expect_zu_eq(last_junked_usize, + SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + free(expanded); + + /* rallocx(..., nonzero) */ + ptr = malloc(SC_SMALL_MAXCLASS); + expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); + expect_zu_eq(last_junked_usize, + SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + free(expanded); + + /* rallocx(..., MALLOCX_ZERO) */ + ptr = malloc(SC_SMALL_MAXCLASS); + last_junked_ptr = (void *)-1; + last_junked_usize = (size_t)-1; + expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_ZERO); + expect_ptr_eq(last_junked_ptr, (void *)-1, ""); + expect_zu_eq(last_junked_usize, (size_t)-1, ""); + free(expanded); + + /* + * Unfortunately, testing xallocx reliably is difficult to do portably + * (since allocations can be expanded / not expanded differently on + * different platforms. We rely on manual inspection there -- the + * xallocx pathway is easy to inspect, though. + * + * Likewise, we don't test the shrinking pathways. It's difficult to do + * so consistently (because of the risk of split failure or memory + * exhaustion, in which case no junking should happen). This is fine + * -- junking is a best-effort debug mechanism in the first place. + */ } TEST_END int main(void) { + junk_alloc_callback = &test_junk; + junk_free_callback = &test_junk; return test( - test_junk_small, - test_junk_large); + test_junk_alloc_free, + test_realloc_expand); } From 909c501b07c101890c264fd717b0bf8b5cf27156 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 25 Feb 2020 12:14:48 -0800 Subject: [PATCH 1571/2608] Cache_bin: Shouldn't know about tcache. Instead, have it take the cache_bin_info_ts to use by pointer. While we're here, add a src file for the cache bin. 
--- Makefile.in | 1 + include/jemalloc/internal/cache_bin.h | 58 ++++++++++--------- include/jemalloc/internal/tcache_externs.h | 2 + include/jemalloc/internal/tcache_inlines.h | 12 ++-- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 18 +++--- src/cache_bin.c | 3 + src/tcache.c | 33 ++++++----- test/unit/cache_bin.c | 33 ++++++----- 12 files changed, 101 insertions(+), 67 deletions(-) create mode 100644 src/cache_bin.c diff --git a/Makefile.in b/Makefile.in index 984bd724..b53846d2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -102,6 +102,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ $(srcroot)src/buf_writer.c \ + $(srcroot)src/cache_bin.c \ $(srcroot)src/ckh.c \ $(srcroot)src/counter.c \ $(srcroot)src/ctl.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 60feb15f..ec2fdf42 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -35,7 +35,6 @@ struct cache_bin_info_s { /* The size of the bin stack, i.e. ncached_max * sizeof(ptr). */ cache_bin_sz_t stack_size; }; -extern cache_bin_info_t *tcache_bin_info; typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { @@ -115,29 +114,29 @@ struct cache_bin_array_descriptor_s { /* Returns ncached_max: Upper limit on ncached. 
*/ static inline cache_bin_sz_t -cache_bin_ncached_max_get(szind_t ind) { - return tcache_bin_info[ind].stack_size / sizeof(void *); +cache_bin_ncached_max_get(szind_t ind, cache_bin_info_t *infos) { + return infos[ind].stack_size / sizeof(void *); } static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t n = (cache_bin_sz_t)((tcache_bin_info[ind].stack_size + +cache_bin_ncached_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { + cache_bin_sz_t n = (cache_bin_sz_t)((infos[ind].stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); - assert(n <= cache_bin_ncached_max_get(ind)); + assert(n <= cache_bin_ncached_max_get(ind, infos)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; } static inline void ** -cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { - void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind); +cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, + cache_bin_info_t *infos) { + void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind, infos); /* Low bits overflow disallowed when allocating the space. */ assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); /* Can also be computed via (full_position + ncached_max) | highbits. */ - uintptr_t lowbits = bin->full_position + - tcache_bin_info[ind].stack_size; + uintptr_t lowbits = bin->full_position + infos[ind].stack_size; uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & ~(((uint64_t)1 << 32) - 1); assert(ret == (void **)(lowbits | highbits)); @@ -147,32 +146,35 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind) { /* Returns the position of the bottom item on the stack; for convenience. 
*/ static inline void ** -cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind) { - void **bottom = cache_bin_empty_position_get(bin, ind) - 1; - assert(cache_bin_ncached_get(bin, ind) == 0 || *bottom != NULL); +cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind, + cache_bin_info_t *infos) { + void **bottom = cache_bin_empty_position_get(bin, ind, infos) - 1; + assert(cache_bin_ncached_get(bin, ind, infos) == 0 || *bottom != NULL); return bottom; } /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t -cache_bin_low_water_get(cache_bin_t *bin, szind_t ind) { - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind); +cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, + cache_bin_info_t *infos) { + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind, infos); cache_bin_sz_t low_water = ncached_max - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / sizeof(void *)); assert(low_water <= ncached_max); - assert(low_water <= cache_bin_ncached_get(bin, ind)); + assert(low_water <= cache_bin_ncached_get(bin, ind, infos)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); return low_water; } static inline void -cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n) { - bin->cur_ptr.lowbits = bin->full_position + - tcache_bin_info[ind].stack_size - n * sizeof(void *); - assert(n <= cache_bin_ncached_max_get(ind)); +cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n, + cache_bin_info_t *infos) { + bin->cur_ptr.lowbits = bin->full_position + infos[ind].stack_size + - n * sizeof(void *); + assert(n <= cache_bin_ncached_max_get(ind, infos)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } @@ -188,7 +190,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, - const bool adjust_low_water) { + cache_bin_info_t *infos, const bool 
adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -197,14 +199,14 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, /* * Check for both bin->ncached == 0 and ncached < low_water in a single * branch. When adjust_low_water is true, this also avoids accessing - * tcache_bin_info (which is on a separate cacheline / page) in the - * common case. + * the cache_bin_info_ts (which is on a separate cacheline / page) in + * the common case. */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { if (adjust_low_water) { assert(ind != INVALID_SZIND); uint32_t empty_position = bin->full_position + - tcache_bin_info[ind].stack_size; + infos[ind].stack_size; if (unlikely(bin->cur_ptr.lowbits > empty_position)) { /* Over-allocated; revert. */ bin->cur_ptr.ptr--; @@ -237,12 +239,14 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { /* The szind parameter won't be used. */ - return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, false); + return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, + /* infos */ NULL, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind) { - return cache_bin_alloc_easy_impl(bin, success, ind, true); +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind, + cache_bin_info_t *infos) { + return cache_bin_alloc_easy_impl(bin, success, ind, infos, true); } #undef INVALID_SZIND diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index db6f98bf..c5c8f485 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -13,6 +13,8 @@ extern unsigned nhbins; /* Maximum cached size class. 
*/ extern size_t tcache_maxclass; +extern cache_bin_info_t *tcache_bin_info; + /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and * usable via the MALLOCX_TCACHE() flag. The automatic per thread tcaches are diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index ff06935d..dc6da949 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,7 +36,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind, + tcache_bin_info); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -79,7 +80,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success, binind, + tcache_bin_info); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -127,7 +129,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + unsigned remain = cache_bin_ncached_max_get(binind, + tcache_bin_info) >> 1; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -145,7 +148,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind) >> 1; + unsigned remain = 
cache_bin_ncached_max_get(binind, + tcache_bin_info) >> 1; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d98bb858..920d55ed 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -42,6 +42,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index fd3e11c0..fe77170d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -25,6 +25,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b59d411f..2db94010 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -42,6 +42,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index fd3e11c0..fe77170d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -25,6 +25,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index 0a9e4a98..5ca884b7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -200,13 +200,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i) * sz_index2size(i)); + cache_bin_ncached_get(tbin, i, tcache_bin_info) + * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, - 
cache_bin_ncached_get(tbin, i + SC_NBINS) * - sz_index2size(i)); + cache_bin_ncached_get(tbin, i + SC_NBINS, + tcache_bin_info) * sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -1320,13 +1321,14 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind) { - assert(cache_bin_ncached_get(tbin, binind) == 0); + assert(cache_bin_ncached_get(tbin, binind, tcache_bin_info) == 0); tcache->bin_refilled[binind] = true; const bin_info_t *bin_info = &bin_infos[binind]; - const unsigned nfill = cache_bin_ncached_max_get(binind) >> - tcache->lg_fill_div[binind]; - void **empty_position = cache_bin_empty_position_get(tbin, binind); + const unsigned nfill = cache_bin_ncached_max_get(binind, + tcache_bin_info) >> tcache->lg_fill_div[binind]; + void **empty_position = cache_bin_empty_position_get(tbin, binind, + tcache_bin_info); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull @@ -1446,7 +1448,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_ncached_set(tbin, binind, filled); + cache_bin_ncached_set(tbin, binind, filled, tcache_bin_info); arena_decay_tick(tsdn, arena); } diff --git a/src/cache_bin.c b/src/cache_bin.c new file mode 100644 index 00000000..454cb475 --- /dev/null +++ b/src/cache_bin.c @@ -0,0 +1,3 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + diff --git a/src/tcache.c b/src/tcache.c index c736f565..62905f14 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -59,8 +59,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { is_small = false; } - cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind, + tcache_bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, + tcache_bin_info); if 
(low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. @@ -73,8 +75,8 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_ncached_max_get(binind) >> - (tcache->lg_fill_div[binind] + 1)) >= 1) { + if ((cache_bin_ncached_max_get(binind, tcache_bin_info) + >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; } } else { @@ -107,7 +109,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, tcache_success, binind); + ret = cache_bin_alloc_easy(tbin, tcache_success, binind, + tcache_bin_info); return ret; } @@ -126,7 +129,8 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * builds, avoid the branch in the loop. */ size_t szind_sum = binind * nflush; - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind, + tcache_bin_info); for (unsigned i = 0 ; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, @@ -164,7 +168,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, + tcache_bin_info); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); @@ -175,7 +180,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * touched (it's just included to satisfy the no-zero-length rule). 
*/ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - void **bottom_item = cache_bin_bottom_item_get(tbin, binind); + void **bottom_item = cache_bin_bottom_item_get(tbin, binind, + tcache_bin_info); /* Look up edata once per item. */ if (config_opt_safety_checks) { @@ -340,7 +346,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - cache_bin_ncached_set(tbin, binind, rem); + cache_bin_ncached_set(tbin, binind, rem, tcache_bin_info); if (tbin->cur_ptr.lowbits > tbin->low_water_position) { tbin->low_water_position = tbin->cur_ptr.lowbits; } @@ -445,8 +451,9 @@ tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)full_position; assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); - assert(cache_bin_ncached_get(bin, ind) == 0); - assert(cache_bin_empty_position_get(bin, ind) == empty_position); + assert(cache_bin_ncached_get(bin, ind, tcache_bin_info) == 0); + assert(cache_bin_empty_position_get(bin, ind, tcache_bin_info) + == empty_position); return false; } @@ -605,8 +612,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. 
*/ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - assert(cache_bin_ncached_get(bin, 0) == 0); - assert(cache_bin_empty_position_get(bin, 0) == + assert(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0); + assert(cache_bin_empty_position_get(bin, 0, tcache_bin_info) == bin->cur_ptr.ptr); void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - tcache_bin_info[0].stack_size); diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index f98a92c3..5ef108d0 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -10,48 +10,51 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0); + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0, + tcache_bin_info); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)stack; - expect_ptr_eq(cache_bin_empty_position_get(bin, 0), empty_position, - "Incorrect empty position"); + expect_ptr_eq(cache_bin_empty_position_get(bin, 0, tcache_bin_info), + empty_position, "Incorrect empty position"); /* Not using expect_zu etc on cache_bin_sz_t since it may change. 
*/ - expect_true(cache_bin_ncached_get(bin, 0) == 0, "Incorrect cache size"); + expect_true(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0, + "Incorrect cache size"); bool success; - void *ret = cache_bin_alloc_easy(bin, &success, 0); + void *ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); expect_false(success, "Empty cache bin should not alloc"); - expect_true(cache_bin_low_water_get(bin, 0) == 0, + expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) == 0, "Incorrect low water mark"); - cache_bin_ncached_set(bin, 0, 0); + cache_bin_ncached_set(bin, 0, 0, tcache_bin_info); expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - expect_true(success && cache_bin_ncached_get(bin, 0) == i, - "Bin dalloc failure"); + expect_true(success && cache_bin_ncached_get(bin, 0, + tcache_bin_info) == i, "Bin dalloc failure"); } success = cache_bin_dalloc_easy(bin, (void *)1); expect_false(success, "Bin should be full"); expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - cache_bin_ncached_set(bin, 0, ncached_max); + cache_bin_ncached_set(bin, 0, ncached_max, tcache_bin_info); expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &success, 0); - cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0); + ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0, + tcache_bin_info); expect_true(success && ncached == i - 1, "Cache bin alloc failure"); expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - expect_true(cache_bin_low_water_get(bin, 0) == ncached, - "Incorrect low water mark"); + expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) + == ncached, "Incorrect low water mark"); } - ret = cache_bin_alloc_easy(bin, &success, 0); + ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); expect_false(success, "Empty cache bin should not alloc."); expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); From da68f7329666a4375e9df04a0f441bb9ae2b4d6c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 25 Feb 2020 12:18:51 -0800 Subject: [PATCH 1572/2608] Move percpu_arena_update. It's not really part of the API of the arena; it changes which arena we're using that API on. 
--- include/jemalloc/internal/arena_inlines_a.h | 22 ------------------ .../internal/jemalloc_internal_inlines_b.h | 23 +++++++++++++++++++ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 27434c30..b83d0e8e 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -21,26 +21,4 @@ arena_internal_get(arena_t *arena) { return atomic_load_zu(&arena->stats.internal, ATOMIC_RELAXED); } -static inline void -percpu_arena_update(tsd_t *tsd, unsigned cpu) { - assert(have_percpu_arena); - arena_t *oldarena = tsd_arena_get(tsd); - assert(oldarena != NULL); - unsigned oldind = arena_ind_get(oldarena); - - if (oldind != cpu) { - unsigned newind = cpu; - arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); - assert(newarena != NULL); - - /* Set new arena/tcache associations. */ - arena_migrate(tsd, oldind, newind); - tcache_t *tcache = tcache_get(tsd); - if (tcache != NULL) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - newarena); - } - } -} - #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_A_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index fc526c4b..3a0bfc64 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -3,6 +3,29 @@ #include "jemalloc/internal/extent.h" +static inline void +percpu_arena_update(tsd_t *tsd, unsigned cpu) { + assert(have_percpu_arena); + arena_t *oldarena = tsd_arena_get(tsd); + assert(oldarena != NULL); + unsigned oldind = arena_ind_get(oldarena); + + if (oldind != cpu) { + unsigned newind = cpu; + arena_t *newarena = arena_get(tsd_tsdn(tsd), newind, true); + assert(newarena != NULL); + + /* Set new arena/tcache associations. 
*/ + arena_migrate(tsd, oldind, newind); + tcache_t *tcache = tcache_get(tsd); + if (tcache != NULL) { + tcache_arena_reassociate(tsd_tsdn(tsd), tcache, + newarena); + } + } +} + + /* Choose an arena based on a per-thread value. */ static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { From b66c0973cc7811498a97783283c8ef06f83d6b9f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Feb 2020 17:10:12 -0800 Subject: [PATCH 1573/2608] cache_bin: Don't allow direct internals access. --- include/jemalloc/internal/cache_bin.h | 38 ++++++++++++++++++++------- src/tcache.c | 26 +++++++++--------- 2 files changed, 41 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index ec2fdf42..23092040 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -144,16 +144,6 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, return ret; } -/* Returns the position of the bottom item on the stack; for convenience. */ -static inline void ** -cache_bin_bottom_item_get(cache_bin_t *bin, szind_t ind, - cache_bin_info_t *infos) { - void **bottom = cache_bin_empty_position_get(bin, ind, infos) - 1; - assert(cache_bin_ncached_get(bin, ind, infos) == 0 || *bottom != NULL); - - return bottom; -} - /* Returns the numeric value of low water in [0, ncached]. 
*/ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, @@ -263,4 +253,32 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return true; } +typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; +struct cache_bin_ptr_array_s { + cache_bin_sz_t nflush; + void **ptr; +}; + +#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nflush_val) \ + cache_bin_ptr_array_t name; \ + name.nflush = (nflush_val) + +static inline void +cache_bin_ptr_array_init(cache_bin_ptr_array_t *arr, cache_bin_t *bin, + cache_bin_sz_t nflush, szind_t ind, cache_bin_info_t *infos) { + arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - 1; + assert(cache_bin_ncached_get(bin, ind, infos) == 0 + || *arr->ptr != NULL); +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { + return *(arr->ptr - n); +} + +JEMALLOC_ALWAYS_INLINE void +cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { + *(arr->ptr - n) = p; +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/tcache.c b/src/tcache.c index 62905f14..4096b05a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -117,8 +117,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, /* Enabled with --enable-extra-size-check. */ static void -tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, - size_t nflush, edata_t **edatas) { +tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, + szind_t binind, size_t nflush, edata_t **edatas) { /* Avoids null-checking tsdn in the loop below. */ util_assume(tsd != NULL); @@ -129,15 +129,14 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, * builds, avoid the branch in the loop. 
*/ size_t szind_sum = binind * nflush; - void **bottom_item = cache_bin_bottom_item_get(tbin, binind, - tcache_bin_info); - for (unsigned i = 0 ; i < nflush; i++) { + for (unsigned i = 0; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, - *(bottom_item - i), &full_alloc_ctx); + cache_bin_ptr_array_get(arr, i), &full_alloc_ctx); edatas[i] = full_alloc_ctx.edata; szind_sum -= full_alloc_ctx.szind; } + if (szind_sum != 0) { safety_check_fail_sized_dealloc(false); } @@ -180,17 +179,18 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * touched (it's just included to satisfy the no-zero-length rule). */ VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); - void **bottom_item = cache_bin_bottom_item_get(tbin, binind, - tcache_bin_info); + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); + + cache_bin_ptr_array_init(&ptrs, tbin, nflush, binind, tcache_bin_info); /* Look up edata once per item. */ if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsd, tbin, binind, nflush, + tbin_edatas_lookup_size_check(tsd, &ptrs, binind, nflush, item_edata); } else { for (unsigned i = 0 ; i < nflush; i++) { item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), - &emap_global, *(bottom_item - i)); + &emap_global, cache_bin_ptr_array_get(&ptrs, i)); } } @@ -262,7 +262,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, */ if (!small) { for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); + void *ptr = cache_bin_ptr_array_get(&ptrs, i); edata = item_edata[i]; assert(ptr != NULL && edata != NULL); @@ -280,7 +280,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, /* Deallocate whatever we can. 
*/ unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { - void *ptr = *(bottom_item - i); + void *ptr = cache_bin_ptr_array_get(&ptrs, i); edata = item_edata[i]; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, @@ -291,7 +291,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, * arena. Either way, stash the object so that * it can be handled in a future pass. */ - *(bottom_item - ndeferred) = ptr; + cache_bin_ptr_array_set(&ptrs, ndeferred, ptr); item_edata[ndeferred] = edata; ndeferred++; continue; From 74d36d78efdea846d577dea933e4bb06a18efa10 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Feb 2020 17:23:47 -0800 Subject: [PATCH 1574/2608] Cache bin: Make ncached_max a query on the info_t. --- include/jemalloc/internal/cache_bin.h | 10 +++++----- include/jemalloc/internal/tcache_inlines.h | 8 ++++---- src/arena.c | 4 ++-- src/tcache.c | 5 +++-- test/unit/cache_bin.c | 4 ++-- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 23092040..3f0524ea 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -114,15 +114,15 @@ struct cache_bin_array_descriptor_s { /* Returns ncached_max: Upper limit on ncached. 
*/ static inline cache_bin_sz_t -cache_bin_ncached_max_get(szind_t ind, cache_bin_info_t *infos) { - return infos[ind].stack_size / sizeof(void *); +cache_bin_info_ncached_max(cache_bin_info_t *info) { + return info->stack_size / sizeof(void *); } static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { cache_bin_sz_t n = (cache_bin_sz_t)((infos[ind].stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); - assert(n <= cache_bin_ncached_max_get(ind, infos)); + assert(n <= cache_bin_info_ncached_max(&infos[ind])); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; @@ -148,7 +148,7 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(ind, infos); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&infos[ind]); cache_bin_sz_t low_water = ncached_max - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / sizeof(void *)); @@ -164,7 +164,7 @@ cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n, cache_bin_info_t *infos) { bin->cur_ptr.lowbits = bin->full_position + infos[ind].stack_size - n * sizeof(void *); - assert(n <= cache_bin_ncached_max_get(ind, infos)); + assert(n <= cache_bin_info_ncached_max(&infos[ind])); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index dc6da949..28d6e3c8 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -129,8 +129,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_small_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind, - tcache_bin_info) >> 1; + unsigned remain = 
cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> 1; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -148,8 +148,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bin = tcache_large_bin_get(tcache, binind); if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(binind, - tcache_bin_info) >> 1; + unsigned remain = cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> 1; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/arena.c b/src/arena.c index 5ca884b7..2f8a03c0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1325,8 +1325,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, tcache->bin_refilled[binind] = true; const bin_info_t *bin_info = &bin_infos[binind]; - const unsigned nfill = cache_bin_ncached_max_get(binind, - tcache_bin_info) >> tcache->lg_fill_div[binind]; + const unsigned nfill = cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; void **empty_position = cache_bin_empty_position_get(tbin, binind, tcache_bin_info); diff --git a/src/tcache.c b/src/tcache.c index 4096b05a..d2442ef5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -75,8 +75,9 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. 
*/ - if ((cache_bin_ncached_max_get(binind, tcache_bin_info) - >> (tcache->lg_fill_div[binind] + 1)) >= 1) { + if ((cache_bin_info_ncached_max( + &tcache_bin_info[binind]) >> + (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; } } else { diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 5ef108d0..ab36a3a1 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -10,8 +10,8 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_not_null(stack, "Unexpected mallocx failure"); /* Initialize to empty; bin 0. */ - cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(0, - tcache_bin_info); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max( + &tcache_bin_info[0]); void **empty_position = stack + ncached_max; bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; From d303f30796f0aef7f7fc9d907ef240b93d3fc674 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 26 Feb 2020 17:39:55 -0800 Subject: [PATCH 1575/2608] cache_bin nflush -> n. We're going to use it on the fill pathway as well. 
--- include/jemalloc/internal/cache_bin.h | 8 ++++---- src/tcache.c | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 3f0524ea..1c679236 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -255,16 +255,16 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; struct cache_bin_ptr_array_s { - cache_bin_sz_t nflush; + cache_bin_sz_t n; void **ptr; }; -#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nflush_val) \ +#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ cache_bin_ptr_array_t name; \ - name.nflush = (nflush_val) + name.n = (nval) static inline void -cache_bin_ptr_array_init(cache_bin_ptr_array_t *arr, cache_bin_t *bin, +cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, cache_bin_sz_t nflush, szind_t ind, cache_bin_info_t *infos) { arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - 1; assert(cache_bin_ncached_get(bin, ind, infos) == 0 diff --git a/src/tcache.c b/src/tcache.c index d2442ef5..3fc4ee6a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -182,7 +182,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_ptr_array_init(&ptrs, tbin, nflush, binind, tcache_bin_info); + cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, binind, + tcache_bin_info); /* Look up edata once per item. */ if (config_opt_safety_checks) { From 1b00d808d7bfb9ff41c643dcb32f96a078090932 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 27 Feb 2020 10:22:46 -0800 Subject: [PATCH 1576/2608] cache_bin: Don't let arena see empty position. 
--- include/jemalloc/internal/cache_bin.h | 20 ++++++++++++++++++++ src/arena.c | 20 ++++++++------------ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 1c679236..775b71f2 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -271,6 +271,13 @@ cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, || *arr->ptr != NULL); } +static inline void +cache_bin_ptr_array_init_for_fill(cache_bin_ptr_array_t *arr, cache_bin_t *bin, + cache_bin_sz_t nfill, szind_t ind, cache_bin_info_t *infos) { + arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - nfill; + assert(cache_bin_ncached_get(bin, ind, infos) == 0); +} + JEMALLOC_ALWAYS_INLINE void * cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { return *(arr->ptr - n); @@ -281,4 +288,17 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { *(arr->ptr - n) = p; } +static inline void +cache_bin_fill_from_ptr_array(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + szind_t ind, szind_t nfilled, cache_bin_info_t *infos) { + assert(cache_bin_ncached_get(bin, ind, infos) == 0); + if (nfilled < arr->n) { + void **empty_position = cache_bin_empty_position_get(bin, ind, + infos); + memmove(empty_position - nfilled, empty_position - arr->n, + nfilled * sizeof(void *)); + } + cache_bin_ncached_set(bin, ind, nfilled, infos); +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/arena.c b/src/arena.c index 2f8a03c0..6b5f1d31 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1327,7 +1327,9 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, const bin_info_t *bin_info = &bin_infos[binind]; const unsigned nfill = cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; - void **empty_position = cache_bin_empty_position_get(tbin, binind, + + 
CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); + cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, binind, tcache_bin_info); /* @@ -1374,7 +1376,7 @@ label_refill: unsigned cnt = tofill < nfree ? tofill : nfree; arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, - empty_position - tofill); + &ptrs.ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1403,16 +1405,9 @@ label_refill: break; } + /* OOM. */ + assert(fresh_slab == NULL); - /* - * OOM. tbin->avail isn't yet filled down to its first element, - * so the successful allocations (if any) must be moved just - * before tbin->avail before bailing out. - */ - if (filled > 0) { - memmove(empty_position - filled, empty_position - nfill, - filled * sizeof(void *)); - } assert(!alloc_and_retry); break; } /* while (filled < nfill) loop. */ @@ -1448,7 +1443,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_ncached_set(tbin, binind, filled, tcache_bin_info); + cache_bin_fill_from_ptr_array(tbin, &ptrs, binind, filled, + tcache_bin_info); arena_decay_tick(tsdn, arena); } From e1dcc557d68cfa1c7f1fab6c84a9e44e1d97e1d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 28 Feb 2020 18:55:33 -0800 Subject: [PATCH 1577/2608] Cache bin: Only take the relevant cache_bin_info_t Previously, we took an array of cache_bin_info_ts and an index, and dereferenced ourselves. But infos for other cache_bins aren't relevant to any particular cache bin, so that should be the caller's job. 
--- include/jemalloc/internal/cache_bin.h | 71 +++++++++------------- include/jemalloc/internal/tcache_inlines.h | 8 +-- src/arena.c | 16 ++--- src/tcache.c | 30 ++++----- test/unit/cache_bin.c | 26 ++++---- 5 files changed, 70 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 775b71f2..bae669d1 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -119,24 +119,23 @@ cache_bin_info_ncached_max(cache_bin_info_t *info) { } static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, szind_t ind, cache_bin_info_t *infos) { - cache_bin_sz_t n = (cache_bin_sz_t)((infos[ind].stack_size + +cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = (cache_bin_sz_t)((info->stack_size + bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); - assert(n <= cache_bin_info_ncached_max(&infos[ind])); + assert(n <= cache_bin_info_ncached_max(info)); assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); return n; } static inline void ** -cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, - cache_bin_info_t *infos) { - void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, ind, infos); +cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { + void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, info); /* Low bits overflow disallowed when allocating the space. */ assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); /* Can also be computed via (full_position + ncached_max) | highbits. */ - uintptr_t lowbits = bin->full_position + infos[ind].stack_size; + uintptr_t lowbits = bin->full_position + info->stack_size; uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & ~(((uint64_t)1 << 32) - 1); assert(ret == (void **)(lowbits | highbits)); @@ -146,25 +145,24 @@ cache_bin_empty_position_get(cache_bin_t *bin, szind_t ind, /* Returns the numeric value of low water in [0, ncached]. 
*/ static inline cache_bin_sz_t -cache_bin_low_water_get(cache_bin_t *bin, szind_t ind, - cache_bin_info_t *infos) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&infos[ind]); +cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); cache_bin_sz_t low_water = ncached_max - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / sizeof(void *)); assert(low_water <= ncached_max); - assert(low_water <= cache_bin_ncached_get(bin, ind, infos)); + assert(low_water <= cache_bin_ncached_get(bin, info)); assert(bin->low_water_position >= bin->cur_ptr.lowbits); return low_water; } static inline void -cache_bin_ncached_set(cache_bin_t *bin, szind_t ind, cache_bin_sz_t n, - cache_bin_info_t *infos) { - bin->cur_ptr.lowbits = bin->full_position + infos[ind].stack_size +cache_bin_ncached_set(cache_bin_t *bin, cache_bin_sz_t n, + cache_bin_info_t *info) { + bin->cur_ptr.lowbits = bin->full_position + info->stack_size - n * sizeof(void *); - assert(n <= cache_bin_info_ncached_max(&infos[ind])); + assert(n <= cache_bin_info_ncached_max(info)); assert(n == 0 || *bin->cur_ptr.ptr != NULL); } @@ -176,11 +174,9 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, descriptor->bins_large = bins_large; } -#define INVALID_SZIND ((szind_t)(unsigned)-1) - JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, - cache_bin_info_t *infos, const bool adjust_low_water) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, + cache_bin_info_t *info, const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. 
@@ -194,9 +190,8 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, */ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { if (adjust_low_water) { - assert(ind != INVALID_SZIND); uint32_t empty_position = bin->full_position + - infos[ind].stack_size; + info->stack_size; if (unlikely(bin->cur_ptr.lowbits > empty_position)) { /* Over-allocated; revert. */ bin->cur_ptr.ptr--; @@ -206,7 +201,6 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, } bin->low_water_position = bin->cur_ptr.lowbits; } else { - assert(ind == INVALID_SZIND); bin->cur_ptr.ptr--; assert(bin->cur_ptr.lowbits == bin->low_water_position); *success = false; @@ -228,19 +222,15 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, szind_t ind, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { - /* The szind parameter won't be used. */ - return cache_bin_alloc_easy_impl(bin, success, INVALID_SZIND, - /* infos */ NULL, false); + /* We don't look at info if we're not adjusting low-water. 
*/ + return cache_bin_alloc_easy_impl(bin, success, NULL, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, szind_t ind, - cache_bin_info_t *infos) { - return cache_bin_alloc_easy_impl(bin, success, ind, infos, true); +cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_info_t *info) { + return cache_bin_alloc_easy_impl(bin, success, info, true); } -#undef INVALID_SZIND - JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { @@ -265,17 +255,17 @@ struct cache_bin_ptr_array_s { static inline void cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nflush, szind_t ind, cache_bin_info_t *infos) { - arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - 1; - assert(cache_bin_ncached_get(bin, ind, infos) == 0 + cache_bin_sz_t nflush, cache_bin_info_t *info) { + arr->ptr = cache_bin_empty_position_get(bin, info) - 1; + assert(cache_bin_ncached_get(bin, info) == 0 || *arr->ptr != NULL); } static inline void cache_bin_ptr_array_init_for_fill(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nfill, szind_t ind, cache_bin_info_t *infos) { - arr->ptr = cache_bin_empty_position_get(bin, ind, infos) - nfill; - assert(cache_bin_ncached_get(bin, ind, infos) == 0); + cache_bin_sz_t nfill, cache_bin_info_t *info) { + arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; + assert(cache_bin_ncached_get(bin, info) == 0); } JEMALLOC_ALWAYS_INLINE void * @@ -290,15 +280,14 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { static inline void cache_bin_fill_from_ptr_array(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - szind_t ind, szind_t nfilled, cache_bin_info_t *infos) { - assert(cache_bin_ncached_get(bin, ind, infos) == 0); + szind_t nfilled, cache_bin_info_t *info) { + assert(cache_bin_ncached_get(bin, info) == 0); if (nfilled < arr->n) { - 
void **empty_position = cache_bin_empty_position_get(bin, ind, - infos); + void **empty_position = cache_bin_empty_position_get(bin, info); memmove(empty_position - nfilled, empty_position - arr->n, nfilled * sizeof(void *)); } - cache_bin_ncached_set(bin, ind, nfilled, infos); + cache_bin_ncached_set(bin, nfilled, info); } #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 28d6e3c8..1b157ba3 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind, - tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &tcache_success, + &tcache_bin_info[binind]); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +80,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, binind, - tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &tcache_success, + &tcache_bin_info[binind]); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/arena.c b/src/arena.c index 6b5f1d31..ee357d7f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -200,14 +200,14 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i, tcache_bin_info) + cache_bin_ncached_get(tbin, &tcache_bin_info[i]) * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; 
arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, i + SC_NBINS, - tcache_bin_info) * sz_index2size(i)); + cache_bin_ncached_get(tbin, + &tcache_bin_info[i + SC_NBINS]) * sz_index2size(i)); } } malloc_mutex_prof_read(tsdn, @@ -1321,7 +1321,7 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind) { - assert(cache_bin_ncached_get(tbin, binind, tcache_bin_info) == 0); + assert(cache_bin_ncached_get(tbin, &tcache_bin_info[binind]) == 0); tcache->bin_refilled[binind] = true; const bin_info_t *bin_info = &bin_infos[binind]; @@ -1329,8 +1329,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, binind, - tcache_bin_info); + cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, + &tcache_bin_info[binind]); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull @@ -1443,8 +1443,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_fill_from_ptr_array(tbin, &ptrs, binind, filled, - tcache_bin_info); + cache_bin_fill_from_ptr_array(tbin, &ptrs, filled, + &tcache_bin_info[binind]); arena_decay_tick(tsdn, arena); } diff --git a/src/tcache.c b/src/tcache.c index 3fc4ee6a..b2d46c3f 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -59,10 +59,10 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { is_small = false; } - cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, binind, - tcache_bin_info); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, - tcache_bin_info); + cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, + &tcache_bin_info[binind]); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + &tcache_bin_info[binind]); if (low_water > 0) { /* * Flush (ceiling) 3/4 of the objects below the low water mark. 
@@ -110,8 +110,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, tcache_success, binind, - tcache_bin_info); + ret = cache_bin_alloc_easy(tbin, tcache_success, + &tcache_bin_info[binind]); return ret; } @@ -168,8 +168,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, binind, - tcache_bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache->arena; assert(tcache_arena != NULL); @@ -182,8 +182,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, binind, - tcache_bin_info); + cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, + &tcache_bin_info[binind]); /* Look up edata once per item. 
*/ if (config_opt_safety_checks) { @@ -348,7 +348,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - cache_bin_ncached_set(tbin, binind, rem, tcache_bin_info); + cache_bin_ncached_set(tbin, rem, &tcache_bin_info[binind]); if (tbin->cur_ptr.lowbits > tbin->low_water_position) { tbin->low_water_position = tbin->cur_ptr.lowbits; } @@ -453,8 +453,8 @@ tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)full_position; assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); - assert(cache_bin_ncached_get(bin, ind, tcache_bin_info) == 0); - assert(cache_bin_empty_position_get(bin, ind, tcache_bin_info) + assert(cache_bin_ncached_get(bin, &tcache_bin_info[ind]) == 0); + assert(cache_bin_empty_position_get(bin, &tcache_bin_info[ind]) == empty_position); return false; @@ -614,8 +614,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. 
*/ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - assert(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0); - assert(cache_bin_empty_position_get(bin, 0, tcache_bin_info) == + assert(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0); + assert(cache_bin_empty_position_get(bin, &tcache_bin_info[0]) == bin->cur_ptr.ptr); void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - tcache_bin_info[0].stack_size); diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index ab36a3a1..a019ae73 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -16,45 +16,45 @@ TEST_BEGIN(test_cache_bin) { bin->cur_ptr.ptr = empty_position; bin->low_water_position = bin->cur_ptr.lowbits; bin->full_position = (uint32_t)(uintptr_t)stack; - expect_ptr_eq(cache_bin_empty_position_get(bin, 0, tcache_bin_info), + expect_ptr_eq(cache_bin_empty_position_get(bin, &tcache_bin_info[0]), empty_position, "Incorrect empty position"); /* Not using expect_zu etc on cache_bin_sz_t since it may change. 
*/ - expect_true(cache_bin_ncached_get(bin, 0, tcache_bin_info) == 0, + expect_true(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0, "Incorrect cache size"); bool success; - void *ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); + void *ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); expect_false(success, "Empty cache bin should not alloc"); - expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) == 0, + expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == 0, "Incorrect low water mark"); - cache_bin_ncached_set(bin, 0, 0, tcache_bin_info); + cache_bin_ncached_set(bin, 0, &tcache_bin_info[0]); expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - expect_true(success && cache_bin_ncached_get(bin, 0, - tcache_bin_info) == i, "Bin dalloc failure"); + expect_true(success && cache_bin_ncached_get(bin, + &tcache_bin_info[0]) == i, "Bin dalloc failure"); } success = cache_bin_dalloc_easy(bin, (void *)1); expect_false(success, "Bin should be full"); expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - cache_bin_ncached_set(bin, 0, ncached_max, tcache_bin_info); + cache_bin_ncached_set(bin, ncached_max, &tcache_bin_info[0]); expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); - cache_bin_sz_t ncached = cache_bin_ncached_get(bin, 0, - tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + cache_bin_sz_t ncached = cache_bin_ncached_get(bin, + &tcache_bin_info[0]); expect_true(success && ncached == i - 1, "Cache bin alloc failure"); expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - expect_true(cache_bin_low_water_get(bin, 0, tcache_bin_info) + expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == ncached, "Incorrect low water mark"); } - ret = cache_bin_alloc_easy(bin, &success, 0, tcache_bin_info); + ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); expect_false(success, "Empty cache bin should not alloc."); expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); From ff6acc6ed503f9808efd74f9aca70ee201d9e87a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 28 Feb 2020 19:12:07 -0800 Subject: [PATCH 1578/2608] Cache bin: simplify names and argument ordering. We always start with the cache bin, then its info (if necessary). 
--- include/jemalloc/internal/cache_bin.h | 63 ++++++++++++---------- include/jemalloc/internal/tcache_inlines.h | 8 +-- src/arena.c | 8 ++- src/tcache.c | 10 ++-- test/unit/cache_bin.c | 10 ++-- 5 files changed, 51 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index bae669d1..6895dca2 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -158,8 +158,8 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { } static inline void -cache_bin_ncached_set(cache_bin_t *bin, cache_bin_sz_t n, - cache_bin_info_t *info) { +cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_sz_t n) { bin->cur_ptr.lowbits = bin->full_position + info->stack_size - n * sizeof(void *); assert(n <= cache_bin_info_ncached_max(info)); @@ -175,8 +175,8 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, - cache_bin_info_t *info, const bool adjust_low_water) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info, + bool *success, const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. @@ -185,7 +185,7 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, /* * Check for both bin->ncached == 0 and ncached < low_water in a single * branch. When adjust_low_water is true, this also avoids accessing - * the cache_bin_info_ts (which is on a separate cacheline / page) in + * the cache_bin_info_t (which is on a separate cacheline / page) in * the common case. 
*/ if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { @@ -223,12 +223,12 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ - return cache_bin_alloc_easy_impl(bin, success, NULL, false); + return cache_bin_alloc_easy_impl(bin, NULL, success, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, bool *success, cache_bin_info_t *info) { - return cache_bin_alloc_easy_impl(bin, success, info, true); +cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { + return cache_bin_alloc_easy_impl(bin, info, success, true); } JEMALLOC_ALWAYS_INLINE bool @@ -254,18 +254,35 @@ struct cache_bin_ptr_array_s { name.n = (nval) static inline void -cache_bin_ptr_array_init_for_flush(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nflush, cache_bin_info_t *info) { - arr->ptr = cache_bin_empty_position_get(bin, info) - 1; - assert(cache_bin_ncached_get(bin, info) == 0 - || *arr->ptr != NULL); +cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { + arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; + assert(cache_bin_ncached_get(bin, info) == 0); +} + +/* + * While nfill in cache_bin_init_ptr_array_for_fill is the number we *intend* to + * fill, nfilled here is the number we actually filled (which may be less, in + * case of OOM. 
+ */ +static inline void +cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, szind_t nfilled) { + assert(cache_bin_ncached_get(bin, info) == 0); + if (nfilled < arr->n) { + void **empty_position = cache_bin_empty_position_get(bin, info); + memmove(empty_position - nfilled, empty_position - arr->n, + nfilled * sizeof(void *)); + } + cache_bin_ncached_set(bin, info, nfilled); } static inline void -cache_bin_ptr_array_init_for_fill(cache_bin_ptr_array_t *arr, cache_bin_t *bin, - cache_bin_sz_t nfill, cache_bin_info_t *info) { - arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; - assert(cache_bin_ncached_get(bin, info) == 0); +cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { + arr->ptr = cache_bin_empty_position_get(bin, info) - 1; + assert(cache_bin_ncached_get(bin, info) == 0 + || *arr->ptr != NULL); } JEMALLOC_ALWAYS_INLINE void * @@ -278,16 +295,4 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { *(arr->ptr - n) = p; } -static inline void -cache_bin_fill_from_ptr_array(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - szind_t nfilled, cache_bin_info_t *info) { - assert(cache_bin_ncached_get(bin, info) == 0); - if (nfilled < arr->n) { - void **empty_position = cache_bin_empty_position_get(bin, info); - memmove(empty_position - nfilled, empty_position - arr->n, - nfilled * sizeof(void *)); - } - cache_bin_ncached_set(bin, nfilled, info); -} - #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 1b157ba3..2d31ad0e 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, 
&tcache_success, - &tcache_bin_info[binind]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], + &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +80,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_success, - &tcache_bin_info[binind]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], + &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/arena.c b/src/arena.c index ee357d7f..7f7c27fb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1329,9 +1329,8 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_ptr_array_init_for_fill(&ptrs, tbin, nfill, - &tcache_bin_info[binind]); - + cache_bin_init_ptr_array_for_fill(tbin, &tcache_bin_info[binind], &ptrs, + nfill); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. 
After both are exhausted, new slabs will be allocated through @@ -1443,8 +1442,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_fill_from_ptr_array(tbin, &ptrs, filled, - &tcache_bin_info[binind]); + cache_bin_finish_fill(tbin, &tcache_bin_info[binind], &ptrs, filled); arena_decay_tick(tsdn, arena); } diff --git a/src/tcache.c b/src/tcache.c index b2d46c3f..3c6d5d76 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -110,8 +110,8 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, tcache_success, - &tcache_bin_info[binind]); + ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind], + tcache_success); return ret; } @@ -182,8 +182,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_ptr_array_init_for_flush(&ptrs, tbin, nflush, - &tcache_bin_info[binind]); + cache_bin_init_ptr_array_for_flush(tbin, &tcache_bin_info[binind], + &ptrs, nflush); /* Look up edata once per item. 
*/ if (config_opt_safety_checks) { @@ -348,7 +348,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, memmove(tbin->cur_ptr.ptr + (ncached - rem), tbin->cur_ptr.ptr, rem * sizeof(void *)); - cache_bin_ncached_set(tbin, rem, &tcache_bin_info[binind]); + cache_bin_ncached_set(tbin, &tcache_bin_info[binind], rem); if (tbin->cur_ptr.lowbits > tbin->low_water_position) { tbin->low_water_position = tbin->cur_ptr.lowbits; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index a019ae73..37ebd303 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -23,12 +23,12 @@ TEST_BEGIN(test_cache_bin) { "Incorrect cache size"); bool success; - void *ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + void *ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); expect_false(success, "Empty cache bin should not alloc"); expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == 0, "Incorrect low water mark"); - cache_bin_ncached_set(bin, 0, &tcache_bin_info[0]); + cache_bin_ncached_set(bin, &tcache_bin_info[0], 0); expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); @@ -39,12 +39,12 @@ TEST_BEGIN(test_cache_bin) { expect_false(success, "Bin should be full"); expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); - cache_bin_ncached_set(bin, ncached_max, &tcache_bin_info[0]); + cache_bin_ncached_set(bin, &tcache_bin_info[0], ncached_max); expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); /* Emulate low water after refill. 
*/ bin->low_water_position = bin->full_position; for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); cache_bin_sz_t ncached = cache_bin_ncached_get(bin, &tcache_bin_info[0]); expect_true(success && ncached == i - 1, @@ -54,7 +54,7 @@ TEST_BEGIN(test_cache_bin) { == ncached, "Incorrect low water mark"); } - ret = cache_bin_alloc_easy(bin, &success, &tcache_bin_info[0]); + ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); expect_false(success, "Empty cache bin should not alloc."); expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, "Bin should be empty"); From 44529da8525ef811ea8cc7704ffa9910459656ce Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 10:48:59 -0800 Subject: [PATCH 1579/2608] Cache-bin: Make flush modifications internal I.e. the tcache code just calls a cache-bin function to finish flush (and move pointers around, etc.). It doesn't directly access the cache-bin's owned memory any more. 
--- include/jemalloc/internal/cache_bin.h | 18 +++++++++++++++++- src/tcache.c | 8 ++------ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 6895dca2..382883cd 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -267,7 +267,7 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, */ static inline void cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, szind_t nfilled) { + cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { assert(cache_bin_ncached_get(bin, info) == 0); if (nfilled < arr->n) { void **empty_position = cache_bin_empty_position_get(bin, info); @@ -285,6 +285,10 @@ cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, || *arr->ptr != NULL); } +/* + * These accessors are used by the flush pathways -- they reverse ordinary flush + * ordering. + */ JEMALLOC_ALWAYS_INLINE void * cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { return *(arr->ptr - n); @@ -295,4 +299,16 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { *(arr->ptr - n) = p; } +static inline void +cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { + unsigned rem = cache_bin_ncached_get(bin, info) - nflushed; + memmove(bin->cur_ptr.ptr + nflushed, bin->cur_ptr.ptr, + rem * sizeof(void *)); + cache_bin_ncached_set(bin, info, rem); + if (bin->cur_ptr.lowbits > bin->low_water_position) { + bin->low_water_position = bin->cur_ptr.lowbits; + } +} + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/tcache.c b/src/tcache.c index 3c6d5d76..e7188585 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -346,12 +346,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } } - memmove(tbin->cur_ptr.ptr + (ncached - rem), 
tbin->cur_ptr.ptr, rem * - sizeof(void *)); - cache_bin_ncached_set(tbin, &tcache_bin_info[binind], rem); - if (tbin->cur_ptr.lowbits > tbin->low_water_position) { - tbin->low_water_position = tbin->cur_ptr.lowbits; - } + cache_bin_finish_flush(tbin, &tcache_bin_info[binind], &ptrs, + ncached - rem); } void From 60113dfe3b0fe89df5b9661ce27754a5a96cb070 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 14:41:47 -0800 Subject: [PATCH 1580/2608] Cache bin: Move in initialization code. --- include/jemalloc/internal/cache_bin.h | 32 ++++++- src/cache_bin.c | 101 +++++++++++++++++++++ src/tcache.c | 124 ++++++++------------------ 3 files changed, 170 insertions(+), 87 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 382883cd..6ab6baa7 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -14,7 +14,10 @@ * of the tcache at all. */ -/* The size in bytes of each cache bin stack. */ +/* + * The size in bytes of each cache bin stack. We also use this to indicate + * *counts* of individual objects. + */ typedef uint16_t cache_bin_sz_t; typedef struct cache_bin_stats_s cache_bin_stats_t; @@ -311,4 +314,31 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, } } +/* + * Initialize a cache_bin_info to represent up to the given number of items in + * the cache_bins it is associated with. + */ +void cache_bin_info_init(cache_bin_info_t *bin_info, + cache_bin_sz_t ncached_max); +/* + * Given an array of initialized cache_bin_info_ts, determine how big an + * allocation is required to initialize a full set of cache_bin_ts. + */ +void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, + size_t *size, size_t *alignment); + +/* + * Actually initialize some cache bins. Callers should allocate the backing + * memory indicated by a call to cache_bin_compute_alloc. 
They should then + * preincrement, call init once for each bin and info, and then call + * cache_bin_postincrement. *alloc_cur will then point immediately past the end + * of the allocation. + */ +void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, + void *alloc, size_t *cur_offset); +void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, + void *alloc, size_t *cur_offset); +void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, + size_t *cur_offset); + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/src/cache_bin.c b/src/cache_bin.c index 454cb475..260c1b77 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -1,3 +1,104 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/bit_util.h" + +void +cache_bin_info_init(cache_bin_info_t *info, + cache_bin_sz_t ncached_max) { + size_t stack_size = (size_t)ncached_max * sizeof(void *); + assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); + info->stack_size = (cache_bin_sz_t)stack_size; +} + +void +cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, + size_t *size, size_t *alignment) { + /* For the total bin stack region (per tcache), reserve 2 more slots so + * that + * 1) the empty position can be safely read on the fast path before + * checking "is_empty"; and + * 2) the cur_ptr can go beyond the empty position by 1 step safely on + * the fast path (i.e. no overflow). + */ + *size = sizeof(void *) * 2; + for (szind_t i = 0; i < ninfos; i++) { + *size += infos[i].stack_size; + } + + /* + * 1) Align to at least PAGE, to minimize the # of TLBs needed by the + * smaller sizes; also helps if the larger sizes don't get used at all. + * 2) On 32-bit the pointers won't be compressed; use minimal alignment. 
+ */ + if (LG_SIZEOF_PTR < 3 || *size < PAGE) { + *alignment = PAGE; + } else { + /* + * Align pow2 to avoid overflow the cache bin compressed + * pointers. + */ + *alignment = pow2_ceil_zu(*size); + } +} + +void +cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, + size_t *cur_offset) { + if (config_debug) { + size_t computed_size; + size_t computed_alignment; + + /* Pointer should be as aligned as we asked for. */ + cache_bin_info_compute_alloc(infos, ninfos, &computed_size, + &computed_alignment); + assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); + + /* And that alignment should disallow overflow. */ + uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size); + assert((uint32_t)(uintptr_t)alloc < lowbits); + } + /* + * Leave a noticeable mark pattern on the boundaries, in case a bug + * starts leaking those. Make it look like the junk pattern but be + * distinct from it. + */ + uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL; + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk; + *cur_offset += sizeof(void *); +} + +void +cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, + size_t *cur_offset) { + /* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ + uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk; + *cur_offset += sizeof(void *); +} + + +void +cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, + size_t *cur_offset) { + assert(sizeof(bin->cur_ptr) == sizeof(void *)); + /* + * The full_position points to the lowest available space. Allocations + * will access the slots toward higher addresses (for the benefit of + * adjacent prefetch). 
+ */ + void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset); + void *full_position = stack_cur; + uint32_t bin_stack_size = info->stack_size; + + *cur_offset += bin_stack_size; + void *empty_position = (void *)((uintptr_t)alloc + *cur_offset); + + /* Init to the empty position. */ + bin->cur_ptr.ptr = empty_position; + bin->low_water_position = bin->cur_ptr.lowbits; + bin->full_position = (uint32_t)(uintptr_t)full_position; + assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); + assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_empty_position_get(bin, info) == empty_position); +} diff --git a/src/tcache.c b/src/tcache.c index e7188585..48f06b70 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -14,16 +14,10 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; cache_bin_info_t *tcache_bin_info; -/* - * For the total bin stack region (per tcache), reserve 2 more slots so that 1) - * the empty position can be safely read on the fast path before checking - * "is_empty"; and 2) the cur_ptr can go beyond the empty position by 1 step - * safely on the fast path (i.e. no overflow). - */ -static const unsigned total_stack_padding = sizeof(void *) * 2; /* Total stack size required (per tcache). Include the padding above. */ -static uint32_t total_stack_bytes; +static size_t tcache_bin_alloc_size; +static size_t tcache_bin_alloc_alignment; unsigned nhbins; size_t tcache_maxclass; @@ -430,43 +424,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { return false; } -static bool -tcache_bin_init(cache_bin_t *bin, szind_t ind, uintptr_t *stack_cur) { - assert(sizeof(bin->cur_ptr) == sizeof(void *)); - /* - * The full_position points to the lowest available space. Allocations - * will access the slots toward higher addresses (for the benefit of - * adjacent prefetch). 
- */ - void *full_position = (void *)*stack_cur; - uint32_t bin_stack_size = tcache_bin_info[ind].stack_size; - - *stack_cur += bin_stack_size; - void *empty_position = (void *)*stack_cur; - - /* Init to the empty position. */ - bin->cur_ptr.ptr = empty_position; - bin->low_water_position = bin->cur_ptr.lowbits; - bin->full_position = (uint32_t)(uintptr_t)full_position; - assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); - assert(cache_bin_ncached_get(bin, &tcache_bin_info[ind]) == 0); - assert(cache_bin_empty_position_get(bin, &tcache_bin_info[ind]) - == empty_position); - - return false; -} - -/* Sanity check only. */ -static bool -tcache_bin_lowbits_overflowable(void *ptr) { - uint32_t lowbits = (uint32_t)((uintptr_t)ptr + total_stack_bytes); - return lowbits < (uint32_t)(uintptr_t)ptr; -} - static void tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { - assert(!tcache_bin_lowbits_overflowable(avail_stack)); - memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); tcache->next_gc_bin = 0; tcache->arena = NULL; @@ -476,35 +435,25 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); unsigned i = 0; - uintptr_t stack_cur = (uintptr_t)avail_stack; + size_t cur_offset = 0; + cache_bin_preincrement(tcache_bin_info, nhbins, avail_stack, + &cur_offset); for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; tcache->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); - tcache_bin_init(bin, i, &stack_cur); + cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + &cur_offset); } for (; i < nhbins; i++) { cache_bin_t *bin = tcache_large_bin_get(tcache, i); - tcache_bin_init(bin, i, &stack_cur); + cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + &cur_offset); } - + cache_bin_postincrement(tcache_bin_info, nhbins, avail_stack, + &cur_offset); /* Sanity check that the whole stack is used. 
*/ - size_t stack_offset = stack_cur - (uintptr_t)avail_stack; - assert(stack_offset + total_stack_padding == total_stack_bytes); -} - -static size_t -tcache_bin_stack_alignment (size_t size) { - /* - * 1) Align to at least PAGE, to minimize the # of TLBs needed by the - * smaller sizes; also helps if the larger sizes don't get used at all. - * 2) On 32-bit the pointers won't be compressed; use minimal alignment. - */ - if (LG_SIZEOF_PTR < 3 || size < PAGE) { - return PAGE; - } - /* Align pow2 to avoid overflow the cache bin compressed pointers. */ - return pow2_ceil_zu(size); + assert(cur_offset == tcache_bin_alloc_size); } /* Initialize auto tcache (embedded in TSD). */ @@ -512,8 +461,8 @@ bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); - size_t alignment = tcache_bin_stack_alignment(total_stack_bytes); - size_t size = sz_sa2u(total_stack_bytes, alignment); + size_t alignment = tcache_bin_alloc_alignment; + size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, arena_get(TSDN_NULL, 0, true)); @@ -551,22 +500,29 @@ tsd_tcache_data_init(tsd_t *tsd) { /* Created manual tcache for tcache.create mallctl. */ tcache_t * tcache_create_explicit(tsd_t *tsd) { - size_t size = sizeof(tcache_t); + /* + * We place the cache bin stacks, then the tcache_t, then a pointer to + * the beginning of the whole allocation (for freeing). The makes sure + * the cache bins have the requested alignment. + */ + size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + sizeof(void *); /* Naturally align the pointer stacks. 
*/ size = PTR_CEILING(size); - size_t stack_offset = size; - size += total_stack_bytes; - size_t alignment = tcache_bin_stack_alignment(size); - size = sz_sa2u(size, alignment); + size = sz_sa2u(size, tcache_bin_alloc_alignment); - tcache_t *tcache = ipallocztm(tsd_tsdn(tsd), size, alignment, true, - NULL, true, arena_get(TSDN_NULL, 0, true)); - if (tcache == NULL) { + void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment, + true, NULL, true, arena_get(TSDN_NULL, 0, true)); + if (mem == NULL) { return NULL; } + void *avail_array = mem; + tcache_t *tcache = (void *)((uintptr_t)avail_array + + tcache_bin_alloc_size); + void **head_ptr = (void *)((uintptr_t)avail_array + + tcache_bin_alloc_size + sizeof(tcache_t)); + tcache_init(tsd, tcache, avail_array); + *head_ptr = mem; - void *avail_array = (void *)((uintptr_t)tcache + - (uintptr_t)stack_offset); tcache_init(tsd, tcache, avail_array); tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); @@ -617,8 +573,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_bin_info[0].stack_size); idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); } else { + /* See the comment at the top of tcache_create_explicit. */ + void **mem_begin = (void **)((uintptr_t)tcache + sizeof(tcache_t)); /* Release both the tcache struct and avail array. 
*/ - idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true); + idalloctm(tsd_tsdn(tsd), *mem_begin, NULL, NULL, true, true); } /* @@ -816,7 +774,6 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { return true; } unsigned i, ncached_max; - total_stack_bytes = 0; for (i = 0; i < SC_NBINS; i++) { if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { ncached_max = TCACHE_NSLOTS_SMALL_MIN; @@ -826,18 +783,13 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { } else { ncached_max = TCACHE_NSLOTS_SMALL_MAX; } - unsigned stack_size = ncached_max * sizeof(void *); - assert(stack_size < ((uint64_t)1 << - (sizeof(cache_bin_sz_t) * 8))); - tcache_bin_info[i].stack_size = stack_size; - total_stack_bytes += stack_size; + cache_bin_info_init(&tcache_bin_info[i], ncached_max); } for (; i < nhbins; i++) { - unsigned stack_size = TCACHE_NSLOTS_LARGE * sizeof(void *); - tcache_bin_info[i].stack_size = stack_size; - total_stack_bytes += stack_size; + cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE); } - total_stack_bytes += total_stack_padding; + cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size, + &tcache_bin_alloc_alignment); return false; } From 7f5ebd211cd870e9c9a303e6145781bfca58e1bb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 29 Feb 2020 15:07:38 -0800 Subject: [PATCH 1581/2608] Cache bin: set low-water internally. --- include/jemalloc/internal/cache_bin.h | 11 ++++++++++- src/tcache.c | 2 +- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 6ab6baa7..86291748 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -160,6 +160,15 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { return low_water; } +/* + * Indicates that the current cache bin position should be the low water mark + * going forward. 
+ */ +static inline void +cache_bin_low_water_set(cache_bin_t *bin) { + bin->low_water_position = bin->cur_ptr.lowbits; +} + static inline void cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t n) { @@ -289,7 +298,7 @@ cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, } /* - * These accessors are used by the flush pathways -- they reverse ordinary flush + * These accessors are used by the flush pathways -- they reverse ordinary array * ordering. */ JEMALLOC_ALWAYS_INLINE void * diff --git a/src/tcache.c b/src/tcache.c index 48f06b70..a059ecc7 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -89,7 +89,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { } tcache->bin_refilled[binind] = false; } - tbin->low_water_position = tbin->cur_ptr.lowbits; + cache_bin_low_water_set(tbin); tcache->next_gc_bin++; if (tcache->next_gc_bin == nhbins) { From 370c1ea007e152a0f8ede3aad7f69c45d2397e54 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 14:14:08 -0800 Subject: [PATCH 1582/2608] Cache bin: Write the unit test in terms of the API I.e. stop allowing the unit test to have secret access to implementation internals. --- include/jemalloc/internal/cache_bin.h | 4 + test/unit/cache_bin.c | 237 ++++++++++++++++++++------ 2 files changed, 191 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 86291748..42504edc 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -169,6 +169,10 @@ cache_bin_low_water_set(cache_bin_t *bin) { bin->low_water_position = bin->cur_ptr.lowbits; } +/* + * This is an internal implementation detail -- users should only affect ncached + * via single-item pushes or batch fills. 
+ */ static inline void cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t n) { diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 37ebd303..2623b384 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -1,63 +1,200 @@ #include "test/jemalloc_test.h" -cache_bin_t test_bin; +static void +do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt, + cache_bin_sz_t nfill_succeed) { + bool success; + void *ptr; + assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); + cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt); + for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { + arr.ptr[i] = &ptrs[i]; + } + cache_bin_finish_fill(bin, info, &arr, nfill_succeed); + expect_true(cache_bin_ncached_get(bin, info) == nfill_succeed, ""); + cache_bin_low_water_set(bin); + + for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { + ptr = cache_bin_alloc_easy(bin, info, &success); + expect_true(success, ""); + expect_ptr_eq(ptr, (void *)&ptrs[i], + "Should pop in order filled"); + expect_true(cache_bin_low_water_get(bin, info) + == nfill_succeed - i - 1, ""); + } + expect_true(cache_bin_ncached_get(bin, info) == 0, ""); + expect_true(cache_bin_low_water_get(bin, info) == 0, ""); +} + +static void +do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t nfill, cache_bin_sz_t nflush) { + bool success; + assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + + for (cache_bin_sz_t i = 0; i < nfill; i++) { + success = cache_bin_dalloc_easy(bin, &ptrs[i]); + expect_true(success, ""); + } + + CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush); + cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush); + for (cache_bin_sz_t i = 0; i < nflush; i++) { + expect_ptr_eq(cache_bin_ptr_array_get(&arr, i), &ptrs[i], ""); + } + cache_bin_finish_flush(bin, info, &arr, nflush); + + 
expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); + while (cache_bin_ncached_get(bin, info) > 0) { + cache_bin_alloc_easy(bin, info, &success); + } +} TEST_BEGIN(test_cache_bin) { - cache_bin_t *bin = &test_bin; - assert(PAGE > TCACHE_NSLOTS_SMALL_MAX * sizeof(void *)); - /* Page aligned to make sure lowbits not overflowable. */ - void **stack = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ALIGN(PAGE)); - - expect_ptr_not_null(stack, "Unexpected mallocx failure"); - /* Initialize to empty; bin 0. */ - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max( - &tcache_bin_info[0]); - void **empty_position = stack + ncached_max; - bin->cur_ptr.ptr = empty_position; - bin->low_water_position = bin->cur_ptr.lowbits; - bin->full_position = (uint32_t)(uintptr_t)stack; - expect_ptr_eq(cache_bin_empty_position_get(bin, &tcache_bin_info[0]), - empty_position, "Incorrect empty position"); - /* Not using expect_zu etc on cache_bin_sz_t since it may change. */ - expect_true(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0, - "Incorrect cache size"); - bool success; - void *ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); - expect_false(success, "Empty cache bin should not alloc"); - expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) == 0, - "Incorrect low water mark"); + void *ptr; - cache_bin_ncached_set(bin, &tcache_bin_info[0], 0); - expect_ptr_eq(bin->cur_ptr.ptr, empty_position, "Bin should be empty"); - for (cache_bin_sz_t i = 1; i < ncached_max + 1; i++) { - success = cache_bin_dalloc_easy(bin, (void *)(uintptr_t)i); - expect_true(success && cache_bin_ncached_get(bin, - &tcache_bin_info[0]) == i, "Bin dalloc failure"); + cache_bin_t bin; + cache_bin_info_t info; + cache_bin_info_init(&info, TCACHE_NSLOTS_SMALL_MAX); + + size_t size; + size_t alignment; + cache_bin_info_compute_alloc(&info, 1, &size, &alignment); + void *mem = mallocx(size, MALLOCX_ALIGN(alignment)); + assert_ptr_not_null(mem, "Unexpected mallocx failure"); 
+ + size_t cur_offset = 0; + cache_bin_preincrement(&info, 1, mem, &cur_offset); + cache_bin_init(&bin, &info, mem, &cur_offset); + cache_bin_postincrement(&info, 1, mem, &cur_offset); + + assert_zu_eq(cur_offset, size, "Should use all requested memory"); + + /* Initialize to empty; should then have 0 elements. */ + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&info); + expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); + + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_false(success, "Shouldn't successfully allocate when empty"); + expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); + + ptr = cache_bin_alloc_easy(&bin, &info, &success); + expect_false(success, "Shouldn't successfully allocate when empty"); + expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); + + /* + * We allocate one more item than ncached_max, so we can test cache bin + * exhaustion. + */ + void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + expect_true(cache_bin_ncached_get(&bin, &info) == i, ""); + success = cache_bin_dalloc_easy(&bin, &ptrs[i]); + expect_true(success, + "Should be able to dalloc into a non-full cache bin."); + expect_true(cache_bin_low_water_get(&bin, &info) == 0, + "Pushes and pops shouldn't change low water of zero."); } - success = cache_bin_dalloc_easy(bin, (void *)1); - expect_false(success, "Bin should be full"); - expect_ptr_eq(bin->cur_ptr.ptr, stack, "Incorrect bin cur_ptr"); + expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); + expect_false(success, "Shouldn't be able to dalloc into a full bin."); - cache_bin_ncached_set(bin, &tcache_bin_info[0], ncached_max); - expect_ptr_eq(bin->cur_ptr.ptr, stack, "cur_ptr should not change"); - /* 
Emulate low water after refill. */ - bin->low_water_position = bin->full_position; - for (cache_bin_sz_t i = ncached_max; i > 0; i--) { - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); - cache_bin_sz_t ncached = cache_bin_ncached_get(bin, - &tcache_bin_info[0]); - expect_true(success && ncached == i - 1, - "Cache bin alloc failure"); - expect_ptr_eq(ret, (void *)(uintptr_t)i, "Bin alloc failure"); - expect_true(cache_bin_low_water_get(bin, &tcache_bin_info[0]) - == ncached, "Incorrect low water mark"); + cache_bin_low_water_set(&bin); + + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + expect_true(cache_bin_low_water_get(&bin, &info) + == ncached_max - i, ""); + expect_true(cache_bin_ncached_get(&bin, &info) + == ncached_max - i, ""); + /* + * This should fail -- the reduced version can't change low + * water. + */ + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_ptr_null(ptr, ""); + expect_false(success, ""); + expect_true(cache_bin_low_water_get(&bin, &info) + == ncached_max - i, ""); + expect_true(cache_bin_ncached_get(&bin, &info) + == ncached_max - i, ""); + + /* This should succeed, though. */ + ptr = cache_bin_alloc_easy(&bin, &info, &success); + expect_true(success, ""); + expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], + "Alloc should pop in stack order"); + expect_true(cache_bin_low_water_get(&bin, &info) + == ncached_max - i - 1, ""); + expect_true(cache_bin_ncached_get(&bin, &info) + == ncached_max - i - 1, ""); + } + /* Now we're empty -- all alloc attempts should fail. 
*/ + expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_ptr_null(ptr, ""); + expect_false(success, ""); + ptr = cache_bin_alloc_easy(&bin, &info, &success); + expect_ptr_null(ptr, ""); + expect_false(success, ""); + + for (cache_bin_sz_t i = 0; i < ncached_max / 2; i++) { + cache_bin_dalloc_easy(&bin, &ptrs[i]); + } + cache_bin_low_water_set(&bin); + + for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { + cache_bin_dalloc_easy(&bin, &ptrs[i]); + } + expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { + /* + * Size is bigger than low water -- the reduced version should + * succeed. + */ + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_true(success, ""); + expect_ptr_eq(ptr, &ptrs[i], ""); + } + /* But now, we've hit low-water. */ + ptr = cache_bin_alloc_easy_reduced(&bin, &success); + expect_false(success, ""); + expect_ptr_null(ptr, ""); + + /* We're going to test filling -- we must be empty to start. */ + while (cache_bin_ncached_get(&bin, &info)) { + cache_bin_alloc_easy(&bin, &info, &success); + expect_true(success, ""); } - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[0], &success); - expect_false(success, "Empty cache bin should not alloc."); - expect_ptr_eq(bin->cur_ptr.ptr, stack + ncached_max, - "Bin should be empty"); + /* Test fill. */ + /* Try to fill all, succeed fully. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, ncached_max); + /* Try to fill all, succeed partially. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, + ncached_max / 2); + /* Try to fill all, fail completely. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, 0); + + /* Try to fill some, succeed fully. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + ncached_max / 2); + /* Try to fill some, succeed partially. 
*/ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + ncached_max / 2); + /* Try to fill some, fail completely. */ + do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0); + + do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max); + do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); + do_flush_test(&bin, &info, ptrs, ncached_max, 0); + do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); } TEST_END From 6a7aa46ef753108f9b0c065572abff14c33eb5d2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:07:19 -0800 Subject: [PATCH 1583/2608] Cache bin: Add a debug method for init checking. --- include/jemalloc/internal/cache_bin.h | 7 +++++++ include/jemalloc/internal/jemalloc_internal_inlines_a.h | 4 ++-- src/cache_bin.c | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 42504edc..461b20be 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -354,4 +354,11 @@ void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset); +/* + * If a cache bin was zero initialized (either because it lives in static or + * thread-local storage, or was memset to 0), this function indicates whether or + * not cache_bin_init was called on it. 
+ */ +bool cache_bin_still_zero_initialized(cache_bin_t *bin); + #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index f079e853..cc5e3595 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -130,8 +130,8 @@ tcache_available(tsd_t *tsd) { if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. */ assert(tsd_tcachep_get(tsd)->arena == NULL || - tcache_small_bin_get(tsd_tcachep_get(tsd), 0)->cur_ptr.ptr - != NULL); + !cache_bin_still_zero_initialized( + tcache_small_bin_get(tsd_tcachep_get(tsd), 0))); return true; } diff --git a/src/cache_bin.c b/src/cache_bin.c index 260c1b77..94f3b32e 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -102,3 +102,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, assert(cache_bin_ncached_get(bin, info) == 0); assert(cache_bin_empty_position_get(bin, info) == empty_position); } + +bool +cache_bin_still_zero_initialized(cache_bin_t *bin) { + return bin->cur_ptr.ptr == NULL; +} From d498a4bb08f1220c089b2c2c06c26b5ff937e30c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:14:19 -0800 Subject: [PATCH 1584/2608] Cache bin: Add an emptiness assertion. 
--- include/jemalloc/internal/cache_bin.h | 7 +++++++ src/tcache.c | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 461b20be..cc72af6b 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -146,6 +146,13 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { return ret; } +static inline void +cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { + assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_empty_position_get(bin, info) == bin->cur_ptr.ptr); +} + + /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { diff --git a/src/tcache.c b/src/tcache.c index a059ecc7..bffc04f5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -566,9 +566,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { /* Release the avail array for the TSD embedded auto tcache. */ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - assert(cache_bin_ncached_get(bin, &tcache_bin_info[0]) == 0); - assert(cache_bin_empty_position_get(bin, &tcache_bin_info[0]) == - bin->cur_ptr.ptr); + cache_bin_assert_empty(bin, &tcache_bin_info[0]); void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - tcache_bin_info[0].stack_size); idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); From 0a2fcfac013e65a22548eeed09ebcaca1bdb63a3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:28:17 -0800 Subject: [PATCH 1585/2608] Tcache: Hold cache bin allocation explicitly. 
--- include/jemalloc/internal/tcache_structs.h | 6 ++++ src/tcache.c | 41 ++++++++-------------- 2 files changed, 20 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 38a82fe4..48dbf0fe 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -49,6 +49,12 @@ struct tcache_s { uint8_t lg_fill_div[SC_NBINS]; /* For small bins, whether has been refilled since last GC. */ bool bin_refilled[SC_NBINS]; + /* + * The start of the allocation containing the dynamic allocation for + * either the cache bins alone, or the cache bin memory as well as this + * tcache_t. + */ + void *dyn_alloc; /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. This might end up diff --git a/src/tcache.c b/src/tcache.c index bffc04f5..f6b37765 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -425,10 +425,11 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { } static void -tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { +tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); tcache->next_gc_bin = 0; tcache->arena = NULL; + tcache->dyn_alloc = mem; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); @@ -436,21 +437,21 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { unsigned i = 0; size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, nhbins, avail_stack, + cache_bin_preincrement(tcache_bin_info, nhbins, mem, &cur_offset); for (; i < SC_NBINS; i++) { tcache->lg_fill_div[i] = 1; tcache->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); - cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + cache_bin_init(bin, &tcache_bin_info[i], mem, &cur_offset); } for (; i < nhbins; i++) { cache_bin_t *bin = tcache_large_bin_get(tcache, i); 
- cache_bin_init(bin, &tcache_bin_info[i], avail_stack, + cache_bin_init(bin, &tcache_bin_info[i], mem, &cur_offset); } - cache_bin_postincrement(tcache_bin_info, nhbins, avail_stack, + cache_bin_postincrement(tcache_bin_info, nhbins, mem, &cur_offset); /* Sanity check that the whole stack is used. */ assert(cur_offset == tcache_bin_alloc_size); @@ -464,13 +465,13 @@ tsd_tcache_data_init(tsd_t *tsd) { size_t alignment = tcache_bin_alloc_alignment; size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); - void *avail_array = ipallocztm(tsd_tsdn(tsd), size, alignment, true, - NULL, true, arena_get(TSDN_NULL, 0, true)); - if (avail_array == NULL) { + void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, + true, arena_get(TSDN_NULL, 0, true)); + if (mem == NULL) { return true; } - tcache_init(tsd, tcache, avail_array); + tcache_init(tsd, tcache, mem); /* * Initialization is a bit tricky here. After malloc init is done, all * threads can rely on arena_choose and associate tcache accordingly. @@ -505,7 +506,7 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. */ - size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + sizeof(void *); + size_t size = tcache_bin_alloc_size + sizeof(tcache_t); /* Naturally align the pointer stacks. 
*/ size = PTR_CEILING(size); size = sz_sa2u(size, tcache_bin_alloc_alignment); @@ -515,15 +516,9 @@ tcache_create_explicit(tsd_t *tsd) { if (mem == NULL) { return NULL; } - void *avail_array = mem; - tcache_t *tcache = (void *)((uintptr_t)avail_array - + tcache_bin_alloc_size); - void **head_ptr = (void *)((uintptr_t)avail_array - + tcache_bin_alloc_size + sizeof(tcache_t)); - tcache_init(tsd, tcache, avail_array); - *head_ptr = mem; + tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size); + tcache_init(tsd, tcache, mem); - tcache_init(tsd, tcache, avail_array); tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); return tcache; @@ -564,18 +559,10 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_arena_dissociate(tsd_tsdn(tsd), tcache); if (tsd_tcache) { - /* Release the avail array for the TSD embedded auto tcache. */ cache_bin_t *bin = tcache_small_bin_get(tcache, 0); cache_bin_assert_empty(bin, &tcache_bin_info[0]); - void *avail_array = (void *)((uintptr_t)bin->cur_ptr.ptr - - tcache_bin_info[0].stack_size); - idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true); - } else { - /* See the comment at the top of tcache_create_explicit. */ - void **mem_begin = (void **)((uintptr_t)tcache + sizeof(tcache_t)); - /* Release both the tcache struct and avail array. */ - idalloctm(tsd_tsdn(tsd), *mem_begin, NULL, NULL, true, true); } + idalloctm(tsd_tsdn(tsd), tcache->dyn_alloc, NULL, NULL, true, true); /* * The deallocation and tcache flush above may not trigger decay since From fef0b1ffe4d1b92a38727449c802e24294284524 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 2 Mar 2020 18:40:31 -0800 Subject: [PATCH 1586/2608] Cache bin: Remove last internals accesses. 
--- src/tcache.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index f6b37765..e9632236 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -461,7 +461,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { bool tsd_tcache_data_init(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); - assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr == NULL); + assert(cache_bin_still_zero_initialized( + tcache_small_bin_get(tcache, 0))); size_t alignment = tcache_bin_alloc_alignment; size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); @@ -588,18 +589,23 @@ tcache_cleanup(tsd_t *tsd) { tcache_t *tcache = tsd_tcachep_get(tsd); if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); - if (config_debug) { - assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr - == NULL); - } + assert(cache_bin_still_zero_initialized( + tcache_small_bin_get(tcache, 0))); return; } assert(tsd_tcache_enabled_get(tsd)); - assert(tcache_small_bin_get(tcache, 0)->cur_ptr.ptr != NULL); + assert(!cache_bin_still_zero_initialized( + tcache_small_bin_get(tcache, 0))); tcache_destroy(tsd, tcache, true); if (config_debug) { - tcache_small_bin_get(tcache, 0)->cur_ptr.ptr = NULL; + /* + * For debug testing only, we want to pretend we're still in the + * zero-initialized state. + */ + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); + memset(tcache->bins_large, 0, + sizeof(cache_bin_t) * (nhbins - SC_NBINS)); } } From 397da038656589cb3a263d1715ae27f90f6b30d1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 3 Mar 2020 18:32:36 -0800 Subject: [PATCH 1587/2608] Cache bin: rewrite to track more state. With this, we track all of the empty, full, and low water states together. This simplifies a lot of the tracking logic, since we now don't need the cache_bin_info_t for state queries (except for some debugging). 
--- include/jemalloc/internal/cache_bin.h | 228 +++++++++++++------------- src/cache_bin.c | 36 ++-- 2 files changed, 129 insertions(+), 135 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index cc72af6b..0fb08421 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -35,67 +35,53 @@ struct cache_bin_stats_s { */ typedef struct cache_bin_info_s cache_bin_info_t; struct cache_bin_info_s { - /* The size of the bin stack, i.e. ncached_max * sizeof(ptr). */ - cache_bin_sz_t stack_size; + cache_bin_sz_t ncached_max; }; typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { /* - * The cache bin stack is represented using 3 pointers: cur_ptr, - * low_water and full, optimized for the fast path efficiency. - * - * low addr ==> high addr - * |----|----|----|item1|item2|.....................|itemN| - * full cur empty - * (ncached == N; full + ncached_max == empty) - * - * Data directly stored: - * 1) cur_ptr points to the current item to be allocated, i.e. *cur_ptr. - * 2) full points to the top of the stack (i.e. ncached == ncached_max), - * which is compared against on free_fastpath to check "is_full". - * 3) low_water indicates a low water mark of ncached. - * Range of low_water is [cur, empty], i.e. values of [ncached, 0]. - * - * The empty position (ncached == 0) is derived via full + ncached_max - * and not accessed in the common case (guarded behind low_water). - * - * On 64-bit, 2 of the 3 pointers (full and low water) are compressed by - * omitting the high 32 bits. Overflow of the half pointers is avoided - * when allocating / initializing the stack space. As a result, - * cur_ptr.lowbits can be safely used for pointer comparisons. + * The stack grows down. Whenever the bin is nonempty, the head points + * to an array entry containing a valid allocation. When it is empty, + * the head points to one element past the owned array. 
*/ - union { - void **ptr; - struct { - /* highbits never accessed directly. */ -#if (LG_SIZEOF_PTR == 3 && defined(JEMALLOC_BIG_ENDIAN)) - uint32_t __highbits; -#endif - uint32_t lowbits; -#if (LG_SIZEOF_PTR == 3 && !defined(JEMALLOC_BIG_ENDIAN)) - uint32_t __highbits; -#endif - }; - } cur_ptr; + void **stack_head; + + /* + * The low bits of the address of the first item in the stack that + * hasn't been used since the last GC, to track the low water mark (min + * # of cached items). + * + * Since the stack grows down, this is a higher address than + * low_bits_full. + */ + uint16_t low_bits_low_water; + + /* + * The low bits of the value that stack_head will take on when the array + * is full. (But remember that stack_head always points to a valid item + * when the array is nonempty -- this is in the array). + * + * Recall that since the stack grows down, this is the lowest address in + * the array. + */ + uint16_t low_bits_full; + + /* + * The low bits of the value that stack_head will take on when the array + * is empty. + * + * The stack grows down -- this is one past the highest address in the + * array. + */ + uint16_t low_bits_empty; + /* * cur_ptr and stats are both modified frequently. Let's keep them * close so that they have a higher chance of being on the same * cacheline, thus less write-backs. */ cache_bin_stats_t tstats; - /* - * Points to the first item that hasn't been used since last GC, to - * track the low water mark (min # of cached). - */ - uint32_t low_water_position; - /* - * Points to the position when the cache is full. - * - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations (i.e. cur_ptr++). - */ - uint32_t full_position; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -118,30 +104,51 @@ struct cache_bin_array_descriptor_s { /* Returns ncached_max: Upper limit on ncached. 
*/ static inline cache_bin_sz_t cache_bin_info_ncached_max(cache_bin_info_t *info) { - return info->stack_size / sizeof(void *); + return info->ncached_max; } +/* + * Asserts that the pointer associated with earlier is <= the one associated + * with later. + */ +static inline void +cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { + if (earlier > later) { + assert(bin->low_bits_full > bin->low_bits_empty); + } +} + +/* + * Internal -- does difference calculations that handle wraparound correctly. + * Earlier must be associated with the position earlier in memory. + */ +static inline uint16_t +cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { + cache_bin_assert_earlier(bin, earlier, later); + return later - earlier; +} + + static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = (cache_bin_sz_t)((info->stack_size + - bin->full_position - bin->cur_ptr.lowbits) / sizeof(void *)); + cache_bin_sz_t diff = cache_bin_diff(bin, + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + cache_bin_sz_t n = diff / sizeof(void *); + assert(n <= cache_bin_info_ncached_max(info)); - assert(n == 0 || *(bin->cur_ptr.ptr) != NULL); + assert(n == 0 || *(bin->stack_head) != NULL); return n; } static inline void ** cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { - void **ret = bin->cur_ptr.ptr + cache_bin_ncached_get(bin, info); - /* Low bits overflow disallowed when allocating the space. */ - assert((uint32_t)(uintptr_t)ret >= bin->cur_ptr.lowbits); + cache_bin_sz_t diff = cache_bin_diff(bin, + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; + void **ret = (void **)empty_bits; - /* Can also be computed via (full_position + ncached_max) | highbits. 
*/ - uintptr_t lowbits = bin->full_position + info->stack_size; - uintptr_t highbits = (uintptr_t)bin->cur_ptr.ptr & - ~(((uint64_t)1 << 32) - 1); - assert(ret == (void **)(lowbits | highbits)); + assert(ret >= bin->stack_head); return ret; } @@ -149,20 +156,29 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { assert(cache_bin_ncached_get(bin, info) == 0); - assert(cache_bin_empty_position_get(bin, info) == bin->cur_ptr.ptr); + assert(cache_bin_empty_position_get(bin, info) == bin->stack_head); } +/* + * Get low water, but without any of the correctness checking we do for the + * caller-usable version, if we are temporarily breaking invariants (like + * ncached >= low_water during flush). + */ +static inline cache_bin_sz_t +cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { + return cache_bin_diff(bin, bin->low_bits_low_water, + bin->low_bits_empty) / sizeof(void *); +} /* Returns the numeric value of low water in [0, ncached]. 
*/ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); - cache_bin_sz_t low_water = ncached_max - - (cache_bin_sz_t)((bin->low_water_position - bin->full_position) / - sizeof(void *)); - assert(low_water <= ncached_max); + cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin, info); + assert(low_water <= cache_bin_info_ncached_max(info)); assert(low_water <= cache_bin_ncached_get(bin, info)); - assert(bin->low_water_position >= bin->cur_ptr.lowbits); + + cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, + bin->low_bits_low_water); return low_water; } @@ -173,20 +189,7 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { */ static inline void cache_bin_low_water_set(cache_bin_t *bin) { - bin->low_water_position = bin->cur_ptr.lowbits; -} - -/* - * This is an internal implementation detail -- users should only affect ncached - * via single-item pushes or batch fills. - */ -static inline void -cache_bin_ncached_set(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_sz_t n) { - bin->cur_ptr.lowbits = bin->full_position + info->stack_size - - n * sizeof(void *); - assert(n <= cache_bin_info_ncached_max(info)); - assert(n == 0 || *bin->cur_ptr.ptr != NULL); + bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } static inline void @@ -198,38 +201,35 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info, - bool *success, const bool adjust_low_water) { +cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, + const bool adjust_low_water) { /* * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. 
*/ - void *ret = *(bin->cur_ptr.ptr++); + void *ret = *bin->stack_head; + uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + void **new_head = bin->stack_head + 1; /* - * Check for both bin->ncached == 0 and ncached < low_water in a single - * branch. When adjust_low_water is true, this also avoids accessing - * the cache_bin_info_t (which is on a separate cacheline / page) in - * the common case. + * Note that the low water mark is at most empty; if we pass this check, + * we know we're non-empty. */ - if (unlikely(bin->cur_ptr.lowbits > bin->low_water_position)) { + if (unlikely(low_bits == bin->low_bits_low_water)) { if (adjust_low_water) { - uint32_t empty_position = bin->full_position + - info->stack_size; - if (unlikely(bin->cur_ptr.lowbits > empty_position)) { - /* Over-allocated; revert. */ - bin->cur_ptr.ptr--; - assert(bin->cur_ptr.lowbits == empty_position); + if (unlikely(low_bits == bin->low_bits_empty)) { *success = false; return NULL; } - bin->low_water_position = bin->cur_ptr.lowbits; + /* Overflow should be impossible. */ + assert(bin->low_bits_low_water + < (uint16_t)(uintptr_t)new_head); + bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; } else { - bin->cur_ptr.ptr--; - assert(bin->cur_ptr.lowbits == bin->low_water_position); *success = false; return NULL; } } + bin->stack_head = new_head; /* * success (instead of ret) should be checked upon the return of this @@ -246,22 +246,27 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, cache_bin_info_t *info, JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. 
*/ - return cache_bin_alloc_easy_impl(bin, NULL, success, false); + return cache_bin_alloc_easy_impl(bin, success, false); } JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { - return cache_bin_alloc_easy_impl(bin, info, success, true); + /* We don't use info now, but we may want to in the future. */ + (void)info; + return cache_bin_alloc_easy_impl(bin, success, true); } JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { - if (unlikely(bin->cur_ptr.lowbits == bin->full_position)) { + uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + if (unlikely(low_bits == bin->low_bits_full)) { return false; } - *(--bin->cur_ptr.ptr) = ptr; - assert(bin->cur_ptr.lowbits >= bin->full_position); + bin->stack_head--; + *bin->stack_head = ptr; + cache_bin_assert_earlier(bin, bin->low_bits_full, + (uint16_t)(uintptr_t)bin->stack_head); return true; } @@ -279,8 +284,8 @@ struct cache_bin_ptr_array_s { static inline void cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { - arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; assert(cache_bin_ncached_get(bin, info) == 0); + arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; } /* @@ -292,12 +297,12 @@ static inline void cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { assert(cache_bin_ncached_get(bin, info) == 0); + void **empty_position = cache_bin_empty_position_get(bin, info); if (nfilled < arr->n) { - void **empty_position = cache_bin_empty_position_get(bin, info); memmove(empty_position - nfilled, empty_position - arr->n, nfilled * sizeof(void *)); } - cache_bin_ncached_set(bin, info, nfilled); + bin->stack_head = empty_position - nfilled; } static inline void @@ -326,11 +331,12 @@ static inline void cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, 
cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { unsigned rem = cache_bin_ncached_get(bin, info) - nflushed; - memmove(bin->cur_ptr.ptr + nflushed, bin->cur_ptr.ptr, + memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); - cache_bin_ncached_set(bin, info, rem); - if (bin->cur_ptr.lowbits > bin->low_water_position) { - bin->low_water_position = bin->cur_ptr.lowbits; + bin->stack_head = bin->stack_head + nflushed; + if (cache_bin_ncached_get(bin, info) + < cache_bin_low_water_get_internal(bin, info)) { + bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } } diff --git a/src/cache_bin.c b/src/cache_bin.c index 94f3b32e..51b87499 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -8,7 +8,7 @@ cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); - info->stack_size = (cache_bin_sz_t)stack_size; + info->ncached_max = (cache_bin_sz_t)ncached_max; } void @@ -23,23 +23,14 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ *size = sizeof(void *) * 2; for (szind_t i = 0; i < ninfos; i++) { - *size += infos[i].stack_size; + *size += infos[i].ncached_max * sizeof(void *); } /* - * 1) Align to at least PAGE, to minimize the # of TLBs needed by the + * Align to at least PAGE, to minimize the # of TLBs needed by the * smaller sizes; also helps if the larger sizes don't get used at all. - * 2) On 32-bit the pointers won't be compressed; use minimal alignment. */ - if (LG_SIZEOF_PTR < 3 || *size < PAGE) { - *alignment = PAGE; - } else { - /* - * Align pow2 to avoid overflow the cache bin compressed - * pointers. 
- */ - *alignment = pow2_ceil_zu(*size); - } + *alignment = PAGE; } void @@ -53,10 +44,6 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, cache_bin_info_compute_alloc(infos, ninfos, &computed_size, &computed_alignment); assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); - - /* And that alignment should disallow overflow. */ - uint32_t lowbits = (uint32_t)((uintptr_t)alloc + computed_size); - assert((uint32_t)(uintptr_t)alloc < lowbits); } /* * Leave a noticeable mark pattern on the boundaries, in case a bug @@ -81,7 +68,6 @@ cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset) { - assert(sizeof(bin->cur_ptr) == sizeof(void *)); /* * The full_position points to the lowest available space. Allocations * will access the slots toward higher addresses (for the benefit of @@ -89,21 +75,23 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, */ void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset); void *full_position = stack_cur; - uint32_t bin_stack_size = info->stack_size; + uint16_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; void *empty_position = (void *)((uintptr_t)alloc + *cur_offset); /* Init to the empty position. 
*/ - bin->cur_ptr.ptr = empty_position; - bin->low_water_position = bin->cur_ptr.lowbits; - bin->full_position = (uint32_t)(uintptr_t)full_position; - assert(bin->cur_ptr.lowbits - bin->full_position == bin_stack_size); + bin->stack_head = (void **)empty_position; + bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; + bin->low_bits_full = (uint16_t)(uintptr_t)full_position; + bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; + assert(cache_bin_diff(bin, bin->low_bits_full, + (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); assert(cache_bin_ncached_get(bin, info) == 0); assert(cache_bin_empty_position_get(bin, info) == empty_position); } bool cache_bin_still_zero_initialized(cache_bin_t *bin) { - return bin->cur_ptr.ptr == NULL; + return bin->stack_head == NULL; } From d701a085c29df6f6afc9a0b15c4732c8662fe80c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Mar 2020 08:58:42 -0800 Subject: [PATCH 1588/2608] Fast path: allow low-water mark changes. This lets us put more allocations on an "almost as fast" path after a flush. This results in around a 4% reduction in malloc cycles in prod workloads (corresponding to about a 0.1% reduction in overall cycles). --- include/jemalloc/internal/cache_bin.h | 89 +++++++++++----------- include/jemalloc/internal/tcache_inlines.h | 6 +- src/jemalloc.c | 34 ++++++--- src/tcache.c | 3 +- test/unit/cache_bin.c | 26 +++---- 5 files changed, 86 insertions(+), 72 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0fb08421..f0297045 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -46,6 +46,12 @@ struct cache_bin_s { * the head points to one element past the owned array. */ void **stack_head; + /* + * cur_ptr and stats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus less write-backs. 
+ */ + cache_bin_stats_t tstats; /* * The low bits of the address of the first item in the stack that @@ -76,12 +82,6 @@ struct cache_bin_s { */ uint16_t low_bits_empty; - /* - * cur_ptr and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. - */ - cache_bin_stats_t tstats; }; typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; @@ -201,36 +201,7 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, - const bool adjust_low_water) { - /* - * This may read from the empty position; however the loaded value won't - * be used. It's safe because the stack has one more slot reserved. - */ - void *ret = *bin->stack_head; - uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; - void **new_head = bin->stack_head + 1; - /* - * Note that the low water mark is at most empty; if we pass this check, - * we know we're non-empty. - */ - if (unlikely(low_bits == bin->low_bits_low_water)) { - if (adjust_low_water) { - if (unlikely(low_bits == bin->low_bits_empty)) { - *success = false; - return NULL; - } - /* Overflow should be impossible. */ - assert(bin->low_bits_low_water - < (uint16_t)(uintptr_t)new_head); - bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; - } else { - *success = false; - return NULL; - } - } - bin->stack_head = new_head; - +cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { /* * success (instead of ret) should be checked upon the return of this * function. We avoid checking (ret == NULL) because there is never a @@ -238,22 +209,52 @@ cache_bin_alloc_easy_impl(cache_bin_t *bin, bool *success, * and eagerly checking ret would cause pipeline stall (waiting for the * cacheline). 
*/ - *success = true; - return ret; + /* + * This may read from the empty position; however the loaded value won't + * be used. It's safe because the stack has one more slot reserved. + */ + void *ret = *bin->stack_head; + uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + void **new_head = bin->stack_head + 1; + + /* + * Note that the low water mark is at most empty; if we pass this check, + * we know we're non-empty. + */ + if (likely(low_bits != bin->low_bits_low_water)) { + bin->stack_head = new_head; + *success = true; + return ret; + } + if (!adjust_low_water) { + *success = false; + return NULL; + } + /* + * In the fast-path case where we call alloc_easy and then alloc, the + * previous checking and computation is optimized away -- we didn't + * actually commit any of our operations. + */ + if (likely(low_bits != bin->low_bits_empty)) { + bin->stack_head = new_head; + bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; + *success = true; + return ret; + } + *success = false; + return NULL; } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy_reduced(cache_bin_t *bin, bool *success) { +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. */ - return cache_bin_alloc_easy_impl(bin, success, false); + return cache_bin_alloc_impl(bin, success, false); } JEMALLOC_ALWAYS_INLINE void * -cache_bin_alloc_easy(cache_bin_t *bin, cache_bin_info_t *info, bool *success) { - /* We don't use info now, but we may want to in the future. 
*/ - (void)info; - return cache_bin_alloc_easy_impl(bin, success, true); +cache_bin_alloc(cache_bin_t *bin, bool *success) { + return cache_bin_alloc_impl(bin, success, true); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2d31ad0e..3b78ed27 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -36,8 +36,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, assert(binind < SC_NBINS); bin = tcache_small_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], - &tcache_success); + ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -80,8 +79,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(binind >= SC_NBINS &&binind < nhbins); bin = tcache_large_bin_get(tcache, binind); - ret = cache_bin_alloc_easy(bin, &tcache_bin_info[binind], - &tcache_success); + ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 12b4f6c3..758e3244 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2377,6 +2377,17 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. */ +JEMALLOC_ALWAYS_INLINE void +fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, + cache_bin_t *bin, void *ret) { + thread_allocated_set(tsd, allocated_after); + if (config_stats) { + bin->tstats.nrequests++; + } + + LOG("core.malloc.exit", "result: %p", ret); +} + /* * malloc() fastpath. 
* @@ -2451,17 +2462,22 @@ je_malloc(size_t size) { tcache_t *tcache = tsd_tcachep_get(tsd); cache_bin_t *bin = tcache_small_bin_get(tcache, ind); bool tcache_success; - void *ret = cache_bin_alloc_easy_reduced(bin, &tcache_success); + void *ret; + /* + * We split up the code this way so that redundant low-water + * computation doesn't happen on the (more common) case in which we + * don't touch the low water mark. The compiler won't do this + * duplication on its own. + */ + ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { - thread_allocated_set(tsd, allocated_after); - if (config_stats) { - bin->tstats.nrequests++; - } - - LOG("core.malloc.exit", "result: %p", ret); - - /* Fastpath success */ + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + ret = cache_bin_alloc(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } diff --git a/src/tcache.c b/src/tcache.c index e9632236..9afc0063 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -104,8 +104,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache->arena != NULL); arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); - ret = cache_bin_alloc_easy(tbin, &tcache_bin_info[binind], - tcache_success); + ret = cache_bin_alloc(tbin, tcache_success); return ret; } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 2623b384..cbd8ce02 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -17,7 +17,7 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { - ptr = cache_bin_alloc_easy(bin, info, &success); + ptr = cache_bin_alloc(bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, (void *)&ptrs[i], "Should pop in order filled"); @@ -48,7 +48,7 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, 
expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); while (cache_bin_ncached_get(bin, info) > 0) { - cache_bin_alloc_easy(bin, info, &success); + cache_bin_alloc(bin, &success); } } @@ -78,11 +78,11 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); expect_ptr_null(ptr, "Shouldn't get a non-null pointer on failure"); @@ -112,10 +112,10 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max - i, ""); /* - * This should fail -- the reduced version can't change low - * water. + * This should fail -- the easy variant can't change the low + * water mark. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); expect_true(cache_bin_low_water_get(&bin, &info) @@ -124,7 +124,7 @@ TEST_BEGIN(test_cache_bin) { == ncached_max - i, ""); /* This should succeed, though. */ - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); @@ -135,10 +135,10 @@ TEST_BEGIN(test_cache_bin) { } /* Now we're empty -- all alloc attempts should fail. 
*/ expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - ptr = cache_bin_alloc_easy(&bin, &info, &success); + ptr = cache_bin_alloc(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -156,18 +156,18 @@ TEST_BEGIN(test_cache_bin) { * Size is bigger than low water -- the reduced version should * succeed. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[i], ""); } /* But now, we've hit low-water. */ - ptr = cache_bin_alloc_easy_reduced(&bin, &success); + ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, ""); expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ while (cache_bin_ncached_get(&bin, &info)) { - cache_bin_alloc_easy(&bin, &info, &success); + cache_bin_alloc(&bin, &success); expect_true(success, ""); } From 92485032b2e9184cada5a30e3df389fe164fbb4d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 7 Mar 2020 15:56:49 -0800 Subject: [PATCH 1589/2608] Cache bin: improve comments. --- include/jemalloc/internal/cache_bin.h | 95 ++++++++++++++++++++++----- 1 file changed, 79 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index f0297045..5a772bf6 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -20,6 +20,11 @@ */ typedef uint16_t cache_bin_sz_t; +/* + * This lives inside the cache_bin (for locality reasons), and is initialized + * alongside it, but is otherwise not modified by any cache bin operations. + * It's logically public and maintained by its callers. 
+ */ typedef struct cache_bin_stats_s cache_bin_stats_t; struct cache_bin_stats_s { /* @@ -38,6 +43,9 @@ struct cache_bin_info_s { cache_bin_sz_t ncached_max; }; +/* + * Responsible for caching allocations associated with a single size. + */ typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { /* @@ -84,6 +92,12 @@ struct cache_bin_s { }; +/* + * The cache_bins live inside the tcache, but the arena (by design) isn't + * supposed to know much about tcache internals. To let the arena iterate over + * associated bins, we keep (with the tcache) a linked list of + * cache_bin_array_descriptor_ts that tell the arena how to find the bins. + */ typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; struct cache_bin_array_descriptor_s { /* @@ -96,10 +110,13 @@ struct cache_bin_array_descriptor_s { cache_bin_t *bins_large; }; -/* - * None of the cache_bin_*_get / _set functions is used on the fast path, which - * relies on pointer comparisons to determine if the cache is full / empty. - */ +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} /* Returns ncached_max: Upper limit on ncached. */ static inline cache_bin_sz_t @@ -108,6 +125,8 @@ cache_bin_info_ncached_max(cache_bin_info_t *info) { } /* + * Internal. + * * Asserts that the pointer associated with earlier is <= the one associated * with later. */ @@ -119,8 +138,10 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { } /* - * Internal -- does difference calculations that handle wraparound correctly. - * Earlier must be associated with the position earlier in memory. + * Internal. + * + * Does difference calculations that handle wraparound correctly. Earlier must + * be associated with the position earlier in memory. 
*/ static inline uint16_t cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { @@ -128,7 +149,7 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { return later - earlier; } - +/* Number of items currently cached in the bin. */ static inline cache_bin_sz_t cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t diff = cache_bin_diff(bin, @@ -141,6 +162,11 @@ cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { return n; } +/* + * Internal. + * + * A pointer to the position one past the end of the backing array. + */ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t diff = cache_bin_diff(bin, @@ -153,6 +179,10 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { return ret; } +/* + * As the name implies. This is important since it's not correct to try to + * batch fill a nonempty cache bin. + */ static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { assert(cache_bin_ncached_get(bin, info) == 0); @@ -192,14 +222,6 @@ cache_bin_low_water_set(cache_bin_t *bin) { bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } -static inline void -cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, - cache_bin_t *bins_small, cache_bin_t *bins_large) { - ql_elm_new(descriptor, link); - descriptor->bins_small = bins_small; - descriptor->bins_large = bins_large; -} - JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { /* @@ -246,17 +268,27 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { return NULL; } +/* + * Allocate an item out of the bin, failing if we're at the low-water mark. + */ JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { /* We don't look at info if we're not adjusting low-water. 
*/ return cache_bin_alloc_impl(bin, success, false); } +/* + * Allocate an item out of the bin, even if we're currently at the low-water + * mark (and failing only if the bin is empty). + */ JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc(cache_bin_t *bin, bool *success) { return cache_bin_alloc_impl(bin, success, true); } +/* + * Free an object into the given bin. Fails only if the bin is full. + */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; @@ -272,16 +304,46 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return true; } +/** + * Filling and flushing are done in batch, on arrays of void *s. For filling, + * the arrays go forward, and can be accessed with ordinary array arithmetic. + * For flushing, we work from the end backwards, and so need to use special + * accessors that invert the usual ordering. + * + * This is important for maintaining first-fit; the arena code fills with + * earliest objects first, and so those are the ones we should return first for + * cache_bin_alloc calls. When flushing, we should flush the objects that we + * wish to return later; those at the end of the array. This is better for the + * first-fit heuristic as well as for cache locality; the most recently freed + * objects are the ones most likely to still be in cache. + * + * This all sounds very hand-wavey and theoretical, but reverting the ordering + * on one or the other pathway leads to measurable slowdowns. + */ + typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; struct cache_bin_ptr_array_s { cache_bin_sz_t n; void **ptr; }; +/* + * Declare a cache_bin_ptr_array_t sufficient for nval items. + * + * In the current implementation, this could be just part of a + * cache_bin_ptr_array_init_... call, since we reuse the cache bin stack memory. 
+ * Indirecting behind a macro, though, means experimenting with linked-list + * representations is easy (since they'll require an alloca in the calling + * frame). + */ #define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ cache_bin_ptr_array_t name; \ name.n = (nval) +/* + * Start a fill. The bin must be empty, and This must be followed by a + * finish_fill call before doing any alloc/dalloc operations on the bin. + */ static inline void cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { @@ -306,6 +368,7 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, bin->stack_head = empty_position - nfilled; } +/* Same deal, but with flush. */ static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { @@ -316,7 +379,7 @@ cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, /* * These accessors are used by the flush pathways -- they reverse ordinary array - * ordering. + * ordering. See the note above. */ JEMALLOC_ALWAYS_INLINE void * cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { From e732344ef18fa295c1ca77ffc40760f5873db1b8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 6 Mar 2020 12:41:16 -0800 Subject: [PATCH 1590/2608] Inspect test: Reduce checks when profiling is on. Profiled small allocations don't live in bins, which is contrary to the test expectation. 
--- test/unit/inspect.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/test/unit/inspect.c b/test/unit/inspect.c index 384b1ad4..fe59e597 100644 --- a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -87,7 +87,13 @@ TEST_BEGIN(test_query) { "Extent size should be at least allocation size"); expect_zu_eq(SIZE_READ(out) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); - if (sz <= SC_SMALL_MAXCLASS) { + + /* + * We don't do much bin checking if prof is on, since profiling + * can produce extents that are for small size classes but not + * slabs, which interferes with things like region counts. + */ + if (!opt_prof && sz <= SC_SMALL_MAXCLASS) { expect_zu_le(NFREE_READ(out), NREGS_READ(out), "Extent free count exceeded region count"); expect_zu_le(NREGS_READ(out), SIZE_READ(out), @@ -97,6 +103,7 @@ TEST_BEGIN(test_query) { expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) != NULL && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); + if (config_stats) { expect_zu_le(BIN_NFREE_READ(out), BIN_NREGS_READ(out), @@ -125,7 +132,7 @@ TEST_BEGIN(test_query) { "Extent utilized count exceeded " "bin utilized count"); } - } else { + } else if (sz > SC_SMALL_MAXCLASS) { expect_zu_eq(NFREE_READ(out), 0, "Extent free count should be zero"); expect_zu_eq(NREGS_READ(out), 1, @@ -214,14 +221,18 @@ TEST_BEGIN(test_batch) { "Extent size should be at least allocation size"); expect_zu_eq(SIZE_READ(out, 0) & (PAGE - 1), 0, "Extent size should be a multiple of page size"); - if (sz <= SC_SMALL_MAXCLASS) { + /* + * See the corresponding comment in test_query; profiling breaks + * our slab count expectations. 
+ */ + if (sz <= SC_SMALL_MAXCLASS && !opt_prof) { expect_zu_le(NFREE_READ(out, 0), NREGS_READ(out, 0), "Extent free count exceeded region count"); expect_zu_le(NREGS_READ(out, 0), SIZE_READ(out, 0), "Extent region count exceeded size"); expect_zu_ne(NREGS_READ(out, 0), 0, "Extent region count must be positive"); - } else { + } else if (sz > SC_SMALL_MAXCLASS) { expect_zu_eq(NFREE_READ(out, 0), 0, "Extent free count should be zero"); expect_zu_eq(NREGS_READ(out, 0), 1, From 734109d9c28beb2da12af34e1d2e4324e4895191 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 Feb 2020 16:09:10 -0800 Subject: [PATCH 1591/2608] Edata cache: add a unit test. --- Makefile.in | 1 + test/unit/edata_cache.c | 54 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 test/unit/edata_cache.c diff --git a/Makefile.in b/Makefile.in index b53846d2..7eca2f5f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -196,6 +196,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ + $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ $(srcroot)test/unit/fork.c \ diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c new file mode 100644 index 00000000..638e2292 --- /dev/null +++ b/test/unit/edata_cache.c @@ -0,0 +1,54 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/edata_cache.h" + +static void +test_edata_cache_init(edata_cache_t *edata_cache) { + base_t *base = base_new(TSDN_NULL, /* ind */ 1, + &ehooks_default_extent_hooks); + assert_ptr_not_null(base, ""); + bool err = edata_cache_init(edata_cache, base); + assert_false(err, ""); +} + +static void +test_edata_cache_destroy(edata_cache_t *edata_cache) { + base_delete(TSDN_NULL, edata_cache->base); +} + +TEST_BEGIN(test_edata_cache) { + edata_cache_t edc; + test_edata_cache_init(&edc); + + /* Get one */ + edata_t *ed1 = edata_cache_get(TSDN_NULL, &edc); + 
assert_ptr_not_null(ed1, ""); + + /* Cache should be empty */ + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + + /* Get another */ + edata_t *ed2 = edata_cache_get(TSDN_NULL, &edc); + assert_ptr_not_null(ed2, ""); + + /* Still empty */ + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + + /* Put one back, and the cache should now have one item */ + edata_cache_put(TSDN_NULL, &edc, ed1); + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 1, ""); + + /* Reallocating should reuse the item, and leave an empty cache. */ + edata_t *ed1_again = edata_cache_get(TSDN_NULL, &edc); + assert_ptr_eq(ed1, ed1_again, ""); + assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + + test_edata_cache_destroy(&edc); +} +TEST_END + +int +main(void) { + return test( + test_edata_cache); +} From 99b1291d1760ad164346073b35ac03ce2eb35e68 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 Feb 2020 17:21:40 -0800 Subject: [PATCH 1592/2608] Edata cache: add edata_cache_small_t. This can be used to amortize the synchronization costs of edata_cache accesses. 
--- include/jemalloc/internal/edata_cache.h | 28 ++++++++++++ src/edata_cache.c | 60 ++++++++++++++++++++++-- test/unit/edata_cache.c | 61 ++++++++++++++++++++----- 3 files changed, 134 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 73ac7af8..620360d1 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -25,4 +25,32 @@ void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); +typedef struct edata_cache_small_s edata_cache_small_t; +struct edata_cache_small_s { + edata_list_t list; + size_t count; + edata_cache_t *fallback; +}; + +/* + * An edata_cache_small is like an edata_cache, but it relies on external + * synchronization and avoids first-fit strategies. You can call "prepare" to + * acquire at least num edata_t objects, and then "finish" to flush all + * excess ones back to their fallback edata_cache_t. Once they have been + * acquired, they can be allocated without failing (and in fact, this is + * required -- it's not permitted to attempt to get an edata_t without first + * preparing for it). + */ + +void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback); + +/* Returns whether or not an error occurred. 
*/ +bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num); +edata_t *edata_cache_small_get(edata_cache_small_t *ecs); + +void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata); +void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num); + #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/src/edata_cache.c b/src/edata_cache.c index 1af7b96f..b62972a1 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -40,14 +40,68 @@ edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { malloc_mutex_unlock(tsdn, &edata_cache->mtx); } -void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) { +void +edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_prefork(tsdn, &edata_cache->mtx); } -void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) { +void +edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_postfork_parent(tsdn, &edata_cache->mtx); } -void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { +void +edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { malloc_mutex_postfork_child(tsdn, &edata_cache->mtx); } + +void +edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { + edata_list_init(&ecs->list); + ecs->count = 0; + ecs->fallback = fallback; +} + +edata_t * +edata_cache_small_get(edata_cache_small_t *ecs) { + assert(ecs->count > 0); + edata_t *edata = edata_list_first(&ecs->list); + assert(edata != NULL); + edata_list_remove(&ecs->list, edata); + ecs->count--; + return edata; +} + +void +edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) { + assert(edata != NULL); + edata_list_append(&ecs->list, edata); + ecs->count++; +} + +bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num) { + while (ecs->count < num) { + /* + * Obviously, we can be smarter here and batch the locking that + 
* happens inside of edata_cache_get. But for now, something + * quick-and-dirty is fine. + */ + edata_t *edata = edata_cache_get(tsdn, ecs->fallback); + if (edata == NULL) { + return true; + } + ql_elm_new(edata, ql_link); + edata_cache_small_put(ecs, edata); + } + return false; +} + +void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, + size_t num) { + while (ecs->count > num) { + /* Same deal here -- we should be batching. */ + edata_t *edata = edata_cache_small_get(ecs); + edata_cache_put(tsdn, ecs->fallback, edata); + } +} diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index 638e2292..22c9dcb8 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -17,38 +17,75 @@ test_edata_cache_destroy(edata_cache_t *edata_cache) { } TEST_BEGIN(test_edata_cache) { - edata_cache_t edc; - test_edata_cache_init(&edc); + edata_cache_t ec; + test_edata_cache_init(&ec); /* Get one */ - edata_t *ed1 = edata_cache_get(TSDN_NULL, &edc); + edata_t *ed1 = edata_cache_get(TSDN_NULL, &ec); assert_ptr_not_null(ed1, ""); /* Cache should be empty */ - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); /* Get another */ - edata_t *ed2 = edata_cache_get(TSDN_NULL, &edc); + edata_t *ed2 = edata_cache_get(TSDN_NULL, &ec); assert_ptr_not_null(ed2, ""); /* Still empty */ - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); /* Put one back, and the cache should now have one item */ - edata_cache_put(TSDN_NULL, &edc, ed1); - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 1, ""); + edata_cache_put(TSDN_NULL, &ec, ed1); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, ""); /* Reallocating should reuse the item, and leave an empty cache. 
*/ - edata_t *ed1_again = edata_cache_get(TSDN_NULL, &edc); + edata_t *ed1_again = edata_cache_get(TSDN_NULL, &ec); assert_ptr_eq(ed1, ed1_again, ""); - assert_zu_eq(atomic_load_zu(&edc.count, ATOMIC_RELAXED), 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - test_edata_cache_destroy(&edc); + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_small) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + bool err = edata_cache_small_prepare(TSDN_NULL, &ecs, 2); + assert_false(err, ""); + assert_zu_eq(ecs.count, 2, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_t *ed1 = edata_cache_small_get(&ecs); + assert_zu_eq(ecs.count, 1, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_t *ed2 = edata_cache_small_get(&ecs); + assert_zu_eq(ecs.count, 0, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_cache_small_put(&ecs, ed1); + assert_zu_eq(ecs.count, 1, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_cache_small_put(&ecs, ed2); + assert_zu_eq(ecs.count, 2, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + edata_cache_small_finish(TSDN_NULL, &ecs, 1); + assert_zu_eq(ecs.count, 1, ""); + assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, ""); + + test_edata_cache_destroy(&ec); } TEST_END int main(void) { return test( - test_edata_cache); + test_edata_cache, + test_edata_cache_small); } From 0dcd576600b7ad1b4a142eb993e4f7639ccc638c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 Feb 2020 17:26:32 -0800 Subject: [PATCH 1593/2608] Edata cache: atomic fetch-add -> load-store. The modifications to count are protected by a mutex; there's no need to use the more costly version. 
--- src/edata_cache.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/edata_cache.c b/src/edata_cache.c index b62972a1..4601f33c 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -27,7 +27,8 @@ edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) { return base_alloc_edata(tsdn, edata_cache->base); } edata_avail_remove(&edata_cache->avail, edata); - atomic_fetch_sub_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); + atomic_store_zu(&edata_cache->count, count - 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &edata_cache->mtx); return edata; } @@ -36,7 +37,8 @@ void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { malloc_mutex_lock(tsdn, &edata_cache->mtx); edata_avail_insert(&edata_cache->avail, edata); - atomic_fetch_add_zu(&edata_cache->count, 1, ATOMIC_RELAXED); + size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); + atomic_store_zu(&edata_cache->count, count + 1, ATOMIC_RELAXED); malloc_mutex_unlock(tsdn, &edata_cache->mtx); } From 441d88d1c78ecc38a7ffad3f88ea50513dabc0f8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 9 Mar 2020 15:49:15 -0700 Subject: [PATCH 1594/2608] Rewrite profiling thread event --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines_b.h | 17 +++------ include/jemalloc/internal/thread_event.h | 7 ++++ src/jemalloc.c | 44 ++++++++-------------- src/prof.c | 17 +-------- src/thread_event.c | 12 +----- 6 files changed, 31 insertions(+), 68 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 0b6fecd2..5a32754e 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -53,7 +53,7 @@ void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); 
-void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated); +void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 7e28d836..64983877 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -85,11 +85,11 @@ prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { +prof_sample_should_skip(tsd_t *tsd, size_t usize) { cassert(config_prof); /* Fastpath: no need to load tdata */ - if (likely(prof_sample_event_wait_get(tsd) > 0)) { + if (likely(!te_prof_sample_event_lookahead(tsd, usize))) { return true; } @@ -102,21 +102,16 @@ prof_sample_accum_update(tsd_t *tsd, size_t usize, bool update) { return true; } - /* Compute new sample threshold. */ - if (update) { - prof_sample_threshold_update(tsd); - } return !tdata->active; } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active, bool update) { +prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active) { prof_tctx_t *ret; assert(usize == sz_s2u(usize)); - if (!prof_active || - likely(prof_sample_accum_update(tsd, usize, update))) { + if (!prof_active || likely(prof_sample_should_skip(tsd, usize))) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { ret = prof_tctx_create(tsd); @@ -150,7 +145,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_accum_update(tsd, usize, true)) { + if (prof_sample_should_skip(tsd, usize)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a @@ -158,7 +153,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * though its actual usize was insufficient to cross the * sample threshold. */ - prof_alloc_rollback(tsd, tctx, true); + prof_alloc_rollback(tsd, tctx); tctx = (prof_tctx_t *)(uintptr_t)1U; } } diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index d528c051..f9e2ba5c 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -218,6 +218,13 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { } } +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + return tsd_thread_allocated_get(tsd) + usize - + tsd_thread_allocated_last_event_get(tsd) >= + tsd_prof_sample_event_wait_get(tsd); +} + JEMALLOC_ALWAYS_INLINE void te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { te_assert_invariants(tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 758e3244..7a65db02 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2177,8 +2177,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { dopts->arena_ind = 0; } - thread_alloc_event(tsd, usize); - /* * If dopts->alignment > 0, then ind is still 0, but usize was computed * in the previous if statement. Down the positive alignment path, @@ -2187,8 +2185,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. 
*/ if (config_prof && opt_prof) { - prof_tctx_t *tctx = prof_alloc_prep( - tsd, usize, prof_active_get_unlocked(), true); + bool prof_active = prof_active_get_unlocked(); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active); emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { @@ -2204,8 +2202,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } if (unlikely(allocation == NULL)) { - te_alloc_rollback(tsd, usize); - prof_alloc_rollback(tsd, tctx, true); + prof_alloc_rollback(tsd, tctx); goto label_oom; } prof_malloc(tsd, allocation, size, usize, &alloc_ctx, tctx); @@ -2214,7 +2211,6 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, ind); if (unlikely(allocation == NULL)) { - te_alloc_rollback(tsd, usize); goto label_oom; } } @@ -2223,6 +2219,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { * Allocation has been done at this point. We still have some * post-allocation work to do though. 
*/ + + thread_alloc_event(tsd, usize); + assert(dopts->alignment == 0 || ((uintptr_t)allocation & (dopts->alignment - 1)) == ZU(0)); @@ -3132,7 +3131,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active, false); + prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active); void *p; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, @@ -3142,7 +3141,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { - prof_alloc_rollback(tsd, tctx, false); + prof_alloc_rollback(tsd, tctx); return NULL; } @@ -3155,8 +3154,10 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, * be the same as the current usize because of in-place large * reallocation. Therefore, query the actual value of usize. 
*/ + assert(*usize >= isalloc(tsd_tsdn(tsd), p)); *usize = isalloc(tsd_tsdn(tsd), p); } + prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, old_usize, &old_prof_info); @@ -3214,11 +3215,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { goto label_oom; } - thread_alloc_event(tsd, usize); p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { - te_alloc_rollback(tsd, usize); goto label_oom; } } else { @@ -3228,9 +3227,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } usize = isalloc(tsd_tsdn(tsd), p); - thread_alloc_event(tsd, usize); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); + thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); UTRACE(ptr, size, p); @@ -3416,9 +3415,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize_max = SC_LARGE_MAXCLASS; } } - thread_alloc_event(tsd, usize_max); bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active, false); + prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active); size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { @@ -3428,18 +3426,6 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); } - if (usize <= usize_max) { - te_alloc_rollback(tsd, usize_max - usize); - } else { - /* - * For downsizing request, usize_max can be less than usize. - * We here further increase thread event counters so as to - * record the true usize, and then when the execution goes back - * to xallocx(), the entire usize will be rolled back if it's - * equal to the old usize. 
- */ - thread_alloc_event(tsd, usize - usize_max); - } /* * At this point we can still safely get the original profiling @@ -3452,9 +3438,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_t prof_info; if (usize == old_usize) { prof_info_get(tsd, ptr, alloc_ctx, &prof_info); - prof_alloc_rollback(tsd, tctx, false); + prof_alloc_rollback(tsd, tctx); } else { prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + assert(usize <= usize_max); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, old_usize, &prof_info); } @@ -3516,7 +3503,6 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { } else { usize = ixallocx_helper(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero); - thread_alloc_event(tsd, usize); } /* @@ -3527,9 +3513,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { == old_edata); if (unlikely(usize == old_usize)) { - te_alloc_rollback(tsd, usize); goto label_not_resized; } + thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); if (config_fill && malloc_slow) { diff --git a/src/prof.c b/src/prof.c index 82f88a21..73e6d914 100644 --- a/src/prof.c +++ b/src/prof.c @@ -118,7 +118,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { } void -prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { +prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); if (tsd_reentrancy_level_get(tsd) > 0) { @@ -126,21 +126,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) { return; } - prof_tdata_t *tdata; - - if (updated) { - /* - * Compute a new sample threshold. This isn't very important in - * practice, because this function is rarely executed, so the - * potential for sample bias is minimal except in contrived - * programs. 
- */ - tdata = prof_tdata_get(tsd, true); - if (tdata != NULL) { - prof_sample_threshold_update(tsd); - } - } - if ((uintptr_t)tctx > (uintptr_t)1U) { malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; diff --git a/src/thread_event.c b/src/thread_event.c index dadace38..75208f0e 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -78,17 +78,7 @@ te_prof_sample_event_handler(tsd_t *tsd) { if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { prof_idump(tsd_tsdn(tsd)); } - if (!prof_active_get_unlocked()) { - /* - * If prof_active is off, we reset prof_sample_event_wait to be - * the sample interval when it drops to 0, so that there won't - * be excessive routings to the slow path, and that when - * prof_active is turned on later, the counting for sampling - * can immediately resume as normal. - */ - te_prof_sample_event_update(tsd, - (uint64_t)(1 << lg_prof_sample)); - } + te_tsd_prof_sample_event_init(tsd); } static void From ba783b3a0ff6d47d56a76ed298a1aaa2515d12d4 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 10 Mar 2020 14:21:05 -0700 Subject: [PATCH 1595/2608] Remove prof -> thread_event dependency --- include/jemalloc/internal/prof_inlines_b.h | 15 +++++++-------- include/jemalloc/internal/thread_event.h | 1 + src/jemalloc.c | 16 +++++++++++----- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines_b.h index 64983877..29d4020e 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines_b.h @@ -85,11 +85,11 @@ prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { } JEMALLOC_ALWAYS_INLINE bool -prof_sample_should_skip(tsd_t *tsd, size_t usize) { +prof_sample_should_skip(tsd_t *tsd, bool sample_event) { cassert(config_prof); /* Fastpath: no need to load tdata */ - if (likely(!te_prof_sample_event_lookahead(tsd, usize))) { + if (likely(!sample_event)) { return 
true; } @@ -106,12 +106,11 @@ prof_sample_should_skip(tsd_t *tsd, size_t usize) { } JEMALLOC_ALWAYS_INLINE prof_tctx_t * -prof_alloc_prep(tsd_t *tsd, size_t usize, bool prof_active) { +prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { prof_tctx_t *ret; - assert(usize == sz_s2u(usize)); - - if (!prof_active || likely(prof_sample_should_skip(tsd, usize))) { + if (!prof_active || + likely(prof_sample_should_skip(tsd, sample_event))) { ret = (prof_tctx_t *)(uintptr_t)1U; } else { ret = prof_tctx_create(tsd); @@ -137,7 +136,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, JEMALLOC_ALWAYS_INLINE void prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx, bool prof_active, const void *old_ptr, size_t old_usize, - prof_info_t *old_prof_info) { + prof_info_t *old_prof_info, bool sample_event) { bool sampled, old_sampled, moved; cassert(config_prof); @@ -145,7 +144,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (prof_sample_should_skip(tsd, usize)) { + if (prof_sample_should_skip(tsd, sample_event)) { /* * Don't sample. 
The usize passed to prof_alloc_prep() * was larger than what actually got allocated, so a diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index f9e2ba5c..cef404bf 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -220,6 +220,7 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { JEMALLOC_ALWAYS_INLINE bool te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + assert(usize == sz_s2u(usize)); return tsd_thread_allocated_get(tsd) + usize - tsd_thread_allocated_last_event_get(tsd) >= tsd_prof_sample_event_wait_get(tsd); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7a65db02..8561ef40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2186,7 +2186,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { /* If profiling is on, get our profiling context. */ if (config_prof && opt_prof) { bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, usize, prof_active); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, + sample_event); emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { @@ -3131,7 +3133,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, *usize, prof_active); + bool sample_event = te_prof_sample_event_lookahead(tsd, *usize); + prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, @@ -3158,8 +3161,9 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, *usize = isalloc(tsd_tsdn(tsd), p); } + sample_event = 
te_prof_sample_event_lookahead(tsd, *usize); prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, - old_usize, &old_prof_info); + old_usize, &old_prof_info, sample_event); return p; } @@ -3416,7 +3420,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } } bool prof_active = prof_active_get_unlocked(); - prof_tctx_t *tctx = prof_alloc_prep(tsd, usize_max, prof_active); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize_max); + prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); size_t usize; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { @@ -3442,8 +3447,9 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, } else { prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); assert(usize <= usize_max); + sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, - old_usize, &prof_info); + old_usize, &prof_info, sample_event); } assert(old_prof_info.alloc_tctx == prof_info.alloc_tctx); From a5780598b3963648e217c89872e98b40d3e7b4ea Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 9 Mar 2020 17:05:06 -0700 Subject: [PATCH 1596/2608] Remove thread_event_rollback() --- include/jemalloc/internal/thread_event.h | 1 - src/thread_event.c | 49 ------------------------ test/unit/thread_event.c | 18 +-------- 3 files changed, 1 insertion(+), 67 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index cef404bf..b05ff25a 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -33,7 +33,6 @@ typedef struct te_ctx_s { void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); -void te_alloc_rollback(tsd_t *tsd, size_t diff); void te_event_update(tsd_t *tsd, bool alloc_event); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); diff --git 
a/src/thread_event.c b/src/thread_event.c index 75208f0e..163ca3f1 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -320,55 +320,6 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { te_assert_invariants(tsd); } -void -te_alloc_rollback(tsd_t *tsd, size_t diff) { - te_assert_invariants(tsd); - if (diff == 0U) { - return; - } - - /* Rollback happens only on alloc events. */ - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, true); - - uint64_t thread_allocated = te_ctx_current_bytes_get(&ctx); - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t thread_allocated_rollback = thread_allocated - diff; - te_ctx_current_bytes_set(&ctx, thread_allocated_rollback); - - uint64_t last_event = te_ctx_last_event_get(&ctx); - /* Both subtractions are intentionally susceptible to underflow. */ - if (thread_allocated_rollback - last_event <= - thread_allocated - last_event) { - te_assert_invariants(tsd); - return; - } - - te_ctx_last_event_set(&ctx, thread_allocated_rollback); - /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t wait_diff = last_event - thread_allocated_rollback; - assert(wait_diff <= diff); - -#define E(event, condition, alloc_event) \ - if (alloc_event == true && condition) { \ - uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= TE_MAX_START_WAIT); \ - if (event_wait > 0U) { \ - if (wait_diff > TE_MAX_START_WAIT - event_wait) {\ - event_wait = TE_MAX_START_WAIT; \ - } else { \ - event_wait += wait_diff; \ - } \ - assert(event_wait <= TE_MAX_START_WAIT); \ - event##_event_wait_set(tsd, event_wait); \ - } \ - } - - ITERATE_OVER_ALL_EVENTS -#undef E - te_event_update(tsd, true); -} - void te_event_update(tsd_t *tsd, bool is_alloc) { te_ctx_t ctx; diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 5501fa3a..e0b88a92 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -27,24 +27,8 @@ TEST_BEGIN(test_next_event_fast) { } TEST_END -TEST_BEGIN(test_event_rollback) { - tsd_t *tsd = tsd_fetch(); - const uint64_t diff = TE_MAX_INTERVAL >> 2; - size_t count = 10; - uint64_t thread_allocated = thread_allocated_get(tsd); - while (count-- != 0) { - te_alloc_rollback(tsd, diff); - uint64_t thread_allocated_after = thread_allocated_get(tsd); - assert_u64_eq(thread_allocated - thread_allocated_after, diff, - "thread event counters are not properly rolled back"); - thread_allocated = thread_allocated_after; - } -} -TEST_END - int main(void) { return test( - test_next_event_fast, - test_event_rollback); + test_next_event_fast); } From 2e5899c1299125c17fc428026a364368ff1531ed Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 12:42:10 -0700 Subject: [PATCH 1597/2608] Stats: Fix tcache_bytes reporting. Previously, large allocations in tcaches would have their sizes reduced during stats estimation. Added a test, which fails before this change but passes now. 
This fixes a bug introduced in 593484661261c20f75557279931eb2d9ca165185, which was itself fixing a bug introduced in 9c0549007dcb64f4ff35d37390a9a6a8d3cea880. --- src/arena.c | 3 ++- test/unit/stats.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 7f7c27fb..d4b69798 100644 --- a/src/arena.c +++ b/src/arena.c @@ -207,7 +207,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, cache_bin_ncached_get(tbin, - &tcache_bin_info[i + SC_NBINS]) * sz_index2size(i)); + &tcache_bin_info[i + SC_NBINS]) + * sz_index2size(i + SC_NBINS)); } } malloc_mutex_prof_read(tsdn, diff --git a/test/unit/stats.c b/test/unit/stats.c index f4ac154d..20a32ddf 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,5 +1,8 @@ #include "test/jemalloc_test.h" +#define STRINGIFY_HELPER(x) #x +#define STRINGIFY(x) STRINGIFY_HELPER(x) + TEST_BEGIN(test_stats_summary) { size_t sz, allocated, active, resident, mapped; int expected = config_stats ? 0 : ENOENT; @@ -361,6 +364,50 @@ TEST_BEGIN(test_stats_arenas_lextents) { } TEST_END +static void +test_tcache_bytes_for_usize(size_t usize) { + uint64_t epoch; + size_t tcache_bytes; + size_t sz = sizeof(tcache_bytes); + + void *ptr = mallocx(usize, 0); + + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + size_t tcache_bytes_before = tcache_bytes; + dallocx(ptr, 0); + + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl( + "stats.arenas." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + size_t tcache_bytes_after = tcache_bytes; + assert_zu_eq(tcache_bytes_after - tcache_bytes_before, + usize, "Incorrectly attributed a free"); +} + +TEST_BEGIN(test_stats_tcache_bytes_small) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_SMALL_MAXCLASS); + + test_tcache_bytes_for_usize(SC_SMALL_MAXCLASS); +} +TEST_END + +TEST_BEGIN(test_stats_tcache_bytes_large) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_LARGE_MINCLASS); + + test_tcache_bytes_for_usize(SC_LARGE_MINCLASS); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -370,5 +417,7 @@ main(void) { test_stats_arenas_small, test_stats_arenas_large, test_stats_arenas_bins, - test_stats_arenas_lextents); + test_stats_arenas_lextents, + test_stats_tcache_bytes_small, + test_stats_tcache_bytes_large); } From b30a5c2f9073b6f35f0023a443cd18ca406e972a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Mar 2020 10:00:50 -0700 Subject: [PATCH 1598/2608] Reorganize cpp APIs and suppress unused function warnings --- src/jemalloc_cpp.cpp | 82 +++++++++++++++++++++----------------------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index c2110a11..6959b27f 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -97,43 +97,6 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { return handleOOM(size, IsNoExcept); } -#if __cpp_aligned_new >= 201606 -template -JEMALLOC_ALWAYS_INLINE -void * -alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { - void *ptr = je_aligned_alloc(static_cast(alignment), size); - if (likely(ptr != nullptr)) { - return ptr; - } - - return handleOOM(size, IsNoExcept); -} -#endif // __cpp_aligned_new - -JEMALLOC_ALWAYS_INLINE -void 
-sizedDeleteImpl(void* ptr, std::size_t size) noexcept { - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx_noflags(ptr, size); -} - -#if __cpp_aligned_new >= 201606 -JEMALLOC_ALWAYS_INLINE -void -alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { - if (config_debug) { - assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); - } - if (unlikely(ptr == nullptr)) { - return; - } - je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); -} -#endif // __cpp_aligned_new - void * operator new(std::size_t size) { return newImpl(size); @@ -156,19 +119,31 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept { #if __cpp_aligned_new >= 201606 +template +JEMALLOC_ALWAYS_INLINE +void * +alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { + void *ptr = je_aligned_alloc(static_cast(alignment), size); + if (likely(ptr != nullptr)) { + return ptr; + } + + return handleOOM(size, IsNoExcept); +} + void * operator new(std::size_t size, std::align_val_t alignment) { return alignedNewImpl(size, alignment); } void * -operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { - return alignedNewImpl(size, alignment); +operator new[](std::size_t size, std::align_val_t alignment) { + return alignedNewImpl(size, alignment); } void * -operator new[](std::size_t size, std::align_val_t alignment) { - return alignedNewImpl(size, alignment); +operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { + return alignedNewImpl(size, alignment); } void * @@ -199,6 +174,15 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept { #if __cpp_sized_deallocation >= 201309 +JEMALLOC_ALWAYS_INLINE +void +sizedDeleteImpl(void* ptr, std::size_t size) noexcept { + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx_noflags(ptr, size); +} + void operator delete(void *ptr, std::size_t size) noexcept { sizedDeleteImpl(ptr, 
size); @@ -213,18 +197,30 @@ operator delete[](void *ptr, std::size_t size) noexcept { #if __cpp_aligned_new >= 201606 +JEMALLOC_ALWAYS_INLINE +void +alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { + if (config_debug) { + assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); + } + if (unlikely(ptr == nullptr)) { + return; + } + je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); +} + void operator delete(void* ptr, std::align_val_t) noexcept { je_free(ptr); } void -operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { +operator delete[](void* ptr, std::align_val_t) noexcept { je_free(ptr); } void -operator delete[](void* ptr, std::align_val_t) noexcept { +operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { je_free(ptr); } From ccdc70a5ce7b9dd723d947025f99006e7e78d17e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 18 Mar 2020 18:06:47 -0700 Subject: [PATCH 1599/2608] Fix: assertion could abort on past failures --- test/include/test/test.h | 98 ++++++++++++++++++++++------------------ test/src/test.c | 5 -- 2 files changed, 55 insertions(+), 48 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index a1b8ff32..2167e8c6 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,6 +1,6 @@ #define ASSERT_BUFSIZE 256 -#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ +#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) do { \ const t a_ = (a); \ const t b_ = (b); \ if (!(a_ cmp b_)) { \ @@ -13,10 +13,17 @@ __func__, __FILE__, __LINE__, \ #a, #b, a_, b_); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) +#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(false, \ + t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) + #define expect_ptr_eq(a, b, ...) 
expect_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) #define expect_ptr_ne(a, b, ...) expect_cmp(void *, a, b, !=, \ @@ -210,7 +217,7 @@ #define expect_u64_gt(a, b, ...) expect_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define expect_b_eq(a, b, ...) do { \ +#define verify_b_eq(may_abort, a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ == b_)) { \ @@ -223,10 +230,15 @@ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) -#define expect_b_ne(a, b, ...) do { \ + +#define verify_b_ne(may_abort, a, b, ...) do { \ bool a_ = (a); \ bool b_ = (b); \ if (!(a_ != b_)) { \ @@ -239,13 +251,21 @@ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) + +#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__) +#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__) + #define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) #define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define expect_str_eq(a, b, ...) do { \ +#define verify_str_eq(may_abort, a, b, ...) do { \ if (strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -255,10 +275,15 @@ "\"%s\" differs from \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) -#define expect_str_ne(a, b, ...) do { \ + +#define verify_str_ne(may_abort, a, b, ...) 
do { \ if (!strcmp((a), (b))) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ @@ -268,30 +293,35 @@ "\"%s\" same as \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ + if (may_abort) { \ + abort(); \ + } else { \ + p_test_fail(prefix, message); \ + } \ } \ } while (0) -#define expect_not_reached(...) do { \ +#define expect_str_eq(a, b, ...) verify_str_eq(false, a, b, __VA_ARGS__) +#define expect_str_ne(a, b, ...) verify_str_ne(false, a, b, __VA_ARGS__) + +#define verify_not_reached(may_abort, ...) do { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ "%s:%s:%d: Unreachable code reached: ", \ __func__, __FILE__, __LINE__); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(prefix, message); \ -} while (0) - -#define p_abort_test_if_failed() do { \ - if (p_test_failed()) { \ + if (may_abort) { \ abort(); \ + } else { \ + p_test_fail(prefix, message); \ } \ } while (0) -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) do { \ - expect_cmp(t, a, b, cmp, neg_cmp, pri, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__) + +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(true, \ + t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) #define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ !=, "p", __VA_ARGS__) @@ -486,33 +516,16 @@ #define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) do { \ - expect_b_eq(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) - -#define assert_b_ne(a, b, ...) do { \ - expect_b_ne(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__) +#define assert_b_ne(a, b, ...) 
verify_b_ne(true, a, b, __VA_ARGS__) #define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) #define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) -#define assert_str_eq(a, b, ...) do { \ - expect_str_eq(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define assert_str_eq(a, b, ...) verify_str_eq(true, a, b, __VA_ARGS__) +#define assert_str_ne(a, b, ...) verify_str_ne(true, a, b, __VA_ARGS__) -#define assert_str_ne(a, b, ...) do { \ - expect_str_ne(a, b, __VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) - -#define assert_not_reached(...) do { \ - expect_not_reached(__VA_ARGS__); \ - p_abort_test_if_failed(); \ -} while (0) +#define assert_not_reached(...) verify_not_reached(true, __VA_ARGS__) /* * If this enum changes, corresponding changes in test/test.sh.in are also @@ -568,6 +581,5 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); -bool p_test_failed(void); void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index b40fbc6d..4583e55a 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -233,11 +233,6 @@ p_test_fail(const char *prefix, const char *message) { test_status = test_status_fail; } -bool -p_test_failed() { - return test_status == test_status_fail; -} - void strncpy_cond(void *dst, const char *src, bool cond) { if (cond) { From 2256ef896177faf8af7b199595382348be054250 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 24 Mar 2020 17:53:41 -0700 Subject: [PATCH 1600/2608] Add option to fetch system thread name on each prof sample --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 5 + src/ctl.c | 5 + src/jemalloc.c | 3 + src/prof.c | 155 ++++++++++++++--------- test/unit/mallctl.c | 1 + test/unit/prof_use_sys_thread_name.c | 75 +++++++++++ test/unit/prof_use_sys_thread_name.sh | 5 + 8 files changed, 192 insertions(+), 58 
deletions(-) create mode 100644 test/unit/prof_use_sys_thread_name.c create mode 100644 test/unit/prof_use_sys_thread_name.sh diff --git a/Makefile.in b/Makefile.in index 7eca2f5f..7300cb98 100644 --- a/Makefile.in +++ b/Makefile.in @@ -227,6 +227,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ + $(srcroot)test/unit/prof_use_sys_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 5a32754e..35181671 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -28,6 +28,9 @@ extern char opt_prof_prefix[ extern ssize_t opt_prof_recent_alloc_max; extern malloc_mutex_t prof_recent_alloc_mtx; +/* Whether to use thread name provided by the system or by mallctl. */ +extern bool opt_prof_experimental_use_sys_thread_name; + /* Accessed via prof_active_[gs]et{_unlocked,}(). 
*/ extern bool prof_active; @@ -59,6 +62,8 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); #ifdef JEMALLOC_JET +typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); +extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; size_t prof_tdata_count(void); size_t prof_bt_count(void); #endif diff --git a/src/ctl.c b/src/ctl.c index d149ce6d..86ac83e1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -117,6 +117,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) +CTL_PROTO(opt_prof_experimental_use_sys_thread_name) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -353,6 +354,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_experimental_use_sys_thread_name"), + CTL(opt_prof_experimental_use_sys_thread_name)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; @@ -1829,6 +1832,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_experimental_use_sys_thread_name, + opt_prof_experimental_use_sys_thread_name, bool) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8561ef40..ea331f82 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1426,6 +1426,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", 
-1, SSIZE_MAX) + CONF_HANDLE_BOOL( + opt_prof_experimental_use_sys_thread_name, + "prof_experimental_use_sys_thread_name") } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/prof.c b/src/prof.c index 73e6d914..e68694a8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -48,6 +48,7 @@ bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +bool opt_prof_experimental_use_sys_thread_name = false; /* Accessed via prof_idump_[accum/rollback](). */ static counter_accum_t prof_idump_accumulated; @@ -133,9 +134,101 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } } +static char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; + } + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; + } + + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; + } + memcpy(ret, thread_name, size); + return ret; +} + +static int +prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t *tdata; + unsigned i; + char *s; + + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return EAGAIN; + } + + /* Validate input. 
*/ + if (thread_name == NULL) { + return EFAULT; + } + for (i = 0; thread_name[i] != '\0'; i++) { + char c = thread_name[i]; + if (!isgraph(c) && !isblank(c)) { + return EFAULT; + } + } + + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + if (s == NULL) { + return EAGAIN; + } + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + tdata->thread_name = NULL; + } + if (strlen(s) > 0) { + tdata->thread_name = s; + } + return 0; +} + +static int +prof_read_sys_thread_name_impl(char *buf, size_t limit) { +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + return pthread_getname_np(pthread_self(), buf, limit); +#else + return ENOSYS; +#endif +} +#ifdef JEMALLOC_JET +prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name = + prof_read_sys_thread_name_impl; +#else +#define prof_read_sys_thread_name prof_read_sys_thread_name_impl +#endif + +static void +prof_fetch_sys_thread_name(tsd_t *tsd) { +#define THREAD_NAME_MAX_LEN 16 + char buf[THREAD_NAME_MAX_LEN]; + if (!prof_read_sys_thread_name(buf, THREAD_NAME_MAX_LEN)) { + prof_thread_name_set_impl(tsd, buf); + } +#undef THREAD_NAME_MAX_LEN +} + void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { + if (opt_prof_experimental_use_sys_thread_name) { + prof_fetch_sys_thread_name(tsd); + } + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); prof_info_set(tsd, edata, tctx); @@ -710,29 +803,6 @@ prof_tdata_init(tsd_t *tsd) { NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, 
size); - return ret; -} - prof_tdata_t * prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; @@ -799,42 +869,11 @@ prof_thread_name_get(tsd_t *tsd) { int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t *tdata; - unsigned i; - char *s; - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return EAGAIN; + if (opt_prof_experimental_use_sys_thread_name) { + return ENOENT; + } else { + return prof_thread_name_set_impl(tsd, thread_name); } - - /* Validate input. */ - if (thread_name == NULL) { - return EFAULT; - } - for (i = 0; thread_name[i] != '\0'; i++) { - char c = thread_name[i]; - if (!isgraph(c) && !isblank(c)) { - return EFAULT; - } - } - - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); - if (s == NULL) { - return EAGAIN; - } - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - tdata->thread_name = NULL; - } - if (strlen(s) > 0) { - tdata->thread_name = s; - } - return 0; } bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e38723f6..cc1d5313 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -192,6 +192,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); + TEST_MALLCTL_OPT(bool, prof_experimental_use_sys_thread_name, prof); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/prof_use_sys_thread_name.c b/test/unit/prof_use_sys_thread_name.c new file mode 100644 index 00000000..60cb55bf --- /dev/null +++ b/test/unit/prof_use_sys_thread_name.c @@ -0,0 +1,75 @@ +#include "test/jemalloc_test.h" + +static const char *test_thread_name = "test_name"; + +static int +test_prof_read_sys_thread_name_error(char *buf, size_t limit) { + return ENOSYS; +} + +static int +test_prof_read_sys_thread_name(char *buf, size_t limit) { + 
assert(strlen(test_thread_name) < limit); + strncpy(buf, test_thread_name, limit); + return 0; +} + +static int +test_prof_read_sys_thread_name_clear(char *buf, size_t limit) { + assert(limit > 0); + buf[0] = '\0'; + return 0; +} + +TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { + test_skip_if(!config_prof); + + bool oldval; + size_t sz = sizeof(oldval); + assert_d_eq(mallctl("opt.prof_experimental_use_sys_thread_name", + &oldval, &sz, NULL, 0), 0, "mallctl failed"); + assert_true(oldval, "option was not set correctly"); + + const char *thread_name; + sz = sizeof(thread_name); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + expect_str_eq(thread_name, "", "Initial thread name should be empty"); + + thread_name = test_thread_name; + assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz), + ENOENT, "mallctl write for thread name should fail"); + assert_ptr_eq(thread_name, test_thread_name, + "Thread name should not be touched"); + + prof_read_sys_thread_name = test_prof_read_sys_thread_name_error; + void *p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + assert_str_eq(thread_name, "", + "Thread name should stay the same if the system call fails"); + + prof_read_sys_thread_name = test_prof_read_sys_thread_name; + p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + assert_str_eq(thread_name, test_thread_name, + "Thread name should be changed if the system call succeeds"); + + prof_read_sys_thread_name = test_prof_read_sys_thread_name_clear; + p = malloc(1); + free(p); + assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, + "mallctl read for thread name should not fail"); + expect_str_eq(thread_name, "", "Thread name should be updated if the " + 
"system call returns a different name"); +} +TEST_END + +int +main(void) { + return test( + test_prof_experimental_use_sys_thread_name); +} diff --git a/test/unit/prof_use_sys_thread_name.sh b/test/unit/prof_use_sys_thread_name.sh new file mode 100644 index 00000000..0e0e0d99 --- /dev/null +++ b/test/unit/prof_use_sys_thread_name.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_experimental_use_sys_thread_name:true" +fi From 3b4a03b92b2e415415a08f0150fdb9eeb659cd52 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 26 Mar 2020 11:40:49 -0700 Subject: [PATCH 1601/2608] Mac: don't declare system functions as nothrow. This contradicts the system headers, which can lead to breakages. --- include/jemalloc/jemalloc_macros.h.in | 6 ++++++ include/jemalloc/jemalloc_protos.h.in | 19 ++++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index b4469d8e..1ceb7b17 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -134,3 +134,9 @@ # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR #endif + +#if defined(__APPLE__) && !defined(JEMALLOC_NO_RENAME) +# define JEMALLOC_SYS_NOTHROW +#else +# define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW +#endif diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index a78414b1..d75b2224 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -8,21 +8,22 @@ extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque, const char *s); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@malloc(size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@malloc(size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void 
JEMALLOC_NOTHROW *@je_@calloc(size_t num, size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@calloc(size_t num, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2); -JEMALLOC_EXPORT int JEMALLOC_NOTHROW @je_@posix_memalign(void **memptr, - size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(nonnull(1)); +JEMALLOC_EXPORT int JEMALLOC_SYS_NOTHROW @je_@posix_memalign( + void **memptr, size_t alignment, size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(nonnull(1)); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@aligned_alloc(size_t alignment, + void JEMALLOC_SYS_NOTHROW *@je_@aligned_alloc(size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@realloc(void *ptr, size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@realloc(void *ptr, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); -JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free(void *ptr) +JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW @je_@free(void *ptr) JEMALLOC_CXX_THROW; JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN @@ -55,12 +56,12 @@ JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size( #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@memalign(size_t alignment, size_t size) + void JEMALLOC_SYS_NOTHROW *@je_@memalign(size_t alignment, size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN - void JEMALLOC_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW + void JEMALLOC_SYS_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif From d936b46d3a6320895ddd9a16dc4c5e79d5b9d8e9 Mon Sep 17 00:00:00 2001 From: "David T. 
Goldblatt" Date: Sun, 29 Mar 2020 10:41:23 -0700 Subject: [PATCH 1602/2608] Add malloc_conf_2_conf_harder This comes in handy when you're just a user of a canary system who wants to change settings set by the configuration system itself. --- Makefile.in | 1 + configure.ac | 3 ++- src/jemalloc.c | 35 ++++++++++++++++++++++++++++++++--- test/unit/malloc_conf_2.c | 29 +++++++++++++++++++++++++++++ test/unit/malloc_conf_2.sh | 1 + 5 files changed, 65 insertions(+), 4 deletions(-) create mode 100644 test/unit/malloc_conf_2.c create mode 100644 test/unit/malloc_conf_2.sh diff --git a/Makefile.in b/Makefile.in index 7300cb98..10af489b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -209,6 +209,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk_free.c \ $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ + $(srcroot)test/unit/malloc_conf_2.c \ $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ $(srcroot)test/unit/mq.c \ diff --git a/configure.ac b/configure.ac index 324656b9..daac2050 100644 --- a/configure.ac +++ b/configure.ac @@ -1000,7 +1000,8 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib +mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. 
AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) diff --git a/src/jemalloc.c b/src/jemalloc.c index ea331f82..63f7ebfa 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -32,6 +32,29 @@ const char *je_malloc_conf JEMALLOC_ATTR(weak) #endif ; +/* + * The usual rule is that the closer to runtime you are, the higher priority + * your configuration settings are (so the jemalloc config options get lower + * priority than the per-binary setting, which gets lower priority than the /etc + * setting, which gets lower priority than the environment settings). + * + * But it's a fairly common use case in some testing environments for a user to + * be able to control the binary, but nothing else (e.g. a performancy canary + * uses the production OS and environment variables, but can run any binary in + * those circumstances). For these use cases, it's handy to have an in-binary + * mechanism for overriding environment variable settings, with the idea that if + * the results are positive they get promoted to the official settings, and + * moved from the binary to the environment variable. + * + * We don't actually want this to be widespread, so we'll give it a silly name + * and not mention it in headers or documentation. 
+ */ +const char *je_malloc_conf_2_conf_harder +#ifndef _WIN32 + JEMALLOC_ATTR(weak) +#endif + ; + bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -975,7 +998,7 @@ malloc_slow_flag_init(void) { } /* Number of sources for initializing malloc_conf */ -#define MALLOC_CONF_NSOURCES 4 +#define MALLOC_CONF_NSOURCES 5 static const char * obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { @@ -1053,6 +1076,9 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { ret = NULL; } break; + } case 4: { + ret = je_malloc_conf_2_conf_harder; + break; } default: not_reached(); ret = NULL; @@ -1069,7 +1095,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "string pointed to by the global variable malloc_conf", "\"name\" of the file referenced by the symbolic link named " "/etc/malloc.conf", - "value of the environment variable MALLOC_CONF" + "value of the environment variable MALLOC_CONF", + "string pointed to by the global variable " + "malloc_conf_2_conf_harder", }; unsigned i; const char *opts, *k, *v; @@ -1506,7 +1534,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], static void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { - const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL}; + const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, + NULL}; char buf[PATH_MAX + 1]; /* The first call only set the confirm_conf option and opts_cache */ diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c new file mode 100644 index 00000000..ecfa4991 --- /dev/null +++ b/test/unit/malloc_conf_2.c @@ -0,0 +1,29 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "dirty_decay_ms:1000"; +const char *malloc_conf_2_conf_harder = "dirty_decay_ms:1234"; + +TEST_BEGIN(test_malloc_conf_2) { +#ifdef _WIN32 + bool windows = true; +#else + bool windows = false; +#endif + /* Windows doesn't support weak symbol linker 
trickery. */ + test_skip_if(windows); + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(dirty_decay_ms, 1234, + "malloc_conf_2 setting didn't take effect"); +} +TEST_END + +int +main(void) { + return test( + test_malloc_conf_2); +} diff --git a/test/unit/malloc_conf_2.sh b/test/unit/malloc_conf_2.sh new file mode 100644 index 00000000..2c780f1a --- /dev/null +++ b/test/unit/malloc_conf_2.sh @@ -0,0 +1 @@ +export MALLOC_CONF="dirty_decay_ms:500" From a166c20818e2f5a50c6f0b511ffc5b2ed66b81d2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 26 Mar 2020 18:17:20 -0700 Subject: [PATCH 1603/2608] Make prof_tctx_t pointer a true prof atomic fence --- src/large.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/large.c b/src/large.c index babb3071..8982d103 100644 --- a/src/large.c +++ b/src/large.c @@ -360,9 +360,9 @@ large_prof_tctx_reset(edata_t *edata) { void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { - large_prof_tctx_set(edata, tctx); nstime_t t; nstime_init_update(&t); edata_prof_alloc_time_set(edata, &t); edata_prof_recent_alloc_init(edata); + large_prof_tctx_set(edata, tctx); } From 09cd79495f947a7a2e271eb9bc6ff36b15cfc72f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 20 Mar 2020 10:48:55 -0700 Subject: [PATCH 1604/2608] Encapsulate buffer allocation failure in buffered writer --- include/jemalloc/internal/buf_writer.h | 9 ++-- src/buf_writer.c | 72 +++++++++----------------- src/jemalloc.c | 3 +- src/prof_log.c | 5 +- src/prof_recent.c | 5 +- test/unit/buf_writer.c | 12 ++--- 6 files changed, 35 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index c1e2a827..b64c966a 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -13,10 +13,8 @@ 
typedef void (write_cb_t)(void *, const char *); typedef struct { - write_cb_t *public_write_cb; - void *public_cbopaque; - write_cb_t *private_write_cb; - void *private_cbopaque; + write_cb_t *write_cb; + void *cbopaque; char *buf; size_t buf_size; size_t buf_end; @@ -25,9 +23,8 @@ typedef struct { bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); -write_cb_t *buf_writer_get_write_cb(buf_writer_t *buf_writer); -void *buf_writer_get_cbopaque(buf_writer_t *buf_writer); void buf_writer_flush(buf_writer_t *buf_writer); +write_cb_t buf_writer_cb; void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index bb8763b9..fd0226a1 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -25,28 +25,29 @@ buf_writer_free_internal_buf(tsdn_t *tsdn, void *buf) { } } -static write_cb_t buf_writer_cb; - static void buf_writer_assert(buf_writer_t *buf_writer) { + assert(buf_writer != NULL); + assert(buf_writer->write_cb != NULL); if (buf_writer->buf != NULL) { - assert(buf_writer->public_write_cb == buf_writer_cb); - assert(buf_writer->public_cbopaque == buf_writer); - assert(buf_writer->private_write_cb != buf_writer_cb); - assert(buf_writer->private_cbopaque != buf_writer); assert(buf_writer->buf_size > 0); } else { - assert(buf_writer->public_write_cb != buf_writer_cb); - assert(buf_writer->public_cbopaque != buf_writer); - assert(buf_writer->private_write_cb == NULL); - assert(buf_writer->private_cbopaque == NULL); assert(buf_writer->buf_size == 0); + assert(buf_writer->internal_buf); } + assert(buf_writer->buf_end <= buf_writer->buf_size); } bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len) { + if (write_cb != NULL) { + buf_writer->write_cb = write_cb; + } else { + buf_writer->write_cb = je_malloc_message != NULL ? 
+ je_malloc_message : wrtmessage; + } + buf_writer->cbopaque = cbopaque; assert(buf_len >= 2); if (buf != NULL) { buf_writer->buf = buf; @@ -56,36 +57,14 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, buf_len); buf_writer->internal_buf = true; } - buf_writer->buf_end = 0; if (buf_writer->buf != NULL) { - buf_writer->public_write_cb = buf_writer_cb; - buf_writer->public_cbopaque = buf_writer; - buf_writer->private_write_cb = write_cb; - buf_writer->private_cbopaque = cbopaque; buf_writer->buf_size = buf_len - 1; /* Allowing for '\0'. */ - buf_writer_assert(buf_writer); - return false; } else { - buf_writer->public_write_cb = write_cb; - buf_writer->public_cbopaque = cbopaque; - buf_writer->private_write_cb = NULL; - buf_writer->private_cbopaque = NULL; buf_writer->buf_size = 0; - buf_writer_assert(buf_writer); - return true; } -} - -write_cb_t * -buf_writer_get_write_cb(buf_writer_t *buf_writer) { + buf_writer->buf_end = 0; buf_writer_assert(buf_writer); - return buf_writer->public_write_cb; -} - -void * -buf_writer_get_cbopaque(buf_writer_t *buf_writer) { - buf_writer_assert(buf_writer); - return buf_writer->public_cbopaque; + return buf_writer->buf == NULL; } void @@ -94,34 +73,31 @@ buf_writer_flush(buf_writer_t *buf_writer) { if (buf_writer->buf == NULL) { return; } - assert(buf_writer->buf_end <= buf_writer->buf_size); buf_writer->buf[buf_writer->buf_end] = '\0'; - if (buf_writer->private_write_cb == NULL) { - buf_writer->private_write_cb = je_malloc_message != NULL ? 
- je_malloc_message : wrtmessage; - } - assert(buf_writer->private_write_cb != NULL); - buf_writer->private_write_cb(buf_writer->private_cbopaque, - buf_writer->buf); + buf_writer->write_cb(buf_writer->cbopaque, buf_writer->buf); buf_writer->buf_end = 0; + buf_writer_assert(buf_writer); } -static void +void buf_writer_cb(void *buf_writer_arg, const char *s) { buf_writer_t *buf_writer = (buf_writer_t *)buf_writer_arg; buf_writer_assert(buf_writer); - assert(buf_writer->buf != NULL); - assert(buf_writer->buf_end <= buf_writer->buf_size); - size_t i, slen, n, s_remain, buf_remain; + if (buf_writer->buf == NULL) { + buf_writer->write_cb(buf_writer->cbopaque, s); + return; + } + size_t i, slen, n; for (i = 0, slen = strlen(s); i < slen; i += n) { if (buf_writer->buf_end == buf_writer->buf_size) { buf_writer_flush(buf_writer); } - s_remain = slen - i; - buf_remain = buf_writer->buf_size - buf_writer->buf_end; + size_t s_remain = slen - i; + size_t buf_remain = buf_writer->buf_size - buf_writer->buf_end; n = s_remain < buf_remain ? 
s_remain : buf_remain; memcpy(buf_writer->buf + buf_writer->buf_end, s + i, n); buf_writer->buf_end += n; + buf_writer_assert(buf_writer); } assert(i == slen); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63f7ebfa..72eb55bf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3847,8 +3847,7 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, buf_writer_t buf_writer; buf_writer_init(tsdn, &buf_writer, write_cb, cbopaque, NULL, STATS_PRINT_BUFSIZE); - stats_print(buf_writer_get_write_cb(&buf_writer), - buf_writer_get_cbopaque(&buf_writer), opts); + stats_print(buf_writer_cb, &buf_writer, opts); buf_writer_terminate(tsdn, &buf_writer); } diff --git a/src/prof_log.c b/src/prof_log.c index c29fa350..1635979e 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -632,9 +632,8 @@ prof_log_stop(tsdn_t *tsdn) { buf_writer_t buf_writer; buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL, PROF_LOG_STOP_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_get_write_cb(&buf_writer), - buf_writer_get_cbopaque(&buf_writer)); + emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, + &buf_writer); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); diff --git a/src/prof_recent.c b/src/prof_recent.c index 7a98cc58..488cf178 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -466,9 +466,8 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, - buf_writer_get_write_cb(&buf_writer), - buf_writer_get_cbopaque(&buf_writer)); + emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, + &buf_writer); emitter_begin(&emitter); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 01f24119..821cf61f 100644 --- 
a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -24,10 +24,8 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { char s[UNIT_MAX + 1]; size_t n_unit, remain, i; ssize_t unit; - assert(buf_writer->buf != NULL); - write_cb_t *write_cb = buf_writer_get_write_cb(buf_writer); - void *cbopaque = buf_writer_get_cbopaque(buf_writer); + assert(buf_writer->buf != NULL); memset(s, 'a', UNIT_MAX); arg = 4; /* Starting value of random argument. */ arg_store = arg; @@ -39,7 +37,7 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - write_cb(cbopaque, s); + buf_writer_cb(buf_writer, s); remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. */ @@ -89,10 +87,6 @@ TEST_BEGIN(test_buf_write_oom) { assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); assert(buf_writer.buf == NULL); - write_cb_t *write_cb = buf_writer_get_write_cb(&buf_writer); - assert_ptr_eq(write_cb, test_write_cb, "Should use test_write_cb"); - void *cbopaque = buf_writer_get_cbopaque(&buf_writer); - assert_ptr_eq(cbopaque, &arg, "Should use arg"); char s[UNIT_MAX + 1]; size_t n_unit, i; @@ -108,7 +102,7 @@ TEST_BEGIN(test_buf_write_oom) { test_write_len = 0; for (i = 1; i <= n_unit; ++i) { arg = prng_lg_range_u64(&arg, 64); - write_cb(cbopaque, s); + buf_writer_cb(&buf_writer, s); assert_u64_eq(arg_store, arg, "Call back argument didn't get through"); assert_zu_eq(test_write_len, i * unit, From f9aad7a49b14097a945316f10d2abe179fd0a8a5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 20 Mar 2020 13:47:09 -0700 Subject: [PATCH 1605/2608] Add piping API to buffered writer --- include/jemalloc/internal/buf_writer.h | 4 ++ src/buf_writer.c | 33 ++++++++++++ test/unit/buf_writer.c | 70 +++++++++++++++++++++++++- 3 files changed, 106 insertions(+), 1 deletion(-) diff --git 
a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index b64c966a..55b18ab2 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -27,4 +27,8 @@ void buf_writer_flush(buf_writer_t *buf_writer); write_cb_t buf_writer_cb; void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); +typedef ssize_t (read_cb_t)(void *read_cbopaque, void *buf, size_t limit); +void buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, + void *read_cbopaque); + #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/src/buf_writer.c b/src/buf_writer.c index fd0226a1..06a2735b 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -110,3 +110,36 @@ buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) { buf_writer_free_internal_buf(tsdn, buf_writer->buf); } } + +void +buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, + void *read_cbopaque) { + /* + * A tiny local buffer in case the buffered writer failed to allocate + * at init. 
+ */ + static char backup_buf[16]; + static buf_writer_t backup_buf_writer; + + buf_writer_assert(buf_writer); + assert(read_cb != NULL); + if (buf_writer->buf == NULL) { + buf_writer_init(TSDN_NULL, &backup_buf_writer, + buf_writer->write_cb, buf_writer->cbopaque, backup_buf, + sizeof(backup_buf)); + buf_writer = &backup_buf_writer; + } + assert(buf_writer->buf != NULL); + ssize_t nread = 0; + do { + buf_writer->buf_end += nread; + buf_writer_assert(buf_writer); + if (buf_writer->buf_end == buf_writer->buf_size) { + buf_writer_flush(buf_writer); + } + nread = read_cb(read_cbopaque, + buf_writer->buf + buf_writer->buf_end, + buf_writer->buf_size - buf_writer->buf_end); + } while (nread > 0); + buf_writer_flush(buf_writer); +} diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index 821cf61f..d5e63a0e 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -119,10 +119,78 @@ TEST_BEGIN(test_buf_write_oom) { } TEST_END +static int test_read_count; +static size_t test_read_len; +static uint64_t arg_sum; + +ssize_t +test_read_cb(void *cbopaque, void *buf, size_t limit) { + static uint64_t rand = 4; + + arg_sum += *(uint64_t *)cbopaque; + assert_zu_gt(limit, 0, "Limit for read_cb must be positive"); + --test_read_count; + if (test_read_count == 0) { + return -1; + } else { + size_t read_len = limit; + if (limit > 1) { + rand = prng_range_u64(&rand, (uint64_t)limit); + read_len -= (size_t)rand; + } + assert(read_len > 0); + memset(buf, 'a', read_len); + size_t prev_test_read_len = test_read_len; + test_read_len += read_len; + assert_zu_le(prev_test_read_len, test_read_len, + "Test read overflowed"); + return read_len; + } +} + +static void +test_buf_writer_pipe_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { + arg = 4; /* Starting value of random argument. 
*/ + for (int count = 5; count > 0; --count) { + arg = prng_lg_range_u64(&arg, 64); + arg_sum = 0; + test_read_count = count; + test_read_len = 0; + test_write_len = 0; + buf_writer_pipe(buf_writer, test_read_cb, &arg); + assert(test_read_count == 0); + expect_u64_eq(arg_sum, arg * count, ""); + expect_zu_eq(test_write_len, test_read_len, + "Write length should be equal to read length"); + } + buf_writer_terminate(tsdn, buf_writer); +} + +TEST_BEGIN(test_buf_write_pipe) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + test_buf, TEST_BUF_SIZE), + "buf_writer_init() should not encounter error on static buffer"); + test_buf_writer_pipe_body(tsdn, &buf_writer); +} +TEST_END + +TEST_BEGIN(test_buf_write_pipe_oom) { + buf_writer_t buf_writer; + tsdn_t *tsdn = tsdn_fetch(); + assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, + NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + test_buf_writer_pipe_body(tsdn, &buf_writer); +} +TEST_END + int main(void) { return test( test_buf_write_static, test_buf_write_dynamic, - test_buf_write_oom); + test_buf_write_oom, + test_buf_write_pipe, + test_buf_write_pipe_oom); } From 0d6d9e85866b77b39d39e0957fd2a577b3091935 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 1 Apr 2020 11:16:21 -0700 Subject: [PATCH 1606/2608] configure.ac: Put public symbols on one line. 
--- configure.ac | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index daac2050..1c2509a6 100644 --- a/configure.ac +++ b/configure.ac @@ -1000,8 +1000,7 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib -mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) From c9d56cddf27d52b77fc4e346fd841dcbf31ed671 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 1 Apr 2020 15:04:24 -0700 Subject: [PATCH 1607/2608] Optimize meld in qr module The goal of `qr_meld()` is to change the following four fields `(a->prev, a->prev->next, b->prev, b->prev->next)` from the values `(a->prev, a, b->prev, b)` to `(b->prev, b, a->prev, a)`. This commit changes ``` a->prev->next = b; b->prev->next = a; temp = a->prev; a->prev = b->prev; b->prev = temp; ``` to ``` temp = a->prev; a->prev = b->prev; b->prev = temp; a->prev->next = a; b->prev->next = b; ``` The benefit is that we can use `b->prev->next` for `temp`, and so there's no need to pass in `a_type`. The restriction is that `b` cannot be a `qr_next()` macro, so users of `qr_meld()` must pay attention. (Before this change, neither `a` nor `b` could be a `qr_next()` macro.) 
--- include/jemalloc/internal/qr.h | 18 ++++++++++-------- test/unit/qr.c | 12 ++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 1e1056b3..e5be443c 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -32,21 +32,23 @@ struct { \ (a_qrelm)->a_field.qre_next = (a_qr); \ } while (0) -#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \ - a_type *t; \ - (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ - t = (a_qr_a)->a_field.qre_prev; \ +/* a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ +#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ + (a_qr_a)->a_field.qre_prev; \ (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ - (a_qr_b)->a_field.qre_prev = t; \ + (a_qr_b)->a_field.qre_prev = \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ } while (0) /* * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_type, a_field) +#define qr_split(a_qr_a, a_qr_b, a_field) \ + qr_meld((a_qr_a), (a_qr_b), a_field) #define qr_remove(a_qr, a_field) do { \ (a_qr)->a_field.qre_prev->a_field.qre_next \ diff --git a/test/unit/qr.c b/test/unit/qr.c index 95c16927..16eed0e9 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -212,22 +212,22 @@ TEST_BEGIN(test_qr_meld_split) { qr_after_insert(&entries[i - 1], &entries[i], link); } - qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_split(&entries[0], &entries[SPLIT_INDEX], link); test_split_entries(entries); - qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_meld(&entries[0], &entries[SPLIT_INDEX], link); test_entries_ring(entries); - qr_meld(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_meld(&entries[0], &entries[SPLIT_INDEX], link); test_split_entries(entries); - qr_split(&entries[0], &entries[SPLIT_INDEX], ring_t, link); + qr_split(&entries[0], &entries[SPLIT_INDEX], link); test_entries_ring(entries); - qr_split(&entries[0], &entries[0], ring_t, link); + qr_split(&entries[0], &entries[0], link); test_entries_ring(entries); - qr_meld(&entries[0], &entries[0], ring_t, link); + qr_meld(&entries[0], &entries[0], link); test_entries_ring(entries); } TEST_END From 1ad06aa53bc5cca22dde934c3d46b6f683057346 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 1 Apr 2020 16:13:57 -0700 Subject: [PATCH 1608/2608] deduplicate insert and delete logic in qr module --- include/jemalloc/internal/qr.h | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index e5be443c..559cbe42 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -18,20 +18,6 @@ struct { \ #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -#define qr_before_insert(a_qrelm, a_qr, a_field) do { \ - 
(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qrelm); \ - (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \ - (a_qrelm)->a_field.qre_prev = (a_qr); \ -} while (0) - -#define qr_after_insert(a_qrelm, a_qr, a_field) do { \ - (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \ - (a_qr)->a_field.qre_prev = (a_qrelm); \ - (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \ - (a_qrelm)->a_field.qre_next = (a_qr); \ -} while (0) - /* a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ #define qr_meld(a_qr_a, a_qr_b, a_field) do { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ @@ -43,6 +29,12 @@ struct { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ } while (0) +#define qr_before_insert(a_qrelm, a_qr, a_field) \ + qr_meld((a_qrelm), (a_qr), a_field) + +#define qr_after_insert(a_qrelm, a_qr, a_field) \ + qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field) + /* * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
@@ -50,14 +42,8 @@ struct { \ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) -#define qr_remove(a_qr, a_field) do { \ - (a_qr)->a_field.qre_prev->a_field.qre_next \ - = (a_qr)->a_field.qre_next; \ - (a_qr)->a_field.qre_next->a_field.qre_prev \ - = (a_qr)->a_field.qre_prev; \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) +#define qr_remove(a_qr, a_field) \ + qr_split(qr_next(a_qr, a_field), (a_qr), a_field) #define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ From 0dc95a882fee426a62cb93e7fe6a5b1ac171f9a2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 1 Apr 2020 17:02:37 -0700 Subject: [PATCH 1609/2608] Add concat and split functionality to ql module --- include/jemalloc/internal/ql.h | 19 ++++++++++ test/unit/ql.c | 69 +++++++++++++++++++++++++++++++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 80290407..93ddce58 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -58,6 +58,16 @@ struct { \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) +#define ql_concat(a_head_a, a_head_b, a_field) do { \ + if (ql_first(a_head_a) == NULL) { \ + ql_first(a_head_a) = ql_first(a_head_b); \ + } else if (ql_first(a_head_b) != NULL) { \ + qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ + a_field); \ + } \ + ql_first(a_head_b) = NULL; \ +} while (0) + #define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ @@ -79,6 +89,15 @@ struct { \ ql_remove((a_head), t, a_field); \ } while (0) +#define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ + if (ql_first(a_head_a) == (a_elm)) { \ + ql_first(a_head_a) = NULL; \ + } else { \ + qr_split(ql_first(a_head_a), (a_elm), a_field); \ + } \ + ql_first(a_head_b) = (a_elm); \ +} while (0) + #define ql_foreach(a_var, a_head, a_field) 
\ qr_foreach((a_var), ql_first(a_head), a_field) diff --git a/test/unit/ql.c b/test/unit/ql.c index 04da35fe..c2b19812 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -192,6 +192,72 @@ TEST_BEGIN(test_ql_insert) { } TEST_END +static void +test_concat_split_entries(list_t *entries, unsigned nentries_a, + unsigned nentries_b) { + init_entries(entries, nentries_a + nentries_b); + + list_head_t head_a; + ql_new(&head_a); + for (unsigned i = 0; i < nentries_a; i++) { + ql_tail_insert(&head_a, &entries[i], link); + } + if (nentries_a == 0) { + test_empty_list(&head_a); + } else { + test_entries_list(&head_a, entries, nentries_a); + } + + list_head_t head_b; + ql_new(&head_b); + for (unsigned i = 0; i < nentries_b; i++) { + ql_tail_insert(&head_b, &entries[nentries_a + i], link); + } + if (nentries_b == 0) { + test_empty_list(&head_b); + } else { + test_entries_list(&head_b, entries + nentries_a, nentries_b); + } + + ql_concat(&head_a, &head_b, link); + if (nentries_a + nentries_b == 0) { + test_empty_list(&head_a); + } else { + test_entries_list(&head_a, entries, nentries_a + nentries_b); + } + test_empty_list(&head_b); + + if (nentries_b == 0) { + return; + } + + list_head_t head_c; + ql_split(&head_a, &entries[nentries_a], &head_c, link); + if (nentries_a == 0) { + test_empty_list(&head_a); + } else { + test_entries_list(&head_a, entries, nentries_a); + } + test_entries_list(&head_c, entries + nentries_a, nentries_b); +} + +TEST_BEGIN(test_ql_concat_split) { + list_t entries[NENTRIES]; + + test_concat_split_entries(entries, 0, 0); + + test_concat_split_entries(entries, 0, 1); + test_concat_split_entries(entries, 1, 0); + + test_concat_split_entries(entries, 0, NENTRIES); + test_concat_split_entries(entries, 1, NENTRIES - 1); + test_concat_split_entries(entries, NENTRIES / 2, + NENTRIES - NENTRIES / 2); + test_concat_split_entries(entries, NENTRIES - 1, 1); + test_concat_split_entries(entries, NENTRIES, 0); +} +TEST_END + int main(void) { return test( @@ -200,5 
+266,6 @@ main(void) { test_ql_tail_remove, test_ql_head_insert, test_ql_head_remove, - test_ql_insert); + test_ql_insert, + test_ql_concat_split); } From 1dd24ca6d2daeaeb0b9d90f432809508a98b259b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 11:11:08 -0700 Subject: [PATCH 1610/2608] Add rotate functionality to ql module --- include/jemalloc/internal/ql.h | 10 ++++++++++ test/unit/ql.c | 25 ++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 93ddce58..3b780609 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -98,6 +98,16 @@ struct { \ ql_first(a_head_b) = (a_elm); \ } while (0) +/* + * An optimized version of: + * a_type *t = ql_first(a_head); + * ql_remove((a_head), t, a_field); + * ql_tail_insert((a_head), t, a_field); + */ +#define ql_rotate(a_head, a_field) do { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ +} while (0) + #define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) diff --git a/test/unit/ql.c b/test/unit/ql.c index c2b19812..662d1e8b 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -258,6 +258,28 @@ TEST_BEGIN(test_ql_concat_split) { } TEST_END +TEST_BEGIN(test_ql_rotate) { + list_head_t head; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head); + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) { + ql_tail_insert(&head, &entries[i], link); + } + + char head_id = ql_first(&head)->id; + for (i = 0; i < NENTRIES; i++) { + assert_c_eq(ql_first(&head)->id, head_id, ""); + ql_rotate(&head, link); + assert_c_eq(ql_last(&head, link)->id, head_id, ""); + head_id++; + } + test_entries_list(&head, entries, NENTRIES); +} +TEST_END + int main(void) { return test( @@ -267,5 +289,6 @@ main(void) { test_ql_head_insert, test_ql_head_remove, test_ql_insert, - test_ql_concat_split); + test_ql_concat_split, + 
test_ql_rotate); } From a62b7ed92841070932d6aea649ff40933c307cae Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 13:05:16 -0700 Subject: [PATCH 1611/2608] Add emptiness checking to ql module --- include/jemalloc/internal/ql.h | 2 ++ test/unit/ql.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 3b780609..b1ce4793 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -18,6 +18,8 @@ struct { \ (a_head)->qlh_first = NULL; \ } while (0) +#define ql_empty(a_head) ((a_head)->qlh_first == NULL) + #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) #define ql_first(a_head) ((a_head)->qlh_first) diff --git a/test/unit/ql.c b/test/unit/ql.c index 662d1e8b..8f689389 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -18,6 +18,7 @@ test_empty_list(list_head_t *head) { list_t *t; unsigned i; + expect_true(ql_empty(head), "Unexpected element for empty list"); expect_ptr_null(ql_first(head), "Unexpected element for empty list"); expect_ptr_null(ql_last(head, link), "Unexpected element for empty list"); @@ -58,6 +59,7 @@ test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { list_t *t; unsigned i; + expect_false(ql_empty(head), "List should not be empty"); expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id, "Element id mismatch"); From 4b66297ea0b0ed2ec5c4421878a31f5b27448624 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 13:14:24 -0700 Subject: [PATCH 1612/2608] Add move constructor to ql module --- include/jemalloc/internal/ql.h | 5 +++++ test/unit/ql.c | 23 ++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index b1ce4793..16cd88d5 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -18,6 +18,11 @@ struct 
{ \ (a_head)->qlh_first = NULL; \ } while (0) +#define ql_move(a_head_dest, a_head_src) do { \ + (a_head_dest)->qlh_first = (a_head_src)->qlh_first; \ + (a_head_src)->qlh_first = NULL; \ +} while (0) + #define ql_empty(a_head) ((a_head)->qlh_first == NULL) #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) diff --git a/test/unit/ql.c b/test/unit/ql.c index 8f689389..f9130582 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -282,6 +282,26 @@ TEST_BEGIN(test_ql_rotate) { } TEST_END +TEST_BEGIN(test_ql_move) { + list_head_t head_dest, head_src; + list_t entries[NENTRIES]; + unsigned i; + + ql_new(&head_src); + ql_move(&head_dest, &head_src); + test_empty_list(&head_src); + test_empty_list(&head_dest); + + init_entries(entries, sizeof(entries)/sizeof(list_t)); + for (i = 0; i < NENTRIES; i++) { + ql_tail_insert(&head_src, &entries[i], link); + } + ql_move(&head_dest, &head_src); + test_empty_list(&head_src); + test_entries_list(&head_dest, entries, NENTRIES); +} +TEST_END + int main(void) { return test( @@ -292,5 +312,6 @@ main(void) { test_ql_head_remove, test_ql_insert, test_ql_concat_split, - test_ql_rotate); + test_ql_rotate, + test_ql_move); } From ce17af422172b9d924bccfc5d08bb44a10fb0cac Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 3 Apr 2020 15:05:20 -0700 Subject: [PATCH 1613/2608] Better structure ql module --- include/jemalloc/internal/ql.h | 35 +++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index 16cd88d5..db672194 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -14,24 +14,25 @@ struct { \ #define ql_elm(a_type) qr(a_type) /* List functions. 
*/ +#define ql_first(a_head) ((a_head)->qlh_first) + #define ql_new(a_head) do { \ - (a_head)->qlh_first = NULL; \ + ql_first(a_head) = NULL; \ } while (0) +#define ql_clear(a_head) ql_new(a_head) + #define ql_move(a_head_dest, a_head_src) do { \ - (a_head_dest)->qlh_first = (a_head_src)->qlh_first; \ - (a_head_src)->qlh_first = NULL; \ + ql_first(a_head_dest) = ql_first(a_head_src); \ + ql_clear(a_head_src); \ } while (0) -#define ql_empty(a_head) ((a_head)->qlh_first == NULL) +#define ql_empty(a_head) (ql_first(a_head) == NULL) #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) -#define ql_first(a_head) ((a_head)->qlh_first) - #define ql_last(a_head, a_field) \ - ((ql_first(a_head) != NULL) \ - ? qr_prev(ql_first(a_head), a_field) : NULL) + (ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field)) #define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ @@ -52,27 +53,27 @@ struct { \ qr_after_insert((a_qlelm), (a_elm), a_field) #define ql_head_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ + if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) #define ql_tail_insert(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) != NULL) { \ + if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) #define ql_concat(a_head_a, a_head_b, a_field) do { \ - if (ql_first(a_head_a) == NULL) { \ - ql_first(a_head_a) = ql_first(a_head_b); \ - } else if (ql_first(a_head_b) != NULL) { \ + if (ql_empty(a_head_a)) { \ + ql_move(a_head_a, a_head_b); \ + } else if (!ql_empty(a_head_b)) { \ qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ a_field); \ + ql_clear(a_head_b); \ } \ - ql_first(a_head_b) = NULL; \ } while (0) #define ql_remove(a_head, a_elm, a_field) do { \ @@ -82,7 +83,7 @@ struct { \ if (ql_first(a_head) != (a_elm)) { \ qr_remove((a_elm), 
a_field); \ } else { \ - ql_first(a_head) = NULL; \ + ql_clear(a_head); \ } \ } while (0) @@ -98,11 +99,11 @@ struct { \ #define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ if (ql_first(a_head_a) == (a_elm)) { \ - ql_first(a_head_a) = NULL; \ + ql_move(a_head_b, a_head_a); \ } else { \ qr_split(ql_first(a_head_a), (a_elm), a_field); \ + ql_first(a_head_b) = (a_elm); \ } \ - ql_first(a_head_b) = (a_elm); \ } while (0) /* From 8da6676a029f128753941eedcf2a8b4389cd80f1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 11:12:53 -0700 Subject: [PATCH 1614/2608] Don't do reentrant testing in junk tests. --- test/unit/junk.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/unit/junk.c b/test/unit/junk.c index 5a74c3d7..314da3ce 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -185,7 +185,11 @@ int main(void) { junk_alloc_callback = &test_junk; junk_free_callback = &test_junk; - return test( + /* + * We check the last pointer junked. If a reentrant call happens, that + * might be an internal allocation. + */ + return test_no_reentrancy( test_junk_alloc_free, test_realloc_expand); } From a5ddfa7d91f96cb1b648c6808488682e96880eb7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 10:48:58 -0700 Subject: [PATCH 1615/2608] Use ql for prof last-N list --- include/jemalloc/internal/prof_structs.h | 2 +- src/prof_recent.c | 125 +++++++++++------------ 2 files changed, 61 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 977eb1c8..73ef8fc5 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -203,7 +203,7 @@ struct prof_recent_s { nstime_t alloc_time; nstime_t dalloc_time; - prof_recent_t *next; + ql_elm(prof_recent_t) link; size_t size; prof_tctx_t *alloc_tctx; edata_t *alloc_edata; /* NULL means allocation has been freed. 
*/ diff --git a/src/prof_recent.c b/src/prof_recent.c index 488cf178..185e2b66 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -14,11 +14,13 @@ # define STATIC_INLINE_IF_NOT_TEST #endif +typedef ql_head(prof_recent_t) prof_recent_list_t; + ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; static ssize_t prof_recent_alloc_count = 0; -static prof_recent_t *prof_recent_alloc_dummy = NULL; +static prof_recent_list_t prof_recent_alloc_list; static void prof_recent_alloc_max_init() { @@ -204,29 +206,26 @@ prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { STATIC_INLINE_IF_NOT_TEST prof_recent_t * prof_recent_alloc_begin(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_dummy != NULL); - return prof_recent_alloc_dummy->next; + return ql_first(&prof_recent_alloc_list); } STATIC_INLINE_IF_NOT_TEST prof_recent_t * prof_recent_alloc_end(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_dummy != NULL); - return prof_recent_alloc_dummy; + return NULL; } STATIC_INLINE_IF_NOT_TEST prof_recent_t * prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(prof_recent_alloc_dummy != NULL); - assert(node != NULL && node != prof_recent_alloc_dummy); - return node->next; + assert(node != NULL); + return ql_next(&prof_recent_alloc_list, node, link); } static bool prof_recent_alloc_is_empty(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - if (prof_recent_alloc_begin(tsd) == prof_recent_alloc_end(tsd)) { + if (ql_empty(&prof_recent_alloc_list)) { assert(prof_recent_alloc_count == 0); return true; } else { @@ -238,17 +237,17 @@ prof_recent_alloc_is_empty(tsd_t *tsd) { static void prof_recent_alloc_assert_count(tsd_t 
*tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - if (config_debug) { - ssize_t count = 0; - prof_recent_t *n = prof_recent_alloc_begin(tsd); - while (n != prof_recent_alloc_end(tsd)) { - ++count; - n = prof_recent_alloc_next(tsd, n); - } - assert(count == prof_recent_alloc_count); - assert(prof_recent_alloc_max_get(tsd) == -1 || - count <= prof_recent_alloc_max_get(tsd)); + if (!config_debug) { + return; } + ssize_t count = 0; + prof_recent_t *n; + ql_foreach(n, &prof_recent_alloc_list, link) { + ++count; + } + assert(count == prof_recent_alloc_count); + assert(prof_recent_alloc_max_get(tsd) == -1 || + count <= prof_recent_alloc_max_get(tsd)); } void @@ -311,45 +310,42 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { goto label_rollback; } - assert(prof_recent_alloc_dummy != NULL); - { - /* Fill content into the dummy node. */ - prof_recent_t *node = prof_recent_alloc_dummy; - node->size = size; - nstime_copy(&node->alloc_time, - edata_prof_alloc_time_get(edata)); - node->alloc_tctx = tctx; - edata_prof_recent_alloc_set(tsd, edata, node); - nstime_init_zero(&node->dalloc_time); - node->dalloc_tctx = NULL; - } - prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx; if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) { - /* If upper limit is reached, simply shift the dummy. */ + /* If upper limit is reached, rotate the head. 
*/ assert(prof_recent_alloc_max_get(tsd) != -1); assert(!prof_recent_alloc_is_empty(tsd)); - prof_recent_alloc_dummy = prof_recent_alloc_dummy->next; - old_alloc_tctx = prof_recent_alloc_dummy->alloc_tctx; + prof_recent_t *head = ql_first(&prof_recent_alloc_list); + old_alloc_tctx = head->alloc_tctx; assert(old_alloc_tctx != NULL); - old_dalloc_tctx = prof_recent_alloc_dummy->dalloc_tctx; - prof_recent_alloc_evict_edata(tsd, prof_recent_alloc_dummy); + old_dalloc_tctx = head->dalloc_tctx; + prof_recent_alloc_evict_edata(tsd, head); + ql_rotate(&prof_recent_alloc_list, link); } else { - /* Otherwise use the new node as the dummy. */ + /* Otherwise make use of the new node. */ assert(prof_recent_alloc_max_get(tsd) == -1 || prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)); if (reserve == NULL) { goto label_rollback; } - reserve->next = prof_recent_alloc_dummy->next; - prof_recent_alloc_dummy->next = reserve; - prof_recent_alloc_dummy = reserve; + ql_elm_new(reserve, link); + ql_tail_insert(&prof_recent_alloc_list, reserve, link); reserve = NULL; old_alloc_tctx = NULL; old_dalloc_tctx = NULL; ++prof_recent_alloc_count; } + /* Fill content into the tail node. */ + prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link); + assert(tail != NULL); + tail->size = size; + nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata)); + tail->alloc_tctx = tctx; + edata_prof_recent_alloc_set(tsd, edata, tail); + nstime_init_zero(&tail->dalloc_time); + tail->dalloc_tctx = NULL; + assert(!prof_recent_alloc_is_empty(tsd)); prof_recent_alloc_assert_count(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -403,19 +399,27 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { return old_max; } - prof_recent_t *begin = prof_recent_alloc_dummy->next; /* For verification purpose only. 
*/ ssize_t count = prof_recent_alloc_count - max; - do { - assert(!prof_recent_alloc_is_empty(tsd)); - prof_recent_t *node = prof_recent_alloc_dummy->next; - assert(node != prof_recent_alloc_dummy); + prof_recent_t *node; + ql_foreach(node, &prof_recent_alloc_list, link) { + if (prof_recent_alloc_count == max) { + break; + } prof_recent_alloc_evict_edata(tsd, node); - prof_recent_alloc_dummy->next = node->next; --prof_recent_alloc_count; - } while (prof_recent_alloc_count > max); - prof_recent_t *end = prof_recent_alloc_dummy->next; - assert(begin != end); + } + assert(prof_recent_alloc_count == max); + + prof_recent_list_t old_list; + ql_move(&old_list, &prof_recent_alloc_list); + if (max == 0) { + assert(node == NULL); + } else { + assert(node != NULL); + ql_split(&old_list, node, &prof_recent_alloc_list, link); + } + assert(!ql_empty(&old_list)); prof_recent_alloc_assert_count(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -432,15 +436,15 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { * to and controlled by application. 
*/ do { - prof_recent_t *node = begin; + node = ql_first(&old_list); + ql_remove(&old_list, node, link); decrement_recent_count(tsd, node->alloc_tctx); if (node->dalloc_tctx != NULL) { decrement_recent_count(tsd, node->dalloc_tctx); } - begin = node->next; idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true); --count; - } while (begin != end); + } while (!ql_empty(&old_list)); assert(count == 0); return old_max; @@ -482,9 +486,8 @@ prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); - for (prof_recent_t *n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + prof_recent_t *n; + ql_foreach(n, &prof_recent_alloc_list, link) { emitter_json_object_begin(&emitter); emitter_json_kv(&emitter, "size", emitter_type_size, &n->size); @@ -541,15 +544,7 @@ prof_recent_init() { return true; } - assert(prof_recent_alloc_dummy == NULL); - prof_recent_alloc_dummy = (prof_recent_t *)iallocztm( - TSDN_NULL, sizeof(prof_recent_t), - sz_size2index(sizeof(prof_recent_t)), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (prof_recent_alloc_dummy == NULL) { - return true; - } - prof_recent_alloc_dummy->next = prof_recent_alloc_dummy; + ql_new(&prof_recent_alloc_list); return false; } From 2deabac079440f843f833f1fe121bc62dff8092c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 13:40:22 -0700 Subject: [PATCH 1616/2608] Get rid of custom iterator for last-N records --- include/jemalloc/internal/prof_recent.h | 5 ++- src/prof_recent.c | 38 +++++---------------- test/unit/prof_recent.c | 44 ++++++++----------------- 3 files changed, 25 insertions(+), 62 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index b2973db4..d0869ae6 100644 --- a/include/jemalloc/internal/prof_recent.h +++ 
b/include/jemalloc/internal/prof_recent.h @@ -7,9 +7,8 @@ void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); #ifdef JEMALLOC_JET -prof_recent_t *prof_recent_alloc_begin(tsd_t *tsd); -prof_recent_t *prof_recent_alloc_end(tsd_t *tsd); -prof_recent_t *prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node); +typedef ql_head(prof_recent_t) prof_recent_list_t; +extern prof_recent_list_t prof_recent_alloc_list; prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata); #endif diff --git a/src/prof_recent.c b/src/prof_recent.c index 185e2b66..88effc4b 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -8,19 +8,15 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_recent.h" -#ifndef JEMALLOC_JET -# define STATIC_INLINE_IF_NOT_TEST static inline -#else -# define STATIC_INLINE_IF_NOT_TEST -#endif - -typedef ql_head(prof_recent_t) prof_recent_list_t; - ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; static ssize_t prof_recent_alloc_count = 0; -static prof_recent_list_t prof_recent_alloc_list; +#ifndef JEMALLOC_JET +typedef ql_head(prof_recent_t) prof_recent_list_t; +static +#endif +prof_recent_list_t prof_recent_alloc_list; static void prof_recent_alloc_max_init() { @@ -102,7 +98,10 @@ edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { return edata_prof_recent_alloc_get_dont_call_directly(edata); } -STATIC_INLINE_IF_NOT_TEST prof_recent_t * +#ifndef JEMALLOC_JET +static inline +#endif +prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *recent_alloc = @@ -203,25 +202,6 @@ prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { } } -STATIC_INLINE_IF_NOT_TEST prof_recent_t * 
-prof_recent_alloc_begin(tsd_t *tsd) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - return ql_first(&prof_recent_alloc_list); -} - -STATIC_INLINE_IF_NOT_TEST prof_recent_t * -prof_recent_alloc_end(tsd_t *tsd) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - return NULL; -} - -STATIC_INLINE_IF_NOT_TEST prof_recent_t * -prof_recent_alloc_next(tsd_t *tsd, prof_recent_t *node) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert(node != NULL); - return ql_next(&prof_recent_alloc_list, node, link); -} - static bool prof_recent_alloc_is_empty(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 35a2333a..e19d9940 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -172,8 +172,7 @@ TEST_BEGIN(test_prof_recent_alloc) { if (i < OPT_ALLOC_MAX - 1) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_ne(prof_recent_alloc_begin(tsd), - prof_recent_alloc_end(tsd), + assert_false(ql_empty(&prof_recent_alloc_list), "Empty recent allocation"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -187,9 +186,7 @@ TEST_BEGIN(test_prof_recent_alloc) { } c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { @@ -220,9 +217,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { confirm_record_size(tsd, n, c + OPT_ALLOC_MAX); confirm_record_released(tsd, 
n); ++c; @@ -251,9 +246,7 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_malloc(tsd, p); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, /* Is the allocation from the third batch? */ @@ -283,9 +276,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); ++c; @@ -303,9 +294,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); ++c; @@ -323,9 +312,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { + ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); @@ -340,9 +327,7 @@ TEST_BEGIN(test_prof_recent_alloc) { NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - for (n = prof_recent_alloc_begin(tsd); - n != prof_recent_alloc_end(tsd); - n = prof_recent_alloc_next(tsd, n)) { 
+ ql_foreach(n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(tsd, n); @@ -356,13 +341,12 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - n = prof_recent_alloc_begin(tsd); - assert_ptr_ne(n, prof_recent_alloc_end(tsd), "Recent list is empty"); + assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty"); + n = ql_first(&prof_recent_alloc_list); confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); confirm_record_released(tsd, n); - n = prof_recent_alloc_next(tsd, n); - assert_ptr_eq(n, prof_recent_alloc_end(tsd), - "Recent list should be empty"); + n = ql_next(&prof_recent_alloc_list, n, link); + assert_ptr_null(n, "Recent list should only contain one record"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Completely turn off. */ @@ -370,7 +354,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -379,7 +363,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - assert_ptr_eq(prof_recent_alloc_begin(tsd), prof_recent_alloc_end(tsd), + assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); From c4e9ea8cc6c039af4f14f9e3ad7d92555693adbf Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 6 
Apr 2020 16:19:53 -0700 Subject: [PATCH 1617/2608] Get rid of locks in prof recent test --- include/jemalloc/internal/prof_recent.h | 2 +- src/prof_recent.c | 10 +-- test/unit/prof_recent.c | 81 ++++++++----------------- 3 files changed, 32 insertions(+), 61 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index d0869ae6..bd046526 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -9,7 +9,7 @@ void edata_prof_recent_alloc_init(edata_t *edata); #ifdef JEMALLOC_JET typedef ql_head(prof_recent_t) prof_recent_list_t; extern prof_recent_list_t prof_recent_alloc_list; -prof_recent_t *edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata); +prof_recent_t *edata_prof_recent_alloc_get_no_lock(const edata_t *edata); #endif #endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ diff --git a/src/prof_recent.c b/src/prof_recent.c index 88effc4b..7fd77e93 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -93,15 +93,15 @@ edata_prof_recent_alloc_init(edata_t *edata) { edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); } -static inline prof_recent_t * -edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { - return edata_prof_recent_alloc_get_dont_call_directly(edata); -} - #ifndef JEMALLOC_JET static inline #endif prof_recent_t * +edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { + return edata_prof_recent_alloc_get_dont_call_directly(edata); +} + +static inline prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *recent_alloc = diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index e19d9940..19ff15fd 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -101,43 +101,38 @@ TEST_END #define NTH_REQ_SIZE(n) ((n) * 97 + 101) static void -confirm_malloc(tsd_t *tsd, void *p) { +confirm_malloc(void *p) { 
assert_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_t *n = edata_prof_recent_alloc_get(tsd, e); + prof_recent_t *n = edata_prof_recent_alloc_get_no_lock(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_eq(e, n->alloc_edata, "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); } static void -confirm_record_size(tsd_t *tsd, prof_recent_t *n, unsigned kth) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +confirm_record_size(prof_recent_t *n, unsigned kth) { expect_zu_eq(n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } static void -confirm_record_living(tsd_t *tsd, prof_recent_t *n) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +confirm_record_living(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); assert_ptr_not_null(n->alloc_edata, "Recorded edata should not be NULL for living pointer"); - expect_ptr_eq(n, edata_prof_recent_alloc_get(tsd, n->alloc_edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(n->alloc_edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } static void -confirm_record_released(tsd_t *tsd, prof_recent_t *n) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +confirm_record_released(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_null(n->alloc_edata, @@ -168,14 +163,10 @@ TEST_BEGIN(test_prof_recent_alloc) { for (i = 0; i < 2 * OPT_ALLOC_MAX; ++i) { req_size = 
NTH_REQ_SIZE(i); p = malloc(req_size); - confirm_malloc(tsd, p); + confirm_malloc(p); if (i < OPT_ALLOC_MAX - 1) { - malloc_mutex_lock(tsd_tsdn(tsd), - &prof_recent_alloc_mtx); assert_false(ql_empty(&prof_recent_alloc_list), "Empty recent allocation"); - malloc_mutex_unlock(tsd_tsdn(tsd), - &prof_recent_alloc_mtx); free(p); /* * The recorded allocations may still include some @@ -185,17 +176,15 @@ TEST_BEGIN(test_prof_recent_alloc) { continue; } c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, i + c - OPT_ALLOC_MAX); + confirm_record_size(n, i + c - OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { - confirm_record_living(tsd, n); + confirm_record_living(n); } else { - confirm_record_released(tsd, n); + confirm_record_released(n); } } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); @@ -216,13 +205,11 @@ TEST_BEGIN(test_prof_recent_alloc) { p = malloc(req_size); assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { - confirm_record_size(tsd, n, c + OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + OPT_ALLOC_MAX); + confirm_record_released(n); ++c; } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); @@ -243,12 +230,11 @@ TEST_BEGIN(test_prof_recent_alloc) { for (; i < 4 * OPT_ALLOC_MAX; ++i) { req_size = NTH_REQ_SIZE(i); p = malloc(req_size); - confirm_malloc(tsd, p); + confirm_malloc(p); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, + confirm_record_size(n, /* Is the allocation from the third batch? */ i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ? 
/* If yes, then it's just recorded. */ @@ -259,12 +245,11 @@ TEST_BEGIN(test_prof_recent_alloc) { */ i + c - 2 * OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { - confirm_record_living(tsd, n); + confirm_record_living(n); } else { - confirm_record_released(tsd, n); + confirm_record_released(n); } } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); @@ -275,13 +260,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); ++c; } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); @@ -293,13 +276,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); ++c; } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); @@ -311,13 +292,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - 
confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); @@ -326,13 +305,11 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); c = 0; - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); ql_foreach(n, &prof_recent_alloc_list, link) { ++c; - confirm_record_size(tsd, n, c + 3 * OPT_ALLOC_MAX); - confirm_record_released(tsd, n); + confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); + confirm_record_released(n); } - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_u_eq(c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); @@ -340,32 +317,26 @@ TEST_BEGIN(test_prof_recent_alloc) { future = 1; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty"); n = ql_first(&prof_recent_alloc_list); - confirm_record_size(tsd, n, 4 * OPT_ALLOC_MAX - 1); - confirm_record_released(tsd, n); + confirm_record_size(n, 4 * OPT_ALLOC_MAX - 1); + confirm_record_released(n); n = ql_next(&prof_recent_alloc_list, n, link); assert_ptr_null(n, "Recent list should only contain one record"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Completely turn off. */ future = 0; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); /* Restore the settings. 
*/ future = OPT_ALLOC_MAX; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); confirm_prof_setup(tsd); } From 12be9f5727e382c96656f9469e9702322ccd0c73 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 7 Mar 2020 20:14:49 -0800 Subject: [PATCH 1618/2608] Add a stub PA module -- a page allocator. --- Makefile.in | 1 + include/jemalloc/internal/pa.h | 9 +++++++++ msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ src/pa.c | 2 ++ 7 files changed, 20 insertions(+) create mode 100644 include/jemalloc/internal/pa.h create mode 100644 src/pa.c diff --git a/Makefile.in b/Makefile.in index 10af489b..a3c43a6d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -125,6 +125,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex.c \ $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ + $(srcroot)src/pa.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h new file mode 100644 index 00000000..5146ae1a --- /dev/null +++ b/include/jemalloc/internal/pa.h @@ -0,0 +1,9 @@ +#ifndef JEMALLOC_INTERNAL_PA_H +#define JEMALLOC_INTERNAL_PA_H + +/* + * The page allocator; responsible for acquiring pages of memory for + * allocations. 
+ */ + +#endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 920d55ed..3c17e50b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -66,6 +66,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index fe77170d..2f5ed621 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 2db94010..d63042d8 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -66,6 +66,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index fe77170d..2f5ed621 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -82,6 +82,9 @@ Source Files + + Source Files + Source Files diff --git a/src/pa.c b/src/pa.c new file mode 100644 index 00000000..3a26b393 --- /dev/null +++ b/src/pa.c @@ -0,0 +1,2 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" From 585f92505521136157aad8ac2e9288609127f863 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 10:11:02 -0700 Subject: [PATCH 1619/2608] Move cache index randomization out of extent. This is logically at a higher level of the stack; extent should just allocate things at the page-level; it shouldn't care exactly why the callers wants a given number of pages. 
--- include/jemalloc/internal/arena_inlines_b.h | 26 ++++ include/jemalloc/internal/extent.h | 12 +- src/arena.c | 24 ++-- src/extent.c | 130 +++++++------------- src/large.c | 6 +- 5 files changed, 93 insertions(+), 105 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index e7f7b858..cadfc8f9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -397,4 +397,30 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } } +static inline void +arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, + size_t alignment) { + assert(edata_base_get(edata) == edata_addr_get(edata)); + + if (alignment < PAGE) { + unsigned lg_range = LG_PAGE - + lg_floor(CACHELINE_CEILING(alignment)); + size_t r; + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_prng_statep_get(tsd), lg_range); + } else { + uint64_t stack_value = (uint64_t)(uintptr_t)&r; + r = (size_t)prng_lg_range_u64(&stack_value, lg_range); + } + uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - + lg_range); + edata->e_addr = (void *)((uintptr_t)edata->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == + edata->e_addr); + } +} + #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index d0ba70b8..e615fb6e 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -20,19 +20,19 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t 
*arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero); void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit); + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); diff --git a/src/arena.c b/src/arena.c index d4b69798..f6876e35 100644 --- a/src/arena.c +++ b/src/arena.c @@ -433,24 +433,24 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; + size_t esize = usize + sz_large_pad; edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, usize, sz_large_pad, alignment, false, szind, zero); + NULL, esize, alignment, false, szind, zero); if (edata == NULL && arena_may_have_muzzy(arena)) { edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, usize, sz_large_pad, alignment, false, szind, zero); + NULL, esize, alignment, false, szind, zero); } - size_t size = usize + sz_large_pad; if (edata == NULL) { edata = ecache_alloc_grow(tsdn, arena, ehooks, - &arena->ecache_retained, NULL, usize, sz_large_pad, - alignment, false, szind, zero); + &arena->ecache_retained, NULL, esize, alignment, false, + szind, zero); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add * isn't used below, so 
there's no need to conditionlly * set it to 0 here. */ - mapped_add = size; + mapped_add = esize; } } else if (config_stats) { mapped_add = 0; @@ -466,7 +466,11 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } arena_stats_unlock(tsdn, &arena->stats); } - arena_nactive_add(arena, size >> LG_PAGE); + arena_nactive_add(arena, esize >> LG_PAGE); + } + + if (edata != NULL && sz_large_pad != 0) { + arena_cache_oblivious_randomize(tsdn, arena, edata, alignment); } return edata; @@ -1207,7 +1211,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, zero = false; slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, - NULL, bin_info->slab_size, 0, PAGE, true, szind, &zero); + NULL, bin_info->slab_size, PAGE, true, szind, &zero); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1227,10 +1231,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); + NULL, bin_info->slab_size, PAGE, true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, bin_info->slab_size, 0, PAGE, true, binind, &zero); + NULL, bin_info->slab_size, PAGE, true, binind, &zero); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, diff --git a/src/extent.c b/src/extent.c index 87dcec3f..54ac40b2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -40,45 +40,19 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit, bool 
growing_retained); + ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero, bool *commit); + ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit); /******************************************************************************/ -static void -extent_addr_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t alignment) { - assert(edata_base_get(edata) == edata_addr_get(edata)); - - if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); - size_t r; - if (!tsdn_null(tsdn)) { - tsd_t *tsd = tsdn_tsd(tsdn); - r = (size_t)prng_lg_range_u64( - tsd_prng_statep_get(tsd), lg_range); - } else { - uint64_t stack_value = (uint64_t)(uintptr_t)&r; - r = (size_t)prng_lg_range_u64(&stack_value, lg_range); - } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - edata->e_addr = (void *)((uintptr_t)edata->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == - edata->e_addr); - } -} - static bool extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { @@ -97,32 +71,32 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t * ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t pad, size_t 
alignment, bool slab, - szind_t szind, bool *zero) { - assert(size + pad != 0); + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero) { + assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, zero, &commit, false); + size, alignment, slab, szind, zero, &commit, false); assert(edata == NULL || edata_dumpable_get(edata)); return edata; } edata_t * ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, bool *zero) { - assert(size + pad != 0); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero) { + assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, - size, pad, alignment, slab, szind, zero, &commit); + size, alignment, slab, szind, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -133,8 +107,8 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ return NULL; } - edata = extent_alloc_wrapper(tsdn, arena, ehooks, - new_addr, size, pad, alignment, slab, szind, zero, &commit); + edata = extent_alloc_wrapper(tsdn, arena, ehooks, new_addr, + size, alignment, slab, szind, zero, &commit); } assert(edata == NULL || edata_dumpable_get(edata)); @@ -382,8 +356,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t 
size, size_t alignment, bool slab, + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -400,11 +374,9 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * course cannot be recycled). */ assert(PAGE_ADDR2BASE(new_addr) == new_addr); - assert(pad == 0); assert(alignment <= PAGE); } - size_t esize = size + pad; malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { @@ -418,7 +390,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *unlock_edata = edata; assert(edata_base_get(edata) == new_addr); if (edata_arena_ind_get(edata) != arena_ind_get(arena) - || edata_size_get(edata) < esize + || edata_size_get(edata) < size || edata_state_get(edata) != ecache->state) { edata = NULL; @@ -426,7 +398,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { - edata = eset_fit(&ecache->eset, esize, alignment, + edata = eset_fit(&ecache->eset, size, alignment, ecache->delay_coalesce); } if (edata == NULL) { @@ -472,16 +444,15 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool growing_retained) { - size_t esize = size + pad; + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool growing_retained) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(new_addr == NULL || leadsize == 0); - if (edata_size_get(*edata) < leadsize + esize) { + if (edata_size_get(*edata) < leadsize + size) { return extent_split_interior_cant_alloc; } - size_t trailsize = edata_size_get(*edata) - leadsize - esize; + size_t trailsize = edata_size_get(*edata) - leadsize - size; *lead = NULL; *trail = NULL; @@ -492,7 +463,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (leadsize != 0) { *lead = *edata; *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, esize + trailsize, szind, + *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; @@ -504,7 +475,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *edata, esize, szind, slab, trailsize, SC_NSIZES, false, + *edata, size, szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *edata; @@ -530,8 +501,8 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t pad, size_t alignment, - bool slab, szind_t szind, edata_t *edata, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); @@ -539,7 +510,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - new_addr, size, pad, alignment, slab, szind, growing_retained); + new_addr, size, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -588,22 +559,21 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ static edata_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(new_addr == NULL || !slab); - assert(pad == 0 || !slab); assert(!*zero || !slab); edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, ecache, - new_addr, size, pad, alignment, slab, growing_retained); + new_addr, size, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } edata = extent_recycle_split(tsdn, arena, ehooks, ecache, new_addr, - size, pad, alignment, slab, szind, edata, growing_retained); + size, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { return NULL; } @@ -624,9 +594,6 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, *zero = true; } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } assert(edata_state_get(edata) == extent_state_active); if (slab) { edata_slab_set(edata, slab); @@ -650,16 +617,14 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, */ static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - size_t size, size_t pad, size_t alignment, bool slab, szind_t szind, + size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); - assert(pad == 0 || !slab); assert(!*zero || !slab); - size_t esize = size + pad; - size_t alloc_size_min = esize + PAGE_CEILING(alignment) - PAGE; + size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ - if (alloc_size_min < esize) { + if (alloc_size_min < size) { goto label_err; } /* @@ -715,8 +680,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, &edata, &lead, &trail, &to_leak, - &to_salvage, NULL, size, pad, alignment, slab, szind, true); + arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, + size, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { @@ -783,9 +748,6 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, edata); } - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } if (slab) { edata_slab_set(edata, true); emap_register_interior(tsdn, &emap_global, edata, szind); @@ -804,23 +766,23 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, + bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->ecache_retained, new_addr, size, pad, alignment, slab, - szind, zero, commit, true); + &arena->ecache_retained, new_addr, size, alignment, slab, szind, + zero, commit, true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - edata = extent_grow_retained(tsdn, arena, ehooks, size, pad, + edata = extent_grow_retained(tsdn, arena, ehooks, size, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { @@ -833,29 +795,25 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t * extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - size_t esize = size + pad; edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); if (edata == NULL) { return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, esize, palignment, + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, zero, commit); if (addr == NULL) { edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; } - edata_init(edata, arena_ind_get(arena), addr, esize, slab, szind, + edata_init(edata, arena_ind_get(arena), addr, size, slab, szind, arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); - if (pad != 0) { - extent_addr_randomize(tsdn, arena, edata, alignment); - } if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &arena->edata_cache, edata); return NULL; diff --git a/src/large.c b/src/large.c index 8982d103..1899a463 100644 --- a/src/large.c +++ b/src/large.c @@ -120,10 +120,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, edata_t *trail; bool new_mapping; if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - edata_past_get(edata), trailsize, 0, CACHELINE, false, SC_NSIZES, + edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = 
false; @@ -131,7 +131,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } else { if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, edata_past_get(edata), trailsize, - 0, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) + CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { return true; } From a24faed56915df38c5ab67b66cefbb596c0e165c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 10:35:56 -0700 Subject: [PATCH 1620/2608] PA: Move in the ecache_t objects. --- include/jemalloc/internal/arena_structs.h | 12 +-- include/jemalloc/internal/pa.h | 13 +++ src/arena.c | 112 ++++++++++++---------- src/background_thread.c | 8 +- src/ctl.c | 6 +- src/extent.c | 20 ++-- src/large.c | 16 ++-- 7 files changed, 102 insertions(+), 85 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index fde540af..23fa424c 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -12,6 +12,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pa.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/smoothstep.h" @@ -150,15 +151,8 @@ struct arena_s { /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; - /* - * Collections of extents that were previously allocated. These are - * used when allocating extents, in an attempt to re-use address space. - * - * Synchronization: internal. - */ - ecache_t ecache_dirty; - ecache_t ecache_muzzy; - ecache_t ecache_retained; + /* The page-level allocator shard this arena uses. 
*/ + pa_shard_t pa_shard; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 5146ae1a..4e73f10f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -6,4 +6,17 @@ * allocations. */ +typedef struct pa_shard_s pa_shard_t; +struct pa_shard_s { + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. + */ + ecache_t ecache_dirty; + ecache_t ecache_muzzy; + ecache_t ecache_retained; +}; + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index f6876e35..d9932b13 100644 --- a/src/arena.c +++ b/src/arena.c @@ -74,8 +74,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&arena->ecache_dirty); - *nmuzzy += ecache_npages_get(&arena->ecache_muzzy); + *ndirty += ecache_npages_get(&arena->pa_shard.ecache_dirty); + *nmuzzy += ecache_npages_get(&arena->pa_shard.ecache_muzzy); } void @@ -98,7 +98,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->mapped, base_mapped + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); arena_stats_accum_zu(&astats->retained, - ecache_npages_get(&arena->ecache_retained) << LG_PAGE); + ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), @@ -129,8 +129,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, 
ATOMIC_RELAXED) + - ecache_npages_get(&arena->ecache_dirty) + - ecache_npages_get(&arena->ecache_muzzy)) << LG_PAGE))); + ecache_npages_get(&arena->pa_shard.ecache_dirty) + + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( &arena->stats.abandoned_vm, ATOMIC_RELAXED)); @@ -172,12 +172,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = ecache_nextents_get(&arena->ecache_dirty, i); - muzzy = ecache_nextents_get(&arena->ecache_muzzy, i); - retained = ecache_nextents_get(&arena->ecache_retained, i); - dirty_bytes = ecache_nbytes_get(&arena->ecache_dirty, i); - muzzy_bytes = ecache_nbytes_get(&arena->ecache_muzzy, i); - retained_bytes = ecache_nbytes_get(&arena->ecache_retained, i); + dirty = ecache_nextents_get(&arena->pa_shard.ecache_dirty, i); + muzzy = ecache_nextents_get(&arena->pa_shard.ecache_muzzy, i); + retained = ecache_nextents_get(&arena->pa_shard.ecache_retained, + i); + dirty_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_dirty, + i); + muzzy_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_muzzy, + i); + retained_bytes = ecache_nbytes_get( + &arena->pa_shard.ecache_retained, i); atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); @@ -226,11 +230,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(ecache_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_dirty.mtx, arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(ecache_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy) - 
READ_ARENA_MUTEX_PROF_DATA(ecache_retained.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_retained.mtx, arena_prof_mutex_extents_retained) READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, arena_prof_mutex_decay_dirty) @@ -258,7 +262,8 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ecache_dalloc(tsdn, arena, ehooks, &arena->ecache_dirty, edata); + ecache_dalloc(tsdn, arena, ehooks, &arena->pa_shard.ecache_dirty, + edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -434,16 +439,18 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; size_t esize = usize + sz_large_pad; - edata_t *edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, esize, alignment, false, szind, zero); + edata_t *edata = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_dirty, NULL, esize, alignment, false, szind, + zero); if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, esize, alignment, false, szind, zero); + edata = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_muzzy, NULL, esize, alignment, + false, szind, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, arena, ehooks, - &arena->ecache_retained, NULL, esize, alignment, false, - szind, zero); + &arena->pa_shard.ecache_retained, NULL, esize, alignment, + false, szind, zero); if (config_stats) { /* * edata may be NULL on OOM, but in that case mapped_add @@ -808,14 +815,14 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->ecache_dirty, decay_ms); + &arena->pa_shard.ecache_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return 
arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->ecache_muzzy, decay_ms); + &arena->pa_shard.ecache_muzzy, decay_ms); } static size_t @@ -867,7 +874,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, !extent_purge_lazy_wrapper(tsdn, arena, ehooks, edata, 0, edata_size_get(edata))) { ecache_dalloc(tsdn, arena, ehooks, - &arena->ecache_muzzy, edata); + &arena->pa_shard.ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); break; @@ -978,18 +985,18 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->ecache_dirty, is_background_thread, all); + &arena->pa_shard.ecache_dirty, is_background_thread, all); } static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - if (ecache_npages_get(&arena->ecache_muzzy) == 0 && + if (ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0 && arena_muzzy_decay_ms_get(arena) <= 0) { return false; } return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->ecache_muzzy, is_background_thread, all); + &arena->pa_shard.ecache_muzzy, is_background_thread, all); } void @@ -1159,7 +1166,7 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; while ((edata = ecache_evict(tsdn, arena, ehooks, - &arena->ecache_retained, 0)) != NULL) { + &arena->pa_shard.ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, arena, ehooks, edata); } } @@ -1175,8 +1182,8 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached * extents, so only retained extents may remain. 
*/ - assert(ecache_npages_get(&arena->ecache_dirty) == 0); - assert(ecache_npages_get(&arena->ecache_muzzy) == 0); + assert(ecache_npages_get(&arena->pa_shard.ecache_dirty) == 0); + assert(ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0); /* Deallocate retained memory. */ arena_destroy_retained(tsd_tsdn(tsd), arena); @@ -1210,8 +1217,9 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); zero = false; - slab = ecache_alloc_grow(tsdn, arena, ehooks, &arena->ecache_retained, - NULL, bin_info->slab_size, PAGE, true, szind, &zero); + slab = ecache_alloc_grow(tsdn, arena, ehooks, + &arena->pa_shard.ecache_retained, NULL, bin_info->slab_size, PAGE, + true, szind, &zero); if (config_stats && slab != NULL) { arena_stats_mapped_add(tsdn, &arena->stats, @@ -1230,11 +1238,13 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - edata_t *slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - NULL, bin_info->slab_size, PAGE, true, binind, &zero); + edata_t *slab = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_dirty, NULL, bin_info->slab_size, PAGE, + true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - NULL, bin_info->slab_size, PAGE, true, binind, &zero); + slab = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_muzzy, NULL, bin_info->slab_size, + PAGE, true, binind, &zero); } if (slab == NULL) { slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, @@ -2023,16 +2033,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. 
*/ - if (ecache_init(tsdn, &arena->ecache_dirty, extent_state_dirty, ind, - true)) { + if (ecache_init(tsdn, &arena->pa_shard.ecache_dirty, extent_state_dirty, + ind, true)) { goto label_error; } /* * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (ecache_init(tsdn, &arena->ecache_muzzy, extent_state_muzzy, ind, - false)) { + if (ecache_init(tsdn, &arena->pa_shard.ecache_muzzy, extent_state_muzzy, + ind, false)) { goto label_error; } /* @@ -2041,8 +2051,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (ecache_init(tsdn, &arena->ecache_retained, extent_state_retained, - ind, false)) { + if (ecache_init(tsdn, &arena->pa_shard.ecache_retained, + extent_state_retained, ind, false)) { goto label_error; } @@ -2198,9 +2208,9 @@ arena_prefork2(tsdn_t *tsdn, arena_t *arena) { void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - ecache_prefork(tsdn, &arena->ecache_dirty); - ecache_prefork(tsdn, &arena->ecache_muzzy); - ecache_prefork(tsdn, &arena->ecache_retained); + ecache_prefork(tsdn, &arena->pa_shard.ecache_dirty); + ecache_prefork(tsdn, &arena->pa_shard.ecache_muzzy); + ecache_prefork(tsdn, &arena->pa_shard.ecache_retained); } void @@ -2240,9 +2250,9 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); edata_cache_postfork_parent(tsdn, &arena->edata_cache); - ecache_postfork_parent(tsdn, &arena->ecache_dirty); - ecache_postfork_parent(tsdn, &arena->ecache_muzzy); - ecache_postfork_parent(tsdn, &arena->ecache_retained); + ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); + ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); + ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_parent(tsdn, 
&arena->ecache_grow); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); @@ -2286,9 +2296,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); edata_cache_postfork_child(tsdn, &arena->edata_cache); - ecache_postfork_child(tsdn, &arena->ecache_dirty); - ecache_postfork_child(tsdn, &arena->ecache_muzzy); - ecache_postfork_child(tsdn, &arena->ecache_retained); + ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); + ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); + ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_child(tsdn, &arena->ecache_grow); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); diff --git a/src/background_thread.c b/src/background_thread.c index ca06be02..ddfe3a35 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -202,12 +202,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->ecache_dirty); + &arena->pa_shard.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->ecache_muzzy); + &arena->pa_shard.ecache_muzzy); return i1 < i2 ? 
i1 : i2; } @@ -717,8 +717,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->ecache_dirty) > 0 || - ecache_npages_get(&arena->ecache_muzzy) > 0 || + (ecache_npages_get(&arena->pa_shard.ecache_dirty) > 0 || + ecache_npages_get(&arena->pa_shard.ecache_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index 86ac83e1..1c180696 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3073,9 +3073,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->edata_cache.mtx); - MUTEX_PROF_RESET(arena->ecache_dirty.mtx); - MUTEX_PROF_RESET(arena->ecache_muzzy.mtx); - MUTEX_PROF_RESET(arena->ecache_retained.mtx); + MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); MUTEX_PROF_RESET(arena->decay_dirty.mtx); MUTEX_PROF_RESET(arena->decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/extent.c b/src/extent.c index 54ac40b2..d684388d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -686,11 +686,11 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, lead, true); + &arena->pa_shard.ecache_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, trail, true); + &arena->pa_shard.ecache_retained, trail, true); } } else { /* @@ -703,12 +703,12 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, to_salvage, true); + 
&arena->pa_shard.ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); extents_abandon_vm(tsdn, arena, ehooks, - &arena->ecache_retained, to_leak, true); + &arena->pa_shard.ecache_retained, to_leak, true); } goto label_err; } @@ -717,7 +717,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, arena, ehooks, - &arena->ecache_retained, edata, true); + &arena->pa_shard.ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -774,8 +774,8 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->ecache_retained, new_addr, size, alignment, slab, szind, - zero, commit, true); + &arena->pa_shard.ecache_retained, new_addr, size, alignment, slab, + szind, zero, commit, true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); if (config_prof) { @@ -974,7 +974,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &arena->ecache_dirty); + assert(ecache == &arena->pa_shard.ecache_dirty); /* Always coalesce large extents eagerly. 
*/ bool coalesced; do { @@ -1076,8 +1076,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->ecache_retained, edata, - false); + extent_record(tsdn, arena, ehooks, &arena->pa_shard.ecache_retained, + edata, false); } void diff --git a/src/large.c b/src/large.c index 1899a463..24ff3be7 100644 --- a/src/large.c +++ b/src/large.c @@ -119,19 +119,19 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool is_zeroed_trail = zero; edata_t *trail; bool new_mapping; - if ((trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_dirty, - edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail)) != NULL - || (trail = ecache_alloc(tsdn, arena, ehooks, &arena->ecache_muzzy, - edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, - &is_zeroed_trail)) != NULL) { + if ((trail = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_dirty, edata_past_get(edata), trailsize, + CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL + || (trail = ecache_alloc(tsdn, arena, ehooks, + &arena->pa_shard.ecache_muzzy, edata_past_get(edata), trailsize, + CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; } } else { if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, - &arena->ecache_retained, edata_past_get(edata), trailsize, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) + &arena->pa_shard.ecache_retained, edata_past_get(edata), + trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { return true; } From 8433ad84eaac3b7ecb6ee01256ccb5766708ae3a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 11:19:41 -0700 Subject: [PATCH 1621/2608] PA: move in shard initialization. 
--- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 27 +-------------------------- src/pa.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4e73f10f..d3f85142 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -19,4 +19,7 @@ struct pa_shard_s { ecache_t ecache_retained; }; +/* Returns true on error. */ +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index d9932b13..23f19883 100644 --- a/src/arena.c +++ b/src/arena.c @@ -2027,32 +2027,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - /* - * Delay coalescing for dirty extents despite the disruptive effect on - * memory layout for best-fit extent allocation, since cached extents - * are likely to be reused soon after deallocation, and the cost of - * merging/splitting extents is non-trivial. - */ - if (ecache_init(tsdn, &arena->pa_shard.ecache_dirty, extent_state_dirty, - ind, true)) { - goto label_error; - } - /* - * Coalesce muzzy extents immediately, because operations on them are in - * the critical path much less often than for dirty extents. - */ - if (ecache_init(tsdn, &arena->pa_shard.ecache_muzzy, extent_state_muzzy, - ind, false)) { - goto label_error; - } - /* - * Coalesce retained extents immediately, in part because they will - * never be evicted (and therefore there's no opportunity for delayed - * coalescing), but also because operations on retained extents are not - * in the critical path. 
- */ - if (ecache_init(tsdn, &arena->pa_shard.ecache_retained, - extent_state_retained, ind, false)) { + if (pa_shard_init(tsdn, &arena->pa_shard, ind)) { goto label_error; } diff --git a/src/pa.c b/src/pa.c index 3a26b393..620bf768 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,2 +1,35 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind) { + /* + * Delay coalescing for dirty extents despite the disruptive effect on + * memory layout for best-fit extent allocation, since cached extents + * are likely to be reused soon after deallocation, and the cost of + * merging/splitting extents is non-trivial. + */ + if (ecache_init(tsdn, &shard->ecache_dirty, extent_state_dirty, ind, + /* delay_coalesce */ true)) { + return true; + } + /* + * Coalesce muzzy extents immediately, because operations on them are in + * the critical path much less often than for dirty extents. + */ + if (ecache_init(tsdn, &shard->ecache_muzzy, extent_state_muzzy, ind, + /* delay_coalesce */ false)) { + return true; + } + /* + * Coalesce retained extents immediately, in part because they will + * never be evicted (and therefore there's no opportunity for delayed + * coalescing), but also because operations on retained extents are not + * in the critical path. + */ + if (ecache_init(tsdn, &shard->ecache_retained, extent_state_retained, + ind, /* delay_coalesce */ false)) { + return true; + } + return false; +} From 688fb3eb8959db178922476ffcfa5e94a82c1511 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 11:41:19 -0700 Subject: [PATCH 1622/2608] PA: Move in the arena edata_cache. 
--- include/jemalloc/internal/arena_structs.h | 3 -- include/jemalloc/internal/pa.h | 5 ++- src/arena.c | 16 ++++----- src/ctl.c | 2 +- src/extent.c | 42 +++++++++++------------ src/extent_dss.c | 6 ++-- src/large.c | 11 +++--- src/pa.c | 8 ++++- 8 files changed, 48 insertions(+), 45 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 23fa424c..dc4e3268 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -166,9 +166,6 @@ struct arena_s { /* The grow info for the retained ecache. */ ecache_grow_t ecache_grow; - /* The source of edata_t objects. */ - edata_cache_t edata_cache; - /* * bins is used to store heaps of free regions. * diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d3f85142..6bc5e338 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -17,9 +17,12 @@ struct pa_shard_s { ecache_t ecache_dirty; ecache_t ecache_muzzy; ecache_t ecache_retained; + + /* The source of edata_t objects. */ + edata_cache_t edata_cache; }; /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind); +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 23f19883..55a64c74 100644 --- a/src/arena.c +++ b/src/arena.c @@ -101,7 +101,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, - atomic_load_zu(&arena->edata_cache.count, ATOMIC_RELAXED), + atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); arena_stats_accum_u64(&astats->decay_dirty.npurge, @@ -228,7 +228,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. 
*/ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(edata_cache.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.edata_cache.mtx, arena_prof_mutex_extent_avail) READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_dirty.mtx, arena_prof_mutex_extents_dirty) @@ -2027,7 +2027,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (pa_shard_init(tsdn, &arena->pa_shard, ind)) { + if (pa_shard_init(tsdn, &arena->pa_shard, base, ind)) { goto label_error; } @@ -2044,10 +2044,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (edata_cache_init(&arena->edata_cache, base)) { - goto label_error; - } - /* Initialize bins. */ uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); @@ -2190,7 +2186,7 @@ arena_prefork3(tsdn_t *tsdn, arena_t *arena) { void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - edata_cache_prefork(tsdn, &arena->edata_cache); + edata_cache_prefork(tsdn, &arena->pa_shard.edata_cache); } void @@ -2224,7 +2220,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - edata_cache_postfork_parent(tsdn, &arena->edata_cache); + edata_cache_postfork_parent(tsdn, &arena->pa_shard.edata_cache); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); @@ -2270,7 +2266,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - edata_cache_postfork_child(tsdn, &arena->edata_cache); + edata_cache_postfork_child(tsdn, &arena->pa_shard.edata_cache); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); 
ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); diff --git a/src/ctl.c b/src/ctl.c index 1c180696..31277ae1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3072,7 +3072,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, continue; } MUTEX_PROF_RESET(arena->large_mtx); - MUTEX_PROF_RESET(arena->edata_cache.mtx); + MUTEX_PROF_RESET(arena->pa_shard.edata_cache.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); diff --git a/src/extent.c b/src/extent.c index d684388d..ae62070c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -157,8 +157,8 @@ ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, &arena->edata_cache, - ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, + &arena->pa_shard.edata_cache, ehooks, ecache, edata)) { break; } /* @@ -212,7 +212,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); } static void @@ -462,9 +462,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, - slab, growing_retained); + *edata = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, + ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, + szind, slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -474,9 +474,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &arena->edata_cache, ehooks, - *edata, size, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); + *trail = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, + ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, + false, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -643,7 +643,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); + edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); if (edata == NULL) { goto label_err; } @@ -654,7 +654,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); goto label_err; } @@ -663,7 +663,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); goto label_err; } @@ -800,7 +800,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, &arena->edata_cache); + edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); if (edata == NULL) { return NULL; } @@ -808,14 +808,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; } edata_init(edata, arena_ind_get(arena), addr, size, slab, szind, 
arena_extent_sn_next(arena), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; } @@ -971,8 +971,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, emap_assert_mapped(tsdn, &emap_global, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, &arena->edata_cache, ehooks, - ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, &arena->pa_shard.edata_cache, + ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &arena->pa_shard.ecache_dirty); /* Always coalesce large extents eagerly. */ @@ -980,7 +980,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, - &arena->edata_cache, ehooks, ecache, edata, + &arena->pa_shard.edata_cache, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && @@ -1004,7 +1004,7 @@ extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { WITNESS_RANK_CORE, 0); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return; } extent_dalloc_wrapper(tsdn, arena, ehooks, edata); @@ -1027,7 +1027,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); } return err; @@ -1097,7 +1097,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), 
edata_committed_get(edata)); - edata_cache_put(tsdn, &arena->edata_cache, edata); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); } static bool diff --git a/src/extent_dss.c b/src/extent_dss.c index 9cf098ea..d125c439 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -123,7 +123,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return NULL; } - gap = edata_cache_get(tsdn, &arena->edata_cache); + gap = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); if (gap == NULL) { return NULL; } @@ -189,7 +189,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_dalloc_gap(tsdn, arena, gap); } else { edata_cache_put(tsdn, - &arena->edata_cache, gap); + &arena->pa_shard.edata_cache, gap); } if (!*commit) { *commit = pages_decommit(ret, size); @@ -225,7 +225,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, } label_oom: extent_dss_extending_finish(); - edata_cache_put(tsdn, &arena->edata_cache, gap); + edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); return NULL; } diff --git a/src/large.c b/src/large.c index 24ff3be7..fa03a50e 100644 --- a/src/large.c +++ b/src/large.c @@ -81,9 +81,10 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { /* Split excess pages. 
*/ if (diff != 0) { - edata_t *trail = extent_split_wrapper(tsdn, &arena->edata_cache, - ehooks, edata, usize + sz_large_pad, sz_size2index(usize), - false, diff, SC_NSIZES, false); + edata_t *trail = extent_split_wrapper(tsdn, + &arena->pa_shard.edata_cache, ehooks, edata, + usize + sz_large_pad, sz_size2index(usize), false, diff, + SC_NSIZES, false); if (trail == NULL) { return true; } @@ -140,8 +141,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } } - if (extent_merge_wrapper(tsdn, ehooks, &arena->edata_cache, edata, - trail)) { + if (extent_merge_wrapper(tsdn, ehooks, &arena->pa_shard.edata_cache, + edata, trail)) { extent_dalloc_wrapper(tsdn, arena, ehooks, trail); return true; } diff --git a/src/pa.c b/src/pa.c index 620bf768..6db623b5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -2,7 +2,9 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind) { +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind) { + /* This will change eventually, but for now it should hold. */ + assert(base_ind_get(base) == ind); /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -31,5 +33,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, unsigned ind) { ind, /* delay_coalesce */ false)) { return true; } + if (edata_cache_init(&shard->edata_cache, base)) { + return true; + } + return false; } From 32cb7c2f0b4da21ed2b98b8fde7bba86309d1acd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 13:08:15 -0700 Subject: [PATCH 1623/2608] PA: Add a stats type. 
--- include/jemalloc/internal/arena_stats.h | 9 +++++++-- include/jemalloc/internal/pa.h | 14 +++++++++++++- src/arena.c | 8 +++++--- src/ctl.c | 7 ++++--- src/extent.c | 2 +- src/pa.c | 6 +++++- 6 files changed, 35 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 4166705e..ab103619 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/pa.h" #include "jemalloc/internal/sc.h" JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -112,8 +113,12 @@ struct arena_stats_s { arena_stats_u64_t nflushes_large; /* Derived. */ arena_stats_u64_t nrequests_large; /* Derived. */ - /* VM space had to be leaked (undocumented). Normally 0. */ - atomic_zu_t abandoned_vm; + /* + * The stats logically owned by the pa_shard in the same arena. This + * lives here only because it's convenient for the purposes of the ctl + * module -- it only knows about the single arena_stats. + */ + pa_shard_stats_t pa_shard_stats; /* Number of bytes cached in tcache associated with this arena. */ atomic_zu_t tcache_bytes; /* Derived. */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 6bc5e338..890f7b16 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,11 +1,20 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/edata_cache.h" + /* * The page allocator; responsible for acquiring pages of memory for * allocations. */ +typedef struct pa_shard_stats_s pa_shard_stats_t; +struct pa_shard_stats_s { + /* VM space had to be leaked (undocumented). Normally 0. 
*/ + atomic_zu_t abandoned_vm; +}; + typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { /* @@ -20,9 +29,12 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; + + pa_shard_stats_t *stats; }; /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind); +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, + pa_shard_stats_t *stats); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 55a64c74..d03bc72d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -131,8 +131,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + ecache_npages_get(&arena->pa_shard.ecache_dirty) + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); - arena_stats_accum_zu(&astats->abandoned_vm, atomic_load_zu( - &arena->stats.abandoned_vm, ATOMIC_RELAXED)); + arena_stats_accum_zu(&astats->pa_shard_stats.abandoned_vm, + atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, + ATOMIC_RELAXED)); for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, @@ -2027,7 +2028,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (pa_shard_init(tsdn, &arena->pa_shard, base, ind)) { + if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, + &arena->stats.pa_shard_stats)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index 31277ae1..1a9b0d9f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -939,8 +939,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.nrequests_large); ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large, &astats->astats.nflushes_large); - accum_atomic_zu(&sdstats->astats.abandoned_vm, - &astats->astats.abandoned_vm); + accum_atomic_zu(&sdstats->astats.pa_shard_stats.abandoned_vm, + &astats->astats.pa_shard_stats.abandoned_vm); 
accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); @@ -2962,7 +2962,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_resident, atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.abandoned_vm, + atomic_load_zu( + &arenas_i(mib[2])->astats->astats.pa_shard_stats.abandoned_vm, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, diff --git a/src/extent.c b/src/extent.c index ae62070c..1b7f00fb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -199,7 +199,7 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - arena_stats_accum_zu(&arena->stats.abandoned_vm, sz); + arena_stats_accum_zu(&arena->pa_shard.stats->abandoned_vm, sz); } /* * Leak extent after making sure its pages have already been purged, so diff --git a/src/pa.c b/src/pa.c index 6db623b5..516ae1d3 100644 --- a/src/pa.c +++ b/src/pa.c @@ -2,7 +2,8 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind) { +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, + pa_shard_stats_t *stats) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -37,5 +38,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind) { return true; } + shard->stats = stats; + memset(shard->stats, 0, sizeof(*shard->stats)); + return false; } From acd0bf6a2697d47fcfd868f76583c9d0a5974af1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 13:47:02 -0700 Subject: [PATCH 1624/2608] PA: move in ecache_grow. 
--- include/jemalloc/internal/arena_structs.h | 3 --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 18 +++++-------- src/extent.c | 31 +++++++++++++---------- src/pa.c | 4 +++ test/unit/retained.c | 2 +- 6 files changed, 32 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index dc4e3268..ed16337b 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -163,9 +163,6 @@ struct arena_s { arena_decay_t decay_dirty; /* dirty --> muzzy */ arena_decay_t decay_muzzy; /* muzzy --> retained */ - /* The grow info for the retained ecache. */ - ecache_grow_t ecache_grow; - /* * bins is used to store heaps of free regions. * diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 890f7b16..3b1a7659 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -30,6 +30,9 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; + /* The grow info for the retained ecache. 
*/ + ecache_grow_t ecache_grow; + pa_shard_stats_t *stats; }; diff --git a/src/arena.c b/src/arena.c index d03bc72d..ced01d73 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1935,14 +1935,14 @@ arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, } } - malloc_mutex_lock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->ecache_grow.limit); + *old_limit = sz_pind2sz(arena->pa_shard.ecache_grow.limit); } if (new_limit != NULL) { - arena->ecache_grow.limit = new_ind; + arena->pa_shard.ecache_grow.limit = new_ind; } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); return false; } @@ -2042,10 +2042,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (ecache_grow_init(tsdn, &arena->ecache_grow)) { - goto label_error; - } - /* Initialize bins. 
*/ uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); @@ -2176,7 +2172,7 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - ecache_grow_prefork(tsdn, &arena->ecache_grow); + ecache_grow_prefork(tsdn, &arena->pa_shard.ecache_grow); } void @@ -2226,7 +2222,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_parent(tsdn, &arena->ecache_grow); + ecache_grow_postfork_parent(tsdn, &arena->pa_shard.ecache_grow); malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { @@ -2272,7 +2268,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_child(tsdn, &arena->ecache_grow); + ecache_grow_postfork_child(tsdn, &arena->pa_shard.ecache_grow); malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); if (config_stats) { diff --git a/src/extent.c b/src/extent.c index 1b7f00fb..7c00525a 100644 --- a/src/extent.c +++ b/src/extent.c @@ -619,7 +619,7 @@ static edata_t * extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); assert(!*zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; @@ -632,15 +632,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, 
ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz( + arena->pa_shard.ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->ecache_grow.next + egn_skip >= + if (arena->pa_shard.ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz(arena->ecache_grow.next + egn_skip); + alloc_size = sz_pind2sz( + arena->pa_shard.ecache_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); @@ -735,14 +737,15 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->ecache_grow.next + egn_skip + 1 <= - arena->ecache_grow.limit) { - arena->ecache_grow.next += egn_skip + 1; + if (arena->pa_shard.ecache_grow.next + egn_skip + 1 <= + arena->pa_shard.ecache_grow.limit) { + arena->pa_shard.ecache_grow.next += egn_skip + 1; } else { - arena->ecache_grow.next = arena->ecache_grow.limit; + arena->pa_shard.ecache_grow.next + = arena->pa_shard.ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. 
*/ @@ -760,7 +763,7 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); return NULL; } @@ -771,13 +774,13 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &arena->pa_shard.ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, arena, ehooks, &arena->pa_shard.ecache_retained, new_addr, size, alignment, slab, szind, zero, commit, true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -786,9 +789,9 @@ extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &arena->ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); return edata; } diff --git a/src/pa.c b/src/pa.c index 516ae1d3..5063d48f 100644 --- a/src/pa.c +++ b/src/pa.c @@ -38,6 +38,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return true; } + if (ecache_grow_init(tsdn, &shard->ecache_grow)) { + return true; + } + shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); diff --git a/test/unit/retained.c b/test/unit/retained.c index 9ac83ef6..cf3de1ea 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->ecache_grow.next; pind++) { + arena->pa_shard.ecache_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; From 356aaa7dc65d554806287dfa1849a2d47be9b7a8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 8 Mar 2020 20:43:41 -0700 Subject: [PATCH 1625/2608] Introduce lockedint module. This pulls out the various abstractions where some stats counter is sometimes an atomic, sometimes a plain variable, sometimes always protected by a lock, sometimes protected by reads but not writes, etc. With this change, these cases are treated consistently, and access patterns tagged. In the process, we fix a few missed-update bugs (where one caller assumes "protected-by-a-lock" semantics and another does not). 
--- include/jemalloc/internal/arena_inlines_b.h | 12 +- include/jemalloc/internal/arena_stats.h | 179 ++++---------------- include/jemalloc/internal/atomic.h | 26 ++- include/jemalloc/internal/lockedint.h | 151 +++++++++++++++++ src/arena.c | 132 ++++++++------- src/ctl.c | 128 +++++++------- src/extent.c | 3 +- 7 files changed, 341 insertions(+), 290 deletions(-) create mode 100644 include/jemalloc/internal/lockedint.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index cadfc8f9..5b33769f 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -148,14 +148,14 @@ arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->decay_dirty.stats->nmadvise, 1); - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - extent_size); - arena_stats_unlock(tsdn, &arena->stats); + locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index ab103619..0a1ec734 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_STATS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/pa.h" @@ -9,40 +10,28 @@ 
JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS -/* - * In those architectures that support 64-bit atomics, we use atomic updates for - * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize - * externally. - */ -#ifdef JEMALLOC_ATOMIC_U64 -typedef atomic_u64_t arena_stats_u64_t; -#else -/* Must hold the arena stats mutex while reading atomically. */ -typedef uint64_t arena_stats_u64_t; -#endif - typedef struct arena_stats_large_s arena_stats_large_t; struct arena_stats_large_s { /* * Total number of allocation/deallocation requests served directly by * the arena. */ - arena_stats_u64_t nmalloc; - arena_stats_u64_t ndalloc; + locked_u64_t nmalloc; + locked_u64_t ndalloc; /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - arena_stats_u64_t nrequests; /* Partially derived. */ + locked_u64_t nrequests; /* Partially derived. */ /* * Number of tcache fills / flushes for large (similarly, periodically * merged). Note that there is no large tcache batch-fill currently * (i.e. only fill 1 at a time); however flush may be batched. */ - arena_stats_u64_t nfills; /* Partially derived. */ - arena_stats_u64_t nflushes; /* Partially derived. */ + locked_u64_t nfills; /* Partially derived. */ + locked_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ size_t curlextents; /* Derived. */ @@ -51,11 +40,11 @@ struct arena_stats_large_s { typedef struct arena_stats_decay_s arena_stats_decay_t; struct arena_stats_decay_s { /* Total number of purge sweeps. */ - arena_stats_u64_t npurge; + locked_u64_t npurge; /* Total number of madvise calls made. */ - arena_stats_u64_t nmadvise; + locked_u64_t nmadvise; /* Total number of pages purged. 
*/ - arena_stats_u64_t purged; + locked_u64_t purged; }; typedef struct arena_stats_extents_s arena_stats_extents_t; @@ -81,19 +70,19 @@ struct arena_stats_extents_s { */ typedef struct arena_stats_s arena_stats_t; struct arena_stats_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; -#endif + LOCKEDINT_MTX_DECLARE(mtx) - /* Number of bytes currently mapped, excluding retained memory. */ - atomic_zu_t mapped; /* Partially derived. */ + /* + * Number of bytes currently mapped, excluding retained memory. + */ + locked_zu_t mapped; /* Partially derived. */ /* * Number of unused virtual memory bytes currently retained. Retained * bytes are technically mapped (though always decommitted or purged), * but they are excluded from the mapped statistic (above). */ - atomic_zu_t retained; /* Derived. */ + locked_zu_t retained; /* Derived. */ /* Number of edata_t structs allocated by base, but not being used. */ atomic_zu_t edata_avail; @@ -107,11 +96,11 @@ struct arena_stats_s { atomic_zu_t metadata_thp; atomic_zu_t allocated_large; /* Derived. */ - arena_stats_u64_t nmalloc_large; /* Derived. */ - arena_stats_u64_t ndalloc_large; /* Derived. */ - arena_stats_u64_t nfills_large; /* Derived. */ - arena_stats_u64_t nflushes_large; /* Derived. */ - arena_stats_u64_t nrequests_large; /* Derived. */ + locked_u64_t nmalloc_large; /* Derived. */ + locked_u64_t ndalloc_large; /* Derived. */ + locked_u64_t nfills_large; /* Derived. */ + locked_u64_t nflushes_large; /* Derived. */ + locked_u64_t nrequests_large; /* Derived. */ /* * The stats logically owned by the pa_shard in the same arena. 
This @@ -139,138 +128,32 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { assert(((char *)arena_stats)[i] == 0); } } -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + if (LOCKEDINT_MTX_INIT(LOCKEDINT_MTX(arena_stats->mtx), "arena_stats", WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } -#endif /* Memory is zeroed, so there is no need to clear stats. */ return false; } -static inline void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static inline void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static inline uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static inline void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -static inline void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst += src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). 
- */ -static inline void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static inline size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, - atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static inline void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, - atomic_zu_t *p, size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static inline void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, - atomic_zu_t *p, size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ -static inline void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - static inline void arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); + LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx); arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; - arena_stats_add_u64(tsdn, arena_stats, &lstats->nrequests, nrequests); - arena_stats_add_u64(tsdn, arena_stats, &lstats->nflushes, 1); - arena_stats_unlock(tsdn, arena_stats); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), + &lstats->nrequests, nrequests); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), + &lstats->nflushes, 1); + LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } static inline void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); + LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx); + locked_inc_zu(tsdn, LOCKEDINT_MTX(arena_stats->mtx), + &arena_stats->mapped, size); + LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } #endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index a76f54ce..e5afb202 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -51,6 +51,20 @@ #define ATOMIC_ACQ_REL atomic_memory_order_acq_rel #define ATOMIC_SEQ_CST atomic_memory_order_seq_cst +/* + * Another convenience -- simple atomic helper functions. 
+ */ +#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, \ + lg_size) \ + JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ + ATOMIC_INLINE void \ + atomic_load_add_store_##short_type(atomic_##short_type##_t *a, \ + type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval + inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ + } + /* * Not all platforms have 64-bit atomics. If we do, this #define exposes that * fact. @@ -67,18 +81,18 @@ JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) */ JEMALLOC_GENERATE_ATOMICS(bool, b, 0) -JEMALLOC_GENERATE_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) -JEMALLOC_GENERATE_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) -JEMALLOC_GENERATE_INT_ATOMICS(uint8_t, u8, 0) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint8_t, u8, 0) -JEMALLOC_GENERATE_INT_ATOMICS(uint32_t, u32, 2) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint32_t, u32, 2) #ifdef JEMALLOC_ATOMIC_U64 -JEMALLOC_GENERATE_INT_ATOMICS(uint64_t, u64, 3) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(uint64_t, u64, 3) #endif #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h new file mode 100644 index 00000000..6a1f9ad1 --- /dev/null +++ b/include/jemalloc/internal/lockedint.h @@ -0,0 +1,151 @@ +#ifndef JEMALLOC_INTERNAL_LOCKEDINT_H +#define JEMALLOC_INTERNAL_LOCKEDINT_H + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. 
+ */ + +typedef struct locked_u64_s locked_u64_t; +#ifdef JEMALLOC_ATOMIC_U64 +struct locked_u64_s { + atomic_u64_t val; +}; +#else +/* Must hold the associated mutex. */ +struct locked_u64_s { + uint64_t val; +}; +#endif + +typedef struct locked_zu_s locked_zu_t; +struct locked_zu_s { + atomic_zu_t val; +}; + +#ifndef JEMALLOC_ATOMIC_U64 +# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; +# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) \ + malloc_mutex_init(ptr, name, rank, rank_mode) +# define LOCKEDINT_MTX(mtx) (&(mtx)) +# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) +#else +# define LOCKEDINT_MTX_DECLARE(name) +# define LOCKEDINT_MTX(ptr) NULL +# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false +# define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0) +#endif + +static inline uint64_t +locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(&p->val, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + return p->val; +#endif +} + +static inline void +locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, + uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + p->val += x; +#endif +} + +static inline void +locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, + uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, mtx); + p->val -= x; + assert(p->val + x >= p->val); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when its unnecessary (note that + * the types here are atomic). 
+ */ +static inline void +locked_inc_u64_unsynchronized(locked_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(&dst->val, ATOMIC_RELAXED); + atomic_store_u64(&dst->val, src + cur_dst, ATOMIC_RELAXED); +#else + dst->val += src; +#endif +} + +static inline uint64_t +locked_read_u64_unsynchronized(locked_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(&p->val, ATOMIC_RELAXED); +#else + return p->val; +#endif + +} + +static inline size_t +locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(&p->val, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + return atomic_load_zu(&p->val, ATOMIC_RELAXED); +#endif +} + +static inline void +locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, mtx); + size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); + atomic_store_zu(&p->val, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, mtx); + size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); + atomic_store_zu(&p->val, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. */ +static inline void +locked_inc_zu_unsynchronized(locked_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(&dst->val, ATOMIC_RELAXED); + atomic_store_zu(&dst->val, src + cur_dst, ATOMIC_RELAXED); +} + +/* + * Unlike the _u64 variant, this is safe to call unconditionally. 
+ */ +static inline size_t +locked_read_atomic_zu(locked_zu_t *p) { + return atomic_load_zu(&p->val, ATOMIC_RELAXED); +} + +#endif /* JEMALLOC_INTERNAL_LOCKEDINT_H */ diff --git a/src/arena.c b/src/arena.c index ced01d73..d4e200cf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -93,80 +93,89 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - arena_stats_accum_zu(&astats->mapped, base_mapped - + arena_stats_read_zu(tsdn, &arena->stats, &arena->stats.mapped)); - arena_stats_accum_zu(&astats->retained, + locked_inc_zu_unsynchronized(&astats->mapped, base_mapped + + locked_read_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.mapped)); + locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); atomic_store_zu(&astats->edata_avail, atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); - arena_stats_accum_u64(&astats->decay_dirty.npurge, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_dirty.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_dirty.npurge)); - arena_stats_accum_u64(&astats->decay_dirty.nmadvise, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_dirty.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_dirty.nmadvise)); - arena_stats_accum_u64(&astats->decay_dirty.purged, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_dirty.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_dirty.purged)); - arena_stats_accum_u64(&astats->decay_muzzy.npurge, - arena_stats_read_u64(tsdn, &arena->stats, + 
locked_inc_u64_unsynchronized(&astats->decay_muzzy.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_muzzy.npurge)); - arena_stats_accum_u64(&astats->decay_muzzy.nmadvise, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_muzzy.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_muzzy.nmadvise)); - arena_stats_accum_u64(&astats->decay_muzzy.purged, - arena_stats_read_u64(tsdn, &arena->stats, + locked_inc_u64_unsynchronized(&astats->decay_muzzy.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.decay_muzzy.purged)); - arena_stats_accum_zu(&astats->base, base_allocated); - arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); - arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); - arena_stats_accum_zu(&astats->resident, base_resident + + atomic_load_add_store_zu(&astats->base, base_allocated); + atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); + atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp); + atomic_load_add_store_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + ecache_npages_get(&arena->pa_shard.ecache_dirty) + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); - arena_stats_accum_zu(&astats->pa_shard_stats.abandoned_vm, + atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, ATOMIC_RELAXED)); for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { - uint64_t nmalloc = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nmalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nmalloc); - arena_stats_accum_u64(&lstats[i].nmalloc, nmalloc); - arena_stats_accum_u64(&astats->nmalloc_large, nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); + locked_inc_u64_unsynchronized(&astats->nmalloc_large, + 
nmalloc); - uint64_t ndalloc = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t ndalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); - arena_stats_accum_u64(&lstats[i].ndalloc, ndalloc); - arena_stats_accum_u64(&astats->ndalloc_large, ndalloc); + locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); + locked_inc_u64_unsynchronized(&astats->ndalloc_large, + ndalloc); - uint64_t nrequests = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nrequests = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); - arena_stats_accum_u64(&lstats[i].nrequests, + locked_inc_u64_unsynchronized(&lstats[i].nrequests, nmalloc + nrequests); - arena_stats_accum_u64(&astats->nrequests_large, + locked_inc_u64_unsynchronized(&astats->nrequests_large, nmalloc + nrequests); /* nfill == nmalloc for large currently. */ - arena_stats_accum_u64(&lstats[i].nfills, nmalloc); - arena_stats_accum_u64(&astats->nfills_large, nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc); + locked_inc_u64_unsynchronized(&astats->nfills_large, + nmalloc); - uint64_t nflush = arena_stats_read_u64(tsdn, &arena->stats, + uint64_t nflush = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nflushes); - arena_stats_accum_u64(&lstats[i].nflushes, nflush); - arena_stats_accum_u64(&astats->nflushes_large, nflush); + locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush); + locked_inc_u64_unsynchronized(&astats->nflushes_large, + nflush); assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - arena_stats_accum_zu(&astats->allocated_large, + atomic_load_add_store_zu(&astats->allocated_large, curlextents * sz_index2size(SC_NBINS + i)); } @@ -195,7 +204,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ATOMIC_RELAXED); } - arena_stats_unlock(tsdn, 
&arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); /* tcache_bytes counts currently cached bytes. */ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); @@ -204,13 +213,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; - arena_stats_accum_zu(&astats->tcache_bytes, - cache_bin_ncached_get(tbin, &tcache_bin_info[i]) - * sz_index2size(i)); + atomic_load_add_store_zu(&astats->tcache_bytes, + cache_bin_ncached_get(tbin, + &tcache_bin_info[i]) * sz_index2size(i)); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; - arena_stats_accum_zu(&astats->tcache_bytes, + atomic_load_add_store_zu(&astats->tcache_bytes, cache_bin_ncached_get(tbin, &tcache_bin_info[i + SC_NBINS]) * sz_index2size(i + SC_NBINS)); @@ -397,7 +406,7 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? index - SC_NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); } @@ -413,7 +422,7 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { index = sz_size2index(usize); hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; - arena_stats_add_u64(tsdn, &arena->stats, + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); } @@ -466,13 +475,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, if (edata != NULL) { if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); if (mapped_add != 0) { - arena_stats_add_zu(tsdn, &arena->stats, + locked_inc_zu(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.mapped, mapped_add); } - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_add(arena, esize >> LG_PAGE); } @@ -487,10 +497,10 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_dalloc_stats_update(tsdn, arena, edata_usize_get(edata)); - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE); } @@ -502,9 +512,9 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t udiff = oldusize - usize; if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_sub(arena, udiff >> LG_PAGE); } @@ -516,9 +526,9 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t udiff = usize - oldusize; if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - 
arena_stats_unlock(tsdn, &arena->stats); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } arena_nactive_add(arena, udiff >> LG_PAGE); } @@ -894,16 +904,16 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } if (config_stats) { - arena_stats_lock(tsdn, &arena->stats); - arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->npurge, - 1); - arena_stats_add_u64(tsdn, &arena->stats, + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &decay->stats->npurge, 1); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &decay->stats->nmadvise, nmadvise); - arena_stats_add_u64(tsdn, &arena->stats, &decay->stats->purged, - npurged); - arena_stats_sub_zu(tsdn, &arena->stats, &arena->stats.mapped, - nunmapped << LG_PAGE); - arena_stats_unlock(tsdn, &arena->stats); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &decay->stats->purged, npurged); + locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.mapped, nunmapped << LG_PAGE); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } return npurged; diff --git a/src/ctl.c b/src/ctl.c index 1a9b0d9f..56d30000 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -676,28 +676,19 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); - atomic_store_u64(dst, cur_dst + cur_src, ATOMIC_RELAXED); -#else - *dst += *src; -#endif -} - -/* Likewise: with ctl mutex synchronization, reading is simple. 
*/ -static uint64_t -ctl_arena_stats_read_u64(arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - return *p; -#endif +ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) { + locked_inc_u64_unsynchronized(dst, + locked_read_u64_unsynchronized(src)); } static void -accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +ctl_accum_locked_zu(locked_zu_t *dst, locked_zu_t *src) { + locked_inc_zu_unsynchronized(dst, + locked_read_atomic_zu(src)); +} + +static void +ctl_accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -870,26 +861,26 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - accum_atomic_zu(&sdstats->astats.mapped, + ctl_accum_locked_zu(&sdstats->astats.mapped, &astats->astats.mapped); - accum_atomic_zu(&sdstats->astats.retained, + ctl_accum_locked_zu(&sdstats->astats.retained, &astats->astats.retained); - accum_atomic_zu(&sdstats->astats.edata_avail, + ctl_accum_atomic_zu(&sdstats->astats.edata_avail, &astats->astats.edata_avail); } - ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_locked_u64(&sdstats->astats.decay_dirty.npurge, &astats->astats.decay_dirty.npurge); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + ctl_accum_locked_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_locked_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + 
ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.nmadvise, &astats->astats.decay_muzzy.nmadvise); - ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -900,13 +891,13 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { - accum_atomic_zu(&sdstats->astats.base, + ctl_accum_atomic_zu(&sdstats->astats.base, &astats->astats.base); - accum_atomic_zu(&sdstats->astats.internal, + ctl_accum_atomic_zu(&sdstats->astats.internal, &astats->astats.internal); - accum_atomic_zu(&sdstats->astats.resident, + ctl_accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); - accum_atomic_zu(&sdstats->astats.metadata_thp, + ctl_accum_atomic_zu(&sdstats->astats.metadata_thp, &astats->astats.metadata_thp); } else { assert(atomic_load_zu( @@ -925,24 +916,25 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { - accum_atomic_zu(&sdstats->astats.allocated_large, + ctl_accum_atomic_zu(&sdstats->astats.allocated_large, &astats->astats.allocated_large); } else { assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_locked_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_locked_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_locked_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); - ctl_accum_arena_stats_u64(&sdstats->astats.nflushes_large, + ctl_accum_locked_u64(&sdstats->astats.nflushes_large, &astats->astats.nflushes_large); - accum_atomic_zu(&sdstats->astats.pa_shard_stats.abandoned_vm, + ctl_accum_atomic_zu( 
+ &sdstats->astats.pa_shard_stats.abandoned_vm, &astats->astats.pa_shard_stats.abandoned_vm); - accum_atomic_zu(&sdstats->astats.tcache_bytes, + ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes, &astats->astats.tcache_bytes); if (ctl_arena->arena_ind == 0) { @@ -978,11 +970,11 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge stats for large allocations. */ for (i = 0; i < SC_NSIZES - SC_NBINS; i++) { - ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + ctl_accum_locked_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_locked_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_locked_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ -994,17 +986,17 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge extents stats. */ for (i = 0; i < SC_NPSIZES; i++) { - accum_atomic_zu(&sdstats->estats[i].ndirty, + ctl_accum_atomic_zu(&sdstats->estats[i].ndirty, &astats->estats[i].ndirty); - accum_atomic_zu(&sdstats->estats[i].nmuzzy, + ctl_accum_atomic_zu(&sdstats->estats[i].nmuzzy, &astats->estats[i].nmuzzy); - accum_atomic_zu(&sdstats->estats[i].nretained, + ctl_accum_atomic_zu(&sdstats->estats[i].nretained, &astats->estats[i].nretained); - accum_atomic_zu(&sdstats->estats[i].dirty_bytes, + ctl_accum_atomic_zu(&sdstats->estats[i].dirty_bytes, &astats->estats[i].dirty_bytes); - accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, + ctl_accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, &astats->estats[i].muzzy_bytes); - accum_atomic_zu(&sdstats->estats[i].retained_bytes, + ctl_accum_atomic_zu(&sdstats->estats[i].retained_bytes, &astats->estats[i].retained_bytes); } } @@ -1104,10 +1096,10 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); - 
ctl_stats->mapped = atomic_load_zu( - &ctl_sarena->astats->astats.mapped, ATOMIC_RELAXED); - ctl_stats->retained = atomic_load_zu( - &ctl_sarena->astats->astats.retained, ATOMIC_RELAXED); + ctl_stats->mapped = locked_read_atomic_zu( + &ctl_sarena->astats->astats.mapped); + ctl_stats->retained = locked_read_atomic_zu( + &ctl_sarena->astats->astats.retained); ctl_background_thread_stats_read(tsdn); @@ -2916,10 +2908,10 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.mapped, ATOMIC_RELAXED), + locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.mapped), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.retained, ATOMIC_RELAXED), + locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail, @@ -2927,23 +2919,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_muzzy_nmadvise, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, @@ -2982,23 +2974,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) /* * Note: "nmalloc_large" here instead of "nfills" in the read. This is * intentional (large has no batch fill). */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) /* Lock profiling related APIs below. 
*/ @@ -3124,13 +3116,13 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - ctl_arena_stats_read_u64( + locked_read_u64_unsynchronized( &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) diff --git a/src/extent.c b/src/extent.c index 7c00525a..a023d3e2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -199,7 +199,8 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - arena_stats_accum_zu(&arena->pa_shard.stats->abandoned_vm, sz); + atomic_fetch_add_zu(&arena->pa_shard.stats->abandoned_vm, sz, + ATOMIC_RELAXED); } /* * Leak extent after making sure its pages have already been purged, so From 1ad368c8b7443881f40bc84cba87259f1892a8ce Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 10:40:37 -0700 Subject: [PATCH 1626/2608] PA: Move in decay stats. 
--- include/jemalloc/internal/arena_inlines_b.h | 5 ++- include/jemalloc/internal/arena_stats.h | 13 ------ include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/pa.h | 13 ++++++ src/arena.c | 41 +++++++++++------- src/ctl.c | 48 +++++++++++++-------- 6 files changed, 72 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5b33769f..eac4a631 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -150,9 +150,10 @@ arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, /* Update stats accordingly. */ LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->decay_dirty.stats->nmadvise, 1); + &arena->pa_shard.stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->decay_dirty.stats->purged, extent_size >> LG_PAGE); + &arena->pa_shard.stats->decay_dirty.purged, + extent_size >> LG_PAGE); locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.mapped, extent_size); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 0a1ec734..82996b8b 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -37,16 +37,6 @@ struct arena_stats_large_s { size_t curlextents; /* Derived. */ }; -typedef struct arena_stats_decay_s arena_stats_decay_t; -struct arena_stats_decay_s { - /* Total number of purge sweeps. */ - locked_u64_t npurge; - /* Total number of madvise calls made. */ - locked_u64_t nmadvise; - /* Total number of pages purged. */ - locked_u64_t purged; -}; - typedef struct arena_stats_extents_s arena_stats_extents_t; struct arena_stats_extents_s { /* @@ -87,9 +77,6 @@ struct arena_stats_s { /* Number of edata_t structs allocated by base, but not being used. 
*/ atomic_zu_t edata_avail; - arena_stats_decay_t decay_dirty; - arena_stats_decay_t decay_muzzy; - atomic_zu_t base; /* Derived. */ atomic_zu_t internal; atomic_zu_t resident; /* Derived. */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index ed16337b..7d312343 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -73,7 +73,7 @@ struct arena_decay_s { * arena and ctl code. * * Synchronization: Same as associated arena's stats field. */ - arena_stats_decay_t *stats; + pa_shard_decay_stats_t *stats; /* Peak number of pages in associated extents. Used for debug only. */ uint64_t ceil_npages; }; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3b1a7659..1b9e58c4 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -3,14 +3,27 @@ #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/lockedint.h" /* * The page allocator; responsible for acquiring pages of memory for * allocations. */ +typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; +struct pa_shard_decay_stats_s { + /* Total number of purge sweeps. */ + locked_u64_t npurge; + /* Total number of madvise calls made. */ + locked_u64_t nmadvise; + /* Total number of pages purged. */ + locked_u64_t purged; +}; + typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { + pa_shard_decay_stats_t decay_dirty; + pa_shard_decay_stats_t decay_muzzy; /* VM space had to be leaked (undocumented). Normally 0. 
*/ atomic_zu_t abandoned_vm; }; diff --git a/src/arena.c b/src/arena.c index d4e200cf..a29dc937 100644 --- a/src/arena.c +++ b/src/arena.c @@ -105,25 +105,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); - locked_inc_u64_unsynchronized(&astats->decay_dirty.npurge, + /* Dirty pa_shard_decay_stats_t */ + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_dirty.npurge)); - locked_inc_u64_unsynchronized(&astats->decay_dirty.nmadvise, + &arena->pa_shard.stats->decay_dirty.npurge)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_dirty.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_dirty.nmadvise)); - locked_inc_u64_unsynchronized(&astats->decay_dirty.purged, + &arena->pa_shard.stats->decay_dirty.nmadvise)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_dirty.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_dirty.purged)); + &arena->pa_shard.stats->decay_dirty.purged)); - locked_inc_u64_unsynchronized(&astats->decay_muzzy.npurge, + /* Muzzy pa_shard_decay_stats_t */ + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_muzzy.npurge)); - locked_inc_u64_unsynchronized(&astats->decay_muzzy.nmadvise, + &arena->pa_shard.stats->decay_muzzy.npurge)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_muzzy.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_muzzy.nmadvise)); - locked_inc_u64_unsynchronized(&astats->decay_muzzy.purged, + &arena->pa_shard.stats->decay_muzzy.nmadvise)); + locked_inc_u64_unsynchronized( + &astats->pa_shard_stats.decay_muzzy.purged, locked_read_u64(tsdn, 
LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.decay_muzzy.purged)); + &arena->pa_shard.stats->decay_muzzy.purged)); atomic_load_add_store_zu(&astats->base, base_allocated); atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); @@ -695,7 +703,7 @@ arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { static bool arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, - arena_stats_decay_t *stats) { + pa_shard_decay_stats_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -708,7 +716,6 @@ arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, } decay->purging = false; arena_decay_reinit(decay, decay_ms); - /* Memory is zeroed, so there is no need to clear stats. */ if (config_stats) { decay->stats = stats; } @@ -2044,11 +2051,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (arena_decay_init(&arena->decay_dirty, - arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { + arena_dirty_decay_ms_default_get(), + &arena->pa_shard.stats->decay_dirty)) { goto label_error; } if (arena_decay_init(&arena->decay_muzzy, - arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { + arena_muzzy_decay_ms_default_get(), + &arena->pa_shard.stats->decay_muzzy)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index 56d30000..26d86da0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -869,19 +869,25 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.edata_avail); } - ctl_accum_locked_u64(&sdstats->astats.decay_dirty.npurge, - &astats->astats.decay_dirty.npurge); - ctl_accum_locked_u64(&sdstats->astats.decay_dirty.nmadvise, - &astats->astats.decay_dirty.nmadvise); - ctl_accum_locked_u64(&sdstats->astats.decay_dirty.purged, - &astats->astats.decay_dirty.purged); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_dirty.npurge, + &astats->astats.pa_shard_stats.decay_dirty.npurge); + 
ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_dirty.nmadvise, + &astats->astats.pa_shard_stats.decay_dirty.nmadvise); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_dirty.purged, + &astats->astats.pa_shard_stats.decay_dirty.purged); - ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.npurge, - &astats->astats.decay_muzzy.npurge); - ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.nmadvise, - &astats->astats.decay_muzzy.nmadvise); - ctl_accum_locked_u64(&sdstats->astats.decay_muzzy.purged, - &astats->astats.decay_muzzy.purged); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_muzzy.npurge, + &astats->astats.pa_shard_stats.decay_muzzy.npurge); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_muzzy.nmadvise, + &astats->astats.pa_shard_stats.decay_muzzy.nmadvise); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.decay_muzzy.purged, + &astats->astats.pa_shard_stats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ &(sdstats->astats.mutex_prof_data[ \ @@ -2920,23 +2926,29 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.npurge), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.nmadvise), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.purged), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) 
+ &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.npurge), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.nmadvise), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.purged), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), From 1ada4aef84246d3fc494d8064ee14d5ae62ec569 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 10:52:26 -0700 Subject: [PATCH 1627/2608] PA: Get rid of arena_ind_get calls. This is another step on the path towards breaking the extent reliance on the arena module. --- src/extent.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/extent.c b/src/extent.c index a023d3e2..51062642 100644 --- a/src/extent.c +++ b/src/extent.c @@ -390,7 +390,7 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ edata_t *unlock_edata = edata; assert(edata_base_get(edata) == new_addr); - if (edata_arena_ind_get(edata) != arena_ind_get(arena) + if (edata_arena_ind_get(edata) != ecache_ind_get(ecache) || edata_size_get(edata) < size || edata_state_get(edata) != ecache->state) { @@ -661,9 +661,9 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, goto label_err; } - edata_init(edata, arena_ind_get(arena), ptr, alloc_size, false, - SC_NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed, true, EXTENT_IS_HEAD); + edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_retained), ptr, + alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), + extent_state_active, zeroed, committed, true, 
EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); @@ -815,9 +815,9 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; } - edata_init(edata, arena_ind_get(arena), addr, size, slab, szind, - arena_extent_sn_next(arena), extent_state_active, *zero, *commit, - true, EXTENT_NOT_HEAD); + edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_dirty), addr, + size, slab, szind, arena_extent_sn_next(arena), extent_state_active, + *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; From ce8c0d6c09e744f52f2ce01b93c77d9acf0cf1a8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 11:10:43 -0700 Subject: [PATCH 1628/2608] PA: Move in arena extent_sn counter. Just another step towards making PA self-contained. --- include/jemalloc/internal/arena_externs.h | 1 - include/jemalloc/internal/arena_structs.h | 7 ------- include/jemalloc/internal/pa.h | 4 ++++ src/arena.c | 7 ------- src/extent.c | 9 +++++---- src/extent_dss.c | 6 +++--- src/pa.c | 7 +++++++ 7 files changed, 19 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 4ef8d8e8..8548b1f0 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -81,7 +81,6 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); -size_t arena_extent_sn_next(arena_t *arena); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); bool arena_is_huge(unsigned arena_ind); diff --git 
a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 7d312343..ca11af71 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -121,13 +121,6 @@ struct arena_s { /* Synchronization: internal. */ counter_accum_t prof_accum; - /* - * Extent serial number generator state. - * - * Synchronization: atomic. - */ - atomic_zu_t extent_sn_next; - /* * Represents a dss_prec_t, but atomically. * diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 1b9e58c4..29c6b211 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -46,11 +46,15 @@ struct pa_shard_s { /* The grow info for the retained ecache. */ ecache_grow_t ecache_grow; + /* Extent serial number generator state. */ + atomic_zu_t extent_sn_next; + pa_shard_stats_t *stats; }; /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats); +size_t pa_shard_extent_sn_next(pa_shard_t *shard); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index a29dc937..8f306604 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1979,11 +1979,6 @@ arena_nthreads_dec(arena_t *arena, bool internal) { atomic_fetch_sub_u(&arena->nthreads[internal], 1, ATOMIC_RELAXED); } -size_t -arena_extent_sn_next(arena_t *arena) { - return atomic_fetch_add_zu(&arena->extent_sn_next, 1, ATOMIC_RELAXED); -} - arena_t * arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena_t *arena; @@ -2032,8 +2027,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - atomic_store_zu(&arena->extent_sn_next, 0, ATOMIC_RELAXED); - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); diff --git a/src/extent.c b/src/extent.c index 51062642..918738d6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -662,8 +662,9 @@ extent_grow_retained(tsdn_t *tsdn, arena_t 
*arena, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, arena_extent_sn_next(arena), - extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); + alloc_size, false, SC_NSIZES, + pa_shard_extent_sn_next(&arena->pa_shard), extent_state_active, + zeroed, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); @@ -816,8 +817,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return NULL; } edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_dirty), addr, - size, slab, szind, arena_extent_sn_next(arena), extent_state_active, - *zero, *commit, true, EXTENT_NOT_HEAD); + size, slab, szind, pa_shard_extent_sn_next(&arena->pa_shard), + extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); return NULL; diff --git a/src/extent_dss.c b/src/extent_dss.c index d125c439..7746a208 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -155,9 +155,9 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, - SC_NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true, true, - EXTENT_NOT_HEAD); + SC_NSIZES, pa_shard_extent_sn_next( + &arena->pa_shard), extent_state_active, + false, true, true, EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired diff --git a/src/pa.c b/src/pa.c index 5063d48f..35d3335f 100644 --- a/src/pa.c +++ b/src/pa.c @@ -42,8 +42,15 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return true; } + atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); + shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); return false; } + +size_t 
+pa_shard_extent_sn_next(pa_shard_t *shard) { + return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); +} From 6ca918d0cfe54587376282ec85edf153c2ea0d5b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 11:26:15 -0700 Subject: [PATCH 1629/2608] PA: Add a stats comment. --- include/jemalloc/internal/pa.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 29c6b211..a7c57896 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -10,6 +10,17 @@ * allocations. */ +/* + * The stats for a particular pa_shard. Because of the way the ctl module + * handles stats epoch data collection (it has its own arena_stats, and merges + * the stats from each arena into it), this needs to live in the arena_stats_t; + * hence we define it here and let the pa_shard have a pointer (rather than the + * more natural approach of just embedding it in the pa_shard itself). + * + * We follow the arena_stats_t approach of marking the derived fields. These + * are the ones that are not maintained on their own; instead, their values are + * derived during those stats merges. + */ typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; struct pa_shard_decay_stats_s { /* Total number of purge sweeps. */ From 70d12ffa055518326573c985cbc86a32a1f2de1d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 12:06:19 -0700 Subject: [PATCH 1630/2608] PA: Move mapped into pa stats. 
--- include/jemalloc/internal/arena_inlines_b.h | 15 +++++++++------ include/jemalloc/internal/arena_stats.h | 14 -------------- include/jemalloc/internal/pa.h | 19 ++++++++++++++++++- src/arena.c | 15 ++++++++------- src/ctl.c | 11 ++++++----- src/large.c | 2 +- src/pa.c | 3 ++- 7 files changed, 44 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index eac4a631..fd641754 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -148,15 +148,18 @@ arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, extent_dalloc_wrapper(tsdn, arena, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), &arena->pa_shard.stats->decay_dirty.nmadvise, 1); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), &arena->pa_shard.stats->decay_dirty.purged, extent_size >> LG_PAGE); - locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped, extent_size); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); + locked_dec_zu(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 82996b8b..129a8fef 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -61,12 +61,6 @@ struct arena_stats_extents_s { typedef struct arena_stats_s arena_stats_t; struct arena_stats_s { LOCKEDINT_MTX_DECLARE(mtx) - - /* - * Number of bytes currently mapped, excluding retained memory. 
- */ - locked_zu_t mapped; /* Partially derived. */ - /* * Number of unused virtual memory bytes currently retained. Retained * bytes are technically mapped (though always decommitted or purged), @@ -135,12 +129,4 @@ arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } -static inline void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - LOCKEDINT_MTX_LOCK(tsdn, arena_stats->mtx); - locked_inc_zu(tsdn, LOCKEDINT_MTX(arena_stats->mtx), - &arena_stats->mapped, size); - LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); -} - #endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index a7c57896..61b6f42c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -35,6 +35,14 @@ typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { pa_shard_decay_stats_t decay_dirty; pa_shard_decay_stats_t decay_muzzy; + /* + * Number of bytes currently mapped, excluding retained memory. + * + * Partially derived -- we maintain our own counter, but add in the + * base's own counter at merge. + */ + locked_zu_t mapped; + /* VM space had to be leaked (undocumented). Normally 0. */ atomic_zu_t abandoned_vm; }; @@ -60,12 +68,21 @@ struct pa_shard_s { /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; }; +static inline void +pa_shard_stats_mapped_add(tsdn_t *tsdn, pa_shard_t *shard, size_t size) { + LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); + locked_inc_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->mapped, size); + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); +} + /* Returns true on error. 
*/ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats); + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 8f306604..2f626fed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -95,9 +95,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_zu_unsynchronized(&astats->mapped, base_mapped - + locked_read_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped)); + locked_inc_zu_unsynchronized(&astats->pa_shard_stats.mapped, + base_mapped + locked_read_zu(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->mapped)); locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); @@ -488,7 +489,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, if (mapped_add != 0) { locked_inc_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped, mapped_add); + &arena->pa_shard.stats->mapped, mapped_add); } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } @@ -919,7 +920,7 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &decay->stats->purged, npurged); locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.mapped, nunmapped << LG_PAGE); + &arena->pa_shard.stats->mapped, nunmapped << LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } @@ -1240,7 +1241,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, true, szind, &zero); if (config_stats && slab != NULL) { - arena_stats_mapped_add(tsdn, &arena->stats, + pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, bin_info->slab_size); } @@ -2039,7 +2040,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } if (pa_shard_init(tsdn, 
&arena->pa_shard, base, ind, - &arena->stats.pa_shard_stats)) { + &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx))) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index 26d86da0..122856c0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -861,8 +861,9 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - ctl_accum_locked_zu(&sdstats->astats.mapped, - &astats->astats.mapped); + ctl_accum_locked_zu( + &sdstats->astats.pa_shard_stats.mapped, + &astats->astats.pa_shard_stats.mapped); ctl_accum_locked_zu(&sdstats->astats.retained, &astats->astats.retained); ctl_accum_atomic_zu(&sdstats->astats.edata_avail, @@ -1103,7 +1104,7 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = locked_read_atomic_zu( - &ctl_sarena->astats->astats.mapped); + &ctl_sarena->astats->astats.pa_shard_stats.mapped); ctl_stats->retained = locked_read_atomic_zu( &ctl_sarena->astats->astats.retained); @@ -2914,8 +2915,8 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.mapped), - size_t) + locked_read_atomic_zu(&arenas_i( + mib[2])->astats->astats.pa_shard_stats.mapped), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) diff --git a/src/large.c b/src/large.c index fa03a50e..57bf6748 100644 --- a/src/large.c +++ b/src/large.c @@ -151,7 +151,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, emap_remap(tsdn, &emap_global, edata, szind, false); if (config_stats && new_mapping) { - arena_stats_mapped_add(tsdn, &arena->stats, trailsize); + 
pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, trailsize); } if (zero) { diff --git a/src/pa.c b/src/pa.c index 35d3335f..e4dbb040 100644 --- a/src/pa.c +++ b/src/pa.c @@ -3,7 +3,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats) { + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -44,6 +44,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); + shard->stats_mtx = stats_mtx; shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); From 22a0a7b93a192a07e9a3e5ba9f5adfa64036219e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 12:14:51 -0700 Subject: [PATCH 1631/2608] Move arena_decay_extent to extent module. --- include/jemalloc/internal/arena_inlines_b.h | 23 ------------------ src/extent.c | 26 ++++++++++++++++++++- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fd641754..50223ba7 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -140,29 +140,6 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { arena_decay_ticks(tsdn, arena, 1); } -/* Purge a single extent to retained / unmapped directly. */ -JEMALLOC_ALWAYS_INLINE void -arena_decay_extent(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { - size_t extent_size = edata_size_get(edata); - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); - if (config_stats) { - /* Update stats accordingly. 
*/ - LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.nmadvise, 1); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.purged, - extent_size >> LG_PAGE); - locked_dec_zu(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->mapped, extent_size); - LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); - } -} - JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool slow_path) { diff --git a/src/extent.c b/src/extent.c index 918738d6..8411e8aa 100644 --- a/src/extent.c +++ b/src/extent.c @@ -954,6 +954,30 @@ extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, edata, coalesced, growing_retained, true); } +/* Purge a single extent to retained / unmapped directly. */ +static void +extent_maximally_purge(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, + edata_t *edata) { + size_t extent_size = edata_size_get(edata); + extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + if (config_stats) { + /* Update stats accordingly. */ + LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->decay_dirty.nmadvise, 1); + locked_inc_u64(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->decay_dirty.purged, + extent_size >> LG_PAGE); + locked_dec_zu(tsdn, + LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), + &arena->pa_shard.stats->mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); + } +} + + /* * Does the metadata management portions of putting an unused extent into the * given ecache_t (coalesces, deregisters slab interiors, the heap operations). 
@@ -992,7 +1016,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, arena_may_force_decay(arena)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); - arena_decay_extent(tsdn, arena, ehooks, edata); + extent_maximally_purge(tsdn, arena, ehooks, edata); return; } } From 3192d6b77dae3b4aa36b95eea793fcdea6f5ffbd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 12:20:06 -0700 Subject: [PATCH 1632/2608] Extents: Have extent_dalloc_gap take ehooks. We're almost to the point where the extent code doesn't know about arenas at all. In that world, we shouldn't pull them out of the arena. --- include/jemalloc/internal/extent.h | 3 ++- src/extent.c | 5 ++--- src/extent_dss.c | 5 ++++- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index e615fb6e..bb01254c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -33,7 +33,8 @@ edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata); +void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, + edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, diff --git a/src/extent.c b/src/extent.c index 8411e8aa..0162494e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1026,9 +1026,8 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - ehooks_t *ehooks = arena_get_ehooks(arena); - +extent_dalloc_gap(tsdn_t *tsdn, arena_t 
*arena, ehooks_t *ehooks, + edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); diff --git a/src/extent_dss.c b/src/extent_dss.c index 7746a208..55f037ef 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -186,7 +186,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_dss_extending_finish(); if (gap_size_page != 0) { - extent_dalloc_gap(tsdn, arena, gap); + ehooks_t *ehooks = arena_get_ehooks( + arena); + extent_dalloc_gap(tsdn, arena, ehooks, + gap); } else { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); From 497836dbc8bd5badb0726a36fb5ce12779b15c6b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 13:19:09 -0700 Subject: [PATCH 1633/2608] Arena stats: mark edata_avail as derived. The true number is in the edata_cache itself. --- include/jemalloc/internal/arena_stats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 129a8fef..310b907b 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -69,7 +69,7 @@ struct arena_stats_s { locked_zu_t retained; /* Derived. */ /* Number of edata_t structs allocated by base, but not being used. */ - atomic_zu_t edata_avail; + atomic_zu_t edata_avail; /* Derived. */ atomic_zu_t base; /* Derived. 
*/ atomic_zu_t internal; From 7b6288547637124088ef208fe667037b70bd3e01 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 13:11:35 -0700 Subject: [PATCH 1634/2608] Introduce decay module and put decay objects in PA --- include/jemalloc/internal/arena_inlines_b.h | 4 +- include/jemalloc/internal/arena_structs.h | 70 -------- .../internal/background_thread_externs.h | 2 +- .../internal/background_thread_inlines.h | 2 +- include/jemalloc/internal/decay.h | 66 ++++++++ include/jemalloc/internal/pa.h | 30 ++-- src/arena.c | 158 +++++++++--------- src/background_thread.c | 18 +- src/ctl.c | 4 +- 9 files changed, 181 insertions(+), 173 deletions(-) create mode 100644 include/jemalloc/internal/decay.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 50223ba7..8b77a335 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -134,8 +134,8 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_assert_not_owner(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_muzzy.mtx); arena_decay_ticks(tsdn, arena, 1); } diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index ca11af71..49568fc6 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -15,69 +15,8 @@ #include "jemalloc/internal/pa.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" -#include "jemalloc/internal/smoothstep.h" #include "jemalloc/internal/ticker.h" -struct arena_decay_s { - /* Synchronizes all non-atomic fields. 
*/ - malloc_mutex_t mtx; - /* - * True if a thread is currently purging the extents associated with - * this decay structure. - */ - bool purging; - /* - * Approximate time in milliseconds from the creation of a set of unused - * dirty pages until an equivalent set of unused dirty pages is purged - * and/or reused. - */ - atomic_zd_t time_ms; - /* time / SMOOTHSTEP_NSTEPS. */ - nstime_t interval; - /* - * Time at which the current decay interval logically started. We do - * not actually advance to a new epoch until sometime after it starts - * because of scheduling and computation delays, and it is even possible - * to completely skip epochs. In all cases, during epoch advancement we - * merge all relevant activity into the most recently recorded epoch. - */ - nstime_t epoch; - /* Deadline randomness generator. */ - uint64_t jitter_state; - /* - * Deadline for current epoch. This is the sum of interval and per - * epoch jitter which is a uniform random variable in [0..interval). - * Epochs always advance by precise multiples of interval, but we - * randomize the deadline to reduce the likelihood of arenas purging in - * lockstep. - */ - nstime_t deadline; - /* - * Number of unpurged pages at beginning of current epoch. During epoch - * advancement we use the delta between arena->decay_*.nunpurged and - * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, - * if any, were generated. - */ - size_t nunpurged; - /* - * Trailing log of how many unused dirty pages were generated during - * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last - * element is the most recent epoch. Corresponding epoch times are - * relative to epoch. - */ - size_t backlog[SMOOTHSTEP_NSTEPS]; - - /* - * Pointer to associated stats. These stats are embedded directly in - * the arena's stats due to how stats structures are shared between the - * arena and ctl code. - * - * Synchronization: Same as associated arena's stats field. 
*/ - pa_shard_decay_stats_t *stats; - /* Peak number of pages in associated extents. Used for debug only. */ - uint64_t ceil_npages; -}; - struct arena_s { /* * Number of threads currently assigned to this arena. Each thread has @@ -147,15 +86,6 @@ struct arena_s { /* The page-level allocator shard this arena uses. */ pa_shard_t pa_shard; - /* - * Decay-based purging state, responsible for scheduling extent state - * transitions. - * - * Synchronization: internal. - */ - arena_decay_t decay_dirty; /* dirty --> muzzy */ - arena_decay_t decay_muzzy; /* muzzy --> retained */ - /* * bins is used to store heaps of free regions. * diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 224e3700..d5c13695 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -13,7 +13,7 @@ bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, size_t npages_new); + decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index f85e86fa..7bdbe928 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -55,7 +55,7 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, arena_background_thread_info_get(arena); if (background_thread_indefinite_sleep(info)) { background_thread_interval_check(tsdn, arena, - &arena->decay_dirty, 0); + &arena->pa_shard.decay_dirty, 0); } } diff --git a/include/jemalloc/internal/decay.h 
b/include/jemalloc/internal/decay.h new file mode 100644 index 00000000..28fe54d4 --- /dev/null +++ b/include/jemalloc/internal/decay.h @@ -0,0 +1,66 @@ +#ifndef JEMALLOC_INTERNAL_DECAY_H +#define JEMALLOC_INTERNAL_DECAY_H + +#include "jemalloc/internal/smoothstep.h" + +/* + * The decay_t computes the number of pages we should purge at any given time. + * Page allocators inform a decay object when pages enter a decay-able state + * (i.e. dirty or muzzy), and query it to determine how many pages should be + * purged at any given time. + */ +typedef struct decay_s decay_t; +struct decay_s { + /* Synchronizes all non-atomic fields. */ + malloc_mutex_t mtx; + /* + * True if a thread is currently purging the extents associated with + * this decay structure. + */ + bool purging; + /* + * Approximate time in milliseconds from the creation of a set of unused + * dirty pages until an equivalent set of unused dirty pages is purged + * and/or reused. + */ + atomic_zd_t time_ms; + /* time / SMOOTHSTEP_NSTEPS. */ + nstime_t interval; + /* + * Time at which the current decay interval logically started. We do + * not actually advance to a new epoch until sometime after it starts + * because of scheduling and computation delays, and it is even possible + * to completely skip epochs. In all cases, during epoch advancement we + * merge all relevant activity into the most recently recorded epoch. + */ + nstime_t epoch; + /* Deadline randomness generator. */ + uint64_t jitter_state; + /* + * Deadline for current epoch. This is the sum of interval and per + * epoch jitter which is a uniform random variable in [0..interval). + * Epochs always advance by precise multiples of interval, but we + * randomize the deadline to reduce the likelihood of arenas purging in + * lockstep. + */ + nstime_t deadline; + /* + * Number of unpurged pages at beginning of current epoch. 
During epoch + * advancement we use the delta between arena->decay_*.nunpurged and + * ecache_npages_get(&arena->ecache_*) to determine how many dirty pages, + * if any, were generated. + */ + size_t nunpurged; + /* + * Trailing log of how many unused dirty pages were generated during + * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last + * element is the most recent epoch. Corresponding epoch times are + * relative to epoch. + */ + size_t backlog[SMOOTHSTEP_NSTEPS]; + + /* Peak number of pages in associated extents. Used for debug only. */ + uint64_t ceil_npages; +}; + +#endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 61b6f42c..d686652a 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/lockedint.h" @@ -10,6 +11,16 @@ * allocations. */ +typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; +struct pa_shard_decay_stats_s { + /* Total number of purge sweeps. */ + locked_u64_t npurge; + /* Total number of madvise calls made. */ + locked_u64_t nmadvise; + /* Total number of pages purged. */ + locked_u64_t purged; +}; + /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -21,16 +32,6 @@ * are the ones that are not maintained on their own; instead, their values are * derived during those stats merges. */ -typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; -struct pa_shard_decay_stats_s { - /* Total number of purge sweeps. */ - locked_u64_t npurge; - /* Total number of madvise calls made. */ - locked_u64_t nmadvise; - /* Total number of pages purged. 
*/ - locked_u64_t purged; -}; - typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { pa_shard_decay_stats_t decay_dirty; @@ -70,6 +71,15 @@ struct pa_shard_s { malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. + * + * Synchronization: internal. + */ + decay_t decay_dirty; /* dirty --> muzzy */ + decay_t decay_muzzy; /* muzzy --> retained */ }; static inline void diff --git a/src/arena.c b/src/arena.c index 2f626fed..ce0b57cc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -55,9 +55,9 @@ static unsigned huge_arena_ind; * definition. */ -static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, ecache_t *ecache, bool all, size_t npages_limit, - size_t npages_decay_max, bool is_background_thread); +static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, + size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, @@ -106,7 +106,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), ATOMIC_RELAXED); - /* Dirty pa_shard_decay_stats_t */ + /* Dirty decay stats */ locked_inc_u64_unsynchronized( &astats->pa_shard_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), @@ -120,7 +120,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->pa_shard.stats->decay_dirty.purged)); - /* Muzzy pa_shard_decay_stats_t */ + /* Decay stats */ locked_inc_u64_unsynchronized( &astats->pa_shard_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), @@ 
-255,9 +255,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_prof_mutex_extents_muzzy) READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_retained.mtx, arena_prof_mutex_extents_retained) - READ_ARENA_MUTEX_PROF_DATA(decay_dirty.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_dirty.mtx, arena_prof_mutex_decay_dirty) - READ_ARENA_MUTEX_PROF_DATA(decay_muzzy.mtx, + READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy) READ_ARENA_MUTEX_PROF_DATA(base->mtx, arena_prof_mutex_base) @@ -543,17 +543,17 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } static ssize_t -arena_decay_ms_read(arena_decay_t *decay) { +arena_decay_ms_read(decay_t *decay) { return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); } static void -arena_decay_ms_write(arena_decay_t *decay, ssize_t decay_ms) { +arena_decay_ms_write(decay_t *decay, ssize_t decay_ms) { atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); } static void -arena_decay_deadline_init(arena_decay_t *decay) { +arena_decay_deadline_init(decay_t *decay) { /* * Generate a new deadline that is uniformly random within the next * epoch after the current one. @@ -570,12 +570,12 @@ arena_decay_deadline_init(arena_decay_t *decay) { } static bool -arena_decay_deadline_reached(const arena_decay_t *decay, const nstime_t *time) { +arena_decay_deadline_reached(const decay_t *decay, const nstime_t *time) { return (nstime_compare(&decay->deadline, time) <= 0); } static size_t -arena_decay_backlog_npages_limit(const arena_decay_t *decay) { +arena_decay_backlog_npages_limit(const decay_t *decay) { uint64_t sum; size_t npages_limit_backlog; unsigned i; @@ -595,7 +595,7 @@ arena_decay_backlog_npages_limit(const arena_decay_t *decay) { } static void -arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) { +arena_decay_backlog_update_last(decay_t *decay, size_t current_npages) { size_t npages_delta = (current_npages > decay->nunpurged) ? 
current_npages - decay->nunpurged : 0; decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; @@ -613,7 +613,7 @@ arena_decay_backlog_update_last(arena_decay_t *decay, size_t current_npages) { } static void -arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, +arena_decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, size_t current_npages) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * @@ -635,18 +635,18 @@ arena_decay_backlog_update(arena_decay_t *decay, uint64_t nadvance_u64, } static void -arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, size_t current_npages, size_t npages_limit, - bool is_background_thread) { +arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, ecache, false, - npages_limit, current_npages - npages_limit, + arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, + false, npages_limit, current_npages - npages_limit, is_background_thread); } } static void -arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, +arena_decay_epoch_advance_helper(decay_t *decay, const nstime_t *time, size_t current_npages) { assert(arena_decay_deadline_reached(decay, time)); @@ -670,8 +670,9 @@ arena_decay_epoch_advance_helper(arena_decay_t *decay, const nstime_t *time, } static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, const nstime_t *time, bool is_background_thread) { +arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, const nstime_t *time, + bool is_background_thread) { size_t current_npages = ecache_npages_get(ecache); arena_decay_epoch_advance_helper(decay, 
time, current_npages); @@ -681,13 +682,13 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, current_npages; if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, ecache, + arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, current_npages, npages_limit, is_background_thread); } } static void -arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { +arena_decay_reinit(decay_t *decay, ssize_t decay_ms) { arena_decay_ms_write(decay, decay_ms); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -703,10 +704,9 @@ arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { } static bool -arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, - pa_shard_decay_stats_t *stats) { +arena_decay_init(decay_t *decay, ssize_t decay_ms) { if (config_debug) { - for (size_t i = 0; i < sizeof(arena_decay_t); i++) { + for (size_t i = 0; i < sizeof(decay_t); i++) { assert(((char *)decay)[i] == 0); } decay->ceil_npages = 0; @@ -717,9 +717,6 @@ arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, } decay->purging = false; arena_decay_reinit(decay, decay_ms); - if (config_stats) { - decay->stats = stats; - } return false; } @@ -736,16 +733,17 @@ arena_decay_ms_valid(ssize_t decay_ms) { } static bool -arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, bool is_background_thread) { +arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + bool is_background_thread) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ ssize_t decay_ms = arena_decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, ecache, false, - 0, ecache_npages_get(ecache), + arena_decay_to_limit(tsdn, arena, decay, decay_stats, + ecache, false, 0, ecache_npages_get(ecache), is_background_thread); } return false; @@ -780,10 +778,10 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, */ bool advance_epoch = arena_decay_deadline_reached(decay, &time); if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, ecache, &time, - is_background_thread); + arena_decay_epoch_advance(tsdn, arena, decay, decay_stats, + ecache, &time, is_background_thread); } else if (is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, ecache, + arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, ecache_npages_get(ecache), arena_decay_backlog_npages_limit(decay), is_background_thread); @@ -793,23 +791,23 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static ssize_t -arena_decay_ms_get(arena_decay_t *decay) { +arena_decay_ms_get(decay_t *decay) { return arena_decay_ms_read(decay); } ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->decay_dirty); + return arena_decay_ms_get(&arena->pa_shard.decay_dirty); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->decay_muzzy); + return arena_decay_ms_get(&arena->pa_shard.decay_muzzy); } static bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, ssize_t decay_ms) { +arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { if (!arena_decay_ms_valid(decay_ms)) { return true; } @@ -824,7 +822,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * arbitrary change during initial arena configuration. 
*/ arena_decay_reinit(decay, decay_ms); - arena_maybe_decay(tsdn, arena, decay, ecache, false); + arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -833,15 +831,17 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->decay_dirty, - &arena->pa_shard.ecache_dirty, decay_ms); + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_dirty, + &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, + decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->decay_muzzy, - &arena->pa_shard.ecache_muzzy, decay_ms); + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_muzzy, + &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, + decay_ms); } static size_t @@ -865,8 +865,8 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - arena_decay_t *decay, ecache_t *ecache, bool all, - edata_list_t *decay_extents, bool is_background_thread) { + decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + bool all, edata_list_t *decay_extents, bool is_background_thread) { size_t nmadvise, nunmapped; size_t npurged; @@ -914,11 +914,11 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay->stats->npurge, 1); + &decay_stats->npurge, 1); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay->stats->nmadvise, nmadvise); + &decay_stats->nmadvise, nmadvise); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay->stats->purged, npurged); + &decay_stats->purged, npurged); locked_dec_zu(tsdn, 
LOCKEDINT_MTX(arena->stats.mtx), &arena->pa_shard.stats->mapped, nunmapped << LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -935,9 +935,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * current decay run, so that the purging thread never finishes. */ static void -arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, bool all, size_t npages_limit, size_t npages_decay_max, - bool is_background_thread) { +arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, + size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); malloc_mutex_assert_owner(tsdn, &decay->mtx); @@ -957,7 +957,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - ecache, all, &decay_extents, is_background_thread); + decay_stats, ecache, all, &decay_extents, + is_background_thread); assert(npurged == npurge); } @@ -966,12 +967,13 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static bool -arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - ecache_t *ecache, bool is_background_thread, bool all) { +arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, ecache, all, 0, - ecache_npages_get(ecache), is_background_thread); + arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, + all, 0, ecache_npages_get(ecache), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -982,8 +984,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t 
*arena, arena_decay_t *decay, return true; } - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, ecache, - is_background_thread); + bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, decay_stats, + ecache, is_background_thread); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ @@ -1003,8 +1005,9 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - return arena_decay_impl(tsdn, arena, &arena->decay_dirty, - &arena->pa_shard.ecache_dirty, is_background_thread, all); + return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_dirty, + &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, + is_background_thread, all); } static bool @@ -1014,8 +1017,9 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, arena_muzzy_decay_ms_get(arena) <= 0) { return false; } - return arena_decay_impl(tsdn, arena, &arena->decay_muzzy, - &arena->pa_shard.ecache_muzzy, is_background_thread, all); + return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, + &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, + is_background_thread, all); } void @@ -2044,14 +2048,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->decay_dirty, - arena_dirty_decay_ms_default_get(), - &arena->pa_shard.stats->decay_dirty)) { + if (arena_decay_init(&arena->pa_shard.decay_dirty, + arena_dirty_decay_ms_default_get())) { goto label_error; } - if (arena_decay_init(&arena->decay_muzzy, - arena_muzzy_decay_ms_default_get(), - &arena->pa_shard.stats->decay_muzzy)) { + if (arena_decay_init(&arena->pa_shard.decay_muzzy, + arena_muzzy_decay_ms_default_get())) { goto label_error; } @@ -2172,8 +2174,8 @@ arena_boot(sc_data_t *sc_data) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - 
malloc_mutex_prefork(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_muzzy.mtx); } void @@ -2236,8 +2238,8 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_parent(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_parent(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_muzzy.mtx); if (config_stats) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -2282,8 +2284,8 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); ecache_grow_postfork_child(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_child(tsdn, &arena->decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &arena->decay_muzzy.mtx); + malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_muzzy.mtx); if (config_stats) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } diff --git a/src/background_thread.c b/src/background_thread.c index ddfe3a35..95a8b16c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -55,7 +55,7 @@ bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, size_t npages_new) NOT_REACHED + decay_t *decay, size_t npages_new) NOT_REACHED 
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED @@ -99,7 +99,7 @@ set_current_thread_affinity(int cpu) { #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) static inline size_t -decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { +decay_npurge_after_interval(decay_t *decay, size_t interval) { size_t i; uint64_t sum = 0; for (i = 0; i < interval; i++) { @@ -113,7 +113,7 @@ decay_npurge_after_interval(arena_decay_t *decay, size_t interval) { } static uint64_t -arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay, +arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, ecache_t *ecache) { if (malloc_mutex_trylock(tsdn, &decay->mtx)) { /* Use minimal interval if decay is contended. */ @@ -201,13 +201,13 @@ label_done: static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; - i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty, - &arena->pa_shard.ecache_dirty); + i1 = arena_decay_compute_purge_interval_impl(tsdn, + &arena->pa_shard.decay_dirty, &arena->pa_shard.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } - i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy, - &arena->pa_shard.ecache_muzzy); + i2 = arena_decay_compute_purge_interval_impl(tsdn, + &arena->pa_shard.decay_muzzy, &arena->pa_shard.ecache_muzzy); return i1 < i2 ? i1 : i2; } @@ -653,8 +653,8 @@ background_threads_disable(tsd_t *tsd) { /* Check if we need to signal the background thread early. 
*/ void -background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - arena_decay_t *decay, size_t npages_new) { +background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new) { background_thread_info_t *info = arena_background_thread_info_get( arena); if (malloc_mutex_trylock(tsdn, &info->mtx)) { diff --git a/src/ctl.c b/src/ctl.c index 122856c0..9233c846 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3082,8 +3082,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); - MUTEX_PROF_RESET(arena->decay_dirty.mtx); - MUTEX_PROF_RESET(arena->decay_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.decay_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); From 4d090d23f1518327ba1c5b1477d4f5a31a6cb745 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 14:52:25 -0700 Subject: [PATCH 1635/2608] Decay: Introduce a stub .c file. 
--- Makefile.in | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ src/decay.c | 3 +++ 6 files changed, 12 insertions(+) create mode 100644 src/decay.c diff --git a/Makefile.in b/Makefile.in index a3c43a6d..6bb56a01 100644 --- a/Makefile.in +++ b/Makefile.in @@ -106,6 +106,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ckh.c \ $(srcroot)src/counter.c \ $(srcroot)src/ctl.c \ + $(srcroot)src/decay.c \ $(srcroot)src/div.c \ $(srcroot)src/ecache.c \ $(srcroot)src/edata.c \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 3c17e50b..156e4593 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -46,6 +46,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 2f5ed621..45557f65 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index d63042d8..c5cfb95f 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -46,6 +46,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 2f5ed621..45557f65 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -37,6 +37,9 @@ Source Files + + Source Files + Source Files diff --git a/src/decay.c b/src/decay.c new file mode 100644 index 00000000..454cb475 --- /dev/null +++ 
b/src/decay.c @@ -0,0 +1,3 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + From 8f2193dc8db26eba40f7948f7ce60c8584ab31a9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 14:44:11 -0700 Subject: [PATCH 1636/2608] Decay: Move in arena decay functions. --- include/jemalloc/internal/decay.h | 54 +++++++ src/arena.c | 244 +++--------------------------- src/decay.c | 174 +++++++++++++++++++++ 3 files changed, 249 insertions(+), 223 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 28fe54d4..ef336f07 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -8,6 +8,15 @@ * Page allocators inform a decay object when pages enter a decay-able state * (i.e. dirty or muzzy), and query it to determine how many pages should be * purged at any given time. + * + * This is mostly a single-threaded data structure and doesn't care about + * synchronization at all; it's the caller's responsibility to manage their + * synchronization on their own. There are two exceptions: + * 1) It's OK to racily call decay_ms_read (i.e. just the simplest state query). + * 2) The mtx and purging fields live (and are initialized) here, but are + * logically owned by the page allocator. This is just a convenience (since + * those fields would be duplicated for both the dirty and muzzy states + * otherwise). */ typedef struct decay_s decay_t; struct decay_s { @@ -44,6 +53,12 @@ struct decay_s { * lockstep. */ nstime_t deadline; + /* + * The number of pages we cap ourselves at in the current epoch, per + * decay policies. Updated on an epoch change. After an epoch change, + * the caller should take steps to try to purge down to this amount. + */ + size_t npages_limit; /* * Number of unpurged pages at beginning of current epoch. 
During epoch * advancement we use the delta between arena->decay_*.nunpurged and @@ -56,6 +71,9 @@ struct decay_s { * each of the past SMOOTHSTEP_NSTEPS decay epochs, where the last * element is the most recent epoch. Corresponding epoch times are * relative to epoch. + * + * Updated only on epoch advance, triggered by + * decay_maybe_advance_epoch, below. */ size_t backlog[SMOOTHSTEP_NSTEPS]; @@ -63,4 +81,40 @@ struct decay_s { uint64_t ceil_npages; }; +static inline ssize_t +decay_ms_read(const decay_t *decay) { + return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); +} + +static inline size_t +decay_npages_limit_get(const decay_t *decay) { + return decay->npages_limit; +} + +/* How many unused dirty pages were generated during the last epoch. */ +static inline size_t +decay_epoch_npages_delta(const decay_t *decay) { + return decay->backlog[SMOOTHSTEP_NSTEPS - 1]; +} + +bool decay_ms_valid(ssize_t decay_ms); + +/* + * As a precondition, the decay_t must be zeroed out (as if with memset). + * + * Returns true on error. + */ +bool decay_init(decay_t *decay, ssize_t decay_ms); + +/* + * Given an already-initialized decay_t, reinitialize it with the given decay + * time. The decay_t must have previously been initialized (and should not then + * be zeroed). + */ +void decay_reinit(decay_t *decay, ssize_t decay_ms); + +/* Returns true if the epoch advanced and there are pages to purge. 
*/ +bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, + size_t current_npages); + #endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/src/arena.c b/src/arena.c index ce0b57cc..055b36f1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/decay.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" @@ -542,98 +543,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, arena_nactive_add(arena, udiff >> LG_PAGE); } -static ssize_t -arena_decay_ms_read(decay_t *decay) { - return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); -} - -static void -arena_decay_ms_write(decay_t *decay, ssize_t decay_ms) { - atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); -} - -static void -arena_decay_deadline_init(decay_t *decay) { - /* - * Generate a new deadline that is uniformly random within the next - * epoch after the current one. - */ - nstime_copy(&decay->deadline, &decay->epoch); - nstime_add(&decay->deadline, &decay->interval); - if (arena_decay_ms_read(decay) > 0) { - nstime_t jitter; - - nstime_init(&jitter, prng_range_u64(&decay->jitter_state, - nstime_ns(&decay->interval))); - nstime_add(&decay->deadline, &jitter); - } -} - -static bool -arena_decay_deadline_reached(const decay_t *decay, const nstime_t *time) { - return (nstime_compare(&decay->deadline, time) <= 0); -} - -static size_t -arena_decay_backlog_npages_limit(const decay_t *decay) { - uint64_t sum; - size_t npages_limit_backlog; - unsigned i; - - /* - * For each element of decay_backlog, multiply by the corresponding - * fixed-point smoothstep decay factor. Sum the products, then divide - * to round down to the nearest whole number of pages. 
- */ - sum = 0; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * h_steps[i]; - } - npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); - - return npages_limit_backlog; -} - -static void -arena_decay_backlog_update_last(decay_t *decay, size_t current_npages) { - size_t npages_delta = (current_npages > decay->nunpurged) ? - current_npages - decay->nunpurged : 0; - decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; - - if (config_debug) { - if (current_npages > decay->ceil_npages) { - decay->ceil_npages = current_npages; - } - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - assert(decay->ceil_npages >= npages_limit); - if (decay->ceil_npages > npages_limit) { - decay->ceil_npages = npages_limit; - } - } -} - -static void -arena_decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, - size_t current_npages) { - if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); - } else { - size_t nadvance_z = (size_t)nadvance_u64; - - assert((uint64_t)nadvance_z == nadvance_u64); - - memmove(decay->backlog, &decay->backlog[nadvance_z], - (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); - if (nadvance_z > 1) { - memset(&decay->backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); - } - } - - arena_decay_backlog_update_last(decay, current_npages); -} - static void arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -645,93 +554,6 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } } -static void -arena_decay_epoch_advance_helper(decay_t *decay, const nstime_t *time, - size_t current_npages) { - assert(arena_decay_deadline_reached(decay, time)); - - nstime_t delta; - nstime_copy(&delta, time); - nstime_subtract(&delta, &decay->epoch); - - uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); - assert(nadvance_u64 > 0); - - /* Add nadvance_u64 
decay intervals to epoch. */ - nstime_copy(&delta, &decay->interval); - nstime_imultiply(&delta, nadvance_u64); - nstime_add(&decay->epoch, &delta); - - /* Set a new deadline. */ - arena_decay_deadline_init(decay); - - /* Update the backlog. */ - arena_decay_backlog_update(decay, nadvance_u64, current_npages); -} - -static void -arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, const nstime_t *time, - bool is_background_thread) { - size_t current_npages = ecache_npages_get(ecache); - arena_decay_epoch_advance_helper(decay, time, current_npages); - - size_t npages_limit = arena_decay_backlog_npages_limit(decay); - /* We may unlock decay->mtx when try_purge(). Finish logging first. */ - decay->nunpurged = (npages_limit > current_npages) ? npages_limit : - current_npages; - - if (!background_thread_enabled() || is_background_thread) { - arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, - current_npages, npages_limit, is_background_thread); - } -} - -static void -arena_decay_reinit(decay_t *decay, ssize_t decay_ms) { - arena_decay_ms_write(decay, decay_ms); - if (decay_ms > 0) { - nstime_init(&decay->interval, (uint64_t)decay_ms * - KQU(1000000)); - nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); - } - - nstime_init_update(&decay->epoch); - decay->jitter_state = (uint64_t)(uintptr_t)decay; - arena_decay_deadline_init(decay); - decay->nunpurged = 0; - memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); -} - -static bool -arena_decay_init(decay_t *decay, ssize_t decay_ms) { - if (config_debug) { - for (size_t i = 0; i < sizeof(decay_t); i++) { - assert(((char *)decay)[i] == 0); - } - decay->ceil_npages = 0; - } - if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, - malloc_mutex_rank_exclusive)) { - return true; - } - decay->purging = false; - arena_decay_reinit(decay, decay_ms); - return false; -} - -static bool -arena_decay_ms_valid(ssize_t decay_ms) 
{ - if (decay_ms < -1) { - return false; - } - if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * - KQU(1000)) { - return true; - } - return false; -} - static bool arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -739,7 +561,7 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ - ssize_t decay_ms = arena_decay_ms_read(decay); + ssize_t decay_ms = decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, decay_stats, @@ -749,26 +571,6 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, return false; } - nstime_t time; - nstime_init_update(&time); - if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, &time) - > 0)) { - /* - * Time went backwards. Move the epoch back in time and - * generate a new deadline, with the expectation that time - * typically flows forward for long enough periods of time that - * epochs complete. Unfortunately, this strategy is susceptible - * to clock jitter triggering premature epoch advances, but - * clock jitter estimation and compensation isn't feasible here - * because calls into this code are event-driven. - */ - nstime_copy(&decay->epoch, &time); - arena_decay_deadline_init(decay); - } else { - /* Verify that time does not go backwards. */ - assert(nstime_compare(&decay->epoch, &time) <= 0); - } - /* * If the deadline has been reached, advance to the current epoch and * purge to the new limit if necessary. Note that dirty pages created @@ -776,39 +578,35 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * epoch, so as a result purging only happens during epoch advances, or * being triggered by background threads (scheduled event). 
*/ - bool advance_epoch = arena_decay_deadline_reached(decay, &time); - if (advance_epoch) { - arena_decay_epoch_advance(tsdn, arena, decay, decay_stats, - ecache, &time, is_background_thread); - } else if (is_background_thread) { + nstime_t time; + nstime_init_update(&time); + size_t npages_current = ecache_npages_get(ecache); + bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, + npages_current); + if (is_background_thread || + (epoch_advanced && !background_thread_enabled())) { + size_t npages_limit = decay_npages_limit_get(decay); arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, - ecache_npages_get(ecache), - arena_decay_backlog_npages_limit(decay), - is_background_thread); + npages_current, npages_limit, is_background_thread); } - return advance_epoch; -} - -static ssize_t -arena_decay_ms_get(decay_t *decay) { - return arena_decay_ms_read(decay); + return epoch_advanced; } ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->pa_shard.decay_dirty); + return decay_ms_read(&arena->pa_shard.decay_dirty); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return arena_decay_ms_get(&arena->pa_shard.decay_muzzy); + return decay_ms_read(&arena->pa_shard.decay_muzzy); } static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } @@ -821,7 +619,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. 
*/ - arena_decay_reinit(decay, decay_ms); + decay_reinit(decay, decay_ms); arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -989,7 +787,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ - npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; + npages_new = decay_epoch_npages_delta(decay); } malloc_mutex_unlock(tsdn, &decay->mtx); @@ -1922,7 +1720,7 @@ arena_dirty_decay_ms_default_get(void) { bool arena_dirty_decay_ms_default_set(ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } atomic_store_zd(&dirty_decay_ms_default, decay_ms, ATOMIC_RELAXED); @@ -1936,7 +1734,7 @@ arena_muzzy_decay_ms_default_get(void) { bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { - if (!arena_decay_ms_valid(decay_ms)) { + if (!decay_ms_valid(decay_ms)) { return true; } atomic_store_zd(&muzzy_decay_ms_default, decay_ms, ATOMIC_RELAXED); @@ -2048,11 +1846,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->pa_shard.decay_dirty, + if (decay_init(&arena->pa_shard.decay_dirty, arena_dirty_decay_ms_default_get())) { goto label_error; } - if (arena_decay_init(&arena->pa_shard.decay_muzzy, + if (decay_init(&arena->pa_shard.decay_muzzy, arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/decay.c b/src/decay.c index 454cb475..462b9bfe 100644 --- a/src/decay.c +++ b/src/decay.c @@ -1,3 +1,177 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/decay.h" + +/* + * Generate a new deadline that is uniformly random within the next epoch after + * the current one. 
+ */ +void +decay_deadline_init(decay_t *decay) { + nstime_copy(&decay->deadline, &decay->epoch); + nstime_add(&decay->deadline, &decay->interval); + if (decay_ms_read(decay) > 0) { + nstime_t jitter; + + nstime_init(&jitter, prng_range_u64(&decay->jitter_state, + nstime_ns(&decay->interval))); + nstime_add(&decay->deadline, &jitter); + } +} + +void +decay_reinit(decay_t *decay, ssize_t decay_ms) { + atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); + if (decay_ms > 0) { + nstime_init(&decay->interval, (uint64_t)decay_ms * + KQU(1000000)); + nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); + } + + nstime_init_update(&decay->epoch); + decay->jitter_state = (uint64_t)(uintptr_t)decay; + decay_deadline_init(decay); + decay->nunpurged = 0; + memset(decay->backlog, 0, SMOOTHSTEP_NSTEPS * sizeof(size_t)); +} + +bool +decay_init(decay_t *decay, ssize_t decay_ms) { + if (config_debug) { + for (size_t i = 0; i < sizeof(decay_t); i++) { + assert(((char *)decay)[i] == 0); + } + decay->ceil_npages = 0; + } + if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, + malloc_mutex_rank_exclusive)) { + return true; + } + decay->purging = false; + decay_reinit(decay, decay_ms); + return false; +} + +bool +decay_ms_valid(ssize_t decay_ms) { + if (decay_ms < -1) { + return false; + } + if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * + KQU(1000)) { + return true; + } + return false; +} + +static void +decay_maybe_update_time(decay_t *decay, nstime_t *new_time) { + if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, + new_time) > 0)) { + /* + * Time went backwards. Move the epoch back in time and + * generate a new deadline, with the expectation that time + * typically flows forward for long enough periods of time that + * epochs complete. 
Unfortunately, this strategy is susceptible + * to clock jitter triggering premature epoch advances, but + * clock jitter estimation and compensation isn't feasible here + * because calls into this code are event-driven. + */ + nstime_copy(&decay->epoch, new_time); + decay_deadline_init(decay); + } else { + /* Verify that time does not go backwards. */ + assert(nstime_compare(&decay->epoch, new_time) <= 0); + } +} + +static size_t +decay_backlog_npages_limit(const decay_t *decay) { + /* + * For each element of decay_backlog, multiply by the corresponding + * fixed-point smoothstep decay factor. Sum the products, then divide + * to round down to the nearest whole number of pages. + */ + uint64_t sum = 0; + for (unsigned i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + sum += decay->backlog[i] * h_steps[i]; + } + size_t npages_limit_backlog = (size_t)(sum >> SMOOTHSTEP_BFP); + + return npages_limit_backlog; +} + +static void +decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, + size_t current_npages) { + if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { + memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * + sizeof(size_t)); + } else { + size_t nadvance_z = (size_t)nadvance_u64; + + assert((uint64_t)nadvance_z == nadvance_u64); + + memmove(decay->backlog, &decay->backlog[nadvance_z], + (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); + if (nadvance_z > 1) { + memset(&decay->backlog[SMOOTHSTEP_NSTEPS - + nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + } + } + + size_t npages_delta = (current_npages > decay->nunpurged) ? 
+ current_npages - decay->nunpurged : 0; + decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; + + if (config_debug) { + if (current_npages > decay->ceil_npages) { + decay->ceil_npages = current_npages; + } + size_t npages_limit = decay_backlog_npages_limit(decay); + assert(decay->ceil_npages >= npages_limit); + if (decay->ceil_npages > npages_limit) { + decay->ceil_npages = npages_limit; + } + } +} + +static inline bool +decay_deadline_reached(const decay_t *decay, const nstime_t *time) { + return (nstime_compare(&decay->deadline, time) <= 0); +} + +bool +decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, + size_t npages_current) { + /* Handle possible non-monotonicity of time. */ + decay_maybe_update_time(decay, new_time); + + if (!decay_deadline_reached(decay, new_time)) { + return false; + } + nstime_t delta; + nstime_copy(&delta, new_time); + nstime_subtract(&delta, &decay->epoch); + + uint64_t nadvance_u64 = nstime_divide(&delta, &decay->interval); + assert(nadvance_u64 > 0); + + /* Add nadvance_u64 decay intervals to epoch. */ + nstime_copy(&delta, &decay->interval); + nstime_imultiply(&delta, nadvance_u64); + nstime_add(&decay->epoch, &delta); + + /* Set a new deadline. */ + decay_deadline_init(decay); + + /* Update the backlog. */ + decay_backlog_update(decay, nadvance_u64, npages_current); + + decay->npages_limit = decay_backlog_npages_limit(decay); + decay->nunpurged = (decay->npages_limit > npages_current) ? + decay->npages_limit : npages_current; + + return true; +} From cdb916ed3f76f348891d4f2a83f38bd70ed75067 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 18:37:23 -0700 Subject: [PATCH 1637/2608] Decay: Add comments for the public API. 
--- include/jemalloc/internal/decay.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index ef336f07..b1e80f53 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -81,11 +81,19 @@ struct decay_s { uint64_t ceil_npages; }; +/* + * The current decay time setting. This is the only public access to a decay_t + * that's allowed without holding mtx. + */ static inline ssize_t decay_ms_read(const decay_t *decay) { return atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); } +/* + * See the comment on the struct field -- the limit on pages we should allow in + * this decay state this epoch. + */ static inline size_t decay_npages_limit_get(const decay_t *decay) { return decay->npages_limit; @@ -97,6 +105,16 @@ decay_epoch_npages_delta(const decay_t *decay) { return decay->backlog[SMOOTHSTEP_NSTEPS - 1]; } +/* + * Returns true if the passed in decay time setting is valid. + * < -1 : invalid + * -1 : never decay + * 0 : decay immediately + * > 0 : some positive decay time, up to a maximum allowed value of + * NSTIME_SEC_MAX * 1000, which corresponds to decaying somewhere in the early + * 27th century. By that time, we expect to have implemented alternate purging + * strategies. + */ bool decay_ms_valid(ssize_t decay_ms); /* From d1d7e1076b6132a1faacd10cafaebaee975edb98 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Mar 2020 19:16:34 -0700 Subject: [PATCH 1638/2608] Decay: move in some background_thread accesses. 
--- include/jemalloc/internal/decay.h | 11 +++++++++++ src/background_thread.c | 8 ++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index b1e80f53..6a260fca 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -105,6 +105,17 @@ decay_epoch_npages_delta(const decay_t *decay) { return decay->backlog[SMOOTHSTEP_NSTEPS - 1]; } +/* + * Current epoch duration, in nanoseconds. Given that new epochs are started + * somewhat haphazardly, this is not necessarily exactly the time between any + * two calls to decay_maybe_advance_epoch; see the comments on fields in the + * decay_t. + */ +static inline uint64_t +decay_epoch_duration_ns(const decay_t *decay) { + return nstime_ns(&decay->interval); +} + /* * Returns true if the passed in decay time setting is valid. * < -1 : invalid diff --git a/src/background_thread.c b/src/background_thread.c index 95a8b16c..6b680530 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -121,14 +121,14 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, } uint64_t interval; - ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + ssize_t decay_time = decay_ms_read(decay); if (decay_time <= 0) { /* Purging is eagerly done or disabled currently. */ interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; goto label_done; } - uint64_t decay_interval_ns = nstime_ns(&decay->interval); + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); assert(decay_interval_ns > 0); size_t npages = ecache_npages_get(ecache); if (npages == 0) { @@ -674,12 +674,12 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, goto label_done; } - ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED); + ssize_t decay_time = decay_ms_read(decay); if (decay_time <= 0) { /* Purging is eagerly done or disabled currently. 
*/ goto label_done_unlock2; } - uint64_t decay_interval_ns = nstime_ns(&decay->interval); + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); assert(decay_interval_ns > 0); nstime_t diff; From bf55e58e63af719ce52a1df08758fb3a64ab2589 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 08:46:47 -0700 Subject: [PATCH 1639/2608] Rename test/unit/decay -> test/unit/arena_decay. This is really more of an end-to-end test at the arena level; it's not just of the decay code in particular any more. --- Makefile.in | 2 +- test/unit/{decay.c => arena_decay.c} | 0 test/unit/{decay.sh => arena_decay.sh} | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename test/unit/{decay.c => arena_decay.c} (100%) rename test/unit/{decay.sh => arena_decay.sh} (100%) diff --git a/Makefile.in b/Makefile.in index 6bb56a01..823ccc7d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -184,6 +184,7 @@ C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c endif TESTS_UNIT := \ $(srcroot)test/unit/a0.c \ + $(srcroot)test/unit/arena_decay.c \ $(srcroot)test/unit/arena_reset.c \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/background_thread.c \ @@ -196,7 +197,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/counter.c \ - $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ diff --git a/test/unit/decay.c b/test/unit/arena_decay.c similarity index 100% rename from test/unit/decay.c rename to test/unit/arena_decay.c diff --git a/test/unit/decay.sh b/test/unit/arena_decay.sh similarity index 100% rename from test/unit/decay.sh rename to test/unit/arena_decay.sh From f77cec311e102a46a58402570b43aa74dc5d7ae7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 08:52:58 -0700 Subject: [PATCH 1640/2608] Decay: Take current time as an argument. This better facilitates testing. 
--- include/jemalloc/internal/decay.h | 4 ++-- src/arena.c | 11 ++++++++--- src/decay.c | 8 ++++---- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 6a260fca..df396658 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -133,14 +133,14 @@ bool decay_ms_valid(ssize_t decay_ms); * * Returns true on error. */ -bool decay_init(decay_t *decay, ssize_t decay_ms); +bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); /* * Given an already-initialized decay_t, reinitialize it with the given decay * time. The decay_t must have previously been initialized (and should not then * be zeroed). */ -void decay_reinit(decay_t *decay, ssize_t decay_ms); +void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); /* Returns true if the epoch advanced and there are pages to purge. */ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, diff --git a/src/arena.c b/src/arena.c index 055b36f1..16be6b1e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -619,7 +619,9 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. 
*/ - decay_reinit(decay, decay_ms); + nstime_t cur_time; + nstime_init_update(&cur_time); + decay_reinit(decay, &cur_time, decay_ms); arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -1846,11 +1848,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (decay_init(&arena->pa_shard.decay_dirty, + nstime_t cur_time; + nstime_init_update(&cur_time); + + if (decay_init(&arena->pa_shard.decay_dirty, &cur_time, arena_dirty_decay_ms_default_get())) { goto label_error; } - if (decay_init(&arena->pa_shard.decay_muzzy, + if (decay_init(&arena->pa_shard.decay_muzzy, &cur_time, arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/decay.c b/src/decay.c index 462b9bfe..23d59da9 100644 --- a/src/decay.c +++ b/src/decay.c @@ -21,7 +21,7 @@ decay_deadline_init(decay_t *decay) { } void -decay_reinit(decay_t *decay, ssize_t decay_ms) { +decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -29,7 +29,7 @@ decay_reinit(decay_t *decay, ssize_t decay_ms) { nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } - nstime_init_update(&decay->epoch); + nstime_copy(&decay->epoch, cur_time); decay->jitter_state = (uint64_t)(uintptr_t)decay; decay_deadline_init(decay); decay->nunpurged = 0; @@ -37,7 +37,7 @@ decay_reinit(decay_t *decay, ssize_t decay_ms) { } bool -decay_init(decay_t *decay, ssize_t decay_ms) { +decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { if (config_debug) { for (size_t i = 0; i < sizeof(decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -49,7 +49,7 @@ decay_init(decay_t *decay, ssize_t decay_ms) { return true; } decay->purging = false; - decay_reinit(decay, decay_ms); + decay_reinit(decay, cur_time, decay_ms); return false; } From 48a2cd6d7932b2a38baab2d5394db3141d41b12e Mon Sep 
17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 10:19:38 -0700 Subject: [PATCH 1641/2608] Decay: Add a (mostly stub) test case. --- Makefile.in | 1 + test/unit/decay.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 test/unit/decay.c diff --git a/Makefile.in b/Makefile.in index 823ccc7d..b19c14f9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -197,6 +197,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ $(srcroot)test/unit/counter.c \ + $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ diff --git a/test/unit/decay.c b/test/unit/decay.c new file mode 100644 index 00000000..9da0d94c --- /dev/null +++ b/test/unit/decay.c @@ -0,0 +1,45 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/decay.h" + +/* + * Honestly, this is mostly a stub for now. Eventually, we should beef up + * testing here. + */ + +TEST_BEGIN(test_decay_empty) { + /* If we never have any decaying pages, npages_limit should be 0. 
*/ + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + uint64_t decay_ns = decay_ms * 1000 * 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + assert_false(err, ""); + + uint64_t time_between_calls = decay_epoch_duration_ns(&decay) / 5; + int nepochs = 0; + for (uint64_t i = 0; i < decay_ns / time_between_calls * 10; i++) { + size_t dirty_pages = 0; + nstime_init(&curtime, i * time_between_calls); + bool epoch_advanced = decay_maybe_advance_epoch(&decay, + &curtime, dirty_pages); + if (epoch_advanced) { + nepochs++; + assert_zu_eq(decay_npages_limit_get(&decay), 0, + "Should not increase the limit arbitrarily"); + } + } + assert_d_gt(nepochs, 0, "Should have advanced epochs"); +} +TEST_END + +int +main(void) { + return test( + test_decay_empty); +} From e77f47a85a5e48894065852cbafef3d78724acef Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 11:04:02 -0700 Subject: [PATCH 1642/2608] Move arena decay getters to PA. 
--- include/jemalloc/internal/arena_inlines_b.h | 6 ------ include/jemalloc/internal/pa.h | 15 +++++++++++++++ src/arena.c | 4 ++-- src/extent.c | 2 +- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 8b77a335..565e2262 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -108,12 +108,6 @@ arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { large_prof_info_set(edata, tctx); } -JEMALLOC_ALWAYS_INLINE bool -arena_may_force_decay(arena_t *arena) { - return !(arena_dirty_decay_ms_get(arena) == -1 - || arena_muzzy_decay_ms_get(arena) == -1); -} - JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { tsd_t *tsd; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d686652a..655e46b4 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -90,6 +90,21 @@ pa_shard_stats_mapped_add(tsdn_t *tsdn, pa_shard_t *shard, size_t size) { LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); } +static inline ssize_t +pa_shard_dirty_decay_ms_get(pa_shard_t *shard) { + return decay_ms_read(&shard->decay_dirty); +} +static inline ssize_t +pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { + return decay_ms_read(&shard->decay_muzzy); +} + +static inline bool +pa_shard_may_force_decay(pa_shard_t *shard) { + return !(pa_shard_dirty_decay_ms_get(shard) == -1 + || pa_shard_muzzy_decay_ms_get(shard) == -1); +} + /* Returns true on error. 
*/ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); diff --git a/src/arena.c b/src/arena.c index 16be6b1e..f1ee25a7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -595,12 +595,12 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return decay_ms_read(&arena->pa_shard.decay_dirty); + return pa_shard_dirty_decay_ms_get(&arena->pa_shard); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return decay_ms_read(&arena->pa_shard.decay_muzzy); + return pa_shard_muzzy_decay_ms_get(&arena->pa_shard); } static bool diff --git a/src/extent.c b/src/extent.c index 0162494e..3d8af3d6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1013,7 +1013,7 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && - arena_may_force_decay(arena)) { + pa_shard_may_force_decay(&arena->pa_shard)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); extent_maximally_purge(tsdn, arena, ehooks, edata); From eba35e2e486ab81f44126d86bbb6555a02072fe2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 10:37:46 -0700 Subject: [PATCH 1643/2608] Remove extent knowledge of arena. 
--- include/jemalloc/internal/extent.h | 24 +-- src/arena.c | 31 ++-- src/extent.c | 286 ++++++++++++++--------------- src/extent_dss.c | 6 +- src/large.c | 8 +- 5 files changed, 176 insertions(+), 179 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index bb01254c..8b2db184 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -19,34 +19,34 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; -edata_t *ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero); -edata_t *ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero); -void ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -edata_t *ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); -void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); -void 
extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +void extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -bool extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); -bool extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); +bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); +bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/src/arena.c b/src/arena.c index f1ee25a7..7934a6bf 100644 --- a/src/arena.c +++ b/src/arena.c @@ -282,8 +282,8 @@ arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ecache_dalloc(tsdn, arena, ehooks, &arena->pa_shard.ecache_dirty, - edata); + ecache_dalloc(tsdn, &arena->pa_shard, ehooks, + &arena->pa_shard.ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -459,16 +459,16 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t mapped_add; size_t esize = usize + sz_large_pad; - edata_t *edata = ecache_alloc(tsdn, arena, ehooks, + edata_t *edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_dirty, NULL, esize, alignment, false, szind, zero); if 
(edata == NULL && arena_may_have_muzzy(arena)) { - edata = ecache_alloc(tsdn, arena, ehooks, + edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, NULL, esize, alignment, false, szind, zero); } if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, arena, ehooks, + edata = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, NULL, esize, alignment, false, szind, zero); if (config_stats) { @@ -655,7 +655,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, size_t nstashed = 0; edata_t *edata; while (nstashed < npages_decay_max && - (edata = ecache_evict(tsdn, arena, ehooks, ecache, npages_limit)) + (edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, ecache, npages_limit)) != NULL) { edata_list_append(decay_extents, edata); nstashed += edata_size_get(edata) >> LG_PAGE; @@ -690,9 +690,9 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, not_reached(); case extent_state_dirty: if (!all && muzzy_decay_ms != 0 && - !extent_purge_lazy_wrapper(tsdn, arena, - ehooks, edata, 0, edata_size_get(edata))) { - ecache_dalloc(tsdn, arena, ehooks, + !extent_purge_lazy_wrapper(tsdn, ehooks, edata, 0, + edata_size_get(edata))) { + ecache_dalloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, edata); arena_background_thread_inactivity_check(tsdn, arena, is_background_thread); @@ -700,7 +700,8 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, + edata); if (config_stats) { nunmapped += npages; } @@ -988,9 +989,9 @@ arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { */ ehooks_t *ehooks = arena_get_ehooks(arena); edata_t *edata; - while ((edata = ecache_evict(tsdn, arena, ehooks, + while ((edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, 0)) != NULL) { - 
extent_destroy_wrapper(tsdn, arena, ehooks, edata); + extent_destroy_wrapper(tsdn, &arena->pa_shard, ehooks, edata); } } @@ -1040,7 +1041,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); zero = false; - slab = ecache_alloc_grow(tsdn, arena, ehooks, + slab = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, NULL, bin_info->slab_size, PAGE, true, szind, &zero); @@ -1061,11 +1062,11 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard ehooks_t *ehooks = arena_get_ehooks(arena); szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - edata_t *slab = ecache_alloc(tsdn, arena, ehooks, + edata_t *slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_dirty, NULL, bin_info->slab_size, PAGE, true, binind, &zero); if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = ecache_alloc(tsdn, arena, ehooks, + slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, NULL, bin_info->slab_size, PAGE, true, binind, &zero); } diff --git a/src/extent.c b/src/extent.c index 3d8af3d6..595916a1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -15,12 +15,10 @@ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static bool extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, - bool growing_retained); -static bool extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, - bool growing_retained); +static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length, bool growing_retained); +static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, 
size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, @@ -39,15 +37,16 @@ static atomic_zu_t highpages; */ static void extent_deregister(tsdn_t *tsdn, edata_t *edata); -static edata_t *extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained); +static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, + ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, + size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, + bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); -static void extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); -static edata_t *extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, +static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit); @@ -70,23 +69,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, } edata_t * -ecache_alloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero) { - assert(size != 0); - assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - bool commit = true; - edata_t *edata = extent_recycle(tsdn, arena, ehooks, ecache, new_addr, - size, alignment, slab, szind, zero, &commit, 
false); - assert(edata == NULL || edata_dumpable_get(edata)); - return edata; -} - -edata_t * -ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero) { assert(size != 0); @@ -95,7 +78,23 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_alloc_retained(tsdn, arena, ehooks, new_addr, + edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, + new_addr, size, alignment, slab, szind, zero, &commit, false); + assert(edata == NULL || edata_dumpable_get(edata)); + return edata; +} + +edata_t * +ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero) { + assert(size != 0); + assert(alignment != 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + bool commit = true; + edata_t *edata = extent_alloc_retained(tsdn, shard, ehooks, new_addr, size, alignment, slab, szind, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { @@ -107,7 +106,7 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, */ return NULL; } - edata = extent_alloc_wrapper(tsdn, arena, ehooks, new_addr, + edata = extent_alloc_wrapper(tsdn, shard, ehooks, new_addr, size, alignment, slab, szind, zero, &commit); } @@ -116,8 +115,8 @@ ecache_alloc_grow(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } void -ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata) { +ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); assert(edata_dumpable_get(edata)); @@ -127,12 +126,12 @@ 
ecache_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, arena, ehooks, ecache, edata, false); + extent_record(tsdn, shard, ehooks, ecache, edata, false); } edata_t * -ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - size_t npages_min) { +ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, size_t npages_min) { malloc_mutex_lock(tsdn, &ecache->mtx); /* @@ -157,8 +156,8 @@ ecache_evict(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, - &arena->pa_shard.edata_cache, ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, &shard->edata_cache, + ehooks, ecache, edata)) { break; } /* @@ -195,11 +194,11 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. */ static void -extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&arena->pa_shard.stats->abandoned_vm, sz, + atomic_fetch_add_zu(&shard->stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* @@ -207,13 +206,13 @@ extents_abandon_vm(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * that this is only a virtual memory leak. 
*/ if (ecache->state == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, arena, ehooks, edata, 0, sz, + if (extent_purge_lazy_impl(tsdn, ehooks, edata, 0, sz, growing_retained)) { - extent_purge_forced_impl(tsdn, arena, ehooks, edata, 0, + extent_purge_forced_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); } static void @@ -356,7 +355,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { * given allocation request. */ static edata_t * -extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -440,7 +439,7 @@ typedef enum { } extent_split_interior_result_t; static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ @@ -463,9 +462,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, - ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, - szind, slab, growing_retained); + *edata = extent_split_impl(tsdn, &shard->edata_cache, ehooks, + *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, + slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -475,9 +474,9 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &arena->pa_shard.edata_cache, - ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, - false, growing_retained); + *trail = extent_split_impl(tsdn, &shard->edata_cache, ehooks, + *edata, size, szind, slab, trailsize, SC_NSIZES, false, + growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -501,7 +500,7 @@ extent_split_interior(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * and put back into ecache. */ static edata_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, edata_t *edata, bool growing_retained) { edata_t *lead; @@ -510,7 +509,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior( - tsdn, arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, + tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, new_addr, size, alignment, slab, szind, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok @@ -544,7 +543,7 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (to_leak != NULL) { void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, ecache, to_leak, + extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, growing_retained); assert(emap_lock_edata_from_addr(tsdn, &emap_global, leak, false) == NULL); @@ -559,21 +558,21 @@ extent_recycle_split(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * in the given ecache_t. 
*/ static edata_t * -extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit, bool growing_retained) { +extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + szind_t szind, bool *zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(new_addr == NULL || !slab); assert(!*zero || !slab); - edata_t *edata = extent_recycle_extract(tsdn, arena, ehooks, ecache, + edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, arena, ehooks, ecache, new_addr, + edata = extent_recycle_split(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, szind, edata, growing_retained); if (edata == NULL) { return NULL; @@ -582,7 +581,7 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, arena, ehooks, ecache, edata, + extent_record(tsdn, shard, ehooks, ecache, edata, growing_retained); return NULL; } @@ -614,13 +613,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint - * virtual memory ranges retained by each arena. + * virtual memory ranges retained by each shard. 
*/ static edata_t * -extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); assert(!*zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; @@ -633,20 +632,19 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz( - arena->pa_shard.ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(shard->ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (arena->pa_shard.ecache_grow.next + egn_skip >= + if (shard->ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } alloc_size = sz_pind2sz( - arena->pa_shard.ecache_grow.next + egn_skip); + shard->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); + edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); if (edata == NULL) { goto label_err; } @@ -657,17 +655,16 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); goto label_err; } - edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, - pa_shard_extent_sn_next(&arena->pa_shard), extent_state_active, - zeroed, committed, true, EXTENT_IS_HEAD); + edata_init(edata, ecache_ind_get(&shard->ecache_retained), ptr, + alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), + extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { - 
edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); goto label_err; } @@ -684,17 +681,17 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, - arena, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, + shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, alignment, slab, szind, true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, lead, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, lead, true); } if (trail != NULL) { - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, trail, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, trail, true); } } else { /* @@ -706,13 +703,13 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, to_salvage, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, to_leak); - extents_abandon_vm(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, to_leak, true); + extents_abandon_vm(tsdn, shard, ehooks, + &shard->ecache_retained, to_leak, true); } goto label_err; } @@ -720,8 +717,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { - extent_record(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, edata, true); + extent_record(tsdn, shard, ehooks, + &shard->ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. 
*/ @@ -739,15 +736,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->pa_shard.ecache_grow.next + egn_skip + 1 <= - arena->pa_shard.ecache_grow.limit) { - arena->pa_shard.ecache_grow.next += egn_skip + 1; + if (shard->ecache_grow.next + egn_skip + 1 <= + shard->ecache_grow.limit) { + shard->ecache_grow.next += egn_skip + 1; } else { - arena->pa_shard.ecache_grow.next - = arena->pa_shard.ecache_grow.limit; + shard->ecache_grow.next = shard->ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -765,47 +761,47 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); return NULL; } static edata_t * -extent_alloc_retained(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); - edata_t *edata = extent_recycle(tsdn, arena, ehooks, - &arena->pa_shard.ecache_retained, new_addr, size, alignment, slab, + edata_t *edata = extent_recycle(tsdn, shard, ehooks, + &shard->ecache_retained, new_addr, size, alignment, slab, szind, zero, commit, true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - edata = 
extent_grow_retained(tsdn, arena, ehooks, size, + edata = extent_grow_retained(tsdn, shard, ehooks, size, alignment, slab, szind, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. */ } else { - malloc_mutex_unlock(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->ecache_grow.mtx); return edata; } edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, &arena->pa_shard.edata_cache); + edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); if (edata == NULL) { return NULL; } @@ -813,14 +809,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&arena->pa_shard.ecache_dirty), addr, - size, slab, szind, pa_shard_extent_sn_next(&arena->pa_shard), + edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, + size, slab, szind, pa_shard_extent_sn_next(shard), extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } @@ -956,24 +952,24 @@ extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, /* Purge a single extent to retained / unmapped directly. 
*/ static void -extent_maximally_purge(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, +extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { size_t extent_size = edata_size_get(edata); - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + extent_dalloc_wrapper(tsdn, shard, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, *arena->pa_shard.stats_mtx); + LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.nmadvise, 1); + LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->decay_dirty.purged, + LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.purged, extent_size >> LG_PAGE); locked_dec_zu(tsdn, - LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->mapped, extent_size); - LOCKEDINT_MTX_UNLOCK(tsdn, *arena->pa_shard.stats_mtx); + LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->mapped, extent_size); + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); } } @@ -983,8 +979,8 @@ extent_maximally_purge(tsdn_t *tsdn,arena_t *arena, ehooks_t *ehooks, * given ecache_t (coalesces, deregisters slab interiors, the heap operations). 
*/ static void -extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool growing_retained) { +extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool growing_retained) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -1000,23 +996,23 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, emap_assert_mapped(tsdn, &emap_global, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, &arena->pa_shard.edata_cache, - ehooks, ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, &shard->edata_cache, ehooks, + ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &arena->pa_shard.ecache_dirty); + assert(ecache == &shard->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, - &arena->pa_shard.edata_cache, ehooks, ecache, edata, + &shard->edata_cache, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && - pa_shard_may_force_decay(&arena->pa_shard)) { + pa_shard_may_force_decay(shard)) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &ecache->mtx); - extent_maximally_purge(tsdn, arena, ehooks, edata); + extent_maximally_purge(tsdn, shard, ehooks, edata); return; } } @@ -1026,20 +1022,20 @@ extent_record(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ecache_t *ecache, } void -extent_dalloc_gap(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (extent_register(tsdn, edata)) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); return; } - extent_dalloc_wrapper(tsdn, arena, ehooks, edata); + extent_dalloc_wrapper(tsdn, shard, ehooks, edata); } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { bool err; @@ -1055,14 +1051,14 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); } return err; } void -extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { assert(edata_dumpable_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1075,7 +1071,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, * threads, and reregister if deallocation fails. 
*/ extent_deregister(tsdn, edata); - if (!extent_dalloc_wrapper_try(tsdn, arena, ehooks, edata)) { + if (!extent_dalloc_wrapper_try(tsdn, shard, ehooks, edata)) { return; } extent_reregister(tsdn, edata); @@ -1104,12 +1100,12 @@ extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, arena, ehooks, &arena->pa_shard.ecache_retained, - edata, false); + extent_record(tsdn, shard, ehooks, &shard->ecache_retained, edata, + false); } void -extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, +extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -1125,7 +1121,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, &arena->pa_shard.edata_cache, edata); + edata_cache_put(tsdn, &shard->edata_cache, edata); } static bool @@ -1158,8 +1154,8 @@ extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, } static bool -extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { +extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); bool err = ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), @@ -1168,15 +1164,15 @@ extent_purge_lazy_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } bool -extent_purge_lazy_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, arena, ehooks, edata, offset, +extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_purge_lazy_impl(tsdn, ehooks, edata, offset, length, false); } static bool -extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length, bool growing_retained) { +extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); bool err = ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), @@ -1185,10 +1181,10 @@ extent_purge_forced_impl(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, } bool -extent_purge_forced_wrapper(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, arena, ehooks, edata, - offset, length, false); +extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + size_t offset, size_t length) { + return extent_purge_forced_impl(tsdn, ehooks, edata, offset, length, + false); } /* diff --git a/src/extent_dss.c b/src/extent_dss.c index 55f037ef..de6852e0 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -188,8 +188,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { ehooks_t *ehooks = arena_get_ehooks( arena); - extent_dalloc_gap(tsdn, arena, ehooks, - gap); + extent_dalloc_gap(tsdn, + &arena->pa_shard, ehooks, gap); } else { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, 
gap); @@ -208,7 +208,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_state_active, false, true, true, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, - arena, ehooks, &edata, 0, size)) { + ehooks, &edata, 0, size)) { memset(ret, 0, size); } } diff --git a/src/large.c b/src/large.c index 57bf6748..26a1740c 100644 --- a/src/large.c +++ b/src/large.c @@ -120,17 +120,17 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool is_zeroed_trail = zero; edata_t *trail; bool new_mapping; - if ((trail = ecache_alloc(tsdn, arena, ehooks, + if ((trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_dirty, edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL - || (trail = ecache_alloc(tsdn, arena, ehooks, + || (trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { if (config_stats) { new_mapping = false; } } else { - if ((trail = ecache_alloc_grow(tsdn, arena, ehooks, + if ((trail = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_retained, edata_past_get(edata), trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) == NULL) { @@ -143,7 +143,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, if (extent_merge_wrapper(tsdn, ehooks, &arena->pa_shard.edata_cache, edata, trail)) { - extent_dalloc_wrapper(tsdn, arena, ehooks, trail); + extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, trail); return true; } From 7624043a41087bb5124e8dadb184f53dd8583def Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 12:04:16 -0700 Subject: [PATCH 1644/2608] PA: Add ehook-getting support. 
--- include/jemalloc/internal/pa.h | 9 +++++++++ src/pa.c | 2 ++ 2 files changed, 11 insertions(+) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 655e46b4..827c0b5e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/base.h" #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" @@ -80,6 +81,9 @@ struct pa_shard_s { */ decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ + + /* The base from which we get the ehooks and allocate metadat. */ + base_t *base; }; static inline void @@ -105,6 +109,11 @@ pa_shard_may_force_decay(pa_shard_t *shard) { || pa_shard_muzzy_decay_ms_get(shard) == -1); } +static inline ehooks_t * +pa_shard_ehooks_get(pa_shard_t *shard) { + return base_ehooks_get(shard->base); +} + /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); diff --git a/src/pa.c b/src/pa.c index e4dbb040..a4ec4bd0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -48,6 +48,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); + shard->base = base; + return false; } From 9f93625c1438a4dadc60bda9e43c63bcadd21ebd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 12:29:12 -0700 Subject: [PATCH 1645/2608] PA: Move in arena large allocation functionality. 
--- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 31 +++------------------------ src/pa.c | 39 ++++++++++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 827c0b5e..e1821e69 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -119,4 +119,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); +edata_t * +pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, + bool slab, szind_t szind, bool *zero, size_t *mapped_add); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 7934a6bf..1e3ae6e5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -451,37 +451,12 @@ arena_may_have_muzzy(arena_t *arena) { edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { - ehooks_t *ehooks = arena_get_ehooks(arena); - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - szind_t szind = sz_size2index(usize); size_t mapped_add; size_t esize = usize + sz_large_pad; - edata_t *edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, NULL, esize, alignment, false, szind, - zero); - if (edata == NULL && arena_may_have_muzzy(arena)) { - edata = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, NULL, esize, alignment, - false, szind, zero); - } - if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, NULL, esize, alignment, - false, szind, zero); - if (config_stats) { - /* - * edata may be NULL on OOM, but in that case mapped_add - * isn't used below, so there's no need to conditionlly - * set it to 0 here. 
- */ - mapped_add = esize; - } - } else if (config_stats) { - mapped_add = 0; - } + + edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, + /* slab */ false, szind, zero, &mapped_add); if (edata != NULL) { if (config_stats) { diff --git a/src/pa.c b/src/pa.c index a4ec4bd0..072d4852 100644 --- a/src/pa.c +++ b/src/pa.c @@ -57,3 +57,42 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard) { return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); } + +static bool +pa_shard_may_have_muzzy(pa_shard_t *shard) { + return pa_shard_muzzy_decay_ms_get(shard) != 0; +} + +edata_t * +pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, + bool slab, szind_t szind, bool *zero, size_t *mapped_add) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + + edata_t *edata = ecache_alloc(tsdn, shard, ehooks, + &shard->ecache_dirty, NULL, size, alignment, slab, szind, + zero); + if (edata == NULL && pa_shard_may_have_muzzy(shard)) { + edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, + NULL, size, alignment, slab, szind, zero); + } + + if (edata == NULL) { + edata = ecache_alloc_grow(tsdn, shard, ehooks, + &shard->ecache_retained, NULL, size, alignment, slab, + szind, zero); + if (config_stats) { + /* + * edata may be NULL on OOM, but in that case mapped_add + * isn't used below, so there's no need to conditionlly + * set it to 0 here. + */ + *mapped_add = size; + } + } else if (config_stats) { + *mapped_add = 0; + } + return edata; +} From 7be3dea82c8489e7e892c72b5f8d0a2901ff4695 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 13:58:57 -0700 Subject: [PATCH 1646/2608] PA: Have slab allocations use it. 
--- src/arena.c | 50 +++++++++----------------------------------------- src/pa.c | 4 +--- 2 files changed, 10 insertions(+), 44 deletions(-) diff --git a/src/arena.c b/src/arena.c index 1e3ae6e5..c3365a1a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -443,11 +443,6 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, arena_large_malloc_stats_update(tsdn, arena, usize); } -static bool -arena_may_have_muzzy(arena_t *arena) { - return arena_muzzy_decay_ms_get(arena) != 0; -} - edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { @@ -1006,51 +1001,24 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { base_delete(tsd_tsdn(tsd), arena->base); } -static edata_t * -arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - const bin_info_t *bin_info, szind_t szind) { - edata_t *slab; - bool zero; - - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - zero = false; - slab = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, NULL, bin_info->slab_size, PAGE, - true, szind, &zero); - - if (config_stats && slab != NULL) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, - bin_info->slab_size); - } - - return slab; -} - static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ehooks_t *ehooks = arena_get_ehooks(arena); - szind_t szind = sz_size2index(bin_info->reg_size); bool zero = false; - edata_t *slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, NULL, bin_info->slab_size, PAGE, - true, binind, &zero); - if (slab == NULL && arena_may_have_muzzy(arena)) { - slab = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, NULL, bin_info->slab_size, - PAGE, true, binind, &zero); + size_t mapped_add = 0; + 
+ edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, + PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add); + if (config_stats && slab != NULL && mapped_add != 0) { + pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, + bin_info->slab_size); } + if (slab == NULL) { - slab = arena_slab_alloc_hard(tsdn, arena, ehooks, bin_info, - szind); - if (slab == NULL) { - return NULL; - } + return NULL; } assert(edata_slab_get(slab)); diff --git a/src/pa.c b/src/pa.c index 072d4852..0dbf0445 100644 --- a/src/pa.c +++ b/src/pa.c @@ -68,6 +68,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + *mapped_add = 0; ehooks_t *ehooks = pa_shard_ehooks_get(shard); @@ -78,7 +79,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, NULL, size, alignment, slab, szind, zero); } - if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, @@ -91,8 +91,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, */ *mapped_add = size; } - } else if (config_stats) { - *mapped_add = 0; } return edata; } From 0880c2ab9756ddb59b55dea673b20bd80922b487 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 14:38:55 -0700 Subject: [PATCH 1647/2608] PA: Have large expands use it. 
--- include/jemalloc/internal/pa.h | 9 ++++-- src/large.c | 51 ++++++++-------------------------- src/pa.c | 40 ++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index e1821e69..0df2b4b3 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -119,7 +119,10 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); -edata_t * -pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool *zero, size_t *mapped_add); +edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, + size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); +/* Returns true on error, in which case nothing changed. */ +bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + size_t new_usize, bool *zero, size_t *mapped_add); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/large.c b/src/large.c index 26a1740c..ff43a8d1 100644 --- a/src/large.c +++ b/src/large.c @@ -101,57 +101,28 @@ static bool large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { arena_t *arena = arena_get_from_edata(edata); - size_t oldusize = edata_usize_get(edata); - ehooks_t *ehooks = arena_get_ehooks(arena); - size_t trailsize = usize - oldusize; - - if (ehooks_merge_will_fail(ehooks)) { - return true; - } if (config_fill && unlikely(opt_zero)) { zero = true; } + + size_t old_usize = edata_usize_get(edata); + /* * Copy zero into is_zeroed_trail and pass the copy when allocating the * extent, so that it is possible to make correct zero fill decisions * below, even if is_zeroed_trail ends up true when zero is false. 
*/ bool is_zeroed_trail = zero; - edata_t *trail; - bool new_mapping; - if ((trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, edata_past_get(edata), trailsize, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL - || (trail = ecache_alloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, edata_past_get(edata), trailsize, - CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) != NULL) { - if (config_stats) { - new_mapping = false; - } - } else { - if ((trail = ecache_alloc_grow(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, edata_past_get(edata), - trailsize, CACHELINE, false, SC_NSIZES, &is_zeroed_trail)) - == NULL) { - return true; - } - if (config_stats) { - new_mapping = true; - } - } - - if (extent_merge_wrapper(tsdn, ehooks, &arena->pa_shard.edata_cache, - edata, trail)) { - extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, trail); + size_t mapped_add; + bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize, + &is_zeroed_trail, &mapped_add); + if (err) { return true; } - szind_t szind = sz_size2index(usize); - emap_remap(tsdn, &emap_global, edata, szind, false); - - if (config_stats && new_mapping) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, trailsize); + if (config_stats && mapped_add > 0) { + pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, mapped_add); } if (zero) { @@ -164,7 +135,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * of CACHELINE in [0 .. PAGE). 
*/ void *zbase = (void *) - ((uintptr_t)edata_addr_get(edata) + oldusize); + ((uintptr_t)edata_addr_get(edata) + old_usize); void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + PAGE)); size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; @@ -173,7 +144,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } assert(is_zeroed_trail); } - arena_extent_ralloc_large_expand(tsdn, arena, edata, oldusize); + arena_extent_ralloc_large_expand(tsdn, arena, edata, old_usize); return false; } diff --git a/src/pa.c b/src/pa.c index 0dbf0445..7c3b568a 100644 --- a/src/pa.c +++ b/src/pa.c @@ -94,3 +94,43 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, } return edata; } + +bool +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, + bool *zero, size_t *mapped_add) { + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + size_t old_usize = edata_usize_get(edata); + size_t trail_size = new_usize - old_usize; + void *trail_begin = edata_past_get(edata); + + *mapped_add = 0; + if (ehooks_merge_will_fail(ehooks)) { + return true; + } + edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, + trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, zero); + if (trail == NULL) { + trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, + trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, + zero); + } + if (trail == NULL) { + trail = ecache_alloc_grow(tsdn, shard, ehooks, + &shard->ecache_retained, trail_begin, trail_size, PAGE, + /* slab */ false, SC_NSIZES, zero); + *mapped_add = trail_size; + } + if (trail == NULL) { + *mapped_add = 0; + return true; + } + if (extent_merge_wrapper(tsdn, ehooks, &shard->edata_cache, edata, + trail)) { + extent_dalloc_wrapper(tsdn, shard, ehooks, trail); + *mapped_add = 0; + return true; + } + szind_t szind = sz_size2index(new_usize); + emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); + return false; +} From 
5bcc2c2ab9b46cc15c1bc054a74615daabfd3675 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 17:21:04 -0700 Subject: [PATCH 1648/2608] PA: Have expand take szind and slab. This isn't really necessary, but having a uniform API will help us later. --- include/jemalloc/internal/pa.h | 5 +++-- src/large.c | 3 ++- src/pa.c | 5 ++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0df2b4b3..ef140b3e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -122,7 +122,8 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); /* Returns true on error, in which case nothing changed. */ -bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - size_t new_usize, bool *zero, size_t *mapped_add); +bool +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, + szind_t szind, bool slab, bool *zero, size_t *mapped_add); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/large.c b/src/large.c index ff43a8d1..60b51d8c 100644 --- a/src/large.c +++ b/src/large.c @@ -115,8 +115,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, */ bool is_zeroed_trail = zero; size_t mapped_add; + szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize, - &is_zeroed_trail, &mapped_add); + szind, /* slab */ false, &is_zeroed_trail, &mapped_add); if (err) { return true; } diff --git a/src/pa.c b/src/pa.c index 7c3b568a..7fafa7e3 100644 --- a/src/pa.c +++ b/src/pa.c @@ -97,7 +97,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, - bool *zero, size_t *mapped_add) { + szind_t szind, bool slab, bool *zero, size_t *mapped_add) { ehooks_t 
*ehooks = pa_shard_ehooks_get(shard); size_t old_usize = edata_usize_get(edata); size_t trail_size = new_usize - old_usize; @@ -130,7 +130,6 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, *mapped_add = 0; return true; } - szind_t szind = sz_size2index(new_usize); - emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); + emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } From 74958567a4fb1917cc6c1e9d5ee98378a8781f1a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Mar 2020 17:27:31 -0700 Subject: [PATCH 1649/2608] PA: have expand take sizes instead of new usize. This avoids involving usize, which makes some of the stats modifications more intuitively correct. --- include/jemalloc/internal/pa.h | 5 ++--- src/large.c | 4 +++- src/pa.c | 20 +++++++++++--------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ef140b3e..a4f80818 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -122,8 +122,7 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); /* Returns true on error, in which case nothing changed. 
*/ -bool -pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, - szind_t szind, bool slab, bool *zero, size_t *mapped_add); +bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/large.c b/src/large.c index 60b51d8c..c01b0577 100644 --- a/src/large.c +++ b/src/large.c @@ -106,7 +106,9 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, zero = true; } + size_t old_size = edata_size_get(edata); size_t old_usize = edata_usize_get(edata); + size_t new_size = usize + sz_large_pad; /* * Copy zero into is_zeroed_trail and pass the copy when allocating the @@ -116,7 +118,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool is_zeroed_trail = zero; size_t mapped_add; szind_t szind = sz_size2index(usize); - bool err = pa_expand(tsdn, &arena->pa_shard, edata, usize, + bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, szind, /* slab */ false, &is_zeroed_trail, &mapped_add); if (err) { return true; diff --git a/src/pa.c b/src/pa.c index 7fafa7e3..8f33d9a4 100644 --- a/src/pa.c +++ b/src/pa.c @@ -96,29 +96,31 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, } bool -pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t new_usize, - szind_t szind, bool slab, bool *zero, size_t *mapped_add) { +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { + assert(new_size > old_size); + ehooks_t *ehooks = pa_shard_ehooks_get(shard); - size_t old_usize = edata_usize_get(edata); - size_t trail_size = new_usize - old_usize; void *trail_begin = edata_past_get(edata); + size_t expand_amount = new_size - old_size; *mapped_add = 0; if (ehooks_merge_will_fail(ehooks)) { return true; } edata_t *trail = 
ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, - trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, zero); + trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, + zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - trail_begin, trail_size, PAGE, /* slab */ false, SC_NSIZES, - zero); + trail_begin, expand_amount, PAGE, /* slab */ false, + SC_NSIZES, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, trail_begin, trail_size, PAGE, + &shard->ecache_retained, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, zero); - *mapped_add = trail_size; + *mapped_add = expand_amount; } if (trail == NULL) { *mapped_add = 0; From 71fc0dc968189e72a4437fb38759ef380a02a7ab Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 11:36:38 -0700 Subject: [PATCH 1650/2608] PA: Move in remaining page allocation functions. --- include/jemalloc/internal/arena_externs.h | 3 +- include/jemalloc/internal/pa.h | 16 ++++++++++ src/arena.c | 18 +++++------- src/large.c | 36 +++++++++++------------ src/pa.c | 32 ++++++++++++++++++++ 5 files changed, 75 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8548b1f0..cdbfa4b4 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -27,8 +27,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, arena_stats_extents_t *estats); -void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, edata_t *edata); +void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); #endif diff --git 
a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index a4f80818..df2e88f9 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -119,10 +119,26 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); size_t pa_shard_extent_sn_next(pa_shard_t *shard); +/* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add); +/* + * The same. Sets *generated_dirty to true if we produced new dirty pages, and + * false otherwise. + */ +bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *generated_dirty); +/* + * Frees the given edata back to the pa. Sets *generated_dirty if we produced + * new dirty pages (well, we alwyas set it for now; but this need not be the + * case). + * (We could make generated_dirty the return value of course, but this is more + * consistent with the shrink pathway and our error codes here). 
+ */ +void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool *generated_dirty); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index c3365a1a..35fefeb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -276,14 +276,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } } -void -arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - edata_t *edata) { +void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ecache_dalloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_dirty, edata); if (arena_dirty_decay_ms_get(arena) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { @@ -636,7 +632,7 @@ arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - bool all, edata_list_t *decay_extents, bool is_background_thread) { + bool all, edata_list_t *decay_extents) { size_t nmadvise, nunmapped; size_t npurged; @@ -728,8 +724,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - decay_stats, ecache, all, &decay_extents, - is_background_thread); + decay_stats, ecache, all, &decay_extents); assert(npurged == npurge); } @@ -805,8 +800,11 @@ void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); - ehooks_t *ehooks = arena_get_ehooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, ehooks, slab); + bool generated_dirty; + pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty); + if (generated_dirty) { + arena_handle_new_dirty_pages(tsdn, arena); + } } static void diff --git a/src/large.c b/src/large.c index 
c01b0577..2b913d65 100644 --- a/src/large.c +++ b/src/large.c @@ -69,30 +69,27 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { arena_t *arena = arena_get_from_edata(edata); - size_t oldusize = edata_usize_get(edata); ehooks_t *ehooks = arena_get_ehooks(arena); - size_t diff = edata_size_get(edata) - (usize + sz_large_pad); + size_t old_size = edata_size_get(edata); + size_t old_usize = edata_usize_get(edata); - assert(oldusize > usize); + assert(old_usize > usize); if (ehooks_split_will_fail(ehooks)) { return true; } - /* Split excess pages. */ - if (diff != 0) { - edata_t *trail = extent_split_wrapper(tsdn, - &arena->pa_shard.edata_cache, ehooks, edata, - usize + sz_large_pad, sz_size2index(usize), false, diff, - SC_NSIZES, false); - if (trail == NULL) { - return true; - } - - arena_extents_dirty_dalloc(tsdn, arena, ehooks, trail); + bool generated_dirty; + bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, + usize + sz_large_pad, sz_size2index(usize), false, + &generated_dirty); + if (err) { + return true; } - - arena_extent_ralloc_large_shrink(tsdn, arena, edata, oldusize); + if (generated_dirty) { + arena_handle_new_dirty_pages(tsdn, arena); + } + arena_extent_ralloc_large_shrink(tsdn, arena, edata, old_usize); return false; } @@ -275,8 +272,11 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - ehooks_t *ehooks = arena_get_ehooks(arena); - arena_extents_dirty_dalloc(tsdn, arena, ehooks, edata); + bool generated_dirty; + pa_dalloc(tsdn, &arena->pa_shard, edata, &generated_dirty); + if (generated_dirty) { + arena_handle_new_dirty_pages(tsdn, arena); + } } void diff --git a/src/pa.c b/src/pa.c index 8f33d9a4..dfbff226 100644 --- a/src/pa.c +++ b/src/pa.c @@ -99,6 +99,7 @@ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t 
*edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { assert(new_size > old_size); + assert(edata_size_get(edata) == old_size); ehooks_t *ehooks = pa_shard_ehooks_get(shard); void *trail_begin = edata_past_get(edata); @@ -135,3 +136,34 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } + +bool +pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool slab, bool *generated_dirty) { + assert(new_size < old_size); + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + *generated_dirty = false; + + if (ehooks_split_will_fail(ehooks)) { + return true; + } + + edata_t *trail = extent_split_wrapper(tsdn, &shard->edata_cache, ehooks, + edata, new_size, szind, slab, old_size - new_size, SC_NSIZES, + false); + if (trail == NULL) { + return true; + } + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); + *generated_dirty = true; + return false; +} + +void +pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool *generated_dirty) { + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); + *generated_dirty = true; +} From 655a09634347628abc6720ad1e2b6e1d08fdf8d9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 08:14:25 -0700 Subject: [PATCH 1651/2608] Move bg inactivity check out of purge inner loop. I.e. do it once per call to arena_decay_stashed instead of once per muzzy purge. 
--- src/arena.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index 35fefeb1..da3fa5c4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -660,8 +660,6 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, edata_size_get(edata))) { ecache_dalloc(tsdn, &arena->pa_shard, ehooks, &arena->pa_shard.ecache_muzzy, edata); - arena_background_thread_inactivity_check(tsdn, - arena, is_background_thread); break; } JEMALLOC_FALLTHROUGH; @@ -727,6 +725,8 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, decay_stats, ecache, all, &decay_extents); assert(npurged == npurge); } + arena_background_thread_inactivity_check(tsdn, arena, + is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); decay->purging = false; From aef28b2f8fc4031f970896b312127cda00bbc2d0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 12:00:45 -0700 Subject: [PATCH 1652/2608] PA: Move in stash_decayed. --- include/jemalloc/internal/pa.h | 5 ++++- src/arena.c | 21 +-------------------- src/pa.c | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index df2e88f9..99f1608b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -77,7 +77,7 @@ struct pa_shard_s { * Decay-based purging state, responsible for scheduling extent state * transitions. * - * Synchronization: internal. + * Synchronization: via the internal mutex. 
*/ decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ @@ -141,4 +141,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); +size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, + size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index da3fa5c4..efdda70b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -610,25 +610,6 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_ms); } -static size_t -arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, - ehooks_t *ehooks, ecache_t *ecache, size_t npages_limit, - size_t npages_decay_max, edata_list_t *decay_extents) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - /* Stash extents according to npages_limit. */ - size_t nstashed = 0; - edata_t *edata; - while (nstashed < npages_decay_max && - (edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, ecache, npages_limit)) - != NULL) { - edata_list_append(decay_extents, edata); - nstashed += edata_size_get(edata) >> LG_PAGE; - } - return nstashed; -} - static size_t arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -718,7 +699,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, edata_list_t decay_extents; edata_list_init(&decay_extents); - size_t npurge = arena_stash_decayed(tsdn, arena, ehooks, ecache, + size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, diff --git a/src/pa.c b/src/pa.c index dfbff226..d6fb4730 100644 --- a/src/pa.c +++ b/src/pa.c @@ -167,3 +167,24 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t 
*shard, edata_t *edata, ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); *generated_dirty = true; } + +size_t +pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, + size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + + /* Stash extents according to npages_limit. */ + size_t nstashed = 0; + while (nstashed < npages_decay_max) { + edata_t *edata = ecache_evict(tsdn, shard, ehooks, ecache, + npages_limit); + if (edata == NULL) { + break; + } + edata_list_append(decay_extents, edata); + nstashed += edata_size_get(edata) >> LG_PAGE; + } + return nstashed; +} From 3034f4a508524e995864e485f03da3fb2792856a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 12:23:09 -0700 Subject: [PATCH 1653/2608] PA: Move in decay_stashed. --- include/jemalloc/internal/pa.h | 5 ++- src/arena.c | 71 ++-------------------------------- src/pa.c | 68 +++++++++++++++++++++++++++++++- 3 files changed, 73 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 99f1608b..0c2294e0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -142,6 +142,9 @@ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents); + size_t npages_limit, size_t npages_decay_max, edata_list_t *result); +size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + edata_list_t *decay_extents); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index efdda70b..a378ba0c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -610,69 +610,6 @@ 
arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_ms); } -static size_t -arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, ehooks_t *ehooks, - decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - bool all, edata_list_t *decay_extents) { - size_t nmadvise, nunmapped; - size_t npurged; - - if (config_stats) { - nmadvise = 0; - nunmapped = 0; - } - npurged = 0; - - ssize_t muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - for (edata_t *edata = edata_list_first(decay_extents); edata != - NULL; edata = edata_list_first(decay_extents)) { - if (config_stats) { - nmadvise++; - } - size_t npages = edata_size_get(edata) >> LG_PAGE; - npurged += npages; - edata_list_remove(decay_extents, edata); - switch (ecache->state) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - if (!all && muzzy_decay_ms != 0 && - !extent_purge_lazy_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata))) { - ecache_dalloc(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_muzzy, edata); - break; - } - JEMALLOC_FALLTHROUGH; - case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, &arena->pa_shard, ehooks, - edata); - if (config_stats) { - nunmapped += npages; - } - break; - case extent_state_retained: - default: - not_reached(); - } - } - - if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay_stats->npurge, 1); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay_stats->nmadvise, nmadvise); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &decay_stats->purged, npurged); - locked_dec_zu(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->mapped, nunmapped << LG_PAGE); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); - } - - return npurged; -} - /* * npages_limit: Decay at most npages_decay_max pages without violating the * invariant: (ecache_npages_get(ecache) >= npages_limit). 
We need an upper @@ -694,16 +631,14 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - ehooks_t *ehooks = arena_get_ehooks(arena); - edata_list_t decay_extents; edata_list_init(&decay_extents); - size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { - size_t npurged = arena_decay_stashed(tsdn, arena, ehooks, decay, - decay_stats, ecache, all, &decay_extents); + size_t npurged = pa_decay_stashed(tsdn, &arena->pa_shard, + decay, decay_stats, ecache, /* fully_decay */all, + &decay_extents); assert(npurged == npurge); } arena_background_thread_inactivity_check(tsdn, arena, diff --git a/src/pa.c b/src/pa.c index d6fb4730..34177eb1 100644 --- a/src/pa.c +++ b/src/pa.c @@ -170,7 +170,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *decay_extents) { + size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pa_shard_ehooks_get(shard); @@ -183,8 +183,72 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, if (edata == NULL) { break; } - edata_list_append(decay_extents, edata); + edata_list_append(result, edata); nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; } + +size_t +pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + edata_list_t *decay_extents) { + bool err; + + size_t nmadvise = 0; + size_t nunmapped = 0; + size_t npurged = 0; + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + + bool try_muzzy = !fully_decay && pa_shard_may_have_muzzy(shard); + + for (edata_t *edata = edata_list_first(decay_extents); edata != + NULL; 
edata = edata_list_first(decay_extents)) { + edata_list_remove(decay_extents, edata); + + size_t size = edata_size_get(edata); + size_t npages = size >> LG_PAGE; + + nmadvise++; + npurged += npages; + + switch (ecache->state) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + if (try_muzzy) { + err = extent_purge_lazy_wrapper(tsdn, ehooks, + edata, /* offset */ 0, size); + if (!err) { + ecache_dalloc(tsdn, shard, ehooks, + &shard->ecache_muzzy, edata); + break; + } + } + JEMALLOC_FALLTHROUGH; + case extent_state_muzzy: + extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + nunmapped += npages; + break; + case extent_state_retained: + default: + not_reached(); + } + } + + if (config_stats) { + LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &decay_stats->npurge, 1); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &decay_stats->nmadvise, nmadvise); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &decay_stats->purged, npurged); + locked_dec_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->mapped, nunmapped << LG_PAGE); + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); + } + + return npurged; +} From 103f5feda598ec5bd857db8d2f072724ef82ef46 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 13:12:12 -0700 Subject: [PATCH 1654/2608] Move bg thread activity check out of purging core. 
--- src/arena.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/arena.c b/src/arena.c index a378ba0c..b92bb802 100644 --- a/src/arena.c +++ b/src/arena.c @@ -641,8 +641,6 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, &decay_extents); assert(npurged == npurge); } - arena_background_thread_inactivity_check(tsdn, arena, - is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); decay->purging = false; @@ -653,10 +651,25 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { + assert(!is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, all, 0, ecache_npages_get(ecache), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); + /* + * The previous pa_decay_to_limit call may not have actually + * decayed all pages, if new pages were added concurrently with + * the purge. + * + * I don't think we need an activity check for that case (some + * other thread must be deallocating, and they should do one), + * but we do one anyways. This line comes out of a refactoring + * diff in which the check was pulled out of the callee, and so + * an extra redundant check minimizes the change. We should + * reevaluate. 
+ */ + arena_background_thread_inactivity_check(tsdn, arena, + /* is_background_thread */ false); return false; } From f012c43be0c5a43267e145b05e69b974b60f5917 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 13:29:44 -0700 Subject: [PATCH 1655/2608] PA: Move in decay_to_limit --- include/jemalloc/internal/pa.h | 3 ++ src/arena.c | 56 ++++++---------------------------- src/pa.c | 35 +++++++++++++++++++++ 3 files changed, 47 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0c2294e0..ecaadbe8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -146,5 +146,8 @@ size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, edata_list_t *decay_extents); +void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + size_t npages_limit, size_t npages_decay_max); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index b92bb802..dddb0830 100644 --- a/src/arena.c +++ b/src/arena.c @@ -56,9 +56,6 @@ static unsigned huge_arena_ind; * definition. 
*/ -static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, - size_t npages_limit, size_t npages_decay_max, bool is_background_thread); static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, @@ -514,9 +511,9 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages, size_t npages_limit, bool is_background_thread) { if (current_npages > npages_limit) { - arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, - false, npages_limit, current_npages - npages_limit, - is_background_thread); + pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, + ecache, /* fully_decay */ false, npages_limit, + current_npages - npages_limit); } } @@ -530,9 +527,9 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, ssize_t decay_ms = decay_ms_read(decay); if (decay_ms <= 0) { if (decay_ms == 0) { - arena_decay_to_limit(tsdn, arena, decay, decay_stats, - ecache, false, 0, ecache_npages_get(ecache), - is_background_thread); + pa_decay_to_limit(tsdn, &arena->pa_shard, decay, + decay_stats, ecache, /* fully_decay */ false, 0, + ecache_npages_get(ecache)); } return false; } @@ -610,42 +607,6 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_ms); } -/* - * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper - * bound on number of pages in order to prevent unbounded growth (namely in - * stashed), otherwise unbounded new pages could be added to extents during the - * current decay run, so that the purging thread never finishes. 
- */ -static void -arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool all, - size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - if (decay->purging || npages_decay_max == 0) { - return; - } - decay->purging = true; - malloc_mutex_unlock(tsdn, &decay->mtx); - - edata_list_t decay_extents; - edata_list_init(&decay_extents); - size_t npurge = pa_stash_decayed(tsdn, &arena->pa_shard, ecache, - npages_limit, npages_decay_max, &decay_extents); - if (npurge != 0) { - size_t npurged = pa_decay_stashed(tsdn, &arena->pa_shard, - decay, decay_stats, ecache, /* fully_decay */all, - &decay_extents); - assert(npurged == npurge); - } - - malloc_mutex_lock(tsdn, &decay->mtx); - decay->purging = false; -} - static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, @@ -653,8 +614,9 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (all) { assert(!is_background_thread); malloc_mutex_lock(tsdn, &decay->mtx); - arena_decay_to_limit(tsdn, arena, decay, decay_stats, ecache, - all, 0, ecache_npages_get(ecache), is_background_thread); + pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, + ecache, /* fully_decay */ all, 0, + ecache_npages_get(ecache)); malloc_mutex_unlock(tsdn, &decay->mtx); /* * The previous pa_decay_to_limit call may not have actually diff --git a/src/pa.c b/src/pa.c index 34177eb1..eda1a0b6 100644 --- a/src/pa.c +++ b/src/pa.c @@ -252,3 +252,38 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return npurged; } + +/* + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (ecache_npages_get(ecache) >= npages_limit). 
We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. + */ +void +pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + size_t npages_limit, size_t npages_decay_max) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 1); + malloc_mutex_assert_owner(tsdn, &decay->mtx); + + if (decay->purging || npages_decay_max == 0) { + return; + } + decay->purging = true; + malloc_mutex_unlock(tsdn, &decay->mtx); + + edata_list_t decay_extents; + edata_list_init(&decay_extents); + size_t npurge = pa_stash_decayed(tsdn, shard, ecache, npages_limit, + npages_decay_max, &decay_extents); + if (npurge != 0) { + size_t npurged = pa_decay_stashed(tsdn, shard, decay, + decay_stats, ecache, fully_decay, &decay_extents); + assert(npurged == npurge); + } + + malloc_mutex_lock(tsdn, &decay->mtx); + decay->purging = false; +} From 65698b7f2e3613be8e848053213a850dd5a2cf92 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 14:13:01 -0700 Subject: [PATCH 1656/2608] PA: Remove public visibility of some internals. 
--- include/jemalloc/internal/pa.h | 5 ----- src/pa.c | 4 ++-- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ecaadbe8..ff5924c1 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -141,11 +141,6 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); -size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *result); -size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_t *decay_extents); void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max); diff --git a/src/pa.c b/src/pa.c index eda1a0b6..7a84cb03 100644 --- a/src/pa.c +++ b/src/pa.c @@ -168,7 +168,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, *generated_dirty = true; } -size_t +static size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -189,7 +189,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, return nstashed; } -size_t +static size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, edata_list_t *decay_extents) { From 2d6eec7b5cc2a537e5ff702778c0c15832b5f961 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 14:56:05 -0700 Subject: [PATCH 1657/2608] PA: Move in decay-all pathway. 
--- include/jemalloc/internal/pa.h | 2 ++ src/arena.c | 10 +++------- src/pa.c | 9 +++++++++ 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ff5924c1..db04aa0e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -144,5 +144,7 @@ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max); +void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index dddb0830..7c65c5c5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -612,12 +612,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { - assert(!is_background_thread); - malloc_mutex_lock(tsdn, &decay->mtx); - pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, - ecache, /* fully_decay */ all, 0, - ecache_npages_get(ecache)); - malloc_mutex_unlock(tsdn, &decay->mtx); + pa_decay_all(tsdn, &arena->pa_shard, decay, decay_stats, ecache, + /* fully_decay */ all); /* * The previous pa_decay_to_limit call may not have actually * decayed all pages, if new pages were added concurrently with @@ -630,9 +626,9 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * an extra redundant check minimizes the change. We should * reevaluate. 
*/ + assert(!is_background_thread); arena_background_thread_inactivity_check(tsdn, arena, /* is_background_thread */ false); - return false; } diff --git a/src/pa.c b/src/pa.c index 7a84cb03..711b8243 100644 --- a/src/pa.c +++ b/src/pa.c @@ -287,3 +287,12 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, malloc_mutex_lock(tsdn, &decay->mtx); decay->purging = false; } + +void +pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { + malloc_mutex_lock(tsdn, &decay->mtx); + pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, + fully_decay, 0, ecache_npages_get(ecache)); + malloc_mutex_unlock(tsdn, &decay->mtx); +} From 46a9d7fc0b0e5124cc8a1ca0e3caec85968a6842 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 15:42:29 -0700 Subject: [PATCH 1658/2608] PA: Move in rest of purging. --- include/jemalloc/internal/pa.h | 14 ++++++ src/arena.c | 79 +++++++++++----------------------- src/pa.c | 51 ++++++++++++++++++++++ 3 files changed, 90 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index db04aa0e..d99b9b73 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -7,6 +7,13 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/lockedint.h" +enum pa_decay_purge_setting_e { + PA_DECAY_PURGE_ALWAYS, + PA_DECAY_PURGE_NEVER, + PA_DECAY_PURGE_ON_EPOCH_ADVANCE +}; +typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; + /* * The page allocator; responsible for acquiring pages of memory for * allocations. 
@@ -147,4 +154,11 @@ void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +/* Returns true if the epoch advanced. */ +bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pa_decay_purge_setting_t decay_purge_setting); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 7c65c5c5..d1e61365 100644 --- a/src/arena.c +++ b/src/arena.c @@ -506,56 +506,6 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, arena_nactive_add(arena, udiff >> LG_PAGE); } -static void -arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - size_t current_npages, size_t npages_limit, bool is_background_thread) { - if (current_npages > npages_limit) { - pa_decay_to_limit(tsdn, &arena->pa_shard, decay, decay_stats, - ecache, /* fully_decay */ false, npages_limit, - current_npages - npages_limit); - } -} - -static bool -arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, - bool is_background_thread) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - /* Purge all or nothing if the option is disabled. */ - ssize_t decay_ms = decay_ms_read(decay); - if (decay_ms <= 0) { - if (decay_ms == 0) { - pa_decay_to_limit(tsdn, &arena->pa_shard, decay, - decay_stats, ecache, /* fully_decay */ false, 0, - ecache_npages_get(ecache)); - } - return false; - } - - /* - * If the deadline has been reached, advance to the current epoch and - * purge to the new limit if necessary. 
Note that dirty pages created - * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances, or - * being triggered by background threads (scheduled event). - */ - nstime_t time; - nstime_init_update(&time); - size_t npages_current = ecache_npages_get(ecache); - bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, - npages_current); - if (is_background_thread || - (epoch_advanced && !background_thread_enabled())) { - size_t npages_limit = decay_npages_limit_get(decay); - arena_decay_try_purge(tsdn, arena, decay, decay_stats, ecache, - npages_current, npages_limit, is_background_thread); - } - - return epoch_advanced; -} - ssize_t arena_dirty_decay_ms_get(arena_t *arena) { return pa_shard_dirty_decay_ms_get(&arena->pa_shard); @@ -566,6 +516,22 @@ arena_muzzy_decay_ms_get(arena_t *arena) { return pa_shard_muzzy_decay_ms_get(&arena->pa_shard); } +/* + * In situations where we're not forcing a decay (i.e. because the user + * specifically requested it), should we purge ourselves, or wait for the + * background thread to get to it. 
+ */ +static pa_decay_purge_setting_t +arena_decide_unforced_decay_purge_setting(bool is_background_thread) { + if (is_background_thread) { + return PA_DECAY_PURGE_ALWAYS; + } else if (!is_background_thread && background_thread_enabled()) { + return PA_DECAY_PURGE_NEVER; + } else { + return PA_DECAY_PURGE_ON_EPOCH_ADVANCE; + } +} + static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { @@ -585,7 +551,11 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, nstime_t cur_time; nstime_init_update(&cur_time); decay_reinit(decay, &cur_time, decay_ms); - arena_maybe_decay(tsdn, arena, decay, decay_stats, ecache, false); + pa_decay_purge_setting_t decay_purge = + arena_decide_unforced_decay_purge_setting( + /* is_background_thread */ false); + pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, + decay_purge); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -636,9 +606,10 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - - bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, decay_stats, - ecache, is_background_thread); + pa_decay_purge_setting_t decay_purge = + arena_decide_unforced_decay_purge_setting(is_background_thread); + bool epoch_advanced = pa_maybe_decay_purge(tsdn, &arena->pa_shard, + decay, decay_stats, ecache, decay_purge); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. 
*/ diff --git a/src/pa.c b/src/pa.c index 711b8243..06c205c4 100644 --- a/src/pa.c +++ b/src/pa.c @@ -296,3 +296,54 @@ pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, fully_decay, 0, ecache_npages_get(ecache)); malloc_mutex_unlock(tsdn, &decay->mtx); } + +static void +pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + size_t current_npages, size_t npages_limit) { + if (current_npages > npages_limit) { + pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, + /* fully_decay */ false, npages_limit, + current_npages - npages_limit); + } +} + +bool +pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, + pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pa_decay_purge_setting_t decay_purge_setting) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); + + /* Purge all or nothing if the option is disabled. */ + ssize_t decay_ms = decay_ms_read(decay); + if (decay_ms <= 0) { + if (decay_ms == 0) { + pa_decay_to_limit(tsdn, shard, decay, decay_stats, + ecache, /* fully_decay */ false, + /* npages_limit */ 0, ecache_npages_get(ecache)); + } + return false; + } + + /* + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances, or + * being triggered by background threads (scheduled event). 
+ */ + nstime_t time; + nstime_init_update(&time); + size_t npages_current = ecache_npages_get(ecache); + bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, + npages_current); + if (decay_purge_setting == PA_DECAY_PURGE_ALWAYS + || (epoch_advanced && decay_purge_setting + == PA_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + size_t npages_limit = decay_npages_limit_get(decay); + pa_decay_try_purge(tsdn, shard, decay, decay_stats, ecache, + npages_current, npages_limit); + } + + return epoch_advanced; +} From c075fd0bcb4a4de13204d26ff400bd315811e435 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 16:13:36 -0700 Subject: [PATCH 1659/2608] PA: Minor cleanups and comment fixes. --- include/jemalloc/internal/pa.h | 35 ++++++++++++++++++++++++++++------ src/arena.c | 8 +++++--- src/pa.c | 10 ++++------ 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d99b9b73..9636ced9 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -56,6 +56,21 @@ struct pa_shard_stats_s { atomic_zu_t abandoned_vm; }; +/* + * The local allocator handle. Keeps the state necessary to satisfy page-sized + * allocations. + * + * The contents are mostly internal to the PA module. The key exception is that + * arena decay code is allowed to grab pointers to the dirty and muzzy ecaches + * decay_ts, for a couple of queries, passing them back to a PA function, or + * acquiring decay.mtx and looking at decay.purging. The reasoning is that, + * while PA decides what and how to purge, the arena code decides when and where + * (e.g. on what thread). It's allowed to use the presence of another purger to + * decide. + * (The background thread code also touches some other decay internals, but + * that's not fundamental; its' just an artifact of a partial refactoring, and + * its accesses could be straightforwardly moved inside the decay module). 
+ */ typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { /* @@ -148,15 +163,23 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); -void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - size_t npages_limit, size_t npages_decay_max); -void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +/* + * All purging functions require holding decay->mtx. This is one of the few + * places external modules are allowed to peek inside pa_shard_t internals. + */ +/* + * Decays the number of pages currently in the ecache. This might not leave the + * ecache empty if other threads are inserting dirty objects into it + * concurrently with the call. + */ void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); -/* Returns true if the epoch advanced. */ +/* + * Updates decay settings for the current time, and conditionally purges in + * response (depending on decay_purge_setting). Returns whether or not the + * epoch advanced. 
+ */ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); diff --git a/src/arena.c b/src/arena.c index d1e61365..25fad273 100644 --- a/src/arena.c +++ b/src/arena.c @@ -582,12 +582,14 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { + malloc_mutex_lock(tsdn, &decay->mtx); pa_decay_all(tsdn, &arena->pa_shard, decay, decay_stats, ecache, /* fully_decay */ all); + malloc_mutex_unlock(tsdn, &decay->mtx); /* - * The previous pa_decay_to_limit call may not have actually - * decayed all pages, if new pages were added concurrently with - * the purge. + * The previous pa_decay_all call may not have actually decayed + * all pages, if new pages were added concurrently with the + * purge. * * I don't think we need an activity check for that case (some * other thread must be deallocating, and they should do one), diff --git a/src/pa.c b/src/pa.c index 06c205c4..d9eeb694 100644 --- a/src/pa.c +++ b/src/pa.c @@ -260,13 +260,12 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, * stashed), otherwise unbounded new pages could be added to extents during the * current decay run, so that the purging thread never finishes. 
*/ -void +static void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); - malloc_mutex_assert_owner(tsdn, &decay->mtx); if (decay->purging || npages_decay_max == 0) { return; @@ -291,10 +290,9 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { - malloc_mutex_lock(tsdn, &decay->mtx); - pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, - fully_decay, 0, ecache_npages_get(ecache)); - malloc_mutex_unlock(tsdn, &decay->mtx); + malloc_mutex_assert_owner(tsdn, &decay->mtx); + pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, fully_decay, + /* npages_limit */ 0, ecache_npages_get(ecache)); } static void From 527dd4cdb8d1ec440fefe894ada4ccbc1c3e437d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 17:40:17 -0700 Subject: [PATCH 1660/2608] PA: Move in nactive counter. --- include/jemalloc/internal/arena_structs.h | 7 ------ include/jemalloc/internal/pa.h | 7 ++++++ src/arena.c | 29 +++-------------------- src/ctl.c | 2 +- src/pa.c | 29 +++++++++++++++++++++-- 5 files changed, 38 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 49568fc6..682450e3 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -67,13 +67,6 @@ struct arena_s { */ atomic_u_t dss_prec; - /* - * Number of pages in active extents. - * - * Synchronization: atomic. - */ - atomic_zu_t nactive; - /* * Extant large allocations. 
* diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9636ced9..f0b7faa1 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -73,6 +73,13 @@ struct pa_shard_stats_s { */ typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { + /* + * Number of pages in active extents. + * + * Synchronization: atomic. + */ + atomic_zu_t nactive; + /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. diff --git a/src/arena.c b/src/arena.c index 25fad273..f2886541 100644 --- a/src/arena.c +++ b/src/arena.c @@ -71,7 +71,7 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dss = dss_prec_names[arena_dss_prec_get(arena)]; *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - *nactive += atomic_load_zu(&arena->nactive, ATOMIC_RELAXED); + *nactive += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); *ndirty += ecache_npages_get(&arena->pa_shard.ecache_dirty); *nmuzzy += ecache_npages_get(&arena->pa_shard.ecache_muzzy); } @@ -136,7 +136,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp); atomic_load_add_store_zu(&astats->resident, base_resident + - (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + + (((atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED) + ecache_npages_get(&arena->pa_shard.ecache_dirty) + ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, @@ -386,17 +386,6 @@ arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) { edata_nfree_inc(slab); } -static void -arena_nactive_add(arena_t *arena, size_t add_pages) { - atomic_fetch_add_zu(&arena->nactive, add_pages, ATOMIC_RELAXED); -} - -static 
void -arena_nactive_sub(arena_t *arena, size_t sub_pages) { - assert(atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) >= sub_pages); - atomic_fetch_sub_zu(&arena->nactive, sub_pages, ATOMIC_RELAXED); -} - static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index, hindex; @@ -457,7 +446,6 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_add(arena, esize >> LG_PAGE); } if (edata != NULL && sz_large_pad != 0) { @@ -475,35 +463,30 @@ arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { edata_usize_get(edata)); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_sub(arena, edata_size_get(edata) >> LG_PAGE); } void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); - size_t udiff = oldusize - usize; if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_sub(arena, udiff >> LG_PAGE); } void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); - size_t udiff = usize - oldusize; if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - arena_nactive_add(arena, udiff >> LG_PAGE); } ssize_t @@ -658,8 +641,6 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { - arena_nactive_sub(arena, edata_size_get(slab) >> LG_PAGE); - bool generated_dirty; pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty); if (generated_dirty) { @@ -801,7 +782,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { } } - 
atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); + atomic_store_zu(&arena->pa_shard.nactive, 0, ATOMIC_RELAXED); } static void @@ -885,8 +866,6 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard edata_nfree_binshard_set(slab, bin_info->nregs, binshard); bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); - arena_nactive_add(arena, edata_size_get(slab) >> LG_PAGE); - return slab; } @@ -1637,8 +1616,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); - atomic_store_zu(&arena->nactive, 0, ATOMIC_RELAXED); - edata_list_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { diff --git a/src/ctl.c b/src/ctl.c index 9233c846..4350347a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3516,7 +3516,7 @@ experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) /* Expose the underlying counter for fast read. 
*/ - pactivep = (size_t *)&(arena->nactive.repr); + pactivep = (size_t *)&(arena->pa_shard.nactive.repr); READ(pactivep, size_t *); ret = 0; #else diff --git a/src/pa.c b/src/pa.c index d9eeb694..d678d823 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,17 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +static void +pa_nactive_add(pa_shard_t *shard, size_t add_pages) { + atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); +} + +static void +pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { + assert(atomic_load_zu(&shard->nactive, ATOMIC_RELAXED) >= sub_pages); + atomic_fetch_sub_zu(&shard->nactive, sub_pages, ATOMIC_RELAXED); +} + bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx) { @@ -43,6 +54,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, } atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; shard->stats = stats; @@ -83,7 +95,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, szind, zero); - if (config_stats) { + if (config_stats && edata != NULL) { /* * edata may be NULL on OOM, but in that case mapped_add * isn't used below, so there's no need to conditionlly @@ -92,6 +104,9 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, *mapped_add = size; } } + if (edata != NULL) { + pa_nactive_add(shard, size >> LG_PAGE); + } return edata; } @@ -100,6 +115,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); + assert((new_size & PAGE_MASK) == 0); ehooks_t *ehooks = 
pa_shard_ehooks_get(shard); void *trail_begin = edata_past_get(edata); @@ -133,6 +149,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, *mapped_add = 0; return true; } + pa_nactive_add(shard, expand_amount >> LG_PAGE); emap_remap(tsdn, &emap_global, edata, szind, slab); return false; } @@ -141,6 +158,9 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool slab, bool *generated_dirty) { assert(new_size < old_size); + assert(edata_size_get(edata) == old_size); + assert((new_size & PAGE_MASK) == 0); + size_t shrink_amount = old_size - new_size; ehooks_t *ehooks = pa_shard_ehooks_get(shard); *generated_dirty = false; @@ -150,11 +170,13 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = extent_split_wrapper(tsdn, &shard->edata_cache, ehooks, - edata, new_size, szind, slab, old_size - new_size, SC_NSIZES, + edata, new_size, szind, slab, shrink_amount, SC_NSIZES, false); if (trail == NULL) { return true; } + pa_nactive_sub(shard, shrink_amount >> LG_PAGE); + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); *generated_dirty = true; return false; @@ -163,6 +185,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { + pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); ehooks_t *ehooks = pa_shard_ehooks_get(shard); ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); *generated_dirty = true; @@ -345,3 +368,5 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return epoch_advanced; } + + From f6bfa3dccaa9bb6bfe97aecc32709680b1d47652 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 17:59:06 -0700 Subject: [PATCH 1661/2608] Move extent stats to the PA module. 
While we're at it, make them non-atomic -- they are purely derived statistics (and in fact aren't even in the arena_t or pa_shard_t). --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_stats.h | 16 -------- include/jemalloc/internal/ctl.h | 2 +- include/jemalloc/internal/pa.h | 17 ++++++++ src/arena.c | 17 ++++---- src/ctl.c | 48 ++++++++--------------- 6 files changed, 43 insertions(+), 59 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cdbfa4b4..24634958 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -26,7 +26,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - arena_stats_extents_t *estats); + pa_extent_stats_t *estats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 310b907b..9dc9e5f2 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -37,22 +37,6 @@ struct arena_stats_large_s { size_t curlextents; /* Derived. */ }; -typedef struct arena_stats_extents_s arena_stats_extents_t; -struct arena_stats_extents_s { - /* - * Stats for a given index in the range [0, SC_NPSIZES] in an extents_t. - * We track both bytes and # of extents: two extents in the same bucket - * may have different sizes if adjacent size classes differ by more than - * a page, so bytes cannot always be derived from # of extents. 
- */ - atomic_zu_t ndirty; - atomic_zu_t dirty_bytes; - atomic_zu_t nmuzzy; - atomic_zu_t muzzy_bytes; - atomic_zu_t nretained; - atomic_zu_t retained_bytes; -}; - /* * Arena stats. Note that fields marked "derived" are not directly maintained * within the arena code; rather their values are derived during stats merge diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 55a8ff48..e0b46fa3 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -44,7 +44,7 @@ typedef struct ctl_arena_stats_s { bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; - arena_stats_extents_t estats[SC_NPSIZES]; + pa_extent_stats_t estats[SC_NPSIZES]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f0b7faa1..acfad89f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -29,6 +29,23 @@ struct pa_shard_decay_stats_s { locked_u64_t purged; }; +typedef struct pa_extent_stats_s pa_extent_stats_t; +struct pa_extent_stats_s { + /* + * Stats for a given index in the range [0, SC_NPSIZES] in the various + * ecache_ts. + * We track both bytes and # of extents: two extents in the same bucket + * may have different sizes if adjacent size classes differ by more than + * a page, so bytes cannot always be derived from # of extents. + */ + size_t ndirty; + size_t dirty_bytes; + size_t nmuzzy; + size_t muzzy_bytes; + size_t nretained; + size_t retained_bytes; +}; + /* * The stats for a particular pa_shard. 
Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges diff --git a/src/arena.c b/src/arena.c index f2886541..2deafe68 100644 --- a/src/arena.c +++ b/src/arena.c @@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - arena_stats_extents_t *estats) { + pa_extent_stats_t *estats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -200,15 +200,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, retained_bytes = ecache_nbytes_get( &arena->pa_shard.ecache_retained, i); - atomic_store_zu(&estats[i].ndirty, dirty, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].nmuzzy, muzzy, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].nretained, retained, ATOMIC_RELAXED); - atomic_store_zu(&estats[i].dirty_bytes, dirty_bytes, - ATOMIC_RELAXED); - atomic_store_zu(&estats[i].muzzy_bytes, muzzy_bytes, - ATOMIC_RELAXED); - atomic_store_zu(&estats[i].retained_bytes, retained_bytes, - ATOMIC_RELAXED); + estats[i].ndirty = dirty; + estats[i].nmuzzy = muzzy; + estats[i].nretained = retained; + estats[i].dirty_bytes = dirty_bytes; + estats[i].muzzy_bytes = muzzy_bytes; + estats[i].retained_bytes = retained_bytes; } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/src/ctl.c b/src/ctl.c index 4350347a..16798679 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -804,7 +804,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * - sizeof(arena_stats_extents_t)); + sizeof(pa_extent_stats_t)); } } @@ -993,18 +993,16 @@ MUTEX_PROF_ARENA_MUTEXES /* Merge extents stats. 
*/ for (i = 0; i < SC_NPSIZES; i++) { - ctl_accum_atomic_zu(&sdstats->estats[i].ndirty, - &astats->estats[i].ndirty); - ctl_accum_atomic_zu(&sdstats->estats[i].nmuzzy, - &astats->estats[i].nmuzzy); - ctl_accum_atomic_zu(&sdstats->estats[i].nretained, - &astats->estats[i].nretained); - ctl_accum_atomic_zu(&sdstats->estats[i].dirty_bytes, - &astats->estats[i].dirty_bytes); - ctl_accum_atomic_zu(&sdstats->estats[i].muzzy_bytes, - &astats->estats[i].muzzy_bytes); - ctl_accum_atomic_zu(&sdstats->estats[i].retained_bytes, - &astats->estats[i].retained_bytes); + sdstats->estats[i].ndirty += astats->estats[i].ndirty; + sdstats->estats[i].nmuzzy += astats->estats[i].nmuzzy; + sdstats->estats[i].nretained + += astats->estats[i].nretained; + sdstats->estats[i].dirty_bytes + += astats->estats[i].dirty_bytes; + sdstats->estats[i].muzzy_bytes + += astats->estats[i].muzzy_bytes; + sdstats->estats[i].retained_bytes + += astats->estats[i].retained_bytes; } } } @@ -3150,29 +3148,17 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].ndirty, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].nretained, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); CTL_RO_CGEN(config_stats, 
stats_arenas_i_extents_j_muzzy_bytes, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, - atomic_load_zu( - &arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, - ATOMIC_RELAXED), size_t); + arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); static const ctl_named_node_t * stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, From 3c28aa6f179421b23fd8795cbcaa4696aba99557 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 18:14:53 -0700 Subject: [PATCH 1662/2608] PA: Move edata_avail stat in, make it non-atomic. --- include/jemalloc/internal/arena_stats.h | 3 --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 5 ++--- src/ctl.c | 8 +++----- src/pa.c | 2 -- 5 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 9dc9e5f2..496d6e70 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -52,9 +52,6 @@ struct arena_stats_s { */ locked_zu_t retained; /* Derived. */ - /* Number of edata_t structs allocated by base, but not being used. */ - atomic_zu_t edata_avail; /* Derived. */ - atomic_zu_t base; /* Derived. */ atomic_zu_t internal; atomic_zu_t resident; /* Derived. */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index acfad89f..9da061be 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -69,6 +69,9 @@ struct pa_shard_stats_s { */ locked_zu_t mapped; + /* Number of edata_t structs allocated by base, but not being used. */ + size_t edata_avail; /* Derived. */ + /* VM space had to be leaked (undocumented). Normally 0. 
*/ atomic_zu_t abandoned_vm; }; diff --git a/src/arena.c b/src/arena.c index 2deafe68..025418d6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -100,9 +100,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); - atomic_store_zu(&astats->edata_avail, - atomic_load_zu(&arena->pa_shard.edata_cache.count, ATOMIC_RELAXED), - ATOMIC_RELAXED); + astats->pa_shard_stats.edata_avail = atomic_load_zu( + &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); /* Dirty decay stats */ locked_inc_u64_unsynchronized( diff --git a/src/ctl.c b/src/ctl.c index 16798679..e8687b51 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -866,8 +866,8 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.pa_shard_stats.mapped); ctl_accum_locked_zu(&sdstats->astats.retained, &astats->astats.retained); - ctl_accum_atomic_zu(&sdstats->astats.edata_avail, - &astats->astats.edata_avail); + sdstats->astats.pa_shard_stats.edata_avail + += astats->astats.pa_shard_stats.edata_avail; } ctl_accum_locked_u64( @@ -2919,9 +2919,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.edata_avail, - ATOMIC_RELAXED), - size_t) + arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( diff --git a/src/pa.c b/src/pa.c index d678d823..d67c97ea 100644 --- a/src/pa.c +++ b/src/pa.c @@ -368,5 +368,3 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return epoch_advanced; } - - From 436789ad96fcc4a091790b9d380ee31570efa6cf Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 18:37:15 -0700 Subject: [PATCH 1663/2608] PA: Make mapped stat atomic. 
We always have atomic_zu_t, and mapped/unmapped transitions are always expensive enough that trying to piggyback on a lock is a waste of time. --- include/jemalloc/internal/pa.h | 10 +--------- src/arena.c | 19 +++++++++---------- src/ctl.c | 11 ++++++----- src/extent.c | 5 ++--- src/large.c | 3 ++- src/pa.c | 4 ++-- 6 files changed, 22 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9da061be..0cf83cc3 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -67,7 +67,7 @@ struct pa_shard_stats_s { * Partially derived -- we maintain our own counter, but add in the * base's own counter at merge. */ - locked_zu_t mapped; + atomic_zu_t mapped; /* Number of edata_t structs allocated by base, but not being used. */ size_t edata_avail; /* Derived. */ @@ -135,14 +135,6 @@ struct pa_shard_s { base_t *base; }; -static inline void -pa_shard_stats_mapped_add(tsdn_t *tsdn, pa_shard_t *shard, size_t size) { - LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); - locked_inc_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->mapped, size); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); -} - static inline ssize_t pa_shard_dirty_decay_ms_get(pa_shard_t *shard) { return decay_ms_read(&shard->decay_dirty); diff --git a/src/arena.c b/src/arena.c index 025418d6..2fe69041 100644 --- a/src/arena.c +++ b/src/arena.c @@ -90,16 +90,15 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); + size_t mapped = atomic_load_zu(&arena->pa_shard.stats->mapped, + ATOMIC_RELAXED); + atomic_load_add_store_zu(&astats->pa_shard_stats.mapped, + base_mapped + mapped); LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_zu_unsynchronized(&astats->pa_shard_stats.mapped, - base_mapped + locked_read_zu(tsdn, - 
LOCKEDINT_MTX(*arena->pa_shard.stats_mtx), - &arena->pa_shard.stats->mapped)); locked_inc_zu_unsynchronized(&astats->retained, ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); - astats->pa_shard_stats.edata_avail = atomic_load_zu( &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); @@ -436,9 +435,9 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); if (mapped_add != 0) { - locked_inc_zu(tsdn, - LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->mapped, mapped_add); + atomic_fetch_add_zu( + &arena->pa_shard.stats->mapped, mapped_add, + ATOMIC_RELAXED); } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } @@ -848,8 +847,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add); if (config_stats && slab != NULL && mapped_add != 0) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, - bin_info->slab_size); + atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, + ATOMIC_RELAXED); } if (slab == NULL) { diff --git a/src/ctl.c b/src/ctl.c index e8687b51..00afc769 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -861,7 +861,7 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - ctl_accum_locked_zu( + ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.mapped, &astats->astats.pa_shard_stats.mapped); ctl_accum_locked_zu(&sdstats->astats.retained, @@ -1101,8 +1101,9 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); - ctl_stats->mapped = locked_read_atomic_zu( - &ctl_sarena->astats->astats.pa_shard_stats.mapped); + ctl_stats->mapped = atomic_load_zu( + 
&ctl_sarena->astats->astats.pa_shard_stats.mapped, + ATOMIC_RELAXED); ctl_stats->retained = locked_read_atomic_zu( &ctl_sarena->astats->astats.retained); @@ -2913,8 +2914,8 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - locked_read_atomic_zu(&arenas_i( - mib[2])->astats->astats.pa_shard_stats.mapped), size_t) + atomic_load_zu(&arenas_i(mib[2])->astats->astats.pa_shard_stats.mapped, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), size_t) diff --git a/src/extent.c b/src/extent.c index 595916a1..62ebff52 100644 --- a/src/extent.c +++ b/src/extent.c @@ -966,10 +966,9 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, LOCKEDINT_MTX(*shard->stats_mtx), &shard->stats->decay_dirty.purged, extent_size >> LG_PAGE); - locked_dec_zu(tsdn, - LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->mapped, extent_size); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); + atomic_fetch_sub_zu(&shard->stats->mapped, extent_size, + ATOMIC_RELAXED); } } diff --git a/src/large.c b/src/large.c index 2b913d65..f61d1fed 100644 --- a/src/large.c +++ b/src/large.c @@ -122,7 +122,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } if (config_stats && mapped_add > 0) { - pa_shard_stats_mapped_add(tsdn, &arena->pa_shard, mapped_add); + atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, + ATOMIC_RELAXED); } if (zero) { diff --git a/src/pa.c b/src/pa.c index d67c97ea..e20eab94 100644 --- a/src/pa.c +++ b/src/pa.c @@ -268,9 +268,9 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, &decay_stats->nmadvise, nmadvise); locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), &decay_stats->purged, npurged); - 
locked_dec_zu(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->mapped, nunmapped << LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); + atomic_fetch_sub_zu(&shard->stats->mapped, nunmapped << LG_PAGE, + ATOMIC_RELAXED); } return npurged; From e2cf3fb1a3f064ba2c237620ca938e0e04c36d92 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 18:49:15 -0700 Subject: [PATCH 1664/2608] PA: Move in all modifications of mapped. --- include/jemalloc/internal/pa.h | 4 ++-- src/arena.c | 15 ++------------- src/large.c | 8 +------- src/pa.c | 33 +++++++++++++++++---------------- 4 files changed, 22 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0cf83cc3..1c84c8de 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -162,10 +162,10 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool *zero, size_t *mapped_add); + size_t alignment, bool slab, szind_t szind, bool *zero); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add); + size_t new_size, szind_t szind, bool slab, bool *zero); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. 
diff --git a/src/arena.c b/src/arena.c index 2fe69041..c4bf29fc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -424,21 +424,15 @@ edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool *zero) { szind_t szind = sz_size2index(usize); - size_t mapped_add; size_t esize = usize + sz_large_pad; edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero, &mapped_add); + /* slab */ false, szind, zero); if (edata != NULL) { if (config_stats) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); - if (mapped_add != 0) { - atomic_fetch_add_zu( - &arena->pa_shard.stats->mapped, mapped_add, - ATOMIC_RELAXED); - } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -842,14 +836,9 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard WITNESS_RANK_CORE, 0); bool zero = false; - size_t mapped_add = 0; edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, &zero, &mapped_add); - if (config_stats && slab != NULL && mapped_add != 0) { - atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, - ATOMIC_RELAXED); - } + PAGE, /* slab */ true, /* szind */ binind, &zero); if (slab == NULL) { return NULL; diff --git a/src/large.c b/src/large.c index f61d1fed..494a32ba 100644 --- a/src/large.c +++ b/src/large.c @@ -113,19 +113,13 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * below, even if is_zeroed_trail ends up true when zero is false. 
*/ bool is_zeroed_trail = zero; - size_t mapped_add; szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, /* slab */ false, &is_zeroed_trail, &mapped_add); + szind, /* slab */ false, &is_zeroed_trail); if (err) { return true; } - if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&arena->pa_shard.stats->mapped, mapped_add, - ATOMIC_RELAXED); - } - if (zero) { if (config_cache_oblivious) { /* diff --git a/src/pa.c b/src/pa.c index e20eab94..10a4401e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -77,16 +77,17 @@ pa_shard_may_have_muzzy(pa_shard_t *shard) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool *zero, size_t *mapped_add) { + bool slab, szind_t szind, bool *zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - *mapped_add = 0; + + size_t mapped_add = 0; ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero); + if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, NULL, size, alignment, slab, szind, zero); @@ -95,24 +96,21 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, szind, zero); - if (config_stats && edata != NULL) { - /* - * edata may be NULL on OOM, but in that case mapped_add - * isn't used below, so there's no need to conditionlly - * set it to 0 here. 
- */ - *mapped_add = size; - } + mapped_add = size; } if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); + if (config_stats && mapped_add > 0) { + atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, + ATOMIC_RELAXED); + } } return edata; } bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero, size_t *mapped_add) { + size_t new_size, szind_t szind, bool slab, bool *zero) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -121,7 +119,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void *trail_begin = edata_past_get(edata); size_t expand_amount = new_size - old_size; - *mapped_add = 0; + size_t mapped_add = 0; + if (ehooks_merge_will_fail(ehooks)) { return true; } @@ -137,18 +136,20 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, zero); - *mapped_add = expand_amount; + mapped_add = expand_amount; } if (trail == NULL) { - *mapped_add = 0; return true; } if (extent_merge_wrapper(tsdn, ehooks, &shard->edata_cache, edata, trail)) { extent_dalloc_wrapper(tsdn, shard, ehooks, trail); - *mapped_add = 0; return true; } + if (config_stats && mapped_add > 0) { + atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, + ATOMIC_RELAXED); + } pa_nactive_add(shard, expand_amount >> LG_PAGE); emap_remap(tsdn, &emap_global, edata, szind, slab); return false; From d0c43217b5bbcf263a4505cad3eaeecc47ac6aa7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Mar 2020 19:24:05 -0700 Subject: [PATCH 1665/2608] Arena stats: Move retained to PA, use plain ints. Retained is a property of the allocated pages. The derived fields no longer require any locking; they're computed on demand. 
--- include/jemalloc/internal/arena_stats.h | 17 +++++----- include/jemalloc/internal/pa.h | 8 +++++ src/arena.c | 20 +++++++----- src/ctl.c | 43 +++++++++---------------- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 496d6e70..3bfc8582 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -45,17 +45,16 @@ struct arena_stats_large_s { typedef struct arena_stats_s arena_stats_t; struct arena_stats_s { LOCKEDINT_MTX_DECLARE(mtx) - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - locked_zu_t retained; /* Derived. */ - atomic_zu_t base; /* Derived. */ + /* + * resident includes the base stats -- that's why it lives here and not + * in pa_shard_stats_t. + */ + size_t base; /* Derived. */ + size_t resident; /* Derived. */ + size_t metadata_thp; /* Derived. */ + atomic_zu_t internal; - atomic_zu_t resident; /* Derived. */ - atomic_zu_t metadata_thp; atomic_zu_t allocated_large; /* Derived. */ locked_u64_t nmalloc_large; /* Derived. */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 1c84c8de..f7abf1e9 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -61,6 +61,14 @@ typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { pa_shard_decay_stats_t decay_dirty; pa_shard_decay_stats_t decay_muzzy; + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + size_t retained; /* Derived. */ + /* * Number of bytes currently mapped, excluding retained memory. 
* diff --git a/src/arena.c b/src/arena.c index c4bf29fc..0fe85a9c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -97,8 +97,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - locked_inc_zu_unsynchronized(&astats->retained, - ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE); + astats->pa_shard_stats.retained += + ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE; astats->pa_shard_stats.edata_avail = atomic_load_zu( &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); @@ -130,13 +130,17 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->pa_shard.stats->decay_muzzy.purged)); - atomic_load_add_store_zu(&astats->base, base_allocated); + astats->base += base_allocated; atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); - atomic_load_add_store_zu(&astats->metadata_thp, metadata_thp); - atomic_load_add_store_zu(&astats->resident, base_resident + - (((atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED) + - ecache_npages_get(&arena->pa_shard.ecache_dirty) + - ecache_npages_get(&arena->pa_shard.ecache_muzzy)) << LG_PAGE))); + astats->metadata_thp += metadata_thp; + + size_t pa_resident_pgs = 0; + pa_resident_pgs + += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); + pa_resident_pgs + += ecache_npages_get(&arena->pa_shard.ecache_dirty); + astats->resident += base_resident + (pa_resident_pgs << LG_PAGE); + atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, ATOMIC_RELAXED)); diff --git a/src/ctl.c b/src/ctl.c index 00afc769..368eb5f8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -681,12 +681,6 @@ ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) { locked_read_u64_unsynchronized(src)); } -static void -ctl_accum_locked_zu(locked_zu_t *dst, locked_zu_t *src) { - locked_inc_zu_unsynchronized(dst, - 
locked_read_atomic_zu(src)); -} - static void ctl_accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); @@ -864,12 +858,13 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.mapped, &astats->astats.pa_shard_stats.mapped); - ctl_accum_locked_zu(&sdstats->astats.retained, - &astats->astats.retained); + sdstats->astats.pa_shard_stats.retained + += astats->astats.pa_shard_stats.retained; sdstats->astats.pa_shard_stats.edata_avail += astats->astats.pa_shard_stats.edata_avail; } + ctl_accum_locked_u64( &sdstats->astats.pa_shard_stats.decay_dirty.npurge, &astats->astats.pa_shard_stats.decay_dirty.npurge); @@ -898,14 +893,11 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { - ctl_accum_atomic_zu(&sdstats->astats.base, - &astats->astats.base); + sdstats->astats.base += astats->astats.base; + sdstats->astats.resident += astats->astats.resident; + sdstats->astats.metadata_thp += astats->astats.metadata_thp; ctl_accum_atomic_zu(&sdstats->astats.internal, &astats->astats.internal); - ctl_accum_atomic_zu(&sdstats->astats.resident, - &astats->astats.resident); - ctl_accum_atomic_zu(&sdstats->astats.metadata_thp, - &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -1093,19 +1085,17 @@ ctl_refresh(tsdn_t *tsdn) { atomic_load_zu(&ctl_sarena->astats->astats.allocated_large, ATOMIC_RELAXED); ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); - ctl_stats->metadata = atomic_load_zu( - &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + + ctl_stats->metadata = ctl_sarena->astats->astats.base + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); - ctl_stats->metadata_thp = atomic_load_zu( - &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); - ctl_stats->resident = atomic_load_zu( - 
&ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); + ctl_stats->resident = ctl_sarena->astats->astats.resident; + ctl_stats->metadata_thp = + ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = atomic_load_zu( &ctl_sarena->astats->astats.pa_shard_stats.mapped, ATOMIC_RELAXED); - ctl_stats->retained = locked_read_atomic_zu( - &ctl_sarena->astats->astats.retained); + ctl_stats->retained = + ctl_sarena->astats->astats.pa_shard_stats.retained; ctl_background_thread_stats_read(tsdn); @@ -2917,7 +2907,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, atomic_load_zu(&arenas_i(mib[2])->astats->astats.pa_shard_stats.mapped, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - locked_read_atomic_zu(&arenas_i(mib[2])->astats->astats.retained), + arenas_i(mib[2])->astats->astats.pa_shard_stats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) @@ -2949,19 +2939,18 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), + arenas_i(mib[2])->astats->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.resident, ATOMIC_RELAXED), + arenas_i(mib[2])->astats->astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, atomic_load_zu( From 
565045ef716586f93caf6c210905419be9ed6e25 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 08:34:47 -0700 Subject: [PATCH 1666/2608] Arena: Make more derived stats non-atomic/locked. --- include/jemalloc/internal/arena_stats.h | 14 ++++---- src/arena.c | 29 +++++++-------- src/ctl.c | 47 +++++++++---------------- 3 files changed, 36 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3bfc8582..3b3441f1 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -56,12 +56,12 @@ struct arena_stats_s { atomic_zu_t internal; - atomic_zu_t allocated_large; /* Derived. */ - locked_u64_t nmalloc_large; /* Derived. */ - locked_u64_t ndalloc_large; /* Derived. */ - locked_u64_t nfills_large; /* Derived. */ - locked_u64_t nflushes_large; /* Derived. */ - locked_u64_t nrequests_large; /* Derived. */ + size_t allocated_large; /* Derived. */ + uint64_t nmalloc_large; /* Derived. */ + uint64_t ndalloc_large; /* Derived. */ + uint64_t nfills_large; /* Derived. */ + uint64_t nflushes_large; /* Derived. */ + uint64_t nrequests_large; /* Derived. */ /* * The stats logically owned by the pa_shard in the same arena. This @@ -71,7 +71,7 @@ struct arena_stats_s { pa_shard_stats_t pa_shard_stats; /* Number of bytes cached in tcache associated with this arena. */ - atomic_zu_t tcache_bytes; /* Derived. */ + size_t tcache_bytes; /* Derived. 
*/ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; diff --git a/src/arena.c b/src/arena.c index 0fe85a9c..73033a64 100644 --- a/src/arena.c +++ b/src/arena.c @@ -150,42 +150,37 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nmalloc); locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); - locked_inc_u64_unsynchronized(&astats->nmalloc_large, - nmalloc); + astats->nmalloc_large += nmalloc; uint64_t ndalloc = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); - locked_inc_u64_unsynchronized(&astats->ndalloc_large, - ndalloc); + astats->ndalloc_large += ndalloc; uint64_t nrequests = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); locked_inc_u64_unsynchronized(&lstats[i].nrequests, nmalloc + nrequests); - locked_inc_u64_unsynchronized(&astats->nrequests_large, - nmalloc + nrequests); + astats->nrequests_large += nmalloc + nrequests; /* nfill == nmalloc for large currently. 
*/ locked_inc_u64_unsynchronized(&lstats[i].nfills, nmalloc); - locked_inc_u64_unsynchronized(&astats->nfills_large, - nmalloc); + astats->nfills_large += nmalloc; uint64_t nflush = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nflushes); locked_inc_u64_unsynchronized(&lstats[i].nflushes, nflush); - locked_inc_u64_unsynchronized(&astats->nflushes_large, - nflush); + astats->nflushes_large += nflush; assert(nmalloc >= ndalloc); assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - atomic_load_add_store_zu(&astats->allocated_large, - curlextents * sz_index2size(SC_NBINS + i)); + astats->allocated_large += + curlextents * sz_index2size(SC_NBINS + i); } for (pszind_t i = 0; i < SC_NPSIZES; i++) { @@ -213,22 +208,22 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); /* tcache_bytes counts currently cached bytes. */ - atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); + astats->tcache_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_small[i]; - atomic_load_add_store_zu(&astats->tcache_bytes, + astats->tcache_bytes += cache_bin_ncached_get(tbin, - &tcache_bin_info[i]) * sz_index2size(i)); + &tcache_bin_info[i]) * sz_index2size(i); } for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { cache_bin_t *tbin = &descriptor->bins_large[i]; - atomic_load_add_store_zu(&astats->tcache_bytes, + astats->tcache_bytes += cache_bin_ncached_get(tbin, &tcache_bin_info[i + SC_NBINS]) - * sz_index2size(i + SC_NBINS)); + * sz_index2size(i + SC_NBINS); } } malloc_mutex_prof_read(tsdn, diff --git a/src/ctl.c b/src/ctl.c index 368eb5f8..a3cc74ac 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -915,26 +915,21 @@ 
MUTEX_PROF_ARENA_MUTEXES sdstats->nflushes_small += astats->nflushes_small; if (!destroyed) { - ctl_accum_atomic_zu(&sdstats->astats.allocated_large, - &astats->astats.allocated_large); + sdstats->astats.allocated_large += + astats->astats.allocated_large; } else { - assert(atomic_load_zu(&astats->astats.allocated_large, - ATOMIC_RELAXED) == 0); + assert(astats->astats.allocated_large == 0); } - ctl_accum_locked_u64(&sdstats->astats.nmalloc_large, - &astats->astats.nmalloc_large); - ctl_accum_locked_u64(&sdstats->astats.ndalloc_large, - &astats->astats.ndalloc_large); - ctl_accum_locked_u64(&sdstats->astats.nrequests_large, - &astats->astats.nrequests_large); - ctl_accum_locked_u64(&sdstats->astats.nflushes_large, - &astats->astats.nflushes_large); + sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; + sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; + sdstats->astats.nrequests_large + += astats->astats.nrequests_large; + sdstats->astats.nflushes_large += astats->astats.nflushes_large; ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.abandoned_vm, &astats->astats.pa_shard_stats.abandoned_vm); - ctl_accum_atomic_zu(&sdstats->astats.tcache_bytes, - &astats->astats.tcache_bytes); + sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; if (ctl_arena->arena_ind == 0) { sdstats->astats.uptime = astats->astats.uptime; @@ -1082,8 +1077,7 @@ ctl_refresh(tsdn_t *tsdn) { if (config_stats) { ctl_stats->allocated = ctl_sarena->astats->allocated_small + - atomic_load_zu(&ctl_sarena->astats->astats.allocated_large, - ATOMIC_RELAXED); + ctl_sarena->astats->astats.allocated_large; ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); ctl_stats->metadata = ctl_sarena->astats->astats.base + atomic_load_zu(&ctl_sarena->astats->astats.internal, @@ -2947,8 +2941,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_internal, CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_tcache_bytes, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.tcache_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, arenas_i(mib[2])->astats->astats.resident, size_t) @@ -2970,27 +2963,21 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_small_nfills, CTL_RO_CGEN(config_stats, stats_arenas_i_small_nflushes, arenas_i(mib[2])->astats->nflushes_small, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.allocated_large, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) + arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) + arenas_i(mib[2])->astats->astats.ndalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nrequests_large), uint64_t) + arenas_i(mib[2])->astats->astats.nrequests_large, uint64_t) /* * Note: "nmalloc_large" here instead of "nfills" in the read. This is * intentional (large has no batch fill). */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nfills, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) + arenas_i(mib[2])->astats->astats.nmalloc_large, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, - locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.nflushes_large), uint64_t) + arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t) /* Lock profiling related APIs below. 
*/ #define RO_MUTEX_CTL_GEN(n, l) \ From 8164fad4045a1e30580da30294652e7c3b8a75f7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 08:46:43 -0700 Subject: [PATCH 1667/2608] Stats: Fix edata_cache size merging. Previously, we assigned to the output rather than incrementing it. --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 73033a64..dfb4759b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -99,7 +99,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->pa_shard_stats.retained += ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE; - astats->pa_shard_stats.edata_avail = atomic_load_zu( + astats->pa_shard_stats.edata_avail += atomic_load_zu( &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); /* Dirty decay stats */ From f29f6090f589bbd1eda92f025e931e449fa9d621 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 09:20:37 -0700 Subject: [PATCH 1668/2608] PA: Add pa_extra.c and put PA forking there. 
--- Makefile.in | 1 + include/jemalloc/internal/pa.h | 18 ++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 27 ++------- src/pa_extra.c | 55 +++++++++++++++++++ 8 files changed, 88 insertions(+), 21 deletions(-) create mode 100644 src/pa_extra.c diff --git a/Makefile.in b/Makefile.in index b19c14f9..c0929ce2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -127,6 +127,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ + $(srcroot)src/pa_extra.c \ $(srcroot)src/pages.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f7abf1e9..9cf290c2 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -211,4 +211,22 @@ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); +/******************************************************************************/ +/* + * Various bits of "boring" functionality that are still part of this module, + * but that we relegate to pa_extra.c, to keep the core logic in pa.c as + * readable as possible. + */ + +/* + * These fork phases are synchronized with the arena fork phase numbering to + * make it easy to keep straight. That's why there's no prefork1. 
+ */ +void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 156e4593..9f81e21d 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 45557f65..15fe7f08 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index c5cfb95f..b5fccaed 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 45557f65..15fe7f08 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index dfb4759b..dc8c26b6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1739,8 +1739,7 @@ arena_boot(sc_data_t *sc_data) { void arena_prefork0(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &arena->pa_shard.decay_muzzy.mtx); + pa_shard_prefork0(tsdn, 
&arena->pa_shard); } void @@ -1752,19 +1751,17 @@ arena_prefork1(tsdn_t *tsdn, arena_t *arena) { void arena_prefork2(tsdn_t *tsdn, arena_t *arena) { - ecache_grow_prefork(tsdn, &arena->pa_shard.ecache_grow); + pa_shard_prefork2(tsdn, &arena->pa_shard); } void arena_prefork3(tsdn_t *tsdn, arena_t *arena) { - ecache_prefork(tsdn, &arena->pa_shard.ecache_dirty); - ecache_prefork(tsdn, &arena->pa_shard.ecache_muzzy); - ecache_prefork(tsdn, &arena->pa_shard.ecache_retained); + pa_shard_prefork3(tsdn, &arena->pa_shard); } void arena_prefork4(tsdn_t *tsdn, arena_t *arena) { - edata_cache_prefork(tsdn, &arena->pa_shard.edata_cache); + pa_shard_prefork4(tsdn, &arena->pa_shard); } void @@ -1798,13 +1795,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); - edata_cache_postfork_parent(tsdn, &arena->pa_shard.edata_cache); - ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_dirty); - ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_muzzy); - ecache_postfork_parent(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_parent(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &arena->pa_shard.decay_muzzy.mtx); + pa_shard_postfork_parent(tsdn, &arena->pa_shard); if (config_stats) { malloc_mutex_postfork_parent(tsdn, &arena->tcache_ql_mtx); } @@ -1844,13 +1835,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); - edata_cache_postfork_child(tsdn, &arena->pa_shard.edata_cache); - ecache_postfork_child(tsdn, &arena->pa_shard.ecache_dirty); - ecache_postfork_child(tsdn, &arena->pa_shard.ecache_muzzy); - ecache_postfork_child(tsdn, &arena->pa_shard.ecache_retained); - ecache_grow_postfork_child(tsdn, &arena->pa_shard.ecache_grow); - malloc_mutex_postfork_child(tsdn, 
&arena->pa_shard.decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &arena->pa_shard.decay_muzzy.mtx); + pa_shard_postfork_child(tsdn, &arena->pa_shard); if (config_stats) { malloc_mutex_postfork_child(tsdn, &arena->tcache_ql_mtx); } diff --git a/src/pa_extra.c b/src/pa_extra.c new file mode 100644 index 00000000..bfb0a004 --- /dev/null +++ b/src/pa_extra.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +/* + * This file is logically part of the PA module. While pa.c contains the core + * allocator functionality, this file contains boring integration functionality; + * things like the pre- and post- fork handlers, and stats merging for CTL + * refreshes. + */ + +void +pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &shard->decay_muzzy.mtx); +} + +void +pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { + ecache_grow_prefork(tsdn, &shard->ecache_grow); +} + +void +pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { + ecache_prefork(tsdn, &shard->ecache_dirty); + ecache_prefork(tsdn, &shard->ecache_muzzy); + ecache_prefork(tsdn, &shard->ecache_retained); +} + + +void +pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { + edata_cache_prefork(tsdn, &shard->edata_cache); +} + +void +pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { + edata_cache_postfork_parent(tsdn, &shard->edata_cache); + ecache_postfork_parent(tsdn, &shard->ecache_dirty); + ecache_postfork_parent(tsdn, &shard->ecache_muzzy); + ecache_postfork_parent(tsdn, &shard->ecache_retained); + ecache_grow_postfork_parent(tsdn, &shard->ecache_grow); + malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); +} + +void +pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { + edata_cache_postfork_child(tsdn, &shard->edata_cache); + ecache_postfork_child(tsdn, 
&shard->ecache_dirty); + ecache_postfork_child(tsdn, &shard->ecache_muzzy); + ecache_postfork_child(tsdn, &shard->ecache_retained); + ecache_grow_postfork_child(tsdn, &shard->ecache_grow); + malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); +} From 506d907e40e8b5b191b8bc5f2ee77d87e0684cfb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 09:28:13 -0700 Subject: [PATCH 1669/2608] PA: Move in basic stats merging. --- include/jemalloc/internal/pa.h | 3 +++ src/arena.c | 4 +--- src/pa_extra.c | 8 ++++++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9cf290c2..8c82823c 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -229,4 +229,7 @@ void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, + size_t *ndirty, size_t *nmuzzy); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index dc8c26b6..10a24688 100644 --- a/src/arena.c +++ b/src/arena.c @@ -71,9 +71,7 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, *dss = dss_prec_names[arena_dss_prec_get(arena)]; *dirty_decay_ms = arena_dirty_decay_ms_get(arena); *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); - *nactive += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&arena->pa_shard.ecache_dirty); - *nmuzzy += ecache_npages_get(&arena->pa_shard.ecache_muzzy); + pa_shard_basic_stats_merge(&arena->pa_shard, nactive, ndirty, nmuzzy); } void diff --git a/src/pa_extra.c b/src/pa_extra.c index bfb0a004..1b642df2 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -53,3 +53,11 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t 
*shard) { malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); } + +void +pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, + size_t *nmuzzy) { + *nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); + *ndirty += ecache_npages_get(&shard->ecache_dirty); + *nmuzzy += ecache_npages_get(&shard->ecache_muzzy); +} From 81c6027592d59383107b3a7a26caddb787ed10c7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 09:36:25 -0700 Subject: [PATCH 1670/2608] Arena stats: Give it its own "mapped". This distinguishes it from the PA mapped stat, which is now named "pa_mapped" to avoid confusion. The (derived) arena stat includes base memory, and the PA stat is no longer partially derived. --- include/jemalloc/internal/arena_stats.h | 1 + include/jemalloc/internal/pa.h | 9 +++++---- src/arena.c | 5 ++--- src/ctl.c | 11 +++-------- src/extent.c | 2 +- src/pa.c | 10 +++++----- 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3b3441f1..9effa61b 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -53,6 +53,7 @@ struct arena_stats_s { size_t base; /* Derived. */ size_t resident; /* Derived. */ size_t metadata_thp; /* Derived. */ + size_t mapped; /* Derived. */ atomic_zu_t internal; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 8c82823c..1bffa9ef 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -70,12 +70,13 @@ struct pa_shard_stats_s { size_t retained; /* Derived. */ /* - * Number of bytes currently mapped, excluding retained memory. + * Number of bytes currently mapped, excluding retained memory (and any + * base-allocated memory, which is tracked by the arena stats). 
* - * Partially derived -- we maintain our own counter, but add in the - * base's own counter at merge. + * We name this "pa_mapped" to avoid confusion with the arena_stats + * "mapped". */ - atomic_zu_t mapped; + atomic_zu_t pa_mapped; /* Number of edata_t structs allocated by base, but not being used. */ size_t edata_avail; /* Derived. */ diff --git a/src/arena.c b/src/arena.c index 10a24688..07a60510 100644 --- a/src/arena.c +++ b/src/arena.c @@ -88,10 +88,9 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - size_t mapped = atomic_load_zu(&arena->pa_shard.stats->mapped, + size_t pa_mapped = atomic_load_zu(&arena->pa_shard.stats->pa_mapped, ATOMIC_RELAXED); - atomic_load_add_store_zu(&astats->pa_shard_stats.mapped, - base_mapped + mapped); + astats->mapped += base_mapped + pa_mapped; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); diff --git a/src/ctl.c b/src/ctl.c index a3cc74ac..00fd7441 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -855,9 +855,7 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, ctl_arena_stats_t *astats = ctl_arena->astats; if (!destroyed) { - ctl_accum_atomic_zu( - &sdstats->astats.pa_shard_stats.mapped, - &astats->astats.pa_shard_stats.mapped); + sdstats->astats.mapped += astats->astats.mapped; sdstats->astats.pa_shard_stats.retained += astats->astats.pa_shard_stats.retained; sdstats->astats.pa_shard_stats.edata_avail @@ -1085,9 +1083,7 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->resident = ctl_sarena->astats->astats.resident; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; - ctl_stats->mapped = atomic_load_zu( - &ctl_sarena->astats->astats.pa_shard_stats.mapped, - ATOMIC_RELAXED); + ctl_stats->mapped = ctl_sarena->astats->astats.mapped; ctl_stats->retained = ctl_sarena->astats->astats.pa_shard_stats.retained; @@ -2898,8 
+2894,7 @@ CTL_RO_GEN(stats_arenas_i_pactive, arenas_i(mib[2])->pactive, size_t) CTL_RO_GEN(stats_arenas_i_pdirty, arenas_i(mib[2])->pdirty, size_t) CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, - atomic_load_zu(&arenas_i(mib[2])->astats->astats.pa_shard_stats.mapped, - ATOMIC_RELAXED), size_t) + arenas_i(mib[2])->astats->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, arenas_i(mib[2])->astats->astats.pa_shard_stats.retained, size_t) diff --git a/src/extent.c b/src/extent.c index 62ebff52..05d1755e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -967,7 +967,7 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &shard->stats->decay_dirty.purged, extent_size >> LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->mapped, extent_size, + atomic_fetch_sub_zu(&shard->stats->pa_mapped, extent_size, ATOMIC_RELAXED); } } diff --git a/src/pa.c b/src/pa.c index 10a4401e..1b7d374c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -101,8 +101,8 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, - ATOMIC_RELAXED); + atomic_fetch_add_zu(&shard->stats->pa_mapped, + mapped_add, ATOMIC_RELAXED); } } return edata; @@ -147,7 +147,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->mapped, mapped_add, + atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, ATOMIC_RELAXED); } pa_nactive_add(shard, expand_amount >> LG_PAGE); @@ -270,8 +270,8 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), &decay_stats->purged, npurged); LOCKEDINT_MTX_UNLOCK(tsdn, 
*shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->mapped, nunmapped << LG_PAGE, - ATOMIC_RELAXED); + atomic_fetch_sub_zu(&shard->stats->pa_mapped, + nunmapped << LG_PAGE, ATOMIC_RELAXED); } return npurged; From 238f3c743067b1305f14ba4ddcf3b95ec7719ae7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 10:28:18 -0700 Subject: [PATCH 1671/2608] PA: Move in full stats merging. --- include/jemalloc/internal/pa.h | 9 +++++ src/arena.c | 71 ++-------------------------------- src/pa_extra.c | 66 +++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 1bffa9ef..03ab6d0e 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -233,4 +233,13 @@ void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); +static inline size_t +pa_shard_pa_mapped(pa_shard_t *shard) { + return atomic_load_zu(&shard->stats->pa_mapped, ATOMIC_RELAXED); +} + +void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, + pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + size_t *resident); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index 07a60510..fd2876e9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -88,60 +88,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - size_t pa_mapped = atomic_load_zu(&arena->pa_shard.stats->pa_mapped, - ATOMIC_RELAXED); + size_t pa_mapped = pa_shard_pa_mapped(&arena->pa_shard); astats->mapped += base_mapped + pa_mapped; + astats->resident += base_resident; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); - astats->pa_shard_stats.retained += - 
ecache_npages_get(&arena->pa_shard.ecache_retained) << LG_PAGE; - astats->pa_shard_stats.edata_avail += atomic_load_zu( - &arena->pa_shard.edata_cache.count, ATOMIC_RELAXED); - - /* Dirty decay stats */ - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_dirty.npurge, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_dirty.npurge)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_dirty.nmadvise, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_dirty.nmadvise)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_dirty.purged, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_dirty.purged)); - - /* Decay stats */ - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_muzzy.npurge, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_muzzy.npurge)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_muzzy.nmadvise, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_muzzy.nmadvise)); - locked_inc_u64_unsynchronized( - &astats->pa_shard_stats.decay_muzzy.purged, - locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->pa_shard.stats->decay_muzzy.purged)); - astats->base += base_allocated; atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); astats->metadata_thp += metadata_thp; - size_t pa_resident_pgs = 0; - pa_resident_pgs - += atomic_load_zu(&arena->pa_shard.nactive, ATOMIC_RELAXED); - pa_resident_pgs - += ecache_npages_get(&arena->pa_shard.ecache_dirty); - astats->resident += base_resident + (pa_resident_pgs << LG_PAGE); - - atomic_load_add_store_zu(&astats->pa_shard_stats.abandoned_vm, - atomic_load_zu(&arena->stats.pa_shard_stats.abandoned_vm, - ATOMIC_RELAXED)); - for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { uint64_t nmalloc = locked_read_u64(tsdn, 
LOCKEDINT_MTX(arena->stats.mtx), @@ -180,27 +136,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, curlextents * sz_index2size(SC_NBINS + i); } - for (pszind_t i = 0; i < SC_NPSIZES; i++) { - size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, - retained_bytes; - dirty = ecache_nextents_get(&arena->pa_shard.ecache_dirty, i); - muzzy = ecache_nextents_get(&arena->pa_shard.ecache_muzzy, i); - retained = ecache_nextents_get(&arena->pa_shard.ecache_retained, - i); - dirty_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_dirty, - i); - muzzy_bytes = ecache_nbytes_get(&arena->pa_shard.ecache_muzzy, - i); - retained_bytes = ecache_nbytes_get( - &arena->pa_shard.ecache_retained, i); - - estats[i].ndirty = dirty; - estats[i].nmuzzy = muzzy; - estats[i].nretained = retained; - estats[i].dirty_bytes = dirty_bytes; - estats[i].muzzy_bytes = muzzy_bytes; - estats[i].retained_bytes = retained_bytes; - } + pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, + estats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/src/pa_extra.c b/src/pa_extra.c index 1b642df2..1088596e 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -61,3 +61,69 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, *ndirty += ecache_npages_get(&shard->ecache_dirty); *nmuzzy += ecache_npages_get(&shard->ecache_muzzy); } + +void +pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, + pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + size_t *resident) { + cassert(config_stats); + + shard_stats_out->retained += + ecache_npages_get(&shard->ecache_retained) << LG_PAGE; + shard_stats_out->edata_avail += atomic_load_zu( + &shard->edata_cache.count, ATOMIC_RELAXED); + + size_t resident_pgs = 0; + resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); + resident_pgs += ecache_npages_get(&shard->ecache_dirty); + *resident += (resident_pgs << LG_PAGE); + + /* Dirty decay stats */ + 
locked_inc_u64_unsynchronized( + &shard_stats_out->decay_dirty.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.npurge)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_dirty.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.nmadvise)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_dirty.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_dirty.purged)); + + /* Muzzy decay stats */ + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_muzzy.npurge, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_muzzy.npurge)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_muzzy.nmadvise, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_muzzy.nmadvise)); + locked_inc_u64_unsynchronized( + &shard_stats_out->decay_muzzy.purged, + locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), + &shard->stats->decay_muzzy.purged)); + + atomic_load_add_store_zu(&shard_stats_out->abandoned_vm, + atomic_load_zu(&shard->stats->abandoned_vm, ATOMIC_RELAXED)); + + for (pszind_t i = 0; i < SC_NPSIZES; i++) { + size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, + retained_bytes; + dirty = ecache_nextents_get(&shard->ecache_dirty, i); + muzzy = ecache_nextents_get(&shard->ecache_muzzy, i); + retained = ecache_nextents_get(&shard->ecache_retained, i); + dirty_bytes = ecache_nbytes_get(&shard->ecache_dirty, i); + muzzy_bytes = ecache_nbytes_get(&shard->ecache_muzzy, i); + retained_bytes = ecache_nbytes_get(&shard->ecache_retained, i); + + extent_stats_out[i].ndirty = dirty; + extent_stats_out[i].nmuzzy = muzzy; + extent_stats_out[i].nretained = retained; + extent_stats_out[i].dirty_bytes = dirty_bytes; + extent_stats_out[i].muzzy_bytes = muzzy_bytes; + extent_stats_out[i].retained_bytes = retained_bytes; + } +} From 07675840a5d41c2537de2bd16e8da1cd11ef48e9 Mon Sep 
17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 11:21:22 -0700 Subject: [PATCH 1672/2608] PA: Move in some more internals accesses. --- include/jemalloc/internal/pa.h | 18 ++++++++++++++++++ src/arena.c | 34 +++++----------------------------- src/pa.c | 26 ++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 03ab6d0e..4156a4e8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -153,6 +153,12 @@ pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { return decay_ms_read(&shard->decay_muzzy); } +static inline bool +pa_shard_dont_decay_muzzy(pa_shard_t *shard) { + return ecache_npages_get(&shard->ecache_muzzy) == 0 && + pa_shard_muzzy_decay_ms_get(shard) <= 0; +} + static inline bool pa_shard_may_force_decay(pa_shard_t *shard) { return !(pa_shard_dirty_decay_ms_get(shard) == -1 @@ -167,6 +173,18 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); +/* + * This does the PA-specific parts of arena reset (i.e. freeing all active + * allocations). + */ +void pa_shard_reset(pa_shard_t *shard); +/* + * Destroy all the remaining retained extents. Should only be called after + * decaying all active, dirty, and muzzy extents to the retained state, as the + * last step in destroying the shard. + */ +void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard); + size_t pa_shard_extent_sn_next(pa_shard_t *shard); /* Gets an edata for the given allocation. 
*/ diff --git a/src/arena.c b/src/arena.c index fd2876e9..c9a46264 100644 --- a/src/arena.c +++ b/src/arena.c @@ -544,8 +544,7 @@ arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, static bool arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - if (ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0 && - arena_muzzy_decay_ms_get(arena) <= 0) { + if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { return false; } return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, @@ -703,27 +702,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { &arena->bins[i].bin_shards[j]); } } - - atomic_store_zu(&arena->pa_shard.nactive, 0, ATOMIC_RELAXED); -} - -static void -arena_destroy_retained(tsdn_t *tsdn, arena_t *arena) { - /* - * Iterate over the retained extents and destroy them. This gives the - * extent allocator underlying the extent hooks an opportunity to unmap - * all retained memory without having to keep its own metadata - * structures. In practice, virtual memory for dss-allocated extents is - * leaked here, so best practice is to avoid dss for arenas to be - * destroyed, or provide custom extent hooks that track retained - * dss-based extents for later reuse. - */ - ehooks_t *ehooks = arena_get_ehooks(arena); - edata_t *edata; - while ((edata = ecache_evict(tsdn, &arena->pa_shard, ehooks, - &arena->pa_shard.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, &arena->pa_shard, ehooks, edata); - } + pa_shard_reset(&arena->pa_shard); } void @@ -735,13 +714,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { /* * No allocations have occurred since arena_reset() was called. * Furthermore, the caller (arena_i_destroy_ctl()) purged all cached - * extents, so only retained extents may remain. + * extents, so only retained extents may remain and it's safe to call + * pa_shard_destroy_retained. 
*/ - assert(ecache_npages_get(&arena->pa_shard.ecache_dirty) == 0); - assert(ecache_npages_get(&arena->pa_shard.ecache_muzzy) == 0); - - /* Deallocate retained memory. */ - arena_destroy_retained(tsd_tsdn(tsd), arena); + pa_shard_destroy_retained(tsd_tsdn(tsd), &arena->pa_shard); /* * Remove the arena pointer from the arenas array. We rely on the fact diff --git a/src/pa.c b/src/pa.c index 1b7d374c..a8aee216 100644 --- a/src/pa.c +++ b/src/pa.c @@ -65,6 +65,32 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return false; } +void +pa_shard_reset(pa_shard_t *shard) { + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); +} + +void +pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { + assert(ecache_npages_get(&shard->ecache_dirty) == 0); + assert(ecache_npages_get(&shard->ecache_muzzy) == 0); + /* + * Iterate over the retained extents and destroy them. This gives the + * extent allocator underlying the extent hooks an opportunity to unmap + * all retained memory without having to keep its own metadata + * structures. In practice, virtual memory for dss-allocated extents is + * leaked here, so best practice is to avoid dss for arenas to be + * destroyed, or provide custom extent hooks that track retained + * dss-based extents for later reuse. + */ + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + edata_t *edata; + while ((edata = ecache_evict(tsdn, shard, ehooks, + &shard->ecache_retained, 0)) != NULL) { + extent_destroy_wrapper(tsdn, shard, ehooks, edata); + } +} + size_t pa_shard_extent_sn_next(pa_shard_t *shard) { return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); From daefde88fe960e2ff0756fac82f82512025bdf1d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 15:26:50 -0700 Subject: [PATCH 1673/2608] PA: Move in mutex stats reading. 
--- include/jemalloc/internal/pa.h | 9 +++++++++ src/arena.c | 16 +++------------- src/pa_extra.c | 25 +++++++++++++++++++++++++ 3 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4156a4e8..05841549 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -260,4 +260,13 @@ void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, size_t *resident); +/* + * Reads the PA-owned mutex stats into the output stats array, at the + * appropriate positions. Morally, these stats should really live in + * pa_shard_stats_t, but the indices are sort of baked into the various mutex + * prof macros. This would be a good thing to do at some point. + */ +void pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]); + #endif /* JEMALLOC_INTERNAL_PA_H */ diff --git a/src/arena.c b/src/arena.c index c9a46264..e96934a5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -173,21 +173,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* Gather per arena mutex profiling data. 
*/ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(pa_shard.edata_cache.mtx, - arena_prof_mutex_extent_avail) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_dirty.mtx, - arena_prof_mutex_extents_dirty) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_muzzy.mtx, - arena_prof_mutex_extents_muzzy) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.ecache_retained.mtx, - arena_prof_mutex_extents_retained) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_dirty.mtx, - arena_prof_mutex_decay_dirty) - READ_ARENA_MUTEX_PROF_DATA(pa_shard.decay_muzzy.mtx, - arena_prof_mutex_decay_muzzy) READ_ARENA_MUTEX_PROF_DATA(base->mtx, - arena_prof_mutex_base) + arena_prof_mutex_base); #undef READ_ARENA_MUTEX_PROF_DATA + pa_shard_mtx_stats_read(tsdn, &arena->pa_shard, + astats->mutex_prof_data); nstime_copy(&astats->uptime, &arena->create_time); nstime_update(&astats->uptime); diff --git a/src/pa_extra.c b/src/pa_extra.c index 1088596e..1f90f7f7 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -127,3 +127,28 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, extent_stats_out[i].retained_bytes = retained_bytes; } } + +static void +pa_shard_mtx_stats_read_single(tsdn_t *tsdn, mutex_prof_data_t *mutex_prof_data, + malloc_mutex_t *mtx, int ind) { + malloc_mutex_lock(tsdn, mtx); + malloc_mutex_prof_read(tsdn, &mutex_prof_data[ind], mtx); + malloc_mutex_unlock(tsdn, mtx); +} + +void +pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]) { + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->edata_cache.mtx, arena_prof_mutex_extent_avail); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->ecache_dirty.mtx, arena_prof_mutex_extents_dirty); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->ecache_retained.mtx, 
arena_prof_mutex_extents_retained); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->decay_dirty.mtx, arena_prof_mutex_decay_dirty); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); +} From 45671e4a27740c85c83b248d0e7e3f45024fdc45 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 15:51:13 -0700 Subject: [PATCH 1674/2608] PA: Move in retain growth limit setting. --- include/jemalloc/internal/pa.h | 12 ++++++++++++ src/arena.c | 22 ++-------------------- src/pa.c | 24 ++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 05841549..ec6c8048 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -230,6 +230,18 @@ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); +/* + * Gets / sets the maximum amount that we'll grow an arena down the + * grow-retained pathways (unless forced to by an allocation request). 
+ */ +bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, + size_t *old_limit, size_t *new_limit); + /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/src/arena.c b/src/arena.c index e96934a5..178cc9a9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1411,26 +1411,8 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - - pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); - if (new_limit != NULL) { - size_t limit = *new_limit; - /* Grow no more than the new limit. */ - if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { - return true; - } - } - - malloc_mutex_lock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); - if (old_limit != NULL) { - *old_limit = sz_pind2sz(arena->pa_shard.ecache_grow.limit); - } - if (new_limit != NULL) { - arena->pa_shard.ecache_grow.limit = new_ind; - } - malloc_mutex_unlock(tsd_tsdn(tsd), &arena->pa_shard.ecache_grow.mtx); - - return false; + return pa_shard_retain_grow_limit_get_set(tsd_tsdn(tsd), + &arena->pa_shard, old_limit, new_limit); } unsigned diff --git a/src/pa.c b/src/pa.c index a8aee216..d4949f5e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -395,3 +395,27 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, return epoch_advanced; } + +bool +pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, + size_t *old_limit, size_t *new_limit) { + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. 
*/ + if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { + return true; + } + } + + malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(shard->ecache_grow.limit); + } + if (new_limit != NULL) { + shard->ecache_grow.limit = new_ind; + } + malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + + return false; +} From faec7219b23303ec812e9aee6fc35352f936d10b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 12 Mar 2020 16:06:40 -0700 Subject: [PATCH 1675/2608] PA: Move in decay initialization. --- include/jemalloc/internal/pa.h | 3 ++- src/arena.c | 14 +++----------- src/pa.c | 10 +++++++++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ec6c8048..82676ee4 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -172,7 +172,8 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx); + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). 
diff --git a/src/arena.c b/src/arena.c index 178cc9a9..fd19e770 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1487,19 +1487,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, - &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx))) { - goto label_error; - } - nstime_t cur_time; nstime_init_update(&cur_time); - - if (decay_init(&arena->pa_shard.decay_dirty, &cur_time, - arena_dirty_decay_ms_default_get())) { - goto label_error; - } - if (decay_init(&arena->pa_shard.decay_muzzy, &cur_time, + if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, + &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), + &cur_time, arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/pa.c b/src/pa.c index d4949f5e..a1063b9e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -14,7 +14,8 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx) { + pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -53,6 +54,13 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, return true; } + if (decay_init(&shard->decay_dirty, cur_time, dirty_decay_ms)) { + return true; + } + if (decay_init(&shard->decay_muzzy, cur_time, muzzy_decay_ms)) { + return true; + } + atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); From bd4fdf295ed5a56f433fa8d4a23d1273cc7ad156 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 11:47:51 -0700 Subject: [PATCH 1676/2608] Rtree: Pull leaf contents into their own struct. 
--- include/jemalloc/internal/edata.h | 17 +++++ include/jemalloc/internal/rtree.h | 118 +++++++++++++++++++----------- src/emap.c | 18 +++-- test/unit/rtree.c | 7 +- 4 files changed, 110 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2a81bdc6..0a99ff00 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -39,6 +39,23 @@ struct e_prof_info_s { }; typedef struct e_prof_info_s e_prof_info_t; +/* + * The information about a particular edata that lives in an emap. Space is + * more previous there (the information, plus the edata pointer, has to live in + * a 64-bit word if we want to enable a packed representation. + * + * There are two things that are special about the information here: + * - It's quicker to access. You have one fewer pointer hop, since finding the + * edata_t associated with an item always requires accessing the rtree leaf in + * which this data is stored. + * - It can be read unsynchronized, and without worrying about lifetime issues. + */ +typedef struct edata_map_info_s edata_map_info_t; +struct edata_map_info_s { + bool slab; + szind_t szind; +}; + /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; typedef ql_head(edata_t) edata_list_t; diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 094cc1ad..1c2715d0 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -43,6 +43,13 @@ struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; +typedef struct rtree_leaf_elm_contents_s rtree_leaf_elm_contents_t; +struct rtree_leaf_elm_contents_s { + edata_t *edata; + szind_t szind; + bool slab; +}; + struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT /* @@ -163,43 +170,53 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); } -JEMALLOC_ALWAYS_INLINE edata_t * -rtree_leaf_elm_bits_edata_get(uintptr_t bits) { +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leaf_elm_bits_encode(rtree_leaf_elm_contents_t contents) { + uintptr_t edata_bits = (uintptr_t)contents.edata + & (((uintptr_t)1 << LG_VADDR) - 1); + uintptr_t szind_bits = (uintptr_t)contents.szind << LG_VADDR; + /* + * Slab shares the low bit of edata; we know edata is on an even address + * (in fact, it's 128 bytes on 64-bit systems; we can enforce this + * alignment if we want to steal 6 extra rtree leaf bits someday. + */ + uintptr_t slab_bits = (uintptr_t)contents.slab; + return szind_bits | edata_bits | slab_bits; +} + +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_contents_t +rtree_leaf_elm_bits_decode(uintptr_t bits) { + rtree_leaf_elm_contents_t contents; + /* Do the easy things first. */ + contents.szind = bits >> LG_VADDR; + contents.slab = (bool)(bits & 1); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set - * the higher ones. Instead, the high bits gets zeroed. + * the higher ones. Instead, the high bits get zeroed. */ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; /* Mask off the slab bit. */ uintptr_t low_bit_mask = ~(uintptr_t)1; uintptr_t mask = high_bit_mask & low_bit_mask; - return (edata_t *)(bits & mask); + contents.edata = (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask slab bit. 
*/ - return (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> - RTREE_NHIB) & ~((uintptr_t)0x1)); + contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) + >> RTREE_NHIB) & ~((uintptr_t)0x1)); # endif + return contents; } -JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_bits_szind_get(uintptr_t bits) { - return (szind_t)(bits >> LG_VADDR); -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_bits_slab_get(uintptr_t bits) { - return (bool)(bits & (uintptr_t)0x1); -} - -# endif +# endif /* RTREE_LEAF_COMPACT */ JEMALLOC_ALWAYS_INLINE edata_t * rtree_leaf_elm_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_edata_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents.edata; #else edata_t *edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -212,7 +229,8 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_szind_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents.szind; #else return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -224,7 +242,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - return rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents.slab; #else return atomic_load_b(&elm->le_slab, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -236,9 +255,10 @@ rtree_leaf_elm_edata_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, edata_t *edata) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) - | ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + old_bits); + contents.edata = edata; + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_p(&elm->le_edata, edata, ATOMIC_RELEASE); @@ -253,10 +273,10 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)rtree_leaf_elm_bits_slab_get(old_bits)); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + old_bits); + contents.szind = szind; + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); @@ -269,9 +289,10 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << - LG_VADDR) | ((uintptr_t)rtree_leaf_elm_bits_edata_get(old_bits) & - (((uintptr_t)0x1 << LG_VADDR) - 1)) | ((uintptr_t)slab); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + old_bits); + contents.slab = slab; + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); 
#else atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); @@ -280,11 +301,9 @@ rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, edata_t *edata, szind_t szind, bool slab) { + rtree_leaf_elm_t *elm, rtree_leaf_elm_contents_t contents) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = ((uintptr_t)szind << LG_VADDR) | - ((uintptr_t)edata & (((uintptr_t)0x1 << LG_VADDR) - 1)) | - ((uintptr_t)slab); + uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); @@ -382,7 +401,11 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, } assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, edata, szind, slab); + rtree_leaf_elm_contents_t contents; + contents.edata = edata; + contents.szind = szind; + contents.slab = slab; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); return false; } @@ -437,9 +460,11 @@ rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, } #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - *r_edata = rtree_leaf_elm_bits_edata_get(bits); - *r_szind = rtree_leaf_elm_bits_szind_get(bits); - *r_slab = rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + + *r_edata = contents.edata; + *r_szind = contents.szind; + *r_slab = contents.slab; #else *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); @@ -473,8 +498,10 @@ rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - *r_szind = rtree_leaf_elm_bits_szind_get(bits); - *r_slab = 
rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( + bits); + *r_szind = contents.szind; + *r_slab = contents.slab; #else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true); @@ -494,8 +521,9 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - *r_szind = rtree_leaf_elm_bits_szind_get(bits); - *r_slab = rtree_leaf_elm_bits_slab_get(bits); + rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); + *r_szind = contents.szind; + *r_slab = contents.slab; #else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); @@ -518,7 +546,11 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) != NULL); - rtree_leaf_elm_write(tsdn, rtree, elm, NULL, SC_NSIZES, false); + rtree_leaf_elm_contents_t contents; + contents.edata = NULL; + contents.szind = SC_NSIZES; + contents.slab = false; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); } #endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/src/emap.c b/src/emap.c index 723dfad2..98921df5 100644 --- a/src/emap.c +++ b/src/emap.c @@ -139,10 +139,13 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, static void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, edata, szind, slab); + rtree_leaf_elm_contents_t contents; + contents.edata = edata; + contents.szind = szind; + contents.slab = slab; + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { - 
rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, edata, szind, - slab); + rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); } } @@ -292,15 +295,20 @@ emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { + rtree_leaf_elm_contents_t clear_contents; + clear_contents.edata = NULL; + clear_contents.szind = SC_NSIZES; + clear_contents.slab = false; + if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->lead_elm_b, NULL, SC_NSIZES, false); + prepare->lead_elm_b, clear_contents); } rtree_leaf_elm_t *merged_b; if (prepare->trail_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->trail_elm_a, NULL, SC_NSIZES, false); + prepare->trail_elm_a, clear_contents); merged_b = prepare->trail_elm_b; } else { merged_b = prepare->trail_elm_a; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 3545cfc0..1a842d77 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -137,8 +137,11 @@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_write(tsdn, rtree, elm, &edata, SC_NSIZES, - false); + rtree_leaf_elm_contents_t contents; + contents.edata = &edata; + contents.szind = SC_NSIZES; + contents.slab = false; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, "rtree_edata_read() should return previously set value"); From 12eb888e54572c417c68495fa5be75d9f8402f81 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 12:38:28 -0700 Subject: [PATCH 1677/2608] Edata: Add a ranged bit. We steal the dumpable bit, which we ended up not needing. 
--- include/jemalloc/internal/edata.h | 51 ++++++++++++------------------- src/emap.c | 6 ++-- src/extent.c | 14 +++++---- src/extent_dss.c | 4 +-- test/unit/rtree.c | 8 ++--- test/unit/slab.c | 2 +- 6 files changed, 38 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 0a99ff00..3a9ebc81 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -68,7 +68,7 @@ struct edata_s { * a: arena_ind * b: slab * c: committed - * d: dumpable + * r: ranged * z: zeroed * t: state * i: szind @@ -76,7 +76,7 @@ struct edata_s { * s: bin_shard * n: sn * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zrcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -91,22 +91,10 @@ struct edata_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * - * dumpable: The dumpable flag indicates whether or not we've set the - * memory in question to be dumpable. Note that this - * interacts somewhat subtly with user-specified extent hooks, - * since we don't know if *they* are fiddling with - * dumpability (in which case, we don't want to undo whatever - * they're doing). To deal with this scenario, we: - * - Make dumpable false only for memory allocated with the - * default hooks. - * - Only allow memory to go from non-dumpable to dumpable, - * and only once. - * - Never make the OS call to allow dumping when the - * dumpable bit is already set. - * These three constraints mean that we will never - * accidentally dump user memory that the user meant to set - * nondumpable with their extent hooks. - * + * ranged: Whether or not this extent is currently owned by the range + * allocator. 
This may be false even if the extent originally + * came from a range allocator; this indicates its *current* + * owner, not its original owner. * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. @@ -148,12 +136,12 @@ struct edata_s { #define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) #define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_DUMPABLE_WIDTH 1 -#define EDATA_BITS_DUMPABLE_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_DUMPABLE_MASK MASK(EDATA_BITS_DUMPABLE_WIDTH, EDATA_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_RANGED_WIDTH 1 +#define EDATA_BITS_RANGED_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_RANGED_MASK MASK(EDATA_BITS_RANGED_WIDTH, EDATA_BITS_RANGED_SHIFT) #define EDATA_BITS_ZEROED_WIDTH 1 -#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_DUMPABLE_WIDTH + EDATA_BITS_DUMPABLE_SHIFT) +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_RANGED_WIDTH + EDATA_BITS_RANGED_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) #define EDATA_BITS_STATE_WIDTH 2 @@ -283,9 +271,9 @@ edata_committed_get(const edata_t *edata) { } static inline bool -edata_dumpable_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_DUMPABLE_MASK) >> - EDATA_BITS_DUMPABLE_SHIFT); +edata_ranged_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_RANGED_MASK) >> + EDATA_BITS_RANGED_SHIFT); } static inline bool @@ -479,9 +467,9 @@ edata_committed_set(edata_t *edata, bool committed) { } static inline void -edata_dumpable_set(edata_t *edata, bool dumpable) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_DUMPABLE_MASK) | - ((uint64_t)dumpable << EDATA_BITS_DUMPABLE_SHIFT); +edata_ranged_set(edata_t *edata, bool ranged) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_RANGED_MASK) | + ((uint64_t)ranged << 
EDATA_BITS_RANGED_SHIFT); } static inline void @@ -522,8 +510,9 @@ edata_is_head_set(edata_t *edata, bool is_head) { static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed, bool dumpable, extent_head_state_t is_head) { + bool committed, bool ranged, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); + assert(ranged == false); edata_arena_ind_set(edata, arena_ind); edata_addr_set(edata, addr); @@ -534,7 +523,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_state_set(edata, state); edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); - edata_dumpable_set(edata, dumpable); + edata_ranged_set(edata, ranged); ql_elm_new(edata, ql_link); edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { @@ -553,7 +542,7 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); - edata_dumpable_set(edata, true); + edata_ranged_set(edata, false); } static inline void diff --git a/src/emap.c b/src/emap.c index 98921df5..a227ad10 100644 --- a/src/emap.c +++ b/src/emap.c @@ -246,11 +246,11 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), - edata_dumpable_get(edata), EXTENT_NOT_HEAD); + edata_ranged_get(edata), EXTENT_NOT_HEAD); /* - * We use incorrect constants for things like arena ind, zero, dump, and - * commit state, and head status. This is a fake edata_t, used to + * We use incorrect constants for things like arena ind, zero, ranged, + * and commit state, and head status. This is a fake edata_t, used to * facilitate a lookup. 
*/ edata_t lead; diff --git a/src/extent.c b/src/extent.c index 05d1755e..7f2d8833 100644 --- a/src/extent.c +++ b/src/extent.c @@ -80,7 +80,7 @@ ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool commit = true; edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, szind, zero, &commit, false); - assert(edata == NULL || edata_dumpable_get(edata)); + assert(edata == NULL || !edata_ranged_get(edata)); return edata; } @@ -110,7 +110,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size, alignment, slab, szind, zero, &commit); } - assert(edata == NULL || edata_dumpable_get(edata)); + assert(edata == NULL || !edata_ranged_get(edata)); return edata; } @@ -119,7 +119,7 @@ ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - assert(edata_dumpable_get(edata)); + assert(!edata_ranged_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -661,7 +661,8 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_init(edata, ecache_ind_get(&shard->ecache_retained), ptr, alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), - extent_state_active, zeroed, committed, true, EXTENT_IS_HEAD); + extent_state_active, zeroed, committed, /* ranged */ false, + EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); @@ -814,7 +815,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, size, slab, szind, pa_shard_extent_sn_next(shard), - extent_state_active, *zero, *commit, true, EXTENT_NOT_HEAD); + extent_state_active, *zero, *commit, /* ranged */ false, + EXTENT_NOT_HEAD); if (extent_register(tsdn, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; 
@@ -1059,7 +1061,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata) { - assert(edata_dumpable_get(edata)); + assert(!edata_ranged_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); diff --git a/src/extent_dss.c b/src/extent_dss.c index de6852e0..18b68952 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -157,7 +157,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, gap_addr_page, gap_size_page, false, SC_NSIZES, pa_shard_extent_sn_next( &arena->pa_shard), extent_state_active, - false, true, true, EXTENT_NOT_HEAD); + false, true, false, EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired @@ -206,7 +206,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, - true, EXTENT_NOT_HEAD); + false, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 1a842d77..01e710c5 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -35,9 +35,9 @@ TEST_BEGIN(test_rtree_extrema) { edata_t edata_a, edata_b; edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -80,7 +80,7 @@ TEST_BEGIN(test_rtree_bits) { edata_t edata; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true, 
EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -126,7 +126,7 @@ TEST_BEGIN(test_rtree_random) { edata_t edata; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, true, EXTENT_NOT_HEAD); + extent_state_active, false, false, false, EXTENT_NOT_HEAD); expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 304a93d4..5ca8c441 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -12,7 +12,7 @@ TEST_BEGIN(test_arena_slab_regind) { edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, true, + binind, 0, extent_state_active, false, true, false, EXTENT_NOT_HEAD); expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); From dfef0df71a956338c3bb4a902a288ee550409c3b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 18:34:46 -0700 Subject: [PATCH 1678/2608] Emap: Move edata modification out of emap_remap. 
--- src/arena.c | 6 ++++-- src/emap.c | 1 - src/extent.c | 1 + src/pa.c | 1 + 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/arena.c b/src/arena.c index fd19e770..c70b1284 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1067,7 +1067,8 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); szind_t szind = sz_size2index(usize); - emap_remap(tsdn, &emap_global, edata, szind, false); + edata_szind_set(edata, szind); + emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); prof_idump_rollback(tsdn, usize); @@ -1079,7 +1080,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - emap_remap(tsdn, &emap_global, edata, SC_NBINS, false); + edata_szind_set(edata, SC_NBINS); + emap_remap(tsdn, &emap_global, edata, SC_NBINS, /* slab */ false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); diff --git a/src/emap.c b/src/emap.c index a227ad10..11e4f4a1 100644 --- a/src/emap.c +++ b/src/emap.c @@ -206,7 +206,6 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { EMAP_DECLARE_RTREE_CTX; - edata_szind_set(edata, szind); if (szind != SC_NSIZES) { rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), szind, slab); diff --git a/src/extent.c b/src/extent.c index 7f2d8833..0d87cffa 100644 --- a/src/extent.c +++ b/src/extent.c @@ -487,6 +487,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } if (leadsize == 0 && trailsize == 0) { + edata_szind_set(*edata, szind); emap_remap(tsdn, &emap_global, *edata, szind, slab); } diff --git a/src/pa.c b/src/pa.c index a1063b9e..2809630e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -185,6 +185,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, ATOMIC_RELAXED); } pa_nactive_add(shard, expand_amount >> LG_PAGE); + edata_szind_set(edata, szind); emap_remap(tsdn, 
&emap_global, edata, szind, slab); return false; } From 0c96a2f03bcb741b1c29fd1a3af3044a03a8ac08 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 18:45:54 -0700 Subject: [PATCH 1679/2608] Emap: Move out remaining edata modifications. --- src/emap.c | 10 ---------- src/extent.c | 9 +++++++++ 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/emap.c b/src/emap.c index 11e4f4a1..95ff7b34 100644 --- a/src/emap.c +++ b/src/emap.c @@ -272,9 +272,6 @@ void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { - edata_size_set(lead, size_a); - edata_szind_set(lead, szind_a); - emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, prepare->lead_elm_b, lead, szind_a, slab_a); emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a, @@ -313,13 +310,6 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, merged_b = prepare->trail_elm_a; } - edata_size_set(lead, edata_size_get(lead) + edata_size_get(trail)); - edata_szind_set(lead, SC_NSIZES); - edata_sn_set(lead, (edata_sn_get(lead) < edata_sn_get(trail)) ? 
- edata_sn_get(lead) : edata_sn_get(trail)); - edata_zeroed_set(lead, edata_zeroed_get(lead) - && edata_zeroed_get(trail)); - emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false); } diff --git a/src/extent.c b/src/extent.c index 0d87cffa..b0db91dc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1229,6 +1229,8 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_c; } + edata_size_set(edata, size_a); + edata_szind_set(edata, szind_a); emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); @@ -1278,6 +1280,13 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, emap_merge_prepare(tsdn, &emap_global, &prepare, a, b); emap_lock_edata2(tsdn, &emap_global, a, b); + + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + edata_szind_set(a, SC_NSIZES); + edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? + edata_sn_get(a) : edata_sn_get(b)); + edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); + emap_merge_commit(tsdn, &emap_global, &prepare, a, b); emap_unlock_edata2(tsdn, &emap_global, a, b); From 883ab327cca593de320f781e3c654e8b716a4786 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 13 Mar 2020 19:33:30 -0700 Subject: [PATCH 1680/2608] Emap: Move out last edata state touching. --- include/jemalloc/internal/emap.h | 15 ++++++--------- src/emap.c | 11 ----------- src/extent.c | 5 +++++ 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b9f6bc06..8c7713ce 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -87,12 +87,10 @@ struct emap_prepare_s { }; /** - * These functions do some of the metadata management for merging, splitting, - * and reusing extents. 
In particular, they set the boundary mappings from - * addresses to edatas and fill in the szind, size, and slab values for the - * output edata (and, for splitting, *all* values for the trail). If the result - * is going to be used as a slab, you still need to call emap_register_interior - * on it, though. + * These functions the emap metadata management for merging, splitting, and + * reusing extents. In particular, they set the boundary mappings from + * addresses to edatas. If the result is going to be used as a slab, you + * still need to call emap_register_interior on it, though. * * Remap simply changes the szind and slab status of an extent's boundary * mappings. If the extent is not a slab, it doesn't bother with updating the @@ -107,9 +105,8 @@ struct emap_prepare_s { * and it returns true on failure (at which point the caller shouldn't commit). * * In all cases, "lead" refers to the lower-addressed extent, and trail to the - * higher-addressed one. Trail can contain garbage (except for its arena_ind - * and esn values) data for the split variants, and can be reused for any - * purpose by its given arena after a merge or a failed split. + * higher-addressed one. It's the caller's responsibility to set the edata + * state appropriately. 
*/ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); diff --git a/src/emap.c b/src/emap.c index 95ff7b34..c79dafa7 100644 --- a/src/emap.c +++ b/src/emap.c @@ -228,7 +228,6 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, szind, slab); } } - } bool @@ -236,16 +235,6 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b) { EMAP_DECLARE_RTREE_CTX; - /* - * Note that while the trail mostly inherits its attributes from the - * extent to be split, it maintains its own arena ind -- this allows - * cross-arena edata interactions, such as occur in the range ecache. - */ - edata_init(trail, edata_arena_ind_get(trail), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_ranged_get(edata), EXTENT_NOT_HEAD); /* * We use incorrect constants for things like arena ind, zero, ranged, diff --git a/src/extent.c b/src/extent.c index b0db91dc..33179939 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1213,6 +1213,11 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, goto label_error_a; } + edata_init(trail, edata_arena_ind_get(edata), + (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), + edata_zeroed_get(edata), edata_committed_get(edata), + edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, &emap_global, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); From 7bb6e2dc0d526bac72d2ed531ddb60fd10a5a5e4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 09:46:09 -0700 Subject: [PATCH 1681/2608] Eset: take opt_lg_max_active_fit as a parameter. 
This breaks its dependence on the global. --- include/jemalloc/internal/eset.h | 2 +- src/eset.c | 29 ++++++++++++++++++----------- src/extent.c | 14 ++++++++++++-- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 5c1051a6..d051b81b 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -52,6 +52,6 @@ void eset_remove(eset_t *eset, edata_t *edata); * null if no such item could be found. */ edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, - bool delay_coalesce); + unsigned lg_max_fit); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index 16ca72d1..12a57aff 100644 --- a/src/eset.c +++ b/src/eset.c @@ -154,9 +154,15 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, /* * Do first-fit extent selection, i.e. select the oldest/lowest extent that is * large enough. + * + * lg_max_fit is the (log of the) maximum ratio between the requested size and + * the returned size that we'll allow. This can reduce fragmentation by + * avoiding reusing and splitting large extents for smaller sizes. In practice, + * it's set to opt_lg_extent_max_active_fit for the dirty eset and SC_PTR_BITS + * for others. */ static edata_t * -eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { +eset_first_fit(eset_t *eset, size_t size, unsigned lg_max_fit) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -178,14 +184,15 @@ eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { assert(!edata_heap_empty(&eset->heaps[i])); edata_t *edata = edata_heap_first(&eset->heaps[i]); assert(edata_size_get(edata) >= size); - /* - * In order to reduce fragmentation, avoid reusing and splitting - * large eset for much smaller sizes. - * - * Only do check for dirty eset (delay_coalesce). 
- */ - if (delay_coalesce && - (sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + if (lg_max_fit == SC_PTR_BITS) { + /* + * We'll shift by this below, and shifting out all the + * bits is undefined. Decreasing is safe, since the + * page size is larger than 1 byte. + */ + lg_max_fit = SC_PTR_BITS - 1; + } + if ((sz_pind2sz(i) >> lg_max_fit) > size) { break; } if (ret == NULL || edata_snad_comp(edata, ret) < 0) { @@ -201,14 +208,14 @@ eset_first_fit(eset_t *eset, size_t size, bool delay_coalesce) { } edata_t * -eset_fit(eset_t *eset, size_t esize, size_t alignment, bool delay_coalesce) { +eset_fit(eset_t *eset, size_t esize, size_t alignment, unsigned lg_max_fit) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit(eset, max_size, delay_coalesce); + edata_t *edata = eset_first_fit(eset, max_size, lg_max_fit); if (alignment > PAGE && edata == NULL) { /* diff --git a/src/extent.c b/src/extent.c index 33179939..e570ed59 100644 --- a/src/extent.c +++ b/src/extent.c @@ -398,8 +398,18 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { - edata = eset_fit(&ecache->eset, size, alignment, - ecache->delay_coalesce); + /* + * A large extent might be broken up from its original size to + * some small size to satisfy a small request. When that small + * request is freed, though, it won't merge back with the larger + * extent if delayed coalescing is on. The large extent can + * then no longer satify a request for its original size. To + * limit this effect, when delayed coalescing is enabled, we + * put a cap on how big an extent we can split for a request. + */ + unsigned lg_max_fit = ecache->delay_coalesce + ? 
(unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; + edata = eset_fit(&ecache->eset, size, alignment, lg_max_fit); } if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); From f730577277ace08287bb8eedce75e49d35aeb0ba Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 10:05:12 -0700 Subject: [PATCH 1682/2608] Eset: Parameterize last globals accesses. I.e. opt_retain and maps_coalesce. --- include/jemalloc/internal/eset.h | 2 +- src/eset.c | 16 ++++++---------- src/extent.c | 8 +++++++- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index d051b81b..e29179d1 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -51,7 +51,7 @@ void eset_remove(eset_t *eset, edata_t *edata); * Select an extent from this eset of the given size and alignment. Returns * null if no such item could be found. */ -edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, +edata_t *eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only, unsigned lg_max_fit); #endif /* JEMALLOC_INTERNAL_ESET_H */ diff --git a/src/eset.c b/src/eset.c index 12a57aff..c4e39d25 100644 --- a/src/eset.c +++ b/src/eset.c @@ -2,8 +2,6 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/eset.h" -/* For opt_retain */ -#include "jemalloc/internal/extent_mmap.h" const bitmap_info_t eset_bitmap_info = BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); @@ -162,16 +160,13 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * for others. */ static edata_t * -eset_first_fit(eset_t *eset, size_t size, unsigned lg_max_fit) { +eset_first_fit(eset_t *eset, size_t size, bool exact_only, + unsigned lg_max_fit) { edata_t *ret = NULL; pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - if (!maps_coalesce && !opt_retain) { - /* - * No split / merge allowed (Windows w/o retain). Try exact fit - * only. 
- */ + if (exact_only) { return edata_heap_empty(&eset->heaps[pind]) ? NULL : edata_heap_first(&eset->heaps[pind]); } @@ -208,14 +203,15 @@ eset_first_fit(eset_t *eset, size_t size, unsigned lg_max_fit) { } edata_t * -eset_fit(eset_t *eset, size_t esize, size_t alignment, unsigned lg_max_fit) { +eset_fit(eset_t *eset, size_t esize, size_t alignment, bool exact_only, + unsigned lg_max_fit) { size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ if (max_size < esize) { return NULL; } - edata_t *edata = eset_first_fit(eset, max_size, lg_max_fit); + edata_t *edata = eset_first_fit(eset, max_size, exact_only, lg_max_fit); if (alignment > PAGE && edata == NULL) { /* diff --git a/src/extent.c b/src/extent.c index e570ed59..db658bb6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -398,6 +398,11 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_unlock_edata(tsdn, &emap_global, unlock_edata); } } else { + /* + * If split and merge are not allowed (Windows w/o retain), try + * exact fit only. + */ + bool exact_only = (!maps_coalesce && !opt_retain); /* * A large extent might be broken up from its original size to * some small size to satisfy a small request. When that small @@ -409,7 +414,8 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ unsigned lg_max_fit = ecache->delay_coalesce ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; - edata = eset_fit(&ecache->eset, size, alignment, lg_max_fit); + edata = eset_fit(&ecache->eset, size, alignment, exact_only, + lg_max_fit); } if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); From 294b276fc7b03319bbc829cef5de7dfec71f997c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 10:49:34 -0700 Subject: [PATCH 1683/2608] PA: Parameterize emap. Move emap_global to arena. This lets us test the PA module without interfering with the global emap used by the real allocator (the one not under test). 
--- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/arena_inlines_b.h | 52 +++-- include/jemalloc/internal/emap.h | 2 - include/jemalloc/internal/extent.h | 8 +- include/jemalloc/internal/pa.h | 11 +- src/arena.c | 20 +- src/ctl.c | 2 +- src/ehooks.c | 8 +- src/emap.c | 2 - src/extent.c | 223 ++++++++++---------- src/inspect.c | 4 +- src/jemalloc.c | 25 ++- src/large.c | 2 +- src/pa.c | 17 +- src/prof.c | 3 +- src/tcache.c | 5 +- test/unit/arena_reset.c | 4 +- test/unit/binshard.c | 4 +- test/unit/prof_recent.c | 2 +- 19 files changed, 211 insertions(+), 184 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 24634958..9fea729d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -15,6 +15,7 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; +extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 565e2262..7351db98 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -48,10 +48,12 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, /* Static check. */ if (alloc_ctx == NULL) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); } if (unlikely(!is_slab)) { @@ -75,15 +77,15 @@ arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, /* Static check. 
*/ if (alloc_ctx == NULL) { - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, - ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), + &arena_emap_global, ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), - &emap_global, ptr); + &arena_emap_global, ptr); large_prof_tctx_reset(edata); } } @@ -94,7 +96,8 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); @@ -157,7 +160,7 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, JEMALLOC_ALWAYS_INLINE arena_t * arena_aalloc(tsdn_t *tsdn, const void *ptr) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); unsigned arena_ind = edata_arena_ind_get(edata); return (arena_t *)atomic_load_p(&arenas[arena_ind], ATOMIC_RELAXED); } @@ -166,7 +169,7 @@ JEMALLOC_ALWAYS_INLINE size_t arena_salloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); return sz_index2size(alloc_ctx.szind); @@ -184,8 +187,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { */ emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, - &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, + ptr, &full_alloc_ctx); if (missing) { return 0; } @@ -208,7 +211,8 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && 
unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); large_dalloc(tsdn, edata); } } @@ -218,10 +222,11 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(ptr != NULL); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -246,7 +251,8 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, slow_path); } } else { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); large_dalloc(tsdn, edata); } } @@ -267,11 +273,13 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx = *caller_alloc_ctx; } else { util_assume(!tsdn_null(tsdn)); - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, + &alloc_ctx); } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); @@ -303,15 +311,16 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { } if ((config_prof && opt_prof) || config_debug) { - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert((config_prof && 
opt_prof) || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, - ptr); + edata_t *edata = emap_edata_lookup(tsdn, + &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } @@ -341,7 +350,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (config_prof && opt_prof) { if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. */ - emap_alloc_ctx_lookup(tsdn, &emap_global, ptr, + emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); } else { @@ -357,7 +366,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, + ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 8c7713ce..9f814ce9 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -26,8 +26,6 @@ struct emap_full_alloc_ctx_s { edata_t *edata; }; -extern emap_t emap_global; - bool emap_init(emap_t *emap, base_t *base, bool zeroed); /* diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 8b2db184..f5fd8129 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -47,10 +47,10 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t 
size_b, szind_t szind_b, bool slab_b); -bool extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, +edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); +bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 82676ee4..b216412f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/lockedint.h" enum pa_decay_purge_setting_e { @@ -140,6 +141,9 @@ struct pa_shard_s { decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ + /* The emap this shard is tied to. */ + emap_t *emap; + /* The base from which we get the ehooks and allocate metadat. */ base_t *base; }; @@ -171,9 +175,10 @@ pa_shard_ehooks_get(pa_shard_t *shard) { } /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, + unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, + nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); + /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). 
diff --git a/src/arena.c b/src/arena.c index c70b1284..2e703088 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,6 +37,8 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; +emap_t arena_emap_global; + const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ h, @@ -668,7 +670,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); @@ -1064,11 +1066,11 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); } - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); szind_t szind = sz_size2index(usize); edata_szind_set(edata, szind); - emap_remap(tsdn, &emap_global, edata, szind, /* slab */ false); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); prof_idump_rollback(tsdn, usize); @@ -1081,7 +1083,7 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { assert(ptr != NULL); edata_szind_set(edata, SC_NBINS); - emap_remap(tsdn, &emap_global, edata, SC_NBINS, /* slab */ false); + emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false); assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); @@ -1094,7 +1096,7 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, cassert(config_prof); assert(opt_prof); - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); size_t usize = edata_usize_get(edata); size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { @@ -1223,7 +1225,7 @@ 
arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { void arena_dalloc_small(tsdn_t *tsdn, void *ptr) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); arena_t *arena = arena_get_from_edata(edata); arena_dalloc_bin(tsdn, arena, edata, ptr); @@ -1237,7 +1239,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, /* Calls with non-zero extra had to clamp extra. */ assert(extra == 0 || size + extra <= SC_LARGE_MAXCLASS); - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(size > SC_LARGE_MAXCLASS)) { ret = true; goto done; @@ -1271,7 +1273,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = true; } done: - assert(edata == emap_edata_lookup(tsdn, &emap_global, ptr)); + assert(edata == emap_edata_lookup(tsdn, &arena_emap_global, ptr)); *newsize = edata_usize_get(edata); return ret; @@ -1491,7 +1493,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_t cur_time; nstime_init_update(&cur_time); - if (pa_shard_init(tsdn, &arena->pa_shard, base, ind, + if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), &cur_time, arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { diff --git a/src/ctl.c b/src/ctl.c index 00fd7441..7555267a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2650,7 +2650,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); if (edata == NULL) goto label_return; diff --git a/src/ehooks.c b/src/ehooks.c index ff459dfb..1016c3e9 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ 
-189,8 +189,8 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, static inline bool ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); - edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); + edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); return edata_sn_comp(a, b) == 0; } @@ -253,9 +253,9 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = emap_edata_lookup(tsdn, &emap_global, addr_a); + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); bool head_a = edata_is_head_get(a); - edata_t *b = emap_edata_lookup(tsdn, &emap_global, addr_b); + edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); bool head_b = edata_is_head_get(b); return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/emap.c b/src/emap.c index c79dafa7..24d61212 100644 --- a/src/emap.c +++ b/src/emap.c @@ -3,8 +3,6 @@ #include "jemalloc/internal/emap.h" -emap_t emap_global; - /* * Note: Ends without at semicolon, so that * EMAP_DECLARE_RTREE_CTX; diff --git a/src/extent.c b/src/extent.c index db658bb6..ae0aa2c9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,11 +19,11 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, +static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t 
*shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); -static bool extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, +static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ @@ -36,14 +36,14 @@ static atomic_zu_t highpages; * definition. */ -static void extent_deregister(tsdn_t *tsdn, edata_t *edata); +static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained); +static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, @@ -53,12 +53,13 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, /******************************************************************************/ static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { +extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = 
extent_try_coalesce(tsdn, edata_cache, ehooks, ecache, edata, - &coalesced, false); + edata = extent_try_coalesce(tsdn, shard, edata_cache, ehooks, ecache, + edata, &coalesced, false); edata_state_set(edata, ecache->state); if (!coalesced) { @@ -156,8 +157,8 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, &shard->edata_cache, - ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, shard, + &shard->edata_cache, ehooks, ecache, edata)) { break; } /* @@ -178,7 +179,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_state_set(edata, extent_state_active); break; case extent_state_retained: - extent_deregister(tsdn, edata); + extent_deregister(tsdn, shard, edata); break; default: not_reached(); @@ -278,26 +279,27 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { } static bool -extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool gdump_add) { /* * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - emap_lock_edata(tsdn, &emap_global, edata); + emap_lock_edata(tsdn, shard->emap, edata); szind_t szind = edata_szind_get_maybe_invalid(edata); bool slab = edata_slab_get(edata); - if (emap_register_boundary(tsdn, &emap_global, edata, szind, slab)) { - emap_unlock_edata(tsdn, &emap_global, edata); + if (emap_register_boundary(tsdn, shard->emap, edata, szind, slab)) { + emap_unlock_edata(tsdn, shard->emap, edata); return true; } if (slab) { - emap_register_interior(tsdn, &emap_global, edata, szind); + emap_register_interior(tsdn, shard->emap, edata, szind); } - emap_unlock_edata(tsdn, &emap_global, edata); + emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -307,18 +309,18 @@ extent_register_impl(tsdn_t *tsdn, edata_t *edata, bool gdump_add) { } static bool -extent_register(tsdn_t *tsdn, edata_t *edata) { - return extent_register_impl(tsdn, edata, true); +extent_register(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + return extent_register_impl(tsdn, shard, edata, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, edata_t *edata) { - return extent_register_impl(tsdn, edata, false); +extent_register_no_gdump_add(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + return extent_register_impl(tsdn, shard, edata, false); } static void -extent_reregister(tsdn_t *tsdn, edata_t *edata) { - bool err = extent_register(tsdn, edata); +extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + bool err = extent_register(tsdn, shard, edata); assert(!err); } @@ -326,14 +328,15 @@ extent_reregister(tsdn_t *tsdn, edata_t *edata) { * Removes all pointers to the given extent from the global rtree. 
*/ static void -extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, &emap_global, edata); - emap_deregister_boundary(tsdn, &emap_global, edata); +extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool gdump) { + emap_lock_edata(tsdn, shard->emap, edata); + emap_deregister_boundary(tsdn, shard->emap, edata); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, edata); + emap_deregister_interior(tsdn, shard->emap, edata); edata_slab_set(edata, false); } - emap_unlock_edata(tsdn, &emap_global, edata); + emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -341,13 +344,14 @@ extent_deregister_impl(tsdn_t *tsdn, edata_t *edata, bool gdump) { } static void -extent_deregister(tsdn_t *tsdn, edata_t *edata) { - extent_deregister_impl(tsdn, edata, true); +extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { + extent_deregister_impl(tsdn, shard, edata, true); } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, edata_t *edata) { - extent_deregister_impl(tsdn, edata, false); +extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, + edata_t *edata) { + extent_deregister_impl(tsdn, shard, edata, false); } /* @@ -380,7 +384,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, &emap_global, new_addr, + edata = emap_lock_edata_from_addr(tsdn, shard->emap, new_addr, false); if (edata != NULL) { /* @@ -395,7 +399,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - emap_unlock_edata(tsdn, &emap_global, unlock_edata); + emap_unlock_edata(tsdn, shard->emap, unlock_edata); } } else { /* @@ -478,9 +482,9 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the lead. 
*/ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, &shard->edata_cache, ehooks, - *lead, leadsize, SC_NSIZES, false, size + trailsize, szind, - slab, growing_retained); + *edata = extent_split_impl(tsdn, shard, &shard->edata_cache, + ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, + szind, slab, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -490,9 +494,9 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, &shard->edata_cache, ehooks, - *edata, size, szind, slab, trailsize, SC_NSIZES, false, - growing_retained); + *trail = extent_split_impl(tsdn, shard, &shard->edata_cache, + ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, + false, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -504,7 +508,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (leadsize == 0 && trailsize == 0) { edata_szind_set(*edata, szind); - emap_remap(tsdn, &emap_global, *edata, szind, slab); + emap_remap(tsdn, shard->emap, *edata, szind, slab); } return extent_split_interior_ok; @@ -555,14 +559,14 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ assert(result == extent_split_interior_error); if (to_salvage != NULL) { - extent_deregister(tsdn, to_salvage); + extent_deregister(tsdn, shard, to_salvage); } if (to_leak != NULL) { void *leak = edata_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, &emap_global, + assert(emap_lock_edata_from_addr(tsdn, shard->emap, leak, false) == NULL); } return NULL; @@ -614,7 +618,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, assert(edata_state_get(edata) == extent_state_active); if 
(slab) { edata_slab_set(edata, slab); - emap_register_interior(tsdn, &emap_global, edata, szind); + emap_register_interior(tsdn, shard->emap, edata, szind); } if (*zero) { @@ -681,7 +685,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); - if (extent_register_no_gdump_add(tsdn, edata)) { + if (extent_register_no_gdump_add(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); goto label_err; } @@ -725,7 +729,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &shard->ecache_retained, to_salvage, true); } if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, to_leak); + extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, &shard->ecache_retained, to_leak, true); } @@ -769,7 +773,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } if (slab) { edata_slab_set(edata, true); - emap_register_interior(tsdn, &emap_global, edata, szind); + emap_register_interior(tsdn, shard->emap, edata, szind); } if (*zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); @@ -834,7 +838,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size, slab, szind, pa_shard_extent_sn_next(shard), extent_state_active, *zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); - if (extent_register(tsdn, edata)) { + if (extent_register(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } @@ -864,15 +868,15 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, - bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, + ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, + bool forward, 
bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, ehooks, edata_cache, + bool err = extent_merge_impl(tsdn, shard, ehooks, edata_cache, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -884,9 +888,10 @@ extent_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained, bool inactive_only) { +extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool *coalesced, bool growing_retained, + bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -901,7 +906,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, again = false; /* Try to coalesce forward. */ - edata_t *next = emap_lock_edata_from_addr(tsdn, &emap_global, + edata_t *next = emap_lock_edata_from_addr(tsdn, shard->emap, edata_past_get(edata), inactive_only); if (next != NULL) { /* @@ -912,10 +917,10 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - emap_unlock_edata(tsdn, &emap_global, next); + emap_unlock_edata(tsdn, shard->emap, next); - if (can_coalesce && !extent_coalesce(tsdn, edata_cache, - ehooks, ecache, edata, next, true, + if (can_coalesce && !extent_coalesce(tsdn, shard, + edata_cache, ehooks, ecache, edata, next, true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -927,15 +932,15 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, } /* Try to coalesce backward. */ - edata_t *prev = emap_lock_edata_from_addr(tsdn, &emap_global, + edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->emap, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - emap_unlock_edata(tsdn, &emap_global, prev); + emap_unlock_edata(tsdn, shard->emap, prev); - if (can_coalesce && !extent_coalesce(tsdn, edata_cache, - ehooks, ecache, edata, prev, false, + if (can_coalesce && !extent_coalesce(tsdn, shard, + edata_cache, ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { @@ -955,18 +960,19 @@ extent_try_coalesce_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, - edata, coalesced, growing_retained, false); +extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, + ecache, edata, coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, edata_cache, ehooks, ecache, - edata, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, + ecache, edata, coalesced, 
growing_retained, true); } /* Purge a single extent to retained / unmapped directly. */ @@ -1007,22 +1013,22 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_szind_set(edata, SC_NSIZES); if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, &emap_global, edata); + emap_deregister_interior(tsdn, shard->emap, edata); edata_slab_set(edata, false); } - emap_assert_mapped(tsdn, &emap_global, edata); + emap_assert_mapped(tsdn, shard->emap, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, &shard->edata_cache, ehooks, - ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, shard, &shard->edata_cache, + ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &shard->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, + edata = extent_try_coalesce_large(tsdn, shard, &shard->edata_cache, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); @@ -1045,7 +1051,7 @@ extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_register(tsdn, edata)) { + if (extent_register(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); return; } @@ -1088,11 +1094,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ - extent_deregister(tsdn, edata); + extent_deregister(tsdn, shard, edata); if (!extent_dalloc_wrapper_try(tsdn, shard, ehooks, edata)) { return; } - extent_reregister(tsdn, edata); + extent_reregister(tsdn, shard, edata); } /* Try to decommit; purge if that fails. 
*/ @@ -1131,7 +1137,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, edata); + extent_deregister(tsdn, shard, edata); edata_addr_set(edata, edata_base_get(edata)); @@ -1213,9 +1219,10 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained) { +extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); @@ -1235,13 +1242,13 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_zeroed_get(edata), edata_committed_get(edata), edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, &emap_global, &prepare, edata, + bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); if (err) { goto label_error_b; } - emap_lock_edata2(tsdn, &emap_global, edata, trail); + emap_lock_edata2(tsdn, shard->emap, edata, trail); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1252,14 +1259,14 @@ extent_split_impl(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, edata_size_set(edata, size_a); edata_szind_set(edata, szind_a); - emap_split_commit(tsdn, &emap_global, &prepare, edata, size_a, szind_a, + emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); - emap_unlock_edata2(tsdn, &emap_global, edata, trail); + emap_unlock_edata2(tsdn, shard->emap, edata, trail); return trail; label_error_c: - emap_unlock_edata2(tsdn, &emap_global, edata, trail); + emap_unlock_edata2(tsdn, shard->emap, edata, trail); label_error_b: edata_cache_put(tsdn, edata_cache, trail); label_error_a: @@ -1267,16 +1274,16 @@ label_error_a: } edata_t * -extent_split_wrapper(tsdn_t *tsdn, edata_cache_t *edata_cache, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, - size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, edata_cache, ehooks, edata, size_a, - szind_a, slab_a, size_b, szind_b, slab_b, false); +extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, + edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, + szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, shard, edata_cache, 
ehooks, edata, + size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); } static bool -extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, - edata_t *a, edata_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); @@ -1298,9 +1305,9 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, * than extent_{,de}register() to do things in the right order. */ emap_prepare_t prepare; - emap_merge_prepare(tsdn, &emap_global, &prepare, a, b); + emap_merge_prepare(tsdn, shard->emap, &prepare, a, b); - emap_lock_edata2(tsdn, &emap_global, a, b); + emap_lock_edata2(tsdn, shard->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_szind_set(a, SC_NSIZES); @@ -1308,8 +1315,8 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - emap_merge_commit(tsdn, &emap_global, &prepare, a, b); - emap_unlock_edata2(tsdn, &emap_global, a, b); + emap_merge_commit(tsdn, shard->emap, &prepare, a, b); + emap_unlock_edata2(tsdn, shard->emap, a, b); edata_cache_put(tsdn, edata_cache, b); @@ -1317,9 +1324,9 @@ extent_merge_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, } bool -extent_merge_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_cache_t *edata_cache, - edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, ehooks, edata_cache, a, b, false); +extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + edata_cache_t *edata_cache, edata_t *a, edata_t *b) { + return extent_merge_impl(tsdn, shard, ehooks, edata_cache, a, b, false); } bool diff --git a/src/inspect.c b/src/inspect.c index 
6c4dd8a7..5e8d51d6 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -6,7 +6,7 @@ inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); - const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = 0; return; @@ -31,7 +31,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL && bin_nfree != NULL && bin_nregs != NULL && slabcur_addr != NULL); - const edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); if (unlikely(edata == NULL)) { *nfree = *nregs = *size = *bin_nfree = *bin_nregs = 0; *slabcur_addr = NULL; diff --git a/src/jemalloc.c b/src/jemalloc.c index 72eb55bf..0be55492 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1623,7 +1623,7 @@ malloc_init_hard_a0_locked() { return true; } /* emap_global is static, hence zeroed. 
*/ - if (emap_init(&emap_global, b0get(), /* zeroed */ true)) { + if (emap_init(&arena_emap_global, b0get(), /* zeroed */ true)) { return true; } if (extent_boot()) { @@ -2645,7 +2645,8 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = sz_index2size(alloc_ctx.szind); @@ -2699,12 +2700,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_debug) { emap_alloc_ctx_t dbg_ctx; emap_alloc_ctx_lookup(tsd_tsdn(tsd), - &emap_global, ptr, &dbg_ctx); + &arena_emap_global, ptr, &dbg_ctx); assert(dbg_ctx.szind == alloc_ctx.szind); assert(dbg_ctx.slab == alloc_ctx.slab); } } else if (opt_prof) { - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { @@ -2781,8 +2782,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool res = emap_alloc_ctx_try_lookup_fast(tsd, &emap_global, - ptr, &alloc_ctx); + bool res = emap_alloc_ctx_try_lookup_fast(tsd, + &arena_emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ if (unlikely(!res || !alloc_ctx.slab)) { @@ -3238,7 +3239,8 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3510,11 +3512,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * 
object associated with the ptr (though the content of the edata_t * object can be changed). */ - edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, - ptr); + edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), + &arena_emap_global, ptr); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3547,7 +3550,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * xallocx() should keep using the same edata_t object (though its * content can be changed). */ - assert(emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr) + assert(emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr) == old_edata); if (unlikely(usize == old_usize)) { diff --git a/src/large.c b/src/large.c index 494a32ba..31205dfc 100644 --- a/src/large.c +++ b/src/large.c @@ -202,7 +202,7 @@ void * large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - edata_t *edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); size_t oldusize = edata_usize_get(edata); /* The following should have been caught by callers. 
*/ diff --git a/src/pa.c b/src/pa.c index 2809630e..2a581ef3 100644 --- a/src/pa.c +++ b/src/pa.c @@ -13,9 +13,9 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { } bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, - pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, + unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, + nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); /* @@ -68,6 +68,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, base_t *base, unsigned ind, shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); + shard->emap = emap; shard->base = base; return false; @@ -175,8 +176,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - if (extent_merge_wrapper(tsdn, ehooks, &shard->edata_cache, edata, - trail)) { + if (extent_merge_wrapper(tsdn, shard, ehooks, &shard->edata_cache, + edata, trail)) { extent_dalloc_wrapper(tsdn, shard, ehooks, trail); return true; } @@ -186,7 +187,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } pa_nactive_add(shard, expand_amount >> LG_PAGE); edata_szind_set(edata, szind); - emap_remap(tsdn, &emap_global, edata, szind, slab); + emap_remap(tsdn, shard->emap, edata, szind, slab); return false; } @@ -205,8 +206,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } - edata_t *trail = extent_split_wrapper(tsdn, &shard->edata_cache, ehooks, - edata, new_size, szind, slab, shrink_amount, SC_NSIZES, + edata_t *trail = extent_split_wrapper(tsdn, shard, &shard->edata_cache, + ehooks, edata, new_size, szind, slab, shrink_amount, SC_NSIZES, false); if (trail == NULL) { 
return true; diff --git a/src/prof.c b/src/prof.c index e68694a8..bbf8e9d1 100644 --- a/src/prof.c +++ b/src/prof.c @@ -229,7 +229,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, prof_fetch_sys_thread_name(tsd); } - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &emap_global, ptr); + edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, + ptr); prof_info_set(tsd, edata, tctx); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); diff --git a/src/tcache.c b/src/tcache.c index 9afc0063..d3453542 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -125,7 +125,7 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, size_t szind_sum = binind * nflush; for (unsigned i = 0; i < nflush; i++) { emap_full_alloc_ctx_t full_alloc_ctx; - emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &emap_global, + emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, cache_bin_ptr_array_get(arr, i), &full_alloc_ctx); edatas[i] = full_alloc_ctx.edata; szind_sum -= full_alloc_ctx.szind; @@ -185,7 +185,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } else { for (unsigned i = 0 ; i < nflush; i++) { item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), - &emap_global, cache_bin_ptr_array_get(&ptrs, i)); + &arena_emap_global, + cache_bin_ptr_array_get(&ptrs, i)); } } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a7a23f74..a2cf3e54 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -61,8 +61,8 @@ get_large_size(size_t ind) { static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &emap_global, ptr, - &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, + ptr, &full_alloc_ctx); if (missing) { return 0; } diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 243a9b3a..040ea54d 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ 
-62,12 +62,12 @@ thd_start(void *varg) { ptr = mallocx(1, MALLOCX_TCACHE_NONE); ptr2 = mallocx(129, MALLOCX_TCACHE_NONE); - edata = emap_edata_lookup(tsdn, &emap_global, ptr); + edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); shard1 = edata_binshard_get(edata); dallocx(ptr, 0); expect_u_lt(shard1, 16, "Unexpected bin shard used"); - edata = emap_edata_lookup(tsdn, &emap_global, ptr2); + edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr2); shard2 = edata_binshard_get(edata); dallocx(ptr2, 0); expect_u_lt(shard2, 4, "Unexpected bin shard used"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 19ff15fd..4aa9f9e9 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -103,7 +103,7 @@ TEST_END static void confirm_malloc(void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); - edata_t *e = emap_edata_lookup(TSDN_NULL, &emap_global, p); + edata_t *e = emap_edata_lookup(TSDN_NULL, &arena_emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); prof_recent_t *n = edata_prof_recent_alloc_get_no_lock(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); From 1a1124462e8c671809535a3dd617f08252a48ce5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 18:10:29 -0700 Subject: [PATCH 1684/2608] PA: Take zero as a bool rather than as a bool *. Now that we've moved junking to a higher level of the allocation stack, we don't care about this performance optimization (which only occurred in debug modes). 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/pa.h | 4 ++-- src/arena.c | 6 ++---- src/large.c | 26 ++--------------------- src/pa.c | 16 +++++++------- 5 files changed, 15 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9fea729d..6e0fe2b6 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -33,7 +33,7 @@ void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); #endif edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, - size_t usize, size_t alignment, bool *zero); + size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b216412f..3e9f1c21 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -195,10 +195,10 @@ size_t pa_shard_extent_sn_next(pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool *zero); + size_t alignment, bool slab, szind_t szind, bool zero); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero); + size_t new_size, szind_t szind, bool slab, bool zero); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. 
diff --git a/src/arena.c b/src/arena.c index 2e703088..b983b634 100644 --- a/src/arena.c +++ b/src/arena.c @@ -347,7 +347,7 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool *zero) { + size_t alignment, bool zero) { szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; @@ -736,10 +736,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool zero = false; - edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, &zero); + PAGE, /* slab */ true, /* szind */ binind, /* zero */ false); if (slab == NULL) { return NULL; diff --git a/src/large.c b/src/large.c index 31205dfc..80de716d 100644 --- a/src/large.c +++ b/src/large.c @@ -23,7 +23,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { size_t ausize; edata_t *edata; - bool is_zeroed; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); assert(!tsdn_null(tsdn) || arena != NULL); @@ -36,17 +35,11 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (config_fill && unlikely(opt_zero)) { zero = true; } - /* - * Copy zero into is_zeroed and pass the copy when allocating the - * extent, so that it is possible to make correct zero fill decisions - * below, even if is_zeroed ends up true when zero is false. 
- */ - is_zeroed = zero; if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn, - arena, usize, alignment, &is_zeroed)) == NULL) { + arena, usize, alignment, zero)) == NULL) { return NULL; } @@ -58,10 +51,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, malloc_mutex_unlock(tsdn, &arena->large_mtx); } - if (zero) { - assert(is_zeroed); - } - arena_decay_tick(tsdn, arena); return edata_addr_get(edata); } @@ -99,23 +88,13 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { arena_t *arena = arena_get_from_edata(edata); - if (config_fill && unlikely(opt_zero)) { - zero = true; - } - size_t old_size = edata_size_get(edata); size_t old_usize = edata_usize_get(edata); size_t new_size = usize + sz_large_pad; - /* - * Copy zero into is_zeroed_trail and pass the copy when allocating the - * extent, so that it is possible to make correct zero fill decisions - * below, even if is_zeroed_trail ends up true when zero is false. 
- */ - bool is_zeroed_trail = zero; szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, /* slab */ false, &is_zeroed_trail); + szind, /* slab */ false, zero); if (err) { return true; } @@ -137,7 +116,6 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, assert(nzero > 0); memset(zbase, 0, nzero); } - assert(is_zeroed_trail); } arena_extent_ralloc_large_expand(tsdn, arena, edata, old_usize); diff --git a/src/pa.c b/src/pa.c index 2a581ef3..04762a04 100644 --- a/src/pa.c +++ b/src/pa.c @@ -112,7 +112,7 @@ pa_shard_may_have_muzzy(pa_shard_t *shard) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool *zero) { + bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -121,16 +121,16 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, NULL, size, alignment, slab, szind, - zero); + &zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, slab, szind, zero); + NULL, size, alignment, slab, szind, &zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, - szind, zero); + szind, &zero); mapped_add = size; } if (edata != NULL) { @@ -145,7 +145,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *zero) { + size_t new_size, szind_t szind, bool slab, bool zero) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -161,16 +161,16 @@ 
pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, - zero); + &zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, trail_begin, expand_amount, PAGE, /* slab */ false, - SC_NSIZES, zero); + SC_NSIZES, &zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, - /* slab */ false, SC_NSIZES, zero); + /* slab */ false, SC_NSIZES, &zero); mapped_add = expand_amount; } if (trail == NULL) { From 11c47cb1336491b7f4d21f12eaba45a10af639c3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 14 Mar 2020 18:19:19 -0700 Subject: [PATCH 1685/2608] Extent: Take "bool zero" over "bool *zero". --- include/jemalloc/internal/extent.h | 6 +++--- src/extent.c | 34 ++++++++++++------------------ src/pa.c | 13 ++++++------ 3 files changed, 23 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f5fd8129..9db650fe 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,10 +21,10 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero); + szind_t szind, bool zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero); + szind_t szind, bool zero); void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, @@ -32,7 +32,7 @@ edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, 
edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit); + bool zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, diff --git a/src/extent.c b/src/extent.c index ae0aa2c9..8cc04478 100644 --- a/src/extent.c +++ b/src/extent.c @@ -39,7 +39,7 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, - size_t alignment, bool slab, szind_t szind, bool *zero, bool *commit, + size_t alignment, bool slab, szind_t szind, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, @@ -48,7 +48,7 @@ static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit); + szind_t szind, bool zero, bool *commit); /******************************************************************************/ @@ -72,7 +72,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_t * ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero) { + szind_t szind, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -88,7 +88,7 @@ ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t * 
ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero) { + szind_t szind, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -581,11 +581,11 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, static edata_t * extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit, bool growing_retained) { + szind_t szind, bool zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(new_addr == NULL || !slab); - assert(!*zero || !slab); + assert(!zero || !slab); edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, new_addr, size, alignment, slab, growing_retained); @@ -611,9 +611,6 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (edata_committed_get(edata)) { *commit = true; } - if (edata_zeroed_get(edata)) { - *zero = true; - } assert(edata_state_get(edata) == extent_state_active); if (slab) { @@ -621,7 +618,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_register_interior(tsdn, shard->emap, edata, szind); } - if (*zero) { + if (zero) { void *addr = edata_base_get(edata); if (!edata_zeroed_get(edata)) { size_t size = edata_size_get(edata); @@ -639,9 +636,9 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, static edata_t * extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { + bool zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); - assert(!*zero || !slab); + assert(!zero || !slab); size_t alloc_size_min = size + 
PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -690,9 +687,6 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, goto label_err; } - if (edata_zeroed_get(edata) && edata_committed_get(edata)) { - *zero = true; - } if (edata_committed_get(edata)) { *commit = true; } @@ -775,7 +769,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_slab_set(edata, true); emap_register_interior(tsdn, shard->emap, edata, szind); } - if (*zero && !edata_zeroed_get(edata)) { + if (zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size); @@ -790,7 +784,7 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool *zero, bool *commit) { + bool zero, bool *commit) { assert(size != 0); assert(alignment != 0); @@ -819,7 +813,7 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t * extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool *zero, bool *commit) { + szind_t szind, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -829,14 +823,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, - zero, commit); + &zero, commit); if (addr == NULL) { edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, size, slab, szind, pa_shard_extent_sn_next(shard), - extent_state_active, *zero, *commit, /* ranged */ false, + extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if 
(extent_register(tsdn, shard, edata)) { edata_cache_put(tsdn, &shard->edata_cache, edata); diff --git a/src/pa.c b/src/pa.c index 04762a04..b4a1e5be 100644 --- a/src/pa.c +++ b/src/pa.c @@ -120,17 +120,16 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, - &shard->ecache_dirty, NULL, size, alignment, slab, szind, - &zero); + &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, slab, szind, &zero); + NULL, size, alignment, slab, szind, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, slab, - szind, &zero); + szind, zero); mapped_add = size; } if (edata != NULL) { @@ -161,16 +160,16 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, - &zero); + zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, trail_begin, expand_amount, PAGE, /* slab */ false, - SC_NSIZES, &zero); + SC_NSIZES, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, - /* slab */ false, SC_NSIZES, &zero); + /* slab */ false, SC_NSIZES, zero); mapped_add = expand_amount; } if (trail == NULL) { From a4759a1911a6dbb5709302ab5ba94cc1b6322e63 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 09:14:29 -0700 Subject: [PATCH 1686/2608] Ehooks: avoid touching arena_emap_global in tests. That breaks our ability to test custom emaps in isolation. 
--- src/ehooks.c | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/src/ehooks.c b/src/ehooks.c index 1016c3e9..f2525e12 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -187,35 +187,6 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, return ehooks_default_split_impl(); } -static inline bool -ehooks_same_sn(tsdn_t *tsdn, void *addr_a, void *addr_b) { - edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); - edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); - return edata_sn_comp(a, b) == 0; -} - -/* - * Returns true if the given extents can't be merged because of their head bit - * settings. Assumes the second extent has the higher address. - */ -static bool -ehooks_no_merge_heads(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, - bool head_b) { - /* If b is a head extent, disallow the cross-region merge. */ - if (head_b) { - /* - * Additionally, sn should not overflow with retain; sanity - * check that different regions have unique sn. - */ - assert(!ehooks_same_sn(tsdn, addr_a, addr_b)); - return true; - } - assert(ehooks_same_sn(tsdn, addr_a, addr_b) || (have_dss && - (extent_in_dss(addr_a) || extent_in_dss(addr_b)))); - - return false; -} - bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, bool head_b) { @@ -238,8 +209,11 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, if (!maps_coalesce && !opt_retain) { return true; } - if (opt_retain && ehooks_no_merge_heads(tsdn, addr_a, head_a, addr_b, - head_b)) { + /* + * Don't merge across mappings when retain is on -- this preserves + * first-fit ordering. 
+ */ + if (opt_retain && head_b) { return true; } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { From 93b99dd14054886f3d25305b08b8c0f75f289fc4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 09:53:09 -0700 Subject: [PATCH 1687/2608] Extent: Stop passing an edata_cache everywhere. We already pass the pa_shard_t around everywhere; we can just use that. --- include/jemalloc/internal/extent.h | 6 +- src/extent.c | 103 ++++++++++++++--------------- src/pa.c | 8 +-- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 9db650fe..bec21d6a 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -48,10 +48,10 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b); + edata_t *a, edata_t *b); bool extent_boot(void); diff --git a/src/extent.c b/src/extent.c index 8cc04478..889857eb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,11 +20,11 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, - szind_t szind_a, bool 
slab_a, size_t size_b, szind_t szind_b, bool slab_b, + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained); + edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -42,8 +42,8 @@ static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, size_t alignment, bool slab, szind_t szind, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool *coalesced, bool growing_retained); + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, + bool growing_retained); static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, @@ -54,11 +54,10 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata) { + ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, shard, edata_cache, ehooks, ecache, + edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, &coalesced, false); edata_state_set(edata, ecache->state); @@ -157,8 +156,8 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, break; } /* Try to coalesce. 
*/ - if (extent_try_delayed_coalesce(tsdn, shard, - &shard->edata_cache, ehooks, ecache, edata)) { + if (extent_try_delayed_coalesce(tsdn, shard, ehooks, ecache, + edata)) { break; } /* @@ -482,9 +481,9 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, shard, &shard->edata_cache, - ehooks, *lead, leadsize, SC_NSIZES, false, size + trailsize, - szind, slab, growing_retained); + *edata = extent_split_impl(tsdn, shard, ehooks, *lead, leadsize, + SC_NSIZES, false, size + trailsize, szind, slab, + growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -494,9 +493,8 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, shard, &shard->edata_cache, - ehooks, *edata, size, szind, slab, trailsize, SC_NSIZES, - false, growing_retained); + *trail = extent_split_impl(tsdn, shard, ehooks, *edata, size, + szind, slab, trailsize, SC_NSIZES, false, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -862,15 +860,15 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, - bool forward, bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, + bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, shard, ehooks, edata_cache, + bool err = extent_merge_impl(tsdn, shard, ehooks, forward ? inner : outer, forward ? 
outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -882,9 +880,8 @@ extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool *coalesced, bool growing_retained, +extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* * We avoid checking / locking inactive neighbors for large size @@ -914,7 +911,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, emap_unlock_edata(tsdn, shard->emap, next); if (can_coalesce && !extent_coalesce(tsdn, shard, - edata_cache, ehooks, ecache, edata, next, true, + ehooks, ecache, edata, next, true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -934,7 +931,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, emap_unlock_edata(tsdn, shard->emap, prev); if (can_coalesce && !extent_coalesce(tsdn, shard, - edata_cache, ehooks, ecache, edata, prev, false, + ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { @@ -954,19 +951,17 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, - ecache, edata, coalesced, growing_retained, false); +extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, 
pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, ecache_t *ecache, - edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, edata_cache, ehooks, - ecache, edata, coalesced, growing_retained, true); +extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + coalesced, growing_retained, true); } /* Purge a single extent to retained / unmapped directly. */ @@ -1014,17 +1009,16 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_assert_mapped(tsdn, shard->emap, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, shard, &shard->edata_cache, - ehooks, ecache, edata, NULL, growing_retained); + edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, + NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &shard->ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, shard, - &shard->edata_cache, ehooks, ecache, edata, - &coalesced, growing_retained); + edata = extent_try_coalesce_large(tsdn, shard, ehooks, + ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && pa_shard_may_force_decay(shard)) { @@ -1213,10 +1207,9 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). 
*/ static edata_t * -extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, - bool growing_retained) { +extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, + edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, + szind_t szind_b, bool slab_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1225,7 +1218,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, return NULL; } - edata_t *trail = edata_cache_get(tsdn, edata_cache); + edata_t *trail = edata_cache_get(tsdn, &shard->edata_cache); if (trail == NULL) { goto label_error_a; } @@ -1262,22 +1255,22 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_cache_t *edata_cache, label_error_c: emap_unlock_edata2(tsdn, shard->emap, edata, trail); label_error_b: - edata_cache_put(tsdn, edata_cache, trail); + edata_cache_put(tsdn, &shard->edata_cache, trail); label_error_a: return NULL; } edata_t * extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - edata_cache_t *edata_cache, ehooks_t *ehooks, edata_t *edata, size_t size_a, - szind_t szind_a, bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, shard, edata_cache, ehooks, edata, - size_a, szind_a, slab_a, size_b, szind_b, slab_b, false); + ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, + bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { + return extent_split_impl(tsdn, shard, ehooks, edata, size_a, szind_a, + slab_a, size_b, szind_b, slab_b, false); } static bool -extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b, bool growing_retained) { +extent_merge_impl(tsdn_t *tsdn, 
pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, + edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(edata_base_get(a) < edata_base_get(b)); @@ -1312,15 +1305,15 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, emap_merge_commit(tsdn, shard->emap, &prepare, a, b); emap_unlock_edata2(tsdn, shard->emap, a, b); - edata_cache_put(tsdn, edata_cache, b); + edata_cache_put(tsdn, &shard->edata_cache, b); return false; } bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_cache_t *edata_cache, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, shard, ehooks, edata_cache, a, b, false); + edata_t *a, edata_t *b) { + return extent_merge_impl(tsdn, shard, ehooks, a, b, false); } bool diff --git a/src/pa.c b/src/pa.c index b4a1e5be..78ff3481 100644 --- a/src/pa.c +++ b/src/pa.c @@ -175,8 +175,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - if (extent_merge_wrapper(tsdn, shard, ehooks, &shard->edata_cache, - edata, trail)) { + if (extent_merge_wrapper(tsdn, shard, ehooks, edata, trail)) { extent_dalloc_wrapper(tsdn, shard, ehooks, trail); return true; } @@ -205,9 +204,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } - edata_t *trail = extent_split_wrapper(tsdn, shard, &shard->edata_cache, - ehooks, edata, new_size, szind, slab, shrink_amount, SC_NSIZES, - false); + edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, + new_size, szind, slab, shrink_amount, SC_NSIZES, false); if (trail == NULL) { return true; } From dc26b3009450aadaffdf2f3e91ff5c41548796d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 15:49:42 -0700 Subject: [PATCH 1688/2608] Rtree: Clean up compact/non-compact split. 
--- include/jemalloc/internal/rtree.h | 209 ++++++++++-------------------- src/emap.c | 24 ++-- test/unit/rtree.c | 6 +- 3 files changed, 83 insertions(+), 156 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 1c2715d0..46c58f97 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -43,13 +43,18 @@ struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; -typedef struct rtree_leaf_elm_contents_s rtree_leaf_elm_contents_t; -struct rtree_leaf_elm_contents_s { - edata_t *edata; +typedef struct rtree_metadata_s rtree_metadata_t; +struct rtree_metadata_s { szind_t szind; bool slab; }; +typedef struct rtree_contents_s rtree_contents_t; +struct rtree_contents_s { + edata_t *edata; + rtree_metadata_t metadata; +}; + struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT /* @@ -67,8 +72,11 @@ struct rtree_leaf_elm_s { atomic_p_t le_bits; #else atomic_p_t le_edata; /* (edata_t *) */ - atomic_u_t le_szind; /* (szind_t) */ - atomic_b_t le_slab; /* (bool) */ + /* + * slab is stored in the low bit; szind is stored in the next lowest + * bits. + */ + atomic_u_t le_metadata; #endif }; @@ -171,25 +179,25 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, } JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_encode(rtree_leaf_elm_contents_t contents) { +rtree_leaf_elm_bits_encode(rtree_contents_t contents) { uintptr_t edata_bits = (uintptr_t)contents.edata & (((uintptr_t)1 << LG_VADDR) - 1); - uintptr_t szind_bits = (uintptr_t)contents.szind << LG_VADDR; + uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; /* * Slab shares the low bit of edata; we know edata is on an even address * (in fact, it's 128 bytes on 64-bit systems; we can enforce this * alignment if we want to steal 6 extra rtree leaf bits someday. 
*/ - uintptr_t slab_bits = (uintptr_t)contents.slab; + uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; return szind_bits | edata_bits | slab_bits; } -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_contents_t +JEMALLOC_ALWAYS_INLINE rtree_contents_t rtree_leaf_elm_bits_decode(uintptr_t bits) { - rtree_leaf_elm_contents_t contents; + rtree_contents_t contents; /* Do the easy things first. */ - contents.szind = bits >> LG_VADDR; - contents.slab = (bool)(bits & 1); + contents.metadata.szind = bits >> LG_VADDR; + contents.metadata.slab = (bool)(bits & 1); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set @@ -210,109 +218,42 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { # endif /* RTREE_LEAF_COMPACT */ -JEMALLOC_ALWAYS_INLINE edata_t * -rtree_leaf_elm_edata_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { +JEMALLOC_ALWAYS_INLINE rtree_contents_t +rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - return contents.edata; + rtree_contents_t contents = rtree_leaf_elm_bits_decode(bits); + return contents; #else - edata_t *edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent + rtree_contents_t contents; + unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); - return edata; -#endif -} + contents.metadata.slab = (bool)(metadata_bits & 1); + contents.metadata.szind = (metadata_bits >> 1); -JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - return contents.szind; -#else - return (szind_t)atomic_load_u(&elm->le_szind, dependent ? ATOMIC_RELAXED - : ATOMIC_ACQUIRE); -#endif -} + contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent + ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); -JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - return contents.slab; -#else - return atomic_load_b(&elm->le_slab, dependent ? 
ATOMIC_RELAXED : - ATOMIC_ACQUIRE); -#endif -} - -static inline void -rtree_leaf_elm_edata_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, edata_t *edata) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - old_bits); - contents.edata = edata; - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_p(&elm->le_edata, edata, ATOMIC_RELEASE); -#endif -} - -static inline void -rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, szind_t szind) { - assert(szind <= SC_NSIZES); - -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - old_bits); - contents.szind = szind; - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_u(&elm->le_szind, szind, ATOMIC_RELEASE); -#endif -} - -static inline void -rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool slab) { -#ifdef RTREE_LEAF_COMPACT - uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, - true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - old_bits); - contents.slab = slab; - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); -#else - atomic_store_b(&elm->le_slab, slab, ATOMIC_RELEASE); + return contents; #endif } static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, rtree_leaf_elm_contents_t contents) { + rtree_leaf_elm_t *elm, rtree_contents_t contents) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - 
rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + unsigned metadata_bits = ((unsigned)contents.metadata.slab + | ((unsigned)contents.metadata.szind << 1)); + atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid * as soon as the edata field is non-NULL. */ - rtree_leaf_elm_edata_write(tsdn, rtree, elm, edata); + atomic_store_p(&elm->le_edata, contents.edata, ATOMIC_RELEASE); #endif } @@ -320,13 +261,15 @@ static inline void rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, szind_t szind, bool slab) { assert(!slab || szind < SC_NBINS); - + rtree_contents_t contents = rtree_leaf_elm_read( + tsdn, rtree, elm, /* dependent */ true); /* * The caller implicitly assures that it is the only writer to the szind * and slab fields, and that the edata field cannot currently change. */ - rtree_leaf_elm_slab_write(tsdn, rtree, elm, slab); - rtree_leaf_elm_szind_write(tsdn, rtree, elm, szind); + contents.metadata.slab = slab; + contents.metadata.szind = szind; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -400,11 +343,11 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, return true; } - assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) == NULL); - rtree_leaf_elm_contents_t contents; + assert(rtree_leaf_elm_read(tsdn, rtree, elm, false).edata == NULL); + rtree_contents_t contents; contents.edata = edata; - contents.szind = szind; - contents.slab = slab; + contents.metadata.szind = szind; + contents.metadata.slab = slab; rtree_leaf_elm_write(tsdn, rtree, elm, contents); return false; @@ -430,7 +373,7 @@ rtree_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return NULL; } - return rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); + return 
rtree_leaf_elm_read(tsdn, rtree, elm, dependent).edata; } JEMALLOC_ALWAYS_INLINE szind_t @@ -441,7 +384,7 @@ rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return SC_NSIZES; } - return rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); + return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).metadata.szind; } /* @@ -458,18 +401,12 @@ rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, if (!dependent && elm == NULL) { return true; } -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, + dependent); *r_edata = contents.edata; - *r_szind = contents.szind; - *r_slab = contents.slab; -#else - *r_edata = rtree_leaf_elm_edata_read(tsdn, rtree, elm, dependent); - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); -#endif + *r_szind = contents.metadata.szind; + *r_slab = contents.metadata.slab; + return false; } @@ -495,22 +432,16 @@ rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); elm = &leaf[subkey]; -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, - elm, true); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode( - bits); - *r_szind = contents.szind; - *r_slab = contents.slab; -#else - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, true); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, true); -#endif + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, + elm, /* dependent */ true); + *r_szind = contents.metadata.szind; + *r_slab = contents.metadata.slab; return true; } else { return false; } } + JEMALLOC_ALWAYS_INLINE bool rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, 
rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { @@ -519,15 +450,11 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } -#ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); - rtree_leaf_elm_contents_t contents = rtree_leaf_elm_bits_decode(bits); - *r_szind = contents.szind; - *r_slab = contents.slab; -#else - *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); - *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); -#endif + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true); + *r_szind = contents.metadata.szind; + *r_slab = contents.metadata.slab; + return false; } @@ -544,12 +471,12 @@ static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - assert(rtree_leaf_elm_edata_read(tsdn, rtree, elm, false) != - NULL); - rtree_leaf_elm_contents_t contents; + assert(rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ false).edata != NULL); + rtree_contents_t contents; contents.edata = NULL; - contents.szind = SC_NSIZES; - contents.slab = false; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); } diff --git a/src/emap.c b/src/emap.c index 24d61212..0d10c79e 100644 --- a/src/emap.c +++ b/src/emap.c @@ -65,12 +65,12 @@ emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, static inline emap_lock_result_t emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, - elm, true); + edata_t *edata1 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true).edata; /* Slab implies active extents and should be skipped. 
*/ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_slab_read(tsdn, - &emap->rtree, elm, true))) { + if (edata1 == NULL || (inactive_only && rtree_leaf_elm_read(tsdn, + &emap->rtree, elm, /* dependent */ true).metadata.slab)) { return emap_lock_result_no_extent; } @@ -79,8 +79,8 @@ emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, * the leaf->edata mapping. We have to recheck while holding the lock. */ emap_lock_edata(tsdn, emap, edata1); - edata_t *edata2 = rtree_leaf_elm_edata_read(tsdn, &emap->rtree, elm, - true); + edata_t *edata2 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true).edata; if (edata1 == edata2) { *result = edata1; @@ -137,10 +137,10 @@ emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, static void emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, rtree_leaf_elm_t *elm_b, edata_t *edata, szind_t szind, bool slab) { - rtree_leaf_elm_contents_t contents; + rtree_contents_t contents; contents.edata = edata; - contents.szind = szind; - contents.slab = slab; + contents.metadata.szind = szind; + contents.metadata.slab = slab; rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); @@ -278,10 +278,10 @@ emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail) { - rtree_leaf_elm_contents_t clear_contents; + rtree_contents_t clear_contents; clear_contents.edata = NULL; - clear_contents.szind = SC_NSIZES; - clear_contents.slab = false; + clear_contents.metadata.szind = SC_NSIZES; + clear_contents.metadata.slab = false; if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 01e710c5..c116420a 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -137,10 +137,10 
@@ TEST_BEGIN(test_rtree_random) { &rtree_ctx, keys[i], false, true); expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); - rtree_leaf_elm_contents_t contents; + rtree_contents_t contents; contents.edata = &edata; - contents.szind = SC_NSIZES; - contents.slab = false; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, keys[i], true), &edata, From 50289750b369e50265b1f74fa3dd895552b30615 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 15 Mar 2020 18:55:43 -0700 Subject: [PATCH 1689/2608] Extent: Remove szind/slab knowledge. --- include/jemalloc/internal/emap.h | 5 +- include/jemalloc/internal/extent.h | 12 +-- include/jemalloc/internal/pa.h | 4 +- include/jemalloc/internal/rtree.h | 4 - src/extent.c | 132 ++++++++++------------------- src/large.c | 5 +- src/pa.c | 38 ++++++--- 7 files changed, 81 insertions(+), 119 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9f814ce9..5fc713d3 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -28,6 +28,9 @@ struct emap_full_alloc_ctx_s { bool emap_init(emap_t *emap, base_t *base, bool zeroed); +void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, + bool slab); + /* * Grab the lock or locks associated with the edata or edatas indicated (which * is done just by simple address hashing). The hashing strategy means that @@ -106,8 +109,6 @@ struct emap_prepare_s { * higher-addressed one. It's the caller's responsibility to set the edata * state appropriately. 
*/ -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, - bool slab); bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, size_t size_b, szind_t szind_b, bool slab_b); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index bec21d6a..2f14b81f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -20,19 +20,16 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero); + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool zero, bool *commit); + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, @@ -48,8 +45,7 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - 
ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b); + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, edata_t *b); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3e9f1c21..172c549f 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -198,13 +198,13 @@ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool zero); + size_t new_size, szind_t szind, bool zero); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. */ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *generated_dirty); + size_t new_size, szind_t szind, bool *generated_dirty); /* * Frees the given edata back to the pa. Sets *generated_dirty if we produced * new dirty pages (well, we alwyas set it for now; but this need not be the diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 46c58f97..3b21f178 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -334,16 +334,12 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, static inline bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, edata_t *edata, szind_t szind, bool slab) { - /* Use rtree_clear() to set the edata to NULL. 
*/ - assert(edata != NULL); - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, false, true); if (elm == NULL) { return true; } - assert(rtree_leaf_elm_read(tsdn, rtree, elm, false).edata == NULL); rtree_contents_t contents; contents.edata = edata; contents.metadata.szind = szind; diff --git a/src/extent.c b/src/extent.c index 889857eb..671699ca 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,8 +20,7 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b, + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained); @@ -39,16 +38,15 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, - size_t alignment, bool slab, szind_t szind, bool zero, bool *commit, - bool growing_retained); + size_t alignment, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero, bool *commit); + ehooks_t *ehooks, void 
*new_addr, size_t size, size_t alignment, bool zero, + bool *commit); /******************************************************************************/ @@ -70,8 +68,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, edata_t * ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -79,15 +77,15 @@ ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool commit = true; edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, - new_addr, size, alignment, slab, szind, zero, &commit, false); + new_addr, size, alignment, zero, &commit, false); assert(edata == NULL || !edata_ranged_get(edata)); return edata; } edata_t * ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -95,7 +93,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, shard, ehooks, new_addr, - size, alignment, slab, szind, zero, &commit); + size, alignment, zero, &commit); if (edata == NULL) { if (opt_retain && new_addr != NULL) { /* @@ -107,7 +105,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } edata = extent_alloc_wrapper(tsdn, shard, ehooks, new_addr, - size, alignment, slab, szind, zero, &commit); + size, alignment, zero, &commit); } assert(edata == NULL || !edata_ranged_get(edata)); @@ -286,18 +284,12 @@ extent_register_impl(tsdn_t *tsdn, 
pa_shard_t *shard, edata_t *edata, */ emap_lock_edata(tsdn, shard->emap, edata); - szind_t szind = edata_szind_get_maybe_invalid(edata); - bool slab = edata_slab_get(edata); - - if (emap_register_boundary(tsdn, shard->emap, edata, szind, slab)) { + if (emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, + /* slab */ false)) { emap_unlock_edata(tsdn, shard->emap, edata); return true; } - if (slab) { - emap_register_interior(tsdn, shard->emap, edata, szind); - } - emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump_add) { @@ -331,10 +323,6 @@ extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool gdump) { emap_lock_edata(tsdn, shard->emap, edata); emap_deregister_boundary(tsdn, shard->emap, edata); - if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, shard->emap, edata); - edata_slab_set(edata, false); - } emap_unlock_edata(tsdn, shard->emap, edata); if (config_prof && gdump) { @@ -359,7 +347,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -463,8 +451,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool growing_retained) { + void *new_addr, size_t size, size_t alignment, bool growing_retained) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(new_addr == NULL || leadsize == 0); @@ -482,8 +469,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (leadsize != 0) { *lead = *edata; *edata = extent_split_impl(tsdn, shard, ehooks, *lead, leadsize, - SC_NSIZES, false, size + trailsize, szind, slab, - growing_retained); + size + trailsize, growing_retained); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -494,7 +480,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { *trail = extent_split_impl(tsdn, shard, ehooks, *edata, size, - szind, slab, trailsize, SC_NSIZES, false, growing_retained); + trailsize, growing_retained); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -504,11 +490,6 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } } - if (leadsize == 0 && trailsize == 0) { - edata_szind_set(*edata, szind); - emap_remap(tsdn, shard->emap, *edata, szind, slab); - } - return extent_split_interior_ok; } @@ -520,8 +501,8 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, edata_t *edata, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); @@ -529,7 +510,7 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, 
extent_split_interior_result_t result = extent_split_interior( tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - new_addr, size, alignment, slab, szind, growing_retained); + new_addr, size, alignment, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -578,21 +559,18 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ static edata_t * extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero, bool *commit, bool growing_retained) { + ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero, + bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(new_addr == NULL || !slab); - assert(!zero || !slab); - edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, - new_addr, size, alignment, slab, growing_retained); + new_addr, size, alignment, growing_retained); if (edata == NULL) { return NULL; } edata = extent_recycle_split(tsdn, shard, ehooks, ecache, new_addr, - size, alignment, slab, szind, edata, growing_retained); + size, alignment, edata, growing_retained); if (edata == NULL) { return NULL; } @@ -611,10 +589,6 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } assert(edata_state_get(edata) == extent_state_active); - if (slab) { - edata_slab_set(edata, slab); - emap_register_interior(tsdn, shard->emap, edata, szind); - } if (zero) { void *addr = edata_base_get(edata); @@ -633,10 +607,8 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ static edata_t * extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - size_t size, size_t alignment, bool slab, szind_t szind, - bool zero, bool *commit) { + size_t size, size_t alignment, bool zero, bool *commit) { malloc_mutex_assert_owner(tsdn, 
&shard->ecache_grow.mtx); - assert(!zero || !slab); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -696,7 +668,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior(tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, - size, alignment, slab, szind, true); + size, alignment, /* growing_retained */ true); if (result == extent_split_interior_ok) { if (lead != NULL) { @@ -763,10 +735,6 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, edata); } - if (slab) { - edata_slab_set(edata, true); - emap_register_interior(tsdn, shard->emap, edata, szind); - } if (zero && !edata_zeroed_get(edata)) { void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); @@ -781,16 +749,15 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool slab, szind_t szind, - bool zero, bool *commit) { + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, shard, ehooks, - &shard->ecache_retained, new_addr, size, alignment, slab, - szind, zero, commit, true); + &shard->ecache_retained, new_addr, size, alignment, zero, commit, + /* growing_retained */ true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { @@ -798,7 +765,7 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } } else if (opt_retain && new_addr == NULL) { edata = extent_grow_retained(tsdn, shard, ehooks, size, - alignment, slab, szind, zero, commit); + alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); @@ -810,8 +777,7 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t * extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool slab, - szind_t szind, bool zero, bool *commit) { + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -827,7 +793,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, - size, slab, szind, pa_shard_extent_sn_next(shard), + size, /* slab */ false, SC_NSIZES, pa_shard_extent_sn_next(shard), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if (extent_register(tsdn, shard, edata)) { @@ -989,7 +955,7 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* * Does the metadata management portions of putting an unused extent into the - * given ecache_t (coalesces, deregisters slab interiors, the heap operations). + * given ecache_t (coalesces and inserts into the eset). 
*/ static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, @@ -1000,12 +966,6 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); - edata_szind_set(edata, SC_NSIZES); - if (edata_slab_get(edata)) { - emap_deregister_interior(tsdn, shard->emap, edata); - edata_slab_set(edata, false); - } - emap_assert_mapped(tsdn, shard->emap, edata); if (!ecache->delay_coalesce) { @@ -1208,8 +1168,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, */ static edata_t * extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, size_t size_b, - szind_t szind_b, bool slab_b, bool growing_retained) { + edata_t *edata, size_t size_a, size_t size_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1225,12 +1184,14 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_init(trail, edata_arena_ind_get(edata), (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, - slab_b, szind_b, edata_sn_get(edata), edata_state_get(edata), - edata_zeroed_get(edata), edata_committed_get(edata), - edata_ranged_get(edata), EXTENT_NOT_HEAD); + /* slab */ false, SC_NSIZES, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), edata_ranged_get(edata), + EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, - size_a, szind_a, slab_a, trail, size_b, szind_b, slab_b); + size_a, SC_NSIZES, /* slab */ false, trail, size_b, SC_NSIZES, + /* slab */ false); if (err) { goto label_error_b; } @@ -1245,9 +1206,8 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - edata_szind_set(edata, szind_a); - emap_split_commit(tsdn, 
shard->emap, &prepare, edata, size_a, szind_a, - slab_a, trail, size_b, szind_b, slab_b); + emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, SC_NSIZES, + /* slab_a */ false, trail, size_b,SC_NSIZES, /* slab_b */ false); emap_unlock_edata2(tsdn, shard->emap, edata, trail); @@ -1262,10 +1222,9 @@ label_error_a: edata_t * extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, szind_t szind_a, - bool slab_a, size_t size_b, szind_t szind_b, bool slab_b) { - return extent_split_impl(tsdn, shard, ehooks, edata, size_a, szind_a, - slab_a, size_b, szind_b, slab_b, false); + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b) { + return extent_split_impl(tsdn, shard, ehooks, edata, size_a, size_b, + /* growing_retained */ false); } static bool @@ -1297,7 +1256,6 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, emap_lock_edata2(tsdn, shard->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_szind_set(a, SC_NSIZES); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); diff --git a/src/large.c b/src/large.c index 80de716d..d97009a4 100644 --- a/src/large.c +++ b/src/large.c @@ -70,8 +70,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { bool generated_dirty; bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, - usize + sz_large_pad, sz_size2index(usize), false, - &generated_dirty); + usize + sz_large_pad, sz_size2index(usize), &generated_dirty); if (err) { return true; } @@ -94,7 +93,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, szind_t szind = sz_size2index(usize); bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, /* slab */ false, zero); + szind, zero); if (err) { return true; } diff --git a/src/pa.c b/src/pa.c index 78ff3481..a7fe70fb 100644 --- a/src/pa.c +++ b/src/pa.c @@ -120,16 +120,15 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, - &shard->ecache_dirty, NULL, size, alignment, slab, szind, zero); + &shard->ecache_dirty, NULL, size, alignment, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, slab, szind, zero); + NULL, size, alignment, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, NULL, size, alignment, slab, - szind, zero); + &shard->ecache_retained, NULL, size, alignment, zero); mapped_add = size; } if (edata != NULL) { @@ -138,13 +137,19 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, ATOMIC_RELAXED); } + emap_remap(tsdn, shard->emap, edata, szind, slab); + edata_szind_set(edata, szind); + edata_slab_set(edata, slab); + if (slab) { + 
emap_register_interior(tsdn, shard->emap, edata, szind); + } } return edata; } bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool zero) { + size_t new_size, szind_t szind, bool zero) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -159,17 +164,15 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return true; } edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, - trail_begin, expand_amount, PAGE, /* slab */ false, SC_NSIZES, - zero); + trail_begin, expand_amount, PAGE, zero); if (trail == NULL) { trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - trail_begin, expand_amount, PAGE, /* slab */ false, - SC_NSIZES, zero); + trail_begin, expand_amount, PAGE, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, trail_begin, expand_amount, PAGE, - /* slab */ false, SC_NSIZES, zero); + zero); mapped_add = expand_amount; } if (trail == NULL) { @@ -185,13 +188,13 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } pa_nactive_add(shard, expand_amount >> LG_PAGE); edata_szind_set(edata, szind); - emap_remap(tsdn, shard->emap, edata, szind, slab); + emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); return false; } bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool slab, bool *generated_dirty) { + size_t new_size, szind_t szind, bool *generated_dirty) { assert(new_size < old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -205,7 +208,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, } edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, - new_size, szind, slab, shrink_amount, SC_NSIZES, false); + new_size, shrink_amount); if (trail 
== NULL) { return true; } @@ -213,12 +216,21 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); *generated_dirty = true; + + edata_szind_set(edata, szind); + emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); return false; } void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { + emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); + if (edata_slab_get(edata)) { + emap_deregister_interior(tsdn, shard->emap, edata); + edata_slab_set(edata, false); + } + edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); ehooks_t *ehooks = pa_shard_ehooks_get(shard); ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); From bb6a418523718c40e8f7c14eb677435911eb7a18 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 16 Mar 2020 11:31:38 -0700 Subject: [PATCH 1690/2608] Emap: Drop szind/slab splitting parameters. After the previous diff, these are constants. --- include/jemalloc/internal/emap.h | 6 ++---- src/emap.c | 20 +++++++++++++------- src/extent.c | 7 +++---- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5fc713d3..b7eed84d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -110,11 +110,9 @@ struct emap_prepare_s { * state appropriately. 
*/ bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b); + edata_t *edata, size_t size_a, edata_t *trail, size_t size_b); void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b); + edata_t *lead, size_t size_a, edata_t *trail, size_t size_b); void emap_merge_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, edata_t *lead, edata_t *trail); void emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, diff --git a/src/emap.c b/src/emap.c index 0d10c79e..f7fac013 100644 --- a/src/emap.c +++ b/src/emap.c @@ -230,8 +230,7 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *edata, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b) { + edata_t *edata, size_t size_a, edata_t *trail, size_t size_b) { EMAP_DECLARE_RTREE_CTX; /* @@ -240,7 +239,7 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, * facilitate a lookup. 
*/ edata_t lead; - edata_init(&lead, 0U, edata_addr_get(edata), size_a, slab_a, szind_a, 0, + edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, @@ -257,12 +256,19 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, void emap_split_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, - edata_t *lead, size_t size_a, szind_t szind_a, bool slab_a, edata_t *trail, - size_t size_b, szind_t szind_b, bool slab_b) { + edata_t *lead, size_t size_a, edata_t *trail, size_t size_b) { + /* + * We should think about not writing to the lead leaf element. We can + * get into situations where a racing realloc-like call can disagree + * with a size lookup request. I think it's fine to declare that these + * situations are race bugs, but there's an argument to be made that for + * things like xallocx, a size lookup call should return either the old + * size or the new size, but not anything else. 
+ */ emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, - prepare->lead_elm_b, lead, szind_a, slab_a); + prepare->lead_elm_b, lead, SC_NSIZES, /* slab */ false); emap_rtree_write_acquired(tsdn, emap, prepare->trail_elm_a, - prepare->trail_elm_b, trail, szind_b, slab_b); + prepare->trail_elm_b, trail, SC_NSIZES, /* slab */ false); } void diff --git a/src/extent.c b/src/extent.c index 671699ca..073f8065 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1190,8 +1190,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, - size_a, SC_NSIZES, /* slab */ false, trail, size_b, SC_NSIZES, - /* slab */ false); + size_a, trail, size_b); if (err) { goto label_error_b; } @@ -1206,8 +1205,8 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, SC_NSIZES, - /* slab_a */ false, trail, size_b,SC_NSIZES, /* slab_b */ false); + emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, trail, + size_b); emap_unlock_edata2(tsdn, shard->emap, edata, trail); From 26e9a3103d443c45e0fbc7e23754fefb12ea181e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 18 Mar 2020 12:04:02 -0700 Subject: [PATCH 1691/2608] PA: Simple decay test. 
--- Makefile.in | 1 + test/unit/pa.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) create mode 100644 test/unit/pa.c diff --git a/Makefile.in b/Makefile.in index c0929ce2..6cded807 100644 --- a/Makefile.in +++ b/Makefile.in @@ -219,6 +219,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/ph.c \ diff --git a/test/unit/pa.c b/test/unit/pa.c new file mode 100644 index 00000000..f7b72902 --- /dev/null +++ b/test/unit/pa.c @@ -0,0 +1,117 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/pa.h" + +static void * +alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, + size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { + void *ret = pages_map(new_addr, size, alignment, commit); + return ret; +} + +static bool +merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, + void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { + return !maps_coalesce; +} + +static bool +split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, + size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { + return !maps_coalesce; +} + +static void +init_test_extent_hooks(extent_hooks_t *hooks) { + /* + * The default hooks are mostly fine for testing. A few of them, + * though, access globals (alloc for dss setting in an arena, split and + * merge touch the global emap to find head state). The first of these + * can be fixed by keeping that state with the hooks, where it logically + * belongs. The second, though, we can only fix when we use the extent + * hook API. 
+ */ + memcpy(hooks, &ehooks_default_extent_hooks, sizeof(extent_hooks_t)); + hooks->alloc = &alloc_hook; + hooks->merge = &merge_hook; + hooks->split = &split_hook; +} + +typedef struct test_data_s test_data_t; +struct test_data_s { + pa_shard_t shard; + base_t *base; + emap_t emap; + pa_shard_stats_t stats; + malloc_mutex_t stats_mtx; + extent_hooks_t hooks; +}; + +test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + test_data_t *test_data = calloc(1, sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + init_test_extent_hooks(&test_data->hooks); + + base_t *base = base_new(TSDN_NULL, /* ind */ 1, &test_data->hooks); + assert_ptr_not_null(base, ""); + + test_data->base = base; + bool err = emap_init(&test_data->emap, test_data->base, + /* zeroed */ true); + assert_false(err, ""); + + nstime_t time; + nstime_init(&time, 0); + + err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->emap, + test_data->base, /* ind */ 1, &test_data->stats, + &test_data->stats_mtx, &time, dirty_decay_ms, muzzy_decay_ms); + assert_false(err, ""); + + return test_data; +} + +void destroy_test_data(test_data_t *data) { + base_delete(TSDN_NULL, data->base); + free(data); +} + +static void * +do_alloc_free_purge(void *arg) { + test_data_t *test_data = (test_data_t *)arg; + for (int i = 0; i < 10 * 1000; i++) { + edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, + PAGE, /* slab */ false, /* szind */ 0, /* zero */ false); + assert_ptr_not_null(edata, ""); + bool generated_dirty; + pa_dalloc(TSDN_NULL, &test_data->shard, edata, + &generated_dirty); + malloc_mutex_lock(TSDN_NULL, &test_data->shard.decay_dirty.mtx); + pa_decay_all(TSDN_NULL, &test_data->shard, + &test_data->shard.decay_dirty, + &test_data->stats.decay_dirty, + &test_data->shard.ecache_dirty, true); + malloc_mutex_unlock(TSDN_NULL, + &test_data->shard.decay_dirty.mtx); + } + return NULL; +} + +TEST_BEGIN(test_alloc_free_purge_thds) { + test_data_t *test_data = 
init_test_data(0, 0); + thd_t thds[4]; + for (int i = 0; i < 4; i++) { + thd_create(&thds[i], do_alloc_free_purge, test_data); + } + for (int i = 0; i < 4; i++) { + thd_join(thds[i], NULL); + } + +} +TEST_END + +int +main(void) { + return test( + test_alloc_free_purge_thds); +} From 79ae7f9211e367f0ecc8be24439af73bd3a4ebc4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 19 Mar 2020 17:58:44 -0700 Subject: [PATCH 1692/2608] Rtree: Remove the per-field accessors. We instead split things into "edata" and "metadata". --- include/jemalloc/internal/emap.h | 48 ++++--- include/jemalloc/internal/rtree.h | 215 +++++++++++------------------- src/emap.c | 31 +++-- src/jemalloc.c | 4 +- test/unit/rtree.c | 73 ++++++---- 5 files changed, 178 insertions(+), 193 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index b7eed84d..9b925225 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -132,8 +132,7 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, - true); + return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } /* Fills in alloc_ctx with the info in the map. */ @@ -143,8 +142,10 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, - true, &alloc_ctx->szind, &alloc_ctx->slab); + rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + alloc_ctx->szind = metadata.szind; + alloc_ctx->slab = metadata.slab; } /* The pointer must be mapped. 
*/ @@ -154,9 +155,11 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr, true, &full_alloc_ctx->edata, - &full_alloc_ctx->szind, &full_alloc_ctx->slab); + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr); + full_alloc_ctx->edata = contents.edata; + full_alloc_ctx->szind = contents.metadata.szind; + full_alloc_ctx->slab = contents.metadata.slab; } /* @@ -170,24 +173,35 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - return rtree_edata_szind_slab_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr, false, &full_alloc_ctx->edata, - &full_alloc_ctx->szind, &full_alloc_ctx->slab); + rtree_contents_t contents; + bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)ptr, &contents); + if (err) { + return true; + } + full_alloc_ctx->edata = contents.edata; + full_alloc_ctx->szind = contents.metadata.szind; + full_alloc_ctx->slab = contents.metadata.slab; + return false; } /* - * Fills in alloc_ctx, but only if it can be done easily (i.e. with a hit in the - * L1 rtree cache. - * - * Returns whether or not alloc_ctx was filled in. + * Returns true on error. 
*/ JEMALLOC_ALWAYS_INLINE bool emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); - bool res = rtree_szind_slab_read_fast(tsd_tsdn(tsd), &emap->rtree, - rtree_ctx, (uintptr_t)ptr, &alloc_ctx->szind, &alloc_ctx->slab); - return res; + + rtree_metadata_t metadata; + bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree, + rtree_ctx, (uintptr_t)ptr, &metadata); + if (err) { + return true; + } + alloc_ctx->szind = metadata.szind; + alloc_ctx->slab = metadata.slab; + return false; } #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 3b21f178..83dfdc81 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -257,19 +257,29 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, #endif } -static inline void -rtree_leaf_elm_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, szind_t szind, bool slab) { - assert(!slab || szind < SC_NBINS); - rtree_contents_t contents = rtree_leaf_elm_read( - tsdn, rtree, elm, /* dependent */ true); - /* - * The caller implicitly assures that it is the only writer to the szind - * and slab fields, and that the edata field cannot currently change. - */ - contents.metadata.slab = slab; - contents.metadata.szind = szind; - rtree_leaf_elm_write(tsdn, rtree, elm, contents); +/* + * Tries to look up the key in the L1 cache, returning it if there's a hit, or + * NULL if there's a miss. + * Key is allowed to be NULL; returns NULL in this case. 
+ */ +JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm; + + size_t slot = rtree_cache_direct_map(key); + uintptr_t leafkey = rtree_leafkey(key); + assert(leafkey != RTREE_LEAFKEY_INVALID); + + if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + elm = &leaf[subkey]; + return elm; + } else { + return NULL; + } } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -331,144 +341,79 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, dependent, init_missing); } +/* + * Returns true on lookup failure. + */ +static inline bool +rtree_read_independent(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, rtree_contents_t *r_contents) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ false, /* init_missing */ false); + if (elm == NULL) { + return true; + } + *r_contents = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ false); + return false; +} + +static inline rtree_contents_t +rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + return rtree_leaf_elm_read(tsdn, rtree, elm, /* dependent */ true); +} + +static inline rtree_metadata_t +rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + return rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true).metadata; +} + +/* + * Returns true on error. 
+ */ +static inline bool +rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t key, rtree_metadata_t *r_rtree_metadata) { + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, + key); + if (elm == NULL) { + return true; + } + *r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true).metadata; + return false; +} + static inline bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - edata_t *edata, szind_t szind, bool slab) { + rtree_contents_t contents) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, false, true); + key, /* dependent */ false, /* init_missing */ true); if (elm == NULL) { return true; } - rtree_contents_t contents; - contents.edata = edata; - contents.metadata.szind = szind; - contents.metadata.slab = slab; rtree_leaf_elm_write(tsdn, rtree, elm, contents); return false; } -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * -rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, - bool dependent) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - key, dependent, false); - if (!dependent && elm == NULL) { - return NULL; - } - assert(elm != NULL); - return elm; -} - -JEMALLOC_ALWAYS_INLINE edata_t * -rtree_edata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return NULL; - } - return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).edata; -} - -JEMALLOC_ALWAYS_INLINE szind_t -rtree_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return SC_NSIZES; - } - return rtree_leaf_elm_read(tsdn, rtree, elm, dependent).metadata.szind; -} - -/* - * 
rtree_slab_read() is intentionally omitted because slab is always read in - * conjunction with szind, which makes rtree_szind_slab_read() a better choice. - */ - -JEMALLOC_ALWAYS_INLINE bool -rtree_edata_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, edata_t **r_edata, - szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == NULL) { - return true; - } - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, - dependent); - *r_edata = contents.edata; - *r_szind = contents.metadata.szind; - *r_slab = contents.metadata.slab; - - return false; -} - -/* - * Try to read szind_slab from the L1 cache. Returns true on a hit, - * and fills in r_szind and r_slab. Otherwise returns false. - * - * Key is allowed to be NULL in order to save an extra branch on the - * fastpath. returns false in this case. - */ -JEMALLOC_ALWAYS_INLINE bool -rtree_szind_slab_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm; - - size_t slot = rtree_cache_direct_map(key); - uintptr_t leafkey = rtree_leafkey(key); - assert(leafkey != RTREE_LEAFKEY_INVALID); - - if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { - rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; - assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); - elm = &leaf[subkey]; - - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, - elm, /* dependent */ true); - *r_szind = contents.metadata.szind; - *r_slab = contents.metadata.slab; - return true; - } else { - return false; - } -} - -JEMALLOC_ALWAYS_INLINE bool -rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, bool dependent, szind_t *r_szind, bool *r_slab) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, - dependent); - if (!dependent && elm == 
NULL) { - return true; - } - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true); - *r_szind = contents.metadata.szind; - *r_slab = contents.metadata.slab; - - return false; -} - -static inline void -rtree_szind_slab_update(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, szind_t szind, bool slab) { - assert(!slab || szind < SC_NBINS); - - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); - rtree_leaf_elm_szind_slab_update(tsdn, rtree, elm, szind, slab); -} - static inline void rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { - rtree_leaf_elm_t *elm = rtree_read(tsdn, rtree, rtree_ctx, key, true); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + key, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); assert(rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ false).edata != NULL); + /* dependent */ true).edata != NULL); rtree_contents_t contents; contents.edata = NULL; contents.metadata.szind = SC_NSIZES; diff --git a/src/emap.c b/src/emap.c index f7fac013..637d332b 100644 --- a/src/emap.c +++ b/src/emap.c @@ -171,9 +171,13 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, /* Register interior. 
*/ for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { + rtree_contents_t contents; + contents.edata = edata; + contents.metadata.szind = szind; + contents.metadata.slab = true; rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), edata, szind, true); + LG_PAGE), contents); } } @@ -200,13 +204,18 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, +void +emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { EMAP_DECLARE_RTREE_CTX; if (szind != SC_NSIZES) { - rtree_szind_slab_update(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_addr_get(edata), szind, slab); + rtree_contents_t contents; + contents.edata = edata; + contents.metadata.szind = szind; + contents.metadata.slab = slab; + rtree_write(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_addr_get(edata), contents); /* * Recall that this is called only for active->inactive and * inactive->active transitions (since only active extents have @@ -220,12 +229,12 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, * call is coming in those cases, though. 
*/ if (slab && edata_size_get(edata) > PAGE) { - rtree_szind_slab_update(tsdn, - &emap->rtree, rtree_ctx, - (uintptr_t)edata_past_get(edata) - (uintptr_t)PAGE, - szind, slab); - } + uintptr_t key = (uintptr_t)edata_past_get(edata) + - (uintptr_t)PAGE; + rtree_write(tsdn, &emap->rtree, rtree_ctx, key, + contents); } + } } bool @@ -311,6 +320,6 @@ void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; - assert(rtree_edata_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata), true) == edata); + assert(rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata)).edata == edata); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 0be55492..63ef578e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2782,11 +2782,11 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { if (unlikely(tsd == NULL || !tsd_fast(tsd))) { return false; } - bool res = emap_alloc_ctx_try_lookup_fast(tsd, + bool err = emap_alloc_ctx_try_lookup_fast(tsd, &arena_emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(!res || !alloc_ctx.slab)) { + if (unlikely(err || !alloc_ctx.slab)) { return false; } assert(alloc_ctx.szind != SC_NSIZES); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index c116420a..28029665 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -20,8 +20,9 @@ TEST_BEGIN(test_rtree_read_empty) { rtree_ctx_data_init(&rtree_ctx); expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, - false), "rtree_edata_read() should return NULL for empty tree"); + rtree_contents_t contents; + expect_true(rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, + &contents), "rtree_read_independent() should fail on empty rtree."); base_delete(tsdn, base); } @@ -50,21 +51,33 @@ TEST_BEGIN(test_rtree_extrema) { expect_false(rtree_new(rtree, base, false), "Unexpected 
rtree_new() failure"); - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, &edata_a, - edata_szind_get(&edata_a), edata_slab_get(&edata_a)), + rtree_contents_t contents_a; + contents_a.edata = &edata_a; + contents_a.metadata.szind = edata_szind_get(&edata_a); + contents_a.metadata.slab = edata_slab_get(&edata_a); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); - rtree_szind_slab_update(tsdn, rtree, &rtree_ctx, PAGE, - edata_szind_get(&edata_a), edata_slab_get(&edata_a)); - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, PAGE, true), - &edata_a, - "rtree_edata_read() should return previously set value"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), + "Unexpected rtree_write() failure"); + rtree_contents_t read_contents_a = rtree_read(tsdn, rtree, &rtree_ctx, + PAGE); + expect_true(contents_a.edata == read_contents_a.edata + && contents_a.metadata.szind == read_contents_a.metadata.szind + && contents_a.metadata.slab == read_contents_a.metadata.slab, + "rtree_read() should return previously set value"); + rtree_contents_t contents_b; + contents_b.edata = &edata_b; + contents_b.metadata.szind = edata_szind_get_maybe_invalid(&edata_b); + contents_b.metadata.slab = edata_slab_get(&edata_b); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), - &edata_b, edata_szind_get_maybe_invalid(&edata_b), - edata_slab_get(&edata_b)), "Unexpected rtree_write() failure"); - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - ~((uintptr_t)0), true), &edata_b, - "rtree_edata_read() should return previously set value"); + contents_b), "Unexpected rtree_write() failure"); + rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, + ~((uintptr_t)0)); + assert_true(contents_b.edata == read_contents_b.edata + && contents_b.metadata.szind == read_contents_b.metadata.szind + && contents_b.metadata.slab == read_contents_b.metadata.slab, + "rtree_read() should 
return previously set value"); base_delete(tsdn, base); } @@ -89,19 +102,23 @@ TEST_BEGIN(test_rtree_bits) { "Unexpected rtree_new() failure"); for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { + rtree_contents_t contents; + contents.edata = &edata; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - &edata, SC_NSIZES, false), - "Unexpected rtree_write() failure"); + contents), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[j], true), &edata, + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + keys[j]).edata, &edata, "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, j, keys[i], keys[j]); } - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, - (((uintptr_t)2) << LG_PAGE), false), + expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, + (((uintptr_t)2) << LG_PAGE)).edata, "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); } @@ -142,26 +159,26 @@ TEST_BEGIN(test_rtree_random) { contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &edata, + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, &edata, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_eq(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), &edata, + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, &edata, "rtree_edata_read() should return previously set value, " "i=%u", i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - 
expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), + expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_null(rtree_edata_read(tsdn, rtree, &rtree_ctx, - keys[i], true), + expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, + keys[i]).edata, "rtree_edata_read() should return previously set value"); } From 877af247a87f6cb335a0f98aef62cd90afcfa520 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Apr 2020 11:38:33 -0700 Subject: [PATCH 1693/2608] QL, QR: Add documentation. --- include/jemalloc/internal/ql.h | 86 +++++++++++++++++++++++++++++++--- include/jemalloc/internal/qr.h | 86 ++++++++++++++++++++++++++++++++-- 2 files changed, 162 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index db672194..c7f52f86 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -3,45 +3,85 @@ #include "jemalloc/internal/qr.h" +/* + * A linked-list implementation. + * + * This is built on top of the ring implementation, but that can be viewed as an + * implementation detail (i.e. trying to advance past the tail of the list + * doesn't wrap around). + * + * You define a struct like so: + * typedef struct my_s my_t; + * struct my_s { + * int data; + * ql_elm(my_t) my_link; + * }; + * + * // We wobble between "list" and "head" for this type; we're now mostly + * // heading towards "list". + * typedef ql_head(my_t) my_list_t; + * + * You then pass a my_list_t * for a_head arguments, a my_t * for a_elm + * arguments, the token "my_link" for a_field arguments, and the token "my_t" + * for a_type arguments. + */ + /* List definitions. */ #define ql_head(a_type) \ struct { \ a_type *qlh_first; \ } +/* Static initializer for an empty list. */ #define ql_head_initializer(a_head) {NULL} +/* The field definition. 
*/ #define ql_elm(a_type) qr(a_type) -/* List functions. */ +/* A pointer to the first element in the list, or NULL if the list is empty. */ #define ql_first(a_head) ((a_head)->qlh_first) +/* Dynamically initializes a list. */ #define ql_new(a_head) do { \ ql_first(a_head) = NULL; \ } while (0) -#define ql_clear(a_head) ql_new(a_head) - +/* + * Sets dest to be the contents of src (overwriting any elements there), leaving + * src empty. + */ #define ql_move(a_head_dest, a_head_src) do { \ ql_first(a_head_dest) = ql_first(a_head_src); \ - ql_clear(a_head_src); \ + ql_new(a_head_src); \ } while (0) +/* True if the list is empty, otherwise false. */ #define ql_empty(a_head) (ql_first(a_head) == NULL) +/* + * Initializes a ql_elm. Must be called even if the field is about to be + * overwritten. + */ #define ql_elm_new(a_elm, a_field) qr_new((a_elm), a_field) +/* + * Obtains the last item in the list. + */ #define ql_last(a_head, a_field) \ (ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field)) +/* + * Gets a pointer to the next/prev element in the list. Trying to advance past + * the end or retreat before the beginning of the list returns NULL. + */ #define ql_next(a_head, a_elm, a_field) \ ((ql_last(a_head, a_field) != (a_elm)) \ ? qr_next((a_elm), a_field) : NULL) - #define ql_prev(a_head, a_elm, a_field) \ ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ : NULL) +/* Inserts a_elm before a_qlelm in the list. */ #define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ qr_before_insert((a_qlelm), (a_elm), a_field); \ if (ql_first(a_head) == (a_qlelm)) { \ @@ -49,9 +89,11 @@ struct { \ } \ } while (0) +/* Inserts a_elm after a_qlelm in the list. */ #define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) +/* Inserts a_elm as the first item in the list. 
 */ #define ql_head_insert(a_head, a_elm, a_field) do { \ if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = (a_elm); \ } while (0) +/* Inserts a_elm as the last item in the list. */ #define ql_tail_insert(a_head, a_elm, a_field) do { \ if (!ql_empty(a_head)) { \ qr_before_insert(ql_first(a_head), (a_elm), a_field); \ } \ ql_first(a_head) = qr_next((a_elm), a_field); \ } while (0) +/* + * Given lists a = [a_1, ..., a_n] and b = [b_1, ..., b_n], results in: + * a = [a_1, ..., a_n, b_1, ..., b_n] and b = []. + */ #define ql_concat(a_head_a, a_head_b, a_field) do { \ if (ql_empty(a_head_a)) { \ ql_move(a_head_a, a_head_b); \ } else if (!ql_empty(a_head_b)) { \ qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ a_field); \ - ql_clear(a_head_b); \ + ql_new(a_head_b); \ } \ } while (0) +/* Removes a_elm from the list. */ #define ql_remove(a_head, a_elm, a_field) do { \ if (ql_first(a_head) == (a_elm)) { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } \ if (ql_first(a_head) != (a_elm)) { \ qr_remove((a_elm), a_field); \ } else { \ - ql_clear(a_head); \ + ql_new(a_head); \ } \ } while (0) +/* Removes the first item in the list. */ #define ql_head_remove(a_head, a_type, a_field) do { \ a_type *t = ql_first(a_head); \ ql_remove((a_head), t, a_field); \ } while (0) +/* Removes the last item in the list. */ #define ql_tail_remove(a_head, a_type, a_field) do { \ a_type *t = ql_last(a_head, a_field); \ ql_remove((a_head), t, a_field); \ } while (0) +/* + * Given a = [a_1, a_2, ..., a_n-1, a_n, a_n+1, ...], + * ql_split(a, a_n, b, some_field) results in + * a = [a_1, a_2, ..., a_n-1] + * and replaces b's contents with: + * b = [a_n, a_n+1, ...] 
+ */ #define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ if (ql_first(a_head_a) == (a_elm)) { \ ql_move(a_head_b, a_head_a); \ @@ -116,6 +173,21 @@ struct { \ ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ } while (0) +/* + * Helper macro to iterate over each element in a list in order, starting from + * the head (or in reverse order, starting from the tail). The usage is + * (assuming my_t and my_list_t defined as above). + * + * int sum(my_list_t *list) { + * int sum = 0; + * my_t *iter; + * ql_foreach(iter, list, link) { + * sum += iter->data; + * } + * return sum; + * } + */ + #define ql_foreach(a_var, a_head, a_field) \ qr_foreach((a_var), ql_first(a_head), a_field) diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index 559cbe42..ece4f556 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -1,6 +1,21 @@ #ifndef JEMALLOC_INTERNAL_QR_H #define JEMALLOC_INTERNAL_QR_H +/* + * A ring implementation based on an embedded circular doubly-linked list. + * + * You define your struct like so: + * + * typedef struct my_s my_t; + * struct my_s { + * int data; + * qr(my_t) my_link; + * }; + * + * And then pass a my_t * into macros for a_qr arguments, and the token + * "my_link" into a_field fields. + */ + /* Ring definitions. */ #define qr(a_type) \ struct { \ @@ -8,17 +23,41 @@ struct { \ a_type *qre_prev; \ } -/* Ring functions. */ +/* + * Initialize a qr link. Every link must be initialized before being used, even + * if that initialization is going to be immediately overwritten (say, by being + * passed into an insertion macro). + */ #define qr_new(a_qr, a_field) do { \ (a_qr)->a_field.qre_next = (a_qr); \ (a_qr)->a_field.qre_prev = (a_qr); \ } while (0) +/* + * Go forwards or backwards in the ring. Note that (the ring being circular), this + * always succeeds -- you just keep looping around and around the ring if you + * chase pointers without end. 
+ */ #define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next) - #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev) -/* a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ +/* + * Given two rings: + * a -> a_1 -> ... -> a_n -- + * ^ | + * |------------------------ + * + * b -> b_1 -> ... -> b_n -- + * ^ | + * |------------------------ + * + * Results in the ring: + * a -> a_1 -> ... -> a_n -> b -> b_1 -> ... -> b_n -- + * ^ | + * |-------------------------------------------------| + * + * a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. + */ #define qr_meld(a_qr_a, a_qr_b, a_field) do { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ (a_qr_a)->a_field.qre_prev; \ @@ -29,28 +68,69 @@ struct { \ (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ } while (0) +/* + * Logically, this is just a meld. The intent, though, is that a_qrelm is a + * single-element ring, so that "before" has a more obvious interpretation than + * meld. + */ #define qr_before_insert(a_qrelm, a_qr, a_field) \ qr_meld((a_qrelm), (a_qr), a_field) +/* Ditto, but inserting after rather than before. */ #define qr_after_insert(a_qrelm, a_qr, a_field) \ qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field) /* + * Inverts meld; given the ring: + * a -> a_1 -> ... -> a_n -> b -> b_1 -> ... -> b_n -- + * ^ | + * |-------------------------------------------------| + * + * Results in two rings: + * a -> a_1 -> ... -> a_n -- + * ^ | + * |------------------------ + * + * b -> b_1 -> ... -> b_n -- + * ^ | + * |------------------------ + * * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. */ #define qr_split(a_qr_a, a_qr_b, a_field) \ qr_meld((a_qr_a), (a_qr_b), a_field) +/* + * Splits off a_qr from the rest of its ring, so that it becomes a + * single-element ring. 
+ */ #define qr_remove(a_qr, a_field) \ qr_split(qr_next(a_qr, a_field), (a_qr), a_field) +/* + * Helper macro to iterate over each element in a ring exactly once, starting + * with a_qr. The usage is (assuming my_t defined as above): + * + * int sum(my_t *item) { + * int sum = 0; + * my_t *iter; + * qr_foreach(iter, item, link) { + * sum += iter->data; + * } + * return sum; + * } + */ #define qr_foreach(var, a_qr, a_field) \ for ((var) = (a_qr); \ (var) != NULL; \ (var) = (((var)->a_field.qre_next != (a_qr)) \ ? (var)->a_field.qre_next : NULL)) +/* + * The same (and with the same usage) as qr_foreach, but in the opposite order, + * ending with a_qr. + */ #define qr_reverse_foreach(var, a_qr, a_field) \ for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ (var) != NULL; \ From 3589571bfd4b1fda1d3771f96a08d7d14b7813bd Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Apr 2020 16:09:02 -0700 Subject: [PATCH 1694/2608] SC: use SC_LG_NGROUP instead of its value. This magic constant introduces inconsistencies. We should be able to change its value solely by adjusting the definition in the header. --- src/sc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sc.c b/src/sc.c index 89ddb6ba..cfce533f 100644 --- a/src/sc.c +++ b/src/sc.c @@ -245,7 +245,7 @@ size_classes( assert(sc_data->lg_large_minclass == SC_LG_LARGE_MINCLASS); assert(sc_data->large_maxclass == SC_LARGE_MAXCLASS); - /* + /* * In the allocation fastpath, we want to assume that we can * unconditionally subtract the requested allocation size from * a ssize_t, and detect passing through 0 correctly. 
This @@ -262,7 +262,7 @@ sc_data_init(sc_data_t *sc_data) { int lg_max_lookup = 12; size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, - lg_max_lookup, LG_PAGE, 2); + lg_max_lookup, LG_PAGE, SC_LG_NGROUP); sc_data->initialized = true; } From 58a00df2383fbe714da3b8a3697d68c4064d4b4a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 16:28:43 -0700 Subject: [PATCH 1695/2608] TSD: Put all fast-path data together. --- include/jemalloc/internal/tsd.h | 168 ++++++++++++++++---------------- 1 file changed, 85 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index d88f3d12..7e08f6b2 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -15,57 +15,30 @@ /* * Thread-Specific-Data layout - * --- data accessed on tcache fast path: state, rtree_ctx, stats --- - * s: state - * m: thread_allocated - * k: thread_allocated_next_event_fast - * f: thread_deallocated - * h: thread_deallocated_next_event_fast - * c: rtree_ctx (rtree cache accessed on deallocation) - * t: tcache - * --- data not accessed on tcache fast path: arena-related fields --- - * e: tcache_enabled - * d: arenas_tdata_bypass - * r: reentrancy_level - * n: narenas_tdata - * l: thread_allocated_last_event - * j: thread_allocated_next_event - * q: thread_deallocated_last_event - * u: thread_deallocated_next_event - * g: tcache_gc_event_wait - * y: tcache_gc_dalloc_event_wait - * w: prof_sample_event_wait (config_prof) - * x: prof_sample_last_event (config_prof) - * z: stats_interval_event_wait - * e: stats_interval_last_event - * p: prof_tdata (config_prof) - * v: prng_state - * i: iarena - * a: arena - * o: arenas_tdata - * b: binshards - * Loading TSD data is on the critical path of basically all malloc operations. - * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective. - * Use a compact layout to reduce cache footprint. 
- * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+ - * |---------------------------- 1st cacheline ----------------------------| - * | sedrnnnn mmmmmmmm kkkkkkkk ffffffff hhhhhhhh [c * 24 ........ ........]| - * |---------------------------- 2nd cacheline ----------------------------| - * | [c * 64 ........ ........ ........ ........ ........ ........ ........]| - * |---------------------------- 3nd cacheline ----------------------------| - * | [c * 40 ........ ........ ........ .......] llllllll jjjjjjjj qqqqqqqq | - * +---------------------------- 4th cacheline ----------------------------+ - * | uuuuuuuu gggggggg yyyyyyyy wwwwwwww xxxxxxxx zzzzzzzz eeeeeeee pppppppp | - * +---------------------------- 5th and after ----------------------------+ - * | vvvvvvvv iiiiiiii aaaaaaaa oooooooo [b * 40; then embedded tcache ..... | - * +-------------------------------------------------------------------------+ - * Note: the entire tcache is embedded into TSD and spans multiple cachelines. * - * The elements after rtree_ctx and before tcache aren't really needed on tcache - * fast path. However we have a number of unused tcache bins and witnesses - * (never touched unless config_debug) at the end of tcache, so we place them - * there to avoid breaking the cachelines and possibly paging in an extra page. + * At least some thread-local data gets touched on the fast-path of almost all + * malloc operations. But much of it is only necessary down slow-paths, or + * testing. We want to colocate the fast-path data so that it can live on the + * same cacheline if possible. So we define three tiers of hotness: + * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths. + * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general; + * there are "semi-slow" paths like "not a sized deallocation, but can still + * live in the tcache". We'll want to keep these closer to the fast-path + * data. 
+ * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all. + * + * An additional concern is that the larger tcache bins won't be used (we have a + * bin per size class, but by default only cache relatively small objects). So + * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the + * TSD_DATA_SLOWER tier. + * + * As a result of all this, we put the slow data first, then the fast data, then + * the slower data, while keeping the tcache as the last element of the fast + * data (so that the fast -> slower transition happens midway through the + * tcache). While we don't yet play alignment tricks to guarantee it, this + * increases our odds of getting some cache/page locality on fast paths. */ + #ifdef JEMALLOC_JET typedef void (*test_callback_t)(int *); # define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 @@ -79,16 +52,11 @@ typedef void (*test_callback_t)(int *); #endif /* O(name, type, nullable type) */ -#define MALLOC_TSD \ +#define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ - O(thread_allocated, uint64_t, uint64_t) \ - O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ - O(thread_deallocated, uint64_t, uint64_t) \ - O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ @@ -104,28 +72,13 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ - O(binshards, tsd_binshards_t, tsd_binshards_t)\ - O(tcache, tcache_t, tcache_t) \ - O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ - MALLOC_TEST_TSD + O(binshards, tsd_binshards_t, tsd_binshards_t) -/* - * TE_MIN_START_WAIT should not exceed the 
minimal allocation usize. - */ -#define TE_MIN_START_WAIT ((uint64_t)1U) -#define TE_MAX_START_WAIT UINT64_MAX - -#define TSD_INITIALIZER { \ - /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ +#define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ /* arenas_tdata_bypass */ false, \ /* reentrancy_level */ 0, \ /* narenas_tdata */ 0, \ - /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ 0, \ - /* thread_deallocated */ 0, \ - /* thread_deallocated_next_event_fast */ 0, \ - /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ /* thread_deallocated_last_event */ 0, \ @@ -141,10 +94,46 @@ typedef void (*test_callback_t)(int *); /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ - /* tcache */ TCACHE_ZERO_INITIALIZER, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_FAST \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ + O(tcache, tcache_t, tcache_t) + +#define TSD_DATA_FAST_INITIALIZER \ + /* thread_allocated */ 0, \ + /* thread_allocated_next_event_fast */ 0, \ + /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ + /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_SLOWER \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD + +#define TSD_DATA_SLOWER_INITIALIZER \ /* witness */ WITNESS_TSD_INITIALIZER \ - /* test data */ MALLOC_TEST_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER + + +/* + * TE_MIN_START_WAIT should not exceed the minimal 
allocation usize. + */ +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX + +#define TSD_INITIALIZER { \ + TSD_DATA_SLOW_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + TSD_DATA_FAST_INITIALIZER \ + TSD_DATA_SLOWER_INITIALIZER \ } void *malloc_tsd_malloc(size_t size); @@ -235,14 +224,17 @@ struct tsd_s { * setters below. */ +#define O(n, t, nt) \ + t TSD_MANGLE(n); + + TSD_DATA_SLOW /* * We manually limit the state to just a single byte. Unless the 8-bit * atomics are unavailable (which is rare). */ tsd_state_t state; -#define O(n, t, nt) \ - t TSD_MANGLE(n); -MALLOC_TSD + TSD_DATA_FAST + TSD_DATA_SLOWER #undef O }; @@ -308,7 +300,9 @@ JEMALLOC_ALWAYS_INLINE t * \ tsd_##n##p_get_unsafe(tsd_t *tsd) { \ return &tsd->TSD_MANGLE(n); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ @@ -327,7 +321,9 @@ tsd_##n##p_get(tsd_t *tsd) { \ state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* @@ -343,7 +339,9 @@ tsdn_##n##p_get(tsdn_t *tsdn) { \ tsd_t *tsd = tsdn_tsd(tsdn); \ return (nt *)tsd_##n##p_get(tsd); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */ @@ -352,7 +350,9 @@ JEMALLOC_ALWAYS_INLINE t \ tsd_##n##_get(tsd_t *tsd) { \ return *tsd_##n##p_get(tsd); \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O /* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. 
*/ @@ -363,7 +363,9 @@ tsd_##n##_set(tsd_t *tsd, t val) { \ tsd_state_get(tsd) != tsd_state_minimal_initialized); \ *tsd_##n##p_get(tsd) = val; \ } -MALLOC_TSD +TSD_DATA_SLOW +TSD_DATA_FAST +TSD_DATA_SLOWER #undef O JEMALLOC_ALWAYS_INLINE void From 40e7aed59ea1ec8edbeabee71c288afdc2316d72 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 16:59:57 -0700 Subject: [PATCH 1696/2608] TSD: Move in some of the tcache fields. We had put these in the tcache for cache optimization reasons. After the previous diff, these no longer apply. --- include/jemalloc/internal/tcache_structs.h | 7 ------- include/jemalloc/internal/tsd.h | 10 ++++++++-- src/hook.c | 6 +++--- src/tsd.c | 10 +++++----- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 48dbf0fe..783b2dfd 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -7,9 +7,6 @@ #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/tsd_types.h" -/* Various uses of this struct need it to be a named type. */ -typedef ql_elm(tsd_t) tsd_link_t; - struct tcache_s { /* * To minimize our cache-footprint, we put the frequently accessed data @@ -30,10 +27,6 @@ struct tcache_s { /* Lets us track all the tcaches in an arena. */ ql_elm(tcache_t) link; - /* Logically scoped to tsd, but put here for cache layout reasons. */ - ql_elm(tsd_t) tsd_link; - bool in_hook; - /* * The descriptor lets the arena find our cache bins without seeing the * tcache definition. 
This enables arenas to aggregate stats across diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 7e08f6b2..66f27173 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -51,6 +51,8 @@ typedef void (*test_callback_t)(int *); # define MALLOC_TEST_TSD_INITIALIZER #endif +typedef ql_elm(tsd_t) tsd_link_t; + /* O(name, type, nullable type) */ #define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ @@ -72,7 +74,9 @@ typedef void (*test_callback_t)(int *); O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ - O(binshards, tsd_binshards_t, tsd_binshards_t) + O(binshards, tsd_binshards_t, tsd_binshards_t)\ + O(tsd_link, tsd_link_t, tsd_link_t) \ + O(in_hook, bool, bool) #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ @@ -94,7 +98,9 @@ typedef void (*test_callback_t)(int *); /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tsd_link */ {NULL}, \ + /* in_hook */ false, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ diff --git a/src/hook.c b/src/hook.c index 9ac703cf..493edbbe 100644 --- a/src/hook.c +++ b/src/hook.c @@ -130,9 +130,9 @@ hook_reentrantp() { */ static bool in_hook_global = true; tsdn_t *tsdn = tsdn_fetch(); - tcache_t *tcache = tsdn_tcachep_get(tsdn); - if (tcache != NULL) { - return &tcache->in_hook; + bool *in_hook = tsdn_in_hookp_get(tsdn); + if (in_hook!= NULL) { + return in_hook; } return &in_hook_global; } diff --git a/src/tsd.c b/src/tsd.c index 38196c80..c07a4bff 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -74,7 +74,7 @@ tsd_in_nominal_list(tsd_t *tsd) { * out of it here. 
*/ malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); - ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { if (tsd == tsd_list) { found = true; break; @@ -88,9 +88,9 @@ static void tsd_add_nominal(tsd_t *tsd) { assert(!tsd_in_nominal_list(tsd)); assert(tsd_state_get(tsd) <= tsd_state_nominal_max); - ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link); + ql_elm_new(tsd, TSD_MANGLE(tsd_link)); malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); - ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link)); malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); } @@ -99,7 +99,7 @@ tsd_remove_nominal(tsd_t *tsd) { assert(tsd_in_nominal_list(tsd)); assert(tsd_state_get(tsd) <= tsd_state_nominal_max); malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); - ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link); + ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tsd_link)); malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock); } @@ -112,7 +112,7 @@ tsd_force_recompute(tsdn_t *tsdn) { atomic_fence(ATOMIC_RELEASE); malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); tsd_t *remote_tsd; - ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) { + ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, From 7099c66205a9a435edcf1d2c6da56d6a11deb7d8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 17:49:50 -0700 Subject: [PATCH 1697/2608] Arena: fill in terms of cache_bins. 
--- include/jemalloc/internal/arena_externs.h | 5 +++-- src/arena.c | 18 ++++++++---------- src/tcache.c | 6 +++++- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 6e0fe2b6..40dad716 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -48,8 +48,9 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind); +void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, + const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero); diff --git a/src/arena.c b/src/arena.c index b983b634..894c05f4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -821,17 +821,15 @@ arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, } void -arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind) { - assert(cache_bin_ncached_get(tbin, &tcache_bin_info[binind]) == 0); - tcache->bin_refilled[binind] = true; +arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, + const unsigned nfill) { + assert(cache_bin_ncached_get(cache_bin, cache_bin_info) == 0); const bin_info_t *bin_info = &bin_infos[binind]; - const unsigned nfill = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> tcache->lg_fill_div[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_init_ptr_array_for_fill(tbin, &tcache_bin_info[binind], &ptrs, + cache_bin_init_ptr_array_for_fill(cache_bin, cache_bin_info, &ptrs, nfill); /* * Bin-local resources are 
used first: 1) bin->slabcur, and 2) nonfull @@ -915,10 +913,10 @@ label_refill: if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; - bin->stats.nrequests += tbin->tstats.nrequests; + bin->stats.nrequests += cache_bin->tstats.nrequests; bin->stats.curregs += filled; bin->stats.nfills++; - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); @@ -944,7 +942,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(tbin, &tcache_bin_info[binind], &ptrs, filled); + cache_bin_finish_fill(cache_bin, cache_bin_info, &ptrs, filled); arena_decay_tick(tsdn, arena); } diff --git a/src/tcache.c b/src/tcache.c index d3453542..2063742b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -103,7 +103,11 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ret; assert(tcache->arena != NULL); - arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind); + unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) + >> tcache->lg_fill_div[binind]; + arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind], + binind, nfill); + tcache->bin_refilled[binind] = true; ret = cache_bin_alloc(tbin, tcache_success); return ret; From a13fbad374f31a7e6e912c0260b442d134bb0f2e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 17:48:35 -0700 Subject: [PATCH 1698/2608] Tcache: split up fast and slow path data. 
--- include/jemalloc/internal/arena_structs.h | 2 +- .../internal/jemalloc_internal_inlines_a.h | 11 +- .../internal/jemalloc_internal_inlines_b.h | 21 +-- include/jemalloc/internal/tcache_externs.h | 11 +- include/jemalloc/internal/tcache_structs.h | 44 +++--- include/jemalloc/internal/tcache_types.h | 2 + include/jemalloc/internal/tsd.h | 6 +- src/arena.c | 13 +- src/ctl.c | 3 +- src/jemalloc.c | 8 +- src/tcache.c | 135 ++++++++++-------- src/thread_event.c | 3 +- 12 files changed, 156 insertions(+), 103 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 682450e3..e8c3f81e 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -53,7 +53,7 @@ struct arena_s { * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_t) tcache_ql; + ql_head(tcache_slow_t) tcache_ql; ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; malloc_mutex_t tcache_ql_mtx; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index cc5e3595..2e4c034d 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -129,7 +129,7 @@ tcache_available(tsd_t *tsd) { */ if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. */ - assert(tsd_tcachep_get(tsd)->arena == NULL || + assert(tsd_tcache_slowp_get(tsd)->arena == NULL || !cache_bin_still_zero_initialized( tcache_small_bin_get(tsd_tcachep_get(tsd), 0))); return true; @@ -147,6 +147,15 @@ tcache_get(tsd_t *tsd) { return tsd_tcachep_get(tsd); } +JEMALLOC_ALWAYS_INLINE tcache_slow_t * +tcache_slow_get(tsd_t *tsd) { + if (!tcache_available(tsd)) { + return NULL; + } + + return tsd_tcache_slowp_get(tsd); +} + static inline void pre_reentrancy(tsd_t *tsd, arena_t *arena) { /* arena is the current context. 
Reentry from a0 is not allowed. */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 3a0bfc64..1de349e6 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -19,8 +19,10 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { arena_migrate(tsd, oldind, newind); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { - tcache_arena_reassociate(tsd_tsdn(tsd), tcache, - newarena); + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, + tcache, newarena); } } } @@ -45,18 +47,19 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { ret = arena_choose_hard(tsd, internal); assert(ret); if (tcache_available(tsd)) { - tcache_t *tcache = tcache_get(tsd); - if (tcache->arena != NULL) { + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); + if (tcache_slow->arena != NULL) { /* See comments in tsd_tcache_data_init().*/ - assert(tcache->arena == + assert(tcache_slow->arena == arena_get(tsd_tsdn(tsd), 0, false)); - if (tcache->arena != ret) { + if (tcache_slow->arena != ret) { tcache_arena_reassociate(tsd_tsdn(tsd), - tcache, ret); + tcache_slow, tcache, ret); } } else { - tcache_arena_associate(tsd_tsdn(tsd), tcache, - ret); + tcache_arena_associate(tsd_tsdn(tsd), + tcache_slow, tcache, ret); } } } diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index c5c8f485..21829acd 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -26,15 +26,17 @@ extern cache_bin_info_t *tcache_bin_info; extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); +void tcache_event_hard(tsd_t *tsd, 
tcache_slow_t *tcache_slow, + tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); + void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, - arena_t *arena); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); @@ -42,7 +44,8 @@ bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); void tcaches_flush(tsd_t *tsd, unsigned ind); void tcaches_destroy(tsd_t *tsd, unsigned ind); bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 783b2dfd..5a27db79 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -7,25 +7,19 @@ #include "jemalloc/internal/ticker.h" #include "jemalloc/internal/tsd_types.h" -struct tcache_s { - /* - * To minimize our cache-footprint, we put the frequently accessed data - * together at the start of this struct. - */ +/* + * The tcache state is split into the slow and hot path data. Each has a + * pointer to the other, and the data always comes in pairs. 
The layout of each + * of them varies in practice; tcache_slow lives in the TSD for the automatic + * tcache, and as part of a dynamic allocation for manual allocations. Keeping + * a pointer to tcache_slow lets us treat these cases uniformly, rather than + * splitting up the tcache [de]allocation code into those paths called with the + * TSD tcache and those called with a manual tcache. + */ - /* - * The pointer stacks associated with bins follow as a contiguous array. - * During tcache initialization, the avail pointer in each element of - * tbins is initialized to point to the proper offset within this array. - */ - cache_bin_t bins_small[SC_NBINS]; - - /* - * This data is less hot; we can be a little less careful with our - * footprint here. - */ +struct tcache_slow_s { /* Lets us track all the tcaches in an arena. */ - ql_elm(tcache_t) link; + ql_elm(tcache_slow_t) link; /* * The descriptor lets the arena find our cache bins without seeing the @@ -45,9 +39,23 @@ struct tcache_s { /* * The start of the allocation containing the dynamic allocation for * either the cache bins alone, or the cache bin memory as well as this - * tcache_t. + * tcache_slow_t and its associated tcache_t. */ void *dyn_alloc; + + /* The associated bins. */ + tcache_t *tcache; +}; + +struct tcache_s { + tcache_slow_t *tcache_slow; + /* + * The pointer stacks associated with bins follow as a contiguous array. + * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. + */ + cache_bin_t bins_small[SC_NBINS]; + /* * We put the cache bins for large size classes at the end of the * struct, since some of them might not get used. 
This might end up diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index c30a5339..cba86f43 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/sc.h" +typedef struct tcache_slow_s tcache_slow_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; @@ -52,6 +53,7 @@ typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {0} +#define TCACHE_SLOW_ZERO_INITIALIZER {0} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 66f27173..37f5aa03 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -76,7 +76,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ - O(in_hook, bool, bool) + O(in_hook, bool, bool) \ + O(tcache_slow, tcache_slow_t, tcache_slow_t) #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ @@ -100,7 +101,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* arenas_tdata */ NULL, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ - /* in_hook */ false, + /* in_hook */ false, \ + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ diff --git a/src/arena.c b/src/arena.c index 894c05f4..13b75efb 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1690,15 +1690,16 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { if (config_stats) { ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); - tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); - if (tcache != NULL && tcache->arena == arena) { 
- ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); + tcache_slow_t *tcache_slow = tcache_slow_get(tsdn_tsd(tsdn)); + if (tcache_slow != NULL && tcache_slow->arena == arena) { + tcache_t *tcache = tcache_slow->tcache; + ql_elm_new(tcache_slow, link); + ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache->cache_bin_array_descriptor, + &tcache_slow->cache_bin_array_descriptor, tcache->bins_small, tcache->bins_large); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); + &tcache_slow->cache_bin_array_descriptor, link); } } diff --git a/src/ctl.c b/src/ctl.c index 7555267a..ae17d44d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1864,7 +1864,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, arena_migrate(tsd, oldind, newind); if (tcache_available(tsd)) { tcache_arena_reassociate(tsd_tsdn(tsd), - tsd_tcachep_get(tsd), newarena); + tsd_tcache_slowp_get(tsd), tsd_tcachep_get(tsd), + newarena); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 63ef578e..c066680b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -717,11 +717,13 @@ stats_print_atexit(void) { for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena = arena_get(tsdn, i, false); if (arena != NULL) { - tcache_t *tcache; + tcache_slow_t *tcache_slow; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach(tcache, &arena->tcache_ql, link) { - tcache_stats_merge(tsdn, tcache, arena); + ql_foreach(tcache_slow, &arena->tcache_ql, + link) { + tcache_stats_merge(tsdn, + tcache_slow->tcache, arena); } malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); diff --git a/src/tcache.c b/src/tcache.c index 2063742b..667a76af 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -41,8 +41,8 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { } void -tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { - szind_t binind = tcache->next_gc_bin; +tcache_event_hard(tsd_t *tsd, 
tcache_slow_t *tcache_slow, tcache_t *tcache) { + szind_t binind = tcache_slow->next_gc_bin; cache_bin_t *tbin; bool is_small; if (binind < SC_NBINS) { @@ -62,7 +62,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Flush (ceiling) 3/4 of the objects below the low water mark. */ if (is_small) { - assert(!tcache->bin_refilled[binind]); + assert(!tcache_slow->bin_refilled[binind]); tcache_bin_flush_small(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); /* @@ -71,43 +71,45 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { */ if ((cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> - (tcache->lg_fill_div[binind] + 1)) >= 1) { - tcache->lg_fill_div[binind]++; + (tcache_slow->lg_fill_div[binind] + 1)) >= 1) { + tcache_slow->lg_fill_div[binind]++; } } else { tcache_bin_flush_large(tsd, tcache, tbin, binind, ncached - low_water + (low_water >> 2)); } - } else if (is_small && tcache->bin_refilled[binind]) { + } else if (is_small && tcache_slow->bin_refilled[binind]) { assert(low_water == 0); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. 
*/ - if (tcache->lg_fill_div[binind] > 1) { - tcache->lg_fill_div[binind]--; + if (tcache_slow->lg_fill_div[binind] > 1) { + tcache_slow->lg_fill_div[binind]--; } - tcache->bin_refilled[binind] = false; + tcache_slow->bin_refilled[binind] = false; } cache_bin_low_water_set(tbin); - tcache->next_gc_bin++; - if (tcache->next_gc_bin == nhbins) { - tcache->next_gc_bin = 0; + tcache_slow->next_gc_bin++; + if (tcache_slow->next_gc_bin == nhbins) { + tcache_slow->next_gc_bin = 0; } } void * -tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, bool *tcache_success) { +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, + tcache_t *tcache, cache_bin_t *tbin, szind_t binind, + bool *tcache_success) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; void *ret; - assert(tcache->arena != NULL); + assert(tcache_slow->arena != NULL); unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) - >> tcache->lg_fill_div[binind]; + >> tcache_slow->lg_fill_div[binind]; arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind], binind, nfill); - tcache->bin_refilled[binind] = true; + tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(tbin, tcache_success); return ret; @@ -154,6 +156,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem, bool small) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience * instead of calling tsd_tsdn(tsd) all the time. 
@@ -168,7 +171,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); - arena_t *tcache_arena = tcache->arena; + arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); unsigned nflush = ncached - rem; @@ -361,57 +364,60 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, } void -tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - assert(tcache->arena == NULL); - tcache->arena = arena; +tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena) { + assert(tcache_slow->arena == NULL); + tcache_slow->arena = arena; if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_elm_new(tcache, link); - ql_tail_insert(&arena->tcache_ql, tcache, link); + ql_elm_new(tcache_slow, link); + ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache->cache_bin_array_descriptor, tcache->bins_small, - tcache->bins_large); + &tcache_slow->cache_bin_array_descriptor, + tcache->bins_small, tcache->bins_large); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); + &tcache_slow->cache_bin_array_descriptor, link); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { - arena_t *arena = tcache->arena; +tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache) { + arena_t *arena = tcache_slow->arena; assert(arena != NULL); if (config_stats) { /* Unlink from list of extant tcaches. 
*/ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); if (config_debug) { bool in_ql = false; - tcache_t *iter; + tcache_slow_t *iter; ql_foreach(iter, &arena->tcache_ql, link) { - if (iter == tcache) { + if (iter == tcache_slow) { in_ql = true; break; } } assert(in_ql); } - ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->tcache_ql, tcache_slow, link); ql_remove(&arena->cache_bin_array_descriptor_ql, - &tcache->cache_bin_array_descriptor, link); - tcache_stats_merge(tsdn, tcache, arena); + &tcache_slow->cache_bin_array_descriptor, link); + tcache_stats_merge(tsdn, tcache_slow->tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } - tcache->arena = NULL; + tcache_slow->arena = NULL; } void -tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - tcache_arena_dissociate(tsdn, tcache); - tcache_arena_associate(tsdn, tcache, arena); +tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, + tcache_t *tcache, arena_t *arena) { + tcache_arena_dissociate(tsdn, tcache_slow, tcache); + tcache_arena_associate(tsdn, tcache_slow, tcache, arena); } bool @@ -429,11 +435,15 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { } static void -tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { - memset(&tcache->link, 0, sizeof(ql_elm(tcache_t))); - tcache->next_gc_bin = 0; - tcache->arena = NULL; - tcache->dyn_alloc = mem; +tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, + void *mem) { + tcache->tcache_slow = tcache_slow; + tcache_slow->tcache = tcache; + + memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t))); + tcache_slow->next_gc_bin = 0; + tcache_slow->arena = NULL; + tcache_slow->dyn_alloc = mem; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); @@ -444,8 +454,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { cache_bin_preincrement(tcache_bin_info, nhbins, mem, &cur_offset); for (; i < SC_NBINS; i++) { - tcache->lg_fill_div[i] = 1; 
- tcache->bin_refilled[i] = false; + tcache_slow->lg_fill_div[i] = 1; + tcache_slow->bin_refilled[i] = false; cache_bin_t *bin = tcache_small_bin_get(tcache, i); cache_bin_init(bin, &tcache_bin_info[i], mem, &cur_offset); @@ -464,7 +474,9 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *mem) { /* Initialize auto tcache (embedded in TSD). */ bool tsd_tcache_data_init(tsd_t *tsd) { + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); + assert(cache_bin_still_zero_initialized( tcache_small_bin_get(tcache, 0))); size_t alignment = tcache_bin_alloc_alignment; @@ -476,7 +488,7 @@ tsd_tcache_data_init(tsd_t *tsd) { return true; } - tcache_init(tsd, tcache, mem); + tcache_init(tsd, tcache_slow, tcache, mem); /* * Initialization is a bit tricky here. After malloc init is done, all * threads can rely on arena_choose and associate tcache accordingly. @@ -485,20 +497,22 @@ tsd_tcache_data_init(tsd_t *tsd) { * associate its tcache to a0 temporarily, and later on * arena_choose_hard() will re-associate properly. */ - tcache->arena = NULL; + tcache_slow->arena = NULL; arena_t *arena; if (!malloc_initialized()) { /* If in initialization, assign to a0. */ arena = arena_get(tsd_tsdn(tsd), 0, false); - tcache_arena_associate(tsd_tsdn(tsd), tcache, arena); + tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, + arena); } else { arena = arena_choose(tsd, NULL); /* This may happen if thread.tcache.enabled is used. */ - if (tcache->arena == NULL) { - tcache_arena_associate(tsd_tsdn(tsd), tcache, arena); + if (tcache_slow->arena == NULL) { + tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, + tcache, arena); } } - assert(arena == tcache->arena); + assert(arena == tcache_slow->arena); return false; } @@ -511,7 +525,8 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. 
*/ - size_t size = tcache_bin_alloc_size + sizeof(tcache_t); + size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + + sizeof(tcache_slow_t); /* Naturally align the pointer stacks. */ size = PTR_CEILING(size); size = sz_sa2u(size, tcache_bin_alloc_alignment); @@ -522,16 +537,20 @@ tcache_create_explicit(tsd_t *tsd) { return NULL; } tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size); - tcache_init(tsd, tcache, mem); + tcache_slow_t *tcache_slow = + (void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t)); + tcache_init(tsd, tcache_slow, tcache, mem); - tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL)); + tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, + arena_ichoose(tsd, NULL)); return tcache; } static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { - assert(tcache->arena != NULL); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + assert(tcache_slow->arena != NULL); for (unsigned i = 0; i < SC_NBINS; i++) { cache_bin_t *tbin = tcache_small_bin_get(tcache, i); @@ -559,15 +578,17 @@ tcache_flush(tsd_t *tsd) { static void tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; tcache_flush_cache(tsd, tcache); - arena_t *arena = tcache->arena; - tcache_arena_dissociate(tsd_tsdn(tsd), tcache); + arena_t *arena = tcache_slow->arena; + tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache); if (tsd_tcache) { cache_bin_t *bin = tcache_small_bin_get(tcache, 0); cache_bin_assert_empty(bin, &tcache_bin_info[0]); } - idalloctm(tsd_tsdn(tsd), tcache->dyn_alloc, NULL, NULL, true, true); + idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, + true); /* * The deallocation and tcache flush above may not trigger decay since diff --git a/src/thread_event.c b/src/thread_event.c index 163ca3f1..c96dea6e 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -50,7 +50,8 @@ tcache_gc_event(tsd_t *tsd) { 
assert(TCACHE_GC_INCR_BYTES > 0); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { - tcache_event_hard(tsd, tcache); + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + tcache_event_hard(tsd, tcache_slow, tcache); } } From cd29ebefd01be090a636e5560066d866209b141b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 7 Apr 2020 20:04:46 -0700 Subject: [PATCH 1699/2608] Tcache: treat small and large cache bins uniformly --- include/jemalloc/internal/cache_bin.h | 8 +- .../internal/jemalloc_internal_inlines_a.h | 18 +- include/jemalloc/internal/tcache_externs.h | 2 + include/jemalloc/internal/tcache_inlines.h | 15 +- include/jemalloc/internal/tcache_structs.h | 14 +- src/arena.c | 17 +- src/jemalloc.c | 4 +- src/tcache.c | 154 ++++++++---------- 8 files changed, 87 insertions(+), 145 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 5a772bf6..a56b4a1d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -106,16 +106,14 @@ struct cache_bin_array_descriptor_s { */ ql_elm(cache_bin_array_descriptor_t) link; /* Pointers to the tcache bins. */ - cache_bin_t *bins_small; - cache_bin_t *bins_large; + cache_bin_t *bins; }; static inline void cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, - cache_bin_t *bins_small, cache_bin_t *bins_large) { + cache_bin_t *bins) { ql_elm_new(descriptor, link); - descriptor->bins_small = bins_small; - descriptor->bins_large = bins_large; + descriptor->bins = bins; } /* Returns ncached_max: Upper limit on ncached. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 2e4c034d..25e5b50e 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -108,18 +108,6 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } -JEMALLOC_ALWAYS_INLINE cache_bin_t * -tcache_small_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind < SC_NBINS); - return &tcache->bins_small[binind]; -} - -JEMALLOC_ALWAYS_INLINE cache_bin_t * -tcache_large_bin_get(tcache_t *tcache, szind_t binind) { - assert(binind >= SC_NBINS &&binind < nhbins); - return &tcache->bins_large[binind - SC_NBINS]; -} - JEMALLOC_ALWAYS_INLINE bool tcache_available(tsd_t *tsd) { /* @@ -129,9 +117,9 @@ tcache_available(tsd_t *tsd) { */ if (likely(tsd_tcache_enabled_get(tsd))) { /* Associated arena == NULL implies tcache init in progress. */ - assert(tsd_tcache_slowp_get(tsd)->arena == NULL || - !cache_bin_still_zero_initialized( - tcache_small_bin_get(tsd_tcachep_get(tsd), 0))); + if (config_debug && tsd_tcache_slowp_get(tsd)->arena != NULL) { + tcache_assert_initialized(tsd_tcachep_get(tsd)); + } return true; } diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 21829acd..7ca38d68 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -53,4 +53,6 @@ void tcache_flush(tsd_t *tsd); bool tsd_tcache_data_init(tsd_t *tsd); bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_assert_initialized(tcache_t *tcache); + #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 3b78ed27..4cbc2d20 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -30,12 +30,11 @@ JEMALLOC_ALWAYS_INLINE void * 
tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - cache_bin_t *bin; bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < SC_NBINS); - bin = tcache_small_bin_get(tcache, binind); + cache_bin_t *bin = &tcache->bins[binind]; ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -74,11 +73,10 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - cache_bin_t *bin; bool tcache_success; - assert(binind >= SC_NBINS &&binind < nhbins); - bin = tcache_large_bin_get(tcache, binind); + assert(binind >= SC_NBINS && binind < nhbins); + cache_bin_t *bin = &tcache->bins[binind]; ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -120,12 +118,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - cache_bin_t *bin; - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); - bin = tcache_small_bin_get(tcache, binind); + cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> 1; @@ -138,13 +134,12 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - cache_bin_t *bin; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); - bin = tcache_large_bin_get(tcache, binind); + cache_bin_t *bin = &tcache->bins[binind]; if 
(unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( &tcache_bin_info[binind]) >> 1; diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 5a27db79..1c9d4db0 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -49,19 +49,7 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; - /* - * The pointer stacks associated with bins follow as a contiguous array. - * During tcache initialization, the avail pointer in each element of - * tbins is initialized to point to the proper offset within this array. - */ - cache_bin_t bins_small[SC_NBINS]; - - /* - * We put the cache bins for large size classes at the end of the - * struct, since some of them might not get used. This might end up - * letting us avoid touching an extra page if we don't have to. - */ - cache_bin_t bins_large[SC_NSIZES-SC_NBINS]; + cache_bin_t bins[SC_NSIZES]; }; /* Linkage for list of available (previously used) explicit tcache IDs. 
*/ diff --git a/src/arena.c b/src/arena.c index 13b75efb..4ed3c88a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -148,18 +148,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - for (szind_t i = 0; i < SC_NBINS; i++) { - cache_bin_t *tbin = &descriptor->bins_small[i]; + for (szind_t i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &descriptor->bins[i]; astats->tcache_bytes += - cache_bin_ncached_get(tbin, - &tcache_bin_info[i]) * sz_index2size(i); - } - for (szind_t i = 0; i < nhbins - SC_NBINS; i++) { - cache_bin_t *tbin = &descriptor->bins_large[i]; - astats->tcache_bytes += - cache_bin_ncached_get(tbin, - &tcache_bin_info[i + SC_NBINS]) - * sz_index2size(i + SC_NBINS); + cache_bin_ncached_get(cache_bin, + &tcache_bin_info[i]) * sz_index2size(i); } } malloc_mutex_prof_read(tsdn, @@ -1697,7 +1690,7 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( &tcache_slow->cache_bin_array_descriptor, - tcache->bins_small, tcache->bins_large); + tcache->bins); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, &tcache_slow->cache_bin_array_descriptor, link); } diff --git a/src/jemalloc.c b/src/jemalloc.c index c066680b..fab285d1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2495,7 +2495,7 @@ je_malloc(size_t size) { assert(tsd_fast(tsd)); tcache_t *tcache = tsd_tcachep_get(tsd); - cache_bin_t *bin = tcache_small_bin_get(tcache, ind); + cache_bin_t *bin = &tcache->bins[ind]; bool tcache_success; void *ret; @@ -2828,7 +2828,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } tcache_t *tcache = tsd_tcachep_get(tsd); - cache_bin_t *bin = tcache_small_bin_get(tcache, alloc_ctx.szind); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; /* * If junking were enabled, this is where 
we would do it. It's not diff --git a/src/tcache.c b/src/tcache.c index 667a76af..63e1a4d5 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -43,19 +43,12 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { szind_t binind = tcache_slow->next_gc_bin; - cache_bin_t *tbin; - bool is_small; - if (binind < SC_NBINS) { - tbin = tcache_small_bin_get(tcache, binind); - is_small = true; - } else { - tbin = tcache_large_bin_get(tcache, binind); - is_small = false; - } + bool is_small = (binind < SC_NBINS); + cache_bin_t *cache_bin = &tcache->bins[binind]; - cache_bin_sz_t low_water = cache_bin_low_water_get(tbin, + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[binind]); - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, &tcache_bin_info[binind]); if (low_water > 0) { /* @@ -63,7 +56,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { */ if (is_small) { assert(!tcache_slow->bin_refilled[binind]); - tcache_bin_flush_small(tsd, tcache, tbin, binind, + tcache_bin_flush_small(tsd, tcache, cache_bin, binind, ncached - low_water + (low_water >> 2)); /* * Reduce fill count by 2X. 
Limit lg_fill_div such that @@ -75,7 +68,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { tcache_slow->lg_fill_div[binind]++; } } else { - tcache_bin_flush_large(tsd, tcache, tbin, binind, + tcache_bin_flush_large(tsd, tcache, cache_bin, binind, ncached - low_water + (low_water >> 2)); } } else if (is_small && tcache_slow->bin_refilled[binind]) { @@ -89,7 +82,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { } tcache_slow->bin_refilled[binind] = false; } - cache_bin_low_water_set(tbin); + cache_bin_low_water_set(cache_bin); tcache_slow->next_gc_bin++; if (tcache_slow->next_gc_bin == nhbins) { @@ -99,7 +92,7 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, - tcache_t *tcache, cache_bin_t *tbin, szind_t binind, + tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool *tcache_success) { tcache_slow_t *tcache_slow = tcache->tcache_slow; void *ret; @@ -107,10 +100,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) >> tcache_slow->lg_fill_div[binind]; - arena_cache_bin_fill_small(tsdn, arena, tbin, &tcache_bin_info[binind], - binind, nfill); + arena_cache_bin_fill_small(tsdn, arena, cache_bin, + &tcache_bin_info[binind], binind, nfill); tcache_slow->bin_refilled[binind] = true; - ret = cache_bin_alloc(tbin, tcache_success); + ret = cache_bin_alloc(cache_bin, tcache_success); return ret; } @@ -154,7 +147,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, } JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* @@ -168,7 +161,7 @@ tcache_bin_flush_impl(tsd_t *tsd, 
tcache_t *tcache, cache_bin_t *tbin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(tbin, + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache_slow->arena; @@ -182,7 +175,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_init_ptr_array_for_flush(tbin, &tcache_bin_info[binind], + cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], &ptrs, nflush); /* Look up edata once per item. */ @@ -249,13 +242,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, if (small) { cur_bin->stats.nflushes++; cur_bin->stats.nrequests += - tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } else { arena_stats_large_flush_nrequests_add(tsdn, &tcache_arena->stats, binind, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests); + cache_bin->tstats.nrequests = 0; } } @@ -336,31 +329,31 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena, binind, &binshard); bin->stats.nflushes++; - bin->stats.nrequests += tbin->tstats.nrequests; - tbin->tstats.nrequests = 0; + bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; malloc_mutex_unlock(tsdn, &bin->lock); } else { arena_stats_large_flush_nrequests_add(tsdn, &tcache_arena->stats, binind, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + cache_bin->tstats.nrequests); + cache_bin->tstats.nrequests = 0; } } - cache_bin_finish_flush(tbin, &tcache_bin_info[binind], &ptrs, + cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs, ncached - rem); } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, 
cache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, true); + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, true); } void -tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, tbin, binind, rem, false); + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, false); } void @@ -376,8 +369,7 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, ql_elm_new(tcache_slow, link); ql_tail_insert(&arena->tcache_ql, tcache_slow, link); cache_bin_array_descriptor_init( - &tcache_slow->cache_bin_array_descriptor, - tcache->bins_small, tcache->bins_large); + &tcache_slow->cache_bin_array_descriptor, tcache->bins); ql_tail_insert(&arena->cache_bin_array_descriptor_ql, &tcache_slow->cache_bin_array_descriptor, link); @@ -446,23 +438,18 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->dyn_alloc = mem; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); - memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - SC_NBINS)); + memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); - unsigned i = 0; size_t cur_offset = 0; cache_bin_preincrement(tcache_bin_info, nhbins, mem, &cur_offset); - for (; i < SC_NBINS; i++) { - tcache_slow->lg_fill_div[i] = 1; - tcache_slow->bin_refilled[i] = false; - cache_bin_t *bin = tcache_small_bin_get(tcache, i); - cache_bin_init(bin, &tcache_bin_info[i], mem, - &cur_offset); - } - for (; i < nhbins; i++) { - cache_bin_t *bin = tcache_large_bin_get(tcache, i); - cache_bin_init(bin, &tcache_bin_info[i], mem, + for (unsigned i = 0; i < nhbins; i++) { + if (i < SC_NBINS) { + tcache_slow->lg_fill_div[i] = 1; + 
tcache_slow->bin_refilled[i] = false; + } + cache_bin_t *cache_bin = &tcache->bins[i]; + cache_bin_init(cache_bin, &tcache_bin_info[i], mem, &cur_offset); } cache_bin_postincrement(tcache_bin_info, nhbins, mem, @@ -477,8 +464,7 @@ tsd_tcache_data_init(tsd_t *tsd) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); - assert(cache_bin_still_zero_initialized( - tcache_small_bin_get(tcache, 0))); + assert(cache_bin_still_zero_initialized(&tcache->bins[0])); size_t alignment = tcache_bin_alloc_alignment; size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); @@ -552,20 +538,15 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; assert(tcache_slow->arena != NULL); - for (unsigned i = 0; i < SC_NBINS; i++) { - cache_bin_t *tbin = tcache_small_bin_get(tcache, i); - tcache_bin_flush_small(tsd, tcache, tbin, i, 0); - - if (config_stats) { - assert(tbin->tstats.nrequests == 0); + for (unsigned i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &tcache->bins[i]; + if (i < SC_NBINS) { + tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0); + } else { + tcache_bin_flush_large(tsd, tcache, cache_bin, i, 0); } - } - for (unsigned i = SC_NBINS; i < nhbins; i++) { - cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - tcache_bin_flush_large(tsd, tcache, tbin, i, 0); - if (config_stats) { - assert(tbin->tstats.nrequests == 0); + assert(cache_bin->tstats.nrequests == 0); } } } @@ -584,8 +565,8 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { tcache_arena_dissociate(tsd_tsdn(tsd), tcache_slow, tcache); if (tsd_tcache) { - cache_bin_t *bin = tcache_small_bin_get(tcache, 0); - cache_bin_assert_empty(bin, &tcache_bin_info[0]); + cache_bin_t *cache_bin = &tcache->bins[0]; + cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]); } idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, true); @@ -614,13 +595,11 @@ tcache_cleanup(tsd_t *tsd) { 
tcache_t *tcache = tsd_tcachep_get(tsd); if (!tcache_available(tsd)) { assert(tsd_tcache_enabled_get(tsd) == false); - assert(cache_bin_still_zero_initialized( - tcache_small_bin_get(tcache, 0))); + assert(cache_bin_still_zero_initialized(&tcache->bins[0])); return; } assert(tsd_tcache_enabled_get(tsd)); - assert(!cache_bin_still_zero_initialized( - tcache_small_bin_get(tcache, 0))); + assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); tcache_destroy(tsd, tcache, true); if (config_debug) { @@ -628,33 +607,28 @@ tcache_cleanup(tsd_t *tsd) { * For debug testing only, we want to pretend we're still in the * zero-initialized state. */ - memset(tcache->bins_small, 0, sizeof(cache_bin_t) * SC_NBINS); - memset(tcache->bins_large, 0, - sizeof(cache_bin_t) * (nhbins - SC_NBINS)); + memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); } } void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { - unsigned i; - cassert(config_stats); /* Merge and reset tcache stats. */ - for (i = 0; i < SC_NBINS; i++) { - cache_bin_t *tbin = tcache_small_bin_get(tcache, i); - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, &binshard); - bin->stats.nrequests += tbin->tstats.nrequests; - malloc_mutex_unlock(tsdn, &bin->lock); - tbin->tstats.nrequests = 0; - } - - for (; i < nhbins; i++) { - cache_bin_t *tbin = tcache_large_bin_get(tcache, i); - arena_stats_large_flush_nrequests_add(tsdn, &arena->stats, i, - tbin->tstats.nrequests); - tbin->tstats.nrequests = 0; + for (unsigned i = 0; i < nhbins; i++) { + cache_bin_t *cache_bin = &tcache->bins[i]; + if (i < SC_NBINS) { + unsigned binshard; + bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, + &binshard); + bin->stats.nrequests += cache_bin->tstats.nrequests; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + arena_stats_large_flush_nrequests_add(tsdn, + &arena->stats, i, cache_bin->tstats.nrequests); + } + cache_bin->tstats.nrequests = 0; } } @@ -824,3 +798,7 @@ void 
tcache_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &tcaches_mtx); } + +void tcache_assert_initialized(tcache_t *tcache) { + assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); +} From 4f8efba8248aaafa2200e3538bae126729e0407d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Apr 2020 15:02:38 -0700 Subject: [PATCH 1700/2608] TSD: Make rtree_ctx a slow-path field. Performance-sensitive users will use sized deallocation facilities, so that actually touching the rtree_ctx is unnecessary. We make it the last element of the slow data, so that it is for practical purposes almost-fast. --- include/jemalloc/internal/tsd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 37f5aa03..0f9ec12b 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -77,7 +77,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ - O(tcache_slow, tcache_slow_t, tcache_slow_t) + O(tcache_slow, tcache_slow_t, tcache_slow_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ @@ -102,7 +103,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ /* in_hook */ false, \ - /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ + /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ @@ -110,7 +112,6 @@ typedef ql_elm(tsd_t) tsd_link_t; O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) \ O(tcache, tcache_t, tcache_t) #define TSD_DATA_FAST_INITIALIZER \ @@ -118,7 
+119,6 @@ typedef ql_elm(tsd_t) tsd_link_t; /* thread_allocated_next_event_fast */ 0, \ /* thread_deallocated */ 0, \ /* thread_deallocated_next_event_fast */ 0, \ - /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, \ /* tcache */ TCACHE_ZERO_INITIALIZER, /* O(name, type, nullable type) */ From fb6cfffd39ca50add3356c2e61242e13fff2ce1f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 13 Apr 2020 11:39:49 -0700 Subject: [PATCH 1701/2608] Configure: Get rid of LG_QUANTA. This is no longer used. --- configure.ac | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 1c2509a6..f67fc3d7 100644 --- a/configure.ac +++ b/configure.ac @@ -1552,9 +1552,7 @@ fi AC_ARG_WITH([lg_quantum], [AS_HELP_STRING([--with-lg-quantum=], - [Base 2 log of minimum allocation alignment])], - [LG_QUANTA="$with_lg_quantum"], - [LG_QUANTA="3 4"]) + [Base 2 log of minimum allocation alignment])]) if test "x$with_lg_quantum" != "x" ; then AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) fi From 79dd0c04ed88fcebe9f65905d65d6e7ae32c4940 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 14 Apr 2020 18:32:54 -0700 Subject: [PATCH 1702/2608] SC: Simplify SC_NPSIZES computation. Rather than taking all the sizes and subtracting out those that don't fit, we instead just add up all the ones that do. --- include/jemalloc/internal/sc.h | 48 ++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index a6341a3d..6bc5db37 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -197,30 +197,34 @@ (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) -/* The number of size classes that are a multiple of the page size. */ -#define SC_NPSIZES ( \ - /* Start with all the size classes. */ \ - SC_NSIZES \ - /* Subtract out those groups with too small a base. 
*/ \ - - (LG_PAGE - 1 - SC_LG_FIRST_REGULAR_BASE) * SC_NGROUP \ - /* And the pseudo-group. */ \ - - SC_NPSEUDO \ - /* And the tiny group. */ \ - - SC_NTINY \ - /* Sizes where ndelta*delta is not a multiple of the page size. */ \ - - (SC_LG_NGROUP * SC_NGROUP)) /* - * Note that the last line is computed as the sum of the second column in the - * following table: - * lg(base) | count of sizes to exclude - * ------------------------------|----------------------------- - * LG_PAGE - 1 | SC_NGROUP - 1 - * LG_PAGE | SC_NGROUP - 1 - * LG_PAGE + 1 | SC_NGROUP - 2 - * LG_PAGE + 2 | SC_NGROUP - 4 - * ... | ... - * LG_PAGE + (SC_LG_NGROUP - 1) | SC_NGROUP - (SC_NGROUP / 2) + * The number of size classes that are a multiple of the page size. + * + * Here are the first few bases that have a page-sized SC. + * + * lg(base) | base | highest SC | page-multiple SCs + * --------------|------------------------------------------ + * LG_PAGE - 1 | PAGE / 2 | PAGE | 1 + * LG_PAGE | PAGE | 2 * PAGE | 1 + * LG_PAGE + 1 | 2 * PAGE | 4 * PAGE | 2 + * LG_PAGE + 2 | 4 * PAGE | 8 * PAGE | 4 + * + * The number of page-multiple SCs continues to grow in powers of two, up until + * lg_delta == lg_page, which corresponds to setting lg_base to lg_page + + * SC_LG_NGROUP. So, then, the number of size classes that are multiples of the + * page size whose lg_delta is less than the page size are + * is 1 + (2**0 + 2**1 + ... + 2**(lg_ngroup - 1) == 2**lg_ngroup. + * + * For each base with lg_base in [lg_page + lg_ngroup, lg_base_max), there are + * NGROUP page-sized size classes, and when lg_base == lg_base_max, there are + * NGROUP - 1. + * + * This gives us the quantity we seek. */ +#define SC_NPSIZES ( \ + SC_NGROUP \ + + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP \ + + SC_NGROUP - 1) /* * We declare a size class is binnable if size < page size * group. 
Or, in other From 46471ea32760a90ac3b860f96805901c78a34f62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 15 Apr 2020 14:08:20 -0700 Subject: [PATCH 1703/2608] SC: Name the max lookup constant. --- include/jemalloc/internal/sc.h | 5 +++-- src/sc.c | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 6bc5db37..138da5c0 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -246,8 +246,9 @@ # error "Too many small size classes" #endif -/* The largest size class in the lookup table. */ -#define SC_LOOKUP_MAXCLASS ((size_t)1 << 12) +/* The largest size class in the lookup table, and its binary log. */ +#define SC_LG_MAX_LOOKUP 12 +#define SC_LOOKUP_MAXCLASS ((size_t)1 << SC_LG_MAX_LOOKUP) /* Internal, only used for the definition of SC_SMALL_MAXCLASS. */ #define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1)) diff --git a/src/sc.c b/src/sc.c index cfce533f..1474eacc 100644 --- a/src/sc.c +++ b/src/sc.c @@ -259,10 +259,8 @@ void sc_data_init(sc_data_t *sc_data) { assert(!sc_data->initialized); - int lg_max_lookup = 12; - size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, - lg_max_lookup, LG_PAGE, SC_LG_NGROUP); + SC_LG_MAX_LOOKUP, LG_PAGE, SC_LG_NGROUP); sc_data->initialized = true; } From 2c09d43494d1c2f0df41ef16b040acb86ad4b095 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 28 Apr 2020 12:18:36 -0700 Subject: [PATCH 1704/2608] Add a benchmark of large allocations. 
--- Makefile.in | 1 + test/include/test/bench.h | 39 ++++++++++++++++++++++++++++++++ test/stress/large_microbench.c | 33 +++++++++++++++++++++++++++ test/stress/microbench.c | 41 +--------------------------------- 4 files changed, 74 insertions(+), 40 deletions(-) create mode 100644 test/include/test/bench.h create mode 100644 test/stress/large_microbench.c diff --git a/Makefile.in b/Makefile.in index 6cded807..d35b74b3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -289,6 +289,7 @@ CPP_SRCS := TESTS_INTEGRATION_CPP := endif TESTS_STRESS := $(srcroot)test/stress/microbench.c \ + $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/hookbench.c diff --git a/test/include/test/bench.h b/test/include/test/bench.h new file mode 100644 index 00000000..6cd19fdd --- /dev/null +++ b/test/include/test/bench.h @@ -0,0 +1,39 @@ +static inline void +time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, + void (*func)(void)) { + uint64_t i; + + for (i = 0; i < nwarmup; i++) { + func(); + } + timer_start(timer); + for (i = 0; i < niter; i++) { + func(); + } + timer_stop(timer); +} + +static inline void +compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, + void (*func_a), const char *name_b, void (*func_b)) { + timedelta_t timer_a, timer_b; + char ratio_buf[6]; + void *p; + + p = mallocx(1, 0); + if (p == NULL) { + test_fail("Unexpected mallocx() failure"); + return; + } + + time_func(&timer_a, nwarmup, niter, func_a); + time_func(&timer_b, nwarmup, niter, func_b); + + timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); + malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, " + "%s=%"FMTu64"us, ratio=1:%s\n", + niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), + ratio_buf); + + dallocx(p, 0); +} diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c new file mode 100644 index 00000000..c66b33a1 --- /dev/null +++ b/test/stress/large_microbench.c @@ -0,0 +1,33 @@ +#include "test/jemalloc_test.h" 
+#include "test/bench.h" + +static void +large_mallocx_free(void) { + /* + * We go a bit larger than the large minclass on its own to better + * expose costs from things like zeroing. + */ + void *p = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + free(p); +} + +static void +small_mallocx_free(void) { + void *p = mallocx(16, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + free(p); +} + +TEST_BEGIN(test_large_vs_small) { + compare_funcs(100*1000, 1*1000*1000, "large mallocx", + large_mallocx_free, "small mallocx", small_mallocx_free); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_large_vs_small); +} + diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 988b7938..226677f7 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -1,44 +1,5 @@ #include "test/jemalloc_test.h" - -static inline void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, - void (*func)(void)) { - uint64_t i; - - for (i = 0; i < nwarmup; i++) { - func(); - } - timer_start(timer); - for (i = 0; i < niter; i++) { - func(); - } - timer_stop(timer); -} - -void -compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, - void (*func_a), const char *name_b, void (*func_b)) { - timedelta_t timer_a, timer_b; - char ratio_buf[6]; - void *p; - - p = mallocx(1, 0); - if (p == NULL) { - test_fail("Unexpected mallocx() failure"); - return; - } - - time_func(&timer_a, nwarmup, niter, func_a); - time_func(&timer_b, nwarmup, niter, func_b); - - timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, " - "%s=%"FMTu64"us, ratio=1:%s\n", - niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), - ratio_buf); - - dallocx(p, 0); -} +#include "test/bench.h" static void malloc_free(void) { From f1f8a75496cfff34d14bf067c4af92c63d9a521e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 29 Apr 
2020 09:05:57 -0700 Subject: [PATCH 1705/2608] Let opt.zero propagate to core allocation. I.e. set dopts->zero early on if opt.zero is true, rather than leaving it set by the entry-point function (malloc, calloc, etc.) and then memsetting. This avoids situations where we zero once in the large-alloc pathway and then again via memset. --- src/jemalloc.c | 47 +++++++++++++++++++++++------------------------ src/large.c | 3 --- 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index fab285d1..14b2a088 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2165,7 +2165,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* This is the beginning of the "core" algorithm. */ - + if (config_fill && sopts->slow && opt_zero) { + dopts->zero = true; + } if (dopts->alignment == 0) { ind = sz_size2index(size); if (unlikely(ind >= SC_NSIZES)) { @@ -2263,12 +2265,9 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { assert(usize == isalloc(tsd_tsdn(tsd), allocation)); - if (config_fill && sopts->slow && !dopts->zero) { - if (unlikely(opt_junk_alloc)) { - junk_alloc_callback(allocation, usize); - } else if (unlikely(opt_zero)) { - memset(allocation, 0, usize); - } + if (config_fill && sopts->slow && !dopts->zero + && unlikely(opt_junk_alloc)) { + junk_alloc_callback(allocation, usize); } if (sopts->slow) { @@ -3210,7 +3209,6 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { size_t usize; size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); - bool zero = flags & MALLOCX_ZERO; arena_t *arena; tcache_t *tcache; @@ -3220,6 +3218,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); + bool zero = flags & MALLOCX_ZERO; + if (config_fill && unlikely(opt_zero)) { + zero = true; + } + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = 
arena_get(tsd_tsdn(tsd), arena_ind, true); @@ -3275,14 +3278,11 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); - if (config_fill && malloc_slow && !zero && usize > old_usize) { + if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize + && !zero) { size_t excess_len = usize - old_usize; void *excess_start = (void *)((uintptr_t)p + old_usize); - if (unlikely(opt_junk_alloc)) { - junk_alloc_callback(excess_start, excess_len); - } else if (unlikely(opt_zero)) { - memset(excess_start, 0, excess_len); - } + junk_alloc_callback(excess_start, excess_len); } return p; @@ -3497,7 +3497,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); + bool zero = flags & MALLOCX_ZERO; + if (config_fill && unlikely(opt_zero)) { + zero = true; + } LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); @@ -3561,16 +3565,11 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); - if (config_fill && malloc_slow) { - if (usize > old_usize && !zero) { - size_t excess_len = usize - old_usize; - void *excess_start = (void *)((uintptr_t)ptr + old_usize); - if (unlikely(opt_junk_alloc)) { - junk_alloc_callback(excess_start, excess_len); - } else if (unlikely(opt_zero)) { - memset(excess_start, 0, excess_len); - } - } + if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && + !zero) { + size_t excess_len = usize - old_usize; + void *excess_start = (void *)((uintptr_t)ptr + old_usize); + junk_alloc_callback(excess_start, excess_len); } label_not_resized: if (unlikely(!tsd_fast(tsd))) { diff --git a/src/large.c b/src/large.c index d97009a4..b8439371 100644 --- a/src/large.c +++ b/src/large.c @@ -32,9 +32,6 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t 
alignment, return NULL; } - if (config_fill && unlikely(opt_zero)) { - zero = true; - } if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } From 0295aa38a2206f3229f60a4105767e15ebdca797 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 21 Apr 2020 13:29:07 -0700 Subject: [PATCH 1706/2608] Deduplicate entries in witness error message --- src/witness.c | 43 +++++++++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/src/witness.c b/src/witness.c index f42b72ad..e9ddf596 100644 --- a/src/witness.c +++ b/src/witness.c @@ -14,15 +14,42 @@ witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness->opaque = opaque; } +static void +witness_print_witness(witness_t *w, unsigned n) { + assert(n > 0); + if (n == 1) { + malloc_printf(" %s(%u)", w->name, w->rank); + } else { + malloc_printf(" %s(%u)X%u", w->name, w->rank, n); + } +} + +static void +witness_print_witnesses(const witness_list_t *witnesses) { + witness_t *w, *last = NULL; + unsigned n = 0; + ql_foreach(w, witnesses, link) { + if (last != NULL && w->rank > last->rank) { + assert(w->name != last->name); + witness_print_witness(last, n); + n = 0; + } else if (last != NULL) { + assert(w->rank == last->rank); + assert(w->name == last->name); + } + last = w; + ++n; + } + if (last != NULL) { + witness_print_witness(last, n); + } +} + static void witness_lock_error_impl(const witness_list_t *witnesses, const witness_t *witness) { - witness_t *w; - malloc_printf(": Lock rank order reversal:"); - ql_foreach(w, witnesses, link) { - malloc_printf(" %s(%u)", w->name, w->rank); - } + witness_print_witnesses(witnesses); malloc_printf(" %s(%u)\n", witness->name, witness->rank); abort(); } @@ -49,13 +76,9 @@ witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error = static void witness_depth_error_impl(const witness_list_t *witnesses, witness_rank_t rank_inclusive, unsigned depth) { - witness_t *w; - 
malloc_printf(": Should own %u lock%s of rank >= %u:", depth, (depth != 1) ? "s" : "", rank_inclusive); - ql_foreach(w, witnesses, link) { - malloc_printf(" %s(%u)", w->name, w->rank); - } + witness_print_witnesses(witnesses); malloc_printf("\n"); abort(); } From 039bfd4e307df51bd46f164b2af0ffa62142ca5d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 11:08:25 -0700 Subject: [PATCH 1707/2608] Do not rollback prof idump counter in arena_prof_promote() --- include/jemalloc/internal/counter.h | 25 ------------- include/jemalloc/internal/prof_externs.h | 1 - include/jemalloc/internal/prof_inlines_a.h | 11 ------ src/arena.c | 2 - src/prof.c | 12 +----- test/unit/counter.c | 43 ---------------------- 6 files changed, 1 insertion(+), 93 deletions(-) diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 4aee23dd..896fd02a 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -51,31 +51,6 @@ counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { return overflow; } -JEMALLOC_ALWAYS_INLINE void -counter_rollback(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) { - /* - * Cancel out as much of the excessive accumbytes increase as possible - * without underflowing. Interval-triggered events occur slightly more - * often than intended as a result of incomplete canceling. - */ - uint64_t a0, a1; -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&counter->accumbytes, - ATOMIC_RELAXED); - do { - a1 = (a0 >= bytes) ? a0 - bytes : 0; - } while (!atomic_compare_exchange_weak_u64( - &counter->accumbytes, &a0, a1, ATOMIC_RELAXED, - ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &counter->mtx); - a0 = counter->accumbytes; - a1 = (a0 >= bytes) ? 
a0 - bytes : 0; - counter->accumbytes = a1; - malloc_mutex_unlock(tsdn, &counter->mtx); -#endif -} - bool counter_accum_init(counter_accum_t *counter, uint64_t interval); #endif /* JEMALLOC_INTERNAL_COUNTER_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 35181671..f03ef74b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -50,7 +50,6 @@ extern bool prof_booted; /* Functions only accessed in prof_inlines_a.h */ bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); -void prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize); /* Functions only accessed in prof_inlines_b.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 61773a2b..63d429e3 100644 --- a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -36,15 +36,4 @@ prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { return prof_idump_accum_impl(tsdn, accumbytes); } -JEMALLOC_ALWAYS_INLINE void -prof_idump_rollback(tsdn_t *tsdn, size_t usize) { - cassert(config_prof); - - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return; - } - - prof_idump_rollback_impl(tsdn, usize); -} - #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/src/arena.c b/src/arena.c index 4ed3c88a..12c6b0a8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1061,8 +1061,6 @@ arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { edata_szind_set(edata, szind); emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - prof_idump_rollback(tsdn, usize); - assert(isalloc(tsdn, ptr) == usize); } diff --git a/src/prof.c b/src/prof.c index bbf8e9d1..9c1fc2a6 100644 --- a/src/prof.c +++ b/src/prof.c @@ -50,7 +50,7 @@ bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_experimental_use_sys_thread_name = false; -/* 
Accessed via prof_idump_[accum/rollback](). */ +/* Accessed via prof_idump_accum(). */ static counter_accum_t prof_idump_accumulated; /* @@ -655,16 +655,6 @@ prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); } -void -prof_idump_rollback_impl(tsdn_t *tsdn, size_t usize) { - cassert(config_prof); - - /* Rollback is only done on arena_prof_promote of small sizes. */ - assert(SC_LARGE_MINCLASS > usize); - return counter_rollback(tsdn, &prof_idump_accumulated, - SC_LARGE_MINCLASS - usize); -} - bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); diff --git a/test/unit/counter.c b/test/unit/counter.c index 585cbc63..c14eee31 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -36,48 +36,6 @@ expect_counter_value(counter_accum_t *c, uint64_t v) { expect_u64_eq(accum, v, "Counter value mismatch"); } -TEST_BEGIN(test_counter_rollback) { - uint64_t half_interval = interval / 2; - - counter_accum_t c; - counter_accum_init(&c, interval); - - tsd_t *tsd = tsd_fetch(); - counter_rollback(tsd_tsdn(tsd), &c, half_interval); - - bool trigger; - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - counter_rollback(tsd_tsdn(tsd), &c, half_interval + 1); - expect_counter_value(&c, 0); - - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - counter_rollback(tsd_tsdn(tsd), &c, half_interval - 1); - expect_counter_value(&c, 1); - - counter_rollback(tsd_tsdn(tsd), &c, 1); - expect_counter_value(&c, 0); - - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - counter_rollback(tsd_tsdn(tsd), &c, 1); - expect_counter_value(&c, half_interval - 1); - - trigger = counter_accum(tsd_tsdn(tsd), &c, half_interval); - expect_b_eq(trigger, false, "Should not trigger"); - expect_counter_value(&c, interval - 
1); - - trigger = counter_accum(tsd_tsdn(tsd), &c, 1); - expect_b_eq(trigger, true, "Should have triggered"); - expect_counter_value(&c, 0); - - trigger = counter_accum(tsd_tsdn(tsd), &c, interval + 1); - expect_b_eq(trigger, true, "Should have triggered"); - expect_counter_value(&c, 1); -} -TEST_END - #define N_THDS (16) #define N_ITER_THD (1 << 12) #define ITER_INCREMENT (interval >> 4) @@ -123,6 +81,5 @@ int main(void) { return test( test_counter_accum, - test_counter_rollback, test_counter_mt); } From e10e5059e87b8d9c6ec9910d803bd1a1ba55da85 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 12:13:22 -0700 Subject: [PATCH 1708/2608] Make prof_idump_accum() non-inline --- include/jemalloc/internal/prof_externs.h | 4 +--- include/jemalloc/internal/prof_inlines_a.h | 11 ----------- src/prof.c | 6 +++++- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index f03ef74b..8c657c63 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,9 +48,6 @@ extern size_t lg_prof_sample; extern bool prof_booted; -/* Functions only accessed in prof_inlines_a.h */ -bool prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes); - /* Functions only accessed in prof_inlines_b.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); @@ -78,6 +75,7 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_accum_init(void); +bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h index 63d429e3..4450b1d3 100644 --- 
a/include/jemalloc/internal/prof_inlines_a.h +++ b/include/jemalloc/internal/prof_inlines_a.h @@ -25,15 +25,4 @@ prof_active_get_unlocked(void) { return prof_active; } -JEMALLOC_ALWAYS_INLINE bool -prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { - cassert(config_prof); - - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return false; - } - - return prof_idump_accum_impl(tsdn, accumbytes); -} - #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/src/prof.c b/src/prof.c index 9c1fc2a6..ff09a5d7 100644 --- a/src/prof.c +++ b/src/prof.c @@ -649,9 +649,13 @@ prof_accum_init(void) { } bool -prof_idump_accum_impl(tsdn_t *tsdn, uint64_t accumbytes) { +prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { cassert(config_prof); + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return false; + } + return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); } From 8be558449446a5190bdf661da428ecd6b9fb2a8f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 12:21:38 -0700 Subject: [PATCH 1709/2608] Initialize prof idump counter once rather than once per arena --- include/jemalloc/internal/prof_externs.h | 1 - src/arena.c | 6 ------ src/prof.c | 8 ++++++-- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 8c657c63..0fbd3eae 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -74,7 +74,6 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_accum_init(void); bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); diff --git a/src/arena.c b/src/arena.c index 12c6b0a8..b61d373b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1463,12 +1463,6 @@ 
arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - if (config_prof) { - if (prof_accum_init()) { - goto label_error; - } - } - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); diff --git a/src/prof.c b/src/prof.c index ff09a5d7..cb71850f 100644 --- a/src/prof.c +++ b/src/prof.c @@ -641,8 +641,8 @@ prof_fdump(void) { prof_dump(tsd, false, filename, opt_prof_leak); } -bool -prof_accum_init(void) { +static bool +prof_idump_accum_init(void) { cassert(config_prof); return counter_accum_init(&prof_idump_accumulated, prof_interval); @@ -1021,6 +1021,10 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } + if (prof_idump_accum_init()) { + return true; + } + if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { return true; From d454af90f102c99eddb38909fc7822769c4213aa Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 12:39:05 -0700 Subject: [PATCH 1710/2608] Remove unused prof_accum field from arena --- include/jemalloc/internal/arena_structs.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index e8c3f81e..0c3f42f3 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -57,9 +57,6 @@ struct arena_s { ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; malloc_mutex_t tcache_ql_mtx; - /* Synchronization: internal. */ - counter_accum_t prof_accum; - /* * Represents a dss_prec_t, but atomically. * From e6cb6919c0c1c94e387ccec79190647a44eb7180 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 28 Apr 2020 09:59:37 -0700 Subject: [PATCH 1711/2608] Consolidate prof inline function headers The prof inline functions are no longer involved in a circular dependency, so consolidate the two headers into one. 
--- .../internal/jemalloc_internal_includes.h | 3 +- include/jemalloc/internal/prof_externs.h | 2 +- .../{prof_inlines_b.h => prof_inlines.h} | 22 +++++++++++++++ include/jemalloc/internal/prof_inlines_a.h | 28 ------------------- 4 files changed, 24 insertions(+), 31 deletions(-) rename include/jemalloc/internal/{prof_inlines_b.h => prof_inlines.h} (90%) delete mode 100644 include/jemalloc/internal/prof_inlines_a.h diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 72b5a72a..90a12a12 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -73,13 +73,12 @@ * Include portions of arena code interleaved with tcache code in order to * resolve circular dependencies. */ -#include "jemalloc/internal/prof_inlines_a.h" #include "jemalloc/internal/arena_inlines_a.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/arena_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_inlines_c.h" -#include "jemalloc/internal/prof_inlines_b.h" +#include "jemalloc/internal/prof_inlines.h" #include "jemalloc/internal/background_thread_inlines.h" #endif /* JEMALLOC_INTERNAL_INCLUDES_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 0fbd3eae..cf61fea2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,7 +48,7 @@ extern size_t lg_prof_sample; extern bool prof_booted; -/* Functions only accessed in prof_inlines_b.h */ +/* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_inlines_b.h b/include/jemalloc/internal/prof_inlines.h similarity index 90% rename from 
include/jemalloc/internal/prof_inlines_b.h rename to include/jemalloc/internal/prof_inlines.h index 29d4020e..d8f401d1 100644 --- a/include/jemalloc/internal/prof_inlines_b.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -5,6 +5,28 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" +JEMALLOC_ALWAYS_INLINE void +prof_active_assert() { + cassert(config_prof); + /* + * If opt_prof is off, then prof_active must always be off, regardless + * of whether prof_active_mtx is in effect or not. + */ + assert(opt_prof || !prof_active); +} + +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + prof_active_assert(); + /* + * Even if opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return prof_active; +} + JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { /* diff --git a/include/jemalloc/internal/prof_inlines_a.h b/include/jemalloc/internal/prof_inlines_a.h deleted file mode 100644 index 4450b1d3..00000000 --- a/include/jemalloc/internal/prof_inlines_a.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_A_H -#define JEMALLOC_INTERNAL_PROF_INLINES_A_H - -#include "jemalloc/internal/mutex.h" - -JEMALLOC_ALWAYS_INLINE void -prof_active_assert() { - cassert(config_prof); - /* - * If opt_prof is off, then prof_active must always be off, regardless - * of whether prof_active_mtx is in effect or not. - */ - assert(opt_prof || !prof_active); -} - -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) { - prof_active_assert(); - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. 
No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return prof_active; -} - -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ From fef9abdcc07227e9e9cb479c4799707c4efa86ad Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 28 Apr 2020 10:40:46 -0700 Subject: [PATCH 1712/2608] Cleanup tcache allocation logic The logic in tcache allocation no longer involves profiling or filling. --- include/jemalloc/internal/tcache_inlines.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 4cbc2d20..5d49c4e3 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -31,7 +31,6 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; bool tcache_success; - size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < SC_NBINS); cache_bin_t *bin = &tcache->bins[binind]; @@ -52,15 +51,9 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, } assert(ret); - /* - * Only compute usize if required. The checks in the following if - * statement are all static. 
- */ - if (config_prof || (slow_path && config_fill) || unlikely(zero)) { - usize = sz_index2size(binind); - assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); - } if (unlikely(zero)) { + size_t usize = sz_index2size(binind); + assert(tcache_salloc(tsd_tsdn(tsd), ret) == usize); memset(ret, 0, usize); } if (config_stats) { @@ -94,16 +87,9 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, return NULL; } } else { - size_t usize JEMALLOC_CC_SILENCE_INIT(0); - - /* Only compute usize on demand */ - if (config_prof || (slow_path && config_fill) || - unlikely(zero)) { - usize = sz_index2size(binind); - assert(usize <= tcache_maxclass); - } - if (unlikely(zero)) { + size_t usize = sz_index2size(binind); + assert(usize <= tcache_maxclass); memset(ret, 0, usize); } From 2097e1945b262f079d82bf6ef78330bf03ebdf08 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 17 Apr 2020 14:49:20 -0700 Subject: [PATCH 1713/2608] Unify write callback signature --- include/jemalloc/internal/buf_writer.h | 2 -- include/jemalloc/internal/emitter.h | 4 ++-- include/jemalloc/internal/jemalloc_internal_types.h | 3 +++ include/jemalloc/internal/malloc_io.h | 12 +++++++----- include/jemalloc/internal/prof_externs.h | 3 +-- include/jemalloc/internal/stats.h | 3 +-- src/ctl.c | 2 +- src/malloc_io.c | 7 +++---- src/prof_recent.c | 3 +-- src/stats.c | 3 +-- 10 files changed, 20 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 55b18ab2..37aa6de5 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -10,8 +10,6 @@ * some "option like" content for the write_cb, so it doesn't matter. 
*/ -typedef void (write_cb_t)(void *, const char *); - typedef struct { write_cb_t *write_cb; void *cbopaque; diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index c3f47b29..9482f68b 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -68,7 +68,7 @@ typedef struct emitter_s emitter_t; struct emitter_s { emitter_output_t output; /* The output information. */ - void (*write_cb)(void *, const char *); + write_cb_t *write_cb; void *cbopaque; int nesting_depth; /* True if we've already emitted a value at the given depth. */ @@ -240,7 +240,7 @@ emitter_json_key_prefix(emitter_t *emitter) { static inline void emitter_init(emitter_t *emitter, emitter_output_t emitter_output, - void (*write_cb)(void *, const char *), void *cbopaque) { + write_cb_t *write_cb, void *cbopaque) { emitter->output = emitter_output; emitter->write_cb = write_cb; emitter->cbopaque = cbopaque; diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index d8da4dee..1ce0f3aa 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -17,6 +17,9 @@ enum zero_realloc_action_e { }; typedef enum zero_realloc_action_e zero_realloc_action_t; +/* Signature of write callback. 
*/ +typedef void (write_cb_t)(void *, const char *); + /* * Flags bits: * diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index fac63612..a375bdae 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_MALLOC_IO_H #define JEMALLOC_INTERNAL_MALLOC_IO_H +#include "jemalloc/internal/jemalloc_internal_types.h" + #ifdef _WIN32 # ifdef _WIN64 # define FMT64_PREFIX "ll" @@ -40,7 +42,7 @@ */ #define MALLOC_PRINTF_BUFSIZE 4096 -void wrtmessage(void *cbopaque, const char *s); +write_cb_t wrtmessage; int buferror(int err, char *buf, size_t buflen); uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base); @@ -58,10 +60,10 @@ size_t malloc_snprintf(char *str, size_t size, const char *format, ...) * The caller can set write_cb to null to choose to print with the * je_malloc_message hook. */ -void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap); -void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +void malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, + va_list ap); +void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, + ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); static inline ssize_t diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index cf61fea2..a6b659c1 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -106,7 +106,6 @@ bool prof_log_stop(tsdn_t *tsdn); ssize_t prof_recent_alloc_max_ctl_read(); ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); -void prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), - void *cbopaque); +void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index d1f5eab7..3720619b 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -41,8 +41,7 @@ uint64_t stats_interval_accum_batch_size(void); bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); /* Implements je_malloc_stats_print. */ -void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts); +void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); bool stats_boot(void); diff --git a/src/ctl.c b/src/ctl.c index ae17d44d..c3c029ff 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3522,7 +3522,7 @@ label_return: typedef struct write_cb_packet_s write_cb_packet_t; struct write_cb_packet_s { - void (*write_cb)(void *, const char *); + write_cb_t *write_cb; void *cbopaque; }; diff --git a/src/malloc_io.c b/src/malloc_io.c index 4b7d2e4a..d2879bb4 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -619,8 +619,8 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) 
{ } void -malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, va_list ap) { +malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, + va_list ap) { char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { @@ -643,8 +643,7 @@ malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, */ JEMALLOC_FORMAT_PRINTF(3, 4) void -malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, - const char *format, ...) { +malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, ...) { va_list ap; va_start(ap, format); diff --git a/src/prof_recent.c b/src/prof_recent.c index 7fd77e93..cd72bdab 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -444,8 +444,7 @@ dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { #define PROF_RECENT_PRINT_BUFSIZE 4096 void -prof_recent_alloc_dump(tsd_t *tsd, void (*write_cb)(void *, const char *), - void *cbopaque) { +prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { buf_writer_t buf_writer; buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, PROF_RECENT_PRINT_BUFSIZE); diff --git a/src/stats.c b/src/stats.c index dd31032d..0a1a99da 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1431,8 +1431,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, } void -stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) { +stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { int err; uint64_t epoch; size_t u64sz; From 4d970f8bfca76e55abd34ba461a738744d71e879 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 14:52:01 -0700 Subject: [PATCH 1714/2608] Add forking handling for counter module --- include/jemalloc/internal/counter.h | 3 +++ include/jemalloc/internal/lockedint.h | 8 ++++++++ src/counter.c | 15 +++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/include/jemalloc/internal/counter.h 
b/include/jemalloc/internal/counter.h index 896fd02a..c26a08bd 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -52,5 +52,8 @@ counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { } bool counter_accum_init(counter_accum_t *counter, uint64_t interval); +void counter_prefork(tsdn_t *tsdn, counter_accum_t *counter); +void counter_postfork_parent(tsdn_t *tsdn, counter_accum_t *counter); +void counter_postfork_child(tsdn_t *tsdn, counter_accum_t *counter); #endif /* JEMALLOC_INTERNAL_COUNTER_H */ diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 6a1f9ad1..56cf646c 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -31,12 +31,20 @@ struct locked_zu_s { # define LOCKEDINT_MTX(mtx) (&(mtx)) # define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) # define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) +# define LOCKEDINT_MTX_PREFORK(tsdn, mu) malloc_mutex_prefork(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \ + malloc_mutex_postfork_parent(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \ + malloc_mutex_postfork_child(tsdn, &(mu)) #else # define LOCKEDINT_MTX_DECLARE(name) # define LOCKEDINT_MTX(ptr) NULL # define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false # define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0) # define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0) +# define LOCKEDINT_MTX_PREFORK(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) #endif static inline uint64_t diff --git a/src/counter.c b/src/counter.c index 1b8201e5..6fa9c656 100644 --- a/src/counter.c +++ b/src/counter.c @@ -20,3 +20,18 @@ counter_accum_init(counter_accum_t *counter, uint64_t interval) { return false; } + +void +counter_prefork(tsdn_t *tsdn, counter_accum_t *counter) { + LOCKEDINT_MTX_PREFORK(tsdn, 
counter->mtx); +} + +void +counter_postfork_parent(tsdn_t *tsdn, counter_accum_t *counter) { + LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, counter->mtx); +} + +void +counter_postfork_child(tsdn_t *tsdn, counter_accum_t *counter) { + LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, counter->mtx); +} From 508303077b020ba369ab84e3cf233ae224da861b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 14:58:58 -0700 Subject: [PATCH 1715/2608] Add forking handling for prof idump counter --- src/prof.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prof.c b/src/prof.c index cb71850f..c1e13e91 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1111,6 +1111,7 @@ prof_prefork0(tsdn_t *tsdn) { void prof_prefork1(tsdn_t *tsdn) { if (config_prof && opt_prof) { + counter_prefork(tsdn, &prof_idump_accumulated); malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); @@ -1132,6 +1133,7 @@ prof_postfork_parent(tsdn_t *tsdn) { malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); + counter_postfork_parent(tsdn, &prof_idump_accumulated); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); } @@ -1156,6 +1158,7 @@ prof_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); + counter_postfork_child(tsdn, &prof_idump_accumulated); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); } From f533ab6da623303de5f6621b35e5ec73832a6d22 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 15:09:32 -0700 Subject: [PATCH 1716/2608] Add forking handling for stats --- include/jemalloc/internal/stats.h | 3 +++ src/jemalloc.c | 3 +++ src/stats.c | 15 +++++++++++++++ 3 files 
changed, 21 insertions(+) diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 3720619b..7cd14302 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -44,5 +44,8 @@ bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); bool stats_boot(void); +void stats_prefork(tsdn_t *tsdn); +void stats_postfork_parent(tsdn_t *tsdn); +void stats_postfork_child(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 14b2a088..78da45bc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3989,6 +3989,7 @@ _malloc_prefork(void) } } prof_prefork1(tsd_tsdn(tsd)); + stats_prefork(tsd_tsdn(tsd)); tsd_prefork(tsd); } @@ -4016,6 +4017,7 @@ _malloc_postfork(void) witness_postfork_parent(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. */ + stats_postfork_parent(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; @@ -4045,6 +4047,7 @@ jemalloc_postfork_child(void) { witness_postfork_child(tsd_witness_tsdp_get(tsd)); /* Release all mutexes, now that fork() has completed. 
*/ + stats_postfork_child(tsd_tsdn(tsd)); for (i = 0, narenas = narenas_total_get(); i < narenas; i++) { arena_t *arena; diff --git a/src/stats.c b/src/stats.c index 0a1a99da..56d3b489 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1526,3 +1526,18 @@ stats_boot(void) { return counter_accum_init(&stats_interval_accumulated, stats_interval); } + +void +stats_prefork(tsdn_t *tsdn) { + counter_prefork(tsdn, &stats_interval_accumulated); +} + +void +stats_postfork_parent(tsdn_t *tsdn) { + counter_postfork_parent(tsdn, &stats_interval_accumulated); +} + +void +stats_postfork_child(tsdn_t *tsdn) { + counter_postfork_child(tsdn, &stats_interval_accumulated); +} From b543c20a9494eb8ace71742657f90d81e6df9f49 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Apr 2020 14:52:20 -0700 Subject: [PATCH 1717/2608] Minor update to locked int --- include/jemalloc/internal/arena_stats.h | 2 +- include/jemalloc/internal/lockedint.h | 31 +++++++++++++++---------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 9effa61b..02c93405 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -90,7 +90,7 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { assert(((char *)arena_stats)[i] == 0); } } - if (LOCKEDINT_MTX_INIT(LOCKEDINT_MTX(arena_stats->mtx), "arena_stats", + if (LOCKEDINT_MTX_INIT(arena_stats->mtx, "arena_stats", WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 56cf646c..9d9d521f 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -26,8 +26,8 @@ struct locked_zu_s { #ifndef JEMALLOC_ATOMIC_U64 # define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; -# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) \ - malloc_mutex_init(ptr, name, rank, 
rank_mode) +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \ + malloc_mutex_init(&(mu), name, rank, rank_mode) # define LOCKEDINT_MTX(mtx) (&(mtx)) # define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) # define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) @@ -38,21 +38,28 @@ struct locked_zu_s { malloc_mutex_postfork_child(tsdn, &(mu)) #else # define LOCKEDINT_MTX_DECLARE(name) -# define LOCKEDINT_MTX(ptr) NULL -# define LOCKEDINT_MTX_INIT(ptr, name, rank, rank_mode) false -# define LOCKEDINT_MTX_LOCK(tsdn, mu) do {} while (0) -# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) do {} while (0) +# define LOCKEDINT_MTX(mtx) NULL +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false +# define LOCKEDINT_MTX_LOCK(tsdn, mu) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) # define LOCKEDINT_MTX_PREFORK(tsdn, mu) # define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) # define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) #endif +#ifdef JEMALLOC_ATOMIC_U64 +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL) +#else +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \ + malloc_mutex_assert_owner(tsdn, (mtx)) +#endif + static inline uint64_t locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(&p->val, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); return p->val; #endif } @@ -60,10 +67,10 @@ locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { static inline void locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); p->val += x; #endif } @@ -71,11 +78,11 @@ locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, static inline void locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, 
uint64_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED); assert(r - x <= r); #else - malloc_mutex_assert_owner(tsdn, mtx); p->val -= x; assert(p->val + x >= p->val); #endif @@ -108,10 +115,10 @@ locked_read_u64_unsynchronized(locked_u64_t *p) { static inline size_t locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_zu(&p->val, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); return atomic_load_zu(&p->val, ATOMIC_RELAXED); #endif } @@ -119,10 +126,10 @@ locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { static inline void locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED); #else - malloc_mutex_assert_owner(tsdn, mtx); size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); atomic_store_zu(&p->val, cur + x, ATOMIC_RELAXED); #endif @@ -131,11 +138,11 @@ locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, static inline void locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED); assert(r - x <= r); #else - malloc_mutex_assert_owner(tsdn, mtx); size_t cur = atomic_load_zu(&p->val, ATOMIC_RELAXED); atomic_store_zu(&p->val, cur - x, ATOMIC_RELAXED); #endif From fc052ff7284ef3695b81b9127f7d8a7cb25ae0b2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 14 Apr 2020 15:08:00 -0700 Subject: [PATCH 1718/2608] Migrate counter to use locked int --- include/jemalloc/internal/counter.h | 51 +++++++-------------------- include/jemalloc/internal/lockedint.h | 38 ++++++++++++++++++++ src/counter.c | 10 ++---- test/unit/counter.c | 7 +--- 4 files changed, 
54 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index c26a08bd..79abf064 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -4,50 +4,25 @@ #include "jemalloc/internal/mutex.h" typedef struct counter_accum_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; - uint64_t accumbytes; -#else - atomic_u64_t accumbytes; -#endif + LOCKEDINT_MTX_DECLARE(mtx) + locked_u64_t accumbytes; uint64_t interval; } counter_accum_t; JEMALLOC_ALWAYS_INLINE bool -counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t accumbytes) { - bool overflow; - uint64_t a0, a1; - - /* - * If the event moves fast enough (and/or if the event handling is slow - * enough), extreme overflow here (a1 >= interval * 2) can cause counter - * trigger coalescing. This is an intentional mechanism that avoids - * rate-limiting allocation. - */ +counter_accum(tsdn_t *tsdn, counter_accum_t *counter, uint64_t bytes) { uint64_t interval = counter->interval; assert(interval > 0); -#ifdef JEMALLOC_ATOMIC_U64 - a0 = atomic_load_u64(&counter->accumbytes, ATOMIC_RELAXED); - do { - a1 = a0 + accumbytes; - assert(a1 >= a0); - overflow = (a1 >= interval); - if (overflow) { - a1 %= interval; - } - } while (!atomic_compare_exchange_weak_u64(&counter->accumbytes, &a0, a1, - ATOMIC_RELAXED, ATOMIC_RELAXED)); -#else - malloc_mutex_lock(tsdn, &counter->mtx); - a0 = counter->accumbytes; - a1 = a0 + accumbytes; - overflow = (a1 >= interval); - if (overflow) { - a1 %= interval; - } - counter->accumbytes = a1; - malloc_mutex_unlock(tsdn, &counter->mtx); -#endif + LOCKEDINT_MTX_LOCK(tsdn, counter->mtx); + /* + * If the event moves fast enough (and/or if the event handling is slow + * enough), extreme overflow can cause counter trigger coalescing. + * This is an intentional mechanism that avoids rate-limiting + * allocation. 
+ */ + bool overflow = locked_inc_mod_u64(tsdn, LOCKEDINT_MTX(counter->mtx), + &counter->accumbytes, bytes, interval); + LOCKEDINT_MTX_UNLOCK(tsdn, counter->mtx); return overflow; } diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 9d9d521f..d020ebec 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -88,6 +88,36 @@ locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, #endif } +/* Increment and take modulus. Returns whether the modulo made any change. */ +static inline bool +locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, + const uint64_t x, const uint64_t modulus) { + LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); + uint64_t before, after; + bool overflow; +#ifdef JEMALLOC_ATOMIC_U64 + before = atomic_load_u64(&p->val, ATOMIC_RELAXED); + do { + after = before + x; + assert(after >= before); + overflow = (after >= modulus); + if (overflow) { + after %= modulus; + } + } while (!atomic_compare_exchange_weak_u64(&p->val, &before, after, + ATOMIC_RELAXED, ATOMIC_RELAXED)); +#else + before = p->val; + after = before + x; + overflow = (after >= modulus); + if (overflow) { + after %= modulus; + } + p->val = after; +#endif + return overflow; +} + /* * Non-atomically sets *dst += src. *dst needs external synchronization. 
* This lets us avoid the cost of a fetch_add when its unnecessary (note that @@ -110,7 +140,15 @@ locked_read_u64_unsynchronized(locked_u64_t *p) { #else return p->val; #endif +} +static inline void +locked_init_u64_unsynchronized(locked_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_store_u64(&p->val, x, ATOMIC_RELAXED); +#else + p->val = x; +#endif } static inline size_t diff --git a/src/counter.c b/src/counter.c index 6fa9c656..71eda69f 100644 --- a/src/counter.c +++ b/src/counter.c @@ -6,18 +6,12 @@ bool counter_accum_init(counter_accum_t *counter, uint64_t interval) { -#ifndef JEMALLOC_ATOMIC_U64 - if (malloc_mutex_init(&counter->mtx, "counter_accum", + if (LOCKEDINT_MTX_INIT(counter->mtx, "counter_accum", WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { return true; } - counter->accumbytes = 0; -#else - atomic_store_u64(&counter->accumbytes, 0, - ATOMIC_RELAXED); -#endif + locked_init_u64_unsynchronized(&counter->accumbytes, 0); counter->interval = interval; - return false; } diff --git a/test/unit/counter.c b/test/unit/counter.c index c14eee31..277baac1 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -27,12 +27,7 @@ TEST_END void expect_counter_value(counter_accum_t *c, uint64_t v) { - uint64_t accum; -#ifdef JEMALLOC_ATOMIC_U64 - accum = atomic_load_u64(&(c->accumbytes), ATOMIC_RELAXED); -#else - accum = c->accumbytes; -#endif + uint64_t accum = locked_read_u64_unsynchronized(&c->accumbytes); expect_u64_eq(accum, v, "Counter value mismatch"); } From 855d20f6f3d79d00fad35d63456fbdc0e02a0747 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 23 Mar 2020 14:57:20 -0700 Subject: [PATCH 1719/2608] Remove outdated comments in thread event --- include/jemalloc/internal/thread_event.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index b05ff25a..229136b6 100644 --- a/include/jemalloc/internal/thread_event.h +++ 
b/include/jemalloc/internal/thread_event.h @@ -186,11 +186,9 @@ te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { * of thread event handling that we can rely on and need to promise. * The invariants are only temporarily violated in the middle of: * (a) event_advance() if an event is triggered (the te_event_trigger() call - * at the end will restore the invariants), + * at the end will restore the invariants), or * (b) te_##event##_event_update() (the te_event_update() call at the - * end will restore the invariants), or - * (c) te_alloc_rollback() if the rollback falls below the last_event - * (the te_event_update() call at the end will restore the invariants). + * end will restore the invariants). */ JEMALLOC_ALWAYS_INLINE void te_assert_invariants(tsd_t *tsd) { From 1e2524e15a004af50fd79f79b4b6efcfce0164b8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 23 Mar 2020 14:58:33 -0700 Subject: [PATCH 1720/2608] Do not reset sample wait time when re-initing tdata --- include/jemalloc/internal/prof_data.h | 2 +- src/prof.c | 4 ++-- src/prof_data.c | 6 +----- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 95dc6b0b..46a35105 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -13,7 +13,7 @@ bool prof_data_init(tsd_t *tsd); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, - uint64_t thr_discrim, char *thread_name, bool active, bool reset_interval); + uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); diff --git a/src/prof.c b/src/prof.c index c1e13e91..2e1d7689 100644 --- a/src/prof.c +++ b/src/prof.c @@ -795,7 +795,7 @@ prof_thr_uid_alloc(tsdn_t *tsdn) { 
prof_tdata_t * prof_tdata_init(tsd_t *tsd) { return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0, - NULL, prof_thread_active_init_get(tsd_tsdn(tsd)), false); + NULL, prof_thread_active_init_get(tsd_tsdn(tsd))); } prof_tdata_t * @@ -808,7 +808,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { prof_tdata_detach(tsd, tdata); return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active, true); + active); } void diff --git a/src/prof_data.c b/src/prof_data.c index 9721cbe7..66ed36a0 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1245,7 +1245,7 @@ prof_bt_keycomp(const void *k1, const void *k2) { prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, - char *thread_name, bool active, bool reset_interval) { + char *thread_name, bool active) { assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata; @@ -1274,10 +1274,6 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - if (reset_interval) { - prof_sample_threshold_update(tsd); - } - tdata->enq = false; tdata->enq_idump = false; tdata->enq_gdump = false; From 733ae918f0d848a64e88e622e348749fe6756d89 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Apr 2020 10:49:08 -0700 Subject: [PATCH 1721/2608] Extract out per event new wait time fetching --- include/jemalloc/internal/prof_externs.h | 5 +++-- include/jemalloc/internal/stats.h | 3 ++- include/jemalloc/internal/tcache_externs.h | 8 ++++++-- src/prof.c | 17 +++++++-------- src/stats.c | 2 +- src/tcache.c | 10 +++++++++ src/thread_event.c | 24 +++++++++++++++++----- 7 files changed, 48 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a6b659c1..2284ae65 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -74,7 +74,6 @@ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, #endif 
int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); @@ -99,7 +98,9 @@ void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -void prof_sample_threshold_update(tsd_t *tsd); +/* Only accessed by thread event. */ +uint64_t prof_sample_new_event_wait(tsd_t *tsd); +bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 7cd14302..42c321e5 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -37,7 +37,8 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; #define STATS_INTERVAL_ACCUM_LG_BATCH_SIZE 6 #define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20) -uint64_t stats_interval_accum_batch_size(void); +/* Only accessed by thread event. */ +uint64_t stats_interval_new_event_wait(tsd_t *tsd); bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); /* Implements je_malloc_stats_print. 
*/ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 7ca38d68..7fd730d6 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -26,8 +26,6 @@ extern cache_bin_info_t *tcache_bin_info; extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, - tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); @@ -55,4 +53,10 @@ bool tsd_tcache_enabled_data_init(tsd_t *tsd); void tcache_assert_initialized(tcache_t *tcache); +/* Only accessed by thread event. */ +uint64_t tcache_gc_new_event_wait(tsd_t *tsd); +uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); +void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, + tcache_t *tcache); + #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 2e1d7689..94055855 100644 --- a/src/prof.c +++ b/src/prof.c @@ -518,16 +518,11 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { * (e.g. * -mno-sse) in order for the workaround to be complete. */ -void -prof_sample_threshold_update(tsd_t *tsd) { +uint64_t +prof_sample_new_event_wait(tsd_t *tsd) { #ifdef JEMALLOC_PROF - if (!config_prof) { - return; - } - if (lg_prof_sample == 0) { - te_prof_sample_event_update(tsd, TE_MIN_START_WAIT); - return; + return TE_MIN_START_WAIT; } /* @@ -557,10 +552,12 @@ prof_sample_threshold_update(tsd_t *tsd) { */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); double u = (r == 0U) ? 
1.0 : (double)r * (1.0/9007199254740992.0L); - uint64_t bytes_until_sample = (uint64_t)(log(u) / + return (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; - te_prof_sample_event_update(tsd, bytes_until_sample); +#else + not_reached(); + return TE_MAX_START_WAIT; #endif } diff --git a/src/stats.c b/src/stats.c index 56d3b489..9d13f596 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1499,7 +1499,7 @@ stats_interval_accum(tsd_t *tsd, uint64_t bytes) { } uint64_t -stats_interval_accum_batch_size(void) { +stats_interval_new_event_wait(tsd_t *tsd) { return stats_interval_accum_batch; } diff --git a/src/tcache.c b/src/tcache.c index 63e1a4d5..cba2ea72 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -40,6 +40,16 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { return arena_salloc(tsdn, ptr); } +uint64_t +tcache_gc_new_event_wait(tsd_t *tsd) { + return TCACHE_GC_INCR_BYTES; +} + +uint64_t +tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { + return TCACHE_GC_INCR_BYTES; +} + void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { szind_t binind = tcache_slow->next_gc_bin; diff --git a/src/thread_event.c b/src/thread_event.c index c96dea6e..99467ee3 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,6 +4,17 @@ #include "jemalloc/internal/thread_event.h" +/* + * Signatures for functions computing new event wait time. The functions + * should be defined by the modules owning each event. The signatures here are + * used to verify that the definitions are in the right shape. + */ +#define E(event, condition_unused, is_alloc_event_unused) \ +uint64_t event##_new_event_wait(tsd_t *tsd); + +ITERATE_OVER_ALL_EVENTS +#undef E + /* TSD event init function signatures. 
*/ #define E(event, condition_unused, is_alloc_event_unused) \ static void te_tsd_##event##_event_init(tsd_t *tsd); @@ -22,26 +33,29 @@ ITERATE_OVER_ALL_EVENTS static void te_tsd_tcache_gc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - te_tcache_gc_event_update(tsd, TCACHE_GC_INCR_BYTES); + uint64_t wait = tcache_gc_new_event_wait(tsd); + te_tcache_gc_event_update(tsd, wait); } static void te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { assert(TCACHE_GC_INCR_BYTES > 0); - te_tcache_gc_dalloc_event_update(tsd, TCACHE_GC_INCR_BYTES); + uint64_t wait = tcache_gc_dalloc_new_event_wait(tsd); + te_tcache_gc_dalloc_event_update(tsd, wait); } static void te_tsd_prof_sample_event_init(tsd_t *tsd) { assert(config_prof && opt_prof); - prof_sample_threshold_update(tsd); + uint64_t wait = prof_sample_new_event_wait(tsd); + te_prof_sample_event_update(tsd, wait); } static void te_tsd_stats_interval_event_init(tsd_t *tsd) { assert(opt_stats_interval >= 0); - uint64_t interval = stats_interval_accum_batch_size(); - te_stats_interval_event_update(tsd, interval); + uint64_t wait = stats_interval_new_event_wait(tsd); + te_stats_interval_event_update(tsd, wait); } /* Handler functions. 
*/ From 6de77799de0d8a705c595aa11f9dc70f147501ad Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 24 Mar 2020 08:31:34 -0700 Subject: [PATCH 1722/2608] Move thread event wait time update to local --- include/jemalloc/internal/thread_event.h | 24 ----------------------- src/thread_event.c | 25 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 229136b6..60fbfcba 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -33,7 +33,6 @@ typedef struct te_ctx_s { void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); -void te_event_update(tsd_t *tsd, bool alloc_event); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); @@ -251,27 +250,4 @@ thread_alloc_event(tsd_t *tsd, size_t usize) { te_event_advance(tsd, usize, true); } -#define E(event, condition, is_alloc) \ -JEMALLOC_ALWAYS_INLINE void \ -te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - te_assert_invariants(tsd); \ - assert(condition); \ - assert(tsd_nominal(tsd)); \ - assert(tsd_reentrancy_level_get(tsd) == 0); \ - assert(event_wait > 0U); \ - if (TE_MIN_START_WAIT > 1U && \ - unlikely(event_wait < TE_MIN_START_WAIT)) { \ - event_wait = TE_MIN_START_WAIT; \ - } \ - if (TE_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > TE_MAX_START_WAIT)) { \ - event_wait = TE_MAX_START_WAIT; \ - } \ - event##_event_wait_set(tsd, event_wait); \ - te_event_update(tsd, is_alloc); \ -} - -ITERATE_OVER_ALL_EVENTS -#undef E - #endif /* JEMALLOC_INTERNAL_THREAD_EVENT_H */ diff --git a/src/thread_event.c b/src/thread_event.c index 99467ee3..8f718ddc 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,6 +4,31 @@ #include "jemalloc/internal/thread_event.h" +static void te_event_update(tsd_t *tsd, bool alloc_event); + +#define E(event, condition, 
is_alloc) \ +static void \ +te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ + te_assert_invariants(tsd); \ + assert(condition); \ + assert(tsd_nominal(tsd)); \ + assert(tsd_reentrancy_level_get(tsd) == 0); \ + assert(event_wait > 0U); \ + if (TE_MIN_START_WAIT > 1U && \ + unlikely(event_wait < TE_MIN_START_WAIT)) { \ + event_wait = TE_MIN_START_WAIT; \ + } \ + if (TE_MAX_START_WAIT < UINT64_MAX && \ + unlikely(event_wait > TE_MAX_START_WAIT)) { \ + event_wait = TE_MAX_START_WAIT; \ + } \ + event##_event_wait_set(tsd, event_wait); \ + te_event_update(tsd, is_alloc); \ +} + +ITERATE_OVER_ALL_EVENTS +#undef E + /* * Signatures for functions computing new event wait time. The functions * should be defined by the modules owning each event. The signatures here are From 7324c4f85f8d3d9597a1942dffcc6bf98b02fb8c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 10:00:46 -0700 Subject: [PATCH 1723/2608] Break down event init and handler functions --- src/thread_event.c | 66 ++++++++-------------------------------------- 1 file changed, 11 insertions(+), 55 deletions(-) diff --git a/src/thread_event.c b/src/thread_event.c index 8f718ddc..d5649dff 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -40,53 +40,16 @@ uint64_t event##_new_event_wait(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E -/* TSD event init function signatures. */ -#define E(event, condition_unused, is_alloc_event_unused) \ -static void te_tsd_##event##_event_init(tsd_t *tsd); - -ITERATE_OVER_ALL_EVENTS -#undef E - /* Event handler function signatures. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void te_##event##_event_handler(tsd_t *tsd); +static void event##_event_handler(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E -/* (Re)Init functions. 
*/ -static void -te_tsd_tcache_gc_event_init(tsd_t *tsd) { - assert(TCACHE_GC_INCR_BYTES > 0); - uint64_t wait = tcache_gc_new_event_wait(tsd); - te_tcache_gc_event_update(tsd, wait); -} - -static void -te_tsd_tcache_gc_dalloc_event_init(tsd_t *tsd) { - assert(TCACHE_GC_INCR_BYTES > 0); - uint64_t wait = tcache_gc_dalloc_new_event_wait(tsd); - te_tcache_gc_dalloc_event_update(tsd, wait); -} - -static void -te_tsd_prof_sample_event_init(tsd_t *tsd) { - assert(config_prof && opt_prof); - uint64_t wait = prof_sample_new_event_wait(tsd); - te_prof_sample_event_update(tsd, wait); -} - -static void -te_tsd_stats_interval_event_init(tsd_t *tsd) { - assert(opt_stats_interval >= 0); - uint64_t wait = stats_interval_new_event_wait(tsd); - te_stats_interval_event_update(tsd, wait); -} - /* Handler functions. */ static void tcache_gc_event(tsd_t *tsd) { - assert(TCACHE_GC_INCR_BYTES > 0); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); @@ -95,45 +58,35 @@ tcache_gc_event(tsd_t *tsd) { } static void -te_tcache_gc_event_handler(tsd_t *tsd) { - assert(tcache_gc_event_wait_get(tsd) == 0U); - te_tsd_tcache_gc_event_init(tsd); +tcache_gc_event_handler(tsd_t *tsd) { tcache_gc_event(tsd); } static void -te_tcache_gc_dalloc_event_handler(tsd_t *tsd) { - assert(tcache_gc_dalloc_event_wait_get(tsd) == 0U); - te_tsd_tcache_gc_dalloc_event_init(tsd); +tcache_gc_dalloc_event_handler(tsd_t *tsd) { tcache_gc_event(tsd); } static void -te_prof_sample_event_handler(tsd_t *tsd) { - assert(config_prof && opt_prof); - assert(prof_sample_event_wait_get(tsd) == 0U); +prof_sample_event_handler(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_sample_event = prof_sample_last_event_get(tsd); prof_sample_last_event_set(tsd, last_event); if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { prof_idump(tsd_tsdn(tsd)); } - te_tsd_prof_sample_event_init(tsd); } static void 
-te_stats_interval_event_handler(tsd_t *tsd) { - assert(opt_stats_interval >= 0); - assert(stats_interval_event_wait_get(tsd) == 0U); +stats_interval_event_handler(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_stats_event = stats_interval_last_event_get(tsd); stats_interval_last_event_set(tsd, last_event); - if (stats_interval_accum(tsd, last_event - last_stats_event)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } - te_tsd_stats_interval_event_init(tsd); } + /* Per event facilities done. */ static bool @@ -352,7 +305,9 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { if (is_alloc == alloc_event && condition && \ event##_event_wait_get(tsd) == 0U) { \ assert(allow_event_trigger); \ - te_##event##_event_handler(tsd); \ + uint64_t wait = event##_new_event_wait(tsd); \ + te_##event##_event_update(tsd, wait); \ + event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS @@ -384,7 +339,8 @@ void tsd_te_init(tsd_t *tsd) { #define E(event, condition, is_alloc_event_unused) \ if (condition) { \ - te_tsd_##event##_event_init(tsd); \ + uint64_t wait = event##_new_event_wait(tsd); \ + te_##event##_event_update(tsd, wait); \ } ITERATE_OVER_ALL_EVENTS From f72014d09773c529e863eab653331461a740c60c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 25 Mar 2020 09:33:52 -0700 Subject: [PATCH 1724/2608] Only compute thread event threshold once per trigger --- include/jemalloc/internal/thread_event.h | 12 +- src/thread_event.c | 146 +++++++++-------------- 2 files changed, 63 insertions(+), 95 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 60fbfcba..321baaac 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -32,7 +32,7 @@ typedef struct te_ctx_s { } te_ctx_t; void te_assert_invariants_debug(tsd_t *tsd); -void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event); +void 
te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); @@ -183,11 +183,9 @@ te_ctx_next_event_set(tsd_t *tsd, te_ctx_t *ctx, uint64_t v) { * The function checks in debug mode whether the thread event counters are in * a consistent state, which forms the invariants before and after each round * of thread event handling that we can rely on and need to promise. - * The invariants are only temporarily violated in the middle of: - * (a) event_advance() if an event is triggered (the te_event_trigger() call - * at the end will restore the invariants), or - * (b) te_##event##_event_update() (the te_event_update() call at the - * end will restore the invariants). + * The invariants are only temporarily violated in the middle of + * te_event_advance() if an event is triggered (the te_event_trigger() call at + * the end will restore the invariants). */ JEMALLOC_ALWAYS_INLINE void te_assert_invariants(tsd_t *tsd) { @@ -236,7 +234,7 @@ te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { if (likely(usize < te_ctx_next_event_get(&ctx) - bytes_before)) { te_assert_invariants(tsd); } else { - te_event_trigger(tsd, &ctx, false); + te_event_trigger(tsd, &ctx); } } diff --git a/src/thread_event.c b/src/thread_event.c index d5649dff..0d71f32d 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -4,31 +4,6 @@ #include "jemalloc/internal/thread_event.h" -static void te_event_update(tsd_t *tsd, bool alloc_event); - -#define E(event, condition, is_alloc) \ -static void \ -te_##event##_event_update(tsd_t *tsd, uint64_t event_wait) { \ - te_assert_invariants(tsd); \ - assert(condition); \ - assert(tsd_nominal(tsd)); \ - assert(tsd_reentrancy_level_get(tsd) == 0); \ - assert(event_wait > 0U); \ - if (TE_MIN_START_WAIT > 1U && \ - unlikely(event_wait < TE_MIN_START_WAIT)) { \ - event_wait = TE_MIN_START_WAIT; \ - } \ - if (TE_MAX_START_WAIT < UINT64_MAX && \ - unlikely(event_wait > TE_MAX_START_WAIT)) { \ - 
event_wait = TE_MAX_START_WAIT; \ - } \ - event##_event_wait_set(tsd, event_wait); \ - te_event_update(tsd, is_alloc); \ -} - -ITERATE_OVER_ALL_EVENTS -#undef E - /* * Signatures for functions computing new event wait time. The functions * should be defined by the modules owning each event. The signatures here are @@ -246,33 +221,49 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, } static uint64_t -te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, - bool allow_event_trigger) { +te_clip_event_wait(uint64_t event_wait) { + assert(event_wait > 0U); + if (TE_MIN_START_WAIT > 1U && + unlikely(event_wait < TE_MIN_START_WAIT)) { + event_wait = TE_MIN_START_WAIT; + } + if (TE_MAX_START_WAIT < UINT64_MAX && + unlikely(event_wait > TE_MAX_START_WAIT)) { + event_wait = TE_MAX_START_WAIT; + } + return event_wait; +} + +void +te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { + /* usize has already been added to thread_allocated. */ + uint64_t bytes_after = te_ctx_current_bytes_get(ctx); + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); + + te_ctx_last_event_set(ctx, bytes_after); + + bool allow_event_trigger = tsd_nominal(tsd) && + tsd_reentrancy_level_get(tsd) == 0; + bool is_alloc = ctx->is_alloc; uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ + bool is_##event##_triggered = false; \ if (is_alloc == alloc_event && condition) { \ uint64_t event_wait = event##_event_wait_get(tsd); \ assert(event_wait <= TE_MAX_START_WAIT); \ if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ + } else if (!allow_event_trigger) { \ + event_wait = TE_MIN_START_WAIT; \ } else { \ - event_wait = 0U; \ - if (!allow_event_trigger) { \ - event_wait = TE_MIN_START_WAIT; \ - } \ + is_##event##_triggered = true; \ + event_wait = event##_new_event_wait(tsd); \ } \ - assert(event_wait <= TE_MAX_START_WAIT); \ + event_wait = te_clip_event_wait(event_wait); \ event##_event_wait_set(tsd, event_wait); \ - /* \ - * If there is a single event, then the remaining wait \ - * time may become zero, and we rely on either the \ - * event handler or a te_event_update() call later \ - * to properly set next_event; if there are multiple \ - * events, then here we can get the minimum remaining \ - * wait time to the next already set event. \ - */ \ - if (event_wait > 0U && event_wait < wait) { \ + if (event_wait < wait) { \ wait = event_wait; \ } \ } @@ -281,68 +272,47 @@ te_batch_accum(tsd_t *tsd, uint64_t accumbytes, bool is_alloc, #undef E assert(wait <= TE_MAX_START_WAIT); - return wait; -} - -void -te_event_trigger(tsd_t *tsd, te_ctx_t *ctx, bool delay_event) { - /* usize has already been added to thread_allocated. */ - uint64_t bytes_after = te_ctx_current_bytes_get(ctx); - /* The subtraction is intentionally susceptible to underflow. 
*/ - uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx); - - te_ctx_last_event_set(ctx, bytes_after); - bool allow_event_trigger = !delay_event && tsd_nominal(tsd) && - tsd_reentrancy_level_get(tsd) == 0; - - bool is_alloc = ctx->is_alloc; - uint64_t wait = te_batch_accum(tsd, accumbytes, is_alloc, - allow_event_trigger); te_adjust_thresholds_helper(tsd, ctx, wait); - te_assert_invariants(tsd); + #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition && \ - event##_event_wait_get(tsd) == 0U) { \ + is_##event##_triggered) { \ assert(allow_event_trigger); \ - uint64_t wait = event##_new_event_wait(tsd); \ - te_##event##_event_update(tsd, wait); \ event##_event_handler(tsd); \ } ITERATE_OVER_ALL_EVENTS #undef E + te_assert_invariants(tsd); } -void -te_event_update(tsd_t *tsd, bool is_alloc) { - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, is_alloc); - - uint64_t wait = te_next_event_compute(tsd, is_alloc); - te_adjust_thresholds_helper(tsd, &ctx, wait); - - uint64_t last_event = te_ctx_last_event_get(&ctx); - /* Both subtractions are intentionally susceptible to underflow. */ - if (te_ctx_current_bytes_get(&ctx) - last_event >= - te_ctx_next_event_get(&ctx) - last_event) { - te_event_trigger(tsd, &ctx, true); - } else { - te_assert_invariants(tsd); - } -} - -void tsd_te_init(tsd_t *tsd) { - /* Make sure no overflow for the bytes accumulated on event_trigger. 
*/ - assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); - -#define E(event, condition, is_alloc_event_unused) \ - if (condition) { \ - uint64_t wait = event##_new_event_wait(tsd); \ - te_##event##_event_update(tsd, wait); \ +static void +te_init(tsd_t *tsd, bool is_alloc) { + uint64_t wait = TE_MAX_START_WAIT; +#define E(event, condition, alloc_event) \ + if (is_alloc == alloc_event && condition) { \ + uint64_t event_wait = event##_new_event_wait(tsd); \ + event_wait = te_clip_event_wait(event_wait); \ + event##_event_wait_set(tsd, event_wait); \ + if (event_wait < wait) { \ + wait = event_wait; \ + } \ } ITERATE_OVER_ALL_EVENTS #undef E + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); + te_adjust_thresholds_helper(tsd, &ctx, wait); +} + +void +tsd_te_init(tsd_t *tsd) { + /* Make sure no overflow for the bytes accumulated on event_trigger. */ + assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1); + te_init(tsd, true); + te_init(tsd, false); + te_assert_invariants(tsd); } From abd467493110efbcf92f0e85a699f9cda47daff7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 13:33:56 -0700 Subject: [PATCH 1725/2608] Extract out per event postponed wait time fetching --- include/jemalloc/internal/prof_externs.h | 2 ++ include/jemalloc/internal/stats.h | 1 + include/jemalloc/internal/tcache_externs.h | 2 ++ src/prof.c | 5 +++++ src/stats.c | 5 +++++ src/tcache.c | 10 ++++++++++ src/thread_event.c | 17 ++++++++++++----- 7 files changed, 37 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 2284ae65..d500f560 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -98,8 +98,10 @@ void prof_prefork0(tsdn_t *tsdn); void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); + /* Only accessed by thread event. 
*/ uint64_t prof_sample_new_event_wait(tsd_t *tsd); +uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); bool prof_log_start(tsdn_t *tsdn, const char *filename); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 42c321e5..4989fe1a 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -39,6 +39,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; /* Only accessed by thread event. */ uint64_t stats_interval_new_event_wait(tsd_t *tsd); +uint64_t stats_interval_postponed_event_wait(tsd_t *tsd); bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); /* Implements je_malloc_stats_print. */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 7fd730d6..3be65286 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -55,7 +55,9 @@ void tcache_assert_initialized(tcache_t *tcache); /* Only accessed by thread event. 
*/ uint64_t tcache_gc_new_event_wait(tsd_t *tsd); +uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd); uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); +uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache); diff --git a/src/prof.c b/src/prof.c index 94055855..ad83cfe5 100644 --- a/src/prof.c +++ b/src/prof.c @@ -561,6 +561,11 @@ prof_sample_new_event_wait(tsd_t *tsd) { #endif } +uint64_t +prof_sample_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + int prof_getpid(void) { #ifdef _WIN32 diff --git a/src/stats.c b/src/stats.c index 9d13f596..16d4e88e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1503,6 +1503,11 @@ stats_interval_new_event_wait(tsd_t *tsd) { return stats_interval_accum_batch; } +uint64_t +stats_interval_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index cba2ea72..16c87cb0 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -45,11 +45,21 @@ tcache_gc_new_event_wait(tsd_t *tsd) { return TCACHE_GC_INCR_BYTES; } +uint64_t +tcache_gc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { return TCACHE_GC_INCR_BYTES; } +uint64_t +tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { szind_t binind = tcache_slow->next_gc_bin; diff --git a/src/thread_event.c b/src/thread_event.c index 0d71f32d..9de8376d 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -5,12 +5,19 @@ #include "jemalloc/internal/thread_event.h" /* - * Signatures for functions computing new event wait time. The functions - * should be defined by the modules owning each event. The signatures here are - * used to verify that the definitions are in the right shape. 
+ * Signatures for functions computing new / postponed event wait time. New + * event wait time is the time till the next event if an event is currently + * being triggered; postponed event wait time is the time till the next event + * if an event should be triggered but needs to be postponed, e.g. when the TSD + * is not nominal or during reentrancy. + * + * These event wait time computation functions should be defined by the modules + * owning each event. The signatures here are used to verify that the + * definitions follow the right format. */ #define E(event, condition_unused, is_alloc_event_unused) \ -uint64_t event##_new_event_wait(tsd_t *tsd); +uint64_t event##_new_event_wait(tsd_t *tsd); \ +uint64_t event##_postponed_event_wait(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E @@ -256,7 +263,7 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { if (event_wait > accumbytes) { \ event_wait -= accumbytes; \ } else if (!allow_event_trigger) { \ - event_wait = TE_MIN_START_WAIT; \ + event_wait = event##_postponed_event_wait(tsd); \ } else { \ is_##event##_triggered = true; \ event_wait = event##_new_event_wait(tsd); \ From 381c97caa41eb85b52afca40794b2223e7f36d33 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 13:37:19 -0700 Subject: [PATCH 1726/2608] Treat postponed prof sample event as new event --- src/prof.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index ad83cfe5..77aa44d4 100644 --- a/src/prof.c +++ b/src/prof.c @@ -563,7 +563,15 @@ prof_sample_new_event_wait(tsd_t *tsd) { uint64_t prof_sample_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; + /* + * The postponed wait time for prof sample event is computed as if we + * want a new wait time (i.e. as if the event were triggered). If we + * instead postpone to the immediate next allocation, like how we're + * handling the other events, then we can have sampling bias, if e.g. 
+ * the allocation immediately following a reentrancy always comes from + * the same stack trace. + */ + return prof_sample_new_event_wait(tsd); } int From b06dfb9ccc1fb942c6d871a8e184fed496b59fc1 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 17 Apr 2020 10:38:06 -0700 Subject: [PATCH 1727/2608] Push event handlers to constituent modules --- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/stats.h | 2 +- include/jemalloc/internal/tcache_externs.h | 4 +- include/jemalloc/internal/thread_event.h | 6 +++ src/prof.c | 25 ++++----- src/stats.c | 14 +++-- src/tcache.c | 22 +++++++- src/thread_event.c | 60 ++++++++++------------ 8 files changed, 78 insertions(+), 57 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index d500f560..3d9fcfb8 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -102,7 +102,7 @@ void prof_postfork_child(tsdn_t *tsdn); /* Only accessed by thread event. */ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); -bool prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes); +void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 4989fe1a..93bde22e 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -40,7 +40,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; /* Only accessed by thread event. */ uint64_t stats_interval_new_event_wait(tsd_t *tsd); uint64_t stats_interval_postponed_event_wait(tsd_t *tsd); -bool stats_interval_accum(tsd_t *tsd, uint64_t bytes); +void stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed); /* Implements je_malloc_stats_print. 
*/ void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 3be65286..6eca928c 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -56,9 +56,9 @@ void tcache_assert_initialized(tcache_t *tcache); /* Only accessed by thread event. */ uint64_t tcache_gc_new_event_wait(tsd_t *tsd); uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd); +void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed); uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); -void tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, - tcache_t *tcache); +void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 321baaac..3a848829 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -23,6 +23,12 @@ */ #define TE_MAX_INTERVAL ((uint64_t)(4U << 20)) +/* + * Invalid elapsed time, for situations where elapsed time is not needed. See + * comments in thread_event.c for more info. + */ +#define TE_INVALID_ELAPSED UINT64_MAX + typedef struct te_ctx_s { bool is_alloc; uint64_t *current; diff --git a/src/prof.c b/src/prof.c index 77aa44d4..c8da81da 100644 --- a/src/prof.c +++ b/src/prof.c @@ -50,7 +50,7 @@ bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_experimental_use_sys_thread_name = false; -/* Accessed via prof_idump_accum(). */ +/* Accessed via prof_sample_event_handler(). 
*/ static counter_accum_t prof_idump_accumulated; /* @@ -574,6 +574,18 @@ prof_sample_postponed_event_wait(tsd_t *tsd) { return prof_sample_new_event_wait(tsd); } +void +prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) { + cassert(config_prof); + assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) { + prof_idump(tsd_tsdn(tsd)); + } +} + int prof_getpid(void) { #ifdef _WIN32 @@ -658,17 +670,6 @@ prof_idump_accum_init(void) { return counter_accum_init(&prof_idump_accumulated, prof_interval); } -bool -prof_idump_accum(tsdn_t *tsdn, uint64_t accumbytes) { - cassert(config_prof); - - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return false; - } - - return counter_accum(tsdn, &prof_idump_accumulated, accumbytes); -} - bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); diff --git a/src/stats.c b/src/stats.c index 16d4e88e..42e4a1ca 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1493,11 +1493,6 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { emitter_end(&emitter); } -bool -stats_interval_accum(tsd_t *tsd, uint64_t bytes) { - return counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, bytes); -} - uint64_t stats_interval_new_event_wait(tsd_t *tsd) { return stats_interval_accum_batch; @@ -1508,6 +1503,15 @@ stats_interval_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } +void +stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) { + assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); + if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, + elapsed)) { + je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); + } +} + bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index 16c87cb0..f8188cb8 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -60,8 +60,14 @@ 
tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } -void -tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { +static void +tcache_event(tsd_t *tsd) { + tcache_t *tcache = tcache_get(tsd); + if (tcache == NULL) { + return; + } + + tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); szind_t binind = tcache_slow->next_gc_bin; bool is_small = (binind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[binind]; @@ -110,6 +116,18 @@ tcache_event_hard(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache) { } } +void +tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) { + assert(elapsed == TE_INVALID_ELAPSED); + tcache_event(tsd); +} + +void +tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { + assert(elapsed == TE_INVALID_ELAPSED); + tcache_event(tsd); +} + void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, diff --git a/src/thread_event.c b/src/thread_event.c index 9de8376d..40c0487e 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -5,68 +5,59 @@ #include "jemalloc/internal/thread_event.h" /* - * Signatures for functions computing new / postponed event wait time. New + * Signatures for event specific functions. These functions should be defined + * by the modules owning each event. The signatures here verify that the + * definitions follow the right format. + * + * The first two are functions computing new / postponed event wait time. New * event wait time is the time till the next event if an event is currently * being triggered; postponed event wait time is the time till the next event * if an event should be triggered but needs to be postponed, e.g. when the TSD * is not nominal or during reentrancy. * - * These event wait time computation functions should be defined by the modules - * owning each event. The signatures here are used to verify that the - * definitions follow the right format. 
+ * The third is the event handler function, which is called whenever an event + * is triggered. The parameter is the elapsed time since the last time an + * event of the same type was triggered. */ #define E(event, condition_unused, is_alloc_event_unused) \ uint64_t event##_new_event_wait(tsd_t *tsd); \ -uint64_t event##_postponed_event_wait(tsd_t *tsd); +uint64_t event##_postponed_event_wait(tsd_t *tsd); \ +void event##_event_handler(tsd_t *tsd, uint64_t elapsed); ITERATE_OVER_ALL_EVENTS #undef E -/* Event handler function signatures. */ +/* Signatures for internal functions fetching elapsed time. */ #define E(event, condition_unused, is_alloc_event_unused) \ -static void event##_event_handler(tsd_t *tsd); +static uint64_t event##_fetch_elapsed(tsd_t *tsd); ITERATE_OVER_ALL_EVENTS #undef E -/* Handler functions. */ -static void -tcache_gc_event(tsd_t *tsd) { - tcache_t *tcache = tcache_get(tsd); - if (tcache != NULL) { - tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - tcache_event_hard(tsd, tcache_slow, tcache); - } +static uint64_t +tcache_gc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; } -static void -tcache_gc_event_handler(tsd_t *tsd) { - tcache_gc_event(tsd); +static uint64_t +tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; } -static void -tcache_gc_dalloc_event_handler(tsd_t *tsd) { - tcache_gc_event(tsd); -} - -static void -prof_sample_event_handler(tsd_t *tsd) { +static uint64_t +prof_sample_fetch_elapsed(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); uint64_t last_sample_event = prof_sample_last_event_get(tsd); prof_sample_last_event_set(tsd, last_event); - if (prof_idump_accum(tsd_tsdn(tsd), last_event - last_sample_event)) { - prof_idump(tsd_tsdn(tsd)); - } + return last_event - last_sample_event; } -static void -stats_interval_event_handler(tsd_t *tsd) { +static uint64_t +stats_interval_fetch_elapsed(tsd_t *tsd) { uint64_t last_event = thread_allocated_last_event_get(tsd); 
uint64_t last_stats_event = stats_interval_last_event_get(tsd); stats_interval_last_event_set(tsd, last_event); - if (stats_interval_accum(tsd, last_event - last_stats_event)) { - je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); - } + return last_event - last_stats_event; } /* Per event facilities done. */ @@ -286,7 +277,8 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { if (is_alloc == alloc_event && condition && \ is_##event##_triggered) { \ assert(allow_event_trigger); \ - event##_event_handler(tsd); \ + uint64_t elapsed = event##_fetch_elapsed(tsd); \ + event##_event_handler(tsd, elapsed); \ } ITERATE_OVER_ALL_EVENTS From 75dae934a167424f0dad663e9f96fefdac25ae1b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 17 Apr 2020 14:10:18 -0700 Subject: [PATCH 1728/2608] Always initialize TE counters in TSD init --- src/tsd.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/tsd.c b/src/tsd.c index c07a4bff..cc1b3ac8 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -221,14 +221,8 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { te_recompute_fast_threshold(tsd); } -static bool -tsd_data_init(tsd_t *tsd) { - /* - * We initialize the rtree context first (before the tcache), since the - * tcache initialization depends on it. - */ - rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); - +static void +tsd_prng_state_init(tsd_t *tsd) { /* * A nondeterministic seed based on the address of tsd reduces * the likelihood of lockstep non-uniform cache index @@ -238,10 +232,17 @@ tsd_data_init(tsd_t *tsd) { */ *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; +} - /* event_init may use the prng state above. */ - tsd_te_init(tsd); - +static bool +tsd_data_init(tsd_t *tsd) { + /* + * We initialize the rtree context first (before the tcache), since the + * tcache initialization depends on it. 
+ */ + rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + tsd_prng_state_init(tsd); + tsd_te_init(tsd); /* event_init may use the prng state above. */ return tsd_tcache_enabled_data_init(tsd); } @@ -270,6 +271,8 @@ tsd_data_init_nocleanup(tsd_t *tsd) { *tsd_arenas_tdata_bypassp_get(tsd) = true; *tsd_tcache_enabledp_get_unsafe(tsd) = false; *tsd_reentrancy_levelp_get(tsd) = 1; + tsd_prng_state_init(tsd); + tsd_te_init(tsd); /* event_init may use the prng state above. */ assert_tsd_data_cleanup_done(tsd); return false; From dcea2c0f8b91d045a58eed6b6b1935719c7acd4b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 16 Apr 2020 11:50:29 -0700 Subject: [PATCH 1729/2608] Get rid of TSD -> thread event dependency --- include/jemalloc/internal/thread_event.h | 6 ++++++ include/jemalloc/internal/tsd.h | 18 ++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 3a848829..5b5bb9fb 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -5,6 +5,12 @@ /* "te" is short for "thread_event" */ +/* + * TE_MIN_START_WAIT should not exceed the minimal allocation usize. + */ +#define TE_MIN_START_WAIT ((uint64_t)1U) +#define TE_MAX_START_WAIT UINT64_MAX + /* * Maximum threshold on thread_(de)allocated_next_event_fast, so that there is * no need to check overflow in malloc fast path. 
(The allocation size in malloc diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0f9ec12b..18bdb8fd 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -86,14 +86,14 @@ typedef ql_elm(tsd_t) tsd_link_t; /* reentrancy_level */ 0, \ /* narenas_tdata */ 0, \ /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ TE_MIN_START_WAIT, \ + /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ TE_MIN_START_WAIT, \ - /* tcache_gc_event_wait */ TE_MIN_START_WAIT, \ - /* tcache_gc_dalloc_event_wait */ TE_MIN_START_WAIT, \ - /* prof_sample_event_wait */ TE_MIN_START_WAIT, \ + /* thread_deallocated_next_event */ 0, \ + /* tcache_gc_event_wait */ 0, \ + /* tcache_gc_dalloc_event_wait */ 0, \ + /* prof_sample_event_wait */ 0, \ /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ TE_MIN_START_WAIT, \ + /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ @@ -131,12 +131,6 @@ typedef ql_elm(tsd_t) tsd_link_t; /* test data */ MALLOC_TEST_TSD_INITIALIZER -/* - * TE_MIN_START_WAIT should not exceed the minimal allocation usize. - */ -#define TE_MIN_START_WAIT ((uint64_t)1U) -#define TE_MAX_START_WAIT UINT64_MAX - #define TSD_INITIALIZER { \ TSD_DATA_SLOW_INITIALIZER \ /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ From 5dead37a9d38494341a6808bd09b8896282becc1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 12:20:30 -0700 Subject: [PATCH 1730/2608] Allow narenas:default. This can be useful when you know you want to override some lower-priority configuration setting with its default value, but don't know what that value would be. 
--- src/jemalloc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 78da45bc..d5d54e2d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1282,9 +1282,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } - CONF_HANDLE_UNSIGNED(opt_narenas, "narenas", 1, - UINT_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, - false) + if (CONF_MATCH("narenas")) { + if (CONF_MATCH_VALUE("default")) { + opt_narenas = 0; + CONF_CONTINUE; + } else { + CONF_HANDLE_UNSIGNED(opt_narenas, + "narenas", 1, UINT_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + } + } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; size_t vlen_left = vlen; From eda9c2858f267961d7e88cb3f3e841f197372125 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 12:42:04 -0700 Subject: [PATCH 1731/2608] Edata: zero stack edatas before initializing. This avoids some UB. No compilers take advantage of it for now, but no sense in tempting fate. --- include/jemalloc/internal/edata.h | 7 +++++++ src/emap.c | 2 +- src/extent_dss.c | 2 +- test/unit/rtree.c | 6 +++--- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 3a9ebc81..ac8d647e 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -507,6 +507,13 @@ edata_is_head_set(edata_t *edata, bool is_head) { ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } +/* + * Because this function is implemented as a sequence of bitfield modifications, + * even though each individual bit is properly initialized, we technically read + * uninitialized data within it. This is mostly fine, since most callers get + * their edatas from zeroing sources, but callers who make stack edata_ts need + * to manually zero them. 
+ */ static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, diff --git a/src/emap.c b/src/emap.c index 637d332b..ec1b4cdb 100644 --- a/src/emap.c +++ b/src/emap.c @@ -247,7 +247,7 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, * and commit state, and head status. This is a fake edata_t, used to * facilitate a lookup. */ - edata_t lead; + edata_t lead = {0}; edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); diff --git a/src/extent_dss.c b/src/extent_dss.c index 18b68952..17a08227 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -198,7 +198,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - edata_t edata; + edata_t edata = {0}; ehooks_t *ehooks = arena_get_ehooks( arena); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 28029665..63d6e37b 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -33,7 +33,7 @@ TEST_END #undef SEED TEST_BEGIN(test_rtree_extrema) { - edata_t edata_a, edata_b; + edata_t edata_a = {0}, edata_b = {0}; edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); @@ -91,7 +91,7 @@ TEST_BEGIN(test_rtree_bits) { uintptr_t keys[] = {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; - edata_t edata; + edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, false, EXTENT_NOT_HEAD); @@ -141,7 +141,7 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - edata_t edata; + edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, false, 
EXTENT_NOT_HEAD); From 27f29e424ba9c4f8208e9dd98cb3d39eeb76d5ee Mon Sep 17 00:00:00 2001 From: Brooks Davis Date: Fri, 1 May 2020 22:08:37 +0100 Subject: [PATCH 1732/2608] LQ_QUANTUM should be 4 on mips64 hardware. This matches the ABI stack alignment requirements. --- include/jemalloc/internal/quantum.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index 821086e9..11e870a3 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -34,7 +34,11 @@ # define LG_QUANTUM 3 # endif # ifdef __mips__ -# define LG_QUANTUM 3 +# if defined(__mips_n32) || defined(__mips_n64) +# define LG_QUANTUM 4 +# else +# define LG_QUANTUM 3 +# endif # endif # ifdef __nios2__ # define LG_QUANTUM 3 From 33372cbd4075e70b1e365a6dd6708edd0d68c3a4 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 27 Apr 2020 20:28:17 +0100 Subject: [PATCH 1733/2608] cpu instruction spin wait for arm32/64 --- configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/configure.ac b/configure.ac index f67fc3d7..98cb4bc8 100644 --- a/configure.ac +++ b/configure.ac @@ -416,6 +416,16 @@ case "${host_cpu}" in fi fi ;; + aarch64|arm*) + HAVE_CPU_SPINWAIT=1 + AC_CACHE_VAL([je_cv_yield], + [JE_COMPILABLE([yield instruction], [], + [[__asm__ volatile("yield"); return 0;]], + [je_cv_yield])]) + if test "x${je_cv_yield}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("yield")' + fi + ;; *) HAVE_CPU_SPINWAIT=0 ;; From 97b7a9cf7702371d5f9827f71b6daf7eafe890ec Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 13:05:36 -0700 Subject: [PATCH 1734/2608] Add a fill/flush microbenchmark. 
--- Makefile.in | 1 + test/stress/fill_flush.c | 77 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 test/stress/fill_flush.c diff --git a/Makefile.in b/Makefile.in index d35b74b3..e7666fb1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -289,6 +289,7 @@ CPP_SRCS := TESTS_INTEGRATION_CPP := endif TESTS_STRESS := $(srcroot)test/stress/microbench.c \ + $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/hookbench.c diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c new file mode 100644 index 00000000..6ea3ff96 --- /dev/null +++ b/test/stress/fill_flush.c @@ -0,0 +1,77 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +#define SMALL_ALLOC_SIZE 128 +#define LARGE_ALLOC_SIZE SC_LARGE_MINCLASS +#define NALLOCS 1000 + +/* + * We make this volatile so the 1-at-a-time variants can't leave the allocation + * in a register, just to try to get the cache behavior closer. + */ +void *volatile allocs[NALLOCS]; + +static void +array_alloc_dalloc_small(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(SMALL_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + } + for (int i = 0; i < NALLOCS; i++) { + sdallocx(allocs[i], SMALL_ALLOC_SIZE, 0); + } +} + +static void +item_alloc_dalloc_small(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(SMALL_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + sdallocx(allocs[i], SMALL_ALLOC_SIZE, 0); + } +} + +TEST_BEGIN(test_array_vs_item_small) { + compare_funcs(1 * 1000, 10 * 1000, + "array of small allocations", array_alloc_dalloc_small, + "small item allocation", item_alloc_dalloc_small); +} +TEST_END + +static void +array_alloc_dalloc_large(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(LARGE_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + } + for (int i = 0; i < 
NALLOCS; i++) { + sdallocx(allocs[i], LARGE_ALLOC_SIZE, 0); + } +} + +static void +item_alloc_dalloc_large(void) { + for (int i = 0; i < NALLOCS; i++) { + void *p = mallocx(LARGE_ALLOC_SIZE, 0); + assert_ptr_not_null(p, "mallocx shouldn't fail"); + allocs[i] = p; + sdallocx(allocs[i], LARGE_ALLOC_SIZE, 0); + } +} + +TEST_BEGIN(test_array_vs_item_large) { + compare_funcs(100, 1000, + "array of large allocations", array_alloc_dalloc_large, + "large item allocation", item_alloc_dalloc_large); +} +TEST_END + + +int main(void) { + return test_no_reentrancy( + test_array_vs_item_small, + test_array_vs_item_large); +} From 634afc4124100b5ff11e892481d912d56099be1a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 12:08:19 -0700 Subject: [PATCH 1735/2608] Tcache: Make size computation configurable. --- include/jemalloc/internal/tcache_externs.h | 1 + src/jemalloc.c | 6 +++ src/tcache.c | 60 ++++++++++++++++------ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 6eca928c..67fdc00c 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -3,6 +3,7 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more diff --git a/src/jemalloc.c b/src/jemalloc.c index d5d54e2d..fbec733e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1373,6 +1373,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) + /* + * Anyone trying to set a value outside -16 to 16 is + * deeply confused. 
+ */ + CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, + "lg_tcache_nslots_mul", -16, 16) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index f8188cb8..a18d91de 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -13,6 +13,16 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; +/* + * We attempt to make the number of slots in a tcache bin for a given size class + * equal to the number of objects in a slab times some multiplier. By default, + * the multiplier is 1/2 (i.e. we set the maximum number of objects in the + * tcache to half the number of objects in a slab). + * This is bounded by some other constraints as well, like the fact that it + * must be even, must be less than TCACHE_NSLOTS_SMALL_MAX, etc.. + */ +ssize_t opt_lg_tcache_nslots_mul = -1; + cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ @@ -778,6 +788,37 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { } } +static unsigned +tcache_ncached_max_compute(szind_t szind) { + if (szind >= SC_NBINS) { + assert(szind < nhbins); + return TCACHE_NSLOTS_LARGE; + } + unsigned slab_nregs = bin_infos[szind].nregs; + + unsigned candidate; + if (opt_lg_tcache_nslots_mul < 0) { + candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); + } else { + candidate = slab_nregs << opt_lg_tcache_nslots_mul; + } + if (candidate % 2 != 0) { + /* + * We need the candidate size to be even -- we assume that we + * can divide by two and get a positive number (e.g. when + * flushing). + */ + ++candidate; + } + if (candidate <= TCACHE_NSLOTS_SMALL_MIN) { + return TCACHE_NSLOTS_SMALL_MIN; + } else if (candidate <= TCACHE_NSLOTS_SMALL_MAX) { + return candidate; + } else { + return TCACHE_NSLOTS_SMALL_MAX; + } +} + bool tcache_boot(tsdn_t *tsdn, base_t *base) { /* If necessary, clamp opt_lg_tcache_max. 
*/ @@ -801,23 +842,12 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { if (tcache_bin_info == NULL) { return true; } - unsigned i, ncached_max; - for (i = 0; i < SC_NBINS; i++) { - if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { - ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((bin_infos[i].nregs << 1) <= - TCACHE_NSLOTS_SMALL_MAX) { - ncached_max = bin_infos[i].nregs << 1; - } else { - ncached_max = TCACHE_NSLOTS_SMALL_MAX; - } + for (szind_t i = 0; i < nhbins; i++) { + unsigned ncached_max = tcache_ncached_max_compute(i); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } - for (; i < nhbins; i++) { - cache_bin_info_init(&tcache_bin_info[i], TCACHE_NSLOTS_LARGE); - } - cache_bin_info_compute_alloc(tcache_bin_info, i, &tcache_bin_alloc_size, - &tcache_bin_alloc_alignment); + cache_bin_info_compute_alloc(tcache_bin_info, nhbins, + &tcache_bin_alloc_size, &tcache_bin_alloc_alignment); return false; } From b58dea8d1b6894eed1616a1264bb9c893194f770 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 14:19:37 -0700 Subject: [PATCH 1736/2608] Cache bin: expose ncached_max publicly. --- include/jemalloc/internal/cache_bin.h | 10 ++++++++++ src/cache_bin.c | 1 + 2 files changed, 11 insertions(+) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a56b4a1d..c016769d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -20,6 +20,16 @@ */ typedef uint16_t cache_bin_sz_t; +/* + * That implies the following value, for the maximum number of items in any + * individual bin. The cache bins track their bounds looking just at the low + * bits of a pointer, compared against a cache_bin_sz_t. So that's + * 1 << (sizeof(cache_bin_sz_t) * 8) + * bytes spread across pointer sized objects to get the maximum. 
+ */ +#define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \ + / sizeof(void *) - 1) + /* * This lives inside the cache_bin (for locality reasons), and is initialized * alongside it, but is otherwise not modified by any cache bin operations. diff --git a/src/cache_bin.c b/src/cache_bin.c index 51b87499..1e26c4ef 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -6,6 +6,7 @@ void cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { + assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); info->ncached_max = (cache_bin_sz_t)ncached_max; From 181093173d589569a846f2d5d4c9e8ca8fd57b5d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:03:06 -0700 Subject: [PATCH 1737/2608] Tcache: make slot sizing configurable. --- include/jemalloc/internal/tcache_externs.h | 8 +++- include/jemalloc/internal/tcache_types.h | 17 -------- src/jemalloc.c | 10 +++++ src/tcache.c | 49 ++++++++++++++++++---- test/unit/cache_bin.c | 5 ++- 5 files changed, 59 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 67fdc00c..e043ef49 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -1,9 +1,13 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H -extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; +extern bool opt_tcache; +extern ssize_t opt_lg_tcache_max; extern ssize_t opt_lg_tcache_nslots_mul; +extern unsigned opt_tcache_nslots_small_min; +extern unsigned opt_tcache_nslots_small_max; +extern unsigned opt_tcache_nslots_large; +extern ssize_t opt_lg_tcache_shift; /* * Number of tcache bins. 
There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index cba86f43..34a0599c 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -17,23 +17,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY -/* - * Absolute minimum number of cache slots for each small bin. - */ -#define TCACHE_NSLOTS_SMALL_MIN 20 - -/* - * Absolute maximum number of cache slots for each small bin in the thread - * cache. This is an additional constraint beyond that imposed as: twice the - * number of regions per slab for this size class. - * - * This constant must be an even number. - */ -#define TCACHE_NSLOTS_SMALL_MAX 200 - -/* Number of cache slots for large size classes. */ -#define TCACHE_NSLOTS_LARGE 20 - /* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 diff --git a/src/jemalloc.c b/src/jemalloc.c index fbec733e..4f911e22 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1379,6 +1379,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], */ CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, "lg_tcache_nslots_mul", -16, 16) + /* Ditto with values past 2048. 
*/ + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, + "tcache_nslots_small_min", 1, 2048, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, + "tcache_nslots_small_max", 1, 2048, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, + "tcache_nslots_large", 1, 2048, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index a18d91de..9586556e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -10,8 +10,13 @@ /******************************************************************************/ /* Data. */ -bool opt_tcache = true; -ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; +bool opt_tcache = true; +ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; + +/* Reasonable defaults for min and max values. */ +unsigned opt_tcache_nslots_small_min = 20; +unsigned opt_tcache_nslots_small_max = 200; +unsigned opt_tcache_nslots_large = 20; /* * We attempt to make the number of slots in a tcache bin for a given size class @@ -19,7 +24,7 @@ ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; * the multiplier is 1/2 (i.e. we set the maximum number of objects in the * tcache to half the number of objects in a slab). * This is bounded by some other constraints as well, like the fact that it - * must be even, must be less than TCACHE_NSLOTS_SMALL_MAX, etc.. + * must be even, must be less than opt_tcache_nslots_small_max, etc.. 
*/ ssize_t opt_lg_tcache_nslots_mul = -1; @@ -485,7 +490,6 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->arena = NULL; tcache_slow->dyn_alloc = mem; - assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); size_t cur_offset = 0; @@ -792,10 +796,37 @@ static unsigned tcache_ncached_max_compute(szind_t szind) { if (szind >= SC_NBINS) { assert(szind < nhbins); - return TCACHE_NSLOTS_LARGE; + return opt_tcache_nslots_large; } unsigned slab_nregs = bin_infos[szind].nregs; + /* We may modify these values; start with the opt versions. */ + unsigned nslots_small_min = opt_tcache_nslots_small_min; + unsigned nslots_small_max = opt_tcache_nslots_small_max; + + /* + * Clamp values to meet our constraints -- even, nonzero, min < max, and + * suitable for a cache bin size. + */ + if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) { + nslots_small_max = CACHE_BIN_NCACHED_MAX; + } + if (nslots_small_min % 2 != 0) { + nslots_small_min++; + } + if (nslots_small_max % 2 != 0) { + nslots_small_max--; + } + if (nslots_small_min < 2) { + nslots_small_min = 2; + } + if (nslots_small_max < 2) { + nslots_small_max = 2; + } + if (nslots_small_min > nslots_small_max) { + nslots_small_min = nslots_small_max; + } + unsigned candidate; if (opt_lg_tcache_nslots_mul < 0) { candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); @@ -810,12 +841,12 @@ tcache_ncached_max_compute(szind_t szind) { */ ++candidate; } - if (candidate <= TCACHE_NSLOTS_SMALL_MIN) { - return TCACHE_NSLOTS_SMALL_MIN; - } else if (candidate <= TCACHE_NSLOTS_SMALL_MAX) { + if (candidate <= nslots_small_min) { + return nslots_small_min; + } else if (candidate <= nslots_small_max) { return candidate; } else { - return TCACHE_NSLOTS_SMALL_MAX; + return nslots_small_max; } } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index cbd8ce02..43fe8c6c 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -53,12 +53,13 @@ 
do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } TEST_BEGIN(test_cache_bin) { + const int ncached_max = 100; bool success; void *ptr; cache_bin_t bin; cache_bin_info_t info; - cache_bin_info_init(&info, TCACHE_NSLOTS_SMALL_MAX); + cache_bin_info_init(&info, ncached_max); size_t size; size_t alignment; @@ -74,7 +75,7 @@ TEST_BEGIN(test_cache_bin) { assert_zu_eq(cur_offset, size, "Should use all requested memory"); /* Initialize to empty; should then have 0 elements. */ - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(&info); + expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); From 10b96f635190cd8e27ed73f6b44293a7357e4013 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:27:50 -0700 Subject: [PATCH 1738/2608] Tcache: Remove some unused gc constants. --- include/jemalloc/internal/tcache_types.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 34a0599c..0806df9c 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -20,17 +20,6 @@ typedef struct tcaches_s tcaches_t; /* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ #define LG_TCACHE_MAXCLASS_DEFAULT 15 -/* - * TCACHE_GC_SWEEP is the approximate number of allocation events between - * full GC sweeps. Integer rounding may cause the actual number to be - * slightly higher, since GC is performed incrementally. - */ -#define TCACHE_GC_SWEEP 8192 - -/* Number of tcache deallocation events between incremental GCs. */ -#define TCACHE_GC_INCR \ - ((TCACHE_GC_SWEEP / SC_NBINS) + ((TCACHE_GC_SWEEP / SC_NBINS == 0) ? 0 : 1)) - /* Number of allocation bytes between tcache incremental GCs. 
*/ #define TCACHE_GC_INCR_BYTES 65536U From ec0b5795639fe96883366691e0380eeb0845836b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:33:23 -0700 Subject: [PATCH 1739/2608] Tcache: Privatize opt_lg_tcache_max default. --- include/jemalloc/internal/tcache_types.h | 3 --- src/tcache.c | 7 ++++++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 0806df9c..c8fd4c3e 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -17,9 +17,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY -/* (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. */ -#define LG_TCACHE_MAXCLASS_DEFAULT 15 - /* Number of allocation bytes between tcache incremental GCs. */ #define TCACHE_GC_INCR_BYTES 65536U diff --git a/src/tcache.c b/src/tcache.c index 9586556e..0366149d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -11,7 +11,12 @@ /* Data. */ bool opt_tcache = true; -ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; + +/* + * (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. This choice + * (32kb by default) works well as a default in practice. + */ +ssize_t opt_lg_tcache_max = 15; /* Reasonable defaults for min and max values. */ unsigned opt_tcache_nslots_small_min = 20; From d338dd45d7402df287adb10e82ca98be831ac16b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 15:53:30 -0700 Subject: [PATCH 1740/2608] Tcache: Make incremental gc bytes configurable. 
--- include/jemalloc/internal/tcache_externs.h | 1 + include/jemalloc/internal/tcache_types.h | 3 --- include/jemalloc/internal/thread_event.h | 4 ++-- src/jemalloc.c | 4 ++++ src/tcache.c | 10 ++++++++-- test/stress/fill_flush.c | 1 - 6 files changed, 15 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index e043ef49..1924fd9c 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -8,6 +8,7 @@ extern unsigned opt_tcache_nslots_small_min; extern unsigned opt_tcache_nslots_small_max; extern unsigned opt_tcache_nslots_large; extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index c8fd4c3e..fb311e72 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -17,9 +17,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) #define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY -/* Number of allocation bytes between tcache incremental GCs. */ -#define TCACHE_GC_INCR_BYTES 65536U - /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). 
*/ #define TCACHE_ZERO_INITIALIZER {0} #define TCACHE_SLOW_ZERO_INITIALIZER {0} diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 5b5bb9fb..2fcaa88a 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -53,10 +53,10 @@ void tsd_te_init(tsd_t *tsd); * E(event, (condition), is_alloc_event) */ #define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (TCACHE_GC_INCR_BYTES > 0), true) \ + E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ E(prof_sample, (config_prof && opt_prof), true) \ E(stats_interval, (opt_stats_interval >= 0), true) \ - E(tcache_gc_dalloc, (TCACHE_GC_INCR_BYTES > 0), false) + E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) #define E(event, condition_unused, is_alloc_event_unused) \ C(event##_event_wait) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4f911e22..068a840a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1389,6 +1389,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, + "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ true) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index 0366149d..9b4a7b7a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -33,6 +33,12 @@ unsigned opt_tcache_nslots_large = 20; */ ssize_t opt_lg_tcache_nslots_mul = -1; +/* + * Number of allocation bytes between tcache incremental GCs. Again, this + * default just seems to work well; more tuning is possible. + */ +size_t opt_tcache_gc_incr_bytes = 65536; + cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. 
*/ @@ -62,7 +68,7 @@ tcache_salloc(tsdn_t *tsdn, const void *ptr) { uint64_t tcache_gc_new_event_wait(tsd_t *tsd) { - return TCACHE_GC_INCR_BYTES; + return opt_tcache_gc_incr_bytes; } uint64_t @@ -72,7 +78,7 @@ tcache_gc_postponed_event_wait(tsd_t *tsd) { uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { - return TCACHE_GC_INCR_BYTES; + return opt_tcache_gc_incr_bytes; } uint64_t diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c index 6ea3ff96..a2db044d 100644 --- a/test/stress/fill_flush.c +++ b/test/stress/fill_flush.c @@ -69,7 +69,6 @@ TEST_BEGIN(test_array_vs_item_large) { } TEST_END - int main(void) { return test_no_reentrancy( test_array_vs_item_small, From ee72bf1cfd236d6e076d9d9bdfcb09787016d62b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 11 May 2020 16:24:17 -0700 Subject: [PATCH 1741/2608] Tcache: Add tcache gc delay option. This can reduce flushing frequency for small size classes. --- include/jemalloc/internal/tcache_externs.h | 1 + include/jemalloc/internal/tcache_structs.h | 5 + src/jemalloc.c | 4 + src/tcache.c | 116 ++++++++++++++++----- 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 1924fd9c..1ee63193 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -9,6 +9,7 @@ extern unsigned opt_tcache_nslots_small_max; extern unsigned opt_tcache_nslots_large; extern ssize_t opt_lg_tcache_shift; extern size_t opt_tcache_gc_incr_bytes; +extern size_t opt_tcache_gc_delay_bytes; /* * Number of tcache bins. 
There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 1c9d4db0..331bd247 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -36,6 +36,11 @@ struct tcache_slow_s { uint8_t lg_fill_div[SC_NBINS]; /* For small bins, whether has been refilled since last GC. */ bool bin_refilled[SC_NBINS]; + /* + * For small bins, the number of items we can pretend to flush before + * actually flushing. + */ + uint8_t bin_flush_delay_items[SC_NBINS]; /* * The start of the allocation containing the dynamic allocation for * either the cache bins alone, or the cache bin memory as well as this diff --git a/src/jemalloc.c b/src/jemalloc.c index 068a840a..2903a412 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1393,6 +1393,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes, + "tcache_gc_delay_bytes", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) /* * The runtime option of oversize_threshold remains diff --git a/src/tcache.c b/src/tcache.c index 9b4a7b7a..363a5b36 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -33,6 +33,20 @@ unsigned opt_tcache_nslots_large = 20; */ ssize_t opt_lg_tcache_nslots_mul = -1; +/* + * With default settings, we may end up flushing small bins frequently with + * small flush amounts. To limit this tendency, we can set a number of bytes to + * "delay" by. If we try to flush N M-byte items, we decrease that size-class's + * delay by N * M. So, if delay is 1024 and we're looking at the 64-byte size + * class, we won't do any flushing until we've been asked to flush 1024/64 == 16 + * items. This can happen in any configuration (i.e. being asked to flush 16 + * items once, or 4 items 4 times). 
+ * + * Practically, this is stored as a count of items in a uint8_t, so the + * effective maximum value for a size class is 255 * sz. + */ +size_t opt_tcache_gc_delay_bytes = 0; + /* * Number of allocation bytes between tcache incremental GCs. Again, this * default just seems to work well; more tuning is possible. @@ -86,6 +100,67 @@ tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } +static uint8_t +tcache_gc_item_delay_compute(szind_t szind) { + assert(szind < SC_NBINS); + size_t sz = sz_index2size(szind); + size_t item_delay = opt_tcache_gc_delay_bytes / sz; + size_t delay_max = ZU(1) + << (sizeof(((tcache_slow_t *)NULL)->bin_flush_delay_items[0]) * 8); + if (item_delay >= delay_max) { + item_delay = delay_max - 1; + } + return item_delay; +} + +static void +tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, + szind_t szind) { + /* Aim to flush 3/4 of items below low-water. */ + assert(szind < SC_NBINS); + + cache_bin_t *cache_bin = &tcache->bins[szind]; + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + &tcache_bin_info[szind]); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, + &tcache_bin_info[szind]); + assert(!tcache_slow->bin_refilled[szind]); + + size_t nflush = low_water - (low_water >> 2); + if (nflush < tcache_slow->bin_flush_delay_items[szind]) { + tcache_slow->bin_flush_delay_items[szind] -= nflush; + return; + } else { + tcache_slow->bin_flush_delay_items[szind] + = tcache_gc_item_delay_compute(szind); + } + + tcache_bin_flush_small(tsd, tcache, cache_bin, szind, ncached - nflush); + + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. 
+ */ + if ((cache_bin_info_ncached_max(&tcache_bin_info[szind]) + >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { + tcache_slow->lg_fill_div[szind]++; + } +} + +static void +tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, + szind_t szind) { + /* Like the small GC; flush 3/4 of untouched items. */ + assert(szind >= SC_NBINS); + cache_bin_t *cache_bin = &tcache->bins[szind]; + cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + &tcache_bin_info[szind]); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, + &tcache_bin_info[szind]); + tcache_bin_flush_large(tsd, tcache, cache_bin, szind, + ncached - low_water + (low_water >> 2)); +} + static void tcache_event(tsd_t *tsd) { tcache_t *tcache = tcache_get(tsd); @@ -94,45 +169,28 @@ tcache_event(tsd_t *tsd) { } tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - szind_t binind = tcache_slow->next_gc_bin; - bool is_small = (binind < SC_NBINS); - cache_bin_t *cache_bin = &tcache->bins[binind]; + szind_t szind = tcache_slow->next_gc_bin; + bool is_small = (szind < SC_NBINS); + cache_bin_t *cache_bin = &tcache->bins[szind]; cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[binind]); - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, - &tcache_bin_info[binind]); + &tcache_bin_info[szind]); if (low_water > 0) { - /* - * Flush (ceiling) 3/4 of the objects below the low water mark. - */ if (is_small) { - assert(!tcache_slow->bin_refilled[binind]); - tcache_bin_flush_small(tsd, tcache, cache_bin, binind, - ncached - low_water + (low_water >> 2)); - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. 
- */ - if ((cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> - (tcache_slow->lg_fill_div[binind] + 1)) >= 1) { - tcache_slow->lg_fill_div[binind]++; - } + tcache_gc_small(tsd, tcache_slow, tcache, szind); } else { - tcache_bin_flush_large(tsd, tcache, cache_bin, binind, - ncached - low_water + (low_water >> 2)); + tcache_gc_large(tsd, tcache_slow, tcache, szind); } - } else if (is_small && tcache_slow->bin_refilled[binind]) { + } else if (is_small && tcache_slow->bin_refilled[szind]) { assert(low_water == 0); /* * Increase fill count by 2X for small bins. Make sure * lg_fill_div stays greater than 0. */ - if (tcache_slow->lg_fill_div[binind] > 1) { - tcache_slow->lg_fill_div[binind]--; + if (tcache_slow->lg_fill_div[szind] > 1) { + tcache_slow->lg_fill_div[szind]--; } - tcache_slow->bin_refilled[binind] = false; + tcache_slow->bin_refilled[szind] = false; } cache_bin_low_water_set(cache_bin); @@ -519,6 +577,10 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, &cur_offset); /* Sanity check that the whole stack is used. */ assert(cur_offset == tcache_bin_alloc_size); + for (unsigned i = 0; i < SC_NBINS; i++) { + tcache_slow->bin_flush_delay_items[i] + = tcache_gc_item_delay_compute(i); + } } /* Initialize auto tcache (embedded in TSD). */ From 7503b5b33a9ea446c30e3c51f6ad68660fa6e931 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 10:36:27 -0700 Subject: [PATCH 1742/2608] Stats, CTL: Expose new tcache settings. 
--- src/ctl.c | 28 +++++++++++++++++++++++++--- src/stats.c | 6 ++++++ src/tcache.c | 12 ++++++------ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index c3c029ff..c5964d85 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,9 +103,15 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_tcache_nslots_small_min) +CTL_PROTO(opt_tcache_nslots_small_max) +CTL_PROTO(opt_tcache_nslots_large) +CTL_PROTO(opt_lg_tcache_nslots_mul) +CTL_PROTO(opt_tcache_gc_incr_bytes) +CTL_PROTO(opt_tcache_gc_delay_bytes) CTL_PROTO(opt_thp) CTL_PROTO(opt_lg_extent_max_active_fit) -CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) @@ -340,9 +346,17 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("tcache_nslots_small_min"), + CTL(opt_tcache_nslots_small_min)}, + {NAME("tcache_nslots_small_max"), + CTL(opt_tcache_nslots_small_max)}, + {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)}, + {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, + {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, + {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, {NAME("thp"), CTL(opt_thp)}, {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, @@ -1793,10 +1807,18 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) 
+CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, + unsigned) +CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, + unsigned) +CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned) +CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t) +CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t) +CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t) CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) -CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) diff --git a/src/stats.c b/src/stats.c index 42e4a1ca..8be69ca6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1107,6 +1107,12 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("tcache") OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_UNSIGNED("tcache_nslots_small_min") + OPT_WRITE_UNSIGNED("tcache_nslots_small_max") + OPT_WRITE_UNSIGNED("tcache_nslots_large") + OPT_WRITE_SSIZE_T("lg_tcache_nslots_mul") + OPT_WRITE_SIZE_T("tcache_gc_incr_bytes") + OPT_WRITE_SIZE_T("tcache_gc_delay_bytes") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") OPT_WRITE_CHAR_P("prof_prefix") diff --git a/src/tcache.c b/src/tcache.c index 363a5b36..c9cb7853 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -33,6 +33,12 @@ unsigned opt_tcache_nslots_large = 20; */ ssize_t opt_lg_tcache_nslots_mul = -1; +/* + * Number of allocation bytes between tcache incremental GCs. Again, this + * default just seems to work well; more tuning is possible. + */ +size_t opt_tcache_gc_incr_bytes = 65536; + /* * With default settings, we may end up flushing small bins frequently with * small flush amounts. 
To limit this tendency, we can set a number of bytes to @@ -47,12 +53,6 @@ ssize_t opt_lg_tcache_nslots_mul = -1; */ size_t opt_tcache_gc_delay_bytes = 0; -/* - * Number of allocation bytes between tcache incremental GCs. Again, this - * default just seems to work well; more tuning is possible. - */ -size_t opt_tcache_gc_incr_bytes = 65536; - cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ From 6cdac3c573de86c8d59d69fca8f1778bdbec25e0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 13 May 2020 15:32:18 -0700 Subject: [PATCH 1743/2608] Tcache: Make flush fractions configurable. --- include/jemalloc/internal/tcache_externs.h | 2 ++ include/jemalloc/internal/tcache_inlines.h | 4 ++-- src/ctl.c | 10 ++++++++++ src/jemalloc.c | 6 ++++++ src/stats.c | 2 ++ src/tcache.c | 7 +++++++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 1ee63193..f044d322 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -10,6 +10,8 @@ extern unsigned opt_tcache_nslots_large; extern ssize_t opt_lg_tcache_shift; extern size_t opt_tcache_gc_incr_bytes; extern size_t opt_tcache_gc_delay_bytes; +extern unsigned opt_lg_tcache_flush_small_div; +extern unsigned opt_lg_tcache_flush_large_div; /* * Number of tcache bins. 
There are SC_NBINS small-object bins, plus 0 or more diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 5d49c4e3..1cba9186 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -110,7 +110,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> 1; + &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -128,7 +128,7 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> 1; + &tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/ctl.c b/src/ctl.c index c5964d85..be8be10f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,8 @@ CTL_PROTO(opt_tcache_nslots_large) CTL_PROTO(opt_lg_tcache_nslots_mul) CTL_PROTO(opt_tcache_gc_incr_bytes) CTL_PROTO(opt_tcache_gc_delay_bytes) +CTL_PROTO(opt_lg_tcache_flush_small_div) +CTL_PROTO(opt_lg_tcache_flush_large_div) CTL_PROTO(opt_thp) CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_prof) @@ -355,6 +357,10 @@ static const ctl_named_node_t opt_node[] = { {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, + {NAME("lg_tcache_flush_small_div"), + CTL(opt_lg_tcache_flush_small_div)}, + {NAME("lg_tcache_flush_large_div"), + 
CTL(opt_lg_tcache_flush_large_div)}, {NAME("thp"), CTL(opt_thp)}, {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("prof"), CTL(opt_prof)}, @@ -1816,6 +1822,10 @@ CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned) CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t) CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t) CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t) +CTL_RO_NL_GEN(opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, + unsigned) +CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, + unsigned) CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2903a412..74355d40 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1397,6 +1397,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "tcache_gc_delay_bytes", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, + "lg_tcache_flush_small_div", 1, 16, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, + "lg_tcache_flush_large_div", 1, 16, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) /* * The runtime option of oversize_threshold remains diff --git a/src/stats.c b/src/stats.c index 8be69ca6..fb88e5a6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1113,6 +1113,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SSIZE_T("lg_tcache_nslots_mul") OPT_WRITE_SIZE_T("tcache_gc_incr_bytes") OPT_WRITE_SIZE_T("tcache_gc_delay_bytes") + OPT_WRITE_UNSIGNED("lg_tcache_flush_small_div") + OPT_WRITE_UNSIGNED("lg_tcache_flush_large_div") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") OPT_WRITE_CHAR_P("prof_prefix") diff --git a/src/tcache.c b/src/tcache.c index 
c9cb7853..2513ca33 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -53,6 +53,13 @@ size_t opt_tcache_gc_incr_bytes = 65536; */ size_t opt_tcache_gc_delay_bytes = 0; +/* + * When a cache bin is flushed because it's full, how much of it do we flush? + * By default, we flush half the maximum number of items. + */ +unsigned opt_lg_tcache_flush_small_div = 1; +unsigned opt_lg_tcache_flush_large_div = 1; + cache_bin_info_t *tcache_bin_info; /* Total stack size required (per tcache). Include the padding above. */ From cd28e60337d3e4ef183f407df734f0095a3c1352 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 17:29:25 -0700 Subject: [PATCH 1744/2608] Don't warn on uniform initialization. --- configure.ac | 2 ++ 1 file changed, 2 insertions(+) diff --git a/configure.ac b/configure.ac index 98cb4bc8..787ef1b0 100644 --- a/configure.ac +++ b/configure.ac @@ -256,6 +256,8 @@ if test "x$GCC" = "xyes" ; then dnl has lots of nested structs). See the discussion at. dnl https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53119 JE_CFLAGS_ADD([-Wno-missing-braces]) + dnl This one too. + JE_CFLAGS_ADD([-Wno-missing-field-initializers]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then From 8da0896b7913470250a0220504822028e2aa8f2a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 17:43:23 -0700 Subject: [PATCH 1745/2608] Tcache: Make an integer conversion explicit. --- src/tcache.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 2513ca33..ff428842 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -142,7 +142,8 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, = tcache_gc_item_delay_compute(szind); } - tcache_bin_flush_small(tsd, tcache, cache_bin, szind, ncached - nflush); + tcache_bin_flush_small(tsd, tcache, cache_bin, szind, + (unsigned)(ncached - nflush)); /* * Reduce fill count by 2X. 
Limit lg_fill_div such that @@ -165,7 +166,7 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, - ncached - low_water + (low_water >> 2)); + (unsigned)(ncached - low_water + (low_water >> 2))); } static void From 035be448674b852637f04d86bd85d04b672d71b3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 15:41:20 -0700 Subject: [PATCH 1746/2608] Separate out dumping for each prof recent record --- src/prof_recent.c | 78 +++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 37 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index cd72bdab..22ce473f 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -431,7 +431,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { } static void -dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { +prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { char bt_buf[2 * sizeof(intptr_t) + 3]; char *s = bt_buf; assert(tctx != NULL); @@ -442,6 +442,43 @@ dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { } } +static void +prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { + emitter_json_object_begin(emitter); + + emitter_json_kv(emitter, "size", emitter_type_size, &node->size); + size_t usize = sz_s2u(node->size); + emitter_json_kv(emitter, "usize", emitter_type_size, &usize); + bool released = node->alloc_edata == NULL; + emitter_json_kv(emitter, "released", emitter_type_bool, &released); + + emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64, + &node->alloc_tctx->thr_uid); + uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); + emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, + &alloc_time_ns); + emitter_json_array_kv_begin(emitter, "alloc_trace"); + prof_recent_alloc_dump_bt(emitter, node->alloc_tctx); + emitter_json_array_end(emitter); + + if 
(node->dalloc_tctx != NULL) { + assert(released); + emitter_json_kv(emitter, "dalloc_thread_uid", + emitter_type_uint64, &node->dalloc_tctx->thr_uid); + assert(!nstime_equals_zero(&node->dalloc_time)); + uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time); + emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64, + &dalloc_time_ns); + emitter_json_array_kv_begin(emitter, "dalloc_trace"); + prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx); + emitter_json_array_end(emitter); + } else { + assert(nstime_equals_zero(&node->dalloc_time)); + } + + emitter_json_object_end(emitter); +} + #define PROF_RECENT_PRINT_BUFSIZE 4096 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { @@ -465,42 +502,9 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); - prof_recent_t *n; - ql_foreach(n, &prof_recent_alloc_list, link) { - emitter_json_object_begin(&emitter); - - emitter_json_kv(&emitter, "size", emitter_type_size, &n->size); - size_t usize = sz_s2u(n->size); - emitter_json_kv(&emitter, "usize", emitter_type_size, &usize); - bool released = n->alloc_edata == NULL; - emitter_json_kv(&emitter, "released", emitter_type_bool, - &released); - - emitter_json_kv(&emitter, "alloc_thread_uid", - emitter_type_uint64, &n->alloc_tctx->thr_uid); - uint64_t alloc_time_ns = nstime_ns(&n->alloc_time); - emitter_json_kv(&emitter, "alloc_time", emitter_type_uint64, - &alloc_time_ns); - emitter_json_array_kv_begin(&emitter, "alloc_trace"); - dump_bt(&emitter, n->alloc_tctx); - emitter_json_array_end(&emitter); - - if (n->dalloc_tctx != NULL) { - assert(released); - emitter_json_kv(&emitter, "dalloc_thread_uid", - emitter_type_uint64, &n->dalloc_tctx->thr_uid); - assert(!nstime_equals_zero(&n->dalloc_time)); - uint64_t dalloc_time_ns = nstime_ns(&n->dalloc_time); - emitter_json_kv(&emitter, "dalloc_time", - 
emitter_type_uint64, &dalloc_time_ns); - emitter_json_array_kv_begin(&emitter, "dalloc_trace"); - dump_bt(&emitter, n->dalloc_tctx); - emitter_json_array_end(&emitter); - } else { - assert(nstime_equals_zero(&n->dalloc_time)); - } - - emitter_json_object_end(&emitter); + prof_recent_t *node; + ql_foreach(node, &prof_recent_alloc_list, link) { + prof_recent_alloc_dump_node(&emitter, node); } emitter_json_array_end(&emitter); From 730658f72fd8b7eafabdb50ba83a4d04aa7afbb5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 15:54:40 -0700 Subject: [PATCH 1747/2608] Extract alloc/dalloc utility for last-N nodes --- src/prof_recent.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 22ce473f..5292c212 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -43,6 +43,20 @@ prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) { return old_max; } +static prof_recent_t * +prof_recent_allocate_node(tsdn_t *tsdn) { + return (prof_recent_t *)iallocztm(tsdn, sizeof(prof_recent_t), + sz_size2index(sizeof(prof_recent_t)), false, NULL, true, + arena_get(tsdn, 0, false), true); +} + +static void +prof_recent_free_node(tsdn_t *tsdn, prof_recent_t *node) { + assert(node != NULL); + assert(isalloc(tsdn, node) == sz_s2u(sizeof(prof_recent_t))); + idalloctm(tsdn, node, NULL, NULL, true, true); +} + static inline void increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); @@ -277,10 +291,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { assert(prof_recent_alloc_max_get(tsd) != 0); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - reserve = (prof_recent_t *)iallocztm(tsd_tsdn(tsd), - sizeof(prof_recent_t), sz_size2index(sizeof(prof_recent_t)), - false, NULL, true, arena_get(tsd_tsdn(tsd), 0, false), - true); + reserve = 
prof_recent_allocate_node(tsd_tsdn(tsd)); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); } @@ -331,7 +342,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); if (reserve != NULL) { - idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + prof_recent_free_node(tsd_tsdn(tsd), reserve); } /* @@ -353,7 +364,7 @@ label_rollback: prof_recent_alloc_assert_count(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); if (reserve != NULL) { - idalloctm(tsd_tsdn(tsd), reserve, NULL, NULL, true, true); + prof_recent_free_node(tsd_tsdn(tsd), reserve); } decrement_recent_count(tsd, tctx); } @@ -422,7 +433,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { if (node->dalloc_tctx != NULL) { decrement_recent_count(tsd, node->dalloc_tctx); } - idalloctm(tsd_tsdn(tsd), node, NULL, NULL, true, true); + prof_recent_free_node(tsd_tsdn(tsd), node); --count; } while (!ql_empty(&old_list)); assert(count == 0); From b8bdea6b26509b3fd06bb9b3344fca7b2f22dee9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 16:02:39 -0700 Subject: [PATCH 1748/2608] Fix: prof_recent_alloc_max_ctl_read() does not take tsd --- test/unit/prof_recent.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 4aa9f9e9..d7dd352e 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -7,7 +7,7 @@ /* Invariant before and after every test (when config_prof is on) */ static void -confirm_prof_setup(tsd_t *tsd) { +confirm_prof_setup() { /* Options */ assert_true(opt_prof, "opt_prof not on"); assert_true(opt_prof_active, "opt_prof_active not on"); @@ -16,13 +16,13 @@ confirm_prof_setup(tsd_t *tsd) { /* Dynamics */ assert_true(prof_active, "prof_active not on"); - assert_zd_eq(prof_recent_alloc_max_ctl_read(tsd), OPT_ALLOC_MAX, + 
assert_zd_eq(prof_recent_alloc_max_ctl_read(), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } TEST_BEGIN(test_confirm_setup) { test_skip_if(!config_prof); - confirm_prof_setup(tsd_fetch()); + confirm_prof_setup(); } TEST_END @@ -58,13 +58,11 @@ TEST_BEGIN(test_prof_recent_on) { ssize_t past, future; size_t len = sizeof(ssize_t); - tsd_t *tsd = tsd_fetch(); - - confirm_prof_setup(tsd); + confirm_prof_setup(); assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); - confirm_prof_setup(tsd); + confirm_prof_setup(); assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), 0, "Read error"); @@ -93,7 +91,7 @@ TEST_BEGIN(test_prof_recent_on) { expect_zd_eq(past, -1, "Output should not be touched given invalid write"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END @@ -151,9 +149,7 @@ TEST_BEGIN(test_prof_recent_alloc) { prof_recent_t *n; ssize_t future; - tsd_t *tsd = tsd_fetch(); - - confirm_prof_setup(tsd); + confirm_prof_setup(); /* * First batch of 2 * OPT_ALLOC_MAX allocations. After the @@ -190,7 +186,7 @@ TEST_BEGIN(test_prof_recent_alloc) { free(p); } - confirm_prof_setup(tsd); + confirm_prof_setup(); b = false; assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, @@ -219,7 +215,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_d_eq(mallctl("prof.active", NULL, NULL, &b, sizeof(bool)), 0, "mallctl for turning on prof_active failed"); - confirm_prof_setup(tsd); + confirm_prof_setup(); /* * Third batch of OPT_ALLOC_MAX allocations. 
Since prof_active is @@ -338,7 +334,7 @@ TEST_BEGIN(test_prof_recent_alloc) { assert_true(ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END @@ -485,8 +481,7 @@ confirm_record(const char *template, TEST_BEGIN(test_prof_recent_alloc_dump) { test_skip_if(!config_prof); - tsd_t *tsd = tsd_fetch(); - confirm_prof_setup(tsd); + confirm_prof_setup(); ssize_t future; void *p, *q; @@ -531,7 +526,7 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { future = OPT_ALLOC_MAX; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END @@ -588,7 +583,7 @@ f_thread(void *arg) { } else if (rand % 5 == 0) { prof_recent_alloc_dump(tsd, test_write_cb, NULL); } else if (rand % 5 == 1) { - last_max = prof_recent_alloc_max_ctl_read(tsd); + last_max = prof_recent_alloc_max_ctl_read(); } else if (rand % 5 == 2) { last_max = prof_recent_alloc_max_ctl_write(tsd, test_max * 2); @@ -613,8 +608,7 @@ f_thread(void *arg) { TEST_BEGIN(test_prof_recent_stress) { test_skip_if(!config_prof); - tsd_t *tsd = tsd_fetch(); - confirm_prof_setup(tsd); + confirm_prof_setup(); test_max = OPT_ALLOC_MAX; for (size_t i = 0; i < N_THREADS; i++) { @@ -643,7 +637,7 @@ TEST_BEGIN(test_prof_recent_stress) { test_max = OPT_ALLOC_MAX; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); - confirm_prof_setup(tsd); + confirm_prof_setup(); } TEST_END From 857ebd3daf71963e522cdbc51725ad33b7368186 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 10 Apr 2020 16:26:55 -0700 Subject: [PATCH 1749/2608] Make edata pointer on prof recent record an atomic fence --- include/jemalloc/internal/prof_recent.h | 1 + include/jemalloc/internal/prof_structs.h | 2 +- src/prof_recent.c | 49 +++++++++++++++++------- test/unit/prof_recent.c | 9 +++-- 4 files changed, 42 insertions(+), 
19 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index bd046526..defc5fb2 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -9,6 +9,7 @@ void edata_prof_recent_alloc_init(edata_t *edata); #ifdef JEMALLOC_JET typedef ql_head(prof_recent_t) prof_recent_list_t; extern prof_recent_list_t prof_recent_alloc_list; +edata_t *prof_recent_alloc_edata_get_no_lock(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock(const edata_t *edata); #endif diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 73ef8fc5..26942aa6 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -205,8 +205,8 @@ struct prof_recent_s { ql_elm(prof_recent_t) link; size_t size; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ prof_tctx_t *alloc_tctx; - edata_t *alloc_edata; /* NULL means allocation has been freed. 
*/ prof_tctx_t *dalloc_tctx; }; diff --git a/src/prof_recent.c b/src/prof_recent.c index 5292c212..37fb01dd 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -102,6 +102,26 @@ decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { prof_tctx_try_destroy(tsd, tctx); } +#ifndef JEMALLOC_JET +static inline +#endif +edata_t * +prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) { + return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE); +} + +static inline edata_t * +prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + return prof_recent_alloc_edata_get_no_lock(n); +} + +static void +prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + atomic_store_p(&n->alloc_edata, edata, ATOMIC_RELEASE); +} + void edata_prof_recent_alloc_init(edata_t *edata) { edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); @@ -120,7 +140,8 @@ edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_t *recent_alloc = edata_prof_recent_alloc_get_no_lock(edata); - assert(recent_alloc == NULL || recent_alloc->alloc_edata == edata); + assert(recent_alloc == NULL || + prof_recent_alloc_edata_get(tsd, recent_alloc) == edata); return recent_alloc; } @@ -137,22 +158,24 @@ edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata, static void edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc); assert(old_recent_alloc == NULL); - recent_alloc->alloc_edata = edata; + prof_recent_alloc_edata_set(tsd, recent_alloc, edata); } static void edata_prof_recent_alloc_reset(tsd_t *tsd, 
edata_t *edata, prof_recent_t *recent_alloc) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_update_internal(tsd, edata, NULL); assert(old_recent_alloc == recent_alloc); - assert(edata == recent_alloc->alloc_edata); - recent_alloc->alloc_edata = NULL; + assert(edata == prof_recent_alloc_edata_get(tsd, recent_alloc)); + prof_recent_alloc_edata_set(tsd, recent_alloc, NULL); } /* @@ -191,7 +214,6 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { /* Check again after acquiring the lock. */ prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata); if (recent != NULL) { - edata_prof_recent_alloc_reset(tsd, edata, recent); assert(nstime_equals_zero(&recent->dalloc_time)); assert(recent->dalloc_tctx == NULL); if (dalloc_tctx != NULL) { @@ -199,6 +221,7 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { recent->dalloc_tctx = dalloc_tctx; dalloc_tctx = NULL; } + edata_prof_recent_alloc_reset(tsd, edata, recent); } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -209,10 +232,11 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { } static void -prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent) { +prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - if (recent->alloc_edata != NULL) { - edata_prof_recent_alloc_reset(tsd, recent->alloc_edata, recent); + edata_t *edata = prof_recent_alloc_edata_get(tsd, recent_alloc); + if (edata != NULL) { + edata_prof_recent_alloc_reset(tsd, edata, recent_alloc); } } @@ -333,9 +357,9 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { tail->size = size; nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata)); tail->alloc_tctx = tctx; - edata_prof_recent_alloc_set(tsd, edata, tail); nstime_init_zero(&tail->dalloc_time); tail->dalloc_tctx = NULL; + 
edata_prof_recent_alloc_set(tsd, edata, tail); assert(!prof_recent_alloc_is_empty(tsd)); prof_recent_alloc_assert_count(tsd); @@ -460,7 +484,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_kv(emitter, "size", emitter_type_size, &node->size); size_t usize = sz_s2u(node->size); emitter_json_kv(emitter, "usize", emitter_type_size, &usize); - bool released = node->alloc_edata == NULL; + bool released = prof_recent_alloc_edata_get_no_lock(node) == NULL; emitter_json_kv(emitter, "released", emitter_type_bool, &released); emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64, @@ -472,8 +496,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { prof_recent_alloc_dump_bt(emitter, node->alloc_tctx); emitter_json_array_end(emitter); - if (node->dalloc_tctx != NULL) { - assert(released); + if (released && node->dalloc_tctx != NULL) { emitter_json_kv(emitter, "dalloc_thread_uid", emitter_type_uint64, &node->dalloc_tctx->thr_uid); assert(!nstime_equals_zero(&node->dalloc_time)); @@ -483,8 +506,6 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_array_kv_begin(emitter, "dalloc_trace"); prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx); emitter_json_array_end(emitter); - } else { - assert(nstime_equals_zero(&node->dalloc_time)); } emitter_json_object_end(emitter); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index d7dd352e..791cc4f2 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -107,7 +107,7 @@ confirm_malloc(void *p) { assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_eq(e, n->alloc_edata, + expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock(n), "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -122,9 +122,10 @@ static void 
confirm_record_living(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - assert_ptr_not_null(n->alloc_edata, + edata_t *edata = prof_recent_alloc_edata_get_no_lock(n); + assert_ptr_not_null(edata, "Recorded edata should not be NULL for living pointer"); - expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(n->alloc_edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -133,7 +134,7 @@ static void confirm_record_released(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_null(n->alloc_edata, + expect_ptr_null(prof_recent_alloc_edata_get_no_lock(n), "Recorded edata should be NULL for released pointer"); expect_ptr_not_null(n->dalloc_tctx, "dalloc_tctx in record should not be NULL for released pointer"); From 264d89d6415be31ee00dd3dd2460140f46cea2e9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 11:48:01 -0700 Subject: [PATCH 1750/2608] Extract restore and async cleanup functions for prof last-N list --- src/prof_recent.c | 65 +++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 37fb01dd..fd63d504 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -399,23 +399,17 @@ prof_recent_alloc_max_ctl_read() { return prof_recent_alloc_max_get_no_lock(); } -ssize_t -prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { - assert(max >= -1); - - malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_alloc_assert_count(tsd); - - const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); - +static void +prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) { + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + ssize_t max = prof_recent_alloc_max_get(tsd); 
if (max == -1 || prof_recent_alloc_count <= max) { /* Easy case - no need to alter the list. */ - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - return old_max; + ql_new(to_delete); + prof_recent_alloc_assert_count(tsd); + return; } - /* For verification purpose only. */ - ssize_t count = prof_recent_alloc_count - max; prof_recent_t *node; ql_foreach(node, &prof_recent_alloc_list, link) { if (prof_recent_alloc_count == max) { @@ -426,42 +420,41 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { } assert(prof_recent_alloc_count == max); - prof_recent_list_t old_list; - ql_move(&old_list, &prof_recent_alloc_list); + ql_move(to_delete, &prof_recent_alloc_list); if (max == 0) { assert(node == NULL); } else { assert(node != NULL); - ql_split(&old_list, node, &prof_recent_alloc_list, link); + ql_split(to_delete, node, &prof_recent_alloc_list, link); } - assert(!ql_empty(&old_list)); - + assert(!ql_empty(to_delete)); prof_recent_alloc_assert_count(tsd); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); +} - /* - * Asynchronously handle the tctx of the to-be-deleted nodes, so that - * there's no simultaneous holdings of prof_recent_alloc_mtx and - * tdata->lock. In the worst case there can be slightly extra space - * overhead taken by these nodes, but the total number of nodes at any - * time is bounded by (max + sum(decreases)), where "max" means the - * most recent prof_recent_alloc_max and "sum(decreases)" means the - * sum of the deltas of all decreases in prof_recent_alloc_max in the - * past. This (max + sum(decreases)) value is completely transparent - * to and controlled by application. 
- */ - do { - node = ql_first(&old_list); - ql_remove(&old_list, node, link); +static void +prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + while (!ql_empty(to_delete)) { + prof_recent_t *node = ql_first(to_delete); + ql_remove(to_delete, node, link); decrement_recent_count(tsd, node->alloc_tctx); if (node->dalloc_tctx != NULL) { decrement_recent_count(tsd, node->dalloc_tctx); } prof_recent_free_node(tsd_tsdn(tsd), node); - --count; - } while (!ql_empty(&old_list)); - assert(count == 0); + } +} +ssize_t +prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { + assert(max >= -1); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); + prof_recent_list_t to_delete; + prof_recent_alloc_restore_locked(tsd, &to_delete); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_async_cleanup(tsd, &to_delete); return old_max; } From fc8bc4b5c04501f17f7a3c3a5f3efafbf9b2a82e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 11:51:25 -0700 Subject: [PATCH 1751/2608] Increase dump buffer for prof last-N list --- src/prof_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index fd63d504..ab4ab8d4 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -504,7 +504,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_object_end(emitter); } -#define PROF_RECENT_PRINT_BUFSIZE 4096 +#define PROF_RECENT_PRINT_BUFSIZE 65536 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { buf_writer_t buf_writer; From a835d9cf85286cb0f05c644790df48461544c4d9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 11:54:03 -0700 Subject: [PATCH 1752/2608] Make prof last-N dumping non-blocking --- 
src/prof_recent.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index ab4ab8d4..d0a83aac 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -513,31 +513,37 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { emitter_t emitter; emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer); - emitter_begin(&emitter); + prof_recent_list_t temp_list; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); + ssize_t dump_max = prof_recent_alloc_max_get(tsd); + ql_move(&temp_list, &prof_recent_alloc_list); + ssize_t dump_count = prof_recent_alloc_count; + prof_recent_alloc_count = 0; + prof_recent_alloc_assert_count(tsd); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - /* - * Set prof_recent_alloc_max to 0 so that dumping won't block sampled - * allocations: the allocations can complete but will not be recorded. 
- */ - ssize_t max = prof_recent_alloc_max_update(tsd, 0); - - emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &max); - + emitter_begin(&emitter); + emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, + &dump_max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); prof_recent_t *node; - ql_foreach(node, &prof_recent_alloc_list, link) { + ql_foreach(node, &temp_list, link) { prof_recent_alloc_dump_node(&emitter, node); } emitter_json_array_end(&emitter); + emitter_end(&emitter); - max = prof_recent_alloc_max_update(tsd, max); - assert(max == 0); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); + prof_recent_alloc_assert_count(tsd); + ql_concat(&temp_list, &prof_recent_alloc_list, link); + ql_move(&prof_recent_alloc_list, &temp_list); + prof_recent_alloc_count += dump_count; + prof_recent_alloc_restore_locked(tsd, &temp_list); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - emitter_end(&emitter); + prof_recent_alloc_async_cleanup(tsd, &temp_list); buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); } #undef PROF_RECENT_PRINT_BUFSIZE From 3e19ebd2ea5372c2f5932af6bb268ae8cb5df354 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 12:05:51 -0700 Subject: [PATCH 1753/2608] Add lock to protect prof last-N dumping --- include/jemalloc/internal/prof_recent.h | 2 ++ include/jemalloc/internal/witness.h | 27 +++++++++++++------------ src/prof.c | 3 +++ src/prof_recent.c | 18 +++++++++++++---- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index defc5fb2..f97273cb 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H #define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H +extern malloc_mutex_t prof_recent_dump_mtx; + bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); void prof_recent_alloc(tsd_t 
*tsd, edata_t *edata, size_t size); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index b5fa1c02..58f72664 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -29,7 +29,8 @@ #define WITNESS_RANK_PROF_TDATA 8U #define WITNESS_RANK_PROF_LOG 9U #define WITNESS_RANK_PROF_GCTX 10U -#define WITNESS_RANK_BACKGROUND_THREAD 11U +#define WITNESS_RANK_PROF_RECENT_DUMP 11U +#define WITNESS_RANK_BACKGROUND_THREAD 12U /* * Used as an argument to witness_assert_depth_to_rank() in order to validate @@ -37,19 +38,19 @@ * witness_assert_depth_to_rank() is inclusive rather than exclusive, this * definition can have the same value as the minimally ranked core lock. */ -#define WITNESS_RANK_CORE 12U +#define WITNESS_RANK_CORE 13U -#define WITNESS_RANK_DECAY 12U -#define WITNESS_RANK_TCACHE_QL 13U -#define WITNESS_RANK_EXTENT_GROW 14U -#define WITNESS_RANK_EXTENTS 15U -#define WITNESS_RANK_EDATA_CACHE 16U +#define WITNESS_RANK_DECAY 13U +#define WITNESS_RANK_TCACHE_QL 14U +#define WITNESS_RANK_EXTENT_GROW 15U +#define WITNESS_RANK_EXTENTS 16U +#define WITNESS_RANK_EDATA_CACHE 17U -#define WITNESS_RANK_EMAP 17U -#define WITNESS_RANK_RTREE 18U -#define WITNESS_RANK_BASE 19U -#define WITNESS_RANK_ARENA_LARGE 20U -#define WITNESS_RANK_HOOK 21U +#define WITNESS_RANK_EMAP 18U +#define WITNESS_RANK_RTREE 19U +#define WITNESS_RANK_BASE 20U +#define WITNESS_RANK_ARENA_LARGE 21U +#define WITNESS_RANK_HOOK 22U #define WITNESS_RANK_LEAF 0xffffffffU #define WITNESS_RANK_BIN WITNESS_RANK_LEAF @@ -60,8 +61,8 @@ #define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF +#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF 
/******************************************************************************/ /* PER-WITNESS DATA */ diff --git a/src/prof.c b/src/prof.c index c8da81da..38a3db27 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1116,6 +1116,7 @@ prof_prefork0(tsdn_t *tsdn) { for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_prefork(tsdn, &gctx_locks[i]); } + malloc_mutex_prefork(tsdn, &prof_recent_dump_mtx); } } @@ -1145,6 +1146,7 @@ prof_postfork_parent(tsdn_t *tsdn) { malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); counter_postfork_parent(tsdn, &prof_idump_accumulated); + malloc_mutex_postfork_parent(tsdn, &prof_recent_dump_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]); } @@ -1170,6 +1172,7 @@ prof_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); counter_postfork_child(tsdn, &prof_idump_accumulated); + malloc_mutex_postfork_child(tsdn, &prof_recent_dump_mtx); for (i = 0; i < PROF_NCTX_LOCKS; i++) { malloc_mutex_postfork_child(tsdn, &gctx_locks[i]); } diff --git a/src/prof_recent.c b/src/prof_recent.c index d0a83aac..949ae76b 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -18,6 +18,8 @@ static #endif prof_recent_list_t prof_recent_alloc_list; +malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. 
*/ + static void prof_recent_alloc_max_init() { atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, @@ -433,6 +435,7 @@ prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) { static void prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) { + malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_dump_mtx); malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); while (!ql_empty(to_delete)) { prof_recent_t *node = ql_first(to_delete); @@ -507,6 +510,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { #define PROF_RECENT_PRINT_BUFSIZE 65536 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { + malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx); buf_writer_t buf_writer; buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, PROF_RECENT_PRINT_BUFSIZE); @@ -543,8 +547,10 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { prof_recent_alloc_restore_locked(tsd, &temp_list); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_alloc_async_cleanup(tsd, &temp_list); buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_dump_mtx); + + prof_recent_alloc_async_cleanup(tsd, &temp_list); } #undef PROF_RECENT_PRINT_BUFSIZE @@ -552,9 +558,13 @@ bool prof_recent_init() { prof_recent_alloc_max_init(); - if (malloc_mutex_init(&prof_recent_alloc_mtx, - "prof_recent_alloc", WITNESS_RANK_PROF_RECENT_ALLOC, - malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc", + WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) { + return true; + } + + if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump", + WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) { return true; } From 17a64fe91c4b424d10c96c94051d562390471810 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 
2020 11:56:36 -0700 Subject: [PATCH 1754/2608] Add a small program to print data structure sizes. --- Makefile.in | 3 ++- test/stress/sizes.c | 50 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 test/stress/sizes.c diff --git a/Makefile.in b/Makefile.in index e7666fb1..cd927cfe 100644 --- a/Makefile.in +++ b/Makefile.in @@ -291,7 +291,8 @@ endif TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ - $(srcroot)test/stress/hookbench.c + $(srcroot)test/stress/hookbench.c \ + $(srcroot)test/stress/sizes.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) diff --git a/test/stress/sizes.c b/test/stress/sizes.c new file mode 100644 index 00000000..7360494f --- /dev/null +++ b/test/stress/sizes.c @@ -0,0 +1,50 @@ +#include "test/jemalloc_test.h" + +#include + +/* + * Print the sizes of various important core data structures. OK, I guess this + * isn't really a "stress" test, but it does give useful information about + * low-level performance characteristics, as the other things in this directory + * do. 
+ */ + +static void +do_print(const char *name, size_t sz_bytes) { + const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB", + "ZB"}; + size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]); + + size_t ind = 0; + double sz = sz_bytes; + while (sz >= 1024 && ind < sizes_max) { + sz /= 1024; + ind++; + } + if (ind == 0) { + printf("%-20s: %zu bytes\n", name, sz_bytes); + } else { + printf("%-20s: %f %s\n", name, sz, sizes[ind]); + } +} + +int +main() { +#define P(type) \ + do_print(#type, sizeof(type)) + P(arena_t); + P(arena_stats_t); + P(base_t); + P(decay_t); + P(edata_t); + P(ecache_t); + P(eset_t); + P(malloc_mutex_t); + P(prof_tctx_t); + P(prof_gctx_t); + P(prof_tdata_t); + P(tcache_t); + P(tcache_slow_t); + P(tsd_t); +#undef P +} From fe7108305a449df3d28f68e6bd9ff74dea68946b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 14:30:28 -0700 Subject: [PATCH 1755/2608] Add peak_t, for tracking allocator net max. --- Makefile.in | 1 + include/jemalloc/internal/peak.h | 37 +++++++++++++++++++++++++ test/unit/peak.c | 47 ++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 include/jemalloc/internal/peak.h create mode 100644 test/unit/peak.c diff --git a/Makefile.in b/Makefile.in index cd927cfe..b211f889 100644 --- a/Makefile.in +++ b/Makefile.in @@ -222,6 +222,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ + $(srcroot)test/unit/peak.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h new file mode 100644 index 00000000..59da3e41 --- /dev/null +++ b/include/jemalloc/internal/peak.h @@ -0,0 +1,37 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_H +#define JEMALLOC_INTERNAL_PEAK_H + +typedef struct peak_s peak_t; +struct peak_s { + /* The highest recorded peak value, after adjustment (see below). 
*/ + uint64_t cur_max; + /* + * The difference between alloc and dalloc at the last set_zero call; + * this lets us cancel out the appropriate amount of excess. + */ + uint64_t adjustment; +}; + +#define PEAK_INITIALIZER {0, 0} + +static inline uint64_t +peak_max(peak_t *peak) { + return peak->cur_max; +} + +static inline void +peak_update(peak_t *peak, uint64_t alloc, uint64_t dalloc) { + int64_t candidate_max = (int64_t)(alloc - dalloc - peak->adjustment); + if (candidate_max > (int64_t)peak->cur_max) { + peak->cur_max = candidate_max; + } +} + +/* Resets the counter to zero; all peaks are now relative to this point. */ +static inline void +peak_set_zero(peak_t *peak, uint64_t alloc, uint64_t dalloc) { + peak->cur_max = 0; + peak->adjustment = alloc - dalloc; +} + +#endif /* JEMALLOC_INTERNAL_PEAK_H */ diff --git a/test/unit/peak.c b/test/unit/peak.c new file mode 100644 index 00000000..11129785 --- /dev/null +++ b/test/unit/peak.c @@ -0,0 +1,47 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/peak.h" + +TEST_BEGIN(test_peak) { + peak_t peak = PEAK_INITIALIZER; + expect_u64_eq(0, peak_max(&peak), + "Peak should be zero at initialization"); + peak_update(&peak, 100, 50); + expect_u64_eq(50, peak_max(&peak), + "Missed update"); + peak_update(&peak, 100, 100); + expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak"); + peak_update(&peak, 100, 200); + expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak"); + peak_update(&peak, 200, 200); + expect_u64_eq(50, peak_max(&peak), "Haven't reached peak again"); + peak_update(&peak, 300, 200); + expect_u64_eq(100, peak_max(&peak), "Missed an update."); + peak_set_zero(&peak, 300, 200); + expect_u64_eq(0, peak_max(&peak), "No effect from zeroing"); + peak_update(&peak, 300, 300); + expect_u64_eq(0, peak_max(&peak), "Dalloc shouldn't change peak"); + peak_update(&peak, 400, 300); + expect_u64_eq(0, peak_max(&peak), "Should still be net negative"); + peak_update(&peak, 500, 300); + 
expect_u64_eq(100, peak_max(&peak), "Missed an update."); + /* + * Above, we set to zero while a net allocator; let's try as a + * net-deallocator. + */ + peak_set_zero(&peak, 600, 700); + expect_u64_eq(0, peak_max(&peak), "No effect from zeroing."); + peak_update(&peak, 600, 800); + expect_u64_eq(0, peak_max(&peak), "Dalloc shouldn't change peak."); + peak_update(&peak, 700, 800); + expect_u64_eq(0, peak_max(&peak), "Should still be net negative."); + peak_update(&peak, 800, 800); + expect_u64_eq(100, peak_max(&peak), "Missed an update."); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_peak); +} From d82a164d0ddb5418de3b6a07dd302edddc347129 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 May 2020 14:31:00 -0700 Subject: [PATCH 1756/2608] Add thread.peak.[read|reset] mallctls. These can be used to track net allocator activity on a per-thread basis. --- Makefile.in | 1 + doc/jemalloc.xml.in | 36 ++++++++++ include/jemalloc/internal/peak_event.h | 24 +++++++ include/jemalloc/internal/thread_event.h | 10 +-- include/jemalloc/internal/tsd.h | 7 ++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 41 +++++++++++ src/peak_event.c | 67 ++++++++++++++++++ src/thread_event.c | 10 +++ test/unit/mallctl.c | 70 ++++++++++++++++++- 13 files changed, 269 insertions(+), 5 deletions(-) create mode 100644 include/jemalloc/internal/peak_event.h create mode 100644 src/peak_event.c diff --git a/Makefile.in b/Makefile.in index b211f889..2f3fea1e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -129,6 +129,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ $(srcroot)src/pages.c \ + $(srcroot)src/peak_event.c \ $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 
1baf1f6a..5ab84568 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1621,6 +1621,42 @@ malloc_conf = "xmalloc:true";]]> should not be modified by the application. + + + thread.peak.read + (uint64_t) + r- + [] + + Get an approximation of the maximum value of the + difference between the number of bytes allocated and the number of bytes + deallocated by the calling thread since the last call to thread.peak.reset, + or since the thread's creation if it has not called thread.peak.reset. + No guarantees are made about the quality of the approximation, but + jemalloc currently endeavors to maintain accuracy to within one hundred + kilobytes. + + + + + + thread.peak.reset + (void) + -- + [] + + Resets the counter for net bytes allocated in the calling + thread to zero. This affects subsequent calls to thread.peak.read, + but not the values returned by thread.allocated + or thread.deallocated. + + + thread.tcache.enabled diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h new file mode 100644 index 00000000..b808ce04 --- /dev/null +++ b/include/jemalloc/internal/peak_event.h @@ -0,0 +1,24 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H +#define JEMALLOC_INTERNAL_PEAK_EVENT_H + +/* + * While peak.h contains the simple helper struct that tracks state, this + * contains the allocator tie-ins (and knows about tsd, the event module, etc.). + */ + +/* Update the peak with current tsd state. */ +void peak_event_update(tsd_t *tsd); +/* Set current state to zero. */ +void peak_event_zero(tsd_t *tsd); +uint64_t peak_event_max(tsd_t *tsd); + +/* Manual hooks. */ +/* The activity-triggered hooks. 
*/ +uint64_t peak_alloc_new_event_wait(tsd_t *tsd); +uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd); +void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed); +uint64_t peak_dalloc_new_event_wait(tsd_t *tsd); +uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd); +void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); + +#endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 2fcaa88a..bca8a447 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -53,10 +53,12 @@ void tsd_te_init(tsd_t *tsd); * E(event, (condition), is_alloc_event) */ #define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ - E(prof_sample, (config_prof && opt_prof), true) \ - E(stats_interval, (opt_stats_interval >= 0), true) \ - E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) + E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ + E(prof_sample, (config_prof && opt_prof), true) \ + E(stats_interval, (opt_stats_interval >= 0), true) \ + E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \ + E(peak_alloc, config_stats, true) \ + E(peak_dalloc, config_stats, false) #define E(event, condition_unused, is_alloc_event_unused) \ C(event##_event_wait) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 18bdb8fd..9408b2ca 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/peak.h" #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rtree_tsd.h" @@ -69,6 +70,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(prof_sample_last_event, uint64_t, uint64_t) \ O(stats_interval_event_wait, uint64_t, uint64_t) \ 
O(stats_interval_last_event, uint64_t, uint64_t) \ + O(peak_alloc_event_wait, uint64_t, uint64_t) \ + O(peak_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ @@ -77,6 +80,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ + O(peak, peak_t, peak_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) @@ -95,6 +99,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* prof_sample_last_event */ 0, \ /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ + /* peak_alloc_event_wait */ 0, \ + /* peak_dalloc_event_wait */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ /* iarena */ NULL, \ @@ -103,6 +109,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ /* in_hook */ false, \ + /* peak */ PEAK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9f81e21d..d50fa884 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -70,6 +70,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 15fe7f08..94db8c0c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index b5fccaed..337dcfe7 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -70,6 +70,7 @@ + 
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 15fe7f08..94db8c0c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -94,6 +94,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index be8be10f..0bd38feb 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -9,6 +9,7 @@ #include "jemalloc/internal/inspect.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" @@ -61,6 +62,8 @@ CTL_PROTO(background_thread) CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_peak_read) +CTL_PROTO(thread_peak_reset) CTL_PROTO(thread_prof_name) CTL_PROTO(thread_prof_active) CTL_PROTO(thread_arena) @@ -294,6 +297,11 @@ static const ctl_named_node_t thread_tcache_node[] = { {NAME("flush"), CTL(thread_tcache_flush)} }; +static const ctl_named_node_t thread_peak_node[] = { + {NAME("read"), CTL(thread_peak_read)}, + {NAME("reset"), CTL(thread_peak_reset)}, +}; + static const ctl_named_node_t thread_prof_node[] = { {NAME("name"), CTL(thread_prof_name)}, {NAME("active"), CTL(thread_prof_active)} @@ -306,6 +314,7 @@ static const ctl_named_node_t thread_node[] = { {NAME("deallocated"), CTL(thread_deallocated)}, {NAME("deallocatedp"), CTL(thread_deallocatedp)}, {NAME("tcache"), CHILD(named, thread_tcache)}, + {NAME("peak"), CHILD(named, thread_peak)}, {NAME("prof"), CHILD(named, thread_prof)}, {NAME("idle"), CTL(thread_idle)} }; @@ -1953,6 +1962,38 @@ label_return: return ret; } +static int +thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + if (!config_stats) { + return ENOENT; + } + READONLY(); + peak_event_update(tsd); + 
uint64_t result = peak_event_max(tsd); + READ(result, uint64_t); + ret = 0; +label_return: + return ret; +} + +static int +thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + if (!config_stats) { + return ENOENT; + } + NEITHER_READ_NOR_WRITE(); + peak_event_zero(tsd); + ret = 0; +label_return: + return ret; +} + static int thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, diff --git a/src/peak_event.c b/src/peak_event.c new file mode 100644 index 00000000..ffb061bf --- /dev/null +++ b/src/peak_event.c @@ -0,0 +1,67 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/peak.h" +#include "jemalloc/internal/peak_event.h" + +/* + * Update every 100k by default. We're not exposing this as a configuration + * option for now; we don't want to bind ourselves too tightly to any particular + * performance requirements for small values, or guarantee that we'll even be + * able to provide fine-grained accuracy. + */ +#define PEAK_EVENT_WAIT (100 * 1024) + +/* Update the peak with current tsd state. */ +void +peak_event_update(tsd_t *tsd) { + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); + peak_update(peak, alloc, dalloc); +} + +/* Set current state to zero. 
*/ +void +peak_event_zero(tsd_t *tsd) { + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); + peak_set_zero(peak, alloc, dalloc); +} + +uint64_t +peak_event_max(tsd_t *tsd) { + peak_t *peak = tsd_peakp_get(tsd); + return peak_max(peak); +} + +uint64_t +peak_alloc_new_event_wait(tsd_t *tsd) { + return PEAK_EVENT_WAIT; +} + +uint64_t +peak_alloc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +void +peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) { + peak_event_update(tsd); +} + +uint64_t +peak_dalloc_new_event_wait(tsd_t *tsd) { + return PEAK_EVENT_WAIT; +} + +uint64_t +peak_dalloc_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +void +peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { + peak_event_update(tsd); +} diff --git a/src/thread_event.c b/src/thread_event.c index 40c0487e..99a188dd 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -60,6 +60,16 @@ stats_interval_fetch_elapsed(tsd_t *tsd) { return last_event - last_stats_event; } +static uint64_t +peak_alloc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; +} + +static uint64_t +peak_dalloc_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; +} + /* Per event facilities done. */ static bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cc1d5313..10d809fb 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -955,6 +955,73 @@ TEST_BEGIN(test_thread_idle) { } TEST_END +TEST_BEGIN(test_thread_peak) { + test_skip_if(!config_stats); + + /* + * We don't commit to any stable amount of accuracy for peak tracking + * (in practice, when this test was written, we made sure to be within + * 100k). But 10MB is big for more or less any definition of big. 
+ */ + size_t big_size = 10 * 1024 * 1024; + size_t small_size = 256; + + void *ptr; + int err; + size_t sz; + uint64_t peak; + sz = sizeof(uint64_t); + + err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, ""); + ptr = mallocx(SC_SMALL_MAXCLASS, 0); + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Missed an update"); + free(ptr); + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_eq(peak, SC_SMALL_MAXCLASS, "Freeing changed peak"); + ptr = mallocx(big_size, 0); + free(ptr); + /* + * The peak should have hit big_size in the last two lines, even though + * the net allocated bytes has since dropped back down to zero. We + * should have noticed the peak change without having down any mallctl + * calls while net allocated bytes was high. + */ + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_ge(peak, big_size, "Missed a peak change."); + + /* Allocate big_size, but using small allocations. */ + size_t nallocs = big_size / small_size; + void **ptrs = calloc(nallocs, sizeof(void *)); + err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, ""); + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + expect_u64_eq(0, peak, "Missed a reset."); + for (size_t i = 0; i < nallocs; i++) { + ptrs[i] = mallocx(small_size, 0); + } + for (size_t i = 0; i < nallocs; i++) { + free(ptrs[i]); + } + err = mallctl("thread.peak.read", &peak, &sz, NULL, 0); + expect_d_eq(err, 0, ""); + /* + * We don't guarantee exactness; make sure we're within 10% of the peak, + * though. 
+ */ + expect_u64_ge(peak, nallocx(small_size, 0) * nallocs * 9 / 10, + "Missed some peak changes."); + expect_u64_le(peak, nallocx(small_size, 0) * nallocs * 11 / 10, + "Overcounted peak changes."); + free(ptrs); +} +TEST_END + int main(void) { return test( @@ -987,5 +1054,6 @@ main(void) { test_stats_arenas, test_hooks, test_hooks_exhaustion, - test_thread_idle); + test_thread_idle, + test_thread_peak); } From 4aea7432795414a72034ef35959078c64c69078e Mon Sep 17 00:00:00 2001 From: Jon Haslam Date: Tue, 2 Jun 2020 06:42:44 -0700 Subject: [PATCH 1757/2608] High Resolution Timestamps for Profiling --- configure.ac | 12 +++++ .../internal/jemalloc_internal_defs.h.in | 5 ++ .../internal/jemalloc_internal_externs.h | 1 + .../jemalloc/internal/jemalloc_preamble.h.in | 7 +++ include/jemalloc/internal/nstime.h | 17 +++++- src/ctl.c | 6 ++- src/jemalloc.c | 21 ++++++++ src/large.c | 2 +- src/nstime.c | 54 ++++++++++++++++--- src/prof_log.c | 7 ++- src/prof_recent.c | 2 +- test/unit/arena_decay.c | 3 +- test/unit/nstime.c | 25 +-------- 13 files changed, 123 insertions(+), 39 deletions(-) diff --git a/configure.ac b/configure.ac index 787ef1b0..d9fdebd7 100644 --- a/configure.ac +++ b/configure.ac @@ -1776,6 +1776,18 @@ if test "x${je_cv_mach_absolute_time}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) fi +dnl check for CLOCK_REALTIME (always should be available on Linux) +JE_COMPILABLE([clock_gettime(CLOCK_REALTIME, ...)], [ +#include +], [ + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); +], [je_cv_clock_realtime]) +if test "x${je_cv_clock_realtime}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME]) +fi + dnl Use syscall(2) (if available) by default. 
AC_ARG_ENABLE([syscall], [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c442a219..83e733e3 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -100,6 +100,11 @@ */ #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_REALTIME + /* * Defined if _malloc_thread_cleanup() exists. At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 338a590f..3dea1e21 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/tsd_types.h" +#include "jemalloc/internal/nstime.h" /* TSD checks this to set thread local slow state accordingly. 
*/ extern bool malloc_slow; diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 3418cbfa..66302ab3 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -209,5 +209,12 @@ static const bool have_background_thread = false #endif ; +static const bool config_high_res_timer = +#ifdef JEMALLOC_HAVE_CLOCK_REALTIME + true +#else + false +#endif + ; #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index c4bee24d..76e4351a 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -30,10 +30,23 @@ uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); typedef bool (nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; -typedef bool (nstime_update_t)(nstime_t *); +typedef void (nstime_update_t)(nstime_t *); extern nstime_update_t *JET_MUTABLE nstime_update; -bool nstime_init_update(nstime_t *time); +typedef void (nstime_prof_update_t)(nstime_t *); +extern nstime_prof_update_t *JET_MUTABLE nstime_prof_update; + +void nstime_init_update(nstime_t *time); +void nstime_prof_init_update(nstime_t *time); + +enum prof_time_res_e { + prof_time_res_default = 0, + prof_time_res_high = 1 +}; +typedef enum prof_time_res_e prof_time_res_t; + +extern prof_time_res_t opt_prof_time_res; +extern const char *prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { diff --git a/src/ctl.c b/src/ctl.c index 0bd38feb..24d9eb34 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -129,6 +129,7 @@ CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_experimental_use_sys_thread_name) +CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -385,7 +386,8 @@ static const ctl_named_node_t opt_node[] = 
{ {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("prof_experimental_use_sys_thread_name"), CTL(opt_prof_experimental_use_sys_thread_name)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)} + {NAME("zero_realloc"), CTL(opt_zero_realloc)}, + {NAME("prof_time_resolution"), CTL(opt_prof_time_res)} }; static const ctl_named_node_t tcache_node[] = { @@ -1853,6 +1855,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_experimental_use_sys_thread_name, opt_prof_experimental_use_sys_thread_name, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, + prof_time_res_mode_names[opt_prof_time_res], const char *) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) diff --git a/src/jemalloc.c b/src/jemalloc.c index 74355d40..bb1b38cb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -14,6 +14,7 @@ #include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" @@ -1497,6 +1498,26 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL( opt_prof_experimental_use_sys_thread_name, "prof_experimental_use_sys_thread_name") + if (CONF_MATCH("prof_time_resolution")) { + if (CONF_MATCH_VALUE("default")) { + opt_prof_time_res = + prof_time_res_default; + } else if (CONF_MATCH_VALUE("high")) { + if (!config_high_res_timer) { + CONF_ERROR( + "No high resolution" + " timer support", + k, klen, v, vlen); + } else { + opt_prof_time_res = + prof_time_res_high; + } + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + } + CONF_CONTINUE; } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/large.c b/src/large.c index b8439371..cc3e727c 100644 --- a/src/large.c +++ b/src/large.c 
@@ -305,7 +305,7 @@ large_prof_tctx_reset(edata_t *edata) { void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { nstime_t t; - nstime_init_update(&t); + nstime_prof_init_update(&t); edata_prof_alloc_time_set(edata, &t); edata_prof_recent_alloc_init(edata); large_prof_tctx_set(edata, tctx); diff --git a/src/nstime.c b/src/nstime.c index eb8f6c0a..184aa4c9 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -152,25 +152,65 @@ nstime_monotonic_impl(void) { } nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; -static bool +prof_time_res_t opt_prof_time_res = + prof_time_res_default; + +const char *prof_time_res_mode_names[] = { + "default", + "high", +}; + + +static void +nstime_get_realtime(nstime_t *time) { +#if defined(JEMALLOC_HAVE_CLOCK_REALTIME) && !defined(_WIN32) + struct timespec ts; + + clock_gettime(CLOCK_REALTIME, &ts); + nstime_init2(time, ts.tv_sec, ts.tv_nsec); +#else + unreachable(); +#endif +} + +static void +nstime_prof_update_impl(nstime_t *time) { + nstime_t old_time; + + nstime_copy(&old_time, time); + + if (opt_prof_time_res == prof_time_res_high) { + nstime_get_realtime(time); + } else { + nstime_get(time); + } +} +nstime_prof_update_t *JET_MUTABLE nstime_prof_update = nstime_prof_update_impl; + +static void nstime_update_impl(nstime_t *time) { nstime_t old_time; nstime_copy(&old_time, time); - nstime_get(time); + nstime_get(time); /* Handle non-monotonic clocks. 
*/ if (unlikely(nstime_compare(&old_time, time) > 0)) { nstime_copy(time, &old_time); - return true; } - - return false; } nstime_update_t *JET_MUTABLE nstime_update = nstime_update_impl; -bool +void nstime_init_update(nstime_t *time) { nstime_init_zero(time); - return nstime_update(time); + nstime_update(time); } + +void +nstime_prof_init_update(nstime_t *time) { + nstime_init_zero(time); + nstime_prof_update(time); +} + + diff --git a/src/prof_log.c b/src/prof_log.c index 1635979e..7fea8548 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -235,7 +235,7 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { nstime_t alloc_time = prof_info->alloc_time; nstime_t free_time; - nstime_init_update(&free_time); + nstime_prof_init_update(&free_time); size_t sz = sizeof(prof_alloc_node_t); prof_alloc_node_t *new_node = (prof_alloc_node_t *) @@ -572,6 +572,11 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_kv(emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample); + const char *res_type = + prof_time_res_mode_names[opt_prof_time_res]; + emitter_json_kv(emitter, "prof_time_resolution", + emitter_type_string, &res_type); + int pid = prof_getpid(); emitter_json_kv(emitter, "pid", emitter_type_int, &pid); diff --git a/src/prof_recent.c b/src/prof_recent.c index 949ae76b..270691ac 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -219,7 +219,7 @@ prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { assert(nstime_equals_zero(&recent->dalloc_time)); assert(recent->dalloc_tctx == NULL); if (dalloc_tctx != NULL) { - nstime_update(&recent->dalloc_time); + nstime_prof_update(&recent->dalloc_time); recent->dalloc_tctx = dalloc_tctx; dalloc_tctx = NULL; } diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 7ed270f4..86f7057d 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -26,13 +26,12 @@ nstime_monotonic_mock(void) { return monotonic_mock; } -static bool +static void nstime_update_mock(nstime_t *time) { 
nupdates_mock++; if (monotonic_mock) { nstime_copy(time, &time_mock); } - return !monotonic_mock; } static unsigned diff --git a/test/unit/nstime.c b/test/unit/nstime.c index bf875017..083002bd 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -206,28 +206,6 @@ TEST_BEGIN(test_nstime_monotonic) { } TEST_END -TEST_BEGIN(test_nstime_update) { - nstime_t nst; - - expect_false(nstime_init_update(&nst), "Basic time update failed."); - - /* Only Rip Van Winkle sleeps this long. */ - { - nstime_t addend; - nstime_init2(&addend, 631152000, 0); - nstime_add(&nst, &addend); - } - { - nstime_t nst0; - nstime_copy(&nst0, &nst); - expect_true(nstime_update(&nst), - "Update should detect time roll-back."); - expect_d_eq(nstime_compare(&nst, &nst0), 0, - "Time should not have been modified"); - } -} -TEST_END - int main(void) { return test( @@ -242,6 +220,5 @@ main(void) { test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, - test_nstime_monotonic, - test_nstime_update); + test_nstime_monotonic); } From 40672b0b78207f3b624bd20772b24865d208f215 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 12 Jun 2020 20:12:15 -0700 Subject: [PATCH 1758/2608] Remove duplicate logging in malloc. --- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index bb1b38cb..f18fa61b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2437,7 +2437,11 @@ malloc_default(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - LOG("core.malloc.entry", "size: %zu", size); + /* + * This variant has logging hook on exit but not on entry. It's callled + * only by je_malloc, below, which emits the entry one for us (and, if + * it calls us, does so only via tail call). 
+ */ static_opts_init(&sopts); dynamic_opts_init(&dopts); From dcfa6fd507d29e4d686abb5263a195c22d187ca0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 12 Jun 2020 09:37:39 -0700 Subject: [PATCH 1759/2608] stress/sizes: Add a couple more types. --- test/stress/sizes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/stress/sizes.c b/test/stress/sizes.c index 7360494f..1bdfe160 100644 --- a/test/stress/sizes.c +++ b/test/stress/sizes.c @@ -43,6 +43,9 @@ main() { P(prof_tctx_t); P(prof_gctx_t); P(prof_tdata_t); + P(rtree_t); + P(rtree_leaf_elm_t); + P(slab_data_t); P(tcache_t); P(tcache_slow_t); P(tsd_t); From 7e09a57b395dc88af218873fd7f47c99c0542f4f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 12 Jun 2020 09:39:46 -0700 Subject: [PATCH 1760/2608] stress/sizes: Fix an off-by-one issue. Algorithmically, a size greater than 1024 ZB could access one-past-the-end of the sizes array. This couldn't really happen since SIZE_MAX is less than 1024 ZB on all platforms we support (and we pick the arguments to this function to be reasonable anyways), but it's not like there's any reason *not* to fix it, either. 
--- test/stress/sizes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/stress/sizes.c b/test/stress/sizes.c index 1bdfe160..44c9de5e 100644 --- a/test/stress/sizes.c +++ b/test/stress/sizes.c @@ -17,7 +17,7 @@ do_print(const char *name, size_t sz_bytes) { size_t ind = 0; double sz = sz_bytes; - while (sz >= 1024 && ind < sizes_max) { + while (sz >= 1024 && ind < sizes_max - 1) { sz /= 1024; ind++; } From 40fa6674a99a1bac85a4cb0f5cf10ce0e4878a5e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jun 2020 15:20:51 -0700 Subject: [PATCH 1761/2608] Fix prof timestamp conf reading --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index f18fa61b..0d84a012 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1516,8 +1516,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_ERROR("Invalid conf value", k, klen, v, vlen); } + CONF_CONTINUE; } - CONF_CONTINUE; } if (config_log) { if (CONF_MATCH("log")) { From b7858abfc0c605c451027c5f0209680b25ec8891 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jun 2020 09:57:54 -0700 Subject: [PATCH 1762/2608] Expose prof testing internal functions --- include/jemalloc/internal/prof_externs.h | 27 +++++++++++------------- include/jemalloc/internal/prof_log.h | 4 ++-- include/jemalloc/internal/prof_recent.h | 8 +++---- src/prof.c | 4 ---- src/prof_data.c | 8 +++---- src/prof_log.c | 19 ++++++----------- src/prof_recent.c | 24 ++++++++++----------- test/unit/prof_recent.c | 10 ++++----- 8 files changed, 46 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 3d9fcfb8..b4339659 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -57,21 +57,6 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); void 
prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); -#ifdef JEMALLOC_JET -typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); -extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; -size_t prof_tdata_count(void); -size_t prof_bt_count(void); -#endif -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *JET_MUTABLE prof_dump_open; - -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); -extern prof_dump_header_t *JET_MUTABLE prof_dump_header; -#ifdef JEMALLOC_JET -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes); -#endif int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); void prof_idump(tsdn_t *tsdn); @@ -104,6 +89,18 @@ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); +/* Used by unit tests. 
*/ +typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); +extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +typedef int (prof_dump_open_t)(bool, const char *); +extern prof_dump_open_t *JET_MUTABLE prof_dump_open; +typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); + bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index c8cc5a3e..928bf27b 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -7,13 +7,13 @@ extern malloc_mutex_t log_mtx; void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info); bool prof_log_init(tsd_t *tsdn); -#ifdef JEMALLOC_JET + +/* Used in unit tests. */ size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); size_t prof_log_thr_count(void); bool prof_log_is_logging(void); bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); -#endif #endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index f97273cb..f88413df 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -8,11 +8,11 @@ void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); -#ifdef JEMALLOC_JET + +/* Used in unit tests. 
*/ typedef ql_head(prof_recent_t) prof_recent_list_t; extern prof_recent_list_t prof_recent_alloc_list; -edata_t *prof_recent_alloc_edata_get_no_lock(const prof_recent_t *node); -prof_recent_t *edata_prof_recent_alloc_get_no_lock(const edata_t *edata); -#endif +edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); +prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); #endif /* JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ diff --git a/src/prof.c b/src/prof.c index 38a3db27..db895f85 100644 --- a/src/prof.c +++ b/src/prof.c @@ -205,12 +205,8 @@ prof_read_sys_thread_name_impl(char *buf, size_t limit) { return ENOSYS; #endif } -#ifdef JEMALLOC_JET prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name = prof_read_sys_thread_name_impl; -#else -#define prof_read_sys_thread_name prof_read_sys_thread_name_impl -#endif static void prof_fetch_sys_thread_name(tsd_t *tsd) { diff --git a/src/prof_data.c b/src/prof_data.c index 66ed36a0..fe9ef154 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -421,7 +421,7 @@ prof_tctx_create(tsd_t *tsd) { return prof_lookup(tsd, &bt); } -#ifdef JEMALLOC_JET +/* Used in unit tests. */ static prof_tdata_t * prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *arg) { @@ -432,6 +432,7 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } +/* Used in unit tests. */ size_t prof_tdata_count(void) { size_t tdata_count = 0; @@ -446,6 +447,7 @@ prof_tdata_count(void) { return tdata_count; } +/* Used in unit tests. */ size_t prof_bt_count(void) { size_t bt_count; @@ -464,7 +466,6 @@ prof_bt_count(void) { return bt_count; } -#endif static int prof_dump_open_impl(bool propagate_err, const char *filename) { @@ -1174,7 +1175,7 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, return false; } -#ifdef JEMALLOC_JET +/* Used in unit tests. 
*/ void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes) { @@ -1219,7 +1220,6 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; } } -#endif void prof_bt_hash(const void *key, size_t r_hash[2]) { diff --git a/src/prof_log.c b/src/prof_log.c index 7fea8548..00c76596 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -27,9 +27,8 @@ enum prof_logging_state_e { */ prof_logging_state_t prof_logging_state = prof_logging_state_stopped; -#ifdef JEMALLOC_JET +/* Used in unit tests. */ static bool prof_log_dummy = false; -#endif /* Incremented for every log file that is output. */ static uint64_t log_seq = 0; @@ -305,7 +304,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) { return thr_node1->thr_uid == thr_node2->thr_uid; } -#ifdef JEMALLOC_JET +/* Used in unit tests. */ size_t prof_log_bt_count(void) { size_t cnt = 0; @@ -317,6 +316,7 @@ prof_log_bt_count(void) { return cnt; } +/* Used in unit tests. */ size_t prof_log_alloc_count(void) { size_t cnt = 0; @@ -328,6 +328,7 @@ prof_log_alloc_count(void) { return cnt; } +/* Used in unit tests. */ size_t prof_log_thr_count(void) { size_t cnt = 0; @@ -339,11 +340,13 @@ prof_log_thr_count(void) { return cnt; } +/* Used in unit tests. */ bool prof_log_is_logging(void) { return prof_logging_state == prof_logging_state_started; } +/* Used in unit tests. */ bool prof_log_rep_check(void) { if (prof_logging_state == prof_logging_state_stopped @@ -395,11 +398,11 @@ prof_log_rep_check(void) { return false; } +/* Used in unit tests. 
*/ void prof_log_dummy_set(bool new_value) { prof_log_dummy = new_value; } -#endif bool prof_log_start(tsdn_t *tsdn, const char *filename) { @@ -451,11 +454,9 @@ prof_emitter_write_cb(void *opaque, const char *to_write) { struct prof_emitter_cb_arg_s *arg = (struct prof_emitter_cb_arg_s *)opaque; size_t bytes = strlen(to_write); -#ifdef JEMALLOC_JET if (prof_log_dummy) { return; } -#endif arg->ret = malloc_write_fd(arg->fd, to_write, bytes); } @@ -612,15 +613,11 @@ prof_log_stop(tsdn_t *tsdn) { /* Create a file. */ int fd; -#ifdef JEMALLOC_JET if (prof_log_dummy) { fd = 0; } else { fd = creat(log_filename, 0644); } -#else - fd = creat(log_filename, 0644); -#endif if (fd == -1) { malloc_printf(": creat() for log file \"%s\" " @@ -668,11 +665,9 @@ prof_log_stop(tsdn_t *tsdn) { prof_logging_state = prof_logging_state_stopped; malloc_mutex_unlock(tsdn, &log_mtx); -#ifdef JEMALLOC_JET if (prof_log_dummy) { return false; } -#endif return close(fd) || arg.ret == -1; } #undef PROF_LOG_STOP_BUFSIZE diff --git a/src/prof_recent.c b/src/prof_recent.c index 270691ac..9af753f6 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -12,10 +12,6 @@ ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; static ssize_t prof_recent_alloc_count = 0; -#ifndef JEMALLOC_JET -typedef ql_head(prof_recent_t) prof_recent_list_t; -static -#endif prof_recent_list_t prof_recent_alloc_list; malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. 
*/ @@ -104,14 +100,16 @@ decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { prof_tctx_try_destroy(tsd, tctx); } -#ifndef JEMALLOC_JET -static inline -#endif -edata_t * +static inline edata_t * prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) { return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE); } +edata_t * +prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) { + return prof_recent_alloc_edata_get_no_lock(n); +} + static inline edata_t * prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -129,14 +127,16 @@ edata_prof_recent_alloc_init(edata_t *edata) { edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); } -#ifndef JEMALLOC_JET -static inline -#endif -prof_recent_t * +static inline prof_recent_t * edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { return edata_prof_recent_alloc_get_dont_call_directly(edata); } +prof_recent_t * +edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) { + return edata_prof_recent_alloc_get_no_lock(edata); +} + static inline prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 791cc4f2..1885a1ac 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -103,11 +103,11 @@ confirm_malloc(void *p) { assert_ptr_not_null(p, "malloc failed unexpectedly"); edata_t *e = emap_edata_lookup(TSDN_NULL, &arena_emap_global, p); assert_ptr_not_null(e, "NULL edata for living pointer"); - prof_recent_t *n = edata_prof_recent_alloc_get_no_lock(e); + prof_recent_t *n = edata_prof_recent_alloc_get_no_lock_test(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock(n), + expect_ptr_eq(e, 
prof_recent_alloc_edata_get_no_lock_test(n), "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -122,10 +122,10 @@ static void confirm_record_living(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - edata_t *edata = prof_recent_alloc_edata_get_no_lock(n); + edata_t *edata = prof_recent_alloc_edata_get_no_lock_test(n); assert_ptr_not_null(edata, "Recorded edata should not be NULL for living pointer"); - expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock(edata), + expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock_test(edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); } @@ -134,7 +134,7 @@ static void confirm_record_released(prof_recent_t *n) { expect_ptr_not_null(n->alloc_tctx, "alloc_tctx in record should not be NULL"); - expect_ptr_null(prof_recent_alloc_edata_get_no_lock(n), + expect_ptr_null(prof_recent_alloc_edata_get_no_lock_test(n), "Recorded edata should be NULL for released pointer"); expect_ptr_not_null(n->dalloc_tctx, "dalloc_tctx in record should not be NULL for released pointer"); From 2a84f9b8fcf2ff8d87f0f3246b4b6d897520b240 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 11:16:07 -0700 Subject: [PATCH 1763/2608] Unify alignment flag reading and computation --- src/jemalloc.c | 98 +++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 50 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0d84a012..3d5d7443 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2074,6 +2074,37 @@ dynamic_opts_init(dynamic_opts_t *dynamic_opts) { dynamic_opts->arena_ind = ARENA_IND_AUTOMATIC; } +/* + * ind parameter is optional and is only checked and filled if alignment == 0; + * return true if result is out of range. 
+ */ +JEMALLOC_ALWAYS_INLINE bool +aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, + bool bump_empty_aligned_alloc) { + assert(usize != NULL); + if (alignment == 0) { + if (ind != NULL) { + *ind = sz_size2index(size); + if (unlikely(*ind >= SC_NSIZES)) { + return true; + } + *usize = sz_index2size(*ind); + assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); + return false; + } + *usize = sz_s2u(size); + } else { + if (bump_empty_aligned_alloc && unlikely(size == 0)) { + size = 1; + } + *usize = sz_sa2u(size, alignment); + } + if (unlikely(*usize == 0 || *usize > SC_LARGE_MAXCLASS)) { + return true; + } + return false; +} + /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, @@ -2227,26 +2258,11 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (config_fill && sopts->slow && opt_zero) { dopts->zero = true; } - if (dopts->alignment == 0) { - ind = sz_size2index(size); - if (unlikely(ind >= SC_NSIZES)) { - goto label_oom; - } - usize = sz_index2size(ind); - assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); - dopts->usize = usize; - } else { - if (sopts->bump_empty_aligned_alloc) { - if (unlikely(size == 0)) { - size = 1; - } - } - usize = sz_sa2u(size, dopts->alignment); - dopts->usize = usize; - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { - goto label_oom; - } + if (aligned_usize_get(size, dopts->alignment, &usize, &ind, + sopts->bump_empty_aligned_alloc)) { + goto label_oom; } + dopts->usize = usize; /* Validate the user input. 
*/ if (sopts->assert_nonempty_alloc) { assert (size != 0); @@ -3109,9 +3125,7 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) dopts.num_items = 1; dopts.item_size = size; if (unlikely(flags != 0)) { - if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { - dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); - } + dopts.alignment = MALLOCX_ALIGN_GET(flags); dopts.zero = MALLOCX_ZERO_GET(flags); @@ -3162,9 +3176,7 @@ je_mallocx(size_t size, int flags) { dopts.num_items = 1; dopts.item_size = size; if (unlikely(flags != 0)) { - if ((flags & MALLOCX_LG_ALIGN_MASK) != 0) { - dopts.alignment = MALLOCX_ALIGN_GET_SPECIFIED(flags); - } + dopts.alignment = MALLOCX_ALIGN_GET(flags); dopts.zero = MALLOCX_ZERO_GET(flags); @@ -3316,9 +3328,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, flags, 0}}; if (config_prof && opt_prof) { - usize = (alignment == 0) ? - sz_s2u(size) : sz_sa2u(size, alignment); - if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { + if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_oom; } p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, @@ -3501,22 +3511,14 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_realloc() will use the actual usize to decide whether to sample. */ size_t usize_max; - if (alignment == 0) { - usize_max = sz_s2u(size+extra); - assert(usize_max > 0 - && usize_max <= SC_LARGE_MAXCLASS); - } else { - usize_max = sz_sa2u(size+extra, alignment); - if (unlikely(usize_max == 0 - || usize_max > SC_LARGE_MAXCLASS)) { - /* - * usize_max is out of range, and chances are that - * allocation will fail, but use the maximum possible - * value and carry on with prof_alloc_prep(), just in - * case allocation succeeds. 
- */ - usize_max = SC_LARGE_MAXCLASS; - } + if (aligned_usize_get(size + extra, alignment, &usize_max, NULL, + false)) { + /* + * usize_max is out of range, and chances are that allocation + * will fail, but use the maximum possible value and carry on + * with prof_alloc_prep(), just in case allocation succeeds. + */ + usize_max = SC_LARGE_MAXCLASS; } bool prof_active = prof_active_get_unlocked(); bool sample_event = te_prof_sample_event_lookahead(tsd, usize_max); @@ -3726,13 +3728,9 @@ je_dallocx(void *ptr, int flags) { JEMALLOC_ALWAYS_INLINE size_t inallocx(tsdn_t *tsdn, size_t size, int flags) { check_entry_exit_locking(tsdn); - size_t usize; - if (likely((flags & MALLOCX_LG_ALIGN_MASK) == 0)) { - usize = sz_s2u(size); - } else { - usize = sz_sa2u(size, MALLOCX_ALIGN_GET_SPECIFIED(flags)); - } + /* In case of out of range, let the user see it rather than fail. */ + aligned_usize_get(size, MALLOCX_ALIGN_GET(flags), &usize, NULL, false); check_entry_exit_locking(tsdn); return usize; } From 4b0c008489020bd9d66c21e1452fe8324d11b3f0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 11:19:09 -0700 Subject: [PATCH 1764/2608] Unify zero flag reading and setting --- src/jemalloc.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3d5d7443..afd48900 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2105,6 +2105,15 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, return false; } +JEMALLOC_ALWAYS_INLINE bool +zero_get(bool guarantee, bool slow) { + if (config_fill && slow && unlikely(opt_zero)) { + return true; + } else { + return guarantee; + } +} + /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, @@ -2255,9 +2264,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } /* This is the beginning of the "core" algorithm. 
*/ - if (config_fill && sopts->slow && opt_zero) { - dopts->zero = true; - } + dopts->zero = zero_get(dopts->zero, sopts->slow); if (aligned_usize_get(size, dopts->alignment, &usize, &ind, sopts->bump_empty_aligned_alloc)) { goto label_oom; @@ -3293,10 +3300,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); - bool zero = flags & MALLOCX_ZERO; - if (config_fill && unlikely(opt_zero)) { - zero = true; - } + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); @@ -3562,11 +3566,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); - - bool zero = flags & MALLOCX_ZERO; - if (config_fill && unlikely(opt_zero)) { - zero = true; - } + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " "flags: %d", ptr, size, extra, flags); From 95a59d2f72f4799b1d7aa07216c558408a91917a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 14:06:43 -0700 Subject: [PATCH 1765/2608] Unify tcache flag reading and selection --- src/jemalloc.c | 179 +++++++++++++++++++------------------------------ 1 file changed, 70 insertions(+), 109 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index afd48900..aacec7b6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2114,27 +2114,45 @@ zero_get(bool guarantee, bool slow) { } } +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { + tcache_t *tcache; + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { + /* Getting tcache ptr unconditionally. 
*/ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else if (is_alloc || + likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + } else { + /* + * Should not specify tcache on deallocation path when being + * reentrant. + */ + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } + } + return tcache; +} + /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t size, size_t usize, szind_t ind) { - tcache_t *tcache; arena_t *arena; /* Fill in the tcache. */ - if (dopts->tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!sopts->slow)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - tcache = tcache_get(tsd); - } - } else if (dopts->tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, dopts->tcache_ind); - } + tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, + sopts->slow, /* is_alloc */ true); /* Fill in the arena. */ if (dopts->arena_ind == ARENA_IND_AUTOMATIC) { @@ -2579,7 +2597,8 @@ je_malloc(size_t size) { } assert(tsd_fast(tsd)); - tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ true); cache_bin_t *bin = &tcache->bins[ind]; bool tcache_success; void *ret; @@ -2839,22 +2858,20 @@ free_default(void *ptr) { tsd_t *tsd = tsd_fetch_min(); check_entry_exit_locking(tsd_tsdn(tsd)); - tcache_t *tcache; if (likely(tsd_fast(tsd))) { - tsd_assert_fast(tsd); - /* Unconditionally get tcache ptr on fast path. 
*/ - tcache = tsd_tcachep_get(tsd); - ifree(tsd, ptr, tcache, false); + tcache_t *tcache = tcache_get_from_ind(tsd, + TCACHE_IND_AUTOMATIC, /* slow */ false, + /* is_alloc */ false); + ifree(tsd, ptr, tcache, /* slow */ false); } else { - if (likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } + tcache_t *tcache = tcache_get_from_ind(tsd, + TCACHE_IND_AUTOMATIC, /* slow */ true, + /* is_alloc */ false); uintptr_t args_raw[3] = {(uintptr_t)ptr}; hook_invoke_dalloc(hook_dalloc_free, ptr, args_raw); - ifree(tsd, ptr, tcache, true); + ifree(tsd, ptr, tcache, /* slow */ true); } + check_entry_exit_locking(tsd_tsdn(tsd)); } } @@ -2912,7 +2929,8 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } - tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; /* @@ -3088,6 +3106,17 @@ int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); * Begin non-standard functions. 
*/ +JEMALLOC_ALWAYS_INLINE unsigned +mallocx_tcache_get(int flags) { + if (likely((flags & MALLOCX_TCACHE_MASK) == 0)) { + return TCACHE_IND_AUTOMATIC; + } else if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { + return TCACHE_IND_NONE; + } else { + return MALLOCX_TCACHE_GET(flags); + } +} + #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API #define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y @@ -3136,16 +3165,7 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) dopts.zero = MALLOCX_ZERO_GET(flags); - if ((flags & MALLOCX_TCACHE_MASK) != 0) { - if ((flags & MALLOCX_TCACHE_MASK) - == MALLOCX_TCACHE_NONE) { - dopts.tcache_ind = TCACHE_IND_NONE; - } else { - dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); - } - } else { - dopts.tcache_ind = TCACHE_IND_AUTOMATIC; - } + dopts.tcache_ind = mallocx_tcache_get(flags); if ((flags & MALLOCX_ARENA_MASK) != 0) dopts.arena_ind = MALLOCX_ARENA_GET(flags); @@ -3187,16 +3207,7 @@ je_mallocx(size_t size, int flags) { dopts.zero = MALLOCX_ZERO_GET(flags); - if ((flags & MALLOCX_TCACHE_MASK) != 0) { - if ((flags & MALLOCX_TCACHE_MASK) - == MALLOCX_TCACHE_NONE) { - dopts.tcache_ind = TCACHE_IND_NONE; - } else { - dopts.tcache_ind = MALLOCX_TCACHE_GET(flags); - } - } else { - dopts.tcache_ind = TCACHE_IND_AUTOMATIC; - } + dopts.tcache_ind = mallocx_tcache_get(flags); if ((flags & MALLOCX_ARENA_MASK) != 0) dopts.arena_ind = MALLOCX_ARENA_GET(flags); @@ -3292,7 +3303,6 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { size_t old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); arena_t *arena; - tcache_t *tcache; assert(ptr != NULL); assert(size != 0); @@ -3312,15 +3322,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { arena = NULL; } - if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } - } else { - tcache = 
tcache_get(tsd); - } + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, + /* slow */ true, /* is_alloc */ true); emap_alloc_ctx_t alloc_ctx; emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, @@ -3400,19 +3404,14 @@ do_realloc_nonnull_zero(void *ptr) { return do_rallocx(ptr, 1, MALLOCX_TCACHE_NONE, true); } else if (opt_zero_realloc_action == zero_realloc_action_free) { UTRACE(ptr, 0, 0); - tcache_t *tcache; tsd_t *tsd = tsd_fetch(); check_entry_exit_locking(tsd_tsdn(tsd)); - if (tsd_reentrancy_level_get(tsd) == 0) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - + tcache_t *tcache = tcache_get_from_ind(tsd, + TCACHE_IND_AUTOMATIC, /* slow */ true, + /* is_alloc */ false); uintptr_t args[3] = {(uintptr_t)ptr, 0}; hook_invoke_dalloc(hook_dalloc_realloc, ptr, args); - ifree(tsd, ptr, tcache, true); check_entry_exit_locking(tsd_tsdn(tsd)); @@ -3688,28 +3687,9 @@ je_dallocx(void *ptr, int flags) { bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); - tcache_t *tcache; - if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - /* Not allowed to be reentrant and specify a custom tcache. 
*/ - assert(tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } - } else { - if (likely(fast)) { - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - if (likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } - } + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, + /* is_alloc */ false); UTRACE(ptr, 0, 0); if (likely(fast)) { @@ -3746,28 +3726,9 @@ sdallocx_default(void *ptr, size_t size, int flags) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); check_entry_exit_locking(tsd_tsdn(tsd)); - tcache_t *tcache; - if (unlikely((flags & MALLOCX_TCACHE_MASK) != 0)) { - /* Not allowed to be reentrant and specify a custom tcache. */ - assert(tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if ((flags & MALLOCX_TCACHE_MASK) == MALLOCX_TCACHE_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, MALLOCX_TCACHE_GET(flags)); - } - } else { - if (likely(fast)) { - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else { - if (likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } - } + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, + /* is_alloc */ false); UTRACE(ptr, 0, 0); if (likely(fast)) { From e128b170a0b884aa34ca7fe3f61e89fc54fce918 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 14:17:54 -0700 Subject: [PATCH 1766/2608] Do not fallback to auto arena when manual arena is requested --- src/jemalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index aacec7b6..5d7c2666 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ 
-2164,6 +2164,10 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, arena = NULL; } else { arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true); + if (unlikely(arena == NULL) && + dopts->arena_ind >= narenas_auto) { + return NULL; + } } if (unlikely(dopts->alignment != 0)) { @@ -3315,7 +3319,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { unsigned arena_ind = MALLOCX_ARENA_GET(flags); arena = arena_get(tsd_tsdn(tsd), arena_ind, true); - if (unlikely(arena == NULL)) { + if (unlikely(arena == NULL) && arena_ind >= narenas_auto) { goto label_oom; } } else { From 24bbf376cee49691ff734eb5d0415e14fbbe72ca Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 13 May 2020 14:49:41 -0700 Subject: [PATCH 1767/2608] Unify arena flag reading and selection --- src/jemalloc.c | 73 ++++++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 35 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d7c2666..573118e0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2144,31 +2144,38 @@ tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { return tcache; } -/* ind is ignored if dopts->alignment > 0. */ -JEMALLOC_ALWAYS_INLINE void * -imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, - size_t size, size_t usize, szind_t ind) { - arena_t *arena; - - /* Fill in the tcache. */ - tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, - sopts->slow, /* is_alloc */ true); - - /* Fill in the arena. */ - if (dopts->arena_ind == ARENA_IND_AUTOMATIC) { +/* Return true if a manual arena is specified and arena_get() OOMs. 
*/ +JEMALLOC_ALWAYS_INLINE bool +arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { + if (arena_ind == ARENA_IND_AUTOMATIC) { /* * In case of automatic arena management, we defer arena * computation until as late as we can, hoping to fill the * allocation out of the tcache. */ - arena = NULL; + *arena_p = NULL; } else { - arena = arena_get(tsd_tsdn(tsd), dopts->arena_ind, true); - if (unlikely(arena == NULL) && - dopts->arena_ind >= narenas_auto) { - return NULL; + *arena_p = arena_get(tsd_tsdn(tsd), arena_ind, true); + if (unlikely(*arena_p == NULL) && arena_ind >= narenas_auto) { + return true; } } + return false; +} + +/* ind is ignored if dopts->alignment > 0. */ +JEMALLOC_ALWAYS_INLINE void * +imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, + size_t size, size_t usize, szind_t ind) { + /* Fill in the tcache. */ + tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, + sopts->slow, /* is_alloc */ true); + + /* Fill in the arena. 
*/ + arena_t *arena; + if (arena_get_from_ind(tsd, dopts->arena_ind, &arena)) { + return NULL; + } if (unlikely(dopts->alignment != 0)) { return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment, @@ -3121,6 +3128,15 @@ mallocx_tcache_get(int flags) { } } +JEMALLOC_ALWAYS_INLINE unsigned +mallocx_arena_get(int flags) { + if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { + return MALLOCX_ARENA_GET(flags); + } else { + return ARENA_IND_AUTOMATIC; + } +} + #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API #define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y @@ -3166,13 +3182,9 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) dopts.item_size = size; if (unlikely(flags != 0)) { dopts.alignment = MALLOCX_ALIGN_GET(flags); - dopts.zero = MALLOCX_ZERO_GET(flags); - dopts.tcache_ind = mallocx_tcache_get(flags); - - if ((flags & MALLOCX_ARENA_MASK) != 0) - dopts.arena_ind = MALLOCX_ARENA_GET(flags); + dopts.arena_ind = mallocx_arena_get(flags); } imalloc(&sopts, &dopts); @@ -3208,13 +3220,9 @@ je_mallocx(size_t size, int flags) { dopts.item_size = size; if (unlikely(flags != 0)) { dopts.alignment = MALLOCX_ALIGN_GET(flags); - dopts.zero = MALLOCX_ZERO_GET(flags); - dopts.tcache_ind = mallocx_tcache_get(flags); - - if ((flags & MALLOCX_ARENA_MASK) != 0) - dopts.arena_ind = MALLOCX_ARENA_GET(flags); + dopts.arena_ind = mallocx_arena_get(flags); } imalloc(&sopts, &dopts); @@ -3316,14 +3324,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); - if (unlikely((flags & MALLOCX_ARENA_MASK) != 0)) { - unsigned arena_ind = MALLOCX_ARENA_GET(flags); - arena = arena_get(tsd_tsdn(tsd), arena_ind, true); - if (unlikely(arena == NULL) && arena_ind >= narenas_auto) { - goto label_oom; - } - } else { - arena = NULL; + unsigned arena_ind = mallocx_arena_get(flags); + if (arena_get_from_ind(tsd, arena_ind, &arena)) { + goto label_oom; } unsigned tcache_ind = mallocx_tcache_get(flags); From 
a795b1932780503cf5422920975a1c38994c7581 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 17 Jun 2020 16:15:06 -0700 Subject: [PATCH 1768/2608] Remove beginning define in source files ``` sed -i "/^#define JEMALLOC_[A-Z_]*_C_$/d" src/*.c; ``` --- src/arena.c | 1 - src/background_thread.c | 1 - src/base.c | 1 - src/bitmap.c | 1 - src/buf_writer.c | 1 - src/ckh.c | 1 - src/counter.c | 1 - src/ctl.c | 1 - src/extent_dss.c | 1 - src/extent_mmap.c | 1 - src/hash.c | 1 - src/large.c | 1 - src/malloc_io.c | 1 - src/mutex.c | 1 - src/mutex_pool.c | 1 - src/pages.c | 1 - src/prng.c | 1 - src/prof.c | 1 - src/prof_data.c | 1 - src/prof_log.c | 1 - src/prof_recent.c | 1 - src/rtree.c | 1 - src/stats.c | 1 - src/tcache.c | 1 - src/thread_event.c | 1 - src/ticker.c | 1 - src/tsd.c | 1 - src/witness.c | 1 - 28 files changed, 28 deletions(-) diff --git a/src/arena.c b/src/arena.c index b61d373b..2a3af5c6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1,4 +1,3 @@ -#define JEMALLOC_ARENA_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/background_thread.c b/src/background_thread.c index 6b680530..db11405e 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BACKGROUND_THREAD_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/base.c b/src/base.c index ebb42da5..d3732bab 100644 --- a/src/base.c +++ b/src/base.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BASE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/bitmap.c b/src/bitmap.c index 468b3178..0ccedc5d 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BITMAP_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/buf_writer.c b/src/buf_writer.c index 
06a2735b..7c6f7940 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -1,4 +1,3 @@ -#define JEMALLOC_BUF_WRITER_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ckh.c b/src/ckh.c index 1bf6df5a..9441fbad 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -34,7 +34,6 @@ * respectively. * ******************************************************************************/ -#define JEMALLOC_CKH_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ckh.h" diff --git a/src/counter.c b/src/counter.c index 71eda69f..8f1ae3af 100644 --- a/src/counter.c +++ b/src/counter.c @@ -1,4 +1,3 @@ -#define JEMALLOC_COUNTER_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ctl.c b/src/ctl.c index 24d9eb34..24c959c4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1,4 +1,3 @@ -#define JEMALLOC_CTL_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/extent_dss.c b/src/extent_dss.c index 17a08227..81161b3a 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -1,4 +1,3 @@ -#define JEMALLOC_EXTENT_DSS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 17fd1c8f..5f0ee2d2 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -1,4 +1,3 @@ -#define JEMALLOC_EXTENT_MMAP_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/hash.c b/src/hash.c index 7b2bdc2b..3a26b393 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,3 +1,2 @@ -#define JEMALLOC_HASH_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/large.c b/src/large.c index cc3e727c..3ea08be4 100644 --- a/src/large.c +++ b/src/large.c @@ 
-1,4 +1,3 @@ -#define JEMALLOC_LARGE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/malloc_io.c b/src/malloc_io.c index d2879bb4..f5004f0a 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -1,4 +1,3 @@ -#define JEMALLOC_MALLOC_IO_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/mutex.c b/src/mutex.c index bffcfb5f..83d9ce76 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -1,4 +1,3 @@ -#define JEMALLOC_MUTEX_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/mutex_pool.c b/src/mutex_pool.c index f24d10e4..d7861dcd 100644 --- a/src/mutex_pool.c +++ b/src/mutex_pool.c @@ -1,4 +1,3 @@ -#define JEMALLOC_MUTEX_POOL_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/pages.c b/src/pages.c index 62e84f04..9413d874 100644 --- a/src/pages.c +++ b/src/pages.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PAGES_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/pages.h" diff --git a/src/prng.c b/src/prng.c index 83c04bf9..3a26b393 100644 --- a/src/prng.c +++ b/src/prng.c @@ -1,3 +1,2 @@ -#define JEMALLOC_PRNG_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof.c b/src/prof.c index db895f85..8ab68932 100644 --- a/src/prof.c +++ b/src/prof.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof_data.c b/src/prof_data.c index fe9ef154..49cc6ee3 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git 
a/src/prof_log.c b/src/prof_log.c index 00c76596..bda01d04 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prof_recent.c b/src/prof_recent.c index 9af753f6..426f62ec 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_RECENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/rtree.c b/src/rtree.c index 07a4e9ac..6496b5af 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -1,4 +1,3 @@ -#define JEMALLOC_RTREE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/stats.c b/src/stats.c index fb88e5a6..407b60cc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1,4 +1,3 @@ -#define JEMALLOC_STATS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/tcache.c b/src/tcache.c index ff428842..b73fd0dd 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1,4 +1,3 @@ -#define JEMALLOC_TCACHE_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/thread_event.c b/src/thread_event.c index 99a188dd..bb91baa7 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -1,4 +1,3 @@ -#define JEMALLOC_THREAD_EVENT_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ticker.c b/src/ticker.c index d7b8cd26..3a26b393 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -1,3 +1,2 @@ -#define JEMALLOC_TICKER_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/tsd.c b/src/tsd.c index cc1b3ac8..0dd4036b 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -1,4 +1,3 @@ -#define 
JEMALLOC_TSD_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/witness.c b/src/witness.c index e9ddf596..4474af04 100644 --- a/src/witness.c +++ b/src/witness.c @@ -1,4 +1,3 @@ -#define JEMALLOC_WITNESS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" From 092fcac0b4b3854c12c51d22174df00303a3fe6a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 08:58:22 -0700 Subject: [PATCH 1769/2608] Remove unnecessary source files --- Makefile.in | 3 --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 3 --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 9 --------- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 3 --- msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 9 --------- src/hash.c | 2 -- src/prng.c | 2 -- src/ticker.c | 2 -- 8 files changed, 33 deletions(-) delete mode 100644 src/hash.c delete mode 100644 src/prng.c delete mode 100644 src/ticker.c diff --git a/Makefile.in b/Makefile.in index 2f3fea1e..35b4a05d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,7 +117,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ - $(srcroot)src/hash.c \ $(srcroot)src/hook.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ @@ -130,7 +129,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pa_extra.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ - $(srcroot)src/prng.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ @@ -143,7 +141,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ - $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index d50fa884..bbe814be 
100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -57,7 +57,6 @@ - @@ -71,7 +70,6 @@ - @@ -84,7 +82,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 94db8c0c..6f7027be 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -55,9 +55,6 @@ Source Files - - Source Files - Source Files @@ -97,9 +94,6 @@ Source Files - - Source Files - Source Files @@ -136,9 +130,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 337dcfe7..ae60133b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -57,7 +57,6 @@ - @@ -71,7 +70,6 @@ - @@ -84,7 +82,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 94db8c0c..6f7027be 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -55,9 +55,6 @@ Source Files - - Source Files - Source Files @@ -97,9 +94,6 @@ Source Files - - Source Files - Source Files @@ -136,9 +130,6 @@ Source Files - - Source Files - Source Files diff --git a/src/hash.c b/src/hash.c deleted file mode 100644 index 3a26b393..00000000 --- a/src/hash.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/prng.c b/src/prng.c deleted file mode 100644 index 3a26b393..00000000 --- a/src/prng.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/src/ticker.c b/src/ticker.c deleted file mode 100644 index 3a26b393..00000000 
--- a/src/ticker.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" From 25e43c60223c169ce7dc66982f9472aa6e33306b Mon Sep 17 00:00:00 2001 From: "David T. Goldblatt" Date: Thu, 18 Jun 2020 15:41:56 -0700 Subject: [PATCH 1770/2608] Witness: Make ranks an enum. This lets us avoid having to increment a bunch of values manually every time we add a new sort of lock. --- include/jemalloc/internal/witness.h | 114 ++++++++++++++-------------- 1 file changed, 58 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 58f72664..652afe65 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -7,62 +7,65 @@ /* LOCK RANKS */ /******************************************************************************/ -/* - * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the witness - * machinery. - */ +enum witness_rank_e { + /* + * Order matters within this enum listing -- higher valued locks can + * only be acquired after lower-valued ones. We use the + * auto-incrementing-ness of enum values to enforce this. + */ -#define WITNESS_RANK_OMIT 0U + /* + * Witnesses with rank WITNESS_RANK_OMIT are completely ignored by the + * witness machinery. + */ + WITNESS_RANK_OMIT, + WITNESS_RANK_MIN, + WITNESS_RANK_INIT = WITNESS_RANK_MIN, + WITNESS_RANK_CTL, + WITNESS_RANK_TCACHES, + WITNESS_RANK_ARENAS, + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + WITNESS_RANK_PROF_DUMP, + WITNESS_RANK_PROF_BT2GCTX, + WITNESS_RANK_PROF_TDATAS, + WITNESS_RANK_PROF_TDATA, + WITNESS_RANK_PROF_LOG, + WITNESS_RANK_PROF_GCTX, + WITNESS_RANK_PROF_RECENT_DUMP, + WITNESS_RANK_BACKGROUND_THREAD, + /* + * Used as an argument to witness_assert_depth_to_rank() in order to + * validate depth excluding non-core locks with lower ranks. 
Since the + * rank argument to witness_assert_depth_to_rank() is inclusive rather + * than exclusive, this definition can have the same value as the + * minimally ranked core lock. + */ + WITNESS_RANK_CORE, + WITNESS_RANK_DECAY = WITNESS_RANK_CORE, + WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_EXTENTS, + WITNESS_RANK_EDATA_CACHE, -#define WITNESS_RANK_MIN 1U + WITNESS_RANK_EMAP, + WITNESS_RANK_RTREE, + WITNESS_RANK_BASE, + WITNESS_RANK_ARENA_LARGE, + WITNESS_RANK_HOOK, -#define WITNESS_RANK_INIT 1U -#define WITNESS_RANK_CTL 1U -#define WITNESS_RANK_TCACHES 2U -#define WITNESS_RANK_ARENAS 3U - -#define WITNESS_RANK_BACKGROUND_THREAD_GLOBAL 4U - -#define WITNESS_RANK_PROF_DUMP 5U -#define WITNESS_RANK_PROF_BT2GCTX 6U -#define WITNESS_RANK_PROF_TDATAS 7U -#define WITNESS_RANK_PROF_TDATA 8U -#define WITNESS_RANK_PROF_LOG 9U -#define WITNESS_RANK_PROF_GCTX 10U -#define WITNESS_RANK_PROF_RECENT_DUMP 11U -#define WITNESS_RANK_BACKGROUND_THREAD 12U - -/* - * Used as an argument to witness_assert_depth_to_rank() in order to validate - * depth excluding non-core locks with lower ranks. Since the rank argument to - * witness_assert_depth_to_rank() is inclusive rather than exclusive, this - * definition can have the same value as the minimally ranked core lock. 
- */ -#define WITNESS_RANK_CORE 13U - -#define WITNESS_RANK_DECAY 13U -#define WITNESS_RANK_TCACHE_QL 14U -#define WITNESS_RANK_EXTENT_GROW 15U -#define WITNESS_RANK_EXTENTS 16U -#define WITNESS_RANK_EDATA_CACHE 17U - -#define WITNESS_RANK_EMAP 18U -#define WITNESS_RANK_RTREE 19U -#define WITNESS_RANK_BASE 20U -#define WITNESS_RANK_ARENA_LARGE 21U -#define WITNESS_RANK_HOOK 22U - -#define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_BIN WITNESS_RANK_LEAF -#define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF -#define WITNESS_RANK_COUNTER_ACCUM WITNESS_RANK_LEAF -#define WITNESS_RANK_DSS WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_DUMP_FILENAME WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_GDUMP WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_NEXT_THR_UID WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_RECENT_ALLOC WITNESS_RANK_LEAF -#define WITNESS_RANK_PROF_THREAD_ACTIVE_INIT WITNESS_RANK_LEAF + WITNESS_RANK_LEAF=0x1000, + WITNESS_RANK_BIN = WITNESS_RANK_LEAF, + WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, + WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, + WITNESS_RANK_DSS = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_ACTIVE = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_DUMP_FILENAME = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_GDUMP = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_NEXT_THR_UID = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = WITNESS_RANK_LEAF, +}; +typedef enum witness_rank_e witness_rank_t; /******************************************************************************/ /* PER-WITNESS DATA */ @@ -74,7 +77,6 @@ #endif typedef struct witness_s witness_t; -typedef unsigned witness_rank_t; typedef ql_head(witness_t) witness_list_t; typedef int witness_comp_t (const witness_t *, void *, const witness_t *, void *); @@ -84,8 +86,8 @@ struct witness_s { const char *name; /* - * Witness rank, where 0 is lowest and UINT_MAX is highest. 
Witnesses - * must be acquired in order of increasing rank. + * Witness rank, where 0 is lowest and WITNESS_RANK_LEAF is highest. + * Witnesses must be acquired in order of increasing rank. */ witness_rank_t rank; From d460333efb22466713dd646b3947bbf0f868b02d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 15:16:53 -0700 Subject: [PATCH 1771/2608] Improve naming for prof system thread name option --- Makefile.in | 2 +- include/jemalloc/internal/prof_externs.h | 6 +++--- src/ctl.c | 15 +++++++------- src/jemalloc.c | 5 ++--- src/prof.c | 18 ++++++++--------- test/unit/mallctl.c | 2 +- ...s_thread_name.c => prof_sys_thread_name.c} | 20 +++++++++---------- test/unit/prof_sys_thread_name.sh | 5 +++++ test/unit/prof_use_sys_thread_name.sh | 5 ----- 9 files changed, 38 insertions(+), 40 deletions(-) rename test/unit/{prof_use_sys_thread_name.c => prof_sys_thread_name.c} (74%) create mode 100644 test/unit/prof_sys_thread_name.sh delete mode 100644 test/unit/prof_use_sys_thread_name.sh diff --git a/Makefile.in b/Makefile.in index 35b4a05d..fd52ffc8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,7 +232,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ - $(srcroot)test/unit/prof_use_sys_thread_name.c \ + $(srcroot)test/unit/prof_sys_thread_name.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index b4339659..6021cf45 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -29,7 +29,7 @@ extern ssize_t opt_prof_recent_alloc_max; extern malloc_mutex_t prof_recent_alloc_mtx; /* Whether to use thread name provided by the system or by mallctl. */ -extern bool opt_prof_experimental_use_sys_thread_name; +extern bool opt_prof_sys_thread_name; /* Accessed via prof_active_[gs]et{_unlocked,}(). 
*/ extern bool prof_active; @@ -90,8 +90,8 @@ uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); /* Used by unit tests. */ -typedef int (prof_read_sys_thread_name_t)(char *buf, size_t limit); -extern prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name; +typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; size_t prof_tdata_count(void); size_t prof_bt_count(void); typedef int (prof_dump_open_t)(bool, const char *); diff --git a/src/ctl.c b/src/ctl.c index 24c959c4..5cba9af9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -127,7 +127,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) -CTL_PROTO(opt_prof_experimental_use_sys_thread_name) +CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) @@ -382,11 +382,10 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, - {NAME("prof_experimental_use_sys_thread_name"), - CTL(opt_prof_experimental_use_sys_thread_name)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)}, - {NAME("prof_time_resolution"), CTL(opt_prof_time_res)} + {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, + {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, + {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; static const ctl_named_node_t tcache_node[] = { @@ -1852,8 +1851,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) 
-CTL_RO_NL_CGEN(config_prof, opt_prof_experimental_use_sys_thread_name, - opt_prof_experimental_use_sys_thread_name, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, + bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, prof_time_res_mode_names[opt_prof_time_res], const char *) CTL_RO_NL_GEN(opt_zero_realloc, diff --git a/src/jemalloc.c b/src/jemalloc.c index 573118e0..b468d821 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1495,9 +1495,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) - CONF_HANDLE_BOOL( - opt_prof_experimental_use_sys_thread_name, - "prof_experimental_use_sys_thread_name") + CONF_HANDLE_BOOL(opt_prof_sys_thread_name, + "prof_sys_thread_name") if (CONF_MATCH("prof_time_resolution")) { if (CONF_MATCH_VALUE("default")) { opt_prof_time_res = diff --git a/src/prof.c b/src/prof.c index 8ab68932..5e29f401 100644 --- a/src/prof.c +++ b/src/prof.c @@ -47,7 +47,7 @@ bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; -bool opt_prof_experimental_use_sys_thread_name = false; +bool opt_prof_sys_thread_name = false; /* Accessed via prof_sample_event_handler(). 
*/ static counter_accum_t prof_idump_accumulated; @@ -197,21 +197,21 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { } static int -prof_read_sys_thread_name_impl(char *buf, size_t limit) { +prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP return pthread_getname_np(pthread_self(), buf, limit); #else return ENOSYS; #endif } -prof_read_sys_thread_name_t *JET_MUTABLE prof_read_sys_thread_name = - prof_read_sys_thread_name_impl; +prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = + prof_sys_thread_name_read_impl; static void -prof_fetch_sys_thread_name(tsd_t *tsd) { +prof_sys_thread_name_fetch(tsd_t *tsd) { #define THREAD_NAME_MAX_LEN 16 char buf[THREAD_NAME_MAX_LEN]; - if (!prof_read_sys_thread_name(buf, THREAD_NAME_MAX_LEN)) { + if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { prof_thread_name_set_impl(tsd, buf); } #undef THREAD_NAME_MAX_LEN @@ -220,8 +220,8 @@ prof_fetch_sys_thread_name(tsd_t *tsd) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { - if (opt_prof_experimental_use_sys_thread_name) { - prof_fetch_sys_thread_name(tsd); + if (opt_prof_sys_thread_name) { + prof_sys_thread_name_fetch(tsd); } edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, @@ -870,7 +870,7 @@ prof_thread_name_get(tsd_t *tsd) { int prof_thread_name_set(tsd_t *tsd, const char *thread_name) { - if (opt_prof_experimental_use_sys_thread_name) { + if (opt_prof_sys_thread_name) { return ENOENT; } else { return prof_thread_name_set_impl(tsd, thread_name); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 10d809fb..3de56947 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -192,7 +192,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); - TEST_MALLCTL_OPT(bool, 
prof_experimental_use_sys_thread_name, prof); + TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/prof_use_sys_thread_name.c b/test/unit/prof_sys_thread_name.c similarity index 74% rename from test/unit/prof_use_sys_thread_name.c rename to test/unit/prof_sys_thread_name.c index 60cb55bf..ec1e7745 100644 --- a/test/unit/prof_use_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -3,31 +3,31 @@ static const char *test_thread_name = "test_name"; static int -test_prof_read_sys_thread_name_error(char *buf, size_t limit) { +test_prof_sys_thread_name_read_error(char *buf, size_t limit) { return ENOSYS; } static int -test_prof_read_sys_thread_name(char *buf, size_t limit) { +test_prof_sys_thread_name_read(char *buf, size_t limit) { assert(strlen(test_thread_name) < limit); strncpy(buf, test_thread_name, limit); return 0; } static int -test_prof_read_sys_thread_name_clear(char *buf, size_t limit) { +test_prof_sys_thread_name_read_clear(char *buf, size_t limit) { assert(limit > 0); buf[0] = '\0'; return 0; } -TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { +TEST_BEGIN(test_prof_sys_thread_name) { test_skip_if(!config_prof); bool oldval; size_t sz = sizeof(oldval); - assert_d_eq(mallctl("opt.prof_experimental_use_sys_thread_name", - &oldval, &sz, NULL, 0), 0, "mallctl failed"); + assert_d_eq(mallctl("opt.prof_sys_thread_name", &oldval, &sz, NULL, 0), + 0, "mallctl failed"); assert_true(oldval, "option was not set correctly"); const char *thread_name; @@ -42,7 +42,7 @@ TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { assert_ptr_eq(thread_name, test_thread_name, "Thread name should not be touched"); - prof_read_sys_thread_name = test_prof_read_sys_thread_name_error; + prof_sys_thread_name_read = test_prof_sys_thread_name_read_error; void *p = malloc(1); free(p); assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, @@ -50,7 +50,7 @@ 
TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { assert_str_eq(thread_name, "", "Thread name should stay the same if the system call fails"); - prof_read_sys_thread_name = test_prof_read_sys_thread_name; + prof_sys_thread_name_read = test_prof_sys_thread_name_read; p = malloc(1); free(p); assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, @@ -58,7 +58,7 @@ TEST_BEGIN(test_prof_experimental_use_sys_thread_name) { assert_str_eq(thread_name, test_thread_name, "Thread name should be changed if the system call succeeds"); - prof_read_sys_thread_name = test_prof_read_sys_thread_name_clear; + prof_sys_thread_name_read = test_prof_sys_thread_name_read_clear; p = malloc(1); free(p); assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, @@ -71,5 +71,5 @@ TEST_END int main(void) { return test( - test_prof_experimental_use_sys_thread_name); + test_prof_sys_thread_name); } diff --git a/test/unit/prof_sys_thread_name.sh b/test/unit/prof_sys_thread_name.sh new file mode 100644 index 00000000..281cf9a0 --- /dev/null +++ b/test/unit/prof_sys_thread_name.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_sys_thread_name:true" +fi diff --git a/test/unit/prof_use_sys_thread_name.sh b/test/unit/prof_use_sys_thread_name.sh deleted file mode 100644 index 0e0e0d99..00000000 --- a/test/unit/prof_use_sys_thread_name.sh +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh - -if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_experimental_use_sys_thread_name:true" -fi From 537a4bedb4d4ae6238762df85ae1ad2bc8d0ff47 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 4 May 2020 14:58:25 -0700 Subject: [PATCH 1772/2608] Add a tool to examine random number distributions --- .gitignore | 5 + Makefile.in | 33 +++- test/analyze/rand.c | 276 +++++++++++++++++++++++++++ test/include/test/jemalloc_test.h.in | 4 +- 4 files changed, 311 insertions(+), 7 
deletions(-) create mode 100644 test/analyze/rand.c diff --git a/.gitignore b/.gitignore index 5ca0ad1d..31cdbb8e 100644 --- a/.gitignore +++ b/.gitignore @@ -71,6 +71,11 @@ test/include/test/jemalloc_test_defs.h /test/unit/*.[od] /test/unit/*.out +/test/analyze/[A-Za-z]* +!/test/analyze/[A-Za-z]*.* +/test/analyze/*.[od] +/test/analyze/*.out + /VERSION *.pdb diff --git a/Makefile.in b/Makefile.in index fd52ffc8..4a0ef874 100644 --- a/Makefile.in +++ b/Makefile.in @@ -287,6 +287,7 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif +TESTS_ANALYZE := $(srcroot)test/analyze/rand.c TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ @@ -294,7 +295,8 @@ TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/sizes.c -TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) $(TESTS_STRESS) +TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ + $(TESTS_ANALYZE) $(TESTS_STRESS) PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h) @@ -310,14 +312,19 @@ C_JET_OBJS := $(C_SRCS:$(srcroot)%.c=$(objroot)%.jet.$(O)) C_TESTLIB_UNIT_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.unit.$(O)) C_TESTLIB_INTEGRATION_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) C_UTIL_INTEGRATION_OBJS := $(C_UTIL_INTEGRATION_SRCS:$(srcroot)%.c=$(objroot)%.integration.$(O)) +C_TESTLIB_ANALYZE_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.analyze.$(O)) C_TESTLIB_STRESS_OBJS := $(C_TESTLIB_SRCS:$(srcroot)%.c=$(objroot)%.stress.$(O)) -C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_STRESS_OBJS) +C_TESTLIB_OBJS := $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) \ + $(C_UTIL_INTEGRATION_OBJS) $(C_TESTLIB_ANALYZE_OBJS) \ + 
$(C_TESTLIB_STRESS_OBJS) TESTS_UNIT_OBJS := $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) +TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) -TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_STRESS_OBJS) +TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \ + $(TESTS_STRESS_OBJS) TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc @@ -391,12 +398,15 @@ $(C_TESTLIB_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(C_TESTLIB_INTEGRATION_OBJS): $(objroot)test/src/%.integration.$(O): $(srcroot)test/src/%.c $(C_TESTLIB_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(C_UTIL_INTEGRATION_OBJS): $(objroot)src/%.integration.$(O): $(srcroot)src/%.c +$(C_TESTLIB_ANALYZE_OBJS): $(objroot)test/src/%.analyze.$(O): $(srcroot)test/src/%.c +$(C_TESTLIB_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST $(C_TESTLIB_STRESS_OBJS): $(objroot)test/src/%.stress.$(O): $(srcroot)test/src/%.c $(C_TESTLIB_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST -DJEMALLOC_STRESS_TESTLIB $(C_TESTLIB_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_UNIT_OBJS): CPPFLAGS += -DJEMALLOC_UNIT_TEST $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST +$(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp @@ -416,7 +426,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) 
$(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h -$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h +$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h $(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) @@ -479,6 +489,10 @@ $(objroot)test/integration/cpp/%$(EXE): $(objroot)test/integration/cpp/%.$(O) $( @mkdir -p $(@D) $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) -lm $(EXTRA_LDFLAGS) +$(objroot)test/analyze/%$(EXE): $(objroot)test/analyze/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_ANALYZE_OBJS) + @mkdir -p $(@D) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) @mkdir -p $(@D) $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) @@ -559,13 +573,16 @@ endif tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) +tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) -tests: tests_unit tests_integration tests_stress +tests: tests_unit tests_integration tests_analyze 
tests_stress check_unit_dir: @mkdir -p $(objroot)test/unit check_integration_dir: @mkdir -p $(objroot)test/integration +analyze_dir: + @mkdir -p $(objroot)test/analyze stress_dir: @mkdir -p $(objroot)test/stress check_dir: check_unit_dir check_integration_dir @@ -582,6 +599,12 @@ check_integration_decay: tests_integration check_integration_dir $(MALLOC_CONF)="dirty_decay_ms:0,muzzy_decay_ms:0" $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) check_integration: tests_integration check_integration_dir $(SHELL) $(objroot)test/test.sh $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%) +analyze: tests_analyze analyze_dir +ifeq ($(enable_prof), 1) + $(MALLOC_CONF)="prof:true" $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%) +else + $(SHELL) $(objroot)test/test.sh $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%) +endif stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) check: check_unit check_integration check_integration_decay check_integration_prof diff --git a/test/analyze/rand.c b/test/analyze/rand.c new file mode 100644 index 00000000..a4ab49a2 --- /dev/null +++ b/test/analyze/rand.c @@ -0,0 +1,276 @@ +#include "test/jemalloc_test.h" + +/******************************************************************************/ + +/* + * General purpose tool for examining random number distributions. + * + * Input - + * (a) a random number generator, and + * (b) the buckets: + * (1) number of buckets, + * (2) width of each bucket, in log scale, + * (3) expected mean and stddev of the count of random numbers in each + * bucket, and + * (c) number of iterations to invoke the generator. 
+ * + * The program generates the specified amount of random numbers, and assess how + * well they conform to the expectations: for each bucket, output - + * (a) the (given) expected mean and stddev, + * (b) the actual count and any interesting level of deviation: + * (1) ~68% buckets should show no interesting deviation, meaning a + * deviation less than stddev from the expectation; + * (2) ~27% buckets should show '+' / '-', meaning a deviation in the range + * of [stddev, 2 * stddev) from the expectation; + * (3) ~4% buckets should show '++' / '--', meaning a deviation in the + * range of [2 * stddev, 3 * stddev) from the expectation; and + * (4) less than 0.3% buckets should show more than two '+'s / '-'s. + * + * Technical remarks: + * (a) The generator is expected to output uint64_t numbers, so you might need + * to define a wrapper. + * (b) The buckets must be of equal width and the lowest bucket starts at + * [0, 2^lg_bucket_width - 1). + * (c) Any generated number >= n_bucket * 2^lg_bucket_width will be counted + * towards the last bucket; the expected mean and stddev provided should + * also reflect that. + * (d) The number of iterations is adviced to be determined so that the bucket + * with the minimal expected proportion gets a sufficient count. 
+ */ + +static void +fill(size_t a[], const size_t n, const size_t k) { + for (size_t i = 0; i < n; ++i) { + a[i] = k; + } +} + +static void +collect_buckets(uint64_t (*gen)(void *), void *opaque, size_t buckets[], + const size_t n_bucket, const size_t lg_bucket_width, const size_t n_iter) { + for (size_t i = 0; i < n_iter; ++i) { + uint64_t num = gen(opaque); + uint64_t bucket_id = num >> lg_bucket_width; + if (bucket_id >= n_bucket) { + bucket_id = n_bucket - 1; + } + ++buckets[bucket_id]; + } +} + +static void +print_buckets(const size_t buckets[], const size_t means[], + const size_t stddevs[], const size_t n_bucket) { + for (size_t i = 0; i < n_bucket; ++i) { + malloc_printf("%zu:\tmean = %zu,\tstddev = %zu,\tbucket = %zu", + i, means[i], stddevs[i], buckets[i]); + + /* Make sure there's no overflow. */ + assert(buckets[i] + stddevs[i] >= stddevs[i]); + assert(means[i] + stddevs[i] >= stddevs[i]); + + if (buckets[i] + stddevs[i] <= means[i]) { + malloc_write(" "); + for (size_t t = means[i] - buckets[i]; t >= stddevs[i]; + t -= stddevs[i]) { + malloc_write("-"); + } + } else if (buckets[i] >= means[i] + stddevs[i]) { + malloc_write(" "); + for (size_t t = buckets[i] - means[i]; t >= stddevs[i]; + t -= stddevs[i]) { + malloc_write("+"); + } + } + malloc_write("\n"); + } +} + +static void +bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[], + const size_t means[], const size_t stddevs[], const size_t n_bucket, + const size_t lg_bucket_width, const size_t n_iter) { + for (size_t i = 1; i <= 3; ++i) { + malloc_printf("round %zu\n", i); + fill(buckets, n_bucket, 0); + collect_buckets(gen, opaque, buckets, n_bucket, + lg_bucket_width, n_iter); + print_buckets(buckets, means, stddevs, n_bucket); + } +} + +/* (Recommended) minimal bucket mean. */ +#define MIN_BUCKET_MEAN 10000 + +/******************************************************************************/ + +/* Uniform random number generator. 
*/ + +typedef struct uniform_gen_arg_s uniform_gen_arg_t; +struct uniform_gen_arg_s { + uint64_t state; + const unsigned lg_range; +}; + +static uint64_t +uniform_gen(void *opaque) { + uniform_gen_arg_t *arg = (uniform_gen_arg_t *)opaque; + return prng_lg_range_u64(&arg->state, arg->lg_range); +} + +TEST_BEGIN(test_uniform) { +#define LG_N_BUCKET 5 +#define N_BUCKET (1 << LG_N_BUCKET) + +#define QUOTIENT_CEIL(n, d) (((n) - 1) / (d) + 1) + + const unsigned lg_range_test = 25; + + /* + * Mathematical tricks to guarantee that both mean and stddev are + * integers, and that the minimal bucket mean is at least + * MIN_BUCKET_MEAN. + */ + const size_t q = 1 << QUOTIENT_CEIL(LG_CEIL(QUOTIENT_CEIL( + MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), 2); + const size_t stddev = (N_BUCKET - 1) * q; + const size_t mean = N_BUCKET * stddev * q; + const size_t n_iter = N_BUCKET * mean; + + size_t means[N_BUCKET]; + fill(means, N_BUCKET, mean); + size_t stddevs[N_BUCKET]; + fill(stddevs, N_BUCKET, stddev); + + uniform_gen_arg_t arg = {(uint64_t)(uintptr_t)&lg_range_test, + lg_range_test}; + size_t buckets[N_BUCKET]; + assert_zu_ge(lg_range_test, LG_N_BUCKET, ""); + const size_t lg_bucket_width = lg_range_test - LG_N_BUCKET; + + bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, + N_BUCKET, lg_bucket_width, n_iter); + +#undef LG_N_BUCKET +#undef N_BUCKET +#undef QUOTIENT_CEIL +} +TEST_END + +/******************************************************************************/ + +/* Geometric random number generator; compiled only when prof is on. */ + +#ifdef JEMALLOC_PROF + +/* + * Fills geometric proportions and returns the minimal proportion. See + * comments in test_prof_sample for explanations for n_divide. 
+ */ +static double +fill_geometric_proportions(double proportions[], const size_t n_bucket, + const size_t n_divide) { + assert(n_bucket > 0); + assert(n_divide > 0); + double x = 1.; + for (size_t i = 0; i < n_bucket; ++i) { + if (i == n_bucket - 1) { + proportions[i] = x; + } else { + double y = x * exp(-1. / n_divide); + proportions[i] = x - y; + x = y; + } + } + /* + * The minimal proportion is the smaller one of the last two + * proportions for geometric distribution. + */ + double min_proportion = proportions[n_bucket - 1]; + if (n_bucket >= 2 && proportions[n_bucket - 2] < min_proportion) { + min_proportion = proportions[n_bucket - 2]; + } + return min_proportion; +} + +static size_t +round_to_nearest(const double x) { + return (size_t)(x + .5); +} + +static void +fill_references(size_t means[], size_t stddevs[], const double proportions[], + const size_t n_bucket, const size_t n_iter) { + for (size_t i = 0; i < n_bucket; ++i) { + double x = n_iter * proportions[i]; + means[i] = round_to_nearest(x); + stddevs[i] = round_to_nearest(sqrt(x * (1. - proportions[i]))); + } +} + +static uint64_t +prof_sample_gen(void *opaque) { + return prof_sample_new_event_wait((tsd_t *)opaque) - 1; +} + +#endif /* JEMALLOC_PROF */ + +TEST_BEGIN(test_prof_sample) { + test_skip_if(!config_prof); +#ifdef JEMALLOC_PROF + +/* Number of divisions within [0, mean). */ +#define LG_N_DIVIDE 3 +#define N_DIVIDE (1 << LG_N_DIVIDE) + +/* Coverage of buckets in terms of multiples of mean. 
*/ +#define LG_N_MULTIPLY 2 +#define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY) + + test_skip_if(!opt_prof); + + size_t lg_prof_sample_test = 25; + + size_t lg_prof_sample_orig = lg_prof_sample; + assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_test, + sizeof(size_t)), 0, ""); + malloc_printf("lg_prof_sample = %zu\n", lg_prof_sample_test); + + double proportions[N_GEO_BUCKET + 1]; + const double min_proportion = fill_geometric_proportions(proportions, + N_GEO_BUCKET + 1, N_DIVIDE); + const size_t n_iter = round_to_nearest(MIN_BUCKET_MEAN / + min_proportion); + size_t means[N_GEO_BUCKET + 1]; + size_t stddevs[N_GEO_BUCKET + 1]; + fill_references(means, stddevs, proportions, N_GEO_BUCKET + 1, n_iter); + + tsd_t *tsd = tsd_fetch(); + assert_ptr_not_null(tsd, ""); + size_t buckets[N_GEO_BUCKET + 1]; + assert_zu_ge(lg_prof_sample, LG_N_DIVIDE, ""); + const size_t lg_bucket_width = lg_prof_sample - LG_N_DIVIDE; + + bucket_analysis(prof_sample_gen, tsd, buckets, means, stddevs, + N_GEO_BUCKET + 1, lg_bucket_width, n_iter); + + assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_orig, + sizeof(size_t)), 0, ""); + +#undef LG_N_DIVIDE +#undef N_DIVIDE +#undef LG_N_MULTIPLY +#undef N_GEO_BUCKET + +#endif /* JEMALLOC_PROF */ +} +TEST_END + +/******************************************************************************/ + +int +main(void) { + return test_no_reentrancy( + test_uniform, + test_prof_sample); +} diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index e5d63062..ae675745 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -38,9 +38,9 @@ extern "C" { /******************************************************************************/ /* - * For unit tests, expose all public and private interfaces. + * For unit tests and analytics tests, expose all public and private interfaces. 
*/ -#ifdef JEMALLOC_UNIT_TEST +#if defined(JEMALLOC_UNIT_TEST) || defined (JEMALLOC_ANALYZE_TEST) # define JEMALLOC_JET # define JEMALLOC_MANGLE # include "jemalloc/internal/jemalloc_preamble.h" From d8cea8756242a3a50dde4baf4fb8bf38eddac55d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 25 Jun 2020 09:38:23 -0700 Subject: [PATCH 1773/2608] Move size inspections to test/analyze --- Makefile.in | 6 +++--- test/{stress => analyze}/sizes.c | 0 2 files changed, 3 insertions(+), 3 deletions(-) rename test/{stress => analyze}/sizes.c (100%) diff --git a/Makefile.in b/Makefile.in index 4a0ef874..1c9e4004 100644 --- a/Makefile.in +++ b/Makefile.in @@ -287,12 +287,12 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif -TESTS_ANALYZE := $(srcroot)test/analyze/rand.c +TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ + $(srcroot)test/analyze/sizes.c TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ - $(srcroot)test/stress/hookbench.c \ - $(srcroot)test/stress/sizes.c + $(srcroot)test/stress/hookbench.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ diff --git a/test/stress/sizes.c b/test/analyze/sizes.c similarity index 100% rename from test/stress/sizes.c rename to test/analyze/sizes.c From f307b25804064eb26077f98b1481e6eb42f1dbad Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 11:05:07 -0700 Subject: [PATCH 1774/2608] Only replace the dump file opening function in test --- include/jemalloc/internal/prof_externs.h | 4 ++-- src/prof_data.c | 15 ++++++++++----- test/unit/prof_accum.c | 6 +++--- test/unit/prof_gdump.c | 6 +++--- test/unit/prof_idump.c | 6 +++--- test/unit/prof_reset.c | 6 +++--- 6 files changed, 24 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 6021cf45..2f9f2c95 100644 --- a/include/jemalloc/internal/prof_externs.h +++ 
b/include/jemalloc/internal/prof_externs.h @@ -94,8 +94,8 @@ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef int (prof_dump_open_t)(bool, const char *); -extern prof_dump_open_t *JET_MUTABLE prof_dump_open; +typedef int (prof_dump_open_file_t)(const char *, int); +extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, diff --git a/src/prof_data.c b/src/prof_data.c index 49cc6ee3..396cea0d 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -467,13 +467,19 @@ prof_bt_count(void) { } static int -prof_dump_open_impl(bool propagate_err, const char *filename) { +prof_dump_open_file_impl(const char *filename, int mode) { + return creat(filename, mode); +} +prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = + prof_dump_open_file_impl; + +static int +prof_dump_open(bool propagate_err, const char *filename) { int fd; - fd = creat(filename, 0644); + fd = prof_dump_open_file(filename, 0644); if (fd == -1 && !propagate_err) { - malloc_printf(": creat(\"%s\"), 0644) failed\n", - filename); + malloc_printf(": failed to open \"%s\"\n", filename); if (opt_abort) { abort(); } @@ -481,7 +487,6 @@ prof_dump_open_impl(bool propagate_err, const char *filename) { return fd; } -prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl; static bool prof_dump_flush(bool propagate_err) { diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 8dfa6780..8fc58813 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -6,11 +6,11 @@ #define BT_COUNT_CHECK_INTERVAL 5 static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const 
char *filename, int mode) { int fd; fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -62,7 +62,7 @@ TEST_BEGIN(test_idump) { sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; for (i = 0; i < NTHREADS; i++) { thd_args[i] = i; diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 4c6afbde..6209255e 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -3,13 +3,13 @@ static bool did_prof_dump_open; static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; did_prof_dump_open = true; fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -26,7 +26,7 @@ TEST_BEGIN(test_gdump) { sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; did_prof_dump_open = false; p = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index dfcc0ff6..b0c1bc28 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -5,7 +5,7 @@ static bool did_prof_dump_open; static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; did_prof_dump_open = true; @@ -15,7 +15,7 @@ prof_dump_open_intercept(bool propagate_err, const char *filename) { - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -38,7 +38,7 @@ 
TEST_BEGIN(test_idump) { sizeof(active)), 0, "Unexpected mallctl failure while activating profiling"); - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; did_prof_dump_open = false; p = mallocx(1, 0); diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index e643e546..29fa02bb 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,11 +1,11 @@ #include "test/jemalloc_test.h" static int -prof_dump_open_intercept(bool propagate_err, const char *filename) { +prof_dump_open_file_intercept(const char *filename, int mode) { int fd; fd = open("/dev/null", O_WRONLY); - expect_d_ne(fd, -1, "Unexpected open() failure"); + assert_d_ne(fd, -1, "Unexpected open() failure"); return fd; } @@ -276,7 +276,7 @@ TEST_END int main(void) { /* Intercept dumping prior to running any tests. */ - prof_dump_open = prof_dump_open_intercept; + prof_dump_open_file = prof_dump_open_file_intercept; return test_no_reentrancy( test_prof_reset_basic, From 4bb4037dbe2450c985d09eabd29a1d8534e20641 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 19:46:18 -0700 Subject: [PATCH 1775/2608] Extract utility function for opening maps file --- src/prof_data.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index 396cea0d..62b650ca 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -919,7 +919,7 @@ label_return: #ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int -prof_open_maps(const char *format, ...) { +prof_open_maps_internal(const char *format, ...) { int mfd; va_list ap; char filename[PATH_MAX + 1]; @@ -941,26 +941,31 @@ prof_open_maps(const char *format, ...) 
{ } #endif -static bool -prof_dump_maps(bool propagate_err) { - bool ret; +static int +prof_dump_open_maps() { int mfd; cassert(config_prof); #ifdef __FreeBSD__ - mfd = prof_open_maps("/proc/curproc/map"); + mfd = prof_open_maps_internal("/proc/curproc/map"); #elif defined(_WIN32) mfd = -1; // Not implemented #else - { - int pid = prof_getpid(); + int pid = prof_getpid(); - mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps("/proc/%d/maps", pid); - } + mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps_internal("/proc/%d/maps", pid); } #endif + return mfd; +} + +static bool +prof_dump_maps(bool propagate_err) { + bool ret; + int mfd = prof_dump_open_maps(); + if (mfd != -1) { ssize_t nread; From 21e44c45d994798d50df9fa77c905465a38a4675 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 19:57:06 -0700 Subject: [PATCH 1776/2608] Make maps file opening replaceable in test --- include/jemalloc/internal/prof_externs.h | 2 ++ src/prof_data.c | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 2f9f2c95..e5d6ff7b 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -98,6 +98,8 @@ typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +typedef int (prof_dump_open_maps_t)(); +extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); diff --git a/src/prof_data.c b/src/prof_data.c index 62b650ca..b9b211d4 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -942,7 +942,7 @@ prof_open_maps_internal(const char 
*format, ...) { #endif static int -prof_dump_open_maps() { +prof_dump_open_maps_impl() { int mfd; cassert(config_prof); @@ -960,6 +960,8 @@ prof_dump_open_maps() { #endif return mfd; } +prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = + prof_dump_open_maps_impl; static bool prof_dump_maps(bool propagate_err) { From 7455813e5762c93fd2dcaf0672324dffa8aae5a2 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 17 Mar 2020 20:27:52 -0700 Subject: [PATCH 1777/2608] Make dump file writing replaceable in test --- include/jemalloc/internal/prof_externs.h | 2 ++ src/prof_data.c | 7 +++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e5d6ff7b..d644be64 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -96,6 +96,8 @@ size_t prof_tdata_count(void); size_t prof_bt_count(void); typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; +typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); +extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; typedef int (prof_dump_open_maps_t)(); diff --git a/src/prof_data.c b/src/prof_data.c index b9b211d4..0de728b0 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -488,6 +488,8 @@ prof_dump_open(bool propagate_err, const char *filename) { return fd; } +prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; + static bool prof_dump_flush(bool propagate_err) { bool ret = false; @@ -495,10 +497,11 @@ prof_dump_flush(bool propagate_err) { cassert(config_prof); - err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, + prof_dump_buf_end); if (err == -1) { if 
(!propagate_err) { - malloc_write(": write() failed during heap " + malloc_write(": failed to write during heap " "profile flush\n"); if (opt_abort) { abort(); From 354183b10d286876ef9811fd9e94758926e66927 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 12:03:12 -0700 Subject: [PATCH 1778/2608] Define prof dump buffer size centrally --- include/jemalloc/internal/prof_types.h | 7 ++++++- src/prof_data.c | 9 +-------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 4abe5b58..98750f3c 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -29,7 +29,12 @@ typedef struct prof_recent_s prof_recent_t; #define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ -#define PROF_DUMP_BUFSIZE 65536 +#ifndef JEMALLOC_PROF +/* Minimize memory bloat for non-prof builds. */ +# define PROF_DUMP_BUFSIZE 1 +#else +# define PROF_DUMP_BUFSIZE 65536 +#endif /* Size of stack-allocated buffer used by prof_printf(). */ #define PROF_PRINTF_BUFSIZE 128 diff --git a/src/prof_data.c b/src/prof_data.c index 0de728b0..d5f55241 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,14 +59,7 @@ static prof_tdata_tree_t tdatas; * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. */ -static char prof_dump_buf[ - /* Minimize memory bloat for non-prof builds. 
*/ -#ifdef JEMALLOC_PROF - PROF_DUMP_BUFSIZE -#else - 1 -#endif -]; +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; static size_t prof_dump_buf_end; static int prof_dump_fd; From f541871f5df5d711df6fd13830496f86d72439ce Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 19 Jun 2020 12:21:17 -0700 Subject: [PATCH 1779/2608] Reduce prof dump buffer size in debug build --- include/jemalloc/internal/prof_types.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 98750f3c..dbd758fa 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -32,6 +32,9 @@ typedef struct prof_recent_s prof_recent_t; #ifndef JEMALLOC_PROF /* Minimize memory bloat for non-prof builds. */ # define PROF_DUMP_BUFSIZE 1 +#elif defined(JEMALLOC_DEBUG) +/* Use a small buffer size in debug build, mainly to facilitate testing. */ +# define PROF_DUMP_BUFSIZE 16 #else # define PROF_DUMP_BUFSIZE 65536 #endif From 5d292b56609ae2b85658f4c544b03d46b41e66be Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:00:37 -0700 Subject: [PATCH 1780/2608] Push error handling logic out of core dumping logic --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 2 +- src/prof_data.c | 340 ++++++++--------------- test/unit/prof_mdump.c | 214 ++++++++++++++ test/unit/prof_mdump.sh | 6 + test/unit/prof_reset.c | 7 +- 6 files changed, 346 insertions(+), 224 deletions(-) create mode 100644 test/unit/prof_mdump.c create mode 100644 test/unit/prof_mdump.sh diff --git a/Makefile.in b/Makefile.in index 1c9e4004..87ddd338 100644 --- a/Makefile.in +++ b/Makefile.in @@ -228,6 +228,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_gdump.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_log.c \ + $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ $(srcroot)test/unit/prof_tctx.c \ diff --git 
a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index d644be64..9a2b1224 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -98,7 +98,7 @@ typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef bool (prof_dump_header_t)(tsdn_t *, bool, const prof_cnt_t *); +typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; typedef int (prof_dump_open_maps_t)(); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; diff --git a/src/prof_data.c b/src/prof_data.c index d5f55241..210b153f 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -55,6 +55,20 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; +/* The following are needed for dumping and are protected by prof_dump_mtx. */ +/* + * Whether there has been an error in the dumping process, which could have + * happened either in file opening or in file writing. When an error has + * already occurred, we will stop further writing to the file. + */ +static bool prof_dump_error; +/* + * Whether error should be handled locally: if true, then we print out error + * message as well as abort (if opt_abort is true) when an error occurred, and + * we also report the error back to the caller in the end; if false, then we + * only report the error back to the caller in the end. + */ +static bool prof_dump_handle_error_locally; /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps. @@ -459,6 +473,30 @@ prof_bt_count(void) { return bt_count; } +static void +prof_dump_check_possible_error(bool err_cond, const char *format, ...) 
{ + assert(!prof_dump_error); + if (!err_cond) { + return; + } + + prof_dump_error = true; + if (!prof_dump_handle_error_locally) { + return; + } + + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + malloc_write(buf); + + if (opt_abort) { + abort(); + } +} + static int prof_dump_open_file_impl(const char *filename, int mode) { return creat(filename, mode); @@ -466,61 +504,37 @@ prof_dump_open_file_impl(const char *filename, int mode) { prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = prof_dump_open_file_impl; -static int -prof_dump_open(bool propagate_err, const char *filename) { - int fd; - - fd = prof_dump_open_file(filename, 0644); - if (fd == -1 && !propagate_err) { - malloc_printf(": failed to open \"%s\"\n", filename); - if (opt_abort) { - abort(); - } - } - - return fd; +static void +prof_dump_open(const char *filename) { + prof_dump_fd = prof_dump_open_file(filename, 0644); + prof_dump_check_possible_error(prof_dump_fd == -1, + ": failed to open \"%s\"\n", filename); } prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; -static bool -prof_dump_flush(bool propagate_err) { - bool ret = false; - ssize_t err; - +static void +prof_dump_flush() { cassert(config_prof); - - err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, - prof_dump_buf_end); - if (err == -1) { - if (!propagate_err) { - malloc_write(": failed to write during heap " - "profile flush\n"); - if (opt_abort) { - abort(); - } - } - ret = true; + if (!prof_dump_error) { + ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, + prof_dump_buf_end); + prof_dump_check_possible_error(err == -1, + ": failed to write during heap profile flush\n"); } prof_dump_buf_end = 0; - - return ret; } -static bool -prof_dump_close(bool propagate_err) { - bool ret; - - assert(prof_dump_fd != -1); - ret = prof_dump_flush(propagate_err); - close(prof_dump_fd); - prof_dump_fd = -1; - - return 
ret; +static void +prof_dump_close() { + if (prof_dump_fd != -1) { + prof_dump_flush(); + close(prof_dump_fd); + } } -static bool -prof_dump_write(bool propagate_err, const char *s) { +static void +prof_dump_write(const char *s) { size_t i, slen, n; cassert(config_prof); @@ -530,9 +544,7 @@ prof_dump_write(bool propagate_err, const char *s) { while (i < slen) { /* Flush the buffer if it is full. */ if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - if (prof_dump_flush(propagate_err) && propagate_err) { - return true; - } + prof_dump_flush(); } if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { @@ -547,23 +559,18 @@ prof_dump_write(bool propagate_err, const char *s) { i += n; } assert(i == slen); - - return false; } -JEMALLOC_FORMAT_PRINTF(2, 3) -static bool -prof_dump_printf(bool propagate_err, const char *format, ...) { - bool ret; +JEMALLOC_FORMAT_PRINTF(1, 2) +static void +prof_dump_printf(const char *format, ...) { va_list ap; char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); - ret = prof_dump_write(propagate_err, buf); - - return ret; + prof_dump_write(buf); } static void @@ -630,17 +637,10 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { return NULL; } -struct prof_tctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - struct prof_tctx_dump_iter_arg_s *arg = - (struct prof_tctx_dump_iter_arg_s *)opaque; - - malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { + tsdn_t *tsdn = (tsdn_t *)arg; + malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -649,13 +649,11 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - 
if (prof_dump_printf(arg->propagate_err, + prof_dump_printf( " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes)) { - return tctx; - } + tctx->dump_cnts.accumbytes); break; default: not_reached(); @@ -817,53 +815,37 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, static prof_tdata_t * prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *arg) { - bool propagate_err = *(bool *)arg; - + void *unused) { if (!tdata->dumping) { return NULL; } - if (prof_dump_printf(propagate_err, + prof_dump_printf( " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n", tdata->thr_uid, tdata->cnt_summed.curobjs, tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, tdata->cnt_summed.accumbytes, (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : "")) { - return tdata; - } + (tdata->thread_name != NULL) ? 
tdata->thread_name : ""); return NULL; } -static bool -prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { - bool ret; - - if (prof_dump_printf(propagate_err, - "heap_v2/%"FMTu64"\n" +static void +prof_dump_header_impl(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { + prof_dump_printf("heap_v2/%"FMTu64"\n" " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) { - return true; - } + cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes); malloc_mutex_lock(tsdn, &tdatas_mtx); - ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, - (void *)&propagate_err) != NULL); + tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, NULL); malloc_mutex_unlock(tsdn, &tdatas_mtx); - return ret; } prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; -static bool -prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, - const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { - bool ret; - unsigned i; - struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg; - +static void +prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, + prof_gctx_tree_t *gctxs) { cassert(config_prof); malloc_mutex_assert_owner(tsdn, gctx->lock); @@ -874,42 +856,21 @@ prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx, assert(gctx->cnt_summed.curbytes == 0); assert(gctx->cnt_summed.accumobjs == 0); assert(gctx->cnt_summed.accumbytes == 0); - ret = false; - goto label_return; + return; } - if (prof_dump_printf(propagate_err, "@")) { - ret = true; - goto label_return; - } - for (i = 0; i < bt->len; i++) { - if (prof_dump_printf(propagate_err, " %#"FMTxPTR, - (uintptr_t)bt->vec[i])) { - ret = true; - goto label_return; - } + prof_dump_write("@"); + for (unsigned i = 0; i < bt->len; i++) { + prof_dump_printf(" %#"FMTxPTR, (uintptr_t)bt->vec[i]); } - if (prof_dump_printf(propagate_err, - "\n" - " t*: 
%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", + prof_dump_printf( + "\n t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) { - ret = true; - goto label_return; - } + gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes); - prof_tctx_dump_iter_arg.tsdn = tsdn; - prof_tctx_dump_iter_arg.propagate_err = propagate_err; - if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)&prof_tctx_dump_iter_arg) != NULL) { - ret = true; - goto label_return; - } - - ret = false; -label_return: - return ret; + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, + (void *)tsdn); } #ifndef _WIN32 @@ -959,45 +920,26 @@ prof_dump_open_maps_impl() { prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = prof_dump_open_maps_impl; -static bool -prof_dump_maps(bool propagate_err) { - bool ret; +static void +prof_dump_maps() { int mfd = prof_dump_open_maps(); + if (mfd == -1) { + return; + } - if (mfd != -1) { - ssize_t nread; - - if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") && - propagate_err) { - ret = true; - goto label_return; + prof_dump_write("\nMAPPED_LIBRARIES:\n"); + ssize_t nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in prof_dump_buf before read(). */ + prof_dump_flush(); } - nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). 
*/ - if (prof_dump_flush(propagate_err) && - propagate_err) { - ret = true; - goto label_return; - } - } - nread = malloc_read_fd(mfd, - &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE - - prof_dump_buf_end); - } while (nread > 0); - } else { - ret = true; - goto label_return; - } + nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUFSIZE - prof_dump_buf_end); + } while (nread > 0); - ret = false; -label_return: - if (mfd != -1) { - close(mfd); - } - return ret; + close(mfd); } /* @@ -1035,29 +977,13 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, #endif } -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - bool propagate_err; -}; - static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_t *ret; - struct prof_gctx_dump_iter_arg_s *arg = - (struct prof_gctx_dump_iter_arg_s *)opaque; - - malloc_mutex_lock(arg->tsdn, gctx->lock); - - if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt, - gctxs)) { - ret = gctx; - goto label_return; - } - - ret = NULL; -label_return: - malloc_mutex_unlock(arg->tsdn, gctx->lock); - return ret; + tsdn_t *tsdn = (tsdn_t *)opaque; + malloc_mutex_lock(tsdn, gctx->lock); + prof_dump_gctx(tsdn, gctx, &gctx->bt, gctxs); + malloc_mutex_unlock(tsdn, gctx->lock); + return NULL; } static void @@ -1104,43 +1030,23 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, static bool prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, - struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg, + bool leakcheck, prof_tdata_t *tdata, const prof_cnt_t *cnt_all, prof_gctx_tree_t *gctxs) { + prof_dump_error = false; + prof_dump_handle_error_locally = !propagate_err; + /* Create dump file. 
*/ - if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) { - return true; - } - + prof_dump_open(filename); /* Dump profile header. */ - if (prof_dump_header(tsd_tsdn(tsd), propagate_err, - &prof_tdata_merge_iter_arg->cnt_all)) { - goto label_write_error; - } - + prof_dump_header(tsd_tsdn(tsd), cnt_all); /* Dump per gctx profile stats. */ - prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_dump_iter_arg->propagate_err = propagate_err; - if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, - (void *)prof_gctx_dump_iter_arg) != NULL) { - goto label_write_error; - } - + gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, (void *)tsd_tsdn(tsd)); /* Dump /proc//maps if possible. */ - if (prof_dump_maps(propagate_err)) { - goto label_write_error; - } + prof_dump_maps(); + /* Close dump file. */ + prof_dump_close(); - if (prof_dump_close(propagate_err)) { - return true; - } - - return false; -label_write_error: - prof_dump_close(propagate_err); - return true; + return prof_dump_error; } bool @@ -1160,12 +1066,10 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, prof_gctx_tree_t gctxs; struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; - struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata, - &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, - &prof_gctx_dump_iter_arg, &gctxs); + bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, + tdata, &prof_tdata_merge_iter_arg.cnt_all, &gctxs); prof_gctx_finish(tsd, &gctxs); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c new file mode 100644 index 00000000..3779c24e --- /dev/null +++ b/test/unit/prof_mdump.c @@ -0,0 +1,214 @@ +#include "test/jemalloc_test.h" + +static const 
char *test_filename = "test_filename"; +static bool did_prof_dump_open; + +static int +prof_dump_open_file_intercept(const char *filename, int mode) { + int fd; + + did_prof_dump_open = true; + + /* + * Stronger than a strcmp() - verifying that we internally directly use + * the caller supplied char pointer. + */ + expect_ptr_eq(filename, test_filename, + "Dump file name should be \"%s\"", test_filename); + + fd = open("/dev/null", O_WRONLY); + assert_d_ne(fd, -1, "Unexpected open() failure"); + + return fd; +} + +TEST_BEGIN(test_mdump_normal) { + test_skip_if(!config_prof); + + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + prof_dump_open_file = prof_dump_open_file_intercept; + did_prof_dump_open = false; + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, + sizeof(test_filename)), 0, + "Unexpected mallctl failure while dumping"); + expect_true(did_prof_dump_open, "Expected a profile dump"); + + dallocx(p, 0); + + prof_dump_open_file = open_file_orig; +} +TEST_END + +static int +prof_dump_open_file_error(const char *filename, int mode) { + return -1; +} + +/* + * In the context of test_mdump_output_error, prof_dump_write_file_count is the + * total number of times prof_dump_write_file_error() is expected to be called. + * In the context of test_mdump_maps_error, prof_dump_write_file_count is the + * total number of times prof_dump_write_file_error() is expected to be called + * starting from the one that contains an 'M' (beginning the "MAPPED_LIBRARIES" + * header). 
+ */ +static int prof_dump_write_file_count; + +static ssize_t +prof_dump_write_file_error(int fd, const void *s, size_t len) { + --prof_dump_write_file_count; + + expect_d_ge(prof_dump_write_file_count, 0, + "Write is called after error occurs"); + + if (prof_dump_write_file_count == 0) { + return -1; + } else { + /* + * Any non-negative number indicates success, and for + * simplicity we just use 0. When prof_dump_write_file_count + * is positive, it means that we haven't reached the write that + * we want to fail; when prof_dump_write_file_count is + * negative, it means that we've already violated the + * expect_d_ge(prof_dump_write_file_count, 0) statement above, + * but instead of aborting, we continue the rest of the test, + * and we indicate that all the writes after the failed write + * are successful. + */ + return 0; + } +} + +static void +expect_write_failure(int count) { + prof_dump_write_file_count = count; + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, + sizeof(test_filename)), EFAULT, "Dump should err"); + expect_d_eq(prof_dump_write_file_count, 0, + "Dumping stopped after a wrong number of writes"); +} + +TEST_BEGIN(test_mdump_output_error) { + test_skip_if(!config_prof); + test_skip_if(!config_debug); + + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_write_file_t *write_file_orig = prof_dump_write_file; + + prof_dump_write_file = prof_dump_write_file_error; + + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + /* + * When opening the dump file fails, there shouldn't be any write, and + * mallctl() should return failure. + */ + prof_dump_open_file = prof_dump_open_file_error; + expect_write_failure(0); + + /* + * When the n-th write fails, there shouldn't be any more write, and + * mallctl() should return failure. + */ + prof_dump_open_file = prof_dump_open_file_intercept; + expect_write_failure(1); /* First write fails. 
*/ + expect_write_failure(2); /* Second write fails. */ + + dallocx(p, 0); + + prof_dump_open_file = open_file_orig; + prof_dump_write_file = write_file_orig; +} +TEST_END + +static int +prof_dump_open_maps_error() { + return -1; +} + +static bool started_piping_maps_file; + +static ssize_t +prof_dump_write_maps_file_error(int fd, const void *s, size_t len) { + /* The main dump doesn't contain any capital 'M'. */ + if (!started_piping_maps_file && strchr(s, 'M') != NULL) { + started_piping_maps_file = true; + } + + if (started_piping_maps_file) { + return prof_dump_write_file_error(fd, s, len); + } else { + /* Return success when we haven't started piping maps. */ + return 0; + } +} + +static void +expect_maps_write_failure(int count) { + int mfd = prof_dump_open_maps(); + if (mfd == -1) { + /* No need to continue if we just can't find the maps file. */ + return; + } + close(mfd); + started_piping_maps_file = false; + expect_write_failure(count); + expect_true(started_piping_maps_file, "Should start piping maps"); +} + +TEST_BEGIN(test_mdump_maps_error) { + test_skip_if(!config_prof); + test_skip_if(!config_debug); + + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_write_file_t *write_file_orig = prof_dump_write_file; + prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps; + + prof_dump_open_file = prof_dump_open_file_intercept; + prof_dump_write_file = prof_dump_write_maps_file_error; + + void *p = mallocx(1, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + + /* + * When opening the maps file fails, there shouldn't be any maps write, + * and mallctl() should return success. 
+ */ + prof_dump_open_maps = prof_dump_open_maps_error; + started_piping_maps_file = false; + prof_dump_write_file_count = 0; + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, + sizeof(test_filename)), 0, + "mallctl should not fail in case of maps file opening failure"); + expect_false(started_piping_maps_file, "Shouldn't start piping maps"); + expect_d_eq(prof_dump_write_file_count, 0, + "Dumping stopped after a wrong number of writes"); + + /* + * When the n-th maps write fails (given that we are able to find the + * maps file), there shouldn't be any more maps write, and mallctl() + * should return failure. + */ + prof_dump_open_maps = open_maps_orig; + expect_maps_write_failure(1); /* First write fails. */ + expect_maps_write_failure(2); /* Second write fails. */ + + dallocx(p, 0); + + prof_dump_open_file = open_file_orig; + prof_dump_write_file = write_file_orig; +} +TEST_END + +int +main(void) { + return test( + test_mdump_normal, + test_mdump_output_error, + test_mdump_maps_error); +} diff --git a/test/unit/prof_mdump.sh b/test/unit/prof_mdump.sh new file mode 100644 index 00000000..d14cb8c5 --- /dev/null +++ b/test/unit/prof_mdump.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi + diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 29fa02bb..dc64a04c 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -83,13 +83,10 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; -static bool -prof_dump_header_intercept(tsdn_t *tsdn, bool propagate_err, - const prof_cnt_t *cnt_all) { +static void +prof_dump_header_intercept(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); - - return false; } TEST_BEGIN(test_prof_reset_cleanup) { From c8683bee80768c191b2e08f1fcef583bc17c9203 Mon Sep 17 00:00:00 2001 From: Yinan Zhang 
Date: Mon, 30 Mar 2020 16:48:45 -0700 Subject: [PATCH 1781/2608] Unify printing for prof counts object --- doc/jemalloc.xml.in | 6 +++--- src/prof_data.c | 42 ++++++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5ab84568..5472294c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -3437,7 +3437,7 @@ heap_v2/524288 [...] @ 0x5f86da8 0x5f5a1dc [...] 0x29e4d4e 0xa200316 0xabb2988 [...] t*: 13: 6688 [0: 0] - t3: 12: 6496 [0: ] + t3: 12: 6496 [0: 0] t99: 1: 192 [0: 0] [...] @@ -3448,9 +3448,9 @@ descriptions of the corresponding fields. / : : [: ] [...] - : : [: ] + : : [: ] [...] - : : [: ] + : : [: ] [...] @ [...] [...] : : [: ] diff --git a/src/prof_data.c b/src/prof_data.c index 210b153f..3f8c9918 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -573,6 +573,12 @@ prof_dump_printf(const char *format, ...) { prof_dump_write(buf); } +static void +prof_dump_print_cnts(const prof_cnt_t *cnts) { + prof_dump_printf("%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", + cnts->curobjs, cnts->curbytes, cnts->accumobjs, cnts->accumbytes); +} + static void prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { malloc_mutex_assert_owner(tsdn, tctx->tdata->lock); @@ -649,11 +655,9 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_dump_printf( - " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": " - "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs, - tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs, - tctx->dump_cnts.accumbytes); + prof_dump_printf(" t%"FMTu64": ", tctx->thr_uid); + prof_dump_print_cnts(&tctx->dump_cnts); + prof_dump_write("\n"); break; default: not_reached(); @@ -820,22 +824,21 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } - prof_dump_printf( - " t%"FMTu64": %"FMTu64": %"FMTu64" 
[%"FMTu64": %"FMTu64"]%s%s\n", - tdata->thr_uid, tdata->cnt_summed.curobjs, - tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs, - tdata->cnt_summed.accumbytes, - (tdata->thread_name != NULL) ? " " : "", - (tdata->thread_name != NULL) ? tdata->thread_name : ""); + prof_dump_printf(" t%"FMTu64": ", tdata->thr_uid); + prof_dump_print_cnts(&tdata->cnt_summed); + if (tdata->thread_name != NULL) { + prof_dump_printf(" %s", tdata->thread_name); + } + prof_dump_write("\n"); return NULL; } static void prof_dump_header_impl(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { - prof_dump_printf("heap_v2/%"FMTu64"\n" - " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs, - cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes); + prof_dump_printf("heap_v2/%"FMTu64"\n t*: ", + ((uint64_t)1U << lg_prof_sample)); + prof_dump_print_cnts(cnt_all); + prof_dump_write("\n"); malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, NULL); @@ -864,10 +867,9 @@ prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, prof_dump_printf(" %#"FMTxPTR, (uintptr_t)bt->vec[i]); } - prof_dump_printf( - "\n t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n", - gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes, - gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes); + prof_dump_write("\n t*: "); + prof_dump_print_cnts(&gctx->cnt_summed); + prof_dump_write("\n"); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, (void *)tsdn); From f43ac8543e8e6d38a0f0caf9afad22500118f75f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 09:42:11 -0700 Subject: [PATCH 1782/2608] Correct prof header macro namings --- include/jemalloc/internal/prof_data.h | 6 +++--- include/jemalloc/internal/prof_log.h | 6 +++--- include/jemalloc/internal/prof_recent.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h 
b/include/jemalloc/internal/prof_data.h index 46a35105..09a40994 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_PROF_DATA_H +#define JEMALLOC_INTERNAL_PROF_DATA_H #include "jemalloc/internal/mutex.h" @@ -19,4 +19,4 @@ void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); -#endif /* JEMALLOC_INTERNAL_PROF_DATA_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index 928bf27b..e833ced7 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_PROF_LOG_H +#define JEMALLOC_INTERNAL_PROF_LOG_H #include "jemalloc/internal/mutex.h" @@ -16,4 +16,4 @@ bool prof_log_is_logging(void); bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); -#endif /* JEMALLOC_INTERNAL_PROF_LOG_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_PROF_LOG_H */ diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index f88413df..4f376c7b 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H -#define JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H +#ifndef JEMALLOC_INTERNAL_PROF_RECENT_H +#define JEMALLOC_INTERNAL_PROF_RECENT_H extern malloc_mutex_t prof_recent_dump_mtx; @@ -15,4 +15,4 @@ extern prof_recent_list_t prof_recent_alloc_list; edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); -#endif /* 
JEMALLOC_INTERNAL_PROF_RECENT_EXTERNS_H */ +#endif /* JEMALLOC_INTERNAL_PROF_RECENT_H */ From 8118056c034aae3b8d3d250bed36e95eae6676a3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:13:55 -0700 Subject: [PATCH 1783/2608] Expose prof_data testing internals only in prof tests --- include/jemalloc/internal/prof_data.h | 8 ++++++++ include/jemalloc/internal/prof_externs.h | 6 ------ test/unit/prof_accum.c | 2 ++ test/unit/prof_active.c | 2 ++ test/unit/prof_reset.c | 2 ++ test/unit/prof_tctx.c | 2 ++ 6 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 09a40994..c1dc3ec4 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -19,4 +19,12 @@ void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); +/* Used in unit tests. */ +size_t prof_tdata_count(void); +size_t prof_bt_count(void); +typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); +extern prof_dump_header_t *JET_MUTABLE prof_dump_header; +void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, + uint64_t *accumbytes); + #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 9a2b1224..a1baaff1 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -92,18 +92,12 @@ void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); /* Used by unit tests. 
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; -size_t prof_tdata_count(void); -size_t prof_bt_count(void); typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); -extern prof_dump_header_t *JET_MUTABLE prof_dump_header; typedef int (prof_dump_open_maps_t)(); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 8fc58813..5b8085e1 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + #define NTHREADS 4 #define NALLOCS_PER_THREAD 50 #define DUMP_INTERVAL 1 diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index 41c0512d..af29e7ad 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + static void mallctl_bool_get(const char *name, bool expected, const char *func, int line) { bool old; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index dc64a04c..22bf7963 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + static int prof_dump_open_file_intercept(const char *filename, int mode) { int fd; diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 4bc597b6..801e5f79 100644 --- a/test/unit/prof_tctx.c +++ 
b/test/unit/prof_tctx.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_data.h" + TEST_BEGIN(test_prof_realloc) { tsd_t *tsd; int flags; From 841af2b4269b425c28b32c032340ac572d4773ae Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:39:40 -0700 Subject: [PATCH 1784/2608] Move thread name handling to prof_data module --- include/jemalloc/internal/prof_data.h | 2 + src/prof.c | 63 --------------------------- src/prof_data.c | 63 +++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index c1dc3ec4..6c6c5345 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,6 +10,8 @@ void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); +char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, diff --git a/src/prof.c b/src/prof.c index 5e29f401..14577461 100644 --- a/src/prof.c +++ b/src/prof.c @@ -133,69 +133,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } } -static char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - memcpy(ret, thread_name, size); - return ret; -} - -static int -prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t *tdata; - unsigned 
i; - char *s; - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return EAGAIN; - } - - /* Validate input. */ - if (thread_name == NULL) { - return EFAULT; - } - for (i = 0; thread_name[i] != '\0'; i++) { - char c = thread_name[i]; - if (!isgraph(c) && !isblank(c)) { - return EFAULT; - } - } - - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); - if (s == NULL) { - return EAGAIN; - } - - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - tdata->thread_name = NULL; - } - if (strlen(s) > 0) { - tdata->thread_name = s; - } - return 0; -} - static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP diff --git a/src/prof_data.c b/src/prof_data.c index 3f8c9918..d2ad3748 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -473,6 +473,69 @@ prof_bt_count(void) { return bt_count; } +char * +prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { + char *ret; + size_t size; + + if (thread_name == NULL) { + return NULL; + } + + size = strlen(thread_name) + 1; + if (size == 1) { + return ""; + } + + ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); + if (ret == NULL) { + return NULL; + } + memcpy(ret, thread_name, size); + return ret; +} + +int +prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t *tdata; + unsigned i; + char *s; + + tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return EAGAIN; + } + + /* Validate input. 
*/ + if (thread_name == NULL) { + return EFAULT; + } + for (i = 0; thread_name[i] != '\0'; i++) { + char c = thread_name[i]; + if (!isgraph(c) && !isblank(c)) { + return EFAULT; + } + } + + s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + if (s == NULL) { + return EAGAIN; + } + + if (tdata->thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, + true); + tdata->thread_name = NULL; + } + if (strlen(s) > 0) { + tdata->thread_name = s; + } + return 0; +} + static void prof_dump_check_possible_error(bool err_cond, const char *format, ...) { assert(!prof_dump_error); From adfd9d7b1d69a997a74193bf9d03951616f22ba6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 10:43:04 -0700 Subject: [PATCH 1785/2608] Change tsdn to tsd for thread name allocation --- include/jemalloc/internal/prof_data.h | 2 +- src/prof.c | 2 +- src/prof_data.c | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 6c6c5345..26b8b28e 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,7 +10,7 @@ void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); -char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name); +char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck); diff --git a/src/prof.c b/src/prof.c index 14577461..29eb3e6b 100644 --- a/src/prof.c +++ b/src/prof.c @@ -746,7 +746,7 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; char *thread_name = (tdata->thread_name != NULL) ? 
- prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL; + prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; bool active = tdata->active; prof_tdata_detach(tsd, tdata); diff --git a/src/prof_data.c b/src/prof_data.c index d2ad3748..9563293f 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -474,7 +474,7 @@ prof_bt_count(void) { } char * -prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { +prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) { char *ret; size_t size; @@ -487,8 +487,8 @@ prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) { return ""; } - ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true, - arena_get(TSDN_NULL, 0, true), true); + ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL, + true, arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) { return NULL; } @@ -520,7 +520,7 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { } } - s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name); + s = prof_thread_name_alloc(tsd, thread_name); if (s == NULL) { return EAGAIN; } From 03ae509f325e952a1447d8b933ee57f3d116434d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 31 Mar 2020 09:02:55 -0700 Subject: [PATCH 1786/2608] Create prof_sys module for reading system thread name --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 2 -- include/jemalloc/internal/prof_sys.h | 10 +++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ src/prof.c | 22 +-------------- src/prof_sys.c | 27 +++++++++++++++++++ test/unit/prof_sys_thread_name.c | 2 ++ 10 files changed, 49 insertions(+), 23 deletions(-) create mode 100644 include/jemalloc/internal/prof_sys.h create mode 100644 src/prof_sys.c diff --git a/Makefile.in b/Makefile.in index 87ddd338..7f07d967 100644 --- a/Makefile.in +++ b/Makefile.in 
@@ -133,6 +133,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_sys.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a1baaff1..135fb29d 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -90,8 +90,6 @@ uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); /* Used by unit tests. */ -typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); -extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h new file mode 100644 index 00000000..cfa00591 --- /dev/null +++ b/include/jemalloc/internal/prof_sys.h @@ -0,0 +1,10 @@ +#ifndef JEMALLOC_INTERNAL_PROF_SYS_H +#define JEMALLOC_INTERNAL_PROF_SYS_H + +void prof_sys_thread_name_fetch(tsd_t *tsd); + +/* Used in unit tests. 
*/ +typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; + +#endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index bbe814be..00ea2beb 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -74,6 +74,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 6f7027be..0bcb45a8 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -106,6 +106,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index ae60133b..446ea606 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -74,6 +74,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 6f7027be..0bcb45a8 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -106,6 +106,9 @@ Source Files + + Source Files + Source Files diff --git a/src/prof.c b/src/prof.c index 29eb3e6b..ea63cfdd 100644 --- a/src/prof.c +++ b/src/prof.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/thread_event.h" /* @@ -133,27 +134,6 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } } -static int -prof_sys_thread_name_read_impl(char *buf, size_t limit) { -#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP - return pthread_getname_np(pthread_self(), buf, limit); -#else 
- return ENOSYS; -#endif -} -prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = - prof_sys_thread_name_read_impl; - -static void -prof_sys_thread_name_fetch(tsd_t *tsd) { -#define THREAD_NAME_MAX_LEN 16 - char buf[THREAD_NAME_MAX_LEN]; - if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { - prof_thread_name_set_impl(tsd, buf); - } -#undef THREAD_NAME_MAX_LEN -} - void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { diff --git a/src/prof_sys.c b/src/prof_sys.c new file mode 100644 index 00000000..521a71a0 --- /dev/null +++ b/src/prof_sys.c @@ -0,0 +1,27 @@ +#define JEMALLOC_PROF_SYS_C_ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_sys.h" + +static int +prof_sys_thread_name_read_impl(char *buf, size_t limit) { +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + return pthread_getname_np(pthread_self(), buf, limit); +#else + return ENOSYS; +#endif +} +prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = + prof_sys_thread_name_read_impl; + +void +prof_sys_thread_name_fetch(tsd_t *tsd) { +#define THREAD_NAME_MAX_LEN 16 + char buf[THREAD_NAME_MAX_LEN]; + if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { + prof_thread_name_set_impl(tsd, buf); + } +#undef THREAD_NAME_MAX_LEN +} diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c index ec1e7745..affc788a 100644 --- a/test/unit/prof_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + static const char *test_thread_name = "test_name"; static int From 767a2e1790656f038123036772fed6656175c7e6 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 16:20:01 -0700 Subject: [PATCH 1787/2608] Move file handling logic in prof to prof_sys --- 
include/jemalloc/internal/prof_externs.h | 2 - include/jemalloc/internal/prof_sys.h | 10 ++ src/ctl.c | 1 + src/prof.c | 142 +------------------- src/prof_log.c | 1 + src/prof_sys.c | 158 +++++++++++++++++++++++ 6 files changed, 175 insertions(+), 139 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 135fb29d..96e08c89 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -58,11 +58,9 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); int prof_getpid(void); -void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index cfa00591..166df6fa 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -1,10 +1,20 @@ #ifndef JEMALLOC_INTERNAL_PROF_SYS_H #define JEMALLOC_INTERNAL_PROF_SYS_H +extern malloc_mutex_t prof_dump_filename_mtx; +extern base_t *prof_base; + void prof_sys_thread_name_fetch(tsd_t *tsd); /* Used in unit tests. 
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; +void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); +bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); +void prof_fdump_impl(tsd_t *tsd); +void prof_idump_impl(tsd_t *tsd); +bool prof_mdump_impl(tsd_t *tsd, const char *filename); +void prof_gdump_impl(tsd_t *tsd); + #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/src/ctl.c b/src/ctl.c index 5cba9af9..fe0b9f99 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -9,6 +9,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" diff --git a/src/prof.c b/src/prof.c index ea63cfdd..7732edea 100644 --- a/src/prof.c +++ b/src/prof.c @@ -86,38 +86,13 @@ malloc_mutex_t tdatas_mtx; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; -static malloc_mutex_t prof_dump_filename_mtx; -static uint64_t prof_dump_seq; -static uint64_t prof_dump_iseq; -static uint64_t prof_dump_mseq; -static uint64_t prof_dump_useq; - -/* The fallback allocator profiling functionality will use. */ -base_t *prof_base; - malloc_mutex_t prof_dump_mtx; -static char *prof_dump_prefix = NULL; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; /******************************************************************************/ -/* - * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up - * calling strncpy with a size of 0, which triggers a -Wstringop-truncation - * warning (strncpy can never actually be called in this case, since we bail out - * much earlier when config_prof is false). This function works around the - * warning to let us leave the warning on. 
- */ -static inline void -prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { - cassert(config_prof); -#ifdef JEMALLOC_PROF - strncpy(dest, src, size); -#endif -} - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); @@ -507,57 +482,9 @@ prof_getpid(void) { #endif } -static const char * -prof_dump_prefix_get(tsdn_t* tsdn) { - malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); - - return prof_dump_prefix == NULL ? opt_prof_prefix : prof_dump_prefix; -} - -static bool -prof_dump_prefix_is_empty(tsdn_t *tsdn) { - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - return ret; -} - -#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) -#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) -static void -prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { - cassert(config_prof); - - assert(tsd_reentrancy_level_get(tsd) == 0); - const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); - - if (vseq != VSEQ_INVALID) { - /* "...v.heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", - prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); - } else { - /* "....heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", - prof_prefix, prof_getpid(), prof_dump_seq, v); - } - prof_dump_seq++; -} - -void -prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), - ind); - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); -} - static void prof_fdump(void) { tsd_t *tsd; - char filename[DUMP_FILENAME_BUFSIZE]; cassert(config_prof); assert(opt_prof_final); @@ -567,12 +494,8 @@ prof_fdump(void) { } tsd = tsd_fetch(); 
assert(tsd_reentrancy_level_get(tsd) == 0); - assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, opt_prof_leak); + prof_fdump_impl(tsd); } static bool @@ -582,31 +505,6 @@ prof_idump_accum_init(void) { return counter_accum_init(&prof_idump_accumulated, prof_interval); } -bool -prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { - cassert(config_prof); - ctl_mtx_assert_held(tsdn); - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix == NULL) { - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - /* Everything is still guarded by ctl_mtx. */ - char *buffer = base_alloc(tsdn, prof_base, - PROF_DUMP_FILENAME_LEN, QUANTUM); - if (buffer == NULL) { - return true; - } - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - prof_dump_prefix = buffer; - } - assert(prof_dump_prefix != NULL); - - prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); - prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - - return false; -} - void prof_idump(tsdn_t *tsdn) { tsd_t *tsd; @@ -631,16 +529,7 @@ prof_idump(tsdn_t *tsdn) { return; } - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - return; - } - char filename[PATH_MAX + 1]; - prof_dump_filename(tsd, filename, 'i', prof_dump_iseq); - prof_dump_iseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + prof_idump_impl(tsd); } bool @@ -651,20 +540,8 @@ prof_mdump(tsd_t *tsd, const char *filename) { if (!opt_prof || !prof_booted) { return true; } - char filename_buf[DUMP_FILENAME_BUFSIZE]; - if (filename == NULL) { - /* No filename specified, so 
automatically generate one. */ - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - return true; - } - prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); - prof_dump_mseq++; - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - filename = filename_buf; - } - return prof_dump(tsd, true, filename, false); + + return prof_mdump_impl(tsd, filename); } void @@ -691,16 +568,7 @@ prof_gdump(tsdn_t *tsdn) { return; } - malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsdn)[0] == '\0') { - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - return; - } - char filename[DUMP_FILENAME_BUFSIZE]; - prof_dump_filename(tsd, filename, 'u', prof_dump_useq); - prof_dump_useq++; - malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); - prof_dump(tsd, false, filename, false); + prof_gdump_impl(tsd); } static uint64_t diff --git a/src/prof_log.c b/src/prof_log.c index bda01d04..b32d6f63 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -10,6 +10,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" +#include "jemalloc/internal/prof_sys.h" bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; diff --git a/src/prof_sys.c b/src/prof_sys.c index 521a71a0..47bc43b7 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -2,9 +2,22 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" +malloc_mutex_t prof_dump_filename_mtx; + +static uint64_t prof_dump_seq; +static uint64_t prof_dump_iseq; +static uint64_t prof_dump_mseq; +static uint64_t prof_dump_useq; + +static char *prof_dump_prefix = NULL; + +/* The fallback allocator profiling functionality will 
use. */ +base_t *prof_base; + static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP @@ -25,3 +38,148 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { } #undef THREAD_NAME_MAX_LEN } + +/* + * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up + * calling strncpy with a size of 0, which triggers a -Wstringop-truncation + * warning (strncpy can never actually be called in this case, since we bail out + * much earlier when config_prof is false). This function works around the + * warning to let us leave the warning on. + */ +static inline void +prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { + cassert(config_prof); +#ifdef JEMALLOC_PROF + strncpy(dest, src, size); +#endif +} + +static const char * +prof_dump_prefix_get(tsdn_t* tsdn) { + malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); + + return prof_dump_prefix == NULL ? opt_prof_prefix : prof_dump_prefix; +} + +static bool +prof_dump_prefix_is_empty(tsdn_t *tsdn) { + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + return ret; +} + +#define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1) +#define VSEQ_INVALID UINT64_C(0xffffffffffffffff) +static void +prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { + cassert(config_prof); + + assert(tsd_reentrancy_level_get(tsd) == 0); + const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); + + if (vseq != VSEQ_INVALID) { + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c%"FMTu64".heap", + prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c.heap", + prof_prefix, prof_getpid(), prof_dump_seq, v); + } + prof_dump_seq++; +} + +void +prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { + 
malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), + ind); + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); +} + +void +prof_fdump_impl(tsd_t *tsd) { + char filename[DUMP_FILENAME_BUFSIZE]; + + assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, opt_prof_leak); +} + +bool +prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { + cassert(config_prof); + ctl_mtx_assert_held(tsdn); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix == NULL) { + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + /* Everything is still guarded by ctl_mtx. */ + char *buffer = base_alloc(tsdn, prof_base, + PROF_DUMP_FILENAME_LEN, QUANTUM); + if (buffer == NULL) { + return true; + } + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + prof_dump_prefix = buffer; + } + assert(prof_dump_prefix != NULL); + + prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + + return false; +} + +void +prof_idump_impl(tsd_t *tsd) { + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + return; + } + char filename[PATH_MAX + 1]; + prof_dump_filename(tsd, filename, 'i', prof_dump_iseq); + prof_dump_iseq++; + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); +} + +bool +prof_mdump_impl(tsd_t *tsd, const char *filename) { + char filename_buf[DUMP_FILENAME_BUFSIZE]; + if (filename == NULL) { + /* No filename specified, so automatically generate one. 
*/ + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + return true; + } + prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); + prof_dump_mseq++; + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + filename = filename_buf; + } + return prof_dump(tsd, true, filename, false); +} + +void +prof_gdump_impl(tsd_t *tsd) { + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); + if (prof_dump_prefix_get(tsdn)[0] == '\0') { + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + return; + } + char filename[DUMP_FILENAME_BUFSIZE]; + prof_dump_filename(tsd, filename, 'u', prof_dump_useq); + prof_dump_useq++; + malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); + prof_dump(tsd, false, filename, false); +} From 4736fb4fc9c105320c71dad5425a535cebf390b3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 2 Apr 2020 16:39:41 -0700 Subject: [PATCH 1788/2608] Move file handling logic in prof_data to prof_sys --- include/jemalloc/internal/prof_data.h | 4 +- include/jemalloc/internal/prof_externs.h | 8 - include/jemalloc/internal/prof_sys.h | 15 +- src/prof_data.c | 240 ++--------------------- src/prof_sys.c | 207 +++++++++++++++++++ test/unit/prof_accum.c | 1 + test/unit/prof_gdump.c | 2 + test/unit/prof_idump.c | 2 + test/unit/prof_mdump.c | 2 + test/unit/prof_reset.c | 1 + 10 files changed, 239 insertions(+), 243 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 26b8b28e..5c3f129f 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -12,8 +12,8 @@ bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -bool prof_dump(tsd_t *tsd, bool 
propagate_err, const char *filename, - bool leakcheck); +void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, + void (*write_cb)(const char *), bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 96e08c89..c7c3ccbe 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -87,14 +87,6 @@ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); -/* Used by unit tests. */ -typedef int (prof_dump_open_file_t)(const char *, int); -extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; -typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); -extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef int (prof_dump_open_maps_t)(); -extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; - bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 166df6fa..0d97cb99 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -5,11 +5,6 @@ extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; void prof_sys_thread_name_fetch(tsd_t *tsd); - -/* Used in unit tests. 
*/ -typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); -extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; - void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); @@ -17,4 +12,14 @@ void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); +/* Used in unit tests. */ +typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; +typedef int (prof_dump_open_file_t)(const char *, int); +extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; +typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); +extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; +typedef int (prof_dump_open_maps_t)(); +extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; + #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/src/prof_data.c b/src/prof_data.c index 9563293f..80772292 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -55,27 +55,8 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; -/* The following are needed for dumping and are protected by prof_dump_mtx. */ -/* - * Whether there has been an error in the dumping process, which could have - * happened either in file opening or in file writing. When an error has - * already occurred, we will stop further writing to the file. - */ -static bool prof_dump_error; -/* - * Whether error should be handled locally: if true, then we print out error - * message as well as abort (if opt_abort is true) when an error occurred, and - * we also report the error back to the caller in the end; if false, then we - * only report the error back to the caller in the end. 
- */ -static bool prof_dump_handle_error_locally; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static size_t prof_dump_buf_end; -static int prof_dump_fd; +/* Dump write callback; stored global to simplify function interfaces. */ +static void (*prof_dump_write)(const char *); /******************************************************************************/ /* Red-black trees. */ @@ -536,94 +517,6 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { return 0; } -static void -prof_dump_check_possible_error(bool err_cond, const char *format, ...) { - assert(!prof_dump_error); - if (!err_cond) { - return; - } - - prof_dump_error = true; - if (!prof_dump_handle_error_locally) { - return; - } - - va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; - va_start(ap, format); - malloc_vsnprintf(buf, sizeof(buf), format, ap); - va_end(ap); - malloc_write(buf); - - if (opt_abort) { - abort(); - } -} - -static int -prof_dump_open_file_impl(const char *filename, int mode) { - return creat(filename, mode); -} -prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = - prof_dump_open_file_impl; - -static void -prof_dump_open(const char *filename) { - prof_dump_fd = prof_dump_open_file(filename, 0644); - prof_dump_check_possible_error(prof_dump_fd == -1, - ": failed to open \"%s\"\n", filename); -} - -prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; - -static void -prof_dump_flush() { - cassert(config_prof); - if (!prof_dump_error) { - ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, - prof_dump_buf_end); - prof_dump_check_possible_error(err == -1, - ": failed to write during heap profile flush\n"); - } - prof_dump_buf_end = 0; -} - -static void -prof_dump_close() { - if (prof_dump_fd != -1) { - prof_dump_flush(); - close(prof_dump_fd); - } -} - -static void -prof_dump_write(const char *s) { - size_t i, slen, n; - - 
cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - prof_dump_flush(); - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. */ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - assert(i == slen); -} - JEMALLOC_FORMAT_PRINTF(1, 2) static void prof_dump_printf(const char *format, ...) { @@ -938,82 +831,12 @@ prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, (void *)tsdn); } -#ifndef _WIN32 -JEMALLOC_FORMAT_PRINTF(1, 2) -static int -prof_open_maps_internal(const char *format, ...) { - int mfd; - va_list ap; - char filename[PATH_MAX + 1]; - - va_start(ap, format); - malloc_vsnprintf(filename, sizeof(filename), format, ap); - va_end(ap); - -#if defined(O_CLOEXEC) - mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else - mfd = open(filename, O_RDONLY); - if (mfd != -1) { - fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); - } -#endif - - return mfd; -} -#endif - -static int -prof_dump_open_maps_impl() { - int mfd; - - cassert(config_prof); -#ifdef __FreeBSD__ - mfd = prof_open_maps_internal("/proc/curproc/map"); -#elif defined(_WIN32) - mfd = -1; // Not implemented -#else - int pid = prof_getpid(); - - mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); - if (mfd == -1) { - mfd = prof_open_maps_internal("/proc/%d/maps", pid); - } -#endif - return mfd; -} -prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = - prof_dump_open_maps_impl; - -static void -prof_dump_maps() { - int mfd = prof_dump_open_maps(); - if (mfd == -1) { - return; - } - - prof_dump_write("\nMAPPED_LIBRARIES:\n"); - ssize_t nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before 
read(). */ - prof_dump_flush(); - } - nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); - } while (nread > 0); - - close(mfd); -} - /* * See prof_sample_threshold_update() comment for why the body of this function * is conditionally compiled. */ static void -prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, - const char *filename) { +prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { #ifdef JEMALLOC_PROF /* * Scaling is equivalent AdjustSamples() in jeprof, but the result may @@ -1036,8 +859,7 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx, curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( - ": Run jeprof on \"%s\" for leak detail\n", - filename); + ": Run jeprof on dump output for leak detail\n"); } #endif } @@ -1093,62 +915,24 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_leave(tsd, tdata); } -static bool -prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck, prof_tdata_t *tdata, const prof_cnt_t *cnt_all, - prof_gctx_tree_t *gctxs) { - prof_dump_error = false; - prof_dump_handle_error_locally = !propagate_err; - - /* Create dump file. */ - prof_dump_open(filename); - /* Dump profile header. */ - prof_dump_header(tsd_tsdn(tsd), cnt_all); - /* Dump per gctx profile stats. */ - gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter, (void *)tsd_tsdn(tsd)); - /* Dump /proc//maps if possible. */ - prof_dump_maps(); - /* Close dump file. 
*/ - prof_dump_close(); - - return prof_dump_error; -} - -bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, +void +prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, void (*write_cb)(const char *), bool leakcheck) { - cassert(config_prof); - assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t * tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return true; - } - - pre_reentrancy(tsd, NULL); - malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - + malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); + prof_dump_write = write_cb; prof_gctx_tree_t gctxs; struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, - tdata, &prof_tdata_merge_iter_arg.cnt_all, &gctxs); + prof_dump_header(tsd_tsdn(tsd), &prof_tdata_merge_iter_arg.cnt_all); + gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, + (void *)tsd_tsdn(tsd)); prof_gctx_finish(tsd, &gctxs); - - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); - post_reentrancy(tsd); - - if (err) { - return true; - } - if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx, filename); + prof_gctx_merge_iter_arg.leak_ngctx); } - return false; } /* Used in unit tests. */ diff --git a/src/prof_sys.c b/src/prof_sys.c index 47bc43b7..364c315a 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -18,6 +18,28 @@ static char *prof_dump_prefix = NULL; /* The fallback allocator profiling functionality will use. */ base_t *prof_base; +/* The following are needed for dumping and are protected by prof_dump_mtx. */ +/* + * Whether there has been an error in the dumping process, which could have + * happened either in file opening or in file writing. When an error has + * already occurred, we will stop further writing to the file. 
+ */ +static bool prof_dump_error; +/* + * Whether error should be handled locally: if true, then we print out error + * message as well as abort (if opt_abort is true) when an error occurred, and + * we also report the error back to the caller in the end; if false, then we + * only report the error back to the caller in the end. + */ +static bool prof_dump_handle_error_locally; +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps. + */ +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; +static size_t prof_dump_buf_end; +static int prof_dump_fd; + static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP @@ -39,6 +61,191 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { #undef THREAD_NAME_MAX_LEN } +static void +prof_dump_check_possible_error(bool err_cond, const char *format, ...) { + assert(!prof_dump_error); + if (!err_cond) { + return; + } + + prof_dump_error = true; + if (!prof_dump_handle_error_locally) { + return; + } + + va_list ap; + char buf[PROF_PRINTF_BUFSIZE]; + va_start(ap, format); + malloc_vsnprintf(buf, sizeof(buf), format, ap); + va_end(ap); + malloc_write(buf); + + if (opt_abort) { + abort(); + } +} + +static int +prof_dump_open_file_impl(const char *filename, int mode) { + return creat(filename, mode); +} +prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = + prof_dump_open_file_impl; + +static void +prof_dump_open(const char *filename) { + prof_dump_fd = prof_dump_open_file(filename, 0644); + prof_dump_check_possible_error(prof_dump_fd == -1, + ": failed to open \"%s\"\n", filename); +} + +prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; + +static void +prof_dump_flush() { + cassert(config_prof); + if (!prof_dump_error) { + ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, + prof_dump_buf_end); + prof_dump_check_possible_error(err == -1, + ": failed to write during heap profile flush\n"); + } + 
prof_dump_buf_end = 0; +} + +static void +prof_dump_write(const char *s) { + size_t i, slen, n; + + cassert(config_prof); + + i = 0; + slen = strlen(s); + while (i < slen) { + /* Flush the buffer if it is full. */ + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + prof_dump_flush(); + } + + if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { + /* Finish writing. */ + n = slen - i; + } else { + /* Write as much of s as will fit. */ + n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; + } + memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); + prof_dump_buf_end += n; + i += n; + } + assert(i == slen); +} + +static void +prof_dump_close() { + if (prof_dump_fd != -1) { + prof_dump_flush(); + close(prof_dump_fd); + } +} + +#ifndef _WIN32 +JEMALLOC_FORMAT_PRINTF(1, 2) +static int +prof_open_maps_internal(const char *format, ...) { + int mfd; + va_list ap; + char filename[PATH_MAX + 1]; + + va_start(ap, format); + malloc_vsnprintf(filename, sizeof(filename), format, ap); + va_end(ap); + +#if defined(O_CLOEXEC) + mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif + + return mfd; +} +#endif + +static int +prof_dump_open_maps_impl() { + int mfd; + + cassert(config_prof); +#ifdef __FreeBSD__ + mfd = prof_open_maps_internal("/proc/curproc/map"); +#elif defined(_WIN32) + mfd = -1; // Not implemented +#else + int pid = prof_getpid(); + + mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); + if (mfd == -1) { + mfd = prof_open_maps_internal("/proc/%d/maps", pid); + } +#endif + return mfd; +} +prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = + prof_dump_open_maps_impl; + +static void +prof_dump_maps() { + int mfd = prof_dump_open_maps(); + if (mfd == -1) { + return; + } + + prof_dump_write("\nMAPPED_LIBRARIES:\n"); + ssize_t nread = 0; + do { + prof_dump_buf_end += nread; + if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { + /* Make space in 
prof_dump_buf before read(). */ + prof_dump_flush(); + } + nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], + PROF_DUMP_BUFSIZE - prof_dump_buf_end); + } while (nread > 0); + + close(mfd); +} + +static bool +prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, + bool leakcheck) { + cassert(config_prof); + assert(tsd_reentrancy_level_get(tsd) == 0); + + prof_tdata_t * tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return true; + } + + prof_dump_error = false; + prof_dump_handle_error_locally = !propagate_err; + + pre_reentrancy(tsd, NULL); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); + + prof_dump_open(filename); + prof_dump_impl(tsd, tdata, prof_dump_write, leakcheck); + prof_dump_maps(); + prof_dump_close(); + + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); + post_reentrancy(tsd); + + return prof_dump_error; +} + /* * If profiling is off, then PROF_DUMP_FILENAME_LEN is 1, so we'll end up * calling strncpy with a size of 0, which triggers a -Wstringop-truncation diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index 5b8085e1..ef392acd 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_sys.h" #define NTHREADS 4 #define NALLOCS_PER_THREAD 50 diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 6209255e..9a47a19a 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + static bool did_prof_dump_open; static int diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index b0c1bc28..607944c1 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + #define TEST_PREFIX "test_prefix" static bool did_prof_dump_open; diff --git a/test/unit/prof_mdump.c 
b/test/unit/prof_mdump.c index 3779c24e..75b3a515 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/prof_sys.h" + static const char *test_filename = "test_filename"; static bool did_prof_dump_open; diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 22bf7963..2bdc37c0 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_sys.h" static int prof_dump_open_file_intercept(const char *filename, int mode) { From d128efcb6aeddec8d3f1220eda0251dcaa25bab8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 3 Apr 2020 10:26:03 -0700 Subject: [PATCH 1789/2608] Relocate a few prof utilities to the right modules --- include/jemalloc/internal/prof_data.h | 5 +++++ include/jemalloc/internal/prof_externs.h | 14 -------------- include/jemalloc/internal/prof_log.h | 3 +++ include/jemalloc/internal/prof_recent.h | 5 +++++ include/jemalloc/internal/prof_sys.h | 1 + src/ctl.c | 3 +++ src/prof.c | 16 ---------------- src/prof_data.c | 4 ++++ src/prof_sys.c | 9 +++++++++ 9 files changed, 30 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 5c3f129f..de9f7bae 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -3,6 +3,10 @@ #include "jemalloc/internal/mutex.h" +extern malloc_mutex_t bt2gctx_mtx; +extern malloc_mutex_t tdatas_mtx; +extern malloc_mutex_t prof_dump_mtx; + extern malloc_mutex_t *gctx_locks; extern malloc_mutex_t *tdata_locks; @@ -19,6 +23,7 @@ prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_reset(tsd_t *tsd, size_t lg_sample); void 
prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index c7c3ccbe..a4a4aa61 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -3,10 +3,6 @@ #include "jemalloc/internal/mutex.h" -extern malloc_mutex_t bt2gctx_mtx; -extern malloc_mutex_t tdatas_mtx; -extern malloc_mutex_t prof_dump_mtx; - extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; @@ -26,7 +22,6 @@ extern char opt_prof_prefix[ /* For recording recent allocations */ extern ssize_t opt_prof_recent_alloc_max; -extern malloc_mutex_t prof_recent_alloc_mtx; /* Whether to use thread name provided by the system or by mallctl. */ extern bool opt_prof_sys_thread_name; @@ -57,12 +52,10 @@ void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); -int prof_getpid(void); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); void prof_gdump(tsdn_t *tsdn); -void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tdata_cleanup(tsd_t *tsd); bool prof_active_get(tsdn_t *tsdn); bool prof_active_set(tsdn_t *tsdn, bool active); @@ -87,11 +80,4 @@ uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); -bool prof_log_start(tsdn_t *tsdn, const char *filename); -bool prof_log_stop(tsdn_t *tsdn); - -ssize_t prof_recent_alloc_max_ctl_read(); -ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); -void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); - #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_log.h 
b/include/jemalloc/internal/prof_log.h index e833ced7..ccb557dd 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -16,4 +16,7 @@ bool prof_log_is_logging(void); bool prof_log_rep_check(void); void prof_log_dummy_set(bool new_value); +bool prof_log_start(tsdn_t *tsdn, const char *filename); +bool prof_log_stop(tsdn_t *tsdn); + #endif /* JEMALLOC_INTERNAL_PROF_LOG_H */ diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index 4f376c7b..d793c6da 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_RECENT_H #define JEMALLOC_INTERNAL_PROF_RECENT_H +extern malloc_mutex_t prof_recent_alloc_mtx; extern malloc_mutex_t prof_recent_dump_mtx; bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); @@ -15,4 +16,8 @@ extern prof_recent_list_t prof_recent_alloc_list; edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); +ssize_t prof_recent_alloc_max_ctl_read(); +ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); +void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); + #endif /* JEMALLOC_INTERNAL_PROF_RECENT_H */ diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 0d97cb99..3896f292 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -5,6 +5,7 @@ extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; void prof_sys_thread_name_fetch(tsd_t *tsd); +int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); diff --git a/src/ctl.c b/src/ctl.c index fe0b9f99..8b9f42ec 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -9,6 +9,9 @@ 
#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof_data.h" +#include "jemalloc/internal/prof_log.h" +#include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" diff --git a/src/prof.c b/src/prof.c index 7732edea..50c08fa3 100644 --- a/src/prof.c +++ b/src/prof.c @@ -78,16 +78,9 @@ uint64_t prof_interval = 0; size_t lg_prof_sample; -/* Non static to enable profiling. */ -malloc_mutex_t bt2gctx_mtx; - -malloc_mutex_t tdatas_mtx; - static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; -malloc_mutex_t prof_dump_mtx; - /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; @@ -473,15 +466,6 @@ prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) { } } -int -prof_getpid(void) { -#ifdef _WIN32 - return GetCurrentProcessId(); -#else - return getpid(); -#endif -} - static void prof_fdump(void) { tsd_t *tsd; diff --git a/src/prof_data.c b/src/prof_data.c index 80772292..6e84e3cf 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -25,6 +25,10 @@ /******************************************************************************/ +malloc_mutex_t bt2gctx_mtx; +malloc_mutex_t tdatas_mtx; +malloc_mutex_t prof_dump_mtx; + /* * Table of mutexes that are shared among gctx's. These are leaf locks, so * there is no problem with using them for more than one gctx at the same time. diff --git a/src/prof_sys.c b/src/prof_sys.c index 364c315a..cdec9262 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -61,6 +61,15 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { #undef THREAD_NAME_MAX_LEN } +int +prof_getpid(void) { +#ifdef _WIN32 + return GetCurrentProcessId(); +#else + return getpid(); +#endif +} + static void prof_dump_check_possible_error(bool err_cond, const char *format, ...) 
{ assert(!prof_dump_error); From dad821bb2239a42517f6ba5e48a29f5f569ab38f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 3 Apr 2020 11:19:51 -0700 Subject: [PATCH 1790/2608] Move unwind to prof_sys --- include/jemalloc/internal/prof_data.h | 3 +- include/jemalloc/internal/prof_sys.h | 3 + src/prof.c | 269 ++------------------------ src/prof_data.c | 19 +- src/prof_sys.c | 266 +++++++++++++++++++++++++ 5 files changed, 284 insertions(+), 276 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index de9f7bae..9c2d6970 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -14,6 +14,7 @@ void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); +prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, @@ -21,8 +22,6 @@ void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void bt_init(prof_bt_t *bt, void **vec); -void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_reset(tsd_t *tsd, size_t lg_sample); void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 3896f292..d784ef91 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -4,6 +4,9 @@ extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; +void bt_init(prof_bt_t *bt, void **vec); +void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_unwind_init(); void prof_sys_thread_name_fetch(tsd_t *tsd); int 
prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); diff --git a/src/prof.c b/src/prof.c index 50c08fa3..25735410 100644 --- a/src/prof.c +++ b/src/prof.c @@ -19,23 +19,6 @@ /******************************************************************************/ -#ifdef JEMALLOC_PROF_LIBUNWIND -#define UNW_LOCAL_ONLY -#include -#endif - -#ifdef JEMALLOC_PROF_LIBGCC -/* - * We have a circular dependency -- jemalloc_internal.h tells us if we should - * use libgcc's unwinding functionality, but after we've included that, we've - * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. - */ -#undef _Unwind_Backtrace -#include -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) -#endif - -/******************************************************************************/ /* Data. */ bool opt_prof = false; @@ -147,242 +130,21 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_tctx_try_destroy(tsd, tctx); } -void -bt_init(prof_bt_t *bt, void **vec) { - cassert(config_prof); - - bt->vec = vec; - bt->len = 0; -} - -#ifdef JEMALLOC_PROF_LIBUNWIND -static void -prof_backtrace_impl(prof_bt_t *bt) { - int nframes; - - cassert(config_prof); - assert(bt->len == 0); - assert(bt->vec != NULL); - - nframes = unw_backtrace(bt->vec, PROF_BT_MAX); - if (nframes <= 0) { - return; - } - bt->len = nframes; -} -#elif (defined(JEMALLOC_PROF_LIBGCC)) -static _Unwind_Reason_Code -prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { - cassert(config_prof); - - return _URC_NO_REASON; -} - -static _Unwind_Reason_Code -prof_unwind_callback(struct _Unwind_Context *context, void *arg) { - prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - void *ip; - - cassert(config_prof); - - ip = (void *)_Unwind_GetIP(context); - if (ip == NULL) { - return _URC_END_OF_STACK; - } - data->bt->vec[data->bt->len] = ip; - data->bt->len++; - if (data->bt->len == data->max) { - return 
_URC_END_OF_STACK; +prof_tctx_t * +prof_tctx_create(tsd_t *tsd) { + if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) { + return NULL; } - return _URC_NO_REASON; -} - -static void -prof_backtrace_impl(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, PROF_BT_MAX}; - - cassert(config_prof); - - _Unwind_Backtrace(prof_unwind_callback, &data); -} -#elif (defined(JEMALLOC_PROF_GCC)) -static void -prof_backtrace_impl(prof_bt_t *bt) { -#define BT_FRAME(i) \ - if ((i) < PROF_BT_MAX) { \ - void *p; \ - if (__builtin_frame_address(i) == 0) { \ - return; \ - } \ - p = __builtin_return_address(i); \ - if (p == NULL) { \ - return; \ - } \ - bt->vec[(i)] = p; \ - bt->len = (i) + 1; \ - } else { \ - return; \ + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return NULL; } - cassert(config_prof); - - BT_FRAME(0) - BT_FRAME(1) - BT_FRAME(2) - BT_FRAME(3) - BT_FRAME(4) - BT_FRAME(5) - BT_FRAME(6) - BT_FRAME(7) - BT_FRAME(8) - BT_FRAME(9) - - BT_FRAME(10) - BT_FRAME(11) - BT_FRAME(12) - BT_FRAME(13) - BT_FRAME(14) - BT_FRAME(15) - BT_FRAME(16) - BT_FRAME(17) - BT_FRAME(18) - BT_FRAME(19) - - BT_FRAME(20) - BT_FRAME(21) - BT_FRAME(22) - BT_FRAME(23) - BT_FRAME(24) - BT_FRAME(25) - BT_FRAME(26) - BT_FRAME(27) - BT_FRAME(28) - BT_FRAME(29) - - BT_FRAME(30) - BT_FRAME(31) - BT_FRAME(32) - BT_FRAME(33) - BT_FRAME(34) - BT_FRAME(35) - BT_FRAME(36) - BT_FRAME(37) - BT_FRAME(38) - BT_FRAME(39) - - BT_FRAME(40) - BT_FRAME(41) - BT_FRAME(42) - BT_FRAME(43) - BT_FRAME(44) - BT_FRAME(45) - BT_FRAME(46) - BT_FRAME(47) - BT_FRAME(48) - BT_FRAME(49) - - BT_FRAME(50) - BT_FRAME(51) - BT_FRAME(52) - BT_FRAME(53) - BT_FRAME(54) - BT_FRAME(55) - BT_FRAME(56) - BT_FRAME(57) - BT_FRAME(58) - BT_FRAME(59) - - BT_FRAME(60) - BT_FRAME(61) - BT_FRAME(62) - BT_FRAME(63) - BT_FRAME(64) - BT_FRAME(65) - BT_FRAME(66) - BT_FRAME(67) - BT_FRAME(68) - BT_FRAME(69) - - BT_FRAME(70) - BT_FRAME(71) - BT_FRAME(72) - BT_FRAME(73) - BT_FRAME(74) - BT_FRAME(75) - BT_FRAME(76) - 
BT_FRAME(77) - BT_FRAME(78) - BT_FRAME(79) - - BT_FRAME(80) - BT_FRAME(81) - BT_FRAME(82) - BT_FRAME(83) - BT_FRAME(84) - BT_FRAME(85) - BT_FRAME(86) - BT_FRAME(87) - BT_FRAME(88) - BT_FRAME(89) - - BT_FRAME(90) - BT_FRAME(91) - BT_FRAME(92) - BT_FRAME(93) - BT_FRAME(94) - BT_FRAME(95) - BT_FRAME(96) - BT_FRAME(97) - BT_FRAME(98) - BT_FRAME(99) - - BT_FRAME(100) - BT_FRAME(101) - BT_FRAME(102) - BT_FRAME(103) - BT_FRAME(104) - BT_FRAME(105) - BT_FRAME(106) - BT_FRAME(107) - BT_FRAME(108) - BT_FRAME(109) - - BT_FRAME(110) - BT_FRAME(111) - BT_FRAME(112) - BT_FRAME(113) - BT_FRAME(114) - BT_FRAME(115) - BT_FRAME(116) - BT_FRAME(117) - BT_FRAME(118) - BT_FRAME(119) - - BT_FRAME(120) - BT_FRAME(121) - BT_FRAME(122) - BT_FRAME(123) - BT_FRAME(124) - BT_FRAME(125) - BT_FRAME(126) - BT_FRAME(127) -#undef BT_FRAME -} -#else -static void -prof_backtrace_impl(prof_bt_t *bt) { - cassert(config_prof); - not_reached(); -} -#endif - -void -prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { - cassert(config_prof); - pre_reentrancy(tsd, NULL); - prof_backtrace_impl(bt); - post_reentrancy(tsd); + prof_bt_t bt; + bt_init(&bt, tdata->vec); + prof_backtrace(tsd, &bt); + return prof_lookup(tsd, &bt); } /* @@ -852,13 +614,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } } -#ifdef JEMALLOC_PROF_LIBGCC - /* - * Cause the backtracing machinery to allocate its internal - * state before enabling profiling. 
- */ - _Unwind_Backtrace(prof_unwind_init_callback, NULL); -#endif + + prof_unwind_init(); } prof_booted = true; diff --git a/src/prof_data.c b/src/prof_data.c index 6e84e3cf..e38cb808 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -322,7 +322,7 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, return false; } -static prof_tctx_t * +prof_tctx_t * prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { prof_tctx_t *p; @@ -395,23 +395,6 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { return ret.p; } -prof_tctx_t * -prof_tctx_create(tsd_t *tsd) { - if (!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0) { - return NULL; - } - - prof_tdata_t *tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return NULL; - } - - prof_bt_t bt; - bt_init(&bt, tdata->vec); - prof_backtrace(tsd, &bt); - return prof_lookup(tsd, &bt); -} - /* Used in unit tests. */ static prof_tdata_t * prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, diff --git a/src/prof_sys.c b/src/prof_sys.c index cdec9262..027da89b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -6,6 +6,24 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" +#ifdef JEMALLOC_PROF_LIBUNWIND +#define UNW_LOCAL_ONLY +#include +#endif + +#ifdef JEMALLOC_PROF_LIBGCC +/* + * We have a circular dependency -- jemalloc_internal.h tells us if we should + * use libgcc's unwinding functionality, but after we've included that, we've + * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 
+ */ +#undef _Unwind_Backtrace +#include +#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +#endif + +/******************************************************************************/ + malloc_mutex_t prof_dump_filename_mtx; static uint64_t prof_dump_seq; @@ -40,6 +58,254 @@ static char prof_dump_buf[PROF_DUMP_BUFSIZE]; static size_t prof_dump_buf_end; static int prof_dump_fd; +void +bt_init(prof_bt_t *bt, void **vec) { + cassert(config_prof); + + bt->vec = vec; + bt->len = 0; +} + +#ifdef JEMALLOC_PROF_LIBUNWIND +static void +prof_backtrace_impl(prof_bt_t *bt) { + int nframes; + + cassert(config_prof); + assert(bt->len == 0); + assert(bt->vec != NULL); + + nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + if (nframes <= 0) { + return; + } + bt->len = nframes; +} +#elif (defined(JEMALLOC_PROF_LIBGCC)) +static _Unwind_Reason_Code +prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { + cassert(config_prof); + + return _URC_NO_REASON; +} + +static _Unwind_Reason_Code +prof_unwind_callback(struct _Unwind_Context *context, void *arg) { + prof_unwind_data_t *data = (prof_unwind_data_t *)arg; + void *ip; + + cassert(config_prof); + + ip = (void *)_Unwind_GetIP(context); + if (ip == NULL) { + return _URC_END_OF_STACK; + } + data->bt->vec[data->bt->len] = ip; + data->bt->len++; + if (data->bt->len == data->max) { + return _URC_END_OF_STACK; + } + + return _URC_NO_REASON; +} + +static void +prof_backtrace_impl(prof_bt_t *bt) { + prof_unwind_data_t data = {bt, PROF_BT_MAX}; + + cassert(config_prof); + + _Unwind_Backtrace(prof_unwind_callback, &data); +} +#elif (defined(JEMALLOC_PROF_GCC)) +static void +prof_backtrace_impl(prof_bt_t *bt) { +#define BT_FRAME(i) \ + if ((i) < PROF_BT_MAX) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) { \ + return; \ + } \ + p = __builtin_return_address(i); \ + if (p == NULL) { \ + return; \ + } \ + bt->vec[(i)] = p; \ + bt->len = (i) + 1; \ + } else { \ + return; \ + } + + 
cassert(config_prof); + + BT_FRAME(0) + BT_FRAME(1) + BT_FRAME(2) + BT_FRAME(3) + BT_FRAME(4) + BT_FRAME(5) + BT_FRAME(6) + BT_FRAME(7) + BT_FRAME(8) + BT_FRAME(9) + + BT_FRAME(10) + BT_FRAME(11) + BT_FRAME(12) + BT_FRAME(13) + BT_FRAME(14) + BT_FRAME(15) + BT_FRAME(16) + BT_FRAME(17) + BT_FRAME(18) + BT_FRAME(19) + + BT_FRAME(20) + BT_FRAME(21) + BT_FRAME(22) + BT_FRAME(23) + BT_FRAME(24) + BT_FRAME(25) + BT_FRAME(26) + BT_FRAME(27) + BT_FRAME(28) + BT_FRAME(29) + + BT_FRAME(30) + BT_FRAME(31) + BT_FRAME(32) + BT_FRAME(33) + BT_FRAME(34) + BT_FRAME(35) + BT_FRAME(36) + BT_FRAME(37) + BT_FRAME(38) + BT_FRAME(39) + + BT_FRAME(40) + BT_FRAME(41) + BT_FRAME(42) + BT_FRAME(43) + BT_FRAME(44) + BT_FRAME(45) + BT_FRAME(46) + BT_FRAME(47) + BT_FRAME(48) + BT_FRAME(49) + + BT_FRAME(50) + BT_FRAME(51) + BT_FRAME(52) + BT_FRAME(53) + BT_FRAME(54) + BT_FRAME(55) + BT_FRAME(56) + BT_FRAME(57) + BT_FRAME(58) + BT_FRAME(59) + + BT_FRAME(60) + BT_FRAME(61) + BT_FRAME(62) + BT_FRAME(63) + BT_FRAME(64) + BT_FRAME(65) + BT_FRAME(66) + BT_FRAME(67) + BT_FRAME(68) + BT_FRAME(69) + + BT_FRAME(70) + BT_FRAME(71) + BT_FRAME(72) + BT_FRAME(73) + BT_FRAME(74) + BT_FRAME(75) + BT_FRAME(76) + BT_FRAME(77) + BT_FRAME(78) + BT_FRAME(79) + + BT_FRAME(80) + BT_FRAME(81) + BT_FRAME(82) + BT_FRAME(83) + BT_FRAME(84) + BT_FRAME(85) + BT_FRAME(86) + BT_FRAME(87) + BT_FRAME(88) + BT_FRAME(89) + + BT_FRAME(90) + BT_FRAME(91) + BT_FRAME(92) + BT_FRAME(93) + BT_FRAME(94) + BT_FRAME(95) + BT_FRAME(96) + BT_FRAME(97) + BT_FRAME(98) + BT_FRAME(99) + + BT_FRAME(100) + BT_FRAME(101) + BT_FRAME(102) + BT_FRAME(103) + BT_FRAME(104) + BT_FRAME(105) + BT_FRAME(106) + BT_FRAME(107) + BT_FRAME(108) + BT_FRAME(109) + + BT_FRAME(110) + BT_FRAME(111) + BT_FRAME(112) + BT_FRAME(113) + BT_FRAME(114) + BT_FRAME(115) + BT_FRAME(116) + BT_FRAME(117) + BT_FRAME(118) + BT_FRAME(119) + + BT_FRAME(120) + BT_FRAME(121) + BT_FRAME(122) + BT_FRAME(123) + BT_FRAME(124) + BT_FRAME(125) + BT_FRAME(126) + BT_FRAME(127) +#undef 
BT_FRAME +} +#else +static void +prof_backtrace_impl(prof_bt_t *bt) { + cassert(config_prof); + not_reached(); +} +#endif + +void +prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { + cassert(config_prof); + pre_reentrancy(tsd, NULL); + prof_backtrace_impl(bt); + post_reentrancy(tsd); +} + +void prof_unwind_init() { +#ifdef JEMALLOC_PROF_LIBGCC + /* + * Cause the backtracing machinery to allocate its internal + * state before enabling profiling. + */ + _Unwind_Backtrace(prof_unwind_init_callback, NULL); +#endif +} + static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP From 1c6742e6a04376928ce1d6755666ba6141f038d8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 13 Apr 2020 14:19:54 -0700 Subject: [PATCH 1791/2608] Migrate prof dumping to use buffered writer --- src/prof_sys.c | 59 +++++++++++++++----------------------------------- 1 file changed, 18 insertions(+), 41 deletions(-) diff --git a/src/prof_sys.c b/src/prof_sys.c index 027da89b..f353802e 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" @@ -55,7 +56,7 @@ static bool prof_dump_handle_error_locally; * all profile dumps. 
*/ static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static size_t prof_dump_buf_end; +static buf_writer_t prof_dump_buf_writer; static int prof_dump_fd; void @@ -377,49 +378,24 @@ prof_dump_open(const char *filename) { prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; static void -prof_dump_flush() { +prof_dump_flush(void *cbopaque, const char *s) { cassert(config_prof); + assert(cbopaque == NULL); if (!prof_dump_error) { - ssize_t err = prof_dump_write_file(prof_dump_fd, prof_dump_buf, - prof_dump_buf_end); + ssize_t err = prof_dump_write_file(prof_dump_fd, s, strlen(s)); prof_dump_check_possible_error(err == -1, ": failed to write during heap profile flush\n"); } - prof_dump_buf_end = 0; } static void prof_dump_write(const char *s) { - size_t i, slen, n; - - cassert(config_prof); - - i = 0; - slen = strlen(s); - while (i < slen) { - /* Flush the buffer if it is full. */ - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - prof_dump_flush(); - } - - if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) { - /* Finish writing. */ - n = slen - i; - } else { - /* Write as much of s as will fit. 
*/ - n = PROF_DUMP_BUFSIZE - prof_dump_buf_end; - } - memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n); - prof_dump_buf_end += n; - i += n; - } - assert(i == slen); + buf_writer_cb(&prof_dump_buf_writer, s); } static void prof_dump_close() { if (prof_dump_fd != -1) { - prof_dump_flush(); close(prof_dump_fd); } } @@ -471,6 +447,13 @@ prof_dump_open_maps_impl() { prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = prof_dump_open_maps_impl; +static ssize_t +prof_dump_read_maps_cb(void *read_cbopaque, void *buf, size_t limit) { + int mfd = *(int *)read_cbopaque; + assert(mfd != -1); + return malloc_read_fd(mfd, buf, limit); +} + static void prof_dump_maps() { int mfd = prof_dump_open_maps(); @@ -479,17 +462,7 @@ prof_dump_maps() { } prof_dump_write("\nMAPPED_LIBRARIES:\n"); - ssize_t nread = 0; - do { - prof_dump_buf_end += nread; - if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) { - /* Make space in prof_dump_buf before read(). */ - prof_dump_flush(); - } - nread = malloc_read_fd(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); - } while (nread > 0); - + buf_writer_pipe(&prof_dump_buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } @@ -511,8 +484,12 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); prof_dump_open(filename); + bool err = buf_writer_init(tsd_tsdn(tsd), &prof_dump_buf_writer, + prof_dump_flush, NULL, prof_dump_buf, PROF_DUMP_BUFSIZE); + assert(!err); prof_dump_impl(tsd, tdata, prof_dump_write, leakcheck); prof_dump_maps(); + buf_writer_terminate(tsd_tsdn(tsd), &prof_dump_buf_writer); prof_dump_close(); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); From 4556d3c0c8ad4c00fd3c31762653e68fb2a701e0 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:14:53 -0700 Subject: [PATCH 1792/2608] Define structures for prof dump parameters --- src/prof_data.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff 
--git a/src/prof_data.c b/src/prof_data.c index e38cb808..bd1ccf6e 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -653,6 +653,7 @@ prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { malloc_mutex_unlock(tsdn, gctx->lock); } +typedef struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg_t; struct prof_gctx_merge_iter_arg_s { tsdn_t *tsdn; size_t leak_ngctx; @@ -660,8 +661,7 @@ struct prof_gctx_merge_iter_arg_s { static prof_gctx_t * prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - struct prof_gctx_merge_iter_arg_s *arg = - (struct prof_gctx_merge_iter_arg_s *)opaque; + prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, @@ -720,6 +720,7 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { } } +typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t; struct prof_tdata_merge_iter_arg_s { tsdn_t *tsdn; prof_cnt_t cnt_all; @@ -728,8 +729,8 @@ struct prof_tdata_merge_iter_arg_s { static prof_tdata_t * prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *opaque) { - struct prof_tdata_merge_iter_arg_s *arg = - (struct prof_tdata_merge_iter_arg_s *)opaque; + prof_tdata_merge_iter_arg_t *arg = + (prof_tdata_merge_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, tdata->lock); if (!tdata->expired) { @@ -862,8 +863,8 @@ prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { static void prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg, - struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg, + prof_tdata_merge_iter_arg_t *prof_tdata_merge_iter_arg, + prof_gctx_merge_iter_arg_t *prof_gctx_merge_iter_arg, prof_gctx_tree_t *gctxs) { size_t tabind; union { @@ -908,8 +909,8 @@ prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, void (*write_cb)(const char *), 
malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); prof_dump_write = write_cb; prof_gctx_tree_t gctxs; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); prof_dump_header(tsd_tsdn(tsd), &prof_tdata_merge_iter_arg.cnt_all); @@ -928,8 +929,8 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes) { tsd_t *tsd; prof_tdata_t *tdata; - struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg; - struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg; + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; prof_gctx_tree_t gctxs; tsd = tsd_fetch(); From 1f5fe3a3e38deaa75d32589a364163060e0ab3b3 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:09:08 -0700 Subject: [PATCH 1793/2608] Pass write callback explicitly in prof_data --- include/jemalloc/internal/prof_data.h | 7 +- src/prof_data.c | 118 ++++++++++++++++---------- src/prof_sys.c | 10 +-- test/unit/prof_reset.c | 3 +- 4 files changed, 84 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 9c2d6970..a0448d0b 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -17,8 +17,8 @@ bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -void prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, - void (*write_cb)(const char *), bool leakcheck); +void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, + prof_tdata_t *tdata, 
bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); @@ -28,7 +28,8 @@ void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef void (prof_dump_header_t)(tsdn_t *, const prof_cnt_t *); +typedef void (prof_dump_header_t)(tsdn_t *, write_cb_t *, void *, + const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); diff --git a/src/prof_data.c b/src/prof_data.c index bd1ccf6e..bc389157 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,9 +59,6 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; -/* Dump write callback; stored global to simplify function interfaces. */ -static void (*prof_dump_write)(const char *); - /******************************************************************************/ /* Red-black trees. */ @@ -504,21 +501,24 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { return 0; } -JEMALLOC_FORMAT_PRINTF(1, 2) +JEMALLOC_FORMAT_PRINTF(3, 4) static void -prof_dump_printf(const char *format, ...) { +prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque, + const char *format, ...) 
{ va_list ap; char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); - prof_dump_write(buf); + prof_dump_write(cbopaque, buf); } static void -prof_dump_print_cnts(const prof_cnt_t *cnts) { - prof_dump_printf("%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", +prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, + const prof_cnt_t *cnts) { + prof_dump_printf(prof_dump_write, cbopaque, + "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", cnts->curobjs, cnts->curbytes, cnts->accumobjs, cnts->accumbytes); } @@ -586,10 +586,17 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { return NULL; } +typedef struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg_t; +struct prof_tctx_dump_iter_arg_s { + tsdn_t *tsdn; + write_cb_t *prof_dump_write; + void *cbopaque; +}; + static prof_tctx_t * -prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; - malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); +prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { + prof_tctx_dump_iter_arg_t *arg = (prof_tctx_dump_iter_arg_t *)opaque; + malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { case prof_tctx_state_initializing: @@ -598,9 +605,11 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { break; case prof_tctx_state_dumping: case prof_tctx_state_purgatory: - prof_dump_printf(" t%"FMTu64": ", tctx->thr_uid); - prof_dump_print_cnts(&tctx->dump_cnts); - prof_dump_write("\n"); + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, + " t%"FMTu64": ", tctx->thr_uid); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, + &tctx->dump_cnts); + arg->prof_dump_write(arg->cbopaque, "\n"); break; default: not_reached(); @@ -761,38 +770,50 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } +typedef struct prof_tdata_dump_iter_arg_s 
prof_tdata_dump_iter_arg_t; +struct prof_tdata_dump_iter_arg_s { + write_cb_t *prof_dump_write; + void *cbopaque; +}; + static prof_tdata_t * prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, - void *unused) { + void *opaque) { if (!tdata->dumping) { return NULL; } - prof_dump_printf(" t%"FMTu64": ", tdata->thr_uid); - prof_dump_print_cnts(&tdata->cnt_summed); + prof_tdata_dump_iter_arg_t *arg = (prof_tdata_dump_iter_arg_t *)opaque; + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, " t%"FMTu64": ", + tdata->thr_uid); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, + &tdata->cnt_summed); if (tdata->thread_name != NULL) { - prof_dump_printf(" %s", tdata->thread_name); + arg->prof_dump_write(arg->cbopaque, " "); + arg->prof_dump_write(arg->cbopaque, tdata->thread_name); } - prof_dump_write("\n"); + arg->prof_dump_write(arg->cbopaque, "\n"); return NULL; } static void -prof_dump_header_impl(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { - prof_dump_printf("heap_v2/%"FMTu64"\n t*: ", - ((uint64_t)1U << lg_prof_sample)); - prof_dump_print_cnts(cnt_all); - prof_dump_write("\n"); +prof_dump_header_impl(tsdn_t *tsdn, write_cb_t *prof_dump_write, + void *cbopaque, const prof_cnt_t *cnt_all) { + prof_dump_printf(prof_dump_write, cbopaque, + "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); + prof_dump_print_cnts(prof_dump_write, cbopaque, cnt_all); + prof_dump_write(cbopaque, "\n"); + prof_tdata_dump_iter_arg_t arg = {prof_dump_write, cbopaque}; malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, NULL); + tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, &arg); malloc_mutex_unlock(tsdn, &tdatas_mtx); } prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; static void -prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, - prof_gctx_tree_t *gctxs) { +prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, + prof_gctx_t *gctx, const 
prof_bt_t *bt, prof_gctx_tree_t *gctxs) { cassert(config_prof); malloc_mutex_assert_owner(tsdn, gctx->lock); @@ -806,17 +827,18 @@ prof_dump_gctx(tsdn_t *tsdn, prof_gctx_t *gctx, const prof_bt_t *bt, return; } - prof_dump_write("@"); + prof_dump_write(cbopaque, "@"); for (unsigned i = 0; i < bt->len; i++) { - prof_dump_printf(" %#"FMTxPTR, (uintptr_t)bt->vec[i]); + prof_dump_printf(prof_dump_write, cbopaque, " %#"FMTxPTR, + (uintptr_t)bt->vec[i]); } - prof_dump_write("\n t*: "); - prof_dump_print_cnts(&gctx->cnt_summed); - prof_dump_write("\n"); + prof_dump_write(cbopaque, "\n t*: "); + prof_dump_print_cnts(prof_dump_write, cbopaque, &gctx->cnt_summed); + prof_dump_write(cbopaque, "\n"); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, - (void *)tsdn); + prof_tctx_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, &arg); } /* @@ -852,12 +874,20 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { #endif } +typedef struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg_t; +struct prof_gctx_dump_iter_arg_s { + tsdn_t *tsdn; + write_cb_t *prof_dump_write; + void *cbopaque; +}; + static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - tsdn_t *tsdn = (tsdn_t *)opaque; - malloc_mutex_lock(tsdn, gctx->lock); - prof_dump_gctx(tsdn, gctx, &gctx->bt, gctxs); - malloc_mutex_unlock(tsdn, gctx->lock); + prof_gctx_dump_iter_arg_t *arg = (prof_gctx_dump_iter_arg_t *)opaque; + malloc_mutex_lock(arg->tsdn, gctx->lock); + prof_dump_gctx(arg->tsdn, arg->prof_dump_write, arg->cbopaque, gctx, + &gctx->bt, gctxs); + malloc_mutex_unlock(arg->tsdn, gctx->lock); return NULL; } @@ -904,18 +934,20 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, } void -prof_dump_impl(tsd_t *tsd, prof_tdata_t *tdata, void (*write_cb)(const char *), - bool leakcheck) { +prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, + prof_tdata_t *tdata, bool 
leakcheck) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); - prof_dump_write = write_cb; prof_gctx_tree_t gctxs; prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg, &gctxs); - prof_dump_header(tsd_tsdn(tsd), &prof_tdata_merge_iter_arg.cnt_all); + prof_dump_header(tsd_tsdn(tsd), prof_dump_write, cbopaque, + &prof_tdata_merge_iter_arg.cnt_all); + prof_gctx_dump_iter_arg_t prof_gctx_dump_iter_arg = {tsd_tsdn(tsd), + prof_dump_write, cbopaque}; gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - (void *)tsd_tsdn(tsd)); + &prof_gctx_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, diff --git a/src/prof_sys.c b/src/prof_sys.c index f353802e..5895ec4b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -388,11 +388,6 @@ prof_dump_flush(void *cbopaque, const char *s) { } } -static void -prof_dump_write(const char *s) { - buf_writer_cb(&prof_dump_buf_writer, s); -} - static void prof_dump_close() { if (prof_dump_fd != -1) { @@ -461,7 +456,7 @@ prof_dump_maps() { return; } - prof_dump_write("\nMAPPED_LIBRARIES:\n"); + buf_writer_cb(&prof_dump_buf_writer, "\nMAPPED_LIBRARIES:\n"); buf_writer_pipe(&prof_dump_buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } @@ -487,7 +482,8 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, bool err = buf_writer_init(tsd_tsdn(tsd), &prof_dump_buf_writer, prof_dump_flush, NULL, prof_dump_buf, PROF_DUMP_BUFSIZE); assert(!err); - prof_dump_impl(tsd, tdata, prof_dump_write, leakcheck); + prof_dump_impl(tsd, buf_writer_cb, &prof_dump_buf_writer, tdata, + leakcheck); prof_dump_maps(); buf_writer_terminate(tsd_tsdn(tsd), &prof_dump_buf_writer); prof_dump_close(); diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 2bdc37c0..5916bd1e 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ 
-87,7 +87,8 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static void -prof_dump_header_intercept(tsdn_t *tsdn, const prof_cnt_t *cnt_all) { +prof_dump_header_intercept(tsdn_t *tsdn, write_cb_t *cb, void *cbopaque, + const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); } From 5d823f3a910c7d737500b61ff8a00f6b634bc08b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:37:19 -0700 Subject: [PATCH 1794/2608] Consolidate struct definitions for prof dump parameters --- src/prof_data.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index bc389157..8cf7228c 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -586,8 +586,8 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { return NULL; } -typedef struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg_t; -struct prof_tctx_dump_iter_arg_s { +typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t; +struct prof_dump_iter_arg_s { tsdn_t *tsdn; write_cb_t *prof_dump_write; void *cbopaque; @@ -595,7 +595,7 @@ struct prof_tctx_dump_iter_arg_s { static prof_tctx_t * prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { - prof_tctx_dump_iter_arg_t *arg = (prof_tctx_dump_iter_arg_t *)opaque; + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock); switch (tctx->state) { @@ -770,12 +770,6 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, return NULL; } -typedef struct prof_tdata_dump_iter_arg_s prof_tdata_dump_iter_arg_t; -struct prof_tdata_dump_iter_arg_s { - write_cb_t *prof_dump_write; - void *cbopaque; -}; - static prof_tdata_t * prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, void *opaque) { @@ -783,7 +777,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t 
*tdatas, prof_tdata_t *tdata, return NULL; } - prof_tdata_dump_iter_arg_t *arg = (prof_tdata_dump_iter_arg_t *)opaque; + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; prof_dump_printf(arg->prof_dump_write, arg->cbopaque, " t%"FMTu64": ", tdata->thr_uid); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, @@ -804,7 +798,7 @@ prof_dump_header_impl(tsdn_t *tsdn, write_cb_t *prof_dump_write, prof_dump_print_cnts(prof_dump_write, cbopaque, cnt_all); prof_dump_write(cbopaque, "\n"); - prof_tdata_dump_iter_arg_t arg = {prof_dump_write, cbopaque}; + prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; malloc_mutex_lock(tsdn, &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, &arg); malloc_mutex_unlock(tsdn, &tdatas_mtx); @@ -837,7 +831,7 @@ prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, prof_dump_print_cnts(prof_dump_write, cbopaque, &gctx->cnt_summed); prof_dump_write(cbopaque, "\n"); - prof_tctx_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; + prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, &arg); } @@ -874,16 +868,9 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { #endif } -typedef struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg_t; -struct prof_gctx_dump_iter_arg_s { - tsdn_t *tsdn; - write_cb_t *prof_dump_write; - void *cbopaque; -}; - static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { - prof_gctx_dump_iter_arg_t *arg = (prof_gctx_dump_iter_arg_t *)opaque; + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); prof_dump_gctx(arg->tsdn, arg->prof_dump_write, arg->cbopaque, gctx, &gctx->bt, gctxs); @@ -944,10 +931,9 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, &prof_gctx_merge_iter_arg, &gctxs); prof_dump_header(tsd_tsdn(tsd), prof_dump_write, cbopaque, 
&prof_tdata_merge_iter_arg.cnt_all); - prof_gctx_dump_iter_arg_t prof_gctx_dump_iter_arg = {tsd_tsdn(tsd), + prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd), prof_dump_write, cbopaque}; - gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, - &prof_gctx_dump_iter_arg); + gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); if (leakcheck) { prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, From d4259ea53bb842169688f5fcda1053fbbaf021a8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 14:52:05 -0700 Subject: [PATCH 1795/2608] Simplify signatures for prof dump functions --- include/jemalloc/internal/prof_data.h | 3 +- src/prof_data.c | 109 ++++++++++++-------------- test/unit/prof_reset.c | 3 +- 3 files changed, 54 insertions(+), 61 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index a0448d0b..039c2a8a 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -28,8 +28,7 @@ void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. 
*/ size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef void (prof_dump_header_t)(tsdn_t *, write_cb_t *, void *, - const prof_cnt_t *); +typedef void (prof_dump_header_t)(void *, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes); diff --git a/src/prof_data.c b/src/prof_data.c index 8cf7228c..1d501406 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -664,8 +664,8 @@ prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) { typedef struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg_t; struct prof_gctx_merge_iter_arg_s { - tsdn_t *tsdn; - size_t leak_ngctx; + tsdn_t *tsdn; + size_t *leak_ngctx; }; static prof_gctx_t * @@ -676,7 +676,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) { - arg->leak_ngctx++; + (*arg->leak_ngctx)++; } malloc_mutex_unlock(arg->tsdn, gctx->lock); @@ -731,8 +731,8 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t; struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; - prof_cnt_t cnt_all; + tsdn_t *tsdn; + prof_cnt_t *cnt_all; }; static prof_tdata_t * @@ -756,11 +756,12 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); } - arg->cnt_all.curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all.curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all->curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all->curbytes += tdata->cnt_summed.curbytes; if (opt_prof_accum) { - arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes; + arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all->accumbytes += + 
tdata->cnt_summed.accumbytes; } } else { tdata->dumping = false; @@ -791,25 +792,24 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } static void -prof_dump_header_impl(tsdn_t *tsdn, write_cb_t *prof_dump_write, - void *cbopaque, const prof_cnt_t *cnt_all) { - prof_dump_printf(prof_dump_write, cbopaque, +prof_dump_header_impl(void *opaque, const prof_cnt_t *cnt_all) { + prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); - prof_dump_print_cnts(prof_dump_write, cbopaque, cnt_all); - prof_dump_write(cbopaque, "\n"); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all); + arg->prof_dump_write(arg->cbopaque, "\n"); - prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; - malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, &arg); - malloc_mutex_unlock(tsdn, &tdatas_mtx); + malloc_mutex_lock(arg->tsdn, &tdatas_mtx); + tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, arg); + malloc_mutex_unlock(arg->tsdn, &tdatas_mtx); } prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; static void -prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, - prof_gctx_t *gctx, const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { +prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, + const prof_bt_t *bt, prof_gctx_tree_t *gctxs) { cassert(config_prof); - malloc_mutex_assert_owner(tsdn, gctx->lock); + malloc_mutex_assert_owner(arg->tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. 
*/ if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || @@ -821,18 +821,18 @@ prof_dump_gctx(tsdn_t *tsdn, write_cb_t *prof_dump_write, void *cbopaque, return; } - prof_dump_write(cbopaque, "@"); + arg->prof_dump_write(arg->cbopaque, "@"); for (unsigned i = 0; i < bt->len; i++) { - prof_dump_printf(prof_dump_write, cbopaque, " %#"FMTxPTR, - (uintptr_t)bt->vec[i]); + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, + " %#"FMTxPTR, (uintptr_t)bt->vec[i]); } - prof_dump_write(cbopaque, "\n t*: "); - prof_dump_print_cnts(prof_dump_write, cbopaque, &gctx->cnt_summed); - prof_dump_write(cbopaque, "\n"); + arg->prof_dump_write(arg->cbopaque, "\n t*: "); + prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, + &gctx->cnt_summed); + arg->prof_dump_write(arg->cbopaque, "\n"); - prof_dump_iter_arg_t arg = {tsdn, prof_dump_write, cbopaque}; - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, &arg); + tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg); } /* @@ -872,17 +872,14 @@ static prof_gctx_t * prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); - prof_dump_gctx(arg->tsdn, arg->prof_dump_write, arg->cbopaque, gctx, - &gctx->bt, gctxs); + prof_dump_gctx(arg, gctx, &gctx->bt, gctxs); malloc_mutex_unlock(arg->tsdn, gctx->lock); return NULL; } static void -prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, - prof_tdata_merge_iter_arg_t *prof_tdata_merge_iter_arg, - prof_gctx_merge_iter_arg_t *prof_gctx_merge_iter_arg, - prof_gctx_tree_t *gctxs) { +prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all, + size_t *leak_ngctx, prof_gctx_tree_t *gctxs) { size_t tabind; union { prof_gctx_t *p; @@ -904,18 +901,20 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, * Iterate over tdatas, and for the non-expired ones snapshot their tctx * stats and merge them into the associated gctx's. 
*/ - prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd); - memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t)); + memset(cnt_all, 0, sizeof(prof_cnt_t)); + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd), + cnt_all}; malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - (void *)prof_tdata_merge_iter_arg); + &prof_tdata_merge_iter_arg); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. */ - prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd); - prof_gctx_merge_iter_arg->leak_ngctx = 0; + *leak_ngctx = 0; + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd), + leak_ngctx}; gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - (void *)prof_gctx_merge_iter_arg); + &prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); } @@ -924,20 +923,17 @@ void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); + prof_cnt_t cnt_all; + size_t leak_ngctx; prof_gctx_tree_t gctxs; - prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; - prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); - prof_dump_header(tsd_tsdn(tsd), prof_dump_write, cbopaque, - &prof_tdata_merge_iter_arg.cnt_all); + prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd), prof_dump_write, cbopaque}; + prof_dump_header(&prof_dump_iter_arg, &cnt_all); gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); if (leakcheck) { - prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all, - prof_gctx_merge_iter_arg.leak_ngctx); + prof_leakcheck(&cnt_all, leak_ngctx); } } @@ -947,8 +943,8 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, uint64_t *accumbytes) { tsd_t 
*tsd; prof_tdata_t *tdata; - prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg; - prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg; + prof_cnt_t cnt_all; + size_t leak_ngctx; prof_gctx_tree_t gctxs; tsd = tsd_fetch(); @@ -969,21 +965,20 @@ prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, return; } - prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg, - &prof_gctx_merge_iter_arg, &gctxs); + prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); prof_gctx_finish(tsd, &gctxs); if (curobjs != NULL) { - *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs; + *curobjs = cnt_all.curobjs; } if (curbytes != NULL) { - *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes; + *curbytes = cnt_all.curbytes; } if (accumobjs != NULL) { - *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs; + *accumobjs = cnt_all.accumobjs; } if (accumbytes != NULL) { - *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes; + *accumbytes = cnt_all.accumbytes; } } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 5916bd1e..8c82e6d5 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -87,8 +87,7 @@ TEST_END bool prof_dump_header_intercepted = false; prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; static void -prof_dump_header_intercept(tsdn_t *tsdn, write_cb_t *cb, void *cbopaque, - const prof_cnt_t *cnt_all) { +prof_dump_header_intercept(void *opaque, const prof_cnt_t *cnt_all) { prof_dump_header_intercepted = true; memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); } From 80d18c18c9a39e534ecb080256cb00e652f3d863 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 20 Apr 2020 15:26:55 -0700 Subject: [PATCH 1796/2608] Pass prof dump parameters explicitly in prof_sys --- src/prof_sys.c | 108 ++++++++++++++++++++++++++----------------------- 1 file changed, 57 insertions(+), 51 deletions(-) diff --git a/src/prof_sys.c b/src/prof_sys.c index 5895ec4b..4897988d 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -37,28 +37,6 @@ 
static char *prof_dump_prefix = NULL; /* The fallback allocator profiling functionality will use. */ base_t *prof_base; -/* The following are needed for dumping and are protected by prof_dump_mtx. */ -/* - * Whether there has been an error in the dumping process, which could have - * happened either in file opening or in file writing. When an error has - * already occurred, we will stop further writing to the file. - */ -static bool prof_dump_error; -/* - * Whether error should be handled locally: if true, then we print out error - * message as well as abort (if opt_abort is true) when an error occurred, and - * we also report the error back to the caller in the end; if false, then we - * only report the error back to the caller in the end. - */ -static bool prof_dump_handle_error_locally; -/* - * This buffer is rather large for stack allocation, so use a single buffer for - * all profile dumps. - */ -static char prof_dump_buf[PROF_DUMP_BUFSIZE]; -static buf_writer_t prof_dump_buf_writer; -static int prof_dump_fd; - void bt_init(prof_bt_t *bt, void **vec) { cassert(config_prof); @@ -337,15 +315,42 @@ prof_getpid(void) { #endif } +/* + * This buffer is rather large for stack allocation, so use a single buffer for + * all profile dumps; protected by prof_dump_mtx. + */ +static char prof_dump_buf[PROF_DUMP_BUFSIZE]; + +typedef struct prof_dump_arg_s prof_dump_arg_t; +struct prof_dump_arg_s { + /* + * Whether error should be handled locally: if true, then we print out + * error message as well as abort (if opt_abort is true) when an error + * occurred, and we also report the error back to the caller in the end; + * if false, then we only report the error back to the caller in the + * end. + */ + const bool handle_error_locally; + /* + * Whether there has been an error in the dumping process, which could + * have happened either in file opening or in file writing. When an + * error has already occurred, we will stop further writing to the file. 
+ */ + bool error; + /* File descriptor of the dump file. */ + int prof_dump_fd; +}; + static void -prof_dump_check_possible_error(bool err_cond, const char *format, ...) { - assert(!prof_dump_error); +prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond, + const char *format, ...) { + assert(!arg->error); if (!err_cond) { return; } - prof_dump_error = true; - if (!prof_dump_handle_error_locally) { + arg->error = true; + if (!arg->handle_error_locally) { return; } @@ -369,29 +374,30 @@ prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file = prof_dump_open_file_impl; static void -prof_dump_open(const char *filename) { - prof_dump_fd = prof_dump_open_file(filename, 0644); - prof_dump_check_possible_error(prof_dump_fd == -1, +prof_dump_open(prof_dump_arg_t *arg, const char *filename) { + arg->prof_dump_fd = prof_dump_open_file(filename, 0644); + prof_dump_check_possible_error(arg, arg->prof_dump_fd == -1, ": failed to open \"%s\"\n", filename); } prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file = malloc_write_fd; static void -prof_dump_flush(void *cbopaque, const char *s) { +prof_dump_flush(void *opaque, const char *s) { cassert(config_prof); - assert(cbopaque == NULL); - if (!prof_dump_error) { - ssize_t err = prof_dump_write_file(prof_dump_fd, s, strlen(s)); - prof_dump_check_possible_error(err == -1, + prof_dump_arg_t *arg = (prof_dump_arg_t *)opaque; + if (!arg->error) { + ssize_t err = prof_dump_write_file(arg->prof_dump_fd, s, + strlen(s)); + prof_dump_check_possible_error(arg, err == -1, ": failed to write during heap profile flush\n"); } } static void -prof_dump_close() { - if (prof_dump_fd != -1) { - close(prof_dump_fd); +prof_dump_close(prof_dump_arg_t *arg) { + if (arg->prof_dump_fd != -1) { + close(arg->prof_dump_fd); } } @@ -450,14 +456,14 @@ prof_dump_read_maps_cb(void *read_cbopaque, void *buf, size_t limit) { } static void -prof_dump_maps() { +prof_dump_maps(buf_writer_t *buf_writer) { int mfd = prof_dump_open_maps(); if (mfd == 
-1) { return; } - buf_writer_cb(&prof_dump_buf_writer, "\nMAPPED_LIBRARIES:\n"); - buf_writer_pipe(&prof_dump_buf_writer, prof_dump_read_maps_cb, &mfd); + buf_writer_cb(buf_writer, "\nMAPPED_LIBRARIES:\n"); + buf_writer_pipe(buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } @@ -472,26 +478,26 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, return true; } - prof_dump_error = false; - prof_dump_handle_error_locally = !propagate_err; + prof_dump_arg_t arg = {/* handle_error_locally */ !propagate_err, + /* error */ false, /* prof_dump_fd */ -1}; pre_reentrancy(tsd, NULL); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx); - prof_dump_open(filename); - bool err = buf_writer_init(tsd_tsdn(tsd), &prof_dump_buf_writer, - prof_dump_flush, NULL, prof_dump_buf, PROF_DUMP_BUFSIZE); + prof_dump_open(&arg, filename); + buf_writer_t buf_writer; + bool err = buf_writer_init(tsd_tsdn(tsd), &buf_writer, prof_dump_flush, + &arg, prof_dump_buf, PROF_DUMP_BUFSIZE); assert(!err); - prof_dump_impl(tsd, buf_writer_cb, &prof_dump_buf_writer, tdata, - leakcheck); - prof_dump_maps(); - buf_writer_terminate(tsd_tsdn(tsd), &prof_dump_buf_writer); - prof_dump_close(); + prof_dump_impl(tsd, buf_writer_cb, &buf_writer, tdata, leakcheck); + prof_dump_maps(&buf_writer); + buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); + prof_dump_close(&arg); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); post_reentrancy(tsd); - return prof_dump_error; + return arg.error; } /* From f58ebdff7a82ed68f3bc007b0d10ed02ba3d065a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 26 Jun 2020 14:56:17 -0700 Subject: [PATCH 1797/2608] Generalize prof_cnt_all() for testing --- include/jemalloc/internal/prof_data.h | 3 +- src/prof_data.c | 48 +++++---------------------- test/unit/prof_tctx.c | 16 ++++----- 3 files changed, 18 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 039c2a8a..bf6e480e 100644 --- 
a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -30,7 +30,6 @@ size_t prof_tdata_count(void); size_t prof_bt_count(void); typedef void (prof_dump_header_t)(void *, const prof_cnt_t *); extern prof_dump_header_t *JET_MUTABLE prof_dump_header; -void prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes); +void prof_cnt_all(prof_cnt_t *cnt_all); #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/src/prof_data.c b/src/prof_data.c index 1d501406..ee022ccd 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -939,46 +939,16 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, /* Used in unit tests. */ void -prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs, - uint64_t *accumbytes) { - tsd_t *tsd; - prof_tdata_t *tdata; - prof_cnt_t cnt_all; - size_t leak_ngctx; - prof_gctx_tree_t gctxs; - - tsd = tsd_fetch(); - tdata = prof_tdata_get(tsd, false); +prof_cnt_all(prof_cnt_t *cnt_all) { + tsd_t *tsd = tsd_fetch(); + prof_tdata_t *tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { - if (curobjs != NULL) { - *curobjs = 0; - } - if (curbytes != NULL) { - *curbytes = 0; - } - if (accumobjs != NULL) { - *accumobjs = 0; - } - if (accumbytes != NULL) { - *accumbytes = 0; - } - return; - } - - prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); - prof_gctx_finish(tsd, &gctxs); - - if (curobjs != NULL) { - *curobjs = cnt_all.curobjs; - } - if (curbytes != NULL) { - *curbytes = cnt_all.curbytes; - } - if (accumobjs != NULL) { - *accumobjs = cnt_all.accumobjs; - } - if (accumbytes != NULL) { - *accumbytes = cnt_all.accumbytes; + memset(cnt_all, 0, sizeof(prof_cnt_t)); + } else { + size_t leak_ngctx; + prof_gctx_tree_t gctxs; + prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs); + prof_gctx_finish(tsd, &gctxs); } } diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index 801e5f79..e0efdc36 100644 --- a/test/unit/prof_tctx.c +++ 
b/test/unit/prof_tctx.c @@ -7,21 +7,21 @@ TEST_BEGIN(test_prof_realloc) { int flags; void *p, *q; prof_info_t prof_info_p, prof_info_q; - uint64_t curobjs_0, curobjs_1, curobjs_2, curobjs_3; + prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3; test_skip_if(!config_prof); tsd = tsd_fetch(); flags = MALLOCX_TCACHE_NONE; - prof_cnt_all(&curobjs_0, NULL, NULL, NULL); + prof_cnt_all(&cnt_0); p = mallocx(1024, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); - prof_cnt_all(&curobjs_1, NULL, NULL, NULL); - expect_u64_eq(curobjs_0 + 1, curobjs_1, + prof_cnt_all(&cnt_1); + expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs, "Allocation should have increased sample size"); q = rallocx(p, 2048, flags); @@ -30,13 +30,13 @@ TEST_BEGIN(test_prof_realloc) { prof_info_get(tsd, q, NULL, &prof_info_q); expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, "Expected valid tctx"); - prof_cnt_all(&curobjs_2, NULL, NULL, NULL); - expect_u64_eq(curobjs_1, curobjs_2, + prof_cnt_all(&cnt_2); + expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs, "Reallocation should not have changed sample size"); dallocx(q, flags); - prof_cnt_all(&curobjs_3, NULL, NULL, NULL); - expect_u64_eq(curobjs_0, curobjs_3, + prof_cnt_all(&cnt_3); + expect_u64_eq(cnt_0.curobjs, cnt_3.curobjs, "Sample size should have returned to base level"); } TEST_END From c2e7a063923f43b66a58815ff85f9fcf1681cc76 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 26 Jun 2020 15:26:51 -0700 Subject: [PATCH 1798/2608] No need to intercept prof_dump_header() in tests --- include/jemalloc/internal/prof_data.h | 2 -- src/prof_data.c | 4 +--- test/unit/prof_reset.c | 31 ++++++--------------------- 3 files changed, 7 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index bf6e480e..e2e4aedb 100644 --- 
a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -28,8 +28,6 @@ void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ size_t prof_tdata_count(void); size_t prof_bt_count(void); -typedef void (prof_dump_header_t)(void *, const prof_cnt_t *); -extern prof_dump_header_t *JET_MUTABLE prof_dump_header; void prof_cnt_all(prof_cnt_t *cnt_all); #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/src/prof_data.c b/src/prof_data.c index ee022ccd..6b441de1 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -792,8 +792,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } static void -prof_dump_header_impl(void *opaque, const prof_cnt_t *cnt_all) { - prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; +prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) { prof_dump_printf(arg->prof_dump_write, arg->cbopaque, "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all); @@ -803,7 +802,6 @@ prof_dump_header_impl(void *opaque, const prof_cnt_t *cnt_all) { tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter, arg); malloc_mutex_unlock(arg->tsdn, &tdatas_mtx); } -prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl; static void prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 8c82e6d5..a0fb0389 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -84,45 +84,26 @@ TEST_BEGIN(test_prof_reset_basic) { } TEST_END -bool prof_dump_header_intercepted = false; -prof_cnt_t cnt_all_copy = {0, 0, 0, 0}; -static void -prof_dump_header_intercept(void *opaque, const prof_cnt_t *cnt_all) { - prof_dump_header_intercepted = true; - memcpy(&cnt_all_copy, cnt_all, sizeof(prof_cnt_t)); -} - TEST_BEGIN(test_prof_reset_cleanup) { - void *p; - prof_dump_header_t *prof_dump_header_orig; - 
test_skip_if(!config_prof); set_prof_active(true); expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); - p = mallocx(1, 0); + void *p = mallocx(1, 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); - prof_dump_header_orig = prof_dump_header; - prof_dump_header = prof_dump_header_intercept; - expect_false(prof_dump_header_intercepted, "Unexpected intercept"); - - expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), - 0, "Unexpected error while dumping heap profile"); - expect_true(prof_dump_header_intercepted, "Expected intercept"); - expect_u64_eq(cnt_all_copy.curobjs, 1, "Expected 1 allocation"); + prof_cnt_t cnt_all; + prof_cnt_all(&cnt_all); + expect_u64_eq(cnt_all.curobjs, 1, "Expected 1 allocation"); expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); - expect_d_eq(mallctl("prof.dump", NULL, NULL, NULL, 0), - 0, "Unexpected error while dumping heap profile"); - expect_u64_eq(cnt_all_copy.curobjs, 0, "Expected 0 allocations"); + prof_cnt_all(&cnt_all); + expect_u64_eq(cnt_all.curobjs, 0, "Expected 0 allocations"); expect_zu_eq(prof_bt_count(), 1, "Expected 1 backtrace"); - prof_dump_header = prof_dump_header_orig; - dallocx(p, 0); expect_zu_eq(prof_bt_count(), 0, "Expected 0 backtraces"); From 00f06c9beb2509fba2133677c17ec702446b2102 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 4 Jul 2020 16:09:27 +0100 Subject: [PATCH 1799/2608] enabling mpss on solaris/illumos. reusing slighty linux configuration as possible, aligning the address range to HUGEPAGE. 
--- configure.ac | 8 ++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ include/jemalloc/internal/jemalloc_preamble.h.in | 8 ++++++++ src/jemalloc.c | 2 +- src/pages.c | 15 ++++++++++++--- 5 files changed, 34 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index d9fdebd7..bcd63632 100644 --- a/configure.ac +++ b/configure.ac @@ -1879,6 +1879,14 @@ if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork" fi +AC_CHECK_FUNC([memcntl], + [have_memcntl="1"], + [have_memcntl="0"], + ) +if test "x$have_memcntl" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ]) +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 83e733e3..0aef0bb3 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -301,6 +301,11 @@ */ #undef JEMALLOC_THP +/* + * Defined if memcntl page admin call is supported + */ +#undef JEMALLOC_HAVE_MEMCNTL + /* Define if operating system has alloca.h header. 
*/ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 66302ab3..740fcfcb 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -217,4 +217,12 @@ static const bool config_high_res_timer = #endif ; +static const bool have_memcntl = +#ifdef JEMALLOC_HAVE_MEMCNTL + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index b468d821..9b5ce681 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1533,7 +1533,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], for (int i = 0; i < thp_mode_names_limit; i++) { if (strncmp(thp_mode_names[i],v, vlen) == 0) { - if (!have_madvise_huge) { + if (!have_madvise_huge && !have_memcntl) { CONF_ERROR( "No THP support", k, klen, v, vlen); diff --git a/src/pages.c b/src/pages.c index 9413d874..0ddc5ba0 100644 --- a/src/pages.c +++ b/src/pages.c @@ -363,8 +363,13 @@ pages_huge_impl(void *addr, size_t size, bool aligned) { assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size); } -#ifdef JEMALLOC_HAVE_MADVISE_HUGE +#if defined(JEMALLOC_HAVE_MADVISE_HUGE) return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#elif defined(JEMALLOC_HAVE_MEMCNTL) + struct memcntl_mha m = {0}; + m.mha_cmd = MHA_MAPSIZE_VA; + m.mha_pagesize = HUGEPAGE; + return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0); #else return true; #endif @@ -561,14 +566,14 @@ pages_set_thp_state (void *ptr, size_t size) { static void init_thp_state(void) { - if (!have_madvise_huge) { + if (!have_madvise_huge && !have_memcntl) { if (metadata_thp_enabled() && opt_abort) { malloc_write(": no MADV_HUGEPAGE support\n"); abort(); } goto label_error; } - +#if defined(JEMALLOC_HAVE_MADVISE_HUGE) static const char sys_state_madvise[] = "always [madvise] never\n"; static const char sys_state_always[] = 
"[always] madvise never\n"; static const char sys_state_never[] = "always madvise [never]\n"; @@ -608,6 +613,10 @@ init_thp_state(void) { goto label_error; } return; +#elif defined(JEMALLOC_HAVE_MEMCNTL) + init_system_thp_mode = thp_mode_default; + return; +#endif label_error: opt_thp = init_system_thp_mode = thp_mode_not_supported; } From 129b72705833658d87886781347548e0261fcaeb Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 15:16:38 -0700 Subject: [PATCH 1800/2608] Add typed-list module. This gives some named convenience wrappers. --- include/jemalloc/internal/edata.h | 42 ++--------------------- include/jemalloc/internal/typed_list.h | 47 ++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 39 deletions(-) create mode 100644 include/jemalloc/internal/typed_list.h diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index ac8d647e..58bddd1a 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/typed_list.h" enum extent_state_e { extent_state_active = 0, @@ -58,7 +59,6 @@ struct edata_map_info_s { /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ typedef struct edata_s edata_t; -typedef ql_head(edata_t) edata_list_t; typedef ph(edata_t) edata_tree_t; typedef ph(edata_t) edata_heap_t; struct edata_s { @@ -209,6 +209,8 @@ struct edata_s { }; }; +TYPED_LIST(edata_list, edata_t, ql_link) + static inline unsigned edata_arena_ind_get(const edata_t *edata) { unsigned arena_ind = (unsigned)((edata->e_bits & @@ -531,7 +533,6 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); edata_ranged_set(edata, ranged); - ql_elm_new(edata, ql_link); edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { edata_prof_tctx_set(edata, NULL); @@ -552,43 +553,6 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_ranged_set(edata, false); } -static inline void -edata_list_init(edata_list_t *list) { - ql_new(list); -} - -static inline edata_t * -edata_list_first(const edata_list_t *list) { - return ql_first(list); -} - -static inline edata_t * -edata_list_last(const edata_list_t *list) { - return ql_last(list, ql_link); -} - -static inline void -edata_list_append(edata_list_t *list, edata_t *edata) { - ql_tail_insert(list, edata, ql_link); -} - -static inline void -edata_list_prepend(edata_list_t *list, edata_t *edata) { - ql_head_insert(list, edata, ql_link); -} - -static inline void -edata_list_replace(edata_list_t *list, edata_t *to_remove, - edata_t *to_insert) { - ql_after_insert(to_remove, to_insert, ql_link); - ql_remove(list, to_remove, ql_link); -} - -static inline void -edata_list_remove(edata_list_t *list, edata_t *edata) { - ql_remove(list, edata, ql_link); -} - static inline int edata_sn_comp(const edata_t *a, const edata_t *b) { size_t a_sn = edata_sn_get(a); diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h new file mode 100644 index 00000000..7ad2237f --- /dev/null +++ b/include/jemalloc/internal/typed_list.h @@ -0,0 +1,47 @@ +#ifndef 
JEMALLOC_INTERNAL_TYPED_LIST_H +#define JEMALLOC_INTERNAL_TYPED_LIST_H + +/* + * This wraps the ql module to implement a list class in a way that's a little + * bit easier to use; it handles ql_elm_new calls and provides type safety. + */ + +#define TYPED_LIST(list_type, el_type, linkage) \ +typedef struct { \ + ql_head(el_type) head; \ +} list_type##_t; \ +static inline void \ +list_type##_init(list_type##_t *list) { \ + ql_new(&list->head); \ +} \ +static inline el_type * \ +list_type##_first(const list_type##_t *list) { \ + return ql_first(&list->head); \ +} \ +static inline el_type * \ +list_type##_last(const list_type##_t *list) { \ + return ql_last(&list->head, linkage); \ +} \ +static inline void \ +list_type##_append(list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_tail_insert(&list->head, item, linkage); \ +} \ +static inline void \ +list_type##_prepend(list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_head_insert(&list->head, item, linkage); \ +} \ +static inline void \ +list_type##_replace(list_type##_t *list, el_type *to_remove, \ + el_type *to_insert) { \ + ql_elm_new(to_insert, linkage); \ + ql_after_insert(to_remove, to_insert, linkage); \ + ql_remove(&list->head, to_remove, linkage); \ +} \ +static inline void \ +list_type##_remove(list_type##_t *list, el_type *item) { \ + ql_remove(&list->head, item, linkage); \ +} + +#endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */ From 392f645f4d850d2256443299183123258899bb3e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 15:15:51 -0700 Subject: [PATCH 1801/2608] Edata: split up different list linkage uses. 
--- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/bin.h | 2 +- include/jemalloc/internal/edata.h | 37 ++++++++++++++--------- include/jemalloc/internal/edata_cache.h | 2 +- include/jemalloc/internal/eset.h | 2 +- src/arena.c | 14 ++++----- src/bin.c | 2 +- src/edata_cache.c | 10 +++--- src/eset.c | 6 ++-- src/extent.c | 2 +- src/large.c | 6 ++-- src/pa.c | 17 ++++++----- 12 files changed, 55 insertions(+), 47 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 0c3f42f3..baa7031c 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -69,7 +69,7 @@ struct arena_s { * * Synchronization: large_mtx. */ - edata_list_t large; + edata_list_active_t large; /* Synchronizes all large allocation/update/deallocation. */ malloc_mutex_t large_mtx; diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 9a774e90..9241ee73 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -32,7 +32,7 @@ struct bin_s { edata_heap_t slabs_nonfull; /* List used to track full slabs. */ - edata_list_t slabs_full; + edata_list_active_t slabs_full; /* Bin statistics. */ bin_stats_t stats; diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 58bddd1a..fb0b489e 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -185,22 +185,28 @@ struct edata_s { size_t e_bsize; }; - /* - * List linkage, used by a variety of lists: - * - bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(edata_t) ql_link; - - /* - * Linkage for per size class sn/address-ordered heaps, and - * for extent_avail - */ - phn(edata_t) ph_link; + union { + /* + * List linkage used when the edata_t is active; either in + * arena's large allocations or bin_t's slabs_full. 
+ */ + ql_elm(edata_t) ql_link_active; + /* + * Pairing heap linkage. Used whenever the extent is inactive + * (in the page allocators), or when it is active and in + * slabs_nonfull, or when the edata_t is unassociated with an + * extent and sitting in an edata_cache. + */ + phn(edata_t) ph_link; + }; union { + /* + * List linkage used when the extent is inactive: + * - Stashed dirty extents + * - Ecache LRU functionality. + */ + ql_elm(edata_t) ql_link_inactive; /* Small region slab metadata. */ slab_data_t e_slab_data; @@ -209,7 +215,8 @@ struct edata_s { }; }; -TYPED_LIST(edata_list, edata_t, ql_link) +TYPED_LIST(edata_list_active, edata_t, ql_link_active) +TYPED_LIST(edata_list_inactive, edata_t, ql_link_inactive) static inline unsigned edata_arena_ind_get(const edata_t *edata) { diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 620360d1..02685c87 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -27,7 +27,7 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); typedef struct edata_cache_small_s edata_cache_small_t; struct edata_cache_small_s { - edata_list_t list; + edata_list_inactive_t list; size_t count; edata_cache_t *fallback; }; diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index e29179d1..d260bc13 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -25,7 +25,7 @@ struct eset_s { bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; /* LRU of all extents in heaps. */ - edata_list_t lru; + edata_list_inactive_t lru; /* Page sum for all extents in heaps. 
*/ atomic_zu_t npages; diff --git a/src/arena.c b/src/arena.c index 2a3af5c6..573dde99 100644 --- a/src/arena.c +++ b/src/arena.c @@ -594,7 +594,7 @@ arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - edata_list_append(&bin->slabs_full, slab); + edata_list_active_append(&bin->slabs_full, slab); } static void @@ -602,7 +602,7 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { if (arena_is_auto(arena)) { return; } - edata_list_remove(&bin->slabs_full, slab); + edata_list_active_remove(&bin->slabs_full, slab); } static void @@ -622,8 +622,8 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } - for (slab = edata_list_first(&bin->slabs_full); slab != NULL; - slab = edata_list_first(&bin->slabs_full)) { + for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; + slab = edata_list_active_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -655,8 +655,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Large allocations. 
*/ malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); - for (edata_t *edata = edata_list_first(&arena->large); edata != - NULL; edata = edata_list_first(&arena->large)) { + for (edata_t *edata = edata_list_active_first(&arena->large); + edata != NULL; edata = edata_list_active_first(&arena->large)) { void *ptr = edata_base_get(edata); size_t usize; @@ -1465,7 +1465,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); - edata_list_init(&arena->large); + edata_list_active_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; diff --git a/src/bin.c b/src/bin.c index 52de9ff3..fa204587 100644 --- a/src/bin.c +++ b/src/bin.c @@ -46,7 +46,7 @@ bin_init(bin_t *bin) { } bin->slabcur = NULL; edata_heap_new(&bin->slabs_nonfull); - edata_list_init(&bin->slabs_full); + edata_list_active_init(&bin->slabs_full); if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } diff --git a/src/edata_cache.c b/src/edata_cache.c index 4601f33c..d899ce80 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -59,7 +59,7 @@ edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { - edata_list_init(&ecs->list); + edata_list_inactive_init(&ecs->list); ecs->count = 0; ecs->fallback = fallback; } @@ -67,9 +67,9 @@ edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { edata_t * edata_cache_small_get(edata_cache_small_t *ecs) { assert(ecs->count > 0); - edata_t *edata = edata_list_first(&ecs->list); + edata_t *edata = edata_list_inactive_first(&ecs->list); assert(edata != NULL); - edata_list_remove(&ecs->list, edata); + edata_list_inactive_remove(&ecs->list, edata); ecs->count--; return edata; } @@ -77,7 +77,7 @@ edata_cache_small_get(edata_cache_small_t *ecs) { void 
edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) { assert(edata != NULL); - edata_list_append(&ecs->list, edata); + edata_list_inactive_append(&ecs->list, edata); ecs->count++; } @@ -93,7 +93,7 @@ bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, if (edata == NULL) { return true; } - ql_elm_new(edata, ql_link); + ql_elm_new(edata, ql_link_inactive); edata_cache_small_put(ecs, edata); } return false; diff --git a/src/eset.c b/src/eset.c index c4e39d25..c9af80e1 100644 --- a/src/eset.c +++ b/src/eset.c @@ -12,7 +12,7 @@ eset_init(eset_t *eset, extent_state_t state) { edata_heap_new(&eset->heaps[i]); } bitmap_init(eset->bitmap, &eset_bitmap_info, true); - edata_list_init(&eset->lru); + edata_list_inactive_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; } @@ -65,7 +65,7 @@ eset_insert(eset_t *eset, edata_t *edata) { eset_stats_add(eset, pind, size); } - edata_list_append(&eset->lru, edata); + edata_list_inactive_append(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -95,7 +95,7 @@ eset_remove(eset_t *eset, edata_t *edata) { bitmap_set(eset->bitmap, &eset_bitmap_info, (size_t)pind); } - edata_list_remove(&eset->lru, edata); + edata_list_inactive_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; /* * As in eset_insert, we hold eset->mtx and so don't need atomic diff --git a/src/extent.c b/src/extent.c index 073f8065..d6349c31 100644 --- a/src/extent.c +++ b/src/extent.c @@ -139,7 +139,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata; while (true) { /* Get the LRU extent, if any. 
*/ - edata = edata_list_first(&ecache->eset.lru); + edata = edata_list_inactive_first(&ecache->eset.lru); if (edata == NULL) { goto label_return; } diff --git a/src/large.c b/src/large.c index 3ea08be4..42d2fd7d 100644 --- a/src/large.c +++ b/src/large.c @@ -43,7 +43,7 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (!arena_is_auto(arena)) { /* Insert edata into large. */ malloc_mutex_lock(tsdn, &arena->large_mtx); - edata_list_append(&arena->large, edata); + edata_list_active_append(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } @@ -225,14 +225,14 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); - edata_list_remove(&arena->large, edata); + edata_list_active_remove(&arena->large, edata); malloc_mutex_unlock(tsdn, &arena->large_mtx); } } else { /* Only hold the large_mtx if necessary. */ if (!arena_is_auto(arena)) { malloc_mutex_assert_owner(tsdn, &arena->large_mtx); - edata_list_remove(&arena->large, edata); + edata_list_active_remove(&arena->large, edata); } } arena_extent_dalloc_large_prep(tsdn, arena, edata); diff --git a/src/pa.c b/src/pa.c index a7fe70fb..50c64b4f 100644 --- a/src/pa.c +++ b/src/pa.c @@ -239,7 +239,8 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, static size_t pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { + size_t npages_limit, size_t npages_decay_max, + edata_list_inactive_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pa_shard_ehooks_get(shard); @@ -252,7 +253,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, if (edata == NULL) { break; } - edata_list_append(result, edata); + edata_list_inactive_append(result, edata); nstashed += 
edata_size_get(edata) >> LG_PAGE; } return nstashed; @@ -261,7 +262,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, static size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_t *decay_extents) { + edata_list_inactive_t *decay_extents) { bool err; size_t nmadvise = 0; @@ -272,9 +273,9 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool try_muzzy = !fully_decay && pa_shard_may_have_muzzy(shard); - for (edata_t *edata = edata_list_first(decay_extents); edata != - NULL; edata = edata_list_first(decay_extents)) { - edata_list_remove(decay_extents, edata); + for (edata_t *edata = edata_list_inactive_first(decay_extents); + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { + edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); size_t npages = size >> LG_PAGE; @@ -342,8 +343,8 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - edata_list_t decay_extents; - edata_list_init(&decay_extents); + edata_list_inactive_t decay_extents; + edata_list_inactive_init(&decay_extents); size_t npurge = pa_stash_decayed(tsdn, shard, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { From ae541d3fabd679c97326e81b652fa3979e734404 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 17:16:10 -0700 Subject: [PATCH 1802/2608] Edata: Reserve some space for hugepages. 
--- include/jemalloc/internal/edata.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index fb0b489e..bb7da1d5 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -185,6 +185,18 @@ struct edata_s { size_t e_bsize; }; + /* + * Reserved for hugepages -- once that allocator is more settled, we + * might be able to claw some of this back. Until then, don't get any + * funny ideas about using the space we just freed up to keep some other + * bit of metadata around. That kind of thinking can be hazardous to + * your health. + * + * This keeps the size of an edata_t at exactly 128 bytes on + * architectures with 8-byte pointers and 4k pages. + */ + void *reserved1, *reserved2; + union { /* * List linkage used when the edata_t is active; either in From f1f4ec315a1831612f6d66b62be55a323fa94312 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 8 Jul 2020 15:50:23 -0700 Subject: [PATCH 1803/2608] Tcache: Tweak nslots_max tuning parameter. In making these settings configurable, 634afc4124100b5ff11e892481d912d56099be1a unintentially changed a tuning parameter (reducing the "goal" max by a factor of 4). This commit undoes that change. --- src/tcache.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index b73fd0dd..a33d9e24 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -25,12 +25,12 @@ unsigned opt_tcache_nslots_large = 20; /* * We attempt to make the number of slots in a tcache bin for a given size class * equal to the number of objects in a slab times some multiplier. By default, - * the multiplier is 1/2 (i.e. we set the maximum number of objects in the - * tcache to half the number of objects in a slab). + * the multiplier is 2 (i.e. we set the maximum number of objects in the tcache + * to twice the number of objects in a slab). 
* This is bounded by some other constraints as well, like the fact that it * must be even, must be less than opt_tcache_nslots_small_max, etc.. */ -ssize_t opt_lg_tcache_nslots_mul = -1; +ssize_t opt_lg_tcache_nslots_mul = 1; /* * Number of allocation bytes between tcache incremental GCs. Again, this From 3cf19c6e5e8b49c3bbf84bbfeb9ab49b38f0546c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 13:21:41 -0700 Subject: [PATCH 1804/2608] atomic: add atomic_load_sub_store --- include/jemalloc/internal/atomic.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index e5afb202..c0f73122 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -63,6 +63,13 @@ type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ type newval = oldval + inc; \ atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ + } \ + ATOMIC_INLINE void \ + atomic_load_sub_store_##short_type(atomic_##short_type##_t *a, \ + type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval - inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ } /* From 1b5f632e0fbb28d162fbf70d1032434787269f1a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 15:02:19 -0700 Subject: [PATCH 1805/2608] Introduce PAI: Page allocator interface --- include/jemalloc/internal/pa.h | 8 ++ include/jemalloc/internal/pai.h | 45 ++++++++++++ src/pa.c | 125 ++++++++++++++++++++++++-------- 3 files changed, 146 insertions(+), 32 deletions(-) create mode 100644 include/jemalloc/internal/pai.h diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 172c549f..83fcc4dc 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -7,6 +7,7 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/lockedint.h" +#include 
"jemalloc/internal/pai.h" enum pa_decay_purge_setting_e { PA_DECAY_PURGE_ALWAYS, @@ -110,6 +111,13 @@ struct pa_shard_s { */ atomic_zu_t nactive; + /* + * An interface for page allocation from the ecache framework (i.e. a + * cascade of ecache_dirty, ecache_muzzy, ecache_retained). Right now + * this is the *only* pai, but we'll soon grow another. + */ + pai_t ecache_pai; + /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h new file mode 100644 index 00000000..45edd69c --- /dev/null +++ b/include/jemalloc/internal/pai.h @@ -0,0 +1,45 @@ +#ifndef JEMALLOC_INTERNAL_PAI_H +#define JEMALLOC_INTERNAL_PAI_H + +/* An interface for page allocation. */ + +typedef struct pai_s pai_t; +struct pai_s { + /* Returns NULL on failure. */ + edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); + bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); + bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); + void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); +}; + +/* + * These are just simple convenience functions to avoid having to reference the + * same pai_t twice on every invocation. 
+ */ + +static inline edata_t * +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + return self->alloc(tsdn, self, size, alignment, zero); +} + +static inline bool +pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + return self->expand(tsdn, self, edata, old_size, new_size, zero); +} + +static inline bool +pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + return self->shrink(tsdn, self, edata, old_size, new_size); +} + +static inline void +pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + self->dalloc(tsdn, self, edata); +} + +#endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/src/pa.c b/src/pa.c index 50c64b4f..7a0052e8 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +static edata_t *ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -71,6 +79,11 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, shard->emap = emap; shard->base = base; + shard->ecache_pai.alloc = &ecache_pai_alloc; + shard->ecache_pai.expand = &ecache_pai_expand; + shard->ecache_pai.shrink = &ecache_pai_shrink; + shard->ecache_pai.dalloc = &ecache_pai_dalloc; + return false; } @@ -110,13 +123,11 @@ pa_shard_may_have_muzzy(pa_shard_t *shard) { return pa_shard_muzzy_decay_ms_get(shard) != 0; } -edata_t * -pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t 
size, size_t alignment, - bool slab, szind_t szind, bool zero) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - size_t mapped_add = 0; +static edata_t * +ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, @@ -129,14 +140,25 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->ecache_retained, NULL, size, alignment, zero); - mapped_add = size; + if (config_stats && edata != NULL) { + atomic_fetch_add_zu(&shard->stats->pa_mapped, size, + ATOMIC_RELAXED); + } } + return edata; +} + +edata_t * +pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, + bool slab, szind_t szind, bool zero) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + edata_t *edata = pai_alloc(tsdn, &shard->ecache_pai, size, alignment, + zero); + if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); - if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->pa_mapped, - mapped_add, ATOMIC_RELAXED); - } emap_remap(tsdn, shard->emap, edata, szind, slab); edata_szind_set(edata, szind); edata_slab_set(edata, slab); @@ -147,18 +169,17 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, return edata; } -bool -pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool zero) { - assert(new_size > old_size); - assert(edata_size_get(edata) == old_size); - assert((new_size & PAGE_MASK) == 0); +static bool +ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - 
offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); void *trail_begin = edata_past_get(edata); - size_t expand_amount = new_size - old_size; size_t mapped_add = 0; + size_t expand_amount = new_size - old_size; if (ehooks_merge_will_fail(ehooks)) { return true; @@ -186,12 +207,53 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, ATOMIC_RELAXED); } + return false; +} + +bool +pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, + size_t new_size, szind_t szind, bool zero) { + assert(new_size > old_size); + assert(edata_size_get(edata) == old_size); + assert((new_size & PAGE_MASK) == 0); + + size_t expand_amount = new_size - old_size; + + bool error = pai_expand(tsdn, &shard->ecache_pai, edata, old_size, + new_size, zero); + if (error) { + return true; + } + pa_nactive_add(shard, expand_amount >> LG_PAGE); edata_szind_set(edata, szind); emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); return false; } +static bool +ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); + + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + size_t shrink_amount = old_size - new_size; + + + if (ehooks_split_will_fail(ehooks)) { + return true; + } + + edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, + new_size, shrink_amount); + if (trail == NULL) { + return true; + } + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); + return false; +} + bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool *generated_dirty) { @@ -200,21 +262,13 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, assert((new_size & PAGE_MASK) == 0); size_t shrink_amount = old_size - new_size; - ehooks_t *ehooks = 
pa_shard_ehooks_get(shard); *generated_dirty = false; - - if (ehooks_split_will_fail(ehooks)) { - return true; - } - - edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, - new_size, shrink_amount); - if (trail == NULL) { + bool error = pai_shrink(tsdn, &shard->ecache_pai, edata, old_size, + new_size); + if (error) { return true; } pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); *generated_dirty = true; edata_szind_set(edata, szind); @@ -222,6 +276,14 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return false; } +static void +ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pa_shard_t *shard = + (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); + ehooks_t *ehooks = pa_shard_ehooks_get(shard); + ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); +} + void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { @@ -232,8 +294,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); + pai_dalloc(tsdn, &shard->ecache_pai, edata); *generated_dirty = true; } From 777b0ba9655f6b40b19a8a9c485c186ce9adb551 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 16:57:31 -0700 Subject: [PATCH 1806/2608] Add PAC: Page allocator classic. For now, this is just a stub containing the ecaches, with no surrounding code changed. Eventually all the core allocator bits will be moved in, in the subsequent stack of commits. 
--- include/jemalloc/internal/pa.h | 27 ++++++++------------ include/jemalloc/internal/pac.h | 25 ++++++++++++++++++ src/arena.c | 16 ++++++------ src/background_thread.c | 8 +++--- src/ctl.c | 6 ++--- src/extent.c | 22 ++++++++-------- src/pa.c | 39 ++++++++++++++-------------- src/pa_extra.c | 45 +++++++++++++++++---------------- test/unit/pa.c | 2 +- 9 files changed, 106 insertions(+), 84 deletions(-) create mode 100644 include/jemalloc/internal/pac.h diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 83fcc4dc..d7f22637 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -7,8 +7,17 @@ #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/lockedint.h" +#include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" +/* + * The page allocator; responsible for acquiring pages of memory for + * allocations. It picks the implementation of the page allocator interface + * (i.e. a pai_t) to handle a given page-level allocation request. For now, the + * only such implementation is the PAC code ("page allocator classic"), but + * others will be coming soon. + */ + enum pa_decay_purge_setting_e { PA_DECAY_PURGE_ALWAYS, PA_DECAY_PURGE_NEVER, @@ -16,11 +25,6 @@ enum pa_decay_purge_setting_e { }; typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; -/* - * The page allocator; responsible for acquiring pages of memory for - * allocations. - */ - typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; struct pa_shard_decay_stats_s { /* Total number of purge sweeps. */ @@ -117,16 +121,7 @@ struct pa_shard_s { * this is the *only* pai, but we'll soon grow another. */ pai_t ecache_pai; - - /* - * Collections of extents that were previously allocated. These are - * used when allocating extents, in an attempt to re-use address space. - * - * Synchronization: internal. 
- */ - ecache_t ecache_dirty; - ecache_t ecache_muzzy; - ecache_t ecache_retained; + pac_t pac; /* The source of edata_t objects. */ edata_cache_t edata_cache; @@ -167,7 +162,7 @@ pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { - return ecache_npages_get(&shard->ecache_muzzy) == 0 && + return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && pa_shard_muzzy_decay_ms_get(shard) <= 0; } diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h new file mode 100644 index 00000000..73e672f6 --- /dev/null +++ b/include/jemalloc/internal/pac.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_PAC_H +#define JEMALLOC_INTERNAL_PAC_H + +/* + * Page allocator classic; an implementation of the PAI interface that: + * - Can be used for arenas with custom extent hooks. + * - Can always satisfy any allocation request (including highly-fragmentary + * ones). + * - Can use efficient OS-level zeroing primitives for demand-filled pages. + */ + +typedef struct pac_s pac_t; +struct pac_s { + /* + * Collections of extents that were previously allocated. These are + * used when allocating extents, in an attempt to re-use address space. + * + * Synchronization: internal. 
+ */ + ecache_t ecache_dirty; + ecache_t ecache_muzzy; + ecache_t ecache_retained; +}; + +#endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 573dde99..fb9cb7b5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -454,16 +454,16 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_dirty, - &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, - decay_ms); + &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.ecache_dirty, decay_ms); } bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, - decay_ms); + &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.ecache_muzzy, decay_ms); } static bool @@ -521,8 +521,8 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_dirty, - &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.ecache_dirty, - is_background_thread, all); + &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } static bool @@ -532,8 +532,8 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, return false; } return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.ecache_muzzy, - is_background_thread, all); + &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } void diff --git a/src/background_thread.c b/src/background_thread.c index db11405e..557dbc41 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -201,12 +201,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = 
arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_dirty, &arena->pa_shard.ecache_dirty); + &arena->pa_shard.decay_dirty, &arena->pa_shard.pac.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_muzzy, &arena->pa_shard.ecache_muzzy); + &arena->pa_shard.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); return i1 < i2 ? i1 : i2; } @@ -716,8 +716,8 @@ background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { should_signal = true; } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->pa_shard.ecache_dirty) > 0 || - ecache_npages_get(&arena->pa_shard.ecache_muzzy) > 0 || + (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || + ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || info->npages_to_purge_new > 0)) { should_signal = true; } else { diff --git a/src/ctl.c b/src/ctl.c index 8b9f42ec..0098d931 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3127,9 +3127,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, } MUTEX_PROF_RESET(arena->large_mtx); MUTEX_PROF_RESET(arena->pa_shard.edata_cache.mtx); - MUTEX_PROF_RESET(arena->pa_shard.ecache_dirty.mtx); - MUTEX_PROF_RESET(arena->pa_shard.ecache_muzzy.mtx); - MUTEX_PROF_RESET(arena->pa_shard.ecache_retained.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_retained.mtx); MUTEX_PROF_RESET(arena->pa_shard.decay_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); diff --git a/src/extent.c b/src/extent.c index d6349c31..3d827b8c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -647,7 +647,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, goto label_err; } - 
edata_init(edata, ecache_ind_get(&shard->ecache_retained), ptr, + edata_init(edata, ecache_ind_get(&shard->pac.ecache_retained), ptr, alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); @@ -673,11 +673,11 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, lead, true); + &shard->pac.ecache_retained, lead, true); } if (trail != NULL) { extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, trail, true); + &shard->pac.ecache_retained, trail, true); } } else { /* @@ -690,12 +690,12 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, to_salvage, true); + &shard->pac.ecache_retained, to_salvage, true); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, - &shard->ecache_retained, to_leak, true); + &shard->pac.ecache_retained, to_leak, true); } goto label_err; } @@ -704,7 +704,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, shard, ehooks, - &shard->ecache_retained, edata, true); + &shard->pac.ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. 
*/ @@ -756,8 +756,8 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, shard, ehooks, - &shard->ecache_retained, new_addr, size, alignment, zero, commit, - /* growing_retained */ true); + &shard->pac.ecache_retained, new_addr, size, alignment, zero, + commit, /* growing_retained */ true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); if (config_prof) { @@ -792,7 +792,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_cache_put(tsdn, &shard->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&shard->ecache_dirty), addr, + edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, size, /* slab */ false, SC_NSIZES, pa_shard_extent_sn_next(shard), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); @@ -972,7 +972,7 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &shard->ecache_dirty); + assert(ecache == &shard->pac.ecache_dirty); /* Always coalesce large extents eagerly. */ bool coalesced; do { @@ -1072,7 +1072,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, shard, ehooks, &shard->ecache_retained, edata, + extent_record(tsdn, shard, ehooks, &shard->pac.ecache_retained, edata, false); } diff --git a/src/pa.c b/src/pa.c index 7a0052e8..27fc9ee2 100644 --- a/src/pa.c +++ b/src/pa.c @@ -32,7 +32,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * are likely to be reused soon after deallocation, and the cost of * merging/splitting extents is non-trivial. 
*/ - if (ecache_init(tsdn, &shard->ecache_dirty, extent_state_dirty, ind, + if (ecache_init(tsdn, &shard->pac.ecache_dirty, extent_state_dirty, ind, /* delay_coalesce */ true)) { return true; } @@ -40,7 +40,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * Coalesce muzzy extents immediately, because operations on them are in * the critical path much less often than for dirty extents. */ - if (ecache_init(tsdn, &shard->ecache_muzzy, extent_state_muzzy, ind, + if (ecache_init(tsdn, &shard->pac.ecache_muzzy, extent_state_muzzy, ind, /* delay_coalesce */ false)) { return true; } @@ -50,7 +50,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * coalescing), but also because operations on retained extents are not * in the critical path. */ - if (ecache_init(tsdn, &shard->ecache_retained, extent_state_retained, + if (ecache_init(tsdn, &shard->pac.ecache_retained, extent_state_retained, ind, /* delay_coalesce */ false)) { return true; } @@ -94,8 +94,8 @@ pa_shard_reset(pa_shard_t *shard) { void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { - assert(ecache_npages_get(&shard->ecache_dirty) == 0); - assert(ecache_npages_get(&shard->ecache_muzzy) == 0); + assert(ecache_npages_get(&shard->pac.ecache_dirty) == 0); + assert(ecache_npages_get(&shard->pac.ecache_muzzy) == 0); /* * Iterate over the retained extents and destroy them. 
This gives the * extent allocator underlying the extent hooks an opportunity to unmap @@ -108,7 +108,7 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata; while ((edata = ecache_evict(tsdn, shard, ehooks, - &shard->ecache_retained, 0)) != NULL) { + &shard->pac.ecache_retained, 0)) != NULL) { extent_destroy_wrapper(tsdn, shard, ehooks, edata); } } @@ -131,15 +131,15 @@ ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata = ecache_alloc(tsdn, shard, ehooks, - &shard->ecache_dirty, NULL, size, alignment, zero); + &shard->pac.ecache_dirty, NULL, size, alignment, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { - edata = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - NULL, size, alignment, zero); + edata = ecache_alloc(tsdn, shard, ehooks, + &shard->pac.ecache_muzzy, NULL, size, alignment, zero); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, shard, ehooks, - &shard->ecache_retained, NULL, size, alignment, zero); + &shard->pac.ecache_retained, NULL, size, alignment, zero); if (config_stats && edata != NULL) { atomic_fetch_add_zu(&shard->stats->pa_mapped, size, ATOMIC_RELAXED); @@ -184,16 +184,17 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (ehooks_merge_will_fail(ehooks)) { return true; } - edata_t *trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_dirty, - trail_begin, expand_amount, PAGE, zero); + edata_t *trail = ecache_alloc(tsdn, shard, ehooks, + &shard->pac.ecache_dirty, trail_begin, expand_amount, PAGE, zero); if (trail == NULL) { - trail = ecache_alloc(tsdn, shard, ehooks, &shard->ecache_muzzy, - trail_begin, expand_amount, PAGE, zero); + trail = ecache_alloc(tsdn, shard, ehooks, + &shard->pac.ecache_muzzy, trail_begin, expand_amount, PAGE, + zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, shard, ehooks, - 
&shard->ecache_retained, trail_begin, expand_amount, PAGE, - zero); + &shard->pac.ecache_retained, trail_begin, expand_amount, + PAGE, zero); mapped_add = expand_amount; } if (trail == NULL) { @@ -250,7 +251,7 @@ ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, trail); + ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, trail); return false; } @@ -281,7 +282,7 @@ ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { pa_shard_t *shard = (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, shard, ehooks, &shard->ecache_dirty, edata); + ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, edata); } void @@ -353,7 +354,7 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, edata, /* offset */ 0, size); if (!err) { ecache_dalloc(tsdn, shard, ehooks, - &shard->ecache_muzzy, edata); + &shard->pac.ecache_muzzy, edata); break; } } diff --git a/src/pa_extra.c b/src/pa_extra.c index 1f90f7f7..70ef19b6 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -21,9 +21,9 @@ pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_prefork(tsdn, &shard->ecache_dirty); - ecache_prefork(tsdn, &shard->ecache_muzzy); - ecache_prefork(tsdn, &shard->ecache_retained); + ecache_prefork(tsdn, &shard->pac.ecache_dirty); + ecache_prefork(tsdn, &shard->pac.ecache_muzzy); + ecache_prefork(tsdn, &shard->pac.ecache_retained); } @@ -35,9 +35,9 @@ pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_postfork_parent(tsdn, &shard->edata_cache); - ecache_postfork_parent(tsdn, &shard->ecache_dirty); - ecache_postfork_parent(tsdn, &shard->ecache_muzzy); - ecache_postfork_parent(tsdn, &shard->ecache_retained); + 
ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); + ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); + ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_parent(tsdn, &shard->ecache_grow); malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); @@ -46,9 +46,9 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_postfork_child(tsdn, &shard->edata_cache); - ecache_postfork_child(tsdn, &shard->ecache_dirty); - ecache_postfork_child(tsdn, &shard->ecache_muzzy); - ecache_postfork_child(tsdn, &shard->ecache_retained); + ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); + ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); + ecache_postfork_child(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_child(tsdn, &shard->ecache_grow); malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); @@ -58,8 +58,8 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&shard->ecache_dirty); - *nmuzzy += ecache_npages_get(&shard->ecache_muzzy); + *ndirty += ecache_npages_get(&shard->pac.ecache_dirty); + *nmuzzy += ecache_npages_get(&shard->pac.ecache_muzzy); } void @@ -69,13 +69,13 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, cassert(config_stats); shard_stats_out->retained += - ecache_npages_get(&shard->ecache_retained) << LG_PAGE; + ecache_npages_get(&shard->pac.ecache_retained) << LG_PAGE; shard_stats_out->edata_avail += atomic_load_zu( &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - resident_pgs += ecache_npages_get(&shard->ecache_dirty); + resident_pgs += 
ecache_npages_get(&shard->pac.ecache_dirty); *resident += (resident_pgs << LG_PAGE); /* Dirty decay stats */ @@ -112,12 +112,13 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, retained_bytes; - dirty = ecache_nextents_get(&shard->ecache_dirty, i); - muzzy = ecache_nextents_get(&shard->ecache_muzzy, i); - retained = ecache_nextents_get(&shard->ecache_retained, i); - dirty_bytes = ecache_nbytes_get(&shard->ecache_dirty, i); - muzzy_bytes = ecache_nbytes_get(&shard->ecache_muzzy, i); - retained_bytes = ecache_nbytes_get(&shard->ecache_retained, i); + dirty = ecache_nextents_get(&shard->pac.ecache_dirty, i); + muzzy = ecache_nextents_get(&shard->pac.ecache_muzzy, i); + retained = ecache_nextents_get(&shard->pac.ecache_retained, i); + dirty_bytes = ecache_nbytes_get(&shard->pac.ecache_dirty, i); + muzzy_bytes = ecache_nbytes_get(&shard->pac.ecache_muzzy, i); + retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained, + i); extent_stats_out[i].ndirty = dirty; extent_stats_out[i].nmuzzy = muzzy; @@ -142,11 +143,11 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->edata_cache.mtx, arena_prof_mutex_extent_avail); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->ecache_dirty.mtx, arena_prof_mutex_extents_dirty); + &shard->pac.ecache_dirty.mtx, arena_prof_mutex_extents_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); + &shard->pac.ecache_muzzy.mtx, arena_prof_mutex_extents_muzzy); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->ecache_retained.mtx, arena_prof_mutex_extents_retained); + &shard->pac.ecache_retained.mtx, arena_prof_mutex_extents_retained); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->decay_dirty.mtx, arena_prof_mutex_decay_dirty); 
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, diff --git a/test/unit/pa.c b/test/unit/pa.c index f7b72902..8846f614 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -90,7 +90,7 @@ do_alloc_free_purge(void *arg) { pa_decay_all(TSDN_NULL, &test_data->shard, &test_data->shard.decay_dirty, &test_data->stats.decay_dirty, - &test_data->shard.ecache_dirty, true); + &test_data->shard.pac.ecache_dirty, true); malloc_mutex_unlock(TSDN_NULL, &test_data->shard.decay_dirty.mtx); } From 722652222a159c10f616d61b6dc145d07f84e025 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 17:14:16 -0700 Subject: [PATCH 1807/2608] PAC: Move in edata_cache accesses. --- include/jemalloc/internal/pac.h | 2 ++ src/extent.c | 26 +++++++++++++------------- src/pa.c | 1 + src/pa_extra.c | 1 - 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 73e672f6..bd1c8566 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -20,6 +20,8 @@ struct pac_s { ecache_t ecache_dirty; ecache_t ecache_muzzy; ecache_t ecache_retained; + + edata_cache_t *edata_cache; }; #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/extent.c b/src/extent.c index 3d827b8c..4810a614 100644 --- a/src/extent.c +++ b/src/extent.c @@ -210,7 +210,7 @@ extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); } static void @@ -632,7 +632,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, shard->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); + edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); if (edata == NULL) { goto label_err; } @@ -643,7 +643,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &committed); if (ptr 
== NULL) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); goto label_err; } @@ -653,7 +653,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, shard, edata)) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); goto label_err; } @@ -781,7 +781,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, &shard->edata_cache); + edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); if (edata == NULL) { return NULL; } @@ -789,7 +789,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, &zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); return NULL; } edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, @@ -797,7 +797,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); return NULL; } @@ -1000,7 +1000,7 @@ extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); return; } extent_dalloc_wrapper(tsdn, shard, ehooks, edata); @@ -1023,7 +1023,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - 
edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); } return err; @@ -1093,7 +1093,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, &shard->edata_cache, edata); + edata_cache_put(tsdn, shard->pac.edata_cache, edata); } static bool @@ -1177,7 +1177,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, &shard->edata_cache); + edata_t *trail = edata_cache_get(tsdn, shard->pac.edata_cache); if (trail == NULL) { goto label_error_a; } @@ -1214,7 +1214,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, label_error_c: emap_unlock_edata2(tsdn, shard->emap, edata, trail); label_error_b: - edata_cache_put(tsdn, &shard->edata_cache, trail); + edata_cache_put(tsdn, shard->pac.edata_cache, trail); label_error_a: return NULL; } @@ -1262,7 +1262,7 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, emap_merge_commit(tsdn, shard->emap, &prepare, a, b); emap_unlock_edata2(tsdn, shard->emap, a, b); - edata_cache_put(tsdn, &shard->edata_cache, b); + edata_cache_put(tsdn, shard->pac.edata_cache, b); return false; } diff --git a/src/pa.c b/src/pa.c index 27fc9ee2..f37337de 100644 --- a/src/pa.c +++ b/src/pa.c @@ -57,6 +57,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } + shard->pac.edata_cache = &shard->edata_cache; if (ecache_grow_init(tsdn, &shard->ecache_grow)) { return true; diff --git a/src/pa_extra.c b/src/pa_extra.c index 70ef19b6..caa94d82 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -26,7 +26,6 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { ecache_prefork(tsdn, &shard->pac.ecache_retained); } - void pa_shard_prefork4(tsdn_t 
*tsdn, pa_shard_t *shard) { edata_cache_prefork(tsdn, &shard->edata_cache); From 7efcb946c4707f12728e38f82fae1344591b9757 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 17:32:37 -0700 Subject: [PATCH 1808/2608] PAC: Add an init function. --- Makefile.in | 1 + include/jemalloc/internal/pac.h | 3 ++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/pa.c | 33 ++-------------- src/pac.c | 39 +++++++++++++++++++ 8 files changed, 54 insertions(+), 30 deletions(-) create mode 100644 src/pac.c diff --git a/Makefile.in b/Makefile.in index 7f07d967..2802f7f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -127,6 +127,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ + $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index bd1c8566..5eb1e80e 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -24,4 +24,7 @@ struct pac_s { edata_cache_t *edata_cache; }; +bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, + edata_cache_t *edata_cache); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 00ea2beb..fe147790 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 0bcb45a8..4b7b6baf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files 
diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 446ea606..6bd43c78 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -68,6 +68,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 0bcb45a8..4b7b6baf 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -88,6 +88,9 @@ Source Files + + Source Files + Source Files diff --git a/src/pa.c b/src/pa.c index f37337de..f8fa9222 100644 --- a/src/pa.c +++ b/src/pa.c @@ -26,39 +26,12 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); - /* - * Delay coalescing for dirty extents despite the disruptive effect on - * memory layout for best-fit extent allocation, since cached extents - * are likely to be reused soon after deallocation, and the cost of - * merging/splitting extents is non-trivial. - */ - if (ecache_init(tsdn, &shard->pac.ecache_dirty, extent_state_dirty, ind, - /* delay_coalesce */ true)) { - return true; - } - /* - * Coalesce muzzy extents immediately, because operations on them are in - * the critical path much less often than for dirty extents. - */ - if (ecache_init(tsdn, &shard->pac.ecache_muzzy, extent_state_muzzy, ind, - /* delay_coalesce */ false)) { - return true; - } - /* - * Coalesce retained extents immediately, in part because they will - * never be evicted (and therefore there's no opportunity for delayed - * coalescing), but also because operations on retained extents are not - * in the critical path. 
- */ - if (ecache_init(tsdn, &shard->pac.ecache_retained, extent_state_retained, - ind, /* delay_coalesce */ false)) { - return true; - } if (edata_cache_init(&shard->edata_cache, base)) { return true; } - shard->pac.edata_cache = &shard->edata_cache; - + if (pac_init(tsdn, &shard->pac, ind, &shard->edata_cache)) { + return true; + } if (ecache_grow_init(tsdn, &shard->ecache_grow)) { return true; } diff --git a/src/pac.c b/src/pac.c new file mode 100644 index 00000000..746bd4c8 --- /dev/null +++ b/src/pac.c @@ -0,0 +1,39 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/pac.h" + +bool +pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, edata_cache_t *edata_cache) { + /* + * Delay coalescing for dirty extents despite the disruptive effect on + * memory layout for best-fit extent allocation, since cached extents + * are likely to be reused soon after deallocation, and the cost of + * merging/splitting extents is non-trivial. + */ + if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind, + /* delay_coalesce */ true)) { + return true; + } + /* + * Coalesce muzzy extents immediately, because operations on them are in + * the critical path much less often than for dirty extents. + */ + if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind, + /* delay_coalesce */ false)) { + return true; + } + /* + * Coalesce retained extents immediately, in part because they will + * never be evicted (and therefore there's no opportunity for delayed + * coalescing), but also because operations on retained extents are not + * in the critical path. 
+ */ + if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, + ind, /* delay_coalesce */ false)) { + return true; + } + + pac->edata_cache = edata_cache; + return false; +} From 65803171a7f441f567b5d7e3809df22bda871d62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 May 2020 17:47:04 -0700 Subject: [PATCH 1809/2608] PAC: move in emap --- include/jemalloc/internal/pac.h | 3 +- src/extent.c | 50 ++++++++++++++++----------------- src/pa.c | 2 +- src/pac.c | 4 ++- 4 files changed, 31 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 5eb1e80e..8a89b6d0 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -21,10 +21,11 @@ struct pac_s { ecache_t ecache_muzzy; ecache_t ecache_retained; + emap_t *emap; edata_cache_t *edata_cache; }; -bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, +bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache); #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/extent.c b/src/extent.c index 4810a614..269bc7c1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -282,15 +282,15 @@ extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - emap_lock_edata(tsdn, shard->emap, edata); + emap_lock_edata(tsdn, shard->pac.emap, edata); - if (emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, + if (emap_register_boundary(tsdn, shard->pac.emap, edata, SC_NSIZES, /* slab */ false)) { - emap_unlock_edata(tsdn, shard->emap, edata); + emap_unlock_edata(tsdn, shard->pac.emap, edata); return true; } - emap_unlock_edata(tsdn, shard->emap, edata); + emap_unlock_edata(tsdn, shard->pac.emap, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -321,9 +321,9 @@ extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { static void extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, shard->emap, edata); - emap_deregister_boundary(tsdn, shard->emap, edata); - emap_unlock_edata(tsdn, shard->emap, edata); + emap_lock_edata(tsdn, shard->pac.emap, edata); + emap_deregister_boundary(tsdn, shard->pac.emap, edata); + emap_unlock_edata(tsdn, shard->pac.emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -371,8 +371,8 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, shard->emap, new_addr, - false); + edata = emap_lock_edata_from_addr(tsdn, shard->pac.emap, + new_addr, false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -386,7 +386,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - emap_unlock_edata(tsdn, shard->emap, unlock_edata); + emap_unlock_edata(tsdn, shard->pac.emap, unlock_edata); } } else { /* @@ -545,7 +545,7 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_deregister_no_gdump_sub(tsdn, shard, to_leak); extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, growing_retained); - 
assert(emap_lock_edata_from_addr(tsdn, shard->emap, + assert(emap_lock_edata_from_addr(tsdn, shard->pac.emap, leak, false) == NULL); } return NULL; @@ -863,7 +863,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, again = false; /* Try to coalesce forward. */ - edata_t *next = emap_lock_edata_from_addr(tsdn, shard->emap, + edata_t *next = emap_lock_edata_from_addr(tsdn, shard->pac.emap, edata_past_get(edata), inactive_only); if (next != NULL) { /* @@ -874,7 +874,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - emap_unlock_edata(tsdn, shard->emap, next); + emap_unlock_edata(tsdn, shard->pac.emap, next); if (can_coalesce && !extent_coalesce(tsdn, shard, ehooks, ecache, edata, next, true, @@ -889,12 +889,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } /* Try to coalesce backward. */ - edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->emap, + edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->pac.emap, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - emap_unlock_edata(tsdn, shard->emap, prev); + emap_unlock_edata(tsdn, shard->pac.emap, prev); if (can_coalesce && !extent_coalesce(tsdn, shard, ehooks, ecache, edata, prev, false, @@ -966,7 +966,7 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); - emap_assert_mapped(tsdn, shard->emap, edata); + emap_assert_mapped(tsdn, shard->pac.emap, edata); if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, @@ -1189,13 +1189,13 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_committed_get(edata), edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, shard->emap, &prepare, edata, + bool err = 
emap_split_prepare(tsdn, shard->pac.emap, &prepare, edata, size_a, trail, size_b); if (err) { goto label_error_b; } - emap_lock_edata2(tsdn, shard->emap, edata, trail); + emap_lock_edata2(tsdn, shard->pac.emap, edata, trail); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1205,14 +1205,14 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, shard->emap, &prepare, edata, size_a, trail, + emap_split_commit(tsdn, shard->pac.emap, &prepare, edata, size_a, trail, size_b); - emap_unlock_edata2(tsdn, shard->emap, edata, trail); + emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); return trail; label_error_c: - emap_unlock_edata2(tsdn, shard->emap, edata, trail); + emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); label_error_b: edata_cache_put(tsdn, shard->pac.edata_cache, trail); label_error_a: @@ -1250,17 +1250,17 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, * than extent_{,de}register() to do things in the right order. */ emap_prepare_t prepare; - emap_merge_prepare(tsdn, shard->emap, &prepare, a, b); + emap_merge_prepare(tsdn, shard->pac.emap, &prepare, a, b); - emap_lock_edata2(tsdn, shard->emap, a, b); + emap_lock_edata2(tsdn, shard->pac.emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - emap_merge_commit(tsdn, shard->emap, &prepare, a, b); - emap_unlock_edata2(tsdn, shard->emap, a, b); + emap_merge_commit(tsdn, shard->pac.emap, &prepare, a, b); + emap_unlock_edata2(tsdn, shard->pac.emap, a, b); edata_cache_put(tsdn, shard->pac.edata_cache, b); diff --git a/src/pa.c b/src/pa.c index f8fa9222..9d35dd5d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -29,7 +29,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (pac_init(tsdn, &shard->pac, ind, &shard->edata_cache)) { + if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache)) { return true; } if (ecache_grow_init(tsdn, &shard->ecache_grow)) { diff --git a/src/pac.c b/src/pac.c index 746bd4c8..7df5b02b 100644 --- a/src/pac.c +++ b/src/pac.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/pac.h" bool -pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, edata_cache_t *edata_cache) { +pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, + edata_cache_t *edata_cache) { /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -34,6 +35,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, edata_cache_t *edata_cache) { return true; } + pac->emap = emap; pac->edata_cache = edata_cache; return false; } From c81e389996ef37c0d27b5a28bba0e04337d02a54 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 16:01:53 -0700 Subject: [PATCH 1810/2608] PAC: Move in ecache_grow. 
--- include/jemalloc/internal/pa.h | 2 -- include/jemalloc/internal/pac.h | 5 +++++ src/extent.c | 28 ++++++++++++++-------------- src/pa.c | 24 ++---------------------- src/pa_extra.c | 6 +++--- src/pac.c | 28 ++++++++++++++++++++++++++++ test/unit/retained.c | 2 +- 7 files changed, 53 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d7f22637..0b3e5289 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -126,8 +126,6 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; - /* The grow info for the retained ecache. */ - ecache_grow_t ecache_grow; /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 8a89b6d0..3ad00970 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -23,9 +23,14 @@ struct pac_s { emap_t *emap; edata_cache_t *edata_cache; + + /* The grow info for the retained ecache. */ + ecache_grow_t ecache_grow; }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache); +bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, + size_t *new_limit); #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/extent.c b/src/extent.c index 269bc7c1..ed90a159 100644 --- a/src/extent.c +++ b/src/extent.c @@ -608,7 +608,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, static edata_t * extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &shard->pac.ecache_grow.mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. 
*/ @@ -620,16 +620,16 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(shard->ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(shard->pac.ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (shard->ecache_grow.next + egn_skip >= + if (shard->pac.ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } alloc_size = sz_pind2sz( - shard->ecache_grow.next + egn_skip); + shard->pac.ecache_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); @@ -722,14 +722,14 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (shard->ecache_grow.next + egn_skip + 1 <= - shard->ecache_grow.limit) { - shard->ecache_grow.next += egn_skip + 1; + if (shard->pac.ecache_grow.next + egn_skip + 1 <= + shard->pac.ecache_grow.limit) { + shard->pac.ecache_grow.next += egn_skip + 1; } else { - shard->ecache_grow.next = shard->ecache_grow.limit; + shard->pac.ecache_grow.next = shard->pac.ecache_grow.limit; } /* All opportunities for failure are past. */ - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. 
*/ @@ -743,7 +743,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); return NULL; } @@ -753,13 +753,13 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &shard->pac.ecache_grow.mtx); edata_t *edata = extent_recycle(tsdn, shard, ehooks, &shard->pac.ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -768,9 +768,9 @@ extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &shard->ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->pac.ecache_grow.mtx); return edata; } diff --git a/src/pa.c b/src/pa.c index 9d35dd5d..98deba54 100644 --- a/src/pa.c +++ b/src/pa.c @@ -32,9 +32,6 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache)) { return true; } - if (ecache_grow_init(tsdn, &shard->ecache_grow)) { - return true; - } if (decay_init(&shard->decay_dirty, cur_time, dirty_decay_ms)) { return true; @@ -455,23 +452,6 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, size_t *old_limit, size_t *new_limit) { - pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); - if (new_limit != NULL) { - size_t limit = *new_limit; - /* Grow no more than the new limit. 
*/ - if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { - return true; - } - } - - malloc_mutex_lock(tsdn, &shard->ecache_grow.mtx); - if (old_limit != NULL) { - *old_limit = sz_pind2sz(shard->ecache_grow.limit); - } - if (new_limit != NULL) { - shard->ecache_grow.limit = new_ind; - } - malloc_mutex_unlock(tsdn, &shard->ecache_grow.mtx); - - return false; + return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit, + new_limit); } diff --git a/src/pa_extra.c b/src/pa_extra.c index caa94d82..a755781c 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,7 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_grow_prefork(tsdn, &shard->ecache_grow); + ecache_grow_prefork(tsdn, &shard->pac.ecache_grow); } void @@ -37,7 +37,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_parent(tsdn, &shard->ecache_grow); + ecache_grow_postfork_parent(tsdn, &shard->pac.ecache_grow); malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); } @@ -48,7 +48,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_child(tsdn, &shard->ecache_grow); + ecache_grow_postfork_child(tsdn, &shard->pac.ecache_grow); malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); } diff --git a/src/pac.c b/src/pac.c index 7df5b02b..f30c4bbf 100644 --- a/src/pac.c +++ b/src/pac.c @@ -34,8 +34,36 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, ind, /* delay_coalesce */ 
false)) { return true; } + if (ecache_grow_init(tsdn, &pac->ecache_grow)) { + return true; + } pac->emap = emap; pac->edata_cache = edata_cache; return false; } + +bool +pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, + size_t *new_limit) { + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. */ + if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { + return true; + } + } + + malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(pac->ecache_grow.limit); + } + if (new_limit != NULL) { + pac->ecache_grow.limit = new_ind; + } + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + + return false; +} + diff --git a/test/unit/retained.c b/test/unit/retained.c index cf3de1ea..ef301aa0 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.ecache_grow.next; pind++) { + arena->pa_shard.pac.ecache_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; From db211eefbfe2e35441dad0a7857e073ba4e8130e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 16:35:17 -0700 Subject: [PATCH 1811/2608] PAC: Move in decay. 
--- include/jemalloc/internal/arena_inlines_b.h | 3 --- .../internal/background_thread_inlines.h | 2 +- include/jemalloc/internal/pa.h | 25 +++---------------- include/jemalloc/internal/pac.h | 22 +++++++++++++++- src/arena.c | 12 ++++----- src/background_thread.c | 4 +-- src/ctl.c | 4 +-- src/pa.c | 12 +++------ src/pa_extra.c | 16 ++++++------ src/pac.c | 9 ++++++- test/unit/pa.c | 7 +++--- 11 files changed, 58 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7351db98..335c0797 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -131,9 +131,6 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { JEMALLOC_ALWAYS_INLINE void arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_dirty.mtx); - malloc_mutex_assert_not_owner(tsdn, &arena->pa_shard.decay_muzzy.mtx); - arena_decay_ticks(tsdn, arena, 1); } diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 7bdbe928..71b433cb 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -55,7 +55,7 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, arena_background_thread_info_get(arena); if (background_thread_indefinite_sleep(info)) { background_thread_interval_check(tsdn, arena, - &arena->pa_shard.decay_dirty, 0); + &arena->pa_shard.pac.decay_dirty, 0); } } diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0b3e5289..ca6482a8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -126,22 +126,12 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; - /* Extent serial number generator state. 
*/ atomic_zu_t extent_sn_next; malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; - /* - * Decay-based purging state, responsible for scheduling extent state - * transitions. - * - * Synchronization: via the internal mutex. - */ - decay_t decay_dirty; /* dirty --> muzzy */ - decay_t decay_muzzy; /* muzzy --> retained */ - /* The emap this shard is tied to. */ emap_t *emap; @@ -149,25 +139,16 @@ struct pa_shard_s { base_t *base; }; -static inline ssize_t -pa_shard_dirty_decay_ms_get(pa_shard_t *shard) { - return decay_ms_read(&shard->decay_dirty); -} -static inline ssize_t -pa_shard_muzzy_decay_ms_get(pa_shard_t *shard) { - return decay_ms_read(&shard->decay_muzzy); -} - static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && - pa_shard_muzzy_decay_ms_get(shard) <= 0; + pac_muzzy_decay_ms_get(&shard->pac) <= 0; } static inline bool pa_shard_may_force_decay(pa_shard_t *shard) { - return !(pa_shard_dirty_decay_ms_get(shard) == -1 - || pa_shard_muzzy_decay_ms_get(shard) == -1); + return !(pac_dirty_decay_ms_get(&shard->pac) == -1 + || pac_muzzy_decay_ms_get(&shard->pac) == -1); } static inline ehooks_t * diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 3ad00970..da14b621 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -26,11 +26,31 @@ struct pac_s { /* The grow info for the retained ecache. */ ecache_grow_t ecache_grow; + + /* + * Decay-based purging state, responsible for scheduling extent state + * transitions. + * + * Synchronization: via the internal mutex. 
+ */ + decay_t decay_dirty; /* dirty --> muzzy */ + decay_t decay_muzzy; /* muzzy --> retained */ }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, - edata_cache_t *edata_cache); + edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); +static inline ssize_t +pac_dirty_decay_ms_get(pac_t *pac) { + return decay_ms_read(&pac->decay_dirty); +} + +static inline ssize_t +pac_muzzy_decay_ms_get(pac_t *pac) { + return decay_ms_read(&pac->decay_muzzy); +} + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index fb9cb7b5..9fa2db7f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -397,12 +397,12 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, ssize_t arena_dirty_decay_ms_get(arena_t *arena) { - return pa_shard_dirty_decay_ms_get(&arena->pa_shard); + return pac_dirty_decay_ms_get(&arena->pa_shard.pac); } ssize_t arena_muzzy_decay_ms_get(arena_t *arena) { - return pa_shard_muzzy_decay_ms_get(&arena->pa_shard); + return pac_muzzy_decay_ms_get(&arena->pa_shard.pac); } /* @@ -453,7 +453,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_dirty, + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, decay_ms); } @@ -461,7 +461,7 @@ arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.decay_muzzy, + return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, decay_ms); } @@ -520,7 +520,7 @@ 
arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { - return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_dirty, + return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } @@ -531,7 +531,7 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { return false; } - return arena_decay_impl(tsdn, arena, &arena->pa_shard.decay_muzzy, + return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } diff --git a/src/background_thread.c b/src/background_thread.c index 557dbc41..a36836cb 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -201,12 +201,12 @@ static uint64_t arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { uint64_t i1, i2; i1 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_dirty, &arena->pa_shard.pac.ecache_dirty); + &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.pac.ecache_dirty); if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { return i1; } i2 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); + &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); return i1 < i2 ? 
i1 : i2; } diff --git a/src/ctl.c b/src/ctl.c index 0098d931..56dcf823 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3130,8 +3130,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_dirty.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_muzzy.mtx); MUTEX_PROF_RESET(arena->pa_shard.pac.ecache_retained.mtx); - MUTEX_PROF_RESET(arena->pa_shard.decay_dirty.mtx); - MUTEX_PROF_RESET(arena->pa_shard.decay_muzzy.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.decay_dirty.mtx); + MUTEX_PROF_RESET(arena->pa_shard.pac.decay_muzzy.mtx); MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); diff --git a/src/pa.c b/src/pa.c index 98deba54..501d57c3 100644 --- a/src/pa.c +++ b/src/pa.c @@ -29,14 +29,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache)) { - return true; - } - - if (decay_init(&shard->decay_dirty, cur_time, dirty_decay_ms)) { - return true; - } - if (decay_init(&shard->decay_muzzy, cur_time, muzzy_decay_ms)) { + if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache, + cur_time, dirty_decay_ms, muzzy_decay_ms)) { return true; } @@ -91,7 +85,7 @@ pa_shard_extent_sn_next(pa_shard_t *shard) { static bool pa_shard_may_have_muzzy(pa_shard_t *shard) { - return pa_shard_muzzy_decay_ms_get(shard) != 0; + return pac_muzzy_decay_ms_get(&shard->pac) != 0; } static edata_t * diff --git a/src/pa_extra.c b/src/pa_extra.c index a755781c..ae5855aa 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -10,8 +10,8 @@ void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { - malloc_mutex_prefork(tsdn, &shard->decay_dirty.mtx); - malloc_mutex_prefork(tsdn, &shard->decay_muzzy.mtx); + malloc_mutex_prefork(tsdn, &shard->pac.decay_dirty.mtx); + malloc_mutex_prefork(tsdn, &shard->pac.decay_muzzy.mtx); } void @@ -38,8 +38,8 @@ pa_shard_postfork_parent(tsdn_t 
*tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_parent(tsdn, &shard->pac.ecache_grow); - malloc_mutex_postfork_parent(tsdn, &shard->decay_dirty.mtx); - malloc_mutex_postfork_parent(tsdn, &shard->decay_muzzy.mtx); + malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); + malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); } void @@ -49,8 +49,8 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); ecache_grow_postfork_child(tsdn, &shard->pac.ecache_grow); - malloc_mutex_postfork_child(tsdn, &shard->decay_dirty.mtx); - malloc_mutex_postfork_child(tsdn, &shard->decay_muzzy.mtx); + malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); + malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); } void @@ -148,7 +148,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->pac.ecache_retained.mtx, arena_prof_mutex_extents_retained); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->decay_dirty.mtx, arena_prof_mutex_decay_dirty); + &shard->pac.decay_dirty.mtx, arena_prof_mutex_decay_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, - &shard->decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); + &shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); } diff --git a/src/pac.c b/src/pac.c index f30c4bbf..1e20d652 100644 --- a/src/pac.c +++ b/src/pac.c @@ -5,7 +5,8 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, - edata_cache_t *edata_cache) { + edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms) { /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -37,6 
+38,12 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, if (ecache_grow_init(tsdn, &pac->ecache_grow)) { return true; } + if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { + return true; + } + if (decay_init(&pac->decay_muzzy, cur_time, muzzy_decay_ms)) { + return true; + } pac->emap = emap; pac->edata_cache = edata_cache; diff --git a/test/unit/pa.c b/test/unit/pa.c index 8846f614..7cd9fa17 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -86,13 +86,14 @@ do_alloc_free_purge(void *arg) { bool generated_dirty; pa_dalloc(TSDN_NULL, &test_data->shard, edata, &generated_dirty); - malloc_mutex_lock(TSDN_NULL, &test_data->shard.decay_dirty.mtx); + malloc_mutex_lock(TSDN_NULL, + &test_data->shard.pac.decay_dirty.mtx); pa_decay_all(TSDN_NULL, &test_data->shard, - &test_data->shard.decay_dirty, + &test_data->shard.pac.decay_dirty, &test_data->stats.decay_dirty, &test_data->shard.pac.ecache_dirty, true); malloc_mutex_unlock(TSDN_NULL, - &test_data->shard.decay_dirty.mtx); + &test_data->shard.pac.decay_dirty.mtx); } return NULL; } From 73913823491ef32a7ea1471de1ef185219e44d41 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 17:42:27 -0700 Subject: [PATCH 1812/2608] PA->PAC: Move in stats. 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/ctl.h | 2 +- include/jemalloc/internal/pa.h | 67 ++++------------------- include/jemalloc/internal/pac.h | 64 +++++++++++++++++++++- src/arena.c | 18 +++--- src/ctl.c | 62 ++++++++++----------- src/extent.c | 17 +++--- src/pa.c | 19 ++++--- src/pa_extra.c | 46 ++++++++-------- src/pac.c | 4 +- test/unit/pa.c | 2 +- 11 files changed, 159 insertions(+), 144 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 40dad716..e6e9a0b9 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -27,7 +27,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pa_extent_stats_t *estats); + pac_estats_t *estats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index e0b46fa3..fbc432bf 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -44,7 +44,7 @@ typedef struct ctl_arena_stats_s { bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; - pa_extent_stats_t estats[SC_NPSIZES]; + pac_estats_t estats[SC_NPSIZES]; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index ca6482a8..2891d7c4 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -25,33 +25,6 @@ enum pa_decay_purge_setting_e { }; typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; -typedef struct pa_shard_decay_stats_s pa_shard_decay_stats_t; -struct 
pa_shard_decay_stats_s { - /* Total number of purge sweeps. */ - locked_u64_t npurge; - /* Total number of madvise calls made. */ - locked_u64_t nmadvise; - /* Total number of pages purged. */ - locked_u64_t purged; -}; - -typedef struct pa_extent_stats_s pa_extent_stats_t; -struct pa_extent_stats_s { - /* - * Stats for a given index in the range [0, SC_NPSIZES] in the various - * ecache_ts. - * We track both bytes and # of extents: two extents in the same bucket - * may have different sizes if adjacent size classes differ by more than - * a page, so bytes cannot always be derived from # of extents. - */ - size_t ndirty; - size_t dirty_bytes; - size_t nmuzzy; - size_t muzzy_bytes; - size_t nretained; - size_t retained_bytes; -}; - /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -65,30 +38,15 @@ struct pa_extent_stats_s { */ typedef struct pa_shard_stats_s pa_shard_stats_t; struct pa_shard_stats_s { - pa_shard_decay_stats_t decay_dirty; - pa_shard_decay_stats_t decay_muzzy; - - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - size_t retained; /* Derived. */ - - /* - * Number of bytes currently mapped, excluding retained memory (and any - * base-allocated memory, which is tracked by the arena stats). - * - * We name this "pa_mapped" to avoid confusion with the arena_stats - * "mapped". - */ - atomic_zu_t pa_mapped; - /* Number of edata_t structs allocated by base, but not being used. */ size_t edata_avail; /* Derived. */ - - /* VM space had to be leaked (undocumented). Normally 0. */ - atomic_zu_t abandoned_vm; + /* + * Stats specific to the PAC. For now, these are the only stats that + * exist, but there will eventually be other page allocators. 
Things + * like edata_avail make sense in a cross-PA sense, but things like + * npurges don't. + */ + pac_stats_t pac_stats; }; /* @@ -208,14 +166,14 @@ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, * concurrently with the call. */ void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); /* * Updates decay settings for the current time, and conditionally purges in * response (depending on decay_purge_setting). Returns whether or not the * epoch advanced. */ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting); /* @@ -251,13 +209,8 @@ void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); -static inline size_t -pa_shard_pa_mapped(pa_shard_t *shard) { - return atomic_load_zu(&shard->stats->pa_mapped, ATOMIC_RELAXED); -} - void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, - pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, size_t *resident); /* diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index da14b621..14ee09f3 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -9,6 +9,58 @@ * - Can use efficient OS-level zeroing primitives for demand-filled pages. */ +typedef struct pac_decay_stats_s pac_decay_stats_t; +struct pac_decay_stats_s { + /* Total number of purge sweeps. */ + locked_u64_t npurge; + /* Total number of madvise calls made. */ + locked_u64_t nmadvise; + /* Total number of pages purged. 
*/ + locked_u64_t purged; +}; + +typedef struct pac_estats_s pac_estats_t; +struct pac_estats_s { + /* + * Stats for a given index in the range [0, SC_NPSIZES] in the various + * ecache_ts. + * We track both bytes and # of extents: two extents in the same bucket + * may have different sizes if adjacent size classes differ by more than + * a page, so bytes cannot always be derived from # of extents. + */ + size_t ndirty; + size_t dirty_bytes; + size_t nmuzzy; + size_t muzzy_bytes; + size_t nretained; + size_t retained_bytes; +}; + +typedef struct pac_stats_s pac_stats_t; +struct pac_stats_s { + pac_decay_stats_t decay_dirty; + pac_decay_stats_t decay_muzzy; + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + size_t retained; /* Derived. */ + + /* + * Number of bytes currently mapped, excluding retained memory (and any + * base-allocated memory, which is tracked by the arena stats). + * + * We name this "pac_mapped" to avoid confusion with the arena_stats + * "mapped". + */ + atomic_zu_t pac_mapped; + + /* VM space had to be leaked (undocumented). Normally 0. 
*/ + atomic_zu_t abandoned_vm; +}; + typedef struct pac_s pac_t; struct pac_s { /* @@ -35,13 +87,18 @@ struct pac_s { */ decay_t decay_dirty; /* dirty --> muzzy */ decay_t decay_muzzy; /* muzzy --> retained */ + + malloc_mutex_t *stats_mtx; + pac_stats_t *stats; }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms); + ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); +void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, + pac_estats_t *estats_out, size_t *resident); static inline ssize_t pac_dirty_decay_ms_get(pac_t *pac) { @@ -53,4 +110,9 @@ pac_muzzy_decay_ms_get(pac_t *pac) { return decay_ms_read(&pac->decay_muzzy); } +static inline size_t +pac_mapped(pac_t *pac) { + return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); +} + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 9fa2db7f..619060f4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -80,7 +80,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pa_extent_stats_t *estats) { + pac_estats_t *estats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -89,8 +89,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, &base_mapped, &metadata_thp); - size_t pa_mapped = pa_shard_pa_mapped(&arena->pa_shard); - astats->mapped += base_mapped + pa_mapped; + size_t pac_mapped_sz = pac_mapped(&arena->pa_shard.pac); + 
astats->mapped += base_mapped + pac_mapped_sz; astats->resident += base_resident; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); @@ -423,7 +423,7 @@ arena_decide_unforced_decay_purge_setting(bool is_background_thread) { static bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { if (!decay_ms_valid(decay_ms)) { return true; } @@ -454,7 +454,7 @@ bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_dirty, - &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, decay_ms); } @@ -462,13 +462,13 @@ bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms) { return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.stats->decay_muzzy, &arena->pa_shard.pac.ecache_muzzy, decay_ms); } static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); @@ -521,7 +521,7 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty, - &arena->pa_shard.stats->decay_dirty, + &arena->pa_shard.pac.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } @@ -532,7 +532,7 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, return false; } return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, - &arena->pa_shard.stats->decay_muzzy, + &arena->pa_shard.pac.stats->decay_muzzy, 
&arena->pa_shard.pac.ecache_muzzy, is_background_thread, all); } diff --git a/src/ctl.c b/src/ctl.c index 56dcf823..8b4b764a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -831,7 +831,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * - sizeof(pa_extent_stats_t)); + sizeof(pac_estats_t)); } } @@ -889,32 +889,31 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->astats.mapped += astats->astats.mapped; - sdstats->astats.pa_shard_stats.retained - += astats->astats.pa_shard_stats.retained; + sdstats->astats.pa_shard_stats.pac_stats.retained + += astats->astats.pa_shard_stats.pac_stats.retained; sdstats->astats.pa_shard_stats.edata_avail += astats->astats.pa_shard_stats.edata_avail; } + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise); + ctl_accum_locked_u64( + &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.purged, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_dirty.npurge, - &astats->astats.pa_shard_stats.decay_dirty.npurge); + &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_dirty.nmadvise, - &astats->astats.pa_shard_stats.decay_dirty.nmadvise); + &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise); ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_dirty.purged, - &astats->astats.pa_shard_stats.decay_dirty.purged); - - 
ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_muzzy.npurge, - &astats->astats.pa_shard_stats.decay_muzzy.npurge); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_muzzy.nmadvise, - &astats->astats.pa_shard_stats.decay_muzzy.nmadvise); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.decay_muzzy.purged, - &astats->astats.pa_shard_stats.decay_muzzy.purged); + &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ &(sdstats->astats.mutex_prof_data[ \ @@ -957,8 +956,8 @@ MUTEX_PROF_ARENA_MUTEXES += astats->astats.nrequests_large; sdstats->astats.nflushes_large += astats->astats.nflushes_large; ctl_accum_atomic_zu( - &sdstats->astats.pa_shard_stats.abandoned_vm, - &astats->astats.pa_shard_stats.abandoned_vm); + &sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm, + &astats->astats.pa_shard_stats.pac_stats.abandoned_vm); sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; @@ -1117,8 +1116,8 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = ctl_sarena->astats->astats.mapped; - ctl_stats->retained = - ctl_sarena->astats->astats.pa_shard_stats.retained; + ctl_stats->retained = ctl_sarena->astats->astats + .pa_shard_stats.pac_stats.retained; ctl_background_thread_stats_read(tsdn); @@ -2976,35 +2975,34 @@ CTL_RO_GEN(stats_arenas_i_pmuzzy, arenas_i(mib[2])->pmuzzy, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_mapped, arenas_i(mib[2])->astats->astats.mapped, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_retained, - arenas_i(mib[2])->astats->astats.pa_shard_stats.retained, - size_t) + arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, 
locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.npurge), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.nmadvise), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_dirty.purged), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.npurge), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.nmadvise), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.decay_muzzy.purged), + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, @@ -3022,7 +3020,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, atomic_load_zu( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.abandoned_vm, + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, diff --git a/src/extent.c b/src/extent.c index ed90a159..fb6ccebd 100644 --- a/src/extent.c +++ b/src/extent.c @@ 
-196,7 +196,7 @@ extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&shard->stats->abandoned_vm, sz, + atomic_fetch_add_zu(&shard->pac.stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* @@ -938,21 +938,20 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_dalloc_wrapper(tsdn, shard, ehooks, edata); if (config_stats) { /* Update stats accordingly. */ - LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); + LOCKEDINT_MTX_LOCK(tsdn, *shard->pac.stats_mtx); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.nmadvise, 1); + LOCKEDINT_MTX(*shard->pac.stats_mtx), + &shard->pac.stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.purged, + LOCKEDINT_MTX(*shard->pac.stats_mtx), + &shard->pac.stats->decay_dirty.purged, extent_size >> LG_PAGE); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->pa_mapped, extent_size, + LOCKEDINT_MTX_UNLOCK(tsdn, *shard->pac.stats_mtx); + atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, extent_size, ATOMIC_RELAXED); } } - /* * Does the metadata management portions of putting an unused extent into the * given ecache_t (coalesces and inserts into the eset). 
diff --git a/src/pa.c b/src/pa.c index 501d57c3..3ca8e35d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -30,7 +30,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, return true; } if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache, - cur_time, dirty_decay_ms, muzzy_decay_ms)) { + cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, + stats_mtx)) { return true; } @@ -106,7 +107,7 @@ ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, edata = ecache_alloc_grow(tsdn, shard, ehooks, &shard->pac.ecache_retained, NULL, size, alignment, zero); if (config_stats && edata != NULL) { - atomic_fetch_add_zu(&shard->stats->pa_mapped, size, + atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, size, ATOMIC_RELAXED); } } @@ -170,7 +171,7 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->stats->pa_mapped, mapped_add, + atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, mapped_add, ATOMIC_RELAXED); } return false; @@ -288,7 +289,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, static size_t pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, edata_list_inactive_t *decay_extents) { bool err; @@ -343,7 +344,7 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), &decay_stats->purged, npurged); LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->stats->pa_mapped, + atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, nunmapped << LG_PAGE, ATOMIC_RELAXED); } @@ -359,7 +360,7 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, */ static void pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - 
pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -386,7 +387,7 @@ pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { malloc_mutex_assert_owner(tsdn, &decay->mtx); pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, fully_decay, /* npages_limit */ 0, ecache_npages_get(ecache)); @@ -394,7 +395,7 @@ pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, static void pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages, size_t npages_limit) { if (current_npages > npages_limit) { pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, @@ -405,7 +406,7 @@ pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pa_shard_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_stats_t *decay_stats, ecache_t *ecache, pa_decay_purge_setting_t decay_purge_setting) { malloc_mutex_assert_owner(tsdn, &decay->mtx); diff --git a/src/pa_extra.c b/src/pa_extra.c index ae5855aa..26a196b6 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -63,13 +63,13 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, - pa_shard_stats_t *shard_stats_out, pa_extent_stats_t *extent_stats_out, + pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, size_t *resident) { cassert(config_stats); - 
shard_stats_out->retained += + pa_shard_stats_out->pac_stats.retained += ecache_npages_get(&shard->pac.ecache_retained) << LG_PAGE; - shard_stats_out->edata_avail += atomic_load_zu( + pa_shard_stats_out->edata_avail += atomic_load_zu( &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; @@ -79,34 +79,34 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, /* Dirty decay stats */ locked_inc_u64_unsynchronized( - &shard_stats_out->decay_dirty.npurge, + &pa_shard_stats_out->pac_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.npurge)); + &shard->pac.stats->decay_dirty.npurge)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_dirty.nmadvise, + &pa_shard_stats_out->pac_stats.decay_dirty.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.nmadvise)); + &shard->pac.stats->decay_dirty.nmadvise)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_dirty.purged, + &pa_shard_stats_out->pac_stats.decay_dirty.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_dirty.purged)); + &shard->pac.stats->decay_dirty.purged)); /* Muzzy decay stats */ locked_inc_u64_unsynchronized( - &shard_stats_out->decay_muzzy.npurge, + &pa_shard_stats_out->pac_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_muzzy.npurge)); + &shard->pac.stats->decay_muzzy.npurge)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_muzzy.nmadvise, + &pa_shard_stats_out->pac_stats.decay_muzzy.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_muzzy.nmadvise)); + &shard->pac.stats->decay_muzzy.nmadvise)); locked_inc_u64_unsynchronized( - &shard_stats_out->decay_muzzy.purged, + &pa_shard_stats_out->pac_stats.decay_muzzy.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->stats->decay_muzzy.purged)); + 
&shard->pac.stats->decay_muzzy.purged)); - atomic_load_add_store_zu(&shard_stats_out->abandoned_vm, - atomic_load_zu(&shard->stats->abandoned_vm, ATOMIC_RELAXED)); + atomic_load_add_store_zu(&pa_shard_stats_out->pac_stats.abandoned_vm, + atomic_load_zu(&shard->pac.stats->abandoned_vm, ATOMIC_RELAXED)); for (pszind_t i = 0; i < SC_NPSIZES; i++) { size_t dirty, muzzy, retained, dirty_bytes, muzzy_bytes, @@ -119,12 +119,12 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained, i); - extent_stats_out[i].ndirty = dirty; - extent_stats_out[i].nmuzzy = muzzy; - extent_stats_out[i].nretained = retained; - extent_stats_out[i].dirty_bytes = dirty_bytes; - extent_stats_out[i].muzzy_bytes = muzzy_bytes; - extent_stats_out[i].retained_bytes = retained_bytes; + estats_out[i].ndirty = dirty; + estats_out[i].nmuzzy = muzzy; + estats_out[i].nretained = retained; + estats_out[i].dirty_bytes = dirty_bytes; + estats_out[i].muzzy_bytes = muzzy_bytes; + estats_out[i].retained_bytes = retained_bytes; } } diff --git a/src/pac.c b/src/pac.c index 1e20d652..9192f54c 100644 --- a/src/pac.c +++ b/src/pac.c @@ -6,7 +6,7 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms) { + ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -47,6 +47,8 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, pac->emap = emap; pac->edata_cache = edata_cache; + pac->stats = pac_stats; + pac->stats_mtx = stats_mtx; return false; } diff --git a/test/unit/pa.c b/test/unit/pa.c index 7cd9fa17..17889b53 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -90,7 +90,7 @@ do_alloc_free_purge(void *arg) { &test_data->shard.pac.decay_dirty.mtx); 
pa_decay_all(TSDN_NULL, &test_data->shard, &test_data->shard.pac.decay_dirty, - &test_data->stats.decay_dirty, + &test_data->shard.pac.stats->decay_dirty, &test_data->shard.pac.ecache_dirty, true); malloc_mutex_unlock(TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); From dee5d1c42de6e0908e1ee8e3c4c89cffcbee72ff Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 18:01:19 -0700 Subject: [PATCH 1813/2608] PA->PAC: Move in extent_sn. --- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/pa.h | 5 ----- include/jemalloc/internal/pac.h | 3 +++ src/extent.c | 9 +++++++-- src/extent_dss.c | 7 ++++--- src/pa.c | 6 ------ src/pac.c | 1 + 7 files changed, 16 insertions(+), 17 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 2f14b81f..2eb53f66 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -48,7 +48,7 @@ edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, edata_t *b); - +size_t extent_sn_next(pac_t *pac); bool extent_boot(void); #endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 2891d7c4..e6ed1fd9 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -84,9 +84,6 @@ struct pa_shard_s { /* The source of edata_t objects. */ edata_cache_t edata_cache; - /* Extent serial number generator state. */ - atomic_zu_t extent_sn_next; - malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; @@ -131,8 +128,6 @@ void pa_shard_reset(pa_shard_t *shard); */ void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard); -size_t pa_shard_extent_sn_next(pa_shard_t *shard); - /* Gets an edata for the given allocation. 
*/ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero); diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 14ee09f3..d1d68534 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -90,6 +90,9 @@ struct pac_s { malloc_mutex_t *stats_mtx; pac_stats_t *stats; + + /* Extent serial number generator state. */ + atomic_zu_t extent_sn_next; }; bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, diff --git a/src/extent.c b/src/extent.c index fb6ccebd..bb5daba1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -50,6 +50,11 @@ static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, /******************************************************************************/ +size_t +extent_sn_next(pac_t *pac) { + return atomic_fetch_add_zu(&pac->extent_sn_next, 1, ATOMIC_RELAXED); +} + static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { @@ -648,7 +653,7 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&shard->pac.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, pa_shard_extent_sn_next(shard), + alloc_size, false, SC_NSIZES, extent_sn_next(&shard->pac), extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); @@ -793,7 +798,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, pa_shard_extent_sn_next(shard), + size, /* slab */ false, SC_NSIZES, extent_sn_next(&shard->pac), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); if (extent_register(tsdn, shard, edata)) { diff --git a/src/extent_dss.c b/src/extent_dss.c index 81161b3a..dff231d9 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -154,9 +154,10 @@ 
extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, - SC_NSIZES, pa_shard_extent_sn_next( - &arena->pa_shard), extent_state_active, - false, true, false, EXTENT_NOT_HEAD); + SC_NSIZES, extent_sn_next( + &arena->pa_shard.pac), + extent_state_active, false, true, false, + EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired diff --git a/src/pa.c b/src/pa.c index 3ca8e35d..2f970c76 100644 --- a/src/pa.c +++ b/src/pa.c @@ -35,7 +35,6 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, return true; } - atomic_store_zu(&shard->extent_sn_next, 0, ATOMIC_RELAXED); atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; @@ -79,11 +78,6 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { } } -size_t -pa_shard_extent_sn_next(pa_shard_t *shard) { - return atomic_fetch_add_zu(&shard->extent_sn_next, 1, ATOMIC_RELAXED); -} - static bool pa_shard_may_have_muzzy(pa_shard_t *shard) { return pac_muzzy_decay_ms_get(&shard->pac) != 0; diff --git a/src/pac.c b/src/pac.c index 9192f54c..8ff6f1c8 100644 --- a/src/pac.c +++ b/src/pac.c @@ -49,6 +49,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, pac->edata_cache = edata_cache; pac->stats = pac_stats; pac->stats_mtx = stats_mtx; + atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); return false; } From 72435b0aba3e121d598be10e865f43d9491c71e2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Jun 2020 18:49:42 -0700 Subject: [PATCH 1814/2608] PA->PAC: Make extent.c forget about PA. 
--- include/jemalloc/internal/extent.h | 20 +- include/jemalloc/internal/pa.h | 6 - src/extent.c | 366 ++++++++++++++--------------- src/extent_dss.c | 2 +- src/pa.c | 34 +-- 5 files changed, 211 insertions(+), 217 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 2eb53f66..f6207362 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -19,22 +19,22 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; -edata_t *ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); -edata_t *ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); -void ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -edata_t *ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); -void extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); -void extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); -void extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +void 
extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); @@ -44,9 +44,9 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, +edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); -bool extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); size_t extent_sn_next(pac_t *pac); bool extent_boot(void); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index e6ed1fd9..9482380a 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -100,12 +100,6 @@ pa_shard_dont_decay_muzzy(pa_shard_t *shard) { pac_muzzy_decay_ms_get(&shard->pac) <= 0; } -static inline bool -pa_shard_may_force_decay(pa_shard_t *shard) { - return !(pac_dirty_decay_ms_get(&shard->pac) == -1 - || pac_muzzy_decay_ms_get(&shard->pac) == -1); -} - static inline ehooks_t * pa_shard_ehooks_get(pa_shard_t *shard) { return base_ehooks_get(shard->base); diff --git a/src/extent.c b/src/extent.c index bb5daba1..87d6a9a2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,10 +19,9 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); -static edata_t *extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, - bool growing_retained); -static bool 
extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); +static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained); /* Used exclusively for gdump triggering. */ @@ -35,16 +34,15 @@ static atomic_zu_t highpages; * definition. */ -static void extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata); -static edata_t *extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t usize, - size_t alignment, bool zero, bool *commit, bool growing_retained); -static edata_t *extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, - bool growing_retained); -static void extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); +static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool zero, + bool *commit, bool growing_retained); +static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); +static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); -static edata_t *extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, +static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); @@ -55,12 +53,18 @@ extent_sn_next(pac_t *pac) { return atomic_fetch_add_zu(&pac->extent_sn_next, 1, ATOMIC_RELAXED); } +static inline bool +extent_may_force_decay(pac_t *pac) { + return !(pac_dirty_decay_ms_get(pac) == -1 + || 
pac_muzzy_decay_ms_get(pac) == -1); +} + static bool -extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { +extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + ecache_t *ecache, edata_t *edata) { edata_state_set(edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, + edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, &coalesced, false); edata_state_set(edata, ecache->state); @@ -72,32 +76,30 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pa_shard_t *shard, } edata_t * -ecache_alloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, - bool zero) { +ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_recycle(tsdn, shard, ehooks, ecache, - new_addr, size, alignment, zero, &commit, false); + edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, new_addr, + size, alignment, zero, &commit, false); assert(edata == NULL || !edata_ranged_get(edata)); return edata; } edata_t * -ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, - bool zero) { +ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_alloc_retained(tsdn, shard, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, new_addr, size, alignment, zero, &commit); if (edata == NULL) { if 
(opt_retain && new_addr != NULL) { @@ -109,7 +111,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ return NULL; } - edata = extent_alloc_wrapper(tsdn, shard, ehooks, new_addr, + edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size, alignment, zero, &commit); } @@ -118,8 +120,8 @@ ecache_alloc_grow(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } void -ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata) { +ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); assert(!edata_ranged_get(edata)); @@ -129,11 +131,11 @@ ecache_dalloc(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, shard, ehooks, ecache, edata, false); + extent_record(tsdn, pac, ehooks, ecache, edata, false); } edata_t * -ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min) { malloc_mutex_lock(tsdn, &ecache->mtx); @@ -159,7 +161,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, break; } /* Try to coalesce. */ - if (extent_try_delayed_coalesce(tsdn, shard, ehooks, ecache, + if (extent_try_delayed_coalesce(tsdn, pac, ehooks, ecache, edata)) { break; } @@ -181,7 +183,7 @@ ecache_evict(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_state_set(edata, extent_state_active); break; case extent_state_retained: - extent_deregister(tsdn, shard, edata); + extent_deregister(tsdn, pac, edata); break; default: not_reached(); @@ -197,11 +199,11 @@ label_return: * indicates OOM), e.g. when trying to split an existing extent. 
*/ static void -extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained) { +extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&shard->pac.stats->abandoned_vm, sz, + atomic_fetch_add_zu(&pac->stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* @@ -215,7 +217,7 @@ extents_abandon_vm(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), growing_retained); } } - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); } static void @@ -281,21 +283,20 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { } static bool -extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool gdump_add) { +extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) { /* * We need to hold the lock to protect against a concurrent coalesce * operation that sees us in a partial state. 
*/ - emap_lock_edata(tsdn, shard->pac.emap, edata); + emap_lock_edata(tsdn, pac->emap, edata); - if (emap_register_boundary(tsdn, shard->pac.emap, edata, SC_NSIZES, + if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES, /* slab */ false)) { - emap_unlock_edata(tsdn, shard->pac.emap, edata); + emap_unlock_edata(tsdn, pac->emap, edata); return true; } - emap_unlock_edata(tsdn, shard->pac.emap, edata); + emap_unlock_edata(tsdn, pac->emap, edata); if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); @@ -305,18 +306,18 @@ extent_register_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } static bool -extent_register(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - return extent_register_impl(tsdn, shard, edata, true); +extent_register(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + return extent_register_impl(tsdn, pac, edata, true); } static bool -extent_register_no_gdump_add(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - return extent_register_impl(tsdn, shard, edata, false); +extent_register_no_gdump_add(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + return extent_register_impl(tsdn, pac, edata, false); } static void -extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - bool err = extent_register(tsdn, shard, edata); +extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + bool err = extent_register(tsdn, pac, edata); assert(!err); } @@ -324,11 +325,11 @@ extent_reregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { * Removes all pointers to the given extent from the global rtree. 
*/ static void -extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, +extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, shard->pac.emap, edata); - emap_deregister_boundary(tsdn, shard->pac.emap, edata); - emap_unlock_edata(tsdn, shard->pac.emap, edata); + emap_lock_edata(tsdn, pac->emap, edata); + emap_deregister_boundary(tsdn, pac->emap, edata); + emap_unlock_edata(tsdn, pac->emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -336,14 +337,14 @@ extent_deregister_impl(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } static void -extent_deregister(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata) { - extent_deregister_impl(tsdn, shard, edata, true); +extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { + extent_deregister_impl(tsdn, pac, edata, true); } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, +extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { - extent_deregister_impl(tsdn, shard, edata, false); + extent_deregister_impl(tsdn, pac, edata, false); } /* @@ -351,7 +352,7 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pa_shard_t *shard, * given allocation request. 
*/ static edata_t * -extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -376,8 +377,8 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, shard->pac.emap, - new_addr, false); + edata = emap_lock_edata_from_addr(tsdn, pac->emap, new_addr, + false); if (edata != NULL) { /* * We might null-out edata to report an error, but we @@ -391,7 +392,7 @@ extent_recycle_extract(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, != ecache->state) { edata = NULL; } - emap_unlock_edata(tsdn, shard->pac.emap, unlock_edata); + emap_unlock_edata(tsdn, pac->emap, unlock_edata); } } else { /* @@ -451,7 +452,7 @@ typedef enum { } extent_split_interior_result_t; static extent_split_interior_result_t -extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ @@ -473,7 +474,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the lead. */ if (leadsize != 0) { *lead = *edata; - *edata = extent_split_impl(tsdn, shard, ehooks, *lead, leadsize, + *edata = extent_split_impl(tsdn, pac, ehooks, *lead, leadsize, size + trailsize, growing_retained); if (*edata == NULL) { *to_leak = *lead; @@ -484,7 +485,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { - *trail = extent_split_impl(tsdn, shard, ehooks, *edata, size, + *trail = extent_split_impl(tsdn, pac, ehooks, *edata, size, trailsize, growing_retained); if (*trail == NULL) { *to_leak = *edata; @@ -505,7 +506,7 @@ extent_split_interior(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * and put back into ecache. */ static edata_t * -extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { edata_t *lead; @@ -514,7 +515,7 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior( - tsdn, shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, + tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, new_addr, size, alignment, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok @@ -543,14 +544,14 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, */ assert(result == extent_split_interior_error); if (to_salvage != NULL) { - extent_deregister(tsdn, shard, to_salvage); + extent_deregister(tsdn, pac, to_salvage); } if (to_leak != NULL) { void *leak = edata_base_get(to_leak); - extent_deregister_no_gdump_sub(tsdn, shard, to_leak); - extents_abandon_vm(tsdn, shard, ehooks, ecache, to_leak, + extent_deregister_no_gdump_sub(tsdn, pac, to_leak); + extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, shard->pac.emap, + assert(emap_lock_edata_from_addr(tsdn, pac->emap, leak, false) == NULL); } return NULL; @@ -563,18 +564,18 @@ extent_recycle_split(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * in the given ecache_t. 
*/ static edata_t * -extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero, - bool *commit, bool growing_retained) { +extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); - edata_t *edata = extent_recycle_extract(tsdn, shard, ehooks, ecache, + edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, new_addr, size, alignment, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, shard, ehooks, ecache, new_addr, + edata = extent_recycle_split(tsdn, pac, ehooks, ecache, new_addr, size, alignment, edata, growing_retained); if (edata == NULL) { return NULL; @@ -583,7 +584,7 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, shard, ehooks, ecache, edata, + extent_record(tsdn, pac, ehooks, ecache, edata, growing_retained); return NULL; } @@ -611,9 +612,9 @@ extent_recycle(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * virtual memory ranges retained by each shard. */ static edata_t * -extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &pac->ecache_grow.mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -625,19 +626,18 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * satisfy this request. 
*/ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(shard->pac.ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (shard->pac.ecache_grow.next + egn_skip >= + if (pac->ecache_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz( - shard->pac.ecache_grow.next + egn_skip); + alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); } - edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); + edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); if (edata == NULL) { goto label_err; } @@ -648,17 +648,17 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, &committed); if (ptr == NULL) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); goto label_err; } - edata_init(edata, ecache_ind_get(&shard->pac.ecache_retained), ptr, - alloc_size, false, SC_NSIZES, extent_sn_next(&shard->pac), + edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr, + alloc_size, false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zeroed, committed, /* ranged */ false, EXTENT_IS_HEAD); - if (extent_register_no_gdump_add(tsdn, shard, edata)) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + if (extent_register_no_gdump_add(tsdn, pac, edata)) { + edata_cache_put(tsdn, pac->edata_cache, edata); goto label_err; } @@ -672,17 +672,17 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); extent_split_interior_result_t result = extent_split_interior(tsdn, - shard, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, + pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, alignment, /* growing_retained */ true); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, shard, ehooks, - 
&shard->pac.ecache_retained, lead, true); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + lead, true); } if (trail != NULL) { - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, trail, true); + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, trail, true); } } else { /* @@ -694,13 +694,13 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (config_prof) { extent_gdump_add(tsdn, to_salvage); } - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, to_salvage, true); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + to_salvage, true); } if (to_leak != NULL) { - extent_deregister_no_gdump_sub(tsdn, shard, to_leak); - extents_abandon_vm(tsdn, shard, ehooks, - &shard->pac.ecache_retained, to_leak, true); + extent_deregister_no_gdump_sub(tsdn, pac, to_leak); + extents_abandon_vm(tsdn, pac, ehooks, + &pac->ecache_retained, to_leak, true); } goto label_err; } @@ -708,8 +708,8 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { - extent_record(tsdn, shard, ehooks, - &shard->pac.ecache_retained, edata, true); + extent_record(tsdn, pac, ehooks, + &pac->ecache_retained, edata, true); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -727,14 +727,13 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (shard->pac.ecache_grow.next + egn_skip + 1 <= - shard->pac.ecache_grow.limit) { - shard->pac.ecache_grow.next += egn_skip + 1; + if (pac->ecache_grow.next + egn_skip + 1 <= pac->ecache_grow.limit) { + pac->ecache_grow.next += egn_skip + 1; } else { - shard->pac.ecache_grow.next = shard->pac.ecache_grow.limit; + pac->ecache_grow.next = pac->ecache_grow.limit; } /* All opportunities for failure are past. 
*/ - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -748,45 +747,45 @@ extent_grow_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); return NULL; } static edata_t * -extent_alloc_retained(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); - edata_t *edata = extent_recycle(tsdn, shard, ehooks, - &shard->pac.ecache_retained, new_addr, size, alignment, zero, + edata_t *edata = extent_recycle(tsdn, pac, ehooks, + &pac->ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { - edata = extent_grow_retained(tsdn, shard, ehooks, size, + edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &shard->pac.ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &pac->ecache_grow.mtx); return edata; } edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = edata_cache_get(tsdn, shard->pac.edata_cache); + edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); if (edata == NULL) { return NULL; } @@ -794,15 +793,15 @@ extent_alloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, &zero, commit); if (addr == NULL) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&shard->pac.ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, extent_sn_next(&shard->pac), + edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, + size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, /* ranged */ false, EXTENT_NOT_HEAD); - if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + if (extent_register(tsdn, pac, edata)) { + edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; } @@ -831,15 +830,14 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, } static bool -extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, - ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, - bool growing_retained) { +extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *inner, edata_t *outer, bool forward, bool growing_retained) 
{ assert(extent_can_coalesce(ecache, inner, outer)); extent_activate_locked(tsdn, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); - bool err = extent_merge_impl(tsdn, shard, ehooks, + bool err = extent_merge_impl(tsdn, pac, ehooks, forward ? inner : outer, forward ? outer : inner, growing_retained); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -851,7 +849,7 @@ extent_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, bool inactive_only) { /* @@ -868,7 +866,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, again = false; /* Try to coalesce forward. */ - edata_t *next = emap_lock_edata_from_addr(tsdn, shard->pac.emap, + edata_t *next = emap_lock_edata_from_addr(tsdn, pac->emap, edata_past_get(edata), inactive_only); if (next != NULL) { /* @@ -879,9 +877,9 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, bool can_coalesce = extent_can_coalesce(ecache, edata, next); - emap_unlock_edata(tsdn, shard->pac.emap, next); + emap_unlock_edata(tsdn, pac->emap, next); - if (can_coalesce && !extent_coalesce(tsdn, shard, + if (can_coalesce && !extent_coalesce(tsdn, pac, ehooks, ecache, edata, next, true, growing_retained)) { if (ecache->delay_coalesce) { @@ -894,14 +892,14 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } /* Try to coalesce backward. 
*/ - edata_t *prev = emap_lock_edata_from_addr(tsdn, shard->pac.emap, + edata_t *prev = emap_lock_edata_from_addr(tsdn, pac->emap, edata_before_get(edata), inactive_only); if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); - emap_unlock_edata(tsdn, shard->pac.emap, prev); + emap_unlock_edata(tsdn, pac->emap, prev); - if (can_coalesce && !extent_coalesce(tsdn, shard, + if (can_coalesce && !extent_coalesce(tsdn, pac, ehooks, ecache, edata, prev, false, growing_retained)) { edata = prev; @@ -922,37 +920,37 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } static edata_t * -extent_try_coalesce(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, coalesced, growing_retained, false); } static edata_t * -extent_try_coalesce_large(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { - return extent_try_coalesce_impl(tsdn, shard, ehooks, ecache, edata, + return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, coalesced, growing_retained, true); } /* Purge a single extent to retained / unmapped directly. */ static void -extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { size_t extent_size = edata_size_get(edata); - extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); if (config_stats) { /* Update stats accordingly. 
*/ - LOCKEDINT_MTX_LOCK(tsdn, *shard->pac.stats_mtx); + LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->pac.stats_mtx), - &shard->pac.stats->decay_dirty.nmadvise, 1); + LOCKEDINT_MTX(*pac->stats_mtx), + &pac->stats->decay_dirty.nmadvise, 1); locked_inc_u64(tsdn, - LOCKEDINT_MTX(*shard->pac.stats_mtx), - &shard->pac.stats->decay_dirty.purged, + LOCKEDINT_MTX(*pac->stats_mtx), + &pac->stats->decay_dirty.purged, extent_size >> LG_PAGE); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->pac.stats_mtx); - atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, extent_size, + LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); + atomic_fetch_sub_zu(&pac->stats->pac_mapped, extent_size, ATOMIC_RELAXED); } } @@ -962,7 +960,7 @@ extent_maximally_purge(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * given ecache_t (coalesces and inserts into the eset). */ static void -extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || @@ -970,25 +968,25 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); - emap_assert_mapped(tsdn, shard->pac.emap, edata); + emap_assert_mapped(tsdn, pac->emap, edata); if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, shard, ehooks, ecache, edata, + edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, NULL, growing_retained); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { - assert(ecache == &shard->pac.ecache_dirty); + assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. 
*/ bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); - edata = extent_try_coalesce_large(tsdn, shard, ehooks, + edata = extent_try_coalesce_large(tsdn, pac, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); if (edata_size_get(edata) >= oversize_threshold && - pa_shard_may_force_decay(shard)) { + extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. */ malloc_mutex_unlock(tsdn, &ecache->mtx); - extent_maximally_purge(tsdn, shard, ehooks, edata); + extent_maximally_purge(tsdn, pac, ehooks, edata); return; } } @@ -998,20 +996,20 @@ extent_record(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } void -extent_dalloc_gap(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (extent_register(tsdn, shard, edata)) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + if (extent_register(tsdn, pac, edata)) { + edata_cache_put(tsdn, pac->edata_cache, edata); return; } - extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { bool err; @@ -1027,14 +1025,14 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_size_get(edata), edata_committed_get(edata)); if (!err) { - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); } return err; } void -extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(!edata_ranged_get(edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1046,11 +1044,11 @@ 
extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. */ - extent_deregister(tsdn, shard, edata); - if (!extent_dalloc_wrapper_try(tsdn, shard, ehooks, edata)) { + extent_deregister(tsdn, pac, edata); + if (!extent_dalloc_wrapper_try(tsdn, pac, ehooks, edata)) { return; } - extent_reregister(tsdn, shard, edata); + extent_reregister(tsdn, pac, edata); } /* Try to decommit; purge if that fails. */ @@ -1076,12 +1074,12 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, shard, ehooks, &shard->pac.ecache_retained, edata, + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata, false); } void -extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); @@ -1089,7 +1087,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, shard, edata); + extent_deregister(tsdn, pac, edata); edata_addr_set(edata, edata_base_get(edata)); @@ -1097,7 +1095,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, ehooks_destroy(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), edata_committed_get(edata)); - edata_cache_put(tsdn, shard->pac.edata_cache, edata); + edata_cache_put(tsdn, pac->edata_cache, edata); } static bool @@ -1171,7 +1169,7 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). 
*/ static edata_t * -extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool growing_retained) { assert(edata_size_get(edata) == size_a + size_b); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -1181,7 +1179,7 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, return NULL; } - edata_t *trail = edata_cache_get(tsdn, shard->pac.edata_cache); + edata_t *trail = edata_cache_get(tsdn, pac->edata_cache); if (trail == NULL) { goto label_error_a; } @@ -1193,13 +1191,13 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_committed_get(edata), edata_ranged_get(edata), EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, shard->pac.emap, &prepare, edata, + bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); if (err) { goto label_error_b; } - emap_lock_edata2(tsdn, shard->pac.emap, edata, trail); + emap_lock_edata2(tsdn, pac->emap, edata, trail); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); @@ -1209,29 +1207,29 @@ extent_split_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, shard->pac.emap, &prepare, edata, size_a, trail, + emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); - emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); + emap_unlock_edata2(tsdn, pac->emap, edata, trail); return trail; label_error_c: - emap_unlock_edata2(tsdn, shard->pac.emap, edata, trail); + emap_unlock_edata2(tsdn, pac->emap, edata, trail); label_error_b: - edata_cache_put(tsdn, shard->pac.edata_cache, trail); + edata_cache_put(tsdn, pac->edata_cache, trail); label_error_a: return NULL; } edata_t * -extent_split_wrapper(tsdn_t *tsdn, pa_shard_t *shard, - ehooks_t *ehooks, edata_t *edata, 
size_t size_a, size_t size_b) { - return extent_split_impl(tsdn, shard, ehooks, edata, size_a, size_b, +extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, + size_t size_a, size_t size_b) { + return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, /* growing_retained */ false); } static bool -extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, +extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1254,27 +1252,27 @@ extent_merge_impl(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, edata_t *a, * than extent_{,de}register() to do things in the right order. */ emap_prepare_t prepare; - emap_merge_prepare(tsdn, shard->pac.emap, &prepare, a, b); + emap_merge_prepare(tsdn, pac->emap, &prepare, a, b); - emap_lock_edata2(tsdn, shard->pac.emap, a, b); + emap_lock_edata2(tsdn, pac->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); - emap_merge_commit(tsdn, shard->pac.emap, &prepare, a, b); - emap_unlock_edata2(tsdn, shard->pac.emap, a, b); + emap_merge_commit(tsdn, pac->emap, &prepare, a, b); + emap_unlock_edata2(tsdn, pac->emap, a, b); - edata_cache_put(tsdn, shard->pac.edata_cache, b); + edata_cache_put(tsdn, pac->edata_cache, b); return false; } bool -extent_merge_wrapper(tsdn_t *tsdn, pa_shard_t *shard, ehooks_t *ehooks, +extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, shard, ehooks, a, b, false); + return extent_merge_impl(tsdn, pac, ehooks, a, b, false); } bool diff --git a/src/extent_dss.c b/src/extent_dss.c index dff231d9..7427cd8f 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -189,7 +189,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, ehooks_t *ehooks = arena_get_ehooks( arena); extent_dalloc_gap(tsdn, - &arena->pa_shard, ehooks, gap); + &arena->pa_shard.pac, ehooks, gap); } else { edata_cache_put(tsdn, &arena->pa_shard.edata_cache, gap); diff --git a/src/pa.c b/src/pa.c index 2f970c76..e8c88a05 100644 --- a/src/pa.c +++ b/src/pa.c @@ -72,9 +72,9 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { */ ehooks_t *ehooks = pa_shard_ehooks_get(shard); edata_t *edata; - while ((edata = ecache_evict(tsdn, shard, ehooks, + while ((edata = ecache_evict(tsdn, &shard->pac, ehooks, &shard->pac.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, shard, ehooks, edata); + extent_destroy_wrapper(tsdn, &shard->pac, ehooks, edata); } } @@ -90,15 +90,15 @@ ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata = ecache_alloc(tsdn, shard, ehooks, + edata_t *edata = ecache_alloc(tsdn, &shard->pac, ehooks, 
&shard->pac.ecache_dirty, NULL, size, alignment, zero); if (edata == NULL && pa_shard_may_have_muzzy(shard)) { - edata = ecache_alloc(tsdn, shard, ehooks, + edata = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_muzzy, NULL, size, alignment, zero); } if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, shard, ehooks, + edata = ecache_alloc_grow(tsdn, &shard->pac, ehooks, &shard->pac.ecache_retained, NULL, size, alignment, zero); if (config_stats && edata != NULL) { atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, size, @@ -144,15 +144,15 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (ehooks_merge_will_fail(ehooks)) { return true; } - edata_t *trail = ecache_alloc(tsdn, shard, ehooks, + edata_t *trail = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, trail_begin, expand_amount, PAGE, zero); if (trail == NULL) { - trail = ecache_alloc(tsdn, shard, ehooks, + trail = ecache_alloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_muzzy, trail_begin, expand_amount, PAGE, zero); } if (trail == NULL) { - trail = ecache_alloc_grow(tsdn, shard, ehooks, + trail = ecache_alloc_grow(tsdn, &shard->pac, ehooks, &shard->pac.ecache_retained, trail_begin, expand_amount, PAGE, zero); mapped_add = expand_amount; @@ -160,8 +160,8 @@ ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, if (trail == NULL) { return true; } - if (extent_merge_wrapper(tsdn, shard, ehooks, edata, trail)) { - extent_dalloc_wrapper(tsdn, shard, ehooks, trail); + if (extent_merge_wrapper(tsdn, &shard->pac, ehooks, edata, trail)) { + extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, trail); return true; } if (config_stats && mapped_add > 0) { @@ -206,12 +206,13 @@ ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } - edata_t *trail = extent_split_wrapper(tsdn, shard, ehooks, edata, + edata_t *trail = extent_split_wrapper(tsdn, &shard->pac, ehooks, edata, new_size, shrink_amount); 
if (trail == NULL) { return true; } - ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, trail); + ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, + trail); return false; } @@ -242,7 +243,8 @@ ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { pa_shard_t *shard = (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, shard, ehooks, &shard->pac.ecache_dirty, edata); + ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, + edata); } void @@ -270,7 +272,7 @@ pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, /* Stash extents according to npages_limit. */ size_t nstashed = 0; while (nstashed < npages_decay_max) { - edata_t *edata = ecache_evict(tsdn, shard, ehooks, ecache, + edata_t *edata = ecache_evict(tsdn, &shard->pac, ehooks, ecache, npages_limit); if (edata == NULL) { break; @@ -313,14 +315,14 @@ pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, err = extent_purge_lazy_wrapper(tsdn, ehooks, edata, /* offset */ 0, size); if (!err) { - ecache_dalloc(tsdn, shard, ehooks, + ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_muzzy, edata); break; } } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, shard, ehooks, edata); + extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, edata); nunmapped += npages; break; case extent_state_retained: From 4ee75be3a3d549619930cf07b5bc8a3809eab008 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 2 Jun 2020 12:45:39 -0700 Subject: [PATCH 1815/2608] PA -> PAC: Move in decay_purge enum. 
--- include/jemalloc/internal/pa.h | 9 +-------- include/jemalloc/internal/pac.h | 8 ++++++++ src/arena.c | 12 ++++++------ src/pa.c | 6 +++--- src/pac.c | 1 - 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 9482380a..e5a46f95 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -18,13 +18,6 @@ * others will be coming soon. */ -enum pa_decay_purge_setting_e { - PA_DECAY_PURGE_ALWAYS, - PA_DECAY_PURGE_NEVER, - PA_DECAY_PURGE_ON_EPOCH_ADVANCE -}; -typedef enum pa_decay_purge_setting_e pa_decay_purge_setting_t; - /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -163,7 +156,7 @@ void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, */ bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pa_decay_purge_setting_t decay_purge_setting); + pac_decay_purge_setting_t decay_purge_setting); /* * Gets / sets the maximum amount that we'll grow an arena down the diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index d1d68534..aa4a76af 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -9,6 +9,14 @@ * - Can use efficient OS-level zeroing primitives for demand-filled pages. */ +/* How "eager" decay/purging should be. */ +enum pac_decay_purge_setting_e { + PAC_DECAY_PURGE_ALWAYS, + PAC_DECAY_PURGE_NEVER, + PAC_DECAY_PURGE_ON_EPOCH_ADVANCE +}; +typedef enum pac_decay_purge_setting_e pac_decay_purge_setting_t; + typedef struct pac_decay_stats_s pac_decay_stats_t; struct pac_decay_stats_s { /* Total number of purge sweeps. 
*/ diff --git a/src/arena.c b/src/arena.c index 619060f4..95dea18f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -410,14 +410,14 @@ arena_muzzy_decay_ms_get(arena_t *arena) { * specifically requested it), should we purge ourselves, or wait for the * background thread to get to it. */ -static pa_decay_purge_setting_t +static pac_decay_purge_setting_t arena_decide_unforced_decay_purge_setting(bool is_background_thread) { if (is_background_thread) { - return PA_DECAY_PURGE_ALWAYS; + return PAC_DECAY_PURGE_ALWAYS; } else if (!is_background_thread && background_thread_enabled()) { - return PA_DECAY_PURGE_NEVER; + return PAC_DECAY_PURGE_NEVER; } else { - return PA_DECAY_PURGE_ON_EPOCH_ADVANCE; + return PAC_DECAY_PURGE_ON_EPOCH_ADVANCE; } } @@ -440,7 +440,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, nstime_t cur_time; nstime_init_update(&cur_time); decay_reinit(decay, &cur_time, decay_ms); - pa_decay_purge_setting_t decay_purge = + pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting( /* is_background_thread */ false); pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, @@ -497,7 +497,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. 
*/ return true; } - pa_decay_purge_setting_t decay_purge = + pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting(is_background_thread); bool epoch_advanced = pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, decay_purge); diff --git a/src/pa.c b/src/pa.c index e8c88a05..66a9fbc7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -403,7 +403,7 @@ pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pa_decay_purge_setting_t decay_purge_setting) { + pac_decay_purge_setting_t decay_purge_setting) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. */ @@ -429,9 +429,9 @@ pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, size_t npages_current = ecache_npages_get(ecache); bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, npages_current); - if (decay_purge_setting == PA_DECAY_PURGE_ALWAYS + if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS || (epoch_advanced && decay_purge_setting - == PA_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { size_t npages_limit = decay_npages_limit_get(decay); pa_decay_try_purge(tsdn, shard, decay, decay_stats, ecache, npages_current, npages_limit); diff --git a/src/pac.c b/src/pac.c index 8ff6f1c8..e2e6b58f 100644 --- a/src/pac.c +++ b/src/pac.c @@ -76,4 +76,3 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, return false; } - From 6a2774719fe6b4cdae35c4a087afc2ef7f8c9110 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 3 Jun 2020 14:43:28 -0700 Subject: [PATCH 1816/2608] PA->PAC: Move in decay functions. 
--- include/jemalloc/internal/pa.h | 33 ------ include/jemalloc/internal/pac.h | 36 +++++- src/arena.c | 14 +-- src/pa.c | 2 +- src/pac.c | 187 +++++++++++++++++++++++++++++++- test/unit/pa.c | 2 +- 6 files changed, 230 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index e5a46f95..a2fa0ba4 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -137,39 +137,6 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); -/* - * All purging functions require holding decay->mtx. This is one of the few - * places external modules are allowed to peek inside pa_shard_t internals. - */ - -/* - * Decays the number of pages currently in the ecache. This might not leave the - * ecache empty if other threads are inserting dirty objects into it - * concurrently with the call. - */ -void pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); -/* - * Updates decay settings for the current time, and conditionally purges in - * response (depending on decay_purge_setting). Returns whether or not the - * epoch advanced. - */ -bool pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting); - -/* - * Gets / sets the maximum amount that we'll grow an arena down the - * grow-retained pathways (unless forced to by an allocaction request). - * - * Set new_limit to NULL if it's just a query, or old_limit to NULL if you don't - * care about the previous value. - * - * Returns true on error (if the new limit is not valid). 
- */ -bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, - size_t *old_limit, size_t *new_limit); - /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index aa4a76af..6c3721fb 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -81,6 +81,7 @@ struct pac_s { ecache_t ecache_muzzy; ecache_t ecache_retained; + base_t *base; emap_t *emap; edata_cache_t *edata_cache; @@ -103,7 +104,7 @@ struct pac_s { atomic_zu_t extent_sn_next; }; -bool pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, +bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, @@ -126,4 +127,37 @@ pac_mapped(pac_t *pac) { return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); } +/* + * All purging functions require holding decay->mtx. This is one of the few + * places external modules are allowed to peek inside pa_shard_t internals. + */ + +/* + * Decays the number of pages currently in the ecache. This might not leave the + * ecache empty if other threads are inserting dirty objects into it + * concurrently with the call. + */ +void pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay); +/* + * Updates decay settings for the current time, and conditionally purges in + * response (depending on decay_purge_setting). Returns whether or not the + * epoch advanced. 
+ */ +bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_purge_setting_t decay_purge_setting); + +/* + * Gets / sets the maximum amount that we'll grow an arena down the + * grow-retained pathways (unless forced to by an allocaction request). + * + * Set new_limit to NULL if it's just a query, or old_limit to NULL if you don't + * care about the previous value. + * + * Returns true on error (if the new limit is not valid). + */ +bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, + size_t *new_limit); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 95dea18f..8263d8ee 100644 --- a/src/arena.c +++ b/src/arena.c @@ -443,8 +443,8 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting( /* is_background_thread */ false); - pa_maybe_decay_purge(tsdn, &arena->pa_shard, decay, decay_stats, ecache, - decay_purge); + pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, + ecache, decay_purge); malloc_mutex_unlock(tsdn, &decay->mtx); return false; @@ -472,8 +472,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, bool is_background_thread, bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); - pa_decay_all(tsdn, &arena->pa_shard, decay, decay_stats, ecache, - /* fully_decay */ all); + pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, + ecache, /* fully_decay */ all); malloc_mutex_unlock(tsdn, &decay->mtx); /* * The previous pa_decay_all call may not have actually decayed @@ -499,7 +499,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } pac_decay_purge_setting_t decay_purge = arena_decide_unforced_decay_purge_setting(is_background_thread); - bool epoch_advanced = pa_maybe_decay_purge(tsdn, &arena->pa_shard, + bool epoch_advanced = pac_maybe_decay_purge(tsdn, 
&arena->pa_shard.pac, decay, decay_stats, ecache, decay_purge); size_t npages_new; if (epoch_advanced) { @@ -1401,8 +1401,8 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - return pa_shard_retain_grow_limit_get_set(tsd_tsdn(tsd), - &arena->pa_shard, old_limit, new_limit); + return pac_retain_grow_limit_get_set(tsd_tsdn(tsd), + &arena->pa_shard.pac, old_limit, new_limit); } unsigned diff --git a/src/pa.c b/src/pa.c index 66a9fbc7..43dc318f 100644 --- a/src/pa.c +++ b/src/pa.c @@ -29,7 +29,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } - if (pac_init(tsdn, &shard->pac, ind, emap, &shard->edata_cache, + if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; diff --git a/src/pac.c b/src/pac.c index e2e6b58f..5ed1151d 100644 --- a/src/pac.c +++ b/src/pac.c @@ -3,10 +3,16 @@ #include "jemalloc/internal/pac.h" +static ehooks_t * +pac_ehooks_get(pac_t *pac) { + return base_ehooks_get(pac->base); +} + bool -pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, +pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { + unsigned ind = base_ind_get(base); /* * Delay coalescing for dirty extents despite the disruptive effect on * memory layout for best-fit extent allocation, since cached extents @@ -45,6 +51,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, unsigned ind, emap_t *emap, return true; } + pac->base = base; pac->emap = emap; pac->edata_cache = edata_cache; pac->stats = pac_stats; @@ -76,3 +83,181 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, return false; } + +static size_t +pac_stash_decayed(tsdn_t *tsdn, 
pac_t *pac, ecache_t *ecache, + size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + ehooks_t *ehooks = pac_ehooks_get(pac); + + /* Stash extents according to npages_limit. */ + size_t nstashed = 0; + while (nstashed < npages_decay_max) { + edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache, + npages_limit); + if (edata == NULL) { + break; + } + edata_list_append(result, edata); + nstashed += edata_size_get(edata) >> LG_PAGE; + } + return nstashed; +} + +static size_t +pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + edata_list_t *decay_extents) { + bool err; + + size_t nmadvise = 0; + size_t nunmapped = 0; + size_t npurged = 0; + + ehooks_t *ehooks = pac_ehooks_get(pac); + + bool try_muzzy = !fully_decay && pac_muzzy_decay_ms_get(pac) != 0; + + for (edata_t *edata = edata_list_first(decay_extents); edata != + NULL; edata = edata_list_first(decay_extents)) { + edata_list_remove(decay_extents, edata); + + size_t size = edata_size_get(edata); + size_t npages = size >> LG_PAGE; + + nmadvise++; + npurged += npages; + + switch (ecache->state) { + case extent_state_active: + not_reached(); + case extent_state_dirty: + if (try_muzzy) { + err = extent_purge_lazy_wrapper(tsdn, ehooks, + edata, /* offset */ 0, size); + if (!err) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_muzzy, edata); + break; + } + } + JEMALLOC_FALLTHROUGH; + case extent_state_muzzy: + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); + nunmapped += npages; + break; + case extent_state_retained: + default: + not_reached(); + } + } + + if (config_stats) { + LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &decay_stats->npurge, 1); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &decay_stats->nmadvise, nmadvise); + locked_inc_u64(tsdn, 
LOCKEDINT_MTX(*pac->stats_mtx), + &decay_stats->purged, npurged); + LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); + atomic_fetch_sub_zu(&pac->stats->pac_mapped, + nunmapped << LG_PAGE, ATOMIC_RELAXED); + } + + return npurged; +} + +/* + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. + */ +static void +pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, + size_t npages_limit, size_t npages_decay_max) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 1); + + if (decay->purging || npages_decay_max == 0) { + return; + } + decay->purging = true; + malloc_mutex_unlock(tsdn, &decay->mtx); + + edata_list_t decay_extents; + edata_list_init(&decay_extents); + size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, + npages_decay_max, &decay_extents); + if (npurge != 0) { + size_t npurged = pac_decay_stashed(tsdn, pac, decay, + decay_stats, ecache, fully_decay, &decay_extents); + assert(npurged == npurge); + } + + malloc_mutex_lock(tsdn, &decay->mtx); + decay->purging = false; +} + +void +pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); + pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, fully_decay, + /* npages_limit */ 0, ecache_npages_get(ecache)); +} + +static void +pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + size_t current_npages, size_t npages_limit) { + if (current_npages > npages_limit) { + pac_decay_to_limit(tsdn, pac, 
decay, decay_stats, ecache, + /* fully_decay */ false, npages_limit, + current_npages - npages_limit); + } +} + +bool +pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, + pac_decay_stats_t *decay_stats, ecache_t *ecache, + pac_decay_purge_setting_t decay_purge_setting) { + malloc_mutex_assert_owner(tsdn, &decay->mtx); + + /* Purge all or nothing if the option is disabled. */ + ssize_t decay_ms = decay_ms_read(decay); + if (decay_ms <= 0) { + if (decay_ms == 0) { + pac_decay_to_limit(tsdn, pac, decay, decay_stats, + ecache, /* fully_decay */ false, + /* npages_limit */ 0, ecache_npages_get(ecache)); + } + return false; + } + + /* + * If the deadline has been reached, advance to the current epoch and + * purge to the new limit if necessary. Note that dirty pages created + * during the current epoch are not subject to purge until a future + * epoch, so as a result purging only happens during epoch advances, or + * being triggered by background threads (scheduled event). + */ + nstime_t time; + nstime_init_update(&time); + size_t npages_current = ecache_npages_get(ecache); + bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, + npages_current); + if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS + || (epoch_advanced && decay_purge_setting + == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + size_t npages_limit = decay_npages_limit_get(decay); + pac_decay_try_purge(tsdn, pac, decay, decay_stats, ecache, + npages_current, npages_limit); + } + + return epoch_advanced; +} diff --git a/test/unit/pa.c b/test/unit/pa.c index 17889b53..63cd976f 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -88,7 +88,7 @@ do_alloc_free_purge(void *arg) { &generated_dirty); malloc_mutex_lock(TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); - pa_decay_all(TSDN_NULL, &test_data->shard, + pac_decay_all(TSDN_NULL, &test_data->shard.pac, &test_data->shard.pac.decay_dirty, &test_data->shard.pac.stats->decay_dirty, &test_data->shard.pac.ecache_dirty, true); From 
471eb5913cfdef1d102219ddab683066e3462f43 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 3 Jun 2020 18:30:33 -0700 Subject: [PATCH 1817/2608] PAC: Move in decay rate setting. --- include/jemalloc/internal/arena_externs.h | 7 +- include/jemalloc/internal/pa.h | 6 +- include/jemalloc/internal/pac.h | 25 +++---- src/arena.c | 86 +++++++---------------- src/ctl.c | 10 +-- src/extent.c | 4 +- src/pa.c | 66 ++++------------- src/pac.c | 70 ++++++++++++++++-- test/unit/pa.c | 1 - 9 files changed, 126 insertions(+), 149 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e6e9a0b9..674c98f5 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -40,10 +40,9 @@ void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldsize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldsize); -ssize_t arena_dirty_decay_ms_get(arena_t *arena); -bool arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); -ssize_t arena_muzzy_decay_ms_get(arena_t *arena); -bool arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, ssize_t decay_ms); +bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, + ssize_t decay_ms); +ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); void arena_reset(tsd_t *tsd, arena_t *arena); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index a2fa0ba4..4bdd8ac1 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -90,7 +90,7 @@ struct pa_shard_s { static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && - pac_muzzy_decay_ms_get(&shard->pac) <= 0; + pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0; } 
static inline ehooks_t * @@ -137,6 +137,10 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); +bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); +ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); + /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 6c3721fb..de01c519 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -10,12 +10,12 @@ */ /* How "eager" decay/purging should be. */ -enum pac_decay_purge_setting_e { - PAC_DECAY_PURGE_ALWAYS, - PAC_DECAY_PURGE_NEVER, - PAC_DECAY_PURGE_ON_EPOCH_ADVANCE +enum pac_purge_eagerness_e { + PAC_PURGE_ALWAYS, + PAC_PURGE_NEVER, + PAC_PURGE_ON_EPOCH_ADVANCE }; -typedef enum pac_decay_purge_setting_e pac_decay_purge_setting_t; +typedef enum pac_purge_eagerness_e pac_purge_eagerness_t; typedef struct pac_decay_stats_s pac_decay_stats_t; struct pac_decay_stats_s { @@ -112,16 +112,6 @@ bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, pac_estats_t *estats_out, size_t *resident); -static inline ssize_t -pac_dirty_decay_ms_get(pac_t *pac) { - return decay_ms_read(&pac->decay_dirty); -} - -static inline ssize_t -pac_muzzy_decay_ms_get(pac_t *pac) { - return decay_ms_read(&pac->decay_muzzy); -} - static inline size_t pac_mapped(pac_t *pac) { return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); @@ -146,7 +136,7 @@ void pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, */ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t 
*ecache, - pac_decay_purge_setting_t decay_purge_setting); + pac_purge_eagerness_t eagerness); /* * Gets / sets the maximum amount that we'll grow an arena down the @@ -160,4 +150,7 @@ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); +bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); +ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 8263d8ee..72fa2281 100644 --- a/src/arena.c +++ b/src/arena.c @@ -70,8 +70,8 @@ arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); *dss = dss_prec_names[arena_dss_prec_get(arena)]; - *dirty_decay_ms = arena_dirty_decay_ms_get(arena); - *muzzy_decay_ms = arena_muzzy_decay_ms_get(arena); + *dirty_decay_ms = arena_decay_ms_get(arena, extent_state_dirty); + *muzzy_decay_ms = arena_decay_ms_get(arena, extent_state_muzzy); pa_shard_basic_stats_merge(&arena->pa_shard, nactive, ndirty, nmuzzy); } @@ -189,7 +189,7 @@ void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (arena_dirty_decay_ms_get(arena) == 0) { + if (arena_decay_ms_get(arena, extent_state_dirty) == 0) { arena_decay_dirty(tsdn, arena, false, true); } else { arena_background_thread_inactivity_check(tsdn, arena, false); @@ -395,77 +395,37 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } } -ssize_t -arena_dirty_decay_ms_get(arena_t *arena) { - return pac_dirty_decay_ms_get(&arena->pa_shard.pac); -} - -ssize_t -arena_muzzy_decay_ms_get(arena_t *arena) { - return pac_muzzy_decay_ms_get(&arena->pa_shard.pac); -} - /* * In situations where we're not forcing 
a decay (i.e. because the user * specifically requested it), should we purge ourselves, or wait for the * background thread to get to it. */ -static pac_decay_purge_setting_t -arena_decide_unforced_decay_purge_setting(bool is_background_thread) { +static pac_purge_eagerness_t +arena_decide_unforced_purge_eagerness(bool is_background_thread) { if (is_background_thread) { - return PAC_DECAY_PURGE_ALWAYS; + return PAC_PURGE_ALWAYS; } else if (!is_background_thread && background_thread_enabled()) { - return PAC_DECAY_PURGE_NEVER; + return PAC_PURGE_NEVER; } else { - return PAC_DECAY_PURGE_ON_EPOCH_ADVANCE; + return PAC_PURGE_ON_EPOCH_ADVANCE; } } -static bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, ssize_t decay_ms) { - if (!decay_ms_valid(decay_ms)) { - return true; - } - - malloc_mutex_lock(tsdn, &decay->mtx); - /* - * Restart decay backlog from scratch, which may cause many dirty pages - * to be immediately purged. It would conceptually be possible to map - * the old backlog onto the new backlog, but there is no justification - * for such complexity since decay_ms changes are intended to be - * infrequent, either between the {-1, 0, >0} states, or a one-time - * arbitrary change during initial arena configuration. 
- */ - nstime_t cur_time; - nstime_init_update(&cur_time); - decay_reinit(decay, &cur_time, decay_ms); - pac_decay_purge_setting_t decay_purge = - arena_decide_unforced_decay_purge_setting( - /* is_background_thread */ false); - pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, - ecache, decay_purge); - malloc_mutex_unlock(tsdn, &decay->mtx); - - return false; -} - bool -arena_dirty_decay_ms_set(tsdn_t *tsdn, arena_t *arena, +arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_dirty, - &arena->pa_shard.pac.stats->decay_dirty, - &arena->pa_shard.pac.ecache_dirty, decay_ms); + pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( + /* is_background_thread */ false); + return pa_decay_ms_set(tsdn, &arena->pa_shard, state, decay_ms, + eagerness); } -bool -arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, - ssize_t decay_ms) { - return arena_decay_ms_set(tsdn, arena, &arena->pa_shard.pac.decay_muzzy, - &arena->pa_shard.pac.stats->decay_muzzy, - &arena->pa_shard.pac.ecache_muzzy, decay_ms); +ssize_t +arena_decay_ms_get(arena_t *arena, extent_state_t state) { + return pa_decay_ms_get(&arena->pa_shard, state); } + static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, @@ -497,10 +457,10 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. 
*/ return true; } - pac_decay_purge_setting_t decay_purge = - arena_decide_unforced_decay_purge_setting(is_background_thread); + pac_purge_eagerness_t eagerness = + arena_decide_unforced_purge_eagerness(is_background_thread); bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, - decay, decay_stats, ecache, decay_purge); + decay, decay_stats, ecache, eagerness); size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ @@ -1546,10 +1506,12 @@ arena_choose_huge(tsd_t *tsd) { * expected for huge allocations. */ if (arena_dirty_decay_ms_default_get() > 0) { - arena_dirty_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_dirty, 0); } if (arena_muzzy_decay_ms_default_get() > 0) { - arena_muzzy_decay_ms_set(tsd_tsdn(tsd), huge_arena, 0); + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_muzzy, 0); } } diff --git a/src/ctl.c b/src/ctl.c index 8b4b764a..62a82a20 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2430,10 +2430,10 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EFAULT; goto label_return; } + extent_state_t state = dirty ? extent_state_dirty : extent_state_muzzy; if (oldp != NULL && oldlenp != NULL) { - size_t oldval = dirty ? arena_dirty_decay_ms_get(arena) : - arena_muzzy_decay_ms_get(arena); + size_t oldval = arena_decay_ms_get(arena, state); READ(oldval, ssize_t); } if (newp != NULL) { @@ -2452,9 +2452,9 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } } - if (dirty ? 
arena_dirty_decay_ms_set(tsd_tsdn(tsd), arena, - *(ssize_t *)newp) : arena_muzzy_decay_ms_set(tsd_tsdn(tsd), - arena, *(ssize_t *)newp)) { + + if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state, + *(ssize_t *)newp)) { ret = EFAULT; goto label_return; } diff --git a/src/extent.c b/src/extent.c index 87d6a9a2..98db40ec 100644 --- a/src/extent.c +++ b/src/extent.c @@ -55,8 +55,8 @@ extent_sn_next(pac_t *pac) { static inline bool extent_may_force_decay(pac_t *pac) { - return !(pac_dirty_decay_ms_get(pac) == -1 - || pac_muzzy_decay_ms_get(pac) == -1); + return !(pac_decay_ms_get(pac, extent_state_dirty) == -1 + || pac_decay_ms_get(pac, extent_state_muzzy) == -1); } static bool diff --git a/src/pa.c b/src/pa.c index 43dc318f..444ea5be 100644 --- a/src/pa.c +++ b/src/pa.c @@ -78,9 +78,9 @@ pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { } } -static bool +static inline bool pa_shard_may_have_muzzy(pa_shard_t *shard) { - return pac_muzzy_decay_ms_get(&shard->pac) != 0; + return pac_decay_ms_get(&shard->pac, extent_state_muzzy) != 0; } static edata_t * @@ -389,60 +389,20 @@ pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, /* npages_limit */ 0, ecache_npages_get(ecache)); } -static void -pa_decay_try_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - size_t current_npages, size_t npages_limit) { - if (current_npages > npages_limit) { - pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, - /* fully_decay */ false, npages_limit, - current_npages - npages_limit); - } -} - -bool -pa_maybe_decay_purge(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - - /* Purge all or nothing if the option is disabled. 
*/ - ssize_t decay_ms = decay_ms_read(decay); - if (decay_ms <= 0) { - if (decay_ms == 0) { - pa_decay_to_limit(tsdn, shard, decay, decay_stats, - ecache, /* fully_decay */ false, - /* npages_limit */ 0, ecache_npages_get(ecache)); - } - return false; - } - - /* - * If the deadline has been reached, advance to the current epoch and - * purge to the new limit if necessary. Note that dirty pages created - * during the current epoch are not subject to purge until a future - * epoch, so as a result purging only happens during epoch advances, or - * being triggered by background threads (scheduled event). - */ - nstime_t time; - nstime_init_update(&time); - size_t npages_current = ecache_npages_get(ecache); - bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, - npages_current); - if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS - || (epoch_advanced && decay_purge_setting - == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { - size_t npages_limit = decay_npages_limit_get(decay); - pa_decay_try_purge(tsdn, shard, decay, decay_stats, ecache, - npages_current, npages_limit); - } - - return epoch_advanced; -} - bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, size_t *old_limit, size_t *new_limit) { return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit, new_limit); } + +bool +pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness) { + return pac_decay_ms_set(tsdn, &shard->pac, state, decay_ms, eagerness); +} + +ssize_t +pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { + return pac_decay_ms_get(&shard->pac, state); +} diff --git a/src/pac.c b/src/pac.c index 5ed1151d..bc9f7433 100644 --- a/src/pac.c +++ b/src/pac.c @@ -8,6 +8,27 @@ pac_ehooks_get(pac_t *pac) { return base_ehooks_get(pac->base); } +static inline void +pac_decay_data_get(pac_t *pac, extent_state_t state, + decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { + switch(state) { 
+ case extent_state_dirty: + *r_decay = &pac->decay_dirty; + *r_decay_stats = &pac->stats->decay_dirty; + *r_ecache = &pac->ecache_dirty; + return; + case extent_state_muzzy: + *r_decay = &pac->decay_muzzy; + *r_decay_stats = &pac->stats->decay_muzzy; + *r_ecache = &pac->ecache_muzzy; + return; + default: + unreachable(); + } +} + + + bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, @@ -117,7 +138,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, ehooks_t *ehooks = pac_ehooks_get(pac); - bool try_muzzy = !fully_decay && pac_muzzy_decay_ms_get(pac) != 0; + bool try_muzzy = !fully_decay + && pac_decay_ms_get(pac, extent_state_muzzy) != 0; for (edata_t *edata = edata_list_first(decay_extents); edata != NULL; edata = edata_list_first(decay_extents)) { @@ -225,7 +247,7 @@ pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, - pac_decay_purge_setting_t decay_purge_setting) { + pac_purge_eagerness_t eagerness) { malloc_mutex_assert_owner(tsdn, &decay->mtx); /* Purge all or nothing if the option is disabled. 
*/ @@ -251,9 +273,8 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, size_t npages_current = ecache_npages_get(ecache); bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, npages_current); - if (decay_purge_setting == PAC_DECAY_PURGE_ALWAYS - || (epoch_advanced && decay_purge_setting - == PAC_DECAY_PURGE_ON_EPOCH_ADVANCE)) { + if (eagerness == PAC_PURGE_ALWAYS + || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) { size_t npages_limit = decay_npages_limit_get(decay); pac_decay_try_purge(tsdn, pac, decay, decay_stats, ecache, npages_current, npages_limit); @@ -261,3 +282,42 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, return epoch_advanced; } + +bool +pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness) { + decay_t *decay; + pac_decay_stats_t *decay_stats; + ecache_t *ecache; + pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); + + if (!decay_ms_valid(decay_ms)) { + return true; + } + + malloc_mutex_lock(tsdn, &decay->mtx); + /* + * Restart decay backlog from scratch, which may cause many dirty pages + * to be immediately purged. It would conceptually be possible to map + * the old backlog onto the new backlog, but there is no justification + * for such complexity since decay_ms changes are intended to be + * infrequent, either between the {-1, 0, >0} states, or a one-time + * arbitrary change during initial arena configuration. 
+ */ + nstime_t cur_time; + nstime_init_update(&cur_time); + decay_reinit(decay, &cur_time, decay_ms); + pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness); + malloc_mutex_unlock(tsdn, &decay->mtx); + + return false; +} + +ssize_t +pac_decay_ms_get(pac_t *pac, extent_state_t state) { + decay_t *decay; + pac_decay_stats_t *decay_stats; + ecache_t *ecache; + pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); + return decay_ms_read(decay); +} diff --git a/test/unit/pa.c b/test/unit/pa.c index 63cd976f..3a910235 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -107,7 +107,6 @@ TEST_BEGIN(test_alloc_free_purge_thds) { for (int i = 0; i < 4; i++) { thd_join(thds[i], NULL); } - } TEST_END From cbf096b05ee1b21ce4244f04870083c63798ad64 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 4 Jun 2020 12:44:50 -0700 Subject: [PATCH 1818/2608] Arena: remove redundant bg inactivity check. --- src/arena.c | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/arena.c b/src/arena.c index 72fa2281..2bf02de6 100644 --- a/src/arena.c +++ b/src/arena.c @@ -425,7 +425,6 @@ arena_decay_ms_get(arena_t *arena, extent_state_t state) { return pa_decay_ms_get(&arena->pa_shard, state); } - static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, @@ -435,21 +434,6 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, /* fully_decay */ all); malloc_mutex_unlock(tsdn, &decay->mtx); - /* - * The previous pa_decay_all call may not have actually decayed - * all pages, if new pages were added concurrently with the - * purge. - * - * I don't think we need an activity check for that case (some - * other thread must be deallocating, and they should do one), - * but we do one anyways. 
This line comes out of a refactoring - * diff in which the check was pulled out of the callee, and so - * an extra redundant check minimizes the change. We should - * reevaluate. - */ - assert(!is_background_thread); - arena_background_thread_inactivity_check(tsdn, arena, - /* is_background_thread */ false); return false; } From 6041aaba9742c792cfa1d9ddbede6c646dd92d33 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 10 Jun 2020 17:42:49 -0700 Subject: [PATCH 1819/2608] PA -> PAC: Move in destruction functions. --- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/pac.h | 4 ++++ src/arena.c | 2 +- src/pa.c | 20 ++------------------ src/pac.c | 31 +++++++++++++++++++++++++++++++ 5 files changed, 39 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4bdd8ac1..b3fc8e2b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -113,7 +113,7 @@ void pa_shard_reset(pa_shard_t *shard); * decaying all active, dirty, and muzzy extents to the retained state, as the * last step in destroying the shard. */ -void pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); /* Gets an edata for the given allocation. 
*/ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index de01c519..302ac078 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -153,4 +153,8 @@ bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); + +void pac_reset(tsdn_t *tsdn, pac_t *pac); +void pac_destroy(tsdn_t *tsdn, pac_t *pac); + #endif /* JEMALLOC_INTERNAL_PAC_H */ diff --git a/src/arena.c b/src/arena.c index 2bf02de6..46da3859 100644 --- a/src/arena.c +++ b/src/arena.c @@ -645,7 +645,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { * extents, so only retained extents may remain and it's safe to call * pa_shard_destroy_retained. */ - pa_shard_destroy_retained(tsd_tsdn(tsd), &arena->pa_shard); + pa_shard_destroy(tsd_tsdn(tsd), &arena->pa_shard); /* * Remove the arena pointer from the arenas array. We rely on the fact diff --git a/src/pa.c b/src/pa.c index 444ea5be..6a3db3c6 100644 --- a/src/pa.c +++ b/src/pa.c @@ -58,24 +58,8 @@ pa_shard_reset(pa_shard_t *shard) { } void -pa_shard_destroy_retained(tsdn_t *tsdn, pa_shard_t *shard) { - assert(ecache_npages_get(&shard->pac.ecache_dirty) == 0); - assert(ecache_npages_get(&shard->pac.ecache_muzzy) == 0); - /* - * Iterate over the retained extents and destroy them. This gives the - * extent allocator underlying the extent hooks an opportunity to unmap - * all retained memory without having to keep its own metadata - * structures. In practice, virtual memory for dss-allocated extents is - * leaked here, so best practice is to avoid dss for arenas to be - * destroyed, or provide custom extent hooks that track retained - * dss-based extents for later reuse. 
- */ - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata; - while ((edata = ecache_evict(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_retained, 0)) != NULL) { - extent_destroy_wrapper(tsdn, &shard->pac, ehooks, edata); - } +pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { + pac_destroy(tsdn, &shard->pac); } static inline bool diff --git a/src/pac.c b/src/pac.c index bc9f7433..ed17a2f5 100644 --- a/src/pac.c +++ b/src/pac.c @@ -321,3 +321,34 @@ pac_decay_ms_get(pac_t *pac, extent_state_t state) { pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); return decay_ms_read(decay); } + +void +pac_reset(tsdn_t *tsdn, pac_t *pac) { + /* + * No-op for now; purging is still done at the arena-level. It should + * get moved in here, though. + */ + (void)tsdn; + (void)pac; +} + +void +pac_destroy(tsdn_t *tsdn, pac_t *pac) { + assert(ecache_npages_get(&pac->ecache_dirty) == 0); + assert(ecache_npages_get(&pac->ecache_muzzy) == 0); + /* + * Iterate over the retained extents and destroy them. This gives the + * extent allocator underlying the extent hooks an opportunity to unmap + * all retained memory without having to keep its own metadata + * structures. In practice, virtual memory for dss-allocated extents is + * leaked here, so best practice is to avoid dss for arenas to be + * destroyed, or provide custom extent hooks that track retained + * dss-based extents for later reuse. + */ + ehooks_t *ehooks = pac_ehooks_get(pac); + edata_t *edata; + while ((edata = ecache_evict(tsdn, pac, ehooks, + &pac->ecache_retained, 0)) != NULL) { + extent_destroy_wrapper(tsdn, pac, ehooks, edata); + } +} From 6107857b7b40cd3d5c64053aeaf44e275374e9e8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 11 Jun 2020 11:53:30 -0700 Subject: [PATCH 1820/2608] PA->PAC: Move in PAI implementation. 
--- include/jemalloc/internal/pa.h | 7 +- include/jemalloc/internal/pac.h | 7 + src/pa.c | 254 +------------------------------- src/pac.c | 131 ++++++++++++++-- 4 files changed, 133 insertions(+), 266 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b3fc8e2b..f6d0a7c3 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -66,12 +66,7 @@ struct pa_shard_s { */ atomic_zu_t nactive; - /* - * An interface for page allocation from the ecache framework (i.e. a - * cascade of ecache_dirty, ecache_muzzy, ecache_retained). Right now - * this is the *only* pai, but we'll soon grow another. - */ - pai_t ecache_pai; + /* Allocates from a PAC. */ pac_t pac; /* The source of edata_t objects. */ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 302ac078..2d02bda0 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H +#include "jemalloc/internal/pai.h" + /* * Page allocator classic; an implementation of the PAI interface that: * - Can be used for arenas with custom extent hooks. @@ -71,6 +73,11 @@ struct pac_stats_s { typedef struct pac_s pac_t; struct pac_s { + /* + * Must be the first member (we convert it to a PAC given only a + * pointer). The handle to the allocation interface. + */ + pai_t pai; /* * Collections of extents that were previously allocated. These are * used when allocating extents, in an attempt to re-use address space. 
diff --git a/src/pa.c b/src/pa.c index 6a3db3c6..f068fd96 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,14 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -static edata_t *ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); -static bool ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); -static bool ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); - static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -44,11 +36,6 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, shard->emap = emap; shard->base = base; - shard->ecache_pai.alloc = &ecache_pai_alloc; - shard->ecache_pai.expand = &ecache_pai_expand; - shard->ecache_pai.shrink = &ecache_pai_shrink; - shard->ecache_pai.dalloc = &ecache_pai_dalloc; - return false; } @@ -62,43 +49,13 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); } -static inline bool -pa_shard_may_have_muzzy(pa_shard_t *shard) { - return pac_decay_ms_get(&shard->pac, extent_state_muzzy) != 0; -} - -static edata_t * -ecache_pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - edata_t *edata = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_dirty, NULL, size, alignment, zero); - - if (edata == NULL && pa_shard_may_have_muzzy(shard)) { - edata = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_muzzy, NULL, size, alignment, zero); - } - if (edata == NULL) { - edata = ecache_alloc_grow(tsdn, &shard->pac, ehooks, - 
&shard->pac.ecache_retained, NULL, size, alignment, zero); - if (config_stats && edata != NULL) { - atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, size, - ATOMIC_RELAXED); - } - } - return edata; -} - edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = pai_alloc(tsdn, &shard->ecache_pai, size, alignment, + edata_t *edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); if (edata != NULL) { @@ -113,48 +70,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, return edata; } -static bool -ecache_pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - void *trail_begin = edata_past_get(edata); - - size_t mapped_add = 0; - size_t expand_amount = new_size - old_size; - - if (ehooks_merge_will_fail(ehooks)) { - return true; - } - edata_t *trail = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_dirty, trail_begin, expand_amount, PAGE, zero); - if (trail == NULL) { - trail = ecache_alloc(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_muzzy, trail_begin, expand_amount, PAGE, - zero); - } - if (trail == NULL) { - trail = ecache_alloc_grow(tsdn, &shard->pac, ehooks, - &shard->pac.ecache_retained, trail_begin, expand_amount, - PAGE, zero); - mapped_add = expand_amount; - } - if (trail == NULL) { - return true; - } - if (extent_merge_wrapper(tsdn, &shard->pac, ehooks, edata, trail)) { - extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, trail); - return true; - } - if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&shard->pac.stats->pac_mapped, mapped_add, - ATOMIC_RELAXED); - } - return false; -} - bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, 
size_t old_size, size_t new_size, szind_t szind, bool zero) { @@ -164,7 +79,7 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t expand_amount = new_size - old_size; - bool error = pai_expand(tsdn, &shard->ecache_pai, edata, old_size, + bool error = pai_expand(tsdn, &shard->pac.pai, edata, old_size, new_size, zero); if (error) { return true; @@ -176,30 +91,6 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return false; } -static bool -ecache_pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - size_t shrink_amount = old_size - new_size; - - - if (ehooks_split_will_fail(ehooks)) { - return true; - } - - edata_t *trail = extent_split_wrapper(tsdn, &shard->pac, ehooks, edata, - new_size, shrink_amount); - if (trail == NULL) { - return true; - } - ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, - trail); - return false; -} - bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool *generated_dirty) { @@ -209,7 +100,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; *generated_dirty = false; - bool error = pai_shrink(tsdn, &shard->ecache_pai, edata, old_size, + bool error = pai_shrink(tsdn, &shard->pac.pai, edata, old_size, new_size); if (error) { return true; @@ -222,15 +113,6 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, return false; } -static void -ecache_pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - pa_shard_t *shard = - (pa_shard_t *)((uintptr_t)self - offsetof(pa_shard_t, ecache_pai)); - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - ecache_dalloc(tsdn, &shard->pac, ehooks, &shard->pac.ecache_dirty, - edata); -} - void 
pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty) { @@ -241,138 +123,10 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - pai_dalloc(tsdn, &shard->ecache_pai, edata); + pai_dalloc(tsdn, &shard->pac.pai, edata); *generated_dirty = true; } -static size_t -pa_stash_decayed(tsdn_t *tsdn, pa_shard_t *shard, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, - edata_list_inactive_t *result) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - - /* Stash extents according to npages_limit. */ - size_t nstashed = 0; - while (nstashed < npages_decay_max) { - edata_t *edata = ecache_evict(tsdn, &shard->pac, ehooks, ecache, - npages_limit); - if (edata == NULL) { - break; - } - edata_list_inactive_append(result, edata); - nstashed += edata_size_get(edata) >> LG_PAGE; - } - return nstashed; -} - -static size_t -pa_decay_stashed(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_inactive_t *decay_extents) { - bool err; - - size_t nmadvise = 0; - size_t nunmapped = 0; - size_t npurged = 0; - - ehooks_t *ehooks = pa_shard_ehooks_get(shard); - - bool try_muzzy = !fully_decay && pa_shard_may_have_muzzy(shard); - - for (edata_t *edata = edata_list_inactive_first(decay_extents); - edata != NULL; edata = edata_list_inactive_first(decay_extents)) { - edata_list_inactive_remove(decay_extents, edata); - - size_t size = edata_size_get(edata); - size_t npages = size >> LG_PAGE; - - nmadvise++; - npurged += npages; - - switch (ecache->state) { - case extent_state_active: - not_reached(); - case extent_state_dirty: - if (try_muzzy) { - err = extent_purge_lazy_wrapper(tsdn, ehooks, - edata, /* offset */ 0, size); - if (!err) { - ecache_dalloc(tsdn, &shard->pac, ehooks, - 
&shard->pac.ecache_muzzy, edata); - break; - } - } - JEMALLOC_FALLTHROUGH; - case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, &shard->pac, ehooks, edata); - nunmapped += npages; - break; - case extent_state_retained: - default: - not_reached(); - } - } - - if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, *shard->stats_mtx); - locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &decay_stats->npurge, 1); - locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &decay_stats->nmadvise, nmadvise); - locked_inc_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &decay_stats->purged, npurged); - LOCKEDINT_MTX_UNLOCK(tsdn, *shard->stats_mtx); - atomic_fetch_sub_zu(&shard->pac.stats->pac_mapped, - nunmapped << LG_PAGE, ATOMIC_RELAXED); - } - - return npurged; -} - -/* - * npages_limit: Decay at most npages_decay_max pages without violating the - * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper - * bound on number of pages in order to prevent unbounded growth (namely in - * stashed), otherwise unbounded new pages could be added to extents during the - * current decay run, so that the purging thread never finishes. 
- */ -static void -pa_decay_to_limit(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - size_t npages_limit, size_t npages_decay_max) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); - - if (decay->purging || npages_decay_max == 0) { - return; - } - decay->purging = true; - malloc_mutex_unlock(tsdn, &decay->mtx); - - edata_list_inactive_t decay_extents; - edata_list_inactive_init(&decay_extents); - size_t npurge = pa_stash_decayed(tsdn, shard, ecache, npages_limit, - npages_decay_max, &decay_extents); - if (npurge != 0) { - size_t npurged = pa_decay_stashed(tsdn, shard, decay, - decay_stats, ecache, fully_decay, &decay_extents); - assert(npurged == npurge); - } - - malloc_mutex_lock(tsdn, &decay->mtx); - decay->purging = false; -} - -void -pa_decay_all(tsdn_t *tsdn, pa_shard_t *shard, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { - malloc_mutex_assert_owner(tsdn, &decay->mtx); - pa_decay_to_limit(tsdn, shard, decay, decay_stats, ecache, fully_decay, - /* npages_limit */ 0, ecache_npages_get(ecache)); -} - bool pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, size_t *old_limit, size_t *new_limit) { diff --git a/src/pac.c b/src/pac.c index ed17a2f5..a4370888 100644 --- a/src/pac.c +++ b/src/pac.c @@ -3,6 +3,14 @@ #include "jemalloc/internal/pac.h" +static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); + static ehooks_t * pac_ehooks_get(pac_t *pac) { return base_ehooks_get(pac->base); @@ -27,8 +35,6 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, } } 
- - bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, @@ -78,9 +84,113 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->stats = pac_stats; pac->stats_mtx = stats_mtx; atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); + + pac->pai.alloc = &pac_alloc_impl; + pac->pai.expand = &pac_expand_impl; + pac->pai.shrink = &pac_shrink_impl; + pac->pai.dalloc = &pac_dalloc_impl; + return false; } +static inline bool +pac_may_have_muzzy(pac_t *pac) { + return pac_decay_ms_get(pac, extent_state_muzzy) != 0; +} + +static edata_t * +pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero) { + pac_t *pac = (pac_t *)self; + + ehooks_t *ehooks = pac_ehooks_get(pac); + edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, + NULL, size, alignment, zero); + + if (edata == NULL && pac_may_have_muzzy(pac)) { + edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, + NULL, size, alignment, zero); + } + if (edata == NULL) { + edata = ecache_alloc_grow(tsdn, pac, ehooks, + &pac->ecache_retained, NULL, size, alignment, zero); + if (config_stats && edata != NULL) { + atomic_fetch_add_zu(&pac->stats->pac_mapped, size, + ATOMIC_RELAXED); + } + } + return edata; +} + +static bool +pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + pac_t *pac = (pac_t *)self; + + ehooks_t *ehooks = pac_ehooks_get(pac); + void *trail_begin = edata_past_get(edata); + + size_t mapped_add = 0; + size_t expand_amount = new_size - old_size; + + if (ehooks_merge_will_fail(ehooks)) { + return true; + } + edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, + trail_begin, expand_amount, PAGE, zero); + if (trail == NULL) { + trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, + trail_begin, expand_amount, PAGE, zero); + } + if (trail == NULL) { + trail = ecache_alloc_grow(tsdn, 
pac, ehooks, + &pac->ecache_retained, trail_begin, expand_amount, PAGE, + zero); + mapped_add = expand_amount; + } + if (trail == NULL) { + return true; + } + if (extent_merge_wrapper(tsdn, pac, ehooks, edata, trail)) { + extent_dalloc_wrapper(tsdn, pac, ehooks, trail); + return true; + } + if (config_stats && mapped_add > 0) { + atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add, + ATOMIC_RELAXED); + } + return false; +} + +static bool +pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + pac_t *pac = (pac_t *)self; + + ehooks_t *ehooks = pac_ehooks_get(pac); + size_t shrink_amount = old_size - new_size; + + + if (ehooks_split_will_fail(ehooks)) { + return true; + } + + edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, edata, + new_size, shrink_amount); + if (trail == NULL) { + return true; + } + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail); + return false; +} + +static void +pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pac_t *pac = (pac_t *)self; + ehooks_t *ehooks = pac_ehooks_get(pac); + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); +} + bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit) { @@ -107,7 +217,8 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, static size_t pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, - size_t npages_limit, size_t npages_decay_max, edata_list_t *result) { + size_t npages_limit, size_t npages_decay_max, + edata_list_inactive_t *result) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pac_ehooks_get(pac); @@ -120,7 +231,7 @@ pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, if (edata == NULL) { break; } - edata_list_append(result, edata); + edata_list_inactive_append(result, edata); nstashed += edata_size_get(edata) >> LG_PAGE; } return nstashed; @@ -129,7 +240,7 @@ 
pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, static size_t pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, - edata_list_t *decay_extents) { + edata_list_inactive_t *decay_extents) { bool err; size_t nmadvise = 0; @@ -141,9 +252,9 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool try_muzzy = !fully_decay && pac_decay_ms_get(pac, extent_state_muzzy) != 0; - for (edata_t *edata = edata_list_first(decay_extents); edata != - NULL; edata = edata_list_first(decay_extents)) { - edata_list_remove(decay_extents, edata); + for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != + NULL; edata = edata_list_inactive_first(decay_extents)) { + edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); size_t npages = size >> LG_PAGE; @@ -211,8 +322,8 @@ pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, decay->purging = true; malloc_mutex_unlock(tsdn, &decay->mtx); - edata_list_t decay_extents; - edata_list_init(&decay_extents); + edata_list_inactive_t decay_extents; + edata_list_inactive_init(&decay_extents); size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { From e6cb7a1c9b31de3c6eca367d9164a1896bbb60ae Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 7 Jul 2020 13:33:30 -0700 Subject: [PATCH 1821/2608] Shorten wait time for peak events --- src/peak_event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/peak_event.c b/src/peak_event.c index ffb061bf..79d91e02 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -5,12 +5,12 @@ #include "jemalloc/internal/peak_event.h" /* - * Update every 100k by default. We're not exposing this as a configuration + * Update every 64K by default. 
We're not exposing this as a configuration * option for now; we don't want to bind ourselves too tightly to any particular * performance requirements for small values, or guarantee that we'll even be * able to provide fine-grained accuracy. */ -#define PEAK_EVENT_WAIT (100 * 1024) +#define PEAK_EVENT_WAIT (64 * 1024) /* Update the peak with current tsd state. */ void From 4258402047a1b1c9b78ff12dcb26bd869f6ae8cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 6 Jul 2020 15:48:15 -0700 Subject: [PATCH 1822/2608] Corrections for prof_log_start() --- src/prof_log.c | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index b32d6f63..3a653fb4 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -87,7 +87,7 @@ struct prof_alloc_node_s { }; /* - * Created on the first call to prof_log_start and deleted on prof_log_stop. + * Created on the first call to prof_try_log and deleted on prof_log_stop. * These are the backtraces and threads that have already been logged by an * allocation. 
*/ @@ -406,7 +406,7 @@ prof_log_dummy_set(bool new_value) { bool prof_log_start(tsdn_t *tsdn, const char *filename) { - if (!opt_prof || !prof_booted) { + if (!opt_prof) { return true; } @@ -429,7 +429,7 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { } if (!ret) { - nstime_update(&log_start_timestamp); + nstime_prof_init_update(&log_start_timestamp); } malloc_mutex_unlock(tsdn, &log_mtx); @@ -573,10 +573,9 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_kv(emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample); - const char *res_type = - prof_time_res_mode_names[opt_prof_time_res]; - emitter_json_kv(emitter, "prof_time_resolution", - emitter_type_string, &res_type); + const char *res_type = prof_time_res_mode_names[opt_prof_time_res]; + emitter_json_kv(emitter, "prof_time_resolution", emitter_type_string, + &res_type); int pid = prof_getpid(); emitter_json_kv(emitter, "pid", emitter_type_int, &pid); @@ -673,6 +672,11 @@ prof_log_stop(tsdn_t *tsdn) { #undef PROF_LOG_STOP_BUFSIZE bool prof_log_init(tsd_t *tsd) { + if (malloc_mutex_init(&log_mtx, "prof_log", + WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + return true; + } + if (opt_prof_log) { prof_log_start(tsd_tsdn(tsd), NULL); } @@ -683,26 +687,9 @@ bool prof_log_init(tsd_t *tsd) { if (opt_abort) { abort(); } - } - - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { return true; } - if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp)) { - return true; - } - - if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp)) { - return true; - } - - nstime_init_zero(&log_start_timestamp); - - log_tables_initialized = true; return false; } From f5fb4e5a970077e308d7e4e3f1cbbec4cf76a8d9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 14:20:38 -0700 Subject: [PATCH 1823/2608] Modify mallctl output length when needed This is the only 
reason why `oldlenp` was designed to be in the form of a pointer. --- doc/jemalloc.xml.in | 3 ++- src/ctl.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5472294c..19afe362 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -3608,7 +3608,8 @@ MAPPED_LIBRARIES: NULL, and newlen is too large or too small. Alternatively, *oldlenp is too large or too small; in this case as much data as possible - are read despite the error. + are read despite the error, with the amount of data read being + recorded in *oldlenp. ENOENT diff --git a/src/ctl.c b/src/ctl.c index 62a82a20..92e9f511 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1494,6 +1494,7 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { size_t copylen = (sizeof(t) <= *oldlenp) \ ? sizeof(t) : *oldlenp; \ memcpy(oldp, (void *)&(v), copylen); \ + *oldlenp = copylen; \ ret = EINVAL; \ goto label_return; \ } \ From fb347dc6186d5b1747f66075c9209c673d23720b Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 14:46:43 -0700 Subject: [PATCH 1824/2608] Verify output space before doing heavy work in mallctl --- doc/jemalloc.xml.in | 21 +++++++++++++++++++-- src/ctl.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 16 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 19afe362..f283fd37 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1760,7 +1760,16 @@ malloc_conf = "xmalloc:true";]]> automatically managed one that is used by default. Each explicit cache can be used by only one thread at a time; the application must assure that this constraint holds. + + + If the amount of space supplied for storing the thread-specific + cache identifier does not equal + sizeof(unsigned), no + thread-specific cache will be created, no data will be written to the + space pointed by oldp, and + *oldlenp will be set to 0. 
+ @@ -2300,7 +2309,14 @@ struct extent_hooks_s { Explicitly create a new arena outside the range of automatically managed arenas, with optionally specified extent hooks, - and return the new arena index. + and return the new arena index. + + If the amount of space supplied for storing the arena index does + not equal sizeof(unsigned), no + arena will be created, no data will be written to the space pointed by + oldp, and *oldlenp will + be set to 0. + @@ -3607,7 +3623,8 @@ MAPPED_LIBRARIES: newp is not NULL, and newlen is too large or too small. Alternatively, *oldlenp - is too large or too small; in this case as much data as possible + is too large or too small; when it happens, except for a very few + cases explicitly documented otherwise, as much data as possible are read despite the error, with the amount of data read being recorded in *oldlenp. diff --git a/src/ctl.c b/src/ctl.c index 92e9f511..9cfb2588 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1488,6 +1488,15 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { } \ } while (0) +/* Verify that the space provided is enough. 
*/ +#define VERIFY_READ(t) do { \ + if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) { \ + *oldlenp = 0; \ + ret = EINVAL; \ + goto label_return; \ + } \ +} while (0) + #define READ(v, t) do { \ if (oldp != NULL && oldlenp != NULL) { \ if (*oldlenp != sizeof(t)) { \ @@ -1559,8 +1568,8 @@ label_return: \ #define CTL_RO_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1602,8 +1611,8 @@ label_return: \ */ #define CTL_RO_NL_CGEN(c, n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1621,8 +1630,8 @@ label_return: \ #define CTL_RO_NL_GEN(n, v, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -1637,8 +1646,8 @@ label_return: \ #define CTL_RO_CONFIG_GEN(n, t) \ static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ +n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ int ret; \ t oldval; \ \ @@ -2103,6 +2112,7 @@ tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, unsigned tcache_ind; READONLY(); + VERIFY_READ(unsigned); if (tcaches_create(tsd, b0get(), &tcache_ind)) { ret = EFAULT; goto label_return; @@ -2608,10 +2618,6 @@ arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, 
malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); - if (*oldlenp != sizeof(unsigned)) { - ret = EINVAL; - goto label_return; - } narenas = ctl_arenas->narenas; READ(narenas, unsigned); @@ -2702,6 +2708,7 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + VERIFY_READ(unsigned); extent_hooks = (extent_hooks_t *)&ehooks_default_extent_hooks; WRITE(extent_hooks, extent_hooks_t *); if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { @@ -2731,12 +2738,14 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); - if (edata == NULL) + if (edata == NULL) { goto label_return; + } arena = arena_get_from_edata(edata); - if (arena == NULL) + if (arena == NULL) { goto label_return; + } arena_ind = arena_ind_get(arena); READ(arena_ind, unsigned); From 786a27b9e5dfb732bc1d893cc236354c225c8f1c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Jul 2020 13:24:38 -0700 Subject: [PATCH 1825/2608] CI: Update keyring. 
--- .appveyor.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index 90b03688..f74f0993 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,8 +30,10 @@ environment: install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% + - curl -O http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz + - pacman --noconfirm -U msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz --nodeps - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -Suy mingw-w64-%CPU%-make + - pacman --noconfirm -S mingw-w64-%CPU%-make build_script: - bash -c "autoconf" From 1ed0288d9c471771eba98ad5c3f6981fa922e7c4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jul 2020 08:07:12 -0700 Subject: [PATCH 1826/2608] bit_util: Change ffs functions indexing. Making these 0-based instead of 1-based makes calling code simpler and will be more consistent with functions introduced in subsequent diffs. --- include/jemalloc/internal/bit_util.h | 28 ++++++++++----- include/jemalloc/internal/bitmap.h | 16 ++++----- include/jemalloc/internal/prng.h | 6 ++-- src/pages.c | 6 ++-- test/unit/bit_util.c | 54 +++++++++++++++++++++++++++- 5 files changed, 86 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index c045eb86..258fd978 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -11,20 +11,29 @@ # error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif - +/* + * Unlike the builtins and posix ffs functions, our ffs requires a non-zero + * input, and returns the position of the lowest bit set (as opposed to the + * posix versions, which return 1 larger than that position and use a return + * value of zero as a sentinel. 
This tends to simplify logic in callers, and + * allows for consistency with the builtins we build fls on top of. + */ BIT_UTIL_INLINE unsigned -ffs_llu(unsigned long long bitmap) { - return JEMALLOC_INTERNAL_FFSLL(bitmap); +ffs_llu(unsigned long long x) { + util_assume(x != 0); + return JEMALLOC_INTERNAL_FFSLL(x) - 1; } BIT_UTIL_INLINE unsigned -ffs_lu(unsigned long bitmap) { - return JEMALLOC_INTERNAL_FFSL(bitmap); +ffs_lu(unsigned long x) { + util_assume(x != 0); + return JEMALLOC_INTERNAL_FFSL(x) - 1; } BIT_UTIL_INLINE unsigned -ffs_u(unsigned bitmap) { - return JEMALLOC_INTERNAL_FFS(bitmap); +ffs_u(unsigned x) { + util_assume(x != 0); + return JEMALLOC_INTERNAL_FFS(x) - 1; } #ifdef JEMALLOC_INTERNAL_POPCOUNTL @@ -41,7 +50,8 @@ popcount_lu(unsigned long bitmap) { BIT_UTIL_INLINE size_t cfs_lu(unsigned long* bitmap) { - size_t bit = ffs_lu(*bitmap) - 1; + util_assume(*bitmap != 0); + size_t bit = ffs_lu(*bitmap); *bitmap ^= ZU(1) << bit; return bit; } @@ -209,7 +219,7 @@ lg_floor(size_t x) { return (8 << LG_SIZEOF_PTR) - 1; } x++; - return ffs_zu(x) - 2; + return ffs_zu(x) - 1; } #endif diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index f7152a6a..dc19454d 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -272,7 +272,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { } return bitmap_ffu(bitmap, binfo, sib_base); } - bit += ((size_t)(ffs_lu(group_masked) - 1)) << + bit += ((size_t)ffs_lu(group_masked)) << (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); } assert(bit >= min_bit); @@ -284,9 +284,9 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { - 1); size_t bit; do { - bit = ffs_lu(g); - if (bit != 0) { - return (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + if (g != 0) { + bit = ffs_lu(g); + return (i << LG_BITMAP_GROUP_NBITS) + bit; } i++; g = bitmap[i]; @@ -307,20 +307,20 @@ bitmap_sfu(bitmap_t *bitmap, const 
bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE i = binfo->nlevels - 1; g = bitmap[binfo->levels[i].group_offset]; - bit = ffs_lu(g) - 1; + bit = ffs_lu(g); while (i > 0) { i--; g = bitmap[binfo->levels[i].group_offset + bit]; - bit = (bit << LG_BITMAP_GROUP_NBITS) + (ffs_lu(g) - 1); + bit = (bit << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); } #else i = 0; g = bitmap[0]; - while ((bit = ffs_lu(g)) == 0) { + while (g == 0) { i++; g = bitmap[i]; } - bit = (i << LG_BITMAP_GROUP_NBITS) + (bit - 1); + bit = (i << LG_BITMAP_GROUP_NBITS) + ffs_lu(g); #endif bitmap_set(bitmap, binfo, bit); return bit; diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 15cc2d18..12380b41 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -136,7 +136,7 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = ffs_u32(pow2_ceil_u32(range)) - 1; + lg_range = ffs_u32(pow2_ceil_u32(range)); /* Generate a result in [0..range) via repeated trial. */ do { @@ -154,7 +154,7 @@ prng_range_u64(uint64_t *state, uint64_t range) { assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. */ do { @@ -172,7 +172,7 @@ prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { assert(range > 1); /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)) - 1; + lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. 
*/ do { diff --git a/src/pages.c b/src/pages.c index 0ddc5ba0..05bbf728 100644 --- a/src/pages.c +++ b/src/pages.c @@ -211,8 +211,8 @@ pages_map(void *addr, size_t size, size_t alignment, bool *commit) { flags |= MAP_FIXED | MAP_EXCL; } else { unsigned alignment_bits = ffs_zu(alignment); - assert(alignment_bits > 1); - flags |= MAP_ALIGNED(alignment_bits - 1); + assert(alignment_bits > 0); + flags |= MAP_ALIGNED(alignment_bits); } void *ret = mmap(addr, size, prot, flags, -1, 0); @@ -600,7 +600,7 @@ init_thp_state(void) { #endif if (nread < 0) { - goto label_error; + goto label_error; } if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 3eeb7a31..f3761fd7 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -101,11 +101,63 @@ TEST_BEGIN(test_lg_ceil_floor) { } TEST_END +#define TEST_FFS(t, suf, test_suf, pri) do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(ffs_##suf(x), k, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ + } \ +} while(0) + +TEST_BEGIN(test_ffs_u) { + TEST_FFS(unsigned, u, u,"u"); +} +TEST_END + + +TEST_BEGIN(test_ffs_lu) { + TEST_FFS(unsigned long, lu, lu, "lu"); +} +TEST_END + +TEST_BEGIN(test_ffs_llu) { + TEST_FFS(unsigned long long, llu, qd, "llu"); +} +TEST_END + +TEST_BEGIN(test_ffs_u32) { + TEST_FFS(uint32_t, u32, u32, FMTu32); +} +TEST_END + + +TEST_BEGIN(test_ffs_u64) { + TEST_FFS(uint64_t, u64, u64, FMTu64); +} +TEST_END + +TEST_BEGIN(test_ffs_zu) { + TEST_FFS(size_t, zu, zu, "zu"); +} +TEST_END + int main(void) { return test( test_pow2_ceil_u64, test_pow2_ceil_u32, test_pow2_ceil_zu, - test_lg_ceil_floor); + test_lg_ceil_floor, + test_ffs_u, + test_ffs_lu, + test_ffs_llu, + test_ffs_u32, + test_ffs_u64, + test_ffs_zu); } From 22da836094f315b3fe1609e21c0e1092e7b0f2f5 Mon Sep 17 
00:00:00 2001 From: David Goldblatt Date: Wed, 22 Jul 2020 07:10:06 -0700 Subject: [PATCH 1827/2608] bit_util: Add fls_ functions; "find last set". These simplify a lot of the bit_util module, which had grown bits and pieces of this functionality across a variety of places over the years. While we're here, kill off BIT_UTIL_INLINE and don't do reentrancy testing for bit_util. --- configure.ac | 6 +- include/jemalloc/internal/bit_util.h | 344 +++++++++++++++++---------- test/unit/bit_util.c | 75 +++++- 3 files changed, 292 insertions(+), 133 deletions(-) diff --git a/configure.ac b/configure.ac index bcd63632..b197d32e 100644 --- a/configure.ac +++ b/configure.ac @@ -2118,7 +2118,7 @@ esac fi dnl ============================================================================ -dnl Check for __builtin_clz() and __builtin_clzl(). +dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll(). AC_CACHE_CHECK([for __builtin_clz], [je_cv_builtin_clz], @@ -2132,6 +2132,10 @@ AC_CACHE_CHECK([for __builtin_clz], unsigned long x = 0; int y = __builtin_clzl(x); } + { + unsigned long long x = 0; + int y = __builtin_clzll(x); + } ])], [je_cv_builtin_clz=yes], [je_cv_builtin_clz=no])]) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 258fd978..c5158f67 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -3,8 +3,6 @@ #include "jemalloc/internal/assert.h" -#define BIT_UTIL_INLINE static inline - /* Sanity check. */ #if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ || !defined(JEMALLOC_INTERNAL_FFS) @@ -18,26 +16,171 @@ * value of zero as a sentinel. This tends to simplify logic in callers, and * allows for consistency with the builtins we build fls on top of. 
*/ -BIT_UTIL_INLINE unsigned +static inline unsigned ffs_llu(unsigned long long x) { util_assume(x != 0); return JEMALLOC_INTERNAL_FFSLL(x) - 1; } -BIT_UTIL_INLINE unsigned +static inline unsigned ffs_lu(unsigned long x) { util_assume(x != 0); return JEMALLOC_INTERNAL_FFSL(x) - 1; } -BIT_UTIL_INLINE unsigned +static inline unsigned ffs_u(unsigned x) { util_assume(x != 0); return JEMALLOC_INTERNAL_FFS(x) - 1; } +#define DO_FLS_SLOW(x, suffix) do { \ + util_assume(x != 0); \ + x |= (x >> 1); \ + x |= (x >> 2); \ + x |= (x >> 4); \ + x |= (x >> 8); \ + x |= (x >> 16); \ + if (sizeof(x) > 4) { \ + /* \ + * If sizeof(x) is 4, then the expression "x >> 32" \ + * will generate compiler warnings even if the code \ + * never executes. This circumvents the warning, and \ + * gets compiled out in optimized builds. \ + */ \ + int constant_32 = sizeof(x) * 4; \ + x |= (x >> constant_32); \ + } \ + x++; \ + if (x == 0) { \ + return 8 * sizeof(x) - 1; \ + } \ + return ffs_##suffix(x) - 1; \ +} while(0) + +static inline unsigned +fls_llu_slow(unsigned long long x) { + DO_FLS_SLOW(x, llu); +} + +static inline unsigned +fls_lu_slow(unsigned long x) { + DO_FLS_SLOW(x, lu); +} + +static inline unsigned +fls_u_slow(unsigned x) { + DO_FLS_SLOW(x, u); +} + +#undef DO_FLS_SLOW + +#ifdef JEMALLOC_HAVE_BUILTIN_CLZ +static inline unsigned +fls_llu(unsigned long long x) { + util_assume(x != 0); + /* + * Note that the xor here is more naturally written as subtraction; the + * last bit set is the number of bits in the type minus the number of + * leading zero bits. But GCC implements that as: + * bsr edi, edi + * mov eax, 31 + * xor edi, 31 + * sub eax, edi + * If we write it as xor instead, then we get + * bsr eax, edi + * as desired. 
+ */ + return (8 * sizeof(x) - 1) ^ __builtin_clzll(x); +} + +static inline unsigned +fls_lu(unsigned long x) { + util_assume(x != 0); + return (8 * sizeof(x) - 1) ^ __builtin_clzl(x); +} + +static inline unsigned +fls_u(unsigned x) { + util_assume(x != 0); + return (8 * sizeof(x) - 1) ^ __builtin_clz(x); +} +#elif defined(_MSC_VER) + +#if LG_SIZEOF_PTR == 3 +#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x) +#else +/* + * This never actually runs; we're just dodging a compiler error for the + * never-taken branch where sizeof(void *) == 8. + */ +#define DO_BSR64(bit, x) bit = 0; unreachable() +#endif + +#define DO_FLS(x) do { \ + if (x == 0) { \ + return 8 * sizeof(x); \ + } \ + unsigned long bit; \ + if (sizeof(x) == 4) { \ + _BitScanReverse(&bit, (unsigned)x); \ + return (unsigned)bit; \ + } \ + if (sizeof(x) == 8 && sizeof(void *) == 8) { \ + DO_BSR64(bit, x); \ + return (unsigned)bit; \ + } \ + if (sizeof(x) == 8 && sizeof(void *) == 4) { \ + /* Dodge a compiler warning, as above. 
*/ \ + int constant_32 = sizeof(x) * 4; \ + if (_BitScanReverse(&bit, \ + (unsigned)(x >> constant_32))) { \ + return 32 + (unsigned)bit; \ + } else { \ + _BitScanReverse(&bit, (unsigned)x); \ + return (unsigned)bit; \ + } \ + } \ + unreachable(); \ +} while (0) + +static inline unsigned +fls_llu(unsigned long long x) { + DO_FLS(x); +} + +static inline unsigned +fls_lu(unsigned long x) { + DO_FLS(x); +} + +static inline unsigned +fls_u(unsigned x) { + DO_FLS(x); +} + +#undef DO_FLS +#undef DO_BSR64 +#else + +static inline unsigned +fls_llu(unsigned long long x) { + return fls_llu_slow(x); +} + +static inline unsigned +fls_lu(unsigned long x) { + return fls_lu_slow(x); +} + +static inline unsigned +fls_u(unsigned x) { + return fls_u_slow(x); +} +#endif + #ifdef JEMALLOC_INTERNAL_POPCOUNTL -BIT_UTIL_INLINE unsigned +static inline unsigned popcount_lu(unsigned long bitmap) { return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); } @@ -48,7 +191,7 @@ popcount_lu(unsigned long bitmap) { * place of bit. bitmap *must not* be 0. 
*/ -BIT_UTIL_INLINE size_t +static inline size_t cfs_lu(unsigned long* bitmap) { util_assume(*bitmap != 0); size_t bit = ffs_lu(*bitmap); @@ -56,101 +199,102 @@ cfs_lu(unsigned long* bitmap) { return bit; } -BIT_UTIL_INLINE unsigned -ffs_zu(size_t bitmap) { +static inline unsigned +ffs_zu(size_t x) { #if LG_SIZEOF_PTR == LG_SIZEOF_INT - return ffs_u(bitmap); + return ffs_u(x); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG - return ffs_lu(bitmap); + return ffs_lu(x); #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG - return ffs_llu(bitmap); + return ffs_llu(x); #else #error No implementation for size_t ffs() #endif } -BIT_UTIL_INLINE unsigned -ffs_u64(uint64_t bitmap) { +static inline unsigned +fls_zu(size_t x) { +#if LG_SIZEOF_PTR == LG_SIZEOF_INT + return fls_u(x); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG + return fls_lu(x); +#elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG + return fls_llu(x); +#else +#error No implementation for size_t fls() +#endif +} + + +static inline unsigned +ffs_u64(uint64_t x) { #if LG_SIZEOF_LONG == 3 - return ffs_lu(bitmap); + return ffs_lu(x); #elif LG_SIZEOF_LONG_LONG == 3 - return ffs_llu(bitmap); + return ffs_llu(x); #else #error No implementation for 64-bit ffs() #endif } -BIT_UTIL_INLINE unsigned -ffs_u32(uint32_t bitmap) { +static inline unsigned +fls_u64(uint64_t x) { +#if LG_SIZEOF_LONG == 3 + return fls_lu(x); +#elif LG_SIZEOF_LONG_LONG == 3 + return fls_llu(x); +#else +#error No implementation for 64-bit fls() +#endif +} + +static inline unsigned +ffs_u32(uint32_t x) { #if LG_SIZEOF_INT == 2 - return ffs_u(bitmap); + return ffs_u(x); #else #error No implementation for 32-bit ffs() #endif - return ffs_u(bitmap); + return ffs_u(x); } -BIT_UTIL_INLINE uint64_t +static inline unsigned +fls_u32(uint32_t x) { +#if LG_SIZEOF_INT == 2 + return fls_u(x); +#else +#error No implementation for 32-bit fls() +#endif + return fls_u(x); +} + +static inline uint64_t pow2_ceil_u64(uint64_t x) { -#if (defined(__amd64__) || defined(__x86_64__) || 
defined(JEMALLOC_HAVE_BUILTIN_CLZ)) - if(unlikely(x <= 1)) { + if (unlikely(x <= 1)) { return x; } - size_t msb_on_index; -#if (defined(__amd64__) || defined(__x86_64__)) - asm ("bsrq %1, %0" - : "=r"(msb_on_index) // Outputs. - : "r"(x-1) // Inputs. - ); -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) - msb_on_index = (63 ^ __builtin_clzll(x - 1)); -#endif + size_t msb_on_index = fls_u64(x - 1); + /* + * Range-check; it's on the callers to ensure that the result of this + * call won't overflow. + */ assert(msb_on_index < 63); return 1ULL << (msb_on_index + 1); -#else - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x |= x >> 32; - x++; - return x; -#endif } -BIT_UTIL_INLINE uint32_t +static inline uint32_t pow2_ceil_u32(uint32_t x) { -#if ((defined(__i386__) || defined(JEMALLOC_HAVE_BUILTIN_CLZ)) && (!defined(__s390__))) - if(unlikely(x <= 1)) { - return x; + if (unlikely(x <= 1)) { + return x; } - size_t msb_on_index; -#if (defined(__i386__)) - asm ("bsr %1, %0" - : "=r"(msb_on_index) // Outputs. - : "r"(x-1) // Inputs. - ); -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) - msb_on_index = (31 ^ __builtin_clz(x - 1)); -#endif + size_t msb_on_index = fls_u32(x - 1); + /* As above. */ assert(msb_on_index < 31); return 1U << (msb_on_index + 1); -#else - x--; - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - x++; - return x; -#endif } /* Compute the smallest power of 2 that is >= x. */ -BIT_UTIL_INLINE size_t +static inline size_t pow2_ceil_zu(size_t x) { #if (LG_SIZEOF_PTR == 3) return pow2_ceil_u64(x); @@ -159,77 +303,21 @@ pow2_ceil_zu(size_t x) { #endif } -#if (defined(__i386__) || defined(__amd64__) || defined(__x86_64__)) -BIT_UTIL_INLINE unsigned +static inline unsigned lg_floor(size_t x) { - size_t ret; - assert(x != 0); - - asm ("bsr %1, %0" - : "=r"(ret) // Outputs. - : "r"(x) // Inputs. 
- ); - assert(ret < UINT_MAX); - return (unsigned)ret; -} -#elif (defined(_MSC_VER)) -BIT_UTIL_INLINE unsigned -lg_floor(size_t x) { - unsigned long ret; - - assert(x != 0); - + util_assume(x != 0); #if (LG_SIZEOF_PTR == 3) - _BitScanReverse64(&ret, x); -#elif (LG_SIZEOF_PTR == 2) - _BitScanReverse(&ret, x); + return fls_u64(x); #else -# error "Unsupported type size for lg_floor()" -#endif - assert(ret < UINT_MAX); - return (unsigned)ret; -} -#elif (defined(JEMALLOC_HAVE_BUILTIN_CLZ)) -BIT_UTIL_INLINE unsigned -lg_floor(size_t x) { - assert(x != 0); - -#if (LG_SIZEOF_PTR == LG_SIZEOF_INT) - return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clz(x); -#elif (LG_SIZEOF_PTR == LG_SIZEOF_LONG) - return ((8 << LG_SIZEOF_PTR) - 1) - __builtin_clzl(x); -#else -# error "Unsupported type size for lg_floor()" + return fls_u32(x); #endif } -#else -BIT_UTIL_INLINE unsigned -lg_floor(size_t x) { - assert(x != 0); - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); - x |= (x >> 8); - x |= (x >> 16); -#if (LG_SIZEOF_PTR == 3) - x |= (x >> 32); -#endif - if (x == SIZE_T_MAX) { - return (8 << LG_SIZEOF_PTR) - 1; - } - x++; - return ffs_zu(x) - 1; -} -#endif - -BIT_UTIL_INLINE unsigned +static inline unsigned lg_ceil(size_t x) { return lg_floor(x) + ((x & (x - 1)) == 0 ? 0 : 1); } -#undef BIT_UTIL_INLINE - /* A compile-time version of lg_floor and lg_ceil. */ #define LG_FLOOR_1(x) 0 #define LG_FLOOR_2(x) (x < (1ULL << 1) ? 
LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index f3761fd7..045cf8b4 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -120,7 +120,6 @@ TEST_BEGIN(test_ffs_u) { } TEST_END - TEST_BEGIN(test_ffs_lu) { TEST_FFS(unsigned long, lu, lu, "lu"); } @@ -136,7 +135,6 @@ TEST_BEGIN(test_ffs_u32) { } TEST_END - TEST_BEGIN(test_ffs_u64) { TEST_FFS(uint64_t, u64, u64, FMTu64); } @@ -147,9 +145,69 @@ TEST_BEGIN(test_ffs_zu) { } TEST_END +#define TEST_FLS(t, suf, test_suf, pri) do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(fls_##suf(x), i, \ + "Unexpected result, x=%"pri, x); \ + } \ + } \ + } \ +} while(0) + +TEST_BEGIN(test_fls_u) { + TEST_FLS(unsigned, u, u,"u"); +} +TEST_END + +TEST_BEGIN(test_fls_lu) { + TEST_FLS(unsigned long, lu, lu, "lu"); +} +TEST_END + +TEST_BEGIN(test_fls_llu) { + TEST_FLS(unsigned long long, llu, qd, "llu"); +} +TEST_END + +TEST_BEGIN(test_fls_u32) { + TEST_FLS(uint32_t, u32, u32, FMTu32); +} +TEST_END + +TEST_BEGIN(test_fls_u64) { + TEST_FLS(uint64_t, u64, u64, FMTu64); +} +TEST_END + +TEST_BEGIN(test_fls_zu) { + TEST_FLS(size_t, zu, zu, "zu"); +} +TEST_END + +TEST_BEGIN(test_fls_u_slow) { + TEST_FLS(unsigned, u_slow, u,"u"); +} +TEST_END + +TEST_BEGIN(test_fls_lu_slow) { + TEST_FLS(unsigned long, lu_slow, lu, "lu"); +} +TEST_END + +TEST_BEGIN(test_fls_llu_slow) { + TEST_FLS(unsigned long long, llu_slow, qd, "llu"); +} +TEST_END + + int main(void) { - return test( + return test_no_reentrancy( test_pow2_ceil_u64, test_pow2_ceil_u32, test_pow2_ceil_zu, @@ -159,5 +217,14 @@ main(void) { test_ffs_llu, test_ffs_u32, test_ffs_u64, - test_ffs_zu); + test_ffs_zu, + test_fls_u, + test_fls_lu, + test_fls_llu, + test_fls_u32, + test_fls_u64, + test_fls_zu, + test_fls_u_slow, + test_fls_lu_slow, + test_fls_llu_slow); } 
From efeab1f4985281fb7cb12ffd985a84317bfb3332 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 17 Jul 2020 16:12:28 -0700 Subject: [PATCH 1828/2608] bitset test: Pull NBITS_TAB into its own file. --- test/include/test/nbits.h | 98 +++++++++++++++++++++++++++++++++++++++ test/unit/bitmap.c | 92 +----------------------------------- 2 files changed, 99 insertions(+), 91 deletions(-) create mode 100644 test/include/test/nbits.h diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h new file mode 100644 index 00000000..be74baa9 --- /dev/null +++ b/test/include/test/nbits.h @@ -0,0 +1,98 @@ +#ifndef TEST_NBITS_H +#define TEST_NBITS_H + +/* Interesting bitmap counts to test. */ + +#define NBITS_TAB \ + NB( 1) \ + NB( 2) \ + NB( 3) \ + NB( 4) \ + NB( 5) \ + NB( 6) \ + NB( 7) \ + NB( 8) \ + NB( 9) \ + NB(10) \ + NB(11) \ + NB(12) \ + NB(13) \ + NB(14) \ + NB(15) \ + NB(16) \ + NB(17) \ + NB(18) \ + NB(19) \ + NB(20) \ + NB(21) \ + NB(22) \ + NB(23) \ + NB(24) \ + NB(25) \ + NB(26) \ + NB(27) \ + NB(28) \ + NB(29) \ + NB(30) \ + NB(31) \ + NB(32) \ + \ + NB(33) \ + NB(34) \ + NB(35) \ + NB(36) \ + NB(37) \ + NB(38) \ + NB(39) \ + NB(40) \ + NB(41) \ + NB(42) \ + NB(43) \ + NB(44) \ + NB(45) \ + NB(46) \ + NB(47) \ + NB(48) \ + NB(49) \ + NB(50) \ + NB(51) \ + NB(52) \ + NB(53) \ + NB(54) \ + NB(55) \ + NB(56) \ + NB(57) \ + NB(58) \ + NB(59) \ + NB(60) \ + NB(61) \ + NB(62) \ + NB(63) \ + NB(64) \ + NB(65) \ + \ + NB(126) \ + NB(127) \ + NB(128) \ + NB(129) \ + NB(130) \ + \ + NB(254) \ + NB(255) \ + NB(256) \ + NB(257) \ + NB(258) \ + \ + NB(510) \ + NB(511) \ + NB(512) \ + NB(513) \ + NB(514) \ + \ + NB(1024) \ + NB(2048) \ + NB(4096) \ + NB(8192) \ + NB(16384) + +#endif /* TEST_NBITS_H */ diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 6b0ea9ef..78e542b6 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -1,96 +1,6 @@ #include "test/jemalloc_test.h" -#define NBITS_TAB \ - NB( 1) \ - NB( 2) \ - NB( 3) \ - NB( 4) \ - NB( 5) \ - NB( 
6) \ - NB( 7) \ - NB( 8) \ - NB( 9) \ - NB(10) \ - NB(11) \ - NB(12) \ - NB(13) \ - NB(14) \ - NB(15) \ - NB(16) \ - NB(17) \ - NB(18) \ - NB(19) \ - NB(20) \ - NB(21) \ - NB(22) \ - NB(23) \ - NB(24) \ - NB(25) \ - NB(26) \ - NB(27) \ - NB(28) \ - NB(29) \ - NB(30) \ - NB(31) \ - NB(32) \ - \ - NB(33) \ - NB(34) \ - NB(35) \ - NB(36) \ - NB(37) \ - NB(38) \ - NB(39) \ - NB(40) \ - NB(41) \ - NB(42) \ - NB(43) \ - NB(44) \ - NB(45) \ - NB(46) \ - NB(47) \ - NB(48) \ - NB(49) \ - NB(50) \ - NB(51) \ - NB(52) \ - NB(53) \ - NB(54) \ - NB(55) \ - NB(56) \ - NB(57) \ - NB(58) \ - NB(59) \ - NB(60) \ - NB(61) \ - NB(62) \ - NB(63) \ - NB(64) \ - NB(65) \ - \ - NB(126) \ - NB(127) \ - NB(128) \ - NB(129) \ - NB(130) \ - \ - NB(254) \ - NB(255) \ - NB(256) \ - NB(257) \ - NB(258) \ - \ - NB(510) \ - NB(511) \ - NB(512) \ - NB(513) \ - NB(514) \ - \ - NB(1024) \ - NB(2048) \ - NB(4096) \ - NB(8192) \ - NB(16384) \ +#include "test/nbits.h" static void test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { From 7fde6ac490bd6a257023aafcbedcf422a9413b4f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 17 Jul 2020 16:15:49 -0700 Subject: [PATCH 1829/2608] Nbits: Add a couple more interesting sizes. Previously, all tests with more than two levels came in powers of 2. It's useful to check cases where we have a partially filled group at above the second level. 
--- test/include/test/nbits.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h index be74baa9..c06cf1b4 100644 --- a/test/include/test/nbits.h +++ b/test/include/test/nbits.h @@ -70,6 +70,8 @@ NB(63) \ NB(64) \ NB(65) \ + NB(66) \ + NB(67) \ \ NB(126) \ NB(127) \ @@ -89,9 +91,20 @@ NB(513) \ NB(514) \ \ + NB(1022) \ + NB(1023) \ NB(1024) \ + NB(1025) \ + NB(1026) \ + \ NB(2048) \ + \ + NB(4094) \ + NB(4095) \ NB(4096) \ + NB(4097) \ + NB(4098) \ + \ NB(8192) \ NB(16384) From ceee823519bb534c2609e1dadd9b923bd28853b4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 17 Jul 2020 18:42:50 -0700 Subject: [PATCH 1830/2608] Add flat_bitmap. The flat_bitmap module offers an extended API, at the cost of decreased performance in the case of very large bitmaps. --- Makefile.in | 1 + include/jemalloc/internal/flat_bitmap.h | 222 +++++++++++++++++ test/unit/flat_bitmap.c | 313 ++++++++++++++++++++++++ 3 files changed, 536 insertions(+) create mode 100644 include/jemalloc/internal/flat_bitmap.h create mode 100644 test/unit/flat_bitmap.c diff --git a/Makefile.in b/Makefile.in index 2802f7f2..10c5392e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -203,6 +203,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ + ${srcroot}test/unit/flat_bitmap.c \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h new file mode 100644 index 00000000..cf2baab0 --- /dev/null +++ b/include/jemalloc/internal/flat_bitmap.h @@ -0,0 +1,222 @@ +#ifndef JEMALLOC_INTERNAL_FB_H +#define JEMALLOC_INTERNAL_FB_H + +/* + * The flat bitmap module. 
This has a larger API relative to the bitmap module + * (supporting things like backwards searches, and searching for both set and + * unset bits), at the cost of slower operations for very large bitmaps. + * + * Initialized flat bitmaps start at all-zeros (all bits unset). + */ + +typedef unsigned long fb_group_t; +#define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3)) +#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \ + + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1)) + +static inline void +fb_init(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + memset(fb, 0, ngroups * sizeof(fb_group_t)); +} + +static inline bool +fb_empty(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + if (fb[i] != 0) { + return false; + } + } + return true; +} + +static inline bool +fb_full(fb_group_t *fb, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + size_t trailing_bits = nbits % FB_GROUP_BITS; + size_t limit = (trailing_bits == 0 ? 
ngroups : ngroups - 1); + for (size_t i = 0; i < limit; i++) { + if (fb[i] != ~(fb_group_t)0) { + return false; + } + } + if (trailing_bits == 0) { + return true; + } + return fb[ngroups - 1] == ((fb_group_t)1 << trailing_bits) - 1; +} + +static inline bool +fb_get(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + return (bool)(fb[group_ind] & ((fb_group_t)1 << bit_ind)); +} + +static inline void +fb_set(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + fb[group_ind] |= ((fb_group_t)1 << bit_ind); +} + +static inline void +fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { + assert(bit < nbits); + size_t group_ind = bit / FB_GROUP_BITS; + size_t bit_ind = bit % FB_GROUP_BITS; + fb[group_ind] &= ~((fb_group_t)1 << bit_ind); +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_group_impl(fb_group_t *fb, size_t start, size_t cnt, bool val) { + assert(cnt > 0); + assert(start + cnt - 1 < FB_GROUP_BITS); + fb_group_t bits = ((~(fb_group_t)0) >> (FB_GROUP_BITS - cnt)) << start; + if (val) { + *fb |= bits; + } else { + *fb &= ~bits; + } +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, + bool val) { + assert(start + cnt - 1 < nbits); + size_t group_ind = start / FB_GROUP_BITS; + size_t start_bit_ind = start % FB_GROUP_BITS; + /* + * The first group is special; it's the only one we don't start writing + * to from bit 0. + */ + size_t first_group_cnt = + (start_bit_ind + cnt > FB_GROUP_BITS + ? FB_GROUP_BITS - start_bit_ind + : cnt); + /* + * We can basically split affected words into: + * - The first group, where we touch only the high bits + * - The last group, where we touch only the low bits + * - The middle, where we set all the bits to the same thing. + * We treat each case individually. 
The last two could be merged, but + * this can lead to bad codegen for those middle words. + */ + /* First group */ + fb_assign_group_impl(&fb[group_ind], start_bit_ind, first_group_cnt, + val); + cnt -= first_group_cnt; + group_ind++; + /* Middle groups */ + while (cnt > FB_GROUP_BITS) { + fb_assign_group_impl(&fb[group_ind], 0, FB_GROUP_BITS, val); + cnt -= FB_GROUP_BITS; + group_ind++; + } + /* Last group */ + if (cnt != 0) { + fb_assign_group_impl(&fb[group_ind], 0, cnt, val); + } +} + +/* Sets the cnt bits starting at position start. Must not have a 0 count. */ +static inline void +fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + fb_assign_impl(fb, nbits, start, cnt, true); +} + +/* Unsets the cnt bits starting at position start. Must not have a 0 count. */ +static inline void +fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + fb_assign_impl(fb, nbits, start, cnt, false); +} + +/* + * An implementation detail; find the first bit at position >= min_bit with the + * value val. + * + * Returns the number of bits in the bitmap if no such bit exists. + */ +JEMALLOC_ALWAYS_INLINE ssize_t +fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val, + bool forward) { + assert(start < nbits); + size_t ngroups = FB_NGROUPS(nbits); + ssize_t group_ind = start / FB_GROUP_BITS; + size_t bit_ind = start % FB_GROUP_BITS; + + fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1); + + fb_group_t group = fb[group_ind]; + group ^= maybe_invert; + if (forward) { + /* Only keep ones in bits bit_ind and above. */ + group &= ~((1LU << bit_ind) - 1); + } else { + /* + * Only keep ones in bits bit_ind and below. You might more + * naturally express this as (1 << (bit_ind + 1)) - 1, but + * that shifts by an invalid amount if bit_ind is one less than + * FB_GROUP_BITS. + */ + group &= ((2LU << bit_ind) - 1); + } + ssize_t group_ind_bound = forward ? (ssize_t)ngroups : -1; + while (group == 0) { + group_ind += forward ? 
1 : -1;
+		if (group_ind == group_ind_bound) {
+			return forward ? (ssize_t)nbits : (ssize_t)-1;
+		}
+		group = fb[group_ind];
+		group ^= maybe_invert;
+	}
+	assert(group != 0);
+	size_t bit = forward ? ffs_lu(group) : fls_lu(group);
+	size_t pos = group_ind * FB_GROUP_BITS + bit;
+	/*
+	 * The high bits of a partially filled last group are zeros, so if we're
+	 * looking for zeros we don't want to report an invalid result.
+	 */
+	if (forward && !val && pos > nbits) {
+		return nbits;
+	}
+	return pos;
+}
+
+/*
+ * Find the first unset bit in the bitmap with an index >= min_bit.  Returns
+ * the number of bits in the bitmap if no such bit exists.
+ */
+static inline size_t
+fb_ffu(fb_group_t *fb, size_t nbits, size_t min_bit) {
+	return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ false,
+	    /* forward */ true);
+}
+
+/* The same, but looks for a set bit. */
+static inline size_t
+fb_ffs(fb_group_t *fb, size_t nbits, size_t min_bit) {
+	return (size_t)fb_find_impl(fb, nbits, min_bit, /* val */ true,
+	    /* forward */ true);
+}
+
+/*
+ * Find the last unset bit in the bitmap with an index <= max_bit.  Returns -1
+ * if no such bit exists.
+ */
+static inline ssize_t
+fb_flu(fb_group_t *fb, size_t nbits, size_t max_bit) {
+	return fb_find_impl(fb, nbits, max_bit, /* val */ false,
+	    /* forward */ false);
+}
+
+static inline ssize_t
+fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) {
+	return fb_find_impl(fb, nbits, max_bit, /* val */ true,
+	    /* forward */ false);
+}
+
+#endif /* JEMALLOC_INTERNAL_FB_H */
diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c
new file mode 100644
index 00000000..1667f777
--- /dev/null
+++ b/test/unit/flat_bitmap.c
@@ -0,0 +1,313 @@
+#include "test/jemalloc_test.h"
+
+#include "jemalloc/internal/flat_bitmap.h"
+#include "test/nbits.h"
+
+static void
+do_test_init(size_t nbits) {
+	size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t);
+	fb_group_t *fb = malloc(sz);
+	/* Junk fb's contents.
*/ + memset(fb, 99, sz); + fb_init(fb, nbits); + for (size_t i = 0; i < nbits; i++) { + expect_false(fb_get(fb, nbits, i), + "bitmap should start empty"); + } + free(fb); +} + +TEST_BEGIN(test_fb_init) { +#define NB(nbits) \ + do_test_init(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +do_test_get_set_unset(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + /* Set the bits divisible by 3. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 3 == 0) { + fb_set(fb, nbits, i); + } + } + /* Check them. */ + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 3 == 0, fb_get(fb, nbits, i), + "Unexpected bit at position %zu", i); + } + /* Unset those divisible by 5. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 5 == 0) { + fb_unset(fb, nbits, i); + } + } + /* Check them. */ + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 3 == 0 && i % 5 != 0, fb_get(fb, nbits, i), + "Unexpected bit at position %zu", i); + } + free(fb); +} + +TEST_BEGIN(test_get_set_unset) { +#define NB(nbits) \ + do_test_get_set_unset(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static ssize_t +find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { + for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) { + bool expected_bit = i % 3 == 0 || i % 5 == 0; + if (expected_bit == bit) { + return i; + } + } + return forward ? (ssize_t)nbits : (ssize_t)-1; +} + +static void +do_test_search_simple(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + + /* We pick multiples of 3 or 5. */ + for (size_t i = 0; i < nbits; i++) { + if (i % 3 == 0) { + fb_set(fb, nbits, i); + } + /* This tests double-setting a little, too. 
*/ + if (i % 5 == 0) { + fb_set(fb, nbits, i); + } + } + for (size_t i = 0; i < nbits; i++) { + size_t ffs_compute = find_3_5_compute(i, nbits, true, true); + size_t ffs_search = fb_ffs(fb, nbits, i); + expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i); + + ssize_t fls_compute = find_3_5_compute(i, nbits, true, false); + size_t fls_search = fb_fls(fb, nbits, i); + expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i); + + size_t ffu_compute = find_3_5_compute(i, nbits, false, true); + size_t ffu_search = fb_ffu(fb, nbits, i); + expect_zu_eq(ffu_compute, ffu_search, "ffu mismatch at %zu", i); + + size_t flu_compute = find_3_5_compute(i, nbits, false, false); + size_t flu_search = fb_flu(fb, nbits, i); + expect_zu_eq(flu_compute, flu_search, "flu mismatch at %zu", i); + } + + free(fb); +} + +TEST_BEGIN(test_search_simple) { +#define NB(nbits) \ + do_test_search_simple(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static void +expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, + size_t nbits, size_t special_bit, size_t position) { + if (position < special_bit) { + expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(-1, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_fls(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(special_bit, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(-1, fb_flu(mostly_full, nbits, position), + "mismatch at 
%zu, %zu", position, special_bit); + } else if (position == special_bit) { + expect_zu_eq(special_bit, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } else { + /* position > special_bit. 
*/ + expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, + position), "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + + expect_zu_eq(position, fb_ffs(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position, fb_fls(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zu_eq(nbits, fb_ffu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_flu(mostly_full, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + } +} + +static void +do_test_search_exhaustive(size_t nbits) { + /* This test is quadratic; let's not get too big. */ + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *empty = malloc(sz); + fb_init(empty, nbits); + fb_group_t *full = malloc(sz); + fb_init(full, nbits); + fb_set_range(full, nbits, 0, nbits); + + for (size_t i = 0; i < nbits; i++) { + fb_set(empty, nbits, i); + fb_unset(full, nbits, i); + + for (size_t j = 0; j < nbits; j++) { + expect_exhaustive_results(full, empty, nbits, i, j); + } + fb_unset(empty, nbits, i); + fb_set(full, nbits, i); + } + + free(empty); + free(full); +} + +TEST_BEGIN(test_search_exhaustive) { +#define NB(nbits) \ + do_test_search_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +TEST_BEGIN(test_range_simple) { + /* + * Just pick a constant big enough to have nontrivial middle sizes, and + * big enough that usages of things like weirdnum (below) near the + * beginning fit comfortably into the beginning of the bitmap. 
+ */ + size_t nbits = 64 * 10; + size_t ngroups = FB_NGROUPS(nbits); + fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups); + fb_init(fb, nbits); + for (size_t i = 0; i < nbits; i++) { + if (i % 2 == 0) { + fb_set_range(fb, nbits, i, 1); + } + } + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i % 2 == 0, fb_get(fb, nbits, i), + "mismatch at position %zu", i); + } + fb_set_range(fb, nbits, 0, nbits / 2); + fb_unset_range(fb, nbits, nbits / 2, nbits / 2); + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(i < nbits / 2, fb_get(fb, nbits, i), + "mismatch at position %zu", i); + } + + static const size_t weirdnum = 7; + fb_set_range(fb, nbits, 0, nbits); + fb_unset_range(fb, nbits, weirdnum, FB_GROUP_BITS + weirdnum); + for (size_t i = 0; i < nbits; i++) { + expect_b_eq(7 <= i && i <= 2 * weirdnum + FB_GROUP_BITS - 1, + !fb_get(fb, nbits, i), "mismatch at position %zu", i); + } + free(fb); +} +TEST_END + +static void +do_test_empty_full_exhaustive(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *empty = malloc(sz); + fb_init(empty, nbits); + fb_group_t *full = malloc(sz); + fb_init(full, nbits); + fb_set_range(full, nbits, 0, nbits); + + expect_true(fb_full(full, nbits), ""); + expect_false(fb_empty(full, nbits), ""); + expect_false(fb_full(empty, nbits), ""); + expect_true(fb_empty(empty, nbits), ""); + + for (size_t i = 0; i < nbits; i++) { + fb_set(empty, nbits, i); + fb_unset(full, nbits, i); + + expect_false(fb_empty(empty, nbits), "error at bit %zu", i); + if (nbits != 1) { + expect_false(fb_full(empty, nbits), + "error at bit %zu", i); + expect_false(fb_empty(full, nbits), + "error at bit %zu", i); + } else { + expect_true(fb_full(empty, nbits), + "error at bit %zu", i); + expect_true(fb_empty(full, nbits), + "error at bit %zu", i); + } + expect_false(fb_full(full, nbits), "error at bit %zu", i); + + fb_unset(empty, nbits, i); + fb_set(full, nbits, i); + } + + free(empty); + free(full); +} + 
+TEST_BEGIN(test_empty_full) { +#define NB(nbits) \ + do_test_empty_full_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_fb_init, + test_get_set_unset, + test_search_simple, + test_search_exhaustive, + test_range_simple, + test_empty_full); +} From ddb8dc4ad0523e07ab0475d6c9583d8ca27de8dc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 27 Jul 2020 12:26:06 -0700 Subject: [PATCH 1831/2608] FB: Add range iteration support. --- include/jemalloc/internal/flat_bitmap.h | 65 +++++++ test/unit/flat_bitmap.c | 234 +++++++++++++++++++++++- 2 files changed, 298 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index cf2baab0..7b894d53 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -219,4 +219,69 @@ fb_fls(fb_group_t *fb, size_t nbits, size_t max_bit) { /* forward */ false); } +/* Returns whether or not we found a range. */ +JEMALLOC_ALWAYS_INLINE bool +fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len, bool val, bool forward) { + assert(start < nbits); + ssize_t next_range_begin = fb_find_impl(fb, nbits, start, val, forward); + if ((forward && next_range_begin == (ssize_t)nbits) + || (!forward && next_range_begin == (ssize_t)-1)) { + return false; + } + /* Half open range; the set bits are [begin, end). */ + ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val, + forward); + if (forward) { + *r_begin = next_range_begin; + *r_len = next_range_end - next_range_begin; + } else { + *r_begin = next_range_end + 1; + *r_len = next_range_begin - next_range_end; + } + return true; +} + +/* + * Used to iterate through ranges of set bits. + * + * Tries to find the next contiguous sequence of set bits with a first index >= + * start. 
If one exists, puts the earliest bit of the range in *r_begin, its + * length in *r_len, and returns true. Otherwise, returns false (without + * touching *r_begin or *r_end). + */ +static inline bool +fb_srange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ true, /* forward */ true); +} + +/* + * The same as fb_srange_iter, but searches backwards from start rather than + * forwards. (The position returned is still the earliest bit in the range). + */ +static inline bool +fb_srange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ true, /* forward */ false); +} + +/* Similar to fb_srange_iter, but searches for unset bits. */ +static inline bool +fb_urange_iter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ false, /* forward */ true); +} + +/* Similar to fb_srange_riter, but searches for unset bits. */ +static inline bool +fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len) { + return fb_iter_range_impl(fb, nbits, start, r_begin, r_len, + /* val */ false, /* forward */ false); +} + #endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 1667f777..14ac6ba4 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -301,6 +301,236 @@ TEST_BEGIN(test_empty_full) { } TEST_END +TEST_BEGIN(test_iter_range_simple) { + size_t set_limit = 30; + size_t nbits = 100; + fb_group_t fb[FB_NGROUPS(100)]; + + fb_init(fb, nbits); + + /* + * Failing to initialize these can lead to build failures with -Wall; + * the compiler can't prove that they're set. 
+ */ + size_t begin = (size_t)-1; + size_t len = (size_t)-1; + bool result; + + /* A set of checks with only the first set_limit bits *set*. */ + fb_set_range(fb, nbits, 0, set_limit); + for (size_t i = 0; i < set_limit; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i + 1, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + } + for (size_t i = set_limit; i < nbits; i++) { + result = fb_srange_iter(fb, nbits, i, &begin, &len); + expect_false(result, "Should not have found a range at %zu", i); + + result = fb_urange_iter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(i, begin, "Incorrect begin at %zu", i); + expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i); + + result = fb_srange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(0, begin, "Incorrect begin at %zu", i); + expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); + + result = fb_urange_riter(fb, nbits, i, &begin, &len); + expect_true(result, "Should have found a range at %zu", i); + expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i); + expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i); + } + + /* A set of checks with only the 
first set_limit bits *unset*. */
+	fb_unset_range(fb, nbits, 0, set_limit);
+	fb_set_range(fb, nbits, set_limit, nbits - set_limit);
+	for (size_t i = 0; i < set_limit; i++) {
+		result = fb_srange_iter(fb, nbits, i, &begin, &len);
+		expect_true(result, "Should have found a range at %zu", i);
+		expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i);
+		expect_zu_eq(nbits - set_limit, len, "Incorrect len at %zu", i);
+
+		result = fb_urange_iter(fb, nbits, i, &begin, &len);
+		expect_true(result, "Should have found a range at %zu", i);
+		expect_zu_eq(i, begin, "Incorrect begin at %zu", i);
+		expect_zu_eq(set_limit - i, len, "Incorrect len at %zu", i);
+
+		result = fb_srange_riter(fb, nbits, i, &begin, &len);
+		expect_false(result, "Should not have found a range at %zu", i);
+
+		result = fb_urange_riter(fb, nbits, i, &begin, &len);
+		expect_true(result, "Should have found a range at %zu", i);
+		expect_zu_eq(0, begin, "Incorrect begin at %zu", i);
+		expect_zu_eq(i + 1, len, "Incorrect len at %zu", i);
+	}
+	for (size_t i = set_limit; i < nbits; i++) {
+		result = fb_srange_iter(fb, nbits, i, &begin, &len);
+		expect_true(result, "Should have found a range at %zu", i);
+		expect_zu_eq(i, begin, "Incorrect begin at %zu", i);
+		expect_zu_eq(nbits - i, len, "Incorrect len at %zu", i);
+
+		result = fb_urange_iter(fb, nbits, i, &begin, &len);
+		expect_false(result, "Should not have found a range at %zu", i);
+
+		result = fb_srange_riter(fb, nbits, i, &begin, &len);
+		expect_true(result, "Should have found a range at %zu", i);
+		expect_zu_eq(set_limit, begin, "Incorrect begin at %zu", i);
+		expect_zu_eq(i - set_limit + 1, len, "Incorrect len at %zu", i);
+
+		result = fb_urange_riter(fb, nbits, i, &begin, &len);
+		expect_true(result, "Should have found a range at %zu", i);
+		expect_zu_eq(0, begin, "Incorrect begin at %zu", i);
+		expect_zu_eq(set_limit, len, "Incorrect len at %zu", i);
+	}
+
+}
+TEST_END
+
+/*
+ * Doing this bit-by-bit is too slow for a real implementation,
but for testing + * code, it's easy to get right. In the exhaustive tests, we'll compare the + * (fast but tricky) real implementation against the (slow but simple) testing + * one. + */ +static bool +fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, + size_t *r_len, bool val, bool forward) { + ssize_t stride = (forward ? (ssize_t)1 : (ssize_t)-1); + ssize_t range_begin = (ssize_t)start; + for (; range_begin != (ssize_t)nbits && range_begin != -1; + range_begin += stride) { + if (fb_get(fb, nbits, range_begin) == val) { + ssize_t range_end = range_begin; + for (; range_end != (ssize_t)nbits && range_end != -1; + range_end += stride) { + if (fb_get(fb, nbits, range_end) != val) { + break; + } + } + if (forward) { + *r_begin = range_begin; + *r_len = range_end - range_begin; + } else { + *r_begin = range_end + 1; + *r_len = range_begin - range_end; + } + return true; + } + } + return false; +} + +static void +expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, + bool val, bool forward) { + bool iter_res; + size_t iter_begin; + size_t iter_len; + if (val) { + if (forward) { + iter_res = fb_srange_iter(fb, nbits, pos, + &iter_begin, &iter_len); + } else { + iter_res = fb_srange_riter(fb, nbits, pos, + &iter_begin, &iter_len); + } + } else { + if (forward) { + iter_res = fb_urange_iter(fb, nbits, pos, + &iter_begin, &iter_len); + } else { + iter_res = fb_urange_riter(fb, nbits, pos, + &iter_begin, &iter_len); + } + } + + bool simple_iter_res; + size_t simple_iter_begin; + size_t simple_iter_len; + simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, + &simple_iter_len, val, forward); + + expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos); + if (iter_res && simple_iter_res) { + assert_zu_eq(iter_begin, simple_iter_begin, + "Begin mismatch at %zu", pos); + expect_zu_eq(iter_len, simple_iter_len, + "Length mismatch at %zu", pos); + } +} + +static void +expect_iter_results(fb_group_t *fb, size_t 
nbits) { + for (size_t i = 0; i < nbits; i++) { + expect_iter_results_at(fb, nbits, i, false, false); + expect_iter_results_at(fb, nbits, i, false, true); + expect_iter_results_at(fb, nbits, i, true, false); + expect_iter_results_at(fb, nbits, i, true, true); + } +} + +static void +set_pattern_3(fb_group_t *fb, size_t nbits, bool zero_val) { + for (size_t i = 0; i < nbits; i++) { + if ((i % 6 < 3 && zero_val) || (i % 6 >= 3 && !zero_val)) { + fb_set(fb, nbits, i); + } else { + fb_unset(fb, nbits, i); + } + } +} + +static void +do_test_iter_range_exhaustive(size_t nbits) { + /* This test is also pretty slow. */ + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + fb_init(fb, nbits); + + set_pattern_3(fb, nbits, /* zero_val */ true); + expect_iter_results(fb, nbits); + + set_pattern_3(fb, nbits, /* zero_val */ false); + expect_iter_results(fb, nbits); + + fb_set_range(fb, nbits, 0, nbits); + fb_unset_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2); + expect_iter_results(fb, nbits); + + fb_unset_range(fb, nbits, 0, nbits); + fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 
1: nbits / 2); + expect_iter_results(fb, nbits); + + free(fb); +} + +TEST_BEGIN(test_iter_range_exhaustive) { +#define NB(nbits) \ + do_test_iter_range_exhaustive(nbits); + NBITS_TAB +#undef NB +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -309,5 +539,7 @@ main(void) { test_search_simple, test_search_exhaustive, test_range_simple, - test_empty_full); + test_empty_full, + test_iter_range_simple, + test_iter_range_exhaustive); } From f28cc2bc87199e031b9d035ccdff6a2d429274c9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Apr 2020 17:22:43 -0700 Subject: [PATCH 1832/2608] Extract bin shard selection out of bin locking --- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 32 ++++++++++++----------- src/tcache.c | 11 ++++---- 3 files changed, 23 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 674c98f5..c600d10f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -85,7 +85,7 @@ arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); bool arena_init_huge(void); bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); -bin_t *arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, +bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 46da3859..1df276b0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -739,21 +739,20 @@ arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, return (bin->slabcur == NULL); } -/* Choose a bin shard and return the locked bin. 
*/ bin_t * -arena_bin_choose_lock(tsdn_t *tsdn, arena_t *arena, szind_t binind, - unsigned *binshard) { - bin_t *bin; +arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p) { + unsigned binshard; if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { - *binshard = 0; + binshard = 0; } else { - *binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; } - assert(*binshard < bin_infos[binind].n_shards); - bin = &arena->bins[binind].bin_shards[*binshard]; - malloc_mutex_lock(tsdn, &bin->lock); - - return bin; + assert(binshard < bin_infos[binind].n_shards); + if (binshard_p != NULL) { + *binshard_p = binshard; + } + return &arena->bins[binind].bin_shards[binshard]; } void @@ -797,11 +796,12 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, edata_t *fresh_slab = NULL; bool alloc_and_retry = false; unsigned filled = 0; - - bin_t *bin; unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + label_refill: - bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + malloc_mutex_lock(tsdn, &bin->lock); + while (filled < nfill) { /* Try batch-fill from slabcur first. 
*/ edata_t *slabcur = bin->slabcur; @@ -854,6 +854,7 @@ label_refill: bin->stats.nfills++; cache_bin->tstats.nrequests = 0; } + malloc_mutex_unlock(tsdn, &bin->lock); if (alloc_and_retry) { @@ -906,8 +907,9 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { const bin_info_t *bin_info = &bin_infos[binind]; size_t usize = sz_index2size(binind); unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, binind, &binshard); + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + malloc_mutex_lock(tsdn, &bin->lock); edata_t *fresh_slab = NULL; void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { diff --git a/src/tcache.c b/src/tcache.c index a33d9e24..b681ee10 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -454,9 +454,9 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * thread's arena, so the stats didn't get merged. * Manually do so now. */ - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, tcache_arena, - binind, &binshard); + bin_t *bin = arena_bin_choose(tsdn, tcache_arena, + binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += cache_bin->tstats.nrequests; cache_bin->tstats.nrequests = 0; @@ -751,9 +751,8 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { for (unsigned i = 0; i < nhbins; i++) { cache_bin_t *cache_bin = &tcache->bins[i]; if (i < SC_NBINS) { - unsigned binshard; - bin_t *bin = arena_bin_choose_lock(tsdn, arena, i, - &binshard); + bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); + malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += cache_bin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); } else { From 2bb8060d572311e4a42a35fb52e78f78e42725ee Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 16:44:18 -0700 Subject: [PATCH 1833/2608] Add empty test and concat for typed list --- include/jemalloc/internal/typed_list.h | 
8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h index 7ad2237f..6535055a 100644 --- a/include/jemalloc/internal/typed_list.h +++ b/include/jemalloc/internal/typed_list.h @@ -42,6 +42,14 @@ list_type##_replace(list_type##_t *list, el_type *to_remove, \ static inline void \ list_type##_remove(list_type##_t *list, el_type *item) { \ ql_remove(&list->head, item, linkage); \ +} \ +static inline bool \ +list_type##_empty(list_type##_t *list) { \ + return ql_empty(&list->head); \ +} \ +static inline void \ +list_type##_concat(list_type##_t *list_a, list_type##_t *list_b) { \ + ql_concat(&list_a->head, &list_b->head, linkage); \ } #endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */ From 49e5c2fe7d35ffdeb2dc767ab7d3c569eb5c6a40 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Apr 2020 18:13:06 -0700 Subject: [PATCH 1834/2608] Add batch allocation from fresh slabs --- include/jemalloc/internal/arena_externs.h | 2 + src/arena.c | 62 +++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c600d10f..8134f247 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -87,6 +87,8 @@ bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); +size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, + void **ptrs, size_t nfill); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 1df276b0..0a5c60b8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -883,6 +883,68 @@ label_refill: arena_decay_tick(tsdn, arena); } +size_t +arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, + 
void **ptrs, size_t nfill) { + assert(binind < SC_NBINS); + const bin_info_t *bin_info = &bin_infos[binind]; + const size_t nregs = bin_info->nregs; + assert(nregs > 0); + const bool manual_arena = !arena_is_auto(arena); + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + + size_t nslab = 0; + size_t filled = 0; + edata_t *slab = NULL; + edata_list_active_t fulls; + edata_list_active_init(&fulls); + + while (filled < nfill && (slab = arena_slab_alloc(tsdn, arena, binind, + binshard, bin_info)) != NULL) { + assert((size_t)edata_nfree_get(slab) == nregs); + ++nslab; + size_t batch = nfill - filled; + if (batch > nregs) { + batch = nregs; + } + assert(batch > 0); + arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, + &ptrs[filled]); + filled += batch; + if (batch == nregs) { + if (manual_arena) { + edata_list_active_append(&fulls, slab); + } + slab = NULL; + } + } + + malloc_mutex_lock(tsdn, &bin->lock); + /* + * Only the last slab can be non-empty, and the last slab is non-empty + * iff slab != NULL. + */ + if (slab != NULL) { + arena_bin_lower_slab(tsdn, arena, slab, bin); + } + if (manual_arena) { + edata_list_active_concat(&bin->slabs_full, &fulls); + } + assert(edata_list_active_empty(&fulls)); + if (config_stats) { + bin->stats.nslabs += nslab; + bin->stats.curslabs += nslab; + bin->stats.nmalloc += filled; + bin->stats.nrequests += filled; + bin->stats.curregs += filled; + } + malloc_mutex_unlock(tsdn, &bin->lock); + + arena_decay_tick(tsdn, arena); + return filled; +} + /* * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill * bin->slabcur if necessary. 
From f805468957343e0fb02c84c0548eb39f98b9e29c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 22 Jul 2020 17:01:44 -0700 Subject: [PATCH 1835/2608] Add zero option to arena batch allocation --- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8134f247..a2fdff9f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -88,7 +88,7 @@ arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, - void **ptrs, size_t nfill); + void **ptrs, size_t nfill, bool zero); void arena_boot(sc_data_t *sc_data); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); diff --git a/src/arena.c b/src/arena.c index 0a5c60b8..b2feff49 100644 --- a/src/arena.c +++ b/src/arena.c @@ -885,11 +885,13 @@ label_refill: size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, - void **ptrs, size_t nfill) { + void **ptrs, size_t nfill, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; const size_t nregs = bin_info->nregs; assert(nregs > 0); + const size_t usize = bin_info->reg_size; + const bool manual_arena = !arena_is_auto(arena); unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); @@ -911,6 +913,10 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, assert(batch > 0); arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, &ptrs[filled]); + assert(edata_addr_get(slab) == ptrs[filled]); + if (zero) { + memset(ptrs[filled], 0, batch * usize); + } filled += batch; if (batch == nregs) { if (manual_arena) { From c6f59e9bb450bbce279f256ed56c0780092473c4 Mon Sep 17 
00:00:00 2001 From: Yinan Zhang Date: Thu, 12 Mar 2020 15:24:36 -0700 Subject: [PATCH 1836/2608] Add surplus reading API for thread event lookahead --- include/jemalloc/internal/thread_event.h | 50 ++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index bca8a447..5925563a 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -226,12 +226,56 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { } } +/* + * The lookahead functionality facilitates events to be able to lookahead, i.e. + * without touching the event counters, to determine whether an event would be + * triggered. The event counters are not advanced until the end of the + * allocation / deallocation calls, so the lookahead can be useful if some + * preparation work for some event must be done early in the allocation / + * deallocation calls. + * + * Currently only the profiling sampling event needs the lookahead + * functionality, so we don't yet define general purpose lookahead functions. + * + * Surplus is a terminology referring to the amount of bytes beyond what's + * needed for triggering an event, which can be a useful quantity to have in + * general when lookahead is being called. + */ + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, + size_t *surplus) { + if (surplus != NULL) { + /* + * This is a dead store: the surplus will be overwritten before + * any read. The initialization suppresses compiler warnings. + * Meanwhile, using SIZE_MAX to initialize is good for + * debugging purpose, because a valid surplus value is strictly + * less than usize, which is at most SIZE_MAX. + */ + *surplus = SIZE_MAX; + } + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - + tsd_thread_allocated_last_event_get(tsd); + uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); + if (accumbytes < sample_wait) { + return false; + } + assert(accumbytes - sample_wait < (uint64_t)usize); + if (surplus != NULL) { + *surplus = (size_t)(accumbytes - sample_wait); + } + return true; +} + JEMALLOC_ALWAYS_INLINE bool te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { assert(usize == sz_s2u(usize)); - return tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd) >= - tsd_prof_sample_event_wait_get(tsd); + return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); } JEMALLOC_ALWAYS_INLINE void From 978f830ee300c15460085bdc49b4bdb9ef1a16d8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 23 Apr 2020 15:46:45 -0700 Subject: [PATCH 1837/2608] Add batch allocation API --- Makefile.in | 4 +- .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/prof_inlines.h | 11 + include/jemalloc/internal/thread_event.h | 1 - src/jemalloc.c | 117 +++++++++++ test/unit/batch_alloc.c | 190 ++++++++++++++++++ test/unit/batch_alloc.sh | 3 + test/unit/batch_alloc_prof.c | 1 + test/unit/batch_alloc_prof.sh | 3 + 9 files changed, 329 insertions(+), 2 deletions(-) create mode 100644 test/unit/batch_alloc.c create mode 100644 test/unit/batch_alloc.sh create mode 100644 test/unit/batch_alloc_prof.c create mode 100644 test/unit/batch_alloc_prof.sh diff --git a/Makefile.in b/Makefile.in index 10c5392e..da094f08 100644 --- a/Makefile.in +++ b/Makefile.in @@ -191,6 +191,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ + $(srcroot)test/unit/batch_alloc.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ @@ -264,7 +265,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero_reallocs.c ifeq (@enable_prof@, 1) TESTS_UNIT += 
\ - $(srcroot)test/unit/arena_reset_prof.c + $(srcroot)test/unit/arena_reset_prof.c \ + $(srcroot)test/unit/batch_alloc_prof.c endif TESTS_INTEGRATION := $(srcroot)test/integration/aligned_alloc.c \ $(srcroot)test/integration/allocated.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3dea1e21..3e7124d5 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -54,6 +54,7 @@ void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); void arenas_tdata_cleanup(tsd_t *tsd); +size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index d8f401d1..3d0bd14a 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -229,6 +229,17 @@ prof_sample_aligned(const void *ptr) { return ((uintptr_t)ptr & PAGE_MASK) == 0; } +JEMALLOC_ALWAYS_INLINE bool +prof_sampled(tsd_t *tsd, const void *ptr) { + prof_info_t prof_info; + prof_info_get(tsd, ptr, NULL, &prof_info); + bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U; + if (sampled) { + assert(prof_sample_aligned(ptr)); + } + return sampled; +} + JEMALLOC_ALWAYS_INLINE void prof_free(tsd_t *tsd, const void *ptr, size_t usize, emap_alloc_ctx_t *alloc_ctx) { diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 5925563a..525019b6 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -274,7 +274,6 @@ te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, JEMALLOC_ALWAYS_INLINE bool te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { 
- assert(usize == sz_s2u(usize)); return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 9b5ce681..f2e5f8eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3916,6 +3916,123 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { return ret; } +static void +batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) { + assert(config_prof && opt_prof); + bool prof_sample_event = te_prof_sample_event_lookahead(tsd, + batch * usize); + assert(!prof_sample_event); + size_t surplus; + prof_sample_event = te_prof_sample_event_lookahead_surplus(tsd, + (batch + 1) * usize, &surplus); + assert(prof_sample_event); + assert(surplus < usize); +} + +size_t +batch_alloc(void **ptrs, size_t num, size_t size, int flags) { + LOG("core.batch_alloc.entry", + "ptrs: %p, num: %zu, size: %zu, flags: %d", ptrs, num, size, flags); + + tsd_t *tsd = tsd_fetch(); + check_entry_exit_locking(tsd_tsdn(tsd)); + + size_t filled = 0; + + if (unlikely(tsd == NULL || tsd_reentrancy_level_get(tsd) > 0)) { + goto label_done; + } + + size_t alignment = MALLOCX_ALIGN_GET(flags); + size_t usize; + if (aligned_usize_get(size, alignment, &usize, NULL, false)) { + goto label_done; + } + + szind_t ind = sz_size2index(usize); + if (unlikely(ind >= SC_NBINS)) { + /* No optimization for large sizes. */ + void *p; + while (filled < num && (p = je_mallocx(size, flags)) != NULL) { + ptrs[filled++] = p; + } + goto label_done; + } + + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + + unsigned arena_ind = mallocx_arena_get(flags); + arena_t *arena; + if (arena_get_from_ind(tsd, arena_ind, &arena)) { + goto label_done; + } + if (arena == NULL) { + arena = arena_choose(tsd, NULL); + } else { + /* When a manual arena is specified, bypass the tcache. 
*/ + flags |= MALLOCX_TCACHE_NONE; + } + if (unlikely(arena == NULL)) { + goto label_done; + } + + while (filled < num) { + size_t batch = num - filled; + size_t surplus = SIZE_MAX; /* Dead store. */ + bool prof_sample_event = config_prof && opt_prof + && te_prof_sample_event_lookahead_surplus(tsd, + batch * usize, &surplus); + + if (prof_sample_event) { + /* + * Adjust so that the batch does not trigger prof + * sampling. + */ + batch -= surplus / usize + 1; + batch_alloc_prof_sample_assert(tsd, batch, usize); + } + + size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena, + ind, ptrs + filled, batch, zero); + filled += n; + + /* + * For thread events other than prof sampling, trigger them as + * if there's a single allocation of size (n * usize). This is + * fine because: + * (a) these events do not alter the allocation itself, and + * (b) it's possible that some event would have been triggered + * multiple times, instead of only once, if the allocations + * were handled individually, but it would do no harm (or + * even be beneficial) to coalesce the triggerings. + */ + thread_alloc_event(tsd, n * usize); + + if (n < batch) { /* OOM */ + break; + } + + if (prof_sample_event) { + /* + * The next allocation will be prof sampled. The + * thread event logic is handled within the mallocx() + * call. + */ + void *p = je_mallocx(size, flags); + if (p == NULL) { /* OOM */ + break; + } + assert(prof_sampled(tsd, p)); + ptrs[filled++] = p; + } + } + +label_done: + check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.batch_alloc.exit", "result: %zu", filled); + return filled; +} + /* * End non-standard functions. 
*/ diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c new file mode 100644 index 00000000..66e0565f --- /dev/null +++ b/test/unit/batch_alloc.c @@ -0,0 +1,190 @@ +#include "test/jemalloc_test.h" + +#define BATCH_MAX ((1U << 16) + 1024) +static void *ptrs[BATCH_MAX]; + +#define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) + +static void +verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch, + unsigned nregs) { + if (!config_stats) { + return; + } + if (config_prof && opt_prof) { + /* + * Checking the stats when prof is on is feasible but + * complicated, while checking the non-prof case suffices for + * unit-test purpose. + */ + return; + } + expect_u64_eq(before->nmalloc + batch, after->nmalloc, ""); + expect_u64_eq(before->nrequests + batch, after->nrequests, ""); + expect_zu_eq(before->curregs + batch, after->curregs, ""); + size_t nslab = batch / nregs; + size_t n_nonfull = 0; + if (batch % nregs != 0) { + ++nslab; + ++n_nonfull; + } + expect_u64_eq(before->nslabs + nslab, after->nslabs, ""); + expect_zu_eq(before->curslabs + nslab, after->curslabs, ""); + expect_zu_eq(before->nonfull_slabs + n_nonfull, after->nonfull_slabs, + ""); +} + +static void +verify_batch(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero, + arena_t *arena, unsigned nregs) { + for (size_t i = 0, j = 0; i < batch; ++i, ++j) { + if (j == nregs) { + j = 0; + } + void *p = ptrs[i]; + expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); + expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); + if (zero) { + for (size_t k = 0; k < usize; ++k) { + expect_true(*((unsigned char *)p + k) == 0, ""); + } + } + if (j == 0) { + expect_true(PAGE_ALIGNED(p), ""); + continue; + } + assert(i > 0); + void *q = ptrs[i - 1]; + bool adjacent = (uintptr_t)p > (uintptr_t)q + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize; + if (config_prof && opt_prof) { + if (adjacent) { + expect_false(prof_sampled(tsd, p) + || prof_sampled(tsd, q), ""); + } else { + 
expect_true(prof_sampled(tsd, p) + || prof_sampled(tsd, q), ""); + expect_true(PAGE_ALIGNED(p), ""); + j = 0; + } + } else { + expect_true(adjacent, ""); + } + } +} + +static void +release_batch(void **ptrs, size_t batch, size_t size) { + for (size_t i = 0; i < batch; ++i) { + sdallocx(ptrs[i], size, 0); + } +} + +static void +test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { + tsd_t *tsd = tsd_fetch(); + assert(tsd != NULL); + const size_t usize = + (alignment != 0 ? sz_sa2u(size, alignment) : sz_s2u(size)); + const szind_t ind = sz_size2index(usize); + const bin_info_t *bin_info = &bin_infos[ind]; + const unsigned nregs = bin_info->nregs; + assert(nregs > 0); + arena_t *arena; + if (arena_flag != 0) { + arena = arena_get(tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), + false); + } else { + arena = arena_choose(tsd, NULL); + } + assert(arena != NULL); + bin_t *bin = arena_bin_choose(tsd_tsdn(tsd), arena, ind, NULL); + assert(bin != NULL); + int flags = arena_flag; + if (alignment != 0) { + flags |= MALLOCX_ALIGN(alignment); + } + if (zero) { + flags |= MALLOCX_ZERO; + } + + /* + * Allocate for the purpose of bootstrapping arena_tdata, so that the + * change in bin stats won't contaminate the stats to be verified below. 
+ */ + void *p = mallocx(size, flags | MALLOCX_TCACHE_NONE); + + for (size_t i = 0; i < 4; ++i) { + size_t base = 0; + if (i == 1) { + base = nregs; + } else if (i == 2) { + base = nregs * 2; + } else if (i == 3) { + base = (1 << 16); + } + for (int j = -1; j <= 1; ++j) { + if (base == 0 && j == -1) { + continue; + } + size_t batch = base + (size_t)j; + assert(batch < BATCH_MAX); + bin_stats_t stats_before, stats_after; + memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); + size_t filled = batch_alloc(ptrs, batch, size, flags); + assert_zu_eq(filled, batch, ""); + memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); + verify_stats(&stats_before, &stats_after, batch, nregs); + verify_batch(tsd, ptrs, batch, usize, zero, arena, + nregs); + release_batch(ptrs, batch, usize); + } + } + + free(p); +} + +TEST_BEGIN(test_batch_alloc) { + test_wrapper(11, 0, false, 0); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_zero) { + test_wrapper(11, 0, true, 0); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_aligned) { + test_wrapper(7, 16, false, 0); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_manual_arena) { + unsigned arena_ind; + size_t len_unsigned = sizeof(unsigned); + assert_d_eq(mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, + 0), 0, ""); + test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind)); +} +TEST_END + +TEST_BEGIN(test_batch_alloc_fallback) { + const size_t size = SC_LARGE_MINCLASS; + for (size_t batch = 0; batch < 4; ++batch) { + assert(batch < BATCH_MAX); + size_t filled = batch_alloc(ptrs, batch, size, 0); + assert_zu_eq(filled, batch, ""); + release_batch(ptrs, batch, size); + } +} +TEST_END + +int +main(void) { + return test( + test_batch_alloc, + test_batch_alloc_zero, + test_batch_alloc_aligned, + test_batch_alloc_manual_arena, + test_batch_alloc_fallback); +} diff --git a/test/unit/batch_alloc.sh b/test/unit/batch_alloc.sh new file mode 100644 index 00000000..9d81010a --- /dev/null +++ b/test/unit/batch_alloc.sh @@ -0,0 +1,3 @@ +#!/bin/sh + 
+export MALLOC_CONF="tcache_gc_incr_bytes:2147483648" diff --git a/test/unit/batch_alloc_prof.c b/test/unit/batch_alloc_prof.c new file mode 100644 index 00000000..ef644586 --- /dev/null +++ b/test/unit/batch_alloc_prof.c @@ -0,0 +1 @@ +#include "batch_alloc.c" diff --git a/test/unit/batch_alloc_prof.sh b/test/unit/batch_alloc_prof.sh new file mode 100644 index 00000000..a2697a61 --- /dev/null +++ b/test/unit/batch_alloc_prof.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="prof:true,lg_prof_sample:14" From f6cf5eb388eefd1c48c04d6b8c550105b2ad8c17 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 15 Jul 2020 10:42:07 -0700 Subject: [PATCH 1838/2608] Add mallctl for batch allocation API --- src/ctl.c | 32 +++++++++++++++++++++++++++++++- test/unit/batch_alloc.c | 21 ++++++++++++++++++++- 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 9cfb2588..aec3473e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -254,6 +254,7 @@ CTL_PROTO(experimental_arenas_i_pactivep) INDEX_PROTO(experimental_arenas_i) CTL_PROTO(experimental_prof_recent_alloc_max) CTL_PROTO(experimental_prof_recent_alloc_dump) +CTL_PROTO(experimental_batch_alloc) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -675,7 +676,8 @@ static const ctl_named_node_t experimental_node[] = { {NAME("hooks"), CHILD(named, experimental_hooks)}, {NAME("utilization"), CHILD(named, experimental_utilization)}, {NAME("arenas"), CHILD(indexed, experimental_arenas)}, - {NAME("prof_recent"), CHILD(named, experimental_prof_recent)} + {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, + {NAME("batch_alloc"), CTL(experimental_batch_alloc)} }; static const ctl_named_node_t root_node[] = { @@ -3637,3 +3639,31 @@ experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +typedef struct batch_alloc_packet_s batch_alloc_packet_t; +struct batch_alloc_packet_s { + void **ptrs; + size_t num; + size_t size; + int 
flags; +}; + +static int +experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + VERIFY_READ(size_t); + + batch_alloc_packet_t batch_alloc_packet; + ASSURED_WRITE(batch_alloc_packet, batch_alloc_packet_t); + size_t filled = batch_alloc(batch_alloc_packet.ptrs, + batch_alloc_packet.num, batch_alloc_packet.size, + batch_alloc_packet.flags); + READ(filled, size_t); + + ret = 0; + +label_return: + return ret; +} diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 66e0565f..08d6f66a 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -80,6 +80,24 @@ release_batch(void **ptrs, size_t batch, size_t size) { } } +typedef struct batch_alloc_packet_s batch_alloc_packet_t; +struct batch_alloc_packet_s { + void **ptrs; + size_t num; + size_t size; + int flags; +}; + +static size_t +batch_alloc_wrapper(void **ptrs, size_t num, size_t size, int flags) { + batch_alloc_packet_t batch_alloc_packet = {ptrs, num, size, flags}; + size_t filled; + size_t len = sizeof(size_t); + assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, + &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + return filled; +} + static void test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { tsd_t *tsd = tsd_fetch(); @@ -131,7 +149,8 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { assert(batch < BATCH_MAX); bin_stats_t stats_before, stats_after; memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); - size_t filled = batch_alloc(ptrs, batch, size, flags); + size_t filled = batch_alloc_wrapper(ptrs, batch, size, + flags); assert_zu_eq(filled, batch, ""); memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); verify_stats(&stats_before, &stats_after, batch, nregs); From e032a1a1de75cf7faf087406a21789ced2b2f650 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 31 Jul 2020 15:56:38 -0700 Subject: [PATCH 
1839/2608] Add a stress test for batch allocation --- Makefile.in | 3 +- test/stress/batch_alloc.c | 88 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 test/stress/batch_alloc.c diff --git a/Makefile.in b/Makefile.in index da094f08..506d9da3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -298,7 +298,8 @@ TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ - $(srcroot)test/stress/hookbench.c + $(srcroot)test/stress/hookbench.c \ + $(srcroot)test/stress/batch_alloc.c TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c new file mode 100644 index 00000000..b203e05e --- /dev/null +++ b/test/stress/batch_alloc.c @@ -0,0 +1,88 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +#define BATCH (1000 * 1000) +#define HUGE_BATCH (100 * BATCH) +static void *batch_ptrs[HUGE_BATCH]; +static void *item_ptrs[HUGE_BATCH]; + +#define SIZE 7 + +typedef struct batch_alloc_packet_s batch_alloc_packet_t; +struct batch_alloc_packet_s { + void **ptrs; + size_t num; + size_t size; + int flags; +}; + +static void +batch_alloc_wrapper(size_t batch) { + batch_alloc_packet_t batch_alloc_packet = {batch_ptrs, batch, SIZE, 0}; + size_t filled; + size_t len = sizeof(size_t); + assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, + &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + assert_zu_eq(filled, batch, ""); +} + +static void +item_alloc_wrapper(size_t batch) { + for (size_t i = 0; i < batch; ++i) { + item_ptrs[i] = malloc(SIZE); + } +} + +static void +release_and_clear(void **ptrs, size_t batch) { + for (size_t i = 0; i < batch; ++i) { + void *p = ptrs[i]; + assert_ptr_not_null(p, "allocation failed"); + sdallocx(p, SIZE, 0); + ptrs[i] = NULL; + } +} + +static void 
+batch_alloc_small_can_repeat() { + batch_alloc_wrapper(BATCH); + release_and_clear(batch_ptrs, BATCH); +} + +static void +item_alloc_small_can_repeat() { + item_alloc_wrapper(BATCH); + release_and_clear(item_ptrs, BATCH); +} + +TEST_BEGIN(test_small_batch_with_free) { + compare_funcs(10, 100, + "batch allocation", batch_alloc_small_can_repeat, + "item allocation", item_alloc_small_can_repeat); +} +TEST_END + +static void +batch_alloc_huge_cannot_repeat() { + batch_alloc_wrapper(HUGE_BATCH); +} + +static void +item_alloc_huge_cannot_repeat() { + item_alloc_wrapper(HUGE_BATCH); +} + +TEST_BEGIN(test_huge_batch_without_free) { + compare_funcs(0, 1, + "batch allocation", batch_alloc_huge_cannot_repeat, + "item allocation", item_alloc_huge_cannot_repeat); + release_and_clear(batch_ptrs, HUGE_BATCH); + release_and_clear(item_ptrs, HUGE_BATCH); +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_small_batch_with_free, + test_huge_batch_without_free); +} From 81c2f841e5386294834d143fa66c32beb825e4b5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 15:27:08 -0700 Subject: [PATCH 1840/2608] Add a simple utility to detect profiling bias. 
--- Makefile.in | 3 +- include/jemalloc/internal/prof_externs.h | 7 +++ src/prof_sys.c | 7 ++- test/analyze/prof_bias.c | 60 ++++++++++++++++++++++++ 4 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 test/analyze/prof_bias.c diff --git a/Makefile.in b/Makefile.in index 506d9da3..7d147583 100644 --- a/Makefile.in +++ b/Makefile.in @@ -294,7 +294,8 @@ CPP_SRCS := TESTS_INTEGRATION_CPP := endif TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ - $(srcroot)test/analyze/sizes.c + $(srcroot)test/analyze/sizes.c \ + $(srcroot)test/analyze/prof_bias.c TESTS_STRESS := $(srcroot)test/stress/microbench.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/large_microbench.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index a4a4aa61..4579ab02 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -43,6 +43,13 @@ extern size_t lg_prof_sample; extern bool prof_booted; +/* + * A hook to mock out backtrace functionality. This can be handy, since it's + * otherwise difficult to guarantee that two allocations are reported as coming + * from the exact same stack trace in the presence of an optimizing compiler. 
+ */ +extern void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/src/prof_sys.c b/src/prof_sys.c index 4897988d..dddba4b6 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -27,6 +27,8 @@ malloc_mutex_t prof_dump_filename_mtx; +bool prof_do_mock = false; + static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; @@ -267,11 +269,14 @@ prof_backtrace_impl(prof_bt_t *bt) { } #endif + +void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt) = &prof_backtrace_impl; + void prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); pre_reentrancy(tsd, NULL); - prof_backtrace_impl(bt); + prof_backtrace_hook(bt); post_reentrancy(tsd); } diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c new file mode 100644 index 00000000..0aae766b --- /dev/null +++ b/test/analyze/prof_bias.c @@ -0,0 +1,60 @@ +#include "test/jemalloc_test.h" + +/* + * This is a helper utility, only meant to be run manually (and, for example, + * doesn't check for failures, try to skip execution in non-prof modes, etc.). + * It runs, allocates objects of two different sizes from the same stack trace, + * and exits. + * + * The idea is that some human operator will run it like: + * MALLOC_CONF="prof:true,prof_final:true" test/analyze/prof_bias + * and manually inspect the results. 
+ * + * The results should be: + * jeprof --text test/analyze/prof_bias --inuse_space jeprof..0.f.heap: + * around 1024 MB + * jeprof --text test/analyze/prof_bias --inuse_objects jeprof..0.f.heap: + * around 33554448 = 16 + 32 * 1024 * 1024 + * + * And, if prof_accum is on: + * jeprof --text test/analyze/prof_bias --alloc_space jeprof..0.f.heap: + * around 2048 MB + * jeprof --text test/analyze/prof_bias --alloc_objects jeprof..0.f.heap: + * around 67108896 = 2 * (16 + 32 * 1024 * 1024) + */ + +static void +mock_backtrace(prof_bt_t *bt) { + bt->len = 4; + bt->vec[0] = (void *)0x111; + bt->vec[1] = (void *)0x222; + bt->vec[2] = (void *)0x333; + bt->vec[3] = (void *)0x444; +} + +static void +do_allocs(size_t sz, size_t cnt, bool do_frees) { + for (size_t i = 0; i < cnt; i++) { + void *ptr = mallocx(sz, 0); + assert_ptr_not_null(ptr, "Unexpected mallocx failure"); + if (do_frees) { + dallocx(ptr, 0); + } + } +} + +int +main(void) { + size_t lg_prof_sample = 19; + int err = mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, + sizeof(lg_prof_sample)); + assert(err == 0); + + prof_backtrace_hook = &mock_backtrace; + do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true); + do_allocs(32 * 1024* 1024, 16, /* do_frees */ true); + do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false); + do_allocs(32 * 1024* 1024, 16, /* do_frees */ false); + + return 0; +} From 60993697d8bd3f8a07756091df397ed4044da921 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 13:05:34 -0700 Subject: [PATCH 1841/2608] Prof: Add prof_unbias. This gives more accurate attribution of bytes and counts to stack traces, without introducing backwards incompatibilities in heap-profile parsing tools. We track the ideal reported (to the end user) number of bytes more carefully inside core jemalloc. When dumping heap profiles, insteading of outputting our counts directly, we output counts that will cause parsing tools to give a result close to the value we want. 
We retain the old version as an opt setting, to let users who are tracking values on a per-component basis to keep their metrics stable until they decide to switch. --- include/jemalloc/internal/prof_externs.h | 4 + include/jemalloc/internal/prof_structs.h | 4 + src/jemalloc.c | 10 ++ src/prof.c | 65 +++++++++ src/prof_data.c | 160 ++++++++++++++++++++++- 5 files changed, 241 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 4579ab02..ba5933af 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -19,6 +19,7 @@ extern char opt_prof_prefix[ PATH_MAX + #endif 1]; +extern bool opt_prof_unbias; /* For recording recent allocations */ extern ssize_t opt_prof_recent_alloc_max; @@ -40,6 +41,9 @@ extern uint64_t prof_interval; * resets. */ extern size_t lg_prof_sample; +extern size_t prof_unbiased_sz[SC_NSIZES]; +extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; +void prof_unbias_map_init(); extern bool prof_booted; diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 26942aa6..fbad6145 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -24,9 +24,13 @@ typedef struct { struct prof_cnt_s { /* Profiling counters. */ uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; uint64_t curbytes; + uint64_t curbytes_unbiased; uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; uint64_t accumbytes; + uint64_t accumbytes_unbiased; }; typedef enum { diff --git a/src/jemalloc.c b/src/jemalloc.c index f2e5f8eb..ae9ef3d1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1517,6 +1517,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + /* + * Undocumented. When set to false, don't + * correct for an unbiasing bug in jeprof + * attribution. 
This can be handy if you want + * to get consistent numbers from your binary + * across different jemalloc versions, even if + * those numbers are incorrect. The default is + * true. + */ + CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias") } if (config_log) { if (CONF_MATCH("log")) { diff --git a/src/prof.c b/src/prof.c index 25735410..7b649e49 100644 --- a/src/prof.c +++ b/src/prof.c @@ -32,6 +32,7 @@ bool opt_prof_leak = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_sys_thread_name = false; +bool opt_prof_unbias = true; /* Accessed via prof_sample_event_handler(). */ static counter_accum_t prof_idump_accumulated; @@ -60,6 +61,8 @@ static malloc_mutex_t prof_gdump_mtx; uint64_t prof_interval = 0; size_t lg_prof_sample; +size_t prof_unbiased_sz[SC_NSIZES]; +size_t prof_shifted_unbiased_cnt[SC_NSIZES]; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -69,6 +72,40 @@ bool prof_booted = false; /******************************************************************************/ +void prof_unbias_map_init() { + /* See the comment in prof_sample_new_event_wait */ +#ifdef JEMALLOC_PROF + for (szind_t i = 0; i < SC_NSIZES; i++) { + double sz = (double)sz_index2size(i); + double rate = (double)(ZU(1) << lg_prof_sample); + double div_val = 1.0 - exp(-sz / rate); + double unbiased_sz = sz / div_val; + /* + * The "true" right value for the unbiased count is + * 1.0/(1 - exp(-sz/rate)). The problem is, we keep the counts + * as integers (for a variety of reasons -- rounding errors + * could trigger asserts, and not all libcs can properly handle + * floating point arithmetic during malloc calls inside libc). + * Rounding to an integer, though, can lead to rounding errors + * of over 30% for sizes close to the sampling rate. So + * instead, we multiply by a constant, dividing the maximum + * possible roundoff error by that constant. 
To avoid overflow + * in summing up size_t values, the largest safe constant we can + * pick is the size of the smallest allocation. + */ + double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN); + double shifted_unbiased_cnt = cnt_shift / div_val; + prof_unbiased_sz[i] = (size_t)round(unbiased_sz); + prof_shifted_unbiased_cnt[i] = (size_t)round( + shifted_unbiased_cnt); + } +#else + unreachable(); +#endif +} + +/******************************************************************************/ + void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); @@ -96,12 +133,30 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, ptr); prof_info_set(tsd, edata, tctx); + szind_t szind = sz_size2index(size); + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); + /* + * We need to do these map lookups while holding the lock, to avoid the + * possibility of races with prof_reset calls, which update the map and + * then acquire the lock. This actually still leaves a data race on the + * contents of the unbias map, but we have not yet gone through and + * atomic-ified the prof module, and compilers are not yet causing us + * issues. The key thing is to make sure that, if we read garbage data, + * the prof_reset call is about to mark our tctx as expired before any + * dumping of our corrupted output is attempted. 
+ */ + size_t shifted_unbiased_cnt = prof_shifted_unbiased_cnt[szind]; + size_t unbiased_bytes = prof_unbiased_sz[szind]; tctx->cnts.curobjs++; + tctx->cnts.curobjs_shifted_unbiased += shifted_unbiased_cnt; tctx->cnts.curbytes += usize; + tctx->cnts.curbytes_unbiased += unbiased_bytes; if (opt_prof_accum) { tctx->cnts.accumobjs++; + tctx->cnts.accumobjs_shifted_unbiased += shifted_unbiased_cnt; tctx->cnts.accumbytes += usize; + tctx->cnts.accumbytes_unbiased += unbiased_bytes; } bool record_recent = prof_recent_alloc_prepare(tsd, tctx); tctx->prepared = false; @@ -118,12 +173,21 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_tctx_t *tctx = prof_info->alloc_tctx; assert((uintptr_t)tctx > (uintptr_t)1U); + szind_t szind = sz_size2index(usize); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); assert(tctx->cnts.curbytes >= usize); + /* + * It's not correct to do equivalent asserts for unbiased bytes, because + * of the potential for races with prof.reset calls. The map contents + * should really be atomic, but we have not atomic-ified the prof module + * yet. + */ tctx->cnts.curobjs--; + tctx->cnts.curobjs_shifted_unbiased -= prof_shifted_unbiased_cnt[szind]; tctx->cnts.curbytes -= usize; + tctx->cnts.curbytes_unbiased -= prof_unbiased_sz[szind]; prof_try_log(tsd, usize, prof_info); @@ -517,6 +581,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { unsigned i; lg_prof_sample = opt_lg_prof_sample; + prof_unbias_map_init(); prof_active = opt_prof_active; if (malloc_mutex_init(&prof_active_mtx, "prof_active", diff --git a/src/prof_data.c b/src/prof_data.c index 6b441de1..ae9cd4b1 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -514,12 +514,121 @@ prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque, prof_dump_write(cbopaque, buf); } +/* + * Casting a double to a uint64_t may not necessarily be in range; this can be + * UB. 
I don't think this is practically possible with the cur counters, but + * plausibly could be with the accum counters. + */ +#ifdef JEMALLOC_PROF +static uint64_t +prof_double_uint64_cast(double d) { + /* + * Note: UINT64_MAX + 1 is exactly representable as a double on all + * reasonable platforms (certainly those we'll support). Writing this + * as !(a < b) instead of (a >= b) means that we're NaN-safe. + */ + double rounded = round(d); + if (!(rounded < (double)UINT64_MAX)) { + return UINT64_MAX; + } + return (uint64_t)rounded; +} +#endif + +/* + * The unbiasing story is long. The jeprof unbiasing logic was copied from + * pprof. Both shared an issue: they unbiased using the average size of the + * allocations at a particular stack trace. This can work out OK if allocations + * are mostly of the same size given some stack, but not otherwise. We now + * internally track what the unbiased results ought to be. We can't just report + * them as they are though; they'll still go through the jeprof unbiasing + * process. Instead, we figure out what values we can feed *into* jeprof's + * unbiasing mechanism that will lead to getting the right values out. + * + * It'll unbias count and aggregate size as: + * + * c_out = c_in * 1/(1-exp(-s_in/c_in/R) + * s_out = s_in * 1/(1-exp(-s_in/c_in/R) + * + * We want to solve for the values of c_in and s_in that will + * give the c_out and s_out that we've computed internally. + * + * Let's do a change of variables (both to make the math easier and to make it + * easier to write): + * x = s_in / c_in + * y = s_in + * k = 1/R. + * + * Then + * c_out = y/x * 1/(1-exp(-k*x)) + * s_out = y * 1/(1-exp(-k*x)) + * + * The first equation gives: + * y = x * c_out * (1-exp(-k*x)) + * The second gives: + * y = s_out * (1-exp(-k*x)) + * So we have + * x = s_out / c_out. + * And all the other values fall out from that. + * + * This is all a fair bit of work. 
The thing we get out of it is that we don't + * break backwards compatibility with jeprof (and the various tools that have + * copied its unbiasing logic). Eventually, we anticipate a v3 heap profile + * dump format based on JSON, at which point I think much of this logic can get + * cleaned up (since we'll be taking a compatibility break there anyways). + */ +static void +prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in, + uint64_t *r_s_in) { +#ifdef JEMALLOC_PROF + if (c_out_shifted_i == 0 || s_out_i == 0) { + *r_c_in = 0; + *r_s_in = 0; + return; + } + /* + * See the note in prof_unbias_map_init() to see why we take c_out in a + * shifted form. + */ + double c_out = (double)c_out_shifted_i + / (double)(ZU(1) << SC_LG_TINY_MIN); + double s_out = (double)s_out_i; + double R = (double)(ZU(1) << lg_prof_sample); + + double x = s_out / c_out; + double y = s_out * (1.0 - exp(-x / R)); + + double c_in = y / x; + double s_in = y; + + *r_c_in = prof_double_uint64_cast(c_in); + *r_s_in = prof_double_uint64_cast(s_in); +#else + unreachable(); +#endif +} + static void prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, const prof_cnt_t *cnts) { + uint64_t curobjs; + uint64_t curbytes; + uint64_t accumobjs; + uint64_t accumbytes; + if (opt_prof_unbias) { + prof_do_unbias(cnts->curobjs_shifted_unbiased, + cnts->curbytes_unbiased, &curobjs, &curbytes); + prof_do_unbias(cnts->accumobjs_shifted_unbiased, + cnts->accumbytes_unbiased, &accumobjs, &accumbytes); + } else { + curobjs = cnts->curobjs; + curbytes = cnts->curbytes; + accumobjs = cnts->accumobjs; + accumbytes = cnts->accumbytes; + } prof_dump_printf(prof_dump_write, cbopaque, "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", - cnts->curobjs, cnts->curbytes, cnts->accumobjs, cnts->accumbytes); + curobjs, curbytes, accumobjs, accumbytes); } static void @@ -539,12 +648,20 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { memcpy(&tctx->dump_cnts, 
&tctx->cnts, sizeof(prof_cnt_t)); tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + tdata->cnt_summed.curobjs_shifted_unbiased + += tctx->dump_cnts.curobjs_shifted_unbiased; tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + tdata->cnt_summed.curbytes_unbiased + += tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { tdata->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + tdata->cnt_summed.accumobjs_shifted_unbiased += + tctx->dump_cnts.accumobjs_shifted_unbiased; tdata->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + tdata->cnt_summed.accumbytes_unbiased += + tctx->dump_cnts.accumbytes_unbiased; } break; case prof_tctx_state_dumping: @@ -558,10 +675,17 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs; + gctx->cnt_summed.curobjs_shifted_unbiased + += tctx->dump_cnts.curobjs_shifted_unbiased; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; + gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; + gctx->cnt_summed.accumobjs_shifted_unbiased + += tctx->dump_cnts.accumobjs_shifted_unbiased; gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; + gctx->cnt_summed.accumbytes_unbiased + += tctx->dump_cnts.accumbytes_unbiased; } } @@ -757,11 +881,19 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } arg->cnt_all->curobjs += tdata->cnt_summed.curobjs; + arg->cnt_all->curobjs_shifted_unbiased + += tdata->cnt_summed.curobjs_shifted_unbiased; arg->cnt_all->curbytes += tdata->cnt_summed.curbytes; + arg->cnt_all->curbytes_unbiased + += tdata->cnt_summed.curbytes_unbiased; if (opt_prof_accum) { arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs; + arg->cnt_all->accumobjs_shifted_unbiased + += tdata->cnt_summed.accumobjs_shifted_unbiased; arg->cnt_all->accumbytes += 
tdata->cnt_summed.accumbytes; + arg->cnt_all->accumbytes_unbiased += + tdata->cnt_summed.accumbytes_unbiased; } } else { tdata->dumping = false; @@ -814,8 +946,16 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { assert(gctx->cnt_summed.curobjs == 0); assert(gctx->cnt_summed.curbytes == 0); + /* + * These asserts would not be correct -- see the comment on races + * in prof.c + * assert(gctx->cnt_summed.curobjs_unbiased == 0); + * assert(gctx->cnt_summed.curbytes_unbiased == 0); + */ assert(gctx->cnt_summed.accumobjs == 0); + assert(gctx->cnt_summed.accumobjs_shifted_unbiased == 0); assert(gctx->cnt_summed.accumbytes == 0); + assert(gctx->cnt_summed.accumbytes_unbiased == 0); return; } @@ -834,7 +974,7 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, } /* - * See prof_sample_threshold_update() comment for why the body of this function + * See prof_sample_new_event_wait() comment for why the body of this function * is conditionally compiled. */ static void @@ -1120,6 +1260,7 @@ prof_reset(tsd_t *tsd, size_t lg_sample) { malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); lg_prof_sample = lg_sample; + prof_unbias_map_init(); next = NULL; do { @@ -1162,9 +1303,24 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { assert(tctx->cnts.curobjs == 0); assert(tctx->cnts.curbytes == 0); + /* + * These asserts are not correct -- see the comment about races in + * prof.c + * + * assert(tctx->cnts.curobjs_shifted_unbiased == 0); + * assert(tctx->cnts.curbytes_unbiased == 0); + */ assert(!opt_prof_accum); assert(tctx->cnts.accumobjs == 0); assert(tctx->cnts.accumbytes == 0); + /* + * These ones are, since accumbyte counts never go down. Either + * prof_accum is off (in which case these should never have changed from + * their initial value of zero), or it's on (in which case we shouldn't + * be destroying this tctx). 
+ */ + assert(tctx->cnts.accumobjs_shifted_unbiased == 0); + assert(tctx->cnts.accumbytes_unbiased == 0); prof_gctx_t *gctx = tctx->gctx; From 53084cc5c285954d576b2f4a19a230a853014f82 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 18:24:05 -0700 Subject: [PATCH 1842/2608] Safety check: Don't directly abort. The sized dealloc checks called the generic safety_check_fail, and then called abort. This means the failure case isn't mockable, hence not testable. Fix it in anticipation of a coming diff. --- src/safety_check.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index a83dca76..c692835a 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -4,7 +4,6 @@ static void (*safety_check_abort)(const char *message); void safety_check_fail_sized_dealloc(bool current_dealloc) { - assert(config_opt_safety_checks); char *src = current_dealloc ? "the current pointer being freed" : "in thread cache, possibly from previous deallocations"; @@ -12,7 +11,6 @@ void safety_check_fail_sized_dealloc(bool current_dealloc) { " application sized deallocation bugs (source: %s). Suggest building" "with --enable-debug or address sanitizer for debugging. Abort.\n", src); - abort(); } void safety_check_set_abort(void (*abort_fn)(const char *)) { From eaed1e39be8574b1a59d21824b68e31af378cd0f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 Aug 2020 18:23:36 -0700 Subject: [PATCH 1843/2608] Add sized-delete size-checking functionality. The existing checks are good at finding such issues (on tcache flush), but not so good at pinpointing them. Debug mode can find them, but sometimes debug mode slows down a program so much that hard-to-hit bugs can take a long time to crash. This commit adds functionality to keep programs mostly on their fast paths, while also checking every sized delete argument they get. 
--- Makefile.in | 1 + configure.ac | 17 +++++ .../internal/jemalloc_internal_defs.h.in | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 13 ++++ src/jemalloc.c | 48 +++++++++++--- test/unit/size_check.c | 62 +++++++++++++++++++ 6 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 test/unit/size_check.c diff --git a/Makefile.in b/Makefile.in index 7d147583..a63f69f1 100644 --- a/Makefile.in +++ b/Makefile.in @@ -247,6 +247,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/sc.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ + $(srcroot)test/unit/size_check.c \ $(srcroot)test/unit/size_classes.c \ $(srcroot)test/unit/slab.c \ $(srcroot)test/unit/smoothstep.c \ diff --git a/configure.ac b/configure.ac index b197d32e..d68d376c 100644 --- a/configure.ac +++ b/configure.ac @@ -1492,6 +1492,23 @@ if test "x$enable_opt_safety_checks" = "x1" ; then fi AC_SUBST([enable_opt_safety_checks]) +dnl Look for sized-deallocation bugs while otherwise being in opt mode. +AC_ARG_ENABLE([opt-size-checks], + [AS_HELP_STRING([--enable-opt-size-checks], + [Perform sized-deallocation argument checks, even in opt mode])], +[if test "x$enable_opt_size_checks" = "xno" ; then + enable_opt_size_checks="0" +else + enable_opt_size_checks="1" +fi +], +[enable_opt_size_checks="0"] +) +if test "x$enable_opt_size_checks" = "x1" ; then + AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ]) +fi +AC_SUBST([enable_opt_size_checks]) + JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { __builtin_unreachable(); diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0aef0bb3..ee052bb8 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -373,4 +373,7 @@ /* Performs additional safety checks when defined. */ #undef JEMALLOC_OPT_SAFETY_CHECKS +/* Performs additional size checks when defined. 
*/
+#undef JEMALLOC_OPT_SIZE_CHECKS
+
#endif /* JEMALLOC_INTERNAL_DEFS_H_ */
diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in
index 740fcfcb..4012eb25 100644
--- a/include/jemalloc/internal/jemalloc_preamble.h.in
+++ b/include/jemalloc/internal/jemalloc_preamble.h.in
@@ -180,6 +180,19 @@ static const bool config_opt_safety_checks =
#endif
;
+/*
+ * Extra debugging of sized deallocations too onerous to be included in the
+ * general safety checks.
+ */
+static const bool config_opt_size_checks =
+#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_OPT_SAFETY_CHECKS) \
+ || defined(JEMALLOC_DEBUG)
+ true
+#else
+ false
+#endif
+ ;
+
#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU)
/* Currently percpu_arena depends on sched_getcpu. */
#define JEMALLOC_PERCPU_ARENA
diff --git a/src/jemalloc.c b/src/jemalloc.c
index ae9ef3d1..51a1a23a 100644
--- a/src/jemalloc.c
+++ b/src/jemalloc.c
@@ -2793,6 +2793,27 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) {
 thread_dalloc_event(tsd, usize);
}
+JEMALLOC_ALWAYS_INLINE bool
+maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) {
+ if (config_opt_size_checks) {
+ emap_alloc_ctx_t dbg_ctx;
+ emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr,
+ &dbg_ctx);
+ if (alloc_ctx->szind != dbg_ctx.szind) {
+ safety_check_fail_sized_dealloc(
+ /* current_dealloc */ true);
+ return true;
+ }
+ if (alloc_ctx->slab != dbg_ctx.slab) {
+ safety_check_fail(
+ "Internal heap corruption detected: "
+ "mismatch in slab bit");
+ return true;
+ }
+ }
+ return false;
+}
+
JEMALLOC_ALWAYS_INLINE void
isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
 if (!slow_path) {
@@ -2823,13 +2844,6 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) {
 /* Non page aligned must be slab allocated.
*/ alloc_ctx.slab = true; } - if (config_debug) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr, &dbg_ctx); - assert(dbg_ctx.szind == alloc_ctx.szind); - assert(dbg_ctx.slab == alloc_ctx.slab); - } } else if (opt_prof) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); @@ -2845,6 +2859,16 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } } + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* + * This is a heap corruption bug. In real life we'll crash; for + * the unit test we just want to avoid breaking anything too + * badly to get a test result out. Let's leak instead of trying + * to free. + */ + return; + } if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); @@ -2934,8 +2958,15 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); - alloc_ctx.slab = false; + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. 
*/ + return true; + } + uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); @@ -3739,7 +3770,6 @@ sdallocx_default(void *ptr, size_t size, int flags) { tsd_t *tsd = tsd_fetch_min(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); - assert(usize == isalloc(tsd_tsdn(tsd), ptr)); check_entry_exit_locking(tsd_tsdn(tsd)); unsigned tcache_ind = mallocx_tcache_get(flags); diff --git a/test/unit/size_check.c b/test/unit/size_check.c new file mode 100644 index 00000000..3d2912df --- /dev/null +++ b/test/unit/size_check.c @@ -0,0 +1,62 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/safety_check.h" + +bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +#define SIZE1 SC_SMALL_MAXCLASS +#define SIZE2 (SC_SMALL_MAXCLASS / 2) + +TEST_BEGIN(test_invalid_size_sdallocx) { + test_skip_if(!config_opt_size_checks); + safety_check_set_abort(&fake_abort); + + fake_abort_called = false; + void *ptr = malloc(SIZE1); + assert_ptr_not_null(ptr, "Unexpected failure"); + sdallocx(ptr, SIZE2, 0); + expect_true(fake_abort_called, "Safety check didn't fire"); + + safety_check_set_abort(NULL); +} +TEST_END + +TEST_BEGIN(test_invalid_size_sdallocx_nonzero_flag) { + test_skip_if(!config_opt_size_checks); + safety_check_set_abort(&fake_abort); + + fake_abort_called = false; + void *ptr = malloc(SIZE1); + assert_ptr_not_null(ptr, "Unexpected failure"); + sdallocx(ptr, SIZE2, MALLOCX_TCACHE_NONE); + expect_true(fake_abort_called, "Safety check didn't fire"); + + safety_check_set_abort(NULL); +} +TEST_END + +TEST_BEGIN(test_invalid_size_sdallocx_noflags) { + test_skip_if(!config_opt_size_checks); + safety_check_set_abort(&fake_abort); + + fake_abort_called = false; + void *ptr = malloc(SIZE1); + assert_ptr_not_null(ptr, "Unexpected failure"); + je_sdallocx_noflags(ptr, SIZE2); + expect_true(fake_abort_called, "Safety check didn't fire"); + + 
safety_check_set_abort(NULL); +} +TEST_END + +int +main(void) { + return test( + test_invalid_size_sdallocx, + test_invalid_size_sdallocx_nonzero_flag, + test_invalid_size_sdallocx_noflags); +} From 743021b63fd06ad23a81af310d467e2e26108a9a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 5 Aug 2020 14:43:03 -0700 Subject: [PATCH 1844/2608] Fix size miscalculation bug in reallocation --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index b2feff49..f8e8cba2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1297,7 +1297,7 @@ void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, tcache_t *tcache, hook_ralloc_args_t *hook_args) { - size_t usize = sz_s2u(size); + size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } From 8f9e958e1e81342091b1178005c0dedfed5573dd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 10 Aug 2020 15:39:16 -0700 Subject: [PATCH 1845/2608] Add alignment stress test for rallocx --- test/integration/rallocx.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 6cc4437d..57c7967f 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -171,6 +171,39 @@ TEST_BEGIN(test_align) { } TEST_END +TEST_BEGIN(test_align_enum) { +/* Span both small sizes and large sizes. 
*/ +#define LG_MIN 12 +#define LG_MAX 15 + for (size_t lg_align = LG_MIN; lg_align <= LG_MAX; ++lg_align) { + for (size_t lg_size = LG_MIN; lg_size <= LG_MAX; ++lg_size) { + size_t size = 1 << lg_size; + for (size_t lg_align_next = LG_MIN; + lg_align_next <= LG_MAX; ++lg_align_next) { + int flags = MALLOCX_LG_ALIGN(lg_align); + void *p = mallocx(1, flags); + assert_ptr_not_null(p, + "Unexpected mallocx() error"); + assert_zu_eq(nallocx(1, flags), + malloc_usable_size(p), + "Wrong mallocx() usable size"); + int flags_next = + MALLOCX_LG_ALIGN(lg_align_next); + p = rallocx(p, size, flags_next); + assert_ptr_not_null(p, + "Unexpected rallocx() error"); + expect_zu_eq(nallocx(size, flags_next), + malloc_usable_size(p), + "Wrong rallocx() usable size"); + free(p); + } + } + } +#undef LG_MAX +#undef LG_MIN +} +TEST_END + TEST_BEGIN(test_lg_align_and_zero) { void *p, *q; unsigned lg_align; @@ -253,6 +286,7 @@ main(void) { test_grow_and_shrink, test_zero, test_align, + test_align_enum, test_lg_align_and_zero, test_overflow); } From 9e18ae639f760d9c655e79baa2880e26b32c54db Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 12 Aug 2020 11:00:50 -0700 Subject: [PATCH 1846/2608] Config: safety checks don't imply size checks. The commit introducing size checks accidentally enabled them whenever any safety checks were on. This ends up causing the regression that splitting up the features was intended to avoid. Fix the issue. --- include/jemalloc/internal/jemalloc_preamble.h.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 4012eb25..d62fee09 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -185,8 +185,7 @@ static const bool config_opt_safety_checks = * general safety checks. 
*/ static const bool config_opt_size_checks = -#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_OPT_SAFETY_CHECKS) \ - || defined(JEMALLOC_DEBUG) +#if defined(JEMALLOC_OPT_SIZE_CHECKS) || defined(JEMALLOC_DEBUG) true #else false From ab274a23b98c228c073f1dfef89d0323fbe8b4c2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 12 Aug 2020 12:07:42 -0700 Subject: [PATCH 1847/2608] Add narenas_ratio. This allows setting arenas per cpu dynamically, rather than forcing the user to know the number of CPUs in advance if they want a particular CPU/space tradeoff. --- src/jemalloc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 51a1a23a..0ca400e3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -127,6 +127,7 @@ bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; unsigned opt_narenas = 0; +unsigned opt_narenas_ratio = 4; unsigned ncpus; @@ -1294,6 +1295,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* clip */ false) } } + if (CONF_MATCH("narenas_ratio")) { + CONF_HANDLE_UNSIGNED(opt_narenas_ratio, + "narenas_ratio", 1, UINT_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; size_t vlen_left = vlen; @@ -1781,7 +1788,7 @@ malloc_narenas_default(void) { * default. */ if (ncpus > 1) { - return ncpus << 2; + return ncpus * opt_narenas_ratio; } else { return 1; } From 38867c5c1723efa7e42898e1737e1587b5c734e1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 16:27:50 -0700 Subject: [PATCH 1848/2608] Makefile: alphabetize stress/analyze utilities. 
--- Makefile.in | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Makefile.in b/Makefile.in index a63f69f1..7140c258 100644 --- a/Makefile.in +++ b/Makefile.in @@ -294,14 +294,15 @@ else CPP_SRCS := TESTS_INTEGRATION_CPP := endif -TESTS_ANALYZE := $(srcroot)test/analyze/rand.c \ - $(srcroot)test/analyze/sizes.c \ - $(srcroot)test/analyze/prof_bias.c -TESTS_STRESS := $(srcroot)test/stress/microbench.c \ +TESTS_ANALYZE := $(srcroot)test/analyze/prof_bias.c \ + $(srcroot)test/analyze/rand.c \ + $(srcroot)test/analyze/sizes.c +TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/fill_flush.c \ - $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/hookbench.c \ - $(srcroot)test/stress/batch_alloc.c + $(srcroot)test/stress/large_microbench.c \ + $(srcroot)test/stress/microbench.c + TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ From 32d46732217ab592032567350c176850ba0249c6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 16:57:09 -0700 Subject: [PATCH 1849/2608] Add a mallctl speed stress test. 
--- Makefile.in | 1 + test/stress/mallctl.c | 74 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 test/stress/mallctl.c diff --git a/Makefile.in b/Makefile.in index 7140c258..80e5aaf4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -301,6 +301,7 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/fill_flush.c \ $(srcroot)test/stress/hookbench.c \ $(srcroot)test/stress/large_microbench.c \ + $(srcroot)test/stress/mallctl.c \ $(srcroot)test/stress/microbench.c diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c new file mode 100644 index 00000000..6d2e5c50 --- /dev/null +++ b/test/stress/mallctl.c @@ -0,0 +1,74 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +static void +mallctl_short(void) { + const char *version; + size_t sz = sizeof(version); + int err = mallctl("version", &version, &sz, NULL, 0); + assert_d_eq(err, 0, "mallctl failure"); +} + +size_t mib_short[1]; + +static void +mallctlbymib_short(void) { + size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + const char *version; + size_t sz = sizeof(version); + int err = mallctlbymib(mib_short, miblen, &version, &sz, NULL, 0); + assert_d_eq(err, 0, "mallctlbymib failure"); +} + +TEST_BEGIN(test_mallctl_vs_mallctlbymib_short) { + size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + + int err = mallctlnametomib("version", mib_short, &miblen); + assert_d_eq(err, 0, "mallctlnametomib failure"); + compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_short", + mallctl_short, "mallctlbymib_short", mallctlbymib_short); +} +TEST_END + +static void +mallctl_long(void) { + uint64_t nmalloc; + size_t sz = sizeof(nmalloc); + int err = mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, + 0); + assert_d_eq(err, 0, "mallctl failure"); +} + +size_t mib_long[6]; + +static void +mallctlbymib_long(void) { + size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); + const char *version; + size_t sz = sizeof(version); + 
int err = mallctlbymib(mib_long, miblen, &version, &sz, NULL, 0); + assert_d_eq(err, 0, "mallctlbymib failure"); +} + +TEST_BEGIN(test_mallctl_vs_mallctlbymib_long) { + /* + * We want to use the longest mallctl we have; that needs stats support + * to be allowed. + */ + test_skip_if(!config_stats); + + size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); + int err = mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib_long, + &miblen); + assert_d_eq(err, 0, "mallctlnametomib failure"); + compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_long", + mallctl_long, "mallctlbymib_long", mallctlbymib_long); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_mallctl_vs_mallctlbymib_short, + test_mallctl_vs_mallctlbymib_long); +} From 7b187360e9641c8f664709d3ac50296e3a87b2e0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 18:30:34 -0700 Subject: [PATCH 1850/2608] IO: Support 0-padding for unsigned numbers. --- src/malloc_io.c | 28 ++++++++++++++++++++++++++-- test/unit/malloc_io.c | 10 ++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/malloc_io.c b/src/malloc_io.c index f5004f0a..59a0cbfc 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -346,7 +346,11 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { if (!left_justify && pad_len != 0) { \ size_t j; \ for (j = 0; j < pad_len; j++) { \ - APPEND_C(' '); \ + if (pad_zero) { \ + APPEND_C('0'); \ + } else { \ + APPEND_C(' '); \ + } \ } \ } \ /* Value. */ \ @@ -418,6 +422,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { unsigned char len = '?'; char *s; size_t slen; + bool first_width_digit = true; + bool pad_zero = false; f++; /* Flags. 
*/ @@ -454,7 +460,12 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { width = -width; } break; - case '0': case '1': case '2': case '3': case '4': + case '0': + if (first_width_digit) { + pad_zero = true; + } + JEMALLOC_FALLTHROUGH; + case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { uintmax_t uwidth; set_errno(0); @@ -462,6 +473,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; + first_width_digit = false; break; } default: break; @@ -519,6 +531,18 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { intmax_t val JEMALLOC_CC_SILENCE_INIT(0); char buf[D2S_BUFSIZE]; + /* + * Outputting negative, zero-padded numbers + * would require a nontrivial rework of the + * interaction between the width and padding + * (since 0 padding goes between the '-' and the + * number, while ' ' padding goes either before + * the - or after the number. Since we + * currently don't ever need 0-padded negative + * numbers, just don't bother supporting it. + */ + assert(!pad_zero); + GET_ARG_NUMERIC(val, len); s = d2s(val, (plus_plus ? '+' : (plus_space ? ' ' : '-')), buf, &slen); diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index 1a6e5f63..385f7450 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -175,6 +175,7 @@ TEST_BEGIN(test_malloc_snprintf) { TEST("_1234_", "_%o_", 01234); TEST("_01234_", "_%#o_", 01234); TEST("_1234_", "_%u_", 1234); + TEST("01234", "%05u", 1234); TEST("_1234_", "_%d_", 1234); TEST("_ 1234_", "_% d_", 1234); @@ -183,6 +184,15 @@ TEST_BEGIN(test_malloc_snprintf) { TEST("_-1234_", "_% d_", -1234); TEST("_-1234_", "_%+d_", -1234); + /* + * Morally, we should test these too, but 0-padded signed types are not + * yet supported. 
+ * + * TEST("01234", "%05d", 1234); + * TEST("-1234", "%05d", -1234); + * TEST("-01234", "%06d", -1234); + */ + TEST("_-1234_", "_%d_", -1234); TEST("_1234_", "_%d_", 1234); TEST("_-1234_", "_%i_", -1234); From 753bbf1849caaf4f523567b2da6cb1de6147d811 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 Aug 2020 17:39:45 -0700 Subject: [PATCH 1851/2608] Benchmarks: Also print ns / iter. This is often what we really care about. It's not easy to do the division mentally in all cases. --- test/include/test/bench.h | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 6cd19fdd..0397c948 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -13,6 +13,20 @@ time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, timer_stop(timer); } +#define FMT_NSECS_BUF_SIZE 100 +/* Print nanoseconds / iter into the buffer "buf". */ +static inline void +fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) { + uint64_t nsec = usec * 1000; + /* We'll display 3 digits after the decimal point. 
*/ + uint64_t nsec1000 = nsec * 1000; + uint64_t nsecs_per_iter1000 = nsec1000 / iters; + uint64_t intpart = nsecs_per_iter1000 / 1000; + uint64_t fracpart = nsecs_per_iter1000 % 1000; + malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%"FMTu64".%03"FMTu64, intpart, + fracpart); +} + static inline void compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, void (*func_a), const char *name_b, void (*func_b)) { @@ -29,11 +43,18 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, time_func(&timer_a, nwarmup, niter, func_a); time_func(&timer_b, nwarmup, niter, func_b); + uint64_t usec_a = timer_usec(&timer_a); + char buf_a[FMT_NSECS_BUF_SIZE]; + fmt_nsecs(usec_a, niter, buf_a); + + uint64_t usec_b = timer_usec(&timer_b); + char buf_b[FMT_NSECS_BUF_SIZE]; + fmt_nsecs(usec_b, niter, buf_b); + timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us, " - "%s=%"FMTu64"us, ratio=1:%s\n", - niter, name_a, timer_usec(&timer_a), name_b, timer_usec(&timer_b), - ratio_buf); + malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), " + "%s=%"FMTu64"us (%s ns/iter), ratio=1:%s\n", + niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); } From b0ffa39cac2af955b8b39e5457e9ca8ed3e8748b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 17 Aug 2020 09:04:33 -0700 Subject: [PATCH 1852/2608] Mallctl stress test: fix a type. The mallctlbymib_long helper was copy-pasted from mallctlbymib_short, and incorrectly used its output variable (a char *) rather than the output variable of the mallctl call it was using (a uint64_t), causing breakages when sizeof(char *) differed from sizeof(uint64_t). 
--- test/stress/mallctl.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c index 6d2e5c50..d29b3118 100644 --- a/test/stress/mallctl.c +++ b/test/stress/mallctl.c @@ -44,9 +44,9 @@ size_t mib_long[6]; static void mallctlbymib_long(void) { size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); - const char *version; - size_t sz = sizeof(version); - int err = mallctlbymib(mib_long, miblen, &version, &sz, NULL, 0); + uint64_t nmalloc; + size_t sz = sizeof(nmalloc); + int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0); assert_d_eq(err, 0, "mallctlbymib failure"); } From b399463fba68d7098d52123b513ab51a2e1ace49 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 13 Aug 2020 13:09:05 -0700 Subject: [PATCH 1853/2608] flat_bitmap unit test: Silence a warning. --- test/unit/flat_bitmap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 14ac6ba4..410e94ff 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -461,8 +461,12 @@ expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, } bool simple_iter_res; - size_t simple_iter_begin; - size_t simple_iter_len; + /* + * These are dead stores, but the compiler can't always figure that out + * statically, and warns on the uninitialized variable. + */ + size_t simple_iter_begin = 0; + size_t simple_iter_len = 0; simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, &simple_iter_len, val, forward); From 131b1b53383720de3ca8877c676e85d968205103 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 18:03:40 -0700 Subject: [PATCH 1854/2608] Rename ecache_grow -> geom_grow. We're about to start using it outside of the ecaches, in the HPA central allocator. 
--- Makefile.in | 1 + include/jemalloc/internal/ecache.h | 25 ----------------------- include/jemalloc/internal/geom_grow.h | 29 +++++++++++++++++++++++++++ include/jemalloc/internal/pac.h | 4 +++- src/ecache.c | 26 ------------------------ src/extent.c | 26 ++++++++++++------------ src/geom_grow.c | 29 +++++++++++++++++++++++++++ src/pa_extra.c | 6 +++--- src/pac.c | 10 ++++----- test/unit/retained.c | 2 +- 10 files changed, 84 insertions(+), 74 deletions(-) create mode 100644 include/jemalloc/internal/geom_grow.h create mode 100644 src/geom_grow.c diff --git a/Makefile.in b/Makefile.in index 80e5aaf4..3697e071 100644 --- a/Makefile.in +++ b/Makefile.in @@ -117,6 +117,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ + $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index a11418c0..cc2752f5 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -19,26 +19,6 @@ struct ecache_s { bool delay_coalesce; }; -typedef struct ecache_grow_s ecache_grow_t; -struct ecache_grow_s { - /* - * Next extent size class in a growing series to use when satisfying a - * request via the extent hooks (only if opt_retain). This limits the - * number of disjoint virtual memory ranges so that extent merging can - * be effective even if multiple arenas' extent allocation requests are - * highly interleaved. - * - * retain_grow_limit is the max allowed size ind to expand (unless the - * required size is greater). Default is no limit, and controlled - * through mallctl only. 
- * - * Synchronization: extent_grow_mtx - */ - pszind_t next; - pszind_t limit; - malloc_mutex_t mtx; -}; - static inline size_t ecache_npages_get(ecache_t *ecache) { return eset_npages_get(&ecache->eset); @@ -65,9 +45,4 @@ void ecache_prefork(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_parent(tsdn_t *tsdn, ecache_t *ecache); void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache); -bool ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow); -void ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow); -void ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow); -void ecache_grow_postfork_child(tsdn_t *tsdn, ecache_grow_t *ecache_grow); - #endif /* JEMALLOC_INTERNAL_ECACHE_H */ diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h new file mode 100644 index 00000000..a28c17c9 --- /dev/null +++ b/include/jemalloc/internal/geom_grow.h @@ -0,0 +1,29 @@ +#ifndef JEMALLOC_INTERNAL_ECACHE_GROW_H +#define JEMALLOC_INTERNAL_ECACHE_GROW_H + +typedef struct geom_grow_s geom_grow_t; +struct geom_grow_s { + /* + * Next extent size class in a growing series to use when satisfying a + * request via the extent hooks (only if opt_retain). This limits the + * number of disjoint virtual memory ranges so that extent merging can + * be effective even if multiple arenas' extent allocation requests are + * highly interleaved. + * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. 
+ * + * Synchronization: mtx + */ + pszind_t next; + pszind_t limit; + malloc_mutex_t mtx; +}; + +bool geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow); + +#endif /* JEMALLOC_INTERNAL_ECACHE_GROW_H */ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 2d02bda0..a028456c 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,8 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H +#include "jemalloc/internal/geom_grow.h" #include "jemalloc/internal/pai.h" + /* * Page allocator classic; an implementation of the PAI interface that: * - Can be used for arenas with custom extent hooks. @@ -93,7 +95,7 @@ struct pac_s { edata_cache_t *edata_cache; /* The grow info for the retained ecache. */ - ecache_grow_t ecache_grow; + geom_grow_t geom_grow; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/src/ecache.c b/src/ecache.c index 301b7ca6..3c1a2274 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -29,29 +29,3 @@ void ecache_postfork_child(tsdn_t *tsdn, ecache_t *ecache) { malloc_mutex_postfork_child(tsdn, &ecache->mtx); } - -bool -ecache_grow_init(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - ecache_grow->next = sz_psz2ind(HUGEPAGE); - ecache_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&ecache_grow->mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { - return true; - } - return false; -} - -void -ecache_grow_prefork(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - malloc_mutex_prefork(tsdn, &ecache_grow->mtx); -} - -void -ecache_grow_postfork_parent(tsdn_t *tsdn, ecache_grow_t *ecache_grow) { - malloc_mutex_postfork_parent(tsdn, &ecache_grow->mtx); -} - -void -ecache_grow_postfork_child(tsdn_t *tsdn, 
ecache_grow_t *ecache_grow) { - malloc_mutex_postfork_child(tsdn, &ecache_grow->mtx); -} diff --git a/src/extent.c b/src/extent.c index 98db40ec..644623d1 100644 --- a/src/extent.c +++ b/src/extent.c @@ -614,7 +614,7 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_assert_owner(tsdn, &pac->geom_grow.mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -626,15 +626,15 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * satisfy this request. */ pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); + size_t alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); while (alloc_size < alloc_size_min) { egn_skip++; - if (pac->ecache_grow.next + egn_skip >= + if (pac->geom_grow.next + egn_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ goto label_err; } - alloc_size = sz_pind2sz(pac->ecache_grow.next + egn_skip); + alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); } edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); @@ -727,13 +727,13 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (pac->ecache_grow.next + egn_skip + 1 <= pac->ecache_grow.limit) { - pac->ecache_grow.next += egn_skip + 1; + if (pac->geom_grow.next + egn_skip + 1 <= pac->geom_grow.limit) { + pac->geom_grow.next += egn_skip + 1; } else { - pac->ecache_grow.next = pac->ecache_grow.limit; + pac->geom_grow.next = pac->geom_grow.limit; } /* All opportunities for failure are past. 
*/ - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -747,7 +747,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); return NULL; } @@ -757,13 +757,13 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); edata_t *edata = extent_recycle(tsdn, pac, ehooks, &pac->ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } @@ -772,9 +772,9 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, alignment, zero, commit); /* extent_grow_retained() always releases extent_grow_mtx. 
*/ } else { - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); } - malloc_mutex_assert_not_owner(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &pac->geom_grow.mtx); return edata; } diff --git a/src/geom_grow.c b/src/geom_grow.c new file mode 100644 index 00000000..d188bb89 --- /dev/null +++ b/src/geom_grow.c @@ -0,0 +1,29 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +bool +geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow) { + geom_grow->next = sz_psz2ind(HUGEPAGE); + geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); + if (malloc_mutex_init(&geom_grow->mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + return true; + } + return false; +} + +void +geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow) { + malloc_mutex_prefork(tsdn, &geom_grow->mtx); +} + +void +geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow) { + malloc_mutex_postfork_parent(tsdn, &geom_grow->mtx); +} + +void +geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow) { + malloc_mutex_postfork_child(tsdn, &geom_grow->mtx); +} + diff --git a/src/pa_extra.c b/src/pa_extra.c index 26a196b6..9e083cae 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,7 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_grow_prefork(tsdn, &shard->pac.ecache_grow); + geom_grow_prefork(tsdn, &shard->pac.geom_grow); } void @@ -37,7 +37,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_parent(tsdn, &shard->pac.ecache_grow); + geom_grow_postfork_parent(tsdn, &shard->pac.geom_grow); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); 
malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); } @@ -48,7 +48,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); - ecache_grow_postfork_child(tsdn, &shard->pac.ecache_grow); + geom_grow_postfork_child(tsdn, &shard->pac.geom_grow); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); } diff --git a/src/pac.c b/src/pac.c index a4370888..151be209 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - if (ecache_grow_init(tsdn, &pac->ecache_grow)) { + if (geom_grow_init(tsdn, &pac->geom_grow)) { return true; } if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { @@ -203,14 +203,14 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, } } - malloc_mutex_lock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(pac->ecache_grow.limit); + *old_limit = sz_pind2sz(pac->geom_grow.limit); } if (new_limit != NULL) { - pac->ecache_grow.limit = new_ind; + pac->geom_grow.limit = new_ind; } - malloc_mutex_unlock(tsdn, &pac->ecache_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); return false; } diff --git a/test/unit/retained.c b/test/unit/retained.c index ef301aa0..81396170 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -142,7 +142,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.pac.ecache_grow.next; pind++) { + arena->pa_shard.pac.geom_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; 
From ffe552223cc3b50dd88458e46d531f970b45096e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 18:16:31 -0700 Subject: [PATCH 1855/2608] Geom_grow: Move in advancing logic. --- include/jemalloc/internal/geom_grow.h | 27 +++++++++++++++++++++++++++ src/extent.c | 22 +++++++--------------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h index a28c17c9..d3ac6c95 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/geom_grow.h @@ -21,6 +21,33 @@ struct geom_grow_s { malloc_mutex_t mtx; }; +static inline bool +geom_grow_size_prepare(geom_grow_t *geom_grow, size_t alloc_size_min, + size_t *r_alloc_size, pszind_t *r_skip) { + *r_skip = 0; + *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); + while (*r_alloc_size < alloc_size_min) { + (*r_skip)++; + if (geom_grow->next + *r_skip >= + sz_psz2ind(SC_LARGE_MAXCLASS)) { + /* Outside legal range. */ + return true; + } + *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); + } + return false; +} + +static inline void +geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { + if (geom_grow->next + skip + 1 <= geom_grow->limit) { + geom_grow->next += skip + 1; + } else { + geom_grow->next = geom_grow->limit; + } + +} + bool geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); diff --git a/src/extent.c b/src/extent.c index 644623d1..6abaadf0 100644 --- a/src/extent.c +++ b/src/extent.c @@ -625,16 +625,12 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Find the next extent size in the series that would be large enough to * satisfy this request. 
*/ - pszind_t egn_skip = 0; - size_t alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); - while (alloc_size < alloc_size_min) { - egn_skip++; - if (pac->geom_grow.next + egn_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { - /* Outside legal range. */ - goto label_err; - } - alloc_size = sz_pind2sz(pac->geom_grow.next + egn_skip); + size_t alloc_size; + pszind_t geom_grow_skip; + bool err = geom_grow_size_prepare(&pac->geom_grow, alloc_size_min, + &alloc_size, &geom_grow_skip); + if (err) { + goto label_err; } edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); @@ -727,12 +723,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (pac->geom_grow.next + egn_skip + 1 <= pac->geom_grow.limit) { - pac->geom_grow.next += egn_skip + 1; - } else { - pac->geom_grow.next = pac->geom_grow.limit; - } /* All opportunities for failure are past. */ + geom_grow_size_commit(&pac->geom_grow, geom_grow_skip); malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); if (config_prof) { From c57494879fe12157470cefc44bbd121726ec363a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 18:26:52 -0700 Subject: [PATCH 1856/2608] Geom_grow: Don't take tsdn at init. It's never used. 
--- include/jemalloc/internal/geom_grow.h | 2 +- src/geom_grow.c | 2 +- src/pac.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h index d3ac6c95..128c56f9 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/geom_grow.h @@ -48,7 +48,7 @@ geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { } -bool geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow); +bool geom_grow_init(geom_grow_t *geom_grow); void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); void geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow); diff --git a/src/geom_grow.c b/src/geom_grow.c index d188bb89..eab8bc96 100644 --- a/src/geom_grow.c +++ b/src/geom_grow.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" bool -geom_grow_init(tsdn_t *tsdn, geom_grow_t *geom_grow) { +geom_grow_init(geom_grow_t *geom_grow) { geom_grow->next = sz_psz2ind(HUGEPAGE); geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); if (malloc_mutex_init(&geom_grow->mtx, "extent_grow", diff --git a/src/pac.c b/src/pac.c index 151be209..6d52a937 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - if (geom_grow_init(tsdn, &pac->geom_grow)) { + if (geom_grow_init(&pac->geom_grow)) { return true; } if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { From 5e90fd006e97d62d74c79ce67cbf0cae5429ecdc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 Aug 2020 10:18:31 -0700 Subject: [PATCH 1857/2608] Geom_grow: Don't keep the mutex internal. We're about to use it in ways that will have external synchronization. 
--- include/jemalloc/internal/geom_grow.h | 8 +------- include/jemalloc/internal/pac.h | 1 + src/extent.c | 16 ++++++++-------- src/geom_grow.c | 23 +---------------------- src/pa_extra.c | 6 +++--- src/pac.c | 8 +++++--- 6 files changed, 19 insertions(+), 43 deletions(-) diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/geom_grow.h index 128c56f9..ba83386f 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/geom_grow.h @@ -13,12 +13,9 @@ struct geom_grow_s { * retain_grow_limit is the max allowed size ind to expand (unless the * required size is greater). Default is no limit, and controlled * through mallctl only. - * - * Synchronization: mtx */ pszind_t next; pszind_t limit; - malloc_mutex_t mtx; }; static inline bool @@ -48,9 +45,6 @@ geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { } -bool geom_grow_init(geom_grow_t *geom_grow); -void geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow); -void geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow); -void geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow); +void geom_grow_init(geom_grow_t *geom_grow); #endif /* JEMALLOC_INTERNAL_ECACHE_GROW_H */ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index a028456c..614d34a5 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -96,6 +96,7 @@ struct pac_s { /* The grow info for the retained ecache. 
*/ geom_grow_t geom_grow; + malloc_mutex_t grow_mtx; /* * Decay-based purging state, responsible for scheduling extent state diff --git a/src/extent.c b/src/extent.c index 6abaadf0..26a5c13f 100644 --- a/src/extent.c +++ b/src/extent.c @@ -614,7 +614,7 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool *commit) { - malloc_mutex_assert_owner(tsdn, &pac->geom_grow.mtx); + malloc_mutex_assert_owner(tsdn, &pac->grow_mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; /* Beware size_t wrap-around. */ @@ -725,7 +725,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ /* All opportunities for failure are past. */ geom_grow_size_commit(&pac->geom_grow, geom_grow_skip); - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { /* Adjust gdump stats now that extent is final size. */ @@ -739,7 +739,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; label_err: - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); return NULL; } @@ -749,24 +749,24 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(size != 0); assert(alignment != 0); - malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_lock(tsdn, &pac->grow_mtx); edata_t *edata = extent_recycle(tsdn, pac, ehooks, &pac->ecache_retained, new_addr, size, alignment, zero, commit, /* growing_retained */ true); if (edata != NULL) { - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } } else if (opt_retain && new_addr == NULL) { edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); - /* extent_grow_retained() always releases extent_grow_mtx. 
*/ + /* extent_grow_retained() always releases pac->grow_mtx. */ } else { - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); } - malloc_mutex_assert_not_owner(tsdn, &pac->geom_grow.mtx); + malloc_mutex_assert_not_owner(tsdn, &pac->grow_mtx); return edata; } diff --git a/src/geom_grow.c b/src/geom_grow.c index eab8bc96..4816bb7f 100644 --- a/src/geom_grow.c +++ b/src/geom_grow.c @@ -1,29 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -bool +void geom_grow_init(geom_grow_t *geom_grow) { geom_grow->next = sz_psz2ind(HUGEPAGE); geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); - if (malloc_mutex_init(&geom_grow->mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { - return true; - } - return false; } - -void -geom_grow_prefork(tsdn_t *tsdn, geom_grow_t *geom_grow) { - malloc_mutex_prefork(tsdn, &geom_grow->mtx); -} - -void -geom_grow_postfork_parent(tsdn_t *tsdn, geom_grow_t *geom_grow) { - malloc_mutex_postfork_parent(tsdn, &geom_grow->mtx); -} - -void -geom_grow_postfork_child(tsdn_t *tsdn, geom_grow_t *geom_grow) { - malloc_mutex_postfork_child(tsdn, &geom_grow->mtx); -} - diff --git a/src/pa_extra.c b/src/pa_extra.c index 9e083cae..8bf54b96 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,7 +16,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - geom_grow_prefork(tsdn, &shard->pac.geom_grow); + malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); } void @@ -37,7 +37,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_parent(tsdn, &shard->pac.ecache_dirty); ecache_postfork_parent(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_parent(tsdn, &shard->pac.ecache_retained); - geom_grow_postfork_parent(tsdn, &shard->pac.geom_grow); + malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_parent(tsdn, 
&shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); } @@ -48,7 +48,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { ecache_postfork_child(tsdn, &shard->pac.ecache_dirty); ecache_postfork_child(tsdn, &shard->pac.ecache_muzzy); ecache_postfork_child(tsdn, &shard->pac.ecache_retained); - geom_grow_postfork_child(tsdn, &shard->pac.geom_grow); + malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); } diff --git a/src/pac.c b/src/pac.c index 6d52a937..f50e82b0 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - if (geom_grow_init(&pac->geom_grow)) { + geom_grow_init(&pac->geom_grow); + if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { @@ -203,14 +205,14 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, } } - malloc_mutex_lock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_lock(tsdn, &pac->grow_mtx); if (old_limit != NULL) { *old_limit = sz_pind2sz(pac->geom_grow.limit); } if (new_limit != NULL) { pac->geom_grow.limit = new_ind; } - malloc_mutex_unlock(tsdn, &pac->geom_grow.mtx); + malloc_mutex_unlock(tsdn, &pac->grow_mtx); return false; } From 8efcdc3f98d896c0a67cc2dc34ff0494639b6bf5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 10:23:23 -0700 Subject: [PATCH 1858/2608] Move unbias data to prof_data --- include/jemalloc/internal/prof_data.h | 4 +++ include/jemalloc/internal/prof_externs.h | 3 -- src/prof.c | 36 ------------------------ src/prof_data.c | 35 +++++++++++++++++++++++ 4 files changed, 39 insertions(+), 39 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h 
b/include/jemalloc/internal/prof_data.h index e2e4aedb..d7c3c521 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,6 +10,9 @@ extern malloc_mutex_t prof_dump_mtx; extern malloc_mutex_t *gctx_locks; extern malloc_mutex_t *tdata_locks; +extern size_t prof_unbiased_sz[SC_NSIZES]; +extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; + void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); @@ -17,6 +20,7 @@ bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); +void prof_unbias_map_init(); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index ba5933af..b94fbed3 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -41,9 +41,6 @@ extern uint64_t prof_interval; * resets. 
*/ extern size_t lg_prof_sample; -extern size_t prof_unbiased_sz[SC_NSIZES]; -extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; -void prof_unbias_map_init(); extern bool prof_booted; diff --git a/src/prof.c b/src/prof.c index 7b649e49..0c12c492 100644 --- a/src/prof.c +++ b/src/prof.c @@ -61,8 +61,6 @@ static malloc_mutex_t prof_gdump_mtx; uint64_t prof_interval = 0; size_t lg_prof_sample; -size_t prof_unbiased_sz[SC_NSIZES]; -size_t prof_shifted_unbiased_cnt[SC_NSIZES]; static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; @@ -72,40 +70,6 @@ bool prof_booted = false; /******************************************************************************/ -void prof_unbias_map_init() { - /* See the comment in prof_sample_new_event_wait */ -#ifdef JEMALLOC_PROF - for (szind_t i = 0; i < SC_NSIZES; i++) { - double sz = (double)sz_index2size(i); - double rate = (double)(ZU(1) << lg_prof_sample); - double div_val = 1.0 - exp(-sz / rate); - double unbiased_sz = sz / div_val; - /* - * The "true" right value for the unbiased count is - * 1.0/(1 - exp(-sz/rate)). The problem is, we keep the counts - * as integers (for a variety of reasons -- rounding errors - * could trigger asserts, and not all libcs can properly handle - * floating point arithmetic during malloc calls inside libc). - * Rounding to an integer, though, can lead to rounding errors - * of over 30% for sizes close to the sampling rate. So - * instead, we multiply by a constant, dividing the maximum - * possible roundoff error by that constant. To avoid overflow - * in summing up size_t values, the largest safe constant we can - * pick is the size of the smallest allocation. 
- */ - double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN); - double shifted_unbiased_cnt = cnt_shift / div_val; - prof_unbiased_sz[i] = (size_t)round(unbiased_sz); - prof_shifted_unbiased_cnt[i] = (size_t)round( - shifted_unbiased_cnt); - } -#else - unreachable(); -#endif -} - -/******************************************************************************/ - void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); diff --git a/src/prof_data.c b/src/prof_data.c index ae9cd4b1..8dd1fd0e 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,6 +59,9 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; +size_t prof_unbiased_sz[SC_NSIZES]; +size_t prof_shifted_unbiased_cnt[SC_NSIZES]; + /******************************************************************************/ /* Red-black trees. */ @@ -535,6 +538,38 @@ prof_double_uint64_cast(double d) { } #endif +void prof_unbias_map_init() { + /* See the comment in prof_sample_new_event_wait */ +#ifdef JEMALLOC_PROF + for (szind_t i = 0; i < SC_NSIZES; i++) { + double sz = (double)sz_index2size(i); + double rate = (double)(ZU(1) << lg_prof_sample); + double div_val = 1.0 - exp(-sz / rate); + double unbiased_sz = sz / div_val; + /* + * The "true" right value for the unbiased count is + * 1.0/(1 - exp(-sz/rate)). The problem is, we keep the counts + * as integers (for a variety of reasons -- rounding errors + * could trigger asserts, and not all libcs can properly handle + * floating point arithmetic during malloc calls inside libc). + * Rounding to an integer, though, can lead to rounding errors + * of over 30% for sizes close to the sampling rate. So + * instead, we multiply by a constant, dividing the maximum + * possible roundoff error by that constant. To avoid overflow + * in summing up size_t values, the largest safe constant we can + * pick is the size of the smallest allocation. 
+ */ + double cnt_shift = (double)(ZU(1) << SC_LG_TINY_MIN); + double shifted_unbiased_cnt = cnt_shift / div_val; + prof_unbiased_sz[i] = (size_t)round(unbiased_sz); + prof_shifted_unbiased_cnt[i] = (size_t)round( + shifted_unbiased_cnt); + } +#else + unreachable(); +#endif +} + /* * The unbiasing story is long. The jeprof unbiasing logic was copied from * pprof. Both shared an issue: they unbiased using the average size of the From 20f2479ed79a8ef152c9ef50efdee2aec5dc5737 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 15:33:50 -0700 Subject: [PATCH 1859/2608] Do not create size class tables for non-prof builds --- include/jemalloc/internal/prof_data.h | 4 ++-- include/jemalloc/internal/prof_types.h | 8 ++++++++ src/prof.c | 4 ++++ src/prof_data.c | 4 ++-- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index d7c3c521..4c8e22c7 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -10,8 +10,8 @@ extern malloc_mutex_t prof_dump_mtx; extern malloc_mutex_t *gctx_locks; extern malloc_mutex_t *tdata_locks; -extern size_t prof_unbiased_sz[SC_NSIZES]; -extern size_t prof_shifted_unbiased_cnt[SC_NSIZES]; +extern size_t prof_unbiased_sz[PROF_SC_NSIZES]; +extern size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES]; void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index dbd758fa..ba628654 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -39,6 +39,14 @@ typedef struct prof_recent_s prof_recent_t; # define PROF_DUMP_BUFSIZE 65536 #endif +/* Size of size class related tables */ +#ifdef JEMALLOC_PROF +# define PROF_SC_NSIZES SC_NSIZES +#else +/* Minimize memory bloat for non-prof builds. 
*/ +# define PROF_SC_NSIZES 1 +#endif + /* Size of stack-allocated buffer used by prof_printf(). */ #define PROF_PRINTF_BUFSIZE 128 diff --git a/src/prof.c b/src/prof.c index 0c12c492..d50cbe34 100644 --- a/src/prof.c +++ b/src/prof.c @@ -89,6 +89,8 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { + cassert(config_prof); + if (opt_prof_sys_thread_name) { prof_sys_thread_name_fetch(tsd); } @@ -133,6 +135,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + cassert(config_prof); + assert(prof_info != NULL); prof_tctx_t *tctx = prof_info->alloc_tctx; assert((uintptr_t)tctx > (uintptr_t)1U); diff --git a/src/prof_data.c b/src/prof_data.c index 8dd1fd0e..63349850 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -59,8 +59,8 @@ static ckh_t bt2gctx; */ static prof_tdata_tree_t tdatas; -size_t prof_unbiased_sz[SC_NSIZES]; -size_t prof_shifted_unbiased_cnt[SC_NSIZES]; +size_t prof_unbiased_sz[PROF_SC_NSIZES]; +size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES]; /******************************************************************************/ /* Red-black trees. 
*/ From 866231fc6166b9c937ce071c5717844998a51413 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 24 Aug 2020 20:56:34 -0700 Subject: [PATCH 1860/2608] Do not repeat reentrancy test in profiling --- include/jemalloc/internal/prof_inlines.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 3d0bd14a..62c56832 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_PROF_INLINES_B_H -#define JEMALLOC_INTERNAL_PROF_INLINES_B_H +#ifndef JEMALLOC_INTERNAL_PROF_INLINES_H +#define JEMALLOC_INTERNAL_PROF_INLINES_H #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" @@ -115,9 +115,12 @@ prof_sample_should_skip(tsd_t *tsd, bool sample_event) { return true; } - if (tsd_reentrancy_level_get(tsd) > 0) { - return true; - } + /* + * sample_event is always obtained from the thread event module, and + * whenever it's true, it means that the thread event module has + * already checked the reentrancy level. 
+ */ + assert(tsd_reentrancy_level_get(tsd) == 0); prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (unlikely(tdata == NULL)) { @@ -255,4 +258,4 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, } } -#endif /* JEMALLOC_INTERNAL_PROF_INLINES_B_H */ +#endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ From 202f01d4f8b28237d9f349f9ee91691ec220425a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 26 Aug 2020 14:52:25 -0700 Subject: [PATCH 1861/2608] Fix szind computation in profiling --- src/prof.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof.c b/src/prof.c index d50cbe34..4f451998 100644 --- a/src/prof.c +++ b/src/prof.c @@ -99,7 +99,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, ptr); prof_info_set(tsd, edata, tctx); - szind_t szind = sz_size2index(size); + szind_t szind = sz_size2index(usize); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); /* From b549389e4a491f48ea466dce4fda475bcd6b7936 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 11 Aug 2020 15:35:41 -0700 Subject: [PATCH 1862/2608] Correct usize in prof last-N record --- include/jemalloc/internal/prof_recent.h | 2 +- include/jemalloc/internal/prof_structs.h | 1 + src/prof.c | 2 +- src/prof_recent.c | 6 +++--- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index d793c6da..df410236 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -5,7 +5,7 @@ extern malloc_mutex_t prof_recent_alloc_mtx; extern malloc_mutex_t prof_recent_dump_mtx; bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); -void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size); +void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); bool prof_recent_init(); void edata_prof_recent_alloc_init(edata_t *edata); diff --git 
a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index fbad6145..73ac3d5c 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -209,6 +209,7 @@ struct prof_recent_s { ql_elm(prof_recent_t) link; size_t size; + size_t usize; atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ prof_tctx_t *alloc_tctx; prof_tctx_t *dalloc_tctx; diff --git a/src/prof.c b/src/prof.c index 4f451998..9b651db8 100644 --- a/src/prof.c +++ b/src/prof.c @@ -129,7 +129,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock); if (record_recent) { assert(tctx == edata_prof_tctx_get(edata)); - prof_recent_alloc(tsd, edata, size); + prof_recent_alloc(tsd, edata, size, usize); } } diff --git a/src/prof_recent.c b/src/prof_recent.c index 426f62ec..cfaa5a68 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -270,7 +270,7 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { } void -prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { +prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { assert(edata != NULL); prof_tctx_t *tctx = edata_prof_tctx_get(edata); @@ -356,6 +356,7 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size) { prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link); assert(tail != NULL); tail->size = size; + tail->usize = usize; nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata)); tail->alloc_tctx = tctx; nstime_init_zero(&tail->dalloc_time); @@ -477,8 +478,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_object_begin(emitter); emitter_json_kv(emitter, "size", emitter_type_size, &node->size); - size_t usize = sz_s2u(node->size); - emitter_json_kv(emitter, "usize", emitter_type_size, &usize); + emitter_json_kv(emitter, "usize", emitter_type_size, &node->usize); bool released = 
prof_recent_alloc_edata_get_no_lock(node) == NULL; emitter_json_kv(emitter, "released", emitter_type_bool, &released); From 09eda2c9b621ced9982514f2e69e4e572e06ca2d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 9 Sep 2020 11:07:00 -0700 Subject: [PATCH 1863/2608] Add unit tests for usize in prof recent records --- test/unit/prof_recent.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 1885a1ac..180f13fc 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -363,6 +363,7 @@ call_dump() { typedef struct { size_t size; + size_t usize; bool released; } confirm_record_t; @@ -421,7 +422,7 @@ confirm_record(const char *template, ASSERT_STR("\"usize\""); ASSERT_CHAR(':'); - ASSERT_FORMATTED_STR("%zu", sz_s2u(record->size)); + ASSERT_FORMATTED_STR("%zu", record->usize); ASSERT_CHAR(','); ASSERT_STR("\"released\""); @@ -505,12 +506,14 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { p = malloc(7); call_dump(); records[0].size = 7; + records[0].usize = sz_s2u(7); records[0].released = false; confirm_record(template, records, 1); - q = malloc(17); + q = mallocx(17, MALLOCX_ALIGN(128)); call_dump(); records[1].size = 17; + records[1].usize = sz_sa2u(17, 128); records[1].released = false; confirm_record(template, records, 2); From d243b4ec487224248172547643630f7a5fb5e84d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 10 Aug 2020 14:53:08 -0700 Subject: [PATCH 1864/2608] Add PROFILING_INTERNALS.md This documents and explains some of the logic behind the profiling implementation. 
--- .gitignore | 2 + doc_internal/PROFILING_INTERNALS.md | 127 ++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) create mode 100644 doc_internal/PROFILING_INTERNALS.md diff --git a/.gitignore b/.gitignore index 31cdbb8e..0c3c040e 100644 --- a/.gitignore +++ b/.gitignore @@ -13,6 +13,8 @@ /doc/jemalloc.html /doc/jemalloc.3 +/doc_internal/PROFILING_INTERNALS.pdf + /jemalloc.pc /lib/ diff --git a/doc_internal/PROFILING_INTERNALS.md b/doc_internal/PROFILING_INTERNALS.md new file mode 100644 index 00000000..0a9f31c0 --- /dev/null +++ b/doc_internal/PROFILING_INTERNALS.md @@ -0,0 +1,127 @@ +# jemalloc profiling +This describes the mathematical basis behind jemalloc's profiling implementation, as well as the implementation tricks that make it effective. Historically, the jemalloc profiling design simply copied tcmalloc's. The implementation has since diverged, due to both the desire to record additional information, and to correct some biasing bugs. + +Note: this document is markdown with embedded LaTeX; different markdown renderers may not produce the expected output. Viewing with `pandoc -s PROFILING_INTERNALS.md -o PROFILING_INTERNALS.pdf` is recommended. + +## Some tricks in our implementation toolbag + +### Sampling +Recording our metadata is quite expensive; we need to walk up the stack to get a stack trace. On top of that, we need to allocate storage to record that stack trace, and stick it somewhere where a profile-dumping call can find it. That call might happen on another thread, so we'll probably need to take a lock to do so. These costs are quite large compared to the average cost of an allocation. To manage this, we'll only sample some fraction of allocations. This will miss some of them, so our data will be incomplete, but we'll try to make up for it. We can tune our sampling rate to balance accuracy and performance. + +### Fast Bernoulli sampling +Compared to our fast paths, even a `coinflip(p)` function can be quite expensive. 
Having to do a random-number generation and some floating point operations would be a sizeable relative cost. However (as pointed out in [[Vitter, 1987](https://dl.acm.org/doi/10.1145/23002.23003)]), if we can orchestrate our algorithm so that many of our `coinflip` calls share their parameter value, we can do better. We can sample from the geometric distribution, and initialize a counter with the result. When the counter hits 0, the `coinflip` function returns true (and reinitializes its internal counter). +This can let us do a random-number generation once per (logical) coinflip that comes up heads, rather than once per (logical) coinflip. Since we expect to sample relatively rarely, this can be a large win. + +### Fast-path / slow-path thinking +Most programs have a skewed distribution of allocations. Smaller allocations are much more frequent than large ones, but shorter lived and less common as a fraction of program memory. "Small" and "large" are necessarily sort of fuzzy terms, but if we define "small" as "allocations jemalloc puts into slabs" and "large" as the others, then it's not uncommon for small allocations to be hundreds of times more frequent than large ones, but take up around half the amount of heap space as large ones. Moreover, small allocations tend to be much cheaper than large ones (often by a factor of 20-30): they're more likely to hit in thread caches, less likely to have to do an mmap, and cheaper to fill (by the user) once the allocation has been returned. + +## An unbiased estimator of space consumption from (almost) arbitrary sampling strategies +Suppose we have a sampling strategy that meets the following criteria: + + - One allocation being sampled is independent of other allocations being sampled. + - Each allocation has a non-zero probability of being sampled. 
+ +We can then estimate the bytes in live allocations through some particular stack trace as: + +$$ \sum_i S_i I_i \frac{1}{\mathrm{E}[I_i]} $$ + +where the sum ranges over some index variable of live allocations from that stack, $S_i$ is the size of the $i$'th allocation, and $I_i$ is an indicator random variable for whether or not the $i'th$ allocation is sampled. $S_i$ and $\mathrm{E}[I_i]$ are constants (the program allocations are fixed; the random variables are the sampling decisions), so taking the expectation we get + +$$ \sum_i S_i \mathrm{E}[I_i] \frac{1}{\mathrm{E}[I_i]}.$$ + +This is of course $\sum_i S_i$, as we want (and, a similar calculation could be done for allocation counts as well). +This is a fairly general strategy; note that while we require that sampling decisions be independent of one another's outcomes, they don't have to be independent of previous allocations, total bytes allocated, etc. You can imagine strategies that: + + - Sample allocations at program startup at a higher rate than subsequent allocations + - Sample even-indexed allocations more frequently than odd-indexed ones (so long as no allocation has zero sampling probability) + - Let threads declare themselves as high-sampling-priority, and sample their allocations at an increased rate. + +These can all be fit into this framework to give an unbiased estimator. + +## Evaluating sampling strategies +Not all strategies for picking allocations to sample are equally good, of course. Among unbiased estimators, the lower the variance, the lower the mean squared error. 
Using the estimator above, the variance is: + +$$ +\begin{aligned} +& \mathrm{Var}[\sum_i S_i I_i \frac{1}{\mathrm{E}[I_i]}] \\ +=& \sum_i \mathrm{Var}[S_i I_i \frac{1}{\mathrm{E}[I_i]}] \\ +=& \sum_i \frac{1}{\mathrm{E}[I_i]^2} \mathrm{Var}[S_i I_i] \\ +=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{Var}[I_i] \\ +=& \sum_i \frac{S_i^2}{\mathrm{E}[I_i]^2} \mathrm{E}[I_i](1 - \mathrm{E}[I_i]) \\ +=& \sum_i S_i^2 \frac{1 - \mathrm{E}[I_i]}{\mathrm{E}[I_i]}. +\end{aligned} +$$ + +We can use this formula to compare various strategy choices. All else being equal, lower-variance strategies are better. + +## Possible sampling strategies +Because of the desire to avoid the fast-path costs, we'd like to use our Bernoulli trick if possible. There are two obvious counters to use: a coinflip per allocation, and a coinflip per byte allocated. + +### Bernoulli sampling per-allocation +An obvious strategy is to pick some large $N$, and give each allocation a $1/N$ chance of being sampled. This would let us use our Bernoulli-via-Geometric trick. Using the formula from above, we can compute the variance as: + +$$ \sum_i S_i^2 \frac{1 - \frac{1}{N}}{\frac{1}{N}} = (N-1) \sum_i S_i^2.$$ + +That is, an allocation of size $Z$ contributes a term of $(N-1)Z^2$ to the variance. + +### Bernoulli sampling per-byte +Another option we have is to pick some rate $R$, and give each byte a $1/R$ chance of being picked for sampling (at which point we would sample its contained allocation). The chance of an allocation of size $Z$ being sampled, then, is + +$$1-(1-\frac{1}{R})^{Z}$$ + +and an allocation of size $Z$ contributes a term of + +$$Z^2 \frac{(1-\frac{1}{R})^{Z}}{1-(1-\frac{1}{R})^{Z}}.$$ + +In practical settings, $R$ is large, and so this is well-approximated by + +$$Z^2 \frac{e^{-Z/R}}{1 - e^{-Z/R}} .$$ + +Just to get a sense of the dynamics here, let's look at the behavior for various values of $Z$. 
When $Z$ is small relative to $R$, we can use $e^x \approx 1 + x$, and conclude that the variance contributed by a small-$Z$ allocation is around + +$$Z^2 \frac{1-Z/R}{Z/R} \approx RZ.$$ + +When $Z$ is comparable to $R$, the variance term is near $Z^2$ (we have $\frac{e^{-Z/R}}{1 - e^{-Z/R}} = 1$ when $Z/R = \ln 2 \approx 0.693$). When $Z$ is large relative to $R$, the variance term goes to zero. + +## Picking a sampling strategy +The fast-path/slow-path dynamics of allocation patterns point us towards the per-byte sampling approach: + + - The quadratic increase in variance per allocation in the first approach is quite costly when heaps have a non-negligible portion of their bytes in those allocations, which is practically often the case. + - The Bernoulli-per-byte approach shifts more of its samples towards large allocations, which are already a slow-path. + - We drive several tickers (e.g. tcache gc) by bytes allocated, and report bytes-allocated as a user-visible statistic, so we have to do all the necessary bookkeeping anyways. + +Indeed, this is the approach we use in jemalloc. Our heap dumps record the size of the allocation and the sampling rate $R$, and jeprof unbiases by dividing by $1 - e^{-Z/R}$. The framework above would suggest dividing by $1-(1-1/R)^Z$; instead, we use the fact that $R$ is large in practical situations, and so $e^{-Z/R}$ is a good approximation (and faster to compute). (Equivalently, we may also see this as the factor that falls out from viewing sampling as a Poisson process directly). + +## Consequences for heap dump consumers +Using this approach means that there are a few things users need to be aware of. + +### Stack counts are not proportional to allocation frequencies +If one stack appears twice as often as another, this by itself does not imply that it allocates twice as often. Consider the case in which there are only two types of allocating call stacks in a program. 
Stack A allocates 8 bytes, and occurs a million times in a program. Stack B allocates 8 MB, and occurs just once in a program. If our sampling rate $R$ is about 1MB, we expect stack A to show up about 8 times, and stack B to show up once. Stack A isn't 8 times more frequent than stack B, though; it's a million times more frequent. + +### Aggregation must be done after unbiasing samples +Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from a million machines. We then have 8 million occurrences of stack A (each of 8 bytes), and a million occurrences of stack B (each of 8 MB). If we sum first, we'll attribute 64 MB to stack A, and 8 TB to stack B. Unbiasing changes these numbers by an infinitesimal amount, so that sum-then-unbias dramatically underreports the amount of memory allocated by stack A. + +## An avenue for future exploration +While the framework we laid out above is pretty general, as an engineering decision we're only interested in fairly simple approaches (i.e. ones for which the chance of an allocation being sampled depends only on its size). Our job is then: for each size class $Z$, pick a probability $p_Z$ that an allocation of that size will be sampled. We made some handwave-y references to statistical distributions to justify our choices, but there's no reason we need to pick them that way. Any set of non-zero probabilities is a valid choice. +The real limiting factor in our ability to reduce estimator variance is the fact that sampling is expensive; we want to make sure we only do it on a small fraction of allocations. Our goal, then, is to pick the $p_Z$ to minimize variance given some maximum sampling rate $P$. 
If we define $a_Z$ to be the fraction of allocations of size $Z$, and $l_Z$ to be the fraction of allocations of size $Z$ still alive at the time of a heap dump, then we can phrase this as an optimization problem over the choices of $p_Z$: + +Minimize + +$$ \sum_Z Z^2 l_Z \frac{1-p_Z}{p_Z} $$ + +subject to + +$$ \sum_Z a_Z p_Z \leq P $$ + +Ignoring a term that doesn't depend on $p_Z$, the objective is minimized whenever + +$$ \sum_Z Z^2 l_Z \frac{1}{p_Z} $$ + +is. For a particular program, $l_Z$ and $a_Z$ are just numbers that can be obtained (exactly) from existing stats introspection facilities, and we have a fairly tractable convex optimization problem (it can be framed as a second-order cone program). It would be interesting to evaluate, for various common allocation patterns, how well our current strategy adapts. Do our actual choices for $p_Z$ closely correspond to the optimal ones? How close is the variance of our choices to the variance of the optimal strategy? +You can imagine an implementation that actually goes all the way, and makes $p_Z$ selections a tuning parameter. I don't think this is a good use of development time for the foreseeable future; but I do wonder about the answers to some of these questions. + +## Implementation realities + +The nice story above is at least partially a lie. Initially, jeprof (copying its logic from pprof) had the sum-then-unbias error described above. The current version of jemalloc does the unbiasing step on a per-allocation basis internally, so that we're always tracking what the unbiased numbers "should" be. The problem is, actually surfacing those unbiased numbers would require a breaking change to jeprof (and the various already-deployed tools that have copied its logic). Instead, we use a little bit more trickery. Since we know at dump time the numbers we want jeprof to report, we simply choose the values we'll output so that the jeprof numbers will match the true numbers. 
The math is described in `src/prof_data.c` (where the only cleverness is a change of variables that lets the exponentials fall out). + +This has the effect of making the output of jeprof (and related tools) correct, while making its inputs incorrect. This can be annoying to human readers of raw profiling dump output. From 1541ffc76571d8a2a0baad4a13a379305b7df5f2 Mon Sep 17 00:00:00 2001 From: Hao Liu Date: Wed, 9 Sep 2020 12:21:41 +0800 Subject: [PATCH 1865/2608] configure: add --with-lg-slab-maxregs configure option. Specify the maximum number of regions in a slab, which is ( - ) by default. This increases the limit of slab sizes specified by "slab_sizes" in malloc_conf. This should never be less than the default value. The max value of this option is related to LG_BITMAP_MAXBITS (see more in bitmap.h). For example, on a 4k page size system, if we: 1) configure jemalloc with with --with-lg-slab-maxregs=12. 2) export MALLOC_CONF="slab_sizes:9-16:4" The slab size of 16 bytes is set to 4 pages. Previously, the default lg-slab-maxregs is 9 (i.e. 12 - 3). The max slab size of 16 bytes is 2 pages (i.e. (1<<9) * 16 bytes). By increasing the value from 9 to 12, the max slab size can be set by MALLOC_CONF is 16 pages (i.e. (1<<12) * 16 bytes). --- INSTALL.md | 7 +++++++ configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ include/jemalloc/internal/sc.h | 10 ++++++++-- 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index eb55acfd..2aaa33e1 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -250,6 +250,13 @@ any of the following arguments (not a definitive list) to 'configure': configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). +* `--with-lg-slab-maxregs=` + + Specify the maximum number of regions in a slab, which is + ( - ) by default. This increases the limit of slab + sizes specified by "slab_sizes" in malloc_conf. 
This should never be less + than the default value. + * `--with-lg-vaddr=` Specify the number of significant virtual address bits. By default, the diff --git a/configure.ac b/configure.ac index d68d376c..7c203020 100644 --- a/configure.ac +++ b/configure.ac @@ -1586,6 +1586,15 @@ if test "x$with_lg_quantum" != "x" ; then AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) fi +AC_ARG_WITH([lg_slab_maxregs], + [AS_HELP_STRING([--with-lg-slab-maxregs=], + [Base 2 log of maximum number of regions in a slab (used with malloc_conf slab_sizes)])], + [LG_SLAB_MAXREGS="with_lg_slab_maxregs"], + [LG_SLAB_MAXREGS=""]) +if test "x$with_lg_slab_maxregs" != "x" ; then + AC_DEFINE_UNQUOTED([LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) +fi + AC_ARG_WITH([lg_page], [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index ee052bb8..7a4ebf17 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -182,6 +182,9 @@ /* One page is 2^LG_PAGE bytes. */ #undef LG_PAGE +/* Maximum number of regions in a slab. */ +#undef LG_SLAB_MAXREGS + /* * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the * system does not explicitly support huge pages; system calls that require diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 138da5c0..133763da 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -270,8 +270,14 @@ #define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) /* Maximum number of regions in one slab. 
*/ -#define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) -#define SC_SLAB_MAXREGS (1U << LG_SLAB_MAXREGS) +#ifndef LG_SLAB_MAXREGS +# define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) +#elif (LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN)) +# error "Unsupported SC_LG_SLAB_MAXREGS" +#else +# define SC_LG_SLAB_MAXREGS LG_SLAB_MAXREGS +#endif +#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; From 36ebb5abe319d473c8535488e2dc1f4f0bc4e9d4 Mon Sep 17 00:00:00 2001 From: ezeeyahoo Date: Fri, 11 Sep 2020 13:37:10 +0530 Subject: [PATCH 1866/2608] CI support for PPC64LE architecture --- .travis.yml | 191 +++++++++++++++++++++++++++++++++++++++--- scripts/gen_travis.py | 43 ++++++++-- 2 files changed, 214 insertions(+), 20 deletions(-) diff --git a/.travis.yml b/.travis.yml index 777aa3ec..b61627bd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,195 +4,360 @@ dist: precise matrix: include: - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + arch: amd64 addons: &gcc_multilib apt: packages: - gcc-multilib + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux - env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: 
+ - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + addons: &gcc_ppc + apt: + packages: + - g++-8 + env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + addons: *gcc_multilib + env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang 
CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" 
CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 + addons: *gcc_multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - addons: *gcc_multilib - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: 
CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ 
COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks 
--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux + arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" # Development build - os: linux diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index b46bd001..6832f91b 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -36,8 +36,12 @@ MAX_UNUSUAL_OPTIONS = 2 os_default = 'linux' os_unusual = 'osx' +arch_default = 'amd64' +arch_unusual = 'ppc64le' + compilers_default = 'CC=gcc CXX=g++' compilers_unusual = 'CC=clang CXX=clang++' +compilers_ppc_default = 'CC=gcc-8 CXX=g++-8' compiler_flag_unusuals = ['-m32'] @@ -58,7 +62,7 @@ malloc_conf_unusuals = [ ] all_unusuals = ( - [os_unusual] + [compilers_unusual] + compiler_flag_unusuals + [os_unusual] + [arch_unusual] + [compilers_unusual] + compiler_flag_unusuals + configure_flag_unusuals + malloc_conf_unusuals ) @@ -67,13 +71,15 @@ for i in xrange(MAX_UNUSUAL_OPTIONS + 1): unusual_combinations_to_test += combinations(all_unusuals, i) gcc_multilib_set = False +gcc_ppc_set = False # Formats a job from a combination of flags def format_job(combination): global gcc_multilib_set + global gcc_ppc_set os = os_unusual if os_unusual in combination else os_default compilers = compilers_unusual if compilers_unusual in combination else compilers_default - + arch = arch_unusual if arch_unusual in combination else arch_default compiler_flags = [x for x in combination if x in compiler_flag_unusuals] configure_flags = [x for x in combination if x in configure_flag_unusuals] malloc_conf = [x for x in combination if x in malloc_conf_unusuals] @@ -90,14 +96,18 @@ def format_job(combination): if os == 'osx' and '--enable-prof' in configure_flags: return "" - # We get some spurious errors when -Warray-bounds is enabled. 
- env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' - 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( - compilers, " ".join(compiler_flags), " ".join(configure_flags)) + # Filter out unsupported OSX configuration on PPC64LE + if arch == 'ppc64le' and ( + os == 'osx' + or '-m32' in combination + or compilers_unusual in combination + ): + return "" job = "" job += ' - os: %s\n' % os - job += ' env: %s\n' % env_string + job += ' arch: %s\n' % arch + if '-m32' in combination and os == 'linux': job += ' addons:' if gcc_multilib_set: @@ -108,6 +118,25 @@ def format_job(combination): job += ' packages:\n' job += ' - gcc-multilib\n' gcc_multilib_set = True + + if arch == 'ppc64le': + job += ' addons:' + if gcc_ppc_set: + job += ' *gcc_ppc\n' + else: + job += ' &gcc_ppc\n' + job += ' apt:\n' + job += ' packages:\n' + job += ' - g++-8\n' + # Compilers overwritten for PPC64LE to gcc-8 + compilers = compilers_ppc_default + + # We get some spurious errors when -Warray-bounds is enabled. + env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' + 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( + compilers, " ".join(compiler_flags), " ".join(configure_flags)) + + job += ' env: %s\n' % env_string return job include_rows = "" From 40cf71a06d07faadc03b81f97697826c53b3fa62 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 16 Sep 2020 14:03:59 -0700 Subject: [PATCH 1867/2608] Remove --with-slab-maxregs options from INSTALL.md The variable slab sizes feature is still experimental; we don't want people to start using it willy-nilly, or document its existence as a guarantee. --- INSTALL.md | 7 ------- 1 file changed, 7 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 2aaa33e1..eb55acfd 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -250,13 +250,6 @@ any of the following arguments (not a definitive list) to 'configure': configuration, jemalloc will provide additional size classes that are not 16-byte-aligned (24, 40, and 56). 
-* `--with-lg-slab-maxregs=` - - Specify the maximum number of regions in a slab, which is - ( - ) by default. This increases the limit of slab - sizes specified by "slab_sizes" in malloc_conf. This should never be less - than the default value. - * `--with-lg-vaddr=` Specify the number of significant virtual address bits. By default, the From 7ad2f7866343265f570dc83b2f2df163ef0c03f9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 16 Sep 2020 15:19:06 -0700 Subject: [PATCH 1868/2608] Avoid a -Wundef warning on LG_SLAB_MAXREGS. --- configure.ac | 6 +++--- .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- include/jemalloc/internal/sc.h | 12 +++++++----- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 7c203020..d55c0b8e 100644 --- a/configure.ac +++ b/configure.ac @@ -1589,10 +1589,10 @@ fi AC_ARG_WITH([lg_slab_maxregs], [AS_HELP_STRING([--with-lg-slab-maxregs=], [Base 2 log of maximum number of regions in a slab (used with malloc_conf slab_sizes)])], - [LG_SLAB_MAXREGS="with_lg_slab_maxregs"], - [LG_SLAB_MAXREGS=""]) + [CONFIG_LG_SLAB_MAXREGS="with_lg_slab_maxregs"], + [CONFIG_LG_SLAB_MAXREGS=""]) if test "x$with_lg_slab_maxregs" != "x" ; then - AC_DEFINE_UNQUOTED([LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) + AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) fi AC_ARG_WITH([lg_page], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7a4ebf17..7af28f73 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -183,7 +183,7 @@ #undef LG_PAGE /* Maximum number of regions in a slab. */ -#undef LG_SLAB_MAXREGS +#undef CONFIG_LG_SLAB_MAXREGS /* * One huge page is 2^LG_HUGEPAGE bytes. 
Note that this is defined even if the diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 133763da..031ffff4 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -270,15 +270,17 @@ #define SC_LARGE_MAXCLASS (SC_MAX_BASE + (SC_NGROUP - 1) * SC_MAX_DELTA) /* Maximum number of regions in one slab. */ -#ifndef LG_SLAB_MAXREGS +#ifndef CONFIG_LG_SLAB_MAXREGS # define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) -#elif (LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN)) -# error "Unsupported SC_LG_SLAB_MAXREGS" #else -# define SC_LG_SLAB_MAXREGS LG_SLAB_MAXREGS +# if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN) +# error "Unsupported SC_LG_SLAB_MAXREGS" +# else +# define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS +# endif #endif -#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) +#define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; struct sc_s { From e034500698fe74d4a82cf44131eda0110862f4e8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 9 Jul 2020 18:07:17 -0700 Subject: [PATCH 1869/2608] Edata: rename "ranged" bit to "pai". This better represents its intended purpose; the hugepage allocator design evolved away from needing contiguity of hugepage virtual address space. --- include/jemalloc/internal/edata.h | 53 +++++++++++++++++++------------ src/emap.c | 2 +- src/extent.c | 15 ++++----- src/extent_dss.c | 6 ++-- test/unit/rtree.c | 8 ++--- test/unit/slab.c | 2 +- 6 files changed, 48 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index bb7da1d5..f1ae56a4 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -26,6 +26,16 @@ enum extent_head_state_e { }; typedef enum extent_head_state_e extent_head_state_t; +/* + * Which implementation of the page allocator interface, (PAI, defined in + * pai.h) owns the given extent? 
+ */ +enum extent_pai_e { + EXTENT_PAI_PAC = 0, + EXTENT_PAI_HPA = 1 +}; +typedef enum extent_pai_e extent_pai_t; + struct e_prof_info_s { /* Time when this was allocated. */ nstime_t e_prof_alloc_time; @@ -68,7 +78,7 @@ struct edata_s { * a: arena_ind * b: slab * c: committed - * r: ranged + * p: pai * z: zeroed * t: state * i: szind @@ -76,7 +86,7 @@ struct edata_s { * s: bin_shard * n: sn * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zrcbaaaa aaaaaaaa + * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -91,10 +101,7 @@ struct edata_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * - * ranged: Whether or not this extent is currently owned by the range - * allocator. This may be false even if the extent originally - * came from a range allocator; this indicates its *current* - * owner, not its original owner. + * pai: The pai flag is an extent_pai_t. * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. 
@@ -136,12 +143,12 @@ struct edata_s { #define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) #define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_RANGED_WIDTH 1 -#define EDATA_BITS_RANGED_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_RANGED_MASK MASK(EDATA_BITS_RANGED_WIDTH, EDATA_BITS_RANGED_SHIFT) +#define EDATA_BITS_PAI_WIDTH 1 +#define EDATA_BITS_PAI_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_WIDTH 1 -#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_RANGED_WIDTH + EDATA_BITS_RANGED_SHIFT) +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) #define EDATA_BITS_STATE_WIDTH 2 @@ -291,10 +298,10 @@ edata_committed_get(const edata_t *edata) { EDATA_BITS_COMMITTED_SHIFT); } -static inline bool -edata_ranged_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_RANGED_MASK) >> - EDATA_BITS_RANGED_SHIFT); +static inline extent_pai_t +edata_pai_get(const edata_t *edata) { + return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) >> + EDATA_BITS_PAI_SHIFT); } static inline bool @@ -488,9 +495,9 @@ edata_committed_set(edata_t *edata, bool committed) { } static inline void -edata_ranged_set(edata_t *edata, bool ranged) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_RANGED_MASK) | - ((uint64_t)ranged << EDATA_BITS_RANGED_SHIFT); +edata_pai_set(edata_t *edata, extent_pai_t pai) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) | + ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); } static inline void @@ -538,9 +545,8 @@ edata_is_head_set(edata_t *edata, bool is_head) { static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, bool slab, szind_t szind, size_t sn, 
extent_state_t state, bool zeroed, - bool committed, bool ranged, extent_head_state_t is_head) { + bool committed, extent_pai_t pai, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); - assert(ranged == false); edata_arena_ind_set(edata, arena_ind); edata_addr_set(edata, addr); @@ -551,7 +557,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_state_set(edata, state); edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); - edata_ranged_set(edata, ranged); + edata_pai_set(edata, pai); edata_is_head_set(edata, is_head == EXTENT_IS_HEAD); if (config_prof) { edata_prof_tctx_set(edata, NULL); @@ -569,7 +575,12 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); - edata_ranged_set(edata, false); + /* + * This isn't strictly true, but base allocated extents never get + * deallocated and can't be looked up in the emap, but no sense in + * wasting a state bit to encode this fact. 
+ */ + edata_pai_set(edata, EXTENT_PAI_PAC); } static inline int diff --git a/src/emap.c b/src/emap.c index ec1b4cdb..4e7ca8d0 100644 --- a/src/emap.c +++ b/src/emap.c @@ -249,7 +249,7 @@ emap_split_prepare(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, */ edata_t lead = {0}; edata_init(&lead, 0U, edata_addr_get(edata), size_a, false, 0, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, &lead, false, true, &prepare->lead_elm_a, &prepare->lead_elm_b); diff --git a/src/extent.c b/src/extent.c index 26a5c13f..58ec8205 100644 --- a/src/extent.c +++ b/src/extent.c @@ -86,7 +86,7 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool commit = true; edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, new_addr, size, alignment, zero, &commit, false); - assert(edata == NULL || !edata_ranged_get(edata)); + assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; } @@ -115,7 +115,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size, alignment, zero, &commit); } - assert(edata == NULL || !edata_ranged_get(edata)); + assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; } @@ -124,7 +124,7 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - assert(!edata_ranged_get(edata)); + assert(edata_pai_get(edata) == EXTENT_PAI_PAC); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -650,7 +650,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr, alloc_size, false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zeroed, committed, /* ranged */ false, + extent_state_active, zeroed, committed, 
EXTENT_PAI_PAC, EXTENT_IS_HEAD); if (extent_register_no_gdump_add(tsdn, pac, edata)) { @@ -790,7 +790,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zero, *commit, /* ranged */ false, + extent_state_active, zero, *commit, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); if (extent_register(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -1026,7 +1026,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { - assert(!edata_ranged_get(edata)); + assert(edata_pai_get(edata) == EXTENT_PAI_PAC); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1180,8 +1180,7 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, /* slab */ false, SC_NSIZES, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), edata_ranged_get(edata), - EXTENT_NOT_HEAD); + edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); diff --git a/src/extent_dss.c b/src/extent_dss.c index 7427cd8f..9857fd29 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -156,8 +156,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, gap_addr_page, gap_size_page, false, SC_NSIZES, extent_sn_next( &arena->pa_shard.pac), - extent_state_active, false, true, false, - EXTENT_NOT_HEAD); + extent_state_active, false, true, + EXTENT_PAI_PAC, EXTENT_NOT_HEAD); } /* * Compute the address just past the end of the desired @@ -206,7 +206,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena_ind_get(arena), ret, size, size, false, 
SC_NSIZES, extent_state_active, false, true, - false, EXTENT_NOT_HEAD); + EXTENT_PAI_PAC, EXTENT_NOT_HEAD); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 63d6e37b..775bc190 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -36,9 +36,9 @@ TEST_BEGIN(test_rtree_extrema) { edata_t edata_a = {0}, edata_b = {0}; edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -93,7 +93,7 @@ TEST_BEGIN(test_rtree_bits) { edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; @@ -143,7 +143,7 @@ TEST_BEGIN(test_rtree_random) { edata_t edata = {0}; edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, - extent_state_active, false, false, false, EXTENT_NOT_HEAD); + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_false(rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); diff --git a/test/unit/slab.c b/test/unit/slab.c index 5ca8c441..6baa9d3a 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -12,7 +12,7 @@ TEST_BEGIN(test_arena_slab_regind) { edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, false, + binind, 0, extent_state_active, false, true, EXTENT_PAI_PAC, 
EXTENT_NOT_HEAD); expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); From ed99d300b93777787aad82549a4b0c4be129df35 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Sep 2020 18:12:06 -0700 Subject: [PATCH 1870/2608] Flat bitmap: Add longest-range computation. This will come in handy in the (upcoming) page-slab set assertions. --- include/jemalloc/internal/flat_bitmap.h | 25 +++++++++++++++ test/unit/flat_bitmap.c | 41 +++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index 7b894d53..0faf447e 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -284,4 +284,29 @@ fb_urange_riter(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, /* val */ false, /* forward */ false); } +JEMALLOC_ALWAYS_INLINE size_t +fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) { + size_t begin = 0; + size_t longest_len = 0; + size_t len = 0; + while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin, + &len, val, /* forward */ true)) { + if (len > longest_len) { + longest_len = len; + } + begin += len; + } + return longest_len; +} + +static inline size_t +fb_srange_longest(fb_group_t *fb, size_t nbits) { + return fb_range_longest_impl(fb, nbits, /* val */ true); +} + +static inline size_t +fb_urange_longest(fb_group_t *fb, size_t nbits) { + return fb_range_longest_impl(fb, nbits, /* val */ false); +} + #endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 410e94ff..2f360d30 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -301,6 +301,10 @@ TEST_BEGIN(test_empty_full) { } TEST_END +/* + * This tests both iter_range and the longest range functionality, which is + * built closely on top of it. 
+ */ TEST_BEGIN(test_iter_range_simple) { size_t set_limit = 30; size_t nbits = 100; @@ -318,6 +322,10 @@ TEST_BEGIN(test_iter_range_simple) { /* A set of checks with only the first set_limit bits *set*. */ fb_set_range(fb, nbits, 0, set_limit); + expect_zu_eq(set_limit, fb_srange_longest(fb, nbits), + "Incorrect longest set range"); + expect_zu_eq(nbits - set_limit, fb_urange_longest(fb, nbits), + "Incorrect longest unset range"); for (size_t i = 0; i < set_limit; i++) { result = fb_srange_iter(fb, nbits, i, &begin, &len); expect_true(result, "Should have found a range at %zu", i); @@ -360,6 +368,10 @@ TEST_BEGIN(test_iter_range_simple) { /* A set of checks with only the first set_limit bits *unset*. */ fb_unset_range(fb, nbits, 0, set_limit); fb_set_range(fb, nbits, set_limit, nbits - set_limit); + expect_zu_eq(nbits - set_limit, fb_srange_longest(fb, nbits), + "Incorrect longest set range"); + expect_zu_eq(set_limit, fb_urange_longest(fb, nbits), + "Incorrect longest unset range"); for (size_t i = 0; i < set_limit; i++) { result = fb_srange_iter(fb, nbits, i, &begin, &len); expect_true(result, "Should have found a range at %zu", i); @@ -436,6 +448,27 @@ fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, return false; } +/* Similar, but for finding longest ranges. 
*/ +static size_t +fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) { + size_t longest_so_far = 0; + for (size_t begin = 0; begin < nbits; begin++) { + if (fb_get(fb, nbits, begin) != val) { + continue; + } + size_t end = begin + 1; + for (; end < nbits; end++) { + if (fb_get(fb, nbits, end) != val) { + break; + } + } + if (end - begin > longest_so_far) { + longest_so_far = end - begin; + } + } + return longest_so_far; +} + static void expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) { @@ -487,6 +520,10 @@ expect_iter_results(fb_group_t *fb, size_t nbits) { expect_iter_results_at(fb, nbits, i, true, false); expect_iter_results_at(fb, nbits, i, true, true); } + expect_zu_eq(fb_range_longest_simple(fb, nbits, true), + fb_srange_longest(fb, nbits), "Longest range mismatch"); + expect_zu_eq(fb_range_longest_simple(fb, nbits, false), + fb_urange_longest(fb, nbits), "Longest range mismatch"); } static void @@ -527,6 +564,10 @@ do_test_iter_range_exhaustive(size_t nbits) { free(fb); } +/* + * Like test_iter_range_simple, this tests both iteration and longest-range + * computation. + */ TEST_BEGIN(test_iter_range_exhaustive) { #define NB(nbits) \ do_test_iter_range_exhaustive(nbits); From 018b162d673e64230b7d202075dca0e846e28e6a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 10 Jul 2020 17:40:13 -0700 Subject: [PATCH 1871/2608] Add psset: a set of pageslabs. This introduces a new sort of edata_t; a pageslab, and a set to manage them. This is part of a series of a commits to implement a hugepage allocator; the pageset will be per-arena, and track small page allocations requests within a larger extent allocated from a centralized hugepage allocator. 
--- Makefile.in | 2 + include/jemalloc/internal/edata.h | 55 +++- include/jemalloc/internal/psset.h | 61 ++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/psset.c | 239 ++++++++++++++ test/unit/psset.c | 306 ++++++++++++++++++ 9 files changed, 670 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/psset.h create mode 100644 src/psset.c create mode 100644 test/unit/psset.c diff --git a/Makefile.in b/Makefile.in index 3697e071..4769d48f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -136,6 +136,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ $(srcroot)src/prof_sys.c \ + $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ @@ -239,6 +240,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ + $(srcroot)test/unit/psset.c \ $(srcroot)test/unit/ql.c \ $(srcroot)test/unit/qr.c \ $(srcroot)test/unit/rb.c \ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index f1ae56a4..4fee76bf 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -202,7 +202,31 @@ struct edata_s { * This keeps the size of an edata_t at exactly 128 bytes on * architectures with 8-byte pointers and 4k pages. */ - void *reserved1, *reserved2; + void *reserved1; + union { + /* + * We could steal a low bit from these fields to indicate what + * sort of "thing" this is (a page slab, an object within a page + * slab, or a non-pageslab range). We don't do this yet, but it + * would enable some extra asserts. + */ + + /* + * If this edata is from an HPA, it may be part of some larger + * pageslab. Track it if so. 
Otherwise (either because it's + * not part of a pageslab, or not from the HPA at all), NULL. + */ + edata_t *ps; + /* + * If this edata *is* a pageslab, then it has some longest free + * range in it. Track it. + */ + struct { + uint32_t longest_free_range; + /* Not yet tracked. */ + /* uint32_t longest_free_range_pos; */ + }; + }; union { /* @@ -346,6 +370,18 @@ edata_bsize_get(const edata_t *edata) { return edata->e_bsize; } +static inline edata_t * +edata_ps_get(const edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + return edata->ps; +} + +static inline uint32_t +edata_longest_free_range_get(const edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + return edata->longest_free_range; +} + static inline void * edata_before_get(const edata_t *edata) { return (void *)((uintptr_t)edata_base_get(edata) - PAGE); @@ -428,6 +464,19 @@ edata_bsize_set(edata_t *edata, size_t bsize) { edata->e_bsize = bsize; } +static inline void +edata_ps_set(edata_t *edata, edata_t *ps) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL); + edata->ps = ps; +} + +static inline void +edata_longest_free_range_set(edata_t *edata, uint32_t longest_free_range) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA + || longest_free_range == 0); + edata->longest_free_range = longest_free_range; +} + static inline void edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ @@ -562,6 +611,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, if (config_prof) { edata_prof_tctx_set(edata, NULL); } + edata_ps_set(edata, NULL); + edata_longest_free_range_set(edata, 0); } static inline void @@ -581,6 +632,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { * wasting a state bit to encode this fact. 
*/ edata_pai_set(edata, EXTENT_PAI_PAC); + edata_ps_set(edata, NULL); + edata_longest_free_range_set(edata, 0); } static inline int diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h new file mode 100644 index 00000000..8f3f9ee7 --- /dev/null +++ b/include/jemalloc/internal/psset.h @@ -0,0 +1,61 @@ +#ifndef JEMALLOC_INTERNAL_PSSET_H +#define JEMALLOC_INTERNAL_PSSET_H + +/* + * A page-slab set. What the eset is to PAC, the psset is to HPA. It maintains + * a collection of page-slabs (the intent being that they are backed by + * hugepages, or at least could be), and handles allocation and deallocation + * requests. + * + * It has the same synchronization guarantees as the eset; stats queries don't + * need any external synchronization, everything else does. + */ + +/* + * One more than the maximum pszind_t we will serve out of the HPA. + * Practically, we expect only the first few to be actually used. This + * corresponds to a maximum size of of 512MB on systems with 4k pages and + * SC_NGROUP == 4, which is already an unreasonably large maximum. Morally, you + * can think of this as being SC_NPSIZES, but there's no sense in wasting that + * much space in the arena, making bitmaps that much larger, etc. + */ +#define PSSET_NPSIZES 64 + +typedef struct psset_s psset_t; +struct psset_s { + /* + * The pageslabs, quantized by the size class of the largest contiguous + * free run of pages in a pageslab. + */ + edata_heap_t pageslabs[PSSET_NPSIZES]; + bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; +}; + +void psset_init(psset_t *psset); + + +/* + * Tries to obtain a chunk from an existing pageslab already in the set. + * Returns true on failure. + */ +bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); + +/* + * Given a newly created pageslab ps (not currently in the set), pass ownership + * to the psset and allocate an extent from within it. The passed-in pageslab + * must be at least as big as size. 
+ */ +void psset_alloc_new(psset_t *psset, edata_t *ps, + edata_t *r_edata, size_t size); + +/* + * Given an extent that comes from a pageslab in this pageslab set, returns it + * to its slab. Does not take ownership of the underlying edata_t. + * + * If some slab becomes empty as a result of the dalloc, it is returned -- the + * result must be checked and deallocated to the central HPA. Otherwise returns + * NULL. + */ +edata_t *psset_dalloc(psset_t *psset, edata_t *edata); + +#endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index fe147790..3200eaba 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 4b7b6baf..8d459804 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 6bd43c78..7badc63c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 4b7b6baf..8d459804 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/src/psset.c b/src/psset.c new file mode 100644 index 00000000..9675a0d1 --- /dev/null +++ b/src/psset.c @@ -0,0 +1,239 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include 
"jemalloc/internal/psset.h" + +#include "jemalloc/internal/flat_bitmap.h" + +static const bitmap_info_t psset_bitmap_info = + BITMAP_INFO_INITIALIZER(PSSET_NPSIZES); + +void +psset_init(psset_t *psset) { + for (unsigned i = 0; i < PSSET_NPSIZES; i++) { + edata_heap_new(&psset->pageslabs[i]); + } + bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); +} + +JEMALLOC_ALWAYS_INLINE void +psset_assert_ps_consistent(edata_t *ps) { + assert(fb_urange_longest(edata_slab_data_get(ps)->bitmap, + edata_size_get(ps) >> LG_PAGE) == edata_longest_free_range_get(ps)); +} + +/* + * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the + * set, picks one that can satisfy the allocation and remove it from the set. + */ +static edata_t * +psset_recycle_extract(psset_t *psset, size_t size) { + pszind_t ret_ind; + edata_t *ret = NULL; + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + for (pszind_t i = (pszind_t)bitmap_ffu(psset->bitmap, + &psset_bitmap_info, (size_t)pind); + i < PSSET_NPSIZES; + i = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, + (size_t)i + 1)) { + assert(!edata_heap_empty(&psset->pageslabs[i])); + edata_t *ps = edata_heap_first(&psset->pageslabs[i]); + if (ret == NULL || edata_snad_comp(ps, ret) < 0) { + ret = ps; + ret_ind = i; + } + } + if (ret == NULL) { + return NULL; + } + edata_heap_remove(&psset->pageslabs[ret_ind], ret); + if (edata_heap_empty(&psset->pageslabs[ret_ind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, ret_ind); + } + + psset_assert_ps_consistent(ret); + return ret; +} + +static void +psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { + psset_assert_ps_consistent(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + largest_range << LG_PAGE)); + + assert(pind < PSSET_NPSIZES); + + if (edata_heap_empty(&psset->pageslabs[pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } + edata_heap_insert(&psset->pageslabs[pind], ps); +} + +/* + * Given a 
pageslab ps and an edata to allocate size bytes from, initializes the + * edata with a range in the pageslab, and puts ps back in the set. + */ +static void +psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, + size_t size) { + size_t start = 0; + /* + * These are dead stores, but the compiler will issue warnings on them + * since it can't tell statically that found is always true below. + */ + size_t begin = 0; + size_t len = 0; + + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + + size_t npages = size >> LG_PAGE; + size_t ps_npages = edata_size_get(ps) >> LG_PAGE; + + size_t largest_unchosen_range = 0; + while (true) { + bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, + &len); + /* + * A precondition to this function is that ps must be able to + * serve the allocation. + */ + assert(found); + if (len >= npages) { + /* + * We use first-fit within the page slabs; this gives + * bounded worst-case fragmentation within a slab. It's + * not necessarily right; we could experiment with + * various other options. + */ + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + uintptr_t addr = (uintptr_t)edata_base_get(ps) + begin * PAGE; + edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + fb_set_range(ps_fb, ps_npages, begin, npages); + /* + * OK, we've got to put the pageslab back. First we have to figure out + * where, though; we've only checked run sizes before the pageslab we + * picked. We also need to look for ones after the one we picked. Note + * that we want begin + npages as the start position, not begin + len; + * we might not have used the whole range. 
+ * + * TODO: With a little bit more care, we can guarantee that the longest + * free range field in the edata is accurate upon entry, and avoid doing + * this check in the case where we're allocating from some smaller run. + */ + start = begin + npages; + while (start < ps_npages) { + bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, + &len); + if (!found) { + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); + if (largest_unchosen_range != 0) { + psset_insert(psset, ps, largest_unchosen_range); + } +} + +bool +psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { + edata_t *ps = psset_recycle_extract(psset, size); + if (ps == NULL) { + return true; + } + psset_ps_alloc_insert(psset, ps, r_edata, size); + return false; +} + +void +psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + size_t ps_npages = edata_size_get(ps) >> LG_PAGE; + assert(fb_empty(ps_fb, ps_npages)); + + assert(ps_npages >= (size >> LG_PAGE)); + psset_ps_alloc_insert(psset, ps, r_edata, size); +} + +edata_t * +psset_dalloc(psset_t *psset, edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_ps_get(edata) != NULL); + + edata_t *ps = edata_ps_get(edata); + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); + + size_t ps_npages = edata_size_get(ps) >> LG_PAGE; + size_t begin = + ((uintptr_t)edata_base_get(edata) - (uintptr_t)edata_base_get(ps)) + >> LG_PAGE; + size_t len = edata_size_get(edata) >> LG_PAGE; + fb_unset_range(ps_fb, ps_npages, begin, len); + + /* We might have just created a new, larger range. 
*/ + size_t new_begin = (size_t)(fb_fls(ps_fb, ps_npages, begin) + 1); + size_t new_end = fb_ffs(ps_fb, ps_npages, begin + len - 1); + size_t new_range_len = new_end - new_begin; + /* + * If the new free range is no longer than the previous longest one, + * then the pageslab is non-empty and doesn't need to change bins. + * We're done, and don't need to return a pageslab to evict. + */ + if (new_range_len <= ps_old_longest_free_range) { + return NULL; + } + /* + * Otherwise, it might need to get evicted from the set, or change its + * bin. + */ + edata_longest_free_range_set(ps, (uint32_t)new_range_len); + /* + * If it was previously non-full, then it's in some (possibly now + * incorrect) bin already; remove it. + * + * TODO: We bailed out early above if we didn't expand the longest free + * range, which should avoid a lot of redundant remove/reinserts in the + * same bin. But it doesn't eliminate all of them; it's possible that + * we decreased the longest free range length, but only slightly, and + * not enough to change our pszind. We could check that more precisely. + * (Or, ideally, size class dequantization will happen at some point, + * and the issue becomes moot). + */ + if (ps_old_longest_free_range > 0) { + pszind_t old_pind = sz_psz2ind(sz_psz_quantize_floor( + ps_old_longest_free_range<< LG_PAGE)); + edata_heap_remove(&psset->pageslabs[old_pind], ps); + if (edata_heap_empty(&psset->pageslabs[old_pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, + (size_t)old_pind); + } + } + /* If the pageslab is empty, it gets evicted from the set. */ + if (new_range_len == ps_npages) { + return ps; + } + /* Otherwise, it gets reinserted. 
*/ + pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( + new_range_len << LG_PAGE)); + if (edata_heap_empty(&psset->pageslabs[new_pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, + (size_t)new_pind); + } + edata_heap_insert(&psset->pageslabs[new_pind], ps); + return NULL; +} diff --git a/test/unit/psset.c b/test/unit/psset.c new file mode 100644 index 00000000..8a5090d3 --- /dev/null +++ b/test/unit/psset.c @@ -0,0 +1,306 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/psset.h" + +#define PAGESLAB_PAGES 64 +#define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) +#define PAGESLAB_SN 123 +#define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) + +#define ALLOC_ARENA_IND 111 +#define ALLOC_ESN 222 + +static void +edata_init_test(edata_t *edata) { + memset(edata, 0, sizeof(*edata)); + edata_arena_ind_set(edata, ALLOC_ARENA_IND); + edata_esn_set(edata, ALLOC_ESN); +} + +static void +edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { + /* + * Note that allocations should get the arena ind of their home + * arena, *not* the arena ind of the pageslab allocator. 
+ */ + expect_u_eq(ALLOC_ARENA_IND, edata_arena_ind_get(edata), + "Arena ind changed"); + expect_ptr_eq( + (void *)((uintptr_t)PAGESLAB_ADDR + (page_offset << LG_PAGE)), + edata_addr_get(edata), "Didn't allocate in order"); + expect_zu_eq(page_cnt << LG_PAGE, edata_size_get(edata), ""); + expect_false(edata_slab_get(edata), ""); + expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), + ""); + expect_zu_eq(0, edata_sn_get(edata), ""); + expect_d_eq(edata_state_get(edata), extent_state_active, ""); + expect_false(edata_zeroed_get(edata), ""); + expect_true(edata_committed_get(edata), ""); + expect_d_eq(EXTENT_PAI_HPA, edata_pai_get(edata), ""); + expect_false(edata_is_head_get(edata), ""); +} + +TEST_BEGIN(test_empty) { + bool err; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + edata_init_test(&alloc); + + psset_t psset; + psset_init(&psset); + + /* Empty psset should return fail allocations. 
*/ + err = psset_alloc_reuse(&psset, &alloc, PAGE); + expect_true(err, "Empty psset succeeded in an allocation."); +} +TEST_END + +TEST_BEGIN(test_fill) { + bool err; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + edata_t *edata = &alloc[i]; + edata_expect(edata, i, 1); + } + + /* The pageslab, and thus psset, should now have no allocations. */ + edata_t extra_alloc; + edata_init_test(&extra_alloc); + err = psset_alloc_reuse(&psset, &extra_alloc, PAGE); + expect_true(err, "Alloc succeeded even though psset should be empty"); +} +TEST_END + +TEST_BEGIN(test_reuse) { + bool err; + edata_t *ps; + + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + + /* Free odd indices. 
*/ + for (size_t i = 0; i < PAGESLAB_PAGES; i ++) { + if (i % 2 == 0) { + continue; + } + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + /* Realloc into them. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 2 == 0) { + continue; + } + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + edata_expect(&alloc[i], i, 1); + } + /* Now, free the pages at indices 0 or 1 mod 2. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 4 > 1) { + continue; + } + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + /* And realloc 2-page allocations into them. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 4 != 0) { + continue; + } + err = psset_alloc_reuse(&psset, &alloc[i], 2 * PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + edata_expect(&alloc[i], i, 2); + } + /* Free all the 2-page allocations. */ + for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + if (i % 4 != 0) { + continue; + } + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + /* + * Free up a 1-page hole next to a 2-page hole, but somewhere in the + * middle of the pageslab. Index 11 should be right before such a hole + * (since 12 % 4 == 0). + */ + size_t index_of_3 = 11; + ps = psset_dalloc(&psset, &alloc[index_of_3]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + err = psset_alloc_reuse(&psset, &alloc[index_of_3], 3 * PAGE); + expect_false(err, "Should have been able to find alloc."); + edata_expect(&alloc[index_of_3], index_of_3, 3); + + /* Free up a 4-page hole at the end. 
*/ + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 2]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + + /* Make sure we can satisfy an allocation at the very end of a slab. */ + size_t index_of_4 = PAGESLAB_PAGES - 4; + ps = psset_dalloc(&psset, &alloc[index_of_4]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + err = psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); + expect_false(err, "Should have been able to find alloc."); + edata_expect(&alloc[index_of_4], index_of_4, 4); +} +TEST_END + +TEST_BEGIN(test_evict) { + bool err; + edata_t *ps; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + psset_t psset; + psset_init(&psset); + + /* Alloc the whole slab. */ + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Unxpected allocation failure"); + } + + /* Dealloc the whole slab, going forwards. 
*/ + for (size_t i = 0; i < PAGESLAB_PAGES - 1; i++) { + ps = psset_dalloc(&psset, &alloc[i]); + expect_ptr_null(ps, "Nonempty pageslab evicted"); + } + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_eq(&pageslab, ps, "Empty pageslab not evicted."); + + err = psset_alloc_reuse(&psset, &alloc[0], PAGE); + expect_true(err, "psset should be empty."); +} +TEST_END + +TEST_BEGIN(test_multi_pageslab) { + bool err; + edata_t *ps; + edata_t pageslab[2]; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[2][PAGESLAB_PAGES]; + + edata_init(&pageslab[0], /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + edata_init(&pageslab[1], /* arena_ind */ 0, + (void *)((uintptr_t)PAGESLAB_ADDR + PAGESLAB_SIZE), PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + /* Insert both slabs. */ + edata_init_test(&alloc[0][0]); + psset_alloc_new(&psset, &pageslab[0], &alloc[0][0], PAGE); + edata_init_test(&alloc[1][0]); + psset_alloc_new(&psset, &pageslab[1], &alloc[1][0], PAGE); + + /* Fill them both up; make sure we do so in first-fit order. */ + for (size_t i = 0; i < 2; i++) { + for (size_t j = 1; j < PAGESLAB_PAGES; j++) { + edata_init_test(&alloc[i][j]); + err = psset_alloc_reuse(&psset, &alloc[i][j], PAGE); + expect_false(err, + "Nonempty psset failed page allocation."); + assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]), + "Didn't pick pageslabs in first-fit"); + } + } + + /* + * Free up a 2-page hole in the earlier slab, and a 1-page one in the + * later one. We should still pick the earlier slab for a 1-page + * allocation. 
+ */ + ps = psset_dalloc(&psset, &alloc[0][0]); + expect_ptr_null(ps, "Unexpected eviction"); + ps = psset_dalloc(&psset, &alloc[0][1]); + expect_ptr_null(ps, "Unexpected eviction"); + ps = psset_dalloc(&psset, &alloc[1][0]); + expect_ptr_null(ps, "Unexpected eviction"); + err = psset_alloc_reuse(&psset, &alloc[0][0], PAGE); + expect_ptr_eq(&pageslab[0], edata_ps_get(&alloc[0][0]), + "Should have picked first pageslab"); + + /* + * Now both slabs have 1-page holes. Free up a second one in the later + * slab. + */ + ps = psset_dalloc(&psset, &alloc[1][1]); + expect_ptr_null(ps, "Unexpected eviction"); + + /* + * We should be able to allocate a 2-page object, even though an earlier + * size class is nonempty. + */ + err = psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); + expect_false(err, "Allocation should have succeeded"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_empty, + test_fill, + test_reuse, + test_evict, + test_multi_pageslab); +} From 259c5e3e8f4731f2e32ceac71c66f4bc7d078145 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Sep 2020 12:59:10 -0700 Subject: [PATCH 1872/2608] psset: Add stats --- include/jemalloc/internal/edata.h | 12 +++-- include/jemalloc/internal/psset.h | 16 ++++++ src/psset.c | 85 ++++++++++++++++++++++++++++--- test/unit/psset.c | 78 +++++++++++++++++++++++++++- 4 files changed, 178 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 4fee76bf..f175af94 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -218,13 +218,17 @@ struct edata_s { */ edata_t *ps; /* - * If this edata *is* a pageslab, then it has some longest free - * range in it. Track it. + * If this edata *is* a pageslab, then we cache some useful + * information about its associated bitmap. */ struct { + /* + * The longest free range a pageslab contains determines + * the heap it lives in. 
If we know that it didn't + * change after an operation, we can avoid moving it + * between heaps. + */ uint32_t longest_free_range; - /* Not yet tracked. */ - /* uint32_t longest_free_range_pos; */ }; }; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 8f3f9ee7..abbfc241 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -21,6 +21,16 @@ */ #define PSSET_NPSIZES 64 +typedef struct psset_bin_stats_s psset_bin_stats_t; +struct psset_bin_stats_s { + /* How many pageslabs are in this bin? */ + size_t npageslabs; + /* Of them, how many pages are active? */ + size_t nactive; + /* How many are inactive? */ + size_t ninactive; +}; + typedef struct psset_s psset_t; struct psset_s { /* @@ -29,6 +39,12 @@ struct psset_s { */ edata_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; + /* + * Full slabs don't live in any edata heap. But we still track their + * stats. + */ + psset_bin_stats_t full_slab_stats; + psset_bin_stats_t slab_stats[PSSET_NPSIZES]; }; void psset_init(psset_t *psset); diff --git a/src/psset.c b/src/psset.c index 9675a0d1..04d3548f 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,6 +14,48 @@ psset_init(psset_t *psset) { edata_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); + psset->full_slab_stats.npageslabs = 0; + psset->full_slab_stats.nactive = 0; + psset->full_slab_stats.ninactive = 0; + for (unsigned i = 0; i < PSSET_NPSIZES; i++) { + psset->slab_stats[i].npageslabs = 0; + psset->slab_stats[i].nactive = 0; + psset->slab_stats[i].ninactive = 0; + } +} + +/* + * The stats maintenance strategy is simple, but not necessarily obvious. + * edata_nfree and the bitmap must remain consistent at all times. If they + * change while an edata is within an edata_heap (or full), then the associated + * stats bin (or the full bin) must also change. 
If they change while not in a + * bin (say, in between extraction and reinsertion), then the bin stats need not + * change. If a pageslab is removed from a bin (or becomes nonfull), it should + * no longer contribute to that bin's stats (or the full stats). These help + * ensure we don't miss any heap modification operations. + */ +JEMALLOC_ALWAYS_INLINE void +psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { + size_t mul = inc ? (size_t)1 : (size_t)-1; + + size_t npages = edata_size_get(ps) >> LG_PAGE; + size_t ninactive = edata_nfree_get(ps); + size_t nactive = npages - ninactive; + binstats->npageslabs += mul * 1; + binstats->nactive += mul * nactive; + binstats->ninactive += mul * ninactive; +} + +static void +psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { + edata_heap_remove(&psset->pageslabs[pind], ps); + psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ false); +} + +static void +psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { + edata_heap_insert(&psset->pageslabs[pind], ps); + psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ true); } JEMALLOC_ALWAYS_INLINE void @@ -46,7 +88,8 @@ psset_recycle_extract(psset_t *psset, size_t size) { if (ret == NULL) { return NULL; } - edata_heap_remove(&psset->pageslabs[ret_ind], ret); + + psset_edata_heap_remove(psset, ret_ind, ret); if (edata_heap_empty(&psset->pageslabs[ret_ind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, ret_ind); } @@ -67,7 +110,7 @@ psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { if (edata_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } - edata_heap_insert(&psset->pageslabs[pind], ps); + psset_edata_heap_insert(psset, pind, ps); } /* @@ -120,6 +163,9 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); fb_set_range(ps_fb, ps_npages, begin, npages); + 
edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) - npages)); + /* The pageslab isn't in a bin, so no bin stats need to change. */ + /* * OK, we've got to put the pageslab back. First we have to figure out * where, though; we've only checked run sizes before the pageslab we @@ -144,7 +190,10 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, start = begin + len; } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); - if (largest_unchosen_range != 0) { + if (largest_unchosen_range == 0) { + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ true); + } else { psset_insert(psset, ps, largest_unchosen_range); } } @@ -164,8 +213,8 @@ psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_npages = edata_size_get(ps) >> LG_PAGE; assert(fb_empty(ps_fb, ps_npages)); - assert(ps_npages >= (size >> LG_PAGE)); + edata_nfree_set(ps, (uint32_t)ps_npages); psset_ps_alloc_insert(psset, ps, r_edata, size); } @@ -177,6 +226,11 @@ psset_dalloc(psset_t *psset, edata_t *edata) { edata_t *ps = edata_ps_get(edata); fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); + pszind_t old_pind = SC_NPSIZES; + if (ps_old_longest_free_range != 0) { + old_pind = sz_psz2ind(sz_psz_quantize_floor( + ps_old_longest_free_range << LG_PAGE)); + } size_t ps_npages = edata_size_get(ps) >> LG_PAGE; size_t begin = @@ -184,6 +238,23 @@ psset_dalloc(psset_t *psset, edata_t *edata) { >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; fb_unset_range(ps_fb, ps_npages, begin, len); + if (ps_old_longest_free_range == 0) { + /* We were in the (imaginary) full bin; update stats for it. */ + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ false); + } else { + /* + * The edata is still in the bin, need to update its + * contribution. 
+ */ + psset->slab_stats[old_pind].nactive -= len; + psset->slab_stats[old_pind].ninactive += len; + } + /* + * Note that we want to do this after the stats updates, since if it was + * full it psset_bin_stats_adjust would have looked at the old version. + */ + edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); /* We might have just created a new, larger range. */ size_t new_begin = (size_t)(fb_fls(ps_fb, ps_npages, begin) + 1); @@ -215,9 +286,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { * and the issue becomes moot). */ if (ps_old_longest_free_range > 0) { - pszind_t old_pind = sz_psz2ind(sz_psz_quantize_floor( - ps_old_longest_free_range<< LG_PAGE)); - edata_heap_remove(&psset->pageslabs[old_pind], ps); + psset_edata_heap_remove(psset, old_pind, ps); if (edata_heap_empty(&psset->pageslabs[old_pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); @@ -234,6 +303,6 @@ psset_dalloc(psset_t *psset, edata_t *edata) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); } - edata_heap_insert(&psset->pageslabs[new_pind], ps); + psset_edata_heap_insert(psset, new_pind, ps); return NULL; } diff --git a/test/unit/psset.c b/test/unit/psset.c index 8a5090d3..0bc4460f 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -295,6 +295,81 @@ TEST_BEGIN(test_multi_pageslab) { } TEST_END +static void +stats_expect_empty(psset_bin_stats_t *stats) { + assert_zu_eq(0, stats->npageslabs, + "Supposedly empty bin had positive npageslabs"); + expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" + "Supposedly empty bin had positive nactive"); + expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" + "Supposedly empty bin had positive ninactive"); +} + +static void +stats_expect(psset_t *psset, size_t nactive) { + if (nactive == PAGESLAB_PAGES) { + expect_zu_eq(1, psset->full_slab_stats.npageslabs, + "Expected a full slab"); + expect_zu_eq(PAGESLAB_PAGES, psset->full_slab_stats.nactive, + "Should have exactly filled the 
bin"); + expect_zu_eq(0, psset->full_slab_stats.ninactive, + "Should never have inactive pages in a full slab"); + } else { + stats_expect_empty(&psset->full_slab_stats); + } + size_t ninactive = PAGESLAB_PAGES - nactive; + pszind_t nonempty_pind = PSSET_NPSIZES; + if (ninactive != 0 && ninactive < PAGESLAB_PAGES) { + nonempty_pind = sz_psz2ind(sz_psz_quantize_floor( + ninactive << LG_PAGE)); + } + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + if (i == nonempty_pind) { + assert_zu_eq(1, psset->slab_stats[i].npageslabs, + "Should have found a slab"); + expect_zu_eq(nactive, psset->slab_stats[i].nactive, + "Mismatch in active pages"); + expect_zu_eq(ninactive, psset->slab_stats[i].ninactive, + "Mismatch in inactive pages"); + } else { + stats_expect_empty(&psset->slab_stats[i]); + } + } +} + +TEST_BEGIN(test_stats) { + bool err; + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_t alloc[PAGESLAB_PAGES]; + + edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, + /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + stats_expect(&psset, 0); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + stats_expect(&psset, i); + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + stats_expect(&psset, PAGESLAB_PAGES); + for (ssize_t i = PAGESLAB_PAGES - 1; i >= 0; i--) { + edata_t *ps = psset_dalloc(&psset, &alloc[i]); + expect_true((ps == NULL) == (i != 0), + "psset_dalloc should only evict a slab on the last free"); + stats_expect(&psset, i); + } +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -302,5 +377,6 @@ main(void) { test_fill, test_reuse, test_evict, - test_multi_pageslab); + test_multi_pageslab, + test_stats); } 
From f6bbfc1e965e3f165ea3bbdbc630d26778a7fbf4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 10 Sep 2020 16:01:23 -0700 Subject: [PATCH 1873/2608] Add a .clang-format file. --- .clang-format | 122 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..719c03c5 --- /dev/null +++ b/.clang-format @@ -0,0 +1,122 @@ +# jemalloc targets clang-format version 8. We include every option it supports +# here, but comment out the ones that aren't relevant for us. +--- +# AccessModifierOffset: -2 +AlignAfterOpenBracket: DontAlign +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Right +AlignOperands: false +AlignTrailingComments: false +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: AllDefinitions +AlwaysBreakBeforeMultilineStrings: true +# AlwaysBreakTemplateDeclarations: Yes +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +# BreakAfterJavaFieldAnnotations: true +BreakBeforeBinaryOperators: NonAssignment +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +# BreakConstructorInitializers: BeforeColon +# BreakInheritanceList: BeforeColon +BreakStringLiterals: false +ColumnLimit: 80 +# CommentPragmas: '' +# CompactNamespaces: true +# ConstructorInitializerAllOnOneLineOrOnePerLine: true +# ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 2 +Cpp11BracedListStyle: true 
+DerivePointerAlignment: false +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: [ ql_foreach, qr_foreach, ] +# IncludeBlocks: Preserve +# IncludeCategories: +# - Regex: '^<.*\.h(pp)?>' +# Priority: 1 +# IncludeIsMainRegex: '' +IndentCaseLabels: false +IndentPPDirectives: AfterHash +IndentWidth: 4 +IndentWrappedFunctionNames: false +# JavaImportGroups: [] +# JavaScriptQuotes: Leave +# JavaScriptWrapImports: True +KeepEmptyLinesAtTheStartOfBlocks: false +Language: Cpp +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +# NamespaceIndentation: None +# ObjCBinPackProtocolList: Auto +# ObjCBlockIndentWidth: 2 +# ObjCSpaceAfterProperty: false +# ObjCSpaceBeforeProtocolList: false + +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +# PenaltyBreakTemplateDeclaration: 10 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 60 +PointerAlignment: Right +# RawStringFormats: +# - Language: TextProto +# Delimiters: +# - 'pb' +# - 'proto' +# EnclosingFunctions: +# - 'PARSE_TEXT_PROTO' +# BasedOnStyle: google +# - Language: Cpp +# Delimiters: +# - 'cc' +# - 'cpp' +# BasedOnStyle: llvm +# CanonicalDelimiter: 'cc' +ReflowComments: true +SortIncludes: false +SpaceAfterCStyleCast: false +# SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +# SpaceBeforeCpp11BracedList: false +# SpaceBeforeCtorInitializerColon: true +# SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +# SpaceBeforeRangeBasedForLoopColon: true +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +# SpacesInContainerLiterals: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +# Standard: Cpp11 +# This is nominally supported in clang-format version 8, but not in the build +# used by some of the core jemalloc 
developers. +# StatementMacros: [] +TabWidth: 8 +UseTab: Never +... From 025d8c37c93a69ec0aa5d8a55e3793cb480a5ac8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 10 Sep 2020 17:21:32 -0700 Subject: [PATCH 1874/2608] Add a script to check for clang-formattedness. --- scripts/check-formatting.sh | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100755 scripts/check-formatting.sh diff --git a/scripts/check-formatting.sh b/scripts/check-formatting.sh new file mode 100755 index 00000000..68cafd8e --- /dev/null +++ b/scripts/check-formatting.sh @@ -0,0 +1,28 @@ +#!/bin/bash + +# The files that need to be properly formatted. We'll grow this incrementally +# until it includes all the jemalloc source files (as we convert things over), +# and then just replace it with +# find -name '*.c' -o -name '*.h' -o -name '*.cpp +FILES=( +) + +if command -v clang-format &> /dev/null; then + CLANG_FORMAT="clang-format" +elif command -v clang-format-8 &> /dev/null; then + CLANG_FORMAT="clang-format-8" +else + echo "Couldn't find clang-format." +fi + +if ! $CLANG_FORMAT -version | grep "version 8\." &> /dev/null; then + echo "clang-format is the wrong version." + exit 1 +fi + +for file in ${FILES[@]}; do + if ! cmp --silent $file <($CLANG_FORMAT $file) &> /dev/null; then + echo "Error: $file is not clang-formatted" + exit 1 + fi +done From bdb60a8053dcac4eb39deaa17129b6e40ba6b17a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Oct 2020 18:31:55 -0700 Subject: [PATCH 1875/2608] Appveyor: don't update msys2 keyring. This is no longer required, and the step now fails. 
--- .appveyor.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index f74f0993..f44868da 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -30,8 +30,6 @@ environment: install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - - curl -O http://repo.msys2.org/msys/x86_64/msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz - - pacman --noconfirm -U msys2-keyring-r21.b39fb11-1-any.pkg.tar.xz --nodeps - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - pacman --noconfirm -S mingw-w64-%CPU%-make From 05130471701b7f42b545e2103f21fad61b67bfb0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 13 Aug 2020 18:02:25 -0700 Subject: [PATCH 1876/2608] PRNG: Allow a a range argument of 1. This is convenient when the range argument itself is generated from some computation whose value we don't know in advance. --- include/jemalloc/internal/prng.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 12380b41..a309e962 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -133,7 +133,9 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { uint32_t ret; unsigned lg_range; - assert(range > 1); + if (range == 1) { + return 0; + } /* Compute the ceiling of lg(range). */ lg_range = ffs_u32(pow2_ceil_u32(range)); @@ -151,7 +153,9 @@ prng_range_u64(uint64_t *state, uint64_t range) { uint64_t ret; unsigned lg_range; - assert(range > 1); + if (range == 1) { + return 0; + } /* Compute the ceiling of lg(range). */ lg_range = ffs_u64(pow2_ceil_u64(range)); @@ -169,7 +173,9 @@ prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { size_t ret; unsigned lg_range; - assert(range > 1); + if (range == 1) { + return 0; + } /* Compute the ceiling of lg(range). 
*/ lg_range = ffs_u64(pow2_ceil_u64(range)); From 9e6aa77ab9d8dd5b00018bdca5adff23b03cbdb8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 14 Aug 2020 09:17:11 -0700 Subject: [PATCH 1877/2608] PRNG: Remove atomic functionality. These had no uses and complicated the API. As a rule we now expect to only use thread-local randomization for contention-reduction reasons, so we only pay the API costs and never get the functionality benefits. --- include/jemalloc/internal/prng.h | 81 +++++++++++------------------- test/unit/prng.c | 84 ++++++++++++++++---------------- 2 files changed, 71 insertions(+), 94 deletions(-) diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index a309e962..14542aa1 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -1,7 +1,6 @@ #ifndef JEMALLOC_INTERNAL_PRNG_H #define JEMALLOC_INTERNAL_PRNG_H -#include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" /* @@ -59,66 +58,38 @@ prng_state_next_zu(size_t state) { /* * The prng_lg_range functions give a uniform int in the half-open range [0, - * 2**lg_range). If atomic is true, they do so safely from multiple threads. - * Multithreaded 64-bit prngs aren't supported. + * 2**lg_range). 
*/ JEMALLOC_ALWAYS_INLINE uint32_t -prng_lg_range_u32(atomic_u32_t *state, unsigned lg_range, bool atomic) { - uint32_t ret, state0, state1; - +prng_lg_range_u32(uint32_t *state, unsigned lg_range) { assert(lg_range > 0); assert(lg_range <= 32); - state0 = atomic_load_u32(state, ATOMIC_RELAXED); - - if (atomic) { - do { - state1 = prng_state_next_u32(state0); - } while (!atomic_compare_exchange_weak_u32(state, &state0, - state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); - } else { - state1 = prng_state_next_u32(state0); - atomic_store_u32(state, state1, ATOMIC_RELAXED); - } - ret = state1 >> (32 - lg_range); + *state = prng_state_next_u32(*state); + uint32_t ret = *state >> (32 - lg_range); return ret; } JEMALLOC_ALWAYS_INLINE uint64_t prng_lg_range_u64(uint64_t *state, unsigned lg_range) { - uint64_t ret, state1; - assert(lg_range > 0); assert(lg_range <= 64); - state1 = prng_state_next_u64(*state); - *state = state1; - ret = state1 >> (64 - lg_range); + *state = prng_state_next_u64(*state); + uint64_t ret = *state >> (64 - lg_range); return ret; } JEMALLOC_ALWAYS_INLINE size_t -prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { - size_t ret, state0, state1; - +prng_lg_range_zu(size_t *state, unsigned lg_range) { assert(lg_range > 0); assert(lg_range <= ZU(1) << (3 + LG_SIZEOF_PTR)); - state0 = atomic_load_zu(state, ATOMIC_RELAXED); - - if (atomic) { - do { - state1 = prng_state_next_zu(state0); - } while (atomic_compare_exchange_weak_zu(state, &state0, - state1, ATOMIC_RELAXED, ATOMIC_RELAXED)); - } else { - state1 = prng_state_next_zu(state0); - atomic_store_zu(state, state1, ATOMIC_RELAXED); - } - ret = state1 >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); + *state = prng_state_next_zu(*state); + size_t ret = *state >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range); return ret; } @@ -129,20 +100,24 @@ prng_lg_range_zu(atomic_zu_t *state, unsigned lg_range, bool atomic) { */ JEMALLOC_ALWAYS_INLINE uint32_t -prng_range_u32(atomic_u32_t *state, uint32_t 
range, bool atomic) { - uint32_t ret; - unsigned lg_range; - +prng_range_u32(uint32_t *state, uint32_t range) { + assert(range != 0); + /* + * If range were 1, lg_range would be 0, so the shift in + * prng_lg_range_u32 would be a shift of a 32-bit variable by 32 bits, + * which is UB. Just handle this case as a one-off. + */ if (range == 1) { return 0; } /* Compute the ceiling of lg(range). */ - lg_range = ffs_u32(pow2_ceil_u32(range)); + unsigned lg_range = ffs_u32(pow2_ceil_u32(range)); /* Generate a result in [0..range) via repeated trial. */ + uint32_t ret; do { - ret = prng_lg_range_u32(state, lg_range, atomic); + ret = prng_lg_range_u32(state, lg_range); } while (ret >= range); return ret; @@ -150,17 +125,18 @@ prng_range_u32(atomic_u32_t *state, uint32_t range, bool atomic) { JEMALLOC_ALWAYS_INLINE uint64_t prng_range_u64(uint64_t *state, uint64_t range) { - uint64_t ret; - unsigned lg_range; + assert(range != 0); + /* See the note in prng_range_u32. */ if (range == 1) { return 0; } /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)); + unsigned lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. */ + uint64_t ret; do { ret = prng_lg_range_u64(state, lg_range); } while (ret >= range); @@ -169,20 +145,21 @@ prng_range_u64(uint64_t *state, uint64_t range) { } JEMALLOC_ALWAYS_INLINE size_t -prng_range_zu(atomic_zu_t *state, size_t range, bool atomic) { - size_t ret; - unsigned lg_range; +prng_range_zu(size_t *state, size_t range) { + assert(range != 0); + /* See the note in prng_range_u32. */ if (range == 1) { return 0; } /* Compute the ceiling of lg(range). */ - lg_range = ffs_u64(pow2_ceil_u64(range)); + unsigned lg_range = ffs_u64(pow2_ceil_u64(range)); /* Generate a result in [0..range) via repeated trial. 
*/ + size_t ret; do { - ret = prng_lg_range_zu(state, lg_range, atomic); + ret = prng_lg_range_zu(state, lg_range); } while (ret >= range); return ret; diff --git a/test/unit/prng.c b/test/unit/prng.c index 915b3504..baf43d96 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,34 +1,34 @@ #include "test/jemalloc_test.h" static void -test_prng_lg_range_u32(bool atomic) { - atomic_u32_t sa, sb; +test_prng_lg_range_u32() { + uint32_t sa, sb; uint32_t ra, rb; unsigned lg_range; - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_u32(&sa, 32, atomic); - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - rb = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + ra = prng_lg_range_u32(&sa, 32); + sa = 42; + rb = prng_lg_range_u32(&sa, 32); expect_u32_eq(ra, rb, "Repeated generation should produce repeated results"); - atomic_store_u32(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_u32(&sb, 32, atomic); + sb = 42; + rb = prng_lg_range_u32(&sb, 32); expect_u32_eq(ra, rb, "Equivalent generation should produce equivalent results"); - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_u32(&sa, 32, atomic); - rb = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + ra = prng_lg_range_u32(&sa, 32); + rb = prng_lg_range_u32(&sa, 32); expect_u32_ne(ra, rb, "Full-width results must not immediately repeat"); - atomic_store_u32(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_u32(&sa, 32, atomic); + sa = 42; + ra = prng_lg_range_u32(&sa, 32); for (lg_range = 31; lg_range > 0; lg_range--) { - atomic_store_u32(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_u32(&sb, lg_range, atomic); + sb = 42; + rb = prng_lg_range_u32(&sb, lg_range); expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); expect_u32_eq(rb, (ra >> (32 - lg_range)), @@ -74,35 +74,35 @@ test_prng_lg_range_u64(void) { } static void -test_prng_lg_range_zu(bool atomic) { - atomic_zu_t sa, sb; +test_prng_lg_range_zu() { + size_t sa, sb; size_t ra, 
rb; unsigned lg_range; - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); + sa = 42; + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); expect_zu_eq(ra, rb, "Repeated generation should produce repeated results"); - atomic_store_zu(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sb = 42; + rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR)); expect_zu_eq(ra, rb, "Equivalent generation should produce equivalent results"); - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); - rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); + rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); expect_zu_ne(ra, rb, "Full-width results must not immediately repeat"); - atomic_store_zu(&sa, 42, ATOMIC_RELAXED); - ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR), atomic); + sa = 42; + ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; lg_range--) { - atomic_store_zu(&sb, 42, ATOMIC_RELAXED); - rb = prng_lg_range_zu(&sb, lg_range, atomic); + sb = 42; + rb = prng_lg_range_zu(&sb, lg_range); expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, "High order bits should be 0, lg_range=%u", lg_range); expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - @@ -112,12 +112,12 @@ test_prng_lg_range_zu(bool atomic) { } TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { - test_prng_lg_range_u32(false); + test_prng_lg_range_u32(); } TEST_END TEST_BEGIN(test_prng_lg_range_u32_atomic) { - test_prng_lg_range_u32(true); + test_prng_lg_range_u32(); } TEST_END @@ -127,29 +127,29 @@ 
TEST_BEGIN(test_prng_lg_range_u64_nonatomic) { TEST_END TEST_BEGIN(test_prng_lg_range_zu_nonatomic) { - test_prng_lg_range_zu(false); + test_prng_lg_range_zu(); } TEST_END TEST_BEGIN(test_prng_lg_range_zu_atomic) { - test_prng_lg_range_zu(true); + test_prng_lg_range_zu(); } TEST_END static void -test_prng_range_u32(bool atomic) { +test_prng_range_u32() { uint32_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 #define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - atomic_u32_t s; + uint32_t s; unsigned rep; - atomic_store_u32(&s, range, ATOMIC_RELAXED); + s = range; for (rep = 0; rep < NREPS; rep++) { - uint32_t r = prng_range_u32(&s, range, atomic); + uint32_t r = prng_range_u32(&s, range); expect_u32_lt(r, range, "Out of range"); } @@ -177,19 +177,19 @@ test_prng_range_u64(void) { } static void -test_prng_range_zu(bool atomic) { +test_prng_range_zu() { size_t range; #define MAX_RANGE 10000000 #define RANGE_STEP 97 #define NREPS 10 for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { - atomic_zu_t s; + size_t s; unsigned rep; - atomic_store_zu(&s, range, ATOMIC_RELAXED); + s = range; for (rep = 0; rep < NREPS; rep++) { - size_t r = prng_range_zu(&s, range, atomic); + size_t r = prng_range_zu(&s, range); expect_zu_lt(r, range, "Out of range"); } From 2a6ba121b5d7f83498265c3a630ba65e08f4b7e7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 14 Aug 2020 09:23:42 -0700 Subject: [PATCH 1878/2608] PRNG test: cleanups. Since we no longer have both atomic and non-atomic variants, there's no reason to try to test both. 
--- test/unit/prng.c | 124 +++++++++++++++-------------------------------- 1 file changed, 38 insertions(+), 86 deletions(-) diff --git a/test/unit/prng.c b/test/unit/prng.c index baf43d96..a6d9b014 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -1,7 +1,6 @@ #include "test/jemalloc_test.h" -static void -test_prng_lg_range_u32() { +TEST_BEGIN(test_prng_lg_range_u32) { uint32_t sa, sb; uint32_t ra, rb; unsigned lg_range; @@ -35,10 +34,11 @@ test_prng_lg_range_u32() { "Expected high order bits of full-width result, " "lg_range=%u", lg_range); } -} -static void -test_prng_lg_range_u64(void) { +} +TEST_END + +TEST_BEGIN(test_prng_lg_range_u64) { uint64_t sa, sb, ra, rb; unsigned lg_range; @@ -72,9 +72,9 @@ test_prng_lg_range_u64(void) { "lg_range=%u", lg_range); } } +TEST_END -static void -test_prng_lg_range_zu() { +TEST_BEGIN(test_prng_lg_range_zu) { size_t sa, sb; size_t ra, rb; unsigned lg_range; @@ -109,129 +109,81 @@ test_prng_lg_range_zu() { lg_range)), "Expected high order bits of full-width " "result, lg_range=%u", lg_range); } -} -TEST_BEGIN(test_prng_lg_range_u32_nonatomic) { - test_prng_lg_range_u32(); } TEST_END -TEST_BEGIN(test_prng_lg_range_u32_atomic) { - test_prng_lg_range_u32(); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_u64_nonatomic) { - test_prng_lg_range_u64(); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_zu_nonatomic) { - test_prng_lg_range_zu(); -} -TEST_END - -TEST_BEGIN(test_prng_lg_range_zu_atomic) { - test_prng_lg_range_zu(); -} -TEST_END - -static void -test_prng_range_u32() { +TEST_BEGIN(test_prng_range_u32) { uint32_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + const uint32_t max_range = 10000000; + const uint32_t range_step = 97; + const unsigned nreps = 10; + + for (range = 2; range < max_range; range += range_step) { uint32_t s; unsigned rep; s = range; - for (rep = 0; rep < NREPS; rep++) { + for (rep = 0; rep < nreps; rep++) { 
uint32_t r = prng_range_u32(&s, range); expect_u32_lt(r, range, "Out of range"); } } } +TEST_END -static void -test_prng_range_u64(void) { +TEST_BEGIN(test_prng_range_u64) { uint64_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + const uint64_t max_range = 10000000; + const uint64_t range_step = 97; + const unsigned nreps = 10; + + for (range = 2; range < max_range; range += range_step) { uint64_t s; unsigned rep; s = range; - for (rep = 0; rep < NREPS; rep++) { + for (rep = 0; rep < nreps; rep++) { uint64_t r = prng_range_u64(&s, range); expect_u64_lt(r, range, "Out of range"); } } } +TEST_END -static void -test_prng_range_zu() { +TEST_BEGIN(test_prng_range_zu) { size_t range; -#define MAX_RANGE 10000000 -#define RANGE_STEP 97 -#define NREPS 10 - for (range = 2; range < MAX_RANGE; range += RANGE_STEP) { + const size_t max_range = 10000000; + const size_t range_step = 97; + const unsigned nreps = 10; + + + for (range = 2; range < max_range; range += range_step) { size_t s; unsigned rep; s = range; - for (rep = 0; rep < NREPS; rep++) { + for (rep = 0; rep < nreps; rep++) { size_t r = prng_range_zu(&s, range); expect_zu_lt(r, range, "Out of range"); } } } - -TEST_BEGIN(test_prng_range_u32_nonatomic) { - test_prng_range_u32(false); -} -TEST_END - -TEST_BEGIN(test_prng_range_u32_atomic) { - test_prng_range_u32(true); -} -TEST_END - -TEST_BEGIN(test_prng_range_u64_nonatomic) { - test_prng_range_u64(); -} -TEST_END - -TEST_BEGIN(test_prng_range_zu_nonatomic) { - test_prng_range_zu(false); -} -TEST_END - -TEST_BEGIN(test_prng_range_zu_atomic) { - test_prng_range_zu(true); -} TEST_END int main(void) { - return test( - test_prng_lg_range_u32_nonatomic, - test_prng_lg_range_u32_atomic, - test_prng_lg_range_u64_nonatomic, - test_prng_lg_range_zu_nonatomic, - test_prng_lg_range_zu_atomic, - test_prng_range_u32_nonatomic, - test_prng_range_u32_atomic, - test_prng_range_u64_nonatomic, - 
test_prng_range_zu_nonatomic, - test_prng_range_zu_atomic); + return test_no_reentrancy( + test_prng_lg_range_u32, + test_prng_lg_range_u64, + test_prng_lg_range_zu, + test_prng_range_u32, + test_prng_range_u64, + test_prng_range_zu); } From 1ed7ec369f44beeb2dcc0e2ca21d7e947d8dd1b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 28 Sep 2020 15:52:36 -0700 Subject: [PATCH 1879/2608] Emap: Add emap_assert_not_mapped. The counterpart to emap_assert_mapped, it lets callers check that some edata is not already in the emap. --- include/jemalloc/internal/emap.h | 9 +++++++++ src/emap.c | 13 +++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 9b925225..8b2c6ba0 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -127,6 +127,15 @@ emap_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } +/* Assert that the given edata isn't in the map. */ +void emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata); +static inline void +emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + if (config_debug) { + emap_do_assert_not_mapped(tsdn, emap, edata); + } +} + JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; diff --git a/src/emap.c b/src/emap.c index 4e7ca8d0..537f5884 100644 --- a/src/emap.c +++ b/src/emap.c @@ -323,3 +323,16 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { assert(rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata)).edata == edata); } + +void +emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + emap_full_alloc_ctx_t context1 = {0}; + emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_base_get(edata), + &context1); + assert(context1.edata == NULL); + + emap_full_alloc_ctx_t context2 = {0}; + emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_last_get(edata), 
+ &context2); + assert(context2.edata == NULL); +} From 21b70cb540e0f9ff7d7ff20fa21772e96c2215b0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 Aug 2020 17:47:13 -0700 Subject: [PATCH 1880/2608] Add hpa_central module This will be the centralized component of the coming hugepage allocator; the source of larger chunks of memory from which smaller ones can be obtained. --- Makefile.in | 2 + include/jemalloc/internal/hpa_central.h | 47 ++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/extent.c | 9 + src/hpa_central.c | 206 ++++++++ test/unit/hpa_central.c | 450 ++++++++++++++++++ 9 files changed, 722 insertions(+) create mode 100644 include/jemalloc/internal/hpa_central.h create mode 100644 src/hpa_central.c create mode 100644 test/unit/hpa_central.c diff --git a/Makefile.in b/Makefile.in index 4769d48f..ba0c80b6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ + $(srcroot)src/hpa_central.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ @@ -210,6 +211,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h new file mode 100644 index 00000000..b90ca41e --- /dev/null +++ b/include/jemalloc/internal/hpa_central.h @@ -0,0 +1,47 @@ +#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H +#define JEMALLOC_INTERNAL_HPA_CENTRAL_H + +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/emap.h" + +typedef struct hpa_central_s hpa_central_t; +struct hpa_central_s { + /* The emap we 
use for metadata operations. */ + emap_t *emap; + + edata_cache_t *edata_cache; + eset_t eset; + + size_t sn_next; +}; + +void hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, + emap_t *emap); +/* + * Tries to satisfy the given allocation request with an extent already given to + * central. + */ +edata_t *hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, + size_t size_min, size_t size_goal); +/* + * Adds the given edata to the central allocator as a new allocation. The + * intent is that after a reuse attempt fails, the caller can allocate a new + * extent using whatever growth policy it prefers and allocate from that, giving + * the excess to the hpa_central_t (this is analogous to the + * extent_grow_retained functionality; we can allocate address space in + * exponentially growing chunks). + * + * The edata_t should come from the same base that this hpa was initialized + * with. Only complete extents should be added (i.e. those for which the head + * bit is true, and for which their successor is either not owned by jemalloc + * or also has a head bit of true). It should be active, large enough to + * satisfy the requested allocation, and not already in the emap. + * + * If this returns true, then we did not accept the extent, and took no action. + * Otherwise, modifies *edata to satisfy the allocation. 
+ */ +bool hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, + size_t size, edata_t *to_add); +void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata); + +#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 3200eaba..2dcc994a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 8d459804..81f39345 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 7badc63c..fd814c32 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 8d459804..81f39345 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/extent.c b/src/extent.c index 58ec8205..e9c76eb6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -809,6 +809,15 @@ extent_can_coalesce(ecache_t *ecache, const edata_t *inner, return false; } + /* + * We wouldn't really get into this situation because one or the other + * edata would have to have a head bit set to true, but this is + * conceptually correct and cheap. 
+ */ + if (edata_pai_get(inner) != edata_pai_get(outer)) { + return false; + } + assert(edata_state_get(inner) == extent_state_active); if (edata_state_get(outer) != ecache->state) { return false; diff --git a/src/hpa_central.c b/src/hpa_central.c new file mode 100644 index 00000000..d1065951 --- /dev/null +++ b/src/hpa_central.c @@ -0,0 +1,206 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_central.h" + +void +hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, + emap_t *emap) { + central->emap = emap; + central->edata_cache = edata_cache; + eset_init(¢ral->eset, extent_state_dirty); + central->sn_next = 0; +} + +/* + * Returns the trail, or NULL in case of failure (which can only occur in case + * of an emap operation failure; i.e. OOM). + */ +static edata_t * +hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, + size_t size) { + edata_t *trail = edata_cache_get(tsdn, central->edata_cache); + if (trail == NULL) { + return NULL; + } + size_t cursize = edata_size_get(edata); + edata_init(trail, edata_arena_ind_get(edata), + (void *)((uintptr_t)edata_base_get(edata) + size), cursize - size, + /* slab */ false, SC_NSIZES, edata_sn_get(edata), + edata_state_get(edata), edata_zeroed_get(edata), + edata_committed_get(edata), EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + + emap_prepare_t prepare; + bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, + size, trail, cursize - size); + if (err) { + edata_cache_put(tsdn, central->edata_cache, trail); + return NULL; + } + emap_lock_edata2(tsdn, central->emap, edata, trail); + edata_size_set(edata, size); + emap_split_commit(tsdn, central->emap, &prepare, edata, size, trail, + cursize - size); + emap_unlock_edata2(tsdn, central->emap, edata, trail); + + return trail; +} + +edata_t * +hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, + size_t size_min, size_t size_goal) { + 
assert((size_min & PAGE_MASK) == 0); + assert((size_goal & PAGE_MASK) == 0); + + /* + * Fragmentation avoidance is more important in the HPA than giving the + * user their preferred amount of space, since we expect the average + * unused extent to be more costly (PAC extents can get purged away + * easily at any granularity; HPA extents are much more difficult to + * purge away if they get stranded). So we always search for the + * earliest (in first-fit ordering) extent that can satisfy the request, + * and use it, regardless of the goal size. + */ + edata_t *edata = eset_fit(¢ral->eset, size_min, PAGE, + /* exact_only */ false, /* lg_max_fit */ SC_PTR_BITS); + if (edata == NULL) { + return NULL; + } + + eset_remove(¢ral->eset, edata); + /* Maybe the first fit is also under the limit. */ + if (edata_size_get(edata) <= size_goal) { + goto label_success; + } + + /* Otherwise, split. */ + edata_t *trail = hpa_central_split(tsdn, central, edata, size_goal); + if (trail == NULL) { + eset_insert(¢ral->eset, edata); + return NULL; + } + eset_insert(¢ral->eset, trail); + +label_success: + emap_assert_mapped(tsdn, central->emap, edata); + assert(edata_size_get(edata) >= size_min); + /* + * We don't yet support purging in the hpa_central; everything should be + * dirty. 
+ */ + assert(edata_state_get(edata) == extent_state_dirty); + assert(edata_base_get(edata) == edata_addr_get(edata)); + edata_state_set(edata, extent_state_active); + return edata; +} + +bool +hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, + size_t size, edata_t *edata) { + assert((size & PAGE_MASK) == 0); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_size_get(edata) >= size); + assert(edata_arena_ind_get(edata) + == base_ind_get(central->edata_cache->base)); + assert(edata_is_head_get(edata)); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_slab_get(edata) == false); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + + /* edata should be a new alloc, and hence not already mapped. */ + emap_assert_not_mapped(tsdn, central->emap, edata); + + size_t cursize = edata_size_get(edata); + + bool err = emap_register_boundary(tsdn, central->emap, edata, SC_NSIZES, + /* slab */ false); + if (err) { + return true; + } + /* No splitting is necessary. */ + if (cursize == size) { + size_t sn = central->sn_next++; + edata_sn_set(edata, sn); + return false; + } + + /* We should split. 
*/ + edata_t *trail = hpa_central_split(tsdn, central, edata, size); + if (trail == NULL) { + emap_deregister_boundary(tsdn, central->emap, NULL); + return true; + } + size_t sn = central->sn_next++; + edata_sn_set(edata, sn); + edata_sn_set(trail, sn); + + edata_state_set(trail, extent_state_dirty); + eset_insert(¢ral->eset, trail); + return false; +} + +static edata_t * +hpa_central_dalloc_get_merge_candidate(tsdn_t *tsdn, hpa_central_t *central, + void *addr) { + edata_t *edata = emap_lock_edata_from_addr(tsdn, central->emap, addr, + /* inactive_only */ true); + if (edata == NULL) { + return NULL; + } + extent_pai_t pai = edata_pai_get(edata); + extent_state_t state = edata_state_get(edata); + emap_unlock_edata(tsdn, central->emap, edata); + + if (pai != EXTENT_PAI_HPA) { + return NULL; + } + if (state == extent_state_active) { + return NULL; + } + + return edata; +} + +/* Merges b into a, freeing b back to the edata cache.. */ +static void +hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, + edata_t *b) { + emap_prepare_t prepare; + emap_merge_prepare(tsdn, central->emap, &prepare, a, b); + emap_lock_edata2(tsdn, central->emap, a, b); + edata_size_set(a, edata_size_get(a) + edata_size_get(b)); + emap_merge_commit(tsdn, central->emap, &prepare, a, b); + emap_unlock_edata2(tsdn, central->emap, a, b); + edata_cache_put(tsdn, central->edata_cache, b); +} + +void +hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { + assert(edata_state_get(edata) == extent_state_active); + + /* + * These should really be called at the pa interface level, but + * currently they're not. 
+ */ + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); + + if (!edata_is_head_get(edata)) { + edata_t *lead = hpa_central_dalloc_get_merge_candidate(tsdn, + central, edata_before_get(edata)); + if (lead != NULL) { + eset_remove(¢ral->eset, lead); + hpa_central_dalloc_merge(tsdn, central, lead, edata); + edata = lead; + } + } + edata_t *trail = hpa_central_dalloc_get_merge_candidate(tsdn, central, + edata_past_get(edata)); + if (trail != NULL && !edata_is_head_get(trail)) { + eset_remove(¢ral->eset, trail); + hpa_central_dalloc_merge(tsdn, central, edata, trail); + } + edata_state_set(edata, extent_state_dirty); + eset_insert(¢ral->eset, edata); +} diff --git a/test/unit/hpa_central.c b/test/unit/hpa_central.c new file mode 100644 index 00000000..f90b6e3c --- /dev/null +++ b/test/unit/hpa_central.c @@ -0,0 +1,450 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa_central.h" + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_central_t; + */ + hpa_central_t central; + base_t *base; + edata_cache_t edata_cache; + emap_t emap; +}; + +void +create_test_data(hpa_central_t **r_central, base_t **r_base) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ 111, + &ehooks_default_extent_hooks); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, + /* zeroed */ false); + assert_false(err, ""); + + hpa_central_init(&test_data->central, &test_data->edata_cache, + &test_data->emap); + + *r_central = (hpa_central_t *)test_data; + *r_base = base; +} + +static void +destroy_test_data(hpa_central_t *central) { + test_data_t *test_data = (test_data_t *)central; + 
base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static edata_t * +test_edata(base_t *base, uintptr_t addr, size_t size) { + edata_t *edata = base_alloc_edata(TSDN_NULL, base); + assert_ptr_not_null(edata, ""); + edata_init(edata, base_ind_get(base), (void *)addr, + size, /* slab */ false, /* szind_t */ SC_NSIZES, /* sn */ 0, + extent_state_active, /* zeroed */ true, /* comitted */ true, + EXTENT_PAI_HPA, /* is_head */ true); + return edata; +} + +static void +edata_expect_alloc(base_t *base, edata_t *edata, uintptr_t addr, size_t size) { + expect_ptr_not_null(edata, "Alloc should have succeeded"); + expect_u_eq(base_ind_get(base), edata_arena_ind_get(edata), ""); + expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); + expect_d_eq(extent_state_active, edata_state_get(edata), ""); + assert_ptr_eq((void *)addr, edata_base_get(edata), ""); + assert_zu_eq(size, edata_size_get(edata), ""); +} + + +TEST_BEGIN(test_empty) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + edata_t *edata; + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 2 * PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 8 * PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + edata = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, 8 * PAGE); + expect_ptr_null(edata, "Empty allocator succeed in its allocation"); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_first_fit_simple) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); + bool err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); + expect_false(err, "Unexpected grow failure"); + edata_expect_alloc(base, 
edata1, 10 * PAGE, PAGE); + + edata_t *edata2 = test_edata(base, 4 * PAGE, 1 * PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); + expect_false(err, "Unexpected grow failure"); + edata_expect_alloc(base, edata2, 4 * PAGE, PAGE); + + hpa_central_dalloc(TSDN_NULL, central, edata2); + + /* + * Even though there's a lower-addressed extent that a by-size search + * will find earlier, we should still pick the earlier one. + */ + edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); + /* + * Recall there's still an active page at the beginning of the extent + * added at 10 * PAGE; the next allocation from it should be at 11 * + * PAGE. + */ + edata_expect_alloc(base, edata3, 11 * PAGE, PAGE); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_first_fit_large_goal) { + /* + * See the comment in hpa_central_alloc_reuse; we should prefer an + * earlier allocation over a later one, even if it means we fall short + * of the goal size. + */ + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); + bool err = hpa_central_alloc_grow(TSDN_NULL, central, 2 * PAGE, edata1); + expect_false(err, "Unexpected grow failure"); + edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); + + /* We need a page, but would like 2. */ + edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + 2 * PAGE); + edata_expect_alloc(base, edata2, 12 * PAGE, 2 * PAGE); + + hpa_central_dalloc(TSDN_NULL, central, edata1); + + /* + * Now, we have a 2-page inactive extent, then a 2-page active extent, + * then a 6-page inactive extent. If our minimum size is 2 but the goal + * size is 4, we should still pick the first hole rather than the + * second. 
+ */ + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 4 * PAGE); + edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); + + /* + * Make sure we didn't succeed only by forgetting about that last range + * or something. + */ + edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, + 4 * PAGE); + edata_expect_alloc(base, edata3, 14 * PAGE, 4 * PAGE); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_merging) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + /* Test an exact match */ + bool err; + edata_t *edata1 = test_edata(base, 10 * PAGE, PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); + expect_false(err, "Alloc should have succeeded"); + edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); + + edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + PAGE); + expect_ptr_null(edata2, "Allocation should have failed"); + + /* + * Create two more regions; one immediately before the first and one + * immediately after. The extents shouldn't get merged. + */ + edata2 = test_edata(base, 11 * PAGE, PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); + edata_expect_alloc(base, edata2, 11 * PAGE, PAGE); + + edata_t *edata3 = test_edata(base, 12 * PAGE, 20 * PAGE); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata3); + edata_expect_alloc(base, edata3, 12 * PAGE, PAGE); + + /* + * OK, we've got 3 contiguous ranges; [10, 11), [11, 12), and [12, 22). + * They shouldn't get merged though, even once freed. We free the + * middle range last to test merging (or rather, the lack thereof) in + * both directions. + */ + hpa_central_dalloc(TSDN_NULL, central, edata1); + hpa_central_dalloc(TSDN_NULL, central, edata3); + hpa_central_dalloc(TSDN_NULL, central, edata2); + + /* + * A two-page range should only be satisfied by the third added region. 
+ */ + edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, + 2 * PAGE); + edata_expect_alloc(base, edata, 12 * PAGE, 2 * PAGE); + hpa_central_dalloc(TSDN_NULL, central, edata); + + /* Same with a three-page range. */ + edata = hpa_central_alloc_reuse(TSDN_NULL, central, 3 * PAGE, 3 * PAGE); + edata_expect_alloc(base, edata, 12 * PAGE, 3 * PAGE); + hpa_central_dalloc(TSDN_NULL, central, edata); + + /* Let's try some cases that *should* get merged. */ + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); + edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); + edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); + + /* Merge with predecessor. */ + hpa_central_dalloc(TSDN_NULL, central, edata1); + hpa_central_dalloc(TSDN_NULL, central, edata2); + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, + 4 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 4 * PAGE); + + /* Merge with successor */ + hpa_central_dalloc(TSDN_NULL, central, edata3); + hpa_central_dalloc(TSDN_NULL, central, edata1); + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, + 6 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); + hpa_central_dalloc(TSDN_NULL, central, edata1); + + /* + * Let's try merging with both. We need to get three adjacent + * allocations again; do it the same way as before. 
+ */ + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); + edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); + edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); + edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); + + hpa_central_dalloc(TSDN_NULL, central, edata1); + hpa_central_dalloc(TSDN_NULL, central, edata3); + hpa_central_dalloc(TSDN_NULL, central, edata2); + + edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, + 6 * PAGE); + edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_stress_simple) { + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + enum { + range_base = 1024 * PAGE, + range_pages = 256, + range_size = range_pages * PAGE + }; + + edata_t *edatas[range_pages]; + + bool err; + edata_t *range = test_edata(base, range_base, range_size); + err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, range); + expect_false(err, "Unexpected grow failure"); + hpa_central_dalloc(TSDN_NULL, central, range); + + for (size_t i = 0; i < range_pages; i++) { + edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + PAGE); + edata_expect_alloc(base, edatas[i], range_base + i * PAGE, + PAGE); + } + /* Free up the odd indices. */ + for (size_t i = 0; i < range_pages; i++) { + if (i % 2 == 0) { + continue; + } + hpa_central_dalloc(TSDN_NULL, central, edatas[i]); + } + /* + * Reallocate them again. Try it with a goal size that can't be + * satisfied. + */ + for (size_t i = 0; i < range_pages; i++) { + if (i % 2 == 0) { + continue; + } + edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, + PAGE); + edata_expect_alloc(base, edatas[i], range_base + i * PAGE, + PAGE); + } + /* + * In each batch of 8, create a free range of 4 pages and a free range + * of 2 pages. 
+ */ + for (size_t i = 0; i < range_pages; i += 8) { + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 1]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 2]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 3]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 4]); + + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 6]); + hpa_central_dalloc(TSDN_NULL, central, edatas[i + 7]); + } + + /* + * And allocate 3 pages into the first, and 2 pages into the second. To + * mix things up a little, lets get those amounts via goal sizes + * instead. + */ + for (size_t i = 0; i < range_pages; i += 8) { + edatas[i + 1] = hpa_central_alloc_reuse(TSDN_NULL, central, + 2 * PAGE, 3 * PAGE); + edata_expect_alloc(base, edatas[i + 1], + range_base + (i + 1) * PAGE, 3 * PAGE); + + edatas[i + 6] = hpa_central_alloc_reuse(TSDN_NULL, central, + 2 * PAGE, 4 * PAGE); + edata_expect_alloc(base, edatas[i + 6], + range_base + (i + 6) * PAGE, 2 * PAGE); + } + + edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, + 2 * PAGE); + expect_ptr_null(edata, "Should be no free ranges of 2 pages"); + + destroy_test_data(central); +} +TEST_END + +TEST_BEGIN(test_stress_random) { + const size_t range_length = 32 * PAGE; + const size_t range_base = 100 * PAGE; + const size_t size_max_pages = 16; + + hpa_central_t *central; + base_t *base; + create_test_data(¢ral, &base); + + /* + * We loop through this once per some operations, so we don't want it to + * get too big. + */ + const size_t nlive_edatas_max = 100; + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + size_t nranges = 0; + + /* + * Nothing special about this constant; we're only fixing it for + * consistency across runs. + */ + size_t prng_state = (size_t)0x76999ffb014df07c; + for (size_t i = 0; i < 100 * 1000; i++) { + size_t operation = prng_range_zu(&prng_state, 2); + if (operation == 0) { + /* Do an alloc. 
*/ + if (nlive_edatas == nlive_edatas_max) { + continue; + } + size_t min_pages = 1 + prng_range_zu( + &prng_state, size_max_pages); + size_t goal_pages = min_pages + prng_range_zu( + &prng_state, size_max_pages - min_pages + 1); + edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, + central, min_pages * PAGE, goal_pages * PAGE); + if (edata == NULL) { + edata = test_edata(base, + range_base + range_length * nranges, + range_length); + bool err = hpa_central_alloc_grow(TSDN_NULL, + central, goal_pages * PAGE, edata); + assert_false(err, "Unexpected grow failure"); + nranges++; + } + uintptr_t begin = (uintptr_t)edata_base_get(edata); + uintptr_t end = (uintptr_t)edata_last_get(edata); + size_t range_begin = (begin - range_base) / range_length; + size_t range_end = (end - range_base) / range_length; + expect_zu_eq(range_begin, range_end, + "Should not have allocations spanning " + "multiple ranges"); + expect_zu_ge(begin, range_base, + "Gave back a pointer outside of the reserved " + "range"); + expect_zu_lt(end, range_base + range_length * nranges, + "Gave back a pointer outside of the reserved " + "range"); + for (size_t j = 0; j < nlive_edatas; j++) { + edata_t *other = live_edatas[j]; + uintptr_t other_begin = + (uintptr_t)edata_base_get(other); + uintptr_t other_end = + (uintptr_t)edata_last_get(other); + expect_true( + (begin < other_begin && end < other_begin) + || (begin > other_end), + "Gave back two extents that overlap"); + } + live_edatas[nlive_edatas] = edata; + nlive_edatas++; + } else { + /* Do a free. 
*/ + if (nlive_edatas == 0) { + continue; + } + size_t victim = prng_range_zu(&prng_state, + nlive_edatas); + edata_t *to_free = live_edatas[victim]; + live_edatas[victim] = live_edatas[nlive_edatas - 1]; + nlive_edatas--; + hpa_central_dalloc(TSDN_NULL, central, to_free); + } + } + + free(live_edatas); + destroy_test_data(central); +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_empty, + test_first_fit_simple, + test_first_fit_large_goal, + test_merging, + test_stress_simple, + test_stress_random); +} From b971f7c4dda04ba26f9fb52709c7153cef27021c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 5 Oct 2020 17:39:01 -0700 Subject: [PATCH 1881/2608] Add "default" option to slab sizes. This comes in handy when overriding earlier settings to test alternate ones. We don't really include tests for this, but I claim that's OK here: - It's fairly straightforward - It's fairly hard to test well - This entire code path is undocumented and mostly for our internal experimentation in the first place. - I tested manually. 
--- src/jemalloc.c | 4 ++++ src/sc.c | 2 -- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0ca400e3..b21b2d9d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1458,6 +1458,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { + if (CONF_MATCH_VALUE("default")) { + sc_data_init(sc_data); + CONF_CONTINUE; + } bool err; const char *slab_size_segment_cur = v; size_t vlen_left = vlen; diff --git a/src/sc.c b/src/sc.c index 1474eacc..37683ff4 100644 --- a/src/sc.c +++ b/src/sc.c @@ -257,8 +257,6 @@ size_classes( void sc_data_init(sc_data_t *sc_data) { - assert(!sc_data->initialized); - size_classes(sc_data, LG_SIZEOF_PTR, LG_QUANTUM, SC_LG_TINY_MIN, SC_LG_MAX_LOOKUP, LG_PAGE, SC_LG_NGROUP); From a9aa6f6d0fd695d57a0fd1123da6099bb85132c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 12 Oct 2020 16:11:51 -0700 Subject: [PATCH 1882/2608] Fix the alloc_ctx check in free_fastpath. The sanity check requires a functional TSD, which free_fastpath only guarantees after the threshold branch. Move the check function to afterwards. --- src/jemalloc.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index b21b2d9d..bbf62555 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2972,11 +2972,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { /* This is a dead store, except when opt size checking is on. */ alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. 
*/ - return true; - } uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); @@ -2985,12 +2980,21 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { uint64_t deallocated_after = deallocated + usize; /* * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. + * 0) in a single branch. Note that this handles the uninitialized case + * as well (TSD init will be triggered on the non-fastpath). Therefore + * anything depends on a functional TSD (e.g. the alloc_ctx sanity check + * below) needs to be after this branch. */ if (unlikely(deallocated_after >= threshold)) { return false; } + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. */ + return true; + } + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, /* slow */ false, /* is_alloc */ false); cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; From be9548f2bef30b75294fdd0eb6721d1bf6e6a56a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 13 Oct 2020 12:40:34 -0700 Subject: [PATCH 1883/2608] Tcaches: Fix a subtle race condition. Without a lock held continuously between checking tcaches_past and incrementing it, it's possible for two threads to go down manual creation path simultaneously. If the number of tcaches is one less than the maximum, it's possible for both to create a tcache and increment tcaches_past, with the second thread returning a value larger than TCACHES_MAX. 
--- src/tcache.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index b681ee10..90ca372e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -767,7 +767,7 @@ static bool tcaches_create_prep(tsd_t *tsd, base_t *base) { bool err; - malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); + malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), base, @@ -785,7 +785,6 @@ tcaches_create_prep(tsd_t *tsd, base_t *base) { err = false; label_return: - malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); return err; } @@ -795,6 +794,8 @@ tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { bool err; + malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); + if (tcaches_create_prep(tsd, base)) { err = true; goto label_return; @@ -807,7 +808,6 @@ tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { } tcaches_t *elm; - malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); if (tcaches_avail != NULL) { elm = tcaches_avail; tcaches_avail = tcaches_avail->next; @@ -819,10 +819,10 @@ tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind) { *r_ind = tcaches_past; tcaches_past++; } - malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); err = false; label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0); return err; } From 3de19ba401bd752af37e4f235878f764c8ba55fb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 14 Oct 2020 16:45:19 -0700 Subject: [PATCH 1884/2608] Eagerly detect double free and sized dealloc bugs for large sizes. 
--- Makefile.in | 2 +- include/jemalloc/internal/arena_inlines_b.h | 35 ++++++++++++ src/jemalloc.c | 2 +- src/tcache.c | 4 ++ test/unit/double_free.c | 56 +++++++++++++++++++ test/unit/double_free.h | 1 + test/unit/size_check.c | 59 +++++++++++++-------- 7 files changed, 136 insertions(+), 23 deletions(-) create mode 100644 test/unit/double_free.c create mode 100644 test/unit/double_free.h diff --git a/Makefile.in b/Makefile.in index ba0c80b6..008cffd8 100644 --- a/Makefile.in +++ b/Makefile.in @@ -204,6 +204,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ + $(srcroot)test/unit/double_free.c \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ @@ -308,7 +309,6 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/mallctl.c \ $(srcroot)test/stress/microbench.c - TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 335c0797..7971b4c7 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" @@ -203,6 +204,32 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return sz_index2size(full_alloc_ctx.szind); } +JEMALLOC_ALWAYS_INLINE bool +large_dalloc_safety_checks(edata_t *edata, szind_t szind) { + if (!config_opt_safety_checks) { + return false; + } + + /* + * Eagerly detect double free and sized dealloc bugs for large sizes. + * The cost is low enough (as edata will be accessed anyway) to be + * enabled all the time. 
+ */ + if (unlikely(edata_state_get(edata) != extent_state_active)) { + safety_check_fail("Invalid deallocation detected: " + "pages being freed (%p) not currently active, " + "possibly caused by double free bugs.", + (uintptr_t)edata_addr_get(edata)); + return true; + } + if (unlikely(sz_index2size(szind) != edata_usize_get(edata))) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true); + return true; + } + + return false; +} + static inline void arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { @@ -210,6 +237,10 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, szind)) { + /* See the comment in isfree. */ + return; + } large_dalloc(tsdn, edata); } } @@ -250,6 +281,10 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, szind)) { + /* See the comment in isfree. */ + return; + } large_dalloc(tsdn, edata); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index bbf62555..1d6191ae 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2812,7 +2812,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { &dbg_ctx); if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( - /* curent_dealloc */ true); + /* current_dealloc */ true); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { diff --git a/src/tcache.c b/src/tcache.c index 90ca372e..06efe66a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -428,6 +428,10 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, dalloc_count++; } } else { + if (large_dalloc_safety_checks(edata, binind)) { + /* See the comment in isfree. 
*/ + continue; + } large_dalloc_finish(tsdn, edata); } } diff --git a/test/unit/double_free.c b/test/unit/double_free.c new file mode 100644 index 00000000..73155b9c --- /dev/null +++ b/test/unit/double_free.c @@ -0,0 +1,56 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/safety_check.h" + +bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +void +test_large_double_free_pre(void) { + safety_check_set_abort(&fake_abort); + fake_abort_called = false; +} + +void +test_large_double_free_post() { + expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); + safety_check_set_abort(NULL); +} + +TEST_BEGIN(test_large_double_free_tcache) { + test_skip_if(!config_opt_safety_checks); + /* + * Skip debug builds, since too many assertions will be triggered with + * double-free before hitting the one we are interested in. + */ + test_skip_if(config_debug); + + test_large_double_free_pre(); + char *ptr = malloc(SC_LARGE_MINCLASS); + free(ptr); + free(ptr); + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + test_large_double_free_post(); +} +TEST_END + +TEST_BEGIN(test_large_double_free_no_tcache) { + test_skip_if(!config_opt_safety_checks); + test_skip_if(config_debug); + + test_large_double_free_pre(); + char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + dallocx(ptr, MALLOCX_TCACHE_NONE); + dallocx(ptr, MALLOCX_TCACHE_NONE); + test_large_double_free_post(); +} +TEST_END + +int +main(void) { + return test(test_large_double_free_no_tcache, + test_large_double_free_tcache); +} diff --git a/test/unit/double_free.h b/test/unit/double_free.h new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/test/unit/double_free.h @@ -0,0 +1 @@ + diff --git a/test/unit/size_check.c b/test/unit/size_check.c index 3d2912df..accdc405 100644 --- a/test/unit/size_check.c +++ b/test/unit/size_check.c @@ -8,48 +8,65 @@ void fake_abort(const char *message) { fake_abort_called = 
true; } -#define SIZE1 SC_SMALL_MAXCLASS -#define SIZE2 (SC_SMALL_MAXCLASS / 2) +#define SMALL_SIZE1 SC_SMALL_MAXCLASS +#define SMALL_SIZE2 (SC_SMALL_MAXCLASS / 2) -TEST_BEGIN(test_invalid_size_sdallocx) { - test_skip_if(!config_opt_size_checks); +#define LARGE_SIZE1 SC_LARGE_MINCLASS +#define LARGE_SIZE2 (LARGE_SIZE1 * 2) + +void * +test_invalid_size_pre(size_t sz) { safety_check_set_abort(&fake_abort); fake_abort_called = false; - void *ptr = malloc(SIZE1); + void *ptr = malloc(sz); assert_ptr_not_null(ptr, "Unexpected failure"); - sdallocx(ptr, SIZE2, 0); - expect_true(fake_abort_called, "Safety check didn't fire"); + return ptr; +} + +void +test_invalid_size_post(void) { + expect_true(fake_abort_called, "Safety check didn't fire"); safety_check_set_abort(NULL); } + +TEST_BEGIN(test_invalid_size_sdallocx) { + test_skip_if(!config_opt_size_checks); + + void *ptr = test_invalid_size_pre(SMALL_SIZE1); + sdallocx(ptr, SMALL_SIZE2, 0); + test_invalid_size_post(); + + ptr = test_invalid_size_pre(LARGE_SIZE1); + sdallocx(ptr, LARGE_SIZE2, 0); + test_invalid_size_post(); +} TEST_END TEST_BEGIN(test_invalid_size_sdallocx_nonzero_flag) { test_skip_if(!config_opt_size_checks); - safety_check_set_abort(&fake_abort); - fake_abort_called = false; - void *ptr = malloc(SIZE1); - assert_ptr_not_null(ptr, "Unexpected failure"); - sdallocx(ptr, SIZE2, MALLOCX_TCACHE_NONE); - expect_true(fake_abort_called, "Safety check didn't fire"); + void *ptr = test_invalid_size_pre(SMALL_SIZE1); + sdallocx(ptr, SMALL_SIZE2, MALLOCX_TCACHE_NONE); + test_invalid_size_post(); - safety_check_set_abort(NULL); + ptr = test_invalid_size_pre(LARGE_SIZE1); + sdallocx(ptr, LARGE_SIZE2, MALLOCX_TCACHE_NONE); + test_invalid_size_post(); } TEST_END TEST_BEGIN(test_invalid_size_sdallocx_noflags) { test_skip_if(!config_opt_size_checks); - safety_check_set_abort(&fake_abort); - fake_abort_called = false; - void *ptr = malloc(SIZE1); - assert_ptr_not_null(ptr, "Unexpected failure"); - je_sdallocx_noflags(ptr, 
SIZE2); - expect_true(fake_abort_called, "Safety check didn't fire"); + void *ptr = test_invalid_size_pre(SMALL_SIZE1); + je_sdallocx_noflags(ptr, SMALL_SIZE2); + test_invalid_size_post(); - safety_check_set_abort(NULL); + ptr = test_invalid_size_pre(LARGE_SIZE1); + je_sdallocx_noflags(ptr, LARGE_SIZE2); + test_invalid_size_post(); } TEST_END From 5e41ff9b740258bddebcbd5575e1670a15f8b1ae Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Oct 2020 16:37:16 -0700 Subject: [PATCH 1885/2608] Add a hard limit on tcache max size class. For locality reasons, tcache bins are integrated in TSD. Allowing all size classes to be cached has little benefit, but takes up much thread local storage. In addition, it complicates the layout which we try hard to optimize. --- doc/jemalloc.xml.in | 4 ++-- include/jemalloc/internal/tcache_structs.h | 2 +- include/jemalloc/internal/tcache_types.h | 5 +++++ src/tcache.c | 12 ++++++------ 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index f283fd37..8e9a5d8f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1314,8 +1314,8 @@ malloc_conf = "xmalloc:true";]]> Maximum size class (log base 2) to cache in the thread-specific cache (tcache). At a minimum, all small size classes - are cached, and at a maximum all large size classes are cached. The - default maximum is 32 KiB (2^15). + are cached; and at a maximum, size classes up to 8 MiB can be cached. + The default maximum is 32 KiB (2^15). diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 331bd247..176d73de 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -54,7 +54,7 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; - cache_bin_t bins[SC_NSIZES]; + cache_bin_t bins[TCACHE_NBINS_MAX]; }; /* Linkage for list of available (previously used) explicit tcache IDs. 
*/ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index fb311e72..583677ea 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -27,4 +27,9 @@ typedef struct tcaches_s tcaches_t; /* Used for explicit tcache only. Means flushed but not destroyed. */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) +#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */ +#define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) +#define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ + (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) + #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index 06efe66a..63eddc2d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -936,20 +936,20 @@ tcache_ncached_max_compute(szind_t szind) { bool tcache_boot(tsdn_t *tsdn, base_t *base) { /* If necessary, clamp opt_lg_tcache_max. */ - if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) < - SC_SMALL_MAXCLASS) { + tcache_maxclass = opt_lg_tcache_max < 0 ? 0 : + ZU(1) << opt_lg_tcache_max; + if (tcache_maxclass < SC_SMALL_MAXCLASS) { tcache_maxclass = SC_SMALL_MAXCLASS; - } else { - tcache_maxclass = (ZU(1) << opt_lg_tcache_max); + } else if (tcache_maxclass > TCACHE_MAXCLASS_LIMIT) { + tcache_maxclass = TCACHE_MAXCLASS_LIMIT; } + nhbins = sz_size2index(tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, malloc_mutex_rank_exclusive)) { return true; } - nhbins = sz_size2index(tcache_maxclass) + 1; - /* Initialize tcache_bin_info. */ tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, nhbins * sizeof(cache_bin_info_t), CACHELINE); From 4ef5b8b4df3d4e2e534bbbdf558740f1056bc524 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 13 Oct 2020 15:18:35 -0700 Subject: [PATCH 1886/2608] Add a logo to doc_internal. This is the logo from the jemalloc development team's snazzy windbreakers. 
We don't actually use it in any documentation yet, but there's no reason we couldn't. In the meantime, it's probably best if it exists somewhere more stable than various email inboxes. --- doc_internal/jemalloc.svg | 1 + 1 file changed, 1 insertion(+) create mode 100644 doc_internal/jemalloc.svg diff --git a/doc_internal/jemalloc.svg b/doc_internal/jemalloc.svg new file mode 100644 index 00000000..5e77327e --- /dev/null +++ b/doc_internal/jemalloc.svg @@ -0,0 +1 @@ +jemalloc Final Logo \ No newline at end of file From 5ba861715abde3a68f6ad73a54ccb41f39874ece Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 14 Oct 2020 11:02:39 -0700 Subject: [PATCH 1887/2608] Add thread name in prof last-N records --- src/prof_recent.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/prof_recent.c b/src/prof_recent.c index cfaa5a68..b1aeef32 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -484,6 +484,12 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64, &node->alloc_tctx->thr_uid); + prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata; + assert(alloc_tdata != NULL); + if (alloc_tdata->thread_name != NULL) { + emitter_json_kv(emitter, "alloc_thread_name", + emitter_type_string, &alloc_tdata->thread_name); + } uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, &alloc_time_ns); @@ -494,6 +500,12 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { if (released && node->dalloc_tctx != NULL) { emitter_json_kv(emitter, "dalloc_thread_uid", emitter_type_uint64, &node->dalloc_tctx->thr_uid); + prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata; + assert(dalloc_tdata != NULL); + if (dalloc_tdata->thread_name != NULL) { + emitter_json_kv(emitter, "dalloc_thread_name", + emitter_type_string, &dalloc_tdata->thread_name); + } assert(!nstime_equals_zero(&node->dalloc_time)); uint64_t 
dalloc_time_ns = nstime_ns(&node->dalloc_time); emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64, From c8209150f9d219a137412b06431c9d52839c7272 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 19 Oct 2020 22:48:26 -0700 Subject: [PATCH 1888/2608] Switch from opt.lg_tcache_max to opt.tcache_max Though for convenience, keep parsing lg_tcache_max. --- doc/jemalloc.xml.in | 16 +++++----- include/jemalloc/internal/tcache_externs.h | 2 +- src/ctl.c | 6 ++-- src/jemalloc.c | 35 +++++++++++++++++----- src/tcache.c | 14 +++------ test/unit/mallctl.c | 2 +- test/unit/stats.c | 4 +-- 7 files changed, 47 insertions(+), 32 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8e9a5d8f..e5f2aa67 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1301,21 +1301,23 @@ malloc_conf = "xmalloc:true";]]> a certain size. Thread-specific caching allows many allocations to be satisfied without performing any thread synchronization, at the cost of increased memory use. See the opt.lg_tcache_max + linkend="opt.tcache_max">opt.tcache_max option for related tuning information. This option is enabled by default. - + - opt.lg_tcache_max + opt.tcache_max (size_t) r- - Maximum size class (log base 2) to cache in the - thread-specific cache (tcache). At a minimum, all small size classes - are cached; and at a maximum, size classes up to 8 MiB can be cached. - The default maximum is 32 KiB (2^15). + Maximum size class to cache in the thread-specific cache + (tcache). At a minimum, all small size classes are cached; and at a + maximum, size classes up to 8 MiB can be cached. The default maximum is + 32 KiB (2^15). 
As a convenience, this may also be set by specifying + lg_tcache_max, which will be taken to be the base-2 logarithm of the + setting of tcache_max diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index f044d322..95f3a682 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H extern bool opt_tcache; -extern ssize_t opt_lg_tcache_max; +extern size_t opt_tcache_max; extern ssize_t opt_lg_tcache_nslots_mul; extern unsigned opt_tcache_nslots_small_min; extern unsigned opt_tcache_nslots_small_max; diff --git a/src/ctl.c b/src/ctl.c index aec3473e..db0e05f0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -109,7 +109,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) -CTL_PROTO(opt_lg_tcache_max) +CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) CTL_PROTO(opt_tcache_nslots_small_max) CTL_PROTO(opt_tcache_nslots_large) @@ -362,7 +362,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, - {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, + {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, {NAME("tcache_nslots_small_max"), @@ -1837,7 +1837,7 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) -CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) +CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, unsigned) CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, diff --git a/src/jemalloc.c b/src/jemalloc.c index 1d6191ae..170b1723 100644 
--- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1170,15 +1170,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_DONT_CHECK_MAX(um, max) false #define CONF_CHECK_MAX(um, max) ((um) > (max)) +#define CONF_VALUE_READ(max_t, result) \ + char *end; \ + set_errno(0); \ + result = (max_t)malloc_strtoumax(v, &end, 0); +#define CONF_VALUE_READ_FAIL() \ + (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) + #define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ if (CONF_MATCH(n)) { \ max_t mv; \ - char *end; \ - \ - set_errno(0); \ - mv = (max_t)malloc_strtoumax(v, &end, 0); \ - if (get_errno() != 0 || (uintptr_t)end -\ - (uintptr_t)v != vlen) { \ + CONF_VALUE_READ(max_t, mv) \ + if (CONF_VALUE_READ_FAIL()) { \ CONF_ERROR("Invalid conf value",\ k, klen, v, vlen); \ } else if (clip) { \ @@ -1379,8 +1382,24 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", - -1, (sizeof(size_t) << 3) - 1) + CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", + 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + if (CONF_MATCH("lg_tcache_max")) { + size_t m; + CONF_VALUE_READ(size_t, m) + if (CONF_VALUE_READ_FAIL()) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + /* clip if necessary */ + if (m > TCACHE_LG_MAXCLASS_LIMIT) { + m = TCACHE_LG_MAXCLASS_LIMIT; + } + opt_tcache_max = (size_t)1 << m; + } + CONF_CONTINUE; + } /* * Anyone trying to set a value outside -16 to 16 is * deeply confused. diff --git a/src/tcache.c b/src/tcache.c index 63eddc2d..6bf1d309 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -11,11 +11,8 @@ bool opt_tcache = true; -/* - * (1U << opt_lg_tcache_max) is used to compute tcache_maxclass. This choice - * (32kb by default) works well as a default in practice. 
- */ -ssize_t opt_lg_tcache_max = 15; +/* tcache_maxclass is set to 32KB by default. */ +size_t opt_tcache_max = ((size_t)1) << 15; /* Reasonable defaults for min and max values. */ unsigned opt_tcache_nslots_small_min = 20; @@ -935,14 +932,11 @@ tcache_ncached_max_compute(szind_t szind) { bool tcache_boot(tsdn_t *tsdn, base_t *base) { - /* If necessary, clamp opt_lg_tcache_max. */ - tcache_maxclass = opt_lg_tcache_max < 0 ? 0 : - ZU(1) << opt_lg_tcache_max; + tcache_maxclass = sz_s2u(opt_tcache_max); if (tcache_maxclass < SC_SMALL_MAXCLASS) { tcache_maxclass = SC_SMALL_MAXCLASS; - } else if (tcache_maxclass > TCACHE_MAXCLASS_LIMIT) { - tcache_maxclass = TCACHE_MAXCLASS_LIMIT; } + assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); nhbins = sz_size2index(tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3de56947..cf5c88e0 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -179,7 +179,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, xmalloc, xmalloc); TEST_MALLCTL_OPT(bool, tcache, always); TEST_MALLCTL_OPT(size_t, lg_extent_max_active_fit, always); - TEST_MALLCTL_OPT(size_t, lg_tcache_max, always); + TEST_MALLCTL_OPT(size_t, tcache_max, always); TEST_MALLCTL_OPT(const char *, thp, always); TEST_MALLCTL_OPT(const char *, zero_realloc, always); TEST_MALLCTL_OPT(bool, prof, prof); diff --git a/test/unit/stats.c b/test/unit/stats.c index 20a32ddf..21a29a6f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -393,7 +393,7 @@ test_tcache_bytes_for_usize(size_t usize) { TEST_BEGIN(test_stats_tcache_bytes_small) { test_skip_if(!config_stats); test_skip_if(!opt_tcache); - test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_SMALL_MAXCLASS); + test_skip_if(opt_tcache_max < SC_SMALL_MAXCLASS); test_tcache_bytes_for_usize(SC_SMALL_MAXCLASS); } @@ -402,7 +402,7 @@ TEST_END TEST_BEGIN(test_stats_tcache_bytes_large) { test_skip_if(!config_stats); 
test_skip_if(!opt_tcache); - test_skip_if((ZU(1) << opt_lg_tcache_max) < SC_LARGE_MINCLASS); + test_skip_if(opt_tcache_max < SC_LARGE_MINCLASS); test_tcache_bytes_for_usize(SC_LARGE_MINCLASS); } From 1c7da3331795970c6049e5b526637bf692a4243e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 14 Aug 2020 13:36:41 -0700 Subject: [PATCH 1889/2608] HPA: Tie components into a PAI implementation. --- Makefile.in | 2 + include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/hpa.h | 92 ++++ .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/pa.h | 32 ++ include/jemalloc/internal/psset.h | 1 - include/jemalloc/internal/witness.h | 8 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 16 + src/ctl.c | 3 + src/hpa.c | 447 ++++++++++++++++++ src/hpa_central.c | 2 + src/jemalloc.c | 42 ++ src/pa.c | 70 ++- src/pa_extra.c | 12 + src/stats.c | 1 + test/unit/arena_decay.c | 5 + test/unit/hpa.c | 235 +++++++++ test/unit/mallctl.c | 1 + test/unit/prof_gdump.c | 1 + test/unit/retained.c | 1 + test/unit/stats.c | 2 +- 25 files changed, 972 insertions(+), 11 deletions(-) create mode 100644 include/jemalloc/internal/hpa.h create mode 100644 src/hpa.c create mode 100644 test/unit/hpa.c diff --git a/Makefile.in b/Makefile.in index 008cffd8..67568f00 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ + $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ @@ -212,6 +213,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ + $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ 
diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index a2fdff9f..9d4da31b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,6 +16,7 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; +extern hpa_t arena_hpa_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h new file mode 100644 index 00000000..83f22033 --- /dev/null +++ b/include/jemalloc/internal/hpa.h @@ -0,0 +1,92 @@ +#ifndef JEMALLOC_INTERNAL_HPA_H +#define JEMALLOC_INTERNAL_HPA_H + +#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/pai.h" +#include "jemalloc/internal/psset.h" + +typedef struct hpa_s hpa_t; +struct hpa_s { + /* + * We have two mutexes for the central allocator; mtx protects its + * state, while grow_mtx protects controls the ability to grow the + * backing store. This prevents race conditions in which the central + * allocator has exhausted its memory while mutiple threads are trying + * to allocate. If they all reserved more address space from the OS + * without synchronization, we'd end consuming much more than necessary. + */ + malloc_mutex_t grow_mtx; + malloc_mutex_t mtx; + hpa_central_t central; + /* The arena ind we're associated with. */ + unsigned ind; + /* + * This edata cache is the global one that we use for new allocations in + * growing; practically, it comes from a0. + */ + edata_cache_t *edata_cache; + geom_grow_t geom_grow; +}; + +typedef struct hpa_shard_s hpa_shard_t; +struct hpa_shard_s { + /* + * pai must be the first member; we cast from a pointer to it to a + * pointer to the hpa_shard_t. 
+ */ + pai_t pai; + malloc_mutex_t grow_mtx; + malloc_mutex_t mtx; + /* + * This edata cache is the one we use when allocating a small extent + * from a pageslab. The pageslab itself comes from the centralized + * allocator, and so will use its edata_cache. + */ + edata_cache_t *edata_cache; + hpa_t *hpa; + psset_t psset; + + /* + * When we're grabbing a new ps from the central allocator, how big + * would we like it to be? This is mostly about the level of batching + * we use in our requests to the centralized allocator. + */ + size_t ps_goal; + /* + * What's the maximum size we'll try to allocate out of the psset? We + * don't want this to be too large relative to ps_goal, as a + * fragmentation avoidance measure. + */ + size_t ps_alloc_max; + /* The arena ind we're associated with. */ + unsigned ind; +}; + +bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, + edata_cache_t *edata_cache); +bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, + edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, + size_t ps_alloc_max); +void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); + +/* + * We share the fork ordering with the PA and arena prefork handling; that's why + * these are 2 and 3 rather than 0 or 1. + */ +void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); + +/* + * These should be acquired after all the shard locks in phase 4, but before any + * locks in phase 4. The central HPA may acquire an edata cache mutex (of a0), + * so it needs to be lower in the witness ordering, but it's also logically + * global and not tied to any particular arena. 
+ */ +void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa); +void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); +void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); + +#endif /* JEMALLOC_INTERNAL_HPA_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3e7124d5..c26153e3 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -12,6 +12,7 @@ extern bool malloc_slow; extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; +extern bool opt_hpa; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f6d0a7c3..7f73c274 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/hpa.h" #include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" @@ -66,12 +67,32 @@ struct pa_shard_s { */ atomic_zu_t nactive; + /* + * Whether or not we should prefer the hugepage allocator. Atomic since + * it may be concurrently modified by a thread setting extent hooks. + * Note that we still may do HPA operations in this arena; if use_hpa is + * changed from true to false, we'll free back to the hugepage allocator + * for those allocations. + */ + atomic_b_t use_hpa; + /* + * If we never used the HPA to begin with, it wasn't initialized, and so + * we shouldn't try to e.g. acquire its mutexes during fork. This + * tracks that knowledge. + */ + bool ever_used_hpa; + /* Allocates from a PAC. */ pac_t pac; + /* Allocates from a HPA. */ + hpa_shard_t hpa_shard; + /* The source of edata_t objects. 
*/ edata_cache_t edata_cache; + unsigned ind; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; @@ -98,6 +119,17 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); +/* + * This isn't exposed to users; we allow late enablement of the HPA shard so + * that we can boot without worrying about the HPA, then turn it on in a0. + */ +bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa); +/* + * We stop using the HPA when custom extent hooks are installed, but still + * redirect deallocations to it. + */ +void pa_shard_disable_hpa(pa_shard_t *shard); + /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index abbfc241..72ff240e 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -49,7 +49,6 @@ struct psset_s { void psset_init(psset_t *psset); - /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. 
diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 652afe65..686bf403 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -43,8 +43,16 @@ enum witness_rank_e { WITNESS_RANK_CORE, WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_EXTENTS, + WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, + + WITNESS_RANK_HPA_GROW, + WITNESS_RANK_HPA, + WITNESS_RANK_EDATA_CACHE, WITNESS_RANK_EMAP, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2dcc994a..46e497ac 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 81f39345..f46a92fa 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index fd814c32..dbf6f95a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 81f39345..f46a92fa 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index f8e8cba2..74f90ccc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,6 +37,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t 
muzzy_decay_ms_default; emap_t arena_emap_global; +hpa_t arena_hpa_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1360,6 +1361,8 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, info = arena_background_thread_info_get(arena); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); } + /* No using the HPA now that we have the custom hooks. */ + pa_shard_disable_hpa(&arena->pa_shard); extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); if (have_background_thread) { malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -1516,6 +1519,19 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_init_update(&arena->create_time); + /* + * We turn on the HPA if set to. There are two exceptions: + * - Custom extent hooks (we should only return memory allocated from + * them in that case). + * - Arena 0 initialization. In this case, we're mid-bootstrapping, and + * so arena_hpa_global is not yet initialized. + */ + if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { + if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global)) { + goto label_error; + } + } + /* We don't support reentrancy for arena 0 bootstrapping. 
*/ if (ind != 0) { /* diff --git a/src/ctl.c b/src/ctl.c index db0e05f0..9e22e66c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) +CTL_PROTO(opt_hpa) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -343,6 +344,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, + {NAME("hpa"), CTL(opt_hpa)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1816,6 +1818,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) +CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/hpa.c b/src/hpa.c new file mode 100644 index 00000000..842384bd --- /dev/null +++ b/src/hpa.c @@ -0,0 +1,447 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa.h" + +#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/witness.h" + +static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + +bool +hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { + bool err; + + /* + * We fundamentally rely on a address-space-hungry growth strategy for + * hugepages. 
This may change in the future, but for now we should have + * refused to turn on any HPA at a higher level of the stack. + */ + assert(LG_SIZEOF_PTR == 3); + + err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + hpa_central_init(&hpa->central, edata_cache, emap); + if (err) { + return true; + } + hpa->ind = base_ind_get(base); + hpa->edata_cache = edata_cache; + + geom_grow_init(&hpa->geom_grow); + + return false; +} + +bool +hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, + unsigned ind, size_t ps_goal, size_t ps_alloc_max) { + bool err; + err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", + WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(&shard->mtx, "hpa_shard", + WITNESS_RANK_HPA_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + shard->edata_cache = edata_cache; + shard->hpa = hpa; + psset_init(&shard->psset); + shard->ps_goal = ps_goal; + shard->ps_alloc_max = ps_alloc_max; + + /* + * Fill these in last, so that if an hpa_shard gets used despite + * initialization failing, we'll at least crash instead of just + * operating on corrupted data. 
+ */ + shard->pai.alloc = &hpa_alloc; + shard->pai.expand = &hpa_expand; + shard->pai.shrink = &hpa_shrink; + shard->pai.dalloc = &hpa_dalloc; + + shard->ind = ind; + assert(ind == base_ind_get(edata_cache->base)); + + return false; +} + +static edata_t * +hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, + size_t size_goal) { + bool err; + edata_t *edata; + + hpa_t *hpa = shard->hpa; + + malloc_mutex_lock(tsdn, &hpa->mtx); + edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, + size_goal); + malloc_mutex_unlock(tsdn, &hpa->mtx); + if (edata != NULL) { + edata_arena_ind_set(edata, shard->ind); + return edata; + } + /* No existing range can satisfy the request; try to grow. */ + malloc_mutex_lock(tsdn, &hpa->grow_mtx); + + /* + * We could have raced with other grow attempts; re-check to see if we + * did, and are now able to satisfy the request. + */ + malloc_mutex_lock(tsdn, &hpa->mtx); + edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, + size_goal); + malloc_mutex_unlock(tsdn, &hpa->mtx); + if (edata != NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + edata_arena_ind_set(edata, shard->ind); + return edata; + } + + /* + * No such luck. We've dropped mtx, so other allocations can proceed + * while we allocate the new extent. We know no one else will grow in + * the meantime, though, since we still hold grow_mtx. + */ + size_t alloc_size; + pszind_t skip; + + size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); + + err = geom_grow_size_prepare(&hpa->geom_grow, hugepage_goal_min, + &alloc_size, &skip); + if (err) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + return NULL; + } + alloc_size = HUGEPAGE_CEILING(alloc_size); + + /* + * Eventually, we need to think about this more systematically, and in + * terms of extent hooks. For now, though, we know we only care about + * overcommitting systems, and we're not going to purge much. 
+ */ + bool commit = true; + void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit); + if (addr == NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + return NULL; + } + err = pages_huge(addr, alloc_size); + /* + * Ignore this for now; even if the allocation fails, the address space + * should still be usable. + */ + (void)err; + + edata = edata_cache_get(tsdn, hpa->edata_cache); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + pages_unmap(addr, alloc_size); + return NULL; + } + + /* + * The serial number here is just a placeholder; the hpa_central gets to + * decide how it wants to fill it in. + * + * The grow edata is associated with the hpa_central_t arena ind; the + * subsequent allocation we get (in the hpa_central_alloc_grow call + * below) will be filled in with the shard ind. + */ + edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, + /* comitted */ true, EXTENT_PAI_HPA, /* is_head */ true); + + malloc_mutex_lock(tsdn, &hpa->mtx); + /* Note that this replace edata with the allocation to return. 
*/ + err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); + malloc_mutex_unlock(tsdn, &hpa->mtx); + + if (!err) { + geom_grow_size_commit(&hpa->geom_grow, skip); + } + malloc_mutex_unlock(tsdn, &hpa->grow_mtx); + edata_arena_ind_set(edata, shard->ind); + + if (err) { + pages_unmap(addr, alloc_size); + edata_cache_put(tsdn, hpa->edata_cache, edata); + return NULL; + } + + return edata; +} + +static edata_t * +hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { + assert(size < shard->ps_alloc_max); + + bool err; + edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); + if (edata == NULL) { + return NULL; + } + edata_arena_ind_set(edata, shard->ind); + + malloc_mutex_lock(tsdn, &shard->mtx); + err = psset_alloc_reuse(&shard->psset, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (!err) { + return edata; + } + /* Nothing in the psset works; we have to grow it. */ + malloc_mutex_lock(tsdn, &shard->grow_mtx); + + /* As above; check for grow races. */ + malloc_mutex_lock(tsdn, &shard->mtx); + err = psset_alloc_reuse(&shard->psset, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (!err) { + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return edata; + } + + edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, + shard->ps_goal); + if (grow_edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + edata_cache_put(tsdn, shard->edata_cache, edata); + return NULL; + } + edata_arena_ind_set(grow_edata, shard->ind); + edata_slab_set(grow_edata, true); + fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; + fb_init(fb, shard->ps_goal / PAGE); + + /* We got the new edata; allocate from it. 
*/ + malloc_mutex_lock(tsdn, &shard->mtx); + psset_alloc_new(&shard->psset, grow_edata, edata, size); + malloc_mutex_unlock(tsdn, &shard->mtx); + + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return edata; +} + +static hpa_shard_t * +hpa_from_pai(pai_t *self) { + assert(self->alloc = &hpa_alloc); + assert(self->expand = &hpa_expand); + assert(self->shrink = &hpa_shrink); + assert(self->dalloc = &hpa_dalloc); + return (hpa_shard_t *)self; +} + +static edata_t * +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero) { + + assert((size & PAGE_MASK) == 0); + /* We don't handle alignment or zeroing for now. */ + if (alignment > PAGE || zero) { + return NULL; + } + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + hpa_shard_t *shard = hpa_from_pai(self); + + edata_t *edata; + if (size <= shard->ps_alloc_max) { + edata = hpa_alloc_psset(tsdn, shard, size); + if (edata != NULL) { + emap_register_boundary(tsdn, shard->hpa->central.emap, + edata, SC_NSIZES, /* slab */ false); + } + } else { + edata = hpa_alloc_central(tsdn, shard, size, size); + } + + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + if (edata != NULL) { + emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_base_get(edata) != NULL); + } + return edata; +} + +static bool +hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero) { + /* Expand not yet supported. 
*/ + return true; +} + +static bool +hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size) { + /* Shrink not yet supported. */ + return true; +} + +static void +hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + hpa_t *hpa = shard->hpa; + + edata_arena_ind_set(edata, hpa->ind); + malloc_mutex_lock(tsdn, &hpa->mtx); + hpa_central_dalloc(tsdn, &hpa->central, edata); + malloc_mutex_unlock(tsdn, &hpa->mtx); +} + +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + hpa_shard_t *shard = hpa_from_pai(self); + + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); + + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) != NULL); + + /* + * There are two cases: + * - The psset field is NULL. In this case, the edata comes directly + * from the hpa_central_t and should be returned to it. + * - THe psset field is not NULL, in which case we return the edata to + * the appropriate slab (which may in turn cause it to become empty, + * triggering an eviction of the whole slab, which should then be + * returned to the hpa_central_t). + */ + if (edata_ps_get(edata) != NULL) { + emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata); + + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + + edata_cache_put(tsdn, shard->edata_cache, edata); + + if (evicted_ps != NULL) { + /* + * The deallocation caused a pageslab to become empty. + * Free it back to the centralized allocator. 
+ */ + bool err = emap_register_boundary(tsdn, + shard->hpa->central.emap, evicted_ps, SC_NSIZES, + /* slab */ false); + /* + * Registration can only fail on OOM, but the boundary + * mappings should have been initialized during + * allocation. + */ + assert(!err); + edata_slab_set(evicted_ps, false); + edata_ps_set(evicted_ps, NULL); + + assert(edata_arena_ind_get(evicted_ps) == shard->ind); + hpa_dalloc_central(tsdn, shard, evicted_ps); + } + } else { + hpa_dalloc_central(tsdn, shard, edata); + } +} + +static void +hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { + assert(bin_stats->npageslabs == 0); + assert(bin_stats->nactive == 0); + assert(bin_stats->ninactive == 0); +} + +void +hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { + /* + * By the time we're here, the arena code should have dalloc'd all the + * active extents, which means we should have eventually evicted + * everything from the psset, so it shouldn't be able to serve even a + * 1-page allocation. + */ + if (config_debug) { + edata_t edata = {0}; + malloc_mutex_lock(tsdn, &shard->mtx); + bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, + PAGE); + malloc_mutex_unlock(tsdn, &shard->mtx); + assert(psset_empty); + hpa_shard_assert_stats_empty(&shard->psset.full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &shard->psset.slab_stats[i]); + } + } +} + +void +hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->grow_mtx); +} + +void +hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_prefork(tsdn, &shard->mtx); +} + +void +hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); + malloc_mutex_postfork_parent(tsdn, &shard->mtx); +} + +void +hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); + malloc_mutex_postfork_child(tsdn, &shard->mtx); +} + 
+void +hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_prefork(tsdn, &hpa->grow_mtx); + malloc_mutex_prefork(tsdn, &hpa->mtx); +} + +void +hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx); + malloc_mutex_postfork_parent(tsdn, &hpa->mtx); +} + +void +hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) { + malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx); + malloc_mutex_postfork_child(tsdn, &hpa->mtx); +} diff --git a/src/hpa_central.c b/src/hpa_central.c index d1065951..a1895c87 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -79,6 +79,7 @@ hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, eset_insert(¢ral->eset, edata); return NULL; } + emap_assert_mapped(tsdn, central->emap, trail); eset_insert(¢ral->eset, trail); label_success: @@ -178,6 +179,7 @@ hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); + assert(edata_ps_get(edata) == NULL); /* * These should really be called at the pa interface level, but diff --git a/src/jemalloc.c b/src/jemalloc.c index 170b1723..0dc685b2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -133,6 +133,10 @@ unsigned ncpus; /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; + +/* The global hpa, and whether it's on. */ +bool opt_hpa = false; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. 
@@ -1476,6 +1480,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_max_background_threads, CONF_CHECK_MIN, CONF_CHECK_MAX, true); + CONF_HANDLE_BOOL(opt_hpa, "hpa") if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); @@ -1760,6 +1765,33 @@ malloc_init_hard_a0_locked() { return true; } a0 = arena_get(TSDN_NULL, 0, false); + + if (opt_hpa && LG_SIZEOF_PTR == 2) { + if (opt_abort_conf) { + malloc_printf(": Hugepages not currently " + "supported on 32-bit architectures; aborting."); + } else { + malloc_printf(": Hugepages not currently " + "supported on 32-bit architectures; disabling."); + opt_hpa = false; + } + } else if (opt_hpa) { + /* + * The global HPA uses the edata cache from a0, and so needs to + * be initialized specially, after a0 is. The arena init code + * handles this case specially, and does not turn on the HPA for + * a0 when opt_hpa is true. This lets us do global HPA + * initialization against a valid a0. 
+ */ + if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global, + &a0->pa_shard.edata_cache)) { + return true; + } + if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global)) { + return true; + } + } + malloc_init_state = malloc_init_a0_initialized; return false; @@ -4206,6 +4238,10 @@ _malloc_prefork(void) } } } + if (i == 3 && opt_hpa) { + hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global); + } + } prof_prefork1(tsd_tsdn(tsd)); stats_prefork(tsd_tsdn(tsd)); @@ -4244,6 +4280,9 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } + if (opt_hpa) { + hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global); + } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4274,6 +4313,9 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } + if (opt_hpa) { + hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global); + } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/pa.c b/src/pa.c index f068fd96..672db7b0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/hpa.h" + static void pa_nactive_add(pa_shard_t *shard, size_t add_pages) { atomic_fetch_add_zu(&shard->nactive, add_pages, ATOMIC_RELAXED); @@ -21,12 +23,18 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, if (edata_cache_init(&shard->edata_cache, base)) { return true; } + if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } + shard->ind = ind; + + shard->ever_used_hpa = false; + atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); + atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); shard->stats_mtx = stats_mtx; @@ -39,6 +47,29 @@ pa_shard_init(tsdn_t *tsdn, 
pa_shard_t *shard, emap_t *emap, base_t *base, return false; } +bool +pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa) { + /* + * These are constants for now; eventually they'll probably be + * tuneable. + */ + size_t ps_goal = 512 * 1024; + size_t ps_alloc_max = 256 * 1024; + if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, + shard->ind, ps_goal, ps_alloc_max)) { + return true; + } + shard->ever_used_hpa = true; + atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED); + + return false; +} + +void +pa_shard_disable_hpa(pa_shard_t *shard) { + atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); +} + void pa_shard_reset(pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); @@ -49,14 +80,30 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); } +static pai_t * +pa_get_pai(pa_shard_t *shard, edata_t *edata) { + return (edata_pai_get(edata) == EXTENT_PAI_PAC + ? &shard->pac.pai : &shard->hpa_shard.pai); +} + edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, - zero); + edata_t *edata = NULL; + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, + zero); + } + /* + * Fall back to the PAC if the HPA is off or couldn't serve the given + * allocation request. 
+ */ + if (edata == NULL) { + edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); + } if (edata != NULL) { pa_nactive_add(shard, size >> LG_PAGE); @@ -67,6 +114,9 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, emap_register_interior(tsdn, shard->emap, edata, szind); } } + if (edata != NULL) { + assert(edata_arena_ind_get(edata) == shard->ind); + } return edata; } @@ -79,8 +129,9 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t expand_amount = new_size - old_size; - bool error = pai_expand(tsdn, &shard->pac.pai, edata, old_size, - new_size, zero); + pai_t *pai = pa_get_pai(shard, edata); + + bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero); if (error) { return true; } @@ -100,13 +151,13 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; *generated_dirty = false; - bool error = pai_shrink(tsdn, &shard->pac.pai, edata, old_size, - new_size); + pai_t *pai = pa_get_pai(shard, edata); + bool error = pai_shrink(tsdn, pai, edata, old_size, new_size); if (error) { return true; } pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - *generated_dirty = true; + *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); edata_szind_set(edata, szind); emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); @@ -123,8 +174,9 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, } edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); - pai_dalloc(tsdn, &shard->pac.pai, edata); - *generated_dirty = true; + pai_t *pai = pa_get_pai(shard, edata); + pai_dalloc(tsdn, pai, edata); + *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); } bool diff --git a/src/pa_extra.c b/src/pa_extra.c index 8bf54b96..402603ea 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -17,6 +17,9 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t 
*tsdn, pa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); + if (shard->ever_used_hpa) { + hpa_shard_prefork2(tsdn, &shard->hpa_shard); + } } void @@ -24,6 +27,9 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { ecache_prefork(tsdn, &shard->pac.ecache_dirty); ecache_prefork(tsdn, &shard->pac.ecache_muzzy); ecache_prefork(tsdn, &shard->pac.ecache_retained); + if (shard->ever_used_hpa) { + hpa_shard_prefork3(tsdn, &shard->hpa_shard); + } } void @@ -40,6 +46,9 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); + if (shard->ever_used_hpa) { + hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); + } } void @@ -51,6 +60,9 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); + if (shard->ever_used_hpa) { + hpa_shard_postfork_child(tsdn, &shard->hpa_shard); + } } void diff --git a/src/stats.c b/src/stats.c index 407b60cc..b2ec57b7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1095,6 +1095,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("narenas") OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") + OPT_WRITE_BOOL("hpa") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 86f7057d..555f71ae 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -185,6 +185,7 @@ generate_dirty(unsigned arena_ind, size_t size) { TEST_BEGIN(test_decay_ticks) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); ticker_t 
*decay_ticker; unsigned tick0, tick1, arena_ind; @@ -424,6 +425,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, TEST_BEGIN(test_decay_ticker) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; ssize_t mdt = opt_muzzy_decay_ms; @@ -485,6 +487,7 @@ TEST_END TEST_BEGIN(test_decay_nonmonotonic) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); void *ps[NPS]; @@ -542,6 +545,7 @@ TEST_END TEST_BEGIN(test_decay_now) { test_skip_if(check_background_thread_enabled()); + test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(0, 0); expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); @@ -562,6 +566,7 @@ TEST_END TEST_BEGIN(test_decay_never) { test_skip_if(check_background_thread_enabled() || !config_stats); + test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; diff --git a/test/unit/hpa.c b/test/unit/hpa.c new file mode 100644 index 00000000..8b319b9e --- /dev/null +++ b/test/unit/hpa.c @@ -0,0 +1,235 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" + +#define HPA_IND 111 +#define SHARD_IND 222 + +#define PS_GOAL (128 * PAGE) +#define PS_ALLOC_MAX (64 * PAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + base_t *shard_base; + edata_cache_t shard_edata_cache; + + hpa_t hpa; + base_t *hpa_base; + edata_cache_t hpa_edata_cache; + + emap_t emap; +}; + +static hpa_shard_t * +create_test_data() { + bool err; + base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks); + assert_ptr_not_null(shard_base, ""); + + base_t *hpa_base = 
base_new(TSDN_NULL, /* ind */ HPA_IND, + &ehooks_default_extent_hooks); + assert_ptr_not_null(hpa_base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->shard_base = shard_base; + test_data->hpa_base = hpa_base; + + err = edata_cache_init(&test_data->shard_edata_cache, shard_base); + assert_false(err, ""); + + err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->hpa_base, + /* zeroed */ false); + assert_false(err, ""); + + err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap, + &test_data->hpa_edata_cache); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->hpa, + &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->shard_base); + base_delete(TSDN_NULL, test_data->hpa_base); + free(test_data); +} + +typedef struct mem_contents_s mem_contents_t; +struct mem_contents_s { + uintptr_t my_addr; + size_t size; + edata_t *my_edata; + rb_node(mem_contents_t) link; +}; + +static int +mem_contents_cmp(const mem_contents_t *a, const mem_contents_t *b) { + return (a->my_addr > b->my_addr) - (a->my_addr < b->my_addr); +} + +typedef rb_tree(mem_contents_t) mem_tree_t; +rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, + mem_contents_cmp); + +static void +node_assert_ordered(mem_contents_t *a, mem_contents_t *b) { + assert_zu_lt(a->my_addr, a->my_addr + a->size, "Overflow"); + assert_zu_le(a->my_addr + a->size, b->my_addr, ""); +} + +static void +node_check(mem_tree_t *tree, mem_contents_t *contents) { + edata_t *edata = contents->my_edata; + assert_ptr_eq(contents, (void *)contents->my_addr, ""); + assert_ptr_eq(contents, edata_base_get(edata), ""); + 
assert_zu_eq(contents->size, edata_size_get(edata), ""); + assert_ptr_eq(contents->my_edata, edata, ""); + + mem_contents_t *next = mem_tree_next(tree, contents); + if (next != NULL) { + node_assert_ordered(contents, next); + } + mem_contents_t *prev = mem_tree_prev(tree, contents); + if (prev != NULL) { + node_assert_ordered(prev, contents); + } +} + +static void +node_insert(mem_tree_t *tree, edata_t *edata, size_t npages) { + mem_contents_t *contents = (mem_contents_t *)edata_base_get(edata); + contents->my_addr = (uintptr_t)edata_base_get(edata); + contents->size = edata_size_get(edata); + contents->my_edata = edata; + mem_tree_insert(tree, contents); + node_check(tree, contents); +} + +static void +node_remove(mem_tree_t *tree, edata_t *edata) { + mem_contents_t *contents = (mem_contents_t *)edata_base_get(edata); + node_check(tree, contents); + mem_tree_remove(tree, contents); +} + +TEST_BEGIN(test_stress) { + test_skip_if(LG_SIZEOF_PTR != 3); + + hpa_shard_t *shard = create_test_data(); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + const size_t nlive_edatas_max = 500; + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + /* + * Nothing special about this constant; we're only fixing it for + * consistency across runs. + */ + size_t prng_state = (size_t)0x76999ffb014df07c; + + mem_tree_t tree; + mem_tree_new(&tree); + + for (size_t i = 0; i < 100 * 1000; i++) { + size_t operation = prng_range_zu(&prng_state, 4); + if (operation < 2) { + /* Alloc */ + if (nlive_edatas == nlive_edatas_max) { + continue; + } + + size_t npages_min; + size_t npages_max; + /* + * We make sure to get an even balance of small and + * large allocations. 
+ */ + if (operation == 0) { + npages_min = 1; + npages_max = SC_LARGE_MINCLASS / PAGE - 1; + } else { + npages_min = SC_LARGE_MINCLASS / PAGE; + npages_max = 5 * npages_min; + } + size_t npages = npages_min + prng_range_zu(&prng_state, + npages_max - npages_min); + edata_t *edata = pai_alloc(tsdn, &shard->pai, + npages * PAGE, PAGE, false); + assert_ptr_not_null(edata, + "Unexpected allocation failure"); + live_edatas[nlive_edatas] = edata; + nlive_edatas++; + node_insert(&tree, edata, npages); + } else { + /* Free. */ + if (nlive_edatas == 0) { + continue; + } + size_t victim = prng_range_zu(&prng_state, nlive_edatas); + edata_t *to_free = live_edatas[victim]; + live_edatas[victim] = live_edatas[nlive_edatas - 1]; + nlive_edatas--; + node_remove(&tree, to_free); + pai_dalloc(tsdn, &shard->pai, to_free); + } + } + + size_t ntreenodes = 0; + for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; + contents = mem_tree_next(&tree, contents)) { + ntreenodes++; + node_check(&tree, contents); + } + expect_zu_eq(ntreenodes, nlive_edatas, ""); + + /* + * Test hpa_shard_destroy, which requires as a precondition that all its + * extents have been deallocated. + */ + for (size_t i = 0; i < nlive_edatas; i++) { + edata_t *to_free = live_edatas[i]; + node_remove(&tree, to_free); + pai_dalloc(tsdn, &shard->pai, to_free); + } + hpa_shard_destroy(tsdn, shard); + + free(live_edatas); + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + /* + * These trigger unused-function warnings on CI runs, even if declared + * with static inline. 
+ */ + (void)mem_tree_empty; + (void)mem_tree_last; + (void)mem_tree_search; + (void)mem_tree_nsearch; + (void)mem_tree_psearch; + (void)mem_tree_iter; + (void)mem_tree_reverse_iter; + (void)mem_tree_destroy; + return test_no_reentrancy( + test_stress); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cf5c88e0..cda1a659 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -163,6 +163,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); + TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 9a47a19a..46e45036 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -17,6 +17,7 @@ prof_dump_open_file_intercept(const char *filename, int mode) { } TEST_BEGIN(test_gdump) { + test_skip_if(opt_hpa); bool active, gdump, gdump_old; void *p, *q, *r, *s; size_t sz; diff --git a/test/unit/retained.c b/test/unit/retained.c index 81396170..80ee8cdf 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -99,6 +99,7 @@ thd_start(void *arg) { TEST_BEGIN(test_retained) { test_skip_if(!config_stats); + test_skip_if(opt_hpa); arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); diff --git a/test/unit/stats.c b/test/unit/stats.c index 21a29a6f..6b6594d2 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -119,7 +119,7 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - if (!background_thread_enabled()) { + if (!background_thread_enabled() && !opt_hpa) { expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); } From bf025d2ec8f68fa50c5eb8bdb303a684c3f9c544 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 
15:22:47 -0700 Subject: [PATCH 1890/2608] HPA: Make slab sizes and maxes configurable. This allows easy experimentation with them as tuning parameters. --- .../internal/jemalloc_internal_externs.h | 2 ++ include/jemalloc/internal/pa.h | 3 ++- src/arena.c | 3 ++- src/ctl.c | 6 ++++++ src/jemalloc.c | 16 +++++++++++++++- src/pa.c | 15 ++++++++------- src/stats.c | 2 ++ 7 files changed, 37 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index c26153e3..b152068a 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -13,6 +13,8 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; extern bool opt_hpa; +extern size_t opt_hpa_slab_goal; +extern size_t opt_hpa_slab_max_alloc; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 7f73c274..eced8cae 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -123,7 +123,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. */ -bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa); +bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, + size_t ps_alloc_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/src/arena.c b/src/arena.c index 74f90ccc..3403526e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1527,7 +1527,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global)) { + if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, + opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index 9e22e66c..fe6e8440 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -91,6 +91,8 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) +CTL_PROTO(opt_hpa_slab_goal) +CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -345,6 +347,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, + {NAME("hpa_slab_goal"), CTL(opt_hpa_slab_goal)}, + {NAME("hpa_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1819,6 +1823,8 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) +CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) +CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0dc685b2..fd822e07 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -136,6 +136,8 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; +size_t opt_hpa_slab_goal = 512 * 1024; +size_t opt_hpa_slab_max_alloc = 256 * 1024; /* * Arenas that are used to service external requests. 
Not all elements of the @@ -1481,6 +1483,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") + /* + * If someone violates these mins and maxes, they're + * confused. + */ + CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", + PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, + true) + CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, + "hpa_slab_max_alloc", PAGE, 512 * PAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true) + if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); @@ -1787,7 +1800,8 @@ malloc_init_hard_a0_locked() { &a0->pa_shard.edata_cache)) { return true; } - if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global)) { + if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, + opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { return true; } } diff --git a/src/pa.c b/src/pa.c index 672db7b0..a8aa32d3 100644 --- a/src/pa.c +++ b/src/pa.c @@ -48,13 +48,14 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa) { - /* - * These are constants for now; eventually they'll probably be - * tuneable. 
- */ - size_t ps_goal = 512 * 1024; - size_t ps_alloc_max = 256 * 1024; +pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, + size_t ps_alloc_max) { + ps_goal &= ~PAGE_MASK; + ps_alloc_max &= ~PAGE_MASK; + + if (ps_alloc_max > ps_goal) { + ps_alloc_max = ps_goal; + } if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, shard->ind, ps_goal, ps_alloc_max)) { return true; diff --git a/src/stats.c b/src/stats.c index b2ec57b7..78068f49 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1096,6 +1096,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_BOOL("hpa") + OPT_WRITE_SIZE_T("hpa_slab_goal") + OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") From 484f04733e5bd9908faf502fced6df66ca33f9f9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 15:35:10 -0700 Subject: [PATCH 1891/2608] HPA: Add central mutex contention stats. 
--- include/jemalloc/internal/mutex_prof.h | 4 +++- src/ctl.c | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 190402e6..91ab4114 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,7 +11,9 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) + OP(prof_dump) \ + OP(hpa_central) \ + OP(hpa_central_grow) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index fe6e8440..89b75452 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1142,6 +1142,14 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); } + if (opt_hpa) { + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_hpa_central, + arena_hpa_global.mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_hpa_central_grow, + arena_hpa_global.grow_mtx); + } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_background_thread, @@ -3134,6 +3142,10 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); } + if (opt_hpa) { + MUTEX_PROF_RESET(arena_hpa_global.mtx); + MUTEX_PROF_RESET(arena_hpa_global.grow_mtx); + } /* Per arena mutexes. */ From 534504d4a7086084a46ac42c700e9429d2c72fd1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 18:29:28 -0700 Subject: [PATCH 1892/2608] HPA: add size-exclusion functionality. I.e. only allowing allocations under or over certain sizes. 
--- include/jemalloc/internal/hpa.h | 13 ++++++- .../internal/jemalloc_internal_externs.h | 2 + include/jemalloc/internal/pa.h | 2 +- src/arena.c | 3 +- src/ctl.c | 8 +++- src/hpa.c | 13 +++++-- src/jemalloc.c | 11 +++++- src/pa.c | 4 +- src/stats.c | 2 + test/unit/hpa.c | 38 +++++++++++++++++-- test/unit/mallctl.c | 4 ++ 11 files changed, 84 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 83f22033..3decbf17 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -59,6 +59,17 @@ struct hpa_shard_s { * fragmentation avoidance measure. */ size_t ps_alloc_max; + /* + * What's the maximum size we'll try to allocate out of the shard at + * all? + */ + size_t small_max; + /* + * What's the minimum size for which we'll go straight to the global + * arena? + */ + size_t large_min; + /* The arena ind we're associated with. */ unsigned ind; }; @@ -67,7 +78,7 @@ bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache); bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, - size_t ps_alloc_max); + size_t ps_alloc_max, size_t small_max, size_t large_min); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); /* diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b152068a..8faadaa1 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -15,6 +15,8 @@ extern bool opt_confirm_conf; extern bool opt_hpa; extern size_t opt_hpa_slab_goal; extern size_t opt_hpa_slab_max_alloc; +extern size_t opt_hpa_small_max; +extern size_t opt_hpa_large_min; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index eced8cae..473d682b 100644 --- 
a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -124,7 +124,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. */ bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max); + size_t ps_alloc_max, size_t small_max, size_t large_min); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/src/arena.c b/src/arena.c index 3403526e..5fb5843b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1528,7 +1528,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { + opt_hpa_slab_goal, opt_hpa_slab_max_alloc, + opt_hpa_small_max, opt_hpa_large_min)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index 89b75452..9b8ab752 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -93,6 +93,8 @@ CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_goal) CTL_PROTO(opt_hpa_slab_max_alloc) +CTL_PROTO(opt_hpa_small_max) +CTL_PROTO(opt_hpa_large_min) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -348,7 +350,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_goal"), CTL(opt_hpa_slab_goal)}, - {NAME("hpa_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_small_max"), CTL(opt_hpa_small_max)}, + {NAME("hpa_large_min"), CTL(opt_hpa_large_min)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -1833,6 +1837,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) 
CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t) +CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/hpa.c b/src/hpa.c index 842384bd..597261d4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -50,7 +50,8 @@ hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, - unsigned ind, size_t ps_goal, size_t ps_alloc_max) { + unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, + size_t large_min) { bool err; err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); @@ -68,6 +69,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, psset_init(&shard->psset); shard->ps_goal = ps_goal; shard->ps_alloc_max = ps_alloc_max; + shard->small_max = small_max; + shard->large_min = large_min; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -195,7 +198,7 @@ hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, static edata_t * hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size < shard->ps_alloc_max); + assert(size <= shard->ps_alloc_max); bool err; edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); @@ -257,16 +260,18 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); + hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. 
*/ if (alignment > PAGE || zero) { return NULL; } + if (size > shard->small_max && size < shard->large_min) { + return NULL; + } witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - hpa_shard_t *shard = hpa_from_pai(self); - edata_t *edata; if (size <= shard->ps_alloc_max) { edata = hpa_alloc_psset(tsdn, shard, size); diff --git a/src/jemalloc.c b/src/jemalloc.c index fd822e07..8ce9ca1e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -136,8 +136,10 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; -size_t opt_hpa_slab_goal = 512 * 1024; +size_t opt_hpa_slab_goal = 128 * 1024; size_t opt_hpa_slab_max_alloc = 256 * 1024; +size_t opt_hpa_small_max = 32 * 1024; +size_t opt_hpa_large_min = 4 * 1024 * 1024; /* * Arenas that are used to service external requests. Not all elements of the @@ -1493,6 +1495,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, "hpa_slab_max_alloc", PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) + CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { @@ -1801,7 +1807,8 @@ malloc_init_hard_a0_locked() { return true; } if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc)) { + opt_hpa_slab_goal, opt_hpa_slab_max_alloc, + opt_hpa_small_max, opt_hpa_large_min)) { return true; } } diff --git a/src/pa.c b/src/pa.c index a8aa32d3..8e1ec842 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,7 +49,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max) { + size_t ps_alloc_max, size_t small_max, size_t 
large_min) { ps_goal &= ~PAGE_MASK; ps_alloc_max &= ~PAGE_MASK; @@ -57,7 +57,7 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, ps_alloc_max = ps_goal; } if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, - shard->ind, ps_goal, ps_alloc_max)) { + shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { return true; } shard->ever_used_hpa = true; diff --git a/src/stats.c b/src/stats.c index 78068f49..7cbf2048 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1098,6 +1098,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_goal") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") + OPT_WRITE_SIZE_T("hpa_small_max") + OPT_WRITE_SIZE_T("hpa_large_min") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 8b319b9e..b58dcede 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -8,6 +8,9 @@ #define PS_GOAL (128 * PAGE) #define PS_ALLOC_MAX (64 * PAGE) +#define HPA_SMALL_MAX (200 * PAGE) +#define HPA_LARGE_MIN (300 * PAGE) + typedef struct test_data_s test_data_t; struct test_data_s { /* @@ -57,7 +60,8 @@ create_test_data() { assert_false(err, ""); err = hpa_shard_init(&test_data->shard, &test_data->hpa, - &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX); + &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX, + HPA_SMALL_MAX, HPA_LARGE_MIN); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -71,6 +75,31 @@ destroy_test_data(hpa_shard_t *shard) { free(test_data); } +TEST_BEGIN(test_small_max_large_min) { + test_skip_if(LG_SIZEOF_PTR != 3); + + hpa_shard_t *shard = create_test_data(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata; + + /* Small max */ + edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX, PAGE, false); + expect_ptr_not_null(edata, "Allocation of small max failed"); + edata = 
pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX + PAGE, PAGE, false); + expect_ptr_null(edata, "Allocation of larger than small max succeeded"); + + /* Large min */ + edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN, PAGE, false); + expect_ptr_not_null(edata, "Allocation of large min failed"); + edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN - PAGE, PAGE, false); + expect_ptr_null(edata, + "Allocation of smaller than large min succeeded"); + + destroy_test_data(shard); +} +TEST_END + typedef struct mem_contents_s mem_contents_t; struct mem_contents_s { uintptr_t my_addr; @@ -164,10 +193,10 @@ TEST_BEGIN(test_stress) { */ if (operation == 0) { npages_min = 1; - npages_max = SC_LARGE_MINCLASS / PAGE - 1; + npages_max = HPA_SMALL_MAX / PAGE; } else { - npages_min = SC_LARGE_MINCLASS / PAGE; - npages_max = 5 * npages_min; + npages_min = HPA_LARGE_MIN / PAGE; + npages_max = HPA_LARGE_MIN / PAGE + 20; } size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); @@ -231,5 +260,6 @@ main(void) { (void)mem_tree_reverse_iter; (void)mem_tree_destroy; return test_no_reentrancy( + test_small_max_large_min, test_stress); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cda1a659..ecbcda9e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,6 +164,10 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); + TEST_MALLCTL_OPT(size_t, hpa_slab_goal, always); + TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); + TEST_MALLCTL_OPT(size_t, hpa_small_max, always); + TEST_MALLCTL_OPT(size_t, hpa_large_min, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); From 1964b08394e01a5b6881013c0f34ee20073cc328 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Sep 2020 12:01:52 -0700 Subject: [PATCH 1893/2608] HPA: Add stats 
for the hpa_shard. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/hpa.h | 7 ++ include/jemalloc/internal/mutex_prof.h | 4 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/psset.h | 7 ++ include/jemalloc/internal/stats.h | 3 +- src/arena.c | 4 +- src/ctl.c | 87 +++++++++++++++++- src/hpa.c | 1 - src/pa_extra.c | 22 ++++- src/stats.c | 106 ++++++++++++++++++++-- 12 files changed, 230 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9d4da31b..c8e1e38d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats); + pac_estats_t *estats, hpa_shard_stats_t *hpastats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index fbc432bf..305d3655 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -45,6 +45,7 @@ typedef struct ctl_arena_stats_s { bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; + hpa_shard_stats_t hpastats; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 3decbf17..3fe9fc48 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -29,6 +29,13 @@ struct hpa_s { geom_grow_t geom_grow; }; +/* Used only by CTL; not actually stored here (i.e., all 
derived). */ +typedef struct hpa_shard_stats_s hpa_shard_stats_t; +struct hpa_shard_stats_s { + psset_bin_stats_t psset_full_slab_stats; + psset_bin_stats_t psset_slab_stats[PSSET_NPSIZES]; +}; + typedef struct hpa_shard_s hpa_shard_t; struct hpa_shard_s { /* diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 91ab4114..970f469b 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -31,7 +31,9 @@ typedef enum { OP(decay_dirty) \ OP(decay_muzzy) \ OP(base) \ - OP(tcache_list) + OP(tcache_list) \ + OP(hpa_shard) \ + OP(hpa_shard_grow) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 473d682b..d138f2f0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -192,7 +192,7 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - size_t *resident); + hpa_shard_stats_t *hpa_stats_out, size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 72ff240e..7bba3cbc 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -31,6 +31,13 @@ struct psset_bin_stats_s { size_t ninactive; }; +static inline void +psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { + dst->npageslabs += src->npageslabs; + dst->nactive += src->nactive; + dst->ninactive += src->ninactive; +} + typedef struct psset_s psset_t; struct psset_s { /* diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 93bde22e..727f7dcb 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -11,7 +11,8 @@ OPTION('b', bins, true, false) \ 
OPTION('l', large, true, false) \ OPTION('x', mutex, true, false) \ - OPTION('e', extents, true, false) + OPTION('e', extents, true, false) \ + OPTION('h', hpa, config_stats, false) enum { #define OPTION(o, v, d, s) stats_print_option_num_##v, diff --git a/src/arena.c b/src/arena.c index 5fb5843b..dc58a287 100644 --- a/src/arena.c +++ b/src/arena.c @@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats) { + pac_estats_t *estats, hpa_shard_stats_t *hpastats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -139,7 +139,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, &astats->resident); + estats, hpastats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); diff --git a/src/ctl.c b/src/ctl.c index 9b8ab752..b4e65172 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -216,6 +216,13 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive) +INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -584,6 +591,41 @@ MUTEX_PROF_ARENA_MUTEXES #undef OP }; +static const ctl_named_node_t 
stats_arenas_i_hpa_shard_full_slabs_node[] = { + {NAME("npageslabs"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs)}, + {NAME("nactive"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive)}, + {NAME("ninactive"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive)} +}; + +static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME("npageslabs"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs)}, + {NAME("nactive"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive)}, + {NAME("ninactive"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive)} +}; + +static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME(""), + CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)} +}; + +static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = +{ + {INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)} +}; + +static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { + {NAME("full_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("nonfull_slabs"), CHILD(indexed, + stats_arenas_i_hpa_shard_nonfull_slabs)} +}; + static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, {NAME("uptime"), CTL(stats_arenas_i_uptime)}, @@ -613,7 +655,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, - {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)} + {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}, + {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)} }; static const ctl_named_node_t super_stats_arenas_i_node[] = { {NAME(""), CHILD(named, stats_arenas_i)} @@ -844,6 +887,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { sizeof(arena_stats_large_t)); memset(ctl_arena->astats->estats, 0, SC_NPSIZES * 
sizeof(pac_estats_t)); + memset(&ctl_arena->astats->hpastats, 0, + sizeof(hpa_shard_stats_t)); } } @@ -857,7 +902,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->muzzy_decay_ms, &ctl_arena->pactive, &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, - ctl_arena->astats->lstats, ctl_arena->astats->estats); + ctl_arena->astats->lstats, ctl_arena->astats->estats, + &ctl_arena->astats->hpastats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1033,6 +1079,16 @@ MUTEX_PROF_ARENA_MUTEXES sdstats->estats[i].retained_bytes += astats->estats[i].retained_bytes; } + + /* Merge HPA stats. */ + psset_bin_stats_accum(&sdstats->hpastats.psset_full_slab_stats, + &astats->hpastats.psset_full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + psset_bin_stats_accum( + &sdstats->hpastats.psset_slab_stats[i], + &astats->hpastats.psset_slab_stats[i]); + } + } } @@ -3256,6 +3312,33 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, + arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, + arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.ninactive, size_t); + +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, + arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].nactive, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, + 
arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].ninactive, + size_t); + +static const ctl_named_node_t * +stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, + size_t miblen, size_t j) { + if (j >= PSSET_NPSIZES) { + return NULL; + } + return super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node; +} + static bool ctl_arenas_i_verify(size_t i) { size_t a = arenas_i2a_impl(i, true, true); diff --git a/src/hpa.c b/src/hpa.c index 597261d4..08992bda 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -258,7 +258,6 @@ hpa_from_pai(pai_t *self) { static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { - assert((size & PAGE_MASK) == 0); hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. */ diff --git a/src/pa_extra.c b/src/pa_extra.c index 402603ea..db236ad8 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -76,7 +76,7 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -138,6 +138,18 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, estats_out[i].muzzy_bytes = muzzy_bytes; estats_out[i].retained_bytes = retained_bytes; } + + if (shard->ever_used_hpa) { + malloc_mutex_lock(tsdn, &shard->hpa_shard.mtx); + psset_bin_stats_accum(&hpa_stats_out->psset_full_slab_stats, + &shard->hpa_shard.psset.full_slab_stats); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + psset_bin_stats_accum( + &hpa_stats_out->psset_slab_stats[i], + &shard->hpa_shard.psset.slab_stats[i]); + } + malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx); + } } static void @@ -163,4 +175,12 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, &shard->pac.decay_dirty.mtx, 
arena_prof_mutex_decay_dirty); pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->pac.decay_muzzy.mtx, arena_prof_mutex_decay_muzzy); + + if (shard->ever_used_hpa) { + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->hpa_shard.mtx, arena_prof_mutex_hpa_shard); + pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, + &shard->hpa_shard.grow_mtx, + arena_prof_mutex_hpa_shard_grow); + } } diff --git a/src/stats.c b/src/stats.c index 7cbf2048..f03e5e44 100644 --- a/src/stats.c +++ b/src/stats.c @@ -43,6 +43,16 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) +#define CTL_M2_M5_GET(n, i, j, v, t) do { \ + size_t mib[CTL_MAX_DEPTH]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[2] = (i); \ + mib[5] = (j); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ +} while (0) + /******************************************************************************/ /* Data. 
*/ @@ -650,6 +660,87 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } } +static void +stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + + size_t npageslabs; + size_t nactive; + size_t ninactive; + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs", + i, &npageslabs, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive", + i, &nactive, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", + i, &ninactive, size_t); + + emitter_table_printf(emitter, + "HPA shard stats:\n" + " In full slabs:\n" + " npageslabs: %zu\n" + " nactive: %zu\n" + " ninactive: %zu\n", + npageslabs, nactive, ninactive); + emitter_json_object_kv_begin(emitter, "hpa_shard"); + emitter_json_object_kv_begin(emitter, "full_slabs"); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); + emitter_json_kv(emitter, "ninactive", emitter_type_size, &ninactive); + emitter_json_object_end(emitter); /* End "full_slabs" */ + + COL_HDR(row, size, NULL, right, 20, size) + COL_HDR(row, ind, NULL, right, 4, unsigned) + COL_HDR(row, npageslabs, NULL, right, 13, size) + COL_HDR(row, nactive, NULL, right, 13, size) + COL_HDR(row, ninactive, NULL, right, 13, size) + + emitter_table_row(emitter, &header_row); + emitter_json_array_kv_begin(emitter, "nonfull_slabs"); + bool in_gap = false; + for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs", + i, j, &npageslabs, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive", + i, j, &nactive, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive", + i, j, &ninactive, size_t); + + bool in_gap_prev = in_gap; + in_gap = (npageslabs == 0); + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, 
+ " ---\n"); + } + + col_size.size_val = sz_pind2sz(j); + col_ind.size_val = j; + col_npageslabs.size_val = npageslabs; + col_nactive.size_val = nactive; + col_ninactive.size_val = ninactive; + if (!in_gap) { + emitter_table_row(emitter, &row); + } + + emitter_json_object_begin(emitter); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, + &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, + &nactive); + emitter_json_kv(emitter, "ninactive", emitter_type_size, + &ninactive); + emitter_json_object_end(emitter); + } + emitter_json_array_end(emitter); /* End "nonfull_slabs" */ + emitter_json_object_end(emitter); /* End "hpa_shard" */ +} + static void stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; @@ -677,7 +768,7 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, - bool mutex, bool extents) { + bool mutex, bool extents, bool hpa) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; @@ -997,6 +1088,9 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, if (extents) { stats_arena_extents_print(emitter, i); } + if (hpa) { + stats_arena_hpa_shard_print(emitter, i); + } } static void @@ -1272,7 +1366,7 @@ stats_general_print(emitter_t *emitter) { static void stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, - bool unmerged, bool bins, bool large, bool mutex, bool extents) { + bool unmerged, bool bins, bool large, bool mutex, bool extents, bool hpa) { /* * These should be deleted. We keep them around for a while, to aid in * the transition to the emitter code. 
@@ -1405,7 +1499,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "Merged arenas stats:\n"); emitter_json_object_kv_begin(emitter, "merged"); stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, - large, mutex, extents); + large, mutex, extents, hpa); emitter_json_object_end(emitter); /* Close "merged". */ } @@ -1416,7 +1510,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, "Destroyed arenas stats:\n"); emitter_json_object_kv_begin(emitter, "destroyed"); stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, - bins, large, mutex, extents); + bins, large, mutex, extents, hpa); emitter_json_object_end(emitter); /* Close "destroyed". */ } @@ -1432,7 +1526,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); stats_arena_print(emitter, i, bins, - large, mutex, extents); + large, mutex, extents, hpa); /* Close "". */ emitter_json_object_end(emitter); } @@ -1497,7 +1591,7 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { } if (config_stats) { stats_print_helper(&emitter, merged, destroyed, unmerged, - bins, large, mutex, extents); + bins, large, mutex, extents, hpa); } emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ From ea51e97bb893f560c70f42478d67c8159ee09b3d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 15 Oct 2020 13:46:38 -0700 Subject: [PATCH 1894/2608] Add SEC module: a small extent cache. This can be used to take pressure off a more centralized, worse-sharded allocator without requiring a full break of the arena abstraction. 
--- Makefile.in | 2 + include/jemalloc/internal/cache_bin.h | 1 - include/jemalloc/internal/sec.h | 118 +++++ include/jemalloc/internal/witness.h | 2 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/sec.c | 263 +++++++++ test/unit/sec.c | 500 ++++++++++++++++++ 10 files changed, 893 insertions(+), 1 deletion(-) create mode 100644 include/jemalloc/internal/sec.h create mode 100644 src/sec.c create mode 100644 test/unit/sec.c diff --git a/Makefile.in b/Makefile.in index 67568f00..0136a40e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -142,6 +142,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ $(srcroot)src/sc.c \ + $(srcroot)src/sec.c \ $(srcroot)src/stats.c \ $(srcroot)src/sz.c \ $(srcroot)src/tcache.c \ @@ -253,6 +254,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/rtree.c \ $(srcroot)test/unit/safety_check.c \ $(srcroot)test/unit/sc.c \ + $(srcroot)test/unit/sec.c \ $(srcroot)test/unit/seq.c \ $(srcroot)test/unit/SFMT.c \ $(srcroot)test/unit/size_check.c \ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c016769d..0767862c 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -99,7 +99,6 @@ struct cache_bin_s { * array. */ uint16_t low_bits_empty; - }; /* diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h new file mode 100644 index 00000000..7c1465ed --- /dev/null +++ b/include/jemalloc/internal/sec.h @@ -0,0 +1,118 @@ +#ifndef JEMALLOC_INTERNAL_SEC_H +#define JEMALLOC_INTERNAL_SEC_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/pai.h" + +/* + * Small extent cache. + * + * This includes some utilities to cache small extents. 
We have a per-pszind + * bin with its own lock and edata heap (including only extents of that size). + * We don't try to do any coalescing of extents (since it would require + * cross-bin locks). As a result, we need to be careful about fragmentation. + * As a gesture in that direction, we limit the size of caches, apply first-fit + * within the bins, and, when flushing a bin, flush all of its extents rather + * than just those up to some threshold. When we allocate again, we'll get a + * chance to move to better ones. + */ + +/* + * This is a *small* extent cache, after all. Assuming 4k pages and an ngroup + * of 4, this allows caching of sizes up to 128k. + */ +#define SEC_NPSIZES 16 +/* + * For now, we put a cap on the number of SECs an arena can have. There's no + * reason it can't be dynamic; it's just inconvenient. This number of shards + * are embedded in the arenas, so there's a space / configurability tradeoff + * here. Eventually, we should probably dynamically allocate only however many + * we require. + */ +#define SEC_NSHARDS_MAX 8 + +/* + * For now, this is just one field; eventually, we'll probably want to get more + * fine-grained data out (like per-size class statistics). + */ +typedef struct sec_stats_s sec_stats_t; +struct sec_stats_s { + /* Sum of bytes_cur across all shards. */ + size_t bytes; +}; + +static inline void +sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { + dst->bytes += src->bytes; +} + +typedef struct sec_shard_s sec_shard_t; +struct sec_shard_s { + /* + * We don't keep per-bin mutexes, even though that would allow more + * sharding; this allows global cache-eviction, which in turn allows for + * better balancing across free lists. + */ + malloc_mutex_t mtx; + /* + * A SEC may need to be shut down (i.e. flushed of its contents and + * prevented from further caching). To avoid tricky synchronization + * issues, we just track enabled-status in each shard, guarded by a + * mutex. 
In practice, this is only ever checked during brief races, + * since the arena-level atomic boolean tracking HPA enabled-ness means + * that we won't go down these pathways very often after custom extent + * hooks are installed. + */ + bool enabled; + edata_list_active_t freelist[SEC_NPSIZES]; + size_t bytes_cur; +}; + +typedef struct sec_s sec_t; +struct sec_s { + pai_t pai; + pai_t *fallback; + + /* + * We'll automatically refuse to cache any objects in this sec if + * they're larger than alloc_max bytes. + */ + size_t alloc_max; + /* + * Exceeding this amount of cached extents in a shard causes *all* of + * the shards in that bin to be flushed. + */ + size_t bytes_max; + + /* + * We don't necessarily always use all the shards; requests are + * distributed across shards [0, nshards - 1). + */ + size_t nshards; + sec_shard_t shards[SEC_NSHARDS_MAX]; +}; + +bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, + size_t bytes_max); +void sec_flush(tsdn_t *tsdn, sec_t *sec); +void sec_disable(tsdn_t *tsdn, sec_t *sec); + +/* + * Morally, these two stats methods probably ought to be a single one (and the + * mutex_prof_data ought to live in the sec_stats_t. But splitting them apart + * lets them fit easily into the pa_shard stats framework (which also has this + * split), which simplifies the stats management. + */ +void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats); +void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data); + +/* + * We use the arena lock ordering; these are acquired in phase 2 of forking, but + * should be acquired before the underlying allocator mutexes. 
+ */ +void sec_prefork2(tsdn_t *tsdn, sec_t *sec); +void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec); +void sec_postfork_child(tsdn_t *tsdn, sec_t *sec); + +#endif /* JEMALLOC_INTERNAL_SEC_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 686bf403..662907c8 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -44,6 +44,8 @@ enum witness_rank_e { WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, + WITNESS_RANK_SEC_SHARD, + WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 46e497ac..f14f87ff 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -82,6 +82,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f46a92fa..689a520c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -130,6 +130,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index dbf6f95a..30c6b295 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -82,6 +82,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f46a92fa..689a520c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -130,6 +130,9 @@ Source Files + + Source Files + Source Files diff --git a/src/sec.c b/src/sec.c new file mode 100644 index 00000000..f3c906bc --- /dev/null +++ b/src/sec.c @@ -0,0 +1,263 @@ +#include 
"jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/sec.h" + +static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero); +static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero); +static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size); +static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + +bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, + size_t bytes_max) { + if (nshards > SEC_NSHARDS_MAX) { + nshards = SEC_NSHARDS_MAX; + } + for (size_t i = 0; i < nshards; i++) { + sec_shard_t *shard = &sec->shards[i]; + bool err = malloc_mutex_init(&shard->mtx, "sec_shard", + WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + shard->enabled = true; + for (pszind_t j = 0; j < SEC_NPSIZES; j++) { + edata_list_active_init(&shard->freelist[j]); + } + shard->bytes_cur = 0; + } + sec->fallback = fallback; + sec->alloc_max = alloc_max; + if (sec->alloc_max > sz_pind2sz(SEC_NPSIZES - 1)) { + sec->alloc_max = sz_pind2sz(SEC_NPSIZES - 1); + } + + sec->bytes_max = bytes_max; + sec->nshards = nshards; + + /* + * Initialize these last so that an improper use of an SEC whose + * initialization failed will segfault in an easy-to-spot way. + */ + sec->pai.alloc = &sec_alloc; + sec->pai.expand = &sec_expand; + sec->pai.shrink = &sec_shrink; + sec->pai.dalloc = &sec_dalloc; + + return false; +} + +static sec_shard_t * +sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { + /* + * Eventually, we should implement affinity, tracking source shard using + * the edata_t's newly freed up fields. For now, just randomly + * distribute across all shards. 
+ */ + if (tsdn_null(tsdn)) { + return &sec->shards[0]; + } + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * Use the trick from Daniel Lemire's "A fast alternative to the modulo + * reduction. Use a 64 bit number to store 32 bits, since we'll + * deliberately overflow when we multiply by the number of shards. + */ + uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); + return &sec->shards[idx]; +} + +static edata_t * +sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + pszind_t pszind) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!shard->enabled) { + return NULL; + } + edata_t *edata = edata_list_active_first(&shard->freelist[pszind]); + if (edata != NULL) { + edata_list_active_remove(&shard->freelist[pszind], edata); + assert(edata_size_get(edata) <= shard->bytes_cur); + shard->bytes_cur -= edata_size_get(edata); + } + return edata; +} + +static edata_t * +sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + assert((size & PAGE_MASK) == 0); + + sec_t *sec = (sec_t *)self; + + if (zero || alignment > PAGE || sec->nshards == 0 + || size > sec->alloc_max) { + return pai_alloc(tsdn, sec->fallback, size, alignment, zero); + } + pszind_t pszind = sz_psz2ind(size); + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (edata == NULL) { + /* + * See the note in dalloc, below; really, we should add a + * batch_alloc method to the PAI and get more than one extent at + * a time. 
+ */ + edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero); + } + return edata; +} + +static bool +sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero) { + sec_t *sec = (sec_t *)self; + return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero); +} + +static bool +sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size) { + sec_t *sec = (sec_t *)self; + return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size); +} + +static void +sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + shard->bytes_cur = 0; + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + for (pszind_t i = 0; i < SEC_NPSIZES; i++) { + edata_list_active_concat(&to_flush, &shard->freelist[i]); + } + /* + * A better way to do this would be to add a batch dalloc function to + * the pai_t. Practically, the current method turns into O(n) locks and + * unlocks at the fallback allocator. But some implementations (e.g. + * HPA) can straightforwardly do many deallocations in a single lock / + * unlock pair. + */ + while (!edata_list_active_empty(&to_flush)) { + edata_t *e = edata_list_active_first(&to_flush); + edata_list_active_remove(&to_flush, e); + pai_dalloc(tsdn, sec->fallback, e); + } +} + +static void +sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + assert(shard->bytes_cur <= sec->bytes_max); + size_t size = edata_size_get(edata); + pszind_t pszind = sz_psz2ind(size); + /* + * Prepending here results in FIFO allocation per bin, which seems + * reasonable. + */ + edata_list_active_prepend(&shard->freelist[pszind], edata); + shard->bytes_cur += size; + if (shard->bytes_cur > sec->bytes_max) { + /* + * We've exceeded the shard limit. 
We make two nods in the + * direction of fragmentation avoidance: we flush everything in + * the shard, rather than one particular bin, and we hold the + * lock while flushing (in case one of the extents we flush is + * highly preferred from a fragmentation-avoidance perspective + * in the backing allocator). This has the extra advantage of + * not requiring advanced cache balancing strategies. + */ + sec_do_flush_locked(tsdn, sec, shard); + } +} + +static void +sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + sec_t *sec = (sec_t *)self; + if (sec->nshards == 0 || edata_size_get(edata) > sec->alloc_max) { + pai_dalloc(tsdn, sec->fallback, edata); + return; + } + sec_shard_t *shard = sec_shard_pick(tsdn, sec); + malloc_mutex_lock(tsdn, &shard->mtx); + if (shard->enabled) { + sec_shard_dalloc_locked(tsdn, sec, shard, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc(tsdn, sec->fallback, edata); + } +} + +void +sec_flush(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_disable(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sec->shards[i].enabled = false; + sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + size_t sum = 0; + for (size_t i = 0; i < sec->nshards; i++) { + /* + * We could save these lock acquisitions by making bytes_cur + * atomic, but stats collection is rare anyways and we expect + * the number and type of stats to get more interesting. 
+ */ + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + sum += sec->shards[i].bytes_cur; + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } + stats->bytes += sum; +} + +void +sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, + mutex_prof_data_t *mutex_prof_data) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_lock(tsdn, &sec->shards[i].mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, + &sec->shards[i].mtx); + malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_prefork2(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + } +} + +void +sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { + for (size_t i = 0; i < sec->nshards; i++) { + malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + } +} diff --git a/test/unit/sec.c b/test/unit/sec.c new file mode 100644 index 00000000..cb0c17d1 --- /dev/null +++ b/test/unit/sec.c @@ -0,0 +1,500 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/sec.h" + +typedef struct pai_test_allocator_s pai_test_allocator_t; +struct pai_test_allocator_s { + pai_t pai; + bool alloc_fail; + size_t alloc_count; + size_t dalloc_count; + /* + * We use a simple bump allocator as the implementation. This isn't + * *really* correct, since we may allow expansion into a subsequent + * allocation, but it's not like the SEC is really examining the + * pointers it gets back; this is mostly just helpful for debugging. 
+ */ + uintptr_t next_ptr; + size_t expand_count; + bool expand_return_value; + size_t shrink_count; + bool shrink_return_value; +}; + +static inline edata_t * +pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, + size_t alignment, bool zero) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + if (ta->alloc_fail) { + return NULL; + } + edata_t *edata = malloc(sizeof(edata_t)); + assert_ptr_not_null(edata, ""); + ta->next_ptr += alignment - 1; + edata_init(edata, /* arena_ind */ 0, + (void *)(ta->next_ptr & ~(alignment - 1)), size, + /* slab */ false, + /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero, + /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); + ta->next_ptr += size; + ta->alloc_count++; + return edata; +} + +static bool +pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->expand_count++; + return ta->expand_return_value; +} + +static bool +pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->shrink_count++; + return ta->shrink_return_value; +} + +static void +pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + ta->dalloc_count++; + free(edata); +} + +static inline void +pai_test_allocator_init(pai_test_allocator_t *ta) { + ta->alloc_fail = false; + ta->alloc_count = 0; + ta->dalloc_count = 0; + /* Just don't start the edata at 0. 
*/ + ta->next_ptr = 10 * PAGE; + ta->expand_count = 0; + ta->expand_return_value = false; + ta->shrink_count = 0; + ta->shrink_return_value = false; + ta->pai.alloc = &pai_test_allocator_alloc; + ta->pai.expand = &pai_test_allocator_expand; + ta->pai.shrink = &pai_test_allocator_shrink; + ta->pai.dalloc = &pai_test_allocator_dalloc; +} + +TEST_BEGIN(test_reuse) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* + * We can't use the "real" tsd, since we malloc within the test + * allocator hooks; we'd get lock inversion crashes. Eventually, we + * should have a way to mock tsds, but for now just don't do any + * lock-order checking. + */ + tsdn_t *tsdn = TSDN_NULL; + /* + * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 30 pages in the cache before triggering a flush. + */ + enum { NALLOCS = 10 }; + edata_t *one_page[NALLOCS]; + edata_t *two_page[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, + /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE); + for (int i = 0; i < NALLOCS; i++) { + one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); + two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Free in a different order than we allocated, to make sure free-list + * separation works correctly. 
+ */ + for (int i = NALLOCS - 1; i >= 0; i--) { + pai_dalloc(tsdn, &sec.pai, one_page[i]); + } + for (int i = NALLOCS - 1; i >= 0; i--) { + pai_dalloc(tsdn, &sec.pai, two_page[i]); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Check that the n'th most recent deallocated extent is returned for + * the n'th alloc request of a given size. + */ + for (int i = 0; i < NALLOCS; i++) { + edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, + /* zero */ false); + expect_ptr_eq(one_page[i], alloc1, + "Got unexpected allocation"); + expect_ptr_eq(two_page[i], alloc2, + "Got unexpected allocation"); + } + expect_zu_eq(2 * NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); +} +TEST_END + + +TEST_BEGIN(test_auto_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + /* + * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 30 pages in the cache before triggering a flush. 
+ */ + enum { NALLOCS = 10 }; + edata_t *extra_alloc; + edata_t *allocs[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ NALLOCS * PAGE); + for (int i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* Free until the SEC is full, but should not have flushed yet. */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + } + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + /* + * Free the extra allocation; this should trigger a flush of all + * extents in the cache. + */ + pai_dalloc(tsdn, &sec.pai, extra_alloc); + expect_zu_eq(NALLOCS + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(NALLOCS + 1, ta.dalloc_count, + "Incorrect number of deallocations"); +} +TEST_END + +/* + * A disable and a flush are *almost* equivalent; the only difference is what + * happens afterwards; disabling disallows all future caching as well. + */ +static void +do_disable_flush_test(bool is_disable) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. 
*/ + tsdn_t *tsdn = TSDN_NULL; + + enum { NALLOCS = 10 }; + edata_t *allocs[NALLOCS]; + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ NALLOCS * PAGE); + for (int i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + /* Free all but the last aloc. */ + for (int i = 0; i < NALLOCS - 1; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + } + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of allocations"); + + if (is_disable) { + sec_disable(tsdn, &sec); + } else { + sec_flush(tsdn, &sec); + } + + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_count, + "Incorrect number of deallocations"); + + /* + * If we free into a disabled SEC, it should forward to the fallback. + * Otherwise, the SEC should accept the allocation. + */ + pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); + + expect_zu_eq(NALLOCS, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count, + "Incorrect number of deallocations"); +} + +TEST_BEGIN(test_disable) { + do_disable_flush_test(/* is_disable */ true); +} +TEST_END + +TEST_BEGIN(test_flush) { + do_disable_flush_test(/* is_disable */ false); +} +TEST_END + +TEST_BEGIN(test_alloc_max_respected) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. 
*/ + tsdn_t *tsdn = TSDN_NULL; + + size_t alloc_max = 2 * PAGE; + size_t attempted_alloc = 3 * PAGE; + + sec_init(&sec, &ta.pai, /* nshards */ 1, alloc_max, + /* bytes_max */ 1000 * PAGE); + + for (size_t i = 0; i < 100; i++) { + expect_zu_eq(i, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(i, ta.dalloc_count, + "Incorrect number of deallocations"); + edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, + PAGE, /* zero */ false); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + expect_zu_eq(i + 1, ta.alloc_count, + "Incorrect number of allocations"); + expect_zu_eq(i, ta.dalloc_count, + "Incorrect number of deallocations"); + pai_dalloc(tsdn, &sec.pai, edata); + } +} +TEST_END + +TEST_BEGIN(test_expand_shrink_delegate) { + /* + * Expand and shrink shouldn't affect sec state; they should just + * delegate to the fallback PAI. + */ + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. 
*/ + tsdn_t *tsdn = TSDN_NULL; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 10 * PAGE, + /* bytes_max */ 1000 * PAGE); + edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + + bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, + /* zero */ false); + expect_false(err, "Unexpected expand failure"); + expect_zu_eq(1, ta.expand_count, ""); + ta.expand_return_value = true; + err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, + /* zero */ false); + expect_true(err, "Unexpected expand success"); + expect_zu_eq(2, ta.expand_count, ""); + + err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE); + expect_false(err, "Unexpected shrink failure"); + expect_zu_eq(1, ta.shrink_count, ""); + ta.shrink_return_value = true; + err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE); + expect_true(err, "Unexpected shrink success"); + expect_zu_eq(2, ta.shrink_count, ""); +} +TEST_END + +TEST_BEGIN(test_nshards_0) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + sec_init(&sec, &ta.pai, /* nshards */ 0, /* alloc_max */ 10 * PAGE, + /* bytes_max */ 1000 * PAGE); + + edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + pai_dalloc(tsdn, &sec.pai, edata); + + /* Both operations should have gone directly to the fallback. */ + expect_zu_eq(1, ta.alloc_count, ""); + expect_zu_eq(1, ta.dalloc_count, ""); +} +TEST_END + +static void +expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { + sec_stats_t stats; + /* + * Check that the stats merging accumulates rather than overwrites by + * putting some (made up) data there to begin with. 
+ */ + stats.bytes = 123; + sec_stats_merge(tsdn, sec, &stats); + assert_zu_eq(npages * PAGE + 123, stats.bytes, ""); +} + +TEST_BEGIN(test_stats_simple) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + NITERS = 100, + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *allocs[FLUSH_PAGES]; + for (size_t i = 0; i < FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + /* Increase and decrease, without flushing. */ + for (size_t i = 0; i < NITERS; i++) { + for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { + pai_dalloc(tsdn, &sec.pai, allocs[j]); + expect_stats_pages(tsdn, &sec, j + 1); + } + for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { + allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); + } + } +} +TEST_END + +TEST_BEGIN(test_stats_auto_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. 
*/ + tsdn_t *tsdn = TSDN_NULL; + + enum { + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *extra_alloc0; + edata_t *extra_alloc1; + edata_t *allocs[2 * FLUSH_PAGES]; + + extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + + for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + for (size_t i = 0; i < FLUSH_PAGES; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + pai_dalloc(tsdn, &sec.pai, extra_alloc0); + /* The last dalloc should have triggered a flush. */ + expect_stats_pages(tsdn, &sec, 0); + + /* Flush the remaining pages; stats should still work. */ + for (size_t i = 0; i < FLUSH_PAGES; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + + pai_dalloc(tsdn, &sec.pai, extra_alloc1); + /* The last dalloc should have triggered a flush, again. */ + expect_stats_pages(tsdn, &sec, 0); +} +TEST_END + +TEST_BEGIN(test_stats_manual_flush) { + pai_test_allocator_t ta; + pai_test_allocator_init(&ta); + sec_t sec; + + /* See the note above -- we can't use the real tsd. */ + tsdn_t *tsdn = TSDN_NULL; + + enum { + FLUSH_PAGES = 10, + }; + + sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, + /* bytes_max */ FLUSH_PAGES * PAGE); + + edata_t *allocs[FLUSH_PAGES]; + for (size_t i = 0; i < FLUSH_PAGES; i++) { + allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, + /* zero */ false); + expect_stats_pages(tsdn, &sec, 0); + } + + /* Dalloc the first half of the allocations. 
*/ + for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + + sec_flush(tsdn, &sec); + expect_stats_pages(tsdn, &sec, 0); + + /* Flush the remaining pages. */ + for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]); + expect_stats_pages(tsdn, &sec, i + 1); + } + sec_disable(tsdn, &sec); + expect_stats_pages(tsdn, &sec, 0); +} +TEST_END + +int +main(void) { + return test( + test_reuse, + test_auto_flush, + test_disable, + test_flush, + test_alloc_max_respected, + test_expand_shrink_delegate, + test_nshards_0, + test_stats_simple, + test_stats_auto_flush, + test_stats_manual_flush); +} From 6599651aee2b1b1ab0c52fdb03f23394bd683c47 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 16 Oct 2020 13:14:59 -0700 Subject: [PATCH 1895/2608] PA: Use an SEC in fron of the HPA shard. --- include/jemalloc/internal/arena_externs.h | 3 +- include/jemalloc/internal/ctl.h | 1 + include/jemalloc/internal/hpa.h | 6 ++-- .../internal/jemalloc_internal_externs.h | 5 ++++ include/jemalloc/internal/mutex_prof.h | 3 +- include/jemalloc/internal/pa.h | 20 +++++++++---- src/arena.c | 28 ++++++++++++----- src/ctl.c | 19 +++++++++++- src/hpa.c | 6 ++-- src/jemalloc.c | 30 ++++++++++++++----- src/pa.c | 19 ++++++++---- src/pa_extra.c | 25 ++++++++++++---- src/stats.c | 8 +++++ src/tcache.c | 6 ++-- test/unit/mallctl.c | 3 ++ 15 files changed, 141 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index c8e1e38d..40223b58 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -28,7 +28,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, 
arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats); + pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); #ifdef JEMALLOC_JET size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); @@ -99,6 +99,7 @@ void arena_prefork4(tsdn_t *tsdn, arena_t *arena); void arena_prefork5(tsdn_t *tsdn, arena_t *arena); void arena_prefork6(tsdn_t *tsdn, arena_t *arena); void arena_prefork7(tsdn_t *tsdn, arena_t *arena); +void arena_prefork8(tsdn_t *tsdn, arena_t *arena); void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 305d3655..a6ae05c1 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -46,6 +46,7 @@ typedef struct ctl_arena_stats_s { arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; hpa_shard_stats_t hpastats; + sec_stats_t secstats; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 3fe9fc48..24c68560 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -90,10 +90,10 @@ void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); /* * We share the fork ordering with the PA and arena prefork handling; that's why - * these are 2 and 3 rather than 0 or 1. + * these are 3 and 4 rather than 0 and 1. 
*/ -void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); @@ -103,7 +103,7 @@ void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); * so it needs to be lower in the witness ordering, but it's also logically * global and not tied to any particular arena. */ -void hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa); +void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa); void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8faadaa1..814a7a1b 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -17,6 +17,11 @@ extern size_t opt_hpa_slab_goal; extern size_t opt_hpa_slab_max_alloc; extern size_t opt_hpa_small_max; extern size_t opt_hpa_large_min; + +extern size_t opt_hpa_sec_max_alloc; +extern size_t opt_hpa_sec_max_bytes; +extern size_t opt_hpa_sec_nshards; + extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 970f469b..ef0bf0d3 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -33,7 +33,8 @@ typedef enum { OP(base) \ OP(tcache_list) \ OP(hpa_shard) \ - OP(hpa_shard_grow) + OP(hpa_shard_grow) \ + OP(hpa_sec) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index d138f2f0..5e97d0b0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/lockedint.h" 
#include "jemalloc/internal/pac.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/sec.h" /* * The page allocator; responsible for acquiring pages of memory for @@ -85,7 +86,12 @@ struct pa_shard_s { /* Allocates from a PAC. */ pac_t pac; - /* Allocates from a HPA. */ + /* + * We place a small extent cache in front of the HPA, since we intend + * these configurations to use many fewer arenas, and therefore have a + * higher risk of hot locks. + */ + sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects. */ @@ -124,18 +130,20 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. */ bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min); + size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards, + size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. */ -void pa_shard_disable_hpa(pa_shard_t *shard); +void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard); /* * This does the PA-specific parts of arena reset (i.e. freeing all active * allocations). */ -void pa_shard_reset(pa_shard_t *shard); +void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard); + /* * Destroy all the remaining retained extents. 
Should only be called after * decaying all active, dirty, and muzzy extents to the retained state, as the @@ -184,6 +192,7 @@ void pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); @@ -192,7 +201,8 @@ void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, size_t *resident); + hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, + size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/src/arena.c b/src/arena.c index dc58a287..360827ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -81,7 +81,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats) { + pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -139,7 +139,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, hpastats, &astats->resident); + estats, hpastats, secstats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -483,6 +483,14 @@ arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_decay(tsdn_t *tsdn, 
arena_t *arena, bool is_background_thread, bool all) { + if (all) { + /* + * We should take a purge of "all" to mean "save as much memory + * as possible", including flushing any caches (for situations + * like thread death, or manual purge calls). + */ + sec_flush(tsdn, &arena->pa_shard.hpa_sec); + } if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; } @@ -631,7 +639,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { &arena->bins[i].bin_shards[j]); } } - pa_shard_reset(&arena->pa_shard); + pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); } void @@ -1362,7 +1370,7 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); } /* No using the HPA now that we have the custom hooks. */ - pa_shard_disable_hpa(&arena->pa_shard); + pa_shard_disable_hpa(tsd_tsdn(tsd), &arena->pa_shard); extent_hooks_t *ret = base_extent_hooks_set(arena->base, extent_hooks); if (have_background_thread) { malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); @@ -1529,7 +1537,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min)) { + opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { goto label_error; } } @@ -1658,16 +1667,21 @@ arena_prefork4(tsdn_t *tsdn, arena_t *arena) { void arena_prefork5(tsdn_t *tsdn, arena_t *arena) { - base_prefork(tsdn, arena->base); + pa_shard_prefork5(tsdn, &arena->pa_shard); } void arena_prefork6(tsdn_t *tsdn, arena_t *arena) { - malloc_mutex_prefork(tsdn, &arena->large_mtx); + base_prefork(tsdn, arena->base); } void arena_prefork7(tsdn_t *tsdn, arena_t *arena) { + malloc_mutex_prefork(tsdn, &arena->large_mtx); +} + +void +arena_prefork8(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < SC_NBINS; i++) { for 
(unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); diff --git a/src/ctl.c b/src/ctl.c index b4e65172..874aaac2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -95,6 +95,9 @@ CTL_PROTO(opt_hpa_slab_goal) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_small_max) CTL_PROTO(opt_hpa_large_min) +CTL_PROTO(opt_hpa_sec_max_alloc) +CTL_PROTO(opt_hpa_sec_max_bytes) +CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -246,6 +249,7 @@ CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) +CTL_PROTO(stats_arenas_i_hpa_sec_bytes) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -360,6 +364,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_small_max"), CTL(opt_hpa_small_max)}, {NAME("hpa_large_min"), CTL(opt_hpa_large_min)}, + {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, + {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, + {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -650,6 +657,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, + {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -889,6 +897,8 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { sizeof(pac_estats_t)); memset(&ctl_arena->astats->hpastats, 0, sizeof(hpa_shard_stats_t)); + memset(&ctl_arena->astats->secstats, 0, + sizeof(sec_stats_t)); } } @@ 
-903,7 +913,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats, ctl_arena->astats->estats, - &ctl_arena->astats->hpastats); + &ctl_arena->astats->hpastats, &ctl_arena->astats->secstats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1089,6 +1099,7 @@ MUTEX_PROF_ARENA_MUTEXES &astats->hpastats.psset_slab_stats[i]); } + sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -1895,6 +1906,9 @@ CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t) CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) @@ -3114,6 +3128,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, + arenas_i(mib[2])->astats->secstats.bytes, size_t) + CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_nmalloc, diff --git a/src/hpa.c b/src/hpa.c index 08992bda..f49aa2b0 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -411,12 +411,12 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { } void -hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { +hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->grow_mtx); } void -hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t 
*shard) { +hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->mtx); } @@ -433,7 +433,7 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { } void -hpa_prefork3(tsdn_t *tsdn, hpa_t *hpa) { +hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) { malloc_mutex_prefork(tsdn, &hpa->grow_mtx); malloc_mutex_prefork(tsdn, &hpa->mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 8ce9ca1e..09b168ca 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -141,6 +141,11 @@ size_t opt_hpa_slab_max_alloc = 256 * 1024; size_t opt_hpa_small_max = 32 * 1024; size_t opt_hpa_large_min = 4 * 1024 * 1024; +size_t opt_hpa_sec_max_alloc = 32 * 1024; +/* These settings correspond to a maximum of 1MB cached per arena. */ +size_t opt_hpa_sec_max_bytes = 256 * 1024; +size_t opt_hpa_sec_nshards = 4; + /* * Arenas that are used to service external requests. Not all elements of the * arenas array are necessarily used; arenas are created lazily as needed. @@ -1494,11 +1499,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], true) CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, "hpa_slab_max_alloc", PAGE, 512 * PAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true) + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", + 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { if 
(CONF_MATCH_VALUE("default")) { @@ -1808,7 +1820,8 @@ malloc_init_hard_a0_locked() { } if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min)) { + opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { return true; } } @@ -4226,7 +4239,7 @@ _malloc_prefork(void) background_thread_prefork1(tsd_tsdn(tsd)); } /* Break arena prefork into stages to preserve lock order. */ - for (i = 0; i < 8; i++) { + for (i = 0; i < 9; i++) { for (j = 0; j < narenas; j++) { if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != NULL) { @@ -4255,12 +4268,15 @@ _malloc_prefork(void) case 7: arena_prefork7(tsd_tsdn(tsd), arena); break; + case 8: + arena_prefork8(tsd_tsdn(tsd), arena); + break; default: not_reached(); } } } - if (i == 3 && opt_hpa) { - hpa_prefork3(tsd_tsdn(tsd), &arena_hpa_global); + if (i == 4 && opt_hpa) { + hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global); } } diff --git a/src/pa.c b/src/pa.c index 8e1ec842..825b10ab 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,7 +49,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min) { + size_t ps_alloc_max, size_t small_max, size_t large_min, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { ps_goal &= ~PAGE_MASK; ps_alloc_max &= ~PAGE_MASK; @@ -60,6 +61,10 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { return true; } + if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, + sec_alloc_max, sec_bytes_max)) { + return true; + } shard->ever_used_hpa = true; atomic_store_b(&shard->use_hpa, true, ATOMIC_RELAXED); @@ -67,24 +72,27 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, } void -pa_shard_disable_hpa(pa_shard_t 
*shard) { +pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); + sec_disable(tsdn, &shard->hpa_sec); } void -pa_shard_reset(pa_shard_t *shard) { +pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); + sec_flush(tsdn, &shard->hpa_sec); } void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { + sec_flush(tsdn, &shard->hpa_sec); pac_destroy(tsdn, &shard->pac); } static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { return (edata_pai_get(edata) == EXTENT_PAI_PAC - ? &shard->pac.pai : &shard->hpa_shard.pai); + ? &shard->pac.pai : &shard->hpa_sec.pai); } edata_t * @@ -95,7 +103,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { - edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, + edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, zero); } /* @@ -173,6 +181,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, emap_deregister_interior(tsdn, shard->emap, edata); edata_slab_set(edata, false); } + edata_addr_set(edata, edata_base_get(edata)); edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); pai_t *pai = pa_get_pai(shard, edata); diff --git a/src/pa_extra.c b/src/pa_extra.c index db236ad8..24cb6537 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -16,17 +16,14 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { - malloc_mutex_prefork(tsdn, &shard->pac.grow_mtx); if (shard->ever_used_hpa) { - hpa_shard_prefork2(tsdn, &shard->hpa_shard); + sec_prefork2(tsdn, &shard->hpa_sec); } } void pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { - ecache_prefork(tsdn, &shard->pac.ecache_dirty); - ecache_prefork(tsdn, &shard->pac.ecache_muzzy); - ecache_prefork(tsdn, &shard->pac.ecache_retained); + malloc_mutex_prefork(tsdn, 
&shard->pac.grow_mtx); if (shard->ever_used_hpa) { hpa_shard_prefork3(tsdn, &shard->hpa_shard); } @@ -34,6 +31,16 @@ pa_shard_prefork3(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork4(tsdn_t *tsdn, pa_shard_t *shard) { + ecache_prefork(tsdn, &shard->pac.ecache_dirty); + ecache_prefork(tsdn, &shard->pac.ecache_muzzy); + ecache_prefork(tsdn, &shard->pac.ecache_retained); + if (shard->ever_used_hpa) { + hpa_shard_prefork4(tsdn, &shard->hpa_shard); + } +} + +void +pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard) { edata_cache_prefork(tsdn, &shard->edata_cache); } @@ -47,6 +54,7 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { + sec_postfork_parent(tsdn, &shard->hpa_sec); hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); } } @@ -61,6 +69,7 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { + sec_postfork_child(tsdn, &shard->hpa_sec); hpa_shard_postfork_child(tsdn, &shard->hpa_shard); } } @@ -76,7 +85,8 @@ pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, + size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -149,6 +159,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, &shard->hpa_shard.psset.slab_stats[i]); } malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx); + sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } @@ -182,5 +193,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, 
pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->hpa_shard.grow_mtx, arena_prof_mutex_hpa_shard_grow); + sec_mutex_stats_read(tsdn, &shard->hpa_sec, + &mutex_prof_data[arena_prof_mutex_hpa_sec]); } } diff --git a/src/stats.c b/src/stats.c index f03e5e44..4b40721a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -678,6 +678,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", i, &ninactive, size_t); + size_t sec_bytes; + CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", + emitter_type_size, &sec_bytes); + emitter_table_printf(emitter, "HPA shard stats:\n" " In full slabs:\n" @@ -1194,6 +1199,9 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_small_max") OPT_WRITE_SIZE_T("hpa_large_min") + OPT_WRITE_SIZE_T("hpa_sec_max_alloc") + OPT_WRITE_SIZE_T("hpa_sec_max_bytes") + OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/src/tcache.c b/src/tcache.c index 6bf1d309..edbedf79 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -716,9 +716,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (arena_nthreads_get(arena, false) == 0 && !background_thread_enabled()) { /* Force purging when no threads assigned to the arena anymore. 
*/ - arena_decay(tsd_tsdn(tsd), arena, false, true); + arena_decay(tsd_tsdn(tsd), arena, + /* is_background_thread */ false, /* all */ true); } else { - arena_decay(tsd_tsdn(tsd), arena, false, false); + arena_decay(tsd_tsdn(tsd), arena, + /* is_background_thread */ false, /* all */ false); } } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ecbcda9e..278bd09d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -168,6 +168,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_small_max, always); TEST_MALLCTL_OPT(size_t, hpa_large_min, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); From 634ec6f50abd57e6371e0c745ab699f2cf6d08e6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 18 Sep 2020 15:50:27 -0700 Subject: [PATCH 1896/2608] Edata: add an "age" field. --- include/jemalloc/internal/edata.h | 44 ++++++++++++++++++++++++------- src/edata.c | 1 + 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index f175af94..632c6c32 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -71,6 +71,7 @@ struct edata_map_info_s { typedef struct edata_s edata_t; typedef ph(edata_t) edata_tree_t; typedef ph(edata_t) edata_heap_t; +typedef ph(edata_t) edata_age_heap_t; struct edata_s { /* * Bitfield containing several fields: @@ -193,16 +194,11 @@ struct edata_s { }; /* - * Reserved for hugepages -- once that allocator is more settled, we - * might be able to claw some of this back. 
Until then, don't get any - * funny ideas about using the space we just freed up to keep some other - * bit of metadata around. That kind of thinking can be hazardous to - * your health. - * - * This keeps the size of an edata_t at exactly 128 bytes on - * architectures with 8-byte pointers and 4k pages. + * In some context-specific sense, the age of an active extent. Each + * context can pick a specific meaning, and share the definition of the + * edata_age_heap_t below. */ - void *reserved1; + uint64_t age; union { /* * We could steal a low bit from these fields to indicate what @@ -374,6 +370,11 @@ edata_bsize_get(const edata_t *edata) { return edata->e_bsize; } +static inline uint64_t +edata_age_get(const edata_t *edata) { + return edata->age; +} + static inline edata_t * edata_ps_get(const edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); @@ -468,6 +469,11 @@ edata_bsize_set(edata_t *edata, size_t bsize) { edata->e_bsize = bsize; } +static inline void +edata_age_set(edata_t *edata, uint64_t age) { + edata->age = age; +} + static inline void edata_ps_set(edata_t *edata, edata_t *ps) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL); @@ -615,6 +621,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, if (config_prof) { edata_prof_tctx_set(edata, NULL); } + edata_age_set(edata, 0); edata_ps_set(edata, NULL); edata_longest_free_range_set(edata, 0); } @@ -630,6 +637,7 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); + edata_age_set(edata, 0); /* * This isn't strictly true, but base allocated extents never get * deallocated and can't be looked up in the emap, but no sense in @@ -698,7 +706,25 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return ret; } +static inline int +edata_age_comp(const edata_t *a, const edata_t *b) { + uint64_t a_age = edata_age_get(a); + uint64_t 
b_age = edata_age_get(b); + + /* + * Equal ages are possible in certain race conditions, like two distinct + * threads simultaneously allocating a new fresh slab without holding a + * bin lock. + */ + int ret = (a_age > b_age) - (a_age < b_age); + if (ret != 0) { + return ret; + } + return edata_snad_comp(a, b); +} + ph_proto(, edata_avail_, edata_tree_t, edata_t) ph_proto(, edata_heap_, edata_heap_t, edata_t) +ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t); #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/src/edata.c b/src/edata.c index 5e53e998..214e993e 100644 --- a/src/edata.c +++ b/src/edata.c @@ -4,3 +4,4 @@ ph_gen(, edata_avail_, edata_tree_t, edata_t, ph_link, edata_esnead_comp) ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) +ph_gen(, edata_age_heap_, edata_age_heap_t, edata_t, ph_link, edata_age_comp) From d16849c91da35c37359331195c6213421a17976a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 18 Sep 2020 16:36:40 -0700 Subject: [PATCH 1897/2608] psset: Do first-fit based on slab age. This functions more like the serial number strategy of the ecache and hpa_central_t. Longer-lived slabs are more likely to continue to live for longer in the future. --- include/jemalloc/internal/psset.h | 5 +- src/psset.c | 47 ++++++++---------- test/unit/psset.c | 80 +++++++++++++++++++++++++++++-- 3 files changed, 100 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 7bba3cbc..14311239 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -44,7 +44,7 @@ struct psset_s { * The pageslabs, quantized by the size class of the largest contiguous * free run of pages in a pageslab. */ - edata_heap_t pageslabs[PSSET_NPSIZES]; + edata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; /* * Full slabs don't live in any edata heap. 
But we still track their @@ -52,6 +52,9 @@ struct psset_s { */ psset_bin_stats_t full_slab_stats; psset_bin_stats_t slab_stats[PSSET_NPSIZES]; + + /* How many alloc_new calls have happened? */ + uint64_t age_counter; }; void psset_init(psset_t *psset); diff --git a/src/psset.c b/src/psset.c index 04d3548f..9fc7ec14 100644 --- a/src/psset.c +++ b/src/psset.c @@ -11,7 +11,7 @@ static const bitmap_info_t psset_bitmap_info = void psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - edata_heap_new(&psset->pageslabs[i]); + edata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); psset->full_slab_stats.npageslabs = 0; @@ -22,6 +22,7 @@ psset_init(psset_t *psset) { psset->slab_stats[i].nactive = 0; psset->slab_stats[i].ninactive = 0; } + psset->age_counter = 0; } /* @@ -48,13 +49,13 @@ psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_heap_remove(&psset->pageslabs[pind], ps); + edata_age_heap_remove(&psset->pageslabs[pind], ps); psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ false); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_heap_insert(&psset->pageslabs[pind], ps); + edata_age_heap_insert(&psset->pageslabs[pind], ps); psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ true); } @@ -70,32 +71,24 @@ psset_assert_ps_consistent(edata_t *ps) { */ static edata_t * psset_recycle_extract(psset_t *psset, size_t size) { - pszind_t ret_ind; - edata_t *ret = NULL; - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - for (pszind_t i = (pszind_t)bitmap_ffu(psset->bitmap, - &psset_bitmap_info, (size_t)pind); - i < PSSET_NPSIZES; - i = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, - (size_t)i + 1)) { - assert(!edata_heap_empty(&psset->pageslabs[i])); - edata_t *ps = edata_heap_first(&psset->pageslabs[i]); 
- if (ret == NULL || edata_snad_comp(ps, ret) < 0) { - ret = ps; - ret_ind = i; - } + pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, + (size_t)min_pind); + if (pind == PSSET_NPSIZES) { + return NULL; } - if (ret == NULL) { + edata_t *ps = edata_age_heap_first(&psset->pageslabs[pind]); + if (ps == NULL) { return NULL; } - psset_edata_heap_remove(psset, ret_ind, ret); - if (edata_heap_empty(&psset->pageslabs[ret_ind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, ret_ind); + psset_edata_heap_remove(psset, pind, ps); + if (edata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, pind); } - psset_assert_ps_consistent(ret); - return ret; + psset_assert_ps_consistent(ps); + return ps; } static void @@ -107,7 +100,7 @@ psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { assert(pind < PSSET_NPSIZES); - if (edata_heap_empty(&psset->pageslabs[pind])) { + if (edata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } psset_edata_heap_insert(psset, pind, ps); @@ -215,6 +208,8 @@ psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { assert(fb_empty(ps_fb, ps_npages)); assert(ps_npages >= (size >> LG_PAGE)); edata_nfree_set(ps, (uint32_t)ps_npages); + edata_age_set(ps, psset->age_counter); + psset->age_counter++; psset_ps_alloc_insert(psset, ps, r_edata, size); } @@ -287,7 +282,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { */ if (ps_old_longest_free_range > 0) { psset_edata_heap_remove(psset, old_pind, ps); - if (edata_heap_empty(&psset->pageslabs[old_pind])) { + if (edata_age_heap_empty(&psset->pageslabs[old_pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } @@ -299,7 +294,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { /* Otherwise, it gets reinserted. 
*/ pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( new_range_len << LG_PAGE)); - if (edata_heap_empty(&psset->pageslabs[new_pind])) { + if (edata_age_heap_empty(&psset->pageslabs[new_pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); } diff --git a/test/unit/psset.c b/test/unit/psset.c index 0bc4460f..861903d6 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -266,8 +266,7 @@ TEST_BEGIN(test_multi_pageslab) { /* * Free up a 2-page hole in the earlier slab, and a 1-page one in the - * later one. We should still pick the earlier slab for a 1-page - * allocation. + * later one. We should still pick the later one. */ ps = psset_dalloc(&psset, &alloc[0][0]); expect_ptr_null(ps, "Unexpected eviction"); @@ -276,8 +275,8 @@ TEST_BEGIN(test_multi_pageslab) { ps = psset_dalloc(&psset, &alloc[1][0]); expect_ptr_null(ps, "Unexpected eviction"); err = psset_alloc_reuse(&psset, &alloc[0][0], PAGE); - expect_ptr_eq(&pageslab[0], edata_ps_get(&alloc[0][0]), - "Should have picked first pageslab"); + expect_ptr_eq(&pageslab[1], edata_ps_get(&alloc[0][0]), + "Should have picked the fuller pageslab"); /* * Now both slabs have 1-page holes. Free up a second one in the later @@ -370,6 +369,76 @@ TEST_BEGIN(test_stats) { } TEST_END +TEST_BEGIN(test_oldest_fit) { + bool err; + edata_t alloc[PAGESLAB_PAGES]; + edata_t worse_alloc[PAGESLAB_PAGES]; + + edata_t pageslab; + memset(&pageslab, 0, sizeof(pageslab)); + edata_init(&pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), + PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN + 1, + extent_state_active, /* zeroed */ false, /* comitted */ true, + EXTENT_PAI_HPA, EXTENT_IS_HEAD); + + /* + * This pageslab is better from an edata_comp_snad POV, but will be + * added to the set after the previous one, and so should be less + * preferred for allocations. 
+ */ + edata_t worse_pageslab; + memset(&worse_pageslab, 0, sizeof(pageslab)); + edata_init(&worse_pageslab, /* arena_ind */ 0, + (void *)(9 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, + SC_NSIZES, PAGESLAB_SN - 1, extent_state_active, /* zeroed */ false, + /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); + + psset_t psset; + psset_init(&psset); + + edata_init_test(&alloc[0]); + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + edata_init_test(&alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + expect_ptr_eq(&pageslab, edata_ps_get(&alloc[i]), + "Allocated from the wrong pageslab"); + } + + edata_init_test(&worse_alloc[0]); + psset_alloc_new(&psset, &worse_pageslab, &worse_alloc[0], PAGE); + expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[0]), + "Allocated from the wrong pageslab"); + /* + * Make the two pssets otherwise indistinguishable; all full except for + * a single page. + */ + for (size_t i = 1; i < PAGESLAB_PAGES - 1; i++) { + edata_init_test(&worse_alloc[i]); + err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + expect_ptr_eq(&worse_pageslab, edata_ps_get(&alloc[i]), + "Allocated from the wrong pageslab"); + } + + /* Deallocate the last page from the older pageslab. */ + edata_t *evicted = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(evicted, "Unexpected eviction"); + + /* + * This edata is the whole purpose for the test; it should come from the + * older pageslab. 
+ */ + edata_t test_edata; + edata_init_test(&test_edata); + err = psset_alloc_reuse(&psset, &test_edata, PAGE); + expect_false(err, "Nonempty psset failed page allocation"); + expect_ptr_eq(&pageslab, edata_ps_get(&test_edata), + "Allocated from the wrong pageslab"); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -378,5 +447,6 @@ main(void) { test_reuse, test_evict, test_multi_pageslab, - test_stats); + test_stats, + test_oldest_fit); } From ea32060f9ca5e14077cda7fa2401a1f91f55ad82 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 22 Oct 2020 14:13:09 -0700 Subject: [PATCH 1898/2608] SEC: Implement thread affinity. For now, just have every thread pick a shard once and stick with it. --- include/jemalloc/internal/tsd.h | 2 ++ src/sec.c | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 9408b2ca..5ac85e14 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -77,6 +77,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(sec_shard, uint8_t, uint8_t) \ O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ @@ -106,6 +107,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* iarena */ NULL, \ /* arena */ NULL, \ /* arenas_tdata */ NULL, \ + /* sec_shard */ (uint8_t)-1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ /* in_hook */ false, \ diff --git a/src/sec.c b/src/sec.c index f3c906bc..262d813d 100644 --- a/src/sec.c +++ b/src/sec.c @@ -61,14 +61,20 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { return &sec->shards[0]; } tsd_t *tsd = tsdn_tsd(tsdn); - /* - * Use the trick from Daniel Lemire's "A fast alternative to the modulo - * reduction. 
Use a 64 bit number to store 32 bits, since we'll - * deliberately overflow when we multiply by the number of shards. - */ - uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); - uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); - return &sec->shards[idx]; + uint8_t *idxp = tsd_sec_shardp_get(tsd); + if (*idxp == (uint8_t)-1) { + /* + * First use; initialize using the trick from Daniel Lemire's + * "A fast alternative to the modulo reduction. Use a 64 bit + * number to store 32 bits, since we'll deliberately overflow + * when we multiply by the number of shards. + */ + uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); + assert(idx < (uint32_t)sec->nshards); + *idxp = (uint8_t)idx; + } + return &sec->shards[*idxp]; } static edata_t * From bf72188f80c59328b20441c79861f9373c22bccd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 21 Oct 2020 19:47:57 -0700 Subject: [PATCH 1899/2608] Allow opt.tcache_max to accept small size classes. Previously all the small size classes were cached. However this has downsides -- particularly when page size is greater than 4K (e.g. iOS), which will result in much higher SMALL_MAXCLASS. This change allows tcache_max to be set to lower values, to better control resources taken by tcache. 
--- Makefile.in | 1 + doc/jemalloc.xml.in | 2 +- include/jemalloc/internal/cache_bin.h | 11 ++ include/jemalloc/internal/tcache_inlines.h | 31 +++- src/cache_bin.c | 19 +-- src/tcache.c | 52 +++++-- test/unit/arena_decay.c | 7 +- test/unit/arena_decay.sh | 2 +- test/unit/tcache_max.c | 170 +++++++++++++++++++++ test/unit/tcache_max.sh | 3 + 10 files changed, 265 insertions(+), 33 deletions(-) create mode 100644 test/unit/tcache_max.c create mode 100644 test/unit/tcache_max.sh diff --git a/Makefile.in b/Makefile.in index 0136a40e..34df2398 100644 --- a/Makefile.in +++ b/Makefile.in @@ -264,6 +264,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/spin.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/tcache_max.c \ $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/thread_event.c \ $(srcroot)test/unit/ticker.c \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e5f2aa67..e24c191c 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1313,7 +1313,7 @@ malloc_conf = "xmalloc:true";]]> r- Maximum size class to cache in the thread-specific cache - (tcache). At a minimum, all small size classes are cached; and at a + (tcache). At a minimum, the first size class is cached; and at a maximum, size classes up to 8 MiB can be cached. The default maximum is 32 KiB (2^15). As a convenience, this may also be set by specifying lg_tcache_max, which will be taken to be the base-2 logarithm of the diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 0767862c..64275f24 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -20,6 +20,17 @@ */ typedef uint16_t cache_bin_sz_t; +/* + * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a + * bug starts leaking those. Make it look like the junk pattern but be distinct + * from it. 
+ */ +static const uintptr_t cache_bin_preceding_junk = + (uintptr_t)0x7a7a7a7a7a7a7a7aULL; +/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ +static const uintptr_t cache_bin_trailing_junk = + (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; + /* * That implies the following value, for the maximum number of items in any * individual bin. The cache bins track their bounds looking just at the low diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 1cba9186..926c852d 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -26,6 +26,20 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } +JEMALLOC_ALWAYS_INLINE bool +tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { + assert(ind < SC_NBINS); + bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0); + if (ret && bin != NULL) { + /* small size class but cache bin disabled. */ + assert(ind >= nhbins); + assert((uintptr_t)(*bin->stack_head) == + cache_bin_preceding_junk); + } + + return ret; +} + JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { @@ -42,6 +56,11 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(arena == NULL)) { return NULL; } + if (unlikely(tcache_small_bin_disabled(binind, bin))) { + /* stats and zero are handled directly by the arena. 
*/ + return arena_malloc_hard(tsd_tsdn(tsd), arena, size, + binind, zero); + } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin, binind, &tcache_hard_success); @@ -104,13 +123,17 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - assert(tcache_salloc(tsd_tsdn(tsd), ptr) - <= SC_SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> opt_lg_tcache_flush_small_div; + if (unlikely(tcache_small_bin_disabled(binind, bin))) { + arena_dalloc_small(tsd_tsdn(tsd), ptr); + return; + } + cache_bin_sz_t max = cache_bin_info_ncached_max( + &tcache_bin_info[binind]); + unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/cache_bin.c b/src/cache_bin.c index 1e26c4ef..1d04b0dd 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -24,6 +24,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ *size = sizeof(void *) * 2; for (szind_t i = 0; i < ninfos; i++) { + assert(infos[i].ncached_max > 0); *size += infos[i].ncached_max * sizeof(void *); } @@ -46,26 +47,20 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, &computed_alignment); assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); } - /* - * Leave a noticeable mark pattern on the boundaries, in case a bug - * starts leaking those. Make it look like the junk pattern but be - * distinct from it. 
- */ - uintptr_t preceding_ptr_junk = (uintptr_t)0x7a7a7a7a7a7a7a7aULL; - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = preceding_ptr_junk; + + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + cache_bin_preceding_junk; *cur_offset += sizeof(void *); } void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset) { - /* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ - uintptr_t trailing_ptr_junk = (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = trailing_ptr_junk; + *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + cache_bin_trailing_junk; *cur_offset += sizeof(void *); } - void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset) { @@ -90,6 +85,8 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); assert(cache_bin_ncached_get(bin, info) == 0); assert(cache_bin_empty_position_get(bin, info) == empty_position); + + assert(bin_stack_size > 0 || empty_position == full_position); } bool diff --git a/src/tcache.c b/src/tcache.c index edbedf79..41a1b828 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -62,7 +62,9 @@ cache_bin_info_t *tcache_bin_info; static size_t tcache_bin_alloc_size; static size_t tcache_bin_alloc_alignment; +/* Number of cache bins enabled, including both large and small. */ unsigned nhbins; +/* Max size class to be cached (can be small or large). */ size_t tcache_maxclass; tcaches_t *tcaches; @@ -567,7 +569,14 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->arena = NULL; tcache_slow->dyn_alloc = mem; - memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); + /* + * We reserve cache bins for all small size classes, even if some may + * not get used (i.e. bins higher than nhbins). 
This allows the fast + * and common paths to access cache bin metadata safely w/o worrying + * about which ones are disabled. + */ + unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins; + memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins); size_t cur_offset = 0; cache_bin_preincrement(tcache_bin_info, nhbins, mem, @@ -576,19 +585,34 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (i < SC_NBINS) { tcache_slow->lg_fill_div[i] = 1; tcache_slow->bin_refilled[i] = false; + tcache_slow->bin_flush_delay_items[i] + = tcache_gc_item_delay_compute(i); } cache_bin_t *cache_bin = &tcache->bins[i]; cache_bin_init(cache_bin, &tcache_bin_info[i], mem, &cur_offset); } + /* + * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS), + * their cache bins are initialized to a state to safely and efficiently + * fail all fastpath alloc / free, so that no additional check around + * nhbins is needed on fastpath. + */ + for (unsigned i = nhbins; i < SC_NBINS; i++) { + /* Disabled small bins. */ + cache_bin_t *cache_bin = &tcache->bins[i]; + void *fake_stack = mem; + size_t fake_offset = 0; + + cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack, + &fake_offset); + assert(tcache_small_bin_disabled(i, cache_bin)); + } + cache_bin_postincrement(tcache_bin_info, nhbins, mem, &cur_offset); /* Sanity check that the whole stack is used. */ assert(cur_offset == tcache_bin_alloc_size); - for (unsigned i = 0; i < SC_NBINS; i++) { - tcache_slow->bin_flush_delay_items[i] - = tcache_gc_item_delay_compute(i); - } } /* Initialize auto tcache (embedded in TSD). 
*/ @@ -935,9 +959,6 @@ tcache_ncached_max_compute(szind_t szind) { bool tcache_boot(tsdn_t *tsdn, base_t *base) { tcache_maxclass = sz_s2u(opt_tcache_max); - if (tcache_maxclass < SC_SMALL_MAXCLASS) { - tcache_maxclass = SC_SMALL_MAXCLASS; - } assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); nhbins = sz_size2index(tcache_maxclass) + 1; @@ -946,16 +967,25 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { return true; } - /* Initialize tcache_bin_info. */ - tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, - nhbins * sizeof(cache_bin_info_t), CACHELINE); + /* Initialize tcache_bin_info. See comments in tcache_init(). */ + unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins; + size_t size = n_reserved_bins * sizeof(cache_bin_info_t); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size, + CACHELINE); if (tcache_bin_info == NULL) { return true; } + for (szind_t i = 0; i < nhbins; i++) { unsigned ncached_max = tcache_ncached_max_compute(i); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } + for (szind_t i = nhbins; i < SC_NBINS; i++) { + /* Disabled small bins. */ + cache_bin_info_init(&tcache_bin_info[i], 0); + assert(tcache_small_bin_disabled(i, NULL)); + } + cache_bin_info_compute_alloc(tcache_bin_info, nhbins, &tcache_bin_alloc_size, &tcache_bin_alloc_alignment); diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 555f71ae..a2661682 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -432,7 +432,6 @@ TEST_BEGIN(test_decay_ticker) { unsigned arena_ind = do_arena_create(ddt, mdt); int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); void *ps[NPS]; - size_t large; /* * Allocate a bunch of large objects, pause the clock, deallocate every @@ -440,12 +439,10 @@ TEST_BEGIN(test_decay_ticker) { * [md]allocx() in a tight loop while advancing time rapidly to verify * the ticker triggers purging. 
*/ - - size_t tcache_max; + size_t large; size_t sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, &sz, NULL, + expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - large = nallocx(tcache_max + 1, flags); do_purge(arena_ind); uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); diff --git a/test/unit/arena_decay.sh b/test/unit/arena_decay.sh index 45aeccf4..52f1b207 100644 --- a/test/unit/arena_decay.sh +++ b/test/unit/arena_decay.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,lg_tcache_max:0" +export MALLOC_CONF="dirty_decay_ms:1000,muzzy_decay_ms:1000,tcache_max:1024" diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c new file mode 100644 index 00000000..0594ceff --- /dev/null +++ b/test/unit/tcache_max.c @@ -0,0 +1,170 @@ +#include "test/jemalloc_test.h" + +enum { + alloc_option_start = 0, + use_malloc = 0, + use_mallocx, + alloc_option_end +}; + +enum { + dalloc_option_start = 0, + use_free = 0, + use_dallocx, + use_sdallocx, + dalloc_option_end +}; + +static unsigned alloc_option, dalloc_option; +static size_t tcache_max; + +static void * +alloc_func(size_t sz) { + void *ret; + + switch (alloc_option) { + case use_malloc: + ret = malloc(sz); + break; + case use_mallocx: + ret = mallocx(sz, 0); + break; + default: + unreachable(); + } + expect_ptr_not_null(ret, "Unexpected malloc / mallocx failure"); + + return ret; +} + +static void +dalloc_func(void *ptr, size_t sz) { + switch (dalloc_option) { + case use_free: + free(ptr); + break; + case use_dallocx: + dallocx(ptr, 0); + break; + case use_sdallocx: + sdallocx(ptr, sz, 0); + break; + default: + unreachable(); + } +} + +static size_t +tcache_bytes_read(void) { + uint64_t epoch; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + + size_t tcache_bytes; + size_t sz = sizeof(tcache_bytes); + 
assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + + return tcache_bytes; +} + +static void +tcache_bytes_check_update(size_t *prev, ssize_t diff) { + size_t tcache_bytes = tcache_bytes_read(); + expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected"); + + *prev += diff; +} + +static void +test_tcache_bytes_alloc(size_t alloc_size) { + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); + + size_t usize = sz_s2u(alloc_size); + /* No change is expected if usize is outside of tcache_max range. */ + bool cached = (usize <= tcache_max); + ssize_t diff = cached ? usize : 0; + + void *ptr1 = alloc_func(alloc_size); + void *ptr2 = alloc_func(alloc_size); + + size_t bytes = tcache_bytes_read(); + dalloc_func(ptr2, alloc_size); + /* Expect tcache_bytes increase after dalloc */ + tcache_bytes_check_update(&bytes, diff); + + dalloc_func(ptr1, alloc_size); + /* Expect tcache_bytes increase again */ + tcache_bytes_check_update(&bytes, diff); + + void *ptr3 = alloc_func(alloc_size); + if (cached) { + expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr"); + } + /* Expect tcache_bytes decrease after alloc */ + tcache_bytes_check_update(&bytes, -diff); + + void *ptr4 = alloc_func(alloc_size); + if (cached) { + expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr"); + } + /* Expect tcache_bytes decrease again */ + tcache_bytes_check_update(&bytes, -diff); + + dalloc_func(ptr3, alloc_size); + tcache_bytes_check_update(&bytes, diff); + dalloc_func(ptr4, alloc_size); + tcache_bytes_check_update(&bytes, diff); +} + +static void +test_tcache_max_impl(void) { + size_t sz; + sz = sizeof(tcache_max); + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + + /* opt.tcache_max set to 1024 in tcache_max.sh */ + expect_zu_eq(tcache_max, 1024, "tcache_max not expected"); + 
+ test_tcache_bytes_alloc(1); + test_tcache_bytes_alloc(tcache_max - 1); + test_tcache_bytes_alloc(tcache_max); + test_tcache_bytes_alloc(tcache_max + 1); + + test_tcache_bytes_alloc(PAGE - 1); + test_tcache_bytes_alloc(PAGE); + test_tcache_bytes_alloc(PAGE + 1); + + size_t large; + sz = sizeof(large); + assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, + 0), 0, "Unexpected mallctl() failure"); + + test_tcache_bytes_alloc(large - 1); + test_tcache_bytes_alloc(large); + test_tcache_bytes_alloc(large + 1); +} + +TEST_BEGIN(test_tcache_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + + for (alloc_option = alloc_option_start; + alloc_option < alloc_option_end; + alloc_option++) { + for (dalloc_option = dalloc_option_start; + dalloc_option < dalloc_option_end; + dalloc_option++) { + test_tcache_max_impl(); + } + } +} +TEST_END + +int +main(void) { + return test(test_tcache_max); +} diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh new file mode 100644 index 00000000..4480d733 --- /dev/null +++ b/test/unit/tcache_max.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="tcache_max:1024" From ef6d51ed44ab864e6db8722a19758f67cc7b12d9 Mon Sep 17 00:00:00 2001 From: DC Date: Sun, 25 Oct 2020 15:17:24 +0000 Subject: [PATCH 1900/2608] DragonFlyBSD build support. 
--- include/jemalloc/internal/jemalloc_internal_decls.h | 2 +- src/background_thread.c | 2 +- src/jemalloc.c | 2 +- src/prof_sys.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 32058ced..7d212c4e 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -32,7 +32,7 @@ # include # endif # include -# ifdef __FreeBSD__ +# if defined(__FreeBSD__) || defined(__DragonFly__) # include # endif # include diff --git a/src/background_thread.c b/src/background_thread.c index a36836cb..d4f96b1a 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -506,7 +506,7 @@ background_thread_entry(void *ind_arg) { assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); -#elif defined(__FreeBSD__) +#elif defined(__FreeBSD__) || defined(__DragonFly__) pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); #endif if (opt_percpu_arena != percpu_arena_disabled) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 09b168ca..2a791e17 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -815,7 +815,7 @@ malloc_ncpus(void) { * is available, to avoid using more arenas than necessary. 
*/ { -# if defined(__FreeBSD__) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; # else cpu_set_t set; diff --git a/src/prof_sys.c b/src/prof_sys.c index dddba4b6..777ef1d2 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -436,7 +436,7 @@ prof_dump_open_maps_impl() { int mfd; cassert(config_prof); -#ifdef __FreeBSD__ +#if defined(__FreeBSD__) || defined(__DragonFly__) mfd = prof_open_maps_internal("/proc/curproc/map"); #elif defined(_WIN32) mfd = -1; // Not implemented From 180b84315933b7d986fff7539eeb262eb44bc75d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 27 Oct 2020 12:42:23 -0700 Subject: [PATCH 1901/2608] Appveyor: fix 404 errors. It looks like the mirrors we were using no longer carry this package, but that it is installed by default and so no longer needs a remote mirror. --- .appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.appveyor.yml b/.appveyor.yml index f44868da..d31f9aed 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -31,7 +31,6 @@ install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc - - pacman --noconfirm -S mingw-w64-%CPU%-make build_script: - bash -c "autoconf" From d2d941017b8a62ee7d835ccfb7b34c54ce32e371 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 1 Nov 2020 20:52:56 +0000 Subject: [PATCH 1902/2608] MADV_DO[NOT]DUMP support equivalence on FreeBSD. 
--- configure.ac | 10 ++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ src/pages.c | 8 ++++++-- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index d55c0b8e..ca5e2f13 100644 --- a/configure.ac +++ b/configure.ac @@ -2132,6 +2132,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + dnl Check for madvise(..., MADV_[NO]CORE). + JE_COMPILABLE([madvise(..., MADV_[[NO]]CORE)], [ +#include +], [ + madvise((void *)0, 0, MADV_NOCORE); + madvise((void *)0, 0, MADV_CORE); +], [je_cv_madv_nocore]) + if test "x${je_cv_madv_nocore}" = "xyes" ; then + AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ]) + fi case "${host_cpu}" in arm*) ;; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7af28f73..5ea1a191 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -298,6 +298,11 @@ */ #undef JEMALLOC_MADVISE_DONTDUMP +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +#undef JEMALLOC_MADVISE_NOCORE + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. 
diff --git a/src/pages.c b/src/pages.c index 05bbf728..59a03f21 100644 --- a/src/pages.c +++ b/src/pages.c @@ -413,8 +413,10 @@ bool pages_dontdump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_MADVISE_DONTDUMP +#if defined(JEMALLOC_MADVISE_DONTDUMP) return madvise(addr, size, MADV_DONTDUMP) != 0; +#elif defined(JEMALLOC_MADVISE_NOCORE) + return madvise(addr, size, MADV_NOCORE) != 0; #else return false; #endif @@ -424,8 +426,10 @@ bool pages_dodump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); -#ifdef JEMALLOC_MADVISE_DONTDUMP +#if defined(JEMALLOC_MADVISE_DONTDUMP) return madvise(addr, size, MADV_DODUMP) != 0; +#elif defined(JEMALLOC_MADVISE_NOCORE) + return madvise(addr, size, MADV_CORE) != 0; #else return false; #endif From 27ef02ca9a21f2e6a432e67dd3d2bafc8a04371f Mon Sep 17 00:00:00 2001 From: David Carlier Date: Thu, 23 Apr 2020 20:23:04 +0100 Subject: [PATCH 1903/2608] Android build fix proposal. These are detected at configure time while they are glibc specifics. the bionic equivalent is not api compatible and dlopen is restricted in this platform. --- configure.ac | 52 ++++++++++++++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/configure.ac b/configure.ac index ca5e2f13..1e6de8a8 100644 --- a/configure.ac +++ b/configure.ac @@ -661,10 +661,11 @@ case "${host}" in *-*-bitrig*) abi="elf" ;; - *-*-linux-android) + *-*-linux-android*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" + glibc="0" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -679,6 +680,7 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. 
JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" + glibc="1" AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) @@ -2258,37 +2260,39 @@ fi dnl ============================================================================ dnl Check for glibc malloc hooks -JE_COMPILABLE([glibc malloc hook], [ -#include +if test "x$glibc" = "x1" ; then + JE_COMPILABLE([glibc malloc hook], [ + #include -extern void (* __free_hook)(void *ptr); -extern void *(* __malloc_hook)(size_t size); -extern void *(* __realloc_hook)(void *ptr, size_t size); + extern void (* __free_hook)(void *ptr); + extern void *(* __malloc_hook)(size_t size); + extern void *(* __realloc_hook)(void *ptr, size_t size); ], [ - void *ptr = 0L; - if (__malloc_hook) ptr = __malloc_hook(1); - if (__realloc_hook) ptr = __realloc_hook(ptr, 2); - if (__free_hook && ptr) __free_hook(ptr); + void *ptr = 0L; + if (__malloc_hook) ptr = __malloc_hook(1); + if (__realloc_hook) ptr = __realloc_hook(ptr, 2); + if (__free_hook && ptr) __free_hook(ptr); ], [je_cv_glibc_malloc_hook]) -if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then - if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) - wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" + if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then + if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) + wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" + fi fi -fi -JE_COMPILABLE([glibc memalign hook], [ -#include + JE_COMPILABLE([glibc memalign hook], [ + #include -extern void *(* __memalign_hook)(size_t alignment, size_t size); + extern void *(* __memalign_hook)(size_t alignment, size_t size); ], [ - void *ptr = 0L; - if (__memalign_hook) ptr = __memalign_hook(16, 7); + void *ptr = 0L; + if (__memalign_hook) ptr = __memalign_hook(16, 7); ], [je_cv_glibc_memalign_hook]) -if test 
"x${je_cv_glibc_memalign_hook}" = "xyes" ; then - if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) - wrap_syms="${wrap_syms} __memalign_hook" + if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then + if test "x${JEMALLOC_PREFIX}" = "x" ; then + AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) + wrap_syms="${wrap_syms} __memalign_hook" + fi fi fi From 1b3ee75667dd7820808d35d16bfcebdd146be70a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 30 Oct 2020 16:31:32 -0700 Subject: [PATCH 1904/2608] Add experimental.thread.activity_callback. This (experimental, undocumented) functionality can be used by users to track various statistics of interest at a finer level of granularity than the thread. --- include/jemalloc/internal/activity_callback.h | 23 ++++++ include/jemalloc/internal/tsd.h | 5 ++ src/ctl.c | 35 ++++++++- src/peak_event.c | 17 ++++- test/unit/mallctl.c | 74 ++++++++++++++++++- 5 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 include/jemalloc/internal/activity_callback.h diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h new file mode 100644 index 00000000..6c2e84e3 --- /dev/null +++ b/include/jemalloc/internal/activity_callback.h @@ -0,0 +1,23 @@ +#ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H +#define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H + +/* + * The callback to be executed "periodically", in response to some amount of + * allocator activity. + * + * This callback need not be computing any sort of peak (although that's the + * intended first use case), but we drive it from the peak counter, so it's + * keeps things tidy to keep it here. + * + * The calls to this thunk get driven by the peak_event module. 
+ */ +#define ACTIVITY_CALLBACK_THUNK_INITIALIZER {NULL, NULL} +typedef void (*activity_callback_t)(void *uctx, uint64_t allocated, + uint64_t deallocated); +typedef struct activity_callback_thunk_s activity_callback_thunk_t; +struct activity_callback_thunk_s { + activity_callback_t callback; + void *uctx; +}; + +#endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 5ac85e14..60764199 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TSD_H #define JEMALLOC_INTERNAL_TSD_H +#include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_types.h" @@ -82,6 +83,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ O(peak, peak_t, peak_t) \ + O(activity_callback_thunk, activity_callback_thunk_t, \ + activity_callback_thunk_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) @@ -112,6 +115,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* tsd_link */ {NULL}, \ /* in_hook */ false, \ /* peak */ PEAK_INITIALIZER, \ + /* activity_callback_thunk */ \ + ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, diff --git a/src/ctl.c b/src/ctl.c index 874aaac2..d5dd1d16 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -264,6 +264,7 @@ CTL_PROTO(stats_retained) CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) @@ -712,6 +713,11 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("remove"), CTL(experimental_hooks_remove)} }; +static 
const ctl_named_node_t experimental_thread_node[] = { + {NAME("activity_callback"), + CTL(experimental_thread_activity_callback)} +}; + static const ctl_named_node_t experimental_utilization_node[] = { {NAME("query"), CTL(experimental_utilization_query)}, {NAME("batch_query"), CTL(experimental_utilization_batch_query)} @@ -738,7 +744,8 @@ static const ctl_named_node_t experimental_node[] = { {NAME("utilization"), CHILD(named, experimental_utilization)}, {NAME("arenas"), CHILD(indexed, experimental_arenas)}, {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, - {NAME("batch_alloc"), CTL(experimental_batch_alloc)} + {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, + {NAME("thread"), CHILD(named, experimental_thread)} }; static const ctl_named_node_t root_node[] = { @@ -3428,6 +3435,32 @@ label_return: return ret; } +static int +experimental_thread_activity_callback_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (!config_stats) { + return ENOENT; + } + + activity_callback_thunk_t t_old = tsd_activity_callback_thunk_get(tsd); + READ(t_old, activity_callback_thunk_t); + + if (newp != NULL) { + /* + * This initialization is unnecessary. If it's omitted, though, + * clang gets confused and warns on the subsequent use of t_new. 
+ */ + activity_callback_thunk_t t_new = {NULL, NULL}; + WRITE(t_new, activity_callback_thunk_t); + tsd_activity_callback_thunk_set(tsd, t_new); + } + ret = 0; +label_return: + return ret; +} + /* * Output six memory utilization entries for an input pointer, the first one of * type (void *) and the remaining five of type size_t, describing the following diff --git a/src/peak_event.c b/src/peak_event.c index 79d91e02..4093fbcc 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -1,9 +1,11 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/peak.h" #include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/activity_callback.h" +#include "jemalloc/internal/peak.h" + /* * Update every 64K by default. We're not exposing this as a configuration * option for now; we don't want to bind ourselves too tightly to any particular @@ -21,6 +23,17 @@ peak_event_update(tsd_t *tsd) { peak_update(peak, alloc, dalloc); } +static void +peak_event_activity_callback(tsd_t *tsd) { + activity_callback_thunk_t *thunk = tsd_activity_callback_thunkp_get( + tsd); + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + if (thunk->callback != NULL) { + thunk->callback(thunk->uctx, alloc, dalloc); + } +} + /* Set current state to zero. 
*/ void peak_event_zero(tsd_t *tsd) { @@ -49,6 +62,7 @@ peak_alloc_postponed_event_wait(tsd_t *tsd) { void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) { peak_event_update(tsd); + peak_event_activity_callback(tsd); } uint64_t @@ -64,4 +78,5 @@ peak_dalloc_postponed_event_wait(tsd_t *tsd) { void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { peak_event_update(tsd); + peak_event_activity_callback(tsd); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 278bd09d..d4e2621e 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1030,6 +1030,77 @@ TEST_BEGIN(test_thread_peak) { } TEST_END +typedef struct activity_test_data_s activity_test_data_t; +struct activity_test_data_s { + uint64_t obtained_alloc; + uint64_t obtained_dalloc; +}; + +static void +activity_test_callback(void *uctx, uint64_t alloc, uint64_t dalloc) { + activity_test_data_t *test_data = (activity_test_data_t *)uctx; + test_data->obtained_alloc = alloc; + test_data->obtained_dalloc = dalloc; +} + +TEST_BEGIN(test_thread_activity_callback) { + test_skip_if(!config_stats); + + const size_t big_size = 10 * 1024 * 1024; + void *ptr; + int err; + size_t sz; + + uint64_t *allocatedp; + uint64_t *deallocatedp; + sz = sizeof(allocatedp); + err = mallctl("thread.allocatedp", &allocatedp, &sz, NULL, 0); + assert_d_eq(0, err, ""); + err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0); + assert_d_eq(0, err, ""); + + activity_callback_thunk_t old_thunk = {(activity_callback_t)111, + (void *)222}; + + activity_test_data_t test_data = {333, 444}; + activity_callback_thunk_t new_thunk = + {&activity_test_callback, &test_data}; + + sz = sizeof(old_thunk); + err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, + &new_thunk, sizeof(new_thunk)); + assert_d_eq(0, err, ""); + + expect_true(old_thunk.callback == NULL, "Callback already installed"); + expect_true(old_thunk.uctx == NULL, "Callback data already installed"); + + ptr = mallocx(big_size, 0); + 
expect_u64_eq(test_data.obtained_alloc, *allocatedp, ""); + expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); + + free(ptr); + expect_u64_eq(test_data.obtained_alloc, *allocatedp, ""); + expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); + + sz = sizeof(old_thunk); + new_thunk = (activity_callback_thunk_t){ NULL, NULL }; + err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, + &new_thunk, sizeof(new_thunk)); + assert_d_eq(0, err, ""); + + expect_true(old_thunk.callback == &activity_test_callback, ""); + expect_true(old_thunk.uctx == &test_data, ""); + + /* Inserting NULL should have turned off tracking. */ + test_data.obtained_alloc = 333; + test_data.obtained_dalloc = 444; + ptr = mallocx(big_size, 0); + free(ptr); + expect_u64_eq(333, test_data.obtained_alloc, ""); + expect_u64_eq(444, test_data.obtained_dalloc, ""); +} +TEST_END + int main(void) { return test( @@ -1063,5 +1134,6 @@ main(void) { test_hooks, test_hooks_exhaustion, test_thread_idle, - test_thread_peak); + test_thread_peak, + test_thread_activity_callback); } From c9757d9e3ba6b53e7f4ecbe9c1872a74df51fe4b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 30 Oct 2020 15:05:48 -0700 Subject: [PATCH 1905/2608] HPA: Don't disable shards that were never started. 
--- src/pa.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/pa.c b/src/pa.c index 825b10ab..59873c12 100644 --- a/src/pa.c +++ b/src/pa.c @@ -74,19 +74,26 @@ pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); - sec_disable(tsdn, &shard->hpa_sec); + if (shard->ever_used_hpa) { + sec_disable(tsdn, &shard->hpa_sec); + } } void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); - sec_flush(tsdn, &shard->hpa_sec); + if (shard->ever_used_hpa) { + sec_flush(tsdn, &shard->hpa_sec); + } } void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { sec_flush(tsdn, &shard->hpa_sec); pac_destroy(tsdn, &shard->pac); + if (shard->ever_used_hpa) { + sec_flush(tsdn, &shard->hpa_sec); + } } static pai_t * From 03a604711113c9d883242291ca11b77c83ba4c75 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 29 Oct 2020 05:11:16 -0700 Subject: [PATCH 1906/2608] Edata cache small: rewrite. In previous designs, this was intended to be a sort of cache that couldn't fail. In the current design, we want to use it just as a contention reduction mechanism. Rewrite it with those goals in mind. --- include/jemalloc/internal/edata_cache.h | 39 ++--- src/edata_cache.c | 134 ++++++++++----- test/unit/edata_cache.c | 206 +++++++++++++++++++++--- 3 files changed, 302 insertions(+), 77 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 02685c87..f7d0c319 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -3,6 +3,16 @@ #include "jemalloc/internal/base.h" +/* + * Public for tests. When we go to the fallback when the small cache is empty, + * we grab up to 8 items (grabbing less only if the fallback is exhausted). + * When we exceed 16, we flush. 
This caps the maximum memory lost per cache to + * 16 * sizeof(edata_t), a max of 2k on architectures where the edata_t is 128 + * bytes. + */ +#define EDATA_CACHE_SMALL_MAX 16 +#define EDATA_CACHE_SMALL_FILL 8 + /* * A cache of edata_t structures allocated via base_alloc_edata (as opposed to * the underlying extents they describe). The contents of returned edata_t @@ -25,32 +35,23 @@ void edata_cache_prefork(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_parent(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); +/* + * An edata_cache_small is like an edata_cache, but it relies on external + * synchronization and avoids first-fit strategies. + */ + typedef struct edata_cache_small_s edata_cache_small_t; struct edata_cache_small_s { edata_list_inactive_t list; size_t count; edata_cache_t *fallback; + bool disabled; }; -/* - * An edata_cache_small is like an edata_cache, but it relies on external - * synchronization and avoids first-fit strategies. You can call "prepare" to - * acquire at least num edata_t objects, and then "finish" to flush all - * excess ones back to their fallback edata_cache_t. Once they have been - * acquired, they can be allocated without failing (and in fact, this is - * required -- it's not permitted to attempt to get an edata_t without first - * preparing for it). - */ - void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback); - -/* Returns whether or not an error occurred. 
*/ -bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num); -edata_t *edata_cache_small_get(edata_cache_small_t *ecs); - -void edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata); -void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num); +edata_t *edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs); +void edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, + edata_t *edata); +void edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs); #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/src/edata_cache.c b/src/edata_cache.c index d899ce80..ecfce414 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -27,8 +27,7 @@ edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache) { return base_alloc_edata(tsdn, edata_cache->base); } edata_avail_remove(&edata_cache->avail, edata); - size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); - atomic_store_zu(&edata_cache->count, count - 1, ATOMIC_RELAXED); + atomic_load_sub_store_zu(&edata_cache->count, 1); malloc_mutex_unlock(tsdn, &edata_cache->mtx); return edata; } @@ -37,8 +36,7 @@ void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata) { malloc_mutex_lock(tsdn, &edata_cache->mtx); edata_avail_insert(&edata_cache->avail, edata); - size_t count = atomic_load_zu(&edata_cache->count, ATOMIC_RELAXED); - atomic_store_zu(&edata_cache->count, count + 1, ATOMIC_RELAXED); + atomic_load_add_store_zu(&edata_cache->count, 1); malloc_mutex_unlock(tsdn, &edata_cache->mtx); } @@ -62,48 +60,110 @@ edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { edata_list_inactive_init(&ecs->list); ecs->count = 0; ecs->fallback = fallback; + ecs->disabled = false; +} + +static void +edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn, + edata_cache_small_t *ecs) { + assert(ecs->count == 0); + edata_t *edata; + malloc_mutex_lock(tsdn, &ecs->fallback->mtx); + while (ecs->count < 
EDATA_CACHE_SMALL_FILL) { + edata = edata_avail_first(&ecs->fallback->avail); + if (edata == NULL) { + break; + } + edata_avail_remove(&ecs->fallback->avail, edata); + edata_list_inactive_append(&ecs->list, edata); + ecs->count++; + atomic_load_sub_store_zu(&ecs->fallback->count, 1); + } + malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); } edata_t * -edata_cache_small_get(edata_cache_small_t *ecs) { - assert(ecs->count > 0); +edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_EDATA_CACHE, 0); + + if (ecs->disabled) { + assert(ecs->count == 0); + assert(edata_list_inactive_first(&ecs->list) == NULL); + return edata_cache_get(tsdn, ecs->fallback); + } + edata_t *edata = edata_list_inactive_first(&ecs->list); - assert(edata != NULL); - edata_list_inactive_remove(&ecs->list, edata); - ecs->count--; + if (edata != NULL) { + edata_list_inactive_remove(&ecs->list, edata); + ecs->count--; + return edata; + } + /* Slow path; requires synchronization. */ + edata_cache_small_try_fill_from_fallback(tsdn, ecs); + edata = edata_list_inactive_first(&ecs->list); + if (edata != NULL) { + edata_list_inactive_remove(&ecs->list, edata); + ecs->count--; + } else { + /* + * Slowest path (fallback was also empty); allocate something + * new. + */ + edata = base_alloc_edata(tsdn, ecs->fallback->base); + } return edata; } +static void +edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) { + /* + * You could imagine smarter cache management policies (like + * only flushing down to some threshold in anticipation of + * future get requests). But just flushing everything provides + * a good opportunity to defrag too, and lets us share code between the + * flush and disable pathways. 
+ */ + edata_t *edata; + size_t nflushed = 0; + malloc_mutex_lock(tsdn, &ecs->fallback->mtx); + while ((edata = edata_list_inactive_first(&ecs->list)) != NULL) { + edata_list_inactive_remove(&ecs->list, edata); + edata_avail_insert(&ecs->fallback->avail, edata); + nflushed++; + } + atomic_load_add_store_zu(&ecs->fallback->count, ecs->count); + malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); + assert(nflushed == ecs->count); + ecs->count = 0; +} + void -edata_cache_small_put(edata_cache_small_t *ecs, edata_t *edata) { - assert(edata != NULL); - edata_list_inactive_append(&ecs->list, edata); - ecs->count++; -} +edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_EDATA_CACHE, 0); -bool edata_cache_small_prepare(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num) { - while (ecs->count < num) { - /* - * Obviously, we can be smarter here and batch the locking that - * happens inside of edata_cache_get. But for now, something - * quick-and-dirty is fine. - */ - edata_t *edata = edata_cache_get(tsdn, ecs->fallback); - if (edata == NULL) { - return true; - } - ql_elm_new(edata, ql_link_inactive); - edata_cache_small_put(ecs, edata); - } - return false; -} - -void edata_cache_small_finish(tsdn_t *tsdn, edata_cache_small_t *ecs, - size_t num) { - while (ecs->count > num) { - /* Same deal here -- we should be batching. */ - edata_t *edata = edata_cache_small_get(ecs); + if (ecs->disabled) { + assert(ecs->count == 0); + assert(edata_list_inactive_first(&ecs->list) == NULL); edata_cache_put(tsdn, ecs->fallback, edata); + return; + } + + /* + * Prepend rather than append, to do LIFO ordering in the hopes of some + * cache locality. 
+ */ + edata_list_inactive_prepend(&ecs->list, edata); + ecs->count++; + if (ecs->count > EDATA_CACHE_SMALL_MAX) { + assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1); + edata_cache_small_flush_all(tsdn, ecs); } } + +void +edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) { + edata_cache_small_flush_all(tsdn, ecs); + ecs->disabled = true; +} diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index 22c9dcb8..9a5d14b0 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -47,37 +47,198 @@ TEST_BEGIN(test_edata_cache) { } TEST_END -TEST_BEGIN(test_edata_cache_small) { +TEST_BEGIN(test_edata_cache_small_simple) { edata_cache_t ec; edata_cache_small_t ecs; test_edata_cache_init(&ec); edata_cache_small_init(&ecs, &ec); - bool err = edata_cache_small_prepare(TSDN_NULL, &ecs, 2); - assert_false(err, ""); - assert_zu_eq(ecs.count, 2, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_t *ed1 = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(ed1, ""); + expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_t *ed1 = edata_cache_small_get(&ecs); - assert_zu_eq(ecs.count, 1, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_t *ed2 = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(ed2, ""); + expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_t *ed2 = edata_cache_small_get(&ecs); - assert_zu_eq(ecs.count, 0, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_cache_small_put(TSDN_NULL, &ecs, ed1); + expect_zu_eq(ecs.count, 1, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(&ecs, ed1); - assert_zu_eq(ecs.count, 1, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + edata_cache_small_put(TSDN_NULL, &ecs, ed2); + expect_zu_eq(ecs.count, 2, ""); + 
expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(&ecs, ed2); - assert_zu_eq(ecs.count, 2, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + /* LIFO ordering. */ + expect_ptr_eq(ed2, edata_cache_small_get(TSDN_NULL, &ecs), ""); + expect_zu_eq(ecs.count, 1, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_finish(TSDN_NULL, &ecs, 1); - assert_zu_eq(ecs.count, 1, ""); - assert_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 1, ""); + expect_ptr_eq(ed1, edata_cache_small_get(TSDN_NULL, &ecs), ""); + expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); + + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_fill) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + edata_t *allocs[EDATA_CACHE_SMALL_FILL * 2]; + + /* + * If the fallback cache can't satisfy the request, we shouldn't do + * extra allocations until compelled to. Put half the fill goal in the + * fallback. + */ + for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + allocs[i] = edata_cache_get(TSDN_NULL, &ec); + } + for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + edata_cache_put(TSDN_NULL, &ec, allocs[i]); + } + expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2 - 1, ecs.count, + "Should have grabbed all edatas available but no more."); + + for (int i = 1; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(allocs[i], ""); + } + expect_zu_eq(0, ecs.count, ""); + + /* When forced, we should alloc from the base. 
*/ + edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(edata, ""); + expect_zu_eq(0, ecs.count, "Allocated more than necessary"); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Allocated more than necessary"); + + /* + * We should correctly fill in the common case where the fallback isn't + * exhausted, too. + */ + for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + allocs[i] = edata_cache_get(TSDN_NULL, &ec); + expect_ptr_not_null(allocs[i], ""); + } + for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + edata_cache_put(TSDN_NULL, &ec, allocs[i]); + } + + allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { + expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(allocs[i], ""); + } + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { + expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + expect_ptr_not_null(allocs[i], ""); + } + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_flush) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + 
+ edata_t *allocs[2 * EDATA_CACHE_SMALL_MAX + 2]; + for (int i = 0; i < 2 * EDATA_CACHE_SMALL_MAX + 2; i++) { + allocs[i] = edata_cache_get(TSDN_NULL, &ec); + expect_ptr_not_null(allocs[i], ""); + } + for (int i = 0; i < EDATA_CACHE_SMALL_MAX; i++) { + edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); + expect_zu_eq(i + 1, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + } + edata_cache_small_put(TSDN_NULL, &ecs, allocs[EDATA_CACHE_SMALL_MAX]); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + for (int i = EDATA_CACHE_SMALL_MAX + 1; + i < 2 * EDATA_CACHE_SMALL_MAX + 1; i++) { + edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); + expect_zu_eq(i - EDATA_CACHE_SMALL_MAX, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + } + edata_cache_small_put(TSDN_NULL, &ecs, allocs[2 * EDATA_CACHE_SMALL_MAX + 1]); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(2 * EDATA_CACHE_SMALL_MAX + 2, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + test_edata_cache_destroy(&ec); +} +TEST_END + +TEST_BEGIN(test_edata_cache_disable) { + edata_cache_t ec; + edata_cache_small_t ecs; + + test_edata_cache_init(&ec); + edata_cache_small_init(&ecs, &ec); + + for (int i = 0; i < EDATA_CACHE_SMALL_FILL; i++) { + edata_t *edata = edata_cache_get(TSDN_NULL, &ec); + expect_ptr_not_null(edata, ""); + edata_cache_small_put(TSDN_NULL, &ecs, edata); + } + + expect_zu_eq(EDATA_CACHE_SMALL_FILL, ecs.count, ""); + expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); + + edata_cache_small_disable(TSDN_NULL, &ecs); + + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush"); + + edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, + 
atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Disabled ecs should forward on get"); + + edata_cache_small_put(TSDN_NULL, &ecs, edata); + expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_SMALL_FILL, + atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Disabled ecs should forward on put"); test_edata_cache_destroy(&ec); } @@ -87,5 +248,8 @@ int main(void) { return test( test_edata_cache, - test_edata_cache_small); + test_edata_cache_small_simple, + test_edata_cache_fill, + test_edata_cache_flush, + test_edata_cache_disable); } From 589638182ae58ae8031eac2cd9ba9d5b05783b42 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 30 Oct 2020 14:43:43 -0700 Subject: [PATCH 1907/2608] Use the edata_cache_small_t in the HPA. --- include/jemalloc/internal/hpa.h | 11 ++++++++++- include/jemalloc/internal/hpa_central.h | 2 +- src/hpa.c | 23 ++++++++++++++++++----- src/hpa_central.c | 10 +++++----- src/pa.c | 3 ++- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 24c68560..1cef6e5d 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -24,6 +24,9 @@ struct hpa_s { /* * This edata cache is the global one that we use for new allocations in * growing; practically, it comes from a0. + * + * We don't use an edata_cache_small in front of this, since we expect a + * small finite number of allocations from it. */ edata_cache_t *edata_cache; geom_grow_t geom_grow; @@ -50,7 +53,7 @@ struct hpa_shard_s { * from a pageslab. The pageslab itself comes from the centralized * allocator, and so will use its edata_cache. 
*/ - edata_cache_t *edata_cache; + edata_cache_small_t ecs; hpa_t *hpa; psset_t psset; @@ -86,6 +89,12 @@ bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, size_t large_min); +/* + * Notify the shard that we won't use it for allocations much longer. Due to + * the possibility of races, we don't actually prevent allocations; just flush + * and disable the embedded edata_cache_small. + */ +void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); /* diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h index b90ca41e..8659f712 100644 --- a/include/jemalloc/internal/hpa_central.h +++ b/include/jemalloc/internal/hpa_central.h @@ -9,7 +9,7 @@ struct hpa_central_s { /* The emap we use for metadata operations. */ emap_t *emap; - edata_cache_t *edata_cache; + edata_cache_small_t ecs; eset_t eset; size_t sn_next; diff --git a/src/hpa.c b/src/hpa.c index f49aa2b0..b329dbbb 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -64,7 +64,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, return true; } - shard->edata_cache = edata_cache; + assert(edata_cache != NULL); + edata_cache_small_init(&shard->ecs, edata_cache); shard->hpa = hpa; psset_init(&shard->psset); shard->ps_goal = ps_goal; @@ -201,13 +202,14 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { assert(size <= shard->ps_alloc_max); bool err; - edata_t *edata = edata_cache_get(tsdn, shard->edata_cache); + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } edata_arena_ind_set(edata, shard->ind); - malloc_mutex_lock(tsdn, &shard->mtx); err = psset_alloc_reuse(&shard->psset, edata, size); malloc_mutex_unlock(tsdn, 
&shard->mtx); if (!err) { @@ -229,7 +231,11 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { shard->ps_goal); if (grow_edata == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - edata_cache_put(tsdn, shard->edata_cache, edata); + + malloc_mutex_lock(tsdn, &shard->mtx); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + return NULL; } edata_arena_ind_set(grow_edata, shard->ind); @@ -351,9 +357,9 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { malloc_mutex_lock(tsdn, &shard->mtx); edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); - edata_cache_put(tsdn, shard->edata_cache, edata); if (evicted_ps != NULL) { /* @@ -387,6 +393,13 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { assert(bin_stats->ninactive == 0); } +void +hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_lock(tsdn, &shard->mtx); + edata_cache_small_disable(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* diff --git a/src/hpa_central.c b/src/hpa_central.c index a1895c87..346d9422 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -7,7 +7,7 @@ void hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, emap_t *emap) { central->emap = emap; - central->edata_cache = edata_cache; + edata_cache_small_init(¢ral->ecs, edata_cache); eset_init(¢ral->eset, extent_state_dirty); central->sn_next = 0; } @@ -19,7 +19,7 @@ hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, static edata_t * hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, size_t size) { - edata_t *trail = edata_cache_get(tsdn, central->edata_cache); + edata_t *trail = edata_cache_small_get(tsdn, ¢ral->ecs); if (trail == NULL) { return NULL; } @@ -34,7 +34,7 @@ hpa_central_split(tsdn_t *tsdn, hpa_central_t 
*central, edata_t *edata, bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, size, trail, cursize - size); if (err) { - edata_cache_put(tsdn, central->edata_cache, trail); + edata_cache_small_put(tsdn, ¢ral->ecs, trail); return NULL; } emap_lock_edata2(tsdn, central->emap, edata, trail); @@ -102,7 +102,7 @@ hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, assert(edata_base_get(edata) == edata_addr_get(edata)); assert(edata_size_get(edata) >= size); assert(edata_arena_ind_get(edata) - == base_ind_get(central->edata_cache->base)); + == base_ind_get(central->ecs.fallback->base)); assert(edata_is_head_get(edata)); assert(edata_state_get(edata) == extent_state_active); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); @@ -173,7 +173,7 @@ hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, edata_size_set(a, edata_size_get(a) + edata_size_get(b)); emap_merge_commit(tsdn, central->emap, &prepare, a, b); emap_unlock_edata2(tsdn, central->emap, a, b); - edata_cache_put(tsdn, central->edata_cache, b); + edata_cache_small_put(tsdn, ¢ral->ecs, b); } void diff --git a/src/pa.c b/src/pa.c index 59873c12..aee7bcd8 100644 --- a/src/pa.c +++ b/src/pa.c @@ -76,6 +76,7 @@ pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); if (shard->ever_used_hpa) { sec_disable(tsdn, &shard->hpa_sec); + hpa_shard_disable(tsdn, &shard->hpa_shard); } } @@ -89,10 +90,10 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { - sec_flush(tsdn, &shard->hpa_sec); pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { sec_flush(tsdn, &shard->hpa_sec); + hpa_shard_disable(tsdn, &shard->hpa_shard); } } From b3c5690b7e982c7343d22592f9a43d0e2857defe Mon Sep 17 00:00:00 2001 From: "Issam E. 
Maghni" Date: Mon, 9 Nov 2020 12:28:56 -0500 Subject: [PATCH 1908/2608] Update config.{guess,sub} to 2020-11-07@77632d9 --- build-aux/config.guess | 1008 ++++++++------ build-aux/config.sub | 2946 ++++++++++++++++++++-------------------- 2 files changed, 2103 insertions(+), 1851 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index 2e9ad7fe..0fc11edb 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2020 Free Software Foundation, Inc. -timestamp='2016-10-02' +timestamp='2020-11-07' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ timestamp='2016-10-02' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -27,19 +27,19 @@ timestamp='2016-10-02' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess # # Please send patches to . -me=`echo "$0" | sed -e 's,.*/,,'` +me=$(echo "$0" | sed -e 's,.*/,,') usage="\ Usage: $0 [OPTION] Output the configuration name of the system \`$me' is run on. -Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. 
-Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2020 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -84,8 +84,6 @@ if test $# != 0; then exit 1 fi -trap 'exit 1' 1 2 15 - # CC_FOR_BUILD -- compiler used by this script. Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a @@ -96,66 +94,77 @@ trap 'exit 1' 1 2 15 # Portable tmp directory creation inspired by the Autoconf team. -set_cc_for_build=' -trap "exitcode=\$?; (rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null) && exit \$exitcode" 0 ; -trap "rm -f \$tmpfiles 2>/dev/null; rmdir \$tmp 2>/dev/null; exit 1" 1 2 13 15 ; -: ${TMPDIR=/tmp} ; - { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || - { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir $tmp) ; } || - { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir $tmp) && echo "Warning: creating insecure temp directory" >&2 ; } || - { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } ; -dummy=$tmp/dummy ; -tmpfiles="$dummy.c $dummy.o $dummy.rel $dummy" ; -case $CC_FOR_BUILD,$HOST_CC,$CC in - ,,) echo "int x;" > $dummy.c ; - for c in cc gcc c89 c99 ; do - if ($c -c -o $dummy.o $dummy.c) >/dev/null 2>&1 ; then - CC_FOR_BUILD="$c"; break ; - fi ; - done ; - if test x"$CC_FOR_BUILD" = x ; then - CC_FOR_BUILD=no_compiler_found ; - fi - ;; - ,,*) CC_FOR_BUILD=$CC ;; - ,*,*) CC_FOR_BUILD=$HOST_CC ;; -esac ; set_cc_for_build= ;' +tmp= +# shellcheck disable=SC2172 +trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15 + +set_cc_for_build() { + # prevent multiple calls if $tmp is already set + test "$tmp" && return 0 + : "${TMPDIR=/tmp}" + # shellcheck disable=SC2039 + { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 
2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } || + { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || + { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || + { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } + dummy=$tmp/dummy + case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in + ,,) echo "int x;" > "$dummy.c" + for driver in cc gcc c89 c99 ; do + if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then + CC_FOR_BUILD="$driver" + break + fi + done + if test x"$CC_FOR_BUILD" = x ; then + CC_FOR_BUILD=no_compiler_found + fi + ;; + ,,*) CC_FOR_BUILD=$CC ;; + ,*,*) CC_FOR_BUILD=$HOST_CC ;; + esac +} # This is needed to find uname on a Pyramid OSx when run in the BSD universe. # (ghazi@noc.rutgers.edu 1994-08-24) -if (test -f /.attbin/uname) >/dev/null 2>&1 ; then +if test -f /.attbin/uname ; then PATH=$PATH:/.attbin ; export PATH fi -UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown -UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown -UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown -UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown +UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown +UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown +UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown +UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown -case "${UNAME_SYSTEM}" in +case "$UNAME_SYSTEM" in Linux|GNU|GNU/*) # If the system lacks a compiler, then just pick glibc. # We could probably try harder. 
LIBC=gnu - eval $set_cc_for_build - cat <<-EOF > $dummy.c + set_cc_for_build + cat <<-EOF > "$dummy.c" #include #if defined(__UCLIBC__) LIBC=uclibc #elif defined(__dietlibc__) LIBC=dietlibc #else + #include + #ifdef __DEFINED_va_list + LIBC=musl + #else LIBC=gnu #endif + #endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" ;; esac # Note: order is significant - the case branches are not exclusive. -case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in +case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, @@ -168,31 +177,32 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". 
sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ - /sbin/$sysctl 2>/dev/null || \ - /usr/sbin/$sysctl 2>/dev/null || \ - echo unknown)` - case "${UNAME_MACHINE_ARCH}" in + UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \ + "/sbin/$sysctl" 2>/dev/null || \ + "/usr/sbin/$sysctl" 2>/dev/null || \ + echo unknown)) + case "$UNAME_MACHINE_ARCH" in + aarch64eb) machine=aarch64_be-unknown ;; armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; sh3el) machine=shl-unknown ;; sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; earmv*) - arch=`echo ${UNAME_MACHINE_ARCH} | sed -e 's,^e\(armv[0-9]\).*$,\1,'` - endian=`echo ${UNAME_MACHINE_ARCH} | sed -ne 's,^.*\(eb\)$,\1,p'` - machine=${arch}${endian}-unknown + arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,') + endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p') + machine="${arch}${endian}"-unknown ;; - *) machine=${UNAME_MACHINE_ARCH}-unknown ;; + *) machine="$UNAME_MACHINE_ARCH"-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently (or will in the future) and ABI. - case "${UNAME_MACHINE_ARCH}" in + case "$UNAME_MACHINE_ARCH" in earm*) os=netbsdelf ;; arm*|i386|m68k|ns32k|sh3*|sparc|vax) - eval $set_cc_for_build + set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ then @@ -208,10 +218,10 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ;; esac # Determine ABI tags. - case "${UNAME_MACHINE_ARCH}" in + case "$UNAME_MACHINE_ARCH" in earm*) expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=`echo ${UNAME_MACHINE_ARCH} | sed -e "$expr"` + abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr") ;; esac # The OS release @@ -219,60 +229,75 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # thus, need a distinct triplet. 
However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. - case "${UNAME_VERSION}" in + case "$UNAME_VERSION" in Debian*) release='-gnu' ;; *) - release=`echo ${UNAME_RELEASE} | sed -e 's/[-_].*//' | cut -d. -f1,2` + release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2) ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "${machine}-${os}${release}${abi}" + echo "$machine-${os}${release}${abi-}" exit ;; *:Bitrig:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//') + echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" exit ;; *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//') + echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" exit ;; *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} + UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//') + echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" + exit ;; + *:MidnightBSD:*:*) + echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" exit ;; *:ekkoBSD:*:*) - echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" exit ;; *:SolidBSD:*:*) - echo ${UNAME_MACHINE}-unknown-solidbsd${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" + exit ;; + *:OS108:*:*) + echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" exit ;; macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd${UNAME_RELEASE} + echo powerpc-unknown-mirbsd"$UNAME_RELEASE" exit ;; *:MirBSD:*:*) - echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + echo 
"$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" exit ;; *:Sortix:*:*) - echo ${UNAME_MACHINE}-unknown-sortix + echo "$UNAME_MACHINE"-unknown-sortix + exit ;; + *:Twizzler:*:*) + echo "$UNAME_MACHINE"-unknown-twizzler + exit ;; + *:Redox:*:*) + echo "$UNAME_MACHINE"-unknown-redox + exit ;; + mips:OSF1:*.*) + echo mips-dec-osf1 exit ;; alpha:OSF1:*:*) case $UNAME_RELEASE in *4.0) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` + UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}') ;; *5.*) - UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}') ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1) case "$ALPHA_CPU_TYPE" in "EV4 (21064)") UNAME_MACHINE=alpha ;; @@ -310,28 +335,19 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)" # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 exit $exitcode ;; - Alpha\ *:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # Should we change UNAME_MACHINE based on the output of uname instead - # of the specific Alpha model? 
- echo alpha-pc-interix - exit ;; - 21064:Windows_NT:50:3) - echo alpha-dec-winnt3.5 - exit ;; Amiga*:UNIX_System_V:4.0:*) echo m68k-unknown-sysv4 exit ;; *:[Aa]miga[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-amigaos + echo "$UNAME_MACHINE"-unknown-amigaos exit ;; *:[Mm]orph[Oo][Ss]:*:*) - echo ${UNAME_MACHINE}-unknown-morphos + echo "$UNAME_MACHINE"-unknown-morphos exit ;; *:OS/390:*:*) echo i370-ibm-openedition @@ -343,7 +359,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo powerpc-ibm-os400 exit ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix${UNAME_RELEASE} + echo arm-acorn-riscix"$UNAME_RELEASE" exit ;; arm*:riscos:*:*|arm*:RISCOS:*:*) echo arm-unknown-riscos @@ -353,7 +369,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. - if test "`(/bin/universe) 2>/dev/null`" = att ; then + if test "$( (/bin/universe) 2>/dev/null)" = att ; then echo pyramid-pyramid-sysv3 else echo pyramid-pyramid-bsd @@ -366,28 +382,28 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in echo sparc-icl-nx6 exit ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case `/usr/bin/uname -p` in + case $(/usr/bin/uname -p) in sparc) echo sparc-icl-nx7; exit ;; esac ;; s390x:SunOS:*:*) - echo ${UNAME_MACHINE}-ibm-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" exit ;; sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" exit ;; i86pc:AuroraUX:5.*:* | 
i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux${UNAME_RELEASE} + echo i386-pc-auroraux"$UNAME_RELEASE" exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) - eval $set_cc_for_build + set_cc_for_build SUN_ARCH=i386 # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null @@ -395,40 +411,40 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in SUN_ARCH=x86_64 fi fi - echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; sun4*:SunOS:*:*) - case "`/usr/bin/arch -k`" in + case "$(/usr/bin/arch -k)" in Series*|S4*) - UNAME_RELEASE=`uname -v` + UNAME_RELEASE=$(uname -v) ;; esac # Japanese Language versions have a version number like `4.1.3-JL'. 
- echo sparc-sun-sunos`echo ${UNAME_RELEASE}|sed -e 's/-/_/'` + echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')" exit ;; sun3*:SunOS:*:*) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" exit ;; sun*:*:4.2BSD:*) - UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 - case "`/bin/arch`" in + UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null) + test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 + case "$(/bin/arch)" in sun3) - echo m68k-sun-sunos${UNAME_RELEASE} + echo m68k-sun-sunos"$UNAME_RELEASE" ;; sun4) - echo sparc-sun-sunos${UNAME_RELEASE} + echo sparc-sun-sunos"$UNAME_RELEASE" ;; esac exit ;; aushp:SunOS:*:*) - echo sparc-auspex-sunos${UNAME_RELEASE} + echo sparc-auspex-sunos"$UNAME_RELEASE" exit ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not @@ -439,44 +455,44 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint${UNAME_RELEASE} + echo m68k-atari-mint"$UNAME_RELEASE" exit ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint${UNAME_RELEASE} + echo m68k-milan-mint"$UNAME_RELEASE" exit ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint${UNAME_RELEASE} + echo m68k-hades-mint"$UNAME_RELEASE" exit ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint${UNAME_RELEASE} + echo m68k-unknown-mint"$UNAME_RELEASE" exit ;; m68k:machten:*:*) - echo m68k-apple-machten${UNAME_RELEASE} + echo m68k-apple-machten"$UNAME_RELEASE" exit ;; powerpc:machten:*:*) - echo powerpc-apple-machten${UNAME_RELEASE} + echo powerpc-apple-machten"$UNAME_RELEASE" exit ;; RISC*:Mach:*:*) echo mips-dec-mach_bsd4.3 exit ;; RISC*:ULTRIX:*:*) - echo mips-dec-ultrix${UNAME_RELEASE} + echo mips-dec-ultrix"$UNAME_RELEASE" exit ;; VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix${UNAME_RELEASE} + echo vax-dec-ultrix"$UNAME_RELEASE" exit ;; 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix${UNAME_RELEASE} + echo clipper-intergraph-clix"$UNAME_RELEASE" exit ;; mips:*:*:UMIPS | mips:*:*:RISCos) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #ifdef __cplusplus #include /* for printf() prototype */ int main (int argc, char *argv[]) { @@ -485,23 +501,23 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in #endif #if defined (host_mips) && defined (MIPSEB) #if defined (SYSTYPE_SYSV) - printf ("mips-mips-riscos%ssysv\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssysv\\n", argv[1]); 
exit (0); #endif #if defined (SYSTYPE_SVR4) - printf ("mips-mips-riscos%ssvr4\n", argv[1]); exit (0); + printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0); #endif #if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD) - printf ("mips-mips-riscos%sbsd\n", argv[1]); exit (0); + printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0); #endif #endif exit (-1); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && - dummyarg=`echo "${UNAME_RELEASE}" | sed -n 's/\([0-9]*\).*/\1/p'` && - SYSTEM_NAME=`$dummy $dummyarg` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && + dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') && + SYSTEM_NAME=$("$dummy" "$dummyarg") && { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos${UNAME_RELEASE} + echo mips-mips-riscos"$UNAME_RELEASE" exit ;; Motorola:PowerMAX_OS:*:*) echo powerpc-motorola-powermax @@ -526,18 +542,18 @@ EOF exit ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=`/usr/bin/uname -p` - if [ $UNAME_PROCESSOR = mc88100 ] || [ $UNAME_PROCESSOR = mc88110 ] + UNAME_PROCESSOR=$(/usr/bin/uname -p) + if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 then - if [ ${TARGET_BINARY_INTERFACE}x = m88kdguxelfx ] || \ - [ ${TARGET_BINARY_INTERFACE}x = x ] + if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ + test "$TARGET_BINARY_INTERFACE"x = x then - echo m88k-dg-dgux${UNAME_RELEASE} + echo m88k-dg-dgux"$UNAME_RELEASE" else - echo m88k-dg-dguxbcs${UNAME_RELEASE} + echo m88k-dg-dguxbcs"$UNAME_RELEASE" fi else - echo i586-dg-dgux${UNAME_RELEASE} + echo i586-dg-dgux"$UNAME_RELEASE" fi exit ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) @@ -554,26 +570,26 @@ EOF echo m68k-tektronix-bsd exit ;; *:IRIX*:*:*) - echo mips-sgi-irix`echo ${UNAME_RELEASE}|sed -e 's/-/_/g'` + echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')" exit ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. 
echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'`uname -s`'" gives 'AIX ' + exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX ' i*86:AIX:*:*) echo i386-ibm-aix exit ;; ia64:AIX:*:*) - if [ -x /usr/bin/oslevel ] ; then - IBM_REV=`/usr/bin/oslevel` + if test -x /usr/bin/oslevel ; then + IBM_REV=$(/usr/bin/oslevel) else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${UNAME_MACHINE}-ibm-aix${IBM_REV} + echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" exit ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #include main() @@ -584,7 +600,7 @@ EOF exit(0); } EOF - if $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") then echo "$SYSTEM_NAME" else @@ -597,28 +613,28 @@ EOF fi exit ;; *:AIX:*:[4567]) - IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` - if /usr/sbin/lsattr -El ${IBM_CPU_ID} | grep ' POWER' >/dev/null 2>&1; then + IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }') + if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi - if [ -x /usr/bin/lslpp ] ; then - IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` + if test -x /usr/bin/lslpp ; then + IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc | + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/) else - IBM_REV=${UNAME_VERSION}.${UNAME_RELEASE} + IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" fi - echo ${IBM_ARCH}-ibm-aix${IBM_REV} + echo "$IBM_ARCH"-ibm-aix"$IBM_REV" exit ;; *:AIX:*:*) echo rs6000-ibm-aix exit ;; - ibmrt:4.4BSD:*|romp-ibm:BSD:*) + ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) echo romp-ibm-bsd4.4 exit ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo 
romp-ibm-bsd${UNAME_RELEASE} # 4.3 with uname added to + echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to exit ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) echo rs6000-bull-bosx @@ -633,28 +649,28 @@ EOF echo m68k-hp-bsd4.4 exit ;; 9000/[34678]??:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - case "${UNAME_MACHINE}" in - 9000/31? ) HP_ARCH=m68000 ;; - 9000/[34]?? ) HP_ARCH=m68k ;; + HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') + case "$UNAME_MACHINE" in + 9000/31?) HP_ARCH=m68000 ;; + 9000/[34]??) HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) - if [ -x /usr/bin/getconf ]; then - sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` - sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` - case "${sc_cpu_version}" in + if test -x /usr/bin/getconf; then + sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null) + sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null) + case "$sc_cpu_version" in 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 - case "${sc_kernel_bits}" in + case "$sc_kernel_bits" in 32) HP_ARCH=hppa2.0n ;; 64) HP_ARCH=hppa2.0w ;; '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 esac ;; esac fi - if [ "${HP_ARCH}" = "" ]; then - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + if test "$HP_ARCH" = ""; then + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #define _HPUX_SOURCE #include @@ -687,13 +703,13 @@ EOF exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy") test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = hppa2.0w ] + if test "$HP_ARCH" = hppa2.0w then - eval $set_cc_for_build + set_cc_for_build # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating # 32-bit code. 
hppa64-hp-hpux* has the same kernel and a compiler @@ -712,15 +728,15 @@ EOF HP_ARCH=hppa64 fi fi - echo ${HP_ARCH}-hp-hpux${HPUX_REV} + echo "$HP_ARCH"-hp-hpux"$HPUX_REV" exit ;; ia64:HP-UX:*:*) - HPUX_REV=`echo ${UNAME_RELEASE}|sed -e 's/[^.]*.[0B]*//'` - echo ia64-hp-hpux${HPUX_REV} + HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') + echo ia64-hp-hpux"$HPUX_REV" exit ;; 3050*:HI-UX:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + sed 's/^ //' << EOF > "$dummy.c" #include int main () @@ -745,11 +761,11 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o $dummy $dummy.c && SYSTEM_NAME=`$dummy` && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") && { echo "$SYSTEM_NAME"; exit; } echo unknown-hitachi-hiuxwe2 exit ;; - 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:* ) + 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) echo hppa1.1-hp-bsd exit ;; 9000/8??:4.3bsd:*:*) @@ -758,17 +774,17 @@ EOF *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) echo hppa1.0-hp-mpeix exit ;; - hp7??:OSF1:*:* | hp8?[79]:OSF1:*:* ) + hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) echo hppa1.1-hp-osf exit ;; hp8??:OSF1:*:*) echo hppa1.0-hp-osf exit ;; i*86:OSF1:*:*) - if [ -x /usr/sbin/sysversion ] ; then - echo ${UNAME_MACHINE}-unknown-osf1mk + if test -x /usr/sbin/sysversion ; then + echo "$UNAME_MACHINE"-unknown-osf1mk else - echo ${UNAME_MACHINE}-unknown-osf1 + echo "$UNAME_MACHINE"-unknown-osf1 fi exit ;; parisc*:Lites*:*:*) @@ -793,130 +809,123 @@ EOF echo c4-convex-bsd exit ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*[A-Z]90:*:*:*) - echo ${UNAME_MACHINE}-cray-unicos${UNAME_RELEASE} \ + echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \ -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + 
echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; CRAY*SV1:*:*:*) - echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; *:UNICOS/mp:*:*) - echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` + FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz) + FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') + FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/') echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') + FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/') echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" exit ;; sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi${UNAME_RELEASE} + echo sparc-unknown-bsdi"$UNAME_RELEASE" exit ;; *:BSD/OS:*:*) - echo 
${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} + echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" + exit ;; + arm:FreeBSD:*:*) + UNAME_PROCESSOR=$(uname -p) + set_cc_for_build + if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ + | grep -q __ARM_PCS_VFP + then + echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi + else + echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf + fi exit ;; *:FreeBSD:*:*) - UNAME_PROCESSOR=`/usr/bin/uname -p` - case ${UNAME_PROCESSOR} in + UNAME_PROCESSOR=$(/usr/bin/uname -p) + case "$UNAME_PROCESSOR" in amd64) - echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; - *) - echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;; + UNAME_PROCESSOR=x86_64 ;; + i386) + UNAME_PROCESSOR=i586 ;; esac + echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" exit ;; i*:CYGWIN*:*) - echo ${UNAME_MACHINE}-pc-cygwin + echo "$UNAME_MACHINE"-pc-cygwin exit ;; *:MINGW64*:*) - echo ${UNAME_MACHINE}-pc-mingw64 + echo "$UNAME_MACHINE"-pc-mingw64 exit ;; *:MINGW*:*) - echo ${UNAME_MACHINE}-pc-mingw32 + echo "$UNAME_MACHINE"-pc-mingw32 exit ;; *:MSYS*:*) - echo ${UNAME_MACHINE}-pc-msys - exit ;; - i*:windows32*:*) - # uname -m includes "-pc" on this system. 
- echo ${UNAME_MACHINE}-mingw32 + echo "$UNAME_MACHINE"-pc-msys exit ;; i*:PW*:*) - echo ${UNAME_MACHINE}-pc-pw32 + echo "$UNAME_MACHINE"-pc-pw32 exit ;; *:Interix*:*) - case ${UNAME_MACHINE} in + case "$UNAME_MACHINE" in x86) - echo i586-pc-interix${UNAME_RELEASE} + echo i586-pc-interix"$UNAME_RELEASE" exit ;; authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix${UNAME_RELEASE} + echo x86_64-unknown-interix"$UNAME_RELEASE" exit ;; IA64) - echo ia64-unknown-interix${UNAME_RELEASE} + echo ia64-unknown-interix"$UNAME_RELEASE" exit ;; esac ;; - [345]86:Windows_95:* | [345]86:Windows_98:* | [345]86:Windows_NT:*) - echo i${UNAME_MACHINE}-pc-mks - exit ;; - 8664:Windows_NT:*) - echo x86_64-pc-mks - exit ;; - i*:Windows_NT*:* | Pentium*:Windows_NT*:*) - # How do we know it's Interix rather than the generic POSIX subsystem? - # It also conflicts with pre-2.0 versions of AT&T UWIN. Should we - # UNAME_MACHINE based on the output of uname instead of i386? - echo i586-pc-interix - exit ;; i*:UWIN*:*) - echo ${UNAME_MACHINE}-pc-uwin + echo "$UNAME_MACHINE"-pc-uwin exit ;; amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-unknown-cygwin - exit ;; - p*:CYGWIN*:*) - echo powerpcle-unknown-cygwin + echo x86_64-pc-cygwin exit ;; prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` + echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" exit ;; *:GNU:*:*) # the GNU system - echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-${LIBC}`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` + echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')" exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" 
"[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC" exit ;; - i*86:Minix:*:*) - echo ${UNAME_MACHINE}-pc-minix + *:Minix:*:*) + echo "$UNAME_MACHINE"-unknown-minix exit ;; aarch64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; alpha:Linux:*:*) - case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; @@ -927,140 +936,178 @@ EOF esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC=gnulibc1 ; fi - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arc:Linux:*:* | arceb:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; arm*:Linux:*:*) - eval $set_cc_for_build + set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabi + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi else - echo ${UNAME_MACHINE}-unknown-linux-${LIBC}eabihf + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf fi fi exit ;; avr32*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; cris:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" exit ;; crisv32:Linux:*:*) - echo ${UNAME_MACHINE}-axis-linux-${LIBC} + echo "$UNAME_MACHINE"-axis-linux-"$LIBC" exit ;; e2k:Linux:*:*) - echo 
${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; frv:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; hexagon:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + echo "$UNAME_MACHINE"-pc-linux-"$LIBC" exit ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; k1om:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m32r*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; mips:Linux:*:* | mips64:Linux:*:*) - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c + set_cc_for_build + IS_GLIBC=0 + test x"${LIBC}" = xgnu && IS_GLIBC=1 + sed 's/^ //' << EOF > "$dummy.c" #undef CPU - #undef ${UNAME_MACHINE} - #undef ${UNAME_MACHINE}el + #undef mips + #undef mipsel + #undef mips64 + #undef mips64el + #if ${IS_GLIBC} && defined(_ABI64) + LIBCABI=gnuabi64 + #else + #if ${IS_GLIBC} && defined(_ABIN32) + LIBCABI=gnuabin32 + #else + LIBCABI=${LIBC} + #endif + #endif + + #if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa64r6 + #else + #if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6 + CPU=mipsisa32r6 + #else + #if defined(__mips64) + CPU=mips64 + #else + CPU=mips + #endif + #endif + #endif + #if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL) - CPU=${UNAME_MACHINE}el + MIPS_ENDIAN=el #else #if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB) - CPU=${UNAME_MACHINE} + MIPS_ENDIAN= #else - CPU= + MIPS_ENDIAN= #endif 
#endif EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep '^CPU'` - test x"${CPU}" != x && { echo "${CPU}-unknown-linux-${LIBC}"; exit; } + eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')" + test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } ;; mips64el:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; openrisc*:Linux:*:*) - echo or1k-unknown-linux-${LIBC} + echo or1k-unknown-linux-"$LIBC" exit ;; or32:Linux:*:* | or1k*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; padre:Linux:*:*) - echo sparc-unknown-linux-${LIBC} + echo sparc-unknown-linux-"$LIBC" exit ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-${LIBC} + echo hppa64-unknown-linux-"$LIBC" exit ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level - case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; - PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; - *) echo hppa-unknown-linux-${LIBC} ;; + case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in + PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; + PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; + *) echo hppa-unknown-linux-"$LIBC" ;; esac exit ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-${LIBC} + echo powerpc64-unknown-linux-"$LIBC" exit ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-${LIBC} + echo powerpc-unknown-linux-"$LIBC" exit ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-${LIBC} + echo powerpc64le-unknown-linux-"$LIBC" exit ;; ppcle:Linux:*:*) - echo powerpcle-unknown-linux-${LIBC} + echo powerpcle-unknown-linux-"$LIBC" exit ;; riscv32:Linux:*:* | riscv64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; s390:Linux:*:* | s390x:Linux:*:*) - echo ${UNAME_MACHINE}-ibm-linux-${LIBC} + 
echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" exit ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; tile*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; vax:Linux:*:*) - echo ${UNAME_MACHINE}-dec-linux-${LIBC} + echo "$UNAME_MACHINE"-dec-linux-"$LIBC" exit ;; x86_64:Linux:*:*) - echo ${UNAME_MACHINE}-pc-linux-${LIBC} + set_cc_for_build + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_X32 >/dev/null + then + LIBCABI="$LIBC"x32 + fi + fi + echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI" exit ;; xtensa*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. @@ -1074,51 +1121,51 @@ EOF # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. - echo ${UNAME_MACHINE}-pc-sysv4.2uw${UNAME_VERSION} + echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" exit ;; i*86:OS/2:*:*) # If we were able to find `uname', then EMX Unix compatibility # is probably installed. 
- echo ${UNAME_MACHINE}-pc-os2-emx + echo "$UNAME_MACHINE"-pc-os2-emx exit ;; i*86:XTS-300:*:STOP) - echo ${UNAME_MACHINE}-unknown-stop + echo "$UNAME_MACHINE"-unknown-stop exit ;; i*86:atheos:*:*) - echo ${UNAME_MACHINE}-unknown-atheos + echo "$UNAME_MACHINE"-unknown-atheos exit ;; i*86:syllable:*:*) - echo ${UNAME_MACHINE}-pc-syllable + echo "$UNAME_MACHINE"-pc-syllable exit ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos${UNAME_RELEASE} + echo i386-unknown-lynxos"$UNAME_RELEASE" exit ;; i*86:*DOS:*:*) - echo ${UNAME_MACHINE}-pc-msdosdjgpp + echo "$UNAME_MACHINE"-pc-msdosdjgpp exit ;; - i*86:*:4.*:* | i*86:SYSTEM_V:4.*:*) - UNAME_REL=`echo ${UNAME_RELEASE} | sed 's/\/MP$//'` + i*86:*:4.*:*) + UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//') if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo ${UNAME_MACHINE}-univel-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv${UNAME_REL} + echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" fi exit ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. 
- case `/bin/uname -X | grep "^Machine"` in + case $(/bin/uname -X | grep "^Machine") in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac - echo ${UNAME_MACHINE}-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" exit ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then - UNAME_REL=`sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` + UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //')) (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 @@ -1126,9 +1173,9 @@ EOF && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 - echo ${UNAME_MACHINE}-pc-sco$UNAME_REL + echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" else - echo ${UNAME_MACHINE}-pc-sysv32 + echo "$UNAME_MACHINE"-pc-sysv32 fi exit ;; pc:*:*:*) @@ -1148,9 +1195,9 @@ EOF exit ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv${UNAME_RELEASE} # Stardent Vistra i860-SVR4 + echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv${UNAME_RELEASE} # Unknown i860-SVR4 + echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 fi exit ;; mini*:CTIX:SYS*5:*) @@ -1168,41 +1215,41 @@ EOF 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; 3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4; exit; } ;; NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ - && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` + && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ - && { echo i486-ncr-sysv4.3${OS_REL}; exit; } + && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ - && { echo i586-ncr-sysv4.3${OS_REL}; exit; } ;; + && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos${UNAME_RELEASE} + echo m68k-unknown-lynxos"$UNAME_RELEASE" exit ;; mc68030:UNIX_System_V:4.*:*) echo m68k-atari-sysv4 exit ;; TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos${UNAME_RELEASE} + echo sparc-unknown-lynxos"$UNAME_RELEASE" exit ;; rs6000:LynxOS:2.*:*) - echo 
rs6000-unknown-lynxos${UNAME_RELEASE} + echo rs6000-unknown-lynxos"$UNAME_RELEASE" exit ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos${UNAME_RELEASE} + echo powerpc-unknown-lynxos"$UNAME_RELEASE" exit ;; SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv${UNAME_RELEASE} + echo mips-dde-sysv"$UNAME_RELEASE" exit ;; RM*:ReliantUNIX-*:*:*) echo mips-sni-sysv4 @@ -1212,8 +1259,8 @@ EOF exit ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=`(uname -p) 2>/dev/null` - echo ${UNAME_MACHINE}-sni-sysv4 + UNAME_MACHINE=$( (uname -p) 2>/dev/null) + echo "$UNAME_MACHINE"-sni-sysv4 else echo ns32k-sni-sysv fi @@ -1233,23 +1280,23 @@ EOF exit ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. - echo ${UNAME_MACHINE}-stratus-vos + echo "$UNAME_MACHINE"-stratus-vos exit ;; *:VOS:*:*) # From Paul.Green@stratus.com. echo hppa1.1-stratus-vos exit ;; mc68*:A/UX:*:*) - echo m68k-apple-aux${UNAME_RELEASE} + echo m68k-apple-aux"$UNAME_RELEASE" exit ;; news*:NEWS-OS:6*:*) echo mips-sony-newsos6 exit ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) - if [ -d /usr/nec ]; then - echo mips-nec-sysv${UNAME_RELEASE} + if test -d /usr/nec; then + echo mips-nec-sysv"$UNAME_RELEASE" else - echo mips-unknown-sysv${UNAME_RELEASE} + echo mips-unknown-sysv"$UNAME_RELEASE" fi exit ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. 
@@ -1268,80 +1315,97 @@ EOF echo x86_64-unknown-haiku exit ;; SX-4:SUPER-UX:*:*) - echo sx4-nec-superux${UNAME_RELEASE} + echo sx4-nec-superux"$UNAME_RELEASE" exit ;; SX-5:SUPER-UX:*:*) - echo sx5-nec-superux${UNAME_RELEASE} + echo sx5-nec-superux"$UNAME_RELEASE" exit ;; SX-6:SUPER-UX:*:*) - echo sx6-nec-superux${UNAME_RELEASE} + echo sx6-nec-superux"$UNAME_RELEASE" exit ;; SX-7:SUPER-UX:*:*) - echo sx7-nec-superux${UNAME_RELEASE} + echo sx7-nec-superux"$UNAME_RELEASE" exit ;; SX-8:SUPER-UX:*:*) - echo sx8-nec-superux${UNAME_RELEASE} + echo sx8-nec-superux"$UNAME_RELEASE" exit ;; SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux${UNAME_RELEASE} + echo sx8r-nec-superux"$UNAME_RELEASE" exit ;; SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux${UNAME_RELEASE} + echo sxace-nec-superux"$UNAME_RELEASE" exit ;; Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody${UNAME_RELEASE} + echo powerpc-apple-rhapsody"$UNAME_RELEASE" exit ;; *:Rhapsody:*:*) - echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} + echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" + exit ;; + arm64:Darwin:*:*) + echo aarch64-apple-darwin"$UNAME_RELEASE" exit ;; *:Darwin:*:*) - UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown - eval $set_cc_for_build - if test "$UNAME_PROCESSOR" = unknown ; then - UNAME_PROCESSOR=powerpc + UNAME_PROCESSOR=$(uname -p) + case $UNAME_PROCESSOR in + unknown) UNAME_PROCESSOR=powerpc ;; + esac + if command -v xcode-select > /dev/null 2> /dev/null && \ + ! xcode-select --print-path > /dev/null 2> /dev/null ; then + # Avoid executing cc if there is no toolchain installed as + # cc will be a stub that puts up a graphical alert + # prompting the user to install developer tools. 
+ CC_FOR_BUILD=no_compiler_found + else + set_cc_for_build fi - if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then - if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_64BIT_ARCH >/dev/null - then - case $UNAME_PROCESSOR in - i386) UNAME_PROCESSOR=x86_64 ;; - powerpc) UNAME_PROCESSOR=powerpc64 ;; - esac - fi + if test "$CC_FOR_BUILD" != no_compiler_found; then + if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_64BIT_ARCH >/dev/null + then + case $UNAME_PROCESSOR in + i386) UNAME_PROCESSOR=x86_64 ;; + powerpc) UNAME_PROCESSOR=powerpc64 ;; + esac + fi + # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc + if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \ + (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + grep IS_PPC >/dev/null + then + UNAME_PROCESSOR=powerpc fi elif test "$UNAME_PROCESSOR" = i386 ; then - # Avoid executing cc on OS X 10.9, as it ships with a stub - # that puts up a graphical alert prompting to install - # developer tools. Any system running Mac OS X 10.7 or - # later (Darwin 11 and later) is required to have a 64-bit - # processor. This is not true of the ARM version of Darwin - # that Apple uses in portable devices. 
- UNAME_PROCESSOR=x86_64 + # uname -m returns i386 or x86_64 + UNAME_PROCESSOR=$UNAME_MACHINE fi - echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=`uname -p` + UNAME_PROCESSOR=$(uname -p) if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi - echo ${UNAME_PROCESSOR}-${UNAME_MACHINE}-nto-qnx${UNAME_RELEASE} + echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" exit ;; *:QNX:*:4*) echo i386-pc-qnx exit ;; - NEO-?:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk${UNAME_RELEASE} + NEO-*:NONSTOP_KERNEL:*:*) + echo neo-tandem-nsk"$UNAME_RELEASE" exit ;; NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk${UNAME_RELEASE} + echo nse-tandem-nsk"$UNAME_RELEASE" exit ;; - NSR-?:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk${UNAME_RELEASE} + NSR-*:NONSTOP_KERNEL:*:*) + echo nsr-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSV-*:NONSTOP_KERNEL:*:*) + echo nsv-tandem-nsk"$UNAME_RELEASE" + exit ;; + NSX-*:NONSTOP_KERNEL:*:*) + echo nsx-tandem-nsk"$UNAME_RELEASE" exit ;; *:NonStop-UX:*:*) echo mips-compaq-nonstopux @@ -1350,18 +1414,19 @@ EOF echo bs2000-siemens-sysv exit ;; DS/*:UNIX_System_V:*:*) - echo ${UNAME_MACHINE}-${UNAME_SYSTEM}-${UNAME_RELEASE} + echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" exit ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. 
+ # shellcheck disable=SC2154 if test "$cputype" = 386; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" fi - echo ${UNAME_MACHINE}-unknown-plan9 + echo "$UNAME_MACHINE"-unknown-plan9 exit ;; *:TOPS-10:*:*) echo pdp10-unknown-tops10 @@ -1382,14 +1447,14 @@ EOF echo pdp10-unknown-its exit ;; SEI:*:*:SEIUX) - echo mips-sei-seiux${UNAME_RELEASE} + echo mips-sei-seiux"$UNAME_RELEASE" exit ;; *:DragonFly:*:*) - echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" exit ;; *:*VMS:*:*) - UNAME_MACHINE=`(uname -p) 2>/dev/null` - case "${UNAME_MACHINE}" in + UNAME_MACHINE=$( (uname -p) 2>/dev/null) + case "$UNAME_MACHINE" in A*) echo alpha-dec-vms ; exit ;; I*) echo ia64-dec-vms ; exit ;; V*) echo vax-dec-vms ; exit ;; @@ -1398,32 +1463,190 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` + echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')" exit ;; i*86:rdos:*:*) - echo ${UNAME_MACHINE}-pc-rdos + echo "$UNAME_MACHINE"-pc-rdos exit ;; i*86:AROS:*:*) - echo ${UNAME_MACHINE}-pc-aros + echo "$UNAME_MACHINE"-pc-aros exit ;; x86_64:VMkernel:*:*) - echo ${UNAME_MACHINE}-unknown-esx + echo "$UNAME_MACHINE"-unknown-esx exit ;; amd64:Isilon\ OneFS:*:*) echo x86_64-unknown-onefs exit ;; + *:Unleashed:*:*) + echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" + exit ;; +esac + +# No uname command or uname output not recognized. 
+set_cc_for_build +cat > "$dummy.c" < +#include +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#include +#if defined(_SIZE_T_) || defined(SIGLOST) +#include +#endif +#endif +#endif +main () +{ +#if defined (sony) +#if defined (MIPSEB) + /* BFD wants "bsd" instead of "newsos". Perhaps BFD should be changed, + I don't know.... */ + printf ("mips-sony-bsd\n"); exit (0); +#else +#include + printf ("m68k-sony-newsos%s\n", +#ifdef NEWSOS4 + "4" +#else + "" +#endif + ); exit (0); +#endif +#endif + +#if defined (NeXT) +#if !defined (__ARCHITECTURE__) +#define __ARCHITECTURE__ "m68k" +#endif + int version; + version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null); + if (version < 4) + printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); + else + printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version); + exit (0); +#endif + +#if defined (MULTIMAX) || defined (n16) +#if defined (UMAXV) + printf ("ns32k-encore-sysv\n"); exit (0); +#else +#if defined (CMU) + printf ("ns32k-encore-mach\n"); exit (0); +#else + printf ("ns32k-encore-bsd\n"); exit (0); +#endif +#endif +#endif + +#if defined (__386BSD__) + printf ("i386-pc-bsd\n"); exit (0); +#endif + +#if defined (sequent) +#if defined (i386) + printf ("i386-sequent-dynix\n"); exit (0); +#endif +#if defined (ns32000) + printf ("ns32k-sequent-dynix\n"); exit (0); +#endif +#endif + +#if defined (_SEQUENT_) + struct utsname un; + + uname(&un); + if (strncmp(un.version, "V2", 2) == 0) { + printf ("i386-sequent-ptx2\n"); exit (0); + } + if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? 
*/ + printf ("i386-sequent-ptx1\n"); exit (0); + } + printf ("i386-sequent-ptx\n"); exit (0); +#endif + +#if defined (vax) +#if !defined (ultrix) +#include +#if defined (BSD) +#if BSD == 43 + printf ("vax-dec-bsd4.3\n"); exit (0); +#else +#if BSD == 199006 + printf ("vax-dec-bsd4.3reno\n"); exit (0); +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#endif +#else + printf ("vax-dec-bsd\n"); exit (0); +#endif +#else +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname un; + uname (&un); + printf ("vax-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("vax-dec-ultrix\n"); exit (0); +#endif +#endif +#endif +#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__) +#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__) +#if defined(_SIZE_T_) || defined(SIGLOST) + struct utsname *un; + uname (&un); + printf ("mips-dec-ultrix%s\n", un.release); exit (0); +#else + printf ("mips-dec-ultrix\n"); exit (0); +#endif +#endif +#endif + +#if defined (alliant) && defined (i860) + printf ("i860-alliant-bsd\n"); exit (0); +#endif + + exit (1); +} +EOF + +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) && + { echo "$SYSTEM_NAME"; exit; } + +# Apollos put the system type in the environment. +test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } + +echo "$0: unable to guess system type" >&2 + +case "$UNAME_MACHINE:$UNAME_SYSTEM" in + mips:Linux | mips64:Linux) + # If we got here on MIPS GNU/Linux, output extra information. 
+ cat >&2 <&2 <&2 </dev/null || echo unknown` -uname -r = `(uname -r) 2>/dev/null || echo unknown` -uname -s = `(uname -s) 2>/dev/null || echo unknown` -uname -v = `(uname -v) 2>/dev/null || echo unknown` +uname -m = $( (uname -m) 2>/dev/null || echo unknown) +uname -r = $( (uname -r) 2>/dev/null || echo unknown) +uname -s = $( (uname -s) 2>/dev/null || echo unknown) +uname -v = $( (uname -v) 2>/dev/null || echo unknown) -/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` -/bin/uname -X = `(/bin/uname -X) 2>/dev/null` +/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null) +/bin/uname -X = $( (/bin/uname -X) 2>/dev/null) -hostinfo = `(hostinfo) 2>/dev/null` -/bin/universe = `(/bin/universe) 2>/dev/null` -/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` -/bin/arch = `(/bin/arch) 2>/dev/null` -/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` -/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` +hostinfo = $( (hostinfo) 2>/dev/null) +/bin/universe = $( (/bin/universe) 2>/dev/null) +/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null) +/bin/arch = $( (/bin/arch) 2>/dev/null) +/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null) +/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null) -UNAME_MACHINE = ${UNAME_MACHINE} -UNAME_RELEASE = ${UNAME_RELEASE} -UNAME_SYSTEM = ${UNAME_SYSTEM} -UNAME_VERSION = ${UNAME_VERSION} +UNAME_MACHINE = "$UNAME_MACHINE" +UNAME_RELEASE = "$UNAME_RELEASE" +UNAME_SYSTEM = "$UNAME_SYSTEM" +UNAME_VERSION = "$UNAME_VERSION" EOF +fi exit 1 # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" diff --git a/build-aux/config.sub b/build-aux/config.sub index dd2ca93c..c874b7a9 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2016 Free Software Foundation, Inc. 
+# Copyright 1992-2020 Free Software Foundation, Inc. -timestamp='2016-11-04' +timestamp='2020-11-07' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -15,7 +15,7 @@ timestamp='2016-11-04' # General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with this program; if not, see . +# along with this program; if not, see . # # As a special exception to the GNU General Public License, if you # distribute this file as part of a program that contains a @@ -33,7 +33,7 @@ timestamp='2016-11-04' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -50,14 +50,14 @@ timestamp='2016-11-04' # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. -me=`echo "$0" | sed -e 's,.*/,,'` +me=$(echo "$0" | sed -e 's,.*/,,') usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS Canonicalize a configuration name. -Operation modes: +Options: -h, --help print this help, then exit -t, --time-stamp print date of last modification, then exit -v, --version print version number, then exit @@ -67,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2020 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -89,12 +89,12 @@ while test $# -gt 0 ; do - ) # Use stdin as input. 
break ;; -* ) - echo "$me: invalid option $1$help" + echo "$me: invalid option $1$help" >&2 exit 1 ;; *local*) # First pass through any local machine types. - echo $1 + echo "$1" exit ;; * ) @@ -110,1244 +110,1167 @@ case $# in exit 1;; esac -# Separate what the user gave into CPU-COMPANY and OS or KERNEL-OS (if any). -# Here we must recognize all the valid KERNEL-OS combinations. -maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` -case $maybe_os in - nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ - linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ - knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ - kopensolaris*-gnu* | cloudabi*-eabi* | \ - storm-chaos* | os2-emx* | rtmk-nova*) - os=-$maybe_os - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` - ;; - android-linux) - os=-linux-android - basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown - ;; - *) - basic_machine=`echo $1 | sed 's/-[^-]*$//'` - if [ $basic_machine != $1 ] - then os=`echo $1 | sed 's/.*-/-/'` - else os=; fi - ;; -esac +# Split fields of configuration type +# shellcheck disable=SC2162 +IFS="-" read field1 field2 field3 field4 <&2 + exit 1 ;; - -dec* | -mips* | -sequent* | -encore* | -pc532* | -sgi* | -sony* | \ - -att* | -7300* | -3300* | -delta* | -motorola* | -sun[234]* | \ - -unicom* | -ibm* | -next | -hp | -isi* | -apollo | -altos* | \ - -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ - -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ - -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis | -knuth | -cray | -microblaze*) - os= - basic_machine=$1 + *-*-*-*) + basic_machine=$field1-$field2 + basic_os=$field3-$field4 ;; - -bluegene*) - os=-cnk + *-*-*) + # Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two + # parts + maybe_os=$field2-$field3 + case $maybe_os in + nto-qnx* | linux-* | uclinux-uclibc* \ 
+ | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ + | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ + | storm-chaos* | os2-emx* | rtmk-nova*) + basic_machine=$field1 + basic_os=$maybe_os + ;; + android-linux) + basic_machine=$field1-unknown + basic_os=linux-android + ;; + *) + basic_machine=$field1-$field2 + basic_os=$field3 + ;; + esac ;; - -sim | -cisco | -oki | -wec | -winbond) - os= - basic_machine=$1 + *-*) + # A lone config we happen to match not fitting any pattern + case $field1-$field2 in + decstation-3100) + basic_machine=mips-dec + basic_os= + ;; + *-*) + # Second component is usually, but not always the OS + case $field2 in + # Prevent following clause from handling this valid os + sun*os*) + basic_machine=$field1 + basic_os=$field2 + ;; + # Manufacturers + dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ + | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ + | unicom* | ibm* | next | hp | isi* | apollo | altos* \ + | convergent* | ncr* | news | 32* | 3600* | 3100* \ + | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ + | ultra | tti* | harris | dolphin | highlevel | gould \ + | cbm | ns | masscomp | apple | axis | knuth | cray \ + | microblaze* | sim | cisco \ + | oki | wec | wrs | winbond) + basic_machine=$field1-$field2 + basic_os= + ;; + *) + basic_machine=$field1 + basic_os=$field2 + ;; + esac + ;; + esac ;; - -scout) - ;; - -wrs) - os=-vxworks - basic_machine=$1 - ;; - -chorusos*) - os=-chorusos - basic_machine=$1 - ;; - -chorusrdb) - os=-chorusrdb - basic_machine=$1 - ;; - -hiux*) - os=-hiuxwe2 - ;; - -sco6) - os=-sco5v6 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5) - os=-sco3.2v5 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco4) - os=-sco3.2v4 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2.[4-9]*) - os=`echo $os | sed -e 's/sco3.2./sco3.2v/'` - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco3.2v[4-9]*) - # Don't forget 
version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco5v6*) - # Don't forget version if it is 3.2v4 or newer. - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -sco*) - os=-sco3.2v2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -udk*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -isc) - os=-isc2.2 - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -clix*) - basic_machine=clipper-intergraph - ;; - -isc*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'` - ;; - -lynx*178) - os=-lynxos178 - ;; - -lynx*5) - os=-lynxos5 - ;; - -lynx*) - os=-lynxos - ;; - -ptx*) - basic_machine=`echo $1 | sed -e 's/86-.*/86-sequent/'` - ;; - -windowsnt*) - os=`echo $os | sed -e 's/windowsnt/winnt/'` - ;; - -psos*) - os=-psos - ;; - -mint | -mint[0-9]*) - basic_machine=m68k-atari - os=-mint + *) + # Convert single-component short-hands not valid as part of + # multi-component configurations. + case $field1 in + 386bsd) + basic_machine=i386-pc + basic_os=bsd + ;; + a29khif) + basic_machine=a29k-amd + basic_os=udi + ;; + adobe68k) + basic_machine=m68010-adobe + basic_os=scout + ;; + alliant) + basic_machine=fx80-alliant + basic_os= + ;; + altos | altos3068) + basic_machine=m68k-altos + basic_os= + ;; + am29k) + basic_machine=a29k-none + basic_os=bsd + ;; + amdahl) + basic_machine=580-amdahl + basic_os=sysv + ;; + amiga) + basic_machine=m68k-unknown + basic_os= + ;; + amigaos | amigados) + basic_machine=m68k-unknown + basic_os=amigaos + ;; + amigaunix | amix) + basic_machine=m68k-unknown + basic_os=sysv4 + ;; + apollo68) + basic_machine=m68k-apollo + basic_os=sysv + ;; + apollo68bsd) + basic_machine=m68k-apollo + basic_os=bsd + ;; + aros) + basic_machine=i386-pc + basic_os=aros + ;; + aux) + basic_machine=m68k-apple + basic_os=aux + ;; + balance) + basic_machine=ns32k-sequent + basic_os=dynix + ;; + blackfin) + basic_machine=bfin-unknown + basic_os=linux + ;; + cegcc) + basic_machine=arm-unknown + 
basic_os=cegcc + ;; + convex-c1) + basic_machine=c1-convex + basic_os=bsd + ;; + convex-c2) + basic_machine=c2-convex + basic_os=bsd + ;; + convex-c32) + basic_machine=c32-convex + basic_os=bsd + ;; + convex-c34) + basic_machine=c34-convex + basic_os=bsd + ;; + convex-c38) + basic_machine=c38-convex + basic_os=bsd + ;; + cray) + basic_machine=j90-cray + basic_os=unicos + ;; + crds | unos) + basic_machine=m68k-crds + basic_os= + ;; + da30) + basic_machine=m68k-da30 + basic_os= + ;; + decstation | pmax | pmin | dec3100 | decstatn) + basic_machine=mips-dec + basic_os= + ;; + delta88) + basic_machine=m88k-motorola + basic_os=sysv3 + ;; + dicos) + basic_machine=i686-pc + basic_os=dicos + ;; + djgpp) + basic_machine=i586-pc + basic_os=msdosdjgpp + ;; + ebmon29k) + basic_machine=a29k-amd + basic_os=ebmon + ;; + es1800 | OSE68k | ose68k | ose | OSE) + basic_machine=m68k-ericsson + basic_os=ose + ;; + gmicro) + basic_machine=tron-gmicro + basic_os=sysv + ;; + go32) + basic_machine=i386-pc + basic_os=go32 + ;; + h8300hms) + basic_machine=h8300-hitachi + basic_os=hms + ;; + h8300xray) + basic_machine=h8300-hitachi + basic_os=xray + ;; + h8500hms) + basic_machine=h8500-hitachi + basic_os=hms + ;; + harris) + basic_machine=m88k-harris + basic_os=sysv3 + ;; + hp300 | hp300hpux) + basic_machine=m68k-hp + basic_os=hpux + ;; + hp300bsd) + basic_machine=m68k-hp + basic_os=bsd + ;; + hppaosf) + basic_machine=hppa1.1-hp + basic_os=osf + ;; + hppro) + basic_machine=hppa1.1-hp + basic_os=proelf + ;; + i386mach) + basic_machine=i386-mach + basic_os=mach + ;; + isi68 | isi) + basic_machine=m68k-isi + basic_os=sysv + ;; + m68knommu) + basic_machine=m68k-unknown + basic_os=linux + ;; + magnum | m3230) + basic_machine=mips-mips + basic_os=sysv + ;; + merlin) + basic_machine=ns32k-utek + basic_os=sysv + ;; + mingw64) + basic_machine=x86_64-pc + basic_os=mingw64 + ;; + mingw32) + basic_machine=i686-pc + basic_os=mingw32 + ;; + mingw32ce) + basic_machine=arm-unknown + basic_os=mingw32ce + ;; + 
monitor) + basic_machine=m68k-rom68k + basic_os=coff + ;; + morphos) + basic_machine=powerpc-unknown + basic_os=morphos + ;; + moxiebox) + basic_machine=moxie-unknown + basic_os=moxiebox + ;; + msdos) + basic_machine=i386-pc + basic_os=msdos + ;; + msys) + basic_machine=i686-pc + basic_os=msys + ;; + mvs) + basic_machine=i370-ibm + basic_os=mvs + ;; + nacl) + basic_machine=le32-unknown + basic_os=nacl + ;; + ncr3000) + basic_machine=i486-ncr + basic_os=sysv4 + ;; + netbsd386) + basic_machine=i386-pc + basic_os=netbsd + ;; + netwinder) + basic_machine=armv4l-rebel + basic_os=linux + ;; + news | news700 | news800 | news900) + basic_machine=m68k-sony + basic_os=newsos + ;; + news1000) + basic_machine=m68030-sony + basic_os=newsos + ;; + necv70) + basic_machine=v70-nec + basic_os=sysv + ;; + nh3000) + basic_machine=m68k-harris + basic_os=cxux + ;; + nh[45]000) + basic_machine=m88k-harris + basic_os=cxux + ;; + nindy960) + basic_machine=i960-intel + basic_os=nindy + ;; + mon960) + basic_machine=i960-intel + basic_os=mon960 + ;; + nonstopux) + basic_machine=mips-compaq + basic_os=nonstopux + ;; + os400) + basic_machine=powerpc-ibm + basic_os=os400 + ;; + OSE68000 | ose68000) + basic_machine=m68000-ericsson + basic_os=ose + ;; + os68k) + basic_machine=m68k-none + basic_os=os68k + ;; + paragon) + basic_machine=i860-intel + basic_os=osf + ;; + parisc) + basic_machine=hppa-unknown + basic_os=linux + ;; + psp) + basic_machine=mipsallegrexel-sony + basic_os=psp + ;; + pw32) + basic_machine=i586-unknown + basic_os=pw32 + ;; + rdos | rdos64) + basic_machine=x86_64-pc + basic_os=rdos + ;; + rdos32) + basic_machine=i386-pc + basic_os=rdos + ;; + rom68k) + basic_machine=m68k-rom68k + basic_os=coff + ;; + sa29200) + basic_machine=a29k-amd + basic_os=udi + ;; + sei) + basic_machine=mips-sei + basic_os=seiux + ;; + sequent) + basic_machine=i386-sequent + basic_os= + ;; + sps7) + basic_machine=m68k-bull + basic_os=sysv2 + ;; + st2000) + basic_machine=m68k-tandem + basic_os= + ;; + 
stratus) + basic_machine=i860-stratus + basic_os=sysv4 + ;; + sun2) + basic_machine=m68000-sun + basic_os= + ;; + sun2os3) + basic_machine=m68000-sun + basic_os=sunos3 + ;; + sun2os4) + basic_machine=m68000-sun + basic_os=sunos4 + ;; + sun3) + basic_machine=m68k-sun + basic_os= + ;; + sun3os3) + basic_machine=m68k-sun + basic_os=sunos3 + ;; + sun3os4) + basic_machine=m68k-sun + basic_os=sunos4 + ;; + sun4) + basic_machine=sparc-sun + basic_os= + ;; + sun4os3) + basic_machine=sparc-sun + basic_os=sunos3 + ;; + sun4os4) + basic_machine=sparc-sun + basic_os=sunos4 + ;; + sun4sol2) + basic_machine=sparc-sun + basic_os=solaris2 + ;; + sun386 | sun386i | roadrunner) + basic_machine=i386-sun + basic_os= + ;; + sv1) + basic_machine=sv1-cray + basic_os=unicos + ;; + symmetry) + basic_machine=i386-sequent + basic_os=dynix + ;; + t3e) + basic_machine=alphaev5-cray + basic_os=unicos + ;; + t90) + basic_machine=t90-cray + basic_os=unicos + ;; + toad1) + basic_machine=pdp10-xkl + basic_os=tops20 + ;; + tpf) + basic_machine=s390x-ibm + basic_os=tpf + ;; + udi29k) + basic_machine=a29k-amd + basic_os=udi + ;; + ultra3) + basic_machine=a29k-nyu + basic_os=sym1 + ;; + v810 | necv810) + basic_machine=v810-nec + basic_os=none + ;; + vaxv) + basic_machine=vax-dec + basic_os=sysv + ;; + vms) + basic_machine=vax-dec + basic_os=vms + ;; + vsta) + basic_machine=i386-pc + basic_os=vsta + ;; + vxworks960) + basic_machine=i960-wrs + basic_os=vxworks + ;; + vxworks68) + basic_machine=m68k-wrs + basic_os=vxworks + ;; + vxworks29k) + basic_machine=a29k-wrs + basic_os=vxworks + ;; + xbox) + basic_machine=i686-pc + basic_os=mingw32 + ;; + ymp) + basic_machine=ymp-cray + basic_os=unicos + ;; + *) + basic_machine=$1 + basic_os= + ;; + esac ;; esac -# Decode aliases for certain CPU-COMPANY combinations. +# Decode 1-component or ad-hoc basic machines case $basic_machine in - # Recognize the basic CPU types without company name. - # Some are omitted here because they have special meanings below. 
- 1750a | 580 \ - | a29k \ - | aarch64 | aarch64_be \ - | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \ - | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \ - | am33_2.0 \ - | arc | arceb \ - | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \ - | avr | avr32 \ - | ba \ - | be32 | be64 \ - | bfin \ - | c4x | c8051 | clipper \ - | d10v | d30v | dlx | dsp16xx \ - | e2k | epiphany \ - | fido | fr30 | frv | ft32 \ - | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ - | hexagon \ - | i370 | i860 | i960 | ia64 \ - | ip2k | iq2000 \ - | k1om \ - | le32 | le64 \ - | lm32 \ - | m32c | m32r | m32rle | m68000 | m68k | m88k \ - | maxq | mb | microblaze | microblazeel | mcore | mep | metag \ - | mips | mipsbe | mipseb | mipsel | mipsle \ - | mips16 \ - | mips64 | mips64el \ - | mips64octeon | mips64octeonel \ - | mips64orion | mips64orionel \ - | mips64r5900 | mips64r5900el \ - | mips64vr | mips64vrel \ - | mips64vr4100 | mips64vr4100el \ - | mips64vr4300 | mips64vr4300el \ - | mips64vr5000 | mips64vr5000el \ - | mips64vr5900 | mips64vr5900el \ - | mipsisa32 | mipsisa32el \ - | mipsisa32r2 | mipsisa32r2el \ - | mipsisa32r6 | mipsisa32r6el \ - | mipsisa64 | mipsisa64el \ - | mipsisa64r2 | mipsisa64r2el \ - | mipsisa64r6 | mipsisa64r6el \ - | mipsisa64sb1 | mipsisa64sb1el \ - | mipsisa64sr71k | mipsisa64sr71kel \ - | mipsr5900 | mipsr5900el \ - | mipstx39 | mipstx39el \ - | mn10200 | mn10300 \ - | moxie \ - | mt \ - | msp430 \ - | nds32 | nds32le | nds32be \ - | nios | nios2 | nios2eb | nios2el \ - | ns16k | ns32k \ - | open8 | or1k | or1knd | or32 \ - | pdp10 | pdp11 | pj | pjl \ - | powerpc | powerpc64 | powerpc64le | powerpcle \ - | pru \ - | pyramid \ - | riscv32 | riscv64 \ - | rl78 | rx \ - | score \ - | sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[234]eb | sheb | shbe | shle | sh[1234]le | sh3ele \ - | sh64 | sh64le \ - | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet 
| sparclite \ - | sparcv8 | sparcv9 | sparcv9b | sparcv9v \ - | spu \ - | tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \ - | ubicom32 \ - | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \ - | visium \ - | we32k \ - | x86 | xc16x | xstormy16 | xtensa \ - | z8k | z80) - basic_machine=$basic_machine-unknown + # Here we handle the default manufacturer of certain CPU types. It is in + # some cases the only manufacturer, in others, it is the most popular. + w89k) + cpu=hppa1.1 + vendor=winbond ;; - c54x) - basic_machine=tic54x-unknown + op50n) + cpu=hppa1.1 + vendor=oki ;; - c55x) - basic_machine=tic55x-unknown + op60c) + cpu=hppa1.1 + vendor=oki ;; - c6x) - basic_machine=tic6x-unknown + ibm*) + cpu=i370 + vendor=ibm + ;; + orion105) + cpu=clipper + vendor=highlevel + ;; + mac | mpw | mac-mpw) + cpu=m68k + vendor=apple + ;; + pmac | pmac-mpw) + cpu=powerpc + vendor=apple + ;; + + # Recognize the various machine names and aliases which stand + # for a CPU type and a company and sometimes even an OS. 
+ 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) + cpu=m68000 + vendor=att + ;; + 3b*) + cpu=we32k + vendor=att + ;; + bluegene*) + cpu=powerpc + vendor=ibm + basic_os=cnk + ;; + decsystem10* | dec10*) + cpu=pdp10 + vendor=dec + basic_os=tops10 + ;; + decsystem20* | dec20*) + cpu=pdp10 + vendor=dec + basic_os=tops20 + ;; + delta | 3300 | motorola-3300 | motorola-delta \ + | 3300-motorola | delta-motorola) + cpu=m68k + vendor=motorola + ;; + dpx2*) + cpu=m68k + vendor=bull + basic_os=sysv3 + ;; + encore | umax | mmax) + cpu=ns32k + vendor=encore + ;; + elxsi) + cpu=elxsi + vendor=elxsi + basic_os=${basic_os:-bsd} + ;; + fx2800) + cpu=i860 + vendor=alliant + ;; + genix) + cpu=ns32k + vendor=ns + ;; + h3050r* | hiux*) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + hp3k9[0-9][0-9] | hp9[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k2[0-9][0-9] | hp9k31[0-9]) + cpu=m68000 + vendor=hp + ;; + hp9k3[2-9][0-9]) + cpu=m68k + vendor=hp + ;; + hp9k6[0-9][0-9] | hp6[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + hp9k7[0-79][0-9] | hp7[0-79][0-9]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k78[0-9] | hp78[0-9]) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) + # FIXME: really hppa2.0-hp + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][13679] | hp8[0-9][13679]) + cpu=hppa1.1 + vendor=hp + ;; + hp9k8[0-9][0-9] | hp8[0-9][0-9]) + cpu=hppa1.0 + vendor=hp + ;; + i*86v32) + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv32 + ;; + i*86v4*) + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv4 + ;; + i*86v) + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=sysv + ;; + i*86sol2) + cpu=$(echo "$1" | sed -e 's/86.*/86/') + vendor=pc + basic_os=solaris2 + ;; + j90 | j90-cray) + cpu=j90 + vendor=cray + basic_os=${basic_os:-unicos} + ;; + iris | iris4d) + cpu=mips + vendor=sgi + case $basic_os in + irix*) + ;; + *) + basic_os=irix4 
+ ;; + esac + ;; + miniframe) + cpu=m68000 + vendor=convergent + ;; + *mint | mint[0-9]* | *MiNT | *MiNT[0-9]*) + cpu=m68k + vendor=atari + basic_os=mint + ;; + news-3600 | risc-news) + cpu=mips + vendor=sony + basic_os=newsos + ;; + next | m*-next) + cpu=m68k + vendor=next + case $basic_os in + openstep*) + ;; + nextstep*) + ;; + ns2*) + basic_os=nextstep2 + ;; + *) + basic_os=nextstep3 + ;; + esac + ;; + np1) + cpu=np1 + vendor=gould + ;; + op50n-* | op60c-*) + cpu=hppa1.1 + vendor=oki + basic_os=proelf + ;; + pa-hitachi) + cpu=hppa1.1 + vendor=hitachi + basic_os=hiuxwe2 + ;; + pbd) + cpu=sparc + vendor=tti + ;; + pbb) + cpu=m68k + vendor=tti + ;; + pc532) + cpu=ns32k + vendor=pc532 + ;; + pn) + cpu=pn + vendor=gould + ;; + power) + cpu=power + vendor=ibm + ;; + ps2) + cpu=i386 + vendor=ibm + ;; + rm[46]00) + cpu=mips + vendor=siemens + ;; + rtpc | rtpc-*) + cpu=romp + vendor=ibm + ;; + sde) + cpu=mipsisa32 + vendor=sde + basic_os=${basic_os:-elf} + ;; + simso-wrs) + cpu=sparclite + vendor=wrs + basic_os=vxworks + ;; + tower | tower-32) + cpu=m68k + vendor=ncr + ;; + vpp*|vx|vx-*) + cpu=f301 + vendor=fujitsu + ;; + w65) + cpu=w65 + vendor=wdc + ;; + w89k-*) + cpu=hppa1.1 + vendor=winbond + basic_os=proelf + ;; + none) + cpu=none + vendor=none ;; leon|leon[3-9]) - basic_machine=sparc-$basic_machine + cpu=sparc + vendor=$basic_machine ;; - m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | nvptx | picochip) - basic_machine=$basic_machine-unknown - os=-none - ;; - m88110 | m680[12346]0 | m683?2 | m68360 | m5200 | v70 | w65 | z8k) - ;; - ms1) - basic_machine=mt-unknown + leon-*|leon[3-9]-*) + cpu=sparc + vendor=$(echo "$basic_machine" | sed 's/-.*//') ;; - strongarm | thumb | xscale) - basic_machine=arm-unknown + *-*) + # shellcheck disable=SC2162 + IFS="-" read cpu vendor <&2 - exit 1 - ;; - # Recognize the basic CPU types with company name. 
- 580-* \ - | a29k-* \ - | aarch64-* | aarch64_be-* \ - | alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \ - | alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \ - | alphapca5[67]-* | alpha64pca5[67]-* | arc-* | arceb-* \ - | arm-* | armbe-* | armle-* | armeb-* | armv*-* \ - | avr-* | avr32-* \ - | ba-* \ - | be32-* | be64-* \ - | bfin-* | bs2000-* \ - | c[123]* | c30-* | [cjt]90-* | c4x-* \ - | c8051-* | clipper-* | craynv-* | cydra-* \ - | d10v-* | d30v-* | dlx-* \ - | e2k-* | elxsi-* \ - | f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \ - | h8300-* | h8500-* \ - | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ - | hexagon-* \ - | i*86-* | i860-* | i960-* | ia64-* \ - | ip2k-* | iq2000-* \ - | k1om-* \ - | le32-* | le64-* \ - | lm32-* \ - | m32c-* | m32r-* | m32rle-* \ - | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \ - | microblaze-* | microblazeel-* \ - | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ - | mips16-* \ - | mips64-* | mips64el-* \ - | mips64octeon-* | mips64octeonel-* \ - | mips64orion-* | mips64orionel-* \ - | mips64r5900-* | mips64r5900el-* \ - | mips64vr-* | mips64vrel-* \ - | mips64vr4100-* | mips64vr4100el-* \ - | mips64vr4300-* | mips64vr4300el-* \ - | mips64vr5000-* | mips64vr5000el-* \ - | mips64vr5900-* | mips64vr5900el-* \ - | mipsisa32-* | mipsisa32el-* \ - | mipsisa32r2-* | mipsisa32r2el-* \ - | mipsisa32r6-* | mipsisa32r6el-* \ - | mipsisa64-* | mipsisa64el-* \ - | mipsisa64r2-* | mipsisa64r2el-* \ - | mipsisa64r6-* | mipsisa64r6el-* \ - | mipsisa64sb1-* | mipsisa64sb1el-* \ - | mipsisa64sr71k-* | mipsisa64sr71kel-* \ - | mipsr5900-* | mipsr5900el-* \ - | mipstx39-* | mipstx39el-* \ - | mmix-* \ - | mt-* \ - | msp430-* \ - | nds32-* | nds32le-* | nds32be-* \ - | nios-* | nios2-* | nios2eb-* | nios2el-* \ - | none-* | np1-* | ns16k-* | ns32k-* \ - | open8-* \ - | or1k*-* \ - | orion-* \ - | pdp10-* | pdp11-* | pj-* | 
pjl-* | pn-* | power-* \ - | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \ - | pru-* \ - | pyramid-* \ - | riscv32-* | riscv64-* \ - | rl78-* | romp-* | rs6000-* | rx-* \ - | sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \ - | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ - | sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \ - | sparclite-* \ - | sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx*-* \ - | tahoe-* \ - | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ - | tile*-* \ - | tron-* \ - | ubicom32-* \ - | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \ - | vax-* \ - | visium-* \ - | we32k-* \ - | x86-* | x86_64-* | xc16x-* | xps100-* \ - | xstormy16-* | xtensa*-* \ - | ymp-* \ - | z8k-* | z80-*) - ;; - # Recognize the basic CPU types without company name, with glob match. - xtensa*) - basic_machine=$basic_machine-unknown - ;; - # Recognize the various machine names and aliases which stand - # for a CPU type and a company and sometimes even an OS. 
- 386bsd) - basic_machine=i386-unknown - os=-bsd - ;; - 3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc) - basic_machine=m68000-att - ;; - 3b*) - basic_machine=we32k-att - ;; - a29khif) - basic_machine=a29k-amd - os=-udi - ;; - abacus) - basic_machine=abacus-unknown - ;; - adobe68k) - basic_machine=m68010-adobe - os=-scout - ;; - alliant | fx80) - basic_machine=fx80-alliant - ;; - altos | altos3068) - basic_machine=m68k-altos - ;; - am29k) - basic_machine=a29k-none - os=-bsd - ;; - amd64) - basic_machine=x86_64-pc - ;; - amd64-*) - basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - amdahl) - basic_machine=580-amdahl - os=-sysv - ;; - amiga | amiga-*) - basic_machine=m68k-unknown - ;; - amigaos | amigados) - basic_machine=m68k-unknown - os=-amigaos - ;; - amigaunix | amix) - basic_machine=m68k-unknown - os=-sysv4 - ;; - apollo68) - basic_machine=m68k-apollo - os=-sysv - ;; - apollo68bsd) - basic_machine=m68k-apollo - os=-bsd - ;; - aros) - basic_machine=i386-pc - os=-aros - ;; - asmjs) - basic_machine=asmjs-unknown - ;; - aux) - basic_machine=m68k-apple - os=-aux - ;; - balance) - basic_machine=ns32k-sequent - os=-dynix - ;; - blackfin) - basic_machine=bfin-unknown - os=-linux - ;; - blackfin-*) - basic_machine=bfin-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - bluegene*) - basic_machine=powerpc-ibm - os=-cnk - ;; - c54x-*) - basic_machine=tic54x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c55x-*) - basic_machine=tic55x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c6x-*) - basic_machine=tic6x-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - c90) - basic_machine=c90-cray - os=-unicos - ;; - cegcc) - basic_machine=arm-unknown - os=-cegcc - ;; - convex-c1) - basic_machine=c1-convex - os=-bsd - ;; - convex-c2) - basic_machine=c2-convex - os=-bsd - ;; - convex-c32) - basic_machine=c32-convex - os=-bsd - ;; - convex-c34) - basic_machine=c34-convex - os=-bsd - ;; - convex-c38) - basic_machine=c38-convex - os=-bsd - ;; 
- cray | j90) - basic_machine=j90-cray - os=-unicos - ;; - craynv) - basic_machine=craynv-cray - os=-unicosmp - ;; - cr16 | cr16-*) - basic_machine=cr16-unknown - os=-elf - ;; - crds | unos) - basic_machine=m68k-crds - ;; - crisv32 | crisv32-* | etraxfs*) - basic_machine=crisv32-axis - ;; - cris | cris-* | etrax*) - basic_machine=cris-axis - ;; - crx) - basic_machine=crx-unknown - os=-elf - ;; - da30 | da30-*) - basic_machine=m68k-da30 - ;; - decstation | decstation-3100 | pmax | pmax-* | pmin | dec3100 | decstatn) - basic_machine=mips-dec - ;; - decsystem10* | dec10*) - basic_machine=pdp10-dec - os=-tops10 - ;; - decsystem20* | dec20*) - basic_machine=pdp10-dec - os=-tops20 - ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) - basic_machine=m68k-motorola - ;; - delta88) - basic_machine=m88k-motorola - os=-sysv3 - ;; - dicos) - basic_machine=i686-pc - os=-dicos - ;; - djgpp) - basic_machine=i586-pc - os=-msdosdjgpp - ;; - dpx20 | dpx20-*) - basic_machine=rs6000-bull - os=-bosx - ;; - dpx2* | dpx2*-bull) - basic_machine=m68k-bull - os=-sysv3 - ;; - e500v[12]) - basic_machine=powerpc-unknown - os=$os"spe" - ;; - e500v[12]-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - os=$os"spe" - ;; - ebmon29k) - basic_machine=a29k-amd - os=-ebmon - ;; - elxsi) - basic_machine=elxsi-elxsi - os=-bsd - ;; - encore | umax | mmax) - basic_machine=ns32k-encore - ;; - es1800 | OSE68k | ose68k | ose | OSE) - basic_machine=m68k-ericsson - os=-ose - ;; - fx2800) - basic_machine=i860-alliant - ;; - genix) - basic_machine=ns32k-ns - ;; - gmicro) - basic_machine=tron-gmicro - os=-sysv - ;; - go32) - basic_machine=i386-pc - os=-go32 - ;; - h3050r* | hiux*) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - h8300hms) - basic_machine=h8300-hitachi - os=-hms - ;; - h8300xray) - basic_machine=h8300-hitachi - os=-xray - ;; - h8500hms) - basic_machine=h8500-hitachi - os=-hms - ;; - harris) - basic_machine=m88k-harris - os=-sysv3 - ;; - 
hp300-*) - basic_machine=m68k-hp - ;; - hp300bsd) - basic_machine=m68k-hp - os=-bsd - ;; - hp300hpux) - basic_machine=m68k-hp - os=-hpux - ;; - hp3k9[0-9][0-9] | hp9[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k2[0-9][0-9] | hp9k31[0-9]) - basic_machine=m68000-hp - ;; - hp9k3[2-9][0-9]) - basic_machine=m68k-hp - ;; - hp9k6[0-9][0-9] | hp6[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hp9k7[0-79][0-9] | hp7[0-79][0-9]) - basic_machine=hppa1.1-hp - ;; - hp9k78[0-9] | hp78[0-9]) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893) - # FIXME: really hppa2.0-hp - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][13679] | hp8[0-9][13679]) - basic_machine=hppa1.1-hp - ;; - hp9k8[0-9][0-9] | hp8[0-9][0-9]) - basic_machine=hppa1.0-hp - ;; - hppa-next) - os=-nextstep3 - ;; - hppaosf) - basic_machine=hppa1.1-hp - os=-osf - ;; - hppro) - basic_machine=hppa1.1-hp - os=-proelf - ;; - i370-ibm* | ibm*) - basic_machine=i370-ibm - ;; - i*86v32) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv32 - ;; - i*86v4*) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv4 - ;; - i*86v) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-sysv - ;; - i*86sol2) - basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'` - os=-solaris2 - ;; - i386mach) - basic_machine=i386-mach - os=-mach - ;; - i386-vsta | vsta) - basic_machine=i386-unknown - os=-vsta - ;; - iris | iris4d) - basic_machine=mips-sgi - case $os in - -irix*) - ;; - *) - os=-irix4 - ;; - esac - ;; - isi68 | isi) - basic_machine=m68k-isi - os=-sysv - ;; - leon-*|leon[3-9]-*) - basic_machine=sparc-`echo $basic_machine | sed 's/-.*//'` - ;; - m68knommu) - basic_machine=m68k-unknown - os=-linux - ;; - m68knommu-*) - basic_machine=m68k-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - m88k-omron*) - basic_machine=m88k-omron - ;; - magnum | m3230) - basic_machine=mips-mips - os=-sysv - ;; - merlin) - 
basic_machine=ns32k-utek - os=-sysv - ;; - microblaze*) - basic_machine=microblaze-xilinx - ;; - mingw64) - basic_machine=x86_64-pc - os=-mingw64 - ;; - mingw32) - basic_machine=i686-pc - os=-mingw32 - ;; - mingw32ce) - basic_machine=arm-unknown - os=-mingw32ce - ;; - miniframe) - basic_machine=m68000-convergent - ;; - *mint | -mint[0-9]* | *MiNT | *MiNT[0-9]*) - basic_machine=m68k-atari - os=-mint - ;; - mips3*-*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` - ;; - mips3*) - basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown - ;; - monitor) - basic_machine=m68k-rom68k - os=-coff - ;; - morphos) - basic_machine=powerpc-unknown - os=-morphos - ;; - moxiebox) - basic_machine=moxie-unknown - os=-moxiebox - ;; - msdos) - basic_machine=i386-pc - os=-msdos - ;; - ms1-*) - basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'` - ;; - msys) - basic_machine=i686-pc - os=-msys - ;; - mvs) - basic_machine=i370-ibm - os=-mvs - ;; - nacl) - basic_machine=le32-unknown - os=-nacl - ;; - ncr3000) - basic_machine=i486-ncr - os=-sysv4 - ;; - netbsd386) - basic_machine=i386-unknown - os=-netbsd - ;; - netwinder) - basic_machine=armv4l-rebel - os=-linux - ;; - news | news700 | news800 | news900) - basic_machine=m68k-sony - os=-newsos - ;; - news1000) - basic_machine=m68030-sony - os=-newsos - ;; - news-3600 | risc-news) - basic_machine=mips-sony - os=-newsos - ;; - necv70) - basic_machine=v70-nec - os=-sysv - ;; - next | m*-next ) - basic_machine=m68k-next - case $os in - -nextstep* ) - ;; - -ns2*) - os=-nextstep2 - ;; - *) - os=-nextstep3 - ;; - esac - ;; - nh3000) - basic_machine=m68k-harris - os=-cxux - ;; - nh[45]000) - basic_machine=m88k-harris - os=-cxux - ;; - nindy960) - basic_machine=i960-intel - os=-nindy - ;; - mon960) - basic_machine=i960-intel - os=-mon960 - ;; - nonstopux) - basic_machine=mips-compaq - os=-nonstopux - ;; - np1) - basic_machine=np1-gould - ;; - neo-tandem) - basic_machine=neo-tandem - ;; - nse-tandem) - 
basic_machine=nse-tandem - ;; - nsr-tandem) - basic_machine=nsr-tandem - ;; - op50n-* | op60c-*) - basic_machine=hppa1.1-oki - os=-proelf - ;; - openrisc | openrisc-*) - basic_machine=or32-unknown - ;; - os400) - basic_machine=powerpc-ibm - os=-os400 - ;; - OSE68000 | ose68000) - basic_machine=m68000-ericsson - os=-ose - ;; - os68k) - basic_machine=m68k-none - os=-os68k - ;; - pa-hitachi) - basic_machine=hppa1.1-hitachi - os=-hiuxwe2 - ;; - paragon) - basic_machine=i860-intel - os=-osf - ;; - parisc) - basic_machine=hppa-unknown - os=-linux - ;; - parisc-*) - basic_machine=hppa-`echo $basic_machine | sed 's/^[^-]*-//'` - os=-linux - ;; - pbd) - basic_machine=sparc-tti - ;; - pbb) - basic_machine=m68k-tti - ;; - pc532 | pc532-*) - basic_machine=ns32k-pc532 + cpu=$basic_machine + vendor=pc ;; + # These rules are duplicated from below for sake of the special case above; + # i.e. things that normalized to x86 arches should also default to "pc" pc98) - basic_machine=i386-pc + cpu=i386 + vendor=pc ;; - pc98-*) - basic_machine=i386-`echo $basic_machine | sed 's/^[^-]*-//'` + x64 | amd64) + cpu=x86_64 + vendor=pc ;; - pentium | p5 | k5 | k6 | nexgen | viac3) - basic_machine=i586-pc + # Recognize the basic CPU types without company name. + *) + cpu=$basic_machine + vendor=unknown ;; - pentiumpro | p6 | 6x86 | athlon | athlon_*) - basic_machine=i686-pc +esac + +unset -v basic_machine + +# Decode basic machines in the full and proper CPU-Company form. +case $cpu-$vendor in + # Here we handle the default manufacturer of certain CPU types in canonical form. It is in + # some cases the only manufacturer, in others, it is the most popular. 
+ craynv-unknown) + vendor=cray + basic_os=${basic_os:-unicosmp} ;; - pentiumii | pentium2 | pentiumiii | pentium3) - basic_machine=i686-pc + c90-unknown | c90-cray) + vendor=cray + basic_os=${Basic_os:-unicos} ;; - pentium4) - basic_machine=i786-pc + fx80-unknown) + vendor=alliant ;; - pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) - basic_machine=i586-`echo $basic_machine | sed 's/^[^-]*-//'` + romp-unknown) + vendor=ibm ;; - pentiumpro-* | p6-* | 6x86-* | athlon-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + mmix-unknown) + vendor=knuth ;; - pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) - basic_machine=i686-`echo $basic_machine | sed 's/^[^-]*-//'` + microblaze-unknown | microblazeel-unknown) + vendor=xilinx ;; - pentium4-*) - basic_machine=i786-`echo $basic_machine | sed 's/^[^-]*-//'` + rs6000-unknown) + vendor=ibm ;; - pn) - basic_machine=pn-gould + vax-unknown) + vendor=dec ;; - power) basic_machine=power-ibm + pdp11-unknown) + vendor=dec ;; - ppc | ppcbe) basic_machine=powerpc-unknown + we32k-unknown) + vendor=att ;; - ppc-* | ppcbe-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` + cydra-unknown) + vendor=cydrome ;; - ppcle | powerpclittle) - basic_machine=powerpcle-unknown + i370-ibm*) + vendor=ibm ;; - ppcle-* | powerpclittle-*) - basic_machine=powerpcle-`echo $basic_machine | sed 's/^[^-]*-//'` + orion-unknown) + vendor=highlevel ;; - ppc64) basic_machine=powerpc64-unknown - ;; - ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ppc64le | powerpc64little) - basic_machine=powerpc64le-unknown - ;; - ppc64le-* | powerpc64little-*) - basic_machine=powerpc64le-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - ps2) - basic_machine=i386-ibm - ;; - pw32) - basic_machine=i586-unknown - os=-pw32 - ;; - rdos | rdos64) - basic_machine=x86_64-pc - os=-rdos - ;; - rdos32) - basic_machine=i386-pc - os=-rdos - ;; - rom68k) - basic_machine=m68k-rom68k - os=-coff - ;; - rm[46]00) - 
basic_machine=mips-siemens - ;; - rtpc | rtpc-*) - basic_machine=romp-ibm - ;; - s390 | s390-*) - basic_machine=s390-ibm - ;; - s390x | s390x-*) - basic_machine=s390x-ibm - ;; - sa29200) - basic_machine=a29k-amd - os=-udi - ;; - sb1) - basic_machine=mipsisa64sb1-unknown - ;; - sb1el) - basic_machine=mipsisa64sb1el-unknown - ;; - sde) - basic_machine=mipsisa32-sde - os=-elf - ;; - sei) - basic_machine=mips-sei - os=-seiux - ;; - sequent) - basic_machine=i386-sequent - ;; - sh) - basic_machine=sh-hitachi - os=-hms - ;; - sh5el) - basic_machine=sh5le-unknown - ;; - sh64) - basic_machine=sh64-unknown - ;; - sparclite-wrs | simso-wrs) - basic_machine=sparclite-wrs - os=-vxworks - ;; - sps7) - basic_machine=m68k-bull - os=-sysv2 - ;; - spur) - basic_machine=spur-unknown - ;; - st2000) - basic_machine=m68k-tandem - ;; - stratus) - basic_machine=i860-stratus - os=-sysv4 - ;; - strongarm-* | thumb-*) - basic_machine=arm-`echo $basic_machine | sed 's/^[^-]*-//'` - ;; - sun2) - basic_machine=m68000-sun - ;; - sun2os3) - basic_machine=m68000-sun - os=-sunos3 - ;; - sun2os4) - basic_machine=m68000-sun - os=-sunos4 - ;; - sun3os3) - basic_machine=m68k-sun - os=-sunos3 - ;; - sun3os4) - basic_machine=m68k-sun - os=-sunos4 - ;; - sun4os3) - basic_machine=sparc-sun - os=-sunos3 - ;; - sun4os4) - basic_machine=sparc-sun - os=-sunos4 - ;; - sun4sol2) - basic_machine=sparc-sun - os=-solaris2 - ;; - sun3 | sun3-*) - basic_machine=m68k-sun - ;; - sun4) - basic_machine=sparc-sun - ;; - sun386 | sun386i | roadrunner) - basic_machine=i386-sun - ;; - sv1) - basic_machine=sv1-cray - os=-unicos - ;; - symmetry) - basic_machine=i386-sequent - os=-dynix - ;; - t3e) - basic_machine=alphaev5-cray - os=-unicos - ;; - t90) - basic_machine=t90-cray - os=-unicos - ;; - tile*) - basic_machine=$basic_machine-unknown - os=-linux-gnu - ;; - tx39) - basic_machine=mipstx39-unknown - ;; - tx39el) - basic_machine=mipstx39el-unknown - ;; - toad1) - basic_machine=pdp10-xkl - os=-tops20 - ;; - tower | tower-32) 
- basic_machine=m68k-ncr - ;; - tpf) - basic_machine=s390x-ibm - os=-tpf - ;; - udi29k) - basic_machine=a29k-amd - os=-udi - ;; - ultra3) - basic_machine=a29k-nyu - os=-sym1 - ;; - v810 | necv810) - basic_machine=v810-nec - os=-none - ;; - vaxv) - basic_machine=vax-dec - os=-sysv - ;; - vms) - basic_machine=vax-dec - os=-vms - ;; - vpp*|vx|vx-*) - basic_machine=f301-fujitsu - ;; - vxworks960) - basic_machine=i960-wrs - os=-vxworks - ;; - vxworks68) - basic_machine=m68k-wrs - os=-vxworks - ;; - vxworks29k) - basic_machine=a29k-wrs - os=-vxworks - ;; - w65*) - basic_machine=w65-wdc - os=-none - ;; - w89k-*) - basic_machine=hppa1.1-winbond - os=-proelf - ;; - xbox) - basic_machine=i686-pc - os=-mingw32 - ;; - xps | xps100) - basic_machine=xps100-honeywell - ;; - xscale-* | xscalee[bl]-*) - basic_machine=`echo $basic_machine | sed 's/^xscale/arm/'` - ;; - ymp) - basic_machine=ymp-cray - os=-unicos - ;; - z8k-*-coff) - basic_machine=z8k-unknown - os=-sim - ;; - z80-*-coff) - basic_machine=z80-unknown - os=-sim - ;; - none) - basic_machine=none-none - os=-none + xps-unknown | xps100-unknown) + cpu=xps100 + vendor=honeywell ;; -# Here we handle the default manufacturer of certain CPU types. It is in -# some cases the only manufacturer, in others, it is the most popular. 
- w89k) - basic_machine=hppa1.1-winbond + # Here we normalize CPU types with a missing or matching vendor + dpx20-unknown | dpx20-bull) + cpu=rs6000 + vendor=bull + basic_os=${basic_os:-bosx} ;; - op50n) - basic_machine=hppa1.1-oki + + # Here we normalize CPU types irrespective of the vendor + amd64-*) + cpu=x86_64 ;; - op60c) - basic_machine=hppa1.1-oki + blackfin-*) + cpu=bfin + basic_os=linux ;; - romp) - basic_machine=romp-ibm + c54x-*) + cpu=tic54x ;; - mmix) - basic_machine=mmix-knuth + c55x-*) + cpu=tic55x ;; - rs6000) - basic_machine=rs6000-ibm + c6x-*) + cpu=tic6x ;; - vax) - basic_machine=vax-dec + e500v[12]-*) + cpu=powerpc + basic_os=${basic_os}"spe" ;; - pdp10) - # there are many clones, so DEC is not a safe bet - basic_machine=pdp10-unknown + mips3*-*) + cpu=mips64 ;; - pdp11) - basic_machine=pdp11-dec + ms1-*) + cpu=mt ;; - we32k) - basic_machine=we32k-att + m68knommu-*) + cpu=m68k + basic_os=linux ;; - sh[1234] | sh[24]a | sh[24]aeb | sh[34]eb | sh[1234]le | sh[23]ele) - basic_machine=sh-unknown + m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*) + cpu=s12z ;; - sparc | sparcv8 | sparcv9 | sparcv9b | sparcv9v) - basic_machine=sparc-sun + openrisc-*) + cpu=or32 ;; - cydra) - basic_machine=cydra-cydrome + parisc-*) + cpu=hppa + basic_os=linux ;; - orion) - basic_machine=orion-highlevel + pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*) + cpu=i586 ;; - orion105) - basic_machine=clipper-highlevel + pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*) + cpu=i686 ;; - mac | mpw | mac-mpw) - basic_machine=m68k-apple + pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*) + cpu=i686 ;; - pmac | pmac-mpw) - basic_machine=powerpc-apple + pentium4-*) + cpu=i786 ;; - *-unknown) - # Make sure to match an already-canonicalized machine name. 
+ pc98-*) + cpu=i386 ;; + ppc-* | ppcbe-*) + cpu=powerpc + ;; + ppcle-* | powerpclittle-*) + cpu=powerpcle + ;; + ppc64-*) + cpu=powerpc64 + ;; + ppc64le-* | powerpc64little-*) + cpu=powerpc64le + ;; + sb1-*) + cpu=mipsisa64sb1 + ;; + sb1el-*) + cpu=mipsisa64sb1el + ;; + sh5e[lb]-*) + cpu=$(echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/') + ;; + spur-*) + cpu=spur + ;; + strongarm-* | thumb-*) + cpu=arm + ;; + tx39-*) + cpu=mipstx39 + ;; + tx39el-*) + cpu=mipstx39el + ;; + x64-*) + cpu=x86_64 + ;; + xscale-* | xscalee[bl]-*) + cpu=$(echo "$cpu" | sed 's/^xscale/arm/') + ;; + arm64-*) + cpu=aarch64 + ;; + + # Recognize the canonical CPU Types that limit and/or modify the + # company names they are paired with. + cr16-*) + basic_os=${basic_os:-elf} + ;; + crisv32-* | etraxfs*-*) + cpu=crisv32 + vendor=axis + ;; + cris-* | etrax*-*) + cpu=cris + vendor=axis + ;; + crx-*) + basic_os=${basic_os:-elf} + ;; + neo-tandem) + cpu=neo + vendor=tandem + ;; + nse-tandem) + cpu=nse + vendor=tandem + ;; + nsr-tandem) + cpu=nsr + vendor=tandem + ;; + nsv-tandem) + cpu=nsv + vendor=tandem + ;; + nsx-tandem) + cpu=nsx + vendor=tandem + ;; + mipsallegrexel-sony) + cpu=mipsallegrexel + vendor=sony + ;; + tile*-*) + basic_os=${basic_os:-linux-gnu} + ;; + *) - echo Invalid configuration \`$1\': machine \`$basic_machine\' not recognized 1>&2 - exit 1 + # Recognize the canonical CPU types that are allowed with any + # company name. 
+ case $cpu in + 1750a | 580 \ + | a29k \ + | aarch64 | aarch64_be \ + | abacus \ + | alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \ + | alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \ + | alphapca5[67] | alpha64pca5[67] \ + | am33_2.0 \ + | amdgcn \ + | arc | arceb \ + | arm | arm[lb]e | arme[lb] | armv* \ + | avr | avr32 \ + | asmjs \ + | ba \ + | be32 | be64 \ + | bfin | bpf | bs2000 \ + | c[123]* | c30 | [cjt]90 | c4x \ + | c8051 | clipper | craynv | csky | cydra \ + | d10v | d30v | dlx | dsp16xx \ + | e2k | elxsi | epiphany \ + | f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \ + | h8300 | h8500 \ + | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ + | hexagon \ + | i370 | i*86 | i860 | i960 | ia16 | ia64 \ + | ip2k | iq2000 \ + | k1om \ + | le32 | le64 \ + | lm32 \ + | m32c | m32r | m32rle \ + | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ + | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ + | m88110 | m88k | maxq | mb | mcore | mep | metag \ + | microblaze | microblazeel \ + | mips | mipsbe | mipseb | mipsel | mipsle \ + | mips16 \ + | mips64 | mips64eb | mips64el \ + | mips64octeon | mips64octeonel \ + | mips64orion | mips64orionel \ + | mips64r5900 | mips64r5900el \ + | mips64vr | mips64vrel \ + | mips64vr4100 | mips64vr4100el \ + | mips64vr4300 | mips64vr4300el \ + | mips64vr5000 | mips64vr5000el \ + | mips64vr5900 | mips64vr5900el \ + | mipsisa32 | mipsisa32el \ + | mipsisa32r2 | mipsisa32r2el \ + | mipsisa32r6 | mipsisa32r6el \ + | mipsisa64 | mipsisa64el \ + | mipsisa64r2 | mipsisa64r2el \ + | mipsisa64r6 | mipsisa64r6el \ + | mipsisa64sb1 | mipsisa64sb1el \ + | mipsisa64sr71k | mipsisa64sr71kel \ + | mipsr5900 | mipsr5900el \ + | mipstx39 | mipstx39el \ + | mmix \ + | mn10200 | mn10300 \ + | moxie \ + | mt \ + | msp430 \ + | nds32 | nds32le | nds32be \ + | nfp \ + | nios | nios2 | nios2eb | nios2el \ + | none | np1 | ns16k | ns32k | nvptx \ + | open8 \ + | or1k* \ + | or32 \ + | orion \ + | picochip \ + | pdp10 | 
pdp11 | pj | pjl | pn | power \ + | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \ + | pru \ + | pyramid \ + | riscv | riscv32 | riscv64 \ + | rl78 | romp | rs6000 | rx \ + | s390 | s390x \ + | score \ + | sh | shl \ + | sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \ + | sh[1234]e[lb] | sh[12345][lb]e | sh[23]ele | sh64 | sh64le \ + | sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \ + | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ + | spu \ + | tahoe \ + | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ + | tron \ + | ubicom32 \ + | v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \ + | vax \ + | visium \ + | w65 \ + | wasm32 | wasm64 \ + | we32k \ + | x86 | x86_64 | xc16x | xgate | xps100 \ + | xstormy16 | xtensa* \ + | ymp \ + | z8k | z80) + ;; + + *) + echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2 + exit 1 + ;; + esac ;; esac # Here we canonicalize certain aliases for manufacturers. -case $basic_machine in - *-digital*) - basic_machine=`echo $basic_machine | sed 's/digital.*/dec/'` +case $vendor in + digital*) + vendor=dec ;; - *-commodore*) - basic_machine=`echo $basic_machine | sed 's/commodore.*/cbm/'` + commodore*) + vendor=cbm ;; *) ;; @@ -1355,203 +1278,213 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if [ x"$os" != x"" ] +if test x$basic_os != x then + +# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# set os. +case $basic_os in + gnu/linux*) + kernel=linux + os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|') + ;; + os2-emx) + kernel=os2 + os=$(echo $basic_os | sed -e 's|os2-emx|emx|') + ;; + nto-qnx*) + kernel=nto + os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|') + ;; + *-*) + # shellcheck disable=SC2162 + IFS="-" read kernel os <&2 - exit 1 + # No normalization, but not necessarily accepted, that comes below. 
;; esac + else # Here we handle the default operating systems that come with various machines. @@ -1564,261 +1497,356 @@ else # will signal an error saying that MANUFACTURER isn't an operating # system, and we'll never get to this point. -case $basic_machine in +kernel= +case $cpu-$vendor in score-*) - os=-elf + os=elf ;; spu-*) - os=-elf + os=elf ;; *-acorn) - os=-riscix1.2 + os=riscix1.2 ;; arm*-rebel) - os=-linux + kernel=linux + os=gnu ;; arm*-semi) - os=-aout + os=aout ;; c4x-* | tic4x-*) - os=-coff + os=coff ;; c8051-*) - os=-elf + os=elf + ;; + clipper-intergraph) + os=clix ;; hexagon-*) - os=-elf + os=elf ;; tic54x-*) - os=-coff + os=coff ;; tic55x-*) - os=-coff + os=coff ;; tic6x-*) - os=-coff + os=coff ;; # This must come before the *-dec entry. pdp10-*) - os=-tops20 + os=tops20 ;; pdp11-*) - os=-none + os=none ;; *-dec | vax-*) - os=-ultrix4.2 + os=ultrix4.2 ;; m68*-apollo) - os=-domain + os=domain ;; i386-sun) - os=-sunos4.0.2 + os=sunos4.0.2 ;; m68000-sun) - os=-sunos3 + os=sunos3 ;; m68*-cisco) - os=-aout + os=aout ;; mep-*) - os=-elf + os=elf ;; mips*-cisco) - os=-elf + os=elf ;; mips*-*) - os=-elf + os=elf ;; or32-*) - os=-coff + os=coff ;; *-tti) # must be before sparc entry or we get the wrong os. 
- os=-sysv3 + os=sysv3 ;; sparc-* | *-sun) - os=-sunos4.1.1 + os=sunos4.1.1 + ;; + pru-*) + os=elf ;; *-be) - os=-beos - ;; - *-haiku) - os=-haiku + os=beos ;; *-ibm) - os=-aix + os=aix ;; *-knuth) - os=-mmixware + os=mmixware ;; *-wec) - os=-proelf + os=proelf ;; *-winbond) - os=-proelf + os=proelf ;; *-oki) - os=-proelf + os=proelf ;; *-hp) - os=-hpux + os=hpux ;; *-hitachi) - os=-hiux + os=hiux ;; i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent) - os=-sysv + os=sysv ;; *-cbm) - os=-amigaos + os=amigaos ;; *-dg) - os=-dgux + os=dgux ;; *-dolphin) - os=-sysv3 + os=sysv3 ;; m68k-ccur) - os=-rtu + os=rtu ;; m88k-omron*) - os=-luna - ;; - *-next ) - os=-nextstep - ;; - *-sequent) - os=-ptx - ;; - *-crds) - os=-unos - ;; - *-ns) - os=-genix - ;; - i370-*) - os=-mvs + os=luna ;; *-next) - os=-nextstep3 + os=nextstep + ;; + *-sequent) + os=ptx + ;; + *-crds) + os=unos + ;; + *-ns) + os=genix + ;; + i370-*) + os=mvs ;; *-gould) - os=-sysv + os=sysv ;; *-highlevel) - os=-bsd + os=bsd ;; *-encore) - os=-bsd + os=bsd ;; *-sgi) - os=-irix + os=irix ;; *-siemens) - os=-sysv4 + os=sysv4 ;; *-masscomp) - os=-rtu + os=rtu ;; f30[01]-fujitsu | f700-fujitsu) - os=-uxpv + os=uxpv ;; *-rom68k) - os=-coff + os=coff ;; *-*bug) - os=-coff + os=coff ;; *-apple) - os=-macos + os=macos ;; *-atari*) - os=-mint + os=mint + ;; + *-wrs) + os=vxworks ;; *) - os=-none + os=none ;; esac + fi +# Now, validate our (potentially fixed-up) OS. +case $os in + # Sometimes we do "kernel-abi", so those need to count as OSes. + musl* | newlib* | uclibc*) + ;; + # Likewise for "kernel-libc" + eabi | eabihf | gnueabi | gnueabihf) + ;; + # Now accept the basic system types. + # The portable systems comes first. + # Each alternative MUST end in a * to match a version number. 
+ gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \ + | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \ + | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \ + | sym* | plan9* | psp* | sim* | xray* | os68k* | v88r* \ + | hiux* | abug | nacl* | netware* | windows* \ + | os9* | macos* | osx* | ios* \ + | mpw* | magic* | mmixware* | mon960* | lnews* \ + | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \ + | aos* | aros* | cloudabi* | sortix* | twizzler* \ + | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \ + | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \ + | mirbsd* | netbsd* | dicos* | openedition* | ose* \ + | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \ + | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \ + | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \ + | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \ + | udi* | lites* | ieee* | go32* | aux* | hcos* \ + | chorusrdb* | cegcc* | glidix* \ + | cygwin* | msys* | pe* | moss* | proelf* | rtems* \ + | midipix* | mingw32* | mingw64* | mint* \ + | uxpv* | beos* | mpeix* | udk* | moxiebox* \ + | interix* | uwin* | mks* | rhapsody* | darwin* \ + | openstep* | oskit* | conix* | pw32* | nonstopux* \ + | storm-chaos* | tops10* | tenex* | tops20* | its* \ + | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \ + | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \ + | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \ + | skyos* | haiku* | rdos* | toppers* | drops* | es* \ + | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ + | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx*) + ;; + # This one is extra strict with allowed versions + sco3.2v2 | sco3.2v[4-9]* | sco5v6*) + # Don't forget version if it is 3.2v4 or newer. 
+ ;; + none) + ;; + *) + echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + exit 1 + ;; +esac + +# As a final step for OS-related things, validate the OS-kernel combination +# (given a valid OS), if there is a kernel. +case $kernel-$os in + linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) + ;; + uclinux-uclibc* ) + ;; + -dietlibc* | -newlib* | -musl* | -uclibc* ) + # These are just libc implementations, not actual OSes, and thus + # require a kernel. + echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + exit 1 + ;; + kfreebsd*-gnu* | kopensolaris*-gnu*) + ;; + nto-qnx*) + ;; + os2-emx) + ;; + *-eabi* | *-gnueabi*) + ;; + -*) + # Blank kernel with real OS is always fine. + ;; + *-*) + echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + exit 1 + ;; +esac + # Here we handle the case where we know the os, and the CPU type, but not the # manufacturer. We pick the logical manufacturer. 
-vendor=unknown -case $basic_machine in - *-unknown) - case $os in - -riscix*) +case $vendor in + unknown) + case $cpu-$os in + *-riscix*) vendor=acorn ;; - -sunos*) + *-sunos*) vendor=sun ;; - -cnk*|-aix*) + *-cnk* | *-aix*) vendor=ibm ;; - -beos*) + *-beos*) vendor=be ;; - -hpux*) + *-hpux*) vendor=hp ;; - -mpeix*) + *-mpeix*) vendor=hp ;; - -hiux*) + *-hiux*) vendor=hitachi ;; - -unos*) + *-unos*) vendor=crds ;; - -dgux*) + *-dgux*) vendor=dg ;; - -luna*) + *-luna*) vendor=omron ;; - -genix*) + *-genix*) vendor=ns ;; - -mvs* | -opened*) + *-clix*) + vendor=intergraph + ;; + *-mvs* | *-opened*) vendor=ibm ;; - -os400*) + *-os400*) vendor=ibm ;; - -ptx*) + s390-* | s390x-*) + vendor=ibm + ;; + *-ptx*) vendor=sequent ;; - -tpf*) + *-tpf*) vendor=ibm ;; - -vxsim* | -vxworks* | -windiss*) + *-vxsim* | *-vxworks* | *-windiss*) vendor=wrs ;; - -aux*) + *-aux*) vendor=apple ;; - -hms*) + *-hms*) vendor=hitachi ;; - -mpw* | -macos*) + *-mpw* | *-macos*) vendor=apple ;; - -*mint | -mint[0-9]* | -*MiNT | -MiNT[0-9]*) + *-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*) vendor=atari ;; - -vos*) + *-vos*) vendor=stratus ;; esac - basic_machine=`echo $basic_machine | sed "s/unknown/$vendor/"` ;; esac -echo $basic_machine$os +echo "$cpu-$vendor-${kernel:+$kernel-}$os" exit # Local variables: -# eval: (add-hook 'write-file-hooks 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp) # time-stamp-start: "timestamp='" # time-stamp-format: "%:y-%02m-%02d" # time-stamp-end: "'" From 95f0a77fdef6573dc581cc92279f6d9acefa3ebf Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 2 Nov 2020 20:29:48 +0000 Subject: [PATCH 1909/2608] Detect pthread_getname_np explicitly. At least one libc (musl) defines pthread_setname_np without defining pthread_getname_np. Detect the presence of each individually, rather than inferring both must be defined if set is. 
--- configure.ac | 31 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++ src/prof_sys.c | 5 ++- 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1e6de8a8..eeceb12f 100644 --- a/configure.ac +++ b/configure.ac @@ -1745,6 +1745,37 @@ dnl Check if we have dlsym support. if test "x${je_cv_pthread_setname_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ]) fi + dnl Check if pthread_getname_np is not necessarily present despite + dnl the pthread_setname_np counterpart + JE_COMPILABLE([pthread_getname_np(3)], [ +#include +#include +], [ + { + char *name = malloc(16); + pthread_getname_np(pthread_self(), name, 16); + free(name); + } +], [je_cv_pthread_getname_np]) + if test "x${je_cv_pthread_getname_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ]) + fi + dnl Check if pthread_get_name_np is not necessarily present despite + dnl the pthread_set_name_np counterpart + JE_COMPILABLE([pthread_get_name_np(3)], [ +#include +#include +#include +], [ + { + char *name = malloc(16); + pthread_get_name_np(pthread_self(), name, 16); + free(name); + } +], [je_cv_pthread_get_name_np]) + if test "x${je_cv_pthread_get_name_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ]) + fi fi JE_APPEND_VS(CPPFLAGS, -D_REENTRANT) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5ea1a191..bcc35596 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -85,6 +85,12 @@ /* Defined if pthread_setname_np(3) is available. */ #undef JEMALLOC_HAVE_PTHREAD_SETNAME_NP +/* Defined if pthread_getname_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP + /* * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) 
is available. */ diff --git a/src/prof_sys.c b/src/prof_sys.c index 777ef1d2..87cd2b2f 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -292,8 +292,11 @@ void prof_unwind_init() { static int prof_sys_thread_name_read_impl(char *buf, size_t limit) { -#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP +#if defined(JEMALLOC_HAVE_PTHREAD_GETNAME_NP) return pthread_getname_np(pthread_self(), buf, limit); +#elif defined(JEMALLOC_HAVE_PTHREAD_GET_NAME_NP) + pthread_get_name_np(pthread_self(), buf, limit); + return 0; #else return ENOSYS; #endif From b4c37a6e81ef2e0286b66a0bc9fc09060690c9a5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Nov 2020 16:00:52 -0800 Subject: [PATCH 1910/2608] Rename edata_tree_t -> edata_avail_t. This isn't a tree any more, and it mildly irritates me any time I see it. --- include/jemalloc/internal/edata.h | 4 ++-- include/jemalloc/internal/edata_cache.h | 2 +- src/edata.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 632c6c32..5ec12beb 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -69,7 +69,7 @@ struct edata_map_info_s { /* Extent (span of pages). Use accessor functions for e_* fields. 
*/ typedef struct edata_s edata_t; -typedef ph(edata_t) edata_tree_t; +typedef ph(edata_t) edata_avail_t; typedef ph(edata_t) edata_heap_t; typedef ph(edata_t) edata_age_heap_t; struct edata_s { @@ -723,7 +723,7 @@ edata_age_comp(const edata_t *a, const edata_t *b) { return edata_snad_comp(a, b); } -ph_proto(, edata_avail_, edata_tree_t, edata_t) +ph_proto(, edata_avail_, edata_avail_t, edata_t) ph_proto(, edata_heap_, edata_heap_t, edata_t) ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t); diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index f7d0c319..9a54df0e 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -21,7 +21,7 @@ typedef struct edata_cache_s edata_cache_t; struct edata_cache_s { - edata_tree_t avail; + edata_avail_t avail; atomic_zu_t count; malloc_mutex_t mtx; base_t *base; diff --git a/src/edata.c b/src/edata.c index 214e993e..a6597312 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, edata_avail_, edata_tree_t, edata_t, ph_link, +ph_gen(, edata_avail_, edata_avail_t, edata_t, ph_link, edata_esnead_comp) ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) ph_gen(, edata_age_heap_, edata_age_heap_t, edata_t, ph_link, edata_age_comp) From 4ca3d91e96c316d3baf67ce4846c164819e2697c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 6 Nov 2020 14:38:17 -0800 Subject: [PATCH 1911/2608] Rename geom_grow -> exp_grow. This was promised in the review of the introduction of geom_grow, but would have been painful to do there because of the series that introduced it. Now that those are comitted, renaming is easier. 
--- Makefile.in | 2 +- .../internal/{geom_grow.h => exp_grow.h} | 28 +++++++++---------- include/jemalloc/internal/hpa.h | 4 +-- include/jemalloc/internal/pac.h | 4 +-- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/exp_grow.c | 8 ++++++ src/extent.c | 8 +++--- src/geom_grow.c | 8 ------ src/hpa.c | 6 ++-- src/pac.c | 6 ++-- test/unit/retained.c | 2 +- 14 files changed, 46 insertions(+), 38 deletions(-) rename include/jemalloc/internal/{geom_grow.h => exp_grow.h} (56%) create mode 100644 src/exp_grow.c delete mode 100644 src/geom_grow.c diff --git a/Makefile.in b/Makefile.in index 34df2398..ca9b17b3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -114,10 +114,10 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/ehooks.c \ $(srcroot)src/emap.c \ $(srcroot)src/eset.c \ + $(srcroot)src/exp_grow.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ - $(srcroot)src/geom_grow.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ diff --git a/include/jemalloc/internal/geom_grow.h b/include/jemalloc/internal/exp_grow.h similarity index 56% rename from include/jemalloc/internal/geom_grow.h rename to include/jemalloc/internal/exp_grow.h index ba83386f..8566b8a4 100644 --- a/include/jemalloc/internal/geom_grow.h +++ b/include/jemalloc/internal/exp_grow.h @@ -1,8 +1,8 @@ -#ifndef JEMALLOC_INTERNAL_ECACHE_GROW_H -#define JEMALLOC_INTERNAL_ECACHE_GROW_H +#ifndef JEMALLOC_INTERNAL_EXP_GROW_H +#define JEMALLOC_INTERNAL_EXP_GROW_H -typedef struct geom_grow_s geom_grow_t; -struct geom_grow_s { +typedef struct exp_grow_s exp_grow_t; +struct exp_grow_s { /* * Next extent size class in a growing series to use when satisfying a * request via the extent hooks (only if opt_retain). 
This limits the @@ -19,32 +19,32 @@ struct geom_grow_s { }; static inline bool -geom_grow_size_prepare(geom_grow_t *geom_grow, size_t alloc_size_min, +exp_grow_size_prepare(exp_grow_t *exp_grow, size_t alloc_size_min, size_t *r_alloc_size, pszind_t *r_skip) { *r_skip = 0; - *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); + *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip); while (*r_alloc_size < alloc_size_min) { (*r_skip)++; - if (geom_grow->next + *r_skip >= + if (exp_grow->next + *r_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. */ return true; } - *r_alloc_size = sz_pind2sz(geom_grow->next + *r_skip); + *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip); } return false; } static inline void -geom_grow_size_commit(geom_grow_t *geom_grow, pszind_t skip) { - if (geom_grow->next + skip + 1 <= geom_grow->limit) { - geom_grow->next += skip + 1; +exp_grow_size_commit(exp_grow_t *exp_grow, pszind_t skip) { + if (exp_grow->next + skip + 1 <= exp_grow->limit) { + exp_grow->next += skip + 1; } else { - geom_grow->next = geom_grow->limit; + exp_grow->next = exp_grow->limit; } } -void geom_grow_init(geom_grow_t *geom_grow); +void exp_grow_init(exp_grow_t *exp_grow); -#endif /* JEMALLOC_INTERNAL_ECACHE_GROW_H */ +#endif /* JEMALLOC_INTERNAL_EXP_GROW_H */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 1cef6e5d..159f0d02 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HPA_H #define JEMALLOC_INTERNAL_HPA_H -#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" @@ -29,7 +29,7 @@ struct hpa_s { * small finite number of allocations from it. */ edata_cache_t *edata_cache; - geom_grow_t geom_grow; + exp_grow_t exp_grow; }; /* Used only by CTL; not actually stored here (i.e., all derived). 
*/ diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 614d34a5..b998b69a 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H -#include "jemalloc/internal/geom_grow.h" +#include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/pai.h" @@ -95,7 +95,7 @@ struct pac_s { edata_cache_t *edata_cache; /* The grow info for the retained ecache. */ - geom_grow_t geom_grow; + exp_grow_t exp_grow; malloc_mutex_t grow_mtx; /* diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f14f87ff..2d6b4b6e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 689a520c..e3b7e0c5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 30c6b295..33d87a44 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -54,6 +54,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 689a520c..e3b7e0c5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -46,6 +46,9 @@ Source Files + + Source Files + Source Files diff --git a/src/exp_grow.c b/src/exp_grow.c new file mode 100644 index 00000000..386471f4 --- /dev/null +++ b/src/exp_grow.c @@ -0,0 +1,8 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include 
"jemalloc/internal/jemalloc_internal_includes.h" + +void +exp_grow_init(exp_grow_t *exp_grow) { + exp_grow->next = sz_psz2ind(HUGEPAGE); + exp_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); +} diff --git a/src/extent.c b/src/extent.c index e9c76eb6..c7dcc2e9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -626,9 +626,9 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * satisfy this request. */ size_t alloc_size; - pszind_t geom_grow_skip; - bool err = geom_grow_size_prepare(&pac->geom_grow, alloc_size_min, - &alloc_size, &geom_grow_skip); + pszind_t exp_grow_skip; + bool err = exp_grow_size_prepare(&pac->exp_grow, alloc_size_min, + &alloc_size, &exp_grow_skip); if (err) { goto label_err; } @@ -724,7 +724,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * range. */ /* All opportunities for failure are past. */ - geom_grow_size_commit(&pac->geom_grow, geom_grow_skip); + exp_grow_size_commit(&pac->exp_grow, exp_grow_skip); malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { diff --git a/src/geom_grow.c b/src/geom_grow.c deleted file mode 100644 index 4816bb7f..00000000 --- a/src/geom_grow.c +++ /dev/null @@ -1,8 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -void -geom_grow_init(geom_grow_t *geom_grow) { - geom_grow->next = sz_psz2ind(HUGEPAGE); - geom_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); -} diff --git a/src/hpa.c b/src/hpa.c index b329dbbb..8029e0bd 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -43,7 +43,7 @@ hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { hpa->ind = base_ind_get(base); hpa->edata_cache = edata_cache; - geom_grow_init(&hpa->geom_grow); + exp_grow_init(&hpa->exp_grow); return false; } @@ -132,7 +132,7 @@ hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); - err = geom_grow_size_prepare(&hpa->geom_grow, hugepage_goal_min, + err 
= exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min, &alloc_size, &skip); if (err) { malloc_mutex_unlock(tsdn, &hpa->grow_mtx); @@ -183,7 +183,7 @@ hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, malloc_mutex_unlock(tsdn, &hpa->mtx); if (!err) { - geom_grow_size_commit(&hpa->geom_grow, skip); + exp_grow_size_commit(&hpa->exp_grow, skip); } malloc_mutex_unlock(tsdn, &hpa->grow_mtx); edata_arena_ind_set(edata, shard->ind); diff --git a/src/pac.c b/src/pac.c index f50e82b0..07c9d23d 100644 --- a/src/pac.c +++ b/src/pac.c @@ -68,7 +68,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, ind, /* delay_coalesce */ false)) { return true; } - geom_grow_init(&pac->geom_grow); + exp_grow_init(&pac->exp_grow); if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; @@ -207,10 +207,10 @@ pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, malloc_mutex_lock(tsdn, &pac->grow_mtx); if (old_limit != NULL) { - *old_limit = sz_pind2sz(pac->geom_grow.limit); + *old_limit = sz_pind2sz(pac->exp_grow.limit); } if (new_limit != NULL) { - pac->geom_grow.limit = new_ind; + pac->exp_grow.limit = new_ind; } malloc_mutex_unlock(tsdn, &pac->grow_mtx); diff --git a/test/unit/retained.c b/test/unit/retained.c index 80ee8cdf..9ad9940e 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -143,7 +143,7 @@ TEST_BEGIN(test_retained) { size_t usable = 0; size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.pac.geom_grow.next; pind++) { + arena->pa_shard.pac.exp_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; From cf2549a149dc27eefef1101500cd9ee743e477a0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 11 Nov 2020 13:34:43 -0800 Subject: [PATCH 1912/2608] Add a per-arena oversize_threshold. 
This can let manual arenas trade off memory and CPU the way auto arenas do. --- Makefile.in | 1 + include/jemalloc/internal/pa.h | 3 +- include/jemalloc/internal/pac.h | 8 +- src/arena.c | 2 +- src/ctl.c | 38 +++++++++ src/extent.c | 5 +- src/pa.c | 7 +- src/pac.c | 7 +- test/unit/oversize_threshold.c | 131 ++++++++++++++++++++++++++++++++ test/unit/pa.c | 4 +- 10 files changed, 194 insertions(+), 12 deletions(-) create mode 100644 test/unit/oversize_threshold.c diff --git a/Makefile.in b/Makefile.in index ca9b17b3..03dbbdf5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -229,6 +229,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/oversize_threshold.c \ $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 5e97d0b0..f1823e6b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -123,7 +123,8 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); + nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms); /* * This isn't exposed to users; we allow late enablement of the HPA shard so diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index b998b69a..6d4dfbaf 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -98,6 +98,9 @@ struct pac_s { exp_grow_t exp_grow; malloc_mutex_t grow_mtx; + /* How large extents should be before getting auto-purged. */ + atomic_zu_t oversize_threshold; + /* * Decay-based purging state, responsible for scheduling extent state * transitions. 
@@ -115,8 +118,9 @@ struct pac_s { }; bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, - edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); + edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, + malloc_mutex_t *stats_mtx); bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, diff --git a/src/arena.c b/src/arena.c index 360827ef..7099713a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1500,7 +1500,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_init_update(&cur_time); if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), - &cur_time, arena_dirty_decay_ms_default_get(), + &cur_time, oversize_threshold, arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index d5dd1d16..4bb422a2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -151,6 +151,7 @@ CTL_PROTO(arena_i_purge) CTL_PROTO(arena_i_reset) CTL_PROTO(arena_i_destroy) CTL_PROTO(arena_i_dss) +CTL_PROTO(arena_i_oversize_threshold) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) @@ -431,6 +432,11 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("reset"), CTL(arena_i_reset)}, {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, + /* + * Undocumented for now, since we anticipate an arena API in flux after + * we cut the last 5-series release. 
+ */ + {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, @@ -2530,6 +2536,38 @@ label_return: return ret; } +static int +arena_i_oversize_threshold_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + unsigned arena_ind; + MIB_UNSIGNED(arena_ind, 1); + + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + size_t oldval = atomic_load_zu( + &arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED); + READ(oldval, size_t); + } + if (newp != NULL) { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + atomic_store_zu(&arena->pa_shard.pac.oversize_threshold, + *(size_t *)newp, ATOMIC_RELAXED); + } + ret = 0; +label_return: + return ret; +} + static int arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { diff --git a/src/extent.c b/src/extent.c index c7dcc2e9..378bc733 100644 --- a/src/extent.c +++ b/src/extent.c @@ -983,8 +983,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata = extent_try_coalesce_large(tsdn, pac, ehooks, ecache, edata, &coalesced, growing_retained); } while (coalesced); - if (edata_size_get(edata) >= oversize_threshold && - extent_may_force_decay(pac)) { + if (edata_size_get(edata) >= + atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) + && extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &ecache->mtx); extent_maximally_purge(tsdn, pac, ehooks, edata); diff --git a/src/pa.c b/src/pa.c index aee7bcd8..e5fcbb7b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -17,7 +17,8 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. */ assert(base_ind_get(base) == ind); if (edata_cache_init(&shard->edata_cache, base)) { @@ -25,8 +26,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, - cur_time, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, - stats_mtx)) { + cur_time, oversize_threshold, dirty_decay_ms, muzzy_decay_ms, + &stats->pac_stats, stats_mtx)) { return true; } diff --git a/src/pac.c b/src/pac.c index 07c9d23d..80646155 100644 --- a/src/pac.c +++ b/src/pac.c @@ -37,8 +37,9 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, - edata_cache_t *edata_cache, nstime_t *cur_time, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { + edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, + malloc_mutex_t *stats_mtx) { unsigned ind = base_ind_get(base); /* * Delay coalescing for dirty extents despite the disruptive effect on @@ -73,6 +74,8 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } + atomic_store_zu(&pac->oversize_threshold, oversize_threshold, + ATOMIC_RELAXED); 
if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { return true; } diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c new file mode 100644 index 00000000..e374b142 --- /dev/null +++ b/test/unit/oversize_threshold.c @@ -0,0 +1,131 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/ctl.h" + +static void +arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp, + size_t *oldlen, void *newp, size_t newlen) { + int err; + char buf[100]; + malloc_snprintf(buf, sizeof(buf), mallctl_str, arena); + + err = mallctl(buf, oldp, oldlen, newp, newlen); + expect_d_eq(0, err, "Mallctl failed; %s", buf); +} + +TEST_BEGIN(test_oversize_threshold_get_set) { + int err; + size_t old_threshold; + size_t new_threshold; + size_t threshold_sz = sizeof(old_threshold); + + unsigned arena; + size_t arena_sz = sizeof(arena); + err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); + expect_d_eq(0, err, "Arena creation failed"); + + /* Just a write. 
*/ + new_threshold = 1024 * 1024; + arena_mallctl("arena.%u.oversize_threshold", arena, NULL, NULL, + &new_threshold, threshold_sz); + + /* Read and write */ + new_threshold = 2 * 1024 * 1024; + arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold, + &threshold_sz, &new_threshold, threshold_sz); + expect_zu_eq(1024 * 1024, old_threshold, "Should have read old value"); + + /* Just a read */ + arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold, + &threshold_sz, NULL, 0); + expect_zu_eq(2 * 1024 * 1024, old_threshold, "Should have read old value"); +} +TEST_END + +static size_t max_purged = 0; +static bool +purge_forced_record_max(extent_hooks_t* hooks, void *addr, size_t sz, + size_t offset, size_t length, unsigned arena_ind) { + if (length > max_purged) { + max_purged = length; + } + return false; +} + +static bool +dalloc_record_max(extent_hooks_t *extent_hooks, void *addr, size_t sz, + bool comitted, unsigned arena_ind) { + if (sz > max_purged) { + max_purged = sz; + } + return false; +} + +extent_hooks_t max_recording_extent_hooks; + +TEST_BEGIN(test_oversize_threshold) { + max_recording_extent_hooks = ehooks_default_extent_hooks; + max_recording_extent_hooks.purge_forced = &purge_forced_record_max; + max_recording_extent_hooks.dalloc = &dalloc_record_max; + + extent_hooks_t *extent_hooks = &max_recording_extent_hooks; + + int err; + + unsigned arena; + size_t arena_sz = sizeof(arena); + err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); + expect_d_eq(0, err, "Arena creation failed"); + arena_mallctl("arena.%u.extent_hooks", arena, NULL, NULL, &extent_hooks, + sizeof(extent_hooks)); + + /* + * This test will fundamentally race with purging, since we're going to + * check the dirty stats to see if our oversized allocation got purged. + * We don't want other purging to happen accidentally. We can't just + * disable purging entirely, though, since that will also disable + * oversize purging. 
Just set purging intervals to be very large. + */ + ssize_t decay_ms = 100 * 1000; + ssize_t decay_ms_sz = sizeof(decay_ms); + arena_mallctl("arena.%u.dirty_decay_ms", arena, NULL, NULL, &decay_ms, + decay_ms_sz); + arena_mallctl("arena.%u.muzzy_decay_ms", arena, NULL, NULL, &decay_ms, + decay_ms_sz); + + /* Clean everything out. */ + arena_mallctl("arena.%u.purge", arena, NULL, NULL, NULL, 0); + max_purged = 0; + + /* Set threshold to 1MB. */ + size_t threshold = 1024 * 1024; + size_t threshold_sz = sizeof(threshold); + arena_mallctl("arena.%u.oversize_threshold", arena, NULL, NULL, + &threshold, threshold_sz); + + /* Allocating and freeing half a megabyte should leave them dirty. */ + void *ptr = mallocx(512 * 1024, MALLOCX_ARENA(arena)); + dallocx(ptr, MALLOCX_TCACHE_NONE); + expect_zu_lt(max_purged, 512 * 1024, "Expected no 512k purge"); + + /* Purge again to reset everything out. */ + arena_mallctl("arena.%u.purge", arena, NULL, NULL, NULL, 0); + max_purged = 0; + + /* + * Allocating and freeing 2 megabytes should leave them dirty because of + * the oversize threshold. 
+ */ + ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); + dallocx(ptr, MALLOCX_TCACHE_NONE); + expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_oversize_threshold_get_set, + test_oversize_threshold); +} + diff --git a/test/unit/pa.c b/test/unit/pa.c index 3a910235..dacd8e70 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -63,9 +63,11 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { nstime_t time; nstime_init(&time, 0); + const size_t oversize_threshold = 8 * 1024 * 1024; err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->emap, test_data->base, /* ind */ 1, &test_data->stats, - &test_data->stats_mtx, &time, dirty_decay_ms, muzzy_decay_ms); + &test_data->stats_mtx, &time, oversize_threshold, dirty_decay_ms, + muzzy_decay_ms); assert_false(err, ""); return test_data; From 9545c2cd36e758f41857b93b8cb55355cf0bc508 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Nov 2020 11:28:37 -0800 Subject: [PATCH 1913/2608] Add sample interval to prof last-N dump --- src/prof_recent.c | 3 +++ test/unit/prof_recent.c | 18 +++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index b1aeef32..ff876783 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -540,6 +540,9 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); emitter_begin(&emitter); + uint64_t sample_interval = (uint64_t)1U << lg_prof_sample; + emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64, + &sample_interval); emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, &dump_max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 180f13fc..e16a849a 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -370,16 
+370,16 @@ typedef struct { #define DUMP_ERROR "Dump output is wrong" static void -confirm_record(const char *template, - const confirm_record_t *records, const size_t n_records) { +confirm_record(const char *template, const confirm_record_t *records, + const size_t n_records) { static const char *types[2] = {"alloc", "dalloc"}; static char buf[64]; /* * The template string would be in the form of: - * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[]}", + * "{...,\"recent_alloc\":[]}", * and dump_out would be in the form of: - * "{\"recent_alloc_max\":XYZ,\"recent_alloc\":[...]}". + * "{...,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". */ assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, @@ -489,18 +489,22 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { void *p, *q; confirm_record_t records[2]; + assert_zu_eq(lg_prof_sample, (size_t)0, + "lg_prof_sample not set correctly"); + future = 0; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); - expect_str_eq(dump_out, "{\"recent_alloc_max\":0,\"recent_alloc\":[]}", - DUMP_ERROR); + expect_str_eq(dump_out, "{\"sample_interval\":1," + "\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); future = 2; assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); call_dump(); - const char *template = "{\"recent_alloc_max\":2,\"recent_alloc\":[]}"; + const char *template = "{\"sample_interval\":1," + "\"recent_alloc_max\":2,\"recent_alloc\":[]}"; expect_str_eq(dump_out, template, DUMP_ERROR); p = malloc(7); From 566c4a8594d433ac40ebfd5a4736a53c431f81dd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Oct 2020 14:44:36 -0700 Subject: [PATCH 1914/2608] Slight changes to cache bin internal functions --- include/jemalloc/internal/cache_bin.h | 44 ++++++++++++++++----------- src/cache_bin.c | 2 +- 2 files changed, 28 insertions(+), 18 
deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 64275f24..551afc85 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -167,16 +167,21 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { return later - earlier; } -/* Number of items currently cached in the bin. */ +/* Number of items currently cached in the bin, without checking ncached_max. */ static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_ncached_get_internal(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); - - assert(n <= cache_bin_info_ncached_max(info)); assert(n == 0 || *(bin->stack_head) != NULL); + return n; +} +/* Number of items currently cached in the bin, with checking ncached_max. */ +static inline cache_bin_sz_t +cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); + assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -186,7 +191,7 @@ cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { * A pointer to the position one past the end of the backing array. 
*/ static inline void ** -cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; @@ -204,7 +209,7 @@ cache_bin_empty_position_get(cache_bin_t *bin, cache_bin_info_t *info) { static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { assert(cache_bin_ncached_get(bin, info) == 0); - assert(cache_bin_empty_position_get(bin, info) == bin->stack_head); + assert(cache_bin_empty_position_get(bin) == bin->stack_head); } /* @@ -213,7 +218,7 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { * ncached >= low_water during flush). */ static inline cache_bin_sz_t -cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_water_get_internal(cache_bin_t *bin) { return cache_bin_diff(bin, bin->low_bits_low_water, bin->low_bits_empty) / sizeof(void *); } @@ -221,7 +226,7 @@ cache_bin_low_water_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { /* Returns the numeric value of low water in [0, ncached]. 
*/ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin, info); + cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); assert(low_water <= cache_bin_info_ncached_max(info)); assert(low_water <= cache_bin_ncached_get(bin, info)); @@ -240,6 +245,14 @@ cache_bin_low_water_set(cache_bin_t *bin) { bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } +static inline void +cache_bin_low_water_adjust(cache_bin_t *bin) { + if (cache_bin_ncached_get_internal(bin) + < cache_bin_low_water_get_internal(bin)) { + cache_bin_low_water_set(bin); + } +} + JEMALLOC_ALWAYS_INLINE void * cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { /* @@ -365,8 +378,8 @@ struct cache_bin_ptr_array_s { static inline void cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { - assert(cache_bin_ncached_get(bin, info) == 0); - arr->ptr = cache_bin_empty_position_get(bin, info) - nfill; + cache_bin_assert_empty(bin, info); + arr->ptr = cache_bin_empty_position_get(bin) - nfill; } /* @@ -377,8 +390,8 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, static inline void cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { - assert(cache_bin_ncached_get(bin, info) == 0); - void **empty_position = cache_bin_empty_position_get(bin, info); + cache_bin_assert_empty(bin, info); + void **empty_position = cache_bin_empty_position_get(bin); if (nfilled < arr->n) { memmove(empty_position - nfilled, empty_position - arr->n, nfilled * sizeof(void *)); @@ -390,7 +403,7 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { - arr->ptr 
= cache_bin_empty_position_get(bin, info) - 1; + arr->ptr = cache_bin_empty_position_get(bin) - 1; assert(cache_bin_ncached_get(bin, info) == 0 || *arr->ptr != NULL); } @@ -416,10 +429,7 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head = bin->stack_head + nflushed; - if (cache_bin_ncached_get(bin, info) - < cache_bin_low_water_get_internal(bin, info)) { - bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; - } + cache_bin_low_water_adjust(bin); } /* diff --git a/src/cache_bin.c b/src/cache_bin.c index 1d04b0dd..5f506062 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -84,7 +84,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, assert(cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); assert(cache_bin_ncached_get(bin, info) == 0); - assert(cache_bin_empty_position_get(bin, info) == empty_position); + assert(cache_bin_empty_position_get(bin) == empty_position); assert(bin_stack_size > 0 || empty_position == full_position); } From 4a65f34930fb5e72b2d6ab55d23b5971a5efefbd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Oct 2020 14:56:15 -0700 Subject: [PATCH 1915/2608] Fix a cache bin test --- test/unit/cache_bin.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 43fe8c6c..7798bfab 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -186,7 +186,7 @@ TEST_BEGIN(test_cache_bin) { ncached_max / 2); /* Try to fill some, succeed partially. */ do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, - ncached_max / 2); + ncached_max / 4); /* Try to fill some, fail completely. 
*/ do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0); @@ -196,6 +196,8 @@ TEST_BEGIN(test_cache_bin) { do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); + + free(ptrs); } TEST_END From be5e49f4fa09247a91557690cdaef42a82a83d6a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 22 Oct 2020 16:07:25 -0700 Subject: [PATCH 1916/2608] Add a batch mode for cache_bin_alloc() --- include/jemalloc/internal/cache_bin.h | 12 ++++++ test/unit/cache_bin.c | 53 +++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 551afc85..c1b8fc42 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -317,6 +317,18 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { return cache_bin_alloc_impl(bin, success, true); } +JEMALLOC_ALWAYS_INLINE cache_bin_sz_t +cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { + size_t n = cache_bin_ncached_get_internal(bin); + if (n > num) { + n = num; + } + memcpy(out, bin->stack_head, n * sizeof(void *)); + bin->stack_head += n; + cache_bin_low_water_adjust(bin); + return n; +} + /* * Free an object into the given bin. Fails only if the bin is full. 
*/ diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 7798bfab..b31d07d2 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -52,6 +52,34 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } } +static void +do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t nfill, size_t batch) { + assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); + cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill); + for (cache_bin_sz_t i = 0; i < nfill; i++) { + arr.ptr[i] = &ptrs[i]; + } + cache_bin_finish_fill(bin, info, &arr, nfill); + assert_true(cache_bin_ncached_get(bin, info) == nfill, ""); + cache_bin_low_water_set(bin); + + void **out = malloc((batch + 1) * sizeof(void *)); + size_t n = cache_bin_alloc_batch(bin, batch, out); + assert_true(n == ((size_t)nfill < batch ? (size_t)nfill : batch), ""); + for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) { + expect_ptr_eq(out[i], &ptrs[i], ""); + } + expect_true(cache_bin_low_water_get(bin, info) == nfill - + (cache_bin_sz_t)n, ""); + while (cache_bin_ncached_get(bin, info) > 0) { + bool success; + cache_bin_alloc(bin, &success); + } + free(out); +} + TEST_BEGIN(test_cache_bin) { const int ncached_max = 100; bool success; @@ -197,6 +225,31 @@ TEST_BEGIN(test_cache_bin) { do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max * 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 1); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 0); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + ncached_max / 2); + do_batch_alloc_test(&bin, &info, ptrs, 
ncached_max / 2, ncached_max); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + ncached_max / 4); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 2); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 1); + do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, &info, ptrs, 2, ncached_max); + do_batch_alloc_test(&bin, &info, ptrs, 2, 2); + do_batch_alloc_test(&bin, &info, ptrs, 2, 1); + do_batch_alloc_test(&bin, &info, ptrs, 2, 0); + do_batch_alloc_test(&bin, &info, ptrs, 1, 2); + do_batch_alloc_test(&bin, &info, ptrs, 1, 1); + do_batch_alloc_test(&bin, &info, ptrs, 1, 0); + do_batch_alloc_test(&bin, &info, ptrs, 0, 2); + do_batch_alloc_test(&bin, &info, ptrs, 0, 1); + do_batch_alloc_test(&bin, &info, ptrs, 0, 0); + free(ptrs); } TEST_END From ac480136d76010243f50997a1c1231a5572548aa Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 20 Oct 2020 11:00:09 -0700 Subject: [PATCH 1917/2608] Split out locality checking in batch allocation tests --- test/unit/batch_alloc.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 08d6f66a..cb46513d 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -35,20 +35,28 @@ verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch, } static void -verify_batch(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero, +verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, + bool zero) { + for (size_t i = 0; i < batch; ++i) { + void *p = ptrs[i]; + expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); + if (zero) { + for (size_t k = 0; k < usize; ++k) { + expect_true(*((unsigned char *)p + k) == 0, ""); + } + } + } +} + +static void +verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, arena_t *arena, unsigned nregs) { for (size_t i = 0, j = 0; i < batch; ++i, ++j) { if (j == nregs) { j = 0; } void *p = 
ptrs[i]; - expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); - if (zero) { - for (size_t k = 0; k < usize; ++k) { - expect_true(*((unsigned char *)p + k) == 0, ""); - } - } if (j == 0) { expect_true(PAGE_ALIGNED(p), ""); continue; @@ -154,7 +162,8 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { assert_zu_eq(filled, batch, ""); memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); verify_stats(&stats_before, &stats_after, batch, nregs); - verify_batch(tsd, ptrs, batch, usize, zero, arena, + verify_batch_basic(tsd, ptrs, batch, usize, zero); + verify_batch_locality(tsd, ptrs, batch, usize, arena, nregs); release_batch(ptrs, batch, usize); } From d96e4525adaefbde79f349d024eb5f94e72faf50 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 12 Nov 2020 14:54:25 -0800 Subject: [PATCH 1918/2608] Route batch allocation of small batch size to tcache --- src/jemalloc.c | 106 ++++++++++++++++++++++++++-------------- test/unit/batch_alloc.c | 76 +++++++++------------------- 2 files changed, 91 insertions(+), 91 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 2a791e17..575a63cf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4088,32 +4088,15 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_done; } - szind_t ind = sz_size2index(usize); - if (unlikely(ind >= SC_NBINS)) { - /* No optimization for large sizes. */ - void *p; - while (filled < num && (p = je_mallocx(size, flags)) != NULL) { - ptrs[filled++] = p; - } - goto label_done; - } - bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); - unsigned arena_ind = mallocx_arena_get(flags); - arena_t *arena; - if (arena_get_from_ind(tsd, arena_ind, &arena)) { - goto label_done; - } - if (arena == NULL) { - arena = arena_choose(tsd, NULL); - } else { - /* When a manual arena is specified, bypass the tcache. 
*/ - flags |= MALLOCX_TCACHE_NONE; - } - if (unlikely(arena == NULL)) { - goto label_done; + cache_bin_t *bin = NULL; + arena_t *arena = NULL; + size_t nregs = 0; + if (likely(ind < SC_NBINS)) { + nregs = bin_infos[ind].nregs; + assert(nregs > 0); } while (filled < num) { @@ -4132,9 +4115,63 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { batch_alloc_prof_sample_assert(tsd, batch, usize); } - size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena, - ind, ptrs + filled, batch, zero); - filled += n; + size_t progress = 0; + + if (likely(ind < SC_NBINS) && batch >= nregs) { + if (arena == NULL) { + unsigned arena_ind = mallocx_arena_get(flags); + if (arena_get_from_ind(tsd, arena_ind, + &arena)) { + goto label_done; + } + if (arena == NULL) { + arena = arena_choose(tsd, NULL); + } + if (unlikely(arena == NULL)) { + goto label_done; + } + } + size_t arena_batch = batch - batch % nregs; + size_t n = arena_fill_small_fresh(tsd_tsdn(tsd), arena, + ind, ptrs + filled, arena_batch, zero); + progress += n; + filled += n; + } + + if (likely(ind < nhbins) && progress < batch) { + if (bin == NULL) { + unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, + tcache_ind, /* slow */ true, + /* is_alloc */ true); + if (tcache != NULL) { + bin = &tcache->bins[ind]; + } + } + if (bin != NULL) { + size_t bin_batch = batch - progress; + size_t n = cache_bin_alloc_batch(bin, bin_batch, + ptrs + filled); + if (config_stats) { + bin->tstats.nrequests += n; + } + if (zero) { + for (size_t i = 0; i < n; ++i) { + memset(ptrs[filled + i], 0, + usize); + } + } + if (config_prof && opt_prof + && unlikely(ind >= SC_NBINS)) { + for (size_t i = 0; i < n; ++i) { + prof_tctx_reset_sampled(tsd, + ptrs[filled + i]); + } + } + progress += n; + filled += n; + } + } /* * For thread events other than prof sampling, trigger them as @@ -4146,23 +4183,16 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { * were handled individually, but 
it would do no harm (or * even be beneficial) to coalesce the triggerings. */ - thread_alloc_event(tsd, n * usize); + thread_alloc_event(tsd, progress * usize); - if (n < batch) { /* OOM */ - break; - } - - if (prof_sample_event) { - /* - * The next allocation will be prof sampled. The - * thread event logic is handled within the mallocx() - * call. - */ + if (progress < batch || prof_sample_event) { void *p = je_mallocx(size, flags); if (p == NULL) { /* OOM */ break; } - assert(prof_sampled(tsd, p)); + if (progress == batch) { + assert(prof_sampled(tsd, p)); + } ptrs[filled++] = p; } } diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index cb46513d..992990f3 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -5,35 +5,6 @@ static void *ptrs[BATCH_MAX]; #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) -static void -verify_stats(bin_stats_t *before, bin_stats_t *after, size_t batch, - unsigned nregs) { - if (!config_stats) { - return; - } - if (config_prof && opt_prof) { - /* - * Checking the stats when prof is on is feasible but - * complicated, while checking the non-prof case suffices for - * unit-test purpose. 
- */ - return; - } - expect_u64_eq(before->nmalloc + batch, after->nmalloc, ""); - expect_u64_eq(before->nrequests + batch, after->nrequests, ""); - expect_zu_eq(before->curregs + batch, after->curregs, ""); - size_t nslab = batch / nregs; - size_t n_nonfull = 0; - if (batch % nregs != 0) { - ++nslab; - ++n_nonfull; - } - expect_u64_eq(before->nslabs + nslab, after->nslabs, ""); - expect_zu_eq(before->curslabs + nslab, after->curslabs, ""); - expect_zu_eq(before->nonfull_slabs + n_nonfull, after->nonfull_slabs, - ""); -} - static void verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero) { @@ -51,10 +22,21 @@ verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, static void verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, arena_t *arena, unsigned nregs) { + if (config_prof && opt_prof) { + /* + * Checking batch locality when prof is on is feasible but + * complicated, while checking the non-prof case suffices for + * unit-test purpose. 
+ */ + return; + } for (size_t i = 0, j = 0; i < batch; ++i, ++j) { if (j == nregs) { j = 0; } + if (j == 0 && batch - i < nregs) { + break; + } void *p = ptrs[i]; expect_ptr_eq(iaalloc(tsd_tsdn(tsd), p), arena, ""); if (j == 0) { @@ -63,21 +45,8 @@ verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, } assert(i > 0); void *q = ptrs[i - 1]; - bool adjacent = (uintptr_t)p > (uintptr_t)q - && (size_t)((uintptr_t)p - (uintptr_t)q) == usize; - if (config_prof && opt_prof) { - if (adjacent) { - expect_false(prof_sampled(tsd, p) - || prof_sampled(tsd, q), ""); - } else { - expect_true(prof_sampled(tsd, p) - || prof_sampled(tsd, q), ""); - expect_true(PAGE_ALIGNED(p), ""); - j = 0; - } - } else { - expect_true(adjacent, ""); - } + expect_true((uintptr_t)p > (uintptr_t)q + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, ""); } } @@ -124,8 +93,6 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { arena = arena_choose(tsd, NULL); } assert(arena != NULL); - bin_t *bin = arena_bin_choose(tsd_tsdn(tsd), arena, ind, NULL); - assert(bin != NULL); int flags = arena_flag; if (alignment != 0) { flags |= MALLOCX_ALIGN(alignment); @@ -155,13 +122,9 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - bin_stats_t stats_before, stats_after; - memcpy(&stats_before, &bin->stats, sizeof(bin_stats_t)); size_t filled = batch_alloc_wrapper(ptrs, batch, size, flags); assert_zu_eq(filled, batch, ""); - memcpy(&stats_after, &bin->stats, sizeof(bin_stats_t)); - verify_stats(&stats_before, &stats_after, batch, nregs); verify_batch_basic(tsd, ptrs, batch, usize, zero); verify_batch_locality(tsd, ptrs, batch, usize, arena, nregs); @@ -196,8 +159,15 @@ TEST_BEGIN(test_batch_alloc_manual_arena) { } TEST_END -TEST_BEGIN(test_batch_alloc_fallback) { - const size_t size = SC_LARGE_MINCLASS; +TEST_BEGIN(test_batch_alloc_large) { + size_t size = 
SC_LARGE_MINCLASS; + for (size_t batch = 0; batch < 4; ++batch) { + assert(batch < BATCH_MAX); + size_t filled = batch_alloc(ptrs, batch, size, 0); + assert_zu_eq(filled, batch, ""); + release_batch(ptrs, batch, size); + } + size = tcache_maxclass + 1; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); size_t filled = batch_alloc(ptrs, batch, size, 0); @@ -214,5 +184,5 @@ main(void) { test_batch_alloc_zero, test_batch_alloc_aligned, test_batch_alloc_manual_arena, - test_batch_alloc_fallback); + test_batch_alloc_large); } From 92e189be8b725be1f4de5f476f410173db29bc7d Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 13 Nov 2020 17:15:35 -0800 Subject: [PATCH 1919/2608] Add some comments to the batch allocation logic flow --- src/jemalloc.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 575a63cf..ebc66696 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4091,8 +4091,13 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { szind_t ind = sz_size2index(usize); bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + /* + * The cache bin and arena will be lazily initialized; it's hard to + * know in advance whether each of them needs to be initialized. + */ cache_bin_t *bin = NULL; arena_t *arena = NULL; + size_t nregs = 0; if (likely(ind < SC_NBINS)) { nregs = bin_infos[ind].nregs; @@ -4148,8 +4153,33 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { bin = &tcache->bins[ind]; } } + /* + * If we don't have a tcache bin, we don't want to + * immediately give up, because there's the possibility + * that the user explicitly requested to bypass the + * tcache, or that the user explicitly turned off the + * tcache; in such cases, we go through the slow path, + * i.e. the mallocx() call at the end of the while loop. 
+ */ if (bin != NULL) { size_t bin_batch = batch - progress; + /* + * n can be less than bin_batch, meaning that + * the cache bin does not have enough memory. + * In such cases, we rely on the slow path, + * i.e. the mallocx() call at the end of the + * while loop, to fill in the cache, and in the + * next iteration of the while loop, the tcache + * will contain a lot of memory, and we can + * harvest them here. Compared to the + * alternative approach where we directly go to + * the arena bins here, the overhead of our + * current approach should usually be minimal, + * since we never try to fetch more memory than + * what a slab contains via the tcache. An + * additional benefit is that the tcache will + * not be empty for the next allocation request. + */ size_t n = cache_bin_alloc_batch(bin, bin_batch, ptrs + filled); if (config_stats) { From 520b75fa2daf3313d87780f40ca0101c83c10398 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Mon, 23 Nov 2020 15:00:38 +0000 Subject: [PATCH 1920/2608] utrace support with label based signature. 
--- configure.ac | 23 +++++++++++++++---- .../internal/jemalloc_internal_defs.h.in | 3 +++ .../jemalloc/internal/jemalloc_preamble.h.in | 8 ++++++- src/jemalloc.c | 2 +- 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index eeceb12f..8e21f3f9 100644 --- a/configure.ac +++ b/configure.ac @@ -1405,10 +1405,25 @@ JE_COMPILABLE([utrace(2)], [ utrace((void *)0, 0); ], [je_cv_utrace]) if test "x${je_cv_utrace}" = "xno" ; then - enable_utrace="0" -fi -if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE], [ ]) + JE_COMPILABLE([utrace(2) with label], [ + #include + #include + #include + #include + #include + ], [ + utrace((void *)0, (void *)0, 0); + ], [je_cv_utrace_label]) + if test "x${je_cv_utrace_label}" = "xno"; then + enable_utrace="0" + fi + if test "x$enable_utrace" = "x1" ; then + AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ]) + fi +else + if test "x$enable_utrace" = "x1" ; then + AC_DEFINE([JEMALLOC_UTRACE], [ ]) + fi fi AC_SUBST([enable_utrace]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index bcc35596..ff0e15b1 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -173,6 +173,9 @@ /* Support utrace(2)-based tracing. */ #undef JEMALLOC_UTRACE +/* Support utrace(2)-based tracing (label based signature). */ +#undef JEMALLOC_UTRACE_LABEL + /* Support optional abort() on OOM. 
*/ #undef JEMALLOC_XMALLOC diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index d62fee09..ef1cbaee 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -4,8 +4,14 @@ #include "jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" -#ifdef JEMALLOC_UTRACE +#if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL) #include +# if defined(JEMALLOC_UTRACE) +# define UTRACE_CALL(p, l) utrace(p, l) +# else +# define UTRACE_CALL(p, l) utrace("jemalloc_process", p, l) +# define JEMALLOC_UTRACE +# endif #endif #define JEMALLOC_NO_DEMANGLE diff --git a/src/jemalloc.c b/src/jemalloc.c index ebc66696..1a8db833 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -250,7 +250,7 @@ typedef struct { ut.p = (a); \ ut.s = (b); \ ut.r = (c); \ - utrace(&ut, sizeof(ut)); \ + UTRACE_CALL(&ut, sizeof(ut)); \ errno = utrace_serrno; \ } \ } while (0) From 99c2d6c232eca19e29224f48425517ecebcc1ab0 Mon Sep 17 00:00:00 2001 From: Igor Wiedler Date: Thu, 19 Nov 2020 16:50:09 +0100 Subject: [PATCH 1921/2608] Backport jeprof --collapse for flamegraph generation --- bin/jeprof.in | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index 3ed408c9..d47359cf 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -205,6 +205,8 @@ Output type: --svg Generate SVG to stdout --gif Generate GIF to stdout --raw Generate symbolized jeprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) Heap-Profile Options: --inuse_space Display in-use (mega)bytes [default] @@ -332,6 +334,7 @@ sub Init() { $main::opt_gif = 0; $main::opt_svg = 0; $main::opt_raw = 0; + $main::opt_collapsed = 0; $main::opt_nodecount = 80; $main::opt_nodefraction = 0.005; @@ -405,6 +408,7 @@ sub Init() { 
"svg!" => \$main::opt_svg, "gif!" => \$main::opt_gif, "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, "interactive!" => \$main::opt_interactive, "nodecount=i" => \$main::opt_nodecount, "nodefraction=f" => \$main::opt_nodefraction, @@ -490,6 +494,7 @@ sub Init() { $main::opt_svg + $main::opt_gif + $main::opt_raw + + $main::opt_collapsed + $main::opt_interactive + 0; if ($modes > 1) { @@ -621,6 +626,8 @@ sub FilterAndPrint { PrintText($symbols, $flat, $cumulative, -1); } elsif ($main::opt_raw) { PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); } elsif ($main::opt_callgrind) { PrintCallgrind($calls); } else { @@ -2810,6 +2817,40 @@ sub IsSecondPcAlwaysTheSame { return $second_pc; } +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. 
+ push (@stack, $address); + } + + return @stack; +} + sub ExtractSymbolLocation { my $symbols = shift; my $address = shift; @@ -2884,6 +2925,17 @@ sub FilterFrames { return $result; } +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + sub RemoveUninterestingFrames { my $symbols = shift; my $profile = shift; From ecd39418aca14cddcf69acc86c2aa3cbb13a72e1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 1 Dec 2020 13:00:57 -0800 Subject: [PATCH 1922/2608] Add fxp: A fixed-point math library. This will be used in the next commit to allow non-integer values for narenas_ratio. --- Makefile.in | 2 + include/jemalloc/internal/fxp.h | 100 +++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/fxp.c | 124 +++++++ test/unit/fxp.c | 344 ++++++++++++++++++ 8 files changed, 578 insertions(+) create mode 100644 include/jemalloc/internal/fxp.h create mode 100644 src/fxp.c create mode 100644 test/unit/fxp.c diff --git a/Makefile.in b/Makefile.in index 03dbbdf5..eae30653 100644 --- a/Makefile.in +++ b/Makefile.in @@ -118,6 +118,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ + $(srcroot)src/fxp.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ @@ -212,6 +213,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/extent_quantize.c \ ${srcroot}test/unit/flat_bitmap.c \ $(srcroot)test/unit/fork.c \ + ${srcroot}test/unit/fxp.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/include/jemalloc/internal/fxp.h 
b/include/jemalloc/internal/fxp.h new file mode 100644 index 00000000..d9438090 --- /dev/null +++ b/include/jemalloc/internal/fxp.h @@ -0,0 +1,100 @@ +#ifndef JEMALLOC_INTERNAL_FXP_H +#define JEMALLOC_INTERNAL_FXP_H + +/* + * A simple fixed-point math implementation, supporting only unsigned values + * (with overflow being an error). + * + * It's not in general safe to use floating point in core code, because various + * libc implementations we get linked against can assume that malloc won't touch + * floating point state and call it with an unusual calling convention. + */ + +/* + * High 16 bits are the integer part, low 16 are the fractional part. Or + * equivalently, repr == 2**16 * val, where we use "val" to refer to the + * (imaginary) fractional representation of the true value. + * + * We pick a uint32_t here since it's convenient in some places to + * double the representation size (i.e. multiplication and division use + * 64-bit integer types), and a uint64_t is the largest type we're + * certain is available. + */ +typedef uint32_t fxp_t; +#define FXP_INIT_INT(x) ((x) << 16) + +/* + * Amount of precision used in parsing and printing numbers. The integer bound + * is simply because the integer part of the number gets 16 bits, and so is + * bounded by 65536. + * + * We use a lot of precision for the fractional part, even though most of it + * gets rounded off; this lets us get exact values for the important special + * case where the denominator is a small power of 2 (for instance, + * 1/512 == 0.001953125 is exactly representable even with only 16 bits of + * fractional precision). We need to left-shift by 16 before dividing by + * 10**precision, so we pick precision to be floor(log(2**48)) = 14. + */ +#define FXP_INTEGER_PART_DIGITS 5 +#define FXP_FRACTIONAL_PART_DIGITS 14 + +/* + * In addition to the integer and fractional parts of the number, we need to + * include a null character and (possibly) a decimal point. 
+ */ +#define FXP_BUF_SIZE (FXP_INTEGER_PART_DIGITS + FXP_FRACTIONAL_PART_DIGITS + 2) + +static inline fxp_t +fxp_add(fxp_t a, fxp_t b) { + return a + b; +} + +static inline fxp_t +fxp_sub(fxp_t a, fxp_t b) { + assert(a >= b); + return a - b; +} + +static inline fxp_t +fxp_mul(fxp_t a, fxp_t b) { + uint64_t unshifted = (uint64_t)a * (uint64_t)b; + /* + * Unshifted is (a.val * 2**16) * (b.val * 2**16) + * == (a.val * b.val) * 2**32, but we want + * (a.val * b.val) * 2 ** 16. + */ + return (uint32_t)(unshifted >> 16); +} + +static inline fxp_t +fxp_div(fxp_t a, fxp_t b) { + assert(b != 0); + uint64_t unshifted = ((uint64_t)a << 32) / (uint64_t)b; + /* + * Unshifted is (a.val * 2**16) * (2**32) / (b.val * 2**16) + * == (a.val / b.val) * (2 ** 32), which again corresponds to a right + * shift of 16. + */ + return (uint32_t)(unshifted >> 16); +} + +static inline uint32_t +fxp_round_down(fxp_t a) { + return a >> 16; +} + +static inline uint32_t +fxp_round_nearest(fxp_t a) { + uint32_t fractional_part = (a & ((1U << 16) - 1)); + uint32_t increment = (uint32_t)(fractional_part >= (1U << 15)); + return (a >> 16) + increment; +} + +/* + * Returns true on error. Otherwise, returns false and updates *ptr to point to + * the first character not parsed (because it wasn't a digit). 
+ */ +bool fxp_parse(fxp_t *a, const char *ptr, char **end); +void fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]); + +#endif /* JEMALLOC_INTERNAL_FXP_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 2d6b4b6e..6c4e7fdc 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index e3b7e0c5..84ff5748 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 33d87a44..07fbe21e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -58,6 +58,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index e3b7e0c5..84ff5748 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -58,6 +58,9 @@ Source Files + + Source Files + Source Files diff --git a/src/fxp.c b/src/fxp.c new file mode 100644 index 00000000..96585f0a --- /dev/null +++ b/src/fxp.c @@ -0,0 +1,124 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/fxp.h" + +static bool +fxp_isdigit(char c) { + return '0' <= c && c <= '9'; +} + +bool +fxp_parse(fxp_t *result, const char *str, char **end) { + /* + * Using malloc_strtoumax in this method isn't as handy as you might + * expect (I tried). 
In the fractional part, significant leading zeros + * mean that you still need to do your own parsing, now with trickier + * math. In the integer part, the casting (uintmax_t to uint32_t) + * forces more reasoning about bounds than just checking for overflow as + * we parse. + */ + uint32_t integer_part = 0; + + const char *cur = str; + + /* The string must start with a digit or a decimal point. */ + if (*cur != '.' && !fxp_isdigit(*cur)) { + return true; + } + + while ('0' <= *cur && *cur <= '9') { + integer_part *= 10; + integer_part += *cur - '0'; + if (integer_part >= (1U << 16)) { + return true; + } + cur++; + } + + /* + * We've parsed all digits at the beginning of the string, without + * overflow. Either we're done, or there's a fractional part. + */ + if (*cur != '.') { + *result = (integer_part << 16); + if (end != NULL) { + *end = (char *)cur; + } + return false; + } + + /* There's a fractional part. */ + cur++; + if (!fxp_isdigit(*cur)) { + /* Shouldn't end on the decimal point. */ + return true; + } + + /* + * We use a lot of precision for the fractional part, even though we'll + * discard most of it; this lets us get exact values for the important + * special case where the denominator is a small power of 2 (for + * instance, 1/512 == 0.001953125 is exactly representable even with + * only 16 bits of fractional precision). We need to left-shift by 16 + * before dividing so we pick the number of digits to be + * floor(log(2**48)) = 14. + */ + uint64_t fractional_part = 0; + uint64_t frac_div = 1; + for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) { + fractional_part *= 10; + frac_div *= 10; + if (fxp_isdigit(*cur)) { + fractional_part += *cur - '0'; + cur++; + } + } + /* + * We only parse the first maxdigits characters, but we can still ignore + * any digits after that. 
+ */ + while (fxp_isdigit(*cur)) { + cur++; + } + + assert(fractional_part < frac_div); + uint32_t fractional_repr = (uint32_t)( + (fractional_part << 16) / frac_div); + + /* Success! */ + *result = (integer_part << 16) + fractional_repr; + if (end != NULL) { + *end = (char *)cur; + } + return false; +} + +void +fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) { + uint32_t integer_part = fxp_round_down(a); + uint32_t fractional_part = (a & ((1U << 16) - 1)); + + int leading_fraction_zeros = 0; + uint64_t fraction_digits = fractional_part; + for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) { + if (fraction_digits < (1U << 16) + && fraction_digits * 10 >= (1U << 16)) { + leading_fraction_zeros = i; + } + fraction_digits *= 10; + } + fraction_digits >>= 16; + while (fraction_digits > 0 && fraction_digits % 10 == 0) { + fraction_digits /= 10; + } + + size_t printed = malloc_snprintf(buf, FXP_BUF_SIZE, "%"FMTu32".", + integer_part); + for (int i = 0; i < leading_fraction_zeros; i++) { + buf[printed] = '0'; + printed++; + } + malloc_snprintf(&buf[printed], FXP_BUF_SIZE - printed, "%"FMTu64, + fraction_digits); +} diff --git a/test/unit/fxp.c b/test/unit/fxp.c new file mode 100644 index 00000000..89f0ca65 --- /dev/null +++ b/test/unit/fxp.c @@ -0,0 +1,344 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/fxp.h" + +static double +fxp2double(fxp_t a) { + double intpart = (double)(a >> 16); + double fracpart = (double)(a & ((1U << 16) - 1)) / (1U << 16); + return intpart + fracpart; +} + +/* Is a close to b? */ +static bool +double_close(double a, double b) { + /* + * Our implementation doesn't try for precision. Correspondingly, don't + * enforce it too strenuously here; accept values that are close in + * either relative or absolute terms. 
+ */ + return fabs(a - b) < 0.01 || fabs(a - b) / a < 0.01; +} + +static bool +fxp_close(fxp_t a, fxp_t b) { + return double_close(fxp2double(a), fxp2double(b)); +} + +static fxp_t +xparse_fxp(const char *str) { + fxp_t result; + bool err = fxp_parse(&result, str, NULL); + assert_false(err, "Invalid fxp string: %s", str); + return result; +} + +static void +expect_parse_accurate(const char *str, const char *parse_str) { + double true_val = strtod(str, NULL); + fxp_t fxp_val; + char *end; + bool err = fxp_parse(&fxp_val, parse_str, &end); + expect_false(err, "Unexpected parse failure"); + expect_ptr_eq(parse_str + strlen(str), end, + "Didn't parse whole string"); + expect_true(double_close(fxp2double(fxp_val), true_val), + "Misparsed %s", str); +} + +static void +parse_valid_trial(const char *str) { + /* The value it parses should be correct. */ + expect_parse_accurate(str, str); + char buf[100]; + snprintf(buf, sizeof(buf), "%swith_some_trailing_text", str); + expect_parse_accurate(str, buf); + snprintf(buf, sizeof(buf), "%s with a space", str); + expect_parse_accurate(str, buf); + snprintf(buf, sizeof(buf), "%s,in_a_malloc_conf_string:1", str); + expect_parse_accurate(str, buf); +} + +TEST_BEGIN(test_parse_valid) { + parse_valid_trial("0"); + parse_valid_trial("1"); + parse_valid_trial("2"); + parse_valid_trial("100"); + parse_valid_trial("345"); + parse_valid_trial("00000000123"); + parse_valid_trial("00000000987"); + + parse_valid_trial("0.0"); + parse_valid_trial("0.00000000000456456456"); + parse_valid_trial("100.00000000000456456456"); + + parse_valid_trial("123.1"); + parse_valid_trial("123.01"); + parse_valid_trial("123.001"); + parse_valid_trial("123.0001"); + parse_valid_trial("123.00001"); + parse_valid_trial("123.000001"); + parse_valid_trial("123.0000001"); + + parse_valid_trial(".0"); + parse_valid_trial(".1"); + parse_valid_trial(".01"); + parse_valid_trial(".001"); + parse_valid_trial(".0001"); + parse_valid_trial(".00001"); + 
parse_valid_trial(".000001"); + + parse_valid_trial(".1"); + parse_valid_trial(".10"); + parse_valid_trial(".100"); + parse_valid_trial(".1000"); + parse_valid_trial(".100000"); +} +TEST_END + +static void expect_parse_failure(const char *str) { + fxp_t result = FXP_INIT_INT(333); + char *end = (void *)0x123; + bool err = fxp_parse(&result, str, &end); + expect_true(err, "Expected a parse error on: %s", str); + expect_ptr_eq((void *)0x123, end, + "Parse error shouldn't change results"); + expect_u32_eq(result, FXP_INIT_INT(333), + "Parse error shouldn't change results"); +} + +TEST_BEGIN(test_parse_invalid) { + expect_parse_failure("123."); + expect_parse_failure("3.a"); + expect_parse_failure(".a"); + expect_parse_failure("a.1"); + expect_parse_failure("a"); + /* A valid string, but one that overflows. */ + expect_parse_failure("123456789"); + expect_parse_failure("0000000123456789"); + expect_parse_failure("1000000"); +} +TEST_END + +static void +expect_add(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_add(a, b), result), + "Expected %s + %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_add_simple) { + expect_add("0", "0", "0"); + expect_add("0", "1", "1"); + expect_add("1", "1", "2"); + expect_add("1.5", "1.5", "3"); + expect_add("0.1", "0.1", "0.2"); + expect_add("123", "456", "579"); +} +TEST_END + +static void +expect_sub(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_sub(a, b), result), + "Expected %s - %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_sub_simple) { + expect_sub("0", "0", "0"); + expect_sub("1", "0", "1"); + expect_sub("1", "1", "0"); + expect_sub("3.5", "1.5", "2"); + expect_sub("0.3", "0.1", "0.2"); + expect_sub("456", "123", "333"); +} 
+TEST_END + +static void +expect_mul(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_mul(a, b), result), + "Expected %s * %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_mul_simple) { + expect_mul("0", "0", "0"); + expect_mul("1", "0", "0"); + expect_mul("1", "1", "1"); + expect_mul("1.5", "1.5", "2.25"); + expect_mul("100.0", "10", "1000"); + expect_mul(".1", "10", "1"); +} +TEST_END + +static void +expect_div(const char *astr, const char *bstr, const char* resultstr) { + fxp_t a = xparse_fxp(astr); + fxp_t b = xparse_fxp(bstr); + fxp_t result = xparse_fxp(resultstr); + expect_true(fxp_close(fxp_div(a, b), result), + "Expected %s / %s == %s", astr, bstr, resultstr); +} + +TEST_BEGIN(test_div_simple) { + expect_div("1", "1", "1"); + expect_div("0", "1", "0"); + expect_div("2", "1", "2"); + expect_div("3", "2", "1.5"); + expect_div("3", "1.5", "2"); + expect_div("10", ".1", "100"); + expect_div("123", "456", ".2697368421"); +} +TEST_END + +static void +expect_round(const char *str, uint32_t rounded_down, uint32_t rounded_nearest) { + fxp_t fxp = xparse_fxp(str); + uint32_t fxp_rounded_down = fxp_round_down(fxp); + uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp); + expect_u32_eq(rounded_down, fxp_rounded_down, + "Mistake rounding %s down", str); + expect_u32_eq(rounded_nearest, fxp_rounded_nearest, + "Mistake rounding %s to nearest", str); +} + +TEST_BEGIN(test_round_simple) { + expect_round("1.5", 1, 2); + expect_round("0", 0, 0); + expect_round("0.1", 0, 0); + expect_round("0.4", 0, 0); + expect_round("0.40000", 0, 0); + expect_round("0.5", 0, 1); + expect_round("0.6", 0, 1); + expect_round("123", 123, 123); + expect_round("123.4", 123, 123); + expect_round("123.5", 123, 124); +} +TEST_END + +static void +expect_print(const char *str) { + fxp_t fxp = xparse_fxp(str); + char buf[FXP_BUF_SIZE]; + fxp_print(fxp, 
buf); + expect_d_eq(0, strcmp(str, buf), "Couldn't round-trip print %s", str); +} + +TEST_BEGIN(test_print_simple) { + expect_print("0.0"); + expect_print("1.0"); + expect_print("2.0"); + expect_print("123.0"); + /* + * We hit the possibility of roundoff errors whenever the fractional + * component isn't a round binary number; only check these here (we + * round-trip properly in the stress test). + */ + expect_print("1.5"); + expect_print("3.375"); + expect_print("0.25"); + expect_print("0.125"); + /* 1 / 2**14 */ + expect_print("0.00006103515625"); +} +TEST_END + +TEST_BEGIN(test_stress) { + const char *numbers[] = { + "0.0", "0.1", "0.2", "0.3", "0.4", + "0.5", "0.6", "0.7", "0.8", "0.9", + + "1.0", "1.1", "1.2", "1.3", "1.4", + "1.5", "1.6", "1.7", "1.8", "1.9", + + "2.0", "2.1", "2.2", "2.3", "2.4", + "2.5", "2.6", "2.7", "2.8", "2.9", + + "17.0", "17.1", "17.2", "17.3", "17.4", + "17.5", "17.6", "17.7", "17.8", "17.9", + + "18.0", "18.1", "18.2", "18.3", "18.4", + "18.5", "18.6", "18.7", "18.8", "18.9", + + "123.0", "123.1", "123.2", "123.3", "123.4", + "123.5", "123.6", "123.7", "123.8", "123.9", + + "124.0", "124.1", "124.2", "124.3", "124.4", + "124.5", "124.6", "124.7", "124.8", "124.9", + + "125.0", "125.1", "125.2", "125.3", "125.4", + "125.5", "125.6", "125.7", "125.8", "125.9"}; + size_t numbers_len = sizeof(numbers)/sizeof(numbers[0]); + for (size_t i = 0; i < numbers_len; i++) { + fxp_t fxp_a = xparse_fxp(numbers[i]); + double double_a = strtod(numbers[i], NULL); + + uint32_t fxp_rounded_down = fxp_round_down(fxp_a); + uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp_a); + uint32_t double_rounded_down = (uint32_t)double_a; + uint32_t double_rounded_nearest = (uint32_t)round(double_a); + + expect_u32_eq(double_rounded_down, fxp_rounded_down, + "Incorrectly rounded down %s", numbers[i]); + expect_u32_eq(double_rounded_nearest, fxp_rounded_nearest, + "Incorrectly rounded-to-nearest %s", numbers[i]); + + for (size_t j = 0; j < numbers_len; j++) { + 
fxp_t fxp_b = xparse_fxp(numbers[j]); + double double_b = strtod(numbers[j], NULL); + + fxp_t fxp_sum = fxp_add(fxp_a, fxp_b); + double double_sum = double_a + double_b; + expect_true( + double_close(fxp2double(fxp_sum), double_sum), + "Miscomputed %s + %s", numbers[i], numbers[j]); + + if (double_a > double_b) { + fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b); + double double_diff = double_a - double_b; + expect_true( + double_close(fxp2double(fxp_diff), + double_diff), + "Miscomputed %s - %s", numbers[i], + numbers[j]); + } + + fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b); + double double_prod = double_a * double_b; + expect_true( + double_close(fxp2double(fxp_prod), double_prod), + "Miscomputed %s * %s", numbers[i], numbers[j]); + + if (double_b != 0.0) { + fxp_t fxp_quot = fxp_div(fxp_a, fxp_b); + double double_quot = double_a / double_b; + expect_true( + double_close(fxp2double(fxp_quot), + double_quot), + "Miscomputed %s / %s", numbers[i], + numbers[j]); + } + } + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_parse_valid, + test_parse_invalid, + test_add_simple, + test_sub_simple, + test_mul_simple, + test_div_simple, + test_round_simple, + test_print_simple, + test_stress); +} From d438296b1fbb898653b9f3f454f3f84b33d30986 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 1 Dec 2020 13:13:55 -0800 Subject: [PATCH 1923/2608] narenas_ratio: Accept fractional values. With recent scalability improvements to the HPA, we're experimenting with much lower arena counts; this gets annoying when trying to test across different hardware configurations using only the narenas setting. 
--- src/jemalloc.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 1a8db833..74240c0a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -9,6 +9,7 @@ #include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/fxp.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" @@ -127,7 +128,7 @@ bool opt_utrace = false; bool opt_xmalloc = false; bool opt_zero = false; unsigned opt_narenas = 0; -unsigned opt_narenas_ratio = 4; +fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; @@ -1312,10 +1313,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (CONF_MATCH("narenas_ratio")) { - CONF_HANDLE_UNSIGNED(opt_narenas_ratio, - "narenas_ratio", 1, UINT_MAX, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ false) + char *end; + bool err = fxp_parse(&opt_narenas_ratio, v, + &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; @@ -1877,7 +1882,13 @@ malloc_narenas_default(void) { * default. */ if (ncpus > 1) { - return ncpus * opt_narenas_ratio; + fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); + fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); + uint32_t int_goal = fxp_round_nearest(goal); + if (int_goal == 0) { + return 1; + } + return int_goal; } else { return 1; } From d0a991d47b2717ac6abe6a7d8adc52c967ecd115 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Nov 2020 17:24:31 -0800 Subject: [PATCH 1924/2608] psset: Add insert/remove functions. These will allow us to (for instance) move pageslabs from a psset dedicated to not-yet-hugeified pages to one dedicated to hugeified ones. 
--- include/jemalloc/internal/psset.h | 3 + src/psset.c | 62 +++++++++++---- test/unit/psset.c | 121 +++++++++++++++++++++++------- 3 files changed, 144 insertions(+), 42 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 14311239..4b0c4da4 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -59,6 +59,9 @@ struct psset_s { void psset_init(psset_t *psset); +void psset_insert(psset_t *psset, edata_t *ps); +void psset_remove(psset_t *psset, edata_t *ps); + /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. diff --git a/src/psset.c b/src/psset.c index 9fc7ec14..cd0dcae7 100644 --- a/src/psset.c +++ b/src/psset.c @@ -65,6 +65,51 @@ psset_assert_ps_consistent(edata_t *ps) { edata_size_get(ps) >> LG_PAGE) == edata_longest_free_range_get(ps)); } +void +psset_insert(psset_t *psset, edata_t *ps) { + psset_assert_ps_consistent(ps); + size_t longest_free_range = edata_longest_free_range_get(ps); + + if (longest_free_range == 0) { + /* + * We don't ned to track full slabs; just pretend to for stats + * purposes. See the comment at psset_bin_stats_adjust. 
+ */ + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ true); + return; + } + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + + assert(pind < PSSET_NPSIZES); + if (edata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } + psset_edata_heap_insert(psset, pind, ps); +} + +void +psset_remove(psset_t *psset, edata_t *ps) { + psset_assert_ps_consistent(ps); + size_t longest_free_range = edata_longest_free_range_get(ps); + + if (longest_free_range == 0) { + psset_bin_stats_adjust(&psset->full_slab_stats, ps, + /* inc */ true); + return; + } + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + psset_edata_heap_remove(psset, pind, ps); + if (edata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } +} + /* * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. @@ -91,21 +136,6 @@ psset_recycle_extract(psset_t *psset, size_t size) { return ps; } -static void -psset_insert(psset_t *psset, edata_t *ps, size_t largest_range) { - psset_assert_ps_consistent(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - largest_range << LG_PAGE)); - - assert(pind < PSSET_NPSIZES); - - if (edata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); - } - psset_edata_heap_insert(psset, pind, ps); -} - /* * Given a pageslab ps and an edata to allocate size bytes from, initializes the * edata with a range in the pageslab, and puts ps back in the set. 
@@ -187,7 +217,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, psset_bin_stats_adjust(&psset->full_slab_stats, ps, /* inc */ true); } else { - psset_insert(psset, ps, largest_unchosen_range); + psset_insert(psset, ps); } } diff --git a/test/unit/psset.c b/test/unit/psset.c index 861903d6..e734ec8e 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -360,23 +360,37 @@ TEST_BEGIN(test_stats) { expect_false(err, "Nonempty psset failed page allocation."); } stats_expect(&psset, PAGESLAB_PAGES); + edata_t *ps; for (ssize_t i = PAGESLAB_PAGES - 1; i >= 0; i--) { - edata_t *ps = psset_dalloc(&psset, &alloc[i]); + ps = psset_dalloc(&psset, &alloc[i]); expect_true((ps == NULL) == (i != 0), "psset_dalloc should only evict a slab on the last free"); stats_expect(&psset, i); } + + psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + stats_expect(&psset, 1); + psset_remove(&psset, &pageslab); + stats_expect(&psset, 0); + psset_insert(&psset, &pageslab); + stats_expect(&psset, 1); } TEST_END -TEST_BEGIN(test_oldest_fit) { +/* + * Fills in and inserts two pageslabs, with the first better than the second, + * and each fully allocated (into the allocations in allocs and worse_allocs, + * each of which should be PAGESLAB_PAGES long). + * + * (There's nothing magic about these numbers; it's just useful to share the + * setup between the oldest fit and the insert/remove test). 
+ */ +static void +init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, + edata_t *alloc, edata_t *worse_alloc) { bool err; - edata_t alloc[PAGESLAB_PAGES]; - edata_t worse_alloc[PAGESLAB_PAGES]; - - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_init(&pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), + memset(pageslab, 0, sizeof(*pageslab)); + edata_init(pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN + 1, extent_state_active, /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); @@ -386,29 +400,27 @@ TEST_BEGIN(test_oldest_fit) { * added to the set after the previous one, and so should be less * preferred for allocations. */ - edata_t worse_pageslab; - memset(&worse_pageslab, 0, sizeof(pageslab)); - edata_init(&worse_pageslab, /* arena_ind */ 0, + memset(worse_pageslab, 0, sizeof(*worse_pageslab)); + edata_init(worse_pageslab, /* arena_ind */ 0, (void *)(9 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN - 1, extent_state_active, /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); - psset_t psset; - psset_init(&psset); + psset_init(psset); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + psset_alloc_new(psset, pageslab, &alloc[0], PAGE); for (size_t i = 1; i < PAGESLAB_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); - expect_ptr_eq(&pageslab, edata_ps_get(&alloc[i]), + expect_ptr_eq(pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } edata_init_test(&worse_alloc[0]); - psset_alloc_new(&psset, &worse_pageslab, &worse_alloc[0], PAGE); - expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[0]), + psset_alloc_new(psset, worse_pageslab, &worse_alloc[0], PAGE); + 
expect_ptr_eq(worse_pageslab, edata_ps_get(&worse_alloc[0]), "Allocated from the wrong pageslab"); /* * Make the two pssets otherwise indistinguishable; all full except for @@ -416,20 +428,31 @@ TEST_BEGIN(test_oldest_fit) { */ for (size_t i = 1; i < PAGESLAB_PAGES - 1; i++) { edata_init_test(&worse_alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); - expect_ptr_eq(&worse_pageslab, edata_ps_get(&alloc[i]), + expect_ptr_eq(worse_pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } /* Deallocate the last page from the older pageslab. */ - edata_t *evicted = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + edata_t *evicted = psset_dalloc(psset, &alloc[PAGESLAB_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); +} - /* - * This edata is the whole purpose for the test; it should come from the - * older pageslab. - */ +TEST_BEGIN(test_oldest_fit) { + bool err; + edata_t alloc[PAGESLAB_PAGES]; + edata_t worse_alloc[PAGESLAB_PAGES]; + + edata_t pageslab; + edata_t worse_pageslab; + + psset_t psset; + + init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, + worse_alloc); + + /* The edata should come from the better pageslab. */ edata_t test_edata; edata_init_test(&test_edata); err = psset_alloc_reuse(&psset, &test_edata, PAGE); @@ -439,6 +462,51 @@ TEST_BEGIN(test_oldest_fit) { } TEST_END +TEST_BEGIN(test_insert_remove) { + bool err; + edata_t *ps; + edata_t alloc[PAGESLAB_PAGES]; + edata_t worse_alloc[PAGESLAB_PAGES]; + + edata_t pageslab; + edata_t worse_pageslab; + + psset_t psset; + + init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, + worse_alloc); + + /* Remove better; should still be able to alloc from worse. 
*/ + psset_remove(&psset, &pageslab); + err = psset_alloc_reuse(&psset, &worse_alloc[PAGESLAB_PAGES - 1], PAGE); + expect_false(err, "Removal should still leave an empty page"); + expect_ptr_eq(&worse_pageslab, + edata_ps_get(&worse_alloc[PAGESLAB_PAGES - 1]), + "Allocated out of wrong ps"); + + /* + * After deallocating the previous alloc and reinserting better, it + * should be preferred for future allocations. + */ + ps = psset_dalloc(&psset, &worse_alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); + psset_insert(&psset, &pageslab); + err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + expect_false(err, "psset should be nonempty"); + expect_ptr_eq(&pageslab, edata_ps_get(&alloc[PAGESLAB_PAGES - 1]), + "Removal/reinsertion shouldn't change ordering"); + /* + * After deallocating and removing both, allocations should fail. + */ + ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + expect_ptr_null(ps, "Incorrect eviction"); + psset_remove(&psset, &pageslab); + psset_remove(&psset, &worse_pageslab); + err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + expect_true(err, "psset should be empty, but an alloc succeeded"); +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -448,5 +516,6 @@ main(void) { test_evict, test_multi_pageslab, test_stats, - test_oldest_fit); + test_oldest_fit, + test_insert_remove); } From c1b2a77933135ebefa62a5ec4c7d9efa94b14592 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 10 Nov 2020 16:23:03 -0800 Subject: [PATCH 1925/2608] psset: Move in stats. A later change will benefit from having these functions pulled into a psset-module set of functions. 
--- include/jemalloc/internal/hpa.h | 6 +++-- include/jemalloc/internal/psset.h | 24 ++++++++--------- src/ctl.c | 21 +++++---------- src/hpa.c | 22 ++++++++++++++-- src/pa_extra.c | 10 +------ src/psset.c | 43 +++++++++++++++++++------------ test/unit/psset.c | 19 ++++++++------ 7 files changed, 82 insertions(+), 63 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 159f0d02..12a7a17d 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -35,8 +35,7 @@ struct hpa_s { /* Used only by CTL; not actually stored here (i.e., all derived). */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { - psset_bin_stats_t psset_full_slab_stats; - psset_bin_stats_t psset_slab_stats[PSSET_NPSIZES]; + psset_stats_t psset_stats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -89,6 +88,9 @@ bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, size_t large_min); + +void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); +void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. Due to * the possibility of races, we don't actually prevent allocations; just flush diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 4b0c4da4..4529827a 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -31,12 +31,16 @@ struct psset_bin_stats_s { size_t ninactive; }; -static inline void -psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs += src->npageslabs; - dst->nactive += src->nactive; - dst->ninactive += src->ninactive; -} +/* Used only by CTL; not actually stored here (i.e., all derived). 
*/ +typedef struct psset_stats_s psset_stats_t; +struct psset_stats_s { + /* + * Full slabs don't live in any edata heap. But we still track their + * stats. + */ + psset_bin_stats_t full_slabs; + psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES]; +}; typedef struct psset_s psset_t; struct psset_s { @@ -46,18 +50,14 @@ struct psset_s { */ edata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; - /* - * Full slabs don't live in any edata heap. But we still track their - * stats. - */ - psset_bin_stats_t full_slab_stats; - psset_bin_stats_t slab_stats[PSSET_NPSIZES]; + psset_stats_t stats; /* How many alloc_new calls have happened? */ uint64_t age_counter; }; void psset_init(psset_t *psset); +void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, edata_t *ps); void psset_remove(psset_t *psset, edata_t *ps); diff --git a/src/ctl.c b/src/ctl.c index 4bb422a2..f0df73b7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1104,14 +1104,7 @@ MUTEX_PROF_ARENA_MUTEXES } /* Merge HPA stats. 
*/ - psset_bin_stats_accum(&sdstats->hpastats.psset_full_slab_stats, - &astats->hpastats.psset_full_slab_stats); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum( - &sdstats->hpastats.psset_slab_stats[i], - &astats->hpastats.psset_slab_stats[i]); - } - + hpa_stats_accum(&sdstats->hpastats, &astats->hpastats); sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -3375,21 +3368,21 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, - arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_full_slab_stats.ninactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, - arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].nactive, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_slab_stats[mib[5]].ninactive, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive, size_t); static const ctl_named_node_t * diff --git a/src/hpa.c b/src/hpa.c index 8029e0bd..e7548adb 100644 --- 
a/src/hpa.c +++ b/src/hpa.c @@ -89,6 +89,24 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, return false; } +/* + * Note that the stats functions here follow the usual stats naming conventions; + * "merge" obtains the stats from some live object of instance, while "accum" + * only combines the stats from one stats objet to another. Hence the lack of + * locking here. + */ +void +hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { + psset_stats_accum(&dst->psset_stats, &src->psset_stats); +} + +void +hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { + malloc_mutex_lock(tsdn, &shard->mtx); + psset_stats_accum(&dst->psset_stats, &shard->psset.stats); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + static edata_t * hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, size_t size_goal) { @@ -415,10 +433,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { PAGE); malloc_mutex_unlock(tsdn, &shard->mtx); assert(psset_empty); - hpa_shard_assert_stats_empty(&shard->psset.full_slab_stats); + hpa_shard_assert_stats_empty(&shard->psset.stats.full_slabs); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { hpa_shard_assert_stats_empty( - &shard->psset.slab_stats[i]); + &shard->psset.stats.nonfull_slabs[i]); } } } diff --git a/src/pa_extra.c b/src/pa_extra.c index 24cb6537..2002418a 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -150,15 +150,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, } if (shard->ever_used_hpa) { - malloc_mutex_lock(tsdn, &shard->hpa_shard.mtx); - psset_bin_stats_accum(&hpa_stats_out->psset_full_slab_stats, - &shard->hpa_shard.psset.full_slab_stats); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum( - &hpa_stats_out->psset_slab_stats[i], - &shard->hpa_shard.psset.slab_stats[i]); - } - malloc_mutex_unlock(tsdn, &shard->hpa_shard.mtx); + hpa_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); sec_stats_merge(tsdn, &shard->hpa_sec, 
sec_stats_out); } } diff --git a/src/psset.c b/src/psset.c index cd0dcae7..c24266ce 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,17 +14,26 @@ psset_init(psset_t *psset) { edata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); - psset->full_slab_stats.npageslabs = 0; - psset->full_slab_stats.nactive = 0; - psset->full_slab_stats.ninactive = 0; - for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - psset->slab_stats[i].npageslabs = 0; - psset->slab_stats[i].nactive = 0; - psset->slab_stats[i].ninactive = 0; - } + memset(&psset->stats, 0, sizeof(psset->stats)); psset->age_counter = 0; } +static void +psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { + dst->npageslabs += src->npageslabs; + dst->nactive += src->nactive; + dst->ninactive += src->ninactive; +} + +void +psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { + psset_bin_stats_accum(&dst->full_slabs, &src->full_slabs); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + psset_bin_stats_accum(&dst->nonfull_slabs[i], + &src->nonfull_slabs[i]); + } +} + /* * The stats maintenance strategy is simple, but not necessarily obvious. * edata_nfree and the bitmap must remain consistent at all times. 
If they @@ -50,13 +59,15 @@ psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ false); + psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, + /* inc */ false); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->slab_stats[pind], ps, /* inc */ true); + psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, + /* inc */ true); } JEMALLOC_ALWAYS_INLINE void @@ -75,7 +86,7 @@ psset_insert(psset_t *psset, edata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. */ - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ true); return; } @@ -96,7 +107,7 @@ psset_remove(psset_t *psset, edata_t *ps) { size_t longest_free_range = edata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ true); return; } @@ -214,7 +225,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ true); } else { psset_insert(psset, ps); @@ -265,15 +276,15 @@ psset_dalloc(psset_t *psset, edata_t *edata) { fb_unset_range(ps_fb, ps_npages, begin, len); if (ps_old_longest_free_range == 0) { /* We were in the (imaginary) full bin; update stats for it. 
*/ - psset_bin_stats_adjust(&psset->full_slab_stats, ps, + psset_bin_stats_adjust(&psset->stats.full_slabs, ps, /* inc */ false); } else { /* * The edata is still in the bin, need to update its * contribution. */ - psset->slab_stats[old_pind].nactive -= len; - psset->slab_stats[old_pind].ninactive += len; + psset->stats.nonfull_slabs[old_pind].nactive -= len; + psset->stats.nonfull_slabs[old_pind].ninactive += len; } /* * Note that we want to do this after the stats updates, since if it was diff --git a/test/unit/psset.c b/test/unit/psset.c index e734ec8e..e07bdc46 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -307,14 +307,14 @@ stats_expect_empty(psset_bin_stats_t *stats) { static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == PAGESLAB_PAGES) { - expect_zu_eq(1, psset->full_slab_stats.npageslabs, + expect_zu_eq(1, psset->stats.full_slabs.npageslabs, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, psset->full_slab_stats.nactive, + expect_zu_eq(PAGESLAB_PAGES, psset->stats.full_slabs.nactive, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->full_slab_stats.ninactive, + expect_zu_eq(0, psset->stats.full_slabs.ninactive, "Should never have inactive pages in a full slab"); } else { - stats_expect_empty(&psset->full_slab_stats); + stats_expect_empty(&psset->stats.full_slabs); } size_t ninactive = PAGESLAB_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; @@ -324,14 +324,17 @@ stats_expect(psset_t *psset, size_t nactive) { } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { - assert_zu_eq(1, psset->slab_stats[i].npageslabs, + assert_zu_eq(1, + psset->stats.nonfull_slabs[i].npageslabs, "Should have found a slab"); - expect_zu_eq(nactive, psset->slab_stats[i].nactive, + expect_zu_eq(nactive, + psset->stats.nonfull_slabs[i].nactive, "Mismatch in active pages"); - expect_zu_eq(ninactive, psset->slab_stats[i].ninactive, + expect_zu_eq(ninactive, + psset->stats.nonfull_slabs[i].ninactive, 
"Mismatch in inactive pages"); } else { - stats_expect_empty(&psset->slab_stats[i]); + stats_expect_empty(&psset->stats.nonfull_slabs[i]); } } } From 63677dde631e089c4dc00b6cca5e6e03ac9fdc90 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 09:10:15 -0800 Subject: [PATCH 1926/2608] Pages: Statically detect if pages_huge may succeed --- include/jemalloc/internal/pages.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 7dae633a..cfaa0fc2 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -58,6 +58,18 @@ static const bool pages_can_purge_forced = #endif ; +#if defined(JEMALLOC_HAVE_MADVISE_HUGE) || defined(JEMALLOC_HAVE_MEMCNTL) +# define PAGES_CAN_HUGIFY +#endif + +static const bool pages_can_hugify = +#ifdef PAGES_CAN_HUGIFY + true +#else + false +#endif + ; + typedef enum { thp_mode_default = 0, /* Do not change hugepage settings. */ thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ From 43af63fff496967bf2173c92737aea1cca4ca025 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 9 Nov 2020 13:49:30 -0800 Subject: [PATCH 1927/2608] HPA: Manage whole hugepages at a time. This redesigns the HPA implementation to allow us to manage hugepages all at once, locally, without relying on a global fallback. 
--- include/jemalloc/internal/arena_externs.h | 1 - include/jemalloc/internal/edata.h | 18 +- include/jemalloc/internal/hpa.h | 93 ++-- include/jemalloc/internal/mutex_prof.h | 4 +- include/jemalloc/internal/pa.h | 5 +- include/jemalloc/internal/psset.h | 11 +- src/arena.c | 6 +- src/ctl.c | 115 ++-- src/hpa.c | 613 +++++++++++++--------- src/jemalloc.c | 37 +- src/pa.c | 15 +- src/pa_extra.c | 2 +- src/psset.c | 120 +++-- src/stats.c | 113 ++-- test/unit/hpa.c | 76 +-- test/unit/psset.c | 21 +- 16 files changed, 700 insertions(+), 550 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 40223b58..e3cfcee2 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -16,7 +16,6 @@ extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; -extern hpa_t arena_hpa_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 5ec12beb..465c962f 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -208,9 +208,9 @@ struct edata_s { */ /* - * If this edata is from an HPA, it may be part of some larger - * pageslab. Track it if so. Otherwise (either because it's - * not part of a pageslab, or not from the HPA at all), NULL. + * If this edata is a user allocation from an HPA, it comes out + * of some pageslab (we don't yet support huegpage allocations + * that don't fit into pageslabs). This tracks it. */ edata_t *ps; /* @@ -225,6 +225,8 @@ struct edata_s { * between heaps. */ uint32_t longest_free_range; + /* Whether or not the slab is backed by a hugepage. 
*/ + bool hugeified; }; }; @@ -328,6 +330,11 @@ edata_pai_get(const edata_t *edata) { EDATA_BITS_PAI_SHIFT); } +static inline bool +edata_hugeified_get(const edata_t *edata) { + return edata->hugeified; +} + static inline bool edata_slab_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> @@ -559,6 +566,11 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) { ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); } +static inline void +edata_hugeified_set(edata_t *edata, bool hugeified) { + edata->hugeified = hugeified; +} + static inline void edata_slab_set(edata_t *edata, bool slab) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 12a7a17d..1c4585df 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,32 +6,6 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -typedef struct hpa_s hpa_t; -struct hpa_s { - /* - * We have two mutexes for the central allocator; mtx protects its - * state, while grow_mtx protects controls the ability to grow the - * backing store. This prevents race conditions in which the central - * allocator has exhausted its memory while mutiple threads are trying - * to allocate. If they all reserved more address space from the OS - * without synchronization, we'd end consuming much more than necessary. - */ - malloc_mutex_t grow_mtx; - malloc_mutex_t mtx; - hpa_central_t central; - /* The arena ind we're associated with. */ - unsigned ind; - /* - * This edata cache is the global one that we use for new allocations in - * growing; practically, it comes from a0. - * - * We don't use an edata_cache_small in front of this, since we expect a - * small finite number of allocations from it. - */ - edata_cache_t *edata_cache; - exp_grow_t exp_grow; -}; - /* Used only by CTL; not actually stored here (i.e., all derived). 
*/ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { @@ -53,44 +27,53 @@ struct hpa_shard_s { * allocator, and so will use its edata_cache. */ edata_cache_small_t ecs; - hpa_t *hpa; + psset_t psset; /* - * When we're grabbing a new ps from the central allocator, how big - * would we like it to be? This is mostly about the level of batching - * we use in our requests to the centralized allocator. + * The largest size we'll allocate out of the shard. For those + * allocations refused, the caller (in practice, the PA module) will + * fall back to the more general (for now) PAC, which can always handle + * any allocation request. */ - size_t ps_goal; + size_t alloc_max; + /* - * What's the maximum size we'll try to allocate out of the psset? We - * don't want this to be too large relative to ps_goal, as a - * fragmentation avoidance measure. + * Slabs currently purged away. They are hugepage-sized and + * hugepage-aligned, but have had pages_nohuge and pages_purge_forced + * called on them. + * + * Guarded by grow_mtx. */ - size_t ps_alloc_max; + edata_list_inactive_t unused_slabs; + /* - * What's the maximum size we'll try to allocate out of the shard at - * all? + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. */ - size_t small_max; - /* - * What's the minimum size for which we'll go straight to the global - * arena? - */ - size_t large_min; + edata_t *eden; /* The arena ind we're associated with. */ unsigned ind; + emap_t *emap; }; -bool hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, - edata_cache_t *edata_cache); -bool hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, - edata_cache_t *edata_cache, unsigned ind, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min); +/* + * Whether or not the HPA can be used given the current configuration. 
This is + * is not necessarily a guarantee that it backs its allocations by hugepages, + * just that it can function properly given the system it's running on. + */ +bool hpa_supported(); +bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, + edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); + +void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); +void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_shard_stats_t *dst); -void hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); -void hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. Due to * the possibility of races, we don't actually prevent allocations; just flush @@ -108,14 +91,4 @@ void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard); -/* - * These should be acquired after all the shard locks in phase 4, but before any - * locks in phase 4. The central HPA may acquire an edata cache mutex (of a0), - * so it needs to be lower in the witness ordering, but it's also logically - * global and not tied to any particular arena. 
- */ -void hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa); -void hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa); -void hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa); - #endif /* JEMALLOC_INTERNAL_HPA_H */ diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index ef0bf0d3..3759daaf 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,9 +11,7 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) \ - OP(hpa_central) \ - OP(hpa_central_grow) + OP(prof_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index f1823e6b..b9030226 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,9 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. */ -bool pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min, size_t sec_nshards, - size_t sec_alloc_max, size_t sec_bytes_max); +bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 4529827a..3c9f23bb 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -24,11 +24,14 @@ typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ - size_t npageslabs; + size_t npageslabs_huge; + size_t npageslabs_nonhuge; /* Of them, how many pages are active? 
*/ - size_t nactive; + size_t nactive_huge; + size_t nactive_nonhuge; /* How many are inactive? */ - size_t ninactive; + size_t ninactive_huge; + size_t ninactive_nonhuge; }; /* Used only by CTL; not actually stored here (i.e., all derived). */ @@ -62,6 +65,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, edata_t *ps); void psset_remove(psset_t *psset, edata_t *ps); +void psset_hugify(psset_t *psset, edata_t *ps); + /* * Tries to obtain a chunk from an existing pageslab already in the set. * Returns true on failure. diff --git a/src/arena.c b/src/arena.c index 7099713a..209eb347 100644 --- a/src/arena.c +++ b/src/arena.c @@ -37,7 +37,6 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; -hpa_t arena_hpa_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1535,9 +1534,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, + if (pa_shard_enable_hpa(&arena->pa_shard, + opt_hpa_slab_max_alloc, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { goto label_error; } diff --git a/src/ctl.c b/src/ctl.c index f0df73b7..88cee666 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -220,13 +220,19 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) 
CTL_PROTO(stats_arenas_i_dss) @@ -606,21 +612,33 @@ MUTEX_PROF_ARENA_MUTEXES }; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { - {NAME("npageslabs"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs)}, - {NAME("nactive"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive)}, - {NAME("ninactive"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive)} + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, + {NAME("ninactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)}, + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, + {NAME("ninactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)}, }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME("npageslabs"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs)}, - {NAME("nactive"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive)}, - {NAME("ninactive"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive)} + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, + {NAME("ninactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)}, + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, + {NAME("ninactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -1104,7 +1122,7 @@ MUTEX_PROF_ARENA_MUTEXES } /* Merge HPA stats. 
*/ - hpa_stats_accum(&sdstats->hpastats, &astats->hpastats); + hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats); sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -1219,14 +1237,6 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); } - if (opt_hpa) { - READ_GLOBAL_MUTEX_PROF_DATA( - global_prof_mutex_hpa_central, - arena_hpa_global.mtx); - READ_GLOBAL_MUTEX_PROF_DATA( - global_prof_mutex_hpa_central_grow, - arena_hpa_global.grow_mtx); - } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_background_thread, @@ -3259,11 +3269,6 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); } - if (opt_hpa) { - MUTEX_PROF_RESET(arena_hpa_global.mtx); - MUTEX_PROF_RESET(arena_hpa_global.grow_mtx); - } - /* Per arena mutexes. */ unsigned n = narenas_total_get(); @@ -3367,22 +3372,44 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs, +/* Full, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_huge, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, + 
arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs, +/* Full, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_nonhuge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive, +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_nonhuge, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_nonhuge, size_t); + +/* Nonfull, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive, +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_huge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_huge, + size_t); + +/* Nonfull, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_nonhuge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, + 
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_nonhuge, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_nonhuge, size_t); static const ctl_named_node_t * diff --git a/src/hpa.c b/src/hpa.c index e7548adb..ca75628c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -6,6 +6,8 @@ #include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/witness.h" +#define HPA_EDEN_SIZE (128 * HUGEPAGE) + static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -15,43 +17,40 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); bool -hpa_init(hpa_t *hpa, base_t *base, emap_t *emap, edata_cache_t *edata_cache) { - bool err; - +hpa_supported() { +#ifdef _WIN32 + /* + * At least until the API and implementation is somewhat settled, we + * don't want to try to debug the VM subsystem on the hardest-to-test + * platform. + */ + return false; +#endif + if (!pages_can_hugify) { + return false; + } /* * We fundamentally rely on a address-space-hungry growth strategy for - * hugepages. This may change in the future, but for now we should have - * refused to turn on any HPA at a higher level of the stack. + * hugepages. */ - assert(LG_SIZEOF_PTR == 3); - - err = malloc_mutex_init(&hpa->grow_mtx, "hpa_grow", WITNESS_RANK_HPA_GROW, - malloc_mutex_rank_exclusive); - if (err) { - return true; + if (LG_SIZEOF_PTR == 2) { + return false; } - err = malloc_mutex_init(&hpa->mtx, "hpa", WITNESS_RANK_HPA, - malloc_mutex_rank_exclusive); - if (err) { - return true; + /* + * We use the edata bitmap; it needs to have at least as many bits as a + * hugepage has pages. 
+ */ + if (HUGEPAGE / PAGE > BITMAP_GROUPS_MAX * sizeof(bitmap_t) * 8) { + return false; } - - hpa_central_init(&hpa->central, edata_cache, emap); - if (err) { - return true; - } - hpa->ind = base_ind_get(base); - hpa->edata_cache = edata_cache; - - exp_grow_init(&hpa->exp_grow); - - return false; + return true; } bool -hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, - unsigned ind, size_t ps_goal, size_t ps_alloc_max, size_t small_max, - size_t large_min) { +hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, + unsigned ind, size_t alloc_max) { + /* malloc_conf processing should have filtered out these cases. */ + assert(hpa_supported()); bool err; err = malloc_mutex_init(&shard->grow_mtx, "hpa_shard_grow", WITNESS_RANK_HPA_SHARD_GROW, malloc_mutex_rank_exclusive); @@ -66,12 +65,12 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, assert(edata_cache != NULL); edata_cache_small_init(&shard->ecs, edata_cache); - shard->hpa = hpa; psset_init(&shard->psset); - shard->ps_goal = ps_goal; - shard->ps_alloc_max = ps_alloc_max; - shard->small_max = small_max; - shard->large_min = large_min; + shard->alloc_max = alloc_max; + edata_list_inactive_init(&shard->unused_slabs); + shard->eden = NULL; + shard->ind = ind; + shard->emap = emap; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -83,9 +82,6 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->ind = ind; - assert(ind == base_ind_get(edata_cache->base)); - return false; } @@ -96,176 +92,333 @@ hpa_shard_init(hpa_shard_t *shard, hpa_t *hpa, edata_cache_t *edata_cache, * locking here. 
*/ void -hpa_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { +hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); } void -hpa_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { +hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_shard_stats_t *dst) { malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); malloc_mutex_unlock(tsdn, &shard->mtx); } -static edata_t * -hpa_alloc_central(tsdn_t *tsdn, hpa_shard_t *shard, size_t size_min, - size_t size_goal) { - bool err; - edata_t *edata; - - hpa_t *hpa = shard->hpa; - - malloc_mutex_lock(tsdn, &hpa->mtx); - edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, - size_goal); - malloc_mutex_unlock(tsdn, &hpa->mtx); - if (edata != NULL) { - edata_arena_ind_set(edata, shard->ind); - return edata; - } - /* No existing range can satisfy the request; try to grow. */ - malloc_mutex_lock(tsdn, &hpa->grow_mtx); - +static bool +hpa_should_hugify(hpa_shard_t *shard, edata_t *ps) { /* - * We could have raced with other grow attempts; re-check to see if we - * did, and are now able to satisfy the request. + * For now, just use a static check; hugify a page if it's <= 5% + * inactive. Eventually, this should be a malloc conf option. */ - malloc_mutex_lock(tsdn, &hpa->mtx); - edata = hpa_central_alloc_reuse(tsdn, &hpa->central, size_min, - size_goal); - malloc_mutex_unlock(tsdn, &hpa->mtx); - if (edata != NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - edata_arena_ind_set(edata, shard->ind); - return edata; - } + return !edata_hugeified_get(ps) + && edata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; +} +/* Returns true on error. */ +static void +hpa_hugify(edata_t *ps) { + assert(edata_size_get(ps) == HUGEPAGE); + assert(edata_hugeified_get(ps)); + bool err = pages_huge(edata_base_get(ps), HUGEPAGE); /* - * No such luck. 
We've dropped mtx, so other allocations can proceed - * while we allocate the new extent. We know no one else will grow in - * the meantime, though, since we still hold grow_mtx. - */ - size_t alloc_size; - pszind_t skip; - - size_t hugepage_goal_min = HUGEPAGE_CEILING(size_goal); - - err = exp_grow_size_prepare(&hpa->exp_grow, hugepage_goal_min, - &alloc_size, &skip); - if (err) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - return NULL; - } - alloc_size = HUGEPAGE_CEILING(alloc_size); - - /* - * Eventually, we need to think about this more systematically, and in - * terms of extent hooks. For now, though, we know we only care about - * overcommitting systems, and we're not going to purge much. - */ - bool commit = true; - void *addr = pages_map(NULL, alloc_size, HUGEPAGE, &commit); - if (addr == NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - return NULL; - } - err = pages_huge(addr, alloc_size); - /* - * Ignore this for now; even if the allocation fails, the address space - * should still be usable. + * Eat the error; even if the hugeification failed, it's still safe to + * pretend it didn't (and would require extraordinary measures to + * unhugify). */ (void)err; +} - edata = edata_cache_get(tsdn, hpa->edata_cache); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - pages_unmap(addr, alloc_size); - return NULL; +static void +hpa_dehugify(edata_t *ps) { + /* Purge, then dehugify while unbacked. */ + pages_purge_forced(edata_addr_get(ps), HUGEPAGE); + pages_nohuge(edata_addr_get(ps), HUGEPAGE); + edata_hugeified_set(ps, false); +} + +static edata_t * +hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + edata_t *ps = NULL; + + /* Is there address space waiting for reuse? 
*/ + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + ps = edata_list_inactive_first(&shard->unused_slabs); + if (ps != NULL) { + edata_list_inactive_remove(&shard->unused_slabs, ps); + return ps; + } + + /* Is eden a perfect fit? */ + if (shard->eden != NULL && edata_size_get(shard->eden) == HUGEPAGE) { + ps = shard->eden; + shard->eden = NULL; + return ps; } /* - * The serial number here is just a placeholder; the hpa_central gets to - * decide how it wants to fill it in. - * - * The grow edata is associated with the hpa_central_t arena ind; the - * subsequent allocation we get (in the hpa_central_alloc_grow call - * below) will be filled in with the shard ind. + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. */ - edata_init(edata, hpa->ind, addr, alloc_size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ true, - /* comitted */ true, EXTENT_PAI_HPA, /* is_head */ true); + if (shard->eden == NULL) { + /* + * During development, we're primarily concerned with systems + * with overcommit. Eventually, we should be more careful here. + */ + bool commit = true; + /* Allocate address space, bailing if we fail. */ + void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, + &commit); + if (new_eden == NULL) { + return NULL; + } + malloc_mutex_lock(tsdn, &shard->mtx); + /* Allocate ps edata, bailing if we fail. */ + ps = edata_cache_small_get(tsdn, &shard->ecs); + if (ps == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + pages_unmap(new_eden, HPA_EDEN_SIZE); + return NULL; + } + /* Allocate eden edata, bailing if we fail. */ + shard->eden = edata_cache_small_get(tsdn, &shard->ecs); + if (shard->eden == NULL) { + edata_cache_small_put(tsdn, &shard->ecs, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + pages_unmap(new_eden, HPA_EDEN_SIZE); + return NULL; + } + /* Success. 
*/ + malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_lock(tsdn, &hpa->mtx); - /* Note that this replace edata with the allocation to return. */ - err = hpa_central_alloc_grow(tsdn, &hpa->central, size_goal, edata); - malloc_mutex_unlock(tsdn, &hpa->mtx); - - if (!err) { - exp_grow_size_commit(&hpa->exp_grow, skip); + /* + * Note that the values here don't really make sense (e.g. eden + * is actually zeroed). But we don't use the slab metadata in + * determining subsequent allocation metadata (e.g. zero + * tracking should be done at the per-page level, not at the + * level of the hugepage). It's just a convenient data + * structure that contains much of the helpers we need (defined + * lists, a bitmap, an address field, etc.). Eventually, we'll + * have a "real" representation of a hugepage that's unconnected + * to the edata_ts it will serve allocations into. + */ + edata_init(shard->eden, shard->ind, new_eden, HPA_EDEN_SIZE, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_dirty, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + /* is_head */ true); + edata_hugeified_set(shard->eden, false); + } else { + /* Eden is already nonempty; only need an edata for ps. */ + malloc_mutex_lock(tsdn, &shard->mtx); + ps = edata_cache_small_get(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (ps == NULL) { + return NULL; + } } - malloc_mutex_unlock(tsdn, &hpa->grow_mtx); - edata_arena_ind_set(edata, shard->ind); + /* + * We should have dropped mtx since we're not touching ecs any more, but + * we should continue to hold the grow mutex, since we're about to touch + * eden. 
+ */ + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); + assert(shard->eden != NULL); + assert(edata_size_get(shard->eden) > HUGEPAGE); + assert(edata_size_get(shard->eden) % HUGEPAGE == 0); + assert(edata_addr_get(shard->eden) + == HUGEPAGE_ADDR2BASE(edata_addr_get(shard->eden))); + malloc_mutex_lock(tsdn, &shard->mtx); + ps = edata_cache_small_get(tsdn, &shard->ecs); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (ps == NULL) { + return NULL; + } + edata_init(ps, edata_arena_ind_get(shard->eden), + edata_addr_get(shard->eden), HUGEPAGE, /* slab */ false, + /* szind */ SC_NSIZES, /* sn */ 0, extent_state_dirty, + /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, + /* is_head */ true); + edata_hugeified_set(ps, false); + edata_addr_set(shard->eden, edata_past_get(ps)); + edata_size_set(shard->eden, + edata_size_get(shard->eden) - HUGEPAGE); + + return ps; +} + +/* + * The psset does not hold empty slabs. Upon becoming empty, then, we need to + * put them somewhere. We take this as an opportunity to purge, and retain + * their address space in a list outside the psset. + */ +static void +hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { + /* + * We do relatively expensive system calls. The ps was evicted, so no + * one should touch it while we're also touching it. + */ + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); + + assert(edata_size_get(ps) == HUGEPAGE); + assert(HUGEPAGE_ADDR2BASE(edata_addr_get(ps)) == edata_addr_get(ps)); + + /* + * We do this unconditionally, even for pages which were not originally + * hugeified; it has the same effect. 
+ */ + hpa_dehugify(ps); + + malloc_mutex_lock(tsdn, &shard->grow_mtx); + edata_list_inactive_prepend(&shard->unused_slabs, ps); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); +} + +static edata_t * +hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { + bool err; + malloc_mutex_lock(tsdn, &shard->mtx); + edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); + *oom = false; + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + *oom = true; + return NULL; + } + assert(edata_arena_ind_get(edata) == shard->ind); + + err = psset_alloc_reuse(&shard->psset, edata, size); if (err) { - pages_unmap(addr, alloc_size); - edata_cache_put(tsdn, hpa->edata_cache, edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + return NULL; + } + /* + * This could theoretically be moved outside of the critical section, + * but that introduces the potential for a race. Without the lock, the + * (initially nonempty, since this is the reuse pathway) pageslab we + * allocated out of could become otherwise empty while the lock is + * dropped. This would force us to deal with a pageslab eviction down + * the error pathway, which is a pain. + */ + err = emap_register_boundary(tsdn, shard->emap, edata, + SC_NSIZES, /* slab */ false); + if (err) { + edata_t *ps = psset_dalloc(&shard->psset, edata); + /* + * The pageslab was nonempty before we started; it + * should still be nonempty now, and so shouldn't get + * evicted. + */ + assert(ps == NULL); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + *oom = true; return NULL; } + edata_t *ps = edata_ps_get(edata); + assert(ps != NULL); + bool hugify = hpa_should_hugify(shard, ps); + if (hugify) { + /* + * Do the metadata modification while holding the lock; we'll + * actually change state with the lock dropped. 
+ */ + psset_hugify(&shard->psset, ps); + } + malloc_mutex_unlock(tsdn, &shard->mtx); + if (hugify) { + /* + * Hugifying with the lock dropped is safe, even with + * concurrent modifications to the ps. This relies on + * the fact that the current implementation will never + * dehugify a non-empty pageslab, and ps will never + * become empty before we return edata to the user to be + * freed. + * + * Note that holding the lock would prevent not just operations + * on this page slab, but also operations any other alloc/dalloc + * operations in this hpa shard. + */ + hpa_hugify(ps); + } return edata; } static edata_t * hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size <= shard->ps_alloc_max); - + assert(size <= shard->alloc_max); bool err; - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); - return NULL; - } - edata_arena_ind_set(edata, shard->ind); + bool oom; + edata_t *edata; - err = psset_alloc_reuse(&shard->psset, edata, size); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (!err) { + edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); + if (edata != NULL) { return edata; } + /* Nothing in the psset works; we have to grow it. */ malloc_mutex_lock(tsdn, &shard->grow_mtx); - - /* As above; check for grow races. */ - malloc_mutex_lock(tsdn, &shard->mtx); - err = psset_alloc_reuse(&shard->psset, edata, size); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (!err) { + /* + * Check for grow races; maybe some earlier thread expanded the psset + * in between when we dropped the main mutex and grabbed the grow mutex. 
+ */ + edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); + if (edata != NULL || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; } - edata_t *grow_edata = hpa_alloc_central(tsdn, shard, size, - shard->ps_goal); + /* + * Note that we don't hold shard->mtx here (while growing); + * deallocations (and allocations of smaller sizes) may still succeed + * while we're doing this potentially expensive system call. + */ + edata_t *grow_edata = hpa_grow(tsdn, shard); if (grow_edata == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - return NULL; } - edata_arena_ind_set(grow_edata, shard->ind); + assert(edata_arena_ind_get(grow_edata) == shard->ind); + edata_slab_set(grow_edata, true); fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; - fb_init(fb, shard->ps_goal / PAGE); + fb_init(fb, HUGEPAGE / PAGE); /* We got the new edata; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); + edata = edata_cache_small_get(tsdn, &shard->ecs); + if (edata == NULL) { + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + return NULL; + } psset_alloc_new(&shard->psset, grow_edata, edata, size); + err = emap_register_boundary(tsdn, shard->emap, edata, + SC_NSIZES, /* slab */ false); + if (err) { + edata_t *ps = psset_dalloc(&shard->psset, edata); + /* + * The pageslab was empty except for the new allocation; it + * should get evicted. + */ + assert(ps == grow_edata); + edata_cache_small_put(tsdn, &shard->ecs, edata); + /* + * Technically the same as fallthrough at the time of this + * writing, but consistent with the error handling in the rest + * of the function. 
+ */ + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); + hpa_handle_ps_eviction(tsdn, shard, ps); + return NULL; + } malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; } @@ -283,33 +436,25 @@ static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + hpa_shard_t *shard = hpa_from_pai(self); /* We don't handle alignment or zeroing for now. */ if (alignment > PAGE || zero) { return NULL; } - if (size > shard->small_max && size < shard->large_min) { + if (size > shard->alloc_max) { return NULL; } - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata; - if (size <= shard->ps_alloc_max) { - edata = hpa_alloc_psset(tsdn, shard, size); - if (edata != NULL) { - emap_register_boundary(tsdn, shard->hpa->central.emap, - edata, SC_NSIZES, /* slab */ false); - } - } else { - edata = hpa_alloc_central(tsdn, shard, size, size); - } + edata_t *edata = hpa_alloc_psset(tsdn, shard, size); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + if (edata != NULL) { - emap_assert_mapped(tsdn, shard->hpa->central.emap, edata); + emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); @@ -336,16 +481,6 @@ hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, return true; } -static void -hpa_dalloc_central(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { - hpa_t *hpa = shard->hpa; - - edata_arena_ind_set(edata, hpa->ind); - malloc_mutex_lock(tsdn, &hpa->mtx); - hpa_central_dalloc(tsdn, &hpa->central, edata); - malloc_mutex_unlock(tsdn, &hpa->mtx); -} - static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { 
hpa_shard_t *shard = hpa_from_pai(self); @@ -361,56 +496,29 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); + edata_t *ps = edata_ps_get(edata); + /* Currently, all edatas come from pageslabs. */ + assert(ps != NULL); + emap_deregister_boundary(tsdn, shard->emap, edata); + malloc_mutex_lock(tsdn, &shard->mtx); /* - * There are two cases: - * - The psset field is NULL. In this case, the edata comes directly - * from the hpa_central_t and should be returned to it. - * - THe psset field is not NULL, in which case we return the edata to - * the appropriate slab (which may in turn cause it to become empty, - * triggering an eviction of the whole slab, which should then be - * returned to the hpa_central_t). + * Note that the shard mutex protects the edata hugeified field, too. + * Page slabs can move between pssets (and have their hugeified status + * change) in racy ways. */ - if (edata_ps_get(edata) != NULL) { - emap_deregister_boundary(tsdn, shard->hpa->central.emap, edata); - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); - edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - - - if (evicted_ps != NULL) { - /* - * The deallocation caused a pageslab to become empty. - * Free it back to the centralized allocator. - */ - bool err = emap_register_boundary(tsdn, - shard->hpa->central.emap, evicted_ps, SC_NSIZES, - /* slab */ false); - /* - * Registration can only fail on OOM, but the boundary - * mappings should have been initialized during - * allocation. 
- */ - assert(!err); - edata_slab_set(evicted_ps, false); - edata_ps_set(evicted_ps, NULL); - - assert(edata_arena_ind_get(evicted_ps) == shard->ind); - hpa_dalloc_central(tsdn, shard, evicted_ps); - } - } else { - hpa_dalloc_central(tsdn, shard, edata); + edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + /* + * If a pageslab became empty because of the dalloc, it better have been + * the one we expected. + */ + assert(evicted_ps == NULL || evicted_ps == ps); + edata_cache_small_put(tsdn, &shard->ecs, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); + if (evicted_ps != NULL) { + hpa_handle_ps_eviction(tsdn, shard, evicted_ps); } } -static void -hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { - assert(bin_stats->npageslabs == 0); - assert(bin_stats->nactive == 0); - assert(bin_stats->ninactive == 0); -} - void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); @@ -418,6 +526,29 @@ hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } +static void +hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { + assert(bin_stats->npageslabs_huge == 0); + assert(bin_stats->nactive_huge == 0); + assert(bin_stats->ninactive_huge == 0); + assert(bin_stats->npageslabs_nonhuge == 0); + assert(bin_stats->nactive_nonhuge == 0); + assert(bin_stats->ninactive_nonhuge == 0); +} + +static void +hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { + edata_t edata = {0}; + malloc_mutex_assert_owner(tsdn, &shard->mtx); + bool psset_empty = psset_alloc_reuse(psset, &edata, PAGE); + assert(psset_empty); + hpa_shard_assert_stats_empty(&psset->stats.full_slabs); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &psset->stats.nonfull_slabs[i]); + } +} + void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* @@ -427,17 +558,15 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { * 1-page allocation. 
*/ if (config_debug) { - edata_t edata = {0}; malloc_mutex_lock(tsdn, &shard->mtx); - bool psset_empty = psset_alloc_reuse(&shard->psset, &edata, - PAGE); + hpa_assert_empty(tsdn, shard, &shard->psset); malloc_mutex_unlock(tsdn, &shard->mtx); - assert(psset_empty); - hpa_shard_assert_stats_empty(&shard->psset.stats.full_slabs); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - hpa_shard_assert_stats_empty( - &shard->psset.stats.nonfull_slabs[i]); - } + } + edata_t *ps; + while ((ps = edata_list_inactive_first(&shard->unused_slabs)) != NULL) { + assert(edata_size_get(ps) == HUGEPAGE); + edata_list_inactive_remove(&shard->unused_slabs, ps); + pages_unmap(edata_base_get(ps), HUGEPAGE); } } @@ -462,21 +591,3 @@ hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } - -void -hpa_prefork4(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_prefork(tsdn, &hpa->grow_mtx); - malloc_mutex_prefork(tsdn, &hpa->mtx); -} - -void -hpa_postfork_parent(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_postfork_parent(tsdn, &hpa->grow_mtx); - malloc_mutex_postfork_parent(tsdn, &hpa->mtx); -} - -void -hpa_postfork_child(tsdn_t *tsdn, hpa_t *hpa) { - malloc_mutex_postfork_child(tsdn, &hpa->grow_mtx); - malloc_mutex_postfork_child(tsdn, &hpa->mtx); -} diff --git a/src/jemalloc.c b/src/jemalloc.c index 74240c0a..277b9e72 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1802,31 +1802,19 @@ malloc_init_hard_a0_locked() { } a0 = arena_get(TSDN_NULL, 0, false); - if (opt_hpa && LG_SIZEOF_PTR == 2) { + if (opt_hpa && !hpa_supported()) { + malloc_printf(": HPA not supported in the current " + "configuration; %s.", + opt_abort_conf ? 
"aborting" : "disabling"); if (opt_abort_conf) { - malloc_printf(": Hugepages not currently " - "supported on 32-bit architectures; aborting."); + malloc_abort_invalid_conf(); } else { - malloc_printf(": Hugepages not currently " - "supported on 32-bit architectures; disabling."); opt_hpa = false; } } else if (opt_hpa) { - /* - * The global HPA uses the edata cache from a0, and so needs to - * be initialized specially, after a0 is. The arena init code - * handles this case specially, and does not turn on the HPA for - * a0 when opt_hpa is true. This lets us do global HPA - * initialization against a valid a0. - */ - if (hpa_init(&arena_hpa_global, b0get(), &arena_emap_global, - &a0->pa_shard.edata_cache)) { - return true; - } - if (pa_shard_enable_hpa(&a0->pa_shard, &arena_hpa_global, - opt_hpa_slab_goal, opt_hpa_slab_max_alloc, - opt_hpa_small_max, opt_hpa_large_min, opt_hpa_sec_nshards, - opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { + if (pa_shard_enable_hpa(&a0->pa_shard, opt_hpa_slab_max_alloc, + opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, + opt_hpa_sec_max_bytes)) { return true; } } @@ -4346,9 +4334,6 @@ _malloc_prefork(void) } } } - if (i == 4 && opt_hpa) { - hpa_prefork4(tsd_tsdn(tsd), &arena_hpa_global); - } } prof_prefork1(tsd_tsdn(tsd)); @@ -4388,9 +4373,6 @@ _malloc_postfork(void) arena_postfork_parent(tsd_tsdn(tsd), arena); } } - if (opt_hpa) { - hpa_postfork_parent(tsd_tsdn(tsd), &arena_hpa_global); - } prof_postfork_parent(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_parent(tsd_tsdn(tsd)); @@ -4421,9 +4403,6 @@ jemalloc_postfork_child(void) { arena_postfork_child(tsd_tsdn(tsd), arena); } } - if (opt_hpa) { - hpa_postfork_child(tsd_tsdn(tsd), &arena_hpa_global); - } prof_postfork_child(tsd_tsdn(tsd)); if (have_background_thread) { background_thread_postfork_child(tsd_tsdn(tsd)); diff --git a/src/pa.c b/src/pa.c index e5fcbb7b..bc52ff43 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,17 +49,10 @@ pa_shard_init(tsdn_t *tsdn, 
pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, hpa_t *hpa, size_t ps_goal, - size_t ps_alloc_max, size_t small_max, size_t large_min, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { - ps_goal &= ~PAGE_MASK; - ps_alloc_max &= ~PAGE_MASK; - - if (ps_alloc_max > ps_goal) { - ps_alloc_max = ps_goal; - } - if (hpa_shard_init(&shard->hpa_shard, hpa, &shard->edata_cache, - shard->ind, ps_goal, ps_alloc_max, small_max, large_min)) { +pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, + size_t sec_alloc_max, size_t sec_bytes_max) { + if (hpa_shard_init(&shard->hpa_shard, shard->emap, &shard->edata_cache, + shard->ind, alloc_max)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/src/pa_extra.c b/src/pa_extra.c index 2002418a..0f488be6 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -150,7 +150,7 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, } if (shard->ever_used_hpa) { - hpa_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); + hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } diff --git a/src/psset.c b/src/psset.c index c24266ce..2ee683b6 100644 --- a/src/psset.c +++ b/src/psset.c @@ -20,9 +20,13 @@ psset_init(psset_t *psset) { static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs += src->npageslabs; - dst->nactive += src->nactive; - dst->ninactive += src->ninactive; + dst->npageslabs_huge += src->npageslabs_huge; + dst->nactive_huge += src->nactive_huge; + dst->ninactive_huge += src->ninactive_huge; + + dst->npageslabs_nonhuge += src->npageslabs_nonhuge; + dst->nactive_nonhuge += src->nactive_nonhuge; + dst->ninactive_nonhuge += src->ninactive_nonhuge; } void @@ -45,29 +49,62 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. 
*/ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_adjust(psset_bin_stats_t *binstats, edata_t *ps, bool inc) { - size_t mul = inc ? (size_t)1 : (size_t)-1; +psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, + bool insert) { + size_t *npageslabs_dst = edata_hugeified_get(ps) + ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; + size_t *nactive_dst = edata_hugeified_get(ps) + ? &binstats->nactive_huge : &binstats->nactive_nonhuge; + size_t *ninactive_dst = edata_hugeified_get(ps) + ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; size_t npages = edata_size_get(ps) >> LG_PAGE; size_t ninactive = edata_nfree_get(ps); size_t nactive = npages - ninactive; - binstats->npageslabs += mul * 1; - binstats->nactive += mul * nactive; - binstats->ninactive += mul * ninactive; + + size_t mul = insert ? (size_t)1 : (size_t)-1; + *npageslabs_dst += mul * 1; + *nactive_dst += mul * nactive; + *ninactive_dst += mul * ninactive; +} + +static void +psset_bin_stats_insert(psset_bin_stats_t *binstats, edata_t *ps) { + psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); +} + +static void +psset_bin_stats_remove(psset_bin_stats_t *binstats, edata_t *ps) { + psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); +} + +/* + * We don't currently need an "activate" equivalent to this, since down the + * allocation pathways we don't do the optimization in which we change a slab + * without first removing it from a bin. + */ +static void +psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { + size_t *nactive_dst = huge + ? &binstats->nactive_huge : &binstats->nactive_nonhuge; + size_t *ninactive_dst = huge + ? 
&binstats->ninactive_huge : &binstats->ninactive_nonhuge; + + assert(*nactive_dst >= num); + *nactive_dst -= num; + *ninactive_dst += num; } static void psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, - /* inc */ false); + psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); } static void psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { edata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_adjust(&psset->stats.nonfull_slabs[pind], ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } JEMALLOC_ALWAYS_INLINE void @@ -86,8 +123,7 @@ psset_insert(psset_t *psset, edata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. */ - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.full_slabs, ps); return; } @@ -107,8 +143,7 @@ psset_remove(psset_t *psset, edata_t *ps) { size_t longest_free_range = edata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_remove(&psset->stats.full_slabs, ps); return; } @@ -121,6 +156,26 @@ psset_remove(psset_t *psset, edata_t *ps) { } } +void +psset_hugify(psset_t *psset, edata_t *ps) { + assert(!edata_hugeified_get(ps)); + psset_assert_ps_consistent(ps); + + size_t longest_free_range = edata_longest_free_range_get(ps); + psset_bin_stats_t *bin_stats; + if (longest_free_range == 0) { + bin_stats = &psset->stats.full_slabs; + } else { + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + bin_stats = &psset->stats.nonfull_slabs[pind]; + } + psset_bin_stats_remove(bin_stats, ps); + edata_hugeified_set(ps, true); + 
psset_bin_stats_insert(bin_stats, ps); +} + /* * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. @@ -225,8 +280,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ true); + psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { psset_insert(psset, ps); } @@ -258,8 +312,8 @@ edata_t * psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_ps_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; @@ -274,22 +328,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; fb_unset_range(ps_fb, ps_npages, begin, len); - if (ps_old_longest_free_range == 0) { - /* We were in the (imaginary) full bin; update stats for it. */ - psset_bin_stats_adjust(&psset->stats.full_slabs, ps, - /* inc */ false); - } else { - /* - * The edata is still in the bin, need to update its - * contribution. - */ - psset->stats.nonfull_slabs[old_pind].nactive -= len; - psset->stats.nonfull_slabs[old_pind].ninactive += len; - } - /* - * Note that we want to do this after the stats updates, since if it was - * full it psset_bin_stats_adjust would have looked at the old version. - */ + + /* The pageslab is still in the bin; adjust its stats first. */ + psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 + ? 
&psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); + psset_bin_stats_deactivate(bin_stats, edata_hugeified_get(ps), len); + edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); /* We might have just created a new, larger range. */ @@ -327,6 +371,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } + } else { + /* + * Otherwise, the bin was full, and we need to adjust the full + * bin stats. + */ + psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. */ if (new_range_len == ps_npages) { diff --git a/src/stats.c b/src/stats.c index 4b40721a..abe3ab16 100644 --- a/src/stats.c +++ b/src/stats.c @@ -667,16 +667,27 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_row_t row; emitter_row_init(&row); - size_t npageslabs; - size_t nactive; - size_t ninactive; + size_t npageslabs_huge; + size_t nactive_huge; + size_t ninactive_huge; - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs", - i, &npageslabs, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive", - i, &nactive, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive", - i, &ninactive, size_t); + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ninactive_nonhuge; + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_huge", + i, &ninactive_huge, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_nonhuge", + i, &ninactive_nonhuge, size_t); size_t sec_bytes; 
CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -686,39 +697,62 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_table_printf(emitter, "HPA shard stats:\n" " In full slabs:\n" - " npageslabs: %zu\n" - " nactive: %zu\n" - " ninactive: %zu\n", - npageslabs, nactive, ninactive); + " npageslabs: %zu huge, %zu nonhuge\n" + " nactive: %zu huge, %zu nonhuge \n" + " ninactive: %zu huge, %zu nonhuge \n", + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_object_kv_begin(emitter, "full_slabs"); - emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); - emitter_json_kv(emitter, "ninactive", emitter_type_size, &ninactive); + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, + &ninactive_huge); + emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, + &ninactive_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, npageslabs, NULL, right, 13, size) - COL_HDR(row, nactive, NULL, right, 13, size) - COL_HDR(row, ninactive, NULL, right, 13, size) + COL_HDR(row, npageslabs_huge, NULL, right, 16, size) + COL_HDR(row, nactive_huge, NULL, right, 16, size) + COL_HDR(row, ninactive_huge, NULL, right, 16, size) + COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) + COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) + COL_HDR(row, ninactive_nonhuge, NULL, right, 
20, size) emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs", - i, j, &npageslabs, size_t); + "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_huge", + i, j, &npageslabs_huge, size_t); CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive", - i, j, &nactive, size_t); + "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_huge", + i, j, &nactive_huge, size_t); CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive", - i, j, &ninactive, size_t); + "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_huge", + i, j, &ninactive_huge, size_t); + + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_nonhuge", + i, j, &npageslabs_nonhuge, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_nonhuge", + i, j, &nactive_nonhuge, size_t); + CTL_M2_M5_GET( + "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_nonhuge", + i, j, &ninactive_nonhuge, size_t); bool in_gap_prev = in_gap; - in_gap = (npageslabs == 0); + in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); if (in_gap_prev && !in_gap) { emitter_table_printf(emitter, " ---\n"); @@ -726,20 +760,29 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { col_size.size_val = sz_pind2sz(j); col_ind.size_val = j; - col_npageslabs.size_val = npageslabs; - col_nactive.size_val = nactive; - col_ninactive.size_val = ninactive; + col_npageslabs_huge.size_val = npageslabs_huge; + col_nactive_huge.size_val = nactive_huge; + col_ninactive_huge.size_val = ninactive_huge; + col_npageslabs_nonhuge.size_val = npageslabs_nonhuge; + col_nactive_nonhuge.size_val = nactive_nonhuge; + col_ninactive_nonhuge.size_val = ninactive_nonhuge; if (!in_gap) { emitter_table_row(emitter, &row); } emitter_json_object_begin(emitter); - emitter_json_kv(emitter, "npageslabs", 
emitter_type_size, - &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, - &nactive); - emitter_json_kv(emitter, "ninactive", emitter_type_size, - &ninactive); + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, + &ninactive_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, + &ninactive_huge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ diff --git a/test/unit/hpa.c b/test/unit/hpa.c index b58dcede..72a20c32 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -2,14 +2,9 @@ #include "jemalloc/internal/hpa.h" -#define HPA_IND 111 -#define SHARD_IND 222 +#define SHARD_IND 111 -#define PS_GOAL (128 * PAGE) -#define PS_ALLOC_MAX (64 * PAGE) - -#define HPA_SMALL_MAX (200 * PAGE) -#define HPA_LARGE_MIN (300 * PAGE) +#define ALLOC_MAX (HUGEPAGE / 4) typedef struct test_data_s test_data_t; struct test_data_s { @@ -18,50 +13,32 @@ struct test_data_s { * test_data_t and the hpa_shard_t; */ hpa_shard_t shard; - base_t *shard_base; + base_t *base; edata_cache_t shard_edata_cache; - hpa_t hpa; - base_t *hpa_base; - edata_cache_t hpa_edata_cache; - emap_t emap; }; static hpa_shard_t * create_test_data() { bool err; - base_t *shard_base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks); - assert_ptr_not_null(shard_base, ""); - - base_t *hpa_base = base_new(TSDN_NULL, /* ind */ HPA_IND, - &ehooks_default_extent_hooks); - assert_ptr_not_null(hpa_base, ""); + assert_ptr_not_null(base, ""); test_data_t *test_data = malloc(sizeof(test_data_t)); 
assert_ptr_not_null(test_data, ""); - test_data->shard_base = shard_base; - test_data->hpa_base = hpa_base; + test_data->base = base; - err = edata_cache_init(&test_data->shard_edata_cache, shard_base); + err = edata_cache_init(&test_data->shard_edata_cache, base); assert_false(err, ""); - err = edata_cache_init(&test_data->hpa_edata_cache, hpa_base); + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - err = emap_init(&test_data->emap, test_data->hpa_base, - /* zeroed */ false); - assert_false(err, ""); - - err = hpa_init(&test_data->hpa, hpa_base, &test_data->emap, - &test_data->hpa_edata_cache); - assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->hpa, - &test_data->shard_edata_cache, SHARD_IND, PS_GOAL, PS_ALLOC_MAX, - HPA_SMALL_MAX, HPA_LARGE_MIN); + err = hpa_shard_init(&test_data->shard, &test_data->emap, + &test_data->shard_edata_cache, SHARD_IND, ALLOC_MAX); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -70,12 +47,11 @@ create_test_data() { static void destroy_test_data(hpa_shard_t *shard) { test_data_t *test_data = (test_data_t *)shard; - base_delete(TSDN_NULL, test_data->shard_base); - base_delete(TSDN_NULL, test_data->hpa_base); + base_delete(TSDN_NULL, test_data->base); free(test_data); } -TEST_BEGIN(test_small_max_large_min) { +TEST_BEGIN(test_alloc_max) { test_skip_if(LG_SIZEOF_PTR != 3); hpa_shard_t *shard = create_test_data(); @@ -84,18 +60,11 @@ TEST_BEGIN(test_small_max_large_min) { edata_t *edata; /* Small max */ - edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false); expect_ptr_not_null(edata, "Allocation of small max failed"); - edata = pai_alloc(tsdn, &shard->pai, HPA_SMALL_MAX + PAGE, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); - /* Large min */ - edata = 
pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN, PAGE, false); - expect_ptr_not_null(edata, "Allocation of large min failed"); - edata = pai_alloc(tsdn, &shard->pai, HPA_LARGE_MIN - PAGE, PAGE, false); - expect_ptr_null(edata, - "Allocation of smaller than large min succeeded"); - destroy_test_data(shard); } TEST_END @@ -178,26 +147,19 @@ TEST_BEGIN(test_stress) { mem_tree_new(&tree); for (size_t i = 0; i < 100 * 1000; i++) { - size_t operation = prng_range_zu(&prng_state, 4); - if (operation < 2) { + size_t operation = prng_range_zu(&prng_state, 2); + if (operation == 0) { /* Alloc */ if (nlive_edatas == nlive_edatas_max) { continue; } - size_t npages_min; - size_t npages_max; /* * We make sure to get an even balance of small and * large allocations. */ - if (operation == 0) { - npages_min = 1; - npages_max = HPA_SMALL_MAX / PAGE; - } else { - npages_min = HPA_LARGE_MIN / PAGE; - npages_max = HPA_LARGE_MIN / PAGE + 20; - } + size_t npages_min = 1; + size_t npages_max = ALLOC_MAX / PAGE; size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, @@ -260,6 +222,6 @@ main(void) { (void)mem_tree_reverse_iter; (void)mem_tree_destroy; return test_no_reentrancy( - test_small_max_large_min, + test_alloc_max, test_stress); } diff --git a/test/unit/psset.c b/test/unit/psset.c index e07bdc46..ea61ab92 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/psset.h" -#define PAGESLAB_PAGES 64 +#define PAGESLAB_PAGES (HUGEPAGE / PAGE) #define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) #define PAGESLAB_SN 123 #define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) @@ -296,22 +296,23 @@ TEST_END static void stats_expect_empty(psset_bin_stats_t *stats) { - assert_zu_eq(0, stats->npageslabs, + assert_zu_eq(0, stats->npageslabs_nonhuge, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" + expect_zu_eq(0, 
stats->nactive_nonhuge, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" + expect_zu_eq(0, stats->ninactive_nonhuge, "Unexpected nonempty bin" "Supposedly empty bin had positive ninactive"); } static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == PAGESLAB_PAGES) { - expect_zu_eq(1, psset->stats.full_slabs.npageslabs, + expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, psset->stats.full_slabs.nactive, + expect_zu_eq(PAGESLAB_PAGES, + psset->stats.full_slabs.nactive_nonhuge, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs.ninactive, + expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, "Should never have inactive pages in a full slab"); } else { stats_expect_empty(&psset->stats.full_slabs); @@ -325,13 +326,13 @@ stats_expect(psset_t *psset, size_t nactive) { for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { assert_zu_eq(1, - psset->stats.nonfull_slabs[i].npageslabs, + psset->stats.nonfull_slabs[i].npageslabs_nonhuge, "Should have found a slab"); expect_zu_eq(nactive, - psset->stats.nonfull_slabs[i].nactive, + psset->stats.nonfull_slabs[i].nactive_nonhuge, "Mismatch in active pages"); expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i].ninactive, + psset->stats.nonfull_slabs[i].ninactive_nonhuge, "Mismatch in inactive pages"); } else { stats_expect_empty(&psset->stats.nonfull_slabs[i]); From 4a15008cfbf414136f40a57fb1ceac80b22ea09f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 15:54:29 -0800 Subject: [PATCH 1928/2608] HPA unit test: skip if unsupported. Previously, we replicated the logic in hpa_supported in the test as well. 
--- test/unit/hpa.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 72a20c32..94efd4ae 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -52,7 +52,7 @@ destroy_test_data(hpa_shard_t *shard) { } TEST_BEGIN(test_alloc_max) { - test_skip_if(LG_SIZEOF_PTR != 3); + test_skip_if(!hpa_supported()); hpa_shard_t *shard = create_test_data(); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); @@ -128,7 +128,7 @@ node_remove(mem_tree_t *tree, edata_t *edata) { } TEST_BEGIN(test_stress) { - test_skip_if(LG_SIZEOF_PTR != 3); + test_skip_if(!hpa_supported()); hpa_shard_t *shard = create_test_data(); From ca30b5db2bbf51b9c4d5aefa2ec87490b7f93395 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 17 Nov 2020 16:32:45 -0800 Subject: [PATCH 1929/2608] Introduce hpdata_t. Using an edata_t both for hugepages and the allocations within those hugepages was convenient at first, but has outlived its usefulness. Representing hugepages explicitly, with their own data structure, will make future development easier. 
--- Makefile.in | 1 + include/jemalloc/internal/edata.h | 109 ++-------- include/jemalloc/internal/hpa.h | 16 +- include/jemalloc/internal/hpdata.h | 124 +++++++++++ include/jemalloc/internal/pages.h | 14 ++ include/jemalloc/internal/psset.h | 17 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/edata.c | 1 - src/hpa.c | 178 +++++++--------- src/hpdata.c | 18 ++ src/pa.c | 4 +- src/psset.c | 134 ++++++------ test/unit/hpa.c | 3 +- test/unit/psset.c | 192 ++++++++---------- 17 files changed, 414 insertions(+), 405 deletions(-) create mode 100644 include/jemalloc/internal/hpdata.h create mode 100644 src/hpdata.c diff --git a/Makefile.in b/Makefile.in index eae30653..f263fc32 100644 --- a/Makefile.in +++ b/Makefile.in @@ -122,6 +122,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ + $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 465c962f..c0482883 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/hpdata.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" @@ -71,7 +72,6 @@ struct edata_map_info_s { typedef struct edata_s edata_t; typedef ph(edata_t) edata_avail_t; typedef ph(edata_t) edata_heap_t; -typedef ph(edata_t) edata_age_heap_t; struct edata_s { /* * Bitfield containing several fields: @@ -194,41 +194,13 @@ struct edata_s { }; /* - * In some context-specific sense, the age of an active extent. 
Each - * context can pick a specific meaning, and share the definition of the - * edata_age_heap_t below. + * If this edata is a user allocation from an HPA, it comes out of some + * pageslab (we don't yet support huegpage allocations that don't fit + * into pageslabs). This tracks it. */ - uint64_t age; - union { - /* - * We could steal a low bit from these fields to indicate what - * sort of "thing" this is (a page slab, an object within a page - * slab, or a non-pageslab range). We don't do this yet, but it - * would enable some extra asserts. - */ - - /* - * If this edata is a user allocation from an HPA, it comes out - * of some pageslab (we don't yet support huegpage allocations - * that don't fit into pageslabs). This tracks it. - */ - edata_t *ps; - /* - * If this edata *is* a pageslab, then we cache some useful - * information about its associated bitmap. - */ - struct { - /* - * The longest free range a pageslab contains determines - * the heap it lives in. If we know that it didn't - * change after an operation, we can avoid moving it - * between heaps. - */ - uint32_t longest_free_range; - /* Whether or not the slab is backed by a hugepage. */ - bool hugeified; - }; - }; + hpdata_t *e_ps; + /* Extra field reserved for HPA. 
*/ + void *e_reserved; union { /* @@ -330,11 +302,6 @@ edata_pai_get(const edata_t *edata) { EDATA_BITS_PAI_SHIFT); } -static inline bool -edata_hugeified_get(const edata_t *edata) { - return edata->hugeified; -} - static inline bool edata_slab_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> @@ -377,21 +344,10 @@ edata_bsize_get(const edata_t *edata) { return edata->e_bsize; } -static inline uint64_t -edata_age_get(const edata_t *edata) { - return edata->age; -} - -static inline edata_t * +static inline hpdata_t * edata_ps_get(const edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - return edata->ps; -} - -static inline uint32_t -edata_longest_free_range_get(const edata_t *edata) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - return edata->longest_free_range; + return edata->e_ps; } static inline void * @@ -477,21 +433,9 @@ edata_bsize_set(edata_t *edata, size_t bsize) { } static inline void -edata_age_set(edata_t *edata, uint64_t age) { - edata->age = age; -} - -static inline void -edata_ps_set(edata_t *edata, edata_t *ps) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA || ps == NULL); - edata->ps = ps; -} - -static inline void -edata_longest_free_range_set(edata_t *edata, uint32_t longest_free_range) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA - || longest_free_range == 0); - edata->longest_free_range = longest_free_range; +edata_ps_set(edata_t *edata, hpdata_t *ps) { + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + edata->e_ps = ps; } static inline void @@ -566,11 +510,6 @@ edata_pai_set(edata_t *edata, extent_pai_t pai) { ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); } -static inline void -edata_hugeified_set(edata_t *edata, bool hugeified) { - edata->hugeified = hugeified; -} - static inline void edata_slab_set(edata_t *edata, bool slab) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | @@ -633,9 +572,6 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, if 
(config_prof) { edata_prof_tctx_set(edata, NULL); } - edata_age_set(edata, 0); - edata_ps_set(edata, NULL); - edata_longest_free_range_set(edata, 0); } static inline void @@ -649,15 +585,12 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { edata_state_set(edata, extent_state_active); edata_zeroed_set(edata, true); edata_committed_set(edata, true); - edata_age_set(edata, 0); /* * This isn't strictly true, but base allocated extents never get * deallocated and can't be looked up in the emap, but no sense in * wasting a state bit to encode this fact. */ edata_pai_set(edata, EXTENT_PAI_PAC); - edata_ps_set(edata, NULL); - edata_longest_free_range_set(edata, 0); } static inline int @@ -718,25 +651,7 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return ret; } -static inline int -edata_age_comp(const edata_t *a, const edata_t *b) { - uint64_t a_age = edata_age_get(a); - uint64_t b_age = edata_age_get(b); - - /* - * Equal ages are possible in certain race conditions, like two distinct - * threads simultaneously allocating a new fresh slab without holding a - * bin lock. - */ - int ret = (a_age > b_age) - (a_age < b_age); - if (ret != 0) { - return ret; - } - return edata_snad_comp(a, b); -} - ph_proto(, edata_avail_, edata_avail_t, edata_t) ph_proto(, edata_heap_, edata_heap_t, edata_t) -ph_proto(, edata_age_heap_, edata_age_heap_t, edata_t); #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 1c4585df..edb36179 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -21,6 +21,8 @@ struct hpa_shard_s { pai_t pai; malloc_mutex_t grow_mtx; malloc_mutex_t mtx; + /* The base metadata allocator. */ + base_t *base; /* * This edata cache is the one we use when allocating a small extent * from a pageslab. The pageslab itself comes from the centralized @@ -45,7 +47,14 @@ struct hpa_shard_s { * * Guarded by grow_mtx. 
*/ - edata_list_inactive_t unused_slabs; + hpdata_list_t unused_slabs; + + /* + * How many grow operations have occurred. + * + * Guarded by grow_mtx. + */ + uint64_t age_counter; /* * Either NULL (if empty), or some integer multiple of a @@ -54,7 +63,8 @@ struct hpa_shard_s { * * Guarded by grow_mtx. */ - edata_t *eden; + void *eden; + size_t eden_len; /* The arena ind we're associated with. */ unsigned ind; @@ -67,7 +77,7 @@ struct hpa_shard_s { * just that it can function properly given the system it's running on. */ bool hpa_supported(); -bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, +bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h new file mode 100644 index 00000000..c4bf6ef5 --- /dev/null +++ b/include/jemalloc/internal/hpdata.h @@ -0,0 +1,124 @@ +#ifndef JEMALLOC_INTERNAL_HPDATA_H +#define JEMALLOC_INTERNAL_HPDATA_H + +#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/ph.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/typed_list.h" + +/* + * The metadata representation we use for extents in hugepages. While the PAC + * uses the edata_t to represent both active and inactive extents, the HP only + * uses the edata_t for active ones; instead, inactive extent state is tracked + * within hpdata associated with the enclosing hugepage-sized, hugepage-aligned + * region of virtual address space. + * + * An hpdata need not be "truly" backed by a hugepage (which is not necessarily + * an observable property of any given region of address space). It's just + * hugepage-sized and hugepage-aligned; it's *potentially* huge. 
+ */ +typedef struct hpdata_s hpdata_t; +struct hpdata_s { + /* + * We likewise follow the edata convention of mangling names and forcing + * the use of accessors -- this lets us add some consistency checks on + * access. + */ + + /* + * The address of the hugepage in question. This can't be named h_addr, + * since that conflicts with a macro defined in Windows headers. + */ + void *h_address; + /* Its age (measured in psset operations). */ + uint64_t h_age; + /* Whether or not we think the hugepage is mapped that way by the OS. */ + bool h_huge; + union { + /* When nonempty, used by the psset bins. */ + phn(hpdata_t) ph_link; + /* + * When empty (or not corresponding to any hugepage), list + * linkage. + */ + ql_elm(hpdata_t) ql_link; + }; + + /* Number of currently free pages (regardless of contiguity). */ + size_t h_nfree; + /* The length of the largest contiguous sequence of inactive pages. */ + size_t h_longest_free_range; + + /* A bitmap with bits set in the active pages. */ + fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; +}; + +static inline void * +hpdata_addr_get(const hpdata_t *hpdata) { + return hpdata->h_address; +} + +static inline void +hpdata_addr_set(hpdata_t *hpdata, void *addr) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + hpdata->h_address = addr; +} + +static inline uint64_t +hpdata_age_get(const hpdata_t *hpdata) { + return hpdata->h_age; +} + +static inline void +hpdata_age_set(hpdata_t *hpdata, uint64_t age) { + hpdata->h_age = age; +} + +static inline bool +hpdata_huge_get(const hpdata_t *hpdata) { + return hpdata->h_huge; +} + +static inline void +hpdata_huge_set(hpdata_t *hpdata, bool huge) { + hpdata->h_huge = huge; +} + +static inline size_t +hpdata_nfree_get(const hpdata_t *hpdata) { + return hpdata->h_nfree; +} + +static inline void +hpdata_nfree_set(hpdata_t *hpdata, size_t nfree) { + assert(nfree <= HUGEPAGE_PAGES); + hpdata->h_nfree = nfree; +} + +static inline size_t +hpdata_longest_free_range_get(const hpdata_t *hpdata) { 
+ return hpdata->h_longest_free_range; +} + +static inline void +hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { + assert(longest_free_range <= HUGEPAGE_PAGES); + hpdata->h_longest_free_range = longest_free_range; +} + +static inline void +hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + hpdata_addr_set(hpdata, addr); + hpdata_age_set(hpdata, age); + hpdata_huge_set(hpdata, false); + hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); + hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); + fb_init(hpdata->active_pages, HUGEPAGE_PAGES); +} + +TYPED_LIST(hpdata_list, hpdata_t, ql_link) + +typedef ph(hpdata_t) hpdata_age_heap_t; +ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); + +#endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index cfaa0fc2..035364e2 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -17,6 +17,20 @@ /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) #define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) + +#if LG_HUGEPAGE != 0 +# define HUGEPAGE_PAGES (HUGEPAGE / PAGE) +#else +/* + * It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths. If + * we can't autodetect the hugepage size, it gets treated as 0, in which case + * we'll trigger a compiler error in those arrays. Avoid this case by ensuring + * that this value is at least 1. (We won't ever run in this degraded state; + * hpa_supported() returns false in this case.) + */ +# define HUGEPAGE_PAGES 1 +#endif + /* Return the huge page base address for the huge page containing address a. 
*/ #define HUGEPAGE_ADDR2BASE(a) \ ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 3c9f23bb..01b4e80a 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PSSET_H #define JEMALLOC_INTERNAL_PSSET_H +#include "jemalloc/internal/hpdata.h" + /* * A page-slab set. What the eset is to PAC, the psset is to HPA. It maintains * a collection of page-slabs (the intent being that they are backed by @@ -51,21 +53,18 @@ struct psset_s { * The pageslabs, quantized by the size class of the largest contiguous * free run of pages in a pageslab. */ - edata_age_heap_t pageslabs[PSSET_NPSIZES]; + hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; - - /* How many alloc_new calls have happened? */ - uint64_t age_counter; }; void psset_init(psset_t *psset); void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); -void psset_insert(psset_t *psset, edata_t *ps); -void psset_remove(psset_t *psset, edata_t *ps); +void psset_insert(psset_t *psset, hpdata_t *ps); +void psset_remove(psset_t *psset, hpdata_t *ps); -void psset_hugify(psset_t *psset, edata_t *ps); +void psset_hugify(psset_t *psset, hpdata_t *ps); /* * Tries to obtain a chunk from an existing pageslab already in the set. @@ -78,7 +77,7 @@ bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); * to the psset and allocate an extent from within it. The passed-in pageslab * must be at least as big as size. */ -void psset_alloc_new(psset_t *psset, edata_t *ps, +void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size); /* @@ -89,6 +88,6 @@ void psset_alloc_new(psset_t *psset, edata_t *ps, * result must be checked and deallocated to the central HPA. Otherwise returns * NULL. 
*/ -edata_t *psset_dalloc(psset_t *psset, edata_t *edata); +hpdata_t *psset_dalloc(psset_t *psset, edata_t *edata); #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 6c4e7fdc..531dd9a6 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 84ff5748..f031fb10 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 07fbe21e..bc64de5c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 84ff5748..f031fb10 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/src/edata.c b/src/edata.c index a6597312..23523dd0 100644 --- a/src/edata.c +++ b/src/edata.c @@ -4,4 +4,3 @@ ph_gen(, edata_avail_, edata_avail_t, edata_t, ph_link, edata_esnead_comp) ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) -ph_gen(, edata_age_heap_, edata_age_heap_t, edata_t, ph_link, edata_age_comp) diff --git a/src/hpa.c b/src/hpa.c index ca75628c..9a190c8a 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -33,22 +33,22 @@ hpa_supported() { * We fundamentally rely on a address-space-hungry growth strategy for * hugepages. 
*/ - if (LG_SIZEOF_PTR == 2) { + if (LG_SIZEOF_PTR != 3) { return false; } /* - * We use the edata bitmap; it needs to have at least as many bits as a - * hugepage has pages. + * If we couldn't detect the value of HUGEPAGE, HUGEPAGE_PAGES becomes + * this sentinel value -- see the comment in pages.h. */ - if (HUGEPAGE / PAGE > BITMAP_GROUPS_MAX * sizeof(bitmap_t) * 8) { + if (HUGEPAGE_PAGES == 1) { return false; } return true; } bool -hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, - unsigned ind, size_t alloc_max) { +hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, + edata_cache_t *edata_cache, unsigned ind, size_t alloc_max) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -64,11 +64,14 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, edata_cache_t *edata_cache, } assert(edata_cache != NULL); + shard->base = base; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->alloc_max = alloc_max; - edata_list_inactive_init(&shard->unused_slabs); + hpdata_list_init(&shard->unused_slabs); + shard->age_counter = 0; shard->eden = NULL; + shard->eden_len = 0; shard->ind = ind; shard->emap = emap; @@ -104,22 +107,27 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_unlock(tsdn, &shard->mtx); } +static hpdata_t * +hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { + return (hpdata_t *)base_alloc(tsdn, shard->base, sizeof(hpdata_t), + CACHELINE); +} + static bool -hpa_should_hugify(hpa_shard_t *shard, edata_t *ps) { +hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { /* * For now, just use a static check; hugify a page if it's <= 5% * inactive. Eventually, this should be a malloc conf option. */ - return !edata_hugeified_get(ps) - && edata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; + return !hpdata_huge_get(ps) + && hpdata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; } /* Returns true on error. 
*/ static void -hpa_hugify(edata_t *ps) { - assert(edata_size_get(ps) == HUGEPAGE); - assert(edata_hugeified_get(ps)); - bool err = pages_huge(edata_base_get(ps), HUGEPAGE); +hpa_hugify(hpdata_t *ps) { + assert(hpdata_huge_get(ps)); + bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); /* * Eat the error; even if the hugeification failed, it's still safe to * pretend it didn't (and would require extraordinary measures to @@ -129,30 +137,36 @@ hpa_hugify(edata_t *ps) { } static void -hpa_dehugify(edata_t *ps) { +hpa_dehugify(hpdata_t *ps) { /* Purge, then dehugify while unbacked. */ - pages_purge_forced(edata_addr_get(ps), HUGEPAGE); - pages_nohuge(edata_addr_get(ps), HUGEPAGE); - edata_hugeified_set(ps, false); + pages_purge_forced(hpdata_addr_get(ps), HUGEPAGE); + pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); + hpdata_huge_set(ps, false); } -static edata_t * +static hpdata_t * hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - edata_t *ps = NULL; + hpdata_t *ps = NULL; /* Is there address space waiting for reuse? */ malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - ps = edata_list_inactive_first(&shard->unused_slabs); + ps = hpdata_list_first(&shard->unused_slabs); if (ps != NULL) { - edata_list_inactive_remove(&shard->unused_slabs, ps); + hpdata_list_remove(&shard->unused_slabs, ps); + hpdata_age_set(ps, shard->age_counter++); return ps; } /* Is eden a perfect fit? */ - if (shard->eden != NULL && edata_size_get(shard->eden) == HUGEPAGE) { - ps = shard->eden; + if (shard->eden != NULL && shard->eden_len == HUGEPAGE) { + ps = hpa_alloc_ps(tsdn, shard); + if (ps == NULL) { + return NULL; + } + hpdata_init(ps, shard->eden, shard->age_counter++); shard->eden = NULL; + shard->eden_len = 0; return ps; } @@ -173,78 +187,32 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { if (new_eden == NULL) { return NULL; } - malloc_mutex_lock(tsdn, &shard->mtx); - /* Allocate ps edata, bailing if we fail. 
*/ - ps = edata_cache_small_get(tsdn, &shard->ecs); + ps = hpa_alloc_ps(tsdn, shard); if (ps == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); pages_unmap(new_eden, HPA_EDEN_SIZE); return NULL; } - /* Allocate eden edata, bailing if we fail. */ - shard->eden = edata_cache_small_get(tsdn, &shard->ecs); - if (shard->eden == NULL) { - edata_cache_small_put(tsdn, &shard->ecs, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - pages_unmap(new_eden, HPA_EDEN_SIZE); - return NULL; - } - /* Success. */ - malloc_mutex_unlock(tsdn, &shard->mtx); - - /* - * Note that the values here don't really make sense (e.g. eden - * is actually zeroed). But we don't use the slab metadata in - * determining subsequent allocation metadata (e.g. zero - * tracking should be done at the per-page level, not at the - * level of the hugepage). It's just a convenient data - * structure that contains much of the helpers we need (defined - * lists, a bitmap, an address field, etc.). Eventually, we'll - * have a "real" representation of a hugepage that's unconnected - * to the edata_ts it will serve allocations into. - */ - edata_init(shard->eden, shard->ind, new_eden, HPA_EDEN_SIZE, - /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_dirty, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - /* is_head */ true); - edata_hugeified_set(shard->eden, false); + shard->eden = new_eden; + shard->eden_len = HPA_EDEN_SIZE; } else { /* Eden is already nonempty; only need an edata for ps. */ - malloc_mutex_lock(tsdn, &shard->mtx); - ps = edata_cache_small_get(tsdn, &shard->ecs); - malloc_mutex_unlock(tsdn, &shard->mtx); + ps = hpa_alloc_ps(tsdn, shard); if (ps == NULL) { return NULL; } } - /* - * We should have dropped mtx since we're not touching ecs any more, but - * we should continue to hold the grow mutex, since we're about to touch - * eden. 
- */ - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - + assert(ps != NULL); assert(shard->eden != NULL); - assert(edata_size_get(shard->eden) > HUGEPAGE); - assert(edata_size_get(shard->eden) % HUGEPAGE == 0); - assert(edata_addr_get(shard->eden) - == HUGEPAGE_ADDR2BASE(edata_addr_get(shard->eden))); - malloc_mutex_lock(tsdn, &shard->mtx); - ps = edata_cache_small_get(tsdn, &shard->ecs); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (ps == NULL) { - return NULL; - } - edata_init(ps, edata_arena_ind_get(shard->eden), - edata_addr_get(shard->eden), HUGEPAGE, /* slab */ false, - /* szind */ SC_NSIZES, /* sn */ 0, extent_state_dirty, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - /* is_head */ true); - edata_hugeified_set(ps, false); - edata_addr_set(shard->eden, edata_past_get(ps)); - edata_size_set(shard->eden, - edata_size_get(shard->eden) - HUGEPAGE); + assert(shard->eden_len > HUGEPAGE); + assert(shard->eden_len % HUGEPAGE == 0); + assert(HUGEPAGE_ADDR2BASE(shard->eden) == shard->eden); + + hpdata_init(ps, shard->eden, shard->age_counter++); + + char *eden_char = (char *)shard->eden; + eden_char += HUGEPAGE; + shard->eden = (void *)eden_char; + shard->eden_len -= HUGEPAGE; return ps; } @@ -255,7 +223,7 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { * their address space in a list outside the psset. */ static void -hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { +hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { /* * We do relatively expensive system calls. The ps was evicted, so no * one should touch it while we're also touching it. 
@@ -263,9 +231,6 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { malloc_mutex_assert_not_owner(tsdn, &shard->mtx); malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - assert(edata_size_get(ps) == HUGEPAGE); - assert(HUGEPAGE_ADDR2BASE(edata_addr_get(ps)) == edata_addr_get(ps)); - /* * We do this unconditionally, even for pages which were not originally * hugeified; it has the same effect. @@ -273,7 +238,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *ps) { hpa_dehugify(ps); malloc_mutex_lock(tsdn, &shard->grow_mtx); - edata_list_inactive_prepend(&shard->unused_slabs, ps); + hpdata_list_prepend(&shard->unused_slabs, ps); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -307,7 +272,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - edata_t *ps = psset_dalloc(&shard->psset, edata); + hpdata_t *ps = psset_dalloc(&shard->psset, edata); /* * The pageslab was nonempty before we started; it * should still be nonempty now, and so shouldn't get @@ -320,7 +285,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - edata_t *ps = edata_ps_get(edata); + hpdata_t *ps = edata_ps_get(edata); assert(ps != NULL); bool hugify = hpa_should_hugify(shard, ps); if (hugify) { @@ -378,16 +343,11 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. 
*/ - edata_t *grow_edata = hpa_grow(tsdn, shard); - if (grow_edata == NULL) { + hpdata_t *grow_ps = hpa_grow(tsdn, shard); + if (grow_ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } - assert(edata_arena_ind_get(grow_edata) == shard->ind); - - edata_slab_set(grow_edata, true); - fb_group_t *fb = edata_slab_data_get(grow_edata)->bitmap; - fb_init(fb, HUGEPAGE / PAGE); /* We got the new edata; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); @@ -395,18 +355,19 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { if (edata == NULL) { malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + hpa_handle_ps_eviction(tsdn, shard, grow_ps); return NULL; } - psset_alloc_new(&shard->psset, grow_edata, edata, size); + psset_alloc_new(&shard->psset, grow_ps, edata, size); err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - edata_t *ps = psset_dalloc(&shard->psset, edata); + hpdata_t *ps = psset_dalloc(&shard->psset, edata); /* * The pageslab was empty except for the new allocation; it * should get evicted. */ - assert(ps == grow_edata); + assert(ps == grow_ps); edata_cache_small_put(tsdn, &shard->ecs, edata); /* * Technically the same as fallthrough at the time of this @@ -496,7 +457,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + hpdata_t *ps = edata_ps_get(edata); /* Currently, all edatas come from pageslabs. */ assert(ps != NULL); emap_deregister_boundary(tsdn, shard->emap, edata); @@ -506,7 +467,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * Page slabs can move between pssets (and have their hugeified status * change) in racy ways. 
*/ - edata_t *evicted_ps = psset_dalloc(&shard->psset, edata); + hpdata_t *evicted_ps = psset_dalloc(&shard->psset, edata); /* * If a pageslab became empty because of the dalloc, it better have been * the one we expected. @@ -562,11 +523,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_assert_empty(tsdn, shard, &shard->psset); malloc_mutex_unlock(tsdn, &shard->mtx); } - edata_t *ps; - while ((ps = edata_list_inactive_first(&shard->unused_slabs)) != NULL) { - assert(edata_size_get(ps) == HUGEPAGE); - edata_list_inactive_remove(&shard->unused_slabs, ps); - pages_unmap(edata_base_get(ps), HUGEPAGE); + hpdata_t *ps; + while ((ps = hpdata_list_first(&shard->unused_slabs)) != NULL) { + hpdata_list_remove(&shard->unused_slabs, ps); + pages_unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/hpdata.c b/src/hpdata.c new file mode 100644 index 00000000..bbe3acce --- /dev/null +++ b/src/hpdata.c @@ -0,0 +1,18 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpdata.h" + +static int +hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { + uint64_t a_age = hpdata_age_get(a); + uint64_t b_age = hpdata_age_get(b); + /* + * hpdata ages are operation counts in the psset; no two should be the + * same. 
+ */ + assert(a_age != b_age); + return (a_age > b_age) - (a_age < b_age); +} + +ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) diff --git a/src/pa.c b/src/pa.c index bc52ff43..da64b829 100644 --- a/src/pa.c +++ b/src/pa.c @@ -51,8 +51,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { - if (hpa_shard_init(&shard->hpa_shard, shard->emap, &shard->edata_cache, - shard->ind, alloc_max)) { + if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, + &shard->edata_cache, shard->ind, alloc_max)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/src/psset.c b/src/psset.c index 2ee683b6..cebc1ce8 100644 --- a/src/psset.c +++ b/src/psset.c @@ -11,11 +11,10 @@ static const bitmap_info_t psset_bitmap_info = void psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { - edata_age_heap_new(&psset->pageslabs[i]); + hpdata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); - psset->age_counter = 0; } static void @@ -49,18 +48,17 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. */ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, +psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) { - size_t *npageslabs_dst = edata_hugeified_get(ps) + size_t *npageslabs_dst = hpdata_huge_get(ps) ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; - size_t *nactive_dst = edata_hugeified_get(ps) + size_t *nactive_dst = hpdata_huge_get(ps) ? 
&binstats->nactive_huge : &binstats->nactive_nonhuge; - size_t *ninactive_dst = edata_hugeified_get(ps) + size_t *ninactive_dst = hpdata_huge_get(ps) ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - size_t npages = edata_size_get(ps) >> LG_PAGE; - size_t ninactive = edata_nfree_get(ps); - size_t nactive = npages - ninactive; + size_t ninactive = hpdata_nfree_get(ps); + size_t nactive = HUGEPAGE_PAGES - ninactive; size_t mul = insert ? (size_t)1 : (size_t)-1; *npageslabs_dst += mul * 1; @@ -69,12 +67,12 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, edata_t *ps, } static void -psset_bin_stats_insert(psset_bin_stats_t *binstats, edata_t *ps) { +psset_bin_stats_insert(psset_bin_stats_t *binstats, hpdata_t *ps) { psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); } static void -psset_bin_stats_remove(psset_bin_stats_t *binstats, edata_t *ps) { +psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); } @@ -96,27 +94,27 @@ psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { } static void -psset_edata_heap_remove(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_age_heap_remove(&psset->pageslabs[pind], ps); +psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { + hpdata_age_heap_remove(&psset->pageslabs[pind], ps); psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); } static void -psset_edata_heap_insert(psset_t *psset, pszind_t pind, edata_t *ps) { - edata_age_heap_insert(&psset->pageslabs[pind], ps); +psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { + hpdata_age_heap_insert(&psset->pageslabs[pind], ps); psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } JEMALLOC_ALWAYS_INLINE void -psset_assert_ps_consistent(edata_t *ps) { - assert(fb_urange_longest(edata_slab_data_get(ps)->bitmap, - edata_size_get(ps) >> LG_PAGE) == edata_longest_free_range_get(ps)); 
+psset_assert_ps_consistent(hpdata_t *ps) { + assert(fb_urange_longest(ps->active_pages, HUGEPAGE_PAGES) + == hpdata_longest_free_range_get(ps)); } void -psset_insert(psset_t *psset, edata_t *ps) { +psset_insert(psset_t *psset, hpdata_t *ps) { psset_assert_ps_consistent(ps); - size_t longest_free_range = edata_longest_free_range_get(ps); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { /* @@ -131,16 +129,16 @@ psset_insert(psset_t *psset, edata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - if (edata_age_heap_empty(&psset->pageslabs[pind])) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } - psset_edata_heap_insert(psset, pind, ps); + psset_hpdata_heap_insert(psset, pind, ps); } void -psset_remove(psset_t *psset, edata_t *ps) { +psset_remove(psset_t *psset, hpdata_t *ps) { psset_assert_ps_consistent(ps); - size_t longest_free_range = edata_longest_free_range_get(ps); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { psset_bin_stats_remove(&psset->stats.full_slabs, ps); @@ -150,18 +148,18 @@ psset_remove(psset_t *psset, edata_t *ps) { pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_edata_heap_remove(psset, pind, ps); - if (edata_age_heap_empty(&psset->pageslabs[pind])) { + psset_hpdata_heap_remove(psset, pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); } } void -psset_hugify(psset_t *psset, edata_t *ps) { - assert(!edata_hugeified_get(ps)); +psset_hugify(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_huge_get(ps)); psset_assert_ps_consistent(ps); - size_t longest_free_range = edata_longest_free_range_get(ps); + size_t longest_free_range = hpdata_longest_free_range_get(ps); psset_bin_stats_t *bin_stats; if (longest_free_range 
== 0) { bin_stats = &psset->stats.full_slabs; @@ -172,7 +170,7 @@ psset_hugify(psset_t *psset, edata_t *ps) { bin_stats = &psset->stats.nonfull_slabs[pind]; } psset_bin_stats_remove(bin_stats, ps); - edata_hugeified_set(ps, true); + hpdata_huge_set(ps, true); psset_bin_stats_insert(bin_stats, ps); } @@ -180,7 +178,7 @@ psset_hugify(psset_t *psset, edata_t *ps) { * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the * set, picks one that can satisfy the allocation and remove it from the set. */ -static edata_t * +static hpdata_t * psset_recycle_extract(psset_t *psset, size_t size) { pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, @@ -188,13 +186,13 @@ psset_recycle_extract(psset_t *psset, size_t size) { if (pind == PSSET_NPSIZES) { return NULL; } - edata_t *ps = edata_age_heap_first(&psset->pageslabs[pind]); + hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { return NULL; } - psset_edata_heap_remove(psset, pind, ps); - if (edata_age_heap_empty(&psset->pageslabs[pind])) { + psset_hpdata_heap_remove(psset, pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, pind); } @@ -207,7 +205,7 @@ psset_recycle_extract(psset_t *psset, size_t size) { * edata with a range in the pageslab, and puts ps back in the set. 
*/ static void -psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, +psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { size_t start = 0; /* @@ -217,15 +215,14 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t begin = 0; size_t len = 0; - fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; + fb_group_t *ps_fb = ps->active_pages; size_t npages = size >> LG_PAGE; - size_t ps_npages = edata_size_get(ps) >> LG_PAGE; size_t largest_unchosen_range = 0; while (true) { - bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, - &len); + bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, + &begin, &len); /* * A precondition to this function is that ps must be able to * serve the allocation. @@ -245,14 +242,14 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } start = begin + len; } - uintptr_t addr = (uintptr_t)edata_base_get(ps) + begin * PAGE; + uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - fb_set_range(ps_fb, ps_npages, begin, npages); - edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) - npages)); + fb_set_range(ps_fb, HUGEPAGE_PAGES, begin, npages); + hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) - npages)); /* The pageslab isn't in a bin, so no bin stats need to change. */ /* @@ -267,8 +264,8 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, * this check in the case where we're allocating from some smaller run. 
*/ start = begin + npages; - while (start < ps_npages) { - bool found = fb_urange_iter(ps_fb, ps_npages, start, &begin, + while (start < HUGEPAGE_PAGES) { + bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, &begin, &len); if (!found) { break; @@ -278,7 +275,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, } start = begin + len; } - edata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); + hpdata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); if (largest_unchosen_range == 0) { psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { @@ -288,7 +285,7 @@ psset_ps_alloc_insert(psset_t *psset, edata_t *ps, edata_t *r_edata, bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - edata_t *ps = psset_recycle_extract(psset, size); + hpdata_t *ps = psset_recycle_extract(psset, size); if (ps == NULL) { return true; } @@ -297,48 +294,43 @@ psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { } void -psset_alloc_new(psset_t *psset, edata_t *ps, edata_t *r_edata, size_t size) { - fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; - size_t ps_npages = edata_size_get(ps) >> LG_PAGE; - assert(fb_empty(ps_fb, ps_npages)); - assert(ps_npages >= (size >> LG_PAGE)); - edata_nfree_set(ps, (uint32_t)ps_npages); - edata_age_set(ps, psset->age_counter); - psset->age_counter++; +psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { + fb_group_t *ps_fb = ps->active_pages; + assert(fb_empty(ps_fb, HUGEPAGE_PAGES)); + assert(hpdata_nfree_get(ps) == HUGEPAGE_PAGES); psset_ps_alloc_insert(psset, ps, r_edata, size); } -edata_t * +hpdata_t * psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_ps_get(edata) != NULL); - edata_t *ps = edata_ps_get(edata); + hpdata_t *ps = edata_ps_get(edata); - fb_group_t *ps_fb = edata_slab_data_get(ps)->bitmap; - size_t ps_old_longest_free_range = edata_longest_free_range_get(ps); + 
fb_group_t *ps_fb = ps->active_pages; + size_t ps_old_longest_free_range = hpdata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; if (ps_old_longest_free_range != 0) { old_pind = sz_psz2ind(sz_psz_quantize_floor( ps_old_longest_free_range << LG_PAGE)); } - size_t ps_npages = edata_size_get(ps) >> LG_PAGE; size_t begin = - ((uintptr_t)edata_base_get(edata) - (uintptr_t)edata_base_get(ps)) + ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; - fb_unset_range(ps_fb, ps_npages, begin, len); + fb_unset_range(ps_fb, HUGEPAGE_PAGES, begin, len); /* The pageslab is still in the bin; adjust its stats first. */ psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 ? &psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); - psset_bin_stats_deactivate(bin_stats, edata_hugeified_get(ps), len); + psset_bin_stats_deactivate(bin_stats, hpdata_huge_get(ps), len); - edata_nfree_set(ps, (uint32_t)(edata_nfree_get(ps) + len)); + hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) + len)); /* We might have just created a new, larger range. */ - size_t new_begin = (size_t)(fb_fls(ps_fb, ps_npages, begin) + 1); - size_t new_end = fb_ffs(ps_fb, ps_npages, begin + len - 1); + size_t new_begin = (size_t)(fb_fls(ps_fb, HUGEPAGE_PAGES, begin) + 1); + size_t new_end = fb_ffs(ps_fb, HUGEPAGE_PAGES, begin + len - 1); size_t new_range_len = new_end - new_begin; /* * If the new free range is no longer than the previous longest one, @@ -352,7 +344,7 @@ psset_dalloc(psset_t *psset, edata_t *edata) { * Otherwise, it might need to get evicted from the set, or change its * bin. */ - edata_longest_free_range_set(ps, (uint32_t)new_range_len); + hpdata_longest_free_range_set(ps, (uint32_t)new_range_len); /* * If it was previously non-full, then it's in some (possibly now * incorrect) bin already; remove it. 
@@ -366,8 +358,8 @@ psset_dalloc(psset_t *psset, edata_t *edata) { * and the issue becomes moot). */ if (ps_old_longest_free_range > 0) { - psset_edata_heap_remove(psset, old_pind, ps); - if (edata_age_heap_empty(&psset->pageslabs[old_pind])) { + psset_hpdata_heap_remove(psset, old_pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[old_pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)old_pind); } @@ -379,16 +371,16 @@ psset_dalloc(psset_t *psset, edata_t *edata) { psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. */ - if (new_range_len == ps_npages) { + if (new_range_len == HUGEPAGE_PAGES) { return ps; } /* Otherwise, it gets reinserted. */ pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( new_range_len << LG_PAGE)); - if (edata_age_heap_empty(&psset->pageslabs[new_pind])) { + if (hpdata_age_heap_empty(&psset->pageslabs[new_pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); } - psset_edata_heap_insert(psset, new_pind, ps); + psset_hpdata_heap_insert(psset, new_pind, ps); return NULL; } diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 94efd4ae..90ec89e4 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -38,7 +38,8 @@ create_test_data() { assert_false(err, ""); err = hpa_shard_init(&test_data->shard, &test_data->emap, - &test_data->shard_edata_cache, SHARD_IND, ALLOC_MAX); + test_data->base, &test_data->shard_edata_cache, SHARD_IND, + ALLOC_MAX); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/psset.c b/test/unit/psset.c index ea61ab92..811c7be1 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -2,10 +2,8 @@ #include "jemalloc/internal/psset.h" -#define PAGESLAB_PAGES (HUGEPAGE / PAGE) -#define PAGESLAB_SIZE (PAGESLAB_PAGES << LG_PAGE) -#define PAGESLAB_SN 123 -#define PAGESLAB_ADDR ((void *)(1234 << LG_PAGE)) +#define PAGESLAB_ADDR ((void *)(1234 * HUGEPAGE)) +#define PAGESLAB_AGE 5678 #define 
ALLOC_ARENA_IND 111 #define ALLOC_ESN 222 @@ -42,14 +40,10 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { TEST_BEGIN(test_empty) { bool err; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc; + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + edata_t alloc; edata_init_test(&alloc); psset_t psset; @@ -63,27 +57,24 @@ TEST_END TEST_BEGIN(test_fill) { bool err; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { edata_t *edata = &alloc[i]; edata_expect(edata, i, 1); } @@ -98,30 +89,26 @@ TEST_END TEST_BEGIN(test_reuse) { bool err; - edata_t *ps; + hpdata_t *ps; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ 
false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + edata_t alloc[HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } /* Free odd indices. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i ++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i ++) { if (i % 2 == 0) { continue; } @@ -129,7 +116,7 @@ TEST_BEGIN(test_reuse) { expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* Realloc into them. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 2 == 0) { continue; } @@ -138,7 +125,7 @@ TEST_BEGIN(test_reuse) { edata_expect(&alloc[i], i, 1); } /* Now, free the pages at indices 0 or 1 mod 2. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 4 > 1) { continue; } @@ -146,7 +133,7 @@ TEST_BEGIN(test_reuse) { expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* And realloc 2-page allocations into them. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 4 != 0) { continue; } @@ -155,7 +142,7 @@ TEST_BEGIN(test_reuse) { edata_expect(&alloc[i], i, 2); } /* Free all the 2-page allocations. */ - for (size_t i = 0; i < PAGESLAB_PAGES; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 4 != 0) { continue; } @@ -175,13 +162,13 @@ TEST_BEGIN(test_reuse) { edata_expect(&alloc[index_of_3], index_of_3, 3); /* Free up a 4-page hole at the end. 
*/ - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 2]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); expect_ptr_null(ps, "Nonempty pageslab evicted"); /* Make sure we can satisfy an allocation at the very end of a slab. */ - size_t index_of_4 = PAGESLAB_PAGES - 4; + size_t index_of_4 = HUGEPAGE_PAGES - 4; ps = psset_dalloc(&psset, &alloc[index_of_4]); expect_ptr_null(ps, "Nonempty pageslab evicted"); err = psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); @@ -192,33 +179,31 @@ TEST_END TEST_BEGIN(test_evict) { bool err; - edata_t *ps; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; + hpdata_t *ps; + + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); psset_t psset; psset_init(&psset); /* Alloc the whole slab. */ edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Unxpected allocation failure"); } /* Dealloc the whole slab, going forwards. 
*/ - for (size_t i = 0; i < PAGESLAB_PAGES - 1; i++) { + for (size_t i = 0; i < HUGEPAGE_PAGES - 1; i++) { ps = psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_eq(&pageslab, ps, "Empty pageslab not evicted."); err = psset_alloc_reuse(&psset, &alloc[0], PAGE); @@ -228,20 +213,15 @@ TEST_END TEST_BEGIN(test_multi_pageslab) { bool err; - edata_t *ps; - edata_t pageslab[2]; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[2][PAGESLAB_PAGES]; + hpdata_t *ps; - edata_init(&pageslab[0], /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); - edata_init(&pageslab[1], /* arena_ind */ 0, - (void *)((uintptr_t)PAGESLAB_ADDR + PAGESLAB_SIZE), PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t pageslab[2]; + hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE); + hpdata_init(&pageslab[1], + (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), + PAGESLAB_AGE + 1); + + edata_t alloc[2][HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); @@ -254,7 +234,7 @@ TEST_BEGIN(test_multi_pageslab) { /* Fill them both up; make sure we do so in first-fit order. 
*/ for (size_t i = 0; i < 2; i++) { - for (size_t j = 1; j < PAGESLAB_PAGES; j++) { + for (size_t j = 1; j < HUGEPAGE_PAGES; j++) { edata_init_test(&alloc[i][j]); err = psset_alloc_reuse(&psset, &alloc[i][j], PAGE); expect_false(err, @@ -306,10 +286,10 @@ stats_expect_empty(psset_bin_stats_t *stats) { static void stats_expect(psset_t *psset, size_t nactive) { - if (nactive == PAGESLAB_PAGES) { + if (nactive == HUGEPAGE_PAGES) { expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, "Expected a full slab"); - expect_zu_eq(PAGESLAB_PAGES, + expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs.nactive_nonhuge, "Should have exactly filled the bin"); expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, @@ -317,9 +297,9 @@ stats_expect(psset_t *psset, size_t nactive) { } else { stats_expect_empty(&psset->stats.full_slabs); } - size_t ninactive = PAGESLAB_PAGES - nactive; + size_t ninactive = HUGEPAGE_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; - if (ninactive != 0 && ninactive < PAGESLAB_PAGES) { + if (ninactive != 0 && ninactive < HUGEPAGE_PAGES) { nonempty_pind = sz_psz2ind(sz_psz_quantize_floor( ninactive << LG_PAGE)); } @@ -342,14 +322,11 @@ stats_expect(psset_t *psset, size_t nactive) { TEST_BEGIN(test_stats) { bool err; - edata_t pageslab; - memset(&pageslab, 0, sizeof(pageslab)); - edata_t alloc[PAGESLAB_PAGES]; - edata_init(&pageslab, /* arena_ind */ 0, PAGESLAB_ADDR, PAGESLAB_SIZE, - /* slab */ true, SC_NSIZES, PAGESLAB_SN, extent_state_active, - /* zeroed */ false, /* comitted */ true, EXTENT_PAI_HPA, - EXTENT_IS_HEAD); + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; psset_t psset; psset_init(&psset); @@ -357,15 +334,15 @@ TEST_BEGIN(test_stats) { edata_init_test(&alloc[0]); psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { stats_expect(&psset, i); edata_init_test(&alloc[i]); 
err = psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } - stats_expect(&psset, PAGESLAB_PAGES); - edata_t *ps; - for (ssize_t i = PAGESLAB_PAGES - 1; i >= 0; i--) { + stats_expect(&psset, HUGEPAGE_PAGES); + hpdata_t *ps; + for (ssize_t i = HUGEPAGE_PAGES - 1; i >= 0; i--) { ps = psset_dalloc(&psset, &alloc[i]); expect_true((ps == NULL) == (i != 0), "psset_dalloc should only evict a slab on the last free"); @@ -384,37 +361,28 @@ TEST_END /* * Fills in and inserts two pageslabs, with the first better than the second, * and each fully allocated (into the allocations in allocs and worse_allocs, - * each of which should be PAGESLAB_PAGES long). + * each of which should be HUGEPAGE_PAGES long). * * (There's nothing magic about these numbers; it's just useful to share the * setup between the oldest fit and the insert/remove test). */ static void -init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, - edata_t *alloc, edata_t *worse_alloc) { +init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, + hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) { bool err; - memset(pageslab, 0, sizeof(*pageslab)); - edata_init(pageslab, /* arena_ind */ 0, (void *)(10 * PAGESLAB_SIZE), - PAGESLAB_SIZE, /* slab */ true, SC_NSIZES, PAGESLAB_SN + 1, - extent_state_active, /* zeroed */ false, /* comitted */ true, - EXTENT_PAI_HPA, EXTENT_IS_HEAD); + hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE); /* - * This pageslab is better from an edata_comp_snad POV, but will be - * added to the set after the previous one, and so should be less - * preferred for allocations. + * This pageslab would be better from an address-first-fit POV, but + * better from an age POV. 
*/ - memset(worse_pageslab, 0, sizeof(*worse_pageslab)); - edata_init(worse_pageslab, /* arena_ind */ 0, - (void *)(9 * PAGESLAB_SIZE), PAGESLAB_SIZE, /* slab */ true, - SC_NSIZES, PAGESLAB_SN - 1, extent_state_active, /* zeroed */ false, - /* comitted */ true, EXTENT_PAI_HPA, EXTENT_IS_HEAD); + hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1); psset_init(psset); edata_init_test(&alloc[0]); psset_alloc_new(psset, pageslab, &alloc[0], PAGE); - for (size_t i = 1; i < PAGESLAB_PAGES; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); @@ -430,7 +398,7 @@ init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, * Make the two pssets otherwise indistinguishable; all full except for * a single page. */ - for (size_t i = 1; i < PAGESLAB_PAGES - 1; i++) { + for (size_t i = 1; i < HUGEPAGE_PAGES - 1; i++) { edata_init_test(&worse_alloc[i]); err = psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); @@ -439,17 +407,17 @@ init_test_pageslabs(psset_t *psset, edata_t *pageslab, edata_t *worse_pageslab, } /* Deallocate the last page from the older pageslab. 
*/ - edata_t *evicted = psset_dalloc(psset, &alloc[PAGESLAB_PAGES - 1]); + hpdata_t *evicted = psset_dalloc(psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); } TEST_BEGIN(test_oldest_fit) { bool err; - edata_t alloc[PAGESLAB_PAGES]; - edata_t worse_alloc[PAGESLAB_PAGES]; + edata_t alloc[HUGEPAGE_PAGES]; + edata_t worse_alloc[HUGEPAGE_PAGES]; - edata_t pageslab; - edata_t worse_pageslab; + hpdata_t pageslab; + hpdata_t worse_pageslab; psset_t psset; @@ -468,12 +436,12 @@ TEST_END TEST_BEGIN(test_insert_remove) { bool err; - edata_t *ps; - edata_t alloc[PAGESLAB_PAGES]; - edata_t worse_alloc[PAGESLAB_PAGES]; + hpdata_t *ps; + edata_t alloc[HUGEPAGE_PAGES]; + edata_t worse_alloc[HUGEPAGE_PAGES]; - edata_t pageslab; - edata_t worse_pageslab; + hpdata_t pageslab; + hpdata_t worse_pageslab; psset_t psset; @@ -482,31 +450,31 @@ TEST_BEGIN(test_insert_remove) { /* Remove better; should still be able to alloc from worse. */ psset_remove(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &worse_alloc[PAGESLAB_PAGES - 1], PAGE); + err = psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "Removal should still leave an empty page"); expect_ptr_eq(&worse_pageslab, - edata_ps_get(&worse_alloc[PAGESLAB_PAGES - 1]), + edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]), "Allocated out of wrong ps"); /* * After deallocating the previous alloc and reinserting better, it * should be preferred for future allocations. 
*/ - ps = psset_dalloc(&psset, &worse_alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); psset_insert(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "psset should be nonempty"); - expect_ptr_eq(&pageslab, edata_ps_get(&alloc[PAGESLAB_PAGES - 1]), + expect_ptr_eq(&pageslab, edata_ps_get(&alloc[HUGEPAGE_PAGES - 1]), "Removal/reinsertion shouldn't change ordering"); /* * After deallocating and removing both, allocations should fail. */ - ps = psset_dalloc(&psset, &alloc[PAGESLAB_PAGES - 1]); + ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction"); psset_remove(&psset, &pageslab); psset_remove(&psset, &worse_pageslab); - err = psset_alloc_reuse(&psset, &alloc[PAGESLAB_PAGES - 1], PAGE); + err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); } TEST_END From 089f8fa4429f5e9ee0e679411941ef180e446248 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 18 Nov 2020 14:52:19 -0800 Subject: [PATCH 1930/2608] Move hpdata bitmap logic out of the psset. 
--- include/jemalloc/internal/hpdata.h | 24 +++++-- src/hpdata.c | 96 ++++++++++++++++++++++++++ src/psset.c | 104 ++++------------------------- 3 files changed, 127 insertions(+), 97 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index c4bf6ef5..7bedaf4b 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -107,13 +107,15 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { } static inline void -hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { - hpdata_addr_set(hpdata, addr); - hpdata_age_set(hpdata, age); - hpdata_huge_set(hpdata, false); - hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); - hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); - fb_init(hpdata->active_pages, HUGEPAGE_PAGES); +hpdata_assert_empty(hpdata_t *hpdata) { + assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); + assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); +} + +static inline void +hpdata_assert_consistent(hpdata_t *hpdata) { + assert(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + == hpdata_longest_free_range_get(hpdata)); } TYPED_LIST(hpdata_list, hpdata_t, ql_link) @@ -121,4 +123,12 @@ TYPED_LIST(hpdata_list, hpdata_t, ql_link) typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); +void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); +/* + * Given an hpdata which can serve an allocation request, pick and reserve an + * offset within that allocation. 
+ */ +size_t hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages); +void hpdata_unreserve(hpdata_t *hpdata, size_t start, size_t npages); + #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpdata.c b/src/hpdata.c index bbe3acce..a876a302 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -16,3 +16,99 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { } ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) + + +void +hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + hpdata_addr_set(hpdata, addr); + hpdata_age_set(hpdata, age); + hpdata_huge_set(hpdata, false); + hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); + hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); + fb_init(hpdata->active_pages, HUGEPAGE_PAGES); +} + +size_t +hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages) { + assert(npages <= hpdata_longest_free_range_get(hpdata)); + + size_t result; + + size_t start = 0; + /* + * These are dead stores, but the compiler will issue warnings on them + * since it can't tell statically that found is always true below. + */ + size_t begin = 0; + size_t len = 0; + + size_t largest_unchosen_range = 0; + while (true) { + bool found = fb_urange_iter(hpdata->active_pages, + HUGEPAGE_PAGES, start, &begin, &len); + /* + * A precondition to this function is that hpdata must be able + * to serve the allocation. + */ + assert(found); + if (len >= npages) { + /* + * We use first-fit within the page slabs; this gives + * bounded worst-case fragmentation within a slab. It's + * not necessarily right; we could experiment with + * various other options. + */ + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + /* We found a range; remember it. */ + result = begin; + fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); + hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) - npages); + + /* + * We might have shrunk the longest free range. 
We have to keep + * scanning until the end of the hpdata to be sure. + * + * TODO: As an optimization, we should only do this when the range we + * just allocated from was equal to the longest free range size. + */ + start = begin + npages; + while (start < HUGEPAGE_PAGES) { + bool found = fb_urange_iter(hpdata->active_pages, + HUGEPAGE_PAGES, start, &begin, &len); + if (!found) { + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; + } + hpdata_longest_free_range_set(hpdata, largest_unchosen_range); + + return result; +} + +void +hpdata_unreserve(hpdata_t *hpdata, size_t begin, size_t npages) { + size_t old_longest_range = hpdata_longest_free_range_get(hpdata); + + fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); + /* We might have just created a new, larger range. */ + size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, + begin) + 1); + size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES, + begin + npages - 1); + size_t new_range_len = new_end - new_begin; + + if (new_range_len > old_longest_range) { + hpdata_longest_free_range_set(hpdata, new_range_len); + } + + hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) + npages); +} diff --git a/src/psset.c b/src/psset.c index cebc1ce8..54188518 100644 --- a/src/psset.c +++ b/src/psset.c @@ -105,15 +105,9 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); } -JEMALLOC_ALWAYS_INLINE void -psset_assert_ps_consistent(hpdata_t *ps) { - assert(fb_urange_longest(ps->active_pages, HUGEPAGE_PAGES) - == hpdata_longest_free_range_get(ps)); -} - void psset_insert(psset_t *psset, hpdata_t *ps) { - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -137,7 +131,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) { void psset_remove(psset_t *psset, 
hpdata_t *ps) { - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -157,7 +151,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) { void psset_hugify(psset_t *psset, hpdata_t *ps) { assert(!hpdata_huge_get(ps)); - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); psset_bin_stats_t *bin_stats; @@ -196,7 +190,7 @@ psset_recycle_extract(psset_t *psset, size_t size) { bitmap_set(psset->bitmap, &psset_bitmap_info, pind); } - psset_assert_ps_consistent(ps); + hpdata_assert_consistent(ps); return ps; } @@ -207,76 +201,18 @@ psset_recycle_extract(psset_t *psset, size_t size) { static void psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - size_t start = 0; - /* - * These are dead stores, but the compiler will issue warnings on them - * since it can't tell statically that found is always true below. - */ - size_t begin = 0; - size_t len = 0; - - fb_group_t *ps_fb = ps->active_pages; - - size_t npages = size >> LG_PAGE; - - size_t largest_unchosen_range = 0; - while (true) { - bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, - &begin, &len); - /* - * A precondition to this function is that ps must be able to - * serve the allocation. - */ - assert(found); - if (len >= npages) { - /* - * We use first-fit within the page slabs; this gives - * bounded worst-case fragmentation within a slab. It's - * not necessarily right; we could experiment with - * various other options. 
- */ - break; - } - if (len > largest_unchosen_range) { - largest_unchosen_range = len; - } - start = begin + len; - } + size_t npages = size / PAGE; + size_t begin = hpdata_reserve_alloc(ps, npages); uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - fb_set_range(ps_fb, HUGEPAGE_PAGES, begin, npages); - hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) - npages)); /* The pageslab isn't in a bin, so no bin stats need to change. */ - /* - * OK, we've got to put the pageslab back. First we have to figure out - * where, though; we've only checked run sizes before the pageslab we - * picked. We also need to look for ones after the one we picked. Note - * that we want begin + npages as the start position, not begin + len; - * we might not have used the whole range. - * - * TODO: With a little bit more care, we can guarantee that the longest - * free range field in the edata is accurate upon entry, and avoid doing - * this check in the case where we're allocating from some smaller run. 
- */ - start = begin + npages; - while (start < HUGEPAGE_PAGES) { - bool found = fb_urange_iter(ps_fb, HUGEPAGE_PAGES, start, &begin, - &len); - if (!found) { - break; - } - if (len > largest_unchosen_range) { - largest_unchosen_range = len; - } - start = begin + len; - } - hpdata_longest_free_range_set(ps, (uint32_t)largest_unchosen_range); - if (largest_unchosen_range == 0) { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + if (longest_free_range == 0) { psset_bin_stats_insert(&psset->stats.full_slabs, ps); } else { psset_insert(psset, ps); @@ -295,9 +231,7 @@ psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - fb_group_t *ps_fb = ps->active_pages; - assert(fb_empty(ps_fb, HUGEPAGE_PAGES)); - assert(hpdata_nfree_get(ps) == HUGEPAGE_PAGES); + hpdata_assert_empty(ps); psset_ps_alloc_insert(psset, ps, r_edata, size); } @@ -307,7 +241,6 @@ psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_ps_get(edata) != NULL); hpdata_t *ps = edata_ps_get(edata); - fb_group_t *ps_fb = ps->active_pages; size_t ps_old_longest_free_range = hpdata_longest_free_range_get(ps); pszind_t old_pind = SC_NPSIZES; if (ps_old_longest_free_range != 0) { @@ -319,32 +252,23 @@ psset_dalloc(psset_t *psset, edata_t *edata) { ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; - fb_unset_range(ps_fb, HUGEPAGE_PAGES, begin, len); /* The pageslab is still in the bin; adjust its stats first. */ psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 ? 
&psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); psset_bin_stats_deactivate(bin_stats, hpdata_huge_get(ps), len); - hpdata_nfree_set(ps, (uint32_t)(hpdata_nfree_get(ps) + len)); + hpdata_unreserve(ps, begin, len); + size_t ps_new_longest_free_range = hpdata_longest_free_range_get(ps); - /* We might have just created a new, larger range. */ - size_t new_begin = (size_t)(fb_fls(ps_fb, HUGEPAGE_PAGES, begin) + 1); - size_t new_end = fb_ffs(ps_fb, HUGEPAGE_PAGES, begin + len - 1); - size_t new_range_len = new_end - new_begin; /* * If the new free range is no longer than the previous longest one, * then the pageslab is non-empty and doesn't need to change bins. * We're done, and don't need to return a pageslab to evict. */ - if (new_range_len <= ps_old_longest_free_range) { + if (ps_new_longest_free_range <= ps_old_longest_free_range) { return NULL; } - /* - * Otherwise, it might need to get evicted from the set, or change its - * bin. - */ - hpdata_longest_free_range_set(ps, (uint32_t)new_range_len); /* * If it was previously non-full, then it's in some (possibly now * incorrect) bin already; remove it. @@ -371,12 +295,12 @@ psset_dalloc(psset_t *psset, edata_t *edata) { psset_bin_stats_remove(&psset->stats.full_slabs, ps); } /* If the pageslab is empty, it gets evicted from the set. */ - if (new_range_len == HUGEPAGE_PAGES) { + if (ps_new_longest_free_range == HUGEPAGE_PAGES) { return ps; } /* Otherwise, it gets reinserted. */ pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( - new_range_len << LG_PAGE)); + ps_new_longest_free_range << LG_PAGE)); if (hpdata_age_heap_empty(&psset->pageslabs[new_pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)new_pind); From 5228d869ee9af9c547302abe3165bd63f6bdbbf5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 13:28:54 -0800 Subject: [PATCH 1931/2608] psset: Use fit/insert/remove as basis functions. 
All other functionality can be implemented in terms of these; doing so (while retaining the same API) will be convenient for subsequent refactors. --- include/jemalloc/internal/hpdata.h | 5 + include/jemalloc/internal/psset.h | 10 +- src/hpa.c | 12 ++- src/psset.c | 160 +++++------------------------ 4 files changed, 43 insertions(+), 144 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7bedaf4b..d221c577 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -123,6 +123,11 @@ TYPED_LIST(hpdata_list, hpdata_t, ql_link) typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); +static inline bool +hpdata_empty(hpdata_t *hpdata) { + return hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES; +} + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); /* * Given an hpdata which can serve an allocation request, pick and reserve an diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 01b4e80a..c876c5cf 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -64,13 +64,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); -void psset_hugify(psset_t *psset, hpdata_t *ps); - -/* - * Tries to obtain a chunk from an existing pageslab already in the set. - * Returns true on failure. - */ -bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); +/* Analogous to the eset_fit; pick a hpdata to serve the request. 
*/ +hpdata_t *psset_fit(psset_t *psset, size_t size); /* * Given a newly created pageslab ps (not currently in the set), pass ownership @@ -79,6 +74,7 @@ bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); */ void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size); +bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); /* * Given an extent that comes from a pageslab in this pageslab set, returns it diff --git a/src/hpa.c b/src/hpa.c index 9a190c8a..56149619 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -129,7 +129,7 @@ hpa_hugify(hpdata_t *ps) { assert(hpdata_huge_get(ps)); bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); /* - * Eat the error; even if the hugeification failed, it's still safe to + * Eat the error; even if the hugification failed, it's still safe to * pretend it didn't (and would require extraordinary measures to * unhugify). */ @@ -233,7 +233,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { /* * We do this unconditionally, even for pages which were not originally - * hugeified; it has the same effect. + * hugified; it has the same effect. */ hpa_dehugify(ps); @@ -293,7 +293,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * Do the metadata modification while holding the lock; we'll * actually change state with the lock dropped. */ - psset_hugify(&shard->psset, ps); + psset_remove(&shard->psset, ps); + hpdata_huge_set(ps, true); + psset_insert(&shard->psset, ps); } malloc_mutex_unlock(tsdn, &shard->mtx); if (hugify) { @@ -463,8 +465,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { emap_deregister_boundary(tsdn, shard->emap, edata); malloc_mutex_lock(tsdn, &shard->mtx); /* - * Note that the shard mutex protects the edata hugeified field, too. - * Page slabs can move between pssets (and have their hugeified status + * Note that the shard mutex protects the edata hugified field, too. 
+ * Page slabs can move between pssets (and have their hugified status * change) in racy ways. */ hpdata_t *evicted_ps = psset_dalloc(&shard->psset, edata); diff --git a/src/psset.c b/src/psset.c index 54188518..2e3558c1 100644 --- a/src/psset.c +++ b/src/psset.c @@ -76,23 +76,6 @@ psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); } -/* - * We don't currently need an "activate" equivalent to this, since down the - * allocation pathways we don't do the optimization in which we change a slab - * without first removing it from a bin. - */ -static void -psset_bin_stats_deactivate(psset_bin_stats_t *binstats, bool huge, size_t num) { - size_t *nactive_dst = huge - ? &binstats->nactive_huge : &binstats->nactive_nonhuge; - size_t *ninactive_dst = huge - ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - - assert(*nactive_dst >= num); - *nactive_dst -= num; - *ninactive_dst += num; -} - static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); @@ -148,32 +131,8 @@ psset_remove(psset_t *psset, hpdata_t *ps) { } } -void -psset_hugify(psset_t *psset, hpdata_t *ps) { - assert(!hpdata_huge_get(ps)); - hpdata_assert_consistent(ps); - - size_t longest_free_range = hpdata_longest_free_range_get(ps); - psset_bin_stats_t *bin_stats; - if (longest_free_range == 0) { - bin_stats = &psset->stats.full_slabs; - } else { - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - bin_stats = &psset->stats.nonfull_slabs[pind]; - } - psset_bin_stats_remove(bin_stats, ps); - hpdata_huge_set(ps, true); - psset_bin_stats_insert(bin_stats, ps); -} - -/* - * Similar to PAC's extent_recycle_extract. Out of all the pageslabs in the - * set, picks one that can satisfy the allocation and remove it from the set. 
- */ -static hpdata_t * -psset_recycle_extract(psset_t *psset, size_t size) { +hpdata_t * +psset_fit(psset_t *psset, size_t size) { pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); @@ -185,22 +144,14 @@ psset_recycle_extract(psset_t *psset, size_t size) { return NULL; } - psset_hpdata_heap_remove(psset, pind, ps); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, pind); - } - hpdata_assert_consistent(ps); + return ps; } -/* - * Given a pageslab ps and an edata to allocate size bytes from, initializes the - * edata with a range in the pageslab, and puts ps back in the set. - */ -static void -psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, - size_t size) { +void +psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { + hpdata_assert_empty(ps); size_t npages = size / PAGE; size_t begin = hpdata_reserve_alloc(ps, npages); uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; @@ -209,30 +160,28 @@ psset_ps_alloc_insert(psset_t *psset, hpdata_t *ps, edata_t *r_edata, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - /* The pageslab isn't in a bin, so no bin stats need to change. 
*/ - - size_t longest_free_range = hpdata_longest_free_range_get(ps); - if (longest_free_range == 0) { - psset_bin_stats_insert(&psset->stats.full_slabs, ps); - } else { - psset_insert(psset, ps); - } + psset_insert(psset, ps); } bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - hpdata_t *ps = psset_recycle_extract(psset, size); - if (ps == NULL) { - return true; - } - psset_ps_alloc_insert(psset, ps, r_edata, size); - return false; -} + hpdata_t *ps = psset_fit(psset, size); + if (ps == NULL) { + return true; + } + psset_remove(psset, ps); -void -psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - hpdata_assert_empty(ps); - psset_ps_alloc_insert(psset, ps, r_edata, size); + size_t npages = size / PAGE; + size_t begin = hpdata_reserve_alloc(ps, npages); + uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; + edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + psset_insert(psset, ps); + + return false; } hpdata_t * @@ -241,70 +190,17 @@ psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_ps_get(edata) != NULL); hpdata_t *ps = edata_ps_get(edata); - size_t ps_old_longest_free_range = hpdata_longest_free_range_get(ps); - pszind_t old_pind = SC_NPSIZES; - if (ps_old_longest_free_range != 0) { - old_pind = sz_psz2ind(sz_psz_quantize_floor( - ps_old_longest_free_range << LG_PAGE)); - } - size_t begin = ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) >> LG_PAGE; size_t len = edata_size_get(edata) >> LG_PAGE; - /* The pageslab is still in the bin; adjust its stats first. */ - psset_bin_stats_t *bin_stats = (ps_old_longest_free_range == 0 - ? 
&psset->stats.full_slabs : &psset->stats.nonfull_slabs[old_pind]); - psset_bin_stats_deactivate(bin_stats, hpdata_huge_get(ps), len); - + psset_remove(psset, ps); hpdata_unreserve(ps, begin, len); - size_t ps_new_longest_free_range = hpdata_longest_free_range_get(ps); - - /* - * If the new free range is no longer than the previous longest one, - * then the pageslab is non-empty and doesn't need to change bins. - * We're done, and don't need to return a pageslab to evict. - */ - if (ps_new_longest_free_range <= ps_old_longest_free_range) { + if (hpdata_empty(ps)) { + return ps; + } else { + psset_insert(psset, ps); return NULL; } - /* - * If it was previously non-full, then it's in some (possibly now - * incorrect) bin already; remove it. - * - * TODO: We bailed out early above if we didn't expand the longest free - * range, which should avoid a lot of redundant remove/reinserts in the - * same bin. But it doesn't eliminate all of them; it's possible that - * we decreased the longest free range length, but only slightly, and - * not enough to change our pszind. We could check that more precisely. - * (Or, ideally, size class dequantization will happen at some point, - * and the issue becomes moot). - */ - if (ps_old_longest_free_range > 0) { - psset_hpdata_heap_remove(psset, old_pind, ps); - if (hpdata_age_heap_empty(&psset->pageslabs[old_pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, - (size_t)old_pind); - } - } else { - /* - * Otherwise, the bin was full, and we need to adjust the full - * bin stats. - */ - psset_bin_stats_remove(&psset->stats.full_slabs, ps); - } - /* If the pageslab is empty, it gets evicted from the set. */ - if (ps_new_longest_free_range == HUGEPAGE_PAGES) { - return ps; - } - /* Otherwise, it gets reinserted. 
*/ - pszind_t new_pind = sz_psz2ind(sz_psz_quantize_floor( - ps_new_longest_free_range << LG_PAGE)); - if (hpdata_age_heap_empty(&psset->pageslabs[new_pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, - (size_t)new_pind); - } - psset_hpdata_heap_insert(psset, new_pind, ps); - return NULL; } From 0971e1e4e33edf1cd0d5be808d1eb092ffeab9f3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 14:34:27 -0800 Subject: [PATCH 1932/2608] hpdata: Use addr/size instead of begin/npages. This is easier for the users of the hpdata. --- include/jemalloc/internal/hpdata.h | 4 ++-- src/hpdata.c | 16 ++++++++++++---- src/psset.c | 20 ++++++-------------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index d221c577..cb034eae 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -133,7 +133,7 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); * Given an hpdata which can serve an allocation request, pick and reserve an * offset within that allocation. 
*/ -size_t hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages); -void hpdata_unreserve(hpdata_t *hpdata, size_t start, size_t npages); +void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); +void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpdata.c b/src/hpdata.c index a876a302..847eb9da 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -28,8 +28,10 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { fb_init(hpdata->active_pages, HUGEPAGE_PAGES); } -size_t -hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages) { +void * +hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { + assert((sz & PAGE_MASK) == 0); + size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); size_t result; @@ -91,11 +93,17 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t npages) { } hpdata_longest_free_range_set(hpdata, largest_unchosen_range); - return result; + return (void *)( + (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void -hpdata_unreserve(hpdata_t *hpdata, size_t begin, size_t npages) { +hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { + assert((sz & PAGE_MASK) == 0); + size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) + >> LG_PAGE; + assert(begin < HUGEPAGE_PAGES); + size_t npages = sz >> LG_PAGE; size_t old_longest_range = hpdata_longest_free_range_get(hpdata); fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); diff --git a/src/psset.c b/src/psset.c index 2e3558c1..c31520fd 100644 --- a/src/psset.c +++ b/src/psset.c @@ -152,10 +152,8 @@ psset_fit(psset_t *psset, size_t size) { void psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); - size_t npages = size / PAGE; - size_t begin = hpdata_reserve_alloc(ps, npages); - uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; - edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + 
void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); @@ -171,10 +169,9 @@ psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { } psset_remove(psset, ps); - size_t npages = size / PAGE; - size_t begin = hpdata_reserve_alloc(ps, npages); - uintptr_t addr = (uintptr_t)hpdata_addr_get(ps) + begin * PAGE; - edata_init(r_edata, edata_arena_ind_get(r_edata), (void *)addr, size, + + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); @@ -190,13 +187,8 @@ psset_dalloc(psset_t *psset, edata_t *edata) { assert(edata_ps_get(edata) != NULL); hpdata_t *ps = edata_ps_get(edata); - size_t begin = - ((uintptr_t)edata_base_get(edata) - (uintptr_t)hpdata_addr_get(ps)) - >> LG_PAGE; - size_t len = edata_size_get(edata) >> LG_PAGE; - psset_remove(psset, ps); - hpdata_unreserve(ps, begin, len); + hpdata_unreserve(ps, edata_base_get(edata), edata_size_get(edata)); if (hpdata_empty(ps)) { return ps; } else { From f9299ca572e976597987a1786ac3c5a173a3dbce Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 15:15:21 -0800 Subject: [PATCH 1933/2608] HPA: Use psset fit/insert/remove. This will let us remove alloc_new and alloc_reuse functions from the psset. 
--- src/hpa.c | 86 +++++++++++++++++++++++++++---------------------------- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 56149619..79f97dc7 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -255,12 +255,20 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) } assert(edata_arena_ind_get(edata) == shard->ind); - err = psset_alloc_reuse(&shard->psset, edata, size); - if (err) { + hpdata_t *ps = psset_fit(&shard->psset, size); + if (ps == NULL) { edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } + + psset_remove(&shard->psset, ps); + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(edata, shard->ind, addr, size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, + /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + edata_ps_set(edata, ps); + /* * This could theoretically be moved outside of the critical section, * but that introduces the potential for a race. Without the lock, the @@ -272,31 +280,21 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - hpdata_t *ps = psset_dalloc(&shard->psset, edata); - /* - * The pageslab was nonempty before we started; it - * should still be nonempty now, and so shouldn't get - * evicted. - */ - assert(ps == NULL); + hpdata_unreserve(ps, edata_addr_get(edata), + edata_size_get(edata)); + psset_insert(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; return NULL; } - hpdata_t *ps = edata_ps_get(edata); - assert(ps != NULL); bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - /* - * Do the metadata modification while holding the lock; we'll - * actually change state with the lock dropped. 
- */ - psset_remove(&shard->psset, ps); hpdata_huge_set(ps, true); - psset_insert(&shard->psset, ps); } + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); if (hugify) { /* @@ -345,8 +343,8 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *grow_ps = hpa_grow(tsdn, shard); - if (grow_ps == NULL) { + hpdata_t *ps = hpa_grow(tsdn, shard); + if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } @@ -357,19 +355,21 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { if (edata == NULL) { malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); - hpa_handle_ps_eviction(tsdn, shard, grow_ps); + hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } - psset_alloc_new(&shard->psset, grow_ps, edata, size); + + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(edata, shard->ind, addr, size, /* slab */ false, + SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, + /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + edata_ps_set(edata, ps); + err = emap_register_boundary(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - hpdata_t *ps = psset_dalloc(&shard->psset, edata); - /* - * The pageslab was empty except for the new allocation; it - * should get evicted. 
- */ - assert(ps == grow_ps); + hpdata_unreserve(ps, edata_addr_get(edata), + edata_size_get(edata)); edata_cache_small_put(tsdn, &shard->ecs, edata); /* * Technically the same as fallthrough at the time of this @@ -381,6 +381,8 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); return edata; @@ -464,21 +466,18 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { assert(ps != NULL); emap_deregister_boundary(tsdn, shard->emap, edata); malloc_mutex_lock(tsdn, &shard->mtx); - /* - * Note that the shard mutex protects the edata hugified field, too. - * Page slabs can move between pssets (and have their hugified status - * change) in racy ways. - */ - hpdata_t *evicted_ps = psset_dalloc(&shard->psset, edata); - /* - * If a pageslab became empty because of the dalloc, it better have been - * the one we expected. - */ - assert(evicted_ps == NULL || evicted_ps == ps); + + /* Note that the shard mutex protects ps's metadata too. 
*/ + psset_remove(&shard->psset, ps); + hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); - if (evicted_ps != NULL) { - hpa_handle_ps_eviction(tsdn, shard, evicted_ps); + if (hpdata_empty(ps)) { + malloc_mutex_unlock(tsdn, &shard->mtx); + hpa_handle_ps_eviction(tsdn, shard, ps); + } else { + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); } } @@ -501,10 +500,9 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { static void hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { - edata_t edata = {0}; malloc_mutex_assert_owner(tsdn, &shard->mtx); - bool psset_empty = psset_alloc_reuse(psset, &edata, PAGE); - assert(psset_empty); + hpdata_t *ps = psset_fit(psset, PAGE); + assert(ps == NULL); hpa_shard_assert_stats_empty(&psset->stats.full_slabs); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { hpa_shard_assert_stats_empty( From f7cf23aa4d7c266af512c599205b1fab80b26796 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 16:10:56 -0800 Subject: [PATCH 1934/2608] psset: Relegate alloc/dalloc to test code. This is no longer part of the "core" functionality; we only need the stub implementations as an end-to-end test of hpdata + psset interactions when metadata is being modified. Treat them accordingly. --- include/jemalloc/internal/psset.h | 19 ---- src/psset.c | 49 +---------- test/unit/psset.c | 140 ++++++++++++++++++++---------- 3 files changed, 94 insertions(+), 114 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index c876c5cf..7027cff7 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -67,23 +67,4 @@ void psset_remove(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. 
*/ hpdata_t *psset_fit(psset_t *psset, size_t size); -/* - * Given a newly created pageslab ps (not currently in the set), pass ownership - * to the psset and allocate an extent from within it. The passed-in pageslab - * must be at least as big as size. - */ -void psset_alloc_new(psset_t *psset, hpdata_t *ps, - edata_t *r_edata, size_t size); -bool psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size); - -/* - * Given an extent that comes from a pageslab in this pageslab set, returns it - * to its slab. Does not take ownership of the underlying edata_t. - * - * If some slab becomes empty as a result of the dalloc, it is retuend -- the - * result must be checked and deallocated to the central HPA. Otherwise returns - * NULL. - */ -hpdata_t *psset_dalloc(psset_t *psset, edata_t *edata); - #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/psset.c b/src/psset.c index c31520fd..7a5bd604 100644 --- a/src/psset.c +++ b/src/psset.c @@ -90,6 +90,7 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { void psset_insert(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -148,51 +149,3 @@ psset_fit(psset_t *psset, size_t size) { return ps; } - -void -psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { - hpdata_assert_empty(ps); - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, - /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, - /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, - EXTENT_NOT_HEAD); - edata_ps_set(r_edata, ps); - psset_insert(psset, ps); -} - -bool -psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - hpdata_t *ps = psset_fit(psset, size); - if (ps == NULL) { - return true; - } - psset_remove(psset, ps); - - - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(r_edata, 
edata_arena_ind_get(r_edata), addr, size, - /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, - /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, - EXTENT_NOT_HEAD); - edata_ps_set(r_edata, ps); - psset_insert(psset, ps); - - return false; -} - -hpdata_t * -psset_dalloc(psset_t *psset, edata_t *edata) { - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - assert(edata_ps_get(edata) != NULL); - hpdata_t *ps = edata_ps_get(edata); - - psset_remove(psset, ps); - hpdata_unreserve(ps, edata_base_get(edata), edata_size_get(edata)); - if (hpdata_empty(ps)) { - return ps; - } else { - psset_insert(psset, ps); - return NULL; - } -} diff --git a/test/unit/psset.c b/test/unit/psset.c index 811c7be1..4147729c 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -15,6 +15,49 @@ edata_init_test(edata_t *edata) { edata_esn_set(edata, ALLOC_ESN); } +static void +test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, + size_t size) { + hpdata_assert_empty(ps); + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + psset_insert(psset, ps); +} + +static bool +test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { + hpdata_t *ps = psset_fit(psset, size); + if (ps == NULL) { + return true; + } + psset_remove(psset, ps); + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, + /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); + psset_insert(psset, ps); + return false; +} + +static hpdata_t * +test_psset_dalloc(psset_t *psset, edata_t *edata) { + hpdata_t *ps = edata_ps_get(edata); + psset_remove(psset, ps); + hpdata_unreserve(ps, 
edata_addr_get(edata), edata_size_get(edata)); + if (hpdata_empty(ps)) { + return ps; + } else { + psset_insert(psset, ps); + return NULL; + } +} + static void edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { /* @@ -50,7 +93,7 @@ TEST_BEGIN(test_empty) { psset_init(&psset); /* Empty psset should return fail allocations. */ - err = psset_alloc_reuse(&psset, &alloc, PAGE); + err = test_psset_alloc_reuse(&psset, &alloc, PAGE); expect_true(err, "Empty psset succeeded in an allocation."); } TEST_END @@ -67,10 +110,10 @@ TEST_BEGIN(test_fill) { psset_init(&psset); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } @@ -82,7 +125,7 @@ TEST_BEGIN(test_fill) { /* The pageslab, and thus psset, should now have no allocations. 
*/ edata_t extra_alloc; edata_init_test(&extra_alloc); - err = psset_alloc_reuse(&psset, &extra_alloc, PAGE); + err = test_psset_alloc_reuse(&psset, &extra_alloc, PAGE); expect_true(err, "Alloc succeeded even though psset should be empty"); } TEST_END @@ -100,10 +143,10 @@ TEST_BEGIN(test_reuse) { psset_init(&psset); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } @@ -112,7 +155,7 @@ TEST_BEGIN(test_reuse) { if (i % 2 == 0) { continue; } - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* Realloc into them. */ @@ -120,7 +163,7 @@ TEST_BEGIN(test_reuse) { if (i % 2 == 0) { continue; } - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); edata_expect(&alloc[i], i, 1); } @@ -129,7 +172,7 @@ TEST_BEGIN(test_reuse) { if (i % 4 > 1) { continue; } - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* And realloc 2-page allocations into them. 
*/ @@ -137,7 +180,7 @@ TEST_BEGIN(test_reuse) { if (i % 4 != 0) { continue; } - err = psset_alloc_reuse(&psset, &alloc[i], 2 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], 2 * PAGE); expect_false(err, "Nonempty psset failed page allocation."); edata_expect(&alloc[i], i, 2); } @@ -146,7 +189,7 @@ TEST_BEGIN(test_reuse) { if (i % 4 != 0) { continue; } - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } /* @@ -155,23 +198,23 @@ TEST_BEGIN(test_reuse) { * (since 12 % 4 == 0). */ size_t index_of_3 = 11; - ps = psset_dalloc(&psset, &alloc[index_of_3]); + ps = test_psset_dalloc(&psset, &alloc[index_of_3]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - err = psset_alloc_reuse(&psset, &alloc[index_of_3], 3 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[index_of_3], 3 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_3], index_of_3, 3); /* Free up a 4-page hole at the end. */ - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); expect_ptr_null(ps, "Nonempty pageslab evicted"); /* Make sure we can satisfy an allocation at the very end of a slab. */ size_t index_of_4 = HUGEPAGE_PAGES - 4; - ps = psset_dalloc(&psset, &alloc[index_of_4]); + ps = test_psset_dalloc(&psset, &alloc[index_of_4]); expect_ptr_null(ps, "Nonempty pageslab evicted"); - err = psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_4], index_of_4, 4); } @@ -191,22 +234,22 @@ TEST_BEGIN(test_evict) { /* Alloc the whole slab. 
*/ edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Unxpected allocation failure"); } /* Dealloc the whole slab, going forwards. */ for (size_t i = 0; i < HUGEPAGE_PAGES - 1; i++) { - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_ptr_null(ps, "Nonempty pageslab evicted"); } - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_eq(&pageslab, ps, "Empty pageslab not evicted."); - err = psset_alloc_reuse(&psset, &alloc[0], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[0], PAGE); expect_true(err, "psset should be empty."); } TEST_END @@ -228,15 +271,15 @@ TEST_BEGIN(test_multi_pageslab) { /* Insert both slabs. */ edata_init_test(&alloc[0][0]); - psset_alloc_new(&psset, &pageslab[0], &alloc[0][0], PAGE); + test_psset_alloc_new(&psset, &pageslab[0], &alloc[0][0], PAGE); edata_init_test(&alloc[1][0]); - psset_alloc_new(&psset, &pageslab[1], &alloc[1][0], PAGE); + test_psset_alloc_new(&psset, &pageslab[1], &alloc[1][0], PAGE); /* Fill them both up; make sure we do so in first-fit order. */ for (size_t i = 0; i < 2; i++) { for (size_t j = 1; j < HUGEPAGE_PAGES; j++) { edata_init_test(&alloc[i][j]); - err = psset_alloc_reuse(&psset, &alloc[i][j], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i][j], PAGE); expect_false(err, "Nonempty psset failed page allocation."); assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]), @@ -248,13 +291,13 @@ TEST_BEGIN(test_multi_pageslab) { * Free up a 2-page hole in the earlier slab, and a 1-page one in the * later one. We should still pick the later one. 
*/ - ps = psset_dalloc(&psset, &alloc[0][0]); + ps = test_psset_dalloc(&psset, &alloc[0][0]); expect_ptr_null(ps, "Unexpected eviction"); - ps = psset_dalloc(&psset, &alloc[0][1]); + ps = test_psset_dalloc(&psset, &alloc[0][1]); expect_ptr_null(ps, "Unexpected eviction"); - ps = psset_dalloc(&psset, &alloc[1][0]); + ps = test_psset_dalloc(&psset, &alloc[1][0]); expect_ptr_null(ps, "Unexpected eviction"); - err = psset_alloc_reuse(&psset, &alloc[0][0], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[0][0], PAGE); expect_ptr_eq(&pageslab[1], edata_ps_get(&alloc[0][0]), "Should have picked the fuller pageslab"); @@ -262,14 +305,14 @@ TEST_BEGIN(test_multi_pageslab) { * Now both slabs have 1-page holes. Free up a second one in the later * slab. */ - ps = psset_dalloc(&psset, &alloc[1][1]); + ps = test_psset_dalloc(&psset, &alloc[1][1]); expect_ptr_null(ps, "Unexpected eviction"); /* * We should be able to allocate a 2-page object, even though an earlier * size class is nonempty. */ - err = psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); expect_false(err, "Allocation should have succeeded"); } TEST_END @@ -333,23 +376,24 @@ TEST_BEGIN(test_stats) { stats_expect(&psset, 0); edata_init_test(&alloc[0]); - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { stats_expect(&psset, i); edata_init_test(&alloc[i]); - err = psset_alloc_reuse(&psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); } stats_expect(&psset, HUGEPAGE_PAGES); hpdata_t *ps; for (ssize_t i = HUGEPAGE_PAGES - 1; i >= 0; i--) { - ps = psset_dalloc(&psset, &alloc[i]); + ps = test_psset_dalloc(&psset, &alloc[i]); expect_true((ps == NULL) == (i != 0), - "psset_dalloc should only evict a slab on the last free"); + "test_psset_dalloc should only evict 
a slab on the last " + "free"); stats_expect(&psset, i); } - psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); stats_expect(&psset, 1); psset_remove(&psset, &pageslab); stats_expect(&psset, 0); @@ -381,17 +425,17 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, psset_init(psset); edata_init_test(&alloc[0]); - psset_alloc_new(psset, pageslab, &alloc[0], PAGE); + test_psset_alloc_new(psset, pageslab, &alloc[0], PAGE); for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { edata_init_test(&alloc[i]); - err = psset_alloc_reuse(psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); expect_ptr_eq(pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } edata_init_test(&worse_alloc[0]); - psset_alloc_new(psset, worse_pageslab, &worse_alloc[0], PAGE); + test_psset_alloc_new(psset, worse_pageslab, &worse_alloc[0], PAGE); expect_ptr_eq(worse_pageslab, edata_ps_get(&worse_alloc[0]), "Allocated from the wrong pageslab"); /* @@ -400,14 +444,15 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, */ for (size_t i = 1; i < HUGEPAGE_PAGES - 1; i++) { edata_init_test(&worse_alloc[i]); - err = psset_alloc_reuse(psset, &alloc[i], PAGE); + err = test_psset_alloc_reuse(psset, &alloc[i], PAGE); expect_false(err, "Nonempty psset failed page allocation."); expect_ptr_eq(worse_pageslab, edata_ps_get(&alloc[i]), "Allocated from the wrong pageslab"); } /* Deallocate the last page from the older pageslab. */ - hpdata_t *evicted = psset_dalloc(psset, &alloc[HUGEPAGE_PAGES - 1]); + hpdata_t *evicted = test_psset_dalloc(psset, + &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); } @@ -427,7 +472,7 @@ TEST_BEGIN(test_oldest_fit) { /* The edata should come from the better pageslab. 
*/ edata_t test_edata; edata_init_test(&test_edata); - err = psset_alloc_reuse(&psset, &test_edata, PAGE); + err = test_psset_alloc_reuse(&psset, &test_edata, PAGE); expect_false(err, "Nonempty psset failed page allocation"); expect_ptr_eq(&pageslab, edata_ps_get(&test_edata), "Allocated from the wrong pageslab"); @@ -450,7 +495,8 @@ TEST_BEGIN(test_insert_remove) { /* Remove better; should still be able to alloc from worse. */ psset_remove(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); + err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], + PAGE); expect_false(err, "Removal should still leave an empty page"); expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]), @@ -460,21 +506,21 @@ TEST_BEGIN(test_insert_remove) { * After deallocating the previous alloc and reinserting better, it * should be preferred for future allocations. */ - ps = psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); psset_insert(&psset, &pageslab); - err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "psset should be nonempty"); expect_ptr_eq(&pageslab, edata_ps_get(&alloc[HUGEPAGE_PAGES - 1]), "Removal/reinsertion shouldn't change ordering"); /* * After deallocating and removing both, allocations should fail. 
*/ - ps = psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); + ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction"); psset_remove(&psset, &pageslab); psset_remove(&psset, &worse_pageslab); - err = psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); + err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); } TEST_END From fffcefed338429b43ad29a185067f976fe564d11 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 17:25:54 -0800 Subject: [PATCH 1935/2608] malloc_conf: Clarify HPA options. --- src/jemalloc.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 277b9e72..30c2fe16 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -137,8 +137,8 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; -size_t opt_hpa_slab_goal = 128 * 1024; size_t opt_hpa_slab_max_alloc = 256 * 1024; +size_t opt_hpa_slab_goal = 128 * 1024; size_t opt_hpa_small_max = 32 * 1024; size_t opt_hpa_large_min = 4 * 1024 * 1024; @@ -1495,20 +1495,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") - /* - * If someone violates these mins and maxes, they're - * confused. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", - PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, - true) CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, "hpa_slab_max_alloc", PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1517,6 +1506,21 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + /* + * These no longer have any effect, but various + * non-public test configs set them as we iterate on HPA + * development. We parse and report them for now, but + * they don't affect behavior. Eventually they'll be + * removed. + */ + CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", + PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, + true) + CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", + PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); From 3ed0b4e8a3f53c099ba6b2989b1e38878b40ef9b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 30 Nov 2020 19:06:50 -0800 Subject: [PATCH 1936/2608] HPA: Add an nevictions counter. I.e. the number of times we've purged a hugepage-sized region. 
--- include/jemalloc/internal/hpa.h | 12 +++++++++++- src/ctl.c | 7 ++++++- src/hpa.c | 14 +++++++++----- src/stats.c | 16 +++++++++++++--- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index edb36179..217604e7 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,10 +6,12 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -/* Used only by CTL; not actually stored here (i.e., all derived). */ +/* Completely derived; only used by CTL. */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; + /* The stat version of the nevictions counter. */ + uint64_t nevictions; }; typedef struct hpa_shard_s hpa_shard_t; @@ -69,6 +71,14 @@ struct hpa_shard_s { /* The arena ind we're associated with. */ unsigned ind; emap_t *emap; + + /* + * The number of times we've purged a hugepage. Each eviction purges a + * single hugepage. + * + * Guarded by the grow mutex. 
+ */ + uint64_t nevictions; }; /* diff --git a/src/ctl.c b/src/ctl.c index 88cee666..4266e4bd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -220,6 +220,7 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) +CTL_PROTO(stats_arenas_i_hpa_shard_nevictions) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) @@ -655,7 +656,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, {NAME("nonfull_slabs"), CHILD(indexed, - stats_arenas_i_hpa_shard_nonfull_slabs)} + stats_arenas_i_hpa_shard_nonfull_slabs)}, + {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -3372,6 +3374,9 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, + arenas_i(mib[2])->astats->hpastats.nevictions, uint64_t); + /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, diff --git a/src/hpa.c b/src/hpa.c index 79f97dc7..a51f83ce 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,6 +74,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->eden_len = 0; shard->ind = ind; shard->emap = emap; + shard->nevictions = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -97,14 +98,18 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); + dst->nevictions += src->nevictions; } 
void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { + malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); + dst->nevictions += shard->nevictions; malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); } static hpdata_t * @@ -238,6 +243,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { hpa_dehugify(ps); malloc_mutex_lock(tsdn, &shard->grow_mtx); + shard->nevictions++; hpdata_list_prepend(&shard->unused_slabs, ps); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -353,6 +359,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_lock(tsdn, &shard->mtx); edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { + shard->nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); hpa_handle_ps_eviction(tsdn, shard, ps); @@ -371,11 +378,8 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); edata_cache_small_put(tsdn, &shard->ecs, edata); - /* - * Technically the same as fallthrough at the time of this - * writing, but consistent with the error handling in the rest - * of the function. 
- */ + + shard->nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); hpa_handle_ps_eviction(tsdn, shard, ps); diff --git a/src/stats.c b/src/stats.c index abe3ab16..aab9fb5c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -661,12 +661,14 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { +stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_t header_row; emitter_row_init(&header_row); emitter_row_t row; emitter_row_init(&row); + uint64_t nevictions; + size_t npageslabs_huge; size_t nactive_huge; size_t ninactive_huge; @@ -675,6 +677,9 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { size_t nactive_nonhuge; size_t ninactive_nonhuge; + CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", + i, &nevictions, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -696,13 +701,18 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i) { emitter_table_printf(emitter, "HPA shard stats:\n" + " Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n" " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ninactive: %zu huge, %zu nonhuge \n", - npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + nevictions, rate_per_second(nevictions, uptime), + npageslabs_huge, npageslabs_nonhuge, + nactive_huge, nactive_nonhuge, ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); + emitter_json_kv(emitter, "nevictions", emitter_type_uint64, + &nevictions); emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); @@ -1137,7 +1147,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, 
stats_arena_extents_print(emitter, i); } if (hpa) { - stats_arena_hpa_shard_print(emitter, i); + stats_arena_hpa_shard_print(emitter, i, uptime); } } From d9f7e6c66899b29976cd6ec828ee0f14d4db3aac Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 07:04:01 -0800 Subject: [PATCH 1937/2608] hpdata: Add a test. We're about to make the functionality here more complicated; testing hpdata directly (rather than relying on user's tests) will make debugging easier. --- Makefile.in | 1 + include/jemalloc/internal/hpdata.h | 9 ++++- test/unit/hpdata.c | 61 ++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) create mode 100644 test/unit/hpdata.c diff --git a/Makefile.in b/Makefile.in index f263fc32..ba6dd763 100644 --- a/Makefile.in +++ b/Makefile.in @@ -219,6 +219,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_central.c \ + $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ $(srcroot)test/unit/junk.c \ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index cb034eae..e8433c53 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -112,10 +112,15 @@ hpdata_assert_empty(hpdata_t *hpdata) { assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); } +static inline bool +hpdata_consistent(hpdata_t *hpdata) { + return fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + == hpdata_longest_free_range_get(hpdata); +} + static inline void hpdata_assert_consistent(hpdata_t *hpdata) { - assert(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) - == hpdata_longest_free_range_get(hpdata)); + assert(hpdata_consistent(hpdata)); } TYPED_LIST(hpdata_list, hpdata_t, ql_link) diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c new file mode 100644 index 00000000..1bf58bca --- /dev/null +++ b/test/unit/hpdata.c @@ -0,0 +1,61 @@ +#include "test/jemalloc_test.h" + +#define 
HPDATA_ADDR ((void *)(10 * HUGEPAGE)) +#define HPDATA_AGE 123 + +TEST_BEGIN(test_reserve_alloc) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocating a page at a time, we should do first fit. */ + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(HUGEPAGE_PAGES - i, + hpdata_longest_free_range_get(&hpdata), ""); + void *alloc = hpdata_reserve_alloc(&hpdata, PAGE); + expect_ptr_eq((char *)HPDATA_ADDR + i * PAGE, alloc, ""); + expect_true(hpdata_consistent(&hpdata), ""); + } + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(0, hpdata_longest_free_range_get(&hpdata), ""); + + /* + * Build up a bigger free-range, 2 pages at a time, until we've got 6 + * adjacent free pages total. Pages 8-13 should be unreserved after + * this. + */ + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 10 * PAGE, 2 * PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(2, hpdata_longest_free_range_get(&hpdata), ""); + + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 12 * PAGE, 2 * PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(4, hpdata_longest_free_range_get(&hpdata), ""); + + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 8 * PAGE, 2 * PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(6, hpdata_longest_free_range_get(&hpdata), ""); + + /* + * Leave page 14 reserved, but free page 15 (this test the case where + * unreserving combines two ranges). + */ + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 15 * PAGE, PAGE); + /* + * Longest free range shouldn't change; we've got a free range of size + * 6, then a reserved page, then another free range. + */ + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(6, hpdata_longest_free_range_get(&hpdata), ""); + + /* After freeing page 14, the two ranges get combined. 
*/ + hpdata_unreserve(&hpdata, (char *)HPDATA_ADDR + 14 * PAGE, PAGE); + expect_true(hpdata_consistent(&hpdata), ""); + expect_zu_eq(8, hpdata_longest_free_range_get(&hpdata), ""); +} +TEST_END + +int main(void) { + return test_no_reentrancy( + test_reserve_alloc); +} From 734e72ce8fb897bdbcbd48bb994c3778dba50dc6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 10:04:32 -0800 Subject: [PATCH 1938/2608] bit_util: Guarantee popcount's presence. Implement popcount generically, so that we can rely on it being present. --- configure.ac | 1 + include/jemalloc/internal/bit_util.h | 91 +++++++++++++++++++++++++++- test/unit/bit_util.c | 79 +++++++++++++++++++++++- 3 files changed, 167 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index 8e21f3f9..8284e87a 100644 --- a/configure.ac +++ b/configure.ac @@ -1594,6 +1594,7 @@ JE_COMPILABLE([a program using __builtin_popcountl], [ if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount]) AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll]) fi AC_ARG_WITH([lg_quantum], diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index c5158f67..bac59140 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -179,12 +179,97 @@ fls_u(unsigned x) { } #endif -#ifdef JEMALLOC_INTERNAL_POPCOUNTL +#if LG_SIZEOF_LONG_LONG > 3 +# error "Haven't implemented popcount for 16-byte ints." +#endif + +#define DO_POPCOUNT(x, type) do { \ + /* \ + * Algorithm from an old AMD optimization reference manual. \ + * We're putting a little bit more work than you might expect \ + * into the no-instrinsic case, since we only support the \ + * GCC intrinsics spelling of popcount (for now). Detecting \ + * whether or not the popcount builtin is actually useable in \ + * MSVC is nontrivial. 
\ + */ \ + \ + type bmul = (type)0x0101010101010101ULL; \ + \ + /* \ + * Replace each 2 bits with the sideways sum of the original \ + * values. 0x5 = 0b0101. \ + * \ + * You might expect this to be: \ + * x = (x & 0x55...) + ((x >> 1) & 0x55...). \ + * That costs an extra mask relative to this, though. \ + */ \ + x = x - ((x >> 1) & (0x55U * bmul)); \ + /* Replace each 4 bits with their sideays sum. 0x3 = 0b0011. */\ + x = (x & (bmul * 0x33U)) + ((x >> 2) & (bmul * 0x33U)); \ + /* \ + * Replace each 8 bits with their sideways sum. Note that we \ + * can't overflow within each 4-bit sum here, so we can skip \ + * the initial mask. \ + */ \ + x = (x + (x >> 4)) & (bmul * 0x0FU); \ + /* \ + * None of the partial sums in this multiplication (viewed in \ + * base-256) can overflow into the next digit. So the least \ + * significant byte of the product will be the least \ + * significant byte of the original value, the second least \ + * significant byte will be the sum of the two least \ + * significant bytes of the original value, and so on. \ + * Importantly, the high byte will be the byte-wise sum of all \ + * the bytes of the original value. 
\ + */ \ + x = x * bmul; \ + x >>= ((sizeof(x) - 1) * 8); \ + return (unsigned)x; \ +} while(0) + +static inline unsigned +popcount_u_slow(unsigned bitmap) { + DO_POPCOUNT(bitmap, unsigned); +} + +static inline unsigned +popcount_lu_slow(unsigned long bitmap) { + DO_POPCOUNT(bitmap, unsigned long); +} + +static inline unsigned +popcount_llu_slow(unsigned long long bitmap) { + DO_POPCOUNT(bitmap, unsigned long long); +} + +#undef DO_POPCOUNT + +static inline unsigned +popcount_u(unsigned bitmap) { +#ifdef JEMALLOC_INTERNAL_POPCOUNT + return JEMALLOC_INTERNAL_POPCOUNT(bitmap); +#else + return popcount_u_slow(bitmap); +#endif +} + static inline unsigned popcount_lu(unsigned long bitmap) { - return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); -} +#ifdef JEMALLOC_INTERNAL_POPCOUNTL + return JEMALLOC_INTERNAL_POPCOUNTL(bitmap); +#else + return popcount_lu_slow(bitmap); #endif +} + +static inline unsigned +popcount_llu(unsigned long long bitmap) { +#ifdef JEMALLOC_INTERNAL_POPCOUNTLL + return JEMALLOC_INTERNAL_POPCOUNTLL(bitmap); +#else + return popcount_llu_slow(bitmap); +#endif +} /* * Clears first unset bit in bitmap, and returns diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 045cf8b4..7d31b210 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -204,6 +204,77 @@ TEST_BEGIN(test_fls_llu_slow) { } TEST_END +static unsigned +popcount_byte(unsigned byte) { + int count = 0; + for (int i = 0; i < 8; i++) { + if ((byte & (1 << i)) != 0) { + count++; + } + } + return count; +} + +static uint64_t +expand_byte_to_mask(unsigned byte) { + uint64_t result = 0; + for (int i = 0; i < 8; i++) { + if ((byte & (1 << i)) != 0) { + result |= ((uint64_t)0xFF << (i * 8)); + } + } + return result; +} + +#define TEST_POPCOUNT(t, suf, pri_hex) do { \ + t bmul = (t)0x0101010101010101ULL; \ + for (unsigned i = 0; i < (1 << sizeof(t)); i++) { \ + for (unsigned j = 0; j < 256; j++) { \ + /* \ + * Replicate the byte j into various \ + * bytes of the integer (as indicated by the 
\ + * mask in i), and ensure that the popcount of \ + * the result is popcount(i) * popcount(j) \ + */ \ + t mask = (t)expand_byte_to_mask(i); \ + t x = (bmul * j) & mask; \ + expect_u_eq( \ + popcount_byte(i) * popcount_byte(j), \ + popcount_##suf(x), \ + "Unexpected result, x=0x%"pri_hex, x); \ + } \ + } \ +} while (0) + +TEST_BEGIN(test_popcount_u) { + TEST_POPCOUNT(unsigned, u, "x"); +} +TEST_END + +TEST_BEGIN(test_popcount_u_slow) { + TEST_POPCOUNT(unsigned, u_slow, "x"); +} +TEST_END + +TEST_BEGIN(test_popcount_lu) { + TEST_POPCOUNT(unsigned long, lu, "lx"); +} +TEST_END + +TEST_BEGIN(test_popcount_lu_slow) { + TEST_POPCOUNT(unsigned long, lu_slow, "lx"); +} +TEST_END + +TEST_BEGIN(test_popcount_llu) { + TEST_POPCOUNT(unsigned long long, llu, "llx"); +} +TEST_END + +TEST_BEGIN(test_popcount_llu_slow) { + TEST_POPCOUNT(unsigned long long, llu_slow, "llx"); +} +TEST_END int main(void) { @@ -226,5 +297,11 @@ main(void) { test_fls_zu, test_fls_u_slow, test_fls_lu_slow, - test_fls_llu_slow); + test_fls_llu_slow, + test_popcount_u, + test_popcount_u_slow, + test_popcount_lu, + test_popcount_lu_slow, + test_popcount_llu, + test_popcount_llu_slow); } From e6c057ad35b0c83eef100bf0e125f75ebf8b5edc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 11:46:49 -0800 Subject: [PATCH 1939/2608] fb: implement assign in terms of a visitor. We'll reuse this visitor in the next commit. 
--- include/jemalloc/internal/flat_bitmap.h | 55 ++++++++++++++----------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index 0faf447e..9f1909e1 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -71,21 +71,16 @@ fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { fb[group_ind] &= ~((fb_group_t)1 << bit_ind); } -JEMALLOC_ALWAYS_INLINE void -fb_assign_group_impl(fb_group_t *fb, size_t start, size_t cnt, bool val) { - assert(cnt > 0); - assert(start + cnt - 1 < FB_GROUP_BITS); - fb_group_t bits = ((~(fb_group_t)0) >> (FB_GROUP_BITS - cnt)) << start; - if (val) { - *fb |= bits; - } else { - *fb &= ~bits; - } -} +/* + * Some implementation details. This visitation function lets us apply a group + * visitor to each group in the bitmap (potentially modifying it). The mask + * indicates which bits are logically part of the visitation. + */ +typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask); JEMALLOC_ALWAYS_INLINE void -fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, - bool val) { +fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, + size_t start, size_t cnt) { assert(start + cnt - 1 < nbits); size_t group_ind = start / FB_GROUP_BITS; size_t start_bit_ind = start % FB_GROUP_BITS; @@ -93,10 +88,8 @@ fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, * The first group is special; it's the only one we don't start writing * to from bit 0. */ - size_t first_group_cnt = - (start_bit_ind + cnt > FB_GROUP_BITS - ? FB_GROUP_BITS - start_bit_ind - : cnt); + size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS + ? 
FB_GROUP_BITS - start_bit_ind : cnt); /* * We can basically split affected words into: * - The first group, where we touch only the high bits @@ -106,32 +99,48 @@ fb_assign_impl(fb_group_t *fb, size_t nbits, size_t start, size_t cnt, * this can lead to bad codegen for those middle words. */ /* First group */ - fb_assign_group_impl(&fb[group_ind], start_bit_ind, first_group_cnt, - val); + fb_group_t mask = ((~(fb_group_t)0) + >> (FB_GROUP_BITS - first_group_cnt)) + << start_bit_ind; + visit(ctx, &fb[group_ind], mask); + cnt -= first_group_cnt; group_ind++; /* Middle groups */ while (cnt > FB_GROUP_BITS) { - fb_assign_group_impl(&fb[group_ind], 0, FB_GROUP_BITS, val); + visit(ctx, &fb[group_ind], ~(fb_group_t)0); cnt -= FB_GROUP_BITS; group_ind++; } /* Last group */ if (cnt != 0) { - fb_assign_group_impl(&fb[group_ind], 0, cnt, val); + mask = (~(fb_group_t)0) >> (FB_GROUP_BITS - cnt); + visit(ctx, &fb[group_ind], mask); + } +} + +JEMALLOC_ALWAYS_INLINE void +fb_assign_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { + bool val = *(bool *)ctx; + if (val) { + *fb |= mask; + } else { + *fb &= ~mask; } } /* Sets the cnt bits starting at position start. Must not have a 0 count. */ static inline void fb_set_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - fb_assign_impl(fb, nbits, start, cnt, true); + bool val = true; + fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); } /* Unsets the cnt bits starting at position start. Must not have a 0 count. */ static inline void fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { - fb_assign_impl(fb, nbits, start, cnt, false); + bool val = false; + fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); } /* From 54c94c1679899db53c4a1002256e8604bc60eb36 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 13:29:13 -0800 Subject: [PATCH 1940/2608] flat bitmap: add scount / ucount functions. 
These can compute the number or set or unset bits in a subrange of the bitmap. --- include/jemalloc/internal/flat_bitmap.h | 24 ++- test/unit/flat_bitmap.c | 236 +++++++++++++++++++++++- 2 files changed, 258 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index 9f1909e1..c8cf518a 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -81,7 +81,8 @@ typedef void (*fb_group_visitor_t)(void *ctx, fb_group_t *fb, fb_group_t mask); JEMALLOC_ALWAYS_INLINE void fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, size_t start, size_t cnt) { - assert(start + cnt - 1 < nbits); + assert(cnt > 0); + assert(start + cnt <= nbits); size_t group_ind = start / FB_GROUP_BITS; size_t start_bit_ind = start % FB_GROUP_BITS; /* @@ -143,6 +144,27 @@ fb_unset_range(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { fb_visit_impl(fb, nbits, &fb_assign_visitor, &val, start, cnt); } +JEMALLOC_ALWAYS_INLINE void +fb_scount_visitor(void *ctx, fb_group_t *fb, fb_group_t mask) { + size_t *scount = (size_t *)ctx; + *scount += popcount_lu(*fb & mask); +} + +/* Finds the number of set bit in the of length cnt starting at start. */ +JEMALLOC_ALWAYS_INLINE size_t +fb_scount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + size_t scount = 0; + fb_visit_impl(fb, nbits, &fb_scount_visitor, &scount, start, cnt); + return scount; +} + +/* Finds the number of unset bit in the of length cnt starting at start. */ +JEMALLOC_ALWAYS_INLINE size_t +fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { + size_t scount = fb_scount(fb, nbits, start, cnt); + return cnt - scount; +} + /* * An implementation detail; find the first bit at position >= min_bit with the * value val. 
diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index 2f360d30..f0883790 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -576,6 +576,237 @@ TEST_BEGIN(test_iter_range_exhaustive) { } TEST_END +/* + * If all set bits in the bitmap are contiguous, in [set_start, set_end), + * returns the number of set bits in [scount_start, scount_end). + */ +static size_t +scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, + size_t scount_end) { + /* No overlap. */ + if (set_end <= scount_start || scount_end <= set_start) { + return 0; + } + /* set range contains scount range */ + if (set_start <= scount_start && set_end >= scount_end) { + return scount_end - scount_start; + } + /* scount range contains set range. */ + if (scount_start <= set_start && scount_end >= set_end) { + return set_end - set_start; + } + /* Partial overlap, with set range starting first. */ + if (set_start < scount_start && set_end < scount_end) { + return set_end - scount_start; + } + /* Partial overlap, with scount range starting first. */ + if (scount_start < set_start && scount_end < set_end) { + return scount_end - set_start; + } + /* + * Trigger an assert failure; the above list should have been + * exhaustive. + */ + unreachable(); +} + +static size_t +ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, + size_t ucount_end) { + /* No overlap. */ + if (set_end <= ucount_start || ucount_end <= set_start) { + return ucount_end - ucount_start; + } + /* set range contains ucount range */ + if (set_start <= ucount_start && set_end >= ucount_end) { + return 0; + } + /* ucount range contains set range. */ + if (ucount_start <= set_start && ucount_end >= set_end) { + return (ucount_end - ucount_start) - (set_end - set_start); + } + /* Partial overlap, with set range starting first. */ + if (set_start < ucount_start && set_end < ucount_end) { + return ucount_end - set_end; + } + /* Partial overlap, with ucount range starting first. 
*/ + if (ucount_start < set_start && ucount_end < set_end) { + return set_start - ucount_start; + } + /* + * Trigger an assert failure; the above list should have been + * exhaustive. + */ + unreachable(); +} + +static void +expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start, + size_t set_end) { + for (size_t i = 0; i < nbits; i++) { + for (size_t j = i + 1; j <= nbits; j++) { + size_t cnt = j - i; + size_t scount_expected = scount_contiguous(set_start, + set_end, i, j); + size_t scount_computed = fb_scount(fb, nbits, i, cnt); + expect_zu_eq(scount_expected, scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with bits set in [%zu, %zu)", + nbits, i, cnt, set_start, set_end); + + size_t ucount_expected = ucount_contiguous(set_start, + set_end, i, j); + size_t ucount_computed = fb_ucount(fb, nbits, i, cnt); + assert_zu_eq(ucount_expected, ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with bits set in [%zu, %zu)", + nbits, i, cnt, set_start, set_end); + + } + } +} + +static void +do_test_count_contiguous(size_t nbits) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb = malloc(sz); + + fb_init(fb, nbits); + + expect_count_match_contiguous(fb, nbits, 0, 0); + for (size_t i = 0; i < nbits; i++) { + fb_set(fb, nbits, i); + expect_count_match_contiguous(fb, nbits, 0, i + 1); + } + + for (size_t i = 0; i < nbits; i++) { + fb_unset(fb, nbits, i); + expect_count_match_contiguous(fb, nbits, i + 1, nbits); + } + + free(fb); +} + +TEST_BEGIN(test_count_contiguous_simple) { + enum {nbits = 300}; + fb_group_t fb[FB_NGROUPS(nbits)]; + fb_init(fb, nbits); + /* Just an arbitrary number. 
*/ + size_t start = 23; + + fb_set_range(fb, nbits, start, 30 - start); + expect_count_match_contiguous(fb, nbits, start, 30); + + fb_set_range(fb, nbits, start, 40 - start); + expect_count_match_contiguous(fb, nbits, start, 40); + + fb_set_range(fb, nbits, start, 70 - start); + expect_count_match_contiguous(fb, nbits, start, 70); + + fb_set_range(fb, nbits, start, 120 - start); + expect_count_match_contiguous(fb, nbits, start, 120); + + fb_set_range(fb, nbits, start, 150 - start); + expect_count_match_contiguous(fb, nbits, start, 150); + + fb_set_range(fb, nbits, start, 200 - start); + expect_count_match_contiguous(fb, nbits, start, 200); + + fb_set_range(fb, nbits, start, 290 - start); + expect_count_match_contiguous(fb, nbits, start, 290); +} +TEST_END + +TEST_BEGIN(test_count_contiguous) { +#define NB(nbits) \ + /* This test is *particularly* slow in debug builds. */ \ + if ((!config_debug && nbits < 300) || nbits < 150) { \ + do_test_count_contiguous(nbits); \ + } + NBITS_TAB +#undef NB +} +TEST_END + +static void +expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, + size_t nbits) { + for (size_t i = 0; i < nbits; i++) { + for (size_t j = i + 1; j <= nbits; j++) { + size_t cnt = j - i; + size_t odd_scount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 1); + size_t odd_scount_computed = fb_scount(fb_odd, nbits, + i, j - i); + assert_zu_eq(odd_scount, odd_scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t odd_ucount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 0); + size_t odd_ucount_computed = fb_ucount(fb_odd, nbits, + i, j - i); + assert_zu_eq(odd_ucount, odd_ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t even_scount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 0); + size_t even_scount_computed = fb_scount(fb_even, nbits, + i, j - i); + 
assert_zu_eq(even_scount, even_scount_computed, + "fb_scount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + + size_t even_ucount = cnt / 2 + + (size_t)(cnt % 2 == 1 && i % 2 == 1); + size_t even_ucount_computed = fb_ucount(fb_even, nbits, + i, j - i); + assert_zu_eq(even_ucount, even_ucount_computed, + "fb_ucount error with nbits=%zu, start=%zu, " + "cnt=%zu, with alternating bits set.", + nbits, i, j - i); + } + } +} + +static void +do_test_count_alternating(size_t nbits) { + if (nbits > 1000) { + return; + } + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb_even = malloc(sz); + fb_group_t *fb_odd = malloc(sz); + + fb_init(fb_even, nbits); + fb_init(fb_odd, nbits); + + for (size_t i = 0; i < nbits; i++) { + if (i % 2 == 0) { + fb_set(fb_even, nbits, i); + } else { + fb_set(fb_odd, nbits, i); + } + } + + expect_count_match_alternating(fb_even, fb_odd, nbits); + + free(fb_even); + free(fb_odd); +} + +TEST_BEGIN(test_count_alternating) { +#define NB(nbits) \ + do_test_count_alternating(nbits); + NBITS_TAB +#undef NB +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -586,5 +817,8 @@ main(void) { test_range_simple, test_empty_full, test_iter_range_simple, - test_iter_range_exhaustive); + test_iter_range_exhaustive, + test_count_contiguous_simple, + test_count_contiguous, + test_count_alternating); } From f51948d9e11046ed0b131767bad47879807e2d8b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 14:20:45 -0800 Subject: [PATCH 1941/2608] psset unit test: fix a bug. The next commit adds assertions that reveal a bug in the test code (double-free). Fix it. 
--- test/unit/psset.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test/unit/psset.c b/test/unit/psset.c index 4147729c..6f35fa8d 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -204,7 +204,11 @@ TEST_BEGIN(test_reuse) { expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_3], index_of_3, 3); - /* Free up a 4-page hole at the end. */ + /* + * Free up a 4-page hole at the end. Recall that the pages at offsets 0 + * and 1 mod 4 were freed above, so we just have to free the last + * allocations. + */ ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Nonempty pageslab evicted"); ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 2]); @@ -212,8 +216,6 @@ TEST_BEGIN(test_reuse) { /* Make sure we can satisfy an allocation at the very end of a slab. */ size_t index_of_4 = HUGEPAGE_PAGES - 4; - ps = test_psset_dalloc(&psset, &alloc[index_of_4]); - expect_ptr_null(ps, "Nonempty pageslab evicted"); err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_4], index_of_4, 4); @@ -405,7 +407,8 @@ TEST_END /* * Fills in and inserts two pageslabs, with the first better than the second, * and each fully allocated (into the allocations in allocs and worse_allocs, - * each of which should be HUGEPAGE_PAGES long). + * each of which should be HUGEPAGE_PAGES long), except for a single free page + * at the end. * * (There's nothing magic about these numbers; it's just useful to share the * setup between the oldest fit and the insert/remove test). @@ -418,7 +421,7 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE); /* * This pageslab would be better from an address-first-fit POV, but - * better from an age POV. + * worse from an age POV. 
*/ hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1); From a559caf74aa5421f608a59bd2d38da688b1f2572 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 14:21:36 -0800 Subject: [PATCH 1942/2608] hpdata: Strengthen assertions. Now that we have flat bitmap bit counting functions, we can easily assert that nfree is always correct. While we're tightening up this code, enforce consistency on API boundaries as well. --- include/jemalloc/internal/hpdata.h | 11 +++++++++-- src/hpdata.c | 9 ++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index e8433c53..fdd6673f 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -114,8 +114,15 @@ hpdata_assert_empty(hpdata_t *hpdata) { static inline bool hpdata_consistent(hpdata_t *hpdata) { - return fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) - == hpdata_longest_free_range_get(hpdata); + if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + != hpdata_longest_free_range_get(hpdata)) { + return false; + } + if (fb_ucount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata_nfree_get(hpdata)) { + return false; + } + return true; } static inline void diff --git a/src/hpdata.c b/src/hpdata.c index 847eb9da..a242efea 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -17,7 +17,6 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) - void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); @@ -26,10 +25,13 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); fb_init(hpdata->active_pages, HUGEPAGE_PAGES); + + hpdata_assert_consistent(hpdata); } void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { + 
hpdata_assert_consistent(hpdata); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -93,12 +95,15 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { } hpdata_longest_free_range_set(hpdata, largest_unchosen_range); + hpdata_assert_consistent(hpdata); return (void *)( (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { + hpdata_assert_consistent(hpdata); + assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) >> LG_PAGE; @@ -119,4 +124,6 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) + npages); + + hpdata_assert_consistent(hpdata); } From 9522ae41d6167ea32a4b30ffcf0b21fc4db80c2b Mon Sep 17 00:00:00 2001 From: Aditya Kumar Date: Sun, 6 Dec 2020 19:03:13 -0800 Subject: [PATCH 1943/2608] Move n_search outside of assert as reported by static analyzer --- src/background_thread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index d4f96b1a..7302a303 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -185,7 +185,8 @@ arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, lb = target; npurge_lb = npurge; } - assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + ++n_search; } interval = decay_interval_ns * (ub + lb) / 2; label_done: From 0dfdd31e0fc69206b7198b52f4bd4a8eb805d8be Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 2 Dec 2020 17:17:28 -0800 Subject: [PATCH 1944/2608] Add tiny batch size to batch allocation stress test --- test/stress/batch_alloc.c | 162 +++++++++++++++++++++++++++++++------- 1 file changed, 133 insertions(+), 29 deletions(-) diff --git a/test/stress/batch_alloc.c 
b/test/stress/batch_alloc.c index b203e05e..14a870e7 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -1,10 +1,15 @@ #include "test/jemalloc_test.h" #include "test/bench.h" -#define BATCH (1000 * 1000) -#define HUGE_BATCH (100 * BATCH) -static void *batch_ptrs[HUGE_BATCH]; -static void *item_ptrs[HUGE_BATCH]; +#define TINY_BATCH 10 +#define TINY_BATCH_ITER (10 * 1000 * 1000) +#define HUGE_BATCH (1000 * 1000) +#define HUGE_BATCH_ITER 100 +#define LEN (100 * 1000 * 1000) +static void *batch_ptrs[LEN]; +static size_t batch_ptrs_next = 0; +static void *item_ptrs[LEN]; +static size_t item_ptrs_next = 0; #define SIZE 7 @@ -18,7 +23,8 @@ struct batch_alloc_packet_s { static void batch_alloc_wrapper(size_t batch) { - batch_alloc_packet_t batch_alloc_packet = {batch_ptrs, batch, SIZE, 0}; + batch_alloc_packet_t batch_alloc_packet = + {batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; size_t filled; size_t len = sizeof(size_t); assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, @@ -28,14 +34,14 @@ batch_alloc_wrapper(size_t batch) { static void item_alloc_wrapper(size_t batch) { - for (size_t i = 0; i < batch; ++i) { + for (size_t i = item_ptrs_next, end = i + batch; i < end; ++i) { item_ptrs[i] = malloc(SIZE); } } static void -release_and_clear(void **ptrs, size_t batch) { - for (size_t i = 0; i < batch; ++i) { +release_and_clear(void **ptrs, size_t len) { + for (size_t i = 0; i < len; ++i) { void *p = ptrs[i]; assert_ptr_not_null(p, "allocation failed"); sdallocx(p, SIZE, 0); @@ -44,45 +50,143 @@ release_and_clear(void **ptrs, size_t batch) { } static void -batch_alloc_small_can_repeat() { - batch_alloc_wrapper(BATCH); - release_and_clear(batch_ptrs, BATCH); +batch_alloc_without_free(size_t batch) { + batch_alloc_wrapper(batch); + batch_ptrs_next += batch; } static void -item_alloc_small_can_repeat() { - item_alloc_wrapper(BATCH); - release_and_clear(item_ptrs, BATCH); +item_alloc_without_free(size_t batch) { + item_alloc_wrapper(batch); + 
item_ptrs_next += batch; } -TEST_BEGIN(test_small_batch_with_free) { - compare_funcs(10, 100, - "batch allocation", batch_alloc_small_can_repeat, - "item allocation", item_alloc_small_can_repeat); +static void +batch_alloc_with_free(size_t batch) { + batch_alloc_wrapper(batch); + release_and_clear(batch_ptrs + batch_ptrs_next, batch); + batch_ptrs_next += batch; +} + +static void +item_alloc_with_free(size_t batch) { + item_alloc_wrapper(batch); + release_and_clear(item_ptrs + item_ptrs_next, batch); + item_ptrs_next += batch; +} + +static void +compare_without_free(size_t batch, size_t iter, + void (*batch_alloc_without_free_func)(void), + void (*item_alloc_without_free_func)(void)) { + assert(batch_ptrs_next == 0); + assert(item_ptrs_next == 0); + assert(batch * iter <= LEN); + for (size_t i = 0; i < iter; ++i) { + batch_alloc_without_free_func(); + item_alloc_without_free_func(); + } + release_and_clear(batch_ptrs, batch_ptrs_next); + batch_ptrs_next = 0; + release_and_clear(item_ptrs, item_ptrs_next); + item_ptrs_next = 0; + compare_funcs(0, iter, + "batch allocation", batch_alloc_without_free_func, + "item allocation", item_alloc_without_free_func); + release_and_clear(batch_ptrs, batch_ptrs_next); + batch_ptrs_next = 0; + release_and_clear(item_ptrs, item_ptrs_next); + item_ptrs_next = 0; +} + +static void +compare_with_free(size_t batch, size_t iter, + void (*batch_alloc_with_free_func)(void), + void (*item_alloc_with_free_func)(void)) { + assert(batch_ptrs_next == 0); + assert(item_ptrs_next == 0); + assert(batch * iter <= LEN); + for (size_t i = 0; i < iter; ++i) { + batch_alloc_with_free_func(); + item_alloc_with_free_func(); + } + batch_ptrs_next = 0; + item_ptrs_next = 0; + compare_funcs(0, iter, + "batch allocation", batch_alloc_with_free_func, + "item allocation", item_alloc_with_free_func); + batch_ptrs_next = 0; + item_ptrs_next = 0; +} + +static void +batch_alloc_without_free_tiny() { + batch_alloc_without_free(TINY_BATCH); +} + +static void 
+item_alloc_without_free_tiny() { + item_alloc_without_free(TINY_BATCH); +} + +TEST_BEGIN(test_tiny_batch_without_free) { + compare_without_free(TINY_BATCH, TINY_BATCH_ITER, + batch_alloc_without_free_tiny, item_alloc_without_free_tiny); } TEST_END static void -batch_alloc_huge_cannot_repeat() { - batch_alloc_wrapper(HUGE_BATCH); +batch_alloc_with_free_tiny() { + batch_alloc_with_free(TINY_BATCH); } static void -item_alloc_huge_cannot_repeat() { - item_alloc_wrapper(HUGE_BATCH); +item_alloc_with_free_tiny() { + item_alloc_with_free(TINY_BATCH); +} + +TEST_BEGIN(test_tiny_batch_with_free) { + compare_with_free(TINY_BATCH, TINY_BATCH_ITER, + batch_alloc_with_free_tiny, item_alloc_with_free_tiny); +} +TEST_END + +static void +batch_alloc_without_free_huge() { + batch_alloc_without_free(HUGE_BATCH); +} + +static void +item_alloc_without_free_huge() { + item_alloc_without_free(HUGE_BATCH); } TEST_BEGIN(test_huge_batch_without_free) { - compare_funcs(0, 1, - "batch allocation", batch_alloc_huge_cannot_repeat, - "item allocation", item_alloc_huge_cannot_repeat); - release_and_clear(batch_ptrs, HUGE_BATCH); - release_and_clear(item_ptrs, HUGE_BATCH); + compare_without_free(HUGE_BATCH, HUGE_BATCH_ITER, + batch_alloc_without_free_huge, item_alloc_without_free_huge); +} +TEST_END + +static void +batch_alloc_with_free_huge() { + batch_alloc_with_free(HUGE_BATCH); +} + +static void +item_alloc_with_free_huge() { + item_alloc_with_free(HUGE_BATCH); +} + +TEST_BEGIN(test_huge_batch_with_free) { + compare_with_free(HUGE_BATCH, HUGE_BATCH_ITER, + batch_alloc_with_free_huge, item_alloc_with_free_huge); } TEST_END int main(void) { return test_no_reentrancy( - test_small_batch_with_free, - test_huge_batch_without_free); + test_tiny_batch_without_free, + test_tiny_batch_with_free, + test_huge_batch_without_free, + test_huge_batch_with_free); } From e82771807ec33c6a7db7612158cbfb9af87818b9 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 2 Dec 2020 17:09:59 -0800 Subject: [PATCH 
1945/2608] Cache mallctl mib for batch allocation stress test --- test/stress/batch_alloc.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 14a870e7..427e1cba 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -1,6 +1,10 @@ #include "test/jemalloc_test.h" #include "test/bench.h" +#define MIBLEN 8 +static size_t mib[MIBLEN]; +static size_t miblen = MIBLEN; + #define TINY_BATCH 10 #define TINY_BATCH_ITER (10 * 1000 * 1000) #define HUGE_BATCH (1000 * 1000) @@ -27,7 +31,7 @@ batch_alloc_wrapper(size_t batch) { {batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; size_t filled; size_t len = sizeof(size_t); - assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, + assert_d_eq(mallctlbymib(mib, miblen, &filled, &len, &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); assert_zu_eq(filled, batch, ""); } @@ -184,6 +188,8 @@ TEST_BEGIN(test_huge_batch_with_free) { TEST_END int main(void) { + assert_d_eq(mallctlnametomib("experimental.batch_alloc", mib, &miblen), + 0, ""); return test_no_reentrancy( test_tiny_batch_without_free, test_tiny_batch_with_free, From 1e3b8636ff02fa2150cd84720727d300455b4c63 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 09:39:27 -0800 Subject: [PATCH 1946/2608] HPA: Remove unused malloc_conf options. 
--- .../internal/jemalloc_internal_externs.h | 3 --- src/ctl.c | 9 --------- src/jemalloc.c | 18 ------------------ src/stats.c | 3 --- test/unit/mallctl.c | 3 --- 5 files changed, 36 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 814a7a1b..fb8dc3fe 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -13,10 +13,7 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_confirm_conf; extern bool opt_hpa; -extern size_t opt_hpa_slab_goal; extern size_t opt_hpa_slab_max_alloc; -extern size_t opt_hpa_small_max; -extern size_t opt_hpa_large_min; extern size_t opt_hpa_sec_max_alloc; extern size_t opt_hpa_sec_max_bytes; diff --git a/src/ctl.c b/src/ctl.c index 4266e4bd..f113742f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -91,10 +91,7 @@ CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) -CTL_PROTO(opt_hpa_slab_goal) CTL_PROTO(opt_hpa_slab_max_alloc) -CTL_PROTO(opt_hpa_small_max) -CTL_PROTO(opt_hpa_large_min) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -369,10 +366,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, - {NAME("hpa_slab_goal"), CTL(opt_hpa_slab_goal)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, - {NAME("hpa_small_max"), CTL(opt_hpa_small_max)}, - {NAME("hpa_large_min"), CTL(opt_hpa_large_min)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -1920,10 +1914,7 @@ CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, 
opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_slab_goal, opt_hpa_slab_goal, size_t) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) -CTL_RO_NL_GEN(opt_hpa_small_max, opt_hpa_small_max, size_t) -CTL_RO_NL_GEN(opt_hpa_large_min, opt_hpa_large_min, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 30c2fe16..c2817cf1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -138,9 +138,6 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; size_t opt_hpa_slab_max_alloc = 256 * 1024; -size_t opt_hpa_slab_goal = 128 * 1024; -size_t opt_hpa_small_max = 32 * 1024; -size_t opt_hpa_large_min = 4 * 1024 * 1024; size_t opt_hpa_sec_max_alloc = 32 * 1024; /* These settings correspond to a maximum of 1MB cached per arena. */ @@ -1506,21 +1503,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - /* - * These no longer have any effect, but various - * non-public test configs set them as we iterate on HPA - * development. We parse and report them for now, but - * they don't affect behavior. Eventually they'll be - * removed. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_slab_goal, "hpa_slab_goal", - PAGE, 512 * PAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, - true) - CONF_HANDLE_SIZE_T(opt_hpa_small_max, "hpa_small_max", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_large_min, "hpa_large_min", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { sc_data_init(sc_data); diff --git a/src/stats.c b/src/stats.c index aab9fb5c..ab440c4d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1248,10 +1248,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("percpu_arena") OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_BOOL("hpa") - OPT_WRITE_SIZE_T("hpa_slab_goal") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") - OPT_WRITE_SIZE_T("hpa_small_max") - OPT_WRITE_SIZE_T("hpa_large_min") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d4e2621e..72dc0f3d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -164,10 +164,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, retain, always); TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); - TEST_MALLCTL_OPT(size_t, hpa_slab_goal, always); TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); - TEST_MALLCTL_OPT(size_t, hpa_small_max, always); - TEST_MALLCTL_OPT(size_t, hpa_large_min, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); From 986cbe4881609f46897915e75a1e58971a814d84 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Mon, 2 Nov 2020 16:15:14 -0800 Subject: [PATCH 1947/2608] Disable JEMALLOC_TLS for QNX TLS access triggers recursive malloc during bootstrapping. Need to use pthread_getspecific and pthread_setspecific with a follow up fix. 
--- configure.ac | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/configure.ac b/configure.ac index 8284e87a..468c0258 100644 --- a/configure.ac +++ b/configure.ac @@ -765,6 +765,10 @@ case "${host}" in default_retain="1" fi ;; + *-*-nto-qnx) + abi="elf" + force_tls="0" + ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) abi="elf" From 96a59c3bb59a1d725c266019ca0acf0bc28ff1a5 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Thu, 29 Oct 2020 18:28:35 -0700 Subject: [PATCH 1948/2608] Fix recursive malloc during bootstrap on QNX pthread_key_create on QNX triggers recursive allocation during tsd bootstrapping. Using tsd_init_check_recursion to detect that. Before pthread_key_create, the address of tsd_boot_wrapper is returned from tsd_get_wrapper instead of using TLS to store the pointer. tsd_set_wrapper becomes a no-op. After that, the address of tsd_boot_wrapper is written to TLS and bootstrap continues as before. Signed-off-by: Jin Qian --- include/jemalloc/internal/tsd_generic.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index cf73c0c7..a718472f 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -52,6 +52,9 @@ tsd_cleanup_wrapper(void *arg) { JEMALLOC_ALWAYS_INLINE void tsd_wrapper_set(tsd_wrapper_t *wrapper) { + if (unlikely(!tsd_booted)) { + return; + } if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) { malloc_write(": Error setting TSD\n"); abort(); @@ -60,7 +63,13 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) { JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * tsd_wrapper_get(bool init) { - tsd_wrapper_t *wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); + tsd_wrapper_t *wrapper; + + if (unlikely(!tsd_booted)) { + return &tsd_boot_wrapper; + } + + wrapper = (tsd_wrapper_t *)pthread_getspecific(tsd_tsd); if (init && unlikely(wrapper == NULL)) { tsd_init_block_t block; @@ 
-91,11 +100,21 @@ tsd_wrapper_get(bool init) { JEMALLOC_ALWAYS_INLINE bool tsd_boot0(void) { + tsd_wrapper_t *wrapper; + tsd_init_block_t block; + + wrapper = (tsd_wrapper_t *) + tsd_init_check_recursion(&tsd_init_head, &block); + if (wrapper) { + return false; + } + block.data = &tsd_boot_wrapper; if (pthread_key_create(&tsd_tsd, tsd_cleanup_wrapper) != 0) { return true; } - tsd_wrapper_set(&tsd_boot_wrapper); tsd_booted = true; + tsd_wrapper_set(&tsd_boot_wrapper); + tsd_init_finish(&tsd_init_head, &block); return false; } From 26c1dc5a3aa49e95bfdf5af0d01d784a67edf0cb Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 30 Oct 2020 13:54:36 -0700 Subject: [PATCH 1949/2608] Support AutoConf for posix_madvise and POSIX_MADV_DONTNEED --- configure.ac | 20 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 11 ++++++++++ 2 files changed, 31 insertions(+) diff --git a/configure.ac b/configure.ac index 468c0258..e7430d83 100644 --- a/configure.ac +++ b/configure.ac @@ -2204,6 +2204,26 @@ case "${host_cpu}" in fi ;; esac +else + dnl Check for posix_madvise. + JE_COMPILABLE([posix_madvise], [ + #include + ], [ + posix_madvise((void *)0, 0, 0); + ], [je_cv_posix_madvise]) + if test "x${je_cv_posix_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ]) + + dnl Check for posix_madvise(..., POSIX_MADV_DONTNEED). 
+ JE_COMPILABLE([posix_madvise(..., POSIX_MADV_DONTNEED)], [ + #include + ], [ + posix_madvise((void *)0, 0, POSIX_MADV_DONTNEED); + ], [je_cv_posix_madv_dontneed]) + if test "x${je_cv_posix_madv_dontneed}" = "xyes" ; then + AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ]) + fi + fi fi dnl ============================================================================ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index ff0e15b1..dc4f01fb 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -318,6 +318,17 @@ */ #undef JEMALLOC_THP +/* Defined if posix_madvise is available. */ +#undef JEMALLOC_HAVE_POSIX_MADVISE + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +#undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED +#undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS + /* * Defined if memcntl page admin call is supported */ From 4e3fe218e90c125a3d9616a0b50e8ccb506e9a44 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 30 Oct 2020 14:09:05 -0700 Subject: [PATCH 1950/2608] Use posix_madvise to purge pages when available --- src/pages.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pages.c b/src/pages.c index 59a03f21..b23c9e9e 100644 --- a/src/pages.c +++ b/src/pages.c @@ -332,6 +332,9 @@ pages_purge_lazy(void *addr, size_t size) { #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ + !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) + return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #else not_reached(); #endif @@ -349,6 +352,9 @@ pages_purge_forced(void *addr, size_t size) { #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) 
return (madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ + defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) + return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. */ return pages_commit(addr, size); From 063a767ffe453624a1d4c5b26115efcc1ea5f2e1 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Fri, 30 Oct 2020 14:36:07 -0700 Subject: [PATCH 1951/2608] Define JEMALLOC_HAS_ALLOCA_H for QNX QNX has --- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index e7430d83..eba3e786 100644 --- a/configure.ac +++ b/configure.ac @@ -768,6 +768,7 @@ case "${host}" in *-*-nto-qnx) abi="elf" force_tls="0" + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) From 91e006c4c2c523f185077015e66d99f862165262 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 09:56:53 -0700 Subject: [PATCH 1952/2608] Enable ctl_lookup() to start from arbitrary node --- src/ctl.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f113742f..f7ed1483 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1328,8 +1328,8 @@ label_return: } static int -ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, - size_t *mibp, size_t *depthp) { +ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, + const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp) { int ret; const char *elm, *tdot, *dot; size_t elen, i, j; @@ -1343,7 +1343,7 @@ ctl_lookup(tsdn_t *tsdn, const char *name, ctl_node_t const **nodesp, ret = ENOENT; goto label_return; } - node = super_root_node; + node = starting_node; for (i = 0; i < *depthp; i++) { assert(node); assert(node->nchildren > 0); @@ -1440,7 +1440,8 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } depth = CTL_MAX_DEPTH; - ret = 
ctl_lookup(tsd_tsdn(tsd), name, nodes, mib, &depth); + ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, nodes, mib, + &depth); if (ret != 0) { goto label_return; } @@ -1466,7 +1467,8 @@ ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { goto label_return; } - ret = ctl_lookup(tsd_tsdn(tsd), name, NULL, mibp, miblenp); + ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, NULL, mibp, + miblenp); label_return: return(ret); } From 3a627b9674a9d12413b01be8c4e7d2d2bf4965e7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 10:08:42 -0700 Subject: [PATCH 1953/2608] No need to record all nodes in ctl_lookup() --- src/ctl.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index f7ed1483..62402208 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1329,7 +1329,8 @@ label_return: static int ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, - const char *name, ctl_node_t const **nodesp, size_t *mibp, size_t *depthp) { + const char *name, const ctl_named_node_t **ending_nodep, size_t *mibp, + size_t *depthp) { int ret; const char *elm, *tdot, *dot; size_t elen, i, j; @@ -1357,10 +1358,6 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, if (strlen(child->name) == elen && strncmp(elm, child->name, elen) == 0) { node = child; - if (nodesp != NULL) { - nodesp[i] = - (const ctl_node_t *)node; - } mibp[i] = j; break; } @@ -1387,9 +1384,6 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, goto label_return; } - if (nodesp != NULL) { - nodesp[i] = (const ctl_node_t *)node; - } mibp[i] = (size_t)index; } @@ -1419,6 +1413,9 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, strchr(elm, '\0'); elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); } + if (ending_nodep != NULL) { + *ending_nodep = node; + } ret = 0; label_return: @@ -1430,7 +1427,6 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, 
size_t newlen) { int ret; size_t depth; - ctl_node_t const *nodes[CTL_MAX_DEPTH]; size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; @@ -1440,13 +1436,12 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, nodes, mib, + ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, &node, mib, &depth); if (ret != 0) { goto label_return; } - node = ctl_named_node(nodes[depth-1]); if (node != NULL && node->ctl) { ret = node->ctl(tsd, mib, depth, oldp, oldlenp, newp, newlen); } else { From 6ab181d2b72ece43cb6bcc706172ff8f0fe7dd51 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 10:36:00 -0700 Subject: [PATCH 1954/2608] Extract node lookup given mib input --- src/ctl.c | 43 +++++++++++++++++++++++++++++-------------- 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 62402208..7bb6c1dc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1468,21 +1468,13 @@ label_return: return(ret); } -int -ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, - size_t *oldlenp, void *newp, size_t newlen) { +static int +ctl_lookupbymib(tsdn_t *tsdn, const ctl_named_node_t **ending_nodep, + const size_t *mib, size_t miblen) { int ret; - const ctl_named_node_t *node; - size_t i; - if (!ctl_initialized && ctl_init(tsd)) { - ret = EAGAIN; - goto label_return; - } - - /* Iterate down the tree. */ - node = super_root_node; - for (i = 0; i < miblen; i++) { + const ctl_named_node_t *node = super_root_node; + for (size_t i = 0; i < miblen; i++) { assert(node); assert(node->nchildren > 0); if (ctl_named_node(node->children) != NULL) { @@ -1497,13 +1489,36 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, /* Indexed element. 
*/ inode = ctl_indexed_node(node->children); - node = inode->index(tsd_tsdn(tsd), mib, miblen, mib[i]); + node = inode->index(tsdn, mib, miblen, mib[i]); if (node == NULL) { ret = ENOENT; goto label_return; } } } + assert(ending_nodep != NULL); + *ending_nodep = node; + ret = 0; + +label_return: + return(ret); +} + +int +ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + const ctl_named_node_t *node; + + if (!ctl_initialized && ctl_init(tsd)) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookupbymib(tsd_tsdn(tsd), &node, mib, miblen); + if (ret != 0) { + goto label_return; + } /* Call the ctl function. */ if (node && node->ctl) { From f2e1a5be776de0a4d12c03820bcb5fb0d475d756 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 13:26:44 -0700 Subject: [PATCH 1955/2608] Do not fail on partial ctl path for ctl_nametomib() We do not fail on partial ctl path when the given `mib` array is shorter than the given name, and we should keep the behavior the same in the reverse case, which I feel is also the more natural way. --- src/ctl.c | 8 ++------ test/unit/mallctl.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 7bb6c1dc..d139e6e0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1387,7 +1387,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, mibp[i] = (size_t)index; } - if (node->ctl != NULL) { + /* Reached the end? */ + if (node->ctl != NULL || *dot == '\0') { /* Terminal node. */ if (*dot != '\0') { /* @@ -1403,11 +1404,6 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, } /* Update elm. */ - if (*dot == '\0') { - /* No more elements. */ - ret = ENOENT; - goto label_return; - } elm = &dot[1]; dot = ((tdot = strchr(elm, '.')) != NULL) ? 
tdot : strchr(elm, '\0'); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 72dc0f3d..3cd0c4d2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -117,6 +117,20 @@ TEST_BEGIN(test_mallctlnametomib_short_mib) { } TEST_END +TEST_BEGIN(test_mallctlnametomib_short_name) { + size_t mib[4]; + size_t miblen; + + miblen = 4; + mib[3] = 42; + expect_d_eq(mallctlnametomib("arenas.bin.0", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + expect_zu_eq(miblen, 3, "Unexpected mib output length"); + expect_zu_eq(mib[3], 42, + "mallctlnametomib() wrote past the end of the input mib"); +} +TEST_END + TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ @@ -1106,6 +1120,7 @@ main(void) { test_mallctlbymib_errors, test_mallctl_read_write, test_mallctlnametomib_short_mib, + test_mallctlnametomib_short_name, test_mallctl_config, test_mallctl_opt, test_manpage_example, From 006dd0414e6356ee76218ca6b2db960fc671df16 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 11:28:22 -0700 Subject: [PATCH 1956/2608] Add partial name-to-mib functionality --- include/jemalloc/internal/ctl.h | 3 +- src/ctl.c | 30 +++++++++++++++++ test/unit/mallctl.c | 57 +++++++++++++++++++++++++++++++++ 3 files changed, 89 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index a6ae05c1..e124977e 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -98,9 +98,10 @@ typedef struct ctl_arenas_s { int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp); - int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp); bool ctl_boot(void); void ctl_prefork(tsdn_t 
*tsdn); void ctl_postfork_parent(tsdn_t *tsdn); diff --git a/src/ctl.c b/src/ctl.c index d139e6e0..307fd2d4 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1528,6 +1528,36 @@ label_return: return(ret); } +int +ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp) { + int ret; + const ctl_named_node_t *node; + + if (!ctl_initialized && ctl_init(tsd)) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookupbymib(tsd_tsdn(tsd), &node, mib, miblen); + if (ret != 0) { + goto label_return; + } + if (node == NULL || node->ctl != NULL) { + ret = ENOENT; + goto label_return; + } + + assert(miblenp != NULL); + assert(*miblenp >= miblen); + *miblenp -= miblen; + ret = ctl_lookup(tsd_tsdn(tsd), node, name, NULL, mib + miblen, + miblenp); + *miblenp += miblen; +label_return: + return(ret); +} + bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3cd0c4d2..7dfc344f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1,5 +1,6 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/ctl.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/util.h" @@ -131,6 +132,61 @@ TEST_BEGIN(test_mallctlnametomib_short_name) { } TEST_END +TEST_BEGIN(test_mallctlmibnametomib) { + size_t mib[4]; + size_t miblen = 4; + uint32_t result, result_ref; + size_t len_result = sizeof(uint32_t); + + tsd_t *tsd = tsd_fetch(); + + /* Error cases */ + assert_d_eq(ctl_mibnametomib(tsd, mib, 0, "bob", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(ctl_mibnametomib(tsd, mib, 0, "9999", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid case. 
*/ + assert_d_eq(ctl_mibnametomib(tsd, mib, 0, "arenas", &miblen), 0, ""); + assert_zu_eq(miblen, 1, ""); + miblen = 4; + assert_d_eq(ctl_mibnametomib(tsd, mib, 1, "bin", &miblen), 0, ""); + assert_zu_eq(miblen, 2, ""); + expect_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), + ENOENT, "mallctlbymib() should fail on partial path"); + + /* Error cases. */ + miblen = 4; + assert_d_eq(ctl_mibnametomib(tsd, mib, 2, "bob", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(ctl_mibnametomib(tsd, mib, 2, "9999", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid case. */ + assert_d_eq(ctl_mibnametomib(tsd, mib, 2, "0", &miblen), 0, ""); + assert_zu_eq(miblen, 3, ""); + expect_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), + ENOENT, "mallctlbymib() should fail on partial path"); + + /* Error cases. */ + miblen = 4; + assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "bob", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "9999", &miblen), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid case. 
*/ + assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "nregs", &miblen), 0, ""); + assert_zu_eq(miblen, 4, ""); + assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); + assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, + NULL, 0), 0, "Unexpected mallctl() failure"); + expect_zu_eq(result, result_ref, + "mallctlbymib() and mallctl() returned different result"); +} +TEST_END + TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ @@ -1121,6 +1177,7 @@ main(void) { test_mallctl_read_write, test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name, + test_mallctlmibnametomib, test_mallctl_config, test_mallctl_opt, test_manpage_example, From 4557c0a67d8804945935b99b5c493d257be71b43 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 15:26:46 -0700 Subject: [PATCH 1957/2608] Enable ctl on partial mib and partial name --- include/jemalloc/internal/ctl.h | 2 ++ src/ctl.c | 45 +++++++++++++++++++++++++++++ test/unit/mallctl.c | 51 +++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index e124977e..174b9f77 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -102,6 +102,8 @@ int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp); +int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); diff --git a/src/ctl.c b/src/ctl.c index 307fd2d4..0f1f652b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1558,6 +1558,51 @@ label_return: return(ret); } +int +ctl_bymibname(tsd_t *tsd, size_t *mib, 
size_t miblen, const char *name, + size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + const ctl_named_node_t *node; + + if (!ctl_initialized && ctl_init(tsd)) { + ret = EAGAIN; + goto label_return; + } + + ret = ctl_lookupbymib(tsd_tsdn(tsd), &node, mib, miblen); + if (ret != 0) { + goto label_return; + } + if (node == NULL || node->ctl != NULL) { + ret = ENOENT; + goto label_return; + } + + assert(miblenp != NULL); + assert(*miblenp >= miblen); + *miblenp -= miblen; + /* + * The same node supplies the starting node and stores the ending node. + */ + ret = ctl_lookup(tsd_tsdn(tsd), node, name, &node, mib + miblen, + miblenp); + *miblenp += miblen; + if (ret != 0) { + goto label_return; + } + + if (node != NULL && node->ctl) { + ret = node->ctl(tsd, mib, *miblenp, oldp, oldlenp, newp, + newlen); + } else { + /* The name refers to a partial path through the ctl tree. */ + ret = ENOENT; + } + +label_return: + return(ret); +} + bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 7dfc344f..3d5b2788 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -187,6 +187,56 @@ TEST_BEGIN(test_mallctlmibnametomib) { } TEST_END +TEST_BEGIN(test_mallctlbymibname) { + size_t mib[4]; + size_t miblen = 4; + uint32_t result, result_ref; + size_t len_result = sizeof(uint32_t); + + tsd_t *tsd = tsd_fetch(); + + /* Error cases. */ + + assert_d_eq(mallctlnametomib("arenas", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + assert_zu_eq(miblen, 1, ""); + + miblen = 4; + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, + &result, &len_result, NULL, 0), ENOENT, ""); + miblen = 4; + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", &miblen, + &result, &len_result, NULL, 0), ENOENT, ""); + assert_zu_eq(miblen, 4, ""); + + /* Valid cases. 
*/ + + assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, + NULL, 0), 0, "Unexpected mallctl() failure"); + miblen = 4; + + assert_d_eq(ctl_bymibname(tsd, mib, 0, "arenas.bin.0.nregs", &miblen, + &result, &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); + + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.nregs", &miblen, &result, + &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); + + assert_d_eq(ctl_bymibname(tsd, mib, 2, "0.nregs", &miblen, &result, + &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); + + assert_d_eq(ctl_bymibname(tsd, mib, 3, "nregs", &miblen, &result, + &len_result, NULL, 0), 0, ""); + assert_zu_eq(miblen, 4, ""); + expect_zu_eq(result, result_ref, "Unexpected result"); +} +TEST_END + TEST_BEGIN(test_mallctl_config) { #define TEST_MALLCTL_CONFIG(config, t) do { \ t oldval; \ @@ -1178,6 +1228,7 @@ main(void) { test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name, test_mallctlmibnametomib, + test_mallctlbymibname, test_mallctl_config, test_mallctl_opt, test_manpage_example, From 74bd63b2034c5f25bbc1fdf46095dfed08fdd2a5 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 17 Dec 2020 14:01:56 -0800 Subject: [PATCH 1958/2608] Optimize stats print using partial name-to-mib --- include/jemalloc/internal/ctl.h | 19 ++ src/stats.c | 306 +++++++++++++++++--------------- 2 files changed, 181 insertions(+), 144 deletions(-) diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 174b9f77..63d27f8a 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -137,4 +137,23 @@ void ctl_mtx_assert_held(tsdn_t *tsdn); } \ } while (0) +#define xmallctlmibnametomib(mib, miblen, name, miblenp) do { \ + if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \ + != 0) { \ 
+ malloc_write( \ + ": Failure in ctl_mibnametomib()\n"); \ + abort(); \ + } \ +} while (0) + +#define xmallctlbymibname(mib, miblen, name, miblenp, oldp, oldlenp, \ + newp, newlen) do { \ + if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \ + oldp, oldlenp, newp, newlen) != 0) { \ + malloc_write( \ + ": Failure in ctl_bymibname()\n"); \ + abort(); \ + } \ +} while (0) + #endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/src/stats.c b/src/stats.c index ab440c4d..999ba9ff 100644 --- a/src/stats.c +++ b/src/stats.c @@ -24,6 +24,22 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { xmallctl(n, (void *)v, &sz, NULL, 0); \ } while (0) +#define CTL_LEAF_PREPARE(mib, miblen, name) do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + xmallctlmibnametomib(mib, miblen, name, &miblen_new); \ + assert(miblen_new > miblen); \ +} while (0) + +#define CTL_LEAF(mib, miblen, leaf, v, t) do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + size_t sz = sizeof(t); \ + xmallctlbymibname(mib, miblen, leaf, &miblen_new, (void *)v, \ + &sz, NULL, 0); \ + assert(miblen_new == miblen + 1); \ +} while (0) + #define CTL_M2_GET(n, i, v, t) do { \ size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ @@ -33,26 +49,6 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) -#define CTL_M2_M4_GET(n, i, j, v, t) do { \ - size_t mib[CTL_MAX_DEPTH]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = (i); \ - mib[4] = (j); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ -} while (0) - -#define CTL_M2_M5_GET(n, i, j, v, t) do { \ - size_t mib[CTL_MAX_DEPTH]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[2] = (i); \ - mib[5] = (j); \ - 
xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ -} while (0) - /******************************************************************************/ /* Data. */ @@ -107,13 +103,6 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { return false; } -#define MUTEX_CTL_STR_MAX_LENGTH 128 -static void -gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, - const char *mutex, const char *counter) { - malloc_snprintf(str, buf_len, "stats.%s.%s.%s", prefix, mutex, counter); -} - static void mutex_stats_init_cols(emitter_row_t *row, const char *table_name, emitter_col_t *name, @@ -150,11 +139,13 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, } static void -mutex_stats_read_global(const char *name, emitter_col_t *col_name, +mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, + emitter_col_t *col_name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], uint64_t uptime) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + CTL_LEAF_PREPARE(mib, miblen, name); + size_t miblen_name = miblen + 1; col_name->str_val = name; @@ -165,44 +156,7 @@ mutex_stats_read_global(const char *name, emitter_col_t *col_name, dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", name, #counter); \ - CTL_GET(cmd, (counter_type *)&dst->bool_val, \ - counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ - } - MUTEX_PROF_COUNTERS -#undef OP -#undef EMITTER_TYPE_uint32_t -#undef EMITTER_TYPE_uint64_t -} - -static void -mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, - const char *name, emitter_col_t *col_name, - emitter_col_t 
col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - - col_name->str_val = name; - - emitter_col_t *dst; -#define EMITTER_TYPE_uint32_t emitter_type_uint32 -#define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[mutex_ind], \ - #counter); \ - CTL_M2_GET(cmd, arena_ind, \ + CTL_LEAF(mib, miblen_name, #counter, \ (counter_type *)&dst->bool_val, counter_type); \ } else { \ emitter_col_t *base = \ @@ -218,11 +172,46 @@ mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, } static void -mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, +mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name, + emitter_col_t *col_name, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], uint64_t uptime) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + CTL_LEAF_PREPARE(mib, miblen, name); + size_t miblen_name = miblen + 1; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_name, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = \ + (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ + } + MUTEX_PROF_COUNTERS +#undef OP 
+#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena_bin(size_t mib[], size_t miblen, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { + CTL_LEAF_PREPARE(mib, miblen, "mutex"); + size_t miblen_mutex = miblen + 1; + emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 @@ -231,9 +220,7 @@ mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, dst = &col_##counter_type[mutex_counter_##counter]; \ dst->type = EMITTER_TYPE_##counter_type; \ if (!derived) { \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #counter); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + CTL_LEAF(mib, miblen_mutex, #counter, \ (counter_type *)&dst->bool_val, counter_type); \ } else { \ emitter_col_t *base = \ @@ -362,6 +349,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "bins"); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "bins"); + + size_t arenas_bin_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -371,8 +366,11 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; - CTL_M2_M4_GET("stats.arenas.0.bins.0.nslabs", i, j, &nslabs, - uint64_t); + stats_arenas_mib[4] = j; + arenas_bin_mib[2] = j; + + CTL_LEAF(stats_arenas_mib, 5, "nslabs", &nslabs, uint64_t); + in_gap_prev = in_gap; in_gap = (nslabs == 0); @@ -381,33 +379,25 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti " ---\n"); } - 
CTL_M2_GET("arenas.bin.0.size", j, ®_size, size_t); - CTL_M2_GET("arenas.bin.0.nregs", j, &nregs, uint32_t); - CTL_M2_GET("arenas.bin.0.slab_size", j, &slab_size, size_t); - CTL_M2_GET("arenas.bin.0.nshards", j, &nshards, uint32_t); - - CTL_M2_M4_GET("stats.arenas.0.bins.0.nmalloc", i, j, &nmalloc, + CTL_LEAF(arenas_bin_mib, 3, "size", ®_size, size_t); + CTL_LEAF(arenas_bin_mib, 3, "nregs", &nregs, uint32_t); + CTL_LEAF(arenas_bin_mib, 3, "slab_size", &slab_size, size_t); + CTL_LEAF(arenas_bin_mib, 3, "nshards", &nshards, uint32_t); + CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "curregs", &curregs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.ndalloc", i, j, &ndalloc, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curregs", i, j, &curregs, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nrequests", i, j, - &nrequests, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nfills", i, j, &nfills, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nflushes", i, j, &nflushes, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nreslabs", i, j, &nreslabs, - uint64_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, - size_t); - CTL_M2_M4_GET("stats.arenas.0.bins.0.nonfull_slabs", i, j, &nonfull_slabs, + CTL_LEAF(stats_arenas_mib, 5, "nfills", &nfills, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "nflushes", &nflushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "nreslabs", &nreslabs, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "curslabs", &curslabs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); if (mutex) { - mutex_stats_read_arena_bin(i, j, col_mutex64, - col_mutex32, uptime); + mutex_stats_read_arena_bin(stats_arenas_mib, 5, + col_mutex64, col_mutex32, uptime); } emitter_json_object_begin(emitter); @@ -524,16 +514,26 @@ 
stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "lextents"); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "lextents"); + + size_t arenas_lextent_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_lextent_mib, 0, "arenas.lextent"); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; - CTL_M2_M4_GET("stats.arenas.0.lextents.0.nmalloc", i, j, - &nmalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.ndalloc", i, j, - &ndalloc, uint64_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.nrequests", i, j, - &nrequests, uint64_t); + stats_arenas_mib[4] = j; + arenas_lextent_mib[2] = j; + + CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, + uint64_t); + in_gap_prev = in_gap; in_gap = (nrequests == 0); @@ -542,9 +542,9 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " ---\n"); } - CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); - CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, - &curlextents, size_t); + CTL_LEAF(arenas_lextent_mib, 3, "size", &lextent_size, size_t); + CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents, + size_t); emitter_json_object_begin(emitter); emitter_json_kv(emitter, "curlextents", emitter_type_size, @@ -598,22 +598,27 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "extents"); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "extents"); + in_gap = 
false; for (j = 0; j < SC_NPSIZES; j++) { size_t ndirty, nmuzzy, nretained, total, dirty_bytes, muzzy_bytes, retained_bytes, total_bytes; - CTL_M2_M4_GET("stats.arenas.0.extents.0.ndirty", i, j, - &ndirty, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.nmuzzy", i, j, - &nmuzzy, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.nretained", i, j, - &nretained, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.dirty_bytes", i, j, - &dirty_bytes, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.muzzy_bytes", i, j, - &muzzy_bytes, size_t); - CTL_M2_M4_GET("stats.arenas.0.extents.0.retained_bytes", i, j, + stats_arenas_mib[4] = j; + + CTL_LEAF(stats_arenas_mib, 5, "ndirty", &ndirty, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nmuzzy", &nmuzzy, size_t); + CTL_LEAF(stats_arenas_mib, 5, "nretained", &nretained, size_t); + CTL_LEAF(stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, + size_t); + CTL_LEAF(stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, + size_t); + CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", &retained_bytes, size_t); + total = ndirty + nmuzzy + nretained; total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; @@ -737,29 +742,29 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = i; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "hpa_shard.nonfull_slabs"); + emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_huge", - i, j, &npageslabs_huge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_huge", - i, j, &nactive_huge, size_t); - CTL_M2_M5_GET( - 
"stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_huge", - i, j, &ninactive_huge, size_t); + stats_arenas_mib[5] = j; - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.npageslabs_nonhuge", - i, j, &npageslabs_nonhuge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.nactive_nonhuge", - i, j, &nactive_nonhuge, size_t); - CTL_M2_M5_GET( - "stats.arenas.0.hpa_shard.nonfull_slabs.0.ninactive_nonhuge", - i, j, &ninactive_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge", + &npageslabs_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", + &nactive_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ninactive_huge", + &ninactive_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", + &npageslabs_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", + &nactive_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ninactive_nonhuge", + &ninactive_nonhuge, size_t); bool in_gap_prev = in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); @@ -812,12 +817,17 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim emitter_json_object_kv_begin(emitter, "mutexes"); emitter_table_row(emitter, &row); + size_t stats_arenas_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); + stats_arenas_mib[2] = arena_ind; + CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); + for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); - mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, - col32, uptime); + mutex_stats_read_arena(stats_arenas_mib, 4, name, &col_name, + col64, col32, uptime); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); /* Close the mutex dict. 
*/ } @@ -1376,22 +1386,25 @@ stats_general_print(emitter_t *emitter) { */ if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "bin"); + size_t arenas_bin_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); for (unsigned i = 0; i < nbins; i++) { + arenas_bin_mib[2] = i; emitter_json_object_begin(emitter); - CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); + CTL_LEAF(arenas_bin_mib, 3, "size", &sv, size_t); emitter_json_kv(emitter, "size", emitter_type_size, &sv); - CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); + CTL_LEAF(arenas_bin_mib, 3, "nregs", &u32v, uint32_t); emitter_json_kv(emitter, "nregs", emitter_type_uint32, &u32v); - CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); + CTL_LEAF(arenas_bin_mib, 3, "slab_size", &sv, size_t); emitter_json_kv(emitter, "slab_size", emitter_type_size, &sv); - CTL_M2_GET("arenas.bin.0.nshards", i, &u32v, uint32_t); + CTL_LEAF(arenas_bin_mib, 3, "nshards", &u32v, uint32_t); emitter_json_kv(emitter, "nshards", emitter_type_uint32, &u32v); @@ -1407,10 +1420,13 @@ stats_general_print(emitter_t *emitter) { if (emitter_outputs_json(emitter)) { emitter_json_array_kv_begin(emitter, "lextent"); + size_t arenas_lextent_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(arenas_lextent_mib, 0, "arenas.lextent"); for (unsigned i = 0; i < nlextents; i++) { + arenas_lextent_mib[2] = i; emitter_json_object_begin(emitter); - CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); + CTL_LEAF(arenas_lextent_mib, 3, "size", &sv, size_t); emitter_json_kv(emitter, "size", emitter_type_size, &sv); @@ -1510,9 +1526,11 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_M2_GET("stats.arenas.0.uptime", 0, &uptime, uint64_t); + size_t stats_mutexes_mib[CTL_MAX_DEPTH]; + CTL_LEAF_PREPARE(stats_mutexes_mib, 0, "stats.mutexes"); for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_read_global(global_mutex_names[i], &name, - col64, col32, uptime); + 
mutex_stats_read_global(stats_mutexes_mib, 2, + global_mutex_names[i], &name, col64, col32, uptime); emitter_json_object_kv_begin(emitter, global_mutex_names[i]); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); From ea013d8fa4eaa0a3d1fa1c15e8506a32f4e70475 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 25 Aug 2020 11:31:58 -0700 Subject: [PATCH 1959/2608] Enforce realloc sizing stability --- src/jemalloc.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c2817cf1..8384cfca 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3391,18 +3391,18 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, - size_t alignment, size_t *usize, bool zero, tcache_t *tcache, + size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, emap_alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, *usize); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, - *usize, alignment, zero, tcache, arena, tctx, hook_args); + usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, zero, tcache, arena, hook_args); @@ -3411,22 +3411,8 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, prof_alloc_rollback(tsd, tctx); return NULL; } - - if (p == old_ptr && alignment != 0) { - /* - * The allocation did not move, so it is possible that 
the size - * class is smaller than would guarantee the requested - * alignment, and that the alignment constraint was - * serendipitously satisfied. Additionally, old_usize may not - * be the same as the current usize because of in-place large - * reallocation. Therefore, query the actual value of usize. - */ - assert(*usize >= isalloc(tsd_tsdn(tsd), p)); - *usize = isalloc(tsd_tsdn(tsd), p); - } - - sample_event = te_prof_sample_event_lookahead(tsd, *usize); - prof_realloc(tsd, p, size, *usize, tctx, prof_active, old_ptr, + assert(usize == isalloc(tsd_tsdn(tsd), p)); + prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, old_usize, &old_prof_info, sample_event); return p; @@ -3464,14 +3450,14 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { assert(alloc_ctx.szind != SC_NSIZES); old_usize = sz_index2size(alloc_ctx.szind); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); + if (aligned_usize_get(size, alignment, &usize, NULL, false)) { + goto label_oom; + } hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, flags, 0}}; if (config_prof && opt_prof) { - if (aligned_usize_get(size, alignment, &usize, NULL, false)) { - goto label_oom; - } - p = irallocx_prof(tsd, ptr, old_usize, size, alignment, &usize, + p = irallocx_prof(tsd, ptr, old_usize, size, alignment, usize, zero, tcache, arena, &alloc_ctx, &hook_args); if (unlikely(p == NULL)) { goto label_oom; @@ -3482,7 +3468,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (unlikely(p == NULL)) { goto label_oom; } - usize = isalloc(tsd_tsdn(tsd), p); + assert(usize == isalloc(tsd_tsdn(tsd), p)); } assert(alignment == 0 || ((uintptr_t)p & (alignment - 1)) == ZU(0)); thread_alloc_event(tsd, usize); From 6c5a3a24dd03e98c8b78178496c2a9756ec1490a Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 24 Aug 2020 15:15:27 -0700 Subject: [PATCH 1960/2608] Omit bin stats rows with no data --- src/stats.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/src/stats.c b/src/stats.c index 999ba9ff..d5b94fb1 100644 --- a/src/stats.c +++ b/src/stats.c @@ -379,6 +379,10 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti " ---\n"); } + if (in_gap && !emitter_outputs_json(emitter)) { + continue; + } + CTL_LEAF(arenas_bin_mib, 3, "size", ®_size, size_t); CTL_LEAF(arenas_bin_mib, 3, "nregs", &nregs, uint32_t); CTL_LEAF(arenas_bin_mib, 3, "slab_size", &slab_size, size_t); From 22d62d8cbd873fd3b2acb4bfccf6a06cd2e0d2e7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 18 Dec 2020 11:06:22 -0800 Subject: [PATCH 1961/2608] Handle ending gap properly for HPA stats --- src/stats.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/stats.c b/src/stats.c index d5b94fb1..7c2707e2 100644 --- a/src/stats.c +++ b/src/stats.c @@ -806,6 +806,9 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ emitter_json_object_end(emitter); /* End "hpa_shard" */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); + } } static void From 8a56d6b6369487a9595dff69c28ccc88073d643e Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 28 Dec 2020 14:30:43 -0800 Subject: [PATCH 1962/2608] Add last-N mutex stats --- include/jemalloc/internal/mutex_prof.h | 4 +++- src/ctl.c | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 3759daaf..a13e285e 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -11,7 +11,9 @@ OP(ctl) \ OP(prof) \ OP(prof_thds_data) \ - OP(prof_dump) + OP(prof_dump) \ + OP(prof_recent_alloc) \ + OP(prof_recent_dump) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index 0f1f652b..8f6aff3e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1232,6 +1232,12 @@ ctl_refresh(tsdn_t *tsdn) { global_prof_mutex_prof_thds_data, 
tdatas_mtx); READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_dump, prof_dump_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_recent_alloc, + prof_recent_alloc_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_recent_dump, + prof_recent_dump_mtx); } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -3344,6 +3350,8 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(bt2gctx_mtx); MUTEX_PROF_RESET(tdatas_mtx); MUTEX_PROF_RESET(prof_dump_mtx); + MUTEX_PROF_RESET(prof_recent_alloc_mtx); + MUTEX_PROF_RESET(prof_recent_dump_mtx); } /* Per arena mutexes. */ From b35ac00d58529b266598322de2529414c91909cd Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Tue, 25 Aug 2020 14:30:37 -0700 Subject: [PATCH 1963/2608] Do not bump to large size for page aligned request --- include/jemalloc/internal/sz.h | 2 +- src/arena.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index b0941169..91940ccd 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -288,7 +288,7 @@ sz_sa2u(size_t size, size_t alignment) { assert(alignment != 0 && ((alignment - 1) & alignment) == 0); /* Try for a small size class. */ - if (size <= SC_SMALL_MAXCLASS && alignment < PAGE) { + if (size <= SC_SMALL_MAXCLASS && alignment <= PAGE) { /* * Round size up to the nearest multiple of alignment. * diff --git a/src/arena.c b/src/arena.c index 209eb347..6a062de2 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1049,10 +1049,17 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero, tcache_t *tcache) { void *ret; - if (usize <= SC_SMALL_MAXCLASS - && (alignment < PAGE - || (alignment == PAGE && (usize & PAGE_MASK) == 0))) { + if (usize <= SC_SMALL_MAXCLASS) { /* Small; alignment doesn't require special slab placement. 
*/ + + /* usize should be a result of sz_sa2u() */ + assert((usize & (alignment - 1)) == 0); + + /* + * Small usize can't come from an alignment larger than a page. + */ + assert(alignment <= PAGE); + ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), zero, tcache, true); } else { From 526180b76d9e54f40d0fb9e58b0647a21a7e5f77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 17:14:30 -0800 Subject: [PATCH 1964/2608] Extent.c: Avoid an rtree NULL-check. The edge case in which pages_map returns (void *)PAGE can trigger an incorrect assertion failure. Avoid it. --- src/extent.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/extent.c b/src/extent.c index 378bc733..c41f17ce 100644 --- a/src/extent.c +++ b/src/extent.c @@ -893,8 +893,22 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } /* Try to coalesce backward. */ - edata_t *prev = emap_lock_edata_from_addr(tsdn, pac->emap, - edata_before_get(edata), inactive_only); + edata_t *prev = NULL; + if (edata_before_get(edata) != NULL) { + /* + * This is subtle; the rtree code asserts that its input + * pointer is non-NULL, and this is a useful thing to + * check. But it's possible that edata corresponds to + * an address of (void *)PAGE (in practice, this has + * only been observed on FreeBSD when address-space + * randomization is on, but it could in principle happen + * anywhere). In this case, edata_before_get(edata) is + * NULL, triggering the assert. 
+ */ + prev = emap_lock_edata_from_addr(tsdn, pac->emap, + edata_before_get(edata), inactive_only); + + } if (prev != NULL) { bool can_coalesce = extent_can_coalesce(ecache, edata, prev); From 83cad746aeb7ed68bedec501b4cb6c0eff438c11 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 11:18:21 -0800 Subject: [PATCH 1965/2608] prof_log: cassert(config_prof) in public functions This lets the compiler infer that the code is dead in builds where profiling is enabled, saving on space there. --- src/prof_log.c | 11 +++++++++++ test/unit/prof_log.c | 4 +++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/prof_log.c b/src/prof_log.c index 3a653fb4..44658211 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -202,6 +202,7 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { + cassert(config_prof); prof_tctx_t *tctx = prof_info->alloc_tctx; malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); @@ -307,6 +308,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) { /* Used in unit tests. */ size_t prof_log_bt_count(void) { + cassert(config_prof); size_t cnt = 0; prof_bt_node_t *node = log_bt_first; while (node != NULL) { @@ -319,6 +321,7 @@ prof_log_bt_count(void) { /* Used in unit tests. */ size_t prof_log_alloc_count(void) { + cassert(config_prof); size_t cnt = 0; prof_alloc_node_t *node = log_alloc_first; while (node != NULL) { @@ -331,6 +334,7 @@ prof_log_alloc_count(void) { /* Used in unit tests. */ size_t prof_log_thr_count(void) { + cassert(config_prof); size_t cnt = 0; prof_thr_node_t *node = log_thr_first; while (node != NULL) { @@ -343,12 +347,14 @@ prof_log_thr_count(void) { /* Used in unit tests. */ bool prof_log_is_logging(void) { + cassert(config_prof); return prof_logging_state == prof_logging_state_started; } /* Used in unit tests. 
*/ bool prof_log_rep_check(void) { + cassert(config_prof); if (prof_logging_state == prof_logging_state_stopped && log_tables_initialized) { return true; @@ -401,11 +407,14 @@ prof_log_rep_check(void) { /* Used in unit tests. */ void prof_log_dummy_set(bool new_value) { + cassert(config_prof); prof_log_dummy = new_value; } bool prof_log_start(tsdn_t *tsdn, const char *filename) { + cassert(config_prof); + if (!opt_prof) { return true; } @@ -586,6 +595,7 @@ prof_log_emit_metadata(emitter_t *emitter) { #define PROF_LOG_STOP_BUFSIZE PROF_DUMP_BUFSIZE bool prof_log_stop(tsdn_t *tsdn) { + cassert(config_prof); if (!opt_prof || !prof_booted) { return true; } @@ -672,6 +682,7 @@ prof_log_stop(tsdn_t *tsdn) { #undef PROF_LOG_STOP_BUFSIZE bool prof_log_init(tsd_t *tsd) { + cassert(config_prof); if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { return true; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 6b2336dc..5ff208e2 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -141,7 +141,9 @@ TEST_END int main(void) { - prof_log_dummy_set(true); + if (config_prof) { + prof_log_dummy_set(true); + } return test_no_reentrancy( test_prof_log_many_logs, test_prof_log_many_traces, From 5d8e70ab26baf712a8741f9ba2acb646fba4de45 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 11:25:13 -0800 Subject: [PATCH 1966/2608] prof_recent: cassert(config_prof) more often. This tells the compiler that these functions are never called, which lets them be optimized away in builds where profiling is disabled. 
--- src/prof_recent.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/prof_recent.c b/src/prof_recent.c index ff876783..af758607 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -63,6 +63,7 @@ increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) { bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) { + cassert(config_prof); assert(opt_prof && prof_booted); malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock); malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -106,6 +107,7 @@ prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) { edata_t * prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) { + cassert(config_prof); return prof_recent_alloc_edata_get_no_lock(n); } @@ -123,16 +125,19 @@ prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) { void edata_prof_recent_alloc_init(edata_t *edata) { + cassert(config_prof); edata_prof_recent_alloc_set_dont_call_directly(edata, NULL); } static inline prof_recent_t * edata_prof_recent_alloc_get_no_lock(const edata_t *edata) { + cassert(config_prof); return edata_prof_recent_alloc_get_dont_call_directly(edata); } prof_recent_t * edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) { + cassert(config_prof); return edata_prof_recent_alloc_get_no_lock(edata); } @@ -189,6 +194,7 @@ edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata, */ void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) { + cassert(config_prof); /* * Check whether the recent allocation record still exists without * trying to acquire the lock. @@ -271,6 +277,7 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { + cassert(config_prof); assert(edata != NULL); prof_tctx_t *tctx = edata_prof_tctx_get(edata); @@ -397,6 +404,7 @@ label_rollback: ssize_t prof_recent_alloc_max_ctl_read() { + cassert(config_prof); /* Don't bother to acquire the lock. 
*/ return prof_recent_alloc_max_get_no_lock(); } @@ -450,6 +458,7 @@ prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) { ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { + cassert(config_prof); assert(max >= -1); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); @@ -521,6 +530,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { #define PROF_RECENT_PRINT_BUFSIZE 65536 void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { + cassert(config_prof); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx); buf_writer_t buf_writer; buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, @@ -570,6 +580,7 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { bool prof_recent_init() { + cassert(config_prof); prof_recent_alloc_max_init(); if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc", From a9fa2defdbe98b849151688cb70e24ba55dc8587 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 12:04:07 -0800 Subject: [PATCH 1967/2608] Add JEMALLOC_COLD, and mark some functions cold. This hints to the compiler that it should care more about space than CPU (among other things). In cases where the compiler lacks profile-guided information, this can be a substantial space savings. For now, we mark the mallctl or atexit driven profiling and stats functions that take up the most space. 
--- configure.ac | 12 ++++++++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ include/jemalloc/jemalloc_macros.h.in | 7 +++++++ src/malloc_io.c | 1 + src/prof_log.c | 7 ++++++- src/prof_recent.c | 1 + src/stats.c | 6 ++++++ 7 files changed, 36 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index eba3e786..53ac7cce 100644 --- a/configure.ac +++ b/configure.ac @@ -914,6 +914,18 @@ if test "x${je_cv_fallthrough}" = "xyes" ; then JE_CXXFLAGS_ADD([-Wimplicit-fallthrough]) fi +dnl Check for cold attribute support. +JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Werror]) +JE_CFLAGS_ADD([-herror_on_warning]) +JE_COMPILABLE([cold attribute], [], + [__attribute__((__cold__)) void foo();], + [je_cv_cold]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_cold}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 032fba4d..cbe2fca6 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -16,6 +16,9 @@ /* Defined if fallthrough attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_FALLTHROUGH +/* Defined if cold attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_COLD + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. 
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 1ceb7b17..5bb5c755 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -85,6 +85,7 @@ # else # define JEMALLOC_ALLOCATOR # endif +# define JEMALLOC_COLD #elif defined(JEMALLOC_HAVE_ATTR) # define JEMALLOC_ATTR(s) __attribute__((s)) # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) @@ -120,6 +121,11 @@ # define JEMALLOC_SECTION(s) JEMALLOC_ATTR(section(s)) # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR +# ifdef JEMALLOC_HAVE_ATTR_COLD +# define JEMALLOC_COLD JEMALLOC_ATTR(__cold__) +# else +# define JEMALLOC_COLD +# endif #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) @@ -133,6 +139,7 @@ # define JEMALLOC_SECTION(s) # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR +# define JEMALLOC_COLD #endif #if defined(__APPLE__) && !defined(JEMALLOC_NO_RENAME) diff --git a/src/malloc_io.c b/src/malloc_io.c index 59a0cbfc..b76885cb 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -321,6 +321,7 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { return s; } +JEMALLOC_COLD size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { size_t i; diff --git a/src/prof_log.c b/src/prof_log.c index 44658211..356a886a 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -200,6 +200,7 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { } } +JEMALLOC_COLD void prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { cassert(config_prof); @@ -411,6 +412,7 @@ prof_log_dummy_set(bool new_value) { prof_log_dummy = new_value; } +JEMALLOC_COLD bool prof_log_start(tsdn_t *tsdn, const char *filename) { cassert(config_prof); @@ -593,6 +595,7 @@ prof_log_emit_metadata(emitter_t *emitter) { } #define PROF_LOG_STOP_BUFSIZE PROF_DUMP_BUFSIZE +JEMALLOC_COLD bool prof_log_stop(tsdn_t *tsdn) { cassert(config_prof); @@ -681,7 +684,9 @@ 
prof_log_stop(tsdn_t *tsdn) { } #undef PROF_LOG_STOP_BUFSIZE -bool prof_log_init(tsd_t *tsd) { +JEMALLOC_COLD +bool +prof_log_init(tsd_t *tsd) { cassert(config_prof); if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { diff --git a/src/prof_recent.c b/src/prof_recent.c index af758607..834a9446 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -528,6 +528,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { } #define PROF_RECENT_PRINT_BUFSIZE 65536 +JEMALLOC_COLD void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { cassert(config_prof); diff --git a/src/stats.c b/src/stats.c index 7c2707e2..dac06834 100644 --- a/src/stats.c +++ b/src/stats.c @@ -281,6 +281,7 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, header_##column_name.str_val = human ? human : #column_name; +JEMALLOC_COLD static void stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { size_t page; @@ -488,6 +489,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti } } +JEMALLOC_COLD static void stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { unsigned nbins, nlextents, j; @@ -576,6 +578,7 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } } +JEMALLOC_COLD static void stats_arena_extents_print(emitter_t *emitter, unsigned i) { unsigned j; @@ -841,6 +844,7 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim emitter_json_object_end(emitter); /* End "mutexes". 
*/ } +JEMALLOC_COLD static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, bool mutex, bool extents, bool hpa) { @@ -1168,6 +1172,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, } } +JEMALLOC_COLD static void stats_general_print(emitter_t *emitter) { const char *cpv; @@ -1445,6 +1450,7 @@ stats_general_print(emitter_t *emitter) { emitter_json_object_end(emitter); /* Close "arenas" */ } +JEMALLOC_COLD static void stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, bool unmerged, bool bins, bool large, bool mutex, bool extents, bool hpa) { From f9bb8dedef92fc00225c52546acfb58bd8e74217 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 17 Dec 2020 12:16:38 -0800 Subject: [PATCH 1968/2608] Un-force-inline do_rallocx. The additional overhead of the function-call setup and flags checking is relatively small, but costs us the replication of the entire realloc pathway in terms of size. --- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8384cfca..b0a3b76b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3418,7 +3418,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return p; } -JEMALLOC_ALWAYS_INLINE void * +static void * do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { void *p; tsd_t *tsd; From afa489c3c5fd16bd31b2756c081c92e08937e6b7 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 11:31:53 -0700 Subject: [PATCH 1969/2608] Record request size in prof info --- include/jemalloc/internal/arena_inlines_b.h | 5 +++-- include/jemalloc/internal/edata.h | 12 ++++++++++++ include/jemalloc/internal/large_externs.h | 2 +- include/jemalloc/internal/prof_inlines.h | 4 ++-- include/jemalloc/internal/prof_structs.h | 2 ++ src/large.c | 4 +++- src/prof.c | 2 +- 7 files changed, 24 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h 
b/include/jemalloc/internal/arena_inlines_b.h index 7971b4c7..aaef45c0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -105,11 +105,12 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { +arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, + size_t size) { cassert(config_prof); assert(!edata_slab_get(edata)); - large_prof_info_set(edata, tctx); + large_prof_info_set(edata, tctx, size); } JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index c0482883..11358ea1 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -40,6 +40,8 @@ typedef enum extent_pai_e extent_pai_t; struct e_prof_info_s { /* Time when this was allocated. */ nstime_t e_prof_alloc_time; + /* Allocation request size. */ + size_t e_prof_alloc_size; /* Points to a prof_tctx_t. 
*/ atomic_p_t e_prof_tctx; /* @@ -390,6 +392,11 @@ edata_prof_alloc_time_get(const edata_t *edata) { return &edata->e_prof_info.e_prof_alloc_time; } +static inline size_t +edata_prof_alloc_size_get(const edata_t *edata) { + return edata->e_prof_info.e_prof_alloc_size; +} + static inline prof_recent_t * edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) { return (prof_recent_t *)atomic_load_p( @@ -526,6 +533,11 @@ edata_prof_alloc_time_set(edata_t *edata, nstime_t *t) { nstime_copy(&edata->e_prof_info.e_prof_alloc_time, t); } +static inline void +edata_prof_alloc_size_set(edata_t *edata, size_t size) { + edata->e_prof_info.e_prof_alloc_size = size; +} + static inline void edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata, prof_recent_t *recent_alloc) { diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 27979648..8e09122d 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -19,6 +19,6 @@ size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent); void large_prof_tctx_reset(edata_t *edata); -void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx); +void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size); #endif /* JEMALLOC_INTERNAL_LARGE_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 62c56832..c76d2ae5 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -98,12 +98,12 @@ prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx) { +prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { cassert(config_prof); assert(edata != NULL); assert((uintptr_t)tctx > (uintptr_t)1U); - 
arena_prof_info_set(tsd, edata, tctx); + arena_prof_info_set(tsd, edata, tctx, size); } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 73ac3d5c..c2a111a9 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -103,6 +103,8 @@ struct prof_info_s { nstime_t alloc_time; /* Points to the prof_tctx_t corresponding to the allocation. */ prof_tctx_t *alloc_tctx; + /* Allocation request size. */ + size_t alloc_size; }; struct prof_gctx_s { diff --git a/src/large.c b/src/large.c index 42d2fd7d..f23839f7 100644 --- a/src/large.c +++ b/src/large.c @@ -281,6 +281,7 @@ large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, if ((uintptr_t)alloc_tctx > (uintptr_t)1U) { nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); + prof_info->alloc_size = edata_prof_alloc_size_get(edata); if (reset_recent) { /* * Reset the pointer on the recent allocation record, @@ -302,10 +303,11 @@ large_prof_tctx_reset(edata_t *edata) { } void -large_prof_info_set(edata_t *edata, prof_tctx_t *tctx) { +large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size) { nstime_t t; nstime_prof_init_update(&t); edata_prof_alloc_time_set(edata, &t); + edata_prof_alloc_size_set(edata, size); edata_prof_recent_alloc_init(edata); large_prof_tctx_set(edata, tctx); } diff --git a/src/prof.c b/src/prof.c index 9b651db8..258b5f2d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -97,7 +97,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); - prof_info_set(tsd, edata, tctx); + prof_info_set(tsd, edata, tctx, size); szind_t szind = sz_size2index(usize); From 40fa4d29d3e938765d0b608f92701410ce90b887 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 18 Dec 2020 17:14:59 -0800 Subject: [PATCH 1970/2608] Track per size class internal fragmentation --- 
Makefile.in | 2 + include/jemalloc/internal/prof_externs.h | 3 + include/jemalloc/internal/prof_stats.h | 17 ++++ include/jemalloc/internal/witness.h | 1 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 3 + src/jemalloc.c | 1 + src/prof.c | 31 +++++-- src/prof_stats.c | 57 +++++++++++++ test/unit/mallctl.c | 1 + test/unit/prof_stats.c | 80 +++++++++++++++++++ test/unit/prof_stats.sh | 5 ++ 15 files changed, 203 insertions(+), 6 deletions(-) create mode 100644 include/jemalloc/internal/prof_stats.h create mode 100644 src/prof_stats.c create mode 100644 test/unit/prof_stats.c create mode 100644 test/unit/prof_stats.sh diff --git a/Makefile.in b/Makefile.in index ba6dd763..3cb3161e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -139,6 +139,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ @@ -248,6 +249,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index b94fbed3..671ac9b8 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -27,6 +27,9 @@ extern ssize_t opt_prof_recent_alloc_max; /* Whether to use thread name provided by the system or by mallctl. */ extern bool opt_prof_sys_thread_name; +/* Whether to record per size class counts and request size totals. */ +extern bool opt_prof_stats; + /* Accessed via prof_active_[gs]et{_unlocked,}(). 
*/ extern bool prof_active; diff --git a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h new file mode 100644 index 00000000..7954e82d --- /dev/null +++ b/include/jemalloc/internal/prof_stats.h @@ -0,0 +1,17 @@ +#ifndef JEMALLOC_INTERNAL_PROF_STATS_H +#define JEMALLOC_INTERNAL_PROF_STATS_H + +typedef struct prof_stats_s prof_stats_t; +struct prof_stats_s { + uint64_t req_sum; + uint64_t count; +}; + +extern malloc_mutex_t prof_stats_mtx; + +void prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size); +void prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size); +void prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats); +void prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats); + +#endif /* JEMALLOC_INTERNAL_PROF_STATS_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 662907c8..66dcf664 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -73,6 +73,7 @@ enum witness_rank_e { WITNESS_RANK_PROF_GDUMP = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_NEXT_THR_UID = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, + WITNESS_RANK_PROF_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = WITNESS_RANK_LEAF, }; typedef enum witness_rank_e witness_rank_t; diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 531dd9a6..9443ac55 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -80,6 +80,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f031fb10..3c4bff62 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -124,6 +124,9 @@ Source Files + + Source Files + Source Files diff --git 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index bc64de5c..fafb4914 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -80,6 +80,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f031fb10..3c4bff62 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -124,6 +124,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index 8f6aff3e..598759cd 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -136,6 +136,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) +CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_zero_realloc) @@ -415,6 +416,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} @@ -2057,6 +2059,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) +CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, diff --git a/src/jemalloc.c b/src/jemalloc.c index b0a3b76b..02714158 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1552,6 +1552,7 @@ malloc_conf_init_helper(sc_data_t 
*sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) + CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") CONF_HANDLE_BOOL(opt_prof_sys_thread_name, "prof_sys_thread_name") if (CONF_MATCH("prof_time_resolution")) { diff --git a/src/prof.c b/src/prof.c index 258b5f2d..0f1f7a71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/thread_event.h" @@ -131,6 +132,10 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, assert(tctx == edata_prof_tctx_get(edata)); prof_recent_alloc(tsd, edata, size, usize); } + + if (opt_prof_stats) { + prof_stats_inc(tsd, szind, size); + } } void @@ -160,6 +165,10 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_try_log(tsd, usize, prof_info); prof_tctx_try_destroy(tsd, tctx); + + if (opt_prof_stats) { + prof_stats_dec(tsd, szind, prof_info->alloc_size); + } } prof_tctx_t * @@ -587,7 +596,13 @@ prof_boot2(tsd_t *tsd, base_t *base) { next_thr_uid = 0; if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_NEXT_THR_UID, + malloc_mutex_rank_exclusive)) { + return true; + } + + if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { return true; } @@ -595,8 +610,9 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", - WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, + "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, + malloc_mutex_rank_exclusive)) { 
return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", @@ -681,9 +697,10 @@ prof_prefork1(tsdn_t *tsdn) { malloc_mutex_prefork(tsdn, &prof_active_mtx); malloc_mutex_prefork(tsdn, &prof_dump_filename_mtx); malloc_mutex_prefork(tsdn, &prof_gdump_mtx); + malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); + malloc_mutex_prefork(tsdn, &prof_stats_mtx); malloc_mutex_prefork(tsdn, &next_thr_uid_mtx); malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx); - malloc_mutex_prefork(tsdn, &prof_recent_alloc_mtx); } } @@ -692,10 +709,11 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_stats_mtx); + malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_parent(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_parent(tsdn, &prof_active_mtx); @@ -719,9 +737,10 @@ prof_postfork_child(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx); + malloc_mutex_postfork_child(tsdn, &prof_stats_mtx); + malloc_mutex_postfork_child(tsdn, &prof_recent_alloc_mtx); malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx); malloc_mutex_postfork_child(tsdn, &prof_dump_filename_mtx); malloc_mutex_postfork_child(tsdn, &prof_active_mtx); diff --git a/src/prof_stats.c b/src/prof_stats.c new file mode 100644 index 00000000..5d1a506b --- /dev/null +++ b/src/prof_stats.c @@ -0,0 +1,57 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/prof_stats.h" + +bool opt_prof_stats = false; 
+malloc_mutex_t prof_stats_mtx; +static prof_stats_t prof_stats_live[PROF_SC_NSIZES]; +static prof_stats_t prof_stats_accum[PROF_SC_NSIZES]; + +static void +prof_stats_enter(tsd_t *tsd, szind_t ind) { + assert(opt_prof && opt_prof_stats); + assert(ind < SC_NSIZES); + malloc_mutex_lock(tsd_tsdn(tsd), &prof_stats_mtx); +} + +static void +prof_stats_leave(tsd_t *tsd) { + malloc_mutex_unlock(tsd_tsdn(tsd), &prof_stats_mtx); +} + +void +prof_stats_inc(tsd_t *tsd, szind_t ind, size_t size) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + prof_stats_live[ind].req_sum += size; + prof_stats_live[ind].count++; + prof_stats_accum[ind].req_sum += size; + prof_stats_accum[ind].count++; + prof_stats_leave(tsd); +} + +void +prof_stats_dec(tsd_t *tsd, szind_t ind, size_t size) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + prof_stats_live[ind].req_sum -= size; + prof_stats_live[ind].count--; + prof_stats_leave(tsd); +} + +void +prof_stats_get_live(tsd_t *tsd, szind_t ind, prof_stats_t *stats) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + memcpy(stats, &prof_stats_live[ind], sizeof(prof_stats_t)); + prof_stats_leave(tsd); +} + +void +prof_stats_get_accum(tsd_t *tsd, szind_t ind, prof_stats_t *stats) { + cassert(config_prof); + prof_stats_enter(tsd, ind); + memcpy(stats, &prof_stats_accum[ind], sizeof(prof_stats_t)); + prof_stats_leave(tsd); +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 3d5b2788..85dcb4e2 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -317,6 +317,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); + TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); #undef TEST_MALLCTL_OPT diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c new file mode 100644 index 00000000..555b69e3 --- /dev/null +++ b/test/unit/prof_stats.c @@ -0,0 +1,80 @@ 
+#include "test/jemalloc_test.h" + +#include "jemalloc/internal/prof_stats.h" + +static void +test_wrapper(szind_t ind) { +#define N_PTRS 3 + assert(opt_prof && opt_prof_stats); + + tsd_t *tsd = tsd_fetch(); + + prof_stats_t live_stats_orig; + prof_stats_get_live(tsd, ind, &live_stats_orig); + prof_stats_t accum_stats_orig; + prof_stats_get_accum(tsd, ind, &accum_stats_orig); + + void *ptrs[N_PTRS]; + + uint64_t live_req_sum = 0; + uint64_t live_count = 0; + uint64_t accum_req_sum = 0; + uint64_t accum_count = 0; + + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + void *p = malloc(sz); + assert_ptr_not_null(p, "malloc() failed"); + ptrs[i] = p; + live_req_sum += sz; + live_count++; + accum_req_sum += sz; + accum_count++; + prof_stats_t live_stats; + prof_stats_get_live(tsd, ind, &live_stats); + expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + live_req_sum, ""); + expect_u64_eq(live_stats.count - live_stats_orig.count, + live_count, ""); + prof_stats_t accum_stats; + prof_stats_get_accum(tsd, ind, &accum_stats); + expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + accum_req_sum, ""); + expect_u64_eq(accum_stats.count - accum_stats_orig.count, + accum_count, ""); + } + + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + free(ptrs[i]); + live_req_sum -= sz; + live_count--; + prof_stats_t live_stats; + prof_stats_get_live(tsd, ind, &live_stats); + expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + live_req_sum, ""); + expect_u64_eq(live_stats.count - live_stats_orig.count, + live_count, ""); + prof_stats_t accum_stats; + prof_stats_get_accum(tsd, ind, &accum_stats); + expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + accum_req_sum, ""); + expect_u64_eq(accum_stats.count - accum_stats_orig.count, + accum_count, ""); + } +#undef N_PTRS +} + +TEST_BEGIN(test_prof_stats) { + test_skip_if(!config_prof); + test_wrapper(0); + test_wrapper(1); + test_wrapper(2); 
+} +TEST_END + +int +main(void) { + return test( + test_prof_stats); +} diff --git a/test/unit/prof_stats.sh b/test/unit/prof_stats.sh new file mode 100644 index 00000000..b01dfd45 --- /dev/null +++ b/test/unit/prof_stats.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_stats:true" +fi From 54f3351f1f699a2d50f42da7f9a73a8d1a25ea30 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Fri, 21 Aug 2020 14:37:34 -0700 Subject: [PATCH 1971/2608] Add mallctl for prof stats fetching --- src/ctl.c | 170 ++++++++++++++++++++++++++++++++++++++++- test/unit/prof_stats.c | 69 +++++++++++------ 2 files changed, 215 insertions(+), 24 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 598759cd..a4f1916c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -12,6 +12,7 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_recent.h" +#include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" @@ -183,6 +184,12 @@ CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) CTL_PROTO(prof_log_start) CTL_PROTO(prof_log_stop) +CTL_PROTO(prof_stats_bins_i_live) +CTL_PROTO(prof_stats_bins_i_accum) +INDEX_PROTO(prof_stats_bins_i) +CTL_PROTO(prof_stats_lextents_i_live) +CTL_PROTO(prof_stats_lextents_i_accum) +INDEX_PROTO(prof_stats_lextents_i) CTL_PROTO(stats_arenas_i_small_allocated) CTL_PROTO(stats_arenas_i_small_nmalloc) CTL_PROTO(stats_arenas_i_small_ndalloc) @@ -494,6 +501,37 @@ static const ctl_named_node_t arenas_node[] = { {NAME("lookup"), CTL(arenas_lookup)} }; +static const ctl_named_node_t prof_stats_bins_i_node[] = { + {NAME("live"), CTL(prof_stats_bins_i_live)}, + {NAME("accum"), CTL(prof_stats_bins_i_accum)} +}; + +static const ctl_named_node_t super_prof_stats_bins_i_node[] = { + {NAME(""), CHILD(named, prof_stats_bins_i)} +}; + +static const ctl_indexed_node_t 
prof_stats_bins_node[] = { + {INDEX(prof_stats_bins_i)} +}; + +static const ctl_named_node_t prof_stats_lextents_i_node[] = { + {NAME("live"), CTL(prof_stats_lextents_i_live)}, + {NAME("accum"), CTL(prof_stats_lextents_i_accum)} +}; + +static const ctl_named_node_t super_prof_stats_lextents_i_node[] = { + {NAME(""), CHILD(named, prof_stats_lextents_i)} +}; + +static const ctl_indexed_node_t prof_stats_lextents_node[] = { + {INDEX(prof_stats_lextents_i)} +}; + +static const ctl_named_node_t prof_stats_node[] = { + {NAME("bins"), CHILD(indexed, prof_stats_bins)}, + {NAME("lextents"), CHILD(indexed, prof_stats_lextents)}, +}; + static const ctl_named_node_t prof_node[] = { {NAME("thread_active_init"), CTL(prof_thread_active_init)}, {NAME("active"), CTL(prof_active)}, @@ -504,8 +542,10 @@ static const ctl_named_node_t prof_node[] = { {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)}, {NAME("log_start"), CTL(prof_log_start)}, - {NAME("log_stop"), CTL(prof_log_stop)} + {NAME("log_stop"), CTL(prof_log_stop)}, + {NAME("stats"), CHILD(named, prof_stats)} }; + static const ctl_named_node_t stats_arenas_i_small_node[] = { {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, @@ -3975,3 +4015,131 @@ experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, label_return: return ret; } + +static int +prof_stats_bins_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned binind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(binind, 3); + if (binind >= SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_live(tsd, (szind_t)binind, &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static int +prof_stats_bins_i_accum_ctl(tsd_t *tsd, const size_t *mib, 
size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned binind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(binind, 3); + if (binind >= SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_accum(tsd, (szind_t)binind, &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static const ctl_named_node_t * +prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, + size_t i) { + if (!(config_prof && opt_prof && opt_prof_stats)) { + return NULL; + } + if (i >= SC_NBINS) { + return NULL; + } + return super_prof_stats_bins_i_node; +} + +static int +prof_stats_lextents_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned lextent_ind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(lextent_ind, 3); + if (lextent_ind >= SC_NSIZES - SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_live(tsd, (szind_t)(lextent_ind + SC_NBINS), &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static int +prof_stats_lextents_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned lextent_ind; + prof_stats_t stats; + + if (!(config_prof && opt_prof && opt_prof_stats)) { + ret = ENOENT; + goto label_return; + } + + READONLY(); + MIB_UNSIGNED(lextent_ind, 3); + if (lextent_ind >= SC_NSIZES - SC_NBINS) { + ret = EINVAL; + goto label_return; + } + prof_stats_get_accum(tsd, (szind_t)(lextent_ind + SC_NBINS), &stats); + READ(stats, prof_stats_t); + + ret = 0; +label_return: + return ret; +} + +static const ctl_named_node_t * +prof_stats_lextents_i_index(tsdn_t *tsdn, const 
size_t *mib, size_t miblen, + size_t i) { + if (!(config_prof && opt_prof && opt_prof_stats)) { + return NULL; + } + if (i >= SC_NSIZES - SC_NBINS) { + return NULL; + } + return super_prof_stats_lextents_i_node; +} diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index 555b69e3..123d899c 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -1,18 +1,33 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/prof_stats.h" - static void test_wrapper(szind_t ind) { #define N_PTRS 3 +#define MALLCTL_STR_LEN 64 assert(opt_prof && opt_prof_stats); - tsd_t *tsd = tsd_fetch(); + char mallctl_live_str[MALLCTL_STR_LEN]; + char mallctl_accum_str[MALLCTL_STR_LEN]; + if (ind < SC_NBINS) { + malloc_snprintf(mallctl_live_str, MALLCTL_STR_LEN, + "prof.stats.bins.%u.live", (unsigned)ind); + malloc_snprintf(mallctl_accum_str, MALLCTL_STR_LEN, + "prof.stats.bins.%u.accum", (unsigned)ind); + } else { + malloc_snprintf(mallctl_live_str, MALLCTL_STR_LEN, + "prof.stats.lextents.%u.live", (unsigned)(ind - SC_NBINS)); + malloc_snprintf(mallctl_accum_str, MALLCTL_STR_LEN, + "prof.stats.lextents.%u.accum", (unsigned)(ind - SC_NBINS)); + } - prof_stats_t live_stats_orig; - prof_stats_get_live(tsd, ind, &live_stats_orig); - prof_stats_t accum_stats_orig; - prof_stats_get_accum(tsd, ind, &accum_stats_orig); + size_t stats_len = 2 * sizeof(uint64_t); + + uint64_t live_stats_orig[2]; + assert_d_eq(mallctl(mallctl_live_str, &live_stats_orig, &stats_len, + NULL, 0), 0, ""); + uint64_t accum_stats_orig[2]; + assert_d_eq(mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, + NULL, 0), 0, ""); void *ptrs[N_PTRS]; @@ -30,17 +45,19 @@ test_wrapper(szind_t ind) { live_count++; accum_req_sum += sz; accum_count++; - prof_stats_t live_stats; - prof_stats_get_live(tsd, ind, &live_stats); - expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + uint64_t live_stats[2]; + assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, + NULL, 0), 0, ""); + 
expect_u64_eq(live_stats[0] - live_stats_orig[0], live_req_sum, ""); - expect_u64_eq(live_stats.count - live_stats_orig.count, + expect_u64_eq(live_stats[1] - live_stats_orig[1], live_count, ""); - prof_stats_t accum_stats; - prof_stats_get_accum(tsd, ind, &accum_stats); - expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + uint64_t accum_stats[2]; + assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, + NULL, 0), 0, ""); + expect_u64_eq(accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); - expect_u64_eq(accum_stats.count - accum_stats_orig.count, + expect_u64_eq(accum_stats[1] - accum_stats_orig[1], accum_count, ""); } @@ -49,19 +66,22 @@ test_wrapper(szind_t ind) { free(ptrs[i]); live_req_sum -= sz; live_count--; - prof_stats_t live_stats; - prof_stats_get_live(tsd, ind, &live_stats); - expect_u64_eq(live_stats.req_sum - live_stats_orig.req_sum, + uint64_t live_stats[2]; + assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, + NULL, 0), 0, ""); + expect_u64_eq(live_stats[0] - live_stats_orig[0], live_req_sum, ""); - expect_u64_eq(live_stats.count - live_stats_orig.count, + expect_u64_eq(live_stats[1] - live_stats_orig[1], live_count, ""); - prof_stats_t accum_stats; - prof_stats_get_accum(tsd, ind, &accum_stats); - expect_u64_eq(accum_stats.req_sum - accum_stats_orig.req_sum, + uint64_t accum_stats[2]; + assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, + NULL, 0), 0, ""); + expect_u64_eq(accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); - expect_u64_eq(accum_stats.count - accum_stats_orig.count, + expect_u64_eq(accum_stats[1] - accum_stats_orig[1], accum_count, ""); } +#undef MALLCTL_STR_LEN #undef N_PTRS } @@ -70,6 +90,9 @@ TEST_BEGIN(test_prof_stats) { test_wrapper(0); test_wrapper(1); test_wrapper(2); + test_wrapper(SC_NBINS); + test_wrapper(SC_NBINS + 1); + test_wrapper(SC_NBINS + 2); } TEST_END From 4352cbc21c597d5147c352740fdeefdcc4af0f11 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Wed, 26 
Aug 2020 16:48:59 -0700 Subject: [PATCH 1972/2608] Add alignment tests for prof stats --- test/unit/prof_stats.c | 80 +++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index 123d899c..a9145871 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -1,8 +1,10 @@ #include "test/jemalloc_test.h" -static void -test_wrapper(szind_t ind) { #define N_PTRS 3 + +static void +test_combinations(szind_t ind, size_t sizes_array[N_PTRS], + int flags_array[N_PTRS]) { #define MALLCTL_STR_LEN 64 assert(opt_prof && opt_prof_stats); @@ -36,10 +38,12 @@ test_wrapper(szind_t ind) { uint64_t accum_req_sum = 0; uint64_t accum_count = 0; - for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { - void *p = malloc(sz); + for (size_t i = 0; i < N_PTRS; ++i) { + size_t sz = sizes_array[i]; + int flags = flags_array[i]; + void *p = mallocx(sz, flags); assert_ptr_not_null(p, "malloc() failed"); + assert(malloc_usable_size(p) == sz_index2size(ind)); ptrs[i] = p; live_req_sum += sz; live_count++; @@ -61,9 +65,10 @@ test_wrapper(szind_t ind) { accum_count, ""); } - for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { - free(ptrs[i]); + for (size_t i = 0; i < N_PTRS; ++i) { + size_t sz = sizes_array[i]; + int flags = flags_array[i]; + sdallocx(ptrs[i], sz, flags); live_req_sum -= sz; live_count--; uint64_t live_stats[2]; @@ -82,22 +87,65 @@ test_wrapper(szind_t ind) { accum_count, ""); } #undef MALLCTL_STR_LEN -#undef N_PTRS +} + +static void +test_szind_wrapper(szind_t ind) { + size_t sizes_array[N_PTRS]; + int flags_array[N_PTRS]; + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + sizes_array[i] = sz; + flags_array[i] = 0; + } + test_combinations(ind, sizes_array, flags_array); } TEST_BEGIN(test_prof_stats) { test_skip_if(!config_prof); - test_wrapper(0); - test_wrapper(1); - test_wrapper(2); - 
test_wrapper(SC_NBINS); - test_wrapper(SC_NBINS + 1); - test_wrapper(SC_NBINS + 2); + test_szind_wrapper(0); + test_szind_wrapper(1); + test_szind_wrapper(2); + test_szind_wrapper(SC_NBINS); + test_szind_wrapper(SC_NBINS + 1); + test_szind_wrapper(SC_NBINS + 2); +} +TEST_END + +static void +test_szind_aligned_wrapper(szind_t ind, unsigned lg_align) { + size_t sizes_array[N_PTRS]; + int flags_array[N_PTRS]; + int flags = MALLOCX_LG_ALIGN(lg_align); + for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; + ++i, ++sz) { + sizes_array[i] = sz; + flags_array[i] = flags; + } + test_combinations( + sz_size2index(sz_sa2u(sz_index2size(ind), 1 << lg_align)), + sizes_array, flags_array); +} + +TEST_BEGIN(test_prof_stats_aligned) { + test_skip_if(!config_prof); + for (szind_t ind = 0; ind < 10; ++ind) { + for (unsigned lg_align = 0; lg_align < 10; ++lg_align) { + test_szind_aligned_wrapper(ind, lg_align); + } + } + for (szind_t ind = SC_NBINS - 5; ind < SC_NBINS + 5; ++ind) { + for (unsigned lg_align = SC_LG_LARGE_MINCLASS - 5; + lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) { + test_szind_aligned_wrapper(ind, lg_align); + } + } } TEST_END int main(void) { return test( - test_prof_stats); + test_prof_stats, + test_prof_stats_aligned); } From 1f1a0231ed9909119db2d350a2b44e1b21bda60f Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 17 Aug 2020 10:40:28 -0700 Subject: [PATCH 1973/2608] Split macros for initializing stats headers --- src/stats.c | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/src/stats.c b/src/stats.c index dac06834..393df150 100644 --- a/src/stats.c +++ b/src/stats.c @@ -264,22 +264,37 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #undef EMITTER_TYPE_uint64_t } -#define COL(row_name, column_name, left_or_right, col_width, etype) \ - emitter_col_t col_##column_name; \ - emitter_col_init(&col_##column_name, &row_name); \ - col_##column_name.justify = 
emitter_justify_##left_or_right; \ - col_##column_name.width = col_width; \ +#define COL_DECLARE(column_name) \ + emitter_col_t col_##column_name; + +#define COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ + emitter_col_init(&col_##column_name, &row_name); \ + col_##column_name.justify = emitter_justify_##left_or_right; \ + col_##column_name.width = col_width; \ col_##column_name.type = emitter_type_##etype; -#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \ - COL(row_name, column_name, left_or_right, col_width, etype) \ - emitter_col_t header_##column_name; \ - emitter_col_init(&header_##column_name, &header_##row_name); \ - header_##column_name.justify = emitter_justify_##left_or_right; \ - header_##column_name.width = col_width; \ - header_##column_name.type = emitter_type_title; \ +#define COL(row_name, column_name, left_or_right, col_width, etype) \ + COL_DECLARE(column_name); \ + COL_INIT(row_name, column_name, left_or_right, col_width, etype) + +#define COL_HDR_DECLARE(column_name) \ + COL_DECLARE(column_name); \ + emitter_col_t header_##column_name; + +#define COL_HDR_INIT(row_name, column_name, human, left_or_right, \ + col_width, etype) \ + COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ + emitter_col_init(&header_##column_name, &header_##row_name); \ + header_##column_name.justify = emitter_justify_##left_or_right; \ + header_##column_name.width = col_width; \ + header_##column_name.type = emitter_type_title; \ header_##column_name.str_val = human ? 
human : #column_name; +#define COL_HDR(row_name, column_name, human, left_or_right, col_width, \ + etype) \ + COL_HDR_DECLARE(column_name) \ + COL_HDR_INIT(row_name, column_name, human, left_or_right, \ + col_width, etype) JEMALLOC_COLD static void From 9f71b5779be6d59d2a603b0270e4c0c896d49d1c Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Thu, 13 Aug 2020 16:47:40 -0700 Subject: [PATCH 1974/2608] Output prof stats in stats print --- src/stats.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 98 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 393df150..86a2c01a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/emitter.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/prof_stats.h" const char *global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, @@ -298,7 +299,8 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, JEMALLOC_COLD static void -stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, + uint64_t uptime) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -313,6 +315,9 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti emitter_row_t row; emitter_row_init(&row); + bool prof_stats_on = config_prof && opt_prof && opt_prof_stats + && i == MALLCTL_ARENAS_ALL; + COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, allocated, NULL, right, 13, uint64) @@ -322,6 +327,16 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, nrequests, NULL, right, 13, uint64) COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64) + COL_HDR_DECLARE(prof_live_requested); + COL_HDR_DECLARE(prof_live_count); + 
COL_HDR_DECLARE(prof_accum_requested); + COL_HDR_DECLARE(prof_accum_count); + if (prof_stats_on) { + COL_HDR_INIT(row, prof_live_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_live_count, NULL, right, 17, uint64) + COL_HDR_INIT(row, prof_accum_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_accum_count, NULL, right, 17, uint64) + } COL_HDR(row, nshards, NULL, right, 9, unsigned) COL_HDR(row, curregs, NULL, right, 13, size) COL_HDR(row, curslabs, NULL, right, 13, size) @@ -373,6 +388,11 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti size_t arenas_bin_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); + size_t prof_stats_mib[CTL_MAX_DEPTH]; + if (prof_stats_on) { + CTL_LEAF_PREPARE(prof_stats_mib, 0, "prof.stats.bins"); + } + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -381,14 +401,28 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; + prof_stats_t prof_live; + prof_stats_t prof_accum; stats_arenas_mib[4] = j; arenas_bin_mib[2] = j; CTL_LEAF(stats_arenas_mib, 5, "nslabs", &nslabs, uint64_t); + if (prof_stats_on) { + prof_stats_mib[3] = j; + CTL_LEAF(prof_stats_mib, 4, "live", &prof_live, + prof_stats_t); + CTL_LEAF(prof_stats_mib, 4, "accum", &prof_accum, + prof_stats_t); + } + in_gap_prev = in_gap; - in_gap = (nslabs == 0); + if (prof_stats_on) { + in_gap = (nslabs == 0 && prof_accum.count == 0); + } else { + in_gap = (nslabs == 0); + } if (in_gap_prev && !in_gap) { emitter_table_printf(emitter, @@ -429,6 +463,16 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti &curregs); emitter_json_kv(emitter, "nrequests", emitter_type_uint64, &nrequests); + if (prof_stats_on) { + emitter_json_kv(emitter, "prof_live_requested", + emitter_type_uint64, &prof_live.req_sum); + 
emitter_json_kv(emitter, "prof_live_count", + emitter_type_uint64, &prof_live.count); + emitter_json_kv(emitter, "prof_accum_requested", + emitter_type_uint64, &prof_accum.req_sum); + emitter_json_kv(emitter, "prof_accum_count", + emitter_type_uint64, &prof_accum.count); + } emitter_json_kv(emitter, "nfills", emitter_type_uint64, &nfills); emitter_json_kv(emitter, "nflushes", emitter_type_uint64, @@ -475,6 +519,13 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint64_t upti col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + if (prof_stats_on) { + col_prof_live_requested.uint64_val = prof_live.req_sum; + col_prof_live_count.uint64_val = prof_live.count; + col_prof_accum_requested.uint64_val = + prof_accum.req_sum; + col_prof_accum_count.uint64_val = prof_accum.count; + } col_nshards.unsigned_val = nshards; col_curregs.size_val = curregs; col_curslabs.size_val = curslabs; @@ -518,6 +569,9 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_t row; emitter_row_init(&row); + bool prof_stats_on = config_prof && opt_prof && opt_prof_stats + && i == MALLCTL_ARENAS_ALL; + COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, allocated, NULL, right, 13, size) @@ -527,6 +581,16 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, nrequests, NULL, right, 13, uint64) COL_HDR(row, nrequests_ps, "(#/sec)", right, 8, uint64) + COL_HDR_DECLARE(prof_live_requested) + COL_HDR_DECLARE(prof_live_count) + COL_HDR_DECLARE(prof_accum_requested) + COL_HDR_DECLARE(prof_accum_count) + if (prof_stats_on) { + COL_HDR_INIT(row, prof_live_requested, NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_live_count, NULL, right, 17, uint64) + COL_HDR_INIT(row, prof_accum_requested, 
NULL, right, 21, uint64) + COL_HDR_INIT(row, prof_accum_count, NULL, right, 17, uint64) + } COL_HDR(row, curlextents, NULL, right, 13, size) /* As with bins, we label the large extents table. */ @@ -543,9 +607,16 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t arenas_lextent_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(arenas_lextent_mib, 0, "arenas.lextent"); + size_t prof_stats_mib[CTL_MAX_DEPTH]; + if (prof_stats_on) { + CTL_LEAF_PREPARE(prof_stats_mib, 0, "prof.stats.lextents"); + } + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; + prof_stats_t prof_live; + prof_stats_t prof_accum; stats_arenas_mib[4] = j; arenas_lextent_mib[2] = j; @@ -567,7 +638,25 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents, size_t); + if (prof_stats_on) { + prof_stats_mib[3] = j; + CTL_LEAF(prof_stats_mib, 4, "live", &prof_live, + prof_stats_t); + CTL_LEAF(prof_stats_mib, 4, "accum", &prof_accum, + prof_stats_t); + } + emitter_json_object_begin(emitter); + if (prof_stats_on) { + emitter_json_kv(emitter, "prof_live_requested", + emitter_type_uint64, &prof_live.req_sum); + emitter_json_kv(emitter, "prof_live_count", + emitter_type_uint64, &prof_live.count); + emitter_json_kv(emitter, "prof_accum_requested", + emitter_type_uint64, &prof_accum.req_sum); + emitter_json_kv(emitter, "prof_accum_count", + emitter_type_uint64, &prof_accum.count); + } emitter_json_kv(emitter, "curlextents", emitter_type_size, &curlextents); emitter_json_object_end(emitter); @@ -581,6 +670,13 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + if (prof_stats_on) { + col_prof_live_requested.uint64_val = prof_live.req_sum; + 
col_prof_live_count.uint64_val = prof_live.count; + col_prof_accum_requested.uint64_val = + prof_accum.req_sum; + col_prof_accum_count.uint64_val = prof_accum.count; + } col_curlextents.size_val = curlextents; if (!in_gap) { From 14d689c0f990f1f946eae5d4706008882d5457a8 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 28 Dec 2020 14:47:50 -0800 Subject: [PATCH 1975/2608] Add prof stats mutex stats --- include/jemalloc/internal/mutex_prof.h | 3 ++- src/ctl.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index a13e285e..4a526a5a 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -13,7 +13,8 @@ OP(prof_thds_data) \ OP(prof_dump) \ OP(prof_recent_alloc) \ - OP(prof_recent_dump) + OP(prof_recent_dump) \ + OP(prof_stats) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, diff --git a/src/ctl.c b/src/ctl.c index a4f1916c..b94ef646 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1280,6 +1280,8 @@ ctl_refresh(tsdn_t *tsdn) { READ_GLOBAL_MUTEX_PROF_DATA( global_prof_mutex_prof_recent_dump, prof_recent_dump_mtx); + READ_GLOBAL_MUTEX_PROF_DATA( + global_prof_mutex_prof_stats, prof_stats_mtx); } if (have_background_thread) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -3395,6 +3397,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(prof_dump_mtx); MUTEX_PROF_RESET(prof_recent_alloc_mtx); MUTEX_PROF_RESET(prof_recent_dump_mtx); + MUTEX_PROF_RESET(prof_stats_mtx); } /* Per arena mutexes. */ From a011c4c22d3fd1da5415dd5001afd195f5cd7ad5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 7 Jan 2021 13:22:08 -0800 Subject: [PATCH 1976/2608] cache_bin: Separate out local and remote accesses. This fixes an incorrect debug-mode assert: - T1 starts an arena stats update and reads stack_head from another thread's cache bin, when that cache bin has 1 item in it. - T2 allocates from that cache bin. 
The cache_bin's stack_head now points to a NULL pointer, since the cache bin is empty. - T1 Re-reads the cache_bin's stack_head to perform an assertion check (since it previously saw that the bin was empty, whatever stack_head points to should be non-NULL). --- include/jemalloc/internal/cache_bin.h | 54 +++++++++++++++++++++------ src/arena.c | 4 +- src/cache_bin.c | 2 +- src/tcache.c | 6 +-- test/unit/cache_bin.c | 40 +++++++++++--------- 5 files changed, 70 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c1b8fc42..cf5ed3e0 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -167,20 +167,50 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { return later - earlier; } -/* Number of items currently cached in the bin, without checking ncached_max. */ +/* + * Number of items currently cached in the bin, without checking ncached_max. + * We require specifying whether or not the request is racy or not (i.e. whether + * or not concurrent modifications are possible). + */ static inline cache_bin_sz_t -cache_bin_ncached_get_internal(cache_bin_t *bin) { +cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); - assert(n == 0 || *(bin->stack_head) != NULL); + /* + * We have undefined behavior here; if this function is called from the + * arena stats updating code, then stack_head could change from the + * first line to the next one. Morally, these loads should be atomic, + * but compilers won't currently generate comparisons with in-memory + * operands against atomics, and these variables get accessed on the + * fast paths. This should still be "safe" in the sense of generating + * the correct assembly for the foreseeable future, though. 
+ */ + assert(n == 0 || *(bin->stack_head) != NULL || racy); return n; } -/* Number of items currently cached in the bin, with checking ncached_max. */ +/* + * Number of items currently cached in the bin, with checking ncached_max. The + * caller must know that no concurrent modification of the cache_bin is + * possible. + */ static inline cache_bin_sz_t -cache_bin_ncached_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); +cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, + /* racy */ false); + assert(n <= cache_bin_info_ncached_max(info)); + return n; +} + +/* + * Obtain a racy view of the number of items currently in the cache bin, in the + * presence of possible concurrent modifications. + */ +static inline cache_bin_sz_t +cache_bin_ncached_get_remote(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, + /* racy */ true); assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -208,7 +238,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) { */ static inline void cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { - assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == bin->stack_head); } @@ -228,7 +258,7 @@ static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); assert(low_water <= cache_bin_info_ncached_max(info)); - assert(low_water <= cache_bin_ncached_get(bin, info)); + assert(low_water <= cache_bin_ncached_get_local(bin, info)); cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); @@ -247,7 +277,7 @@ cache_bin_low_water_set(cache_bin_t *bin) { static inline void cache_bin_low_water_adjust(cache_bin_t *bin) { - if 
(cache_bin_ncached_get_internal(bin) + if (cache_bin_ncached_get_internal(bin, /* racy */ false) < cache_bin_low_water_get_internal(bin)) { cache_bin_low_water_set(bin); } @@ -319,7 +349,7 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { - size_t n = cache_bin_ncached_get_internal(bin); + size_t n = cache_bin_ncached_get_internal(bin, /* racy */ false); if (n > num) { n = num; } @@ -416,7 +446,7 @@ static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { arr->ptr = cache_bin_empty_position_get(bin) - 1; - assert(cache_bin_ncached_get(bin, info) == 0 + assert(cache_bin_ncached_get_local(bin, info) == 0 || *arr->ptr != NULL); } @@ -437,7 +467,7 @@ cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { static inline void cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { - unsigned rem = cache_bin_ncached_get(bin, info) - nflushed; + unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head = bin->stack_head + nflushed; diff --git a/src/arena.c b/src/arena.c index 6a062de2..914e63f1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -150,7 +150,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < nhbins; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; astats->tcache_bytes += - cache_bin_ncached_get(cache_bin, + cache_bin_ncached_get_remote(cache_bin, &tcache_bin_info[i]) * sz_index2size(i); } } @@ -767,7 +767,7 @@ void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, const unsigned nfill) { - assert(cache_bin_ncached_get(cache_bin, cache_bin_info) == 0); 
+ assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); const bin_info_t *bin_info = &bin_infos[binind]; diff --git a/src/cache_bin.c b/src/cache_bin.c index 5f506062..b7470823 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -83,7 +83,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; assert(cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); - assert(cache_bin_ncached_get(bin, info) == 0); + assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == empty_position); assert(bin_stack_size > 0 || empty_position == full_position); diff --git a/src/tcache.c b/src/tcache.c index 41a1b828..ef0b87d0 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -125,7 +125,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[szind]); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); @@ -159,7 +159,7 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* Like the small GC; flush 3/4 of untouched items. 
*/ assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[szind]); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); @@ -289,7 +289,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get(cache_bin, + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache_slow->arena; diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index b31d07d2..a69cad6b 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -6,14 +6,15 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill_succeed) { bool success; void *ptr; - assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { arr.ptr[i] = &ptrs[i]; } cache_bin_finish_fill(bin, info, &arr, nfill_succeed); - expect_true(cache_bin_ncached_get(bin, info) == nfill_succeed, ""); + expect_true(cache_bin_ncached_get_local(bin, info) == nfill_succeed, + ""); cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { @@ -24,7 +25,7 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true(cache_bin_low_water_get(bin, info) == nfill_succeed - i - 1, ""); } - expect_true(cache_bin_ncached_get(bin, info) == 0, ""); + expect_true(cache_bin_ncached_get_local(bin, info) == 0, ""); expect_true(cache_bin_low_water_get(bin, info) == 0, ""); } @@ -32,7 +33,7 @@ static void do_flush_test(cache_bin_t *bin, 
cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill, cache_bin_sz_t nflush) { bool success; - assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); for (cache_bin_sz_t i = 0; i < nfill; i++) { success = cache_bin_dalloc_easy(bin, &ptrs[i]); @@ -46,8 +47,9 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } cache_bin_finish_flush(bin, info, &arr, nflush); - expect_true(cache_bin_ncached_get(bin, info) == nfill - nflush, ""); - while (cache_bin_ncached_get(bin, info) > 0) { + expect_true(cache_bin_ncached_get_local(bin, info) == nfill - nflush, + ""); + while (cache_bin_ncached_get_local(bin, info) > 0) { cache_bin_alloc(bin, &success); } } @@ -55,14 +57,14 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, static void do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill, size_t batch) { - assert_true(cache_bin_ncached_get(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill); for (cache_bin_sz_t i = 0; i < nfill; i++) { arr.ptr[i] = &ptrs[i]; } cache_bin_finish_fill(bin, info, &arr, nfill); - assert_true(cache_bin_ncached_get(bin, info) == nfill, ""); + assert_true(cache_bin_ncached_get_local(bin, info) == nfill, ""); cache_bin_low_water_set(bin); void **out = malloc((batch + 1) * sizeof(void *)); @@ -73,7 +75,7 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } expect_true(cache_bin_low_water_get(bin, info) == nfill - (cache_bin_sz_t)n, ""); - while (cache_bin_ncached_get(bin, info) > 0) { + while (cache_bin_ncached_get_local(bin, info) > 0) { bool success; cache_bin_alloc(bin, &success); } @@ -104,7 +106,7 @@ TEST_BEGIN(test_cache_bin) { /* Initialize to empty; should then have 0 elements. 
*/ expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); - expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); @@ -122,14 +124,15 @@ TEST_BEGIN(test_cache_bin) { void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get(&bin, &info) == i, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, "Should be able to dalloc into a non-full cache bin."); expect_true(cache_bin_low_water_get(&bin, &info) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); @@ -138,7 +141,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = 0; i < ncached_max; i++) { expect_true(cache_bin_low_water_get(&bin, &info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low @@ -149,7 +152,7 @@ TEST_BEGIN(test_cache_bin) { expect_false(success, ""); expect_true(cache_bin_low_water_get(&bin, &info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max - i, ""); /* This should succeed, though. 
*/ @@ -159,11 +162,11 @@ TEST_BEGIN(test_cache_bin) { "Alloc should pop in stack order"); expect_true(cache_bin_low_water_get(&bin, &info) == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max - i - 1, ""); } /* Now we're empty -- all alloc attempts should fail. */ - expect_true(cache_bin_ncached_get(&bin, &info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -179,7 +182,8 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get(&bin, &info) == ncached_max, ""); + expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* * Size is bigger than low water -- the reduced version should @@ -195,7 +199,7 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ - while (cache_bin_ncached_get(&bin, &info)) { + while (cache_bin_ncached_get_local(&bin, &info)) { cache_bin_alloc(&bin, &success); expect_true(success, ""); } From 2e3104ba07da1df4c04586231ff9266a1e35094d Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Sun, 10 Jan 2021 15:48:13 +0100 Subject: [PATCH 1977/2608] Update config.{sub,guess} to support support-aarch64-apple-darwin as a target --- build-aux/config.guess | 39 +++++++++++++++++++++++++++------------ build-aux/config.sub | 14 ++++++++------ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index 0fc11edb..f7727026 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2020 Free Software Foundation, Inc. 
+# Copyright 1992-2021 Free Software Foundation, Inc. -timestamp='2020-11-07' +timestamp='2021-01-01' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ timestamp='2020-11-07' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess # # Please send patches to . @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2020 Free Software Foundation, Inc. +Copyright 1992-2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -138,9 +138,7 @@ UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown case "$UNAME_SYSTEM" in Linux|GNU|GNU/*) - # If the system lacks a compiler, then just pick glibc. - # We could probably try harder. - LIBC=gnu + LIBC=unknown set_cc_for_build cat <<-EOF > "$dummy.c" @@ -149,16 +147,30 @@ Linux|GNU|GNU/*) LIBC=uclibc #elif defined(__dietlibc__) LIBC=dietlibc + #elif defined(__GLIBC__) + LIBC=gnu #else #include + /* First heuristic to detect musl libc. */ #ifdef __DEFINED_va_list LIBC=musl - #else - LIBC=gnu #endif #endif EOF eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" + + # Second heuristic to detect musl libc. + if [ "$LIBC" = unknown ] && + command -v ldd >/dev/null && + ldd --version 2>&1 | grep -q ^musl; then + LIBC=musl + fi + + # If the system lacks a compiler, then just pick glibc. + # We could probably try harder. 
+ if [ "$LIBC" = unknown ]; then + LIBC=gnu + fi ;; esac @@ -984,6 +996,9 @@ EOF k1om:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; m32r*:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; @@ -1072,7 +1087,7 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-"$LIBC" exit ;; - riscv32:Linux:*:* | riscv64:Linux:*:*) + riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; s390:Linux:*:* | s390x:Linux:*:*) @@ -1638,9 +1653,9 @@ This script (version $timestamp), has failed to recognize the operating system you are using. If your script is old, overwrite *all* copies of config.guess and config.sub with the latest versions from: - https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess + https://git.savannah.gnu.org/cgit/config.git/plain/config.guess and - https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + https://git.savannah.gnu.org/cgit/config.git/plain/config.sub EOF year=$(echo $timestamp | sed 's,-.*,,') diff --git a/build-aux/config.sub b/build-aux/config.sub index c874b7a9..b0f84923 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2020 Free Software Foundation, Inc. +# Copyright 1992-2021 Free Software Foundation, Inc. -timestamp='2020-11-07' +timestamp='2021-01-07' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ timestamp='2020-11-07' # Otherwise, we print the canonical config type on stdout and succeed. 
# You can get the latest version of this script from: -# https://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -67,7 +67,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2020 Free Software Foundation, Inc. +Copyright 1992-2021 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -1185,6 +1185,7 @@ case $cpu-$vendor in | k1om \ | le32 | le64 \ | lm32 \ + | loongarch32 | loongarch64 | loongarchx32 \ | m32c | m32r | m32rle \ | m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \ | m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \ @@ -1229,7 +1230,7 @@ case $cpu-$vendor in | powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \ | pru \ | pyramid \ - | riscv | riscv32 | riscv64 \ + | riscv | riscv32 | riscv32be | riscv64 | riscv64be \ | rl78 | romp | rs6000 | rx \ | s390 | s390x \ | score \ @@ -1241,6 +1242,7 @@ case $cpu-$vendor in | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ | spu \ | tahoe \ + | thumbv7* \ | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ | tron \ | ubicom32 \ @@ -1685,7 +1687,7 @@ case $os in musl* | newlib* | uclibc*) ;; # Likewise for "kernel-libc" - eabi | eabihf | gnueabi | gnueabihf) + eabi* | gnueabi*) ;; # Now accept the basic system types. # The portable systems comes first. 
From a943172b732e65da34a19469f31cd3ec70cf05b0 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 18 Dec 2020 22:23:35 +0300 Subject: [PATCH 1978/2608] Add runtime detection for MADV_DONTNEED zeroes pages (mostly for qemu) qemu does not support this, yet [1], and you can get very tricky assert if you will run program with jemalloc in use under qemu: : ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" [1]: https://patchwork.kernel.org/patch/10576637/ Here is a simple example that shows the problem [2]: // Gist to check possible issues with MADV_DONTNEED // For example it does not supported by qemu user // There is a patch for this [1], but it hasn't been applied. // [1]: https://lists.gnu.org/archive/html/qemu-devel/2018-08/msg05422.html #include #include #include #include #include int main(int argc, char **argv) { void *addr = mmap(NULL, 1<<16, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { perror("mmap"); return 1; } memset(addr, 'A', 1<<16); if (!madvise(addr, 1<<16, MADV_DONTNEED)) { puts("MADV_DONTNEED does not return error. Check memory."); for (int i = 0; i < 1<<16; ++i) { assert(((unsigned char *)addr)[i] == 0); } } else { perror("madvise"); } if (munmap(addr, 1<<16)) { perror("munmap"); return 1; } return 0; } ### unpatched qemu $ qemu-x86_64-static /tmp/test-MADV_DONTNEED MADV_DONTNEED does not return error. Check memory. test-MADV_DONTNEED: /tmp/test-MADV_DONTNEED.c:19: main: Assertion `((unsigned char *)addr)[i] == 0' failed. 
qemu: uncaught target signal 6 (Aborted) - core dumped Aborted (core dumped) ### patched qemu (by returning ENOSYS error) $ qemu-x86_64 /tmp/test-MADV_DONTNEED madvise: Success ### patch for qemu to return ENOSYS diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 897d20c076..5540792e0e 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -11775,7 +11775,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, turns private file-backed mappings into anonymous mappings. This will break MADV_DONTNEED. This is a hint, so ignoring and returning success is ok. */ - return 0; + return ENOSYS; #endif #ifdef TARGET_NR_fcntl64 case TARGET_NR_fcntl64: [2]: https://gist.github.com/azat/12ba2c825b710653ece34dba7f926ece v2: - review fixes - add opt_dont_trust_madvise v3: - review fixes - rename opt_dont_trust_madvise to opt_trust_madvise --- doc/jemalloc.xml.in | 12 ++++ .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 3 + src/jemalloc.c | 8 +++ src/pages.c | 71 ++++++++++++++++++- test/unit/mallctl.c | 1 + 6 files changed, 94 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e24c191c..4b93c5a9 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -950,6 +950,18 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", is disabled. + + + opt.trust_madvise + (bool) + r- + + Do not perform runtime check for MADV_DONTNEED, to + check that it actually zeros pages. The default is + disabled on linux and enabled elsewhere. + + + opt.retain diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fb8dc3fe..40591b99 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -11,6 +11,7 @@ extern bool malloc_slow; /* Run-time options. 
*/ extern bool opt_abort; extern bool opt_abort_conf; +extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; extern size_t opt_hpa_slab_max_alloc; diff --git a/src/ctl.c b/src/ctl.c index b94ef646..d516196a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -372,6 +373,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, @@ -2045,6 +2047,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 02714158..f7c3963d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -94,6 +94,13 @@ bool opt_junk_free = false #endif ; +bool opt_trust_madvise = +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + false +#else + true +#endif + ; zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; @@ -1256,6 +1263,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { int i; bool match = false; diff --git a/src/pages.c b/src/pages.c index b23c9e9e..6984d2a0 100644 --- 
a/src/pages.c +++ b/src/pages.c @@ -42,6 +42,57 @@ thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ static bool pages_can_purge_lazy_runtime = true; +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS +static int madvise_dont_need_zeros_is_faulty = -1; +/** + * Check that MADV_DONTNEED will actually zero pages on subsequent access. + * + * Since qemu does not support this, yet [1], and you can get very tricky + * assert if you will run program with jemalloc in use under qemu: + * + * : ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" + * + * [1]: https://patchwork.kernel.org/patch/10576637/ + */ +static int madvise_MADV_DONTNEED_zeroes_pages() +{ + int works = -1; + size_t size = PAGE; + + void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + + if (addr == MAP_FAILED) { + malloc_write(": Cannot allocate memory for " + "MADV_DONTNEED check\n"); + if (opt_abort) { + abort(); + } + } + + memset(addr, 'A', size); + if (madvise(addr, size, MADV_DONTNEED) == 0) { + works = memchr(addr, 'A', size) == NULL; + } else { + /* + * If madvise() does not support MADV_DONTNEED, then we can + * call it anyway, and use it's return code. 
+ */ + works = 1; + } + + if (munmap(addr, size) != 0) { + malloc_write(": Cannot deallocate memory for " + "MADV_DONTNEED check\n"); + if (opt_abort) { + abort(); + } + } + + return works; +} +#endif + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -351,10 +402,12 @@ pages_purge_forced(void *addr, size_t size) { #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) - return (madvise(addr, size, MADV_DONTNEED) != 0); + return (unlikely(madvise_dont_need_zeros_is_faulty) || + madvise(addr, size, MADV_DONTNEED) != 0); #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) - return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); + return (unlikely(madvise_dont_need_zeros_is_faulty) || + posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. 
*/ return pages_commit(addr, size); @@ -642,6 +695,20 @@ pages_boot(void) { return true; } +#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + if (!opt_trust_madvise) { + madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages(); + if (madvise_dont_need_zeros_is_faulty) { + malloc_write(": MADV_DONTNEED does not work (memset will be used instead)\n"); + malloc_write(": (This is the expected behaviour if you are running under QEMU)\n"); + } + } else { + /* In case opt_trust_madvise is disable, + * do not do runtime check */ + madvise_dont_need_zeros_is_faulty = 0; + } +#endif + #ifndef _WIN32 mmap_flags = MAP_PRIVATE | MAP_ANON; #endif diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 85dcb4e2..6f5a8f18 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); From f6699803e2772de2a4eb253d5b55f00c3842a950 Mon Sep 17 00:00:00 2001 From: Yinan Zhang Date: Mon, 25 Jan 2021 14:05:23 -0800 Subject: [PATCH 1979/2608] Fix duration in prof log --- src/prof_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_log.c b/src/prof_log.c index 356a886a..0f27a128 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -573,7 +573,7 @@ prof_log_emit_metadata(emitter_t *emitter) { nstime_t now; - nstime_init_update(&now); + nstime_prof_init_update(&now); uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp); emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); From 35a8552605be4fcbded961bf2dcbee5655401575 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Tue, 26 Jan 2021 21:49:08 +0000 Subject: [PATCH 1980/2608] Mac OS: Tag mapped pages. This can be used to help profiling tools (e.g. 
vmmap) identify the sources of mappings more specifically. --- configure.ac | 12 ++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/pages.c | 9 +++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 53ac7cce..34613feb 100644 --- a/configure.ac +++ b/configure.ac @@ -926,6 +926,18 @@ if test "x${je_cv_cold}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ]) fi +dnl Check for VM_MAKE_TAG for mmap support. +JE_COMPILABLE([vm_make_tag], + [#include + #include ], + [void *p; + p = mmap(0, 16, PROT_READ, MAP_ANON|MAP_PRIVATE, VM_MAKE_TAG(1), 0); + munmap(p, 16);], + [je_cv_vm_make_tag]) +if test "x${je_cv_vm_make_tag}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ]) +fi + dnl Support optional additions to rpath. AC_ARG_WITH([rpath], [AS_HELP_STRING([--with-rpath=], [Colon-separated rpath (ELF systems only)])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index dc4f01fb..093c8be0 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -404,4 +404,7 @@ /* Performs additional size checks when defined. */ #undef JEMALLOC_OPT_SIZE_CHECKS +/* Darwin VM_MAKE_TAG support */ +#undef JEMALLOC_HAVE_VM_MAKE_TAG + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/pages.c b/src/pages.c index 6984d2a0..42618858 100644 --- a/src/pages.c +++ b/src/pages.c @@ -16,6 +16,11 @@ #ifdef __NetBSD__ #include /* ilog2 */ #endif +#ifdef JEMALLOC_HAVE_VM_MAKE_TAG +#define PAGES_FD_TAG VM_MAKE_TAG(101U) +#else +#define PAGES_FD_TAG -1 +#endif /******************************************************************************/ /* Data. */ @@ -141,7 +146,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { #endif int prot = *commit ? 
PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - ret = mmap(addr, size, prot, mmap_flags, -1, 0); + ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0); } assert(ret != NULL); @@ -326,7 +331,7 @@ pages_commit_impl(void *addr, size_t size, bool commit) { { int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, - -1, 0); + PAGES_FD_TAG, 0); if (result == MAP_FAILED) { return true; } From c007c537ff038538b9312cf110bc5d395da14000 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 Jan 2021 15:36:11 -0800 Subject: [PATCH 1981/2608] Tcache flush: Unify edata lookup path. --- src/tcache.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index ef0b87d0..678fe524 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -236,18 +236,16 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, return ret; } -/* Enabled with --enable-extra-size-check. */ static void -tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, +tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, edata_t **edatas) { /* Avoids null-checking tsdn in the loop below. */ util_assume(tsd != NULL); /* - * Verify that the items in the tcache all have the correct size; this - * is useful for catching sized deallocation bugs, also to fail early - * instead of corrupting metadata. Since this can be turned on for opt - * builds, avoid the branch in the loop. + * This gets compiled away when config_opt_safety_checks is false. + * Checks for sized deallocation bugs, failing early rather than + * corrupting metadata. 
*/ size_t szind_sum = binind * nflush; for (unsigned i = 0; i < nflush; i++) { @@ -258,9 +256,10 @@ tbin_edatas_lookup_size_check(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_sum -= full_alloc_ctx.szind; } - if (szind_sum != 0) { + if (config_opt_safety_checks && szind_sum != 0) { safety_check_fail_sized_dealloc(false); } + } JEMALLOC_ALWAYS_INLINE bool @@ -306,17 +305,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], &ptrs, nflush); - /* Look up edata once per item. */ - if (config_opt_safety_checks) { - tbin_edatas_lookup_size_check(tsd, &ptrs, binind, nflush, - item_edata); - } else { - for (unsigned i = 0 ; i < nflush; i++) { - item_edata[i] = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, - cache_bin_ptr_array_get(&ptrs, i)); - } - } + tcache_bin_flush_edatas_lookup(tsd, &ptrs, binind, nflush, item_edata); /* * The slabs where we freed the last remaining object in the slab (and From 181ba7fd4d039a3acfc4d2b115be55d93ac8c406 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 Jan 2021 16:10:37 -0800 Subject: [PATCH 1982/2608] Tcache flush: Add an emap "batch lookup" path. For now this is a no-op; but the interface is a little more flexible for our purposes. --- include/jemalloc/internal/emap.h | 33 ++++++++++++++++++++++++++++++++ src/tcache.c | 28 ++++++++++++++++----------- 2 files changed, 50 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 8b2c6ba0..f0d7e768 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -213,4 +213,37 @@ emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, return false; } +/* + * We want to do batch lookups out of the cache bins, which use + * cache_bin_ptr_array_get to access the i'th element of the bin (since they + * invert usual ordering in deciding what to flush). 
This lets the emap avoid + * caring about its caller's ordering. + */ +typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind); +/* + * This allows size-checking assertions, which we can only do while we're in the + * process of edata lookups. + */ +typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx); + +JEMALLOC_ALWAYS_INLINE void +emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs, + emap_ptr_getter ptr_getter, void *ptr_getter_ctx, + emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx, + edata_t **r_edatas) { + + /* Avoids null-checking tsdn in the loop below. */ + util_assume(tsd != NULL); + + for (size_t i = 0; i < nptrs; i++) { + emap_full_alloc_ctx_t full_alloc_ctx; + const void *ptr = ptr_getter(ptr_getter_ctx, i); + + emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), emap, ptr, + &full_alloc_ctx); + r_edatas[i] = full_alloc_ctx.edata; + metadata_visitor(metadata_visitor_ctx, &full_alloc_ctx); + } +} + #endif /* JEMALLOC_INTERNAL_EMAP_H */ diff --git a/src/tcache.c b/src/tcache.c index 678fe524..602823d9 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -236,11 +236,22 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, return ret; } +static const void * +tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { + cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; + return cache_bin_ptr_array_get(arr, (unsigned)ind); +} + +static void +tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, + emap_full_alloc_ctx_t *alloc_ctx) { + size_t *szind_sum = (size_t *)szind_sum_ctx; + *szind_sum -= alloc_ctx->szind; +} + static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, edata_t **edatas) { - /* Avoids null-checking tsdn in the loop below. */ - util_assume(tsd != NULL); /* * This gets compiled away when config_opt_safety_checks is false. 
@@ -248,18 +259,13 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, * corrupting metadata. */ size_t szind_sum = binind * nflush; - for (unsigned i = 0; i < nflush; i++) { - emap_full_alloc_ctx_t full_alloc_ctx; - emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, - cache_bin_ptr_array_get(arr, i), &full_alloc_ctx); - edatas[i] = full_alloc_ctx.edata; - szind_sum -= full_alloc_ctx.szind; - } - + emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, + &tcache_bin_flush_ptr_getter, (void *)arr, + &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, + edatas); if (config_opt_safety_checks && szind_sum != 0) { safety_check_fail_sized_dealloc(false); } - } JEMALLOC_ALWAYS_INLINE bool From 9f9247a62ed5ac1157519cd2b1f966cacf772aaa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 27 Jan 2021 17:14:38 -0800 Subject: [PATCH 1983/2608] Tcache fluhing: increase cache miss parallelism. In practice, many rtree_leaf_elm accesses are cache misses. By restructuring, we can make it more likely that these misses occur without blocking us from starting later lookups, taking more of those misses in parallel. 
--- include/jemalloc/internal/emap.h | 38 ++++++++++++++++++++++++++------ src/tcache.c | 12 +++++----- 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index f0d7e768..ac0050b5 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -226,23 +226,47 @@ typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind); */ typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx); +typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t; +union emap_batch_lookup_result_u { + edata_t *edata; + rtree_leaf_elm_t *rtree_leaf; +}; + JEMALLOC_ALWAYS_INLINE void emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs, emap_ptr_getter ptr_getter, void *ptr_getter_ctx, emap_metadata_visitor metadata_visitor, void *metadata_visitor_ctx, - edata_t **r_edatas) { - + emap_batch_lookup_result_t *result) { /* Avoids null-checking tsdn in the loop below. */ util_assume(tsd != NULL); + rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get(tsd); for (size_t i = 0; i < nptrs; i++) { - emap_full_alloc_ctx_t full_alloc_ctx; const void *ptr = ptr_getter(ptr_getter_ctx, i); + /* + * Reuse the edatas array as a temp buffer, lying a little about + * the types. + */ + result[i].rtree_leaf = rtree_leaf_elm_lookup(tsd_tsdn(tsd), + &emap->rtree, rtree_ctx, (uintptr_t)ptr, + /* dependent */ true, /* init_missing */ false); + } - emap_full_alloc_ctx_lookup(tsd_tsdn(tsd), emap, ptr, - &full_alloc_ctx); - r_edatas[i] = full_alloc_ctx.edata; - metadata_visitor(metadata_visitor_ctx, &full_alloc_ctx); + for (size_t i = 0; i < nptrs; i++) { + rtree_leaf_elm_t *elm = result[i].rtree_leaf; + rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd), + &emap->rtree, elm, /* dependent */ true); + result[i].edata = contents.edata; + emap_full_alloc_ctx_t alloc_ctx; + /* + * Not all these fields are read in practice by the metadata + * visitor. 
But the compiler can easily optimize away the ones + * that aren't, so no sense in being incomplete. + */ + alloc_ctx.szind = contents.metadata.szind; + alloc_ctx.slab = contents.metadata.slab; + alloc_ctx.edata = contents.edata; + metadata_visitor(metadata_visitor_ctx, &alloc_ctx); } } diff --git a/src/tcache.c b/src/tcache.c index 602823d9..635ba0b1 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -251,7 +251,7 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, - szind_t binind, size_t nflush, edata_t **edatas) { + szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { /* * This gets compiled away when config_opt_safety_checks is false. @@ -305,7 +305,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * Variable length array must have > 0 length; the last element is never * touched (it's just included to satisfy the no-zero-length rule). */ - VARIABLE_ARRAY(edata_t *, item_edata, nflush + 1); + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], @@ -329,7 +329,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, bool merged_stats = false; while (nflush > 0) { /* Lock the arena, or bin, associated with the first object. 
*/ - edata_t *edata = item_edata[0]; + edata_t *edata = item_edata[0].edata; unsigned cur_arena_ind = edata_arena_ind_get(edata); arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); @@ -382,7 +382,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (!small) { for (unsigned i = 0; i < nflush; i++) { void *ptr = cache_bin_ptr_array_get(&ptrs, i); - edata = item_edata[i]; + edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (tcache_bin_flush_match(edata, cur_arena_ind, @@ -400,7 +400,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, unsigned ndeferred = 0; for (unsigned i = 0; i < nflush; i++) { void *ptr = cache_bin_ptr_array_get(&ptrs, i); - edata = item_edata[i]; + edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, cur_binshard, small)) { @@ -411,7 +411,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * it can be handled in a future pass. */ cache_bin_ptr_array_set(&ptrs, ndeferred, ptr); - item_edata[ndeferred] = edata; + item_edata[ndeferred].edata = edata; ndeferred++; continue; } From 31a629c3dea4c903d16025b4fe5261d2f3db8bd6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 28 Jan 2021 16:14:39 -0800 Subject: [PATCH 1984/2608] Tcache flush: prefetch edata contents. This frontloads more of the miss latency. It also moves it to a pathway where we have not yet acquired any locks, so that it should (hopefully) reduce hold times. --- include/jemalloc/internal/util.h | 49 ++++++++++++++++++++++++++++++++ src/tcache.c | 1 + 2 files changed, 50 insertions(+) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index cb751479..dcb1c0a5 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -69,6 +69,55 @@ util_assume(bool b) { } } +/* ptr should be valid. 
*/ +JEMALLOC_ALWAYS_INLINE void +util_prefetch_read(void *ptr) { + /* + * This should arguably be a config check; but any version of GCC so old + * that it doesn't support __builtin_prefetch is also too old to build + * jemalloc. + */ +#ifdef __GNUC__ + if (config_debug) { + /* Enforce the "valid ptr" requirement. */ + *(volatile char *)ptr; + } + __builtin_prefetch(ptr, /* read or write */ 0, /* locality hint */ 3); +#else + *(volatile char *)ptr; +#endif +} + +JEMALLOC_ALWAYS_INLINE void +util_prefetch_write(void *ptr) { +#ifdef __GNUC__ + if (config_debug) { + *(volatile char *)ptr; + } + /* + * The only difference from the read variant is that this has a 1 as the + * second argument (the write hint). + */ + __builtin_prefetch(ptr, 1, 3); +#else + *(volatile char *)ptr; +#endif +} + +JEMALLOC_ALWAYS_INLINE void +util_prefetch_read_range(void *ptr, size_t sz) { + for (size_t i = 0; i < sz; i += CACHELINE) { + util_prefetch_read((void *)((uintptr_t)ptr + i)); + } +} + +JEMALLOC_ALWAYS_INLINE void +util_prefetch_write_range(void *ptr, size_t sz) { + for (size_t i = 0; i < sz; i += CACHELINE) { + util_prefetch_write((void *)((uintptr_t)ptr + i)); + } +} + #undef UTIL_INLINE #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/src/tcache.c b/src/tcache.c index 635ba0b1..3daf4263 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -247,6 +247,7 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { size_t *szind_sum = (size_t *)szind_sum_ctx; *szind_sum -= alloc_ctx->szind; + util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); } static void From 229994a204f7d4712fe5ecd1508fbbe679c1baf6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Jan 2021 16:06:28 -0800 Subject: [PATCH 1985/2608] Tcache flush: keep common path state in registers. By carefully force-inlining the division constants and the operation sum count, we can eliminate redundant operations in the arena-level dalloc function. Do so. 
--- include/jemalloc/internal/arena_externs.h | 13 +-- include/jemalloc/internal/arena_inlines_b.h | 93 +++++++++++++++++++++ src/arena.c | 87 ++++--------------- src/tcache.c | 11 ++- test/unit/slab.c | 5 +- 5 files changed, 130 insertions(+), 79 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e3cfcee2..f06cb345 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" @@ -13,6 +14,8 @@ extern ssize_t opt_muzzy_decay_ms; extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; +extern div_info_t arena_binind_div_info[SC_NBINS]; + extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; @@ -29,9 +32,6 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); -#ifdef JEMALLOC_JET -size_t arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr); -#endif edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, @@ -59,8 +59,11 @@ void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); -bool arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *edata, void *ptr); + +void 
arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin); +void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index aaef45c0..66dcff07 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/mutex.h" @@ -441,4 +442,96 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } } +/* + * The dalloc bin info contains just the information that the common paths need + * during tcache flushes. By force-inlining these paths, and using local copies + * of data (so that the compiler knows it's constant), we avoid a whole bunch of + * redundant loads and stores by leaving this information in registers. + */ +typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t; +struct arena_dalloc_bin_locked_info_s { + div_info_t div_info; + uint32_t nregs; + uint64_t ndalloc; +}; + +JEMALLOC_ALWAYS_INLINE size_t +arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, + edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. */ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. 
*/ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % + (uintptr_t)bin_infos[binind].reg_size == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. */ + regind = div_compute(&info->div_info, diff); + + assert(regind < bin_infos[binind].nregs); + + return regind; +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info, + szind_t binind) { + info->div_info = arena_binind_div_info[binind]; + info->nregs = bin_infos[binind].nregs; + info->ndalloc = 0; +} + +/* + * Does the deallocation work associated with freeing a single pointer (a + * "step") in between a arena_dalloc_bin_locked begin and end call. + * + * Returns true if arena_slab_dalloc must be called on slab. Doesn't do + * stats updates, which happen during finish (this lets running counts get left + * in a register). + */ +JEMALLOC_ALWAYS_INLINE bool +arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, + void *ptr) { + const bin_info_t *bin_info = &bin_infos[binind]; + size_t regind = arena_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) < bin_info->nregs); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + edata_nfree_inc(slab); + + if (config_stats) { + info->ndalloc++; + } + + unsigned nfree = edata_nfree_get(slab); + if (nfree == bin_info->nregs) { + arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab, + bin); + return true; + } else if (nfree == 1 && slab != bin->slabcur) { + arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab, + bin); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + arena_dalloc_bin_locked_info_t *info) { + if (config_stats) { + bin->stats.ndalloc += info->ndalloc; + assert(bin->stats.curregs >= (size_t)info->ndalloc); + bin->stats.curregs -= (size_t)info->ndalloc; + } +} + #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/src/arena.c b/src/arena.c index 914e63f1..56c34af5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -3,7 +3,6 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/decay.h" -#include "jemalloc/internal/div.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" @@ -45,7 +44,7 @@ const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #undef STEP }; -static div_info_t arena_binind_div_info[SC_NBINS]; +div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; @@ -260,44 +259,6 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, edata_nfree_sub(slab, cnt); } -#ifndef JEMALLOC_JET -static -#endif -size_t -arena_slab_regind(edata_t *slab, szind_t binind, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. 
*/ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(&arena_binind_div_info[binind], diff); - - assert(regind < bin_infos[binind].nregs); - - return regind; -} - -static void -arena_slab_reg_dalloc(edata_t *slab, slab_data_t *slab_data, void *ptr) { - szind_t binind = edata_szind_get(slab); - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(slab, binind, ptr); - - assert(edata_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - edata_nfree_inc(slab); -} - static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t index, hindex; @@ -1189,37 +1150,18 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { } } -/* Returns true if arena_slab_dalloc must be called on slab */ -static bool -arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *slab, void *ptr) { - const bin_info_t *bin_info = &bin_infos[binind]; - arena_slab_reg_dalloc(slab, edata_slab_data_get(slab), ptr); - - bool ret = false; - unsigned nfree = edata_nfree_get(slab); - if (nfree == bin_info->nregs) { - arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab_prepare(tsdn, slab, bin); - ret = true; - } else if (nfree == 1 && slab != bin->slabcur) { - arena_bin_slabs_full_remove(arena, bin, slab); - arena_bin_lower_slab(tsdn, arena, slab, bin); - } - - if (config_stats) { - 
bin->stats.ndalloc++; - bin->stats.curregs--; - } - - return ret; +void +arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin) { + arena_dissociate_bin_slab(arena, slab, bin); + arena_dalloc_bin_slab_prepare(tsdn, slab, bin); } -bool -arena_dalloc_bin_locked(tsdn_t *tsdn, arena_t *arena, bin_t *bin, -szind_t binind, edata_t *edata, void *ptr) { - return arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr); +void +arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, + edata_t *slab, bin_t *bin) { + arena_bin_slabs_full_remove(arena, bin, slab); + arena_bin_lower_slab(tsdn, arena, slab, bin); } static void @@ -1229,8 +1171,11 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { bin_t *bin = &arena->bins[binind].bin_shards[binshard]; malloc_mutex_lock(tsdn, &bin->lock); - bool ret = arena_dalloc_bin_locked_impl(tsdn, arena, bin, binind, edata, - ptr); + arena_dalloc_bin_locked_info_t info; + arena_dalloc_bin_locked_begin(&info, binind); + bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin, + &info, binind, edata, ptr); + arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); if (ret) { diff --git a/src/tcache.c b/src/tcache.c index 3daf4263..c7bdbf99 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -399,6 +399,10 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, /* Deallocate whatever we can. 
*/ unsigned ndeferred = 0; + arena_dalloc_bin_locked_info_t dalloc_bin_info; + if (small) { + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + } for (unsigned i = 0; i < nflush; i++) { void *ptr = cache_bin_ptr_array_get(&ptrs, i); edata = item_edata[i].edata; @@ -417,8 +421,9 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, continue; } if (small) { - if (arena_dalloc_bin_locked(tsdn, cur_arena, - cur_bin, binind, edata, ptr)) { + if (arena_dalloc_bin_locked_step(tsdn, + cur_arena, cur_bin, &dalloc_bin_info, + binind, edata, ptr)) { dalloc_slabs[dalloc_count] = edata; dalloc_count++; } @@ -432,6 +437,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } if (small) { + arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, + &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); } arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); diff --git a/test/unit/slab.c b/test/unit/slab.c index 6baa9d3a..70fc5c7d 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -16,10 +16,13 @@ TEST_BEGIN(test_arena_slab_regind) { EXTENT_NOT_HEAD); expect_ptr_not_null(edata_addr_get(&slab), "Unexpected malloc() failure"); + arena_dalloc_bin_locked_info_t dalloc_info; + arena_dalloc_bin_locked_begin(&dalloc_info, binind); for (regind = 0; regind < bin_info->nregs; regind++) { void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); - expect_zu_eq(arena_slab_regind(&slab, binind, reg), + expect_zu_eq(arena_slab_regind(&dalloc_info, binind, + &slab, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); From 4c46e11365566ec03723c46356cd524f4abd7fd8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Jan 2021 21:22:57 -0800 Subject: [PATCH 1986/2608] Cache an arena's index in the arena. This saves us a pointer hop down some perf-sensitive paths. 
--- include/jemalloc/internal/arena_inlines_a.h | 2 +- include/jemalloc/internal/arena_structs.h | 6 ++++++ src/arena.c | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index b83d0e8e..8568358c 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -3,7 +3,7 @@ static inline unsigned arena_ind_get(const arena_t *arena) { - return base_ind_get(arena->base); + return arena->ind; } static inline void diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index baa7031c..913184d3 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -83,6 +83,12 @@ struct arena_s { */ bins_t bins[SC_NBINS]; + /* + * A cached copy of base->ind. This can get accessed on hot paths; + * looking it up in base requires an extra pointer hop / cache miss. + */ + unsigned ind; + /* * Base allocator, from which arena metadata are allocated. * diff --git a/src/arena.c b/src/arena.c index 56c34af5..7836e27d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1475,6 +1475,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { arena->base = base; /* Set arena before creating background threads. */ arena_set(ind, arena); + arena->ind = ind; nstime_init_update(&arena->create_time); From 2fcbd18115c93fb4649d2861dd2e0d3351bf6f6f Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 29 Jan 2021 13:10:44 -0800 Subject: [PATCH 1987/2608] Cache bin: Don't reverse flush order. The items we pick to flush matter a lot, but the order in which they get flushed doesn't; just use forward scans. This simplifies the accessing code, both in terms of the C and the generated assembly (i.e. this speeds up the flush pathways). 
--- include/jemalloc/internal/cache_bin.h | 21 +++++---------------- src/tcache.c | 8 ++++---- test/unit/cache_bin.c | 2 +- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index cf5ed3e0..41942e97 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -441,29 +441,18 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, bin->stack_head = empty_position - nfilled; } -/* Same deal, but with flush. */ +/* + * Same deal, but with flush. Unlike fill (which can fail), the user must flush + * everything we give them. + */ static inline void cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { - arr->ptr = cache_bin_empty_position_get(bin) - 1; + arr->ptr = cache_bin_empty_position_get(bin) - nflush; assert(cache_bin_ncached_get_local(bin, info) == 0 || *arr->ptr != NULL); } -/* - * These accessors are used by the flush pathways -- they reverse ordinary array - * ordering. See the note above. 
- */ -JEMALLOC_ALWAYS_INLINE void * -cache_bin_ptr_array_get(cache_bin_ptr_array_t *arr, cache_bin_sz_t n) { - return *(arr->ptr - n); -} - -JEMALLOC_ALWAYS_INLINE void -cache_bin_ptr_array_set(cache_bin_ptr_array_t *arr, cache_bin_sz_t n, void *p) { - *(arr->ptr - n) = p; -} - static inline void cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { diff --git a/src/tcache.c b/src/tcache.c index c7bdbf99..a7337e72 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -239,7 +239,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, static const void * tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; - return cache_bin_ptr_array_get(arr, (unsigned)ind); + return arr->ptr[ind]; } static void @@ -382,7 +382,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, */ if (!small) { for (unsigned i = 0; i < nflush; i++) { - void *ptr = cache_bin_ptr_array_get(&ptrs, i); + void *ptr = ptrs.ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); @@ -404,7 +404,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); } for (unsigned i = 0; i < nflush; i++) { - void *ptr = cache_bin_ptr_array_get(&ptrs, i); + void *ptr = ptrs.ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, @@ -415,7 +415,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * arena. Either way, stash the object so that * it can be handled in a future pass. 
*/ - cache_bin_ptr_array_set(&ptrs, ndeferred, ptr); + ptrs.ptr[ndeferred] = ptr; item_edata[ndeferred].edata = edata; ndeferred++; continue; diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index a69cad6b..56e69018 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -43,7 +43,7 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush); cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush); for (cache_bin_sz_t i = 0; i < nflush; i++) { - expect_ptr_eq(cache_bin_ptr_array_get(&arr, i), &ptrs[i], ""); + expect_ptr_eq(arr.ptr[i], &ptrs[nflush - i - 1], ""); } cache_bin_finish_flush(bin, info, &arr, nflush); From 39673298130bdeb95859c95fe314c0a1d7181329 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 30 Jan 2021 15:35:33 -0800 Subject: [PATCH 1988/2608] Arena: share bin offsets in a global. This saves us a cache miss when lookup up the arena bin offset in a remote arena during tcache flush. All arenas share the base offset, and so we don't need to look it up repeatedly for each arena. Secondarily, it shaves 288 bytes off the arena on, e.g., x86-64. --- include/jemalloc/internal/arena_externs.h | 6 ++ include/jemalloc/internal/arena_inlines_b.h | 6 ++ include/jemalloc/internal/arena_structs.h | 13 ++--- src/arena.c | 64 +++++++++------------ src/ctl.c | 2 +- src/inspect.c | 4 +- src/tcache.c | 4 +- 7 files changed, 50 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f06cb345..360653f9 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -23,6 +23,12 @@ extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; +/* + * arena_bin_offsets[binind] is the offset of the first bin shard for size class + * binind. 
+ */ +extern uint32_t arena_bin_offsets[SC_NBINS]; + void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 66dcff07..318de11c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -534,4 +534,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } +static inline bin_t * +arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { + bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]); + return shard0 + binshard; +} + #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 913184d3..4aff63c9 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -76,13 +76,6 @@ struct arena_s { /* The page-level allocator shard this arena uses. */ pa_shard_t pa_shard; - /* - * bins is used to store heaps of free regions. - * - * Synchronization: internal. - */ - bins_t bins[SC_NBINS]; - /* * A cached copy of base->ind. This can get accessed on hot paths; * looking it up in base requires an extra pointer hop / cache miss. @@ -97,6 +90,12 @@ struct arena_s { base_t *base; /* Used to determine uptime. Read-only after initialization. */ nstime_t create_time; + + /* + * The arena is allocated alongside its bins; really this is a + * dynamically sized array determined by the binshard settings. + */ + bin_t bins[0]; }; /* Used in conjunction with tsd for fast arena-related context lookup. 
*/ diff --git a/src/arena.c b/src/arena.c index 7836e27d..3448160f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -48,6 +48,10 @@ div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; + +uint32_t arena_bin_offsets[SC_NBINS]; +static unsigned nbins_total; + static unsigned huge_arena_ind; /******************************************************************************/ @@ -179,7 +183,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_stats_merge(tsdn, &bstats[i], - &arena->bins[i].bin_shards[j]); + arena_get_bin(arena, i, j)); } } } @@ -595,8 +599,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, - &arena->bins[i].bin_shards[j]); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -721,7 +724,7 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, if (binshard_p != NULL) { *binshard_p = binshard; } - return &arena->bins[binind].bin_shards[binshard]; + return arena_get_bin(arena, binind, binshard); } void @@ -1168,7 +1171,7 @@ static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { szind_t binind = edata_szind_get(edata); unsigned binshard = edata_binshard_get(edata); - bin_t *bin = &arena->bins[binind].bin_shards[binshard]; + bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; @@ -1411,10 +1414,6 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } } - unsigned nbins_total = 0; - for (i = 0; i < SC_NBINS; i++) { - nbins_total += bin_infos[i].n_shards; - } size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * 
nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { @@ -1457,20 +1456,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Initialize bins. */ - uintptr_t bin_addr = (uintptr_t)arena + sizeof(arena_t); atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (i = 0; i < SC_NBINS; i++) { - unsigned nshards = bin_infos[i].n_shards; - arena->bins[i].bin_shards = (bin_t *)bin_addr; - bin_addr += nshards * sizeof(bin_t); - for (unsigned j = 0; j < nshards; j++) { - bool err = bin_init(&arena->bins[i].bin_shards[j]); - if (err) { - goto label_error; - } + for (i = 0; i < nbins_total; i++) { + bool err = bin_init(&arena->bins[i]); + if (err) { + goto label_error; } } - assert(bin_addr == (uintptr_t)arena + arena_size); arena->base = base; /* Set arena before creating background threads. */ @@ -1587,6 +1579,13 @@ arena_boot(sc_data_t *sc_data) { div_init(&arena_binind_div_info[i], (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); } + + uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins); + for (szind_t i = 0; i < SC_NBINS; i++) { + arena_bin_offsets[i] = cur_offset; + nbins_total += bin_infos[i].n_shards; + cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); + } } void @@ -1633,23 +1632,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_prefork(tsdn, &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_prefork(tsdn, &arena->bins[i]); } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - - for (i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_postfork_parent(tsdn, - &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_postfork_parent(tsdn, &arena->bins[i]); } + 
malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); pa_shard_postfork_parent(tsdn, &arena->pa_shard); @@ -1660,8 +1653,6 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { void arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { - unsigned i; - atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); if (tsd_arena_get(tsdn_tsd(tsdn)) == arena) { @@ -1686,11 +1677,10 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_postfork_child(tsdn, &arena->bins[i].bin_shards[j]); - } + for (unsigned i = 0; i < nbins_total; i++) { + bin_postfork_child(tsdn, &arena->bins[i]); } + malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); pa_shard_postfork_child(tsdn, &arena->pa_shard); diff --git a/src/ctl.c b/src/ctl.c index d516196a..324925d7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3423,7 +3423,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = &arena->bins[i].bin_shards[j]; + bin_t *bin = arena_get_bin(arena, i, j); MUTEX_PROF_RESET(bin->lock); } } diff --git a/src/inspect.c b/src/inspect.c index 5e8d51d6..911b5d52 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -52,11 +52,11 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, assert(*nfree <= *nregs); assert(*nfree * edata_usize_get(edata) <= *size); - const arena_t *arena = (arena_t *)atomic_load_p( + arena_t *arena = (arena_t *)atomic_load_p( &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); assert(arena != NULL); const unsigned binshard = edata_binshard_get(edata); - bin_t *bin = &arena->bins[szind].bin_shards[binshard]; + bin_t *bin = arena_get_bin(arena, szind, binshard); malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) { 
diff --git a/src/tcache.c b/src/tcache.c index a7337e72..19e330a2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -344,8 +344,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, bin_t *cur_bin = NULL; if (small) { cur_binshard = edata_binshard_get(edata); - cur_bin = &cur_arena->bins[binind].bin_shards[ - cur_binshard]; + cur_bin = arena_get_bin(cur_arena, binind, + cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); } From 8edfc5b1700eab47d64d7cfa6a246ad88f832845 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 31 Jan 2021 11:55:45 -0800 Subject: [PATCH 1989/2608] Add ticker_geom_t. This lets a single ticker object drive events across a large number of different tick streams while sharing state. --- Makefile.in | 4 +- include/jemalloc/internal/ticker.h | 92 ++++++++++++++++++- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/ticker.c | 32 +++++++ src/ticker.py | 15 +++ test/unit/ticker.c | 29 +++++- 9 files changed, 174 insertions(+), 6 deletions(-) create mode 100644 src/ticker.c create mode 100755 src/ticker.py diff --git a/Makefile.in b/Makefile.in index 3cb3161e..40c41442 100644 --- a/Makefile.in +++ b/Makefile.in @@ -151,6 +151,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ + $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) @@ -188,7 +189,8 @@ ifeq (1, $(link_whole_archive)) C_UTIL_INTEGRATION_SRCS := C_UTIL_CPP_SRCS := else -C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c +C_UTIL_INTEGRATION_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c \ + $(srcroot)src/ticker.c C_UTIL_CPP_SRCS := $(srcroot)src/nstime.c $(srcroot)src/malloc_io.c endif TESTS_UNIT := \ diff --git 
a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 52d0db4c..6b51ddec 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TICKER_H #define JEMALLOC_INTERNAL_TICKER_H +#include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" /** @@ -10,11 +11,11 @@ * have occurred with a call to ticker_ticks), which will return true (and reset * the counter) if the countdown hit zero. */ - -typedef struct { +typedef struct ticker_s ticker_t; +struct ticker_s { int32_t tick; int32_t nticks; -} ticker_t; +}; static inline void ticker_init(ticker_t *ticker, int32_t nticks) { @@ -75,7 +76,7 @@ ticker_tick(ticker_t *ticker) { return ticker_ticks(ticker, 1); } -/* +/* * Try to tick. If ticker would fire, return true, but rely on * slowpath to reset ticker. */ @@ -88,4 +89,87 @@ ticker_trytick(ticker_t *ticker) { return false; } +/* + * The ticker_geom_t is much like the ticker_t, except that instead of ticker + * having a constant countdown, it has an approximate one; each tick has + * approximately a 1/nticks chance of triggering the count. + * + * The motivation is in triggering arena decay. With a naive strategy, each + * thread would maintain a ticker per arena, and check if decay is necessary + * each time that the arena's ticker fires. This has two costs: + * - Since under reasonable assumptions both threads and arenas can scale + * linearly with the number of CPUs, maintaining per-arena data in each thread + * scales quadratically with the number of CPUs. + * - These tickers are often a cache miss down tcache flush pathways. + * + * By giving each tick a 1/nticks chance of firing, we still maintain the same + * average number of ticks-until-firing per arena, with only a single ticker's + * worth of metadata. + */ + +/* See ticker.c for an explanation of these constants. 
*/ +#define TICKER_GEOM_NBITS 6 +#define TICKER_GEOM_MUL 61 +extern const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS]; + +/* Not actually any different from ticker_t; just for type safety. */ +typedef struct ticker_geom_s ticker_geom_t; +struct ticker_geom_s { + int32_t tick; + int32_t nticks; +}; + +/* + * Just pick the average delay for the first counter. We're more concerned with + * the behavior over long periods of time rather than the exact timing of the + * initial ticks. + */ +#define TICKER_GEOM_INIT(nticks) {nticks, nticks} + +static inline void +ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) { + /* + * Make sure there's no overflow possible. This shouldn't really be a + * problem for reasonable nticks choices, which are all static and + * relatively small. + */ + assert((uint64_t)nticks * (uint64_t)255 / (uint64_t)TICKER_GEOM_MUL + <= (uint64_t)INT32_MAX); + ticker->tick = nticks; + ticker->nticks = nticks; +} + +static inline int32_t +ticker_geom_read(const ticker_geom_t *ticker) { + return ticker->tick; +} + +/* Same deal as above. 
*/ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__x86_64__) || defined(__i386__)) +JEMALLOC_NOINLINE +#endif +static bool +ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state) { + uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS); + ticker->tick = (uint32_t)( + (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx] + / (uint64_t)TICKER_GEOM_MUL); + return true; +} + +static inline bool +ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) { + ticker->tick -= nticks; + if (unlikely(ticker->tick < 0)) { + return ticker_geom_fixup(ticker, prng_state); + } + return false; +} + +static inline bool +ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) { + return ticker_geom_ticks(ticker, prng_state, 1); +} + #endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9443ac55..a93511d1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -92,6 +92,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 3c4bff62..06460e5a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -160,6 +160,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index fafb4914..916460a7 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -92,6 +92,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 3c4bff62..06460e5a 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -160,6 
+160,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ticker.c b/src/ticker.c new file mode 100644 index 00000000..790b5c20 --- /dev/null +++ b/src/ticker.c @@ -0,0 +1,32 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +/* + * To avoid using floating point math down core paths (still necessary because + * versions of the glibc dynamic loader that did not preserve xmm registers are + * still somewhat common, requiring us to be compilable with -mno-sse), and also + * to avoid generally expensive library calls, we use a precomputed table of + * values. We want to sample U uniformly on [0, 1], and then compute + * ceil(log(u)/log(1-1/nticks)). We're mostly interested in the case where + * nticks is reasonably big, so 1/log(1-1/nticks) is well-approximated by + * -nticks. + * + * To compute log(u), we sample an integer in [1, 64] and divide, then just look + * up results in a table. As a space-compression mechanism, we store these as + * uint8_t by dividing the range (255) by the highest-magnitude value the log + * can take on, and using that as a multiplier. We then have to divide by that + * multiplier at the end of the computation. 
+ * + * The values here are computed in src/ticker.py + */ + +const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = { + 254, 211, 187, 169, 156, 144, 135, 127, + 120, 113, 107, 102, 97, 93, 89, 85, + 81, 77, 74, 71, 68, 65, 62, 60, + 57, 55, 53, 50, 48, 46, 44, 42, + 40, 39, 37, 35, 33, 32, 30, 29, + 27, 26, 24, 23, 21, 20, 19, 18, + 16, 15, 14, 13, 12, 10, 9, 8, + 7, 6, 5, 4, 3, 2, 1, 0 +}; diff --git a/src/ticker.py b/src/ticker.py new file mode 100755 index 00000000..3807740c --- /dev/null +++ b/src/ticker.py @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import math + +# Must match TICKER_GEOM_NBITS +lg_table_size = 6 +table_size = 2**lg_table_size +byte_max = 255 +mul = math.floor(-byte_max/math.log(1 / table_size)) +values = [round(-mul * math.log(i / table_size)) + for i in range(1, table_size+1)] +print("mul =", mul) +print("values:") +for i in range(table_size // 8): + print(", ".join((str(x) for x in values[i*8 : i*8 + 8]))) diff --git a/test/unit/ticker.c b/test/unit/ticker.c index 1cf10b0c..0dd77861 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -64,10 +64,37 @@ TEST_BEGIN(test_ticker_copy) { } TEST_END +TEST_BEGIN(test_ticker_geom) { + const int32_t ticks = 100; + const uint64_t niters = 100 * 1000; + + ticker_geom_t ticker; + ticker_geom_init(&ticker, ticks); + uint64_t total_ticks = 0; + /* Just some random constant. */ + uint64_t prng_state = 0x343219f93496db9fULL; + for (uint64_t i = 0; i < niters; i++) { + while(!ticker_geom_tick(&ticker, &prng_state)) { + total_ticks++; + } + } + /* + * In fact, with this choice of random seed and the PRNG implementation + * used at the time this was tested, total_ticks is 95.1% of the + * expected ticks. 
+ */ + expect_u64_ge(total_ticks , niters * ticks * 9 / 10, + "Mean off by > 10%%"); + expect_u64_le(total_ticks , niters * ticks * 11 / 10, + "Mean off by > 10%%"); +} +TEST_END + int main(void) { return test( test_ticker_tick, test_ticker_ticks, - test_ticker_copy); + test_ticker_copy, + test_ticker_geom); } From c259323ab3082324100c708109dbfff660d0f4b8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 31 Jan 2021 12:50:55 -0800 Subject: [PATCH 1990/2608] Use ticker_geom_t for arena tcache decay. --- include/jemalloc/internal/arena_inlines_b.h | 22 +++-- include/jemalloc/internal/arena_structs.h | 5 -- include/jemalloc/internal/arena_types.h | 3 +- .../internal/jemalloc_internal_externs.h | 2 - .../internal/jemalloc_internal_inlines_a.h | 36 -------- include/jemalloc/internal/tsd.h | 9 +- src/jemalloc.c | 90 ------------------- src/tsd.c | 6 -- test/unit/arena_decay.c | 77 ++++++++-------- 9 files changed, 52 insertions(+), 198 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 318de11c..13e6eb52 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -116,18 +116,22 @@ arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, JEMALLOC_ALWAYS_INLINE void arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { - tsd_t *tsd; - ticker_t *decay_ticker; - if (unlikely(tsdn_null(tsdn))) { return; } - tsd = tsdn_tsd(tsdn); - decay_ticker = decay_ticker_get(tsd, arena_ind_get(arena)); - if (unlikely(decay_ticker == NULL)) { - return; - } - if (unlikely(ticker_ticks(decay_ticker, nticks))) { + tsd_t *tsd = tsdn_tsd(tsdn); + /* + * We use the ticker_geom_t to avoid having per-arena state in the tsd. 
+ * Instead of having a countdown-until-decay timer running for every + * arena in every thread, we flip a coin once per tick, whose + * probability of coming up heads is 1/nticks; this is effectively the + * operation of the ticker_geom_t. Each arena has the same chance of a + * coinflip coming up heads (1/ARENA_DECAY_NTICKS_PER_UPDATE), so we can + * use a single ticker for all of them. + */ + ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); + uint64_t *prng_state = tsd_prng_statep_get(tsd); + if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks))) { arena_decay(tsdn, arena, false, false); } } diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 4aff63c9..ad76a79a 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -98,9 +98,4 @@ struct arena_s { bin_t bins[0]; }; -/* Used in conjunction with tsd for fast arena-related context lookup. */ -struct arena_tdata_s { - ticker_t decay_ticker; -}; - #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index b13d8a05..e0f8218d 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -7,11 +7,10 @@ #define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) #define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ -#define DECAY_NTICKS_PER_UPDATE 1000 +#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; -typedef struct arena_tdata_s arena_tdata_t; typedef enum { percpu_arena_mode_names_base = 0, /* Used for options processing. 
*/ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 40591b99..c78db06e 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -56,12 +56,10 @@ void bootstrap_free(void *ptr); void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); -arena_tdata_t *arena_tdata_get_hard(tsd_t *tsd, unsigned ind); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); -void arenas_tdata_cleanup(tsd_t *tsd); size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 25e5b50e..24e42d38 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -56,31 +56,6 @@ percpu_arena_ind_limit(percpu_arena_mode_t mode) { } } -static inline arena_tdata_t * -arena_tdata_get(tsd_t *tsd, unsigned ind, bool refresh_if_missing) { - arena_tdata_t *tdata; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - - if (unlikely(arenas_tdata == NULL)) { - /* arenas_tdata hasn't been initialized yet. */ - return arena_tdata_get_hard(tsd, ind); - } - if (unlikely(ind >= tsd_narenas_tdata_get(tsd))) { - /* - * ind is invalid, cache is old (too small), or tdata to be - * initialized. - */ - return (refresh_if_missing ? 
arena_tdata_get_hard(tsd, ind) : - NULL); - } - - tdata = &arenas_tdata[ind]; - if (likely(tdata != NULL) || !refresh_if_missing) { - return tdata; - } - return arena_tdata_get_hard(tsd, ind); -} - static inline arena_t * arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { arena_t *ret; @@ -97,17 +72,6 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { return ret; } -static inline ticker_t * -decay_ticker_get(tsd_t *tsd, unsigned ind) { - arena_tdata_t *tdata; - - tdata = arena_tdata_get(tsd, ind, true); - if (unlikely(tdata == NULL)) { - return NULL; - } - return &tdata->decay_ticker; -} - JEMALLOC_ALWAYS_INLINE bool tcache_available(tsd_t *tsd) { /* diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 60764199..d22fdc94 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -58,9 +58,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* O(name, type, nullable type) */ #define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ - O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ - O(narenas_tdata, uint32_t, uint32_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ @@ -77,7 +75,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(prng_state, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ - O(arenas_tdata, arena_tdata_t *, arena_tdata_t *)\ + O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ O(sec_shard, uint8_t, uint8_t) \ O(binshards, tsd_binshards_t, tsd_binshards_t)\ O(tsd_link, tsd_link_t, tsd_link_t) \ @@ -90,9 +88,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ - /* arenas_tdata_bypass */ false, \ /* reentrancy_level */ 0, \ - /* narenas_tdata */ 0, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ 0, \ /* 
thread_deallocated_last_event */ 0, \ @@ -109,7 +105,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* prng_state */ 0, \ /* iarena */ NULL, \ /* arena */ NULL, \ - /* arenas_tdata */ NULL, \ + /* arena_decay_ticker */ \ + TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ /* sec_shard */ (uint8_t)-1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, \ diff --git a/src/jemalloc.c b/src/jemalloc.c index f7c3963d..ca8a7deb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -493,82 +493,6 @@ arena_unbind(tsd_t *tsd, unsigned ind, bool internal) { } } -arena_tdata_t * -arena_tdata_get_hard(tsd_t *tsd, unsigned ind) { - arena_tdata_t *tdata, *arenas_tdata_old; - arena_tdata_t *arenas_tdata = tsd_arenas_tdata_get(tsd); - unsigned narenas_tdata_old, i; - unsigned narenas_tdata = tsd_narenas_tdata_get(tsd); - unsigned narenas_actual = narenas_total_get(); - - /* - * Dissociate old tdata array (and set up for deallocation upon return) - * if it's too small. - */ - if (arenas_tdata != NULL && narenas_tdata < narenas_actual) { - arenas_tdata_old = arenas_tdata; - narenas_tdata_old = narenas_tdata; - arenas_tdata = NULL; - narenas_tdata = 0; - tsd_arenas_tdata_set(tsd, arenas_tdata); - tsd_narenas_tdata_set(tsd, narenas_tdata); - } else { - arenas_tdata_old = NULL; - narenas_tdata_old = 0; - } - - /* Allocate tdata array if it's missing. */ - if (arenas_tdata == NULL) { - bool *arenas_tdata_bypassp = tsd_arenas_tdata_bypassp_get(tsd); - narenas_tdata = (ind < narenas_actual) ? narenas_actual : ind+1; - - if (tsd_nominal(tsd) && !*arenas_tdata_bypassp) { - *arenas_tdata_bypassp = true; - arenas_tdata = (arena_tdata_t *)a0malloc( - sizeof(arena_tdata_t) * narenas_tdata); - *arenas_tdata_bypassp = false; - } - if (arenas_tdata == NULL) { - tdata = NULL; - goto label_return; - } - assert(tsd_nominal(tsd) && !*arenas_tdata_bypassp); - tsd_arenas_tdata_set(tsd, arenas_tdata); - tsd_narenas_tdata_set(tsd, narenas_tdata); - } - - /* - * Copy to tdata array. 
It's possible that the actual number of arenas - * has increased since narenas_total_get() was called above, but that - * causes no correctness issues unless two threads concurrently execute - * the arenas.create mallctl, which we trust mallctl synchronization to - * prevent. - */ - - /* Copy/initialize tickers. */ - for (i = 0; i < narenas_actual; i++) { - if (i < narenas_tdata_old) { - ticker_copy(&arenas_tdata[i].decay_ticker, - &arenas_tdata_old[i].decay_ticker); - } else { - ticker_init(&arenas_tdata[i].decay_ticker, - DECAY_NTICKS_PER_UPDATE); - } - } - if (narenas_tdata > narenas_actual) { - memset(&arenas_tdata[narenas_actual], 0, sizeof(arena_tdata_t) - * (narenas_tdata - narenas_actual)); - } - - /* Read the refreshed tdata array. */ - tdata = &arenas_tdata[ind]; -label_return: - if (arenas_tdata_old != NULL) { - a0dalloc(arenas_tdata_old); - } - return tdata; -} - /* Slow path, called only by arena_choose(). */ arena_t * arena_choose_hard(tsd_t *tsd, bool internal) { @@ -705,20 +629,6 @@ arena_cleanup(tsd_t *tsd) { } } -void -arenas_tdata_cleanup(tsd_t *tsd) { - arena_tdata_t *arenas_tdata; - - /* Prevent tsd->arenas_tdata from being (re)created. 
*/ - *tsd_arenas_tdata_bypassp_get(tsd) = true; - - arenas_tdata = tsd_arenas_tdata_get(tsd); - if (arenas_tdata != NULL) { - tsd_arenas_tdata_set(tsd, NULL); - a0dalloc(arenas_tdata); - } -} - static void stats_print_atexit(void) { if (config_stats) { diff --git a/src/tsd.c b/src/tsd.c index 0dd4036b..6820eb62 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -251,8 +251,6 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { assert(!tsd_in_nominal_list(tsd)); assert(*tsd_arenap_get_unsafe(tsd) == NULL); assert(*tsd_iarenap_get_unsafe(tsd) == NULL); - assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true); - assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL); assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false); assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL); } @@ -267,7 +265,6 @@ tsd_data_init_nocleanup(tsd_t *tsd) { * We set up tsd in a way that no cleanup is needed. */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); - *tsd_arenas_tdata_bypassp_get(tsd) = true; *tsd_tcache_enabledp_get_unsafe(tsd) = false; *tsd_reentrancy_levelp_get(tsd) = 1; tsd_prng_state_init(tsd); @@ -375,7 +372,6 @@ tsd_do_data_cleanup(tsd_t *tsd) { prof_tdata_cleanup(tsd); iarena_cleanup(tsd); arena_cleanup(tsd); - arenas_tdata_cleanup(tsd); tcache_cleanup(tsd); witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd)); *tsd_reentrancy_levelp_get(tsd) = 1; @@ -439,7 +435,6 @@ malloc_tsd_boot0(void) { return NULL; } tsd = tsd_fetch(); - *tsd_arenas_tdata_bypassp_get(tsd) = true; return tsd; } @@ -449,7 +444,6 @@ malloc_tsd_boot1(void) { tsd_t *tsd = tsd_fetch(); /* malloc_slow has been set properly. Update tsd_slow. 
*/ tsd_slow_update(tsd); - *tsd_arenas_tdata_bypassp_get(tsd) = false; } #ifdef _WIN32 diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index a2661682..cea39e09 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -187,7 +187,7 @@ TEST_BEGIN(test_decay_ticks) { test_skip_if(check_background_thread_enabled()); test_skip_if(opt_hpa); - ticker_t *decay_ticker; + ticker_geom_t *decay_ticker; unsigned tick0, tick1, arena_ind; size_t sz, large0; void *p; @@ -205,7 +205,7 @@ TEST_BEGIN(test_decay_ticks) { expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); - decay_ticker = decay_ticker_get(tsd_fetch(), arena_ind); + decay_ticker = tsd_arena_decay_tickerp_get(tsd_fetch()); expect_ptr_not_null(decay_ticker, "Unexpected failure getting decay ticker"); @@ -216,60 +216,60 @@ TEST_BEGIN(test_decay_ticks) { */ /* malloc(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = malloc(large0); expect_ptr_not_null(p, "Unexpected malloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during malloc()"); /* free(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); free(p); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during free()"); /* calloc(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = calloc(1, large0); expect_ptr_not_null(p, "Unexpected calloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during calloc()"); free(p); /* posix_memalign(). 
*/ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during posix_memalign()"); free(p); /* aligned_alloc(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = aligned_alloc(sizeof(size_t), large0); expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during aligned_alloc()"); free(p); /* realloc(). */ /* Allocate. */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = realloc(NULL, large0); expect_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Reallocate. */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = realloc(p, large0); expect_ptr_not_null(p, "Unexpected realloc() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* Deallocate. */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); realloc(p, 0); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during realloc()"); /* @@ -286,41 +286,41 @@ TEST_BEGIN(test_decay_ticks) { sz = allocx_sizes[i]; /* mallocx(). 
*/ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during mallocx() (sz=%zu)", sz); /* rallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = rallocx(p, sz, MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected rallocx() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during rallocx() (sz=%zu)", sz); /* xallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); xallocx(p, sz, 0, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during xallocx() (sz=%zu)", sz); /* dallocx(). */ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); dallocx(p, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during dallocx() (sz=%zu)", sz); /* sdallocx(). */ p = mallocx(sz, MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); sdallocx(p, sz, MALLOCX_TCACHE_NONE); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during sdallocx() " "(sz=%zu)", sz); @@ -349,31 +349,24 @@ TEST_BEGIN(test_decay_ticks) { sz = tcache_sizes[i]; /* tcache fill. 
*/ - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); p = mallocx(sz, MALLOCX_TCACHE(tcache_ind)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache fill " "(sz=%zu)", sz); /* tcache flush. */ dallocx(p, MALLOCX_TCACHE(tcache_ind)); - tick0 = ticker_read(decay_ticker); + tick0 = ticker_geom_read(decay_ticker); expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tcache_ind, sizeof(unsigned)), 0, "Unexpected mallctl failure"); - tick1 = ticker_read(decay_ticker); + tick1 = ticker_geom_read(decay_ticker); /* Will only tick if it's in tcache. */ - if (sz <= tcache_max) { - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during tcache " - "flush (sz=%zu)", sz); - } else { - expect_u32_eq(tick1, tick0, - "Unexpected ticker tick during tcache " - "flush (sz=%zu)", sz); - } + expect_u32_ne(tick1, tick0, + "Expected ticker to tick during tcache flush (sz=%zu)", sz); } } TEST_END @@ -401,7 +394,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, void *p = do_mallocx(1, flags); uint64_t dirty_npurge1, muzzy_npurge1; do { - for (unsigned i = 0; i < DECAY_NTICKS_PER_UPDATE / 2; + for (unsigned i = 0; i < ARENA_DECAY_NTICKS_PER_UPDATE / 2; i++) { void *q = do_mallocx(1, flags); dallocx(q, flags); From 20140629b44f9a76241749b9c47e3905202d034c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 1 Feb 2021 12:03:11 -0800 Subject: [PATCH 1991/2608] Bin: Move stats closer to the mutex. This is a slight cache locality optimization. 
--- include/jemalloc/internal/bin.h | 9 ++++++--- src/tcache.c | 7 +++++++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 9241ee73..63f97395 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -16,6 +16,12 @@ struct bin_s { /* All operations on bin_t fields require lock ownership. */ malloc_mutex_t lock; + /* + * Bin statistics. These get touched every time the lock is acquired, + * so put them close by in the hopes of getting some cache locality. + */ + bin_stats_t stats; + /* * Current slab being used to service allocations of this bin's size * class. slabcur is independent of slabs_{nonfull,full}; whenever @@ -33,9 +39,6 @@ struct bin_s { /* List used to track full slabs. */ edata_list_active_t slabs_full; - - /* Bin statistics. */ - bin_stats_t stats; }; /* A set of sharded bins of the same size class. */ diff --git a/src/tcache.c b/src/tcache.c index 19e330a2..7c4047f4 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -347,6 +347,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); + /* + * If you're looking at profiles, you might think this + * is a good place to prefetch the bin stats, which are + * often a cache miss. This turns out not to be + * helpful on the workloads we've looked at, with moving + * the bin stats next to the lock seeming to do better. + */ } if (small) { From 3624dd42ffd88e63a8f7c2ee0a6ed3cbdfff81b7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 28 Jan 2021 13:19:41 -0800 Subject: [PATCH 1992/2608] hpdata: Add a comment for hpdata_consistent. 
--- include/jemalloc/internal/hpdata.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index fdd6673f..65cd073f 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -112,6 +112,11 @@ hpdata_assert_empty(hpdata_t *hpdata) { assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); } +/* + * Only used in tests, and in hpdata_assert_consistent, below. Verifies some + * consistency properties of the hpdata (e.g. that cached counts of page stats + * match computed ones). + */ static inline bool hpdata_consistent(hpdata_t *hpdata) { if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) From ff4086aa6b9b957409ccdc6d818490154decd343 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 17:01:57 -0800 Subject: [PATCH 1993/2608] hpdata: count active pages instead of free ones. This will be more consistent with later naming choices. --- include/jemalloc/internal/hpdata.h | 29 ++++++++++++----------------- src/hpa.c | 2 +- src/hpdata.c | 6 +++--- src/psset.c | 4 ++-- 4 files changed, 18 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 65cd073f..7cefb5cc 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -44,11 +44,12 @@ struct hpdata_s { ql_elm(hpdata_t) ql_link; }; - /* Number of currently free pages (regardless of contiguity). */ - size_t h_nfree; /* The length of the largest contiguous sequence of inactive pages. */ size_t h_longest_free_range; + /* Number of active pages. */ + size_t h_nactive; + /* A bitmap with bits set in the active pages. 
*/ fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; @@ -84,17 +85,6 @@ hpdata_huge_set(hpdata_t *hpdata, bool huge) { hpdata->h_huge = huge; } -static inline size_t -hpdata_nfree_get(const hpdata_t *hpdata) { - return hpdata->h_nfree; -} - -static inline void -hpdata_nfree_set(hpdata_t *hpdata, size_t nfree) { - assert(nfree <= HUGEPAGE_PAGES); - hpdata->h_nfree = nfree; -} - static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -106,10 +96,15 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { hpdata->h_longest_free_range = longest_free_range; } +static inline size_t +hpdata_nactive_get(hpdata_t *hpdata) { + return hpdata->h_nactive; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); - assert(hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES); + assert(hpdata->h_nactive == 0); } /* @@ -123,8 +118,8 @@ hpdata_consistent(hpdata_t *hpdata) { != hpdata_longest_free_range_get(hpdata)) { return false; } - if (fb_ucount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata_nfree_get(hpdata)) { + if (fb_scount(hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata->h_nactive) { return false; } return true; @@ -142,7 +137,7 @@ ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); static inline bool hpdata_empty(hpdata_t *hpdata) { - return hpdata_nfree_get(hpdata) == HUGEPAGE_PAGES; + return hpdata->h_nactive == 0; } void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); diff --git a/src/hpa.c b/src/hpa.c index a51f83ce..8bbe8a87 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -125,7 +125,7 @@ hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { * inactive. Eventually, this should be a malloc conf option. */ return !hpdata_huge_get(ps) - && hpdata_nfree_get(ps) < (HUGEPAGE / PAGE) * 5 / 100; + && hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; } /* Returns true on error. 
*/ diff --git a/src/hpdata.c b/src/hpdata.c index a242efea..d513896a 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,7 +22,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata_huge_set(hpdata, false); - hpdata_nfree_set(hpdata, HUGEPAGE_PAGES); + hpdata->h_nactive = 0; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -72,7 +72,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { /* We found a range; remember it. */ result = begin; fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); - hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) - npages); + hpdata->h_nactive += npages; /* * We might have shrunk the longest free range. We have to keep @@ -123,7 +123,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_longest_free_range_set(hpdata, new_range_len); } - hpdata_nfree_set(hpdata, hpdata_nfree_get(hpdata) + npages); + hpdata->h_nactive -= npages; hpdata_assert_consistent(hpdata); } diff --git a/src/psset.c b/src/psset.c index 7a5bd604..9fcdac22 100644 --- a/src/psset.c +++ b/src/psset.c @@ -57,8 +57,8 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, size_t *ninactive_dst = hpdata_huge_get(ps) ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - size_t ninactive = hpdata_nfree_get(ps); - size_t nactive = HUGEPAGE_PAGES - ninactive; + size_t nactive = hpdata_nactive_get(ps); + size_t ninactive = HUGEPAGE_PAGES - nactive; size_t mul = insert ? (size_t)1 : (size_t)-1; *npageslabs_dst += mul * 1; From 2ae966222f071929dd124d2953b35ca16feb2ba0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 17:56:58 -0800 Subject: [PATCH 1994/2608] hpdata: track per-page dirty state. 
--- include/jemalloc/internal/hpdata.h | 42 ++++++++++++++++++++++++---- src/hpa.c | 7 +++-- src/hpdata.c | 45 ++++++++++++++++++++++++++++-- 3 files changed, 85 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7cefb5cc..5952a18f 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -52,6 +52,16 @@ struct hpdata_s { /* A bitmap with bits set in the active pages. */ fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + + /* + * Number of dirty pages, and a bitmap tracking them. This really means + * "dirty" from the OS's point of view; it includes both active and + * inactive pages that have been touched by the user. + */ + size_t h_ndirty; + + /* The dirty pages (using the same definition as above). */ + fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; static inline void * @@ -80,11 +90,6 @@ hpdata_huge_get(const hpdata_t *hpdata) { return hpdata->h_huge; } -static inline void -hpdata_huge_set(hpdata_t *hpdata, bool huge) { - hpdata->h_huge = huge; -} - static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -122,6 +127,16 @@ hpdata_consistent(hpdata_t *hpdata) { != hpdata->h_nactive) { return false; } + if (fb_scount(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata->h_ndirty) { + return false; + } + if (hpdata->h_ndirty < hpdata->h_nactive) { + return false; + } + if (hpdata->h_huge && hpdata->h_ndirty != HUGEPAGE_PAGES) { + return false; + } return true; } @@ -141,6 +156,7 @@ hpdata_empty(hpdata_t *hpdata) { } void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); + /* * Given an hpdata which can serve an allocation request, pick and reserve an * offset within that allocation. 
@@ -148,4 +164,20 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); +/* + * Tell the hpdata that it's now a hugepage (which, correspondingly, means that + * all its pages become dirty. + */ +void hpdata_hugify(hpdata_t *hpdata); +/* + * Tell the hpdata that it's no longer a hugepage (all its pages are still + * counted as dirty, though; an explicit purge call is required to change that). + */ +void hpdata_dehugify(hpdata_t *hpdata); +/* + * Tell the hpdata (which should be empty) that all dirty pages in it have been + * purged. + */ +void hpdata_purge(hpdata_t *hpdata); + #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpa.c b/src/hpa.c index 8bbe8a87..75636047 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -146,7 +146,10 @@ hpa_dehugify(hpdata_t *ps) { /* Purge, then dehugify while unbacked. */ pages_purge_forced(hpdata_addr_get(ps), HUGEPAGE); pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - hpdata_huge_set(ps, false); + + /* Update metadata. 
*/ + hpdata_dehugify(ps); + hpdata_purge(ps); } static hpdata_t * @@ -297,7 +300,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - hpdata_huge_set(ps, true); + hpdata_hugify(ps); } psset_insert(&shard->psset, ps); diff --git a/src/hpdata.c b/src/hpdata.c index d513896a..8297158e 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -21,10 +21,12 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); - hpdata_huge_set(hpdata, false); - hpdata->h_nactive = 0; + hpdata->h_huge = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); + hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); + hpdata->h_ndirty = 0; + fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); hpdata_assert_consistent(hpdata); } @@ -74,6 +76,15 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); hpdata->h_nactive += npages; + /* + * We might be about to dirty some memory for the first time; update our + * count if so. + */ + size_t new_dirty = fb_ucount(hpdata->dirty_pages, HUGEPAGE_PAGES, + result, npages); + fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, result, npages); + hpdata->h_ndirty += new_dirty; + /* * We might have shrunk the longest free range. We have to keep * scanning until the end of the hpdata to be sure. 
@@ -127,3 +138,33 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); } + +void +hpdata_hugify(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + hpdata->h_huge = true; + fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + hpdata->h_ndirty = HUGEPAGE_PAGES; + hpdata_assert_consistent(hpdata); +} + +void +hpdata_dehugify(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + hpdata->h_huge = false; + hpdata_assert_consistent(hpdata); +} + +void +hpdata_purge(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + /* + * The hpdata must be empty; we don't (yet) support partial purges of + * hugepages. + */ + assert(hpdata->h_nactive == 0); + fb_unset_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); + hpdata->h_ndirty = 0; + hpdata_assert_consistent(hpdata); +} From 9b75808be171cc7c586e32ddb9d5dd86eca38669 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 19:06:32 -0800 Subject: [PATCH 1995/2608] flat bitmap: Add a bitwise and/or/not. We're about to need them. --- include/jemalloc/internal/flat_bitmap.h | 30 ++++++ test/unit/flat_bitmap.c | 132 +++++++++++++++++++++++- 2 files changed, 161 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/flat_bitmap.h index c8cf518a..90c4091f 100644 --- a/include/jemalloc/internal/flat_bitmap.h +++ b/include/jemalloc/internal/flat_bitmap.h @@ -340,4 +340,34 @@ fb_urange_longest(fb_group_t *fb, size_t nbits) { return fb_range_longest_impl(fb, nbits, /* val */ false); } +/* + * Initializes each bit of dst with the bitwise-AND of the corresponding bits of + * src1 and src2. All bitmaps must be the same size. 
+ */ +static inline void +fb_bit_and(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = src1[i] & src2[i]; + } +} + +/* Like fb_bit_and, but with bitwise-OR. */ +static inline void +fb_bit_or(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = src1[i] | src2[i]; + } +} + +/* Initializes dst bit i to the negation of source bit i. */ +static inline void +fb_bit_not(fb_group_t *dst, fb_group_t *src, size_t nbits) { + size_t ngroups = FB_NGROUPS(nbits); + for (size_t i = 0; i < ngroups; i++) { + dst[i] = ~src[i]; + } +} + #endif /* JEMALLOC_INTERNAL_FB_H */ diff --git a/test/unit/flat_bitmap.c b/test/unit/flat_bitmap.c index f0883790..6b0bcc34 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/flat_bitmap.c @@ -807,6 +807,133 @@ TEST_BEGIN(test_count_alternating) { } TEST_END +static void +do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), + void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + fb_group_t *fb1 = malloc(sz); + fb_group_t *fb2 = malloc(sz); + fb_group_t *fb_result = malloc(sz); + fb_init(fb1, nbits); + fb_init(fb2, nbits); + fb_init(fb_result, nbits); + + /* Just two random numbers. 
*/ + const uint64_t prng_init1 = (uint64_t)0X4E9A9DE6A35691CDULL; + const uint64_t prng_init2 = (uint64_t)0X7856E396B063C36EULL; + + uint64_t prng1 = prng_init1; + uint64_t prng2 = prng_init2; + + for (size_t i = 0; i < nbits; i++) { + bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); + bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); + + if (bit1) { + fb_set(fb1, nbits, i); + } + if (bit2) { + fb_set(fb2, nbits, i); + } + + if (i % 64 == 0) { + prng1 = prng_state_next_u64(prng1); + prng2 = prng_state_next_u64(prng2); + } + } + + fb_op(fb_result, fb1, fb2, nbits); + + /* Reset the prngs to replay them. */ + prng1 = prng_init1; + prng2 = prng_init2; + + for (size_t i = 0; i < nbits; i++) { + bool bit1 = ((prng1 & (1ULL << (i % 64))) != 0); + bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); + + /* Original bitmaps shouldn't change. */ + expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); + expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); + + /* New one should be bitwise and. */ + expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i), + "difference at bit %zu", i); + + /* Update the same way we did last time. 
*/ + if (i % 64 == 0) { + prng1 = prng_state_next_u64(prng1); + prng2 = prng_state_next_u64(prng2); + } + } + + free(fb1); + free(fb2); + free(fb_result); +} + +static bool +binary_and(bool a, bool b) { + return a & b; +} + +static void +do_test_bit_and(size_t nbits) { + do_test_bit_op(nbits, &binary_and, &fb_bit_and); +} + +TEST_BEGIN(test_bit_and) { +#define NB(nbits) \ + do_test_bit_and(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static bool +binary_or(bool a, bool b) { + return a | b; +} + +static void +do_test_bit_or(size_t nbits) { + do_test_bit_op(nbits, &binary_or, &fb_bit_or); +} + +TEST_BEGIN(test_bit_or) { +#define NB(nbits) \ + do_test_bit_or(nbits); + NBITS_TAB +#undef NB +} +TEST_END + +static bool +binary_not(bool a, bool b) { + (void)b; + return !a; +} + +static void +fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, + size_t nbits) { + (void)src2; + fb_bit_not(dst, src1, nbits); +} + +static void +do_test_bit_not(size_t nbits) { + do_test_bit_op(nbits, &binary_not, &fb_bit_not_shim); +} + +TEST_BEGIN(test_bit_not) { +#define NB(nbits) \ + do_test_bit_not(nbits); + NBITS_TAB +#undef NB +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -820,5 +947,8 @@ main(void) { test_iter_range_exhaustive, test_count_contiguous_simple, test_count_contiguous, - test_count_alternating); + test_count_alternating, + test_bit_and, + test_bit_or, + test_bit_not); } From 70692cfb13332678af49f9d3c7bfe1fde65ec1aa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 18:44:34 -0800 Subject: [PATCH 1996/2608] hpdata: Add state changing helpers. We're about to allow hugepage subextent purging; get as much of our metadata handling ready as possible. 
--- include/jemalloc/internal/hpdata.h | 100 ++++++++++++++++++++--- src/hpa.c | 5 +- src/hpdata.c | 112 ++++++++++++++++++++++++- test/unit/hpdata.c | 127 ++++++++++++++++++++++++++++- 4 files changed, 331 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 5952a18f..faa62434 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -34,6 +34,16 @@ struct hpdata_s { uint64_t h_age; /* Whether or not we think the hugepage is mapped that way by the OS. */ bool h_huge; + + /* + * Whether or not some thread is purging this hpdata (i.e. has called + * hpdata_purge_begin but not yet called hpdata_purge_end), or + * hugifying it. Only one thread at a time is allowed to change a + * hugepage's state. + */ + bool h_mid_purge; + bool h_mid_hugify; + union { /* When nonempty, used by the psset bins. */ phn(hpdata_t) ph_link; @@ -90,6 +100,22 @@ hpdata_huge_get(const hpdata_t *hpdata) { return hpdata->h_huge; } +static inline bool +hpdata_changing_state_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge || hpdata->h_mid_hugify; +} + +static inline bool +hpdata_mid_purge_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge; +} + +static inline bool +hpdata_mid_hugify_get(const hpdata_t *hpdata) { + return hpdata->h_mid_hugify; +} + + static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -106,6 +132,11 @@ hpdata_nactive_get(hpdata_t *hpdata) { return hpdata->h_nactive; } +static inline size_t +hpdata_ndirty_get(hpdata_t *hpdata) { + return hpdata->h_ndirty; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -164,20 +195,69 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); -/* - * Tell the hpdata 
that it's now a hugepage (which, correspondingly, means that - * all its pages become dirty. - */ -void hpdata_hugify(hpdata_t *hpdata); -/* - * Tell the hpdata that it's no longer a hugepage (all its pages are still - * counted as dirty, though; an explicit purge call is required to change that). - */ -void hpdata_dehugify(hpdata_t *hpdata); /* * Tell the hpdata (which should be empty) that all dirty pages in it have been * purged. */ void hpdata_purge(hpdata_t *hpdata); +/* + * The hpdata_purge_prepare_t allows grabbing the metadata required to purge + * subranges of a hugepage while holding a lock, drop the lock during the actual + * purging of them, and reacquire it to update the metadata again. + */ +typedef struct hpdata_purge_state_s hpdata_purge_state_t; +struct hpdata_purge_state_s { + size_t npurged; + fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; + size_t next_purge_search_begin; +}; + +/* + * Initializes purge state. The access to hpdata must be externally + * synchronized with other hpdata_* calls. + * + * You can tell whether or not a thread is purging or hugifying a given hpdata + * via hpdata_changing_state_get(hpdata). Racing hugification or purging + * operations aren't allowed. + * + * Once you begin purging, you have to follow through and call hpdata_purge_next + * until you're done, and then end. Allocating out of an hpdata undergoing + * purging is not allowed. + */ +void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +/* + * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to + * true, and returns true. Otherwise, returns false to indicate that we're + * done. + * + * This requires exclusive access to the purge state, but *not* to the hpdata. + * In particular, unreserve calls are allowed while purging (i.e. you can dalloc + * into one part of the hpdata while purging a different part). 
+ */ +bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + void **r_purge_addr, size_t *r_purge_size); +/* + * Updates the hpdata metadata after all purging is done. Needs external + * synchronization. + */ +void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); + +/* + * Similarly, when hugifying , callers can do the metadata modifications while + * holding a lock (thereby setting the change_state field), but actually do the + * operation without blocking other threads. + */ +void hpdata_hugify_begin(hpdata_t *hpdata); +void hpdata_hugify_end(hpdata_t *hpdata); + +/* + * Tell the hpdata that it's no longer a hugepage (all its pages are still + * counted as dirty, though; an explicit purge call is required to change that). + * + * This should only be done after starting to purge, and before actually purging + * any contents. + */ +void hpdata_dehugify(hpdata_t *hpdata); + #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/src/hpa.c b/src/hpa.c index 75636047..a36eee4e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -300,7 +300,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { - hpdata_hugify(ps); + hpdata_hugify_begin(ps); } psset_insert(&shard->psset, ps); @@ -319,6 +319,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * operations in this hpa shard. 
*/ hpa_hugify(ps); + malloc_mutex_lock(tsdn, &shard->mtx); + hpdata_hugify_end(ps); + malloc_mutex_unlock(tsdn, &shard->mtx); } return edata; } diff --git a/src/hpdata.c b/src/hpdata.c index 8297158e..29aecff5 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,6 +22,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; + hpdata->h_mid_purge = false; + hpdata->h_mid_hugify = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -140,17 +142,125 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } void -hpdata_hugify(hpdata_t *hpdata) { +hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_purge = true; + + purge_state->npurged = 0; + purge_state->next_purge_search_begin = 0; + + /* + * Initialize to_purge with everything that's not active but that is + * dirty. + * + * As an optimization, we could note that in practice we never allocate + * out of a hugepage while purging within it, and so could try to + * combine dirty extents separated by a non-dirty but non-active extent + * to avoid purge calls. This does nontrivially complicate metadata + * tracking though, so let's hold off for now. + */ + fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); + fb_bit_and(purge_state->to_purge, purge_state->to_purge, + hpdata->dirty_pages, HUGEPAGE_PAGES); + + /* We purge everything we can. 
*/ + assert(hpdata->h_ndirty - hpdata->h_nactive == fb_scount( + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + + hpdata_assert_consistent(hpdata); +} + +bool +hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + void **r_purge_addr, size_t *r_purge_size) { + /* + * Note that we don't have a consistency check here; we're accessing + * hpdata without synchronization, and therefore have no right to expect + * a consistent state. + */ + assert(hpdata->h_mid_purge); + /* Should have dehugified already (if necessary). */ + assert(!hpdata->h_huge); + assert(!hpdata->h_mid_hugify); + + if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { + return false; + } + size_t purge_begin; + size_t purge_len; + bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES, + purge_state->next_purge_search_begin, &purge_begin, &purge_len); + if (!found_range) { + return false; + } + + *r_purge_addr = (void *)( + (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE); + *r_purge_size = purge_len * PAGE; + + purge_state->next_purge_search_begin = purge_begin + purge_len; + purge_state->npurged += purge_len; + assert(purge_state->npurged <= HUGEPAGE_PAGES); + + return true; +} + +void +hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { + hpdata_assert_consistent(hpdata); + assert(hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + hpdata->h_mid_purge = false; + + assert(purge_state->npurged == fb_scount(purge_state->to_purge, + HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + + fb_bit_not(purge_state->to_purge, purge_state->to_purge, + HUGEPAGE_PAGES); + fb_bit_and(hpdata->dirty_pages, hpdata->dirty_pages, + purge_state->to_purge, HUGEPAGE_PAGES); + assert(hpdata->h_ndirty >= purge_state->npurged); + hpdata->h_ndirty -= purge_state->npurged; + + hpdata_assert_consistent(hpdata); +} + +void +hpdata_hugify_begin(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + 
assert(!hpdata->h_mid_hugify); + hpdata->h_mid_hugify = true; hpdata->h_huge = true; fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); hpdata->h_ndirty = HUGEPAGE_PAGES; hpdata_assert_consistent(hpdata); } +void +hpdata_hugify_end(hpdata_t *hpdata) { + hpdata_assert_consistent(hpdata); + assert(!hpdata->h_mid_purge); + assert(hpdata->h_mid_hugify); + hpdata->h_mid_hugify = false; + hpdata_assert_consistent(hpdata); +} + void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + /* + * These asserts are morally right; for now, though, we have the "purge a + * hugepage only in its entirety, when it becomes empty", path sharing + * hpdata_dehugify with the new purge pathway coming in the next + * commit. + */ + /* + assert(hpdata->h_mid_purge); + assert(!hpdata->h_mid_hugify); + */ hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 1bf58bca..2fd9a367 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -55,7 +55,132 @@ TEST_BEGIN(test_reserve_alloc) { } TEST_END +TEST_BEGIN(test_purge_simple) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Create HUGEPAGE_PAGES / 4 dirty inactive pages at the beginning. 
*/ + hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + + expect_false(hpdata_changing_state_get(&hpdata), ""); + + hpdata_purge_state_t purge_state; + hpdata_purge_begin(&hpdata, &purge_state); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + void *purge_addr; + size_t purge_size; + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + + expect_true(hpdata_changing_state_get(&hpdata), ""); + + hpdata_purge_end(&hpdata, &purge_state); + expect_false(hpdata_changing_state_get(&hpdata), ""); + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); +} +TEST_END + +/* + * We only test intervening dalloc's not intervening allocs; we don't need + * intervening allocs, and foreseeable optimizations will make them not just + * unnecessary but incorrect. In particular, if there are two dirty extents + * separated only by a retained extent, we can just purge the entire range, + * saving a purge call. + */ +TEST_BEGIN(test_purge_intervening_dalloc) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocate the first 3/4 of the pages. */ + void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Free the first 1/4 and the third 1/4 of the pages. 
*/ + hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + + hpdata_purge_state_t purge_state; + hpdata_purge_begin(&hpdata, &purge_state); + + void *purge_addr; + size_t purge_size; + /* First purge. */ + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + /* Deallocate the second 1/4 before the second purge occurs. */ + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 1 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + /* Now continue purging. */ + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq( + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); +} +TEST_END + +TEST_BEGIN(test_hugify) { + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ndirty_get(&hpdata), ""); + + expect_false(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_begin(&hpdata); + expect_true(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_end(&hpdata); + expect_false(hpdata_changing_state_get(&hpdata), ""); + + /* Hugeifying should have 
increased the dirty page count. */ + expect_zu_eq(HUGEPAGE_PAGES, hpdata_ndirty_get(&hpdata), ""); +} +TEST_END + int main(void) { return test_no_reentrancy( - test_reserve_alloc); + test_reserve_alloc, + test_purge_simple, + test_purge_intervening_dalloc, + test_hugify); } From 30b9e8162b9127d5c352fc312dfdea5e07d51e56 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 2 Dec 2020 22:24:15 -0800 Subject: [PATCH 1997/2608] HPA: Generalize purging. Previously, we would purge a hugepage only when it's completely empty. With this change, we can purge even when only partially empty. Although the heuristic here is still fairly primitive, this infrastructure can scale to become more advanced. --- include/jemalloc/internal/hpdata.h | 23 +++- src/hpa.c | 208 ++++++++++++++++++++++++----- src/hpdata.c | 35 ++--- src/psset.c | 5 + test/unit/hpdata.c | 1 + 5 files changed, 208 insertions(+), 64 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index faa62434..66473d2e 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -44,6 +44,9 @@ struct hpdata_s { bool h_mid_purge; bool h_mid_hugify; + /* Whether or not the hpdata is a the psset. */ + bool h_in_psset; + union { /* When nonempty, used by the psset bins. 
*/ phn(hpdata_t) ph_link; @@ -115,6 +118,15 @@ hpdata_mid_hugify_get(const hpdata_t *hpdata) { return hpdata->h_mid_hugify; } +static inline bool +hpdata_in_psset_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset; +} + +static inline void +hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) { + hpdata->h_in_psset = in_psset; +} static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { @@ -195,12 +207,6 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); -/* - * Tell the hpdata (which should be empty) that all dirty pages in it have been - * purged. - */ -void hpdata_purge(hpdata_t *hpdata); - /* * The hpdata_purge_prepare_t allows grabbing the metadata required to purge * subranges of a hugepage while holding a lock, drop the lock during the actual @@ -247,6 +253,11 @@ void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); * Similarly, when hugifying , callers can do the metadata modifications while * holding a lock (thereby setting the change_state field), but actually do the * operation without blocking other threads. + * + * Unlike most metadata operations, hugification ending should happen while an + * hpdata is in the psset (or upcoming hugepage collections). This is because + * while purge/use races are unsafe, purge/hugepageify races are perfectly + * reasonable. */ void hpdata_hugify_begin(hpdata_t *hpdata); void hpdata_hugify_end(hpdata_t *hpdata); diff --git a/src/hpa.c b/src/hpa.c index a36eee4e..99594549 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -124,32 +124,26 @@ hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { * For now, just use a static check; hugify a page if it's <= 5% * inactive. Eventually, this should be a malloc conf option. 
*/ + if (hpdata_changing_state_get(ps)) { + return false; + } return !hpdata_huge_get(ps) && hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; } -/* Returns true on error. */ -static void -hpa_hugify(hpdata_t *ps) { - assert(hpdata_huge_get(ps)); - bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); - /* - * Eat the error; even if the hugification failed, it's still safe to - * pretend it didn't (and would require extraordinary measures to - * unhugify). - */ - (void)err; -} - -static void -hpa_dehugify(hpdata_t *ps) { - /* Purge, then dehugify while unbacked. */ - pages_purge_forced(hpdata_addr_get(ps), HUGEPAGE); - pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - - /* Update metadata. */ - hpdata_dehugify(ps); - hpdata_purge(ps); +/* + * Whether or not the given pageslab meets the criteria for being purged (and, + * if necessary, dehugified). + */ +static bool +hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) { + /* Ditto. */ + if (hpdata_changing_state_get(ps)) { + return false; + } + size_t purgeable = hpdata_ndirty_get(ps) - hpdata_nactive_get(ps); + return purgeable > HUGEPAGE_PAGES * 25 / 100 + || (purgeable > 0 && hpdata_empty(ps)); } static hpdata_t * @@ -226,9 +220,65 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { } /* - * The psset does not hold empty slabs. Upon becoming empty, then, we need to - * put them somewhere. We take this as an opportunity to purge, and retain - * their address space in a list outside the psset. + * As a precondition, ps should not be in the psset (we can handle deallocation + * races, but not allocation ones), and we should hold the shard mutex. + */ +static void +hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + while (hpa_should_purge(shard, ps)) { + /* Do the metadata update bit while holding the lock. 
*/ + hpdata_purge_state_t purge_state; + hpdata_purge_begin(ps, &purge_state); + + /* + * Dehugifying can only happen on the first loop iteration, + * since no other threads can allocate out of this ps while + * we're purging (and thus, can't hugify it), but there's not a + * natural way to express that in the control flow. + */ + bool needs_dehugify = false; + if (hpdata_huge_get(ps)) { + needs_dehugify = true; + hpdata_dehugify(ps); + } + + /* Drop the lock to do the OS calls. */ + malloc_mutex_unlock(tsdn, &shard->mtx); + + if (needs_dehugify) { + pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); + } + + size_t total_purged = 0; + void *purge_addr; + size_t purge_size; + while (hpdata_purge_next(ps, &purge_state, &purge_addr, + &purge_size)) { + pages_purge_forced(purge_addr, purge_size); + total_purged += purge_size; + } + + /* Reacquire to finish our metadata update. */ + malloc_mutex_lock(tsdn, &shard->mtx); + hpdata_purge_end(ps, &purge_state); + + assert(total_purged <= HUGEPAGE); + + /* + * We're not done here; other threads can't allocate out of ps + * while purging, but they can still deallocate. Those + * deallocations could have meant more purging than what we + * planned ought to happen. We have to re-check now that we've + * reacquired the mutex again. + */ + } +} + +/* + * Does the metadata tracking associated with a page slab becoming empty. The + * psset doesn't hold empty pageslabs, but we do want address space reuse, so we + * track these pages outside the psset. */ static void hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { @@ -239,12 +289,6 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_not_owner(tsdn, &shard->mtx); malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - /* - * We do this unconditionally, even for pages which were not originally - * hugified; it has the same effect. 
- */ - hpa_dehugify(ps); - malloc_mutex_lock(tsdn, &shard->grow_mtx); shard->nevictions++; hpdata_list_prepend(&shard->unused_slabs, ps); @@ -291,6 +335,11 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) if (err) { hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + /* + * We should arguably reset dirty state here, but this would + * require some sort of prepare + commit functionality that's a + * little much to deal with for now. + */ psset_insert(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); @@ -318,9 +367,26 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * on this page slab, but also operations any other alloc/dalloc * operations in this hpa shard. */ - hpa_hugify(ps); + bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); + /* + * Pretending we succeed when we actually failed is safe; trying + * to rolllback would be tricky, though. Eat the error. + */ + (void)err; + malloc_mutex_lock(tsdn, &shard->mtx); hpdata_hugify_end(ps); + if (hpa_should_purge(shard, ps)) { + /* + * There was a race in which the ps went from being + * almost full to having lots of free space while we + * hugified. Undo our operation, taking care to meet + * the precondition that the ps isn't in the psset. + */ + psset_remove(&shard->psset, ps); + hpa_purge(tsdn, shard, ps); + psset_insert(&shard->psset, ps); + } malloc_mutex_unlock(tsdn, &shard->mtx); } return edata; @@ -383,11 +449,28 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { if (err) { hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + edata_cache_small_put(tsdn, &shard->ecs, edata); shard->nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + /* We'll do a fake purge; the pages weren't really touched. 
*/ + hpdata_purge_state_t purge_state; + void *purge_addr; + size_t purge_size; + hpdata_purge_begin(ps, &purge_state); + bool found_extent = hpdata_purge_next(ps, &purge_state, + &purge_addr, &purge_size); + assert(found_extent); + assert(purge_addr == addr); + assert(purge_size == size); + found_extent = hpdata_purge_next(ps, &purge_state, + &purge_addr, &purge_size); + assert(!found_extent); + hpdata_purge_end(ps, &purge_state); + hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } @@ -475,13 +558,66 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { /* Currently, all edatas come from pageslabs. */ assert(ps != NULL); emap_deregister_boundary(tsdn, shard->emap, edata); + /* + * Note that the shard mutex protects ps's metadata too; it wouldn't be + * correct to try to read most information out of it without the lock. + */ malloc_mutex_lock(tsdn, &shard->mtx); - /* Note that the shard mutex protects ps's metadata too. */ - psset_remove(&shard->psset, ps); - hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); - + /* + * Release the metadata early, to avoid having to remember to do it + * while we're also doing tricky purging logic. + */ + void *unreserve_addr = edata_addr_get(edata); + size_t unreserve_size = edata_size_get(edata); edata_cache_small_put(tsdn, &shard->ecs, edata); + + /* + * We have three rules interacting here: + * - You can't update ps metadata while it's still in the psset. We + * enforce this because it's necessary for stats tracking and metadata + * management. + * - The ps must not be in the psset while purging. This is because we + * can't handle purge/alloc races. + * - Whoever removes the ps from the psset is the one to reinsert it (or + * to pass it to hpa_handle_ps_eviction upon emptying). This keeps + * responsibility tracking simple. + */ + if (hpdata_mid_purge_get(ps)) { + /* + * Another thread started purging, and so the ps is not in the + * psset and we can do our metadata update. 
The other thread is + * in charge of reinserting the ps, so we're done. + */ + assert(!hpdata_in_psset_get(ps)); + hpdata_unreserve(ps, unreserve_addr, unreserve_size); + malloc_mutex_unlock(tsdn, &shard->mtx); + return; + } + /* + * No other thread is purging, and the ps is non-empty, so it should be + * in the psset. + */ + assert(hpdata_in_psset_get(ps)); + psset_remove(&shard->psset, ps); + hpdata_unreserve(ps, unreserve_addr, unreserve_size); + if (!hpa_should_purge(shard, ps)) { + /* + * This should be the common case; no other thread is purging, + * and we won't purge either. + */ + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + return; + } + + /* It's our job to purge. */ + hpa_purge(tsdn, shard, ps); + + /* + * OK, the hpdata is as purged as we want it to be, and it's going back + * into the psset (if nonempty) or getting evicted (if empty). + */ if (hpdata_empty(ps)) { malloc_mutex_unlock(tsdn, &shard->mtx); hpa_handle_ps_eviction(tsdn, shard, ps); diff --git a/src/hpdata.c b/src/hpdata.c index 29aecff5..78816196 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,6 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; + hpdata->h_in_psset = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -36,6 +37,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); + assert(!hpdata_in_psset_get(hpdata)); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -116,6 +118,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_in_psset); assert(((uintptr_t)addr & PAGE_MASK) == 
0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) @@ -144,6 +147,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_in_psset); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = true; @@ -181,6 +185,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * a consistent state. */ assert(hpdata->h_mid_purge); + assert(!hpdata->h_in_psset); /* Should have dehugified already (if necessary). */ assert(!hpdata->h_huge); assert(!hpdata->h_mid_hugify); @@ -210,6 +215,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); + assert(!hpdata->h_in_psset); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = false; @@ -230,6 +236,7 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { void hpdata_hugify_begin(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + assert(!hpdata_in_psset_get(hpdata)); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; @@ -242,6 +249,11 @@ hpdata_hugify_begin(hpdata_t *hpdata) { void hpdata_hugify_end(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); + /* + * This is the exception to the "no metadata tweaks while in the psset" + * rule. 
+ */ + /* assert(!hpdata_in_psset_get(hpdata)); */ assert(!hpdata->h_mid_purge); assert(hpdata->h_mid_hugify); hpdata->h_mid_hugify = false; @@ -251,30 +263,9 @@ hpdata_hugify_end(hpdata_t *hpdata) { void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - /* - * These asserts are morally right; for now, though, we have the "purge a - * hugepage only in its entirety, when it becomes empty", path sharing - * hpdata_dehugify with the new purge pathway coming in the next - * commit. - */ - /* + assert(!hpdata_in_psset_get(hpdata)); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); - */ hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } - -void -hpdata_purge(hpdata_t *hpdata) { - hpdata_assert_consistent(hpdata); - /* - * The hpdata must be empty; we don't (yet) support partial purges of - * hugepages. - */ - assert(hpdata->h_nactive == 0); - fb_unset_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); - fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); - hpdata->h_ndirty = 0; - hpdata_assert_consistent(hpdata); -} diff --git a/src/psset.c b/src/psset.c index 9fcdac22..688cd620 100644 --- a/src/psset.c +++ b/src/psset.c @@ -92,6 +92,8 @@ void psset_insert(psset_t *psset, hpdata_t *ps) { assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); + assert(!hpdata_in_psset_get(ps)); + hpdata_in_psset_set(ps, true); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -116,6 +118,9 @@ psset_insert(psset_t *psset, hpdata_t *ps) { void psset_remove(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); + assert(hpdata_in_psset_get(ps)); + hpdata_in_psset_set(ps, false); + size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 2fd9a367..aa4506f7 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -169,6 +169,7 @@ TEST_BEGIN(test_hugify) { 
expect_false(hpdata_changing_state_get(&hpdata), ""); hpdata_hugify_begin(&hpdata); expect_true(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify_end(&hpdata); expect_false(hpdata_changing_state_get(&hpdata), ""); From 746ea3de6f0c372aebb4d7d56172eb2614c83d2d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 15:35:38 -0800 Subject: [PATCH 1998/2608] HPA stats: Allow some derived stats. However, we put them in their own struct, to avoid the messiness that the arena has (mixing derived and non-derived stats in the arena_stats_t). --- include/jemalloc/internal/hpa.h | 22 +++++++++++++++------- src/ctl.c | 2 +- src/hpa.c | 19 +++++++++++++------ 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 217604e7..8dc9b3c0 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,12 +6,22 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; +struct hpa_shard_nonderived_stats_s { + /* + * The number of times we've purged a hugepage. Each eviction purges a + * single hugepage. + * + * Guarded by the grow mutex. + */ + uint64_t nevictions; +}; + /* Completely derived; only used by CTL. */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; - /* The stat version of the nevictions counter. */ - uint64_t nevictions; + hpa_shard_nonderived_stats_t nonderived_stats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -73,12 +83,10 @@ struct hpa_shard_s { emap_t *emap; /* - * The number of times we've purged a hugepage. Each eviction purges a - * single hugepage. - * - * Guarded by the grow mutex. + * Those stats which are copied directly into the CTL-centric hpa shard + * stats. 
*/ - uint64_t nevictions; + hpa_shard_nonderived_stats_t stats; }; /* diff --git a/src/ctl.c b/src/ctl.c index 324925d7..7e30b750 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3506,7 +3506,7 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, - arenas_i(mib[2])->astats->hpastats.nevictions, uint64_t); + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, diff --git a/src/hpa.c b/src/hpa.c index 99594549..d1a5431d 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,7 +74,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->eden_len = 0; shard->ind = ind; shard->emap = emap; - shard->nevictions = 0; + shard->stats.nevictions = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -95,10 +95,17 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, * only combines the stats from one stats objet to another. Hence the lack of * locking here. 
*/ +static void +hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, + hpa_shard_nonderived_stats_t *src) { + dst->nevictions += src->nevictions; +} + void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); - dst->nevictions += src->nevictions; + hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, + &src->nonderived_stats); } void @@ -107,7 +114,7 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); - dst->nevictions += shard->nevictions; + hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -290,7 +297,7 @@ hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->grow_mtx); - shard->nevictions++; + shard->stats.nevictions++; hpdata_list_prepend(&shard->unused_slabs, ps); malloc_mutex_unlock(tsdn, &shard->grow_mtx); } @@ -431,7 +438,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_lock(tsdn, &shard->mtx); edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { - shard->nevictions++; + shard->stats.nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); hpa_handle_ps_eviction(tsdn, shard, ps); @@ -452,7 +459,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { edata_cache_small_put(tsdn, &shard->ecs, edata); - shard->nevictions++; + shard->stats.nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); From b25ee5d88e07adcb3c085c19654039bb6b32dcf4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 16:09:50 -0800 Subject: [PATCH 1999/2608] HPA: Add 
purge stats. --- include/jemalloc/internal/hpa.h | 34 ++++++++++++++++++++++++++++--- src/ctl.c | 19 ++++++++++++++++- src/hpa.c | 15 ++++++++++++++ src/stats.c | 36 ++++++++++++++++++++++++++++++--- 4 files changed, 97 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 8dc9b3c0..bea88c37 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -9,12 +9,40 @@ typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* - * The number of times we've purged a hugepage. Each eviction purges a - * single hugepage. + * The number of times we've fully purged a hugepage and evicted it from + * the psset. * - * Guarded by the grow mutex. + * Guarded by grow_mtx. */ uint64_t nevictions; + + /* + * The number of times we've purged within a hugepage. + * + * Guarded by mtx. + */ + uint64_t npurge_passes; + /* + * The number of individual purge calls we perform (which should always + * be bigger than npurge_passes, since each pass purges at least one + * extent within a hugepage. + * + * Guarded by mtx. + */ + uint64_t npurges; + + /* + * The number of times we've hugified a pageslab. + * + * Guarded by mtx. + */ + uint64_t nhugifies; + /* + * The number of times we've dehugified a pageslab. + * + * Guarded by mtx. + */ + uint64_t ndehugifies; }; /* Completely derived; only used by CTL. 
*/ diff --git a/src/ctl.c b/src/ctl.c index 7e30b750..8871fd15 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -227,6 +227,10 @@ CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) CTL_PROTO(stats_arenas_i_hpa_shard_nevictions) +CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) +CTL_PROTO(stats_arenas_i_hpa_shard_npurges) +CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) +CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) @@ -695,7 +699,12 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { stats_arenas_i_hpa_shard_full_slabs)}, {NAME("nonfull_slabs"), CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}, - {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)} + + {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)}, + {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, + {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, + {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, + {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -3507,6 +3516,14 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugifies, uint64_t); 
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, diff --git a/src/hpa.c b/src/hpa.c index d1a5431d..5230f6ba 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,7 +74,12 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->eden_len = 0; shard->ind = ind; shard->emap = emap; + shard->stats.nevictions = 0; + shard->stats.npurge_passes = 0; + shard->stats.npurges = 0; + shard->stats.nhugifies = 0; + shard->stats.ndehugifies = 0; /* * Fill these in last, so that if an hpa_shard gets used despite @@ -99,6 +104,10 @@ static void hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) { dst->nevictions += src->nevictions; + dst->npurge_passes += src->npurge_passes; + dst->npurges += src->npurges; + dst->nhugifies += src->nhugifies; + dst->ndehugifies += src->ndehugifies; } void @@ -237,6 +246,7 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { /* Do the metadata update bit while holding the lock. */ hpdata_purge_state_t purge_state; hpdata_purge_begin(ps, &purge_state); + shard->stats.npurge_passes++; /* * Dehugifying can only happen on the first loop iteration, @@ -247,6 +257,7 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { bool needs_dehugify = false; if (hpdata_huge_get(ps)) { needs_dehugify = true; + shard->stats.ndehugifies++; hpdata_dehugify(ps); } @@ -258,16 +269,19 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { } size_t total_purged = 0; + uint64_t purges_this_pass = 0; void *purge_addr; size_t purge_size; while (hpdata_purge_next(ps, &purge_state, &purge_addr, &purge_size)) { + purges_this_pass++; pages_purge_forced(purge_addr, purge_size); total_purged += purge_size; } /* Reacquire to finish our metadata update. 
*/ malloc_mutex_lock(tsdn, &shard->mtx); + shard->stats.npurges += purges_this_pass; hpdata_purge_end(ps, &purge_state); assert(total_purged <= HUGEPAGE); @@ -357,6 +371,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) bool hugify = hpa_should_hugify(shard, ps); if (hugify) { hpdata_hugify_begin(ps); + shard->stats.nhugifies++; } psset_insert(&shard->psset, ps); diff --git a/src/stats.c b/src/stats.c index 86a2c01a..1b51c8b7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -791,6 +791,21 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_init(&row); uint64_t nevictions; + uint64_t npurge_passes; + uint64_t npurges; + uint64_t nhugifies; + uint64_t ndehugifies; + + CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", + i, &nevictions, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", + i, &npurge_passes, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", + i, &npurges, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies", + i, &nhugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", + i, &ndehugifies, uint64_t); size_t npageslabs_huge; size_t nactive_huge; @@ -800,9 +815,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t nactive_nonhuge; size_t ninactive_nonhuge; - CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", - i, &nevictions, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -825,17 +837,35 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_printf(emitter, "HPA shard stats:\n" " Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + "\n" " 
In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ninactive: %zu huge, %zu nonhuge \n", nevictions, rate_per_second(nevictions, uptime), + npurge_passes, rate_per_second(npurge_passes, uptime), + npurges, rate_per_second(npurges, uptime), + nhugifies, rate_per_second(nhugifies, uptime), + ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, ninactive_huge, ninactive_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "nevictions", emitter_type_uint64, &nevictions); + emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, + &npurge_passes); + emitter_json_kv(emitter, "npurges", emitter_type_uint64, + &npurges); + emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, + &nhugifies); + emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, + &ndehugifies); + emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); From 94cd9444c5eecdeea871f008a1e2d805d48dfe5d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:02:23 -0800 Subject: [PATCH 2000/2608] HPA: Some minor reformattings. 
--- include/jemalloc/internal/hpdata.h | 9 ++++----- src/hpa.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 66473d2e..12a72a66 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -77,6 +77,10 @@ struct hpdata_s { fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; +TYPED_LIST(hpdata_list, hpdata_t, ql_link) +typedef ph(hpdata_t) hpdata_age_heap_t; +ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); + static inline void * hpdata_addr_get(const hpdata_t *hpdata) { return hpdata->h_address; @@ -188,11 +192,6 @@ hpdata_assert_consistent(hpdata_t *hpdata) { assert(hpdata_consistent(hpdata)); } -TYPED_LIST(hpdata_list, hpdata_t, ql_link) - -typedef ph(hpdata_t) hpdata_age_heap_t; -ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); - static inline bool hpdata_empty(hpdata_t *hpdata) { return hpdata->h_nactive == 0; diff --git a/src/hpa.c b/src/hpa.c index 5230f6ba..4069c1ea 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -135,16 +135,20 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { } static bool -hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { +hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* * For now, just use a static check; hugify a page if it's <= 5% * inactive. Eventually, this should be a malloc conf option. 
*/ - if (hpdata_changing_state_get(ps)) { + return hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; +} + +static bool +hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { + if (hpdata_changing_state_get(ps) || hpdata_huge_get(ps)) { return false; } - return !hpdata_huge_get(ps) - && hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; + return hpa_good_hugification_candidate(shard, ps); } /* From 55e0f60ca1c154659b56ec90a85c8b53b580361e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:32:42 -0800 Subject: [PATCH 2001/2608] psset stats: Simplify handling. We can treat the huge and nonhuge cases uniformly using huge state as an array index. --- include/jemalloc/internal/psset.h | 21 +++++++------ src/ctl.c | 49 ++++++++++++++++--------------- src/hpa.c | 19 ++++++------ src/psset.c | 49 +++++++++++++------------------ test/unit/psset.c | 22 +++++++------- 5 files changed, 77 insertions(+), 83 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 7027cff7..d8189666 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -26,25 +26,28 @@ typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ - size_t npageslabs_huge; - size_t npageslabs_nonhuge; + size_t npageslabs; /* Of them, how many pages are active? */ - size_t nactive_huge; - size_t nactive_nonhuge; + size_t nactive; /* How many are inactive? */ - size_t ninactive_huge; - size_t ninactive_nonhuge; + size_t ninactive; }; -/* Used only by CTL; not actually stored here (i.e., all derived). */ typedef struct psset_stats_s psset_stats_t; struct psset_stats_s { + + /* + * The second index is huge stats; nonfull_slabs[pszind][0] contains + * stats for the non-huge slabs in bucket pszind, while + * nonfull_slabs[pszind][1] contains stats for the huge slabs. 
+ */ + psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2]; + /* * Full slabs don't live in any edata heap. But we still track their * stats. */ - psset_bin_stats_t full_slabs; - psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES]; + psset_bin_stats_t full_slabs[2]; }; typedef struct psset_s psset_t; diff --git a/src/ctl.c b/src/ctl.c index 8871fd15..516add4e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3525,46 +3525,47 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); -/* Full, huge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_huge, - size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_huge, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_huge, size_t); - /* Full, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.nactive_nonhuge, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs.ninactive_nonhuge, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ninactive, size_t); -/* Nonfull, huge */ -CTL_RO_CGEN(config_stats, 
stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_huge, - size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_huge, - size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_huge, +/* Full, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ninactive, size_t); /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]].ninactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ninactive, size_t); +/* Nonfull, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, + 
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ninactive, + size_t); + + static const ctl_named_node_t * stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { diff --git a/src/hpa.c b/src/hpa.c index 4069c1ea..a206cffe 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -662,12 +662,9 @@ hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { - assert(bin_stats->npageslabs_huge == 0); - assert(bin_stats->nactive_huge == 0); - assert(bin_stats->ninactive_huge == 0); - assert(bin_stats->npageslabs_nonhuge == 0); - assert(bin_stats->nactive_nonhuge == 0); - assert(bin_stats->ninactive_nonhuge == 0); + assert(bin_stats->npageslabs == 0); + assert(bin_stats->nactive == 0); + assert(bin_stats->ninactive == 0); } static void @@ -675,10 +672,12 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { malloc_mutex_assert_owner(tsdn, &shard->mtx); hpdata_t *ps = psset_fit(psset, PAGE); assert(ps == NULL); - hpa_shard_assert_stats_empty(&psset->stats.full_slabs); - for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - hpa_shard_assert_stats_empty( - &psset->stats.nonfull_slabs[i]); + for (int huge = 0; huge <= 1; huge++) { + hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]); + for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { + hpa_shard_assert_stats_empty( + &psset->stats.nonfull_slabs[i][huge]); + } } } diff --git a/src/psset.c b/src/psset.c index 688cd620..a91653f4 100644 --- a/src/psset.c +++ b/src/psset.c @@ -19,21 +19,20 @@ psset_init(psset_t *psset) { static void 
psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { - dst->npageslabs_huge += src->npageslabs_huge; - dst->nactive_huge += src->nactive_huge; - dst->ninactive_huge += src->ninactive_huge; - - dst->npageslabs_nonhuge += src->npageslabs_nonhuge; - dst->nactive_nonhuge += src->nactive_nonhuge; - dst->ninactive_nonhuge += src->ninactive_nonhuge; + dst->npageslabs += src->npageslabs; + dst->nactive += src->nactive; + dst->ninactive += src->ninactive; } void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { - psset_bin_stats_accum(&dst->full_slabs, &src->full_slabs); + psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); + psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum(&dst->nonfull_slabs[i], - &src->nonfull_slabs[i]); + psset_bin_stats_accum(&dst->nonfull_slabs[i][0], + &src->nonfull_slabs[i][0]); + psset_bin_stats_accum(&dst->nonfull_slabs[i][1], + &src->nonfull_slabs[i][1]); } } @@ -50,42 +49,34 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { JEMALLOC_ALWAYS_INLINE void psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) { - size_t *npageslabs_dst = hpdata_huge_get(ps) - ? &binstats->npageslabs_huge : &binstats->npageslabs_nonhuge; - size_t *nactive_dst = hpdata_huge_get(ps) - ? &binstats->nactive_huge : &binstats->nactive_nonhuge; - size_t *ninactive_dst = hpdata_huge_get(ps) - ? &binstats->ninactive_huge : &binstats->ninactive_nonhuge; - - size_t nactive = hpdata_nactive_get(ps); - size_t ninactive = HUGEPAGE_PAGES - nactive; - size_t mul = insert ? 
(size_t)1 : (size_t)-1; - *npageslabs_dst += mul * 1; - *nactive_dst += mul * nactive; - *ninactive_dst += mul * ninactive; + size_t huge_idx = (size_t)hpdata_huge_get(ps); + binstats[huge_idx].npageslabs += mul * 1; + size_t nactive = hpdata_nactive_get(ps); + binstats[huge_idx].nactive += mul * nactive; + binstats[huge_idx].ninactive += mul * (HUGEPAGE_PAGES - nactive); } static void psset_bin_stats_insert(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, /* insert */ true); + psset_bin_stats_insert_remove(binstats, ps, true); } static void psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, /* insert */ false); + psset_bin_stats_insert_remove(binstats, ps, false); } static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_remove(&psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_insert(&psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); } void @@ -101,7 +92,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) { * We don't ned to track full slabs; just pretend to for stats * purposes. See the comment at psset_bin_stats_adjust. 
*/ - psset_bin_stats_insert(&psset->stats.full_slabs, ps); + psset_bin_stats_insert(psset->stats.full_slabs, ps); return; } @@ -124,7 +115,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) { size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { - psset_bin_stats_remove(&psset->stats.full_slabs, ps); + psset_bin_stats_remove(psset->stats.full_slabs, ps); return; } diff --git a/test/unit/psset.c b/test/unit/psset.c index 6f35fa8d..020a8325 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -321,26 +321,26 @@ TEST_END static void stats_expect_empty(psset_bin_stats_t *stats) { - assert_zu_eq(0, stats->npageslabs_nonhuge, + assert_zu_eq(0, stats->npageslabs, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive_nonhuge, "Unexpected nonempty bin" + expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive_nonhuge, "Unexpected nonempty bin" + expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" "Supposedly empty bin had positive ninactive"); } static void stats_expect(psset_t *psset, size_t nactive) { if (nactive == HUGEPAGE_PAGES) { - expect_zu_eq(1, psset->stats.full_slabs.npageslabs_nonhuge, + expect_zu_eq(1, psset->stats.full_slabs[0].npageslabs, "Expected a full slab"); expect_zu_eq(HUGEPAGE_PAGES, - psset->stats.full_slabs.nactive_nonhuge, + psset->stats.full_slabs[0].nactive, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs.ninactive_nonhuge, + expect_zu_eq(0, psset->stats.full_slabs[0].ninactive, "Should never have inactive pages in a full slab"); } else { - stats_expect_empty(&psset->stats.full_slabs); + stats_expect_empty(&psset->stats.full_slabs[0]); } size_t ninactive = HUGEPAGE_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; @@ -351,16 +351,16 @@ stats_expect(psset_t *psset, size_t nactive) { for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == 
nonempty_pind) { assert_zu_eq(1, - psset->stats.nonfull_slabs[i].npageslabs_nonhuge, + psset->stats.nonfull_slabs[i][0].npageslabs, "Should have found a slab"); expect_zu_eq(nactive, - psset->stats.nonfull_slabs[i].nactive_nonhuge, + psset->stats.nonfull_slabs[i][0].nactive, "Mismatch in active pages"); expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i].ninactive_nonhuge, + psset->stats.nonfull_slabs[i][0].ninactive, "Mismatch in inactive pages"); } else { - stats_expect_empty(&psset->stats.nonfull_slabs[i]); + stats_expect_empty(&psset->stats.nonfull_slabs[i][0]); } } } From be0d7a53f3ca361d68f9a820157e9af49c989398 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:43:10 -0800 Subject: [PATCH 2002/2608] HPA: Don't track inactive pages. This is really only useful for human consumption. Correspondingly, emit it only in the human-readable stats, and let everybody else compute from the hugepage size and nactive. --- include/jemalloc/internal/psset.h | 2 -- src/ctl.c | 27 ++------------------------- src/hpa.c | 1 - src/psset.c | 2 -- src/stats.c | 23 +++++++---------------- test/unit/psset.c | 7 ------- 6 files changed, 9 insertions(+), 53 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index d8189666..3320d4e5 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -29,8 +29,6 @@ struct psset_bin_stats_s { size_t npageslabs; /* Of them, how many pages are active? */ size_t nactive; - /* How many are inactive? 
*/ - size_t ninactive; }; typedef struct psset_stats_s psset_stats_t; diff --git a/src/ctl.c b/src/ctl.c index 516add4e..aa878583 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -233,16 +233,12 @@ CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -659,14 +655,10 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, - {NAME("ninactive_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_huge)}, {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, - {NAME("ninactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge)}, + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)} }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -674,14 +666,10 @@ static const ctl_named_node_t 
stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, - {NAME("ninactive_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge)}, {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, - {NAME("ninactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge)} + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -3531,8 +3519,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ninactive, size_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, @@ -3540,8 +3526,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ninactive, size_t); /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, @@ -3550,9 +3534,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_no CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, 
arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ninactive, - size_t); /* Nonfull, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, @@ -3561,10 +3542,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_hu CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ninactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ninactive, - size_t); - static const ctl_named_node_t * stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/hpa.c b/src/hpa.c index a206cffe..4397c9d6 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -664,7 +664,6 @@ static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { assert(bin_stats->npageslabs == 0); assert(bin_stats->nactive == 0); - assert(bin_stats->ninactive == 0); } static void diff --git a/src/psset.c b/src/psset.c index a91653f4..e8d847a3 100644 --- a/src/psset.c +++ b/src/psset.c @@ -21,7 +21,6 @@ static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { dst->npageslabs += src->npageslabs; dst->nactive += src->nactive; - dst->ninactive += src->ninactive; } void @@ -54,7 +53,6 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, binstats[huge_idx].npageslabs += mul * 1; size_t nactive = hpdata_nactive_get(ps); binstats[huge_idx].nactive += mul * nactive; - binstats[huge_idx].ninactive += mul * (HUGEPAGE_PAGES - nactive); } static void diff --git a/src/stats.c b/src/stats.c index 1b51c8b7..a8d3ffe8 100644 --- a/src/stats.c +++ b/src/stats.c @@ -819,15 
+819,14 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_huge", - i, &ninactive_huge, size_t); + ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i, &npageslabs_nonhuge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ninactive_nonhuge", - i, &ninactive_nonhuge, size_t); + ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge; size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -875,10 +874,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nactive_huge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); - emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, - &ninactive_huge); - emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, - &ninactive_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) @@ -905,14 +900,14 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ninactive_huge", - &ninactive_huge, size_t); + ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES + - nactive_huge; CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", &nactive_nonhuge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ninactive_nonhuge", - &ninactive_nonhuge, size_t); + ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge; bool in_gap_prev = 
in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); @@ -938,14 +933,10 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge); emitter_json_kv(emitter, "nactive_huge", emitter_type_size, &nactive_huge); - emitter_json_kv(emitter, "ninactive_huge", emitter_type_size, - &ninactive_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); - emitter_json_kv(emitter, "ninactive_nonhuge", emitter_type_size, - &ninactive_huge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ diff --git a/test/unit/psset.c b/test/unit/psset.c index 020a8325..88014445 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -325,8 +325,6 @@ stats_expect_empty(psset_bin_stats_t *stats) { "Supposedly empty bin had positive npageslabs"); expect_zu_eq(0, stats->nactive, "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); - expect_zu_eq(0, stats->ninactive, "Unexpected nonempty bin" - "Supposedly empty bin had positive ninactive"); } static void @@ -337,8 +335,6 @@ stats_expect(psset_t *psset, size_t nactive) { expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs[0].nactive, "Should have exactly filled the bin"); - expect_zu_eq(0, psset->stats.full_slabs[0].ninactive, - "Should never have inactive pages in a full slab"); } else { stats_expect_empty(&psset->stats.full_slabs[0]); } @@ -356,9 +352,6 @@ stats_expect(psset_t *psset, size_t nactive) { expect_zu_eq(nactive, psset->stats.nonfull_slabs[i][0].nactive, "Mismatch in active pages"); - expect_zu_eq(ninactive, - psset->stats.nonfull_slabs[i][0].ninactive, - "Mismatch in inactive pages"); } else { stats_expect_empty(&psset->stats.nonfull_slabs[i][0]); } From 68a1666e915382cec716247d3b5950a066ef0768 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 18:58:58 -0800 Subject: [PATCH 
2003/2608] hpdata: Rename "dirty" to "touched". This matches the usage in the rest of the codebase. --- include/jemalloc/internal/hpdata.h | 25 +++++++++++++++---------- src/hpa.c | 2 +- src/hpdata.c | 24 ++++++++++++------------ test/unit/hpdata.c | 12 ++++++------ 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 12a72a66..f8001586 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -67,14 +67,14 @@ struct hpdata_s { fb_group_t active_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; /* - * Number of dirty pages, and a bitmap tracking them. This really means - * "dirty" from the OS's point of view; it includes both active and - * inactive pages that have been touched by the user. + * Number of dirty or active pages, and a bitmap tracking them. One + * way to think of this is as which pages are dirty from the OS's + * perspective. */ - size_t h_ndirty; + size_t h_ntouched; /* The dirty pages (using the same definition as above). 
*/ - fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; TYPED_LIST(hpdata_list, hpdata_t, ql_link) @@ -148,9 +148,14 @@ hpdata_nactive_get(hpdata_t *hpdata) { return hpdata->h_nactive; } +static inline size_t +hpdata_ntouched_get(hpdata_t *hpdata) { + return hpdata->h_ntouched; +} + static inline size_t hpdata_ndirty_get(hpdata_t *hpdata) { - return hpdata->h_ndirty; + return hpdata->h_ntouched - hpdata->h_nactive; } static inline void @@ -174,14 +179,14 @@ hpdata_consistent(hpdata_t *hpdata) { != hpdata->h_nactive) { return false; } - if (fb_scount(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata->h_ndirty) { + if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) + != hpdata->h_ntouched) { return false; } - if (hpdata->h_ndirty < hpdata->h_nactive) { + if (hpdata->h_ntouched < hpdata->h_nactive) { return false; } - if (hpdata->h_huge && hpdata->h_ndirty != HUGEPAGE_PAGES) { + if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { return false; } return true; diff --git a/src/hpa.c b/src/hpa.c index 4397c9d6..822e3bac 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -161,7 +161,7 @@ hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) { if (hpdata_changing_state_get(ps)) { return false; } - size_t purgeable = hpdata_ndirty_get(ps) - hpdata_nactive_get(ps); + size_t purgeable = hpdata_ndirty_get(ps); return purgeable > HUGEPAGE_PAGES * 25 / 100 || (purgeable > 0 && hpdata_empty(ps)); } diff --git a/src/hpdata.c b/src/hpdata.c index 78816196..e2a0b37f 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -28,8 +28,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); - hpdata->h_ndirty = 0; - fb_init(hpdata->dirty_pages, HUGEPAGE_PAGES); + hpdata->h_ntouched = 0; + fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); hpdata_assert_consistent(hpdata); } 
@@ -84,10 +84,10 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * We might be about to dirty some memory for the first time; update our * count if so. */ - size_t new_dirty = fb_ucount(hpdata->dirty_pages, HUGEPAGE_PAGES, + size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); - fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, result, npages); - hpdata->h_ndirty += new_dirty; + fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); + hpdata->h_ntouched += new_dirty; /* * We might have shrunk the longest free range. We have to keep @@ -167,10 +167,10 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { */ fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); fb_bit_and(purge_state->to_purge, purge_state->to_purge, - hpdata->dirty_pages, HUGEPAGE_PAGES); + hpdata->touched_pages, HUGEPAGE_PAGES); /* We purge everything we can. */ - assert(hpdata->h_ndirty - hpdata->h_nactive == fb_scount( + assert(hpdata->h_ntouched - hpdata->h_nactive == fb_scount( purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); hpdata_assert_consistent(hpdata); @@ -225,10 +225,10 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { fb_bit_not(purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES); - fb_bit_and(hpdata->dirty_pages, hpdata->dirty_pages, + fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, purge_state->to_purge, HUGEPAGE_PAGES); - assert(hpdata->h_ndirty >= purge_state->npurged); - hpdata->h_ndirty -= purge_state->npurged; + assert(hpdata->h_ntouched >= purge_state->npurged); + hpdata->h_ntouched -= purge_state->npurged; hpdata_assert_consistent(hpdata); } @@ -241,8 +241,8 @@ hpdata_hugify_begin(hpdata_t *hpdata) { assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; hpdata->h_huge = true; - fb_set_range(hpdata->dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); - hpdata->h_ndirty = HUGEPAGE_PAGES; + fb_set_range(hpdata->touched_pages, 
HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + hpdata->h_ntouched = HUGEPAGE_PAGES; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index aa4506f7..688911a6 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -65,7 +65,7 @@ TEST_BEGIN(test_purge_simple) { /* Create HUGEPAGE_PAGES / 4 dirty inactive pages at the beginning. */ hpdata_unreserve(&hpdata, alloc, HUGEPAGE_PAGES / 4 * PAGE); - expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); expect_false(hpdata_changing_state_get(&hpdata), ""); @@ -93,7 +93,7 @@ TEST_BEGIN(test_purge_simple) { hpdata_purge_end(&hpdata, &purge_state); expect_false(hpdata_changing_state_get(&hpdata), ""); - expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -118,7 +118,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), HUGEPAGE_PAGES / 4 * PAGE); - expect_zu_eq(hpdata_ndirty_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); hpdata_purge_state_t purge_state; hpdata_purge_begin(&hpdata, &purge_state); @@ -153,7 +153,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_purge_end(&hpdata, &purge_state); - expect_zu_eq(hpdata_ndirty_get(&hpdata), HUGEPAGE_PAGES / 4, ""); + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -164,7 +164,7 @@ TEST_BEGIN(test_hugify) { void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); - expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ndirty_get(&hpdata), ""); + expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ntouched_get(&hpdata), ""); expect_false(hpdata_changing_state_get(&hpdata), ""); hpdata_hugify_begin(&hpdata); @@ -174,7 +174,7 @@ TEST_BEGIN(test_hugify) { 
expect_false(hpdata_changing_state_get(&hpdata), ""); /* Hugeifying should have increased the dirty page count. */ - expect_zu_eq(HUGEPAGE_PAGES, hpdata_ndirty_get(&hpdata), ""); + expect_zu_eq(HUGEPAGE_PAGES, hpdata_ntouched_get(&hpdata), ""); } TEST_END From d3e5ea03c5660ba46b6efcc10ad0b804140e2690 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 19:15:54 -0800 Subject: [PATCH 2004/2608] HPA: Track dirty stats. --- include/jemalloc/internal/psset.h | 2 ++ src/ctl.c | 51 ++++++++++++++++++++++--------- src/psset.c | 5 +-- src/stats.c | 34 ++++++++++++++++++--- 4 files changed, 72 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 3320d4e5..fef0468e 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -29,6 +29,8 @@ struct psset_bin_stats_s { size_t npageslabs; /* Of them, how many pages are active? */ size_t nactive; + /* And how many are dirty? */ + size_t ndirty; }; typedef struct psset_stats_s psset_stats_t; diff --git a/src/ctl.c b/src/ctl.c index aa878583..80fb90e3 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -231,14 +231,18 @@ CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) -CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge) 
CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) -CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -651,25 +655,33 @@ MUTEX_PROF_ARENA_MUTEXES }; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)} + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)} }; static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, {NAME("nactive_nonhuge"), - 
CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)} + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)} }; static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { @@ -3519,6 +3531,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, size_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, @@ -3526,6 +3540,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t); + /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, @@ -3534,6 +3551,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_no CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ndirty, + size_t); /* 
Nonfull, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, @@ -3542,6 +3562,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_hu CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ndirty, + size_t); static const ctl_named_node_t * stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/psset.c b/src/psset.c index e8d847a3..a09913c5 100644 --- a/src/psset.c +++ b/src/psset.c @@ -21,6 +21,7 @@ static void psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { dst->npageslabs += src->npageslabs; dst->nactive += src->nactive; + dst->ndirty += src->ndirty; } void @@ -51,8 +52,8 @@ psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, size_t mul = insert ? 
(size_t)1 : (size_t)-1; size_t huge_idx = (size_t)hpdata_huge_get(ps); binstats[huge_idx].npageslabs += mul * 1; - size_t nactive = hpdata_nactive_get(ps); - binstats[huge_idx].nactive += mul * nactive; + binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); + binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); } static void diff --git a/src/stats.c b/src/stats.c index a8d3ffe8..ea0be980 100644 --- a/src/stats.c +++ b/src/stats.c @@ -810,16 +810,20 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t npageslabs_huge; size_t nactive_huge; size_t ninactive_huge; + size_t ndirty_huge; size_t npageslabs_nonhuge; size_t nactive_nonhuge; size_t ninactive_nonhuge; + size_t ndirty_nonhuge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, &nactive_huge, size_t); ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", + i, &ndirty_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i, &npageslabs_nonhuge, size_t); @@ -827,6 +831,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { i, &nactive_nonhuge, size_t); ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge; + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", + i, &ndirty_nonhuge, size_t); size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -844,7 +850,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" - " ninactive: %zu huge, %zu nonhuge \n", + " ninactive: %zu huge, %zu nonhuge \n" + " ndirty: %zu huge, %zu nonhuge \n", nevictions, rate_per_second(nevictions, uptime), npurge_passes, rate_per_second(npurge_passes, uptime), 
npurges, rate_per_second(npurges, uptime), @@ -852,7 +859,9 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, - ninactive_huge, ninactive_nonhuge); + ninactive_huge, ninactive_nonhuge, + ndirty_huge, ndirty_nonhuge); + emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "nevictions", emitter_type_uint64, &nevictions); @@ -868,12 +877,16 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); - emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, - &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ COL_HDR(row, size, NULL, right, 20, size) @@ -881,9 +894,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, npageslabs_huge, NULL, right, 16, size) COL_HDR(row, nactive_huge, NULL, right, 16, size) COL_HDR(row, ninactive_huge, NULL, right, 16, size) + COL_HDR(row, ndirty_huge, NULL, right, 16, size) COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) + COL_HDR(row, ndirty_nonhuge, NULL, right, 20, size) size_t stats_arenas_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); @@ -900,12 +915,17 @@ 
stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge", + &ndirty_huge, size_t); ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; + CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", &nactive_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", + &ndirty_nonhuge, size_t); ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge; @@ -921,9 +941,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_npageslabs_huge.size_val = npageslabs_huge; col_nactive_huge.size_val = nactive_huge; col_ninactive_huge.size_val = ninactive_huge; + col_ndirty_huge.size_val = ndirty_huge; col_npageslabs_nonhuge.size_val = npageslabs_nonhuge; col_nactive_nonhuge.size_val = nactive_nonhuge; col_ninactive_nonhuge.size_val = ninactive_nonhuge; + col_ndirty_nonhuge.size_val = ndirty_nonhuge; if (!in_gap) { emitter_table_row(emitter, &row); } @@ -933,10 +955,14 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &npageslabs_huge); emitter_json_kv(emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, + &ndirty_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ From 061cabb7122d1fd63b8bfbe980a1fb1dcf3033f4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Dec 2020 19:35:21 -0800 Subject: [PATCH 2005/2608] HPA stats: report retained instead of inactive. 
This more closely maps to the PAC. --- src/stats.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/stats.c b/src/stats.c index ea0be980..355921c0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -809,19 +809,17 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t npageslabs_huge; size_t nactive_huge; - size_t ninactive_huge; size_t ndirty_huge; size_t npageslabs_nonhuge; size_t nactive_nonhuge; - size_t ninactive_nonhuge; size_t ndirty_nonhuge; + size_t nretained_nonhuge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, &nactive_huge, size_t); - ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - nactive_huge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", i, &ndirty_huge, size_t); @@ -829,10 +827,10 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { i, &npageslabs_nonhuge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i, &nactive_nonhuge, size_t); - ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - - nactive_nonhuge; CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", i, &ndirty_nonhuge, size_t); + nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; size_t sec_bytes; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); @@ -850,8 +848,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" - " ninactive: %zu huge, %zu nonhuge \n" - " ndirty: %zu huge, %zu nonhuge \n", + " ndirty: %zu huge, %zu nonhuge \n" + " nretained: 0 huge, %zu nonhuge \n", nevictions, rate_per_second(nevictions, uptime), npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), @@ -859,8 
+857,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, - ninactive_huge, ninactive_nonhuge, - ndirty_huge, ndirty_nonhuge); + ndirty_huge, ndirty_nonhuge, + nretained_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "nevictions", emitter_type_uint64, @@ -893,12 +891,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, npageslabs_huge, NULL, right, 16, size) COL_HDR(row, nactive_huge, NULL, right, 16, size) - COL_HDR(row, ninactive_huge, NULL, right, 16, size) COL_HDR(row, ndirty_huge, NULL, right, 16, size) COL_HDR(row, npageslabs_nonhuge, NULL, right, 20, size) COL_HDR(row, nactive_nonhuge, NULL, right, 20, size) - COL_HDR(row, ninactive_nonhuge, NULL, right, 20, size) COL_HDR(row, ndirty_nonhuge, NULL, right, 20, size) + COL_HDR(row, nretained_nonhuge, NULL, right, 20, size) size_t stats_arenas_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(stats_arenas_mib, 0, "stats.arenas"); @@ -917,8 +914,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nactive_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge", &ndirty_huge, size_t); - ninactive_huge = npageslabs_huge * HUGEPAGE_PAGES - - nactive_huge; CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); @@ -926,8 +921,8 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nactive_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", &ndirty_nonhuge, size_t); - ninactive_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - - nactive_nonhuge; + nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; bool in_gap_prev = in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); @@ -940,12 +935,11 @@ 
stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_ind.size_val = j; col_npageslabs_huge.size_val = npageslabs_huge; col_nactive_huge.size_val = nactive_huge; - col_ninactive_huge.size_val = ninactive_huge; col_ndirty_huge.size_val = ndirty_huge; col_npageslabs_nonhuge.size_val = npageslabs_nonhuge; col_nactive_nonhuge.size_val = nactive_nonhuge; - col_ninactive_nonhuge.size_val = ninactive_nonhuge; col_ndirty_nonhuge.size_val = ndirty_nonhuge; + col_nretained_nonhuge.size_val = nretained_nonhuge; if (!in_gap) { emitter_table_row(emitter, &row); } From 99fc0717e653277c3d7fe77fe84316ad47381936 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 15:58:31 -0800 Subject: [PATCH 2006/2608] psset: Reconceptualize insertion/removal. Really, this isn't a functional change, just a naming change. We start thinking of pageslabs as being always in the psset. What we used to think of as removal is now thought of as being in the psset, but in the process of being updated (and therefore, unavalable for serving new allocations). This is in preparation of subsequent changes to support deferred purging; allocations will still be in the psset for the purposes of choosing when to purge, but not for purposes of allocation/deallocation. --- include/jemalloc/internal/hpdata.h | 18 +++++++---- include/jemalloc/internal/psset.h | 10 ++++-- src/hpa.c | 35 ++++++++++++-------- src/hpdata.c | 23 ++++++------- src/psset.c | 52 +++++++++++++++--------------- test/unit/hpdata.c | 6 ++++ test/unit/psset.c | 33 ++++++++++++------- 7 files changed, 106 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index f8001586..2e2e1d8e 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -44,8 +44,13 @@ struct hpdata_s { bool h_mid_purge; bool h_mid_hugify; - /* Whether or not the hpdata is a the psset. 
*/ - bool h_in_psset; + /* + * Whether or not the hpdata is being updated in the psset (i.e. if + * there has been a psset_update_begin call issued without a matching + * psset_update_end call). Eventually this will expand to other types + * of updates. + */ + bool h_updating; union { /* When nonempty, used by the psset bins. */ @@ -123,13 +128,14 @@ hpdata_mid_hugify_get(const hpdata_t *hpdata) { } static inline bool -hpdata_in_psset_get(const hpdata_t *hpdata) { - return hpdata->h_in_psset; +hpdata_updating_get(const hpdata_t *hpdata) { + return hpdata->h_updating; } static inline void -hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) { - hpdata->h_in_psset = in_psset; +hpdata_updating_set(hpdata_t *hpdata, bool updating) { + assert(updating != hpdata->h_updating); + hpdata->h_updating = updating; } static inline size_t diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index fef0468e..a7c9a8b6 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -64,10 +64,14 @@ struct psset_s { void psset_init(psset_t *psset); void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src); -void psset_insert(psset_t *psset, hpdata_t *ps); -void psset_remove(psset_t *psset, hpdata_t *ps); +/* + * Begin or end updating the given pageslab's metadata. While the pageslab is + * being updated, it won't be returned from psset_fit calls. + */ +void psset_update_begin(psset_t *psset, hpdata_t *ps); +void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. 
*/ -hpdata_t *psset_fit(psset_t *psset, size_t size); +hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/hpa.c b/src/hpa.c index 822e3bac..6a4f2a6d 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -333,14 +333,14 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) } assert(edata_arena_ind_get(edata) == shard->ind); - hpdata_t *ps = psset_fit(&shard->psset, size); + hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } - psset_remove(&shard->psset, ps); + psset_update_begin(&shard->psset, ps); void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, @@ -365,7 +365,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * require some sort of prepare + commit functionality that's a * little much to deal with for now. */ - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; @@ -377,7 +377,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) hpdata_hugify_begin(ps); shard->stats.nhugifies++; } - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); if (hugify) { @@ -409,9 +409,9 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * hugified. Undo our operation, taking care to meet * the precondition that the ps isn't in the psset. 
*/ - psset_remove(&shard->psset, ps); + psset_update_begin(&shard->psset, ps); hpa_purge(tsdn, shard, ps); - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); } malloc_mutex_unlock(tsdn, &shard->mtx); } @@ -455,6 +455,15 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { /* We got the new edata; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); + /* + * This will go away soon. The psset doesn't draw a distinction between + * pageslab removal and updating. If this is a new pageslab, we pretend + * that it's an old one that's been getting updated. + */ + if (!hpdata_updating_get(ps)) { + hpdata_updating_set(ps, true); + } + edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { shard->stats.nevictions++; @@ -500,7 +509,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); @@ -615,7 +624,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * psset and we can do our metadata update. The other thread is * in charge of reinserting the ps, so we're done. */ - assert(!hpdata_in_psset_get(ps)); + assert(hpdata_updating_get(ps)); hpdata_unreserve(ps, unreserve_addr, unreserve_size); malloc_mutex_unlock(tsdn, &shard->mtx); return; @@ -624,15 +633,15 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * No other thread is purging, and the ps is non-empty, so it should be * in the psset. */ - assert(hpdata_in_psset_get(ps)); - psset_remove(&shard->psset, ps); + assert(!hpdata_updating_get(ps)); + psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); if (!hpa_should_purge(shard, ps)) { /* * This should be the common case; no other thread is purging, * and we won't purge either. 
*/ - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); return; } @@ -648,7 +657,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { malloc_mutex_unlock(tsdn, &shard->mtx); hpa_handle_ps_eviction(tsdn, shard, ps); } else { - psset_insert(&shard->psset, ps); + psset_update_end(&shard->psset, ps); malloc_mutex_unlock(tsdn, &shard->mtx); } } @@ -669,7 +678,7 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { static void hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - hpdata_t *ps = psset_fit(psset, PAGE); + hpdata_t *ps = psset_pick_alloc(psset, PAGE); assert(ps == NULL); for (int huge = 0; huge <= 1; huge++) { hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]); diff --git a/src/hpdata.c b/src/hpdata.c index e2a0b37f..0af7da0c 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,7 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; - hpdata->h_in_psset = false; + hpdata->h_updating = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -37,7 +37,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); - assert(!hpdata_in_psset_get(hpdata)); + assert(hpdata->h_updating); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -118,7 +118,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - 
(uintptr_t)hpdata_addr_get(hpdata)) @@ -147,7 +147,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = true; @@ -185,7 +185,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * a consistent state. */ assert(hpdata->h_mid_purge); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); /* Should have dehugified already (if necessary). */ assert(!hpdata->h_huge); assert(!hpdata->h_mid_hugify); @@ -215,7 +215,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(!hpdata->h_in_psset); + assert(hpdata->h_updating); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = false; @@ -236,7 +236,7 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { void hpdata_hugify_begin(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(!hpdata_in_psset_get(hpdata)); + assert(hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; @@ -250,10 +250,10 @@ void hpdata_hugify_end(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); /* - * This is the exception to the "no metadata tweaks while in the psset" - * rule. + * This is the exception to the "no-metadata updates without informing + * the psset first" rule; this assert would be incorrect. 
*/ - /* assert(!hpdata_in_psset_get(hpdata)); */ + /* assert(hpdata->h_updating); */ assert(!hpdata->h_mid_purge); assert(hpdata->h_mid_hugify); hpdata->h_mid_hugify = false; @@ -263,7 +263,8 @@ hpdata_hugify_end(hpdata_t *hpdata) { void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(!hpdata_in_psset_get(hpdata)); + assert(hpdata->h_updating); + assert(hpdata->h_updating); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_huge = false; diff --git a/src/psset.c b/src/psset.c index a09913c5..22564605 100644 --- a/src/psset.c +++ b/src/psset.c @@ -79,11 +79,33 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { } void -psset_insert(psset_t *psset, hpdata_t *ps) { +psset_update_begin(psset_t *psset, hpdata_t *ps) { + hpdata_assert_consistent(ps); + assert(!hpdata_updating_get(ps)); + hpdata_updating_set(ps, true); + + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + if (longest_free_range == 0) { + psset_bin_stats_remove(psset->stats.full_slabs, ps); + return; + } + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + psset_hpdata_heap_remove(psset, pind, ps); + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + } +} + +void +psset_update_end(psset_t *psset, hpdata_t *ps) { assert(!hpdata_empty(ps)); hpdata_assert_consistent(ps); - assert(!hpdata_in_psset_get(ps)); - hpdata_in_psset_set(ps, true); + assert(hpdata_updating_get(ps)); + hpdata_updating_set(ps, false); size_t longest_free_range = hpdata_longest_free_range_get(ps); if (longest_free_range == 0) { @@ -105,30 +127,8 @@ psset_insert(psset_t *psset, hpdata_t *ps) { psset_hpdata_heap_insert(psset, pind, ps); } -void -psset_remove(psset_t *psset, hpdata_t *ps) { - hpdata_assert_consistent(ps); - assert(hpdata_in_psset_get(ps)); - hpdata_in_psset_set(ps, false); - - size_t 
longest_free_range = hpdata_longest_free_range_get(ps); - - if (longest_free_range == 0) { - psset_bin_stats_remove(psset->stats.full_slabs, ps); - return; - } - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - psset_hpdata_heap_remove(psset, pind, ps); - if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); - } -} - hpdata_t * -psset_fit(psset_t *psset, size_t size) { +psset_pick_alloc(psset_t *psset, size_t size) { pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 688911a6..cf7b89fd 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -7,6 +7,8 @@ TEST_BEGIN(test_reserve_alloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); + /* Allocating a page at a time, we should do first fit. */ for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { expect_true(hpdata_consistent(&hpdata), ""); @@ -59,6 +61,8 @@ TEST_BEGIN(test_purge_simple) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); + void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); @@ -107,6 +111,7 @@ TEST_END TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); /* Allocate the first 3/4 of the pages. 
*/ void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); @@ -160,6 +165,7 @@ TEST_END TEST_BEGIN(test_hugify) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_updating_set(&hpdata, true); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); diff --git a/test/unit/psset.c b/test/unit/psset.c index 88014445..2043e4eb 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -19,41 +19,50 @@ static void test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); + + /* + * As in hpa.c; pretend that the ps is already in the psset and just + * being updated, until we implement true insert/removal support. + */ + if (!hpdata_updating_get(ps)) { + hpdata_updating_set(ps, true); + } + void *addr = hpdata_reserve_alloc(ps, size); edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - psset_insert(psset, ps); + psset_update_end(psset, ps); } static bool test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { - hpdata_t *ps = psset_fit(psset, size); + hpdata_t *ps = psset_pick_alloc(psset, size); if (ps == NULL) { return true; } - psset_remove(psset, ps); + psset_update_begin(psset, ps); void *addr = hpdata_reserve_alloc(ps, size); edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); edata_ps_set(r_edata, ps); - psset_insert(psset, ps); + psset_update_end(psset, ps); return false; } static hpdata_t * test_psset_dalloc(psset_t *psset, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); - psset_remove(psset, ps); + psset_update_begin(psset, ps); hpdata_unreserve(ps, edata_addr_get(edata), 
edata_size_get(edata)); if (hpdata_empty(ps)) { return ps; } else { - psset_insert(psset, ps); + psset_update_end(psset, ps); return NULL; } } @@ -390,9 +399,9 @@ TEST_BEGIN(test_stats) { test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); stats_expect(&psset, 1); - psset_remove(&psset, &pageslab); + psset_update_begin(&psset, &pageslab); stats_expect(&psset, 0); - psset_insert(&psset, &pageslab); + psset_update_end(&psset, &pageslab); stats_expect(&psset, 1); } TEST_END @@ -490,7 +499,7 @@ TEST_BEGIN(test_insert_remove) { worse_alloc); /* Remove better; should still be able to alloc from worse. */ - psset_remove(&psset, &pageslab); + psset_update_begin(&psset, &pageslab); err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "Removal should still leave an empty page"); @@ -504,7 +513,7 @@ TEST_BEGIN(test_insert_remove) { */ ps = test_psset_dalloc(&psset, &worse_alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction of nonempty pageslab"); - psset_insert(&psset, &pageslab); + psset_update_end(&psset, &pageslab); err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "psset should be nonempty"); expect_ptr_eq(&pageslab, edata_ps_get(&alloc[HUGEPAGE_PAGES - 1]), @@ -514,8 +523,8 @@ TEST_BEGIN(test_insert_remove) { */ ps = test_psset_dalloc(&psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(ps, "Incorrect eviction"); - psset_remove(&psset, &pageslab); - psset_remove(&psset, &worse_pageslab); + psset_update_begin(&psset, &pageslab); + psset_update_begin(&psset, &worse_pageslab); err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); } From bf64557ed66897b6833875542a6674652e640653 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 17:42:04 -0800 Subject: [PATCH 2007/2608] Move empty slab tracking to the psset. 
We're moving towards a world in which purging decisions are less rigidly enforced at a single-hugepage level. In that world, it makes sense to keep around some hpdatas which are not completely purged, in which case we'll need to track them. --- include/jemalloc/internal/hpa.h | 17 ---- include/jemalloc/internal/hpdata.h | 25 ++++- include/jemalloc/internal/psset.h | 11 ++- src/ctl.c | 4 - src/hpa.c | 94 ++++++------------ src/hpdata.c | 25 +++-- src/psset.c | 148 +++++++++++++++++++---------- src/stats.c | 7 -- test/unit/hpdata.c | 6 -- test/unit/psset.c | 25 +++-- 10 files changed, 193 insertions(+), 169 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index bea88c37..f62c3278 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -8,14 +8,6 @@ typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { - /* - * The number of times we've fully purged a hugepage and evicted it from - * the psset. - * - * Guarded by grow_mtx. - */ - uint64_t nevictions; - /* * The number of times we've purged within a hugepage. * @@ -80,15 +72,6 @@ struct hpa_shard_s { */ size_t alloc_max; - /* - * Slabs currently purged away. They are hugepage-sized and - * hugepage-aligned, but have had pages_nohuge and pages_purge_forced - * called on them. - * - * Guarded by grow_mtx. - */ - hpdata_list_t unused_slabs; - /* * How many grow operations have occurred. * diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 2e2e1d8e..393ed27f 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -52,14 +52,17 @@ struct hpdata_s { */ bool h_updating; + /* Whether or not the hpdata is in a psset. */ + bool h_in_psset; + union { - /* When nonempty, used by the psset bins. */ + /* When nonempty (and also nonfull), used by the psset bins. 
*/ phn(hpdata_t) ph_link; /* * When empty (or not corresponding to any hugepage), list * linkage. */ - ql_elm(hpdata_t) ql_link; + ql_elm(hpdata_t) ql_link_empty; }; /* The length of the largest contiguous sequence of inactive pages. */ @@ -82,7 +85,7 @@ struct hpdata_s { fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; -TYPED_LIST(hpdata_list, hpdata_t, ql_link) +TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); @@ -138,6 +141,17 @@ hpdata_updating_set(hpdata_t *hpdata, bool updating) { hpdata->h_updating = updating; } +static inline bool +hpdata_in_psset_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset; +} + +static inline void +hpdata_in_psset_set(hpdata_t *hpdata, bool in_psset) { + assert(in_psset != hpdata->h_in_psset); + hpdata->h_in_psset = in_psset; +} + static inline size_t hpdata_longest_free_range_get(const hpdata_t *hpdata) { return hpdata->h_longest_free_range; @@ -208,6 +222,11 @@ hpdata_empty(hpdata_t *hpdata) { return hpdata->h_nactive == 0; } +static inline bool +hpdata_full(hpdata_t *hpdata) { + return hpdata->h_nactive == HUGEPAGE_PAGES; +} + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); /* diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index a7c9a8b6..b220609b 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -35,7 +35,6 @@ struct psset_bin_stats_s { typedef struct psset_stats_s psset_stats_t; struct psset_stats_s { - /* * The second index is huge stats; nonfull_slabs[pszind][0] contains * stats for the non-huge slabs in bucket pszind, while @@ -44,10 +43,13 @@ struct psset_stats_s { psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2]; /* - * Full slabs don't live in any edata heap. But we still track their + * Full slabs don't live in any edata heap, but we still track their * stats. 
*/ psset_bin_stats_t full_slabs[2]; + + /* Empty slabs are similar. */ + psset_bin_stats_t empty_slabs[2]; }; typedef struct psset_s psset_t; @@ -59,6 +61,8 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; + /* Slabs with no active allocations. */ + hpdata_empty_list_t empty_slabs; }; void psset_init(psset_t *psset); @@ -74,4 +78,7 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); +void psset_insert(psset_t *psset, hpdata_t *ps); +void psset_remove(psset_t *psset, hpdata_t *ps); + #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/ctl.c b/src/ctl.c index 80fb90e3..3cec637c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -226,7 +226,6 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) -CTL_PROTO(stats_arenas_i_hpa_shard_nevictions) CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) @@ -700,7 +699,6 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("nonfull_slabs"), CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}, - {NAME("nevictions"), CTL(stats_arenas_i_hpa_shard_nevictions)}, {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, @@ -3514,8 +3512,6 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nevictions, - arenas_i(mib[2])->astats->hpastats.nonderived_stats.nevictions, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, 
arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, diff --git a/src/hpa.c b/src/hpa.c index 6a4f2a6d..8f4642c8 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -68,14 +68,12 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->alloc_max = alloc_max; - hpdata_list_init(&shard->unused_slabs); shard->age_counter = 0; shard->eden = NULL; shard->eden_len = 0; shard->ind = ind; shard->emap = emap; - shard->stats.nevictions = 0; shard->stats.npurge_passes = 0; shard->stats.npurges = 0; shard->stats.nhugifies = 0; @@ -103,7 +101,6 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, static void hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) { - dst->nevictions += src->nevictions; dst->npurge_passes += src->npurge_passes; dst->npurges += src->npurges; dst->nhugifies += src->nhugifies; @@ -171,15 +168,6 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); hpdata_t *ps = NULL; - /* Is there address space waiting for reuse? */ - malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - ps = hpdata_list_first(&shard->unused_slabs); - if (ps != NULL) { - hpdata_list_remove(&shard->unused_slabs, ps); - hpdata_age_set(ps, shard->age_counter++); - return ps; - } - /* Is eden a perfect fit? */ if (shard->eden != NULL && shard->eden_len == HUGEPAGE) { ps = hpa_alloc_ps(tsdn, shard); @@ -300,26 +288,6 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { } } -/* - * Does the metadata tracking associated with a page slab becoming empty. The - * psset doesn't hold empty pageslabs, but we do want address space reuse, so we - * track these pages outside the psset. 
- */ -static void -hpa_handle_ps_eviction(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { - /* - * We do relatively expensive system calls. The ps was evicted, so no - * one should touch it while we're also touching it. - */ - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - malloc_mutex_assert_not_owner(tsdn, &shard->grow_mtx); - - malloc_mutex_lock(tsdn, &shard->grow_mtx); - shard->stats.nevictions++; - hpdata_list_prepend(&shard->unused_slabs, ps); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); -} - static edata_t * hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { bool err; @@ -341,6 +309,18 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) } psset_update_begin(&shard->psset, ps); + + if (hpdata_empty(ps)) { + /* + * If the pageslab used to be empty, treat it as though it's + * brand new for fragmentation-avoidance purposes; what we're + * trying to approximate is the age of the allocations *in* that + * pageslab, and the allocations in the new pageslab are + * definitionally the youngest in this hpa shard. + */ + hpdata_age_set(ps, shard->age_counter++); + } + void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, @@ -453,26 +433,20 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { return NULL; } - /* We got the new edata; allocate from it. */ + /* We got the pageslab; allocate from it. */ malloc_mutex_lock(tsdn, &shard->mtx); - /* - * This will go away soon. The psset doesn't draw a distinction between - * pageslab removal and updating. If this is a new pageslab, we pretend - * that it's an old one that's been getting updated. 
- */ - if (!hpdata_updating_get(ps)) { - hpdata_updating_set(ps, true); - } + + psset_insert(&shard->psset, ps); edata = edata_cache_small_get(tsdn, &shard->ecs); if (edata == NULL) { - shard->stats.nevictions++; malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); - hpa_handle_ps_eviction(tsdn, shard, ps); return NULL; } + psset_update_begin(&shard->psset, ps); + void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, @@ -487,10 +461,6 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { edata_cache_small_put(tsdn, &shard->ecs, edata); - shard->stats.nevictions++; - malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); - /* We'll do a fake purge; the pages weren't really touched. */ hpdata_purge_state_t purge_state; void *purge_addr; @@ -506,7 +476,9 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { assert(!found_extent); hpdata_purge_end(ps, &purge_state); - hpa_handle_ps_eviction(tsdn, shard, ps); + psset_update_end(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } psset_update_end(&shard->psset, ps); @@ -614,9 +586,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { * management. * - The ps must not be in the psset while purging. This is because we * can't handle purge/alloc races. - * - Whoever removes the ps from the psset is the one to reinsert it (or - * to pass it to hpa_handle_ps_eviction upon emptying). This keeps - * responsibility tracking simple. + * - Whoever removes the ps from the psset is the one to reinsert it. */ if (hpdata_mid_purge_get(ps)) { /* @@ -649,17 +619,9 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { /* It's our job to purge. 
*/ hpa_purge(tsdn, shard, ps); - /* - * OK, the hpdata is as purged as we want it to be, and it's going back - * into the psset (if nonempty) or getting evicted (if empty). - */ - if (hpdata_empty(ps)) { - malloc_mutex_unlock(tsdn, &shard->mtx); - hpa_handle_ps_eviction(tsdn, shard, ps); - } else { - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - } + psset_update_end(&shard->psset, ps); + + malloc_mutex_unlock(tsdn, &shard->mtx); } void @@ -678,8 +640,6 @@ hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { static void hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - hpdata_t *ps = psset_pick_alloc(psset, PAGE); - assert(ps == NULL); for (int huge = 0; huge <= 1; huge++) { hpa_shard_assert_stats_empty(&psset->stats.full_slabs[huge]); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { @@ -703,8 +663,10 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } hpdata_t *ps; - while ((ps = hpdata_list_first(&shard->unused_slabs)) != NULL) { - hpdata_list_remove(&shard->unused_slabs, ps); + while ((ps = psset_pick_alloc(&shard->psset, PAGE)) != NULL) { + /* There should be no allocations anywhere. 
*/ + assert(hpdata_empty(ps)); + psset_remove(&shard->psset, ps); pages_unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/hpdata.c b/src/hpdata.c index 0af7da0c..0cfeeed2 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -25,6 +25,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; hpdata->h_updating = false; + hpdata->h_in_psset = false; hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); @@ -37,7 +38,12 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { void * hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* + * This is a metadata change; the hpdata should therefore either not be + * in the psset, or should have explicitly marked itself as being + * mid-update. + */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -118,7 +124,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(((uintptr_t)addr & PAGE_MASK) == 0); assert((sz & PAGE_MASK) == 0); size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) @@ -147,7 +154,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. 
*/ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = true; @@ -185,7 +193,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * a consistent state. */ assert(hpdata->h_mid_purge); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); /* Should have dehugified already (if necessary). */ assert(!hpdata->h_huge); assert(!hpdata->h_mid_hugify); @@ -215,7 +224,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_purge = false; @@ -236,7 +246,8 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { void hpdata_hugify_begin(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); + /* See the comment in reserve. */ + assert(!hpdata->h_in_psset || hpdata->h_updating); assert(!hpdata->h_mid_purge); assert(!hpdata->h_mid_hugify); hpdata->h_mid_hugify = true; @@ -253,7 +264,7 @@ hpdata_hugify_end(hpdata_t *hpdata) { * This is the exception to the "no-metadata updates without informing * the psset first" rule; this assert would be incorrect. 
*/ - /* assert(hpdata->h_updating); */ + /* assert(!hpdata->h_in_psset || hpdata->h_updating); */ assert(!hpdata->h_mid_purge); assert(hpdata->h_mid_hugify); hpdata->h_mid_hugify = false; diff --git a/src/psset.c b/src/psset.c index 22564605..89971020 100644 --- a/src/psset.c +++ b/src/psset.c @@ -15,6 +15,7 @@ psset_init(psset_t *psset) { } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); + hpdata_empty_list_init(&psset->empty_slabs); } static void @@ -28,6 +29,8 @@ void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); + psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]); + psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]); for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { psset_bin_stats_accum(&dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]); @@ -69,71 +72,104 @@ psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); -} - -static void -psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { - hpdata_age_heap_insert(&psset->pageslabs[pind], ps); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); -} - -void -psset_update_begin(psset_t *psset, hpdata_t *ps) { - hpdata_assert_consistent(ps); - assert(!hpdata_updating_get(ps)); - hpdata_updating_set(ps, true); - - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - if (longest_free_range == 0) { - psset_bin_stats_remove(psset->stats.full_slabs, ps); - return; - } - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - psset_hpdata_heap_remove(psset, pind, ps); if 
(hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); } } -void -psset_update_end(psset_t *psset, hpdata_t *ps) { - assert(!hpdata_empty(ps)); - hpdata_assert_consistent(ps); - assert(hpdata_updating_get(ps)); - hpdata_updating_set(ps, false); - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - if (longest_free_range == 0) { - /* - * We don't ned to track full slabs; just pretend to for stats - * purposes. See the comment at psset_bin_stats_adjust. - */ - psset_bin_stats_insert(psset->stats.full_slabs, ps); - return; - } - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - - assert(pind < PSSET_NPSIZES); +static void +psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); } - psset_hpdata_heap_insert(psset, pind, ps); + hpdata_age_heap_insert(&psset->pageslabs[pind], ps); +} + +/* + * Insert ps into the data structures we use to track allocation stats and pick + * the pageslabs for new allocations. + * + * In particular, this does *not* remove ps from any hugification / purging + * queues it may be in. + */ +static void +psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_insert(psset->stats.empty_slabs, ps); + /* + * This prepend, paired with popping the head in psset_fit, + * means we implement LIFO ordering for the empty slabs set, + * which seems reasonable. + */ + hpdata_empty_list_prepend(&psset->empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_insert(psset->stats.full_slabs, ps); + /* + * We don't need to keep track of the full slabs; we're never + * going to return them from a psset_pick_alloc call. 
+ */ + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + psset_hpdata_heap_insert(psset, pind, ps); + } +} + +/* Remove ps from those collections. */ +static void +psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_remove(psset->stats.empty_slabs, ps); + hpdata_empty_list_remove(&psset->empty_slabs, ps); + } else if (hpdata_full(ps)) { + /* + * We don't need to maintain an explicit container of full + * pageslabs anywhere, but we do have to update stats. + */ + psset_bin_stats_remove(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + psset_hpdata_heap_remove(psset, pind, ps); + } +} + +void +psset_update_begin(psset_t *psset, hpdata_t *ps) { + hpdata_assert_consistent(ps); + assert(hpdata_in_psset_get(ps)); + hpdata_updating_set(ps, true); + psset_do_alloc_tracking_remove(psset, ps); +} + +void +psset_update_end(psset_t *psset, hpdata_t *ps) { + hpdata_assert_consistent(ps); + assert(hpdata_in_psset_get(ps)); + hpdata_updating_set(ps, false); + psset_do_alloc_tracking_insert(psset, ps); } hpdata_t * psset_pick_alloc(psset_t *psset, size_t size) { + assert((size & PAGE_MASK) == 0); + assert(size <= HUGEPAGE); + pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); if (pind == PSSET_NPSIZES) { - return NULL; + return hpdata_empty_list_first(&psset->empty_slabs); } hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { @@ -144,3 +180,17 @@ 
psset_pick_alloc(psset_t *psset, size_t size) { return ps; } + +void +psset_insert(psset_t *psset, hpdata_t *ps) { + /* We only support inserting empty pageslabs, for now. */ + assert(hpdata_empty(ps)); + hpdata_in_psset_set(ps, true); + psset_do_alloc_tracking_insert(psset, ps); +} + +void +psset_remove(psset_t *psset, hpdata_t *ps) { + hpdata_in_psset_set(ps, false); + psset_do_alloc_tracking_remove(psset, ps); +} diff --git a/src/stats.c b/src/stats.c index 355921c0..7f56014c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -790,14 +790,11 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_row_t row; emitter_row_init(&row); - uint64_t nevictions; uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; uint64_t ndehugifies; - CTL_M2_GET("stats.arenas.0.hpa_shard.nevictions", - i, &nevictions, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", @@ -839,7 +836,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_printf(emitter, "HPA shard stats:\n" - " Evictions: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" @@ -850,7 +846,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - nevictions, rate_per_second(nevictions, uptime), npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), @@ -861,8 +856,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { nretained_nonhuge); emitter_json_object_kv_begin(emitter, "hpa_shard"); - emitter_json_kv(emitter, "nevictions", emitter_type_uint64, - &nevictions); 
emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); emitter_json_kv(emitter, "npurges", emitter_type_uint64, diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index cf7b89fd..688911a6 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -7,8 +7,6 @@ TEST_BEGIN(test_reserve_alloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); - /* Allocating a page at a time, we should do first fit. */ for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { expect_true(hpdata_consistent(&hpdata), ""); @@ -61,8 +59,6 @@ TEST_BEGIN(test_purge_simple) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); - void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); @@ -111,7 +107,6 @@ TEST_END TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); /* Allocate the first 3/4 of the pages. 
*/ void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); @@ -165,7 +160,6 @@ TEST_END TEST_BEGIN(test_hugify) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); - hpdata_updating_set(&hpdata, true); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); diff --git a/test/unit/psset.c b/test/unit/psset.c index 2043e4eb..f5e1bad5 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -15,18 +15,26 @@ edata_init_test(edata_t *edata) { edata_esn_set(edata, ALLOC_ESN); } +static void +test_psset_fake_purge(hpdata_t *ps) { + hpdata_purge_state_t purge_state; + hpdata_purge_begin(ps, &purge_state); + void *addr; + size_t size; + while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { + } + hpdata_purge_end(ps, &purge_state); +} + static void test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); - /* - * As in hpa.c; pretend that the ps is already in the psset and just - * being updated, until we implement true insert/removal support. - */ - if (!hpdata_updating_get(ps)) { - hpdata_updating_set(ps, true); - } + test_psset_fake_purge(ps); + + psset_insert(psset, ps); + psset_update_begin(psset, ps); void *addr = hpdata_reserve_alloc(ps, size); edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, @@ -59,10 +67,11 @@ test_psset_dalloc(psset_t *psset, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); psset_update_begin(psset, ps); hpdata_unreserve(ps, edata_addr_get(edata), edata_size_get(edata)); + psset_update_end(psset, ps); if (hpdata_empty(ps)) { + psset_remove(psset, ps); return ps; } else { - psset_update_end(psset, ps); return NULL; } } From 0ea3d6307cb7eb899c90b86e286ee7b8368f9bb7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 5 Dec 2020 19:24:23 -0800 Subject: [PATCH 2008/2608] CTL, Stats: report HPA empty slab stats. 
--- src/ctl.c | 50 +++++++++++++++++++++++++++ src/stats.c | 98 +++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 123 insertions(+), 25 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 3cec637c..feefa687 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -230,18 +230,34 @@ CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) + +/* We have a set of stats for full slabs. */ CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge) + +/* A parallel set for the empty slabs. */ +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge) + +/* + * And one for the slabs that are neither empty nor full, but indexed by how + * full they are. 
+ */ CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) + INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) @@ -668,6 +684,21 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)} }; +static const ctl_named_node_t stats_arenas_i_hpa_shard_empty_slabs_node[] = { + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)} +}; + static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, @@ -696,6 +727,8 @@ static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("empty_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_empty_slabs)}, {NAME("nonfull_slabs"), CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}, @@ -3539,6 +3572,23 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, 
CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t); +/* Empty, nonhuge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, size_t); + +/* Empty, huge */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].npageslabs, + size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, size_t); /* Nonfull, nonhuge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, diff --git a/src/stats.c b/src/stats.c index 7f56014c..8e29656e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -813,6 +813,35 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { size_t ndirty_nonhuge; size_t nretained_nonhuge; + size_t sec_bytes; + CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", + emitter_type_size, &sec_bytes); + + /* First, global stats. 
*/ + emitter_table_printf(emitter, + "HPA shard stats:\n" + " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + "\n", + npurge_passes, rate_per_second(npurge_passes, uptime), + npurges, rate_per_second(npurges, uptime), + nhugifies, rate_per_second(nhugifies, uptime), + ndehugifies, rate_per_second(ndehugifies, uptime)); + + emitter_json_object_kv_begin(emitter, "hpa_shard"); + emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, + &npurge_passes); + emitter_json_kv(emitter, "npurges", emitter_type_uint64, + &npurges); + emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, + &nhugifies); + emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, + &ndehugifies); + + /* Next, full slab stats. */ CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -829,42 +858,17 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; - size_t sec_bytes; - CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); - emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", - emitter_type_size, &sec_bytes); - emitter_table_printf(emitter, - "HPA shard stats:\n" - " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" - "\n" " In full slabs:\n" " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - npurge_passes, rate_per_second(npurge_passes, uptime), - npurges, rate_per_second(npurges, uptime), - nhugifies, rate_per_second(nhugifies, uptime), - 
ndehugifies, rate_per_second(ndehugifies, uptime), npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, ndirty_huge, ndirty_nonhuge, nretained_nonhuge); - emitter_json_object_kv_begin(emitter, "hpa_shard"); - emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, - &npurge_passes); - emitter_json_kv(emitter, "npurges", emitter_type_uint64, - &npurges); - emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, - &nhugifies); - emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, - &ndehugifies); - emitter_json_object_kv_begin(emitter, "full_slabs"); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); @@ -880,6 +884,50 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ + /* Next, empty slab stats. */ + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", + i, &ndirty_huge, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", + i, &ndirty_nonhuge, size_t); + nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; + + emitter_table_printf(emitter, + " In empty slabs:\n" + " npageslabs: %zu huge, %zu nonhuge\n" + " nactive: %zu huge, %zu nonhuge \n" + " ndirty: %zu huge, %zu nonhuge \n" + " nretained: 0 huge, %zu nonhuge \n" + "\n", + npageslabs_huge, npageslabs_nonhuge, + nactive_huge, nactive_nonhuge, + ndirty_huge, ndirty_nonhuge, + nretained_nonhuge); + + emitter_json_object_kv_begin(emitter, "empty_slabs"); + 
emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, + &ndirty_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); + emitter_json_object_end(emitter); /* End "empty_slabs" */ + COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, npageslabs_huge, NULL, right, 16, size) From da63f23e68069e967e6759e2ffa578970243df9e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 09:49:26 -0800 Subject: [PATCH 2009/2608] HPA: Track pending purges/hugifies in the psset. This finishes the refactoring of the HPA/psset interactions the past few commits have been building towards. Rather than the HPA removing and then reinserting hpdatas, it simply begins updates and ends them. These updates can set flags on the hpdata that prevent it from being returned for certain types of requests. For example, it can call hpdata_alloc_allowed_set(hpdata, false) during an update, at which point the given hpdata will no longer be returned for psset_pick_alloc requests. This has various benefits: - It maintains stats correctness during purges and hugifies. - It allows simpler and more explicit concurrency control for the various special cases (e.g. allocations are disallowed during purge, but not during hugify). - It lets allocations and deallocations avoid disturbing the purging and hugification orderings. If an hpdata "loses its place" in one of the queues just due to an alloc / dalloc, it can result in pathological edge cases where very hot, very full hugepages never get hugified (and cold extents on the same hugepage as hot ones never get purged).
The key benefit though is that tracking hpdatas to be purged / hugified in a principled way will let us do delayed purging and hugification. Eventually this will let us move these operations to background threads, but in the short term the benefit is that it will let us have global purging policies (e.g. purge when the entire arena has too many dirty pages, rather than any particular hugepage). --- include/jemalloc/internal/hpdata.h | 137 ++++++++++--- include/jemalloc/internal/psset.h | 15 +- src/hpa.c | 303 ++++++++++++++++------------- src/hpdata.c | 45 +---- src/psset.c | 160 ++++++++++++--- test/unit/hpdata.c | 16 +- 6 files changed, 436 insertions(+), 240 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 393ed27f..feca5f5e 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -36,11 +36,30 @@ struct hpdata_s { bool h_huge; /* - * Whether or not some thread is purging this hpdata (i.e. has called - * hpdata_purge_begin but not yet called hpdata_purge_end), or - * hugifying it. Only one thread at a time is allowed to change a - * hugepage's state. + * For some properties, we keep parallel sets of bools; h_foo_allowed + * and h_in_psset_foo_container. This is a decoupling mechanism to + * avoid bothering the hpa (which manages policies) from the psset + * (which is the mechanism used to enforce those policies). This allows + * all the container management logic to live in one place, without the + * HPA needing to know or care how that happens. */ + + /* + * Whether or not the hpdata is allowed to be used to serve allocations, + * and whether or not the psset is currently tracking it as such. + */ + bool h_alloc_allowed; + bool h_in_psset_alloc_container; + + /* The same, but with purging. */ + bool h_purge_allowed; + bool h_in_psset_purge_container; + + /* And with hugifying. 
*/ + bool h_hugify_allowed; + bool h_in_psset_hugify_container; + + /* Whether or not a purge or hugify is currently happening. */ bool h_mid_purge; bool h_mid_hugify; @@ -65,6 +84,12 @@ struct hpdata_s { ql_elm(hpdata_t) ql_link_empty; }; + /* + * Linkage for the psset to track candidates for purging and hugifying. + */ + ql_elm(hpdata_t) ql_link_purge; + ql_elm(hpdata_t) ql_link_hugify; + /* The length of the largest contiguous sequence of inactive pages. */ size_t h_longest_free_range; @@ -86,6 +111,9 @@ struct hpdata_s { }; TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) +TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge) +TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify) + typedef ph(hpdata_t) hpdata_age_heap_t; ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); @@ -116,8 +144,66 @@ hpdata_huge_get(const hpdata_t *hpdata) { } static inline bool -hpdata_changing_state_get(const hpdata_t *hpdata) { - return hpdata->h_mid_purge || hpdata->h_mid_hugify; +hpdata_alloc_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_alloc_allowed; +} + +static inline void +hpdata_alloc_allowed_set(hpdata_t *hpdata, bool alloc_allowed) { + hpdata->h_alloc_allowed = alloc_allowed; +} + +static inline bool +hpdata_in_psset_alloc_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_alloc_container; +} + +static inline void +hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_alloc_container); + hpdata->h_in_psset_alloc_container = in_container; +} + +static inline bool +hpdata_purge_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_purge_allowed; +} + +static inline void +hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + hpdata->h_purge_allowed = purge_allowed; +} + +static inline bool +hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_purge_container; +} + +static inline void 
+hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_purge_container); + hpdata->h_in_psset_purge_container = in_container; +} + +static inline bool +hpdata_hugify_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_hugify_allowed; +} + +static inline void +hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { + hpdata->h_hugify_allowed = hugify_allowed; +} + +static inline bool +hpdata_in_psset_hugify_container_get(const hpdata_t *hpdata) { + return hpdata->h_in_psset_hugify_container; +} + +static inline void +hpdata_in_psset_hugify_container_set(hpdata_t *hpdata, bool in_container) { + assert(in_container != hpdata->h_in_psset_hugify_container); + hpdata->h_in_psset_hugify_container = in_container; } static inline bool @@ -125,11 +211,29 @@ hpdata_mid_purge_get(const hpdata_t *hpdata) { return hpdata->h_mid_purge; } +static inline void +hpdata_mid_purge_set(hpdata_t *hpdata, bool mid_purge) { + assert(mid_purge != hpdata->h_mid_purge); + hpdata->h_mid_purge = mid_purge; +} + static inline bool hpdata_mid_hugify_get(const hpdata_t *hpdata) { return hpdata->h_mid_hugify; } +static inline void +hpdata_mid_hugify_set(hpdata_t *hpdata, bool mid_hugify) { + assert(mid_hugify != hpdata->h_mid_hugify); + hpdata->h_mid_hugify = mid_hugify; +} + +static inline bool +hpdata_changing_state_get(const hpdata_t *hpdata) { + return hpdata->h_mid_purge || hpdata->h_mid_hugify; +} + + static inline bool hpdata_updating_get(const hpdata_t *hpdata) { return hpdata->h_updating; @@ -278,26 +382,7 @@ bool hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, */ void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); -/* - * Similarly, when hugifying , callers can do the metadata modifications while - * holding a lock (thereby setting the change_state field), but actually do the - * operation without blocking other threads. 
- * - * Unlike most metadata operations, hugification ending should happen while an - * hpdata is in the psset (or upcoming hugepage collections). This is because - * while purge/use races are unsafe, purge/hugepageify races are perfectly - * reasonable. - */ -void hpdata_hugify_begin(hpdata_t *hpdata); -void hpdata_hugify_end(hpdata_t *hpdata); - -/* - * Tell the hpdata that it's no longer a hugepage (all its pages are still - * counted as dirty, though; an explicit purge call is required to change that). - * - * This should only be done after starting to purge, and before actually purging - * any contents. - */ +void hpdata_hugify(hpdata_t *hpdata); void hpdata_dehugify(hpdata_t *hpdata); #endif /* JEMALLOC_INTERNAL_HPDATA_H */ diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index b220609b..6e08e8ba 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -61,8 +61,15 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; psset_stats_t stats; - /* Slabs with no active allocations. */ - hpdata_empty_list_t empty_slabs; + /* + * Slabs with no active allocations, but which are allowed to serve new + * allocations. + */ + hpdata_empty_list_t empty; + /* Slabs which are available to be purged. */ + hpdata_purge_list_t to_purge; + /* Slabs which are available to be hugified. */ + hpdata_hugify_list_t to_hugify; }; void psset_init(psset_t *psset); @@ -77,6 +84,10 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); +/* Pick one to purge. */ +hpdata_t *psset_pick_purge(psset_t *psset); +/* Pick one to hugify. 
*/ +hpdata_t *psset_pick_hugify(psset_t *psset); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); diff --git a/src/hpa.c b/src/hpa.c index 8f4642c8..5dd34c3b 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -227,65 +227,150 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { return ps; } -/* - * As a precondition, ps should not be in the psset (we can handle deallocation - * races, but not allocation ones), and we should hold the shard mutex. - */ -static void -hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { +/* Returns whether or not we purged anything. */ +static bool +hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - while (hpa_should_purge(shard, ps)) { - /* Do the metadata update bit while holding the lock. */ - hpdata_purge_state_t purge_state; - hpdata_purge_begin(ps, &purge_state); - shard->stats.npurge_passes++; - /* - * Dehugifying can only happen on the first loop iteration, - * since no other threads can allocate out of this ps while - * we're purging (and thus, can't hugify it), but there's not a - * natural way to express that in the control flow. - */ - bool needs_dehugify = false; - if (hpdata_huge_get(ps)) { - needs_dehugify = true; - shard->stats.ndehugifies++; - hpdata_dehugify(ps); - } - - /* Drop the lock to do the OS calls. */ - malloc_mutex_unlock(tsdn, &shard->mtx); - - if (needs_dehugify) { - pages_nohuge(hpdata_addr_get(ps), HUGEPAGE); - } - - size_t total_purged = 0; - uint64_t purges_this_pass = 0; - void *purge_addr; - size_t purge_size; - while (hpdata_purge_next(ps, &purge_state, &purge_addr, - &purge_size)) { - purges_this_pass++; - pages_purge_forced(purge_addr, purge_size); - total_purged += purge_size; - } - - /* Reacquire to finish our metadata update. 
*/ - malloc_mutex_lock(tsdn, &shard->mtx); - shard->stats.npurges += purges_this_pass; - hpdata_purge_end(ps, &purge_state); - - assert(total_purged <= HUGEPAGE); - - /* - * We're not done here; other threads can't allocate out of ps - * while purging, but they can still deallocate. Those - * deallocations could have meant more purging than what we - * planned ought to happen. We have to re-check now that we've - * reacquired the mutex again. - */ + hpdata_t *to_purge = psset_pick_purge(&shard->psset); + if (to_purge == NULL) { + return false; } + assert(hpdata_purge_allowed_get(to_purge)); + assert(!hpdata_changing_state_get(to_purge)); + + /* + * Don't let anyone else purge or hugify this page while + * we're purging it (allocations and deallocations are + * OK). + */ + psset_update_begin(&shard->psset, to_purge); + assert(hpdata_alloc_allowed_get(to_purge)); + hpdata_mid_purge_set(to_purge, true); + hpdata_purge_allowed_set(to_purge, false); + hpdata_hugify_allowed_set(to_purge, false); + /* + * Unlike with hugification (where concurrent + * allocations are allowed), concurrent allocation out + * of a hugepage being purged is unsafe; we might hand + * out an extent for an allocation and then purge it + * (clearing out user data). + */ + hpdata_alloc_allowed_set(to_purge, false); + psset_update_end(&shard->psset, to_purge); + + /* Gather all the metadata we'll need during the purge. */ + bool dehugify = hpdata_huge_get(to_purge); + hpdata_purge_state_t purge_state; + hpdata_purge_begin(to_purge, &purge_state); + + malloc_mutex_unlock(tsdn, &shard->mtx); + + /* Actually do the purging, now that the lock is dropped. 
*/ + if (dehugify) { + pages_nohuge(hpdata_addr_get(to_purge), HUGEPAGE); + } + size_t total_purged = 0; + uint64_t purges_this_pass = 0; + void *purge_addr; + size_t purge_size; + while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, + &purge_size)) { + total_purged += purge_size; + assert(total_purged <= HUGEPAGE); + purges_this_pass++; + pages_purge_forced(purge_addr, purge_size); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + /* The shard updates */ + shard->stats.npurge_passes++; + shard->stats.npurges += purges_this_pass; + if (dehugify) { + shard->stats.ndehugifies++; + } + + /* The hpdata updates. */ + psset_update_begin(&shard->psset, to_purge); + if (dehugify) { + hpdata_dehugify(to_purge); + } + hpdata_purge_end(to_purge, &purge_state); + hpdata_mid_purge_set(to_purge, false); + + hpdata_alloc_allowed_set(to_purge, true); + hpdata_purge_allowed_set(to_purge, hpa_should_purge(shard, to_purge)); + hpdata_hugify_allowed_set(to_purge, hpa_should_hugify(shard, to_purge)); + + psset_update_end(&shard->psset, to_purge); + + return true; +} + +/* Returns whether or not we hugified anything. */ +static bool +hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify == NULL) { + return false; + } + assert(hpdata_hugify_allowed_get(to_hugify)); + assert(!hpdata_changing_state_get(to_hugify)); + + /* + * Don't let anyone else purge or hugify this page while + * we're hugifying it (allocations and deallocations are + * OK). 
+ */ + psset_update_begin(&shard->psset, to_hugify); + hpdata_mid_hugify_set(to_hugify, true); + hpdata_purge_allowed_set(to_hugify, false); + hpdata_hugify_allowed_set(to_hugify, false); + assert(hpdata_alloc_allowed_get(to_hugify)); + psset_update_end(&shard->psset, to_hugify); + + malloc_mutex_unlock(tsdn, &shard->mtx); + + bool err = pages_huge(hpdata_addr_get(to_hugify), + HUGEPAGE); + /* + * It's not clear what we could do in case of error; we + * might get into situations where we loop trying to + * hugify some page and failing over and over again. + * Just eat the error and pretend we were successful. + */ + (void)err; + + malloc_mutex_lock(tsdn, &shard->mtx); + shard->stats.nhugifies++; + + psset_update_begin(&shard->psset, to_hugify); + hpdata_hugify(to_hugify); + hpdata_mid_hugify_set(to_hugify, false); + hpdata_purge_allowed_set(to_hugify, + hpa_should_purge(shard, to_hugify)); + hpdata_hugify_allowed_set(to_hugify, false); + psset_update_end(&shard->psset, to_hugify); + + return true; +} + + +static void +hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + bool hugified; + bool purged; + size_t nloop = 0; + /* Just *some* bound, to impose a worst-case latency bound. */ + size_t maxloops = 100; + do { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + hugified = hpa_try_hugify(tsdn, shard); + purged = hpa_try_purge(tsdn, shard); + malloc_mutex_assert_owner(tsdn, &shard->mtx); + } while ((hugified || purged) && nloop++ < maxloops); } static edata_t * @@ -344,6 +429,10 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) * We should arguably reset dirty state here, but this would * require some sort of prepare + commit functionality that's a * little much to deal with for now. + * + * We don't have a do_deferred_work down this pathway, on the + * principle that we didn't *really* affect shard state (we + * tweaked the stats, but our tweaks weren't really accurate).
*/ psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); @@ -352,49 +441,14 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - bool hugify = hpa_should_hugify(shard, ps); - if (hugify) { - hpdata_hugify_begin(ps); - shard->stats.nhugifies++; + if (hpa_should_hugify(shard, ps)) { + hpdata_hugify_allowed_set(ps, true); } psset_update_end(&shard->psset, ps); + hpa_do_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); - if (hugify) { - /* - * Hugifying with the lock dropped is safe, even with - * concurrent modifications to the ps. This relies on - * the fact that the current implementation will never - * dehugify a non-empty pageslab, and ps will never - * become empty before we return edata to the user to be - * freed. - * - * Note that holding the lock would prevent not just operations - * on this page slab, but also operations any other alloc/dalloc - * operations in this hpa shard. - */ - bool err = pages_huge(hpdata_addr_get(ps), HUGEPAGE); - /* - * Pretending we succeed when we actually failed is safe; trying - * to rolllback would be tricky, though. Eat the error. - */ - (void)err; - malloc_mutex_lock(tsdn, &shard->mtx); - hpdata_hugify_end(ps); - if (hpa_should_purge(shard, ps)) { - /* - * There was a race in which the ps went from being - * almost full to having lots of free space while we - * hugified. Undo our operation, taking care to meet - * the precondition that the ps isn't in the psset. 
- */ - psset_update_begin(&shard->psset, ps); - hpa_purge(tsdn, shard, ps); - psset_update_end(&shard->psset, ps); - } - malloc_mutex_unlock(tsdn, &shard->mtx); - } return edata; } @@ -445,6 +499,14 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { return NULL; } + /* + * TODO: the tail of this function is quite similar to the tail of + * hpa_try_alloc_no_grow (both, broadly, do the metadata management of + * initializing an edata_t from an hpdata_t once both have been + * allocated). The only differences are in error case handling and lock + * management (we hold grow_mtx, but should drop it before doing any + * deferred work). With a little refactoring, we could unify the paths. + */ psset_update_begin(&shard->psset, ps); void *addr = hpdata_reserve_alloc(ps, size); @@ -481,10 +543,20 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } + if (hpa_should_hugify(shard, ps)) { + hpdata_hugify_allowed_set(ps, true); + } psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); + /* + * Drop grow_mtx before doing deferred work; other threads blocked on it + * should be allowed to proceed while we're working. + */ malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + hpa_do_deferred_work(tsdn, shard); + + malloc_mutex_unlock(tsdn, &shard->mtx); return edata; } @@ -579,48 +651,15 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { size_t unreserve_size = edata_size_get(edata); edata_cache_small_put(tsdn, &shard->ecs, edata); - /* - * We have three rules interacting here: - * - You can't update ps metadata while it's still in the psset. We - * enforce this because it's necessary for stats tracking and metadata - * management. - * - The ps must not be in the psset while purging. This is because we - * can't handle purge/alloc races. - * - Whoever removes the ps from the psset is the one to reinsert it. 
- */ - if (hpdata_mid_purge_get(ps)) { - /* - * Another thread started purging, and so the ps is not in the - * psset and we can do our metadata update. The other thread is - * in charge of reinserting the ps, so we're done. - */ - assert(hpdata_updating_get(ps)); - hpdata_unreserve(ps, unreserve_addr, unreserve_size); - malloc_mutex_unlock(tsdn, &shard->mtx); - return; - } - /* - * No other thread is purging, and the ps is non-empty, so it should be - * in the psset. - */ - assert(!hpdata_updating_get(ps)); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - if (!hpa_should_purge(shard, ps)) { - /* - * This should be the common case; no other thread is purging, - * and we won't purge either. - */ - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - return; + if (hpa_should_purge(shard, ps)) { + hpdata_purge_allowed_set(ps, true); } - - /* It's our job to purge. */ - hpa_purge(tsdn, shard, ps); - psset_update_end(&shard->psset, ps); + hpa_do_deferred_work(tsdn, shard); + malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/src/hpdata.c b/src/hpdata.c index 0cfeeed2..bb4808aa 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -22,6 +22,12 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; + hpdata->h_alloc_allowed = true; + hpdata->h_in_psset_alloc_container = false; + hpdata->h_purge_allowed = false; + hpdata->h_in_psset_purge_container = false; + hpdata->h_hugify_allowed = false; + hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; hpdata->h_mid_hugify = false; hpdata->h_updating = false; @@ -44,6 +50,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * mid-update. 
*/ assert(!hpdata->h_in_psset || hpdata->h_updating); + assert(hpdata->h_alloc_allowed); assert((sz & PAGE_MASK) == 0); size_t npages = sz >> LG_PAGE; assert(npages <= hpdata_longest_free_range_get(hpdata)); @@ -155,10 +162,6 @@ void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(!hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_purge = true; purge_state->npurged = 0; purge_state->next_purge_search_begin = 0; @@ -192,12 +195,6 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * hpdata without synchronization, and therefore have no right to expect * a consistent state. */ - assert(hpdata->h_mid_purge); - /* See the comment in reserve. */ - assert(!hpdata->h_in_psset || hpdata->h_updating); - /* Should have dehugified already (if necessary). */ - assert(!hpdata->h_huge); - assert(!hpdata->h_mid_hugify); if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { return false; @@ -226,9 +223,6 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_purge = false; assert(purge_state->npurged == fb_scount(purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); @@ -244,40 +238,17 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { } void -hpdata_hugify_begin(hpdata_t *hpdata) { +hpdata_hugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - /* See the comment in reserve. 
*/ - assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(!hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); - hpdata->h_mid_hugify = true; hpdata->h_huge = true; fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); hpdata->h_ntouched = HUGEPAGE_PAGES; hpdata_assert_consistent(hpdata); } -void -hpdata_hugify_end(hpdata_t *hpdata) { - hpdata_assert_consistent(hpdata); - /* - * This is the exception to the "no-metadata updates without informing - * the psset first" rule; this assert would be incorrect. - */ - /* assert(!hpdata->h_in_psset || hpdata->h_updating); */ - assert(!hpdata->h_mid_purge); - assert(hpdata->h_mid_hugify); - hpdata->h_mid_hugify = false; - hpdata_assert_consistent(hpdata); -} - void hpdata_dehugify(hpdata_t *hpdata) { hpdata_assert_consistent(hpdata); - assert(hpdata->h_updating); - assert(hpdata->h_updating); - assert(hpdata->h_mid_purge); - assert(!hpdata->h_mid_hugify); hpdata->h_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/src/psset.c b/src/psset.c index 89971020..bb51e21e 100644 --- a/src/psset.c +++ b/src/psset.c @@ -15,7 +15,9 @@ psset_init(psset_t *psset) { } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); memset(&psset->stats, 0, sizeof(psset->stats)); - hpdata_empty_list_init(&psset->empty_slabs); + hpdata_empty_list_init(&psset->empty); + hpdata_purge_list_init(&psset->to_purge); + hpdata_hugify_list_init(&psset->to_hugify); } static void @@ -85,25 +87,56 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } -/* - * Insert ps into the data structures we use to track allocation stats and pick - * the pageslabs for new allocations. - * - * In particular, this does *not* remove ps from any hugification / purging - * queues it may be in. 
- */ static void -psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { +psset_stats_insert(psset_t* psset, hpdata_t *ps) { if (hpdata_empty(ps)) { psset_bin_stats_insert(psset->stats.empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_insert(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + } +} + +static void +psset_stats_remove(psset_t *psset, hpdata_t *ps) { + if (hpdata_empty(ps)) { + psset_bin_stats_remove(psset->stats.empty_slabs, ps); + } else if (hpdata_full(ps)) { + psset_bin_stats_remove(psset->stats.full_slabs, ps); + } else { + size_t longest_free_range = hpdata_longest_free_range_get(ps); + + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + + psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + } +} + +/* + * Put ps into some container so that it can be found during future allocation + * requests. + */ +static void +psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) { + assert(!hpdata_in_psset_alloc_container_get(ps)); + hpdata_in_psset_alloc_container_set(ps, true); + if (hpdata_empty(ps)) { /* * This prepend, paired with popping the head in psset_fit, * means we implement LIFO ordering for the empty slabs set, * which seems reasonable. */ - hpdata_empty_list_prepend(&psset->empty_slabs, ps); + hpdata_empty_list_prepend(&psset->empty, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset->stats.full_slabs, ps); /* * We don't need to keep track of the full slabs; we're never * going to return them from a psset_pick_alloc call. 
@@ -115,23 +148,20 @@ psset_do_alloc_tracking_insert(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); psset_hpdata_heap_insert(psset, pind, ps); } } /* Remove ps from those collections. */ static void -psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { +psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { + assert(hpdata_in_psset_alloc_container_get(ps)); + hpdata_in_psset_alloc_container_set(ps, false); + if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset->stats.empty_slabs, ps); - hpdata_empty_list_remove(&psset->empty_slabs, ps); + hpdata_empty_list_remove(&psset->empty, ps); } else if (hpdata_full(ps)) { - /* - * We don't need to maintain an explicit container of full - * pageslabs anywhere, but we do have to update stats. - */ - psset_bin_stats_remove(psset->stats.full_slabs, ps); + /* Same as above -- do nothing in this case. */ } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -139,7 +169,6 @@ psset_do_alloc_tracking_remove(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); psset_hpdata_heap_remove(psset, pind, ps); } } @@ -149,7 +178,21 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, true); - psset_do_alloc_tracking_remove(psset, ps); + psset_stats_remove(psset, ps); + if (hpdata_in_psset_alloc_container_get(ps)) { + /* + * Some metadata updates can break alloc container invariants + * (e.g. the longest free range determines the hpdata_heap_t the + * pageslab lives in). + */ + assert(hpdata_alloc_allowed_get(ps)); + psset_alloc_container_remove(psset, ps); + } + /* + * We don't update presence in the purge list or hugify list; we try to + * keep those FIFO, even in the presence of other metadata updates. 
+ * We'll update presence at the end of the metadata update if necessary. + */ } void @@ -157,7 +200,36 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); hpdata_updating_set(ps, false); - psset_do_alloc_tracking_insert(psset, ps); + psset_stats_insert(psset, ps); + + /* + * The update begin should have removed ps from whatever alloc container + * it was in. + */ + assert(!hpdata_in_psset_alloc_container_get(ps)); + if (hpdata_alloc_allowed_get(ps)) { + psset_alloc_container_insert(psset, ps); + } + + if (hpdata_purge_allowed_get(ps) + && !hpdata_in_psset_purge_container_get(ps)) { + hpdata_in_psset_purge_container_set(ps, true); + hpdata_purge_list_append(&psset->to_purge, ps); + } else if (!hpdata_purge_allowed_get(ps) + && hpdata_in_psset_purge_container_get(ps)) { + hpdata_in_psset_purge_container_set(ps, false); + hpdata_purge_list_remove(&psset->to_purge, ps); + } + + if (hpdata_hugify_allowed_get(ps) + && !hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, true); + hpdata_hugify_list_append(&psset->to_hugify, ps); + } else if (!hpdata_hugify_allowed_get(ps) + && hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, false); + hpdata_hugify_list_remove(&psset->to_hugify, ps); + } } hpdata_t * @@ -169,7 +241,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, (size_t)min_pind); if (pind == PSSET_NPSIZES) { - return hpdata_empty_list_first(&psset->empty_slabs); + return hpdata_empty_list_first(&psset->empty); } hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { @@ -181,16 +253,48 @@ psset_pick_alloc(psset_t *psset, size_t size) { return ps; } +hpdata_t * +psset_pick_purge(psset_t *psset) { + return hpdata_purge_list_first(&psset->to_purge); +} + +hpdata_t * +psset_pick_hugify(psset_t *psset) { + return 
hpdata_hugify_list_first(&psset->to_hugify); +} + void psset_insert(psset_t *psset, hpdata_t *ps) { - /* We only support inserting empty pageslabs, for now. */ - assert(hpdata_empty(ps)); hpdata_in_psset_set(ps, true); - psset_do_alloc_tracking_insert(psset, ps); + + psset_stats_insert(psset, ps); + if (hpdata_alloc_allowed_get(ps)) { + psset_alloc_container_insert(psset, ps); + } + if (hpdata_purge_allowed_get(ps)) { + hpdata_in_psset_purge_container_set(ps, true); + hpdata_purge_list_append(&psset->to_purge, ps); + } + if (hpdata_hugify_allowed_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, true); + hpdata_hugify_list_append(&psset->to_hugify, ps); + } } void psset_remove(psset_t *psset, hpdata_t *ps) { hpdata_in_psset_set(ps, false); + + psset_stats_remove(psset, ps); + if (hpdata_in_psset_alloc_container_get(ps)) { + psset_alloc_container_remove(psset, ps); + } + if (hpdata_in_psset_purge_container_get(ps)) { + hpdata_in_psset_purge_container_set(ps, false); + hpdata_purge_list_remove(&psset->to_purge, ps); + } + if (hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, false); + hpdata_hugify_list_remove(&psset->to_hugify, ps); + } } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 688911a6..2a702338 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -67,13 +67,9 @@ TEST_BEGIN(test_purge_simple) { expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); - expect_false(hpdata_changing_state_get(&hpdata), ""); - hpdata_purge_state_t purge_state; hpdata_purge_begin(&hpdata, &purge_state); - expect_true(hpdata_changing_state_get(&hpdata), ""); - void *purge_addr; size_t purge_size; bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, @@ -82,17 +78,12 @@ TEST_BEGIN(test_purge_simple) { expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - expect_true(hpdata_changing_state_get(&hpdata),
""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); expect_false(got_result, "Unexpected additional purge range: " "extent at %p of size %zu", purge_addr, purge_size); - expect_true(hpdata_changing_state_get(&hpdata), ""); - hpdata_purge_end(&hpdata, &purge_state); - expect_false(hpdata_changing_state_get(&hpdata), ""); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); } TEST_END @@ -166,12 +157,7 @@ TEST_BEGIN(test_hugify) { expect_zu_eq(HUGEPAGE_PAGES / 2, hpdata_ntouched_get(&hpdata), ""); - expect_false(hpdata_changing_state_get(&hpdata), ""); - hpdata_hugify_begin(&hpdata); - expect_true(hpdata_changing_state_get(&hpdata), ""); - - hpdata_hugify_end(&hpdata); - expect_false(hpdata_changing_state_get(&hpdata), ""); + hpdata_hugify(&hpdata); /* Hugeifying should have increased the dirty page count. */ expect_zu_eq(HUGEPAGE_PAGES, hpdata_ntouched_get(&hpdata), ""); From 9fd9c876bb99acc957f8ec411837138a9b588a1e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 12:49:03 -0800 Subject: [PATCH 2010/2608] psset: keep aggregate stats. This will let us quickly query these stats to make purging decisions quickly. --- include/jemalloc/internal/psset.h | 24 ++++++++++++-- src/psset.c | 52 ++++++++++++++++++++++++------- test/unit/psset.c | 1 + 3 files changed, 62 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 6e08e8ba..d2a8b24a 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -8,9 +8,6 @@ * a collection of page-slabs (the intent being that they are backed by * hugepages, or at least could be), and handles allocation and deallocation * requests. - * - * It has the same synchronization guarantees as the eset; stats queries don't - * need any external synchronization, everything else does. 
*/ /* @@ -60,6 +57,12 @@ struct psset_s { */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; + /* + * The sum of all bin stats in stats. This lets us quickly answer + * queries for the number of dirty, active, and retained pages in the + * entire set. + */ + psset_bin_stats_t merged_stats; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new @@ -92,4 +95,19 @@ hpdata_t *psset_pick_hugify(psset_t *psset); void psset_insert(psset_t *psset, hpdata_t *ps); void psset_remove(psset_t *psset, hpdata_t *ps); +static inline size_t +psset_npageslabs(psset_t *psset) { + return psset->merged_stats.npageslabs; +} + +static inline size_t +psset_nactive(psset_t *psset) { + return psset->merged_stats.nactive; +} + +static inline size_t +psset_ndirty(psset_t *psset) { + return psset->merged_stats.ndirty; +} + #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/psset.c b/src/psset.c index bb51e21e..66d37397 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,6 +14,7 @@ psset_init(psset_t *psset) { hpdata_age_heap_new(&psset->pageslabs[i]); } bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); + memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); hpdata_purge_list_init(&psset->to_purge); @@ -52,23 +53,48 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * ensure we don't miss any heap modification operations. */ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_insert_remove(psset_bin_stats_t *binstats, hpdata_t *ps, - bool insert) { +psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, + hpdata_t *ps, bool insert) { size_t mul = insert ? 
(size_t)1 : (size_t)-1; size_t huge_idx = (size_t)hpdata_huge_get(ps); + binstats[huge_idx].npageslabs += mul * 1; binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); + + psset->merged_stats.npageslabs += mul * 1; + psset->merged_stats.nactive += mul * hpdata_nactive_get(ps); + psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps); + + if (config_debug) { + psset_bin_stats_t check_stats = {0}; + for (size_t huge = 0; huge <= 1; huge++) { + psset_bin_stats_accum(&check_stats, + &psset->stats.full_slabs[huge]); + psset_bin_stats_accum(&check_stats, + &psset->stats.empty_slabs[huge]); + for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { + psset_bin_stats_accum(&check_stats, + &psset->stats.nonfull_slabs[pind][huge]); + } + } + assert(psset->merged_stats.npageslabs + == check_stats.npageslabs); + assert(psset->merged_stats.nactive == check_stats.nactive); + assert(psset->merged_stats.ndirty == check_stats.ndirty); + } } static void -psset_bin_stats_insert(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, true); +psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats, + hpdata_t *ps) { + psset_bin_stats_insert_remove(psset, binstats, ps, true); } static void -psset_bin_stats_remove(psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(binstats, ps, false); +psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, + hpdata_t *ps) { + psset_bin_stats_insert_remove(psset, binstats, ps, false); } static void @@ -90,9 +116,9 @@ psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { static void psset_stats_insert(psset_t* psset, hpdata_t *ps) { if (hpdata_empty(ps)) { - psset_bin_stats_insert(psset->stats.empty_slabs, ps); + psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset->stats.full_slabs, ps); + psset_bin_stats_insert(psset, 
psset->stats.full_slabs, ps); } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -100,16 +126,17 @@ psset_stats_insert(psset_t* psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_insert(psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], + ps); } } static void psset_stats_remove(psset_t *psset, hpdata_t *ps) { if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset->stats.empty_slabs, ps); + psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_remove(psset->stats.full_slabs, ps); + psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); } else { size_t longest_free_range = hpdata_longest_free_range_get(ps); @@ -117,7 +144,8 @@ psset_stats_remove(psset_t *psset, hpdata_t *ps) { longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); - psset_bin_stats_remove(psset->stats.nonfull_slabs[pind], ps); + psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], + ps); } } diff --git a/test/unit/psset.c b/test/unit/psset.c index f5e1bad5..b93dfbfe 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -374,6 +374,7 @@ stats_expect(psset_t *psset, size_t nactive) { stats_expect_empty(&psset->stats.nonfull_slabs[i][0]); } } + expect_zu_eq(nactive, psset_nactive(psset), ""); } TEST_BEGIN(test_stats) { From dc886e5608d553ff2b8f2538cb8d6595bc90e9ac Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 13:16:51 -0800 Subject: [PATCH 2011/2608] hpdata: Return the number of pages to be purged. We'll use this in the next commit. 
--- include/jemalloc/internal/hpdata.h | 5 ++++- src/hpdata.c | 7 +++++-- test/unit/hpdata.c | 6 ++++-- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index feca5f5e..30dd6721 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -363,8 +363,11 @@ struct hpdata_purge_state_s { * Once you begin purging, you have to follow through and call hpdata_purge_next * until you're done, and then end. Allocating out of an hpdata undergoing * purging is not allowed. + * + * Returns the number of pages that will be purged. */ -void hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); + /* * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to * true, and returns true. Otherwise, returns false to indicate that we're diff --git a/src/hpdata.c b/src/hpdata.c index bb4808aa..e11ba8d9 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -158,7 +158,7 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata_assert_consistent(hpdata); } -void +size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); /* See the comment in reserve. */ @@ -181,10 +181,13 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata->touched_pages, HUGEPAGE_PAGES); /* We purge everything we can. 
*/ - assert(hpdata->h_ntouched - hpdata->h_nactive == fb_scount( + size_t to_purge = hpdata->h_ntouched - hpdata->h_nactive; + assert(to_purge == fb_scount( purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); hpdata_assert_consistent(hpdata); + + return to_purge; } bool diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 2a702338..11bccc58 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -68,7 +68,8 @@ TEST_BEGIN(test_purge_simple) { expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); hpdata_purge_state_t purge_state; - hpdata_purge_begin(&hpdata, &purge_state); + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); void *purge_addr; size_t purge_size; @@ -112,7 +113,8 @@ TEST_BEGIN(test_purge_intervening_dalloc) { expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); hpdata_purge_state_t purge_state; - hpdata_purge_begin(&hpdata, &purge_state); + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); void *purge_addr; size_t purge_size; From 56e85c0e47f0a4a19cc0f6c71771ece69ef10080 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sun, 6 Dec 2020 13:48:46 -0800 Subject: [PATCH 2012/2608] HPA: Use a whole-shard purging heuristic. Previously, we used only hpdata-local information to decide whether to purge. --- include/jemalloc/internal/hpa.h | 6 ++ include/jemalloc/internal/hpdata.h | 14 +++++ src/hpa.c | 98 ++++++++++++++++++++---------- src/psset.c | 2 +- 4 files changed, 86 insertions(+), 34 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index f62c3278..de9cc753 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -93,6 +93,12 @@ struct hpa_shard_s { unsigned ind; emap_t *emap; + /* + * How many pages have we started but not yet finished purging in this + * hpa shard. 
+ */ + size_t npending_purge; + /* * Those stats which are copied directly into the CTL-centric hpa shard * stats. diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 30dd6721..e489e624 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -171,6 +171,7 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + assert(purge_allowed == false || !hpdata->h_mid_purge); hpdata->h_purge_allowed = purge_allowed; } @@ -192,6 +193,7 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { + assert(hugify_allowed == false || !hpdata->h_mid_hugify); hpdata->h_hugify_allowed = hugify_allowed; } @@ -313,6 +315,18 @@ hpdata_consistent(hpdata_t *hpdata) { if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { return false; } + if (hpdata_changing_state_get(hpdata) + && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) { + return false; + } + if (hpdata_purge_allowed_get(hpdata) + != hpdata_in_psset_purge_container_get(hpdata)) { + return false; + } + if (hpdata_hugify_allowed_get(hpdata) + != hpdata_in_psset_hugify_container_get(hpdata)) { + return false; + } return true; } diff --git a/src/hpa.c b/src/hpa.c index 5dd34c3b..cd0e803e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -74,6 +74,8 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->ind = ind; shard->emap = emap; + shard->npending_purge = 0; + shard->stats.npurge_passes = 0; shard->stats.npurges = 0; shard->stats.nhugifies = 0; @@ -141,26 +143,58 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { } static bool -hpa_should_hugify(hpa_shard_t *shard, hpdata_t *ps) { - if (hpdata_changing_state_get(ps) || hpdata_huge_get(ps)) { - return false; - } - return hpa_good_hugification_candidate(shard, ps); +hpa_should_purge(hpa_shard_t *shard) { 
+ size_t adjusted_ndirty = psset_ndirty(&shard->psset) + - shard->npending_purge; + /* + * Another simple static check; purge whenever dirty exceeds 25% of + * active. + */ + return adjusted_ndirty > psset_nactive(&shard->psset) / 4; } -/* - * Whether or not the given pageslab meets the criteria for being purged (and, - * if necessary, dehugified). - */ -static bool -hpa_should_purge(hpa_shard_t *shard, hpdata_t *ps) { - /* Ditto. */ +static void +hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { if (hpdata_changing_state_get(ps)) { - return false; + hpdata_purge_allowed_set(ps, false); + hpdata_hugify_allowed_set(ps, false); + return; + } + /* + * Hugepages are distinctly costly to purge, so do it only if they're + * *particularly* full of dirty pages. Eventually, we should use a + * smarter / more dynamic heuristic for situations where we have to + * manually hugify. + * + * In situations where we don't manually hugify, this problem is + * reduced. The "bad" situation we're trying to avoid is one's that's + * common in some Linux configurations (where both enabled and defrag + * are set to madvise) that can lead to long latency spikes on the first + * access after a hugification. The ideal policy in such configurations + * is probably time-based for both purging and hugifying; only hugify a + * hugepage if it's met the criteria for some extended period of time, + * and only dehugify it if it's failed to meet the criteria for an + * extended period of time. When background threads are on, we should + * try to take this hit on one of them, as well. + * + * I think the ideal setting is THP always enabled, and defrag set to + * deferred; in that case we don't need any explicit calls on the + * allocator's end at all; we just try to pack allocations in a + * hugepage-friendly manner and let the OS hugify in the background. 
+ * + * Anyways, our strategy to delay dehugification is to only consider + * purging a hugified hugepage if it's individually dirtier than the + * overall max dirty pages setting. That setting is 1 dirty page per 4 + * active pages; i.e. 4/5s of hugepage pages must be active. + */ + if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0) + || hpdata_ndirty_get(ps) > HUGEPAGE_PAGES / 5) { + hpdata_purge_allowed_set(ps, true); + } + if (hpa_good_hugification_candidate(shard, ps) + && !hpdata_huge_get(ps)) { + hpdata_hugify_allowed_set(ps, true); } - size_t purgeable = hpdata_ndirty_get(ps); - return purgeable > HUGEPAGE_PAGES * 25 / 100 - || (purgeable > 0 && hpdata_empty(ps)); } static hpdata_t * @@ -262,7 +296,9 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Gather all the metadata we'll need during the purge. */ bool dehugify = hpdata_huge_get(to_purge); hpdata_purge_state_t purge_state; - hpdata_purge_begin(to_purge, &purge_state); + size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state); + + shard->npending_purge += num_to_purge; malloc_mutex_unlock(tsdn, &shard->mtx); @@ -284,6 +320,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ + shard->npending_purge -= num_to_purge; shard->stats.npurge_passes++; shard->stats.npurges += purges_this_pass; if (dehugify) { @@ -299,8 +336,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { hpdata_mid_purge_set(to_purge, false); hpdata_alloc_allowed_set(to_purge, true); - hpdata_purge_allowed_set(to_purge, hpa_should_purge(shard, to_purge)); - hpdata_hugify_allowed_set(to_purge, hpa_should_hugify(shard, to_purge)); + hpa_update_purge_hugify_eligibility(shard, to_purge); psset_update_end(&shard->psset, to_purge); @@ -349,15 +385,12 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_hugify); hpdata_hugify(to_hugify); hpdata_mid_hugify_set(to_hugify, false); - hpdata_purge_allowed_set(to_hugify, - 
hpa_should_purge(shard, to_hugify)); - hpdata_hugify_allowed_set(to_hugify, false); + hpa_update_purge_hugify_eligibility(shard, to_hugify); psset_update_end(&shard->psset, to_hugify); return true; } - static void hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { bool hugified; @@ -368,7 +401,11 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { do { malloc_mutex_assert_owner(tsdn, &shard->mtx); hugified = hpa_try_hugify(tsdn, shard); - purged = hpa_try_purge(tsdn, shard); + + purged = false; + if (hpa_should_purge(shard)) { + purged = hpa_try_purge(tsdn, shard); + } malloc_mutex_assert_owner(tsdn, &shard->mtx); } while ((hugified || purged) && nloop++ < maxloops); } @@ -441,9 +478,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) return NULL; } - if (hpa_should_hugify(shard, ps)) { - hpdata_hugify_allowed_set(ps, true); - } + hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); hpa_do_deferred_work(tsdn, shard); @@ -543,9 +578,7 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return NULL; } - if (hpa_should_hugify(shard, ps)) { - hpdata_hugify_allowed_set(ps, true); - } + hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); /* @@ -653,9 +686,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - if (hpa_should_purge(shard, ps)) { - hpdata_purge_allowed_set(ps, true); - } + + hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); hpa_do_deferred_work(tsdn, shard); diff --git a/src/psset.c b/src/psset.c index 66d37397..08c9b6c5 100644 --- a/src/psset.c +++ b/src/psset.c @@ -225,7 +225,6 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { void psset_update_end(psset_t *psset, hpdata_t *ps) { - hpdata_assert_consistent(ps); assert(hpdata_in_psset_get(ps)); 
hpdata_updating_set(ps, false); psset_stats_insert(psset, ps); @@ -258,6 +257,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_in_psset_hugify_container_set(ps, false); hpdata_hugify_list_remove(&psset->to_hugify, ps); } + hpdata_assert_consistent(ps); } hpdata_t * From caef4c2868fce6b0cc0087c20ba00a5d50b67c3a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 13:22:59 -0800 Subject: [PATCH 2013/2608] FXP: add fxp_mul_frac. This can multiply size_ts by a fraction without the risk of overflow. --- include/jemalloc/internal/fxp.h | 25 +++++++++++++++++++++++++ test/unit/fxp.c | 25 +++++++++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index d9438090..b9803a63 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -90,6 +90,31 @@ fxp_round_nearest(fxp_t a) { return (a >> 16) + increment; } +/* + * Approximately computes x * frac, without the size limitations that would be + * imposed by converting u to an fxp_t. + */ +static inline size_t +fxp_mul_frac(size_t x_orig, fxp_t frac) { + assert(frac <= (1U << 16)); + /* + * Work around an over-enthusiastic warning about type limits below (on + * 32-bit platforms, a size_t is always less than 1ULL << 48). + */ + uint64_t x = (uint64_t)x_orig; + /* + * If we can guarantee no overflow, multiply first before shifting, to + * preserve some precision. Otherwise, shift first and then multiply. + * In the latter case, we only lose the low 16 bits of a 48-bit number, + * so we're still accurate to within 1/2**32. + */ + if (x < (1ULL << 48)) { + return (size_t)((x * frac) >> 16); + } else { + return (size_t)((x >> 16) * (uint64_t)frac); + } +} + /* * Returns true on error. Otherwise, returns false and updates *ptr to point to * the first character not parsed (because it wasn't a digit). 
diff --git a/test/unit/fxp.c b/test/unit/fxp.c index 89f0ca65..0fe5d67a 100644 --- a/test/unit/fxp.c +++ b/test/unit/fxp.c @@ -222,6 +222,30 @@ TEST_BEGIN(test_round_simple) { } TEST_END +static void +expect_mul_frac(size_t a, const char *fracstr, size_t expected) { + fxp_t frac = xparse_fxp(fracstr); + size_t result = fxp_mul_frac(a, frac); + expect_true(double_close(expected, result), + "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, + expected, result); +} + +TEST_BEGIN(test_mul_frac_simple) { + expect_mul_frac(SIZE_MAX, "1.0", SIZE_MAX); + expect_mul_frac(SIZE_MAX, ".75", SIZE_MAX / 4 * 3); + expect_mul_frac(SIZE_MAX, ".5", SIZE_MAX / 2); + expect_mul_frac(SIZE_MAX, ".25", SIZE_MAX / 4); + expect_mul_frac(1U << 16, "1.0", 1U << 16); + expect_mul_frac(1U << 30, "0.5", 1U << 29); + expect_mul_frac(1U << 30, "0.25", 1U << 28); + expect_mul_frac(1U << 30, "0.125", 1U << 27); + expect_mul_frac((1U << 30) + 1, "0.125", 1U << 27); + expect_mul_frac(100, "0.25", 25); + expect_mul_frac(1000 * 1000, "0.001", 1000); +} +TEST_END + static void expect_print(const char *str) { fxp_t fxp = xparse_fxp(str); @@ -339,6 +363,7 @@ main(void) { test_mul_simple, test_div_simple, test_round_simple, + test_mul_frac_simple, test_print_simple, test_stress); } From bdb7307ff28cdee92861a32ecae16919cc9af614 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 15:28:28 -0800 Subject: [PATCH 2014/2608] fxp: Add FXP_INIT_PERCENT This lets us specify fxp values easily in source. 
--- include/jemalloc/internal/fxp.h | 1 + test/unit/fxp.c | 27 ++++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index b9803a63..415a9828 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -22,6 +22,7 @@ */ typedef uint32_t fxp_t; #define FXP_INIT_INT(x) ((x) << 16) +#define FXP_INIT_PERCENT(pct) (((pct) << 16) / 100) /* * Amount of precision used in parsing and printing numbers. The integer bound diff --git a/test/unit/fxp.c b/test/unit/fxp.c index 0fe5d67a..27f10976 100644 --- a/test/unit/fxp.c +++ b/test/unit/fxp.c @@ -96,7 +96,8 @@ TEST_BEGIN(test_parse_valid) { } TEST_END -static void expect_parse_failure(const char *str) { +static void +expect_parse_failure(const char *str) { fxp_t result = FXP_INIT_INT(333); char *end = (void *)0x123; bool err = fxp_parse(&result, str, &end); @@ -120,6 +121,29 @@ TEST_BEGIN(test_parse_invalid) { } TEST_END +static void +expect_init_percent(unsigned percent, const char *str) { + fxp_t result_init = FXP_INIT_PERCENT(percent); + fxp_t result_parse = xparse_fxp(str); + expect_u32_eq(result_init, result_parse, + "Expect representations of FXP_INIT_PERCENT(%u) and " + "fxp_parse(\"%s\") to be equal; got %x and %x", + percent, str, result_init, result_parse); + +} + +/* + * Every other test uses either parsing or FXP_INIT_INT; it gets tested in those + * ways. We need a one-off for the percent-based initialization, though. 
+ */ +TEST_BEGIN(test_init_percent) { + expect_init_percent(100, "1"); + expect_init_percent(75, ".75"); + expect_init_percent(1, ".01"); + expect_init_percent(50, ".5"); +} +TEST_END + static void expect_add(const char *astr, const char *bstr, const char* resultstr) { fxp_t a = xparse_fxp(astr); @@ -358,6 +382,7 @@ main(void) { return test_no_reentrancy( test_parse_valid, test_parse_invalid, + test_init_percent, test_add_simple, test_sub_simple, test_mul_simple, From b3df80bc797f1578b0f51a6919e18049663ffae1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 8 Dec 2020 16:33:39 -0800 Subject: [PATCH 2015/2608] Pull HPA options into a containing struct. Currently that just means max_alloc, but we're about to add more. While we're touching these lines anyways, tweak things to be more in line with testing. --- include/jemalloc/internal/hpa.h | 15 ++++------- include/jemalloc/internal/hpa_opts.h | 25 +++++++++++++++++++ .../internal/jemalloc_internal_externs.h | 4 +-- include/jemalloc/internal/pa.h | 2 +- src/arena.c | 4 +-- src/ctl.c | 2 +- src/hpa.c | 9 ++++--- src/jemalloc.c | 8 +++--- src/pa.c | 6 ++--- test/unit/hpa.c | 5 +++- 10 files changed, 52 insertions(+), 28 deletions(-) create mode 100644 include/jemalloc/internal/hpa_opts.h diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index de9cc753..778d1c92 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_HPA_H #include "jemalloc/internal/exp_grow.h" -#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" @@ -64,14 +64,6 @@ struct hpa_shard_s { psset_t psset; - /* - * The largest size we'll allocate out of the shard. For those - * allocations refused, the caller (in practice, the PA module) will - * fall back to the more general (for now) PAC, which can always handle - * any allocation request. 
- */ - size_t alloc_max; - /* * How many grow operations have occurred. * @@ -93,6 +85,9 @@ struct hpa_shard_s { unsigned ind; emap_t *emap; + /* The configuration choices for this hpa shard. */ + hpa_shard_opts_t opts; + /* * How many pages have we started but not yet finished purging in this * hpa shard. @@ -113,7 +108,7 @@ struct hpa_shard_s { */ bool hpa_supported(); bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, size_t alloc_max); + edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h new file mode 100644 index 00000000..95e86b46 --- /dev/null +++ b/include/jemalloc/internal/hpa_opts.h @@ -0,0 +1,25 @@ +#ifndef JEMALLOC_INTERNAL_HPA_OPTS_H +#define JEMALLOC_INTERNAL_HPA_OPTS_H + +/* + * This file is morally part of hpa.h, but is split out for header-ordering + * reasons. + */ + +typedef struct hpa_shard_opts_s hpa_shard_opts_t; +struct hpa_shard_opts_s { + /* + * The largest size we'll allocate out of the shard. For those + * allocations refused, the caller (in practice, the PA module) will + * fall back to the more general (for now) PAC, which can always handle + * any allocation request. 
+ */ + size_t slab_max_alloc; +}; + +#define HPA_SHARD_OPTS_DEFAULT { \ + /* slab_max_alloc */ \ + 64 * 1024 \ +} + +#endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index c78db06e..166c91d0 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_EXTERNS_H #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/nstime.h" @@ -14,8 +15,7 @@ extern bool opt_abort_conf; extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; -extern size_t opt_hpa_slab_max_alloc; - +extern hpa_shard_opts_t opt_hpa_opts; extern size_t opt_hpa_sec_max_alloc; extern size_t opt_hpa_sec_max_bytes; extern size_t opt_hpa_sec_nshards; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b9030226..6ded54f8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,7 +130,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. 
*/ -bool pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, +bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); /* * We stop using the HPA when custom extent hooks are installed, but still diff --git a/src/arena.c b/src/arena.c index 3448160f..da0f1f02 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1480,8 +1480,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { if (pa_shard_enable_hpa(&arena->pa_shard, - opt_hpa_slab_max_alloc, opt_hpa_sec_nshards, - opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { + &opt_hpa_opts, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, + opt_hpa_sec_max_bytes)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index feefa687..195a46e9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2090,7 +2090,7 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_slab_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index cd0e803e..dd9be5ad 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -48,7 +48,7 @@ hpa_supported() { bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, size_t alloc_max) { + edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts) { /* malloc_conf processing should have filtered out these cases. 
*/ assert(hpa_supported()); bool err; @@ -67,13 +67,14 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->base = base; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); - shard->alloc_max = alloc_max; shard->age_counter = 0; shard->eden = NULL; shard->eden_len = 0; shard->ind = ind; shard->emap = emap; + shard->opts = *opts; + shard->npending_purge = 0; shard->stats.npurge_passes = 0; @@ -489,7 +490,7 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) static edata_t * hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size <= shard->alloc_max); + assert(size <= shard->opts.slab_max_alloc); bool err; bool oom; edata_t *edata; @@ -614,7 +615,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, if (alignment > PAGE || zero) { return NULL; } - if (size > shard->alloc_max) { + if (size > shard->opts.slab_max_alloc) { return NULL; } diff --git a/src/jemalloc.c b/src/jemalloc.c index ca8a7deb..d1b09dd2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -144,7 +144,7 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; -size_t opt_hpa_slab_max_alloc = 256 * 1024; +hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; size_t opt_hpa_sec_max_alloc = 32 * 1024; /* These settings correspond to a maximum of 1MB cached per arena. 
*/ @@ -1410,8 +1410,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") - CONF_HANDLE_SIZE_T(opt_hpa_slab_max_alloc, - "hpa_slab_max_alloc", PAGE, 512 * PAGE, + CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, + "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", @@ -1717,7 +1717,7 @@ malloc_init_hard_a0_locked() { opt_hpa = false; } } else if (opt_hpa) { - if (pa_shard_enable_hpa(&a0->pa_shard, opt_hpa_slab_max_alloc, + if (pa_shard_enable_hpa(&a0->pa_shard, &opt_hpa_opts, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, opt_hpa_sec_max_bytes)) { return true; diff --git a/src/pa.c b/src/pa.c index da64b829..abe3f00b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,10 +49,10 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, size_t alloc_max, size_t sec_nshards, - size_t sec_alloc_max, size_t sec_bytes_max) { +pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, + size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, - &shard->edata_cache, shard->ind, alloc_max)) { + &shard->edata_cache, shard->ind, hpa_opts)) { return true; } if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 90ec89e4..924795f6 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,9 +37,12 @@ create_test_data() { err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); + hpa_shard_opts_t opts = HPA_SHARD_OPTS_DEFAULT; + opts.slab_max_alloc = ALLOC_MAX; + err = hpa_shard_init(&test_data->shard, &test_data->emap, test_data->base, &test_data->shard_edata_cache, SHARD_IND, - ALLOC_MAX); + &opts); assert_false(err, ""); return (hpa_shard_t 
*)test_data; From 4790db15ed2bc751f1b96404358a42bd50c8a461 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 9 Dec 2020 13:52:29 -0800 Subject: [PATCH 2016/2608] HPA: make the hugification threshold configurable. --- include/jemalloc/internal/hpa_opts.h | 9 ++++++++- src/ctl.c | 5 +++++ src/hpa.c | 8 +++++--- src/jemalloc.c | 23 +++++++++++++++++++++++ src/stats.c | 1 + 5 files changed, 42 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 95e86b46..bce0de2c 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -15,11 +15,18 @@ struct hpa_shard_opts_s { * any allocation request. */ size_t slab_max_alloc; + /* + * When the number of active bytes in a hugepage is >= + * hugification_threshold, we force hugify it. + */ + size_t hugification_threshold; }; #define HPA_SHARD_OPTS_DEFAULT { \ /* slab_max_alloc */ \ - 64 * 1024 \ + 64 * 1024, \ + /* hugification_threshold */ \ + HUGEPAGE * 95 / 100, \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 195a46e9..5096162c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -94,6 +94,7 @@ CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) +CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -396,6 +397,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_hugification_threshold"), + CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -2091,6 +2094,8 @@ CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, 
opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_hugification_threshold, + opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index dd9be5ad..00fb279d 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -137,10 +137,12 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* - * For now, just use a static check; hugify a page if it's <= 5% - * inactive. Eventually, this should be a malloc conf option. + * Note that this needs to be >= rather than just >, because of the + * important special case in which the hugification threshold is exactly + * HUGEPAGE. */ - return hpdata_nactive_get(ps) >= (HUGEPAGE_PAGES) * 95 / 100; + return hpdata_nactive_get(ps) * PAGE + >= shard->opts.hugification_threshold; } static bool diff --git a/src/jemalloc.c b/src/jemalloc.c index d1b09dd2..cd402621 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1414,6 +1414,29 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); + /* + * Accept either a ratio-based or an exact hugification + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold, + "hpa_hugification_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_hugification_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, + &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + opt_hpa_opts.hugification_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", diff --git a/src/stats.c b/src/stats.c index 8e29656e..27fe5b76 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1464,6 +1464,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("oversize_threshold") OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") + OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") From 32dd15379696429dc1807c3c05fe125428a6faac Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 9 Dec 2020 14:42:05 -0800 Subject: [PATCH 2017/2608] HPA: Make dehugification threshold configurable. --- include/jemalloc/internal/hpa_opts.h | 7 +++++++ src/ctl.c | 5 +++++ src/hpa.c | 4 +++- src/jemalloc.c | 21 +++++++++++++++++++++ src/stats.c | 1 + 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index bce0de2c..0ed1c417 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -20,6 +20,11 @@ struct hpa_shard_opts_s { * hugification_threshold, we force hugify it. */ size_t hugification_threshold; + /* + * When the number of dirty bytes in a hugepage is >= + * dehugification_threshold, we force dehugify it. 
+ */ + size_t dehugification_threshold; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -27,6 +32,8 @@ struct hpa_shard_opts_s { 64 * 1024, \ /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ + /* dehugification_threshold */ \ + HUGEPAGE * 20 / 100 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 5096162c..ba667b5b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -95,6 +95,7 @@ CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) +CTL_PROTO(opt_hpa_dehugification_threshold) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -399,6 +400,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, + {NAME("hpa_dehugification_threshold"), + CTL(opt_hpa_dehugification_threshold)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -2096,6 +2099,8 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) +CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, + opt_hpa_opts.dehugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index 00fb279d..0e704b8c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -191,7 +191,9 @@ hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { * active pages; i.e. 4/5s of hugepage pages must be active. 
*/ if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0) - || hpdata_ndirty_get(ps) > HUGEPAGE_PAGES / 5) { + || (hpdata_ndirty_get(ps) != 0 + && hpdata_ndirty_get(ps) * PAGE + >= shard->opts.dehugification_threshold)) { hpdata_purge_allowed_set(ps, true); } if (hpa_good_hugification_candidate(shard, ps) diff --git a/src/jemalloc.c b/src/jemalloc.c index cd402621..fe8e09e6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1437,6 +1437,27 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } + /* And the same for the dehugification_threhsold. */ + CONF_HANDLE_SIZE_T( + opt_hpa_opts.dehugification_threshold, + "hpa_dehugification_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_dehugification_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, + &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + opt_hpa_opts.dehugification_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", diff --git a/src/stats.c b/src/stats.c index 27fe5b76..7a0f20bf 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1465,6 +1465,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") + OPT_WRITE_SIZE_T("hpa_dehugification_threshold") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") From 79f81a3732c434e9b648561bf8ab6ab6bf74385a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 9 Dec 2020 15:55:17 -0800 Subject: [PATCH 2018/2608] HPA: Make dirty_mult configurable. 
--- include/jemalloc/internal/hpa_opts.h | 11 ++++++++++- src/ctl.c | 7 +++++++ src/hpa.c | 7 ++++++- src/jemalloc.c | 18 ++++++++++++++++++ src/stats.c | 21 ++++++++++++++++++++- 5 files changed, 61 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 0ed1c417..5ff00725 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_HPA_OPTS_H #define JEMALLOC_INTERNAL_HPA_OPTS_H +#include "jemalloc/internal/fxp.h" + /* * This file is morally part of hpa.h, but is split out for header-ordering * reasons. @@ -25,6 +27,11 @@ struct hpa_shard_opts_s { * dehugification_threshold, we force dehugify it. */ size_t dehugification_threshold; + /* + * The HPA purges whenever the number of pages exceeds dirty_mult * + * active_pages. This may be set to (fxp_t)-1 to disable purging. + */ + fxp_t dirty_mult; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -33,7 +40,9 @@ struct hpa_shard_opts_s { /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ /* dehugification_threshold */ \ - HUGEPAGE * 20 / 100 \ + HUGEPAGE * 20 / 100, \ + /* dirty_mult */ \ + FXP_INIT_PERCENT(25) \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index ba667b5b..1c5e32ba 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -96,6 +96,7 @@ CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_dehugification_threshold) +CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_nshards) @@ -402,6 +403,7 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_dehugification_threshold"), CTL(opt_hpa_dehugification_threshold)}, + {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, 
{NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, @@ -2101,6 +2103,11 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, opt_hpa_opts.dehugification_threshold, size_t) +/* + * This will have to change before we publicly document this option; fxp_t and + * its representation are internal implementation details. + */ +CTL_RO_NL_GEN(opt_hpa_dirty_mult, opt_hpa_opts.dirty_mult, fxp_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) diff --git a/src/hpa.c b/src/hpa.c index 0e704b8c..3c706cbf 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -147,13 +147,18 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { static bool hpa_should_purge(hpa_shard_t *shard) { + if (shard->opts.dirty_mult == (fxp_t)-1) { + return false; + } size_t adjusted_ndirty = psset_ndirty(&shard->psset) - shard->npending_purge; /* * Another simple static check; purge whenever dirty exceeds 25% of * active. 
*/ - return adjusted_ndirty > psset_nactive(&shard->psset) / 4; + size_t max_ndirty = fxp_mul_frac(psset_nactive(&shard->psset), + shard->opts.dirty_mult); + return adjusted_ndirty > max_ndirty; } static void diff --git a/src/jemalloc.c b/src/jemalloc.c index fe8e09e6..c2c75fa5 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1458,6 +1458,24 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } + if (CONF_MATCH("hpa_dirty_mult")) { + if (CONF_MATCH_VALUE("-1")) { + opt_hpa_opts.dirty_mult = (fxp_t)-1; + CONF_CONTINUE; + } + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, + &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } else { + opt_hpa_opts.dirty_mult = ratio; + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", diff --git a/src/stats.c b/src/stats.c index 7a0f20bf..1a7e6e4e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" #include "jemalloc/internal/emitter.h" +#include "jemalloc/internal/fxp.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/prof_stats.h" @@ -1375,13 +1376,14 @@ stats_general_print(emitter_t *emitter) { uint64_t u64v; int64_t i64v; ssize_t ssv, ssv2; - size_t sv, bsz, usz, i64sz, ssz, sssz, cpsz; + size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); ssz = sizeof(size_t); sssz = sizeof(ssize_t); cpsz = sizeof(const char *); + u32sz = sizeof(uint32_t); i64sz = sizeof(int64_t); CTL_GET("version", &cpv, const char *); @@ -1466,6 +1468,23 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") 
OPT_WRITE_SIZE_T("hpa_dehugification_threshold") + if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) + == 0) { + /* + * We cheat a little and "know" the secret meaning of this + * representation. + */ + if (u32v == (uint32_t)-1) { + emitter_kv(emitter, "hpa_dirty_mult", + "opt.hpa_dirty_mult", emitter_type_string, "-1"); + } else { + char buf[FXP_BUF_SIZE]; + fxp_print(u32v, buf); + const char *bufp = buf; + emitter_kv(emitter, "hpa_dirty_mult", + "opt.hpa_dirty_mult", emitter_type_string, &bufp); + } + } OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_nshards") From edbfe6912c1b7e8b561dfee1b058425de6c06285 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 08:49:34 -0800 Subject: [PATCH 2019/2608] Inline malloc fastpath into operator new. This saves a small but non-negligible amount of CPU in C++ programs. --- .../internal/jemalloc_internal_externs.h | 3 +- .../internal/jemalloc_internal_inlines_c.h | 118 ++++++++++++++++++ .../internal/jemalloc_internal_types.h | 8 ++ src/jemalloc.c | 116 +---------------- src/jemalloc_cpp.cpp | 15 ++- 5 files changed, 141 insertions(+), 119 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 166c91d0..8054ad9c 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -30,6 +30,7 @@ extern bool opt_xmalloc; extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; +extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; @@ -64,7 +65,7 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -bool malloc_initialized(void); void je_sdallocx_noflags(void 
*ptr, size_t size); +void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 0a5ffba5..b0868b7d 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -3,7 +3,9 @@ #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" /* @@ -219,4 +221,120 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, newsize); } +JEMALLOC_ALWAYS_INLINE void +fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, + cache_bin_t *bin, void *ret) { + thread_allocated_set(tsd, allocated_after); + if (config_stats) { + bin->tstats.nrequests++; + } + + LOG("core.malloc.exit", "result: %p", ret); +} + +JEMALLOC_ALWAYS_INLINE bool +malloc_initialized(void) { + return (malloc_init_state == malloc_init_initialized); +} + +/* + * malloc() fastpath. Included here so that we can inline it into operator new; + * function call overhead there is non-negligible as a fraction of total CPU in + * allocation-heavy C++ programs. We take the fallback alloc to allow malloc + * (which can return NULL) to differ in its behavior from operator new (which + * can't). It matches the signature of malloc / operator new so that we can + * tail-call the fallback allocator, allowing us to avoid setting up the call + * frame in the common case. + * + * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit + * tcache. If either of these is false, we tail-call to the slowpath, + * malloc_default(). Tail-calling is used to avoid any caller-saved + * registers. + * + * fastpath supports ticker and profiling, both of which will also + * tail-call to the slowpath if they fire. 
+ */ +JEMALLOC_ALWAYS_INLINE void * +imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { + LOG("core.malloc.entry", "size: %zu", size); + if (tsd_get_allocates() && unlikely(!malloc_initialized())) { + return fallback_alloc(size); + } + + tsd_t *tsd = tsd_get(false); + if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { + return fallback_alloc(size); + } + /* + * The code below till the branch checking the next_event threshold may + * execute before malloc_init(), in which case the threshold is 0 to + * trigger slow path and initialization. + * + * Note that when uninitialized, only the fast-path variants of the sz / + * tsd facilities may be called. + */ + szind_t ind; + /* + * The thread_allocated counter in tsd serves as a general purpose + * accumulator for bytes of allocation to trigger different types of + * events. usize is always needed to advance thread_allocated, though + * it's not always needed in the core allocation logic. + */ + size_t usize; + sz_size2index_usize_fastpath(size, &ind, &usize); + /* Fast path relies on size being a bin. */ + assert(ind < SC_NBINS); + assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && + (size <= SC_SMALL_MAXCLASS)); + + uint64_t allocated, threshold; + te_malloc_fastpath_ctx(tsd, &allocated, &threshold); + uint64_t allocated_after = allocated + usize; + /* + * The ind and usize might be uninitialized (or partially) before + * malloc_init(). The assertions check for: 1) full correctness (usize + * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0) + * when !initialized. + */ + if (!malloc_initialized()) { + assert(threshold == 0); + } else { + assert(ind == sz_size2index(size)); + assert(usize > 0 && usize == sz_index2size(ind)); + } + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. 
+ */ + if (unlikely(allocated_after >= threshold)) { + return fallback_alloc(size); + } + assert(tsd_fast(tsd)); + + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + cache_bin_t *bin = &tcache->bins[ind]; + bool tcache_success; + void *ret; + + /* + * We split up the code this way so that redundant low-water + * computation doesn't happen on the (more common) case in which we + * don't touch the low water mark. The compiler won't do this + * duplication on its own. + */ + ret = cache_bin_alloc_easy(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + ret = cache_bin_alloc(bin, &tcache_success); + if (tcache_success) { + fastpath_success_finish(tsd, allocated_after, bin, ret); + return ret; + } + + return fallback_alloc(size); +} + #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 1ce0f3aa..61c1f31a 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -20,6 +20,14 @@ typedef enum zero_realloc_action_e zero_realloc_action_t; /* Signature of write callback. */ typedef void (write_cb_t)(void *, const char *); +enum malloc_init_e { + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. */ +}; +typedef enum malloc_init_e malloc_init_t; + /* * Flags bits: * diff --git a/src/jemalloc.c b/src/jemalloc.c index c2c75fa5..dc3c98b6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -169,13 +169,7 @@ static arena_t *a0; /* arenas[0]. */ unsigned narenas_auto; unsigned manual_arena_base; -typedef enum { - malloc_init_uninitialized = 3, - malloc_init_a0_initialized = 2, - malloc_init_recursible = 1, - malloc_init_initialized = 0 /* Common case --> jnz. 
*/ -} malloc_init_t; -static malloc_init_t malloc_init_state = malloc_init_uninitialized; +malloc_init_t malloc_init_state = malloc_init_uninitialized; /* False should be the common case. Set to true to trigger initialization. */ bool malloc_slow = true; @@ -280,11 +274,6 @@ static bool malloc_init_hard(void); * Begin miscellaneous support functions. */ -bool -malloc_initialized(void) { - return (malloc_init_state == malloc_init_initialized); -} - JEMALLOC_ALWAYS_INLINE bool malloc_init_a0(void) { if (unlikely(malloc_init_state == malloc_init_uninitialized)) { @@ -2597,112 +2586,11 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. */ -JEMALLOC_ALWAYS_INLINE void -fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, - cache_bin_t *bin, void *ret) { - thread_allocated_set(tsd, allocated_after); - if (config_stats) { - bin->tstats.nrequests++; - } - - LOG("core.malloc.exit", "result: %p", ret); -} - -/* - * malloc() fastpath. - * - * Fastpath assumes size <= SC_LOOKUP_MAXCLASS, and that we hit - * tcache. If either of these is false, we tail-call to the slowpath, - * malloc_default(). Tail-calling is used to avoid any caller-saved - * registers. - * - * fastpath supports ticker and profiling, both of which will also - * tail-call to the slowpath if they fire. - */ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { - LOG("core.malloc.entry", "size: %zu", size); - - if (tsd_get_allocates() && unlikely(!malloc_initialized())) { - return malloc_default(size); - } - - tsd_t *tsd = tsd_get(false); - if (unlikely((size > SC_LOOKUP_MAXCLASS) || tsd == NULL)) { - return malloc_default(size); - } - /* - * The code below till the branch checking the next_event threshold may - * execute before malloc_init(), in which case the threshold is 0 to - * trigger slow path and initialization. 
- * - * Note that when uninitialized, only the fast-path variants of the sz / - * tsd facilities may be called. - */ - szind_t ind; - /* - * The thread_allocated counter in tsd serves as a general purpose - * accumulator for bytes of allocation to trigger different types of - * events. usize is always needed to advance thread_allocated, though - * it's not always needed in the core allocation logic. - */ - size_t usize; - sz_size2index_usize_fastpath(size, &ind, &usize); - /* Fast path relies on size being a bin. */ - assert(ind < SC_NBINS); - assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && - (size <= SC_SMALL_MAXCLASS)); - - uint64_t allocated, threshold; - te_malloc_fastpath_ctx(tsd, &allocated, &threshold); - uint64_t allocated_after = allocated + usize; - /* - * The ind and usize might be uninitialized (or partially) before - * malloc_init(). The assertions check for: 1) full correctness (usize - * & ind) when initialized; and 2) guaranteed slow-path (threshold == 0) - * when !initialized. - */ - if (!malloc_initialized()) { - assert(threshold == 0); - } else { - assert(ind == sz_size2index(size)); - assert(usize > 0 && usize == sz_index2size(ind)); - } - /* - * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. - */ - if (unlikely(allocated_after >= threshold)) { - return malloc_default(size); - } - assert(tsd_fast(tsd)); - - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ true); - cache_bin_t *bin = &tcache->bins[ind]; - bool tcache_success; - void *ret; - - /* - * We split up the code this way so that redundant low-water - * computation doesn't happen on the (more common) case in which we - * don't touch the low water mark. The compiler won't do this - * duplication on its own. 
- */ - ret = cache_bin_alloc_easy(bin, &tcache_success); - if (tcache_success) { - fastpath_success_finish(tsd, allocated_after, bin, ret); - return ret; - } - ret = cache_bin_alloc(bin, &tcache_success); - if (tcache_success) { - fastpath_success_finish(tsd, allocated_after, bin, ret); - return ret; - } - - return malloc_default(size); + return imalloc_fastpath(size, &malloc_default); } JEMALLOC_EXPORT int JEMALLOC_NOTHROW diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 6959b27f..47ba92a0 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -86,10 +86,10 @@ handleOOM(std::size_t size, bool nothrow) { } template -JEMALLOC_ALWAYS_INLINE -void * -newImpl(std::size_t size) noexcept(IsNoExcept) { - void *ptr = je_malloc(size); +JEMALLOC_NOINLINE +static void * +fallback_impl(std::size_t size) noexcept(IsNoExcept) { + void *ptr = malloc_default(size); if (likely(ptr != nullptr)) { return ptr; } @@ -97,6 +97,13 @@ newImpl(std::size_t size) noexcept(IsNoExcept) { return handleOOM(size, IsNoExcept); } +template +JEMALLOC_ALWAYS_INLINE +void * +newImpl(std::size_t size) noexcept(IsNoExcept) { + return imalloc_fastpath(size, &fallback_impl); +} + void * operator new(std::size_t size) { return newImpl(size); From f3b2668b3219e108348b9a28d00c4f805a1b5ab6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 5 Feb 2021 16:47:09 -0800 Subject: [PATCH 2020/2608] Report the offending pointer on sized dealloc bug detection. 
--- include/jemalloc/internal/arena_inlines_b.h | 9 +++++---- include/jemalloc/internal/safety_check.h | 2 +- src/jemalloc.c | 5 +++-- src/safety_check.c | 8 ++++---- src/tcache.c | 21 ++++++++++++++++++--- 5 files changed, 31 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 13e6eb52..5df8e858 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -211,7 +211,7 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, szind_t szind) { +large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { if (!config_opt_safety_checks) { return false; } @@ -229,7 +229,8 @@ large_dalloc_safety_checks(edata_t *edata, szind_t szind) { return true; } if (unlikely(sz_index2size(szind) != edata_usize_get(edata))) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true); + safety_check_fail_sized_dealloc(/* current_dealloc */ true, + ptr); return true; } @@ -243,7 +244,7 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, szind)) { + if (large_dalloc_safety_checks(edata, ptr, szind)) { /* See the comment in isfree. */ return; } @@ -287,7 +288,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, szind)) { + if (large_dalloc_safety_checks(edata, ptr, szind)) { /* See the comment in isfree. 
*/ return; } diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index a7a44338..b27ac088 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H -void safety_check_fail_sized_dealloc(bool current_dealloc); +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr); void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. */ void safety_check_set_abort(void (*abort_fn)(const char *)); diff --git a/src/jemalloc.c b/src/jemalloc.c index dc3c98b6..9d038806 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2751,7 +2751,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { &dbg_ctx); if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( - /* current_dealloc */ true); + /* current_dealloc */ true, ptr); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { @@ -2801,7 +2801,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). */ if (alloc_ctx.szind != sz_size2index(usize)) { - safety_check_fail_sized_dealloc(true); + safety_check_fail_sized_dealloc(true, + ptr); } } } else { diff --git a/src/safety_check.c b/src/safety_check.c index c692835a..0dff9348 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -3,14 +3,14 @@ static void (*safety_check_abort)(const char *message); -void safety_check_fail_sized_dealloc(bool current_dealloc) { +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr) { char *src = current_dealloc ? "the current pointer being freed" : "in thread cache, possibly from previous deallocations"; safety_check_fail(": size mismatch detected, likely caused by" - " application sized deallocation bugs (source: %s). 
Suggest building" - "with --enable-debug or address sanitizer for debugging. Abort.\n", - src); + " application sized deallocation bugs (source address: %p, %s). " + "Suggest building with --enable-debug or address sanitizer for " + "debugging. Abort.\n", ptr, src); } void safety_check_set_abort(void (*abort_fn)(const char *)) { diff --git a/src/tcache.c b/src/tcache.c index 7c4047f4..3489e724 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -250,6 +250,20 @@ tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); } +JEMALLOC_NOINLINE static void +tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, + size_t nptrs, emap_batch_lookup_result_t *edatas) { + bool found_mismatch = false; + for (size_t i = 0; i < nptrs; i++) { + if (edata_szind_get(edatas[i].edata) != szind) { + found_mismatch = true; + safety_check_fail_sized_dealloc(false, + tcache_bin_flush_ptr_getter(arr, i)); + } + } + assert(found_mismatch); +} + static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { @@ -264,8 +278,8 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, &tcache_bin_flush_ptr_getter, (void *)arr, &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); - if (config_opt_safety_checks && szind_sum != 0) { - safety_check_fail_sized_dealloc(false); + if (config_opt_safety_checks && unlikely(szind_sum != 0)) { + tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); } } @@ -435,7 +449,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, dalloc_count++; } } else { - if (large_dalloc_safety_checks(edata, binind)) { + if (large_dalloc_safety_checks(edata, ptr, + binind)) { /* See the comment in isfree. 
*/ continue; } From 041145c272711b55f91aa42128b108674a12fd91 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 5 Feb 2021 17:26:45 -0800 Subject: [PATCH 2021/2608] Report the correct and wrong sizes on sized dealloc bug detection. --- include/jemalloc/internal/arena_inlines_b.h | 7 ++++--- include/jemalloc/internal/safety_check.h | 3 ++- src/jemalloc.c | 14 ++++++++++---- src/safety_check.c | 12 +++++++----- src/tcache.c | 10 +++++++--- 5 files changed, 30 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5df8e858..5410b160 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -228,9 +228,10 @@ large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { (uintptr_t)edata_addr_get(edata)); return true; } - if (unlikely(sz_index2size(szind) != edata_usize_get(edata))) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true, - ptr); + size_t input_size = sz_index2size(szind); + if (unlikely(input_size != edata_usize_get(edata))) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, + /* true_size */ edata_usize_get(edata), input_size); return true; } diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index b27ac088..f10c68e4 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,7 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr); +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, + size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); /* Can set to NULL for a default. 
*/ void safety_check_set_abort(void (*abort_fn)(const char *)); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9d038806..3bccac95 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2751,7 +2751,9 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { &dbg_ctx); if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr); + /* current_dealloc */ true, ptr, + /* true_size */ sz_size2index(dbg_ctx.szind), + /* input_size */ sz_size2index(alloc_ctx->szind)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { @@ -2800,9 +2802,13 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). */ - if (alloc_ctx.szind != sz_size2index(usize)) { - safety_check_fail_sized_dealloc(true, - ptr); + if (unlikely(alloc_ctx.szind != + sz_size2index(usize))) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ sz_index2size( + alloc_ctx.szind), + /* input_size */ usize); } } } else { diff --git a/src/safety_check.c b/src/safety_check.c index 0dff9348..9747afef 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -3,14 +3,16 @@ static void (*safety_check_abort)(const char *message); -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr) { +void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, + size_t true_size, size_t input_size) { char *src = current_dealloc ? "the current pointer being freed" : "in thread cache, possibly from previous deallocations"; - safety_check_fail(": size mismatch detected, likely caused by" - " application sized deallocation bugs (source address: %p, %s). " - "Suggest building with --enable-debug or address sanitizer for " - "debugging. 
Abort.\n", ptr, src); + safety_check_fail(": size mismatch detected (true size %zu " + "vs input size %zu), likely caused by application sized " + "dealloction bugs (source address: %p, %s). Suggest building with " + "--enable-debug or address sanitizer for debugging. Abort.\n", + true_size, input_size, ptr, src); } void safety_check_set_abort(void (*abort_fn)(const char *)) { diff --git a/src/tcache.c b/src/tcache.c index 3489e724..39a4ea6e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -255,10 +255,14 @@ tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, size_t nptrs, emap_batch_lookup_result_t *edatas) { bool found_mismatch = false; for (size_t i = 0; i < nptrs; i++) { - if (edata_szind_get(edatas[i].edata) != szind) { + szind_t true_szind = edata_szind_get(edatas[i].edata); + if (true_szind != szind) { found_mismatch = true; - safety_check_fail_sized_dealloc(false, - tcache_bin_flush_ptr_getter(arr, i)); + safety_check_fail_sized_dealloc( + /* current_dealloc */ false, + /* ptr */ tcache_bin_flush_ptr_getter(arr, i), + /* true_size */ sz_index2size(true_szind), + /* input_size */ sz_index2size(szind)); } } assert(found_mismatch); From 8c5e5f50a29d6ca636bf7394d93be1814de6d74c Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Wed, 10 Feb 2021 11:08:18 -0500 Subject: [PATCH 2022/2608] Fix stats for "tcache_max" (was "lg_tcache_max") This opt was changed here: c8209150f9d219a137412b06431c9d52839c7272 and looks like this got missed. Also update the write type to be unsigned. 
--- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 1a7e6e4e..20ff299c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1498,7 +1498,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("tcache") - OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") OPT_WRITE_UNSIGNED("tcache_nslots_small_max") OPT_WRITE_UNSIGNED("tcache_nslots_large") From a11be50332c5cdae7ce74d8e0551e7f3143630b8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 9 Feb 2021 22:24:35 -0800 Subject: [PATCH 2023/2608] Implement opt.cache_oblivious. Keep config.cache_oblivious for now to remain backward-compatible. --- INSTALL.md | 14 +++++++------- doc/jemalloc.xml.in | 16 ++++++++++++++++ .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/prof_inlines.h | 2 +- include/jemalloc/internal/sz.h | 14 ++++++-------- src/ctl.c | 3 +++ src/jemalloc.c | 17 ++++++++++------- src/large.c | 3 ++- src/stats.c | 1 + src/sz.c | 6 ++++-- test/unit/extent_quantize.c | 2 +- test/unit/mallctl.c | 1 + 12 files changed, 53 insertions(+), 27 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index eb55acfd..adc72b82 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -185,13 +185,13 @@ any of the following arguments (not a definitive list) to 'configure': * `--disable-cache-oblivious` - Disable cache-oblivious large allocation alignment for large allocation - requests with no alignment constraints. If this feature is disabled, all - large allocations are page-aligned as an implementation artifact, which can - severely harm CPU cache utilization. However, the cache-oblivious layout - comes at the cost of one extra page per large allocation, which in the - most extreme case increases physical memory usage for the 16 KiB size class - to 20 KiB. 
+ Disable cache-oblivious large allocation alignment by default, for large + allocation requests with no alignment constraints. If this feature is + disabled, all large allocations are page-aligned as an implementation + artifact, which can severely harm CPU cache utilization. However, the + cache-oblivious layout comes at the cost of one extra page per large + allocation, which in the most extreme case increases physical memory usage + for the 16 KiB size class to 20 KiB. * `--disable-syscall` diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 4b93c5a9..018170ca 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -936,6 +936,22 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", + + + opt.cache_oblivious + (bool) + r- + + Enable / Disable cache-oblivious large allocation + alignment, for large requests with no alignment constraints. If this + feature is disabled, all large allocations are page-aligned as an + implementation artifact, which can severely harm CPU cache utilization. + However, the cache-oblivious layout comes at the cost of one extra page + per large allocation, which in the most extreme case increases physical + memory usage for the 16 KiB size class to 20 KiB. This option is enabled + by default. + + opt.metadata_thp diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8054ad9c..da693559 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -33,6 +33,7 @@ extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; +extern bool opt_cache_oblivious; /* Number of CPUs. 
*/ extern unsigned ncpus; diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index c76d2ae5..7884e9a7 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -223,7 +223,7 @@ prof_sample_align(size_t orig_align) { * w/o metadata lookup. */ assert(opt_prof); - return (config_cache_oblivious && orig_align < PAGE) ? PAGE : + return (opt_cache_oblivious && orig_align < PAGE) ? PAGE : orig_align; } diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 91940ccd..f2be6139 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -45,15 +45,13 @@ extern size_t sz_index2size_tab[SC_NSIZES]; */ extern uint8_t sz_size2index_tab[]; -static const size_t sz_large_pad = -#ifdef JEMALLOC_CACHE_OBLIVIOUS - PAGE -#else - 0 -#endif - ; +/* + * Padding for large allocations: PAGE when opt_cache_oblivious == true (to + * enable cache index randomization); 0 otherwise. 
+ */ +extern size_t sz_large_pad; -extern void sz_boot(const sc_data_t *sc_data); +extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { diff --git a/src/ctl.c b/src/ctl.c index 1c5e32ba..4fc3ad07 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -90,6 +90,7 @@ CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) @@ -395,6 +396,7 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, @@ -2095,6 +2097,7 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 3bccac95..125682bf 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -102,6 +102,14 @@ bool opt_trust_madvise = #endif ; +bool opt_cache_oblivious = +#ifdef JEMALLOC_CACHE_OBLIVIOUS + true +#else + false +#endif + ; + zero_realloc_action_t opt_zero_realloc_action = zero_realloc_action_strict; @@ -1697,7 +1705,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); - sz_boot(&sc_data); + sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); if (opt_stats_print) { @@ -2790,12 +2798,7 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { * usize can be trusted to determine 
szind and slab. */ alloc_ctx.szind = sz_size2index(usize); - if (config_cache_oblivious) { - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); - } else { - /* Non page aligned must be slab allocated. */ - alloc_ctx.slab = true; - } + alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); } else if (opt_prof) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); diff --git a/src/large.c b/src/large.c index f23839f7..bd29e5c5 100644 --- a/src/large.c +++ b/src/large.c @@ -95,7 +95,8 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, } if (zero) { - if (config_cache_oblivious) { + if (opt_cache_oblivious) { + assert(sz_large_pad == PAGE); /* * Zero the trailing bytes of the original allocation's * last page, since they are in an indeterminate state. diff --git a/src/stats.c b/src/stats.c index 20ff299c..7a0526c5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1458,6 +1458,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") diff --git a/src/sz.c b/src/sz.c index 7734f394..d3115dda 100644 --- a/src/sz.c +++ b/src/sz.c @@ -1,9 +1,10 @@ #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) size_t sz_pind2sz_tab[SC_NPSIZES+1]; - +size_t sz_large_pad; size_t sz_psz_quantize_floor(size_t size) { @@ -105,7 +106,8 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { } void -sz_boot(const sc_data_t *sc_data) { +sz_boot(const sc_data_t *sc_data, bool cache_oblivious) { + sz_large_pad = cache_oblivious ? 
PAGE : 0; sz_boot_pind2sz_tab(sc_data); sz_boot_index2size_tab(sc_data); sz_boot_size2index_tab(sc_data); diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index 27a4a7ea..e6bbd539 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -47,7 +47,7 @@ TEST_BEGIN(test_large_extent_size) { */ sz = sizeof(bool); - expect_d_eq(mallctl("config.cache_oblivious", (void *)&cache_oblivious, + expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious, &sz, NULL, 0), 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6f5a8f18..1fb74667 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -279,6 +279,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); + TEST_MALLCTL_OPT(bool, cache_oblivious, always); TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); From cde7097ecaba08b50c5594137175e0e1e567f4c4 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Mon, 15 Feb 2021 20:12:23 -0500 Subject: [PATCH 2024/2608] Update INSTALL.md to mention 'autoconf' 'autoconf' needs to be installed for './autogen.sh' to work. --- INSTALL.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/INSTALL.md b/INSTALL.md index adc72b82..14dacfa6 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -12,7 +12,9 @@ that might work is: make make install -Note that documentation is built by the default target only when xsltproc is +Notes: + - "autoconf" needs to be installed + - Documentation is built by the default target only when xsltproc is available. Build will warn but not stop if the dependency is missing. From 4b8870c7dbfaeea7136a8e0b9f93a2ad85d31a55 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 4 Jan 2021 18:22:02 -0800 Subject: [PATCH 2025/2608] SEC: Fix a comment typo. 
--- include/jemalloc/internal/sec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 7c1465ed..6bf5687d 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -80,7 +80,7 @@ struct sec_s { size_t alloc_max; /* * Exceeding this amount of cached extents in a shard causes *all* of - * the shards in that bin to be flushed. + * the bins in that shard to be flushed. */ size_t bytes_max; From f47b4c2cd8ed3e843b987ee972d187df45391b69 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 4 Jan 2021 18:40:27 -0800 Subject: [PATCH 2026/2608] PAI/SEC: Add a dalloc_batch function. This lets the SEC flush all of its items in a single call, rather than flushing everything at once. --- Makefile.in | 1 + include/jemalloc/internal/pai.h | 14 ++++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/hpa.c | 1 + src/pac.c | 1 + src/pai.c | 13 +++++++ src/sec.c | 8 ++--- test/unit/sec.c | 34 +++++++++++++++---- 11 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 src/pai.c diff --git a/Makefile.in b/Makefile.in index 40c41442..11a553b0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -132,6 +132,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ + $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 45edd69c..f7f3e077 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,6 +13,8 @@ struct pai_s { bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); + void 
(*dalloc_batch)(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list); }; /* @@ -42,4 +44,16 @@ pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { self->dalloc(tsdn, self, edata); } +static inline void +pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { + return self->dalloc_batch(tsdn, self, list); +} + +/* + * An implementation of batch deallocation that simply calls dalloc once for + * each item in the list. + */ +void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list); + #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index a93511d1..9ec953a2 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 06460e5a..210204a5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -103,6 +103,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 916460a7..171b95f2 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -73,6 +73,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 06460e5a..210204a5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -103,6 +103,9 @@ Source Files + + Source Files + Source Files diff --git a/src/hpa.c b/src/hpa.c index 3c706cbf..013cd7ed 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -91,6 +91,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->pai.expand = 
&hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; + shard->pai.dalloc_batch = &pai_dalloc_batch_default; return false; } diff --git a/src/pac.c b/src/pac.c index 80646155..0ba0f2f0 100644 --- a/src/pac.c +++ b/src/pac.c @@ -94,6 +94,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; + pac->pai.dalloc_batch = &pai_dalloc_batch_default; return false; } diff --git a/src/pai.c b/src/pai.c new file mode 100644 index 00000000..1035c850 --- /dev/null +++ b/src/pai.c @@ -0,0 +1,13 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + + +void +pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list) { + edata_t *edata; + while ((edata = edata_list_active_first(list)) != NULL) { + edata_list_active_remove(list, edata); + pai_dalloc(tsdn, self, edata); + } +} diff --git a/src/sec.c b/src/sec.c index 262d813d..41e75b9e 100644 --- a/src/sec.c +++ b/src/sec.c @@ -46,6 +46,7 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, sec->pai.expand = &sec_expand; sec->pai.shrink = &sec_shrink; sec->pai.dalloc = &sec_dalloc; + sec->pai.dalloc_batch = &pai_dalloc_batch_default; return false; } @@ -142,6 +143,7 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { for (pszind_t i = 0; i < SEC_NPSIZES; i++) { edata_list_active_concat(&to_flush, &shard->freelist[i]); } + /* * A better way to do this would be to add a batch dalloc function to * the pai_t. Practically, the current method turns into O(n) locks and @@ -149,11 +151,7 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * HPA) can straightforwardly do many deallocations in a single lock / * unlock pair. 
*/ - while (!edata_list_active_empty(&to_flush)) { - edata_t *e = edata_list_active_first(&to_flush); - edata_list_active_remove(&to_flush, e); - pai_dalloc(tsdn, sec->fallback, e); - } + pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } static void diff --git a/test/unit/sec.c b/test/unit/sec.c index cb0c17d1..7657537b 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -8,6 +8,7 @@ struct pai_test_allocator_s { bool alloc_fail; size_t alloc_count; size_t dalloc_count; + size_t dalloc_batch_count; /* * We use a simple bump allocator as the implementation. This isn't * *really* correct, since we may allow expansion into a subsequent @@ -64,11 +65,25 @@ pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { free(edata); } +static void +pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + + edata_t *edata; + while ((edata = edata_list_active_first(list)) != NULL) { + edata_list_active_remove(list, edata); + ta->dalloc_batch_count++; + free(edata); + } +} + static inline void pai_test_allocator_init(pai_test_allocator_t *ta) { ta->alloc_fail = false; ta->alloc_count = 0; ta->dalloc_count = 0; + ta->dalloc_batch_count = 0; /* Just don't start the edata at 0. 
*/ ta->next_ptr = 10 * PAGE; ta->expand_count = 0; @@ -79,6 +94,7 @@ pai_test_allocator_init(pai_test_allocator_t *ta) { ta->pai.expand = &pai_test_allocator_expand; ta->pai.shrink = &pai_test_allocator_shrink; ta->pai.dalloc = &pai_test_allocator_dalloc; + ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch; } TEST_BEGIN(test_reuse) { @@ -190,8 +206,10 @@ TEST_BEGIN(test_auto_flush) { pai_dalloc(tsdn, &sec.pai, extra_alloc); expect_zu_eq(NALLOCS + 1, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq(NALLOCS + 1, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of (non-batch) deallocations"); + expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count, + "Incorrect number of batch deallocations"); } TEST_END @@ -233,8 +251,10 @@ do_disable_flush_test(bool is_disable) { expect_zu_eq(NALLOCS, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq(NALLOCS - 1, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq(0, ta.dalloc_count, + "Incorrect number of (non-batch) deallocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + "Incorrect number of batch deallocations"); /* * If we free into a disabled SEC, it should forward to the fallback. @@ -244,8 +264,10 @@ do_disable_flush_test(bool is_disable) { expect_zu_eq(NALLOCS, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq(is_disable ? NALLOCS : NALLOCS - 1, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count, + "Incorrect number of (non-batch) deallocations"); + expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + "Incorrect number of batch deallocations"); } TEST_BEGIN(test_disable) { From 1944ebbe7f079e79fbeda836dc0333f7a049ac26 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 4 Jan 2021 19:43:08 -0800 Subject: [PATCH 2027/2608] HPA: Implement batch deallocation. This saves O(n) mutex locks/unlocks during SEC flush. 
--- include/jemalloc/internal/pai.h | 1 + src/hpa.c | 63 ++++++++++++++++++++++++--------- src/sec.c | 7 ---- 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index f7f3e077..73f5433c 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,6 +13,7 @@ struct pai_s { bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); + /* This function empties out list as a side-effect of being called. */ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); }; diff --git a/src/hpa.c b/src/hpa.c index 013cd7ed..fa58bb77 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -15,6 +15,8 @@ static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list); bool hpa_supported() { @@ -91,7 +93,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->pai.dalloc_batch = &pai_dalloc_batch_default; + shard->pai.dalloc_batch = &hpa_dalloc_batch; return false; } @@ -663,11 +665,8 @@ hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, } static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - hpa_shard_t *shard = hpa_from_pai(self); - - edata_addr_set(edata, edata_base_get(edata)); - edata_zeroed_set(edata, false); +hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); @@ -677,32 +676,62 @@ hpa_dalloc(tsdn_t *tsdn, 
pai_t *self, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); - hpdata_t *ps = edata_ps_get(edata); - /* Currently, all edatas come from pageslabs. */ - assert(ps != NULL); + edata_addr_set(edata, edata_base_get(edata)); + edata_zeroed_set(edata, false); emap_deregister_boundary(tsdn, shard->emap, edata); - /* - * Note that the shard mutex protects ps's metadata too; it wouldn't be - * correct to try to read most information out of it without the lock. - */ - malloc_mutex_lock(tsdn, &shard->mtx); +} + +static void +hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); /* * Release the metadata early, to avoid having to remember to do it - * while we're also doing tricky purging logic. + * while we're also doing tricky purging logic. First, we need to grab + * a few bits of metadata from it. + * + * Note that the shard mutex protects ps's metadata too; it wouldn't be + * correct to try to read most information out of it without the lock. */ + hpdata_t *ps = edata_ps_get(edata); + /* Currently, all edatas come from pageslabs. 
*/ + assert(ps != NULL); void *unreserve_addr = edata_addr_get(edata); size_t unreserve_size = edata_size_get(edata); edata_cache_small_put(tsdn, &shard->ecs, edata); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); - hpa_do_deferred_work(tsdn, shard); +} +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + hpa_shard_t *shard = hpa_from_pai(self); + + hpa_dalloc_prepare_unlocked(tsdn, shard, edata); + malloc_mutex_lock(tsdn, &shard->mtx); + hpa_dalloc_locked(tsdn, shard, edata); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + +static void +hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { + hpa_shard_t *shard = hpa_from_pai(self); + + edata_t *edata; + ql_foreach(edata, &list->head, ql_link_active) { + hpa_dalloc_prepare_unlocked(tsdn, shard, edata); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + /* Now, remove from the list. */ + while ((edata = edata_list_active_first(list)) != NULL) { + edata_list_active_remove(list, edata); + hpa_dalloc_locked(tsdn, shard, edata); + } malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/src/sec.c b/src/sec.c index 41e75b9e..3a3a0b90 100644 --- a/src/sec.c +++ b/src/sec.c @@ -144,13 +144,6 @@ sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { edata_list_active_concat(&to_flush, &shard->freelist[i]); } - /* - * A better way to do this would be to add a batch dalloc function to - * the pai_t. Practically, the current method turns into O(n) locks and - * unlocks at the fallback allocator. But some implementations (e.g. - * HPA) can straightforwardly do many deallocations in a single lock / - * unlock pair. 
- */ pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } From bf448d7a5a4c2aecbda7ef11767a75829d9aaf77 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 5 Jan 2021 15:52:25 -0800 Subject: [PATCH 2028/2608] SEC: Reduce lock hold times. Only flush a subset of extents during flushing, and drop the lock while doing so. --- include/jemalloc/internal/sec.h | 40 ++++++++++++--- src/sec.c | 87 +++++++++++++++++++++++++++------ test/unit/sec.c | 7 ++- 3 files changed, 110 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 6bf5687d..815b4bbc 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -8,13 +8,9 @@ * Small extent cache. * * This includes some utilities to cache small extents. We have a per-pszind - * bin with its own lock and edata heap (including only extents of that size). - * We don't try to do any coalescing of extents (since it would require - * cross-bin locks). As a result, we need to be careful about fragmentation. - * As a gesture in that direction, we limit the size of caches, apply first-fit - * within the bins, and, when flushing a bin, flush all of its extents rather - * than just those up to some threshold. When we allocate again, we'll get a - * chance to move to better ones. + * bin with its own list of extents of that size. We don't try to do any + * coalescing of extents (since it would in general require cross-shard locks or + * knowledge of the underlying PAI implementation). */ /* @@ -46,6 +42,19 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { dst->bytes += src->bytes; } +/* A collections of free extents, all of the same size. */ +typedef struct sec_bin_s sec_bin_t; +struct sec_bin_s { + /* + * Number of bytes in this particular bin (as opposed to the + * sec_shard_t's bytes_cur. This isn't user visible or reported in + * stats; rather, it allows us to quickly determine the change in the + * centralized counter when flushing. 
+ */ + size_t bytes_cur; + edata_list_active_t freelist; +}; + typedef struct sec_shard_s sec_shard_t; struct sec_shard_s { /* @@ -64,8 +73,11 @@ struct sec_shard_s { * hooks are installed. */ bool enabled; - edata_list_active_t freelist[SEC_NPSIZES]; + sec_bin_t bins[SEC_NPSIZES]; + /* Number of bytes in all bins in the shard. */ size_t bytes_cur; + /* The next pszind to flush in the flush-some pathways. */ + pszind_t to_flush_next; }; typedef struct sec_s sec_t; @@ -83,6 +95,18 @@ struct sec_s { * the bins in that shard to be flushed. */ size_t bytes_max; + /* + * The number of bytes (in all bins) we flush down to when we exceed + * bytes_cur. We want this to be less than bytes_cur, because + * otherwise we could get into situations where a shard undergoing + * net-deallocation keeps bytes_cur very near to bytes_max, so that + * most deallocations get immediately forwarded to the underlying PAI + * implementation, defeating the point of the SEC. + * + * Currently this is just set to bytes_max / 2, but eventually can be + * configurable. 
+ */ + size_t bytes_after_flush; /* * We don't necessarily always use all the shards; requests are diff --git a/src/sec.c b/src/sec.c index 3a3a0b90..49b41047 100644 --- a/src/sec.c +++ b/src/sec.c @@ -11,7 +11,14 @@ static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); -bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, +static void +sec_bin_init(sec_bin_t *bin) { + bin->bytes_cur = 0; + edata_list_active_init(&bin->freelist); +} + +bool +sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, size_t bytes_max) { if (nshards > SEC_NSHARDS_MAX) { nshards = SEC_NSHARDS_MAX; @@ -25,9 +32,10 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, } shard->enabled = true; for (pszind_t j = 0; j < SEC_NPSIZES; j++) { - edata_list_active_init(&shard->freelist[j]); + sec_bin_init(&shard->bins[j]); } shard->bytes_cur = 0; + shard->to_flush_next = 0; } sec->fallback = fallback; sec->alloc_max = alloc_max; @@ -36,6 +44,7 @@ bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, } sec->bytes_max = bytes_max; + sec->bytes_after_flush = bytes_max / 2; sec->nshards = nshards; /* @@ -85,9 +94,12 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, if (!shard->enabled) { return NULL; } - edata_t *edata = edata_list_active_first(&shard->freelist[pszind]); + sec_bin_t *bin = &shard->bins[pszind]; + edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { - edata_list_active_remove(&shard->freelist[pszind], edata); + edata_list_active_remove(&bin->freelist, edata); + assert(edata_size_get(edata) <= bin->bytes_cur); + bin->bytes_cur -= edata_size_get(edata); assert(edata_size_get(edata) <= shard->bytes_cur); shard->bytes_cur -= edata_size_get(edata); } @@ -135,30 +147,75 @@ sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } 
static void -sec_do_flush_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { +sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); shard->bytes_cur = 0; edata_list_active_t to_flush; edata_list_active_init(&to_flush); for (pszind_t i = 0; i < SEC_NPSIZES; i++) { - edata_list_active_concat(&to_flush, &shard->freelist[i]); + sec_bin_t *bin = &shard->bins[i]; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); } + /* + * Ordinarily we would try to avoid doing the batch deallocation while + * holding the shard mutex, but the flush_all pathways only happen when + * we're disabling the HPA or resetting the arena, both of which are + * rare pathways. + */ pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } static void -sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, +sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + while (shard->bytes_cur > sec->bytes_after_flush) { + /* Pick a victim. */ + sec_bin_t *bin = &shard->bins[shard->to_flush_next]; + + /* Update our victim-picking state. */ + shard->to_flush_next++; + if (shard->to_flush_next == SEC_NPSIZES) { + shard->to_flush_next = 0; + } + + assert(shard->bytes_cur >= bin->bytes_cur); + if (bin->bytes_cur != 0) { + shard->bytes_cur -= bin->bytes_cur; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + /* + * Either bin->bytes_cur was 0, in which case we didn't touch + * the bin list but it should be empty anyways (or else we + * missed a bytes_cur update on a list modification), or it + * *was* 0 and we emptied it ourselves. Either way, it should + * be empty now. 
+ */ + assert(edata_list_active_empty(&bin->freelist)); + } + + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc_batch(tsdn, sec->fallback, &to_flush); +} + +static void +sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); assert(shard->bytes_cur <= sec->bytes_max); size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); /* - * Prepending here results in FIFO allocation per bin, which seems + * Prepending here results in LIFO allocation per bin, which seems * reasonable. */ - edata_list_active_prepend(&shard->freelist[pszind], edata); + sec_bin_t *bin = &shard->bins[pszind]; + edata_list_active_prepend(&bin->freelist, edata); + bin->bytes_cur += size; shard->bytes_cur += size; if (shard->bytes_cur > sec->bytes_max) { /* @@ -170,7 +227,10 @@ sec_shard_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, * in the backing allocator). This has the extra advantage of * not requiring advanced cache balancing strategies. 
*/ - sec_do_flush_locked(tsdn, sec, shard); + sec_flush_some_and_unlock(tsdn, sec, shard); + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); } } @@ -184,8 +244,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { sec_shard_t *shard = sec_shard_pick(tsdn, sec); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { - sec_shard_dalloc_locked(tsdn, sec, shard, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); + sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); pai_dalloc(tsdn, sec->fallback, edata); @@ -196,7 +255,7 @@ void sec_flush(tsdn_t *tsdn, sec_t *sec) { for (size_t i = 0; i < sec->nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + sec_flush_all_locked(tsdn, sec, &sec->shards[i]); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); } } @@ -206,7 +265,7 @@ sec_disable(tsdn_t *tsdn, sec_t *sec) { for (size_t i = 0; i < sec->nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); sec->shards[i].enabled = false; - sec_do_flush_locked(tsdn, sec, &sec->shards[i]); + sec_flush_all_locked(tsdn, sec, &sec->shards[i]); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); } } diff --git a/test/unit/sec.c b/test/unit/sec.c index 7657537b..5fe3550c 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -200,8 +200,11 @@ TEST_BEGIN(test_auto_flush) { expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* - * Free the extra allocation; this should trigger a flush of all - * extents in the cache. + * Free the extra allocation; this should trigger a flush. The internal + * flushing logic is allowed to get complicated; for now, we rely on our + * whitebox knowledge of the fact that the SEC flushes bins in their + * entirety when it decides to do so, and it has only one bin active + * right now. 
*/ pai_dalloc(tsdn, &sec.pai, extra_alloc); expect_zu_eq(NALLOCS + 1, ta.alloc_count, From 480f3b11cd61c1cf37c90d61701829a0cebc98da Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 7 Jan 2021 12:27:43 -0800 Subject: [PATCH 2029/2608] Add a batch allocation interface to the PAI. For now, no real allocator actually implements this interface; this will change in subsequent diffs. --- include/jemalloc/internal/pai.h | 19 ++++++++++++++++++- src/hpa.c | 1 + src/pac.c | 1 + src/pai.c | 13 +++++++++++++ src/sec.c | 1 + test/unit/sec.c | 25 +++++++++++++++++++++++++ 6 files changed, 59 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 73f5433c..16e022d5 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -8,6 +8,14 @@ struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); + /* + * Returns the number of extents added to the list (which may be fewer + * than requested, in case of OOM). The list should already be + * initialized. The only alignment guarantee is page-alignment, and + * the results are not necessarily zeroed. 
+ */ + size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero); bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -28,6 +36,12 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { return self->alloc(tsdn, self, size, alignment, zero); } +static inline size_t +pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, + edata_list_active_t *results) { + return self->alloc_batch(tsdn, self, size, nallocs, results); +} + static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero) { @@ -51,9 +65,12 @@ pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { } /* - * An implementation of batch deallocation that simply calls dalloc once for + * An implementation of batch allocation that simply calls alloc once for * each item in the list. */ +size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results); +/* Ditto, for dalloc. */ void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); diff --git a/src/hpa.c b/src/hpa.c index fa58bb77..338d5759 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -90,6 +90,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, * operating on corrupted data. 
*/ shard->pai.alloc = &hpa_alloc; + shard->pai.alloc_batch = &pai_alloc_batch_default; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; diff --git a/src/pac.c b/src/pac.c index 0ba0f2f0..93427ca1 100644 --- a/src/pac.c +++ b/src/pac.c @@ -91,6 +91,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); pac->pai.alloc = &pac_alloc_impl; + pac->pai.alloc_batch = &pai_alloc_batch_default; pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; diff --git a/src/pai.c b/src/pai.c index 1035c850..bd6966c9 100644 --- a/src/pai.c +++ b/src/pai.c @@ -1,6 +1,19 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +size_t +pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results) { + for (size_t i = 0; i < nallocs; i++) { + edata_t *edata = pai_alloc(tsdn, self, size, PAGE, + /* zero */ false); + if (edata == NULL) { + return i; + } + edata_list_active_append(results, edata); + } + return nallocs; +} void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, diff --git a/src/sec.c b/src/sec.c index 49b41047..af7c2910 100644 --- a/src/sec.c +++ b/src/sec.c @@ -52,6 +52,7 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, * initialization failed will segfault in an easy-to-spot way. 
*/ sec->pai.alloc = &sec_alloc; + sec->pai.alloc_batch = &pai_alloc_batch_default; sec->pai.expand = &sec_expand; sec->pai.shrink = &sec_shrink; sec->pai.dalloc = &sec_dalloc; diff --git a/test/unit/sec.c b/test/unit/sec.c index 5fe3550c..69132c1f 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -7,6 +7,7 @@ struct pai_test_allocator_s { pai_t pai; bool alloc_fail; size_t alloc_count; + size_t alloc_batch_count; size_t dalloc_count; size_t dalloc_batch_count; /* @@ -42,6 +43,28 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, return edata; } +static inline size_t +pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results) { + pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + if (ta->alloc_fail) { + return 0; + } + for (size_t i = 0; i < nallocs; i++) { + edata_t *edata = malloc(sizeof(edata_t)); + assert_ptr_not_null(edata, ""); + edata_init(edata, /* arena_ind */ 0, + (void *)ta->next_ptr, size, + /* slab */ false, /* szind */ 0, /* sn */ 1, + extent_state_active, /* zero */ false, /* comitted */ true, + /* ranged */ false, EXTENT_NOT_HEAD); + ta->next_ptr += size; + ta->alloc_batch_count++; + edata_list_active_append(results, edata); + } + return nallocs; +} + static bool pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero) { @@ -82,6 +105,7 @@ static inline void pai_test_allocator_init(pai_test_allocator_t *ta) { ta->alloc_fail = false; ta->alloc_count = 0; + ta->alloc_batch_count = 0; ta->dalloc_count = 0; ta->dalloc_batch_count = 0; /* Just don't start the edata at 0. 
*/ @@ -91,6 +115,7 @@ pai_test_allocator_init(pai_test_allocator_t *ta) { ta->shrink_count = 0; ta->shrink_return_value = false; ta->pai.alloc = &pai_test_allocator_alloc; + ta->pai.alloc_batch = &pai_test_allocator_alloc_batch; ta->pai.expand = &pai_test_allocator_expand; ta->pai.shrink = &pai_test_allocator_shrink; ta->pai.dalloc = &pai_test_allocator_dalloc; From cdae6706a6dbe6ab75688ea24a82ef4165c3b0b1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 19 Jan 2021 13:06:43 -0800 Subject: [PATCH 2030/2608] SEC: Use batch fills. Currently, this doesn't help much, since no PAI implementation supports flushing. This will change in subsequent commits. --- include/jemalloc/internal/sec.h | 28 ++++++ src/sec.c | 147 ++++++++++++++++++++++---------- test/unit/sec.c | 63 ++++++++------ 3 files changed, 167 insertions(+), 71 deletions(-) diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 815b4bbc..fadf4b61 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -45,6 +45,24 @@ sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { /* A collections of free extents, all of the same size. */ typedef struct sec_bin_s sec_bin_t; struct sec_bin_s { + /* + * When we fail to fulfill an allocation, we do a batch-alloc on the + * underlying allocator to fill extra items, as well. We drop the SEC + * lock while doing so, to allow operations on other bins to succeed. + * That introduces the possibility of other threads also trying to + * allocate out of this bin, failing, and also going to the backing + * allocator. To avoid a thundering herd problem in which lots of + * threads do batch allocs and overfill this bin as a result, we only + * allow one batch allocation at a time for a bin. This bool tracks + * whether or not some thread is already batch allocating. + * + * Eventually, the right answer may be a smarter sharding policy for the + * bins (e.g. 
a mutex per bin, which would also be more scalable + * generally; the batch-allocating thread could hold it while + * batch-allocating). + */ + bool being_batch_filled; + /* * Number of bytes in this particular bin (as opposed to the * sec_shard_t's bytes_cur. This isn't user visible or reported in @@ -108,6 +126,16 @@ struct sec_s { */ size_t bytes_after_flush; + /* + * When we can't satisfy an allocation out of the SEC because there are + * no available ones cached, we allocate multiple of that size out of + * the fallback allocator. Eventually we might want to do something + * cleverer, but for now we just grab a fixed number. + * + * For now, just the constant 4. Eventually, it should be configurable. + */ + size_t batch_fill_extra; + /* * We don't necessarily always use all the shards; requests are * distributed across shards [0, nshards - 1). diff --git a/src/sec.c b/src/sec.c index af7c2910..f177bbee 100644 --- a/src/sec.c +++ b/src/sec.c @@ -13,6 +13,7 @@ static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void sec_bin_init(sec_bin_t *bin) { + bin->being_batch_filled = false; bin->bytes_cur = 0; edata_list_active_init(&bin->freelist); } @@ -45,6 +46,7 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, sec->bytes_max = bytes_max; sec->bytes_after_flush = bytes_max / 2; + sec->batch_fill_extra = 4; sec->nshards = nshards; /* @@ -88,14 +90,52 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { return &sec->shards[*idxp]; } +/* + * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an + * empty cache, we'll try to fill it, which can push the shard over it's limit. + */ +static void +sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + while (shard->bytes_cur > sec->bytes_after_flush) { + /* Pick a victim. 
*/ + sec_bin_t *bin = &shard->bins[shard->to_flush_next]; + + /* Update our victim-picking state. */ + shard->to_flush_next++; + if (shard->to_flush_next == SEC_NPSIZES) { + shard->to_flush_next = 0; + } + + assert(shard->bytes_cur >= bin->bytes_cur); + if (bin->bytes_cur != 0) { + shard->bytes_cur -= bin->bytes_cur; + bin->bytes_cur = 0; + edata_list_active_concat(&to_flush, &bin->freelist); + } + /* + * Either bin->bytes_cur was 0, in which case we didn't touch + * the bin list but it should be empty anyways (or else we + * missed a bytes_cur update on a list modification), or it + * *was* 0 and we emptied it ourselves. Either way, it should + * be empty now. + */ + assert(edata_list_active_empty(&bin->freelist)); + } + + malloc_mutex_unlock(tsdn, &shard->mtx); + pai_dalloc_batch(tsdn, sec->fallback, &to_flush); +} + static edata_t * sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - pszind_t pszind) { + sec_bin_t *bin) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!shard->enabled) { return NULL; } - sec_bin_t *bin = &shard->bins[pszind]; edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { edata_list_active_remove(&bin->freelist, edata); @@ -107,6 +147,50 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, return edata; } +static edata_t * +sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, + sec_bin_t *bin, size_t size) { + malloc_mutex_assert_not_owner(tsdn, &shard->mtx); + + edata_list_active_t result; + edata_list_active_init(&result); + size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, + 1 + sec->batch_fill_extra, &result); + + edata_t *ret = edata_list_active_first(&result); + if (ret != NULL) { + edata_list_active_remove(&result, ret); + } + + malloc_mutex_lock(tsdn, &shard->mtx); + bin->being_batch_filled = false; + /* + * Handle the easy case first: nothing to cache. 
Note that this can + * only happen in case of OOM, since sec_alloc checks the expected + * number of allocs, and doesn't bother going down the batch_fill + * pathway if there won't be anything left to cache. So to be in this + * code path, we must have asked for > 1 alloc, but only gotten 1 back. + */ + if (nalloc <= 1) { + malloc_mutex_unlock(tsdn, &shard->mtx); + return ret; + } + + size_t new_cached_bytes = (nalloc - 1) * size; + + edata_list_active_concat(&bin->freelist, &result); + bin->bytes_cur += new_cached_bytes; + shard->bytes_cur += new_cached_bytes; + + if (shard->bytes_cur > sec->bytes_max) { + sec_flush_some_and_unlock(tsdn, sec, shard); + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + } + + return ret; +} + static edata_t * sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { assert((size & PAGE_MASK) == 0); @@ -119,16 +203,26 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); + sec_bin_t *bin = &shard->bins[pszind]; + bool do_batch_fill = false; + malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, pszind); + edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); + if (edata == NULL) { + if (!bin->being_batch_filled && sec->batch_fill_extra > 0) { + bin->being_batch_filled = true; + do_batch_fill = true; + } + } malloc_mutex_unlock(tsdn, &shard->mtx); if (edata == NULL) { - /* - * See the note in dalloc, below; really, we should add a - * batch_alloc method to the PAI and get more than one extent at - * a time. 
- */ - edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero); + if (do_batch_fill) { + edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin, + size); + } else { + edata = pai_alloc(tsdn, sec->fallback, size, alignment, + zero); + } } return edata; } @@ -168,41 +262,6 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { pai_dalloc_batch(tsdn, sec->fallback, &to_flush); } -static void -sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->bytes_after_flush) { - /* Pick a victim. */ - sec_bin_t *bin = &shard->bins[shard->to_flush_next]; - - /* Update our victim-picking state. */ - shard->to_flush_next++; - if (shard->to_flush_next == SEC_NPSIZES) { - shard->to_flush_next = 0; - } - - assert(shard->bytes_cur >= bin->bytes_cur); - if (bin->bytes_cur != 0) { - shard->bytes_cur -= bin->bytes_cur; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } - /* - * Either bin->bytes_cur was 0, in which case we didn't touch - * the bin list but it should be empty anyways (or else we - * missed a bytes_cur update on a list modification), or it - * *was* 0 and we emptied it ourselves. Either way, it should - * be empty now. - */ - assert(edata_list_active_empty(&bin->freelist)); - } - - malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc_batch(tsdn, sec->fallback, &to_flush); -} - static void sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { diff --git a/test/unit/sec.c b/test/unit/sec.c index 69132c1f..ff39453c 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -134,14 +134,17 @@ TEST_BEGIN(test_reuse) { */ tsdn_t *tsdn = TSDN_NULL; /* - * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 30 pages in the cache before triggering a flush. 
+ * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be + * able to get to 33 pages in the cache before triggering a flush. We + * set the flush liimt to twice this amount, to avoid accidentally + * triggering a flush caused by the batch-allocation down the cache fill + * pathway disrupting ordering. */ - enum { NALLOCS = 10 }; + enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, - /* bytes_max */ NALLOCS * PAGE + NALLOCS * 2 * PAGE); + /* bytes_max */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -150,7 +153,9 @@ TEST_BEGIN(test_reuse) { /* zero */ false); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } - expect_zu_eq(2 * NALLOCS, ta.alloc_count, + expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); + size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; + expect_zu_le(2 * NALLOCS, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -164,7 +169,7 @@ TEST_BEGIN(test_reuse) { for (int i = NALLOCS - 1; i >= 0; i--) { pai_dalloc(tsdn, &sec.pai, two_page[i]); } - expect_zu_eq(2 * NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -182,7 +187,7 @@ TEST_BEGIN(test_reuse) { expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); } - expect_zu_eq(2 * NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -198,7 +203,12 @@ TEST_BEGIN(test_auto_flush) { tsdn_t *tsdn = TSDN_NULL; /* * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able 
to get to 30 pages in the cache before triggering a flush. + * able to get to 30 pages in the cache before triggering a flush. The + * choice of NALLOCS here is chosen to match the batch allocation + * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly + * empty, even in the presence of batch allocation on fill). + * Eventually, once our allocation batching strategies become smarter, + * this should change. */ enum { NALLOCS = 10 }; edata_t *extra_alloc; @@ -212,7 +222,8 @@ TEST_BEGIN(test_auto_flush) { } extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); - expect_zu_eq(NALLOCS + 1, ta.alloc_count, + size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; + expect_zu_le(NALLOCS + 1, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -220,7 +231,7 @@ TEST_BEGIN(test_auto_flush) { for (int i = 0; i < NALLOCS; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i]); } - expect_zu_eq(NALLOCS + 1, ta.alloc_count, + expect_zu_le(NALLOCS + 1, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -232,7 +243,7 @@ TEST_BEGIN(test_auto_flush) { * right now. */ pai_dalloc(tsdn, &sec.pai, extra_alloc); - expect_zu_eq(NALLOCS + 1, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of (non-batch) deallocations"); @@ -253,7 +264,7 @@ do_disable_flush_test(bool is_disable) { /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; - enum { NALLOCS = 10 }; + enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, /* bytes_max */ NALLOCS * PAGE); @@ -266,8 +277,9 @@ do_disable_flush_test(bool is_disable) { for (int i = 0; i < NALLOCS - 1; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i]); } - expect_zu_eq(NALLOCS, ta.alloc_count, - "Incorrect number of allocations"); + size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; + + expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); @@ -277,12 +289,13 @@ do_disable_flush_test(bool is_disable) { sec_flush(tsdn, &sec); } - expect_zu_eq(NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count, "Incorrect number of batch deallocations"); + size_t old_dalloc_batch_count = ta.dalloc_batch_count; /* * If we free into a disabled SEC, it should forward to the fallback. @@ -290,11 +303,11 @@ do_disable_flush_test(bool is_disable) { */ pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); - expect_zu_eq(NALLOCS, ta.alloc_count, + expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(is_disable ? 
1 : 0, ta.dalloc_count, "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS - 1, ta.dalloc_batch_count, + expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count, "Incorrect number of batch deallocations"); } @@ -404,7 +417,7 @@ expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { */ stats.bytes = 123; sec_stats_merge(tsdn, sec, &stats); - assert_zu_eq(npages * PAGE + 123, stats.bytes, ""); + assert_zu_le(npages * PAGE + 123, stats.bytes, ""); } TEST_BEGIN(test_stats_simple) { @@ -417,7 +430,7 @@ TEST_BEGIN(test_stats_simple) { enum { NITERS = 100, - FLUSH_PAGES = 10, + FLUSH_PAGES = 20, }; sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, @@ -470,26 +483,22 @@ TEST_BEGIN(test_stats_auto_flush) { for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); - expect_stats_pages(tsdn, &sec, 0); } for (size_t i = 0; i < FLUSH_PAGES; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i]); - expect_stats_pages(tsdn, &sec, i + 1); } pai_dalloc(tsdn, &sec.pai, extra_alloc0); - /* The last dalloc should have triggered a flush. */ - expect_stats_pages(tsdn, &sec, 0); /* Flush the remaining pages; stats should still work. */ for (size_t i = 0; i < FLUSH_PAGES; i++) { pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); - expect_stats_pages(tsdn, &sec, i + 1); } pai_dalloc(tsdn, &sec.pai, extra_alloc1); - /* The last dalloc should have triggered a flush, again. */ - expect_stats_pages(tsdn, &sec, 0); + + expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count + - ta.dalloc_count - ta.dalloc_batch_count); } TEST_END From ce9386370ad67d4b12dc167600080fe17fcf3113 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 20 Jan 2021 14:55:42 -0800 Subject: [PATCH 2031/2608] HPA: Implement batch allocation. 
--- src/hpa.c | 191 ++++++++++++++++++++++++------------------------ test/unit/hpa.c | 74 ++++++++++++++++++- 2 files changed, 168 insertions(+), 97 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 338d5759..0e9b152a 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -10,6 +10,8 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero); +static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -425,13 +427,11 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { } static edata_t * -hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { +hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + bool *oom) { bool err; - malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); - *oom = false; if (edata == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; return NULL; } @@ -440,7 +440,6 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); return NULL; } @@ -487,42 +486,61 @@ hpa_try_alloc_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) */ psset_update_end(&shard->psset, ps); edata_cache_small_put(tsdn, &shard->ecs, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); *oom = true; return NULL; } hpa_update_purge_hugify_eligibility(shard, ps); psset_update_end(&shard->psset, ps); - - hpa_do_deferred_work(tsdn, shard); - malloc_mutex_unlock(tsdn, &shard->mtx); - return edata; } -static edata_t * -hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { - assert(size <= 
shard->opts.slab_max_alloc); - bool err; - bool oom; - edata_t *edata; - - edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); - if (edata != NULL) { - return edata; +static size_t +hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + bool *oom, size_t nallocs, edata_list_active_t *results) { + malloc_mutex_lock(tsdn, &shard->mtx); + size_t nsuccess = 0; + for (; nsuccess < nallocs; nsuccess++) { + edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size, + oom); + if (edata == NULL) { + break; + } + edata_list_active_append(results, edata); } - /* Nothing in the psset works; we have to grow it. */ + hpa_do_deferred_work(tsdn, shard); + malloc_mutex_unlock(tsdn, &shard->mtx); + return nsuccess; +} + +static size_t +hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + size_t nallocs, edata_list_active_t *results) { + assert(size <= shard->opts.slab_max_alloc); + bool oom = false; + + size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, + nallocs, results); + + if (nsuccess == nallocs || oom) { + return nsuccess; + } + + /* + * We didn't OOM, but weren't able to fill everything requested of us; + * try to grow. + */ malloc_mutex_lock(tsdn, &shard->grow_mtx); /* * Check for grow races; maybe some earlier thread expanded the psset * in between when we dropped the main mutex and grabbed the grow mutex. */ - edata = hpa_try_alloc_no_grow(tsdn, shard, size, &oom); - if (edata != NULL || oom) { + nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, + nallocs - nsuccess, results); + if (nsuccess == nallocs || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return edata; + return nsuccess; } /* @@ -533,78 +551,28 @@ hpa_alloc_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size) { hpdata_t *ps = hpa_grow(tsdn, shard); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return NULL; - } - - /* We got the pageslab; allocate from it. 
*/ - malloc_mutex_lock(tsdn, &shard->mtx); - - psset_insert(&shard->psset, ps); - - edata = edata_cache_small_get(tsdn, &shard->ecs); - if (edata == NULL) { - malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return NULL; + return nsuccess; } /* - * TODO: the tail of this function is quite similar to the tail of - * hpa_try_alloc_no_grow (both, broadly, do the metadata management of - * initializing an edata_t from an hpdata_t once both have been - * allocated). The only differences are in error case handling and lock - * management (we hold grow_mtx, but should drop it before doing any - * deferred work). With a little refactoring, we could unify the paths. + * We got the pageslab; allocate from it. This does an unlock followed + * by a lock on the same mutex, and holds the grow mutex while doing + * deferred work, but this is an uncommon path; the simplicity is worth + * it. */ - psset_update_begin(&shard->psset, ps); - - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(edata, shard->ind, addr, size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, - /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); - edata_ps_set(edata, ps); - - err = emap_register_boundary(tsdn, shard->emap, edata, - SC_NSIZES, /* slab */ false); - if (err) { - hpdata_unreserve(ps, edata_addr_get(edata), - edata_size_get(edata)); - - edata_cache_small_put(tsdn, &shard->ecs, edata); - - /* We'll do a fake purge; the pages weren't really touched. 
*/ - hpdata_purge_state_t purge_state; - void *purge_addr; - size_t purge_size; - hpdata_purge_begin(ps, &purge_state); - bool found_extent = hpdata_purge_next(ps, &purge_state, - &purge_addr, &purge_size); - assert(found_extent); - assert(purge_addr == addr); - assert(purge_size == size); - found_extent = hpdata_purge_next(ps, &purge_state, - &purge_addr, &purge_size); - assert(!found_extent); - hpdata_purge_end(ps, &purge_state); - - psset_update_end(&shard->psset, ps); - malloc_mutex_unlock(tsdn, &shard->mtx); - malloc_mutex_unlock(tsdn, &shard->grow_mtx); - return NULL; - } - hpa_update_purge_hugify_eligibility(shard, ps); - psset_update_end(&shard->psset, ps); + malloc_mutex_lock(tsdn, &shard->mtx); + psset_insert(&shard->psset, ps); + malloc_mutex_unlock(tsdn, &shard->mtx); + nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, + nallocs - nsuccess, results); /* * Drop grow_mtx before doing deferred work; other threads blocked on it * should be allowed to proceed while we're working. */ malloc_mutex_unlock(tsdn, &shard->grow_mtx); - hpa_do_deferred_work(tsdn, shard); - - malloc_mutex_unlock(tsdn, &shard->mtx); - return edata; + return nsuccess; } static hpa_shard_t * @@ -616,28 +584,27 @@ hpa_from_pai(pai_t *self) { return (hpa_shard_t *)self; } -static edata_t * -hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero) { +static size_t +hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, + edata_list_active_t *results) { + assert(nallocs > 0); assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - hpa_shard_t *shard = hpa_from_pai(self); - /* We don't handle alignment or zeroing for now. 
*/ - if (alignment > PAGE || zero) { - return NULL; - } + if (size > shard->opts.slab_max_alloc) { - return NULL; + return 0; } - edata_t *edata = hpa_alloc_psset(tsdn, shard, size); + size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs, + results); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (edata != NULL) { + edata_t *edata; + ql_foreach(edata, &results->head, ql_link_active) { emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); @@ -648,6 +615,29 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, assert(edata_base_get(edata) == edata_addr_get(edata)); assert(edata_base_get(edata) != NULL); } + return nsuccess; +} + +static edata_t * +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { + assert((size & PAGE_MASK) == 0); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* We don't handle alignment or zeroing for now. */ + if (alignment > PAGE || zero) { + return NULL; + } + /* + * An alloc with alignment == PAGE and zero == false is equivalent to a + * batch alloc of 1. Just do that, so we can share code. + */ + edata_list_active_t results; + edata_list_active_init(&results); + size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, + &results); + assert(nallocs == 0 || nallocs == 1); + edata_t *edata = edata_list_active_first(&results); return edata; } @@ -677,6 +667,15 @@ hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); + /* + * Another thread shouldn't be trying to touch the metadata of an + * allocation being freed. 
The one exception is a merge attempt from a + * lower-addressed PAC extent; in this case we have a nominal race on + * the edata metadata bits, but in practice the fact that the PAI bits + * are different will prevent any further access. The race is bad, but + * benign in practice, and the long term plan is to track enough state + * in the rtree to prevent these merge attempts in the first place. + */ edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); emap_deregister_boundary(tsdn, shard->emap, edata); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 924795f6..46009835 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -211,6 +211,77 @@ TEST_BEGIN(test_stress) { } TEST_END +static void +expect_contiguous(edata_t **edatas, size_t nedatas) { + for (size_t i = 0; i < nedatas; i++) { + size_t expected = (size_t)edata_base_get(edatas[0]) + + i * PAGE; + expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), + "Mismatch at index %zu", i); + } +} + +TEST_BEGIN(test_alloc_dalloc_batch) { + test_skip_if(!hpa_supported()); + + hpa_shard_t *shard = create_test_data(); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8}; + + edata_t *allocs[NALLOCS]; + /* + * Allocate a mix of ways; first half from regular alloc, second half + * from alloc_batch. + */ + for (size_t i = 0; i < NALLOCS / 2; i++) { + allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); + } + edata_list_active_t allocs_list; + edata_list_active_init(&allocs_list); + size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, + &allocs_list); + expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); + for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { + allocs[i] = edata_list_active_first(&allocs_list); + edata_list_active_remove(&allocs_list, allocs[i]); + } + + /* + * Should have allocated them contiguously, despite the differing + * methods used. 
+ */ + void *orig_base = edata_base_get(allocs[0]); + expect_contiguous(allocs, NALLOCS); + + /* + * Batch dalloc the first half, individually deallocate the second half. + */ + for (size_t i = 0; i < NALLOCS / 2; i++) { + edata_list_active_append(&allocs_list, allocs[i]); + } + pai_dalloc_batch(tsdn, &shard->pai, &allocs_list); + for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { + pai_dalloc(tsdn, &shard->pai, allocs[i]); + } + + /* Reallocate (individually), and ensure reuse and contiguity. */ + for (size_t i = 0; i < NALLOCS; i++) { + allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, + /* zero */ false); + expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); + } + void *new_base = edata_base_get(allocs[0]); + expect_ptr_eq(orig_base, new_base, + "Failed to reuse the allocated memory."); + expect_contiguous(allocs, NALLOCS); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -227,5 +298,6 @@ main(void) { (void)mem_tree_destroy; return test_no_reentrancy( test_alloc_max, - test_stress); + test_stress, + test_alloc_dalloc_batch); } From fb327368db39a2edca5f9659a70a53bd3bb0ed6c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 26 Jan 2021 18:35:18 -0800 Subject: [PATCH 2032/2608] SEC: Expand option configurability. This change pulls the SEC options into a struct, which simplifies their handling across various modules (e.g. PA needs to forward on SEC options from the malloc_conf string, but it doesn't really need to know their names). While we're here, make some of the fixed constants configurable, and unify naming from the configuration options to the internals. 
--- .../internal/jemalloc_internal_externs.h | 5 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/sec.h | 42 +------------ include/jemalloc/internal/sec_opts.h | 59 +++++++++++++++++ src/arena.c | 5 +- src/ctl.c | 27 ++++++-- src/jemalloc.c | 30 +++++---- src/pa.c | 5 +- src/sec.c | 63 ++++++++++--------- src/stats.c | 4 +- test/unit/mallctl.c | 4 +- test/unit/sec.c | 61 +++++++++++------- 12 files changed, 185 insertions(+), 122 deletions(-) create mode 100644 include/jemalloc/internal/sec_opts.h diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index da693559..de5731fc 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/hpa_opts.h" +#include "jemalloc/internal/sec_opts.h" #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/nstime.h" @@ -16,9 +17,7 @@ extern bool opt_trust_madvise; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; -extern size_t opt_hpa_sec_max_alloc; -extern size_t opt_hpa_sec_max_bytes; -extern size_t opt_hpa_sec_nshards; +extern sec_opts_t opt_hpa_sec_opts; extern const char *opt_junk; extern bool opt_junk_alloc; diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 6ded54f8..acb94eb6 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -131,7 +131,7 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. 
*/ bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max); + const sec_opts_t *hpa_sec_opts); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index fadf4b61..ddcdfbdf 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -103,49 +103,11 @@ struct sec_s { pai_t pai; pai_t *fallback; - /* - * We'll automatically refuse to cache any objects in this sec if - * they're larger than alloc_max bytes. - */ - size_t alloc_max; - /* - * Exceeding this amount of cached extents in a shard causes *all* of - * the bins in that shard to be flushed. - */ - size_t bytes_max; - /* - * The number of bytes (in all bins) we flush down to when we exceed - * bytes_cur. We want this to be less than bytes_cur, because - * otherwise we could get into situations where a shard undergoing - * net-deallocation keeps bytes_cur very near to bytes_max, so that - * most deallocations get immediately forwarded to the underlying PAI - * implementation, defeating the point of the SEC. - * - * Currently this is just set to bytes_max / 2, but eventually can be - * configurable. - */ - size_t bytes_after_flush; - - /* - * When we can't satisfy an allocation out of the SEC because there are - * no available ones cached, we allocate multiple of that size out of - * the fallback allocator. Eventually we might want to do something - * cleverer, but for now we just grab a fixed number. - * - * For now, just the constant 4. Eventually, it should be configurable. - */ - size_t batch_fill_extra; - - /* - * We don't necessarily always use all the shards; requests are - * distributed across shards [0, nshards - 1). 
- */ - size_t nshards; + sec_opts_t opts; sec_shard_t shards[SEC_NSHARDS_MAX]; }; -bool sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, - size_t bytes_max); +bool sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts); void sec_flush(tsdn_t *tsdn, sec_t *sec); void sec_disable(tsdn_t *tsdn, sec_t *sec); diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h new file mode 100644 index 00000000..91b6d0de --- /dev/null +++ b/include/jemalloc/internal/sec_opts.h @@ -0,0 +1,59 @@ +#ifndef JEMALLOC_INTERNAL_SEC_OPTS_H +#define JEMALLOC_INTERNAL_SEC_OPTS_H + +/* + * The configuration settings used by an sec_t. Morally, this is part of the + * SEC interface, but we put it here for header-ordering reasons. + */ + +typedef struct sec_opts_s sec_opts_t; +struct sec_opts_s { + /* + * We don't necessarily always use all the shards; requests are + * distributed across shards [0, nshards - 1). + */ + size_t nshards; + /* + * We'll automatically refuse to cache any objects in this sec if + * they're larger than max_alloc bytes, instead forwarding such objects + * directly to the fallback. + */ + size_t max_alloc; + /* + * Exceeding this amount of cached extents in a shard causes us to start + * flushing bins in that shard until we fall below bytes_after_flush. + */ + size_t max_bytes; + /* + * The number of bytes (in all bins) we flush down to when we exceed + * bytes_cur. We want this to be less than bytes_cur, because + * otherwise we could get into situations where a shard undergoing + * net-deallocation keeps bytes_cur very near to max_bytes, so that + * most deallocations get immediately forwarded to the underlying PAI + * implementation, defeating the point of the SEC. + */ + size_t bytes_after_flush; + /* + * When we can't satisfy an allocation out of the SEC because there are + * no available ones cached, we allocate multiple of that size out of + * the fallback allocator. 
Eventually we might want to do something + * cleverer, but for now we just grab a fixed number. + */ + size_t batch_fill_extra; +}; + +#define SEC_OPTS_DEFAULT { \ + /* nshards */ \ + 4, \ + /* max_alloc */ \ + 32 * 1024, \ + /* max_bytes */ \ + 256 * 1024, \ + /* bytes_after_flush */ \ + 128 * 1024, \ + /* batch_fill_extra */ \ + 0 \ +} + + +#endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/src/arena.c b/src/arena.c index da0f1f02..f054f093 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1479,9 +1479,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, - &opt_hpa_opts, opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, - opt_hpa_sec_max_bytes)) { + if (pa_shard_enable_hpa(&arena->pa_shard, &opt_hpa_opts, + &opt_hpa_sec_opts)) { goto label_error; } } diff --git a/src/ctl.c b/src/ctl.c index 4fc3ad07..663cf866 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -98,9 +98,11 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_dehugification_threshold) CTL_PROTO(opt_hpa_dirty_mult) +CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) -CTL_PROTO(opt_hpa_sec_nshards) +CTL_PROTO(opt_hpa_sec_bytes_after_flush) +CTL_PROTO(opt_hpa_sec_batch_fill_extra) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -406,9 +408,13 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_dehugification_threshold"), CTL(opt_hpa_dehugification_threshold)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, + {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, + {NAME("hpa_sec_bytes_after_flush"), + CTL(opt_hpa_sec_bytes_after_flush)}, + 
{NAME("hpa_sec_batch_fill_extra"), + CTL(opt_hpa_sec_batch_fill_extra)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -2100,8 +2106,9 @@ CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) + +/* HPA options. */ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, @@ -2111,9 +2118,17 @@ CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, * its representation are internal implementation details. */ CTL_RO_NL_GEN(opt_hpa_dirty_mult, opt_hpa_opts.dirty_mult, fxp_t) -CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_max_alloc, size_t) -CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_max_bytes, size_t) -CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_nshards, size_t) +CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) + +/* HPA SEC options */ +CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) +CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, + size_t) +CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, + size_t) + CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 125682bf..613733ff 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -153,11 +153,7 @@ malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. 
*/ bool opt_hpa = false; hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; - -size_t opt_hpa_sec_max_alloc = 32 * 1024; -/* These settings correspond to a maximum of 1MB cached per arena. */ -size_t opt_hpa_sec_max_bytes = 256 * 1024; -size_t opt_hpa_sec_nshards = 4; +sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; /* * Arenas that are used to service external requests. Not all elements of the @@ -1473,12 +1469,21 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_hpa_sec_max_alloc, "hpa_sec_max_alloc", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_max_bytes, "hpa_sec_max_bytes", - PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_nshards, "hpa_sec_nshards", - 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, + "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, + "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, + "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.bytes_after_flush, + "hpa_sec_bytes_after_flush", PAGE, 0, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, + "hpa_sec_batch_fill_extra", PAGE, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { @@ -1777,8 +1782,7 @@ malloc_init_hard_a0_locked() { } } else if (opt_hpa) { if (pa_shard_enable_hpa(&a0->pa_shard, &opt_hpa_opts, - opt_hpa_sec_nshards, opt_hpa_sec_max_alloc, - opt_hpa_sec_max_bytes)) { + &opt_hpa_sec_opts)) { return true; } } diff --git a/src/pa.c b/src/pa.c index abe3f00b..dd61aaa2 100644 --- a/src/pa.c +++ b/src/pa.c @@ -50,13 +50,12 @@ pa_shard_init(tsdn_t *tsdn, 
pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - size_t sec_nshards, size_t sec_alloc_max, size_t sec_bytes_max) { + const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } - if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, sec_nshards, - sec_alloc_max, sec_bytes_max)) { + if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; diff --git a/src/sec.c b/src/sec.c index f177bbee..c37cf35c 100644 --- a/src/sec.c +++ b/src/sec.c @@ -19,12 +19,12 @@ sec_bin_init(sec_bin_t *bin) { } bool -sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, - size_t bytes_max) { - if (nshards > SEC_NSHARDS_MAX) { - nshards = SEC_NSHARDS_MAX; +sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts) { + size_t nshards_clipped = opts->nshards; + if (nshards_clipped > SEC_NSHARDS_MAX) { + nshards_clipped = SEC_NSHARDS_MAX; } - for (size_t i = 0; i < nshards; i++) { + for (size_t i = 0; i < nshards_clipped; i++) { sec_shard_t *shard = &sec->shards[i]; bool err = malloc_mutex_init(&shard->mtx, "sec_shard", WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); @@ -39,15 +39,15 @@ sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t alloc_max, shard->to_flush_next = 0; } sec->fallback = fallback; - sec->alloc_max = alloc_max; - if (sec->alloc_max > sz_pind2sz(SEC_NPSIZES - 1)) { - sec->alloc_max = sz_pind2sz(SEC_NPSIZES - 1); + + size_t max_alloc_clipped = opts->max_alloc; + if (max_alloc_clipped > sz_pind2sz(SEC_NPSIZES - 1)) { + max_alloc_clipped = sz_pind2sz(SEC_NPSIZES - 1); } - sec->bytes_max = bytes_max; - sec->bytes_after_flush = bytes_max / 2; - sec->batch_fill_extra = 4; - sec->nshards = nshards; + sec->opts = *opts; + sec->opts.nshards = nshards_clipped; + sec->opts.max_alloc = max_alloc_clipped; /* * Initialize 
these last so that an improper use of an SEC whose @@ -83,8 +83,9 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * when we multiply by the number of shards. */ uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); - uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->nshards) >> 32); - assert(idx < (uint32_t)sec->nshards); + uint32_t idx = + (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32); + assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } return &sec->shards[*idxp]; @@ -99,7 +100,7 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); edata_list_active_t to_flush; edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->bytes_after_flush) { + while (shard->bytes_cur > sec->opts.bytes_after_flush) { /* Pick a victim. */ sec_bin_t *bin = &shard->bins[shard->to_flush_next]; @@ -155,7 +156,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->batch_fill_extra, &result); + 1 + sec->opts.batch_fill_extra, &result); edata_t *ret = edata_list_active_first(&result); if (ret != NULL) { @@ -182,7 +183,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, bin->bytes_cur += new_cached_bytes; shard->bytes_cur += new_cached_bytes; - if (shard->bytes_cur > sec->bytes_max) { + if (shard->bytes_cur > sec->opts.max_bytes) { sec_flush_some_and_unlock(tsdn, sec, shard); } else { malloc_mutex_unlock(tsdn, &shard->mtx); @@ -197,8 +198,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { sec_t *sec = (sec_t *)self; - if (zero || alignment > PAGE || sec->nshards == 0 - || size > sec->alloc_max) { + if (zero || alignment > PAGE || sec->opts.nshards == 0 + || size > sec->opts.max_alloc) { return pai_alloc(tsdn, sec->fallback, size, alignment, zero); } pszind_t pszind = 
sz_psz2ind(size); @@ -209,7 +210,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); if (edata == NULL) { - if (!bin->being_batch_filled && sec->batch_fill_extra > 0) { + if (!bin->being_batch_filled + && sec->opts.batch_fill_extra > 0) { bin->being_batch_filled = true; do_batch_fill = true; } @@ -266,7 +268,7 @@ static void sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - assert(shard->bytes_cur <= sec->bytes_max); + assert(shard->bytes_cur <= sec->opts.max_bytes); size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); /* @@ -277,7 +279,7 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_prepend(&bin->freelist, edata); bin->bytes_cur += size; shard->bytes_cur += size; - if (shard->bytes_cur > sec->bytes_max) { + if (shard->bytes_cur > sec->opts.max_bytes) { /* * We've exceeded the shard limit. 
We make two nods in the * direction of fragmentation avoidance: we flush everything in @@ -297,7 +299,8 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { sec_t *sec = (sec_t *)self; - if (sec->nshards == 0 || edata_size_get(edata) > sec->alloc_max) { + if (sec->opts.nshards == 0 + || edata_size_get(edata) > sec->opts.max_alloc) { pai_dalloc(tsdn, sec->fallback, edata); return; } @@ -313,7 +316,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { void sec_flush(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); sec_flush_all_locked(tsdn, sec, &sec->shards[i]); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); @@ -322,7 +325,7 @@ sec_flush(tsdn_t *tsdn, sec_t *sec) { void sec_disable(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); sec->shards[i].enabled = false; sec_flush_all_locked(tsdn, sec, &sec->shards[i]); @@ -333,7 +336,7 @@ sec_disable(tsdn_t *tsdn, sec_t *sec) { void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { size_t sum = 0; - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { /* * We could save these lock acquisitions by making bytes_cur * atomic, but stats collection is rare anyways and we expect @@ -349,7 +352,7 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); malloc_mutex_prof_accum(tsdn, mutex_prof_data, &sec->shards[i].mtx); @@ -359,21 +362,21 @@ sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, void 
sec_prefork2(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); } } void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); } } void sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->nshards; i++) { + for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); } } diff --git a/src/stats.c b/src/stats.c index 7a0526c5..69cb2d3e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1486,9 +1486,11 @@ stats_general_print(emitter_t *emitter) { "opt.hpa_dirty_mult", emitter_type_string, &bufp); } } + OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") - OPT_WRITE_SIZE_T("hpa_sec_nshards") + OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") + OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 1fb74667..e9e0feb6 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -287,9 +287,11 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); - TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); + TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, 
percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/sec.c b/test/unit/sec.c index ff39453c..36ae1a52 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -23,6 +23,24 @@ struct pai_test_allocator_s { bool shrink_return_value; }; +static void +test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, + size_t max_bytes) { + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = max_alloc; + opts.max_bytes = max_bytes; + /* + * Just choose reasonable defaults for these; most tests don't care so + * long as they're something reasonable. + */ + opts.bytes_after_flush = max_bytes / 2; + opts.batch_fill_extra = 4; + + bool err = sec_init(sec, fallback, &opts); + assert_false(err, "Unexpected initialization failure"); +} + static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { @@ -143,8 +161,8 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 2 * PAGE, - /* bytes_max */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, + /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -213,8 +231,8 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ NALLOCS * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -266,8 +284,8 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* 
alloc_max */ PAGE, - /* bytes_max */ NALLOCS * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -321,18 +339,18 @@ TEST_BEGIN(test_flush) { } TEST_END -TEST_BEGIN(test_alloc_max_respected) { +TEST_BEGIN(test_max_alloc_respected) { pai_test_allocator_t ta; pai_test_allocator_init(&ta); sec_t sec; /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; - size_t alloc_max = 2 * PAGE; + size_t max_alloc = 2 * PAGE; size_t attempted_alloc = 3 * PAGE; - sec_init(&sec, &ta.pai, /* nshards */ 1, alloc_max, - /* bytes_max */ 1000 * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, + /* max_bytes */ 1000 * PAGE); for (size_t i = 0; i < 100; i++) { expect_zu_eq(i, ta.alloc_count, @@ -362,8 +380,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ 10 * PAGE, - /* bytes_max */ 1000 * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, + /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); expect_ptr_not_null(edata, "Unexpected alloc failure"); @@ -395,8 +413,9 @@ TEST_BEGIN(test_nshards_0) { /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; - sec_init(&sec, &ta.pai, /* nshards */ 0, /* alloc_max */ 10 * PAGE, - /* bytes_max */ 1000 * PAGE); + sec_opts_t opts = SEC_OPTS_DEFAULT; + opts.nshards = 0; + sec_init(&sec, &ta.pai, &opts); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); @@ -433,8 +452,8 @@ TEST_BEGIN(test_stats_simple) { FLUSH_PAGES = 20, }; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ FLUSH_PAGES * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -470,8 +489,8 @@ TEST_BEGIN(test_stats_auto_flush) { FLUSH_PAGES = 10, }; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ FLUSH_PAGES * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *extra_alloc0; edata_t *extra_alloc1; @@ -514,8 +533,8 @@ TEST_BEGIN(test_stats_manual_flush) { FLUSH_PAGES = 10, }; - sec_init(&sec, &ta.pai, /* nshards */ 1, /* alloc_max */ PAGE, - /* bytes_max */ FLUSH_PAGES * PAGE); + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, + /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -550,7 +569,7 @@ main(void) { test_auto_flush, test_disable, test_flush, - test_alloc_max_respected, + test_max_alloc_respected, test_expand_shrink_delegate, test_nshards_0, test_stats_simple, From d21d5b46b607542398440d77b5f5ba22116dad5a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 6 Feb 2021 09:29:01 -0800 Subject: [PATCH 2033/2608] Edata: Move sn into its own field. This lets the bins use a fragmentation avoidance policy that matches the HPA's (without affecting the PAC). 
--- include/jemalloc/internal/edata.h | 41 +++++++++++-------------------- src/base.c | 2 +- src/hpa.c | 5 ++-- test/unit/psset.c | 2 +- 4 files changed, 19 insertions(+), 31 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 11358ea1..c71209e5 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -87,9 +87,8 @@ struct edata_s { * i: szind * f: nfree * s: bin_shard - * n: sn * - * nnnnnnnn ... nnnnnnss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa + * 00000000 ... 000000ss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -120,16 +119,6 @@ struct edata_s { * nfree: Number of free regions in slab. * * bin_shard: the shard of the bin from which this extent came. - * - * sn: Serial number (potentially non-unique). - * - * Serial numbers may wrap around if !opt_retain, but as long as - * comparison functions fall back on address comparison for equal - * serial numbers, stable (if imperfect) ordering is maintained. - * - * Serial numbers may not be unique even in the absence of - * wrap-around, e.g. when splitting an extent and assigning the same - * serial number to both resulting adjacent extents. */ uint64_t e_bits; #define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) @@ -174,9 +163,6 @@ struct edata_s { #define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) #define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) -#define EDATA_BITS_SN_SHIFT (EDATA_BITS_IS_HEAD_WIDTH + EDATA_BITS_IS_HEAD_SHIFT) -#define EDATA_BITS_SN_MASK (UINT64_MAX << EDATA_BITS_SN_SHIFT) - /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -201,8 +187,11 @@ struct edata_s { * into pageslabs). This tracks it. 
*/ hpdata_t *e_ps; - /* Extra field reserved for HPA. */ - void *e_reserved; + /* + * Serial number. These are not necessarily unique; splitting an extent + * results in two extents with the same serial number. + */ + uint64_t e_sn; union { /* @@ -274,10 +263,9 @@ edata_binshard_get(const edata_t *edata) { return binshard; } -static inline size_t +static inline uint64_t edata_sn_get(const edata_t *edata) { - return (size_t)((edata->e_bits & EDATA_BITS_SN_MASK) >> - EDATA_BITS_SN_SHIFT); + return edata->e_sn; } static inline extent_state_t @@ -488,9 +476,8 @@ edata_nfree_sub(edata_t *edata, uint64_t n) { } static inline void -edata_sn_set(edata_t *edata, size_t sn) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_SN_MASK) | - ((uint64_t)sn << EDATA_BITS_SN_SHIFT); +edata_sn_set(edata_t *edata, uint64_t sn) { + edata->e_sn = sn; } static inline void @@ -566,7 +553,7 @@ edata_is_head_set(edata_t *edata, bool is_head) { */ static inline void edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, - bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, + bool slab, szind_t szind, uint64_t sn, extent_state_t state, bool zeroed, bool committed, extent_pai_t pai, extent_head_state_t is_head) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); @@ -587,7 +574,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, } static inline void -edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { +edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); edata_addr_set(edata, addr); edata_bsize_set(edata, bsize); @@ -607,8 +594,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, size_t sn) { static inline int edata_sn_comp(const edata_t *a, const edata_t *b) { - size_t a_sn = edata_sn_get(a); - size_t b_sn = edata_sn_get(b); + uint64_t a_sn = edata_sn_get(a); + uint64_t b_sn = edata_sn_get(b); return (a_sn > b_sn) - (a_sn < b_sn); } diff 
--git a/src/base.c b/src/base.c index d3732bab..00440f4d 100644 --- a/src/base.c +++ b/src/base.c @@ -448,7 +448,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, ret = base_extent_bump_alloc(base, edata, usize, alignment); if (esn != NULL) { - *esn = edata_sn_get(edata); + *esn = (size_t)edata_sn_get(edata); } label_return: malloc_mutex_unlock(tsdn, &base->mtx); diff --git a/src/hpa.c b/src/hpa.c index 0e9b152a..d078f180 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -458,8 +458,9 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, void *addr = hpdata_reserve_alloc(ps, size); edata_init(edata, shard->ind, addr, size, /* slab */ false, - SC_NSIZES, /* sn */ 0, extent_state_active, /* zeroed */ false, - /* committed */ true, EXTENT_PAI_HPA, EXTENT_NOT_HEAD); + SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active, + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); edata_ps_set(edata, ps); /* diff --git a/test/unit/psset.c b/test/unit/psset.c index b93dfbfe..fdc28d3d 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -91,7 +91,7 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { expect_false(edata_slab_get(edata), ""); expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); - expect_zu_eq(0, edata_sn_get(edata), ""); + expect_u64_eq(0, edata_sn_get(edata), ""); expect_d_eq(edata_state_get(edata), extent_state_active, ""); expect_false(edata_zeroed_get(edata), ""); expect_true(edata_committed_get(edata), ""); From 271a676dcd2d5ff863e8f6996089680f56fa0656 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Sat, 6 Feb 2021 11:57:32 -0800 Subject: [PATCH 2034/2608] hpdata: early bailout for longest free range. A number of common special cases allow us to stop iterating through an hpdata's bitmap earlier rather than later. 
--- src/hpdata.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/src/hpdata.c b/src/hpdata.c index e11ba8d9..0fc7b7dc 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -74,6 +74,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * to serve the allocation. */ assert(found); + assert(len <= hpdata_longest_free_range_get(hpdata)); if (len >= npages) { /* * We use first-fit within the page slabs; this gives @@ -103,25 +104,30 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->h_ntouched += new_dirty; /* - * We might have shrunk the longest free range. We have to keep - * scanning until the end of the hpdata to be sure. - * - * TODO: As an optimization, we should only do this when the range we - * just allocated from was equal to the longest free range size. + * If we allocated out of a range that was the longest in the hpdata, it + * might be the only one of that size and we'll have to adjust the + * metadata. */ - start = begin + npages; - while (start < HUGEPAGE_PAGES) { - bool found = fb_urange_iter(hpdata->active_pages, - HUGEPAGE_PAGES, start, &begin, &len); - if (!found) { - break; + if (len == hpdata_longest_free_range_get(hpdata)) { + start = begin + npages; + while (start < HUGEPAGE_PAGES) { + bool found = fb_urange_iter(hpdata->active_pages, + HUGEPAGE_PAGES, start, &begin, &len); + if (!found) { + break; + } + assert(len <= hpdata_longest_free_range_get(hpdata)); + if (len == hpdata_longest_free_range_get(hpdata)) { + largest_unchosen_range = len; + break; + } + if (len > largest_unchosen_range) { + largest_unchosen_range = len; + } + start = begin + len; } - if (len > largest_unchosen_range) { - largest_unchosen_range = len; - } - start = begin + len; + hpdata_longest_free_range_set(hpdata, largest_unchosen_range); } - hpdata_longest_free_range_set(hpdata, largest_unchosen_range); hpdata_assert_consistent(hpdata); return (void *)( From 154aa5fcc102172fcac0e111ff79df9d5ced7973 Mon Sep 
17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 11:04:46 -0800 Subject: [PATCH 2035/2608] Use the flat bitmap for eset and psset bitmaps. This is simpler (note that the eset field comment was actually incorrect!), and slightly faster. --- include/jemalloc/internal/eset.h | 4 ++-- include/jemalloc/internal/psset.h | 3 ++- src/eset.c | 30 +++++++++++++----------------- src/psset.c | 11 ++++------- 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index d260bc13..7b53ecd8 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ESET_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/flat_bitmap.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" @@ -22,7 +22,7 @@ struct eset_s { atomic_zu_t nbytes[SC_NPSIZES + 1]; /* Bitmap for which set bits correspond to non-empty heaps. */ - bitmap_t bitmap[BITMAP_GROUPS(SC_NPSIZES + 1)]; + fb_group_t bitmap[FB_NGROUPS(SC_NPSIZES + 1)]; /* LRU of all extents in heaps. */ edata_list_inactive_t lru; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index d2a8b24a..2b6ea7bc 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -56,7 +56,8 @@ struct psset_s { * free run of pages in a pageslab. */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; - bitmap_t bitmap[BITMAP_GROUPS(PSSET_NPSIZES)]; + /* Bitmap for which set bits correspond to non-empty heaps. */ + fb_group_t bitmap[FB_NGROUPS(PSSET_NPSIZES)]; /* * The sum of all bin stats in stats. 
This lets us quickly answer * queries for the number of dirty, active, and retained pages in the diff --git a/src/eset.c b/src/eset.c index c9af80e1..a52a6f7c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -3,15 +3,14 @@ #include "jemalloc/internal/eset.h" -const bitmap_info_t eset_bitmap_info = - BITMAP_INFO_INITIALIZER(SC_NPSIZES+1); +#define ESET_NPSIZES (SC_NPSIZES + 1) void eset_init(eset_t *eset, extent_state_t state) { - for (unsigned i = 0; i < SC_NPSIZES + 1; i++) { + for (unsigned i = 0; i < ESET_NPSIZES; i++) { edata_heap_new(&eset->heaps[i]); } - bitmap_init(eset->bitmap, &eset_bitmap_info, true); + fb_init(eset->bitmap, ESET_NPSIZES); edata_list_inactive_init(&eset->lru); atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; @@ -56,8 +55,7 @@ eset_insert(eset_t *eset, edata_t *edata) { size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); if (edata_heap_empty(&eset->heaps[pind])) { - bitmap_unset(eset->bitmap, &eset_bitmap_info, - (size_t)pind); + fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind); } edata_heap_insert(&eset->heaps[pind], edata); @@ -92,8 +90,7 @@ eset_remove(eset_t *eset, edata_t *edata) { } if (edata_heap_empty(&eset->heaps[pind])) { - bitmap_set(eset->bitmap, &eset_bitmap_info, - (size_t)pind); + fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind); } edata_list_inactive_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; @@ -122,10 +119,10 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); i < pind_max; i = - (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { + for (pszind_t i = + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < pind_max; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(i < 
SC_NPSIZES); assert(!edata_heap_empty(&eset->heaps[i])); edata_t *edata = edata_heap_first(&eset->heaps[i]); @@ -171,11 +168,10 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, edata_heap_first(&eset->heaps[pind]); } - for (pszind_t i = (pszind_t)bitmap_ffu(eset->bitmap, - &eset_bitmap_info, (size_t)pind); - i < SC_NPSIZES + 1; - i = (pszind_t)bitmap_ffu(eset->bitmap, &eset_bitmap_info, - (size_t)i+1)) { + for (pszind_t i = + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < ESET_NPSIZES; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(!edata_heap_empty(&eset->heaps[i])); edata_t *edata = edata_heap_first(&eset->heaps[i]); assert(edata_size_get(edata) >= size); diff --git a/src/psset.c b/src/psset.c index 08c9b6c5..a54e4b75 100644 --- a/src/psset.c +++ b/src/psset.c @@ -5,15 +5,12 @@ #include "jemalloc/internal/flat_bitmap.h" -static const bitmap_info_t psset_bitmap_info = - BITMAP_INFO_INITIALIZER(PSSET_NPSIZES); - void psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { hpdata_age_heap_new(&psset->pageslabs[i]); } - bitmap_init(psset->bitmap, &psset_bitmap_info, /* fill */ true); + fb_init(psset->bitmap, PSSET_NPSIZES); memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); @@ -101,14 +98,14 @@ static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_set(psset->bitmap, &psset_bitmap_info, (size_t)pind); + fb_unset(psset->bitmap, PSSET_NPSIZES, (size_t)pind); } } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - bitmap_unset(psset->bitmap, &psset_bitmap_info, (size_t)pind); + fb_set(psset->bitmap, PSSET_NPSIZES, (size_t)pind); } 
hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } @@ -266,7 +263,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - pszind_t pind = (pszind_t)bitmap_ffu(psset->bitmap, &psset_bitmap_info, + pszind_t pind = (pszind_t)fb_ffs(psset->bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); From 6bddb92ad64ee096a34c0d099736c237d46f1065 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 11:26:56 -0800 Subject: [PATCH 2036/2608] psset: Rename "bitmap" to "pageslab_bitmap". It tracks pageslabs. Soon, we'll have another bitmap (to track dirty pages) that we want to disambiguate. While we're here, fix an out-of-date comment. --- include/jemalloc/internal/psset.h | 2 +- src/psset.c | 19 +++++++------------ 2 files changed, 8 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 2b6ea7bc..271d1443 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -57,7 +57,7 @@ struct psset_s { */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ - fb_group_t bitmap[FB_NGROUPS(PSSET_NPSIZES)]; + fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; /* * The sum of all bin stats in stats. 
This lets us quickly answer * queries for the number of dirty, active, and retained pages in the diff --git a/src/psset.c b/src/psset.c index a54e4b75..66fd0c49 100644 --- a/src/psset.c +++ b/src/psset.c @@ -10,7 +10,7 @@ psset_init(psset_t *psset) { for (unsigned i = 0; i < PSSET_NPSIZES; i++) { hpdata_age_heap_new(&psset->pageslabs[i]); } - fb_init(psset->bitmap, PSSET_NPSIZES); + fb_init(psset->pageslab_bitmap, PSSET_NPSIZES); memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); @@ -40,14 +40,9 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { } /* - * The stats maintenance strategy is simple, but not necessarily obvious. - * edata_nfree and the bitmap must remain consistent at all times. If they - * change while an edata is within an edata_heap (or full), then the associated - * stats bin (or the full bin) must also change. If they change while not in a - * bin (say, in between extraction and reinsertion), then the bin stats need not - * change. If a pageslab is removed from a bin (or becomes nonfull), it should - * no longer contribute to that bin's stats (or the full stats). These help - * ensure we don't miss any heap modification operations. + * The stats maintenance strategy is to remove a pageslab's contribution to the + * stats when we call psset_update_begin, and re-add it (to a potentially new + * bin) when we call psset_update_end. 
*/ JEMALLOC_ALWAYS_INLINE void psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, @@ -98,14 +93,14 @@ static void psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { hpdata_age_heap_remove(&psset->pageslabs[pind], ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - fb_unset(psset->bitmap, PSSET_NPSIZES, (size_t)pind); + fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); } } static void psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { - fb_set(psset->bitmap, PSSET_NPSIZES, (size_t)pind); + fb_set(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); } hpdata_age_heap_insert(&psset->pageslabs[pind], ps); } @@ -263,7 +258,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); - pszind_t pind = (pszind_t)fb_ffs(psset->bitmap, PSSET_NPSIZES, + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); From 0f6c420f83a52c3927cc1c78d155622de05e3ba5 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 5 Feb 2021 10:46:17 -0800 Subject: [PATCH 2037/2608] HPA: Make purging/hugifying more principled. Before this change, purge/hugify decisions had several sharp edges that could lead to pathological behavior if tuning parameters weren't carefully chosen. It's the first of a series; this introduces basic "make every hugepage with dirty pages purgeable" functionality, and the next commit expands that functionality to have a smarter policy for picking hugepages to purge. Previously, the dehugify logic would *never* dehugify a hugepage unless it was dirtier than the dehugification threshold. This can lead to situations in which these pages (which themselves could never be purged) would push us above the maximum allowed dirty pages in the shard. 
This forces immediate purging of any pages deallocated in non-hugified hugepages, which in turn places nonobvious practical limitations on the relationships between various config settings. Instead, we make our preference not to dehugify to purge a soft one rather than a hard one. We'll avoid purging them, but only so long as we can do so by purging non-hugified pages. If we need to purge them to satisfy our dirty page limits, or to hugify other, more worthy candidates, we'll still do so. --- include/jemalloc/internal/hpdata.h | 76 ++++++++++++++++----- include/jemalloc/internal/psset.h | 4 +- src/hpa.c | 105 +++++++++++++++++++---------- src/hpdata.c | 4 +- src/psset.c | 69 +++++++++++++------ 5 files changed, 183 insertions(+), 75 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index e489e624..3bbb7cc8 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -6,6 +6,42 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/typed_list.h" +/* + * How badly we want to purge some region of memory. This is a temporary + * definition; it gets deleted in the next commit (where we adopt a more + * explicit dirtiest-first policy that only considers hugification status). + */ +enum hpdata_purge_level_e { + /* + * The level number is important -- we use it as indices into an array + * of size 2 (one for each purge level). + */ + + /* "Regular" candidates for purging. */ + hpdata_purge_level_default = 0, + + /* + * Candidates for purging, but as a last resort. Practically, + * nonpreferred corresponds to hugified regions that are below the + * hugification threshold but have not yet reached the dehugification + * threshold, while strongly nonpreferred candidates are those which are + * above the hugification threshold. + */ + hpdata_purge_level_nonpreferred = 1, + hpdata_purge_level_strongly_nonpreferred = 2, + + /* Don't purge, no matter what. 
*/ + hpdata_purge_level_never = 2, + + /* + * How big an array has to be to accomodate all purge levels. This + * relies on the fact that we don't actually keep unpurgable hpdatas in + * a container. + */ + hpdata_purge_level_count = hpdata_purge_level_never +}; +typedef enum hpdata_purge_level_e hpdata_purge_level_t; + /* * The metadata representation we use for extents in hugepages. While the PAC * uses the edata_t to represent both active and inactive extents, the HP only @@ -52,8 +88,8 @@ struct hpdata_s { bool h_in_psset_alloc_container; /* The same, but with purging. */ - bool h_purge_allowed; - bool h_in_psset_purge_container; + uint8_t h_purge_level; + uint8_t h_purge_container_level; /* And with hugifying. */ bool h_hugify_allowed; @@ -164,26 +200,26 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { hpdata->h_in_psset_alloc_container = in_container; } -static inline bool -hpdata_purge_allowed_get(const hpdata_t *hpdata) { - return hpdata->h_purge_allowed; +static inline hpdata_purge_level_t +hpdata_purge_level_get(const hpdata_t *hpdata) { + return (hpdata_purge_level_t)hpdata->h_purge_level; } static inline void -hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { - assert(purge_allowed == false || !hpdata->h_mid_purge); - hpdata->h_purge_allowed = purge_allowed; +hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { + assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge); + hpdata->h_purge_level = (uint8_t)level; } -static inline bool -hpdata_in_psset_purge_container_get(const hpdata_t *hpdata) { - return hpdata->h_in_psset_purge_container; +static inline hpdata_purge_level_t +hpdata_purge_container_level_get(const hpdata_t *hpdata) { + return (hpdata_purge_level_t)hpdata->h_purge_container_level; } static inline void -hpdata_in_psset_purge_container_set(hpdata_t *hpdata, bool in_container) { - assert(in_container != hpdata->h_in_psset_purge_container); - 
hpdata->h_in_psset_purge_container = in_container; +hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { + assert(level != hpdata->h_purge_container_level); + hpdata->h_purge_container_level = level; } static inline bool @@ -284,6 +320,11 @@ hpdata_ndirty_get(hpdata_t *hpdata) { return hpdata->h_ntouched - hpdata->h_nactive; } +static inline size_t +hpdata_nretained_get(hpdata_t *hpdata) { + return hpdata->h_nactive - hpdata->h_ntouched; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -316,11 +357,12 @@ hpdata_consistent(hpdata_t *hpdata) { return false; } if (hpdata_changing_state_get(hpdata) - && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) { + && ((hpdata->h_purge_level != hpdata_purge_level_never) + || hpdata->h_hugify_allowed)) { return false; } - if (hpdata_purge_allowed_get(hpdata) - != hpdata_in_psset_purge_container_get(hpdata)) { + if (hpdata_purge_level_get(hpdata) + != hpdata_purge_container_level_get(hpdata)) { return false; } if (hpdata_hugify_allowed_get(hpdata) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 271d1443..285bf6da 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -70,8 +70,8 @@ struct psset_s { * allocations. */ hpdata_empty_list_t empty; - /* Slabs which are available to be purged. */ - hpdata_purge_list_t to_purge; + /* Slabs which are available to be purged, ordered by purge level. */ + hpdata_purge_list_t to_purge[hpdata_purge_level_count]; /* Slabs which are available to be hugified. 
*/ hpdata_hugify_list_t to_hugify; }; diff --git a/src/hpa.c b/src/hpa.c index d078f180..90fec354 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -151,34 +151,59 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { >= shard->opts.hugification_threshold; } -static bool -hpa_should_purge(hpa_shard_t *shard) { +static size_t +hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + return psset_ndirty(&shard->psset) - shard->npending_purge; +} + +static size_t +hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); if (shard->opts.dirty_mult == (fxp_t)-1) { + return (size_t)-1; + } + return fxp_mul_frac(psset_nactive(&shard->psset), + shard->opts.dirty_mult); +} + +static bool +hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify == NULL) { return false; } - size_t adjusted_ndirty = psset_ndirty(&shard->psset) - - shard->npending_purge; - /* - * Another simple static check; purge whenever dirty exceeds 25% of - * active. 
- */ - size_t max_ndirty = fxp_mul_frac(psset_nactive(&shard->psset), - shard->opts.dirty_mult); - return adjusted_ndirty > max_ndirty; + return hpa_adjusted_ndirty(tsdn, shard) + + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard); +} + +static bool +hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) { + return true; + } + if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) { + return true; + } + return false; } static void -hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { +hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, + hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { - hpdata_purge_allowed_set(ps, false); + hpdata_purge_level_set(ps, hpdata_purge_level_never); hpdata_hugify_allowed_set(ps, false); return; } /* - * Hugepages are distinctly costly to purge, so do it only if they're - * *particularly* full of dirty pages. Eventually, we should use a - * smarter / more dynamic heuristic for situations where we have to - * manually hugify. + * Hugepages are distinctly costly to purge, so try to avoid it unless + * they're *particularly* full of dirty pages. Eventually, we should + * use a smarter / more dynamic heuristic for situations where we have + * to manually hugify. * * In situations where we don't manually hugify, this problem is * reduced. The "bad" situation we're trying to avoid is one's that's @@ -195,17 +220,23 @@ hpa_update_purge_hugify_eligibility(hpa_shard_t *shard, hpdata_t *ps) { * deferred; in that case we don't need any explicit calls on the * allocator's end at all; we just try to pack allocations in a * hugepage-friendly manner and let the OS hugify in the background. 
- * - * Anyways, our strategy to delay dehugification is to only consider - * purging a hugified hugepage if it's individually dirtier than the - * overall max dirty pages setting. That setting is 1 dirty page per 4 - * active pages; i.e. 4/5s of hugepage pages must be active. */ - if ((!hpdata_huge_get(ps) && hpdata_ndirty_get(ps) > 0) - || (hpdata_ndirty_get(ps) != 0 - && hpdata_ndirty_get(ps) * PAGE - >= shard->opts.dehugification_threshold)) { - hpdata_purge_allowed_set(ps, true); + if (hpdata_ndirty_get(ps) > 0) { + if (hpdata_huge_get(ps)) { + if (hpa_good_hugification_candidate(shard, ps)) { + hpdata_purge_level_set(ps, + hpdata_purge_level_strongly_nonpreferred); + } else if (hpdata_ndirty_get(ps) * PAGE + >= shard->opts.dehugification_threshold) { + hpdata_purge_level_set(ps, + hpdata_purge_level_nonpreferred); + } else { + hpdata_purge_level_set(ps, + hpdata_purge_level_default); + } + } else { + hpdata_purge_level_set(ps, hpdata_purge_level_default); + } } if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { @@ -286,7 +317,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { if (to_purge == NULL) { return false; } - assert(hpdata_purge_allowed_get(to_purge)); + assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never); assert(!hpdata_changing_state_get(to_purge)); /* @@ -297,7 +328,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_purge); assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); - hpdata_purge_allowed_set(to_purge, false); + hpdata_purge_level_set(to_purge, hpdata_purge_level_never); hpdata_hugify_allowed_set(to_purge, false); /* * Unlike with hugification (where concurrent @@ -352,7 +383,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { hpdata_mid_purge_set(to_purge, false); hpdata_alloc_allowed_set(to_purge, true); - hpa_update_purge_hugify_eligibility(shard, to_purge); + hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge); 
psset_update_end(&shard->psset, to_purge); @@ -364,6 +395,10 @@ static bool hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (hpa_hugify_blocked_by_ndirty(tsdn, shard)) { + return false; + } + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); if (to_hugify == NULL) { return false; @@ -378,7 +413,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { */ psset_update_begin(&shard->psset, to_hugify); hpdata_mid_hugify_set(to_hugify, true); - hpdata_purge_allowed_set(to_hugify, false); + hpdata_purge_level_set(to_hugify, hpdata_purge_level_never); hpdata_hugify_allowed_set(to_hugify, false); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); @@ -401,7 +436,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_hugify); hpdata_hugify(to_hugify); hpdata_mid_hugify_set(to_hugify, false); - hpa_update_purge_hugify_eligibility(shard, to_hugify); + hpa_update_purge_hugify_eligibility(tsdn, shard, to_hugify); psset_update_end(&shard->psset, to_hugify); return true; @@ -419,7 +454,7 @@ hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { hugified = hpa_try_hugify(tsdn, shard); purged = false; - if (hpa_should_purge(shard)) { + if (hpa_should_purge(tsdn, shard)) { purged = hpa_try_purge(tsdn, shard); } malloc_mutex_assert_owner(tsdn, &shard->mtx); @@ -491,7 +526,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, return NULL; } - hpa_update_purge_hugify_eligibility(shard, ps); + hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); return edata; } @@ -703,7 +738,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); - hpa_update_purge_hugify_eligibility(shard, ps); + hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); 
hpa_do_deferred_work(tsdn, shard); } diff --git a/src/hpdata.c b/src/hpdata.c index 0fc7b7dc..6aee4f61 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,8 +24,8 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_alloc_allowed = true; hpdata->h_in_psset_alloc_container = false; - hpdata->h_purge_allowed = false; - hpdata->h_in_psset_purge_container = false; + hpdata->h_purge_level = hpdata_purge_level_never; + hpdata->h_purge_container_level = hpdata_purge_level_never; hpdata->h_hugify_allowed = false; hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; diff --git a/src/psset.c b/src/psset.c index 66fd0c49..6de82605 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,7 +14,9 @@ psset_init(psset_t *psset) { memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); - hpdata_purge_list_init(&psset->to_purge); + for (int i = 0; i < hpdata_purge_level_count; i++) { + hpdata_purge_list_init(&psset->to_purge[i]); + } hpdata_hugify_list_init(&psset->to_hugify); } @@ -230,14 +232,31 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { psset_alloc_container_insert(psset, ps); } - if (hpdata_purge_allowed_get(ps) - && !hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, true); - hpdata_purge_list_append(&psset->to_purge, ps); - } else if (!hpdata_purge_allowed_get(ps) - && hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, false); - hpdata_purge_list_remove(&psset->to_purge, ps); + if (hpdata_purge_level_get(ps) == hpdata_purge_level_never + && hpdata_purge_container_level_get(ps) + != hpdata_purge_level_never) { + /* In some purge container, but shouldn't be in any. 
*/ + hpdata_purge_list_remove( + &psset->to_purge[hpdata_purge_container_level_get(ps)], + ps); + hpdata_purge_container_level_set(ps, hpdata_purge_level_never); + } else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never + && hpdata_purge_container_level_get(ps) + == hpdata_purge_level_never) { + /* Not in any purge container, but should be in one. */ + hpdata_purge_list_append( + &psset->to_purge[hpdata_purge_level_get(ps)], ps); + hpdata_purge_container_level_set(ps, + hpdata_purge_level_get(ps)); + } else if (hpdata_purge_level_get(ps) + != hpdata_purge_container_level_get(ps)) { + /* Should switch containers. */ + hpdata_purge_list_remove( + &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); + hpdata_purge_list_append( + &psset->to_purge[hpdata_purge_level_get(ps)], ps); + hpdata_purge_container_level_set(ps, + hpdata_purge_level_get(ps)); } if (hpdata_hugify_allowed_get(ps) @@ -275,7 +294,13 @@ psset_pick_alloc(psset_t *psset, size_t size) { hpdata_t * psset_pick_purge(psset_t *psset) { - return hpdata_purge_list_first(&psset->to_purge); + for (int i = 0; i < hpdata_purge_level_count; i++) { + hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]); + if (ps != NULL) { + return ps; + } + } + return NULL; } hpdata_t * @@ -291,10 +316,15 @@ psset_insert(psset_t *psset, hpdata_t *ps) { if (hpdata_alloc_allowed_get(ps)) { psset_alloc_container_insert(psset, ps); } - if (hpdata_purge_allowed_get(ps)) { - hpdata_in_psset_purge_container_set(ps, true); - hpdata_purge_list_append(&psset->to_purge, ps); + assert( + hpdata_purge_container_level_get(ps) == hpdata_purge_level_never); + if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) { + hpdata_purge_container_level_set(ps, + hpdata_purge_level_get(ps)); + hpdata_purge_list_append( + &psset->to_purge[hpdata_purge_level_get(ps)], ps); } + if (hpdata_hugify_allowed_get(ps)) { hpdata_in_psset_hugify_container_set(ps, true); hpdata_hugify_list_append(&psset->to_hugify, ps); @@ -309,12 +339,13 
@@ psset_remove(psset_t *psset, hpdata_t *ps) { if (hpdata_in_psset_alloc_container_get(ps)) { psset_alloc_container_remove(psset, ps); } - if (hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, false); - hpdata_purge_list_remove(&psset->to_purge, ps); + if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) { + hpdata_purge_list_remove( + &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); + hpdata_purge_container_level_set(ps, hpdata_purge_level_never); } - if (hpdata_in_psset_purge_container_get(ps)) { - hpdata_in_psset_purge_container_set(ps, false); - hpdata_purge_list_remove(&psset->to_purge, ps); + if (hpdata_in_psset_hugify_container_get(ps)) { + hpdata_in_psset_hugify_container_set(ps, false); + hpdata_hugify_list_remove(&psset->to_hugify, ps); } } From 73ca4b8ef81d2a54970804182c010b8c95a93587 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 8 Feb 2021 14:11:37 -0800 Subject: [PATCH 2038/2608] HPA: Use dirtiest-first purging. This seems to be practically beneficial, despite some pathological corner cases. --- include/jemalloc/internal/hpdata.h | 76 ++++--------------- include/jemalloc/internal/psset.h | 12 ++- src/hpa.c | 26 ++----- src/hpdata.c | 3 +- src/psset.c | 113 ++++++++++++++++------------- test/unit/psset.c | 86 +++++++++++++++++++++- 6 files changed, 179 insertions(+), 137 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 3bbb7cc8..245116b9 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -6,42 +6,6 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/typed_list.h" -/* - * How badly we want to purge some region of memory. This is a temporary - * definition; it gets deleted in the next commit (where we adopt a more - * explicit dirtiest-first policy that only considers hugification status). 
- */ -enum hpdata_purge_level_e { - /* - * The level number is important -- we use it as indices into an array - * of size 2 (one for each purge level). - */ - - /* "Regular" candidates for purging. */ - hpdata_purge_level_default = 0, - - /* - * Candidates for purging, but as a last resort. Practically, - * nonpreferred corresponds to hugified regions that are below the - * hugification threshold but have not yet reached the dehugification - * threshold, while strongly nonpreferred candidates are those which are - * above the hugification threshold. - */ - hpdata_purge_level_nonpreferred = 1, - hpdata_purge_level_strongly_nonpreferred = 2, - - /* Don't purge, no matter what. */ - hpdata_purge_level_never = 2, - - /* - * How big an array has to be to accomodate all purge levels. This - * relies on the fact that we don't actually keep unpurgable hpdatas in - * a container. - */ - hpdata_purge_level_count = hpdata_purge_level_never -}; -typedef enum hpdata_purge_level_e hpdata_purge_level_t; - /* * The metadata representation we use for extents in hugepages. While the PAC * uses the edata_t to represent both active and inactive extents, the HP only @@ -87,9 +51,13 @@ struct hpdata_s { bool h_alloc_allowed; bool h_in_psset_alloc_container; - /* The same, but with purging. */ - uint8_t h_purge_level; - uint8_t h_purge_container_level; + /* + * The same, but with purging. There's no corresponding + * h_in_psset_purge_container, because the psset (currently) always + * removes hpdatas from their containers during updates (to implement + * LRU for purging). + */ + bool h_purge_allowed; /* And with hugifying. 
*/ bool h_hugify_allowed; @@ -200,26 +168,15 @@ hpdata_in_psset_alloc_container_set(hpdata_t *hpdata, bool in_container) { hpdata->h_in_psset_alloc_container = in_container; } -static inline hpdata_purge_level_t -hpdata_purge_level_get(const hpdata_t *hpdata) { - return (hpdata_purge_level_t)hpdata->h_purge_level; +static inline bool +hpdata_purge_allowed_get(const hpdata_t *hpdata) { + return hpdata->h_purge_allowed; } static inline void -hpdata_purge_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { - assert(level == hpdata_purge_level_never || !hpdata->h_mid_purge); - hpdata->h_purge_level = (uint8_t)level; -} - -static inline hpdata_purge_level_t -hpdata_purge_container_level_get(const hpdata_t *hpdata) { - return (hpdata_purge_level_t)hpdata->h_purge_container_level; -} - -static inline void -hpdata_purge_container_level_set(hpdata_t *hpdata, hpdata_purge_level_t level) { - assert(level != hpdata->h_purge_container_level); - hpdata->h_purge_container_level = level; +hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { + assert(purge_allowed == false || !hpdata->h_mid_purge); + hpdata->h_purge_allowed = purge_allowed; } static inline bool @@ -357,12 +314,7 @@ hpdata_consistent(hpdata_t *hpdata) { return false; } if (hpdata_changing_state_get(hpdata) - && ((hpdata->h_purge_level != hpdata_purge_level_never) - || hpdata->h_hugify_allowed)) { - return false; - } - if (hpdata_purge_level_get(hpdata) - != hpdata_purge_container_level_get(hpdata)) { + && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) { return false; } if (hpdata_hugify_allowed_get(hpdata) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 285bf6da..96fb300e 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -20,6 +20,14 @@ */ #define PSSET_NPSIZES 64 +/* + * We keep two purge lists per page size class; one for hugified hpdatas (at + * index 2*pszind), and one for the non-hugified hpdatas (at 
index 2*pszind + + * 1). This lets us implement a preference for purging non-hugified hpdatas + * among similarly-dirty ones. + */ +#define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES) + typedef struct psset_bin_stats_s psset_bin_stats_t; struct psset_bin_stats_s { /* How many pageslabs are in this bin? */ @@ -71,7 +79,9 @@ struct psset_s { */ hpdata_empty_list_t empty; /* Slabs which are available to be purged, ordered by purge level. */ - hpdata_purge_list_t to_purge[hpdata_purge_level_count]; + hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS]; + /* Bitmap for which set bits correspond to non-empty purge lists. */ + fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)]; /* Slabs which are available to be hugified. */ hpdata_hugify_list_t to_hugify; }; diff --git a/src/hpa.c b/src/hpa.c index 90fec354..7d4fa1bf 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -195,7 +195,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { - hpdata_purge_level_set(ps, hpdata_purge_level_never); + hpdata_purge_allowed_set(ps, false); hpdata_hugify_allowed_set(ps, false); return; } @@ -221,23 +221,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, * allocator's end at all; we just try to pack allocations in a * hugepage-friendly manner and let the OS hugify in the background. 
*/ - if (hpdata_ndirty_get(ps) > 0) { - if (hpdata_huge_get(ps)) { - if (hpa_good_hugification_candidate(shard, ps)) { - hpdata_purge_level_set(ps, - hpdata_purge_level_strongly_nonpreferred); - } else if (hpdata_ndirty_get(ps) * PAGE - >= shard->opts.dehugification_threshold) { - hpdata_purge_level_set(ps, - hpdata_purge_level_nonpreferred); - } else { - hpdata_purge_level_set(ps, - hpdata_purge_level_default); - } - } else { - hpdata_purge_level_set(ps, hpdata_purge_level_default); - } - } + hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0); if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { hpdata_hugify_allowed_set(ps, true); @@ -317,7 +301,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { if (to_purge == NULL) { return false; } - assert(hpdata_purge_level_get(to_purge) != hpdata_purge_level_never); + assert(hpdata_purge_allowed_get(to_purge)); assert(!hpdata_changing_state_get(to_purge)); /* @@ -328,7 +312,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_purge); assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); - hpdata_purge_level_set(to_purge, hpdata_purge_level_never); + hpdata_purge_allowed_set(to_purge, false); hpdata_hugify_allowed_set(to_purge, false); /* * Unlike with hugification (where concurrent @@ -413,7 +397,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { */ psset_update_begin(&shard->psset, to_hugify); hpdata_mid_hugify_set(to_hugify, true); - hpdata_purge_level_set(to_hugify, hpdata_purge_level_never); + hpdata_purge_allowed_set(to_hugify, false); hpdata_hugify_allowed_set(to_hugify, false); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); diff --git a/src/hpdata.c b/src/hpdata.c index 6aee4f61..b861e9e4 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -24,8 +24,7 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata->h_huge = false; hpdata->h_alloc_allowed = true; 
hpdata->h_in_psset_alloc_container = false; - hpdata->h_purge_level = hpdata_purge_level_never; - hpdata->h_purge_container_level = hpdata_purge_level_never; + hpdata->h_purge_allowed = false; hpdata->h_hugify_allowed = false; hpdata->h_in_psset_hugify_container = false; hpdata->h_mid_purge = false; diff --git a/src/psset.c b/src/psset.c index 6de82605..c4053efc 100644 --- a/src/psset.c +++ b/src/psset.c @@ -14,9 +14,10 @@ psset_init(psset_t *psset) { memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); - for (int i = 0; i < hpdata_purge_level_count; i++) { + for (int i = 0; i < PSSET_NPURGE_LISTS; i++) { hpdata_purge_list_init(&psset->to_purge[i]); } + fb_init(psset->purge_bitmap, PSSET_NPURGE_LISTS); hpdata_hugify_list_init(&psset->to_hugify); } @@ -195,6 +196,51 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { } } +static size_t +psset_purge_list_ind(hpdata_t *ps) { + size_t ndirty = hpdata_ndirty_get(ps); + /* Shouldn't have something with no dirty pages purgeable. */ + assert(ndirty > 0); + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); + /* + * Higher indices correspond to lists we'd like to purge earlier; + * increment the index for the nonhugified hpdatas first, so that we'll + * pick them before picking hugified ones. + */ + return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1); +} + +static void +psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { + /* + * Remove the hpdata from its purge list (if it's in one). Even if it's + * going to stay in the same one, by appending it during + * psset_update_end, we move it to the end of its queue, so that we + * purge LRU within a given dirtiness bucket. 
+ */ + if (hpdata_purge_allowed_get(ps)) { + size_t ind = psset_purge_list_ind(ps); + hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; + hpdata_purge_list_remove(purge_list, ps); + if (hpdata_purge_list_empty(purge_list)) { + fb_unset(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); + } + } +} + +static void +psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) { + if (hpdata_purge_allowed_get(ps)) { + size_t ind = psset_purge_list_ind(ps); + hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; + if (hpdata_purge_list_empty(purge_list)) { + fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); + } + hpdata_purge_list_append(purge_list, ps); + } + +} + void psset_update_begin(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); @@ -210,10 +256,11 @@ psset_update_begin(psset_t *psset, hpdata_t *ps) { assert(hpdata_alloc_allowed_get(ps)); psset_alloc_container_remove(psset, ps); } + psset_maybe_remove_purge_list(psset, ps); /* - * We don't update presence in the purge list or hugify list; we try to - * keep those FIFO, even in the presence of other metadata updates. - * We'll update presence at the end of the metadata update if necessary. + * We don't update presence in the hugify list; we try to keep it FIFO, + * even in the presence of other metadata updates. We'll update + * presence at the end of the metadata update if necessary. */ } @@ -231,33 +278,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { if (hpdata_alloc_allowed_get(ps)) { psset_alloc_container_insert(psset, ps); } - - if (hpdata_purge_level_get(ps) == hpdata_purge_level_never - && hpdata_purge_container_level_get(ps) - != hpdata_purge_level_never) { - /* In some purge container, but shouldn't be in any. 
*/ - hpdata_purge_list_remove( - &psset->to_purge[hpdata_purge_container_level_get(ps)], - ps); - hpdata_purge_container_level_set(ps, hpdata_purge_level_never); - } else if (hpdata_purge_level_get(ps) != hpdata_purge_level_never - && hpdata_purge_container_level_get(ps) - == hpdata_purge_level_never) { - /* Not in any purge container, but should be in one. */ - hpdata_purge_list_append( - &psset->to_purge[hpdata_purge_level_get(ps)], ps); - hpdata_purge_container_level_set(ps, - hpdata_purge_level_get(ps)); - } else if (hpdata_purge_level_get(ps) - != hpdata_purge_container_level_get(ps)) { - /* Should switch containers. */ - hpdata_purge_list_remove( - &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); - hpdata_purge_list_append( - &psset->to_purge[hpdata_purge_level_get(ps)], ps); - hpdata_purge_container_level_set(ps, - hpdata_purge_level_get(ps)); - } + psset_maybe_insert_purge_list(psset, ps); if (hpdata_hugify_allowed_get(ps) && !hpdata_in_psset_hugify_container_get(ps)) { @@ -294,13 +315,16 @@ psset_pick_alloc(psset_t *psset, size_t size) { hpdata_t * psset_pick_purge(psset_t *psset) { - for (int i = 0; i < hpdata_purge_level_count; i++) { - hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[i]); - if (ps != NULL) { - return ps; - } + ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS, + PSSET_NPURGE_LISTS - 1); + if (ind_ssz < 0) { + return NULL; } - return NULL; + pszind_t ind = (pszind_t)ind_ssz; + assert(ind < PSSET_NPSIZES); + hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); + assert(ps != NULL); + return ps; } hpdata_t * @@ -316,14 +340,7 @@ psset_insert(psset_t *psset, hpdata_t *ps) { if (hpdata_alloc_allowed_get(ps)) { psset_alloc_container_insert(psset, ps); } - assert( - hpdata_purge_container_level_get(ps) == hpdata_purge_level_never); - if (hpdata_purge_level_get(ps) != hpdata_purge_level_never) { - hpdata_purge_container_level_set(ps, - hpdata_purge_level_get(ps)); - hpdata_purge_list_append( - 
&psset->to_purge[hpdata_purge_level_get(ps)], ps); - } + psset_maybe_insert_purge_list(psset, ps); if (hpdata_hugify_allowed_get(ps)) { hpdata_in_psset_hugify_container_set(ps, true); @@ -339,11 +356,7 @@ psset_remove(psset_t *psset, hpdata_t *ps) { if (hpdata_in_psset_alloc_container_get(ps)) { psset_alloc_container_remove(psset, ps); } - if (hpdata_purge_container_level_get(ps) != hpdata_purge_level_never) { - hpdata_purge_list_remove( - &psset->to_purge[hpdata_purge_container_level_get(ps)], ps); - hpdata_purge_container_level_set(ps, hpdata_purge_level_never); - } + psset_maybe_remove_purge_list(psset, ps); if (hpdata_in_psset_hugify_container_get(ps)) { hpdata_in_psset_hugify_container_set(ps, false); hpdata_hugify_list_remove(&psset->to_hugify, ps); diff --git a/test/unit/psset.c b/test/unit/psset.c index fdc28d3d..fde403e1 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -540,6 +540,89 @@ TEST_BEGIN(test_insert_remove) { } TEST_END +TEST_BEGIN(test_purge_prefers_nonhuge) { + /* + * All else being equal, we should prefer purging non-huge pages over + * huge ones. + */ + + /* Nothing magic about this constant. 
*/ + enum { + NHP = 23, + }; + hpdata_t *hpdata; + + psset_t psset; + psset_init(&psset); + + hpdata_t hpdata_huge[NHP]; + uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0]; + uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP]; + hpdata_t hpdata_nonhuge[NHP]; + uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; + uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; + + for (size_t i = 0; i < NHP; i++) { + hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), + 123 + i); + psset_insert(&psset, &hpdata_huge[i]); + + hpdata_init(&hpdata_nonhuge[i], + (void *)((10 + NHP + i) * HUGEPAGE), + 456 + i); + psset_insert(&psset, &hpdata_nonhuge[i]); + + } + for (int i = 0; i < 2 * NHP; i++) { + hpdata = psset_pick_alloc(&psset, HUGEPAGE * 3 / 4); + psset_update_begin(&psset, hpdata); + void *ptr; + ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE * 3 / 4); + /* Ignore the first alloc, which will stick around. */ + (void)ptr; + /* + * The second alloc is to dirty the pages; free it immediately + * after allocating. + */ + ptr = hpdata_reserve_alloc(hpdata, HUGEPAGE / 4); + hpdata_unreserve(hpdata, ptr, HUGEPAGE / 4); + + if (huge_begin <= (uintptr_t)hpdata + && (uintptr_t)hpdata < huge_end) { + hpdata_hugify(hpdata); + } + + hpdata_purge_allowed_set(hpdata, true); + psset_update_end(&psset, hpdata); + } + + /* + * We've got a bunch of 1/8th dirty hpdatas. It should give us all the + * non-huge ones to purge, then all the huge ones, then refuse to purge + * further. 
+ */ + for (int i = 0; i < NHP; i++) { + hpdata = psset_pick_purge(&psset); + assert_true(nonhuge_begin <= (uintptr_t)hpdata + && (uintptr_t)hpdata < nonhuge_end, ""); + psset_update_begin(&psset, hpdata); + test_psset_fake_purge(hpdata); + hpdata_purge_allowed_set(hpdata, false); + psset_update_end(&psset, hpdata); + } + for (int i = 0; i < NHP; i++) { + hpdata = psset_pick_purge(&psset); + expect_true(huge_begin <= (uintptr_t)hpdata + && (uintptr_t)hpdata < huge_end, ""); + psset_update_begin(&psset, hpdata); + hpdata_dehugify(hpdata); + test_psset_fake_purge(hpdata); + hpdata_purge_allowed_set(hpdata, false); + psset_update_end(&psset, hpdata); + } +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -550,5 +633,6 @@ main(void) { test_multi_pageslab, test_stats, test_oldest_fit, - test_insert_remove); + test_insert_remove, + test_purge_prefers_nonhuge); } From 22be724af4438014245c0336ac7212fe97ad004b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 16:57:15 -0800 Subject: [PATCH 2039/2608] Set is_head in extent_alloc_wrapper w/ retain. When retain is on, when extent_grow_retained failed (e.g. due to split hook failures), we'll try extent_alloc_wrapper as the last resort. Set the is_head bit in that case to be consistent. The allocated extent in that case will be retained properly, but not merged with other extents. 
--- include/jemalloc/internal/extent.h | 2 -- src/extent.c | 6 ++++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f6207362..f2fee5c1 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -28,8 +28,6 @@ void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); -edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, diff --git a/src/extent.c b/src/extent.c index c41f17ce..51711efc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -45,6 +45,8 @@ static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); +static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); /******************************************************************************/ @@ -771,7 +773,7 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } -edata_t * +static edata_t * extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -791,7 +793,7 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, EXTENT_PAI_PAC, - 
EXTENT_NOT_HEAD); + opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD); if (extent_register(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; From 11127240caefb579a213ad075ab4f52910f333e2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 22:26:12 -0800 Subject: [PATCH 2040/2608] Remove redundant enable-debug definition in configure. --- configure.ac | 3 --- 1 file changed, 3 deletions(-) diff --git a/configure.ac b/configure.ac index 34613feb..41a03d24 100644 --- a/configure.ac +++ b/configure.ac @@ -1203,9 +1203,6 @@ fi if test "x$enable_debug" = "x1" ; then AC_DEFINE([JEMALLOC_DEBUG], [ ]) fi -if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) -fi AC_SUBST([enable_debug]) dnl Only optimize if not debugging. From 3913077146350bd1b720a757e33e8aa35a34e58b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Mar 2021 11:27:00 -0800 Subject: [PATCH 2041/2608] Mark head state during dss alloc. Specifically, the extent_dalloc_gap relies on the correct head state to coalesce. --- src/extent_dss.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 9857fd29..9a35bacf 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -140,6 +140,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, goto label_oom; } + bool head_state = opt_retain ? EXTENT_IS_HEAD : + EXTENT_NOT_HEAD; /* * Compute how much page-aligned gap space (if any) is * necessary to satisfy alignment. 
This space can be @@ -157,7 +159,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, SC_NSIZES, extent_sn_next( &arena->pa_shard.pac), extent_state_active, false, true, - EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + EXTENT_PAI_PAC, head_state); } /* * Compute the address just past the end of the desired @@ -206,7 +208,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena_ind_get(arena), ret, size, size, false, SC_NSIZES, extent_state_active, false, true, - EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + EXTENT_PAI_PAC, head_state); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); From 9193ea2248e6265d2e649e60e246491d414d254a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 25 Feb 2021 10:17:44 -0800 Subject: [PATCH 2042/2608] Cirrus: fix build. Remaining on 12.1 has started to break with an m4 error. Upgrading fixes things. Mangle public symbols to work around a public definition error. --- .cirrus.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index d01954f1..30fe830b 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -5,7 +5,7 @@ env: task: freebsd_instance: matrix: - image: freebsd-12-1-release-amd64 + image: freebsd-12-2-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y @@ -13,7 +13,10 @@ task: script: - autoconf #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS - - ./configure + # We don't perfectly track freebsd stdlib.h definitions. This is fine when + # we count as a system header, but breaks otherwise, like during these + # tests. 
+ - ./configure --with-jemalloc-prefix=ci_ - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests From 61afb6a40572adfd7b9f03817ff0e62005110212 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 19 Mar 2021 22:50:22 -0700 Subject: [PATCH 2043/2608] Fix locking on arena_i_destroy_ctl(). The ctl_mtx should be held to protect against concurrent arenas.create. --- src/ctl.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 663cf866..c713f0e2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2650,6 +2650,8 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, arena_t *arena; ctl_arena_t *ctl_darena, *ctl_arena; + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { @@ -2680,6 +2682,8 @@ arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, assert(ret == 0); label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; } From 2ae1ef7dbd9aadfc80db9692004b5052fd3b36ea Mon Sep 17 00:00:00 2001 From: lirui Date: Fri, 26 Mar 2021 17:32:35 +0800 Subject: [PATCH 2044/2608] Fix doc large size 54 KiB error --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 018170ca..fa53715d 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -630,7 +630,7 @@ for (i = 0; i < nbins; i++) {
8 KiB - [40 KiB, 48 KiB, 54 KiB, 64 KiB] + [40 KiB, 48 KiB, 56 KiB, 64 KiB] 16 KiB From a137a6825253da928b49149a81f82e73ed0d7b75 Mon Sep 17 00:00:00 2001 From: Evers Chen Date: Tue, 30 Mar 2021 07:27:37 +0800 Subject: [PATCH 2045/2608] Remove redundant declaration, pac_retain_grow_limit_get_set was declared twice in pac.h --- include/jemalloc/internal/pac.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 6d4dfbaf..d07ccc2f 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -121,8 +121,6 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); -bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, - size_t *new_limit); void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, pac_estats_t *estats_out, size_t *resident); From 862219e461d642d860d2c9ddc122705b031b6d80 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 19 Mar 2021 22:50:01 -0700 Subject: [PATCH 2046/2608] Add quiescence sync before deleting base during arena_destroy. 
--- src/arena.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index f054f093..78ea92c1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -605,6 +605,90 @@ arena_reset(tsd_t *tsd, arena_t *arena) { pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); } +static void +arena_prepare_base_deletion_sync_finish(tsd_t *tsd, malloc_mutex_t **mutexes, + unsigned n_mtx) { + for (unsigned i = 0; i < n_mtx; i++) { + malloc_mutex_lock(tsd_tsdn(tsd), mutexes[i]); + malloc_mutex_unlock(tsd_tsdn(tsd), mutexes[i]); + } +} + +#define ARENA_DESTROY_MAX_DELAYED_MTX 32 +static void +arena_prepare_base_deletion_sync(tsd_t *tsd, malloc_mutex_t *mtx, + malloc_mutex_t **delayed_mtx, unsigned *n_delayed) { + if (!malloc_mutex_trylock(tsd_tsdn(tsd), mtx)) { + /* No contention. */ + malloc_mutex_unlock(tsd_tsdn(tsd), mtx); + return; + } + unsigned n = *n_delayed; + assert(n < ARENA_DESTROY_MAX_DELAYED_MTX); + /* Add another to the batch. */ + delayed_mtx[n++] = mtx; + + if (n == ARENA_DESTROY_MAX_DELAYED_MTX) { + arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n); + n = 0; + } + *n_delayed = n; +} + +static void +arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { + /* + * In order to coalesce, emap_try_acquire_edata_neighbor will attempt to + * check neighbor edata's state to determine eligibility. This means + * under certain conditions, the metadata from an arena can be accessed + * w/o holding any locks from that arena. In order to guarantee safe + * memory access, the metadata and the underlying base allocator needs + * to be kept alive, until all pending accesses are done. + * + * 1) with opt_retain, the arena boundary implies the is_head state + * (tracked in the rtree leaf), and the coalesce flow will stop at the + * head state branch. Therefore no cross arena metadata access + * possible. 
+ * + * 2) w/o opt_retain, the arena id needs to be read from the edata_t, + * meaning read only cross-arena metadata access is possible. The + * coalesce attempt will stop at the arena_id mismatch, and is always + * under one of the ecache locks. To allow safe passthrough of such + * metadata accesses, the loop below will iterate through all manual + * arenas' ecache locks. As all the metadata from this base allocator + * have been unlinked from the rtree, after going through all the + * relevant ecache locks, it's safe to say that a) pending accesses are + * all finished, and b) no new access will be generated. + */ + if (opt_retain) { + return; + } + unsigned destroy_ind = base_ind_get(base_to_destroy); + assert(destroy_ind >= manual_arena_base); + + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_t *delayed_mtx[ARENA_DESTROY_MAX_DELAYED_MTX]; + unsigned n_delayed = 0, total = narenas_total_get(); + for (unsigned i = 0; i < total; i++) { + if (i == destroy_ind) { + continue; + } + arena_t *arena = arena_get(tsdn, i, false); + if (arena == NULL) { + continue; + } + pac_t *pac = &arena->pa_shard.pac; + arena_prepare_base_deletion_sync(tsd, &pac->ecache_dirty.mtx, + delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync(tsd, &pac->ecache_muzzy.mtx, + delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync(tsd, &pac->ecache_retained.mtx, + delayed_mtx, &n_delayed); + } + arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed); +} +#undef ARENA_DESTROY_MAX_DELAYED_MTX + void arena_destroy(tsd_t *tsd, arena_t *arena) { assert(base_ind_get(arena->base) >= narenas_auto); @@ -633,8 +717,10 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { /* * Destroy the base allocator, which manages all metadata ever mapped by - * this arena. + * this arena. The prepare function will make sure no pending access to + * the metadata in this base anymore. 
*/ + arena_prepare_base_deletion(tsd, arena->base); base_delete(tsd_tsdn(tsd), arena->base); } From 70d1541c5b60ffd3089d312f3e4e534c72738aaf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 26 Feb 2021 15:11:58 -0800 Subject: [PATCH 2047/2608] Track extent is_head state in rtree leaf. --- include/jemalloc/internal/edata.h | 2 +- include/jemalloc/internal/rtree.h | 34 +++++++++++++++++++++---------- src/ehooks.c | 4 ++++ src/emap.c | 21 +++++++++++++------ src/extent.c | 2 ++ test/unit/rtree.c | 10 +++++++-- 6 files changed, 53 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index c71209e5..e75866ba 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -23,7 +23,7 @@ typedef enum extent_state_e extent_state_t; enum extent_head_state_e { EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* Only relevant for Windows && opt.retain. */ + EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */ }; typedef enum extent_head_state_e extent_head_state_t; diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 83dfdc81..3b7972e4 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -46,6 +46,7 @@ struct rtree_node_elm_s { typedef struct rtree_metadata_s rtree_metadata_t; struct rtree_metadata_s { szind_t szind; + bool is_head; /* Mirrors edata->is_head. */ bool slab; }; @@ -65,9 +66,10 @@ struct rtree_leaf_elm_s { * * x: index * e: edata + * h: is_head * b: slab * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee000b + * 00000000 xxxxxxxx eeeeeeee [...] 
eeeeeeee eeee00hb */ atomic_p_t le_bits; #else @@ -184,12 +186,16 @@ rtree_leaf_elm_bits_encode(rtree_contents_t contents) { & (((uintptr_t)1 << LG_VADDR) - 1); uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; /* - * Slab shares the low bit of edata; we know edata is on an even address - * (in fact, it's 128 bytes on 64-bit systems; we can enforce this - * alignment if we want to steal 6 extra rtree leaf bits someday. + * Metadata shares the low bits of edata. edata is CACHELINE aligned (in + * fact, it's 128 bytes on 64-bit systems); we can enforce this + * alignment if we want to steal the extra rtree leaf bits someday. */ uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; - return szind_bits | edata_bits | slab_bits; + uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1; + uintptr_t metadata_bits = szind_bits | is_head_bits | slab_bits; + assert((edata_bits & metadata_bits) == 0); + + return edata_bits | metadata_bits; } JEMALLOC_ALWAYS_INLINE rtree_contents_t @@ -198,20 +204,23 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { /* Do the easy things first. */ contents.metadata.szind = bits >> LG_VADDR; contents.metadata.slab = (bool)(bits & 1); + contents.metadata.is_head = (bool)(bits & (1 << 1)); + + uintptr_t metadata_mask = ~((uintptr_t)((1 << 2) - 1)); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set * the higher ones. Instead, the high bits get zeroed. */ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; - /* Mask off the slab bit. */ - uintptr_t low_bit_mask = ~(uintptr_t)1; + /* Mask off metadata. */ + uintptr_t low_bit_mask = metadata_mask; uintptr_t mask = high_bit_mask & low_bit_mask; contents.edata = (edata_t *)(bits & mask); # else - /* Restore sign-extended high bits, mask slab bit. */ + /* Restore sign-extended high bits, mask metadata bits. 
*/ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) - >> RTREE_NHIB) & ~((uintptr_t)0x1)); + >> RTREE_NHIB) & metadata_mask); # endif return contents; } @@ -230,7 +239,8 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); contents.metadata.slab = (bool)(metadata_bits & 1); - contents.metadata.szind = (metadata_bits >> 1); + contents.metadata.is_head = (bool)(metadata_bits & (1 << 1)); + contents.metadata.szind = (metadata_bits >> 2); contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -247,7 +257,8 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else unsigned metadata_bits = ((unsigned)contents.metadata.slab - | ((unsigned)contents.metadata.szind << 1)); + | ((unsigned)contents.metadata.is_head << 1) + | ((unsigned)contents.metadata.szind << 2)); atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid @@ -418,6 +429,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, contents.edata = NULL; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; + contents.metadata.is_head = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); } diff --git a/src/ehooks.c b/src/ehooks.c index f2525e12..e1815ee8 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -227,10 +227,14 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); bool head_a = edata_is_head_get(a); edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); bool head_b = edata_is_head_get(b); + emap_assert_mapped(tsdn, 
&arena_emap_global, a); + emap_assert_mapped(tsdn, &arena_emap_global, b); + return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); } diff --git a/src/emap.c b/src/emap.c index 537f5884..62abf4d8 100644 --- a/src/emap.c +++ b/src/emap.c @@ -141,6 +141,8 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = slab; + contents.metadata.is_head = (edata == NULL) ? false : + edata_is_head_get(edata); rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); @@ -169,12 +171,14 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, assert(edata_slab_get(edata)); + rtree_contents_t contents; + contents.edata = edata; + contents.metadata.szind = szind; + contents.metadata.slab = true; + contents.metadata.is_head = false; /* Not allowed to access. */ + /* Register interior. */ for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_contents_t contents; - contents.edata = edata; - contents.metadata.szind = szind; - contents.metadata.slab = true; rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << LG_PAGE), contents); @@ -214,6 +218,8 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = slab; + contents.metadata.is_head = edata_is_head_get(edata); + rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), contents); /* @@ -297,6 +303,7 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, clear_contents.edata = NULL; clear_contents.metadata.szind = SC_NSIZES; clear_contents.metadata.slab = false; + clear_contents.metadata.is_head = false; if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, @@ -320,8 +327,10 @@ void 
emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; - assert(rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata)).edata == edata); + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata)); + assert(contents.edata == edata); + assert(contents.metadata.is_head == edata_is_head_get(edata)); } void diff --git a/src/extent.c b/src/extent.c index 51711efc..a541e7bb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1254,6 +1254,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); + emap_assert_mapped(tsdn, pac->emap, a); + emap_assert_mapped(tsdn, pac->emap, b); bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), edata_size_get(a), edata_is_head_get(a), edata_base_get(b), diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 775bc190..a547f188 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -55,6 +55,7 @@ TEST_BEGIN(test_rtree_extrema) { contents_a.edata = &edata_a; contents_a.metadata.szind = edata_szind_get(&edata_a); contents_a.metadata.slab = edata_slab_get(&edata_a); + contents_a.metadata.is_head = edata_is_head_get(&edata_a); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), @@ -63,20 +64,23 @@ TEST_BEGIN(test_rtree_extrema) { PAGE); expect_true(contents_a.edata == read_contents_a.edata && contents_a.metadata.szind == read_contents_a.metadata.szind - && contents_a.metadata.slab == read_contents_a.metadata.slab, + && contents_a.metadata.slab == read_contents_a.metadata.slab + && contents_a.metadata.is_head == read_contents_a.metadata.is_head, "rtree_read() should return previously set value"); rtree_contents_t contents_b; contents_b.edata = &edata_b; 
contents_b.metadata.szind = edata_szind_get_maybe_invalid(&edata_b); contents_b.metadata.slab = edata_slab_get(&edata_b); + contents_b.metadata.is_head = edata_is_head_get(&edata_b); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b), "Unexpected rtree_write() failure"); rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0)); assert_true(contents_b.edata == read_contents_b.edata && contents_b.metadata.szind == read_contents_b.metadata.szind - && contents_b.metadata.slab == read_contents_b.metadata.slab, + && contents_b.metadata.slab == read_contents_b.metadata.slab + && contents_b.metadata.is_head == read_contents_b.metadata.is_head, "rtree_read() should return previously set value"); base_delete(tsdn, base); @@ -106,6 +110,7 @@ TEST_BEGIN(test_rtree_bits) { contents.edata = &edata; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; + contents.metadata.is_head = false; expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents), "Unexpected rtree_write() failure"); @@ -158,6 +163,7 @@ TEST_BEGIN(test_rtree_random) { contents.edata = &edata; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; + contents.metadata.is_head = false; rtree_leaf_elm_write(tsdn, rtree, elm, contents); expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, &edata, From 4d8c22f9a57fb29d39394e2382628854542d1520 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 26 Feb 2021 15:32:41 -0800 Subject: [PATCH 2048/2608] Store edata->state in rtree leaf and make edata_t 128B aligned. Verified that this doesn't result in any real increase of edata_t bytes allocated. 
--- include/jemalloc/internal/edata.h | 10 ++++- include/jemalloc/internal/emap.h | 30 +++++++++++++++ include/jemalloc/internal/rtree.h | 46 +++++++++++++++-------- src/base.c | 2 +- src/emap.c | 6 +++ src/extent.c | 36 ++++++++++-------- src/hpa_central.c | 7 ++-- test/unit/rtree.c | 61 +++++++++++++++++++------------ 8 files changed, 138 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index e75866ba..648b478e 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -13,6 +13,12 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/typed_list.h" +/* + * sizeof(edata_t) is 128 bytes on 64-bit architectures. Ensure the alignment + * to free up the low bits in the rtree leaf. + */ +#define EDATA_ALIGNMENT 128 + enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, @@ -88,7 +94,7 @@ struct edata_s { * f: nfree * s: bin_shard * - * 00000000 ... 000000ss ssssffff ffffffii iiiiiitt zpcbaaaa aaaaaaaa + * 00000000 ... 00000sss sssfffff fffffiii iiiiittt zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. 
@@ -143,7 +149,7 @@ struct edata_s { #define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) -#define EDATA_BITS_STATE_WIDTH 2 +#define EDATA_BITS_STATE_WIDTH 3 #define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) #define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index ac0050b5..3e397483 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -136,6 +136,36 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } +static inline void +emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata, + uintptr_t addr, extent_state_t state) { + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + addr, /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true); + assert(contents.edata == expected_edata); + contents.metadata.state = state; + rtree_leaf_elm_write(tsdn, rtree, elm, contents); +} + +static inline void +emap_edata_state_update(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t state) { + /* Only emap is allowed to modify the edata internal state. 
*/ + edata_state_set(edata, state); + + emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, + (uintptr_t)edata_base_get(edata), state); + emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, + (uintptr_t)edata_last_get(edata), state); + + emap_assert_mapped(tsdn, emap, edata); +} + JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { rtree_ctx_t rtree_ctx_fallback; diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 3b7972e4..89c08cb0 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -46,6 +46,7 @@ struct rtree_node_elm_s { typedef struct rtree_metadata_s rtree_metadata_t; struct rtree_metadata_s { szind_t szind; + extent_state_t state; /* Mirrors edata->state. */ bool is_head; /* Mirrors edata->is_head. */ bool slab; }; @@ -56,6 +57,10 @@ struct rtree_contents_s { rtree_metadata_t metadata; }; +#define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH +#define RTREE_LEAF_STATE_SHIFT 2 +#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT) + struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT /* @@ -66,17 +71,17 @@ struct rtree_leaf_elm_s { * * x: index * e: edata + * s: state * h: is_head * b: slab * - * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee eeee00hb + * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb */ atomic_p_t le_bits; #else atomic_p_t le_edata; /* (edata_t *) */ /* - * slab is stored in the low bit; szind is stored in the next lowest - * bits. + * From low to high bits: slab, is_head, state. */ atomic_u_t le_metadata; #endif @@ -184,15 +189,14 @@ JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leaf_elm_bits_encode(rtree_contents_t contents) { uintptr_t edata_bits = (uintptr_t)contents.edata & (((uintptr_t)1 << LG_VADDR) - 1); + uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; - /* - * Metadata shares the low bits of edata. 
edata is CACHELINE aligned (in - * fact, it's 128 bytes on 64-bit systems); we can enforce this - * alignment if we want to steal the extra rtree leaf bits someday. - */ uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1; - uintptr_t metadata_bits = szind_bits | is_head_bits | slab_bits; + uintptr_t state_bits = (uintptr_t)contents.metadata.state << + RTREE_LEAF_STATE_SHIFT; + uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits | + slab_bits; assert((edata_bits & metadata_bits) == 0); return edata_bits | metadata_bits; @@ -206,7 +210,11 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { contents.metadata.slab = (bool)(bits & 1); contents.metadata.is_head = (bool)(bits & (1 << 1)); - uintptr_t metadata_mask = ~((uintptr_t)((1 << 2) - 1)); + uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >> + RTREE_LEAF_STATE_SHIFT; + contents.metadata.state = (extent_state_t)state_bits; + + uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1); # ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set @@ -214,13 +222,12 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { */ uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; /* Mask off metadata. */ - uintptr_t low_bit_mask = metadata_mask; uintptr_t mask = high_bit_mask & low_bit_mask; contents.edata = (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask metadata bits. */ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) - >> RTREE_NHIB) & metadata_mask); + >> RTREE_NHIB) & low_bit_mask); # endif return contents; } @@ -240,7 +247,12 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); contents.metadata.slab = (bool)(metadata_bits & 1); contents.metadata.is_head = (bool)(metadata_bits & (1 << 1)); - contents.metadata.szind = (metadata_bits >> 2); + + uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >> + RTREE_LEAF_STATE_SHIFT; + contents.metadata.state = (extent_state_t)state_bits; + contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT + + RTREE_LEAF_STATE_WIDTH); contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); @@ -252,13 +264,16 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, static inline void rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, rtree_contents_t contents) { + assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_encode(contents); atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); #else - unsigned metadata_bits = ((unsigned)contents.metadata.slab + unsigned metadata_bits = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) - | ((unsigned)contents.metadata.szind << 2)); + | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT) + | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT + + RTREE_LEAF_STATE_WIDTH)); atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid @@ -430,6 +445,7 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; contents.metadata.is_head = false; + contents.metadata.state = (extent_state_t)0; rtree_leaf_elm_write(tsdn, rtree, elm, contents); } diff --git a/src/base.c b/src/base.c index 00440f4d..9d4ce5c5 100644 --- a/src/base.c +++ b/src/base.c @@ -472,7 +472,7 @@ edata_t * base_alloc_edata(tsdn_t *tsdn, base_t *base) { size_t esn; edata_t *edata = 
base_alloc_impl(tsdn, base, sizeof(edata_t), - CACHELINE, &esn); + EDATA_ALIGNMENT, &esn); if (edata == NULL) { return NULL; } diff --git a/src/emap.c b/src/emap.c index 62abf4d8..4f3915b5 100644 --- a/src/emap.c +++ b/src/emap.c @@ -143,6 +143,7 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, contents.metadata.slab = slab; contents.metadata.is_head = (edata == NULL) ? false : edata_is_head_get(edata); + contents.metadata.state = (edata == NULL) ? 0 : edata_state_get(edata); rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, elm_b, contents); @@ -170,11 +171,13 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, EMAP_DECLARE_RTREE_CTX; assert(edata_slab_get(edata)); + assert(edata_state_get(edata) == extent_state_active); rtree_contents_t contents; contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = true; + contents.metadata.state = extent_state_active; contents.metadata.is_head = false; /* Not allowed to access. */ /* Register interior. 
*/ @@ -219,6 +222,7 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, contents.metadata.szind = szind; contents.metadata.slab = slab; contents.metadata.is_head = edata_is_head_get(edata); + contents.metadata.state = edata_state_get(edata); rtree_write(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_addr_get(edata), contents); @@ -304,6 +308,7 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, clear_contents.metadata.szind = SC_NSIZES; clear_contents.metadata.slab = false; clear_contents.metadata.is_head = false; + clear_contents.metadata.state = (extent_state_t)0; if (prepare->lead_elm_b != NULL) { rtree_leaf_elm_write(tsdn, &emap->rtree, @@ -331,6 +336,7 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { (uintptr_t)edata_base_get(edata)); assert(contents.edata == edata); assert(contents.metadata.is_head == edata_is_head_get(edata)); + assert(contents.metadata.state == edata_state_get(edata)); } void diff --git a/src/extent.c b/src/extent.c index a541e7bb..56ea33f6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -64,11 +64,12 @@ extent_may_force_decay(pac_t *pac) { static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); + bool coalesced; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, &coalesced, false); - edata_state_set(edata, ecache->state); + emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { return true; @@ -182,7 +183,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, not_reached(); case extent_state_dirty: case extent_state_muzzy: - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, pac->emap, edata, + extent_state_active); break; case extent_state_retained: extent_deregister(tsdn, pac, edata); @@ -223,28 +225,30 @@ 
extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } static void -extent_deactivate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { +extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); - edata_state_set(edata, ecache->state); + emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); eset_insert(&ecache->eset, edata); } static void -extent_deactivate(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { +extent_deactivate(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, ecache, edata); + extent_deactivate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } static void -extent_activate_locked(tsdn_t *tsdn, ecache_t *ecache, edata_t *edata) { +extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state); eset_remove(&ecache->eset, edata); - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); } static void @@ -421,7 +425,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return NULL; } - extent_activate_locked(tsdn, ecache, edata); + extent_activate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; @@ -527,16 +531,16 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * leaking the extent. 
*/ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, ecache, to_leak); + extent_deactivate(tsdn, pac, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, ecache, lead); + extent_deactivate(tsdn, pac, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, ecache, trail); + extent_deactivate(tsdn, pac, ecache, trail); } return edata; } else { @@ -837,7 +841,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { assert(extent_can_coalesce(ecache, inner, outer)); - extent_activate_locked(tsdn, ecache, outer); + extent_activate_locked(tsdn, pac, ecache, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, pac, ehooks, @@ -845,7 +849,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, malloc_mutex_lock(tsdn, &ecache->mtx); if (err) { - extent_deactivate_locked(tsdn, ecache, outer); + extent_deactivate_locked(tsdn, pac, ecache, outer); } return err; @@ -1008,7 +1012,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return; } } - extent_deactivate_locked(tsdn, ecache, edata); + extent_deactivate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); } diff --git a/src/hpa_central.c b/src/hpa_central.c index 346d9422..36758a03 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -91,7 +91,8 @@ label_success: */ assert(edata_state_get(edata) == extent_state_dirty); assert(edata_base_get(edata) == edata_addr_get(edata)); - edata_state_set(edata, extent_state_active); + emap_edata_state_update(tsdn, central->emap, edata, + extent_state_active); return edata; } @@ -136,7 +137,7 @@ hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, edata_sn_set(edata, sn); edata_sn_set(trail, sn); - edata_state_set(trail, extent_state_dirty); + emap_edata_state_update(tsdn, 
central->emap, trail, extent_state_dirty); eset_insert(¢ral->eset, trail); return false; } @@ -203,6 +204,6 @@ hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { eset_remove(¢ral->eset, trail); hpa_central_dalloc_merge(tsdn, central, edata, trail); } - edata_state_set(edata, extent_state_dirty); + emap_edata_state_update(tsdn, central->emap, edata, extent_state_dirty); eset_insert(¢ral->eset, edata); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index a547f188..9251652c 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -32,12 +32,22 @@ TEST_END #undef NITERS #undef SEED +static edata_t * +alloc_edata(void) { + void *ret = mallocx(sizeof(edata_t), MALLOCX_ALIGN(EDATA_ALIGNMENT)); + assert_ptr_not_null(ret, "Unexpected mallocx() failure"); + + return ret; +} + TEST_BEGIN(test_rtree_extrema) { - edata_t edata_a = {0}, edata_b = {0}; - edata_init(&edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, + edata_t *edata_a, *edata_b; + edata_a = alloc_edata(); + edata_b = alloc_edata(); + edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); - edata_init(&edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_init(edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); tsdn_t *tsdn = tsdn_fetch(); @@ -52,10 +62,11 @@ TEST_BEGIN(test_rtree_extrema) { "Unexpected rtree_new() failure"); rtree_contents_t contents_a; - contents_a.edata = &edata_a; - contents_a.metadata.szind = edata_szind_get(&edata_a); - contents_a.metadata.slab = edata_slab_get(&edata_a); - contents_a.metadata.is_head = edata_is_head_get(&edata_a); + contents_a.edata = edata_a; + contents_a.metadata.szind = edata_szind_get(edata_a); + contents_a.metadata.slab = edata_slab_get(edata_a); + contents_a.metadata.is_head = edata_is_head_get(edata_a); + 
contents_a.metadata.state = edata_state_get(edata_a); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), @@ -65,14 +76,16 @@ TEST_BEGIN(test_rtree_extrema) { expect_true(contents_a.edata == read_contents_a.edata && contents_a.metadata.szind == read_contents_a.metadata.szind && contents_a.metadata.slab == read_contents_a.metadata.slab - && contents_a.metadata.is_head == read_contents_a.metadata.is_head, + && contents_a.metadata.is_head == read_contents_a.metadata.is_head + && contents_a.metadata.state == read_contents_a.metadata.state, "rtree_read() should return previously set value"); rtree_contents_t contents_b; - contents_b.edata = &edata_b; - contents_b.metadata.szind = edata_szind_get_maybe_invalid(&edata_b); - contents_b.metadata.slab = edata_slab_get(&edata_b); - contents_b.metadata.is_head = edata_is_head_get(&edata_b); + contents_b.edata = edata_b; + contents_b.metadata.szind = edata_szind_get_maybe_invalid(edata_b); + contents_b.metadata.slab = edata_slab_get(edata_b); + contents_b.metadata.is_head = edata_is_head_get(edata_b); + contents_b.metadata.state = edata_state_get(edata_b); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b), "Unexpected rtree_write() failure"); rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, @@ -80,7 +93,8 @@ TEST_BEGIN(test_rtree_extrema) { assert_true(contents_b.edata == read_contents_b.edata && contents_b.metadata.szind == read_contents_b.metadata.szind && contents_b.metadata.slab == read_contents_b.metadata.slab - && contents_b.metadata.is_head == read_contents_b.metadata.is_head, + && contents_b.metadata.is_head == read_contents_b.metadata.is_head + && contents_b.metadata.state == read_contents_b.metadata.state, "rtree_read() should return previously set value"); base_delete(tsdn, base); @@ -94,9 +108,8 @@ TEST_BEGIN(test_rtree_bits) { uintptr_t keys[] 
= {PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; - - edata_t edata = {0}; - edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t *edata_c = alloc_edata(); + edata_init(edata_c, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); rtree_t *rtree = &test_rtree; @@ -107,16 +120,17 @@ TEST_BEGIN(test_rtree_bits) { for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { rtree_contents_t contents; - contents.edata = &edata; + contents.edata = edata_c; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; contents.metadata.is_head = false; + contents.metadata.state = extent_state_active; expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents), "Unexpected rtree_write() failure"); for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[j]).edata, &edata, + keys[j]).edata, edata_c, "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, @@ -146,8 +160,8 @@ TEST_BEGIN(test_rtree_random) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - edata_t edata = {0}; - edata_init(&edata, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + edata_t *edata_d = alloc_edata(); + edata_init(edata_d, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_false(rtree_new(rtree, base, false), @@ -160,18 +174,19 @@ TEST_BEGIN(test_rtree_random) { expect_ptr_not_null(elm, "Unexpected rtree_leaf_elm_lookup() failure"); rtree_contents_t contents; - contents.edata = &edata; + contents.edata = edata_d; contents.metadata.szind = SC_NSIZES; contents.metadata.slab = false; contents.metadata.is_head = false; + contents.metadata.state = edata_state_get(edata_d); rtree_leaf_elm_write(tsdn, rtree, elm, contents); 
expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, &edata, + keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, &edata, + keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value, " "i=%u", i); } From 9ea235f8feffc5f486f290b49a5a6752adbe70bf Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 2 Mar 2021 17:26:26 -0800 Subject: [PATCH 2049/2608] Add witness_assert_positive_depth_to_rank(). --- include/jemalloc/internal/emap.h | 3 ++ include/jemalloc/internal/witness.h | 54 +++++++++++++++++++---------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 3e397483..afb4983c 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -139,6 +139,9 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { static inline void emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata, uintptr_t addr, extent_state_t state) { + witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE); + rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 66dcf664..4cebb6e1 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -243,26 +243,13 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn, } } -static inline void -witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, - witness_rank_t rank_inclusive, unsigned depth) { - witness_tsd_t *witness_tsd; - unsigned d; - witness_list_t *witnesses; - witness_t *w; +/* Returns depth. Not intended for direct use. 
*/ +static inline unsigned +witness_depth_to_rank(witness_list_t *witnesses, witness_rank_t rank_inclusive) +{ + unsigned d = 0; + witness_t *w = ql_last(witnesses, link); - if (!config_debug) { - return; - } - - if (witness_tsdn_null(witness_tsdn)) { - return; - } - witness_tsd = witness_tsdn_tsd(witness_tsdn); - - d = 0; - witnesses = &witness_tsd->witnesses; - w = ql_last(witnesses, link); if (w != NULL) { ql_reverse_foreach(w, witnesses, link) { if (w->rank < rank_inclusive) { @@ -271,6 +258,20 @@ witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, d++; } } + + return d; +} + +static inline void +witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive, unsigned depth) { + if (!config_debug || witness_tsdn_null(witness_tsdn)) { + return; + } + + witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + if (d != depth) { witness_depth_error(witnesses, rank_inclusive, depth); } @@ -286,6 +287,21 @@ witness_assert_lockless(witness_tsdn_t *witness_tsdn) { witness_assert_depth(witness_tsdn, 0); } +static inline void +witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn, + witness_rank_t rank_inclusive) { + if (!config_debug || witness_tsdn_null(witness_tsdn)) { + return; + } + + witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + + if (d == 0) { + witness_depth_error(witnesses, rank_inclusive, 1); + } +} + static inline void witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { witness_tsd_t *witness_tsd; From 1784939688b86e459ecb39615e463176dd609685 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 4 Mar 2021 14:33:40 -0800 Subject: [PATCH 2050/2608] Use rtree tracked states to protect edata outside of ecache locks. This avoids the addr-based mutexes (i.e. 
the mutex_pool), and instead relies on the metadata tracked in rtree leaf: the head state and extent_state. Before trying to access the neighbor edata (e.g. for coalescing), the states will be verified first -- only neighbor edatas from the same arena and with the same state will be accessed. --- include/jemalloc/internal/edata.h | 11 +- include/jemalloc/internal/emap.h | 119 ++++++++------ include/jemalloc/internal/rtree.h | 31 +++- src/emap.c | 263 +++++++++++++++++++++--------- src/eset.c | 3 +- src/extent.c | 151 +++++------------ src/hpa_central.c | 65 +++----- 7 files changed, 366 insertions(+), 277 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 648b478e..b2e6ee9f 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -23,7 +23,11 @@ enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, extent_state_muzzy = 2, - extent_state_retained = 3 + extent_state_retained = 3, + extent_state_transition = 4, /* States below are intermediate. */ + extent_state_updating = 4, + extent_state_merging = 5, + extent_state_max = 5 /* Sanity checking only. 
*/ }; typedef enum extent_state_e extent_state_t; @@ -550,6 +554,11 @@ edata_is_head_set(edata_t *edata, bool is_head) { ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } +static inline bool +edata_state_in_transition(extent_state_t state) { + return state >= extent_state_transition; +} + /* * Because this function is implemented as a sequence of bitfield modifications, * even though each individual bit is properly initialized, we technically read diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index afb4983c..239f3e43 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -5,6 +5,15 @@ #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/rtree.h" +/* + * Note: Ends without at semicolon, so that + * EMAP_DECLARE_RTREE_CTX; + * in uses will avoid empty-statement warnings. + */ +#define EMAP_DECLARE_RTREE_CTX \ + rtree_ctx_t rtree_ctx_fallback; \ + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) + typedef struct emap_s emap_t; struct emap_s { rtree_t rtree; @@ -31,20 +40,16 @@ bool emap_init(emap_t *emap, base_t *base, bool zeroed); void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); -/* - * Grab the lock or locks associated with the edata or edatas indicated (which - * is done just by simple address hashing). The hashing strategy means that - * it's never safe to grab locks incrementally -- you have to grab all the locks - * you'll need at once, and release them all at once. 
- */ -void emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); -void emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata); -void emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2); -void emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2); -edata_t *emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, - bool inactive_only); +void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t state); + +edata_t *emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, + extent_state_t expected_state, bool allow_head_extent); +edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, + edata_t *edata, extent_pai_t pai, extent_state_t expected_state, + bool forward); +void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t new_state); /* * Associate the given edata with its beginning and end address, setting the @@ -136,43 +141,66 @@ emap_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } } -static inline void -emap_update_rtree_at_addr(tsdn_t *tsdn, rtree_t *rtree, edata_t *expected_edata, - uintptr_t addr, extent_state_t state) { - witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE); +JEMALLOC_ALWAYS_INLINE bool +emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + assert(config_debug); + emap_assert_mapped(tsdn, emap, edata); - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata)); - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, - addr, /* dependent */ true, /* init_missing */ false); - assert(elm != NULL); - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true); - assert(contents.edata == 
expected_edata); - contents.metadata.state = state; - rtree_leaf_elm_write(tsdn, rtree, elm, contents); + return edata_state_in_transition(contents.metadata.state); } -static inline void -emap_edata_state_update(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t state) { - /* Only emap is allowed to modify the edata internal state. */ - edata_state_set(edata, state); +JEMALLOC_ALWAYS_INLINE bool +emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + if (!config_debug) { + /* For assertions only. */ + return false; + } - emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, - (uintptr_t)edata_base_get(edata), state); - emap_update_rtree_at_addr(tsdn, &emap->rtree, edata, - (uintptr_t)edata_last_get(edata), state); + /* + * The edata is considered acquired if no other threads will attempt to + * read / write any fields from it. This includes a few cases: + * + * 1) edata not hooked into emap yet -- This implies the edata just got + * allocated or initialized. + * + * 2) in an active or transition state -- In both cases, the edata can + * be discovered from the emap, however the state tracked in the rtree + * will prevent other threads from accessing the actual edata. 
+ */ + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, + /* init_missing */ false); + if (elm == NULL) { + return true; + } + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true); + if (contents.edata == NULL || + contents.metadata.state == extent_state_active || + edata_state_in_transition(contents.metadata.state)) { + return true; + } - emap_assert_mapped(tsdn, emap, edata); + return false; +} + +JEMALLOC_ALWAYS_INLINE void +extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { + assert(edata_arena_ind_get(inner) == edata_arena_ind_get(outer)); + assert(edata_pai_get(inner) == edata_pai_get(outer)); + assert(edata_committed_get(inner) == edata_committed_get(outer)); + assert(edata_state_get(inner) == extent_state_active); + assert(edata_state_get(outer) == extent_state_merging); } JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } @@ -181,8 +209,7 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { JEMALLOC_ALWAYS_INLINE void emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); @@ -194,8 +221,7 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, JEMALLOC_ALWAYS_INLINE void emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = 
tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); @@ -212,8 +238,7 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, JEMALLOC_ALWAYS_INLINE bool emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + EMAP_DECLARE_RTREE_CTX; rtree_contents_t contents; bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx, diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 89c08cb0..42aa11c9 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -81,7 +81,7 @@ struct rtree_leaf_elm_s { #else atomic_p_t le_edata; /* (edata_t *) */ /* - * From low to high bits: slab, is_head, state. + * From high to low bits: szind (8 bits), state (4 bits), is_head, slab */ atomic_u_t le_metadata; #endif @@ -187,6 +187,7 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, JEMALLOC_ALWAYS_INLINE uintptr_t rtree_leaf_elm_bits_encode(rtree_contents_t contents) { + assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0); uintptr_t edata_bits = (uintptr_t)contents.edata & (((uintptr_t)1 << LG_VADDR) - 1); @@ -212,6 +213,7 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >> RTREE_LEAF_STATE_SHIFT; + assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1); @@ -229,6 +231,7 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & low_bit_mask); # endif + assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0); return contents; } @@ -250,6 +253,7 @@ rtree_leaf_elm_read(tsdn_t 
*tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >> RTREE_LEAF_STATE_SHIFT; + assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH); @@ -283,6 +287,31 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, #endif } +/* The state field can be updated independently (and more frequently). */ +static inline void +rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) { + assert(elm1 != NULL); +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm1, + /* dependent */ true); + bits &= ~RTREE_LEAF_STATE_MASK; + bits |= state << RTREE_LEAF_STATE_SHIFT; + atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE); + if (elm2 != NULL) { + atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE); + } +#else + unsigned bits = atomic_load_u(&elm1->le_metadata, ATOMIC_RELAXED); + bits &= ~RTREE_LEAF_STATE_MASK; + bits |= state << RTREE_LEAF_STATE_SHIFT; + atomic_store_u(&elm1->le_metadata, bits, ATOMIC_RELEASE); + if (elm2 != NULL) { + atomic_store_u(&elm2->le_metadata, bits, ATOMIC_RELEASE); + } +#endif +} + /* * Tries to look up the key in the L1 cache, returning it if there's a hit, or * NULL if there's a miss. diff --git a/src/emap.c b/src/emap.c index 4f3915b5..26a079cb 100644 --- a/src/emap.c +++ b/src/emap.c @@ -3,15 +3,6 @@ #include "jemalloc/internal/emap.h" -/* - * Note: Ends without at semicolon, so that - * EMAP_DECLARE_RTREE_CTX; - * in uses will avoid empty-statement warnings. 
- */ -#define EMAP_DECLARE_RTREE_CTX \ - rtree_ctx_t rtree_ctx_fallback; \ - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) - enum emap_lock_result_e { emap_lock_result_success, emap_lock_result_failure, @@ -35,82 +26,186 @@ emap_init(emap_t *emap, base_t *base, bool zeroed) { } void -emap_lock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { - assert(edata != NULL); - mutex_pool_lock(tsdn, &emap->mtx_pool, (uintptr_t)edata); -} +emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t state) { + witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE); -void -emap_unlock_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { - assert(edata != NULL); - mutex_pool_unlock(tsdn, &emap->mtx_pool, (uintptr_t)edata); -} + edata_state_set(edata, state); -void -emap_lock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_lock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -void -emap_unlock_edata2(tsdn_t *tsdn, emap_t *emap, edata_t *edata1, - edata_t *edata2) { - assert(edata1 != NULL && edata2 != NULL); - mutex_pool_unlock2(tsdn, &emap->mtx_pool, (uintptr_t)edata1, - (uintptr_t)edata2); -} - -static inline emap_lock_result_t -emap_try_lock_rtree_leaf_elm(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm, - edata_t **result, bool inactive_only) { - edata_t *edata1 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true).edata; - - /* Slab implies active extents and should be skipped. */ - if (edata1 == NULL || (inactive_only && rtree_leaf_elm_read(tsdn, - &emap->rtree, elm, /* dependent */ true).metadata.slab)) { - return emap_lock_result_no_extent; - } - - /* - * It's possible that the extent changed out from under us, and with it - * the leaf->edata mapping. We have to recheck while holding the lock. 
- */ - emap_lock_edata(tsdn, emap, edata1); - edata_t *edata2 = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true).edata; - - if (edata1 == edata2) { - *result = edata1; - return emap_lock_result_success; - } else { - emap_unlock_edata(tsdn, emap, edata1); - return emap_lock_result_failure; - } -} - -/* - * Returns a pool-locked edata_t * if there's one associated with the given - * address, and NULL otherwise. - */ -edata_t * -emap_lock_edata_from_addr(tsdn_t *tsdn, emap_t *emap, void *addr, - bool inactive_only) { EMAP_DECLARE_RTREE_CTX; - edata_t *ret = NULL; + rtree_leaf_elm_t *elm1 = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, + /* init_missing */ false); + assert(elm1 != NULL); + rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE ? NULL : + rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), /* dependent */ true, + /* init_missing */ false); + + rtree_leaf_elm_state_update(tsdn, &emap->rtree, elm1, elm2, state); + + emap_assert_mapped(tsdn, emap, edata); +} + +static inline bool +edata_neighbor_head_state_mergeable(bool edata_is_head, + bool neighbor_is_head, bool forward) { + /* + * Head states checking: disallow merging if the higher addr extent is a + * head extent. This helps preserve first-fit, and more importantly + * makes sure no merge across arenas. + */ + if (forward) { + if (neighbor_is_head) { + return false; + } + } else { + if (edata_is_head) { + return false; + } + } + return true; +} + +static inline bool +edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, + extent_pai_t pai, extent_state_t expected_state, bool forward) { + edata_t *neighbor = contents.edata; + if (neighbor == NULL) { + return false; + } + /* It's not safe to access *neighbor yet; must verify states first. 
*/ + bool neighbor_is_head = contents.metadata.is_head; + if (!edata_neighbor_head_state_mergeable(edata_is_head_get(edata), + neighbor_is_head, forward)) { + return NULL; + } + extent_state_t neighbor_state = contents.metadata.state; + if (pai == EXTENT_PAI_PAC) { + if (neighbor_state != expected_state) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + if (edata_committed_get(edata) != + edata_committed_get(neighbor)) { + /* + * Some platforms (e.g. Windows) require an explicit + * commit step (and writing to uncomitted memory is not + * allowed). + */ + return false; + } + } else { + if (neighbor_state == extent_state_active) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + } + + assert(edata_pai_get(edata) == pai); + if (edata_pai_get(neighbor) != pai) { + return false; + } + if (opt_retain) { + assert(edata_arena_ind_get(edata) == + edata_arena_ind_get(neighbor)); + } else { + /* + * This isn't entirely safe with the presence of arena_reset / + * destroy, in which case the neighbor edata can be destoryed if + * it belongs to a manual arena. More on that later. + */ + if (edata_arena_ind_get(edata) != + edata_arena_ind_get(neighbor)) { + return false; + } + } + + return true; +} + +/* Will be removed in the next commit. 
*/ +edata_t * +emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, + extent_state_t expected_state, bool allow_head_extent) { + EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)addr, false, false); if (elm == NULL) { return NULL; } - emap_lock_result_t lock_result; - do { - lock_result = emap_try_lock_rtree_leaf_elm(tsdn, emap, elm, - &ret, inactive_only); - } while (lock_result == emap_lock_result_failure); - return ret; + rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, + /* dependent */ true); + if (!allow_head_extent && contents.metadata.is_head) { + /* !allow_head_extent indicates the expanding path. */ + return NULL; + } + + edata_t *edata = contents.edata; + if (edata == NULL || contents.metadata.state != expected_state) { + return NULL; + } + assert(edata_state_get(edata) == expected_state); + emap_update_edata_state(tsdn, emap, edata, extent_state_updating); + + return edata; +} + +void +emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t new_state) { + assert(emap_edata_in_transition(tsdn, emap, edata)); + assert(emap_edata_is_acquired(tsdn, emap, edata)); + + emap_update_edata_state(tsdn, emap, edata, new_state); +} + +edata_t * +emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_pai_t pai, extent_state_t expected_state, bool forward) { + witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE); + assert(!edata_state_in_transition(expected_state)); + assert(expected_state == extent_state_dirty || + expected_state == extent_state_muzzy || + expected_state == extent_state_retained); + + void *neighbor_addr = forward ? edata_past_get(edata) : + edata_before_get(edata); + /* + * This is subtle; the rtree code asserts that its input pointer is + * non-NULL, and this is a useful thing to check. 
But it's possible + * that edata corresponds to an address of (void *)PAGE (in practice, + * this has only been observed on FreeBSD when address-space + * randomization is on, but it could in principle happen anywhere). In + * this case, edata_before_get(edata) is NULL, triggering the assert. + */ + if (neighbor_addr == NULL) { + return NULL; + } + + EMAP_DECLARE_RTREE_CTX; + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)neighbor_addr, /* dependent*/ false, + /* init_missing */ false); + if (elm == NULL) { + return NULL; + } + + rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, + &emap->rtree, elm, /* dependent */ true); + if (!edata_can_acquire_neighbor(edata, neighbor_contents, pai, + expected_state, forward)) { + return NULL; + } + + /* From this point, the neighbor edata can be safely acquired. */ + edata_t *neighbor = neighbor_contents.edata; + emap_update_edata_state(tsdn, emap, neighbor, extent_state_merging); + extent_assert_can_coalesce(edata, neighbor); + + return neighbor; } static bool @@ -153,6 +248,7 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { + assert(edata_state_get(edata) == extent_state_active); EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; @@ -161,6 +257,10 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, if (err) { return true; } + assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a, + /* dependent */ false).edata == NULL); + assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b, + /* dependent */ false).edata == NULL); emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); return false; } @@ -190,6 +290,15 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { + /* + * The edata must be either in an acquired state, 
or protected by state + * based locks. + */ + if (!emap_edata_is_acquired(tsdn, emap, edata)) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + } + EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; diff --git a/src/eset.c b/src/eset.c index a52a6f7c..9183ac67 100644 --- a/src/eset.c +++ b/src/eset.c @@ -78,7 +78,8 @@ eset_insert(eset_t *eset, edata_t *edata) { void eset_remove(eset_t *eset, edata_t *edata) { - assert(edata_state_get(edata) == eset->state); + assert(edata_state_get(edata) == eset->state || + edata_state_in_transition(edata_state_get(edata))); size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); diff --git a/src/extent.c b/src/extent.c index 56ea33f6..e660d4c5 100644 --- a/src/extent.c +++ b/src/extent.c @@ -64,12 +64,12 @@ extent_may_force_decay(pac_t *pac) { static bool extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { - emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); + emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); bool coalesced; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, &coalesced, false); - emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); + emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { return true; @@ -183,7 +183,7 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, not_reached(); case extent_state_dirty: case extent_state_muzzy: - emap_edata_state_update(tsdn, pac->emap, edata, + emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); break; case extent_state_retained: @@ -230,7 +230,7 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); - emap_edata_state_update(tsdn, pac->emap, edata, ecache->state); + emap_update_edata_state(tsdn, 
pac->emap, edata, ecache->state); eset_insert(&ecache->eset, edata); } @@ -245,10 +245,11 @@ static void extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == ecache->state); + assert(edata_state_get(edata) == ecache->state || + edata_state_get(edata) == extent_state_updating); eset_remove(&ecache->eset, edata); - emap_edata_state_update(tsdn, pac->emap, edata, extent_state_active); + emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); } static void @@ -290,20 +291,16 @@ extent_gdump_sub(tsdn_t *tsdn, const edata_t *edata) { static bool extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) { + assert(edata_state_get(edata) == extent_state_active); /* - * We need to hold the lock to protect against a concurrent coalesce - * operation that sees us in a partial state. + * No locking needed, as the edata must be in active state, which + * prevents other threads from accessing the edata. 
*/ - emap_lock_edata(tsdn, pac->emap, edata); - if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES, /* slab */ false)) { - emap_unlock_edata(tsdn, pac->emap, edata); return true; } - emap_unlock_edata(tsdn, pac->emap, edata); - if (config_prof && gdump_add) { extent_gdump_add(tsdn, edata); } @@ -333,9 +330,7 @@ extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { static void extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) { - emap_lock_edata(tsdn, pac->emap, edata); emap_deregister_boundary(tsdn, pac->emap, edata); - emap_unlock_edata(tsdn, pac->emap, edata); if (config_prof && gdump) { extent_gdump_sub(tsdn, edata); @@ -383,22 +378,18 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (new_addr != NULL) { - edata = emap_lock_edata_from_addr(tsdn, pac->emap, new_addr, - false); + edata = emap_try_acquire_edata(tsdn, pac->emap, new_addr, + ecache->state, /* allow_head_extent*/ false); if (edata != NULL) { - /* - * We might null-out edata to report an error, but we - * still need to unlock the associated mutex after. 
- */ - edata_t *unlock_edata = edata; assert(edata_base_get(edata) == new_addr); - if (edata_arena_ind_get(edata) != ecache_ind_get(ecache) - || edata_size_get(edata) < size - || edata_state_get(edata) - != ecache->state) { + assert(edata_arena_ind_get(edata) == + ecache_ind_get(ecache)); + assert(edata_state_get(edata) == extent_state_updating); + if (edata_size_get(edata) < size) { + emap_release_edata(tsdn, pac->emap, edata, + ecache->state); edata = NULL; } - emap_unlock_edata(tsdn, pac->emap, unlock_edata); } } else { /* @@ -557,8 +548,8 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_deregister_no_gdump_sub(tsdn, pac, to_leak); extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); - assert(emap_lock_edata_from_addr(tsdn, pac->emap, - leak, false) == NULL); + assert(emap_try_acquire_edata(tsdn, pac->emap, + leak, ecache->state, true) == NULL); } return NULL; } @@ -806,42 +797,11 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } -static bool -extent_can_coalesce(ecache_t *ecache, const edata_t *inner, - const edata_t *outer) { - assert(edata_arena_ind_get(inner) == ecache_ind_get(ecache)); - - if (edata_arena_ind_get(inner) != edata_arena_ind_get(outer)) { - return false; - } - - /* - * We wouldn't really get into this situation because one or the other - * edata would have to have a head bit set to true, but this is - * conceptually correct and cheap. 
- */ - if (edata_pai_get(inner) != edata_pai_get(outer)) { - return false; - } - - assert(edata_state_get(inner) == extent_state_active); - if (edata_state_get(outer) != ecache->state) { - return false; - } - - if (edata_committed_get(inner) != edata_committed_get(outer)) { - return false; - } - - return true; -} - static bool extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { - assert(extent_can_coalesce(ecache, inner, outer)); - - extent_activate_locked(tsdn, pac, ecache, outer); + extent_assert_can_coalesce(inner, outer); + eset_remove(&ecache->eset, outer); malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, pac, ehooks, @@ -873,22 +833,11 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, again = false; /* Try to coalesce forward. */ - edata_t *next = emap_lock_edata_from_addr(tsdn, pac->emap, - edata_past_get(edata), inactive_only); + edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap, + edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); if (next != NULL) { - /* - * ecache->mtx only protects against races for - * like-state extents, so call extent_can_coalesce() - * before releasing next's pool lock. - */ - bool can_coalesce = extent_can_coalesce(ecache, - edata, next); - - emap_unlock_edata(tsdn, pac->emap, next); - - if (can_coalesce && !extent_coalesce(tsdn, pac, - ehooks, ecache, edata, next, true, - growing_retained)) { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, + next, true, growing_retained)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -899,30 +848,11 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } /* Try to coalesce backward. 
*/ - edata_t *prev = NULL; - if (edata_before_get(edata) != NULL) { - /* - * This is subtle; the rtree code asserts that its input - * pointer is non-NULL, and this is a useful thing to - * check. But it's possible that edata corresponds to - * an address of (void *)PAGE (in practice, this has - * only been observed on FreeBSD when address-space - * randomization is on, but it could in principle happen - * anywhere). In this case, edata_before_get(edata) is - * NULL, triggering the assert. - */ - prev = emap_lock_edata_from_addr(tsdn, pac->emap, - edata_before_get(edata), inactive_only); - - } + edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap, + edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); if (prev != NULL) { - bool can_coalesce = extent_can_coalesce(ecache, edata, - prev); - emap_unlock_edata(tsdn, pac->emap, prev); - - if (can_coalesce && !extent_coalesce(tsdn, pac, - ehooks, ecache, edata, prev, false, - growing_retained)) { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, + prev, false, growing_retained)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. */ @@ -1218,24 +1148,27 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, goto label_error_b; } - emap_lock_edata2(tsdn, pac->emap, edata, trail); + /* + * No need to acquire trail or edata, because: 1) trail was new (just + * allocated); and 2) edata is either an active allocation (the shrink + * path), or in an acquired state (extracted from the ecache on the + * extent_recycle_split path). 
+ */ + assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); + assert(emap_edata_is_acquired(tsdn, pac->emap, trail)); err = ehooks_split(tsdn, ehooks, edata_base_get(edata), size_a + size_b, size_a, size_b, edata_committed_get(edata)); if (err) { - goto label_error_c; + goto label_error_b; } edata_size_set(edata, size_a); emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); - emap_unlock_edata2(tsdn, pac->emap, edata, trail); - return trail; -label_error_c: - emap_unlock_edata2(tsdn, pac->emap, edata, trail); label_error_b: edata_cache_put(tsdn, pac->edata_cache, trail); label_error_a: @@ -1277,15 +1210,15 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, emap_prepare_t prepare; emap_merge_prepare(tsdn, pac->emap, &prepare, a, b); - emap_lock_edata2(tsdn, pac->emap, a, b); - + assert(edata_state_get(a) == extent_state_active || + edata_state_get(a) == extent_state_merging); + edata_state_set(a, extent_state_active); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? 
edata_sn_get(a) : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); emap_merge_commit(tsdn, pac->emap, &prepare, a, b); - emap_unlock_edata2(tsdn, pac->emap, a, b); edata_cache_put(tsdn, pac->edata_cache, b); diff --git a/src/hpa_central.c b/src/hpa_central.c index 36758a03..9e00dd64 100644 --- a/src/hpa_central.c +++ b/src/hpa_central.c @@ -33,15 +33,16 @@ hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, emap_prepare_t prepare; bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, size, trail, cursize - size); + assert(edata_state_get(edata) == edata_state_get(trail)); if (err) { edata_cache_small_put(tsdn, ¢ral->ecs, trail); return NULL; } - emap_lock_edata2(tsdn, central->emap, edata, trail); + assert(edata_state_get(edata) == edata_state_get(trail)); + edata_size_set(edata, size); emap_split_commit(tsdn, central->emap, &prepare, edata, size, trail, cursize - size); - emap_unlock_edata2(tsdn, central->emap, edata, trail); return trail; } @@ -91,7 +92,7 @@ label_success: */ assert(edata_state_get(edata) == extent_state_dirty); assert(edata_base_get(edata) == edata_addr_get(edata)); - emap_edata_state_update(tsdn, central->emap, edata, + emap_update_edata_state(tsdn, central->emap, edata, extent_state_active); return edata; } @@ -137,43 +138,22 @@ hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, edata_sn_set(edata, sn); edata_sn_set(trail, sn); - emap_edata_state_update(tsdn, central->emap, trail, extent_state_dirty); + emap_update_edata_state(tsdn, central->emap, trail, extent_state_dirty); eset_insert(¢ral->eset, trail); return false; } -static edata_t * -hpa_central_dalloc_get_merge_candidate(tsdn_t *tsdn, hpa_central_t *central, - void *addr) { - edata_t *edata = emap_lock_edata_from_addr(tsdn, central->emap, addr, - /* inactive_only */ true); - if (edata == NULL) { - return NULL; - } - extent_pai_t pai = edata_pai_get(edata); - extent_state_t state = edata_state_get(edata); - 
emap_unlock_edata(tsdn, central->emap, edata); - - if (pai != EXTENT_PAI_HPA) { - return NULL; - } - if (state == extent_state_active) { - return NULL; - } - - return edata; -} - /* Merges b into a, freeing b back to the edata cache.. */ static void hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, edata_t *b) { + assert(emap_edata_is_acquired(tsdn, central->emap, a)); + assert(emap_edata_is_acquired(tsdn, central->emap, b)); + emap_prepare_t prepare; emap_merge_prepare(tsdn, central->emap, &prepare, a, b); - emap_lock_edata2(tsdn, central->emap, a, b); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); emap_merge_commit(tsdn, central->emap, &prepare, a, b); - emap_unlock_edata2(tsdn, central->emap, a, b); edata_cache_small_put(tsdn, ¢ral->ecs, b); } @@ -189,21 +169,24 @@ hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - if (!edata_is_head_get(edata)) { - edata_t *lead = hpa_central_dalloc_get_merge_candidate(tsdn, - central, edata_before_get(edata)); - if (lead != NULL) { - eset_remove(¢ral->eset, lead); - hpa_central_dalloc_merge(tsdn, central, lead, edata); - edata = lead; - } - } - edata_t *trail = hpa_central_dalloc_get_merge_candidate(tsdn, central, - edata_past_get(edata)); - if (trail != NULL && !edata_is_head_get(trail)) { + /* + * Merge forward first, so that the original *edata stays active state + * for the second acquire (only necessary for sanity checking). 
+ */ + edata_t *trail = emap_try_acquire_edata_neighbor(tsdn, central->emap, + edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ true); + if (trail != NULL) { eset_remove(¢ral->eset, trail); hpa_central_dalloc_merge(tsdn, central, edata, trail); } - emap_edata_state_update(tsdn, central->emap, edata, extent_state_dirty); + edata_t *lead = emap_try_acquire_edata_neighbor(tsdn, central->emap, + edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ false); + if (lead != NULL) { + eset_remove(¢ral->eset, lead); + hpa_central_dalloc_merge(tsdn, central, lead, edata); + edata = lead; + } + + emap_update_edata_state(tsdn, central->emap, edata, extent_state_dirty); eset_insert(¢ral->eset, edata); } From 49b7d7f0a4731e060df095075bedf6391058a0cd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 00:21:47 -0800 Subject: [PATCH 2051/2608] Passing down the original edata on the expand path. Instead of passing down the new_addr, pass down the active edata which allows us to always use a neighbor-acquiring semantic. In other words, this tells us both the original edata and neighbor address. With this change, only neighbors of a "known" edata can be acquired, i.e. acquiring an edata based on an arbitrary address isn't possible anymore. --- include/jemalloc/internal/edata.h | 1 - include/jemalloc/internal/emap.h | 30 +++++++++- include/jemalloc/internal/extent.h | 6 +- src/emap.c | 84 ++++++++++++++-------------- src/extent.c | 88 ++++++++++++++---------------- src/pac.c | 8 +-- 6 files changed, 116 insertions(+), 101 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index b2e6ee9f..55d1dfed 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -25,7 +25,6 @@ enum extent_state_e { extent_state_muzzy = 2, extent_state_retained = 3, extent_state_transition = 4, /* States below are intermediate. 
*/ - extent_state_updating = 4, extent_state_merging = 5, extent_state_max = 5 /* Sanity checking only. */ }; diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 239f3e43..5a5dbb6d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -43,11 +43,26 @@ void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state); -edata_t *emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, - extent_state_t expected_state, bool allow_head_extent); +/* + * The two acquire functions below allow accessing neighbor edatas, if it's safe + * and valid to do so (i.e. from the same arena, of the same state, etc.). This + * is necessary because the ecache locks are state based, and only protect + * edatas with the same state. Therefore the neighbor edata's state needs to be + * verified first, before chasing the edata pointer. The returned edata will be + * in an acquired state, meaning other threads will be prevented from accessing + * it, even if technically the edata can still be discovered from the rtree. + * + * This means, at any moment when holding pointers to edata, either one of the + * state based locks is held (and the edatas are all of the protected state), or + * the edatas are in an acquired state (e.g. in active or merging state). The + * acquire operation itself (changing the edata to an acquired state) is done + * under the state locks. 
+ */ edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state, bool forward); +edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, + edata_t *edata, extent_pai_t pai, extent_state_t expected_state); void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state); @@ -196,6 +211,17 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_committed_get(inner) == edata_committed_get(outer)); assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); + assert(edata_base_get(inner) == edata_past_get(outer) || + edata_base_get(outer) == edata_past_get(inner)); +} + +JEMALLOC_ALWAYS_INLINE void +extent_assert_can_expand(const edata_t *original, const edata_t *expand) { + assert(edata_arena_ind_get(original) == edata_arena_ind_get(expand)); + assert(edata_pai_get(original) == edata_pai_get(expand)); + assert(edata_state_get(original) == extent_state_active); + assert(edata_state_get(expand) == extent_state_merging); + assert(edata_past_get(original) == edata_base_get(expand)); } JEMALLOC_ALWAYS_INLINE edata_t * diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index f2fee5c1..6a17ba60 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -20,9 +20,11 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, + bool zero); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, bool zero); + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, + bool zero); void ecache_dalloc(tsdn_t *tsdn, 
pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, diff --git a/src/emap.c b/src/emap.c index 26a079cb..949b53e5 100644 --- a/src/emap.c +++ b/src/emap.c @@ -70,7 +70,8 @@ edata_neighbor_head_state_mergeable(bool edata_is_head, static inline bool edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, - extent_pai_t pai, extent_state_t expected_state, bool forward) { + extent_pai_t pai, extent_state_t expected_state, bool forward, + bool expanding) { edata_t *neighbor = contents.edata; if (neighbor == NULL) { return false; @@ -87,8 +88,8 @@ edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } /* From this point, it's safe to access *neighbor. */ - if (edata_committed_get(edata) != - edata_committed_get(neighbor)) { + if (!expanding && (edata_committed_get(edata) != + edata_committed_get(neighbor))) { /* * Some platforms (e.g. Windows) require an explicit * commit step (and writing to uncomitted memory is not @@ -125,47 +126,13 @@ edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return true; } -/* Will be removed in the next commit. */ -edata_t * -emap_try_acquire_edata(tsdn_t *tsdn, emap_t *emap, void *addr, - extent_state_t expected_state, bool allow_head_extent) { - EMAP_DECLARE_RTREE_CTX; - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)addr, false, false); - if (elm == NULL) { - return NULL; - } - rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true); - if (!allow_head_extent && contents.metadata.is_head) { - /* !allow_head_extent indicates the expanding path. 
*/ - return NULL; - } - - edata_t *edata = contents.edata; - if (edata == NULL || contents.metadata.state != expected_state) { - return NULL; - } - assert(edata_state_get(edata) == expected_state); - emap_update_edata_state(tsdn, emap, edata, extent_state_updating); - - return edata; -} - -void -emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t new_state) { - assert(emap_edata_in_transition(tsdn, emap, edata)); - assert(emap_edata_is_acquired(tsdn, emap, edata)); - - emap_update_edata_state(tsdn, emap, edata, new_state); -} - -edata_t * -emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_pai_t pai, extent_state_t expected_state, bool forward) { +static inline edata_t * +emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_pai_t pai, extent_state_t expected_state, bool forward, + bool expanding) { witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + assert(!expanding || forward); assert(!edata_state_in_transition(expected_state)); assert(expected_state == extent_state_dirty || expected_state == extent_state_muzzy || @@ -196,18 +163,47 @@ emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ true); if (!edata_can_acquire_neighbor(edata, neighbor_contents, pai, - expected_state, forward)) { + expected_state, forward, expanding)) { return NULL; } /* From this point, the neighbor edata can be safely acquired. 
*/ edata_t *neighbor = neighbor_contents.edata; + assert(edata_state_get(neighbor) == expected_state); emap_update_edata_state(tsdn, emap, neighbor, extent_state_merging); - extent_assert_can_coalesce(edata, neighbor); + if (expanding) { + extent_assert_can_expand(edata, neighbor); + } else { + extent_assert_can_coalesce(edata, neighbor); + } return neighbor; } +edata_t * +emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_pai_t pai, extent_state_t expected_state, bool forward) { + return emap_try_acquire_edata_neighbor_impl(tsdn, emap, edata, pai, + expected_state, forward, /* expand */ false); +} + +edata_t * +emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, + edata_t *edata, extent_pai_t pai, extent_state_t expected_state) { + /* Try expanding forward. */ + return emap_try_acquire_edata_neighbor_impl(tsdn, emap, edata, pai, + expected_state, /* forward */ true, /* expand */ true); +} + +void +emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, + extent_state_t new_state) { + assert(emap_edata_in_transition(tsdn, emap, edata)); + assert(emap_edata_is_acquired(tsdn, emap, edata)); + + emap_update_edata_state(tsdn, emap, edata, new_state); +} + static bool emap_rtree_leaf_elms_lookup(tsdn_t *tsdn, emap_t *emap, rtree_ctx_t *rtree_ctx, const edata_t *edata, bool dependent, bool init_missing, diff --git a/src/extent.c b/src/extent.c index e660d4c5..6d9e0029 100644 --- a/src/extent.c +++ b/src/extent.c @@ -36,15 +36,15 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t usize, size_t alignment, bool zero, - bool *commit, bool growing_retained); + ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, + bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t 
*ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, - ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, - bool *commit); + ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, + bool zero, bool *commit); static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); @@ -80,14 +80,14 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t * ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, new_addr, + edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, zero, &commit, false); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; @@ -95,25 +95,27 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t * ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool commit = true; - edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, new_addr, + edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata, size, alignment, zero, &commit); if (edata == NULL) 
{ - if (opt_retain && new_addr != NULL) { + if (opt_retain && expand_edata != NULL) { /* - * When retain is enabled and new_addr is set, we do not - * attempt extent_alloc_wrapper which does mmap that is - * very unlikely to succeed (unless it happens to be at - * the end). + * When retain is enabled and trying to expand, we do + * not attempt extent_alloc_wrapper which does mmap that + * is very unlikely to succeed (unless it happens to be + * at the end). */ return NULL; } + void *new_addr = (expand_edata == NULL) ? NULL : + edata_past_get(expand_edata); edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size, alignment, zero, &commit); } @@ -246,7 +248,7 @@ extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state || - edata_state_get(edata) == extent_state_updating); + edata_state_get(edata) == extent_state_merging); eset_remove(&ecache->eset, edata); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); @@ -354,37 +356,30 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); - if (config_debug && new_addr != NULL) { + if (config_debug && expand_edata != NULL) { /* - * Non-NULL new_addr has two use cases: - * - * 1) Recycle a known-extant extent, e.g. during purging. - * 2) Perform in-place expanding reallocation. 
- * - * Regardless of use case, new_addr must either refer to a - * non-existing extent, or to the base of an extant extent, - * since only active slabs support interior lookups (which of - * course cannot be recycled). + * Non-NULL expand_edata indicates in-place expanding realloc. + * new_addr must either refer to a non-existing extent, or to + * the base of an extant extent, since only active slabs support + * interior lookups (which of course cannot be recycled). */ + void *new_addr = edata_past_get(expand_edata); assert(PAGE_ADDR2BASE(new_addr) == new_addr); assert(alignment <= PAGE); } malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; - if (new_addr != NULL) { - edata = emap_try_acquire_edata(tsdn, pac->emap, new_addr, - ecache->state, /* allow_head_extent*/ false); + if (expand_edata != NULL) { + edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, + expand_edata, EXTENT_PAI_PAC, ecache->state); if (edata != NULL) { - assert(edata_base_get(edata) == new_addr); - assert(edata_arena_ind_get(edata) == - ecache_ind_get(ecache)); - assert(edata_state_get(edata) == extent_state_updating); + extent_assert_can_expand(expand_edata, edata); if (edata_size_get(edata) < size) { emap_release_edata(tsdn, pac->emap, edata, ecache->state); @@ -454,10 +449,11 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ edata_t **to_leak, edata_t **to_salvage, - void *new_addr, size_t size, size_t alignment, bool growing_retained) { + edata_t *expand_edata, size_t size, size_t alignment, + bool growing_retained) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); - assert(new_addr == NULL || leadsize == 0); + assert(expand_edata == NULL || leadsize == 0); if (edata_size_get(*edata) < leadsize + size) { return extent_split_interior_cant_alloc; } @@ -504,7 +500,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ static edata_t * extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, void *new_addr, size_t size, size_t alignment, + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { edata_t *lead; edata_t *trail; @@ -513,7 +509,7 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - new_addr, size, alignment, growing_retained); + expand_edata, size, alignment, growing_retained); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -544,12 +540,9 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_deregister(tsdn, pac, to_salvage); } if (to_leak != NULL) { - void *leak = edata_base_get(to_leak); extent_deregister_no_gdump_sub(tsdn, pac, to_leak); extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); - assert(emap_try_acquire_edata(tsdn, pac->emap, - leak, ecache->state, true) == NULL); } return NULL; } @@ -562,17 +555,17 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ static edata_t * extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, - bool growing_retained) { 
+ edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - new_addr, size, alignment, growing_retained); + expand_edata, size, alignment, growing_retained); if (edata == NULL) { return NULL; } - edata = extent_recycle_split(tsdn, pac, ehooks, ecache, new_addr, + edata = extent_recycle_split(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, edata, growing_retained); if (edata == NULL) { return NULL; @@ -742,21 +735,22 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool *commit) { assert(size != 0); assert(alignment != 0); malloc_mutex_lock(tsdn, &pac->grow_mtx); edata_t *edata = extent_recycle(tsdn, pac, ehooks, - &pac->ecache_retained, new_addr, size, alignment, zero, - commit, /* growing_retained */ true); + &pac->ecache_retained, expand_edata, size, alignment, zero, commit, + /* growing_retained */ true); if (edata != NULL) { malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } - } else if (opt_retain && new_addr == NULL) { + } else if (opt_retain && expand_edata == NULL) { edata = extent_grow_retained(tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases pac->grow_mtx. 
*/ diff --git a/src/pac.c b/src/pac.c index 93427ca1..0737e68c 100644 --- a/src/pac.c +++ b/src/pac.c @@ -133,9 +133,7 @@ static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero) { pac_t *pac = (pac_t *)self; - ehooks_t *ehooks = pac_ehooks_get(pac); - void *trail_begin = edata_past_get(edata); size_t mapped_add = 0; size_t expand_amount = new_size - old_size; @@ -144,14 +142,14 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - trail_begin, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero); if (trail == NULL) { trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - trail_begin, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, trail_begin, expand_amount, PAGE, + &pac->ecache_retained, edata, expand_amount, PAGE, zero); mapped_add = expand_amount; } From add636596afecb87e220d31ae75a9ba0b4601fbc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Mar 2021 23:41:51 -0800 Subject: [PATCH 2052/2608] Stop checking head state in the merge hook. Now that all merging go through try_acquire_edata_neighbor, the mergeablility checks (including head state checking) are done before reaching the merge hook. In other words, merge hook will never be called if the head state doesn't agree. 
--- include/jemalloc/internal/ehooks.h | 20 ++++-------------- include/jemalloc/internal/emap.h | 20 ++++++++++++++++++ src/ehooks.c | 34 ++++++++++++++---------------- src/emap.c | 20 ------------------ src/extent.c | 4 ++-- 5 files changed, 42 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index bae468b3..064ecf5a 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -61,8 +61,7 @@ bool ehooks_default_split_impl(); bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind); -bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, - void *addr_b, bool head_b); +bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b); void ehooks_default_zero_impl(void *addr, size_t size); /* @@ -338,21 +337,10 @@ ehooks_split(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, static inline bool ehooks_merge(tsdn_t *tsdn, ehooks_t *ehooks, void *addr_a, size_t size_a, - bool head_a, void *addr_b, size_t size_b, bool head_b, bool committed) { + void *addr_b, size_t size_b, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - /* - * The definition of extent_hooks merge function doesn't know about - * extent head state, but the implementation does. As a result, it - * needs to call iealloc again and walk the rtree. Since the cost of an - * iealloc is large relative to the cost of the default merge hook - * (which on posix-likes is just "return false"), we go even further - * when we short-circuit; we don't just check if the extent hooks - * generally are default, we check if the merge hook specifically is. 
- */ - if (extent_hooks == &ehooks_default_extent_hooks - || extent_hooks->merge == &ehooks_default_merge) { - return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, - head_b); + if (extent_hooks == &ehooks_default_extent_hooks) { + return ehooks_default_merge_impl(tsdn, addr_a, addr_b); } else if (extent_hooks->merge == NULL) { return true; } else { diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5a5dbb6d..364aefac 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -224,6 +224,26 @@ extent_assert_can_expand(const edata_t *original, const edata_t *expand) { assert(edata_past_get(original) == edata_base_get(expand)); } +JEMALLOC_ALWAYS_INLINE bool +edata_neighbor_head_state_mergeable(bool edata_is_head, + bool neighbor_is_head, bool forward) { + /* + * Head states checking: disallow merging if the higher addr extent is a + * head extent. This helps preserve first-fit, and more importantly + * makes sure no merge across arenas. + */ + if (forward) { + if (neighbor_is_head) { + return false; + } + } else { + if (edata_is_head) { + return false; + } + } + return true; +} + JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { EMAP_DECLARE_RTREE_CTX; diff --git a/src/ehooks.c b/src/ehooks.c index e1815ee8..ca3ca209 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -188,11 +188,10 @@ ehooks_default_split(extent_hooks_t *extent_hooks, void *addr, size_t size, } bool -ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, - bool head_b) { +ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) { assert(addr_a < addr_b); /* - * For non-DSS cases (first 2 branches) -- + * For non-DSS cases -- * a) W/o maps_coalesce, merge is not always allowed (Windows): * 1) w/o retain, never merge (first branch below). 
* 2) with retain, only merge extents from the same VirtualAlloc @@ -204,17 +203,23 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, bool head_a, void *addr_b, * disallowed if b is a head extent, i.e. no merging across * different mmap regions. * - * a2) and b2) share the implementation (the no_merge_heads branch). + * a2) and b2) are implemented in emap_try_acquire_edata_neighbor, and + * sanity checked in the second branch below. */ if (!maps_coalesce && !opt_retain) { return true; } - /* - * Don't merge across mappings when retain is on -- this preserves - * first-fit ordering. - */ - if (opt_retain && head_b) { - return true; + if (config_debug) { + edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, + addr_a); + bool head_a = edata_is_head_get(a); + edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, + addr_b); + bool head_b = edata_is_head_get(b); + emap_assert_mapped(tsdn, &arena_emap_global, a); + emap_assert_mapped(tsdn, &arena_emap_global, b); + assert(edata_neighbor_head_state_mergeable(head_a, head_b, + /* forward */ true)); } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { return true; @@ -228,14 +233,7 @@ ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { tsdn_t *tsdn = tsdn_fetch(); - edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, addr_a); - bool head_a = edata_is_head_get(a); - edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, addr_b); - bool head_b = edata_is_head_get(b); - emap_assert_mapped(tsdn, &arena_emap_global, a); - emap_assert_mapped(tsdn, &arena_emap_global, b); - - return ehooks_default_merge_impl(tsdn, addr_a, head_a, addr_b, head_b); + return ehooks_default_merge_impl(tsdn, addr_a, addr_b); } void diff --git a/src/emap.c b/src/emap.c index 949b53e5..0fe230ab 100644 --- a/src/emap.c +++ b/src/emap.c @@ -48,26 +48,6 @@ emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, 
emap_assert_mapped(tsdn, emap, edata); } -static inline bool -edata_neighbor_head_state_mergeable(bool edata_is_head, - bool neighbor_is_head, bool forward) { - /* - * Head states checking: disallow merging if the higher addr extent is a - * head extent. This helps preserve first-fit, and more importantly - * makes sure no merge across arenas. - */ - if (forward) { - if (neighbor_is_head) { - return false; - } - } else { - if (edata_is_head) { - return false; - } - } - return true; -} - static inline bool edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, extent_pai_t pai, extent_state_t expected_state, bool forward, diff --git a/src/extent.c b/src/extent.c index 6d9e0029..1748d98b 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1189,8 +1189,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, emap_assert_mapped(tsdn, pac->emap, b); bool err = ehooks_merge(tsdn, ehooks, edata_base_get(a), - edata_size_get(a), edata_is_head_get(a), edata_base_get(b), - edata_size_get(b), edata_is_head_get(b), edata_committed_get(a)); + edata_size_get(a), edata_base_get(b), edata_size_get(b), + edata_committed_get(a)); if (err) { return true; From 7c964b03524de23eeff7fe203c764c7a0c0977ac Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 17 Mar 2021 16:35:57 -0700 Subject: [PATCH 2053/2608] Add rtree_write_range(): writing the same content to multiple leaf elements. Apply to emap_(de)register_interior which became noticeable in perf profiles. 
--- include/jemalloc/internal/rtree.h | 111 ++++++++++++++++++++++++------ src/emap.c | 35 +++++++--- src/pa.c | 2 +- test/unit/rtree.c | 66 +++++++++++++++++- 4 files changed, 182 insertions(+), 32 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 42aa11c9..c5f0d8c4 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -137,23 +137,24 @@ bool rtree_new(rtree_t *rtree, base_t *base, bool zeroed); rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, bool dependent, bool init_missing); -JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leafkey(uintptr_t key) { +JEMALLOC_ALWAYS_INLINE unsigned +rtree_leaf_maskbits(void) { unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - rtree_levels[RTREE_HEIGHT-1].bits); - unsigned maskbits = ptrbits - cumbits; - uintptr_t mask = ~((ZU(1) << maskbits) - 1); + return ptrbits - cumbits; +} + +JEMALLOC_ALWAYS_INLINE uintptr_t +rtree_leafkey(uintptr_t key) { + uintptr_t mask = ~((ZU(1) << rtree_leaf_maskbits()) - 1); return (key & mask); } JEMALLOC_ALWAYS_INLINE size_t rtree_cache_direct_map(uintptr_t key) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - - rtree_levels[RTREE_HEIGHT-1].bits); - unsigned maskbits = ptrbits - cumbits; - return (size_t)((key >> maskbits) & (RTREE_CTX_NCACHE - 1)); + return (size_t)((key >> rtree_leaf_maskbits()) & + (RTREE_CTX_NCACHE - 1)); } JEMALLOC_ALWAYS_INLINE uintptr_t @@ -265,30 +266,49 @@ rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, #endif } -static inline void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, rtree_contents_t contents) { - assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); +JEMALLOC_ALWAYS_INLINE void +rtree_contents_encode(rtree_contents_t contents, void **bits, + 
unsigned *additional) { #ifdef RTREE_LEAF_COMPACT - uintptr_t bits = rtree_leaf_elm_bits_encode(contents); - atomic_store_p(&elm->le_bits, (void *)bits, ATOMIC_RELEASE); + *bits = (void *)rtree_leaf_elm_bits_encode(contents); #else - unsigned metadata_bits = (unsigned)contents.metadata.slab + *additional = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT) | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH)); - atomic_store_u(&elm->le_metadata, metadata_bits, ATOMIC_RELEASE); + *bits = contents.edata; +#endif +} + +JEMALLOC_ALWAYS_INLINE void +rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, void *bits, unsigned additional) { +#ifdef RTREE_LEAF_COMPACT + atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE); +#else + atomic_store_u(&elm->le_metadata, additional, ATOMIC_RELEASE); /* * Write edata last, since the element is atomically considered valid * as soon as the edata field is non-NULL. */ - atomic_store_p(&elm->le_edata, contents.edata, ATOMIC_RELEASE); + atomic_store_p(&elm->le_edata, bits, ATOMIC_RELEASE); #endif } +JEMALLOC_ALWAYS_INLINE void +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, + rtree_leaf_elm_t *elm, rtree_contents_t contents) { + assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); + void *bits; + unsigned additional; + + rtree_contents_encode(contents, &bits, &additional); + rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); +} + /* The state field can be updated independently (and more frequently). 
*/ -static inline void +JEMALLOC_ALWAYS_INLINE void rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm1, rtree_leaf_elm_t *elm2, extent_state_t state) { assert(elm1 != NULL); @@ -447,7 +467,45 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct return false; } -static inline bool +JEMALLOC_ALWAYS_INLINE void +rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t base, uintptr_t end, rtree_contents_t contents, bool clearing) { + assert((base & PAGE_MASK) == 0 && (end & PAGE_MASK) == 0); + /* + * Only used for emap_(de)register_interior, which implies the + * boundaries have been registered already. Therefore all the lookups + * are dependent w/o init_missing, assuming the range spans across at + * most 2 rtree leaf nodes (each covers 1 GiB of vaddr). + */ + void *bits; + unsigned additional; + rtree_contents_encode(contents, &bits, &additional); + + rtree_leaf_elm_t *elm = NULL; /* Dead store. 
*/ + for (uintptr_t addr = base; addr <= end; addr += PAGE) { + if (addr == base || + (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) { + elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + /* dependent */ true, /* init_missing */ false); + assert(elm != NULL); + } + assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + /* dependent */ true, /* init_missing */ false)); + assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true).edata != NULL); + rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); + elm++; + } +} + +JEMALLOC_ALWAYS_INLINE void +rtree_write_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t base, uintptr_t end, rtree_contents_t contents) { + rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents, + /* clearing */ false); +} + +JEMALLOC_ALWAYS_INLINE bool rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_contents_t contents) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, @@ -478,4 +536,17 @@ rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_write(tsdn, rtree, elm, contents); } +static inline void +rtree_clear_range(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, + uintptr_t base, uintptr_t end) { + rtree_contents_t contents; + contents.edata = NULL; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = false; + contents.metadata.is_head = false; + contents.metadata.state = (extent_state_t)0; + rtree_write_range_impl(tsdn, rtree, rtree_ctx, base, end, contents, + /* clearing */ true); +} + #endif /* JEMALLOC_INTERNAL_RTREE_H */ diff --git a/src/emap.c b/src/emap.c index 0fe230ab..a1f402b8 100644 --- a/src/emap.c +++ b/src/emap.c @@ -241,6 +241,7 @@ emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, return false; } +/* Invoked *after* emap_register_boundary. 
*/ void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind) { @@ -249,6 +250,22 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, assert(edata_slab_get(edata)); assert(edata_state_get(edata) == extent_state_active); + if (config_debug) { + /* Making sure the boundary is registered already. */ + rtree_leaf_elm_t *elm_a, *elm_b; + bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, + edata, /* dependent */ true, /* init_missing */ false, + &elm_a, &elm_b); + assert(!err); + rtree_contents_t contents_a, contents_b; + contents_a = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a, + /* dependent */ true); + contents_b = rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b, + /* dependent */ true); + assert(contents_a.edata == edata && contents_b.edata == edata); + assert(contents_a.metadata.slab && contents_b.metadata.slab); + } + rtree_contents_t contents; contents.edata = edata; contents.metadata.szind = szind; @@ -256,12 +273,10 @@ emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, contents.metadata.state = extent_state_active; contents.metadata.is_head = false; /* Not allowed to access. */ - /* Register interior. 
*/ - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_write(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE), contents); - } + assert(edata_size_get(edata) > (2 << LG_PAGE)); + rtree_write_range(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + PAGE, + (uintptr_t)edata_last_get(edata) - PAGE, contents); } void @@ -289,10 +304,10 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; assert(edata_slab_get(edata)); - for (size_t i = 1; i < (edata_size_get(edata) >> LG_PAGE) - 1; i++) { - rtree_clear(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata) + (uintptr_t)(i << - LG_PAGE)); + if (edata_size_get(edata) > (2 << LG_PAGE)) { + rtree_clear_range(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_base_get(edata) + PAGE, + (uintptr_t)edata_last_get(edata) - PAGE); } } diff --git a/src/pa.c b/src/pa.c index dd61aaa2..90809b35 100644 --- a/src/pa.c +++ b/src/pa.c @@ -120,7 +120,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, emap_remap(tsdn, shard->emap, edata, szind, slab); edata_szind_set(edata, szind); edata_slab_set(edata, slab); - if (slab) { + if (slab && (size > 2 * PAGE)) { emap_register_interior(tsdn, shard->emap, edata, szind); } } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 9251652c..7b2a4e36 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -210,11 +210,75 @@ TEST_BEGIN(test_rtree_random) { } TEST_END +static void +test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, + uintptr_t end) { + rtree_ctx_t rtree_ctx; + rtree_ctx_data_init(&rtree_ctx); + + edata_t *edata_e = alloc_edata(); + edata_init(edata_e, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, + extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + rtree_contents_t contents; + contents.edata = edata_e; + contents.metadata.szind = SC_NSIZES; + contents.metadata.slab = 
false; + contents.metadata.is_head = false; + contents.metadata.state = extent_state_active; + + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, + contents), "Unexpected rtree_write() failure"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, + contents), "Unexpected rtree_write() failure"); + + rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents); + for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) { + expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, + start + (i << LG_PAGE)).edata, edata_e, + "rtree_edata_read() should return previously set value"); + } + rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end); + rtree_leaf_elm_t *elm; + for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) { + elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, + start + (i << LG_PAGE), false, false); + expect_ptr_not_null(elm, "Should have been initialized."); + expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm, + false).edata, "Should have been cleared."); + } +} + +TEST_BEGIN(test_rtree_range) { + tsdn_t *tsdn = tsdn_fetch(); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + expect_ptr_not_null(base, "Unexpected base_new failure"); + + rtree_t *rtree = &test_rtree; + expect_false(rtree_new(rtree, base, false), + "Unexpected rtree_new() failure"); + + /* Not crossing rtree node boundary first. */ + uintptr_t start = ZU(1) << rtree_leaf_maskbits(); + uintptr_t end = start + (ZU(100) << LG_PAGE); + test_rtree_range_write(tsdn, rtree, start, end); + + /* Crossing rtree node boundary. 
*/ + start = (ZU(1) << rtree_leaf_maskbits()) - (ZU(10) << LG_PAGE); + end = start + (ZU(100) << LG_PAGE); + assert_ptr_ne((void *)rtree_leafkey(start), (void *)rtree_leafkey(end), + "The range should span across two rtree nodes"); + test_rtree_range_write(tsdn, rtree, start, end); + + base_delete(tsdn, base); +} +TEST_END + int main(void) { return test( test_rtree_read_empty, test_rtree_extrema, test_rtree_bits, - test_rtree_random); + test_rtree_random, + test_rtree_range); } From 3093d9455eb179d75ec8a17b1073ee605fb1f0a9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Mar 2021 15:32:44 -0700 Subject: [PATCH 2054/2608] Move the edata mergeability related functions to extent.h. --- include/jemalloc/internal/emap.h | 20 -------- include/jemalloc/internal/extent.h | 73 ++++++++++++++++++++++++++++++ src/ehooks.c | 2 +- src/emap.c | 60 +----------------------- 4 files changed, 75 insertions(+), 80 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 364aefac..5a5dbb6d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -224,26 +224,6 @@ extent_assert_can_expand(const edata_t *original, const edata_t *expand) { assert(edata_past_get(original) == edata_base_get(expand)); } -JEMALLOC_ALWAYS_INLINE bool -edata_neighbor_head_state_mergeable(bool edata_is_head, - bool neighbor_is_head, bool forward) { - /* - * Head states checking: disallow merging if the higher addr extent is a - * head extent. This helps preserve first-fit, and more importantly - * makes sure no merge across arenas. 
- */ - if (forward) { - if (neighbor_is_head) { - return false; - } - } else { - if (edata_is_head) { - return false; - } - } - return true; -} - JEMALLOC_ALWAYS_INLINE edata_t * emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { EMAP_DECLARE_RTREE_CTX; diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 6a17ba60..b39e5ed5 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -51,4 +51,77 @@ bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t extent_sn_next(pac_t *pac); bool extent_boot(void); +JEMALLOC_ALWAYS_INLINE bool +extent_neighbor_head_state_mergeable(bool edata_is_head, + bool neighbor_is_head, bool forward) { + /* + * Head states checking: disallow merging if the higher addr extent is a + * head extent. This helps preserve first-fit, and more importantly + * makes sure no merge across arenas. + */ + if (forward) { + if (neighbor_is_head) { + return false; + } + } else { + if (edata_is_head) { + return false; + } + } + return true; +} + +JEMALLOC_ALWAYS_INLINE bool +extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, + extent_pai_t pai, extent_state_t expected_state, bool forward, + bool expanding) { + edata_t *neighbor = contents.edata; + if (neighbor == NULL) { + return false; + } + /* It's not safe to access *neighbor yet; must verify states first. */ + bool neighbor_is_head = contents.metadata.is_head; + if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata), + neighbor_is_head, forward)) { + return NULL; + } + extent_state_t neighbor_state = contents.metadata.state; + if (pai == EXTENT_PAI_PAC) { + if (neighbor_state != expected_state) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + if (!expanding && (edata_committed_get(edata) != + edata_committed_get(neighbor))) { + /* + * Some platforms (e.g. 
Windows) require an explicit + * commit step (and writing to uncomitted memory is not + * allowed). + */ + return false; + } + } else { + if (neighbor_state == extent_state_active) { + return false; + } + /* From this point, it's safe to access *neighbor. */ + } + + assert(edata_pai_get(edata) == pai); + if (edata_pai_get(neighbor) != pai) { + return false; + } + if (opt_retain) { + assert(edata_arena_ind_get(edata) == + edata_arena_ind_get(neighbor)); + } else { + if (edata_arena_ind_get(edata) != + edata_arena_ind_get(neighbor)) { + return false; + } + } + + return true; +} + #endif /* JEMALLOC_INTERNAL_EXTENT_H */ diff --git a/src/ehooks.c b/src/ehooks.c index ca3ca209..535066e7 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -218,7 +218,7 @@ ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b) { bool head_b = edata_is_head_get(b); emap_assert_mapped(tsdn, &arena_emap_global, a); emap_assert_mapped(tsdn, &arena_emap_global, b); - assert(edata_neighbor_head_state_mergeable(head_a, head_b, + assert(extent_neighbor_head_state_mergeable(head_a, head_b, /* forward */ true)); } if (have_dss && !extent_dss_mergeable(addr_a, addr_b)) { diff --git a/src/emap.c b/src/emap.c index a1f402b8..1cc4fc81 100644 --- a/src/emap.c +++ b/src/emap.c @@ -48,64 +48,6 @@ emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, emap_assert_mapped(tsdn, emap, edata); } -static inline bool -edata_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, - extent_pai_t pai, extent_state_t expected_state, bool forward, - bool expanding) { - edata_t *neighbor = contents.edata; - if (neighbor == NULL) { - return false; - } - /* It's not safe to access *neighbor yet; must verify states first. 
*/ - bool neighbor_is_head = contents.metadata.is_head; - if (!edata_neighbor_head_state_mergeable(edata_is_head_get(edata), - neighbor_is_head, forward)) { - return NULL; - } - extent_state_t neighbor_state = contents.metadata.state; - if (pai == EXTENT_PAI_PAC) { - if (neighbor_state != expected_state) { - return false; - } - /* From this point, it's safe to access *neighbor. */ - if (!expanding && (edata_committed_get(edata) != - edata_committed_get(neighbor))) { - /* - * Some platforms (e.g. Windows) require an explicit - * commit step (and writing to uncomitted memory is not - * allowed). - */ - return false; - } - } else { - if (neighbor_state == extent_state_active) { - return false; - } - /* From this point, it's safe to access *neighbor. */ - } - - assert(edata_pai_get(edata) == pai); - if (edata_pai_get(neighbor) != pai) { - return false; - } - if (opt_retain) { - assert(edata_arena_ind_get(edata) == - edata_arena_ind_get(neighbor)); - } else { - /* - * This isn't entirely safe with the presence of arena_reset / - * destroy, in which case the neighbor edata can be destoryed if - * it belongs to a manual arena. More on that later. 
- */ - if (edata_arena_ind_get(edata) != - edata_arena_ind_get(neighbor)) { - return false; - } - } - - return true; -} - static inline edata_t * emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state, bool forward, @@ -142,7 +84,7 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ true); - if (!edata_can_acquire_neighbor(edata, neighbor_contents, pai, + if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, expected_state, forward, expanding)) { return NULL; } From 03d95cba8868f99fa18683d1e82596467ed08c7e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 19 Mar 2021 00:23:46 -0700 Subject: [PATCH 2055/2608] Remove the unnecessary arena_ind_set in base_alloc_edata(). All edata alloc sites are already followed with proper edata_init(). --- src/base.c | 1 - src/hpa.c | 1 - 2 files changed, 2 deletions(-) diff --git a/src/base.c b/src/base.c index 9d4ce5c5..44878ad4 100644 --- a/src/base.c +++ b/src/base.c @@ -476,7 +476,6 @@ base_alloc_edata(tsdn_t *tsdn, base_t *base) { if (edata == NULL) { return NULL; } - edata_arena_ind_set(edata, ehooks_ind_get(&base->ehooks)); edata_esn_set(edata, esn); return edata; } diff --git a/src/hpa.c b/src/hpa.c index 7d4fa1bf..a234e6c5 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -454,7 +454,6 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, *oom = true; return NULL; } - assert(edata_arena_ind_get(edata) == shard->ind); hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { From 7dc77527ba1fa8a2764b975e9955a55cbb46d034 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 Mar 2021 17:44:18 -0700 Subject: [PATCH 2056/2608] Delete the mutex_pool module. 
--- Makefile.in | 1 - include/jemalloc/internal/emap.h | 3 - include/jemalloc/internal/mutex_pool.h | 94 ------------------- include/jemalloc/internal/witness.h | 1 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - src/emap.c | 12 +-- src/mutex_pool.c | 17 ---- 10 files changed, 1 insertion(+), 135 deletions(-) delete mode 100644 include/jemalloc/internal/mutex_pool.h delete mode 100644 src/mutex_pool.c diff --git a/Makefile.in b/Makefile.in index 11a553b0..c00ad0f3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -128,7 +128,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ $(srcroot)src/mutex.c \ - $(srcroot)src/mutex_pool.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5a5dbb6d..a40b504b 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_EMAP_H #include "jemalloc/internal/base.h" -#include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/rtree.h" /* @@ -17,8 +16,6 @@ typedef struct emap_s emap_t; struct emap_s { rtree_t rtree; - /* Keyed by the address of the edata_t being protected. */ - mutex_pool_t mtx_pool; }; /* Used to pass rtree lookup context down the path. */ diff --git a/include/jemalloc/internal/mutex_pool.h b/include/jemalloc/internal/mutex_pool.h deleted file mode 100644 index 726cece9..00000000 --- a/include/jemalloc/internal/mutex_pool.h +++ /dev/null @@ -1,94 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_MUTEX_POOL_H -#define JEMALLOC_INTERNAL_MUTEX_POOL_H - -#include "jemalloc/internal/hash.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/witness.h" - -/* We do mod reductions by this value, so it should be kept a power of 2. 
*/ -#define MUTEX_POOL_SIZE 256 - -typedef struct mutex_pool_s mutex_pool_t; -struct mutex_pool_s { - malloc_mutex_t mutexes[MUTEX_POOL_SIZE]; -}; - -bool mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank); - -/* Internal helper - not meant to be called outside this module. */ -static inline malloc_mutex_t * -mutex_pool_mutex(mutex_pool_t *pool, uintptr_t key) { - size_t hash_result[2]; - hash(&key, sizeof(key), 0xd50dcc1b, hash_result); - return &pool->mutexes[hash_result[0] % MUTEX_POOL_SIZE]; -} - -static inline void -mutex_pool_assert_not_held(tsdn_t *tsdn, mutex_pool_t *pool) { - for (int i = 0; i < MUTEX_POOL_SIZE; i++) { - malloc_mutex_assert_not_owner(tsdn, &pool->mutexes[i]); - } -} - -/* - * Note that a mutex pool doesn't work exactly the way an embdedded mutex would. - * You're not allowed to acquire mutexes in the pool one at a time. You have to - * acquire all the mutexes you'll need in a single function call, and then - * release them all in a single function call. 
- */ - -static inline void -mutex_pool_lock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { - mutex_pool_assert_not_held(tsdn, pool); - - malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); - malloc_mutex_lock(tsdn, mutex); -} - -static inline void -mutex_pool_unlock(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { - malloc_mutex_t *mutex = mutex_pool_mutex(pool, key); - malloc_mutex_unlock(tsdn, mutex); - - mutex_pool_assert_not_held(tsdn, pool); -} - -static inline void -mutex_pool_lock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, - uintptr_t key2) { - mutex_pool_assert_not_held(tsdn, pool); - - malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); - malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); - if ((uintptr_t)mutex1 < (uintptr_t)mutex2) { - malloc_mutex_lock(tsdn, mutex1); - malloc_mutex_lock(tsdn, mutex2); - } else if ((uintptr_t)mutex1 == (uintptr_t)mutex2) { - malloc_mutex_lock(tsdn, mutex1); - } else { - malloc_mutex_lock(tsdn, mutex2); - malloc_mutex_lock(tsdn, mutex1); - } -} - -static inline void -mutex_pool_unlock2(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key1, - uintptr_t key2) { - malloc_mutex_t *mutex1 = mutex_pool_mutex(pool, key1); - malloc_mutex_t *mutex2 = mutex_pool_mutex(pool, key2); - if (mutex1 == mutex2) { - malloc_mutex_unlock(tsdn, mutex1); - } else { - malloc_mutex_unlock(tsdn, mutex1); - malloc_mutex_unlock(tsdn, mutex2); - } - - mutex_pool_assert_not_held(tsdn, pool); -} - -static inline void -mutex_pool_assert_owner(tsdn_t *tsdn, mutex_pool_t *pool, uintptr_t key) { - malloc_mutex_assert_owner(tsdn, mutex_pool_mutex(pool, key)); -} - -#endif /* JEMALLOC_INTERNAL_MUTEX_POOL_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 4cebb6e1..0c29321c 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -57,7 +57,6 @@ enum witness_rank_e { WITNESS_RANK_EDATA_CACHE, - WITNESS_RANK_EMAP, WITNESS_RANK_RTREE, WITNESS_RANK_BASE, 
WITNESS_RANK_ARENA_LARGE, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9ec953a2..a66ca36a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -69,7 +69,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 210204a5..0c8e6c7c 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -91,9 +91,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 171b95f2..94fcd7bf 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -69,7 +69,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 210204a5..0c8e6c7c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -91,9 +91,6 @@ Source Files - - Source Files - Source Files diff --git a/src/emap.c b/src/emap.c index 1cc4fc81..e37fea38 100644 --- a/src/emap.c +++ b/src/emap.c @@ -12,17 +12,7 @@ typedef enum emap_lock_result_e emap_lock_result_t; bool emap_init(emap_t *emap, base_t *base, bool zeroed) { - bool err; - err = rtree_new(&emap->rtree, base, zeroed); - if (err) { - return true; - } - err = mutex_pool_init(&emap->mtx_pool, "emap_mutex_pool", - WITNESS_RANK_EMAP); - if (err) { - return true; - } - return false; + return rtree_new(&emap->rtree, base, zeroed); } void diff --git a/src/mutex_pool.c b/src/mutex_pool.c deleted file mode 100644 index d7861dcd..00000000 --- a/src/mutex_pool.c +++ /dev/null @@ -1,17 +0,0 @@ - -#include "jemalloc/internal/jemalloc_preamble.h" -#include 
"jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/mutex_pool.h" - -bool -mutex_pool_init(mutex_pool_t *pool, const char *name, witness_rank_t rank) { - for (int i = 0; i < MUTEX_POOL_SIZE; ++i) { - if (malloc_mutex_init(&pool->mutexes[i], name, rank, - malloc_mutex_address_ordered)) { - return true; - } - } - return false; -} From ce68f326b0c6bc5f2ba126a9cc8afef3f8a70039 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Mar 2021 16:09:37 -0700 Subject: [PATCH 2057/2608] Avoid the release & re-acquire of the ecache locks around the merge hook. --- src/extent.c | 72 +++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) diff --git a/src/extent.c b/src/extent.c index 1748d98b..c2b8790e 100644 --- a/src/extent.c +++ b/src/extent.c @@ -22,7 +22,7 @@ static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b, bool growing_retained); + edata_t *a, edata_t *b, bool holding_core_locks); /* Used exclusively for gdump triggering. 
*/ static atomic_zu_t curpages; @@ -39,9 +39,9 @@ static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, bool zero, bool *commit, bool growing_retained); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained); + ecache_t *ecache, edata_t *edata, bool *coalesced); static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained); + ecache_t *ecache, edata_t *edata); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit); @@ -68,7 +68,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool coalesced; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, - edata, &coalesced, false); + edata, &coalesced); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { @@ -136,7 +136,7 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); - extent_record(tsdn, pac, ehooks, ecache, edata, false); + extent_record(tsdn, pac, ehooks, ecache, edata); } edata_t * @@ -574,8 +574,7 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, if (*commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained)) { - extent_record(tsdn, pac, ehooks, ecache, edata, - growing_retained); + extent_record(tsdn, pac, ehooks, ecache, edata); return NULL; } } @@ -664,11 +663,11 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (result == extent_split_interior_ok) { if (lead != NULL) { extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - lead, true); + lead); } if (trail != NULL) 
{ - extent_record(tsdn, pac, ehooks, - &pac->ecache_retained, trail, true); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + trail); } } else { /* @@ -681,7 +680,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, to_salvage); } extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - to_salvage, true); + to_salvage); } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, pac, to_leak); @@ -695,7 +694,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (extent_commit_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { extent_record(tsdn, pac, ehooks, - &pac->ecache_retained, edata, true); + &pac->ecache_retained, edata); goto label_err; } /* A successful commit should return zeroed memory. */ @@ -793,15 +792,13 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static bool extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - edata_t *inner, edata_t *outer, bool forward, bool growing_retained) { + edata_t *inner, edata_t *outer, bool forward) { extent_assert_can_coalesce(inner, outer); eset_remove(&ecache->eset, outer); - malloc_mutex_unlock(tsdn, &ecache->mtx); bool err = extent_merge_impl(tsdn, pac, ehooks, - forward ? inner : outer, forward ? outer : inner, growing_retained); - malloc_mutex_lock(tsdn, &ecache->mtx); - + forward ? inner : outer, forward ? 
outer : inner, + /* holding_core_locks */ true); if (err) { extent_deactivate_locked(tsdn, pac, ecache, outer); } @@ -811,8 +808,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained, - bool inactive_only) { + ecache_t *ecache, edata_t *edata, bool *coalesced) { /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -831,7 +827,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); if (next != NULL) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - next, true, growing_retained)) { + next, true)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -846,7 +842,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); if (prev != NULL) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - prev, false, growing_retained)) { + prev, false)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -866,16 +862,16 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t * extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + ecache_t *ecache, edata_t *edata, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced, growing_retained, false); + coalesced); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced, bool growing_retained) { + ecache_t *ecache, edata_t *edata, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced, growing_retained, true); + coalesced); } /* Purge a single extent to retained / unmapped directly. */ @@ -906,7 +902,7 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool growing_retained) { + ecache_t *ecache, edata_t *edata) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -917,7 +913,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, - NULL, growing_retained); + NULL); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. 
*/ @@ -925,7 +921,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, pac, ehooks, - ecache, edata, &coalesced, growing_retained); + ecache, edata, &coalesced); } while (coalesced); if (edata_size_get(edata) >= atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) @@ -1020,8 +1016,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_sub(tsdn, edata); } - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata, - false); + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); } void @@ -1178,11 +1173,17 @@ extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, - edata_t *b, bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); - assert(edata_base_get(a) < edata_base_get(b)); + edata_t *b, bool holding_core_locks) { + /* Only the expanding path may merge w/o holding ecache locks. 
*/ + if (holding_core_locks) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + } else { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } + assert(edata_base_get(a) < edata_base_get(b)); assert(edata_arena_ind_get(a) == edata_arena_ind_get(b)); assert(edata_arena_ind_get(a) == ehooks_ind_get(ehooks)); emap_assert_mapped(tsdn, pac->emap, a); @@ -1222,7 +1223,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) { - return extent_merge_impl(tsdn, pac, ehooks, a, b, false); + return extent_merge_impl(tsdn, pac, ehooks, a, b, + /* holding_core_locks */ false); } bool From 9b523c6c15814e6662a1f659576996e047b7f965 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Mar 2021 16:55:22 -0700 Subject: [PATCH 2058/2608] Refactor the locking in extent_recycle(). Hold the ecache lock across extent_recycle_extract() and extent_recycle_split(), so that the extent_deactivate after split can avoid re-take the ecache mutex. 
--- src/extent.c | 65 ++++++++++++++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 30 deletions(-) diff --git a/src/extent.c b/src/extent.c index c2b8790e..04001142 100644 --- a/src/extent.c +++ b/src/extent.c @@ -20,7 +20,7 @@ static bool extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata, size_t size_a, size_t size_b, bool growing_retained); + edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks); static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b, bool holding_core_locks); @@ -229,6 +229,7 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static void extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == extent_state_active); @@ -236,13 +237,6 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_insert(&ecache->eset, edata); } -static void -extent_deactivate(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { - malloc_mutex_lock(tsdn, &ecache->mtx); - extent_deactivate_locked(tsdn, pac, ecache, edata); - malloc_mutex_unlock(tsdn, &ecache->mtx); -} - static void extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { @@ -356,10 +350,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, - bool growing_retained) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained 
? 1 : 0); + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(alignment > 0); if (config_debug && expand_edata != NULL) { /* @@ -373,7 +365,6 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(alignment <= PAGE); } - malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata; if (expand_edata != NULL) { edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, @@ -407,12 +398,9 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, lg_max_fit); } if (edata == NULL) { - malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; } - extent_activate_locked(tsdn, pac, ecache, edata); - malloc_mutex_unlock(tsdn, &ecache->mtx); return edata; } @@ -449,8 +437,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. */ edata_t **to_leak, edata_t **to_salvage, - edata_t *expand_edata, size_t size, size_t alignment, - bool growing_retained) { + edata_t *expand_edata, size_t size, size_t alignment) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); assert(expand_edata == NULL || leadsize == 0); @@ -468,7 +455,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (leadsize != 0) { *lead = *edata; *edata = extent_split_impl(tsdn, pac, ehooks, *lead, leadsize, - size + trailsize, growing_retained); + size + trailsize, /* holding_core_locks*/ true); if (*edata == NULL) { *to_leak = *lead; *lead = NULL; @@ -479,7 +466,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Split the trail. 
*/ if (trailsize != 0) { *trail = extent_split_impl(tsdn, pac, ehooks, *edata, size, - trailsize, growing_retained); + trailsize, /* holding_core_locks */ true); if (*trail == NULL) { *to_leak = *edata; *to_salvage = *lead; @@ -502,6 +489,8 @@ static edata_t * extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { + malloc_mutex_assert_owner(tsdn, &ecache->mtx); + edata_t *lead; edata_t *trail; edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); @@ -509,7 +498,7 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior( tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - expand_edata, size, alignment, growing_retained); + expand_edata, size, alignment); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -518,16 +507,16 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * leaking the extent. */ assert(to_leak != NULL && lead == NULL && trail == NULL); - extent_deactivate(tsdn, pac, ecache, to_leak); + extent_deactivate_locked(tsdn, pac, ecache, to_leak); return NULL; } if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_deactivate(tsdn, pac, ecache, lead); + extent_deactivate_locked(tsdn, pac, ecache, lead); } if (trail != NULL) { - extent_deactivate(tsdn, pac, ecache, trail); + extent_deactivate_locked(tsdn, pac, ecache, trail); } return edata; } else { @@ -541,8 +530,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } if (to_leak != NULL) { extent_deregister_no_gdump_sub(tsdn, pac, to_leak); + /* + * May go down the purge path (which assume no ecache + * locks). Only happens with OOM caused split failures. 
+ */ + malloc_mutex_unlock(tsdn, &ecache->mtx); extents_abandon_vm(tsdn, pac, ehooks, ecache, to_leak, growing_retained); + malloc_mutex_lock(tsdn, &ecache->mtx); } return NULL; } @@ -559,14 +554,18 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); + + malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - expand_edata, size, alignment, growing_retained); + expand_edata, size, alignment); if (edata == NULL) { + malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; } edata = extent_recycle_split(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, edata, growing_retained); + malloc_mutex_unlock(tsdn, &ecache->mtx); if (edata == NULL) { return NULL; } @@ -658,7 +657,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_split_interior_result_t result = extent_split_interior(tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, - size, alignment, /* growing_retained */ true); + size, alignment); if (result == extent_split_interior_ok) { if (lead != NULL) { @@ -1111,10 +1110,16 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, */ static edata_t * extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata, size_t size_a, size_t size_b, bool growing_retained) { + edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) { assert(edata_size_get(edata) == size_a + size_b); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, growing_retained ? 1 : 0); + /* Only the shrink path may split w/o holding core locks. 
*/ + if (holding_core_locks) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + } else { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } if (ehooks_split_will_fail(ehooks)) { return NULL; @@ -1168,7 +1173,7 @@ edata_t * extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b) { return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, - /* growing_retained */ false); + /* holding_core_locks */ false); } static bool From 304cdbb132b607cc22ca16eb0e37e4c6d8ecd201 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 30 Mar 2021 14:55:28 -0700 Subject: [PATCH 2059/2608] Fix a prof_recent/prof_sys_thread_name interaction When both of these are enabled, the output format changes slightly. Teach the unit test about the interaction. --- test/unit/prof_recent.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index e16a849a..9974d105 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -439,6 +439,18 @@ confirm_record(const char *template, const confirm_record_t *records, } ASSERT_CHAR(','); + if (opt_prof_sys_thread_name) { + ASSERT_FORMATTED_STR("\"%s_thread_name\"", + *type); + ASSERT_CHAR(':'); + ASSERT_CHAR('"'); + while (*start != '"') { + ++start; + } + ASSERT_CHAR('"'); + ASSERT_CHAR(','); + } + ASSERT_FORMATTED_STR("\"%s_time\"", *type); ASSERT_CHAR(':'); while (isdigit(*start)) { From 12cd13cd418512d9e7596921ccdb62e25a103f87 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 30 Mar 2021 15:20:30 -0700 Subject: [PATCH 2060/2608] Fix thread.name/prof_sys_thread_name interaction When prof_sys_thread_name is true, we don't allow setting the thread name. Teach the unit test this. 
--- test/unit/prof_thread_name.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 4a9d38a2..3c4614fc 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -22,7 +22,7 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, int line) { expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&thread_name, sizeof(thread_name)), 0, - "%s():%d: Unexpected mallctl failure reading thread.prof.name", + "%s():%d: Unexpected mallctl failure writing thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); } @@ -33,6 +33,7 @@ TEST_BEGIN(test_prof_thread_name_validation) { const char *thread_name; test_skip_if(!config_prof); + test_skip_if(opt_prof_sys_thread_name); mallctl_thread_name_get(""); mallctl_thread_name_set("hi there"); @@ -94,12 +95,13 @@ thd_start(void *varg) { } TEST_BEGIN(test_prof_thread_name_threaded) { + test_skip_if(!config_prof); + test_skip_if(opt_prof_sys_thread_name); + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; - test_skip_if(!config_prof); - for (i = 0; i < NTHREADS; i++) { thd_args[i] = i; thd_create(&thds[i], thd_start, (void *)&thd_args[i]); From 4f7cb3a413a966056a6c23eb996ba1d51d0517a3 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 3 May 2021 17:14:47 -0700 Subject: [PATCH 2061/2608] Sized deallocation: fix a typo. dealloction -> deallocation. --- src/safety_check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/safety_check.c b/src/safety_check.c index 9747afef..552b3121 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -10,7 +10,7 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, safety_check_fail(": size mismatch detected (true size %zu " "vs input size %zu), likely caused by application sized " - "dealloction bugs (source address: %p, %s). 
Suggest building with " + "deallocation bugs (source address: %p, %s). Suggest building with " "--enable-debug or address sanitizer for debugging. Abort.\n", true_size, input_size, ptr, src); } From 1f688490e176aafbc3e3529d3025df7fcbce725b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 5 May 2021 16:51:43 -0700 Subject: [PATCH 2062/2608] Stats: Fix a printing bug when hpa_dirty_mult = -1 Missed a layer of indirection. --- src/stats.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 69cb2d3e..ef173034 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1476,8 +1476,9 @@ stats_general_print(emitter_t *emitter) { * representation. */ if (u32v == (uint32_t)-1) { + const char *neg1 = "-1"; emitter_kv(emitter, "hpa_dirty_mult", - "opt.hpa_dirty_mult", emitter_type_string, "-1"); + "opt.hpa_dirty_mult", emitter_type_string, &neg1); } else { char buf[FXP_BUF_SIZE]; fxp_print(u32v, buf); From aea91b8c338594daed753c94f33ff32d4b23fdc9 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 17 Feb 2021 16:23:24 -0800 Subject: [PATCH 2063/2608] Clean up some minor data structure inconsistencies Namely, unify the include guard styling with the majority of the project, and do flat_bitmap -> fb, to match its naming convention. 
--- Makefile.in | 2 +- include/jemalloc/internal/eset.h | 2 +- include/jemalloc/internal/{flat_bitmap.h => fb.h} | 0 include/jemalloc/internal/hpdata.h | 2 +- include/jemalloc/internal/ph.h | 8 ++++---- include/jemalloc/internal/rb.h | 8 ++++---- src/hpa.c | 2 +- src/psset.c | 2 +- test/unit/{flat_bitmap.c => fb.c} | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) rename include/jemalloc/internal/{flat_bitmap.h => fb.h} (100%) rename test/unit/{flat_bitmap.c => fb.c} (99%) diff --git a/Makefile.in b/Makefile.in index c00ad0f3..130fa1ee 100644 --- a/Makefile.in +++ b/Makefile.in @@ -215,7 +215,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/edata_cache.c \ $(srcroot)test/unit/emitter.c \ $(srcroot)test/unit/extent_quantize.c \ - ${srcroot}test/unit/flat_bitmap.c \ + ${srcroot}test/unit/fb.c \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ $(srcroot)test/unit/hash.c \ diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 7b53ecd8..ff5e57d1 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ESET_H #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/flat_bitmap.h b/include/jemalloc/internal/fb.h similarity index 100% rename from include/jemalloc/internal/flat_bitmap.h rename to include/jemalloc/internal/fb.h diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 245116b9..4ff2e575 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HPDATA_H #define JEMALLOC_INTERNAL_HPDATA_H -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/typed_list.h" 
diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 84d6778a..63aeac91 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_PH_H +#define JEMALLOC_INTERNAL_PH_H + /* * A Pairing Heap implementation. * @@ -12,9 +15,6 @@ ******************************************************************************* */ -#ifndef PH_H_ -#define PH_H_ - /* Node structure. */ #define phn(a_type) \ struct { \ @@ -388,4 +388,4 @@ a_prefix##remove(a_ph_type *ph, a_type *phn) { \ } \ } -#endif /* PH_H_ */ +#endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 47fa5ca9..dfc705aa 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_INTERNAL_RB_H +#define JEMALLOC_INTERNAL_RB_H + /*- ******************************************************************************* * @@ -19,9 +22,6 @@ ******************************************************************************* */ -#ifndef RB_H_ -#define RB_H_ - #ifndef __PGI #define RB_COMPACT #endif @@ -1003,4 +1003,4 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ rbtree->rbt_root = NULL; \ } -#endif /* RB_H_ */ +#endif /* JEMALLOC_INTERNAL_RB_H */ diff --git a/src/hpa.c b/src/hpa.c index a234e6c5..22cf0072 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/hpa.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/witness.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) diff --git a/src/psset.c b/src/psset.c index c4053efc..5978202a 100644 --- a/src/psset.c +++ b/src/psset.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/psset.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" void psset_init(psset_t *psset) { diff --git a/test/unit/flat_bitmap.c b/test/unit/fb.c similarity index 99% rename from 
test/unit/flat_bitmap.c rename to test/unit/fb.c index 6b0bcc34..d5126f6b 100644 --- a/test/unit/flat_bitmap.c +++ b/test/unit/fb.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/flat_bitmap.h" +#include "jemalloc/internal/fb.h" #include "test/nbits.h" static void From b2c08ef2e62a72951488c1603113b2d3881bd9d6 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 4 Mar 2021 15:08:41 -0800 Subject: [PATCH 2064/2608] RB unit tests: don't test reentrantly. The RB code doesn't do any allocation, and takes a little bit of time to run. There's no sense in doing everything three times. --- test/unit/rb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/rb.c b/test/unit/rb.c index 2509a6dd..a594fb71 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -349,7 +349,7 @@ TEST_END int main(void) { - return test( + return test_no_reentrancy( test_rb_empty, test_rb_random); } From 5417938215384d9373d290ba30d5dcccc5db5c80 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 16 Mar 2021 18:08:04 -0700 Subject: [PATCH 2065/2608] Red-black tree: add summarize/filter. This allows tracking extra information in the nodes of an red-black tree to filter searches in the tree to just those that match some property. --- include/jemalloc/internal/rb.h | 912 +++++++++++++++++++++++++++++++-- test/unit/rb.c | 740 ++++++++++++++++++++++++-- 2 files changed, 1583 insertions(+), 69 deletions(-) diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index dfc705aa..a9a51cb6 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -26,6 +26,15 @@ #define RB_COMPACT #endif +/* + * Each node in the RB tree consumes at least 1 byte of space (for the linkage + * if nothing else, so there are a maximum of sizeof(void *) << 3 rb tree nodes + * in any process (and thus, at most sizeof(void *) << 3 nodes in any rb tree). 
+ * The choice of algorithm bounds the depth of a tree to twice the binary log of + * the number of elements in the tree; the following bound follows. + */ +#define RB_MAX_DEPTH (sizeof(void *) << 4) + #ifdef RB_COMPACT /* Node structure. */ #define rb_node(a_type) \ @@ -159,12 +168,22 @@ struct { \ rbtn_right_set(a_type, a_field, (r_node), (a_node)); \ } while (0) +#define rb_summarized_only_false(...) +#define rb_summarized_only_true(...) __VA_ARGS__ +#define rb_empty_summarize(a_node, a_lchild, a_rchild) false + /* - * The rb_proto() macro generates function prototypes that correspond to the - * functions generated by an equivalently parameterized call to rb_gen(). + * The rb_proto() and rb_summarized_proto() macros generate function prototypes + * that correspond to the functions generated by an equivalently parameterized + * call to rb_gen() or rb_summarized_gen(), respectively. */ #define rb_proto(a_attr, a_prefix, a_rbt_type, a_type) \ + rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, false) +#define rb_summarized_proto(a_attr, a_prefix, a_rbt_type, a_type) \ + rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, true) +#define rb_proto_impl(a_attr, a_prefix, a_rbt_type, a_type, \ + a_is_summarized) \ a_attr void \ a_prefix##new(a_rbt_type *rbtree); \ a_attr bool \ @@ -195,31 +214,94 @@ a_prefix##reverse_iter(a_rbt_type *rbtree, a_type *start, \ a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg); \ a_attr void \ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ - void *arg); + void *arg); \ +/* Extended API */ \ +rb_summarized_only_##a_is_summarized( \ +a_attr void \ +a_prefix##update_summaries(a_rbt_type *rbtree, a_type *node); \ +a_attr bool \ +a_prefix##empty_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##first_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool 
(*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##last_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##next_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##prev_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##search_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##nsearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##psearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +a_attr a_type * \ +a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx); \ +) /* * The rb_gen() macro generates a type-specific red-black tree implementation, * based on the above cpp macros. - * * Arguments: * - * a_attr : Function attribute for generated functions (ex: static). 
- * a_prefix : Prefix for generated functions (ex: ex_). - * a_rb_type : Type for red-black tree data structure (ex: ex_t). - * a_type : Type for red-black tree node data structure (ex: ex_node_t). - * a_field : Name of red-black tree node linkage (ex: ex_link). - * a_cmp : Node comparison function name, with the following prototype: - * int (a_cmp *)(a_type *a_node, a_type *a_other); - * ^^^^^^ - * or a_key - * Interpretation of comparison function return values: - * -1 : a_node < a_other - * 0 : a_node == a_other - * 1 : a_node > a_other - * In all cases, the a_node or a_key macro argument is the first - * argument to the comparison function, which makes it possible - * to write comparison functions that treat the first argument - * specially. + * a_attr: + * Function attribute for generated functions (ex: static). + * a_prefix: + * Prefix for generated functions (ex: ex_). + * a_rb_type: + * Type for red-black tree data structure (ex: ex_t). + * a_type: + * Type for red-black tree node data structure (ex: ex_node_t). + * a_field: + * Name of red-black tree node linkage (ex: ex_link). + * a_cmp: + * Node comparison function name, with the following prototype: + * + * int a_cmp(a_type *a_node, a_type *a_other); + * ^^^^^^ + * or a_key + * Interpretation of comparison function return values: + * -1 : a_node < a_other + * 0 : a_node == a_other + * 1 : a_node > a_other + * In all cases, the a_node or a_key macro argument is the first argument to + * the comparison function, which makes it possible to write comparison + * functions that treat the first argument specially. a_cmp must be a total + * order on values inserted into the tree -- duplicates are not allowed. * * Assuming the following setup: * @@ -338,8 +420,193 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ * during iteration. There is no way to stop iteration once it * has begun. * arg : Opaque pointer passed to cb(). 
+ * + * The rb_summarized_gen() macro generates all the functions above, but has an + * expanded interface. In introduces the notion of summarizing subtrees, and of + * filtering searches in the tree according to the information contained in + * those summaries. + * The extra macro argument is: + * a_summarize: + * Tree summarization function name, with the following prototype: + * + * bool a_summarize(a_type *a_node, const a_type *a_left_child, + * const a_type *a_right_child); + * + * This function should update a_node with the summary of the subtree rooted + * there, using the data contained in it and the summaries in a_left_child + * and a_right_child. One or both of them may be NULL. When the tree + * changes due to an insertion or removal, it updates the summaries of all + * nodes whose subtrees have changed (always updating the summaries of + * children before their parents). If the user alters a node in the tree in + * a way that may change its summary, they can call the generated + * update_summaries function to bubble up the summary changes to the root. + * It should return true if the summary changed (or may have changed), and + * false if it didn't (which will allow the implementation to terminate + * "bubbling up" the summaries early). + * As the parameter names indicate, the children are ordered as they are in + * the tree, a_left_child, if it is not NULL, compares less than a_node, + * which in turn compares less than a_right_child (if a_right_child is not + * NULL). + * + * Using the same setup as above but replacing the macro with + * rb_summarized_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_cmp, + * ex_summarize) + * + * Generates all the previous functions, but adds some more: + * + * static void + * ex_update_summaries(ex_t *tree, ex_node_t *node); + * Description: Recompute all summaries of ancestors of node. + * Args: + * tree: Pointer to an initialized red-black tree object. + * node: The element of the tree whose summary may have changed. 
+ * + * For each of ex_empty, ex_first, ex_last, ex_next, ex_prev, ex_search, + * ex_nsearch, ex_psearch, ex_iter, and ex_reverse_iter, an additional function + * is generated as well, with the suffix _filtered (e.g. ex_empty_filtered, + * ex_first_filtered, etc.). These use the concept of a "filter"; a binary + * property some node either satisfies or does not satisfy. Clever use of the + * a_summary argument to rb_summarized_gen can allow efficient computation of + * these predicates across whole subtrees of the tree. + * The extended API functions accept three additional arguments after the + * arguments to the corresponding non-extended equivalent. + * + * ex_fn(..., bool (*filter_node)(void *, ex_node_t *), + * bool (*filter_subtree)(void *, ex_node_t *), void *filter_ctx); + * filter_node : Returns true if the node passes the filter. + * filter_subtree : Returns true if some node in the subtree rooted at + * node passes the filter. + * filter_ctx : A context argument passed to the filters. + * + * For a more concrete example of summarizing and filtering, suppose we're using + * the red-black tree to track a set of integers: + * + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * unsigned data; + * }; + * + * Suppose, for some application-specific reason, we want to be able to quickly + * find numbers in the set which are divisible by large powers of 2 (say, for + * aligned allocation purposes). 
We augment the node with a summary field: + * + * struct ex_node_s { + * rb_node(ex_node_t) ex_link; + * unsigned data; + * unsigned max_subtree_ffs; + * } + * + * and define our summarization function as follows: + * + * bool + * ex_summarize(ex_node_t *node, const ex_node_t *lchild, + * const ex_node_t *rchild) { + * unsigned new_max_subtree_ffs = ffs(node->data); + * if (lchild != NULL && lchild->max_subtree_ffs > new_max_subtree_ffs) { + * new_max_subtree_ffs = lchild->max_subtree_ffs; + * } + * if (rchild != NULL && rchild->max_subtree_ffs > new_max_subtree_ffs) { + * new_max_subtree_ffs = rchild->max_subtree_ffs; + * } + * bool changed = (node->max_subtree_ffs != new_max_subtree_ffs) + * node->max_subtree_ffs = new_max_subtree_ffs; + * // This could be "return true" without any correctness or big-O + * // performance changes; but practically, precisely reporting summary + * // changes reduces the amount of work that has to be done when "bubbling + * // up" summary changes. + * return changed; + * } + * + * We can now implement our filter functions as follows: + * bool + * ex_filter_node(void *filter_ctx, ex_node_t *node) { + * unsigned required_ffs = *(unsigned *)filter_ctx; + * return ffs(node->data) >= required_ffs; + * } + * bool + * ex_filter_subtree(void *filter_ctx, ex_node_t *node) { + * unsigned required_ffs = *(unsigned *)filter_ctx; + * return node->max_subtree_ffs >= required_ffs; + * } + * + * We can now easily search for, e.g., the smallest integer in the set that's + * divisible by 128: + * ex_node_t * + * find_div_128(ex_tree_t *tree) { + * unsigned min_ffs = 7; + * return ex_first_filtered(tree, &ex_filter_node, &ex_filter_subtree, + * &min_ffs); + * } + * + * We could with similar ease: + * - Fnd the next multiple of 128 in the set that's larger than 12345 (with + * ex_nsearch_filtered) + * - Iterate over just those multiples of 64 that are in the set (with + * ex_iter_filtered) + * - Determine if the set contains any multiples of 1024 (with 
+ * ex_empty_filtered). + * + * Some possibly subtle API notes: + * - The node argument to ex_next_filtered and ex_prev_filtered need not pass + * the filter; it will find the next/prev node that passes the filter. + * - ex_search_filtered will fail even for a node in the tree, if that node does + * not pass the filter. ex_psearch_filtered and ex_nsearch_filtered behave + * similarly; they may return a node larger/smaller than the key, even if a + * node equivalent to the key is in the tree (but does not pass the filter). + * - Similarly, if the start argument to a filtered iteration function does not + * pass the filter, the callback won't be invoked on it. + * + * These should make sense after a moment's reflection; each post-condition is + * the same as with the unfiltered version, with the added constraint that the + * returned node must pass the filter. */ #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ + rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \ + rb_empty_summarize, false) +#define rb_summarized_gen(a_attr, a_prefix, a_rbt_type, a_type, \ + a_field, a_cmp, a_summarize) \ + rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \ + a_summarize, true) + +#define rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, \ + a_field, a_cmp, a_summarize, a_is_summarized) \ +typedef struct { \ + a_type *node; \ + int cmp; \ +} a_prefix##path_entry_t; \ +static inline void \ +a_prefix##summarize_range(a_prefix##path_entry_t *rfirst, \ + a_prefix##path_entry_t *rlast) { \ + while ((uintptr_t)rlast >= (uintptr_t)rfirst) { \ + a_type *node = rlast->node; \ + /* Avoid a warning when a_summarize is rb_empty_summarize. 
*/ \ + (void)node; \ + bool changed = a_summarize(node, rbtn_left_get(a_type, a_field, \ + node), rbtn_right_get(a_type, a_field, node)); \ + if (!changed) { \ + break; \ + } \ + rlast--; \ + } \ +} \ +/* On the remove pathways, we sometimes swap the node being removed */\ +/* and its first successor; in such cases we need to do two range */\ +/* updates; one from the node to its (former) swapped successor, the */\ +/* next from that successor to the root (with either allowed to */\ +/* bail out early if appropriate. */\ +static inline void \ +a_prefix##summarize_swapped_range(a_prefix##path_entry_t *rfirst, \ + a_prefix##path_entry_t *rlast, a_prefix##path_entry_t *swap_loc) { \ + if (swap_loc == NULL || rlast <= swap_loc) { \ + a_prefix##summarize_range(rfirst, rlast); \ + } else { \ + a_prefix##summarize_range(swap_loc + 1, rlast); \ + (void)a_summarize(swap_loc->node, \ + rbtn_left_get(a_type, a_field, swap_loc->node), \ + rbtn_right_get(a_type, a_field, swap_loc->node)); \ + a_prefix##summarize_range(rfirst, swap_loc - 1); \ + } \ +} \ a_attr void \ a_prefix##new(a_rbt_type *rbtree) { \ rb_new(a_type, a_field, rbtree); \ @@ -465,10 +732,8 @@ a_prefix##psearch(a_rbt_type *rbtree, const a_type *key) { \ } \ a_attr void \ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } path[sizeof(void *) << 4], *pathp; \ + a_prefix##path_entry_t path[RB_MAX_DEPTH]; \ + a_prefix##path_entry_t *pathp; \ rbt_node_new(a_type, a_field, rbtree, node); \ /* Wind. */ \ path->node = rbtree->rbt_root; \ @@ -484,6 +749,13 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ } \ } \ pathp->node = node; \ + /* A loop invariant we maintain is that all nodes with */\ + /* out-of-date summaries live in path[0], path[1], ..., *pathp. */\ + /* To maintain this, we have to summarize node, since we */\ + /* decrement pathp before the first iteration. 
*/\ + assert(rbtn_left_get(a_type, a_field, node) == NULL); \ + assert(rbtn_right_get(a_type, a_field, node) == NULL); \ + (void)a_summarize(node, NULL, NULL); \ /* Unwind. */ \ for (pathp--; (uintptr_t)pathp >= (uintptr_t)path; pathp--) { \ a_type *cnode = pathp->node; \ @@ -498,9 +770,13 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ a_type *tnode; \ rbtn_black_set(a_type, a_field, leftleft); \ rbtn_rotate_right(a_type, a_field, cnode, tnode); \ + (void)a_summarize(cnode, \ + rbtn_left_get(a_type, a_field, cnode), \ + rbtn_right_get(a_type, a_field, cnode)); \ cnode = tnode; \ } \ } else { \ + a_prefix##summarize_range(path, pathp); \ return; \ } \ } else { \ @@ -521,13 +797,20 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_left(a_type, a_field, cnode, tnode); \ rbtn_color_set(a_type, a_field, tnode, tred); \ rbtn_red_set(a_type, a_field, cnode); \ + (void)a_summarize(cnode, \ + rbtn_left_get(a_type, a_field, cnode), \ + rbtn_right_get(a_type, a_field, cnode)); \ cnode = tnode; \ } \ } else { \ + a_prefix##summarize_range(path, pathp); \ return; \ } \ } \ pathp->node = cnode; \ + (void)a_summarize(cnode, \ + rbtn_left_get(a_type, a_field, cnode), \ + rbtn_right_get(a_type, a_field, cnode)); \ } \ /* Set root, and make it black. */ \ rbtree->rbt_root = path->node; \ @@ -535,12 +818,18 @@ a_prefix##insert(a_rbt_type *rbtree, a_type *node) { \ } \ a_attr void \ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ - struct { \ - a_type *node; \ - int cmp; \ - } *pathp, *nodep, path[sizeof(void *) << 4]; \ + a_prefix##path_entry_t path[RB_MAX_DEPTH]; \ + a_prefix##path_entry_t *pathp; \ + a_prefix##path_entry_t *nodep; \ + a_prefix##path_entry_t *swap_loc; \ + /* This is a "real" sentinel -- NULL means we didn't swap the */\ + /* node to be pruned with one of its successors, and so */\ + /* summarization can terminate early whenever some summary */\ + /* doesn't change. 
*/\ + swap_loc = NULL; \ + /* This is just to silence a compiler warning. */ \ + nodep = NULL; \ /* Wind. */ \ - nodep = NULL; /* Silence compiler warning. */ \ path->node = rbtree->rbt_root; \ for (pathp = path; pathp->node != NULL; pathp++) { \ int cmp = pathp->cmp = a_cmp(node, pathp->node); \ @@ -567,6 +856,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ pathp--; \ if (pathp->node != node) { \ /* Swap node with its successor. */ \ + swap_loc = nodep; \ bool tred = rbtn_red_get(a_type, a_field, pathp->node); \ rbtn_color_set(a_type, a_field, pathp->node, \ rbtn_red_get(a_type, a_field, node)); \ @@ -604,6 +894,9 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_black_set(a_type, a_field, left); \ if (pathp == path) { \ rbtree->rbt_root = left; \ + /* Nothing to summarize -- the subtree rooted at the */\ + /* node's left child hasn't changed, and it's now the */\ + /* root. */\ } else { \ if (pathp[-1].cmp < 0) { \ rbtn_left_set(a_type, a_field, pathp[-1].node, \ @@ -612,6 +905,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ left); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ } \ return; \ } else if (pathp == path) { \ @@ -620,10 +915,15 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ return; \ } \ } \ + /* We've now established the invariant that the node has no right */\ + /* child (well, morally; we didn't bother nulling it out if we */\ + /* swapped it with its successor), and that the only nodes with */\ + /* out-of-date summaries live in path[0], path[1], ..., pathp[-1].*/\ if (rbtn_red_get(a_type, a_field, pathp->node)) { \ /* Prune red node, which requires no fixup. 
*/ \ assert(pathp[-1].cmp < 0); \ rbtn_left_set(a_type, a_field, pathp[-1].node, NULL); \ + a_prefix##summarize_swapped_range(path, &pathp[-1], swap_loc); \ return; \ } \ /* The node to be pruned is black, so unwind until balance is */\ @@ -657,6 +957,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp->node, tnode);\ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(right, \ + rbtn_left_get(a_type, a_field, right), \ + rbtn_right_get(a_type, a_field, right)); \ } else { \ /* || */\ /* pathp(r) */\ @@ -667,7 +973,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* */\ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ } \ + (void)a_summarize(tnode, rbtn_left_get(a_type, a_field, \ + tnode), rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified subtree */\ /* root. 
*/\ assert((uintptr_t)pathp > (uintptr_t)path); \ @@ -678,6 +989,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ tnode); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ return; \ } else { \ a_type *right = rbtn_right_get(a_type, a_field, \ @@ -698,6 +1011,15 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp->node, tnode);\ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(right, \ + rbtn_left_get(a_type, a_field, right), \ + rbtn_right_get(a_type, a_field, right)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified */\ /* subtree root, which may actually be the tree */\ /* root. */\ @@ -712,6 +1034,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, \ pathp[-1].node, tnode); \ } \ + a_prefix##summarize_swapped_range(path, \ + &pathp[-1], swap_loc); \ } \ return; \ } else { \ @@ -725,6 +1049,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_red_set(a_type, a_field, pathp->node); \ rbtn_rotate_left(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ pathp->node = tnode; \ } \ } \ @@ -757,6 +1087,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ tnode); \ rbtn_right_set(a_type, a_field, unode, tnode); \ rbtn_rotate_left(a_type, a_field, unode, tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, 
a_field, pathp->node)); \ + (void)a_summarize(unode, \ + rbtn_left_get(a_type, a_field, unode), \ + rbtn_right_get(a_type, a_field, unode)); \ } else { \ /* || */\ /* pathp(b) */\ @@ -771,7 +1107,13 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ rbtn_black_set(a_type, a_field, tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ } \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified subtree */\ /* root, which may actually be the tree root. */\ if (pathp == path) { \ @@ -785,6 +1127,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ tnode); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ } \ return; \ } else if (rbtn_red_get(a_type, a_field, pathp->node)) { \ @@ -803,6 +1147,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_black_set(a_type, a_field, leftleft); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified */\ /* subtree root. 
*/\ assert((uintptr_t)pathp > (uintptr_t)path); \ @@ -813,6 +1163,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, pathp[-1].node, \ tnode); \ } \ + a_prefix##summarize_swapped_range(path, &pathp[-1], \ + swap_loc); \ return; \ } else { \ /* || */\ @@ -824,6 +1176,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_red_set(a_type, a_field, left); \ rbtn_black_set(a_type, a_field, pathp->node); \ /* Balance restored. */ \ + a_prefix##summarize_swapped_range(path, pathp, \ + swap_loc); \ return; \ } \ } else { \ @@ -840,6 +1194,12 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_black_set(a_type, a_field, leftleft); \ rbtn_rotate_right(a_type, a_field, pathp->node, \ tnode); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ + (void)a_summarize(tnode, \ + rbtn_left_get(a_type, a_field, tnode), \ + rbtn_right_get(a_type, a_field, tnode)); \ /* Balance restored, but rotation modified */\ /* subtree root, which may actually be the tree */\ /* root. 
*/\ @@ -854,6 +1214,8 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ rbtn_right_set(a_type, a_field, \ pathp[-1].node, tnode); \ } \ + a_prefix##summarize_swapped_range(path, \ + &pathp[-1], swap_loc); \ } \ return; \ } else { \ @@ -864,6 +1226,9 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ /* / */\ /* (b) */\ rbtn_red_set(a_type, a_field, left); \ + (void)a_summarize(pathp->node, \ + rbtn_left_get(a_type, a_field, pathp->node), \ + rbtn_right_get(a_type, a_field, pathp->node)); \ } \ } \ } \ @@ -1001,6 +1366,491 @@ a_prefix##destroy(a_rbt_type *rbtree, void (*cb)(a_type *, void *), \ void *arg) { \ a_prefix##destroy_recurse(rbtree, rbtree->rbt_root, cb, arg); \ rbtree->rbt_root = NULL; \ -} +} \ +/* BEGIN SUMMARIZED-ONLY IMPLEMENTATION */ \ +rb_summarized_only_##a_is_summarized( \ +static inline a_prefix##path_entry_t * \ +a_prefix##wind(a_rbt_type *rbtree, \ + a_prefix##path_entry_t path[RB_MAX_DEPTH], a_type *node) { \ + a_prefix##path_entry_t *pathp; \ + path->node = rbtree->rbt_root; \ + for (pathp = path; ; pathp++) { \ + assert((size_t)(pathp - path) < RB_MAX_DEPTH); \ + pathp->cmp = a_cmp(node, pathp->node); \ + if (pathp->cmp < 0) { \ + pathp[1].node = rbtn_left_get(a_type, a_field, \ + pathp->node); \ + } else if (pathp->cmp == 0) { \ + return pathp; \ + } else { \ + pathp[1].node = rbtn_right_get(a_type, a_field, \ + pathp->node); \ + } \ + } \ + unreachable(); \ +} \ +a_attr void \ +a_prefix##update_summaries(a_rbt_type *rbtree, a_type *node) { \ + a_prefix##path_entry_t path[RB_MAX_DEPTH]; \ + a_prefix##path_entry_t *pathp = a_prefix##wind(rbtree, path, node); \ + a_prefix##summarize_range(path, pathp); \ +} \ +a_attr bool \ +a_prefix##empty_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node = rbtree->rbt_root; \ + return node == NULL || !filter_subtree(filter_ctx, node); \ +} \ +static inline a_type * \ 
+a_prefix##first_filtered_from_node(a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + assert(node != NULL && filter_subtree(filter_ctx, node)); \ + while (true) { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (left != NULL && filter_subtree(filter_ctx, left)) { \ + node = left; \ + } else if (filter_node(filter_ctx, node)) { \ + return node; \ + } else { \ + assert(right != NULL \ + && filter_subtree(filter_ctx, right)); \ + node = right; \ + } \ + } \ + unreachable(); \ +} \ +a_attr a_type * \ +a_prefix##first_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node = rbtree->rbt_root; \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + return a_prefix##first_filtered_from_node(node, filter_node, \ + filter_subtree, filter_ctx); \ +} \ +static inline a_type * \ +a_prefix##last_filtered_from_node(a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + assert(node != NULL && filter_subtree(filter_ctx, node)); \ + while (true) { \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (right != NULL && filter_subtree(filter_ctx, right)) { \ + node = right; \ + } else if (filter_node(filter_ctx, node)) { \ + return node; \ + } else { \ + assert(left != NULL \ + && filter_subtree(filter_ctx, left)); \ + node = left; \ + } \ + } \ + unreachable(); \ +} \ +a_attr a_type * \ +a_prefix##last_filtered(a_rbt_type *rbtree, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node = rbtree->rbt_root; \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; 
\ + } \ + return a_prefix##last_filtered_from_node(node, filter_node, \ + filter_subtree, filter_ctx); \ +} \ +/* Internal implementation function. Search for a node comparing */\ +/* equal to key matching the filter. If such a node is in the tree, */\ +/* return it. Additionally, the caller has the option to ask for */\ +/* bounds on the next / prev node in the tree passing the filter. */\ +/* If nextbound is true, then this function will do one of the */\ +/* following: */\ +/* - Fill in *nextbound_node with the smallest node in the tree */\ +/* greater than key passing the filter, and NULL-out */\ +/* *nextbound_subtree. */\ +/* - Fill in *nextbound_subtree with a parent of that node which is */\ +/* not a parent of the searched-for node, and NULL-out */\ +/* *nextbound_node. */\ +/* - NULL-out both *nextbound_node and *nextbound_subtree, in which */\ +/* case no node greater than key but passing the filter is in the */\ +/* tree. */\ +/* The prevbound case is similar. If the caller knows that key is in */\ +/* the tree and that the subtree rooted at key does not contain a */\ +/* node satisfying the bound being searched for, then they can pass */\ +/* false for include_subtree, in which case we won't bother searching */\ +/* there (risking a cache miss). */\ +/* */\ +/* This API is unfortunately complex; but the logic for filtered */\ +/* searches is very subtle, and otherwise we would have to repeat it */\ +/* multiple times for filtered search, nsearch, psearch, next, and */\ +/* prev. 
*/\ +static inline a_type * \ +a_prefix##search_with_filter_bounds(a_rbt_type *rbtree, \ + const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx, \ + bool include_subtree, \ + bool nextbound, a_type **nextbound_node, a_type **nextbound_subtree, \ + bool prevbound, a_type **prevbound_node, a_type **prevbound_subtree) {\ + if (nextbound) { \ + *nextbound_node = NULL; \ + *nextbound_subtree = NULL; \ + } \ + if (prevbound) { \ + *prevbound_node = NULL; \ + *prevbound_subtree = NULL; \ + } \ + a_type *tnode = rbtree->rbt_root; \ + while (tnode != NULL && filter_subtree(filter_ctx, tnode)) { \ + int cmp = a_cmp(key, tnode); \ + a_type *tleft = rbtn_left_get(a_type, a_field, tnode); \ + a_type *tright = rbtn_right_get(a_type, a_field, tnode); \ + if (cmp < 0) { \ + if (nextbound) { \ + if (filter_node(filter_ctx, tnode)) { \ + *nextbound_node = tnode; \ + *nextbound_subtree = NULL; \ + } else if (tright != NULL && filter_subtree( \ + filter_ctx, tright)) { \ + *nextbound_node = NULL; \ + *nextbound_subtree = tright; \ + } \ + } \ + tnode = tleft; \ + } else if (cmp > 0) { \ + if (prevbound) { \ + if (filter_node(filter_ctx, tnode)) { \ + *prevbound_node = tnode; \ + *prevbound_subtree = NULL; \ + } else if (tleft != NULL && filter_subtree( \ + filter_ctx, tleft)) { \ + *prevbound_node = NULL; \ + *prevbound_subtree = tleft; \ + } \ + } \ + tnode = tright; \ + } else { \ + if (filter_node(filter_ctx, tnode)) { \ + return tnode; \ + } \ + if (include_subtree) { \ + if (prevbound && tleft != NULL && filter_subtree( \ + filter_ctx, tleft)) { \ + *prevbound_node = NULL; \ + *prevbound_subtree = tleft; \ + } \ + if (nextbound && tright != NULL && filter_subtree( \ + filter_ctx, tright)) { \ + *nextbound_node = NULL; \ + *nextbound_subtree = tright; \ + } \ + } \ + return NULL; \ + } \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##next_filtered(a_rbt_type *rbtree, a_type *node, \ + bool 
(*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *nright = rbtn_right_get(a_type, a_field, node); \ + if (nright != NULL && filter_subtree(filter_ctx, nright)) { \ + return a_prefix##first_filtered_from_node(nright, filter_node, \ + filter_subtree, filter_ctx); \ + } \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *search_result = a_prefix##search_with_filter_bounds( \ + rbtree, node, filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ false, \ + /* nextbound */ true, &node_candidate, &subtree_candidate, \ + /* prevbound */ false, NULL, NULL); \ + assert(node == search_result \ + || !filter_node(filter_ctx, node)); \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##first_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##prev_filtered(a_rbt_type *rbtree, a_type *node, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *nleft = rbtn_left_get(a_type, a_field, node); \ + if (nleft != NULL && filter_subtree(filter_ctx, nleft)) { \ + return a_prefix##last_filtered_from_node(nleft, filter_node, \ + filter_subtree, filter_ctx); \ + } \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *search_result = a_prefix##search_with_filter_bounds( \ + rbtree, node, filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ false, \ + /* nextbound */ false, NULL, NULL, \ + /* prevbound */ true, &node_candidate, &subtree_candidate); \ + assert(node == search_result \ + || !filter_node(filter_ctx, node)); \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##last_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + 
filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##search_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \ + filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ false, \ + /* nextbound */ false, NULL, NULL, \ + /* prevbound */ false, NULL, NULL); \ + return result; \ +} \ +a_attr a_type * \ +a_prefix##nsearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \ + filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ true, \ + /* nextbound */ true, &node_candidate, &subtree_candidate, \ + /* prevbound */ false, NULL, NULL); \ + if (result != NULL) { \ + return result; \ + } \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return a_prefix##first_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##psearch_filtered(a_rbt_type *rbtree, const a_type *key, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *node_candidate; \ + a_type *subtree_candidate; \ + a_type *result = a_prefix##search_with_filter_bounds(rbtree, key, \ + filter_node, filter_subtree, filter_ctx, \ + /* include_subtree */ true, \ + /* nextbound */ false, NULL, NULL, \ + /* prevbound */ true, &node_candidate, &subtree_candidate); \ + if (result != NULL) { \ + return result; \ + } \ + if (node_candidate != NULL) { \ + return node_candidate; \ + } \ + if (subtree_candidate != NULL) { \ + return 
a_prefix##last_filtered_from_node( \ + subtree_candidate, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return NULL; \ +} \ +a_attr a_type * \ +a_prefix##iter_recurse_filtered(a_rbt_type *rbtree, a_type *node, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + ret = a_prefix##iter_recurse_filtered(rbtree, left, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + } \ + if (ret != NULL) { \ + return ret; \ + } \ + return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ +} \ +a_attr a_type * \ +a_prefix##iter_start_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg, bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (!filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + int cmp = a_cmp(start, node); \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (cmp < 0) { \ + ret = a_prefix##iter_start_filtered(rbtree, start, left, cb, \ + arg, filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ + } else if (cmp > 0) { \ + return 
a_prefix##iter_start_filtered(rbtree, start, right, \ + cb, arg, filter_node, filter_subtree, filter_ctx); \ + } else { \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##iter_recurse_filtered(rbtree, right, cb, arg, \ + filter_node, filter_subtree, filter_ctx); \ + } \ +} \ +a_attr a_type * \ +a_prefix##iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##iter_start_filtered(rbtree, start, \ + rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \ + filter_ctx); \ + } else { \ + ret = a_prefix##iter_recurse_filtered(rbtree, rbtree->rbt_root, \ + cb, arg, filter_node, filter_subtree, filter_ctx); \ + } \ + return ret; \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_recurse_filtered(a_rbt_type *rbtree, \ + a_type *node, a_type *(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (node == NULL || !filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + ret = a_prefix##reverse_iter_recurse_filtered(rbtree, right, cb, \ + arg, filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + } \ + if (ret != NULL) { \ + return ret; \ + } \ + return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb, \ + arg, filter_node, filter_subtree, filter_ctx); \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_start_filtered(a_rbt_type *rbtree, a_type *start,\ + a_type *node, a_type 
*(*cb)(a_rbt_type *, a_type *, void *), \ + void *arg, bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + if (!filter_subtree(filter_ctx, node)) { \ + return NULL; \ + } \ + int cmp = a_cmp(start, node); \ + a_type *ret; \ + a_type *left = rbtn_left_get(a_type, a_field, node); \ + a_type *right = rbtn_right_get(a_type, a_field, node); \ + if (cmp > 0) { \ + ret = a_prefix##reverse_iter_start_filtered(rbtree, start, \ + right, cb, arg, filter_node, filter_subtree, filter_ctx); \ + if (ret != NULL) { \ + return ret; \ + } \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb,\ + arg, filter_node, filter_subtree, filter_ctx); \ + } else if (cmp < 0) { \ + return a_prefix##reverse_iter_start_filtered(rbtree, start, \ + left, cb, arg, filter_node, filter_subtree, filter_ctx); \ + } else { \ + if (filter_node(filter_ctx, node)) { \ + ret = cb(rbtree, node, arg); \ + if (ret != NULL) { \ + return ret; \ + } \ + } \ + return a_prefix##reverse_iter_recurse_filtered(rbtree, left, cb,\ + arg, filter_node, filter_subtree, filter_ctx); \ + } \ +} \ +a_attr a_type * \ +a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ + a_type *(*cb)(a_rbt_type *, a_type *, void *), void *arg, \ + bool (*filter_node)(void *, a_type *), \ + bool (*filter_subtree)(void *, a_type *), \ + void *filter_ctx) { \ + a_type *ret; \ + if (start != NULL) { \ + ret = a_prefix##reverse_iter_start_filtered(rbtree, start, \ + rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \ + filter_ctx); \ + } else { \ + ret = a_prefix##reverse_iter_recurse_filtered(rbtree, \ + rbtree->rbt_root, cb, arg, filter_node, filter_subtree, \ + filter_ctx); \ + } \ + return ret; \ +} \ +) /* end rb_summarized_only */ #endif /* JEMALLOC_INTERNAL_RB_H */ diff --git a/test/unit/rb.c b/test/unit/rb.c index 
a594fb71..7d4c454d 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include + #include "jemalloc/internal/rb.h" #define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ @@ -13,13 +15,47 @@ } \ } while (0) -typedef struct node_s node_t; +static bool summarize_always_returns_true = false; +typedef struct node_s node_t; struct node_s { #define NODE_MAGIC 0x9823af7e uint32_t magic; rb_node(node_t) link; + /* Order used by nodes. */ uint64_t key; + /* + * Our made-up summary property is "specialness", with summarization + * taking the max. + */ + uint64_t specialness; + + /* + * Used by some of the test randomization to avoid double-removing + * nodes. + */ + bool mid_remove; + + /* + * To test searching functionality, we want to temporarily weaken the + * ordering to allow non-equal nodes that nevertheless compare equal. + */ + bool allow_duplicates; + + /* + * In check_consistency, it's handy to know a node's rank in the tree; + * this tracks it (but only there; not all tests use this). + */ + int rank; + int filtered_rank; + + /* + * Replicate the internal structure of the tree, to make sure the + * implementation doesn't miss any updates. + */ + const node_t *summary_lchild; + const node_t *summary_rchild; + uint64_t summary_max_specialness; }; static int @@ -30,10 +66,12 @@ node_cmp(const node_t *a, const node_t *b) { expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); ret = (a->key > b->key) - (a->key < b->key); - if (ret == 0) { + if (ret == 0 && !a->allow_duplicates) { /* * Duplicates are not allowed in the tree, so force an - * arbitrary ordering for non-identical items with equal keys. + * arbitrary ordering for non-identical items with equal keys, + * unless the user is searching and wants to allow the + * duplicate. 
*/ ret = (((uintptr_t)a) > ((uintptr_t)b)) - (((uintptr_t)a) < ((uintptr_t)b)); @@ -41,8 +79,77 @@ node_cmp(const node_t *a, const node_t *b) { return ret; } +static uint64_t +node_subtree_specialness(node_t *n, const node_t *lchild, + const node_t *rchild) { + uint64_t subtree_specialness = n->specialness; + if (lchild != NULL + && lchild->summary_max_specialness > subtree_specialness) { + subtree_specialness = lchild->summary_max_specialness; + } + if (rchild != NULL + && rchild->summary_max_specialness > subtree_specialness) { + subtree_specialness = rchild->summary_max_specialness; + } + return subtree_specialness; +} + +static bool +node_summarize(node_t *a, const node_t *lchild, const node_t *rchild) { + uint64_t new_summary_max_specialness = node_subtree_specialness( + a, lchild, rchild); + bool changed = (a->summary_lchild != lchild) + || (a->summary_rchild != rchild) + || (new_summary_max_specialness != a->summary_max_specialness); + a->summary_max_specialness = new_summary_max_specialness; + a->summary_lchild = lchild; + a->summary_rchild = rchild; + return changed || summarize_always_returns_true; +} + typedef rb_tree(node_t) tree_t; -rb_gen(static, tree_, tree_t, node_t, link, node_cmp); +rb_summarized_proto(static, tree_, tree_t, node_t); +rb_summarized_gen(static, tree_, tree_t, node_t, link, node_cmp, + node_summarize); + +static bool +specialness_filter_node(void *ctx, node_t *node) { + uint64_t specialness = *(uint64_t *)ctx; + return node->specialness >= specialness; +} + +static bool +specialness_filter_subtree(void *ctx, node_t *node) { + uint64_t specialness = *(uint64_t *)ctx; + return node->summary_max_specialness >= specialness; +} + +static node_t * +tree_iterate_cb(tree_t *tree, node_t *node, void *data) { + unsigned *i = (unsigned *)data; + node_t *search_node; + + expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); + + /* Test rb_search(). 
*/ + search_node = tree_search(tree, node); + expect_ptr_eq(search_node, node, + "tree_search() returned unexpected node"); + + /* Test rb_nsearch(). */ + search_node = tree_nsearch(tree, node); + expect_ptr_eq(search_node, node, + "tree_nsearch() returned unexpected node"); + + /* Test rb_psearch(). */ + search_node = tree_psearch(tree, node); + expect_ptr_eq(search_node, node, + "tree_psearch() returned unexpected node"); + + (*i)++; + + return NULL; +} TEST_BEGIN(test_rb_empty) { tree_t tree; @@ -65,6 +172,32 @@ TEST_BEGIN(test_rb_empty) { key.key = 0; key.magic = NODE_MAGIC; expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); + + unsigned nodes = 0; + tree_iter_filtered(&tree, NULL, &tree_iterate_cb, + &nodes, &specialness_filter_node, &specialness_filter_subtree, + NULL); + expect_u_eq(0, nodes, ""); + + nodes = 0; + tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, + &nodes, &specialness_filter_node, &specialness_filter_subtree, + NULL); + expect_u_eq(0, nodes, ""); + + expect_ptr_null(tree_first_filtered(&tree, &specialness_filter_node, + &specialness_filter_subtree, NULL), ""); + expect_ptr_null(tree_last_filtered(&tree, &specialness_filter_node, + &specialness_filter_subtree, NULL), ""); + + key.key = 0; + key.magic = NODE_MAGIC; + expect_ptr_null(tree_search_filtered(&tree, &key, + &specialness_filter_node, &specialness_filter_subtree, NULL), ""); + expect_ptr_null(tree_nsearch_filtered(&tree, &key, + &specialness_filter_node, &specialness_filter_subtree, NULL), ""); + expect_ptr_null(tree_psearch_filtered(&tree, &key, + &specialness_filter_node, &specialness_filter_subtree, NULL), ""); } TEST_END @@ -81,6 +214,16 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { left_node = rbtn_left_get(node_t, link, node); right_node = rbtn_right_get(node_t, link, node); + expect_ptr_eq(left_node, node->summary_lchild, + "summary missed a tree update"); + expect_ptr_eq(right_node, node->summary_rchild, + "summary missed a 
tree update"); + + uint64_t expected_subtree_specialness = node_subtree_specialness(node, + left_node, right_node); + expect_u64_eq(expected_subtree_specialness, + node->summary_max_specialness, "Incorrect summary"); + if (!rbtn_red_get(node_t, link, node)) { black_depth++; } @@ -117,33 +260,6 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { return ret; } -static node_t * -tree_iterate_cb(tree_t *tree, node_t *node, void *data) { - unsigned *i = (unsigned *)data; - node_t *search_node; - - expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); - - /* Test rb_search(). */ - search_node = tree_search(tree, node); - expect_ptr_eq(search_node, node, - "tree_search() returned unexpected node"); - - /* Test rb_nsearch(). */ - search_node = tree_nsearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_nsearch() returned unexpected node"); - - /* Test rb_psearch(). */ - search_node = tree_psearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_psearch() returned unexpected node"); - - (*i)++; - - return NULL; -} - static unsigned tree_iterate(tree_t *tree) { unsigned i; @@ -225,9 +341,11 @@ destroy_cb(node_t *node, void *data) { } TEST_BEGIN(test_rb_random) { -#define NNODES 25 -#define NBAGS 250 -#define SEED 42 + enum { + NNODES = 25, + NBAGS = 500, + SEED = 42 + }; sfmt_t *sfmt; uint64_t bag[NNODES]; tree_t tree; @@ -255,12 +373,26 @@ TEST_BEGIN(test_rb_random) { } } + /* + * We alternate test behavior with a period of 2 here, and a + * period of 5 down below, so there's no cycle in which certain + * combinations get omitted. + */ + summarize_always_returns_true = (i % 2 == 0); + for (j = 1; j <= NNODES; j++) { /* Initialize tree and nodes. 
*/ tree_new(&tree); for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; + nodes[k].specialness = gen_rand64_range(sfmt, + NNODES); + nodes[k].mid_remove = false; + nodes[k].allow_duplicates = false; + nodes[k].summary_lchild = NULL; + nodes[k].summary_rchild = NULL; + nodes[k].summary_max_specialness = 0; } /* Insert nodes. */ @@ -341,9 +473,538 @@ TEST_BEGIN(test_rb_random) { } } fini_gen_rand(sfmt); -#undef NNODES -#undef NBAGS -#undef SEED +} +TEST_END + +static void +expect_simple_consistency(tree_t *tree, uint64_t specialness, + bool expected_empty, node_t *expected_first, node_t *expected_last) { + bool empty; + node_t *first; + node_t *last; + + empty = tree_empty_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_b_eq(expected_empty, empty, ""); + + first = tree_first_filtered(tree, + &specialness_filter_node, &specialness_filter_subtree, + (void *)&specialness); + expect_ptr_eq(expected_first, first, ""); + + last = tree_last_filtered(tree, + &specialness_filter_node, &specialness_filter_subtree, + (void *)&specialness); + expect_ptr_eq(expected_last, last, ""); +} + +TEST_BEGIN(test_rb_filter_simple) { + enum {FILTER_NODES = 10}; + node_t nodes[FILTER_NODES]; + for (unsigned i = 0; i < FILTER_NODES; i++) { + nodes[i].magic = NODE_MAGIC; + nodes[i].key = i; + if (i == 0) { + nodes[i].specialness = 0; + } else { + nodes[i].specialness = ffs_u(i); + } + nodes[i].mid_remove = false; + nodes[i].allow_duplicates = false; + nodes[i].summary_lchild = NULL; + nodes[i].summary_rchild = NULL; + nodes[i].summary_max_specialness = 0; + } + + summarize_always_returns_true = false; + + tree_t tree; + tree_new(&tree); + + /* Should be empty */ + expect_simple_consistency(&tree, /* specialness */ 0, /* empty */ true, + /* first */ NULL, /* last */ NULL); + + /* Fill in just the odd nodes. 
*/ + for (int i = 1; i < FILTER_NODES; i += 2) { + tree_insert(&tree, &nodes[i]); + } + + /* A search for an odd node should succeed. */ + expect_simple_consistency(&tree, /* specialness */ 0, /* empty */ false, + /* first */ &nodes[1], /* last */ &nodes[9]); + + /* But a search for an even one should fail. */ + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ true, + /* first */ NULL, /* last */ NULL); + + /* Now we add an even. */ + tree_insert(&tree, &nodes[4]); + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[4], /* last */ &nodes[4]); + + /* A smaller even, and a larger even. */ + tree_insert(&tree, &nodes[2]); + tree_insert(&tree, &nodes[8]); + + /* + * A first-search (resp. last-search) for an even should switch to the + * lower (higher) one, now that it's been added. + */ + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[2], /* last */ &nodes[8]); + + /* + * If we remove 2, a first-search we should go back to 4, while a + * last-search should remain unchanged. + */ + tree_remove(&tree, &nodes[2]); + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[4], /* last */ &nodes[8]); + + /* Reinsert 2, then find it again. */ + tree_insert(&tree, &nodes[2]); + expect_simple_consistency(&tree, /* specialness */ 1, /* empty */ false, + /* first */ &nodes[2], /* last */ &nodes[8]); + + /* Searching for a multiple of 4 should not have changed. 
*/ + expect_simple_consistency(&tree, /* specialness */ 2, /* empty */ false, + /* first */ &nodes[4], /* last */ &nodes[8]); + + /* And a multiple of 8 */ + expect_simple_consistency(&tree, /* specialness */ 3, /* empty */ false, + /* first */ &nodes[8], /* last */ &nodes[8]); + + /* But not a multiple of 16 */ + expect_simple_consistency(&tree, /* specialness */ 4, /* empty */ true, + /* first */ NULL, /* last */ NULL); +} +TEST_END + +typedef struct iter_ctx_s iter_ctx_t; +struct iter_ctx_s { + int ncalls; + node_t *last_node; + + int ncalls_max; + bool forward; +}; + +static node_t * +tree_iterate_filtered_cb(tree_t *tree, node_t *node, void *arg) { + iter_ctx_t *ctx = (iter_ctx_t *)arg; + ctx->ncalls++; + expect_u64_ge(node->specialness, 1, + "Should only invoke cb on nodes that pass the filter"); + if (ctx->last_node != NULL) { + if (ctx->forward) { + expect_d_lt(node_cmp(ctx->last_node, node), 0, + "Incorrect iteration order"); + } else { + expect_d_gt(node_cmp(ctx->last_node, node), 0, + "Incorrect iteration order"); + } + } + ctx->last_node = node; + if (ctx->ncalls == ctx->ncalls_max) { + return node; + } + return NULL; +} + +static int +qsort_node_cmp(const void *ap, const void *bp) { + node_t *a = *(node_t **)ap; + node_t *b = *(node_t **)bp; + return node_cmp(a, b); +} + +#define UPDATE_TEST_MAX 100 +static void +check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { + uint64_t specialness = 1; + + bool empty; + bool real_empty = true; + node_t *first; + node_t *real_first = NULL; + node_t *last; + node_t *real_last = NULL; + for (int i = 0; i < nnodes; i++) { + if (nodes[i].specialness >= specialness) { + real_empty = false; + if (real_first == NULL + || node_cmp(&nodes[i], real_first) < 0) { + real_first = &nodes[i]; + } + if (real_last == NULL + || node_cmp(&nodes[i], real_last) > 0) { + real_last = &nodes[i]; + } + } + } + + empty = tree_empty_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, 
&specialness); + expect_b_eq(real_empty, empty, ""); + + first = tree_first_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(real_first, first, ""); + + last = tree_last_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(real_last, last, ""); + + for (int i = 0; i < nnodes; i++) { + node_t *next_filtered; + node_t *real_next_filtered = NULL; + node_t *prev_filtered; + node_t *real_prev_filtered = NULL; + for (int j = 0; j < nnodes; j++) { + if (nodes[j].specialness < specialness) { + continue; + } + if (node_cmp(&nodes[j], &nodes[i]) < 0 + && (real_prev_filtered == NULL + || node_cmp(&nodes[j], real_prev_filtered) > 0)) { + real_prev_filtered = &nodes[j]; + } + if (node_cmp(&nodes[j], &nodes[i]) > 0 + && (real_next_filtered == NULL + || node_cmp(&nodes[j], real_next_filtered) < 0)) { + real_next_filtered = &nodes[j]; + } + } + next_filtered = tree_next_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_next_filtered, next_filtered, ""); + + prev_filtered = tree_prev_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_prev_filtered, prev_filtered, ""); + + node_t *search_filtered; + node_t *real_search_filtered; + node_t *nsearch_filtered; + node_t *real_nsearch_filtered; + node_t *psearch_filtered; + node_t *real_psearch_filtered; + + /* + * search, nsearch, psearch from a node before nodes[i] in the + * ordering. + */ + node_t before; + before.magic = NODE_MAGIC; + before.key = nodes[i].key - 1; + before.allow_duplicates = false; + real_search_filtered = NULL; + search_filtered = tree_search_filtered(tree, &before, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = (nodes[i].specialness >= specialness ? 
+ &nodes[i] : real_next_filtered); + nsearch_filtered = tree_nsearch_filtered(tree, &before, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = real_prev_filtered; + psearch_filtered = tree_psearch_filtered(tree, &before, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + + /* search, nsearch, psearch from nodes[i] */ + real_search_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : NULL); + search_filtered = tree_search_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_next_filtered); + nsearch_filtered = tree_nsearch_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_prev_filtered); + psearch_filtered = tree_psearch_filtered(tree, &nodes[i], + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + + /* + * search, nsearch, psearch from a node equivalent to but + * distinct from nodes[i]. + */ + node_t equiv; + equiv.magic = NODE_MAGIC; + equiv.key = nodes[i].key; + equiv.allow_duplicates = true; + real_search_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : NULL); + search_filtered = tree_search_filtered(tree, &equiv, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = (nodes[i].specialness >= specialness ? 
+ &nodes[i] : real_next_filtered); + nsearch_filtered = tree_nsearch_filtered(tree, &equiv, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_prev_filtered); + psearch_filtered = tree_psearch_filtered(tree, &equiv, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + + /* + * search, nsearch, psearch from a node after nodes[i] in the + * ordering. + */ + node_t after; + after.magic = NODE_MAGIC; + after.key = nodes[i].key + 1; + after.allow_duplicates = false; + real_search_filtered = NULL; + search_filtered = tree_search_filtered(tree, &after, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_search_filtered, search_filtered, ""); + + real_nsearch_filtered = real_next_filtered; + nsearch_filtered = tree_nsearch_filtered(tree, &after, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); + + real_psearch_filtered = (nodes[i].specialness >= specialness ? + &nodes[i] : real_prev_filtered); + psearch_filtered = tree_psearch_filtered(tree, &after, + &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); + } + + /* Filtered iteration test setup. 
*/ + int nspecial = 0; + node_t *sorted_nodes[UPDATE_TEST_MAX]; + node_t *sorted_filtered_nodes[UPDATE_TEST_MAX]; + for (int i = 0; i < nnodes; i++) { + sorted_nodes[i] = &nodes[i]; + } + qsort(sorted_nodes, nnodes, sizeof(node_t *), &qsort_node_cmp); + for (int i = 0; i < nnodes; i++) { + sorted_nodes[i]->rank = i; + sorted_nodes[i]->filtered_rank = nspecial; + if (sorted_nodes[i]->specialness >= 1) { + sorted_filtered_nodes[nspecial] = sorted_nodes[i]; + nspecial++; + } + } + + node_t *iter_result; + + iter_ctx_t ctx; + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = INT_MAX; + ctx.forward = true; + + /* Filtered forward iteration from the beginning. */ + iter_result = tree_iter_filtered(tree, NULL, &tree_iterate_filtered_cb, + &ctx, &specialness_filter_node, &specialness_filter_subtree, + &specialness); + expect_ptr_null(iter_result, ""); + expect_d_eq(nspecial, ctx.ncalls, ""); + /* Filtered forward iteration from a starting point. */ + for (int i = 0; i < nnodes; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + iter_result = tree_iter_filtered(tree, &nodes[i], + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_null(iter_result, ""); + expect_d_eq(nspecial - nodes[i].filtered_rank, ctx.ncalls, ""); + } + /* Filtered forward iteration from the beginning, with stopping */ + for (int i = 0; i < nspecial; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = i + 1; + iter_result = tree_iter_filtered(tree, NULL, + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(sorted_filtered_nodes[i], iter_result, ""); + expect_d_eq(ctx.ncalls, i + 1, ""); + } + /* Filtered forward iteration from a starting point, with stopping. 
*/ + for (int i = 0; i < nnodes; i++) { + for (int j = 0; j < nspecial - nodes[i].filtered_rank; j++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = j + 1; + iter_result = tree_iter_filtered(tree, &nodes[i], + &tree_iterate_filtered_cb, &ctx, + &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_d_eq(j + 1, ctx.ncalls, ""); + expect_ptr_eq(sorted_filtered_nodes[ + nodes[i].filtered_rank + j], iter_result, ""); + } + } + + /* Backwards iteration. */ + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = INT_MAX; + ctx.forward = false; + + /* Filtered backward iteration from the end. */ + iter_result = tree_reverse_iter_filtered(tree, NULL, + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_null(iter_result, ""); + expect_d_eq(nspecial, ctx.ncalls, ""); + /* Filtered backward iteration from a starting point. */ + for (int i = 0; i < nnodes; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + iter_result = tree_reverse_iter_filtered(tree, &nodes[i], + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_null(iter_result, ""); + int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0); + expect_d_eq(nodes[i].filtered_rank + surplus_rank, ctx.ncalls, + ""); + } + /* Filtered backward iteration from the end, with stopping */ + for (int i = 0; i < nspecial; i++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = i + 1; + iter_result = tree_reverse_iter_filtered(tree, NULL, + &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_ptr_eq(sorted_filtered_nodes[nspecial - i - 1], + iter_result, ""); + expect_d_eq(ctx.ncalls, i + 1, ""); + } + /* Filtered backward iteration from a starting point, with stopping. */ + for (int i = 0; i < nnodes; i++) { + int surplus_rank = (nodes[i].specialness >= 1 ? 
1 : 0); + for (int j = 0; j < nodes[i].filtered_rank + surplus_rank; + j++) { + ctx.ncalls = 0; + ctx.last_node = NULL; + ctx.ncalls_max = j + 1; + iter_result = tree_reverse_iter_filtered(tree, + &nodes[i], &tree_iterate_filtered_cb, &ctx, + &specialness_filter_node, + &specialness_filter_subtree, &specialness); + expect_d_eq(j + 1, ctx.ncalls, ""); + expect_ptr_eq(sorted_filtered_nodes[ + nodes[i].filtered_rank - j - 1 + surplus_rank], + iter_result, ""); + } + } +} + +static void +do_update_search_test(int nnodes, int ntrees, int nremovals, + int nupdates) { + node_t nodes[UPDATE_TEST_MAX]; + assert(nnodes <= UPDATE_TEST_MAX); + + sfmt_t *sfmt = init_gen_rand(12345); + for (int i = 0; i < ntrees; i++) { + tree_t tree; + tree_new(&tree); + for (int j = 0; j < nnodes; j++) { + nodes[j].magic = NODE_MAGIC; + /* + * In consistency checking, we increment or decrement a + * key and assume that the result is not a key in the + * tree. This isn't a *real* concern with 64-bit keys + * and a good PRNG, but why not be correct anyways? 
+ */ + nodes[j].key = 2 * gen_rand64(sfmt); + nodes[j].specialness = 0; + nodes[j].mid_remove = false; + nodes[j].allow_duplicates = false; + nodes[j].summary_lchild = NULL; + nodes[j].summary_rchild = NULL; + nodes[j].summary_max_specialness = 0; + tree_insert(&tree, &nodes[j]); + } + for (int j = 0; j < nremovals; j++) { + int victim = (int)gen_rand64_range(sfmt, nnodes); + if (!nodes[victim].mid_remove) { + tree_remove(&tree, &nodes[victim]); + nodes[victim].mid_remove = true; + } + } + for (int j = 0; j < nnodes; j++) { + if (nodes[j].mid_remove) { + nodes[j].mid_remove = false; + nodes[j].key = 2 * gen_rand64(sfmt); + tree_insert(&tree, &nodes[j]); + } + } + for (int i = 0; i < nupdates; i++) { + uint32_t ind = gen_rand32_range(sfmt, nnodes); + nodes[ind].specialness = 1 - nodes[ind].specialness; + tree_update_summaries(&tree, &nodes[ind]); + check_consistency(&tree, nodes, nnodes); + } + } +} + +TEST_BEGIN(test_rb_update_search) { + summarize_always_returns_true = false; + do_update_search_test(2, 100, 3, 50); + do_update_search_test(5, 100, 3, 50); + do_update_search_test(12, 100, 5, 1000); + do_update_search_test(100, 1, 50, 500); +} +TEST_END + +typedef rb_tree(node_t) unsummarized_tree_t; +rb_gen(static UNUSED, unsummarized_tree_, unsummarized_tree_t, node_t, link, + node_cmp); + +static node_t * +unsummarized_tree_iterate_cb(unsummarized_tree_t *tree, node_t *node, + void *data) { + unsigned *i = (unsigned *)data; + (*i)++; + return NULL; +} +/* + * The unsummarized and summarized funtionality is implemented via the same + * functions; we don't really need to do much more than test that we can exclude + * the filtered functionality without anything breaking. 
+ */ +TEST_BEGIN(test_rb_unsummarized) { + unsummarized_tree_t tree; + unsummarized_tree_new(&tree); + unsigned nnodes = 0; + unsummarized_tree_iter(&tree, NULL, &unsummarized_tree_iterate_cb, + &nnodes); + expect_u_eq(0, nnodes, ""); } TEST_END @@ -351,5 +1012,8 @@ int main(void) { return test_no_reentrancy( test_rb_empty, - test_rb_random); + test_rb_random, + test_rb_filter_simple, + test_rb_update_search, + test_rb_unsummarized); } From 08089589f74ac23268791be18742d031cc5dd041 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 12 May 2021 16:00:38 -0700 Subject: [PATCH 2066/2608] Fix an interaction between the oversize_threshold test and bgthds. Also added the shared utility to check if background_thread is enabled. --- test/include/test/bgthd.h | 17 +++++++++++++++++ test/include/test/jemalloc_test.h.in | 1 + test/integration/extent.c | 16 ++-------------- test/unit/arena_decay.c | 22 +++++----------------- test/unit/oversize_threshold.c | 6 ++++-- test/unit/stats.c | 2 +- 6 files changed, 30 insertions(+), 34 deletions(-) create mode 100644 test/include/test/bgthd.h diff --git a/test/include/test/bgthd.h b/test/include/test/bgthd.h new file mode 100644 index 00000000..4fa2395e --- /dev/null +++ b/test/include/test/bgthd.h @@ -0,0 +1,17 @@ +/* + * Shared utility for checking if background_thread is enabled, which affects + * the purging behavior and assumptions in some tests. 
+ */ + +static inline bool +is_background_thread_enabled(void) { + bool enabled; + size_t sz = sizeof(bool); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + if (ret == ENOENT) { + return false; + } + assert_d_eq(ret, 0, "Unexpected mallctl error"); + + return enabled; +} diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index ae675745..0e332165 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -128,6 +128,7 @@ static const bool config_debug = #include "test/test.h" #include "test/timer.h" #include "test/thd.h" +#include "test/bgthd.h" #define MEXP 19937 #include "test/SFMT.h" diff --git a/test/integration/extent.c b/test/integration/extent.c index ccc314d9..831ef63f 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,18 +2,6 @@ #include "test/extent_hooks.h" -static bool -check_background_thread_enabled(void) { - bool enabled; - size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); - if (ret == ENOENT) { - return false; - } - expect_d_eq(ret, 0, "Unexpected mallctl error"); - return enabled; -} - static void test_extent_body(unsigned arena_ind) { void *p; @@ -177,7 +165,7 @@ test_manual_hook_body(void) { expect_ptr_ne(old_hooks->merge, extent_merge_hook, "Unexpected extent_hooks error"); - if (!check_background_thread_enabled()) { + if (!is_background_thread_enabled()) { test_extent_body(arena_ind); } @@ -235,7 +223,7 @@ TEST_BEGIN(test_extent_auto_hook) { expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_extent_body(arena_ind); } TEST_END diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index cea39e09..9fca5385 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -9,18 +9,6 
@@ static unsigned nupdates_mock; static nstime_t time_mock; static bool monotonic_mock; -static bool -check_background_thread_enabled(void) { - bool enabled; - size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); - if (ret == ENOENT) { - return false; - } - expect_d_eq(ret, 0, "Unexpected mallctl error"); - return enabled; -} - static bool nstime_monotonic_mock(void) { return monotonic_mock; @@ -184,7 +172,7 @@ generate_dirty(unsigned arena_ind, size_t size) { } TEST_BEGIN(test_decay_ticks) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); ticker_geom_t *decay_ticker; @@ -417,7 +405,7 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, } TEST_BEGIN(test_decay_ticker) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS 2048 ssize_t ddt = opt_dirty_decay_ms; @@ -476,7 +464,7 @@ TEST_BEGIN(test_decay_ticker) { TEST_END TEST_BEGIN(test_decay_nonmonotonic) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); @@ -534,7 +522,7 @@ TEST_BEGIN(test_decay_nonmonotonic) { TEST_END TEST_BEGIN(test_decay_now) { - test_skip_if(check_background_thread_enabled()); + test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(0, 0); @@ -555,7 +543,7 @@ TEST_BEGIN(test_decay_now) { TEST_END TEST_BEGIN(test_decay_never) { - test_skip_if(check_background_thread_enabled() || !config_stats); + test_skip_if(is_background_thread_enabled() || !config_stats); test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c index e374b142..44a8f76a 100644 --- 
a/test/unit/oversize_threshold.c +++ b/test/unit/oversize_threshold.c @@ -106,14 +106,16 @@ TEST_BEGIN(test_oversize_threshold) { /* Allocating and freeing half a megabyte should leave them dirty. */ void *ptr = mallocx(512 * 1024, MALLOCX_ARENA(arena)); dallocx(ptr, MALLOCX_TCACHE_NONE); - expect_zu_lt(max_purged, 512 * 1024, "Expected no 512k purge"); + if (!is_background_thread_enabled()) { + expect_zu_lt(max_purged, 512 * 1024, "Expected no 512k purge"); + } /* Purge again to reset everything out. */ arena_mallctl("arena.%u.purge", arena, NULL, NULL, NULL, 0); max_purged = 0; /* - * Allocating and freeing 2 megabytes should leave them dirty because of + * Allocating and freeing 2 megabytes should have them purged because of * the oversize threshold. */ ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); diff --git a/test/unit/stats.c b/test/unit/stats.c index 6b6594d2..cb99b095 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -119,7 +119,7 @@ TEST_BEGIN(test_stats_arenas_summary) { "Unexepected mallctl() result"); if (config_stats) { - if (!background_thread_enabled() && !opt_hpa) { + if (!is_background_thread_enabled() && !opt_hpa) { expect_u64_gt(dirty_npurge + muzzy_npurge, 0, "At least one purge should have occurred"); } From 11beab38bc5ede45f06af3c513efd003c9d32088 Mon Sep 17 00:00:00 2001 From: Deanna Gelbart Date: Tue, 11 May 2021 19:02:33 -0700 Subject: [PATCH 2067/2608] Added --debug-syms-by-id option --- bin/jeprof.in | 58 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index d47359cf..e0b212ae 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -240,6 +240,7 @@ Miscellaneous: --test Run unit tests --help This message --version Version information + --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name Environment Variables: JEPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/jeprof @@ -365,6 +366,7 @@ sub Init() { $main::opt_tools = ""; $main::opt_debug = 0; $main::opt_test = 0; + $main::opt_debug_syms_by_id = 0; # These are undocumented flags used only by unittests. $main::opt_test_stride = 0; @@ -433,6 +435,7 @@ sub Init() { "tools=s" => \$main::opt_tools, "test!" => \$main::opt_test, "debug!" => \$main::opt_debug, + "debug-syms-by-id!" => \$main::opt_debug_syms_by_id, # Undocumented flags used only by unittests: "test_stride=i" => \$main::opt_test_stride, ) || usage("Invalid option(s)"); @@ -577,6 +580,11 @@ sub Init() { foreach (@prefix_list) { s|/+$||; } + + # Flag to prevent us from trying over and over to use + # elfutils if it's not installed (used only with + # --debug-syms-by-id option). + $main::gave_up_on_elfutils = 0; } sub FilterAndPrint { @@ -4492,16 +4500,54 @@ sub FindLibrary { # For libc libraries, the copy in /usr/lib/debug contains debugging symbols sub DebuggingLibrary { my $file = shift; - if ($file =~ m|^/|) { - if (-f "/usr/lib/debug$file") { - return "/usr/lib/debug$file"; - } elsif (-f "/usr/lib/debug$file.debug") { - return "/usr/lib/debug$file.debug"; - } + + if ($file !~ m|^/|) { + return undef; } + + # Find debug symbol file if it's named after the library's name. + + if (-f "/usr/lib/debug$file") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } + return "/usr/lib/debug$file"; + } elsif (-f "/usr/lib/debug$file.debug") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } + return "/usr/lib/debug$file.debug"; + } + + if(!$main::opt_debug_syms_by_id) { + if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" }; + return undef; + } + + # Find debug file if it's named after the library's build ID. + + my $readelf = ''; + if (!$main::gave_up_on_elfutils) { + $readelf = qx/eu-readelf -n ${file}/; + if ($?) { + print STDERR "Cannot run eu-readelf. 
To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n"; + $main::gave_up_on_elfutils = 1; + return undef; + } + my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s; + if (defined $buildID && length $buildID > 0) { + my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug'; + if (-e $symbolFile) { + if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" }; + return $symbolFile; + } else { + if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" }; + return undef; + } + } + } + + if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" }; return undef; } + # Parse text section header of a library using objdump sub ParseTextSectionHeaderFromObjdump { my $lib = shift; From 36c6bfb963e8a36a8918eb841902e006466fb7c2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 18 May 2021 14:52:46 -0700 Subject: [PATCH 2068/2608] SEC: Allow arbitrarily many shards, cached sizes. --- include/jemalloc/internal/pa.h | 4 +-- include/jemalloc/internal/sec.h | 22 ++++----------- src/arena.c | 2 +- src/jemalloc.c | 2 +- src/pa.c | 7 +++-- src/sec.c | 50 +++++++++++++++++++++++---------- test/unit/sec.c | 13 +++++++-- 7 files changed, 59 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index acb94eb6..cb9f8cff 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -130,8 +130,8 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. 
*/ -bool pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts); +bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts); /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index ddcdfbdf..fa863382 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -13,20 +13,6 @@ * knowledge of the underlying PAI implementation). */ -/* - * This is a *small* extent cache, after all. Assuming 4k pages and an ngroup - * of 4, this allows caching of sizes up to 128k. - */ -#define SEC_NPSIZES 16 -/* - * For now, we put a cap on the number of SECs an arena can have. There's no - * reason it can't be dynamic; it's just inconvenient. This number of shards - * are embedded in the arenas, so there's a space / configurability tradeoff - * here. Eventually, we should probably dynamically allocate only however many - * we require. - */ -#define SEC_NSHARDS_MAX 8 - /* * For now, this is just one field; eventually, we'll probably want to get more * fine-grained data out (like per-size class statistics). @@ -91,7 +77,7 @@ struct sec_shard_s { * hooks are installed. */ bool enabled; - sec_bin_t bins[SEC_NPSIZES]; + sec_bin_t *bins; /* Number of bytes in all bins in the shard. */ size_t bytes_cur; /* The next pszind to flush in the flush-some pathways. 
*/ @@ -104,10 +90,12 @@ struct sec_s { pai_t *fallback; sec_opts_t opts; - sec_shard_t shards[SEC_NSHARDS_MAX]; + sec_shard_t *shards; + pszind_t npsizes; }; -bool sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts); +bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, + const sec_opts_t *opts); void sec_flush(tsdn_t *tsdn, sec_t *sec); void sec_disable(tsdn_t *tsdn, sec_t *sec); diff --git a/src/arena.c b/src/arena.c index 78ea92c1..3ff91572 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1565,7 +1565,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. */ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(&arena->pa_shard, &opt_hpa_opts, + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &opt_hpa_opts, &opt_hpa_sec_opts)) { goto label_error; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 613733ff..1f489932 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1781,7 +1781,7 @@ malloc_init_hard_a0_locked() { opt_hpa = false; } } else if (opt_hpa) { - if (pa_shard_enable_hpa(&a0->pa_shard, &opt_hpa_opts, + if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, &opt_hpa_opts, &opt_hpa_sec_opts)) { return true; } diff --git a/src/pa.c b/src/pa.c index 90809b35..cb3b3df5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -49,13 +49,14 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, } bool -pa_shard_enable_hpa(pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts) { +pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } - if (sec_init(&shard->hpa_sec, &shard->hpa_shard.pai, hpa_sec_opts)) { + if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, + hpa_sec_opts)) { return 
true; } shard->ever_used_hpa = true; diff --git a/src/sec.c b/src/sec.c index c37cf35c..41753464 100644 --- a/src/sec.c +++ b/src/sec.c @@ -19,35 +19,55 @@ sec_bin_init(sec_bin_t *bin) { } bool -sec_init(sec_t *sec, pai_t *fallback, const sec_opts_t *opts) { - size_t nshards_clipped = opts->nshards; - if (nshards_clipped > SEC_NSHARDS_MAX) { - nshards_clipped = SEC_NSHARDS_MAX; +sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, + const sec_opts_t *opts) { + size_t max_alloc = opts->max_alloc & PAGE_MASK; + pszind_t npsizes = sz_psz2ind(max_alloc); + if (sz_pind2sz(npsizes) > opts->max_alloc) { + npsizes--; } - for (size_t i = 0; i < nshards_clipped; i++) { - sec_shard_t *shard = &sec->shards[i]; + size_t sz_shards = opts->nshards * sizeof(sec_shard_t); + size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); + size_t sz_alloc = sz_shards + sz_bins; + void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + if (dynalloc == NULL) { + return true; + } + sec_shard_t *shard_cur = (sec_shard_t *)dynalloc; + sec->shards = shard_cur; + sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards]; + /* Just for asserts, below. */ + sec_bin_t *bin_start = bin_cur; + + for (size_t i = 0; i < opts->nshards; i++) { + sec_shard_t *shard = shard_cur; + shard_cur++; bool err = malloc_mutex_init(&shard->mtx, "sec_shard", WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); if (err) { return true; } shard->enabled = true; - for (pszind_t j = 0; j < SEC_NPSIZES; j++) { + shard->bins = bin_cur; + for (pszind_t j = 0; j < npsizes; j++) { sec_bin_init(&shard->bins[j]); + bin_cur++; } shard->bytes_cur = 0; shard->to_flush_next = 0; } + /* + * Should have exactly matched the bin_start to the first unused byte + * after the shards. + */ + assert((void *)shard_cur == (void *)bin_start); + /* And the last bin to use up the last bytes of the allocation. 
*/ + assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); sec->fallback = fallback; - size_t max_alloc_clipped = opts->max_alloc; - if (max_alloc_clipped > sz_pind2sz(SEC_NPSIZES - 1)) { - max_alloc_clipped = sz_pind2sz(SEC_NPSIZES - 1); - } sec->opts = *opts; - sec->opts.nshards = nshards_clipped; - sec->opts.max_alloc = max_alloc_clipped; + sec->npsizes = npsizes; /* * Initialize these last so that an improper use of an SEC whose @@ -106,7 +126,7 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { /* Update our victim-picking state. */ shard->to_flush_next++; - if (shard->to_flush_next == SEC_NPSIZES) { + if (shard->to_flush_next == sec->npsizes) { shard->to_flush_next = 0; } @@ -249,7 +269,7 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { shard->bytes_cur = 0; edata_list_active_t to_flush; edata_list_active_init(&to_flush); - for (pszind_t i = 0; i < SEC_NPSIZES; i++) { + for (pszind_t i = 0; i < sec->npsizes; i++) { sec_bin_t *bin = &shard->bins[i]; bin->bytes_cur = 0; edata_list_active_concat(&to_flush, &bin->freelist); diff --git a/test/unit/sec.c b/test/unit/sec.c index 36ae1a52..01455c89 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -37,7 +37,14 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, opts.bytes_after_flush = max_bytes / 2; opts.batch_fill_extra = 4; - bool err = sec_init(sec, fallback, &opts); + /* + * We end up leaking this base, but that's fine; this test is + * short-running, and SECs are arena-scoped in reality. + */ + base_t *base = base_new(TSDN_NULL, /* ind */ 123, + &ehooks_default_extent_hooks); + + bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); assert_false(err, "Unexpected initialization failure"); } @@ -412,10 +419,12 @@ TEST_BEGIN(test_nshards_0) { sec_t sec; /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; + base_t *base = base_new(TSDN_NULL, /* ind */ 123, + &ehooks_default_extent_hooks); sec_opts_t opts = SEC_OPTS_DEFAULT; opts.nshards = 0; - sec_init(&sec, &ta.pai, &opts); + sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); From 2c0f4c2ac3b6a78a849526be384a7a2349d1a09c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ond=C5=99ej=20Sur=C3=BD?= Date: Tue, 25 May 2021 09:19:40 +0200 Subject: [PATCH 2069/2608] Fix typo in configure.ac: experimetal -> experimental --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 41a03d24..0748329d 100644 --- a/configure.ac +++ b/configure.ac @@ -2587,7 +2587,7 @@ AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) -AC_MSG_RESULT([experimetal_smallocx : ${enable_experimental_smallocx}]) +AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) From 2381efab5754d13da5104b101b1e695afb442590 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 21 May 2021 07:28:16 -0700 Subject: [PATCH 2070/2608] ARC: add Minimum allocation alignment Signed-off-by: Vineet Gupta --- include/jemalloc/internal/quantum.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index 11e870a3..760d6add 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -65,6 +65,9 @@ # ifdef __le32__ # define LG_QUANTUM 4 # endif +# ifdef __arc__ +# define LG_QUANTUM 3 +# endif # ifndef LG_QUANTUM # error "Unknown minimum alignment for architecture; specify via " "--with-lg-quantum" From 
4fb93a18ee56795fab725c23cc0211b0198dda46 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 19 Jun 2021 13:38:44 +0100 Subject: [PATCH 2071/2608] extent_can_acquire_neighbor typo fix --- include/jemalloc/internal/extent.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index b39e5ed5..03eebdd5 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -83,7 +83,7 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, bool neighbor_is_head = contents.metadata.is_head; if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata), neighbor_is_head, forward)) { - return NULL; + return false; } extent_state_t neighbor_state = contents.metadata.state; if (pai == EXTENT_PAI_PAC) { From 0689448b1e8c8c5ae2d1c216f86c88d22a124166 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 21 Jun 2021 14:07:10 -0700 Subject: [PATCH 2072/2608] Travis: Unbreak the builds. In the hopes of future-proofing as much as possible, jump to the latest distribution Travis supports. 
--- .travis.yml | 155 +++++++++++++-------------------------- configure.ac | 1 + scripts/gen_travis.py | 59 +++++++-------- src/stats.c | 2 +- test/include/test/test.h | 2 - test/src/test.c | 13 ---- test/unit/fb.c | 4 +- test/unit/log.c | 2 +- 8 files changed, 87 insertions(+), 151 deletions(-) diff --git a/.travis.yml b/.travis.yml index b61627bd..6aea0581 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ language: generic -dist: precise +dist: focal matrix: include: @@ -8,23 +8,20 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 addons: &gcc_multilib apt: packages: - gcc-multilib + - g++-multilib env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 @@ -58,132 +55,92 @@ matrix: env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: linux + arch: ppc64le env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: 
osx - arch: amd64 + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 + - os: linux + arch: ppc64le env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 + - os: linux + arch: ppc64le env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 + - os: linux + arch: ppc64le env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 + - os: linux + arch: ppc64le env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - addons: &gcc_ppc - apt: - packages: - - g++-8 - env: CC=gcc-8 CXX=g++-8 COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 addons: *gcc_multilib - env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 addons: *gcc_multilib @@ -366,14 +323,6 @@ matrix: - os: linux env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - # Valgrind - - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" - addons: - apt: - packages: - - valgrind - before_script: - autoconf diff --git a/configure.ac b/configure.ac index 0748329d..17838003 100644 --- a/configure.ac +++ b/configure.ac @@ -258,6 +258,7 @@ if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-Wno-missing-braces]) dnl This one too. 
JE_CFLAGS_ADD([-Wno-missing-field-initializers]) + JE_CFLAGS_ADD([-Wno-missing-attributes]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 6832f91b..992bf005 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 from itertools import combinations travis_template = """\ language: generic -dist: precise +dist: focal matrix: include: @@ -30,7 +30,6 @@ script: # travis though, we don't test all 2**7 = 128 possible combinations of these; # instead, we only test combinations of up to 2 'unusual' settings, under the # hope that bugs involving interactions of such settings are rare. -# Things at once, for C(7, 0) + C(7, 1) + C(7, 2) = 29 MAX_UNUSUAL_OPTIONS = 2 os_default = 'linux' @@ -41,7 +40,6 @@ arch_unusual = 'ppc64le' compilers_default = 'CC=gcc CXX=g++' compilers_unusual = 'CC=clang CXX=clang++' -compilers_ppc_default = 'CC=gcc-8 CXX=g++-8' compiler_flag_unusuals = ['-m32'] @@ -67,7 +65,7 @@ all_unusuals = ( ) unusual_combinations_to_test = [] -for i in xrange(MAX_UNUSUAL_OPTIONS + 1): +for i in range(MAX_UNUSUAL_OPTIONS + 1): unusual_combinations_to_test += combinations(all_unusuals, i) gcc_multilib_set = False @@ -117,24 +115,24 @@ def format_job(combination): job += ' apt:\n' job += ' packages:\n' job += ' - gcc-multilib\n' + job += ' - g++-multilib\n' gcc_multilib_set = True - if arch == 'ppc64le': - job += ' addons:' - if gcc_ppc_set: - job += ' *gcc_ppc\n' - else: - job += ' &gcc_ppc\n' - job += ' apt:\n' - job += ' packages:\n' - job += ' - g++-8\n' - # Compilers overwritten for PPC64LE to gcc-8 - compilers = compilers_ppc_default - # We get some spurious errors when -Warray-bounds is enabled. 
+ extra_cflags = ['-Werror', '-Wno-array-bounds'] + if 'clang' in compilers or os == 'osx': + extra_cflags += [ + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] + if os == 'osx': + extra_cflags += [ + '-Wno-deprecated-declarations', + ] env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' - 'EXTRA_CFLAGS="-Werror -Wno-array-bounds"').format( - compilers, " ".join(compiler_flags), " ".join(configure_flags)) + 'EXTRA_CFLAGS="{}"'.format( + compilers, ' '.join(compiler_flags), ' '.join(configure_flags), + ' '.join(extra_cflags))) job += ' env: %s\n' % env_string return job @@ -157,16 +155,19 @@ include_rows += '''\ env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" ''' +# Does not seem to be working on newer travis machines. Valgrind has long been a +# pain point; abandon it for now. # Valgrind build bots -include_rows += ''' - # Valgrind - - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" - addons: - apt: - packages: - - valgrind -''' +#include_rows += ''' +# # Valgrind +# - os: linux +# arch: amd64 +# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" +# addons: +# apt: +# packages: +# - valgrind +#''' # To enable valgrind on macosx add: # @@ -176,4 +177,4 @@ include_rows += ''' # # It currently fails due to: https://github.com/jemalloc/jemalloc/issues/1274 -print travis_template % include_rows +print(travis_template % include_rows) diff --git a/src/stats.c b/src/stats.c index ef173034..2e8c4516 100644 --- a/src/stats.c +++ b/src/stats.c @@ -947,7 +947,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; - 
for (pszind_t j = 0; j < PSSET_NPSIZES; j++) { + for (pszind_t j = 0; j < PSSET_NPSIZES && j < SC_NPSIZES; j++) { stats_arenas_mib[5] = j; CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge", diff --git a/test/include/test/test.h b/test/include/test/test.h index 2167e8c6..d4b65912 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -581,5 +581,3 @@ test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); void p_test_fail(const char *prefix, const char *message); - -void strncpy_cond(void *dst, const char *src, bool cond); diff --git a/test/src/test.c b/test/src/test.c index 4583e55a..f97ce4d1 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -232,16 +232,3 @@ p_test_fail(const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; } - -void -strncpy_cond(void *dst, const char *src, bool cond) { - if (cond) { - /* - * Avoid strcpy and explicitly set length to 0 because the - * `stringop-overflow` check may warn even if the specific test - * is unreachable. - */ - size_t n = cond ? 
strlen(src) + 1 : 0; - strncpy(dst, src, n); - } -} diff --git a/test/unit/fb.c b/test/unit/fb.c index d5126f6b..ad72c75a 100644 --- a/test/unit/fb.c +++ b/test/unit/fb.c @@ -473,8 +473,8 @@ static void expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) { bool iter_res; - size_t iter_begin; - size_t iter_len; + size_t iter_begin JEMALLOC_CC_SILENCE_INIT(0); + size_t iter_len JEMALLOC_CC_SILENCE_INIT(0); if (val) { if (forward) { iter_res = fb_srange_iter(fb, nbits, pos, diff --git a/test/unit/log.c b/test/unit/log.c index 02e6a6a6..c09b5896 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -4,7 +4,7 @@ static void update_log_var_names(const char *names) { - strncpy_cond(log_var_names, names, config_log); + strncpy(log_var_names, names, sizeof(log_var_names)); } static void From 4452a4812ff8bc2a5127a9b220de05999a0652f1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 21 Jun 2021 13:40:30 -0700 Subject: [PATCH 2073/2608] Add opt.experimental_infallible_new. This allows a guarantee that operator new never throws. Fix the .gitignore rules to include test/integration/cpp while we're here. 
--- .gitignore | 1 + Makefile.in | 4 +- configure.ac | 3 + .../internal/jemalloc_internal_defs.h.in | 3 + .../internal/jemalloc_internal_externs.h | 1 + .../jemalloc/internal/jemalloc_preamble.h.in | 9 +++ src/jemalloc.c | 7 +++ src/jemalloc_cpp.cpp | 7 ++- src/stats.c | 1 + test/integration/cpp/basic.cpp | 1 - test/integration/cpp/infallible_new_false.cpp | 23 +++++++ test/integration/cpp/infallible_new_false.sh | 8 +++ test/integration/cpp/infallible_new_true.cpp | 61 +++++++++++++++++++ test/integration/cpp/infallible_new_true.sh | 8 +++ 14 files changed, 134 insertions(+), 3 deletions(-) create mode 100644 test/integration/cpp/infallible_new_false.cpp create mode 100644 test/integration/cpp/infallible_new_false.sh create mode 100644 test/integration/cpp/infallible_new_true.cpp create mode 100644 test/integration/cpp/infallible_new_true.sh diff --git a/.gitignore b/.gitignore index 0c3c040e..1c0b3385 100644 --- a/.gitignore +++ b/.gitignore @@ -52,6 +52,7 @@ test/include/test/jemalloc_test.h test/include/test/jemalloc_test_defs.h /test/integration/[A-Za-z]* +!/test/integration/cpp/ !/test/integration/[A-Za-z]*.* /test/integration/*.[od] /test/integration/*.out diff --git a/Makefile.in b/Makefile.in index 130fa1ee..c36b818b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -309,7 +309,9 @@ TESTS_INTEGRATION += \ endif ifeq (@enable_cxx@, 1) CPP_SRCS := $(srcroot)src/jemalloc_cpp.cpp -TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp +TESTS_INTEGRATION_CPP := $(srcroot)test/integration/cpp/basic.cpp \ + $(srcroot)test/integration/cpp/infallible_new_true.cpp \ + $(srcroot)test/integration/cpp/infallible_new_false.cpp else CPP_SRCS := TESTS_INTEGRATION_CPP := diff --git a/configure.ac b/configure.ac index 17838003..5eb4d46f 100644 --- a/configure.ac +++ b/configure.ac @@ -324,6 +324,9 @@ if test "x$enable_cxx" = "x1" ; then enable_cxx="0" fi fi +if test "x$enable_cxx" = "x1"; then + AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ]) +fi AC_SUBST([enable_cxx]) 
AC_SUBST([CONFIGURE_CXXFLAGS]) AC_SUBST([SPECIFIED_CXXFLAGS]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 093c8be0..78d1213e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -401,6 +401,9 @@ /* Performs additional safety checks when defined. */ #undef JEMALLOC_OPT_SAFETY_CHECKS +/* Is C++ support being built? */ +#undef JEMALLOC_ENABLE_CXX + /* Performs additional size checks when defined. */ #undef JEMALLOC_OPT_SIZE_CHECKS diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index de5731fc..af6dc0a2 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -26,6 +26,7 @@ extern void (*junk_free_callback)(void *ptr, size_t size); extern void (*junk_alloc_callback)(void *ptr, size_t size); extern bool opt_utrace; extern bool opt_xmalloc; +extern bool opt_experimental_infallible_new; extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index ef1cbaee..f5d83a66 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -198,6 +198,15 @@ static const bool config_opt_size_checks = #endif ; +/* Whether or not the C++ extensions are enabled. */ +static const bool config_enable_cxx = +#ifdef JEMALLOC_ENABLE_CXX + true +#else + false +#endif +; + #if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. 
*/ #define JEMALLOC_PERCPU_ARENA diff --git a/src/jemalloc.c b/src/jemalloc.c index 1f489932..c70244d1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -141,6 +141,7 @@ void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free; bool opt_utrace = false; bool opt_xmalloc = false; +bool opt_experimental_infallible_new = false; bool opt_zero = false; unsigned opt_narenas = 0; fxp_t opt_narenas_ratio = FXP_INIT_INT(4); @@ -1307,6 +1308,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (config_xmalloc) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } + if (config_enable_cxx) { + CONF_HANDLE_BOOL( + opt_experimental_infallible_new, + "experimental_infallible_new") + } + CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 47ba92a0..451655f1 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -56,6 +56,12 @@ void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcep JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { + if (opt_experimental_infallible_new) { + safety_check_fail(": Allocation failed and " + "opt.experimental_infallible_new is true. 
Aborting.\n"); + return nullptr; + } + void *ptr = nullptr; while (ptr == nullptr) { @@ -93,7 +99,6 @@ fallback_impl(std::size_t size) noexcept(IsNoExcept) { if (likely(ptr != nullptr)) { return ptr; } - return handleOOM(size, IsNoExcept); } diff --git a/src/stats.c b/src/stats.c index 2e8c4516..34cae0ab 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1501,6 +1501,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("zero") OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") + OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index b48ec8aa..c1cf6cd8 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -1,4 +1,3 @@ -#include #include "test/jemalloc_test.h" TEST_BEGIN(test_basic) { diff --git a/test/integration/cpp/infallible_new_false.cpp b/test/integration/cpp/infallible_new_false.cpp new file mode 100644 index 00000000..42196d6a --- /dev/null +++ b/test/integration/cpp/infallible_new_false.cpp @@ -0,0 +1,23 @@ +#include + +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_failing_alloc) { + bool saw_exception = false; + try { + /* Too big of an allocation to succeed. */ + void *volatile ptr = ::operator new((size_t)-1); + (void)ptr; + } catch (...) 
{ + saw_exception = true; + } + expect_true(saw_exception, "Didn't get a failure"); +} +TEST_END + +int +main(void) { + return test( + test_failing_alloc); +} + diff --git a/test/integration/cpp/infallible_new_false.sh b/test/integration/cpp/infallible_new_false.sh new file mode 100644 index 00000000..7d41812c --- /dev/null +++ b/test/integration/cpp/infallible_new_false.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +XMALLOC_STR="" +if [ "x${enable_xmalloc}" = "x1" ] ; then + XMALLOC_STR="xmalloc:false," +fi + +export MALLOC_CONF="${XMALLOC_STR}experimental_infallible_new:false" diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp new file mode 100644 index 00000000..9b943bd4 --- /dev/null +++ b/test/integration/cpp/infallible_new_true.cpp @@ -0,0 +1,61 @@ +#include + +/* + * We can't test C++ in unit tests, and we can't change the safety check failure + * hook in integration tests. So we check that we *actually* abort on failure, + * by forking and checking the child process exit code. + */ + +/* It's a unix system? */ +#ifdef __unix__ +/* I know this! */ +#include +#include +#include +static const bool can_fork = true; +#else +static const bool can_fork = false; +#endif + +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_failing_alloc) { + test_skip_if(!can_fork); +#ifdef __unix__ + pid_t pid = fork(); + expect_d_ne(pid, -1, "Unexpected fork failure"); + if (pid == 0) { + /* + * In the child, we'll print an error message to stderr before + * exiting. Close stderr to avoid spamming output for this + * expected failure. + */ + fclose(stderr); + try { + /* Too big of an allocation to succeed. */ + void *volatile ptr = ::operator new((size_t)-1); + (void)ptr; + } catch (...) { + /* + * Swallow the exception; remember, we expect this to + * fail via an abort within new, not because an + * exception didn't get caught. 
+ */ + } + } else { + int status; + pid_t err = waitpid(pid, &status, 0); + expect_d_ne(-1, err, "waitpid failure"); + expect_false(WIFEXITED(status), + "Should have seen an abnormal failure"); + } +#endif +} +TEST_END + +int +main(void) { + return test( + test_failing_alloc); +} + diff --git a/test/integration/cpp/infallible_new_true.sh b/test/integration/cpp/infallible_new_true.sh new file mode 100644 index 00000000..4a0ff542 --- /dev/null +++ b/test/integration/cpp/infallible_new_true.sh @@ -0,0 +1,8 @@ +#!/bin/sh + +XMALLOC_STR="" +if [ "x${enable_xmalloc}" = "x1" ] ; then + XMALLOC_STR="xmalloc:false," +fi + +export MALLOC_CONF="${XMALLOC_STR}experimental_infallible_new:true" From de033f56c08745500f98b590f5138ddc4a5c0732 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 11 May 2021 14:49:55 -0700 Subject: [PATCH 2074/2608] mpsc_queue: Add module. This is a simple multi-producer, single-consumer queue. The intended use case is in the HPA, as we begin supporting hpdatas that move between hpa_shards. We take just a single CAS as the cost to send a message (or a batch of messages) in the low-contention case, and lock-freedom lets us avoid some lock-ordering issues. 
--- Makefile.in | 1 + include/jemalloc/internal/mpsc_queue.h | 134 +++++++++++ test/unit/mpsc_queue.c | 304 +++++++++++++++++++++++++ 3 files changed, 439 insertions(+) create mode 100644 include/jemalloc/internal/mpsc_queue.h create mode 100644 test/unit/mpsc_queue.c diff --git a/Makefile.in b/Makefile.in index c36b818b..ed03d4e2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -233,6 +233,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/malloc_conf_2.c \ $(srcroot)test/unit/malloc_io.c \ $(srcroot)test/unit/math.c \ + $(srcroot)test/unit/mpsc_queue.c \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h new file mode 100644 index 00000000..316ea9b1 --- /dev/null +++ b/include/jemalloc/internal/mpsc_queue.h @@ -0,0 +1,134 @@ +#ifndef JEMALLOC_INTERNAL_MPSC_QUEUE_H +#define JEMALLOC_INTERNAL_MPSC_QUEUE_H + +#include "jemalloc/internal/atomic.h" + +/* + * A concurrent implementation of a multi-producer, single-consumer queue. It + * supports three concurrent operations: + * - Push + * - Push batch + * - Pop batch + * + * These operations are all lock-free. + * + * The implementation is the simple two-stack queue built on a Treiber stack. + * It's not terribly efficient, but this isn't expected to go into anywhere with + * hot code. In fact, we don't really even need queue semantics in any + * anticipated use cases; we could get away with just the stack. But this way + * lets us frame the API in terms of the existing list types, which is a nice + * convenience. We can save on cache misses by introducing our own (parallel) + * single-linked list type here, and dropping FIFO semantics, if we need this to + * get faster. 
Since we're currently providing queue semantics though, we use + * the prev field in the link rather than the next field for Treiber-stack + * linkage, so that we can preserve order for batch-pushed lists (recall that the + * two-stack trick reverses order in the lock-free first stack). + */ + +#define mpsc_queue(a_type) \ +struct { \ + atomic_p_t tail; \ +} + +#define mpsc_queue_proto(a_attr, a_prefix, a_queue_type, a_type, \ + a_list_type) \ +/* Initialize a queue. */ \ +a_attr void \ +a_prefix##new(a_queue_type *queue); \ +/* Insert all items in src into the queue, clearing src. */ \ +a_attr void \ +a_prefix##push_batch(a_queue_type *queue, a_list_type *src); \ +/* Insert node into the queue. */ \ +a_attr void \ +a_prefix##push(a_queue_type *queue, a_type *node); \ +/* \ + * Pop all items in the queue into the list at dst. dst should already \ + * be initialized (and may contain existing items, which then remain \ + * in dst). \ + */ \ +a_attr void \ +a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst); + +#define mpsc_queue_gen(a_attr, a_prefix, a_queue_type, a_type, \ + a_list_type, a_link) \ +a_attr void \ +a_prefix##new(a_queue_type *queue) { \ + atomic_store_p(&queue->tail, NULL, ATOMIC_RELAXED); \ +} \ +a_attr void \ +a_prefix##push_batch(a_queue_type *queue, a_list_type *src) { \ + /* \ + * Reuse the ql list next field as the Treiber stack next \ + * field. \ + */ \ + a_type *first = ql_first(src); \ + a_type *last = ql_last(src, a_link); \ + void* cur_tail = atomic_load_p(&queue->tail, ATOMIC_RELAXED); \ + do { \ + /* \ + * Note that this breaks the queue ring structure; \ + * it's not a ring any more! \ + */ \ + first->a_link.qre_prev = cur_tail; \ + /* \ + * Note: the upcoming CAS doesn't need an atomic; every \ + * push only needs to synchronize with the next pop, \ + * which we get from the release sequence rules. 
\ + */ \ + } while (!atomic_compare_exchange_weak_p(&queue->tail, \ + &cur_tail, last, ATOMIC_RELEASE, ATOMIC_RELAXED)); \ + ql_new(src); \ +} \ +a_attr void \ +a_prefix##push(a_queue_type *queue, a_type *node) { \ + ql_elm_new(node, a_link); \ + a_list_type list; \ + ql_new(&list); \ + ql_head_insert(&list, node, a_link); \ + a_prefix##push_batch(queue, &list); \ +} \ +a_attr void \ +a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst) { \ + a_type *tail = atomic_load_p(&queue->tail, ATOMIC_RELAXED); \ + if (tail == NULL) { \ + /* \ + * In the common special case where there are no \ + * pending elements, bail early without a costly RMW. \ + */ \ + return; \ + } \ + tail = atomic_exchange_p(&queue->tail, NULL, ATOMIC_ACQUIRE); \ + /* \ + * It's a single-consumer queue, so if cur started non-NULL, \ + * it'd better stay non-NULL. \ + */ \ + assert(tail != NULL); \ + /* \ + * We iterate through the stack and both fix up the link \ + * structure (stack insertion broke the list requirement that \ + * the list be circularly linked). It's just as efficient at \ + * this point to make the queue a "real" queue, so do that as \ + * well. \ + * If this ever gets to be a hot spot, we can omit this fixup \ + * and make the queue a bag (i.e. not necessarily ordered), but \ + * that would mean jettisoning the existing list API as the \ + * batch pushing/popping interface. \ + */ \ + a_list_type reversed; \ + ql_new(&reversed); \ + while (tail != NULL) { \ + /* \ + * Pop an item off the stack, prepend it onto the list \ + * (reversing the order). Recall that we use the \ + * list prev field as the Treiber stack next field to \ + * preserve order of batch-pushed items when reversed. 
\ + */ \ + a_type *next = tail->a_link.qre_prev; \ + ql_elm_new(tail, a_link); \ + ql_head_insert(&reversed, tail, a_link); \ + tail = next; \ + } \ + ql_concat(dst, &reversed, a_link); \ +} + +#endif /* JEMALLOC_INTERNAL_MPSC_QUEUE_H */ diff --git a/test/unit/mpsc_queue.c b/test/unit/mpsc_queue.c new file mode 100644 index 00000000..895edf84 --- /dev/null +++ b/test/unit/mpsc_queue.c @@ -0,0 +1,304 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/mpsc_queue.h" + +typedef struct elem_s elem_t; +typedef ql_head(elem_t) elem_list_t; +typedef mpsc_queue(elem_t) elem_mpsc_queue_t; +struct elem_s { + int thread; + int idx; + ql_elm(elem_t) link; +}; + +/* Include both proto and gen to make sure they match up. */ +mpsc_queue_proto(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, + elem_list_t); +mpsc_queue_gen(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, + elem_list_t, link); + +static void +init_elems_simple(elem_t *elems, int nelems, int thread) { + for (int i = 0; i < nelems; i++) { + elems[i].thread = thread; + elems[i].idx = i; + ql_elm_new(&elems[i], link); + } +} + +static void +check_elems_simple(elem_list_t *list, int nelems, int thread) { + elem_t *elem; + int next_idx = 0; + ql_foreach(elem, list, link) { + expect_d_lt(next_idx, nelems, "Too many list items"); + expect_d_eq(thread, elem->thread, ""); + expect_d_eq(next_idx, elem->idx, "List out of order"); + next_idx++; + } +} + +TEST_BEGIN(test_simple) { + enum {NELEMS = 10}; + elem_t elems[NELEMS]; + elem_list_t list; + elem_mpsc_queue_t queue; + + /* Pop empty queue onto empty list -> empty list */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + elem_mpsc_queue_pop_batch(&queue, &list); + expect_true(ql_empty(&list), ""); + + /* Pop empty queue onto nonempty list -> list unchanged */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + init_elems_simple(elems, NELEMS, 0); + for (int i = 0; i < NELEMS; i++) { + ql_tail_insert(&list, &elems[i], link); + } + 
elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); + + /* Pop nonempty queue onto empty list -> list takes queue contents */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + init_elems_simple(elems, NELEMS, 0); + for (int i = 0; i < NELEMS; i++) { + elem_mpsc_queue_push(&queue, &elems[i]); + } + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); + + /* Pop nonempty queue onto nonempty list -> list gains queue contents */ + ql_new(&list); + elem_mpsc_queue_new(&queue); + init_elems_simple(elems, NELEMS, 0); + for (int i = 0; i < NELEMS / 2; i++) { + ql_tail_insert(&list, &elems[i], link); + } + for (int i = NELEMS / 2; i < NELEMS; i++) { + elem_mpsc_queue_push(&queue, &elems[i]); + } + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); + +} +TEST_END + +TEST_BEGIN(test_push_single_or_batch) { + enum { + BATCH_MAX = 10, + /* + * We'll push i items one-at-a-time, then i items as a batch, + * then i items as a batch again, as i ranges from 1 to + * BATCH_MAX. So we need 3 times the sum of the numbers from 1 + * to BATCH_MAX elements total. + */ + NELEMS = 3 * BATCH_MAX * (BATCH_MAX - 1) / 2 + }; + elem_t elems[NELEMS]; + init_elems_simple(elems, NELEMS, 0); + elem_list_t list; + ql_new(&list); + elem_mpsc_queue_t queue; + elem_mpsc_queue_new(&queue); + int next_idx = 0; + for (int i = 1; i < 10; i++) { + /* Push i items 1 at a time. */ + for (int j = 0; j < i; j++) { + elem_mpsc_queue_push(&queue, &elems[next_idx]); + next_idx++; + } + /* Push i items in batch. */ + for (int j = 0; j < i; j++) { + ql_tail_insert(&list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &list); + expect_true(ql_empty(&list), "Batch push should empty source"); + /* + * Push i items in batch, again. This tests two batches + * proceeding one after the other. 
+ */ + for (int j = 0; j < i; j++) { + ql_tail_insert(&list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &list); + expect_true(ql_empty(&list), "Batch push should empty source"); + } + expect_d_eq(NELEMS, next_idx, "Miscomputed number of elems to push."); + + expect_true(ql_empty(&list), ""); + elem_mpsc_queue_pop_batch(&queue, &list); + check_elems_simple(&list, NELEMS, 0); +} +TEST_END + +TEST_BEGIN(test_multi_op) { + enum {NELEMS = 20}; + elem_t elems[NELEMS]; + init_elems_simple(elems, NELEMS, 0); + elem_list_t push_list; + ql_new(&push_list); + elem_list_t result_list; + ql_new(&result_list); + elem_mpsc_queue_t queue; + elem_mpsc_queue_new(&queue); + + int next_idx = 0; + /* Push first quarter 1-at-a-time. */ + for (int i = 0; i < NELEMS / 4; i++) { + elem_mpsc_queue_push(&queue, &elems[next_idx]); + next_idx++; + } + /* Push second quarter in batch. */ + for (int i = NELEMS / 4; i < NELEMS / 2; i++) { + ql_tail_insert(&push_list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &push_list); + /* Batch pop all pushed elements. */ + elem_mpsc_queue_pop_batch(&queue, &result_list); + /* Push third quarter in batch. */ + for (int i = NELEMS / 2; i < 3 * NELEMS / 4; i++) { + ql_tail_insert(&push_list, &elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(&queue, &push_list); + /* Push last quarter one-at-a-time. */ + for (int i = 3 * NELEMS / 4; i < NELEMS; i++) { + elem_mpsc_queue_push(&queue, &elems[next_idx]); + next_idx++; + } + /* Pop them again. Order of existing list should be preserved. 
*/ + elem_mpsc_queue_pop_batch(&queue, &result_list); + + check_elems_simple(&result_list, NELEMS, 0); + +} +TEST_END + +typedef struct pusher_arg_s pusher_arg_t; +struct pusher_arg_s { + elem_mpsc_queue_t *queue; + int thread; + elem_t *elems; + int nelems; +}; + +typedef struct popper_arg_s popper_arg_t; +struct popper_arg_s { + elem_mpsc_queue_t *queue; + int npushers; + int nelems_per_pusher; + int *pusher_counts; +}; + +static void * +thd_pusher(void *void_arg) { + pusher_arg_t *arg = (pusher_arg_t *)void_arg; + int next_idx = 0; + while (next_idx < arg->nelems) { + /* Push 10 items in batch. */ + elem_list_t list; + ql_new(&list); + int limit = next_idx + 10; + while (next_idx < arg->nelems && next_idx < limit) { + ql_tail_insert(&list, &arg->elems[next_idx], link); + next_idx++; + } + elem_mpsc_queue_push_batch(arg->queue, &list); + /* Push 10 items one-at-a-time. */ + limit = next_idx + 10; + while (next_idx < arg->nelems && next_idx < limit) { + elem_mpsc_queue_push(arg->queue, &arg->elems[next_idx]); + next_idx++; + } + + } + return NULL; +} + +static void * +thd_popper(void *void_arg) { + popper_arg_t *arg = (popper_arg_t *)void_arg; + int done_pushers = 0; + while (done_pushers < arg->npushers) { + elem_list_t list; + ql_new(&list); + elem_mpsc_queue_pop_batch(arg->queue, &list); + elem_t *elem; + ql_foreach(elem, &list, link) { + int thread = elem->thread; + int idx = elem->idx; + expect_d_eq(arg->pusher_counts[thread], idx, + "Thread's pushes reordered"); + arg->pusher_counts[thread]++; + if (arg->pusher_counts[thread] + == arg->nelems_per_pusher) { + done_pushers++; + } + } + } + return NULL; +} + +TEST_BEGIN(test_multiple_threads) { + enum { + NPUSHERS = 4, + NELEMS_PER_PUSHER = 1000*1000, + }; + thd_t pushers[NPUSHERS]; + pusher_arg_t pusher_arg[NPUSHERS]; + + thd_t popper; + popper_arg_t popper_arg; + + elem_mpsc_queue_t queue; + elem_mpsc_queue_new(&queue); + + elem_t *elems = calloc(NPUSHERS * NELEMS_PER_PUSHER, sizeof(elem_t)); + elem_t 
*elem_iter = elems; + for (int i = 0; i < NPUSHERS; i++) { + pusher_arg[i].queue = &queue; + pusher_arg[i].thread = i; + pusher_arg[i].elems = elem_iter; + pusher_arg[i].nelems = NELEMS_PER_PUSHER; + + init_elems_simple(elem_iter, NELEMS_PER_PUSHER, i); + elem_iter += NELEMS_PER_PUSHER; + } + popper_arg.queue = &queue; + popper_arg.npushers = NPUSHERS; + popper_arg.nelems_per_pusher = NELEMS_PER_PUSHER; + int pusher_counts[NPUSHERS] = {0}; + popper_arg.pusher_counts = pusher_counts; + + thd_create(&popper, thd_popper, (void *)&popper_arg); + for (int i = 0; i < NPUSHERS; i++) { + thd_create(&pushers[i], thd_pusher, &pusher_arg[i]); + } + + thd_join(popper, NULL); + for (int i = 0; i < NPUSHERS; i++) { + thd_join(pushers[i], NULL); + } + + for (int i = 0; i < NPUSHERS; i++) { + expect_d_eq(NELEMS_PER_PUSHER, pusher_counts[i], ""); + } + + free(elems); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_simple, + test_push_single_or_batch, + test_multi_op, + test_multiple_threads); +} From d202218e865a14d8fcff5c41682719a07434518c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Jun 2021 17:14:43 -0700 Subject: [PATCH 2075/2608] HPA: Fix typos with big performance implications. This fixes two simple but significant typos in the HPA: - The conf string parsing accidentally set a min value of PAGE for hpa_sec_batch_fill_extra; i.e. allocating 4096 extra pages every time we attempted to allocate a single page. This puts us over the SEC flush limit, so we then immediately flush all but one of them (probably triggering purging). - The HPA was using the default PAI batch alloc implementation, which meant it did not actually get any locking advantages. This snuck by because I did all the performance testing without using the PAI interface or config settings. When I cleaned it up and put everything behind nice interfaces, I only did correctness checks, and didn't try any performance ones. 
--- src/hpa.c | 2 +- src/jemalloc.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 22cf0072..8ef881fd 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -92,7 +92,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, * operating on corrupted data. */ shard->pai.alloc = &hpa_alloc; - shard->pai.alloc_batch = &pai_alloc_batch_default; + shard->pai.alloc_batch = &hpa_alloc_batch; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; diff --git a/src/jemalloc.c b/src/jemalloc.c index c70244d1..6ff9f97b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1489,8 +1489,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_bytes_after_flush", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); + "hpa_sec_batch_fill_extra", 0, HUGEPAGE_PAGES, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { if (CONF_MATCH_VALUE("default")) { From 9c42ed2d1491451dcc8cdb429ecf9ee46070054d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 7 Jul 2021 15:16:38 -0700 Subject: [PATCH 2076/2608] Travis: Don't test "clang" on OS X. On OS X, "gcc" is really just clang anyways, so this combination gets tested by the gcc test. This is purely redundant, and (since it runs early in the output) increases time to signal for real breakages further down in the list. 
--- .travis.yml | 3 --- scripts/gen_travis.py | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 6aea0581..5cf0e08e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -53,9 +53,6 @@ matrix: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 992bf005..fe9d8403 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -87,6 +87,9 @@ def format_job(combination): 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \ in malloc_conf): return "" + # gcc is just a redirect to clang on OS X. No need to test both. + if os == 'osx' and compilers_unusual in combination: + return "" if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) From 347523517bb90210ffeadf115730003531645394 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 8 Jul 2021 10:38:45 -0700 Subject: [PATCH 2077/2608] PAI: Fix a typo. 
--- include/jemalloc/internal/pai.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 16e022d5..4d3a9e01 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -61,7 +61,7 @@ pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { static inline void pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { - return self->dalloc_batch(tsdn, self, list); + self->dalloc_batch(tsdn, self, list); } /* From 41fd56605e95c40650ab1d012b5e09c273b19490 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Jun 2021 13:29:02 -0700 Subject: [PATCH 2078/2608] HPA: Purge across retained extents. This lets us cut down on the number of expensive system calls we perform. --- include/jemalloc/internal/hpdata.h | 5 +- src/hpdata.c | 97 ++++++++++++++++++++++++------ test/unit/hpdata.c | 81 +++++++++++++++++++++++-- test/unit/psset.c | 2 + 4 files changed, 160 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 4ff2e575..32e26248 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -110,7 +110,7 @@ struct hpdata_s { */ size_t h_ntouched; - /* The dirty pages (using the same definition as above). */ + /* The touched pages (using the same definition as above). */ fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; }; @@ -356,6 +356,7 @@ void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); typedef struct hpdata_purge_state_s hpdata_purge_state_t; struct hpdata_purge_state_s { size_t npurged; + size_t ndirty_to_purge; fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; size_t next_purge_search_begin; }; @@ -372,7 +373,7 @@ struct hpdata_purge_state_s { * until you're done, and then end. Allocating out of an hpdata undergoing * purging is not allowed. * - * Returns the number of pages that will be purged. 
+ * Returns the number of dirty pages that will be purged. */ size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); diff --git a/src/hpdata.c b/src/hpdata.c index b861e9e4..18519be3 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -166,33 +166,93 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { hpdata_assert_consistent(hpdata); - /* See the comment in reserve. */ + /* + * See the comment below; we might purge any inactive extent, so it's + * unsafe for any other thread to turn any inactive extent active while + * we're operating on it. + */ + assert(!hpdata_alloc_allowed_get(hpdata)); purge_state->npurged = 0; purge_state->next_purge_search_begin = 0; /* - * Initialize to_purge with everything that's not active but that is - * dirty. + * Initialize to_purge. + * - * As an optimization, we could note that in practice we never allocate - * out of a hugepage while purging within it, and so could try to - * combine dirty extents separated by a non-dirty but non-active extent - * to avoid purge calls. This does nontrivially complicate metadata - * tracking though, so let's hold off for now. + * It's possible to end up in situations where two dirty extents are + * separated by a retained extent: + * - 1 page allocated. + * - 1 page allocated. + * - 1 page allocated. + * + * If the middle page is freed and purged, and then the first and third + * pages are freed, and then another purge pass happens, the hpdata + * looks like this: + * - 1 page dirty. + * - 1 page retained. + * - 1 page dirty. + * + * But it's safe to do a single 3-page purge. + * + * We do this by first computing the dirty pages, and then filling in + * any gaps by extending each range in the dirty bitmap until + * the next active page. 
This purges more pages, but the expensive part + * of purging is the TLB shootdowns, rather than the kernel state + * tracking; doing a little bit more of the latter is fine if it saves + * us from doing some of the former. */ - fb_bit_not(purge_state->to_purge, hpdata->active_pages, HUGEPAGE_PAGES); - fb_bit_and(purge_state->to_purge, purge_state->to_purge, - hpdata->touched_pages, HUGEPAGE_PAGES); - /* We purge everything we can. */ - size_t to_purge = hpdata->h_ntouched - hpdata->h_nactive; - assert(to_purge == fb_scount( + /* + * The dirty pages are those that are touched but not active. Note that + * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the + * fb_group_t is 64 bits, so this is 64 bytes, spread across 8 + * fb_group_ts. + */ + fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + fb_init(dirty_pages, HUGEPAGE_PAGES); + fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES); + fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages, + HUGEPAGE_PAGES); + + fb_init(purge_state->to_purge, HUGEPAGE_PAGES); + size_t next_bit = 0; + while (next_bit < HUGEPAGE_PAGES) { + size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, + next_bit); + /* Recall that fb_ffs returns nbits if no set bit is found. */ + if (next_dirty == HUGEPAGE_PAGES) { + break; + } + size_t next_active = fb_ffs(hpdata->active_pages, + HUGEPAGE_PAGES, next_dirty); + /* + * Don't purge past the end of the dirty extent, into retained + * pages. This helps the kernel a tiny bit, but honestly it's + * mostly helpful for testing (where we tend to write test cases + * that think in terms of the dirty ranges). 
+ */ + ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES, + next_active - 1); + assert(last_dirty >= 0); + assert((size_t)last_dirty >= next_dirty); + assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES); + + fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty, + last_dirty - next_dirty + 1); + next_bit = next_active + 1; + } + + /* We should purge, at least, everything dirty. */ + size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive; + purge_state->ndirty_to_purge = ndirty; + assert(ndirty <= fb_scount( purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, + HUGEPAGE_PAGES)); hpdata_assert_consistent(hpdata); - return to_purge; + return ndirty; } bool @@ -203,6 +263,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, * hpdata without synchronization, and therefore have no right to expect * a consistent state. */ + assert(!hpdata_alloc_allowed_get(hpdata)); if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { return false; @@ -228,19 +289,21 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, void hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { + assert(!hpdata_alloc_allowed_get(hpdata)); hpdata_assert_consistent(hpdata); /* See the comment in reserve. 
*/ assert(!hpdata->h_in_psset || hpdata->h_updating); assert(purge_state->npurged == fb_scount(purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(purge_state->npurged >= purge_state->ndirty_to_purge); fb_bit_not(purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES); fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, purge_state->to_purge, HUGEPAGE_PAGES); - assert(hpdata->h_ntouched >= purge_state->npurged); - hpdata->h_ntouched -= purge_state->npurged; + assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge); + hpdata->h_ntouched -= purge_state->ndirty_to_purge; hpdata_assert_consistent(hpdata); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 11bccc58..288e71d4 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -67,6 +67,7 @@ TEST_BEGIN(test_purge_simple) { expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); @@ -90,11 +91,9 @@ TEST_BEGIN(test_purge_simple) { TEST_END /* - * We only test intervening dalloc's not intervening allocs; we don't need - * intervening allocs, and foreseeable optimizations will make them not just - * unnecessary but incorrect. In particular, if there are two dirty extents - * separated only by a retained extent, we can just purge the entire range, - * saving a purge call. + * We only test intervening dalloc's not intervening allocs; the latter are + * disallowed as a purging precondition (because they interfere with purging + * across a retained extent, saving a purge call). 
*/ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; @@ -112,6 +111,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); @@ -137,7 +137,7 @@ TEST_BEGIN(test_purge_intervening_dalloc) { expect_ptr_eq( (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), purge_addr, ""); - expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); @@ -150,6 +150,74 @@ TEST_BEGIN(test_purge_intervening_dalloc) { } TEST_END +TEST_BEGIN(test_purge_over_retained) { + void *purge_addr; + size_t purge_size; + + hpdata_t hpdata; + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + + /* Allocate the first 3/4 of the pages. */ + void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + expect_ptr_eq(alloc, HPDATA_ADDR, ""); + + /* Free the second quarter. */ + void *second_quarter = + (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE); + + expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); + + /* Purge the second quarter. 
*/ + hpdata_alloc_allowed_set(&hpdata, false); + hpdata_purge_state_t purge_state; + size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, ""); + + bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(second_quarter, purge_addr, ""); + expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); + + /* Free the first and third quarter. */ + hpdata_unreserve(&hpdata, HPDATA_ADDR, HUGEPAGE_PAGES / 4 * PAGE); + hpdata_unreserve(&hpdata, + (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), + HUGEPAGE_PAGES / 4 * PAGE); + + /* + * Purge again. The second quarter is retained, so we can safely + * re-purge it. We expect a single purge of 3/4 of the hugepage, + * purging half its pages. 
+ */ + to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_true(got_result, ""); + expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); + expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); + + got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, + &purge_size); + expect_false(got_result, "Unexpected additional purge range: " + "extent at %p of size %zu", purge_addr, purge_size); + hpdata_purge_end(&hpdata, &purge_state); + + expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, ""); +} +TEST_END + TEST_BEGIN(test_hugify) { hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); @@ -171,5 +239,6 @@ int main(void) { test_reserve_alloc, test_purge_simple, test_purge_intervening_dalloc, + test_purge_over_retained, test_hugify); } diff --git a/test/unit/psset.c b/test/unit/psset.c index fde403e1..7bce7c1b 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -18,12 +18,14 @@ edata_init_test(edata_t *edata) { static void test_psset_fake_purge(hpdata_t *ps) { hpdata_purge_state_t purge_state; + hpdata_alloc_allowed_set(ps, false); hpdata_purge_begin(ps, &purge_state); void *addr; size_t size; while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { } hpdata_purge_end(ps, &purge_state); + hpdata_alloc_allowed_set(ps, true); } static void From 47d8a7e6b04a81f2938f1b18f66cb468870fa442 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 3 Jun 2021 16:21:29 -0700 Subject: [PATCH 2079/2608] psset: Purge empty slabs first. These are particularly good candidates for purging (listed in the diff). 
--- include/jemalloc/internal/psset.h | 9 ++- src/psset.c | 29 ++++++-- test/unit/psset.c | 112 +++++++++++++++++++++++++++++- 3 files changed, 143 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 96fb300e..e1d64970 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -25,6 +25,9 @@ * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind + * 1). This lets us implement a preference for purging non-hugified hpdatas * among similarly-dirty ones. + * We reserve the last two indices for empty slabs, in that case purging + * hugified ones (which are definitionally all waste) before non-hugified ones + * (i.e. reversing the order). */ #define PSSET_NPURGE_LISTS (2 * PSSET_NPSIZES) @@ -78,7 +81,11 @@ struct psset_s { * allocations. */ hpdata_empty_list_t empty; - /* Slabs which are available to be purged, ordered by purge level. */ + /* + * Slabs which are available to be purged, ordered by how much we want + * to purge them (with later indices indicating slabs we want to purge + * more). + */ hpdata_purge_list_t to_purge[PSSET_NPURGE_LISTS]; /* Bitmap for which set bits correspond to non-empty purge lists. */ fb_group_t purge_bitmap[FB_NGROUPS(PSSET_NPURGE_LISTS)]; diff --git a/src/psset.c b/src/psset.c index 5978202a..9a8f054f 100644 --- a/src/psset.c +++ b/src/psset.c @@ -201,11 +201,32 @@ psset_purge_list_ind(hpdata_t *ps) { size_t ndirty = hpdata_ndirty_get(ps); /* Shouldn't have something with no dirty pages purgeable. */ assert(ndirty > 0); + /* + * Higher indices correspond to lists we'd like to purge earlier; make + * the two highest indices correspond to empty lists, which we attempt + * to purge before purging any non-empty list. This has two advantages: + * - Empty page slabs are the least likely to get reused (we'll only + * pick them for an allocation if we have no other choice). 
+ * - Empty page slabs can purge every dirty page they contain in a + * single call, which is not usually the case. + * + * We purge hugeified empty slabs before nonhugeified ones, on the basis + * that they are fully dirty, while nonhugified slabs might not be, so + * we free up more pages more easily. + */ + if (hpdata_nactive_get(ps) == 0) { + if (hpdata_huge_get(ps)) { + return PSSET_NPURGE_LISTS - 1; + } else { + return PSSET_NPURGE_LISTS - 2; + } + } + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor(ndirty << LG_PAGE)); /* - * Higher indices correspond to lists we'd like to purge earlier; - * increment the index for the nonhugified hpdatas first, so that we'll - * pick them before picking hugified ones. + * For non-empty slabs, we may reuse them again. Prefer purging + * non-hugeified slabs before hugeified ones then, among pages of + * similar dirtiness. We still get some benefit from the hugification. */ return (size_t)pind * 2 + (hpdata_huge_get(ps) ? 0 : 1); } @@ -321,7 +342,7 @@ psset_pick_purge(psset_t *psset) { return NULL; } pszind_t ind = (pszind_t)ind_ssz; - assert(ind < PSSET_NPSIZES); + assert(ind < PSSET_NPURGE_LISTS); hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); assert(ps != NULL); return ps; diff --git a/test/unit/psset.c b/test/unit/psset.c index 7bce7c1b..6ff72012 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -545,7 +545,7 @@ TEST_END TEST_BEGIN(test_purge_prefers_nonhuge) { /* * All else being equal, we should prefer purging non-huge pages over - * huge ones. + * huge ones for non-empty extents. */ /* Nothing magic about this constant. 
*/ @@ -625,6 +625,112 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { } TEST_END +TEST_BEGIN(test_purge_prefers_empty) { + void *ptr; + + psset_t psset; + psset_init(&psset); + + hpdata_t hpdata_empty; + hpdata_t hpdata_nonempty; + hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123); + psset_insert(&psset, &hpdata_empty); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456); + psset_insert(&psset, &hpdata_nonempty); + + psset_update_begin(&psset, &hpdata_empty); + ptr = hpdata_reserve_alloc(&hpdata_empty, PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_empty), ptr, ""); + hpdata_unreserve(&hpdata_empty, ptr, PAGE); + hpdata_purge_allowed_set(&hpdata_empty, true); + psset_update_end(&psset, &hpdata_empty); + + psset_update_begin(&psset, &hpdata_nonempty); + ptr = hpdata_reserve_alloc(&hpdata_nonempty, 10 * PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_nonempty), ptr, ""); + hpdata_unreserve(&hpdata_nonempty, ptr, 9 * PAGE); + hpdata_purge_allowed_set(&hpdata_nonempty, true); + psset_update_end(&psset, &hpdata_nonempty); + + /* + * The nonempty slab has 9 dirty pages, while the empty one has only 1. + * We should still pick the empty one for purging. + */ + hpdata_t *to_purge = psset_pick_purge(&psset); + expect_ptr_eq(&hpdata_empty, to_purge, ""); +} +TEST_END + +TEST_BEGIN(test_purge_prefers_empty_huge) { + void *ptr; + + psset_t psset; + psset_init(&psset); + + enum {NHP = 10 }; + + hpdata_t hpdata_huge[NHP]; + hpdata_t hpdata_nonhuge[NHP]; + + uintptr_t cur_addr = 100 * HUGEPAGE; + uint64_t cur_age = 123; + for (int i = 0; i < NHP; i++) { + hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age); + cur_addr += HUGEPAGE; + cur_age++; + psset_insert(&psset, &hpdata_huge[i]); + + hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age); + cur_addr += HUGEPAGE; + cur_age++; + psset_insert(&psset, &hpdata_nonhuge[i]); + + /* + * Make the hpdata_huge[i] fully dirty, empty, purgable, and + * huge. 
+ */ + psset_update_begin(&psset, &hpdata_huge[i]); + ptr = hpdata_reserve_alloc(&hpdata_huge[i], HUGEPAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_huge[i]), ptr, ""); + hpdata_hugify(&hpdata_huge[i]); + hpdata_unreserve(&hpdata_huge[i], ptr, HUGEPAGE); + hpdata_purge_allowed_set(&hpdata_huge[i], true); + psset_update_end(&psset, &hpdata_huge[i]); + + /* + * Make hpdata_nonhuge[i] fully dirty, empty, purgable, and + * non-huge. + */ + psset_update_begin(&psset, &hpdata_nonhuge[i]); + ptr = hpdata_reserve_alloc(&hpdata_nonhuge[i], HUGEPAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_nonhuge[i]), ptr, ""); + hpdata_unreserve(&hpdata_nonhuge[i], ptr, HUGEPAGE); + hpdata_purge_allowed_set(&hpdata_nonhuge[i], true); + psset_update_end(&psset, &hpdata_nonhuge[i]); + } + + /* + * We have a bunch of empty slabs, half huge, half nonhuge, inserted in + * alternating order. We should pop all the huge ones before popping + * any of the non-huge ones for purging. + */ + for (int i = 0; i < NHP; i++) { + hpdata_t *to_purge = psset_pick_purge(&psset); + expect_ptr_eq(&hpdata_huge[i], to_purge, ""); + psset_update_begin(&psset, to_purge); + hpdata_purge_allowed_set(to_purge, false); + psset_update_end(&psset, to_purge); + } + for (int i = 0; i < NHP; i++) { + hpdata_t *to_purge = psset_pick_purge(&psset); + expect_ptr_eq(&hpdata_nonhuge[i], to_purge, ""); + psset_update_begin(&psset, to_purge); + hpdata_purge_allowed_set(to_purge, false); + psset_update_end(&psset, to_purge); + } +} +TEST_END + int main(void) { return test_no_reentrancy( @@ -636,5 +742,7 @@ main(void) { test_stats, test_oldest_fit, test_insert_remove, - test_purge_prefers_nonhuge); + test_purge_prefers_nonhuge, + test_purge_prefers_empty, + test_purge_prefers_empty_huge); } From ace329d11bc397444e99ff81ff4b8d2ca26cc21c Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Jun 2021 13:52:28 -0700 Subject: [PATCH 2080/2608] HPA batch dalloc: Just do one deferred work check. 
We only need to do one check per batch dalloc, not one check per dalloc in the batch. --- src/hpa.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 8ef881fd..ba02f795 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -723,17 +723,6 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { hpdata_unreserve(ps, unreserve_addr, unreserve_size); hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); - hpa_do_deferred_work(tsdn, shard); -} - -static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - hpa_shard_t *shard = hpa_from_pai(self); - - hpa_dalloc_prepare_unlocked(tsdn, shard, edata); - malloc_mutex_lock(tsdn, &shard->mtx); - hpa_dalloc_locked(tsdn, shard, edata); - malloc_mutex_unlock(tsdn, &shard->mtx); } static void @@ -751,9 +740,19 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { edata_list_active_remove(list, edata); hpa_dalloc_locked(tsdn, shard, edata); } + hpa_do_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); } +static void +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { + /* Just a dalloc_batch of size 1; this lets us share logic. */ + edata_list_active_t dalloc_list; + edata_list_active_init(&dalloc_list); + edata_list_active_append(&dalloc_list, edata); + hpa_dalloc_batch(tsdn, self, &dalloc_list); +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); From 583284f2d91f79b0174ee23e1b4d946b63845246 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 4 Jun 2021 16:07:27 -0700 Subject: [PATCH 2081/2608] Add HPA deferral functionality. 
--- include/jemalloc/internal/arena_externs.h | 1 + include/jemalloc/internal/hpa.h | 5 +++ include/jemalloc/internal/hpa_opts.h | 18 ++++++++- src/arena.c | 10 ++++- src/background_thread.c | 2 +- src/hpa.c | 46 +++++++++++++++++++---- src/jemalloc.c | 6 ++- 7 files changed, 76 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 360653f9..bb3462f5 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -51,6 +51,7 @@ bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); +void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 778d1c92..27adefc0 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -55,6 +55,7 @@ struct hpa_shard_s { malloc_mutex_t mtx; /* The base metadata allocator. */ base_t *base; + /* * This edata cache is the one we use when allocating a small extent * from a pageslab. The pageslab itself comes from the centralized @@ -122,6 +123,10 @@ void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); +void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, + bool deferral_allowed); +void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); + /* * We share the fork ordering with the PA and arena prefork handling; that's why * these are 3 and 4 rather than 0 and 1. 
diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 5ff00725..ef162193 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -32,6 +32,14 @@ struct hpa_shard_opts_s { * active_pages. This may be set to (fxp_t)-1 to disable purging. */ fxp_t dirty_mult; + + /* + * Whether or not the PAI methods are allowed to defer work to a + * subsequent hpa_shard_do_deferred_work() call. Practically, this + * corresponds to background threads being enabled. We track this + * ourselves for encapsulation purposes. + */ + bool deferral_allowed; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -42,7 +50,15 @@ struct hpa_shard_opts_s { /* dehugification_threshold */ \ HUGEPAGE * 20 / 100, \ /* dirty_mult */ \ - FXP_INIT_PERCENT(25) \ + FXP_INIT_PERCENT(25), \ + /* \ + * deferral_allowed \ + * \ + * Really, this is always set by the arena during creation \ + * or by an hpa_shard_set_deferral_allowed call, so the value \ + * we put here doesn't matter. \ + */ \ + false \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/arena.c b/src/arena.c index 3ff91572..bdc120fa 100644 --- a/src/arena.c +++ b/src/arena.c @@ -461,6 +461,12 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } +/* Called from background threads. */ +void +arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { + arena_decay(tsdn, arena, true, false); +} + void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { bool generated_dirty; @@ -1565,7 +1571,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { * so arena_hpa_global is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { - if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &opt_hpa_opts, + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts, &opt_hpa_sec_opts)) { goto label_error; } diff --git a/src/background_thread.c b/src/background_thread.c index 7302a303..edcf786e 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -291,7 +291,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne if (!arena) { continue; } - arena_decay(tsdn, arena, true, false); + arena_do_deferred_work(tsdn, arena); if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { /* Min interval will be used. */ continue; diff --git a/src/hpa.c b/src/hpa.c index ba02f795..ee25e944 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -426,17 +426,29 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { return true; } +/* + * Execution of deferred work is forced if it's triggered by an explicit + * hpa_shard_do_deferred_work() call. + */ static void -hpa_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { +hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, + bool forced) { bool hugified; bool purged; size_t nloop = 0; - /* Just *some* bound, to impose a worst-case latency bound. */ - size_t maxloops = 100;; + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (!forced && shard->opts.deferral_allowed) { + return; + } + /* + * If we're on a background thread, do work so long as there's work to + * be done. Otherwise, bound latency to not be *too* bad by doing at + * most a small fixed number of operations. + */ + size_t maxloops = (forced ? 
(size_t)-1 : 8); do { - malloc_mutex_assert_owner(tsdn, &shard->mtx); hugified = hpa_try_hugify(tsdn, shard); - + malloc_mutex_assert_owner(tsdn, &shard->mtx); purged = false; if (hpa_should_purge(tsdn, shard)) { purged = hpa_try_purge(tsdn, shard); @@ -528,7 +540,7 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, edata_list_active_append(results, edata); } - hpa_do_deferred_work(tsdn, shard); + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); malloc_mutex_unlock(tsdn, &shard->mtx); return nsuccess; } @@ -740,7 +752,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { edata_list_active_remove(list, edata); hpa_dalloc_locked(tsdn, shard, edata); } - hpa_do_deferred_work(tsdn, shard); + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); malloc_mutex_unlock(tsdn, &shard->mtx); } @@ -800,6 +812,26 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { } } +void +hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, + bool deferral_allowed) { + malloc_mutex_lock(tsdn, &shard->mtx); + bool deferral_previously_allowed = shard->opts.deferral_allowed; + shard->opts.deferral_allowed = deferral_allowed; + if (deferral_previously_allowed && !deferral_allowed) { + hpa_shard_maybe_do_deferred_work(tsdn, shard, + /* forced */ true); + } + malloc_mutex_unlock(tsdn, &shard->mtx); +} + +void +hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_lock(tsdn, &shard->mtx); + hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true); + malloc_mutex_unlock(tsdn, &shard->mtx); +} + void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_prefork(tsdn, &shard->grow_mtx); diff --git a/src/jemalloc.c b/src/jemalloc.c index 6ff9f97b..85d68639 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1788,8 +1788,10 @@ malloc_init_hard_a0_locked() { opt_hpa = false; } } else if (opt_hpa) { - if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, 
&opt_hpa_opts, - &opt_hpa_sec_opts)) { + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, + &hpa_shard_opts, &opt_hpa_sec_opts)) { return true; } } From 1d4a7666d558b2c21e8cfc2b3e8981020db072fa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 7 Jun 2021 11:45:57 -0700 Subject: [PATCH 2082/2608] HPA: Do deferred operations on background threads. --- Makefile.in | 1 + .../internal/background_thread_externs.h | 1 + .../internal/background_thread_structs.h | 8 + include/jemalloc/internal/pa.h | 12 +- src/arena.c | 1 + src/background_thread.c | 51 +++++- src/ctl.c | 5 + src/jemalloc.c | 9 + src/pa.c | 12 ++ src/stats.c | 1 + test/unit/hpa_background_thread.c | 158 ++++++++++++++++++ test/unit/hpa_background_thread.sh | 4 + 12 files changed, 256 insertions(+), 7 deletions(-) create mode 100644 test/unit/hpa_background_thread.c create mode 100644 test/unit/hpa_background_thread.sh diff --git a/Makefile.in b/Makefile.in index ed03d4e2..3e7d122b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -221,6 +221,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index d5c13695..bc49beaf 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; +extern ssize_t opt_background_thread_hpa_interval_max_ms; extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; diff --git 
a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 249115c3..cc14dde3 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -11,6 +11,14 @@ #define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT #define DEFAULT_NUM_BACKGROUND_THREAD 4 +/* + * These exist only as a transitional state. Eventually, deferral should be + * part of the PAI, and each implementation can indicate wait times with more + * specificity. + */ +#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2) +#define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000 + typedef enum { background_thread_stopped, background_thread_started, diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index cb9f8cff..0fb77250 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -172,11 +172,21 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, */ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, bool *generated_dirty); - bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); +/* + * Do deferred work on this PA shard. + * + * Morally, this should do both PAC decay and the HPA deferred work. For now, + * though, the arena, background thread, and PAC modules are tightly interwoven + * in a way that's tricky to extricate, so we only do the HPA-specific parts. 
+ */ +void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, + bool deferral_allowed); +void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); + /******************************************************************************/ /* * Various bits of "boring" functionality that are still part of this module, diff --git a/src/arena.c b/src/arena.c index bdc120fa..d6a1f674 100644 --- a/src/arena.c +++ b/src/arena.c @@ -465,6 +465,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { arena_decay(tsdn, arena, true, false); + pa_shard_do_deferred_work(tsdn, &arena->pa_shard); } void diff --git a/src/background_thread.c b/src/background_thread.c index edcf786e..1fb24fe6 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -13,6 +13,13 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; +/* + * This is disabled (and set to -1) if the HPA is. If the HPA is enabled, + * malloc_conf initialization sets it to + * BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED. + */ +ssize_t opt_background_thread_hpa_interval_max_ms = + BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; @@ -209,7 +216,20 @@ arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); - return i1 < i2 ? i1 : i2; + uint64_t min_so_far = i1 < i2 ? 
i1 : i2; + if (opt_background_thread_hpa_interval_max_ms >= 0) { + uint64_t hpa_interval = 1000 * 1000 * + (uint64_t)opt_background_thread_hpa_interval_max_ms; + if (hpa_interval < min_so_far) { + if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; + } else { + min_so_far = hpa_interval; + } + } + } + + return min_so_far; } static void @@ -607,16 +627,16 @@ background_threads_enable(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); VARIABLE_ARRAY(bool, marked, max_background_threads); - unsigned i, nmarked; - for (i = 0; i < max_background_threads; i++) { + unsigned nmarked; + for (unsigned i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; /* Thread 0 is required and created at the end. */ marked[0] = true; /* Mark the threads we need to create for thread 0. */ - unsigned n = narenas_total_get(); - for (i = 1; i < n; i++) { + unsigned narenas = narenas_total_get(); + for (unsigned i = 1; i < narenas; i++) { if (marked[i % max_background_threads] || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; @@ -633,7 +653,18 @@ background_threads_enable(tsd_t *tsd) { } } - return background_thread_create_locked(tsd, 0); + bool err = background_thread_create_locked(tsd, 0); + if (err) { + return true; + } + for (unsigned i = 0; i < narenas; i++) { + arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); + if (arena != NULL) { + pa_shard_set_deferral_allowed(tsd_tsdn(tsd), + &arena->pa_shard, true); + } + } + return false; } bool @@ -647,6 +678,14 @@ background_threads_disable(tsd_t *tsd) { return true; } assert(n_background_threads == 0); + unsigned narenas = narenas_total_get(); + for (unsigned i = 0; i < narenas; i++) { + arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); + if (arena != NULL) { + pa_shard_set_deferral_allowed(tsd_tsdn(tsd), + &arena->pa_shard, false); + } + } return false; } diff --git a/src/ctl.c b/src/ctl.c index c713f0e2..c66b4d8c 100644 
--- a/src/ctl.c +++ b/src/ctl.c @@ -111,6 +111,7 @@ CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) CTL_PROTO(opt_max_background_threads) +CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -423,6 +424,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, + {NAME("background_thread_hpa_interval_max_ms"), + CTL(opt_background_thread_hpa_interval_max_ms)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -2139,6 +2142,8 @@ CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) +CTL_RO_NL_GEN(opt_background_thread_hpa_interval_max_ms, + opt_background_thread_hpa_interval_max_ms, ssize_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 85d68639..28c7fdc0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1410,6 +1410,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") + CONF_HANDLE_SSIZE_T( + opt_background_thread_hpa_interval_max_ms, + "background_thread_hpa_interval_max_ms", -1, + SSIZE_MAX) CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); @@ -1659,6 +1663,11 @@ 
malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); + if (opt_hpa && opt_background_thread_hpa_interval_max_ms + == BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED) { + opt_background_thread_hpa_interval_max_ms = + BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED; + } } #undef MALLOC_CONF_NSOURCES diff --git a/src/pa.c b/src/pa.c index cb3b3df5..cbc8f760 100644 --- a/src/pa.c +++ b/src/pa.c @@ -208,3 +208,15 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { return pac_decay_ms_get(&shard->pac, state); } + +void +pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, + bool deferral_allowed) { + hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, + deferral_allowed); +} + +void +pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { + hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); +} diff --git a/src/stats.c b/src/stats.c index 34cae0ab..4e6c3922 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1494,6 +1494,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") + OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") OPT_WRITE_SIZE_T("lg_extent_max_active_fit") diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c new file mode 100644 index 00000000..1907a6dd --- /dev/null +++ b/test/unit/hpa_background_thread.c @@ -0,0 +1,158 @@ +#include "test/jemalloc_test.h" +#include "test/sleep.h" + +static void +sleep_for_background_thread_interval() { + /* + * The sleep interval set in our .sh file is 50ms. So it should + * definitely run if we sleep for four times that. 
+ */ + sleep_ns(200 * 1000 * 1000); +} + +static unsigned +create_arena() { + unsigned arena_ind; + size_t sz; + + sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 2), + 0, "Unexpected mallctl() failure"); + return arena_ind; +} + +static size_t +get_empty_ndirty(unsigned arena_ind) { + int err; + size_t ndirty_huge; + size_t ndirty_nonhuge; + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + err = je_mallctl("epoch", (void *)&epoch, &sz, (void *)&epoch, + sizeof(epoch)); + expect_d_eq(0, err, "Unexpected mallctl() failure"); + + size_t mib[6]; + size_t miblen = sizeof(mib)/sizeof(mib[0]); + err = mallctlnametomib( + "stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", mib, + &miblen); + expect_d_eq(0, err, "Unexpected mallctlnametomib() failure"); + + sz = sizeof(ndirty_nonhuge); + mib[2] = arena_ind; + err = mallctlbymib(mib, miblen, &ndirty_nonhuge, &sz, NULL, 0); + expect_d_eq(0, err, "Unexpected mallctlbymib() failure"); + + err = mallctlnametomib( + "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, + &miblen); + expect_d_eq(0, err, "Unexpected mallctlnametomib() failure"); + + sz = sizeof(ndirty_huge); + mib[2] = arena_ind; + err = mallctlbymib(mib, miblen, &ndirty_huge, &sz, NULL, 0); + expect_d_eq(0, err, "Unexpected mallctlbymib() failure"); + + return ndirty_huge + ndirty_nonhuge; +} + +static void +set_background_thread_enabled(bool enabled) { + int err; + err = je_mallctl("background_thread", NULL, NULL, &enabled, + sizeof(enabled)); + expect_d_eq(0, err, "Unexpected mallctl failure"); +} + +static void +expect_purging(unsigned arena_ind, bool expect_deferred) { + size_t empty_ndirty; + + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Expected arena to start unused."); + + /* + * It's possible that we get unlucky with our stats collection timing, + * and the background thread runs in between the deallocation and the + * stats collection. 
So we retry 10 times, and see if we *ever* see + * deferred reclamation. + */ + bool observed_dirty_page = false; + for (int i = 0; i < 10; i++) { + void *ptr = mallocx(PAGE, + MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "All pages should be active"); + dallocx(ptr, MALLOCX_TCACHE_NONE); + empty_ndirty = get_empty_ndirty(arena_ind); + if (expect_deferred) { + expect_true(empty_ndirty == 0 || empty_ndirty == 1, + "Unexpected extra dirty page count: %zu", + empty_ndirty); + } else { + assert_zu_eq(0, empty_ndirty, + "Saw dirty pages without deferred purging"); + } + if (empty_ndirty > 0) { + observed_dirty_page = true; + break; + } + } + expect_b_eq(expect_deferred, observed_dirty_page, ""); + if (expect_deferred) { + sleep_for_background_thread_interval(); + } + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Should have seen a background purge"); +} + +TEST_BEGIN(test_hpa_background_thread_purges) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + test_skip_if(!have_background_thread); + + unsigned arena_ind = create_arena(); + /* + * Our .sh sets dirty mult to 0, so all dirty pages should get purged + * any time any thread frees. + */ + expect_purging(arena_ind, /* expect_deferred */ true); +} +TEST_END + +TEST_BEGIN(test_hpa_background_thread_enable_disable) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + test_skip_if(!have_background_thread); + + unsigned arena_ind = create_arena(); + + set_background_thread_enabled(false); + expect_purging(arena_ind, false); + + set_background_thread_enabled(true); + expect_purging(arena_ind, true); +} +TEST_END + +int +main(void) { + /* + * OK, this is a sort of nasty hack. We don't want to add *another* + * config option for HPA (the intent is that it becomes available on + * more platforms over time, and we're trying to prune back config + * options generally. 
But we'll get initialization errors on other + * platforms if we set hpa:true in the MALLOC_CONF (even if we set + * abort_conf:false as well). So we reach into the internals and set + * them directly, but only if we know that we're actually going to do + * something nontrivial in the tests. + */ + if (config_stats && hpa_supported() && have_background_thread) { + opt_hpa = true; + opt_background_thread = true; + } + return test_no_reentrancy( + test_hpa_background_thread_purges, + test_hpa_background_thread_enable_disable); +} diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh new file mode 100644 index 00000000..811da8bd --- /dev/null +++ b/test/unit/hpa_background_thread.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +export MALLOC_CONF="hpa_dirty_mult:0,background_thread_hpa_interval_max_ms:50,hpa_sec_nshards:0" + From 113938b6f43d528793e029d55ae51e21094b79bc Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Jun 2021 14:18:08 -0700 Subject: [PATCH 2083/2608] HPA: Pull out a hooks type. For now, this is a no-op change. In a subsequent commit, it will be useful for testing. 
--- Makefile.in | 1 + include/jemalloc/internal/hpa.h | 12 ++++- include/jemalloc/internal/hpa_hooks.h | 15 ++++++ include/jemalloc/internal/pa.h | 4 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 ++ src/arena.c | 4 +- src/hpa.c | 30 ++++-------- src/hpa_hooks.c | 46 +++++++++++++++++++ src/jemalloc.c | 2 +- src/pa.c | 5 +- test/unit/hpa.c | 2 +- 14 files changed, 100 insertions(+), 29 deletions(-) create mode 100644 include/jemalloc/internal/hpa_hooks.h create mode 100644 src/hpa_hooks.c diff --git a/Makefile.in b/Makefile.in index 3e7d122b..abdf8004 100644 --- a/Makefile.in +++ b/Makefile.in @@ -122,6 +122,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_central.c \ + $(srcroot)src/hpa_hooks.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 27adefc0..3132a6f5 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_HPA_H #include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" @@ -56,6 +57,14 @@ struct hpa_shard_s { /* The base metadata allocator. */ base_t *base; + /* + * The HPA hooks for this shard. Eventually, once we have the + * hpa_central_t back, these should live there (since it doesn't make + * sense for different shards on the same hpa_central_t to have + * different hooks). + */ + hpa_hooks_t hooks; + /* * This edata cache is the one we use when allocating a small extent * from a pageslab. 
The pageslab itself comes from the centralized @@ -109,7 +118,8 @@ struct hpa_shard_s { */ bool hpa_supported(); bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); + edata_cache_t *edata_cache, unsigned ind, const hpa_hooks_t *hooks, + const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h new file mode 100644 index 00000000..5c5b5f67 --- /dev/null +++ b/include/jemalloc/internal/hpa_hooks.h @@ -0,0 +1,15 @@ +#ifndef JEMALLOC_INTERNAL_HPA_HOOKS_H +#define JEMALLOC_INTERNAL_HPA_HOOKS_H + +typedef struct hpa_hooks_s hpa_hooks_t; +struct hpa_hooks_s { + void *(*map)(size_t size); + void (*unmap)(void *ptr, size_t size); + void (*purge)(void *ptr, size_t size); + void (*hugify)(void *ptr, size_t size); + void (*dehugify)(void *ptr, size_t size); +}; + +extern hpa_hooks_t hpa_hooks_default; + +#endif /* JEMALLOC_INTERNAL_HPA_HOOKS_H */ diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 0fb77250..582625b1 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -131,7 +131,9 @@ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, * that we can boot without worrying about the HPA, then turn it on in a0. */ bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts); + const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, + const sec_opts_t *hpa_sec_opts); + /* * We stop using the HPA when custom extent hooks are installed, but still * redirect deallocations to it. 
diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index a66ca36a..f6fae7f2 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 0c8e6c7c..800861d3 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 94fcd7bf..3d3e7174 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 0c8e6c7c..800861d3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index d6a1f674..5daeea31 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1574,8 +1574,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); - if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts, - &opt_hpa_sec_opts)) { + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, + &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { goto label_error; } } diff --git a/src/hpa.c b/src/hpa.c index ee25e944..07ad117f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -52,7 +52,8 @@ hpa_supported() { bool hpa_shard_init(hpa_shard_t *shard, 
emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts) { + edata_cache_t *edata_cache, unsigned ind, + const hpa_hooks_t *hooks, const hpa_shard_opts_t *opts) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -69,6 +70,7 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, assert(edata_cache != NULL); shard->base = base; + shard->hooks = *hooks; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->age_counter = 0; @@ -251,20 +253,14 @@ hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { * allocate an edata_t for the new psset. */ if (shard->eden == NULL) { - /* - * During development, we're primarily concerned with systems - * with overcommit. Eventually, we should be more careful here. - */ - bool commit = true; /* Allocate address space, bailing if we fail. */ - void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, - &commit); + void *new_eden = shard->hooks.map(HPA_EDEN_SIZE); if (new_eden == NULL) { return NULL; } ps = hpa_alloc_ps(tsdn, shard); if (ps == NULL) { - pages_unmap(new_eden, HPA_EDEN_SIZE); + shard->hooks.unmap(new_eden, HPA_EDEN_SIZE); return NULL; } shard->eden = new_eden; @@ -335,7 +331,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Actually do the purging, now that the lock is dropped. 
*/ if (dehugify) { - pages_nohuge(hpdata_addr_get(to_purge), HUGEPAGE); + shard->hooks.dehugify(hpdata_addr_get(to_purge), HUGEPAGE); } size_t total_purged = 0; uint64_t purges_this_pass = 0; @@ -346,7 +342,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - pages_purge_forced(purge_addr, purge_size); + shard->hooks.purge(purge_addr, purge_size); } malloc_mutex_lock(tsdn, &shard->mtx); @@ -404,15 +400,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); - bool err = pages_huge(hpdata_addr_get(to_hugify), - HUGEPAGE); - /* - * It's not clear what we could do in case of error; we - * might get into situations where we loop trying to - * hugify some page and failing over and over again. - * Just eat the error and pretend we were successful. - */ - (void)err; + shard->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; @@ -808,7 +796,7 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* There should be no allocations anywhere. 
*/ assert(hpdata_empty(ps)); psset_remove(&shard->psset, ps); - pages_unmap(hpdata_addr_get(ps), HUGEPAGE); + shard->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c new file mode 100644 index 00000000..7e07c31a --- /dev/null +++ b/src/hpa_hooks.c @@ -0,0 +1,46 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_hooks.h" + +static void *hpa_hooks_map(size_t size); +static void hpa_hooks_unmap(void *ptr, size_t size); +static void hpa_hooks_purge(void *ptr, size_t size); +static void hpa_hooks_hugify(void *ptr, size_t size); +static void hpa_hooks_dehugify(void *ptr, size_t size); + +hpa_hooks_t hpa_hooks_default = { + &hpa_hooks_map, + &hpa_hooks_unmap, + &hpa_hooks_purge, + &hpa_hooks_hugify, + &hpa_hooks_dehugify, +}; + +static void * +hpa_hooks_map(size_t size) { + bool commit = true; + return pages_map(NULL, size, HUGEPAGE, &commit); +} + +static void +hpa_hooks_unmap(void *ptr, size_t size) { + pages_unmap(ptr, size); +} + +static void +hpa_hooks_purge(void *ptr, size_t size) { + pages_purge_forced(ptr, size); +} + +static void +hpa_hooks_hugify(void *ptr, size_t size) { + bool err = pages_huge(ptr, size); + (void)err; +} + +static void +hpa_hooks_dehugify(void *ptr, size_t size) { + bool err = pages_nohuge(ptr, size); + (void)err; +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 28c7fdc0..5adb5637 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1800,7 +1800,7 @@ malloc_init_hard_a0_locked() { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { return true; } } diff --git a/src/pa.c b/src/pa.c index cbc8f760..0172dfa7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -50,9 +50,10 @@ pa_shard_init(tsdn_t *tsdn, 
pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { + const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, + const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, - &shard->edata_cache, shard->ind, hpa_opts)) { + &shard->edata_cache, shard->ind, hpa_hooks, hpa_opts)) { return true; } if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 46009835..0558680f 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -42,7 +42,7 @@ create_test_data() { err = hpa_shard_init(&test_data->shard, &test_data->emap, test_data->base, &test_data->shard_edata_cache, SHARD_IND, - &opts); + &hpa_hooks_default, &opts); assert_false(err, ""); return (hpa_shard_t *)test_data; From 6630c5989672cbbd5ec2369aaa46ce6f5ce1ed4e Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 14 Jun 2021 14:53:23 -0700 Subject: [PATCH 2084/2608] HPA: Hugification hysteresis. We wait a while after deciding a huge extent should get hugified to see if it gets purged before long. This avoids hugifying extents that might shortly get dehugified for purging. Rename and use the hpa_dehugification_threshold option support code for this, since it's now ignored. 
--- include/jemalloc/internal/hpa_hooks.h | 1 + include/jemalloc/internal/hpa_opts.h | 19 ++-- include/jemalloc/internal/hpdata.h | 23 ++++- src/ctl.c | 9 +- src/hpa.c | 61 ++++++++--- src/hpa_hooks.c | 7 ++ src/jemalloc.c | 24 +---- src/stats.c | 7 +- test/unit/hpa.c | 143 ++++++++++++++++++++++++-- 9 files changed, 234 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 5c5b5f67..3e21d855 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -8,6 +8,7 @@ struct hpa_hooks_s { void (*purge)(void *ptr, size_t size); void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); + void (*curtime)(nstime_t *r_time); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ef162193..2548f44f 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -17,16 +17,13 @@ struct hpa_shard_opts_s { * any allocation request. */ size_t slab_max_alloc; + /* * When the number of active bytes in a hugepage is >= * hugification_threshold, we force hugify it. */ size_t hugification_threshold; - /* - * When the number of dirty bytes in a hugepage is >= - * dehugification_threshold, we force dehugify it. - */ - size_t dehugification_threshold; + /* * The HPA purges whenever the number of pages exceeds dirty_mult * * active_pages. This may be set to (fxp_t)-1 to disable purging. @@ -40,6 +37,12 @@ struct hpa_shard_opts_s { * ourselves for encapsulation purposes. */ bool deferral_allowed; + + /* + * How long a hugepage has to be a hugification candidate before it will + * actually get hugified. 
+ */ + uint64_t hugify_delay_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -47,8 +50,6 @@ struct hpa_shard_opts_s { 64 * 1024, \ /* hugification_threshold */ \ HUGEPAGE * 95 / 100, \ - /* dehugification_threshold */ \ - HUGEPAGE * 20 / 100, \ /* dirty_mult */ \ FXP_INIT_PERCENT(25), \ /* \ @@ -58,7 +59,9 @@ struct hpa_shard_opts_s { * or by an hpa_shard_set_deferral_allowed call, so the value \ * we put here doesn't matter. \ */ \ - false \ + false, \ + /* hugify_delay_ms */ \ + 10 * 1000 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 32e26248..2a12add9 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -61,6 +61,8 @@ struct hpdata_s { /* And with hugifying. */ bool h_hugify_allowed; + /* When we became a hugification candidate. */ + nstime_t h_time_hugify_allowed; bool h_in_psset_hugify_container; /* Whether or not a purge or hugify is currently happening. */ @@ -175,8 +177,8 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { - assert(purge_allowed == false || !hpdata->h_mid_purge); - hpdata->h_purge_allowed = purge_allowed; + assert(purge_allowed == false || !hpdata->h_mid_purge); + hpdata->h_purge_allowed = purge_allowed; } static inline bool @@ -185,9 +187,20 @@ hpdata_hugify_allowed_get(const hpdata_t *hpdata) { } static inline void -hpdata_hugify_allowed_set(hpdata_t *hpdata, bool hugify_allowed) { - assert(hugify_allowed == false || !hpdata->h_mid_hugify); - hpdata->h_hugify_allowed = hugify_allowed; +hpdata_allow_hugify(hpdata_t *hpdata, nstime_t now) { + assert(!hpdata->h_mid_hugify); + hpdata->h_hugify_allowed = true; + hpdata->h_time_hugify_allowed = now; +} + +static inline nstime_t +hpdata_time_hugify_allowed(hpdata_t *hpdata) { + return hpdata->h_time_hugify_allowed; +} + +static inline void +hpdata_disallow_hugify(hpdata_t *hpdata) { + 
hpdata->h_hugify_allowed = false; } static inline bool diff --git a/src/ctl.c b/src/ctl.c index c66b4d8c..b3e62dfa 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -96,7 +96,7 @@ CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) -CTL_PROTO(opt_hpa_dehugification_threshold) +CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -406,8 +406,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, - {NAME("hpa_dehugification_threshold"), - CTL(opt_hpa_dehugification_threshold)}, + {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2114,8 +2113,8 @@ CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) -CTL_RO_NL_GEN(opt_hpa_dehugification_threshold, - opt_hpa_opts.dehugification_threshold, size_t) +CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) + /* * This will have to change before we publicly document this option; fxp_t and * its representation are internal implementation details. 
diff --git a/src/hpa.c b/src/hpa.c index 07ad117f..4ae30b97 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -198,7 +198,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { hpdata_purge_allowed_set(ps, false); - hpdata_hugify_allowed_set(ps, false); + hpdata_disallow_hugify(ps); return; } /* @@ -226,7 +226,24 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0); if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { - hpdata_hugify_allowed_set(ps, true); + nstime_t now; + shard->hooks.curtime(&now); + hpdata_allow_hugify(ps, now); + } + /* + * Once a hugepage has become eligible for hugification, we don't mark + * it as ineligible just because it stops meeting the criteria (this + * could lead to situations where a hugepage that spends most of its + * time meeting the criteria never quite getting hugified if there are + * intervening deallocations). The idea is that the hugification delay + * will allow them to get purged, reseting their "hugify-allowed" bit. + * If they don't get purged, then the hugification isn't hurting and + * might help. As an exception, we don't hugify hugepages that are now + * empty; it definitely doesn't help there until the hugepage gets + * reused, which is likely not for a while. 
+ */ + if (hpdata_nactive_get(ps) == 0) { + hpdata_disallow_hugify(ps); } } @@ -309,7 +326,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); hpdata_purge_allowed_set(to_purge, false); - hpdata_hugify_allowed_set(to_purge, false); + hpdata_disallow_hugify(to_purge); /* * Unlike with hugification (where concurrent * allocations are allowed), concurrent allocation out @@ -386,6 +403,16 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { assert(hpdata_hugify_allowed_get(to_hugify)); assert(!hpdata_changing_state_get(to_hugify)); + /* Make sure that it's been hugifiable for long enough. */ + nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); + nstime_t nstime; + shard->hooks.curtime(&nstime); + nstime_subtract(&nstime, &time_hugify_allowed); + uint64_t millis = nstime_msec(&nstime); + if (millis < shard->opts.hugify_delay_ms) { + return false; + } + /* * Don't let anyone else purge or hugify this page while * we're hugifying it (allocations and deallocations are @@ -394,7 +421,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { psset_update_begin(&shard->psset, to_hugify); hpdata_mid_hugify_set(to_hugify, true); hpdata_purge_allowed_set(to_hugify, false); - hpdata_hugify_allowed_set(to_hugify, false); + hpdata_disallow_hugify(to_hugify); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); @@ -421,9 +448,6 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { - bool hugified; - bool purged; - size_t nloop = 0; malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!forced && shard->opts.deferral_allowed) { return; @@ -433,16 +457,29 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * be done. Otherwise, bound latency to not be *too* bad by doing at * most a small fixed number of operations. 
*/ - size_t maxloops = (forced ? (size_t)-1 : 8); + bool hugified = false; + bool purged = false; + size_t max_ops = (forced ? (size_t)-1 : 16); + size_t nops = 0; do { - hugified = hpa_try_hugify(tsdn, shard); - malloc_mutex_assert_owner(tsdn, &shard->mtx); + /* + * Always purge before hugifying, to make sure we get some + * ability to hit our quiescence targets. + */ purged = false; - if (hpa_should_purge(tsdn, shard)) { + while (hpa_should_purge(tsdn, shard) && nops < max_ops) { purged = hpa_try_purge(tsdn, shard); + if (purged) { + nops++; + } + } + hugified = hpa_try_hugify(tsdn, shard); + if (hugified) { + nops++; } malloc_mutex_assert_owner(tsdn, &shard->mtx); - } while ((hugified || purged) && nloop++ < maxloops); + malloc_mutex_assert_owner(tsdn, &shard->mtx); + } while ((hugified || purged) && nops < max_ops); } static edata_t * diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 7e07c31a..6f377613 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,6 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static void hpa_hooks_hugify(void *ptr, size_t size); static void hpa_hooks_dehugify(void *ptr, size_t size); +static void hpa_hooks_curtime(nstime_t *r_nstime); hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -15,6 +16,7 @@ hpa_hooks_t hpa_hooks_default = { &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, + &hpa_hooks_curtime, }; static void * @@ -44,3 +46,8 @@ hpa_hooks_dehugify(void *ptr, size_t size) { bool err = pages_nohuge(ptr, size); (void)err; } + +static void +hpa_hooks_curtime(nstime_t *r_nstime) { + nstime_update(r_nstime); +} diff --git a/src/jemalloc.c b/src/jemalloc.c index 5adb5637..71efcb61 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1145,6 +1145,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \ 
check_min, check_max, clip) +#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\ + CONF_HANDLE_T_U(uint64_t, o, n, min, max, \ + check_min, check_max, clip) #define CONF_HANDLE_SSIZE_T(o, n, min, max) \ CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \ CONF_CHECK_MIN, CONF_CHECK_MAX, false) @@ -1441,26 +1444,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - /* And the same for the dehugification_threhsold. */ CONF_HANDLE_SIZE_T( - opt_hpa_opts.dehugification_threshold, - "hpa_dehugification_threshold", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - if (CONF_MATCH("hpa_dehugification_threshold_ratio")) { - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, - &end); - if (err || (size_t)(end - v) != vlen - || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } else { - opt_hpa_opts.dehugification_threshold = - fxp_mul_frac(HUGEPAGE, ratio); - } - CONF_CONTINUE; - } + opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms", + 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { diff --git a/src/stats.c b/src/stats.c index 4e6c3922..16aa3fd4 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1376,7 +1376,7 @@ stats_general_print(emitter_t *emitter) { uint64_t u64v; int64_t i64v; ssize_t ssv, ssv2; - size_t sv, bsz, usz, u32sz, i64sz, ssz, sssz, cpsz; + size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); @@ -1385,6 +1385,7 @@ stats_general_print(emitter_t *emitter) { cpsz = sizeof(const char *); u32sz = sizeof(uint32_t); i64sz = sizeof(int64_t); + u64sz = sizeof(uint64_t); CTL_GET("version", &cpv, const char *); emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); @@ -1442,6 +1443,8 @@ stats_general_print(emitter_t *emitter) { #define OPT_WRITE_INT64(name) \ OPT_WRITE(name, i64v, i64sz, emitter_type_int64) +#define 
OPT_WRITE_UINT64(name) \ + OPT_WRITE(name, u64v, u64sz, emitter_type_uint64) #define OPT_WRITE_SIZE_T(name) \ OPT_WRITE(name, sv, ssz, emitter_type_size) @@ -1468,7 +1471,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa") OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") - OPT_WRITE_SIZE_T("hpa_dehugification_threshold") + OPT_WRITE_UINT64("hpa_hugify_delay_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 0558680f..a9e551fc 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -19,8 +19,21 @@ struct test_data_s { emap_t emap; }; +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, +}; + static hpa_shard_t * -create_test_data() { +create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks); @@ -37,12 +50,9 @@ create_test_data() { err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - hpa_shard_opts_t opts = HPA_SHARD_OPTS_DEFAULT; - opts.slab_max_alloc = ALLOC_MAX; - err = hpa_shard_init(&test_data->shard, &test_data->emap, test_data->base, &test_data->shard_edata_cache, SHARD_IND, - &hpa_hooks_default, &opts); + hooks, opts); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -58,7 +68,8 @@ destroy_test_data(hpa_shard_t *shard) { TEST_BEGIN(test_alloc_max) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edata; @@ -134,7 +145,8 @@ node_remove(mem_tree_t *tree, edata_t *edata) { TEST_BEGIN(test_stress) { 
test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); @@ -224,7 +236,8 @@ expect_contiguous(edata_t **edatas, size_t nedatas) { TEST_BEGIN(test_alloc_dalloc_batch) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); enum {NALLOCS = 8}; @@ -282,6 +295,117 @@ TEST_BEGIN(test_alloc_dalloc_batch) { } TEST_END +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static bool defer_purge_called = false; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + defer_purge_called = true; +} + +static bool defer_hugify_called = false; +static void +defer_test_hugify(void *ptr, size_t size) { + defer_hugify_called = true; +} + +static bool defer_dehugify_called = false; +static void +defer_test_dehugify(void *ptr, size_t size) { + defer_dehugify_called = true; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time) { + *r_time = defer_curtime; +} + +TEST_BEGIN(test_defer_time) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t 
*edatas[HUGEPAGE_PAGES]; + for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_false(defer_hugify_called, "Hugified too early"); + + /* Hugification delay is set to 10 seconds in options. */ + nstime_init2(&defer_curtime, 11, 0); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(defer_hugify_called, "Failed to hugify"); + + defer_hugify_called = false; + + /* Purge. Recall that dirty_mult is .25. */ + for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i]); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_false(defer_hugify_called, "Hugified too early"); + expect_true(defer_dehugify_called, "Should have dehugified"); + expect_true(defer_purge_called, "Should have purged"); + defer_hugify_called = false; + defer_dehugify_called = false; + defer_purge_called = false; + + /* + * Refill the page. We now meet the hugification threshold; we should + * be marked for pending hugify. + */ + for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* + * We would be ineligible for hugification, had we not already met the + * threshold before dipping below it. + */ + pai_dalloc(tsdn, &shard->pai, edatas[0]); + /* Wait for the threshold again. 
*/ + nstime_init2(&defer_curtime, 22, 0); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(defer_hugify_called, "Hugified too early"); + expect_false(defer_dehugify_called, "Unexpected dehugify"); + expect_false(defer_purge_called, "Unexpected purge"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -299,5 +423,6 @@ main(void) { return test_no_reentrancy( test_alloc_max, test_stress, - test_alloc_dalloc_batch); + test_alloc_dalloc_batch, + test_defer_time); } From 4b633b9a81bb0fe1b234bd6243496d407cae8665 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 16 Jul 2021 14:53:25 -0700 Subject: [PATCH 2085/2608] Clean up background thread sleep computation Isolate the computation of purge interval from background thread logic and move into more suitable file. --- include/jemalloc/internal/decay.h | 11 ++ src/background_thread.c | 179 ++++++++---------------------- src/decay.c | 73 ++++++++++++ 3 files changed, 133 insertions(+), 130 deletions(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index df396658..a81e3925 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -3,6 +3,8 @@ #include "jemalloc/internal/smoothstep.h" +#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1) + /* * The decay_t computes the number of pages we should purge at any given time. * Page allocators inform a decay object when pages enter a decay-able state @@ -146,4 +148,13 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t current_npages); +/* + * Calculates wait time until at least npages_threshold pages should be purged. + * + * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of + * indefinite wait. 
+ */ +uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, + uint64_t npages_threshold); + #endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/src/background_thread.c b/src/background_thread.c index 1fb24fe6..4951cd1a 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -104,134 +104,6 @@ set_current_thread_affinity(int cpu) { /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) -static inline size_t -decay_npurge_after_interval(decay_t *decay, size_t interval) { - size_t i; - uint64_t sum = 0; - for (i = 0; i < interval; i++) { - sum += decay->backlog[i] * h_steps[i]; - } - for (; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]); - } - - return (size_t)(sum >> SMOOTHSTEP_BFP); -} - -static uint64_t -arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, decay_t *decay, - ecache_t *ecache) { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* Use minimal interval if decay is contended. */ - return BACKGROUND_THREAD_MIN_INTERVAL_NS; - } - - uint64_t interval; - ssize_t decay_time = decay_ms_read(decay); - if (decay_time <= 0) { - /* Purging is eagerly done or disabled currently. */ - interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; - goto label_done; - } - - uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); - assert(decay_interval_ns > 0); - size_t npages = ecache_npages_get(ecache); - if (npages == 0) { - unsigned i; - for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { - if (decay->backlog[i] > 0) { - break; - } - } - if (i == SMOOTHSTEP_NSTEPS) { - /* No dirty pages recorded. Sleep indefinitely. */ - interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; - goto label_done; - } - } - if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) { - /* Use max interval. 
*/ - interval = decay_interval_ns * SMOOTHSTEP_NSTEPS; - goto label_done; - } - - size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns; - size_t ub = SMOOTHSTEP_NSTEPS; - /* Minimal 2 intervals to ensure reaching next epoch deadline. */ - lb = (lb < 2) ? 2 : lb; - if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) || - (lb + 2 > ub)) { - interval = BACKGROUND_THREAD_MIN_INTERVAL_NS; - goto label_done; - } - - assert(lb + 2 <= ub); - size_t npurge_lb, npurge_ub; - npurge_lb = decay_npurge_after_interval(decay, lb); - if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) { - interval = decay_interval_ns * lb; - goto label_done; - } - npurge_ub = decay_npurge_after_interval(decay, ub); - if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) { - interval = decay_interval_ns * ub; - goto label_done; - } - - unsigned n_search = 0; - size_t target, npurge; - while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub) - && (lb + 2 < ub)) { - target = (lb + ub) / 2; - npurge = decay_npurge_after_interval(decay, target); - if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) { - ub = target; - npurge_ub = npurge; - } else { - lb = target; - npurge_lb = npurge; - } - assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); - ++n_search; - } - interval = decay_interval_ns * (ub + lb) / 2; -label_done: - interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ? - BACKGROUND_THREAD_MIN_INTERVAL_NS : interval; - malloc_mutex_unlock(tsdn, &decay->mtx); - - return interval; -} - -/* Compute purge interval for background threads. 
*/ -static uint64_t -arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) { - uint64_t i1, i2; - i1 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.pac.ecache_dirty); - if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) { - return i1; - } - i2 = arena_decay_compute_purge_interval_impl(tsdn, - &arena->pa_shard.pac.decay_muzzy, &arena->pa_shard.pac.ecache_muzzy); - - uint64_t min_so_far = i1 < i2 ? i1 : i2; - if (opt_background_thread_hpa_interval_max_ms >= 0) { - uint64_t hpa_interval = 1000 * 1000 * - (uint64_t)opt_background_thread_hpa_interval_max_ms; - if (hpa_interval < min_so_far) { - if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { - min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; - } else { - min_so_far = hpa_interval; - } - } - } - - return min_so_far; -} - static void background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) { @@ -301,6 +173,52 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { return false; } +static inline uint64_t +arena_decay_compute_purge_interval(tsdn_t *tsdn, decay_t *decay, + size_t npages) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_MIN_INTERVAL_NS; + } + uint64_t decay_ns = decay_ns_until_purge(decay, npages, + BACKGROUND_THREAD_NPAGES_THRESHOLD); + malloc_mutex_unlock(tsdn, &decay->mtx); + + return decay_ns < BACKGROUND_THREAD_MIN_INTERVAL_NS ? 
+ BACKGROUND_THREAD_MIN_INTERVAL_NS : + decay_ns; +} + + +static inline uint64_t +arena_decay_compute_min_purge_interval(tsdn_t *tsdn, arena_t *arena) { + uint64_t dirty, muzzy; + dirty = arena_decay_compute_purge_interval(tsdn, + &arena->pa_shard.pac.decay_dirty, + ecache_npages_get(&arena->pa_shard.pac.ecache_dirty)); + if (dirty == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + return dirty; + } + muzzy = arena_decay_compute_purge_interval(tsdn, + &arena->pa_shard.pac.decay_muzzy, + ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy)); + + uint64_t min_so_far = dirty < muzzy ? dirty : muzzy; + if (opt_background_thread_hpa_interval_max_ms >= 0) { + uint64_t hpa_interval = 1000 * 1000 * + (uint64_t)opt_background_thread_hpa_interval_max_ms; + if (hpa_interval < min_so_far) { + if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; + } else { + min_so_far = hpa_interval; + } + } + } + + return min_so_far; +} + static inline void background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; @@ -316,10 +234,11 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne /* Min interval will be used. 
*/ continue; } - uint64_t interval = arena_decay_compute_purge_interval(tsdn, + uint64_t interval = arena_decay_compute_min_purge_interval(tsdn, arena); assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS); - if (min_interval > interval) { + if (interval != DECAY_UNBOUNDED_TIME_TO_PURGE && + min_interval > interval) { min_interval = interval; } } diff --git a/src/decay.c b/src/decay.c index 23d59da9..87e3a8bd 100644 --- a/src/decay.c +++ b/src/decay.c @@ -175,3 +175,76 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, return true; } + +static inline size_t +decay_npurge_after_interval(decay_t *decay, size_t interval) { + size_t i; + uint64_t sum = 0; + for (i = 0; i < interval; i++) { + sum += decay->backlog[i] * h_steps[i]; + } + for (; i < SMOOTHSTEP_NSTEPS; i++) { + sum += decay->backlog[i] * + (h_steps[i] - h_steps[i - interval]); + } + + return (size_t)(sum >> SMOOTHSTEP_BFP); +} + +uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, + uint64_t npages_threshold) { + ssize_t decay_time = decay_ms_read(decay); + if (decay_time <= 0) { + /* Purging is eagerly done or disabled currently. */ + return DECAY_UNBOUNDED_TIME_TO_PURGE; + } + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); + assert(decay_interval_ns > 0); + if (npages_current == 0) { + unsigned i; + for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) { + if (decay->backlog[i] > 0) { + break; + } + } + if (i == SMOOTHSTEP_NSTEPS) { + /* No dirty pages recorded. Sleep indefinitely. */ + return DECAY_UNBOUNDED_TIME_TO_PURGE; + } + } + if (npages_current <= npages_threshold) { + /* Use max interval. */ + return decay_interval_ns * SMOOTHSTEP_NSTEPS; + } + + /* Minimal 2 intervals to ensure reaching next epoch deadline. 
*/ + size_t lb = 2; + size_t ub = SMOOTHSTEP_NSTEPS; + + size_t npurge_lb, npurge_ub; + npurge_lb = decay_npurge_after_interval(decay, lb); + if (npurge_lb > npages_threshold) { + return decay_interval_ns * lb; + } + npurge_ub = decay_npurge_after_interval(decay, ub); + if (npurge_ub < npages_threshold) { + return decay_interval_ns * ub; + } + + unsigned n_search = 0; + size_t target, npurge; + while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) { + target = (lb + ub) / 2; + npurge = decay_npurge_after_interval(decay, target); + if (npurge > npages_threshold) { + ub = target; + npurge_ub = npurge; + } else { + lb = target; + npurge_lb = npurge; + } + assert(n_search < lg_floor(SMOOTHSTEP_NSTEPS) + 1); + ++n_search; + } + return decay_interval_ns * (ub + lb) / 2; +} From aaea4fd1e640690042b34755fd5e4714ebd0459b Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 20 Jul 2021 13:22:05 -0700 Subject: [PATCH 2086/2608] Add more documentation to decay.c It took me a while to understand why some things are implemented the way they are, so hopefully it will help future readers. --- include/jemalloc/internal/decay.h | 3 ++- src/decay.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index a81e3925..8e517458 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -149,7 +149,8 @@ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t current_npages); /* - * Calculates wait time until at least npages_threshold pages should be purged. + * Calculates wait time until a number of pages in the interval + * [0.5 * npages_threshold .. 1.5 * npages_threshold] should be purged. * * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of * indefinite wait. 
diff --git a/src/decay.c b/src/decay.c index 87e3a8bd..fdbd63d8 100644 --- a/src/decay.c +++ b/src/decay.c @@ -102,6 +102,11 @@ decay_backlog_npages_limit(const decay_t *decay) { return npages_limit_backlog; } +/* + * Update backlog, assuming that 'nadvance_u64' time intervals have passed. + * Trailing 'nadvance_u64' records should be erased and 'current_npages' is + * placed as the newest record. + */ static void decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, size_t current_npages) { @@ -176,6 +181,22 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, return true; } +/* + * Calculate how many pages should be purged after 'interval'. + * + * First, calculate how many pages should remain at the moment, then subtract + * the number of pages that should remain after 'interval'. The difference is + * how many pages should be purged until then. + * + * The number of pages that should remain at a specific moment is calculated + * like this: pages(now) = sum(backlog[i] * h_steps[i]). After 'interval' + * passes, backlog would shift 'interval' positions to the left and sigmoid + * curve would be applied starting with backlog[interval]. + * + * The implementation doesn't directly map to the description, but it's + * essentially the same calculation, optimized to avoid iterating over + * [interval..SMOOTHSTEP_NSTEPS) twice. + */ static inline size_t decay_npurge_after_interval(decay_t *decay, size_t interval) { size_t i; From c88fe355e64fa18eef932b4446aae7296babcc06 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 21 Jul 2021 14:45:55 -0700 Subject: [PATCH 2087/2608] Add unit tests for decay After slight changes in the interface, it's an opportunity to enhance unit tests. 
--- test/unit/decay.c | 222 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 214 insertions(+), 8 deletions(-) diff --git a/test/unit/decay.c b/test/unit/decay.c index 9da0d94c..72484c80 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -2,10 +2,68 @@ #include "jemalloc/internal/decay.h" -/* - * Honestly, this is mostly a stub for now. Eventually, we should beef up - * testing here. - */ +TEST_BEGIN(test_decay_init) { + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + ssize_t decay_ms = 1000; + assert_true(decay_ms_valid(decay_ms), ""); + + expect_false(decay_init(&decay, &curtime, decay_ms), + "Failed to initialize decay"); + expect_zd_eq(decay_ms_read(&decay), decay_ms, + "Decay_ms was initialized incorrectly"); + expect_u64_ne(decay_epoch_duration_ns(&decay), 0, + "Epoch duration was initialized incorrectly"); +} +TEST_END + +TEST_BEGIN(test_decay_ms_valid) { + expect_false(decay_ms_valid(-7), + "Misclassified negative decay as valid"); + expect_true(decay_ms_valid(-1), + "Misclassified -1 (never decay) as invalid decay"); + expect_true(decay_ms_valid(8943), + "Misclassified valid decay"); + if (SSIZE_MAX > NSTIME_SEC_MAX) { + expect_false( + decay_ms_valid((ssize_t)(NSTIME_SEC_MAX * KQU(1000) + 39)), + "Misclassified too large decay"); + } +} +TEST_END + +TEST_BEGIN(test_decay_maybe_advance_epoch) { + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + expect_false(err, ""); + + bool advanced; + advanced = decay_maybe_advance_epoch(&decay, &curtime, 0); + expect_false(advanced, "Epoch advanced while time didn't"); + + nstime_t interval; + nstime_init(&interval, decay_epoch_duration_ns(&decay)); + + nstime_add(&curtime, &interval); + advanced = decay_maybe_advance_epoch(&decay, &curtime, 0); + expect_false(advanced, "Epoch advanced after first 
interval"); + + nstime_add(&curtime, &interval); + advanced = decay_maybe_advance_epoch(&decay, &curtime, 0); + expect_true(advanced, "Epoch didn't advance after two intervals"); +} +TEST_END TEST_BEGIN(test_decay_empty) { /* If we never have any decaying pages, npages_limit should be 0. */ @@ -30,16 +88,164 @@ TEST_BEGIN(test_decay_empty) { &curtime, dirty_pages); if (epoch_advanced) { nepochs++; - assert_zu_eq(decay_npages_limit_get(&decay), 0, - "Should not increase the limit arbitrarily"); + expect_zu_eq(decay_npages_limit_get(&decay), 0, + "Unexpectedly increased npages_limit"); } } - assert_d_gt(nepochs, 0, "Should have advanced epochs"); + expect_d_gt(nepochs, 0, "Epochs never advanced"); +} +TEST_END + +/* + * Verify that npages_limit correctly decays as the time goes. + * + * During first 'nepoch_init' epochs, add new dirty pages. + * After that, let them decay and verify npages_limit decreases. + * Then proceed with another 'nepoch_init' epochs and check that + * all dirty pages are flushed out of backlog, bringing npages_limit + * down to zero. 
+ */ +TEST_BEGIN(test_decay) { + const uint64_t nepoch_init = 10; + + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + uint64_t decay_ns = decay_ms * 1000 * 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + assert_false(err, ""); + + expect_zu_eq(decay_npages_limit_get(&decay), 0, + "Empty decay returned nonzero npages_limit"); + + nstime_t epochtime; + nstime_init(&epochtime, decay_epoch_duration_ns(&decay)); + + const size_t dirty_pages_per_epoch = 1000; + size_t dirty_pages = 0; + uint64_t epoch_ns = decay_epoch_duration_ns(&decay); + bool epoch_advanced = false; + + /* Populate backlog with some dirty pages */ + for (uint64_t i = 0; i < nepoch_init; i++) { + nstime_add(&curtime, &epochtime); + dirty_pages += dirty_pages_per_epoch; + epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + } + expect_true(epoch_advanced, "Epoch never advanced"); + + size_t npages_limit = decay_npages_limit_get(&decay); + expect_zu_gt(npages_limit, 0, "npages_limit is incorrectly equal " + "to zero after dirty pages have been added"); + + /* Keep dirty pages unchanged and verify that npages_limit decreases */ + for (uint64_t i = nepoch_init; i * epoch_ns < decay_ns; ++i) { + nstime_add(&curtime, &epochtime); + epoch_advanced = decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + if (epoch_advanced) { + size_t npages_limit_new = decay_npages_limit_get(&decay); + expect_zu_lt(npages_limit_new, npages_limit, + "napges_limit failed to decay"); + + npages_limit = npages_limit_new; + } + } + + expect_zu_gt(npages_limit, 0, "npages_limit decayed to zero earlier " + "than decay_ms since last dirty page was added"); + + /* Completely push all dirty pages out of the backlog */ + epoch_advanced = false; + for (uint64_t i = 0; i < nepoch_init; i++) { + nstime_add(&curtime, &epochtime); + epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, + 
dirty_pages); + } + expect_true(epoch_advanced, "Epoch never advanced"); + + npages_limit = decay_npages_limit_get(&decay); + expect_zu_eq(npages_limit, 0, "npages_limit didn't decay to 0 after " + "decay_ms since last bump in dirty pages"); +} +TEST_END + +TEST_BEGIN(test_decay_ns_until_purge) { + const uint64_t nepoch_init = 10; + + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + uint64_t decay_ns = decay_ms * 1000 * 1000; + + bool err = decay_init(&decay, &curtime, (ssize_t)decay_ms); + assert_false(err, ""); + + nstime_t epochtime; + nstime_init(&epochtime, decay_epoch_duration_ns(&decay)); + + uint64_t ns_until_purge_empty = decay_ns_until_purge(&decay, 0, 0); + expect_u64_eq(ns_until_purge_empty, DECAY_UNBOUNDED_TIME_TO_PURGE, + "Failed to return unbounded wait time for zero threshold"); + + const size_t dirty_pages_per_epoch = 1000; + size_t dirty_pages = 0; + bool epoch_advanced = false; + for (uint64_t i = 0; i < nepoch_init; i++) { + nstime_add(&curtime, &epochtime); + dirty_pages += dirty_pages_per_epoch; + epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, + dirty_pages); + } + expect_true(epoch_advanced, "Epoch never advanced"); + + uint64_t ns_until_purge_all = decay_ns_until_purge(&decay, + dirty_pages, dirty_pages); + expect_u64_ge(ns_until_purge_all, decay_ns, + "Incorrectly calculated time to purge all pages"); + + uint64_t ns_until_purge_none = decay_ns_until_purge(&decay, + dirty_pages, 0); + expect_u64_eq(ns_until_purge_none, decay_epoch_duration_ns(&decay) * 2, + "Incorrectly calculated time to purge 0 pages"); + + uint64_t npages_threshold = dirty_pages / 2; + uint64_t ns_until_purge_half = decay_ns_until_purge(&decay, + dirty_pages, npages_threshold); + + nstime_t waittime; + nstime_init(&waittime, ns_until_purge_half); + nstime_add(&curtime, &waittime); + + decay_maybe_advance_epoch(&decay, &curtime, dirty_pages); + size_t npages_limit = 
decay_npages_limit_get(&decay); + expect_zu_lt(npages_limit, dirty_pages, + "npages_limit failed to decrease after waiting"); + size_t expected = dirty_pages - npages_limit; + int deviation = abs((int)expected - (int)(npages_threshold)); + expect_d_lt(deviation, (int)(npages_threshold / 2), + "After waiting, number of pages is out of the expected interval " + "[0.5 * npages_threshold .. 1.5 * npages_threshold]"); } TEST_END int main(void) { return test( - test_decay_empty); + test_decay_init, + test_decay_ms_valid, + test_decay_maybe_advance_epoch, + test_decay_empty, + test_decay, + test_decay_ns_until_purge); } From e09eac1d4e9df2e889417e1cd3e56b451b959ba8 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 6 May 2021 13:47:01 -0700 Subject: [PATCH 2088/2608] Remove hpa_central. This is now dead code. --- Makefile.in | 2 - include/jemalloc/internal/hpa_central.h | 47 -- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - src/hpa_central.c | 192 -------- test/unit/hpa_central.c | 450 ------------------ 8 files changed, 699 deletions(-) delete mode 100644 include/jemalloc/internal/hpa_central.h delete mode 100644 src/hpa_central.c delete mode 100644 test/unit/hpa_central.c diff --git a/Makefile.in b/Makefile.in index abdf8004..286f7ea9 100644 --- a/Makefile.in +++ b/Makefile.in @@ -121,7 +121,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/fxp.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ - $(srcroot)src/hpa_central.c \ $(srcroot)src/hpa_hooks.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ @@ -223,7 +222,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_background_thread.c \ - $(srcroot)test/unit/hpa_central.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git 
a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h deleted file mode 100644 index 8659f712..00000000 --- a/include/jemalloc/internal/hpa_central.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H -#define JEMALLOC_INTERNAL_HPA_CENTRAL_H - -#include "jemalloc/internal/base.h" -#include "jemalloc/internal/emap.h" - -typedef struct hpa_central_s hpa_central_t; -struct hpa_central_s { - /* The emap we use for metadata operations. */ - emap_t *emap; - - edata_cache_small_t ecs; - eset_t eset; - - size_t sn_next; -}; - -void hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, - emap_t *emap); -/* - * Tries to satisfy the given allocation request with an extent already given to - * central. - */ -edata_t *hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, - size_t size_min, size_t size_goal); -/* - * Adds the given edata to the central allocator as a new allocation. The - * intent is that after a reuse attempt fails, the caller can allocate a new - * extent using whatever growth policy it prefers and allocate from that, giving - * the excess to the hpa_central_t (this is analogous to the - * extent_grow_retained functionality; we can allocate address space in - * exponentially growing chunks). - * - * The edata_t should come from the same base that this hpa was initialized - * with. Only complete extents should be added (i.e. those for which the head - * bit is true, and for which their successor is either not owned by jemalloc - * or also has a head bit of true). It should be active, large enough to - * satisfy the requested allocation, and not already in the emap. - * - * If this returns true, then we did not accept the extent, and took no action. - * Otherwise, modifies *edata to satisfy the allocation. 
- */ -bool hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, - size_t size, edata_t *to_add); -void hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata); - -#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index f6fae7f2..597b247b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -61,7 +61,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 800861d3..d063a019 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 3d3e7174..46633e82 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -61,7 +61,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 800861d3..d063a019 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,9 +67,6 @@ Source Files - - Source Files - Source Files diff --git a/src/hpa_central.c b/src/hpa_central.c deleted file mode 100644 index 9e00dd64..00000000 --- a/src/hpa_central.c +++ /dev/null @@ -1,192 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/hpa_central.h" - -void -hpa_central_init(hpa_central_t *central, edata_cache_t *edata_cache, - emap_t *emap) { - central->emap = emap; - edata_cache_small_init(¢ral->ecs, edata_cache); - eset_init(¢ral->eset, extent_state_dirty); - central->sn_next = 
0; -} - -/* - * Returns the trail, or NULL in case of failure (which can only occur in case - * of an emap operation failure; i.e. OOM). - */ -static edata_t * -hpa_central_split(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata, - size_t size) { - edata_t *trail = edata_cache_small_get(tsdn, ¢ral->ecs); - if (trail == NULL) { - return NULL; - } - size_t cursize = edata_size_get(edata); - edata_init(trail, edata_arena_ind_get(edata), - (void *)((uintptr_t)edata_base_get(edata) + size), cursize - size, - /* slab */ false, SC_NSIZES, edata_sn_get(edata), - edata_state_get(edata), edata_zeroed_get(edata), - edata_committed_get(edata), EXTENT_PAI_HPA, EXTENT_NOT_HEAD); - - emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, central->emap, &prepare, edata, - size, trail, cursize - size); - assert(edata_state_get(edata) == edata_state_get(trail)); - if (err) { - edata_cache_small_put(tsdn, ¢ral->ecs, trail); - return NULL; - } - assert(edata_state_get(edata) == edata_state_get(trail)); - - edata_size_set(edata, size); - emap_split_commit(tsdn, central->emap, &prepare, edata, size, trail, - cursize - size); - - return trail; -} - -edata_t * -hpa_central_alloc_reuse(tsdn_t *tsdn, hpa_central_t *central, - size_t size_min, size_t size_goal) { - assert((size_min & PAGE_MASK) == 0); - assert((size_goal & PAGE_MASK) == 0); - - /* - * Fragmentation avoidance is more important in the HPA than giving the - * user their preferred amount of space, since we expect the average - * unused extent to be more costly (PAC extents can get purged away - * easily at any granularity; HPA extents are much more difficult to - * purge away if they get stranded). So we always search for the - * earliest (in first-fit ordering) extent that can satisfy the request, - * and use it, regardless of the goal size. 
- */ - edata_t *edata = eset_fit(¢ral->eset, size_min, PAGE, - /* exact_only */ false, /* lg_max_fit */ SC_PTR_BITS); - if (edata == NULL) { - return NULL; - } - - eset_remove(¢ral->eset, edata); - /* Maybe the first fit is also under the limit. */ - if (edata_size_get(edata) <= size_goal) { - goto label_success; - } - - /* Otherwise, split. */ - edata_t *trail = hpa_central_split(tsdn, central, edata, size_goal); - if (trail == NULL) { - eset_insert(¢ral->eset, edata); - return NULL; - } - emap_assert_mapped(tsdn, central->emap, trail); - eset_insert(¢ral->eset, trail); - -label_success: - emap_assert_mapped(tsdn, central->emap, edata); - assert(edata_size_get(edata) >= size_min); - /* - * We don't yet support purging in the hpa_central; everything should be - * dirty. - */ - assert(edata_state_get(edata) == extent_state_dirty); - assert(edata_base_get(edata) == edata_addr_get(edata)); - emap_update_edata_state(tsdn, central->emap, edata, - extent_state_active); - return edata; -} - -bool -hpa_central_alloc_grow(tsdn_t *tsdn, hpa_central_t *central, - size_t size, edata_t *edata) { - assert((size & PAGE_MASK) == 0); - assert(edata_base_get(edata) == edata_addr_get(edata)); - assert(edata_size_get(edata) >= size); - assert(edata_arena_ind_get(edata) - == base_ind_get(central->ecs.fallback->base)); - assert(edata_is_head_get(edata)); - assert(edata_state_get(edata) == extent_state_active); - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - assert(edata_slab_get(edata) == false); - assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); - - /* edata should be a new alloc, and hence not already mapped. */ - emap_assert_not_mapped(tsdn, central->emap, edata); - - size_t cursize = edata_size_get(edata); - - bool err = emap_register_boundary(tsdn, central->emap, edata, SC_NSIZES, - /* slab */ false); - if (err) { - return true; - } - /* No splitting is necessary. 
*/ - if (cursize == size) { - size_t sn = central->sn_next++; - edata_sn_set(edata, sn); - return false; - } - - /* We should split. */ - edata_t *trail = hpa_central_split(tsdn, central, edata, size); - if (trail == NULL) { - emap_deregister_boundary(tsdn, central->emap, NULL); - return true; - } - size_t sn = central->sn_next++; - edata_sn_set(edata, sn); - edata_sn_set(trail, sn); - - emap_update_edata_state(tsdn, central->emap, trail, extent_state_dirty); - eset_insert(¢ral->eset, trail); - return false; -} - -/* Merges b into a, freeing b back to the edata cache.. */ -static void -hpa_central_dalloc_merge(tsdn_t *tsdn, hpa_central_t *central, edata_t *a, - edata_t *b) { - assert(emap_edata_is_acquired(tsdn, central->emap, a)); - assert(emap_edata_is_acquired(tsdn, central->emap, b)); - - emap_prepare_t prepare; - emap_merge_prepare(tsdn, central->emap, &prepare, a, b); - edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - emap_merge_commit(tsdn, central->emap, &prepare, a, b); - edata_cache_small_put(tsdn, ¢ral->ecs, b); -} - -void -hpa_central_dalloc(tsdn_t *tsdn, hpa_central_t *central, edata_t *edata) { - assert(edata_state_get(edata) == extent_state_active); - assert(edata_ps_get(edata) == NULL); - - /* - * These should really be called at the pa interface level, but - * currently they're not. - */ - edata_addr_set(edata, edata_base_get(edata)); - edata_zeroed_set(edata, false); - - /* - * Merge forward first, so that the original *edata stays active state - * for the second acquire (only necessary for sanity checking). 
- */ - edata_t *trail = emap_try_acquire_edata_neighbor(tsdn, central->emap, - edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ true); - if (trail != NULL) { - eset_remove(¢ral->eset, trail); - hpa_central_dalloc_merge(tsdn, central, edata, trail); - } - edata_t *lead = emap_try_acquire_edata_neighbor(tsdn, central->emap, - edata, EXTENT_PAI_HPA, extent_state_dirty, /* forward */ false); - if (lead != NULL) { - eset_remove(¢ral->eset, lead); - hpa_central_dalloc_merge(tsdn, central, lead, edata); - edata = lead; - } - - emap_update_edata_state(tsdn, central->emap, edata, extent_state_dirty); - eset_insert(¢ral->eset, edata); -} diff --git a/test/unit/hpa_central.c b/test/unit/hpa_central.c deleted file mode 100644 index f90b6e3c..00000000 --- a/test/unit/hpa_central.c +++ /dev/null @@ -1,450 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/hpa_central.h" - -typedef struct test_data_s test_data_t; -struct test_data_s { - /* - * Must be the first member -- we convert back and forth between the - * test_data_t and the hpa_central_t; - */ - hpa_central_t central; - base_t *base; - edata_cache_t edata_cache; - emap_t emap; -}; - -void -create_test_data(hpa_central_t **r_central, base_t **r_base) { - bool err; - base_t *base = base_new(TSDN_NULL, /* ind */ 111, - &ehooks_default_extent_hooks); - assert_ptr_not_null(base, ""); - - test_data_t *test_data = malloc(sizeof(test_data_t)); - assert_ptr_not_null(test_data, ""); - - test_data->base = base; - - err = edata_cache_init(&test_data->edata_cache, base); - assert_false(err, ""); - - err = emap_init(&test_data->emap, test_data->base, - /* zeroed */ false); - assert_false(err, ""); - - hpa_central_init(&test_data->central, &test_data->edata_cache, - &test_data->emap); - - *r_central = (hpa_central_t *)test_data; - *r_base = base; -} - -static void -destroy_test_data(hpa_central_t *central) { - test_data_t *test_data = (test_data_t *)central; - base_delete(TSDN_NULL, test_data->base); - 
free(test_data); -} - -static edata_t * -test_edata(base_t *base, uintptr_t addr, size_t size) { - edata_t *edata = base_alloc_edata(TSDN_NULL, base); - assert_ptr_not_null(edata, ""); - edata_init(edata, base_ind_get(base), (void *)addr, - size, /* slab */ false, /* szind_t */ SC_NSIZES, /* sn */ 0, - extent_state_active, /* zeroed */ true, /* comitted */ true, - EXTENT_PAI_HPA, /* is_head */ true); - return edata; -} - -static void -edata_expect_alloc(base_t *base, edata_t *edata, uintptr_t addr, size_t size) { - expect_ptr_not_null(edata, "Alloc should have succeeded"); - expect_u_eq(base_ind_get(base), edata_arena_ind_get(edata), ""); - expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); - expect_d_eq(extent_state_active, edata_state_get(edata), ""); - assert_ptr_eq((void *)addr, edata_base_get(edata), ""); - assert_zu_eq(size, edata_size_get(edata), ""); -} - - -TEST_BEGIN(test_empty) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - edata_t *edata; - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 2 * PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, 8 * PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - edata = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, 8 * PAGE); - expect_ptr_null(edata, "Empty allocator succeed in its allocation"); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_first_fit_simple) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); - bool err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); - expect_false(err, "Unexpected grow failure"); - edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); - - edata_t 
*edata2 = test_edata(base, 4 * PAGE, 1 * PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); - expect_false(err, "Unexpected grow failure"); - edata_expect_alloc(base, edata2, 4 * PAGE, PAGE); - - hpa_central_dalloc(TSDN_NULL, central, edata2); - - /* - * Even though there's a lower-addressed extent that a by-size search - * will find earlier, we should still pick the earlier one. - */ - edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, PAGE); - /* - * Recall there's still an active page at the beginning of the extent - * added at 10 * PAGE; the next allocation from it should be at 11 * - * PAGE. - */ - edata_expect_alloc(base, edata3, 11 * PAGE, PAGE); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_first_fit_large_goal) { - /* - * See the comment in hpa_central_alloc_reuse; we should prefer an - * earlier allocation over a later one, even if it means we fall short - * of the goal size. - */ - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - edata_t *edata1 = test_edata(base, 10 * PAGE, 10 * PAGE); - bool err = hpa_central_alloc_grow(TSDN_NULL, central, 2 * PAGE, edata1); - expect_false(err, "Unexpected grow failure"); - edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); - - /* We need a page, but would like 2. */ - edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - 2 * PAGE); - edata_expect_alloc(base, edata2, 12 * PAGE, 2 * PAGE); - - hpa_central_dalloc(TSDN_NULL, central, edata1); - - /* - * Now, we have a 2-page inactive extent, then a 2-page active extent, - * then a 6-page inactive extent. If our minimum size is 2 but the goal - * size is 4, we should still pick the first hole rather than the - * second. - */ - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 4 * PAGE); - edata_expect_alloc(base, edata1, 10 * PAGE, 2 * PAGE); - - /* - * Make sure we didn't succeed only by forgetting about that last range - * or something. 
- */ - edata_t *edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, - 4 * PAGE); - edata_expect_alloc(base, edata3, 14 * PAGE, 4 * PAGE); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_merging) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - /* Test an exact match */ - bool err; - edata_t *edata1 = test_edata(base, 10 * PAGE, PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata1); - expect_false(err, "Alloc should have succeeded"); - edata_expect_alloc(base, edata1, 10 * PAGE, PAGE); - - edata_t *edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - PAGE); - expect_ptr_null(edata2, "Allocation should have failed"); - - /* - * Create two more regions; one immediately before the first and one - * immediately after. The extents shouldn't get merged. - */ - edata2 = test_edata(base, 11 * PAGE, PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata2); - edata_expect_alloc(base, edata2, 11 * PAGE, PAGE); - - edata_t *edata3 = test_edata(base, 12 * PAGE, 20 * PAGE); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, edata3); - edata_expect_alloc(base, edata3, 12 * PAGE, PAGE); - - /* - * OK, we've got 3 contiguous ranges; [10, 11), [11, 12), and [12, 22). - * They shouldn't get merged though, even once freed. We free the - * middle range last to test merging (or rather, the lack thereof) in - * both directions. - */ - hpa_central_dalloc(TSDN_NULL, central, edata1); - hpa_central_dalloc(TSDN_NULL, central, edata3); - hpa_central_dalloc(TSDN_NULL, central, edata2); - - /* - * A two-page range should only be satisfied by the third added region. - */ - edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, - 2 * PAGE); - edata_expect_alloc(base, edata, 12 * PAGE, 2 * PAGE); - hpa_central_dalloc(TSDN_NULL, central, edata); - - /* Same with a three-page range. 
*/ - edata = hpa_central_alloc_reuse(TSDN_NULL, central, 3 * PAGE, 3 * PAGE); - edata_expect_alloc(base, edata, 12 * PAGE, 3 * PAGE); - hpa_central_dalloc(TSDN_NULL, central, edata); - - /* Let's try some cases that *should* get merged. */ - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); - edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); - edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); - - /* Merge with predecessor. */ - hpa_central_dalloc(TSDN_NULL, central, edata1); - hpa_central_dalloc(TSDN_NULL, central, edata2); - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 4 * PAGE, - 4 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 4 * PAGE); - - /* Merge with successor */ - hpa_central_dalloc(TSDN_NULL, central, edata3); - hpa_central_dalloc(TSDN_NULL, central, edata1); - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, - 6 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); - hpa_central_dalloc(TSDN_NULL, central, edata1); - - /* - * Let's try merging with both. We need to get three adjacent - * allocations again; do it the same way as before. 
- */ - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 2 * PAGE); - edata2 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata2, 14 * PAGE, 2 * PAGE); - edata3 = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, 2 * PAGE); - edata_expect_alloc(base, edata3, 16 * PAGE, 2 * PAGE); - - hpa_central_dalloc(TSDN_NULL, central, edata1); - hpa_central_dalloc(TSDN_NULL, central, edata3); - hpa_central_dalloc(TSDN_NULL, central, edata2); - - edata1 = hpa_central_alloc_reuse(TSDN_NULL, central, 6 * PAGE, - 6 * PAGE); - edata_expect_alloc(base, edata1, 12 * PAGE, 6 * PAGE); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_stress_simple) { - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - enum { - range_base = 1024 * PAGE, - range_pages = 256, - range_size = range_pages * PAGE - }; - - edata_t *edatas[range_pages]; - - bool err; - edata_t *range = test_edata(base, range_base, range_size); - err = hpa_central_alloc_grow(TSDN_NULL, central, PAGE, range); - expect_false(err, "Unexpected grow failure"); - hpa_central_dalloc(TSDN_NULL, central, range); - - for (size_t i = 0; i < range_pages; i++) { - edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - PAGE); - edata_expect_alloc(base, edatas[i], range_base + i * PAGE, - PAGE); - } - /* Free up the odd indices. */ - for (size_t i = 0; i < range_pages; i++) { - if (i % 2 == 0) { - continue; - } - hpa_central_dalloc(TSDN_NULL, central, edatas[i]); - } - /* - * Reallocate them again. Try it with a goal size that can't be - * satisfied. - */ - for (size_t i = 0; i < range_pages; i++) { - if (i % 2 == 0) { - continue; - } - edatas[i] = hpa_central_alloc_reuse(TSDN_NULL, central, PAGE, - PAGE); - edata_expect_alloc(base, edatas[i], range_base + i * PAGE, - PAGE); - } - /* - * In each batch of 8, create a free range of 4 pages and a free range - * of 2 pages. 
- */ - for (size_t i = 0; i < range_pages; i += 8) { - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 1]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 2]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 3]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 4]); - - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 6]); - hpa_central_dalloc(TSDN_NULL, central, edatas[i + 7]); - } - - /* - * And allocate 3 pages into the first, and 2 pages into the second. To - * mix things up a little, lets get those amounts via goal sizes - * instead. - */ - for (size_t i = 0; i < range_pages; i += 8) { - edatas[i + 1] = hpa_central_alloc_reuse(TSDN_NULL, central, - 2 * PAGE, 3 * PAGE); - edata_expect_alloc(base, edatas[i + 1], - range_base + (i + 1) * PAGE, 3 * PAGE); - - edatas[i + 6] = hpa_central_alloc_reuse(TSDN_NULL, central, - 2 * PAGE, 4 * PAGE); - edata_expect_alloc(base, edatas[i + 6], - range_base + (i + 6) * PAGE, 2 * PAGE); - } - - edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, central, 2 * PAGE, - 2 * PAGE); - expect_ptr_null(edata, "Should be no free ranges of 2 pages"); - - destroy_test_data(central); -} -TEST_END - -TEST_BEGIN(test_stress_random) { - const size_t range_length = 32 * PAGE; - const size_t range_base = 100 * PAGE; - const size_t size_max_pages = 16; - - hpa_central_t *central; - base_t *base; - create_test_data(¢ral, &base); - - /* - * We loop through this once per some operations, so we don't want it to - * get too big. - */ - const size_t nlive_edatas_max = 100; - size_t nlive_edatas = 0; - edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); - size_t nranges = 0; - - /* - * Nothing special about this constant; we're only fixing it for - * consistency across runs. - */ - size_t prng_state = (size_t)0x76999ffb014df07c; - for (size_t i = 0; i < 100 * 1000; i++) { - size_t operation = prng_range_zu(&prng_state, 2); - if (operation == 0) { - /* Do an alloc. 
*/ - if (nlive_edatas == nlive_edatas_max) { - continue; - } - size_t min_pages = 1 + prng_range_zu( - &prng_state, size_max_pages); - size_t goal_pages = min_pages + prng_range_zu( - &prng_state, size_max_pages - min_pages + 1); - edata_t *edata = hpa_central_alloc_reuse(TSDN_NULL, - central, min_pages * PAGE, goal_pages * PAGE); - if (edata == NULL) { - edata = test_edata(base, - range_base + range_length * nranges, - range_length); - bool err = hpa_central_alloc_grow(TSDN_NULL, - central, goal_pages * PAGE, edata); - assert_false(err, "Unexpected grow failure"); - nranges++; - } - uintptr_t begin = (uintptr_t)edata_base_get(edata); - uintptr_t end = (uintptr_t)edata_last_get(edata); - size_t range_begin = (begin - range_base) / range_length; - size_t range_end = (end - range_base) / range_length; - expect_zu_eq(range_begin, range_end, - "Should not have allocations spanning " - "multiple ranges"); - expect_zu_ge(begin, range_base, - "Gave back a pointer outside of the reserved " - "range"); - expect_zu_lt(end, range_base + range_length * nranges, - "Gave back a pointer outside of the reserved " - "range"); - for (size_t j = 0; j < nlive_edatas; j++) { - edata_t *other = live_edatas[j]; - uintptr_t other_begin = - (uintptr_t)edata_base_get(other); - uintptr_t other_end = - (uintptr_t)edata_last_get(other); - expect_true( - (begin < other_begin && end < other_begin) - || (begin > other_end), - "Gave back two extents that overlap"); - } - live_edatas[nlive_edatas] = edata; - nlive_edatas++; - } else { - /* Do a free. 
*/ - if (nlive_edatas == 0) { - continue; - } - size_t victim = prng_range_zu(&prng_state, - nlive_edatas); - edata_t *to_free = live_edatas[victim]; - live_edatas[victim] = live_edatas[nlive_edatas - 1]; - nlive_edatas--; - hpa_central_dalloc(TSDN_NULL, central, to_free); - } - } - - free(live_edatas); - destroy_test_data(central); -} -TEST_END - -int main(void) { - return test_no_reentrancy( - test_empty, - test_first_fit_simple, - test_first_fit_large_goal, - test_merging, - test_stress_simple, - test_stress_random); -} From d93eef2f405b7c6e2a78f589a5037a26d4bd4d44 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 7 May 2021 13:54:26 -0700 Subject: [PATCH 2089/2608] HPA: Introduce a redesigned hpa_central_t. For now, this only handles allocating virtual address space to shards, with no reuse. This is framework, though; it will change over time. --- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/hpa.h | 70 +++++--- include/jemalloc/internal/pa.h | 24 ++- include/jemalloc/internal/witness.h | 4 +- src/arena.c | 16 +- src/hpa.c | 205 ++++++++++++++-------- src/jemalloc.c | 16 +- src/pa.c | 29 ++- test/unit/hpa.c | 10 +- test/unit/pa.c | 9 +- 10 files changed, 257 insertions(+), 128 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index bb3462f5..557e49f1 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -99,7 +99,7 @@ bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero); -void arena_boot(sc_data_t *sc_data); +bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); void arena_prefork0(tsdn_t *tsdn, arena_t *arena); void arena_prefork1(tsdn_t *tsdn, arena_t *arena); void arena_prefork2(tsdn_t *tsdn, arena_t *arena); diff --git a/include/jemalloc/internal/hpa.h 
b/include/jemalloc/internal/hpa.h index 3132a6f5..623f9c40 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -7,6 +7,37 @@ #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +typedef struct hpa_central_s hpa_central_t; +struct hpa_central_s { + /* + * The mutex guarding most of the operations on the central data + * structure. + */ + malloc_mutex_t mtx; + /* + * Guards expansion of eden. We separate this from the regular mutex so + * that cheaper operations can still continue while we're doing the OS + * call. + */ + malloc_mutex_t grow_mtx; + /* + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. + */ + void *eden; + size_t eden_len; + /* Source for metadata. */ + base_t *base; + /* Number of grow operations done on this hpa_central_t. */ + uint64_t age_counter; + + /* The HPA hooks. */ + hpa_hooks_t hooks; +}; + typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* @@ -52,19 +83,20 @@ struct hpa_shard_s { * pointer to the hpa_shard_t. */ pai_t pai; - malloc_mutex_t grow_mtx; + + /* The central allocator we get our hugepages from. */ + hpa_central_t *central; + /* Protects most of this shard's state. */ malloc_mutex_t mtx; + /* + * Guards the shard's access to the central allocator (preventing + * multiple threads operating on this shard from accessing the central + * allocator). + */ + malloc_mutex_t grow_mtx; /* The base metadata allocator. */ base_t *base; - /* - * The HPA hooks for this shard. Eventually, once we have the - * hpa_central_t back, these should live there (since it doesn't make - * sense for different shards on the same hpa_central_t to have - * different hooks). - */ - hpa_hooks_t hooks; - /* * This edata cache is the one we use when allocating a small extent * from a pageslab. 
The pageslab itself comes from the centralized @@ -81,18 +113,13 @@ struct hpa_shard_s { */ uint64_t age_counter; - /* - * Either NULL (if empty), or some integer multiple of a - * hugepage-aligned number of hugepages. We carve them off one at a - * time to satisfy new pageslab requests. - * - * Guarded by grow_mtx. - */ - void *eden; - size_t eden_len; - /* The arena ind we're associated with. */ unsigned ind; + + /* + * Our emap. This is just a cache of the emap pointer in the associated + * hpa_central. + */ emap_t *emap; /* The configuration choices for this hpa shard. */ @@ -117,8 +144,9 @@ struct hpa_shard_s { * just that it can function properly given the system it's running on. */ bool hpa_supported(); -bool hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, const hpa_hooks_t *hooks, +bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); +bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, + base_t *base, edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 582625b1..2e5b9ef0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -20,6 +20,11 @@ * others will be coming soon. */ +typedef struct pa_central_s pa_central_t; +struct pa_central_s { + hpa_central_t hpa; +}; + /* * The stats for a particular pa_shard. Because of the way the ctl module * handles stats epoch data collection (it has its own arena_stats, and merges @@ -61,6 +66,9 @@ struct pa_shard_stats_s { */ typedef struct pa_shard_s pa_shard_t; struct pa_shard_s { + /* The central PA this shard is associated with. */ + pa_central_t *central; + /* * Number of pages in active extents. * @@ -76,6 +84,7 @@ struct pa_shard_s { * for those allocations. 
*/ atomic_b_t use_hpa; + /* * If we never used the HPA to begin with, it wasn't initialized, and so * we shouldn't try to e.g. acquire its mutexes during fork. This @@ -121,18 +130,21 @@ pa_shard_ehooks_get(pa_shard_t *shard) { } /* Returns true on error. */ -bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, - unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms); +bool pa_central_init(pa_central_t *central, base_t *base, bool hpa, + hpa_hooks_t *hpa_hooks); + +/* Returns true on error. */ +bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, + emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, + malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms); /* * This isn't exposed to users; we allow late enablement of the HPA shard so * that we can boot without worrying about the HPA, then turn it on in a0. 
*/ bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts); + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts); /* * We stop using the HPA when custom extent hooks are installed, but still diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 0c29321c..c12a705c 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -52,8 +52,8 @@ enum witness_rank_e { WITNESS_RANK_EXTENTS, WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, - WITNESS_RANK_HPA_GROW, - WITNESS_RANK_HPA, + WITNESS_RANK_HPA_CENTRAL_GROW, + WITNESS_RANK_HPA_CENTRAL, WITNESS_RANK_EDATA_CACHE, diff --git a/src/arena.c b/src/arena.c index 5daeea31..a495ef64 100644 --- a/src/arena.c +++ b/src/arena.c @@ -36,6 +36,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; +pa_central_t arena_pa_central_global; const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ @@ -1541,9 +1542,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { nstime_t cur_time; nstime_init_update(&cur_time); - if (pa_shard_init(tsdn, &arena->pa_shard, &arena_emap_global, base, ind, - &arena->stats.pa_shard_stats, LOCKEDINT_MTX(arena->stats.mtx), - &cur_time, oversize_threshold, arena_dirty_decay_ms_default_get(), + if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global, + &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, + LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, + arena_dirty_decay_ms_default_get(), arena_muzzy_decay_ms_default_get())) { goto label_error; } @@ -1575,7 +1577,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(tsdn, 
&arena->pa_shard, - &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_shard_opts, &opt_hpa_sec_opts)) { goto label_error; } } @@ -1664,8 +1666,8 @@ arena_is_huge(unsigned arena_ind) { return (arena_ind == huge_arena_ind); } -void -arena_boot(sc_data_t *sc_data) { +bool +arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); for (unsigned i = 0; i < SC_NBINS; i++) { @@ -1680,6 +1682,8 @@ arena_boot(sc_data_t *sc_data) { nbins_total += bin_infos[i].n_shards; cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); } + return pa_central_init(&arena_pa_central_global, base, hpa, + &hpa_hooks_default); } void diff --git a/src/hpa.c b/src/hpa.c index 4ae30b97..10594587 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -51,9 +51,125 @@ hpa_supported() { } bool -hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, - edata_cache_t *edata_cache, unsigned ind, - const hpa_hooks_t *hooks, const hpa_shard_opts_t *opts) { +hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { + /* malloc_conf processing should have filtered out these cases. 
*/ + assert(hpa_supported()); + bool err; + err = malloc_mutex_init(¢ral->grow_mtx, "hpa_central_grow", + WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + err = malloc_mutex_init(¢ral->mtx, "hpa_central", + WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + central->base = base; + central->eden = NULL; + central->eden_len = 0; + central->age_counter = 0; + central->hooks = *hooks; + return false; +} + +static hpdata_t * +hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { + return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t), + CACHELINE); +} + +hpdata_t * +hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, + bool *oom) { + /* Don't yet support big allocations; these should get filtered out. */ + assert(size <= HUGEPAGE); + /* + * Should only try to extract from the central allocator if the local + * shard is exhausted. We should hold the grow_mtx on that shard. + */ + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW); + + malloc_mutex_lock(tsdn, ¢ral->grow_mtx); + *oom = false; + + hpdata_t *ps = NULL; + + /* Is eden a perfect fit? */ + if (central->eden != NULL && central->eden_len == HUGEPAGE) { + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + hpdata_init(ps, central->eden, central->age_counter++); + central->eden = NULL; + central->eden_len = 0; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return ps; + } + + /* + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. + */ + if (central->eden == NULL) { + /* + * During development, we're primarily concerned with systems + * with overcommit. Eventually, we should be more careful here. 
+ */ + bool commit = true; + /* Allocate address space, bailing if we fail. */ + void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, + &commit); + if (new_eden == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + pages_unmap(new_eden, HPA_EDEN_SIZE); + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + central->eden = new_eden; + central->eden_len = HPA_EDEN_SIZE; + } else { + /* Eden is already nonempty; only need an edata for ps. */ + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + } + assert(ps != NULL); + assert(central->eden != NULL); + assert(central->eden_len > HUGEPAGE); + assert(central->eden_len % HUGEPAGE == 0); + assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); + + hpdata_init(ps, central->eden, central->age_counter++); + + char *eden_char = (char *)central->eden; + eden_char += HUGEPAGE; + central->eden = (void *)eden_char; + central->eden_len -= HUGEPAGE; + + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + + return ps; +} + +bool +hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, + base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts) { /* malloc_conf processing should have filtered out these cases. 
*/ assert(hpa_supported()); bool err; @@ -69,13 +185,11 @@ hpa_shard_init(hpa_shard_t *shard, emap_t *emap, base_t *base, } assert(edata_cache != NULL); + shard->central = central; shard->base = base; - shard->hooks = *hooks; edata_cache_small_init(&shard->ecs, edata_cache); psset_init(&shard->psset); shard->age_counter = 0; - shard->eden = NULL; - shard->eden_len = 0; shard->ind = ind; shard->emap = emap; @@ -136,12 +250,6 @@ hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, malloc_mutex_unlock(tsdn, &shard->grow_mtx); } -static hpdata_t * -hpa_alloc_ps(tsdn_t *tsdn, hpa_shard_t *shard) { - return (hpdata_t *)base_alloc(tsdn, shard->base, sizeof(hpdata_t), - CACHELINE); -} - static bool hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* @@ -227,7 +335,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; - shard->hooks.curtime(&now); + shard->central->hooks.curtime(&now); hpdata_allow_hugify(ps, now); } /* @@ -247,64 +355,6 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, } } -static hpdata_t * -hpa_grow(tsdn_t *tsdn, hpa_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->grow_mtx); - hpdata_t *ps = NULL; - - /* Is eden a perfect fit? */ - if (shard->eden != NULL && shard->eden_len == HUGEPAGE) { - ps = hpa_alloc_ps(tsdn, shard); - if (ps == NULL) { - return NULL; - } - hpdata_init(ps, shard->eden, shard->age_counter++); - shard->eden = NULL; - shard->eden_len = 0; - return ps; - } - - /* - * We're about to try to allocate from eden by splitting. If eden is - * NULL, we have to allocate it too. Otherwise, we just have to - * allocate an edata_t for the new psset. - */ - if (shard->eden == NULL) { - /* Allocate address space, bailing if we fail. 
*/ - void *new_eden = shard->hooks.map(HPA_EDEN_SIZE); - if (new_eden == NULL) { - return NULL; - } - ps = hpa_alloc_ps(tsdn, shard); - if (ps == NULL) { - shard->hooks.unmap(new_eden, HPA_EDEN_SIZE); - return NULL; - } - shard->eden = new_eden; - shard->eden_len = HPA_EDEN_SIZE; - } else { - /* Eden is already nonempty; only need an edata for ps. */ - ps = hpa_alloc_ps(tsdn, shard); - if (ps == NULL) { - return NULL; - } - } - assert(ps != NULL); - assert(shard->eden != NULL); - assert(shard->eden_len > HUGEPAGE); - assert(shard->eden_len % HUGEPAGE == 0); - assert(HUGEPAGE_ADDR2BASE(shard->eden) == shard->eden); - - hpdata_init(ps, shard->eden, shard->age_counter++); - - char *eden_char = (char *)shard->eden; - eden_char += HUGEPAGE; - shard->eden = (void *)eden_char; - shard->eden_len -= HUGEPAGE; - - return ps; -} - /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -348,7 +398,8 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Actually do the purging, now that the lock is dropped. */ if (dehugify) { - shard->hooks.dehugify(hpdata_addr_get(to_purge), HUGEPAGE); + shard->central->hooks.dehugify(hpdata_addr_get(to_purge), + HUGEPAGE); } size_t total_purged = 0; uint64_t purges_this_pass = 0; @@ -359,7 +410,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - shard->hooks.purge(purge_addr, purge_size); + shard->central->hooks.purge(purge_addr, purge_size); } malloc_mutex_lock(tsdn, &shard->mtx); @@ -406,7 +457,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { /* Make sure that it's been hugifiable for long enough. 
*/ nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); nstime_t nstime; - shard->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime); nstime_subtract(&nstime, &time_hugify_allowed); uint64_t millis = nstime_msec(&nstime); if (millis < shard->opts.hugify_delay_ms) { @@ -427,7 +478,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); - shard->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); + shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; @@ -604,7 +655,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *ps = hpa_grow(tsdn, shard); + hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; @@ -833,7 +884,7 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { /* There should be no allocations anywhere. */ assert(hpdata_empty(ps)); psset_remove(&shard->psset, ps); - shard->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE); + shard->central->hooks.unmap(hpdata_addr_get(ps), HUGEPAGE); } } diff --git a/src/jemalloc.c b/src/jemalloc.c index 71efcb61..8d57180e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1747,7 +1747,19 @@ malloc_init_hard_a0_locked() { if (config_prof) { prof_boot1(); } - arena_boot(&sc_data); + if (opt_hpa && !hpa_supported()) { + malloc_printf(": HPA not supported in the current " + "configuration; %s.", + opt_abort_conf ? 
"aborting" : "disabling"); + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } else { + opt_hpa = false; + } + } + if (arena_boot(&sc_data, b0get(), opt_hpa)) { + return true; + } if (tcache_boot(TSDN_NULL, b0get())) { return true; } @@ -1786,7 +1798,7 @@ malloc_init_hard_a0_locked() { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, - &hpa_hooks_default, &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_shard_opts, &opt_hpa_sec_opts)) { return true; } } diff --git a/src/pa.c b/src/pa.c index 0172dfa7..aebb8e92 100644 --- a/src/pa.c +++ b/src/pa.c @@ -15,10 +15,23 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { } bool -pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, - unsigned ind, pa_shard_stats_t *stats, malloc_mutex_t *stats_mtx, - nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, - ssize_t muzzy_decay_ms) { +pa_central_init(pa_central_t *central, base_t *base, bool hpa, + hpa_hooks_t *hpa_hooks) { + bool err; + if (hpa) { + err = hpa_central_init(¢ral->hpa, base, hpa_hooks); + if (err) { + return true; + } + } + return false; +} + +bool +pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, + emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, + malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold, + ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. 
*/ assert(base_ind_get(base) == ind); if (edata_cache_init(&shard->edata_cache, base)) { @@ -42,6 +55,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, shard->stats = stats; memset(shard->stats, 0, sizeof(*shard->stats)); + shard->central = central; shard->emap = emap; shard->base = base; @@ -50,10 +64,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, emap_t *emap, base_t *base, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, - const hpa_hooks_t *hpa_hooks, const hpa_shard_opts_t *hpa_opts, - const sec_opts_t *hpa_sec_opts) { - if (hpa_shard_init(&shard->hpa_shard, shard->emap, shard->base, - &shard->edata_cache, shard->ind, hpa_hooks, hpa_opts)) { + const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { + if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, + shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index a9e551fc..2d4fa9b9 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -13,6 +13,7 @@ struct test_data_s { * test_data_t and the hpa_shard_t; */ hpa_shard_t shard; + hpa_central_t central; base_t *base; edata_cache_t shard_edata_cache; @@ -50,9 +51,12 @@ create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, &test_data->emap, - test_data->base, &test_data->shard_edata_cache, SHARD_IND, - hooks, opts); + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/pa.c b/test/unit/pa.c index dacd8e70..4206e85a 100644 --- a/test/unit/pa.c 
+++ b/test/unit/pa.c @@ -40,6 +40,7 @@ init_test_extent_hooks(extent_hooks_t *hooks) { typedef struct test_data_s test_data_t; struct test_data_s { pa_shard_t shard; + pa_central_t central; base_t *base; emap_t emap; pa_shard_stats_t stats; @@ -63,9 +64,13 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { nstime_t time; nstime_init(&time, 0); + err = pa_central_init(&test_data->central, base, opt_hpa, + &hpa_hooks_default); + assert_false(err, ""); + const size_t oversize_threshold = 8 * 1024 * 1024; - err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->emap, - test_data->base, /* ind */ 1, &test_data->stats, + err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->central, + &test_data->emap, test_data->base, /* ind */ 1, &test_data->stats, &test_data->stats_mtx, &time, oversize_threshold, dirty_decay_ms, muzzy_decay_ms); assert_false(err, ""); From 92a1e38f5286bcc8f206c02219cd6b703b39d80d Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 23 Jul 2021 15:29:43 -0700 Subject: [PATCH 2090/2608] edata_cache: Allow unbounded fast caching. The edata_cache_small had a fill/flush heuristic. In retrospect, this was a premature optimization; more testing indicates that an unbounded cache is effectively fine here, and moreover we spend a nontrivial amount of time doing unnecessary filling/flushing. As the HPA takes on a larger and larger fraction of all allocations, any theoretical differences in allocation patterns should shrink. The HPA is more efficient with its metadata in general, so it still comes out ahead on metadata usage anyways. 
--- include/jemalloc/internal/edata_cache.h | 24 ++-- include/jemalloc/internal/hpa.h | 2 +- src/edata_cache.c | 39 ++---- src/hpa.c | 12 +- test/unit/edata_cache.c | 173 ++++++++++-------------- 5 files changed, 99 insertions(+), 151 deletions(-) diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 9a54df0e..8b6c0ef7 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -3,15 +3,8 @@ #include "jemalloc/internal/base.h" -/* - * Public for tests. When we go to the fallback when the small cache is empty, - * we grab up to 8 items (grabbing less only if the fallback is exhausted). - * When we exceed 16, we flush. This caps the maximum memory lost per cache to - * 16 * sizeof(edata_t), a max of 2k on architectures where the edata_t is 128 - * bytes. - */ -#define EDATA_CACHE_SMALL_MAX 16 -#define EDATA_CACHE_SMALL_FILL 8 +/* For tests only. */ +#define EDATA_CACHE_FAST_FILL 4 /* * A cache of edata_t structures allocated via base_alloc_edata (as opposed to @@ -40,18 +33,17 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); * synchronization and avoids first-fit strategies. 
*/ -typedef struct edata_cache_small_s edata_cache_small_t; -struct edata_cache_small_s { +typedef struct edata_cache_fast_s edata_cache_fast_t; +struct edata_cache_fast_s { edata_list_inactive_t list; - size_t count; edata_cache_t *fallback; bool disabled; }; -void edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback); -edata_t *edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs); -void edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, +void edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback); +edata_t *edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs); +void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata); -void edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs); +void edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs); #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 623f9c40..46878a89 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -102,7 +102,7 @@ struct hpa_shard_s { * from a pageslab. The pageslab itself comes from the centralized * allocator, and so will use its edata_cache. 
*/ - edata_cache_small_t ecs; + edata_cache_fast_t ecf; psset_t psset; diff --git a/src/edata_cache.c b/src/edata_cache.c index ecfce414..6bc1848c 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -56,39 +56,34 @@ edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache) { } void -edata_cache_small_init(edata_cache_small_t *ecs, edata_cache_t *fallback) { +edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) { edata_list_inactive_init(&ecs->list); - ecs->count = 0; ecs->fallback = fallback; ecs->disabled = false; } static void -edata_cache_small_try_fill_from_fallback(tsdn_t *tsdn, - edata_cache_small_t *ecs) { - assert(ecs->count == 0); +edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, + edata_cache_fast_t *ecs) { edata_t *edata; malloc_mutex_lock(tsdn, &ecs->fallback->mtx); - while (ecs->count < EDATA_CACHE_SMALL_FILL) { - edata = edata_avail_first(&ecs->fallback->avail); + for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) { + edata = edata_avail_remove_first(&ecs->fallback->avail); if (edata == NULL) { break; } - edata_avail_remove(&ecs->fallback->avail, edata); edata_list_inactive_append(&ecs->list, edata); - ecs->count++; atomic_load_sub_store_zu(&ecs->fallback->count, 1); } malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); } edata_t * -edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { +edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { - assert(ecs->count == 0); assert(edata_list_inactive_first(&ecs->list) == NULL); return edata_cache_get(tsdn, ecs->fallback); } @@ -96,15 +91,13 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { edata_t *edata = edata_list_inactive_first(&ecs->list); if (edata != NULL) { edata_list_inactive_remove(&ecs->list, edata); - ecs->count--; return edata; } /* Slow path; requires synchronization. 
*/ - edata_cache_small_try_fill_from_fallback(tsdn, ecs); + edata_cache_fast_try_fill_from_fallback(tsdn, ecs); edata = edata_list_inactive_first(&ecs->list); if (edata != NULL) { edata_list_inactive_remove(&ecs->list, edata); - ecs->count--; } else { /* * Slowest path (fallback was also empty); allocate something @@ -116,7 +109,7 @@ edata_cache_small_get(tsdn_t *tsdn, edata_cache_small_t *ecs) { } static void -edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) { +edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) { /* * You could imagine smarter cache management policies (like * only flushing down to some threshold in anticipation of @@ -132,19 +125,16 @@ edata_cache_small_flush_all(tsdn_t *tsdn, edata_cache_small_t *ecs) { edata_avail_insert(&ecs->fallback->avail, edata); nflushed++; } - atomic_load_add_store_zu(&ecs->fallback->count, ecs->count); + atomic_load_add_store_zu(&ecs->fallback->count, nflushed); malloc_mutex_unlock(tsdn, &ecs->fallback->mtx); - assert(nflushed == ecs->count); - ecs->count = 0; } void -edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) { +edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { - assert(ecs->count == 0); assert(edata_list_inactive_first(&ecs->list) == NULL); edata_cache_put(tsdn, ecs->fallback, edata); return; @@ -155,15 +145,10 @@ edata_cache_small_put(tsdn_t *tsdn, edata_cache_small_t *ecs, edata_t *edata) { * cache locality. 
*/ edata_list_inactive_prepend(&ecs->list, edata); - ecs->count++; - if (ecs->count > EDATA_CACHE_SMALL_MAX) { - assert(ecs->count == EDATA_CACHE_SMALL_MAX + 1); - edata_cache_small_flush_all(tsdn, ecs); - } } void -edata_cache_small_disable(tsdn_t *tsdn, edata_cache_small_t *ecs) { - edata_cache_small_flush_all(tsdn, ecs); +edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs) { + edata_cache_fast_flush_all(tsdn, ecs); ecs->disabled = true; } diff --git a/src/hpa.c b/src/hpa.c index 10594587..6441b4ea 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -187,7 +187,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, assert(edata_cache != NULL); shard->central = central; shard->base = base; - edata_cache_small_init(&shard->ecs, edata_cache); + edata_cache_fast_init(&shard->ecf, edata_cache); psset_init(&shard->psset); shard->age_counter = 0; shard->ind = ind; @@ -537,7 +537,7 @@ static edata_t * hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { bool err; - edata_t *edata = edata_cache_small_get(tsdn, &shard->ecs); + edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf); if (edata == NULL) { *oom = true; return NULL; @@ -545,7 +545,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, hpdata_t *ps = psset_pick_alloc(&shard->psset, size); if (ps == NULL) { - edata_cache_small_put(tsdn, &shard->ecs, edata); + edata_cache_fast_put(tsdn, &shard->ecf, edata); return NULL; } @@ -592,7 +592,7 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * tweaked the stats, but our tweaks weren't really accurate). 
*/ psset_update_end(&shard->psset, ps); - edata_cache_small_put(tsdn, &shard->ecs, edata); + edata_cache_fast_put(tsdn, &shard->ecf, edata); *oom = true; return NULL; } @@ -805,7 +805,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { assert(ps != NULL); void *unreserve_addr = edata_addr_get(edata); size_t unreserve_size = edata_size_get(edata); - edata_cache_small_put(tsdn, &shard->ecs, edata); + edata_cache_fast_put(tsdn, &shard->ecf, edata); psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); @@ -844,7 +844,7 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_lock(tsdn, &shard->mtx); - edata_cache_small_disable(tsdn, &shard->ecs); + edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); } diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index 9a5d14b0..fe920c9a 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -47,38 +47,48 @@ TEST_BEGIN(test_edata_cache) { } TEST_END -TEST_BEGIN(test_edata_cache_small_simple) { +static size_t +ecf_count(edata_cache_fast_t *ecf) { + size_t count = 0; + edata_t *cur; + ql_foreach(cur, &ecf->list.head, ql_link_inactive) { + count++; + } + return count; +} + +TEST_BEGIN(test_edata_cache_fast_simple) { edata_cache_t ec; - edata_cache_small_t ecs; + edata_cache_fast_t ecf; test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); + edata_cache_fast_init(&ecf, &ec); - edata_t *ed1 = edata_cache_small_get(TSDN_NULL, &ecs); + edata_t *ed1 = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(ed1, ""); - expect_zu_eq(ecs.count, 0, ""); + expect_zu_eq(ecf_count(&ecf), 0, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_t *ed2 = edata_cache_small_get(TSDN_NULL, &ecs); + edata_t *ed2 = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(ed2, ""); - expect_zu_eq(ecs.count, 0, ""); + 
expect_zu_eq(ecf_count(&ecf), 0, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(TSDN_NULL, &ecs, ed1); - expect_zu_eq(ecs.count, 1, ""); + edata_cache_fast_put(TSDN_NULL, &ecf, ed1); + expect_zu_eq(ecf_count(&ecf), 1, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - edata_cache_small_put(TSDN_NULL, &ecs, ed2); - expect_zu_eq(ecs.count, 2, ""); + edata_cache_fast_put(TSDN_NULL, &ecf, ed2); + expect_zu_eq(ecf_count(&ecf), 2, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); /* LIFO ordering. */ - expect_ptr_eq(ed2, edata_cache_small_get(TSDN_NULL, &ecs), ""); - expect_zu_eq(ecs.count, 1, ""); + expect_ptr_eq(ed2, edata_cache_fast_get(TSDN_NULL, &ecf), ""); + expect_zu_eq(ecf_count(&ecf), 1, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); - expect_ptr_eq(ed1, edata_cache_small_get(TSDN_NULL, &ecs), ""); - expect_zu_eq(ecs.count, 0, ""); + expect_ptr_eq(ed1, edata_cache_fast_get(TSDN_NULL, &ecf), ""); + expect_zu_eq(ecf_count(&ecf), 0, ""); expect_zu_eq(atomic_load_zu(&ec.count, ATOMIC_RELAXED), 0, ""); test_edata_cache_destroy(&ec); @@ -87,41 +97,41 @@ TEST_END TEST_BEGIN(test_edata_cache_fill) { edata_cache_t ec; - edata_cache_small_t ecs; + edata_cache_fast_t ecf; test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); + edata_cache_fast_init(&ecf, &ec); - edata_t *allocs[EDATA_CACHE_SMALL_FILL * 2]; + edata_t *allocs[EDATA_CACHE_FAST_FILL * 2]; /* * If the fallback cache can't satisfy the request, we shouldn't do * extra allocations until compelled to. Put half the fill goal in the * fallback. 
*/ - for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL / 2; i++) { allocs[i] = edata_cache_get(TSDN_NULL, &ec); } - for (int i = 0; i < EDATA_CACHE_SMALL_FILL / 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL / 2; i++) { edata_cache_put(TSDN_NULL, &ec, allocs[i]); } - expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2, + expect_zu_eq(EDATA_CACHE_FAST_FILL / 2, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(EDATA_CACHE_SMALL_FILL / 2 - 1, ecs.count, + allocs[0] = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(EDATA_CACHE_FAST_FILL / 2 - 1, ecf_count(&ecf), "Should have grabbed all edatas available but no more."); - for (int i = 1; i < EDATA_CACHE_SMALL_FILL / 2; i++) { - allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + for (int i = 1; i < EDATA_CACHE_FAST_FILL / 2; i++) { + allocs[i] = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(allocs[i], ""); } - expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(0, ecf_count(&ecf), ""); /* When forced, we should alloc from the base. */ - edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); + edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(edata, ""); - expect_zu_eq(0, ecs.count, "Allocated more than necessary"); + expect_zu_eq(0, ecf_count(&ecf), "Allocated more than necessary"); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Allocated more than necessary"); @@ -129,116 +139,78 @@ TEST_BEGIN(test_edata_cache_fill) { * We should correctly fill in the common case where the fallback isn't * exhausted, too. 
*/ - for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL * 2; i++) { allocs[i] = edata_cache_get(TSDN_NULL, &ec); expect_ptr_not_null(allocs[i], ""); } - for (int i = 0; i < EDATA_CACHE_SMALL_FILL * 2; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL * 2; i++) { edata_cache_put(TSDN_NULL, &ec, allocs[i]); } - allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + allocs[0] = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(EDATA_CACHE_FAST_FILL - 1, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { - expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + for (int i = 1; i < EDATA_CACHE_FAST_FILL; i++) { + expect_zu_eq(EDATA_CACHE_FAST_FILL - i, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + allocs[i] = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(allocs[i], ""); } - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - allocs[0] = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, ecs.count, ""); + allocs[0] = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(EDATA_CACHE_FAST_FILL - 1, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - for (int i = 1; i < EDATA_CACHE_SMALL_FILL; i++) { - expect_zu_eq(EDATA_CACHE_SMALL_FILL - i, ecs.count, ""); + for (int i = 1; i < EDATA_CACHE_FAST_FILL; i++) { + expect_zu_eq(EDATA_CACHE_FAST_FILL - i, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - 
allocs[i] = edata_cache_small_get(TSDN_NULL, &ecs); + allocs[i] = edata_cache_fast_get(TSDN_NULL, &ecf); expect_ptr_not_null(allocs[i], ""); } - expect_zu_eq(0, ecs.count, ""); + expect_zu_eq(0, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); test_edata_cache_destroy(&ec); } TEST_END -TEST_BEGIN(test_edata_cache_flush) { - edata_cache_t ec; - edata_cache_small_t ecs; - - test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); - - edata_t *allocs[2 * EDATA_CACHE_SMALL_MAX + 2]; - for (int i = 0; i < 2 * EDATA_CACHE_SMALL_MAX + 2; i++) { - allocs[i] = edata_cache_get(TSDN_NULL, &ec); - expect_ptr_not_null(allocs[i], ""); - } - for (int i = 0; i < EDATA_CACHE_SMALL_MAX; i++) { - edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); - expect_zu_eq(i + 1, ecs.count, ""); - expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - } - edata_cache_small_put(TSDN_NULL, &ecs, allocs[EDATA_CACHE_SMALL_MAX]); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - - for (int i = EDATA_CACHE_SMALL_MAX + 1; - i < 2 * EDATA_CACHE_SMALL_MAX + 1; i++) { - edata_cache_small_put(TSDN_NULL, &ecs, allocs[i]); - expect_zu_eq(i - EDATA_CACHE_SMALL_MAX, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_MAX + 1, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - } - edata_cache_small_put(TSDN_NULL, &ecs, allocs[2 * EDATA_CACHE_SMALL_MAX + 1]); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(2 * EDATA_CACHE_SMALL_MAX + 2, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - - test_edata_cache_destroy(&ec); -} -TEST_END - TEST_BEGIN(test_edata_cache_disable) { edata_cache_t ec; - edata_cache_small_t ecs; + edata_cache_fast_t ecf; test_edata_cache_init(&ec); - edata_cache_small_init(&ecs, &ec); + edata_cache_fast_init(&ecf, &ec); - for (int i = 0; i < EDATA_CACHE_SMALL_FILL; i++) { + for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) { edata_t *edata = 
edata_cache_get(TSDN_NULL, &ec); expect_ptr_not_null(edata, ""); - edata_cache_small_put(TSDN_NULL, &ecs, edata); + edata_cache_fast_put(TSDN_NULL, &ecf, edata); } - expect_zu_eq(EDATA_CACHE_SMALL_FILL, ecs.count, ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, ecf_count(&ecf), ""); expect_zu_eq(0, atomic_load_zu(&ec.count, ATOMIC_RELAXED), ""); - edata_cache_small_disable(TSDN_NULL, &ecs); + edata_cache_fast_disable(TSDN_NULL, &ecf); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush"); - edata_t *edata = edata_cache_small_get(TSDN_NULL, &ecs); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL - 1, + edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf); + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL - 1, atomic_load_zu(&ec.count, ATOMIC_RELAXED), - "Disabled ecs should forward on get"); + "Disabled ecf should forward on get"); - edata_cache_small_put(TSDN_NULL, &ecs, edata); - expect_zu_eq(0, ecs.count, ""); - expect_zu_eq(EDATA_CACHE_SMALL_FILL, + edata_cache_fast_put(TSDN_NULL, &ecf, edata); + expect_zu_eq(0, ecf_count(&ecf), ""); + expect_zu_eq(EDATA_CACHE_FAST_FILL, atomic_load_zu(&ec.count, ATOMIC_RELAXED), - "Disabled ecs should forward on put"); + "Disabled ecf should forward on put"); test_edata_cache_destroy(&ec); } @@ -248,8 +220,7 @@ int main(void) { return test( test_edata_cache, - test_edata_cache_small_simple, + test_edata_cache_fast_simple, test_edata_cache_fill, - test_edata_cache_flush, test_edata_cache_disable); } From 08a4cc0969edf054c8483efd35981eb8b66eb0c1 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 19 Jul 2021 16:47:10 -0700 Subject: [PATCH 2091/2608] Pairing heap: inline functions instead of macros. 
By force-inlining everything that would otherwise be a macro, we get the same effect (it's not clear in the first place that this is actually a good idea, but it avoids making any changes to the existing performance profile). This makes the code more maintainable (in anticipation of subsequent changes), as well as making performance profiles and debug info more readable (we get "real" line numbers, instead of making everything point to the macro definition of all associated functions). --- include/jemalloc/internal/edata.h | 13 +- include/jemalloc/internal/hpdata.h | 6 +- include/jemalloc/internal/ph.h | 742 ++++++++++++++++------------- src/edata.c | 4 +- src/hpdata.c | 2 +- test/unit/ph.c | 60 ++- 6 files changed, 450 insertions(+), 377 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 55d1dfed..3a04a9a3 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -81,8 +81,8 @@ struct edata_map_info_s { /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; -typedef ph(edata_t) edata_avail_t; -typedef ph(edata_t) edata_heap_t; +ph_structs(edata_avail, edata_t); +ph_structs(edata_heap, edata_t); struct edata_s { /* * Bitfield containing several fields: @@ -214,7 +214,10 @@ struct edata_s { * slabs_nonfull, or when the edata_t is unassociated with an * extent and sitting in an edata_cache. 
*/ - phn(edata_t) ph_link; + union { + edata_heap_link_t heap_link; + edata_avail_link_t avail_link; + }; }; union { @@ -664,7 +667,7 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return ret; } -ph_proto(, edata_avail_, edata_avail_t, edata_t) -ph_proto(, edata_heap_, edata_heap_t, edata_t) +ph_proto(, edata_avail, edata_t) +ph_proto(, edata_heap, edata_t) #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 2a12add9..c2ed692b 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -18,6 +18,7 @@ * hugepage-sized and hugepage-aligned; it's *potentially* huge. */ typedef struct hpdata_s hpdata_t; +ph_structs(hpdata_age_heap, hpdata_t); struct hpdata_s { /* * We likewise follow the edata convention of mangling names and forcing @@ -82,7 +83,7 @@ struct hpdata_s { union { /* When nonempty (and also nonfull), used by the psset bins. */ - phn(hpdata_t) ph_link; + hpdata_age_heap_link_t age_link; /* * When empty (or not corresponding to any hugepage), list * linkage. @@ -120,8 +121,7 @@ TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) TYPED_LIST(hpdata_purge_list, hpdata_t, ql_link_purge) TYPED_LIST(hpdata_hugify_list, hpdata_t, ql_link_hugify) -typedef ph(hpdata_t) hpdata_age_heap_t; -ph_proto(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t); +ph_proto(, hpdata_age_heap, hpdata_t); static inline void * hpdata_addr_get(const hpdata_t *hpdata) { diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 63aeac91..beb50d54 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -15,377 +15,435 @@ ******************************************************************************* */ +typedef int (*ph_cmp_t)(void *, void *); + /* Node structure. 
*/ -#define phn(a_type) \ -struct { \ - a_type *phn_prev; \ - a_type *phn_next; \ - a_type *phn_lchild; \ +typedef struct phn_link_s phn_link_t; +struct phn_link_s { + void *prev; + void *next; + void *lchild; +}; + +typedef struct ph_s ph_t; +struct ph_s { + void *root; +}; + +JEMALLOC_ALWAYS_INLINE phn_link_t * +phn_link_get(void *phn, size_t offset) { + return (phn_link_t *)(((uintptr_t)phn) + offset); } -/* Root structure. */ -#define ph(a_type) \ -struct { \ - a_type *ph_root; \ +JEMALLOC_ALWAYS_INLINE void +phn_link_init(void *phn, size_t offset) { + phn_link_get(phn, offset)->prev = NULL; + phn_link_get(phn, offset)->next = NULL; + phn_link_get(phn, offset)->lchild = NULL; } -/* Internal utility macros. */ -#define phn_lchild_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_lchild) -#define phn_lchild_set(a_type, a_field, a_phn, a_lchild) do { \ - a_phn->a_field.phn_lchild = a_lchild; \ -} while (0) +/* Internal utility helpers. */ +JEMALLOC_ALWAYS_INLINE void * +phn_lchild_get(void *phn, size_t offset) { + return phn_link_get(phn, offset)->lchild; +} -#define phn_next_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_next) -#define phn_prev_set(a_type, a_field, a_phn, a_prev) do { \ - a_phn->a_field.phn_prev = a_prev; \ -} while (0) +JEMALLOC_ALWAYS_INLINE void +phn_lchild_set(void *phn, void *lchild, size_t offset) { + phn_link_get(phn, offset)->lchild = lchild; +} -#define phn_prev_get(a_type, a_field, a_phn) \ - (a_phn->a_field.phn_prev) -#define phn_next_set(a_type, a_field, a_phn, a_next) do { \ - a_phn->a_field.phn_next = a_next; \ -} while (0) +JEMALLOC_ALWAYS_INLINE void * +phn_next_get(void *phn, size_t offset) { + return phn_link_get(phn, offset)->next; +} -#define phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, a_cmp) do { \ - a_type *phn0child; \ +JEMALLOC_ALWAYS_INLINE void +phn_next_set(void *phn, void *next, size_t offset) { + phn_link_get(phn, offset)->next = next; +} + +JEMALLOC_ALWAYS_INLINE void * +phn_prev_get(void *phn, size_t 
offset) { + return phn_link_get(phn, offset)->prev; +} + +JEMALLOC_ALWAYS_INLINE void +phn_prev_set(void *phn, void *prev, size_t offset) { + phn_link_get(phn, offset)->prev = prev; +} + +JEMALLOC_ALWAYS_INLINE void +phn_merge_ordered(void *phn0, void *phn1, size_t offset, + ph_cmp_t cmp) { + void *phn0child; + + assert(phn0 != NULL); + assert(phn1 != NULL); + assert(cmp(phn0, phn1) <= 0); + + phn_prev_set(phn1, phn0, offset); + phn0child = phn_lchild_get(phn0, offset); + phn_next_set(phn1, phn0child, offset); + if (phn0child != NULL) { + phn_prev_set(phn0child, phn1, offset); + } + phn_lchild_set(phn0, phn1, offset); +} + +JEMALLOC_ALWAYS_INLINE void * +phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { + void *result; + if (phn0 == NULL) { + result = phn1; + } else if (phn1 == NULL) { + result = phn0; + } else if (cmp(phn0, phn1) < 0) { + phn_merge_ordered(phn0, phn1, offset, cmp); + result = phn0; + } else { + phn_merge_ordered(phn1, phn0, offset, cmp); + result = phn1; + } + return result; +} + +JEMALLOC_ALWAYS_INLINE void * +phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { + void *head = NULL; + void *tail = NULL; + void *phn0 = phn; + void *phn1 = phn_next_get(phn0, offset); + + /* + * Multipass merge, wherein the first two elements of a FIFO + * are repeatedly merged, and each result is appended to the + * singly linked FIFO, until the FIFO contains only a single + * element. We start with a sibling list but no reference to + * its tail, so we do a single pass over the sibling list to + * populate the FIFO. 
+ */ + if (phn1 != NULL) { + void *phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + phn_prev_set(phnrest, NULL, offset); + } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + head = tail = phn0; + phn0 = phnrest; + while (phn0 != NULL) { + phn1 = phn_next_get(phn0, offset); + if (phn1 != NULL) { + phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + phn_prev_set(phnrest, NULL, offset); + } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = phnrest; + } else { + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = NULL; + } + } + phn0 = head; + phn1 = phn_next_get(phn0, offset); + if (phn1 != NULL) { + while (true) { + head = phn_next_get(phn1, offset); + assert(phn_prev_get(phn0, offset) == NULL); + phn_next_set(phn0, NULL, offset); + assert(phn_prev_get(phn1, offset) == NULL); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + if (head == NULL) { + break; + } + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = head; + phn1 = phn_next_get(phn0, offset); + } + } + } + return phn0; +} + +JEMALLOC_ALWAYS_INLINE void +ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) { + void *phn = phn_next_get(ph->root, offset); + if (phn != NULL) { + phn_prev_set(ph->root, NULL, offset); + phn_next_set(ph->root, NULL, offset); + phn_prev_set(phn, NULL, offset); + phn = phn_merge_siblings(phn, offset, cmp); + assert(phn_next_get(phn, offset) == NULL); + ph->root = phn_merge(ph->root, phn, offset, cmp); + } +} + +JEMALLOC_ALWAYS_INLINE void * +ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) { + void *result; + void *lchild = phn_lchild_get(phn, offset); + if (lchild == 
NULL) { + result = NULL; + } else { + result = phn_merge_siblings(lchild, offset, cmp); + } + return result; +} + +JEMALLOC_ALWAYS_INLINE void +ph_new(ph_t *ph) { + ph->root = NULL; +} + +JEMALLOC_ALWAYS_INLINE bool +ph_empty(ph_t *ph) { + return ph->root == NULL; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { + if (ph->root == NULL) { + return NULL; + } + ph_merge_aux(ph, offset, cmp); + return ph->root; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_any(ph_t *ph, size_t offset) { + if (ph->root == NULL) { + return NULL; + } + void *aux = phn_next_get(ph->root, offset); + if (aux != NULL) { + return aux; + } + return ph->root; +} + +JEMALLOC_ALWAYS_INLINE void +ph_insert(ph_t *ph, void *phn, size_t offset) { + phn_link_init(phn, offset); + + /* + * Treat the root as an aux list during insertion, and lazily merge + * during a_prefix##remove_first(). For elements that are inserted, + * then removed via a_prefix##remove() before the aux list is ever + * processed, this makes insert/remove constant-time, whereas eager + * merging would make insert O(log n). + */ + if (ph->root == NULL) { + ph->root = phn; + } else { + phn_next_set(phn, phn_next_get(ph->root, offset), offset); + if (phn_next_get(ph->root, offset) != NULL) { + phn_prev_set(phn_next_get(ph->root, offset), phn, + offset); + } + phn_prev_set(phn, ph->root, offset); + phn_next_set(ph->root, phn, offset); + } +} + +JEMALLOC_ALWAYS_INLINE void * +ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { + void *ret; + + if (ph->root == NULL) { + return NULL; + } + ph_merge_aux(ph, offset, cmp); + ret = ph->root; + ph->root = ph_merge_children(ph->root, offset, cmp); + + return ret; + +} + +JEMALLOC_ALWAYS_INLINE void * +ph_remove_any(ph_t *ph, size_t offset, ph_cmp_t cmp) { + /* + * Remove the most recently inserted aux list element, or the root if + * the aux list is empty. 
This has the effect of behaving as a LIFO + * (and insertion/removal is therefore constant-time) if + * a_prefix##[remove_]first() are never called. + */ + if (ph->root == NULL) { + return NULL; + } + void *ret = phn_next_get(ph->root, offset); + if (ret != NULL) { + void *aux = phn_next_get(ret, offset); + phn_next_set(ph->root, aux, offset); + if (aux != NULL) { + phn_prev_set(aux, ph->root, offset); + } + return ret; + } + ret = ph->root; + ph->root = ph_merge_children(ph->root, offset, cmp); + return ret; +} + +JEMALLOC_ALWAYS_INLINE void +ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { + void *replace; + void *parent; + + if (ph->root == phn) { + /* + * We can delete from aux list without merging it, but we need + * to merge if we are dealing with the root node and it has + * children. + */ + if (phn_lchild_get(phn, offset) == NULL) { + ph->root = phn_next_get(phn, offset); + if (ph->root != NULL) { + phn_prev_set(ph->root, NULL, offset); + } + return; + } + ph_merge_aux(ph, offset, cmp); + if (ph->root == phn) { + ph->root = ph_merge_children(ph->root, offset, cmp); + return; + } + } + + /* Get parent (if phn is leftmost child) before mutating. */ + if ((parent = phn_prev_get(phn, offset)) != NULL) { + if (phn_lchild_get(parent, offset) != phn) { + parent = NULL; + } + } + /* Find a possible replacement node, and link to parent. */ + replace = ph_merge_children(phn, offset, cmp); + /* Set next/prev for sibling linked list. 
*/ + if (replace != NULL) { + if (parent != NULL) { + phn_prev_set(replace, parent, offset); + phn_lchild_set(parent, replace, offset); + } else { + phn_prev_set(replace, phn_prev_get(phn, offset), + offset); + if (phn_prev_get(phn, offset) != NULL) { + phn_next_set(phn_prev_get(phn, offset), replace, + offset); + } + } + phn_next_set(replace, phn_next_get(phn, offset), offset); + if (phn_next_get(phn, offset) != NULL) { + phn_prev_set(phn_next_get(phn, offset), replace, + offset); + } + } else { + if (parent != NULL) { + void *next = phn_next_get(phn, offset); + phn_lchild_set(parent, next, offset); + if (next != NULL) { + phn_prev_set(next, parent, offset); + } + } else { + assert(phn_prev_get(phn, offset) != NULL); + phn_next_set( + phn_prev_get(phn, offset), + phn_next_get(phn, offset), offset); + } + if (phn_next_get(phn, offset) != NULL) { + phn_prev_set( + phn_next_get(phn, offset), + phn_prev_get(phn, offset), offset); + } + } +} + +#define ph_structs(a_prefix, a_type) \ +typedef struct { \ + phn_link_t link; \ +} a_prefix##_link_t; \ \ - assert(a_phn0 != NULL); \ - assert(a_phn1 != NULL); \ - assert(a_cmp(a_phn0, a_phn1) <= 0); \ - \ - phn_prev_set(a_type, a_field, a_phn1, a_phn0); \ - phn0child = phn_lchild_get(a_type, a_field, a_phn0); \ - phn_next_set(a_type, a_field, a_phn1, phn0child); \ - if (phn0child != NULL) { \ - phn_prev_set(a_type, a_field, phn0child, a_phn1); \ - } \ - phn_lchild_set(a_type, a_field, a_phn0, a_phn1); \ -} while (0) - -#define phn_merge(a_type, a_field, a_phn0, a_phn1, a_cmp, r_phn) do { \ - if (a_phn0 == NULL) { \ - r_phn = a_phn1; \ - } else if (a_phn1 == NULL) { \ - r_phn = a_phn0; \ - } else if (a_cmp(a_phn0, a_phn1) < 0) { \ - phn_merge_ordered(a_type, a_field, a_phn0, a_phn1, \ - a_cmp); \ - r_phn = a_phn0; \ - } else { \ - phn_merge_ordered(a_type, a_field, a_phn1, a_phn0, \ - a_cmp); \ - r_phn = a_phn1; \ - } \ -} while (0) - -#define ph_merge_siblings(a_type, a_field, a_phn, a_cmp, r_phn) do { \ - a_type *head = NULL; 
\ - a_type *tail = NULL; \ - a_type *phn0 = a_phn; \ - a_type *phn1 = phn_next_get(a_type, a_field, phn0); \ - \ - /* \ - * Multipass merge, wherein the first two elements of a FIFO \ - * are repeatedly merged, and each result is appended to the \ - * singly linked FIFO, until the FIFO contains only a single \ - * element. We start with a sibling list but no reference to \ - * its tail, so we do a single pass over the sibling list to \ - * populate the FIFO. \ - */ \ - if (phn1 != NULL) { \ - a_type *phnrest = phn_next_get(a_type, a_field, phn1); \ - if (phnrest != NULL) { \ - phn_prev_set(a_type, a_field, phnrest, NULL); \ - } \ - phn_prev_set(a_type, a_field, phn0, NULL); \ - phn_next_set(a_type, a_field, phn0, NULL); \ - phn_prev_set(a_type, a_field, phn1, NULL); \ - phn_next_set(a_type, a_field, phn1, NULL); \ - phn_merge(a_type, a_field, phn0, phn1, a_cmp, phn0); \ - head = tail = phn0; \ - phn0 = phnrest; \ - while (phn0 != NULL) { \ - phn1 = phn_next_get(a_type, a_field, phn0); \ - if (phn1 != NULL) { \ - phnrest = phn_next_get(a_type, a_field, \ - phn1); \ - if (phnrest != NULL) { \ - phn_prev_set(a_type, a_field, \ - phnrest, NULL); \ - } \ - phn_prev_set(a_type, a_field, phn0, \ - NULL); \ - phn_next_set(a_type, a_field, phn0, \ - NULL); \ - phn_prev_set(a_type, a_field, phn1, \ - NULL); \ - phn_next_set(a_type, a_field, phn1, \ - NULL); \ - phn_merge(a_type, a_field, phn0, phn1, \ - a_cmp, phn0); \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = phnrest; \ - } else { \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = NULL; \ - } \ - } \ - phn0 = head; \ - phn1 = phn_next_get(a_type, a_field, phn0); \ - if (phn1 != NULL) { \ - while (true) { \ - head = phn_next_get(a_type, a_field, \ - phn1); \ - assert(phn_prev_get(a_type, a_field, \ - phn0) == NULL); \ - phn_next_set(a_type, a_field, phn0, \ - NULL); \ - assert(phn_prev_get(a_type, a_field, \ - phn1) == NULL); \ - phn_next_set(a_type, a_field, 
phn1, \ - NULL); \ - phn_merge(a_type, a_field, phn0, phn1, \ - a_cmp, phn0); \ - if (head == NULL) { \ - break; \ - } \ - phn_next_set(a_type, a_field, tail, \ - phn0); \ - tail = phn0; \ - phn0 = head; \ - phn1 = phn_next_get(a_type, a_field, \ - phn0); \ - } \ - } \ - } \ - r_phn = phn0; \ -} while (0) - -#define ph_merge_aux(a_type, a_field, a_ph, a_cmp) do { \ - a_type *phn = phn_next_get(a_type, a_field, a_ph->ph_root); \ - if (phn != NULL) { \ - phn_prev_set(a_type, a_field, a_ph->ph_root, NULL); \ - phn_next_set(a_type, a_field, a_ph->ph_root, NULL); \ - phn_prev_set(a_type, a_field, phn, NULL); \ - ph_merge_siblings(a_type, a_field, phn, a_cmp, phn); \ - assert(phn_next_get(a_type, a_field, phn) == NULL); \ - phn_merge(a_type, a_field, a_ph->ph_root, phn, a_cmp, \ - a_ph->ph_root); \ - } \ -} while (0) - -#define ph_merge_children(a_type, a_field, a_phn, a_cmp, r_phn) do { \ - a_type *lchild = phn_lchild_get(a_type, a_field, a_phn); \ - if (lchild == NULL) { \ - r_phn = NULL; \ - } else { \ - ph_merge_siblings(a_type, a_field, lchild, a_cmp, \ - r_phn); \ - } \ -} while (0) +typedef struct { \ + ph_t ph; \ +} a_prefix##_t; /* * The ph_proto() macro generates function prototypes that correspond to the * functions generated by an equivalently parameterized call to ph_gen(). 
*/ -#define ph_proto(a_attr, a_prefix, a_ph_type, a_type) \ -a_attr void a_prefix##new(a_ph_type *ph); \ -a_attr bool a_prefix##empty(a_ph_type *ph); \ -a_attr a_type *a_prefix##first(a_ph_type *ph); \ -a_attr a_type *a_prefix##any(a_ph_type *ph); \ -a_attr void a_prefix##insert(a_ph_type *ph, a_type *phn); \ -a_attr a_type *a_prefix##remove_first(a_ph_type *ph); \ -a_attr a_type *a_prefix##remove_any(a_ph_type *ph); \ -a_attr void a_prefix##remove(a_ph_type *ph, a_type *phn); +#define ph_proto(a_attr, a_prefix, a_type) \ + \ +a_attr void a_prefix##_new(a_prefix##_t *ph); \ +a_attr bool a_prefix##_empty(a_prefix##_t *ph); \ +a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ +a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ +a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); -/* - * The ph_gen() macro generates a type-specific pairing heap implementation, - * based on the above cpp macros. - */ -#define ph_gen(a_attr, a_prefix, a_ph_type, a_type, a_field, a_cmp) \ -a_attr void \ -a_prefix##new(a_ph_type *ph) { \ - memset(ph, 0, sizeof(ph(a_type))); \ +/* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ +#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ +JEMALLOC_ALWAYS_INLINE int \ +a_prefix##_ph_cmp(void *a, void *b) { \ + return a_cmp((a_type *)a, (a_type *)b); \ } \ + \ +a_attr void \ +a_prefix##_new(a_prefix##_t *ph) { \ + ph_new(&ph->ph); \ +} \ + \ a_attr bool \ -a_prefix##empty(a_ph_type *ph) { \ - return (ph->ph_root == NULL); \ +a_prefix##_empty(a_prefix##_t *ph) { \ + return ph_empty(&ph->ph); \ } \ + \ a_attr a_type * \ -a_prefix##first(a_ph_type *ph) { \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - return ph->ph_root; \ +a_prefix##_first(a_prefix##_t *ph) { \ + return ph_first(&ph->ph, offsetof(a_type, a_field), \ + &a_prefix##_ph_cmp); \ } \ + \ a_attr a_type * \ -a_prefix##any(a_ph_type *ph) { \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - a_type *aux = phn_next_get(a_type, a_field, ph->ph_root); \ - if (aux != NULL) { \ - return aux; \ - } \ - return ph->ph_root; \ +a_prefix##_any(a_prefix##_t *ph) { \ + return ph_any(&ph->ph, offsetof(a_type, a_field)); \ } \ + \ a_attr void \ -a_prefix##insert(a_ph_type *ph, a_type *phn) { \ - memset(&phn->a_field, 0, sizeof(phn(a_type))); \ - \ - /* \ - * Treat the root as an aux list during insertion, and lazily \ - * merge during a_prefix##remove_first(). For elements that \ - * are inserted, then removed via a_prefix##remove() before the \ - * aux list is ever processed, this makes insert/remove \ - * constant-time, whereas eager merging would make insert \ - * O(log n). 
\ - */ \ - if (ph->ph_root == NULL) { \ - ph->ph_root = phn; \ - } else { \ - phn_next_set(a_type, a_field, phn, phn_next_get(a_type, \ - a_field, ph->ph_root)); \ - if (phn_next_get(a_type, a_field, ph->ph_root) != \ - NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, ph->ph_root), \ - phn); \ - } \ - phn_prev_set(a_type, a_field, phn, ph->ph_root); \ - phn_next_set(a_type, a_field, ph->ph_root, phn); \ - } \ +a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ + ph_insert(&ph->ph, phn, offsetof(a_type, a_field)); \ } \ + \ a_attr a_type * \ -a_prefix##remove_first(a_ph_type *ph) { \ - a_type *ret; \ - \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - \ - ret = ph->ph_root; \ - \ - ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ - ph->ph_root); \ - \ - return ret; \ +a_prefix##_remove_first(a_prefix##_t *ph) { \ + return ph_remove_first(&ph->ph, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } \ + \ a_attr a_type * \ -a_prefix##remove_any(a_ph_type *ph) { \ - /* \ - * Remove the most recently inserted aux list element, or the \ - * root if the aux list is empty. This has the effect of \ - * behaving as a LIFO (and insertion/removal is therefore \ - * constant-time) if a_prefix##[remove_]first() are never \ - * called. 
\ - */ \ - if (ph->ph_root == NULL) { \ - return NULL; \ - } \ - a_type *ret = phn_next_get(a_type, a_field, ph->ph_root); \ - if (ret != NULL) { \ - a_type *aux = phn_next_get(a_type, a_field, ret); \ - phn_next_set(a_type, a_field, ph->ph_root, aux); \ - if (aux != NULL) { \ - phn_prev_set(a_type, a_field, aux, \ - ph->ph_root); \ - } \ - return ret; \ - } \ - ret = ph->ph_root; \ - ph_merge_children(a_type, a_field, ph->ph_root, a_cmp, \ - ph->ph_root); \ - return ret; \ +a_prefix##_remove_any(a_prefix##_t *ph) { \ + return ph_remove_any(&ph->ph, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } \ + \ a_attr void \ -a_prefix##remove(a_ph_type *ph, a_type *phn) { \ - a_type *replace, *parent; \ - \ - if (ph->ph_root == phn) { \ - /* \ - * We can delete from aux list without merging it, but \ - * we need to merge if we are dealing with the root \ - * node and it has children. \ - */ \ - if (phn_lchild_get(a_type, a_field, phn) == NULL) { \ - ph->ph_root = phn_next_get(a_type, a_field, \ - phn); \ - if (ph->ph_root != NULL) { \ - phn_prev_set(a_type, a_field, \ - ph->ph_root, NULL); \ - } \ - return; \ - } \ - ph_merge_aux(a_type, a_field, ph, a_cmp); \ - if (ph->ph_root == phn) { \ - ph_merge_children(a_type, a_field, ph->ph_root, \ - a_cmp, ph->ph_root); \ - return; \ - } \ - } \ - \ - /* Get parent (if phn is leftmost child) before mutating. */ \ - if ((parent = phn_prev_get(a_type, a_field, phn)) != NULL) { \ - if (phn_lchild_get(a_type, a_field, parent) != phn) { \ - parent = NULL; \ - } \ - } \ - /* Find a possible replacement node, and link to parent. */ \ - ph_merge_children(a_type, a_field, phn, a_cmp, replace); \ - /* Set next/prev for sibling linked list. 
*/ \ - if (replace != NULL) { \ - if (parent != NULL) { \ - phn_prev_set(a_type, a_field, replace, parent); \ - phn_lchild_set(a_type, a_field, parent, \ - replace); \ - } else { \ - phn_prev_set(a_type, a_field, replace, \ - phn_prev_get(a_type, a_field, phn)); \ - if (phn_prev_get(a_type, a_field, phn) != \ - NULL) { \ - phn_next_set(a_type, a_field, \ - phn_prev_get(a_type, a_field, phn), \ - replace); \ - } \ - } \ - phn_next_set(a_type, a_field, replace, \ - phn_next_get(a_type, a_field, phn)); \ - if (phn_next_get(a_type, a_field, phn) != NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, phn), \ - replace); \ - } \ - } else { \ - if (parent != NULL) { \ - a_type *next = phn_next_get(a_type, a_field, \ - phn); \ - phn_lchild_set(a_type, a_field, parent, next); \ - if (next != NULL) { \ - phn_prev_set(a_type, a_field, next, \ - parent); \ - } \ - } else { \ - assert(phn_prev_get(a_type, a_field, phn) != \ - NULL); \ - phn_next_set(a_type, a_field, \ - phn_prev_get(a_type, a_field, phn), \ - phn_next_get(a_type, a_field, phn)); \ - } \ - if (phn_next_get(a_type, a_field, phn) != NULL) { \ - phn_prev_set(a_type, a_field, \ - phn_next_get(a_type, a_field, phn), \ - phn_prev_get(a_type, a_field, phn)); \ - } \ - } \ +a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ + ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/src/edata.c b/src/edata.c index 23523dd0..82b6f565 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,6 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, edata_avail_, edata_avail_t, edata_t, ph_link, +ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp) -ph_gen(, edata_heap_, edata_heap_t, edata_t, ph_link, edata_snad_comp) +ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp) diff --git a/src/hpdata.c b/src/hpdata.c index 18519be3..e7d7294c 100644 --- 
a/src/hpdata.c +++ b/src/hpdata.c @@ -15,7 +15,7 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { return (a_age > b_age) - (a_age < b_age); } -ph_gen(, hpdata_age_heap_, hpdata_age_heap_t, hpdata_t, ph_link, hpdata_age_comp) +ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { diff --git a/test/unit/ph.c b/test/unit/ph.c index 0f7c991e..28f5e488 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -3,11 +3,12 @@ #include "jemalloc/internal/ph.h" typedef struct node_s node_t; +ph_structs(heap, node_t); struct node_s { #define NODE_MAGIC 0x9823af7e uint32_t magic; - phn(node_t) link; + heap_link_t link; uint64_t key; }; @@ -36,8 +37,22 @@ node_cmp_magic(const node_t *a, const node_t *b) { return node_cmp(a, b); } -typedef ph(node_t) heap_t; -ph_gen(static, heap_, heap_t, node_t, link, node_cmp_magic); +ph_gen(static, heap, node_t, link, node_cmp_magic); + +static node_t * +node_next_get(const node_t *node) { + return phn_next_get((node_t *)node, offsetof(node_t, link)); +} + +static node_t * +node_prev_get(const node_t *node) { + return phn_prev_get((node_t *)node, offsetof(node_t, link)); +} + +static node_t * +node_lchild_get(const node_t *node) { + return phn_lchild_get((node_t *)node, offsetof(node_t, link)); +} static void node_print(const node_t *node, unsigned depth) { @@ -49,14 +64,14 @@ node_print(const node_t *node, unsigned depth) { } malloc_printf("%2"FMTu64"\n", node->key); - leftmost_child = phn_lchild_get(node_t, link, node); + leftmost_child = node_lchild_get(node); if (leftmost_child == NULL) { return; } node_print(leftmost_child, depth + 1); - for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != - NULL; sibling = phn_next_get(node_t, link, sibling)) { + for (sibling = node_next_get(leftmost_child); sibling != + NULL; sibling = node_next_get(sibling)) { node_print(sibling, depth + 1); } } @@ -66,16 +81,15 @@ heap_print(const heap_t *heap) { node_t 
*auxelm; malloc_printf("vvv heap %p vvv\n", heap); - if (heap->ph_root == NULL) { + if (heap->ph.root == NULL) { goto label_return; } - node_print(heap->ph_root, 0); + node_print(heap->ph.root, 0); - for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; - auxelm = phn_next_get(node_t, link, auxelm)) { - expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, auxelm)), auxelm, + for (auxelm = node_next_get(heap->ph.root); auxelm != NULL; + auxelm = node_next_get(auxelm)) { + expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); node_print(auxelm, 0); } @@ -94,18 +108,17 @@ node_validate(const node_t *node, const node_t *parent) { "Child is less than parent"); } - leftmost_child = phn_lchild_get(node_t, link, node); + leftmost_child = node_lchild_get(node); if (leftmost_child == NULL) { return nnodes; } - expect_ptr_eq((void *)phn_prev_get(node_t, link, leftmost_child), + expect_ptr_eq(node_prev_get(leftmost_child), (void *)node, "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); - for (sibling = phn_next_get(node_t, link, leftmost_child); sibling != - NULL; sibling = phn_next_get(node_t, link, sibling)) { - expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, sibling)), sibling, + for (sibling = node_next_get(leftmost_child); sibling != + NULL; sibling = node_next_get(sibling)) { + expect_ptr_eq(node_next_get(node_prev_get(sibling)), sibling, "sibling's prev doesn't link to sibling"); nnodes += node_validate(sibling, node); } @@ -117,16 +130,15 @@ heap_validate(const heap_t *heap) { unsigned nnodes = 0; node_t *auxelm; - if (heap->ph_root == NULL) { + if (heap->ph.root == NULL) { goto label_return; } - nnodes += node_validate(heap->ph_root, NULL); + nnodes += node_validate(heap->ph.root, NULL); - for (auxelm = phn_next_get(node_t, link, heap->ph_root); auxelm != NULL; - auxelm = phn_next_get(node_t, link, auxelm)) { - 
expect_ptr_eq(phn_next_get(node_t, link, phn_prev_get(node_t, - link, auxelm)), auxelm, + for (auxelm = node_next_get(heap->ph.root); auxelm != NULL; + auxelm = node_next_get(auxelm)) { + expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); nnodes += node_validate(auxelm, NULL); } From 0170dd198ae0ef92ae923b454c02259802b78b76 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 08:46:19 -0700 Subject: [PATCH 2092/2608] Edata: Fix a couple typos. Some readability-enhancing whitespace, and a spelling error. --- include/jemalloc/internal/edata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 3a04a9a3..da0774fb 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -64,7 +64,7 @@ typedef struct e_prof_info_s e_prof_info_t; /* * The information about a particular edata that lives in an emap. Space is - * more previous there (the information, plus the edata pointer, has to live in + * more precious there (the information, plus the edata pointer, has to live in * a 64-bit word if we want to enable a packed representation. * * There are two things that are special about the information here: @@ -196,6 +196,7 @@ struct edata_s { * into pageslabs). This tracks it. */ hpdata_t *e_ps; + /* * Serial number. These are not necessarily unique; splitting an extent * results in two extents with the same serial number. From dc0a4b8b2f2daf17a27b4b1fc869ef48d40d3ef2 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 09:02:17 -0700 Subject: [PATCH 2093/2608] Edata: Pull out comparison fields into a summary. For now, this is a no-op; eventually, it will allow some caching in the eset. 
--- include/jemalloc/internal/edata.h | 46 ++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index da0774fb..ff14982c 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -79,6 +79,12 @@ struct edata_map_info_s { szind_t szind; }; +typedef struct edata_cmp_summary_s edata_cmp_summary_t; +struct edata_cmp_summary_s { + uint64_t sn; + uintptr_t addr; +}; + /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; ph_structs(edata_avail, edata_t); @@ -610,14 +616,6 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { edata_pai_set(edata, EXTENT_PAI_PAC); } -static inline int -edata_sn_comp(const edata_t *a, const edata_t *b) { - uint64_t a_sn = edata_sn_get(a); - uint64_t b_sn = edata_sn_get(b); - - return (a_sn > b_sn) - (a_sn < b_sn); -} - static inline int edata_esn_comp(const edata_t *a, const edata_t *b) { size_t a_esn = edata_esn_get(a); @@ -626,14 +624,6 @@ edata_esn_comp(const edata_t *a, const edata_t *b) { return (a_esn > b_esn) - (a_esn < b_esn); } -static inline int -edata_ad_comp(const edata_t *a, const edata_t *b) { - uintptr_t a_addr = (uintptr_t)edata_addr_get(a); - uintptr_t b_addr = (uintptr_t)edata_addr_get(b); - - return (a_addr > b_addr) - (a_addr < b_addr); -} - static inline int edata_ead_comp(const edata_t *a, const edata_t *b) { uintptr_t a_eaddr = (uintptr_t)a; @@ -642,19 +632,31 @@ edata_ead_comp(const edata_t *a, const edata_t *b) { return (a_eaddr > b_eaddr) - (a_eaddr < b_eaddr); } -static inline int -edata_snad_comp(const edata_t *a, const edata_t *b) { - int ret; +static inline edata_cmp_summary_t +edata_cmp_summary_get(const edata_t *edata) { + return (edata_cmp_summary_t){edata_sn_get(edata), + (uintptr_t)edata_addr_get(edata)}; +} - ret = edata_sn_comp(a, b); +static inline int +edata_cmp_summary_comp(edata_cmp_summary_t a, 
edata_cmp_summary_t b) { + int ret; + ret = (a.sn > b.sn) - (a.sn < b.sn); if (ret != 0) { return ret; } - - ret = edata_ad_comp(a, b); + ret = (a.addr > b.addr) - (a.addr < b.addr); return ret; } +static inline int +edata_snad_comp(const edata_t *a, const edata_t *b) { + edata_cmp_summary_t a_cmp = edata_cmp_summary_get(a); + edata_cmp_summary_t b_cmp = edata_cmp_summary_get(b); + + return edata_cmp_summary_comp(a_cmp, b_cmp); +} + static inline int edata_esnead_comp(const edata_t *a, const edata_t *b) { int ret; From 252e0942d0346f1cc700874b55d0c1fef95c40e7 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 09:26:09 -0700 Subject: [PATCH 2094/2608] Eset: Pull per-pszind data into structs. We currently have one for stats and one for the data. The data struct is just a wrapper around the edata_heap_t, but this will change shortly. --- include/jemalloc/internal/eset.h | 22 +++++++++--- src/eset.c | 61 ++++++++++++++++++++------------ 2 files changed, 56 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index ff5e57d1..708ef997 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -14,16 +14,28 @@ * there are mutating operations. One exception is the stats counters, which * may be read without any locking. */ + +typedef struct eset_bin_s eset_bin_t; +struct eset_bin_s { + edata_heap_t heap; +}; + +typedef struct eset_bin_stats_s eset_bin_stats_t; +struct eset_bin_stats_s { + atomic_zu_t nextents; + atomic_zu_t nbytes; +}; + typedef struct eset_s eset_t; struct eset_s { - /* Quantized per size class heaps of extents. */ - edata_heap_t heaps[SC_NPSIZES + 1]; - atomic_zu_t nextents[SC_NPSIZES + 1]; - atomic_zu_t nbytes[SC_NPSIZES + 1]; - /* Bitmap for which set bits correspond to non-empty heaps. */ fb_group_t bitmap[FB_NGROUPS(SC_NPSIZES + 1)]; + /* Quantized per size class heaps of extents. 
*/ + eset_bin_t bins[SC_NPSIZES + 1]; + + eset_bin_stats_t bin_stats[SC_NPSIZES + 1]; + /* LRU of all extents in heaps. */ edata_list_inactive_t lru; diff --git a/src/eset.c b/src/eset.c index 9183ac67..01af422c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -5,14 +5,25 @@ #define ESET_NPSIZES (SC_NPSIZES + 1) +static void +eset_bin_init(eset_bin_t *bin) { + edata_heap_new(&bin->heap); +} + +static void +eset_bin_stats_init(eset_bin_stats_t *bin_stats) { + atomic_store_zu(&bin_stats->nextents, 0, ATOMIC_RELAXED); + atomic_store_zu(&bin_stats->nbytes, 0, ATOMIC_RELAXED); +} + void eset_init(eset_t *eset, extent_state_t state) { for (unsigned i = 0; i < ESET_NPSIZES; i++) { - edata_heap_new(&eset->heaps[i]); + eset_bin_init(&eset->bins[i]); + eset_bin_stats_init(&eset->bin_stats[i]); } fb_init(eset->bitmap, ESET_NPSIZES); edata_list_inactive_init(&eset->lru); - atomic_store_zu(&eset->npages, 0, ATOMIC_RELAXED); eset->state = state; } @@ -23,28 +34,34 @@ eset_npages_get(eset_t *eset) { size_t eset_nextents_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); + return atomic_load_zu(&eset->bin_stats[pind].nextents, ATOMIC_RELAXED); } size_t eset_nbytes_get(eset_t *eset, pszind_t pind) { - return atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); + return atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); } static void eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur + 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur + sz, ATOMIC_RELAXED); + size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, + ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nextents, cur + 1, + ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nbytes, cur 
+ sz, + ATOMIC_RELAXED); } static void eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->nextents[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nextents[pind], cur - 1, ATOMIC_RELAXED); - cur = atomic_load_zu(&eset->nbytes[pind], ATOMIC_RELAXED); - atomic_store_zu(&eset->nbytes[pind], cur - sz, ATOMIC_RELAXED); + size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, + ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nextents, cur - 1, + ATOMIC_RELAXED); + cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); + atomic_store_zu(&eset->bin_stats[pind].nbytes, cur - sz, + ATOMIC_RELAXED); } void @@ -54,10 +71,10 @@ eset_insert(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - if (edata_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->bins[pind].heap)) { fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind); } - edata_heap_insert(&eset->heaps[pind], edata); + edata_heap_insert(&eset->bins[pind].heap, edata); if (config_stats) { eset_stats_add(eset, pind, size); @@ -84,13 +101,13 @@ eset_remove(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - edata_heap_remove(&eset->heaps[pind], edata); + edata_heap_remove(&eset->bins[pind].heap, edata); if (config_stats) { eset_stats_sub(eset, pind, size); } - if (edata_heap_empty(&eset->heaps[pind])) { + if (edata_heap_empty(&eset->bins[pind].heap)) { fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind); } edata_list_inactive_remove(&eset->lru, edata); @@ -125,8 +142,8 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, i < pind_max; i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(i < SC_NPSIZES); - assert(!edata_heap_empty(&eset->heaps[i])); - edata_t *edata = edata_heap_first(&eset->heaps[i]); + 
assert(!edata_heap_empty(&eset->bins[i].heap)); + edata_t *edata = edata_heap_first(&eset->bins[i].heap); uintptr_t base = (uintptr_t)edata_base_get(edata); size_t candidate_size = edata_size_get(edata); assert(candidate_size >= min_size); @@ -165,16 +182,16 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - return edata_heap_empty(&eset->heaps[pind]) ? NULL : - edata_heap_first(&eset->heaps[pind]); + return edata_heap_empty(&eset->bins[pind].heap) ? NULL : + edata_heap_first(&eset->bins[pind].heap); } for (pszind_t i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); i < ESET_NPSIZES; i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { - assert(!edata_heap_empty(&eset->heaps[i])); - edata_t *edata = edata_heap_first(&eset->heaps[i]); + assert(!edata_heap_empty(&eset->bins[i].heap)); + edata_t *edata = edata_heap_first(&eset->bins[i].heap); assert(edata_size_get(edata) >= size); if (lg_max_fit == SC_PTR_BITS) { /* From dcb7b83facf4f7641cefc0fc7c11c3d88310dae0 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Jul 2021 10:20:44 -0700 Subject: [PATCH 2095/2608] Eset: Cache summary information for heap edatas. This lets us do a single array scan to find first fits, instead of taking a cache miss per examined size class. --- include/jemalloc/internal/eset.h | 8 +++++ src/eset.c | 60 +++++++++++++++++++++++++++----- 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 708ef997..4f689b47 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -18,6 +18,14 @@ typedef struct eset_bin_s eset_bin_t; struct eset_bin_s { edata_heap_t heap; + /* + * We do first-fit across multiple size classes. If we compared against + * the min element in each heap directly, we'd take a cache miss per + * extent we looked at. 
If we co-locate the edata summaries, we only + * take a miss on the edata we're actually going to return (which is + * inevitable anyways). + */ + edata_cmp_summary_t heap_min; }; typedef struct eset_bin_stats_s eset_bin_stats_t; diff --git a/src/eset.c b/src/eset.c index 01af422c..6f8f335e 100644 --- a/src/eset.c +++ b/src/eset.c @@ -8,6 +8,10 @@ static void eset_bin_init(eset_bin_t *bin) { edata_heap_new(&bin->heap); + /* + * heap_min doesn't need initialization; it gets filled in when the bin + * goes from non-empty to empty. + */ } static void @@ -71,8 +75,21 @@ eset_insert(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); + + edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata); if (edata_heap_empty(&eset->bins[pind].heap)) { fb_set(eset->bitmap, ESET_NPSIZES, (size_t)pind); + /* Only element is automatically the min element. */ + eset->bins[pind].heap_min = edata_cmp_summary; + } else { + /* + * There's already a min element; update the summary if we're + * about to insert a lower one. + */ + if (edata_cmp_summary_comp(edata_cmp_summary, + eset->bins[pind].heap_min) < 0) { + eset->bins[pind].heap_min = edata_cmp_summary; + } } edata_heap_insert(&eset->bins[pind].heap, edata); @@ -101,14 +118,29 @@ eset_remove(eset_t *eset, edata_t *edata) { size_t size = edata_size_get(edata); size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); - edata_heap_remove(&eset->bins[pind].heap, edata); - if (config_stats) { eset_stats_sub(eset, pind, size); } + edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata); + edata_heap_remove(&eset->bins[pind].heap, edata); if (edata_heap_empty(&eset->bins[pind].heap)) { fb_unset(eset->bitmap, ESET_NPSIZES, (size_t)pind); + } else { + /* + * This is a little weird; we compare if the summaries are + * equal, rather than if the edata we removed was the heap + * minimum. 
The reason why is that getting the heap minimum + * can cause a pairing heap merge operation. We can avoid this + * if we only update the min if it's changed, in which case the + * summaries of the removed element and the min element should + * compare equal. + */ + if (edata_cmp_summary_comp(edata_cmp_summary, + eset->bins[pind].heap_min) == 0) { + eset->bins[pind].heap_min = edata_cmp_summary_get( + edata_heap_first(&eset->bins[pind].heap)); + } } edata_list_inactive_remove(&eset->lru, edata); size_t npages = size >> LG_PAGE; @@ -116,10 +148,6 @@ eset_remove(eset_t *eset, edata_t *edata) { * As in eset_insert, we hold eset->mtx and so don't need atomic * operations for updating eset->npages. */ - /* - * This class is not thread-safe in general; we rely on external - * synchronization for all mutating operations. - */ size_t cur_extents_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); @@ -178,6 +206,7 @@ static edata_t * eset_first_fit(eset_t *eset, size_t size, bool exact_only, unsigned lg_max_fit) { edata_t *ret = NULL; + edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0}); pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); @@ -191,8 +220,6 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, i < ESET_NPSIZES; i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(!edata_heap_empty(&eset->bins[i].heap)); - edata_t *edata = edata_heap_first(&eset->bins[i].heap); - assert(edata_size_get(edata) >= size); if (lg_max_fit == SC_PTR_BITS) { /* * We'll shift by this below, and shifting out all the @@ -204,8 +231,23 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, if ((sz_pind2sz(i) >> lg_max_fit) > size) { break; } - if (ret == NULL || edata_snad_comp(edata, ret) < 0) { + if (ret == NULL || edata_cmp_summary_comp( + eset->bins[i].heap_min, ret_summ) < 0) { + /* + * We grab the edata as early as possible, even though + * we might change it later. 
Practically, a large + * portion of eset_fit calls succeed at the first valid + * index, so this doesn't cost much, and we get the + * effect of prefetching the edata as early as possible. + */ + edata_t *edata = edata_heap_first(&eset->bins[i].heap); + assert(edata_size_get(edata) >= size); + assert(ret == NULL || edata_snad_comp(edata, ret) < 0); + assert(ret == NULL || edata_cmp_summary_comp( + eset->bins[i].heap_min, + edata_cmp_summary_get(edata)) == 0); ret = edata; + ret_summ = eset->bins[i].heap_min; } if (i == SC_NPSIZES) { break; From 40d53e007c054f37a5666b2550304adc65c74c78 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 26 Jul 2021 11:52:42 -0700 Subject: [PATCH 2096/2608] ph: Add aux-list counting and pre-merging. --- include/jemalloc/internal/ph.h | 127 ++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index beb50d54..3f7d759d 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -13,6 +13,40 @@ * http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.106.2988&rep=rep1&type=pdf * ******************************************************************************* + * + * We include a non-obvious optimization: + * - First, we introduce a new pop-and-link operation; pop the two most + * recently-inserted items off the aux-list, link them, and push the resulting + * heap. + * - We maintain a count of the number of insertions since the last time we + * merged the aux-list (i.e. via first() or remove_first()). After N inserts, + * we do ffs(N) pop-and-link operations. + * + * One way to think of this is that we're progressively building up a tree in + * the aux-list, rather than a linked-list (think of the series of merges that + * will be performed as the aux-count grows). + * + * There's a couple reasons we benefit from this: + * - Ordinarily, after N insertions, the aux-list is of size N. 
With our + * strategy, it's of size O(log(N)). So we decrease the worst-case time of + * first() calls, and reduce the average cost of remove_min calls. Since + * these almost always occur while holding a lock, we practically reduce the + * frequency of unusually long hold times. + * - This moves the bulk of the work of merging the aux-list onto the threads + * that are inserting into the heap. In some common scenarios, insertions + * happen in bulk, from a single thread (think tcache flushing; we potentially + * move many slabs from slabs_full to slabs_nonfull). All the nodes in this + * case are in the inserting threads cache, and linking them is very cheap + * (cache misses dominate linking cost). Without this optimization, linking + * happens on the next call to remove_first. Since that remove_first call + * likely happens on a different thread (or at least, after the cache has + * gotten cold if done on the same thread), deferring linking trades cheap + * link operations now for expensive ones later. + * + * The ffs trick keeps amortized insert cost at constant time. Similar + * strategies based on periodically sorting the list after a batch of operations + * perform worse than this in practice, even with various fancy tricks; they + * all took amortized complexity of an insert from O(1) to O(log(n)). */ typedef int (*ph_cmp_t)(void *, void *); @@ -28,6 +62,13 @@ struct phn_link_s { typedef struct ph_s ph_t; struct ph_s { void *root; + /* + * Inserts done since the last aux-list merge. This is not necessarily + * the size of the aux-list, since it's possible that removals have + * happened since, and we don't track whether or not those removals are + * from the aux list. 
+ */ + size_t auxcount; }; JEMALLOC_ALWAYS_INLINE phn_link_t * @@ -181,6 +222,7 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) { + ph->auxcount = 0; void *phn = phn_next_get(ph->root, offset); if (phn != NULL) { phn_prev_set(ph->root, NULL, offset); @@ -207,6 +249,7 @@ ph_merge_children(void *phn, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_new(ph_t *ph) { ph->root = NULL; + ph->auxcount = 0; } JEMALLOC_ALWAYS_INLINE bool @@ -235,8 +278,35 @@ ph_any(ph_t *ph, size_t offset) { return ph->root; } +/* Returns true if we should stop trying to merge. */ +JEMALLOC_ALWAYS_INLINE bool +ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) { + assert(ph->root != NULL); + void *phn0 = phn_next_get(ph->root, offset); + if (phn0 == NULL) { + return true; + } + void *phn1 = phn_next_get(phn0, offset); + if (phn1 == NULL) { + return true; + } + void *next_phn1 = phn_next_get(phn1, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + phn_next_set(phn0, next_phn1, offset); + if (next_phn1 != NULL) { + phn_prev_set(next_phn1, phn0, offset); + } + phn_next_set(ph->root, phn0, offset); + phn_prev_set(phn0, ph->root, offset); + return next_phn1 == NULL; +} + JEMALLOC_ALWAYS_INLINE void -ph_insert(ph_t *ph, void *phn, size_t offset) { +ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { phn_link_init(phn, offset); /* @@ -249,6 +319,7 @@ ph_insert(ph_t *ph, void *phn, size_t offset) { if (ph->root == NULL) { ph->root = phn; } else { + ph->auxcount++; phn_next_set(phn, phn_next_get(ph->root, offset), offset); if (phn_next_get(ph->root, offset) != NULL) { phn_prev_set(phn_next_get(ph->root, offset), phn, @@ -257,6 +328,13 @@ ph_insert(ph_t *ph, void *phn, size_t offset) { phn_prev_set(phn, ph->root, offset); 
phn_next_set(ph->root, phn, offset); } + if (ph->auxcount > 1) { + unsigned nmerges = ffs_zu(ph->auxcount - 1); + bool done = false; + for (unsigned i = 0; i < nmerges && !done; i++) { + done = ph_try_aux_merge_pair(ph, offset, cmp); + } + } } JEMALLOC_ALWAYS_INLINE void * @@ -274,31 +352,6 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { } -JEMALLOC_ALWAYS_INLINE void * -ph_remove_any(ph_t *ph, size_t offset, ph_cmp_t cmp) { - /* - * Remove the most recently inserted aux list element, or the root if - * the aux list is empty. This has the effect of behaving as a LIFO - * (and insertion/removal is therefore constant-time) if - * a_prefix##[remove_]first() are never called. - */ - if (ph->root == NULL) { - return NULL; - } - void *ret = phn_next_get(ph->root, offset); - if (ret != NULL) { - void *aux = phn_next_get(ret, offset); - phn_next_set(ph->root, aux, offset); - if (aux != NULL) { - phn_prev_set(aux, ph->root, offset); - } - return ret; - } - ret = ph->root; - ph->root = ph_merge_children(ph->root, offset, cmp); - return ret; -} - JEMALLOC_ALWAYS_INLINE void ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { void *replace; @@ -392,8 +445,8 @@ a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ -a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); +a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ #define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ @@ -425,7 +478,8 @@ a_prefix##_any(a_prefix##_t *ph) { \ \ a_attr void \ a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ - ph_insert(&ph->ph, phn, offsetof(a_type, a_field)); \ + ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ } \ \ a_attr a_type * \ @@ -434,16 +488,19 @@ a_prefix##_remove_first(a_prefix##_t *ph) { \ a_prefix##_ph_cmp); \ } \ \ -a_attr a_type * \ -a_prefix##_remove_any(a_prefix##_t *ph) { \ - return ph_remove_any(&ph->ph, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ a_attr void \ a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ a_prefix##_ph_cmp); \ +} \ + \ +a_attr a_type * \ +a_prefix##_remove_any(a_prefix##_t *ph) { \ + a_type *ret = a_prefix##_any(ph); \ + if (ret != NULL) { \ + a_prefix##_remove(ph, ret); \ + } \ + return ret; \ } #endif /* JEMALLOC_INTERNAL_PH_H */ From dae24589bc4e4bcb2a19844e3c5753b8c50d714a Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 26 Jul 2021 13:51:38 -0700 Subject: [PATCH 2097/2608] PH: Insert-below-min fast-path. --- include/jemalloc/internal/ph.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 3f7d759d..5f091c5f 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -319,6 +319,20 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { if (ph->root == NULL) { ph->root = phn; } else { + /* + * As a special case, check to see if we can replace the root. + * This is practically common in some important cases, and lets + * us defer some insertions (hopefully, until the point where + * some of the items in the aux list have been removed, savings + * us from linking them at all). 
+ */ + if (cmp(phn, ph->root) < 0) { + phn_lchild_set(phn, ph->root, offset); + phn_prev_set(ph->root, phn, offset); + ph->root = phn; + ph->auxcount = 0; + return; + } ph->auxcount++; phn_next_set(phn, phn_next_get(ph->root, offset), offset); if (phn_next_get(ph->root, offset) != NULL) { From 6f41ba55ee85ce505d61713650f49f8bbb5bee6b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 4 Aug 2021 12:53:39 -0700 Subject: [PATCH 2098/2608] Mutex: Make spin count configurable. Don't document it since we don't want to support this as a "real" setting, but it's handy for testing. --- include/jemalloc/internal/mutex.h | 10 +++------- src/ctl.c | 3 +++ src/jemalloc.c | 3 +++ src/mutex.c | 10 ++++++++-- src/stats.c | 1 + 5 files changed, 18 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index f5b1163a..63a0b1b3 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -6,6 +6,8 @@ #include "jemalloc/internal/tsd.h" #include "jemalloc/internal/witness.h" +extern int64_t opt_mutex_max_spin; + typedef enum { /* Can only acquire one mutex of a given witness rank at a time. */ malloc_mutex_rank_exclusive, @@ -43,7 +45,7 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif - /* + /* * Hint flag to avoid exclusive cache line contention * during spin waiting */ @@ -67,12 +69,6 @@ struct malloc_mutex_s { #endif }; -/* - * Based on benchmark results, a fixed spin with this amount of retries works - * well for our critical sections. 
- */ -#define MALLOC_MUTEX_MAX_SPIN 250 - #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 # define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) diff --git a/src/ctl.c b/src/ctl.c index b3e62dfa..3ed00072 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -110,6 +110,7 @@ CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_mutex_max_spin) CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) @@ -421,6 +422,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, + {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("background_thread_hpa_interval_max_ms"), @@ -2138,6 +2140,7 @@ CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) +CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8d57180e..d5e886e7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1256,6 +1256,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + CONF_HANDLE_INT64_T(opt_mutex_max_spin, + "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? 
NSTIME_SEC_MAX * KQU(1000) : diff --git a/src/mutex.c b/src/mutex.c index 83d9ce76..79b8f275 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -9,6 +9,12 @@ #define _CRT_SPINCOUNT 4000 #endif +/* + * Based on benchmark results, a fixed spin with this amount of retries works + * well for our critical sections. + */ +int64_t opt_mutex_max_spin = 250; + /******************************************************************************/ /* Data. */ @@ -51,7 +57,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { goto label_spin_done; } - int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; + int cnt = 0; do { spin_cpu_spinwait(); if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) @@ -59,7 +65,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { data->n_spin_acquired++; return; } - } while (cnt++ < max_cnt); + } while (cnt++ < opt_mutex_max_spin || opt_mutex_max_spin == -1); if (!config_stats) { /* Only spin is useful when stats is off. */ diff --git a/src/stats.c b/src/stats.c index 16aa3fd4..3a2806ed 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1496,6 +1496,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") From 27f71242b74ea402db45c1e6b3b79708b78762d4 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Thu, 5 Aug 2021 10:27:25 -0700 Subject: [PATCH 2099/2608] Mutex: Tweak internal spin count. The recent pairing heap optimizations flattened the lock hold time profile. This was a win for raw cycle counts, but ended up causing us to "just miss" acquiring the mutex before sleeping more often. Bump those counts. 
--- src/mutex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mutex.c b/src/mutex.c index 79b8f275..0b3547a8 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -13,7 +13,7 @@ * Based on benchmark results, a fixed spin with this amount of retries works * well for our critical sections. */ -int64_t opt_mutex_max_spin = 250; +int64_t opt_mutex_max_spin = 600; /******************************************************************************/ /* Data. */ From f58064b9321b30bdf9b31715acbe523e4a964adf Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 5 Aug 2021 14:28:32 -0700 Subject: [PATCH 2100/2608] Verify that HPA is used before calling its functions This change eliminates the possibility of PA calling functions of uninitialized HPA. --- src/hpa.c | 24 ++++++++++++++++++++++++ src/pa.c | 10 +++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 6441b4ea..6b7517d8 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -50,6 +50,11 @@ hpa_supported() { return true; } +static void +hpa_do_consistency_checks(hpa_shard_t *shard) { + assert(shard->base != NULL); +} + bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { /* malloc_conf processing should have filtered out these cases. 
*/ @@ -214,6 +219,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->pai.dalloc = &hpa_dalloc; shard->pai.dalloc_batch = &hpa_dalloc_batch; + hpa_do_consistency_checks(shard); + return false; } @@ -242,6 +249,8 @@ hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->grow_mtx); malloc_mutex_lock(tsdn, &shard->mtx); psset_stats_accum(&dst->psset_stats, &shard->psset.stats); @@ -843,6 +852,8 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->mtx); edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); @@ -868,6 +879,7 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); /* * By the time we're here, the arena code should have dalloc'd all the * active extents, which means we should have eventually evicted @@ -891,6 +903,8 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->mtx); bool deferral_previously_allowed = shard->opts.deferral_allowed; shard->opts.deferral_allowed = deferral_allowed; @@ -903,6 +917,8 @@ hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_lock(tsdn, &shard->mtx); hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ true); malloc_mutex_unlock(tsdn, &shard->mtx); @@ -910,22 +926,30 @@ hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { void 
hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_prefork(tsdn, &shard->grow_mtx); } void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_prefork(tsdn, &shard->mtx); } void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->mtx); } void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } diff --git a/src/pa.c b/src/pa.c index aebb8e92..93da02e0 100644 --- a/src/pa.c +++ b/src/pa.c @@ -226,11 +226,15 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed) { - hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, - deferral_allowed); + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, + deferral_allowed); + } } void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { - hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); + if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); + } } From 6a0160071241bce956978550a60208a37bc971c1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 6 Aug 2021 17:15:56 -0700 Subject: [PATCH 2101/2608] Add Cirrus CI testing matrix Contains 16 testing configs -- a mix of debug, prof, -m32 and a few uncommon options. 
--- .cirrus.yml | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 30fe830b..4cca64ba 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -3,6 +3,30 @@ env: ARCH: amd64 task: + matrix: + env: + DEBUG_CONFIG: --enable-debug + env: + DEBUG_CONFIG: --disable-debug + matrix: + - env: + PROF_CONFIG: --enable-prof + - env: + PROF_CONFIG: --disable-prof + matrix: + - name: 64-bit + env: + CC: + CXX: + - name: 32-bit + env: + CC: cc -m32 + CXX: c++ -m32 + matrix: + - env: + UNCOMMON_CONFIG: + - env: + UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false freebsd_instance: matrix: image: freebsd-12-2-release-amd64 @@ -12,11 +36,10 @@ task: - pkg install -y autoconf gmake script: - autoconf - #- ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS # We don't perfectly track freebsd stdlib.h definitions. This is fine when # we count as a system header, but breaks otherwise, like during these # tests. - - ./configure --with-jemalloc-prefix=ci_ + - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests From 5884a076fb858320e7bcf86b961dd1555a81a75e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 12 Aug 2021 15:48:02 -0700 Subject: [PATCH 2102/2608] Rename prof.dump_prefix to prof.prefix This better aligns with our naming convention. The option has not been included in any upstream release yet. 
--- doc/jemalloc.xml.in | 22 ++++++-------- include/jemalloc/internal/prof_sys.h | 2 +- src/ctl.c | 8 +++--- src/prof_sys.c | 43 ++++++++++++++-------------- test/unit/prof_idump.c | 6 ++-- 5 files changed, 37 insertions(+), 44 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fa53715d..b8b96abe 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1410,8 +1410,7 @@ malloc_conf = "xmalloc:true";]]> primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is jeprof. This prefix value can be overriden by - prof.dump_prefix. + prof.prefix. @@ -1492,8 +1491,7 @@ malloc_conf = "xmalloc:true";]]> where <prefix> is controlled by the opt.prof_prefix and - prof.dump_prefix + prof.prefix options. By default, interval-triggered profile dumping is disabled (encoded as -1). @@ -1527,8 +1525,7 @@ malloc_conf = "xmalloc:true";]]> <prefix>.<pid>.<seq>.f.heap, where <prefix> is controlled by the opt.prof_prefix and - prof.dump_prefix + prof.prefix options. Note that atexit() may allocate memory during application initialization and then deadlock internally when jemalloc in turn calls atexit(), so @@ -2398,16 +2395,14 @@ struct extent_hooks_s { is specified, to a file according to the pattern <prefix>.<pid>.<seq>.m<mseq>.heap, where <prefix> is controlled by the - opt.prof_prefix and - prof.dump_prefix + opt.prof_prefix + and prof.prefix options. - + - prof.dump_prefix + prof.prefix (const char *) -w [] @@ -2433,8 +2428,7 @@ struct extent_hooks_s { <prefix>.<pid>.<seq>.u<useq>.heap, where <prefix> is controlled by the opt.prof_prefix and - prof.dump_prefix + prof.prefix options. 
diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index d784ef91..6e4e811a 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -10,7 +10,7 @@ void prof_unwind_init(); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); -bool prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix); +bool prof_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); diff --git a/src/ctl.c b/src/ctl.c index 3ed00072..253341a1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -187,7 +187,7 @@ CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) CTL_PROTO(prof_gdump) -CTL_PROTO(prof_dump_prefix) +CTL_PROTO(prof_prefix) CTL_PROTO(prof_reset) CTL_PROTO(prof_interval) CTL_PROTO(lg_prof_sample) @@ -578,7 +578,7 @@ static const ctl_named_node_t prof_node[] = { {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, {NAME("gdump"), CTL(prof_gdump)}, - {NAME("dump_prefix"), CTL(prof_dump_prefix)}, + {NAME("prefix"), CTL(prof_prefix)}, {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, {NAME("lg_sample"), CTL(lg_prof_sample)}, @@ -3227,7 +3227,7 @@ label_return: } static int -prof_dump_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, +prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; const char *prefix = NULL; @@ -3240,7 +3240,7 @@ prof_dump_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, WRITEONLY(); WRITE(prefix, const char *); - ret = prof_dump_prefix_set(tsd_tsdn(tsd), prefix) ? EFAULT : 0; + ret = prof_prefix_set(tsd_tsdn(tsd), prefix) ? 
EFAULT : 0; label_return: malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); return ret; diff --git a/src/prof_sys.c b/src/prof_sys.c index 87cd2b2f..6a5b2b16 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -34,7 +34,7 @@ static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; static uint64_t prof_dump_useq; -static char *prof_dump_prefix = NULL; +static char *prof_prefix = NULL; /* The fallback allocator profiling functionality will use. */ base_t *prof_base; @@ -524,16 +524,16 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { } static const char * -prof_dump_prefix_get(tsdn_t* tsdn) { +prof_prefix_get(tsdn_t* tsdn) { malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); - return prof_dump_prefix == NULL ? opt_prof_prefix : prof_dump_prefix; + return prof_prefix == NULL ? opt_prof_prefix : prof_prefix; } static bool -prof_dump_prefix_is_empty(tsdn_t *tsdn) { +prof_prefix_is_empty(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - bool ret = (prof_dump_prefix_get(tsdn)[0] == '\0'); + bool ret = (prof_prefix_get(tsdn)[0] == '\0'); malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); return ret; } @@ -545,18 +545,18 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); assert(tsd_reentrancy_level_get(tsd) == 0); - const char *prof_prefix = prof_dump_prefix_get(tsd_tsdn(tsd)); + const char *prof_prefix = prof_prefix_get(tsd_tsdn(tsd)); if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", - prof_prefix, prof_getpid(), prof_dump_seq, v, vseq); + "%s.%d.%"FMTu64".%c%"FMTu64".heap", prof_prefix, + prof_getpid(), prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", - prof_prefix, prof_getpid(), prof_dump_seq, v); + "%s.%d.%"FMTu64".%c.heap", prof_prefix, + prof_getpid(), prof_dump_seq, v); } prof_dump_seq++; } @@ -565,8 
+565,7 @@ void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_dump_prefix_get(tsdn), prof_getpid(), - ind); + "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); } @@ -574,7 +573,7 @@ void prof_fdump_impl(tsd_t *tsd) { char filename[DUMP_FILENAME_BUFSIZE]; - assert(!prof_dump_prefix_is_empty(tsd_tsdn(tsd))); + assert(!prof_prefix_is_empty(tsd_tsdn(tsd))); malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); prof_dump_filename(tsd, filename, 'f', VSEQ_INVALID); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); @@ -582,11 +581,11 @@ prof_fdump_impl(tsd_t *tsd) { } bool -prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { +prof_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); ctl_mtx_assert_held(tsdn); malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix == NULL) { + if (prof_prefix == NULL) { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); /* Everything is still guarded by ctl_mtx. 
*/ char *buffer = base_alloc(tsdn, prof_base, @@ -595,12 +594,12 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { return true; } malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - prof_dump_prefix = buffer; + prof_prefix = buffer; } - assert(prof_dump_prefix != NULL); + assert(prof_prefix != NULL); - prof_strncpy(prof_dump_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); - prof_dump_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; + prof_strncpy(prof_prefix, prefix, PROF_DUMP_FILENAME_LEN - 1); + prof_prefix[PROF_DUMP_FILENAME_LEN - 1] = '\0'; malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); return false; @@ -609,7 +608,7 @@ prof_dump_prefix_set(tsdn_t *tsdn, const char *prefix) { void prof_idump_impl(tsd_t *tsd) { malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); return; } @@ -626,7 +625,7 @@ prof_mdump_impl(tsd_t *tsd, const char *filename) { if (filename == NULL) { /* No filename specified, so automatically generate one. 
*/ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsd_tsdn(tsd))[0] == '\0') { + if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') { malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); return true; } @@ -642,7 +641,7 @@ void prof_gdump_impl(tsd_t *tsd) { tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - if (prof_dump_prefix_get(tsdn)[0] == '\0') { + if (prof_prefix_get(tsdn)[0] == '\0') { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); return; } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 607944c1..e9f5e56c 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -26,14 +26,14 @@ TEST_BEGIN(test_idump) { bool active; void *p; - const char *dump_prefix = TEST_PREFIX; + const char *prefix = TEST_PREFIX; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.dump_prefix", NULL, NULL, - (void *)&dump_prefix, sizeof(dump_prefix)), 0, + expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&prefix, + sizeof(prefix)), 0, "Unexpected mallctl failure while overwriting dump prefix"); expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, From 9d02bdc8838d03b043de5017eaaa837f21dbc4c0 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 12 Aug 2021 19:21:56 -0700 Subject: [PATCH 2103/2608] Port gen_run_tests.py to python3 Insignificant changes to make the script runnable on python3. 
--- scripts/gen_run_tests.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/scripts/gen_run_tests.py b/scripts/gen_run_tests.py index 77c2ce53..7c3075f9 100755 --- a/scripts/gen_run_tests.py +++ b/scripts/gen_run_tests.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 import sys from itertools import combinations @@ -21,7 +21,7 @@ else: def powerset(items): result = [] - for i in xrange(len(items) + 1): + for i in range(len(items) + 1): result += combinations(items, i) return result @@ -53,19 +53,20 @@ possible_malloc_conf_opts = [ 'background_thread:true', ] -print 'set -e' -print 'if [ -f Makefile ] ; then %(make_cmd)s relclean ; fi' % {'make_cmd': make_cmd} -print 'autoconf' -print 'rm -rf run_tests.out' -print 'mkdir run_tests.out' -print 'cd run_tests.out' +print('set -e') +print('if [ -f Makefile ] ; then %(make_cmd)s relclean ; fi' % {'make_cmd': + make_cmd}) +print('autoconf') +print('rm -rf run_tests.out') +print('mkdir run_tests.out') +print('cd run_tests.out') ind = 0 for cc, cxx in possible_compilers: for compiler_opts in powerset(possible_compiler_opts): for config_opts in powerset(possible_config_opts): for malloc_conf_opts in powerset(possible_malloc_conf_opts): - if cc is 'clang' \ + if cc == 'clang' \ and '-m32' in possible_compiler_opts \ and '--enable-prof' in config_opts: continue @@ -80,9 +81,9 @@ for cc, cxx in possible_compilers: ) # We don't want to test large vaddr spaces in 32-bit mode. - if ('-m32' in compiler_opts and '--with-lg-vaddr=56' in - config_opts): - continue + if ('-m32' in compiler_opts and '--with-lg-vaddr=56' in + config_opts): + continue # Per CPU arenas are only supported on Linux. 
linux_supported = ('percpu_arena:percpu' in malloc_conf_opts \ @@ -93,7 +94,7 @@ for cc, cxx in possible_compilers: if (uname == 'Linux' and linux_supported) \ or (not linux_supported and (uname != 'Darwin' or \ not darwin_unsupported)): - print """cat < run_test_%(ind)d.sh + print("""cat < run_test_%(ind)d.sh #!/bin/sh set -e @@ -121,7 +122,9 @@ run_cmd %(make_cmd)s all tests run_cmd %(make_cmd)s check run_cmd %(make_cmd)s distclean EOF -chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line, 'make_cmd': make_cmd} +chmod 755 run_test_%(ind)d.sh""" % {'ind': ind, 'config_line': config_line, + 'make_cmd': make_cmd}) ind += 1 -print 'for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel} +print('for i in `seq 0 %(last_ind)d` ; do echo run_test_${i}.sh ; done | xargs' + ' -P %(nparallel)d -n 1 sh' % {'last_ind': ind-1, 'nparallel': nparallel}) From 2c625d5cd97e9cb133072feab2edb6b8c78861ef Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 12 Aug 2021 19:02:12 -0700 Subject: [PATCH 2104/2608] Fix warnings when compiled with clang When clang sees an unknown warning option, unlike gcc it doesn't fail the build with error. It issues a warning. Hence JE_CFLAGS_ADD with warning options that didnt't exist in clang would still mark those options as available. This led to several warnings when built with clang or "gcc" on OSX. This change fixes those warnings by simply making clang fail builds with non-existent warning options. 
--- configure.ac | 1 + 1 file changed, 1 insertion(+) diff --git a/configure.ac b/configure.ac index 5eb4d46f..3e18f4a7 100644 --- a/configure.ac +++ b/configure.ac @@ -244,6 +244,7 @@ if test "x$GCC" = "xyes" ; then AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) fi fi + JE_CFLAGS_ADD([-Werror=unknown-warning-option]) JE_CFLAGS_ADD([-Wall]) JE_CFLAGS_ADD([-Wextra]) JE_CFLAGS_ADD([-Wshorten-64-to-32]) From c01a885e94b6edb8545113d3ba43248b4b75e90c Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 19 Aug 2021 15:16:11 -0700 Subject: [PATCH 2105/2608] HPA: Correctly calculate retained pages Retained pages are those which haven't been touched and are unbacked from OS perspective. For a pageslab their number should equal "total pages in slab" minus "touched pages". --- include/jemalloc/internal/hpdata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index c2ed692b..1fb534db 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -292,7 +292,7 @@ hpdata_ndirty_get(hpdata_t *hpdata) { static inline size_t hpdata_nretained_get(hpdata_t *hpdata) { - return hpdata->h_nactive - hpdata->h_ntouched; + return HUGEPAGE_PAGES - hpdata->h_ntouched; } static inline void From 8b24cb8fdf2bf210e243c1d676484a4ffa5c3f6c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 23 Aug 2021 15:55:54 -0700 Subject: [PATCH 2106/2608] Don't assume initialized arena in the default alloc hook. Specifically, this change allows the default alloc hook to used during arenas.create. One use case is to invoke the default alloc hook in a customized hook arena, i.e. the default hooks can be read out of a default arena, then create customized ones based on these hooks. Note that mixing the default with customized hooks is not recommended, and should only be considered when the customization is simple and straightforward. 
--- src/ehooks.c | 25 ++++++++----------------- test/unit/arena_reset.c | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 17 deletions(-) diff --git a/src/ehooks.c b/src/ehooks.c index 535066e7..5d12d003 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -52,9 +52,12 @@ void * ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { arena_t *arena = arena_get(tsdn, arena_ind, false); - void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, - commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, - ATOMIC_RELAXED)); + /* NULL arena indicates arena_create. */ + assert(arena != NULL || alignment == HUGEPAGE); + dss_prec_t dss = (arena == NULL) ? dss_prec_disabled : + (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, + zero, commit, dss); if (have_madvise_huge && ret) { pages_set_thp_state(ret, size); } @@ -64,20 +67,8 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, static void * ehooks_default_alloc(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { - tsdn_t *tsdn; - arena_t *arena; - - tsdn = tsdn_fetch(); - arena = arena_get(tsdn, arena_ind, false); - /* - * The arena we're allocating on behalf of must have been initialized - * already. 
- */ - assert(arena != NULL); - - return ehooks_default_alloc_impl(tsdn, new_addr, size, - ALIGNMENT_CEILING(alignment, PAGE), zero, commit, - arena_ind_get(arena)); + return ehooks_default_alloc_impl(tsdn_fetch(), new_addr, size, + ALIGNMENT_CEILING(alignment, PAGE), zero, commit, arena_ind); } bool diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index a2cf3e54..589689c0 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -255,6 +255,21 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { do_arena_reset_post(ptrs, nptrs, arena_ind); do_arena_destroy(arena_ind_another); + + /* Try arena.create with custom hooks. */ + size_t sz = sizeof(extent_hooks_t *); + extent_hooks_t *default_hooks; + expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, + &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure"); + + /* Default impl; but wrapped as "customized". */ + extent_hooks_t new_hooks = *default_hooks; + extent_hooks_t *hook = &new_hooks; + sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + (void *)&hook, sizeof(void *)), 0, + "Unexpected mallctl() failure"); + do_arena_destroy(arena_ind); } TEST_END From e5062e9fb91e5f531266e5691a5567e7cc8fab5f Mon Sep 17 00:00:00 2001 From: Mingli Yu Date: Tue, 10 Aug 2021 13:02:18 +0000 Subject: [PATCH 2107/2608] Makefile.in: make sure doc generated before install There is a race between the doc generation and the doc installation, so make the install depend on the build for doc. 
Signed-off-by: Mingli Yu --- Makefile.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 286f7ea9..51276ceb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -582,21 +582,21 @@ install_lib: install_lib_static endif install_lib: install_lib_pc -install_doc_html: +install_doc_html: build_doc_html $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done -install_doc_man: +install_doc_man: build_doc_man $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done -install_doc: build_doc install_doc_html install_doc_man +install_doc: install_doc_html install_doc_man install: install_bin install_include install_lib From 26140dd24676a06293e105e0ac4e1f1fef04f337 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 1 Sep 2021 10:45:16 -0700 Subject: [PATCH 2108/2608] Reject --enable-prof-libunwind without --enable-prof Prior to the change you could specify --enable-prof-libunwind without --enable-prof which would do effectively nothing. This was confusing as I expected --enable-prof-libunwind to act like --enable-prof, but use libunwind. 
--- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 3e18f4a7..5a5887ac 100644 --- a/configure.ac +++ b/configure.ac @@ -1281,6 +1281,9 @@ AC_ARG_ENABLE([prof-libunwind], enable_prof_libunwind="0" else enable_prof_libunwind="1" + if test "x$enable_prof" = "x0" ; then + AC_MSG_ERROR([--enable-prof-libunwind should only be used with --enable-prof]) + fi fi ], [enable_prof_libunwind="0"] From b8b8027f19d089821a19214f56cc9c1202df835d Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 6 Aug 2021 14:53:05 -0700 Subject: [PATCH 2109/2608] Allow PAI to calculate time until deferred work Previously the calculation of sleep time between wakeups was implemented within background_thread. This resulted in some parts of decay and hpa specific logic mixing with background thread implementation. In this change, background thread delegates this calculation to arena and it, in turn, delegates it to PAI. The next step is to implement the actual calculation of time until deferred work in HPA. 
--- include/jemalloc/internal/arena_externs.h | 8 +- .../internal/background_thread_externs.h | 3 + .../internal/background_thread_inlines.h | 14 -- .../internal/background_thread_structs.h | 3 + include/jemalloc/internal/decay.h | 25 +++ include/jemalloc/internal/pa.h | 2 + include/jemalloc/internal/pai.h | 6 + src/arena.c | 86 ++++++++- src/background_thread.c | 177 ++++-------------- src/decay.c | 30 ++- src/hpa.c | 7 + src/pa.c | 67 ++++++- src/pac.c | 7 + test/unit/decay.c | 32 ++++ 14 files changed, 298 insertions(+), 169 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 557e49f1..02e7c1cc 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -8,6 +8,12 @@ #include "jemalloc/internal/pages.h" #include "jemalloc/internal/stats.h" +/* + * When the amount of pages to be purged exceeds this amount, deferred purge + * should happen. + */ +#define ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD UINT64_C(1024) + extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; @@ -16,7 +22,6 @@ extern const char *percpu_arena_mode_names[]; extern div_info_t arena_binind_div_info[SC_NBINS]; -extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; @@ -51,6 +56,7 @@ bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); +uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index bc49beaf..3d1ea6ce 100644 --- 
a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -13,6 +13,9 @@ extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); +bool background_thread_running(background_thread_info_t* info); +void background_thread_wakeup_early(background_thread_info_t *info, + nstime_t *remaining_sleep); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 71b433cb..92c5febe 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -45,18 +45,4 @@ background_thread_indefinite_sleep(background_thread_info_t *info) { return atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE); } -JEMALLOC_ALWAYS_INLINE void -arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, - bool is_background_thread) { - if (!background_thread_enabled() || is_background_thread) { - return; - } - background_thread_info_t *info = - arena_background_thread_info_get(arena); - if (background_thread_indefinite_sleep(info)) { - background_thread_interval_check(tsdn, arena, - &arena->pa_shard.pac.decay_dirty, 0); - } -} - #endif /* JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H */ diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index cc14dde3..b884b682 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -19,6 +19,9 @@ #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED (-2) #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 
5000 +#define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0) +#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_C(-1) + typedef enum { background_thread_stopped, background_thread_started, diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 8e517458..cf6a9d22 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -118,6 +118,25 @@ decay_epoch_duration_ns(const decay_t *decay) { return nstime_ns(&decay->interval); } +static inline bool +decay_immediately(const decay_t *decay) { + ssize_t decay_ms = decay_ms_read(decay); + return decay_ms == 0; +} + +static inline bool +decay_disabled(const decay_t *decay) { + ssize_t decay_ms = decay_ms_read(decay); + return decay_ms < 0; +} + +/* Returns true if decay is enabled and done gradually. */ +static inline bool +decay_gradually(const decay_t *decay) { + ssize_t decay_ms = decay_ms_read(decay); + return decay_ms > 0; +} + /* * Returns true if the passed in decay time setting is valid. * < -1 : invalid @@ -144,6 +163,12 @@ bool decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); */ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); +/* + * Compute how many of 'npages_new' pages we would need to purge in 'time'. + */ +uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time, + size_t npages_new); + /* Returns true if the epoch advanced and there are pages to purge. 
*/ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t current_npages); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 2e5b9ef0..b2fed594 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -200,6 +200,8 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed); void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); /******************************************************************************/ /* diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 4d3a9e01..7179fd36 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -24,6 +24,7 @@ struct pai_s { /* This function empties out list as a side-effect of being called. */ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); + uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; /* @@ -64,6 +65,11 @@ pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { self->dalloc_batch(tsdn, self, list); } +static inline uint64_t +pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { + return self->time_until_deferred_work(tsdn, self); +} + /* * An implementation of batch allocation that simply calls alloc once for * each item in the list. 
diff --git a/src/arena.c b/src/arena.c index a495ef64..3dd77824 100644 --- a/src/arena.c +++ b/src/arena.c @@ -38,13 +38,6 @@ static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; pa_central_t arena_pa_central_global; -const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP -#undef STEP -}; - div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; @@ -65,6 +58,9 @@ static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +static void +arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new); /******************************************************************************/ @@ -189,6 +185,20 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } } +static void +arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, + bool is_background_thread) { + if (!background_thread_enabled() || is_background_thread) { + return; + } + background_thread_info_t *info = + arena_background_thread_info_get(arena); + if (background_thread_indefinite_sleep(info)) { + arena_maybe_do_deferred_work(tsdn, arena, + &arena->pa_shard.pac.decay_dirty, 0); + } +} + void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -420,8 +430,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, - npages_new); + arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new); } return false; @@ -462,6 +471,65 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, 
all); } +static void +arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + size_t npages_new) { + background_thread_info_t *info = arena_background_thread_info_get( + arena); + if (malloc_mutex_trylock(tsdn, &info->mtx)) { + /* + * Background thread may hold the mutex for a long period of + * time. We'd like to avoid the variance on application + * threads. So keep this non-blocking, and leave the work to a + * future epoch. + */ + return; + } + if (!background_thread_running(info)) { + goto label_done; + } + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + goto label_done; + } + if (!decay_gradually(decay)) { + goto label_done_unlock2; + } + + nstime_t diff; + nstime_init(&diff, background_thread_wakeup_time_get(info)); + if (nstime_compare(&diff, &decay->epoch) <= 0) { + goto label_done_unlock2; + } + nstime_subtract(&diff, &decay->epoch); + + if (npages_new > 0) { + uint64_t npurge_new = decay_npages_purge_in(decay, &diff, + npages_new); + info->npages_to_purge_new += npurge_new; + } + + bool should_signal; + if (info->npages_to_purge_new > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD) { + should_signal = true; + } else if (unlikely(background_thread_indefinite_sleep(info)) && + (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || + ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || + info->npages_to_purge_new > 0)) { + should_signal = true; + } else { + should_signal = false; + } + + if (should_signal) { + info->npages_to_purge_new = 0; + background_thread_wakeup_early(info, &diff); + } +label_done_unlock2: + malloc_mutex_unlock(tsdn, &decay->mtx); +label_done: + malloc_mutex_unlock(tsdn, &info->mtx); +} + /* Called from background threads. 
*/ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { diff --git a/src/background_thread.c b/src/background_thread.c index 4951cd1a..9e577cb3 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -60,8 +60,9 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED -void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - decay_t *decay, size_t npages_new) NOT_REACHED +bool background_thread_running(background_thread_info_t *info) NOT_REACHED +void background_thread_wakeup_early(background_thread_info_t *info, + nstime_t *remaining_sleep) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED @@ -98,8 +99,6 @@ set_current_thread_affinity(int cpu) { #endif } -/* Threshold for determining when to wake up the background thread. */ -#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024) #define BILLION UINT64_C(1000000000) /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) @@ -173,55 +172,10 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { return false; } -static inline uint64_t -arena_decay_compute_purge_interval(tsdn_t *tsdn, decay_t *decay, - size_t npages) { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* Use minimal interval if decay is contended. */ - return BACKGROUND_THREAD_MIN_INTERVAL_NS; - } - uint64_t decay_ns = decay_ns_until_purge(decay, npages, - BACKGROUND_THREAD_NPAGES_THRESHOLD); - malloc_mutex_unlock(tsdn, &decay->mtx); - - return decay_ns < BACKGROUND_THREAD_MIN_INTERVAL_NS ? 
- BACKGROUND_THREAD_MIN_INTERVAL_NS : - decay_ns; -} - - -static inline uint64_t -arena_decay_compute_min_purge_interval(tsdn_t *tsdn, arena_t *arena) { - uint64_t dirty, muzzy; - dirty = arena_decay_compute_purge_interval(tsdn, - &arena->pa_shard.pac.decay_dirty, - ecache_npages_get(&arena->pa_shard.pac.ecache_dirty)); - if (dirty == BACKGROUND_THREAD_MIN_INTERVAL_NS) { - return dirty; - } - muzzy = arena_decay_compute_purge_interval(tsdn, - &arena->pa_shard.pac.decay_muzzy, - ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy)); - - uint64_t min_so_far = dirty < muzzy ? dirty : muzzy; - if (opt_background_thread_hpa_interval_max_ms >= 0) { - uint64_t hpa_interval = 1000 * 1000 * - (uint64_t)opt_background_thread_hpa_interval_max_ms; - if (hpa_interval < min_so_far) { - if (hpa_interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) { - min_so_far = BACKGROUND_THREAD_MIN_INTERVAL_NS; - } else { - min_so_far = hpa_interval; - } - } - } - - return min_so_far; -} - static inline void -background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { - uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; +background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, + unsigned ind) { + uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX; unsigned narenas = narenas_total_get(); for (unsigned i = ind; i < narenas; i += max_background_threads) { @@ -230,19 +184,29 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne continue; } arena_do_deferred_work(tsdn, arena); - if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) { + if (ns_until_deferred <= BACKGROUND_THREAD_MIN_INTERVAL_NS) { /* Min interval will be used. 
*/ continue; } - uint64_t interval = arena_decay_compute_min_purge_interval(tsdn, - arena); - assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS); - if (interval != DECAY_UNBOUNDED_TIME_TO_PURGE && - min_interval > interval) { - min_interval = interval; + uint64_t ns_arena_deferred = pa_shard_time_until_deferred_work( + tsdn, &arena->pa_shard); + if (ns_arena_deferred < ns_until_deferred) { + ns_until_deferred = ns_arena_deferred; } } - background_thread_sleep(tsdn, info, min_interval); + + uint64_t sleep_ns; + if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) { + sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP; + } else { + sleep_ns = + (ns_until_deferred < BACKGROUND_THREAD_MIN_INTERVAL_NS) + ? BACKGROUND_THREAD_MIN_INTERVAL_NS + : ns_until_deferred; + + } + + background_thread_sleep(tsdn, info, sleep_ns); } static bool @@ -609,88 +573,23 @@ background_threads_disable(tsd_t *tsd) { return false; } -/* Check if we need to signal the background thread early. */ +bool +background_thread_running(background_thread_info_t *info) { + return info->state == background_thread_started; +} + void -background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - size_t npages_new) { - background_thread_info_t *info = arena_background_thread_info_get( - arena); - if (malloc_mutex_trylock(tsdn, &info->mtx)) { - /* - * Background thread may hold the mutex for a long period of - * time. We'd like to avoid the variance on application - * threads. So keep this non-blocking, and leave the work to a - * future epoch. - */ +background_thread_wakeup_early(background_thread_info_t *info, + nstime_t *remaining_sleep) { + /* + * This is an optimization to increase batching. At this point + * we know that background thread wakes up soon, so the time to cache + * the just freed memory is bounded and low. 
+ */ + if (nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } - - if (info->state != background_thread_started) { - goto label_done; - } - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - goto label_done; - } - - ssize_t decay_time = decay_ms_read(decay); - if (decay_time <= 0) { - /* Purging is eagerly done or disabled currently. */ - goto label_done_unlock2; - } - uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); - assert(decay_interval_ns > 0); - - nstime_t diff; - nstime_init(&diff, background_thread_wakeup_time_get(info)); - if (nstime_compare(&diff, &decay->epoch) <= 0) { - goto label_done_unlock2; - } - nstime_subtract(&diff, &decay->epoch); - if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { - goto label_done_unlock2; - } - - if (npages_new > 0) { - size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns); - /* - * Compute how many new pages we would need to purge by the next - * wakeup, which is used to determine if we should signal the - * background thread. 
- */ - uint64_t npurge_new; - if (n_epoch >= SMOOTHSTEP_NSTEPS) { - npurge_new = npages_new; - } else { - uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; - assert(h_steps_max >= - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); - npurge_new = npages_new * (h_steps_max - - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); - npurge_new >>= SMOOTHSTEP_BFP; - } - info->npages_to_purge_new += npurge_new; - } - - bool should_signal; - if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) { - should_signal = true; - } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || - ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || - info->npages_to_purge_new > 0)) { - should_signal = true; - } else { - should_signal = false; - } - - if (should_signal) { - info->npages_to_purge_new = 0; - pthread_cond_signal(&info->cond); - } -label_done_unlock2: - malloc_mutex_unlock(tsdn, &decay->mtx); -label_done: - malloc_mutex_unlock(tsdn, &info->mtx); + pthread_cond_signal(&info->cond); } void diff --git a/src/decay.c b/src/decay.c index fdbd63d8..cdb8487b 100644 --- a/src/decay.c +++ b/src/decay.c @@ -3,6 +3,13 @@ #include "jemalloc/internal/decay.h" +const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { +#define STEP(step, h, x, y) \ + h, + SMOOTHSTEP +#undef STEP +}; + /* * Generate a new deadline that is uniformly random within the next epoch after * the current one. 
@@ -147,6 +154,25 @@ decay_deadline_reached(const decay_t *decay, const nstime_t *time) { return (nstime_compare(&decay->deadline, time) <= 0); } +uint64_t +decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { + uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); + size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns); + + uint64_t npages_purge; + if (n_epoch >= SMOOTHSTEP_NSTEPS) { + npages_purge = npages_new; + } else { + uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; + assert(h_steps_max >= + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npages_purge = npages_new * (h_steps_max - + h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npages_purge >>= SMOOTHSTEP_BFP; + } + return npages_purge; +} + bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, size_t npages_current) { @@ -214,9 +240,7 @@ decay_npurge_after_interval(decay_t *decay, size_t interval) { uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, uint64_t npages_threshold) { - ssize_t decay_time = decay_ms_read(decay); - if (decay_time <= 0) { - /* Purging is eagerly done or disabled currently. 
*/ + if (!decay_gradually(decay)) { return DECAY_UNBOUNDED_TIME_TO_PURGE; } uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); diff --git a/src/hpa.c b/src/hpa.c index 6b7517d8..d45a3bd0 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -19,6 +19,7 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list); +static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool hpa_supported() { @@ -218,6 +219,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; shard->pai.dalloc_batch = &hpa_dalloc_batch; + shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work; hpa_do_consistency_checks(shard); @@ -850,6 +852,11 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { hpa_dalloc_batch(tsdn, self, &dalloc_list); } +static uint64_t +hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { + return opt_background_thread_hpa_interval_max_ms; +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); diff --git a/src/pa.c b/src/pa.c index 93da02e0..c5b8daa7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -96,6 +96,11 @@ pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { } } +static bool +pa_shard_uses_hpa(pa_shard_t *shard) { + return atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED); +} + void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); @@ -118,7 +123,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, WITNESS_RANK_CORE, 0); edata_t *edata = NULL; - if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + if (pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, zero); } @@ -226,7 +231,7 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { void 
pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed) { - if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + if (pa_shard_uses_hpa(shard)) { hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, deferral_allowed); } @@ -234,7 +239,63 @@ pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { - if (atomic_load_b(&shard->use_hpa, ATOMIC_RELAXED)) { + if (pa_shard_uses_hpa(shard)) { hpa_shard_do_deferred_work(tsdn, &shard->hpa_shard); } } + +static inline uint64_t +pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. */ + return BACKGROUND_THREAD_DEFERRED_MIN; + } + uint64_t result = decay_ns_until_purge(decay, npages, + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); + + malloc_mutex_unlock(tsdn, &decay->mtx); + return result; +} + +/* + * Get time until next deferred work ought to happen. If there are multiple + * things that have been deferred, this function calculates the time until + * the soonest of those things. 
+ */ +uint64_t +pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { + uint64_t time; + time = pa_shard_ns_until_purge(tsdn, + &shard->pac.decay_dirty, + ecache_npages_get(&shard->pac.ecache_dirty)); + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + + uint64_t muzzy = pa_shard_ns_until_purge(tsdn, + &shard->pac.decay_muzzy, + ecache_npages_get(&shard->pac.ecache_muzzy)); + if (muzzy < time) { + time = muzzy; + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + } + + uint64_t pac = pai_time_until_deferred_work(tsdn, &shard->pac.pai); + if (pac < time) { + time = pac; + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + } + + if (pa_shard_uses_hpa(shard)) { + uint64_t hpa = + pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai); + if (hpa < time) { + time = hpa; + } + } + return time; +} diff --git a/src/pac.c b/src/pac.c index 0737e68c..c611d919 100644 --- a/src/pac.c +++ b/src/pac.c @@ -10,6 +10,7 @@ static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size); static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); +static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); static ehooks_t * pac_ehooks_get(pac_t *pac) { @@ -96,6 +97,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; pac->pai.dalloc_batch = &pai_dalloc_batch_default; + pac->pai.time_until_deferred_work = &pac_time_until_deferred_work; return false; } @@ -196,6 +198,11 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) { ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); } +static uint64_t +pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { + return BACKGROUND_THREAD_DEFERRED_MAX; +} + bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t 
*new_limit) { diff --git a/test/unit/decay.c b/test/unit/decay.c index 72484c80..67722199 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -36,6 +36,37 @@ TEST_BEGIN(test_decay_ms_valid) { } TEST_END +TEST_BEGIN(test_decay_npages_purge_in) { + decay_t decay; + memset(&decay, 0, sizeof(decay)); + + nstime_t curtime; + nstime_init(&curtime, 0); + + uint64_t decay_ms = 1000; + nstime_t decay_nstime; + nstime_init(&decay_nstime, decay_ms * 1000 * 1000); + expect_false(decay_init(&decay, &curtime, (ssize_t)decay_ms), + "Failed to initialize decay"); + + const size_t new_pages = 100; + + nstime_t time; + nstime_copy(&time, &decay_nstime); + expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), + new_pages, "Not all pages are expected to decay in decay_ms"); + + nstime_init(&time, 0); + expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), 0, + "More than zero pages are expected to instantly decay"); + + nstime_copy(&time, &decay_nstime); + nstime_idivide(&time, 2); + expect_u64_eq(decay_npages_purge_in(&decay, &time, new_pages), + new_pages / 2, "Not half of pages decay in half the decay period"); +} +TEST_END + TEST_BEGIN(test_decay_maybe_advance_epoch) { decay_t decay; memset(&decay, 0, sizeof(decay)); @@ -244,6 +275,7 @@ main(void) { return test( test_decay_init, test_decay_ms_valid, + test_decay_npages_purge_in, test_decay_maybe_advance_epoch, test_decay_empty, test_decay, From 97da57c13afec4690a38adf7c94bf97ccd5bfdff Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 18 Aug 2021 12:22:43 -0700 Subject: [PATCH 2110/2608] HPA: Add min_purge_interval_ms option This rate limiting option is required to avoid purging too often. 
--- include/jemalloc/internal/hpa_opts.h | 9 ++++++++- src/ctl.c | 4 ++++ src/jemalloc.c | 10 ++++++++-- src/stats.c | 1 + 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 2548f44f..ee84fea1 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -43,6 +43,11 @@ struct hpa_shard_opts_s { * actually get hugified. */ uint64_t hugify_delay_ms; + + /* + * Minimum amount of time between purges. + */ + uint64_t min_purge_interval_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -61,7 +66,9 @@ struct hpa_shard_opts_s { */ \ false, \ /* hugify_delay_ms */ \ - 10 * 1000 \ + 10 * 1000, \ + /* min_purge_interval_ms */ \ + 5 * 1000 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 253341a1..9647478d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -97,6 +97,7 @@ CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) +CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -408,6 +409,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, + {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2116,6 +2118,8 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, + uint64_t) /* * This will have to change before we publicly 
document this option; fxp_t and diff --git a/src/jemalloc.c b/src/jemalloc.c index d5e886e7..66e36855 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1447,9 +1447,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - CONF_HANDLE_SIZE_T( + CONF_HANDLE_UINT64_T( opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms", - 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + false); + + CONF_HANDLE_UINT64_T( + opt_hpa_opts.min_purge_interval_ms, + "hpa_min_purge_interval_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { diff --git a/src/stats.c b/src/stats.c index 3a2806ed..25ee2355 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1472,6 +1472,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") + OPT_WRITE_UINT64("hpa_min_purge_interval_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* From 8229cc77c51109737774bcd053adab001de21e0e Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 18 Aug 2021 19:24:37 -0700 Subject: [PATCH 2111/2608] Wake up background threads on demand This change allows every allocator conforming to PAI communicate that it deferred some work for the future. Without it if a background thread goes into indefinite sleep, there is no way to notify it about upcoming deferred work. 
--- include/jemalloc/internal/arena_externs.h | 2 +- .../internal/background_thread_externs.h | 2 +- .../internal/background_thread_structs.h | 2 +- include/jemalloc/internal/hpa.h | 5 + include/jemalloc/internal/pa.h | 9 +- include/jemalloc/internal/pai.h | 50 ++++--- src/arena.c | 113 +++++++++------- src/background_thread.c | 20 ++- src/decay.c | 2 +- src/hpa.c | 122 ++++++++++++++---- src/large.c | 26 ++-- src/pa.c | 25 ++-- src/pac.c | 28 ++-- src/pai.c | 14 +- src/sec.c | 49 ++++--- test/unit/decay.c | 2 +- test/unit/hpa.c | 46 +++++-- test/unit/hpa_background_thread.c | 18 +++ test/unit/hpa_background_thread.sh | 2 +- test/unit/pa.c | 7 +- test/unit/sec.c | 115 +++++++++++------ 21 files changed, 445 insertions(+), 214 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 02e7c1cc..b9231c5d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -42,7 +42,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); -void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena); +void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 3d1ea6ce..a2d79adf 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -13,7 +13,7 @@ extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool 
background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); -bool background_thread_running(background_thread_info_t* info); +bool background_thread_is_started(background_thread_info_t* info); void background_thread_wakeup_early(background_thread_info_t *info, nstime_t *remaining_sleep); void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index b884b682..83a91984 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -20,7 +20,7 @@ #define BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED 5000 #define BACKGROUND_THREAD_DEFERRED_MIN UINT64_C(0) -#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_C(-1) +#define BACKGROUND_THREAD_DEFERRED_MAX UINT64_MAX typedef enum { background_thread_stopped, diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 46878a89..f3562853 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -136,6 +136,11 @@ struct hpa_shard_s { * stats. */ hpa_shard_nonderived_stats_t stats; + + /* + * Last time we performed purge on this shard. + */ + nstime_t last_purge; }; /* diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index b2fed594..97834131 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -167,16 +167,17 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool zero); + size_t alignment, bool slab, szind_t szind, bool zero, + bool *deferred_work_generated); /* Returns true on error, in which case nothing changed. 
*/ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool zero); + size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated); /* * The same. Sets *generated_dirty to true if we produced new dirty pages, and * false otherwise. */ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool *generated_dirty); + size_t new_size, szind_t szind, bool *deferred_work_generated); /* * Frees the given edata back to the pa. Sets *generated_dirty if we produced * new dirty pages (well, we alwyas set it for now; but this need not be the @@ -185,7 +186,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, * consistent with the shrink pathway and our error codes here). */ void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool *generated_dirty); + bool *deferred_work_generated); bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 7179fd36..ca5f616a 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -7,7 +7,7 @@ typedef struct pai_s pai_t; struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); /* * Returns the number of extents added to the list (which may be fewer * than requested, in case of OOM). The list should already be @@ -15,15 +15,18 @@ struct pai_s { * the results are not necessarily zeroed. 
*/ size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results); + size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, + bool *deferred_work_generated); bool (*shrink)(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); - void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); + void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); /* This function empties out list as a side-effect of being called. */ void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list); + edata_list_active_t *list, bool *deferred_work_generated); uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; @@ -33,36 +36,43 @@ struct pai_s { */ static inline edata_t * -pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { - return self->alloc(tsdn, self, size, alignment, zero); +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool *deferred_work_generated) { + return self->alloc(tsdn, self, size, alignment, zero, + deferred_work_generated); } static inline size_t pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results) { - return self->alloc_batch(tsdn, self, size, nallocs, results); + edata_list_active_t *results, bool *deferred_work_generated) { + return self->alloc_batch(tsdn, self, size, nallocs, results, + deferred_work_generated); } static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { - return self->expand(tsdn, self, edata, old_size, new_size, zero); + size_t new_size, bool zero, bool *deferred_work_generated) { + return 
self->expand(tsdn, self, edata, old_size, new_size, zero, + deferred_work_generated); } static inline bool pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { - return self->shrink(tsdn, self, edata, old_size, new_size); + size_t new_size, bool *deferred_work_generated) { + return self->shrink(tsdn, self, edata, old_size, new_size, + deferred_work_generated); } static inline void -pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { - self->dalloc(tsdn, self, edata); +pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { + self->dalloc(tsdn, self, edata, deferred_work_generated); } static inline void -pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { - self->dalloc_batch(tsdn, self, list); +pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, + bool *deferred_work_generated) { + self->dalloc_batch(tsdn, self, list, deferred_work_generated); } static inline uint64_t @@ -75,9 +85,9 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { * each item in the list. */ size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results); + size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); /* Ditto, for dalloc. */ void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list); + edata_list_active_t *list, bool *deferred_work_generated); #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/src/arena.c b/src/arena.c index 3dd77824..c720bcb1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -199,15 +199,17 @@ arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, } } -void arena_handle_new_dirty_pages(tsdn_t *tsdn, arena_t *arena) { +/* + * React to deferred work generated by a PAI function. 
+ */ +void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (arena_decay_ms_get(arena, extent_state_dirty) == 0) { + if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) { arena_decay_dirty(tsdn, arena, false, true); - } else { - arena_background_thread_inactivity_check(tsdn, arena, false); } + arena_background_thread_inactivity_check(tsdn, arena, false); } static void * @@ -316,11 +318,14 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { + bool deferred_work_generated; szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero); + /* slab */ false, szind, zero, &deferred_work_generated); + + assert(deferred_work_generated == false); if (edata != NULL) { if (config_stats) { @@ -471,6 +476,45 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { arena_decay_muzzy(tsdn, arena, is_background_thread, all); } +static bool +arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, + background_thread_info_t *info, nstime_t *remaining_sleep, + size_t npages_new) { + malloc_mutex_assert_owner(tsdn, &info->mtx); + + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + return false; + } + + if (!decay_gradually(decay)) { + malloc_mutex_unlock(tsdn, &decay->mtx); + return false; + } + + nstime_init(remaining_sleep, background_thread_wakeup_time_get(info)); + if (nstime_compare(remaining_sleep, &decay->epoch) <= 0) { + malloc_mutex_unlock(tsdn, &decay->mtx); + return false; + } + nstime_subtract(remaining_sleep, &decay->epoch); + if (npages_new > 0) { + uint64_t npurge_new = decay_npages_purge_in(decay, + remaining_sleep, npages_new); + info->npages_to_purge_new += npurge_new; + } + 
malloc_mutex_unlock(tsdn, &decay->mtx); + return info->npages_to_purge_new > + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; +} + +/* + * Check if deferred work needs to be done sooner than planned. + * For decay we might want to wake up earlier because of an influx of dirty + * pages. Rather than waiting for previously estimated time, we proactively + * purge those pages. + * If background thread sleeps indefinitely, always wake up because some + * deferred work has been generated. + */ static void arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new) { @@ -485,47 +529,18 @@ arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, */ return; } - if (!background_thread_running(info)) { + if (!background_thread_is_started(info)) { goto label_done; } - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - goto label_done; - } - if (!decay_gradually(decay)) { - goto label_done_unlock2; - } - nstime_t diff; - nstime_init(&diff, background_thread_wakeup_time_get(info)); - if (nstime_compare(&diff, &decay->epoch) <= 0) { - goto label_done_unlock2; - } - nstime_subtract(&diff, &decay->epoch); - - if (npages_new > 0) { - uint64_t npurge_new = decay_npages_purge_in(decay, &diff, - npages_new); - info->npages_to_purge_new += npurge_new; - } - - bool should_signal; - if (info->npages_to_purge_new > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD) { - should_signal = true; - } else if (unlikely(background_thread_indefinite_sleep(info)) && - (ecache_npages_get(&arena->pa_shard.pac.ecache_dirty) > 0 || - ecache_npages_get(&arena->pa_shard.pac.ecache_muzzy) > 0 || - info->npages_to_purge_new > 0)) { - should_signal = true; - } else { - should_signal = false; - } - - if (should_signal) { + nstime_t remaining_sleep; + if (background_thread_indefinite_sleep(info)) { + background_thread_wakeup_early(info, NULL); + } else if (arena_should_decay_early(tsdn, arena, decay, info, + &remaining_sleep, npages_new)) { info->npages_to_purge_new = 0; - 
background_thread_wakeup_early(info, &diff); + background_thread_wakeup_early(info, &remaining_sleep); } -label_done_unlock2: - malloc_mutex_unlock(tsdn, &decay->mtx); label_done: malloc_mutex_unlock(tsdn, &info->mtx); } @@ -539,10 +554,10 @@ arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { - bool generated_dirty; - pa_dalloc(tsdn, &arena->pa_shard, slab, &generated_dirty); - if (generated_dirty) { - arena_handle_new_dirty_pages(tsdn, arena); + bool deferred_work_generated; + pa_dalloc(tsdn, &arena->pa_shard, slab, &deferred_work_generated); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } } @@ -803,11 +818,17 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { + bool deferred_work_generated; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, /* zero */ false); + PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, + &deferred_work_generated); + + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); + } if (slab == NULL) { return NULL; diff --git a/src/background_thread.c b/src/background_thread.c index 9e577cb3..69ef983f 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -119,7 +119,8 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, int ret; if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { - assert(background_thread_indefinite_sleep(info)); + background_thread_wakeup_time_set(tsdn, info, + BACKGROUND_THREAD_INDEFINITE_SLEEP); ret = pthread_cond_wait(&info->cond, &info->mtx.lock); assert(ret == 0); } else { @@ -144,8 +145,6 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, 
assert(!background_thread_indefinite_sleep(info)); ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts); assert(ret == ETIMEDOUT || ret == 0); - background_thread_wakeup_time_set(tsdn, info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); } if (config_stats) { gettimeofday(&tv, NULL); @@ -177,13 +176,21 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX; unsigned narenas = narenas_total_get(); + bool slept_indefinitely = background_thread_indefinite_sleep(info); for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; } - arena_do_deferred_work(tsdn, arena); + /* + * If thread was woken up from the indefinite sleep, don't + * do the work instantly, but rather check when the deferred + * work that caused this thread to wake up is scheduled for. + */ + if (!slept_indefinitely) { + arena_do_deferred_work(tsdn, arena); + } if (ns_until_deferred <= BACKGROUND_THREAD_MIN_INTERVAL_NS) { /* Min interval will be used. */ continue; @@ -574,7 +581,7 @@ background_threads_disable(tsd_t *tsd) { } bool -background_thread_running(background_thread_info_t *info) { +background_thread_is_started(background_thread_info_t *info) { return info->state == background_thread_started; } @@ -586,7 +593,8 @@ background_thread_wakeup_early(background_thread_info_t *info, * we know that background thread wakes up soon, so the time to cache * the just freed memory is bounded and low. 
*/ - if (nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { + if (remaining_sleep && nstime_ns(remaining_sleep) < + BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } pthread_cond_signal(&info->cond); diff --git a/src/decay.c b/src/decay.c index cdb8487b..d801b2bc 100644 --- a/src/decay.c +++ b/src/decay.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/decay.h" -const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { +static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { #define STEP(step, h, x, y) \ h, SMOOTHSTEP diff --git a/src/hpa.c b/src/hpa.c index d45a3bd0..d7422a3c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -9,16 +9,17 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results); + size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list); + edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool @@ -366,6 +367,13 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, } } +static bool +hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, 
&shard->mtx); + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + return to_hugify != NULL || hpa_should_purge(tsdn, shard); +} + /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -429,6 +437,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { shard->npending_purge -= num_to_purge; shard->stats.npurge_passes++; shard->stats.npurges += purges_this_pass; + shard->central->hooks.curtime(&shard->last_purge); if (dehugify) { shard->stats.ndehugifies++; } @@ -615,7 +624,8 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, static size_t hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - bool *oom, size_t nallocs, edata_list_active_t *results) { + bool *oom, size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { malloc_mutex_lock(tsdn, &shard->mtx); size_t nsuccess = 0; for (; nsuccess < nallocs; nsuccess++) { @@ -628,18 +638,20 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, } hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); + *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); return nsuccess; } static size_t hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - size_t nallocs, edata_list_active_t *results) { + size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { assert(size <= shard->opts.slab_max_alloc); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs, results); + nallocs, results, deferred_work_generated); if (nsuccess == nallocs || oom) { return nsuccess; @@ -655,7 +667,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * in between when we dropped the main mutex and grabbed the grow mutex. 
*/ nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs - nsuccess, results); + nallocs - nsuccess, results, deferred_work_generated); if (nsuccess == nallocs || oom) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; @@ -683,7 +695,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, malloc_mutex_unlock(tsdn, &shard->mtx); nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs - nsuccess, results); + nallocs - nsuccess, results, deferred_work_generated); /* * Drop grow_mtx before doing deferred work; other threads blocked on it * should be allowed to proceed while we're working. @@ -704,7 +716,7 @@ hpa_from_pai(pai_t *self) { static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results) { + edata_list_active_t *results, bool *deferred_work_generated) { assert(nallocs > 0); assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -716,7 +728,7 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, } size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs, - results); + results, deferred_work_generated); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -737,7 +749,8 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, } static edata_t * -hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { +hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -753,23 +766,25 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { edata_list_active_t results; edata_list_active_init(&results); size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results); + &results, 
deferred_work_generated); assert(nallocs == 0 || nallocs == 1); edata_t *edata = edata_list_active_first(&results); return edata; } static bool -hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero) { +hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool zero, bool *deferred_work_generated) { /* Expand not yet supported. */ + *deferred_work_generated = false; return true; } static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size) { + size_t old_size, size_t new_size, bool *deferred_work_generated) { /* Shrink not yet supported. */ + *deferred_work_generated = false; return true; } @@ -825,7 +840,8 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { } static void -hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { +hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, + bool *deferred_work_generated) { hpa_shard_t *shard = hpa_from_pai(self); edata_t *edata; @@ -840,21 +856,83 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list) { hpa_dalloc_locked(tsdn, shard, edata); } hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); + *deferred_work_generated = + hpa_shard_has_deferred_work(tsdn, shard); + malloc_mutex_unlock(tsdn, &shard->mtx); } static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { /* Just a dalloc_batch of size 1; this lets us share logic. */ edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); edata_list_active_append(&dalloc_list, edata); - hpa_dalloc_batch(tsdn, self, &dalloc_list); + hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated); } +/* + * Calculate time until either purging or hugification ought to happen. + * Called by background threads. 
+ */ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { - return opt_background_thread_hpa_interval_max_ms; + hpa_shard_t *shard = hpa_from_pai(self); + uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX; + + malloc_mutex_lock(tsdn, &shard->mtx); + + hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); + if (to_hugify != NULL) { + nstime_t time_hugify_allowed = + hpdata_time_hugify_allowed(to_hugify); + nstime_t nstime; + shard->central->hooks.curtime(&nstime); + nstime_subtract(&nstime, &time_hugify_allowed); + uint64_t since_hugify_allowed_ms = nstime_msec(&nstime); + /* + * If not enough time has passed since hugification was allowed, + * sleep for the rest. + */ + if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) { + time_ns = shard->opts.hugify_delay_ms - since_hugify_allowed_ms; + time_ns *= 1000 * 1000; + } else { + malloc_mutex_unlock(tsdn, &shard->mtx); + return BACKGROUND_THREAD_DEFERRED_MIN; + } + } + + if (hpa_should_purge(tsdn, shard)) { + /* + * If we haven't purged before, no need to check interval + * between purges. Simply purge as soon as possible. 
+ */ + if (shard->stats.npurge_passes == 0) { + malloc_mutex_unlock(tsdn, &shard->mtx); + return BACKGROUND_THREAD_DEFERRED_MIN; + } + nstime_t nstime; + shard->central->hooks.curtime(&nstime); + nstime_subtract(&nstime, &shard->last_purge); + uint64_t since_last_purge_ms = nstime_msec(&nstime); + + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + uint64_t until_purge_ns; + until_purge_ns = shard->opts.min_purge_interval_ms - + since_last_purge_ms; + until_purge_ns *= 1000 * 1000; + + if (until_purge_ns < time_ns) { + time_ns = until_purge_ns; + } + } else { + time_ns = BACKGROUND_THREAD_DEFERRED_MIN; + } + } + malloc_mutex_unlock(tsdn, &shard->mtx); + return time_ns; } void diff --git a/src/large.c b/src/large.c index bd29e5c5..6dbb3d91 100644 --- a/src/large.c +++ b/src/large.c @@ -64,14 +64,15 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { return true; } - bool generated_dirty; + bool deferred_work_generated; bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, - usize + sz_large_pad, sz_size2index(usize), &generated_dirty); + usize + sz_large_pad, sz_size2index(usize), + &deferred_work_generated); if (err) { return true; } - if (generated_dirty) { - arena_handle_new_dirty_pages(tsdn, arena); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } arena_extent_ralloc_large_shrink(tsdn, arena, edata, old_usize); @@ -88,8 +89,15 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, size_t new_size = usize + sz_large_pad; szind_t szind = sz_size2index(usize); + + bool deferred_work_generated; bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, - szind, zero); + szind, zero, &deferred_work_generated); + + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); + } + if (err) { return true; } @@ -241,10 +249,10 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static void large_dalloc_finish_impl(tsdn_t *tsdn, 
arena_t *arena, edata_t *edata) { - bool generated_dirty; - pa_dalloc(tsdn, &arena->pa_shard, edata, &generated_dirty); - if (generated_dirty) { - arena_handle_new_dirty_pages(tsdn, arena); + bool deferred_work_generated; + pa_dalloc(tsdn, &arena->pa_shard, edata, &deferred_work_generated); + if (deferred_work_generated) { + arena_handle_deferred_work(tsdn, arena); } } diff --git a/src/pa.c b/src/pa.c index c5b8daa7..a29e10b6 100644 --- a/src/pa.c +++ b/src/pa.c @@ -118,21 +118,23 @@ pa_get_pai(pa_shard_t *shard, edata_t *edata) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool zero) { + bool slab, szind_t szind, bool zero, bool *deferred_work_generated) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_t *edata = NULL; + *deferred_work_generated = false; if (pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, - zero); + zero, deferred_work_generated); } /* * Fall back to the PAC if the HPA is off or couldn't serve the given * allocation request. 
*/ if (edata == NULL) { - edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero); + edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero, + deferred_work_generated); } if (edata != NULL) { @@ -152,7 +154,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool zero) { + size_t new_size, szind_t szind, bool zero, bool *deferred_work_generated) { assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); @@ -161,7 +163,8 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, pai_t *pai = pa_get_pai(shard, edata); - bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero); + bool error = pai_expand(tsdn, pai, edata, old_size, new_size, zero, + deferred_work_generated); if (error) { return true; } @@ -174,20 +177,19 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, - size_t new_size, szind_t szind, bool *generated_dirty) { + size_t new_size, szind_t szind, bool *deferred_work_generated) { assert(new_size < old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); size_t shrink_amount = old_size - new_size; - *generated_dirty = false; pai_t *pai = pa_get_pai(shard, edata); - bool error = pai_shrink(tsdn, pai, edata, old_size, new_size); + bool error = pai_shrink(tsdn, pai, edata, old_size, new_size, + deferred_work_generated); if (error) { return true; } pa_nactive_sub(shard, shrink_amount >> LG_PAGE); - *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); edata_szind_set(edata, szind); emap_remap(tsdn, shard->emap, edata, szind, /* slab */ false); @@ -196,7 +198,7 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, void pa_dalloc(tsdn_t *tsdn, pa_shard_t 
*shard, edata_t *edata, - bool *generated_dirty) { + bool *deferred_work_generated) { emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (edata_slab_get(edata)) { emap_deregister_interior(tsdn, shard->emap, edata); @@ -206,8 +208,7 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, edata_szind_set(edata, SC_NSIZES); pa_nactive_sub(shard, edata_size_get(edata) >> LG_PAGE); pai_t *pai = pa_get_pai(shard, edata); - pai_dalloc(tsdn, pai, edata); - *generated_dirty = (edata_pai_get(edata) == EXTENT_PAI_PAC); + pai_dalloc(tsdn, pai, edata, deferred_work_generated); } bool diff --git a/src/pac.c b/src/pac.c index c611d919..2221c8db 100644 --- a/src/pac.c +++ b/src/pac.c @@ -4,12 +4,13 @@ #include "jemalloc/internal/pac.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); static ehooks_t * @@ -109,9 +110,11 @@ pac_may_have_muzzy(pac_t *pac) { static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero) { + bool zero, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; + *deferred_work_generated = false; + ehooks_t *ehooks = pac_ehooks_get(pac); edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, NULL, size, alignment, zero); @@ -133,10 +136,12 @@ 
pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { + size_t new_size, bool zero, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); + *deferred_work_generated = false; + size_t mapped_add = 0; size_t expand_amount = new_size - old_size; @@ -171,12 +176,13 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { + size_t new_size, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; - ehooks_t *ehooks = pac_ehooks_get(pac); + size_t shrink_amount = old_size - new_size; + *deferred_work_generated = false; if (ehooks_split_will_fail(ehooks)) { return true; @@ -188,14 +194,18 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail); + *deferred_work_generated = true; return false; } static void -pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); + /* Purging of deallocated pages is deferred */ + *deferred_work_generated = true; } static uint64_t diff --git a/src/pai.c b/src/pai.c index bd6966c9..e863a9be 100644 --- a/src/pai.c +++ b/src/pai.c @@ -2,11 +2,13 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" size_t -pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results) { +pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, + edata_list_active_t *results, bool *deferred_work_generated) { for (size_t i = 0; i < nallocs; i++) { + 
bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false); + /* zero */ false, &deferred_by_alloc); + *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; } @@ -17,10 +19,12 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list) { + edata_list_active_t *list, bool *deferred_work_generated) { edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { + bool deferred_by_dalloc = false; edata_list_active_remove(list, edata); - pai_dalloc(tsdn, self, edata); + pai_dalloc(tsdn, self, edata, &deferred_by_dalloc); + *deferred_work_generated |= deferred_by_dalloc; } } diff --git a/src/sec.c b/src/sec.c index 41753464..c6f611f5 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,12 +4,13 @@ #include "jemalloc/internal/sec.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero); + size_t alignment, bool zero, bool *deferred_work_generated); static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero); + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size); -static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata); + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated); static void sec_bin_init(sec_bin_t *bin) { @@ -147,7 +148,9 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { } malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc_batch(tsdn, sec->fallback, &to_flush); + bool deferred_work_generated; + pai_dalloc_batch(tsdn, sec->fallback, &to_flush, + &deferred_work_generated); } static edata_t * @@ -175,8 +178,9 @@ 
sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); + bool deferred_work_generated; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result); + 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); edata_t *ret = edata_list_active_first(&result); if (ret != NULL) { @@ -213,14 +217,17 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static edata_t * -sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { +sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); sec_t *sec = (sec_t *)self; + *deferred_work_generated = false; if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { - return pai_alloc(tsdn, sec->fallback, size, alignment, zero); + return pai_alloc(tsdn, sec->fallback, size, alignment, zero, + deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -243,7 +250,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { size); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero); + zero, deferred_work_generated); } } return edata; @@ -251,16 +258,18 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero) { static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero) { + size_t new_size, bool zero, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; - return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero); + return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, + deferred_work_generated); } static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size) { + size_t new_size, bool 
*deferred_work_generated) { sec_t *sec = (sec_t *)self; - return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size); + return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, + deferred_work_generated); } static void @@ -281,7 +290,9 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * we're disabling the HPA or resetting the arena, both of which are * rare pathways. */ - pai_dalloc_batch(tsdn, sec->fallback, &to_flush); + bool deferred_work_generated; + pai_dalloc_batch(tsdn, sec->fallback, &to_flush, + &deferred_work_generated); } static void @@ -317,20 +328,24 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static void -sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; if (sec->opts.nshards == 0 || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata); + pai_dalloc(tsdn, sec->fallback, edata, + deferred_work_generated); return; } sec_shard_t *shard = sec_shard_pick(tsdn, sec); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { + *deferred_work_generated = false; sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata); + pai_dalloc(tsdn, sec->fallback, edata, + deferred_work_generated); } } diff --git a/test/unit/decay.c b/test/unit/decay.c index 67722199..bdb6d0a3 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -49,7 +49,7 @@ TEST_BEGIN(test_decay_npages_purge_in) { expect_false(decay_init(&decay, &curtime, (ssize_t)decay_ms), "Failed to initialize decay"); - const size_t new_pages = 100; + size_t new_pages = 100; nstime_t time; nstime_copy(&time, &decay_nstime); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 2d4fa9b9..dc3acc08 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -79,9 +79,12 @@ TEST_BEGIN(test_alloc_max) { 
edata_t *edata; /* Small max */ - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false); + bool deferred_work_generated; + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, + &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, + &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); destroy_test_data(shard); @@ -166,6 +169,8 @@ TEST_BEGIN(test_stress) { mem_tree_t tree; mem_tree_new(&tree); + bool deferred_work_generated; + for (size_t i = 0; i < 100 * 1000; i++) { size_t operation = prng_range_zu(&prng_state, 2); if (operation == 0) { @@ -183,7 +188,8 @@ TEST_BEGIN(test_stress) { size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, - npages * PAGE, PAGE, false); + npages * PAGE, PAGE, false, + &deferred_work_generated); assert_ptr_not_null(edata, "Unexpected allocation failure"); live_edatas[nlive_edatas] = edata; @@ -199,7 +205,8 @@ TEST_BEGIN(test_stress) { live_edatas[victim] = live_edatas[nlive_edatas - 1]; nlive_edatas--; node_remove(&tree, to_free); - pai_dalloc(tsdn, &shard->pai, to_free); + pai_dalloc(tsdn, &shard->pai, to_free, + &deferred_work_generated); } } @@ -218,7 +225,8 @@ TEST_BEGIN(test_stress) { for (size_t i = 0; i < nlive_edatas; i++) { edata_t *to_free = live_edatas[i]; node_remove(&tree, to_free); - pai_dalloc(tsdn, &shard->pai, to_free); + pai_dalloc(tsdn, &shard->pai, to_free, + &deferred_work_generated); } hpa_shard_destroy(tsdn, shard); @@ -244,6 +252,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + bool deferred_work_generated; + enum {NALLOCS = 8}; edata_t *allocs[NALLOCS]; @@ -253,13 +263,13 @@ TEST_BEGIN(test_alloc_dalloc_batch) { */ for (size_t i = 0; i < 
NALLOCS / 2; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } edata_list_active_t allocs_list; edata_list_active_init(&allocs_list); size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list); + &allocs_list, &deferred_work_generated); expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { allocs[i] = edata_list_active_first(&allocs_list); @@ -279,15 +289,17 @@ TEST_BEGIN(test_alloc_dalloc_batch) { for (size_t i = 0; i < NALLOCS / 2; i++) { edata_list_active_append(&allocs_list, allocs[i]); } - pai_dalloc_batch(tsdn, &shard->pai, &allocs_list); + pai_dalloc_batch(tsdn, &shard->pai, &allocs_list, + &deferred_work_generated); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc(tsdn, &shard->pai, allocs[i]); + pai_dalloc(tsdn, &shard->pai, allocs[i], + &deferred_work_generated); } /* Reallocate (individually), and ensure reuse and contiguity. */ for (size_t i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); @@ -355,11 +367,14 @@ TEST_BEGIN(test_defer_time) { hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated; + nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); @@ -374,7 +389,8 @@ TEST_BEGIN(test_defer_time) { /* Purge. Recall that dirty_mult is .25. 
*/ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i]); + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard); @@ -391,14 +407,16 @@ TEST_BEGIN(test_defer_time) { * be marked for pending hugify. */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false); + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* * We would be ineligible for hugification, had we not already met the * threshold before dipping below it. */ - pai_dalloc(tsdn, &shard->pai, edatas[0]); + pai_dalloc(tsdn, &shard->pai, edatas[0], + &deferred_work_generated); /* Wait for the threshold again. */ nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 1907a6dd..c4686831 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -65,6 +65,23 @@ set_background_thread_enabled(bool enabled) { expect_d_eq(0, err, "Unexpected mallctl failure"); } +static void +wait_until_thread_is_enabled(unsigned arena_id) { + tsd_t* tsd = tsd_fetch(); + + bool sleeping = false; + int iterations = 0; + do { + background_thread_info_t *info = + background_thread_info_get(arena_id); + malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + sleeping = background_thread_indefinite_sleep(info); + assert_d_lt(iterations, (int)1e6, + "Waiting for a thread to start for too long"); + } while (!sleeping); +} + static void expect_purging(unsigned arena_ind, bool expect_deferred) { size_t empty_ndirty; @@ -132,6 +149,7 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { expect_purging(arena_ind, false); set_background_thread_enabled(true); + wait_until_thread_is_enabled(arena_ind); 
expect_purging(arena_ind, true); } TEST_END diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 811da8bd..65a56a08 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0,background_thread_hpa_interval_max_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/pa.c b/test/unit/pa.c index 4206e85a..4d3ad5e9 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -87,12 +87,13 @@ static void * do_alloc_free_purge(void *arg) { test_data_t *test_data = (test_data_t *)arg; for (int i = 0; i < 10 * 1000; i++) { + bool deferred_work_generated; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, - PAGE, /* slab */ false, /* szind */ 0, /* zero */ false); + PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, + &deferred_work_generated); assert_ptr_not_null(edata, ""); - bool generated_dirty; pa_dalloc(TSDN_NULL, &test_data->shard, edata, - &generated_dirty); + &deferred_work_generated); malloc_mutex_lock(TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); pac_decay_all(TSDN_NULL, &test_data->shard.pac, diff --git a/test/unit/sec.c b/test/unit/sec.c index 01455c89..82b0c9d9 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -50,8 +50,9 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero) { + size_t alignment, bool zero, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; if (ta->alloc_fail) { return NULL; } @@ -70,8 +71,10 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, static inline size_t pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results) { + 
size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; if (ta->alloc_fail) { return 0; } @@ -92,31 +95,37 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, static bool pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero) { + size_t old_size, size_t new_size, bool zero, + bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; ta->expand_count++; return ta->expand_return_value; } static bool pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size) { + size_t old_size, size_t new_size, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; ta->shrink_count++; return ta->shrink_return_value; } static void -pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata) { +pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, + bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; ta->dalloc_count++; free(edata); } static void pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list) { + edata_list_active_t *list, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; + *deferred_work_generated = false; edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { @@ -168,14 +177,15 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; + bool deferred_work_generated; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, 
&sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); @@ -189,10 +199,12 @@ TEST_BEGIN(test_reuse) { * separation works correctly. */ for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, one_page[i]); + pai_dalloc(tsdn, &sec.pai, one_page[i], + &deferred_work_generated); } for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, two_page[i]); + pai_dalloc(tsdn, &sec.pai, two_page[i], + &deferred_work_generated); } expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); @@ -204,9 +216,9 @@ TEST_BEGIN(test_reuse) { */ for (int i = 0; i < NALLOCS; i++) { edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); expect_ptr_eq(two_page[i], alloc2, @@ -238,14 +250,16 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; + bool deferred_work_generated; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } - extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, + &deferred_work_generated); 
expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS + 1, max_allocs, @@ -254,7 +268,7 @@ TEST_BEGIN(test_auto_flush) { "Incorrect number of allocations"); /* Free until the SEC is full, but should not have flushed yet. */ for (int i = 0; i < NALLOCS; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } expect_zu_le(NALLOCS + 1, max_allocs, "Incorrect number of allocations"); @@ -267,7 +281,7 @@ TEST_BEGIN(test_auto_flush) { * entirety when it decides to do so, and it has only one bin active * right now. */ - pai_dalloc(tsdn, &sec.pai, extra_alloc); + pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated); expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); expect_zu_eq(0, ta.dalloc_count, @@ -291,16 +305,17 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; + bool deferred_work_generated; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } /* Free all but the last aloc. */ for (int i = 0; i < NALLOCS - 1; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; @@ -326,7 +341,8 @@ do_disable_flush_test(bool is_disable) { * If we free into a disabled SEC, it should forward to the fallback. * Otherwise, the SEC should accept the allocation. 
*/ - pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1]); + pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1], + &deferred_work_generated); expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); @@ -356,6 +372,8 @@ TEST_BEGIN(test_max_alloc_respected) { size_t max_alloc = 2 * PAGE; size_t attempted_alloc = 3 * PAGE; + bool deferred_work_generated; + test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, /* max_bytes */ 1000 * PAGE); @@ -365,13 +383,13 @@ TEST_BEGIN(test_max_alloc_respected) { expect_zu_eq(i, ta.dalloc_count, "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false); + PAGE, /* zero */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); expect_zu_eq(i + 1, ta.alloc_count, "Incorrect number of allocations"); expect_zu_eq(i, ta.dalloc_count, "Incorrect number of deallocations"); - pai_dalloc(tsdn, &sec.pai, edata); + pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); } } TEST_END @@ -387,27 +405,31 @@ TEST_BEGIN(test_expand_shrink_delegate) { /* See the note above -- we can't use the real tsd. 
*/ tsdn_t *tsdn = TSDN_NULL; + bool deferred_work_generated; + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_false(err, "Unexpected expand failure"); expect_zu_eq(1, ta.expand_count, ""); ta.expand_return_value = true; err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_true(err, "Unexpected expand success"); expect_zu_eq(2, ta.expand_count, ""); - err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE); + err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE, + &deferred_work_generated); expect_false(err, "Unexpected shrink failure"); expect_zu_eq(1, ta.shrink_count, ""); ta.shrink_return_value = true; - err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE); + err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE, + &deferred_work_generated); expect_true(err, "Unexpected shrink success"); expect_zu_eq(2, ta.shrink_count, ""); } @@ -426,9 +448,10 @@ TEST_BEGIN(test_nshards_0) { opts.nshards = 0; sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); + bool deferred_work_generated; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); - pai_dalloc(tsdn, &sec.pai, edata); + /* zero */ false, &deferred_work_generated); + pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); /* Both operations should have gone directly to the fallback. 
*/ expect_zu_eq(1, ta.alloc_count, ""); @@ -461,25 +484,28 @@ TEST_BEGIN(test_stats_simple) { FLUSH_PAGES = 20, }; + bool deferred_work_generated; + test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } /* Increase and decrease, without flushing. */ for (size_t i = 0; i < NITERS; i++) { for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - pai_dalloc(tsdn, &sec.pai, allocs[j]); + pai_dalloc(tsdn, &sec.pai, allocs[j], + &deferred_work_generated); expect_stats_pages(tsdn, &sec, j + 1); } for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); } } @@ -505,25 +531,30 @@ TEST_BEGIN(test_stats_auto_flush) { edata_t *extra_alloc1; edata_t *allocs[2 * FLUSH_PAGES]; - extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); - extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false); + bool deferred_work_generated; + + extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, + &deferred_work_generated); + extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, + &deferred_work_generated); for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); } for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } - pai_dalloc(tsdn, &sec.pai, extra_alloc0); + pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated); /* Flush the remaining pages; stats should still work. 
*/ for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i]); + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i], + &deferred_work_generated); } - pai_dalloc(tsdn, &sec.pai, extra_alloc1); + pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count - ta.dalloc_batch_count); @@ -545,16 +576,17 @@ TEST_BEGIN(test_stats_manual_flush) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); + bool deferred_work_generated; edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false); + /* zero */ false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } /* Dalloc the first half of the allocations. */ for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i]); + pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); expect_stats_pages(tsdn, &sec, i + 1); } @@ -563,7 +595,8 @@ TEST_BEGIN(test_stats_manual_flush) { /* Flush the remaining pages. */ for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i]); + pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i], + &deferred_work_generated); expect_stats_pages(tsdn, &sec, i + 1); } sec_disable(tsdn, &sec); From 6e848a005e23d5eeb7f0b32424730d53f1d4edf3 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 10 Sep 2021 17:32:23 -0700 Subject: [PATCH 2112/2608] Remove opt_background_thread_hpa_interval_max_ms Now that HPA can communicate the time until its deferred work should be done, this option is not used anymore. 
--- .../internal/background_thread_externs.h | 3 -- src/background_thread.c | 11 ++---- src/ctl.c | 5 --- src/jemalloc.c | 9 ----- src/pa.c | 36 +------------------ src/pac.c | 31 +++++++++++++++- src/stats.c | 1 - test/unit/hpa_background_thread.c | 2 +- 8 files changed, 34 insertions(+), 64 deletions(-) diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index a2d79adf..6ae3c8d8 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -2,7 +2,6 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; -extern ssize_t opt_background_thread_hpa_interval_max_ms; extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; @@ -16,8 +15,6 @@ bool background_threads_disable(tsd_t *tsd); bool background_thread_is_started(background_thread_info_t* info); void background_thread_wakeup_early(background_thread_info_t *info, nstime_t *remaining_sleep); -void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena, - decay_t *decay, size_t npages_new); void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); diff --git a/src/background_thread.c b/src/background_thread.c index 69ef983f..ac171c37 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -13,13 +13,6 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; -/* - * This is disabled (and set to -1) if the HPA is. If the HPA is enabled, - * malloc_conf initialization sets it to - * BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED. 
- */ -ssize_t opt_background_thread_hpa_interval_max_ms = - BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; @@ -60,7 +53,7 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED bool background_threads_enable(tsd_t *tsd) NOT_REACHED bool background_threads_disable(tsd_t *tsd) NOT_REACHED -bool background_thread_running(background_thread_info_t *info) NOT_REACHED +bool background_thread_is_started(background_thread_info_t *info) NOT_REACHED void background_thread_wakeup_early(background_thread_info_t *info, nstime_t *remaining_sleep) NOT_REACHED void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED @@ -593,7 +586,7 @@ background_thread_wakeup_early(background_thread_info_t *info, * we know that background thread wakes up soon, so the time to cache * the just freed memory is bounded and low. 
*/ - if (remaining_sleep && nstime_ns(remaining_sleep) < + if (remaining_sleep != NULL && nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } diff --git a/src/ctl.c b/src/ctl.c index 9647478d..42ded601 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,7 +113,6 @@ CTL_PROTO(opt_oversize_threshold) CTL_PROTO(opt_background_thread) CTL_PROTO(opt_mutex_max_spin) CTL_PROTO(opt_max_background_threads) -CTL_PROTO(opt_background_thread_hpa_interval_max_ms) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -427,8 +426,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, {NAME("background_thread"), CTL(opt_background_thread)}, {NAME("max_background_threads"), CTL(opt_max_background_threads)}, - {NAME("background_thread_hpa_interval_max_ms"), - CTL(opt_background_thread_hpa_interval_max_ms)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -2148,8 +2145,6 @@ CTL_RO_NL_GEN(opt_mutex_max_spin, opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) -CTL_RO_NL_GEN(opt_background_thread_hpa_interval_max_ms, - opt_background_thread_hpa_interval_max_ms, ssize_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 66e36855..18b54520 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1416,10 +1416,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") - CONF_HANDLE_SSIZE_T( - 
opt_background_thread_hpa_interval_max_ms, - "background_thread_hpa_interval_max_ms", -1, - SSIZE_MAX) CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, "hpa_slab_max_alloc", PAGE, HUGEPAGE, CONF_CHECK_MIN, CONF_CHECK_MAX, true); @@ -1658,11 +1654,6 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); - if (opt_hpa && opt_background_thread_hpa_interval_max_ms - == BACKGROUND_THREAD_HPA_INTERVAL_MAX_UNINITIALIZED) { - opt_background_thread_hpa_interval_max_ms = - BACKGROUND_THREAD_HPA_INTERVAL_MAX_DEFAULT_WHEN_ENABLED; - } } #undef MALLOC_CONF_NSOURCES diff --git a/src/pa.c b/src/pa.c index a29e10b6..249de24a 100644 --- a/src/pa.c +++ b/src/pa.c @@ -245,19 +245,6 @@ pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { } } -static inline uint64_t -pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { - if (malloc_mutex_trylock(tsdn, &decay->mtx)) { - /* Use minimal interval if decay is contended. */ - return BACKGROUND_THREAD_DEFERRED_MIN; - } - uint64_t result = decay_ns_until_purge(decay, npages, - ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); - - malloc_mutex_unlock(tsdn, &decay->mtx); - return result; -} - /* * Get time until next deferred work ought to happen. 
If there are multiple * things that have been deferred, this function calculates the time until @@ -265,32 +252,11 @@ pa_shard_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { */ uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { - uint64_t time; - time = pa_shard_ns_until_purge(tsdn, - &shard->pac.decay_dirty, - ecache_npages_get(&shard->pac.ecache_dirty)); + uint64_t time = pai_time_until_deferred_work(tsdn, &shard->pac.pai); if (time == BACKGROUND_THREAD_DEFERRED_MIN) { return time; } - uint64_t muzzy = pa_shard_ns_until_purge(tsdn, - &shard->pac.decay_muzzy, - ecache_npages_get(&shard->pac.ecache_muzzy)); - if (muzzy < time) { - time = muzzy; - if (time == BACKGROUND_THREAD_DEFERRED_MIN) { - return time; - } - } - - uint64_t pac = pai_time_until_deferred_work(tsdn, &shard->pac.pai); - if (pac < time) { - time = pac; - if (time == BACKGROUND_THREAD_DEFERRED_MIN) { - return time; - } - } - if (pa_shard_uses_hpa(shard)) { uint64_t hpa = pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai); diff --git a/src/pac.c b/src/pac.c index 2221c8db..03e31972 100644 --- a/src/pac.c +++ b/src/pac.c @@ -208,9 +208,38 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, *deferred_work_generated = true; } +static inline uint64_t +pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { + if (malloc_mutex_trylock(tsdn, &decay->mtx)) { + /* Use minimal interval if decay is contended. 
*/ + return BACKGROUND_THREAD_DEFERRED_MIN; + } + uint64_t result = decay_ns_until_purge(decay, npages, + ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); + + malloc_mutex_unlock(tsdn, &decay->mtx); + return result; +} + static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { - return BACKGROUND_THREAD_DEFERRED_MAX; + uint64_t time; + pac_t *pac = (pac_t *)self; + + time = pac_ns_until_purge(tsdn, + &pac->decay_dirty, + ecache_npages_get(&pac->ecache_dirty)); + if (time == BACKGROUND_THREAD_DEFERRED_MIN) { + return time; + } + + uint64_t muzzy = pac_ns_until_purge(tsdn, + &pac->decay_muzzy, + ecache_npages_get(&pac->ecache_muzzy)); + if (muzzy < time) { + time = muzzy; + } + return time; } bool diff --git a/src/stats.c b/src/stats.c index 25ee2355..7af5782a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1499,7 +1499,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") - OPT_WRITE_SSIZE_T("background_thread_hpa_interval_max_ms") OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") OPT_WRITE_SIZE_T("lg_extent_max_active_fit") diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index c4686831..77d05556 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -77,7 +77,7 @@ wait_until_thread_is_enabled(unsigned arena_id) { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); sleeping = background_thread_indefinite_sleep(info); - assert_d_lt(iterations, (int)1e6, + assert_d_lt(iterations, UINT64_C(1000000), "Waiting for a thread to start for too long"); } while (!sleeping); } From 523cfa55c5b350decb5efc11083c4bc366cd98c4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Sep 2021 10:58:04 -0700 Subject: [PATCH 2113/2608] Guard prof related mallctl 
with opt_prof. The prof initialization is done only when opt_prof is true. This change makes sure the prof_* mallctls only have limited read access (i.e. no access to prof internals) when opt_prof is false. In addition, initialize the global prof mutexes even if opt_prof is false. This makes sure the mutex stats are set properly. --- src/ctl.c | 47 +++++++++++++++------- src/prof.c | 97 +++++++++++++++++++++------------------------ test/unit/mallctl.c | 2 +- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 42ded601..8717c96d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2337,7 +2337,7 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t newlen) { int ret; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -2374,8 +2374,12 @@ thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, return ENOENT; } - oldval = prof_thread_active_get(tsd); + oldval = opt_prof ? prof_thread_active_get(tsd) : false; if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; @@ -3128,6 +3132,10 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, } if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; @@ -3135,7 +3143,8 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, oldval = prof_thread_active_init_set(tsd_tsdn(tsd), *(bool *)newp); } else { - oldval = prof_thread_active_init_get(tsd_tsdn(tsd)); + oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) : + false; } READ(oldval, bool); @@ -3161,13 +3170,19 @@ prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } bool val = *(bool *)newp; - if (!opt_prof && val) { - ret = ENOENT; - goto label_return; + if (!opt_prof) { + if (val) { + ret = ENOENT; + goto label_return; + } else { + /* No change needed (already off). 
*/ + oldval = false; + } + } else { + oldval = prof_active_set(tsd_tsdn(tsd), val); } - oldval = prof_active_set(tsd_tsdn(tsd), val); } else { - oldval = prof_active_get(tsd_tsdn(tsd)); + oldval = opt_prof ? prof_active_get(tsd_tsdn(tsd)) : false; } READ(oldval, bool); @@ -3182,7 +3197,7 @@ prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; const char *filename = NULL; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3210,13 +3225,17 @@ prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, } if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } if (newlen != sizeof(bool)) { ret = EINVAL; goto label_return; } oldval = prof_gdump_set(tsd_tsdn(tsd), *(bool *)newp); } else { - oldval = prof_gdump_get(tsd_tsdn(tsd)); + oldval = opt_prof ? prof_gdump_get(tsd_tsdn(tsd)) : false; } READ(oldval, bool); @@ -3231,7 +3250,7 @@ prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; const char *prefix = NULL; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3251,7 +3270,7 @@ prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, int ret; size_t lg_sample = lg_prof_sample; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3278,7 +3297,7 @@ prof_log_start_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, const char *filename = NULL; - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } @@ -3298,7 +3317,7 @@ label_return: static int prof_log_stop_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - if (!config_prof) { + if (!config_prof || !opt_prof) { return ENOENT; } diff --git a/src/prof.c b/src/prof.c index 0f1f7a71..67a7f71a 100644 --- a/src/prof.c +++ b/src/prof.c @@ -554,72 +554,65 @@ bool prof_boot2(tsd_t *tsd, base_t *base) { cassert(config_prof); - if (opt_prof) { - unsigned i; + /* + * Initialize the global mutexes 
unconditionally to maintain correct + * stats when opt_prof is false. + */ + if (malloc_mutex_init(&prof_active_mtx, "prof_active", + WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", + WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_thread_active_init_mtx, + "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, + malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", + WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", + WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", + WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_dump_filename_mtx, + "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, + malloc_mutex_rank_exclusive)) { + return true; + } + if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", + WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { + return true; + } + if (opt_prof) { lg_prof_sample = opt_lg_prof_sample; prof_unbias_map_init(); - prof_active = opt_prof_active; - if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { - return true; - } - prof_gdump_val = opt_prof_gdump; - if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { - return true; - } - prof_thread_active_init = opt_prof_thread_active_init; - if (malloc_mutex_init(&prof_thread_active_init_mtx, - "prof_thread_active_init", - WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, - malloc_mutex_rank_exclusive)) { - return true; 
- } if (prof_data_init(tsd)) { return true; } - if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { - return true; - } - - if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { - return true; - } - next_thr_uid = 0; - if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, - malloc_mutex_rank_exclusive)) { - return true; - } - - if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", - WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { - return true; - } - if (prof_idump_accum_init()) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, - "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, - malloc_mutex_rank_exclusive)) { - return true; - } - if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { - return true; - } - if (opt_prof_final && opt_prof_prefix[0] != '\0' && atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); @@ -643,7 +636,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { if (gctx_locks == NULL) { return true; } - for (i = 0; i < PROF_NCTX_LOCKS; i++) { + for (unsigned i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", WITNESS_RANK_PROF_GCTX, malloc_mutex_rank_exclusive)) { @@ -656,7 +649,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { if (tdata_locks == NULL) { return true; } - for (i = 0; i < PROF_NTDATA_LOCKS; i++) { + for (unsigned i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", WITNESS_RANK_PROF_TDATA, malloc_mutex_rank_exclusive)) { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index e9e0feb6..5cba0837 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -914,7 +914,7 @@ TEST_BEGIN(test_prof_active) { old = true; expect_d_eq(mallctl("prof.active", &old, &len, &active, len), ENOENT, "Setting prof_active to true should fail 
when opt_prof is off"); - expect_true(old, "old valud should not be touched when mallctl fails"); + expect_true(old, "old value should not be touched when mallctl fails"); active = false; expect_d_eq(mallctl("prof.active", NULL, NULL, &active, len), 0, "Setting prof_active to false should succeed when opt_prof is off"); From f7d46b81197b9879e1f572f9a4d3bfe3b8f850b9 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 30 Aug 2021 14:05:56 -0700 Subject: [PATCH 2114/2608] Allow setting custom backtrace hook Existing backtrace implementations skip native stack frames from runtimes like Python. The hook allows to augment the backtraces to attribute allocations to native functions in heap profiles. --- Makefile.in | 1 + include/jemalloc/internal/prof_externs.h | 4 +- include/jemalloc/internal/prof_hook.h | 16 +++++++ include/jemalloc/internal/prof_structs.h | 3 +- include/jemalloc/internal/prof_sys.h | 1 + src/ctl.c | 36 +++++++++++++- src/prof.c | 16 +++++++ src/prof_sys.c | 44 +++++++++-------- test/analyze/prof_bias.c | 14 +++--- test/unit/prof_hook.c | 61 ++++++++++++++++++++++++ test/unit/prof_hook.sh | 6 +++ 11 files changed, 172 insertions(+), 30 deletions(-) create mode 100644 include/jemalloc/internal/prof_hook.h create mode 100644 test/unit/prof_hook.c create mode 100644 test/unit/prof_hook.sh diff --git a/Makefile.in b/Makefile.in index 51276ceb..a6f61ced 100644 --- a/Makefile.in +++ b/Makefile.in @@ -247,6 +247,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_accum.c \ $(srcroot)test/unit/prof_active.c \ $(srcroot)test/unit/prof_gdump.c \ + $(srcroot)test/unit/prof_hook.c \ $(srcroot)test/unit/prof_idump.c \ $(srcroot)test/unit/prof_log.c \ $(srcroot)test/unit/prof_mdump.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 671ac9b8..75d1d7a0 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PROF_EXTERNS_H #include 
"jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_hook.h" extern bool opt_prof; extern bool opt_prof_active; @@ -52,7 +53,8 @@ extern bool prof_booted; * otherwise difficult to guarantee that two allocations are reported as coming * from the exact same stack trace in the presence of an optimizing compiler. */ -extern void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt); +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +prof_backtrace_hook_t prof_backtrace_hook_get(); /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h new file mode 100644 index 00000000..277cd992 --- /dev/null +++ b/include/jemalloc/internal/prof_hook.h @@ -0,0 +1,16 @@ +#ifndef JEMALLOC_INTERNAL_PROF_HOOK_H +#define JEMALLOC_INTERNAL_PROF_HOOK_H + +/* + * The hooks types of which are declared in this file are experimental and + * undocumented, thus the typedefs are located in an 'internal' header. + */ + +/* + * A hook to mock out backtrace functionality. This can be handy, since it's + * otherwise difficult to guarantee that two allocations are reported as coming + * from the exact same stack trace in the presence of an optimizing compiler. + */ +typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); + +#endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index c2a111a9..dd22115f 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -16,7 +16,8 @@ struct prof_bt_s { #ifdef JEMALLOC_PROF_LIBGCC /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. 
*/ typedef struct { - prof_bt_t *bt; + void **vec; + unsigned *len; unsigned max; } prof_unwind_data_t; #endif diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 6e4e811a..3d25a429 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -6,6 +6,7 @@ extern base_t *prof_base; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); +void prof_hooks_init(); void prof_unwind_init(); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); diff --git a/src/ctl.c b/src/ctl.c index 8717c96d..6bf1c946 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -305,6 +305,7 @@ CTL_PROTO(stats_retained) CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) +CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) @@ -833,7 +834,8 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t experimental_hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, - {NAME("remove"), CTL(experimental_hooks_remove)} + {NAME("remove"), CTL(experimental_hooks_remove)}, + {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)} }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3328,6 +3330,38 @@ prof_log_stop_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, return 0; } +static int +experimental_hooks_prof_backtrace_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_backtrace_hook_t old_hook = + prof_backtrace_hook_get(); + READ(old_hook, prof_backtrace_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + 
prof_backtrace_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_backtrace_hook_t); + if (new_hook == NULL) { + ret = EINVAL; + goto label_return; + } + prof_backtrace_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/prof.c b/src/prof.c index 67a7f71a..d0cae0e9 100644 --- a/src/prof.c +++ b/src/prof.c @@ -10,6 +10,7 @@ #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" +#include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event.h" /* @@ -69,6 +70,9 @@ static malloc_mutex_t next_thr_uid_mtx; /* Do not dump any profiles until bootstrapping is complete. */ bool prof_booted = false; +/* Logically a prof_backtrace_hook_t. */ +atomic_p_t prof_backtrace_hook; + /******************************************************************************/ void @@ -518,6 +522,17 @@ prof_gdump_set(tsdn_t *tsdn, bool gdump) { return prof_gdump_old; } +void +prof_backtrace_hook_set(prof_backtrace_hook_t hook) { + atomic_store_p(&prof_backtrace_hook, hook, ATOMIC_RELEASE); +} + +prof_backtrace_hook_t +prof_backtrace_hook_get() { + return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook, + ATOMIC_ACQUIRE); +} + void prof_boot0(void) { cassert(config_prof); @@ -657,6 +672,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { } } + prof_hooks_init(); prof_unwind_init(); } prof_booted = true; diff --git a/src/prof_sys.c b/src/prof_sys.c index 6a5b2b16..1485e8b2 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -49,18 +49,18 @@ bt_init(prof_bt_t *bt, void **vec) { #ifdef JEMALLOC_PROF_LIBUNWIND static void -prof_backtrace_impl(prof_bt_t *bt) { +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { int nframes; cassert(config_prof); - assert(bt->len == 0); - assert(bt->vec != 
NULL); + assert(*len == 0); + assert(vec != NULL); - nframes = unw_backtrace(bt->vec, PROF_BT_MAX); + nframes = unw_backtrace(vec, PROF_BT_MAX); if (nframes <= 0) { return; } - bt->len = nframes; + *len = nframes; } #elif (defined(JEMALLOC_PROF_LIBGCC)) static _Unwind_Reason_Code @@ -81,9 +81,9 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { if (ip == NULL) { return _URC_END_OF_STACK; } - data->bt->vec[data->bt->len] = ip; - data->bt->len++; - if (data->bt->len == data->max) { + data->vec[*data->len] = ip; + (*data->len)++; + if (*data->len == data->max) { return _URC_END_OF_STACK; } @@ -91,8 +91,8 @@ prof_unwind_callback(struct _Unwind_Context *context, void *arg) { } static void -prof_backtrace_impl(prof_bt_t *bt) { - prof_unwind_data_t data = {bt, PROF_BT_MAX}; +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { + prof_unwind_data_t data = {vec, len, max_len}; cassert(config_prof); @@ -100,9 +100,9 @@ prof_backtrace_impl(prof_bt_t *bt) { } #elif (defined(JEMALLOC_PROF_GCC)) static void -prof_backtrace_impl(prof_bt_t *bt) { +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { #define BT_FRAME(i) \ - if ((i) < PROF_BT_MAX) { \ + if ((i) < max_len) { \ void *p; \ if (__builtin_frame_address(i) == 0) { \ return; \ @@ -111,8 +111,8 @@ prof_backtrace_impl(prof_bt_t *bt) { if (p == NULL) { \ return; \ } \ - bt->vec[(i)] = p; \ - bt->len = (i) + 1; \ + vec[(i)] = p; \ + *len = (i) + 1; \ } else { \ return; \ } @@ -263,24 +263,28 @@ prof_backtrace_impl(prof_bt_t *bt) { } #else static void -prof_backtrace_impl(prof_bt_t *bt) { +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); not_reached(); } #endif - -void (* JET_MUTABLE prof_backtrace_hook)(prof_bt_t *bt) = &prof_backtrace_impl; - void prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); pre_reentrancy(tsd, NULL); - prof_backtrace_hook(bt); + prof_backtrace_hook_t prof_backtrace_hook = prof_backtrace_hook_get(); + 
prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX); post_reentrancy(tsd); } -void prof_unwind_init() { +void +prof_hooks_init() { + prof_backtrace_hook_set(&prof_backtrace_impl); +} + +void +prof_unwind_init() { #ifdef JEMALLOC_PROF_LIBGCC /* * Cause the backtracing machinery to allocate its internal diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c index 0aae766b..4b960a66 100644 --- a/test/analyze/prof_bias.c +++ b/test/analyze/prof_bias.c @@ -24,12 +24,12 @@ */ static void -mock_backtrace(prof_bt_t *bt) { - bt->len = 4; - bt->vec[0] = (void *)0x111; - bt->vec[1] = (void *)0x222; - bt->vec[2] = (void *)0x333; - bt->vec[3] = (void *)0x444; +mock_backtrace(void **vec, unsigned *len, unsigned max_len) { + *len = 4; + vec[0] = (void *)0x111; + vec[1] = (void *)0x222; + vec[2] = (void *)0x333; + vec[3] = (void *)0x444; } static void @@ -50,7 +50,7 @@ main(void) { sizeof(lg_prof_sample)); assert(err == 0); - prof_backtrace_hook = &mock_backtrace; + prof_backtrace_hook_set(mock_backtrace); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true); do_allocs(32 * 1024* 1024, 16, /* do_frees */ true); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false); diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c new file mode 100644 index 00000000..32d0e9ea --- /dev/null +++ b/test/unit/prof_hook.c @@ -0,0 +1,61 @@ +#include "test/jemalloc_test.h" + +bool mock_bt_hook_called = false; + +void +mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { + *len = max_len; + for (unsigned i = 0; i < max_len; ++i) { + vec[i] = (void *)((uintptr_t)i); + } + mock_bt_hook_called = true; +} + +TEST_BEGIN(test_prof_backtrace_hook) { + + test_skip_if(!config_prof); + + mock_bt_hook_called = false; + + void *p0 = mallocx(1, 0); + assert_ptr_not_null(p0, "Failed to allocate"); + + expect_false(mock_bt_hook_called, "Called mock hook before it's set"); + + prof_backtrace_hook_t null_hook = NULL; + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + NULL, 0, (void 
*)&null_hook, sizeof(null_hook)), + EINVAL, "Incorrectly allowed NULL backtrace hook"); + + prof_backtrace_hook_t default_hook; + size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + prof_backtrace_hook_t hook = &mock_bt_hook; + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)&default_hook, &default_hook_sz, (void *)&hook, + sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + + void *p1 = mallocx(1, 0); + assert_ptr_not_null(p1, "Failed to allocate"); + + expect_true(mock_bt_hook_called, "Didn't call mock hook"); + + prof_backtrace_hook_t current_hook; + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, + sizeof(default_hook)), 0, + "Unexpected mallctl failure resetting hook to default"); + + expect_ptr_eq(current_hook, hook, + "Hook returned by mallctl is not equal to mock hook"); + + dallocx(p1, 0); + dallocx(p0, 0); +} +TEST_END + +int +main(void) { + return test( + test_prof_backtrace_hook); +} diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh new file mode 100644 index 00000000..d14cb8c5 --- /dev/null +++ b/test/unit/prof_hook.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi + From a9031a0970df9c999873617423f789bd46bfe619 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 1 Sep 2021 13:00:01 -0700 Subject: [PATCH 2115/2608] Allow setting a dump hook If users want to be notified when a heap dump occurs, they can set this hook. 
--- include/jemalloc/internal/prof_externs.h | 8 +- include/jemalloc/internal/prof_hook.h | 5 + src/ctl.c | 32 ++++++- src/prof.c | 16 +++- src/prof_sys.c | 14 ++- test/unit/prof_hook.c | 114 ++++++++++++++++++++++- 6 files changed, 178 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 75d1d7a0..75dd90bf 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -48,14 +48,12 @@ extern size_t lg_prof_sample; extern bool prof_booted; -/* - * A hook to mock out backtrace functionality. This can be handy, since it's - * otherwise difficult to guarantee that two allocations are reported as coming - * from the exact same stack trace in the presence of an optimizing compiler. - */ void prof_backtrace_hook_set(prof_backtrace_hook_t hook); prof_backtrace_hook_t prof_backtrace_hook_get(); +void prof_dump_hook_set(prof_dump_hook_t hook); +prof_dump_hook_t prof_dump_hook_get(); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 277cd992..150d19d3 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -13,4 +13,9 @@ */ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); +/* + * A callback hook that notifies about recently dumped heap profile. 
+ */ +typedef void (*prof_dump_hook_t)(const char *filename); + #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/src/ctl.c b/src/ctl.c index 6bf1c946..3aaa5a74 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -306,6 +306,7 @@ CTL_PROTO(stats_zero_reallocs) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) +CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) @@ -835,7 +836,8 @@ static const ctl_named_node_t stats_node[] = { static const ctl_named_node_t experimental_hooks_node[] = { {NAME("install"), CTL(experimental_hooks_install)}, {NAME("remove"), CTL(experimental_hooks_remove)}, - {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)} + {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, + {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3362,6 +3364,34 @@ label_return: return ret; } +static int +experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_dump_hook_t old_hook = + prof_dump_hook_get(); + READ(old_hook, prof_dump_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_dump_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_dump_hook_t); + prof_dump_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/prof.c b/src/prof.c index d0cae0e9..625bcd73 100644 --- a/src/prof.c +++ b/src/prof.c @@ -73,6 +73,9 @@ bool 
prof_booted = false; /* Logically a prof_backtrace_hook_t. */ atomic_p_t prof_backtrace_hook; +/* Logically a prof_dump_hook_t. */ +atomic_p_t prof_dump_hook; + /******************************************************************************/ void @@ -533,6 +536,17 @@ prof_backtrace_hook_get() { ATOMIC_ACQUIRE); } +void +prof_dump_hook_set(prof_dump_hook_t hook) { + atomic_store_p(&prof_dump_hook, hook, ATOMIC_RELEASE); +} + +prof_dump_hook_t +prof_dump_hook_get() { + return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, + ATOMIC_ACQUIRE); +} + void prof_boot0(void) { cassert(config_prof); @@ -672,8 +686,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { } } - prof_hooks_init(); prof_unwind_init(); + prof_hooks_init(); } prof_booted = true; diff --git a/src/prof_sys.c b/src/prof_sys.c index 1485e8b2..fd41e86c 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -55,6 +55,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(*len == 0); assert(vec != NULL); + assert(max_len == PROF_BT_MAX); nframes = unw_backtrace(vec, PROF_BT_MAX); if (nframes <= 0) { @@ -95,6 +96,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { prof_unwind_data_t data = {vec, len, max_len}; cassert(config_prof); + assert(vec != NULL); + assert(max_len == PROF_BT_MAX); _Unwind_Backtrace(prof_unwind_callback, &data); } @@ -118,6 +121,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { } cassert(config_prof); + assert(vec != NULL); + assert(max_len == PROF_BT_MAX); BT_FRAME(0) BT_FRAME(1) @@ -272,8 +277,10 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { void prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { cassert(config_prof); - pre_reentrancy(tsd, NULL); prof_backtrace_hook_t prof_backtrace_hook = prof_backtrace_hook_get(); + assert(prof_backtrace_hook != NULL); + + pre_reentrancy(tsd, NULL); prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX); post_reentrancy(tsd); } @@ -281,6 +288,7 @@ 
prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { void prof_hooks_init() { prof_backtrace_hook_set(&prof_backtrace_impl); + prof_dump_hook_set(NULL); } void @@ -506,6 +514,10 @@ prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, buf_writer_terminate(tsd_tsdn(tsd), &buf_writer); prof_dump_close(&arg); + prof_dump_hook_t dump_hook = prof_dump_hook_get(); + if (dump_hook != NULL) { + dump_hook(filename); + } malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx); post_reentrancy(tsd); diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index 32d0e9ea..6480d930 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -1,6 +1,11 @@ #include "test/jemalloc_test.h" +const char *dump_filename = "/dev/null"; + +prof_backtrace_hook_t default_hook; + bool mock_bt_hook_called = false; +bool mock_dump_hook_called = false; void mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { @@ -11,7 +16,38 @@ mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { mock_bt_hook_called = true; } -TEST_BEGIN(test_prof_backtrace_hook) { +void +mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) { + default_hook(vec, len, max_len); + expect_u_gt(*len, 0, "Default backtrace hook returned empty backtrace"); + expect_u_lt(*len, max_len, + "Default backtrace hook returned too large backtrace"); + + /* Add a separator between default frames and augmented */ + vec[*len] = (void *)0x030303030; + (*len)++; + + /* Add more stack frames */ + for (unsigned i = 0; i < 3; ++i) { + if (*len == max_len) { + break; + } + vec[*len] = (void *)((uintptr_t)i); + (*len)++; + } + + + mock_bt_hook_called = true; +} + +void +mock_dump_hook(const char *filename) { + mock_dump_hook_called = true; + expect_str_eq(filename, dump_filename, + "Incorrect file name passed to the dump hook"); +} + +TEST_BEGIN(test_prof_backtrace_hook_replace) { test_skip_if(!config_prof); @@ -27,7 +63,6 @@ TEST_BEGIN(test_prof_backtrace_hook) { NULL, 0, (void *)&null_hook, sizeof(null_hook)), 
EINVAL, "Incorrectly allowed NULL backtrace hook"); - prof_backtrace_hook_t default_hook; size_t default_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", @@ -54,8 +89,81 @@ TEST_BEGIN(test_prof_backtrace_hook) { } TEST_END +TEST_BEGIN(test_prof_backtrace_hook_augment) { + + test_skip_if(!config_prof); + + mock_bt_hook_called = false; + + void *p0 = mallocx(1, 0); + assert_ptr_not_null(p0, "Failed to allocate"); + + expect_false(mock_bt_hook_called, "Called mock hook before it's set"); + + size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + prof_backtrace_hook_t hook = &mock_bt_augmenting_hook; + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)&default_hook, &default_hook_sz, (void *)&hook, + sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + + void *p1 = mallocx(1, 0); + assert_ptr_not_null(p1, "Failed to allocate"); + + expect_true(mock_bt_hook_called, "Didn't call mock hook"); + + prof_backtrace_hook_t current_hook; + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, + sizeof(default_hook)), 0, + "Unexpected mallctl failure resetting hook to default"); + + expect_ptr_eq(current_hook, hook, + "Hook returned by mallctl is not equal to mock hook"); + + dallocx(p1, 0); + dallocx(p0, 0); +} +TEST_END + +TEST_BEGIN(test_prof_dump_hook) { + + test_skip_if(!config_prof); + + mock_dump_hook_called = false; + + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, + sizeof(dump_filename)), 0, "Failed to dump heap profile"); + + expect_false(mock_dump_hook_called, "Called dump hook before it's set"); + + size_t default_hook_sz = sizeof(prof_dump_hook_t); + prof_dump_hook_t hook = &mock_dump_hook; + expect_d_eq(mallctl("experimental.hooks.prof_dump", + (void *)&default_hook, &default_hook_sz, (void *)&hook, + 
sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + + expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, + sizeof(dump_filename)), 0, "Failed to dump heap profile"); + + expect_true(mock_dump_hook_called, "Didn't call mock hook"); + + prof_dump_hook_t current_hook; + size_t current_hook_sz = sizeof(prof_dump_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_dump", + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, + sizeof(default_hook)), 0, + "Unexpected mallctl failure resetting hook to default"); + + expect_ptr_eq(current_hook, hook, + "Hook returned by mallctl is not equal to mock hook"); +} +TEST_END + int main(void) { return test( - test_prof_backtrace_hook); + test_prof_backtrace_hook_replace, + test_prof_backtrace_hook_augment, + test_prof_dump_hook); } From 7bb05e04be693b26536dc2335b4d230dacc5d7d2 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 23 Aug 2021 14:03:35 +0200 Subject: [PATCH 2116/2608] add experimental.arenas_create_ext mallctl This mallctl accepts an arena_config_t structure which can be used to customize the behavior of the arena. Right now it contains extent_hooks and a new option, metadata_use_hooks, which controls whether the extent hooks are also used for metadata allocation. The medata_use_hooks option has two main use cases: 1. In heterogeneous memory systems, to avoid metadata being placed on potentially slower memory. 2. Avoiding virtual memory from being leaked as a result of metadata allocation failure originating in an extent hook. 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_types.h | 14 +++++ include/jemalloc/internal/base.h | 7 ++- include/jemalloc/internal/base_structs.h | 5 ++ .../internal/jemalloc_internal_externs.h | 2 +- .../internal/jemalloc_internal_inlines_a.h | 2 +- src/arena.c | 11 ++-- src/base.c | 21 ++++++-- src/ctl.c | 42 ++++++++++++--- src/jemalloc.c | 14 +++-- test/integration/extent.c | 53 ++++++++++++++++++- test/unit/base.c | 7 +-- test/unit/edata_cache.c | 2 +- test/unit/hpa.c | 2 +- test/unit/pa.c | 3 +- test/unit/rtree.c | 15 ++++-- test/unit/sec.c | 4 +- 17 files changed, 165 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index b9231c5d..e6fceaaf 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -97,7 +97,7 @@ bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); -arena_t *arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); bool arena_init_huge(void); bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index e0f8218d..f763a8ca 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -41,4 +41,18 @@ typedef enum { */ #define OVERSIZE_THRESHOLD_DEFAULT (8 << 20) +struct arena_config_s { + /* extent hooks to be used for the arena */ + struct extent_hooks_s *extent_hooks; + + /* + * Use extent hooks for metadata (base) allocations when true. 
+ */ + bool metadata_use_hooks; +}; + +typedef struct arena_config_s arena_config_t; + +extern const arena_config_t arena_config_default; + #endif /* JEMALLOC_INTERNAL_ARENA_TYPES_H */ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 628e393b..67e19409 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -46,6 +46,11 @@ struct base_s { */ ehooks_t ehooks; + /* + * Use user hooks for metadata when true. + */ + bool metadata_use_hooks; + /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; @@ -87,7 +92,7 @@ metadata_thp_enabled(void) { base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, - const extent_hooks_t *extent_hooks); + const extent_hooks_t *extent_hooks, bool metadata_use_hooks); void base_delete(tsdn_t *tsdn, base_t *base); ehooks_t *base_ehooks_get(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h index ff1fdfb3..914c5b59 100644 --- a/include/jemalloc/internal/base_structs.h +++ b/include/jemalloc/internal/base_structs.h @@ -25,6 +25,11 @@ struct base_s { */ ehooks_t ehooks; + /* + * Use user hooks for metadata when true. + */ + bool metadata_use_hooks; + /* Protects base_alloc() and base_stats_get() operations. 
*/ malloc_mutex_t mtx; diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index af6dc0a2..e8bfb03b 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -57,7 +57,7 @@ void *bootstrap_calloc(size_t num, size_t size); void bootstrap_free(void *ptr); void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); -arena_t *arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); +arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); void iarena_cleanup(tsd_t *tsd); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 24e42d38..1bca34cf 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -66,7 +66,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { if (unlikely(ret == NULL)) { if (init_if_missing) { ret = arena_init(tsdn, ind, - (extent_hooks_t *)&ehooks_default_extent_hooks); + &arena_config_default); } } return ret; diff --git a/src/arena.c b/src/arena.c index c720bcb1..c2842c6c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -48,6 +48,11 @@ static unsigned nbins_total; static unsigned huge_arena_ind; +const arena_config_t arena_config_default = { + /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, + /* .metadata_use_hooks = */ true, +}; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -1516,7 +1521,6 @@ arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, return ret; } - dss_prec_t arena_dss_prec_get(arena_t *arena) { return 
(dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_ACQUIRE); @@ -1583,7 +1587,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) { } arena_t * -arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; unsigned i; @@ -1591,7 +1595,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { if (ind == 0) { base = b0get(); } else { - base = base_new(tsdn, ind, extent_hooks); + base = base_new(tsdn, ind, config->extent_hooks, + config->metadata_use_hooks); if (base == NULL) { return NULL; } diff --git a/src/base.c b/src/base.c index 44878ad4..cc127ea0 100644 --- a/src/base.c +++ b/src/base.c @@ -295,6 +295,12 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, return block; } +static ehooks_t * +base_ehooks_get_for_metadata(base_t *base) { + return base->metadata_use_hooks ? &base->ehooks : + (struct ehooks_s *)&ehooks_default_extent_hooks; +} + /* * Allocate an extent that is at least as large as specified size, with * specified alignment. @@ -303,7 +309,7 @@ static edata_t * base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { malloc_mutex_assert_owner(tsdn, &base->mtx); - ehooks_t *ehooks = base_ehooks_get(base); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); /* * Drop mutex during base_block_alloc(), because an extent hook will be * called. @@ -342,7 +348,8 @@ b0get(void) { } base_t * -base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { +base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, + bool metadata_use_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; @@ -352,7 +359,9 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { * memory, and then initialize the ehooks within the base_t. 
*/ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, (extent_hooks_t *)extent_hooks, ind); + ehooks_init(&fake_ehooks, metadata_use_hooks ? + (extent_hooks_t *)extent_hooks : + (extent_hooks_t *)&ehooks_default_extent_hooks, ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -375,6 +384,7 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { base->extent_sn_next = extent_sn_next; base->blocks = block; base->auto_thp_switched = false; + base->metadata_use_hooks = metadata_use_hooks; for (szind_t i = 0; i < SC_NSIZES; i++) { edata_heap_new(&base->avail[i]); } @@ -397,7 +407,7 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks) { void base_delete(tsdn_t *tsdn, base_t *base) { - ehooks_t *ehooks = base_ehooks_get(base); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); base_block_t *next = base->blocks; do { base_block_t *block = next; @@ -512,6 +522,7 @@ base_postfork_child(tsdn_t *tsdn, base_t *base) { bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks); + b0 = base_new(tsdn, 0, + (extent_hooks_t *)&ehooks_default_extent_hooks, true); return (b0 == NULL); } diff --git a/src/ctl.c b/src/ctl.c index 3aaa5a74..491a333b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -315,6 +315,7 @@ INDEX_PROTO(experimental_arenas_i) CTL_PROTO(experimental_prof_recent_alloc_max) CTL_PROTO(experimental_prof_recent_alloc_dump) CTL_PROTO(experimental_batch_alloc) +CTL_PROTO(experimental_arenas_create_ext) #define MUTEX_STATS_CTL_PROTO_GEN(n) \ CTL_PROTO(stats_##n##_num_ops) \ @@ -870,6 +871,7 @@ static const ctl_named_node_t experimental_node[] = { {NAME("hooks"), CHILD(named, experimental_hooks)}, {NAME("utilization"), CHILD(named, experimental_utilization)}, {NAME("arenas"), CHILD(indexed, experimental_arenas)}, + {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, {NAME("prof_recent"), 
CHILD(named, experimental_prof_recent)}, {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, {NAME("thread"), CHILD(named, experimental_thread)} @@ -1242,7 +1244,7 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, } static unsigned -ctl_arena_init(tsd_t *tsd, extent_hooks_t *extent_hooks) { +ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { unsigned arena_ind; ctl_arena_t *ctl_arena; @@ -1260,7 +1262,7 @@ ctl_arena_init(tsd_t *tsd, extent_hooks_t *extent_hooks) { } /* Initialize new arena. */ - if (arena_init(tsd_tsdn(tsd), arena_ind, extent_hooks) == NULL) { + if (arena_init(tsd_tsdn(tsd), arena_ind, config) == NULL) { return UINT_MAX; } @@ -2881,8 +2883,11 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); + arena_config_t config = arena_config_default; + config.extent_hooks = new_extent_hooks; + arena = arena_init(tsd_tsdn(tsd), arena_ind, - new_extent_hooks); + &config); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -3069,15 +3074,14 @@ static int arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - extent_hooks_t *extent_hooks; unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); VERIFY_READ(unsigned); - extent_hooks = (extent_hooks_t *)&ehooks_default_extent_hooks; - WRITE(extent_hooks, extent_hooks_t *); - if ((arena_ind = ctl_arena_init(tsd, extent_hooks)) == UINT_MAX) { + arena_config_t config = arena_config_default; + WRITE(config.extent_hooks, extent_hooks_t *); + if ((arena_ind = ctl_arena_init(tsd, &config)) == UINT_MAX) { ret = EAGAIN; goto label_return; } @@ -3089,6 +3093,30 @@ label_return: return ret; } +static int +experimental_arenas_create_ext_ctl(tsd_t *tsd, + const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned 
arena_ind; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + + arena_config_t config = arena_config_default; + VERIFY_READ(unsigned); + WRITE(config, arena_config_t); + + if ((arena_ind = ctl_arena_init(tsd, &config)) == UINT_MAX) { + ret = EAGAIN; + goto label_return; + } + READ(arena_ind, unsigned); + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static int arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, diff --git a/src/jemalloc.c b/src/jemalloc.c index 18b54520..7ffa5533 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -384,7 +384,7 @@ narenas_total_get(void) { /* Create a new arena and insert it into the arenas array at index ind. */ static arena_t * -arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_init_locked(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; assert(ind <= narenas_total_get()); @@ -406,7 +406,7 @@ arena_init_locked(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } /* Actually initialize the arena. 
*/ - arena = arena_new(tsdn, ind, extent_hooks); + arena = arena_new(tsdn, ind, config); return arena; } @@ -430,11 +430,11 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { } arena_t * -arena_init(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { +arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; malloc_mutex_lock(tsdn, &arenas_lock); - arena = arena_init_locked(tsdn, ind, extent_hooks); + arena = arena_init_locked(tsdn, ind, config); malloc_mutex_unlock(tsdn, &arenas_lock); arena_new_create_background_thread(tsdn, ind); @@ -570,8 +570,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), choose[j], - (extent_hooks_t *) - &ehooks_default_extent_hooks); + &arena_config_default); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); @@ -1779,8 +1778,7 @@ malloc_init_hard_a0_locked() { * Initialize one arena here. The rest are lazily created in * arena_choose_hard(). 
*/ - if (arena_init(TSDN_NULL, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks) == NULL) { + if (arena_init(TSDN_NULL, 0, &arena_config_default) == NULL) { return true; } a0 = arena_get(TSDN_NULL, 0, false); diff --git a/test/integration/extent.c b/test/integration/extent.c index 831ef63f..7a028f18 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -2,6 +2,8 @@ #include "test/extent_hooks.h" +#include "jemalloc/internal/arena_types.h" + static void test_extent_body(unsigned arena_ind) { void *p; @@ -228,9 +230,58 @@ TEST_BEGIN(test_extent_auto_hook) { } TEST_END +static void +test_arenas_create_ext_base(arena_config_t config, + bool expect_hook_data, bool expect_hook_metadata) +{ + unsigned arena, arena1; + void *ptr; + size_t sz = sizeof(unsigned); + + extent_hooks_prep(); + + called_alloc = false; + expect_d_eq(mallctl("experimental.arenas_create_ext", + (void *)&arena, &sz, &config, sizeof(arena_config_t)), 0, + "Unexpected mallctl() failure"); + expect_b_eq(called_alloc, expect_hook_metadata, + "expected hook metadata alloc mismatch"); + + called_alloc = false; + ptr = mallocx(42, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); + expect_b_eq(called_alloc, expect_hook_data, + "expected hook data alloc mismatch"); + + expect_ptr_not_null(ptr, "Unexpected mallocx() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena1, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); + expect_u_eq(arena, arena1, "Unexpected arena index"); + dallocx(ptr, 0); +} + +TEST_BEGIN(test_arenas_create_ext_with_ehooks_no_metadata) { + arena_config_t config; + config.extent_hooks = &hooks; + config.metadata_use_hooks = false; + + test_arenas_create_ext_base(config, true, false); +} +TEST_END + +TEST_BEGIN(test_arenas_create_ext_with_ehooks_with_metadata) { + arena_config_t config; + config.extent_hooks = &hooks; + config.metadata_use_hooks = true; + + test_arenas_create_ext_base(config, true, true); +} +TEST_END + int main(void) { return test( 
test_extent_manual_hook, - test_extent_auto_hook); + test_extent_auto_hook, + test_arenas_create_ext_with_ehooks_no_metadata, + test_arenas_create_ext_with_ehooks_with_metadata); } diff --git a/test/unit/base.c b/test/unit/base.c index 5e990b34..07a43df7 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -32,7 +32,8 @@ TEST_BEGIN(test_base_hooks_default) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks); + (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); if (config_stats) { base_stats_get(tsdn, base, &allocated0, &resident, &mapped, @@ -74,7 +75,7 @@ TEST_BEGIN(test_base_hooks_null) { memcpy(&hooks, &hooks_null, sizeof(extent_hooks_t)); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - base = base_new(tsdn, 0, &hooks); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { @@ -120,7 +121,7 @@ TEST_BEGIN(test_base_hooks_not_null) { tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); did_alloc = false; - base = base_new(tsdn, 0, &hooks); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new() failure"); expect_true(did_alloc, "Expected alloc"); diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index fe920c9a..af1110a9 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -5,7 +5,7 @@ static void test_edata_cache_init(edata_cache_t *edata_cache) { base_t *base = base_new(TSDN_NULL, /* ind */ 1, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); bool err = edata_cache_init(edata_cache, base); assert_false(err, ""); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index dc3acc08..86012c75 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,7 +37,7 @@ static hpa_shard_t * create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { 
bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); test_data_t *test_data = malloc(sizeof(test_data_t)); diff --git a/test/unit/pa.c b/test/unit/pa.c index 4d3ad5e9..01d891df 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -53,7 +53,8 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { assert_ptr_not_null(test_data, ""); init_test_extent_hooks(&test_data->hooks); - base_t *base = base_new(TSDN_NULL, /* ind */ 1, &test_data->hooks); + base_t *base = base_new(TSDN_NULL, /* ind */ 1, + &test_data->hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); test_data->base = base; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 7b2a4e36..82b617bd 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -12,7 +12,8 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -52,7 +53,8 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -103,7 +105,8 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, PAGE + 1, @@ -152,7 +155,8 @@ TEST_BEGIN(test_rtree_random) { sfmt_t *sfmt = init_gen_rand(SEED); 
tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[NSET]; @@ -250,7 +254,8 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, TEST_BEGIN(test_rtree_range) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks); + base_t *base = base_new(tsdn, 0, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; diff --git a/test/unit/sec.c b/test/unit/sec.c index 82b0c9d9..763e6087 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -42,7 +42,7 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, * short-running, and SECs are arena-scoped in reality. */ base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); assert_false(err, "Unexpected initialization failure"); @@ -442,7 +442,7 @@ TEST_BEGIN(test_nshards_0) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks); + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); sec_opts_t opts = SEC_OPTS_DEFAULT; opts.nshards = 0; From deb8e62a837b6dd303128a544501a7dc9677e47a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 26 Apr 2021 14:22:25 -0700 Subject: [PATCH 2117/2608] Implement guard pages. Adding guarded extents, which are regular extents surrounded by guard pages (mprotected). To reduce syscalls, small guarded extents are cached as a separate eset in ecache, and decay through the dirty / muzzy / retained pipeline as usual. 
--- Makefile.in | 2 + configure.ac | 12 ++ include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/ecache.h | 13 +- include/jemalloc/internal/edata.h | 26 ++- include/jemalloc/internal/ehooks.h | 41 ++++ include/jemalloc/internal/extent.h | 4 +- include/jemalloc/internal/guard.h | 76 +++++++ .../internal/jemalloc_internal_defs.h.in | 3 + include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/pages.h | 2 + include/jemalloc/internal/pai.h | 7 +- include/jemalloc/internal/tsd.h | 4 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + src/arena.c | 11 +- src/ecache.c | 4 + src/ehooks.c | 10 + src/extent.c | 78 +++++-- src/guard.c | 63 ++++++ src/hpa.c | 7 +- src/jemalloc.c | 9 + src/pa.c | 27 ++- src/pac.c | 86 ++++++-- src/pages.c | 47 +++- src/pai.c | 2 +- src/sec.c | 9 +- src/tsd.c | 3 + test/include/test/arena_decay.h | 149 +++++++++++++ test/include/test/guard.h | 6 + test/unit/arena_decay.c | 150 +------------ test/unit/double_free.c | 25 ++- test/unit/guard.c | 201 ++++++++++++++++++ test/unit/guard.sh | 3 + test/unit/hpa.c | 16 +- test/unit/hpa_background_thread.c | 4 + test/unit/pa.c | 2 +- test/unit/retained.c | 7 +- test/unit/sec.c | 49 +++-- 41 files changed, 920 insertions(+), 251 deletions(-) create mode 100644 include/jemalloc/internal/guard.h create mode 100644 src/guard.c create mode 100644 test/include/test/arena_decay.h create mode 100644 test/include/test/guard.h create mode 100644 test/unit/guard.c create mode 100644 test/unit/guard.sh diff --git a/Makefile.in b/Makefile.in index a6f61ced..abd361fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,6 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/fxp.c \ + $(srcroot)src/guard.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ 
$(srcroot)src/hpa_hooks.c \ @@ -218,6 +219,7 @@ TESTS_UNIT := \ ${srcroot}test/unit/fb.c \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ + ${srcroot}test/unit/guard.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/configure.ac b/configure.ac index 5a5887ac..7e2b44c5 100644 --- a/configure.ac +++ b/configure.ac @@ -2256,6 +2256,18 @@ else fi fi +dnl ============================================================================ +dnl Check for mprotect(2). + +JE_COMPILABLE([mprotect(2)], [ +#include +], [ + mprotect((void *)0, 0, PROT_NONE); +], [je_cv_mprotect]) +if test "x${je_cv_mprotect}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ]) +fi + dnl ============================================================================ dnl Check for __builtin_clz(), __builtin_clzl(), and __builtin_clzll(). diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 5410b160..fa81537c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -221,7 +221,8 @@ large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { * The cost is low enough (as edata will be accessed anyway) to be * enabled all the time. 
*/ - if (unlikely(edata_state_get(edata) != extent_state_active)) { + if (unlikely(edata == NULL || + edata_state_get(edata) != extent_state_active)) { safety_check_fail("Invalid deallocation detected: " "pages being freed (%p) not currently active, " "possibly caused by double free bugs.", diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index cc2752f5..dd1bc320 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -2,12 +2,14 @@ #define JEMALLOC_INTERNAL_ECACHE_H #include "jemalloc/internal/eset.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/mutex.h" typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; eset_t eset; + eset_t guarded_eset; /* All stored extents must be in the same state. */ extent_state_t state; /* The index of the ehooks the ecache is associated with. */ @@ -21,17 +23,22 @@ struct ecache_s { static inline size_t ecache_npages_get(ecache_t *ecache) { - return eset_npages_get(&ecache->eset); + return eset_npages_get(&ecache->eset) + + eset_npages_get(&ecache->guarded_eset); } + /* Get the number of extents in the given page size index. */ static inline size_t ecache_nextents_get(ecache_t *ecache, pszind_t ind) { - return eset_nextents_get(&ecache->eset, ind); + return eset_nextents_get(&ecache->eset, ind) + + eset_nextents_get(&ecache->guarded_eset, ind); } + /* Get the sum total bytes of the extents in the given page size index. 
*/ static inline size_t ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { - return eset_nbytes_get(&ecache->eset, ind); + return eset_nbytes_get(&ecache->eset, ind) + + eset_nbytes_get(&ecache->guarded_eset, ind); } static inline unsigned diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index ff14982c..af039ea7 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -98,12 +98,13 @@ struct edata_s { * c: committed * p: pai * z: zeroed + * g: guarded * t: state * i: szind * f: nfree * s: bin_shard * - * 00000000 ... 00000sss sssfffff fffffiii iiiiittt zpcbaaaa aaaaaaaa + * 00000000 ... 0000ssss ssffffff ffffiiii iiiitttg zpcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -123,6 +124,9 @@ struct edata_s { * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. * + * guarded: The guarded flag is use by the sanitizer to track whether + * the extent has page guards around it. + * * state: The state flag is an extent_state_t. 
* * szind: The szind flag indicates usable size class index for @@ -158,8 +162,12 @@ struct edata_s { #define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) #define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_GUARDED_WIDTH 1 +#define EDATA_BITS_GUARDED_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_GUARDED_MASK MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT) + #define EDATA_BITS_STATE_WIDTH 3 -#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT) #define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) #define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) @@ -293,6 +301,12 @@ edata_state_get(const edata_t *edata) { EDATA_BITS_STATE_SHIFT); } +static inline bool +edata_guarded_get(const edata_t *edata) { + return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) >> + EDATA_BITS_GUARDED_SHIFT); +} + static inline bool edata_zeroed_get(const edata_t *edata) { return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >> @@ -505,6 +519,12 @@ edata_state_set(edata_t *edata, extent_state_t state) { ((uint64_t)state << EDATA_BITS_STATE_SHIFT); } +static inline void +edata_guarded_set(edata_t *edata, bool guarded) { + edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) | + ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT); +} + static inline void edata_zeroed_set(edata_t *edata, bool zeroed) { edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) | @@ -588,6 +608,7 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, edata_szind_set(edata, szind); edata_sn_set(edata, sn); edata_state_set(edata, state); + edata_guarded_set(edata, false); edata_zeroed_set(edata, zeroed); edata_committed_set(edata, committed); edata_pai_set(edata, pai); @@ -606,6 +627,7 @@ edata_binit(edata_t *edata, void *addr, 
size_t bsize, uint64_t sn) { edata_szind_set(edata, SC_NSIZES); edata_sn_set(edata, sn); edata_state_set(edata, extent_state_active); + edata_guarded_set(edata, false); edata_zeroed_set(edata, true); edata_committed_set(edata, true); /* diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 064ecf5a..8d9513e2 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -63,6 +63,8 @@ bool ehooks_default_merge(extent_hooks_t *extent_hooks, void *addr_a, unsigned arena_ind); bool ehooks_default_merge_impl(tsdn_t *tsdn, void *addr_a, void *addr_b); void ehooks_default_zero_impl(void *addr, size_t size); +void ehooks_default_guard_impl(void *guard1, void *guard2); +void ehooks_default_unguard_impl(void *guard1, void *guard2); /* * We don't officially support reentrancy from wtihin the extent hooks. But @@ -139,6 +141,15 @@ ehooks_merge_will_fail(ehooks_t *ehooks) { return ehooks_get_extent_hooks_ptr(ehooks)->merge == NULL; } +static inline bool +ehooks_guard_will_fail(ehooks_t *ehooks) { + /* + * Before the guard hooks are officially introduced, limit the use to + * the default hooks only. + */ + return !ehooks_are_default(ehooks); +} + /* * Some hooks are required to return zeroed memory in certain situations. 
In * debug mode, we do some heuristic checks that they did what they were supposed @@ -368,4 +379,34 @@ ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) { } } +static inline bool +ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { + bool err; + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + + if (extent_hooks == &ehooks_default_extent_hooks) { + ehooks_default_guard_impl(guard1, guard2); + err = false; + } else { + err = true; + } + + return err; +} + +static inline bool +ehooks_unguard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { + bool err; + extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); + + if (extent_hooks == &ehooks_default_extent_hooks) { + ehooks_default_unguard_impl(guard1, guard2); + err = false; + } else { + err = true; + } + + return err; +} + #endif /* JEMALLOC_INTERNAL_EHOOKS_H */ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 03eebdd5..73c55633 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,10 +21,10 @@ extern size_t opt_lg_extent_max_active_fit; edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, - bool zero); + bool zero, bool guarded); edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, - bool zero); + bool zero, bool guarded); void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, diff --git a/include/jemalloc/internal/guard.h b/include/jemalloc/internal/guard.h new file mode 100644 index 00000000..31f98c5f --- /dev/null +++ b/include/jemalloc/internal/guard.h @@ -0,0 +1,76 @@ +#ifndef JEMALLOC_INTERNAL_GUARD_H +#define JEMALLOC_INTERNAL_GUARD_H + +#include 
"jemalloc/internal/ehooks.h" +#include "jemalloc/internal/emap.h" + +#define PAGE_GUARDS_SIZE (2 * PAGE) + +#define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 +#define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 + +/* 0 means disabled, i.e. never guarded. */ +extern size_t opt_san_guard_large; +extern size_t opt_san_guard_small; + +void guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void tsd_san_init(tsd_t *tsd); + +static inline bool +san_enabled(void) { + return (opt_san_guard_large != 0 || opt_san_guard_small != 0); +} + +static inline bool +large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, + size_t alignment) { + if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || + tsdn_null(tsdn)) { + return false; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + uint64_t n = tsd_san_extents_until_guard_large_get(tsd); + assert(n >= 1); + if (n > 1) { + /* + * Subtract conditionally because the guard may not happen due + * to alignment or size restriction below. 
+ */ + *tsd_san_extents_until_guard_largep_get(tsd) = n - 1; + } + + if (n == 1 && (alignment <= PAGE) && + (size + PAGE_GUARDS_SIZE <= SC_LARGE_MAXCLASS)) { + *tsd_san_extents_until_guard_largep_get(tsd) = + opt_san_guard_large; + return true; + } else { + assert(tsd_san_extents_until_guard_large_get(tsd) >= 1); + return false; + } +} + +static inline bool +slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { + if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || + tsdn_null(tsdn)) { + return false; + } + + tsd_t *tsd = tsdn_tsd(tsdn); + uint64_t n = tsd_san_extents_until_guard_small_get(tsd); + assert(n >= 1); + if (n == 1) { + *tsd_san_extents_until_guard_smallp_get(tsd) = + opt_san_guard_small; + return true; + } else { + *tsd_san_extents_until_guard_smallp_get(tsd) = n - 1; + assert(tsd_san_extents_until_guard_small_get(tsd) >= 1); + return false; + } +} + +#endif /* JEMALLOC_INTERNAL_GUARD_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 78d1213e..418b0cb2 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -312,6 +312,9 @@ */ #undef JEMALLOC_MADVISE_NOCORE +/* Defined if mprotect(2) is available. */ +#undef JEMALLOC_HAVE_MPROTECT + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 97834131..3cf370c8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -167,7 +167,7 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); /* Gets an edata for the given allocation. 
*/ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, - size_t alignment, bool slab, szind_t szind, bool zero, + size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, bool *deferred_work_generated); /* Returns true on error, in which case nothing changed. */ bool pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 035364e2..3d7993dd 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -110,5 +110,7 @@ bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); void pages_set_thp_state (void *ptr, size_t size); +void pages_mark_guards(void *head, void *tail); +void pages_unmark_guards(void *head, void *tail); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index ca5f616a..f8f7d667 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -7,7 +7,8 @@ typedef struct pai_s pai_t; struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, + bool *deferred_work_generated); /* * Returns the number of extents added to the list (which may be fewer * than requested, in case of OOM). 
The list should already be @@ -37,8 +38,8 @@ struct pai_s { static inline edata_t * pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool *deferred_work_generated) { - return self->alloc(tsdn, self, size, alignment, zero, + bool guarded, bool *deferred_work_generated) { + return self->alloc(tsdn, self, size, alignment, zero, guarded, deferred_work_generated); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index d22fdc94..86d52778 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -73,6 +73,8 @@ typedef ql_elm(tsd_t) tsd_link_t; O(peak_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ + O(san_extents_until_guard_small, uint64_t, uint64_t) \ + O(san_extents_until_guard_large, uint64_t, uint64_t) \ O(iarena, arena_t *, arena_t *) \ O(arena, arena_t *, arena_t *) \ O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ @@ -103,6 +105,8 @@ typedef ql_elm(tsd_t) tsd_link_t; /* peak_dalloc_event_wait */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ + /* san_extents_until_guard_small */ 0, \ + /* san_extents_until_guard_large */ 0, \ /* iarena */ NULL, \ /* arena */ NULL, \ /* arena_decay_ticker */ \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 597b247b..75d66800 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -59,6 +59,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index d063a019..c5bb4cfe 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -61,6 +61,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 46633e82..d25768e1 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -59,6 +59,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index d063a019..c5bb4cfe 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -61,6 +61,9 @@ Source Files + + Source Files + Source Files diff --git a/src/arena.c b/src/arena.c index c2842c6c..8147d14b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" @@ -327,9 +328,10 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; + bool guarded = large_extent_decide_guard(tsdn, arena_get_ehooks(arena), + esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero, &deferred_work_generated); - + /* slab */ false, szind, zero, guarded, &deferred_work_generated); assert(deferred_work_generated == false); if (edata != NULL) { @@ -827,9 +829,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + bool guarded = slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, - PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, - &deferred_work_generated); + /* alignment */ PAGE, /* slab */ true, /* szind */ binind, + /* zero */ false, guarded, 
&deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); diff --git a/src/ecache.c b/src/ecache.c index 3c1a2274..26fc2112 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -1,6 +1,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/guard.h" + bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, bool delay_coalesce) { @@ -12,6 +14,8 @@ ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, ecache->ind = ind; ecache->delay_coalesce = delay_coalesce; eset_init(&ecache->eset, state); + eset_init(&ecache->guarded_eset, state); + return false; } diff --git a/src/ehooks.c b/src/ehooks.c index 5d12d003..383e9de6 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -244,6 +244,16 @@ ehooks_default_zero_impl(void *addr, size_t size) { } } +void +ehooks_default_guard_impl(void *guard1, void *guard2) { + pages_mark_guards(guard1, guard2); +} + +void +ehooks_default_unguard_impl(void *guard1, void *guard2) { + pages_unmark_guards(guard1, guard2); +} + const extent_hooks_t ehooks_default_extent_hooks = { ehooks_default_alloc, ehooks_default_dalloc, diff --git a/src/extent.c b/src/extent.c index 04001142..84ecd6b2 100644 --- a/src/extent.c +++ b/src/extent.c @@ -37,14 +37,14 @@ static atomic_zu_t highpages; static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, - bool zero, bool *commit, bool growing_retained); + bool zero, bool *commit, bool growing_retained, bool guarded); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced); static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); static edata_t 
*extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, - bool zero, bool *commit); + bool zero, bool *commit, bool guarded); static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); @@ -80,7 +80,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t * ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - edata_t *expand_edata, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool guarded) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -88,14 +89,15 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool commit = true; edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, - size, alignment, zero, &commit, false); + size, alignment, zero, &commit, false, guarded); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); return edata; } edata_t * ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, - edata_t *expand_edata, size_t size, size_t alignment, bool zero) { + edata_t *expand_edata, size_t size, size_t alignment, bool zero, + bool guarded) { assert(size != 0); assert(alignment != 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), @@ -103,7 +105,7 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata, - size, alignment, zero, &commit); + size, alignment, zero, &commit, guarded); if (edata == NULL) { if (opt_retain && expand_edata != NULL) { /* @@ -114,6 +116,14 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, */ return NULL; } + if (guarded) { + /* + * Means no cached guarded extents 
available (and no + * grow_retained was attempted). The pac_alloc flow + * will alloc regular extents to make new guarded ones. + */ + return NULL; + } void *new_addr = (expand_edata == NULL) ? NULL : edata_past_get(expand_edata); edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, @@ -151,9 +161,19 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata; while (true) { /* Get the LRU extent, if any. */ - edata = edata_list_inactive_first(&ecache->eset.lru); + eset_t *eset = &ecache->eset; + edata = edata_list_inactive_first(&eset->lru); if (edata == NULL) { - goto label_return; + /* + * Next check if there are guarded extents. They are + * more expensive to purge (since they are not + * mergeable), thus in favor of caching them longer. + */ + eset = &ecache->guarded_eset; + edata = edata_list_inactive_first(&eset->lru); + if (edata == NULL) { + goto label_return; + } } /* Check the eviction limit. */ size_t extents_npages = ecache_npages_get(ecache); @@ -161,7 +181,7 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata = NULL; goto label_return; } - eset_remove(&ecache->eset, edata); + eset_remove(eset, edata); if (!ecache->delay_coalesce) { break; } @@ -234,17 +254,19 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, assert(edata_state_get(edata) == extent_state_active); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); - eset_insert(&ecache->eset, edata); + eset_t *eset = edata_guarded_get(edata) ? 
&ecache->guarded_eset : + &ecache->eset; + eset_insert(eset, edata); } static void -extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, +extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); assert(edata_state_get(edata) == ecache->state || edata_state_get(edata) == extent_state_merging); - eset_remove(&ecache->eset, edata); + eset_remove(eset, edata); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); } @@ -350,7 +372,8 @@ extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, */ static edata_t * extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment) { + ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, + bool guarded) { malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(alignment > 0); if (config_debug && expand_edata != NULL) { @@ -366,6 +389,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_t *edata; + eset_t *eset = guarded ? &ecache->guarded_eset : &ecache->eset; if (expand_edata != NULL) { edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, expand_edata, EXTENT_PAI_PAC, ecache->state); @@ -382,7 +406,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * If split and merge are not allowed (Windows w/o retain), try * exact fit only. */ - bool exact_only = (!maps_coalesce && !opt_retain); + bool exact_only = (!maps_coalesce && !opt_retain) || guarded; /* * A large extent might be broken up from its original size to * some small size to satisfy a small request. When that small @@ -394,13 +418,13 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ unsigned lg_max_fit = ecache->delay_coalesce ? 
(unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; - edata = eset_fit(&ecache->eset, size, alignment, exact_only, - lg_max_fit); + edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit); } if (edata == NULL) { return NULL; } - extent_activate_locked(tsdn, pac, ecache, edata); + assert(!guarded || edata_guarded_get(edata)); + extent_activate_locked(tsdn, pac, ecache, eset, edata); return edata; } @@ -551,13 +575,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t * extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool zero, - bool *commit, bool growing_retained) { + bool *commit, bool growing_retained, bool guarded) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); + assert(!guarded || expand_edata == NULL); malloc_mutex_lock(tsdn, &ecache->mtx); edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - expand_edata, size, alignment); + expand_edata, size, alignment, guarded); if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; @@ -734,7 +759,7 @@ label_err: static edata_t * extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, - bool *commit) { + bool *commit, bool guarded) { assert(size != 0); assert(alignment != 0); @@ -742,13 +767,13 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata = extent_recycle(tsdn, pac, ehooks, &pac->ecache_retained, expand_edata, size, alignment, zero, commit, - /* growing_retained */ true); + /* growing_retained */ true, guarded); if (edata != NULL) { malloc_mutex_unlock(tsdn, &pac->grow_mtx); if (config_prof) { extent_gdump_add(tsdn, edata); } - } else if (opt_retain && expand_edata == NULL) { + } else if (opt_retain && expand_edata == NULL && !guarded) { edata = extent_grow_retained(tsdn, pac, ehooks, size, 
alignment, zero, commit); /* extent_grow_retained() always releases pac->grow_mtx. */ @@ -910,6 +935,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, emap_assert_mapped(tsdn, pac->emap, edata); + if (edata_guarded_get(edata)) { + goto label_skip_coalesce; + } if (!ecache->delay_coalesce) { edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, NULL); @@ -931,6 +959,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return; } } +label_skip_coalesce: extent_deactivate_locked(tsdn, pac, ecache, edata); malloc_mutex_unlock(tsdn, &ecache->mtx); @@ -981,6 +1010,11 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Avoid calling the default extent_dalloc unless have to. */ if (!ehooks_dalloc_will_fail(ehooks)) { + /* Restore guard pages for dalloc / unmap. */ + if (edata_guarded_get(edata)) { + assert(ehooks_are_default(ehooks)); + unguard_pages(tsdn, ehooks, edata, pac->emap); + } /* * Deregister first to avoid a race with other allocating * threads, and reregister if deallocation fails. diff --git a/src/guard.c b/src/guard.c new file mode 100644 index 00000000..07232199 --- /dev/null +++ b/src/guard.c @@ -0,0 +1,63 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/tsd.h" + +/* The sanitizer options. 
*/ +size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; +size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; + +void +guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + emap_deregister_boundary(tsdn, emap, edata); + + size_t size_with_guards = edata_size_get(edata); + size_t usize = size_with_guards - PAGE_GUARDS_SIZE; + + uintptr_t guard1 = (uintptr_t)edata_base_get(edata); + uintptr_t addr = guard1 + PAGE; + uintptr_t guard2 = addr + usize; + + assert(edata_state_get(edata) == extent_state_active); + ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); + + /* Update the guarded addr and usable size of the edata. */ + edata_size_set(edata, usize); + edata_addr_set(edata, (void *)addr); + edata_guarded_set(edata, true); + + /* The new boundary will be registered on the pa_alloc path. */ +} + +void +unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + /* Remove the inner boundary which no longer exists. */ + emap_deregister_boundary(tsdn, emap, edata); + + size_t size = edata_size_get(edata); + size_t size_with_guards = size + PAGE_GUARDS_SIZE; + + uintptr_t addr = (uintptr_t)edata_base_get(edata); + uintptr_t guard1 = addr - PAGE; + uintptr_t guard2 = addr + size; + + assert(edata_state_get(edata) == extent_state_active); + ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); + + /* Update the true addr and usable size of the edata. */ + edata_size_set(edata, size_with_guards); + edata_addr_set(edata, (void *)guard1); + edata_guarded_set(edata, false); + + /* Then re-register the outer boundary including the guards. 
*/ + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, /* slab */ false); +} + +void +tsd_san_init(tsd_t *tsd) { + *tsd_san_extents_until_guard_smallp_get(tsd) = opt_san_guard_small; + *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; +} diff --git a/src/hpa.c b/src/hpa.c index d7422a3c..82b9c992 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -9,7 +9,7 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -750,8 +750,9 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool *deferred_work_generated) { + bool guarded, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); + assert(!guarded); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -796,7 +797,6 @@ hpa_dalloc_prepare_unlocked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); - assert(!edata_slab_get(edata)); assert(edata_committed_get(edata)); assert(edata_base_get(edata) != NULL); @@ -865,6 +865,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { + assert(!edata_guarded_get(edata)); /* Just a dalloc_batch of size 1; this lets us share logic. 
*/ edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7ffa5533..907265c0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -10,6 +10,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" @@ -1616,6 +1617,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + + CONF_HANDLE_SIZE_T(opt_san_guard_small, + "san_guard_small", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_san_guard_large, + "san_guard_large", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE diff --git a/src/pa.c b/src/pa.c index 249de24a..649b9c2e 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,6 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/hpa.h" static void @@ -118,15 +119,17 @@ pa_get_pai(pa_shard_t *shard, edata_t *edata) { edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, - bool slab, szind_t szind, bool zero, bool *deferred_work_generated) { + bool slab, szind_t szind, bool zero, bool guarded, + bool *deferred_work_generated) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + assert(!guarded || alignment <= PAGE); edata_t *edata = NULL; *deferred_work_generated = false; - if (pa_shard_uses_hpa(shard)) { + if (!guarded && pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, - zero, deferred_work_generated); + zero, /* guarded */ false, deferred_work_generated); } /* 
* Fall back to the PAC if the HPA is off or couldn't serve the given @@ -134,10 +137,10 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, */ if (edata == NULL) { edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero, - deferred_work_generated); + guarded, deferred_work_generated); } - if (edata != NULL) { + assert(edata_size_get(edata) == size); pa_nactive_add(shard, size >> LG_PAGE); emap_remap(tsdn, shard->emap, edata, szind, slab); edata_szind_set(edata, szind); @@ -145,8 +148,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, if (slab && (size > 2 * PAGE)) { emap_register_interior(tsdn, shard->emap, edata, szind); } - } - if (edata != NULL) { assert(edata_arena_ind_get(edata) == shard->ind); } return edata; @@ -158,7 +159,9 @@ pa_expand(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, assert(new_size > old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); - + if (edata_guarded_get(edata)) { + return true; + } size_t expand_amount = new_size - old_size; pai_t *pai = pa_get_pai(shard, edata); @@ -181,6 +184,9 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, assert(new_size < old_size); assert(edata_size_get(edata) == old_size); assert((new_size & PAGE_MASK) == 0); + if (edata_guarded_get(edata)) { + return true; + } size_t shrink_amount = old_size - new_size; pai_t *pai = pa_get_pai(shard, edata); @@ -202,7 +208,10 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, emap_remap(tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (edata_slab_get(edata)) { emap_deregister_interior(tsdn, shard->emap, edata); - edata_slab_set(edata, false); + /* + * The slab state of the extent isn't cleared. It may be used + * by the pai implementation, e.g. to make caching decisions. 
+ */ } edata_addr_set(edata, edata_base_get(edata)); edata_szind_set(edata, SC_NSIZES); diff --git a/src/pac.c b/src/pac.c index 03e31972..8ce3159c 100644 --- a/src/pac.c +++ b/src/pac.c @@ -2,9 +2,10 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/pac.h" +#include "jemalloc/internal/guard.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -109,28 +110,66 @@ pac_may_have_muzzy(pac_t *pac) { } static edata_t * -pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; +pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, + size_t alignment, bool zero, bool guarded) { + assert(!guarded || alignment <= PAGE); - *deferred_work_generated = false; - - ehooks_t *ehooks = pac_ehooks_get(pac); edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - NULL, size, alignment, zero); + NULL, size, alignment, zero, guarded); if (edata == NULL && pac_may_have_muzzy(pac)) { edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - NULL, size, alignment, zero); + NULL, size, alignment, zero, guarded); } if (edata == NULL) { edata = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, NULL, size, alignment, zero); + &pac->ecache_retained, NULL, size, alignment, zero, + guarded); if (config_stats && edata != NULL) { atomic_fetch_add_zu(&pac->stats->pac_mapped, size, ATOMIC_RELAXED); } } + + return edata; +} + +static edata_t * +pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, + size_t alignment, bool 
zero) { + assert(alignment <= PAGE); + + size_t size_with_guards = size + PAGE_GUARDS_SIZE; + /* Alloc a non-guarded extent first.*/ + edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, + /* alignment */ PAGE, zero, /* guarded */ false); + if (edata != NULL) { + /* Add guards around it. */ + assert(edata_size_get(edata) == size_with_guards); + guard_pages(tsdn, ehooks, edata, pac->emap); + } + assert(edata == NULL || (edata_guarded_get(edata) && + edata_size_get(edata) == size)); + + return edata; +} + +static edata_t * +pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero, bool guarded, bool *deferred_work_generated) { + *deferred_work_generated = false; + + pac_t *pac = (pac_t *)self; + ehooks_t *ehooks = pac_ehooks_get(pac); + + edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, + zero, guarded); + if (edata == NULL && guarded) { + /* No cached guarded extents; creating a new one. */ + edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, + alignment, zero); + } + return edata; } @@ -149,15 +188,15 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, - edata, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero, /* guarded*/ false); if (trail == NULL) { trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, - edata, expand_amount, PAGE, zero); + edata, expand_amount, PAGE, zero, /* guarded*/ false); } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, &pac->ecache_retained, edata, expand_amount, PAGE, - zero); + zero, /* guarded */ false); mapped_add = expand_amount; } if (trail == NULL) { @@ -203,6 +242,27 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); + + if (edata_guarded_get(edata)) { + /* + * Because cached guarded extents do exact fit 
only, large + * guarded extents are restored on dalloc eagerly (otherwise + * they will not be reused efficiently). Slab sizes have a + * limited number of size classes, and tend to cycle faster. + * + * In the case where coalesce is restrained (VirtualFree on + * Windows), guarded extents are also not cached -- otherwise + * during arena destroy / reset, the retained extents would not + * be whole regions (i.e. they are split between regular and + * guarded). + */ + if (!edata_slab_get(edata) || !maps_coalesce) { + assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || + !maps_coalesce); + unguard_pages(tsdn, ehooks, edata, pac->emap); + } + } + ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); /* Purging of deallocated pages is deferred */ *deferred_work_generated = true; diff --git a/src/pages.c b/src/pages.c index 42618858..a8d9988b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -316,14 +316,10 @@ pages_unmap(void *addr, size_t size) { } static bool -pages_commit_impl(void *addr, size_t size, bool commit) { +os_pages_commit(void *addr, size_t size, bool commit) { assert(PAGE_ADDR2BASE(addr) == addr); assert(PAGE_CEILING(size) == size); - if (os_overcommits) { - return true; - } - #ifdef _WIN32 return (commit ? 
(addr != VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); @@ -348,6 +344,15 @@ pages_commit_impl(void *addr, size_t size, bool commit) { #endif } +static bool +pages_commit_impl(void *addr, size_t size, bool commit) { + if (os_overcommits) { + return true; + } + + return os_pages_commit(addr, size, commit); +} + bool pages_commit(void *addr, size_t size) { return pages_commit_impl(addr, size, true); @@ -358,6 +363,38 @@ pages_decommit(void *addr, size_t size) { return pages_commit_impl(addr, size, false); } +void +pages_mark_guards(void *head, void *tail) { + assert(head != NULL && tail != NULL); + assert((uintptr_t)head < (uintptr_t)tail); +#ifdef JEMALLOC_HAVE_MPROTECT + mprotect(head, PAGE, PROT_NONE); + mprotect(tail, PAGE, PROT_NONE); +#else + /* Decommit sets to PROT_NONE / MEM_DECOMMIT. */ + os_pages_commit(head, PAGE, false); + os_pages_commit(tail, PAGE, false); +#endif +} + +void +pages_unmark_guards(void *head, void *tail) { + assert(head != NULL && tail != NULL); + assert((uintptr_t)head < (uintptr_t)tail); +#ifdef JEMALLOC_HAVE_MPROTECT + size_t range = (uintptr_t)tail - (uintptr_t)head + PAGE; + if (range <= SC_LARGE_MINCLASS) { + mprotect(head, range, PROT_READ | PROT_WRITE); + } else { + mprotect(head, PAGE, PROT_READ | PROT_WRITE); + mprotect(tail, PAGE, PROT_READ | PROT_WRITE); + } +#else + os_pages_commit(head, PAGE, true); + os_pages_commit(tail, PAGE, true); +#endif +} + bool pages_purge_lazy(void *addr, size_t size) { assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); diff --git a/src/pai.c b/src/pai.c index e863a9be..86b8ee5b 100644 --- a/src/pai.c +++ b/src/pai.c @@ -7,7 +7,7 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, for (size_t i = 0; i < nallocs; i++) { bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, &deferred_by_alloc); + /* zero */ false, /* guarded */ false, &deferred_by_alloc); 
*deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; diff --git a/src/sec.c b/src/sec.c index c6f611f5..0f95a0d0 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/sec.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -218,8 +218,9 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static edata_t * sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool *deferred_work_generated) { + bool guarded, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); + assert(!guarded); sec_t *sec = (sec_t *)self; *deferred_work_generated = false; @@ -227,7 +228,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - deferred_work_generated); + /* guarded */ false, deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -250,7 +251,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, size); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, deferred_work_generated); + zero, /* guarded */ false, deferred_work_generated); } } return edata; diff --git a/src/tsd.c b/src/tsd.c index 6820eb62..31ff2f23 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/guard.h" #include 
"jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -242,6 +243,7 @@ tsd_data_init(tsd_t *tsd) { rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); tsd_prng_state_init(tsd); tsd_te_init(tsd); /* event_init may use the prng state above. */ + tsd_san_init(tsd); return tsd_tcache_enabled_data_init(tsd); } @@ -269,6 +271,7 @@ tsd_data_init_nocleanup(tsd_t *tsd) { *tsd_reentrancy_levelp_get(tsd) = 1; tsd_prng_state_init(tsd); tsd_te_init(tsd); /* event_init may use the prng state above. */ + tsd_san_init(tsd); assert_tsd_data_cleanup_done(tsd); return false; diff --git a/test/include/test/arena_decay.h b/test/include/test/arena_decay.h new file mode 100644 index 00000000..da659212 --- /dev/null +++ b/test/include/test/arena_decay.h @@ -0,0 +1,149 @@ +static unsigned +do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + unsigned arena_ind; + size_t sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), + 0, "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, + "Unexpected mallctlbymib() failure"); + + return arena_ind; +} + +static void +do_arena_destroy(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, 
miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static void +do_epoch(void) { + uint64_t epoch = 1; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); +} + +static void +do_purge(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static void +do_decay(unsigned arena_ind) { + size_t mib[3]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); +} + +static uint64_t +get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + uint64_t npurge = 0; + size_t sz = sizeof(npurge); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), + config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); + return npurge; +} + +static uint64_t +get_arena_dirty_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); +} + +static uint64_t +get_arena_dirty_purged(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); +} + +static uint64_t +get_arena_muzzy_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static uint64_t +get_arena_npurge(unsigned arena_ind) { + do_epoch(); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); +} + +static size_t +get_arena_pdirty(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pdirty; + size_t sz = sizeof(pdirty); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pdirty; +} + +static size_t +get_arena_pmuzzy(unsigned arena_ind) { + do_epoch(); + size_t mib[4]; + size_t miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); + mib[2] = (size_t)arena_ind; + size_t pmuzzy; + size_t sz = sizeof(pmuzzy); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + return pmuzzy; +} + +static void * +do_mallocx(size_t size, int flags) { + void *p = mallocx(size, flags); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + return p; +} + +static void +generate_dirty(unsigned arena_ind, size_t size) { + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + void *p = 
do_mallocx(size, flags); + dallocx(p, flags); +} + diff --git a/test/include/test/guard.h b/test/include/test/guard.h new file mode 100644 index 00000000..691dc508 --- /dev/null +++ b/test/include/test/guard.h @@ -0,0 +1,6 @@ +static inline bool +extent_is_guarded(tsdn_t *tsdn, void *ptr) { + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + return edata_guarded_get(edata); +} + diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 9fca5385..bbfd23a5 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -1,4 +1,5 @@ #include "test/jemalloc_test.h" +#include "test/arena_decay.h" #include "jemalloc/internal/ticker.h" @@ -22,155 +23,6 @@ nstime_update_mock(nstime_t *time) { } } -static unsigned -do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - - expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); - - return arena_ind; -} - -static void -do_arena_destroy(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - 
"Unexpected mallctlbymib() failure"); -} - -void -do_epoch(void) { - uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); -} - -void -do_purge(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -void -do_decay(unsigned arena_ind) { - size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); -} - -static uint64_t -get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - uint64_t npurge = 0; - size_t sz = sizeof(npurge); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), - config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); - return npurge; -} - -static uint64_t -get_arena_dirty_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); -} - -static uint64_t -get_arena_dirty_purged(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); -} - -static uint64_t -get_arena_muzzy_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static uint64_t -get_arena_npurge(unsigned arena_ind) { - do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + - get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); -} - -static size_t -get_arena_pdirty(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pdirty; - size_t sz = sizeof(pdirty); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pdirty, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pdirty; -} - -static size_t -get_arena_pmuzzy(unsigned arena_ind) { - do_epoch(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, - "Unexpected mallctlnametomib() failure"); - mib[2] = (size_t)arena_ind; - size_t pmuzzy; - size_t sz = sizeof(pmuzzy); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&pmuzzy, &sz, NULL, 0), 0, - "Unexpected mallctlbymib() failure"); - return pmuzzy; -} - -static void * -do_mallocx(size_t size, int flags) { - void *p = mallocx(size, flags); - expect_ptr_not_null(p, "Unexpected mallocx() failure"); - return p; -} - -static void -generate_dirty(unsigned arena_ind, size_t size) { - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; - void *p = 
do_mallocx(size, flags); - dallocx(p, flags); -} - TEST_BEGIN(test_decay_ticks) { test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); diff --git a/test/unit/double_free.c b/test/unit/double_free.c index 73155b9c..f98484c4 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -1,4 +1,5 @@ #include "test/jemalloc_test.h" +#include "test/guard.h" #include "jemalloc/internal/safety_check.h" @@ -30,8 +31,18 @@ TEST_BEGIN(test_large_double_free_tcache) { test_large_double_free_pre(); char *ptr = malloc(SC_LARGE_MINCLASS); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); - free(ptr); + if (!guarded) { + free(ptr); + } else { + /* + * Skip because guarded extents may unguard immediately on + * deallocation, in which case the second free will crash before + * reaching the intended safety check. + */ + fake_abort_called = true; + } mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); test_large_double_free_post(); } @@ -43,8 +54,18 @@ TEST_BEGIN(test_large_double_free_no_tcache) { test_large_double_free_pre(); char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); dallocx(ptr, MALLOCX_TCACHE_NONE); - dallocx(ptr, MALLOCX_TCACHE_NONE); + if (!guarded) { + dallocx(ptr, MALLOCX_TCACHE_NONE); + } else { + /* + * Skip because guarded extents may unguard immediately on + * deallocation, in which case the second free will crash before + * reaching the intended safety check. 
+ */ + fake_abort_called = true; + } test_large_double_free_post(); } TEST_END diff --git a/test/unit/guard.c b/test/unit/guard.c new file mode 100644 index 00000000..43381e44 --- /dev/null +++ b/test/unit/guard.c @@ -0,0 +1,201 @@ +#include "test/jemalloc_test.h" +#include "test/arena_decay.h" +#include "test/guard.h" + +#include "jemalloc/internal/guard.h" + +static void +verify_extent_guarded(tsdn_t *tsdn, void *ptr) { + expect_true(extent_is_guarded(tsdn, ptr), + "All extents should be guarded."); +} + +#define MAX_SMALL_ALLOCATIONS 4096 +void *small_alloc[MAX_SMALL_ALLOCATIONS]; + +TEST_BEGIN(test_guarded_small) { + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + unsigned npages = 16, pages_found = 0, ends_found = 0; + VARIABLE_ARRAY(uintptr_t, pages, npages); + + /* Allocate to get sanitized pointers. */ + size_t sz = PAGE / 8; + unsigned n_alloc = 0; + while (n_alloc < MAX_SMALL_ALLOCATIONS) { + void *ptr = malloc(sz); + expect_ptr_not_null(ptr, "Unexpected malloc() failure"); + small_alloc[n_alloc] = ptr; + verify_extent_guarded(tsdn, ptr); + if ((uintptr_t)ptr % PAGE == 0) { + pages[pages_found++] = (uintptr_t)ptr; + } + if (((uintptr_t)ptr + (uintptr_t)sz) % PAGE == 0) { + ends_found++; + } + n_alloc++; + if (pages_found == npages && ends_found == npages) { + break; + } + } + /* Should found the ptrs being checked for overflow and underflow. */ + expect_u_eq(pages_found, npages, "Could not found the expected pages."); + expect_u_eq(ends_found, npages, "Could not found the expected pages."); + + /* Verify the pages are not continuous, i.e. separated by guards. */ + for (unsigned i = 0; i < npages - 1; i++) { + for (unsigned j = i + 1; j < npages; j++) { + uintptr_t ptr_diff = pages[i] > pages[j] ? 
+ pages[i] - pages[j] : pages[j] - pages[i]; + expect_zu_gt((size_t)ptr_diff, 2 * PAGE, + "Pages should not be next to each other."); + } + } + + for (unsigned i = 0; i < n_alloc + 1; i++) { + free(small_alloc[i]); + } +} +TEST_END + +TEST_BEGIN(test_guarded_large) { + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + unsigned nlarge = 32; + VARIABLE_ARRAY(uintptr_t, large, nlarge); + + /* Allocate to get sanitized pointers. */ + size_t large_sz = SC_LARGE_MINCLASS; + for (unsigned i = 0; i < nlarge; i++) { + void *ptr = malloc(large_sz); + verify_extent_guarded(tsdn, ptr); + expect_ptr_not_null(ptr, "Unexpected malloc() failure"); + large[i] = (uintptr_t)ptr; + } + + /* Verify the pages are not continuous, i.e. separated by guards. */ + uintptr_t min_diff = (uintptr_t)-1; + for (unsigned i = 0; i < nlarge; i++) { + for (unsigned j = i + 1; j < nlarge; j++) { + uintptr_t ptr_diff = large[i] > large[j] ? + large[i] - large[j] : large[j] - large[i]; + expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, + "Pages should not be next to each other."); + if (ptr_diff < min_diff) { + min_diff = ptr_diff; + } + } + } + expect_zu_ge((size_t)min_diff, large_sz + 2 * PAGE, + "Pages should not be next to each other."); + + for (unsigned i = 0; i < nlarge; i++) { + free((void *)large[i]); + } +} +TEST_END + +static void +verify_pdirty(unsigned arena_ind, uint64_t expected) { + uint64_t pdirty = get_arena_pdirty(arena_ind); + expect_u64_eq(pdirty, expected / PAGE, + "Unexpected dirty page amount."); +} + +static void +verify_pmuzzy(unsigned arena_ind, uint64_t expected) { + uint64_t pmuzzy = get_arena_pmuzzy(arena_ind); + expect_u64_eq(pmuzzy, expected / PAGE, + "Unexpected muzzy page amount."); +} + +TEST_BEGIN(test_guarded_decay) { + unsigned arena_ind = do_arena_create(-1, -1); + do_decay(arena_ind); + do_purge(arena_ind); + + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + /* Verify that guarded extents as dirty. 
*/ + size_t sz1 = PAGE, sz2 = PAGE * 2; + /* W/o maps_coalesce, guarded extents are unguarded eagerly. */ + size_t add_guard_size = maps_coalesce ? 0 : PAGE_GUARDS_SIZE; + generate_dirty(arena_ind, sz1); + verify_pdirty(arena_ind, sz1 + add_guard_size); + verify_pmuzzy(arena_ind, 0); + + /* Should reuse the first extent. */ + generate_dirty(arena_ind, sz1); + verify_pdirty(arena_ind, sz1 + add_guard_size); + verify_pmuzzy(arena_ind, 0); + + /* Should not reuse; expect new dirty pages. */ + generate_dirty(arena_ind, sz2); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + /* Should reuse dirty extents for the two mallocx. */ + void *p1 = do_mallocx(sz1, flags); + verify_extent_guarded(tsdn, p1); + verify_pdirty(arena_ind, sz2 + add_guard_size); + + void *p2 = do_mallocx(sz2, flags); + verify_extent_guarded(tsdn, p2); + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + dallocx(p1, flags); + verify_pdirty(arena_ind, sz1 + add_guard_size); + dallocx(p2, flags); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + + do_purge(arena_ind); + verify_pdirty(arena_ind, 0); + verify_pmuzzy(arena_ind, 0); + + if (config_stats) { + expect_u64_eq(get_arena_npurge(arena_ind), 1, + "Expected purging to occur"); + expect_u64_eq(get_arena_dirty_npurge(arena_ind), 1, + "Expected purging to occur"); + expect_u64_eq(get_arena_dirty_purged(arena_ind), + (sz1 + sz2 + 2 * add_guard_size) / PAGE, + "Expected purging to occur"); + expect_u64_eq(get_arena_muzzy_npurge(arena_ind), 0, + "Expected purging to occur"); + } + + if (opt_retain) { + /* + * With retain, guarded extents are not mergable and will be + * cached in ecache_retained. They should be reused. 
+ */ + void *new_p1 = do_mallocx(sz1, flags); + verify_extent_guarded(tsdn, p1); + expect_ptr_eq(p1, new_p1, "Expect to reuse p1"); + + void *new_p2 = do_mallocx(sz2, flags); + verify_extent_guarded(tsdn, p2); + expect_ptr_eq(p2, new_p2, "Expect to reuse p2"); + + dallocx(new_p1, flags); + verify_pdirty(arena_ind, sz1 + add_guard_size); + dallocx(new_p2, flags); + verify_pdirty(arena_ind, sz1 + sz2 + 2 * add_guard_size); + verify_pmuzzy(arena_ind, 0); + } + + do_arena_destroy(arena_ind); +} +TEST_END + +int +main(void) { + return test( + test_guarded_small, + test_guarded_large, + test_guarded_decay); +} diff --git a/test/unit/guard.sh b/test/unit/guard.sh new file mode 100644 index 00000000..933b4a4d --- /dev/null +++ b/test/unit/guard.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="san_guard_large:1,san_guard_small:1" diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 86012c75..060ce3e4 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -80,11 +80,11 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated; - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); destroy_test_data(shard); @@ -188,7 +188,7 @@ TEST_BEGIN(test_stress) { size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, - npages * PAGE, PAGE, false, + npages * PAGE, PAGE, false, false, &deferred_work_generated); assert_ptr_not_null(edata, "Unexpected allocation failure"); @@ -263,7 +263,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { */ for (size_t i = 0; i < NALLOCS / 2; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* 
zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } edata_list_active_t allocs_list; @@ -299,7 +300,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { /* Reallocate (individually), and ensure reuse and contiguity. */ for (size_t i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); @@ -374,7 +376,7 @@ TEST_BEGIN(test_defer_time) { edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); @@ -408,7 +410,7 @@ TEST_BEGIN(test_defer_time) { */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 77d05556..5976bb47 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -128,6 +128,8 @@ TEST_BEGIN(test_hpa_background_thread_purges) { test_skip_if(!config_stats); test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); + /* Skip since guarded pages cannot be allocated from hpa. 
*/ + test_skip_if(san_enabled()); unsigned arena_ind = create_arena(); /* @@ -142,6 +144,8 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { test_skip_if(!config_stats); test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); + /* Skip since guarded pages cannot be allocated from hpa. */ + test_skip_if(san_enabled()); unsigned arena_ind = create_arena(); diff --git a/test/unit/pa.c b/test/unit/pa.c index 01d891df..fcf22237 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -91,7 +91,7 @@ do_alloc_free_purge(void *arg) { bool deferred_work_generated; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); assert_ptr_not_null(edata, ""); pa_dalloc(TSDN_NULL, &test_data->shard, edata, &deferred_work_generated); diff --git a/test/unit/retained.c b/test/unit/retained.c index 9ad9940e..53cda286 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -1,5 +1,6 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/guard.h" #include "jemalloc/internal/spin.h" static unsigned arena_ind; @@ -103,7 +104,8 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); - esz = sz + sz_large_pad; + size_t guard_sz = san_enabled() ? 
PAGE_GUARDS_SIZE : 0; + esz = sz + sz_large_pad + guard_sz; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); @@ -133,7 +135,8 @@ TEST_BEGIN(test_retained) { */ do_refresh(); - size_t allocated = esz * nthreads * PER_THD_NALLOCS; + size_t allocated = (esz - guard_sz) * nthreads * + PER_THD_NALLOCS; size_t active = do_get_active(arena_ind); expect_zu_le(allocated, active, "Unexpected active memory"); size_t mapped = do_get_mapped(arena_ind); diff --git a/test/unit/sec.c b/test/unit/sec.c index 763e6087..acca192d 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -50,7 +50,9 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool *deferred_work_generated) { + size_t alignment, bool zero, bool guarded, + bool *deferred_work_generated) { + assert(!guarded); pai_test_allocator_t *ta = (pai_test_allocator_t *)self; *deferred_work_generated = false; if (ta->alloc_fail) { @@ -182,10 +184,12 @@ TEST_BEGIN(test_reuse) { /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); @@ -216,9 +220,11 @@ TEST_BEGIN(test_reuse) { */ for (int i = 0; i < NALLOCS; i++) { edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 
2 * PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); expect_ptr_eq(two_page[i], alloc2, @@ -255,11 +261,12 @@ TEST_BEGIN(test_auto_flush) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS + 1, max_allocs, @@ -310,7 +317,8 @@ do_disable_flush_test(bool is_disable) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } /* Free all but the last aloc. 
*/ @@ -383,7 +391,8 @@ TEST_BEGIN(test_max_alloc_respected) { expect_zu_eq(i, ta.dalloc_count, "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false, &deferred_work_generated); + PAGE, /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); expect_zu_eq(i + 1, ta.alloc_count, "Incorrect number of allocations"); @@ -410,7 +419,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, @@ -450,7 +460,8 @@ TEST_BEGIN(test_nshards_0) { bool deferred_work_generated; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); /* Both operations should have gone directly to the fallback. 
*/ @@ -492,7 +503,8 @@ TEST_BEGIN(test_stats_simple) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } @@ -505,7 +517,8 @@ TEST_BEGIN(test_stats_simple) { } for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); } } @@ -534,13 +547,14 @@ TEST_BEGIN(test_stats_auto_flush) { bool deferred_work_generated; extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - &deferred_work_generated); + /* guarded */ false, &deferred_work_generated); for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); } for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -580,7 +594,8 @@ TEST_BEGIN(test_stats_manual_flush) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, &deferred_work_generated); + /* zero */ false, /* guarded */ false, + &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } From 3c4b717ffc05012905fec0c4b49cda8f783c2727 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Sep 2021 13:19:37 -0700 Subject: [PATCH 2118/2608] Remove unused header base_structs.h. 
--- include/jemalloc/internal/base_structs.h | 62 ------------------------ 1 file changed, 62 deletions(-) delete mode 100644 include/jemalloc/internal/base_structs.h diff --git a/include/jemalloc/internal/base_structs.h b/include/jemalloc/internal/base_structs.h deleted file mode 100644 index 914c5b59..00000000 --- a/include/jemalloc/internal/base_structs.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BASE_STRUCTS_H -#define JEMALLOC_INTERNAL_BASE_STRUCTS_H - -#include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/edata.h" -#include "jemalloc/internal/jemalloc_internal_types.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/sc.h" - -/* Embedded at the beginning of every block of base-managed virtual memory. */ -struct base_block_s { - /* Total size of block's virtual memory mapping. */ - size_t size; - - /* Next block in list of base's blocks. */ - base_block_t *next; - - /* Tracks unused trailing space. */ - edata_t edata; -}; - -struct base_s { - /* - * User-configurable extent hook functions. - */ - ehooks_t ehooks; - - /* - * Use user hooks for metadata when true. - */ - bool metadata_use_hooks; - - /* Protects base_alloc() and base_stats_get() operations. */ - malloc_mutex_t mtx; - - /* Using THP when true (metadata_thp auto mode). */ - bool auto_thp_switched; - /* - * Most recent size class in the series of increasingly large base - * extents. Logarithmic spacing between subsequent allocations ensures - * that the total number of distinct mappings remains small. - */ - pszind_t pind_last; - - /* Serial number generation state. */ - size_t extent_sn_next; - - /* Chain of all blocks associated with base. */ - base_block_t *blocks; - - /* Heap of extents that track unused trailing space within blocks. */ - edata_heap_t avail[SC_NSIZES]; - - /* Stats, only maintained if config_stats. */ - size_t allocated; - size_t resident; - size_t mapped; - /* Number of THP regions touched. 
*/ - size_t n_thp; -}; - -#endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ From 83f3294027952710f35014cff1cffd51f281d785 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 27 Sep 2021 13:43:24 -0700 Subject: [PATCH 2119/2608] Small refactors around 7bb05e0. --- include/jemalloc/internal/arena_structs.h | 6 +++--- include/jemalloc/internal/arena_types.h | 2 +- .../internal/jemalloc_internal_inlines_a.h | 3 +-- src/base.c | 6 +++--- src/jemalloc.c | 3 +-- test/unit/pa.c | 4 ++-- test/unit/rtree.c | 20 +++++++++---------- 7 files changed, 21 insertions(+), 23 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index ad76a79a..e2a5a408 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -1,5 +1,5 @@ -#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H -#define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H +#define JEMALLOC_INTERNAL_ARENA_STRUCTS_H #include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" @@ -98,4 +98,4 @@ struct arena_s { bin_t bins[0]; }; -#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H */ +#endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index f763a8ca..d0e12917 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -43,7 +43,7 @@ typedef enum { struct arena_config_s { /* extent hooks to be used for the arena */ - struct extent_hooks_s *extent_hooks; + extent_hooks_t *extent_hooks; /* * Use extent hooks for metadata (base) allocations when true. 
diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 1bca34cf..9e27cc30 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -65,8 +65,7 @@ arena_get(tsdn_t *tsdn, unsigned ind, bool init_if_missing) { ret = (arena_t *)atomic_load_p(&arenas[ind], ATOMIC_ACQUIRE); if (unlikely(ret == NULL)) { if (init_if_missing) { - ret = arena_init(tsdn, ind, - &arena_config_default); + ret = arena_init(tsdn, ind, &arena_config_default); } } return ret; diff --git a/src/base.c b/src/base.c index cc127ea0..38f6fa4b 100644 --- a/src/base.c +++ b/src/base.c @@ -298,7 +298,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, static ehooks_t * base_ehooks_get_for_metadata(base_t *base) { return base->metadata_use_hooks ? &base->ehooks : - (struct ehooks_s *)&ehooks_default_extent_hooks; + (ehooks_t *)&ehooks_default_extent_hooks; } /* @@ -522,7 +522,7 @@ base_postfork_child(tsdn_t *tsdn, base_t *base) { bool base_boot(tsdn_t *tsdn) { - b0 = base_new(tsdn, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks, true); + b0 = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); return (b0 == NULL); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 907265c0..8e04fa6f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -570,8 +570,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { /* Initialize a new arena. 
*/ choose[j] = first_null; arena = arena_init_locked(tsd_tsdn(tsd), - choose[j], - &arena_config_default); + choose[j], &arena_config_default); if (arena == NULL) { malloc_mutex_unlock(tsd_tsdn(tsd), &arenas_lock); diff --git a/test/unit/pa.c b/test/unit/pa.c index fcf22237..10fa1b28 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -53,8 +53,8 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { assert_ptr_not_null(test_data, ""); init_test_extent_hooks(&test_data->hooks); - base_t *base = base_new(TSDN_NULL, /* ind */ 1, - &test_data->hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(TSDN_NULL, /* ind */ 1, &test_data->hooks, + /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); test_data->base = base; diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 82b617bd..4101b72b 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -12,8 +12,8 @@ TEST_BEGIN(test_rtree_read_empty) { tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -53,8 +53,8 @@ TEST_BEGIN(test_rtree_extrema) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; @@ -105,8 +105,8 @@ TEST_END TEST_BEGIN(test_rtree_bits) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[] = {PAGE, 
PAGE + 1, @@ -155,8 +155,8 @@ TEST_BEGIN(test_rtree_random) { sfmt_t *sfmt = init_gen_rand(SEED); tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); uintptr_t keys[NSET]; @@ -254,8 +254,8 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, TEST_BEGIN(test_rtree_range) { tsdn_t *tsdn = tsdn_fetch(); - base_t *base = base_new(tsdn, 0, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + base_t *base = base_new(tsdn, 0, &ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; From 11b6db7448f9c31502a7bcf7e59cd8913732c83d Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sun, 25 Oct 2020 15:48:41 +0000 Subject: [PATCH 2120/2608] CPU affinity on BSD platforms support. 
--- src/background_thread.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index ac171c37..3bb8d26c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -82,13 +82,33 @@ static inline bool set_current_thread_affinity(int cpu) { #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; +#else +# ifndef __NetBSD__ + cpuset_t cpuset; +# else + cpuset_t *cpuset; +# endif +#endif + +#ifndef __NetBSD__ CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); - int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset); - - return (ret != 0); #else - return false; + cpuset = cpuset_create(); +#endif + +#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + return (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0); +#else +# ifndef __NetBSD__ + int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), + &cpuset); +# else + int ret = pthread_setaffinity_np(pthread_self(), cpuset_size(cpuset), + cpuset); + cpuset_destroy(cpuset); +# endif + return ret != 0; #endif } From ab0f1604b4fc563158f142d41f6a3550463d7729 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Sep 2021 15:41:10 -0700 Subject: [PATCH 2121/2608] Delay the atexit call to prof_log_start(). So that atexit() is only done when prof_log is used. --- src/prof_log.c | 39 ++++++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/src/prof_log.c b/src/prof_log.c index 0f27a128..0632c3b3 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -412,6 +412,13 @@ prof_log_dummy_set(bool new_value) { prof_log_dummy = new_value; } +/* Used as an atexit function to stop logging on exit. 
*/ +static void +prof_log_stop_final(void) { + tsd_t *tsd = tsd_fetch(); + prof_log_stop(tsd_tsdn(tsd)); +} + JEMALLOC_COLD bool prof_log_start(tsdn_t *tsdn, const char *filename) { @@ -425,6 +432,20 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { malloc_mutex_lock(tsdn, &log_mtx); + static bool prof_log_atexit_called = false; + if (!prof_log_atexit_called) { + prof_log_atexit_called = true; + if (atexit(prof_log_stop_final) != 0) { + malloc_write(": Error in atexit() " + "for logging\n"); + if (opt_abort) { + abort(); + } + ret = true; + goto label_done; + } + } + if (prof_logging_state != prof_logging_state_stopped) { ret = true; } else if (filename == NULL) { @@ -442,19 +463,12 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { if (!ret) { nstime_prof_init_update(&log_start_timestamp); } - +label_done: malloc_mutex_unlock(tsdn, &log_mtx); return ret; } -/* Used as an atexit function to stop logging on exit. */ -static void -prof_log_stop_final(void) { - tsd_t *tsd = tsd_fetch(); - prof_log_stop(tsd_tsdn(tsd)); -} - struct prof_emitter_cb_arg_s { int fd; ssize_t ret; @@ -697,15 +711,6 @@ prof_log_init(tsd_t *tsd) { prof_log_start(tsd_tsdn(tsd), NULL); } - if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " - "for logging\n"); - if (opt_abort) { - abort(); - } - return true; - } - return false; } From cf9724531af2864b243668d82aa63114e9737bfd Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 17 Feb 2021 20:40:11 +0000 Subject: [PATCH 2122/2608] Darwin malloc_size override support proposal. Darwin has similar api than Linux/FreeBSD's malloc_usable_size. 
--- configure.ac | 3 ++ .../internal/jemalloc_internal_defs.h.in | 5 +++ include/jemalloc/jemalloc_protos.h.in | 4 +++ src/jemalloc.c | 35 ++++++++++++++----- test/include/test/jemalloc_test.h.in | 5 +++ test/integration/aligned_alloc.c | 4 +-- test/integration/allocated.c | 2 +- test/integration/malloc.c | 2 +- test/integration/posix_memalign.c | 2 +- test/integration/rallocx.c | 4 +-- test/stress/microbench.c | 2 +- test/unit/junk.c | 2 +- test/unit/prof_stats.c | 2 +- 13 files changed, 53 insertions(+), 19 deletions(-) diff --git a/configure.ac b/configure.ac index 7e2b44c5..7a49e84f 100644 --- a/configure.ac +++ b/configure.ac @@ -1056,6 +1056,9 @@ AC_CHECK_FUNC([memalign], AC_CHECK_FUNC([valloc], [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) public_syms="${public_syms} valloc"]) +AC_CHECK_FUNC([malloc_size], + [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ]) + public_syms="${public_syms} malloc_size"]) dnl Check for allocator-related functions that should be wrapped. wrap_syms= diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 418b0cb2..a4be549b 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -337,6 +337,11 @@ */ #undef JEMALLOC_HAVE_MEMCNTL +/* + * Defined if malloc_size is supported + */ +#undef JEMALLOC_HAVE_MALLOC_SIZE + /* Define if operating system has alloca.h header. 
*/ #undef JEMALLOC_HAS_ALLOCA_H diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index d75b2224..356221cc 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -53,6 +53,10 @@ JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@malloc_stats_print( const char *opts); JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_usable_size( JEMALLOC_USABLE_SIZE_CONST void *ptr) JEMALLOC_CXX_THROW; +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @je_@malloc_size( + const void *ptr); +#endif #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN diff --git a/src/jemalloc.c b/src/jemalloc.c index 8e04fa6f..469a4910 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3904,18 +3904,14 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } #undef STATS_PRINT_BUFSIZE -JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { - size_t ret; - tsdn_t *tsdn; - - LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); - +JEMALLOC_ALWAYS_INLINE size_t +je_malloc_usable_size_impl(JEMALLOC_USABLE_SIZE_CONST void *ptr) { assert(malloc_initialized() || IS_INITIALIZER); - tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); + size_t ret; if (unlikely(ptr == NULL)) { ret = 0; } else { @@ -3926,12 +3922,33 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { ret = isalloc(tsdn, ptr); } } - check_entry_exit_locking(tsdn); + + return ret; +} + +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW +je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); + + size_t ret = je_malloc_usable_size_impl(ptr); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } +#ifdef JEMALLOC_HAVE_MALLOC_SIZE +JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW +je_malloc_size(const void *ptr) { + 
LOG("core.malloc_size.entry", "ptr: %p", ptr); + + size_t ret = je_malloc_usable_size_impl(ptr); + + LOG("core.malloc_size.exit", "result: %zu", ret); + return ret; +} +#endif + static void batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) { assert(config_prof && opt_prof); diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 0e332165..3f8c0da7 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -132,6 +132,11 @@ static const bool config_debug = #define MEXP 19937 #include "test/SFMT.h" +#ifndef JEMALLOC_HAVE_MALLOC_SIZE +#define TEST_MALLOC_SIZE malloc_usable_size +#else +#define TEST_MALLOC_SIZE malloc_size +#endif /******************************************************************************/ /* * Define always-enabled assertion macros, so that test assertions execute even diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index 3f619e7e..b37d5ba0 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -120,7 +120,7 @@ TEST_BEGIN(test_alignment_and_size) { "size=%zu (%#zx): %s", alignment, size, size, buf); } - total += malloc_usable_size(ps[i]); + total += TEST_MALLOC_SIZE(ps[i]); if (total >= (MAXALIGN << 1)) { break; } @@ -141,7 +141,7 @@ TEST_END TEST_BEGIN(test_zero_alloc) { void *res = aligned_alloc(8, 0); assert(res); - size_t usable = malloc_usable_size(res); + size_t usable = TEST_MALLOC_SIZE(res); assert(usable > 0); free(res); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 8f2f21d5..0c64272c 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -70,7 +70,7 @@ thd_start(void *arg) { expect_ptr_eq(ap0, ap1, "Pointer returned by \"thread.allocatedp\" should not change"); - usize = malloc_usable_size(p); + usize = TEST_MALLOC_SIZE(p); expect_u64_le(a0 + usize, a1, "Allocated memory counter should increase by at least the amount " 
"explicitly allocated"); diff --git a/test/integration/malloc.c b/test/integration/malloc.c index 8b33bc8f..ef449163 100644 --- a/test/integration/malloc.c +++ b/test/integration/malloc.c @@ -3,7 +3,7 @@ TEST_BEGIN(test_zero_alloc) { void *res = malloc(0); assert(res); - size_t usable = malloc_usable_size(res); + size_t usable = TEST_MALLOC_SIZE(res); assert(usable > 0); free(res); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 6f8a1b03..2da0549b 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -101,7 +101,7 @@ TEST_BEGIN(test_alignment_and_size) { "size=%zu (%#zx): %s", alignment, size, size, buf); } - total += malloc_usable_size(ps[i]); + total += TEST_MALLOC_SIZE(ps[i]); if (total >= (MAXALIGN << 1)) { break; } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 57c7967f..d4a48fce 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -185,7 +185,7 @@ TEST_BEGIN(test_align_enum) { assert_ptr_not_null(p, "Unexpected mallocx() error"); assert_zu_eq(nallocx(1, flags), - malloc_usable_size(p), + TEST_MALLOC_SIZE(p), "Wrong mallocx() usable size"); int flags_next = MALLOCX_LG_ALIGN(lg_align_next); @@ -193,7 +193,7 @@ TEST_BEGIN(test_align_enum) { assert_ptr_not_null(p, "Unexpected rallocx() error"); expect_zu_eq(nallocx(size, flags_next), - malloc_usable_size(p), + TEST_MALLOC_SIZE(p), "Wrong rallocx() usable size"); free(p); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 226677f7..062e32fd 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -69,7 +69,7 @@ malloc_mus_free(void) { test_fail("Unexpected malloc() failure"); return; } - malloc_usable_size(p); + TEST_MALLOC_SIZE(p); free(p); } diff --git a/test/unit/junk.c b/test/unit/junk.c index 314da3ce..543092f1 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -30,7 +30,7 @@ do_allocs(size_t size, bool zero, size_t lg_align) { if 
(opt_junk_alloc && !zero) { \ expect_ptr_eq(ptr, last_junked_ptr, ""); \ expect_zu_eq(last_junked_usize, \ - malloc_usable_size(ptr), ""); \ + TEST_MALLOC_SIZE(ptr), ""); \ } \ } while (0) if (!zero && lg_align == 0) { diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index a9145871..c88c4ae0 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -43,7 +43,7 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], int flags = flags_array[i]; void *p = mallocx(sz, flags); assert_ptr_not_null(p, "malloc() failed"); - assert(malloc_usable_size(p) == sz_index2size(ind)); + assert(TEST_MALLOC_SIZE(p) == sz_index2size(ind)); ptrs[i] = p; live_req_sum += sz; live_count++; From 912324a1acae4bfb6445825caad000aa295dcca8 Mon Sep 17 00:00:00 2001 From: Stan Angelov Date: Thu, 30 Sep 2021 17:37:59 -0700 Subject: [PATCH 2123/2608] Add debug check outside of the loop in hpa_alloc_batch. This optimizes the whole loop away for non-debug builds. --- src/hpa.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 82b9c992..24fb7a3f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -733,17 +733,25 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - edata_t *edata; - ql_foreach(edata, &results->head, ql_link_active) { - emap_assert_mapped(tsdn, shard->emap, edata); - assert(edata_pai_get(edata) == EXTENT_PAI_HPA); - assert(edata_state_get(edata) == extent_state_active); - assert(edata_arena_ind_get(edata) == shard->ind); - assert(edata_szind_get_maybe_invalid(edata) == SC_NSIZES); - assert(!edata_slab_get(edata)); - assert(edata_committed_get(edata)); - assert(edata_base_get(edata) == edata_addr_get(edata)); - assert(edata_base_get(edata) != NULL); + /* + * Guard the sanity checks with config_debug because the loop cannot be + * proven non-circular by the compiler, even if everything 
within the + * loop is optimized away. + */ + if (config_debug) { + edata_t *edata; + ql_foreach(edata, &results->head, ql_link_active) { + emap_assert_mapped(tsdn, shard->emap, edata); + assert(edata_pai_get(edata) == EXTENT_PAI_HPA); + assert(edata_state_get(edata) == extent_state_active); + assert(edata_arena_ind_get(edata) == shard->ind); + assert(edata_szind_get_maybe_invalid(edata) == + SC_NSIZES); + assert(!edata_slab_get(edata)); + assert(edata_committed_get(edata)); + assert(edata_base_get(edata) == edata_addr_get(edata)); + assert(edata_base_get(edata) != NULL); + } } return nsuccess; } From c9ebff0fd6ab90d5eed0d11f48dfedcc21222ab0 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 6 Oct 2021 15:22:38 -0700 Subject: [PATCH 2124/2608] Initialize deferred_work_generated As the code evolves, some code paths that have previously assigned deferred_work_generated may cease being reached. This would leave the value uninitialized. This change initializes the value for safety. --- src/arena.c | 6 +++--- src/hpa.c | 2 -- src/large.c | 6 +++--- src/pa.c | 1 - src/pac.c | 6 ------ src/sec.c | 8 +++----- test/unit/hpa.c | 8 ++++---- test/unit/pa.c | 2 +- test/unit/sec.c | 24 +++++++++--------------- 9 files changed, 23 insertions(+), 40 deletions(-) diff --git a/src/arena.c b/src/arena.c index 8147d14b..811f0edc 100644 --- a/src/arena.c +++ b/src/arena.c @@ -324,7 +324,7 @@ arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, edata_t * arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { - bool deferred_work_generated; + bool deferred_work_generated = false; szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; @@ -561,7 +561,7 @@ arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena) { void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { - bool deferred_work_generated; + bool deferred_work_generated = false; pa_dalloc(tsdn, &arena->pa_shard, slab, 
&deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); @@ -825,7 +825,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static edata_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard, const bin_info_t *bin_info) { - bool deferred_work_generated; + bool deferred_work_generated = false; witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); diff --git a/src/hpa.c b/src/hpa.c index 24fb7a3f..5251655c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -785,7 +785,6 @@ static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { /* Expand not yet supported. */ - *deferred_work_generated = false; return true; } @@ -793,7 +792,6 @@ static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { /* Shrink not yet supported. */ - *deferred_work_generated = false; return true; } diff --git a/src/large.c b/src/large.c index 6dbb3d91..5fc4bf58 100644 --- a/src/large.c +++ b/src/large.c @@ -64,7 +64,7 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { return true; } - bool deferred_work_generated; + bool deferred_work_generated = false; bool err = pa_shrink(tsdn, &arena->pa_shard, edata, old_size, usize + sz_large_pad, sz_size2index(usize), &deferred_work_generated); @@ -90,7 +90,7 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, szind_t szind = sz_size2index(usize); - bool deferred_work_generated; + bool deferred_work_generated = false; bool err = pa_expand(tsdn, &arena->pa_shard, edata, old_size, new_size, szind, zero, &deferred_work_generated); @@ -249,7 +249,7 @@ large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, static void large_dalloc_finish_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { - bool deferred_work_generated; + bool deferred_work_generated = 
false; pa_dalloc(tsdn, &arena->pa_shard, edata, &deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); diff --git a/src/pa.c b/src/pa.c index 649b9c2e..779e672b 100644 --- a/src/pa.c +++ b/src/pa.c @@ -126,7 +126,6 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, assert(!guarded || alignment <= PAGE); edata_t *edata = NULL; - *deferred_work_generated = false; if (!guarded && pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, zero, /* guarded */ false, deferred_work_generated); diff --git a/src/pac.c b/src/pac.c index 8ce3159c..176b181a 100644 --- a/src/pac.c +++ b/src/pac.c @@ -157,8 +157,6 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool *deferred_work_generated) { - *deferred_work_generated = false; - pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); @@ -179,8 +177,6 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); - *deferred_work_generated = false; - size_t mapped_add = 0; size_t expand_amount = new_size - old_size; @@ -221,8 +217,6 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; - *deferred_work_generated = false; - if (ehooks_split_will_fail(ehooks)) { return true; } diff --git a/src/sec.c b/src/sec.c index 0f95a0d0..d99c4439 100644 --- a/src/sec.c +++ b/src/sec.c @@ -148,7 +148,7 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { } malloc_mutex_unlock(tsdn, &shard->mtx); - bool deferred_work_generated; + bool deferred_work_generated = false; pai_dalloc_batch(tsdn, sec->fallback, &to_flush, &deferred_work_generated); } @@ -178,7 +178,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, 
sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); - bool deferred_work_generated; + bool deferred_work_generated = false; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); @@ -223,7 +223,6 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, assert(!guarded); sec_t *sec = (sec_t *)self; - *deferred_work_generated = false; if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { @@ -291,7 +290,7 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * we're disabling the HPA or resetting the arena, both of which are * rare pathways. */ - bool deferred_work_generated; + bool deferred_work_generated = false; pai_dalloc_batch(tsdn, sec->fallback, &to_flush, &deferred_work_generated); } @@ -341,7 +340,6 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, sec_shard_t *shard = sec_shard_pick(tsdn, sec); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { - *deferred_work_generated = false; sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 060ce3e4..bda0d46d 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -79,7 +79,7 @@ TEST_BEGIN(test_alloc_max) { edata_t *edata; /* Small max */ - bool deferred_work_generated; + bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); @@ -169,7 +169,7 @@ TEST_BEGIN(test_stress) { mem_tree_t tree; mem_tree_new(&tree); - bool deferred_work_generated; + bool deferred_work_generated = false; for (size_t i = 0; i < 100 * 1000; i++) { size_t operation = prng_range_zu(&prng_state, 2); @@ -252,7 +252,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) { &test_hpa_shard_opts_default); tsdn_t *tsdn = 
tsd_tsdn(tsd_fetch()); - bool deferred_work_generated; + bool deferred_work_generated = false; enum {NALLOCS = 8}; @@ -369,7 +369,7 @@ TEST_BEGIN(test_defer_time) { hpa_shard_t *shard = create_test_data(&hooks, &opts); - bool deferred_work_generated; + bool deferred_work_generated = false; nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); diff --git a/test/unit/pa.c b/test/unit/pa.c index 10fa1b28..505b6fa9 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -88,7 +88,7 @@ static void * do_alloc_free_purge(void *arg) { test_data_t *test_data = (test_data_t *)arg; for (int i = 0; i < 10 * 1000; i++) { - bool deferred_work_generated; + bool deferred_work_generated = false; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, /* guarded */ false, &deferred_work_generated); diff --git a/test/unit/sec.c b/test/unit/sec.c index acca192d..8ac3411c 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -54,7 +54,6 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, bool *deferred_work_generated) { assert(!guarded); pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; if (ta->alloc_fail) { return NULL; } @@ -76,7 +75,6 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; if (ta->alloc_fail) { return 0; } @@ -100,7 +98,6 @@ pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; ta->expand_count++; return ta->expand_return_value; } @@ -109,7 +106,6 @@ static bool pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, 
bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; ta->shrink_count++; return ta->shrink_return_value; } @@ -118,7 +114,6 @@ static void pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; ta->dalloc_count++; free(edata); } @@ -127,7 +122,6 @@ static void pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - *deferred_work_generated = false; edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { @@ -179,7 +173,7 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { @@ -256,7 +250,7 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -312,7 +306,7 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -380,7 +374,7 @@ TEST_BEGIN(test_max_alloc_respected) { size_t max_alloc = 2 * PAGE; size_t attempted_alloc = 3 * PAGE; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* 
nshards */ 1, max_alloc, /* max_bytes */ 1000 * PAGE); @@ -414,7 +408,7 @@ TEST_BEGIN(test_expand_shrink_delegate) { /* See the note above -- we can't use the real tsd. */ tsdn_t *tsdn = TSDN_NULL; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); @@ -458,7 +452,7 @@ TEST_BEGIN(test_nshards_0) { opts.nshards = 0; sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); - bool deferred_work_generated; + bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, &deferred_work_generated); @@ -495,7 +489,7 @@ TEST_BEGIN(test_stats_simple) { FLUSH_PAGES = 20, }; - bool deferred_work_generated; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); @@ -544,7 +538,7 @@ TEST_BEGIN(test_stats_auto_flush) { edata_t *extra_alloc1; edata_t *allocs[2 * FLUSH_PAGES]; - bool deferred_work_generated; + bool deferred_work_generated = false; extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, &deferred_work_generated); @@ -590,7 +584,7 @@ TEST_BEGIN(test_stats_manual_flush) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); - bool deferred_work_generated; + bool deferred_work_generated = false; edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, From 8daac7958f6b9a3e10e5de83c2a1252e8977687f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 22 Sep 2021 14:59:53 -0700 Subject: [PATCH 2125/2608] Redefine functions with test hooks only for tests Android build has issues with these defines, this will allow the build to succeed if it doesn't need to build the tests. 
--- include/jemalloc/internal/test_hooks.h | 23 ++++++++++++++--------- src/prof_sys.c | 2 +- test/unit/test_hooks.c | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index a6351e59..3d530b5c 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -4,16 +4,21 @@ extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(); -#define JEMALLOC_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) +#if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST) +# define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) -#define open JEMALLOC_HOOK(open, test_hooks_libc_hook) -#define read JEMALLOC_HOOK(read, test_hooks_libc_hook) -#define write JEMALLOC_HOOK(write, test_hooks_libc_hook) -#define readlink JEMALLOC_HOOK(readlink, test_hooks_libc_hook) -#define close JEMALLOC_HOOK(close, test_hooks_libc_hook) -#define creat JEMALLOC_HOOK(creat, test_hooks_libc_hook) -#define secure_getenv JEMALLOC_HOOK(secure_getenv, test_hooks_libc_hook) +# define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook) +# define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook) +# define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook) +# define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook) +# define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook) +# define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook) +# define secure_getenv JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook) /* Note that this is undef'd and re-define'd in src/prof.c. 
*/ -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +# define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +#else +# define JEMALLOC_TEST_HOOK(fn, hook) fn +#endif + #endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */ diff --git a/src/prof_sys.c b/src/prof_sys.c index fd41e86c..b7a3a2cf 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -20,7 +20,7 @@ */ #undef _Unwind_Backtrace #include -#define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +#define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif /******************************************************************************/ diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index 2a5b3d52..8cd2b3bb 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -12,7 +12,7 @@ func_to_hook(int arg1, int arg2) { return arg1 + arg2; } -#define func_to_hook JEMALLOC_HOOK(func_to_hook, test_hooks_libc_hook) +#define func_to_hook JEMALLOC_TEST_HOOK(func_to_hook, test_hooks_libc_hook) TEST_BEGIN(unhooked_call) { test_hooks_libc_hook = NULL; From 2159615419a90b5473cfd9d3a4cb4700259d8c0b Mon Sep 17 00:00:00 2001 From: Wang JinLong Date: Mon, 18 Oct 2021 09:57:27 +0800 Subject: [PATCH 2126/2608] Add new architecture loongarch. 
Signed-off-by: Wang JinLong --- include/jemalloc/internal/quantum.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index 760d6add..c22d753a 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -30,6 +30,9 @@ # ifdef __hppa__ # define LG_QUANTUM 4 # endif +# ifdef __loongarch__ +# define LG_QUANTUM 4 +# endif # ifdef __m68k__ # define LG_QUANTUM 3 # endif From 26f5257b88c925357bc524444a61049905e7bd19 Mon Sep 17 00:00:00 2001 From: Ashutosh Grewal Date: Fri, 15 Oct 2021 19:23:31 -0700 Subject: [PATCH 2127/2608] Remove declaration of an undefined function --- include/jemalloc/internal/pac.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index d07ccc2f..5eee3de8 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -121,8 +121,6 @@ bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx); -void pac_stats_merge(tsdn_t *tsdn, pac_t *pac, pac_stats_t *pac_stats_out, - pac_estats_t *estats_out, size_t *resident); static inline size_t pac_mapped(pac_t *pac) { From 4d56aaeca5883ae5f4b5550c528503fb51fdf479 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 19 Oct 2021 17:14:08 -0700 Subject: [PATCH 2128/2608] Optimize away the tsd_fast() check on free fastpath. To ensure that the free fastpath can tolerate uninitialized tsd, improved the static initializer for rtree_ctx in tsd. 
--- include/jemalloc/internal/emap.h | 6 ++++-- include/jemalloc/internal/rtree.h | 3 --- include/jemalloc/internal/rtree_tsd.h | 26 +++++++++++++++++------ include/jemalloc/internal/thread_event.h | 15 ++++--------- include/jemalloc/internal/tsd.h | 2 +- src/jemalloc.c | 27 ++++++++++++------------ test/unit/tsd.c | 7 ++++++ 7 files changed, 48 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index a40b504b..87ece63d 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -276,12 +276,14 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, } /* - * Returns true on error. + * Only used on the fastpath of free. Returns true when cannot be fulfilled by + * fast path, e.g. when the metadata key is not cached. */ JEMALLOC_ALWAYS_INLINE bool emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { - rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + /* Use the unsafe getter since this may gets called during exit. */ + rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd); rtree_metadata_t metadata; bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree, diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index c5f0d8c4..b4f44840 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -35,9 +35,6 @@ # define RTREE_LEAF_COMPACT #endif -/* Needed for initialization only. 
*/ -#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) - typedef struct rtree_node_elm_s rtree_node_elm_t; struct rtree_node_elm_s { atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 562e2929..e45525c5 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -18,16 +18,28 @@ * cache misses if made overly large, plus the cost of linear search in the LRU * cache. */ -#define RTREE_CTX_LG_NCACHE 4 -#define RTREE_CTX_NCACHE (1 << RTREE_CTX_LG_NCACHE) +#define RTREE_CTX_NCACHE 16 #define RTREE_CTX_NCACHE_L2 8 -/* - * Zero initializer required for tsd initialization only. Proper initialization - * done via rtree_ctx_data_init(). - */ -#define RTREE_CTX_ZERO_INITIALIZER {{{0, 0}}, {{0, 0}}} +/* Needed for initialization only. */ +#define RTREE_LEAFKEY_INVALID ((uintptr_t)1) +#define RTREE_CTX_CACHE_ELM_INVALID {RTREE_LEAFKEY_INVALID, NULL} +#define RTREE_CTX_INIT_ELM_1 RTREE_CTX_CACHE_ELM_INVALID +#define RTREE_CTX_INIT_ELM_2 RTREE_CTX_INIT_ELM_1, RTREE_CTX_INIT_ELM_1 +#define RTREE_CTX_INIT_ELM_4 RTREE_CTX_INIT_ELM_2, RTREE_CTX_INIT_ELM_2 +#define RTREE_CTX_INIT_ELM_8 RTREE_CTX_INIT_ELM_4, RTREE_CTX_INIT_ELM_4 +#define RTREE_CTX_INIT_ELM_16 RTREE_CTX_INIT_ELM_8, RTREE_CTX_INIT_ELM_8 + +#define _RTREE_CTX_INIT_ELM_DATA(n) RTREE_CTX_INIT_ELM_##n +#define RTREE_CTX_INIT_ELM_DATA(n) _RTREE_CTX_INIT_ELM_DATA(n) + +/* + * Static initializer (to invalidate the cache entries) is required because the + * free fastpath may access the rtree cache before a full tsd initialization. 
+ */ +#define RTREE_CTX_INITIALIZER {{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, \ + {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 525019b6..2f4e1b39 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -118,17 +118,10 @@ te_malloc_fastpath_ctx(tsd_t *tsd, uint64_t *allocated, uint64_t *threshold) { } JEMALLOC_ALWAYS_INLINE void -te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold, - bool size_hint) { - if (!size_hint) { - *deallocated = tsd_thread_deallocated_get(tsd); - *threshold = tsd_thread_deallocated_next_event_fast_get(tsd); - } else { - /* Unsafe getters since this may happen before tsd_init. */ - *deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd); - *threshold = - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); - } +te_free_fastpath_ctx(tsd_t *tsd, uint64_t *deallocated, uint64_t *threshold) { + /* Unsafe getters since this may happen before tsd_init. 
*/ + *deallocated = *tsd_thread_deallocatedp_get_unsafe(tsd); + *threshold = *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd); assert(*threshold <= TE_NEXT_EVENT_FAST_MAX); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 86d52778..0a46d448 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -119,7 +119,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* activity_callback_thunk */ \ ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ - /* rtree_ctx */ RTREE_CTX_ZERO_INITIALIZER, + /* rtree_ctx */ RTREE_CTX_INITIALIZER, /* O(name, type, nullable type) */ #define TSD_DATA_FAST \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 469a4910..0c798c87 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2910,12 +2910,20 @@ free_default(void *ptr) { JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* + * The tsd_fast() / initialized checks are folded into the branch + * testing (deallocated_after >= threshold) later in this function. + * The threshold will be set to 0 when !tsd_fast. + */ + assert(tsd_fast(tsd) || + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); emap_alloc_ctx_t alloc_ctx; if (!size_hint) { - if (unlikely(tsd == NULL || !tsd_fast(tsd))) { - return false; - } bool err = emap_alloc_ctx_try_lookup_fast(tsd, &arena_emap_global, ptr, &alloc_ctx); @@ -2925,15 +2933,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } assert(alloc_ctx.szind != SC_NSIZES); } else { - /* - * The size hinted fastpath does not involve rtree lookup, thus - * can tolerate an uninitialized tsd. This allows the tsd_fast - * check to be folded into the branch testing fast_threshold - * (set to 0 when !tsd_fast). 
- */ - if (unlikely(tsd == NULL)) { - return false; - } /* * Check for both sizes that are too large, and for sampled * objects. Sampled objects are always page-aligned. The @@ -2949,7 +2948,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { } uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold, size_hint); + te_free_fastpath_ctx(tsd, &deallocated, &threshold); size_t usize = sz_index2size(alloc_ctx.szind); uint64_t deallocated_after = deallocated + usize; @@ -2963,7 +2962,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { if (unlikely(deallocated_after >= threshold)) { return false; } - + assert(tsd_fast(tsd)); bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); if (fail) { /* See the comment in isfree. */ diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 3f3ca73d..205d8708 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -48,6 +48,13 @@ thd_start(void *arg) { int d = (int)(uintptr_t)arg; void *p; + /* + * Test free before tsd init -- the free fast path (which does not + * explicitly check for NULL) has to tolerate this case, and fall back + * to free_default. + */ + free(NULL); + tsd_t *tsd = tsd_fetch(); expect_x_eq(tsd_test_data_get(tsd), MALLOC_TSD_TEST_DATA_INIT, "Initial tsd get should return initialization value"); From b6a7a535b32a3298db5b3518bc1f52fccc1597a6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 20 Oct 2021 14:17:57 -0700 Subject: [PATCH 2129/2608] Optimize away a branch on the free fastpath. On the rtree metadata lookup fast path, there will never be a NULL returned when the cache key matches (which is unknown to the compiler). The previous logic was checking for NULL return value, resulting in the extra branch (in addition to the cache key match checking). Make the lookup_fast return a bool to indicate cache miss / match, so that the extra branch is avoided. 
--- include/jemalloc/internal/rtree.h | 43 +++++++++++++++++-------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index b4f44840..a00adb29 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -330,28 +330,27 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, } /* - * Tries to look up the key in the L1 cache, returning it if there's a hit, or - * NULL if there's a miss. - * Key is allowed to be NULL; returns NULL in this case. + * Tries to look up the key in the L1 cache, returning false if there's a hit, or + * true if there's a miss. + * Key is allowed to be NULL; returns true in this case. */ -JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * +JEMALLOC_ALWAYS_INLINE bool rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { - rtree_leaf_elm_t *elm; - + uintptr_t key, rtree_leaf_elm_t **elm) { size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); - if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { - rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; - assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); - elm = &leaf[subkey]; - return elm; - } else { - return NULL; + if (unlikely(rtree_ctx->cache[slot].leafkey != leafkey)) { + return true; } + + rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; + assert(leaf != NULL); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + *elm = &leaf[subkey]; + + return false; } JEMALLOC_ALWAYS_INLINE rtree_leaf_elm_t * @@ -449,16 +448,22 @@ rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } /* - * Returns true on error. + * Returns true when the request cannot be fulfilled by fastpath. 
*/ static inline bool rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_metadata_t *r_rtree_metadata) { - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, - key); - if (elm == NULL) { + rtree_leaf_elm_t *elm; + /* + * Should check the bool return value (lookup success or not) instead of + * elm == NULL (which will result in an extra branch). This is because + * when the cache lookup succeeds, there will never be a NULL pointer + * returned (which is unknown to the compiler). + */ + if (rtree_leaf_elm_lookup_fast(tsdn, rtree, rtree_ctx, key, &elm)) { return true; } + assert(elm != NULL); *r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm, /* dependent */ true).metadata; return false; From 6cb585b13ad196ca2e4588ce984c269f3fdb4cea Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 2 Nov 2021 15:56:36 -0700 Subject: [PATCH 2130/2608] San: Unguard guarded slabs during arena destruction When opt_retain is on, slab extents remain guarded in all states, even retained. This works well if arena is never destroyed, because we anticipate those slabs will be eventually reused. But if the arena is destroyed, the slabs must be unguarded to prevent leaking guard pages. 
--- include/jemalloc/internal/guard.h | 9 +++++++- src/extent.c | 8 ++++--- src/guard.c | 35 +++++++++++++++++++++++++------ 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/guard.h b/include/jemalloc/internal/guard.h index 31f98c5f..8e578168 100644 --- a/include/jemalloc/internal/guard.h +++ b/include/jemalloc/internal/guard.h @@ -14,7 +14,14 @@ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; void guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); -void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); +/* + * Unguard the extent, but don't modify emap boundaries. Must be called on an + * extent that has been erased from emap and shouldn't be placed back. + */ +void unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); void tsd_san_init(tsd_t *tsd); static inline bool diff --git a/src/extent.c b/src/extent.c index 84ecd6b2..a79e1c72 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1057,12 +1057,14 @@ extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); + assert(edata_state_get(edata) == extent_state_retained); + assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - /* Deregister first to avoid a race with other allocating threads. */ - extent_deregister(tsdn, pac, edata); - + if (edata_guarded_get(edata)) { + unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); + } edata_addr_set(edata, edata_base_get(edata)); /* Try to destroy; silently fail otherwise. 
*/ diff --git a/src/guard.c b/src/guard.c index 07232199..4dadc970 100644 --- a/src/guard.c +++ b/src/guard.c @@ -32,10 +32,16 @@ guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { /* The new boundary will be registered on the pa_alloc path. */ } -void -unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { +static void +unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, + bool reg_emap) { /* Remove the inner boundary which no longer exists. */ - emap_deregister_boundary(tsdn, emap, edata); + if (reg_emap) { + assert(edata_state_get(edata) == extent_state_active); + emap_deregister_boundary(tsdn, emap, edata); + } else { + assert(edata_state_get(edata) == extent_state_retained); + } size_t size = edata_size_get(edata); size_t size_with_guards = size + PAGE_GUARDS_SIZE; @@ -44,7 +50,6 @@ unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { uintptr_t guard1 = addr - PAGE; uintptr_t guard2 = addr + size; - assert(edata_state_get(edata) == extent_state_active); ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); /* Update the true addr and usable size of the edata. */ @@ -52,8 +57,26 @@ unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { edata_addr_set(edata, (void *)guard1); edata_guarded_set(edata, false); - /* Then re-register the outer boundary including the guards. */ - emap_register_boundary(tsdn, emap, edata, SC_NSIZES, /* slab */ false); + /* + * Then re-register the outer boundary including the guards, if + * requested. 
+ */ + if (reg_emap) { + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, + /* slab */ false); + } +} + +void +unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { + unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); +} + +void +unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + emap_assert_not_mapped(tsdn, emap, edata); + unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); } void From 37342a4d32797fdc029dde296cbef618c849608b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 4 Nov 2021 16:39:06 -0700 Subject: [PATCH 2131/2608] Add ctl interface for experimental_infallible_new. --- src/ctl.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 491a333b..eccb9589 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -123,6 +123,7 @@ CTL_PROTO(opt_junk) CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) +CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -439,6 +440,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero"), CTL(opt_zero)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("experimental_infallible_new"), + CTL(opt_experimental_infallible_new)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), @@ -2161,6 +2164,8 @@ CTL_RO_NL_CGEN(config_fill, opt_junk, opt_junk, const char *) CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) +CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, + opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, From 
6bdb4f5ab0358d0b4c53b2d18ec9422526042413 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 11 Nov 2021 20:35:37 -0800 Subject: [PATCH 2132/2608] Check prof_active in addtion to opt_prof during batch_alloc(). --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0c798c87..a9d7c166 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4000,6 +4000,7 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { size_t batch = num - filled; size_t surplus = SIZE_MAX; /* Dead store. */ bool prof_sample_event = config_prof && opt_prof + && prof_active_get_unlocked() && te_prof_sample_event_lookahead_surplus(tsd, batch * usize, &surplus); From 8b81d3f214cc9ef86210d731803fe39f2f3d54d9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Nov 2021 15:30:54 -0800 Subject: [PATCH 2133/2608] Fix the initialization of last_event in thread event init. The event counters maintain a relationship with the current bytes: last_event <= current < next_event. When a reinit happens (e.g. reincarnated tsd), the last event needs progressing because all events start fresh from the current bytes. --- src/thread_event.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/thread_event.c b/src/thread_event.c index bb91baa7..37eb5827 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -221,7 +221,13 @@ te_recompute_fast_threshold(tsd_t *tsd) { static void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { + /* + * The next threshold based on future events can only be adjusted after + * progressing the last_event counter (which is set to current). + */ + assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx)); assert(wait <= TE_MAX_START_WAIT); + uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= TE_MAX_INTERVAL ? 
wait : TE_MAX_INTERVAL); te_ctx_next_event_set(tsd, ctx, next_event); @@ -298,6 +304,19 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { static void te_init(tsd_t *tsd, bool is_alloc) { + te_ctx_t ctx; + te_ctx_get(tsd, &ctx, is_alloc); + /* + * Reset the last event to current, which starts the events from a clean + * state. This is necessary when re-init the tsd event counters. + * + * The event counters maintain a relationship with the current bytes: + * last_event <= current < next_event. When a reinit happens (e.g. + * reincarnated tsd), the last event needs progressing because all + * events start fresh from the current bytes. + */ + te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx)); + uint64_t wait = TE_MAX_START_WAIT; #define E(event, condition, alloc_event) \ if (is_alloc == alloc_event && condition) { \ @@ -311,8 +330,6 @@ te_init(tsd_t *tsd, bool is_alloc) { ITERATE_OVER_ALL_EVENTS #undef E - te_ctx_t ctx; - te_ctx_get(tsd, &ctx, is_alloc); te_adjust_thresholds_helper(tsd, &ctx, wait); } From 400c59895a744068994025cf33f80b56bc960a35 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Nov 2021 12:40:34 -0800 Subject: [PATCH 2134/2608] Fix uninitialized nstime reading / updating on the stack in hpa. In order for nstime_update to handle non-monotonic clocks, it requires the input nstime to be initialized -- when reading for the first time, zero init has to be done. Otherwise random stack value may be seen as clocks and returned. 
--- include/jemalloc/internal/hpa_hooks.h | 2 +- src/hpa.c | 13 ++++++++----- src/hpa_hooks.c | 7 +++++-- test/unit/hpa.c | 2 +- 4 files changed, 15 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 3e21d855..12e6b972 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -8,7 +8,7 @@ struct hpa_hooks_s { void (*purge)(void *ptr, size_t size); void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); - void (*curtime)(nstime_t *r_time); + void (*curtime)(nstime_t *r_time, bool first_reading); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index 5251655c..b2628dbf 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -347,7 +347,7 @@ hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, if (hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; - shard->central->hooks.curtime(&now); + shard->central->hooks.curtime(&now, /* first_reading */ true); hpdata_allow_hugify(ps, now); } /* @@ -437,7 +437,8 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { shard->npending_purge -= num_to_purge; shard->stats.npurge_passes++; shard->stats.npurges += purges_this_pass; - shard->central->hooks.curtime(&shard->last_purge); + shard->central->hooks.curtime(&shard->last_purge, + /* first_reading */ false); if (dehugify) { shard->stats.ndehugifies++; } @@ -477,7 +478,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { /* Make sure that it's been hugifiable for long enough. 
*/ nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); nstime_t nstime; - shard->central->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime, /* first_reading */ true); nstime_subtract(&nstime, &time_hugify_allowed); uint64_t millis = nstime_msec(&nstime); if (millis < shard->opts.hugify_delay_ms) { @@ -895,7 +896,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); nstime_t nstime; - shard->central->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime, + /* first_reading */ true); nstime_subtract(&nstime, &time_hugify_allowed); uint64_t since_hugify_allowed_ms = nstime_msec(&nstime); /* @@ -921,7 +923,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return BACKGROUND_THREAD_DEFERRED_MIN; } nstime_t nstime; - shard->central->hooks.curtime(&nstime); + shard->central->hooks.curtime(&nstime, + /* first_reading */ true); nstime_subtract(&nstime, &shard->last_purge); uint64_t since_last_purge_ms = nstime_msec(&nstime); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 6f377613..116592f2 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,7 +8,7 @@ static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static void hpa_hooks_hugify(void *ptr, size_t size); static void hpa_hooks_dehugify(void *ptr, size_t size); -static void hpa_hooks_curtime(nstime_t *r_nstime); +static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -48,6 +48,9 @@ hpa_hooks_dehugify(void *ptr, size_t size) { } static void -hpa_hooks_curtime(nstime_t *r_nstime) { +hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { + if (first_reading) { + nstime_init_zero(r_nstime); + } nstime_update(r_nstime); } diff --git a/test/unit/hpa.c b/test/unit/hpa.c index bda0d46d..a63d51d4 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -349,7 +349,7 @@ 
defer_test_dehugify(void *ptr, size_t size) { static nstime_t defer_curtime; static void -defer_test_curtime(nstime_t *r_time) { +defer_test_curtime(nstime_t *r_time, bool first_reading) { *r_time = defer_curtime; } From cdabe908d05ba68da248edf1dd9f522af1ec6024 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Nov 2021 14:51:07 -0800 Subject: [PATCH 2135/2608] Track the initialized state of nstime_t on debug build. Some nstime_t operations require and assume the input nstime is initialized (e.g. nstime_update) -- uninitialized input may cause silent failures which is difficult to reproduce / debug. Add an explicit flag to track the state (limited to debug build only). Also fixed an use case in hpa (time of last_purge). --- include/jemalloc/internal/nstime.h | 11 +++++- src/hpa.c | 1 + src/nstime.c | 62 +++++++++++++++++++++++++++++- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 76e4351a..e8315db1 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -3,10 +3,19 @@ /* Maximum supported number of seconds (~584 years). */ #define NSTIME_SEC_MAX KQU(18446744072) -#define NSTIME_ZERO_INITIALIZER {0} + +#define NSTIME_MAGIC ((uint32_t)0xb8a9ce37) +#ifdef JEMALLOC_DEBUG +# define NSTIME_ZERO_INITIALIZER {0, NSTIME_MAGIC} +#else +# define NSTIME_ZERO_INITIALIZER {0} +#endif typedef struct { uint64_t ns; +#ifdef JEMALLOC_DEBUG + uint32_t magic; /* Tracks if initialized. 
*/ +#endif } nstime_t; static const nstime_t zero = NSTIME_ZERO_INITIALIZER; diff --git a/src/hpa.c b/src/hpa.c index b2628dbf..caf122b7 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -203,6 +203,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->opts = *opts; shard->npending_purge = 0; + nstime_init_zero(&shard->last_purge); shard->stats.npurge_passes = 0; shard->stats.npurges = 0; diff --git a/src/nstime.c b/src/nstime.c index 184aa4c9..44419d2c 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -8,93 +8,153 @@ #define BILLION UINT64_C(1000000000) #define MILLION UINT64_C(1000000) +static void +nstime_set_initialized(nstime_t *time) { +#ifdef JEMALLOC_DEBUG + time->magic = NSTIME_MAGIC; +#endif +} + +static void +nstime_assert_initialized(const nstime_t *time) { +#ifdef JEMALLOC_DEBUG + /* + * Some parts (e.g. stats) rely on memset to zero initialize. Treat + * these as valid initialization. + */ + assert(time->magic == NSTIME_MAGIC || + (time->magic == 0 && time->ns == 0)); +#endif +} + +static void +nstime_pair_assert_initialized(const nstime_t *t1, const nstime_t *t2) { + nstime_assert_initialized(t1); + nstime_assert_initialized(t2); +} + +static void +nstime_initialize_operand(nstime_t *time) { + /* + * Operations like nstime_add may have the initial operand being zero + * initialized (covered by the assert below). Full-initialize needed + * before changing it to non-zero. 
+ */ + nstime_assert_initialized(time); + nstime_set_initialized(time); +} + void nstime_init(nstime_t *time, uint64_t ns) { + nstime_set_initialized(time); time->ns = ns; } void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec) { + nstime_set_initialized(time); time->ns = sec * BILLION + nsec; } uint64_t nstime_ns(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns; } uint64_t nstime_msec(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns / MILLION; } uint64_t nstime_sec(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns / BILLION; } uint64_t nstime_nsec(const nstime_t *time) { + nstime_assert_initialized(time); return time->ns % BILLION; } void nstime_copy(nstime_t *time, const nstime_t *source) { + /* Source is required to be initialized. */ + nstime_assert_initialized(source); *time = *source; + nstime_assert_initialized(time); } int nstime_compare(const nstime_t *a, const nstime_t *b) { + nstime_pair_assert_initialized(a, b); return (a->ns > b->ns) - (a->ns < b->ns); } void nstime_add(nstime_t *time, const nstime_t *addend) { + nstime_pair_assert_initialized(time, addend); assert(UINT64_MAX - time->ns >= addend->ns); + nstime_initialize_operand(time); time->ns += addend->ns; } void nstime_iadd(nstime_t *time, uint64_t addend) { + nstime_assert_initialized(time); assert(UINT64_MAX - time->ns >= addend); + nstime_initialize_operand(time); time->ns += addend; } void nstime_subtract(nstime_t *time, const nstime_t *subtrahend) { + nstime_pair_assert_initialized(time, subtrahend); assert(nstime_compare(time, subtrahend) >= 0); + /* No initialize operand -- subtraction must be initialized. */ time->ns -= subtrahend->ns; } void nstime_isubtract(nstime_t *time, uint64_t subtrahend) { + nstime_assert_initialized(time); assert(time->ns >= subtrahend); + /* No initialize operand -- subtraction must be initialized. 
*/ time->ns -= subtrahend; } void nstime_imultiply(nstime_t *time, uint64_t multiplier) { + nstime_assert_initialized(time); assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + nstime_initialize_operand(time); time->ns *= multiplier; } void nstime_idivide(nstime_t *time, uint64_t divisor) { + nstime_assert_initialized(time); assert(divisor != 0); + nstime_initialize_operand(time); time->ns /= divisor; } uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor) { + nstime_pair_assert_initialized(time, divisor); assert(divisor->ns != 0); + /* No initialize operand -- *time itself remains unchanged. */ return time->ns / divisor->ns; } @@ -192,7 +252,7 @@ nstime_update_impl(nstime_t *time) { nstime_t old_time; nstime_copy(&old_time, time); - nstime_get(time); + nstime_get(time); /* Handle non-monotonic clocks. */ if (unlikely(nstime_compare(&old_time, time) > 0)) { From 3b3257a7092f447fa6c9a3a7305cb346dfb37841 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 22 Nov 2021 18:42:05 -0800 Subject: [PATCH 2136/2608] Correct opt.prof_leak documentation The option has been misleading, because it stays disabled unless prof_final is also specified. In practice it's impossible to detect that the option is silently disabled, because it just doesn't provide any output as if there are no memory leaks detected. --- doc/jemalloc.xml.in | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index b8b96abe..cba0b3f6 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1547,8 +1547,10 @@ malloc_conf = "xmalloc:true";]]> 3 function to report memory leaks detected by allocation sampling. See the opt.prof option for - information on analyzing heap profile output. This option is disabled - by default. + information on analyzing heap profile output. 
Works only when combined + with opt.prof_final + , otherwise does nothing. This option is disabled by default. + From 113e8e68e1932065125acf66fa087a2e6e11b509 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Thu, 2 Dec 2021 16:40:05 +0000 Subject: [PATCH 2137/2608] freebsd 14 build fix proposal. seems to have introduced finally more linux api cpu affinity (sched_* family) compatibility detected at configure time thus adjusting accordingly. --- configure.ac | 1 + include/jemalloc/internal/jemalloc_internal_decls.h | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 7a49e84f..22900ec0 100644 --- a/configure.ac +++ b/configure.ac @@ -652,6 +652,7 @@ case "${host}" in SYM_PREFIX="_" ;; *-*-freebsd*) + JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE) abi="elf" AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) force_lazy_lock="1" diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 7d212c4e..983027c8 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -34,6 +34,10 @@ # include # if defined(__FreeBSD__) || defined(__DragonFly__) # include +# include +# if defined(__FreeBSD__) +# define cpu_set_t cpuset_t +# endif # endif # include # ifdef JEMALLOC_OS_UNFAIR_LOCK From af6ee27c0d6a87d0274b9e83a55f78176ab95da4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 Dec 2021 12:06:16 -0800 Subject: [PATCH 2138/2608] Enforce abort_conf:true when malloc_conf is not fully recognized. Ensures the malloc_conf "ends with key", "ends with comma" and "malform conf string" cases abort under abort_conf:true. 
--- src/jemalloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index a9d7c166..a7d43dc1 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -845,10 +845,12 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, if (opts != *opts_p) { malloc_write(": Conf string ends " "with key\n"); + had_conf_error = true; } return true; default: malloc_write(": Malformed conf string\n"); + had_conf_error = true; return true; } } @@ -867,6 +869,7 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, if (*opts == '\0') { malloc_write(": Conf string ends " "with comma\n"); + had_conf_error = true; } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; accept = true; From 7dcf77809c9886e3892e29954d90b838af1292c3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 30 Nov 2021 15:58:03 -0800 Subject: [PATCH 2139/2608] Mark slab as true on sized dealloc fast path. For sized dealloc, fastpath only handles lookup-able sizes, which must be slabs. --- include/jemalloc/internal/sc.h | 11 ++++++++--- src/jemalloc.c | 10 +++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 031ffff4..8efd3249 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -248,16 +248,21 @@ /* The largest size class in the lookup table, and its binary log. */ #define SC_LG_MAX_LOOKUP 12 -#define SC_LOOKUP_MAXCLASS ((size_t)1 << SC_LG_MAX_LOOKUP) +#define SC_LOOKUP_MAXCLASS (1 << SC_LG_MAX_LOOKUP) /* Internal, only used for the definition of SC_SMALL_MAXCLASS. */ -#define SC_SMALL_MAX_BASE ((size_t)1 << (LG_PAGE + SC_LG_NGROUP - 1)) -#define SC_SMALL_MAX_DELTA ((size_t)1 << (LG_PAGE - 1)) +#define SC_SMALL_MAX_BASE (1 << (LG_PAGE + SC_LG_NGROUP - 1)) +#define SC_SMALL_MAX_DELTA (1 << (LG_PAGE - 1)) /* The largest size class allocated out of a slab. 
*/ #define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \ + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) +/* The fastpath assumes all lookup-able sizes are small. */ +#if (SC_SMALL_MAXCLASS < SC_LOOKUP_MAXCLASS) +# error "Lookup table sizes must be small" +#endif + /* The smallest size class not allocated out of a slab. */ #define SC_LARGE_MINCLASS ((size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)) #define SC_LG_LARGE_MINCLASS (LG_PAGE + SC_LG_NGROUP) diff --git a/src/jemalloc.c b/src/jemalloc.c index a7d43dc1..521f4ea4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2946,9 +2946,17 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + alloc_ctx.slab = true; } + /* + * Currently the fastpath only handles small sizes. The branch on + * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking + * tcache szind upper limit (i.e. tcache_maxclass) as well. + */ + assert(alloc_ctx.slab); uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold); From d9bbf539ff9cee5f138e03ad2e7f61263d381c7f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Wed, 13 Oct 2021 12:35:52 -0700 Subject: [PATCH 2140/2608] CI: Refactor gen_travis.py The CI consolidation project adds more operating systems to Travis. This refactoring is aimed to decouple the configuration of each individual OS from the actual job matrix generation and formatting. Otherwise, format_job function would turn into a huge collection of ad-hoc conditions. 
--- .travis.yml | 127 +++++++++--------- scripts/gen_travis.py | 290 +++++++++++++++++++++++++++--------------- 2 files changed, 250 insertions(+), 167 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5cf0e08e..ecc13f4f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,17 +1,15 @@ + +# This config file is generated by ./scripts/gen_travis.py. +# Do not edit by hand. + language: generic dist: focal -matrix: +jobs: include: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" @@ -53,57 +51,6 @@ matrix: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 addons: *gcc_multilib @@ -313,18 +260,76 @@ matrix: - os: linux arch: amd64 env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: ppc64le + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ COMPILER_FLAGS="" 
CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" # --enable-expermental-smallocx: - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" before_script: - autoconf - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS" } $CONFIGURE_FLAGS + # If COMPILER_FLAGS are not empty, add them to CC and CXX + - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS - make -j3 - make -j3 tests diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index fe9d8403..e98ebeb6 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -1,22 +1,35 @@ #!/usr/bin/env python3 -from itertools import combinations +from itertools import combinations, chain +from enum import Enum, auto + + +LINUX = 'linux' +OSX = 'osx' + + +AMD64 = 'amd64' +ARM64 = 'arm64' +PPC64LE = 'ppc64le' + + +TRAVIS_TEMPLATE = """ +# This config file is generated by ./scripts/gen_travis.py. +# Do not edit by hand. 
-travis_template = """\ language: generic dist: focal -matrix: +jobs: include: -%s +{jobs} before_script: - autoconf - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - - ./configure ${COMPILER_FLAGS:+ \ - CC="$CC $COMPILER_FLAGS" \ - CXX="$CXX $COMPILER_FLAGS" } \ - $CONFIGURE_FLAGS + # If COMPILER_FLAGS are not empty, add them to CC and CXX + - ./configure ${{COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" \ +CXX="$CXX $COMPILER_FLAGS"}} $CONFIGURE_FLAGS - make -j3 - make -j3 tests @@ -24,6 +37,39 @@ script: - make check """ + +class Option(object): + class Type: + COMPILER = auto() + COMPILER_FLAG = auto() + CONFIGURE_FLAG = auto() + MALLOC_CONF = auto() + + def __init__(self, type, value): + self.type = type + self.value = value + + @staticmethod + def as_compiler(value): + return Option(Option.Type.COMPILER, value) + + @staticmethod + def as_compiler_flag(value): + return Option(Option.Type.COMPILER_FLAG, value) + + @staticmethod + def as_configure_flag(value): + return Option(Option.Type.CONFIGURE_FLAG, value) + + @staticmethod + def as_malloc_conf(value): + return Option(Option.Type.MALLOC_CONF, value) + + def __eq__(self, obj): + return (isinstance(obj, Option) and obj.type == self.type + and obj.value == self.value) + + # The 'default' configuration is gcc, on linux, with no compiler or configure # flags. We also test with clang, -m32, --enable-debug, --enable-prof, # --disable-stats, and --with-malloc-conf=tcache:false. To avoid abusing @@ -32,84 +78,80 @@ script: # hope that bugs involving interactions of such settings are rare. 
MAX_UNUSUAL_OPTIONS = 2 -os_default = 'linux' -os_unusual = 'osx' -arch_default = 'amd64' -arch_unusual = 'ppc64le' +GCC = Option.as_compiler('CC=gcc CXX=g++') +CLANG = Option.as_compiler('CC=clang CXX=clang++') -compilers_default = 'CC=gcc CXX=g++' -compilers_unusual = 'CC=clang CXX=clang++' -compiler_flag_unusuals = ['-m32'] +compiler_default = GCC +compilers_unusual = [CLANG,] -configure_flag_unusuals = [ + +compiler_flag_unusuals = [Option.as_compiler_flag(opt) for opt in ('-m32',)] + + +configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( '--enable-debug', '--enable-prof', '--disable-stats', '--disable-libdl', '--enable-opt-safety-checks', '--with-lg-page=16', -] +)] -malloc_conf_unusuals = [ + +malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( 'tcache:false', 'dss:primary', 'percpu_arena:percpu', 'background_thread:true', -] +)] -all_unusuals = ( - [os_unusual] + [arch_unusual] + [compilers_unusual] + compiler_flag_unusuals - + configure_flag_unusuals + malloc_conf_unusuals -) -unusual_combinations_to_test = [] -for i in range(MAX_UNUSUAL_OPTIONS + 1): - unusual_combinations_to_test += combinations(all_unusuals, i) +all_unusuals = (compilers_unusual + compiler_flag_unusuals + + configure_flag_unusuals + malloc_conf_unusuals) + gcc_multilib_set = False -gcc_ppc_set = False + + +def get_extra_cflags(os, compiler): + # We get some spurious errors when -Warray-bounds is enabled. 
+ extra_cflags = ['-Werror', '-Wno-array-bounds'] + if compiler == CLANG.value or os == OSX: + extra_cflags += [ + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] + if os == OSX: + extra_cflags += [ + '-Wno-deprecated-declarations', + ] + return extra_cflags + + # Formats a job from a combination of flags -def format_job(combination): +def format_job(os, arch, combination): global gcc_multilib_set - global gcc_ppc_set - os = os_unusual if os_unusual in combination else os_default - compilers = compilers_unusual if compilers_unusual in combination else compilers_default - arch = arch_unusual if arch_unusual in combination else arch_default - compiler_flags = [x for x in combination if x in compiler_flag_unusuals] - configure_flags = [x for x in combination if x in configure_flag_unusuals] - malloc_conf = [x for x in combination if x in malloc_conf_unusuals] + compiler = [x.value for x in combination if x.type == Option.Type.COMPILER] + assert(len(compiler) <= 1) + if not compiler: + compiler = compiler_default.value + else: + compiler = compiler[0] + compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG] + configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG] + malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF] - # Filter out unsupported configurations on OS X. - if os == 'osx' and ('dss:primary' in malloc_conf or \ - 'percpu_arena:percpu' in malloc_conf or 'background_thread:true' \ - in malloc_conf): - return "" - # gcc is just a redirect to clang on OS X. No need to test both. - if os == 'osx' and compilers_unusual in combination: - return "" if len(malloc_conf) > 0: - configure_flags.append('--with-malloc-conf=' + ",".join(malloc_conf)) - - # Filter out an unsupported configuration - heap profiling on OS X. 
- if os == 'osx' and '--enable-prof' in configure_flags: - return "" - - # Filter out unsupported OSX configuration on PPC64LE - if arch == 'ppc64le' and ( - os == 'osx' - or '-m32' in combination - or compilers_unusual in combination - ): - return "" + configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) job = "" - job += ' - os: %s\n' % os - job += ' arch: %s\n' % arch + job += ' - os: {}\n'.format(os) + job += ' arch: {}\n'.format(arch) - if '-m32' in combination and os == 'linux': + if '-m32' in compiler_flags and os == 'linux': job += ' addons:' if gcc_multilib_set: job += ' *gcc_multilib\n' @@ -121,63 +163,99 @@ def format_job(combination): job += ' - g++-multilib\n' gcc_multilib_set = True - # We get some spurious errors when -Warray-bounds is enabled. - extra_cflags = ['-Werror', '-Wno-array-bounds'] - if 'clang' in compilers or os == 'osx': - extra_cflags += [ - '-Wno-unknown-warning-option', - '-Wno-ignored-attributes' - ] - if os == 'osx': - extra_cflags += [ - '-Wno-deprecated-declarations', - ] env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' 'EXTRA_CFLAGS="{}"'.format( - compilers, ' '.join(compiler_flags), ' '.join(configure_flags), - ' '.join(extra_cflags))) + compiler, + ' '.join(compiler_flags), + ' '.join(configure_flags), + ' '.join(get_extra_cflags(os, compiler)))) - job += ' env: %s\n' % env_string + job += ' env: {}'.format(env_string) return job -include_rows = "" -for combination in unusual_combinations_to_test: - include_rows += format_job(combination) -# Development build -include_rows += '''\ +def generate_unusual_combinations(max_unusual_opts): + """ + Generates different combinations of non-standard compilers, compiler flags, + configure flags and malloc_conf settings. + + @param max_unusual_opts: Limit of unusual options per combination. 
+ """ + return chain.from_iterable( + [combinations(all_unusuals, i) for i in range(max_unusual_opts + 1)]) + + +def included(combination, exclude): + """ + Checks if the combination of options should be included in the Travis + testing matrix. + """ + return not any(excluded in combination for excluded in exclude) + + +def generate_jobs(os, arch, exclude, max_unusual_opts): + jobs = [] + for combination in generate_unusual_combinations(max_unusual_opts): + if included(combination, exclude): + jobs.append(format_job(os, arch, combination)) + return '\n'.join(jobs) + + +def generate_linux(arch): + os = LINUX + + # Only generate 2 unusual options for AMD64 to reduce matrix size + max_unusual_opts = MAX_UNUSUAL_OPTIONS if arch == AMD64 else 1 + + exclude = [] + if arch == PPC64LE: + # Avoid 32 bit builds and clang on PowerPC + exclude = [Option.as_compiler_flag('-m32')] + compilers_unusual + + return generate_jobs(os, arch, exclude, max_unusual_opts) + + +def generate_macos(arch): + os = OSX + + max_unusual_opts = 1 + + exclude = ([Option.as_malloc_conf(opt) for opt in ( + 'dss:primary', + 'percpu_arena:percpu', + 'background_thread:true')] + + [Option.as_configure_flag('--enable-prof')] + + [CLANG,]) + + return generate_jobs(os, arch, exclude, max_unusual_opts) + + +def get_manual_jobs(): + return """\ # Development build - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" -''' - -# Enable-expermental-smallocx -include_rows += '''\ + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug \ +--disable-cache-oblivious --enable-stats --enable-log --enable-prof" \ +EXTRA_CFLAGS="-Werror -Wno-array-bounds" # --enable-expermental-smallocx: - os: linux - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" -''' + env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug \ +--enable-experimental-smallocx --enable-stats --enable-prof" \ +EXTRA_CFLAGS="-Werror -Wno-array-bounds" +""" -# Does not seem to be working on newer travis machines. Valgrind has long been a -# pain point; abandon it for now. -# Valgrind build bots -#include_rows += ''' -# # Valgrind -# - os: linux -# arch: amd64 -# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" -# addons: -# apt: -# packages: -# - valgrind -#''' -# To enable valgrind on macosx add: -# -# - os: osx -# env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" JEMALLOC_TEST_PREFIX="valgrind" -# install: brew install valgrind -# -# It currently fails due to: https://github.com/jemalloc/jemalloc/issues/1274 +def main(): + jobs = '\n'.join(( + generate_linux(AMD64), + generate_linux(PPC64LE), -print(travis_template % include_rows) + generate_macos(AMD64), + get_manual_jobs() + )) + + print(TRAVIS_TEMPLATE.format(jobs=jobs)) + + +if __name__ == '__main__': + main() From 62f9c54d2a9035c6bfdbb4c41ecc0dcb040b509e Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 22 Oct 2021 17:40:42 -0700 Subject: [PATCH 2141/2608] San: Rename 'guard' to 'san' This prepares the foundation for more sanitizer-related work in the future. 
--- Makefile.in | 4 ++-- include/jemalloc/internal/ecache.h | 2 +- include/jemalloc/internal/{guard.h => san.h} | 13 +++++++------ src/arena.c | 6 +++--- src/ecache.c | 2 +- src/extent.c | 4 ++-- src/jemalloc.c | 2 +- src/pa.c | 2 +- src/pac.c | 6 +++--- src/{guard.c => san.c} | 17 +++++++++-------- src/tsd.c | 2 +- test/include/test/{guard.h => san.h} | 0 test/unit/double_free.c | 2 +- test/unit/retained.c | 2 +- test/unit/{guard.c => san.c} | 4 ++-- test/unit/{guard.sh => san.sh} | 0 16 files changed, 35 insertions(+), 33 deletions(-) rename include/jemalloc/internal/{guard.h => san.h} (83%) rename src/{guard.c => san.c} (80%) rename test/include/test/{guard.h => san.h} (100%) rename test/unit/{guard.c => san.c} (98%) rename test/unit/{guard.sh => san.sh} (100%) diff --git a/Makefile.in b/Makefile.in index abd361fd..8f96a992 100644 --- a/Makefile.in +++ b/Makefile.in @@ -119,7 +119,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_dss.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/fxp.c \ - $(srcroot)src/guard.c \ + $(srcroot)src/san.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ @@ -219,7 +219,7 @@ TESTS_UNIT := \ ${srcroot}test/unit/fb.c \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ - ${srcroot}test/unit/guard.c \ + ${srcroot}test/unit/san.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index dd1bc320..71cae3e3 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -2,7 +2,7 @@ #define JEMALLOC_INTERNAL_ECACHE_H #include "jemalloc/internal/eset.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" typedef struct ecache_s ecache_t; diff --git a/include/jemalloc/internal/guard.h b/include/jemalloc/internal/san.h similarity index 83% rename from include/jemalloc/internal/guard.h rename to 
include/jemalloc/internal/san.h index 8e578168..b3d0304c 100644 --- a/include/jemalloc/internal/guard.h +++ b/include/jemalloc/internal/san.h @@ -13,15 +13,16 @@ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; -void guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); -void unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, +void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap); +void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); /* * Unguard the extent, but don't modify emap boundaries. Must be called on an * extent that has been erased from emap and shouldn't be placed back. */ -void unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); +void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, emap_t *emap); void tsd_san_init(tsd_t *tsd); static inline bool @@ -30,7 +31,7 @@ san_enabled(void) { } static inline bool -large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, +san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, size_t alignment) { if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || tsdn_null(tsdn)) { @@ -60,7 +61,7 @@ large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, } static inline bool -slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { +san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || tsdn_null(tsdn)) { return false; diff --git a/src/arena.c b/src/arena.c index 811f0edc..19e4e85a 100644 --- a/src/arena.c +++ b/src/arena.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" 
#include "jemalloc/internal/safety_check.h" @@ -328,7 +328,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; - bool guarded = large_extent_decide_guard(tsdn, arena_get_ehooks(arena), + bool guarded = san_large_extent_decide_guard(tsdn, arena_get_ehooks(arena), esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, /* slab */ false, szind, zero, guarded, &deferred_work_generated); @@ -829,7 +829,7 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool guarded = slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); + bool guarded = san_slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, /* alignment */ PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, guarded, &deferred_work_generated); diff --git a/src/ecache.c b/src/ecache.c index 26fc2112..a242227d 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, diff --git a/src/extent.c b/src/extent.c index a79e1c72..7112d3a8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1013,7 +1013,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Restore guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); - unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages(tsdn, ehooks, edata, pac->emap); } /* * Deregister first to avoid a race with other allocating @@ -1063,7 +1063,7 @@ extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, WITNESS_RANK_CORE, 0); if (edata_guarded_get(edata)) { - unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); + san_unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); } edata_addr_set(edata, edata_base_get(edata)); diff --git a/src/jemalloc.c b/src/jemalloc.c index 521f4ea4..e707f9f9 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -10,7 +10,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/fxp.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" diff --git a/src/pa.c b/src/pa.c index 779e672b..9004cc90 100644 --- a/src/pa.c +++ b/src/pa.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/hpa.h" static void diff --git a/src/pac.c b/src/pac.c index 176b181a..e53de80f 100644 --- a/src/pac.c +++ b/src/pac.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/pac.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); @@ -146,7 +146,7 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, if (edata != NULL) { /* Add guards around it. 
*/ assert(edata_size_get(edata) == size_with_guards); - guard_pages(tsdn, ehooks, edata, pac->emap); + san_guard_pages(tsdn, ehooks, edata, pac->emap); } assert(edata == NULL || (edata_guarded_get(edata) && edata_size_get(edata) == size)); @@ -253,7 +253,7 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, if (!edata_slab_get(edata) || !maps_coalesce) { assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || !maps_coalesce); - unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages(tsdn, ehooks, edata, pac->emap); } } diff --git a/src/guard.c b/src/san.c similarity index 80% rename from src/guard.c rename to src/san.c index 4dadc970..139ec5a3 100644 --- a/src/guard.c +++ b/src/san.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ehooks.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/tsd.h" /* The sanitizer options. */ @@ -11,7 +11,7 @@ size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; void -guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { +san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { emap_deregister_boundary(tsdn, emap, edata); size_t size_with_guards = edata_size_get(edata); @@ -33,8 +33,8 @@ guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { } static void -unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, - bool reg_emap) { +san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap, bool reg_emap) { /* Remove the inner boundary which no longer exists. 
*/ if (reg_emap) { assert(edata_state_get(edata) == extent_state_active); @@ -68,15 +68,16 @@ unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, } void -unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); +san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); } void -unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, +san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { emap_assert_not_mapped(tsdn, emap, edata); - unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); } void diff --git a/src/tsd.c b/src/tsd.c index 31ff2f23..4859048e 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -2,7 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" diff --git a/test/include/test/guard.h b/test/include/test/san.h similarity index 100% rename from test/include/test/guard.h rename to test/include/test/san.h diff --git a/test/unit/double_free.c b/test/unit/double_free.c index f98484c4..12122c1b 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/guard.h" +#include "test/san.h" #include "jemalloc/internal/safety_check.h" diff --git a/test/unit/retained.c b/test/unit/retained.c index 53cda286..76bda50f 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/spin.h" static unsigned 
arena_ind; diff --git a/test/unit/guard.c b/test/unit/san.c similarity index 98% rename from test/unit/guard.c rename to test/unit/san.c index 43381e44..1baa26e9 100644 --- a/test/unit/guard.c +++ b/test/unit/san.c @@ -1,8 +1,8 @@ #include "test/jemalloc_test.h" #include "test/arena_decay.h" -#include "test/guard.h" +#include "test/san.h" -#include "jemalloc/internal/guard.h" +#include "jemalloc/internal/san.h" static void verify_extent_guarded(tsdn_t *tsdn, void *ptr) { diff --git a/test/unit/guard.sh b/test/unit/san.sh similarity index 100% rename from test/unit/guard.sh rename to test/unit/san.sh From 34b00f896966e3993b8570542dfe77c2002ce185 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 22 Oct 2021 17:23:09 -0700 Subject: [PATCH 2142/2608] San: Avoid running san tests with prof enabled With prof enabled, number of page aligned allocations doesn't match the number of slab "ends" because prof allocations skew the addresses. It leads to 'pages' array overflow and hard to debug failures. 
--- test/unit/san.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/unit/san.c b/test/unit/san.c index 1baa26e9..93e292f6 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -14,6 +14,8 @@ verify_extent_guarded(tsdn_t *tsdn, void *ptr) { void *small_alloc[MAX_SMALL_ALLOCATIONS]; TEST_BEGIN(test_guarded_small) { + test_skip_if(opt_prof); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); unsigned npages = 16, pages_found = 0, ends_found = 0; VARIABLE_ARRAY(uintptr_t, pages, npages); @@ -27,6 +29,8 @@ TEST_BEGIN(test_guarded_small) { small_alloc[n_alloc] = ptr; verify_extent_guarded(tsdn, ptr); if ((uintptr_t)ptr % PAGE == 0) { + assert_u_lt(pages_found, npages, + "Unexpectedly large number of page aligned allocs"); pages[pages_found++] = (uintptr_t)ptr; } if (((uintptr_t)ptr + (uintptr_t)sz) % PAGE == 0) { From 0f6da1257d7182777e47c78f47e0bb2aa28d259b Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 4 Nov 2021 11:10:19 -0700 Subject: [PATCH 2143/2608] San: Implement bump alloc The new allocator will be used to allocate guarded extents used as slabs for guarded small allocations. 
--- Makefile.in | 2 + include/jemalloc/internal/extent.h | 11 ++- include/jemalloc/internal/pac.h | 6 ++ include/jemalloc/internal/san.h | 48 +++++++++- include/jemalloc/internal/san_bump.h | 27 ++++++ include/jemalloc/internal/witness.h | 1 + src/extent.c | 95 ++++++++++---------- src/pac.c | 15 ++-- src/pages.c | 56 +++++++++--- src/san.c | 97 ++++++++++++++++---- src/san_bump.c | 127 +++++++++++++++++++++++++++ test/include/test/arena_decay.h | 28 +++--- test/unit/retained.c | 2 +- test/unit/san.c | 2 +- test/unit/san_bump.c | 111 +++++++++++++++++++++++ 15 files changed, 521 insertions(+), 107 deletions(-) create mode 100644 include/jemalloc/internal/san_bump.h create mode 100644 src/san_bump.c create mode 100644 test/unit/san_bump.c diff --git a/Makefile.in b/Makefile.in index 8f96a992..50c586c5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -120,6 +120,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/extent_mmap.c \ $(srcroot)src/fxp.c \ $(srcroot)src/san.c \ + $(srcroot)src/san_bump.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ @@ -220,6 +221,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/fork.c \ ${srcroot}test/unit/fxp.c \ ${srcroot}test/unit/san.c \ + ${srcroot}test/unit/san_bump.c \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 73c55633..73059ad2 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -30,14 +30,20 @@ void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); +void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata); +void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata); void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +edata_t *extent_alloc_wrapper(tsdn_t 
*tsdn, pac_t *pac, ehooks_t *ehooks, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, + bool growing_retained); void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); + size_t offset, size_t length, bool growing_retained); bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -45,7 +51,8 @@ bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b); + ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, + bool holding_core_locks); bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); size_t extent_sn_next(pac_t *pac); diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 5eee3de8..7eaaf894 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/pai.h" +#include "san_bump.h" /* @@ -127,6 +128,11 @@ pac_mapped(pac_t *pac) { return atomic_load_zu(&pac->stats->pac_mapped, ATOMIC_RELAXED); } +static inline ehooks_t * +pac_ehooks_get(pac_t *pac) { + return base_ehooks_get(pac->base); +} + /* * All purging functions require holding decay->mtx. This is one of the few * places external modules are allowed to peek inside pa_shard_t internals. 
diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index b3d0304c..70debf3a 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -4,7 +4,8 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/emap.h" -#define PAGE_GUARDS_SIZE (2 * PAGE) +#define SAN_PAGE_GUARD PAGE +#define SAN_PAGE_GUARDS_SIZE (SAN_PAGE_GUARD * 2) #define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 #define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 @@ -14,9 +15,9 @@ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); + emap_t *emap, bool left, bool right, bool remap); void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap); + emap_t *emap, bool left, bool right); /* * Unguard the extent, but don't modify emap boundaries. Must be called on an * extent that has been erased from emap and shouldn't be placed back. @@ -25,6 +26,45 @@ void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); void tsd_san_init(tsd_t *tsd); +static inline void +san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap, bool remap) { + return san_guard_pages(tsdn, ehooks, edata, emap, true, true, + remap); +} + +static inline void +san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + emap_t *emap) { + return san_unguard_pages(tsdn, ehooks, edata, emap, true, true); +} + +static inline size_t +san_two_side_unguarded_sz(size_t size) { + assert(size % PAGE == 0); + assert(size >= SAN_PAGE_GUARDS_SIZE); + return size - SAN_PAGE_GUARDS_SIZE; +} + +static inline size_t +san_two_side_guarded_sz(size_t size) { + assert(size % PAGE == 0); + return size + SAN_PAGE_GUARDS_SIZE; +} + +static inline size_t +san_one_side_unguarded_sz(size_t size) { + assert(size % PAGE == 0); + assert(size >= SAN_PAGE_GUARD); + return size - 
SAN_PAGE_GUARD; +} + +static inline size_t +san_one_side_guarded_sz(size_t size) { + assert(size % PAGE == 0); + return size + SAN_PAGE_GUARD; +} + static inline bool san_enabled(void) { return (opt_san_guard_large != 0 || opt_san_guard_small != 0); @@ -50,7 +90,7 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, } if (n == 1 && (alignment <= PAGE) && - (size + PAGE_GUARDS_SIZE <= SC_LARGE_MAXCLASS)) { + (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) { *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; return true; diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h new file mode 100644 index 00000000..9c6c224f --- /dev/null +++ b/include/jemalloc/internal/san_bump.h @@ -0,0 +1,27 @@ +#ifndef JEMALLOC_INTERNAL_SAN_BUMP_H +#define JEMALLOC_INTERNAL_SAN_BUMP_H + +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/mutex.h" + +extern const size_t SBA_RETAINED_ALLOC_SIZE; + +typedef struct ehooks_s ehooks_t; +typedef struct pac_s pac_t; + +typedef struct san_bump_alloc_s san_bump_alloc_t; +struct san_bump_alloc_s { + malloc_mutex_t mtx; + + edata_t *curr_reg; +}; + +bool +san_bump_alloc_init(san_bump_alloc_t* sba); + +edata_t * +san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, + size_t size, bool zero); + +#endif /* JEMALLOC_INTERNAL_SAN_BUMP_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index c12a705c..e81b9a00 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -48,6 +48,7 @@ enum witness_rank_e { WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, + WITNESS_RANK_SAN_BUMP_ALLOC = WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_EXTENTS, WITNESS_RANK_HPA_SHARD = WITNESS_RANK_EXTENTS, diff --git a/src/extent.c b/src/extent.c index 7112d3a8..13d688d1 100644 --- a/src/extent.c +++ 
b/src/extent.c @@ -40,13 +40,9 @@ static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool zero, bool *commit, bool growing_retained, bool guarded); static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced); -static void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata); static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit, bool guarded); -static edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit); /******************************************************************************/ @@ -127,7 +123,8 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, void *new_addr = (expand_edata == NULL) ? NULL : edata_past_get(expand_edata); edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, - size, alignment, zero, &commit); + size, alignment, zero, &commit, + /* growing_retained */ false); } assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); @@ -270,7 +267,7 @@ extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); } -static void +void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. 
*/ @@ -785,35 +782,6 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } -static edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - - edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); - if (edata == NULL) { - return NULL; - } - size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, - &zero, commit); - if (addr == NULL) { - edata_cache_put(tsdn, pac->edata_cache, edata); - return NULL; - } - edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zero, *commit, EXTENT_PAI_PAC, - opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD); - if (extent_register(tsdn, pac, edata)) { - edata_cache_put(tsdn, pac->edata_cache, edata); - return NULL; - } - - return edata; -} - static bool extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *inner, edata_t *outer, bool forward) { @@ -924,9 +892,9 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Does the metadata management portions of putting an unused extent into the * given ecache_t (coalesces and inserts into the eset). 
*/ -static void -extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata) { +void +extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata) { assert((ecache->state != extent_state_dirty && ecache->state != extent_state_muzzy) || !edata_zeroed_get(edata)); @@ -1001,6 +969,42 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return err; } +edata_t * +extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, + bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 1 : 0); + + edata_t *edata = edata_cache_get(tsdn, pac->edata_cache); + if (edata == NULL) { + return NULL; + } + size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); + void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, + &zero, commit); + if (addr == NULL) { + edata_cache_put(tsdn, pac->edata_cache, edata); + return NULL; + } + edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, + size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), + extent_state_active, zero, *commit, EXTENT_PAI_PAC, + opt_retain ? EXTENT_IS_HEAD : EXTENT_NOT_HEAD); + /* + * Retained memory is not counted towards gdump. Only if an extent is + * allocated as a separate mapping, i.e. growing_retained is false, then + * gdump should be updated. + */ + bool gdump_add = !growing_retained; + if (extent_register_impl(tsdn, pac, edata, gdump_add)) { + edata_cache_put(tsdn, pac->edata_cache, edata); + return NULL; + } + + return edata; +} + void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { @@ -1013,7 +1017,8 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Restore guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); - san_unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages_two_sided(tsdn, ehooks, edata, + pac->emap); } /* * Deregister first to avoid a race with other allocating @@ -1057,12 +1062,14 @@ extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - assert(edata_state_get(edata) == extent_state_retained); + extent_state_t state = edata_state_get(edata); + assert(state == extent_state_retained || state == extent_state_active); assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (edata_guarded_get(edata)) { + assert(opt_retain); san_unguard_pages_pre_destroy(tsdn, ehooks, edata, pac->emap); } edata_addr_set(edata, edata_base_get(edata)); @@ -1087,9 +1094,9 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length) { + size_t offset, size_t length, bool growing_retained) { return extent_commit_impl(tsdn, ehooks, edata, offset, length, - false); + growing_retained); } bool @@ -1207,9 +1214,9 @@ label_error_a: edata_t * extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, - size_t size_a, size_t size_b) { + size_t size_a, size_t size_b, bool holding_core_locks) { return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, - /* holding_core_locks */ false); + holding_core_locks); } static bool diff --git a/src/pac.c b/src/pac.c index e53de80f..914cec90 100644 --- a/src/pac.c +++ b/src/pac.c @@ -14,11 +14,6 @@ static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); -static ehooks_t * -pac_ehooks_get(pac_t *pac) { - return 
base_ehooks_get(pac->base); -} - static inline void pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { @@ -139,14 +134,15 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero) { assert(alignment <= PAGE); - size_t size_with_guards = size + PAGE_GUARDS_SIZE; + size_t size_with_guards = size + SAN_PAGE_GUARDS_SIZE; /* Alloc a non-guarded extent first.*/ edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, /* alignment */ PAGE, zero, /* guarded */ false); if (edata != NULL) { /* Add guards around it. */ assert(edata_size_get(edata) == size_with_guards); - san_guard_pages(tsdn, ehooks, edata, pac->emap); + san_guard_pages(tsdn, ehooks, edata, pac->emap, true, true, + true); } assert(edata == NULL || (edata_guarded_get(edata) && edata_size_get(edata) == size)); @@ -222,7 +218,7 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, edata, - new_size, shrink_amount); + new_size, shrink_amount, /* holding_core_locks */ false); if (trail == NULL) { return true; } @@ -253,7 +249,8 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, if (!edata_slab_get(edata) || !maps_coalesce) { assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || !maps_coalesce); - san_unguard_pages(tsdn, ehooks, edata, pac->emap); + san_unguard_pages_two_sided(tsdn, ehooks, edata, + pac->emap); } } diff --git a/src/pages.c b/src/pages.c index a8d9988b..8c83a7de 100644 --- a/src/pages.c +++ b/src/pages.c @@ -365,33 +365,61 @@ pages_decommit(void *addr, size_t size) { void pages_mark_guards(void *head, void *tail) { - assert(head != NULL && tail != NULL); - assert((uintptr_t)head < (uintptr_t)tail); + assert(head != NULL || tail != NULL); + assert(head == NULL || tail == NULL || + (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT - mprotect(head, 
PAGE, PROT_NONE); - mprotect(tail, PAGE, PROT_NONE); + if (head != NULL) { + mprotect(head, PAGE, PROT_NONE); + } + if (tail != NULL) { + mprotect(tail, PAGE, PROT_NONE); + } #else /* Decommit sets to PROT_NONE / MEM_DECOMMIT. */ - os_pages_commit(head, PAGE, false); - os_pages_commit(tail, PAGE, false); + if (head != NULL) { + os_pages_commit(head, PAGE, false); + } + if (tail != NULL) { + os_pages_commit(tail, PAGE, false); + } #endif } void pages_unmark_guards(void *head, void *tail) { - assert(head != NULL && tail != NULL); - assert((uintptr_t)head < (uintptr_t)tail); + assert(head != NULL || tail != NULL); + assert(head == NULL || tail == NULL || + (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT - size_t range = (uintptr_t)tail - (uintptr_t)head + PAGE; - if (range <= SC_LARGE_MINCLASS) { + bool head_and_tail = (head != NULL) && (tail != NULL); + size_t range = head_and_tail ? + (uintptr_t)tail - (uintptr_t)head + PAGE : + SIZE_T_MAX; + /* + * The amount of work that the kernel does in mprotect depends on the + * range argument. SC_LARGE_MINCLASS is an arbitrary threshold chosen + * to prevent kernel from doing too much work that would outweigh the + * savings of performing one less system call. 
+ */ + bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS; + if (ranged_mprotect) { mprotect(head, range, PROT_READ | PROT_WRITE); } else { - mprotect(head, PAGE, PROT_READ | PROT_WRITE); - mprotect(tail, PAGE, PROT_READ | PROT_WRITE); + if (head != NULL) { + mprotect(head, PAGE, PROT_READ | PROT_WRITE); + } + if (tail != NULL) { + mprotect(tail, PAGE, PROT_READ | PROT_WRITE); + } } #else - os_pages_commit(head, PAGE, true); - os_pages_commit(tail, PAGE, true); + if (head != NULL) { + os_pages_commit(head, PAGE, true); + } + if (tail != NULL) { + os_pages_commit(tail, PAGE, true); + } #endif } diff --git a/src/san.c b/src/san.c index 139ec5a3..15fdb7ff 100644 --- a/src/san.c +++ b/src/san.c @@ -10,16 +10,63 @@ size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; +static inline void +san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, + uintptr_t *addr, size_t size, bool left, bool right) { + assert(!edata_guarded_get(edata)); + assert(size % PAGE == 0); + *addr = (uintptr_t)edata_base_get(edata); + if (left) { + *guard1 = *addr; + *addr += SAN_PAGE_GUARD; + } else { + *guard1 = 0; + } + + if (right) { + *guard2 = *addr + size; + } else { + *guard2 = 0; + } +} + +static inline void +san_find_unguarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, + uintptr_t *addr, size_t size, bool left, bool right) { + assert(edata_guarded_get(edata)); + assert(size % PAGE == 0); + *addr = (uintptr_t)edata_base_get(edata); + if (right) { + *guard2 = *addr + size; + } else { + *guard2 = 0; + } + + if (left) { + *guard1 = *addr - SAN_PAGE_GUARD; + assert(*guard1 != 0); + *addr = *guard1; + } else { + *guard1 = 0; + } +} + void -san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - emap_deregister_boundary(tsdn, emap, edata); +san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, + bool left, 
bool right, bool remap) { + assert(left || right); + if (remap) { + emap_deregister_boundary(tsdn, emap, edata); + } size_t size_with_guards = edata_size_get(edata); - size_t usize = size_with_guards - PAGE_GUARDS_SIZE; + size_t usize = (left && right) + ? san_two_side_unguarded_sz(size_with_guards) + : san_one_side_unguarded_sz(size_with_guards); - uintptr_t guard1 = (uintptr_t)edata_base_get(edata); - uintptr_t addr = guard1 + PAGE; - uintptr_t guard2 = addr + usize; + uintptr_t guard1, guard2, addr; + san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left, + right); assert(edata_state_get(edata) == extent_state_active); ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); @@ -29,14 +76,18 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { edata_addr_set(edata, (void *)addr); edata_guarded_set(edata, true); - /* The new boundary will be registered on the pa_alloc path. */ + if (remap) { + emap_register_boundary(tsdn, emap, edata, SC_NSIZES, + /* slab */ false); + } } static void san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap, bool reg_emap) { + emap_t *emap, bool left, bool right, bool remap) { + assert(left || right); /* Remove the inner boundary which no longer exists. */ - if (reg_emap) { + if (remap) { assert(edata_state_get(edata) == extent_state_active); emap_deregister_boundary(tsdn, emap, edata); } else { @@ -44,24 +95,26 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, } size_t size = edata_size_get(edata); - size_t size_with_guards = size + PAGE_GUARDS_SIZE; + size_t size_with_guards = (left && right) + ? 
san_two_side_guarded_sz(size) + : san_one_side_guarded_sz(size); - uintptr_t addr = (uintptr_t)edata_base_get(edata); - uintptr_t guard1 = addr - PAGE; - uintptr_t guard2 = addr + size; + uintptr_t guard1, guard2, addr; + san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left, + right); ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); /* Update the true addr and usable size of the edata. */ edata_size_set(edata, size_with_guards); - edata_addr_set(edata, (void *)guard1); + edata_addr_set(edata, (void *)addr); edata_guarded_set(edata, false); /* * Then re-register the outer boundary including the guards, if * requested. */ - if (reg_emap) { + if (remap) { emap_register_boundary(tsdn, emap, edata, SC_NSIZES, /* slab */ false); } @@ -69,15 +122,23 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { - san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ true); + emap_t *emap, bool left, bool right) { + san_unguard_pages_impl(tsdn, ehooks, edata, emap, left, right, + /* remap */ true); } void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { emap_assert_not_mapped(tsdn, emap, edata); - san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* reg_emap */ false); + /* + * We don't want to touch the emap of about to be destroyed extents, as + * they have been unmapped upon eviction from the retained ecache. Also, + * we unguard the extents to the right, because retained extents only + * own their right guard page per san_bump_alloc's logic. 
+ */ + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false, + /* right */ true, /* remap */ false); } void diff --git a/src/san_bump.c b/src/san_bump.c new file mode 100644 index 00000000..6098bd95 --- /dev/null +++ b/src/san_bump.c @@ -0,0 +1,127 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/san_bump.h" +#include "jemalloc/internal/pac.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/edata_cache.h" + +const size_t SBA_RETAINED_ALLOC_SIZE = 1024 * 1024 * 4; /* 4 MB */ + +static bool +san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, + ehooks_t *ehooks, size_t size); + +bool +san_bump_alloc_init(san_bump_alloc_t* sba) { + bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", + WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + sba->curr_reg = NULL; + + return false; +} + +edata_t * +san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, + ehooks_t *ehooks, size_t size, bool zero) { + assert(maps_coalesce && opt_retain); + + edata_t* to_destroy; + size_t guarded_size = san_one_side_guarded_sz(size); + + malloc_mutex_lock(tsdn, &sba->mtx); + + if (sba->curr_reg == NULL || + edata_size_get(sba->curr_reg) < guarded_size) { + /* + * If the current region can't accommodate the allocation, + * try replacing it with a larger one and destroy current if the + * replacement succeeds. 
+ */ + to_destroy = sba->curr_reg; + bool err = san_bump_grow_locked(tsdn, sba, pac, ehooks, + guarded_size); + if (err) { + goto label_err; + } + } else { + to_destroy = NULL; + } + assert(guarded_size <= edata_size_get(sba->curr_reg)); + size_t trail_size = edata_size_get(sba->curr_reg) - guarded_size; + + edata_t* edata; + if (trail_size != 0) { + edata_t* curr_reg_trail = extent_split_wrapper(tsdn, pac, + ehooks, sba->curr_reg, guarded_size, trail_size, + /* holding_core_locks */ true); + if (curr_reg_trail == NULL) { + goto label_err; + } + edata = sba->curr_reg; + sba->curr_reg = curr_reg_trail; + } else { + edata = sba->curr_reg; + sba->curr_reg = NULL; + } + + malloc_mutex_unlock(tsdn, &sba->mtx); + + assert(!edata_guarded_get(edata)); + assert(sba->curr_reg == NULL || !edata_guarded_get(sba->curr_reg)); + assert(to_destroy == NULL || !edata_guarded_get(to_destroy)); + + if (to_destroy != NULL) { + extent_destroy_wrapper(tsdn, pac, ehooks, to_destroy); + } + + san_guard_pages(tsdn, ehooks, edata, pac->emap, /* left */ false, + /* right */ true, /* remap */ true); + + if (!edata_committed_get(edata)) { + if (extent_commit_wrapper(tsdn, ehooks, edata, 0, + edata_size_get(edata), true)) { + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + edata); + return NULL; + } + edata_committed_set(edata, true); + } + if (zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); + ehooks_zero(tsdn, ehooks, addr, size); + edata_zeroed_set(edata, true); + } + + if (config_prof) { + extent_gdump_add(tsdn, edata); + } + + return edata; +label_err: + malloc_mutex_unlock(tsdn, &sba->mtx); + return NULL; +} + +static bool +san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, + ehooks_t *ehooks, size_t size) { + malloc_mutex_assert_owner(tsdn, &sba->mtx); + + bool committed = false, zeroed = false; + size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE ? 
size : + SBA_RETAINED_ALLOC_SIZE; + assert((alloc_size & PAGE_MASK) == 0); + sba->curr_reg = extent_alloc_wrapper(tsdn, pac, ehooks, NULL, + alloc_size, PAGE, zeroed, &committed, + /* growing_retained */ true); + if (sba->curr_reg == NULL) { + return true; + } + return false; +} diff --git a/test/include/test/arena_decay.h b/test/include/test/arena_decay.h index da659212..524ee218 100644 --- a/test/include/test/arena_decay.h +++ b/test/include/test/arena_decay.h @@ -1,4 +1,4 @@ -static unsigned +static inline unsigned do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; size_t sz = sizeof(unsigned); @@ -24,7 +24,7 @@ do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { return arena_ind; } -static void +static inline void do_arena_destroy(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -35,14 +35,14 @@ do_arena_destroy(unsigned arena_ind) { "Unexpected mallctlbymib() failure"); } -static void +static inline void do_epoch(void) { uint64_t epoch = 1; expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); } -static void +static inline void do_purge(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -53,7 +53,7 @@ do_purge(unsigned arena_ind) { "Unexpected mallctlbymib() failure"); } -static void +static inline void do_decay(unsigned arena_ind) { size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -64,7 +64,7 @@ do_decay(unsigned arena_ind) { "Unexpected mallctlbymib() failure"); } -static uint64_t +static inline uint64_t get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; size_t miblen = sizeof(mib)/sizeof(size_t); @@ -78,32 +78,32 @@ get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { return npurge; } -static uint64_t +static inline uint64_t get_arena_dirty_npurge(unsigned arena_ind) { do_epoch(); return 
get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind); } -static uint64_t +static inline uint64_t get_arena_dirty_purged(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.dirty_purged", arena_ind); } -static uint64_t +static inline uint64_t get_arena_muzzy_npurge(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); } -static uint64_t +static inline uint64_t get_arena_npurge(unsigned arena_ind) { do_epoch(); return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); } -static size_t +static inline size_t get_arena_pdirty(unsigned arena_ind) { do_epoch(); size_t mib[4]; @@ -118,7 +118,7 @@ get_arena_pdirty(unsigned arena_ind) { return pdirty; } -static size_t +static inline size_t get_arena_pmuzzy(unsigned arena_ind) { do_epoch(); size_t mib[4]; @@ -133,14 +133,14 @@ get_arena_pmuzzy(unsigned arena_ind) { return pmuzzy; } -static void * +static inline void * do_mallocx(size_t size, int flags) { void *p = mallocx(size, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); return p; } -static void +static inline void generate_dirty(unsigned arena_ind, size_t size) { int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; void *p = do_mallocx(size, flags); diff --git a/test/unit/retained.c b/test/unit/retained.c index 76bda50f..53c90f24 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -104,7 +104,7 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); - size_t guard_sz = san_enabled() ? PAGE_GUARDS_SIZE : 0; + size_t guard_sz = san_enabled() ? 
SAN_PAGE_GUARDS_SIZE : 0; esz = sz + sz_large_pad + guard_sz; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); diff --git a/test/unit/san.c b/test/unit/san.c index 93e292f6..eb9ff517 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -122,7 +122,7 @@ TEST_BEGIN(test_guarded_decay) { /* Verify that guarded extents as dirty. */ size_t sz1 = PAGE, sz2 = PAGE * 2; /* W/o maps_coalesce, guarded extents are unguarded eagerly. */ - size_t add_guard_size = maps_coalesce ? 0 : PAGE_GUARDS_SIZE; + size_t add_guard_size = maps_coalesce ? 0 : SAN_PAGE_GUARDS_SIZE; generate_dirty(arena_ind, sz1); verify_pdirty(arena_ind, sz1 + add_guard_size); verify_pmuzzy(arena_ind, 0); diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c new file mode 100644 index 00000000..fbee53e5 --- /dev/null +++ b/test/unit/san_bump.c @@ -0,0 +1,111 @@ +#include "test/jemalloc_test.h" +#include "test/arena_decay.h" + +#include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/san_bump.h" + +TEST_BEGIN(test_san_bump_alloc) { + test_skip_if(!maps_coalesce || !opt_retain); + + tsdn_t *tsdn = tsdn_fetch(); + + san_bump_alloc_t sba; + san_bump_alloc_init(&sba); + + unsigned arena_ind = do_arena_create(0, 0); + assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + pac_t *pac = &arena->pa_shard.pac; + + size_t alloc_size = PAGE * 16; + size_t alloc_n = alloc_size / sizeof(unsigned); + edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + alloc_size, /* zero */ false); + + expect_ptr_not_null(edata, "Failed to allocate edata"); + expect_u_eq(edata_arena_ind_get(edata), arena_ind, + "Edata was assigned an incorrect arena id"); + expect_zu_eq(edata_size_get(edata), alloc_size, + "Allocated edata of incorrect size"); + expect_false(edata_slab_get(edata), + "Bump allocator incorrectly assigned 'slab' to true"); + expect_true(edata_committed_get(edata), "Edata is not committed"); + + void *ptr = 
edata_addr_get(edata); + expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); + /* Test that memory is allocated; no guard pages are misplaced */ + for (unsigned i = 0; i < alloc_n; ++i) { + ((unsigned *)ptr)[i] = 1; + } + + size_t alloc_size2 = PAGE * 28; + size_t alloc_n2 = alloc_size / sizeof(unsigned); + edata_t *edata2 = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + alloc_size2, /* zero */ true); + + expect_ptr_not_null(edata2, "Failed to allocate edata"); + expect_u_eq(edata_arena_ind_get(edata2), arena_ind, + "Edata was assigned an incorrect arena id"); + expect_zu_eq(edata_size_get(edata2), alloc_size2, + "Allocated edata of incorrect size"); + expect_false(edata_slab_get(edata2), + "Bump allocator incorrectly assigned 'slab' to true"); + expect_true(edata_committed_get(edata2), "Edata is not committed"); + + void *ptr2 = edata_addr_get(edata2); + expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); + + uintptr_t ptrdiff = ptr2 > ptr ? (uintptr_t)ptr2 - (uintptr_t)ptr + : (uintptr_t)ptr - (uintptr_t)ptr2; + size_t between_allocs = (size_t)ptrdiff - alloc_size; + + expect_zu_ge(between_allocs, PAGE, + "Guard page between allocs is missing"); + + for (unsigned i = 0; i < alloc_n2; ++i) { + expect_u_eq(((unsigned *)ptr2)[i], 0, "Memory is not zeroed"); + } +} +TEST_END + +TEST_BEGIN(test_large_alloc_size) { + test_skip_if(!maps_coalesce || !opt_retain); + + tsdn_t *tsdn = tsdn_fetch(); + + san_bump_alloc_t sba; + san_bump_alloc_init(&sba); + + unsigned arena_ind = do_arena_create(0, 0); + assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + pac_t *pac = &arena->pa_shard.pac; + + size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2; + edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + alloc_size, /* zero */ false); + expect_u_eq(edata_arena_ind_get(edata), arena_ind, + "Edata was assigned an incorrect arena id"); + 
expect_zu_eq(edata_size_get(edata), alloc_size, + "Allocated edata of incorrect size"); + expect_false(edata_slab_get(edata), + "Bump allocator incorrectly assigned 'slab' to true"); + expect_true(edata_committed_get(edata), "Edata is not committed"); + + void *ptr = edata_addr_get(edata); + expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); + /* Test that memory is allocated; no guard pages are misplaced */ + for (unsigned i = 0; i < alloc_size / PAGE; ++i) { + *((char *)ptr + PAGE * i) = 1; + } +} +TEST_END + +int +main(void) { + return test( + test_san_bump_alloc, + test_large_alloc_size); +} From 2c70e8d3513edc5417a1fa6808350083e5c40f7d Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 25 Oct 2021 20:19:08 -0700 Subject: [PATCH 2144/2608] Rename 'arena_decay' to 'arena_util' While initially this file contained helper functions for one particular test, now its usage spread across different test files. Purpose has shifted towards a collection of handy arena ctl wrappers. 
--- test/include/test/{arena_decay.h => arena_util.h} | 0 test/unit/arena_decay.c | 2 +- test/unit/san.c | 2 +- test/unit/san_bump.c | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename test/include/test/{arena_decay.h => arena_util.h} (100%) diff --git a/test/include/test/arena_decay.h b/test/include/test/arena_util.h similarity index 100% rename from test/include/test/arena_decay.h rename to test/include/test/arena_util.h diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index bbfd23a5..e991f4dd 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/arena_decay.h" +#include "test/arena_util.h" #include "jemalloc/internal/ticker.h" diff --git a/test/unit/san.c b/test/unit/san.c index eb9ff517..0daa282b 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/arena_decay.h" +#include "test/arena_util.h" #include "test/san.h" #include "jemalloc/internal/san.h" diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index fbee53e5..cafa37fe 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -1,5 +1,5 @@ #include "test/jemalloc_test.h" -#include "test/arena_decay.h" +#include "test/arena_util.h" #include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/san_bump.h" From f56f5b9930a46f919ae40b04acef8200fdd216e9 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Fri, 5 Nov 2021 14:19:39 -0700 Subject: [PATCH 2145/2608] Pass 'frequent_reuse' hint to PAI Currently used only for guarding purposes, the hint is used to determine if the allocation is supposed to be frequently reused. For example, it might urge the allocator to ensure the allocation is cached. 
--- include/jemalloc/internal/pai.h | 9 +++--- src/hpa.c | 5 +-- src/pa.c | 4 +-- src/pac.c | 6 ++-- src/pai.c | 3 +- src/sec.c | 11 ++++--- test/unit/hpa.c | 16 +++++----- test/unit/sec.c | 54 ++++++++++++++++++--------------- 8 files changed, 60 insertions(+), 48 deletions(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index f8f7d667..d978cd7d 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -7,7 +7,7 @@ typedef struct pai_s pai_t; struct pai_s { /* Returns NULL on failure. */ edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, + size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); /* * Returns the number of extents added to the list (which may be fewer @@ -37,10 +37,11 @@ struct pai_s { */ static inline edata_t * -pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool *deferred_work_generated) { +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, + bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated) { return self->alloc(tsdn, self, size, alignment, zero, guarded, - deferred_work_generated); + frequent_reuse, deferred_work_generated); } static inline size_t diff --git a/src/hpa.c b/src/hpa.c index caf122b7..0a7ec19e 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -9,7 +9,8 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -760,7 +761,7 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t 
*self, size_t size, size_t nallocs, static edata_t * hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool *deferred_work_generated) { + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); assert(!guarded); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), diff --git a/src/pa.c b/src/pa.c index 9004cc90..0f95e93a 100644 --- a/src/pa.c +++ b/src/pa.c @@ -128,7 +128,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (!guarded && pa_shard_uses_hpa(shard)) { edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, - zero, /* guarded */ false, deferred_work_generated); + zero, /* guarded */ false, slab, deferred_work_generated); } /* * Fall back to the PAC if the HPA is off or couldn't serve the given @@ -136,7 +136,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, */ if (edata == NULL) { edata = pai_alloc(tsdn, &shard->pac.pai, size, alignment, zero, - guarded, deferred_work_generated); + guarded, slab, deferred_work_generated); } if (edata != NULL) { assert(edata_size_get(edata) == size); diff --git a/src/pac.c b/src/pac.c index 914cec90..e1f60025 100644 --- a/src/pac.c +++ b/src/pac.c @@ -5,7 +5,8 @@ #include "jemalloc/internal/san.h" static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -152,7 +153,8 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - 
bool zero, bool guarded, bool *deferred_work_generated) { + bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated) { pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); diff --git a/src/pai.c b/src/pai.c index 86b8ee5b..45c87729 100644 --- a/src/pai.c +++ b/src/pai.c @@ -7,7 +7,8 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, for (size_t i = 0; i < nallocs; i++) { bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, &deferred_by_alloc); + /* zero */ false, /* guarded */ false, + /* frequent_reuse */ false, &deferred_by_alloc); *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; diff --git a/src/sec.c b/src/sec.c index d99c4439..0c4e7032 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/sec.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool *deferred_work_generated); + size_t alignment, bool zero, bool guarded, bool frequent_reuse, + bool *deferred_work_generated); static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -218,7 +219,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static edata_t * sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool *deferred_work_generated) { + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); assert(!guarded); @@ -227,7 +228,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (zero || alignment > PAGE || sec->opts.nshards == 0 || size > sec->opts.max_alloc) { return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - /* guarded */ false, 
deferred_work_generated); + /* guarded */ false, frequent_reuse, + deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -250,7 +252,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, size); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, /* guarded */ false, deferred_work_generated); + zero, /* guarded */ false, frequent_reuse, + deferred_work_generated); } } return edata; diff --git a/test/unit/hpa.c b/test/unit/hpa.c index a63d51d4..25ee1950 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -81,10 +81,10 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, - &deferred_work_generated); + false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, - false, &deferred_work_generated); + false, false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); destroy_test_data(shard); @@ -188,7 +188,7 @@ TEST_BEGIN(test_stress) { size_t npages = npages_min + prng_range_zu(&prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, - npages * PAGE, PAGE, false, false, + npages * PAGE, PAGE, false, false, false, &deferred_work_generated); assert_ptr_not_null(edata, "Unexpected allocation failure"); @@ -264,7 +264,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) { for (size_t i = 0; i < NALLOCS / 2; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } edata_list_active_t allocs_list; @@ -300,8 +300,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { /* Reallocate (individually), and ensure reuse and 
contiguity. */ for (size_t i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); @@ -376,7 +376,7 @@ TEST_BEGIN(test_defer_time) { edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, &deferred_work_generated); + false, false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); @@ -410,7 +410,7 @@ TEST_BEGIN(test_defer_time) { */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, &deferred_work_generated); + false, false, &deferred_work_generated); expect_ptr_not_null(edatas[i], "Unexpected null edata"); } /* diff --git a/test/unit/sec.c b/test/unit/sec.c index 8ac3411c..e98bdc92 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -50,7 +50,7 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, static inline edata_t * pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, + size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert(!guarded); pai_test_allocator_t *ta = (pai_test_allocator_t *)self; @@ -178,12 +178,12 @@ TEST_BEGIN(test_reuse) { /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc 
failure"); two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); @@ -214,11 +214,11 @@ TEST_BEGIN(test_reuse) { */ for (int i = 0; i < NALLOCS; i++) { edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); expect_ptr_eq(two_page[i], alloc2, @@ -255,12 +255,13 @@ TEST_BEGIN(test_auto_flush) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, &deferred_work_generated); + /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS + 1, max_allocs, @@ -311,8 +312,8 @@ do_disable_flush_test(bool is_disable) { /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - 
&deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); } /* Free all but the last aloc. */ @@ -386,7 +387,7 @@ TEST_BEGIN(test_max_alloc_respected) { "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, PAGE, /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); expect_zu_eq(i + 1, ta.alloc_count, "Incorrect number of allocations"); @@ -413,7 +414,7 @@ TEST_BEGIN(test_expand_shrink_delegate) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); @@ -454,7 +455,7 @@ TEST_BEGIN(test_nshards_0) { bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); @@ -497,8 +498,8 @@ TEST_BEGIN(test_stats_simple) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } @@ -512,6 +513,7 @@ TEST_BEGIN(test_stats_simple) { for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, + /* frequent_reuse */ false, 
&deferred_work_generated); expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); } @@ -541,14 +543,16 @@ TEST_BEGIN(test_stats_auto_flush) { bool deferred_work_generated = false; extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, &deferred_work_generated); + /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, &deferred_work_generated); + /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); } for (size_t i = 0; i < FLUSH_PAGES; i++) { @@ -588,8 +592,8 @@ TEST_BEGIN(test_stats_manual_flush) { edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ + false, &deferred_work_generated); expect_stats_pages(tsdn, &sec, 0); } From 800ce49c19bc105199cf645172f1e462d70d77c4 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 28 Oct 2021 12:08:10 -0700 Subject: [PATCH 2146/2608] San: Bump alloc frequently reused guarded allocations To utilize a separate retained area for guarded extents, use bump alloc to allocate those extents. 
--- include/jemalloc/internal/emap.h | 1 + include/jemalloc/internal/extent.h | 1 + include/jemalloc/internal/pac.h | 3 ++ include/jemalloc/internal/san_bump.h | 31 ++++++++++++++++-- src/arena.c | 7 ++-- src/emap.c | 1 + src/extent.c | 32 +++++++++++++------ src/pac.c | 48 +++++++++++++++++++--------- src/san_bump.c | 16 +--------- test/unit/san.c | 22 +++++++------ 10 files changed, 106 insertions(+), 56 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 87ece63d..847af327 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -208,6 +208,7 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_committed_get(inner) == edata_committed_get(outer)); assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); + assert(!edata_guarded_get(inner) && !edata_guarded_get(outer)); assert(edata_base_get(inner) == edata_past_get(outer) || edata_base_get(outer) == edata_past_get(inner)); } diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 73059ad2..1660f45f 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -127,6 +127,7 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } } + assert(!edata_guarded_get(edata) && !edata_guarded_get(neighbor)); return true; } diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 7eaaf894..01c4e6af 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -99,6 +99,9 @@ struct pac_s { exp_grow_t exp_grow; malloc_mutex_t grow_mtx; + /* Special allocator for guarded frequently reused extents. */ + san_bump_alloc_t sba; + /* How large extents should be before getting auto-purged. 
*/ atomic_zu_t oversize_threshold; diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index 9c6c224f..8ec4a710 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -5,7 +5,9 @@ #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/mutex.h" -extern const size_t SBA_RETAINED_ALLOC_SIZE; +#define SBA_RETAINED_ALLOC_SIZE ((size_t)4 << 20) + +extern bool opt_retain; typedef struct ehooks_s ehooks_t; typedef struct pac_s pac_t; @@ -17,8 +19,31 @@ struct san_bump_alloc_s { edata_t *curr_reg; }; -bool -san_bump_alloc_init(san_bump_alloc_t* sba); +static inline bool +san_bump_enabled() { + /* + * We enable san_bump allocator only when it's possible to break up a + * mapping and unmap a part of it (maps_coalesce). This is needed to + * ensure the arena destruction process can destroy all retained guarded + * extents one by one and to unmap a trailing part of a retained guarded + * region when it's too small to fit a pending allocation. + * opt_retain is required, because this allocator retains a large + * virtual memory mapping and returns smaller parts of it. 
+ */ + return maps_coalesce && opt_retain; +} + +static inline bool +san_bump_alloc_init(san_bump_alloc_t* sba) { + bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", + WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + sba->curr_reg = NULL; + + return false; +} edata_t * san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, diff --git a/src/arena.c b/src/arena.c index 19e4e85a..121832a7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -328,8 +328,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, szind_t szind = sz_size2index(usize); size_t esize = usize + sz_large_pad; - bool guarded = san_large_extent_decide_guard(tsdn, arena_get_ehooks(arena), - esize, alignment); + bool guarded = san_large_extent_decide_guard(tsdn, + arena_get_ehooks(arena), esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, /* slab */ false, szind, zero, guarded, &deferred_work_generated); assert(deferred_work_generated == false); @@ -829,7 +829,8 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned binshard witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool guarded = san_slab_extent_decide_guard(tsdn, arena_get_ehooks(arena)); + bool guarded = san_slab_extent_decide_guard(tsdn, + arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, /* alignment */ PAGE, /* slab */ true, /* szind */ binind, /* zero */ false, guarded, &deferred_work_generated); diff --git a/src/emap.c b/src/emap.c index e37fea38..9cc95a72 100644 --- a/src/emap.c +++ b/src/emap.c @@ -44,6 +44,7 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, bool expanding) { witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); + assert(!edata_guarded_get(edata)); assert(!expanding || forward); 
assert(!edata_state_in_transition(expected_state)); assert(expected_state == extent_state_dirty || diff --git a/src/extent.c b/src/extent.c index 13d688d1..6fabcc7d 100644 --- a/src/extent.c +++ b/src/extent.c @@ -87,6 +87,7 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, zero, &commit, false, guarded); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); + assert(edata == NULL || edata_guarded_get(edata) == guarded); return edata; } @@ -179,7 +180,7 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, goto label_return; } eset_remove(eset, edata); - if (!ecache->delay_coalesce) { + if (!ecache->delay_coalesce || edata_guarded_get(edata)) { break; } /* Try to coalesce. */ @@ -399,11 +400,6 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } } } else { - /* - * If split and merge are not allowed (Windows w/o retain), try - * exact fit only. - */ - bool exact_only = (!maps_coalesce && !opt_retain) || guarded; /* * A large extent might be broken up from its original size to * some small size to satisfy a small request. When that small @@ -415,7 +411,18 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, */ unsigned lg_max_fit = ecache->delay_coalesce ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; - edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit); + + /* + * If split and merge are not allowed (Windows w/o retain), try + * exact fit only. + * + * For simplicity purposes, splitting guarded extents is not + * supported. Hence, we do only exact fit for guarded + * allocations. + */ + bool exact_only = (!maps_coalesce && !opt_retain) || guarded; + edata = eset_fit(eset, size, alignment, exact_only, + lg_max_fit); } if (edata == NULL) { return NULL; @@ -474,6 +481,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Split the lead. 
*/ if (leadsize != 0) { + assert(!edata_guarded_get(*edata)); *lead = *edata; *edata = extent_split_impl(tsdn, pac, ehooks, *lead, leadsize, size + trailsize, /* holding_core_locks*/ true); @@ -486,6 +494,7 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Split the trail. */ if (trailsize != 0) { + assert(!edata_guarded_get(*edata)); *trail = extent_split_impl(tsdn, pac, ehooks, *edata, size, trailsize, /* holding_core_locks */ true); if (*trail == NULL) { @@ -510,6 +519,7 @@ static edata_t * extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, edata_t *edata, bool growing_retained) { + assert(!edata_guarded_get(edata) || size == edata_size_get(edata)); malloc_mutex_assert_owner(tsdn, &ecache->mtx); edata_t *lead; @@ -576,8 +586,10 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); assert(!guarded || expand_edata == NULL); + assert(!guarded || alignment <= PAGE); malloc_mutex_lock(tsdn, &ecache->mtx); + edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, guarded); if (edata == NULL) { @@ -746,7 +758,6 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size); } - return edata; label_err: malloc_mutex_unlock(tsdn, &pac->grow_mtx); @@ -801,6 +812,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { + assert(!edata_guarded_get(edata)); /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -907,7 +919,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, goto label_skip_coalesce; } if (!ecache->delay_coalesce) { - edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, + edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, NULL); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); @@ -1014,7 +1026,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Avoid calling the default extent_dalloc unless have to. */ if (!ehooks_dalloc_will_fail(ehooks)) { - /* Restore guard pages for dalloc / unmap. */ + /* Remove guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); san_unguard_pages_two_sided(tsdn, ehooks, edata, diff --git a/src/pac.c b/src/pac.c index e1f60025..c6d9f146 100644 --- a/src/pac.c +++ b/src/pac.c @@ -81,6 +81,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, if (decay_init(&pac->decay_muzzy, cur_time, muzzy_decay_ms)) { return true; } + if (san_bump_alloc_init(&pac->sba)) { + return true; + } pac->base = base; pac->emap = emap; @@ -132,18 +135,24 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, static edata_t * pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, - size_t alignment, bool zero) { + size_t alignment, bool zero, bool frequent_reuse) { assert(alignment <= PAGE); - size_t size_with_guards = size + SAN_PAGE_GUARDS_SIZE; - /* Alloc a non-guarded extent first.*/ - edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, - /* alignment */ PAGE, zero, /* guarded */ false); - if (edata != NULL) { - /* Add guards around it. */ - assert(edata_size_get(edata) == size_with_guards); - san_guard_pages(tsdn, ehooks, edata, pac->emap, true, true, - true); + edata_t *edata; + if (san_bump_enabled() && frequent_reuse) { + edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size, + zero); + } else { + size_t size_with_guards = san_two_side_guarded_sz(size); + /* Alloc a non-guarded extent first.*/ + edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, + /* alignment */ PAGE, zero, /* guarded */ false); + if (edata != NULL) { + /* Add guards around it. 
*/ + assert(edata_size_get(edata) == size_with_guards); + san_guard_pages_two_sided(tsdn, ehooks, edata, + pac->emap, true); + } } assert(edata == NULL || (edata_guarded_get(edata) && edata_size_get(edata) == size)); @@ -158,12 +167,21 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); - edata_t *edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, - zero, guarded); + edata_t *edata = NULL; + /* + * The condition is an optimization - not frequently reused guarded + * allocations are never put in the ecache. pac_alloc_real also + * doesn't grow retained for guarded allocations. So pac_alloc_real + * for such allocations would always return NULL. + * */ + if (!guarded || frequent_reuse) { + edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, + zero, guarded); + } if (edata == NULL && guarded) { /* No cached guarded extents; creating a new one. */ edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, - alignment, zero); + alignment, zero, frequent_reuse); } return edata; @@ -189,8 +207,8 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } if (trail == NULL) { trail = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, edata, expand_amount, PAGE, - zero, /* guarded */ false); + &pac->ecache_retained, edata, expand_amount, PAGE, zero, + /* guarded */ false); mapped_add = expand_amount; } if (trail == NULL) { diff --git a/src/san_bump.c b/src/san_bump.c index 6098bd95..1a94e55d 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -7,28 +7,14 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/edata_cache.h" -const size_t SBA_RETAINED_ALLOC_SIZE = 1024 * 1024 * 4; /* 4 MB */ - static bool san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t *ehooks, size_t size); -bool -san_bump_alloc_init(san_bump_alloc_t* sba) { - bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", - 
WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); - if (err) { - return true; - } - sba->curr_reg = NULL; - - return false; -} - edata_t * san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, size_t size, bool zero) { - assert(maps_coalesce && opt_retain); + assert(san_bump_enabled()); edata_t* to_destroy; size_t guarded_size = san_one_side_guarded_sz(size); diff --git a/test/unit/san.c b/test/unit/san.c index 0daa282b..5b98f52e 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -13,6 +13,11 @@ verify_extent_guarded(tsdn_t *tsdn, void *ptr) { #define MAX_SMALL_ALLOCATIONS 4096 void *small_alloc[MAX_SMALL_ALLOCATIONS]; +/* + * This test allocates page sized slabs and checks that every two slabs have + * at least one page in between them. That page is supposed to be the guard + * page. + */ TEST_BEGIN(test_guarded_small) { test_skip_if(opt_prof); @@ -21,7 +26,8 @@ TEST_BEGIN(test_guarded_small) { VARIABLE_ARRAY(uintptr_t, pages, npages); /* Allocate to get sanitized pointers. */ - size_t sz = PAGE / 8; + size_t slab_sz = PAGE; + size_t sz = slab_sz / 8; unsigned n_alloc = 0; while (n_alloc < MAX_SMALL_ALLOCATIONS) { void *ptr = malloc(sz); @@ -50,8 +56,9 @@ TEST_BEGIN(test_guarded_small) { for (unsigned j = i + 1; j < npages; j++) { uintptr_t ptr_diff = pages[i] > pages[j] ? pages[i] - pages[j] : pages[j] - pages[i]; - expect_zu_gt((size_t)ptr_diff, 2 * PAGE, - "Pages should not be next to each other."); + expect_zu_ge((size_t)ptr_diff, slab_sz + PAGE, + "There should be at least one pages between " + "guarded slabs"); } } @@ -76,20 +83,15 @@ TEST_BEGIN(test_guarded_large) { } /* Verify the pages are not continuous, i.e. separated by guards. */ - uintptr_t min_diff = (uintptr_t)-1; for (unsigned i = 0; i < nlarge; i++) { for (unsigned j = i + 1; j < nlarge; j++) { uintptr_t ptr_diff = large[i] > large[j] ? 
large[i] - large[j] : large[j] - large[i]; expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, - "Pages should not be next to each other."); - if (ptr_diff < min_diff) { - min_diff = ptr_diff; - } + "There should be at least two pages between " + " guarded large allocations"); } } - expect_zu_ge((size_t)min_diff, large_sz + 2 * PAGE, - "Pages should not be next to each other."); for (unsigned i = 0; i < nlarge; i++) { free((void *)large[i]); From d90655390f5192d53723023667b57453ba23e676 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 22 Nov 2021 16:57:56 -0800 Subject: [PATCH 2147/2608] San: Create a function for committing and zeroing Committing and zeroing an extent is usually done together, hence a new function. --- include/jemalloc/internal/extent.h | 4 ++- src/extent.c | 56 +++++++++++++++++++----------- src/san_bump.c | 19 +++------- 3 files changed, 43 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 1660f45f..7336e8b8 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -43,7 +43,7 @@ void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length, bool growing_retained); + size_t offset, size_t length); bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -55,6 +55,8 @@ edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, bool holding_core_locks); bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); +bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + bool commit, bool zero, bool growing_retained); size_t extent_sn_next(pac_t *pac); bool 
extent_boot(void); diff --git a/src/extent.c b/src/extent.c index 6fabcc7d..4bbbff38 100644 --- a/src/extent.c +++ b/src/extent.c @@ -604,26 +604,20 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, return NULL; } - if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), growing_retained)) { - extent_record(tsdn, pac, ehooks, ecache, edata); - return NULL; - } - } - - if (edata_committed_get(edata)) { - *commit = true; - } - assert(edata_state_get(edata) == extent_state_active); - - if (zero) { - void *addr = edata_base_get(edata); - if (!edata_zeroed_get(edata)) { - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); - } + if (extent_commit_zero(tsdn, ehooks, edata, *commit, zero, + growing_retained)) { + extent_record(tsdn, pac, ehooks, ecache, edata); + return NULL; + } + if (edata_committed_get(edata)) { + /* + * This reverses the purpose of this variable - previously it + * was treated as an input parameter, now it turns into an + * output parameter, reporting if the edata has actually been + * committed. + */ + *commit = true; } return edata; } @@ -1106,9 +1100,9 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length, bool growing_retained) { + size_t offset, size_t length) { return extent_commit_impl(tsdn, ehooks, edata, offset, length, - growing_retained); + /* growing_retained */ false); } bool @@ -1287,6 +1281,26 @@ extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* holding_core_locks */ false); } +bool +extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + bool commit, bool zero, bool growing_retained) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, growing_retained ? 
1 : 0); + + if (commit && !edata_committed_get(edata)) { + if (extent_commit_impl(tsdn, ehooks, edata, 0, + edata_size_get(edata), growing_retained)) { + return true; + } + } + if (zero && !edata_zeroed_get(edata)) { + void *addr = edata_base_get(edata); + size_t size = edata_size_get(edata); + ehooks_zero(tsdn, ehooks, addr, size); + } + return false; +} + bool extent_boot(void) { assert(sizeof(slab_data_t) >= sizeof(e_prof_info_t)); diff --git a/src/san_bump.c b/src/san_bump.c index 1a94e55d..88897455 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -68,20 +68,11 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, san_guard_pages(tsdn, ehooks, edata, pac->emap, /* left */ false, /* right */ true, /* remap */ true); - if (!edata_committed_get(edata)) { - if (extent_commit_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata), true)) { - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - edata); - return NULL; - } - edata_committed_set(edata, true); - } - if (zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); - edata_zeroed_set(edata, true); + if (extent_commit_zero(tsdn, ehooks, edata, /* commit */ true, zero, + /* growing_retained */ false)) { + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, + edata); + return NULL; } if (config_prof) { From 9015e129bd7de389afa4196495451669700904d0 Mon Sep 17 00:00:00 2001 From: Alex Lapenkov Date: Mon, 13 Dec 2021 15:07:23 -0800 Subject: [PATCH 2148/2608] Update visual studio projects Add relevant source files to the projects. 
--- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 5 ++-- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 29 ++++++++++++++++--- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 5 ++-- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 29 ++++++++++++++++--- 4 files changed, 56 insertions(+), 12 deletions(-) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 75d66800..ec028a1a 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -59,7 +59,6 @@ - @@ -86,6 +85,8 @@ + + @@ -376,4 +377,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c5bb4cfe..1b43e9f2 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -61,9 +61,6 @@ Source Files - - Source Files - Source Files @@ -172,5 +169,29 @@ Source Files + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index d25768e1..a8004dbd 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -59,7 +59,6 @@ - @@ -86,6 +85,8 @@ + + @@ -375,4 +376,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c5bb4cfe..1b43e9f2 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -61,9 +61,6 @@ Source Files - - Source Files - Source Files @@ -172,5 +169,29 @@ Source Files + + Source Files + + + Source Files + + + Source Files + + + Source Files + 
+ + Source Files + + + Source Files + + + Source Files + + + Source Files + - + \ No newline at end of file From bb5052ce90c6ad4b07c665d9ac96952de2f2b443 Mon Sep 17 00:00:00 2001 From: mweisgut Date: Fri, 17 Dec 2021 04:33:30 -0700 Subject: [PATCH 2149/2608] Fix base_ehooks_get_for_metadata --- include/jemalloc/internal/base.h | 5 +++-- src/base.c | 15 ++++++++------- test/unit/base.c | 31 ++++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 67e19409..9b2c9fb1 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -47,9 +47,9 @@ struct base_s { ehooks_t ehooks; /* - * Use user hooks for metadata when true. + * User-configurable extent hook functions for metadata allocations. */ - bool metadata_use_hooks; + ehooks_t ehooks_base; /* Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; @@ -95,6 +95,7 @@ base_t *base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, bool metadata_use_hooks); void base_delete(tsdn_t *tsdn, base_t *base); ehooks_t *base_ehooks_get(base_t *base); +ehooks_t *base_ehooks_get_for_metadata(base_t *base); extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); diff --git a/src/base.c b/src/base.c index 38f6fa4b..7f4d6756 100644 --- a/src/base.c +++ b/src/base.c @@ -295,12 +295,6 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, return block; } -static ehooks_t * -base_ehooks_get_for_metadata(base_t *base) { - return base->metadata_use_hooks ? &base->ehooks : - (ehooks_t *)&ehooks_default_extent_hooks; -} - /* * Allocate an extent that is at least as large as specified size, with * specified alignment. 
@@ -375,6 +369,9 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, &gap_size, base_size, base_alignment); ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind); + ehooks_init(&base->ehooks_base, metadata_use_hooks ? + (extent_hooks_t *)extent_hooks : + (extent_hooks_t *)&ehooks_default_extent_hooks, ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); @@ -384,7 +381,6 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, base->extent_sn_next = extent_sn_next; base->blocks = block; base->auto_thp_switched = false; - base->metadata_use_hooks = metadata_use_hooks; for (szind_t i = 0; i < SC_NSIZES; i++) { edata_heap_new(&base->avail[i]); } @@ -422,6 +418,11 @@ base_ehooks_get(base_t *base) { return &base->ehooks; } +ehooks_t * +base_ehooks_get_for_metadata(base_t *base) { + return &base->ehooks_base; +} + extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { extent_hooks_t *old_extent_hooks = diff --git a/test/unit/base.c b/test/unit/base.c index 07a43df7..15e04a8c 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -227,10 +227,39 @@ TEST_BEGIN(test_base_hooks_not_null) { } TEST_END +TEST_BEGIN(test_base_ehooks_get_for_metadata_default_hook) { + extent_hooks_prep(); + memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); + base_t *base; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ false); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); + expect_true(ehooks_are_default(ehooks), + "Expected default extent hook functions pointer"); + base_delete(tsdn, base); +} +TEST_END + + +TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) { + extent_hooks_prep(); + memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); + base_t *base; + tsdn_t 
*tsdn = tsd_tsdn(tsd_fetch()); + base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); + expect_ptr_eq(&hooks, ehooks_get_extent_hooks_ptr(ehooks), + "Expected user-specified extend hook functions pointer"); + base_delete(tsdn, base); +} +TEST_END + int main(void) { return test( test_base_hooks_default, test_base_hooks_null, - test_base_hooks_not_null); + test_base_hooks_not_null, + test_base_ehooks_get_for_metadata_default_hook, + test_base_ehooks_get_for_metadata_custom_hook); } From cafe9a315879b357ac3c6d00f3b7f9ad52c33087 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 17 Dec 2021 21:00:21 +0300 Subject: [PATCH 2150/2608] Disable percpu arena in case of non deterministic CPU count A deterministic number of CPUs is important for percpu arena to work correctly, since it uses cpu index - sched_getcpu(), and if it is greater than the number of CPUs, bad things will happen, or an assertion will fail in a debug build: : ../contrib/jemalloc/src/jemalloc.c:321: Failed assertion: "ind <= narenas_total_get()" Aborted (core dumped) Number of CPUs can be obtained from the following places: - sched_getaffinity() - sysconf(_SC_NPROCESSORS_ONLN) - sysconf(_SC_NPROCESSORS_CONF) For the sched_getaffinity() you may simply use taskset(1) to run program on a different cpu, and in case it will be not first, percpu will work incorrectly, i.e.: $ taskset --cpu-list $(( $(getconf _NPROCESSORS_ONLN)-1 )) _SC_NPROCESSORS_ONLN uses /sys/devices/system/cpu/online, LXD/LXC virtualize /sys/devices/system/cpu/online file [1], and so when you run container with limited limits.cpus it will bind randomly selected CPU to it [1]: https://github.com/lxc/lxcfs/issues/301 _SC_NPROCESSORS_CONF uses /sys/devices/system/cpu/cpu*, and AFAIK nobody playing with dentries there. So if all three of these are equal, percpu arenas should work correctly.
And a small note regardless _SC_NPROCESSORS_ONLN/_SC_NPROCESSORS_CONF, musl uses sched_getaffinity() for both. So this will also increase the entropy. Also note, that you can check is percpu arena really applied using abort_conf:true. Refs: https://github.com/jemalloc/jemalloc/pull/1939 Refs: https://github.com/ClickHouse/ClickHouse/issues/32806 v2: move malloc_cpu_count_is_deterministic() into malloc_init_hard_recursible() since _SC_NPROCESSORS_CONF does allocations for readdir() v3: - mark cpu_count_is_deterministic static - check only if percpu arena is enabled - check narenas --- src/jemalloc.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index e707f9f9..38f70367 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -148,6 +148,8 @@ unsigned opt_narenas = 0; fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; +/* ncpus is determinstinc, see malloc_cpu_count_is_deterministic() */ +static int cpu_count_is_deterministic = -1; /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; @@ -741,6 +743,42 @@ malloc_ncpus(void) { return ((result == -1) ? 1 : (unsigned)result); } +/* + * Ensure that number of CPUs is determistinc, i.e. it is the same based on: + * - sched_getaffinity() + * - _SC_NPROCESSORS_ONLN + * - _SC_NPROCESSORS_CONF + * Since otherwise tricky things is possible with percpu arenas in use. 
+ */ +static bool +malloc_cpu_count_is_deterministic() +{ +#ifdef _WIN32 + return true; +#else + long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN); + long cpu_conf = sysconf(_SC_NPROCESSORS_CONF); + if (cpu_onln != cpu_conf) + return false; +# if defined(CPU_COUNT) +# if defined(__FreeBSD__) + cpuset_t set; +# else + cpu_set_t set; +# endif /* __FreeBSD__ */ +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) + sched_getaffinity(0, sizeof(set), &set); +# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ + pthread_getaffinity_np(pthread_self(), sizeof(set), &set); +# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ + long cpu_affinity = CPU_COUNT(&set); + if (cpu_affinity != cpu_conf) + return false; +# endif /* CPU_COUNT */ + return true; +#endif +} + static void init_opt_stats_opts(const char *v, size_t vlen, char *dest) { size_t opts_len = strlen(dest); @@ -1833,6 +1871,7 @@ malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); + cpu_count_is_deterministic = malloc_cpu_count_is_deterministic(); #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ @@ -1892,7 +1931,22 @@ malloc_init_narenas(void) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { - if (!have_percpu_arena || malloc_getcpu() < 0) { + if (!cpu_count_is_deterministic) { + if (opt_narenas) { + malloc_write(": Number of CPUs is not deterministic, " + "but narenas is set. Hope you not what you are doing and " + "you have set narenas to largest possible CPU ID.\n"); + if (opt_abort) { + abort(); + } + } else { + opt_percpu_arena = percpu_arena_disabled; + if (opt_abort_conf) { + malloc_write(": Number of CPUs is not deterministic\n"); + malloc_abort_invalid_conf(); + } + } + } else if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; malloc_printf(": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? 
From 310af725b0037870f70bf6b94426249f69ca4441 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 20 Dec 2021 14:39:24 -0800 Subject: [PATCH 2151/2608] Add nstime_ns_since which obtains the duration since the input time. --- include/jemalloc/internal/nstime.h | 1 + src/nstime.c | 13 +++++++++++++ test/unit/nstime.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index e8315db1..258b16e3 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -35,6 +35,7 @@ void nstime_isubtract(nstime_t *time, uint64_t subtrahend); void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +uint64_t nstime_ns_since(const nstime_t *past); typedef bool (nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; diff --git a/src/nstime.c b/src/nstime.c index 44419d2c..a1a53777 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -158,6 +158,19 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) { return time->ns / divisor->ns; } +/* Returns time since *past, w/o updating *past. 
*/ +uint64_t +nstime_ns_since(const nstime_t *past) { + nstime_assert_initialized(past); + + nstime_t now; + nstime_copy(&now, past); + nstime_update(&now); + + assert(nstime_compare(&now, past) >= 0); + return now.ns - past->ns; +} + #ifdef _WIN32 # define NSTIME_MONOTONIC true static void diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 083002bd..56238ab3 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -201,6 +201,33 @@ TEST_BEGIN(test_nstime_divide) { } TEST_END +void +test_nstime_since_once(nstime_t *t) { + nstime_t old_t; + nstime_copy(&old_t, t); + + uint64_t ns_since = nstime_ns_since(t); + nstime_update(t); + + nstime_t new_t; + nstime_copy(&new_t, t); + nstime_subtract(&new_t, &old_t); + + expect_u64_ge(nstime_ns(&new_t), ns_since, + "Incorrect time since result"); +} + +TEST_BEGIN(test_nstime_ns_since) { + nstime_t t; + + nstime_init_update(&t); + for (uint64_t i = 0; i < 10000; i++) { + /* Keeps updating t and verifies ns_since is valid. */ + test_nstime_since_once(&t); + } +} +TEST_END + TEST_BEGIN(test_nstime_monotonic) { nstime_monotonic(); } @@ -220,5 +247,6 @@ main(void) { test_nstime_imultiply, test_nstime_idivide, test_nstime_divide, + test_nstime_ns_since, test_nstime_monotonic); } From 837b37c4ce44a1c236e1657a6de80b064af98610 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 20 Dec 2021 15:04:12 -0800 Subject: [PATCH 2152/2608] Fix the time-since computation in HPA. nstime module guarantees monotonic clock update within a single nstime_t. This means, if two separate nstime_t variables are read and updated separately, nstime_subtract between them may result in underflow. Fixed by switching to the time since utility provided by nstime. 
--- include/jemalloc/internal/hpa_hooks.h | 1 + src/hpa.c | 22 +++++++--------------- src/hpa_hooks.c | 7 +++++++ test/unit/hpa.c | 7 +++++++ 4 files changed, 22 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 12e6b972..4ea221cb 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -9,6 +9,7 @@ struct hpa_hooks_s { void (*hugify)(void *ptr, size_t size); void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); + uint64_t (*ms_since)(nstime_t *r_time); }; extern hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index 0a7ec19e..7e2aeba0 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -479,10 +479,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { /* Make sure that it's been hugifiable for long enough. */ nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); - nstime_t nstime; - shard->central->hooks.curtime(&nstime, /* first_reading */ true); - nstime_subtract(&nstime, &time_hugify_allowed); - uint64_t millis = nstime_msec(&nstime); + uint64_t millis = shard->central->hooks.ms_since(&time_hugify_allowed); if (millis < shard->opts.hugify_delay_ms) { return false; } @@ -897,17 +894,15 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { if (to_hugify != NULL) { nstime_t time_hugify_allowed = hpdata_time_hugify_allowed(to_hugify); - nstime_t nstime; - shard->central->hooks.curtime(&nstime, - /* first_reading */ true); - nstime_subtract(&nstime, &time_hugify_allowed); - uint64_t since_hugify_allowed_ms = nstime_msec(&nstime); + uint64_t since_hugify_allowed_ms = + shard->central->hooks.ms_since(&time_hugify_allowed); /* * If not enough time has passed since hugification was allowed, * sleep for the rest. 
*/ if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) { - time_ns = shard->opts.hugify_delay_ms - since_hugify_allowed_ms; + time_ns = shard->opts.hugify_delay_ms - + since_hugify_allowed_ms; time_ns *= 1000 * 1000; } else { malloc_mutex_unlock(tsdn, &shard->mtx); @@ -924,11 +919,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { malloc_mutex_unlock(tsdn, &shard->mtx); return BACKGROUND_THREAD_DEFERRED_MIN; } - nstime_t nstime; - shard->central->hooks.curtime(&nstime, - /* first_reading */ true); - nstime_subtract(&nstime, &shard->last_purge); - uint64_t since_last_purge_ms = nstime_msec(&nstime); + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { uint64_t until_purge_ns; diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 116592f2..ade581e8 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -9,6 +9,7 @@ static void hpa_hooks_purge(void *ptr, size_t size); static void hpa_hooks_hugify(void *ptr, size_t size); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); +static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -17,6 +18,7 @@ hpa_hooks_t hpa_hooks_default = { &hpa_hooks_hugify, &hpa_hooks_dehugify, &hpa_hooks_curtime, + &hpa_hooks_ms_since }; static void * @@ -54,3 +56,8 @@ hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { } nstime_update(r_nstime); } + +static uint64_t +hpa_hooks_ms_since(nstime_t *past_nstime) { + return nstime_ns_since(past_nstime) / 1000 / 1000; +} diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 25ee1950..dfd57f39 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" #define SHARD_IND 111 @@ -353,6 +354,11 @@ defer_test_curtime(nstime_t *r_time, bool 
first_reading) { *r_time = defer_curtime; } +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + TEST_BEGIN(test_defer_time) { test_skip_if(!hpa_supported()); @@ -363,6 +369,7 @@ TEST_BEGIN(test_defer_time) { hooks.hugify = &defer_test_hugify; hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; From 60b9637cc0c5e88518d03e23de8538523757f060 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Dec 2021 15:43:30 -0800 Subject: [PATCH 2153/2608] Only invoke malloc_cpu_count_is_deterministic() when necessary. Also refactor the handling of the non-deterministic case. Notably allow the case with narenas set to proceed w/o warnings, to not affect existing valid use cases. --- src/jemalloc.c | 51 ++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 38f70367..18936575 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -148,8 +148,6 @@ unsigned opt_narenas = 0; fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; -/* ncpus is determinstinc, see malloc_cpu_count_is_deterministic() */ -static int cpu_count_is_deterministic = -1; /* Protects arenas initialization. 
*/ malloc_mutex_t arenas_lock; @@ -758,10 +756,11 @@ malloc_cpu_count_is_deterministic() #else long cpu_onln = sysconf(_SC_NPROCESSORS_ONLN); long cpu_conf = sysconf(_SC_NPROCESSORS_CONF); - if (cpu_onln != cpu_conf) + if (cpu_onln != cpu_conf) { return false; + } # if defined(CPU_COUNT) -# if defined(__FreeBSD__) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; # else cpu_set_t set; @@ -772,8 +771,9 @@ malloc_cpu_count_is_deterministic() pthread_getaffinity_np(pthread_self(), sizeof(set), &set); # endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ long cpu_affinity = CPU_COUNT(&set); - if (cpu_affinity != cpu_conf) + if (cpu_affinity != cpu_conf) { return false; + } # endif /* CPU_COUNT */ return true; #endif @@ -1871,7 +1871,29 @@ malloc_init_hard_recursible(void) { malloc_init_state = malloc_init_recursible; ncpus = malloc_ncpus(); - cpu_count_is_deterministic = malloc_cpu_count_is_deterministic(); + if (opt_percpu_arena != percpu_arena_disabled) { + bool cpu_count_is_deterministic = + malloc_cpu_count_is_deterministic(); + if (!cpu_count_is_deterministic) { + /* + * If # of CPU is not deterministic, and narenas not + * specified, disables per cpu arena since it may not + * detect CPU IDs properly. + */ + if (opt_narenas == 0) { + opt_percpu_arena = percpu_arena_disabled; + malloc_write(": Number of CPUs " + "detected is not deterministic. Per-CPU " + "arena disabled.\n"); + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } + if (opt_abort) { + abort(); + } + } + } + } #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ @@ -1931,22 +1953,7 @@ malloc_init_narenas(void) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { - if (!cpu_count_is_deterministic) { - if (opt_narenas) { - malloc_write(": Number of CPUs is not deterministic, " - "but narenas is set. 
Hope you not what you are doing and " - "you have set narenas to largest possible CPU ID.\n"); - if (opt_abort) { - abort(); - } - } else { - opt_percpu_arena = percpu_arena_disabled; - if (opt_abort_conf) { - malloc_write(": Number of CPUs is not deterministic\n"); - malloc_abort_invalid_conf(); - } - } - } else if (!have_percpu_arena || malloc_getcpu() < 0) { + if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; malloc_printf(": perCPU arena getcpu() not " "available. Setting narenas to %u.\n", opt_narenas ? From e491df1d2f686a1ba47036301693285a72d98ca2 Mon Sep 17 00:00:00 2001 From: Joshua Watt Date: Wed, 15 Dec 2021 10:49:01 -0600 Subject: [PATCH 2154/2608] Fix warnings when using autoheader. --- configure.ac | 271 ++++++++++++++++++++++++++------------------------- 1 file changed, 136 insertions(+), 135 deletions(-) diff --git a/configure.ac b/configure.ac index 22900ec0..e18c0cc2 100644 --- a/configure.ac +++ b/configure.ac @@ -237,11 +237,11 @@ fi if test "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-std=gnu11]) if test "x$je_cv_cflags_added" = "x-std=gnu11" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ]) else JE_CFLAGS_ADD([-std=gnu99]) if test "x$je_cv_cflags_added" = "x-std=gnu99" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT]) + AC_DEFINE_UNQUOTED([JEMALLOC_HAS_RESTRICT], [ ], [ ]) fi fi JE_CFLAGS_ADD([-Werror=unknown-warning-option]) @@ -326,7 +326,7 @@ if test "x$enable_cxx" = "x1" ; then fi fi if test "x$enable_cxx" = "x1"; then - AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ]) + AC_DEFINE([JEMALLOC_ENABLE_CXX], [ ], [ ]) fi AC_SUBST([enable_cxx]) AC_SUBST([CONFIGURE_CXXFLAGS]) @@ -335,7 +335,7 @@ AC_SUBST([EXTRA_CXXFLAGS]) AC_C_BIGENDIAN([ac_cv_big_endian=1], [ac_cv_big_endian=0]) if test "x${ac_cv_big_endian}" = "x1" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ]) + AC_DEFINE_UNQUOTED([JEMALLOC_BIG_ENDIAN], [ ], [ ]) fi if test "x${je_cv_msvc}" = "xyes" 
-a "x${ac_cv_header_inttypes_h}" = "xno"; then @@ -355,7 +355,7 @@ else AC_MSG_ERROR([Unsupported pointer size: ${ac_cv_sizeof_void_p}]) fi fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_PTR], [$LG_SIZEOF_PTR], [ ]) AC_CHECK_SIZEOF([int]) if test "x${ac_cv_sizeof_int}" = "x8" ; then @@ -365,7 +365,7 @@ elif test "x${ac_cv_sizeof_int}" = "x4" ; then else AC_MSG_ERROR([Unsupported int size: ${ac_cv_sizeof_int}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_INT], [$LG_SIZEOF_INT], [ ]) AC_CHECK_SIZEOF([long]) if test "x${ac_cv_sizeof_long}" = "x8" ; then @@ -375,7 +375,7 @@ elif test "x${ac_cv_sizeof_long}" = "x4" ; then else AC_MSG_ERROR([Unsupported long size: ${ac_cv_sizeof_long}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG], [$LG_SIZEOF_LONG], [ ]) AC_CHECK_SIZEOF([long long]) if test "x${ac_cv_sizeof_long_long}" = "x8" ; then @@ -385,7 +385,7 @@ elif test "x${ac_cv_sizeof_long_long}" = "x4" ; then else AC_MSG_ERROR([Unsupported long long size: ${ac_cv_sizeof_long_long}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_LONG_LONG], [$LG_SIZEOF_LONG_LONG], [ ]) AC_CHECK_SIZEOF([intmax_t]) if test "x${ac_cv_sizeof_intmax_t}" = "x16" ; then @@ -397,7 +397,7 @@ elif test "x${ac_cv_sizeof_intmax_t}" = "x4" ; then else AC_MSG_ERROR([Unsupported intmax_t size: ${ac_cv_sizeof_intmax_t}]) fi -AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T]) +AC_DEFINE_UNQUOTED([LG_SIZEOF_INTMAX_T], [$LG_SIZEOF_INTMAX_T], [ ]) AC_CANONICAL_HOST dnl CPU-specific settings. 
@@ -437,8 +437,8 @@ case "${host_cpu}" in HAVE_CPU_SPINWAIT=0 ;; esac -AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT]) -AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT]) +AC_DEFINE_UNQUOTED([HAVE_CPU_SPINWAIT], [$HAVE_CPU_SPINWAIT], [ ]) +AC_DEFINE_UNQUOTED([CPU_SPINWAIT], [$CPU_SPINWAIT], [ ]) AC_ARG_WITH([lg_vaddr], [AS_HELP_STRING([--with-lg-vaddr=], [Number of significant virtual address bits])], @@ -503,7 +503,7 @@ typedef unsigned __int32 uint32_t; LG_VADDR="${je_cv_lg_vaddr}" fi if test "x${LG_VADDR}" != "xerror" ; then - AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) + AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ]) else AC_MSG_ERROR([cannot determine number of significant virtual address bits]) fi @@ -525,7 +525,7 @@ typedef unsigned __int32 uint32_t; fi ;; esac -AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR]) +AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ]) LD_PRELOAD_VAR="LD_PRELOAD" so="so" @@ -654,7 +654,7 @@ case "${host}" in *-*-freebsd*) JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ], [ ]) force_lazy_lock="1" ;; *-*-dragonfly*) @@ -672,11 +672,11 @@ case "${host}" in JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" glibc="0" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_C11_ATOMICS]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_C11_ATOMICS], [ ], [ ]) force_tls="0" if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" @@ -687,11 +687,11 @@ case "${host}" in JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" glibc="1" - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ]) - 
AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ], [ ]) if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi @@ -700,10 +700,10 @@ case "${host}" in dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) abi="elf" - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) - AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ]) - AC_DEFINE([JEMALLOC_THREADED_INIT], [ ]) - AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_SYSCTL_VM_OVERCOMMIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + AC_DEFINE([JEMALLOC_USE_CXX_THROW], [ ], [ ]) ;; *-*-netbsd*) AC_MSG_CHECKING([ABI]) @@ -774,7 +774,7 @@ case "${host}" in *-*-nto-qnx) abi="elf" force_tls="0" - AC_DEFINE([JEMALLOC_HAS_ALLOCA_H]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) ;; *) AC_MSG_RESULT([Unsupported operating system: ${host}]) @@ -797,7 +797,7 @@ AC_CHECK_HEADERS([malloc.h], [ AC_MSG_RESULT([no]) ]) ]) -AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST]) +AC_DEFINE_UNQUOTED([JEMALLOC_USABLE_SIZE_CONST], [$JEMALLOC_USABLE_SIZE_CONST], [ ]) AC_SUBST([abi]) AC_SUBST([RPATH]) AC_SUBST([LD_PRELOAD_VAR]) @@ -835,7 +835,7 @@ JE_COMPILABLE([__attribute__ syntax], [], [je_cv_attribute]) if test "x${je_cv_attribute}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR], [ ], [ ]) if test "x${GCC}" = "xyes" -a "x${abi}" = "xelf"; then JE_CFLAGS_ADD([-fvisibility=hidden]) JE_CXXFLAGS_ADD([-fvisibility=hidden]) @@ -863,7 +863,7 @@ JE_COMPILABLE([alloc_size attribute], 
[#include ], [je_cv_alloc_size]) JE_CFLAGS_RESTORE() if test "x${je_cv_alloc_size}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_ALLOC_SIZE], [ ], [ ]) fi dnl Check for format(gnu_printf, ...) attribute support. JE_CFLAGS_SAVE() @@ -874,7 +874,7 @@ JE_COMPILABLE([format(gnu_printf, ...) attribute], [#include ], [je_cv_format_gnu_printf]) JE_CFLAGS_RESTORE() if test "x${je_cv_format_gnu_printf}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_GNU_PRINTF], [ ], [ ]) fi dnl Check for format(printf, ...) attribute support. JE_CFLAGS_SAVE() @@ -885,7 +885,7 @@ JE_COMPILABLE([format(printf, ...) attribute], [#include ], [je_cv_format_printf]) JE_CFLAGS_RESTORE() if test "x${je_cv_format_printf}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_PRINTF], [ ], [ ]) fi dnl Check for format_arg(...) attribute support. @@ -897,7 +897,7 @@ JE_COMPILABLE([format(printf, ...) attribute], [#include ], [je_cv_format_arg]) JE_CFLAGS_RESTORE() if test "x${je_cv_format_arg}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FORMAT_ARG], [ ], [ ]) fi dnl Check for fallthrough attribute support. @@ -915,7 +915,7 @@ JE_COMPILABLE([fallthrough attribute], [je_cv_fallthrough]) JE_CFLAGS_RESTORE() if test "x${je_cv_fallthrough}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_FALLTHROUGH], [ ], [ ]) JE_CFLAGS_ADD([-Wimplicit-fallthrough]) JE_CXXFLAGS_ADD([-Wimplicit-fallthrough]) fi @@ -929,7 +929,7 @@ JE_COMPILABLE([cold attribute], [], [je_cv_cold]) JE_CFLAGS_RESTORE() if test "x${je_cv_cold}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ], [ ]) fi dnl Check for VM_MAKE_TAG for mmap support. 
@@ -941,7 +941,7 @@ JE_COMPILABLE([vm_make_tag], munmap(p, 16);], [je_cv_vm_make_tag]) if test "x${je_cv_vm_make_tag}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ]) + AC_DEFINE([JEMALLOC_HAVE_VM_MAKE_TAG], [ ], [ ]) fi dnl Support optional additions to rpath. @@ -1033,11 +1033,11 @@ else fi] ) if test "x$JEMALLOC_PREFIX" = "x" ; then - AC_DEFINE([JEMALLOC_IS_MALLOC]) + AC_DEFINE([JEMALLOC_IS_MALLOC], [ ], [ ]) else JEMALLOC_CPREFIX=`echo ${JEMALLOC_PREFIX} | tr "a-z" "A-Z"` - AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"]) - AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"]) + AC_DEFINE_UNQUOTED([JEMALLOC_PREFIX], ["$JEMALLOC_PREFIX"], [ ]) + AC_DEFINE_UNQUOTED([JEMALLOC_CPREFIX], ["$JEMALLOC_CPREFIX"], [ ]) fi AC_SUBST([JEMALLOC_PREFIX]) AC_SUBST([JEMALLOC_CPREFIX]) @@ -1045,45 +1045,45 @@ AC_SUBST([JEMALLOC_CPREFIX]) AC_ARG_WITH([export], [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], [if test "x$with_export" = "xno"; then - AC_DEFINE([JEMALLOC_EXPORT],[]) + AC_DEFINE([JEMALLOC_EXPORT],[], [ ]) fi] ) public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. 
AC_CHECK_FUNC([memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ], [ ]) public_syms="${public_syms} memalign"]) AC_CHECK_FUNC([valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ], [ ]) public_syms="${public_syms} valloc"]) AC_CHECK_FUNC([malloc_size], - [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ]) + [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ], [ ]) public_syms="${public_syms} malloc_size"]) dnl Check for allocator-related functions that should be wrapped. wrap_syms= if test "x${JEMALLOC_PREFIX}" = "x" ; then AC_CHECK_FUNC([__libc_calloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_CALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_calloc"]) AC_CHECK_FUNC([__libc_free], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ], [ ]) wrap_syms="${wrap_syms} __libc_free"]) AC_CHECK_FUNC([__libc_malloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_malloc"]) AC_CHECK_FUNC([__libc_memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MEMALIGN], [ ], [ ]) wrap_syms="${wrap_syms} __libc_memalign"]) AC_CHECK_FUNC([__libc_realloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_REALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_realloc"]) AC_CHECK_FUNC([__libc_valloc], - [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_valloc"]) AC_CHECK_FUNC([__posix_memalign], - [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ]) + [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ], [ ]) wrap_syms="${wrap_syms} __posix_memalign"]) fi @@ -1101,7 +1101,7 @@ AC_ARG_WITH([private_namespace], 
[JEMALLOC_PRIVATE_NAMESPACE="${with_private_namespace}je_"], [JEMALLOC_PRIVATE_NAMESPACE="je_"] ) -AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE]) +AC_DEFINE_UNQUOTED([JEMALLOC_PRIVATE_NAMESPACE], [$JEMALLOC_PRIVATE_NAMESPACE], [ ]) private_namespace="$JEMALLOC_PRIVATE_NAMESPACE" AC_SUBST([private_namespace]) @@ -1121,7 +1121,7 @@ AC_ARG_WITH([malloc_conf], [JEMALLOC_CONFIG_MALLOC_CONF=""] ) config_malloc_conf="$JEMALLOC_CONFIG_MALLOC_CONF" -AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"]) +AC_DEFINE_UNQUOTED([JEMALLOC_CONFIG_MALLOC_CONF], ["$config_malloc_conf"], [ ]) dnl Substitute @je_@ in jemalloc_protos.h.in, primarily to make generation of dnl jemalloc_protos_jet.h easy. @@ -1210,7 +1210,7 @@ fi [enable_debug="0"] ) if test "x$enable_debug" = "x1" ; then - AC_DEFINE([JEMALLOC_DEBUG], [ ]) + AC_DEFINE([JEMALLOC_DEBUG], [ ], [ ]) fi AC_SUBST([enable_debug]) @@ -1242,7 +1242,7 @@ fi [enable_stats="1"] ) if test "x$enable_stats" = "x1" ; then - AC_DEFINE([JEMALLOC_STATS], [ ]) + AC_DEFINE([JEMALLOC_STATS], [ ], [ ]) fi AC_SUBST([enable_stats]) @@ -1258,7 +1258,7 @@ fi [enable_experimental_smallocx="0"] ) if test "x$enable_experimental_smallocx" = "x1" ; then - AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API]) + AC_DEFINE([JEMALLOC_EXPERIMENTAL_SMALLOCX_API], [ ], [ ]) fi AC_SUBST([enable_experimental_smallocx]) @@ -1315,7 +1315,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then fi if test "x${enable_prof_libunwind}" = "x1" ; then backtrace_method="libunwind" - AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ]) + AC_DEFINE([JEMALLOC_PROF_LIBUNWIND], [ ], [ ]) fi fi @@ -1338,7 +1338,7 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libgcc" = "x1" \ fi if test "x${enable_prof_libgcc}" = "x1" ; then backtrace_method="libgcc" - AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ]) + AC_DEFINE([JEMALLOC_PROF_LIBGCC], [ ], [ ]) fi else enable_prof_libgcc="0" @@ -1359,7 +1359,7 @@ if test 
"x$backtrace_method" = "x" -a "x$enable_prof_gcc" = "x1" \ -a "x$GCC" = "xyes" ; then JE_CFLAGS_ADD([-fno-omit-frame-pointer]) backtrace_method="gcc intrinsics" - AC_DEFINE([JEMALLOC_PROF_GCC], [ ]) + AC_DEFINE([JEMALLOC_PROF_GCC], [ ], [ ]) else enable_prof_gcc="0" fi @@ -1374,19 +1374,19 @@ if test "x$enable_prof" = "x1" ; then dnl Heap profiling uses the log(3) function. JE_APPEND_VS(LIBS, $LM) - AC_DEFINE([JEMALLOC_PROF], [ ]) + AC_DEFINE([JEMALLOC_PROF], [ ], [ ]) fi AC_SUBST([enable_prof]) dnl Indicate whether adjacent virtual memory mappings automatically coalesce dnl (and fragment on demand). if test "x${maps_coalesce}" = "x1" ; then - AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ]) + AC_DEFINE([JEMALLOC_MAPS_COALESCE], [ ], [ ]) fi dnl Indicate whether to retain memory (rather than using munmap()) by default. if test "x$default_retain" = "x1" ; then - AC_DEFINE([JEMALLOC_RETAIN], [ ]) + AC_DEFINE([JEMALLOC_RETAIN], [ ], [ ]) fi dnl Enable allocation from DSS if supported by the OS. @@ -1403,7 +1403,7 @@ else fi if test "x$have_dss" = "x1" ; then - AC_DEFINE([JEMALLOC_DSS], [ ]) + AC_DEFINE([JEMALLOC_DSS], [ ], [ ]) fi dnl Support the junk/zero filling option by default. 
@@ -1418,7 +1418,7 @@ fi [enable_fill="1"] ) if test "x$enable_fill" = "x1" ; then - AC_DEFINE([JEMALLOC_FILL], [ ]) + AC_DEFINE([JEMALLOC_FILL], [ ], [ ]) fi AC_SUBST([enable_fill]) @@ -1456,11 +1456,11 @@ if test "x${je_cv_utrace}" = "xno" ; then enable_utrace="0" fi if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ]) + AC_DEFINE([JEMALLOC_UTRACE_LABEL], [ ], [ ]) fi else if test "x$enable_utrace" = "x1" ; then - AC_DEFINE([JEMALLOC_UTRACE], [ ]) + AC_DEFINE([JEMALLOC_UTRACE], [ ], [ ]) fi fi AC_SUBST([enable_utrace]) @@ -1477,7 +1477,7 @@ fi [enable_xmalloc="0"] ) if test "x$enable_xmalloc" = "x1" ; then - AC_DEFINE([JEMALLOC_XMALLOC], [ ]) + AC_DEFINE([JEMALLOC_XMALLOC], [ ], [ ]) fi AC_SUBST([enable_xmalloc]) @@ -1494,7 +1494,7 @@ fi [enable_cache_oblivious="1"] ) if test "x$enable_cache_oblivious" = "x1" ; then - AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ]) + AC_DEFINE([JEMALLOC_CACHE_OBLIVIOUS], [ ], [ ]) fi AC_SUBST([enable_cache_oblivious]) @@ -1510,7 +1510,7 @@ fi [enable_log="0"] ) if test "x$enable_log" = "x1" ; then - AC_DEFINE([JEMALLOC_LOG], [ ]) + AC_DEFINE([JEMALLOC_LOG], [ ], [ ]) fi AC_SUBST([enable_log]) @@ -1526,7 +1526,7 @@ fi [enable_readlinkat="0"] ) if test "x$enable_readlinkat" = "x1" ; then - AC_DEFINE([JEMALLOC_READLINKAT], [ ]) + AC_DEFINE([JEMALLOC_READLINKAT], [ ], [ ]) fi AC_SUBST([enable_readlinkat]) @@ -1543,7 +1543,7 @@ fi [enable_opt_safety_checks="0"] ) if test "x$enable_opt_safety_checks" = "x1" ; then - AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ]) + AC_DEFINE([JEMALLOC_OPT_SAFETY_CHECKS], [ ], [ ]) fi AC_SUBST([enable_opt_safety_checks]) @@ -1560,7 +1560,7 @@ fi [enable_opt_size_checks="0"] ) if test "x$enable_opt_size_checks" = "x1" ; then - AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ]) + AC_DEFINE([JEMALLOC_OPT_SIZE_CHECKS], [ ], [ ]) fi AC_SUBST([enable_opt_size_checks]) @@ -1574,9 +1574,9 @@ void foo (void) { } ], [je_cv_gcc_builtin_unreachable]) if test "x${je_cv_gcc_builtin_unreachable}" = "xyes" 
; then - AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable]) + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [__builtin_unreachable], [ ]) else - AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort]) + AC_DEFINE([JEMALLOC_INTERNAL_UNREACHABLE], [abort], [ ]) fi dnl ============================================================================ @@ -1596,9 +1596,9 @@ JE_COMPILABLE([a program using __builtin_ffsl], [ } ], [je_cv_gcc_builtin_ffsl]) if test "x${je_cv_gcc_builtin_ffsl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll]) - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [__builtin_ffsll], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [__builtin_ffsl], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [__builtin_ffs], [ ]) else JE_COMPILABLE([a program using ffsl], [ #include @@ -1611,9 +1611,9 @@ else } ], [je_cv_function_ffsl]) if test "x${je_cv_function_ffsl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll]) - AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl]) - AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSLL], [ffsll], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFSL], [ffsl], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_FFS], [ffs], [ ]) else AC_MSG_ERROR([Cannot build without ffsl(3) or __builtin_ffsl()]) fi @@ -1630,16 +1630,16 @@ JE_COMPILABLE([a program using __builtin_popcountl], [ } ], [je_cv_gcc_builtin_popcountl]) if test "x${je_cv_gcc_builtin_popcountl}" = "xyes" ; then - AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount]) - AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl]) - AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNT], [__builtin_popcount], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTL], [__builtin_popcountl], [ ]) + AC_DEFINE([JEMALLOC_INTERNAL_POPCOUNTLL], [__builtin_popcountll], [ ]) fi AC_ARG_WITH([lg_quantum], 
[AS_HELP_STRING([--with-lg-quantum=], [Base 2 log of minimum allocation alignment])]) if test "x$with_lg_quantum" != "x" ; then - AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum]) + AC_DEFINE_UNQUOTED([LG_QUANTUM], [$with_lg_quantum], [ ]) fi AC_ARG_WITH([lg_slab_maxregs], @@ -1648,7 +1648,7 @@ AC_ARG_WITH([lg_slab_maxregs], [CONFIG_LG_SLAB_MAXREGS="with_lg_slab_maxregs"], [CONFIG_LG_SLAB_MAXREGS=""]) if test "x$with_lg_slab_maxregs" != "x" ; then - AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs]) + AC_DEFINE_UNQUOTED([CONFIG_LG_SLAB_MAXREGS], [$with_lg_slab_maxregs], [ ]) fi AC_ARG_WITH([lg_page], @@ -1700,7 +1700,7 @@ if test "x${je_cv_lg_page}" != "x" ; then LG_PAGE="${je_cv_lg_page}" fi if test "x${LG_PAGE}" != "xundefined" ; then - AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE]) + AC_DEFINE_UNQUOTED([LG_PAGE], [$LG_PAGE], [ ]) else AC_MSG_ERROR([cannot determine value for LG_PAGE]) fi @@ -1737,7 +1737,7 @@ if test "x${LG_PAGE}" != "xundefined" -a \ "${je_cv_lg_hugepage}" -lt "${LG_PAGE}" ; then AC_MSG_ERROR([Huge page size (2^${je_cv_lg_hugepage}) must be at least page size (2^${LG_PAGE})]) fi -AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}]) +AC_DEFINE_UNQUOTED([LG_HUGEPAGE], [${je_cv_lg_hugepage}], [ ]) dnl ============================================================================ dnl Enable libdl by default. @@ -1758,7 +1758,7 @@ dnl ============================================================================ dnl Configure pthreads. if test "x$abi" != "xpecoff" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD], [ ], [ ]) AC_CHECK_HEADERS([pthread.h], , [AC_MSG_ERROR([pthread.h is missing])]) dnl Some systems may embed pthreads functionality in libc; check for libpthread dnl first, but try libc too before failing. @@ -1776,7 +1776,7 @@ dnl Check if we have dlsym support. 
[AC_CHECK_LIB([dl], [dlsym], [LIBS="$LIBS -ldl"], [have_dlsym="0"])]), [have_dlsym="0"]) if test "x$have_dlsym" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ]) + AC_DEFINE([JEMALLOC_HAVE_DLSYM], [ ], [ ]) fi else have_dlsym="0" @@ -1788,7 +1788,7 @@ dnl Check if we have dlsym support. pthread_atfork((void *)0, (void *)0, (void *)0); ], [je_cv_pthread_atfork]) if test "x${je_cv_pthread_atfork}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_ATFORK], [ ], [ ]) fi dnl Check if pthread_setname_np is available with the expected API. JE_COMPILABLE([pthread_setname_np(3)], [ @@ -1797,7 +1797,7 @@ dnl Check if we have dlsym support. pthread_setname_np(pthread_self(), "setname_test"); ], [je_cv_pthread_setname_np]) if test "x${je_cv_pthread_setname_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETNAME_NP], [ ], [ ]) fi dnl Check if pthread_getname_np is not necessarily present despite dnl the pthread_setname_np counterpart @@ -1812,7 +1812,7 @@ dnl Check if we have dlsym support. } ], [je_cv_pthread_getname_np]) if test "x${je_cv_pthread_getname_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ], [ ]) fi dnl Check if pthread_get_name_np is not necessarily present despite dnl the pthread_set_name_np counterpart @@ -1828,7 +1828,7 @@ dnl Check if we have dlsym support. 
} ], [je_cv_pthread_get_name_np]) if test "x${je_cv_pthread_get_name_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GET_NAME_NP], [ ], [ ]) fi fi @@ -1860,7 +1860,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC_COARSE, ...)], [ clock_gettime(CLOCK_MONOTONIC_COARSE, &ts); ], [je_cv_clock_monotonic_coarse]) if test "x${je_cv_clock_monotonic_coarse}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE]) + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE], [ ], [ ]) fi dnl check for CLOCK_MONOTONIC. @@ -1876,7 +1876,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_MONOTONIC, ...)], [ #endif ], [je_cv_clock_monotonic]) if test "x${je_cv_clock_monotonic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC]) + AC_DEFINE([JEMALLOC_HAVE_CLOCK_MONOTONIC], [ ], [ ]) fi dnl Check for mach_absolute_time(). @@ -1886,7 +1886,7 @@ JE_COMPILABLE([mach_absolute_time()], [ mach_absolute_time(); ], [je_cv_mach_absolute_time]) if test "x${je_cv_mach_absolute_time}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME]) + AC_DEFINE([JEMALLOC_HAVE_MACH_ABSOLUTE_TIME], [ ], [ ]) fi dnl check for CLOCK_REALTIME (always should be available on Linux) @@ -1898,7 +1898,7 @@ JE_COMPILABLE([clock_gettime(CLOCK_REALTIME, ...)], [ clock_gettime(CLOCK_REALTIME, &ts); ], [je_cv_clock_realtime]) if test "x${je_cv_clock_realtime}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME]) + AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME], [ ], [ ]) fi dnl Use syscall(2) (if available) by default. 
@@ -1926,7 +1926,7 @@ if test "x$enable_syscall" = "x1" ; then [je_cv_syscall]) JE_CFLAGS_RESTORE() if test "x$je_cv_syscall" = "xyes" ; then - AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ]) + AC_DEFINE([JEMALLOC_USE_SYSCALL], [ ], [ ]) fi fi @@ -1936,7 +1936,7 @@ AC_CHECK_FUNC([secure_getenv], [have_secure_getenv="0"] ) if test "x$have_secure_getenv" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ]) + AC_DEFINE([JEMALLOC_HAVE_SECURE_GETENV], [ ], [ ]) fi dnl Check if the GNU-specific sched_getcpu function exists. @@ -1945,7 +1945,7 @@ AC_CHECK_FUNC([sched_getcpu], [have_sched_getcpu="0"] ) if test "x$have_sched_getcpu" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ]) + AC_DEFINE([JEMALLOC_HAVE_SCHED_GETCPU], [ ], [ ]) fi dnl Check if the GNU-specific sched_setaffinity function exists. @@ -1954,7 +1954,7 @@ AC_CHECK_FUNC([sched_setaffinity], [have_sched_setaffinity="0"] ) if test "x$have_sched_setaffinity" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ]) + AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ], [ ]) fi dnl Check if the Solaris/BSD issetugid function exists. @@ -1963,7 +1963,7 @@ AC_CHECK_FUNC([issetugid], [have_issetugid="0"] ) if test "x$have_issetugid" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ]) + AC_DEFINE([JEMALLOC_HAVE_ISSETUGID], [ ], [ ]) fi dnl Check whether the BSD-specific _malloc_thread_cleanup() exists. 
If so, use @@ -1975,7 +1975,7 @@ AC_CHECK_FUNC([_malloc_thread_cleanup], [have__malloc_thread_cleanup="0"] ) if test "x$have__malloc_thread_cleanup" = "x1" ; then - AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ]) + AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ], [ ]) wrap_syms="${wrap_syms} _malloc_thread_cleanup" force_tls="1" fi @@ -1988,7 +1988,7 @@ AC_CHECK_FUNC([_pthread_mutex_init_calloc_cb], [have__pthread_mutex_init_calloc_cb="0"] ) if test "x$have__pthread_mutex_init_calloc_cb" = "x1" ; then - AC_DEFINE([JEMALLOC_MUTEX_INIT_CB]) + AC_DEFINE([JEMALLOC_MUTEX_INIT_CB], [ ], [ ]) wrap_syms="${wrap_syms} _malloc_prefork _malloc_postfork" fi @@ -1997,7 +1997,7 @@ AC_CHECK_FUNC([memcntl], [have_memcntl="0"], ) if test "x$have_memcntl" = "x1" ; then - AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ], [ ]) fi dnl Disable lazy locking by default. @@ -2026,7 +2026,7 @@ if test "x${enable_lazy_lock}" = "x1" -a "x${abi}" = "xpecoff" ; then fi if test "x$enable_lazy_lock" = "x1" ; then if test "x$have_dlsym" = "x1" ; then - AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ]) + AC_DEFINE([JEMALLOC_LAZY_LOCK], [ ], [ ]) else AC_MSG_ERROR([Missing dlsym support: lazy-lock cannot be enabled.]) fi @@ -2059,7 +2059,7 @@ else fi AC_SUBST([enable_tls]) if test "x${enable_tls}" = "x1" ; then - AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ]) + AC_DEFINE_UNQUOTED([JEMALLOC_TLS], [ ], [ ]) fi dnl ============================================================================ @@ -2080,7 +2080,7 @@ JE_COMPILABLE([C11 atomics], [ return r == 0; ], [je_cv_c11_atomics]) if test "x${je_cv_c11_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_C11_ATOMICS]) + AC_DEFINE([JEMALLOC_C11_ATOMICS], [ ], [ ]) fi dnl ============================================================================ @@ -2095,7 +2095,7 @@ JE_COMPILABLE([GCC __atomic atomics], [ return after_add == 1; ], [je_cv_gcc_atomic_atomics]) if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then - 
AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_ATOMIC_ATOMICS], [ ], [ ]) dnl check for 8-bit atomic support JE_COMPILABLE([GCC 8-bit __atomic atomics], [ @@ -2107,7 +2107,7 @@ if test "x${je_cv_gcc_atomic_atomics}" = "xyes" ; then return after_add == 1; ], [je_cv_gcc_u8_atomic_atomics]) if test "x${je_cv_gcc_u8_atomic_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_U8_ATOMIC_ATOMICS], [ ], [ ]) fi fi @@ -2122,7 +2122,7 @@ JE_COMPILABLE([GCC __sync atomics], [ return (before_add == 0) && (after_add == 1); ], [je_cv_gcc_sync_atomics]) if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_SYNC_ATOMICS], [ ], [ ]) dnl check for 8-bit atomic support JE_COMPILABLE([GCC 8-bit __sync atomics], [ @@ -2133,7 +2133,7 @@ if test "x${je_cv_gcc_sync_atomics}" = "xyes" ; then return (before_add == 0) && (after_add == 1); ], [je_cv_gcc_u8_sync_atomics]) if test "x${je_cv_gcc_u8_sync_atomics}" = "xyes" ; then - AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS]) + AC_DEFINE([JEMALLOC_GCC_U8_SYNC_ATOMICS], [ ], [ ]) fi fi @@ -2158,7 +2158,7 @@ JE_COMPILABLE([Darwin OSAtomic*()], [ } ], [je_cv_osatomic]) if test "x${je_cv_osatomic}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OSATOMIC], [ ]) + AC_DEFINE([JEMALLOC_OSATOMIC], [ ], [ ]) fi dnl ============================================================================ @@ -2170,7 +2170,7 @@ JE_COMPILABLE([madvise(2)], [ madvise((void *)0, 0, 0); ], [je_cv_madvise]) if test "x${je_cv_madvise}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MADVISE], [ ], [ ]) dnl Check for madvise(..., MADV_FREE). 
JE_COMPILABLE([madvise(..., MADV_FREE)], [ @@ -2179,12 +2179,12 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_FREE); ], [je_cv_madv_free]) if test "x${je_cv_madv_free}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ], [ ]) elif test "x${je_cv_madvise}" = "xyes" ; then case "${host_cpu}" in i686|x86_64) case "${host}" in *-*-linux*) - AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ]) - AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_FREE], [ ], [ ]) + AC_DEFINE([JEMALLOC_DEFINE_MADVISE_FREE], [ ], [ ]) ;; esac ;; @@ -2198,7 +2198,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_DONTNEED); ], [je_cv_madv_dontneed]) if test "x${je_cv_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED], [ ], [ ]) fi dnl Check for madvise(..., MADV_DO[NT]DUMP). @@ -2209,7 +2209,7 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_DODUMP); ], [je_cv_madv_dontdump]) if test "x${je_cv_madv_dontdump}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ]) + AC_DEFINE([JEMALLOC_MADVISE_DONTDUMP], [ ], [ ]) fi dnl Check for madvise(..., MADV_[NO]HUGEPAGE). 
@@ -2227,14 +2227,14 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_CORE); ], [je_cv_madv_nocore]) if test "x${je_cv_madv_nocore}" = "xyes" ; then - AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ]) + AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ]) fi case "${host_cpu}" in arm*) ;; *) if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ]) fi ;; esac @@ -2246,7 +2246,7 @@ else posix_madvise((void *)0, 0, 0); ], [je_cv_posix_madvise]) if test "x${je_cv_posix_madvise}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ]) + AC_DEFINE([JEMALLOC_HAVE_POSIX_MADVISE], [ ], [ ]) dnl Check for posix_madvise(..., POSIX_MADV_DONTNEED). JE_COMPILABLE([posix_madvise(..., POSIX_MADV_DONTNEED)], [ @@ -2255,7 +2255,7 @@ else posix_madvise((void *)0, 0, POSIX_MADV_DONTNEED); ], [je_cv_posix_madv_dontneed]) if test "x${je_cv_posix_madv_dontneed}" = "xyes" ; then - AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ]) + AC_DEFINE([JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED], [ ], [ ]) fi fi fi @@ -2269,7 +2269,7 @@ JE_COMPILABLE([mprotect(2)], [ mprotect((void *)0, 0, PROT_NONE); ], [je_cv_mprotect]) if test "x${je_cv_mprotect}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ]) + AC_DEFINE([JEMALLOC_HAVE_MPROTECT], [ ], [ ]) fi dnl ============================================================================ @@ -2296,7 +2296,7 @@ AC_CACHE_CHECK([for __builtin_clz], [je_cv_builtin_clz=no])]) if test "x${je_cv_builtin_clz}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ]) + AC_DEFINE([JEMALLOC_HAVE_BUILTIN_CLZ], [ ], [ ]) fi dnl ============================================================================ @@ -2315,7 +2315,7 @@ JE_COMPILABLE([Darwin os_unfair_lock_*()], [ #endif ], [je_cv_os_unfair_lock]) if test "x${je_cv_os_unfair_lock}" = "xyes" ; then - AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ]) + AC_DEFINE([JEMALLOC_OS_UNFAIR_LOCK], [ ], [ ]) fi dnl 
============================================================================ @@ -2341,7 +2341,7 @@ if test "x${enable_zone_allocator}" = "x1" ; then if test "x${abi}" != "xmacho"; then AC_MSG_ERROR([--enable-zone-allocator is only supported on Darwin]) fi - AC_DEFINE([JEMALLOC_ZONE], [ ]) + AC_DEFINE([JEMALLOC_ZONE], [ ], [ ]) fi dnl ============================================================================ @@ -2362,16 +2362,17 @@ AC_SUBST([enable_initial_exec_tls]) if test "x${je_cv_tls_model}" = "xyes" -a \ "x${enable_initial_exec_tls}" = "x1" ; then AC_DEFINE([JEMALLOC_TLS_MODEL], - [__attribute__((tls_model("initial-exec")))]) + [__attribute__((tls_model("initial-exec")))], + [ ]) else - AC_DEFINE([JEMALLOC_TLS_MODEL], [ ]) + AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ]) fi dnl ============================================================================ dnl Enable background threads if possible. if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" ; then - AC_DEFINE([JEMALLOC_BACKGROUND_THREAD]) + AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi dnl ============================================================================ @@ -2392,7 +2393,7 @@ if test "x$glibc" = "x1" ; then ], [je_cv_glibc_malloc_hook]) if test "x${je_cv_glibc_malloc_hook}" = "xyes" ; then if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ]) + AC_DEFINE([JEMALLOC_GLIBC_MALLOC_HOOK], [ ], [ ]) wrap_syms="${wrap_syms} __free_hook __malloc_hook __realloc_hook" fi fi @@ -2407,7 +2408,7 @@ if test "x$glibc" = "x1" ; then ], [je_cv_glibc_memalign_hook]) if test "x${je_cv_glibc_memalign_hook}" = "xyes" ; then if test "x${JEMALLOC_PREFIX}" = "x" ; then - AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ]) + AC_DEFINE([JEMALLOC_GLIBC_MEMALIGN_HOOK], [ ], [ ]) wrap_syms="${wrap_syms} __memalign_hook" fi fi @@ -2422,7 +2423,7 @@ JE_COMPILABLE([pthreads adaptive mutexes], [ pthread_mutexattr_destroy(&attr); ], [je_cv_pthread_mutex_adaptive_np]) if 
test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ]) + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ], [ ]) fi JE_CFLAGS_SAVE() @@ -2441,7 +2442,7 @@ JE_COMPILABLE([strerror_r returns char with gnu source], [ ], [je_cv_strerror_r_returns_char_with_gnu_source]) JE_CFLAGS_RESTORE() if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then - AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ]) + AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ], [ ]) fi dnl ============================================================================ From bd70d8fc0f35fc7883fad18216d09e613867314b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 15 Nov 2021 15:23:47 -0800 Subject: [PATCH 2155/2608] Add the profiling settings for tests explicit. Many profiling related tests make assumptions on the profiling settings, e.g. opt_prof is off by default, and prof_active is default on when opt_prof is on. However the default settings can be changed via --with-malloc-conf at build time. Fixing the tests by adding the assumed settings explicitly. 
--- test/unit/hpa_background_thread.c | 4 ++-- test/unit/inspect.sh | 5 +++++ test/unit/mallctl.c | 6 +++++- test/unit/prof_active.sh | 2 +- test/unit/prof_hook.sh | 2 +- test/unit/prof_log.sh | 2 +- test/unit/prof_recent.sh | 2 +- test/unit/prof_stats.sh | 2 +- test/unit/prof_sys_thread_name.sh | 2 +- test/unit/prof_tctx.sh | 2 +- test/unit/safety_check.sh | 2 +- test/unit/size_check.sh | 5 +++++ test/unit/tcache_max.c | 1 + 13 files changed, 26 insertions(+), 11 deletions(-) create mode 100644 test/unit/inspect.sh create mode 100644 test/unit/size_check.sh diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 5976bb47..228b771b 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -104,8 +104,8 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { dallocx(ptr, MALLOCX_TCACHE_NONE); empty_ndirty = get_empty_ndirty(arena_ind); if (expect_deferred) { - expect_true(empty_ndirty == 0 || empty_ndirty == 1, - "Unexpected extra dirty page count: %zu", + expect_true(empty_ndirty == 0 || empty_ndirty == 1 || + opt_prof, "Unexpected extra dirty page count: %zu", empty_ndirty); } else { assert_zu_eq(0, empty_ndirty, diff --git a/test/unit/inspect.sh b/test/unit/inspect.sh new file mode 100644 index 00000000..352d1107 --- /dev/null +++ b/test/unit/inspect.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:false" +fi diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 5cba0837..81a36c97 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -368,7 +368,10 @@ TEST_BEGIN(test_tcache_none) { /* Make sure that tcache-based allocation returns p, not q. */ void *p1 = mallocx(42, 0); expect_ptr_not_null(p1, "Unexpected mallocx() failure"); - expect_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); + if (!opt_prof) { + expect_ptr_eq(p0, p1, + "Expected tcache to allocate cached region"); + } /* Clean up. 
*/ dallocx(p1, MALLOCX_TCACHE_NONE); @@ -904,6 +907,7 @@ TEST_BEGIN(test_prof_active) { * test_mallctl_opt was already enough. */ test_skip_if(!config_prof); + test_skip_if(opt_prof); bool active, old; size_t len = sizeof(bool); diff --git a/test/unit/prof_active.sh b/test/unit/prof_active.sh index 0167cb10..9749674a 100644 --- a/test/unit/prof_active.sh +++ b/test/unit/prof_active.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_thread_active_init:false,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,prof_thread_active_init:false,lg_prof_sample:0" fi diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh index d14cb8c5..c7ebd8f9 100644 --- a/test/unit/prof_hook.sh +++ b/test/unit/prof_hook.sh @@ -1,6 +1,6 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/prof_log.sh b/test/unit/prof_log.sh index 8fcc7d8a..485f9bf0 100644 --- a/test/unit/prof_log.sh +++ b/test/unit/prof_log.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh index 59759a6a..58a54a47 100644 --- a/test/unit/prof_recent.sh +++ b/test/unit/prof_recent.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_recent_alloc_max:3" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3" fi diff --git a/test/unit/prof_stats.sh b/test/unit/prof_stats.sh index b01dfd45..f3c819b5 100644 --- a/test/unit/prof_stats.sh +++ b/test/unit/prof_stats.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_stats:true" + export 
MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_stats:true" fi diff --git a/test/unit/prof_sys_thread_name.sh b/test/unit/prof_sys_thread_name.sh index 281cf9a0..1f02a8a8 100644 --- a/test/unit/prof_sys_thread_name.sh +++ b/test/unit/prof_sys_thread_name.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0,prof_sys_thread_name:true" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_sys_thread_name:true" fi diff --git a/test/unit/prof_tctx.sh b/test/unit/prof_tctx.sh index 8fcc7d8a..485f9bf0 100644 --- a/test/unit/prof_tctx.sh +++ b/test/unit/prof_tctx.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/safety_check.sh b/test/unit/safety_check.sh index 8fcc7d8a..485f9bf0 100644 --- a/test/unit/safety_check.sh +++ b/test/unit/safety_check.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" fi diff --git a/test/unit/size_check.sh b/test/unit/size_check.sh new file mode 100644 index 00000000..352d1107 --- /dev/null +++ b/test/unit/size_check.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; then + export MALLOC_CONF="prof:false" +fi diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 0594ceff..4f207e0e 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -151,6 +151,7 @@ test_tcache_max_impl(void) { TEST_BEGIN(test_tcache_max) { test_skip_if(!config_stats); test_skip_if(!opt_tcache); + test_skip_if(opt_prof); for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; From d038160f3b76ac1e5203e11008169366629c81cd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 22 Dec 2021 17:24:58 -0800 Subject: [PATCH 2156/2608] Fix 
shadowed variable usage. Verified with EXTRA_CFLAGS=-Wshadow. --- .../internal/jemalloc_internal_inlines_b.h | 1 - include/jemalloc/internal/nstime.h | 6 ++-- include/jemalloc/internal/prof_externs.h | 2 +- include/jemalloc/internal/prof_inlines.h | 4 +-- src/bin_info.c | 4 +-- src/ckh.c | 6 ++-- src/ctl.c | 6 ++-- src/extent.c | 5 ++- src/jemalloc.c | 32 +++++++++---------- src/pa.c | 7 ++-- src/pac.c | 8 ++--- src/prof.c | 10 +++--- src/prof_data.c | 8 ++--- src/prof_sys.c | 10 +++--- src/stats.c | 14 ++++---- test/analyze/prof_bias.c | 6 ++-- test/src/test.c | 4 +-- test/unit/arena_reset.c | 6 ++-- test/unit/atomic.c | 2 +- test/unit/batch_alloc.c | 23 ++++++------- test/unit/pa.c | 4 +-- test/unit/prof_idump.c | 6 ++-- test/unit/prof_recent.c | 2 +- test/unit/prof_reset.c | 31 +++++++++--------- test/unit/rb.c | 2 +- test/unit/retained.c | 30 ++++++++--------- 26 files changed, 119 insertions(+), 120 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 1de349e6..35d71d0a 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -20,7 +20,6 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - tcache_t *tcache = tsd_tcachep_get(tsd); tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, newarena); } diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 258b16e3..486e5cca 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -18,7 +18,7 @@ typedef struct { #endif } nstime_t; -static const nstime_t zero = NSTIME_ZERO_INITIALIZER; +static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER; void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); @@ 
-60,12 +60,12 @@ extern const char *prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { - nstime_copy(time, &zero); + nstime_copy(time, &nstime_zero); } JEMALLOC_ALWAYS_INLINE bool nstime_equals_zero(nstime_t *time) { - int diff = nstime_compare(time, &zero); + int diff = nstime_compare(time, &nstime_zero); assert(diff >= 0); return diff == 0; } diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 75dd90bf..953192f4 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -32,7 +32,7 @@ extern bool opt_prof_sys_thread_name; extern bool opt_prof_stats; /* Accessed via prof_active_[gs]et{_unlocked,}(). */ -extern bool prof_active; +extern bool prof_active_state; /* Accessed via prof_gdump_[gs]et{_unlocked,}(). */ extern bool prof_gdump_val; diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 7884e9a7..a8e7e7fb 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -12,7 +12,7 @@ prof_active_assert() { * If opt_prof is off, then prof_active must always be off, regardless * of whether prof_active_mtx is in effect or not. */ - assert(opt_prof || !prof_active); + assert(opt_prof || !prof_active_state); } JEMALLOC_ALWAYS_INLINE bool @@ -24,7 +24,7 @@ prof_active_get_unlocked(void) { * prof_active in the fast path, so there are no guarantees regarding * how long it will take for all threads to notice state changes. 
*/ - return prof_active; + return prof_active_state; } JEMALLOC_ALWAYS_INLINE bool diff --git a/src/bin_info.c b/src/bin_info.c index 20b93ea4..8629ef88 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -7,9 +7,9 @@ bin_info_t bin_infos[SC_NBINS]; static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - bin_info_t bin_infos[SC_NBINS]) { + bin_info_t infos[SC_NBINS]) { for (unsigned i = 0; i < SC_NBINS; i++) { - bin_info_t *bin_info = &bin_infos[i]; + bin_info_t *bin_info = &infos[i]; sc_t *sc = &sc_data->sc[i]; bin_info->reg_size = ((size_t)1U << sc->lg_base) + ((size_t)sc->ndelta << sc->lg_delta); diff --git a/src/ckh.c b/src/ckh.c index 9441fbad..8db4319c 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -356,14 +356,14 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { } bool -ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, +ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash, ckh_keycomp_t *keycomp) { bool ret; size_t mincells, usize; unsigned lg_mincells; assert(minitems > 0); - assert(hash != NULL); + assert(ckh_hash != NULL); assert(keycomp != NULL); #ifdef CKH_COUNT @@ -392,7 +392,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *hash, } ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; ckh->lg_curbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; - ckh->hash = hash; + ckh->hash = ckh_hash; ckh->keycomp = keycomp; usize = sz_sa2u(sizeof(ckhc_t) << lg_mincells, CACHELINE); diff --git a/src/ctl.c b/src/ctl.c index eccb9589..81ab1479 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3622,9 +3622,9 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, MUTEX_PROF_RESET(arena->tcache_ql_mtx); MUTEX_PROF_RESET(arena->base->mtx); - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); + for (szind_t j = 0; j < SC_NBINS; j++) { + for (unsigned k = 0; k < bin_infos[j].n_shards; k++) { + bin_t *bin = arena_get_bin(arena, 
j, k); MUTEX_PROF_RESET(bin->lock); } } diff --git a/src/extent.c b/src/extent.c index 4bbbff38..1c6fa1fc 100644 --- a/src/extent.c +++ b/src/extent.c @@ -748,9 +748,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, edata); } if (zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); - size_t size = edata_size_get(edata); - ehooks_zero(tsdn, ehooks, addr, size); + ehooks_zero(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata)); } return edata; label_err: diff --git a/src/jemalloc.c b/src/jemalloc.c index 18936575..d105dff2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1212,12 +1212,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { - int i; + int m; bool match = false; - for (i = 0; i < metadata_thp_mode_limit; i++) { - if (strncmp(metadata_thp_mode_names[i], + for (m = 0; m < metadata_thp_mode_limit; m++) { + if (strncmp(metadata_thp_mode_names[m], v, vlen) == 0) { - opt_metadata_thp = i; + opt_metadata_thp = m; match = true; break; } @@ -1230,18 +1230,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { - int i; + int m; bool match = false; - for (i = 0; i < dss_prec_limit; i++) { - if (strncmp(dss_prec_names[i], v, vlen) + for (m = 0; m < dss_prec_limit; m++) { + if (strncmp(dss_prec_names[m], v, vlen) == 0) { - if (extent_dss_prec_set(i)) { + if (extent_dss_prec_set(m)) { CONF_ERROR( "Error setting dss", k, klen, v, vlen); } else { opt_dss = - dss_prec_names[i]; + dss_prec_names[m]; match = true; break; } @@ -1428,16 +1428,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; - for (int i = 
percpu_arena_mode_names_base; i < - percpu_arena_mode_names_limit; i++) { - if (strncmp(percpu_arena_mode_names[i], + for (int m = percpu_arena_mode_names_base; m < + percpu_arena_mode_names_limit; m++) { + if (strncmp(percpu_arena_mode_names[m], v, vlen) == 0) { if (!have_percpu_arena) { CONF_ERROR( "No getcpu support", k, klen, v, vlen); } - opt_percpu_arena = i; + opt_percpu_arena = m; match = true; break; } @@ -1622,15 +1622,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (CONF_MATCH("thp")) { bool match = false; - for (int i = 0; i < thp_mode_names_limit; i++) { - if (strncmp(thp_mode_names[i],v, vlen) + for (int m = 0; m < thp_mode_names_limit; m++) { + if (strncmp(thp_mode_names[m],v, vlen) == 0) { if (!have_madvise_huge && !have_memcntl) { CONF_ERROR( "No THP support", k, klen, v, vlen); } - opt_thp = i; + opt_thp = m; match = true; break; } diff --git a/src/pa.c b/src/pa.c index 0f95e93a..eb7e4620 100644 --- a/src/pa.c +++ b/src/pa.c @@ -31,8 +31,9 @@ pa_central_init(pa_central_t *central, base_t *base, bool hpa, bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, emap_t *emap, base_t *base, unsigned ind, pa_shard_stats_t *stats, - malloc_mutex_t *stats_mtx, nstime_t *cur_time, size_t oversize_threshold, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { + malloc_mutex_t *stats_mtx, nstime_t *cur_time, + size_t pac_oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms) { /* This will change eventually, but for now it should hold. 
*/ assert(base_ind_get(base) == ind); if (edata_cache_init(&shard->edata_cache, base)) { @@ -40,7 +41,7 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, } if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, - cur_time, oversize_threshold, dirty_decay_ms, muzzy_decay_ms, + cur_time, pac_oversize_threshold, dirty_decay_ms, muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } diff --git a/src/pac.c b/src/pac.c index c6d9f146..53e3d823 100644 --- a/src/pac.c +++ b/src/pac.c @@ -36,9 +36,9 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, - edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, - ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, - malloc_mutex_t *stats_mtx) { + edata_cache_t *edata_cache, nstime_t *cur_time, + size_t pac_oversize_threshold, ssize_t dirty_decay_ms, + ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { unsigned ind = base_ind_get(base); /* * Delay coalescing for dirty extents despite the disruptive effect on @@ -73,7 +73,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } - atomic_store_zu(&pac->oversize_threshold, oversize_threshold, + atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold, ATOMIC_RELAXED); if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { return true; diff --git a/src/prof.c b/src/prof.c index 625bcd73..f708d108 100644 --- a/src/prof.c +++ b/src/prof.c @@ -43,7 +43,7 @@ static counter_accum_t prof_idump_accumulated; * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). 
*/ -bool prof_active; +bool prof_active_state; static malloc_mutex_t prof_active_mtx; /* @@ -416,7 +416,7 @@ prof_active_get(tsdn_t *tsdn) { prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); - prof_active_current = prof_active; + prof_active_current = prof_active_state; malloc_mutex_unlock(tsdn, &prof_active_mtx); return prof_active_current; } @@ -427,8 +427,8 @@ prof_active_set(tsdn_t *tsdn, bool active) { prof_active_assert(); malloc_mutex_lock(tsdn, &prof_active_mtx); - prof_active_old = prof_active; - prof_active = active; + prof_active_old = prof_active_state; + prof_active_state = active; malloc_mutex_unlock(tsdn, &prof_active_mtx); prof_active_assert(); return prof_active_old; @@ -629,7 +629,7 @@ prof_boot2(tsd_t *tsd, base_t *base) { if (opt_prof) { lg_prof_sample = opt_lg_prof_sample; prof_unbias_map_init(); - prof_active = opt_prof_active; + prof_active_state = opt_prof_active; prof_gdump_val = opt_prof_gdump; prof_thread_active_init = opt_prof_thread_active_init; diff --git a/src/prof_data.c b/src/prof_data.c index 63349850..3ef0100d 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -397,7 +397,7 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { /* Used in unit tests. 
*/ static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { size_t *tdata_count = (size_t *)arg; @@ -895,7 +895,7 @@ struct prof_tdata_merge_iter_arg_s { }; static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { prof_tdata_merge_iter_arg_t *arg = (prof_tdata_merge_iter_arg_t *)opaque; @@ -939,7 +939,7 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, } static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { if (!tdata->dumping) { return NULL; @@ -1278,7 +1278,7 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { } static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata, +prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { tsdn_t *tsdn = (tsdn_t *)arg; diff --git a/src/prof_sys.c b/src/prof_sys.c index b7a3a2cf..b5f1f5b2 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -561,18 +561,18 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { cassert(config_prof); assert(tsd_reentrancy_level_get(tsd) == 0); - const char *prof_prefix = prof_prefix_get(tsd_tsdn(tsd)); + const char *prefix = prof_prefix_get(tsd_tsdn(tsd)); if (vseq != VSEQ_INVALID) { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", prof_prefix, - prof_getpid(), prof_dump_seq, v, vseq); + "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), + prof_dump_seq, v, vseq); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", prof_prefix, - prof_getpid(), prof_dump_seq, v); + "%s.%d.%"FMTu64".%c.heap", prefix, 
prof_getpid(), + prof_dump_seq, v); } prof_dump_seq++; } diff --git a/src/stats.c b/src/stats.c index 7af5782a..b1b3906d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1606,15 +1606,15 @@ stats_general_print(emitter_t *emitter) { "Maximum thread-cached size class", emitter_type_size, &sv); } - unsigned nbins; - CTL_GET("arenas.nbins", &nbins, unsigned); + unsigned arenas_nbins; + CTL_GET("arenas.nbins", &arenas_nbins, unsigned); emitter_kv(emitter, "nbins", "Number of bin size classes", - emitter_type_unsigned, &nbins); + emitter_type_unsigned, &arenas_nbins); - unsigned nhbins; - CTL_GET("arenas.nhbins", &nhbins, unsigned); + unsigned arenas_nhbins; + CTL_GET("arenas.nhbins", &arenas_nhbins, unsigned); emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes", - emitter_type_unsigned, &nhbins); + emitter_type_unsigned, &arenas_nhbins); /* * We do enough mallctls in a loop that we actually want to omit them @@ -1624,7 +1624,7 @@ stats_general_print(emitter_t *emitter) { emitter_json_array_kv_begin(emitter, "bin"); size_t arenas_bin_mib[CTL_MAX_DEPTH]; CTL_LEAF_PREPARE(arenas_bin_mib, 0, "arenas.bin"); - for (unsigned i = 0; i < nbins; i++) { + for (unsigned i = 0; i < arenas_nbins; i++) { arenas_bin_mib[2] = i; emitter_json_object_begin(emitter); diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c index 4b960a66..a96ca942 100644 --- a/test/analyze/prof_bias.c +++ b/test/analyze/prof_bias.c @@ -45,9 +45,9 @@ do_allocs(size_t sz, size_t cnt, bool do_frees) { int main(void) { - size_t lg_prof_sample = 19; - int err = mallctl("prof.reset", NULL, NULL, (void *)&lg_prof_sample, - sizeof(lg_prof_sample)); + size_t lg_prof_sample_local = 19; + int err = mallctl("prof.reset", NULL, NULL, + (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local)); assert(err == 0); prof_backtrace_hook_set(mock_backtrace); diff --git a/test/src/test.c b/test/src/test.c index f97ce4d1..4cd803e5 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -87,8 +87,8 @@ 
test_fail(const char *format, ...) { } static const char * -test_status_string(test_status_t test_status) { - switch (test_status) { +test_status_string(test_status_t current_status) { + switch (current_status) { case test_status_pass: return "pass"; case test_status_skip: return "skip"; case test_status_fail: return "fail"; diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 589689c0..8ef0786c 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -258,12 +258,12 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { /* Try arena.create with custom hooks. */ size_t sz = sizeof(extent_hooks_t *); - extent_hooks_t *default_hooks; - expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, + extent_hooks_t *a0_default_hooks; + expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&a0_default_hooks, &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure"); /* Default impl; but wrapped as "customized". */ - extent_hooks_t new_hooks = *default_hooks; + extent_hooks_t new_hooks = *a0_default_hooks; extent_hooks_t *hook = &new_hooks; sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 1326a11c..c2ec8c7e 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -45,7 +45,7 @@ */ \ atomic_store_##ta(&atom, val1, ATOMIC_RELAXED); \ success = false; \ - for (int i = 0; i < 10 && !success; i++) { \ + for (int retry = 0; retry < 10 && !success; retry++) { \ expected = val2; \ success = atomic_compare_exchange_weak_##ta(&atom, \ &expected, val3, ATOMIC_RELAXED, ATOMIC_RELAXED); \ diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 992990f3..901c52b1 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" #define BATCH_MAX ((1U << 16) + 1024) -static void *ptrs[BATCH_MAX]; +static void *global_ptrs[BATCH_MAX]; #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) @@ 
-122,13 +122,14 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - size_t filled = batch_alloc_wrapper(ptrs, batch, size, - flags); + size_t filled = batch_alloc_wrapper(global_ptrs, batch, + size, flags); assert_zu_eq(filled, batch, ""); - verify_batch_basic(tsd, ptrs, batch, usize, zero); - verify_batch_locality(tsd, ptrs, batch, usize, arena, - nregs); - release_batch(ptrs, batch, usize); + verify_batch_basic(tsd, global_ptrs, batch, usize, + zero); + verify_batch_locality(tsd, global_ptrs, batch, usize, + arena, nregs); + release_batch(global_ptrs, batch, usize); } } @@ -163,16 +164,16 @@ TEST_BEGIN(test_batch_alloc_large) { size_t size = SC_LARGE_MINCLASS; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); - size_t filled = batch_alloc(ptrs, batch, size, 0); + size_t filled = batch_alloc(global_ptrs, batch, size, 0); assert_zu_eq(filled, batch, ""); - release_batch(ptrs, batch, size); + release_batch(global_ptrs, batch, size); } size = tcache_maxclass + 1; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); - size_t filled = batch_alloc(ptrs, batch, size, 0); + size_t filled = batch_alloc(global_ptrs, batch, size, 0); assert_zu_eq(filled, batch, ""); - release_batch(ptrs, batch, size); + release_batch(global_ptrs, batch, size); } } TEST_END diff --git a/test/unit/pa.c b/test/unit/pa.c index 505b6fa9..b1e2f6e9 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -69,10 +69,10 @@ test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { &hpa_hooks_default); assert_false(err, ""); - const size_t oversize_threshold = 8 * 1024 * 1024; + const size_t pa_oversize_threshold = 8 * 1024 * 1024; err = pa_shard_init(TSDN_NULL, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, /* ind */ 1, &test_data->stats, - &test_data->stats_mtx, &time, oversize_threshold, dirty_decay_ms, + 
&test_data->stats_mtx, &time, pa_oversize_threshold, dirty_decay_ms, muzzy_decay_ms); assert_false(err, ""); diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index e9f5e56c..455ac529 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -26,14 +26,14 @@ TEST_BEGIN(test_idump) { bool active; void *p; - const char *prefix = TEST_PREFIX; + const char *test_prefix = TEST_PREFIX; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&prefix, - sizeof(prefix)), 0, + expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&test_prefix, + sizeof(test_prefix)), 0, "Unexpected mallctl failure while overwriting dump prefix"); expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 9974d105..c23b01ec 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -15,7 +15,7 @@ confirm_prof_setup() { "opt_prof_recent_alloc_max not set correctly"); /* Dynamics */ - assert_true(prof_active, "prof_active not on"); + assert_true(prof_active_state, "prof_active not on"); assert_zd_eq(prof_recent_alloc_max_ctl_read(), OPT_ALLOC_MAX, "prof_recent_alloc_max not set correctly"); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index a0fb0389..9b33b205 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -21,26 +21,25 @@ set_prof_active(bool active) { static size_t get_lg_prof_sample(void) { - size_t lg_prof_sample; + size_t ret; size_t sz = sizeof(size_t); - expect_d_eq(mallctl("prof.lg_sample", (void *)&lg_prof_sample, &sz, - NULL, 0), 0, + expect_d_eq(mallctl("prof.lg_sample", (void *)&ret, &sz, NULL, 0), 0, "Unexpected mallctl failure while reading profiling sample rate"); - return lg_prof_sample; + return ret; } static void -do_prof_reset(size_t lg_prof_sample) { +do_prof_reset(size_t lg_prof_sample_input) { expect_d_eq(mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample, sizeof(size_t)), 0, + 
(void *)&lg_prof_sample_input, sizeof(size_t)), 0, "Unexpected mallctl failure while resetting profile data"); - expect_zu_eq(lg_prof_sample, get_lg_prof_sample(), + expect_zu_eq(lg_prof_sample_input, get_lg_prof_sample(), "Expected profile sample rate change"); } TEST_BEGIN(test_prof_reset_basic) { - size_t lg_prof_sample_orig, lg_prof_sample, lg_prof_sample_next; + size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next; size_t sz; unsigned i; @@ -52,8 +51,8 @@ TEST_BEGIN(test_prof_reset_basic) { "Unexpected mallctl failure while reading profiling sample rate"); expect_zu_eq(lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); @@ -61,8 +60,8 @@ TEST_BEGIN(test_prof_reset_basic) { for (i = 0; i < 2; i++) { expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure while resetting profile data"); - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected profile sample rate change"); } @@ -70,15 +69,15 @@ TEST_BEGIN(test_prof_reset_basic) { lg_prof_sample_next = 1; for (i = 0; i < 2; i++) { do_prof_reset(lg_prof_sample_next); - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample, lg_prof_sample_next, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_cur, lg_prof_sample_next, "Expected profile sample rate change"); lg_prof_sample_next = lg_prof_sample_orig; } /* Make sure the test code restored prof.lg_sample. 
*/ - lg_prof_sample = get_lg_prof_sample(); - expect_zu_eq(lg_prof_sample_orig, lg_prof_sample, + lg_prof_sample_cur = get_lg_prof_sample(); + expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected disagreement between \"opt.lg_prof_sample\" and " "\"prof.lg_sample\""); } diff --git a/test/unit/rb.c b/test/unit/rb.c index 7d4c454d..827ec510 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -964,7 +964,7 @@ do_update_search_test(int nnodes, int ntrees, int nremovals, tree_insert(&tree, &nodes[j]); } } - for (int i = 0; i < nupdates; i++) { + for (int j = 0; j < nupdates; j++) { uint32_t ind = gen_rand32_range(sfmt, nnodes); nodes[ind].specialness = 1 - nodes[ind].specialness; tree_update_summaries(&tree, &nodes[ind]); diff --git a/test/unit/retained.c b/test/unit/retained.c index 53c90f24..37ff88f6 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -13,43 +13,43 @@ static atomic_u_t nfinished; static unsigned do_arena_create(extent_hooks_t *h) { - unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, + unsigned new_arena_ind; + size_t ind_sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz, (void *)(h != NULL ? &h : NULL), (h != NULL ? 
sizeof(h) : 0)), 0, "Unexpected mallctl() failure"); - return arena_ind; + return new_arena_ind; } static void -do_arena_destroy(unsigned arena_ind) { +do_arena_destroy(unsigned ind) { size_t mib[3]; size_t miblen; miblen = sizeof(mib)/sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - mib[1] = (size_t)arena_ind; + mib[1] = (size_t)ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, "Unexpected mallctlbymib() failure"); } static void do_refresh(void) { - uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)), 0, "Unexpected mallctl() failure"); + uint64_t refresh_epoch = 1; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&refresh_epoch, + sizeof(refresh_epoch)), 0, "Unexpected mallctl() failure"); } static size_t -do_get_size_impl(const char *cmd, unsigned arena_ind) { +do_get_size_impl(const char *cmd, unsigned ind) { size_t mib[4]; size_t miblen = sizeof(mib) / sizeof(size_t); size_t z = sizeof(size_t); expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); - mib[2] = arena_ind; + mib[2] = ind; size_t size; expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0, "Unexpected mallctlbymib([\"%s\"], ...) 
failure", cmd); @@ -58,13 +58,13 @@ do_get_size_impl(const char *cmd, unsigned arena_ind) { } static size_t -do_get_active(unsigned arena_ind) { - return do_get_size_impl("stats.arenas.0.pactive", arena_ind) * PAGE; +do_get_active(unsigned ind) { + return do_get_size_impl("stats.arenas.0.pactive", ind) * PAGE; } static size_t -do_get_mapped(unsigned arena_ind) { - return do_get_size_impl("stats.arenas.0.mapped", arena_ind); +do_get_mapped(unsigned ind) { + return do_get_size_impl("stats.arenas.0.mapped", ind); } static void * From 06aac61c4b261e5d1c8dcf3c7dd7921e9e395d62 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 29 Nov 2021 15:45:24 -0800 Subject: [PATCH 2157/2608] Split the core logic of tcache flush into a separate function. The core function takes a ptr array as input (containing items to be flushed), which will be reused to flush sanitizer-stashed items. --- src/tcache.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 39a4ea6e..5c3d5b17 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -300,7 +300,7 @@ tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, unsigned rem, bool small) { + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -313,24 +313,15 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } else { assert(binind < nhbins); } - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[binind]); - assert((cache_bin_sz_t)rem <= ncached); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned nflush = ncached - rem; /* * Variable length array must have > 0 length; the last element is never * touched 
(it's just included to satisfy the no-zero-length rule). */ VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - - cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], - &ptrs, nflush); - - tcache_bin_flush_edatas_lookup(tsd, &ptrs, binind, nflush, item_edata); + tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); /* * The slabs where we freed the last remaining object in the slab (and @@ -407,7 +398,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, */ if (!small) { for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs.ptr[i]; + void *ptr = ptrs->ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); @@ -429,7 +420,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); } for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs.ptr[i]; + void *ptr = ptrs->ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); if (!tcache_bin_flush_match(edata, cur_arena_ind, @@ -440,7 +431,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * arena. Either way, stash the object so that * it can be handled in a future pass. 
*/ - ptrs.ptr[ndeferred] = ptr; + ptrs->ptr[ndeferred] = ptr; item_edata[ndeferred].edata = edata; ndeferred++; continue; @@ -501,6 +492,23 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, unsigned rem, bool small) { + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, + &tcache_bin_info[binind]); + assert((cache_bin_sz_t)rem <= ncached); + unsigned nflush = ncached - rem; + + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); + cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], + &ptrs, nflush); + + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, + small); + cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs, ncached - rem); } @@ -508,13 +516,13 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, true); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, true); } void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, rem, false); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false); } void From b75822bc6e5cbbf463c611d8dea32857f8de9d3e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 18 Oct 2021 17:33:15 -0700 Subject: [PATCH 2158/2608] Implement use-after-free detection using junk and stash. On deallocation, sampled pointers (specially aligned) get junked and stashed into tcache (to prevent immediate reuse). The expected behavior is to have read-after-free corrupted and stopped by the junk-filling, while write-after-free is checked when flushing the stashed pointers. 
--- Makefile.in | 1 + configure.ac | 17 ++ include/jemalloc/internal/cache_bin.h | 101 +++++++- .../internal/jemalloc_internal_defs.h.in | 3 + .../internal/jemalloc_internal_externs.h | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 8 + include/jemalloc/internal/san.h | 68 ++++++ include/jemalloc/internal/tcache_externs.h | 24 +- include/jemalloc/internal/tcache_inlines.h | 20 ++ src/arena.c | 2 + src/cache_bin.c | 2 + src/ctl.c | 4 + src/jemalloc.c | 74 +++++- src/san.c | 60 +++++ src/tcache.c | 47 ++++ test/include/test/arena_util.h | 6 + test/unit/cache_bin.c | 149 ++++++++++-- test/unit/mallctl.c | 15 +- test/unit/tcache_max.c | 1 + test/unit/tcache_max.sh | 2 +- test/unit/uaf.c | 225 ++++++++++++++++++ test/unit/uaf.sh | 3 + 22 files changed, 793 insertions(+), 42 deletions(-) create mode 100644 test/unit/uaf.c create mode 100644 test/unit/uaf.sh diff --git a/Makefile.in b/Makefile.in index 50c586c5..7a820fe7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -284,6 +284,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/thread_event.c \ $(srcroot)test/unit/ticker.c \ $(srcroot)test/unit/tsd.c \ + $(srcroot)test/unit/uaf.c \ $(srcroot)test/unit/witness.c \ $(srcroot)test/unit/zero.c \ $(srcroot)test/unit/zero_realloc_abort.c \ diff --git a/configure.ac b/configure.ac index e18c0cc2..49a12ac8 100644 --- a/configure.ac +++ b/configure.ac @@ -1564,6 +1564,23 @@ if test "x$enable_opt_size_checks" = "x1" ; then fi AC_SUBST([enable_opt_size_checks]) +dnl Do not check for use-after-free by default. 
+AC_ARG_ENABLE([uaf-detection], + [AS_HELP_STRING([--enable-uaf-detection], + [Allow sampled junk-filling on deallocation to detect use-after-free])], +[if test "x$enable_uaf_detection" = "xno" ; then + enable_uaf_detection="0" +else + enable_uaf_detection="1" +fi +], +[enable_uaf_detection="0"] +) +if test "x$enable_uaf_detection" = "x1" ; then + AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ]) +fi +AC_SUBST([enable_uaf_detection]) + JE_COMPILABLE([a program using __builtin_unreachable], [ void foo (void) { __builtin_unreachable(); diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 41942e97..266897f9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -98,7 +98,7 @@ struct cache_bin_s { * when the array is nonempty -- this is in the array). * * Recall that since the stack grows down, this is the lowest address in - * the array. + * the array. Only adjusted when stashing items. */ uint16_t low_bits_full; @@ -107,7 +107,7 @@ struct cache_bin_s { * is empty. * * The stack grows down -- this is one past the highest address in the - * array. + * array. Immutable after initialization. */ uint16_t low_bits_empty; }; @@ -136,6 +136,26 @@ cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, descriptor->bins = bins; } +JEMALLOC_ALWAYS_INLINE bool +cache_bin_nonfast_aligned(const void *ptr) { + if (!config_uaf_detection) { + return false; + } + /* + * Currently we use alignment to decide which pointer to junk & stash on + * dealloc (for catching use-after-free). In some common cases a + * page-aligned check is needed already (sdalloc w/ config_prof), so we + * are getting it more or less for free -- no added instructions on + * free_fastpath. + * + * Another way of deciding which pointer to sample, is adding another + * thread_event to pick one every N bytes. 
That also adds no cost on + * the fastpath, however it will tend to pick large allocations which is + * not the desired behavior. + */ + return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0; +} + /* Returns ncached_max: Upper limit on ncached. */ static inline cache_bin_sz_t cache_bin_info_ncached_max(cache_bin_info_t *info) { @@ -232,6 +252,20 @@ cache_bin_empty_position_get(cache_bin_t *bin) { return ret; } +/* + * Internal. + * + * A pointer to the position with the lowest address of the backing array. + */ +static inline void ** +cache_bin_full_position_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + void **ret = cache_bin_empty_position_get(bin) - ncached_max; + assert(ret <= bin->stack_head); + + return ret; +} + /* * As the name implies. This is important since it's not correct to try to * batch fill a nonempty cache bin. @@ -359,13 +393,17 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { return n; } +JEMALLOC_ALWAYS_INLINE bool +cache_bin_full(cache_bin_t *bin) { + return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); +} + /* * Free an object into the given bin. Fails only if the bin is full. */ JEMALLOC_ALWAYS_INLINE bool cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { - uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; - if (unlikely(low_bits == bin->low_bits_full)) { + if (unlikely(cache_bin_full(bin))) { return false; } @@ -377,7 +415,39 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return true; } -/** +/* Returns false if failed to stash (i.e. bin is full). */ +JEMALLOC_ALWAYS_INLINE bool +cache_bin_stash(cache_bin_t *bin, void *ptr) { + if (cache_bin_full(bin)) { + return false; + } + + /* Stash at the full position, in the [full, head) range. */ + uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; + /* Wraparound handled as well. 
*/ + uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + *(void **)((uintptr_t)bin->stack_head - diff) = ptr; + + assert(!cache_bin_full(bin)); + bin->low_bits_full += sizeof(void *); + cache_bin_assert_earlier(bin, bin->low_bits_full, low_bits_head); + + return true; +} + +JEMALLOC_ALWAYS_INLINE cache_bin_sz_t +cache_bin_nstashed_get(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + void **full = cache_bin_full_position_get(bin, info); + + uint16_t nstashed = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, + bin->low_bits_full) / sizeof(void *); + assert(nstashed <= ncached_max); + + return nstashed; +} + +/* * Filling and flushing are done in batch, on arrays of void *s. For filling, * the arrays go forward, and can be accessed with ordinary array arithmetic. * For flushing, we work from the end backwards, and so need to use special @@ -463,6 +533,27 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_low_water_adjust(bin); } +static inline void +cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, + cache_bin_info_t *info, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nstashed) { + assert(nstashed > 0); + assert(cache_bin_nstashed_get(bin, info) == nstashed); + + void **full = cache_bin_full_position_get(bin, info); + arr->ptr = full; + assert(*arr->ptr != NULL); +} + +static inline void +cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { + void **full = cache_bin_full_position_get(bin, info); + + /* Reset the bin local full position. */ + bin->low_bits_full = (uint16_t)(uintptr_t)full; + assert(cache_bin_nstashed_get(bin, info) == 0); +} + /* * Initialize a cache_bin_info to represent up to the given number of items in * the cache_bins it is associated with. 
diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index a4be549b..0cb15d3e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -415,6 +415,9 @@ /* Performs additional size checks when defined. */ #undef JEMALLOC_OPT_SIZE_CHECKS +/* Allows sampled junk and stash for checking use-after-free when defined. */ +#undef JEMALLOC_UAF_DETECTION + /* Darwin VM_MAKE_TAG support */ #undef JEMALLOC_HAVE_VM_MAKE_TAG diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index e8bfb03b..fa1fabeb 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -35,6 +35,9 @@ extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; +/* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */ +extern uintptr_t san_cache_bin_nonfast_mask; + /* Number of CPUs. */ extern unsigned ncpus; diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index f5d83a66..5ce77d96 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -198,6 +198,14 @@ static const bool config_opt_size_checks = #endif ; +static const bool config_uaf_detection = +#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) + true +#else + false +#endif + ; + /* Whether or not the C++ extensions are enabled. 
*/ static const bool config_enable_cxx = #ifdef JEMALLOC_ENABLE_CXX diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 70debf3a..f97211a7 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -10,9 +10,16 @@ #define SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT 0 #define SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT 0 +#define SAN_LG_UAF_ALIGN_DEFAULT (-1) +#define SAN_CACHE_BIN_NONFAST_MASK_DEFAULT (uintptr_t)(-1) + +static const uintptr_t uaf_detect_junk = (uintptr_t)0x5b5b5b5b5b5b5b5bULL; + /* 0 means disabled, i.e. never guarded. */ extern size_t opt_san_guard_large; extern size_t opt_san_guard_small; +/* -1 means disabled, i.e. never check for use-after-free. */ +extern ssize_t opt_lg_san_uaf_align; void san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool left, bool right, bool remap); @@ -24,7 +31,10 @@ void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, */ void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); +void san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize); + void tsd_san_init(tsd_t *tsd); +void san_init(ssize_t lg_san_uaf_align); static inline void san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, @@ -121,4 +131,62 @@ san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { } } +static inline void +san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, + void **last) { + size_t ptr_sz = sizeof(void *); + + *first = ptr; + + *mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1))); + assert(*first != *mid || usize == ptr_sz); + assert((uintptr_t)*first <= (uintptr_t)*mid); + + /* + * When usize > 32K, the gap between requested_size and usize might be + * greater than 4K -- this means the last write may access an + * likely-untouched page (default settings w/ 4K pages). 
However by + * default the tcache only goes up to the 32K size class, and is usually + * tuned lower instead of higher, which makes it less of a concern. + */ + *last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk)); + assert(*first != *last || usize == ptr_sz); + assert(*mid != *last || usize <= ptr_sz * 2); + assert((uintptr_t)*mid <= (uintptr_t)*last); +} + +static inline bool +san_junk_ptr_should_slow(void) { + /* + * The latter condition (pointer size greater than the min size class) + * is not expected -- fall back to the slow path for simplicity. + */ + return config_debug || (LG_SIZEOF_PTR > SC_LG_TINY_MIN); +} + +static inline void +san_junk_ptr(void *ptr, size_t usize) { + if (san_junk_ptr_should_slow()) { + memset(ptr, (char)uaf_detect_junk, usize); + return; + } + + void *first, *mid, *last; + san_junk_ptr_locations(ptr, usize, &first, &mid, &last); + *(uintptr_t *)first = uaf_detect_junk; + *(uintptr_t *)mid = uaf_detect_junk; + *(uintptr_t *)last = uaf_detect_junk; +} + +static inline bool +san_uaf_detection_enabled(void) { + bool ret = config_uaf_detection && (opt_lg_san_uaf_align != -1); + if (config_uaf_detection && ret) { + assert(san_cache_bin_nonfast_mask == ((uintptr_t)1 << + opt_lg_san_uaf_align) - 1); + } + + return ret; +} + #endif /* JEMALLOC_INTERNAL_GUARD_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 95f3a682..a2ab7101 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -34,23 +34,25 @@ extern cache_bin_info_t *tcache_bin_info; */ extern tcaches_t *tcaches; -size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, +size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, bool *tcache_success); -void 
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *bin, + szind_t binind, bool is_small); +void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 926c852d..2634f145 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/util.h" @@ -61,6 +62,8 @@ tcache_alloc_small(tsd_t 
*tsd, arena_t *arena, tcache_t *tcache, return arena_malloc_hard(tsd_tsdn(tsd), arena, size, binind, zero); } + tcache_bin_flush_stashed(tsd, tcache, bin, binind, + /* is_small */ true); ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin, binind, &tcache_hard_success); @@ -100,6 +103,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (unlikely(arena == NULL)) { return NULL; } + tcache_bin_flush_stashed(tsd, tcache, bin, binind, + /* is_small */ false); ret = large_malloc(tsd_tsdn(tsd), arena, sz_s2u(size), zero); if (ret == NULL) { @@ -126,6 +131,21 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); cache_bin_t *bin = &tcache->bins[binind]; + /* + * Not marking the branch unlikely because this is past free_fastpath() + * (which handles the most common cases), i.e. at this point it's often + * uncommon cases. + */ + if (cache_bin_nonfast_aligned(ptr)) { + /* Junk unconditionally, even if bin is full. */ + san_junk_ptr(ptr, sz_index2size(binind)); + if (cache_bin_stash(bin, ptr)) { + return; + } + assert(cache_bin_full(bin)); + /* Bin full; fall through into the flush branch. 
*/ + } + if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { if (unlikely(tcache_small_bin_disabled(binind, bin))) { arena_dalloc_small(tsd_tsdn(tsd), ptr); diff --git a/src/arena.c b/src/arena.c index 121832a7..ed41d6d8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,6 +157,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, cache_bin_t *cache_bin = &descriptor->bins[i]; astats->tcache_bytes += cache_bin_ncached_get_remote(cache_bin, + &tcache_bin_info[i]) * sz_index2size(i) + + cache_bin_nstashed_get(cache_bin, &tcache_bin_info[i]) * sz_index2size(i); } } diff --git a/src/cache_bin.c b/src/cache_bin.c index b7470823..b8d81ef1 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -2,6 +2,8 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/bit_util.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/safety_check.h" void cache_bin_info_init(cache_bin_info_t *info, diff --git a/src/ctl.c b/src/ctl.c index 81ab1479..78dc5792 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -150,6 +150,7 @@ CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) +CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) @@ -472,6 +473,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, + {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)} }; @@ -2201,6 +2203,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, prof_time_res_mode_names[opt_prof_time_res], const char *) +CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, + opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, 
zero_realloc_mode_names[opt_zero_realloc_action], const char *) diff --git a/src/jemalloc.c b/src/jemalloc.c index d105dff2..c8eef2de 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1657,6 +1657,31 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } + if (config_uaf_detection && + CONF_MATCH("lg_san_uaf_align")) { + ssize_t a; + CONF_VALUE_READ(ssize_t, a) + if (CONF_VALUE_READ_FAIL() || a < -1) { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + if (a == -1) { + opt_lg_san_uaf_align = -1; + CONF_CONTINUE; + } + + /* clip if necessary */ + ssize_t max_allowed = (sizeof(size_t) << 3) - 1; + ssize_t min_allowed = LG_PAGE; + if (a > max_allowed) { + a = max_allowed; + } else if (a < min_allowed) { + a = min_allowed; + } + + opt_lg_san_uaf_align = a; + CONF_CONTINUE; + } CONF_HANDLE_SIZE_T(opt_san_guard_small, "san_guard_small", 0, SIZE_T_MAX, @@ -1760,6 +1785,7 @@ malloc_init_hard_a0_locked() { prof_boot0(); } malloc_conf_init(&sc_data, bin_shard_sizes); + san_init(opt_lg_san_uaf_align); sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); @@ -2970,6 +2996,41 @@ free_default(void *ptr) { } } +JEMALLOC_ALWAYS_INLINE bool +free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { + /* + * free_fastpath do not handle two uncommon cases: 1) sampled profiled + * objects and 2) sampled junk & stash for use-after-free detection. + * Both have special alignments which are used to escape the fastpath. + * + * prof_sample is page-aligned, which covers the UAF check when both + * are enabled (the assertion below). Avoiding redundant checks since + * this is on the fastpath -- at most one runtime branch from this. + */ + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } + + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. 
*/ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } + + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } + + return false; +} + /* Returns whether or not the free attempt was successful. */ JEMALLOC_ALWAYS_INLINE bool free_fastpath(void *ptr, size_t size, bool size_hint) { @@ -2992,18 +3053,21 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { &arena_emap_global, ptr, &alloc_ctx); /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab)) { + if (unlikely(err || !alloc_ctx.slab || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { return false; } assert(alloc_ctx.szind != SC_NSIZES); } else { /* - * Check for both sizes that are too large, and for sampled - * objects. Sampled objects are always page-aligned. The - * sampled object check will also check for null ptr. + * Check for both sizes that are too large, and for sampled / + * special aligned objects. The alignment check will also check + * for null ptr. */ if (unlikely(size > SC_LOOKUP_MAXCLASS || - (config_prof && prof_sample_aligned(ptr)))) { + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { return false; } alloc_ctx.szind = sz_size2index_lookup(size); diff --git a/src/san.c b/src/san.c index 15fdb7ff..6e512911 100644 --- a/src/san.c +++ b/src/san.c @@ -10,6 +10,15 @@ size_t opt_san_guard_large = SAN_GUARD_LARGE_EVERY_N_EXTENTS_DEFAULT; size_t opt_san_guard_small = SAN_GUARD_SMALL_EVERY_N_EXTENTS_DEFAULT; +/* Aligned (-1 is off) ptrs will be junked & stashed on dealloc. */ +ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT; + +/* + * Initialized in san_init(). When disabled, the mask is set to (uintptr_t)-1 + * to always fail the nonfast_align check. 
+ */ +uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT; + static inline void san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, uintptr_t *addr, size_t size, bool left, bool right) { @@ -141,8 +150,59 @@ san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, /* right */ true, /* remap */ false); } +static bool +san_stashed_corrupted(void *ptr, size_t size) { + if (san_junk_ptr_should_slow()) { + for (size_t i = 0; i < size; i++) { + if (((char *)ptr)[i] != (char)uaf_detect_junk) { + return true; + } + } + return false; + } + + void *first, *mid, *last; + san_junk_ptr_locations(ptr, size, &first, &mid, &last); + if (*(uintptr_t *)first != uaf_detect_junk || + *(uintptr_t *)mid != uaf_detect_junk || + *(uintptr_t *)last != uaf_detect_junk) { + return true; + } + + return false; +} + +void +san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize) { + /* + * Verify that the junked-filled & stashed pointers remain unchanged, to + * detect write-after-free. 
+ */ + for (size_t n = 0; n < nstashed; n++) { + void *stashed = ptrs[n]; + assert(stashed != NULL); + assert(cache_bin_nonfast_aligned(stashed)); + if (unlikely(san_stashed_corrupted(stashed, usize))) { + safety_check_fail(": Write-after-free " + "detected on deallocated pointer %p (size %zu).\n", + stashed, usize); + } + } +} + void tsd_san_init(tsd_t *tsd) { *tsd_san_extents_until_guard_smallp_get(tsd) = opt_san_guard_small; *tsd_san_extents_until_guard_largep_get(tsd) = opt_san_guard_large; } + +void +san_init(ssize_t lg_san_uaf_align) { + assert(lg_san_uaf_align == -1 || lg_san_uaf_align >= LG_PAGE); + if (lg_san_uaf_align == -1) { + san_cache_bin_nonfast_mask = (uintptr_t)-1; + return; + } + + san_cache_bin_nonfast_mask = ((uintptr_t)1 << lg_san_uaf_align) - 1; +} diff --git a/src/tcache.c b/src/tcache.c index 5c3d5b17..74f0d83b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" /******************************************************************************/ @@ -179,6 +180,8 @@ tcache_event(tsd_t *tsd) { bool is_small = (szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; + tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &tcache_bin_info[szind]); if (low_water > 0) { @@ -497,6 +500,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { + tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &tcache_bin_info[binind]); assert((cache_bin_sz_t)rem <= ncached); @@ -525,6 +530,48 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t 
*tcache, cache_bin_t *cache_bin, tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false); } +/* + * Flushing stashed happens when 1) tcache fill, 2) tcache flush, or 3) tcache + * GC event. This makes sure that the stashed items do not hold memory for too + * long, and new buffers can only be allocated when nothing is stashed. + * + * The downside is, the time between stash and flush may be relatively short, + * especially when the request rate is high. It lowers the chance of detecting + * write-after-free -- however that is a delayed detection anyway, and is less + * of a focus than the memory overhead. + */ +void +tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, bool is_small) { + cache_bin_info_t *info = &tcache_bin_info[binind]; + /* + * The two below are for assertion only. The content of original cached + * items remain unchanged -- the stashed items reside on the other end + * of the stack. Checking the stack head and ncached to verify. 
+ */ + void *head_content = *cache_bin->stack_head; + cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin, + info); + + cache_bin_sz_t nstashed = cache_bin_nstashed_get(cache_bin, info); + assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info)); + if (nstashed == 0) { + return; + } + + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed); + cache_bin_init_ptr_array_for_stashed(cache_bin, binind, info, &ptrs, + nstashed); + san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); + tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed, + is_small); + cache_bin_finish_flush_stashed(cache_bin, info); + + assert(cache_bin_nstashed_get(cache_bin, info) == 0); + assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached); + assert(head_content == *cache_bin->stack_head); +} + void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena) { diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h index 524ee218..9a41dacb 100644 --- a/test/include/test/arena_util.h +++ b/test/include/test/arena_util.h @@ -26,6 +26,12 @@ do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { static inline void do_arena_destroy(unsigned arena_ind) { + /* + * For convenience, flush tcache in case there are cached items. + * However not assert success since the tcache may be disabled. 
+ */ + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + size_t mib[3]; size_t miblen = sizeof(mib)/sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 56e69018..2b093b4d 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -82,27 +82,30 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, free(out); } +static void +test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) { + size_t size; + size_t alignment; + cache_bin_info_compute_alloc(info, 1, &size, &alignment); + void *mem = mallocx(size, MALLOCX_ALIGN(alignment)); + assert_ptr_not_null(mem, "Unexpected mallocx failure"); + + size_t cur_offset = 0; + cache_bin_preincrement(info, 1, mem, &cur_offset); + cache_bin_init(bin, info, mem, &cur_offset); + cache_bin_postincrement(info, 1, mem, &cur_offset); + assert_zu_eq(cur_offset, size, "Should use all requested memory"); +} + TEST_BEGIN(test_cache_bin) { const int ncached_max = 100; bool success; void *ptr; - cache_bin_t bin; cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); - - size_t size; - size_t alignment; - cache_bin_info_compute_alloc(&info, 1, &size, &alignment); - void *mem = mallocx(size, MALLOCX_ALIGN(alignment)); - assert_ptr_not_null(mem, "Unexpected mallocx failure"); - - size_t cur_offset = 0; - cache_bin_preincrement(&info, 1, mem, &cur_offset); - cache_bin_init(&bin, &info, mem, &cur_offset); - cache_bin_postincrement(&info, 1, mem, &cur_offset); - - assert_zu_eq(cur_offset, size, "Should use all requested memory"); + cache_bin_t bin; + test_bin_init(&bin, &info); /* Initialize to empty; should then have 0 elements. 
*/ expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); @@ -258,7 +261,123 @@ TEST_BEGIN(test_cache_bin) { } TEST_END +static void +do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, + cache_bin_sz_t nfill, cache_bin_sz_t nstash) { + expect_true(cache_bin_ncached_get_local(bin, info) == 0, + "Bin not empty"); + expect_true(cache_bin_nstashed_get(bin, info) == 0, "Bin not empty"); + expect_true(nfill + nstash <= info->ncached_max, "Exceeded max"); + + bool ret; + /* Fill */ + for (cache_bin_sz_t i = 0; i < nfill; i++) { + ret = cache_bin_dalloc_easy(bin, &ptrs[i]); + expect_true(ret, "Unexpected fill failure"); + } + expect_true(cache_bin_ncached_get_local(bin, info) == nfill, + "Wrong cached count"); + + /* Stash */ + for (cache_bin_sz_t i = 0; i < nstash; i++) { + ret = cache_bin_stash(bin, &ptrs[i + nfill]); + expect_true(ret, "Unexpected stash failure"); + } + expect_true(cache_bin_nstashed_get(bin, info) == nstash, + "Wrong stashed count"); + + if (nfill + nstash == info->ncached_max) { + ret = cache_bin_dalloc_easy(bin, &ptrs[0]); + expect_false(ret, "Should not dalloc into a full bin"); + ret = cache_bin_stash(bin, &ptrs[0]); + expect_false(ret, "Should not stash into a full bin"); + } + + /* Alloc filled ones */ + for (cache_bin_sz_t i = 0; i < nfill; i++) { + void *ptr = cache_bin_alloc(bin, &ret); + expect_true(ret, "Unexpected alloc failure"); + /* Verify it's not from the stashed range. 
*/ + expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill], + "Should not alloc stashed ptrs"); + } + expect_true(cache_bin_ncached_get_local(bin, info) == 0, + "Wrong cached count"); + expect_true(cache_bin_nstashed_get(bin, info) == nstash, + "Wrong stashed count"); + + cache_bin_alloc(bin, &ret); + expect_false(ret, "Should not alloc stashed"); + + /* Clear stashed ones */ + cache_bin_finish_flush_stashed(bin, info); + expect_true(cache_bin_ncached_get_local(bin, info) == 0, + "Wrong cached count"); + expect_true(cache_bin_nstashed_get(bin, info) == 0, + "Wrong stashed count"); + + cache_bin_alloc(bin, &ret); + expect_false(ret, "Should not alloc from empty bin"); +} + +TEST_BEGIN(test_cache_bin_stash) { + const int ncached_max = 100; + + cache_bin_t bin; + cache_bin_info_t info; + cache_bin_info_init(&info, ncached_max); + test_bin_init(&bin, &info); + + /* + * The content of this array is not accessed; instead the interior + * addresses are used to insert / stash into the bins as test pointers. 
+ */ + void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + bool ret; + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + expect_true(cache_bin_ncached_get_local(&bin, &info) == + (i / 2 + i % 2), "Wrong ncached value"); + expect_true(cache_bin_nstashed_get(&bin, &info) == i / 2, + "Wrong nstashed value"); + if (i % 2 == 0) { + cache_bin_dalloc_easy(&bin, &ptrs[i]); + } else { + ret = cache_bin_stash(&bin, &ptrs[i]); + expect_true(ret, "Should be able to stash into a " + "non-full cache bin"); + } + } + ret = cache_bin_dalloc_easy(&bin, &ptrs[0]); + expect_false(ret, "Should not dalloc into a full cache bin"); + ret = cache_bin_stash(&bin, &ptrs[0]); + expect_false(ret, "Should not stash into a full cache bin"); + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + void *ptr = cache_bin_alloc(&bin, &ret); + if (i < ncached_max / 2) { + expect_true(ret, "Should be able to alloc"); + uintptr_t diff = ((uintptr_t)ptr - (uintptr_t)&ptrs[0]) + / sizeof(void *); + expect_true(diff % 2 == 0, "Should be able to alloc"); + } else { + expect_false(ret, "Should not alloc stashed"); + expect_true(cache_bin_nstashed_get(&bin, &info) == + ncached_max / 2, "Wrong nstashed value"); + } + } + + test_bin_init(&bin, &info); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max, 0); + do_flush_stashed_test(&bin, &info, ptrs, 0, ncached_max); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 2); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 4); +} +TEST_END + int main(void) { - return test(test_cache_bin); + return test(test_cache_bin, + test_cache_bin_stash); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 81a36c97..bd5ef9e5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -323,6 
+323,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); + TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); #undef TEST_MALLCTL_OPT } @@ -368,7 +369,7 @@ TEST_BEGIN(test_tcache_none) { /* Make sure that tcache-based allocation returns p, not q. */ void *p1 = mallocx(42, 0); expect_ptr_not_null(p1, "Unexpected mallocx() failure"); - if (!opt_prof) { + if (!opt_prof && !san_uaf_detection_enabled()) { expect_ptr_eq(p0, p1, "Expected tcache to allocate cached region"); } @@ -434,8 +435,10 @@ TEST_BEGIN(test_tcache) { ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", i); - expect_ptr_eq(ps[i], p0, - "Expected mallocx() to allocate cached region, i=%u", i); + if (!san_uaf_detection_enabled()) { + expect_ptr_eq(ps[i], p0, "Expected mallocx() to " + "allocate cached region, i=%u", i); + } } /* Verify that reallocation uses cached regions. */ @@ -444,8 +447,10 @@ TEST_BEGIN(test_tcache) { qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", i); - expect_ptr_eq(qs[i], q0, - "Expected rallocx() to allocate cached region, i=%u", i); + if (!san_uaf_detection_enabled()) { + expect_ptr_eq(qs[i], q0, "Expected rallocx() to " + "allocate cached region, i=%u", i); + } /* Avoid undefined behavior in case of test failure. 
*/ if (qs[i] == NULL) { qs[i] = ps[i]; diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 4f207e0e..7b4217d6 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -152,6 +152,7 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(!config_stats); test_skip_if(!opt_tcache); test_skip_if(opt_prof); + test_skip_if(san_uaf_detection_enabled()); for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh index 4480d733..278c4ad5 100644 --- a/test/unit/tcache_max.sh +++ b/test/unit/tcache_max.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="tcache_max:1024" +export MALLOC_CONF="tcache_max:1024,lg_san_uaf_align:-1" diff --git a/test/unit/uaf.c b/test/unit/uaf.c new file mode 100644 index 00000000..30842a3a --- /dev/null +++ b/test/unit/uaf.c @@ -0,0 +1,225 @@ +#include "test/jemalloc_test.h" +#include "test/arena_util.h" + +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/safety_check.h" + +static size_t san_uaf_align; + +static bool fake_abort_called; +void fake_abort(const char *message) { + (void)message; + fake_abort_called = true; +} + +static void +test_write_after_free_pre(void) { + safety_check_set_abort(&fake_abort); + fake_abort_called = false; +} + +static void +test_write_after_free_post(void) { + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + 0, "Unexpected tcache flush failure"); + expect_true(fake_abort_called, "Use-after-free check didn't fire."); + safety_check_set_abort(NULL); +} + +static bool +uaf_detection_enabled(void) { + if (!config_uaf_detection) { + return false; + } + + ssize_t lg_san_uaf_align; + size_t sz = sizeof(lg_san_uaf_align); + assert_d_eq(mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + if (lg_san_uaf_align < 0) { + return false; + } + assert_zd_ge(lg_san_uaf_align, LG_PAGE, "san_uaf_align out of range"); + san_uaf_align = (size_t)1 << 
lg_san_uaf_align; + + bool tcache_enabled; + sz = sizeof(tcache_enabled); + assert_d_eq(mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, + 0), 0, "Unexpected mallctl failure"); + if (!tcache_enabled) { + return false; + } + + return true; +} + +static void +test_use_after_free(size_t alloc_size, bool write_after_free) { + void *ptr = (void *)(uintptr_t)san_uaf_align; + assert_true(cache_bin_nonfast_aligned(ptr), "Wrong alignment"); + ptr = (void *)((uintptr_t)123 * (uintptr_t)san_uaf_align); + assert_true(cache_bin_nonfast_aligned(ptr), "Wrong alignment"); + ptr = (void *)((uintptr_t)san_uaf_align + 1); + assert_false(cache_bin_nonfast_aligned(ptr), "Wrong alignment"); + + /* + * Disable purging (-1) so that all dirty pages remain committed, to + * make use-after-free tolerable. + */ + unsigned arena_ind = do_arena_create(-1, -1); + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + size_t n_max = san_uaf_align * 2; + void **items = mallocx(n_max * sizeof(void *), flags); + assert_ptr_not_null(items, "Unexpected mallocx failure"); + + bool found = false; + size_t iter = 0; + char magic = 's'; + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + 0, "Unexpected tcache flush failure"); + while (!found) { + ptr = mallocx(alloc_size, flags); + assert_ptr_not_null(ptr, "Unexpected mallocx failure"); + + found = cache_bin_nonfast_aligned(ptr); + *(char *)ptr = magic; + items[iter] = ptr; + assert_zu_lt(iter++, n_max, "No aligned ptr found"); + } + + if (write_after_free) { + test_write_after_free_pre(); + } + bool junked = false; + while (iter-- != 0) { + char *volatile mem = items[iter]; + assert_c_eq(*mem, magic, "Unexpected memory content"); + free(mem); + if (*mem != magic) { + junked = true; + assert_c_eq(*mem, (char)uaf_detect_junk, + "Unexpected junk-filling bytes"); + if (write_after_free) { + *(char *)mem = magic + 1; + } + } + /* Flush tcache (including stashed). 
*/ + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + 0, "Unexpected tcache flush failure"); + } + expect_true(junked, "Aligned ptr not junked"); + if (write_after_free) { + test_write_after_free_post(); + } + + dallocx(items, flags); + do_arena_destroy(arena_ind); +} + +TEST_BEGIN(test_read_after_free) { + test_skip_if(!uaf_detection_enabled()); + + test_use_after_free(sizeof(void *), /* write_after_free */ false); + test_use_after_free(sizeof(void *) + 1, /* write_after_free */ false); + test_use_after_free(16, /* write_after_free */ false); + test_use_after_free(20, /* write_after_free */ false); + test_use_after_free(32, /* write_after_free */ false); + test_use_after_free(33, /* write_after_free */ false); + test_use_after_free(48, /* write_after_free */ false); + test_use_after_free(64, /* write_after_free */ false); + test_use_after_free(65, /* write_after_free */ false); + test_use_after_free(129, /* write_after_free */ false); + test_use_after_free(255, /* write_after_free */ false); + test_use_after_free(256, /* write_after_free */ false); +} +TEST_END + +TEST_BEGIN(test_write_after_free) { + test_skip_if(!uaf_detection_enabled()); + + test_use_after_free(sizeof(void *), /* write_after_free */ true); + test_use_after_free(sizeof(void *) + 1, /* write_after_free */ true); + test_use_after_free(16, /* write_after_free */ true); + test_use_after_free(20, /* write_after_free */ true); + test_use_after_free(32, /* write_after_free */ true); + test_use_after_free(33, /* write_after_free */ true); + test_use_after_free(48, /* write_after_free */ true); + test_use_after_free(64, /* write_after_free */ true); + test_use_after_free(65, /* write_after_free */ true); + test_use_after_free(129, /* write_after_free */ true); + test_use_after_free(255, /* write_after_free */ true); + test_use_after_free(256, /* write_after_free */ true); +} +TEST_END + +static bool +check_allocated_intact(void **allocated, size_t n_alloc) { + for (unsigned i = 0; i < 
n_alloc; i++) { + void *ptr = *(void **)allocated[i]; + bool found = false; + for (unsigned j = 0; j < n_alloc; j++) { + if (ptr == allocated[j]) { + found = true; + break; + } + } + if (!found) { + return false; + } + } + + return true; +} + +TEST_BEGIN(test_use_after_free_integration) { + test_skip_if(!uaf_detection_enabled()); + + unsigned arena_ind = do_arena_create(-1, -1); + int flags = MALLOCX_ARENA(arena_ind); + + size_t n_alloc = san_uaf_align * 2; + void **allocated = mallocx(n_alloc * sizeof(void *), flags); + assert_ptr_not_null(allocated, "Unexpected mallocx failure"); + + for (unsigned i = 0; i < n_alloc; i++) { + allocated[i] = mallocx(sizeof(void *) * 8, flags); + assert_ptr_not_null(allocated[i], "Unexpected mallocx failure"); + if (i > 0) { + /* Emulate a circular list. */ + *(void **)allocated[i] = allocated[i - 1]; + } + } + *(void **)allocated[0] = allocated[n_alloc - 1]; + expect_true(check_allocated_intact(allocated, n_alloc), + "Allocated data corrupted"); + + for (unsigned i = 0; i < n_alloc; i++) { + free(allocated[i]); + } + /* Read-after-free */ + expect_false(check_allocated_intact(allocated, n_alloc), + "Junk-filling not detected"); + + test_write_after_free_pre(); + for (unsigned i = 0; i < n_alloc; i++) { + allocated[i] = mallocx(sizeof(void *), flags); + assert_ptr_not_null(allocated[i], "Unexpected mallocx failure"); + *(void **)allocated[i] = (void *)(uintptr_t)i; + } + /* Write-after-free */ + for (unsigned i = 0; i < n_alloc; i++) { + free(allocated[i]); + *(void **)allocated[i] = NULL; + } + test_write_after_free_post(); +} +TEST_END + +int +main(void) { + return test( + test_read_after_free, + test_write_after_free, + test_use_after_free_integration); +} diff --git a/test/unit/uaf.sh b/test/unit/uaf.sh new file mode 100644 index 00000000..5f12dcf6 --- /dev/null +++ b/test/unit/uaf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="lg_san_uaf_align:12" From e491cef9abcc80de7c2648a0a244a5271848099a Mon Sep 17 00:00:00 2001 
From: Qi Wang Date: Tue, 30 Nov 2021 14:39:34 -0800 Subject: [PATCH 2159/2608] Add stats for stashed bytes in tcache. --- include/jemalloc/internal/arena_stats.h | 1 + include/jemalloc/internal/cache_bin.h | 58 +++++++++++++++++-------- src/arena.c | 15 ++++--- src/ctl.c | 7 +++ src/stats.c | 3 +- src/tcache.c | 4 +- test/unit/cache_bin.c | 13 +++--- test/unit/stats.c | 14 ++++-- test/unit/uaf.c | 33 ++++++++++++++ 9 files changed, 112 insertions(+), 36 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 02c93405..15f1d345 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -73,6 +73,7 @@ struct arena_stats_s { /* Number of bytes cached in tcache associated with this arena. */ size_t tcache_bytes; /* Derived. */ + size_t tcache_stashed_bytes; /* Derived. */ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 266897f9..76345be9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -223,18 +223,6 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { return n; } -/* - * Obtain a racy view of the number of items currently in the cache bin, in the - * presence of possible concurrent modifications. - */ -static inline cache_bin_sz_t -cache_bin_ncached_get_remote(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, - /* racy */ true); - assert(n <= cache_bin_info_ncached_max(info)); - return n; -} - /* * Internal. 
* @@ -436,15 +424,49 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, + bool racy) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); void **full = cache_bin_full_position_get(bin, info); - uint16_t nstashed = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, + cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, bin->low_bits_full) / sizeof(void *); - assert(nstashed <= ncached_max); + assert(n <= ncached_max); - return nstashed; + /* Below are for assertions only. */ + void *stashed = *(full + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); +#ifdef JEMALLOC_JET + /* Allow arbitrary pointers to be stashed in tests. */ + aligned = true; +#endif + assert(n == 0 || (stashed != NULL && aligned) || racy); + + return n; +} + +JEMALLOC_ALWAYS_INLINE cache_bin_sz_t +cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, false); + assert(n <= cache_bin_info_ncached_max(info)); + return n; +} + +/* + * Obtain a racy view of the number of items currently in the cache bin, in the + * presence of possible concurrent modifications. + */ +static inline void +cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, + cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true); + assert(n <= cache_bin_info_ncached_max(info)); + *ncached = n; + + n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true); + assert(n <= cache_bin_info_ncached_max(info)); + *nstashed = n; + /* Note that cannot assert ncached + nstashed <= ncached_max (racy). 
*/ } /* @@ -538,7 +560,7 @@ cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, cache_bin_info_t *info, cache_bin_ptr_array_t *arr, cache_bin_sz_t nstashed) { assert(nstashed > 0); - assert(cache_bin_nstashed_get(bin, info) == nstashed); + assert(cache_bin_nstashed_get_local(bin, info) == nstashed); void **full = cache_bin_full_position_get(bin, info); arr->ptr = full; @@ -551,7 +573,7 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { /* Reset the bin local full position. */ bin->low_bits_full = (uint16_t)(uintptr_t)full; - assert(cache_bin_nstashed_get(bin, info) == 0); + assert(cache_bin_nstashed_get_local(bin, info) == 0); } /* diff --git a/src/arena.c b/src/arena.c index ed41d6d8..bf880d71 100644 --- a/src/arena.c +++ b/src/arena.c @@ -148,18 +148,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); - /* tcache_bytes counts currently cached bytes. */ + /* Currently cached bytes and sanitizer-stashed bytes in tcache. 
*/ astats->tcache_bytes = 0; + astats->tcache_stashed_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < nhbins; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; - astats->tcache_bytes += - cache_bin_ncached_get_remote(cache_bin, - &tcache_bin_info[i]) * sz_index2size(i) + - cache_bin_nstashed_get(cache_bin, - &tcache_bin_info[i]) * sz_index2size(i); + cache_bin_sz_t ncached, nstashed; + cache_bin_nitems_get_remote(cache_bin, + &tcache_bin_info[i], &ncached, &nstashed); + + astats->tcache_bytes += ncached * sz_index2size(i); + astats->tcache_stashed_bytes += nstashed * + sz_index2size(i); } } malloc_mutex_prof_read(tsdn, diff --git a/src/ctl.c b/src/ctl.c index 78dc5792..5a925129 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -290,6 +290,7 @@ CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) +CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) CTL_PROTO(stats_arenas_i_hpa_sec_bytes) @@ -787,6 +788,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("internal"), CTL(stats_arenas_i_internal)}, {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, + {NAME("tcache_stashed_bytes"), + CTL(stats_arenas_i_tcache_stashed_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, @@ -1169,6 +1172,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.pa_shard_stats.pac_stats.abandoned_vm); sdstats->astats.tcache_bytes += astats->astats.tcache_bytes; + sdstats->astats.tcache_stashed_bytes += + astats->astats.tcache_stashed_bytes; if (ctl_arena->arena_ind == 0) { sdstats->astats.uptime = 
astats->astats.uptime; @@ -3503,6 +3508,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, arenas_i(mib[2])->astats->astats.tcache_bytes, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_stashed_bytes, + arenas_i(mib[2])->astats->astats.tcache_stashed_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, arenas_i(mib[2])->astats->astats.resident, size_t) diff --git a/src/stats.c b/src/stats.c index b1b3906d..bed585b1 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1055,7 +1055,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, size_t large_allocated; uint64_t large_nmalloc, large_ndalloc, large_nrequests, large_nfills, large_nflushes; - size_t tcache_bytes, abandoned_vm; + size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm; uint64_t uptime; CTL_GET("arenas.page", &page, size_t); @@ -1344,6 +1344,7 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(internal) GET_AND_EMIT_MEM_STAT(metadata_thp) GET_AND_EMIT_MEM_STAT(tcache_bytes) + GET_AND_EMIT_MEM_STAT(tcache_stashed_bytes) GET_AND_EMIT_MEM_STAT(resident) GET_AND_EMIT_MEM_STAT(abandoned_vm) GET_AND_EMIT_MEM_STAT(extent_avail) diff --git a/src/tcache.c b/src/tcache.c index 74f0d83b..45d4e810 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -553,7 +553,7 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin, info); - cache_bin_sz_t nstashed = cache_bin_nstashed_get(cache_bin, info); + cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info); assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info)); if (nstashed == 0) { return; @@ -567,7 +567,7 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, is_small); cache_bin_finish_flush_stashed(cache_bin, info); - 
assert(cache_bin_nstashed_get(cache_bin, info) == 0); + assert(cache_bin_nstashed_get_local(cache_bin, info) == 0); assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached); assert(head_content == *cache_bin->stack_head); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 2b093b4d..3b6dbab3 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -266,7 +266,8 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_sz_t nfill, cache_bin_sz_t nstash) { expect_true(cache_bin_ncached_get_local(bin, info) == 0, "Bin not empty"); - expect_true(cache_bin_nstashed_get(bin, info) == 0, "Bin not empty"); + expect_true(cache_bin_nstashed_get_local(bin, info) == 0, + "Bin not empty"); expect_true(nfill + nstash <= info->ncached_max, "Exceeded max"); bool ret; @@ -283,7 +284,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, ret = cache_bin_stash(bin, &ptrs[i + nfill]); expect_true(ret, "Unexpected stash failure"); } - expect_true(cache_bin_nstashed_get(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, "Wrong stashed count"); if (nfill + nstash == info->ncached_max) { @@ -303,7 +304,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } expect_true(cache_bin_ncached_get_local(bin, info) == 0, "Wrong cached count"); - expect_true(cache_bin_nstashed_get(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, "Wrong stashed count"); cache_bin_alloc(bin, &ret); @@ -313,7 +314,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, cache_bin_finish_flush_stashed(bin, info); expect_true(cache_bin_ncached_get_local(bin, info) == 0, "Wrong cached count"); - expect_true(cache_bin_nstashed_get(bin, info) == 0, + expect_true(cache_bin_nstashed_get_local(bin, info) == 0, "Wrong stashed count"); cache_bin_alloc(bin, &ret); @@ -338,7 +339,7 @@ 
TEST_BEGIN(test_cache_bin_stash) { for (cache_bin_sz_t i = 0; i < ncached_max; i++) { expect_true(cache_bin_ncached_get_local(&bin, &info) == (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get(&bin, &info) == i / 2, + expect_true(cache_bin_nstashed_get_local(&bin, &info) == i / 2, "Wrong nstashed value"); if (i % 2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); @@ -361,7 +362,7 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get(&bin, &info) == + expect_true(cache_bin_nstashed_get_local(&bin, &info) == ncached_max / 2, "Wrong nstashed value"); } } diff --git a/test/unit/stats.c b/test/unit/stats.c index cb99b095..bbdbd180 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -367,7 +367,7 @@ TEST_END static void test_tcache_bytes_for_usize(size_t usize) { uint64_t epoch; - size_t tcache_bytes; + size_t tcache_bytes, tcache_stashed_bytes; size_t sz = sizeof(tcache_bytes); void *ptr = mallocx(usize, 0); @@ -377,7 +377,11 @@ test_tcache_bytes_for_usize(size_t usize) { assert_d_eq(mallctl( "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - size_t tcache_bytes_before = tcache_bytes; + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) + ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + size_t tcache_bytes_before = tcache_bytes + tcache_stashed_bytes; dallocx(ptr, 0); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), @@ -385,7 +389,11 @@ test_tcache_bytes_for_usize(size_t usize) { assert_d_eq(mallctl( "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - size_t tcache_bytes_after = tcache_bytes; + assert_d_eq(mallctl( + "stats.arenas." 
STRINGIFY(MALLCTL_ARENAS_ALL) + ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + size_t tcache_bytes_after = tcache_bytes + tcache_stashed_bytes; assert_zu_eq(tcache_bytes_after - tcache_bytes_before, usize, "Incorrectly attributed a free"); } diff --git a/test/unit/uaf.c b/test/unit/uaf.c index 30842a3a..880aee4f 100644 --- a/test/unit/uaf.c +++ b/test/unit/uaf.c @@ -53,6 +53,26 @@ uaf_detection_enabled(void) { return true; } +static size_t +read_tcache_stashed_bytes(unsigned arena_ind) { + if (!config_stats) { + return 0; + } + + uint64_t epoch; + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + + size_t tcache_stashed_bytes; + size_t sz = sizeof(tcache_stashed_bytes); + assert_d_eq(mallctl( + "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) + ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + + return tcache_stashed_bytes; +} + static void test_use_after_free(size_t alloc_size, bool write_after_free) { void *ptr = (void *)(uintptr_t)san_uaf_align; @@ -95,6 +115,7 @@ test_use_after_free(size_t alloc_size, bool write_after_free) { while (iter-- != 0) { char *volatile mem = items[iter]; assert_c_eq(*mem, magic, "Unexpected memory content"); + size_t stashed_before = read_tcache_stashed_bytes(arena_ind); free(mem); if (*mem != magic) { junked = true; @@ -103,6 +124,18 @@ test_use_after_free(size_t alloc_size, bool write_after_free) { if (write_after_free) { *(char *)mem = magic + 1; } + + size_t stashed_after = read_tcache_stashed_bytes( + arena_ind); + /* + * An edge case is the deallocation above triggering the + * tcache GC event, in which case the stashed pointers + * may get flushed immediately, before returning from + * free(). Treat these cases as checked already. + */ + if (stashed_after <= stashed_before) { + fake_abort_called = true; + } } /* Flush tcache (including stashed). 
*/ assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), From 8b34a788b52c6410ef68f2dab6ebbf5079a0660e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Dec 2021 20:31:28 -0800 Subject: [PATCH 2160/2608] Fix an used-uninitialized warning (false positive). --- src/tcache.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 45d4e810..7138f883 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -418,7 +418,8 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, /* Deallocate whatever we can. */ unsigned ndeferred = 0; - arena_dalloc_bin_locked_info_t dalloc_bin_info; + /* Init only to avoid used-uninitialized warning. */ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; if (small) { arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); } From 01d61a3c6fa4664ba92f97bd75f4b513396b140e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Dec 2021 22:05:13 -0800 Subject: [PATCH 2161/2608] Fix a conversion warning. 
--- include/jemalloc/internal/cache_bin.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 76345be9..102c133f 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -371,13 +371,15 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { - size_t n = cache_bin_ncached_get_internal(bin, /* racy */ false); + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, + /* racy */ false); if (n > num) { - n = num; + n = (cache_bin_sz_t)num; } memcpy(out, bin->stack_head, n * sizeof(void *)); bin->stack_head += n; cache_bin_low_water_adjust(bin); + return n; } From dfdd7562f55a409a1667a00595349804fe55cace Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Dec 2021 13:01:17 -0800 Subject: [PATCH 2162/2608] Rename san_enabled() to san_guard_enabled(). --- include/jemalloc/internal/san.h | 2 +- test/unit/hpa_background_thread.c | 4 ++-- test/unit/retained.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index f97211a7..27adddb1 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -76,7 +76,7 @@ san_one_side_guarded_sz(size_t size) { } static inline bool -san_enabled(void) { +san_guard_enabled(void) { return (opt_san_guard_large != 0 || opt_san_guard_small != 0); } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 228b771b..ad7bac4b 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -129,7 +129,7 @@ TEST_BEGIN(test_hpa_background_thread_purges) { test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); /* Skip since guarded pages cannot be allocated from hpa. 
*/ - test_skip_if(san_enabled()); + test_skip_if(san_guard_enabled()); unsigned arena_ind = create_arena(); /* @@ -145,7 +145,7 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { test_skip_if(!hpa_supported()); test_skip_if(!have_background_thread); /* Skip since guarded pages cannot be allocated from hpa. */ - test_skip_if(san_enabled()); + test_skip_if(san_guard_enabled()); unsigned arena_ind = create_arena(); diff --git a/test/unit/retained.c b/test/unit/retained.c index 37ff88f6..aa9f6847 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -104,7 +104,7 @@ TEST_BEGIN(test_retained) { arena_ind = do_arena_create(NULL); sz = nallocx(HUGEPAGE, 0); - size_t guard_sz = san_enabled() ? SAN_PAGE_GUARDS_SIZE : 0; + size_t guard_sz = san_guard_enabled() ? SAN_PAGE_GUARDS_SIZE : 0; esz = sz + sz_large_pad + guard_sz; atomic_store_u(&epoch, 0, ATOMIC_RELAXED); From eabe88916290fec452048eaa1abe1cd52a794339 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Dec 2021 13:38:12 -0800 Subject: [PATCH 2163/2608] Rename full_position to low_bound in cache_bin.h. --- include/jemalloc/internal/cache_bin.h | 36 ++++++++++++++++++--------- 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 102c133f..c98c46ad 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -66,6 +66,17 @@ struct cache_bin_info_s { /* * Responsible for caching allocations associated with a single size. + * + * Several pointers are used to track the stack. To save on metadata bytes, + * only the stack_head is a full sized pointer (which is dereferenced on the + * fastpath), while the others store only the low 16 bits -- this is correct + * because a single stack never takes more space than 2^16 bytes, and at the + * same time only equality checks are performed on the low bits. 
+ * + * (low addr) (high addr) + * |------stashed------|------available------|------cached-----| + * ^ ^ ^ ^ + * low_bound(derived) low_bits_full stack_head low_bits_empty */ typedef struct cache_bin_s cache_bin_t; struct cache_bin_s { @@ -94,11 +105,12 @@ struct cache_bin_s { /* * The low bits of the value that stack_head will take on when the array - * is full. (But remember that stack_head always points to a valid item - * when the array is nonempty -- this is in the array). + * is full (of cached & stashed items). But remember that stack_head + * always points to a valid item when the array is nonempty -- this is + * in the array. * - * Recall that since the stack grows down, this is the lowest address in - * the array. Only adjusted when stashing items. + * Recall that since the stack grows down, this is the lowest available + * address in the array for caching. Only adjusted when stashing items. */ uint16_t low_bits_full; @@ -246,7 +258,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) { * A pointer to the position with the lowest address of the backing array. */ static inline void ** -cache_bin_full_position_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); @@ -429,14 +441,14 @@ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, bool racy) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); - void **full = cache_bin_full_position_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin, info); - cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)full, + cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)low_bound, bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); /* Below are for assertions only. 
*/ - void *stashed = *(full + n - 1); + void *stashed = *(low_bound + n - 1); bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET /* Allow arbitrary pointers to be stashed in tests. */ @@ -564,17 +576,17 @@ cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, assert(nstashed > 0); assert(cache_bin_nstashed_get_local(bin, info) == nstashed); - void **full = cache_bin_full_position_get(bin, info); - arr->ptr = full; + void **low_bound = cache_bin_low_bound_get(bin, info); + arr->ptr = low_bound; assert(*arr->ptr != NULL); } static inline void cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { - void **full = cache_bin_full_position_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin, info); /* Reset the bin local full position. */ - bin->low_bits_full = (uint16_t)(uintptr_t)full; + bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; assert(cache_bin_nstashed_get_local(bin, info) == 0); } From d660683d3ddc2aaebf41a5662a6bc629be016e6d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Dec 2021 13:27:23 -0800 Subject: [PATCH 2164/2608] Fix test config of lg_san_uaf_align. The option may be configure-disabled, which resulted in the invalid options output from the tests. 
--- test/include/test/san.h | 8 ++++++++ test/unit/tcache_max.c | 3 +++ test/unit/tcache_max.sh | 2 +- test/unit/uaf.c | 6 +++++- test/unit/uaf.sh | 3 --- 5 files changed, 17 insertions(+), 5 deletions(-) delete mode 100644 test/unit/uaf.sh diff --git a/test/include/test/san.h b/test/include/test/san.h index 691dc508..da07865c 100644 --- a/test/include/test/san.h +++ b/test/include/test/san.h @@ -1,3 +1,11 @@ +#if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) +# define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12" +# define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1" +#else +# define TEST_SAN_UAF_ALIGN_ENABLE "" +# define TEST_SAN_UAF_ALIGN_DISABLE "" +#endif + static inline bool extent_is_guarded(tsdn_t *tsdn, void *ptr) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 7b4217d6..1f657c85 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -1,4 +1,7 @@ #include "test/jemalloc_test.h" +#include "test/san.h" + +const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; enum { alloc_option_start = 0, diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh index 278c4ad5..4480d733 100644 --- a/test/unit/tcache_max.sh +++ b/test/unit/tcache_max.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="tcache_max:1024,lg_san_uaf_align:-1" +export MALLOC_CONF="tcache_max:1024" diff --git a/test/unit/uaf.c b/test/unit/uaf.c index 880aee4f..a8433c29 100644 --- a/test/unit/uaf.c +++ b/test/unit/uaf.c @@ -1,9 +1,13 @@ #include "test/jemalloc_test.h" #include "test/arena_util.h" +#include "test/san.h" #include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/san.h" #include "jemalloc/internal/safety_check.h" +const char *malloc_conf = TEST_SAN_UAF_ALIGN_ENABLE; + static size_t san_uaf_align; static bool fake_abort_called; @@ -28,7 +32,7 @@ test_write_after_free_post(void) { static bool uaf_detection_enabled(void) { - if (!config_uaf_detection) { + if 
(!config_uaf_detection || !san_uaf_detection_enabled()) { return false; } diff --git a/test/unit/uaf.sh b/test/unit/uaf.sh deleted file mode 100644 index 5f12dcf6..00000000 --- a/test/unit/uaf.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF="lg_san_uaf_align:12" From 067c2da07456660113bbb7bf76f0648c3c993a83 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Dec 2021 14:23:44 -0800 Subject: [PATCH 2165/2608] Fix unnecessary returns in san_(un)guard_pages_two_sided. --- include/jemalloc/internal/san.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 27adddb1..8813d6bb 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -39,14 +39,13 @@ void san_init(ssize_t lg_san_uaf_align); static inline void san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool remap) { - return san_guard_pages(tsdn, ehooks, edata, emap, true, true, - remap); + san_guard_pages(tsdn, ehooks, edata, emap, true, true, remap); } static inline void san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { - return san_unguard_pages(tsdn, ehooks, edata, emap, true, true); + san_unguard_pages(tsdn, ehooks, edata, emap, true, true); } static inline size_t From f509703af59348496abdb0cb446e8d3d04bc085d Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Dec 2021 14:39:42 -0800 Subject: [PATCH 2166/2608] Fix two conversion warnings in tcache. 
--- src/tcache.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 7138f883..fa16732e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -116,7 +116,7 @@ tcache_gc_item_delay_compute(szind_t szind) { if (item_delay >= delay_max) { item_delay = delay_max - 1; } - return item_delay; + return (uint8_t)item_delay; } static void @@ -134,7 +134,11 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, size_t nflush = low_water - (low_water >> 2); if (nflush < tcache_slow->bin_flush_delay_items[szind]) { - tcache_slow->bin_flush_delay_items[szind] -= nflush; + /* Workaround for a conversion warning. */ + uint8_t nflush_uint8 = (uint8_t)nflush; + assert(sizeof(tcache_slow->bin_flush_delay_items[0]) == + sizeof(nflush_uint8)); + tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; return; } else { tcache_slow->bin_flush_delay_items[szind] From 18510020e75fd3f6a2c9e26057d9a188bee1fc21 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Mon, 27 Dec 2021 13:39:39 +0300 Subject: [PATCH 2167/2608] Fix symbol conflict with musl libc `__libc` prefixed functions are used by musl libc as non-replaceable malloc stubs. Fix this conflict by checking if we are linking against glibc. 
--- src/jemalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index c8eef2de..990855c4 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3239,7 +3239,7 @@ JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = je_memalign; # endif -# ifdef CPU_COUNT +# ifdef __GLIBC__ /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the From c91e62dd375637e1d029af5385ce633a74f98712 Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Wed, 5 Jan 2022 21:19:50 +0300 Subject: [PATCH 2168/2608] #include as requested --- src/jemalloc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 990855c4..fb435248 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3231,6 +3231,8 @@ je_valloc(size_t size) { * passed an extra argument for the caller return address, which will be * ignored. */ +#include // defines __GLIBC__ if we are compiling against glibc + JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; From 61978bbe693c020ffa29dee17b81072ac52726e0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 6 Jan 2022 16:54:01 -0800 Subject: [PATCH 2169/2608] Purge all if the last thread migrated away from an arena. --- src/jemalloc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index fb435248..2ffb9f03 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -472,6 +472,12 @@ arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); + + if (arena_nthreads_get(oldarena, false) == 0) { + /* Purge if the old arena has no associated threads anymore. 
*/ + arena_decay(tsd_tsdn(tsd), oldarena, + /* is_background_thread */ false, /* all */ true); + } } static void From 6230cc88b6b3902902c58e4331ca6273e71b8e2e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 6 Jan 2022 17:46:55 -0800 Subject: [PATCH 2170/2608] Add background thread sleep retry in test/unit/hpa_background_thread Under high concurrency / heavy test load (e.g. using run_tests.sh), the background thread may not get scheduled for a longer period of time. Retry 100 times max before bailing out. --- test/unit/hpa_background_thread.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index ad7bac4b..81c25612 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -4,8 +4,8 @@ static void sleep_for_background_thread_interval() { /* - * The sleep interval set in our .sh file is 50ms. So it should - * definitely run if we sleep for for times that. + * The sleep interval set in our .sh file is 50ms. So it likely will + * run if we sleep for four times that. */ sleep_ns(200 * 1000 * 1000); } @@ -117,10 +117,18 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { } } expect_b_eq(expect_deferred, observed_dirty_page, ""); - if (expect_deferred) { + + /* + * Under high concurrency / heavy test load (e.g. using run_test.sh), + * the background thread may not get scheduled for a longer period of + * time. Retry 100 times max before bailing out. 
+ */ + unsigned retry = 0; + while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 && + expect_deferred && (retry++ < 100)) { sleep_for_background_thread_interval(); } - empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Should have seen a background purge"); } From 89fe8ee6bf7a23556350d883a310c0224a171879 Mon Sep 17 00:00:00 2001 From: Jonathan Swinney Date: Thu, 6 Jan 2022 17:09:34 +0000 Subject: [PATCH 2171/2608] Use the isb instruction instead of yield for spin locks on arm isb introduces a small delay which is closer to the x86 pause instruction. --- configure.ac | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 49a12ac8..3303badf 100644 --- a/configure.ac +++ b/configure.ac @@ -425,14 +425,15 @@ case "${host_cpu}" in ;; aarch64|arm*) HAVE_CPU_SPINWAIT=1 - AC_CACHE_VAL([je_cv_yield], - [JE_COMPILABLE([yield instruction], [], - [[__asm__ volatile("yield"); return 0;]], - [je_cv_yield])]) - if test "x${je_cv_yield}" = "xyes" ; then - CPU_SPINWAIT='__asm__ volatile("yield")' + dnl isb is a better equivalent to the pause instruction on x86. 
+ AC_CACHE_VAL([je_cv_isb], + [JE_COMPILABLE([isb instruction], [], + [[__asm__ volatile("isb"); return 0;]], + [je_cv_isb])]) + if test "x${je_cv_isb}" = "xyes" ; then + CPU_SPINWAIT='__asm__ volatile("isb")' fi - ;; + ;; *) HAVE_CPU_SPINWAIT=0 ;; From c9946fa7e679f9e9b739be83aff1b6a85cf8d78c Mon Sep 17 00:00:00 2001 From: Craig Leres Date: Tue, 4 Jan 2022 17:29:31 -0800 Subject: [PATCH 2172/2608] FreeBSD also needs the OS-X "don't declare system functions as nothrow" fix since it also has jemalloc in the base system --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 5bb5c755..ebb3137e 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -142,7 +142,7 @@ # define JEMALLOC_COLD #endif -#if defined(__APPLE__) && !defined(JEMALLOC_NO_RENAME) +#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME) # define JEMALLOC_SYS_NOTHROW #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW From d66162e032190d74a2071e93049751744975ce55 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Jan 2022 11:41:24 -0800 Subject: [PATCH 2173/2608] Fix the extent state checking on the merge error path. With DSS as primary, the default merge impl will (correctly) decline to merge when one of the extent is non-dss. The error path should tolerate the not-merged extent being in a merging state. 
--- src/extent.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/extent.c b/src/extent.c index 1c6fa1fc..cf3d1f31 100644 --- a/src/extent.c +++ b/src/extent.c @@ -245,11 +245,10 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } static void -extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, +extent_deactivate_locked_impl(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == extent_state_active); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); eset_t *eset = edata_guarded_get(edata) ? &ecache->guarded_eset : @@ -257,6 +256,20 @@ extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_insert(eset, edata); } +static void +extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata) { + assert(edata_state_get(edata) == extent_state_active); + extent_deactivate_locked_impl(tsdn, pac, ecache, edata); +} + +static void +extent_deactivate_check_state_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, + edata_t *edata, extent_state_t expected_state) { + assert(edata_state_get(edata) == expected_state); + extent_deactivate_locked_impl(tsdn, pac, ecache, edata); +} + static void extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) { @@ -796,7 +809,8 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, forward ? inner : outer, forward ? 
outer : inner, /* holding_core_locks */ true); if (err) { - extent_deactivate_locked(tsdn, pac, ecache, outer); + extent_deactivate_check_state_locked(tsdn, pac, ecache, outer, + extent_state_merging); } return err; From 648b3b9f768674934c2bbf260bdc75301a63a314 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 7 Jan 2022 17:11:18 -0800 Subject: [PATCH 2174/2608] Lower the num_threads in the stress test of test/unit/prof_recent This takes a fair amount of resources. Under high concurrency it was causing resource exhaustion such as pthread_create and mmap failures. --- test/unit/prof_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index c23b01ec..4fb37236 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -553,7 +553,7 @@ TEST_END #undef DUMP_ERROR #undef DUMP_OUT_SIZE -#define N_THREADS 16 +#define N_THREADS 8 #define N_PTRS 512 #define N_CTLS 8 #define N_ITERS 2048 From ddb170b1d92d90ecee9ce87545086da9b34839aa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 10 Jan 2022 13:34:07 -0800 Subject: [PATCH 2175/2608] Simplify arena_migrate() to take arena_t* instead of indices. This makes debugging slightly easier and avoids the confusion of "should we create new arenas" here. 
--- include/jemalloc/internal/jemalloc_internal_externs.h | 2 +- include/jemalloc/internal/jemalloc_internal_inlines_b.h | 2 +- src/ctl.c | 2 +- src/jemalloc.c | 7 +++---- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fa1fabeb..fc834c67 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -62,7 +62,7 @@ void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind); +void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena); void iarena_cleanup(tsd_t *tsd); void arena_cleanup(tsd_t *tsd); size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 35d71d0a..152f8a03 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -16,7 +16,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { assert(newarena != NULL); /* Set new arena/tcache associations. */ - arena_migrate(tsd, oldind, newind); + arena_migrate(tsd, oldarena, newarena); tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); diff --git a/src/ctl.c b/src/ctl.c index 5a925129..6e0088f6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2259,7 +2259,7 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } /* Set new arena/tcache associations. 
*/ - arena_migrate(tsd, oldind, newind); + arena_migrate(tsd, oldarena, newarena); if (tcache_available(tsd)) { tcache_arena_reassociate(tsd_tsdn(tsd), tsd_tcache_slowp_get(tsd), tsd_tcachep_get(tsd), diff --git a/src/jemalloc.c b/src/jemalloc.c index 2ffb9f03..17a27ae0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -464,11 +464,10 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { } void -arena_migrate(tsd_t *tsd, unsigned oldind, unsigned newind) { - arena_t *oldarena, *newarena; +arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { + assert(oldarena != NULL); + assert(newarena != NULL); - oldarena = arena_get(tsd_tsdn(tsd), oldind, false); - newarena = arena_get(tsd_tsdn(tsd), newind, false); arena_nthreads_dec(oldarena, false); arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); From 8b49eb132eae6fd3de081addb06d967470bfa2aa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Jan 2022 15:02:44 -0800 Subject: [PATCH 2176/2608] Fix the HELP_STRING of --enable-doc. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 3303badf..ac916c75 100644 --- a/configure.ac +++ b/configure.ac @@ -977,7 +977,7 @@ AC_PATH_PROG([AUTOCONF], [autoconf], [false], [$PATH]) dnl Enable documentation AC_ARG_ENABLE([doc], - [AS_HELP_STRING([--enable-documentation], [Build documentation])], + [AS_HELP_STRING([--enable-doc], [Build documentation])], if test "x$enable_doc" = "xno" ; then enable_doc="0" else From 011449f17bdddd4c9e0510b27a3fb34e88d072ca Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 11 Jan 2022 14:54:33 -0800 Subject: [PATCH 2177/2608] Fix doc build with install-suffix. 
--- Makefile.in | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Makefile.in b/Makefile.in index 7a820fe7..80f3b950 100644 --- a/Makefile.in +++ b/Makefile.in @@ -177,7 +177,6 @@ else LJEMALLOC := $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) endif PC := $(objroot)jemalloc.pc -MAN3 := $(objroot)doc/jemalloc$(install_suffix).3 DOCS_XML := $(objroot)doc/jemalloc$(install_suffix).xml DOCS_HTML := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.html) DOCS_MAN3 := $(DOCS_XML:$(objroot)%.xml=$(objroot)%.3) @@ -378,7 +377,7 @@ all: build_lib dist: build_doc -$(objroot)doc/%.html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl +$(objroot)doc/%$(install_suffix).html : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/html.xsl ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/html.xsl $< else @@ -388,9 +387,16 @@ endif @echo "Missing xsltproc. "$@" not (re)built." endif -$(objroot)doc/%.3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl +$(objroot)doc/%$(install_suffix).3 : $(objroot)doc/%.xml $(srcroot)doc/stylesheet.xsl $(objroot)doc/manpages.xsl ifneq ($(XSLROOT),) $(XSLTPROC) -o $@ $(objroot)doc/manpages.xsl $< +# The -o option (output filename) of xsltproc may not work (it uses the +# in the .xml file). Manually add the suffix if so. + ifneq ($(install_suffix),) + @if [ -f $(objroot)doc/jemalloc.3 ]; then \ + mv $(objroot)doc/jemalloc.3 $(objroot)doc/jemalloc$(install_suffix).3 ; \ + fi + endif else ifeq ($(wildcard $(DOCS_MAN3)),) @echo "Missing xsltproc. Doc not built." > $@ From eb196815d670f0937d2117ff0f2b885bd23c80de Mon Sep 17 00:00:00 2001 From: Charles Date: Mon, 17 Jan 2022 23:18:54 +0800 Subject: [PATCH 2178/2608] Avoid calculating size of size class twice & delete sc_data_global. 
--- src/sc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/sc.c b/src/sc.c index 37683ff4..9a0f76d5 100644 --- a/src/sc.c +++ b/src/sc.c @@ -13,8 +13,6 @@ * at least the damage is compartmentalized to this file. */ -sc_data_t sc_data_global; - static size_t reg_size_compute(int lg_base, int lg_delta, int ndelta) { return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); @@ -64,9 +62,8 @@ size_class( sc->lg_base = lg_base; sc->lg_delta = lg_delta; sc->ndelta = ndelta; - sc->psz = (reg_size_compute(lg_base, lg_delta, ndelta) - % (ZU(1) << lg_page) == 0); - size_t size = (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); + size_t size = reg_size_compute(lg_base, lg_delta, ndelta); + sc->psz = (size % (ZU(1) << lg_page) == 0); if (index == 0) { assert(!sc->psz); } From f15d8f3b416f6812ac030bc1a7aacf05927a4d7f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Sat, 15 Jan 2022 13:51:33 -0800 Subject: [PATCH 2179/2608] Echo installed files via verbose 'install' command It's not necessary to manually echo all install commands, similar effect is achieved via 'install -v' --- Makefile.in | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Makefile.in b/Makefile.in index 80f3b950..f77ee7c7 100644 --- a/Makefile.in +++ b/Makefile.in @@ -554,20 +554,18 @@ endif install_bin: $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ - $(INSTALL) -m 755 $$b $(BINDIR); \ + $(INSTALL) -v -m 755 $$b $(BINDIR); \ done install_include: $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ - $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + $(INSTALL) -v -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) $(INSTALL) -d $(LIBDIR) - $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -v -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf 
$(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif @@ -575,15 +573,13 @@ endif install_lib_static: $(STATIC_LIBS) $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - echo "$(INSTALL) -m 755 $$l $(LIBDIR)"; \ - $(INSTALL) -m 755 $$l $(LIBDIR); \ + $(INSTALL) -v -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ - $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ + $(INSTALL) -v -m 644 $$l $(LIBDIR)/pkgconfig; \ done ifeq ($(enable_shared), 1) @@ -597,15 +593,13 @@ install_lib: install_lib_pc install_doc_html: build_doc_html $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ - $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + $(INSTALL) -v -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: build_doc_man $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ - $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ + $(INSTALL) -v -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man From 640c3c72e661ec0b3f20865ee4fd4363644c017a Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Wed, 2 Jun 2021 12:50:46 +0800 Subject: [PATCH 2180/2608] Add support for 'make uninstall' --- Makefile.in | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/Makefile.in b/Makefile.in index f77ee7c7..8e16982e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -610,6 +610,48 @@ ifeq ($(enable_doc), 1) install: install_doc endif +uninstall_bin: + $(RM) -v $(foreach b,$(notdir $(BINS)),$(BINDIR)/$(b)) + +uninstall_include: + $(RM) -v $(foreach h,$(notdir $(C_HDRS)),$(INCLUDEDIR)/jemalloc/$(h)) + rmdir -v $(INCLUDEDIR)/jemalloc + +uninstall_lib_shared: + $(RM) -v $(LIBDIR)/$(LIBJEMALLOC).$(SOREV) +ifneq ($(SOREV),$(SO)) + $(RM) -v 
$(LIBDIR)/$(LIBJEMALLOC).$(SO) +endif + +uninstall_lib_static: + $(RM) -v $(foreach l,$(notdir $(STATIC_LIBS)),$(LIBDIR)/$(l)) + +uninstall_lib_pc: + $(RM) -v $(foreach p,$(notdir $(PC)),$(LIBDIR)/pkgconfig/$(p)) + +ifeq ($(enable_shared), 1) +uninstall_lib: uninstall_lib_shared +endif +ifeq ($(enable_static), 1) +uninstall_lib: uninstall_lib_static +endif +uninstall_lib: uninstall_lib_pc + +uninstall_doc_html: + $(RM) -v $(foreach d,$(notdir $(DOCS_HTML)),$(DATADIR)/doc/jemalloc$(install_suffix)/$(d)) + rmdir -v $(DATADIR)/doc/jemalloc$(install_suffix) + +uninstall_doc_man: + $(RM) -v $(foreach d,$(notdir $(DOCS_MAN3)),$(MANDIR)/man3/$(d)) + +uninstall_doc: uninstall_doc_html uninstall_doc_man + +uninstall: uninstall_bin uninstall_include uninstall_lib + +ifeq ($(enable_doc), 1) +uninstall: uninstall_doc +endif + tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) From 36a09ba2c712612675f182fe879514a6078f5c77 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 18 Jan 2022 14:08:01 -0800 Subject: [PATCH 2181/2608] Forbid spaces in install suffix To avoid potential issues with removing unintended files after 'make uninstall', spaces are no longer allowed in install suffix. It's worth mentioning, that with GNU Make on Linux spaces in install suffix didn't work anyway, leading to errors in the Makefile. But being verbose about this restriction makes it more transparent for the developers. --- configure.ac | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ac916c75..6a5d082a 100644 --- a/configure.ac +++ b/configure.ac @@ -1109,7 +1109,10 @@ AC_SUBST([private_namespace]) dnl Do not add suffix to installed files by default. 
AC_ARG_WITH([install_suffix], [AS_HELP_STRING([--with-install-suffix=], [Suffix to append to all installed files])], - [INSTALL_SUFFIX="$with_install_suffix"], + [case "$with_install_suffix" in + *\ * ) AC_MSG_ERROR([Install suffix should not contain spaces]) ;; + * ) INSTALL_SUFFIX="$with_install_suffix" ;; +esac], [INSTALL_SUFFIX=] ) install_suffix="$INSTALL_SUFFIX" From eafd2ac39fc4b608fc24b755670ff5138b9173ee Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 18 Jan 2022 17:20:57 -0800 Subject: [PATCH 2182/2608] Forbid spaces in prefix and exec_prefix Spaces in these are also not handled correctly by Make, so there's sense in not allowing that. --- configure.ac | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 6a5d082a..0661005d 100644 --- a/configure.ac +++ b/configure.ac @@ -131,12 +131,14 @@ abs_objroot="`pwd`/" AC_SUBST([abs_objroot]) dnl Munge install path variables. -if test "x$prefix" = "xNONE" ; then - prefix="/usr/local" -fi -if test "x$exec_prefix" = "xNONE" ; then - exec_prefix=$prefix -fi +case "$prefix" in + *\ * ) AC_MSG_ERROR([Prefix should not contain spaces]) ;; + "NONE" ) prefix="/usr/local" ;; +esac +case "$exec_prefix" in + *\ * ) AC_MSG_ERROR([Exec prefix should not contain spaces]) ;; + "NONE" ) exec_prefix=$prefix ;; +esac PREFIX=$prefix AC_SUBST([PREFIX]) BINDIR=`eval echo $bindir` From b798fabdf7c86288f303b1e0bcf877c9ded67c18 Mon Sep 17 00:00:00 2001 From: yunxu Date: Wed, 12 Jan 2022 18:46:34 +0800 Subject: [PATCH 2183/2608] Add prof_leak_error option The option makes the process exit with error code 1 if a memory leak is detected. This is useful for implementing automated tools that rely on leak detection.
--- doc/jemalloc.xml.in | 19 +++++++++++++++++++ include/jemalloc/internal/prof_externs.h | 1 + src/ctl.c | 3 +++ src/jemalloc.c | 20 ++++++++++++++++++++ src/prof.c | 1 + src/prof_data.c | 10 ++++++++++ src/stats.c | 1 + test/unit/mallctl.c | 1 + 8 files changed, 56 insertions(+) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index cba0b3f6..6e2099ad 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1553,6 +1553,25 @@ malloc_conf = "xmalloc:true";]]> + + + opt.prof_leak_error + (bool) + r- + [] + + Similar to + opt.prof_leak, but makes the process exit with error + code 1 if a memory leak is detected. This option supersedes + opt.prof_leak, + meaning that if both are specified, this option takes precedence. When + enabled, also enables + opt.prof_leak. Works only when combined with + opt.prof_final, + otherwise does nothing. This option is disabled by default. + + + opt.zero_realloc diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 953192f4..bdff1349 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -12,6 +12,7 @@ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. */ extern bool opt_prof_leak; /* Dump leak summary at exit. */ +extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ extern bool opt_prof_accum; /* Report cumulative bytes. */ extern bool opt_prof_log; /* Turn logging on at boot. 
*/ extern char opt_prof_prefix[ diff --git a/src/ctl.c b/src/ctl.c index 6e0088f6..54d33aed 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -145,6 +145,7 @@ CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) +CTL_PROTO(opt_prof_leak_error) CTL_PROTO(opt_prof_accum) CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_stats) @@ -469,6 +470,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("prof_stats"), CTL(opt_prof_stats)}, @@ -2201,6 +2203,7 @@ CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_leak_error, opt_prof_leak_error, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 17a27ae0..117a005c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1578,6 +1578,26 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + if (CONF_MATCH("prof_leak_error")) { + if (CONF_MATCH_VALUE("true")) { + if (!opt_prof_final) { + CONF_ERROR( + "prof_leak_error is" + " not allowed" + " without" + " prof_leak_final", + k, klen, v, vlen); + } else { + opt_prof_leak = true; + opt_prof_leak_error = + true; + } + } else if (!CONF_MATCH_VALUE("false")) { + 
CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) diff --git a/src/prof.c b/src/prof.c index f708d108..cbfc7409 100644 --- a/src/prof.c +++ b/src/prof.c @@ -31,6 +31,7 @@ ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; bool opt_prof_final = false; bool opt_prof_leak = false; +bool opt_prof_leak_error = false; bool opt_prof_accum = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_sys_thread_name = false; diff --git a/src/prof_data.c b/src/prof_data.c index 3ef0100d..bfa55be1 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1037,6 +1037,16 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on dump output for leak detail\n"); + if (opt_prof_leak_error) { + malloc_printf( + ": Exiting with error code because memory" + " leaks were detected\n"); + /* + * Use _exit() with underscore to avoid calling atexit() + * and entering endless cycle. 
+ */ + _exit(1); + } } #endif } diff --git a/src/stats.c b/src/stats.c index bed585b1..efc70fd3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1530,6 +1530,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("prof_gdump") OPT_WRITE_BOOL("prof_final") OPT_WRITE_BOOL("prof_leak") + OPT_WRITE_BOOL("prof_leak_error") OPT_WRITE_BOOL("stats_print") OPT_WRITE_CHAR_P("stats_print_opts") OPT_WRITE_BOOL("stats_print") diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index bd5ef9e5..6efc8f1b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -320,6 +320,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_gdump, prof); TEST_MALLCTL_OPT(bool, prof_final, prof); TEST_MALLCTL_OPT(bool, prof_leak, prof); + TEST_MALLCTL_OPT(bool, prof_leak_error, prof); TEST_MALLCTL_OPT(ssize_t, prof_recent_alloc_max, prof); TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); From 01a293fc08ba8b6df1824ffecd10d2be5879b980 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 21 Dec 2021 16:15:14 -0800 Subject: [PATCH 2184/2608] Add Windows to TravisCI Implement the generation of Travis jobs for Windows. Currently, the generated jobs replicate Appveyor setup and complete successfully. There is support for MinGW GCC and MSVC compilers as well as 64 and 32 bit compilation. Linux and MacOS jobs behave identically, but some environment variables change - CROSS_COMPILE_32BIT=yes is added for builds with cross compilation, empty COMPILER_FLAGS are not set anymore. 
--- .travis.yml | 273 ++++++++++++++++-------------- scripts/gen_travis.py | 151 +++++++++++------ scripts/linux/before_install.sh | 13 ++ scripts/windows/before_install.sh | 83 +++++++++ scripts/windows/before_script.sh | 20 +++ scripts/windows/script.sh | 10 ++ 6 files changed, 375 insertions(+), 175 deletions(-) create mode 100644 scripts/linux/before_install.sh create mode 100644 scripts/windows/before_install.sh create mode 100644 scripts/windows/before_script.sh create mode 100644 scripts/windows/script.sh diff --git a/.travis.yml b/.travis.yml index ecc13f4f..97444250 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,322 +1,333 @@ - # This config file is generated by ./scripts/gen_travis.py. # Do not edit by hand. -language: generic +# We use 'minimal', because 'generic' makes Windows VMs hang at startup. Also +# the software provided by 'generic' is simply not needed for our tests. +# Differences are explained here: +# https://docs.travis-ci.com/user/languages/minimal-and-generic/ +language: minimal dist: focal jobs: include: - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - addons: &gcc_multilib - apt: - packages: - - gcc-multilib - - g++-multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=clang CXX=clang++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" 
CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - addons: *gcc_multilib - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: 
CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks 
--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: ppc64le - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ COMPILER_FLAGS="" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + - os: windows + arch: amd64 + env: CC=cl.exe 
CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -325,14 +336,30 @@ jobs: env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" +before_install: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_install.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_install.sh + fi + before_script: - - autoconf - - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - # If COMPILER_FLAGS are not empty, add them to CC and CXX - - ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS - - make -j3 - - make -j3 tests + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_script.sh + else + scripts/gen_travis.py > travis_script && diff .travis.yml travis_script + autoconf + # If COMPILER_FLAGS are not empty, add them to CC and CXX + ./configure ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS + make -j3 + make -j3 tests + fi script: - - make check + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/script.sh + else + make check + fi diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index e98ebeb6..63e00549 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -6,6 +6,7 @@ from enum import Enum, auto LINUX = 'linux' OSX = 'osx' +WINDOWS = 'windows' AMD64 = 'amd64' @@ -13,28 +14,48 @@ ARM64 = 'arm64' PPC64LE = 'ppc64le' -TRAVIS_TEMPLATE = """ +TRAVIS_TEMPLATE = """\ # This config file is generated by ./scripts/gen_travis.py. # Do not edit by hand. -language: generic +# We use 'minimal', because 'generic' makes Windows VMs hang at startup. 
Also +# the software provided by 'generic' is simply not needed for our tests. +# Differences are explained here: +# https://docs.travis-ci.com/user/languages/minimal-and-generic/ +language: minimal dist: focal jobs: include: {jobs} +before_install: + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_install.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_install.sh + fi + before_script: - - autoconf - - scripts/gen_travis.py > travis_script && diff .travis.yml travis_script - # If COMPILER_FLAGS are not empty, add them to CC and CXX - - ./configure ${{COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" \ + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/before_script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/before_script.sh + else + scripts/gen_travis.py > travis_script && diff .travis.yml travis_script + autoconf + # If COMPILER_FLAGS are not empty, add them to CC and CXX + ./configure ${{COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" \ CXX="$CXX $COMPILER_FLAGS"}} $CONFIGURE_FLAGS - - make -j3 - - make -j3 tests + make -j3 + make -j3 tests + fi script: - - make check + - |- + if test -f "./scripts/$TRAVIS_OS_NAME/script.sh"; then + source ./scripts/$TRAVIS_OS_NAME/script.sh + else + make check + fi """ @@ -44,6 +65,7 @@ class Option(object): COMPILER_FLAG = auto() CONFIGURE_FLAG = auto() MALLOC_CONF = auto() + FEATURE = auto() def __init__(self, type, value): self.type = type @@ -65,6 +87,10 @@ class Option(object): def as_malloc_conf(value): return Option(Option.Type.MALLOC_CONF, value) + @staticmethod + def as_feature(value): + return Option(Option.Type.FEATURE, value) + def __eq__(self, obj): return (isinstance(obj, Option) and obj.type == self.type and obj.value == self.value) @@ -81,13 +107,14 @@ MAX_UNUSUAL_OPTIONS = 2 GCC = Option.as_compiler('CC=gcc CXX=g++') CLANG = Option.as_compiler('CC=clang CXX=clang++') +CL = Option.as_compiler('CC=cl.exe CXX=cl.exe') -compiler_default = GCC compilers_unusual = [CLANG,] -compiler_flag_unusuals = 
[Option.as_compiler_flag(opt) for opt in ('-m32',)] +CROSS_COMPILE_32BIT = Option.as_feature('CROSS_COMPILE_32BIT') +feature_unusuals = [CROSS_COMPILE_32BIT] configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( @@ -108,73 +135,75 @@ malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( )] -all_unusuals = (compilers_unusual + compiler_flag_unusuals +all_unusuals = (compilers_unusual + feature_unusuals + configure_flag_unusuals + malloc_conf_unusuals) -gcc_multilib_set = False - - def get_extra_cflags(os, compiler): + if os == WINDOWS: + # For non-CL compilers under Windows (for now it's only MinGW-GCC), + # -fcommon needs to be specified to correctly handle multiple + # 'malloc_conf' symbols and such, which are declared weak under Linux. + # Weak symbols don't work with MinGW-GCC. + if compiler != CL.value: + return ['-fcommon'] + else: + return [] + # We get some spurious errors when -Warray-bounds is enabled. extra_cflags = ['-Werror', '-Wno-array-bounds'] if compiler == CLANG.value or os == OSX: extra_cflags += [ - '-Wno-unknown-warning-option', - '-Wno-ignored-attributes' - ] + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] if os == OSX: extra_cflags += [ - '-Wno-deprecated-declarations', - ] + '-Wno-deprecated-declarations', + ] return extra_cflags # Formats a job from a combination of flags def format_job(os, arch, combination): - global gcc_multilib_set - - compiler = [x.value for x in combination if x.type == Option.Type.COMPILER] - assert(len(compiler) <= 1) - if not compiler: - compiler = compiler_default.value - else: - compiler = compiler[0] + compilers = [x.value for x in combination if x.type == Option.Type.COMPILER] + assert(len(compilers) <= 1) compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG] configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG] malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF] + features 
= [x.value for x in combination if x.type == Option.Type.FEATURE] if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) - job = "" - job += ' - os: {}\n'.format(os) - job += ' arch: {}\n'.format(arch) + if not compilers: + compiler = GCC.value + else: + compiler = compilers[0] - if '-m32' in compiler_flags and os == 'linux': - job += ' addons:' - if gcc_multilib_set: - job += ' *gcc_multilib\n' - else: - job += ' &gcc_multilib\n' - job += ' apt:\n' - job += ' packages:\n' - job += ' - gcc-multilib\n' - job += ' - g++-multilib\n' - gcc_multilib_set = True + extra_environment_vars = '' + cross_compile = CROSS_COMPILE_32BIT.value in features + if os == LINUX and cross_compile: + compiler_flags.append('-m32') - env_string = ('{} COMPILER_FLAGS="{}" CONFIGURE_FLAGS="{}" ' - 'EXTRA_CFLAGS="{}"'.format( + features_str = ' '.join([' {}=yes'.format(feature) for feature in features]) + + stringify = lambda arr, name: ' {}="{}"'.format(name, ' '.join(arr)) if arr else '' + env_string = '{}{}{}{}{}{}'.format( compiler, - ' '.join(compiler_flags), - ' '.join(configure_flags), - ' '.join(get_extra_cflags(os, compiler)))) + features_str, + stringify(compiler_flags, 'COMPILER_FLAGS'), + stringify(configure_flags, 'CONFIGURE_FLAGS'), + stringify(get_extra_cflags(os, compiler), 'EXTRA_CFLAGS'), + extra_environment_vars) + job = ' - os: {}\n'.format(os) + job += ' arch: {}\n'.format(arch) job += ' env: {}'.format(env_string) return job -def generate_unusual_combinations(max_unusual_opts): +def generate_unusual_combinations(unusuals, max_unusual_opts): """ Generates different combinations of non-standard compilers, compiler flags, configure flags and malloc_conf settings. @@ -182,20 +211,22 @@ def generate_unusual_combinations(max_unusual_opts): @param max_unusual_opts: Limit of unusual options per combination. 
""" return chain.from_iterable( - [combinations(all_unusuals, i) for i in range(max_unusual_opts + 1)]) + [combinations(unusuals, i) for i in range(max_unusual_opts + 1)]) def included(combination, exclude): """ Checks if the combination of options should be included in the Travis testing matrix. + + @param exclude: A list of options to be avoided. """ return not any(excluded in combination for excluded in exclude) -def generate_jobs(os, arch, exclude, max_unusual_opts): +def generate_jobs(os, arch, exclude, max_unusual_opts, unusuals=all_unusuals): jobs = [] - for combination in generate_unusual_combinations(max_unusual_opts): + for combination in generate_unusual_combinations(unusuals, max_unusual_opts): if included(combination, exclude): jobs.append(format_job(os, arch, combination)) return '\n'.join(jobs) @@ -210,7 +241,7 @@ def generate_linux(arch): exclude = [] if arch == PPC64LE: # Avoid 32 bit builds and clang on PowerPC - exclude = [Option.as_compiler_flag('-m32')] + compilers_unusual + exclude = (CROSS_COMPILE_32BIT, CLANG,) return generate_jobs(os, arch, exclude, max_unusual_opts) @@ -230,6 +261,19 @@ def generate_macos(arch): return generate_jobs(os, arch, exclude, max_unusual_opts) +def generate_windows(arch): + os = WINDOWS + + max_unusual_opts = 3 + unusuals = ( + Option.as_configure_flag('--enable-debug'), + CL, + CROSS_COMPILE_32BIT, + ) + return generate_jobs(os, arch, (), max_unusual_opts, unusuals) + + + def get_manual_jobs(): return """\ # Development build @@ -251,6 +295,9 @@ def main(): generate_linux(PPC64LE), generate_macos(AMD64), + + generate_windows(AMD64), + get_manual_jobs() )) diff --git a/scripts/linux/before_install.sh b/scripts/linux/before_install.sh new file mode 100644 index 00000000..67417463 --- /dev/null +++ b/scripts/linux/before_install.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -ev + +if [[ "$TRAVIS_OS_NAME" != "linux" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected linux, got $TRAVIS_OS_NAME" + exit 1 +fi + +if [[ 
"$CROSS_COMPILE_32BIT" == "yes" ]]; then + sudo apt-get update + sudo apt-get -y install gcc-multilib g++-multilib +fi diff --git a/scripts/windows/before_install.sh b/scripts/windows/before_install.sh new file mode 100644 index 00000000..2740c458 --- /dev/null +++ b/scripts/windows/before_install.sh @@ -0,0 +1,83 @@ +#!/bin/bash + +set -e + +# The purpose of this script is to install build dependencies and set +# $build_env to a function that sets appropriate environment variables, +# to enable (mingw32|mingw64) environment if we want to compile with gcc, or +# (mingw32|mingw64) + vcvarsall.bat if we want to compile with cl.exe + +if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected windows, got $TRAVIS_OS_NAME" + exit 1 +fi + +[[ ! -f C:/tools/msys64/msys2_shell.cmd ]] && rm -rf C:/tools/msys64 +choco uninstall -y mingw +choco upgrade --no-progress -y msys2 + +msys_shell_cmd="cmd //C RefreshEnv.cmd && set MSYS=winsymlinks:nativestrict && C:\\tools\\msys64\\msys2_shell.cmd" + +msys2() { $msys_shell_cmd -defterm -no-start -msys2 -c "$*"; } +mingw32() { $msys_shell_cmd -defterm -no-start -mingw32 -c "$*"; } +mingw64() { $msys_shell_cmd -defterm -no-start -mingw64 -c "$*"; } + +if [[ "$CROSS_COMPILE_32BIT" == "yes" ]]; then + mingw=mingw32 + mingw_gcc_package_arch=i686 +else + mingw=mingw64 + mingw_gcc_package_arch=x86_64 +fi + +if [[ "$CC" == *"gcc"* ]]; then + $mingw pacman -S --noconfirm --needed \ + autotools \ + git \ + mingw-w64-${mingw_gcc_package_arch}-make \ + mingw-w64-${mingw_gcc_package_arch}-gcc \ + mingw-w64-${mingw_gcc_package_arch}-binutils + build_env=$mingw +elif [[ "$CC" == *"cl"* ]]; then + $mingw pacman -S --noconfirm --needed \ + autotools \ + git \ + mingw-w64-${mingw_gcc_package_arch}-make \ + mingw-w64-${mingw_gcc_package_arch}-binutils + + # In order to use MSVC compiler (cl.exe), we need to correctly set some environment + # variables, namely PATH, INCLUDE, LIB and LIBPATH. 
The correct values of these + # variables are set by a batch script "vcvarsall.bat". The code below generates + # a batch script that calls "vcvarsall.bat" and prints the environment variables. + # + # Then, those environment variables are transformed from cmd to bash format and put + # into a script $apply_vsenv. If cl.exe needs to be used from bash, one can + # 'source $apply_vsenv' and it will apply the environment variables needed for cl.exe + # to be located and function correctly. + # + # At last, a function "mingw_with_msvc_vars" is generated which forwards user input + # into a correct mingw (32 or 64) subshell that automatically performs 'source $apply_vsenv', + # making it possible for autotools to discover and use cl.exe. + vcvarsall="vcvarsall.tmp.bat" + echo "@echo off" > $vcvarsall + echo "call \"c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\\\vcvarsall.bat\" $USE_MSVC" >> $vcvarsall + echo "set" >> $vcvarsall + + apply_vsenv="./apply_vsenv.sh" + cmd //C $vcvarsall | grep -E "^PATH=" | sed -n -e 's/\(.*\)=\(.*\)/export \1=$PATH:"\2"/g' \ + -e 's/\([a-zA-Z]\):[\\\/]/\/\1\//g' \ + -e 's/\\/\//g' \ + -e 's/;\//:\//gp' > $apply_vsenv + cmd //C $vcvarsall | grep -E "^(INCLUDE|LIB|LIBPATH)=" | sed -n -e 's/\(.*\)=\(.*\)/export \1="\2"/gp' >> $apply_vsenv + + cat $apply_vsenv + mingw_with_msvc_vars() { $msys_shell_cmd -defterm -no-start -$mingw -c "source $apply_vsenv && ""$*"; } + build_env=mingw_with_msvc_vars + + rm -f $vcvarsall +else + echo "Unknown C compiler: $CC" + exit 1 +fi + +echo "Build environment function: $build_env" diff --git a/scripts/windows/before_script.sh b/scripts/windows/before_script.sh new file mode 100644 index 00000000..9d30abab --- /dev/null +++ b/scripts/windows/before_script.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +set -e + +if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected windows, got $TRAVIS_OS_NAME" + exit 1 +fi + +$build_env autoconf +$build_env ./configure $CONFIGURE_FLAGS +# 
mingw32-make simply means "make", unrelated to mingw32 vs mingw64. +# Simply disregard the prefix and treat is as "make". +$build_env mingw32-make -j3 +# At the moment, it's impossible to make tests in parallel, +# seemingly due to concurrent writes to '.pdb' file. I don't know why +# that happens, because we explicitly supply '/Fs' to the compiler. +# Until we figure out how to fix it, we should build tests sequentially +# on Windows. +$build_env mingw32-make tests diff --git a/scripts/windows/script.sh b/scripts/windows/script.sh new file mode 100644 index 00000000..3a27f70a --- /dev/null +++ b/scripts/windows/script.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +set -e + +if [[ "$TRAVIS_OS_NAME" != "windows" ]]; then + echo "Incorrect \$TRAVIS_OS_NAME: expected windows, got $TRAVIS_OS_NAME" + exit 1 +fi + +$build_env mingw32-make -k check From 002f0e939795991f3f30fd0a6b0470094890305f Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 10 Jan 2022 17:29:17 -0800 Subject: [PATCH 2185/2608] Disable TravisCI jobs generation for Windows These jobs take about 20 minutes to complete. We don't want to enable them until we switch to unlimited concurrency plan, otherwise the builds will take way too long. 
--- .travis.yml | 24 ------------------------ scripts/gen_travis.py | 2 +- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index 97444250..c54cc454 100644 --- a/.travis.yml +++ b/.travis.yml @@ -304,30 +304,6 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 63e00549..685bad59 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -296,7 +296,7 @@ def main(): generate_macos(AMD64), - generate_windows(AMD64), + #generate_windows(AMD64), get_manual_jobs() )) From efc539c040cf11b19ffc8af29a8cc3e5c3609092 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Jan 2022 17:56:12 -0800 Subject: [PATCH 2186/2608] Initialize prof_leak during prof init. Otherwise, prof_leak may get set after prof_leak_error, and disagree with each other. 
--- src/jemalloc.c | 2 +- src/prof.c | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 117a005c..85c38dd6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1585,7 +1585,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "prof_leak_error is" " not allowed" " without" - " prof_leak_final", + " prof_final", k, klen, v, vlen); } else { opt_prof_leak = true; diff --git a/src/prof.c b/src/prof.c index cbfc7409..7a6d5d56 100644 --- a/src/prof.c +++ b/src/prof.c @@ -564,6 +564,9 @@ prof_boot1(void) { * opt_prof must be in its final state before any arenas are * initialized, so this function must be executed early. */ + if (opt_prof_leak_error && !opt_prof_leak) { + opt_prof_leak = true; + } if (opt_prof_leak && !opt_prof) { /* From 8c59c44ffa83bab0f73d5cc8f7d0bbc8d649220b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 26 Jan 2022 14:05:04 -0800 Subject: [PATCH 2187/2608] Add a dependency checking step at the end of malloc_conf_init. Currently only prof_leak_error and prof_final are checked. 
--- src/jemalloc.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 85c38dd6..364dc57f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1578,26 +1578,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - if (CONF_MATCH("prof_leak_error")) { - if (CONF_MATCH_VALUE("true")) { - if (!opt_prof_final) { - CONF_ERROR( - "prof_leak_error is" - " not allowed" - " without" - " prof_final", - k, klen, v, vlen); - } else { - opt_prof_leak = true; - opt_prof_leak_error = - true; - } - } else if (!CONF_MATCH_VALUE("false")) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } - CONF_CONTINUE; - } + CONF_HANDLE_BOOL(opt_prof_leak_error, + "prof_leak_error") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) @@ -1742,6 +1724,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } +static bool +malloc_conf_init_check_deps(void) { + if (opt_prof_leak_error && !opt_prof_final) { + malloc_printf(": prof_leak_error is set w/o " + "prof_final.\n"); + return true; + } + + return false; +} + static void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, @@ -1752,6 +1745,12 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); + if (malloc_conf_init_check_deps()) { + /* check_deps does warning msg only; abort below if needed. 
*/ + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } + } } #undef MALLOC_CONF_NSOURCES From 20f9802e4f25922884448d9581c66d76cc905c0c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 26 Jan 2022 18:40:49 -0800 Subject: [PATCH 2188/2608] Avoid overflow warnings in test/unit/safety_check. --- test/unit/safety_check.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c index 516a0969..84726675 100644 --- a/test/unit/safety_check.c +++ b/test/unit/safety_check.c @@ -13,6 +13,13 @@ void fake_abort(const char *message) { fake_abort_called = true; } +static void +buffer_overflow_write(char *ptr, size_t size) { + /* Avoid overflow warnings. */ + volatile size_t idx = size; + ptr[idx] = 0; +} + TEST_BEGIN(test_malloc_free_overflow) { test_skip_if(!config_prof); test_skip_if(!config_opt_safety_checks); @@ -20,7 +27,7 @@ TEST_BEGIN(test_malloc_free_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); free(ptr); safety_check_set_abort(NULL); @@ -36,7 +43,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = mallocx(128, 0); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); dallocx(ptr, 0); safety_check_set_abort(NULL); @@ -52,7 +59,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); sdallocx(ptr, 128, 0); safety_check_set_abort(NULL); @@ -68,7 +75,7 @@ TEST_BEGIN(test_realloc_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! 
*/ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); ptr = realloc(ptr, 129); safety_check_set_abort(NULL); free(ptr); @@ -85,7 +92,7 @@ TEST_BEGIN(test_rallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); ptr = rallocx(ptr, 129, 0); safety_check_set_abort(NULL); free(ptr); @@ -102,7 +109,7 @@ TEST_BEGIN(test_xallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ char* ptr = malloc(128); - ptr[128] = 0; + buffer_overflow_write(ptr, 128); size_t result = xallocx(ptr, 129, 0, 0); expect_zu_eq(result, 128, ""); free(ptr); From a4e81221cceeb887708d53015d3d1f1f9642980a Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 31 Jan 2022 12:28:15 -0800 Subject: [PATCH 2189/2608] Document 'make uninstall' Update INSTALL.md, reflecting the addition of 'uninstall' target. --- INSTALL.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index 14dacfa6..90da718d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -12,6 +12,10 @@ that might work is: make make install +You can uninstall the installed build artifacts like this: + + make uninstall + Notes: - "autoconf" needs to be installed - Documentation is built by the default target only when xsltproc is From 063d134aeb4807872f45a3b7e6b43bed8f6320a2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Feb 2022 17:30:11 -0800 Subject: [PATCH 2190/2608] Properly detect background thread support on Darwin. When cross-compile, the host type / abi should be checked to determine background thread compatibility. --- configure.ac | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 0661005d..abcd91d2 100644 --- a/configure.ac +++ b/configure.ac @@ -2394,7 +2394,8 @@ fi dnl ============================================================================ dnl Enable background threads if possible. 
-if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" ; then +if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ + "x${abi}" != "xmacho" ; then AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi From ca709c3139f77f4c00a903cdee46d71e9028f6c6 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 14 Feb 2022 17:57:14 -0800 Subject: [PATCH 2191/2608] Fix failed assertion due to racy memory access While calculating the number of stashed pointers, multiple variables potentially modified by a concurrent thread were used for the calculation. This led to some inconsistencies, correctly detected by the assertions. The change eliminates some possible inconsistencies by using unmodified variables and only once a concurrently modified one. The assertions are omitted for the cases where we acknowledge potential inconsistencies too. --- include/jemalloc/internal/cache_bin.h | 75 +++++++++++++++++++++------ src/cache_bin.c | 6 ++- 2 files changed, 63 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c98c46ad..caf5be33 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -194,8 +194,15 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * be associated with the position earlier in memory. */ static inline uint16_t -cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { - cache_bin_assert_earlier(bin, earlier, later); +cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) { + /* + * When it's racy, bin->low_bits_full can be modified concurrently. It + * can cross the uint16_t max value and become less than + * bin->low_bits_empty at the time of the check. 
+ */ + if (!racy) { + cache_bin_assert_earlier(bin, earlier, later); + } return later - earlier; } @@ -207,7 +214,7 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { static inline cache_bin_sz_t cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy); cache_bin_sz_t n = diff / sizeof(void *); /* * We have undefined behavior here; if this function is called from the @@ -239,11 +246,15 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { * Internal. * * A pointer to the position one past the end of the backing array. + * + * Do not call if racy, because both 'bin->stack_head' and 'bin->low_bits_full' + * are subject to concurrent modifications. */ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, + /* racy */ false); uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; void **ret = (void **)empty_bits; @@ -252,6 +263,22 @@ cache_bin_empty_position_get(cache_bin_t *bin) { return ret; } +/* + * Internal. + * + * Calculates low bits of the lower bound of the usable cache bin's range (see + * cache_bin_t visual representation above). + * + * No values are concurrently modified, so should be safe to read in a + * multithreaded environment. Currently concurrent access happens only during + * arena statistics collection. + */ +static inline uint16_t +cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { + return (uint16_t)bin->low_bits_empty - + info->ncached_max * sizeof(void *); +} + /* * Internal. 
* @@ -284,7 +311,7 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { static inline cache_bin_sz_t cache_bin_low_water_get_internal(cache_bin_t *bin) { return cache_bin_diff(bin, bin->low_bits_low_water, - bin->low_bits_empty) / sizeof(void *); + bin->low_bits_empty, /* racy */ false) / sizeof(void *); } /* Returns the numeric value of low water in [0, ncached]. */ @@ -427,7 +454,8 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Stash at the full position, in the [full, head) range. */ uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. */ - uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head, + /* racy */ false); *(void **)((uintptr_t)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -437,31 +465,46 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { return true; } +/* + * Get the number of stashed pointers. + * + * When called from a thread not owning the TLS (i.e. racy = true), it's + * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full' can + * be modified concurrently and almost none assertions about their values can be + * made. + */ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, bool racy) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); - void **low_bound = cache_bin_low_bound_get(bin, info); + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, + info); - cache_bin_sz_t n = cache_bin_diff(bin, (uint16_t)(uintptr_t)low_bound, - bin->low_bits_full) / sizeof(void *); + cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, + bin->low_bits_full, racy) / sizeof(void *); assert(n <= ncached_max); - /* Below are for assertions only. 
*/ - void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + if (!racy) { + /* Below are for assertions only. */ + void **low_bound = cache_bin_low_bound_get(bin, info); + + assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + void *stashed = *(low_bound + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET - /* Allow arbitrary pointers to be stashed in tests. */ - aligned = true; + /* Allow arbitrary pointers to be stashed in tests. */ + aligned = true; #endif - assert(n == 0 || (stashed != NULL && aligned) || racy); + assert(n == 0 || (stashed != NULL && aligned)); + } return n; } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, false); + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, + /* racy */ false); assert(n <= cache_bin_info_ncached_max(info)); return n; } diff --git a/src/cache_bin.c b/src/cache_bin.c index b8d81ef1..9ae072a0 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -83,8 +83,10 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; bin->low_bits_full = (uint16_t)(uintptr_t)full_position; bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; - assert(cache_bin_diff(bin, bin->low_bits_full, - (uint16_t)(uintptr_t) bin->stack_head) == bin_stack_size); + cache_bin_sz_t free_spots = cache_bin_diff(bin, + bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head, + /* racy */ false); + assert(free_spots == bin_stack_size); assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == empty_position); From 78b58379c854a639df79beb3289351129d863d4b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 1 Mar 2022 18:31:30 +0300 Subject: [PATCH 2192/2608] Fix possible "nmalloc >= ndalloc" assertion. 
It is possible that ndalloc will be updated before nmalloc, in arena_large_ralloc_stats_update(), fix this by reorder those calls. It was found by ClickHouse CI, that periodically hits this assertion [1]. [1]: https://github.com/ClickHouse/ClickHouse/issues/31531 That issue contains lots of examples, with core dump and some gdb output [2]. [2]: https://s3.amazonaws.com/clickhouse-test-reports/34951/96390a9263cb5af3d6e42a84988239c9ae87ce32/stress_test__debug__actions_.html Here you can find binaries for that particular report [3] you need clickhouse debug build [4]. [3]: https://s3.amazonaws.com/clickhouse-builds/34951/96390a9263cb5af3d6e42a84988239c9ae87ce32/clickhouse_build_check_(actions)/report.html [4]: https://s3.amazonaws.com/clickhouse-builds/34951/96390a9263cb5af3d6e42a84988239c9ae87ce32/package_debug/clickhouse Brief info from that report: 2 0x000000002ad6dbfe in arena_stats_merge (tsdn=0x7f2399abdd20, arena=0x7f241ce01080, nthreads=0x7f24e4360958, dss=0x7f24e4360960, dirty_decay_ms=0x7f24e4360968, muzzy_decay_ms=0x7f24e4360970, nactive=0x7f24e4360978, ndirty=0x7f24e43 e4360988, astats=0x7f24e4360998, bstats=0x7f24e4363310, lstats=0x7f24e4364990, estats=0x7f24e4366e50, hpastats=0x7f24e43693a0, secstats=0x7f24e436a020) at ../contrib/jemalloc/src/arena.c:138 ndalloc = 226 nflush = 0 curlextents = 0 nmalloc = 225 nrequests = 0 Here you can see that they differs only by 1. 
Signed-off-by: Azat Khuzhin --- src/arena.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index bf880d71..857b27c5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -322,8 +322,8 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { static void arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, size_t usize) { - arena_large_dalloc_stats_update(tsdn, arena, oldusize); arena_large_malloc_stats_update(tsdn, arena, usize); + arena_large_dalloc_stats_update(tsdn, arena, oldusize); } edata_t * From eb65d1b07830b285bf7ac7678e964f080cd3916a Mon Sep 17 00:00:00 2001 From: Alex Lapenkov Date: Sat, 22 Jan 2022 10:14:16 -0800 Subject: [PATCH 2193/2608] Fix FreeBSD system jemalloc TSD cleanup Before this commit, in case FreeBSD libc jemalloc was overridden by another jemalloc, proper thread shutdown callback was involved only for the overriding jemalloc. A call to _malloc_thread_cleanup from libthr would be redirected to user jemalloc, leaving data about dead threads hanging in system jemalloc. This change tackles the issue in two ways. First, for current and old system jemallocs, which we can not modify, the overriding jemalloc would locate and invoke system cleanup routine. For upcoming jemalloc integrations, the cleanup registering function will also be redirected to user jemalloc, which means that system jemalloc's cleanup routine will be registered in user's jemalloc and a single call to _malloc_thread_cleanup will be sufficient to invoke both callbacks. 
--- configure.ac | 2 +- include/jemalloc/internal/tsd.h | 5 ++++- .../internal/tsd_malloc_thread_cleanup.h | 2 +- include/jemalloc/internal/tsd_types.h | 2 +- include/jemalloc/internal/tsd_win.h | 2 +- src/tsd.c | 16 +++++++++++----- 6 files changed, 19 insertions(+), 10 deletions(-) diff --git a/configure.ac b/configure.ac index abcd91d2..69b8162f 100644 --- a/configure.ac +++ b/configure.ac @@ -1999,7 +1999,7 @@ AC_CHECK_FUNC([_malloc_thread_cleanup], ) if test "x$have__malloc_thread_cleanup" = "x1" ; then AC_DEFINE([JEMALLOC_MALLOC_THREAD_CLEANUP], [ ], [ ]) - wrap_syms="${wrap_syms} _malloc_thread_cleanup" + wrap_syms="${wrap_syms} _malloc_thread_cleanup _malloc_tsd_cleanup_register" force_tls="1" fi diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 0a46d448..66d68822 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -153,9 +153,12 @@ typedef ql_elm(tsd_t) tsd_link_t; TSD_DATA_SLOWER_INITIALIZER \ } +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +void _malloc_tsd_cleanup_register(bool (*f)(void)); +#endif + void *malloc_tsd_malloc(size_t size); void malloc_tsd_dalloc(void *wrapper); -void malloc_tsd_cleanup_register(bool (*f)(void)); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); void tsd_cleanup(void *arg); diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index 65852d5c..d8f3ef13 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -21,7 +21,7 @@ tsd_cleanup_wrapper(void) { JEMALLOC_ALWAYS_INLINE bool tsd_boot0(void) { - malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_booted = true; return false; } diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 6200af61..a6ae37da 100644 --- 
a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H -#define MALLOC_TSD_CLEANUPS_MAX 2 +#define MALLOC_TSD_CLEANUPS_MAX 4 typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index cf30d18e..a91dac88 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -72,7 +72,7 @@ tsd_boot0(void) { if (tsd_tsd == TLS_OUT_OF_INDEXES) { return true; } - malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); + _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_wrapper_set(&tsd_boot_wrapper); tsd_booted = true; return false; diff --git a/src/tsd.c b/src/tsd.c index 4859048e..b98c34bf 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -9,9 +9,6 @@ /******************************************************************************/ /* Data. */ -static unsigned ncleanups; -static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; - /* TSD_INITIALIZER triggers "-Wmissing-field-initializer" */ JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS @@ -337,6 +334,9 @@ malloc_tsd_dalloc(void *wrapper) { } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +static unsigned ncleanups; +static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; + #ifndef _WIN32 JEMALLOC_EXPORT #endif @@ -361,15 +361,19 @@ _malloc_thread_cleanup(void) { } } while (again); } -#endif +#ifndef _WIN32 +JEMALLOC_EXPORT +#endif void -malloc_tsd_cleanup_register(bool (*f)(void)) { +_malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); cleanups[ncleanups] = f; ncleanups++; } +#endif + static void tsd_do_data_cleanup(tsd_t *tsd) { prof_tdata_cleanup(tsd); @@ -429,7 +433,9 @@ tsd_t * malloc_tsd_boot0(void) { tsd_t *tsd; +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) 
ncleanups = 0; +#endif if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { return NULL; From 7ae0f15c598258610dd3cfd9633301ffa8661c45 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 21 Mar 2022 12:15:16 -0700 Subject: [PATCH 2194/2608] Add a default page size when cross-compile for Apple M1. When cross-compile for M1 and no page size specified, use the default 16K and skip detecting the page size (which is likely incorrect). --- configure.ac | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/configure.ac b/configure.ac index 69b8162f..5c7a8ef8 100644 --- a/configure.ac +++ b/configure.ac @@ -1677,6 +1677,15 @@ fi AC_ARG_WITH([lg_page], [AS_HELP_STRING([--with-lg-page=], [Base 2 log of system page size])], [LG_PAGE="$with_lg_page"], [LG_PAGE="detect"]) +case "${host}" in + aarch64-apple-darwin*) + dnl When cross-compile for Apple M1 and no page size specified, use the + dnl default and skip detecting the page size (which is likely incorrect). + if test "x${host}" != "x${build}" -a "x$LG_PAGE" = "xdetect"; then + LG_PAGE=14 + fi + ;; +esac if test "x$LG_PAGE" = "xdetect"; then AC_CACHE_CHECK([LG_PAGE], [je_cv_lg_page], From 52631c90f664ded0a5106a7d5fd906d46a7c1f81 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 14 Mar 2022 20:17:14 -0700 Subject: [PATCH 2195/2608] Fix size class calculation for sec Due to a bug in sec initialization, the number of cached size classes was equal to 198. The bug caused the creation of more than a hundred of unused bins, although it didn't affect the caching logic. 
--- src/sec.c | 13 ++++++++----- test/unit/sec.c | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/sec.c b/src/sec.c index 0c4e7032..6fffaf1e 100644 --- a/src/sec.c +++ b/src/sec.c @@ -23,11 +23,11 @@ sec_bin_init(sec_bin_t *bin) { bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { - size_t max_alloc = opts->max_alloc & PAGE_MASK; - pszind_t npsizes = sz_psz2ind(max_alloc); - if (sz_pind2sz(npsizes) > opts->max_alloc) { - npsizes--; - } + assert(opts->max_alloc > 0); + + size_t max_alloc = opts->max_alloc & ~PAGE_MASK; + pszind_t npsizes = sz_psz2ind(max_alloc) + 1; + size_t sz_shards = opts->nshards * sizeof(sec_shard_t); size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); size_t sz_alloc = sz_shards + sz_bins; @@ -232,6 +232,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, deferred_work_generated); } pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + sec_shard_t *shard = sec_shard_pick(tsdn, sec); sec_bin_t *bin = &shard->bins[pszind]; bool do_batch_fill = false; @@ -305,6 +307,7 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, assert(shard->bytes_cur <= sec->opts.max_bytes); size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); /* * Prepending here results in LIFO allocation per bin, which seems * reasonable. 
diff --git a/test/unit/sec.c b/test/unit/sec.c index e98bdc92..f3ec403d 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -46,6 +46,7 @@ test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); assert_false(err, "Unexpected initialization failure"); + assert_u_ge(sec->npsizes, 0, "Zero size classes allowed for caching"); } static inline edata_t * From 5bf03f8ce5802b90a16b595e962fe4f07ce7fe93 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 22 Mar 2022 14:33:04 -0700 Subject: [PATCH 2196/2608] Implement PAGE_FLOOR macro --- include/jemalloc/internal/pages.h | 3 +++ src/sec.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 3d7993dd..ad1f606a 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -13,6 +13,9 @@ /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) +/* Return the largest pagesize multiple that is <=s. */ +#define PAGE_FLOOR(s) \ + ((s) & ~PAGE_MASK) /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) diff --git a/src/sec.c b/src/sec.c index 6fffaf1e..c13904d8 100644 --- a/src/sec.c +++ b/src/sec.c @@ -25,7 +25,7 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { assert(opts->max_alloc > 0); - size_t max_alloc = opts->max_alloc & ~PAGE_MASK; + size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; size_t sz_shards = opts->nshards * sizeof(sec_shard_t); From eaaa368bab472a78e99a25c1641d24ad3c2283ad Mon Sep 17 00:00:00 2001 From: Charles Date: Tue, 1 Feb 2022 20:26:39 +0800 Subject: [PATCH 2197/2608] Add comments and use meaningful vars in sz_psz2ind. 
--- Makefile.in | 1 + include/jemalloc/internal/sc.h | 1 + include/jemalloc/internal/sz.h | 48 ++++++++++++++++++++----- src/sc.c | 2 +- test/unit/sz.c | 66 ++++++++++++++++++++++++++++++++++ 5 files changed, 108 insertions(+), 10 deletions(-) create mode 100644 test/unit/sz.c diff --git a/Makefile.in b/Makefile.in index 8e16982e..cf6d5687 100644 --- a/Makefile.in +++ b/Makefile.in @@ -278,6 +278,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/spin.c \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ + $(srcroot)test/unit/sz.c \ $(srcroot)test/unit/tcache_max.c \ $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/thread_event.c \ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 8efd3249..308985fa 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -344,6 +344,7 @@ struct sc_data_s { sc_t sc[SC_NSIZES]; }; +size_t reg_size_compute(int lg_base, int lg_delta, int ndelta); void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index f2be6139..3c0fc1da 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,22 +55,52 @@ extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { + assert(psz > 0); if (unlikely(psz > SC_LARGE_MAXCLASS)) { return SC_NPSIZES; } - pszind_t x = lg_floor((psz<<1)-1); - pszind_t shift = (x < SC_LG_NGROUP + LG_PAGE) ? + /* x is the lg of the first base >= psz. */ + pszind_t x = lg_ceil(psz); + /* + * sc.h introduces a lot of size classes. These size classes are divided + * into different size class groups. There is a very special size class + * group, each size class in or after it is an integer multiple of PAGE. + * We call it first_ps_rg. It means first page size regular group. 
The + * range of first_ps_rg is (base, base * 2], and base == PAGE * + * SC_NGROUP. off_to_first_ps_rg begins from 1, instead of 0. e.g. + * off_to_first_ps_rg is 1 when psz is (PAGE * SC_NGROUP + 1). + */ + pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) ? 0 : x - (SC_LG_NGROUP + LG_PAGE); - pszind_t grp = shift << SC_LG_NGROUP; - pszind_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? - LG_PAGE : x - SC_LG_NGROUP - 1; + /* + * Same as sc_s::lg_delta. + * Delta for off_to_first_ps_rg == 1 is PAGE, + * for each increase in offset, it's multiplied by two. + * Therefore, lg_delta = LG_PAGE + (off_to_first_ps_rg - 1). + */ + pszind_t lg_delta = (off_to_first_ps_rg == 0) ? + LG_PAGE : LG_PAGE + (off_to_first_ps_rg - 1); - size_t delta_inverse_mask = ZU(-1) << lg_delta; - pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << SC_LG_NGROUP) - 1); + /* + * Let's write psz in binary, e.g. 0011 for 0x3, 0111 for 0x7. + * The leftmost bits whose len is lg_base decide the base of psz. + * The rightmost bits whose len is lg_delta decide (pgz % PAGE). + * The middle bits whose len is SC_LG_NGROUP decide ndelta. + * ndelta is offset to the first size class in the size class group, + * starts from 1. + * If you don't know lg_base, ndelta or lg_delta, see sc.h. + * |xxxxxxxxxxxxxxxxxxxx|------------------------|yyyyyyyyyyyyyyyyyyyyy| + * |<-- len: lg_base -->|<-- len: SC_LG_NGROUP-->|<-- len: lg_delta -->| + * |<-- ndelta -->| + * rg_inner_off = ndelta - 1 + * Why use (psz - 1)? + * To handle case: psz % (1 << lg_delta) == 0. + */ + pszind_t rg_inner_off = (((psz - 1)) >> lg_delta) & (SC_NGROUP - 1); - pszind_t ind = grp + mod; + pszind_t base_ind = off_to_first_ps_rg << SC_LG_NGROUP; + pszind_t ind = base_ind + rg_inner_off; return ind; } diff --git a/src/sc.c b/src/sc.c index 9a0f76d5..e4a94d89 100644 --- a/src/sc.c +++ b/src/sc.c @@ -13,7 +13,7 @@ * at least the damage is compartmentalized to this file. 
*/ -static size_t +size_t reg_size_compute(int lg_base, int lg_delta, int ndelta) { return (ZU(1) << lg_base) + (ZU(ndelta) << lg_delta); } diff --git a/test/unit/sz.c b/test/unit/sz.c new file mode 100644 index 00000000..be11aca4 --- /dev/null +++ b/test/unit/sz.c @@ -0,0 +1,66 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_sz_psz2ind) { + /* + * Testing page size classes which reside prior to the regular group + * with all size classes divisible by page size. + * For x86_64 Linux, it's 4096, 8192, 12288, 16384, with correponding + * pszind 0, 1, 2 and 3. + */ + for (size_t i = 0; i < SC_NGROUP; i++) { + for (size_t psz = i * PAGE + 1; psz <= (i + 1) * PAGE; psz++) { + pszind_t ind = sz_psz2ind(psz); + expect_zu_eq(ind, i, "Got %u as sz_psz2ind of %zu", ind, + psz); + } + } + + sc_data_t data; + memset(&data, 0, sizeof(data)); + sc_data_init(&data); + /* + * 'base' is the base of the first regular group with all size classes + * divisible by page size. + * For x86_64 Linux, it's 16384, and base_ind is 36. + */ + size_t base_psz = 1 << (SC_LG_NGROUP + LG_PAGE); + size_t base_ind = 0; + while (base_ind < SC_NSIZES && + reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, + data.sc[base_ind].ndelta) < base_psz) { + base_ind++; + } + expect_zu_eq( + reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta), + base_psz, "Size class equal to %zu not found", base_psz); + /* + * Test different sizes falling into groups after the 'base'. The + * increment is PAGE / 3 for the execution speed purpose. 
+ */ + base_ind -= SC_NGROUP; + for (size_t psz = base_psz; psz <= 64 * 1024 * 1024; psz += PAGE / 3) { + pszind_t ind = sz_psz2ind(psz); + sc_t gt_sc = data.sc[ind + base_ind]; + expect_zu_gt(psz, + reg_size_compute(gt_sc.lg_base, gt_sc.lg_delta, + gt_sc.ndelta), + "Got %u as sz_psz2ind of %zu", ind, psz); + sc_t le_sc = data.sc[ind + base_ind + 1]; + expect_zu_le(psz, + reg_size_compute(le_sc.lg_base, le_sc.lg_delta, + le_sc.ndelta), + "Got %u as sz_psz2ind of %zu", ind, psz); + } + + pszind_t max_ind = sz_psz2ind(SC_LARGE_MAXCLASS + 1); + expect_lu_eq(max_ind, SC_NPSIZES, + "Got %u as sz_psz2ind of %llu", max_ind, SC_LARGE_MAXCLASS); +} +TEST_END + +int +main(void) { + return test(test_sz_psz2ind); +} From a93931537e3845c8baca6965aded9a9683fa1481 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 24 Mar 2022 18:07:27 -0700 Subject: [PATCH 2198/2608] Do not disable SEC by default for 64k pages platforms Default SEC max_alloc option value was 32k, disabling SEC for platforms with lg-page=16. This change enables SEC for all platforms, making minimum max_alloc value equal to PAGE. --- include/jemalloc/internal/sec_opts.h | 2 +- src/sec.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index 91b6d0de..a3ad72fb 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -46,7 +46,7 @@ struct sec_opts_s { /* nshards */ \ 4, \ /* max_alloc */ \ - 32 * 1024, \ + (32 * 1024) < PAGE ? 
PAGE : (32 * 1024), \ /* max_bytes */ \ 256 * 1024, \ /* bytes_after_flush */ \ diff --git a/src/sec.c b/src/sec.c index c13904d8..df675590 100644 --- a/src/sec.c +++ b/src/sec.c @@ -23,7 +23,7 @@ sec_bin_init(sec_bin_t *bin) { bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { - assert(opts->max_alloc > 0); + assert(opts->max_alloc >= PAGE); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; From fdb6c101625060236732a6003116a129edda3687 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Thu, 6 Jan 2022 19:31:09 -0800 Subject: [PATCH 2199/2608] Add FreeBSD to TravisCI Implement the generation of Travis jobs for FreeBSD. The generated jobs replicate the existing CirrusCI config. --- .travis.yml | 48 +++++++++++++++++++++++++++++++ scripts/freebsd/before_install.sh | 3 ++ scripts/freebsd/before_script.sh | 10 +++++++ scripts/freebsd/script.sh | 3 ++ scripts/gen_travis.py | 19 ++++++++++++ 5 files changed, 83 insertions(+) create mode 100644 scripts/freebsd/before_install.sh create mode 100644 scripts/freebsd/before_script.sh create mode 100644 scripts/freebsd/script.sh diff --git a/.travis.yml b/.travis.yml index c54cc454..f2b107ba 100644 --- a/.travis.yml +++ b/.travis.yml @@ -304,6 +304,54 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes + - os: freebsd + arch: amd64 + env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/freebsd/before_install.sh b/scripts/freebsd/before_install.sh new file mode 100644 index 00000000..f2bee321 --- /dev/null +++ b/scripts/freebsd/before_install.sh 
@@ -0,0 +1,3 @@ +#!/bin/tcsh + +su -m root -c 'pkg install -y git' diff --git a/scripts/freebsd/before_script.sh b/scripts/freebsd/before_script.sh new file mode 100644 index 00000000..29406f6f --- /dev/null +++ b/scripts/freebsd/before_script.sh @@ -0,0 +1,10 @@ +#!/bin/tcsh + +autoconf +# We don't perfectly track freebsd stdlib.h definitions. This is fine when +# we count as a system header, but breaks otherwise, like during these +# tests. +./configure --with-jemalloc-prefix=ci_ ${COMPILER_FLAGS:+ CC="$CC $COMPILER_FLAGS" CXX="$CXX $COMPILER_FLAGS"} $CONFIGURE_FLAGS +JE_NCPUS=`sysctl -n kern.smp.cpus` +gmake -j${JE_NCPUS} +gmake -j${JE_NCPUS} tests diff --git a/scripts/freebsd/script.sh b/scripts/freebsd/script.sh new file mode 100644 index 00000000..d9c53a20 --- /dev/null +++ b/scripts/freebsd/script.sh @@ -0,0 +1,3 @@ +#!/bin/tcsh + +gmake check diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 685bad59..40b0be1b 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -7,6 +7,7 @@ from enum import Enum, auto LINUX = 'linux' OSX = 'osx' WINDOWS = 'windows' +FREEBSD = 'freebsd' AMD64 = 'amd64' @@ -140,6 +141,9 @@ all_unusuals = (compilers_unusual + feature_unusuals def get_extra_cflags(os, compiler): + if os == FREEBSD: + return [] + if os == WINDOWS: # For non-CL compilers under Windows (for now it's only MinGW-GCC), # -fcommon needs to be specified to correctly handle multiple @@ -273,6 +277,19 @@ def generate_windows(arch): return generate_jobs(os, arch, (), max_unusual_opts, unusuals) +def generate_freebsd(arch): + os = FREEBSD + + max_unusual_opts = 4 + unusuals = ( + Option.as_configure_flag('--enable-debug'), + Option.as_configure_flag('--enable-prof --enable-prof-libunwind'), + Option.as_configure_flag('--with-lg-page=16 --with-malloc-conf=tcache:false'), + CROSS_COMPILE_32BIT, + ) + return generate_jobs(os, arch, (), max_unusual_opts, unusuals) + + def get_manual_jobs(): return """\ @@ -298,6 +315,8 @@ def main(): 
#generate_windows(AMD64), + generate_freebsd(AMD64), + get_manual_jobs() )) From 8a49b62e788a5ae21a32a3a2caccf27b841c9bf8 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 21 Mar 2022 14:14:34 -0700 Subject: [PATCH 2200/2608] Enable TravisCI for Windows --- .travis.yml | 24 ++++++++++++++++++++++++ scripts/gen_travis.py | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f2b107ba..29c19a7d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -304,6 +304,30 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - os: freebsd arch: amd64 env: CC=gcc CXX=g++ diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 40b0be1b..e076c352 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -313,7 +313,7 @@ def main(): generate_macos(AMD64), - #generate_windows(AMD64), + generate_windows(AMD64), generate_freebsd(AMD64), From 25517b852e76b429d4a97f4c96606263b2a9c209 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 21 Mar 2022 15:11:34 -0700 Subject: [PATCH 2201/2608] Reoreder TravisCI 
jobs to optimize CI time Sorting jobs by descending expected runtime helps to utilize concurrency better. --- .travis.yml | 144 +++++++++++++++++++++--------------------- scripts/gen_travis.py | 10 +-- 2 files changed, 77 insertions(+), 77 deletions(-) diff --git a/.travis.yml b/.travis.yml index 29c19a7d..bf44fad4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,6 +10,78 @@ dist: focal jobs: include: + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + - os: windows + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + - os: windows + arch: amd64 + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + - os: freebsd + 
arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" + - os: freebsd + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - os: linux arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -304,78 +376,6 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe 
CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ 
CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index e076c352..4366a066 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -308,16 +308,16 @@ EXTRA_CFLAGS="-Werror -Wno-array-bounds" def main(): jobs = '\n'.join(( + generate_windows(AMD64), + + generate_freebsd(AMD64), + generate_linux(AMD64), generate_linux(PPC64LE), generate_macos(AMD64), - generate_windows(AMD64), - - generate_freebsd(AMD64), - - get_manual_jobs() + get_manual_jobs(), )) print(TRAVIS_TEMPLATE.format(jobs=jobs)) From ed5fc14b28ca62a6ba57b65adf557e1ef09037f0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 23 Mar 2022 16:31:40 -0700 Subject: [PATCH 2202/2608] Use volatile to workaround buffer overflow false positives. In test/integration/rallocx, full usable size is checked which may confuse overflow detection. --- test/integration/rallocx.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index d4a48fce..68b8f381 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -41,7 +41,11 @@ get_large_size(size_t ind) { } TEST_BEGIN(test_grow_and_shrink) { - void *p, *q; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). 
+ */ + void *volatile p, *volatile q; size_t tsz; #define NCYCLES 3 unsigned i, j; @@ -85,9 +89,13 @@ TEST_BEGIN(test_grow_and_shrink) { TEST_END static bool -validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { +validate_fill(void *p, uint8_t c, size_t offset, size_t len) { bool ret = false; - const uint8_t *buf = (const uint8_t *)p; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + uint8_t *volatile buf = (uint8_t *)p; size_t i; for (i = 0; i < len; i++) { @@ -104,7 +112,11 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { } TEST_BEGIN(test_zero) { - void *p, *q; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + void *volatile p, *volatile q; size_t psz, qsz, i, j; size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; #define FILL_BYTE 0xaaU @@ -205,7 +217,11 @@ TEST_BEGIN(test_align_enum) { TEST_END TEST_BEGIN(test_lg_align_and_zero) { - void *p, *q; + /* + * Use volatile to workaround buffer overflow false positives + * (-D_FORTIFY_SOURCE=3). + */ + void *volatile p, *volatile q; unsigned lg_align; size_t sz; #define MAX_LG_ALIGN 25 From 5841b6dbe7106cf40923593ba8a0e6421a5fe905 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 19 Apr 2022 14:43:26 -0700 Subject: [PATCH 2203/2608] Update FreeBSD image to 12.3 for cirrus ci. 
--- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 4cca64ba..75695398 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -29,7 +29,7 @@ task: UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false freebsd_instance: matrix: - image: freebsd-12-2-release-amd64 + image: freebsd-12-3-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 0e29ad4efa3d1c5ae9cd01afd32812dd18875200 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 15 Apr 2022 12:17:59 -0700 Subject: [PATCH 2204/2608] Rename zero_realloc option "strict" to "alloc". With realloc(ptr, 0) being UB per C23, the option name "strict" makes less sense now. Rename to "alloc" which describes the behavior. --- Makefile.in | 2 +- doc/jemalloc.xml.in | 6 +++--- include/jemalloc/internal/jemalloc_internal_types.h | 2 +- src/jemalloc.c | 12 ++++++------ .../{zero_realloc_strict.c => zero_realloc_alloc.c} | 4 ++-- test/unit/zero_realloc_alloc.sh | 3 +++ test/unit/zero_realloc_strict.sh | 3 --- 7 files changed, 16 insertions(+), 16 deletions(-) rename test/unit/{zero_realloc_strict.c => zero_realloc_alloc.c} (94%) create mode 100644 test/unit/zero_realloc_alloc.sh delete mode 100644 test/unit/zero_realloc_strict.sh diff --git a/Makefile.in b/Makefile.in index cf6d5687..1193cd85 100644 --- a/Makefile.in +++ b/Makefile.in @@ -289,7 +289,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/zero.c \ $(srcroot)test/unit/zero_realloc_abort.c \ $(srcroot)test/unit/zero_realloc_free.c \ - $(srcroot)test/unit/zero_realloc_strict.c \ + $(srcroot)test/unit/zero_realloc_alloc.c \ $(srcroot)test/unit/zero_reallocs.c ifeq (@enable_prof@, 1) TESTS_UNIT += \ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 6e2099ad..8c3703bd 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1580,19 +1580,19 @@ malloc_conf = "xmalloc:true";]]> 
Determines the behavior of realloc() when passed a value of zero for the new - size. strict treats this as an allocation of size zero + size. alloc treats this as an allocation of size zero (and returns a non-null result except in case of resource exhaustion). free treats this as a deallocation of the pointer, and returns NULL without setting errno. abort aborts the process if - zero is passed. The default is strict. + zero is passed. The default is alloc. There is considerable divergence of behaviors across implementations in handling this case. Many have the behavior of free. This can introduce security vulnerabilities, since a NULL return value indicates failure, and the continued validity of the passed-in pointer (per POSIX and C11). - strict is safe, but can cause leaks in programs that + alloc is safe, but can cause leaks in programs that expect the common behavior. Programs intended to be portable and leak-free cannot assume either behavior, and must therefore never call realloc with a size of 0. The abort option enables these diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 61c1f31a..62c2b59c 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -9,7 +9,7 @@ typedef int malloc_cpuid_t; /* When realloc(non-null-ptr, 0) is called, what happens? */ enum zero_realloc_action_e { /* Realloc(ptr, 0) is free(ptr); return malloc(0); */ - zero_realloc_action_strict = 0, + zero_realloc_action_alloc = 0, /* Realloc(ptr, 0) is free(ptr); */ zero_realloc_action_free = 1, /* Realloc(ptr, 0) aborts. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index 364dc57f..7e5bd338 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -112,12 +112,12 @@ bool opt_cache_oblivious = ; zero_realloc_action_t opt_zero_realloc_action = - zero_realloc_action_strict; + zero_realloc_action_alloc; atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); const char *zero_realloc_mode_names[] = { - "strict", + "alloc", "free", "abort", }; @@ -1649,9 +1649,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } if (CONF_MATCH("zero_realloc")) { - if (CONF_MATCH_VALUE("strict")) { + if (CONF_MATCH_VALUE("alloc")) { opt_zero_realloc_action - = zero_realloc_action_strict; + = zero_realloc_action_alloc; } else if (CONF_MATCH_VALUE("free")) { opt_zero_realloc_action = zero_realloc_action_free; @@ -3578,9 +3578,9 @@ do_realloc_nonnull_zero(void *ptr) { if (config_stats) { atomic_fetch_add_zu(&zero_realloc_count, 1, ATOMIC_RELAXED); } - if (opt_zero_realloc_action == zero_realloc_action_strict) { + if (opt_zero_realloc_action == zero_realloc_action_alloc) { /* - * The user might have gotten a strict setting while expecting a + * The user might have gotten an alloc setting while expecting a * free setting. If that's the case, we at least try to * reduce the harm, and turn off the tcache while allocating, so * that we'll get a true first fit. 
diff --git a/test/unit/zero_realloc_strict.c b/test/unit/zero_realloc_alloc.c similarity index 94% rename from test/unit/zero_realloc_strict.c rename to test/unit/zero_realloc_alloc.c index 249d838a..65e07bdb 100644 --- a/test/unit/zero_realloc_strict.c +++ b/test/unit/zero_realloc_alloc.c @@ -24,7 +24,7 @@ deallocated() { return deallocated; } -TEST_BEGIN(test_realloc_strict) { +TEST_BEGIN(test_realloc_alloc) { void *ptr = mallocx(1, 0); expect_ptr_not_null(ptr, "Unexpected mallocx error"); uint64_t allocated_before = allocated(); @@ -44,5 +44,5 @@ TEST_END int main(void) { return test( - test_realloc_strict); + test_realloc_alloc); } diff --git a/test/unit/zero_realloc_alloc.sh b/test/unit/zero_realloc_alloc.sh new file mode 100644 index 00000000..802687cf --- /dev/null +++ b/test/unit/zero_realloc_alloc.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="zero_realloc:alloc" diff --git a/test/unit/zero_realloc_strict.sh b/test/unit/zero_realloc_strict.sh deleted file mode 100644 index 314dcd0a..00000000 --- a/test/unit/zero_realloc_strict.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF="zero_realloc:strict" From 9a242f16d9e4a6afcd53782a9427471f6d144f1f Mon Sep 17 00:00:00 2001 From: cuishuang Date: Sun, 24 Apr 2022 23:32:44 +0800 Subject: [PATCH 2205/2608] fix some typos Signed-off-by: cuishuang --- bin/jeprof.in | 4 ++-- doc/jemalloc.xml.in | 2 +- include/jemalloc/internal/extent.h | 2 +- include/jemalloc/internal/jemalloc_internal_includes.h | 2 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/sc.h | 2 +- src/jemalloc.c | 2 +- src/tsd.c | 2 +- test/analyze/rand.c | 2 +- test/unit/sz.c | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index e0b212ae..dbf6252b 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -5085,7 +5085,7 @@ sub MapToSymbols { } else { # MapSymbolsWithNM tags each routine with its starting address, # useful in case the image has multiple occurrences of 
this - # routine. (It uses a syntax that resembles template paramters, + # routine. (It uses a syntax that resembles template parameters, # that are automatically stripped out by ShortFunctionName().) # addr2line does not provide the same information. So we check # if nm disambiguated our symbol, and if so take the annotated @@ -5437,7 +5437,7 @@ sub GetProcedureBoundaries { # "nm -f $image" is supposed to fail on GNU nm, but if: # # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND - # b. you have a.out in your current directory (a not uncommon occurence) + # b. you have a.out in your current directory (a not uncommon occurrence) # # then "nm -f $image" succeeds because -f only looks at the first letter of # the argument, which looks valid because it's [BbSsPp], and then since diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 8c3703bd..ce7acd9f 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1409,7 +1409,7 @@ malloc_conf = "xmalloc:true";]]> set to the empty string, no automatic dumps will occur; this is primarily useful for disabling the automatic final heap dump (which also disables leak reporting, if enabled). The default prefix is - jeprof. This prefix value can be overriden by + jeprof. This prefix value can be overridden by prof.prefix. diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 7336e8b8..1d51d410 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -104,7 +104,7 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, edata_committed_get(neighbor))) { /* * Some platforms (e.g. Windows) require an explicit - * commit step (and writing to uncomitted memory is not + * commit step (and writing to uncommitted memory is not * allowed). 
*/ return false; diff --git a/include/jemalloc/internal/jemalloc_internal_includes.h b/include/jemalloc/internal/jemalloc_internal_includes.h index 90a12a12..751c112f 100644 --- a/include/jemalloc/internal/jemalloc_internal_includes.h +++ b/include/jemalloc/internal/jemalloc_internal_includes.h @@ -10,7 +10,7 @@ * structs, externs, and inlines), and included each header file multiple times * in this file, picking out the portion we want on each pass using the * following #defines: - * JEMALLOC_H_TYPES : Preprocessor-defined constants and psuedo-opaque data + * JEMALLOC_H_TYPES : Preprocessor-defined constants and pseudo-opaque data * types. * JEMALLOC_H_STRUCTS : Data structures. * JEMALLOC_H_EXTERNS : Extern data declarations and function prototypes. diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3cf370c8..4748a05b 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -180,7 +180,7 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t new_size, szind_t szind, bool *deferred_work_generated); /* * Frees the given edata back to the pa. Sets *generated_dirty if we produced - * new dirty pages (well, we alwyas set it for now; but this need not be the + * new dirty pages (well, we always set it for now; but this need not be the * case). * (We could make generated_dirty the return value of course, but this is more * consistent with the shrink pathway and our error codes here). diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 308985fa..9bab347b 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -348,7 +348,7 @@ size_t reg_size_compute(int lg_base, int lg_delta, int ndelta); void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. - * Otherwise, does its best to accomodate the request. + * Otherwise, does its best to accommodate the request. 
*/ void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs); diff --git a/src/jemalloc.c b/src/jemalloc.c index 7e5bd338..9c94425c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -327,7 +327,7 @@ a0dalloc(void *ptr) { } /* - * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-senstive + * FreeBSD's libc uses the bootstrap_*() functions in bootstrap-sensitive * situations that cannot tolerate TLS variable access (TLS allocation and very * early internal data structure initialization). */ diff --git a/src/tsd.c b/src/tsd.c index b98c34bf..e8e4f3a3 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -209,7 +209,7 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { /* * This is the tricky case. We're transitioning from * one nominal state to another. The caller can't know - * about any races that are occuring at the same time, + * about any races that are occurring at the same time, * so we always have to recompute no matter what. */ tsd_slow_update(tsd); diff --git a/test/analyze/rand.c b/test/analyze/rand.c index a4ab49a2..bb20b06e 100644 --- a/test/analyze/rand.c +++ b/test/analyze/rand.c @@ -34,7 +34,7 @@ * (c) Any generated number >= n_bucket * 2^lg_bucket_width will be counted * towards the last bucket; the expected mean and stddev provided should * also reflect that. - * (d) The number of iterations is adviced to be determined so that the bucket + * (d) The number of iterations is advised to be determined so that the bucket * with the minimal expected proportion gets a sufficient count. */ diff --git a/test/unit/sz.c b/test/unit/sz.c index be11aca4..8ae04b92 100644 --- a/test/unit/sz.c +++ b/test/unit/sz.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_sz_psz2ind) { /* * Testing page size classes which reside prior to the regular group * with all size classes divisible by page size. - * For x86_64 Linux, it's 4096, 8192, 12288, 16384, with correponding + * For x86_64 Linux, it's 4096, 8192, 12288, 16384, with corresponding * pszind 0, 1, 2 and 3. 
*/ for (size_t i = 0; i < SC_NGROUP; i++) { From 391bad4b95839e2c690879ca62b1e904a49a78df Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 21 Apr 2022 16:31:33 -0700 Subject: [PATCH 2206/2608] Avoid abort() in test/integration/cpp/infallible_new_true. Allow setting the safety check abort hook through mallctl, which avoids abort() and core dumps. --- include/jemalloc/internal/safety_check.h | 5 +- src/ctl.c | 24 +++++ src/safety_check.c | 4 +- test/integration/cpp/infallible_new_true.cpp | 98 +++++++++++--------- 4 files changed, 82 insertions(+), 49 deletions(-) diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index f10c68e4..f1a74f17 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -4,8 +4,11 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); + +typedef void (*safety_check_abort_hook_t)(const char *message); + /* Can set to NULL for a default. 
*/ -void safety_check_set_abort(void (*abort_fn)(const char *)); +void safety_check_set_abort(safety_check_abort_hook_t abort_fn); JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { diff --git a/src/ctl.c b/src/ctl.c index 54d33aed..135271ba 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -14,6 +14,7 @@ #include "jemalloc/internal/prof_recent.h" #include "jemalloc/internal/prof_stats.h" #include "jemalloc/internal/prof_sys.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/util.h" @@ -311,6 +312,7 @@ CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) +CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) CTL_PROTO(experimental_utilization_batch_query) @@ -849,6 +851,7 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("remove"), CTL(experimental_hooks_remove)}, {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, + {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3437,6 +3440,27 @@ label_return: return ret; } +/* For integration test purpose only. No plan to move out of experimental. 
*/ +static int +experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + WRITEONLY(); + if (newp != NULL) { + if (newlen != sizeof(safety_check_abort_hook_t)) { + ret = EINVAL; + goto label_return; + } + safety_check_abort_hook_t hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(hook, safety_check_abort_hook_t); + safety_check_set_abort(hook); + } + ret = 0; +label_return: + return ret; +} + /******************************************************************************/ CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) diff --git a/src/safety_check.c b/src/safety_check.c index 552b3121..209fdda9 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -1,7 +1,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -static void (*safety_check_abort)(const char *message); +static safety_check_abort_hook_t safety_check_abort; void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size) { @@ -15,7 +15,7 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, true_size, input_size, ptr, src); } -void safety_check_set_abort(void (*abort_fn)(const char *)) { +void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { safety_check_abort = abort_fn; } diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index 9b943bd4..d6754128 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -1,55 +1,61 @@ #include -/* - * We can't test C++ in unit tests, and we can't change the safety check failure - * hook in integration tests. So we check that we *actually* abort on failure, - * by forking and checking the child process exit code. - */ - -/* It's a unix system? */ -#ifdef __unix__ -/* I know this! 
*/ -#include -#include -#include -static const bool can_fork = true; -#else -static const bool can_fork = false; -#endif - #include "test/jemalloc_test.h" -TEST_BEGIN(test_failing_alloc) { - test_skip_if(!can_fork); -#ifdef __unix__ - pid_t pid = fork(); - expect_d_ne(pid, -1, "Unexpected fork failure"); - if (pid == 0) { - /* - * In the child, we'll print an error message to stderr before - * exiting. Close stderr to avoid spamming output for this - * expected failure. - */ - fclose(stderr); - try { - /* Too big of an allocation to succeed. */ - void *volatile ptr = ::operator new((size_t)-1); - (void)ptr; - } catch (...) { - /* - * Swallow the exception; remember, we expect this to - * fail via an abort within new, not because an - * exception didn't get caught. - */ - } - } else { - int status; - pid_t err = waitpid(pid, &status, 0); - expect_d_ne(-1, err, "waitpid failure"); - expect_false(WIFEXITED(status), - "Should have seen an abnormal failure"); +/* + * We can't test C++ in unit tests. In order to intercept abort, use a secret + * safety check abort hook in integration tests. + */ +typedef void (*abort_hook_t)(const char *message); +bool fake_abort_called; +void fake_abort(const char *message) { + if (strcmp(message, ": Allocation failed and " + "opt.experimental_infallible_new is true. Aborting.\n") != 0) { + abort(); } + fake_abort_called = true; +} + +static bool +own_operator_new(void) { + uint64_t before, after; + size_t sz = sizeof(before); + + /* thread.allocated is always available, even w/o config_stats. 
*/ + expect_d_eq(mallctl("thread.allocated", (void *)&before, &sz, NULL, 0), + 0, "Unexpected mallctl failure reading stats"); + void *volatile ptr = ::operator new((size_t)8); + expect_ptr_not_null(ptr, "Unexpected allocation failure"); + expect_d_eq(mallctl("thread.allocated", (void *)&after, &sz, NULL, 0), + 0, "Unexpected mallctl failure reading stats"); + + return (after != before); +} + +TEST_BEGIN(test_failing_alloc) { + abort_hook_t abort_hook = &fake_abort; + expect_d_eq(mallctl("experimental.hooks.safety_check_abort", NULL, NULL, + (void *)&abort_hook, sizeof(abort_hook)), 0, + "Unexpected mallctl failure setting abort hook"); + + /* + * Not owning operator new is only expected to happen on MinGW which + * does not support operator new / delete replacement. + */ +#ifdef _WIN32 + test_skip_if(!own_operator_new()); +#else + expect_true(own_operator_new(), "No operator new overload"); #endif + void *volatile ptr = (void *)1; + try { + /* Too big of an allocation to succeed. */ + ptr = ::operator new((size_t)-1); + } catch (...) { + abort(); + } + expect_ptr_null(ptr, "Allocation should have failed"); + expect_b_eq(fake_abort_called, true, "Abort hook not invoked"); } TEST_END From ceca07d2ca95f7c2680263f3c679ba3f611d5ffb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 25 Apr 2022 14:17:52 -0700 Subject: [PATCH 2207/2608] Correct the name of stats.mutexes.prof_thds_data in doc. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index ce7acd9f..12dc5710 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -2733,7 +2733,7 @@ struct extent_hooks_s { counters. - + stats.mutexes.prof_thds_data.{counter} (counter specific type) r- From f5e840bbf0213d86ae3d0a915df8abd03d75cdf6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 27 Apr 2022 18:16:11 -0700 Subject: [PATCH 2208/2608] Minor typo fix in doc. 
--- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 12dc5710..5c12f1d4 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1345,7 +1345,7 @@ malloc_conf = "xmalloc:true";]]> maximum, size classes up to 8 MiB can be cached. The default maximum is 32 KiB (2^15). As a convenience, this may also be set by specifying lg_tcache_max, which will be taken to be the base-2 logarithm of the - setting of tcache_max + setting of tcache_max. From 254b011915c0c68549beb7a91be02cf56d81fa32 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Apr 2022 17:40:37 -0700 Subject: [PATCH 2209/2608] Small doc tweak of opt.trust_madvise. Avoid quoted enabled and disabled because it's a bool type instead of char *. --- doc/jemalloc.xml.in | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 5c12f1d4..fe4ded96 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -972,10 +972,9 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", (bool) r- - Do not perform runtime check for MADV_DONTNEED, to - check that it actually zeros pages. The default is - disabled on linux and enabled elsewhere. - + If true, do not perform runtime check for MADV_DONTNEED, + to check that it actually zeros pages. The default is disabled on Linux + and enabled elsewhere. From a7d73dd4c9ba97bb033f7ae15f218a65d8b8ace6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 3 May 2022 14:28:30 -0700 Subject: [PATCH 2210/2608] Update TUNING.md to include the new tcache_max option. --- TUNING.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/TUNING.md b/TUNING.md index 34fca05b..e96399d7 100644 --- a/TUNING.md +++ b/TUNING.md @@ -1,5 +1,5 @@ This document summarizes the common approaches for performance fine tuning with -jemalloc (as of 5.1.0). The default configuration of jemalloc tends to work +jemalloc (as of 5.3.0). 
The default configuration of jemalloc tends to work reasonably well in practice, and most applications should not have to tune any options. However, in order to cover a wide range of applications and avoid pathological cases, the default setting is sometimes kept conservative and @@ -76,14 +76,14 @@ Examples: * High resource consumption application, prioritizing memory usage: - `background_thread:true` combined with shorter decay time (decreased - `dirty_decay_ms` and / or `muzzy_decay_ms`, + `background_thread:true,tcache_max:4096` combined with shorter decay time + (decreased `dirty_decay_ms` and / or `muzzy_decay_ms`, e.g. `dirty_decay_ms:5000,muzzy_decay_ms:5000`), and lower arena count (e.g. number of CPUs). * Low resource consumption application: - `narenas:1,lg_tcache_max:13` combined with shorter decay time (decreased + `narenas:1,tcache_max:1024` combined with shorter decay time (decreased `dirty_decay_ms` and / or `muzzy_decay_ms`,e.g. `dirty_decay_ms:1000,muzzy_decay_ms:0`). From 66c889500a20e6493a6768de6eaa7347daf61483 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 4 May 2022 11:38:57 -0700 Subject: [PATCH 2211/2608] Make test/unit/background_thread_enable more conservative. To avoid resource exhaustion on 32-bit platforms. 
--- test/unit/background_thread_enable.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 46776f36..44034ac6 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -2,12 +2,8 @@ const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20"; -TEST_BEGIN(test_deferred) { - test_skip_if(!have_background_thread); - - unsigned id; - size_t sz_u = sizeof(unsigned); - +static unsigned +max_test_narenas(void) { /* * 10 here is somewhat arbitrary, except insofar as we want to ensure * that the number of background threads is smaller than the number of @@ -15,7 +11,22 @@ TEST_BEGIN(test_deferred) { * cpu to handle background purging, so this is a conservative * approximation. */ - for (unsigned i = 0; i < 10 * ncpus; i++) { + unsigned ret = 10 * ncpus; + /* Limit the max to avoid VM exhaustion on 32-bit . */ + if (ret > 512) { + ret = 512; + } + + return ret; +} + +TEST_BEGIN(test_deferred) { + test_skip_if(!have_background_thread); + + unsigned id; + size_t sz_u = sizeof(unsigned); + + for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } @@ -50,7 +61,7 @@ TEST_BEGIN(test_max_background_threads) { unsigned id; size_t sz_u = sizeof(unsigned); - for (unsigned i = 0; i < 10 * ncpus; i++) { + for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } From 8cb814629acc7c7a8c1008f47e35d3f40129f5fa Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 3 May 2022 15:41:43 -0700 Subject: [PATCH 2212/2608] Make the default option of zero realloc match the system allocator. 
--- configure.ac | 9 +++++++++ doc/jemalloc.xml.in | 15 ++++++++------- .../jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/jemalloc.c | 7 ++++++- 4 files changed, 26 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index 5c7a8ef8..f6d25f33 100644 --- a/configure.ac +++ b/configure.ac @@ -638,6 +638,7 @@ dnl Define cpp macros in CPPFLAGS, rather than doing AC_DEFINE(macro), since the dnl definitions need to be seen before any headers are included, which is a pain dnl to make happen otherwise. default_retain="0" +zero_realloc_default_free="0" maps_coalesce="1" DUMP_SYMS="${NM} -a" SYM_PREFIX="" @@ -684,6 +685,7 @@ case "${host}" in if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi + zero_realloc_default_free="1" ;; *-*-linux*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -698,6 +700,7 @@ case "${host}" in if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi + zero_realloc_default_free="1" ;; *-*-kfreebsd*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. @@ -773,6 +776,7 @@ case "${host}" in if test "${LG_SIZEOF_PTR}" = "3"; then default_retain="1" fi + zero_realloc_default_free="1" ;; *-*-nto-qnx) abi="elf" @@ -1395,6 +1399,11 @@ if test "x$default_retain" = "x1" ; then AC_DEFINE([JEMALLOC_RETAIN], [ ], [ ]) fi +dnl Indicate whether realloc(ptr, 0) defaults to the "alloc" behavior. +if test "x$zero_realloc_default_free" = "x1" ; then + AC_DEFINE([JEMALLOC_ZERO_REALLOC_DEFAULT_FREE], [ ], [ ]) +fi + dnl Enable allocation from DSS if supported by the OS. have_dss="1" dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index fe4ded96..e28e8f38 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1578,13 +1578,14 @@ malloc_conf = "xmalloc:true";]]> r- Determines the behavior of - realloc() when passed a value of zero for the new - size. 
alloc treats this as an allocation of size zero - (and returns a non-null result except in case of resource exhaustion). - free treats this as a deallocation of the pointer, and - returns NULL without setting - errno. abort aborts the process if - zero is passed. The default is alloc. + realloc() when passed a value of zero for the new + size. alloc treats this as an allocation of size zero + (and returns a non-null result except in case of resource exhaustion). + free treats this as a deallocation of the pointer, and + returns NULL without setting + errno. abort aborts the process if + zero is passed. The default is free on Linux and + Windows, and alloc elsewhere. There is considerable divergence of behaviors across implementations in handling this case. Many have the behavior of diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 0cb15d3e..3588072f 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -421,4 +421,7 @@ /* Darwin VM_MAKE_TAG support */ #undef JEMALLOC_HAVE_VM_MAKE_TAG +/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */ +#undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 9c94425c..7655de4e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -112,7 +112,12 @@ bool opt_cache_oblivious = ; zero_realloc_action_t opt_zero_realloc_action = - zero_realloc_action_alloc; +#ifdef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE + zero_realloc_action_free +#else + zero_realloc_action_alloc +#endif + ; atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); From 304c919829f9f340669b61fa64867cfe5dba8021 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 27 Apr 2022 18:05:07 -0700 Subject: [PATCH 2213/2608] Update ChangeLog for 5.3.0. 
--- ChangeLog | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/ChangeLog b/ChangeLog index e55813b7..32fde562 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,106 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.3.0 (May 6, 2022) + + This release contains many speed and space optimizations, from micro + optimizations on common paths to rework of internal data structures and + locking schemes, and many more too detailed to list below. Multiple percent + of system level metric improvements were measured in tested production + workloads. The release has gone through large-scale production testing. + + New features: + - Add the thread.idle mallctl which hints that the calling thread will be + idle for a nontrivial period of time. (@davidtgoldblatt) + - Allow small size classes to be the maximum size class to cache in the + thread-specific cache, through the opt.[lg_]tcache_max option. (@interwq, + @jordalgo) + - Make the behavior of realloc(ptr, 0) configurable with opt.zero_realloc. + (@davidtgoldblatt) + - Add 'make uninstall' support. (@sangshuduo, @Lapenkov) + - Support C++17 over-aligned allocation. (@marksantaniello) + - Add the thread.peak mallctl for approximate per-thread peak memory tracking. + (@davidtgoldblatt) + - Add interval-based stats output opt.stats_interval. (@interwq) + - Add prof.prefix to override filename prefixes for dumps. (@zhxchen17) + - Add high resolution timestamp support for profiling. (@tyroguru) + - Add the --collapsed flag to jeprof for flamegraph generation. + (@igorwwwwwwwwwwwwwwwwwwww) + - Add the --debug-syms-by-id option to jeprof for debug symbols discovery. + (@DeannaGelbart) + - Add the opt.prof_leak_error option to exit with error code when leak is + detected using opt.prof_final. (@yunxuo) + - Add opt.cache_oblivious as an runtime alternative to config.cache_oblivious. 
+ (@interwq) + - Add mallctl interfaces: + + opt.zero_realloc (@davidtgoldblatt) + + opt.cache_oblivious (@interwq) + + opt.prof_leak_error (@yunxuo) + + opt.stats_interval (@interwq) + + opt.stats_interval_opts (@interwq) + + opt.tcache_max (@interwq) + + opt.trust_madvise (@azat) + + prof.prefix (@zhxchen17) + + stats.zero_reallocs (@davidtgoldblatt) + + thread.idle (@davidtgoldblatt) + + thread.peak.{read,reset} (@davidtgoldblatt) + + Bug fixes: + - Fix the synchronization around explicit tcache creation which could cause + invalid tcache identifiers. This regression was first released in 5.0.0. + (@yoshinorim, @davidtgoldblatt) + - Fix a profiling biasing issue which could cause incorrect heap usage and + object counts. This issue existed in all previous releases with the heap + profiling feature. (@davidtgoldblatt) + - Fix the order of stats counter updating on large realloc which could cause + failed assertions. This regression was first released in 5.0.0. (@azat) + - Fix the locking on the arena destroy mallctl, which could cause concurrent + arena creations to fail. This functionality was first introduced in 5.0.0. + (@interwq) + + Portability improvements: + - Remove nothrow from system function declarations on macOS and FreeBSD. + (@davidtgoldblatt, @fredemmott, @leres) + - Improve overcommit and page alignment settings on NetBSD. (@zoulasc) + - Improve CPU affinity support on BSD platforms. (@devnexen) + - Improve utrace detection and support. (@devnexen) + - Improve QEMU support with MADV_DONTNEED zeroed pages detection. (@azat) + - Add memcntl support on Solaris / illumos. (@devnexen) + - Improve CPU_SPINWAIT on ARM. (@AWSjswinney) + - Improve TSD cleanup on FreeBSD. (@Lapenkov) + - Disable percpu_arena if the CPU count cannot be reliably detected. (@azat) + - Add malloc_size(3) override support. (@devnexen) + - Add mmap VM_MAKE_TAG support. (@devnexen) + - Add support for MADV_[NO]CORE. (@devnexen) + - Add support for DragonFlyBSD. 
(@devnexen) + - Fix the QUANTUM setting on MIPS64. (@brooksdavis) + - Add the QUANTUM setting for ARC. (@vineetgarc) + - Add the QUANTUM setting for LoongArch. (@wangjl-uos) + - Add QNX support. (@jqian-aurora) + - Avoid atexit(3) calls unless the relevant profiling features are enabled. + (@BusyJay, @laiwei-rice, @interwq) + - Fix unknown option detection when using Clang. (@Lapenkov) + - Fix symbol conflict with musl libc. (@georgthegreat) + - Add -Wimplicit-fallthrough checks. (@nickdesaulniers) + - Add __forceinline support on MSVC. (@santagada) + - Improve FreeBSD and Windows CI support. (@Lapenkov) + - Add CI support for PPC64LE architecture. (@ezeeyahoo) + + Incompatible changes: + - Maximum size class allowed in tcache (opt.[lg_]tcache_max) now has an upper + bound of 8MiB. (@interwq) + + Optimizations and refactors (@davidtgoldblatt, @Lapenkov, @interwq): + - Optimize the common cases of the thread cache operations. + - Optimize internal data structures, including RB tree and pairing heap. + - Optimize the internal locking on extent management. + - Extract and refactor the internal page allocator and interface modules. + + Documentation: + - Fix doc build with --with-install-suffix. (@lawmurray, @interwq) + - Add PROFILING_INTERNALS.md. (@davidtgoldblatt) + - Ensure the proper order of doc building and installation. (@Mingli-Yu) + * 5.2.1 (August 5, 2019) This release is primarily about Windows. 
A critical virtual memory leak is From 70d4102f48dce2d5755e9139a15eeec606f97bff Mon Sep 17 00:00:00 2001 From: Yuriy Chernyshov Date: Mon, 9 May 2022 15:36:49 +0300 Subject: [PATCH 2214/2608] Fix compiling edata.h with MSVC At the time an attempt to compile jemalloc 5.3.0 with MSVC 2019 results in the following error message: > jemalloc/include/jemalloc/internal/edata.h:660: error C4576: a parenthesized type followed by an initializer list is a non-standard explicit type conversion syntax --- include/jemalloc/internal/edata.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index af039ea7..e77a55e6 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -656,8 +656,10 @@ edata_ead_comp(const edata_t *a, const edata_t *b) { static inline edata_cmp_summary_t edata_cmp_summary_get(const edata_t *edata) { - return (edata_cmp_summary_t){edata_sn_get(edata), - (uintptr_t)edata_addr_get(edata)}; + edata_cmp_summary_t result; + result.sn = edata_sn_get(edata); + result.addr = (uintptr_t)edata_addr_get(edata); + return result; } static inline int From cd5aaf308a46ce8ad0232ee9efb697b4ed33a7e4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 17 May 2022 13:11:44 -0700 Subject: [PATCH 2215/2608] Improve the failure message upon opt_experimental_infallible_new. --- src/jemalloc_cpp.cpp | 10 ++++++++-- test/integration/cpp/infallible_new_true.cpp | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 451655f1..8b53a392 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -57,8 +57,14 @@ JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { if (opt_experimental_infallible_new) { - safety_check_fail(": Allocation failed and " - "opt.experimental_infallible_new is true. Aborting.\n"); + const char *huge_warning = (size >= ((std::size_t)1 << 30)) ? 
+ "This may be caused by heap corruption, if the large size " + "is unexpected (suggest building with sanitizers for " + "debugging)." : ""; + + safety_check_fail(": Allocation of size %zu failed. " + "%s opt.experimental_infallible_new is true. Aborting.\n", + size, huge_warning); return nullptr; } diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index d6754128..3976f08b 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -9,8 +9,8 @@ typedef void (*abort_hook_t)(const char *message); bool fake_abort_called; void fake_abort(const char *message) { - if (strcmp(message, ": Allocation failed and " - "opt.experimental_infallible_new is true. Aborting.\n") != 0) { + const char *expected_start = ": Allocation of size"; + if (strncmp(message, expected_start, strlen(expected_start)) != 0) { abort(); } fake_abort_called = true; From 5b1f2cc5d79672e0d8852da1b705d68a74d22cd4 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 19 Apr 2022 19:51:27 -0700 Subject: [PATCH 2216/2608] Implement pvalloc replacement Despite being an obsolete function, pvalloc is still present in GLIBC and should work correctly when jemalloc replaces libc allocator. 
--- configure.ac | 6 +++ include/jemalloc/internal/hook.h | 1 + .../internal/jemalloc_internal_defs.h.in | 1 + include/jemalloc/jemalloc_defs.h.in | 1 + include/jemalloc/jemalloc_protos.h.in | 6 +++ src/jemalloc.c | 46 +++++++++++++++++++ test/unit/hook.c | 14 ++++++ 7 files changed, 75 insertions(+) diff --git a/configure.ac b/configure.ac index f6d25f33..8248f52d 100644 --- a/configure.ac +++ b/configure.ac @@ -1064,6 +1064,9 @@ AC_CHECK_FUNC([memalign], AC_CHECK_FUNC([valloc], [AC_DEFINE([JEMALLOC_OVERRIDE_VALLOC], [ ], [ ]) public_syms="${public_syms} valloc"]) +AC_CHECK_FUNC([pvalloc], + [AC_DEFINE([JEMALLOC_OVERRIDE_PVALLOC], [ ], [ ]) + public_syms="${public_syms} pvalloc"]) AC_CHECK_FUNC([malloc_size], [AC_DEFINE([JEMALLOC_HAVE_MALLOC_SIZE], [ ], [ ]) public_syms="${public_syms} malloc_size"]) @@ -1089,6 +1092,9 @@ if test "x${JEMALLOC_PREFIX}" = "x" ; then AC_CHECK_FUNC([__libc_valloc], [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_VALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_valloc"]) + AC_CHECK_FUNC([__libc_pvalloc], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_PVALLOC], [ ], [ ]) + wrap_syms="${wrap_syms} __libc_pvalloc"]) AC_CHECK_FUNC([__posix_memalign], [AC_DEFINE([JEMALLOC_OVERRIDE___POSIX_MEMALIGN], [ ], [ ]) wrap_syms="${wrap_syms} __posix_memalign"]) diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index ee246b1e..af03d2f5 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -55,6 +55,7 @@ enum hook_alloc_e { hook_alloc_calloc, hook_alloc_memalign, hook_alloc_valloc, + hook_alloc_pvalloc, hook_alloc_mallocx, /* The reallocating functions have both alloc and dalloc variants */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3588072f..888ef470 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -18,6 +18,7 @@ #undef 
JEMALLOC_OVERRIDE___LIBC_MEMALIGN #undef JEMALLOC_OVERRIDE___LIBC_REALLOC #undef JEMALLOC_OVERRIDE___LIBC_VALLOC +#undef JEMALLOC_OVERRIDE___LIBC_PVALLOC #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN /* diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index cbe2fca6..77d9d3b5 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -25,6 +25,7 @@ */ #undef JEMALLOC_OVERRIDE_MEMALIGN #undef JEMALLOC_OVERRIDE_VALLOC +#undef JEMALLOC_OVERRIDE_PVALLOC /* * At least Linux omits the "const" in: diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 356221cc..3f9fc848 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -69,3 +69,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_SYS_NOTHROW *@je_@valloc(size_t size) JEMALLOC_CXX_THROW JEMALLOC_ATTR(malloc); #endif + +#ifdef JEMALLOC_OVERRIDE_PVALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN + void JEMALLOC_SYS_NOTHROW *@je_@pvalloc(size_t size) JEMALLOC_CXX_THROW + JEMALLOC_ATTR(malloc); +#endif diff --git a/src/jemalloc.c b/src/jemalloc.c index 7655de4e..68db1f36 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3250,6 +3250,49 @@ je_valloc(size_t size) { } #endif +#ifdef JEMALLOC_OVERRIDE_PVALLOC +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN +void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) +je_pvalloc(size_t size) { + void *ret; + + static_opts_t sopts; + dynamic_opts_t dopts; + + LOG("core.pvalloc.entry", "size: %zu\n", size); + + static_opts_init(&sopts); + dynamic_opts_init(&dopts); + + sopts.null_out_result_on_error = true; + sopts.min_alignment = PAGE; + sopts.oom_string = + ": Error allocating aligned memory: out of memory\n"; + sopts.invalid_alignment_string = + ": Error allocating aligned memory: invalid alignment\n"; + + dopts.result = &ret; + dopts.num_items = 1; + /* + * This is the only 
difference from je_valloc - size is rounded up to + * a PAGE multiple. + */ + dopts.item_size = PAGE_CEILING(size); + dopts.alignment = PAGE; + + imalloc(&sopts, &dopts); + if (sopts.slow) { + uintptr_t args[3] = {size}; + hook_invoke_alloc(hook_alloc_pvalloc, ret, (uintptr_t)ret, + args); + } + + LOG("core.pvalloc.exit", "result: %p\n", ret); + return ret; +} +#endif + #if defined(JEMALLOC_IS_MALLOC) && defined(JEMALLOC_GLIBC_MALLOC_HOOK) /* * glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible @@ -3297,6 +3340,9 @@ void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); # ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC void *__libc_valloc(size_t size) PREALIAS(je_valloc); # endif +# ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC +void *__libc_pvalloc(size_t size) PREALIAS(je_pvalloc); +# endif # ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN int __posix_memalign(void** r, size_t a, size_t s) PREALIAS(je_posix_memalign); # endif diff --git a/test/unit/hook.c b/test/unit/hook.c index 16a6f1b0..36dbd269 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -313,6 +313,20 @@ TEST_BEGIN(test_hooks_alloc_simple) { free(ptr); #endif /* JEMALLOC_OVERRIDE_VALLOC */ + /* pvalloc */ +#ifdef JEMALLOC_OVERRIDE_PVALLOC + reset(); + ptr = pvalloc(1); + expect_d_eq(call_count, 1, "Hook not called"); + expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); + expect_d_eq(arg_type, (int)hook_alloc_pvalloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_result, "Wrong result"); + expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, + "Wrong raw result"); + expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); + free(ptr); +#endif /* JEMALLOC_OVERRIDE_PVALLOC */ + /* mallocx */ reset(); ptr = mallocx(1, MALLOCX_LG_ALIGN(10)); From 70e3735f3a71d3e05faa05c58ff3ca82ebaad908 Mon Sep 17 00:00:00 2001 From: barracuda156 Date: Sat, 21 May 2022 23:36:15 +0800 Subject: [PATCH 2217/2608] jemalloc: fix PowerPC definitions in quantum.h --- 
include/jemalloc/internal/quantum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index c22d753a..a97f54ca 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -49,7 +49,7 @@ # ifdef __or1k__ # define LG_QUANTUM 3 # endif -# ifdef __powerpc__ +# if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__ppc64__) # define LG_QUANTUM 4 # endif # if defined(__riscv) || defined(__riscv__) From df7ad8a9b6121c5c4b15bad5606b51bf734416a6 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Tue, 7 Jun 2022 12:25:44 -0700 Subject: [PATCH 2218/2608] Revert "Echo installed files via verbose 'install' command" This reverts commit f15d8f3b416f6812ac030bc1a7aacf05927a4d7f. "install -v" turned out to be not portable and not work on NetBSD. --- Makefile.in | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Makefile.in b/Makefile.in index 1193cd85..6809fb29 100644 --- a/Makefile.in +++ b/Makefile.in @@ -555,18 +555,20 @@ endif install_bin: $(INSTALL) -d $(BINDIR) @for b in $(BINS); do \ - $(INSTALL) -v -m 755 $$b $(BINDIR); \ + echo "$(INSTALL) -m 755 $$b $(BINDIR)"; \ + $(INSTALL) -m 755 $$b $(BINDIR); \ done install_include: $(INSTALL) -d $(INCLUDEDIR)/jemalloc @for h in $(C_HDRS); do \ - $(INSTALL) -v -m 644 $$h $(INCLUDEDIR)/jemalloc; \ + echo "$(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc"; \ + $(INSTALL) -m 644 $$h $(INCLUDEDIR)/jemalloc; \ done install_lib_shared: $(DSOS) $(INSTALL) -d $(LIBDIR) - $(INSTALL) -v -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) + $(INSTALL) -m 755 $(objroot)lib/$(LIBJEMALLOC).$(SOREV) $(LIBDIR) ifneq ($(SOREV),$(SO)) ln -sf $(LIBJEMALLOC).$(SOREV) $(LIBDIR)/$(LIBJEMALLOC).$(SO) endif @@ -574,13 +576,15 @@ endif install_lib_static: $(STATIC_LIBS) $(INSTALL) -d $(LIBDIR) @for l in $(STATIC_LIBS); do \ - $(INSTALL) -v -m 755 $$l $(LIBDIR); \ + echo 
"$(INSTALL) -m 755 $$l $(LIBDIR)"; \ + $(INSTALL) -m 755 $$l $(LIBDIR); \ done install_lib_pc: $(PC) $(INSTALL) -d $(LIBDIR)/pkgconfig @for l in $(PC); do \ - $(INSTALL) -v -m 644 $$l $(LIBDIR)/pkgconfig; \ + echo "$(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig"; \ + $(INSTALL) -m 644 $$l $(LIBDIR)/pkgconfig; \ done ifeq ($(enable_shared), 1) @@ -594,13 +598,15 @@ install_lib: install_lib_pc install_doc_html: build_doc_html $(INSTALL) -d $(DATADIR)/doc/jemalloc$(install_suffix) @for d in $(DOCS_HTML); do \ - $(INSTALL) -v -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ + echo "$(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix)"; \ + $(INSTALL) -m 644 $$d $(DATADIR)/doc/jemalloc$(install_suffix); \ done install_doc_man: build_doc_man $(INSTALL) -d $(MANDIR)/man3 @for d in $(DOCS_MAN3); do \ - $(INSTALL) -v -m 644 $$d $(MANDIR)/man3; \ + echo "$(INSTALL) -m 644 $$d $(MANDIR)/man3"; \ + $(INSTALL) -m 644 $$d $(MANDIR)/man3; \ done install_doc: install_doc_html install_doc_man From df8f7d10af15d549ab73ba807b2e14a9d7fe1cc2 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Fri, 20 May 2022 20:14:33 +0100 Subject: [PATCH 2219/2608] Implement malloc_getcpu for amd64 and arm64 macOS This enables per CPU arena on MacOS --- configure.ac | 17 +++++++++++++++++ .../internal/jemalloc_internal_inlines_a.h | 9 +++++++++ 2 files changed, 26 insertions(+) diff --git a/configure.ac b/configure.ac index 8248f52d..66eb7c91 100644 --- a/configure.ac +++ b/configure.ac @@ -510,6 +510,23 @@ typedef unsigned __int32 uint32_t; else AC_MSG_ERROR([cannot determine number of significant virtual address bits]) fi + AC_CACHE_CHECK([rdtscp support], + [je_cv_rdtscp], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +#include +]], +[[ + unsigned int dx; + asm volatile("rdtscp" : "=d"(dx) ::); + return 0; +]])], + [je_cv_rdtscp=yes], + [je_cv_rdstcp=no], + [je_cv_rdtscp=no])) + if test "x${je_cv_rdtscp}" = "xyes"; then + AC_DEFINE([HAVE_RDTSCP], 1, []) + fi fi ;; *) diff --git 
a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 9e27cc30..7686a9b7 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -14,6 +14,15 @@ malloc_getcpu(void) { return GetCurrentProcessorNumber(); #elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); +#elif defined(HAVE_RDTSCP) + unsigned int ax, cx, dx; + asm volatile("rdtscp" : "=a"(ax), "=d"(dx), "=c"(cx) ::); + return (malloc_cpuid_t)(dx & 0xfff); +#elif defined(__aarch64__) && defined(__APPLE__) + /* Other oses most likely use tpidr_el0 instead */ + uintptr_t c; + asm volatile("mrs %x0, tpidrro_el0" : "=r"(c) :: "memory"); + return (malloc_cpuid_t)(c & (1 << 3) - 1); #else not_reached(); return -1; From b950934916b2973fd4131ebfb684e53df305001a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 8 Jun 2022 14:24:55 -0700 Subject: [PATCH 2220/2608] Enable retain by default on macOS. High number of mappings result in unusually high fork() cost on macOS. Retain fixes the issue, at a small cost of extra VM space reserved. --- configure.ac | 3 +++ 1 file changed, 3 insertions(+) diff --git a/configure.ac b/configure.ac index 66eb7c91..917d9a80 100644 --- a/configure.ac +++ b/configure.ac @@ -671,6 +671,9 @@ case "${host}" in SOREV="${rev}.${so}" sbrk_deprecated="1" SYM_PREFIX="_" + if test "${LG_SIZEOF_PTR}" = "3"; then + default_retain="1" + fi ;; *-*-freebsd*) JE_APPEND_VS(CPPFLAGS, -D_BSD_SOURCE) From 4fc5c4fbac156c9f44452d3f30216451711dfa18 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Wed, 1 Jun 2022 22:04:11 +0100 Subject: [PATCH 2221/2608] New configure option '--enable-pageid' for Linux The option makes jemalloc use prctl with PR_SET_VMA to tag memory mappings with "jemalloc_pg" or "jemalloc_pg_overcommit". This allows to easily identify jemalloc's mappings in /proc//maps. PR_SET_VMA is only available in Linux 5.17 and above. 
--- configure.ac | 25 +++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++ src/pages.c | 28 +++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/configure.ac b/configure.ac index 917d9a80..0ae579ee 100644 --- a/configure.ac +++ b/configure.ac @@ -2067,6 +2067,14 @@ if test "x$have_memcntl" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_MEMCNTL], [ ], [ ]) fi +AC_CHECK_FUNC([prctl], + [have_prctl="1"], + [have_prctl="0"], + ) +if test "x$have_prctl" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_PRCTL], [ ], [ ]) +fi + dnl Disable lazy locking by default. AC_ARG_ENABLE([lazy_lock], [AS_HELP_STRING([--enable-lazy-lock], @@ -2435,6 +2443,22 @@ else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ]) fi +dnl Do not compile with debugging by default. +AC_ARG_ENABLE([pageid], + [AS_HELP_STRING([--enable-pageid], + [Enable named pages])], +[if test "x$enable_pageid" = "xno" ; then + enable_pageid="0" +else + enable_pageid="1" +fi +], +[enable_pageid="0"] +) +if test "x$enable_pageid" = "x1" ; then + AC_DEFINE([JEMALLOC_PAGEID], [ ], [ ]) +fi + dnl ============================================================================ dnl Enable background threads if possible. @@ -2691,5 +2715,6 @@ AC_MSG_RESULT([xmalloc : ${enable_xmalloc}]) AC_MSG_RESULT([log : ${enable_log}]) AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) +AC_MSG_RESULT([pageid : ${enable_pageid}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 888ef470..6dbd8780 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -162,6 +162,12 @@ /* Use gcc intrinsics for profile backtracing if defined. 
*/ #undef JEMALLOC_PROF_GCC +/* JEMALLOC_PAGEID enabled page id */ +#undef JEMALLOC_PAGEID + +/* JEMALLOC_HAVE_PRCTL checks prctl */ +#undef JEMALLOC_HAVE_PRCTL + /* * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage * segment (DSS). diff --git a/src/pages.c b/src/pages.c index 8c83a7de..b672e4de 100644 --- a/src/pages.c +++ b/src/pages.c @@ -21,6 +21,13 @@ #else #define PAGES_FD_TAG -1 #endif +#ifdef JEMALLOC_HAVE_PRCTL +#include +#ifndef PR_SET_VMA +#define PR_SET_VMA 0x53564d41 +#define PR_SET_VMA_ANON_NAME 0 +#endif +#endif /******************************************************************************/ /* Data. */ @@ -98,6 +105,22 @@ static int madvise_MADV_DONTNEED_zeroes_pages() } #endif +#ifdef JEMALLOC_PAGEID +static int os_page_id(void *addr, size_t size, const char *name) +{ +#ifdef JEMALLOC_HAVE_PRCTL + /* + * While parsing `/proc//maps` file, the block could appear as + * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]` + */ + return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, + (uintptr_t)name); +#else + return 0; +#endif +} +#endif + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -162,6 +185,11 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { #endif assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); +#ifdef JEMALLOC_PAGEID + int n = os_page_id(ret, size, + os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); + assert(n == 0 || (n == -1 && get_errno() == EINVAL)); +#endif return ret; } From 3713932836db1190ebadd4a0643db2d354b84fa3 Mon Sep 17 00:00:00 2001 From: Alex Lapenkou Date: Mon, 13 Jun 2022 15:32:33 -0700 Subject: [PATCH 2222/2608] Update building for Windows instructions Explain how to build for Windows in INSTALL.md and remove another readme.txt in an obscure location. 
--- INSTALL.md | 79 +++++++++++++++++++++++++++++++++++++++++++++++++ msvc/ReadMe.txt | 23 -------------- 2 files changed, 79 insertions(+), 23 deletions(-) delete mode 100644 msvc/ReadMe.txt diff --git a/INSTALL.md b/INSTALL.md index 90da718d..9dffa646 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -396,6 +396,85 @@ exclusively): Use this to search for programs used during configuration and building. +## Building for Windows + +There are at least two ways to build jemalloc's libraries for Windows. They +differ in their ease of use and flexibility. + +### With MSVC solutions +This is the easy, but less flexible approach. It doesn't let you specify +arguments to the `configure` script. + +1. Install Cygwin with at least the following packages: + * autoconf + * autogen + * gawk + * grep + * sed + +2. Install Visual Studio 2015 or 2017 with Visual C++ + +3. Add Cygwin\bin to the PATH environment variable + +4. Open "x64 Native Tools Command Prompt for VS 2017" + (note: x86/x64 doesn't matter at this point) + +5. Generate header files: + sh -c "CC=cl ./autogen.sh" + +6. Now the project can be opened and built in Visual Studio: + msvc\jemalloc_vc2017.sln + +### With MSYS +This is a more involved approach that offers the same configuration flexibility +as Linux builds. We use it for our CI workflow to test different jemalloc +configurations on Windows. + +1. Install the prerequisites + 1. MSYS2 + 2. Chocolatey + 3. Visual Studio if you want to compile with MSVC compiler + +2. Run your bash emulation. It could be MSYS2 or Git Bash (this manual was + tested on both) +3. Manually and selectively follow + [before_install.sh](https://github.com/jemalloc/jemalloc/blob/dev/scripts/windows/before_install.sh) + script. + 1. Skip the `TRAVIS_OS_NAME` check, `rm -rf C:/tools/msys64` and `choco + uninstall/upgrade` part. + 2. If using `msys2` shell, add path to `RefreshEnv.cmd` to `PATH`: + `PATH="$PATH:/c/ProgramData/chocolatey/bin"` + 3. 
Assign `msys_shell_cmd`, `msys2`, `mingw32` and `mingw64` as in the + script. + 4. Pick `CROSS_COMPILE_32BIT` , `CC` and `USE_MSVC` values depending on + your needs. For instance, if you'd like to build for x86_64 Windows + with `gcc`, then `CROSS_COMPILE_32BIT="no"`, `CC="gcc"` and + `USE_MSVC=""`. If you'd like to build for x86 Windows with `cl.exe`, + then `CROSS_COMPILE_32BIT="yes"`, `CC="cl.exe"`, `USE_MSVC="x86"`. + For x86_64 builds with `cl.exe`, assign `USE_MSVC="amd64"` and + `CROSS_COMPILE_32BIT="no"`. + 5. Replace the path to `vcvarsall.bat` with the path on your system. For + instance, on my Windows PC with Visual Studio 17, the path is + `C:\Program Files (x86)\Microsoft Visual + Studio\2017\BuildTools\VC\Auxiliary\Build\vcvarsall.bat`. + 6. Execute the rest of the script. It will install the required + dependencies and assign the variable `build_env`, which is a function + that executes following commands with the correct environment + variables set. +4. Use `$build_env ` as you would in a Linux shell: + 1. `$build_env autoconf` + 2. `$build_env ./configure CC="" ` + 3. `$build_env mingw32-make` + +If you're having any issues with the above, ensure the following: + +5. When you run `cmd //C RefreshEnv.cmd`, you get an output line starting with + `Refreshing` . If it errors saying `RefreshEnv.cmd` is not found, then you + need to add it to your `PATH` as described above in item 3.2 + +6. When you run `cmd //C $vcvarsall`, it prints a bunch of environment + variables. Otherwise, check the path to the `vcvarsall.bat` in `$vcvarsall` + script and fix it. ## Development diff --git a/msvc/ReadMe.txt b/msvc/ReadMe.txt deleted file mode 100644 index 633a7d49..00000000 --- a/msvc/ReadMe.txt +++ /dev/null @@ -1,23 +0,0 @@ - -How to build jemalloc for Windows -================================= - -1. Install Cygwin with at least the following packages: - * autoconf - * autogen - * gawk - * grep - * sed - -2. 
Install Visual Studio 2015 or 2017 with Visual C++ - -3. Add Cygwin\bin to the PATH environment variable - -4. Open "x64 Native Tools Command Prompt for VS 2017" - (note: x86/x64 doesn't matter at this point) - -5. Generate header files: - sh -c "CC=cl ./autogen.sh" - -6. Now the project can be opened and built in Visual Studio: - msvc\jemalloc_vc2017.sln From a9215bf18aed1a1e59cbc7dfb9a0f018697d491d Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 18 Jun 2022 17:17:26 +0100 Subject: [PATCH 2223/2608] CI update FreeBSD version. --- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 75695398..a68f3dc1 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -29,7 +29,7 @@ task: UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false freebsd_instance: matrix: - image: freebsd-12-3-release-amd64 + image: freebsd-13-0-release-amd64 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From cb578bbe01326bfc4a7b676f6921189d84518f03 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 3 Jul 2022 20:23:59 +0300 Subject: [PATCH 2224/2608] Fix possible "nmalloc >= ndalloc" assertion In arena_stats_merge() first nmalloc was read, and after ndalloc. However with this order, it is possible for some thread to incement ndalloc in between, and then nmalloc < ndalloc, and assertion will fail, like again found by ClickHouse CI [1] (even after #2234). [1]: https://github.com/ClickHouse/ClickHouse/issues/31531 Swap the order to avoid possible assertion. 
Cc: @interwq Follow-up for: #2234 --- src/arena.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/arena.c b/src/arena.c index 857b27c5..1ab2775e 100644 --- a/src/arena.c +++ b/src/arena.c @@ -106,18 +106,21 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->metadata_thp += metadata_thp; for (szind_t i = 0; i < SC_NSIZES - SC_NBINS; i++) { - uint64_t nmalloc = locked_read_u64(tsdn, - LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[i].nmalloc); - locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); - astats->nmalloc_large += nmalloc; - + /* ndalloc should be read before nmalloc, + * since otherwise it is possible for ndalloc to be incremented, + * and the following can become true: ndalloc > nmalloc */ uint64_t ndalloc = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].ndalloc); locked_inc_u64_unsynchronized(&lstats[i].ndalloc, ndalloc); astats->ndalloc_large += ndalloc; + uint64_t nmalloc = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].nmalloc); + locked_inc_u64_unsynchronized(&lstats[i].nmalloc, nmalloc); + astats->nmalloc_large += nmalloc; + uint64_t nrequests = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); From 41a859ef7325569c6c25f92d294d45123bb81355 Mon Sep 17 00:00:00 2001 From: Jasmin Parent Date: Sat, 2 Jul 2022 14:44:46 -0400 Subject: [PATCH 2225/2608] Remove duplicated words in documentation --- doc/jemalloc.xml.in | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index e28e8f38..98f86f95 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1121,9 +1121,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", linkend="arena.i.dirty_decay_ms">arena.<i>.dirty_decay_ms for related dynamic control options. See opt.muzzy_decay_ms - for a description of muzzy pages.for a description of muzzy pages. 
Note - that when the oversize_threshold + for a description of muzzy pages. Note that when the oversize_threshold feature is enabled, the arenas reserved for oversize requests may have its own default decay settings. From a1c7d9c046c2a90b978dc409d366b89303c96ab6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 12 Jul 2022 12:08:10 -0700 Subject: [PATCH 2226/2608] Add the missing opt.cache_oblivious handling. --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 68db1f36..7ccbf8ac 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1220,6 +1220,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") if (strncmp("metadata_thp", k, klen) == 0) { int m; From 58478412be842e140cc03dbb0c6ce84b2b8d096e Mon Sep 17 00:00:00 2001 From: David Carlier Date: Tue, 7 Jun 2022 21:43:08 +0100 Subject: [PATCH 2227/2608] OpenBSD build fix. still no cpu affinity. - enabling pthread_get/pthread_set_name_np api. - disabling per thread cpu affinity handling, unsupported on this platform. 
--- include/jemalloc/internal/jemalloc_internal_decls.h | 2 +- include/jemalloc/jemalloc_macros.h.in | 2 +- src/background_thread.c | 4 ++++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 983027c8..77ba1c9a 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -32,7 +32,7 @@ # include # endif # include -# if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) # include # include # if defined(__FreeBSD__) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index ebb3137e..2de3f27d 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -142,7 +142,7 @@ # define JEMALLOC_COLD #endif -#if (defined(__APPLE__) || defined(__FreeBSD__)) && !defined(JEMALLOC_NO_RENAME) +#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)) && !defined(JEMALLOC_NO_RENAME) # define JEMALLOC_SYS_NOTHROW #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW diff --git a/src/background_thread.c b/src/background_thread.c index 3bb8d26c..f22174d6 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -80,6 +80,9 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { static inline bool set_current_thread_affinity(int cpu) { +#ifdef __OpenBSD__ + return false; +#else #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; #else @@ -110,6 +113,7 @@ set_current_thread_affinity(int cpu) { # endif return ret != 0; #endif +#endif } #define BILLION UINT64_C(1000000000) From 4e12d21c8ddb9a70a12c8194c8b6c331fad7154a Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 11 Jun 2022 07:04:26 +0100 Subject: [PATCH 2228/2608] enabled percpu_arena settings on macOs. 
follow-up on #2280 --- include/jemalloc/internal/jemalloc_preamble.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 5ce77d96..d7086302 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -215,7 +215,7 @@ static const bool config_enable_cxx = #endif ; -#if defined(_WIN32) || defined(JEMALLOC_HAVE_SCHED_GETCPU) +#if defined(_WIN32) || defined(__APPLE__) || defined(JEMALLOC_HAVE_SCHED_GETCPU) /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA #endif From adc70c051135ac8909ca37492d7b104150077033 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Sat, 11 Jun 2022 07:57:22 +0100 Subject: [PATCH 2229/2608] update travis --- .travis.yml | 3 +++ scripts/gen_travis.py | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bf44fad4..49e6aa7e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -376,6 +376,9 @@ jobs: - os: osx arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - os: osx + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 4366a066..b49905f9 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -257,7 +257,6 @@ def generate_macos(arch): exclude = ([Option.as_malloc_conf(opt) for opt in ( 'dss:primary', - 
'percpu_arena:percpu', 'background_thread:true')] + [Option.as_configure_flag('--enable-prof')] + [CLANG,]) From 36366f3c4c741723369853c923e56999716398fc Mon Sep 17 00:00:00 2001 From: Ivan Zaitsev Date: Wed, 20 Jul 2022 15:25:56 -0700 Subject: [PATCH 2230/2608] Add double free detection in thread cache for debug build Add new runtime option `debug_double_free_max_scan` that specifies the max number of stack entries to scan in the cache bit when trying to detect the double free bug (currently debug build only). --- include/jemalloc/internal/cache_bin.h | 34 +++++++++++++ .../internal/jemalloc_internal_externs.h | 1 + include/jemalloc/internal/safety_check.h | 2 + src/ctl.c | 7 ++- src/jemalloc.c | 11 +++++ src/stats.c | 1 + test/unit/double_free.c | 49 ++++++++++++++++--- test/unit/mallctl.c | 1 + 8 files changed, 97 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index caf5be33..87c7ea5e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_CACHE_BIN_H #include "jemalloc/internal/ql.h" +#include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" /* @@ -427,6 +428,35 @@ cache_bin_full(cache_bin_t *bin) { return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); } +/* + * Scans the allocated area of the cache_bin for the given pointer up to limit. + * Fires safety_check_fail if the ptr is found and returns true. + */ +JEMALLOC_ALWAYS_INLINE bool +cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { + if (!config_debug || opt_debug_double_free_max_scan == 0) { + return false; + } + + cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin, false); + unsigned max_scan = opt_debug_double_free_max_scan < ncached + ? 
opt_debug_double_free_max_scan + : ncached; + + void **cur = bin->stack_head; + void **limit = cur + max_scan; + for (; cur < limit; cur++) { + if (*cur == ptr) { + safety_check_fail( + "Invalid deallocation detected: double free of " + "pointer %p\n", + ptr); + return true; + } + } + return false; +} + /* * Free an object into the given bin. Fails only if the bin is full. */ @@ -436,6 +466,10 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return false; } + if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { + return true; + } + bin->stack_head--; *bin->stack_head = ptr; cache_bin_assert_earlier(bin, bin->low_bits_full, diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index fc834c67..63b9bd2c 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -34,6 +34,7 @@ extern malloc_init_t malloc_init_state; extern const char *zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; +extern unsigned opt_debug_double_free_max_scan; /* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). 
*/ extern uintptr_t san_cache_bin_nonfast_mask; diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index f1a74f17..900cfa55 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H +#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); diff --git a/src/ctl.c b/src/ctl.c index 135271ba..e942cb1a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -92,6 +92,7 @@ CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_cache_oblivious) +CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) @@ -479,7 +480,9 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)} + {NAME("zero_realloc"), CTL(opt_zero_realloc)}, + {NAME("debug_double_free_max_scan"), + CTL(opt_debug_double_free_max_scan)} }; static const ctl_named_node_t tcache_node[] = { @@ -2128,6 +2131,8 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) +CTL_RO_NL_GEN(opt_debug_double_free_max_scan, + opt_debug_double_free_max_scan, unsigned) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7ccbf8ac..83d69dd0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -154,6 +154,9 @@ fxp_t opt_narenas_ratio = FXP_INIT_INT(4); 
unsigned ncpus; +unsigned opt_debug_double_free_max_scan = + SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; + /* Protects arenas initialization. */ malloc_mutex_t arenas_lock; @@ -1420,6 +1423,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, + "debug_double_free_max_scan", 0, UINT_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) /* * The runtime option of oversize_threshold remains @@ -1737,6 +1744,10 @@ malloc_conf_init_check_deps(void) { "prof_final.\n"); return true; } + /* To emphasize in the stats output that opt is disabled when !debug. */ + if (!config_debug) { + opt_debug_double_free_max_scan = 0; + } return false; } diff --git a/src/stats.c b/src/stats.c index efc70fd3..d150baef 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1518,6 +1518,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("tcache_gc_delay_bytes") OPT_WRITE_UNSIGNED("lg_tcache_flush_small_div") OPT_WRITE_UNSIGNED("lg_tcache_flush_large_div") + OPT_WRITE_UNSIGNED("debug_double_free_max_scan") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") OPT_WRITE_CHAR_P("prof_prefix") diff --git a/test/unit/double_free.c b/test/unit/double_free.c index 12122c1b..b52fcf90 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -10,13 +10,13 @@ void fake_abort(const char *message) { } void -test_large_double_free_pre(void) { +test_double_free_pre(void) { safety_check_set_abort(&fake_abort); fake_abort_called = false; } void -test_large_double_free_post() { +test_double_free_post() { expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); safety_check_set_abort(NULL); } @@ -29,7 +29,7 @@ TEST_BEGIN(test_large_double_free_tcache) { */ test_skip_if(config_debug); - test_large_double_free_pre(); + test_double_free_pre(); char *ptr 
= malloc(SC_LARGE_MINCLASS); bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); @@ -44,7 +44,7 @@ TEST_BEGIN(test_large_double_free_tcache) { fake_abort_called = true; } mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); - test_large_double_free_post(); + test_double_free_post(); } TEST_END @@ -52,7 +52,7 @@ TEST_BEGIN(test_large_double_free_no_tcache) { test_skip_if(!config_opt_safety_checks); test_skip_if(config_debug); - test_large_double_free_pre(); + test_double_free_pre(); char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); bool guarded = extent_is_guarded(tsdn_fetch(), ptr); dallocx(ptr, MALLOCX_TCACHE_NONE); @@ -66,12 +66,45 @@ TEST_BEGIN(test_large_double_free_no_tcache) { */ fake_abort_called = true; } - test_large_double_free_post(); + test_double_free_post(); +} +TEST_END + +TEST_BEGIN(test_small_double_free_tcache) { + test_skip_if(!config_debug); + + test_skip_if(opt_debug_double_free_max_scan == 0); + + bool tcache_enabled; + size_t sz = sizeof(tcache_enabled); + assert_d_eq( + mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + test_skip_if(!tcache_enabled); + + test_double_free_pre(); + char *ptr = malloc(1); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + free(ptr); + if (!guarded) { + free(ptr); + } else { + /* + * Skip because guarded extents may unguard immediately on + * deallocation, in which case the second free will crash before + * reaching the intended safety check. 
+ */ + fake_abort_called = true; + } + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + test_double_free_post(); } TEST_END int main(void) { - return test(test_large_double_free_no_tcache, - test_large_double_free_tcache); + return test( + test_large_double_free_no_tcache, + test_large_double_free_tcache, + test_small_double_free_tcache); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6efc8f1b..62bd1a2d 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -325,6 +325,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_stats, prof); TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); + TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); #undef TEST_MALLCTL_OPT } From 42daa1ac4405a06ed79f68dc2c0ca8c5ad477ecd Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 9 Aug 2022 16:39:02 -0700 Subject: [PATCH 2231/2608] Add double free detection using slab bitmap for debug build Add a sanity check for double free issue in the arena in case that the tcache has been flushed. --- include/jemalloc/internal/arena_inlines_b.h | 71 ++++++++++++++++----- test/unit/double_free.c | 50 ++++++++++++--- 2 files changed, 96 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index fa81537c..69617fb7 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -298,6 +298,54 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } } +/* Find the region index of a pointer. */ +JEMALLOC_ALWAYS_INLINE size_t +arena_slab_regind_impl(div_info_t* div_info, szind_t binind, + edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. 
*/ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % + (uintptr_t)bin_infos[binind].reg_size == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. */ + regind = div_compute(div_info, diff); + assert(regind < bin_infos[binind].nregs); + return regind; +} + +/* Checks whether ptr is currently active in the arena. */ +JEMALLOC_ALWAYS_INLINE bool +arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { + if (!config_debug) { + return false; + } + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + szind_t binind = edata_szind_get(edata); + div_info_t div_info = arena_binind_div_info[binind]; + /* + * Calls the internal function arena_slab_regind_impl because the + * safety check does not require a lock. + */ + size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr); + slab_data_t *slab_data = edata_slab_data_get(edata); + const bin_info_t *bin_info = &bin_infos[binind]; + assert(edata_nfree_get(edata) < bin_info->nregs); + if (unlikely(!bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, + regind))) { + safety_check_fail( + "Invalid deallocation detected: the pointer being freed (%p) not " + "currently active, possibly caused by double free bugs.\n", ptr); + return true; + } + return false; +} + JEMALLOC_ALWAYS_INLINE void arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, emap_alloc_ctx_t *caller_alloc_ctx, bool slow_path) { @@ -328,6 +376,9 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ + if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { + return; + } tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { @@ -415,6 +466,9 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (likely(alloc_ctx.slab)) { /* Small allocation. */ + if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { + return; + } tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { @@ -465,22 +519,7 @@ struct arena_dalloc_bin_locked_info_s { JEMALLOC_ALWAYS_INLINE size_t arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. 
*/ - regind = div_compute(&info->div_info, diff); - - assert(regind < bin_infos[binind].nregs); - + size_t regind = arena_slab_regind_impl(&info->div_info, binind, slab, ptr); return regind; } diff --git a/test/unit/double_free.c b/test/unit/double_free.c index b52fcf90..e73efe71 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -21,6 +21,15 @@ test_double_free_post() { safety_check_set_abort(NULL); } +bool tcache_enabled() { + bool enabled; + size_t sz = sizeof(enabled); + assert_d_eq( + mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); + return enabled; +} + TEST_BEGIN(test_large_double_free_tcache) { test_skip_if(!config_opt_safety_checks); /* @@ -72,15 +81,8 @@ TEST_END TEST_BEGIN(test_small_double_free_tcache) { test_skip_if(!config_debug); - test_skip_if(opt_debug_double_free_max_scan == 0); - - bool tcache_enabled; - size_t sz = sizeof(tcache_enabled); - assert_d_eq( - mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); - test_skip_if(!tcache_enabled); + test_skip_if(!tcache_enabled()); test_double_free_pre(); char *ptr = malloc(1); @@ -101,10 +103,40 @@ TEST_BEGIN(test_small_double_free_tcache) { } TEST_END +TEST_BEGIN(test_small_double_free_arena) { + test_skip_if(!config_debug); + test_skip_if(!tcache_enabled()); + + test_double_free_pre(); + /* + * Allocate one more pointer to keep the slab partially used after + * flushing the cache. + */ + char *ptr1 = malloc(1); + char *ptr = malloc(1); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + free(ptr); + if (!guarded) { + mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + free(ptr); + } else { + /* + * Skip because guarded extents may unguard immediately on + * deallocation, in which case the second free will crash before + * reaching the intended safety check. 
+ */ + fake_abort_called = true; + } + test_double_free_post(); + free(ptr1); +} +TEST_END + int main(void) { return test( test_large_double_free_no_tcache, test_large_double_free_tcache, - test_small_double_free_tcache); + test_small_double_free_tcache, + test_small_double_free_arena); } From ce29b4c3d9256956a8d60302b5d1fa72c3479686 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 12 Aug 2022 11:31:07 -0700 Subject: [PATCH 2232/2608] Refactor the remote / cross thread cache bin stats reading Refactored cache_bin.h so that only one function is racy. --- include/jemalloc/internal/cache_bin.h | 100 +++++++++++++------------- src/cache_bin.c | 3 +- 2 files changed, 51 insertions(+), 52 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 87c7ea5e..ee8b1ae2 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -195,27 +195,18 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * be associated with the position earlier in memory. */ static inline uint16_t -cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later, bool racy) { - /* - * When it's racy, bin->low_bits_full can be modified concurrently. It - * can cross the uint16_t max value and become less than - * bin->low_bits_empty at the time of the check. - */ - if (!racy) { - cache_bin_assert_earlier(bin, earlier, later); - } +cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { + cache_bin_assert_earlier(bin, earlier, later); return later - earlier; } /* * Number of items currently cached in the bin, without checking ncached_max. - * We require specifying whether or not the request is racy or not (i.e. whether - * or not concurrent modifications are possible). 
*/ static inline cache_bin_sz_t -cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { +cache_bin_ncached_get_internal(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, racy); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); /* * We have undefined behavior here; if this function is called from the @@ -226,7 +217,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { * fast paths. This should still be "safe" in the sense of generating * the correct assembly for the foreseeable future, though. */ - assert(n == 0 || *(bin->stack_head) != NULL || racy); + assert(n == 0 || *(bin->stack_head) != NULL); return n; } @@ -237,8 +228,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin, bool racy) { */ static inline cache_bin_sz_t cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, - /* racy */ false); + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -254,8 +244,7 @@ cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty, - /* racy */ false); + (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; void **ret = (void **)empty_bits; @@ -312,7 +301,7 @@ cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { static inline cache_bin_sz_t cache_bin_low_water_get_internal(cache_bin_t *bin) { return cache_bin_diff(bin, bin->low_bits_low_water, - bin->low_bits_empty, /* racy */ false) / sizeof(void *); + bin->low_bits_empty) / sizeof(void *); } /* Returns the numeric value of low water in [0, ncached]. 
*/ @@ -339,7 +328,7 @@ cache_bin_low_water_set(cache_bin_t *bin) { static inline void cache_bin_low_water_adjust(cache_bin_t *bin) { - if (cache_bin_ncached_get_internal(bin, /* racy */ false) + if (cache_bin_ncached_get_internal(bin) < cache_bin_low_water_get_internal(bin)) { cache_bin_low_water_set(bin); } @@ -411,8 +400,7 @@ cache_bin_alloc(cache_bin_t *bin, bool *success) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, - /* racy */ false); + cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); if (n > num) { n = (cache_bin_sz_t)num; } @@ -438,7 +426,7 @@ cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { return false; } - cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin, false); + cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); unsigned max_scan = opt_debug_double_free_max_scan < ncached ? opt_debug_double_free_max_scan : ncached; @@ -488,8 +476,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Stash at the full position, in the [full, head) range. */ uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. */ - uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head, - /* racy */ false); + uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); *(void **)((uintptr_t)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -499,46 +486,35 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { return true; } -/* - * Get the number of stashed pointers. - * - * When called from a thread not owning the TLS (i.e. racy = true), it's - * important to keep in mind that 'bin->stack_head' and 'bin->low_bits_full' can - * be modified concurrently and almost none assertions about their values can be - * made. - */ +/* Get the number of stashed pointers. 
*/ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info, - bool racy) { +cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, info); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, - bin->low_bits_full, racy) / sizeof(void *); + bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); - if (!racy) { - /* Below are for assertions only. */ - void **low_bound = cache_bin_low_bound_get(bin, info); + /* Below are for assertions only. */ + void **low_bound = cache_bin_low_bound_get(bin, info); - assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); - void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + void *stashed = *(low_bound + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET - /* Allow arbitrary pointers to be stashed in tests. */ - aligned = true; + /* Allow arbitrary pointers to be stashed in tests. */ + aligned = true; #endif - assert(n == 0 || (stashed != NULL && aligned)); - } + assert(n == 0 || (stashed != NULL && aligned)); return n; } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info, - /* racy */ false); + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info); assert(n <= cache_bin_info_ncached_max(info)); return n; } @@ -546,15 +522,39 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { /* * Obtain a racy view of the number of items currently in the cache bin, in the * presence of possible concurrent modifications. + * + * Note that this is the only racy function in this header. Any other functions + * are assumed to be non-racy. 
The "racy" term here means accessed from another + * thread (that is not the owner of the specific cache bin). This only happens + * when gathering stats (read-only). The only change because of the racy + * condition is that assertions based on mutable fields are omitted. + * + * It's important to keep in mind that 'bin->stack_head' and + * 'bin->low_bits_full' can be modified concurrently and almost no assertions + * about their values can be made. + * + * This function should not call other utility functions because the racy + * condition may cause unexpected / undefined behaviors in unverified utility + * functions. Currently, this function calls two utility functions + * cache_bin_info_ncached_max and cache_bin_low_bits_low_bound_get because they + * help access values that will not be concurrently modified. */ static inline void cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { - cache_bin_sz_t n = cache_bin_ncached_get_internal(bin, /* racy */ true); + /* Racy version of cache_bin_ncached_get_internal. */ + cache_bin_sz_t diff = bin->low_bits_empty - + (uint16_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t n = diff / sizeof(void *); + assert(n <= cache_bin_info_ncached_max(info)); *ncached = n; - n = cache_bin_nstashed_get_internal(bin, info, /* racy */ true); + /* Racy version of cache_bin_nstashed_get_internal. */ + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, + info); + n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); + assert(n <= cache_bin_info_ncached_max(info)); *nstashed = n; /* Note that cannot assert ncached + nstashed <= ncached_max (racy). 
*/ diff --git a/src/cache_bin.c b/src/cache_bin.c index 9ae072a0..a4c22bd7 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -84,8 +84,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_full = (uint16_t)(uintptr_t)full_position; bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; cache_bin_sz_t free_spots = cache_bin_diff(bin, - bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head, - /* racy */ false); + bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); assert(cache_bin_ncached_get_local(bin, info) == 0); assert(cache_bin_empty_position_get(bin) == empty_position); From 56ddbea270e5c73ba5a4977550e02c2b3706ae80 Mon Sep 17 00:00:00 2001 From: Abael He Date: Thu, 25 Aug 2022 11:12:08 +0800 Subject: [PATCH 2233/2608] error: implicit declaration of function 'pthread_create_fptr_init' is invalid in C99 ./autogen.sh \ && ./configure --prefix=/usr/local --enable-static --enable-autogen --enable-xmalloc --with-static-libunwind=/usr/local/lib/libunwind.a --enable-lazy-lock --with-jemalloc-prefix='' \ && make -j16 ... gcc -std=gnu11 -Werror=unknown-warning-option -Wall -Wextra -Wshorten-64-to-32 -Wsign-compare -Wundef -Wno-format-zero-length -Wpointer-arith -Wno-missing-braces -Wno-missing-field-initializers -pipe -g3 -Wimplicit-fallthrough -O3 -funroll-loops -fPIC -DPIC -c -D_REENTRANT -Iinclude -Iinclude -DJEMALLOC_NO_PRIVATE_NAMESPACE -o src/edata_cache.sym.o src/edata_cache.c src/background_thread.c:768:6: error: implicit declaration of function 'pthread_create_fptr_init' is invalid in C99 [-Werror,-Wimplicit-function-declaration] pthread_create_fptr_init()) { ^ src/background_thread.c:768:6: note: did you mean 'pthread_create_wrapper_init'? src/background_thread.c:34:1: note: 'pthread_create_wrapper_init' declared here pthread_create_wrapper_init(void) { ^ 1 error generated. make: *** [src/background_thread.sym.o] Error 1 make: *** Waiting for unfinished jobs.... 
--- src/background_thread.c | 66 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index f22174d6..3171dd31 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -46,6 +46,39 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, return pthread_create_fptr(thread, attr, start_routine, arg); } + +#ifdef JEMALLOC_HAVE_DLSYM +#include +#endif + +static bool +pthread_create_fptr_init(void) { + if (pthread_create_fptr != NULL) { + return false; + } + /* + * Try the next symbol first, because 1) when use lazy_lock we have a + * wrapper for pthread_create; and 2) application may define its own + * wrapper as well (and can call malloc within the wrapper). + */ +#ifdef JEMALLOC_HAVE_DLSYM + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); +#else + pthread_create_fptr = NULL; +#endif + if (pthread_create_fptr == NULL) { + if (config_lazy_lock) { + malloc_write(": Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } else { + /* Fall back to the default symbol. */ + pthread_create_fptr = pthread_create; + } + } + + return false; +} #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */ #ifndef JEMALLOC_BACKGROUND_THREAD @@ -710,39 +743,6 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS -#ifdef JEMALLOC_HAVE_DLSYM -#include -#endif - -static bool -pthread_create_fptr_init(void) { - if (pthread_create_fptr != NULL) { - return false; - } - /* - * Try the next symbol first, because 1) when use lazy_lock we have a - * wrapper for pthread_create; and 2) application may define its own - * wrapper as well (and can call malloc within the wrapper). 
- */ -#ifdef JEMALLOC_HAVE_DLSYM - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); -#else - pthread_create_fptr = NULL; -#endif - if (pthread_create_fptr == NULL) { - if (config_lazy_lock) { - malloc_write(": Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } else { - /* Fall back to the default symbol. */ - pthread_create_fptr = pthread_create; - } - } - - return false; -} - /* * When lazy lock is enabled, we need to make sure setting isthreaded before * taking any background_thread locks. This is called early in ctl (instead of From a0734fd6ee326cd2059edbe4bca7092988a63684 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 19 Aug 2022 12:17:10 -0700 Subject: [PATCH 2234/2608] Making jemalloc max stack depth a runtime option --- include/jemalloc/internal/prof_externs.h | 1 + include/jemalloc/internal/prof_structs.h | 2 +- include/jemalloc/internal/prof_types.h | 7 +- src/ctl.c | 3 + src/jemalloc.c | 3 + src/prof.c | 1 + src/prof_data.c | 7 +- src/prof_sys.c | 152 ++++++++++++++++++++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + test/unit/prof_hook.c | 1 + test/unit/prof_hook.sh | 3 +- 12 files changed, 171 insertions(+), 11 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index bdff1349..d1101561 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -7,6 +7,7 @@ extern bool opt_prof; extern bool opt_prof_active; extern bool opt_prof_thread_active_init; +extern unsigned opt_prof_bt_max; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. 
*/ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index dd22115f..9331fba4 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -202,7 +202,7 @@ struct prof_tdata_s { prof_cnt_t cnt_summed; /* Backtrace vector, used for calls to prof_backtrace(). */ - void *vec[PROF_BT_MAX]; + void **vec; }; typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index ba628654..87cbb4ab 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -23,7 +23,12 @@ typedef struct prof_recent_s prof_recent_t; * is based on __builtin_return_address() necessarily has a hard-coded number * of backtrace frame handlers, and should be kept in sync with this setting. */ -#define PROF_BT_MAX 128 +#ifdef JEMALLOC_PROF_GCC +# define PROF_BT_MAX_LIMIT 256 +#else +# define PROF_BT_MAX_LIMIT UINT_MAX +#endif +#define PROF_BT_MAX_DEFAULT 128 /* Initial hash table size. 
*/ #define PROF_CKH_MINITEMS 64 diff --git a/src/ctl.c b/src/ctl.c index e942cb1a..6b03f986 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -142,6 +142,7 @@ CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_thread_active_init) +CTL_PROTO(opt_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) @@ -468,6 +469,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_prefix"), CTL(opt_prof_prefix)}, {NAME("prof_active"), CTL(opt_prof_active)}, {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, + {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, @@ -2205,6 +2207,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 83d69dd0..a4761c9b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1585,6 +1585,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", + 1, PROF_BT_MAX_LIMIT, CONF_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, (sizeof(uint64_t) << 3) - 1) diff --git a/src/prof.c b/src/prof.c index 7a6d5d56..3deac0b5 100644 --- a/src/prof.c +++ b/src/prof.c 
@@ -26,6 +26,7 @@ bool opt_prof = false; bool opt_prof_active = true; bool opt_prof_thread_active_init = true; +unsigned opt_prof_bt_max = PROF_BT_MAX_DEFAULT; size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; bool opt_prof_gdump = false; diff --git a/src/prof_data.c b/src/prof_data.c index bfa55be1..f8b19594 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -1167,13 +1167,16 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, cassert(config_prof); /* Initialize an empty cache for this thread. */ - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t), - sz_size2index(sizeof(prof_tdata_t)), false, NULL, true, + size_t tdata_sz = ALIGNMENT_CEILING(sizeof(prof_tdata_t), QUANTUM); + size_t total_sz = tdata_sz + sizeof(void *) * opt_prof_bt_max; + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), + total_sz, sz_size2index(total_sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) { return NULL; } + tdata->vec = (void **)((uintptr_t)tdata + tdata_sz); tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; diff --git a/src/prof_sys.c b/src/prof_sys.c index b5f1f5b2..99fa3a77 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -55,9 +55,9 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(*len == 0); assert(vec != NULL); - assert(max_len == PROF_BT_MAX); + assert(max_len <= PROF_BT_MAX_LIMIT); - nframes = unw_backtrace(vec, PROF_BT_MAX); + nframes = unw_backtrace(vec, max_len); if (nframes <= 0) { return; } @@ -97,13 +97,14 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(vec != NULL); - assert(max_len == PROF_BT_MAX); + assert(max_len <= PROF_BT_MAX_LIMIT); _Unwind_Backtrace(prof_unwind_callback, &data); } #elif (defined(JEMALLOC_PROF_GCC)) static void prof_backtrace_impl(void **vec, unsigned 
*len, unsigned max_len) { +/* The input arg must be a constant for __builtin_return_address. */ #define BT_FRAME(i) \ if ((i) < max_len) { \ void *p; \ @@ -122,7 +123,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { cassert(config_prof); assert(vec != NULL); - assert(max_len == PROF_BT_MAX); + assert(max_len <= PROF_BT_MAX_LIMIT); BT_FRAME(0) BT_FRAME(1) @@ -264,6 +265,147 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { BT_FRAME(125) BT_FRAME(126) BT_FRAME(127) + BT_FRAME(128) + BT_FRAME(129) + + BT_FRAME(130) + BT_FRAME(131) + BT_FRAME(132) + BT_FRAME(133) + BT_FRAME(134) + BT_FRAME(135) + BT_FRAME(136) + BT_FRAME(137) + BT_FRAME(138) + BT_FRAME(139) + + BT_FRAME(140) + BT_FRAME(141) + BT_FRAME(142) + BT_FRAME(143) + BT_FRAME(144) + BT_FRAME(145) + BT_FRAME(146) + BT_FRAME(147) + BT_FRAME(148) + BT_FRAME(149) + + BT_FRAME(150) + BT_FRAME(151) + BT_FRAME(152) + BT_FRAME(153) + BT_FRAME(154) + BT_FRAME(155) + BT_FRAME(156) + BT_FRAME(157) + BT_FRAME(158) + BT_FRAME(159) + + BT_FRAME(160) + BT_FRAME(161) + BT_FRAME(162) + BT_FRAME(163) + BT_FRAME(164) + BT_FRAME(165) + BT_FRAME(166) + BT_FRAME(167) + BT_FRAME(168) + BT_FRAME(169) + + BT_FRAME(170) + BT_FRAME(171) + BT_FRAME(172) + BT_FRAME(173) + BT_FRAME(174) + BT_FRAME(175) + BT_FRAME(176) + BT_FRAME(177) + BT_FRAME(178) + BT_FRAME(179) + + BT_FRAME(180) + BT_FRAME(181) + BT_FRAME(182) + BT_FRAME(183) + BT_FRAME(184) + BT_FRAME(185) + BT_FRAME(186) + BT_FRAME(187) + BT_FRAME(188) + BT_FRAME(189) + + BT_FRAME(190) + BT_FRAME(191) + BT_FRAME(192) + BT_FRAME(193) + BT_FRAME(194) + BT_FRAME(195) + BT_FRAME(196) + BT_FRAME(197) + BT_FRAME(198) + BT_FRAME(199) + + BT_FRAME(200) + BT_FRAME(201) + BT_FRAME(202) + BT_FRAME(203) + BT_FRAME(204) + BT_FRAME(205) + BT_FRAME(206) + BT_FRAME(207) + BT_FRAME(208) + BT_FRAME(209) + + BT_FRAME(210) + BT_FRAME(211) + BT_FRAME(212) + BT_FRAME(213) + BT_FRAME(214) + BT_FRAME(215) + BT_FRAME(216) + BT_FRAME(217) + BT_FRAME(218) + 
BT_FRAME(219) + + BT_FRAME(220) + BT_FRAME(221) + BT_FRAME(222) + BT_FRAME(223) + BT_FRAME(224) + BT_FRAME(225) + BT_FRAME(226) + BT_FRAME(227) + BT_FRAME(228) + BT_FRAME(229) + + BT_FRAME(230) + BT_FRAME(231) + BT_FRAME(232) + BT_FRAME(233) + BT_FRAME(234) + BT_FRAME(235) + BT_FRAME(236) + BT_FRAME(237) + BT_FRAME(238) + BT_FRAME(239) + + BT_FRAME(240) + BT_FRAME(241) + BT_FRAME(242) + BT_FRAME(243) + BT_FRAME(244) + BT_FRAME(245) + BT_FRAME(246) + BT_FRAME(247) + BT_FRAME(248) + BT_FRAME(249) + + BT_FRAME(250) + BT_FRAME(251) + BT_FRAME(252) + BT_FRAME(253) + BT_FRAME(254) + BT_FRAME(255) #undef BT_FRAME } #else @@ -281,7 +423,7 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { assert(prof_backtrace_hook != NULL); pre_reentrancy(tsd, NULL); - prof_backtrace_hook(bt->vec, &bt->len, PROF_BT_MAX); + prof_backtrace_hook(bt->vec, &bt->len, opt_prof_bt_max); post_reentrancy(tsd); } diff --git a/src/stats.c b/src/stats.c index d150baef..5bb1a346 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1521,6 +1521,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("debug_double_free_max_scan") OPT_WRITE_CHAR_P("thp") OPT_WRITE_BOOL("prof") + OPT_WRITE_UNSIGNED("prof_bt_max") OPT_WRITE_CHAR_P("prof_prefix") OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 62bd1a2d..14fe7993 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -314,6 +314,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof, prof); TEST_MALLCTL_OPT(const char *, prof_prefix, prof); TEST_MALLCTL_OPT(bool, prof_active, prof); + TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index 6480d930..fc06d84e 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -129,6 
+129,7 @@ TEST_END TEST_BEGIN(test_prof_dump_hook) { test_skip_if(!config_prof); + expect_u_eq(opt_prof_bt_max, 200, "Unexpected backtrace stack depth"); mock_dump_hook_called = false; diff --git a/test/unit/prof_hook.sh b/test/unit/prof_hook.sh index c7ebd8f9..48cd51a5 100644 --- a/test/unit/prof_hook.sh +++ b/test/unit/prof_hook.sh @@ -1,6 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_bt_max:200" fi - From ba19d2cb78176ef715aca461c7a7a7b2afb35772 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Thu, 1 Sep 2022 16:42:56 -0700 Subject: [PATCH 2235/2608] Add arena-level name. An arena-level name can help identify manual arenas. --- include/jemalloc/internal/arena_externs.h | 2 + include/jemalloc/internal/arena_structs.h | 3 + include/jemalloc/internal/arena_types.h | 2 + src/arena.c | 21 +++++++ src/ctl.c | 67 +++++++++++++++++++++-- src/stats.c | 13 ++++- test/include/test/test.h | 4 +- test/unit/mallctl.c | 43 +++++++++++++++ 8 files changed, 146 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e6fceaaf..9f5c1958 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -88,6 +88,8 @@ ehooks_t *arena_get_ehooks(arena_t *arena); extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +void arena_name_get(arena_t *arena, char *name); +void arena_name_set(arena_t *arena, const char *name); ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index e2a5a408..e6868fce 100644 --- 
a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -91,6 +91,9 @@ struct arena_s { /* Used to determine uptime. Read-only after initialization. */ nstime_t create_time; + /* The name of the arena. */ + char name[ARENA_NAME_LEN]; + /* * The arena is allocated alongside its bins; really this is a * dynamically sized array determined by the binshard settings. diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index d0e12917..45eec69f 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -8,6 +8,8 @@ #define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ #define ARENA_DECAY_NTICKS_PER_UPDATE 1000 +/* Maximum length of the arena name. */ +#define ARENA_NAME_LEN 32 typedef struct arena_decay_s arena_decay_t; typedef struct arena_s arena_t; diff --git a/src/arena.c b/src/arena.c index 1ab2775e..25ab41af 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1547,6 +1547,22 @@ arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec) { return false; } +void +arena_name_get(arena_t *arena, char *name) { + char *end = (char *)memchr((void *)arena->name, '\0', ARENA_NAME_LEN); + assert(end != NULL); + size_t len = (uintptr_t)end - (uintptr_t)arena->name + 1; + assert(len > 0 && len <= ARENA_NAME_LEN); + + strncpy(name, arena->name, len); +} + +void +arena_name_set(arena_t *arena, const char *name) { + strncpy(arena->name, name, ARENA_NAME_LEN); + arena->name[ARENA_NAME_LEN - 1] = '\0'; +} + ssize_t arena_dirty_decay_ms_default_get(void) { return atomic_load_zd(&dirty_decay_ms_default, ATOMIC_RELAXED); @@ -1670,6 +1686,11 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_set(ind, arena); arena->ind = ind; + /* Init the name. */ + malloc_snprintf(arena->name, sizeof(arena->name), "%s_%u", + arena_is_auto(arena) ? 
"auto" : "manual", arena->ind); + arena->name[ARENA_NAME_LEN - 1] = '\0'; + nstime_init_update(&arena->create_time); /* diff --git a/src/ctl.c b/src/ctl.c index 6b03f986..acf5d366 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -170,6 +170,7 @@ CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) CTL_PROTO(arena_i_retain_grow_limit) +CTL_PROTO(arena_i_name) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -504,11 +505,12 @@ static const ctl_named_node_t arena_i_node[] = { * Undocumented for now, since we anticipate an arena API in flux after * we cut the last 5-series release. */ - {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, - {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, - {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} + {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, + {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}, + {NAME("name"), CTL(arena_i_name)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -2983,6 +2985,61 @@ label_return: return ret; } +/* + * When writing, newp should point to a char array storing the name to be set. + * A name longer than ARENA_NAME_LEN will be arbitrarily cut. When reading, + * oldp should point to a char array whose length is no shorter than + * ARENA_NAME_LEN or the length of the name when it was set. 
+ */ +static int +arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + char *name; + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind >= + ctl_arenas->narenas) { + ret = EINVAL; + goto label_return; + } + arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + + if (oldp != NULL && oldlenp != NULL) { + /* + * Read the arena name. When reading, the input oldp should + * point to an array with a length no shorter than + * ARENA_NAME_LEN or the length when it was set. + */ + if (*oldlenp != sizeof(char *)) { + ret = EINVAL; + goto label_return; + } + name = *(char **)oldp; + arena_name_get(arena, name); + } + + if (newp != NULL) { + /* Write the arena name. */ + WRITE(name, char *); + if (name == NULL) { + ret = EINVAL; + goto label_return; + } + arena_name_set(arena, name); + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/stats.c b/src/stats.c index 5bb1a346..701a6c86 100644 --- a/src/stats.c +++ b/src/stats.c @@ -42,15 +42,18 @@ const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { assert(miblen_new == miblen + 1); \ } while (0) -#define CTL_M2_GET(n, i, v, t) do { \ +#define CTL_MIB_GET(n, i, v, t, ind) do { \ size_t mib[CTL_MAX_DEPTH]; \ size_t miblen = sizeof(mib) / sizeof(size_t); \ size_t sz = sizeof(t); \ xmallctlnametomib(n, mib, &miblen); \ - mib[2] = (i); \ + mib[(ind)] = (i); \ xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ } while (0) +#define CTL_M1_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 1) +#define CTL_M2_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 2) + 
/******************************************************************************/ /* Data. */ @@ -1042,6 +1045,8 @@ JEMALLOC_COLD static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, bool mutex, bool extents, bool hpa) { + char name[ARENA_NAME_LEN]; + char *namep = name; unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; @@ -1059,6 +1064,10 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, uint64_t uptime; CTL_GET("arenas.page", &page, size_t); + if (i != MALLCTL_ARENAS_ALL && i != MALLCTL_ARENAS_DESTROYED) { + CTL_M1_GET("arena.0.name", i, (void *)&namep, const char *); + emitter_kv(emitter, "name", "name", emitter_type_string, &namep); + } CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); emitter_kv(emitter, "nthreads", "assigned threads", diff --git a/test/include/test/test.h b/test/include/test/test.h index d4b65912..54610dab 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -266,7 +266,7 @@ #define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) #define verify_str_eq(may_abort, a, b, ...) do { \ - if (strcmp((a), (b))) { \ + if (strcmp((a), (b)) != 0) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ @@ -284,7 +284,7 @@ } while (0) #define verify_str_ne(may_abort, a, b, ...) 
do { \ - if (!strcmp((a), (b))) { \ + if (strcmp((a), (b)) == 0) { \ char prefix[ASSERT_BUFSIZE]; \ char message[ASSERT_BUFSIZE]; \ malloc_snprintf(prefix, sizeof(prefix), \ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 14fe7993..244d4c96 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -711,6 +711,48 @@ TEST_BEGIN(test_arena_i_dss) { } TEST_END +TEST_BEGIN(test_arena_i_name) { + unsigned arena_ind; + size_t ind_sz = sizeof(arena_ind); + size_t mib[3]; + size_t miblen; + char name_old[ARENA_NAME_LEN]; + char *name_oldp = name_old; + size_t sz = sizeof(name_oldp); + char default_name[ARENA_NAME_LEN]; + const char *name_new = "test name"; + const char *super_long_name = "A name longer than ARENA_NAME_LEN"; + size_t super_long_name_len = strlen(super_long_name); + assert(super_long_name_len > ARENA_NAME_LEN); + + miblen = sizeof(mib)/sizeof(size_t); + expect_d_eq(mallctlnametomib("arena.0.name", mib, &miblen), 0, + "Unexpected mallctlnametomib() error"); + + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, + 0), 0, "Unexpected mallctl() failure"); + mib[1] = arena_ind; + + malloc_snprintf(default_name, sizeof(default_name), "manual_%u", + arena_ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, + (void *)&name_new, sizeof(name_new)), 0, + "Unexpected mallctl() failure"); + expect_str_eq(name_old, default_name, + "Unexpected default value for arena name"); + + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, + (void *)&super_long_name, sizeof(super_long_name)), 0, + "Unexpected mallctl() failure"); + expect_str_eq(name_old, name_new, "Unexpected value for arena name"); + + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, + NULL, 0), 0, "Unexpected mallctl() failure"); + int cmp = strncmp(name_old, super_long_name, ARENA_NAME_LEN - 1); + expect_true(cmp == 0, "Unexpected value for long arena name "); +} +TEST_END + TEST_BEGIN(test_arena_i_retain_grow_limit) { size_t 
old_limit, new_limit, default_limit; size_t mib[3]; @@ -1258,6 +1300,7 @@ main(void) { test_arena_i_purge, test_arena_i_decay, test_arena_i_dss, + test_arena_i_name, test_arena_i_retain_grow_limit, test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, From c9ac1f4701d621c3f39b94970fa96ce23897a295 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 16 Sep 2022 11:39:34 -0700 Subject: [PATCH 2236/2608] Fix a bug in C++ integration test. --- test/integration/cpp/infallible_new_true.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index 3976f08b..3b2862bd 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -10,7 +10,7 @@ typedef void (*abort_hook_t)(const char *message); bool fake_abort_called; void fake_abort(const char *message) { const char *expected_start = ": Allocation of size"; - if (strncmp(message, expected_start, strlen(expected_start) != 0)) { + if (strncmp(message, expected_start, strlen(expected_start)) != 0) { abort(); } fake_abort_called = true; @@ -64,4 +64,3 @@ main(void) { return test( test_failing_alloc); } - From c0c9783ec9289e6d1de749ff20081af65bdd78b8 Mon Sep 17 00:00:00 2001 From: Lily Wang <494550702@qq.com> Date: Fri, 9 Sep 2022 02:04:23 -0700 Subject: [PATCH 2237/2608] Add vcpkg installation instructions --- INSTALL.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/INSTALL.md b/INSTALL.md index 9dffa646..f772dd09 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -476,6 +476,23 @@ If you're having any issues with the above, ensure the following: variables. Otherwise, check the path to the `vcvarsall.bat` in `$vcvarsall` script and fix it. +### Building from vcpkg + +The jemalloc port in vcpkg is kept up to date by Microsoft team members and +community contributors. The url of vcpkg is: https://github.com/Microsoft/vcpkg +. 
You can download and install jemalloc using the vcpkg dependency manager: + +```shell +git clone https://github.com/Microsoft/vcpkg.git +cd vcpkg +./bootstrap-vcpkg.sh # ./bootstrap-vcpkg.bat for Windows +./vcpkg integrate install +./vcpkg install jemalloc +``` + +If the version is out of date, please [create an issue or pull +request](https://github.com/Microsoft/vcpkg) on the vcpkg repository. + ## Development If you intend to make non-trivial changes to jemalloc, use the 'autogen.sh' From 3de0c24859f4413bf03448249078169bb50bda0f Mon Sep 17 00:00:00 2001 From: divanorama Date: Thu, 29 Sep 2022 23:35:59 +0200 Subject: [PATCH 2238/2608] Disable builtin malloc in tests With `--with-jemalloc-prefix=` and without `-fno-builtin` or `-O1` both clang and gcc may optimize out `malloc` calls whose result is unused. Comparing result to NULL also doesn't necessarily count as being used. This won't be a problem in most client programs as this only concerns really unused pointers, but in tests it's important to actually execute allocations. `-fno-builtin` should disable this optimization for both gcc and clang, and applying it only to tests code shouldn't hopefully be an issue. Another alternative is to force "use" of result but that'd require more changes and may miss some other optimization-related issues. 
This should resolve https://github.com/jemalloc/jemalloc/issues/2091 --- Makefile.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.in b/Makefile.in index 6809fb29..a964f07e 100644 --- a/Makefile.in +++ b/Makefile.in @@ -458,6 +458,8 @@ $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include $(TESTS_CPP_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include +$(TESTS_OBJS): CFLAGS += -fno-builtin +$(TESTS_CPP_OBJS): CPPFLAGS += -fno-builtin ifneq ($(IMPORTLIB),$(SO)) $(CPP_OBJS) $(C_SYM_OBJS) $(C_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS): CPPFLAGS += -DDLLEXPORT endif From 4c95c953e2c4b443d930d3b41abb17eb38f075f5 Mon Sep 17 00:00:00 2001 From: David Carlier Date: Sat, 1 Oct 2022 20:35:29 +0100 Subject: [PATCH 2239/2608] fix build for non linux/BSD platforms. --- configure.ac | 9 +++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/background_thread.c | 6 +++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index 0ae579ee..64c0c847 100644 --- a/configure.ac +++ b/configure.ac @@ -2024,6 +2024,15 @@ if test "x$have_sched_setaffinity" = "x1" ; then AC_DEFINE([JEMALLOC_HAVE_SCHED_SETAFFINITY], [ ], [ ]) fi +dnl Check if the pthread_setaffinity_np function exists. +AC_CHECK_FUNC([pthread_setaffinity_np], + [have_pthread_setaffinity_np="1"], + [have_pthread_setaffinity_np="0"] + ) +if test "x$have_pthread_setaffinity_np" = "x1" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP], [ ], [ ]) +fi + dnl Check if the Solaris/BSD issetugid function exists. 
AC_CHECK_FUNC([issetugid], [have_issetugid="1"], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6dbd8780..f5d94ee7 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -391,6 +391,9 @@ /* GNU specific sched_setaffinity support */ #undef JEMALLOC_HAVE_SCHED_SETAFFINITY +/* pthread_setaffinity_np support */ +#undef JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP + /* * If defined, all the features necessary for background threads are present. */ diff --git a/src/background_thread.c b/src/background_thread.c index 3171dd31..3c006cec 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -113,9 +113,7 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { static inline bool set_current_thread_affinity(int cpu) { -#ifdef __OpenBSD__ - return false; -#else +#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP) #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; #else @@ -146,6 +144,8 @@ set_current_thread_affinity(int cpu) { # endif return ret != 0; #endif +#else + return false; #endif } From b04e7666f2f29de096a170c49cb49cd8f308b7e1 Mon Sep 17 00:00:00 2001 From: Jordan Rome Date: Thu, 29 Sep 2022 10:07:47 -0400 Subject: [PATCH 2240/2608] update PROFILING_INTERNALS.md Expand the bad example of summing before unbiasing. --- doc_internal/PROFILING_INTERNALS.md | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/doc_internal/PROFILING_INTERNALS.md b/doc_internal/PROFILING_INTERNALS.md index 0a9f31c0..f337fb88 100644 --- a/doc_internal/PROFILING_INTERNALS.md +++ b/doc_internal/PROFILING_INTERNALS.md @@ -99,7 +99,25 @@ Using this approach means that there are a few things users need to be aware of. If one stack appears twice as often as another, this by itself does not imply that it allocates twice as often. 
Consider the case in which there are only two types of allocating call stacks in a program. Stack A allocates 8 bytes, and occurs a million times in a program. Stack B allocates 8 MB, and occurs just once in a program. If our sampling rate $R$ is about 1MB, we expect stack A to show up about 8 times, and stack B to show up once. Stack A isn't 8 times more frequent than stack B, though; it's a million times more frequent. ### Aggregation must be done after unbiasing samples -Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from a million machines. We then have 8 million occurs of stack A (each of 8 bytes), and a million occurrences of stack B (each of 8 MB). If we sum first, we'll attribute 64 MB to stack A, and 8 TB to stack B. Unbiasing changes these numbers by an infinitesimal amount, so that sum-then-unbias dramatically underreports the amount of memory allocated by stack A. +Some tools manually parse heap dump output, and aggregate across stacks (or across program runs) to provide wider-scale data analyses. When doing this aggregation, though, it's important to unbias-and-then-sum, rather than sum-and-then-unbias. Reusing our example from the previous section: suppose we collect heap dumps of the program from 1 million machines. We then have 8 million samples of stack A (8 per machine, each of 8 bytes), and 1 million samples of stack B (1 per machine, each of 8 MB). 
+ +If we sum first then unbias based on this formula: $1 - e^{-Z/R}$ we get: + +$$Z = 8,000,000 * 8 bytes = 64MB$$ +$$64MB / (1 - e^{-64MB/1MB}) \approx 64MB (Stack A)$$ + +$$Z = 1,000,000 * 8MB = 8TB$$ +$$8TB / (1 - e^{-1TB/1MB}) \approx 8TB (Stack B)$$ + +Clearly we are unbiasing by an infinitesimal amount, which dramatically underreports the amount of memory allocated by stack A. Whereas if we unbias first and then sum: + +$$Z = 8 bytes$$ +$$8 bytes / (1 - e^{-8 bytes/1MB}) \approx 1MB$$ +$$1MB * 8,000,000 = 8TB (Stack A)$$ + +$$Z = 8MB$$ +$$8MB / (1 - e^{-8MB/1MB}) \approx 8MB$$ +$$8MB * 1,000,000 = 8TB (Stack B)$$ ## An avenue for future exploration While the framework we laid out above is pretty general, as an engineering decision we're only interested in fairly simple approaches (i.e. ones for which the chance of an allocation being sampled depends only on its size). Our job is then: for each size class $Z$, pick a probability $p_Z$ that an allocation of that size will be sampled. We made some handwave-y references to statistical distributions to justify our choices, but there's no reason we need to pick them that way. Any set of non-zero probabilities is a valid choice. 
From 1897f185d2c06307fefc4d8f4512eeb13c474999 Mon Sep 17 00:00:00 2001 From: divanorama Date: Mon, 26 Sep 2022 09:46:34 +0200 Subject: [PATCH 2241/2608] Fix safety_check segfault in double free test --- include/jemalloc/internal/arena_inlines_b.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 69617fb7..c9d7db86 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -225,8 +225,7 @@ large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { edata_state_get(edata) != extent_state_active)) { safety_check_fail("Invalid deallocation detected: " "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", - (uintptr_t)edata_addr_get(edata)); + "possibly caused by double free bugs.", ptr); return true; } size_t input_size = sz_index2size(szind); From be65438f20a5fe4fdc5c5bb2cfa7ba3f0e9da378 Mon Sep 17 00:00:00 2001 From: Paul Smith Date: Fri, 14 Oct 2022 14:33:08 -0400 Subject: [PATCH 2242/2608] jemalloc_internal_types.h: Use alloca if __STDC_NO_VLA__ is defined No currently-available version of Visual Studio C compiler supports variable length arrays, even if it defines __STDC_VERSION__ >= C99. As far as I know Microsoft has no plans to ever support VLAs in MSVC. The C11 standard requires that the __STDC_NO_VLA__ macro be defined if the compiler doesn't support VLAs, so fall back to alloca() if so. 
--- include/jemalloc/internal/jemalloc_internal_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 62c2b59c..b23a8bed 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -110,7 +110,7 @@ typedef enum malloc_init_e malloc_init_t; (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* Declare a variable-length array. */ -#if __STDC_VERSION__ < 199901L +#if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) # ifdef _MSC_VER # include # define alloca _alloca From 143e9c4a2f4eb8916e9802323485fd91260fd17c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 21 Oct 2022 15:10:48 -0700 Subject: [PATCH 2243/2608] Enable fast thread locals for dealloc-only threads. Previously if a thread does only allocations, it stays on the slow path / minimal initialized state forever. However, dealloc-only is a valid pattern for dedicated reclamation threads -- this means thread cache is disabled (no batched flush) for them, which causes high overhead and contention. Added the condition to fully initialize TSD when a fair amount of dealloc activities are observed. 
--- include/jemalloc/internal/tsd.h | 4 +++ src/tsd.c | 18 ++++++++++- test/unit/tsd.c | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 66d68822..c6bf28fc 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -59,6 +59,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define TSD_DATA_SLOW \ O(tcache_enabled, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ + O(min_init_state_nfetched, uint8_t, uint8_t) \ O(thread_allocated_last_event, uint64_t, uint64_t) \ O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ @@ -91,6 +92,7 @@ typedef ql_elm(tsd_t) tsd_link_t; #define TSD_DATA_SLOW_INITIALIZER \ /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ /* reentrancy_level */ 0, \ + /* min_init_state_nfetched */ 0, \ /* thread_allocated_last_event */ 0, \ /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ @@ -177,6 +179,8 @@ void tsd_global_slow_inc(tsdn_t *tsdn); void tsd_global_slow_dec(tsdn_t *tsdn); bool tsd_global_slow(); +#define TSD_MIN_INIT_STATE_MAX_FETCHED (128) + enum { /* Common case --> jnz. */ tsd_state_nominal = 0, diff --git a/src/tsd.c b/src/tsd.c index e8e4f3a3..cef7ba58 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -300,9 +300,25 @@ tsd_fetch_slow(tsd_t *tsd, bool minimal) { tsd_state_set(tsd, tsd_state_minimal_initialized); tsd_set(tsd); tsd_data_init_nocleanup(tsd); + *tsd_min_init_state_nfetchedp_get(tsd) = 1; } } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) { - if (!minimal) { + /* + * If a thread only ever deallocates (e.g. dedicated reclamation + * threads), we want to help it to eventually escape the slow + * path (caused by the minimal initialized state). 
The nfetched + * counter tracks the number of times the tsd has been accessed + * under the min init state, and triggers the switch to nominal + * once reached the max allowed count. + * + * This means at most 128 deallocations stay on the slow path. + * + * Also see comments in free_default(). + */ + uint8_t *nfetched = tsd_min_init_state_nfetchedp_get(tsd); + assert(*nfetched >= 1); + (*nfetched)++; + if (!minimal || *nfetched == TSD_MIN_INIT_STATE_MAX_FETCHED) { /* Switch to fully initialized. */ tsd_state_set(tsd, tsd_state_nominal); assert(*tsd_reentrancy_levelp_get(tsd) >= 1); diff --git a/test/unit/tsd.c b/test/unit/tsd.c index 205d8708..bb5cd9f6 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -136,6 +136,61 @@ TEST_BEGIN(test_tsd_reincarnation) { } TEST_END +static void * +thd_start_dalloc_only(void *arg) { + void **ptrs = (void **)arg; + + tsd_t *tsd = tsd_fetch_min(); + if (tsd_state_get(tsd) != tsd_state_minimal_initialized) { + /* Allocation happened implicitly. */ + expect_u_eq(tsd_state_get(tsd), tsd_state_nominal, + "TSD state should be nominal"); + return NULL; + } + + void *ptr; + for (size_t i = 0; (ptr = ptrs[i]) != NULL; i++) { + /* Offset by 1 because of the manual tsd_fetch_min above. */ + if (i + 1 < TSD_MIN_INIT_STATE_MAX_FETCHED) { + expect_u_eq(tsd_state_get(tsd), + tsd_state_minimal_initialized, + "TSD should be minimal initialized"); + } else { + /* State may be nominal or nominal_slow. 
*/ + expect_true(tsd_nominal(tsd), "TSD should be nominal"); + } + free(ptr); + } + + return NULL; +} + +static void +test_sub_thread_n_dalloc(size_t nptrs) { + void **ptrs = (void **)malloc(sizeof(void *) * (nptrs + 1)); + for (size_t i = 0; i < nptrs; i++) { + ptrs[i] = malloc(8); + } + ptrs[nptrs] = NULL; + + thd_t thd; + thd_create(&thd, thd_start_dalloc_only, (void *)ptrs); + thd_join(thd, NULL); + free(ptrs); +} + +TEST_BEGIN(test_tsd_sub_thread_dalloc_only) { + test_sub_thread_n_dalloc(1); + test_sub_thread_n_dalloc(16); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED - 2); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED - 1); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED + 1); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED + 2); + test_sub_thread_n_dalloc(TSD_MIN_INIT_STATE_MAX_FETCHED * 2); +} +TEST_END + typedef struct { atomic_u32_t phase; atomic_b_t error; @@ -269,6 +324,7 @@ main(void) { return test_no_reentrancy( test_tsd_main_thread, test_tsd_sub_thread, + test_tsd_sub_thread_dalloc_only, test_tsd_reincarnation, test_tsd_global_slow); } From 481bbfc9906e7744716677edd49d0d6c22556a1a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 2 Nov 2022 16:09:06 -0700 Subject: [PATCH 2244/2608] Add a configure option --enable-force-getenv. Allows the use of getenv() rather than secure_getenv() to read MALLOC_CONF. This helps in situations where hosts are under full control, and setting MALLOC_CONF is needed while also setuid. Disabled by default. 
--- configure.ac | 16 ++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 6 ++++++ src/jemalloc.c | 16 ++++++++++------ 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index 64c0c847..846a049c 100644 --- a/configure.ac +++ b/configure.ac @@ -1571,6 +1571,22 @@ if test "x$enable_readlinkat" = "x1" ; then fi AC_SUBST([enable_readlinkat]) +dnl Do not force getenv by default +AC_ARG_ENABLE([force-getenv], + [AS_HELP_STRING([--enable-force-getenv], [Use getenv over secure_getenv])], +[if test "x$enable_force_getenv" = "xno" ; then + enable_force_getenv="0" +else + enable_force_getenv="1" +fi +], +[enable_force_getenv="0"] +) +if test "x$enable_force_getenv" = "x1" ; then + AC_DEFINE([JEMALLOC_FORCE_GETENV], [ ], [ ]) +fi +AC_SUBST([force_getenv]) + dnl Avoid extra safety checks by default AC_ARG_ENABLE([opt-safety-checks], [AS_HELP_STRING([--enable-opt-safety-checks], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index f5d94ee7..55938433 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -266,6 +266,12 @@ */ #undef JEMALLOC_READLINKAT +/* + * If defined, use getenv() (instead of secure_getenv() or + * alternatives) to access MALLOC_CONF. + */ +#undef JEMALLOC_FORCE_GETENV + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. 
*/ diff --git a/src/jemalloc.c b/src/jemalloc.c index a4761c9b..039be40f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -703,16 +703,20 @@ check_entry_exit_locking(tsdn_t *tsdn) { */ static char * -jemalloc_secure_getenv(const char *name) { -#ifdef JEMALLOC_HAVE_SECURE_GETENV - return secure_getenv(name); +jemalloc_getenv(const char *name) { +#ifdef JEMALLOC_FORCE_GETENV + return getenv(name); #else -# ifdef JEMALLOC_HAVE_ISSETUGID +# ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +# else +# ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { return NULL; } -# endif +# endif return getenv(name); +# endif #endif } @@ -1045,7 +1049,7 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { #endif ; - if ((ret = jemalloc_secure_getenv(envname)) != NULL) { + if ((ret = jemalloc_getenv(envname)) != NULL) { /* * Do nothing; opts is already initialized to the value * of the MALLOC_CONF environment variable. From 14ad8205bf0e23cdc1698f65c4d307753726a6a3 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 3 Nov 2022 18:27:03 -0700 Subject: [PATCH 2245/2608] Update the ratio display in benchmark In bench.h, specify the ratio as the time consumption ratio and modify the display of the ratio. 
--- test/include/test/bench.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 0397c948..c2f78a71 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -53,7 +53,7 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), " - "%s=%"FMTu64"us (%s ns/iter), ratio=1:%s\n", + "%s=%"FMTu64"us (%s ns/iter), time consumption ratio=%s:1\n", niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); From 06374d2a6ad525be86e4381b4bb5010fedff3268 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 3 Nov 2022 16:55:15 -0700 Subject: [PATCH 2246/2608] Benchmark operator delete Added the microbenchmark for operator delete. Also modified bench.h so that it can be used in C++. --- Makefile.in | 16 ++++-- test/include/test/bench.h | 10 ++-- test/include/test/jemalloc_test.h.in | 3 +- test/stress/cpp/microbench.cpp | 83 ++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 10 deletions(-) create mode 100644 test/stress/cpp/microbench.cpp diff --git a/Makefile.in b/Makefile.in index a964f07e..195084d6 100644 --- a/Makefile.in +++ b/Makefile.in @@ -332,10 +332,15 @@ TESTS_STRESS := $(srcroot)test/stress/batch_alloc.c \ $(srcroot)test/stress/large_microbench.c \ $(srcroot)test/stress/mallctl.c \ $(srcroot)test/stress/microbench.c +ifeq (@enable_cxx@, 1) +TESTS_STRESS_CPP := $(srcroot)test/stress/cpp/microbench.cpp +else +TESTS_STRESS_CPP := +endif TESTS := $(TESTS_UNIT) $(TESTS_INTEGRATION) $(TESTS_INTEGRATION_CPP) \ - $(TESTS_ANALYZE) $(TESTS_STRESS) + $(TESTS_ANALYZE) $(TESTS_STRESS) $(TESTS_STRESS_CPP) PRIVATE_NAMESPACE_HDRS := $(objroot)include/jemalloc/internal/private_namespace.h $(objroot)include/jemalloc/internal/private_namespace_jet.h PRIVATE_NAMESPACE_GEN_HDRS := $(PRIVATE_NAMESPACE_HDRS:%.h=%.gen.h) @@ 
-362,9 +367,10 @@ TESTS_INTEGRATION_OBJS := $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_INTEGRATION_CPP_OBJS := $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) TESTS_ANALYZE_OBJS := $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%.$(O)) TESTS_STRESS_OBJS := $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%.$(O)) +TESTS_STRESS_CPP_OBJS := $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%.$(O)) TESTS_OBJS := $(TESTS_UNIT_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_ANALYZE_OBJS) \ $(TESTS_STRESS_OBJS) -TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) +TESTS_CPP_OBJS := $(TESTS_INTEGRATION_CPP_OBJS) $(TESTS_STRESS_CPP_OBJS) .PHONY: all dist build_doc_html build_doc_man build_doc .PHONY: install_bin install_include install_lib @@ -454,6 +460,7 @@ $(TESTS_INTEGRATION_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_TEST $(TESTS_INTEGRATION_CPP_OBJS): CPPFLAGS += -DJEMALLOC_INTEGRATION_CPP_TEST $(TESTS_ANALYZE_OBJS): CPPFLAGS += -DJEMALLOC_ANALYZE_TEST $(TESTS_STRESS_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_TEST +$(TESTS_STRESS_CPP_OBJS): CPPFLAGS += -DJEMALLOC_STRESS_CPP_TEST $(TESTS_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.c $(TESTS_CPP_OBJS): $(objroot)test/%.$(O): $(srcroot)test/%.cpp $(TESTS_OBJS): CPPFLAGS += -I$(srcroot)test/include -I$(objroot)test/include @@ -474,7 +481,7 @@ $(TESTS_OBJS) $(TESTS_CPP_OBJS): $(objroot)test/include/test/jemalloc_test.h endif $(C_OBJS) $(CPP_OBJS) $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(C_TESTLIB_INTEGRATION_OBJS) $(C_UTIL_INTEGRATION_OBJS) $(TESTS_INTEGRATION_OBJS) $(TESTS_INTEGRATION_CPP_OBJS): $(objroot)include/jemalloc/internal/private_namespace.h -$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS): $(objroot)include/jemalloc/internal/private_namespace_jet.h +$(C_JET_OBJS) $(C_TESTLIB_UNIT_OBJS) $(C_TESTLIB_ANALYZE_OBJS) $(C_TESTLIB_STRESS_OBJS) $(TESTS_UNIT_OBJS) $(TESTS_ANALYZE_OBJS) $(TESTS_STRESS_OBJS) $(TESTS_STRESS_CPP_OBJS): 
$(objroot)include/jemalloc/internal/private_namespace_jet.h $(C_SYM_OBJS) $(C_OBJS) $(C_PIC_OBJS) $(C_JET_SYM_OBJS) $(C_JET_OBJS) $(C_TESTLIB_OBJS) $(TESTS_OBJS): %.$(O): @mkdir -p $(@D) @@ -664,7 +671,7 @@ endif tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) -tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) +tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: @@ -697,6 +704,7 @@ else endif stress: tests_stress stress_dir $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%) + $(SHELL) $(objroot)test/test.sh $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%) check: check_unit check_integration check_integration_decay check_integration_prof clean: diff --git a/test/include/test/bench.h b/test/include/test/bench.h index c2f78a71..7421b4d2 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -23,7 +23,7 @@ fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) { uint64_t nsecs_per_iter1000 = nsec1000 / iters; uint64_t intpart = nsecs_per_iter1000 / 1000; uint64_t fracpart = nsecs_per_iter1000 % 1000; - malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%"FMTu64".%03"FMTu64, intpart, + malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64, intpart, fracpart); } @@ -40,8 +40,8 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, return; } - time_func(&timer_a, nwarmup, niter, func_a); - time_func(&timer_b, nwarmup, niter, func_b); + time_func(&timer_a, nwarmup, niter, (void (*)())func_a); + time_func(&timer_b, nwarmup, niter, (void (*)())func_b); uint64_t usec_a = timer_usec(&timer_a); char buf_a[FMT_NSECS_BUF_SIZE]; @@ -52,8 +52,8 @@ compare_funcs(uint64_t 
nwarmup, uint64_t niter, const char *name_a, fmt_nsecs(usec_b, niter, buf_b); timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%"FMTu64" iterations, %s=%"FMTu64"us (%s ns/iter), " - "%s=%"FMTu64"us (%s ns/iter), time consumption ratio=%s:1\n", + malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64 "us (%s ns/iter), " + "%s=%" FMTu64 "us (%s ns/iter), time consumption ratio=%s:1\n", niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 3f8c0da7..600d993c 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -88,7 +88,8 @@ static const bool config_debug = * public jemalloc interfaces with jet_ prefixes, so that stress tests can use * a separate allocator for their internal data structures. */ -#elif defined(JEMALLOC_STRESS_TEST) +#elif defined(JEMALLOC_STRESS_TEST) || \ + defined(JEMALLOC_STRESS_CPP_TEST) # include "jemalloc/jemalloc@install_suffix@.h" # include "jemalloc/jemalloc_protos_jet.h" diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp new file mode 100644 index 00000000..65f41dea --- /dev/null +++ b/test/stress/cpp/microbench.cpp @@ -0,0 +1,83 @@ +#include "test/jemalloc_test.h" +#include "test/bench.h" + +static void +malloc_free(void) { + void *p = malloc(1); + expect_ptr_not_null(p, "Unexpected new failure"); + free(p); +} + +static void +new_delete(void) { + auto p = ::operator new(1); + expect_ptr_not_null(p, "Unexpected new failure"); + ::operator delete(p); +} + +static void +malloc_free_array(void) { + void *p = malloc(sizeof(int)*8); + expect_ptr_not_null(p, "Unexpected new[] failure"); + free(p); +} + +static void +new_delete_array(void) { + auto p = new int[8]; + expect_ptr_not_null(p, "Unexpected new[] failure"); + delete[] p; +} + +#if __cpp_sized_deallocation >= 201309 +static void +new_sized_delete(void) { + auto p = ::operator 
new(1); + expect_ptr_not_null(p, "Unexpected new failure"); + ::operator delete(p, 1); +} + +static void +malloc_sdallocx(void) { + void *p = malloc(1); + expect_ptr_not_null(p, "Unexpected new failure"); + sdallocx(p, 1, 0); +} +#endif + +TEST_BEGIN(test_free_vs_delete) { + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free", (void *)malloc_free, + "new_delete", (void *)new_delete); +} +TEST_END + +TEST_BEGIN(test_free_array_vs_delete_array) { + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free_array", (void *)malloc_free_array, + "delete_array", (void *)new_delete_array); +} +TEST_END + + +TEST_BEGIN(test_sized_delete_vs_sdallocx) { +#if __cpp_sized_deallocation >= 201309 + compare_funcs(10*1000*1000, 100*1000*1000, + "new_size_delete", (void *)new_sized_delete, + "malloc_sdallocx", (void *)malloc_sdallocx); +#else + malloc_printf("Skipping test_sized_delete_vs_sdallocx since \ + sized deallocation is not enabled.\n"); +#endif +} +TEST_END + + +int +main() { + return test_no_reentrancy( + test_free_vs_delete, + test_free_array_vs_delete_array, + test_sized_delete_vs_sdallocx); + +} From e8f9f13811c16acb1ab8771fd2ffe4437e1b8620 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 19 Sep 2022 17:05:55 -0700 Subject: [PATCH 2247/2608] Inline free and sdallocx into operator delete --- .../internal/jemalloc_internal_externs.h | 3 +- .../internal/jemalloc_internal_inlines_c.h | 224 ++++++++++++++++++ include/jemalloc/internal/prof_inlines.h | 6 +- src/jemalloc.c | 211 +---------------- src/jemalloc_cpp.cpp | 18 +- test/stress/cpp/microbench.cpp | 7 +- 6 files changed, 241 insertions(+), 228 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 63b9bd2c..d90f6ddb 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -70,7 +70,8 @@ size_t batch_alloc(void **ptrs, size_t num, size_t size, int 
flags); void jemalloc_prefork(void); void jemalloc_postfork_parent(void); void jemalloc_postfork_child(void); -void je_sdallocx_noflags(void *ptr, size_t size); +void sdallocx_default(void *ptr, size_t size, int flags); +void free_default(void *ptr); void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index b0868b7d..719b8eea 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -7,6 +7,17 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/emap.h" + +/* + * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we + * should have one constant here per magic value there. Note however that the + * representations need not be related. + */ +#define TCACHE_IND_NONE ((unsigned)-1) +#define TCACHE_IND_AUTOMATIC ((unsigned)-2) +#define ARENA_IND_AUTOMATIC ((unsigned)-1) /* * Translating the names of the 'i' functions: @@ -337,4 +348,217 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { return fallback_alloc(size); } +JEMALLOC_ALWAYS_INLINE tcache_t * +tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { + tcache_t *tcache; + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { + /* Getting tcache ptr unconditionally. */ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else if (is_alloc || + likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + } else { + /* + * Should not specify tcache on deallocation path when being + * reentrant. 
+ */ + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || + tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } + } + return tcache; +} + +JEMALLOC_ALWAYS_INLINE bool +maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { + if (config_opt_size_checks) { + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &dbg_ctx); + if (alloc_ctx->szind != dbg_ctx.szind) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ sz_size2index(dbg_ctx.szind), + /* input_size */ sz_size2index(alloc_ctx->szind)); + return true; + } + if (alloc_ctx->slab != dbg_ctx.slab) { + safety_check_fail( + "Internal heap corruption detected: " + "mismatch in slab bit"); + return true; + } + } + return false; +} + +JEMALLOC_ALWAYS_INLINE bool +prof_sample_aligned(const void *ptr) { + return ((uintptr_t)ptr & PAGE_MASK) == 0; +} + +JEMALLOC_ALWAYS_INLINE bool +free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { + /* + * free_fastpath do not handle two uncommon cases: 1) sampled profiled + * objects and 2) sampled junk & stash for use-after-free detection. + * Both have special alignments which are used to escape the fastpath. + * + * prof_sample is page-aligned, which covers the UAF check when both + * are enabled (the assertion below). Avoiding redundant checks since + * this is on the fastpath -- at most one runtime branch from this. + */ + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } + + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. 
*/ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } + + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } + + return false; +} + +/* Returns whether or not the free attempt was successful. */ +JEMALLOC_ALWAYS_INLINE +bool free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* + * The tsd_fast() / initialized checks are folded into the branch + * testing (deallocated_after >= threshold) later in this function. + * The threshold will be set to 0 when !tsd_fast. + */ + assert(tsd_fast(tsd) || + *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); + + emap_alloc_ctx_t alloc_ctx; + if (!size_hint) { + bool err = emap_alloc_ctx_try_lookup_fast(tsd, + &arena_emap_global, ptr, &alloc_ctx); + + /* Note: profiled objects will have alloc_ctx.slab set */ + if (unlikely(err || !alloc_ctx.slab || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { + return false; + } + assert(alloc_ctx.szind != SC_NSIZES); + } else { + /* + * Check for both sizes that are too large, and for sampled / + * special aligned objects. The alignment check will also check + * for null ptr. + */ + if (unlikely(size > SC_LOOKUP_MAXCLASS || + free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { + return false; + } + alloc_ctx.szind = sz_size2index_lookup(size); + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = true; + } + /* + * Currently the fastpath only handles small sizes. The branch on + * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking + * tcache szind upper limit (i.e. tcache_maxclass) as well. 
+ */ + assert(alloc_ctx.slab); + + uint64_t deallocated, threshold; + te_free_fastpath_ctx(tsd, &deallocated, &threshold); + + size_t usize = sz_index2size(alloc_ctx.szind); + uint64_t deallocated_after = deallocated + usize; + /* + * Check for events and tsd non-nominal (fast_threshold will be set to + * 0) in a single branch. Note that this handles the uninitialized case + * as well (TSD init will be triggered on the non-fastpath). Therefore + * anything depends on a functional TSD (e.g. the alloc_ctx sanity check + * below) needs to be after this branch. + */ + if (unlikely(deallocated_after >= threshold)) { + return false; + } + assert(tsd_fast(tsd)); + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. */ + return true; + } + + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; + + /* + * If junking were enabled, this is where we would do it. It's not + * though, since we ensured above that we're on the fast path. Assert + * that to double-check. 
+ */ + assert(!opt_junk_free); + + if (!cache_bin_dalloc_easy(bin, ptr)) { + return false; + } + + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; + + return true; +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_sdallocx_noflags(void *ptr, size_t size) { + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, + size); + + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } + + LOG("core.sdallocx.exit", ""); +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_sdallocx_impl(void *ptr, size_t size, int flags) { + if (flags != 0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } +} + +JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW +je_free_impl(void *ptr) { + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } +} + #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index a8e7e7fb..7d9608b5 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void prof_active_assert() { @@ -227,11 +228,6 @@ prof_sample_align(size_t orig_align) { orig_align; } -JEMALLOC_ALWAYS_INLINE bool -prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; -} - JEMALLOC_ALWAYS_INLINE bool prof_sampled(tsd_t *tsd, const void *ptr) { prof_info_t prof_info; diff --git a/src/jemalloc.c b/src/jemalloc.c index 039be40f..7407022f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2273,15 +2273,6 @@ static_opts_init(static_opts_t *static_opts) { static_opts->usize = false; } -/* - * These correspond to the macros in jemalloc/jemalloc_macros.h. Broadly, we - * should have one constant here per magic value there. 
Note however that the - * representations need not be related. - */ -#define TCACHE_IND_NONE ((unsigned)-1) -#define TCACHE_IND_AUTOMATIC ((unsigned)-2) -#define ARENA_IND_AUTOMATIC ((unsigned)-1) - typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { void **result; @@ -2346,36 +2337,6 @@ zero_get(bool guarantee, bool slow) { } } -JEMALLOC_ALWAYS_INLINE tcache_t * -tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { - tcache_t *tcache; - if (tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!slow)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else if (is_alloc || - likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } else { - /* - * Should not specify tcache on deallocation path when being - * reentrant. - */ - assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if (tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, tcache_ind); - } - } - return tcache; -} - /* Return true if a manual arena is specified and arena_get() OOMs. 
*/ JEMALLOC_ALWAYS_INLINE bool arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { @@ -2915,29 +2876,6 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { thread_dalloc_event(tsd, usize); } -JEMALLOC_ALWAYS_INLINE bool -maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { - if (config_opt_size_checks) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &dbg_ctx); - if (alloc_ctx->szind != dbg_ctx.szind) { - safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr, - /* true_size */ sz_size2index(dbg_ctx.szind), - /* input_size */ sz_size2index(alloc_ctx->szind)); - return true; - } - if (alloc_ctx->slab != dbg_ctx.slab) { - safety_check_fail( - "Internal heap corruption detected: " - "mismatch in slab bit"); - return true; - } - } - return false; -} - JEMALLOC_ALWAYS_INLINE void isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { if (!slow_path) { @@ -3044,142 +2982,11 @@ free_default(void *ptr) { } } -JEMALLOC_ALWAYS_INLINE bool -free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { - /* - * free_fastpath do not handle two uncommon cases: 1) sampled profiled - * objects and 2) sampled junk & stash for use-after-free detection. - * Both have special alignments which are used to escape the fastpath. - * - * prof_sample is page-aligned, which covers the UAF check when both - * are enabled (the assertion below). Avoiding redundant checks since - * this is on the fastpath -- at most one runtime branch from this. - */ - if (config_debug && cache_bin_nonfast_aligned(ptr)) { - assert(prof_sample_aligned(ptr)); - } - - if (config_prof && check_prof) { - /* When prof is enabled, the prof_sample alignment is enough. 
*/ - if (prof_sample_aligned(ptr)) { - return true; - } else { - return false; - } - } - - if (config_uaf_detection) { - if (cache_bin_nonfast_aligned(ptr)) { - return true; - } else { - return false; - } - } - - return false; -} - -/* Returns whether or not the free attempt was successful. */ -JEMALLOC_ALWAYS_INLINE -bool free_fastpath(void *ptr, size_t size, bool size_hint) { - tsd_t *tsd = tsd_get(false); - /* The branch gets optimized away unless tsd_get_allocates(). */ - if (unlikely(tsd == NULL)) { - return false; - } - /* - * The tsd_fast() / initialized checks are folded into the branch - * testing (deallocated_after >= threshold) later in this function. - * The threshold will be set to 0 when !tsd_fast. - */ - assert(tsd_fast(tsd) || - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - - emap_alloc_ctx_t alloc_ctx; - if (!size_hint) { - bool err = emap_alloc_ctx_try_lookup_fast(tsd, - &arena_emap_global, ptr, &alloc_ctx); - - /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ false))) { - return false; - } - assert(alloc_ctx.szind != SC_NSIZES); - } else { - /* - * Check for both sizes that are too large, and for sampled / - * special aligned objects. The alignment check will also check - * for null ptr. - */ - if (unlikely(size > SC_LOOKUP_MAXCLASS || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ true))) { - return false; - } - alloc_ctx.szind = sz_size2index_lookup(size); - /* Max lookup class must be small. */ - assert(alloc_ctx.szind < SC_NBINS); - /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = true; - } - /* - * Currently the fastpath only handles small sizes. The branch on - * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking - * tcache szind upper limit (i.e. tcache_maxclass) as well. 
- */ - assert(alloc_ctx.slab); - - uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold); - - size_t usize = sz_index2size(alloc_ctx.szind); - uint64_t deallocated_after = deallocated + usize; - /* - * Check for events and tsd non-nominal (fast_threshold will be set to - * 0) in a single branch. Note that this handles the uninitialized case - * as well (TSD init will be triggered on the non-fastpath). Therefore - * anything depends on a functional TSD (e.g. the alloc_ctx sanity check - * below) needs to be after this branch. - */ - if (unlikely(deallocated_after >= threshold)) { - return false; - } - assert(tsd_fast(tsd)); - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. */ - return true; - } - - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ false); - cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; - - /* - * If junking were enabled, this is where we would do it. It's not - * though, since we ensured above that we're on the fast path. Assert - * that to double-check. 
- */ - assert(!opt_junk_free); - - if (!cache_bin_dalloc_easy(bin, ptr)) { - return false; - } - - *tsd_thread_deallocatedp_get(tsd) = deallocated_after; - - return true; -} - JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { LOG("core.free.entry", "ptr: %p", ptr); - if (!free_fastpath(ptr, 0, false)) { - free_default(ptr); - } + je_free_impl(ptr); LOG("core.free.exit", ""); } @@ -4000,21 +3807,7 @@ je_sdallocx(void *ptr, size_t size, int flags) { LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); - if (flags != 0 || !free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, flags); - } - - LOG("core.sdallocx.exit", ""); -} - -void JEMALLOC_NOTHROW -je_sdallocx_noflags(void *ptr, size_t size) { - LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, - size); - - if (!free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, 0); - } + je_sdallocx_impl(ptr, size, flags); LOG("core.sdallocx.exit", ""); } diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 8b53a392..e39615bc 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -173,21 +173,21 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_ void operator delete(void *ptr) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void *ptr) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete(void *ptr, const std::nothrow_t &) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void *ptr, const std::nothrow_t &) noexcept { - je_free(ptr); + je_free_impl(ptr); } #if __cpp_sized_deallocation >= 201309 @@ -224,27 +224,27 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) if (unlikely(ptr == nullptr)) { return; } - je_sdallocx(ptr, size, MALLOCX_ALIGN(alignment)); + je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment)); } void operator delete(void* ptr, std::align_val_t) noexcept { - je_free(ptr); + je_free_impl(ptr); } void 
operator delete[](void* ptr, std::align_val_t) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { - je_free(ptr); + je_free_impl(ptr); } void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { - je_free(ptr); + je_free_impl(ptr); } void diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 65f41dea..3d23403b 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -4,7 +4,7 @@ static void malloc_free(void) { void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected new failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); free(p); } @@ -18,7 +18,7 @@ new_delete(void) { static void malloc_free_array(void) { void *p = malloc(sizeof(int)*8); - expect_ptr_not_null(p, "Unexpected new[] failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); free(p); } @@ -40,7 +40,7 @@ new_sized_delete(void) { static void malloc_sdallocx(void) { void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected new failure"); + expect_ptr_not_null(p, "Unexpected malloc failure"); sdallocx(p, 1, 0); } #endif @@ -79,5 +79,4 @@ main() { test_free_vs_delete, test_free_array_vs_delete_array, test_sized_delete_vs_sdallocx); - } From a74acb57e87e2c3ad4386f757f4d792d9aa6e19a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 1 Dec 2022 17:31:08 -0800 Subject: [PATCH 2248/2608] Fix dividing 0 error in stress/cpp/microbench Summary: Per issue #2356, some CXX compilers may optimize away the new/delete operation in stress/cpp/microbench.cpp. Thus, this commit (1) bumps the time interval to 1 if it is 0, and (2) modifies the pointers in the microbench to volatile. 
--- test/src/timer.c | 11 +++++++++++ test/stress/cpp/microbench.cpp | 36 +++++++++++++++++----------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/test/src/timer.c b/test/src/timer.c index 6e8b8edb..0f39d5f6 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -28,6 +28,17 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { size_t i = 0; size_t j, n; + /* + * The time difference could be 0 if the two clock readings are + * identical, either due to the operations being measured in the middle + * took very little time (or even got optimized away), or the clock + * readings are bad / very coarse grained clock. + * Thus, bump t1 if it is 0 to avoid dividing 0. + */ + if (t1 == 0) { + t1 = 1; + } + /* Whole. */ n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); i += n; diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 3d23403b..ab41b65d 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -3,45 +3,45 @@ static void malloc_free(void) { - void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected malloc failure"); - free(p); + void* volatile p = malloc(1); + expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + free((void *)p); } static void new_delete(void) { - auto p = ::operator new(1); - expect_ptr_not_null(p, "Unexpected new failure"); - ::operator delete(p); + void* volatile p = ::operator new(1); + expect_ptr_not_null((void *)p, "Unexpected new failure"); + ::operator delete((void *)p); } static void malloc_free_array(void) { - void *p = malloc(sizeof(int)*8); - expect_ptr_not_null(p, "Unexpected malloc failure"); - free(p); + void* volatile p = malloc(sizeof(int)*8); + expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + free((void *)p); } static void new_delete_array(void) { - auto p = new int[8]; - expect_ptr_not_null(p, "Unexpected new[] failure"); - delete[] p; + int* volatile p = new int[8]; + expect_ptr_not_null((int 
*)p, "Unexpected new[] failure"); + delete[] (int *)p; } #if __cpp_sized_deallocation >= 201309 static void new_sized_delete(void) { - auto p = ::operator new(1); - expect_ptr_not_null(p, "Unexpected new failure"); - ::operator delete(p, 1); + void* volatile p = ::operator new(1); + expect_ptr_not_null((void *)p, "Unexpected new failure"); + ::operator delete((void *)p, 1); } static void malloc_sdallocx(void) { - void *p = malloc(1); - expect_ptr_not_null(p, "Unexpected malloc failure"); - sdallocx(p, 1, 0); + void* volatile p = malloc(1); + expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + sdallocx((void *)p, 1, 0); } #endif From 8580c65f81c5252e493da656a448ec3a8571dab7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 2 Nov 2022 15:17:16 -0700 Subject: [PATCH 2249/2608] Implement prof sample hooks "experimental.hooks.prof_sample(_free)". The added hooks hooks.prof_sample and hooks.prof_sample_free are intended to allow advanced users to track additional information, to enable new ways of profiling on top of the jemalloc heap profile and sample features. The sample hook is invoked after the allocation and backtracing, and forwards the both the allocation and backtrace to the user hook; the sample_free hook happens before the actual deallocation, and forwards only the ptr and usz to the hook. 
--- include/jemalloc/internal/prof_externs.h | 9 +- include/jemalloc/internal/prof_hook.h | 6 + include/jemalloc/internal/prof_inlines.h | 5 +- src/ctl.c | 60 +++++++ src/prof.c | 50 +++++- src/prof_sys.c | 2 + test/unit/prof_hook.c | 194 +++++++++++++++++++++-- 7 files changed, 307 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index d1101561..412378a2 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -56,6 +56,12 @@ prof_backtrace_hook_t prof_backtrace_hook_get(); void prof_dump_hook_set(prof_dump_hook_t hook); prof_dump_hook_t prof_dump_hook_get(); +void prof_sample_hook_set(prof_sample_hook_t hook); +prof_sample_hook_t prof_sample_hook_get(); + +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +prof_sample_free_hook_t prof_sample_free_hook_get(); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); @@ -63,7 +69,8 @@ prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info); +void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); void prof_idump(tsdn_t *tsdn); bool prof_mdump(tsd_t *tsd, const char *filename); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 150d19d3..8615dc53 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -18,4 +18,10 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); */ typedef void (*prof_dump_hook_t)(const char *filename); +/* ptr, size, 
backtrace vector, backtrace vector length */ +typedef void (*prof_sample_hook_t)(const void *, size_t, void **, unsigned); + +/* ptr, size */ +typedef void (*prof_sample_free_hook_t)(const void *, size_t); + #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 7d9608b5..ab3e01f6 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -213,7 +213,8 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_usize, old_prof_info); + prof_free_sampled_object(tsd, old_ptr, old_usize, + old_prof_info); } } @@ -250,7 +251,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { assert(prof_sample_aligned(ptr)); - prof_free_sampled_object(tsd, usize, &prof_info); + prof_free_sampled_object(tsd, ptr, usize, &prof_info); } } diff --git a/src/ctl.c b/src/ctl.c index acf5d366..eafbdc61 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -315,6 +315,8 @@ CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) +CTL_PROTO(experimental_hooks_prof_sample) +CTL_PROTO(experimental_hooks_prof_sample_free) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) @@ -858,6 +860,8 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("remove"), CTL(experimental_hooks_remove)}, {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, + {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, + {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, }; 
@@ -3505,6 +3509,62 @@ label_return: return ret; } +static int +experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_sample_hook_t old_hook = + prof_sample_hook_get(); + READ(old_hook, prof_sample_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_sample_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_sample_hook_t); + prof_sample_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + +static int +experimental_hooks_prof_sample_free_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_sample_free_hook_t old_hook = + prof_sample_free_hook_get(); + READ(old_hook, prof_sample_free_hook_t); + } + if (newp != NULL) { + if (!opt_prof) { + ret = ENOENT; + goto label_return; + } + prof_sample_free_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_sample_free_hook_t); + prof_sample_free_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + /* For integration test purpose only. No plan to move out of experimental. */ static int experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib, diff --git a/src/prof.c b/src/prof.c index 3deac0b5..91425371 100644 --- a/src/prof.c +++ b/src/prof.c @@ -78,6 +78,12 @@ atomic_p_t prof_backtrace_hook; /* Logically a prof_dump_hook_t. */ atomic_p_t prof_dump_hook; +/* Logically a prof_sample_hook_t. */ +atomic_p_t prof_sample_hook; + +/* Logically a prof_sample_free_hook_t. 
*/ +atomic_p_t prof_sample_free_hook; + /******************************************************************************/ void @@ -145,10 +151,20 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, if (opt_prof_stats) { prof_stats_inc(tsd, szind, size); } + + /* Sample hook. */ + prof_sample_hook_t prof_sample_hook = prof_sample_hook_get(); + if (prof_sample_hook != NULL) { + prof_bt_t *bt = &tctx->gctx->bt; + pre_reentrancy(tsd, NULL); + prof_sample_hook(ptr, size, bt->vec, bt->len); + post_reentrancy(tsd); + } } void -prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { +prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, + prof_info_t *prof_info) { cassert(config_prof); assert(prof_info != NULL); @@ -156,6 +172,16 @@ prof_free_sampled_object(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { assert((uintptr_t)tctx > (uintptr_t)1U); szind_t szind = sz_size2index(usize); + + /* Unsample hook. */ + prof_sample_free_hook_t prof_sample_free_hook = + prof_sample_free_hook_get(); + if (prof_sample_free_hook != NULL) { + pre_reentrancy(tsd, NULL); + prof_sample_free_hook(ptr, usize); + post_reentrancy(tsd); + } + malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); assert(tctx->cnts.curobjs > 0); @@ -549,6 +575,28 @@ prof_dump_hook_get() { ATOMIC_ACQUIRE); } +void +prof_sample_hook_set(prof_sample_hook_t hook) { + atomic_store_p(&prof_sample_hook, hook, ATOMIC_RELEASE); +} + +prof_sample_hook_t +prof_sample_hook_get() { + return (prof_sample_hook_t)atomic_load_p(&prof_sample_hook, + ATOMIC_ACQUIRE); +} + +void +prof_sample_free_hook_set(prof_sample_free_hook_t hook) { + atomic_store_p(&prof_sample_free_hook, hook, ATOMIC_RELEASE); +} + +prof_sample_free_hook_t +prof_sample_free_hook_get() { + return (prof_sample_free_hook_t)atomic_load_p(&prof_sample_free_hook, + ATOMIC_ACQUIRE); +} + void prof_boot0(void) { cassert(config_prof); diff --git a/src/prof_sys.c b/src/prof_sys.c index 99fa3a77..d2487fd6 100644 --- 
a/src/prof_sys.c +++ b/src/prof_sys.c @@ -431,6 +431,8 @@ void prof_hooks_init() { prof_backtrace_hook_set(&prof_backtrace_impl); prof_dump_hook_set(NULL); + prof_sample_hook_set(NULL); + prof_sample_free_hook_set(NULL); } void diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index fc06d84e..a48b237b 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -1,11 +1,23 @@ #include "test/jemalloc_test.h" +/* + * The MALLOC_CONF of this test has lg_prof_sample:0, meaning that every single + * allocation will be sampled (and trigger relevant hooks). + */ + const char *dump_filename = "/dev/null"; -prof_backtrace_hook_t default_hook; +prof_backtrace_hook_t default_bt_hook; bool mock_bt_hook_called = false; bool mock_dump_hook_called = false; +bool mock_prof_sample_hook_called = false; +bool mock_prof_sample_free_hook_called = false; + +void *sampled_ptr = NULL; +size_t sampled_ptr_sz = 0; +void *free_sampled_ptr = NULL; +size_t free_sampled_ptr_sz = 0; void mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { @@ -18,7 +30,7 @@ mock_bt_hook(void **vec, unsigned *len, unsigned max_len) { void mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) { - default_hook(vec, len, max_len); + default_bt_hook(vec, len, max_len); expect_u_gt(*len, 0, "Default backtrace hook returned empty backtrace"); expect_u_lt(*len, max_len, "Default backtrace hook returned too large backtrace"); @@ -47,6 +59,24 @@ mock_dump_hook(const char *filename) { "Incorrect file name passed to the dump hook"); } +void +mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len) { + mock_prof_sample_hook_called = true; + sampled_ptr = (void *)ptr; + sampled_ptr_sz = sz; + for (unsigned i = 0; i < len; i++) { + expect_ptr_not_null((void **)vec[i], + "Backtrace should not contain NULL"); + } +} + +void +mock_prof_sample_free_hook(const void *ptr, size_t sz) { + mock_prof_sample_free_hook_called = true; + free_sampled_ptr = (void *)ptr; + 
free_sampled_ptr_sz = sz; +} + TEST_BEGIN(test_prof_backtrace_hook_replace) { test_skip_if(!config_prof); @@ -63,10 +93,10 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { NULL, 0, (void *)&null_hook, sizeof(null_hook)), EINVAL, "Incorrectly allowed NULL backtrace hook"); - size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_hook, &default_hook_sz, (void *)&hook, + (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); @@ -77,8 +107,8 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { prof_backtrace_hook_t current_hook; size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, - sizeof(default_hook)), 0, + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, + sizeof(default_bt_hook)), 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, @@ -100,10 +130,10 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { expect_false(mock_bt_hook_called, "Called mock hook before it's set"); - size_t default_hook_sz = sizeof(prof_backtrace_hook_t); + size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_augmenting_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_hook, &default_hook_sz, (void *)&hook, + (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); @@ -114,8 +144,8 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { prof_backtrace_hook_t current_hook; size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)¤t_hook, 
¤t_hook_sz, (void *)&default_hook, - sizeof(default_hook)), 0, + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, + sizeof(default_bt_hook)), 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, @@ -138,10 +168,10 @@ TEST_BEGIN(test_prof_dump_hook) { expect_false(mock_dump_hook_called, "Called dump hook before it's set"); - size_t default_hook_sz = sizeof(prof_dump_hook_t); + size_t default_bt_hook_sz = sizeof(prof_dump_hook_t); prof_dump_hook_t hook = &mock_dump_hook; expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)&default_hook, &default_hook_sz, (void *)&hook, + (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, @@ -152,8 +182,8 @@ TEST_BEGIN(test_prof_dump_hook) { prof_dump_hook_t current_hook; size_t current_hook_sz = sizeof(prof_dump_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_hook, - sizeof(default_hook)), 0, + (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, + sizeof(default_bt_hook)), 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, @@ -161,10 +191,144 @@ TEST_BEGIN(test_prof_dump_hook) { } TEST_END +/* Need the do_write flag because NULL is a valid to_write value. */ +static void +read_write_prof_sample_hook(prof_sample_hook_t *to_read, bool do_write, + prof_sample_hook_t to_write) { + size_t hook_sz = sizeof(prof_sample_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_sample", + (void *)to_read, &hook_sz, do_write ? 
&to_write : NULL, hook_sz), 0, + "Unexpected prof_sample_hook mallctl failure"); +} + +static void +write_prof_sample_hook(prof_sample_hook_t new_hook) { + read_write_prof_sample_hook(NULL, true, new_hook); +} + +static prof_sample_hook_t +read_prof_sample_hook(void) { + prof_sample_hook_t curr_hook; + read_write_prof_sample_hook(&curr_hook, false, NULL); + + return curr_hook; +} + +static void +read_write_prof_sample_free_hook(prof_sample_free_hook_t *to_read, + bool do_write, prof_sample_free_hook_t to_write) { + size_t hook_sz = sizeof(prof_sample_free_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_sample_free", + (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, + "Unexpected prof_sample_free_hook mallctl failure"); +} + +static void +write_prof_sample_free_hook(prof_sample_free_hook_t new_hook) { + read_write_prof_sample_free_hook(NULL, true, new_hook); +} + +static prof_sample_free_hook_t +read_prof_sample_free_hook(void) { + prof_sample_free_hook_t curr_hook; + read_write_prof_sample_free_hook(&curr_hook, false, NULL); + + return curr_hook; +} + +static void +check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { + expect_false(mock_prof_sample_hook_called, + "Should not have called prof_sample hook"); + expect_false(mock_prof_sample_free_hook_called, + "Should not have called prof_sample_free hook"); + expect_ptr_null(sampled_ptr, "Unexpected sampled ptr"); + expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size"); + expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr"); + expect_zu_eq(free_sampled_ptr_sz, 0, + "Unexpected free sampled ptr size"); + + prof_sample_hook_t curr_hook = read_prof_sample_hook(); + expect_ptr_eq(curr_hook, sample_hook_set ? mock_prof_sample_hook : NULL, + "Unexpected non NULL default hook"); + + prof_sample_free_hook_t curr_free_hook = read_prof_sample_free_hook(); + expect_ptr_eq(curr_free_hook, sample_free_hook_set ? 
+ mock_prof_sample_free_hook : NULL, + "Unexpected non NULL default hook"); + + size_t alloc_sz = 10; + void *p = mallocx(alloc_sz, 0); + expect_ptr_not_null(p, "Failed to allocate"); + expect_true(mock_prof_sample_hook_called == sample_hook_set, + "Incorrect prof_sample hook usage"); + if (sample_hook_set) { + expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr"); + expect_zu_eq(alloc_sz, sampled_ptr_sz, + "Unexpected sampled usize"); + } + + dallocx(p, 0); + expect_true(mock_prof_sample_free_hook_called == sample_free_hook_set, + "Incorrect prof_sample_free hook usage"); + if (sample_free_hook_set) { + size_t usz = sz_s2u(alloc_sz); + expect_ptr_eq(p, free_sampled_ptr, "Unexpected sampled ptr"); + expect_zu_eq(usz, free_sampled_ptr_sz, "Unexpected sampled usize"); + } + + sampled_ptr = free_sampled_ptr = NULL; + sampled_ptr_sz = free_sampled_ptr_sz = 0; + mock_prof_sample_hook_called = false; + mock_prof_sample_free_hook_called = false; +} + +TEST_BEGIN(test_prof_sample_hooks) { + test_skip_if(!config_prof); + + check_prof_sample_hooks(false, false); + + write_prof_sample_hook(mock_prof_sample_hook); + check_prof_sample_hooks(true, false); + + write_prof_sample_free_hook(mock_prof_sample_free_hook); + check_prof_sample_hooks(true, true); + + write_prof_sample_hook(NULL); + check_prof_sample_hooks(false, true); + + write_prof_sample_free_hook(NULL); + check_prof_sample_hooks(false, false); + + /* Test read+write together. 
*/ + prof_sample_hook_t sample_hook; + read_write_prof_sample_hook(&sample_hook, true, mock_prof_sample_hook); + expect_ptr_null(sample_hook, "Unexpected non NULL default hook"); + check_prof_sample_hooks(true, false); + + prof_sample_free_hook_t sample_free_hook; + read_write_prof_sample_free_hook(&sample_free_hook, true, + mock_prof_sample_free_hook); + expect_ptr_null(sample_free_hook, "Unexpected non NULL default hook"); + check_prof_sample_hooks(true, true); + + read_write_prof_sample_hook(&sample_hook, true, NULL); + expect_ptr_eq(sample_hook, mock_prof_sample_hook, + "Unexpected prof_sample hook"); + check_prof_sample_hooks(false, true); + + read_write_prof_sample_free_hook(&sample_free_hook, true, NULL); + expect_ptr_eq(sample_free_hook, mock_prof_sample_free_hook, + "Unexpected prof_sample_free hook"); + check_prof_sample_hooks(false, false); +} +TEST_END + int main(void) { return test( test_prof_backtrace_hook_replace, test_prof_backtrace_hook_augment, - test_prof_dump_hook); + test_prof_dump_hook, + test_prof_sample_hooks); } From 5fd55837bbc400d8cc15152ac2b80b64baa9b68c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Feb 2023 15:28:22 -0800 Subject: [PATCH 2250/2608] Fix thread_name updating for heap profiling. The current thread name reading path updates the name every time, which requires both alloc and dalloc -- and the temporary NULL value in the middle causes races where the prof dump read path gets NULLed in the middle. Minimize the changes in this commit to isolate the bugfix testing; will also refactor the whole thread name paths later. 
--- src/prof_data.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/prof_data.c b/src/prof_data.c index f8b19594..56d3dc88 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -451,16 +451,15 @@ prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) { } size = strlen(thread_name) + 1; - if (size == 1) { - return ""; - } - ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (ret == NULL) { return NULL; } + memcpy(ret, thread_name, size); + ret[size - 1] = '\0'; + return ret; } @@ -493,14 +492,14 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { return EAGAIN; } - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true, - true); - tdata->thread_name = NULL; - } - if (strlen(s) > 0) { - tdata->thread_name = s; + char *old_thread_name = tdata->thread_name; + tdata->thread_name = s; + if (old_thread_name != NULL) { + idalloctm(tsd_tsdn(tsd), old_thread_name, /* tcache */ NULL, + /* alloc_ctx */ NULL, /* is_internal */ true, + /* slow_path */ true); } + return 0; } From 97b313c7d480bc087b0c805b4bb42b71dd9c9e93 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Dec 2022 14:36:04 -0800 Subject: [PATCH 2251/2608] More conservative setting for /test/unit/background_thread_enable. Lower the thread and arena count to avoid resource exhaustion on 32-bit. 
--- test/unit/background_thread_enable.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 44034ac6..5f42feff 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:20"; +const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:8"; static unsigned max_test_narenas(void) { @@ -12,12 +12,9 @@ max_test_narenas(void) { * approximation. */ unsigned ret = 10 * ncpus; - /* Limit the max to avoid VM exhaustion on 32-bit . */ - if (ret > 512) { - ret = 512; - } - return ret; + /* Limit the max to avoid VM exhaustion on 32-bit . */ + return ret > 256 ? 256 : ret; } TEST_BEGIN(test_deferred) { From b6125120ac22c2c7e7cd36df114a2b280dcc33e7 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 13 Feb 2023 17:43:12 -0800 Subject: [PATCH 2252/2608] Add an explicit name to the dedicated oversize arena. --- src/arena.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/arena.c b/src/arena.c index 25ab41af..970f60ed 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1746,6 +1746,11 @@ arena_choose_huge(tsd_t *tsd) { if (huge_arena == NULL) { return NULL; } + + char *huge_arena_name = "auto_oversize"; + strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN); + huge_arena->name[ARENA_NAME_LEN - 1] = '\0'; + /* * Purge eagerly for huge allocations, because: 1) number of * huge allocations is usually small, which means ticker based From c7805f1eb5b9eadccb9711044e141ff741c09d4c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 15 Feb 2023 17:28:58 -0800 Subject: [PATCH 2253/2608] Add a header in HPA stats for the nonfull slabs. 
--- src/stats.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index 701a6c86..43360a2d 100644 --- a/src/stats.c +++ b/src/stats.c @@ -910,8 +910,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { " npageslabs: %zu huge, %zu nonhuge\n" " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" - " nretained: 0 huge, %zu nonhuge \n" - "\n", + " nretained: 0 huge, %zu nonhuge \n", npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, ndirty_huge, ndirty_nonhuge, @@ -932,6 +931,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "empty_slabs" */ + /* Last, nonfull slab stats. */ COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) COL_HDR(row, npageslabs_huge, NULL, right, 16, size) @@ -947,6 +947,7 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { stats_arenas_mib[2] = i; CTL_LEAF_PREPARE(stats_arenas_mib, 3, "hpa_shard.nonfull_slabs"); + emitter_table_printf(emitter, " In nonfull slabs:\n"); emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "nonfull_slabs"); bool in_gap = false; From 4422f88d17404944a312825a1aec96cd9dc6c165 Mon Sep 17 00:00:00 2001 From: barracuda156 Date: Sat, 15 Oct 2022 18:04:24 +0800 Subject: [PATCH 2254/2608] Makefile.in: link with g++ when cxx enabled --- Makefile.in | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.in b/Makefile.in index 195084d6..450abeb4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -522,7 +522,11 @@ endif $(objroot)lib/$(LIBJEMALLOC).$(SOREV) : $(if $(PIC_CFLAGS),$(C_PIC_OBJS),$(C_OBJS)) $(if $(PIC_CFLAGS),$(CPP_PIC_OBJS),$(CPP_OBJS)) @mkdir -p $(@D) +ifeq (@enable_cxx@, 1) + $(CXX) $(DSO_LDFLAGS) $(call RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) +else $(CC) $(DSO_LDFLAGS) $(call 
RPATH,$(RPATH_EXTRA)) $(LDTARGET) $+ $(LDFLAGS) $(LIBS) $(EXTRA_LDFLAGS) +endif $(objroot)lib/$(LIBJEMALLOC)_pic.$(A) : $(C_PIC_OBJS) $(CPP_PIC_OBJS) $(objroot)lib/$(LIBJEMALLOC).$(A) : $(C_OBJS) $(CPP_OBJS) From e8b28908dede2a27530dbaa255af6cbcf579fc31 Mon Sep 17 00:00:00 2001 From: Fernando Pelliccioni Date: Fri, 23 Sep 2022 11:34:05 -0300 Subject: [PATCH 2255/2608] [MSVC] support for Visual Studio 2019 and 2022 --- msvc/jemalloc_vc2019.sln | 63 +++ msvc/jemalloc_vc2022.sln | 63 +++ .../projects/vc2019/jemalloc/jemalloc.vcxproj | 379 ++++++++++++++++++ .../vc2019/jemalloc/jemalloc.vcxproj.filters | 197 +++++++++ .../vc2019/test_threads/test_threads.vcxproj | 326 +++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ .../projects/vc2022/jemalloc/jemalloc.vcxproj | 379 ++++++++++++++++++ .../vc2022/jemalloc/jemalloc.vcxproj.filters | 197 +++++++++ .../vc2022/test_threads/test_threads.vcxproj | 326 +++++++++++++++ .../test_threads/test_threads.vcxproj.filters | 26 ++ 10 files changed, 1982 insertions(+) create mode 100644 msvc/jemalloc_vc2019.sln create mode 100644 msvc/jemalloc_vc2022.sln create mode 100644 msvc/projects/vc2019/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2019/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters create mode 100644 msvc/projects/vc2022/jemalloc/jemalloc.vcxproj create mode 100644 msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters create mode 100644 msvc/projects/vc2022/test_threads/test_threads.vcxproj create mode 100644 msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters diff --git a/msvc/jemalloc_vc2019.sln b/msvc/jemalloc_vc2019.sln new file mode 100644 index 00000000..871ea9d4 --- /dev/null +++ b/msvc/jemalloc_vc2019.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 
14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2019\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2019\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + 
{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/jemalloc_vc2022.sln b/msvc/jemalloc_vc2022.sln new file mode 100644 index 00000000..898574f1 --- /dev/null +++ b/msvc/jemalloc_vc2022.sln @@ -0,0 +1,63 @@ + +Microsoft Visual Studio 
Solution File, Format Version 12.00 +# Visual Studio 14 +VisualStudioVersion = 14.0.24720.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{70A99006-6DE9-472B-8F83-4CEE6C616DF3}" + ProjectSection(SolutionItems) = preProject + ReadMe.txt = ReadMe.txt + EndProjectSection +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jemalloc", "projects\vc2022\jemalloc\jemalloc.vcxproj", "{8D6BB292-9E1C-413D-9F98-4864BDC1514A}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test_threads", "projects\vc2022\test_threads\test_threads.vcxproj", "{09028CFD-4EB7-491D-869C-0708DB97ED44}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Debug-static|x64 = Debug-static|x64 + Debug-static|x86 = Debug-static|x86 + Release|x64 = Release|x64 + Release|x86 = Release|x86 + Release-static|x64 = Release-static|x64 + Release-static|x86 = Release-static|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.ActiveCfg = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x64.Build.0 = Debug|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.ActiveCfg = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug|x86.Build.0 = Debug|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x64.Build.0 = Debug-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Debug-static|x86.Build.0 = Debug-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.ActiveCfg = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x64.Build.0 = Release|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.ActiveCfg = Release|Win32 + 
{8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release|x86.Build.0 = Release|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.ActiveCfg = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x64.Build.0 = Release-static|x64 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.ActiveCfg = Release-static|Win32 + {8D6BB292-9E1C-413D-9F98-4864BDC1514A}.Release-static|x86.Build.0 = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.ActiveCfg = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x64.Build.0 = Debug|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.ActiveCfg = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug|x86.Build.0 = Debug|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.ActiveCfg = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x64.Build.0 = Debug-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.ActiveCfg = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Debug-static|x86.Build.0 = Debug-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.ActiveCfg = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x64.Build.0 = Release|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.ActiveCfg = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release|x86.Build.0 = Release|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.ActiveCfg = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x64.Build.0 = Release-static|x64 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.ActiveCfg = Release-static|Win32 + {09028CFD-4EB7-491D-869C-0708DB97ED44}.Release-static|x86.Build.0 = Release-static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj new file mode 
100644 index 00000000..66ba849d --- /dev/null +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -0,0 +1,379 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + + + + DynamicLibrary + true + v142 + MultiByte + + + StaticLibrary + true + v142 + MultiByte + + + DynamicLibrary + false + v142 + true + MultiByte + + + StaticLibrary + false + v142 + true + MultiByte + + + DynamicLibrary + true + v142 + MultiByte + + + StaticLibrary + true + v142 + MultiByte + + + DynamicLibrary + false + v142 + true + MultiByte + + + StaticLibrary + false + v142 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + 
_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 
4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..1b43e9f2 --- /dev/null +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,197 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No 
newline at end of file diff --git a/msvc/projects/vc2019/test_threads/test_threads.vcxproj b/msvc/projects/vc2019/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..8471a41e --- /dev/null +++ b/msvc/projects/vc2019/test_threads/test_threads.vcxproj @@ -0,0 +1,326 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + + + + Application + true + v142 + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + true + v142 + MultiByte + + + Application + true + v142 + MultiByte + + + Application + false + v142 + true + MultiByte + + + Application + false + v142 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + 
jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + 
jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters new 
file mode 100644 index 00000000..fa4588fd --- /dev/null +++ b/msvc/projects/vc2019/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj new file mode 100644 index 00000000..7d9a1aa0 --- /dev/null +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -0,0 +1,379 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {8D6BB292-9E1C-413D-9F98-4864BDC1514A} + Win32Proj + jemalloc + + + + DynamicLibrary + true + v143 + MultiByte + + + StaticLibrary + true + v143 + MultiByte + + + DynamicLibrary + false + v143 + true + MultiByte + + + StaticLibrary + false + v143 + true + MultiByte + + + DynamicLibrary + true + v143 + MultiByte + + + StaticLibrary + true + v143 + MultiByte + + + DynamicLibrary + false + v143 + true + MultiByte + + + StaticLibrary + false + v143 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-$(PlatformToolset)-$(Configuration) + + + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)d + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + $(ProjectName)-vc$(PlatformToolsetVersion)-$(Configuration) + + + + + + Level3 + Disabled + _REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;JEMALLOC_DEBUG;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + + + + + + + Level3 + Disabled + JEMALLOC_NO_PRIVATE_NAMESPACE;JEMALLOC_DEBUG;_REENTRANT;JEMALLOC_EXPORT=;_DEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + 4090;4146;4267;4334 + OldStyle + false + + + Windows + true + + + + + Level3 + + + MaxSpeed + true + true + _REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + 
_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;_WINDLL;DLLEXPORT;NDEBUG;%(PreprocessorDefinitions) + 4090;4146;4267;4334 + $(OutputPath)$(TargetName).pdb + + + Windows + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_NO_PRIVATE_NAMESPACE;_REENTRANT;JEMALLOC_EXPORT=;NDEBUG;_LIB;%(PreprocessorDefinitions) + ..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + 4090;4146;4267;4334 + OldStyle + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters new file mode 100644 index 00000000..1b43e9f2 --- /dev/null +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -0,0 +1,197 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + 
+ Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/test_threads/test_threads.vcxproj b/msvc/projects/vc2022/test_threads/test_threads.vcxproj new file mode 100644 index 00000000..471f693b --- /dev/null +++ b/msvc/projects/vc2022/test_threads/test_threads.vcxproj @@ -0,0 +1,326 @@ + + + + + Debug-static + Win32 + + + Debug-static + x64 + + + Debug + Win32 + + + Release-static + Win32 + + + Release-static + x64 + + + Release + Win32 + + + Debug + x64 + + + Release + x64 + + + + {09028CFD-4EB7-491D-869C-0708DB97ED44} + Win32Proj + test_threads + + + + Application + true + v143 + MultiByte + + + Application + true + v143 + MultiByte + + + Application + false + v143 + true + MultiByte + + + Application + false + v143 + true + MultiByte + + + Application + true + v143 + MultiByte + + + Application + true + v143 + MultiByte + + + Application + false + v143 + true + MultiByte + + + Application + false + v143 + true + MultiByte + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + true + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + true + $(SolutionDir)$(Platform)\$(Configuration)\ + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + 
$(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + false + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + Level3 + Disabled + _DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + jemallocd.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + + + Level3 + Disabled + JEMALLOC_EXPORT=;JEMALLOC_STATIC;_DEBUG;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreadedDebug + + + Console + true + 
jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + $(SolutionDir)$(Platform)\$(Configuration) + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-$(PlatformToolset)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc.lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + Level3 + + + MaxSpeed + true + true + JEMALLOC_EXPORT=;JEMALLOC_STATIC;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + ..\..\..\..\test\include;..\..\..\..\include;..\..\..\..\include\msvc_compat;%(AdditionalIncludeDirectories) + 
MultiThreaded + + + Console + true + true + true + $(SolutionDir)$(Platform)\$(Configuration) + jemalloc-vc$(PlatformToolsetVersion)-$(Configuration).lib;kernel32.lib;user32.lib;gdi32.lib;winspool.lib;comdlg32.lib;advapi32.lib;shell32.lib;ole32.lib;oleaut32.lib;uuid.lib;odbc32.lib;odbccp32.lib;%(AdditionalDependencies) + + + + + + + + + {8d6bb292-9e1c-413d-9f98-4864bdc1514a} + + + + + + + + + \ No newline at end of file diff --git a/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters b/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters new file mode 100644 index 00000000..fa4588fd --- /dev/null +++ b/msvc/projects/vc2022/test_threads/test_threads.vcxproj.filters @@ -0,0 +1,26 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hh;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + + + Header Files + + + \ No newline at end of file From 09e4b38fb1f9a9b505e35ac13b8f99282990bc2c Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 15 Dec 2022 10:54:33 -0800 Subject: [PATCH 2256/2608] Use asm volatile during benchmarks. 
--- configure.ac | 16 +++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 3 +++ test/include/test/bench.h | 11 ++++++++++ test/stress/cpp/microbench.cpp | 20 ++++++++++++------- test/stress/large_microbench.c | 2 ++ test/stress/microbench.c | 6 ++++++ 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 846a049c..fbc6298b 100644 --- a/configure.ac +++ b/configure.ac @@ -546,6 +546,22 @@ typedef unsigned __int32 uint32_t; ;; esac AC_DEFINE_UNQUOTED([LG_VADDR], [$LG_VADDR], [ ]) +AC_CACHE_CHECK([asm volatile support], + [je_cv_asm_volatile], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +]], +[[ + void* ptr; + asm volatile("" : "+r"(ptr)); + return 0; +]])], +[je_cv_asm_volatile=yes], +[je_cv_asm_volatile=no], +[je_cv_asm_volatile=no])) +if test "x${je_cv_asm_volatile}" = "xyes"; then + AC_DEFINE([JEMALLOC_HAVE_ASM_VOLATILE], [ ], [ ]) +fi LD_PRELOAD_VAR="LD_PRELOAD" so="so" diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 55938433..41e40ccf 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -440,4 +440,7 @@ /* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". */ #undef JEMALLOC_ZERO_REALLOC_DEFAULT_FREE +/* If defined, use volatile asm during benchmarks. 
*/ +#undef JEMALLOC_HAVE_ASM_VOLATILE + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 7421b4d2..29c6801f 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -58,3 +58,14 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, dallocx(p, 0); } + +static inline void * +no_opt_ptr(void *ptr) { +#ifdef JEMALLOC_HAVE_ASM_VOLATILE + asm volatile("" : "+r"(ptr)); +#else + void *volatile dup = ptr; + ptr = dup; +#endif + return ptr; +} diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index ab41b65d..203c3dc9 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -3,44 +3,50 @@ static void malloc_free(void) { - void* volatile p = malloc(1); + void* p = malloc(1); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + p = no_opt_ptr(p); free((void *)p); } static void new_delete(void) { - void* volatile p = ::operator new(1); + void* p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); + p = no_opt_ptr(p); ::operator delete((void *)p); } static void malloc_free_array(void) { - void* volatile p = malloc(sizeof(int)*8); + void* p = malloc(sizeof(int)*8); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + p = no_opt_ptr(p); free((void *)p); } static void new_delete_array(void) { - int* volatile p = new int[8]; - expect_ptr_not_null((int *)p, "Unexpected new[] failure"); + int* p = new int[8]; + expect_ptr_not_null((void *)p, "Unexpected new[] failure"); + p = (int *)no_opt_ptr((void *)p); delete[] (int *)p; } #if __cpp_sized_deallocation >= 201309 static void new_sized_delete(void) { - void* volatile p = ::operator new(1); + void* p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); + p = no_opt_ptr(p); ::operator delete((void *)p, 1); } static void malloc_sdallocx(void) { - void* volatile p = malloc(1); + void* p = malloc(1); 
expect_ptr_not_null((void *)p, "Unexpected malloc failure"); + p = no_opt_ptr(p); sdallocx((void *)p, 1, 0); } #endif diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c index c66b33a1..44a60c53 100644 --- a/test/stress/large_microbench.c +++ b/test/stress/large_microbench.c @@ -9,6 +9,7 @@ large_mallocx_free(void) { */ void *p = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); assert_ptr_not_null(p, "mallocx shouldn't fail"); + p = no_opt_ptr(p); free(p); } @@ -16,6 +17,7 @@ static void small_mallocx_free(void) { void *p = mallocx(16, 0); assert_ptr_not_null(p, "mallocx shouldn't fail"); + p = no_opt_ptr(p); free(p); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 062e32fd..89479b7e 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -9,6 +9,7 @@ malloc_free(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); free(p); } @@ -19,6 +20,7 @@ mallocx_free(void) { test_fail("Unexpected mallocx() failure"); return; } + p = no_opt_ptr(p); free(p); } @@ -35,6 +37,7 @@ malloc_dallocx(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); dallocx(p, 0); } @@ -45,6 +48,7 @@ malloc_sdallocx(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); sdallocx(p, 1, 0); } @@ -82,6 +86,7 @@ malloc_sallocx_free(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); if (sallocx(p, 0) < 1) { test_fail("Unexpected sallocx() failure"); } @@ -103,6 +108,7 @@ malloc_nallocx_free(void) { test_fail("Unexpected malloc() failure"); return; } + p = no_opt_ptr(p); if (nallocx(1, 0) < 1) { test_fail("Unexpected nallocx() failure"); } From 4edea8eb8e879bf4d89a3ed418bf90bb8e09d93b Mon Sep 17 00:00:00 2001 From: Chris Seymour Date: Sat, 25 Feb 2023 14:52:22 +0000 Subject: [PATCH 2257/2608] switch to https --- README | 2 +- TUNING.md | 26 +++++++++++++------------- jemalloc.pc.in | 2 +- 3 files changed, 15 insertions(+), 15 
deletions(-) diff --git a/README b/README index 3a6e0d27..d33a69ce 100644 --- a/README +++ b/README @@ -17,4 +17,4 @@ jemalloc. The ChangeLog file contains a brief summary of changes for each release. -URL: http://jemalloc.net/ +URL: https://jemalloc.net/ diff --git a/TUNING.md b/TUNING.md index e96399d7..1f6bef35 100644 --- a/TUNING.md +++ b/TUNING.md @@ -11,9 +11,9 @@ by a few percent, or make favorable trade-offs. ## Notable runtime options for performance tuning Runtime options can be set via -[malloc_conf](http://jemalloc.net/jemalloc.3.html#tuning). +[malloc_conf](https://jemalloc.net/jemalloc.3.html#tuning). -* [background_thread](http://jemalloc.net/jemalloc.3.html#background_thread) +* [background_thread](https://jemalloc.net/jemalloc.3.html#background_thread) Enabling jemalloc background threads generally improves the tail latency for application threads, since unused memory purging is shifted to the dedicated @@ -23,7 +23,7 @@ Runtime options can be set via Suggested: `background_thread:true` when jemalloc managed threads can be allowed. -* [metadata_thp](http://jemalloc.net/jemalloc.3.html#opt.metadata_thp) +* [metadata_thp](https://jemalloc.net/jemalloc.3.html#opt.metadata_thp) Allowing jemalloc to utilize transparent huge pages for its internal metadata usually reduces TLB misses significantly, especially for programs @@ -35,8 +35,8 @@ Runtime options can be set via `metadata_thp:always`, which is expected to improve CPU utilization at a small memory cost. 
-* [dirty_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and - [muzzy_decay_ms](http://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms) +* [dirty_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.dirty_decay_ms) and + [muzzy_decay_ms](https://jemalloc.net/jemalloc.3.html#opt.muzzy_decay_ms) Decay time determines how fast jemalloc returns unused pages back to the operating system, and therefore provides a fairly straightforward trade-off @@ -46,7 +46,7 @@ Runtime options can be set via Suggested: tune the values based on the desired trade-offs. -* [narenas](http://jemalloc.net/jemalloc.3.html#opt.narenas) +* [narenas](https://jemalloc.net/jemalloc.3.html#opt.narenas) By default jemalloc uses multiple arenas to reduce internal lock contention. However high arena count may also increase overall memory fragmentation, @@ -57,7 +57,7 @@ Runtime options can be set via Suggested: if low parallelism is expected, try lower arena count while monitoring CPU and memory usage. -* [percpu_arena](http://jemalloc.net/jemalloc.3.html#opt.percpu_arena) +* [percpu_arena](https://jemalloc.net/jemalloc.3.html#opt.percpu_arena) Enable dynamic thread to arena association based on running CPU. This has the potential to improve locality, e.g. when thread to CPU affinity is @@ -100,28 +100,28 @@ aborts immediately on illegal options. In addition to the runtime options, there are a number of programmatic ways to improve application performance with jemalloc. -* [Explicit arenas](http://jemalloc.net/jemalloc.3.html#arenas.create) +* [Explicit arenas](https://jemalloc.net/jemalloc.3.html#arenas.create) Manually created arenas can help performance in various ways, e.g. by managing locality and contention for specific usages. 
For example, applications can explicitly allocate frequently accessed objects from a dedicated arena with - [mallocx()](http://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve + [mallocx()](https://jemalloc.net/jemalloc.3.html#MALLOCX_ARENA) to improve locality. In addition, explicit arenas often benefit from individually tuned options, e.g. relaxed [decay - time](http://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if + time](https://jemalloc.net/jemalloc.3.html#arena.i.dirty_decay_ms) if frequent reuse is expected. -* [Extent hooks](http://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks) +* [Extent hooks](https://jemalloc.net/jemalloc.3.html#arena.i.extent_hooks) Extent hooks allow customization for managing underlying memory. One use case for performance purpose is to utilize huge pages -- for example, - [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp) + [HHVM](https://github.com/facebook/hhvm/blob/master/hphp/util/alloc.cpp) uses explicit arenas with customized extent hooks to manage 1GB huge pages for frequently accessed data, which reduces TLB misses significantly. * [Explicit thread-to-arena - binding](http://jemalloc.net/jemalloc.3.html#thread.arena) + binding](https://jemalloc.net/jemalloc.3.html#thread.arena) It is common for some threads in an application to have different memory access / allocation patterns. Threads with heavy workloads often benefit diff --git a/jemalloc.pc.in b/jemalloc.pc.in index c428a86d..0a377152 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -6,7 +6,7 @@ install_suffix=@install_suffix@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes fragmentation avoidance and scalable concurrency support. 
-URL: http://jemalloc.net/ +URL: https://jemalloc.net/ Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} From f743690739299cb1e72852744bdd79443b264be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 10 Mar 2023 09:12:15 +0000 Subject: [PATCH 2258/2608] Remove unused mutex from hpa_central --- include/jemalloc/internal/hpa.h | 5 ----- src/hpa.c | 6 +----- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index f3562853..0b3c76c6 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -9,11 +9,6 @@ typedef struct hpa_central_s hpa_central_t; struct hpa_central_s { - /* - * The mutex guarding most of the operations on the central data - * structure. - */ - malloc_mutex_t mtx; /* * Guards expansion of eden. We separate this from the regular mutex so * that cheaper operations can still continue while we're doing the OS diff --git a/src/hpa.c b/src/hpa.c index 7e2aeba0..8ebb2db2 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -68,11 +68,7 @@ hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) if (err) { return true; } - err = malloc_mutex_init(¢ral->mtx, "hpa_central", - WITNESS_RANK_HPA_CENTRAL, malloc_mutex_rank_exclusive); - if (err) { - return true; - } + central->base = base; central->eden = NULL; central->eden_len = 0; From 71bc1a3d91ae7e513488401627eca2a31e9f6e60 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 10 Mar 2023 13:15:59 -0800 Subject: [PATCH 2259/2608] Avoid assuming the arena id in test when percpu_arena is used. 
--- test/unit/huge.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/unit/huge.c b/test/unit/huge.c index ec64e500..53f6577b 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -82,6 +82,9 @@ TEST_BEGIN(huge_allocation) { expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); dallocx(ptr, 0); + test_skip_if(have_percpu_arena && + PERCPU_ARENA_ENABLED(opt_percpu_arena)); + ptr = mallocx(HUGE_SZ >> 1, 0); expect_ptr_not_null(ptr, "Fail to allocate half huge size"); expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, From d503d72129eddb2175d5d5119c9b70d507112947 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 9 Mar 2023 11:26:07 -0800 Subject: [PATCH 2260/2608] Add the missing descriptions in AC_DEFINE --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index fbc6298b..2bbf7d54 100644 --- a/configure.ac +++ b/configure.ac @@ -1088,7 +1088,7 @@ AC_SUBST([JEMALLOC_CPREFIX]) AC_ARG_WITH([export], [AS_HELP_STRING([--without-export], [disable exporting jemalloc public APIs])], [if test "x$with_export" = "xno"; then - AC_DEFINE([JEMALLOC_EXPORT],[], [ ]) + AC_DEFINE([JEMALLOC_EXPORT], [ ], [ ]) fi] ) @@ -1650,7 +1650,7 @@ fi [enable_uaf_detection="0"] ) if test "x$enable_uaf_detection" = "x1" ; then - AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ]) + AC_DEFINE([JEMALLOC_UAF_DETECTION], [ ], [ ]) fi AC_SUBST([enable_uaf_detection]) From aba1645f2d65a3b5c46958d7642b46ab3c142cf3 Mon Sep 17 00:00:00 2001 From: Marvin Schmidt Date: Tue, 27 Sep 2022 07:03:14 +0200 Subject: [PATCH 2261/2608] configure: Handle *-linux-musl* hosts properly This is the same as the `*-*-linux*` case with the two exceptions that we don't set glibc=1 and don't define JEMALLOC_USE_CXX_THROW --- configure.ac | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/configure.ac b/configure.ac index 2bbf7d54..f38b72d6 100644 --- a/configure.ac +++ b/configure.ac @@ -723,6 +723,19 @@ case "${host}" in fi 
zero_realloc_default_free="1" ;; + *-*-linux-musl*) + dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. + JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) + abi="elf" + AC_DEFINE([JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS], [ ], [ ]) + AC_DEFINE([JEMALLOC_HAS_ALLOCA_H], [ ], [ ]) + AC_DEFINE([JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY], [ ], [ ]) + AC_DEFINE([JEMALLOC_THREADED_INIT], [ ], [ ]) + if test "${LG_SIZEOF_PTR}" = "3"; then + default_retain="1" + fi + zero_realloc_default_free="1" + ;; *-*-linux*) dnl syscall(2) and secure_getenv(3) are exposed by _GNU_SOURCE. JE_APPEND_VS(CPPFLAGS, -D_GNU_SOURCE) From 45249cf5a9cfa13c2c62e68e272a391721523b4b Mon Sep 17 00:00:00 2001 From: Marvin Schmidt Date: Tue, 27 Sep 2022 07:00:13 +0200 Subject: [PATCH 2262/2608] Fix exception specification error for hosts using musl libc It turns out that the previous commit did not suffice since the JEMALLOC_SYS_NOTHROW definition also causes the same exception specification errors as JEMALLOC_USE_CXX_THROW did: ``` x86_64-pc-linux-musl-cc -std=gnu11 -Werror=unknown-warning-option -Wall -Wextra -Wshorten-64-to-32 -Wsign-compare -Wundef -Wno-format-zero-length -Wpointer- arith -Wno-missing-braces -Wno-missing-field-initializers -pipe -g3 -fvisibility=hidden -Wimplicit-fallthrough -O3 -funroll-loops -march=native -O2 -pipe -c -march=native -O2 -pipe -D_GNU_SOURCE -D_REENTRANT -Iinclude -Iinclude -o src/background_thread.o src/background_thread.c In file included from src/jemalloc_cpp.cpp:9: In file included from include/jemalloc/internal/jemalloc_preamble.h:27: include/jemalloc/internal/../jemalloc.h:254:32: error: exception specification in declaration does not match previous declaration void JEMALLOC_SYS_NOTHROW *je_malloc(size_t size) ^ include/jemalloc/internal/../jemalloc.h:75:21: note: expanded from macro 'je_malloc' ^ /usr/x86_64-pc-linux-musl/include/stdlib.h:40:7: note: previous declaration is here void *malloc (size_t); ^ ``` On systems using the musl C library we have to omit 
the exception specification on malloc function family like it's done for MacOS, FreeBSD and OpenBSD. --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 2de3f27d..05d996be 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -142,7 +142,7 @@ # define JEMALLOC_COLD #endif -#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__)) && !defined(JEMALLOC_NO_RENAME) +#if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__linux__) && !defined(__GLIBC__))) && !defined(JEMALLOC_NO_RENAME) # define JEMALLOC_SYS_NOTHROW #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW From 8e7353a19b5fd9dd1041307b884bc969065b63af Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Mar 2023 14:02:30 -0700 Subject: [PATCH 2263/2608] Explicit arena assignment in test_thread_idle. Otherwise the associated arena could change with percpu arena enabled. --- test/unit/mallctl.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 244d4c96..1ff8b564 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1097,8 +1097,12 @@ TEST_BEGIN(test_thread_idle) { unsigned arena_ind; sz = sizeof(arena_ind); - err = mallctl("thread.arena", &arena_ind, &sz, NULL, 0); - expect_d_eq(err, 0, ""); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + err = mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)); + expect_d_eq(err, 0, "Unexpected mallctl() failure"); + err = mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, "Unexpected mallctl() failure"); /* We're going to do an allocation of size 1, which we know is small. 
*/ size_t mib[5]; @@ -1108,10 +1112,11 @@ TEST_BEGIN(test_thread_idle) { mib[2] = arena_ind; /* - * This alloc and dalloc should leave something in the tcache, in a - * small size's cache bin. + * This alloc and dalloc should leave something (from the newly created + * arena) in the tcache, in a small size's cache bin. Later the stats + * of that arena will be checked to verify if tcache flush happened. */ - void *ptr = mallocx(1, 0); + void *ptr = mallocx(1, MALLOCX_TCACHE_NONE); dallocx(ptr, 0); uint64_t epoch; From 8b64be34414e92fcbcdbaf5b81db6d26289667b5 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Mar 2023 14:12:12 -0700 Subject: [PATCH 2264/2608] Explicit arena assignment in test_tcache_max. Otherwise the associated arena could change with percpu arena enabled. --- test/unit/tcache_max.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 1f657c85..b1093f40 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -157,6 +157,13 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(opt_prof); test_skip_if(san_uaf_detection_enabled()); + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena_ind, + sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); + for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { From 31e01a98f159926493158cde6453cde55f21c42b Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 14 Dec 2022 17:23:41 -0800 Subject: [PATCH 2265/2608] Fix the rdtscp detection bug and add prefix for the macro. 
--- configure.ac | 4 ++-- include/jemalloc/internal/jemalloc_internal_defs.h.in | 6 ++++++ include/jemalloc/internal/jemalloc_internal_inlines_a.h | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/configure.ac b/configure.ac index f38b72d6..ec7a97cb 100644 --- a/configure.ac +++ b/configure.ac @@ -522,10 +522,10 @@ typedef unsigned __int32 uint32_t; return 0; ]])], [je_cv_rdtscp=yes], - [je_cv_rdstcp=no], + [je_cv_rdtscp=no], [je_cv_rdtscp=no])) if test "x${je_cv_rdtscp}" = "xyes"; then - AC_DEFINE([HAVE_RDTSCP], 1, []) + AC_DEFINE([JEMALLOC_HAVE_RDTSCP], [ ], [ ]) fi fi ;; diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 41e40ccf..87845a48 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -443,4 +443,10 @@ /* If defined, use volatile asm during benchmarks. */ #undef JEMALLOC_HAVE_ASM_VOLATILE +/* + * If defined, support the use of rdtscp to get the time stamp counter + * and the processor ID. 
+ */ +#undef JEMALLOC_HAVE_RDTSCP + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 7686a9b7..cb6d78fa 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -14,7 +14,7 @@ malloc_getcpu(void) { return GetCurrentProcessorNumber(); #elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); -#elif defined(HAVE_RDTSCP) +#elif defined(JEMALLOC_HAVE_RDTSCP) unsigned int ax, cx, dx; asm volatile("rdtscp" : "=a"(ax), "=d"(dx), "=c"(cx) ::); return (malloc_cpuid_t)(dx & 0xfff); From 543e2d61e6047208d647cf3fd3499bead3bcc23e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Thu, 2 Mar 2023 23:32:42 +0000 Subject: [PATCH 2266/2608] Simplify the logic in ph_insert Also fixes what looks like an off by one error in the lazy aux list merge part of the code that previously never touched the last node in the aux list. --- include/jemalloc/internal/ph.h | 59 +++++++++++++++++----------------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 5f091c5f..8ceadb90 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -318,36 +318,37 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { */ if (ph->root == NULL) { ph->root = phn; - } else { - /* - * As a special case, check to see if we can replace the root. - * This is practically common in some important cases, and lets - * us defer some insertions (hopefully, until the point where - * some of the items in the aux list have been removed, savings - * us from linking them at all). 
- */ - if (cmp(phn, ph->root) < 0) { - phn_lchild_set(phn, ph->root, offset); - phn_prev_set(ph->root, phn, offset); - ph->root = phn; - ph->auxcount = 0; - return; - } - ph->auxcount++; - phn_next_set(phn, phn_next_get(ph->root, offset), offset); - if (phn_next_get(ph->root, offset) != NULL) { - phn_prev_set(phn_next_get(ph->root, offset), phn, - offset); - } - phn_prev_set(phn, ph->root, offset); - phn_next_set(ph->root, phn, offset); + return; } - if (ph->auxcount > 1) { - unsigned nmerges = ffs_zu(ph->auxcount - 1); - bool done = false; - for (unsigned i = 0; i < nmerges && !done; i++) { - done = ph_try_aux_merge_pair(ph, offset, cmp); - } + + /* + * As a special case, check to see if we can replace the root. + * This is practically common in some important cases, and lets + * us defer some insertions (hopefully, until the point where + * some of the items in the aux list have been removed, savings + * us from linking them at all). + */ + if (cmp(phn, ph->root) < 0) { + phn_lchild_set(phn, ph->root, offset); + phn_prev_set(ph->root, phn, offset); + ph->root = phn; + ph->auxcount = 0; + return; + } + + phn_next_set(phn, phn_next_get(ph->root, offset), offset); + if (phn_next_get(ph->root, offset) != NULL) { + phn_prev_set(phn_next_get(ph->root, offset), phn, + offset); + } + phn_prev_set(phn, ph->root, offset); + phn_next_set(ph->root, phn, offset); + + ph->auxcount++; + unsigned nmerges = ffs_zu(ph->auxcount); + bool done = false; + for (unsigned i = 0; i < nmerges && !done; i++) { + done = ph_try_aux_merge_pair(ph, offset, cmp); } } From be6da4f663a062353dd9a25baaae0ebcd68b7477 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 3 Mar 2023 12:08:51 +0000 Subject: [PATCH 2267/2608] Do not maintain root->prev in ph_remove. 
--- include/jemalloc/internal/ph.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 8ceadb90..0cc41eab 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -380,9 +380,6 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { */ if (phn_lchild_get(phn, offset) == NULL) { ph->root = phn_next_get(phn, offset); - if (ph->root != NULL) { - phn_prev_set(ph->root, NULL, offset); - } return; } ph_merge_aux(ph, offset, cmp); From 5266152d7922fc76fdaaa39ded9381a4fa7b4b9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 3 Mar 2023 12:35:45 +0000 Subject: [PATCH 2268/2608] Simplify the logic in ph_remove --- include/jemalloc/internal/ph.h | 64 +++++++++++----------------------- 1 file changed, 20 insertions(+), 44 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 0cc41eab..c3cf8743 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -369,9 +369,6 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { - void *replace; - void *parent; - if (ph->root == phn) { /* * We can delete from aux list without merging it, but we need @@ -389,50 +386,29 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { } } - /* Get parent (if phn is leftmost child) before mutating. */ - if ((parent = phn_prev_get(phn, offset)) != NULL) { - if (phn_lchild_get(parent, offset) != phn) { - parent = NULL; - } - } - /* Find a possible replacement node, and link to parent. */ - replace = ph_merge_children(phn, offset, cmp); - /* Set next/prev for sibling linked list. */ + void* prev = phn_prev_get(phn, offset); + void* next = phn_next_get(phn, offset); + + /* If we have children, then we integrate them back in the heap. 
*/ + void* replace = ph_merge_children(phn, offset, cmp); if (replace != NULL) { - if (parent != NULL) { - phn_prev_set(replace, parent, offset); - phn_lchild_set(parent, replace, offset); - } else { - phn_prev_set(replace, phn_prev_get(phn, offset), - offset); - if (phn_prev_get(phn, offset) != NULL) { - phn_next_set(phn_prev_get(phn, offset), replace, - offset); - } - } - phn_next_set(replace, phn_next_get(phn, offset), offset); - if (phn_next_get(phn, offset) != NULL) { - phn_prev_set(phn_next_get(phn, offset), replace, - offset); + phn_next_set(replace, next, offset); + if (next != NULL) { + phn_prev_set(next, replace, offset); } + + next = replace; + } + + if (next != NULL) { + phn_prev_set(next, prev, offset); + } + + assert(prev != NULL); + if (phn_lchild_get(prev, offset) == phn) { + phn_lchild_set(prev, next, offset); } else { - if (parent != NULL) { - void *next = phn_next_get(phn, offset); - phn_lchild_set(parent, next, offset); - if (next != NULL) { - phn_prev_set(next, parent, offset); - } - } else { - assert(phn_prev_get(phn, offset) != NULL); - phn_next_set( - phn_prev_get(phn, offset), - phn_next_get(phn, offset), offset); - } - if (phn_next_get(phn, offset) != NULL) { - phn_prev_set( - phn_next_get(phn, offset), - phn_prev_get(phn, offset), offset); - } + phn_next_set(prev, next, offset); } } From 6cab460a45411316426fb44bd476214d6af36d47 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2023 23:12:55 -0700 Subject: [PATCH 2269/2608] Add a multithreaded test for prof_sys_thread_name. Verified that this catches the issue being fixed in 5fd5583. 
--- test/unit/prof_sys_thread_name.c | 50 +++++++++++++++++++++++++++++++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c index affc788a..3aeb8cf1 100644 --- a/test/unit/prof_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/prof_sys.h" static const char *test_thread_name = "test_name"; +static const char *dump_filename = "/dev/null"; static int test_prof_sys_thread_name_read_error(char *buf, size_t limit) { @@ -25,6 +26,7 @@ test_prof_sys_thread_name_read_clear(char *buf, size_t limit) { TEST_BEGIN(test_prof_sys_thread_name) { test_skip_if(!config_prof); + test_skip_if(!opt_prof_sys_thread_name); bool oldval; size_t sz = sizeof(oldval); @@ -44,6 +46,8 @@ TEST_BEGIN(test_prof_sys_thread_name) { assert_ptr_eq(thread_name, test_thread_name, "Thread name should not be touched"); + prof_sys_thread_name_read_t *orig_prof_sys_thread_name_read = + prof_sys_thread_name_read; prof_sys_thread_name_read = test_prof_sys_thread_name_read_error; void *p = malloc(1); free(p); @@ -67,11 +71,55 @@ TEST_BEGIN(test_prof_sys_thread_name) { "mallctl read for thread name should not fail"); expect_str_eq(thread_name, "", "Thread name should be updated if the " "system call returns a different name"); + + prof_sys_thread_name_read = orig_prof_sys_thread_name_read; } TEST_END +#define ITER (16*1024) +static void * +thd_start(void *unused) { + /* Triggering samples which loads thread names. 
*/ + for (unsigned i = 0; i < ITER; i++) { + void *p = mallocx(4096, 0); + assert_ptr_not_null(p, "Unexpected mallocx() failure"); + dallocx(p, 0); + } + + return NULL; +} + +TEST_BEGIN(test_prof_sys_thread_name_mt) { + test_skip_if(!config_prof); + test_skip_if(!opt_prof_sys_thread_name); + +#define NTHREADS 4 + thd_t thds[NTHREADS]; + unsigned thd_args[NTHREADS]; + unsigned i; + + for (i = 0; i < NTHREADS; i++) { + thd_args[i] = i; + thd_create(&thds[i], thd_start, (void *)&thd_args[i]); + } + /* Prof dump which reads the thread names. */ + for (i = 0; i < ITER; i++) { + expect_d_eq(mallctl("prof.dump", NULL, NULL, + (void *)&dump_filename, sizeof(dump_filename)), 0, + "Unexpected mallctl failure while dumping"); + } + + for (i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } +} +#undef NTHREADS +#undef ITER +TEST_END + int main(void) { return test( - test_prof_sys_thread_name); + test_prof_sys_thread_name, + test_prof_sys_thread_name_mt); } From ce0b7ab6c8d7a3579d012c227013f5143d9bc8c6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2023 18:02:34 -0700 Subject: [PATCH 2270/2608] Inline the storage for thread name in prof_tdata_t. The previous approach managed the thread name in a separate buffer, which causes races because the thread name update (triggered by new samples) can happen at the same time as prof dumping (which reads the thread names) -- these two operations are under separate locks to avoid blocking each other. Implemented the thread name storage as part of the tdata struct, which resolves the lifetime issue and also avoids internal alloc / dalloc during prof_sample. 
--- include/jemalloc/internal/prof_data.h | 1 - include/jemalloc/internal/prof_inlines.h | 34 ++++++++++++ include/jemalloc/internal/prof_structs.h | 6 +- include/jemalloc/internal/prof_types.h | 3 + src/ctl.c | 6 +- src/prof.c | 19 ++++--- src/prof_data.c | 70 ++++++------------------ src/prof_log.c | 3 +- src/prof_recent.c | 4 +- src/prof_sys.c | 15 +++-- test/unit/prof_thread_name.c | 62 ++++++++++++--------- 11 files changed, 120 insertions(+), 103 deletions(-) diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 4c8e22c7..c4286b51 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -18,7 +18,6 @@ bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -char *prof_thread_name_alloc(tsd_t *tsd, const char *thread_name); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); void prof_unbias_map_init(); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index ab3e01f6..b74b115c 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -38,6 +38,22 @@ prof_gdump_get_unlocked(void) { return prof_gdump_val; } +JEMALLOC_ALWAYS_INLINE void +prof_thread_name_assert(prof_tdata_t *tdata) { + if (!config_debug) { + return; + } + prof_active_assert(); + + bool terminated = false; + for (unsigned i = 0; i < PROF_THREAD_NAME_MAX_LEN; i++) { + if (tdata->thread_name[i] == '\0') { + terminated = true; + } + } + assert(terminated); +} + JEMALLOC_ALWAYS_INLINE prof_tdata_t * prof_tdata_get(tsd_t *tsd, bool create) { prof_tdata_t *tdata; @@ -59,6 +75,10 @@ prof_tdata_get(tsd_t *tsd, bool create) { assert(tdata == NULL || tdata->attached); } + if (tdata != NULL) { + prof_thread_name_assert(tdata); + } + return tdata; } @@ -255,4 
+275,18 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, } } +JEMALLOC_ALWAYS_INLINE bool +prof_thread_name_empty(prof_tdata_t *tdata) { + prof_active_assert(); + + return (tdata->thread_name[0] == '\0'); +} + +JEMALLOC_ALWAYS_INLINE void +prof_thread_name_clear(prof_tdata_t *tdata) { + prof_active_assert(); + + tdata->thread_name[0] = '\0'; +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_H */ diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 9331fba4..da3cf8d5 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -156,9 +156,6 @@ struct prof_tdata_s { */ uint64_t thr_discrim; - /* Included in heap profile dumps if non-NULL. */ - char *thread_name; - bool attached; bool expired; @@ -179,6 +176,9 @@ struct prof_tdata_s { */ ckh_t bt2tctx; + /* Included in heap profile dumps if has content. */ + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + /* State used to avoid dumping while operating on prof internals. */ bool enq; bool enq_idump; diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 87cbb4ab..104f7e61 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -77,4 +77,7 @@ typedef struct prof_recent_s prof_recent_t; /* Default number of recent allocations to record. */ #define PROF_RECENT_ALLOC_MAX_DEFAULT 0 +/* Thread name storage size limit. 
*/ +#define PROF_THREAD_NAME_MAX_LEN 16 + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index eafbdc61..cfd4ac6e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2384,13 +2384,13 @@ thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, READ_XOR_WRITE(); if (newp != NULL) { - if (newlen != sizeof(const char *)) { + const char *newval = *(const char **)newp; + if (newlen != sizeof(const char *) || newval == NULL) { ret = EINVAL; goto label_return; } - if ((ret = prof_thread_name_set(tsd, *(const char **)newp)) != - 0) { + if ((ret = prof_thread_name_set(tsd, newval)) != 0) { goto label_return; } } else { diff --git a/src/prof.c b/src/prof.c index 91425371..832aa528 100644 --- a/src/prof.c +++ b/src/prof.c @@ -415,11 +415,14 @@ prof_tdata_t * prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; - char *thread_name = (tdata->thread_name != NULL) ? - prof_thread_name_alloc(tsd, tdata->thread_name) : NULL; bool active = tdata->active; + /* Keep a local copy of the thread name, before detaching. */ + prof_thread_name_assert(tdata); + char thread_name[PROF_THREAD_NAME_MAX_LEN]; + strncpy(thread_name, tdata->thread_name, PROF_THREAD_NAME_MAX_LEN); prof_tdata_detach(tsd, tdata); + return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, active); } @@ -464,15 +467,15 @@ prof_active_set(tsdn_t *tsdn, bool active) { const char * prof_thread_name_get(tsd_t *tsd) { + static const char *prof_thread_name_dummy = ""; + assert(tsd_reentrancy_level_get(tsd) == 0); - - prof_tdata_t *tdata; - - tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { - return ""; + return prof_thread_name_dummy; } - return (tdata->thread_name != NULL ? 
tdata->thread_name : ""); + + return tdata->thread_name; } int diff --git a/src/prof_data.c b/src/prof_data.c index 56d3dc88..c33668ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -441,64 +441,30 @@ prof_bt_count(void) { return bt_count; } -char * -prof_thread_name_alloc(tsd_t *tsd, const char *thread_name) { - char *ret; - size_t size; - - if (thread_name == NULL) { - return NULL; - } - - size = strlen(thread_name) + 1; - ret = iallocztm(tsd_tsdn(tsd), size, sz_size2index(size), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); - if (ret == NULL) { - return NULL; - } - - memcpy(ret, thread_name, size); - ret[size - 1] = '\0'; - - return ret; +static void +prof_thread_name_write_tdata(prof_tdata_t *tdata, const char *thread_name) { + strncpy(tdata->thread_name, thread_name, PROF_THREAD_NAME_MAX_LEN); + tdata->thread_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; } int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { assert(tsd_reentrancy_level_get(tsd) == 0); + assert(thread_name != NULL); - prof_tdata_t *tdata; - unsigned i; - char *s; - - tdata = prof_tdata_get(tsd, true); - if (tdata == NULL) { - return EAGAIN; - } - - /* Validate input. 
*/ - if (thread_name == NULL) { - return EFAULT; - } - for (i = 0; thread_name[i] != '\0'; i++) { + for (unsigned i = 0; thread_name[i] != '\0'; i++) { char c = thread_name[i]; if (!isgraph(c) && !isblank(c)) { - return EFAULT; + return EINVAL; } } - s = prof_thread_name_alloc(tsd, thread_name); - if (s == NULL) { - return EAGAIN; + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return ENOMEM; } - char *old_thread_name = tdata->thread_name; - tdata->thread_name = s; - if (old_thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), old_thread_name, /* tcache */ NULL, - /* alloc_ctx */ NULL, /* is_internal */ true, - /* slow_path */ true); - } + prof_thread_name_write_tdata(tdata, thread_name); return 0; } @@ -949,7 +915,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, tdata->thr_uid); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, &tdata->cnt_summed); - if (tdata->thread_name != NULL) { + if (!prof_thread_name_empty(tdata)) { arg->prof_dump_write(arg->cbopaque, " "); arg->prof_dump_write(arg->cbopaque, tdata->thread_name); } @@ -1179,10 +1145,15 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; - tdata->thread_name = thread_name; tdata->attached = true; tdata->expired = false; tdata->tctx_uid_next = 0; + if (thread_name == NULL) { + prof_thread_name_clear(tdata); + } else { + prof_thread_name_write_tdata(tdata, thread_name); + } + prof_thread_name_assert(tdata); if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp)) { @@ -1230,13 +1201,8 @@ prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock); tdata_tree_remove(&tdatas, tdata); - assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached)); - if (tdata->thread_name != NULL) { - idalloctm(tsd_tsdn(tsd), tdata->thread_name, 
NULL, NULL, true, - true); - } ckh_delete(tsd, &tdata->bt2tctx); idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); } diff --git a/src/prof_log.c b/src/prof_log.c index 0632c3b3..384d5e38 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -243,8 +243,7 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); - const char *prod_thr_name = (tctx->tdata->thread_name == NULL)? - "" : tctx->tdata->thread_name; + const char *prod_thr_name = tctx->tdata->thread_name; const char *cons_thr_name = prof_thread_name_get(tsd); prof_bt_t bt; diff --git a/src/prof_recent.c b/src/prof_recent.c index 834a9446..4c3c6296 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -495,7 +495,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { &node->alloc_tctx->thr_uid); prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata; assert(alloc_tdata != NULL); - if (alloc_tdata->thread_name != NULL) { + if (!prof_thread_name_empty(alloc_tdata)) { emitter_json_kv(emitter, "alloc_thread_name", emitter_type_string, &alloc_tdata->thread_name); } @@ -511,7 +511,7 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_type_uint64, &node->dalloc_tctx->thr_uid); prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata; assert(dalloc_tdata != NULL); - if (dalloc_tdata->thread_name != NULL) { + if (!prof_thread_name_empty(dalloc_tdata)) { emitter_json_kv(emitter, "dalloc_thread_name", emitter_type_string, &dalloc_tdata->thread_name); } diff --git a/src/prof_sys.c b/src/prof_sys.c index d2487fd6..3f7196f8 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -462,12 +462,17 @@ prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read = void prof_sys_thread_name_fetch(tsd_t *tsd) { -#define THREAD_NAME_MAX_LEN 16 - char buf[THREAD_NAME_MAX_LEN]; - if (!prof_sys_thread_name_read(buf, THREAD_NAME_MAX_LEN)) { - 
prof_thread_name_set_impl(tsd, buf); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); + if (tdata == NULL) { + return; } -#undef THREAD_NAME_MAX_LEN + + if (prof_sys_thread_name_read(tdata->thread_name, + PROF_THREAD_NAME_MAX_LEN) != 0) { + prof_thread_name_clear(tdata); + } + + tdata->thread_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; } int diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 3c4614fc..0fc29f75 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -14,8 +14,6 @@ mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, expect_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected thread.prof.name value", func, line); } -#define mallctl_thread_name_get(a) \ - mallctl_thread_name_get_impl(a, __func__, __LINE__) static void mallctl_thread_name_set_impl(const char *thread_name, const char *func, @@ -26,51 +24,59 @@ mallctl_thread_name_set_impl(const char *thread_name, const char *func, func, line); mallctl_thread_name_get_impl(thread_name, func, line); } + +#define mallctl_thread_name_get(a) \ + mallctl_thread_name_get_impl(a, __func__, __LINE__) + #define mallctl_thread_name_set(a) \ mallctl_thread_name_set_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_thread_name_validation) { - const char *thread_name; - test_skip_if(!config_prof); test_skip_if(opt_prof_sys_thread_name); mallctl_thread_name_get(""); - mallctl_thread_name_set("hi there"); + + const char *test_name1 = "test case1"; + mallctl_thread_name_set(test_name1); + + /* Test name longer than the max len. 
*/ + char long_name[] = + "test case longer than expected; test case longer than expected"; + expect_zu_gt(strlen(long_name), PROF_THREAD_NAME_MAX_LEN, + "Long test name not long enough"); + const char *test_name_long = long_name; + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, + (void *)&test_name_long, sizeof(test_name_long)), 0, + "Unexpected mallctl failure from thread.prof.name"); + /* Long name cut to match. */ + long_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; + mallctl_thread_name_get(test_name_long); /* NULL input shouldn't be allowed. */ - thread_name = NULL; + const char *test_name2 = NULL; expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), EFAULT, - "Unexpected mallctl result writing \"%s\" to thread.prof.name", - thread_name); + (void *)&test_name2, sizeof(test_name2)), EINVAL, + "Unexpected mallctl result writing to thread.prof.name"); /* '\n' shouldn't be allowed. */ - thread_name = "hi\nthere"; + const char *test_name3 = "test\ncase"; expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), EFAULT, + (void *)&test_name3, sizeof(test_name3)), EINVAL, "Unexpected mallctl result writing \"%s\" to thread.prof.name", - thread_name); + test_name3); /* Simultaneous read/write shouldn't be allowed. 
*/ - { - const char *thread_name_old; - size_t sz; - - sz = sizeof(thread_name_old); - expect_d_eq(mallctl("thread.prof.name", - (void *)&thread_name_old, &sz, (void *)&thread_name, - sizeof(thread_name)), EPERM, - "Unexpected mallctl result writing \"%s\" to " - "thread.prof.name", thread_name); - } + const char *thread_name_old; + size_t sz = sizeof(thread_name_old); + expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, + (void *)&test_name1, sizeof(test_name1)), EPERM, + "Unexpected mallctl result from thread.prof.name"); mallctl_thread_name_set(""); } TEST_END -#define NTHREADS 4 -#define NRESET 25 static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; @@ -82,6 +88,7 @@ thd_start(void *varg) { mallctl_thread_name_get(""); mallctl_thread_name_set(thread_name); +#define NRESET 25 for (i = 0; i < NRESET; i++) { expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), 0, "Unexpected error while resetting heap profile data"); @@ -92,12 +99,14 @@ thd_start(void *varg) { mallctl_thread_name_set(""); return NULL; +#undef NRESET } TEST_BEGIN(test_prof_thread_name_threaded) { test_skip_if(!config_prof); test_skip_if(opt_prof_sys_thread_name); +#define NTHREADS 4 thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -109,10 +118,9 @@ TEST_BEGIN(test_prof_thread_name_threaded) { for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } +#undef NTHREADS } TEST_END -#undef NTHREADS -#undef NRESET int main(void) { From e62aa478c79865242363d3531fc58c4c7f65a1b4 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 28 Mar 2023 20:09:41 -0700 Subject: [PATCH 2271/2608] Rearrange the bools in prof_tdata_t to save some bytes. This lowered the sizeof(prof_tdata_t) from 200 to 192 which is a round size class. Afterwards the tdata_t size remain unchanged with the last commit, which effectively inlined the storage of thread names for free. 
--- include/jemalloc/internal/prof_structs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index da3cf8d5..49061f02 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -156,9 +156,6 @@ struct prof_tdata_s { */ uint64_t thr_discrim; - bool attached; - bool expired; - rb_node(prof_tdata_t) tdata_link; /* @@ -198,6 +195,9 @@ struct prof_tdata_s { */ bool active; + bool attached; + bool expired; + /* Temporary storage for summation during dump. */ prof_cnt_t cnt_summed; From 434a68e221f7dbb6f30bd13d318d0c22e1b47e78 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Mar 2023 19:02:24 -0700 Subject: [PATCH 2272/2608] Disallow decay during reentrancy. Decay should not be triggered during reentrant calls (may cause lock order reversal / deadlocks). Added a delay_trigger flag to the tickers to bypass decay when rentrancy_level is not zero. 
--- include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/ticker.h | 33 +++++++---- test/unit/ticker.c | 61 +++++++++++++++++---- 3 files changed, 76 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c9d7db86..609e73d3 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -131,7 +131,8 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { */ ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); uint64_t *prng_state = tsd_prng_statep_get(tsd); - if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks))) { + if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks, + tsd_reentrancy_level_get(tsd) > 0))) { arena_decay(tsdn, arena, false, false); } } diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index 6b51ddec..de034995 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -57,23 +57,27 @@ ticker_read(const ticker_t *ticker) { JEMALLOC_NOINLINE #endif static bool -ticker_fixup(ticker_t *ticker) { +ticker_fixup(ticker_t *ticker, bool delay_trigger) { + if (delay_trigger) { + ticker->tick = 0; + return false; + } ticker->tick = ticker->nticks; return true; } static inline bool -ticker_ticks(ticker_t *ticker, int32_t nticks) { +ticker_ticks(ticker_t *ticker, int32_t nticks, bool delay_trigger) { ticker->tick -= nticks; if (unlikely(ticker->tick < 0)) { - return ticker_fixup(ticker); + return ticker_fixup(ticker, delay_trigger); } return false; } static inline bool -ticker_tick(ticker_t *ticker) { - return ticker_ticks(ticker, 1); +ticker_tick(ticker_t *ticker, bool delay_trigger) { + return ticker_ticks(ticker, 1, delay_trigger); } /* @@ -150,26 +154,35 @@ ticker_geom_read(const ticker_geom_t *ticker) { JEMALLOC_NOINLINE #endif static bool -ticker_geom_fixup(ticker_geom_t *ticker, uint64_t 
*prng_state) { +ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state, + bool delay_trigger) { + if (delay_trigger) { + ticker->tick = 0; + return false; + } + uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS); ticker->tick = (uint32_t)( (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx] / (uint64_t)TICKER_GEOM_MUL); + return true; } static inline bool -ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks) { +ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks, + bool delay_trigger) { ticker->tick -= nticks; if (unlikely(ticker->tick < 0)) { - return ticker_geom_fixup(ticker, prng_state); + return ticker_geom_fixup(ticker, prng_state, delay_trigger); } return false; } static inline bool -ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state) { - return ticker_geom_ticks(ticker, prng_state, 1); +ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state, + bool delay_trigger) { + return ticker_geom_ticks(ticker, prng_state, 1, delay_trigger); } #endif /* JEMALLOC_INTERNAL_TICKER_H */ diff --git a/test/unit/ticker.c b/test/unit/ticker.c index 0dd77861..c4147a0c 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -13,12 +13,12 @@ TEST_BEGIN(test_ticker_tick) { for (j = 0; j < NTICKS; j++) { expect_u_eq(ticker_read(&ticker), NTICKS - j, "Unexpected ticker value (i=%d, j=%d)", i, j); - expect_false(ticker_tick(&ticker), + expect_false(ticker_tick(&ticker, false), "Unexpected ticker fire (i=%d, j=%d)", i, j); } expect_u32_eq(ticker_read(&ticker), 0, "Expected ticker depletion"); - expect_true(ticker_tick(&ticker), + expect_true(ticker_tick(&ticker, false), "Expected ticker fire (i=%d)", i); expect_u32_eq(ticker_read(&ticker), NTICKS, "Expected ticker reset"); @@ -34,12 +34,15 @@ TEST_BEGIN(test_ticker_ticks) { ticker_init(&ticker, NTICKS); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_false(ticker_ticks(&ticker, NTICKS), "Unexpected ticker 
fire"); + expect_false(ticker_ticks(&ticker, NTICKS, false), + "Unexpected ticker fire"); expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS), "Expected ticker fire"); + expect_true(ticker_ticks(&ticker, NTICKS, false), + "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS + 1), "Expected ticker fire"); + expect_true(ticker_ticks(&ticker, NTICKS + 1, false), + "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); #undef NTICKS } @@ -52,13 +55,14 @@ TEST_BEGIN(test_ticker_copy) { ticker_init(&ta, NTICKS); ticker_copy(&tb, &ta); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&tb, NTICKS + 1), "Expected ticker fire"); + expect_true(ticker_ticks(&tb, NTICKS + 1, false), + "Expected ticker fire"); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - ticker_tick(&ta); + ticker_tick(&ta, false); ticker_copy(&tb, &ta); expect_u_eq(ticker_read(&tb), NTICKS - 1, "Unexpected ticker value"); - expect_true(ticker_ticks(&tb, NTICKS), "Expected ticker fire"); + expect_true(ticker_ticks(&tb, NTICKS, false), "Expected ticker fire"); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); #undef NTICKS } @@ -74,7 +78,7 @@ TEST_BEGIN(test_ticker_geom) { /* Just some random constant. */ uint64_t prng_state = 0x343219f93496db9fULL; for (uint64_t i = 0; i < niters; i++) { - while(!ticker_geom_tick(&ticker, &prng_state)) { + while(!ticker_geom_tick(&ticker, &prng_state, false)) { total_ticks++; } } @@ -90,11 +94,48 @@ TEST_BEGIN(test_ticker_geom) { } TEST_END +TEST_BEGIN(test_ticker_delay) { + const int32_t ticks = 1000; + const uint64_t niters = 10000; + + ticker_t t1; + ticker_init(&t1, ticks); + + ticker_geom_t t2; + /* Just some random constant. 
*/ + uint64_t prng_state = 0x43219f93496db9f3ULL; + ticker_geom_init(&t2, ticks); + + bool delay = false; + expect_false(ticker_ticks(&t1, ticks, delay), "Unexpected ticker fire"); + expect_false(ticker_geom_ticks(&t2, &prng_state, ticks, delay), + "Unexpected ticker fire"); + expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value"); + expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value"); + + delay = true; + /* Not allowed to fire when delay is set to true. */ + for (unsigned i = 0; i < niters; i++) { + expect_false(ticker_tick(&t1, delay), "Unexpected ticker fire"); + expect_false(ticker_geom_tick(&t2, &prng_state, delay), + "Unexpected ticker fire"); + expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value"); + expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value"); + } + + delay = false; + expect_true(ticker_tick(&t1, delay), "Expected ticker fire"); + expect_true(ticker_geom_tick(&t2, &prng_state, delay), + "Expected ticker fire"); +} +TEST_END + int main(void) { return test( test_ticker_tick, test_ticker_ticks, test_ticker_copy, - test_ticker_geom); + test_ticker_geom, + test_ticker_delay); } From 5f64ad60cdd2359249c863c2a01f8555672d7c35 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Jan 2023 15:43:43 -0800 Subject: [PATCH 2273/2608] Remove locked flag set in malloc_mutex_trylock As a hint flag of the lock, parameter locked should be set only when the lock is gained or freed. 
--- include/jemalloc/internal/mutex.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 63a0b1b3..03d3557b 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -175,7 +175,6 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_not_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { - atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); return true; } mutex_owner_stats_update(tsdn, mutex); From 521970fb2e5278b7b92061933cbacdbb9478998a Mon Sep 17 00:00:00 2001 From: Eric Mueller Date: Mon, 17 Apr 2023 18:59:25 -0700 Subject: [PATCH 2274/2608] Check for equality instead of assigning in asserts in hpa_from_pai. It appears like a simple typo means we're unconditionally overwriting some fields in hpa_from_pai when asserts are enabled. From hpa_shard_init, it looks like these fields have these values anyway, so this shouldn't cause bugs, but if something is wrong it seems better to have these asserts in place. See issue #2412. 
--- src/hpa.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 8ebb2db2..1e736ad4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -703,10 +703,10 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, static hpa_shard_t * hpa_from_pai(pai_t *self) { - assert(self->alloc = &hpa_alloc); - assert(self->expand = &hpa_expand); - assert(self->shrink = &hpa_shrink); - assert(self->dalloc = &hpa_dalloc); + assert(self->alloc == &hpa_alloc); + assert(self->expand == &hpa_expand); + assert(self->shrink == &hpa_shrink); + assert(self->dalloc == &hpa_dalloc); return (hpa_shard_t *)self; } From fc680128e0aed18d878bdc71c1ceb53e79da3de7 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 21 Apr 2023 10:49:18 -0700 Subject: [PATCH 2275/2608] Remove errant `assert` in `arena_extent_alloc_large` This codepath may generate deferred work when the HPA is enabled. See also [@davidtgoldblatt's relevant comment on the PR which introduced this](https://github.com/jemalloc/jemalloc/pull/2107#discussion_r699770967) which prevented a similarly incorrect `assert` from being added elsewhere. 
--- src/arena.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/arena.c b/src/arena.c index 970f60ed..9592ab9d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -340,7 +340,6 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, arena_get_ehooks(arena), esize, alignment); edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, /* slab */ false, szind, zero, guarded, &deferred_work_generated); - assert(deferred_work_generated == false); if (edata != NULL) { if (config_stats) { From f2b28906e63bef7518c58236e3e9dde8e4fceb89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Thu, 20 Apr 2023 22:38:28 +0000 Subject: [PATCH 2276/2608] Some nits in cache_bin.h --- include/jemalloc/internal/cache_bin.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index ee8b1ae2..c9c8f865 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -454,9 +454,9 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { return false; } - if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { - return true; - } + if (unlikely(cache_bin_dalloc_safety_checks(bin, ptr))) { + return true; + } bin->stack_head--; *bin->stack_head = ptr; @@ -642,7 +642,7 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); - bin->stack_head = bin->stack_head + nflushed; + bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); } From 6841110bd6ed17b32a5fed90c53c64555366a792 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 1 May 2023 11:49:35 -0700 Subject: [PATCH 2277/2608] Make `edata_cmp_summary_comp` 30% faster `edata_cmp_summary_comp` is one of the very hottest functions, taking up 3% of all time spent inside Jemalloc. 
I noticed that all existing callsites rely only on the sign of the value returned by this function, so I came up with this equivalent branchless implementation which preserves this property. After empirical measurement, I have found that this implementation is 30% faster, therefore representing a 1% speed-up to the allocator as a whole. At @interwq's suggestion, I've applied the same optimization to `edata_esnead_comp` in case this function becomes hotter in the future. --- include/jemalloc/internal/edata.h | 35 +++++++++++++++++-------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index e77a55e6..d2d16c46 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -664,13 +664,20 @@ edata_cmp_summary_get(const edata_t *edata) { static inline int edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { - int ret; - ret = (a.sn > b.sn) - (a.sn < b.sn); - if (ret != 0) { - return ret; - } - ret = (a.addr > b.addr) - (a.addr < b.addr); - return ret; + /* + * Logically, what we're doing here is comparing based on `.sn`, and + * falling back to comparing on `.addr` in the case that `a.sn == b.sn`. + * We accomplish this by multiplying the result of the `.sn` comparison + * by 2, so that so long as it is not 0, it will dominate the `.addr` + * comparison in determining the sign of the returned result value. + * The justification for doing things this way is that this is + * branchless - all of the branches that would be present in a + * straightforward implementation are common cases, and thus the branch + * prediction accuracy is not great. As a result, this implementation + * is measurably faster (by around 30%). 
+ */ + return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + + ((a.addr > b.addr) - (a.addr < b.addr)); } static inline int @@ -683,15 +690,11 @@ edata_snad_comp(const edata_t *a, const edata_t *b) { static inline int edata_esnead_comp(const edata_t *a, const edata_t *b) { - int ret; - - ret = edata_esn_comp(a, b); - if (ret != 0) { - return ret; - } - - ret = edata_ead_comp(a, b); - return ret; + /* + * Similar to `edata_cmp_summary_comp`, we've opted for a + * branchless implementation for the sake of performance. + */ + return (2 * edata_esn_comp(a, b)) + edata_ead_comp(a, b); } ph_proto(, edata_avail, edata_t) From 70344a2d38eb71a162ea19d1a4fee8f0d168588b Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 8 May 2023 12:37:18 -0700 Subject: [PATCH 2278/2608] Make eligible functions `static` The codebase is already very disciplined in making any function which can be `static`, but there are a few that appear to have slipped through the cracks. --- include/jemalloc/internal/extent.h | 2 -- src/decay.c | 2 +- src/extent.c | 4 +++- src/hpa.c | 2 +- test/unit/bit_util.c | 4 ++-- test/unit/double_free.c | 7 ++++--- test/unit/nstime.c | 2 +- test/unit/pa.c | 3 ++- test/unit/size_check.c | 4 ++-- 9 files changed, 16 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 1d51d410..367793db 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -44,8 +44,6 @@ void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, diff --git 
a/src/decay.c b/src/decay.c index d801b2bc..dd107a34 100644 --- a/src/decay.c +++ b/src/decay.c @@ -14,7 +14,7 @@ static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { * Generate a new deadline that is uniformly random within the next epoch after * the current one. */ -void +static void decay_deadline_init(decay_t *decay) { nstime_copy(&decay->deadline, &decay->epoch); nstime_add(&decay->deadline, &decay->interval); diff --git a/src/extent.c b/src/extent.c index cf3d1f31..3374dd58 100644 --- a/src/extent.c +++ b/src/extent.c @@ -43,6 +43,8 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit, bool guarded); +static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length); /******************************************************************************/ @@ -1118,7 +1120,7 @@ extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, /* growing_retained */ false); } -bool +static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), diff --git a/src/hpa.c b/src/hpa.c index 1e736ad4..7462025c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -83,7 +83,7 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { CACHELINE); } -hpdata_t * +static hpdata_t * hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, bool *oom) { /* Don't yet support big allocations; these should get filtered out. 
*/ diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 7d31b210..295abb1b 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -48,7 +48,7 @@ TEST_BEGIN(test_pow2_ceil_zu) { } TEST_END -void +static void expect_lg_ceil_range(size_t input, unsigned answer) { if (input == 1) { expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); @@ -60,7 +60,7 @@ expect_lg_ceil_range(size_t input, unsigned answer) { "Got %u as lg_ceil of %zu", answer, input); } -void +static void expect_lg_floor_range(size_t input, unsigned answer) { if (input == 1) { expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer); diff --git a/test/unit/double_free.c b/test/unit/double_free.c index e73efe71..f1e50cd2 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -9,19 +9,20 @@ void fake_abort(const char *message) { fake_abort_called = true; } -void +static void test_double_free_pre(void) { safety_check_set_abort(&fake_abort); fake_abort_called = false; } -void +static void test_double_free_post() { expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); safety_check_set_abort(NULL); } -bool tcache_enabled() { +static bool +tcache_enabled() { bool enabled; size_t sz = sizeof(enabled); assert_d_eq( diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 56238ab3..e7e11e61 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -201,7 +201,7 @@ TEST_BEGIN(test_nstime_divide) { } TEST_END -void +static void test_nstime_since_once(nstime_t *t) { nstime_t old_t; nstime_copy(&old_t, t); diff --git a/test/unit/pa.c b/test/unit/pa.c index b1e2f6e9..d44bb95c 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -48,7 +48,8 @@ struct test_data_s { extent_hooks_t hooks; }; -test_data_t *init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { +static test_data_t * +init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { test_data_t *test_data = calloc(1, sizeof(test_data_t)); assert_ptr_not_null(test_data, ""); 
init_test_extent_hooks(&test_data->hooks); diff --git a/test/unit/size_check.c b/test/unit/size_check.c index accdc405..3cb3bc9c 100644 --- a/test/unit/size_check.c +++ b/test/unit/size_check.c @@ -14,7 +14,7 @@ void fake_abort(const char *message) { #define LARGE_SIZE1 SC_LARGE_MINCLASS #define LARGE_SIZE2 (LARGE_SIZE1 * 2) -void * +static void * test_invalid_size_pre(size_t sz) { safety_check_set_abort(&fake_abort); @@ -25,7 +25,7 @@ test_invalid_size_pre(size_t sz) { return ptr; } -void +static void test_invalid_size_post(void) { expect_true(fake_abort_called, "Safety check didn't fire"); safety_check_set_abort(NULL); From 12311fe6c37720225a3e8b5798e7051d153d29c1 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 9 May 2023 09:37:01 -0700 Subject: [PATCH 2279/2608] Fix segfault in `extent_try_coalesce_impl` Static analysis flagged this. `extent_record` was passing `NULL` as the value for `coalesced` to `extent_try_coalesce`, which in turn passes that argument to `extent_try_coalesce_impl`, where it is written to without checking if it is `NULL`. I can confirm from reviewing the fleetwide coredump data that this was in fact being hit in production. 
--- src/extent.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/extent.c b/src/extent.c index 3374dd58..fdcd0afb 100644 --- a/src/extent.c +++ b/src/extent.c @@ -822,6 +822,7 @@ static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { assert(!edata_guarded_get(edata)); + assert(coalesced != NULL); /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can @@ -928,8 +929,9 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, goto label_skip_coalesce; } if (!ecache->delay_coalesce) { + bool coalesced_unused; edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, - NULL); + &coalesced_unused); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ From dc0a184f8d349546af6a051eb87be47715eacff3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 8 May 2023 18:18:39 -0700 Subject: [PATCH 2280/2608] Fix possible `NULL` pointer dereference in `VERIFY_READ` Static analysis flagged this. Fixed by simply checking `oldlenp` before dereferencing it. --- src/ctl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index cfd4ac6e..61511d34 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1816,7 +1816,9 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { /* Verify that the space provided is enough. 
*/ #define VERIFY_READ(t) do { \ if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) { \ - *oldlenp = 0; \ + if (oldlenp != NULL) { \ + *oldlenp = 0; \ + } \ ret = EINVAL; \ goto label_return; \ } \ From 019cccc293f96c9f7886373d816aab061f65f7de Mon Sep 17 00:00:00 2001 From: auxten Date: Wed, 3 May 2023 13:25:12 +0800 Subject: [PATCH 2281/2608] Make arenas_lookup_ctl triable --- src/ctl.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 61511d34..e7d6529e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3215,19 +3215,21 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, int ret; unsigned arena_ind; void *ptr; - edata_t *edata; + emap_full_alloc_ctx_t alloc_ctx; + bool ptr_not_present; arena_t *arena; ptr = NULL; ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr); - if (edata == NULL) { + ptr_not_present = emap_full_alloc_ctx_try_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &alloc_ctx); + if (ptr_not_present) { goto label_return; } - arena = arena_get_from_edata(edata); + arena = arena_get_from_edata(alloc_ctx.edata); if (arena == NULL) { goto label_return; } From 5bac384970a8224daee0b07475950a5291fc37d3 Mon Sep 17 00:00:00 2001 From: auxten Date: Wed, 3 May 2023 22:34:30 +0800 Subject: [PATCH 2282/2608] If ptr present check if alloc_ctx.edata == NULL --- src/ctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index e7d6529e..c495ecf6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3225,7 +3225,7 @@ arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, WRITE(ptr, void *); ptr_not_present = emap_full_alloc_ctx_try_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); - if (ptr_not_present) { + if (ptr_not_present || alloc_ctx.edata == NULL) { goto label_return; } From 6ea8a7e928c86f7976c5e1356a22292509f8705b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 10 May 2023 
16:20:14 -0700 Subject: [PATCH 2283/2608] Add config detection for JEMALLOC_HAVE_PTHREAD_SET_NAME_NP. and use it on the background thread name setting. --- configure.ac | 10 ++++++++++ include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/background_thread.c | 2 +- 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index ec7a97cb..5b18fd34 100644 --- a/configure.ac +++ b/configure.ac @@ -1926,6 +1926,16 @@ dnl Check if we have dlsym support. if test "x${je_cv_pthread_getname_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_GETNAME_NP], [ ], [ ]) fi + dnl Check if pthread_set_name_np is available with the expected API. + JE_COMPILABLE([pthread_set_name_np(3)], [ +#include +#include +], [ + pthread_set_name_np(pthread_self(), "set_name_test"); +], [je_cv_pthread_set_name_np]) + if test "x${je_cv_pthread_set_name_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PTHREAD_SET_NAME_NP], [ ], [ ]) + fi dnl Check if pthread_get_name_np is not necessarily present despite dnl the pthread_set_name_np counterpart JE_COMPILABLE([pthread_get_name_np(3)], [ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 87845a48..e61667a6 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -89,6 +89,9 @@ /* Defined if pthread_getname_np(3) is available. */ #undef JEMALLOC_HAVE_PTHREAD_GETNAME_NP +/* Defined if pthread_set_name_np(3) is available. */ +#undef JEMALLOC_HAVE_PTHREAD_SET_NAME_NP + /* Defined if pthread_get_name_np(3) is available. 
*/ #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP diff --git a/src/background_thread.c b/src/background_thread.c index 3c006cec..1d5bde6c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -467,7 +467,7 @@ background_thread_entry(void *ind_arg) { assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); -#elif defined(__FreeBSD__) || defined(__DragonFly__) +#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); #endif if (opt_percpu_arena != percpu_arena_disabled) { From 94ace05832209543bde81d0a5f0e2a9660243abd Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 10 May 2023 13:20:40 -0700 Subject: [PATCH 2284/2608] Fix the prof thread_name reference in prof_recent dump. As pointed out in #2434, the thread_name in prof_tdata_t was changed in #2407. This also requires an update for the prof_recent dump, specifically the emitter expects a "char **" which is fixed in this commit. 
--- src/prof_recent.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/prof_recent.c b/src/prof_recent.c index 4c3c6296..e5b3fb17 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -496,8 +496,9 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata; assert(alloc_tdata != NULL); if (!prof_thread_name_empty(alloc_tdata)) { + const char *thread_name = alloc_tdata->thread_name; emitter_json_kv(emitter, "alloc_thread_name", - emitter_type_string, &alloc_tdata->thread_name); + emitter_type_string, &thread_name); } uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, @@ -512,8 +513,9 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata; assert(dalloc_tdata != NULL); if (!prof_thread_name_empty(dalloc_tdata)) { + const char *thread_name = dalloc_tdata->thread_name; emitter_json_kv(emitter, "dalloc_thread_name", - emitter_type_string, &dalloc_tdata->thread_name); + emitter_type_string, &thread_name); } assert(!nstime_equals_zero(&node->dalloc_time)); uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time); From d4a2b8bab10980d4677d43560f27ac9ef66cde45 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 10 May 2023 16:32:51 -0700 Subject: [PATCH 2285/2608] Add the prof_sys_thread_name feature in the prof_recent unit test. This tests the combination of the prof_recent and thread_name features. Verified that it catches the issue being fixed in this PR. Also explicitly set thread name in test/unit/prof_recent. This fixes the name testing when no default thread name is set (e.g. FreeBSD). 
--- test/include/test/thd.h | 6 ++++-- test/src/thd.c | 18 ++++++++++++++++++ test/unit/prof_recent.c | 14 ++++++-------- test/unit/prof_recent.sh | 2 +- 4 files changed, 29 insertions(+), 11 deletions(-) diff --git a/test/include/test/thd.h b/test/include/test/thd.h index 47a51262..848c5271 100644 --- a/test/include/test/thd.h +++ b/test/include/test/thd.h @@ -5,5 +5,7 @@ typedef HANDLE thd_t; typedef pthread_t thd_t; #endif -void thd_create(thd_t *thd, void *(*proc)(void *), void *arg); -void thd_join(thd_t thd, void **ret); +void thd_create(thd_t *thd, void *(*proc)(void *), void *arg); +void thd_join(thd_t thd, void **ret); +bool thd_has_setname(void); +void thd_setname(const char *name); diff --git a/test/src/thd.c b/test/src/thd.c index 9a15eabb..8f91a595 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -32,3 +32,21 @@ thd_join(thd_t thd, void **ret) { pthread_join(thd, ret); } #endif + +void +thd_setname(const char *name) { +#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP + pthread_setname_np(pthread_self(), name); +#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) + pthread_set_name_np(pthread_self(), name); +#endif +} + +bool +thd_has_setname(void) { +#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP) || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) + return true; +#else + return false; +#endif +} diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 4fb37236..2cf699d8 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -5,6 +5,8 @@ /* As specified in the shell script */ #define OPT_ALLOC_MAX 3 +const char *test_thread_name = "test_thread"; + /* Invariant before and after every test (when config_prof is on) */ static void confirm_prof_setup() { @@ -439,16 +441,11 @@ confirm_record(const char *template, const confirm_record_t *records, } ASSERT_CHAR(','); - if (opt_prof_sys_thread_name) { + if (thd_has_setname() && opt_prof_sys_thread_name) { ASSERT_FORMATTED_STR("\"%s_thread_name\"", *type); - ASSERT_CHAR(':'); - ASSERT_CHAR('"'); - 
while (*start != '"') { - ++start; - } - ASSERT_CHAR('"'); - ASSERT_CHAR(','); + ASSERT_FORMATTED_STR(":\"%s\",", + test_thread_name); } ASSERT_FORMATTED_STR("\"%s_time\"", *type); @@ -495,6 +492,7 @@ confirm_record(const char *template, const confirm_record_t *records, TEST_BEGIN(test_prof_recent_alloc_dump) { test_skip_if(!config_prof); + thd_setname(test_thread_name); confirm_prof_setup(); ssize_t future; diff --git a/test/unit/prof_recent.sh b/test/unit/prof_recent.sh index 58a54a47..10415bf3 100644 --- a/test/unit/prof_recent.sh +++ b/test/unit/prof_recent.sh @@ -1,5 +1,5 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3" + export MALLOC_CONF="prof:true,prof_active:true,lg_prof_sample:0,prof_recent_alloc_max:3,prof_sys_thread_name:true" fi From 0288126d9cc0d061766e37cbbaabaa78aff3aff5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 11 May 2023 14:20:30 -0700 Subject: [PATCH 2286/2608] Fix possible `NULL` pointer dereference from `mallctl("prof.prefix", ...)` Static analysis flagged this issue. Here is a minimal program which causes a segfault within Jemalloc: ``` #include const char *malloc_conf = "prof:true"; int main() { mallctl("prof.prefix", NULL, NULL, NULL, 0); } ``` Fixed by checking if `prefix` is `NULL`. 
--- src/prof_sys.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/prof_sys.c b/src/prof_sys.c index 3f7196f8..3cbb3a85 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -749,6 +749,9 @@ bool prof_prefix_set(tsdn_t *tsdn, const char *prefix) { cassert(config_prof); ctl_mtx_assert_held(tsdn); + if (prefix == NULL) { + return true; + } malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); if (prof_prefix == NULL) { malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); From 3e2ba7a6510be583edb316372f8cfff35f2f25d5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 26 Apr 2023 14:10:41 -0700 Subject: [PATCH 2287/2608] Remove dead stores detected by static analysis None of these are harmful, and they are almost certainly optimized away by the compiler. The motivation for fixing them anyway is that we'd like to enable static analysis as part of CI, and the first step towards that is resolving the warnings it produces at present. --- include/jemalloc/internal/log.h | 3 +-- src/ctl.c | 2 +- src/jemalloc.c | 3 +-- src/malloc_io.c | 6 +----- src/pages.c | 2 +- src/stats.c | 4 ++-- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 64208586..f39c598a 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -96,8 +96,7 @@ log_impl_varargs(const char *name, ...) 
{ dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); dst_offset += malloc_vsnprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); - dst_offset += malloc_snprintf(buf + dst_offset, - JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + malloc_snprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); va_end(ap); malloc_write(buf); diff --git a/src/ctl.c b/src/ctl.c index c495ecf6..e597b2bb 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2745,7 +2745,6 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, int ret; const char *dss = NULL; unsigned arena_ind; - dss_prec_t dss_prec_old = dss_prec_limit; dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -2773,6 +2772,7 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, * Access via index narenas is deprecated, and scheduled for removal in * 6.0.0. */ + dss_prec_t dss_prec_old; if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == ctl_arenas->narenas) { if (dss_prec != dss_prec_limit && diff --git a/src/jemalloc.c b/src/jemalloc.c index 7407022f..37cd159c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2389,7 +2389,6 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, * from the ind_large bucket. 
*/ szind_t ind_large; - size_t bumped_usize = usize; dopts->alignment = prof_sample_align(dopts->alignment); if (usize <= SC_SMALL_MAXCLASS) { @@ -2398,7 +2397,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) == SC_LARGE_MINCLASS); ind_large = sz_size2index(SC_LARGE_MINCLASS); - bumped_usize = sz_s2u(SC_LARGE_MINCLASS); + size_t bumped_usize = sz_s2u(SC_LARGE_MINCLASS); ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, bumped_usize, ind_large); if (unlikely(ret == NULL)) { diff --git a/src/malloc_io.c b/src/malloc_io.c index b76885cb..6de409b3 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -423,7 +423,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { unsigned char len = '?'; char *s; size_t slen; - bool first_width_digit = true; bool pad_zero = false; f++; @@ -462,9 +461,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { } break; case '0': - if (first_width_digit) { - pad_zero = true; - } + pad_zero = true; JEMALLOC_FALLTHROUGH; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { @@ -474,7 +471,6 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(uwidth != UINTMAX_MAX || get_errno() != ERANGE); width = (int)uwidth; - first_width_digit = false; break; } default: break; diff --git a/src/pages.c b/src/pages.c index b672e4de..09b51b88 100644 --- a/src/pages.c +++ b/src/pages.c @@ -68,7 +68,6 @@ static int madvise_dont_need_zeros_is_faulty = -1; */ static int madvise_MADV_DONTNEED_zeroes_pages() { - int works = -1; size_t size = PAGE; void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, @@ -83,6 +82,7 @@ static int madvise_MADV_DONTNEED_zeroes_pages() } memset(addr, 'A', size); + int works; if (madvise(addr, size, MADV_DONTNEED) == 0) { works = memchr(addr, 'A', size) == NULL; } else { diff --git a/src/stats.c b/src/stats.c index 43360a2d..59db4f8e 
100644 --- a/src/stats.c +++ b/src/stats.c @@ -1803,7 +1803,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t sz; VARIABLE_ARRAY(bool, initialized, narenas); bool destroyed_initialized; - unsigned i, j, ninitialized; + unsigned i, ninitialized; xmallctlnametomib("arena.0.initialized", mib, &miblen); for (i = ninitialized = 0; i < narenas; i++) { @@ -1843,7 +1843,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Unmerged stats. */ if (unmerged) { - for (i = j = 0; i < narenas; i++) { + for (i = 0; i < narenas; i++) { if (initialized[i]) { char arena_ind_str[20]; malloc_snprintf(arena_ind_str, From 4e6f1e920814eafb4ca165a861e9c886022b35e3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 15 May 2023 10:39:15 -0700 Subject: [PATCH 2288/2608] Allow overriding `LG_PAGE` This is useful for our internal builds where we override the configuration in the header files generated by autoconf. --- .../internal/jemalloc_internal_defs.h.in | 2 ++ .../internal/jemalloc_internal_overrides.h | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 include/jemalloc/internal/jemalloc_internal_overrides.h diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index e61667a6..20355949 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -452,4 +452,6 @@ */ #undef JEMALLOC_HAVE_RDTSCP +#include "jemalloc_internal_overrides.h" + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h new file mode 100644 index 00000000..ddd6ee17 --- /dev/null +++ b/include/jemalloc/internal/jemalloc_internal_overrides.h @@ -0,0 +1,16 @@ +#ifndef JEMALLOC_INTERNAL_OVERRIDES_H +#define JEMALLOC_INTERNAL_OVERRIDES_H + +/* + * Under normal circumstances this header serves no 
purpose, as these settings + * can be customized via the corresponding autoconf options at configure-time. + * Overriding in this fashion is useful when the header files generated by + * autoconf are used as input for another build system. + */ + +#ifdef JEMALLOC_OVERRIDE_LG_PAGE + #undef LG_PAGE + #define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE +#endif + +#endif /* JEMALLOC_INTERNAL_OVERRIDES_H */ From 9c32689e576906332d2ceaabafc2a927d152beba Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 18 May 2023 10:13:59 -0700 Subject: [PATCH 2289/2608] Fix bug where hpa_shard was not being destroyed It appears that this was a simple mistake where `hpa_shard_disable` was being called instead of `hpa_shard_destroy`. At present `hpa_shard_destroy` is not called anywhere at all outside of test-cases, which further suggests that this is a bug. @davidtgoldblatt noted however that since HPA is disabled for manual arenas and we don't support destruction for auto arenas that presently there is no way to actually trigger this bug. Nonetheless, it should be fixed. --- src/pa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pa.c b/src/pa.c index eb7e4620..18c850d7 100644 --- a/src/pa.c +++ b/src/pa.c @@ -108,7 +108,7 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { sec_flush(tsdn, &shard->hpa_sec); - hpa_shard_disable(tsdn, &shard->hpa_shard); + hpa_shard_destroy(tsdn, &shard->hpa_shard); } } From a2259f9fa6c9a82cacf1d85cf7d92a1a44484a97 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 25 May 2023 14:22:37 -0700 Subject: [PATCH 2290/2608] Fix the include path of "jemalloc_internal_overrides.h". 
--- include/jemalloc/internal/jemalloc_internal_defs.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 20355949..bef99dea 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -452,6 +452,6 @@ */ #undef JEMALLOC_HAVE_RDTSCP -#include "jemalloc_internal_overrides.h" +#include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ From d577e9b5880906dbd4ab04fb61de5650170ac08b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 17 May 2023 11:54:56 -0700 Subject: [PATCH 2291/2608] Explicitly cast to unsigned for MALLOCX_ARENA and _TCACHE defines. --- include/jemalloc/internal/jemalloc_internal_types.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index b23a8bed..b1c48be9 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -45,12 +45,12 @@ typedef enum malloc_init_e malloc_init_t; #define MALLOCX_ARENA_SHIFT 20 #define MALLOCX_TCACHE_SHIFT 8 #define MALLOCX_ARENA_MASK \ - (((1 << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT) + ((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)) /* NB: Arena index bias decreases the maximum number of arenas by 1. 
*/ -#define MALLOCX_ARENA_LIMIT ((1 << MALLOCX_ARENA_BITS) - 1) +#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1)) #define MALLOCX_TCACHE_MASK \ - (((1 << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT) -#define MALLOCX_TCACHE_MAX ((1 << MALLOCX_TCACHE_BITS) - 3) + ((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)) +#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3)) #define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. */ #define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ From d59e30cbc9fa47425a4ba907ab8f8b580e26f37e Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 24 May 2023 14:13:28 +0200 Subject: [PATCH 2292/2608] Rename fallback_impl to fallbackNewImpl and prune in jeprof The existing fallback_impl name seemed a bit generic and given it's static probably okay to rename. Closes #2451 --- bin/jeprof.in | 2 ++ src/jemalloc_cpp.cpp | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index dbf6252b..b734f50b 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2957,6 +2957,8 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@malloc', 'newImpl', 'void* newImpl', + 'fallbackNewImpl', + 'void* fallbackNewImpl', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index e39615bc..4258b1ad 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -100,7 +100,7 @@ handleOOM(std::size_t size, bool nothrow) { template JEMALLOC_NOINLINE static void * -fallback_impl(std::size_t size) noexcept(IsNoExcept) { +fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = malloc_default(size); if (likely(ptr != nullptr)) { return ptr; @@ -112,7 +112,7 @@ template JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { - return imalloc_fastpath(size, 
&fallback_impl); + return imalloc_fastpath(size, &fallbackNewImpl); } void * From c1d3ad46746da038cfc66ea5b545d195f511b0f4 Mon Sep 17 00:00:00 2001 From: Arne Welzel Date: Wed, 24 May 2023 21:07:49 +0200 Subject: [PATCH 2293/2608] Prune je_malloc_default and do_rallocx in jeprof Running a simple Ruby and Python execution shows je_malloc_default and do_rallocx() in the resulting SVG / text output. Prune these, too. MALLOC_CONF='stats_print:true,lg_prof_sample:8,prof:true,prof_final:true' \ python3 -c '[x for x in range(10000000)]' MALLOC_CONF='stats_print:true,lg_prof_sample:8,prof:true,prof_final:true' \ ruby -e 'puts (0..1000).map{"0"}.join(" ")' --- bin/jeprof.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index b734f50b..65f616d4 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2955,6 +2955,7 @@ sub RemoveUninterestingFrames { foreach my $name ('@JEMALLOC_PREFIX@calloc', 'cfree', '@JEMALLOC_PREFIX@malloc', + 'je_malloc_default', 'newImpl', 'void* newImpl', 'fallbackNewImpl', @@ -2968,6 +2969,7 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@realloc', '@JEMALLOC_PREFIX@mallocx', '@JEMALLOC_PREFIX@rallocx', + 'do_rallocx', '@JEMALLOC_PREFIX@xallocx', '@JEMALLOC_PREFIX@dallocx', '@JEMALLOC_PREFIX@sdallocx', From 6d4aa33753d1d6fa60925b40e0fd40f1e6a42ef4 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Sat, 27 May 2023 14:21:11 -0700 Subject: [PATCH 2294/2608] Extract the calculation of psset heap assignment for an hpdata into a common function This is in preparation for upcoming changes I plan to make to this logic. Extracting it into a common function will make this easier and less error-prone, and cleans up the existing code regardless. 
--- include/jemalloc/internal/hpdata.h | 4 +-- src/psset.c | 47 ++++++++++++------------------ 2 files changed, 21 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 1fb534db..5bf7aae8 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -343,12 +343,12 @@ hpdata_assert_consistent(hpdata_t *hpdata) { } static inline bool -hpdata_empty(hpdata_t *hpdata) { +hpdata_empty(const hpdata_t *hpdata) { return hpdata->h_nactive == 0; } static inline bool -hpdata_full(hpdata_t *hpdata) { +hpdata_full(const hpdata_t *hpdata) { return hpdata->h_nactive == HUGEPAGE_PAGES; } diff --git a/src/psset.c b/src/psset.c index 9a8f054f..55966816 100644 --- a/src/psset.c +++ b/src/psset.c @@ -92,8 +92,20 @@ psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, psset_bin_stats_insert_remove(psset, binstats, ps, false); } +static pszind_t +psset_hpdata_heap_index(const hpdata_t *ps) { + assert(!hpdata_full(ps)); + assert(!hpdata_empty(ps)); + size_t longest_free_range = hpdata_longest_free_range_get(ps); + pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( + longest_free_range << LG_PAGE)); + assert(pind < PSSET_NPSIZES); + return pind; +} + static void -psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { +psset_hpdata_heap_remove(psset_t *psset, hpdata_t *ps) { + pszind_t pind = psset_hpdata_heap_index(ps); hpdata_age_heap_remove(&psset->pageslabs[pind], ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { fb_unset(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)pind); @@ -101,7 +113,8 @@ psset_hpdata_heap_remove(psset_t *psset, pszind_t pind, hpdata_t *ps) { } static void -psset_hpdata_heap_insert(psset_t *psset, pszind_t pind, hpdata_t *ps) { +psset_hpdata_heap_insert(psset_t *psset, hpdata_t *ps) { + pszind_t pind = psset_hpdata_heap_index(ps); if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { fb_set(psset->pageslab_bitmap, 
PSSET_NPSIZES, (size_t)pind); } @@ -115,12 +128,7 @@ psset_stats_insert(psset_t* psset, hpdata_t *ps) { } else if (hpdata_full(ps)) { psset_bin_stats_insert(psset, psset->stats.full_slabs, ps); } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - + pszind_t pind = psset_hpdata_heap_index(ps); psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], ps); } @@ -133,12 +141,7 @@ psset_stats_remove(psset_t *psset, hpdata_t *ps) { } else if (hpdata_full(ps)) { psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - + pszind_t pind = psset_hpdata_heap_index(ps); psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], ps); } @@ -165,13 +168,7 @@ psset_alloc_container_insert(psset_t *psset, hpdata_t *ps) { * going to return them from a psset_pick_alloc call. */ } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - - psset_hpdata_heap_insert(psset, pind, ps); + psset_hpdata_heap_insert(psset, ps); } } @@ -186,13 +183,7 @@ psset_alloc_container_remove(psset_t *psset, hpdata_t *ps) { } else if (hpdata_full(ps)) { /* Same as above -- do nothing in this case. 
*/ } else { - size_t longest_free_range = hpdata_longest_free_range_get(ps); - - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); - assert(pind < PSSET_NPSIZES); - - psset_hpdata_heap_remove(psset, pind, ps); + psset_hpdata_heap_remove(psset, ps); } } From 5832ef658975d5f2da2bdfddf55712d9fa343e30 Mon Sep 17 00:00:00 2001 From: Christos Zoulas Date: Wed, 31 May 2023 13:00:35 -0400 Subject: [PATCH 2295/2608] Use a local variable to set the alignment for this particular allocation instead of changing mmap_flags which makes the change permanent. This was enforcing large alignments for allocations that did not need it causing fragmentation. Reported by Andreas Gustafsson. --- src/pages.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pages.c b/src/pages.c index 09b51b88..2d5b8164 100644 --- a/src/pages.c +++ b/src/pages.c @@ -155,6 +155,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { * of existing mappings, and we only want to create new mappings. */ { + int flags = mmap_flags; #ifdef __NetBSD__ /* * On NetBSD PAGE for a platform is defined to the @@ -164,12 +165,12 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { */ if (alignment > os_page || PAGE > os_page) { unsigned int a = ilog2(MAX(alignment, PAGE)); - mmap_flags |= MAP_ALIGNED(a); + flags |= MAP_ALIGNED(a); } #endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0); + ret = mmap(addr, size, prot, flags, PAGES_FD_TAG, 0); } assert(ret != NULL); From 86eb49b47847e48390c672371987ff4e476e53a3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 6 Jun 2023 13:37:07 -0700 Subject: [PATCH 2296/2608] Fix the arena selection for oversized allocations. Use the per-arena oversize_threshold, instead of the global setting. 
--- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/arena_inlines_b.h | 16 ++++++++++------ src/arena.c | 5 ++++- src/jemalloc.c | 2 +- 4 files changed, 16 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 9f5c1958..3821233f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -100,7 +100,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); -bool arena_init_huge(void); +bool arena_init_huge(arena_t *a0); bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 609e73d3..b57dbfdd 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -28,14 +28,18 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { * 1) is using auto arena selection (i.e. arena == NULL), and 2) the * thread is not assigned to a manual arena. 
*/ - if (unlikely(size >= oversize_threshold)) { - arena_t *tsd_arena = tsd_arena_get(tsd); - if (tsd_arena == NULL || arena_is_auto(tsd_arena)) { - return arena_choose_huge(tsd); - } + arena_t *tsd_arena = tsd_arena_get(tsd); + if (tsd_arena == NULL) { + tsd_arena = arena_choose(tsd, NULL); } - return arena_choose(tsd, NULL); + size_t threshold = atomic_load_zu( + &tsd_arena->pa_shard.pac.oversize_threshold, ATOMIC_RELAXED); + if (unlikely(size >= threshold) && arena_is_auto(tsd_arena)) { + return arena_choose_huge(tsd); + } + + return tsd_arena; } JEMALLOC_ALWAYS_INLINE void diff --git a/src/arena.c b/src/arena.c index 9592ab9d..ab1a9ab8 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1770,7 +1770,7 @@ arena_choose_huge(tsd_t *tsd) { } bool -arena_init_huge(void) { +arena_init_huge(arena_t *a0) { bool huge_enabled; /* The threshold should be large size class. */ @@ -1783,6 +1783,9 @@ arena_init_huge(void) { /* Reserve the index for the huge arena. */ huge_arena_ind = narenas_total_get(); oversize_threshold = opt_oversize_threshold; + /* a0 init happened before malloc_conf_init. */ + atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, + oversize_threshold, ATOMIC_RELAXED); huge_enabled = true; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 37cd159c..8a69d81b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2090,7 +2090,7 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); - if (arena_init_huge()) { + if (arena_init_huge(a0)) { narenas_total_inc(); } manual_arena_base = narenas_total_get(); From 90176f8a87a0b5bdb0ac4c1a515b1d9c58dc5a82 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 9 May 2023 12:06:47 -0700 Subject: [PATCH 2297/2608] Fix segfault in rb `*_tree_remove` Static analysis flagged this. It's possible to segfault in the `*_tree_remove` function generated by `rb_gen`, as `nodep` may still be `NULL` after the initial for loop. 
I can confirm from reviewing the fleetwide coredump data that this was in fact being hit in production, primarily through `tctx_tree_remove`, and much more rarely through `gctx_tree_remove`. --- include/jemalloc/internal/arena_inlines_b.h | 61 +++++++++++---------- include/jemalloc/internal/rb.h | 15 +++++ 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b57dbfdd..11b0ce46 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -42,6 +42,34 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { return tsd_arena; } +JEMALLOC_ALWAYS_INLINE bool +large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { + if (!config_opt_safety_checks) { + return false; + } + + /* + * Eagerly detect double free and sized dealloc bugs for large sizes. + * The cost is low enough (as edata will be accessed anyway) to be + * enabled all the time. + */ + if (unlikely(edata == NULL || + edata_state_get(edata) != extent_state_active)) { + safety_check_fail("Invalid deallocation detected: " + "pages being freed (%p) not currently active, " + "possibly caused by double free bugs.", ptr); + return true; + } + size_t input_size = sz_index2size(szind); + if (unlikely(input_size != edata_usize_get(edata))) { + safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, + /* true_size */ edata_usize_get(edata), input_size); + return true; + } + + return false; +} + JEMALLOC_ALWAYS_INLINE void arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, prof_info_t *prof_info, bool reset_recent) { @@ -65,6 +93,11 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. 
*/ assert(edata != NULL); + if (reset_recent && + large_dalloc_safety_checks(edata, ptr, + edata_szind_get(edata))) { + return; + } large_prof_info_get(tsd, edata, prof_info, reset_recent); } else { prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; @@ -215,34 +248,6 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { return sz_index2size(full_alloc_ctx.szind); } -JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, void *ptr, szind_t szind) { - if (!config_opt_safety_checks) { - return false; - } - - /* - * Eagerly detect double free and sized dealloc bugs for large sizes. - * The cost is low enough (as edata will be accessed anyway) to be - * enabled all the time. - */ - if (unlikely(edata == NULL || - edata_state_get(edata) != extent_state_active)) { - safety_check_fail("Invalid deallocation detected: " - "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", ptr); - return true; - } - size_t input_size = sz_index2size(szind); - if (unlikely(input_size != edata_usize_get(edata))) { - safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, - /* true_size */ edata_usize_get(edata), input_size); - return true; - } - - return false; -} - static inline void arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { if (config_prof && unlikely(szind < SC_NBINS)) { diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index a9a51cb6..fc1dac7c 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -560,6 +560,20 @@ a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ * the same as with the unfiltered version, with the added constraint that the * returned node must pass the filter. 
*/ +JEMALLOC_ALWAYS_INLINE void +rb_remove_safety_checks(const void *nodep, const char *function_name) { + if (!config_opt_safety_checks) { + return; + } + if (unlikely(nodep == NULL)) { + safety_check_fail( + ": Invalid deallocation detected in %s: " + "attempting to remove node from tree but node was " + "not found. Possibly caused by double free bugs.", + function_name); + } +} + #define rb_gen(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp) \ rb_gen_impl(a_attr, a_prefix, a_rbt_type, a_type, a_field, a_cmp, \ rb_empty_summarize, false) @@ -852,6 +866,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } \ } \ + rb_remove_safety_checks(nodep, __func__); \ assert(nodep->node == node); \ pathp--; \ if (pathp->node != node) { \ From 210f0d0b2bb3ed51a83a675c34f09fc36ac686e1 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 15 Jun 2023 14:47:20 -0700 Subject: [PATCH 2298/2608] Fix read of uninitialized data in `prof_free` In #2433, I inadvertently introduced a regression which causes the use of uninitialized data. Namely, the control path I added for the safety check in `arena_prof_info_get` neglected to set `prof_info->alloc_tctx` when the check fails, resulting in `prof_info.alloc_tctx` being uninitialized [when it is read at the end of `prof_free`](https://github.com/jemalloc/jemalloc/blob/90176f8a87a0b5bdb0ac4c1a515b1d9c58dc5a82/include/jemalloc/internal/prof_inlines.h#L272). 
--- include/jemalloc/internal/arena_inlines_b.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 11b0ce46..bf25a31c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -96,6 +96,7 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (reset_recent && large_dalloc_safety_checks(edata, ptr, edata_szind_get(edata))) { + prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; return; } large_prof_info_get(tsd, edata, prof_info, reset_recent); From bb0333e745a71aea0230a09be49a752115d45bb7 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 12 May 2023 13:17:52 -0700 Subject: [PATCH 2299/2608] Fix remaining static analysis warnings Fix or suppress the remaining warnings generated by static analysis. This is a necessary step before we can incorporate static analysis into CI. Where possible, I've preferred to modify the code itself instead of just disabling the warning with a magic comment, so that if we decide to use different static analysis tools in the future we will be covered against them raising similar warnings. 
--- .../internal/jemalloc_internal_inlines_c.h | 2 ++ include/jemalloc/internal/ph.h | 6 ++++ include/jemalloc/internal/rb.h | 1 + include/jemalloc/internal/rtree.h | 5 +++- include/jemalloc/internal/witness.h | 3 ++ src/background_thread.c | 28 +++++++++++++------ src/ctl.c | 17 ++++++++--- src/decay.c | 1 + src/extent.c | 1 + src/jemalloc.c | 3 +- src/jemalloc_cpp.cpp | 2 ++ src/malloc_io.c | 3 +- 12 files changed, 56 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 719b8eea..206f1400 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -325,6 +325,8 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { tcache_t *tcache = tsd_tcachep_get(tsd); assert(tcache == tcache_get(tsd)); cache_bin_t *bin = &tcache->bins[ind]; + /* Suppress spurious warning from static analysis */ + assert(bin != NULL); bool tcache_success; void *ret; diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index c3cf8743..1fabee5d 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -127,6 +127,7 @@ phn_merge_ordered(void *phn0, void *phn1, size_t offset, phn0child = phn_lchild_get(phn0, offset); phn_next_set(phn1, phn0child, offset); if (phn0child != NULL) { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_prev_set(phn0child, phn1, offset); } phn_lchild_set(phn0, phn1, offset); @@ -143,6 +144,7 @@ phn_merge(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { phn_merge_ordered(phn0, phn1, offset, cmp); result = phn0; } else { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_merge_ordered(phn1, phn0, offset, cmp); result = phn1; } @@ -188,10 +190,12 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { phn_prev_set(phn1, NULL, offset); phn_next_set(phn1, NULL, offset); phn0 = phn_merge(phn0, 
phn1, offset, cmp); + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_next_set(tail, phn0, offset); tail = phn0; phn0 = phnrest; } else { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_next_set(tail, phn0, offset); tail = phn0; phn0 = NULL; @@ -210,6 +214,7 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { if (head == NULL) { break; } + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_next_set(tail, phn0, offset); tail = phn0; phn0 = head; @@ -298,6 +303,7 @@ ph_try_aux_merge_pair(ph_t *ph, size_t offset, ph_cmp_t cmp) { phn0 = phn_merge(phn0, phn1, offset, cmp); phn_next_set(phn0, next_phn1, offset); if (next_phn1 != NULL) { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ phn_prev_set(next_phn1, phn0, offset); } phn_next_set(ph->root, phn0, offset); diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index fc1dac7c..343e7c13 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -867,6 +867,7 @@ a_prefix##remove(a_rbt_type *rbtree, a_type *node) { \ } \ } \ rb_remove_safety_checks(nodep, __func__); \ + assert(nodep != NULL); \ assert(nodep->node == node); \ pathp--; \ if (pathp->node != node) { \ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index a00adb29..22f5f9dc 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -268,6 +268,10 @@ rtree_contents_encode(rtree_contents_t contents, void **bits, unsigned *additional) { #ifdef RTREE_LEAF_COMPACT *bits = (void *)rtree_leaf_elm_bits_encode(contents); + /* Suppress spurious warning from static analysis */ + if (config_debug) { + *additional = 0; + } #else *additional = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) @@ -299,7 +303,6 @@ rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); void *bits; unsigned additional; - 
rtree_contents_encode(contents, &bits, &additional); rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); } diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index e81b9a00..fbe5f943 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -341,6 +341,9 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { witness_lock_error(witnesses, witness); } + /* Suppress spurious warning from static analysis */ + assert(ql_empty(witnesses) || + qr_prev(ql_first(witnesses), link) != NULL); ql_elm_new(witness, link); ql_tail_insert(witnesses, witness, link); } diff --git a/src/background_thread.c b/src/background_thread.c index 1d5bde6c..53b492bb 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -340,8 +340,9 @@ background_thread_create_signals_masked(pthread_t *thread, } static bool -check_background_thread_creation(tsd_t *tsd, unsigned *n_created, - bool *created_threads) { +check_background_thread_creation(tsd_t *tsd, + const size_t const_max_background_threads, + unsigned *n_created, bool *created_threads) { bool ret = false; if (likely(*n_created == n_background_threads)) { return ret; @@ -349,7 +350,7 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); - for (unsigned i = 1; i < max_background_threads; i++) { + for (unsigned i = 1; i < const_max_background_threads; i++) { if (created_threads[i]) { continue; } @@ -391,10 +392,19 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, static void background_thread0_work(tsd_t *tsd) { - /* Thread0 is also responsible for launching / terminating threads. */ - VARIABLE_ARRAY(bool, created_threads, max_background_threads); + /* + * Thread0 is also responsible for launching / terminating threads. + * We are guaranteed that `max_background_threads` will not change + * underneath us. 
Unfortunately static analysis tools do not understand + * this, so we are extracting `max_background_threads` into a local + * variable solely for the sake of exposing this information to such + * tools. + */ + const size_t const_max_background_threads = max_background_threads; + assert(const_max_background_threads > 0); + VARIABLE_ARRAY(bool, created_threads, const_max_background_threads); unsigned i; - for (i = 1; i < max_background_threads; i++) { + for (i = 1; i < const_max_background_threads; i++) { created_threads[i] = false; } /* Start working, and create more threads when asked. */ @@ -404,8 +414,8 @@ background_thread0_work(tsd_t *tsd) { &background_thread_info[0])) { continue; } - if (check_background_thread_creation(tsd, &n_created, - (bool *)&created_threads)) { + if (check_background_thread_creation(tsd, const_max_background_threads, + &n_created, (bool *)&created_threads)) { continue; } background_work_sleep_once(tsd_tsdn(tsd), @@ -417,7 +427,7 @@ background_thread0_work(tsd_t *tsd) { * the global background_thread mutex (and is waiting) for us. */ assert(!background_thread_enabled()); - for (i = 1; i < max_background_threads; i++) { + for (i = 1; i < const_max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; assert(info->state != background_thread_paused); if (created_threads[i]) { diff --git a/src/ctl.c b/src/ctl.c index e597b2bb..7d0ab346 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1314,9 +1314,18 @@ ctl_background_thread_stats_read(tsdn_t *tsdn) { static void ctl_refresh(tsdn_t *tsdn) { - unsigned i; + malloc_mutex_assert_owner(tsdn, &ctl_mtx); + /* + * We are guaranteed that `ctl_arenas->narenas` will not change + * underneath us since we hold `ctl_mtx` for the duration of this + * function. Unfortunately static analysis tools do not understand this, + * so we are extracting `narenas` into a local variable solely for the + * sake of exposing this information to such tools. 
+ */ + const unsigned narenas = ctl_arenas->narenas; + assert(narenas > 0); ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL); - VARIABLE_ARRAY(arena_t *, tarenas, ctl_arenas->narenas); + VARIABLE_ARRAY(arena_t *, tarenas, narenas); /* * Clear sum stats, since they will be merged into by @@ -1324,11 +1333,11 @@ ctl_refresh(tsdn_t *tsdn) { */ ctl_arena_clear(ctl_sarena); - for (i = 0; i < ctl_arenas->narenas; i++) { + for (unsigned i = 0; i < narenas; i++) { tarenas[i] = arena_get(tsdn, i, false); } - for (i = 0; i < ctl_arenas->narenas; i++) { + for (unsigned i = 0; i < narenas; i++) { ctl_arena_t *ctl_arena = arenas_i(i); bool initialized = (tarenas[i] != NULL); diff --git a/src/decay.c b/src/decay.c index dd107a34..f75696dd 100644 --- a/src/decay.c +++ b/src/decay.c @@ -157,6 +157,7 @@ decay_deadline_reached(const decay_t *decay, const nstime_t *time) { uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { uint64_t decay_interval_ns = decay_epoch_duration_ns(decay); + assert(decay_interval_ns != 0); size_t n_epoch = (size_t)(nstime_ns(time) / decay_interval_ns); uint64_t npages_purge; diff --git a/src/extent.c b/src/extent.c index fdcd0afb..18e4698c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -407,6 +407,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, expand_edata, EXTENT_PAI_PAC, ecache->state); if (edata != NULL) { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ extent_assert_can_expand(expand_edata, edata); if (edata_size_get(edata) < size) { emap_release_edata(tsdn, pac->emap, edata, diff --git a/src/jemalloc.c b/src/jemalloc.c index 8a69d81b..88559be0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -991,7 +991,8 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { * Each source should only be read once, to minimize # of * syscalls on init. 
*/ - assert(read_source++ == which_source); + assert(read_source == which_source); + read_source++; } assert(which_source < MALLOC_CONF_NSOURCES); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 4258b1ad..44569c14 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -1,5 +1,6 @@ #include #include +// NOLINTBEGIN(misc-use-anonymous-namespace) #define JEMALLOC_CPP_CPP_ #ifdef __cplusplus @@ -258,3 +259,4 @@ operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexc } #endif // __cpp_aligned_new +// NOLINTEND(misc-use-anonymous-namespace) diff --git a/src/malloc_io.c b/src/malloc_io.c index 6de409b3..192d8208 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -316,7 +316,8 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { if (alt_form) { s -= 2; (*slen_p) += 2; - memcpy(s, uppercase ? "0X" : "0x", 2); + s[0] = '0'; + s[1] = uppercase ? 'X' : 'x'; } return s; } From 05385191d4ba42eb219141503a42c648722a8d4f Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 17 May 2023 17:00:10 -0700 Subject: [PATCH 2300/2608] Add GitHub action which runs static analysis Now that all of the various issues that static analysis uncovered have been fixed (#2431, #2432, #2433, #2436, #2437, #2446), I've added a GitHub action which will run static analysis for every PR going forward. When static analysis detects issues with your code, the GitHub action provides a link to download its findings in a form tailored for human consumption. Take a look at [this demonstration of what it looks like when static analysis issues are found](https://github.com/Svetlitski/jemalloc/actions/runs/5010245602) on my fork for an example (make sure to follow the instructions in the error message to download and inspect the results). 
--- .github/workflows/static_analysis.yaml | 68 ++++++++++++++++++++++++++ .gitignore | 4 ++ scripts/run_static_analysis.sh | 52 ++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 .github/workflows/static_analysis.yaml create mode 100755 scripts/run_static_analysis.sh diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml new file mode 100644 index 00000000..547b1564 --- /dev/null +++ b/.github/workflows/static_analysis.yaml @@ -0,0 +1,68 @@ +name: 'Static Analysis' +on: [pull_request] +jobs: + static-analysis: + runs-on: ubuntu-latest + steps: + # We build libunwind ourselves because sadly the version + # provided by Ubuntu via apt-get is much too old. + - name: Check out libunwind + uses: actions/checkout@v3 + with: + repository: libunwind/libunwind + path: libunwind + ref: 'v1.6.2' + github-server-url: 'https://github.com' + - name: Install libunwind + run: | + cd libunwind + autoreconf -i + ./configure --prefix=/usr + make -s -j $(nproc) V=0 + sudo make -s install V=0 + cd .. + rm -rf libunwind + - name: Check out repository + uses: actions/checkout@v3 + # We download LLVM directly from the latest stable release + # on GitHub, because this tends to be much newer than the + # version available via apt-get in Ubuntu. 
+ - name: Download LLVM + uses: dsaltares/fetch-gh-release-asset@master + with: + repo: 'llvm/llvm-project' + version: 'latest' + file: 'clang[+]llvm-.*x86_64-linux-gnu.*' + regex: true + target: 'llvm_assets/' + token: ${{ secrets.GITHUB_TOKEN }} + - name: Install prerequisites + id: install_prerequisites + run: | + tar -C llvm_assets -xaf llvm_assets/*.tar* & + sudo apt-get update + sudo apt-get install -y jq bear python3-pip + pip install codechecker + echo "Extracting LLVM from tar" 1>&2 + wait + echo "LLVM_BIN_DIR=$(echo llvm_assets/clang*/bin)" >> "$GITHUB_OUTPUT" + - name: Run static analysis + id: run_static_analysis + run: > + PATH="${{ steps.install_prerequisites.outputs.LLVM_BIN_DIR }}:$PATH" + LDFLAGS='-L/usr/lib' + scripts/run_static_analysis.sh static_analysis_results "$GITHUB_OUTPUT" + - name: Upload static analysis results + if: ${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }} == '1' + uses: actions/upload-artifact@v3 + with: + name: static_analysis_results + path: static_analysis_results + - name: Check static analysis results + run: | + if [[ "${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }}" == '1' ]] + then + echo "::error::Static analysis found issues with your code. Download the 'static_analysis_results' artifact from this workflow and view the 'index.html' file contained within it in a web browser locally for detailed results." 
+ exit 1 + fi + diff --git a/.gitignore b/.gitignore index 1c0b3385..0f5e7aae 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,10 @@ /src/*.[od] /src/*.sym +compile_commands.json +/static_analysis_raw_results +/static_analysis_results + /run_tests.out/ /test/test.sh diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh new file mode 100755 index 00000000..db870689 --- /dev/null +++ b/scripts/run_static_analysis.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +set -euo pipefail + +git clean -Xfd + +export CC='clang' +export CXX='clang++' +compile_time_malloc_conf='background_thread:true,'\ +'metadata_thp:auto,'\ +'abort_conf:true,'\ +'muzzy_decay_ms:0,'\ +'zero_realloc:free,'\ +'prof_unbias:false,'\ +'prof_time_resolution:high' + +./autogen.sh \ + --with-private-namespace=jemalloc_ \ + --disable-cache-oblivious \ + --enable-prof \ + --enable-prof-libunwind \ + --with-malloc-conf="$compile_time_malloc_conf" \ + --enable-readlinkat \ + --enable-opt-safety-checks \ + --enable-uaf-detection \ + --enable-force-getenv \ + --enable-debug # Enabling debug for static analysis is important, + # otherwise you'll get tons of warnings for things + # that are already covered by `assert`s. + +bear -- make -s -j $(nproc) +# We end up with lots of duplicate entries in the compilation database, one for +# each output file type (e.g. .o, .d, .sym, etc.). There must be exactly one +# entry for each file in the compilation database in order for +# cross-translation-unit analysis to work, so we deduplicate the database here. 
+jq '[.[] | select(.output | test("/[^./]*\\.o$"))]' compile_commands.json > compile_commands.json.tmp +mv compile_commands.json.tmp compile_commands.json + +CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs $(nproc) \ + --ctu --compile-uniqueing strict --output static_analysis_raw_results \ + --analyzers clang-tidy clangsa + +html_output_dir="${1:-static_analysis_results}" +result=${2:-/dev/null} +# We're echoing a value because we want to indicate whether or not any errors +# were found, but we always want the script to have a successful exit code so +# that we actually reach the step in the GitHub action where we upload the results. +if CodeChecker parse --export html --output "$html_output_dir" static_analysis_raw_results +then + echo "HAS_STATIC_ANALYSIS_RESULTS=0" >> "$result" +else + echo "HAS_STATIC_ANALYSIS_RESULTS=1" >> "$result" +fi From f2e00d2fd3e56e6599f889ee09d5c41ed4012015 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 28 Apr 2023 12:29:49 -0700 Subject: [PATCH 2301/2608] Remove trailing whitespace Additionally, added a GitHub Action to ensure no more trailing whitespace will creep in again in the future. I'm excluding Markdown files from this check, since trailing whitespace is significant there, and also excluding `build-aux/install-sh` because there is significant trailing whitespace on the line that sets `defaultIFS`. 
--- .github/workflows/check_formatting.yaml | 10 ++++++++++ bin/jeprof.in | 12 ++++++------ build-aux/install-sh | 14 +++++++------- configure.ac | 2 +- .../internal/jemalloc_internal_defs.h.in | 6 +++--- include/msvc_compat/C99/stdint.h | 18 +++++++++--------- scripts/check_trailing_whitespace.sh | 7 +++++++ test/include/test/SFMT-params.h | 18 +++++++++--------- test/include/test/SFMT.h | 14 +++++++------- test/include/test/arena_util.h | 2 +- test/src/timer.c | 8 ++++---- test/stress/cpp/microbench.cpp | 12 ++++++------ 12 files changed, 70 insertions(+), 53 deletions(-) create mode 100644 .github/workflows/check_formatting.yaml create mode 100755 scripts/check_trailing_whitespace.sh diff --git a/.github/workflows/check_formatting.yaml b/.github/workflows/check_formatting.yaml new file mode 100644 index 00000000..f7be77b1 --- /dev/null +++ b/.github/workflows/check_formatting.yaml @@ -0,0 +1,10 @@ +name: 'Check Formatting' +on: [pull_request] +jobs: + check-formatting: + runs-on: ubuntu-latest + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Check for trailing whitespace + run: scripts/check_trailing_whitespace.sh diff --git a/bin/jeprof.in b/bin/jeprof.in index 65f616d4..f02c1f3e 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -4504,19 +4504,19 @@ sub FindLibrary { # For libc libraries, the copy in /usr/lib/debug contains debugging symbols sub DebuggingLibrary { my $file = shift; - + if ($file !~ m|^/|) { return undef; } - + # Find debug symbol file if it's named after the library's name. 
- - if (-f "/usr/lib/debug$file") { + + if (-f "/usr/lib/debug$file") { if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } return "/usr/lib/debug$file"; } elsif (-f "/usr/lib/debug$file.debug") { if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } - return "/usr/lib/debug$file.debug"; + return "/usr/lib/debug$file.debug"; } if(!$main::opt_debug_syms_by_id) { @@ -4525,7 +4525,7 @@ sub DebuggingLibrary { } # Find debug file if it's named after the library's build ID. - + my $readelf = ''; if (!$main::gave_up_on_elfutils) { $readelf = qx/eu-readelf -n ${file}/; diff --git a/build-aux/install-sh b/build-aux/install-sh index ebc66913..b44de098 100755 --- a/build-aux/install-sh +++ b/build-aux/install-sh @@ -115,7 +115,7 @@ fi if [ x"$dir_arg" != x ]; then dst=$src src="" - + if [ -d $dst ]; then instcmd=: else @@ -124,7 +124,7 @@ if [ x"$dir_arg" != x ]; then else # Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad +# might cause directories to be created, which would be especially bad # if $src (and thus $dsttmp) contains '*'. if [ -f $src -o -d $src ] @@ -134,7 +134,7 @@ else echo "install: $src does not exist" exit 1 fi - + if [ x"$dst" = x ] then echo "install: no destination specified" @@ -201,17 +201,17 @@ else # If we're going to rename the final executable, determine the name now. - if [ x"$transformarg" = x ] + if [ x"$transformarg" = x ] then dstfile=`basename $dst` else - dstfile=`basename $dst $transformbasename | + dstfile=`basename $dst $transformbasename | sed $transformarg`$transformbasename fi # don't allow the sed command to completely eliminate the filename - if [ x"$dstfile" = x ] + if [ x"$dstfile" = x ] then dstfile=`basename $dst` else @@ -242,7 +242,7 @@ else # Now rename the file to the real destination. 
$doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile + $doit $mvcmd $dsttmp $dstdir/$dstfile fi && diff --git a/configure.ac b/configure.ac index 5b18fd34..687b221c 100644 --- a/configure.ac +++ b/configure.ac @@ -2501,7 +2501,7 @@ AC_SUBST([enable_initial_exec_tls]) if test "x${je_cv_tls_model}" = "xyes" -a \ "x${enable_initial_exec_tls}" = "x1" ; then AC_DEFINE([JEMALLOC_TLS_MODEL], - [__attribute__((tls_model("initial-exec")))], + [__attribute__((tls_model("initial-exec")))], [ ]) else AC_DEFINE([JEMALLOC_TLS_MODEL], [ ], [ ]) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index bef99dea..cce638d3 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -446,9 +446,9 @@ /* If defined, use volatile asm during benchmarks. */ #undef JEMALLOC_HAVE_ASM_VOLATILE -/* - * If defined, support the use of rdtscp to get the time stamp counter - * and the processor ID. +/* + * If defined, support the use of rdtscp to get the time stamp counter + * and the processor ID. */ #undef JEMALLOC_HAVE_RDTSCP diff --git a/include/msvc_compat/C99/stdint.h b/include/msvc_compat/C99/stdint.h index d02608a5..c66fbb81 100644 --- a/include/msvc_compat/C99/stdint.h +++ b/include/msvc_compat/C99/stdint.h @@ -1,32 +1,32 @@ // ISO C9x compliant stdint.h for Microsoft Visual Studio -// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 -// +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// // Copyright (c) 2006-2008 Alexander Chemeris -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: -// +// // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. -// +// // 2. 
Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. -// +// // 3. The name of the author may be used to endorse or promote products // derived from this software without specific prior written permission. -// +// // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// +// /////////////////////////////////////////////////////////////////////////////// #ifndef _MSC_VER // [ diff --git a/scripts/check_trailing_whitespace.sh b/scripts/check_trailing_whitespace.sh new file mode 100755 index 00000000..7aafe131 --- /dev/null +++ b/scripts/check_trailing_whitespace.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +if git grep -E '\s+$' -- ':!*.md' ':!build-aux/install-sh' +then + echo 'Error: found trailing whitespace' 1>&2 + exit 1 +fi diff --git a/test/include/test/SFMT-params.h b/test/include/test/SFMT-params.h index ade66222..6730adf8 100644 --- a/test/include/test/SFMT-params.h +++ b/test/include/test/SFMT-params.h @@ -45,7 +45,7 @@ /*----------------- BASIC DEFINITIONS -----------------*/ -/** Mersenne Exponent. The period of the sequence +/** Mersenne Exponent. 
The period of the sequence * is a multiple of 2^MEXP-1. * #define MEXP 19937 */ /** SFMT generator has an internal state array of 128-bit integers, @@ -63,25 +63,25 @@ following definitions are in paramsXXXX.h file. ----------------------*/ /** the pick up position of the array. -#define POS1 122 +#define POS1 122 */ /** the parameter of shift left as four 32-bit registers. #define SL1 18 */ -/** the parameter of shift left as one 128-bit register. - * The 128-bit integer is shifted by (SL2 * 8) bits. -#define SL2 1 +/** the parameter of shift left as one 128-bit register. + * The 128-bit integer is shifted by (SL2 * 8) bits. +#define SL2 1 */ /** the parameter of shift right as four 32-bit registers. #define SR1 11 */ -/** the parameter of shift right as one 128-bit register. - * The 128-bit integer is shifted by (SL2 * 8) bits. -#define SR2 1 +/** the parameter of shift right as one 128-bit register. + * The 128-bit integer is shifted by (SL2 * 8) bits. +#define SR2 1 */ /** A bitmask, used in the recursion. These parameters are introduced @@ -89,7 +89,7 @@ #define MSK1 0xdfffffefU #define MSK2 0xddfecb7fU #define MSK3 0xbffaffffU -#define MSK4 0xbffffff6U +#define MSK4 0xbffffff6U */ /** These definitions are part of a 128-bit period certification vector. diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 863fc55e..338dd45c 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -33,8 +33,8 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ -/** - * @file SFMT.h +/** + * @file SFMT.h * * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom * number generator @@ -53,7 +53,7 @@ * and you have to define PRIu64 and PRIx64 in this file as follows: * @verbatim typedef unsigned int uint32_t - typedef unsigned long long uint64_t + typedef unsigned long long uint64_t #define PRIu64 "llu" #define PRIx64 "llx" @endverbatim @@ -84,8 +84,8 @@ int get_min_array_size64(void); /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ static inline double to_real1(uint32_t v) { - return v * (1.0/4294967295.0); - /* divided by 2^32-1 */ + return v * (1.0/4294967295.0); + /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ @@ -95,7 +95,7 @@ static inline double genrand_real1(sfmt_t *ctx) { /** generates a random number on [0,1)-real-interval */ static inline double to_real2(uint32_t v) { - return v * (1.0/4294967296.0); + return v * (1.0/4294967296.0); /* divided by 2^32 */ } @@ -106,7 +106,7 @@ static inline double genrand_real2(sfmt_t *ctx) { /** generates a random number on (0,1)-real-interval */ static inline double to_real3(uint32_t v) { - return (((double)v) + 0.5)*(1.0/4294967296.0); + return (((double)v) + 0.5)*(1.0/4294967296.0); /* divided by 2^32 */ } diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h index 9a41dacb..535c1aa1 100644 --- a/test/include/test/arena_util.h +++ b/test/include/test/arena_util.h @@ -26,7 +26,7 @@ do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { static inline void do_arena_destroy(unsigned arena_ind) { - /* + /* * For convenience, flush tcache in case there are cached items. * However not assert success since the tcache may be disabled. 
*/ diff --git a/test/src/timer.c b/test/src/timer.c index 0f39d5f6..94528a34 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -28,12 +28,12 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { size_t i = 0; size_t j, n; - /* - * The time difference could be 0 if the two clock readings are + /* + * The time difference could be 0 if the two clock readings are * identical, either due to the operations being measured in the middle - * took very little time (or even got optimized away), or the clock + * took very little time (or even got optimized away), or the clock * readings are bad / very coarse grained clock. - * Thus, bump t1 if it is 0 to avoid dividing 0. + * Thus, bump t1 if it is 0 to avoid dividing 0. */ if (t1 == 0) { t1 = 1; diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 203c3dc9..7422d1ca 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -52,15 +52,15 @@ malloc_sdallocx(void) { #endif TEST_BEGIN(test_free_vs_delete) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free", (void *)malloc_free, + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free", (void *)malloc_free, "new_delete", (void *)new_delete); } TEST_END TEST_BEGIN(test_free_array_vs_delete_array) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free_array", (void *)malloc_free_array, + compare_funcs(10*1000*1000, 100*1000*1000, + "malloc_free_array", (void *)malloc_free_array, "delete_array", (void *)new_delete_array); } TEST_END @@ -68,8 +68,8 @@ TEST_END TEST_BEGIN(test_sized_delete_vs_sdallocx) { #if __cpp_sized_deallocation >= 201309 - compare_funcs(10*1000*1000, 100*1000*1000, - "new_size_delete", (void *)new_sized_delete, + compare_funcs(10*1000*1000, 100*1000*1000, + "new_size_delete", (void *)new_sized_delete, "malloc_sdallocx", (void *)malloc_sdallocx); #else malloc_printf("Skipping test_sized_delete_vs_sdallocx since \ From 46e464a26bcf83c414db489c23236663ee570260 Mon 
Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 23 Jun 2023 13:30:16 -0700 Subject: [PATCH 2302/2608] Fix downloading LLVM in GitHub Action It turns out LLVM does not include a build for every platform in the assets for every release, just some of them. As such, I've pinned us to the latest release version with a corresponding build. --- .github/workflows/static_analysis.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml index 547b1564..df60b5a1 100644 --- a/.github/workflows/static_analysis.yaml +++ b/.github/workflows/static_analysis.yaml @@ -31,7 +31,7 @@ jobs: uses: dsaltares/fetch-gh-release-asset@master with: repo: 'llvm/llvm-project' - version: 'latest' + version: 'tags/llvmorg-16.0.4' file: 'clang[+]llvm-.*x86_64-linux-gnu.*' regex: true target: 'llvm_assets/' From d1313313101f9df127bba08bf8fd90a849bf3b87 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 23 Jun 2023 14:13:26 -0700 Subject: [PATCH 2303/2608] Avoid eager purging on the dedicated oversize arena when using bg thds. We have observed new workload patterns (namely ML training type) that cycle through oversized allocations frequently, because 1) the dataset might be sparse which is faster to go through, and 2) GPU accelerated. As a result, the eager purging from the oversize arena becomes a bottleneck. To offer an easy solution, allow normal purging of the oversized extents when background threads are enabled. 
--- src/arena.c | 61 +++++++++++++++++++++------------- src/extent.c | 1 + test/unit/oversize_threshold.c | 5 ++- 3 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/arena.c b/src/arena.c index ab1a9ab8..3b151b77 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1730,6 +1730,42 @@ label_error: return NULL; } +static arena_t * +arena_create_huge_arena(tsd_t *tsd, unsigned ind) { + assert(ind != 0); + + arena_t *huge_arena = arena_get(tsd_tsdn(tsd), ind, true); + if (huge_arena == NULL) { + return NULL; + } + + char *huge_arena_name = "auto_oversize"; + strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN); + huge_arena->name[ARENA_NAME_LEN - 1] = '\0'; + + /* + * Purge eagerly for huge allocations, because: 1) number of huge + * allocations is usually small, which means ticker based decay is not + * reliable; and 2) less immediate reuse is expected for huge + * allocations. + * + * However, with background threads enabled, keep normal purging since + * the purging delay is bounded. + */ + if (!background_thread_enabled() + && arena_dirty_decay_ms_default_get() > 0) { + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_dirty, 0); + } + if (!background_thread_enabled() + &&arena_muzzy_decay_ms_default_get() > 0) { + arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, + extent_state_muzzy, 0); + } + + return huge_arena; +} + arena_t * arena_choose_huge(tsd_t *tsd) { /* huge_arena_ind can be 0 during init (will use a0). */ @@ -1740,30 +1776,7 @@ arena_choose_huge(tsd_t *tsd) { arena_t *huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, false); if (huge_arena == NULL) { /* Create the huge arena on demand. 
*/ - assert(huge_arena_ind != 0); - huge_arena = arena_get(tsd_tsdn(tsd), huge_arena_ind, true); - if (huge_arena == NULL) { - return NULL; - } - - char *huge_arena_name = "auto_oversize"; - strncpy(huge_arena->name, huge_arena_name, ARENA_NAME_LEN); - huge_arena->name[ARENA_NAME_LEN - 1] = '\0'; - - /* - * Purge eagerly for huge allocations, because: 1) number of - * huge allocations is usually small, which means ticker based - * decay is not reliable; and 2) less immediate reuse is - * expected for huge allocations. - */ - if (arena_dirty_decay_ms_default_get() > 0) { - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_dirty, 0); - } - if (arena_muzzy_decay_ms_default_get() > 0) { - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_muzzy, 0); - } + huge_arena = arena_create_huge_arena(tsd, huge_arena_ind); } return huge_arena; diff --git a/src/extent.c b/src/extent.c index 18e4698c..477050b6 100644 --- a/src/extent.c +++ b/src/extent.c @@ -944,6 +944,7 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } while (coalesced); if (edata_size_get(edata) >= atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) + && !background_thread_enabled() && extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. 
*/ malloc_mutex_unlock(tsdn, &ecache->mtx); diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c index 44a8f76a..95ce6537 100644 --- a/test/unit/oversize_threshold.c +++ b/test/unit/oversize_threshold.c @@ -120,7 +120,10 @@ TEST_BEGIN(test_oversize_threshold) { */ ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); dallocx(ptr, MALLOCX_TCACHE_NONE); - expect_zu_ge(max_purged, 2 * 1024 * 1024, "Expected a 2MB purge"); + if (!is_background_thread_enabled()) { + expect_zu_ge(max_purged, 2 * 1024 * 1024, + "Expected a 2MB purge"); + } } TEST_END From e1338703efb77f7d276ee65121fa63bb66ede239 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 3 Jul 2023 14:45:28 -0700 Subject: [PATCH 2304/2608] Address compiler warnings in the unit tests --- test/unit/hpa.c | 2 ++ test/unit/retained.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unit/hpa.c b/test/unit/hpa.c index dfd57f39..f7874281 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -32,6 +32,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { false, /* hugify_delay_ms */ 10 * 1000, + /* min_purge_interval_ms */ + 5 * 1000, }; static hpa_shard_t * diff --git a/test/unit/retained.c b/test/unit/retained.c index aa9f6847..340f2d38 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -144,7 +144,6 @@ TEST_BEGIN(test_retained) { arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); size_t usable = 0; - size_t fragmented = 0; for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < arena->pa_shard.pac.exp_grow.next; pind++) { size_t psz = sz_pind2sz(pind); @@ -158,7 +157,6 @@ TEST_BEGIN(test_retained) { "Excessive retained memory " "(%#zx[+%#zx] > %#zx)", usable, psz_usable, allocated); - fragmented += psz_fragmented; usable += psz_usable; } } From 5a858c64d6f049c64c11baf907ab8655e6ed72a3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 2 Jun 2023 15:15:37 -0700 Subject: [PATCH 2305/2608] Reduce the memory overhead of sampled small 
allocations Previously, small allocations which were sampled as part of heap profiling were rounded up to `SC_LARGE_MINCLASS`. This additional memory usage becomes problematic when the page size is increased, as noted in #2358. Small allocations are now rounded up to the nearest multiple of `PAGE` instead, reducing the memory overhead by a factor of 4 in the most extreme cases. --- include/jemalloc/internal/arena_externs.h | 9 +-- include/jemalloc/internal/arena_inlines_b.h | 11 ++- .../internal/jemalloc_internal_inlines_c.h | 69 +++++++++++++----- include/jemalloc/internal/pages.h | 3 + include/jemalloc/internal/prof_inlines.h | 9 +-- include/jemalloc/internal/prof_types.h | 8 +++ include/jemalloc/internal/safety_check.h | 40 +++++++++-- include/jemalloc/internal/sz.h | 15 ++++ include/jemalloc/internal/tcache_inlines.h | 2 +- src/arena.c | 64 +++++++++-------- src/jemalloc.c | 72 ++++++++++--------- src/pages.c | 2 +- 12 files changed, 206 insertions(+), 98 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 3821233f..2d82ad8f 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -65,10 +65,11 @@ void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero); + szind_t ind, bool zero, bool slab); void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize); + size_t alignment, bool zero, bool slab, tcache_t *tcache); +void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, + size_t bumped_usize); void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); @@ -81,7 +82,7 @@ void arena_dalloc_small(tsdn_t *tsdn, void 
*ptr); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args); dss_prec_t arena_dss_prec_get(arena_t *arena); ehooks_t *arena_get_ehooks(arena_t *arena); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index bf25a31c..420a62b2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -182,23 +182,22 @@ arena_decay_tick(tsdn_t *tsdn, arena_t *arena) { JEMALLOC_ALWAYS_INLINE void * arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, - tcache_t *tcache, bool slow_path) { + bool slab, tcache_t *tcache, bool slow_path) { assert(!tsdn_null(tsdn) || tcache == NULL); if (likely(tcache != NULL)) { - if (likely(size <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } - if (likely(size <= tcache_maxclass)) { + } else if (likely(size <= tcache_maxclass)) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } /* (size > tcache_maxclass) case falls through. 
*/ - assert(size > tcache_maxclass); } - return arena_malloc_hard(tsdn, arena, size, ind, zero); + return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); } JEMALLOC_ALWAYS_INLINE arena_t * diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 206f1400..ae9cb0c2 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -52,10 +52,12 @@ isalloc(tsdn_t *tsdn, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void * -iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, - bool is_internal, arena_t *arena, bool slow_path) { +iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, + bool slab, tcache_t *tcache, bool is_internal, arena_t *arena, + bool slow_path) { void *ret; + assert(!slab || sz_can_use_slab(size)); /* slab && large is illegal */ assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { @@ -63,13 +65,21 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, WITNESS_RANK_CORE, 0); } - ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); + ret = arena_malloc(tsdn, arena, size, ind, zero, slab, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } return ret; } +JEMALLOC_ALWAYS_INLINE void * +iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, + bool is_internal, arena_t *arena, bool slow_path) { + bool slab = sz_can_use_slab(size); + return iallocztm_explicit_slab(tsdn, size, ind, zero, slab, tcache, + is_internal, arena, slow_path); +} + JEMALLOC_ALWAYS_INLINE void * ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { return iallocztm(tsd_tsdn(tsd), size, ind, zero, 
tcache_get(tsd), false, @@ -77,10 +87,11 @@ ialloc(tsd_t *tsd, size_t size, szind_t ind, bool zero, bool slow_path) { } JEMALLOC_ALWAYS_INLINE void * -ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - tcache_t *tcache, bool is_internal, arena_t *arena) { +ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; + assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */ assert(usize != 0); assert(usize == sz_sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); @@ -88,7 +99,7 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - ret = arena_palloc(tsdn, arena, usize, alignment, zero, tcache); + ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); @@ -96,12 +107,26 @@ ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, return ret; } +JEMALLOC_ALWAYS_INLINE void * +ipallocztm(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + tcache_t *tcache, bool is_internal, arena_t *arena) { + return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, + sz_can_use_slab(usize), tcache, is_internal, arena); +} + JEMALLOC_ALWAYS_INLINE void * ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, tcache_t *tcache, arena_t *arena) { return ipallocztm(tsdn, usize, alignment, zero, tcache, false, arena); } +JEMALLOC_ALWAYS_INLINE void * +ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache, arena_t *arena) { + return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, false, arena); +} + JEMALLOC_ALWAYS_INLINE void * ipalloc(tsd_t *tsd, size_t 
usize, size_t alignment, bool zero) { return ipallocztm(tsd_tsdn(tsd), usize, alignment, zero, @@ -146,7 +171,7 @@ isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t alignment, bool zero, tcache_t *tcache, arena_t *arena, + size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -157,7 +182,8 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - p = ipalloct(tsdn, usize, alignment, zero, tcache, arena); + p = ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, arena); if (p == NULL) { return NULL; } @@ -184,8 +210,9 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, * passed-around anywhere. */ JEMALLOC_ALWAYS_INLINE void * -iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) +iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) { assert(ptr != NULL); assert(size != 0); @@ -199,18 +226,28 @@ iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, * and copy. 
*/ return iralloct_realign(tsdn, ptr, oldsize, size, alignment, - zero, tcache, arena, hook_args); + zero, slab, tcache, arena, hook_args); } return arena_ralloc(tsdn, arena, ptr, oldsize, size, alignment, zero, - tcache, hook_args); + slab, tcache, hook_args); +} + +JEMALLOC_ALWAYS_INLINE void * +iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, + size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + hook_ralloc_args_t *hook_args) +{ + bool slab = sz_can_use_slab(usize); + return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero, + slab, tcache, arena, hook_args); } JEMALLOC_ALWAYS_INLINE void * iralloc(tsd_t *tsd, void *ptr, size_t oldsize, size_t size, size_t alignment, - bool zero, hook_ralloc_args_t *hook_args) { - return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, zero, - tcache_get(tsd), NULL, hook_args); + size_t usize, bool zero, hook_ralloc_args_t *hook_args) { + return iralloct(tsd_tsdn(tsd), ptr, oldsize, size, alignment, usize, + zero, tcache_get(tsd), NULL, hook_args); } JEMALLOC_ALWAYS_INLINE bool @@ -405,7 +442,7 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { JEMALLOC_ALWAYS_INLINE bool prof_sample_aligned(const void *ptr) { - return ((uintptr_t)ptr & PAGE_MASK) == 0; + return ((uintptr_t)ptr & PROF_SAMPLE_ALIGNMENT_MASK) == 0; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index ad1f606a..361de587 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H +/* Actual operating system page size, detected during bootstrap, <= PAGE. */ +extern size_t os_page; + /* Page size. LG_PAGE is determined by the configure script. 
*/ #ifdef PAGE_MASK # undef PAGE_MASK diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index b74b115c..b5273010 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -239,14 +239,15 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } JEMALLOC_ALWAYS_INLINE size_t -prof_sample_align(size_t orig_align) { +prof_sample_align(size_t usize, size_t orig_align) { /* - * Enforce page alignment, so that sampled allocations can be identified + * Enforce alignment, so that sampled allocations can be identified * w/o metadata lookup. */ assert(opt_prof); - return (opt_cache_oblivious && orig_align < PAGE) ? PAGE : - orig_align; + return (orig_align < PROF_SAMPLE_ALIGNMENT && + (sz_can_use_slab(usize) || opt_cache_oblivious)) ? + PROF_SAMPLE_ALIGNMENT : orig_align; } JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 104f7e61..046ea204 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -80,4 +80,12 @@ typedef struct prof_recent_s prof_recent_t; /* Thread name storage size limit. */ #define PROF_THREAD_NAME_MAX_LEN 16 +/* + * Minimum required alignment for sampled allocations. Over-aligning sampled + * allocations allows us to quickly identify them on the dalloc path without + * resorting to metadata lookup. 
+ */ +#define PROF_SAMPLE_ALIGNMENT PAGE +#define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 900cfa55..7854c1bf 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -3,6 +3,8 @@ #define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 +#include "jemalloc/internal/pages.h" + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); @@ -12,22 +14,50 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); +#define REDZONE_SIZE ((size_t) 32) +#define REDZONE_FILL_VALUE 0xBC + +/* + * Normally the redzone extends `REDZONE_SIZE` bytes beyond the end of + * the allocation. However, we don't let the redzone extend onto another + * OS page because this would impose additional overhead if that page was + * not already resident in memory. + */ +JEMALLOC_ALWAYS_INLINE const unsigned char * +compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { + const unsigned char *ptr = (const unsigned char *) _ptr; + const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? + &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; + const unsigned char *page_end = (const unsigned char *) + ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page); + return redzone_end < page_end ? 
redzone_end : page_end; +} + JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { - assert(usize < bumped_usize); - for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - *((unsigned char *)ptr + i) = 0xBC; + assert(usize <= bumped_usize); + const unsigned char *redzone_end = + compute_redzone_end(ptr, usize, bumped_usize); + for (unsigned char *curr = &((unsigned char *)ptr)[usize]; + curr < redzone_end; curr++) { + *curr = REDZONE_FILL_VALUE; } } JEMALLOC_ALWAYS_INLINE void safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) { - for (size_t i = usize; i < bumped_usize && i < usize + 32; ++i) { - if (unlikely(*((unsigned char *)ptr + i) != 0xBC)) { + const unsigned char *redzone_end = + compute_redzone_end(ptr, usize, bumped_usize); + for (const unsigned char *curr= &((const unsigned char *)ptr)[usize]; + curr < redzone_end; curr++) { + if (unlikely(*curr != REDZONE_FILL_VALUE)) { safety_check_fail("Use after free error\n"); } } } +#undef REDZONE_SIZE +#undef REDZONE_FILL_VALUE + #endif /*JEMALLOC_INTERNAL_SAFETY_CHECK_H */ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3c0fc1da..a799cea9 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -365,6 +365,21 @@ sz_sa2u(size_t size, size_t alignment) { return usize; } +/* + * Under normal circumstances, whether or not to use a slab + * to satisfy an allocation depends solely on the allocation's + * effective size. However, this is *not* the case when an allocation + * is sampled for profiling, in which case you *must not* use a slab + * regardless of the effective size. Thus `sz_can_use_slab` is called + * on the common path, but there exist `*_explicit_slab` variants of + * several functions for handling the aforementioned case of + * sampled allocations. 
+ */ +JEMALLOC_ALWAYS_INLINE bool +sz_can_use_slab(size_t size) { + return size <= SC_SMALL_MAXCLASS; +} + size_t sz_psz_quantize_floor(size_t size); size_t sz_psz_quantize_ceil(size_t size); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2634f145..2b8db0a3 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -60,7 +60,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(tcache_small_bin_disabled(binind, bin))) { /* stats and zero are handled directly by the arena. */ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, - binind, zero); + binind, zero, /* slab */ true); } tcache_bin_flush_stashed(tsd, tcache, bin, binind, /* is_small */ true); diff --git a/src/arena.c b/src/arena.c index 3b151b77..b154b7a5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1191,7 +1191,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void * arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, - bool zero) { + bool zero, bool slab) { assert(!tsdn_null(tsdn) || arena != NULL); if (likely(!tsdn_null(tsdn))) { @@ -1201,18 +1201,19 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, return NULL; } - if (likely(size <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); + } else { + return large_malloc(tsdn, arena, sz_index2size(ind), zero); } - return large_malloc(tsdn, arena, sz_index2size(ind), zero); } void * arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero, tcache_t *tcache) { - void *ret; - - if (usize <= SC_SMALL_MAXCLASS) { + bool zero, bool slab, tcache_t *tcache) { + if (slab) { + assert(sz_can_use_slab(usize)); /* Small; alignment doesn't require special slab placement. 
*/ /* usize should be a result of sz_sa2u() */ @@ -1223,27 +1224,26 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, */ assert(alignment <= PAGE); - ret = arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, tcache, true); + return arena_malloc(tsdn, arena, usize, sz_size2index(usize), + zero, slab, tcache, true); } else { if (likely(alignment <= CACHELINE)) { - ret = large_malloc(tsdn, arena, usize, zero); + return large_malloc(tsdn, arena, usize, zero); } else { - ret = large_palloc(tsdn, arena, usize, alignment, zero); + return large_palloc(tsdn, arena, usize, alignment, zero); } } - return ret; } void -arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize) { +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { cassert(config_prof); assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); - assert(usize <= SC_SMALL_MAXCLASS); + assert(isalloc(tsdn, ptr) == bumped_usize); + assert(sz_can_use_slab(usize)); if (config_opt_safety_checks) { - safety_check_set_redzone(ptr, usize, SC_LARGE_MINCLASS); + safety_check_set_redzone(ptr, usize, bumped_usize); } edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); @@ -1259,13 +1259,19 @@ static size_t arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { cassert(config_prof); assert(ptr != NULL); + size_t usize = isalloc(tsdn, ptr); + size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); + assert(bumped_usize <= SC_LARGE_MINCLASS && + PAGE_CEILING(bumped_usize) == bumped_usize); + assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); + szind_t szind = sz_size2index(bumped_usize); - edata_szind_set(edata, SC_NBINS); - emap_remap(tsdn, &arena_emap_global, edata, SC_NBINS, /* slab */ false); + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - assert(isalloc(tsdn, ptr) == SC_LARGE_MINCLASS); + assert(isalloc(tsdn, ptr) == bumped_usize); - return 
SC_LARGE_MINCLASS; + return bumped_usize; } void @@ -1282,10 +1288,10 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, * Currently, we only do redzoning for small sampled * allocations. */ - assert(bumped_usize == SC_LARGE_MINCLASS); safety_check_verify_redzone(ptr, usize, bumped_usize); } - if (bumped_usize <= tcache_maxclass && tcache != NULL) { + if (bumped_usize >= SC_LARGE_MINCLASS && + bumped_usize <= tcache_maxclass && tcache != NULL) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { @@ -1443,28 +1449,30 @@ done: static void * arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, tcache_t *tcache) { + size_t alignment, bool zero, bool slab, tcache_t *tcache) { if (alignment == 0) { return arena_malloc(tsdn, arena, usize, sz_size2index(usize), - zero, tcache, true); + zero, slab, tcache, true); } usize = sz_sa2u(usize, alignment); if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - return ipalloct(tsdn, usize, alignment, zero, tcache, arena); + return ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, + tcache, arena); } void * arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, - size_t size, size_t alignment, bool zero, tcache_t *tcache, + size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args) { size_t usize = alignment == 0 ? sz_s2u(size) : sz_sa2u(size, alignment); if (unlikely(usize == 0 || size > SC_LARGE_MAXCLASS)) { return NULL; } - if (likely(usize <= SC_SMALL_MAXCLASS)) { + if (likely(slab)) { + assert(sz_can_use_slab(usize)); /* Try to avoid moving the allocation. */ UNUSED size_t newsize; if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, @@ -1488,7 +1496,7 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, * object. In that case, fall back to allocating new space and copying. 
*/ void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, - zero, tcache); + zero, slab, tcache); if (ret == NULL) { return NULL; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 88559be0..4e4e4bee 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2360,7 +2360,7 @@ arena_get_from_ind(tsd_t *tsd, unsigned arena_ind, arena_t **arena_p) { /* ind is ignored if dopts->alignment > 0. */ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, - size_t size, size_t usize, szind_t ind) { + size_t size, size_t usize, szind_t ind, bool slab) { /* Fill in the tcache. */ tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true); @@ -2372,12 +2372,12 @@ imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, } if (unlikely(dopts->alignment != 0)) { - return ipalloct(tsd_tsdn(tsd), usize, dopts->alignment, - dopts->zero, tcache, arena); + return ipalloct_explicit_slab(tsd_tsdn(tsd), usize, + dopts->alignment, dopts->zero, slab, tcache, arena); } - return iallocztm(tsd_tsdn(tsd), size, ind, dopts->zero, tcache, false, - arena, sopts->slow); + return iallocztm_explicit_slab(tsd_tsdn(tsd), size, ind, dopts->zero, + slab, tcache, false, arena, sopts->slow); } JEMALLOC_ALWAYS_INLINE void * @@ -2385,28 +2385,26 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t usize, szind_t ind) { void *ret; + dopts->alignment = prof_sample_align(usize, dopts->alignment); /* - * For small allocations, sampling bumps the usize. If so, we allocate - * from the ind_large bucket. + * If the allocation is small enough that it would normally be allocated + * on a slab, we need to take additional steps to ensure that it gets + * its own extent instead. */ - szind_t ind_large; - - dopts->alignment = prof_sample_align(dopts->alignment); - if (usize <= SC_SMALL_MAXCLASS) { - assert(((dopts->alignment == 0) ? 
- sz_s2u(SC_LARGE_MINCLASS) : - sz_sa2u(SC_LARGE_MINCLASS, dopts->alignment)) - == SC_LARGE_MINCLASS); - ind_large = sz_size2index(SC_LARGE_MINCLASS); - size_t bumped_usize = sz_s2u(SC_LARGE_MINCLASS); + if (sz_can_use_slab(usize)) { + assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0); + size_t bumped_usize = sz_sa2u(usize, dopts->alignment); + szind_t bumped_ind = sz_size2index(bumped_usize); + dopts->tcache_ind = TCACHE_IND_NONE; ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, - bumped_usize, ind_large); + bumped_usize, bumped_ind, /* slab */ false); if (unlikely(ret == NULL)) { return NULL; } - arena_prof_promote(tsd_tsdn(tsd), ret, usize); + arena_prof_promote(tsd_tsdn(tsd), ret, usize, bumped_usize); } else { - ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind); + ret = imalloc_no_sample(sopts, dopts, tsd, usize, usize, ind, + /* slab */ false); } assert(prof_sample_aligned(ret)); @@ -2532,9 +2530,10 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { emap_alloc_ctx_t alloc_ctx; if (likely((uintptr_t)tctx == (uintptr_t)1U)) { - alloc_ctx.slab = (usize <= SC_SMALL_MAXCLASS); + alloc_ctx.slab = sz_can_use_slab(usize); allocation = imalloc_no_sample( - sopts, dopts, tsd, usize, usize, ind); + sopts, dopts, tsd, usize, usize, ind, + alloc_ctx.slab); } else if ((uintptr_t)tctx > (uintptr_t)1U) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); @@ -2551,7 +2550,7 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { } else { assert(!opt_prof); allocation = imalloc_no_sample(sopts, dopts, tsd, size, usize, - ind); + ind, sz_can_use_slab(usize)); if (unlikely(allocation == NULL)) { goto label_oom; } @@ -3314,18 +3313,25 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, return NULL; } - alignment = prof_sample_align(alignment); - if (usize <= SC_SMALL_MAXCLASS) { - p = iralloct(tsdn, old_ptr, old_usize, - SC_LARGE_MINCLASS, alignment, zero, tcache, - arena, 
hook_args); + alignment = prof_sample_align(usize, alignment); + /* + * If the allocation is small enough that it would normally be allocated + * on a slab, we need to take additional steps to ensure that it gets + * its own extent instead. + */ + if (sz_can_use_slab(usize)) { + size_t bumped_usize = sz_sa2u(usize, alignment); + p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, + bumped_usize, alignment, zero, /* slab */ false, + tcache, arena, hook_args); if (p == NULL) { return NULL; } - arena_prof_promote(tsdn, p, usize); + arena_prof_promote(tsdn, p, usize, bumped_usize); } else { - p = iralloct(tsdn, old_ptr, old_usize, usize, alignment, zero, - tcache, arena, hook_args); + p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, usize, + alignment, zero, /* slab */ false, tcache, arena, + hook_args); } assert(prof_sample_aligned(p)); @@ -3348,7 +3354,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, usize, alignment, zero, tcache, arena, tctx, hook_args); } else { p = iralloct(tsd_tsdn(tsd), old_ptr, old_usize, size, alignment, - zero, tcache, arena, hook_args); + usize, zero, tcache, arena, hook_args); } if (unlikely(p == NULL)) { prof_alloc_rollback(tsd, tctx); @@ -3407,7 +3413,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { } } else { p = iralloct(tsd_tsdn(tsd), ptr, old_usize, size, alignment, - zero, tcache, arena, &hook_args); + usize, zero, tcache, arena, &hook_args); if (unlikely(p == NULL)) { goto label_oom; } diff --git a/src/pages.c b/src/pages.c index 2d5b8164..41bbef57 100644 --- a/src/pages.c +++ b/src/pages.c @@ -33,7 +33,7 @@ /* Data. */ /* Actual operating system page size, detected during bootstrap, <= PAGE. 
*/ -static size_t os_page; +size_t os_page; #ifndef _WIN32 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) From ebd7e99f5c1bd486d9eee5f10a48a92585efc1e3 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 12 Jun 2023 14:13:17 -0700 Subject: [PATCH 2306/2608] Add a test-case for small profiled allocations Validate that small allocations (i.e. those with `size <= SC_SMALL_MAXCLASS`) which are sampled for profiling maintain the expected invariants even though they now take up less space. --- Makefile.in | 1 + test/unit/prof_small.c | 78 +++++++++++++++++++++++++++++++++++++++++ test/unit/prof_small.sh | 6 ++++ 3 files changed, 85 insertions(+) create mode 100644 test/unit/prof_small.c create mode 100644 test/unit/prof_small.sh diff --git a/Makefile.in b/Makefile.in index 450abeb4..a0131558 100644 --- a/Makefile.in +++ b/Makefile.in @@ -256,6 +256,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_mdump.c \ $(srcroot)test/unit/prof_recent.c \ $(srcroot)test/unit/prof_reset.c \ + $(srcroot)test/unit/prof_small.c \ $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c new file mode 100644 index 00000000..4a982b70 --- /dev/null +++ b/test/unit/prof_small.c @@ -0,0 +1,78 @@ +#include "test/jemalloc_test.h" + +static void assert_small_allocation_sampled(void *ptr, size_t size) { + assert_ptr_not_null(ptr, "Unexpected malloc failure"); + assert_zu_le(size, SC_SMALL_MAXCLASS, "Unexpected large size class"); + edata_t *edata = emap_edata_lookup(TSDN_NULL, &arena_emap_global, ptr); + assert_ptr_not_null(edata, "Unable to find edata for allocation"); + expect_false(edata_slab_get(edata), + "Sampled small allocations should not be placed on slabs"); + expect_ptr_eq(edata_base_get(edata), ptr, + "Sampled allocations should be page-aligned"); + expect_zu_eq(edata_usize_get(edata), size, + "Edata usize did not match requested size"); + 
expect_zu_eq(edata_size_get(edata), PAGE_CEILING(size) + sz_large_pad, + "Edata actual size was not a multiple of PAGE"); + prof_tctx_t *prof_tctx = edata_prof_tctx_get(edata); + expect_ptr_not_null(prof_tctx, "Edata had null prof_tctx"); + expect_ptr_not_null(prof_tctx->tdata, + "Edata had null prof_tdata despite being sampled"); +} + +TEST_BEGIN(test_profile_small_allocations) { + test_skip_if(!config_prof); + + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + void *ptr = malloc(size); + assert_small_allocation_sampled(ptr, size); + free(ptr); + } +} +TEST_END + +TEST_BEGIN(test_profile_small_reallocations_growing) { + test_skip_if(!config_prof); + + void *ptr = NULL; + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + ptr = realloc(ptr, size); + assert_small_allocation_sampled(ptr, size); + } +} +TEST_END + +TEST_BEGIN(test_profile_small_reallocations_shrinking) { + test_skip_if(!config_prof); + + void *ptr = NULL; + for (szind_t index = SC_NBINS; index-- > 0;) { + size_t size = sz_index2size(index); + ptr = realloc(ptr, size); + assert_small_allocation_sampled(ptr, size); + } +} +TEST_END + +TEST_BEGIN(test_profile_small_reallocations_same_size_class) { + test_skip_if(!config_prof); + + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + void *ptr = malloc(size); + assert_small_allocation_sampled(ptr, size); + ptr = realloc(ptr, size - 1); + assert_small_allocation_sampled(ptr, size); + free(ptr); + } +} +TEST_END + +int +main(void) { + return test(test_profile_small_allocations, + test_profile_small_reallocations_growing, + test_profile_small_reallocations_shrinking, + test_profile_small_reallocations_same_size_class); +} diff --git a/test/unit/prof_small.sh b/test/unit/prof_small.sh new file mode 100644 index 00000000..d14cb8c5 --- /dev/null +++ b/test/unit/prof_small.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +if [ "x${enable_prof}" = "x1" ] ; 
then + export MALLOC_CONF="prof:true,lg_prof_sample:0" +fi + From 602edd75664e2a2ef3063d9b3bd42d1f81a1be2b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 5 Jul 2023 13:33:34 -0700 Subject: [PATCH 2307/2608] Enabled -Wstrict-prototypes and fixed warnings. --- configure.ac | 1 + include/jemalloc/internal/ehooks.h | 2 +- include/jemalloc/internal/hook.h | 2 +- include/jemalloc/internal/hpa.h | 2 +- include/jemalloc/internal/prof_data.h | 2 +- include/jemalloc/internal/prof_externs.h | 8 ++++---- include/jemalloc/internal/prof_inlines.h | 2 +- include/jemalloc/internal/prof_recent.h | 4 ++-- include/jemalloc/internal/prof_sys.h | 6 +++--- include/jemalloc/internal/san_bump.h | 2 +- include/jemalloc/internal/spin.h | 2 +- include/jemalloc/internal/test_hooks.h | 4 ++-- include/jemalloc/internal/tsd.h | 2 +- src/ehooks.c | 2 +- src/hook.c | 4 ++-- src/hpa.c | 2 +- src/jemalloc.c | 4 ++-- src/pages.c | 2 +- src/prof.c | 8 ++++---- src/prof_data.c | 2 +- src/prof_recent.c | 8 ++++---- src/prof_sys.c | 6 +++--- src/test_hooks.c | 4 ++-- src/tsd.c | 2 +- test/analyze/sizes.c | 2 +- test/include/test/bench.h | 4 ++-- test/include/test/test.h | 2 +- test/src/test.c | 6 +++--- test/stress/batch_alloc.c | 16 ++++++++-------- test/unit/double_free.c | 4 ++-- test/unit/fork.c | 2 +- test/unit/hook.c | 6 +++--- test/unit/hpa_background_thread.c | 4 ++-- test/unit/junk.c | 2 +- test/unit/prof_log.c | 12 ++++++------ test/unit/prof_mdump.c | 2 +- test/unit/prof_recent.c | 4 ++-- test/unit/test_hooks.c | 2 +- test/unit/zero_realloc_abort.c | 3 ++- test/unit/zero_realloc_alloc.c | 4 ++-- test/unit/zero_realloc_free.c | 2 +- test/unit/zero_reallocs.c | 2 +- 42 files changed, 82 insertions(+), 80 deletions(-) diff --git a/configure.ac b/configure.ac index 687b221c..9686ac53 100644 --- a/configure.ac +++ b/configure.ac @@ -262,6 +262,7 @@ if test "x$GCC" = "xyes" ; then dnl This one too. 
JE_CFLAGS_ADD([-Wno-missing-field-initializers]) JE_CFLAGS_ADD([-Wno-missing-attributes]) + JE_CFLAGS_ADD([-Wstrict-prototypes]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 8d9513e2..d583c521 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -53,7 +53,7 @@ bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); #ifdef PAGES_CAN_PURGE_FORCED bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length); #endif -bool ehooks_default_split_impl(); +bool ehooks_default_split_impl(void); /* * Merge is the only default extent hook we declare -- see the comment in * ehooks_merge. diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index af03d2f5..41157207 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -144,7 +144,7 @@ struct hook_ralloc_args_s { * Returns an opaque handle to be used when removing the hook. NULL means that * we couldn't install the hook. */ -bool hook_boot(); +bool hook_boot(void); void *hook_install(tsdn_t *tsdn, hooks_t *hooks); /* Uninstalls the hook with the handle previously returned from hook_install. */ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 0b3c76c6..01fe3166 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -143,7 +143,7 @@ struct hpa_shard_s { * is not necessarily a guarantee that it backs its allocations by hugepages, * just that it can function properly given the system it's running on. 
*/ -bool hpa_supported(); +bool hpa_supported(void); bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index c4286b51..016b6507 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -19,7 +19,7 @@ bool prof_bt_keycomp(const void *k1, const void *k2); bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -void prof_unbias_map_init(); +void prof_unbias_map_init(void); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck); prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 412378a2..cce5c8f5 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -51,16 +51,16 @@ extern size_t lg_prof_sample; extern bool prof_booted; void prof_backtrace_hook_set(prof_backtrace_hook_t hook); -prof_backtrace_hook_t prof_backtrace_hook_get(); +prof_backtrace_hook_t prof_backtrace_hook_get(void); void prof_dump_hook_set(prof_dump_hook_t hook); -prof_dump_hook_t prof_dump_hook_get(); +prof_dump_hook_t prof_dump_hook_get(void); void prof_sample_hook_set(prof_sample_hook_t hook); -prof_sample_hook_t prof_sample_hook_get(); +prof_sample_hook_t prof_sample_hook_get(void); void prof_sample_free_hook_set(prof_sample_free_hook_t hook); -prof_sample_free_hook_t prof_sample_free_hook_get(); +prof_sample_free_hook_t prof_sample_free_hook_get(void); /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); diff --git 
a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index b5273010..c0783fc1 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -7,7 +7,7 @@ #include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void -prof_active_assert() { +prof_active_assert(void) { cassert(config_prof); /* * If opt_prof is off, then prof_active must always be off, regardless diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index df410236..959e336b 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -7,7 +7,7 @@ extern malloc_mutex_t prof_recent_dump_mtx; bool prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx); void prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize); void prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata); -bool prof_recent_init(); +bool prof_recent_init(void); void edata_prof_recent_alloc_init(edata_t *edata); /* Used in unit tests. 
*/ @@ -16,7 +16,7 @@ extern prof_recent_list_t prof_recent_alloc_list; edata_t *prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *node); prof_recent_t *edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata); -ssize_t prof_recent_alloc_max_ctl_read(); +ssize_t prof_recent_alloc_max_ctl_read(void); ssize_t prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max); void prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque); diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 3d25a429..0eb50788 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -6,8 +6,8 @@ extern base_t *prof_base; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); -void prof_hooks_init(); -void prof_unwind_init(); +void prof_hooks_init(void); +void prof_unwind_init(void); void prof_sys_thread_name_fetch(tsd_t *tsd); int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); @@ -24,7 +24,7 @@ typedef int (prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef int (prof_dump_open_maps_t)(); +typedef int (prof_dump_open_maps_t)(void); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index 8ec4a710..0a8e76e9 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -20,7 +20,7 @@ struct san_bump_alloc_s { }; static inline bool -san_bump_enabled() { +san_bump_enabled(void) { /* * We enable san_bump allocator only when it's possible to break up a * mapping and unmap a part of it (maps_coalesce). 
This is needed to diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 22804c68..6940f15e 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -8,7 +8,7 @@ typedef struct { } spin_t; static inline void -spin_cpu_spinwait() { +spin_cpu_spinwait(void) { # if HAVE_CPU_SPINWAIT CPU_SPINWAIT; # else diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index 3d530b5c..2b90afe1 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -1,8 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H #define JEMALLOC_INTERNAL_TEST_HOOKS_H -extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(); -extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(); +extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); +extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); #if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST) # define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c6bf28fc..3dd52247 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -177,7 +177,7 @@ void tsd_postfork_child(tsd_t *tsd); */ void tsd_global_slow_inc(tsdn_t *tsdn); void tsd_global_slow_dec(tsdn_t *tsdn); -bool tsd_global_slow(); +bool tsd_global_slow(void); #define TSD_MIN_INIT_STATE_MAX_FETCHED (128) diff --git a/src/ehooks.c b/src/ehooks.c index 383e9de6..da759215 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -159,7 +159,7 @@ ehooks_default_purge_forced(extent_hooks_t *extent_hooks, void *addr, #endif bool -ehooks_default_split_impl() { +ehooks_default_split_impl(void) { if (!maps_coalesce) { /* * Without retain, only whole regions can be purged (required by diff --git a/src/hook.c b/src/hook.c index 493edbbe..77a988d7 100644 --- a/src/hook.c +++ b/src/hook.c @@ -19,7 +19,7 @@ static seq_hooks_t 
hooks[HOOK_MAX]; static malloc_mutex_t hooks_mu; bool -hook_boot() { +hook_boot(void) { return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK, malloc_mutex_rank_exclusive); } @@ -100,7 +100,7 @@ for (int for_each_hook_counter = 0; \ } static bool * -hook_reentrantp() { +hook_reentrantp(void) { /* * We prevent user reentrancy within hooks. This is basically just a * thread-local bool that triggers an early-exit. diff --git a/src/hpa.c b/src/hpa.c index 7462025c..ee41994f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -24,7 +24,7 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool -hpa_supported() { +hpa_supported(void) { #ifdef _WIN32 /* * At least until the API and implementation is somewhat settled, we diff --git a/src/jemalloc.c b/src/jemalloc.c index 4e4e4bee..7771a731 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -766,7 +766,7 @@ malloc_ncpus(void) { * Since otherwise tricky things is possible with percpu arenas in use. 
*/ static bool -malloc_cpu_count_is_deterministic() +malloc_cpu_count_is_deterministic(void) { #ifdef _WIN32 return true; @@ -1807,7 +1807,7 @@ malloc_init_hard_needed(void) { } static bool -malloc_init_hard_a0_locked() { +malloc_init_hard_a0_locked(void) { malloc_initializer = INITIALIZER; JEMALLOC_DIAGNOSTIC_PUSH diff --git a/src/pages.c b/src/pages.c index 41bbef57..e70c6e92 100644 --- a/src/pages.c +++ b/src/pages.c @@ -66,7 +66,7 @@ static int madvise_dont_need_zeros_is_faulty = -1; * * [1]: https://patchwork.kernel.org/patch/10576637/ */ -static int madvise_MADV_DONTNEED_zeroes_pages() +static int madvise_MADV_DONTNEED_zeroes_pages(void) { size_t size = PAGE; diff --git a/src/prof.c b/src/prof.c index 832aa528..e958349e 100644 --- a/src/prof.c +++ b/src/prof.c @@ -562,7 +562,7 @@ prof_backtrace_hook_set(prof_backtrace_hook_t hook) { } prof_backtrace_hook_t -prof_backtrace_hook_get() { +prof_backtrace_hook_get(void) { return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook, ATOMIC_ACQUIRE); } @@ -573,7 +573,7 @@ prof_dump_hook_set(prof_dump_hook_t hook) { } prof_dump_hook_t -prof_dump_hook_get() { +prof_dump_hook_get(void) { return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, ATOMIC_ACQUIRE); } @@ -584,7 +584,7 @@ prof_sample_hook_set(prof_sample_hook_t hook) { } prof_sample_hook_t -prof_sample_hook_get() { +prof_sample_hook_get(void) { return (prof_sample_hook_t)atomic_load_p(&prof_sample_hook, ATOMIC_ACQUIRE); } @@ -595,7 +595,7 @@ prof_sample_free_hook_set(prof_sample_free_hook_t hook) { } prof_sample_free_hook_t -prof_sample_free_hook_get() { +prof_sample_free_hook_get(void) { return (prof_sample_free_hook_t)atomic_load_p(&prof_sample_free_hook, ATOMIC_ACQUIRE); } diff --git a/src/prof_data.c b/src/prof_data.c index c33668ee..d52522b0 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -503,7 +503,7 @@ prof_double_uint64_cast(double d) { } #endif -void prof_unbias_map_init() { +void prof_unbias_map_init(void) { /* See the comment in 
prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { diff --git a/src/prof_recent.c b/src/prof_recent.c index e5b3fb17..b5639b4c 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -16,13 +16,13 @@ prof_recent_list_t prof_recent_alloc_list; malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */ static void -prof_recent_alloc_max_init() { +prof_recent_alloc_max_init(void) { atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, ATOMIC_RELAXED); } static inline ssize_t -prof_recent_alloc_max_get_no_lock() { +prof_recent_alloc_max_get_no_lock(void) { return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED); } @@ -403,7 +403,7 @@ label_rollback: } ssize_t -prof_recent_alloc_max_ctl_read() { +prof_recent_alloc_max_ctl_read(void) { cassert(config_prof); /* Don't bother to acquire the lock. */ return prof_recent_alloc_max_get_no_lock(); @@ -582,7 +582,7 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { #undef PROF_RECENT_PRINT_BUFSIZE bool -prof_recent_init() { +prof_recent_init(void) { cassert(config_prof); prof_recent_alloc_max_init(); diff --git a/src/prof_sys.c b/src/prof_sys.c index 3cbb3a85..467394a5 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -428,7 +428,7 @@ prof_backtrace(tsd_t *tsd, prof_bt_t *bt) { } void -prof_hooks_init() { +prof_hooks_init(void) { prof_backtrace_hook_set(&prof_backtrace_impl); prof_dump_hook_set(NULL); prof_sample_hook_set(NULL); @@ -436,7 +436,7 @@ prof_hooks_init() { } void -prof_unwind_init() { +prof_unwind_init(void) { #ifdef JEMALLOC_PROF_LIBGCC /* * Cause the backtracing machinery to allocate its internal @@ -596,7 +596,7 @@ prof_open_maps_internal(const char *format, ...) 
{ #endif static int -prof_dump_open_maps_impl() { +prof_dump_open_maps_impl(void) { int mfd; cassert(config_prof); diff --git a/src/test_hooks.c b/src/test_hooks.c index ace00d9c..40621199 100644 --- a/src/test_hooks.c +++ b/src/test_hooks.c @@ -6,7 +6,7 @@ * from outside the generated library, so that we can use them in test code. */ JEMALLOC_EXPORT -void (*test_hooks_arena_new_hook)() = NULL; +void (*test_hooks_arena_new_hook)(void) = NULL; JEMALLOC_EXPORT -void (*test_hooks_libc_hook)() = NULL; +void (*test_hooks_libc_hook)(void) = NULL; diff --git a/src/tsd.c b/src/tsd.c index cef7ba58..a4db8e36 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -148,7 +148,7 @@ tsd_local_slow(tsd_t *tsd) { } bool -tsd_global_slow() { +tsd_global_slow(void) { return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0; } diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index 44c9de5e..a48c4f48 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -29,7 +29,7 @@ do_print(const char *name, size_t sz_bytes) { } int -main() { +main(void) { #define P(type) \ do_print(#type, sizeof(type)) P(arena_t); diff --git a/test/include/test/bench.h b/test/include/test/bench.h index 29c6801f..e2a9fc09 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -40,8 +40,8 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, return; } - time_func(&timer_a, nwarmup, niter, (void (*)())func_a); - time_func(&timer_b, nwarmup, niter, (void (*)())func_b); + time_func(&timer_a, nwarmup, niter, (void (*)(void))func_a); + time_func(&timer_b, nwarmup, niter, (void (*)(void))func_b); uint64_t usec_a = timer_usec(&timer_a); char buf_a[FMT_NSECS_BUF_SIZE]; diff --git a/test/include/test/test.h b/test/include/test/test.h index 54610dab..183dfc00 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -569,7 +569,7 @@ label_test_end: \ } \ } while (0) -bool test_is_reentrant(); +bool test_is_reentrant(void); void test_skip(const char *format, 
...) JEMALLOC_FORMAT_PRINTF(1, 2); void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); diff --git a/test/src/test.c b/test/src/test.c index 4cd803e5..e26dbfd4 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -35,7 +35,7 @@ reentrancy_t_str(reentrancy_t r) { } static void -do_hook(bool *hook_ran, void (**hook)()) { +do_hook(bool *hook_ran, void (**hook)(void)) { *hook_ran = true; *hook = NULL; @@ -47,12 +47,12 @@ do_hook(bool *hook_ran, void (**hook)()) { } static void -libc_reentrancy_hook() { +libc_reentrancy_hook(void) { do_hook(&libc_hook_ran, &test_hooks_libc_hook); } static void -arena_new_reentrancy_hook() { +arena_new_reentrancy_hook(void) { do_hook(&arena_new_hook_ran, &test_hooks_arena_new_hook); } diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 427e1cba..6b973bb1 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -124,12 +124,12 @@ compare_with_free(size_t batch, size_t iter, } static void -batch_alloc_without_free_tiny() { +batch_alloc_without_free_tiny(void) { batch_alloc_without_free(TINY_BATCH); } static void -item_alloc_without_free_tiny() { +item_alloc_without_free_tiny(void) { item_alloc_without_free(TINY_BATCH); } @@ -140,12 +140,12 @@ TEST_BEGIN(test_tiny_batch_without_free) { TEST_END static void -batch_alloc_with_free_tiny() { +batch_alloc_with_free_tiny(void) { batch_alloc_with_free(TINY_BATCH); } static void -item_alloc_with_free_tiny() { +item_alloc_with_free_tiny(void) { item_alloc_with_free(TINY_BATCH); } @@ -156,12 +156,12 @@ TEST_BEGIN(test_tiny_batch_with_free) { TEST_END static void -batch_alloc_without_free_huge() { +batch_alloc_without_free_huge(void) { batch_alloc_without_free(HUGE_BATCH); } static void -item_alloc_without_free_huge() { +item_alloc_without_free_huge(void) { item_alloc_without_free(HUGE_BATCH); } @@ -172,12 +172,12 @@ TEST_BEGIN(test_huge_batch_without_free) { TEST_END static void -batch_alloc_with_free_huge() { +batch_alloc_with_free_huge(void) { 
batch_alloc_with_free(HUGE_BATCH); } static void -item_alloc_with_free_huge() { +item_alloc_with_free_huge(void) { item_alloc_with_free(HUGE_BATCH); } diff --git a/test/unit/double_free.c b/test/unit/double_free.c index f1e50cd2..b6ae8f75 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -16,13 +16,13 @@ test_double_free_pre(void) { } static void -test_double_free_post() { +test_double_free_post(void) { expect_b_eq(fake_abort_called, true, "Double-free check didn't fire."); safety_check_set_abort(NULL); } static bool -tcache_enabled() { +tcache_enabled(void) { bool enabled; size_t sz = sizeof(enabled); assert_d_eq( diff --git a/test/unit/fork.c b/test/unit/fork.c index 4137423f..447eb191 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -95,7 +95,7 @@ do_fork_thd(void *arg) { #ifndef _WIN32 static void -do_test_fork_multithreaded() { +do_test_fork_multithreaded(void) { thd_t child; thd_create(&child, do_fork_thd, NULL); do_fork_thd(NULL); diff --git a/test/unit/hook.c b/test/unit/hook.c index 36dbd269..f2a7f190 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -14,7 +14,7 @@ static uintptr_t arg_args_raw[4]; static int call_count = 0; static void -reset_args() { +reset_args(void) { arg_extra = NULL; arg_type = 12345; arg_result = NULL; @@ -40,7 +40,7 @@ alloc_free_size(size_t sz) { * allocation scenarios. */ static void -be_reentrant() { +be_reentrant(void) { /* Let's make sure the tcache is non-empty if enabled. 
*/ alloc_free_size(1); alloc_free_size(1024); @@ -77,7 +77,7 @@ expect_args_raw(uintptr_t *args_raw_expected, int nargs) { } static void -reset() { +reset(void) { call_count = 0; reset_args(); } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 81c25612..774ccb4a 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -2,7 +2,7 @@ #include "test/sleep.h" static void -sleep_for_background_thread_interval() { +sleep_for_background_thread_interval(void) { /* * The sleep interval set in our .sh file is 50ms. So it likely will * run if we sleep for four times that. @@ -11,7 +11,7 @@ sleep_for_background_thread_interval() { } static unsigned -create_arena() { +create_arena(void) { unsigned arena_ind; size_t sz; diff --git a/test/unit/junk.c b/test/unit/junk.c index 543092f1..6c5b8beb 100644 --- a/test/unit/junk.c +++ b/test/unit/junk.c @@ -7,7 +7,7 @@ static void *last_junked_ptr; static size_t last_junked_usize; static void -reset() { +reset(void) { ptr_ind = 0; last_junked_ptr = NULL; last_junked_usize = 0; diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index 5ff208e2..a32fdd0b 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -4,11 +4,11 @@ #define N_PARAM 100 #define N_THREADS 10 -static void expect_rep() { +static void expect_rep(void) { expect_b_eq(prof_log_rep_check(), false, "Rep check failed"); } -static void expect_log_empty() { +static void expect_log_empty(void) { expect_zu_eq(prof_log_bt_count(), 0, "The log has backtraces; it isn't empty"); expect_zu_eq(prof_log_thr_count(), 0, @@ -19,7 +19,7 @@ static void expect_log_empty() { void *buf[N_PARAM]; -static void f() { +static void f(void) { int i; for (i = 0; i < N_PARAM; i++) { buf[i] = malloc(100); @@ -91,18 +91,18 @@ TEST_BEGIN(test_prof_log_many_threads) { } TEST_END -static void f3() { +static void f3(void) { void *p = malloc(100); free(p); } -static void f1() { +static void f1(void) { void *p = malloc(100); 
f3(); free(p); } -static void f2() { +static void f2(void) { void *p = malloc(100); free(p); } diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index 75b3a515..bcbb961a 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -129,7 +129,7 @@ TEST_BEGIN(test_mdump_output_error) { TEST_END static int -prof_dump_open_maps_error() { +prof_dump_open_maps_error(void) { return -1; } diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 2cf699d8..24ee6f42 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -9,7 +9,7 @@ const char *test_thread_name = "test_thread"; /* Invariant before and after every test (when config_prof is on) */ static void -confirm_prof_setup() { +confirm_prof_setup(void) { /* Options */ assert_true(opt_prof, "opt_prof not on"); assert_true(opt_prof_active, "opt_prof_active not on"); @@ -356,7 +356,7 @@ test_dump_write_cb(void *not_used, const char *str) { } static void -call_dump() { +call_dump(void) { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index 8cd2b3bb..41e7bf35 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -3,7 +3,7 @@ static bool hook_called = false; static void -hook() { +hook(void) { hook_called = true; } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c index a880d104..f014cdc2 100644 --- a/test/unit/zero_realloc_abort.c +++ b/test/unit/zero_realloc_abort.c @@ -4,7 +4,8 @@ static bool abort_called = false; -void set_abort_called() { +void set_abort_called(const char *message) { + (void)message; abort_called = true; }; diff --git a/test/unit/zero_realloc_alloc.c b/test/unit/zero_realloc_alloc.c index 65e07bdb..6954818c 100644 --- a/test/unit/zero_realloc_alloc.c +++ b/test/unit/zero_realloc_alloc.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static uint64_t -allocated() { +allocated(void) { 
if (!config_stats) { return 0; } @@ -13,7 +13,7 @@ allocated() { } static uint64_t -deallocated() { +deallocated(void) { if (!config_stats) { return 0; } diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c index baed86c9..277f219d 100644 --- a/test/unit/zero_realloc_free.c +++ b/test/unit/zero_realloc_free.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static uint64_t -deallocated() { +deallocated(void) { if (!config_stats) { return 0; } diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c index 66c7a404..a9077222 100644 --- a/test/unit/zero_reallocs.c +++ b/test/unit/zero_reallocs.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" static size_t -zero_reallocs() { +zero_reallocs(void) { if (!config_stats) { return 0; } From e249d1a2a1eef5bd0b329f0575f9d952a5e73522 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 5 Jul 2023 14:02:14 -0700 Subject: [PATCH 2308/2608] Remove unreachable code. --- include/jemalloc/internal/bit_util.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index bac59140..70fa4bc9 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -340,7 +340,6 @@ ffs_u32(uint32_t x) { #else #error No implementation for 32-bit ffs() #endif - return ffs_u(x); } static inline unsigned @@ -350,7 +349,6 @@ fls_u32(uint32_t x) { #else #error No implementation for 32-bit fls() #endif - return fls_u(x); } static inline uint64_t From 589c63b4244e60dcfe74861a2b110b545182216f Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 18 May 2023 10:22:58 -0700 Subject: [PATCH 2309/2608] Make eligible global variables `static` and/or `const` For better or worse, Jemalloc has a significant number of global variables. 
Making all eligible global variables `static` and/or `const` at least makes it slightly easier to reason about them, as these qualifications communicate to the programmer restrictions on their use without having to `grep` the whole codebase. --- include/jemalloc/internal/arena_externs.h | 3 +-- include/jemalloc/internal/base.h | 2 +- include/jemalloc/internal/extent_dss.h | 2 +- include/jemalloc/internal/hpa_hooks.h | 2 +- include/jemalloc/internal/jemalloc_internal_externs.h | 6 +++--- include/jemalloc/internal/nstime.h | 2 +- include/jemalloc/internal/pa.h | 2 +- include/jemalloc/internal/pages.h | 2 +- src/arena.c | 4 ++-- src/base.c | 2 +- src/extent_dss.c | 2 +- src/hpa_hooks.c | 2 +- src/jemalloc.c | 8 ++++---- src/nstime.c | 2 +- src/pa.c | 2 +- src/pages.c | 2 +- src/prof.c | 8 ++++---- src/prof_log.c | 2 +- src/prof_sys.c | 2 -- src/stats.c | 4 ++-- test/unit/hpa.c | 2 +- 21 files changed, 30 insertions(+), 33 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 2d82ad8f..8e323639 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -18,11 +18,10 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; extern percpu_arena_mode_t opt_percpu_arena; -extern const char *percpu_arena_mode_names[]; +extern const char *const percpu_arena_mode_names[]; extern div_info_t arena_binind_div_info[SC_NBINS]; -extern malloc_mutex_t arenas_lock; extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 9b2c9fb1..23207563 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -23,7 +23,7 @@ typedef enum metadata_thp_mode_e metadata_thp_mode_t; #define METADATA_THP_DEFAULT metadata_thp_disabled extern metadata_thp_mode_t opt_metadata_thp; -extern const char *metadata_thp_mode_names[]; +extern const char *const 
metadata_thp_mode_names[]; /* Embedded at the beginning of every block of base-managed virtual memory. */ diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index e8f02ce2..38f04340 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -11,7 +11,7 @@ typedef enum { #define DSS_PREC_DEFAULT dss_prec_secondary #define DSS_DEFAULT "secondary" -extern const char *dss_prec_names[]; +extern const char *const dss_prec_names[]; extern const char *opt_dss; diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 4ea221cb..841f529e 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -12,6 +12,6 @@ struct hpa_hooks_s { uint64_t (*ms_since)(nstime_t *r_time); }; -extern hpa_hooks_t hpa_hooks_default; +extern const hpa_hooks_t hpa_hooks_default; #endif /* JEMALLOC_INTERNAL_HPA_HOOKS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index d90f6ddb..b1e5bde9 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -22,8 +22,8 @@ extern sec_opts_t opt_hpa_sec_opts; extern const char *opt_junk; extern bool opt_junk_alloc; extern bool opt_junk_free; -extern void (*junk_free_callback)(void *ptr, size_t size); -extern void (*junk_alloc_callback)(void *ptr, size_t size); +extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size); +extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_experimental_infallible_new; @@ -31,7 +31,7 @@ extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; -extern const char *zero_realloc_mode_names[]; +extern const char *const zero_realloc_mode_names[]; 
extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 486e5cca..ad1ae532 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -56,7 +56,7 @@ enum prof_time_res_e { typedef enum prof_time_res_e prof_time_res_t; extern prof_time_res_t opt_prof_time_res; -extern const char *prof_time_res_mode_names[]; +extern const char *const prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void nstime_init_zero(nstime_t *time) { diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 4748a05b..c8aed932 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -131,7 +131,7 @@ pa_shard_ehooks_get(pa_shard_t *shard) { /* Returns true on error. */ bool pa_central_init(pa_central_t *central, base_t *base, bool hpa, - hpa_hooks_t *hpa_hooks); + const hpa_hooks_t *hpa_hooks); /* Returns true on error. */ bool pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 361de587..c9d10ce2 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -102,7 +102,7 @@ typedef enum { #define THP_MODE_DEFAULT thp_mode_default extern thp_mode_t opt_thp; extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ -extern const char *thp_mode_names[]; +extern const char *const thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); diff --git a/src/arena.c b/src/arena.c index b154b7a5..a8890e57 100644 --- a/src/arena.c +++ b/src/arena.c @@ -21,7 +21,7 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS * Define names for both unininitialized and initialized phases, so that * options and mallctl processing are straightforward. 
*/ -const char *percpu_arena_mode_names[] = { +const char *const percpu_arena_mode_names[] = { "percpu", "phycpu", "disabled", @@ -37,7 +37,7 @@ static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; emap_t arena_emap_global; -pa_central_t arena_pa_central_global; +static pa_central_t arena_pa_central_global; div_info_t arena_binind_div_info[SC_NBINS]; diff --git a/src/base.c b/src/base.c index 7f4d6756..16f90495 100644 --- a/src/base.c +++ b/src/base.c @@ -22,7 +22,7 @@ static base_t *b0; metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; -const char *metadata_thp_mode_names[] = { +const char *const metadata_thp_mode_names[] = { "disabled", "auto", "always" diff --git a/src/extent_dss.c b/src/extent_dss.c index 9a35bacf..0b846296 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -10,7 +10,7 @@ const char *opt_dss = DSS_DEFAULT; -const char *dss_prec_names[] = { +const char *const dss_prec_names[] = { "disabled", "primary", "secondary", diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index ade581e8..6048f382 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -11,7 +11,7 @@ static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); -hpa_hooks_t hpa_hooks_default = { +const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, &hpa_hooks_unmap, &hpa_hooks_purge, diff --git a/src/jemalloc.c b/src/jemalloc.c index 7771a731..8de30279 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -121,7 +121,7 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -const char *zero_realloc_mode_names[] = { +const char *const zero_realloc_mode_names[] = { "alloc", "free", "abort", @@ -142,8 +142,8 @@ static void default_junk_free(void *ptr, size_t usize) { memset(ptr, junk_free_byte, usize); } -void (*junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; 
-void (*junk_free_callback)(void *ptr, size_t size) = &default_junk_free; +void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; +void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size) = &default_junk_free; bool opt_utrace = false; bool opt_xmalloc = false; @@ -158,7 +158,7 @@ unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; /* Protects arenas initialization. */ -malloc_mutex_t arenas_lock; +static malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ bool opt_hpa = false; diff --git a/src/nstime.c b/src/nstime.c index a1a53777..7fb9100e 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -228,7 +228,7 @@ nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; prof_time_res_t opt_prof_time_res = prof_time_res_default; -const char *prof_time_res_mode_names[] = { +const char *const prof_time_res_mode_names[] = { "default", "high", }; diff --git a/src/pa.c b/src/pa.c index 18c850d7..63eef2b5 100644 --- a/src/pa.c +++ b/src/pa.c @@ -17,7 +17,7 @@ pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { bool pa_central_init(pa_central_t *central, base_t *base, bool hpa, - hpa_hooks_t *hpa_hooks) { + const hpa_hooks_t *hpa_hooks) { bool err; if (hpa) { err = hpa_central_init(¢ral->hpa, base, hpa_hooks); diff --git a/src/pages.c b/src/pages.c index e70c6e92..249d7c5b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -42,7 +42,7 @@ static int mmap_flags; #endif static bool os_overcommits; -const char *thp_mode_names[] = { +const char *const thp_mode_names[] = { "default", "always", "never", diff --git a/src/prof.c b/src/prof.c index e958349e..9986a329 100644 --- a/src/prof.c +++ b/src/prof.c @@ -73,16 +73,16 @@ static malloc_mutex_t next_thr_uid_mtx; bool prof_booted = false; /* Logically a prof_backtrace_hook_t. */ -atomic_p_t prof_backtrace_hook; +static atomic_p_t prof_backtrace_hook; /* Logically a prof_dump_hook_t. 
*/ -atomic_p_t prof_dump_hook; +static atomic_p_t prof_dump_hook; /* Logically a prof_sample_hook_t. */ -atomic_p_t prof_sample_hook; +static atomic_p_t prof_sample_hook; /* Logically a prof_sample_free_hook_t. */ -atomic_p_t prof_sample_free_hook; +static atomic_p_t prof_sample_free_hook; /******************************************************************************/ diff --git a/src/prof_log.c b/src/prof_log.c index 384d5e38..f4000aec 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -25,7 +25,7 @@ enum prof_logging_state_e { * - started: log_start called, log_stop not called yet. Allocations are logged. * - dumping: log_stop called but not finished; samples are not logged anymore. */ -prof_logging_state_t prof_logging_state = prof_logging_state_stopped; +static prof_logging_state_t prof_logging_state = prof_logging_state_stopped; /* Used in unit tests. */ static bool prof_log_dummy = false; diff --git a/src/prof_sys.c b/src/prof_sys.c index 467394a5..1f8ecb62 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -27,8 +27,6 @@ malloc_mutex_t prof_dump_filename_mtx; -bool prof_do_mock = false; - static uint64_t prof_dump_seq; static uint64_t prof_dump_iseq; static uint64_t prof_dump_mseq; diff --git a/src/stats.c b/src/stats.c index 59db4f8e..d80af226 100644 --- a/src/stats.c +++ b/src/stats.c @@ -9,13 +9,13 @@ #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/prof_stats.h" -const char *global_mutex_names[mutex_prof_num_global_mutexes] = { +static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, MUTEX_PROF_GLOBAL_MUTEXES #undef OP }; -const char *arena_mutex_names[mutex_prof_num_arena_mutexes] = { +static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = { #define OP(mtx) #mtx, MUTEX_PROF_ARENA_MUTEXES #undef OP diff --git a/test/unit/hpa.c b/test/unit/hpa.c index f7874281..64aef59e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,7 +37,7 @@ static hpa_shard_opts_t 
test_hpa_shard_opts_default = { }; static hpa_shard_t * -create_test_data(hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); From 5711dc31d87c5aa5b4dd17a0bda850516a45ae53 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 6 Jul 2023 16:51:51 -0700 Subject: [PATCH 2310/2608] Only enable `-Wstrict-prototypes` in CI to unbreak feature detection Adding `-Wstrict-prototypes` to the default `CFLAGS` in PR #2473 had the non-obvious side-effect of breaking configure-time feature detection, because the [test-program `autoconf` generates for feature detection](https://www.gnu.org/software/autoconf/manual/autoconf-2.67/html_node/Generating-Sources.html#:~:text=main%20()) defines `main` as: ```c int main() ``` Which causes all feature checks to fail, since this triggers `-Wstrict-prototypes` and the feature checks use `-Werror`. Resolved by only adding `-Wstrict-prototypes` to `EXTRA_{CFLAGS,CXXFLAGS}` in CI, since these flags are not used during feature detection and we control which compiler is used. --- configure.ac | 1 - scripts/run_static_analysis.sh | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 9686ac53..687b221c 100644 --- a/configure.ac +++ b/configure.ac @@ -262,7 +262,6 @@ if test "x$GCC" = "xyes" ; then dnl This one too. 
JE_CFLAGS_ADD([-Wno-missing-field-initializers]) JE_CFLAGS_ADD([-Wno-missing-attributes]) - JE_CFLAGS_ADD([-Wstrict-prototypes]) JE_CFLAGS_ADD([-pipe]) JE_CFLAGS_ADD([-g3]) elif test "x$je_cv_msvc" = "xyes" ; then diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index db870689..68ceae55 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -13,7 +13,7 @@ compile_time_malloc_conf='background_thread:true,'\ 'prof_unbias:false,'\ 'prof_time_resolution:high' -./autogen.sh \ +EXTRA_CFLAGS='-Wstrict-prototypes' EXTRA_CXXFLAGS='-Wstrict-prototypes' ./autogen.sh \ --with-private-namespace=jemalloc_ \ --disable-cache-oblivious \ --enable-prof \ From 1d9e9c2ed6f0cb3bf168c0d602ae0a289ee27093 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 6 Jul 2023 16:27:56 -0700 Subject: [PATCH 2311/2608] Fix inconsistent parameter names between definition/declaration pairs For the sake of consistency, function definitions and their corresponding declarations should use the same names for parameters. I've enabled this check in static analysis to prevent this issue from occurring again in the future. 
--- include/jemalloc/internal/arena_externs.h | 4 ++-- include/jemalloc/internal/bin.h | 2 +- include/jemalloc/internal/hook.h | 2 +- include/jemalloc/internal/hpdata.h | 2 +- include/jemalloc/internal/tcache_externs.h | 14 +++++++------- include/jemalloc/internal/tsd.h | 2 +- scripts/run_static_analysis.sh | 15 ++++++++++++--- 7 files changed, 25 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8e323639..22d7fff7 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -47,9 +47,9 @@ edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata); void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldsize); + edata_t *edata, size_t oldusize); void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldsize); + edata_t *edata, size_t oldusize); bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms); ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 63f97395..027af088 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -48,7 +48,7 @@ struct bins_s { bin_t *bin_shards; }; -void bin_shard_sizes_boot(unsigned bin_shards[SC_NBINS]); +void bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]); bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 41157207..27f94841 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -146,7 +146,7 @@ struct hook_ralloc_args_s { */ bool hook_boot(void); -void *hook_install(tsdn_t *tsdn, hooks_t 
*hooks); +void *hook_install(tsdn_t *tsdn, hooks_t *to_install); /* Uninstalls the hook with the handle previously returned from hook_install. */ void hook_remove(tsdn_t *tsdn, void *opaque); diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 5bf7aae8..36918258 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -359,7 +359,7 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); * offset within that allocation. */ void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); -void hpdata_unreserve(hpdata_t *hpdata, void *begin, size_t sz); +void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); /* * The hpdata_purge_prepare_t allows grabbing the metadata required to purge diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index a2ab7101..37f61646 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -36,14 +36,14 @@ extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *tbin, szind_t binind, bool *tcache_success); + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, - szind_t binind, unsigned rem); -void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *bin, - szind_t binind, bool is_small); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, unsigned rem); +void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool is_small); 
void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 3dd52247..c2f432e8 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -164,7 +164,7 @@ void malloc_tsd_dalloc(void *wrapper); tsd_t *malloc_tsd_boot0(void); void malloc_tsd_boot1(void); void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); void tsd_state_set(tsd_t *tsd, uint8_t new_state); void tsd_slow_update(tsd_t *tsd); void tsd_prefork(tsd_t *tsd); diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index 68ceae55..4994fe64 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -27,7 +27,7 @@ EXTRA_CFLAGS='-Wstrict-prototypes' EXTRA_CXXFLAGS='-Wstrict-prototypes' ./autoge # otherwise you'll get tons of warnings for things # that are already covered by `assert`s. -bear -- make -s -j $(nproc) +bear -- make -s -j "$(nproc)" # We end up with lots of duplicate entries in the compilation database, one for # each output file type (e.g. .o, .d, .sym, etc.). 
There must be exactly one # entry for each file in the compilation database in order for @@ -35,9 +35,18 @@ bear -- make -s -j $(nproc) jq '[.[] | select(.output | test("/[^./]*\\.o$"))]' compile_commands.json > compile_commands.json.tmp mv compile_commands.json.tmp compile_commands.json -CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs $(nproc) \ +# CodeChecker has a bug where it freaks out if you supply the skipfile via process substitution, +# so we resort to manually creating a temporary file +skipfile=$(mktemp) +# The single-quotes are deliberate here, you want `$skipfile` to be evaluated upon exit +trap 'rm -f $skipfile' EXIT +echo '-**/stdlib.h' > "$skipfile" +CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(nproc)" \ --ctu --compile-uniqueing strict --output static_analysis_raw_results \ - --analyzers clang-tidy clangsa + --analyzers clangsa clang-tidy --skip "$skipfile" \ + --enable readability-inconsistent-declaration-parameter-name + # `--enable` is additive, the vast majority of the checks we want are + # enabled by default. html_output_dir="${1:-static_analysis_results}" result=${2:-/dev/null} From 65d3b5989b0afa44f0703bc1ca81f2ba74ed90a5 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 8 Jun 2023 12:56:16 -0700 Subject: [PATCH 2312/2608] Print test error messages in color when stderr is a terminal When stderr is a terminal and supports color, print error messages from tests in red to make them stand out from the surrounding output. --- test/test.sh.in | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/test/test.sh.in b/test/test.sh.in index 39302fff..b4fbb355 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -63,8 +63,14 @@ for t in $@; do fail_count=$((fail_count+1)) ;; *) - echo "Test harness error: ${t} w/ MALLOC_CONF=\"${MALLOC_CONF}\"" 1>&2 - echo "Use prefix to debug, e.g. 
JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh ${t}" 1>&2 + color_start='' + color_end='' + if [ -t 2 ] && tput colors >/dev/null 2>&1; then + color_start='\033[31m' + color_end='\033[0m' + fi + printf "${color_start}Test harness error: %s w/ MALLOC_CONF=\"%s\"${color_end}\n" "${t}" "${MALLOC_CONF}" 1>&2 + printf "${color_start}Use prefix to debug, e.g. JEMALLOC_TEST_PREFIX=\"gdb --args\" sh test/test.sh %s${color_end}\n" "${t}" 1>&2 exit 1 esac done From 314c073a38adfbfc97ed2913e287e8e642fc46ca Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 10 Jul 2023 14:25:53 -0700 Subject: [PATCH 2313/2608] Print the failed assertion before aborting in test cases This makes it faster and easier to debug, so that you don't need to fire up a debugger just to see which assertion triggered in a failing test. --- test/include/test/test.h | 38 +++++++------------------------------- test/src/test.c | 5 ++++- 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 183dfc00..80ca7cbb 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -13,11 +13,7 @@ __func__, __FILE__, __LINE__, \ #a, #b, a_, b_); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -230,11 +226,7 @@ #a, #b, a_ ? "true" : "false", \ b_ ? "true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -251,11 +243,7 @@ #a, #b, a_ ? "true" : "false", \ b_ ? 
"true" : "false"); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -275,11 +263,7 @@ "\"%s\" differs from \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -293,11 +277,7 @@ "\"%s\" same as \"%s\": ", \ __func__, __FILE__, __LINE__, #a, #b, a, b); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } \ } while (0) @@ -311,11 +291,7 @@ "%s:%s:%d: Unreachable code reached: ", \ __func__, __FILE__, __LINE__); \ malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - if (may_abort) { \ - abort(); \ - } else { \ - p_test_fail(prefix, message); \ - } \ + p_test_fail(may_abort, prefix, message); \ } while (0) #define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__) @@ -580,4 +556,4 @@ test_status_t p_test_no_reentrancy(test_t *t, ...); test_status_t p_test_no_malloc_init(test_t *t, ...); void p_test_init(const char *name); void p_test_fini(void); -void p_test_fail(const char *prefix, const char *message); +void p_test_fail(bool may_abort, const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index e26dbfd4..8b69d74a 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -228,7 +228,10 @@ p_test_no_malloc_init(test_t *t, ...) 
{ } void -p_test_fail(const char *prefix, const char *message) { +p_test_fail(bool may_abort, const char *prefix, const char *message) { malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); test_status = test_status_fail; + if (may_abort) { + abort(); + } } From 36ca0c1b7de5fc92e6be48f73f28a6dce0e8890e Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 12 Jul 2023 09:57:46 -0700 Subject: [PATCH 2314/2608] Stop concealing pointer provenance in `phn_link_get` At least for LLVM, [casting from an integer to a pointer hides provenance information](https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html) and inhibits optimizations. Here's a [Godbolt link](https://godbolt.org/z/5bYPcKoWT) showing how this change removes a couple unnecessary branches in `phn_merge_siblings`, which is a very hot function. Canary profiles show only minor improvements (since most of the cost of this function is in cache misses), but there's no reason we shouldn't take it. --- include/jemalloc/internal/ph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 1fabee5d..89de8663 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -73,7 +73,7 @@ struct ph_s { JEMALLOC_ALWAYS_INLINE phn_link_t * phn_link_get(void *phn, size_t offset) { - return (phn_link_t *)(((uintptr_t)phn) + offset); + return (phn_link_t *)(((char *)phn) + offset); } JEMALLOC_ALWAYS_INLINE void From 856db56f6ec54f59491fa7897dab9a23d5bf9ff4 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 12 Jun 2023 16:05:18 -0700 Subject: [PATCH 2315/2608] Move tsd implementation details into `tsd_internals.h` This is a prerequisite to achieving self-contained headers. 
Previously, the various tsd implementation headers (`tsd_generic.h`, `tsd_tls.h`, `tsd_malloc_thread_cleanup.h`, and `tsd_win.h`) relied implicitly on being included in `tsd.h` after a variety of dependencies had been defined above them. This commit instead makes these dependencies explicit by splitting them out into a separate file, `tsd_internals.h`, which each of the tsd implementation headers includes directly. --- include/jemalloc/internal/tsd.h | 298 ----------------- include/jemalloc/internal/tsd_generic.h | 2 + include/jemalloc/internal/tsd_internals.h | 303 ++++++++++++++++++ .../internal/tsd_malloc_thread_cleanup.h | 2 + include/jemalloc/internal/tsd_tls.h | 2 + include/jemalloc/internal/tsd_win.h | 2 + 6 files changed, 311 insertions(+), 298 deletions(-) create mode 100644 include/jemalloc/internal/tsd_internals.h diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c2f432e8..e36ffc6d 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -1,304 +1,6 @@ #ifndef JEMALLOC_INTERNAL_TSD_H #define JEMALLOC_INTERNAL_TSD_H -#include "jemalloc/internal/activity_callback.h" -#include "jemalloc/internal/arena_types.h" -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/bin_types.h" -#include "jemalloc/internal/jemalloc_internal_externs.h" -#include "jemalloc/internal/peak.h" -#include "jemalloc/internal/prof_types.h" -#include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_types.h" -#include "jemalloc/internal/tcache_structs.h" -#include "jemalloc/internal/util.h" -#include "jemalloc/internal/witness.h" - -/* - * Thread-Specific-Data layout - * - * At least some thread-local data gets touched on the fast-path of almost all - * malloc operations. But much of it is only necessary down slow-paths, or - * testing. We want to colocate the fast-path data so that it can live on the - * same cacheline if possible. 
So we define three tiers of hotness: - * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths. - * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general; - * there are "semi-slow" paths like "not a sized deallocation, but can still - * live in the tcache". We'll want to keep these closer to the fast-path - * data. - * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all. - * - * An additional concern is that the larger tcache bins won't be used (we have a - * bin per size class, but by default only cache relatively small objects). So - * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the - * TSD_DATA_SLOWER tier. - * - * As a result of all this, we put the slow data first, then the fast data, then - * the slower data, while keeping the tcache as the last element of the fast - * data (so that the fast -> slower transition happens midway through the - * tcache). While we don't yet play alignment tricks to guarantee it, this - * increases our odds of getting some cache/page locality on fast paths. 
- */ - -#ifdef JEMALLOC_JET -typedef void (*test_callback_t)(int *); -# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 -# define MALLOC_TEST_TSD \ - O(test_data, int, int) \ - O(test_callback, test_callback_t, int) -# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL -#else -# define MALLOC_TEST_TSD -# define MALLOC_TEST_TSD_INITIALIZER -#endif - -typedef ql_elm(tsd_t) tsd_link_t; - -/* O(name, type, nullable type) */ -#define TSD_DATA_SLOW \ - O(tcache_enabled, bool, bool) \ - O(reentrancy_level, int8_t, int8_t) \ - O(min_init_state_nfetched, uint8_t, uint8_t) \ - O(thread_allocated_last_event, uint64_t, uint64_t) \ - O(thread_allocated_next_event, uint64_t, uint64_t) \ - O(thread_deallocated_last_event, uint64_t, uint64_t) \ - O(thread_deallocated_next_event, uint64_t, uint64_t) \ - O(tcache_gc_event_wait, uint64_t, uint64_t) \ - O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ - O(prof_sample_event_wait, uint64_t, uint64_t) \ - O(prof_sample_last_event, uint64_t, uint64_t) \ - O(stats_interval_event_wait, uint64_t, uint64_t) \ - O(stats_interval_last_event, uint64_t, uint64_t) \ - O(peak_alloc_event_wait, uint64_t, uint64_t) \ - O(peak_dalloc_event_wait, uint64_t, uint64_t) \ - O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ - O(prng_state, uint64_t, uint64_t) \ - O(san_extents_until_guard_small, uint64_t, uint64_t) \ - O(san_extents_until_guard_large, uint64_t, uint64_t) \ - O(iarena, arena_t *, arena_t *) \ - O(arena, arena_t *, arena_t *) \ - O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ - O(sec_shard, uint8_t, uint8_t) \ - O(binshards, tsd_binshards_t, tsd_binshards_t)\ - O(tsd_link, tsd_link_t, tsd_link_t) \ - O(in_hook, bool, bool) \ - O(peak, peak_t, peak_t) \ - O(activity_callback_thunk, activity_callback_thunk_t, \ - activity_callback_thunk_t) \ - O(tcache_slow, tcache_slow_t, tcache_slow_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) - -#define TSD_DATA_SLOW_INITIALIZER \ - /* tcache_enabled */ 
TCACHE_ENABLED_ZERO_INITIALIZER, \ - /* reentrancy_level */ 0, \ - /* min_init_state_nfetched */ 0, \ - /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ 0, \ - /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ 0, \ - /* tcache_gc_event_wait */ 0, \ - /* tcache_gc_dalloc_event_wait */ 0, \ - /* prof_sample_event_wait */ 0, \ - /* prof_sample_last_event */ 0, \ - /* stats_interval_event_wait */ 0, \ - /* stats_interval_last_event */ 0, \ - /* peak_alloc_event_wait */ 0, \ - /* peak_dalloc_event_wait */ 0, \ - /* prof_tdata */ NULL, \ - /* prng_state */ 0, \ - /* san_extents_until_guard_small */ 0, \ - /* san_extents_until_guard_large */ 0, \ - /* iarena */ NULL, \ - /* arena */ NULL, \ - /* arena_decay_ticker */ \ - TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ - /* sec_shard */ (uint8_t)-1, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ - /* tsd_link */ {NULL}, \ - /* in_hook */ false, \ - /* peak */ PEAK_INITIALIZER, \ - /* activity_callback_thunk */ \ - ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ - /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ - /* rtree_ctx */ RTREE_CTX_INITIALIZER, - -/* O(name, type, nullable type) */ -#define TSD_DATA_FAST \ - O(thread_allocated, uint64_t, uint64_t) \ - O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ - O(thread_deallocated, uint64_t, uint64_t) \ - O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(tcache, tcache_t, tcache_t) - -#define TSD_DATA_FAST_INITIALIZER \ - /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ 0, \ - /* thread_deallocated */ 0, \ - /* thread_deallocated_next_event_fast */ 0, \ - /* tcache */ TCACHE_ZERO_INITIALIZER, - -/* O(name, type, nullable type) */ -#define TSD_DATA_SLOWER \ - O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ - MALLOC_TEST_TSD - -#define TSD_DATA_SLOWER_INITIALIZER \ - /* witness */ WITNESS_TSD_INITIALIZER \ - /* test data */ MALLOC_TEST_TSD_INITIALIZER - - -#define 
TSD_INITIALIZER { \ - TSD_DATA_SLOW_INITIALIZER \ - /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ - TSD_DATA_FAST_INITIALIZER \ - TSD_DATA_SLOWER_INITIALIZER \ -} - -#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -void _malloc_tsd_cleanup_register(bool (*f)(void)); -#endif - -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); -tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -void tsd_cleanup(void *arg); -tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); -void tsd_state_set(tsd_t *tsd, uint8_t new_state); -void tsd_slow_update(tsd_t *tsd); -void tsd_prefork(tsd_t *tsd); -void tsd_postfork_parent(tsd_t *tsd); -void tsd_postfork_child(tsd_t *tsd); - -/* - * Call ..._inc when your module wants to take all threads down the slow paths, - * and ..._dec when it no longer needs to. - */ -void tsd_global_slow_inc(tsdn_t *tsdn); -void tsd_global_slow_dec(tsdn_t *tsdn); -bool tsd_global_slow(void); - -#define TSD_MIN_INIT_STATE_MAX_FETCHED (128) - -enum { - /* Common case --> jnz. */ - tsd_state_nominal = 0, - /* Initialized but on slow path. */ - tsd_state_nominal_slow = 1, - /* - * Some thread has changed global state in such a way that all nominal - * threads need to recompute their fast / slow status the next time they - * get a chance. - * - * Any thread can change another thread's status *to* recompute, but - * threads are the only ones who can change their status *from* - * recompute. - */ - tsd_state_nominal_recompute = 2, - /* - * The above nominal states should be lower values. We use - * tsd_nominal_max to separate nominal states from threads in the - * process of being born / dying. - */ - tsd_state_nominal_max = 2, - - /* - * A thread might free() during its death as its only allocator action; - * in such scenarios, we need tsd, but set up in such a way that no - * cleanup is necessary. - */ - tsd_state_minimal_initialized = 3, - /* States during which we know we're in thread death. 
*/ - tsd_state_purgatory = 4, - tsd_state_reincarnated = 5, - /* - * What it says on the tin; tsd that hasn't been initialized. Note - * that even when the tsd struct lives in TLS, when need to keep track - * of stuff like whether or not our pthread destructors have been - * scheduled, so this really truly is different than the nominal state. - */ - tsd_state_uninitialized = 6 -}; - -/* - * Some TSD accesses can only be done in a nominal state. To enforce this, we - * wrap TSD member access in a function that asserts on TSD state, and mangle - * field names to prevent touching them accidentally. - */ -#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n - -#ifdef JEMALLOC_U8_ATOMICS -# define tsd_state_t atomic_u8_t -# define tsd_atomic_load atomic_load_u8 -# define tsd_atomic_store atomic_store_u8 -# define tsd_atomic_exchange atomic_exchange_u8 -#else -# define tsd_state_t atomic_u32_t -# define tsd_atomic_load atomic_load_u32 -# define tsd_atomic_store atomic_store_u32 -# define tsd_atomic_exchange atomic_exchange_u32 -#endif - -/* The actual tsd. */ -struct tsd_s { - /* - * The contents should be treated as totally opaque outside the tsd - * module. Access any thread-local state through the getters and - * setters below. - */ - -#define O(n, t, nt) \ - t TSD_MANGLE(n); - - TSD_DATA_SLOW - /* - * We manually limit the state to just a single byte. Unless the 8-bit - * atomics are unavailable (which is rare). - */ - tsd_state_t state; - TSD_DATA_FAST - TSD_DATA_SLOWER -#undef O -}; - -JEMALLOC_ALWAYS_INLINE uint8_t -tsd_state_get(tsd_t *tsd) { - /* - * This should be atomic. Unfortunately, compilers right now can't tell - * that this can be done as a memory comparison, and forces a load into - * a register that hurts fast-path performance. 
- */ - /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */ - return *(uint8_t *)&tsd->state; -} - -/* - * Wrapper around tsd_t that makes it possible to avoid implicit conversion - * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be - * explicitly converted to tsd_t, which is non-nullable. - */ -struct tsdn_s { - tsd_t tsd; -}; -#define TSDN_NULL ((tsdn_t *)0) -JEMALLOC_ALWAYS_INLINE tsdn_t * -tsd_tsdn(tsd_t *tsd) { - return (tsdn_t *)tsd; -} - -JEMALLOC_ALWAYS_INLINE bool -tsdn_null(const tsdn_t *tsdn) { - return tsdn == NULL; -} - -JEMALLOC_ALWAYS_INLINE tsd_t * -tsdn_tsd(tsdn_t *tsdn) { - assert(!tsdn_null(tsdn)); - - return &tsdn->tsd; -} - /* * We put the platform-specific data declarations and inlines into their own * header files to avoid cluttering this file. They define tsd_boot0, diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index a718472f..c5648f63 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_GENERIC_H +#include "jemalloc/internal/tsd_internals.h" + typedef struct tsd_init_block_s tsd_init_block_t; struct tsd_init_block_s { ql_elm(tsd_init_block_t) link; diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h new file mode 100644 index 00000000..813580c0 --- /dev/null +++ b/include/jemalloc/internal/tsd_internals.h @@ -0,0 +1,303 @@ +#ifdef JEMALLOC_INTERNAL_TSD_INTERNALS_H +#error This file should be included only once, by one of tsd_malloc_thread_cleanup.h, tsd_tls.h, tsd_generic.h, or tsd_win.h +#endif +#define JEMALLOC_INTERNAL_TSD_INTERNALS_H + +#include "jemalloc/internal/activity_callback.h" +#include "jemalloc/internal/arena_types.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bin_types.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/peak.h" +#include 
"jemalloc/internal/prof_types.h" +#include "jemalloc/internal/ql.h" +#include "jemalloc/internal/rtree_tsd.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/util.h" +#include "jemalloc/internal/witness.h" +#include "jemalloc/internal/tsd_types.h" + +/* + * Thread-Specific-Data layout + * + * At least some thread-local data gets touched on the fast-path of almost all + * malloc operations. But much of it is only necessary down slow-paths, or + * testing. We want to colocate the fast-path data so that it can live on the + * same cacheline if possible. So we define three tiers of hotness: + * TSD_DATA_FAST: Touched on the alloc/dalloc fast paths. + * TSD_DATA_SLOW: Touched down slow paths. "Slow" here is sort of general; + * there are "semi-slow" paths like "not a sized deallocation, but can still + * live in the tcache". We'll want to keep these closer to the fast-path + * data. + * TSD_DATA_SLOWER: Only touched in test or debug modes, or not touched at all. + * + * An additional concern is that the larger tcache bins won't be used (we have a + * bin per size class, but by default only cache relatively small objects). So + * the earlier bins are in the TSD_DATA_FAST tier, but the later ones are in the + * TSD_DATA_SLOWER tier. + * + * As a result of all this, we put the slow data first, then the fast data, then + * the slower data, while keeping the tcache as the last element of the fast + * data (so that the fast -> slower transition happens midway through the + * tcache). While we don't yet play alignment tricks to guarantee it, this + * increases our odds of getting some cache/page locality on fast paths. 
+ */ + +#ifdef JEMALLOC_JET +typedef void (*test_callback_t)(int *); +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL +#else +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER +#endif + +typedef ql_elm(tsd_t) tsd_link_t; + +/* O(name, type, nullable type) */ +#define TSD_DATA_SLOW \ + O(tcache_enabled, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(min_init_state_nfetched, uint8_t, uint8_t) \ + O(thread_allocated_last_event, uint64_t, uint64_t) \ + O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(thread_deallocated_last_event, uint64_t, uint64_t) \ + O(thread_deallocated_next_event, uint64_t, uint64_t) \ + O(tcache_gc_event_wait, uint64_t, uint64_t) \ + O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ + O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ + O(stats_interval_event_wait, uint64_t, uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ + O(peak_alloc_event_wait, uint64_t, uint64_t) \ + O(peak_dalloc_event_wait, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(prng_state, uint64_t, uint64_t) \ + O(san_extents_until_guard_small, uint64_t, uint64_t) \ + O(san_extents_until_guard_large, uint64_t, uint64_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ + O(sec_shard, uint8_t, uint8_t) \ + O(binshards, tsd_binshards_t, tsd_binshards_t)\ + O(tsd_link, tsd_link_t, tsd_link_t) \ + O(in_hook, bool, bool) \ + O(peak, peak_t, peak_t) \ + O(activity_callback_thunk, activity_callback_thunk_t, \ + activity_callback_thunk_t) \ + O(tcache_slow, tcache_slow_t, tcache_slow_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) + +#define TSD_DATA_SLOW_INITIALIZER \ + /* tcache_enabled */ 
TCACHE_ENABLED_ZERO_INITIALIZER, \ + /* reentrancy_level */ 0, \ + /* min_init_state_nfetched */ 0, \ + /* thread_allocated_last_event */ 0, \ + /* thread_allocated_next_event */ 0, \ + /* thread_deallocated_last_event */ 0, \ + /* thread_deallocated_next_event */ 0, \ + /* tcache_gc_event_wait */ 0, \ + /* tcache_gc_dalloc_event_wait */ 0, \ + /* prof_sample_event_wait */ 0, \ + /* prof_sample_last_event */ 0, \ + /* stats_interval_event_wait */ 0, \ + /* stats_interval_last_event */ 0, \ + /* peak_alloc_event_wait */ 0, \ + /* peak_dalloc_event_wait */ 0, \ + /* prof_tdata */ NULL, \ + /* prng_state */ 0, \ + /* san_extents_until_guard_small */ 0, \ + /* san_extents_until_guard_large */ 0, \ + /* iarena */ NULL, \ + /* arena */ NULL, \ + /* arena_decay_ticker */ \ + TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ + /* sec_shard */ (uint8_t)-1, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tsd_link */ {NULL}, \ + /* in_hook */ false, \ + /* peak */ PEAK_INITIALIZER, \ + /* activity_callback_thunk */ \ + ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ + /* rtree_ctx */ RTREE_CTX_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_FAST \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ + O(tcache, tcache_t, tcache_t) + +#define TSD_DATA_FAST_INITIALIZER \ + /* thread_allocated */ 0, \ + /* thread_allocated_next_event_fast */ 0, \ + /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, + +/* O(name, type, nullable type) */ +#define TSD_DATA_SLOWER \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD + +#define TSD_DATA_SLOWER_INITIALIZER \ + /* witness */ WITNESS_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER + + +#define 
TSD_INITIALIZER { \ + TSD_DATA_SLOW_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + TSD_DATA_FAST_INITIALIZER \ + TSD_DATA_SLOWER_INITIALIZER \ +} + +#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) +void _malloc_tsd_cleanup_register(bool (*f)(void)); +#endif + +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); +tsd_t *malloc_tsd_boot0(void); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); +tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); +void tsd_state_set(tsd_t *tsd, uint8_t new_state); +void tsd_slow_update(tsd_t *tsd); +void tsd_prefork(tsd_t *tsd); +void tsd_postfork_parent(tsd_t *tsd); +void tsd_postfork_child(tsd_t *tsd); + +/* + * Call ..._inc when your module wants to take all threads down the slow paths, + * and ..._dec when it no longer needs to. + */ +void tsd_global_slow_inc(tsdn_t *tsdn); +void tsd_global_slow_dec(tsdn_t *tsdn); +bool tsd_global_slow(void); + +#define TSD_MIN_INIT_STATE_MAX_FETCHED (128) + +enum { + /* Common case --> jnz. */ + tsd_state_nominal = 0, + /* Initialized but on slow path. */ + tsd_state_nominal_slow = 1, + /* + * Some thread has changed global state in such a way that all nominal + * threads need to recompute their fast / slow status the next time they + * get a chance. + * + * Any thread can change another thread's status *to* recompute, but + * threads are the only ones who can change their status *from* + * recompute. + */ + tsd_state_nominal_recompute = 2, + /* + * The above nominal states should be lower values. We use + * tsd_nominal_max to separate nominal states from threads in the + * process of being born / dying. + */ + tsd_state_nominal_max = 2, + + /* + * A thread might free() during its death as its only allocator action; + * in such scenarios, we need tsd, but set up in such a way that no + * cleanup is necessary. + */ + tsd_state_minimal_initialized = 3, + /* States during which we know we're in thread death. 
*/ + tsd_state_purgatory = 4, + tsd_state_reincarnated = 5, + /* + * What it says on the tin; tsd that hasn't been initialized. Note + * that even when the tsd struct lives in TLS, when need to keep track + * of stuff like whether or not our pthread destructors have been + * scheduled, so this really truly is different than the nominal state. + */ + tsd_state_uninitialized = 6 +}; + +/* + * Some TSD accesses can only be done in a nominal state. To enforce this, we + * wrap TSD member access in a function that asserts on TSD state, and mangle + * field names to prevent touching them accidentally. + */ +#define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n + +#ifdef JEMALLOC_U8_ATOMICS +# define tsd_state_t atomic_u8_t +# define tsd_atomic_load atomic_load_u8 +# define tsd_atomic_store atomic_store_u8 +# define tsd_atomic_exchange atomic_exchange_u8 +#else +# define tsd_state_t atomic_u32_t +# define tsd_atomic_load atomic_load_u32 +# define tsd_atomic_store atomic_store_u32 +# define tsd_atomic_exchange atomic_exchange_u32 +#endif + +/* The actual tsd. */ +struct tsd_s { + /* + * The contents should be treated as totally opaque outside the tsd + * module. Access any thread-local state through the getters and + * setters below. + */ + +#define O(n, t, nt) \ + t TSD_MANGLE(n); + + TSD_DATA_SLOW + /* + * We manually limit the state to just a single byte. Unless the 8-bit + * atomics are unavailable (which is rare). + */ + tsd_state_t state; + TSD_DATA_FAST + TSD_DATA_SLOWER +#undef O +}; + +JEMALLOC_ALWAYS_INLINE uint8_t +tsd_state_get(tsd_t *tsd) { + /* + * This should be atomic. Unfortunately, compilers right now can't tell + * that this can be done as a memory comparison, and forces a load into + * a register that hurts fast-path performance. 
+ */ + /* return atomic_load_u8(&tsd->state, ATOMIC_RELAXED); */ + return *(uint8_t *)&tsd->state; +} + +/* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion + * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be + * explicitly converted to tsd_t, which is non-nullable. + */ +struct tsdn_s { + tsd_t tsd; +}; +#define TSDN_NULL ((tsdn_t *)0) +JEMALLOC_ALWAYS_INLINE tsdn_t * +tsd_tsdn(tsd_t *tsd) { + return (tsdn_t *)tsd; +} + +JEMALLOC_ALWAYS_INLINE bool +tsdn_null(const tsdn_t *tsdn) { + return tsdn == NULL; +} + +JEMALLOC_ALWAYS_INLINE tsd_t * +tsdn_tsd(tsdn_t *tsdn) { + assert(!tsdn_null(tsdn)); + + return &tsdn->tsd; +} diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index d8f3ef13..a6bd3f58 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#include "jemalloc/internal/tsd_internals.h" + #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 7d6c805b..cd0ddbd1 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H +#include "jemalloc/internal/tsd_internals.h" + #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index a91dac88..4efeffb6 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -3,6 +3,8 @@ #endif #define JEMALLOC_INTERNAL_TSD_WIN_H +#include "jemalloc/internal/tsd_internals.h" + typedef struct { bool initialized; tsd_t val; From 
41e0b857bef0b787a581c7a8334b46981d5e06ed Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 9 Jun 2023 17:37:47 -0700 Subject: [PATCH 2316/2608] Make headers self-contained by fixing `#include`s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Header files are now self-contained, which makes the relationships between the files clearer, and crucially allows LSP tools like `clangd` to function correctly in all of our header files. I have verified that the headers are self-contained (aside from the various Windows shims) by compiling them as if they were C files – in a follow-up commit I plan to add this to CI to ensure we don't regress on this front. --- include/jemalloc/internal/activity_callback.h | 2 ++ include/jemalloc/internal/arena_externs.h | 3 +++ include/jemalloc/internal/arena_inlines_a.h | 3 +++ include/jemalloc/internal/arena_inlines_b.h | 8 ++++++++ include/jemalloc/internal/arena_stats.h | 1 + include/jemalloc/internal/arena_structs.h | 1 + include/jemalloc/internal/arena_types.h | 1 + include/jemalloc/internal/assert.h | 1 + include/jemalloc/internal/atomic.h | 4 +++- include/jemalloc/internal/atomic_c11.h | 1 + include/jemalloc/internal/atomic_gcc_atomic.h | 5 +++++ include/jemalloc/internal/atomic_gcc_sync.h | 6 ++++++ include/jemalloc/internal/atomic_msvc.h | 6 ++++++ include/jemalloc/internal/background_thread_externs.h | 5 +++++ include/jemalloc/internal/background_thread_inlines.h | 5 +++++ include/jemalloc/internal/background_thread_structs.h | 3 +++ include/jemalloc/internal/base.h | 1 + include/jemalloc/internal/bin.h | 1 + include/jemalloc/internal/bin_info.h | 1 + include/jemalloc/internal/bin_stats.h | 1 + include/jemalloc/internal/bin_types.h | 1 + include/jemalloc/internal/bit_util.h | 1 + include/jemalloc/internal/bitmap.h | 1 + include/jemalloc/internal/buf_writer.h | 4 ++++ include/jemalloc/internal/cache_bin.h | 2 ++ include/jemalloc/internal/ckh.h | 1 + 
include/jemalloc/internal/counter.h | 2 ++ include/jemalloc/internal/ctl.h | 4 ++++ include/jemalloc/internal/decay.h | 2 ++ include/jemalloc/internal/div.h | 1 + include/jemalloc/internal/ecache.h | 3 ++- include/jemalloc/internal/edata.h | 2 ++ include/jemalloc/internal/edata_cache.h | 1 + include/jemalloc/internal/ehooks.h | 3 +++ include/jemalloc/internal/emap.h | 1 + include/jemalloc/internal/emitter.h | 4 ++++ include/jemalloc/internal/eset.h | 3 ++- include/jemalloc/internal/exp_grow.h | 2 ++ include/jemalloc/internal/extent.h | 2 ++ include/jemalloc/internal/extent_dss.h | 4 ++++ include/jemalloc/internal/extent_mmap.h | 2 ++ include/jemalloc/internal/fb.h | 4 ++++ include/jemalloc/internal/fxp.h | 3 +++ include/jemalloc/internal/hash.h | 1 + include/jemalloc/internal/hook.h | 1 + include/jemalloc/internal/hpa.h | 5 +++++ include/jemalloc/internal/hpa_hooks.h | 3 +++ include/jemalloc/internal/hpa_opts.h | 1 + include/jemalloc/internal/hpdata.h | 3 +++ include/jemalloc/internal/inspect.h | 3 +++ include/jemalloc/internal/jemalloc_internal_externs.h | 3 ++- include/jemalloc/internal/jemalloc_internal_inlines_a.h | 4 ++++ include/jemalloc/internal/jemalloc_internal_inlines_b.h | 3 +++ include/jemalloc/internal/jemalloc_internal_inlines_c.h | 6 ++++-- include/jemalloc/internal/jemalloc_preamble.h.in | 2 +- include/jemalloc/internal/large_externs.h | 2 ++ include/jemalloc/internal/lockedint.h | 5 +++++ include/jemalloc/internal/log.h | 1 + include/jemalloc/internal/malloc_io.h | 1 + include/jemalloc/internal/mpsc_queue.h | 1 + include/jemalloc/internal/mutex.h | 1 + include/jemalloc/internal/mutex_prof.h | 1 + include/jemalloc/internal/nstime.h | 3 +++ include/jemalloc/internal/pa.h | 1 + include/jemalloc/internal/pac.h | 6 +++++- include/jemalloc/internal/pages.h | 2 ++ include/jemalloc/internal/pai.h | 4 ++++ include/jemalloc/internal/peak.h | 2 ++ include/jemalloc/internal/peak_event.h | 3 +++ include/jemalloc/internal/ph.h | 4 ++++ 
include/jemalloc/internal/prng.h | 1 + include/jemalloc/internal/prof_data.h | 1 + include/jemalloc/internal/prof_externs.h | 2 ++ include/jemalloc/internal/prof_hook.h | 2 ++ include/jemalloc/internal/prof_inlines.h | 6 +++++- include/jemalloc/internal/prof_log.h | 1 + include/jemalloc/internal/prof_recent.h | 4 ++++ include/jemalloc/internal/prof_stats.h | 3 +++ include/jemalloc/internal/prof_structs.h | 1 + include/jemalloc/internal/prof_sys.h | 4 ++++ include/jemalloc/internal/psset.h | 1 + include/jemalloc/internal/ql.h | 1 + include/jemalloc/internal/rb.h | 3 +++ include/jemalloc/internal/rtree.h | 3 +++ include/jemalloc/internal/rtree_tsd.h | 2 ++ include/jemalloc/internal/safety_check.h | 6 ++++-- include/jemalloc/internal/san.h | 3 +++ include/jemalloc/internal/san_bump.h | 2 ++ include/jemalloc/internal/sc.h | 1 + include/jemalloc/internal/sec.h | 4 ++++ include/jemalloc/internal/sec_opts.h | 2 ++ include/jemalloc/internal/seq.h | 1 + include/jemalloc/internal/slab_data.h | 1 + include/jemalloc/internal/spin.h | 2 ++ include/jemalloc/internal/stats.h | 4 ++++ include/jemalloc/internal/sz.h | 1 + include/jemalloc/internal/tcache_externs.h | 6 ++++++ include/jemalloc/internal/tcache_inlines.h | 5 +++++ include/jemalloc/internal/tcache_structs.h | 3 ++- include/jemalloc/internal/tcache_types.h | 1 + include/jemalloc/internal/test_hooks.h | 2 ++ include/jemalloc/internal/thread_event.h | 1 + include/jemalloc/internal/ticker.h | 1 + include/jemalloc/internal/tsd.h | 1 + include/jemalloc/internal/tsd_generic.h | 3 +++ include/jemalloc/internal/tsd_internals.h | 5 +++-- include/jemalloc/internal/tsd_malloc_thread_cleanup.h | 2 ++ include/jemalloc/internal/tsd_tls.h | 2 ++ include/jemalloc/internal/tsd_types.h | 2 ++ include/jemalloc/internal/tsd_win.h | 2 ++ include/jemalloc/internal/util.h | 3 +++ include/jemalloc/internal/witness.h | 2 ++ include/jemalloc/jemalloc_protos.h.in | 3 +++ test/include/test/jemalloc_test.h.in | 9 +-------- 
test/integration/MALLOCX_ARENA.c | 8 -------- test/integration/allocated.c | 8 -------- 116 files changed, 277 insertions(+), 38 deletions(-) diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h index 6c2e84e3..0f4f3962 100644 --- a/include/jemalloc/internal/activity_callback.h +++ b/include/jemalloc/internal/activity_callback.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H #define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * The callback to be executed "periodically", in response to some amount of * allocator activity. diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 22d7fff7..d79b607a 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/bin.h" #include "jemalloc/internal/div.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/pages.h" diff --git a/include/jemalloc/internal/arena_inlines_a.h b/include/jemalloc/internal/arena_inlines_a.h index 8568358c..214ce80b 100644 --- a/include/jemalloc/internal/arena_inlines_a.h +++ b/include/jemalloc/internal/arena_inlines_a.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_A_H #define JEMALLOC_INTERNAL_ARENA_INLINES_A_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_structs.h" + static inline unsigned arena_ind_get(const arena_t *arena) { return arena->ind; diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 420a62b2..c7d08227 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ 
b/include/jemalloc/internal/arena_inlines_b.h @@ -1,14 +1,22 @@ #ifndef JEMALLOC_INTERNAL_ARENA_INLINES_B_H #define JEMALLOC_INTERNAL_ARENA_INLINES_B_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/rtree.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_inlines.h" #include "jemalloc/internal/ticker.h" static inline arena_t * diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 15f1d345..3407b023 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STATS_H #define JEMALLOC_INTERNAL_ARENA_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index e6868fce..0fffa7eb 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin.h" diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 
45eec69f..a1fc8926 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_TYPES_H #define JEMALLOC_INTERNAL_ARENA_TYPES_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/sc.h" /* Default decay times in milliseconds. */ diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index be4d45b3..38eb2a2c 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -1,3 +1,4 @@ +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index c0f73122..6dd2a7c6 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -1,7 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_H -#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE +#include "jemalloc/internal/jemalloc_preamble.h" #define JEMALLOC_U8_ATOMICS #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) @@ -22,6 +22,8 @@ # error "Don't have atomics implemented on this platform." #endif +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + /* * This header gives more or less a backport of C11 atomics. The user can write * JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_sizeof_type); to generate diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h index a5f9313a..74173b03 100644 --- a/include/jemalloc/internal/atomic_c11.h +++ b/include/jemalloc/internal/atomic_c11.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_C11_H #define JEMALLOC_INTERNAL_ATOMIC_C11_H +#include "jemalloc/internal/jemalloc_preamble.h" #include #define ATOMIC_INIT(...) 
ATOMIC_VAR_INIT(__VA_ARGS__) diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h index 471515e8..0819fde1 100644 --- a/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H #define JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + #define ATOMIC_INIT(...) {__VA_ARGS__} typedef enum { @@ -126,4 +129,6 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ atomic_enum_to_builtin(mo)); \ } +#undef ATOMIC_INLINE + #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_ATOMIC_H */ diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index e02b7cbe..21136bd0 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H #define JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H +#include "jemalloc/internal/jemalloc_preamble.h" + +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + #define ATOMIC_INIT(...) {__VA_ARGS__} typedef enum { @@ -192,4 +196,6 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ return __sync_fetch_and_xor(&a->repr, val); \ } +#undef ATOMIC_INLINE + #endif /* JEMALLOC_INTERNAL_ATOMIC_GCC_SYNC_H */ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h index 67057ce5..a429f1ab 100644 --- a/include/jemalloc/internal/atomic_msvc.h +++ b/include/jemalloc/internal/atomic_msvc.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ATOMIC_MSVC_H #define JEMALLOC_INTERNAL_ATOMIC_MSVC_H +#include "jemalloc/internal/jemalloc_preamble.h" + +#define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE + #define ATOMIC_INIT(...) 
{__VA_ARGS__} typedef enum { @@ -155,4 +159,6 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ } +#undef ATOMIC_INLINE + #endif /* JEMALLOC_INTERNAL_ATOMIC_MSVC_H */ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 6ae3c8d8..0d34ee55 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/background_thread_structs.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/mutex.h" + extern bool opt_background_thread; extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 92c5febe..4ed05d1b 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_INLINES_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_inlines_a.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/background_thread_externs.h" + JEMALLOC_ALWAYS_INLINE bool background_thread_enabled(void) { return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 83a91984..67b68797 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -1,6 +1,9 @@ #ifndef 
JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_STRUCTS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/mutex.h" + /* This file really combines "structs" and "types", but only transitionally. */ #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 23207563..6b41aa6f 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BASE_H #define JEMALLOC_INTERNAL_BASE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 027af088..ed27c18f 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_H #define JEMALLOC_INTERNAL_BIN_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 7fe65c86..b6175550 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_INFO_H #define JEMALLOC_INTERNAL_BIN_INFO_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bitmap.h" /* diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index 0b99297c..f95b9e9c 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_STATS_H #define JEMALLOC_INTERNAL_BIN_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/mutex_prof.h" 
typedef struct bin_stats_s bin_stats_t; diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h index 945e8326..5ec22dfd 100644 --- a/include/jemalloc/internal/bin_types.h +++ b/include/jemalloc/internal/bin_types.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIN_TYPES_H #define JEMALLOC_INTERNAL_BIN_TYPES_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/sc.h" #define BIN_SHARDS_MAX (1 << EDATA_BITS_BINSHARD_WIDTH) diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 70fa4bc9..c413a75d 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BIT_UTIL_H #define JEMALLOC_INTERNAL_BIT_UTIL_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" /* Sanity check. */ diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index dc19454d..e501da47 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_BITMAP_H #define JEMALLOC_INTERNAL_BITMAP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index 37aa6de5..fa0ac99c 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_BUF_WRITER_H #define JEMALLOC_INTERNAL_BUF_WRITER_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/tsd_types.h" + /* * Note: when using the buffered writer, cbopaque is passed to write_cb only * when the buffer is flushed. 
It would make a difference if cbopaque points diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index c9c8f865..218e368e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_CACHE_BIN_H #define JEMALLOC_INTERNAL_CACHE_BIN_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 7b3850bc..8e9d7fed 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_CKH_H #define JEMALLOC_INTERNAL_CKH_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd.h" /* Cuckoo hashing implementation. Skip to the end for the interface. */ diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 79abf064..74e30701 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_COUNTER_H #define JEMALLOC_INTERNAL_COUNTER_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/mutex.h" typedef struct counter_accum_s { diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 63d27f8a..1d3e6140 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_CTL_H #define JEMALLOC_INTERNAL_CTL_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_stats.h" +#include "jemalloc/internal/background_thread_structs.h" +#include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include 
"jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex_prof.h" diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index cf6a9d22..74be55da 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_DECAY_H #define JEMALLOC_INTERNAL_DECAY_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/smoothstep.h" #define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1) diff --git a/include/jemalloc/internal/div.h b/include/jemalloc/internal/div.h index aebae939..56d5f463 100644 --- a/include/jemalloc/internal/div.h +++ b/include/jemalloc/internal/div.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_DIV_H #define JEMALLOC_INTERNAL_DIV_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" /* diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 71cae3e3..2bd74fde 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -1,9 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ECACHE_H #define JEMALLOC_INTERNAL_ECACHE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/eset.h" -#include "jemalloc/internal/san.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/san.h" typedef struct ecache_s ecache_t; struct ecache_s { diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index d2d16c46..5fe4e14d 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -1,12 +1,14 @@ #ifndef JEMALLOC_INTERNAL_EDATA_H #define JEMALLOC_INTERNAL_EDATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/hpdata.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/ph.h" +#include 
"jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/slab_data.h" diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index 8b6c0ef7..b2c7b4f1 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EDATA_CACHE_H #define JEMALLOC_INTERNAL_EDATA_CACHE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" /* For tests only. */ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index d583c521..947e056c 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_EHOOKS_H #define JEMALLOC_INTERNAL_EHOOKS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/tsd_types.h" /* * This module is the internal interface to the extent hooks (both diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 847af327..08262f1f 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_EMAP_H #define JEMALLOC_INTERNAL_EMAP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/rtree.h" diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 9482f68b..bc12fe92 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EMITTER_H #define JEMALLOC_INTERNAL_EMITTER_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include 
"jemalloc/internal/malloc_io.h" #include "jemalloc/internal/ql.h" typedef enum emitter_output_e emitter_output_t; diff --git a/include/jemalloc/internal/eset.h b/include/jemalloc/internal/eset.h index 4f689b47..9b7c4a89 100644 --- a/include/jemalloc/internal/eset.h +++ b/include/jemalloc/internal/eset.h @@ -1,9 +1,10 @@ #ifndef JEMALLOC_INTERNAL_ESET_H #define JEMALLOC_INTERNAL_ESET_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/fb.h" #include "jemalloc/internal/edata.h" +#include "jemalloc/internal/fb.h" #include "jemalloc/internal/mutex.h" /* diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h index 8566b8a4..40a1add0 100644 --- a/include/jemalloc/internal/exp_grow.h +++ b/include/jemalloc/internal/exp_grow.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXP_GROW_H #define JEMALLOC_INTERNAL_EXP_GROW_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/sz.h" typedef struct exp_grow_s exp_grow_t; struct exp_grow_s { /* diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 367793db..17feb703 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -1,8 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_H #define JEMALLOC_INTERNAL_EXTENT_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ecache.h" #include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/pac.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/rtree.h" diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index 38f04340..c8e71e82 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_DSS_H #define JEMALLOC_INTERNAL_EXTENT_DSS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_types.h" +#include 
"jemalloc/internal/tsd_types.h" + typedef enum { dss_prec_disabled = 0, dss_prec_primary = 1, diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h index 55f17ee4..e6a4649e 100644 --- a/include/jemalloc/internal/extent_mmap.h +++ b/include/jemalloc/internal/extent_mmap.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H #define JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" + extern bool opt_retain; void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h index 90c4091f..e38095af 100644 --- a/include/jemalloc/internal/fb.h +++ b/include/jemalloc/internal/fb.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_FB_H #define JEMALLOC_INTERNAL_FB_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bit_util.h" + /* * The flat bitmap module. This has a larger API relative to the bitmap module * (supporting things like backwards searches, and searching for both set and diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index 415a9828..e42425f9 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_FXP_H #define JEMALLOC_INTERNAL_FXP_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" + /* * A simple fixed-point math implementation, supporting only unsigned values * (with overflow being an error). 
diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 7f945679..15162b94 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HASH_H #define JEMALLOC_INTERNAL_HASH_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/assert.h" /* diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 27f94841..76b9130d 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HOOK_H #define JEMALLOC_INTERNAL_HOOK_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd.h" /* diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 01fe3166..4805efaf 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -1,9 +1,14 @@ #ifndef JEMALLOC_INTERNAL_HPA_H #define JEMALLOC_INTERNAL_HPA_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/edata_cache.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/hpa_opts.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 841f529e..72f3a43c 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_HPA_HOOKS_H #define JEMALLOC_INTERNAL_HPA_HOOKS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/nstime.h" + typedef struct hpa_hooks_s hpa_hooks_t; struct hpa_hooks_s { void *(*map)(size_t size); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ee84fea1..6e58c86b 100644 --- 
a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_HPA_OPTS_H #define JEMALLOC_INTERNAL_HPA_OPTS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/fxp.h" /* diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 36918258..7ba92112 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -1,7 +1,10 @@ #ifndef JEMALLOC_INTERNAL_HPDATA_H #define JEMALLOC_INTERNAL_HPDATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/fb.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/pages.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/typed_list.h" diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h index 65fef51d..0da920ca 100644 --- a/include/jemalloc/internal/inspect.h +++ b/include/jemalloc/internal/inspect.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_INSPECT_H #define JEMALLOC_INTERNAL_INSPECT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd_types.h" + /* * This module contains the heap introspection capabilities. 
For now they are * exposed purely through mallctl APIs in the experimental namespace, but this diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b1e5bde9..ae03c644 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -1,11 +1,12 @@ #ifndef JEMALLOC_INTERNAL_EXTERNS_H #define JEMALLOC_INTERNAL_EXTERNS_H +#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/hpa_opts.h" +#include "jemalloc/internal/nstime.h" #include "jemalloc/internal/sec_opts.h" #include "jemalloc/internal/tsd_types.h" -#include "jemalloc/internal/nstime.h" /* TSD checks this to set thread local slow state accordingly. */ extern bool malloc_slow; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index cb6d78fa..8d5e22fd 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -1,10 +1,14 @@ #ifndef JEMALLOC_INTERNAL_INLINES_A_H #define JEMALLOC_INTERNAL_INLINES_A_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/ticker.h" JEMALLOC_ALWAYS_INLINE malloc_cpuid_t diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 152f8a03..b2cab228 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -1,7 +1,10 @@ #ifndef JEMALLOC_INTERNAL_INLINES_B_H #define 
JEMALLOC_INTERNAL_INLINES_B_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_inlines_a.h" #include "jemalloc/internal/extent.h" +#include "jemalloc/internal/jemalloc_internal_inlines_a.h" static inline void percpu_arena_update(tsd_t *tsd, unsigned cpu) { diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index ae9cb0c2..1dac668a 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -1,14 +1,16 @@ #ifndef JEMALLOC_INTERNAL_INLINES_C_H #define JEMALLOC_INTERNAL_INLINES_C_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/emap.h" #include "jemalloc/internal/hook.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/log.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/witness.h" -#include "jemalloc/internal/arena_externs.h" -#include "jemalloc/internal/emap.h" /* * These correspond to the macros in jemalloc/jemalloc_macros.h. 
Broadly, we diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index d7086302..6b55e47f 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -1,7 +1,7 @@ #ifndef JEMALLOC_PREAMBLE_H #define JEMALLOC_PREAMBLE_H -#include "jemalloc_internal_defs.h" +#include "jemalloc/internal/jemalloc_internal_defs.h" #include "jemalloc/internal/jemalloc_internal_decls.h" #if defined(JEMALLOC_UTRACE) || defined(JEMALLOC_UTRACE_LABEL) diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index 8e09122d..ce9c8689 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_LARGE_EXTERNS_H #define JEMALLOC_INTERNAL_LARGE_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/hook.h" void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index d020ebec..062dedbf 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -1,6 +1,11 @@ #ifndef JEMALLOC_INTERNAL_LOCKEDINT_H #define JEMALLOC_INTERNAL_LOCKEDINT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd_types.h" + /* * In those architectures that support 64-bit atomics, we use atomic updates for * our 64-bit values. 
Otherwise, we use a plain uint64_t and synchronize diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index f39c598a..921985c8 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_LOG_H #define JEMALLOC_INTERNAL_LOG_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index a375bdae..0afb0429 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MALLOC_IO_H #define JEMALLOC_INTERNAL_MALLOC_IO_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_types.h" #ifdef _WIN32 diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h index 316ea9b1..d8aa624b 100644 --- a/include/jemalloc/internal/mpsc_queue.h +++ b/include/jemalloc/internal/mpsc_queue.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MPSC_QUEUE_H #define JEMALLOC_INTERNAL_MPSC_QUEUE_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" /* diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 03d3557b..46f22aec 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_MUTEX_H #define JEMALLOC_INTERNAL_MUTEX_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex_prof.h" #include "jemalloc/internal/tsd.h" diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 4a526a5a..14e4340b 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -1,6 +1,7 @@ #ifndef 
JEMALLOC_INTERNAL_MUTEX_PROF_H #define JEMALLOC_INTERNAL_MUTEX_PROF_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/tsd_types.h" diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index ad1ae532..440a4d15 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_NSTIME_H #define JEMALLOC_INTERNAL_NSTIME_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" + /* Maximum supported number of seconds (~584 years). */ #define NSTIME_SEC_MAX KQU(18446744072) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index c8aed932..5f43244d 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PA_H #define JEMALLOC_INTERNAL_PA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/base.h" #include "jemalloc/internal/decay.h" #include "jemalloc/internal/ecache.h" diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 01c4e6af..0b173a58 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -1,11 +1,15 @@ #ifndef JEMALLOC_INTERNAL_PAC_H #define JEMALLOC_INTERNAL_PAC_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/decay.h" +#include "jemalloc/internal/ecache.h" +#include "jemalloc/internal/edata_cache.h" #include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/lockedint.h" #include "jemalloc/internal/pai.h" #include "san_bump.h" - /* * Page allocator classic; an implementation of the PAI interface that: * - Can be used for arenas with custom extent hooks. 
diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index c9d10ce2..0ecc2cd0 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PAGES_EXTERNS_H #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* Actual operating system page size, detected during bootstrap, <= PAGE. */ extern size_t os_page; diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index d978cd7d..dd64ee59 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PAI_H #define JEMALLOC_INTERNAL_PAI_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/tsd_types.h" + /* An interface for page allocation. */ typedef struct pai_s pai_t; diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h index 59da3e41..2a973cb8 100644 --- a/include/jemalloc/internal/peak.h +++ b/include/jemalloc/internal/peak.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PEAK_H #define JEMALLOC_INTERNAL_PEAK_H +#include "jemalloc/internal/jemalloc_preamble.h" + typedef struct peak_s peak_t; struct peak_s { /* The highest recorded peak value, after adjustment (see below). */ diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index b808ce04..cc2a1401 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PEAK_EVENT_H #define JEMALLOC_INTERNAL_PEAK_EVENT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd_types.h" + /* * While peak.h contains the simple helper struct that tracks state, this * contains the allocator tie-ins (and knows about tsd, the event module, etc.). 
diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 89de8663..3ae38710 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PH_H #define JEMALLOC_INTERNAL_PH_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/bit_util.h" + /* * A Pairing Heap implementation. * diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 14542aa1..81060d32 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PRNG_H #define JEMALLOC_INTERNAL_PRNG_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" /* diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 016b6507..43e8d7e7 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_DATA_H #define JEMALLOC_INTERNAL_PROF_DATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/mutex.h" extern malloc_mutex_t bt2gctx_mtx; diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index cce5c8f5..514c5804 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_EXTERNS_H #define JEMALLOC_INTERNAL_PROF_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_hook.h" diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 8615dc53..3c5ff8bf 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_PROF_HOOK_H #define 
JEMALLOC_INTERNAL_PROF_HOOK_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * The hooks types of which are declared in this file are experimental and * undocumented, thus the typedefs are located in an 'internal' header. diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index c0783fc1..6cb73735 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -1,10 +1,14 @@ #ifndef JEMALLOC_INTERNAL_PROF_INLINES_H #define JEMALLOC_INTERNAL_PROF_INLINES_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_inlines_b.h" +#include "jemalloc/internal/jemalloc_internal_inlines_c.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof_structs.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/thread_event.h" -#include "jemalloc/internal/jemalloc_internal_inlines_c.h" JEMALLOC_ALWAYS_INLINE void prof_active_assert(void) { diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index ccb557dd..0b1271c8 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_LOG_H #define JEMALLOC_INTERNAL_PROF_LOG_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/mutex.h" extern malloc_mutex_t log_mtx; diff --git a/include/jemalloc/internal/prof_recent.h b/include/jemalloc/internal/prof_recent.h index 959e336b..33649e6d 100644 --- a/include/jemalloc/internal/prof_recent.h +++ b/include/jemalloc/internal/prof_recent.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PROF_RECENT_H #define JEMALLOC_INTERNAL_PROF_RECENT_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/mutex.h" + extern malloc_mutex_t prof_recent_alloc_mtx; extern malloc_mutex_t prof_recent_dump_mtx; diff --git 
a/include/jemalloc/internal/prof_stats.h b/include/jemalloc/internal/prof_stats.h index 7954e82d..c4d269e5 100644 --- a/include/jemalloc/internal/prof_stats.h +++ b/include/jemalloc/internal/prof_stats.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_PROF_STATS_H #define JEMALLOC_INTERNAL_PROF_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/mutex.h" + typedef struct prof_stats_s prof_stats_t; struct prof_stats_s { uint64_t req_sum; diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 49061f02..084a549d 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PROF_STRUCTS_H #define JEMALLOC_INTERNAL_PROF_STRUCTS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ckh.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 0eb50788..e6e7f06f 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_PROF_SYS_H #define JEMALLOC_INTERNAL_PROF_SYS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/mutex.h" + extern malloc_mutex_t prof_dump_filename_mtx; extern base_t *prof_base; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index e1d64970..7e510b7f 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_PSSET_H #define JEMALLOC_INTERNAL_PSSET_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/hpdata.h" /* diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index c7f52f86..ebe69988 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h 
@@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_QL_H #define JEMALLOC_INTERNAL_QL_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/qr.h" /* diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 343e7c13..5f2771a9 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_RB_H #define JEMALLOC_INTERNAL_RB_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/safety_check.h" + /*- ******************************************************************************* * diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index 22f5f9dc..f559c94f 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -1,7 +1,10 @@ #ifndef JEMALLOC_INTERNAL_RTREE_H #define JEMALLOC_INTERNAL_RTREE_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index e45525c5..59f18570 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_RTREE_CTX_H #define JEMALLOC_INTERNAL_RTREE_CTX_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * Number of leafkey/leaf pairs to cache in L1 and L2 level respectively. 
Each * entry supports an entire leaf, so the cache hit rate is typically high even diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 7854c1bf..ef778dae 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -1,10 +1,12 @@ #ifndef JEMALLOC_INTERNAL_SAFETY_CHECK_H #define JEMALLOC_INTERNAL_SAFETY_CHECK_H -#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 - +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/pages.h" +#define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 + void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 8813d6bb..79723965 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -1,8 +1,11 @@ #ifndef JEMALLOC_INTERNAL_GUARD_H #define JEMALLOC_INTERNAL_GUARD_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/emap.h" +#include "jemalloc/internal/jemalloc_internal_externs.h" +#include "jemalloc/internal/tsd.h" #define SAN_PAGE_GUARD PAGE #define SAN_PAGE_GUARDS_SIZE (SAN_PAGE_GUARD * 2) diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index 0a8e76e9..d6e9cfc5 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -1,9 +1,11 @@ #ifndef JEMALLOC_INTERNAL_SAN_BUMP_H #define JEMALLOC_INTERNAL_SAN_BUMP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/exp_grow.h" #include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/witness.h" #define SBA_RETAINED_ALLOC_SIZE ((size_t)4 << 20) diff --git a/include/jemalloc/internal/sc.h 
b/include/jemalloc/internal/sc.h index 9bab347b..770835cc 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SC_H #define JEMALLOC_INTERNAL_SC_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_types.h" /* diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index fa863382..8ef1e9fb 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -1,8 +1,12 @@ #ifndef JEMALLOC_INTERNAL_SEC_H #define JEMALLOC_INTERNAL_SEC_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/sec_opts.h" /* * Small extent cache. diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index a3ad72fb..19ed1492 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SEC_OPTS_H #define JEMALLOC_INTERNAL_SEC_OPTS_H +#include "jemalloc/internal/jemalloc_preamble.h" + /* * The configuration settings used by an sec_t. Morally, this is part of the * SEC interface, but we put it here for header-ordering reasons. 
diff --git a/include/jemalloc/internal/seq.h b/include/jemalloc/internal/seq.h index ef2df4c6..9bb6b235 100644 --- a/include/jemalloc/internal/seq.h +++ b/include/jemalloc/internal/seq.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SEQ_H #define JEMALLOC_INTERNAL_SEQ_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/atomic.h" /* diff --git a/include/jemalloc/internal/slab_data.h b/include/jemalloc/internal/slab_data.h index e821863d..724c71e3 100644 --- a/include/jemalloc/internal/slab_data.h +++ b/include/jemalloc/internal/slab_data.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SLAB_DATA_H #define JEMALLOC_INTERNAL_SLAB_DATA_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bitmap.h" typedef struct slab_data_s slab_data_t; diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 6940f15e..87c400d5 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_SPIN_H #define JEMALLOC_INTERNAL_SPIN_H +#include "jemalloc/internal/jemalloc_preamble.h" + #define SPIN_INITIALIZER {0U} typedef struct { diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 727f7dcb..310178ea 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -1,6 +1,10 @@ #ifndef JEMALLOC_INTERNAL_STATS_H #define JEMALLOC_INTERNAL_STATS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/tsd_types.h" + /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index a799cea9..955d8ec0 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_SIZE_H #define JEMALLOC_INTERNAL_SIZE_H +#include 
"jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/bit_util.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/sc.h" diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 37f61646..af6fd970 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -1,6 +1,12 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_EXTERNS_H #define JEMALLOC_INTERNAL_TCACHE_EXTERNS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/cache_bin.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_types.h" + extern bool opt_tcache; extern size_t opt_tcache_max; extern ssize_t opt_lg_tcache_nslots_mul; diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 2b8db0a3..b69d89ad 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -1,11 +1,16 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/bin.h" +#include "jemalloc/internal/jemalloc_internal_inlines_b.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/large_externs.h" #include "jemalloc/internal/san.h" #include "jemalloc/internal/sc.h" #include "jemalloc/internal/sz.h" +#include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/util.h" static inline bool diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 176d73de..75918158 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -1,11 +1,12 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_STRUCTS_H #define JEMALLOC_INTERNAL_TCACHE_STRUCTS_H +#include 
"jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/sc.h" +#include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/ticker.h" -#include "jemalloc/internal/tsd_types.h" /* * The tcache state is split into the slow and hot path data. Each has a diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 583677ea..cea86fb7 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_TYPES_H #define JEMALLOC_INTERNAL_TCACHE_TYPES_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/sc.h" typedef struct tcache_slow_s tcache_slow_t; diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index 2b90afe1..af3f2755 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_TEST_HOOKS_H #define JEMALLOC_INTERNAL_TEST_HOOKS_H +#include "jemalloc/internal/jemalloc_preamble.h" + extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 2f4e1b39..46c57ed5 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_THREAD_EVENT_H #define JEMALLOC_INTERNAL_THREAD_EVENT_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd.h" /* "te" is short for "thread_event" */ diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index de034995..dca9bd10 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TICKER_H #define 
JEMALLOC_INTERNAL_TICKER_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/prng.h" #include "jemalloc/internal/util.h" diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index e36ffc6d..4f22dcff 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -7,6 +7,7 @@ * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. */ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_malloc_thread_cleanup.h" #elif (defined(JEMALLOC_TLS)) #include "jemalloc/internal/tsd_tls.h" diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index c5648f63..aa8042a4 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -3,7 +3,10 @@ #endif #define JEMALLOC_INTERNAL_TSD_GENERIC_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/ql.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" typedef struct tsd_init_block_s tsd_init_block_t; struct tsd_init_block_s { diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 813580c0..439f1d10 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -3,6 +3,7 @@ #endif #define JEMALLOC_INTERNAL_TSD_INTERNALS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" @@ -12,11 +13,11 @@ #include "jemalloc/internal/prof_types.h" #include "jemalloc/internal/ql.h" #include "jemalloc/internal/rtree_tsd.h" -#include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/tcache_structs.h" +#include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/tsd_types.h" #include 
"jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" -#include "jemalloc/internal/tsd_types.h" /* * Thread-Specific-Data layout diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index a6bd3f58..fb9ea1b4 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index cd0ddbd1..5e5a6e5e 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_TLS_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" #define JEMALLOC_TSD_TYPE_ATTR(type) __thread type JEMALLOC_TLS_MODEL diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index a6ae37da..73bbe486 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -3,6 +3,8 @@ #define MALLOC_TSD_CLEANUPS_MAX 4 +#include "jemalloc/internal/jemalloc_preamble.h" + typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; typedef bool (*malloc_tsd_cleanup_t)(void); diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 4efeffb6..8ec7eda7 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -3,7 +3,9 @@ #endif #define JEMALLOC_INTERNAL_TSD_WIN_H +#include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_internals.h" +#include "jemalloc/internal/tsd_types.h" typedef struct { 
bool initialized; diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index dcb1c0a5..939f3891 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_UTIL_H #define JEMALLOC_INTERNAL_UTIL_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" + #define UTIL_INLINE static inline /* Junk fill patterns. */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index fbe5f943..937ca2d5 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -1,6 +1,8 @@ #ifndef JEMALLOC_INTERNAL_WITNESS_H #define JEMALLOC_INTERNAL_WITNESS_H +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/assert.h" #include "jemalloc/internal/ql.h" /******************************************************************************/ diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 3f9fc848..aff2d88f 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -1,3 +1,6 @@ +#include "jemalloc/jemalloc_defs.h" +#include "jemalloc/jemalloc_macros.h" + /* * The @je_@ prefix on the following public symbol declarations is an artifact * of namespace management, and should be omitted in application code unless diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index 600d993c..f9c506da 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -58,14 +58,7 @@ extern "C" { # include "jemalloc/jemalloc@install_suffix@.h" # include "jemalloc/internal/jemalloc_internal_defs.h" # include "jemalloc/internal/jemalloc_internal_macros.h" - -static const bool config_debug = -#ifdef JEMALLOC_DEBUG - true -#else - false -#endif - ; +# include "jemalloc/internal/jemalloc_preamble.h" # define JEMALLOC_N(n) 
@private_namespace@##n # include "jemalloc/internal/private_namespace.h" diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 7e61df08..440ad9ef 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -2,14 +2,6 @@ #define NTHREADS 10 -static bool have_dss = -#ifdef JEMALLOC_DSS - true -#else - false -#endif - ; - void * thd_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 0c64272c..967e0108 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -1,13 +1,5 @@ #include "test/jemalloc_test.h" -static const bool config_stats = -#ifdef JEMALLOC_STATS - true -#else - false -#endif - ; - void * thd_start(void *arg) { int err; From cdb2c0e02fc303fd56aa525ef63eb71136e62b2d Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 14 Jul 2023 13:14:06 -0700 Subject: [PATCH 2317/2608] Implement C23's `free_sized` and `free_aligned_sized` [N2699 - Sized Memory Deallocation](https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2699.htm) introduced two new functions which were incorporated into the C23 standard, `free_sized` and `free_aligned_sized`. Both already have analogues in Jemalloc, all we are doing here is adding the appropriate wrappers. 
--- configure.ac | 12 +++++- doc/jemalloc.xml.in | 42 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 2 + include/jemalloc/jemalloc_protos.h.in | 3 ++ src/jemalloc.c | 17 ++++++++ 5 files changed, 75 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 687b221c..f820d14a 100644 --- a/configure.ac +++ b/configure.ac @@ -1105,7 +1105,7 @@ AC_ARG_WITH([export], fi] ) -public_syms="aligned_alloc calloc dallocx free mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" +public_syms="aligned_alloc calloc dallocx free free_sized free_aligned_sized mallctl mallctlbymib mallctlnametomib malloc malloc_conf malloc_conf_2_conf_harder malloc_message malloc_stats_print malloc_usable_size mallocx smallocx_${jemalloc_version_gid} nallocx posix_memalign rallocx realloc sallocx sdallocx xallocx" dnl Check for additional platform-specific public API functions. AC_CHECK_FUNC([memalign], [AC_DEFINE([JEMALLOC_OVERRIDE_MEMALIGN], [ ], [ ]) @@ -1129,6 +1129,16 @@ if test "x${JEMALLOC_PREFIX}" = "x" ; then AC_CHECK_FUNC([__libc_free], [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE], [ ], [ ]) wrap_syms="${wrap_syms} __libc_free"]) + dnl __libc_free_sized and __libc_free_aligned_sized are here speculatively + dnl under the assumption that glibc will eventually define symbols with these + dnl names. In the event glibc chooses different names for these symbols, + dnl these will need to be amended to match. 
+ AC_CHECK_FUNC([__libc_free_sized], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE_SIZED], [ ], [ ]) + wrap_syms="${wrap_syms} __libc_free_sized"]) + AC_CHECK_FUNC([__libc_free_aligned_sized], + [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED], [ ], [ ]) + wrap_syms="${wrap_syms} __libc_free_aligned_sized"]) AC_CHECK_FUNC([__libc_malloc], [AC_DEFINE([JEMALLOC_OVERRIDE___LIBC_MALLOC], [ ], [ ]) wrap_syms="${wrap_syms} __libc_malloc"]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 98f86f95..bdebd433 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -33,6 +33,8 @@ aligned_alloc realloc free + free_sized + free_aligned_sized mallocx rallocx xallocx @@ -89,6 +91,17 @@ void free void *ptr + + void free_sized + void *ptr + size_t size + + + void free_aligned_sized + void *ptr + size_t alignment + size_t size + Non-standard API @@ -227,6 +240,17 @@ allocated memory referenced by ptr to be made available for future allocations. If ptr is NULL, no action occurs. + + The free_sized() function is an extension of + free() with a size parameter + to allow the caller to pass in the allocation size as an optimization. + + + The free_aligned_sized() function accepts a + ptr which was allocated with a requested + size and alignment, causing + the allocated memory referenced by ptr to be made + available for future allocations. Non-standard API @@ -451,6 +475,24 @@ for (i = 0; i < nbins; i++) { depended on, since such behavior is entirely implementation-dependent. + + Interactions Between the Standard and Non-standard APIs + Generally speaking it is permissible to pass pointers obtained from + the standard API to the non-standard API and vice versa (e.g. calling + free() with a pointer returned by a call to + mallocx(), calling sdallocx() + with a pointer returned by a call to calloc()). + There are however a few exceptions. 
In keeping with the C23 standard – + which forbids calling free_sized() on a pointer + returned by aligned_alloc(), mandating that either + free_aligned_sized() or free() + be used instead – using any combination of the standard and non-standard + APIs in an equivalent fashion (i.e. taking a pointer which was allocated + with an explicitly requested alignment and attempting to free it via an + API that accepts a size hint, without also providing the alignment hint) + is likewise forbidden. + + TUNING diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index cce638d3..7498bc48 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -14,6 +14,8 @@ */ #undef JEMALLOC_OVERRIDE___LIBC_CALLOC #undef JEMALLOC_OVERRIDE___LIBC_FREE +#undef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED +#undef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED #undef JEMALLOC_OVERRIDE___LIBC_MALLOC #undef JEMALLOC_OVERRIDE___LIBC_MEMALIGN #undef JEMALLOC_OVERRIDE___LIBC_REALLOC diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index aff2d88f..170493dd 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -28,6 +28,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN JEMALLOC_CXX_THROW JEMALLOC_ALLOC_SIZE(2); JEMALLOC_EXPORT void JEMALLOC_SYS_NOTHROW @je_@free(void *ptr) JEMALLOC_CXX_THROW; +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free_sized(void *ptr, size_t size); +JEMALLOC_EXPORT void JEMALLOC_NOTHROW @je_@free_aligned_sized( + void *ptr, size_t alignment, size_t size); JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW *@je_@mallocx(size_t size, int flags) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8de30279..3961683a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2990,6 +2990,16 @@ je_free(void *ptr) { 
LOG("core.free.exit", ""); } +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_free_sized(void *ptr, size_t size) { + return je_sdallocx_noflags(ptr, size); +} + +JEMALLOC_EXPORT void JEMALLOC_NOTHROW +je_free_aligned_sized(void *ptr, size_t alignment, size_t size) { + return je_sdallocx(ptr, size, /* flags */ MALLOCX_ALIGN(alignment)); +} + /* * End malloc(3)-compatible functions. */ @@ -3153,6 +3163,13 @@ void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); # ifdef JEMALLOC_OVERRIDE___LIBC_FREE void __libc_free(void* ptr) PREALIAS(je_free); # endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED +void __libc_free_sized(void* ptr, size_t size) PREALIAS(je_free_sized); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED +void __libc_free_aligned_sized( + void* ptr, size_t alignment, size_t size) PREALIAS(je_free_aligned_sized); +# endif # ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC void *__libc_malloc(size_t size) PREALIAS(je_malloc); # endif From c49c17f128cc757c6bd4d026af181f01e28f3b41 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 20 Jul 2023 11:30:59 -0700 Subject: [PATCH 2318/2608] Suppress verbose frame address warnings These warnings are not useful, and make the output of some CI jobs enormous and difficult to read, so let's suppress them. 
--- include/jemalloc/internal/jemalloc_internal_macros.h | 4 ++++ src/prof_sys.c | 3 +++ 2 files changed, 7 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index e97b5f90..a08b7e7a 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -50,6 +50,7 @@ # define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) # define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W)) # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS @@ -79,6 +80,8 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS # endif +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address") # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") # define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ @@ -97,6 +100,7 @@ # define JEMALLOC_DIAGNOSTIC_POP # define JEMALLOC_DIAGNOSTIC_IGNORE(W) # define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS diff --git a/src/prof_sys.c b/src/prof_sys.c index 1f8ecb62..dbb4c80a 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -100,6 +100,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { _Unwind_Backtrace(prof_unwind_callback, &data); } #elif (defined(JEMALLOC_PROF_GCC)) +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS static void prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { /* The input arg must be a constant for 
__builtin_return_address. */ @@ -405,6 +407,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { BT_FRAME(254) BT_FRAME(255) #undef BT_FRAME +JEMALLOC_DIAGNOSTIC_POP } #else static void From 7e54dd1ddb0953093fc640cca9a45897b33cf84d Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 21 Jul 2023 18:13:58 -0700 Subject: [PATCH 2319/2608] Define `PROF_TCTX_SENTINEL` instead of using magic numbers This makes the code more readable on its own, and also sets the stage for more cleanly handling the pointer provenance lints in a following commit. --- include/jemalloc/internal/arena_inlines_b.h | 6 ++--- include/jemalloc/internal/prof_inlines.h | 25 ++++++++++++--------- include/jemalloc/internal/prof_types.h | 2 ++ src/jemalloc.c | 8 +++---- src/large.c | 4 ++-- src/prof.c | 14 +++++++++--- test/unit/prof_tctx.c | 4 ++-- 7 files changed, 39 insertions(+), 24 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c7d08227..44a73373 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -104,15 +104,15 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (reset_recent && large_dalloc_safety_checks(edata, ptr, edata_szind_get(edata))) { - prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; + prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } large_prof_info_get(tsd, edata, prof_info, reset_recent); } else { - prof_info->alloc_tctx = (prof_tctx_t *)(uintptr_t)1U; + prof_info->alloc_tctx = PROF_TCTX_SENTINEL; /* * No need to set other fields in prof_info; they will never be - * accessed if (uintptr_t)alloc_tctx == (uintptr_t)1U. + * accessed if alloc_tctx == PROF_TCTX_SENTINEL. 
*/ } } diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 6cb73735..75300ee4 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -106,6 +106,11 @@ prof_info_get_and_reset_recent(tsd_t *tsd, const void *ptr, arena_prof_info_get(tsd, ptr, alloc_ctx, prof_info, true); } +JEMALLOC_ALWAYS_INLINE bool +prof_tctx_is_valid(const prof_tctx_t *tctx) { + return tctx != NULL && tctx != PROF_TCTX_SENTINEL; +} + JEMALLOC_ALWAYS_INLINE void prof_tctx_reset(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); @@ -126,7 +131,7 @@ JEMALLOC_ALWAYS_INLINE void prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { cassert(config_prof); assert(edata != NULL); - assert((uintptr_t)tctx > (uintptr_t)1U); + assert(prof_tctx_is_valid(tctx)); arena_prof_info_set(tsd, edata, tctx, size); } @@ -161,7 +166,7 @@ prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { if (!prof_active || likely(prof_sample_should_skip(tsd, sample_event))) { - ret = (prof_tctx_t *)(uintptr_t)1U; + ret = PROF_TCTX_SENTINEL; } else { ret = prof_tctx_create(tsd); } @@ -176,7 +181,7 @@ prof_malloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, assert(ptr != NULL); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)tctx > (uintptr_t)1U)) { + if (unlikely(prof_tctx_is_valid(tctx))) { prof_malloc_sample_object(tsd, ptr, size, usize, tctx); } else { prof_tctx_reset(tsd, ptr, alloc_ctx); @@ -190,7 +195,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, bool sampled, old_sampled, moved; cassert(config_prof); - assert(ptr != NULL || (uintptr_t)tctx <= (uintptr_t)1U); + assert(ptr != NULL || !prof_tctx_is_valid(tctx)); if (prof_active && ptr != NULL) { assert(usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -203,12 +208,12 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * sample threshold. 
*/ prof_alloc_rollback(tsd, tctx); - tctx = (prof_tctx_t *)(uintptr_t)1U; + tctx = PROF_TCTX_SENTINEL; } } - sampled = ((uintptr_t)tctx > (uintptr_t)1U); - old_sampled = ((uintptr_t)old_prof_info->alloc_tctx > (uintptr_t)1U); + sampled = prof_tctx_is_valid(tctx); + old_sampled = prof_tctx_is_valid(old_prof_info->alloc_tctx); moved = (ptr != old_ptr); if (unlikely(sampled)) { @@ -226,7 +231,7 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, } else { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); - assert((uintptr_t)prof_info.alloc_tctx == (uintptr_t)1U); + assert(prof_info.alloc_tctx == PROF_TCTX_SENTINEL); } /* @@ -258,7 +263,7 @@ JEMALLOC_ALWAYS_INLINE bool prof_sampled(tsd_t *tsd, const void *ptr) { prof_info_t prof_info; prof_info_get(tsd, ptr, NULL, &prof_info); - bool sampled = (uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U; + bool sampled = prof_tctx_is_valid(prof_info.alloc_tctx); if (sampled) { assert(prof_sample_aligned(ptr)); } @@ -274,7 +279,7 @@ prof_free(tsd_t *tsd, const void *ptr, size_t usize, cassert(config_prof); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); - if (unlikely((uintptr_t)prof_info.alloc_tctx > (uintptr_t)1U)) { + if (unlikely(prof_tctx_is_valid(prof_info.alloc_tctx))) { assert(prof_sample_aligned(ptr)); prof_free_sampled_object(tsd, ptr, usize, &prof_info); } diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 046ea204..921b16fe 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -88,4 +88,6 @@ typedef struct prof_recent_s prof_recent_t; #define PROF_SAMPLE_ALIGNMENT PAGE #define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK +#define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) + #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 3961683a..a36b4974 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2529,12 +2529,12 @@ imalloc_body(static_opts_t *sopts, 
dynamic_opts_t *dopts, tsd_t *tsd) { sample_event); emap_alloc_ctx_t alloc_ctx; - if (likely((uintptr_t)tctx == (uintptr_t)1U)) { + if (likely(tctx == PROF_TCTX_SENTINEL)) { alloc_ctx.slab = sz_can_use_slab(usize); allocation = imalloc_no_sample( sopts, dopts, tsd, usize, usize, ind, alloc_ctx.slab); - } else if ((uintptr_t)tctx > (uintptr_t)1U) { + } else if (tctx != NULL) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); alloc_ctx.slab = false; @@ -3366,7 +3366,7 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, bool sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); void *p; - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + if (unlikely(tctx != PROF_TCTX_SENTINEL)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, usize, alignment, zero, tcache, arena, tctx, hook_args); } else { @@ -3612,7 +3612,7 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); size_t usize; - if (unlikely((uintptr_t)tctx != (uintptr_t)1U)) { + if (unlikely(tctx != PROF_TCTX_SENTINEL)) { usize = ixallocx_prof_sample(tsd_tsdn(tsd), ptr, old_usize, size, extra, alignment, zero, tctx); } else { diff --git a/src/large.c b/src/large.c index 5fc4bf58..10fa652e 100644 --- a/src/large.c +++ b/src/large.c @@ -287,7 +287,7 @@ large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); prof_info->alloc_tctx = alloc_tctx; - if ((uintptr_t)alloc_tctx > (uintptr_t)1U) { + if (prof_tctx_is_valid(alloc_tctx)) { nstime_copy(&prof_info->alloc_time, edata_prof_alloc_time_get(edata)); prof_info->alloc_size = edata_prof_alloc_size_get(edata); @@ -308,7 +308,7 @@ large_prof_tctx_set(edata_t *edata, prof_tctx_t *tctx) { void large_prof_tctx_reset(edata_t *edata) { - large_prof_tctx_set(edata, (prof_tctx_t *)(uintptr_t)1U); + 
large_prof_tctx_set(edata, PROF_TCTX_SENTINEL); } void diff --git a/src/prof.c b/src/prof.c index 9986a329..52869375 100644 --- a/src/prof.c +++ b/src/prof.c @@ -91,11 +91,19 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { cassert(config_prof); if (tsd_reentrancy_level_get(tsd) > 0) { - assert((uintptr_t)tctx == (uintptr_t)1U); + assert(tctx == PROF_TCTX_SENTINEL); return; } - if ((uintptr_t)tctx > (uintptr_t)1U) { + if (prof_tctx_is_valid(tctx)) { + /* + * This `assert` really shouldn't be necessary. It's here + * because there's a bug in the clang static analyzer; it + * somehow does not realize that by `prof_tctx_is_valid(tctx)` + * being true that we've already ensured that `tctx` is not + * `NULL`. + */ + assert(tctx != NULL); malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock); tctx->prepared = false; prof_tctx_try_destroy(tsd, tctx); @@ -169,7 +177,7 @@ prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, assert(prof_info != NULL); prof_tctx_t *tctx = prof_info->alloc_tctx; - assert((uintptr_t)tctx > (uintptr_t)1U); + assert(prof_tctx_is_valid(tctx)); szind_t szind = sz_size2index(usize); diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index e0efdc36..d19dd395 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -18,7 +18,7 @@ TEST_BEGIN(test_prof_realloc) { p = mallocx(1024, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - expect_ptr_ne(prof_info_p.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx"); prof_cnt_all(&cnt_1); expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs, @@ -28,7 +28,7 @@ TEST_BEGIN(test_prof_realloc) { expect_ptr_ne(p, q, "Expected move"); expect_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - expect_ptr_ne(prof_info_q.alloc_tctx, (prof_tctx_t *)(uintptr_t)1U, + expect_ptr_ne(prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, 
"Expected valid tctx"); prof_cnt_all(&cnt_2); expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs, From 14311536959457d10e9307a580afeb0af1a8838b Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 24 Jul 2023 10:36:32 -0700 Subject: [PATCH 2320/2608] Define `SBRK_INVALID` instead of using a magic number --- src/extent_dss.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 0b846296..f8bd8f60 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -8,6 +8,8 @@ /******************************************************************************/ /* Data. */ +#define SBRK_INVALID ((void *)-1) + const char *opt_dss = DSS_DEFAULT; const char *const dss_prec_names[] = { @@ -94,7 +96,7 @@ extent_dss_max_update(void *new_addr) { * up to date. */ void *max_cur = extent_dss_sbrk(0); - if (max_cur == (void *)-1) { + if (max_cur == SBRK_INVALID) { return NULL; } atomic_store_p(&dss_max, max_cur, ATOMIC_RELEASE); @@ -220,7 +222,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * Failure, whether due to OOM or a race with a raw * sbrk() call from outside the allocator. */ - if (dss_prev == (void *)-1) { + if (dss_prev == SBRK_INVALID) { /* OOM. 
*/ atomic_store_b(&dss_exhausted, true, ATOMIC_RELEASE); @@ -270,7 +272,7 @@ extent_dss_boot(void) { dss_base = extent_dss_sbrk(0); atomic_store_b(&dss_extending, false, ATOMIC_RELAXED); - atomic_store_b(&dss_exhausted, dss_base == (void *)-1, ATOMIC_RELAXED); + atomic_store_b(&dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED); atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED); } From 4827bb17bdd5a25921c5b091ffadf3039d297b17 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 24 Jul 2023 10:38:42 -0700 Subject: [PATCH 2321/2608] Remove vestigial `TCACHE_STATE_*` macros --- include/jemalloc/internal/tcache_types.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index cea86fb7..50f1fbcd 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -8,16 +8,6 @@ typedef struct tcache_slow_s tcache_slow_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; -/* - * tcache pointers close to NULL are used to encode state information that is - * used for two purposes: preventing thread caching on a per thread basis and - * cleaning up during thread shutdown. - */ -#define TCACHE_STATE_DISABLED ((tcache_t *)(uintptr_t)1) -#define TCACHE_STATE_REINCARNATED ((tcache_t *)(uintptr_t)2) -#define TCACHE_STATE_PURGATORY ((tcache_t *)(uintptr_t)3) -#define TCACHE_STATE_MAX TCACHE_STATE_PURGATORY - /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). 
*/ #define TCACHE_ZERO_INITIALIZER {0} #define TCACHE_SLOW_ZERO_INITIALIZER {0} From 3e82f357bb218194df5ba1acee39cd6a7d6fe6f6 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 24 Jul 2023 10:33:36 -0700 Subject: [PATCH 2322/2608] Fix all optimization-inhibiting integer-to-pointer casts Following from PR #2481, we replace all integer-to-pointer casts [which hide pointer provenance information (and thus inhibit optimizations)](https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html) with equivalent operations that preserve this information. I have enabled the corresponding clang-tidy check in our static analysis CI so that we do not get bitten by this again in the future. --- include/jemalloc/internal/arena_inlines_b.h | 4 +- include/jemalloc/internal/cache_bin.h | 4 +- include/jemalloc/internal/edata.h | 6 +-- .../internal/jemalloc_internal_decls.h | 17 +++++++++ .../internal/jemalloc_internal_types.h | 16 +++++++- include/jemalloc/internal/pages.h | 5 ++- include/jemalloc/internal/prof_types.h | 1 + include/jemalloc/internal/rtree.h | 5 +++ include/jemalloc/internal/safety_check.h | 2 +- include/jemalloc/internal/san.h | 4 +- include/jemalloc/internal/tcache_types.h | 1 + include/jemalloc/internal/util.h | 4 +- scripts/run_static_analysis.sh | 3 +- src/arena.c | 3 +- src/background_thread.c | 2 + src/base.c | 6 +-- src/cache_bin.c | 8 ++-- src/ehooks.c | 8 ++-- src/extent.c | 4 +- src/extent_dss.c | 11 +++--- src/hpdata.c | 4 +- src/jemalloc.c | 4 +- src/large.c | 6 +-- src/pages.c | 4 +- src/prof_data.c | 8 +++- src/san.c | 38 +++++++++---------- src/tcache.c | 4 +- 27 files changed, 116 insertions(+), 66 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 44a73373..b1cd84b4 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -513,7 +513,7 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, 
edata_t *edata, } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); - edata->e_addr = (void *)((uintptr_t)edata->e_addr + + edata->e_addr = (void *)((byte_t *)edata->e_addr + random_offset); assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == edata->e_addr); @@ -599,7 +599,7 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { - bin_t *shard0 = (bin_t *)((uintptr_t)arena + arena_bin_offsets[binind]); + bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); return shard0 + binshard; } diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 218e368e..2e95c33c 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -247,7 +247,7 @@ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); - uintptr_t empty_bits = (uintptr_t)bin->stack_head + diff; + byte_t *empty_bits = (byte_t *)bin->stack_head + diff; void **ret = (void **)empty_bits; assert(ret >= bin->stack_head); @@ -479,7 +479,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. 
*/ uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); - *(void **)((uintptr_t)bin->stack_head - diff) = ptr; + *(void **)((byte_t *)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); bin->low_bits_full += sizeof(void *); diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 5fe4e14d..baf5187f 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -377,18 +377,18 @@ edata_ps_get(const edata_t *edata) { static inline void * edata_before_get(const edata_t *edata) { - return (void *)((uintptr_t)edata_base_get(edata) - PAGE); + return (void *)((byte_t *)edata_base_get(edata) - PAGE); } static inline void * edata_last_get(const edata_t *edata) { - return (void *)((uintptr_t)edata_base_get(edata) + + return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata) - PAGE); } static inline void * edata_past_get(const edata_t *edata) { - return (void *)((uintptr_t)edata_base_get(edata) + + return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata)); } diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 77ba1c9a..0bca9133 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -105,4 +105,21 @@ isblank(int c) { # undef small #endif +/* + * Oftentimes we'd like to perform some kind of arithmetic to obtain + * a pointer from another pointer but with some offset or mask applied. + * Naively you would accomplish this by casting the source pointer to + * `uintptr_t`, performing all of the relevant arithmetic, and then casting + * the result to the desired pointer type. 
However, this has the unfortunate + * side-effect of concealing pointer provenance, hiding useful information for + * optimization from the compiler (see here for details: + * https://clang.llvm.org/extra/clang-tidy/checks/performance/no-int-to-ptr.html + * ) + * Instead what one should do is cast the source pointer to `char *` and perform + * the equivalent arithmetic (since `char` of course represents one byte). But + * because `char *` has the semantic meaning of "string", we define this typedef + * simply to make it clearer where we are performing such pointer arithmetic. + */ +typedef char byte_t; + #endif /* JEMALLOC_INTERNAL_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index b1c48be9..4ab5a0cf 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -99,7 +99,8 @@ typedef enum malloc_init_e malloc_init_t; /* Return the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)((uintptr_t)(a) & ((~(alignment)) + 1))) + ((void *)(((byte_t *)(a)) - (((uintptr_t)(a)) - \ + ((uintptr_t)(a) & ((~(alignment)) + 1))))) /* Return the offset between a and the nearest aligned address at or below a. */ #define ALIGNMENT_ADDR2OFFSET(a, alignment) \ @@ -109,6 +110,19 @@ typedef enum malloc_init_e malloc_init_t; #define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & ((~(alignment)) + 1)) +/* + * Return the nearest aligned address at or above a. + * + * While at first glance this would appear to be merely a more complicated + * way to perform the same computation as `ALIGNMENT_CEILING`, + * this has the important additional property of not concealing pointer + * provenance from the compiler. See the block-comment on the + * definition of `byte_t` for more details. 
+ */ +#define ALIGNMENT_ADDR2CEILING(a, alignment) \ + ((void *)(((byte_t *)(a)) + (((((uintptr_t)(a)) + \ + (alignment - 1)) & ((~(alignment)) + 1)) - ((uintptr_t)(a))))) + /* Declare a variable-length array. */ #if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) # ifdef _MSC_VER diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 0ecc2cd0..b4e9678e 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_PAGES_EXTERNS_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_types.h" /* Actual operating system page size, detected during bootstrap, <= PAGE. */ extern size_t os_page; @@ -14,7 +15,7 @@ extern size_t os_page; #define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the page base address for the page containing address a. */ #define PAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~PAGE_MASK)) + ALIGNMENT_ADDR2BASE(a, PAGE) /* Return the smallest pagesize multiple that is >= s. */ #define PAGE_CEILING(s) \ (((s) + PAGE_MASK) & ~PAGE_MASK) @@ -41,7 +42,7 @@ extern size_t os_page; /* Return the huge page base address for the huge page containing address a. */ #define HUGEPAGE_ADDR2BASE(a) \ - ((void *)((uintptr_t)(a) & ~HUGEPAGE_MASK)) + ALIGNMENT_ADDR2BASE(a, HUGEPAGE) /* Return the smallest pagesize multiple that is >= s. 
*/ #define HUGEPAGE_CEILING(s) \ (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index 921b16fe..a27f7fb3 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -88,6 +88,7 @@ typedef struct prof_recent_s prof_recent_t; #define PROF_SAMPLE_ALIGNMENT PAGE #define PROF_SAMPLE_ALIGNMENT_MASK PAGE_MASK +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define PROF_TCTX_SENTINEL ((prof_tctx_t *)((uintptr_t)1U)) #endif /* JEMALLOC_INTERNAL_PROF_TYPES_H */ diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index f559c94f..f35368ae 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -226,9 +226,11 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; /* Mask off metadata. */ uintptr_t mask = high_bit_mask & low_bit_mask; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)(bits & mask); # else /* Restore sign-extended high bits, mask metadata bits. 
*/ + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & low_bit_mask); # endif @@ -270,6 +272,7 @@ JEMALLOC_ALWAYS_INLINE void rtree_contents_encode(rtree_contents_t contents, void **bits, unsigned *additional) { #ifdef RTREE_LEAF_COMPACT + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ *bits = (void *)rtree_leaf_elm_bits_encode(contents); /* Suppress spurious warning from static analysis */ if (config_debug) { @@ -320,8 +323,10 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, /* dependent */ true); bits &= ~RTREE_LEAF_STATE_MASK; bits |= state << RTREE_LEAF_STATE_SHIFT; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ atomic_store_p(&elm1->le_bits, (void *)bits, ATOMIC_RELEASE); if (elm2 != NULL) { + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ atomic_store_p(&elm2->le_bits, (void *)bits, ATOMIC_RELEASE); } #else diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index ef778dae..194b7744 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -31,7 +31,7 @@ compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; const unsigned char *page_end = (const unsigned char *) - ALIGNMENT_CEILING(((uintptr_t) (&ptr[usize])), os_page); + ALIGNMENT_ADDR2CEILING(&ptr[usize], os_page); return redzone_end < page_end ? 
redzone_end : page_end; } diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 79723965..669f99dd 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -140,7 +140,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, *first = ptr; - *mid = (void *)((uintptr_t)ptr + ((usize >> 1) & ~(ptr_sz - 1))); + *mid = (void *)((byte_t *)ptr + ((usize >> 1) & ~(ptr_sz - 1))); assert(*first != *mid || usize == ptr_sz); assert((uintptr_t)*first <= (uintptr_t)*mid); @@ -151,7 +151,7 @@ san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, * default the tcache only goes up to the 32K size class, and is usually * tuned lower instead of higher, which makes it less of a concern. */ - *last = (void *)((uintptr_t)ptr + usize - sizeof(uaf_detect_junk)); + *last = (void *)((byte_t *)ptr + usize - sizeof(uaf_detect_junk)); assert(*first != *last || usize == ptr_sz); assert(*mid != *last || usize <= ptr_sz * 2); assert((uintptr_t)*mid <= (uintptr_t)*last); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 50f1fbcd..a781f5a6 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -16,6 +16,7 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_ENABLED_ZERO_INITIALIZER false /* Used for explicit tcache only. Means flushed but not destroyed. 
*/ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 939f3891..536c0970 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -110,14 +110,14 @@ util_prefetch_write(void *ptr) { JEMALLOC_ALWAYS_INLINE void util_prefetch_read_range(void *ptr, size_t sz) { for (size_t i = 0; i < sz; i += CACHELINE) { - util_prefetch_read((void *)((uintptr_t)ptr + i)); + util_prefetch_read((void *)((byte_t *)ptr + i)); } } JEMALLOC_ALWAYS_INLINE void util_prefetch_write_range(void *ptr, size_t sz) { for (size_t i = 0; i < sz; i += CACHELINE) { - util_prefetch_write((void *)((uintptr_t)ptr + i)); + util_prefetch_write((void *)((byte_t *)ptr + i)); } } diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index 4994fe64..70c813d1 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -44,7 +44,8 @@ echo '-**/stdlib.h' > "$skipfile" CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(nproc)" \ --ctu --compile-uniqueing strict --output static_analysis_raw_results \ --analyzers clangsa clang-tidy --skip "$skipfile" \ - --enable readability-inconsistent-declaration-parameter-name + --enable readability-inconsistent-declaration-parameter-name \ + --enable performance-no-int-to-ptr # `--enable` is additive, the vast majority of the checks we want are # enabled by default. 
diff --git a/src/arena.c b/src/arena.c index a8890e57..65eef864 100644 --- a/src/arena.c +++ b/src/arena.c @@ -236,7 +236,7 @@ arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((uintptr_t)edata_addr_get(slab) + + ret = (void *)((byte_t *)edata_addr_get(slab) + (uintptr_t)(bin_info->reg_size * regind)); edata_nfree_dec(slab); return ret; @@ -280,6 +280,7 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, while (pop--) { size_t bit = cfs_lu(&g); size_t regind = shift + bit; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ *(ptrs + i) = (void *)(base + regsize * regind); i++; diff --git a/src/background_thread.c b/src/background_thread.c index 53b492bb..94d91a89 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -368,6 +368,7 @@ check_background_thread_creation(tsd_t *tsd, pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); @@ -540,6 +541,7 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { * background threads with the underlying pthread_create. 
*/ int err = background_thread_create_signals_masked(&info->thread, NULL, + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ background_thread_entry, (void *)thread_ind); post_reentrancy(tsd); diff --git a/src/base.c b/src/base.c index 16f90495..8e4606d0 100644 --- a/src/base.c +++ b/src/base.c @@ -181,9 +181,9 @@ base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata), alignment) - (uintptr_t)edata_addr_get(edata); - ret = (void *)((uintptr_t)edata_addr_get(edata) + *gap_size); + ret = (void *)((byte_t *)edata_addr_get(edata) + *gap_size); assert(edata_bsize_get(edata) >= *gap_size + size); - edata_binit(edata, (void *)((uintptr_t)edata_addr_get(edata) + + edata_binit(edata, (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size), edata_bsize_get(edata) - *gap_size - size, edata_sn_get(edata)); return ret; @@ -291,7 +291,7 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, block->next = NULL; assert(block_size >= header_size); base_edata_init(extent_sn_next, &block->edata, - (void *)((uintptr_t)block + header_size), block_size - header_size); + (void *)((byte_t *)block + header_size), block_size - header_size); return block; } diff --git a/src/cache_bin.c b/src/cache_bin.c index a4c22bd7..362605a8 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -50,7 +50,7 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); } - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + *(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_preceding_junk; *cur_offset += sizeof(void *); } @@ -58,7 +58,7 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset) { - *(uintptr_t *)((uintptr_t)alloc + *cur_offset) = + *(uintptr_t *)((byte_t *)alloc + 
*cur_offset) = cache_bin_trailing_junk; *cur_offset += sizeof(void *); } @@ -71,12 +71,12 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, * will access the slots toward higher addresses (for the benefit of * adjacent prefetch). */ - void *stack_cur = (void *)((uintptr_t)alloc + *cur_offset); + void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); void *full_position = stack_cur; uint16_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; - void *empty_position = (void *)((uintptr_t)alloc + *cur_offset); + void *empty_position = (void *)((byte_t *)alloc + *cur_offset); /* Init to the empty position. */ bin->stack_head = (void **)empty_position; diff --git a/src/ehooks.c b/src/ehooks.c index da759215..fc2355e6 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -100,7 +100,7 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { - return pages_commit((void *)((uintptr_t)addr + (uintptr_t)offset), + return pages_commit((void *)((byte_t *)addr + (uintptr_t)offset), length); } @@ -112,7 +112,7 @@ ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { - return pages_decommit((void *)((uintptr_t)addr + (uintptr_t)offset), + return pages_decommit((void *)((byte_t *)addr + (uintptr_t)offset), length); } @@ -125,7 +125,7 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { - return pages_purge_lazy((void *)((uintptr_t)addr + (uintptr_t)offset), + return pages_purge_lazy((void *)((byte_t *)addr + (uintptr_t)offset), length); } @@ -143,7 +143,7 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_FORCED bool 
ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { - return pages_purge_forced((void *)((uintptr_t)addr + + return pages_purge_forced((void *)((byte_t *)addr + (uintptr_t)offset), length); } diff --git a/src/extent.c b/src/extent.c index 477050b6..822c6eee 100644 --- a/src/extent.c +++ b/src/extent.c @@ -743,7 +743,7 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* A successful commit should return zeroed memory. */ if (config_debug) { void *addr = edata_addr_get(edata); - size_t *p = (size_t *)(uintptr_t)addr; + size_t *p = (size_t *)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { assert(p[i] == 0); @@ -1199,7 +1199,7 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_init(trail, edata_arena_ind_get(edata), - (void *)((uintptr_t)edata_base_get(edata) + size_a), size_b, + (void *)((byte_t *)edata_base_get(edata) + size_a), size_b, /* slab */ false, SC_NSIZES, edata_sn_get(edata), edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); diff --git a/src/extent_dss.c b/src/extent_dss.c index f8bd8f60..32fb4112 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -8,6 +8,7 @@ /******************************************************************************/ /* Data. */ +/* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define SBRK_INVALID ((void *)-1) const char *opt_dss = DSS_DEFAULT; @@ -149,10 +150,10 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * necessary to satisfy alignment. This space can be * recycled for later use. 
*/ - void *gap_addr_page = (void *)(PAGE_CEILING( - (uintptr_t)max_cur)); - void *ret = (void *)ALIGNMENT_CEILING( - (uintptr_t)gap_addr_page, alignment); + void *gap_addr_page = ALIGNMENT_ADDR2CEILING(max_cur, + PAGE); + void *ret = ALIGNMENT_ADDR2CEILING( + gap_addr_page, alignment); size_t gap_size_page = (uintptr_t)ret - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { @@ -167,7 +168,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * Compute the address just past the end of the desired * allocation space. */ - void *dss_next = (void *)((uintptr_t)ret + size); + void *dss_next = (void *)((byte_t *)ret + size); if ((uintptr_t)ret < (uintptr_t)max_cur || (uintptr_t)dss_next < (uintptr_t)max_cur) { goto label_oom; /* Wrap-around. */ diff --git a/src/hpdata.c b/src/hpdata.c index e7d7294c..3058eafe 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -130,7 +130,7 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata_assert_consistent(hpdata); return (void *)( - (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); + (byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void @@ -277,7 +277,7 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, } *r_purge_addr = (void *)( - (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE); + (byte_t *)hpdata_addr_get(hpdata) + purge_begin * PAGE); *r_purge_size = purge_len * PAGE; purge_state->next_purge_search_begin = purge_begin + purge_len; diff --git a/src/jemalloc.c b/src/jemalloc.c index a36b4974..df0c1ebc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3446,7 +3446,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((uintptr_t)p + old_usize); + void *excess_start = (void *)((byte_t *)p + old_usize); junk_alloc_callback(excess_start, excess_len); } @@ -3716,7 +3716,7 @@ 
je_xallocx(void *ptr, size_t size, size_t extra, int flags) { if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((uintptr_t)ptr + old_usize); + void *excess_start = (void *)((byte_t *)ptr + old_usize); junk_alloc_callback(excess_start, excess_len); } label_not_resized: diff --git a/src/large.c b/src/large.c index 10fa652e..d78085f0 100644 --- a/src/large.c +++ b/src/large.c @@ -113,10 +113,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * of CACHELINE in [0 .. PAGE). */ void *zbase = (void *) - ((uintptr_t)edata_addr_get(edata) + old_usize); - void *zpast = PAGE_ADDR2BASE((void *)((uintptr_t)zbase + + ((byte_t *)edata_addr_get(edata) + old_usize); + void *zpast = PAGE_ADDR2BASE((void *)((byte_t *)zbase + PAGE)); - size_t nzero = (uintptr_t)zpast - (uintptr_t)zbase; + size_t nzero = (byte_t *)zpast - (byte_t *)zbase; assert(nzero > 0); memset(zbase, 0, nzero); } diff --git a/src/pages.c b/src/pages.c index 249d7c5b..58d9cfaf 100644 --- a/src/pages.c +++ b/src/pages.c @@ -197,7 +197,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { static void * os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, bool *commit) { - void *ret = (void *)((uintptr_t)addr + leadsize); + void *ret = (void *)((byte_t *)addr + leadsize); assert(alloc_size >= leadsize + size); #ifdef _WIN32 @@ -217,7 +217,7 @@ os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, os_pages_unmap(addr, leadsize); } if (trailsize != 0) { - os_pages_unmap((void *)((uintptr_t)ret + size), trailsize); + os_pages_unmap((void *)((byte_t *)ret + size), trailsize); } return ret; #endif diff --git a/src/prof_data.c b/src/prof_data.c index d52522b0..91a9268d 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -85,8 +85,10 @@ prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { return ret; } +/* 
NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link, prof_tctx_comp) +/* NOLINTEND(performance-no-int-to-ptr) */ static int prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { @@ -100,8 +102,10 @@ prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { return ret; } +/* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, prof_gctx_comp) +/* NOLINTEND(performance-no-int-to-ptr) */ static int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { @@ -119,8 +123,10 @@ prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { return ret; } +/* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, prof_tdata_comp) +/* NOLINTEND(performance-no-int-to-ptr) */ /******************************************************************************/ @@ -1141,7 +1147,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, return NULL; } - tdata->vec = (void **)((uintptr_t)tdata + tdata_sz); + tdata->vec = (void **)((byte_t *)tdata + tdata_sz); tdata->lock = prof_tdata_mutex_choose(thr_uid); tdata->thr_uid = thr_uid; tdata->thr_discrim = thr_discrim; diff --git a/src/san.c b/src/san.c index 6e512911..28ea3d7c 100644 --- a/src/san.c +++ b/src/san.c @@ -20,43 +20,43 @@ ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT; uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT; static inline void -san_find_guarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, - uintptr_t *addr, size_t size, bool left, bool right) { +san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, + void **addr, size_t size, bool left, bool right) { assert(!edata_guarded_get(edata)); assert(size % PAGE == 0); - *addr = (uintptr_t)edata_base_get(edata); + *addr = edata_base_get(edata); if (left) { *guard1 = *addr; - *addr 
+= SAN_PAGE_GUARD; + *addr = ((byte_t *)*addr) + SAN_PAGE_GUARD; } else { - *guard1 = 0; + *guard1 = NULL; } if (right) { - *guard2 = *addr + size; + *guard2 = ((byte_t *)*addr) + size; } else { - *guard2 = 0; + *guard2 = NULL; } } static inline void -san_find_unguarded_addr(edata_t *edata, uintptr_t *guard1, uintptr_t *guard2, - uintptr_t *addr, size_t size, bool left, bool right) { +san_find_unguarded_addr(edata_t *edata, void **guard1, void **guard2, + void **addr, size_t size, bool left, bool right) { assert(edata_guarded_get(edata)); assert(size % PAGE == 0); - *addr = (uintptr_t)edata_base_get(edata); + *addr = edata_base_get(edata); if (right) { - *guard2 = *addr + size; + *guard2 = ((byte_t *)*addr) + size; } else { - *guard2 = 0; + *guard2 = NULL; } if (left) { - *guard1 = *addr - SAN_PAGE_GUARD; - assert(*guard1 != 0); + *guard1 = ((byte_t *)*addr) - SAN_PAGE_GUARD; + assert(*guard1 != NULL); *addr = *guard1; } else { - *guard1 = 0; + *guard1 = NULL; } } @@ -73,16 +73,16 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, ? san_two_side_unguarded_sz(size_with_guards) : san_one_side_unguarded_sz(size_with_guards); - uintptr_t guard1, guard2, addr; + void *guard1, *guard2, *addr; san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left, right); assert(edata_state_get(edata) == extent_state_active); - ehooks_guard(tsdn, ehooks, (void *)guard1, (void *)guard2); + ehooks_guard(tsdn, ehooks, guard1, guard2); /* Update the guarded addr and usable size of the edata. */ edata_size_set(edata, usize); - edata_addr_set(edata, (void *)addr); + edata_addr_set(edata, addr); edata_guarded_set(edata, true); if (remap) { @@ -108,7 +108,7 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, ? 
san_two_side_guarded_sz(size) : san_one_side_guarded_sz(size); - uintptr_t guard1, guard2, addr; + void *guard1, *guard2, *addr; san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left, right); diff --git a/src/tcache.c b/src/tcache.c index fa16732e..914ddb7a 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -768,9 +768,9 @@ tcache_create_explicit(tsd_t *tsd) { if (mem == NULL) { return NULL; } - tcache_t *tcache = (void *)((uintptr_t)mem + tcache_bin_alloc_size); + tcache_t *tcache = (void *)((byte_t *)mem + tcache_bin_alloc_size); tcache_slow_t *tcache_slow = - (void *)((uintptr_t)mem + tcache_bin_alloc_size + sizeof(tcache_t)); + (void *)((byte_t *)mem + tcache_bin_alloc_size + sizeof(tcache_t)); tcache_init(tsd, tcache_slow, tcache, mem); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, From 8ff7e7d6c33fd18a9f8c9f086e027dd0edfc27f0 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 25 Jul 2023 10:42:10 -0700 Subject: [PATCH 2323/2608] Remove errant `#include`s in public `jemalloc.h` header In an attempt to make all headers self-contained, I inadvertently added `#include`s which refer to intermediate, generated headers that aren't included in the final install. Closes #2489. 
--- include/jemalloc/jemalloc_protos.h.in | 3 --- 1 file changed, 3 deletions(-) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 170493dd..3e1d3223 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -1,6 +1,3 @@ -#include "jemalloc/jemalloc_defs.h" -#include "jemalloc/jemalloc_macros.h" - /* * The @je_@ prefix on the following public symbol declarations is an artifact * of namespace management, and should be omitted in application code unless From 9ba1e1cb37b84daf00d37936f4223823c2aaac44 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Tue, 25 Jul 2023 12:14:35 -0700 Subject: [PATCH 2324/2608] Make `ctl_arena_clear` slightly more efficient While this function isn't particularly hot, (accounting for just 0.27% of time spent inside the allocator on average across the fleet), looking at the generated assembly and performance profiles does show we're dispatching to multiple different `memset`s when we could instead be just tail-calling `memset` once, reducing code size and marginally improving performance. 
--- src/ctl.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 7d0ab346..454766da 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1042,23 +1042,7 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->pdirty = 0; ctl_arena->pmuzzy = 0; if (config_stats) { - memset(&ctl_arena->astats->astats, 0, sizeof(arena_stats_t)); - ctl_arena->astats->allocated_small = 0; - ctl_arena->astats->nmalloc_small = 0; - ctl_arena->astats->ndalloc_small = 0; - ctl_arena->astats->nrequests_small = 0; - ctl_arena->astats->nfills_small = 0; - ctl_arena->astats->nflushes_small = 0; - memset(ctl_arena->astats->bstats, 0, SC_NBINS * - sizeof(bin_stats_data_t)); - memset(ctl_arena->astats->lstats, 0, (SC_NSIZES - SC_NBINS) * - sizeof(arena_stats_large_t)); - memset(ctl_arena->astats->estats, 0, SC_NPSIZES * - sizeof(pac_estats_t)); - memset(&ctl_arena->astats->hpastats, 0, - sizeof(hpa_shard_stats_t)); - memset(&ctl_arena->astats->secstats, 0, - sizeof(sec_stats_t)); + memset(ctl_arena->astats, 0, sizeof(*(ctl_arena->astats))); } } From b01d49664651f239fdf76774cb6de05ed7e63f4a Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 28 Jul 2023 11:54:27 -0700 Subject: [PATCH 2325/2608] Add an override for the compile-time malloc_conf to `jemalloc_internal_overrides.h` --- include/jemalloc/internal/jemalloc_internal_overrides.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h index ddd6ee17..5fbbe249 100644 --- a/include/jemalloc/internal/jemalloc_internal_overrides.h +++ b/include/jemalloc/internal/jemalloc_internal_overrides.h @@ -13,4 +13,9 @@ #define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE #endif +#ifdef JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF + #undef JEMALLOC_CONFIG_MALLOC_CONF + #define JEMALLOC_CONFIG_MALLOC_CONF JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF +#endif + #endif /* JEMALLOC_INTERNAL_OVERRIDES_H 
*/ From 62648c88e5e50b8ed11181a8c42dbc1134d6d854 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 26 Jul 2023 12:25:59 -0700 Subject: [PATCH 2326/2608] Ensured sampled allocations are properly deallocated during `arena_reset` Sampled allocations were not being demoted before being deallocated during an `arena_reset` operation. --- src/arena.c | 141 ++++++++++++++++++++++++++++------------------------ 1 file changed, 76 insertions(+), 65 deletions(-) diff --git a/src/arena.c b/src/arena.c index 65eef864..f330663b 100644 --- a/src/arena.c +++ b/src/arena.c @@ -659,6 +659,76 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); } +void +arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { + cassert(config_prof); + assert(ptr != NULL); + assert(isalloc(tsdn, ptr) == bumped_usize); + assert(sz_can_use_slab(usize)); + + if (config_opt_safety_checks) { + safety_check_set_redzone(ptr, usize, bumped_usize); + } + + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + + szind_t szind = sz_size2index(usize); + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); + + assert(isalloc(tsdn, ptr) == usize); +} + +static size_t +arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { + cassert(config_prof); + assert(ptr != NULL); + size_t usize = isalloc(tsdn, ptr); + size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); + assert(bumped_usize <= SC_LARGE_MINCLASS && + PAGE_CEILING(bumped_usize) == bumped_usize); + assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); + szind_t szind = sz_size2index(bumped_usize); + + edata_szind_set(edata, szind); + emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); + + assert(isalloc(tsdn, ptr) == bumped_usize); + + return bumped_usize; +} + +static void +arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path, 
edata_t *edata) { + cassert(config_prof); + assert(opt_prof); + + size_t usize = edata_usize_get(edata); + size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); + if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { + /* + * Currently, we only do redzoning for small sampled + * allocations. + */ + safety_check_verify_redzone(ptr, usize, bumped_usize); + } + if (bumped_usize >= SC_LARGE_MINCLASS && + bumped_usize <= tcache_maxclass && tcache != NULL) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, + sz_size2index(bumped_usize), slow_path); + } else { + large_dalloc(tsdn, edata); + } +} + +void +arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, + bool slow_path) { + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata); +} + void arena_reset(tsd_t *tsd, arena_t *arena) { /* @@ -697,7 +767,12 @@ arena_reset(tsd_t *tsd, arena_t *arena) { if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } - large_dalloc(tsd_tsdn(tsd), edata); + if (config_prof && opt_prof && alloc_ctx.szind < SC_NBINS) { + arena_dalloc_promoted_impl(tsd_tsdn(tsd), ptr, + /* tcache */ NULL, /* slow_path */ true, edata); + } else { + large_dalloc(tsd_tsdn(tsd), edata); + } malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); } malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); @@ -1236,70 +1311,6 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } } -void -arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize) { - cassert(config_prof); - assert(ptr != NULL); - assert(isalloc(tsdn, ptr) == bumped_usize); - assert(sz_can_use_slab(usize)); - - if (config_opt_safety_checks) { - safety_check_set_redzone(ptr, usize, bumped_usize); - } - - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - - szind_t szind = sz_size2index(usize); - edata_szind_set(edata, szind); - emap_remap(tsdn, &arena_emap_global, edata, 
szind, /* slab */ false); - - assert(isalloc(tsdn, ptr) == usize); -} - -static size_t -arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { - cassert(config_prof); - assert(ptr != NULL); - size_t usize = isalloc(tsdn, ptr); - size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); - assert(bumped_usize <= SC_LARGE_MINCLASS && - PAGE_CEILING(bumped_usize) == bumped_usize); - assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); - szind_t szind = sz_size2index(bumped_usize); - - edata_szind_set(edata, szind); - emap_remap(tsdn, &arena_emap_global, edata, szind, /* slab */ false); - - assert(isalloc(tsdn, ptr) == bumped_usize); - - return bumped_usize; -} - -void -arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path) { - cassert(config_prof); - assert(opt_prof); - - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - size_t usize = edata_usize_get(edata); - size_t bumped_usize = arena_prof_demote(tsdn, edata, ptr); - if (config_opt_safety_checks && usize < SC_LARGE_MINCLASS) { - /* - * Currently, we only do redzoning for small sampled - * allocations. - */ - safety_check_verify_redzone(ptr, usize, bumped_usize); - } - if (bumped_usize >= SC_LARGE_MINCLASS && - bumped_usize <= tcache_maxclass && tcache != NULL) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(bumped_usize), slow_path); - } else { - large_dalloc(tsdn, edata); - } -} - static void arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ From 6816b238625d67e0bf3b6768f00709051b23f2a6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 1 Aug 2023 14:28:24 -0700 Subject: [PATCH 2327/2608] Include the unrecognized malloc conf option in the error message. Previously the option causing trouble will not be printed, unless the option key:value pair format is found. 
--- src/jemalloc.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index df0c1ebc..ccb20c81 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -865,6 +865,14 @@ malloc_conf_multi_sizes_next(const char **slab_size_segment_cur, return false; } +static void +malloc_conf_format_error(const char *msg, const char *begin, const char *end) { + size_t len = end - begin + 1; + len = len > BUFERROR_BUF ? BUFERROR_BUF : len; + + malloc_printf(": %s -- %.*s\n", msg, (int)len, begin); +} + static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { @@ -898,13 +906,15 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, break; case '\0': if (opts != *opts_p) { - malloc_write(": Conf string ends " - "with key\n"); + malloc_conf_format_error( + "Conf string ends with key", + *opts_p, opts - 1); had_conf_error = true; } return true; default: - malloc_write(": Malformed conf string\n"); + malloc_conf_format_error( + "Malformed conf string", *opts_p, opts); had_conf_error = true; return true; } @@ -922,8 +932,9 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, * comma if one exists. */ if (*opts == '\0') { - malloc_write(": Conf string ends " - "with comma\n"); + malloc_conf_format_error( + "Conf string ends with comma", + *opts_p, opts - 1); had_conf_error = true; } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; From ea5b7bea3144cd26a63510016d778eab3ca58822 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 6 Jul 2023 12:49:10 -0700 Subject: [PATCH 2328/2608] Add configuration option controlling DSS support In many environments, the fallback `sbrk(2)` allocation path is never used even if the system supports the syscall; if you're at the point where `mmap(2)` is failing, `sbrk(2)` is unlikely to succeed. 
Without changing the default, I've added the ability to disable the usage of DSS altogether, so that you do not need to pay for the additional code size and handful of extra runtime branches in such environments. --- configure.ac | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index f820d14a..c1ad9e66 100644 --- a/configure.ac +++ b/configure.ac @@ -1469,6 +1469,18 @@ if test "x$zero_realloc_default_free" = "x1" ; then AC_DEFINE([JEMALLOC_ZERO_REALLOC_DEFAULT_FREE], [ ], [ ]) fi +dnl Support allocation from DSS by default +AC_ARG_ENABLE([dss], + [AS_HELP_STRING([--disable-dss], [Disable usage of sbrk(2)])], +[if test "x$enable_dss" = "xno" ; then + enable_dss="0" +else + enable_dss="1" +fi +], +[enable_dss="1"] +) + dnl Enable allocation from DSS if supported by the OS. have_dss="1" dnl Check whether the BSD/SUSv1 sbrk() exists. If not, disable DSS support. @@ -1482,7 +1494,7 @@ else have_dss="0" fi -if test "x$have_dss" = "x1" ; then +if test "x$have_dss" = "x1" -a "x$enable_dss" = "x1" ; then AC_DEFINE([JEMALLOC_DSS], [ ], [ ]) fi @@ -2791,4 +2803,5 @@ AC_MSG_RESULT([lazy_lock : ${enable_lazy_lock}]) AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([pageid : ${enable_pageid}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) +AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([===============================================================================]) From 07a2eab3ed5dd76657ee689326acd9ecaf1e2830 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Wed, 19 Jul 2023 12:30:12 -0700 Subject: [PATCH 2329/2608] Stop over-reporting memory usage from sampled small allocations @interwq noticed [while reviewing an earlier PR](https://github.com/jemalloc/jemalloc/pull/2478#discussion_r1256217261) that I missed modifying this statistics accounting in line with the rest of the changes from #2459. 
This is now fixed, such that sampled small allocations increment the `.nmalloc`/`.ndalloc` of their effective bin size instead of over-reporting memory usage by attributing all such allocations to `SC_LARGE_MINCLASS`. --- src/arena.c | 54 +++++++++++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/src/arena.c b/src/arena.c index f330663b..fe5874a4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -293,34 +293,48 @@ arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { - szind_t index, hindex; - cassert(config_stats); + szind_t index = sz_size2index(usize); + /* This only occurs when we have a sampled small allocation */ if (usize < SC_LARGE_MINCLASS) { - usize = SC_LARGE_MINCLASS; + assert(index < SC_NBINS); + assert(usize >= PAGE && usize % PAGE == 0); + bin_t *bin = arena_get_bin(arena, index, /* binshard */ 0); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nmalloc++; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + assert(index >= SC_NBINS); + szind_t hindex = index - SC_NBINS; + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].nmalloc, 1); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - index = sz_size2index(usize); - hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; - - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].nmalloc, 1); } static void arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { - szind_t index, hindex; - cassert(config_stats); + szind_t index = sz_size2index(usize); + /* This only occurs when we have a sampled small allocation */ if (usize < SC_LARGE_MINCLASS) { - usize = SC_LARGE_MINCLASS; + assert(index < SC_NBINS); + assert(usize >= PAGE && usize % PAGE == 0); + bin_t *bin = arena_get_bin(arena, index, /* binshard */ 0); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.ndalloc++; + malloc_mutex_unlock(tsdn, &bin->lock); + } else { + assert(index >= SC_NBINS); + szind_t hindex = index - SC_NBINS; + LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].ndalloc, 1); + LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } - index = sz_size2index(usize); - hindex = (index >= SC_NBINS) ? 
index - SC_NBINS : 0; - - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].ndalloc, 1); } static void @@ -344,9 +358,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, if (edata != NULL) { if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_malloc_stats_update(tsdn, arena, usize); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -360,10 +372,8 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_dalloc_stats_update(tsdn, arena, edata_usize_get(edata)); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -373,9 +383,7 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t usize = edata_usize_get(edata); if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -385,9 +393,7 @@ arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t usize = edata_usize_get(edata); if (config_stats) { - LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); arena_large_ralloc_stats_update(tsdn, arena, oldusize, usize); - LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } From 162ff8365da9bc30f3dcddf0e02c7b7c40197bfc Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 4 Aug 2023 14:22:35 -0700 Subject: [PATCH 2330/2608] Update the Ubuntu version used by Travis CI Update from Ubuntu Focal Fossa to Ubuntu Jammy Jellyfish. Staying up to date is always good, but I'm also hoping that perhaps this newer release contains fixes so that PowerPC VMs don't randomly hang indefinitely while booting anymore, stalling our CI pipeline. 
--- .travis.yml | 2 +- scripts/gen_travis.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 49e6aa7e..85e0b720 100644 --- a/.travis.yml +++ b/.travis.yml @@ -6,7 +6,7 @@ # Differences are explained here: # https://docs.travis-ci.com/user/languages/minimal-and-generic/ language: minimal -dist: focal +dist: jammy jobs: include: diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index b49905f9..fe4e029f 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -24,7 +24,7 @@ TRAVIS_TEMPLATE = """\ # Differences are explained here: # https://docs.travis-ci.com/user/languages/minimal-and-generic/ language: minimal -dist: focal +dist: jammy jobs: include: From 120abd703addce50fb9105ee4f7e42c3612c3774 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 3 Aug 2023 15:05:10 -0700 Subject: [PATCH 2331/2608] Add support for the `deprecated` attribute This is useful for enforcing the usage of getter/setter functions to access fields which are considered private or have unique access constraints. --- configure.ac | 24 +++++++++++++++++++ .../internal/jemalloc_internal_macros.h | 14 +++++++++++ include/jemalloc/jemalloc_defs.h.in | 3 +++ include/jemalloc/jemalloc_macros.h.in | 7 ++++++ 4 files changed, 48 insertions(+) diff --git a/configure.ac b/configure.ac index c1ad9e66..ff493e1d 100644 --- a/configure.ac +++ b/configure.ac @@ -988,6 +988,30 @@ if test "x${je_cv_cold}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_ATTR_COLD], [ ], [ ]) fi +dnl Check for deprecated attribute support. 
+JE_CFLAGS_SAVE() +JE_CFLAGS_ADD([-Wdeprecated-declarations]) +JE_COMPILABLE([deprecated attribute], + [#if !__has_attribute(deprecated) + #error "deprecated attribute not supported" + #endif + struct has_deprecated_field { + int good; + int __attribute__((deprecated("Do not use"))) bad; + }; + ], + [struct has_deprecated_field instance; + instance.good = 0; + instance.bad = 1; + ], + [je_cv_deprecated]) +JE_CFLAGS_RESTORE() +if test "x${je_cv_deprecated}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_ATTR_DEPRECATED], [ ], [ ]) + JE_CFLAGS_ADD([-Wdeprecated-declarations]) + JE_CXXFLAGS_ADD([-Wdeprecated-declarations]) +fi + dnl Check for VM_MAKE_TAG for mmap support. JE_COMPILABLE([vm_make_tag], [#include diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index a08b7e7a..9abcbb20 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -53,6 +53,7 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* #pragma GCC diagnostic first appeared in gcc 4.6. 
*/ #elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \ @@ -92,6 +93,12 @@ # else # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN # endif +# ifdef JEMALLOC_HAVE_ATTR_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# endif # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ JEMALLOC_DIAGNOSTIC_PUSH \ JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER @@ -103,9 +110,16 @@ # define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS # define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS # define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #endif +#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + __VA_ARGS__ \ + JEMALLOC_DIAGNOSTIC_POP + /* * Disables spurious diagnostics for all headers. Since these headers are not * included by users directly, it does not affect their diagnostic settings. diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index 77d9d3b5..ef04e756 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -19,6 +19,9 @@ /* Defined if cold attribute is supported. */ #undef JEMALLOC_HAVE_ATTR_COLD +/* Defined if deprecated attribute is supported. */ +#undef JEMALLOC_HAVE_ATTR_DEPRECATED + /* * Define overrides for non-standard allocator-related functions if they are * present on the system. 
diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index 05d996be..a0679af5 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -86,6 +86,7 @@ # define JEMALLOC_ALLOCATOR # endif # define JEMALLOC_COLD +# define JEMALLOC_WARN_ON_USAGE(warning_message) #elif defined(JEMALLOC_HAVE_ATTR) # define JEMALLOC_ATTR(s) __attribute__((s)) # define JEMALLOC_ALIGNED(s) JEMALLOC_ATTR(aligned(s)) @@ -126,6 +127,11 @@ # else # define JEMALLOC_COLD # endif +# ifdef JEMALLOC_HAVE_ATTR_DEPRECATED +# define JEMALLOC_WARN_ON_USAGE(warning_message) JEMALLOC_ATTR(deprecated(warning_message)) +# else +# define JEMALLOC_WARN_ON_USAGE(warning_message) +# endif #else # define JEMALLOC_ATTR(s) # define JEMALLOC_ALIGNED(s) @@ -140,6 +146,7 @@ # define JEMALLOC_RESTRICT_RETURN # define JEMALLOC_ALLOCATOR # define JEMALLOC_COLD +# define JEMALLOC_WARN_ON_USAGE(warning_message) #endif #if (defined(__APPLE__) || defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__linux__) && !defined(__GLIBC__))) && !defined(JEMALLOC_NO_RENAME) From 424dd61d57500712fad7371bfd921cb9e3caee22 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Fri, 4 Aug 2023 11:43:59 -0700 Subject: [PATCH 2332/2608] Issue a warning upon directly accessing an arena's bins An arena's bins should normally be accessed via the `arena_get_bin` function, which properly takes into account bin-shards. To ensure that we don't accidentally commit code which incorrectly accesses the bins directly, we mark the field with `__attribute__((deprecated))` with an appropriate warning message, and suppress the warning in the few places where directly accessing the bins is allowed. 
--- include/jemalloc/internal/arena_structs.h | 4 +++- src/arena.c | 20 +++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 0fffa7eb..6f79be97 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -99,7 +99,9 @@ struct arena_s { * The arena is allocated alongside its bins; really this is a * dynamically sized array determined by the binshard settings. */ - bin_t bins[0]; + JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " + "Use `arena_get_bin` instead.") + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/src/arena.c b/src/arena.c index fe5874a4..98907bc1 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1700,7 +1700,9 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. */ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); for (i = 0; i < nbins_total; i++) { - bool err = bin_init(&arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bool err = bin_init(&arena->all_bins[i]); + ) if (err) { goto label_error; } @@ -1849,7 +1851,9 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { (1U << sc->lg_base) + (sc->ndelta << sc->lg_delta)); } - uint32_t cur_offset = (uint32_t)offsetof(arena_t, bins); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins); + ) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; nbins_total += bin_infos[i].n_shards; @@ -1904,14 +1908,18 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < nbins_total; i++) { - bin_prefork(tsdn, &arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_prefork(tsdn, &arena->all_bins[i]); + ) } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { for (unsigned i = 0; i < nbins_total; i++) { - 
bin_postfork_parent(tsdn, &arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_parent(tsdn, &arena->all_bins[i]); + ) } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -1949,7 +1957,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } for (unsigned i = 0; i < nbins_total; i++) { - bin_postfork_child(tsdn, &arena->bins[i]); + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_child(tsdn, &arena->all_bins[i]); + ) } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); From 3aae792b1021a3e46490bd52e8b3300c3aa71e82 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Mon, 17 Jul 2023 15:22:26 -0700 Subject: [PATCH 2333/2608] Fix infinite purging loop in HPA As reported in #2449, under certain circumstances it's possible to get stuck in an infinite loop attempting to purge from the HPA. We now handle this by validating the HPA settings at the end of configuration parsing and either normalizing them or aborting depending on if `abort_conf` is set. --- Makefile.in | 1 + .../internal/jemalloc_internal_externs.h | 1 + src/jemalloc.c | 44 ++++++++++++++- test/unit/hpa_background_thread.sh | 2 +- test/unit/hpa_validate_conf.c | 56 +++++++++++++++++++ test/unit/hpa_validate_conf.sh | 3 + 6 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 test/unit/hpa_validate_conf.c create mode 100644 test/unit/hpa_validate_conf.sh diff --git a/Makefile.in b/Makefile.in index a0131558..3a02b3fd 100644 --- a/Makefile.in +++ b/Makefile.in @@ -225,6 +225,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_background_thread.c \ + $(srcroot)test/unit/hpa_validate_conf.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index ae03c644..64d9aa20 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ 
b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -25,6 +25,7 @@ extern bool opt_junk_alloc; extern bool opt_junk_free; extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size); extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size); +extern void (*JET_MUTABLE invalid_conf_abort)(void); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_experimental_infallible_new; diff --git a/src/jemalloc.c b/src/jemalloc.c index ccb20c81..c5a06f6e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -144,6 +144,7 @@ static void default_junk_free(void *ptr, size_t usize) { void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size) = &default_junk_free; +void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; bool opt_utrace = false; bool opt_xmalloc = false; @@ -959,7 +960,7 @@ malloc_abort_invalid_conf(void) { assert(opt_abort_conf); malloc_printf(": Abort (abort_conf:true) on invalid conf " "value (see above).\n"); - abort(); + invalid_conf_abort(); } static void @@ -1081,6 +1082,46 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { return ret; } +static void +validate_hpa_settings(void) { + if (!hpa_supported() || !opt_hpa || opt_hpa_opts.dirty_mult == (fxp_t)-1) { + return; + } + size_t hpa_threshold = fxp_mul_frac(HUGEPAGE, opt_hpa_opts.dirty_mult) + + opt_hpa_opts.hugification_threshold; + if (hpa_threshold > HUGEPAGE) { + return; + } + + had_conf_error = true; + char hpa_dirty_mult[FXP_BUF_SIZE]; + char hugification_threshold[FXP_BUF_SIZE]; + char normalization_message[256] = {0}; + fxp_print(opt_hpa_opts.dirty_mult, hpa_dirty_mult); + fxp_print(fxp_div(FXP_INIT_INT((unsigned) + (opt_hpa_opts.hugification_threshold >> LG_PAGE)), + FXP_INIT_INT(HUGEPAGE_PAGES)), hugification_threshold); + if (!opt_abort_conf) { + char normalized_hugification_threshold[FXP_BUF_SIZE]; + opt_hpa_opts.hugification_threshold += + 
HUGEPAGE - hpa_threshold; + fxp_print(fxp_div(FXP_INIT_INT((unsigned) + (opt_hpa_opts.hugification_threshold >> LG_PAGE)), + FXP_INIT_INT(HUGEPAGE_PAGES)), + normalized_hugification_threshold); + malloc_snprintf(normalization_message, + sizeof(normalization_message), ": Normalizing " + "HPA settings to avoid pathological behavior, setting " + "hpa_hugification_threshold_ratio: to %s.\n", + normalized_hugification_threshold); + } + malloc_printf( + ": Invalid combination of options " + "hpa_hugification_threshold_ratio: %s and hpa_dirty_mult: %s. " + "These values should sum to > 1.0.\n%s", hugification_threshold, + hpa_dirty_mult, normalization_message); +} + static void malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], @@ -1749,6 +1790,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* Re-enable diagnostic "-Wtype-limits" */ JEMALLOC_DIAGNOSTIC_POP } + validate_hpa_settings(); if (opt_abort_conf && had_conf_error) { malloc_abort_invalid_conf(); } diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 65a56a08..33b70e19 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/hpa_validate_conf.c b/test/unit/hpa_validate_conf.c new file mode 100644 index 00000000..8c1847ba --- /dev/null +++ b/test/unit/hpa_validate_conf.c @@ -0,0 +1,56 @@ +#include "test/jemalloc_test.h" + +static bool abort_called = false; +static void (*default_malloc_message)(void *, const char *); + +static void +mock_invalid_conf_abort(void) { + abort_called = true; +} + +static void +null_malloc_message(void *_1, const char* _2) { +} + 
+TEST_BEGIN(test_hpa_validate_conf) { + test_skip_if(!hpa_supported()); + void *ptr = malloc(4096); + /* Need to restore this here to see any possible assert messages */ + malloc_message = default_malloc_message; + assert_true(abort_called, + "Should have aborted due to invalid values for hpa_dirty_mult and " + "hpa_hugification_threshold_ratio"); + free(ptr); +} +TEST_END + +/* + * We have to set `abort_conf:true` here and not via the `MALLOC_CONF` + * environment variable in the associated shell script for this test. This is + * because when testing on FreeBSD (where Jemalloc is the system allocator) in + * CI configs where HPA is not supported, setting `abort_conf:true` there would + * result in the system Jemalloc picking this up and aborting before we could + * ever even launch the test. + */ +const char *malloc_conf = "abort_conf:true"; + +int +main(void) { + /* + * OK, this is a sort of nasty hack. We don't want to add *another* + * config option for HPA (the intent is that it becomes available on + * more platforms over time, and we're trying to prune back config + * options generally. But we'll get initialization errors on other + * platforms if we set hpa:true in the MALLOC_CONF (even if we set + * abort_conf:false as well). So we reach into the internals and set + * them directly, but only if we know that we're actually going to do + * something nontrivial in the tests. 
+ */ + if (hpa_supported()) { + default_malloc_message = malloc_message; + malloc_message = null_malloc_message; + opt_hpa = true; + invalid_conf_abort = mock_invalid_conf_abort; + } + return test_no_reentrancy(test_hpa_validate_conf); +} diff --git a/test/unit/hpa_validate_conf.sh b/test/unit/hpa_validate_conf.sh new file mode 100644 index 00000000..692c3da9 --- /dev/null +++ b/test/unit/hpa_validate_conf.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF='tcache:false,hpa_dirty_mult:0.25,hpa_hugification_threshold_ratio:0.6' From 4f50f782fa8e48248684e9f479b895fe19609635 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 27 Jul 2023 11:49:07 -0700 Subject: [PATCH 2334/2608] Use compiler-provided assume builtins when available There are several benefits to this: 1. It's cleaner and more reliable to use the builtin to inform the compiler of assumptions instead of hoping that the optimizer understands your intentions. 2. `clang` will warn you if any of your assumptions would produce side-effects (which the compiler will discard). [This blog post](https://fastcompression.blogspot.com/2019/01/compiler-checked-contracts.html) by Yann Collet highlights that a hazard of using the `unreachable()`-based method of signaling assumptions is that it can sometimes result in additional instructions being generated (see [this Godbolt link](https://godbolt.org/z/lKNMs3) from the blog post for an example). 
--- include/jemalloc/internal/arena_inlines_b.h | 2 +- include/jemalloc/internal/util.h | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index b1cd84b4..1c98ffa0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -378,7 +378,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (caller_alloc_ctx != NULL) { alloc_ctx = *caller_alloc_ctx; } else { - util_assume(!tsdn_null(tsdn)); + util_assume(tsdn != NULL); emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); } diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 536c0970..2c35ef76 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -65,12 +65,19 @@ get_errno(void) { #endif } -JEMALLOC_ALWAYS_INLINE void -util_assume(bool b) { - if (!b) { - unreachable(); - } -} +#ifdef _MSC_VER +#define util_assume __assume +#elif defined(__clang__) && (__clang_major__ > 3 || \ + (__clang_major__ == 3 && __clang_minor__ >= 6)) +#define util_assume __builtin_assume +#else +#define util_assume(expr) \ + do { \ + if (!(expr)) { \ + unreachable(); \ + } \ + } while(0) +#endif /* ptr should be valid. */ JEMALLOC_ALWAYS_INLINE void From 254c4847e8ac263d24720aa93c2c7d410f55a239 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sat, 5 Aug 2023 13:40:23 -0700 Subject: [PATCH 2335/2608] Print colorful reminder for failed tests. 
--- test/src/test.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/test/src/test.c b/test/src/test.c index 8b69d74a..a21356d5 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -173,13 +173,19 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { } } - malloc_printf("--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n", + bool colored = test_counts[test_status_fail] != 0 && + isatty(STDERR_FILENO); + const char *color_start = colored ? "\033[1;31m" : ""; + const char *color_end = colored ? "\033[0m" : ""; + malloc_printf("%s--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n%s", + color_start, test_status_string(test_status_pass), test_counts[test_status_pass], test_count, test_status_string(test_status_skip), test_counts[test_status_skip], test_count, test_status_string(test_status_fail), - test_counts[test_status_fail], test_count); + test_counts[test_status_fail], test_count, + color_end); return ret; } @@ -229,7 +235,12 @@ p_test_no_malloc_init(test_t *t, ...) { void p_test_fail(bool may_abort, const char *prefix, const char *message) { - malloc_cprintf(NULL, NULL, "%s%s\n", prefix, message); + bool colored = test_counts[test_status_fail] != 0 && + isatty(STDERR_FILENO); + const char *color_start = colored ? "\033[1;31m" : ""; + const char *color_end = colored ? "\033[0m" : ""; + malloc_cprintf(NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, + color_end); test_status = test_status_fail; if (may_abort) { abort(); From d2c9ed3d1e7c1a318e6fd018eb0e0f3ba5ee3365 Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 10 Aug 2023 10:43:42 -0700 Subject: [PATCH 2336/2608] Ensure short `read(2)`s/`write(2)`s are properly handled by IO utilities `read(2)` and `write(2)` may read or write fewer bytes than were requested. In order to robustly ensure that all of the requested bytes are read/written, these edge-cases must be handled. 
--- include/jemalloc/internal/malloc_io.h | 35 +++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 0afb0429..91e7b2ba 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -68,7 +68,7 @@ void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); static inline ssize_t -malloc_write_fd(int fd, const void *buf, size_t count) { +malloc_write_fd_syscall(int fd, const void *buf, size_t count) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) /* * Use syscall(2) rather than write(2) when possible in order to avoid @@ -90,7 +90,22 @@ malloc_write_fd(int fd, const void *buf, size_t count) { } static inline ssize_t -malloc_read_fd(int fd, void *buf, size_t count) { +malloc_write_fd(int fd, const void *buf, size_t count) { + size_t bytes_written = 0; + do { + ssize_t result = malloc_write_fd_syscall(fd, + &((const byte_t *)buf)[bytes_written], + count - bytes_written); + if (result < 0) { + return result; + } + bytes_written += result; + } while (bytes_written < count); + return bytes_written; +} + +static inline ssize_t +malloc_read_fd_syscall(int fd, void *buf, size_t count) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) long result = syscall(SYS_read, fd, buf, count); #else @@ -103,4 +118,20 @@ malloc_read_fd(int fd, void *buf, size_t count) { return (ssize_t)result; } +static inline ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { + size_t bytes_read = 0; + do { + ssize_t result = malloc_read_fd_syscall(fd, + &((byte_t *)buf)[bytes_read], count - bytes_read); + if (result < 0) { + return result; + } else if (result == 0) { + break; + } + bytes_read += result; + } while (bytes_read < count); + return bytes_read; +} + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ From 
da66aa391f853ccf2300845b3873cc8f1cf48f2d Mon Sep 17 00:00:00 2001 From: Kevin Svetlitski Date: Thu, 10 Aug 2023 16:31:35 -0700 Subject: [PATCH 2337/2608] Enable a few additional warnings for CI and fix the issues they uncovered - `-Wmissing-prototypes` and `-Wmissing-variable-declarations` are helpful for finding dead code and/or things that should be `static` but aren't marked as such. - `-Wunused-macros` is of similar utility, but for identifying dead macros. - `-Wunreachable-code` and `-Wunreachable-code-aggressive` do exactly what they say: flag unreachable code. --- include/jemalloc/jemalloc_protos.h.in | 1 + scripts/run_static_analysis.sh | 10 +++++++++- src/ctl.c | 25 ------------------------- src/inspect.c | 1 + src/jemalloc.c | 3 +-- src/jemalloc_cpp.cpp | 1 - src/mutex.c | 2 +- src/pa.c | 7 ------- src/pages.c | 2 +- src/prof_sys.c | 1 - test/src/mtx.c | 2 +- 11 files changed, 15 insertions(+), 40 deletions(-) diff --git a/include/jemalloc/jemalloc_protos.h.in b/include/jemalloc/jemalloc_protos.h.in index 3e1d3223..e474930f 100644 --- a/include/jemalloc/jemalloc_protos.h.in +++ b/include/jemalloc/jemalloc_protos.h.in @@ -4,6 +4,7 @@ * JEMALLOC_NO_DEMANGLE is defined (see jemalloc_mangle@install_suffix@.h). 
*/ extern JEMALLOC_EXPORT const char *@je_@malloc_conf; +extern JEMALLOC_EXPORT const char *@je_@malloc_conf_2_conf_harder; extern JEMALLOC_EXPORT void (*@je_@malloc_message)(void *cbopaque, const char *s); diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index 70c813d1..e2185ec9 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -12,8 +12,16 @@ compile_time_malloc_conf='background_thread:true,'\ 'zero_realloc:free,'\ 'prof_unbias:false,'\ 'prof_time_resolution:high' +extra_flags=( + -Wmissing-prototypes + -Wmissing-variable-declarations + -Wstrict-prototypes + -Wunreachable-code + -Wunreachable-code-aggressive + -Wunused-macros +) -EXTRA_CFLAGS='-Wstrict-prototypes' EXTRA_CXXFLAGS='-Wstrict-prototypes' ./autogen.sh \ +EXTRA_CFLAGS="${extra_flags[*]}" EXTRA_CXXFLAGS="${extra_flags[*]}" ./autogen.sh \ --with-private-namespace=jemalloc_ \ --disable-cache-oblivious \ --enable-prof \ diff --git a/src/ctl.c b/src/ctl.c index 454766da..2607aed2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1861,31 +1861,6 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { * There's a lot of code duplication in the following macros due to limitations * in how nested cpp macros are expanded. 
*/ -#define CTL_RO_CLGEN(c, l, n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - if (l) { \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ - } \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - if (l) { \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - } \ - return ret; \ -} - #define CTL_RO_CGEN(c, n, v, t) \ static int \ n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ diff --git a/src/inspect.c b/src/inspect.c index 911b5d52..2575b5c1 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -1,5 +1,6 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" +#include "jemalloc/internal/inspect.h" void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, diff --git a/src/jemalloc.c b/src/jemalloc.c index c5a06f6e..e2b4917f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1,4 +1,3 @@ -#define JEMALLOC_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" @@ -151,7 +150,7 @@ bool opt_xmalloc = false; bool opt_experimental_infallible_new = false; bool opt_zero = false; unsigned opt_narenas = 0; -fxp_t opt_narenas_ratio = FXP_INIT_INT(4); +static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 44569c14..08107a8a 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -2,7 +2,6 @@ #include // NOLINTBEGIN(misc-use-anonymous-namespace) -#define JEMALLOC_CPP_CPP_ #ifdef __cplusplus extern "C" { #endif diff --git a/src/mutex.c b/src/mutex.c index 0b3547a8..5655100d 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -5,7 +5,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/spin.h" -#ifndef _CRT_SPINCOUNT +#if defined(_WIN32) && 
!defined(_CRT_SPINCOUNT) #define _CRT_SPINCOUNT 4000 #endif diff --git a/src/pa.c b/src/pa.c index 63eef2b5..ebc6861d 100644 --- a/src/pa.c +++ b/src/pa.c @@ -220,13 +220,6 @@ pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, pai_dalloc(tsdn, pai, edata, deferred_work_generated); } -bool -pa_shard_retain_grow_limit_get_set(tsdn_t *tsdn, pa_shard_t *shard, - size_t *old_limit, size_t *new_limit) { - return pac_retain_grow_limit_get_set(tsdn, &shard->pac, old_limit, - new_limit); -} - bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness) { diff --git a/src/pages.c b/src/pages.c index 58d9cfaf..8cf2fd9f 100644 --- a/src/pages.c +++ b/src/pages.c @@ -21,7 +21,7 @@ #else #define PAGES_FD_TAG -1 #endif -#ifdef JEMALLOC_HAVE_PRCTL +#if defined(JEMALLOC_HAVE_PRCTL) && defined(JEMALLOC_PAGEID) #include #ifndef PR_SET_VMA #define PR_SET_VMA 0x53564d41 diff --git a/src/prof_sys.c b/src/prof_sys.c index dbb4c80a..c2998926 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -1,4 +1,3 @@ -#define JEMALLOC_PROF_SYS_C_ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" diff --git a/test/src/mtx.c b/test/src/mtx.c index d9ce375c..6cb3ecd5 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#ifndef _CRT_SPINCOUNT +#if defined(_WIN32) && !defined(_CRT_SPINCOUNT) #define _CRT_SPINCOUNT 4000 #endif From 87c56c8df86107fdf32e92db68211e8b10d94ded Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 25 Aug 2023 11:34:24 -0700 Subject: [PATCH 2338/2608] Fix arenas.i.bins.j.mutex link id in manual. --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index bdebd433..d0d4b20b 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -3307,7 +3307,7 @@ struct extent_hooks_s { Current number of nonfull slabs. 
- + stats.arenas.<i>.bins.<j>.mutex.{counter} (counter specific type) r- From b71da25b8a12c2c3f0c10b0811d15a61980186e8 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 28 Aug 2023 10:21:11 -0700 Subject: [PATCH 2339/2608] Fix reading CPU id using rdtscp. As pointed out in #2527, the correct register containing CPU id should be ecx instead edx. --- include/jemalloc/internal/jemalloc_internal_inlines_a.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 8d5e22fd..111cda42 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -19,9 +19,9 @@ malloc_getcpu(void) { #elif defined(JEMALLOC_HAVE_SCHED_GETCPU) return (malloc_cpuid_t)sched_getcpu(); #elif defined(JEMALLOC_HAVE_RDTSCP) - unsigned int ax, cx, dx; - asm volatile("rdtscp" : "=a"(ax), "=d"(dx), "=c"(cx) ::); - return (malloc_cpuid_t)(dx & 0xfff); + unsigned int ecx; + asm volatile("rdtscp" : "=c" (ecx) :: "eax", "edx"); + return (malloc_cpuid_t)(ecx & 0xfff); #elif defined(__aarch64__) && defined(__APPLE__) /* Other oses most likely use tpidr_el0 instead */ uintptr_t c; From 7d563a8f8117966d9466d92ed2c782eeae7a19eb Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 30 Aug 2023 10:15:30 -0700 Subject: [PATCH 2340/2608] Update safety check message to remove --enable-debug when it's already on. --- src/safety_check.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index 209fdda9..7ffe1f4f 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -7,12 +7,13 @@ void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size) { char *src = current_dealloc ? 
"the current pointer being freed" : "in thread cache, possibly from previous deallocations"; + char *suggest_debug_build = config_debug ? "" : " --enable-debug or"; safety_check_fail(": size mismatch detected (true size %zu " "vs input size %zu), likely caused by application sized " - "deallocation bugs (source address: %p, %s). Suggest building with " - "--enable-debug or address sanitizer for debugging. Abort.\n", - true_size, input_size, ptr, src); + "deallocation bugs (source address: %p, %s). Suggest building with" + "%s address sanitizer for debugging. Abort.\n", + true_size, input_size, ptr, src, suggest_debug_build); } void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { From ed7e6fe71a193ce24d1409d19d2c792f19af6a21 Mon Sep 17 00:00:00 2001 From: BtbN Date: Sun, 20 Aug 2023 16:21:56 +0200 Subject: [PATCH 2341/2608] Expose private library dependencies via pkg-config When linking statically, these need to be included for linking to succeed. --- jemalloc.pc.in | 1 + 1 file changed, 1 insertion(+) diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 0a377152..3aecfda1 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -10,3 +10,4 @@ URL: https://jemalloc.net/ Version: @jemalloc_version_major@.@jemalloc_version_minor@.@jemalloc_version_bugfix@_@jemalloc_version_nrev@ Cflags: -I${includedir} Libs: -L${libdir} -ljemalloc${install_suffix} +Libs.private: @LIBS@ From ce8ce99a4a969e8dd8644d7382126fbb423d9859 Mon Sep 17 00:00:00 2001 From: BtbN Date: Sun, 20 Aug 2023 16:38:09 +0200 Subject: [PATCH 2342/2608] Expose jemalloc_prefix via pkg-config --- jemalloc.pc.in | 1 + 1 file changed, 1 insertion(+) diff --git a/jemalloc.pc.in b/jemalloc.pc.in index 3aecfda1..b50770d1 100644 --- a/jemalloc.pc.in +++ b/jemalloc.pc.in @@ -3,6 +3,7 @@ exec_prefix=@exec_prefix@ libdir=@libdir@ includedir=@includedir@ install_suffix=@install_suffix@ +jemalloc_prefix=@JEMALLOC_PREFIX@ Name: jemalloc Description: A general purpose malloc(3) implementation that emphasizes 
fragmentation avoidance and scalable concurrency support. From 7d9eceaf3858515cd8774c3fad8e90fe53454e3c Mon Sep 17 00:00:00 2001 From: Evers Chen Date: Sun, 20 Aug 2023 08:11:35 +0000 Subject: [PATCH 2343/2608] Fix array bounds false warning in gcc 12.3.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1.error: array subscript 232 is above array bounds of ‘size_t[232]’ in gcc 12.3.0 2.it also optimizer to the code --- test/unit/size_classes.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index c70eb592..9e8a408f 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -24,7 +24,7 @@ get_max_size_class(void) { TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; - szind_t index, max_index; + szind_t index, gen_index, max_index; max_size_class = get_max_size_class(); max_index = sz_size2index(max_size_class); @@ -32,6 +32,7 @@ TEST_BEGIN(test_size_classes) { for (index = 0, size_class = sz_index2size(index); index < max_index || size_class < max_size_class; index++, size_class = sz_index2size(index)) { + gen_index = sz_size2index(size_class); expect_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); @@ -39,17 +40,15 @@ TEST_BEGIN(test_size_classes) { "Loop conditionals should be equivalent; index=%u, " "size_class=%zu (%#zx)", index, size_class, size_class); - expect_u_eq(index, sz_size2index(size_class), + expect_u_eq(index, gen_index, "sz_size2index() does not reverse sz_index2size(): index=%u" " --> size_class=%zu --> index=%u --> size_class=%zu", - index, size_class, sz_size2index(size_class), - sz_index2size(sz_size2index(size_class))); - expect_zu_eq(size_class, - sz_index2size(sz_size2index(size_class)), + index, size_class, gen_index, sz_index2size(gen_index)); + + expect_zu_eq(size_class, sz_index2size(gen_index), 
"sz_index2size() does not reverse sz_size2index(): index=%u" " --> size_class=%zu --> index=%u --> size_class=%zu", - index, size_class, sz_size2index(size_class), - sz_index2size(sz_size2index(size_class))); + index, size_class, gen_index, sz_index2size(gen_index)); expect_u_eq(index+1, sz_size2index(size_class+1), "Next size_class does not round up properly"); From fbca96c4332380c5799dcc804365ac6e93d7db2f Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 20 Aug 2023 23:28:38 -0700 Subject: [PATCH 2344/2608] Remove unnecessary parameters for cache_bin_postincrement. --- include/jemalloc/internal/cache_bin.h | 3 +-- src/cache_bin.c | 3 +-- src/tcache.c | 3 +-- test/unit/cache_bin.c | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2e95c33c..e6313144 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -691,8 +691,7 @@ void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset); -void cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, - void *alloc, size_t *cur_offset); +void cache_bin_postincrement(void *alloc, size_t *cur_offset); void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset); diff --git a/src/cache_bin.c b/src/cache_bin.c index 362605a8..5fb5607a 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -56,8 +56,7 @@ cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, } void -cache_bin_postincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, - size_t *cur_offset) { +cache_bin_postincrement(void *alloc, size_t *cur_offset) { *(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_trailing_junk; *cur_offset += sizeof(void *); diff --git a/src/tcache.c b/src/tcache.c index 914ddb7a..e9cf2ee5 100644 --- 
a/src/tcache.c +++ b/src/tcache.c @@ -698,8 +698,7 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(tcache_small_bin_disabled(i, cache_bin)); } - cache_bin_postincrement(tcache_bin_info, nhbins, mem, - &cur_offset); + cache_bin_postincrement(mem, &cur_offset); /* Sanity check that the whole stack is used. */ assert(cur_offset == tcache_bin_alloc_size); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 3b6dbab3..50d51a6d 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -93,7 +93,7 @@ test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) { size_t cur_offset = 0; cache_bin_preincrement(info, 1, mem, &cur_offset); cache_bin_init(bin, info, mem, &cur_offset); - cache_bin_postincrement(info, 1, mem, &cur_offset); + cache_bin_postincrement(mem, &cur_offset); assert_zu_eq(cur_offset, size, "Should use all requested memory"); } From a442d9b895935ac872e7ccc705213537bc747c19 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 6 Aug 2023 11:38:30 -0700 Subject: [PATCH 2345/2608] Enable per-tcache tcache_max 1. add tcache_max and nhbins into tcache_t so that they are per-tcache, with one auto tcache per thread, it's also per-thread; 2. add mallctl for each thread to set its own tcache_max (of its auto tcache); 3. store the maximum number of items in each bin instead of using a global storage; 4. add tests for the modifications above. 5. Rename `nhbins` and `tcache_maxclass` to `global_do_not_change_nhbins` and `global_do_not_change_tcache_maxclass`. 
--- include/jemalloc/internal/arena_inlines_b.h | 7 +- include/jemalloc/internal/cache_bin.h | 3 + .../internal/jemalloc_internal_inlines_b.h | 1 + .../internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/tcache_externs.h | 19 +- include/jemalloc/internal/tcache_inlines.h | 83 ++++- include/jemalloc/internal/tcache_structs.h | 2 + include/jemalloc/internal/tcache_types.h | 2 +- src/arena.c | 14 +- src/cache_bin.c | 1 + src/ctl.c | 40 ++- src/jemalloc.c | 14 +- src/tcache.c | 319 ++++++++++-------- test/unit/batch_alloc.c | 2 +- test/unit/tcache_max.c | 241 ++++++++++--- 15 files changed, 528 insertions(+), 222 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 1c98ffa0..c4d1c887 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -198,11 +198,11 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } else if (likely(size <= tcache_maxclass)) { + } else if (likely(size <= tcache_max_get(tcache))) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } - /* (size > tcache_maxclass) case falls through. */ + /* (size > tcache_max) case falls through. 
*/ } return arena_malloc_hard(tsdn, arena, size, ind, zero, slab); @@ -297,7 +297,8 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, bool slow_path) { - if (szind < nhbins) { + assert (!tsdn_null(tsdn) && tcache != NULL); + if (szind < tcache_nhbins_get(tcache)) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index e6313144..4cfc3f1d 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -125,6 +125,9 @@ struct cache_bin_s { * array. Immutable after initialization. */ uint16_t low_bits_empty; + + /* The maximum number of cached items in the bin. */ + cache_bin_info_t bin_info; }; /* diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index b2cab228..2ddb4a89 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -23,6 +23,7 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { tcache_t *tcache = tcache_get(tsd); if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); + assert(tcache_slow->arena != NULL); tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, newarena); } diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 1dac668a..8b80e3c1 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -530,7 +530,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { /* * Currently the fastpath only handles small sizes. The branch on * SC_LOOKUP_MAXCLASS makes sure of it. 
This lets us avoid checking - * tcache szind upper limit (i.e. tcache_maxclass) as well. + * tcache szind upper limit (i.e. tcache_max) as well. */ assert(alloc_ctx.slab); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index af6fd970..87d243a1 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -21,14 +21,19 @@ extern unsigned opt_lg_tcache_flush_large_div; /* * Number of tcache bins. There are SC_NBINS small-object bins, plus 0 or more - * large-object bins. + * large-object bins. This is only used during threads initialization and + * changing it will not reflect on initialized threads as expected. Thus, + * it should not be changed on the fly. To change the number of tcache bins + * in use, refer to tcache_nhbins of each tcache. */ -extern unsigned nhbins; +extern unsigned global_do_not_change_nhbins; -/* Maximum cached size class. */ -extern size_t tcache_maxclass; - -extern cache_bin_info_t *tcache_bin_info; +/* + * Maximum cached size class. Same as above, this is only used during threads + * initialization and should not be changed. To change the maximum cached size + * class, refer to tcache_max of each tcache. 
+ */ +extern size_t global_do_not_change_tcache_maxclass; /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and @@ -65,7 +70,7 @@ void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); void tcache_flush(tsd_t *tsd); -bool tsd_tcache_data_init(tsd_t *tsd); +bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena); bool tsd_tcache_enabled_data_init(tsd_t *tsd); void tcache_assert_initialized(tcache_t *tcache); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index b69d89ad..97501ee2 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -23,7 +23,7 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd); + tsd_tcache_data_init(tsd, NULL); } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -32,13 +32,67 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } +static inline unsigned +tcache_nhbins_get(tcache_t *tcache) { + assert(tcache != NULL); + assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX); + return tcache->tcache_nhbins; +} + +static inline size_t +tcache_max_get(tcache_t *tcache) { + assert(tcache != NULL); + assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT); + return tcache->tcache_max; +} + +static inline void +tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) { + assert(tcache != NULL); + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + tcache->tcache_max = tcache_max; + tcache->tcache_nhbins = sz_size2index(tcache_max) + 1; +} + +static inline void +thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) { + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + assert(tcache_max == sz_s2u(tcache_max)); + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow; + assert(tcache != NULL); + + bool enabled = 
tcache_available(tsd); + arena_t *assigned_arena; + if (enabled) { + tcache_slow = tcache_slow_get(tsd); + assert(tcache != NULL && tcache_slow != NULL); + assigned_arena = tcache_slow->arena; + /* Shutdown and reboot the tcache for a clean slate. */ + tcache_cleanup(tsd); + } + + /* + * Still set tcache_max and tcache_nhbins of the tcache even if + * the tcache is not available yet because the values are + * stored in tsd_t and are always available for changing. + */ + tcache_max_and_nhbins_set(tcache, tcache_max); + + if (enabled) { + tsd_tcache_data_init(tsd, assigned_arena); + } + + assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1); +} + JEMALLOC_ALWAYS_INLINE bool tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { assert(ind < SC_NBINS); - bool ret = (cache_bin_info_ncached_max(&tcache_bin_info[ind]) == 0); - if (ret && bin != NULL) { + assert(bin != NULL); + bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0; + if (ret) { /* small size class but cache bin disabled. 
*/ - assert(ind >= nhbins); assert((uintptr_t)(*bin->stack_head) == cache_bin_preceding_junk); } @@ -46,6 +100,14 @@ tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { return ret; } +JEMALLOC_ALWAYS_INLINE bool +tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) { + assert(ind >= SC_NBINS); + assert(bin != NULL); + return (cache_bin_info_ncached_max(&bin->bin_info) == 0 || + cache_bin_still_zero_initialized(bin)); +} + JEMALLOC_ALWAYS_INLINE void * tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { @@ -95,7 +157,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, void *ret; bool tcache_success; - assert(binind >= SC_NBINS && binind < nhbins); + assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache)); cache_bin_t *bin = &tcache->bins[binind]; ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); @@ -118,7 +180,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } else { if (unlikely(zero)) { size_t usize = sz_index2size(binind); - assert(usize <= tcache_maxclass); + assert(usize <= tcache_max_get(tcache)); memset(ret, 0, usize); } @@ -157,7 +219,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, return; } cache_bin_sz_t max = cache_bin_info_ncached_max( - &tcache_bin_info[binind]); + &bin->bin_info); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); @@ -169,14 +231,13 @@ JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - assert(tcache_salloc(tsd_tsdn(tsd), ptr) - > SC_SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= 
tcache_max_get(tcache)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { unsigned remain = cache_bin_info_ncached_max( - &tcache_bin_info[binind]) >> opt_lg_tcache_flush_large_div; + &bin->bin_info) >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 75918158..b51e10a7 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -55,6 +55,8 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; + unsigned tcache_nhbins; + size_t tcache_max; cache_bin_t bins[TCACHE_NBINS_MAX]; }; diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index a781f5a6..a91b3252 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,7 +19,7 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_maxclass = 8M */ +#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) diff --git a/src/arena.c b/src/arena.c index 98907bc1..e7fa0971 100644 --- a/src/arena.c +++ b/src/arena.c @@ -157,11 +157,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { - for (szind_t i = 0; i < nhbins; i++) { + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; cache_bin_sz_t ncached, 
nstashed; cache_bin_nitems_get_remote(cache_bin, - &tcache_bin_info[i], &ncached, &nstashed); + &cache_bin->bin_info, &ncached, &nstashed); + + if ((i < SC_NBINS && + tcache_small_bin_disabled(i, cache_bin)) || + (i >= SC_NBINS && + tcache_large_bin_disabled(i, cache_bin))) { + assert(ncached == 0 && nstashed == 0); + } astats->tcache_bytes += ncached * sz_index2size(i); astats->tcache_stashed_bytes += nstashed * @@ -720,7 +727,8 @@ arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, safety_check_verify_redzone(ptr, usize, bumped_usize); } if (bumped_usize >= SC_LARGE_MINCLASS && - bumped_usize <= tcache_maxclass && tcache != NULL) { + tcache != NULL && + bumped_usize <= tcache_max_get(tcache)) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, sz_size2index(bumped_usize), slow_path); } else { diff --git a/src/cache_bin.c b/src/cache_bin.c index 5fb5607a..03577084 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -82,6 +82,7 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; bin->low_bits_full = (uint16_t)(uintptr_t)full_position; bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; + cache_bin_info_init(&bin->bin_info, info->ncached_max); cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); diff --git a/src/ctl.c b/src/ctl.c index 2607aed2..5697539a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -66,6 +66,7 @@ CTL_PROTO(epoch) CTL_PROTO(background_thread) CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) +CTL_PROTO(thread_tcache_max) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_peak_read) CTL_PROTO(thread_peak_reset) @@ -371,6 +372,7 @@ CTL_PROTO(stats_mutexes_reset) static const ctl_named_node_t thread_tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, + {NAME("max"), CTL(thread_tcache_max)}, {NAME("flush"), 
CTL(thread_tcache_flush)} }; @@ -2289,6 +2291,40 @@ label_return: return ret; } +static int +thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + size_t oldval; + + /* pointer to tcache_t always exists even with tcache disabled. */ + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + oldval = tcache_max_get(tcache); + READ(oldval, size_t); + + if (newp != NULL) { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + size_t new_tcache_max = oldval; + WRITE(new_tcache_max, size_t); + if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) { + new_tcache_max = TCACHE_MAXCLASS_LIMIT; + } + new_tcache_max = sz_s2u(new_tcache_max); + if(new_tcache_max != oldval) { + thread_tcache_max_and_nhbins_set(tsd, new_tcache_max); + } + } + + ret = 0; +label_return: + return ret; +} + static int thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, @@ -3101,9 +3137,9 @@ arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) -CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) +CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nhbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index e2b4917f..7aa6a1cd 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4136,15 +4136,13 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { filled += n; } - if (likely(ind < nhbins) && progress < batch) { + 
unsigned tcache_ind = mallocx_tcache_get(flags); + tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, + /* slow */ true, /* is_alloc */ true); + if (likely(tcache != NULL && + ind < tcache_nhbins_get(tcache)) && progress < batch) { if (bin == NULL) { - unsigned tcache_ind = mallocx_tcache_get(flags); - tcache_t *tcache = tcache_get_from_ind(tsd, - tcache_ind, /* slow */ true, - /* is_alloc */ true); - if (tcache != NULL) { - bin = &tcache->bins[ind]; - } + bin = &tcache->bins[ind]; } /* * If we don't have a tcache bin, we don't want to diff --git a/src/tcache.c b/src/tcache.c index e9cf2ee5..ae68c08b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -12,7 +12,7 @@ bool opt_tcache = true; -/* tcache_maxclass is set to 32KB by default. */ +/* global_do_not_change_tcache_maxclass is set to 32KB by default. */ size_t opt_tcache_max = ((size_t)1) << 15; /* Reasonable defaults for min and max values. */ @@ -57,16 +57,18 @@ size_t opt_tcache_gc_delay_bytes = 0; unsigned opt_lg_tcache_flush_small_div = 1; unsigned opt_lg_tcache_flush_large_div = 1; -cache_bin_info_t *tcache_bin_info; - -/* Total stack size required (per tcache). Include the padding above. */ -static size_t tcache_bin_alloc_size; -static size_t tcache_bin_alloc_alignment; - -/* Number of cache bins enabled, including both large and small. */ -unsigned nhbins; -/* Max size class to be cached (can be small or large). */ -size_t tcache_maxclass; +/* + * Number of cache bins enabled, including both large and small. This value + * is only used to initialize tcache_nhbins in the per-thread tcache. + * Directly modifying it will not affect threads already launched. + */ +unsigned global_do_not_change_nhbins; +/* + * Max size class to be cached (can be small or large). This value is only used + * to initialize tcache_max in the per-thread tcache. Directly modifying it + * will not affect threads already launched. 
+ */ +size_t global_do_not_change_tcache_maxclass; tcaches_t *tcaches; @@ -127,9 +129,9 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_t *cache_bin = &tcache->bins[szind]; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); assert(!tcache_slow->bin_refilled[szind]); size_t nflush = low_water - (low_water >> 2); @@ -152,7 +154,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_info_ncached_max(&tcache_bin_info[szind]) + if ((cache_bin_info_ncached_max(&cache_bin->bin_info) >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { tcache_slow->lg_fill_div[szind]++; } @@ -165,9 +167,9 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, (unsigned)(ncached - low_water + (low_water >> 2))); } @@ -187,7 +189,7 @@ tcache_event(tsd_t *tsd) { tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &tcache_bin_info[szind]); + &cache_bin->bin_info); if (low_water > 0) { if (is_small) { tcache_gc_small(tsd, tcache_slow, tcache, szind); @@ -208,7 +210,7 @@ tcache_event(tsd_t *tsd) { cache_bin_low_water_set(cache_bin); tcache_slow->next_gc_bin++; - if (tcache_slow->next_gc_bin == nhbins) { + if (tcache_slow->next_gc_bin == tcache_nhbins_get(tcache)) { tcache_slow->next_gc_bin = 0; 
} } @@ -233,10 +235,10 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, void *ret; assert(tcache_slow->arena != NULL); - unsigned nfill = cache_bin_info_ncached_max(&tcache_bin_info[binind]) + unsigned nfill = cache_bin_info_ncached_max(&cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; arena_cache_bin_fill_small(tsdn, arena, cache_bin, - &tcache_bin_info[binind], binind, nfill); + &cache_bin->bin_info, binind, nfill); tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(cache_bin, tcache_success); @@ -318,7 +320,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (small) { assert(binind < SC_NBINS); } else { - assert(binind < nhbins); + assert(binind < tcache_nhbins_get(tcache)); } arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); @@ -508,18 +510,18 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &tcache_bin_info[binind]); + &cache_bin->bin_info); assert((cache_bin_sz_t)rem <= ncached); unsigned nflush = ncached - rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_init_ptr_array_for_flush(cache_bin, &tcache_bin_info[binind], + cache_bin_init_ptr_array_for_flush(cache_bin, &cache_bin->bin_info, &ptrs, nflush); tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &tcache_bin_info[binind], &ptrs, + cache_bin_finish_flush(cache_bin, &cache_bin->bin_info, &ptrs, ncached - rem); } @@ -548,7 +550,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small) { - cache_bin_info_t *info = &tcache_bin_info[binind]; + cache_bin_info_t *info = &cache_bin->bin_info; /* * The two below are for assertion only. 
The content of original cached * items remain unchanged -- the stashed items reside on the other end @@ -633,15 +635,31 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_arena_associate(tsdn, tcache_slow, tcache, arena); } +static void +tcache_max_and_nhbins_init(tcache_t *tcache) { + assert(tcache != NULL); + assert(global_do_not_change_tcache_maxclass != 0); + assert(global_do_not_change_nhbins != 0); + tcache->tcache_max = global_do_not_change_tcache_maxclass; + tcache->tcache_nhbins = global_do_not_change_nhbins; + assert(tcache->tcache_nhbins == sz_size2index(tcache->tcache_max) + 1); +} + bool tsd_tcache_enabled_data_init(tsd_t *tsd) { /* Called upon tsd initialization. */ tsd_tcache_enabled_set(tsd, opt_tcache); + /* + * tcache is not available yet, but we need to set up its tcache_max + * and tcache_nhbins in advance. + */ + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_max_and_nhbins_init(tcache); tsd_slow_update(tsd); if (opt_tcache) { /* Trigger tcache init. */ - tsd_tcache_data_init(tsd); + tsd_tcache_data_init(tsd, NULL); } return false; @@ -649,7 +667,7 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { static void tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - void *mem) { + void *mem, cache_bin_info_t *tcache_bin_info) { tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; @@ -660,17 +678,19 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* * We reserve cache bins for all small size classes, even if some may - * not get used (i.e. bins higher than nhbins). This allows the fast - * and common paths to access cache bin metadata safely w/o worrying - * about which ones are disabled. + * not get used (i.e. bins higher than tcache_nhbins). This allows + * the fast and common paths to access cache bin metadata safely w/o + * worrying about which ones are disabled. */ - unsigned n_reserved_bins = nhbins < SC_NBINS ? 
SC_NBINS : nhbins; + unsigned tcache_nhbins = tcache_nhbins_get(tcache); + unsigned n_reserved_bins = tcache_nhbins < SC_NBINS ? SC_NBINS + : tcache_nhbins; memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins); size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, nhbins, mem, + cache_bin_preincrement(tcache_bin_info, tcache_nhbins, mem, &cur_offset); - for (unsigned i = 0; i < nhbins; i++) { + for (unsigned i = 0; i < tcache_nhbins; i++) { if (i < SC_NBINS) { tcache_slow->lg_fill_div[i] = 1; tcache_slow->bin_refilled[i] = false; @@ -682,12 +702,12 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, &cur_offset); } /* - * For small size classes beyond tcache_maxclass (i.e. nhbins < NBINS), - * their cache bins are initialized to a state to safely and efficiently - * fail all fastpath alloc / free, so that no additional check around - * nhbins is needed on fastpath. + * For small size classes beyond tcache_max(i.e. + * tcache_nhbins< NBINS), their cache bins are initialized to a state + * to safely and efficiently fail all fastpath alloc / free, so that + * no additional check around tcache_nhbins is needed on fastpath. */ - for (unsigned i = nhbins; i < SC_NBINS; i++) { + for (unsigned i = tcache_nhbins; i < SC_NBINS; i++) { /* Disabled small bins. */ cache_bin_t *cache_bin = &tcache->bins[i]; void *fake_stack = mem; @@ -699,19 +719,102 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, } cache_bin_postincrement(mem, &cur_offset); - /* Sanity check that the whole stack is used. */ - assert(cur_offset == tcache_bin_alloc_size); + if (config_debug) { + /* Sanity check that the whole stack is used. 
*/ + size_t size, alignment; + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + &size, &alignment); + assert(cur_offset == size); + } +} + +static inline unsigned +tcache_ncached_max_compute(szind_t szind, unsigned current_nhbins) { + if (szind >= SC_NBINS) { + assert(szind < current_nhbins); + return opt_tcache_nslots_large; + } + unsigned slab_nregs = bin_infos[szind].nregs; + + /* We may modify these values; start with the opt versions. */ + unsigned nslots_small_min = opt_tcache_nslots_small_min; + unsigned nslots_small_max = opt_tcache_nslots_small_max; + + /* + * Clamp values to meet our constraints -- even, nonzero, min < max, and + * suitable for a cache bin size. + */ + if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) { + nslots_small_max = CACHE_BIN_NCACHED_MAX; + } + if (nslots_small_min % 2 != 0) { + nslots_small_min++; + } + if (nslots_small_max % 2 != 0) { + nslots_small_max--; + } + if (nslots_small_min < 2) { + nslots_small_min = 2; + } + if (nslots_small_max < 2) { + nslots_small_max = 2; + } + if (nslots_small_min > nslots_small_max) { + nslots_small_min = nslots_small_max; + } + + unsigned candidate; + if (opt_lg_tcache_nslots_mul < 0) { + candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); + } else { + candidate = slab_nregs << opt_lg_tcache_nslots_mul; + } + if (candidate % 2 != 0) { + /* + * We need the candidate size to be even -- we assume that we + * can divide by two and get a positive number (e.g. when + * flushing). 
+ */ + ++candidate; + } + if (candidate <= nslots_small_min) { + return nslots_small_min; + } else if (candidate <= nslots_small_max) { + return candidate; + } else { + return nslots_small_max; + } +} + +static void +tcache_bin_info_compute(cache_bin_info_t *tcache_bin_info, + unsigned tcache_nhbins) { + for (szind_t i = 0; i < tcache_nhbins; i++) { + unsigned ncached_max = tcache_ncached_max_compute(i, + tcache_nhbins); + cache_bin_info_init(&tcache_bin_info[i], ncached_max); + } + for (szind_t i = tcache_nhbins; i < SC_NBINS; i++) { + /* Disabled small bins. */ + cache_bin_info_init(&tcache_bin_info[i], 0); + } } /* Initialize auto tcache (embedded in TSD). */ bool -tsd_tcache_data_init(tsd_t *tsd) { +tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(cache_bin_still_zero_initialized(&tcache->bins[0])); - size_t alignment = tcache_bin_alloc_alignment; - size_t size = sz_sa2u(tcache_bin_alloc_size, alignment); + unsigned tcache_nhbins = tcache_nhbins_get(tcache); + size_t size, alignment; + /* Takes 146B stack space. */ + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; + tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + &size, &alignment); + size = sz_sa2u(size, alignment); void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, arena_get(TSDN_NULL, 0, true)); @@ -719,7 +822,7 @@ tsd_tcache_data_init(tsd_t *tsd) { return true; } - tcache_init(tsd, tcache_slow, tcache, mem); + tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); /* * Initialization is a bit tricky here. After malloc init is done, all * threads can rely on arena_choose and associate tcache accordingly. @@ -729,14 +832,15 @@ tsd_tcache_data_init(tsd_t *tsd) { * arena_choose_hard() will re-associate properly. 
*/ tcache_slow->arena = NULL; - arena_t *arena; if (!malloc_initialized()) { /* If in initialization, assign to a0. */ arena = arena_get(tsd_tsdn(tsd), 0, false); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, arena); } else { - arena = arena_choose(tsd, NULL); + if (arena == NULL) { + arena = arena_choose(tsd, NULL); + } /* This may happen if thread.tcache.enabled is used. */ if (tcache_slow->arena == NULL) { tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, @@ -756,21 +860,29 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. */ - size_t size = tcache_bin_alloc_size + sizeof(tcache_t) + unsigned tcache_nhbins = global_do_not_change_nhbins; + size_t tcache_size, alignment; + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; + tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + &tcache_size, &alignment); + + size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t); /* Naturally align the pointer stacks. 
*/ size = PTR_CEILING(size); - size = sz_sa2u(size, tcache_bin_alloc_alignment); + size = sz_sa2u(size, alignment); - void *mem = ipallocztm(tsd_tsdn(tsd), size, tcache_bin_alloc_alignment, + void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, arena_get(TSDN_NULL, 0, true)); if (mem == NULL) { return NULL; } - tcache_t *tcache = (void *)((byte_t *)mem + tcache_bin_alloc_size); + tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); tcache_slow_t *tcache_slow = - (void *)((byte_t *)mem + tcache_bin_alloc_size + sizeof(tcache_t)); - tcache_init(tsd, tcache_slow, tcache, mem); + (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); + tcache_max_and_nhbins_init(tcache); + tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL)); @@ -783,7 +895,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; assert(tcache_slow->arena != NULL); - for (unsigned i = 0; i < nhbins; i++) { + for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; if (i < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0); @@ -811,7 +923,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { cache_bin_t *cache_bin = &tcache->bins[0]; - cache_bin_assert_empty(cache_bin, &tcache_bin_info[0]); + cache_bin_assert_empty(cache_bin, &cache_bin->bin_info); } idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, true); @@ -849,13 +961,9 @@ tcache_cleanup(tsd_t *tsd) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); tcache_destroy(tsd, tcache, true); - if (config_debug) { - /* - * For debug testing only, we want to pretend we're still in the - * zero-initialized state. - */ - memset(tcache->bins, 0, sizeof(cache_bin_t) * nhbins); - } + /* Make sure all bins used are reinitialized to the clean state. 
*/ + memset(tcache->bins, 0, sizeof(cache_bin_t) * + tcache_nhbins_get(tcache)); } void @@ -863,7 +971,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); /* Merge and reset tcache stats. */ - for (unsigned i = 0; i < nhbins; i++) { + for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; if (i < SC_NBINS) { bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); @@ -986,97 +1094,18 @@ tcaches_destroy(tsd_t *tsd, unsigned ind) { } } -static unsigned -tcache_ncached_max_compute(szind_t szind) { - if (szind >= SC_NBINS) { - assert(szind < nhbins); - return opt_tcache_nslots_large; - } - unsigned slab_nregs = bin_infos[szind].nregs; - - /* We may modify these values; start with the opt versions. */ - unsigned nslots_small_min = opt_tcache_nslots_small_min; - unsigned nslots_small_max = opt_tcache_nslots_small_max; - - /* - * Clamp values to meet our constraints -- even, nonzero, min < max, and - * suitable for a cache bin size. - */ - if (opt_tcache_nslots_small_max > CACHE_BIN_NCACHED_MAX) { - nslots_small_max = CACHE_BIN_NCACHED_MAX; - } - if (nslots_small_min % 2 != 0) { - nslots_small_min++; - } - if (nslots_small_max % 2 != 0) { - nslots_small_max--; - } - if (nslots_small_min < 2) { - nslots_small_min = 2; - } - if (nslots_small_max < 2) { - nslots_small_max = 2; - } - if (nslots_small_min > nslots_small_max) { - nslots_small_min = nslots_small_max; - } - - unsigned candidate; - if (opt_lg_tcache_nslots_mul < 0) { - candidate = slab_nregs >> (-opt_lg_tcache_nslots_mul); - } else { - candidate = slab_nregs << opt_lg_tcache_nslots_mul; - } - if (candidate % 2 != 0) { - /* - * We need the candidate size to be even -- we assume that we - * can divide by two and get a positive number (e.g. when - * flushing). 
- */ - ++candidate; - } - if (candidate <= nslots_small_min) { - return nslots_small_min; - } else if (candidate <= nslots_small_max) { - return candidate; - } else { - return nslots_small_max; - } -} - bool tcache_boot(tsdn_t *tsdn, base_t *base) { - tcache_maxclass = sz_s2u(opt_tcache_max); - assert(tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); - nhbins = sz_size2index(tcache_maxclass) + 1; + global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max); + assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); + global_do_not_change_nhbins = + sz_size2index(global_do_not_change_tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, malloc_mutex_rank_exclusive)) { return true; } - /* Initialize tcache_bin_info. See comments in tcache_init(). */ - unsigned n_reserved_bins = nhbins < SC_NBINS ? SC_NBINS : nhbins; - size_t size = n_reserved_bins * sizeof(cache_bin_info_t); - tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, base, size, - CACHELINE); - if (tcache_bin_info == NULL) { - return true; - } - - for (szind_t i = 0; i < nhbins; i++) { - unsigned ncached_max = tcache_ncached_max_compute(i); - cache_bin_info_init(&tcache_bin_info[i], ncached_max); - } - for (szind_t i = nhbins; i < SC_NBINS; i++) { - /* Disabled small bins. 
*/ - cache_bin_info_init(&tcache_bin_info[i], 0); - assert(tcache_small_bin_disabled(i, NULL)); - } - - cache_bin_info_compute_alloc(tcache_bin_info, nhbins, - &tcache_bin_alloc_size, &tcache_bin_alloc_alignment); - return false; } diff --git a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 901c52b1..2bd5968e 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -168,7 +168,7 @@ TEST_BEGIN(test_batch_alloc_large) { assert_zu_eq(filled, batch, ""); release_batch(global_ptrs, batch, size); } - size = tcache_maxclass + 1; + size = global_do_not_change_tcache_maxclass + 1; for (size_t batch = 0; batch < 4; ++batch) { assert(batch < BATCH_MAX); size_t filled = batch_alloc(global_ptrs, batch, size, 0); diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index b1093f40..0a563c2f 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -18,11 +18,10 @@ enum { dalloc_option_end }; -static unsigned alloc_option, dalloc_option; -static size_t tcache_max; +static bool global_test; static void * -alloc_func(size_t sz) { +alloc_func(size_t sz, unsigned alloc_option) { void *ret; switch (alloc_option) { @@ -41,7 +40,7 @@ alloc_func(size_t sz) { } static void -dalloc_func(void *ptr, size_t sz) { +dalloc_func(void *ptr, size_t sz, unsigned dalloc_option) { switch (dalloc_option) { case use_free: free(ptr); @@ -58,10 +57,10 @@ dalloc_func(void *ptr, size_t sz) { } static size_t -tcache_bytes_read(void) { +tcache_bytes_read_global(void) { uint64_t epoch; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), - 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, + sizeof(epoch)), 0, "Unexpected mallctl() failure"); size_t tcache_bytes; size_t sz = sizeof(tcache_bytes); @@ -72,16 +71,30 @@ tcache_bytes_read(void) { return tcache_bytes; } +static size_t +tcache_bytes_read_local(void) { + size_t tcache_bytes = 0; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tcache_get(tsd); + 
for (szind_t i = 0; i < tcache_nhbins_get(tcache); i++) { + cache_bin_t *cache_bin = &tcache->bins[i]; + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, + &cache_bin->bin_info); + tcache_bytes += ncached * sz_index2size(i); + } + return tcache_bytes; +} static void tcache_bytes_check_update(size_t *prev, ssize_t diff) { - size_t tcache_bytes = tcache_bytes_read(); + size_t tcache_bytes = global_test ? tcache_bytes_read_global(): + tcache_bytes_read_local(); expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected"); - *prev += diff; } static void -test_tcache_bytes_alloc(size_t alloc_size) { +test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, + unsigned alloc_option, unsigned dalloc_option) { expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, "Unexpected tcache flush failure"); @@ -90,65 +103,82 @@ test_tcache_bytes_alloc(size_t alloc_size) { bool cached = (usize <= tcache_max); ssize_t diff = cached ? usize : 0; - void *ptr1 = alloc_func(alloc_size); - void *ptr2 = alloc_func(alloc_size); + void *ptr1 = alloc_func(alloc_size, alloc_option); + void *ptr2 = alloc_func(alloc_size, alloc_option); - size_t bytes = tcache_bytes_read(); - dalloc_func(ptr2, alloc_size); + size_t bytes = global_test ? 
tcache_bytes_read_global() : + tcache_bytes_read_local(); + dalloc_func(ptr2, alloc_size, dalloc_option); /* Expect tcache_bytes increase after dalloc */ tcache_bytes_check_update(&bytes, diff); - dalloc_func(ptr1, alloc_size); + dalloc_func(ptr1, alloc_size, dalloc_option); /* Expect tcache_bytes increase again */ tcache_bytes_check_update(&bytes, diff); - void *ptr3 = alloc_func(alloc_size); + void *ptr3 = alloc_func(alloc_size, alloc_option); if (cached) { expect_ptr_eq(ptr1, ptr3, "Unexpected cached ptr"); } /* Expect tcache_bytes decrease after alloc */ tcache_bytes_check_update(&bytes, -diff); - void *ptr4 = alloc_func(alloc_size); + void *ptr4 = alloc_func(alloc_size, alloc_option); if (cached) { expect_ptr_eq(ptr2, ptr4, "Unexpected cached ptr"); } /* Expect tcache_bytes decrease again */ tcache_bytes_check_update(&bytes, -diff); - dalloc_func(ptr3, alloc_size); + dalloc_func(ptr3, alloc_size, dalloc_option); tcache_bytes_check_update(&bytes, diff); - dalloc_func(ptr4, alloc_size); + dalloc_func(ptr4, alloc_size, dalloc_option); tcache_bytes_check_update(&bytes, diff); } static void -test_tcache_max_impl(void) { - size_t sz; +test_tcache_max_impl(size_t target_tcache_max, unsigned alloc_option, + unsigned dalloc_option) { + size_t tcache_max, sz; sz = sizeof(tcache_max); - assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + if (global_test) { + assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, + &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + expect_zu_eq(tcache_max, target_tcache_max, + "Global tcache_max not expected"); + } else { + assert_d_eq(mallctl("thread.tcache.max", + (void *)&tcache_max, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_zu_eq(tcache_max, target_tcache_max, + "Current thread's tcache_max not expected"); + } + test_tcache_bytes_alloc(1, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc(tcache_max - 1, tcache_max, 
alloc_option, + dalloc_option); + test_tcache_bytes_alloc(tcache_max, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(tcache_max + 1, tcache_max, alloc_option, + dalloc_option); - /* opt.tcache_max set to 1024 in tcache_max.sh */ - expect_zu_eq(tcache_max, 1024, "tcache_max not expected"); - - test_tcache_bytes_alloc(1); - test_tcache_bytes_alloc(tcache_max - 1); - test_tcache_bytes_alloc(tcache_max); - test_tcache_bytes_alloc(tcache_max + 1); - - test_tcache_bytes_alloc(PAGE - 1); - test_tcache_bytes_alloc(PAGE); - test_tcache_bytes_alloc(PAGE + 1); + test_tcache_bytes_alloc(PAGE - 1, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(PAGE + 1, tcache_max, alloc_option, + dalloc_option); size_t large; sz = sizeof(large); assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - test_tcache_bytes_alloc(large - 1); - test_tcache_bytes_alloc(large); - test_tcache_bytes_alloc(large + 1); + test_tcache_bytes_alloc(large - 1, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(large, tcache_max, alloc_option, + dalloc_option); + test_tcache_bytes_alloc(large + 1, tcache_max, alloc_option, + dalloc_option); } TEST_BEGIN(test_tcache_max) { @@ -157,26 +187,157 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(opt_prof); test_skip_if(san_uaf_detection_enabled()); - unsigned arena_ind; + unsigned arena_ind, alloc_option, dalloc_option; size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); + global_test = true; for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { for (dalloc_option = dalloc_option_start; dalloc_option < dalloc_option_end; dalloc_option++) { 
- test_tcache_max_impl(); + /* opt.tcache_max set to 1024 in tcache_max.sh. */ + test_tcache_max_impl(1024, alloc_option, + dalloc_option); } } + global_test = false; +} +TEST_END + +static size_t +tcache_max2nhbins(size_t tcache_max) { + return sz_size2index(tcache_max) + 1; +} + +static void * +tcache_check(void *arg) { + size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; + unsigned tcache_nhbins; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); + sz = sizeof(size_t); + new_tcache_max = *(size_t *)arg; + min_tcache_max = 1; + + /* + * Check the default tcache_max and tcache_nhbins of each thread's + * auto tcache. + */ + old_tcache_max = tcache_max_get(tcache); + expect_zu_eq(old_tcache_max, opt_tcache_max, + "Unexpected default value for tcache_max"); + tcache_nhbins = tcache_nhbins_get(tcache); + expect_zu_eq(tcache_nhbins, (size_t)global_do_not_change_nhbins, + "Unexpected default value for tcache_nhbins"); + + /* + * Close the tcache and test the set. + * Test an input that is not a valid size class, it should be ceiled + * to a valid size class. 
+ */ + bool e0 = false, e1; + size_t bool_sz = sizeof(bool); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_true(e1, "Unexpected previous tcache state"); + + size_t temp_tcache_max = TCACHE_MAXCLASS_LIMIT - 1; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&temp_tcache_max, sz), 0, + "Unexpected mallctl() failure"); + old_tcache_max = tcache_max_get(tcache); + expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, + "Unexpected value for tcache_max"); + tcache_nhbins = tcache_nhbins_get(tcache); + expect_zu_eq(tcache_nhbins, TCACHE_NBINS_MAX, + "Unexpected value for tcache_nhbins"); + assert_d_eq(mallctl("thread.tcache.max", + (void *)&old_tcache_max, &sz, + (void *)&min_tcache_max, sz), 0, + "Unexpected mallctl() failure"); + expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, + "Unexpected value for tcache_max"); + + /* Enable tcache, the set should still be valid. */ + e0 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_false(e1, "Unexpected previous tcache state"); + min_tcache_max = sz_s2u(min_tcache_max); + expect_zu_eq(tcache_max_get(tcache), min_tcache_max, + "Unexpected value for tcache_max"); + expect_zu_eq(tcache_nhbins_get(tcache), + tcache_max2nhbins(min_tcache_max), "Unexpected value for nhbins"); + assert_d_eq(mallctl("thread.tcache.max", + (void *)&old_tcache_max, &sz, + (void *)&new_tcache_max, sz), 0, + "Unexpected mallctl() failure"); + expect_zu_eq(old_tcache_max, min_tcache_max, + "Unexpected value for tcache_max"); + + /* + * Check the thread's tcache_max and nhbins both through mallctl + * and alloc tests. 
+ */ + if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) { + new_tcache_max = TCACHE_MAXCLASS_LIMIT; + } + old_tcache_max = tcache_max_get(tcache); + expect_zu_eq(old_tcache_max, new_tcache_max, + "Unexpected value for tcache_max"); + tcache_nhbins = tcache_nhbins_get(tcache); + expect_zu_eq(tcache_nhbins, tcache_max2nhbins(new_tcache_max), + "Unexpected value for tcache_nhbins"); + for (unsigned alloc_option = alloc_option_start; + alloc_option < alloc_option_end; + alloc_option++) { + for (unsigned dalloc_option = dalloc_option_start; + dalloc_option < dalloc_option_end; + dalloc_option++) { + test_tcache_max_impl(new_tcache_max, + alloc_option, dalloc_option); + } + } + + return NULL; +} + +TEST_BEGIN(test_thread_tcache_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(opt_prof); + test_skip_if(san_uaf_detection_enabled()); + + unsigned nthreads = 8; + global_test = false; + VARIABLE_ARRAY(thd_t, threads, nthreads); + VARIABLE_ARRAY(size_t, all_threads_tcache_max, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + all_threads_tcache_max[i] = 1024 * (1<<((i + 10) % 20)); + if (i == nthreads - 1) { + all_threads_tcache_max[i] = UINT_MAX; + } + } + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], tcache_check, + &(all_threads_tcache_max[i])); + } + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } } TEST_END int main(void) { - return test(test_tcache_max); + return test( + test_tcache_max, + test_thread_tcache_max); } + From 72cfdce71806443f4ccdbfe10aa5d50346a3d07e Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 13 Sep 2023 21:51:54 -0700 Subject: [PATCH 2346/2608] Allocate tcache stack from base allocator When using metadata_thp, allocate tcache bin stacks from base0, which means they will be placed on huge pages along with other metadata, instead of mixed with other regular allocations. 
In order to do so, modified the base allocator to support limited reuse: freed tcached stacks (from thread termination) will be returned to base0 and made available for reuse, but no merging will be attempted since they were bump allocated out of base blocks. These reused base extents are managed using separately allocated base edata_t -- they are cached in base->edata_avail when the extent is all allocated. One tricky part is, stats updating must be skipped for such reused extents (since they were accounted for already, and there is no purging for base). This requires tracking the "if is reused" state explicitly and bypass the stats updates when allocating from them. --- include/jemalloc/internal/base.h | 5 + include/jemalloc/internal/cache_bin.h | 1 + include/jemalloc/internal/edata.h | 6 +- src/base.c | 151 +++++++++++++++++++++++--- src/cache_bin.c | 18 ++- src/tcache.c | 22 +++- test/unit/tcache_max.c | 26 +++++ 7 files changed, 202 insertions(+), 27 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 6b41aa6f..451be10f 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -73,6 +73,9 @@ struct base_s { /* Heap of extents that track unused trailing space within blocks. */ edata_heap_t avail[SC_NSIZES]; + /* Contains reusable base edata (used by tcache_stacks currently). */ + edata_avail_t edata_avail; + /* Stats, only maintained if config_stats. 
*/ size_t allocated; size_t resident; @@ -101,6 +104,8 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); +void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); +void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 4cfc3f1d..78ac3295 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -704,5 +704,6 @@ void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, * not cache_bin_init was called on it. */ bool cache_bin_still_zero_initialized(cache_bin_t *bin); +bool cache_bin_stack_use_thp(void); #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index baf5187f..17befd92 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -621,7 +621,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, } static inline void -edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { +edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn, + bool reused) { edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); edata_addr_set(edata, addr); edata_bsize_set(edata, bsize); @@ -629,7 +630,8 @@ edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn) { edata_szind_set(edata, SC_NSIZES); edata_sn_set(edata, sn); edata_state_set(edata, extent_state_active); - edata_guarded_set(edata, false); + /* See comments in base_edata_is_reused. 
*/ + edata_guarded_set(edata, reused); edata_zeroed_set(edata, true); edata_committed_set(edata, true); /* diff --git a/src/base.c b/src/base.c index 8e4606d0..e1dfe604 100644 --- a/src/base.c +++ b/src/base.c @@ -110,6 +110,16 @@ label_done: } } +static inline bool +base_edata_is_reused(edata_t *edata) { + /* + * Borrow the guarded bit to indicate if the extent is a recycled one, + * i.e. the ones returned to base for reuse; currently only tcache bin + * stacks. Skips stats updating if so (needed for this purpose only). + */ + return edata_guarded_get(edata); +} + static void base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, size_t size) { @@ -118,7 +128,7 @@ base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, sn = *extent_sn_next; (*extent_sn_next)++; - edata_binit(edata, addr, size, sn); + edata_binit(edata, addr, size, sn, false /* is_reused */); } static size_t @@ -185,24 +195,57 @@ base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, assert(edata_bsize_get(edata) >= *gap_size + size); edata_binit(edata, (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size), edata_bsize_get(edata) - *gap_size - size, - edata_sn_get(edata)); + edata_sn_get(edata), base_edata_is_reused(edata)); return ret; } static void -base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size, - void *addr, size_t size) { - if (edata_bsize_get(edata) > 0) { - /* - * Compute the index for the largest size class that does not - * exceed extent's size. - */ - szind_t index_floor = - sz_size2index(edata_bsize_get(edata) + 1) - 1; - edata_heap_insert(&base->avail[index_floor], edata); +base_edata_heap_insert(tsdn_t *tsdn, base_t *base, edata_t *edata) { + malloc_mutex_assert_owner(tsdn, &base->mtx); + + size_t bsize = edata_bsize_get(edata); + assert(bsize > 0); + /* + * Compute the index for the largest size class that does not exceed + * extent's size. 
+ */ + szind_t index_floor = sz_size2index(bsize + 1) - 1; + edata_heap_insert(&base->avail[index_floor], edata); +} + +/* + * Only can be called by top-level functions, since it may call base_alloc + * internally when cache is empty. + */ +static edata_t * +base_alloc_base_edata(tsdn_t *tsdn, base_t *base) { + edata_t *edata; + + malloc_mutex_lock(tsdn, &base->mtx); + edata = edata_avail_first(&base->edata_avail); + if (edata != NULL) { + edata_avail_remove(&base->edata_avail, edata); + } + malloc_mutex_unlock(tsdn, &base->mtx); + + if (edata == NULL) { + edata = base_alloc_edata(tsdn, base); } - if (config_stats) { + return edata; +} + +static void +base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, edata_t *edata, + size_t gap_size, void *addr, size_t size) { + if (edata_bsize_get(edata) > 0) { + base_edata_heap_insert(tsdn, base, edata); + } else { + /* Freed base edata_t stored in edata_avail. */ + edata_avail_insert(&base->edata_avail, edata); + } + + if (config_stats && !base_edata_is_reused(edata)) { base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is @@ -224,13 +267,13 @@ base_extent_bump_alloc_post(base_t *base, edata_t *edata, size_t gap_size, } static void * -base_extent_bump_alloc(base_t *base, edata_t *edata, size_t size, +base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, size_t alignment) { void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment); - base_extent_bump_alloc_post(base, edata, gap_size, ret, size); + base_extent_bump_alloc_post(tsdn, base, edata, gap_size, ret, size); return ret; } @@ -384,6 +427,8 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, for (szind_t i = 0; i < SC_NSIZES; i++) { edata_heap_new(&base->avail[i]); } + edata_avail_new(&base->edata_avail); + if (config_stats) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); @@ -395,8 +440,12 @@ 
base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(base, &block->edata, gap_size, base, + + /* Locking here is only necessary because of assertions. */ + malloc_mutex_lock(tsdn, &base->mtx); + base_extent_bump_alloc_post(tsdn, base, &block->edata, gap_size, base, base_size); + malloc_mutex_unlock(tsdn, &base->mtx); return base; } @@ -457,7 +506,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, goto label_return; } - ret = base_extent_bump_alloc(base, edata, usize, alignment); + ret = base_extent_bump_alloc(tsdn, base, edata, usize, alignment); if (esn != NULL) { *esn = (size_t)edata_sn_get(edata); } @@ -491,6 +540,74 @@ base_alloc_edata(tsdn_t *tsdn, base_t *base) { return edata; } +static inline void +b0_alloc_header_size(size_t *header_size, size_t *alignment) { + *alignment = QUANTUM; + *header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM : + sizeof(edata_t *); +} + +/* + * Each piece allocated here is managed by a separate edata, because it was bump + * allocated and cannot be merged back into the original base_block. This means + * it's not for general purpose: 1) they are not page aligned, nor page sized, + * and 2) the requested size should not be too small (as each piece comes with + * an edata_t). Only used for tcache bin stack allocation now. + */ +void * +b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { + base_t *base = b0get(); + edata_t *edata = base_alloc_base_edata(tsdn, base); + if (edata == NULL) { + return NULL; + } + + /* + * Reserve room for the header, which stores a pointer to the managing + * edata_t. The header itself is located right before the return + * address, so that edata can be retrieved on dalloc. Bump up to usize + * to improve reusability -- otherwise the freed stacks will be put back + * into the previous size class. 
+ */ + size_t esn, alignment, header_size; + b0_alloc_header_size(&header_size, &alignment); + + size_t alloc_size = sz_s2u(stack_size + header_size); + void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn); + if (addr == NULL) { + edata_avail_insert(&base->edata_avail, edata); + return NULL; + } + + /* Set is_reused: see comments in base_edata_is_reused. */ + edata_binit(edata, addr, alloc_size, esn, true /* is_reused */); + *(edata_t **)addr = edata; + + return (byte_t *)addr + header_size; +} + +void +b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) { + /* edata_t pointer stored in header. */ + size_t alignment, header_size; + b0_alloc_header_size(&header_size, &alignment); + + edata_t *edata = *(edata_t **)((byte_t *)tcache_stack - header_size); + void *addr = edata_addr_get(edata); + size_t bsize = edata_bsize_get(edata); + /* Marked as "reused" to avoid double counting stats. */ + assert(base_edata_is_reused(edata)); + assert(addr != NULL && bsize > 0); + + /* Zero out since base_alloc returns zeroed memory. */ + memset(addr, 0, bsize); + + base_t *base = b0get(); + malloc_mutex_lock(tsdn, &base->mtx); + base_edata_heap_insert(tsdn, base, edata); + malloc_mutex_unlock(tsdn, &base->mtx); +} + void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, size_t *mapped, size_t *n_thp) { diff --git a/src/cache_bin.c b/src/cache_bin.c index 03577084..2ad2062d 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -14,6 +14,17 @@ cache_bin_info_init(cache_bin_info_t *info, info->ncached_max = (cache_bin_sz_t)ncached_max; } +bool +cache_bin_stack_use_thp(void) { + /* + * If metadata_thp is enabled, allocating tcache stack from the base + * allocator for efficiency gains. The downside, however, is that base + * allocator never purges freed memory, and may cache a fair amount of + * memory after many threads are terminated and not reused. 
+ */ + return metadata_thp_enabled(); +} + void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, size_t *size, size_t *alignment) { @@ -31,10 +42,11 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, } /* - * Align to at least PAGE, to minimize the # of TLBs needed by the - * smaller sizes; also helps if the larger sizes don't get used at all. + * When not using THP, align to at least PAGE, to minimize the # of TLBs + * needed by the smaller sizes; also helps if the larger sizes don't get + * used at all. */ - *alignment = PAGE; + *alignment = cache_bin_stack_use_thp() ? QUANTUM : PAGE; } void diff --git a/src/tcache.c b/src/tcache.c index ae68c08b..2c0a7e2e 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/safety_check.h" #include "jemalloc/internal/san.h" @@ -814,10 +815,17 @@ tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, &size, &alignment); - size = sz_sa2u(size, alignment); - void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, - true, arena_get(TSDN_NULL, 0, true)); + void *mem; + if (cache_bin_stack_use_thp()) { + /* Alignment is ignored since it comes from THP. 
*/ + assert(alignment == QUANTUM); + mem = b0_alloc_tcache_stack(tsd_tsdn(tsd), size); + } else { + size = sz_sa2u(size, alignment); + mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, + true, arena_get(TSDN_NULL, 0, true)); + } if (mem == NULL) { return true; } @@ -925,8 +933,12 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { cache_bin_t *cache_bin = &tcache->bins[0]; cache_bin_assert_empty(cache_bin, &cache_bin->bin_info); } - idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, true, - true); + if (tsd_tcache && cache_bin_stack_use_thp()) { + b0_dalloc_tcache_stack(tsd_tsdn(tsd), tcache_slow->dyn_alloc); + } else { + idalloctm(tsd_tsdn(tsd), tcache_slow->dyn_alloc, NULL, NULL, + true, true); + } /* * The deallocation and tcache flush above may not trigger decay since diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 0a563c2f..6481504e 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -215,6 +215,29 @@ tcache_max2nhbins(size_t tcache_max) { return sz_size2index(tcache_max) + 1; } +static void +validate_tcache_stack(tcache_t *tcache) { + /* Assume bins[0] is enabled. */ + void *tcache_stack = tcache->bins[0].stack_head; + bool expect_found = cache_bin_stack_use_thp() ? true : false; + + /* Walk through all blocks to see if the stack is within range. 
*/ + base_t *base = b0get(); + base_block_t *next = base->blocks; + bool found = false; + do { + base_block_t *block = next; + if ((byte_t *)tcache_stack >= (byte_t *)block && + (byte_t *)tcache_stack < ((byte_t *)block + block->size)) { + found = true; + break; + } + next = block->next; + } while (next != NULL); + + expect_true(found == expect_found, "Unexpected tcache stack source"); +} + static void * tcache_check(void *arg) { size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; @@ -235,6 +258,7 @@ tcache_check(void *arg) { tcache_nhbins = tcache_nhbins_get(tcache); expect_zu_eq(tcache_nhbins, (size_t)global_do_not_change_nhbins, "Unexpected default value for tcache_nhbins"); + validate_tcache_stack(tcache); /* * Close the tcache and test the set. @@ -280,6 +304,7 @@ tcache_check(void *arg) { "Unexpected mallctl() failure"); expect_zu_eq(old_tcache_max, min_tcache_max, "Unexpected value for tcache_max"); + validate_tcache_stack(tcache); /* * Check the thread's tcache_max and nhbins both through mallctl @@ -303,6 +328,7 @@ tcache_check(void *arg) { test_tcache_max_impl(new_tcache_max, alloc_option, dalloc_option); } + validate_tcache_stack(tcache); } return NULL; From 7a9e4c9073c9a06fa43130ecfd4790740327d415 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 2 Oct 2023 22:48:22 -0700 Subject: [PATCH 2347/2608] Mark jemalloc.h as system header to resolve header conflicts. --- include/jemalloc/jemalloc.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index b19b1548..dacd6195 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -5,6 +5,7 @@ objroot=$1 cat < Date: Wed, 27 Sep 2023 09:55:12 -0700 Subject: [PATCH 2348/2608] Fix comments about malloc_conf to enable logging. 
--- include/jemalloc/internal/log.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 921985c8..7b074abd 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -27,9 +27,9 @@ * log("extent.a", "log msg for extent.a"); // 5 * log("extent.b", "log msg for extent.b"); // 6 * - * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * And your malloc_conf option is "log:arena.a|extent", then lines 2, 4, 5, and * 6 will print at runtime. You can enable logging from all log vars by - * writing "log=.". + * writing "log:.". * * None of this should be regarded as a stable API for right now. It's intended * as a debugging interface, to let us keep around some of our printf-debugging From 36becb1302552c24b7bd59d8f00598e10a2411ea Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 10 Oct 2023 09:46:23 -0700 Subject: [PATCH 2349/2608] metadata usage breakdowns: tracking edata and rtree usages --- include/jemalloc/internal/arena_stats.h | 2 ++ include/jemalloc/internal/base.h | 6 +++- include/jemalloc/internal/ctl.h | 2 ++ src/arena.c | 8 +++-- src/base.c | 39 +++++++++++++++++++++---- src/ctl.c | 24 +++++++++++++++ src/rtree.c | 8 ++--- src/stats.c | 22 ++++++++++---- test/unit/base.c | 22 +++++++------- test/unit/stats.c | 13 ++++++++- 10 files changed, 116 insertions(+), 30 deletions(-) diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3407b023..3d512630 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -52,6 +52,8 @@ struct arena_stats_s { * in pa_shard_stats_t. */ size_t base; /* Derived. */ + size_t metadata_edata; /* Derived. */ + size_t metadata_rtree; /* Derived. */ size_t resident; /* Derived. */ size_t metadata_thp; /* Derived. */ size_t mapped; /* Derived. 
*/ diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 451be10f..86b0cf4a 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -78,6 +78,8 @@ struct base_s { /* Stats, only maintained if config_stats. */ size_t allocated; + size_t edata_allocated; + size_t rtree_allocated; size_t resident; size_t mapped; /* Number of THP regions touched. */ @@ -104,10 +106,12 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks); void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); +void *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size); void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped, size_t *n_thp); + size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, + size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); void base_postfork_parent(tsdn_t *tsdn, base_t *base); void base_postfork_child(tsdn_t *tsdn, base_t *base); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1d3e6140..f38236f6 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -57,6 +57,8 @@ typedef struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t metadata_edata; + size_t metadata_rtree; size_t metadata_thp; size_t resident; size_t mapped; diff --git a/src/arena.c b/src/arena.c index e7fa0971..d937c349 100644 --- a/src/arena.c +++ b/src/arena.c @@ -92,8 +92,10 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped, metadata_thp; - base_stats_get(tsdn, 
arena->base, &base_allocated, &base_resident, + size_t base_allocated, base_edata_allocated, base_rtree_allocated, + base_resident, base_mapped, metadata_thp; + base_stats_get(tsdn, arena->base, &base_allocated, + &base_edata_allocated, &base_rtree_allocated, &base_resident, &base_mapped, &metadata_thp); size_t pac_mapped_sz = pac_mapped(&arena->pa_shard.pac); astats->mapped += base_mapped + pac_mapped_sz; @@ -102,6 +104,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); astats->base += base_allocated; + astats->metadata_edata += base_edata_allocated; + astats->metadata_rtree += base_rtree_allocated; atomic_load_add_store_zu(&astats->internal, arena_internal_get(arena)); astats->metadata_thp += metadata_thp; diff --git a/src/base.c b/src/base.c index e1dfe604..1d5e8fcd 100644 --- a/src/base.c +++ b/src/base.c @@ -430,6 +430,8 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, edata_avail_new(&base->edata_avail); if (config_stats) { + base->edata_allocated = 0; + base->rtree_allocated = 0; base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; @@ -482,7 +484,7 @@ base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { static void * base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, - size_t *esn) { + size_t *esn, size_t *ret_usize) { alignment = QUANTUM_CEILING(alignment); size_t usize = ALIGNMENT_CEILING(size, alignment); size_t asize = usize + alignment - QUANTUM; @@ -510,6 +512,9 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, if (esn != NULL) { *esn = (size_t)edata_sn_get(edata); } + if (ret_usize != NULL) { + *ret_usize = usize; + } label_return: malloc_mutex_unlock(tsdn, &base->mtx); return ret; @@ -525,21 +530,38 @@ label_return: */ void * base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { - return base_alloc_impl(tsdn, 
base, size, alignment, NULL); + return base_alloc_impl(tsdn, base, size, alignment, NULL, NULL); } edata_t * base_alloc_edata(tsdn_t *tsdn, base_t *base) { - size_t esn; + size_t esn, usize; edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), - EDATA_ALIGNMENT, &esn); + EDATA_ALIGNMENT, &esn, &usize); if (edata == NULL) { return NULL; } + if (config_stats) { + base->edata_allocated += usize; + } edata_esn_set(edata, esn); return edata; } +void * +base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) { + size_t usize; + void *rtree = base_alloc_impl(tsdn, base, size, CACHELINE, NULL, + &usize); + if (rtree == NULL) { + return NULL; + } + if (config_stats) { + base->rtree_allocated += usize; + } + return rtree; +} + static inline void b0_alloc_header_size(size_t *header_size, size_t *alignment) { *alignment = QUANTUM; @@ -573,7 +595,8 @@ b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { b0_alloc_header_size(&header_size, &alignment); size_t alloc_size = sz_s2u(stack_size + header_size); - void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn); + void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn, + NULL); if (addr == NULL) { edata_avail_insert(&base->edata_avail, edata); return NULL; @@ -609,14 +632,18 @@ b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) { } void -base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, +base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->edata_allocated + base->rtree_allocated <= base->allocated); *allocated = base->allocated; + *edata_allocated = base->edata_allocated; + *rtree_allocated = base->rtree_allocated; *resident = base->resident; *mapped = base->mapped; *n_thp = 
base->n_thp; diff --git a/src/ctl.c b/src/ctl.c index 5697539a..b0277c0a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -294,6 +294,8 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_edata) +CTL_PROTO(stats_arenas_i_metadata_rtree) CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) @@ -307,6 +309,8 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_edata) +CTL_PROTO(stats_metadata_rtree) CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) @@ -801,6 +805,8 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)}, {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("tcache_stashed_bytes"), @@ -846,6 +852,8 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_edata"), CTL(stats_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_metadata_rtree)}, {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, @@ -1138,6 +1146,10 @@ MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { sdstats->astats.base += astats->astats.base; + sdstats->astats.metadata_edata += astats->astats + .metadata_edata; + sdstats->astats.metadata_rtree += astats->astats + 
.metadata_rtree; sdstats->astats.resident += astats->astats.resident; sdstats->astats.metadata_thp += astats->astats.metadata_thp; ctl_accum_atomic_zu(&sdstats->astats.internal, @@ -1341,6 +1353,10 @@ ctl_refresh(tsdn_t *tsdn) { ctl_stats->metadata = ctl_sarena->astats->astats.base + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_edata = ctl_sarena->astats->astats + .metadata_edata; + ctl_stats->metadata_rtree = ctl_sarena->astats->astats + .metadata_rtree; ctl_stats->resident = ctl_sarena->astats->astats.resident; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; @@ -3599,6 +3615,10 @@ label_return: CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_edata, ctl_stats->metadata_edata, + size_t) +CTL_RO_CGEN(config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, + size_t) CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) @@ -3664,6 +3684,10 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_edata, + arenas_i(mib[2])->astats->astats.metadata_edata, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_rtree, + arenas_i(mib[2])->astats->astats.metadata_rtree, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, arenas_i(mib[2])->astats->astats.metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, diff --git a/src/rtree.c b/src/rtree.c index 6496b5af..b6ac04b7 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -29,14 +29,14 
@@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { static rtree_node_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_node_elm_t *)base_alloc(tsdn, rtree->base, - nelms * sizeof(rtree_node_elm_t), CACHELINE); + return (rtree_node_elm_t *)base_alloc_rtree(tsdn, rtree->base, + nelms * sizeof(rtree_node_elm_t)); } static rtree_leaf_elm_t * rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_leaf_elm_t *)base_alloc(tsdn, rtree->base, - nelms * sizeof(rtree_leaf_elm_t), CACHELINE); + return (rtree_leaf_elm_t *)base_alloc_rtree(tsdn, rtree->base, + nelms * sizeof(rtree_leaf_elm_t)); } static rtree_node_elm_t * diff --git a/src/stats.c b/src/stats.c index d80af226..c580b49e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1052,7 +1052,8 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident, metadata_thp, extent_avail; + size_t base, internal, resident, metadata_edata, metadata_rtree, + metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -1352,6 +1353,8 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_MEM_STAT(retained) GET_AND_EMIT_MEM_STAT(base) GET_AND_EMIT_MEM_STAT(internal) + GET_AND_EMIT_MEM_STAT(metadata_edata) + GET_AND_EMIT_MEM_STAT(metadata_rtree) GET_AND_EMIT_MEM_STAT(metadata_thp) GET_AND_EMIT_MEM_STAT(tcache_bytes) GET_AND_EMIT_MEM_STAT(tcache_stashed_bytes) @@ -1696,8 +1699,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, * These should be deleted. We keep them around for a while, to aid in * the transition to the emitter code. 
*/ - size_t allocated, active, metadata, metadata_thp, resident, mapped, - retained; + size_t allocated, active, metadata, metadata_edata, metadata_rtree, + metadata_thp, resident, mapped, retained; size_t num_background_threads; size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; @@ -1705,6 +1708,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_edata", &metadata_edata, size_t); + CTL_GET("stats.metadata_rtree", &metadata_rtree, size_t); CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); @@ -1730,6 +1735,10 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); emitter_json_kv(emitter, "active", emitter_type_size, &active); emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); + emitter_json_kv(emitter, "metadata_edata", emitter_type_size, + &metadata_edata); + emitter_json_kv(emitter, "metadata_rtree", emitter_type_size, + &metadata_rtree); emitter_json_kv(emitter, "metadata_thp", emitter_type_size, &metadata_thp); emitter_json_kv(emitter, "resident", emitter_type_size, &resident); @@ -1739,9 +1748,10 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, &zero_reallocs); emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " - "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " - "retained: %zu\n", allocated, active, metadata, metadata_thp, - resident, mapped, retained); + "metadata: %zu (n_thp %zu, edata %zu, rtree %zu), resident: %zu, " + "mapped: %zu, retained: %zu\n", allocated, active, metadata, + metadata_thp, metadata_edata, metadata_rtree, resident, mapped, + retained); /* Strange behaviors */ 
emitter_table_printf(emitter, diff --git a/test/unit/base.c b/test/unit/base.c index 15e04a8c..3e46626e 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -28,7 +28,8 @@ static extent_hooks_t hooks_not_null = { TEST_BEGIN(test_base_hooks_default) { base_t *base; - size_t allocated0, allocated1, resident, mapped, n_thp; + size_t allocated0, allocated1, edata_allocated, + rtree_allocated, resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); base = base_new(tsdn, 0, @@ -36,8 +37,8 @@ TEST_BEGIN(test_base_hooks_default) { /* metadata_use_hooks */ true); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated0, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { @@ -50,8 +51,8 @@ TEST_BEGIN(test_base_hooks_default) { "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated1, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } @@ -63,7 +64,8 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; base_t *base; - size_t allocated0, allocated1, resident, mapped, n_thp; + size_t allocated0, allocated1, edata_allocated, + rtree_allocated, resident, mapped, n_thp; extent_hooks_prep(); try_dalloc = false; @@ -79,8 +81,8 @@ TEST_BEGIN(test_base_hooks_null) { expect_ptr_not_null(base, "Unexpected base_new() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated0, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated0, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if 
(opt_metadata_thp == metadata_thp_always) { @@ -93,8 +95,8 @@ TEST_BEGIN(test_base_hooks_null) { "Unexpected base_alloc() failure"); if (config_stats) { - base_stats_get(tsdn, base, &allocated1, &resident, &mapped, - &n_thp); + base_stats_get(tsdn, base, &allocated1, &edata_allocated, + &rtree_allocated, &resident, &mapped, &n_thp); expect_zu_ge(allocated1 - allocated0, 42, "At least 42 bytes were allocated by base_alloc()"); } diff --git a/test/unit/stats.c b/test/unit/stats.c index bbdbd180..203a71b5 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -4,7 +4,8 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) TEST_BEGIN(test_stats_summary) { - size_t sz, allocated, active, resident, mapped; + size_t sz, allocated, active, resident, mapped, + metadata, metadata_edata, metadata_rtree; int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); @@ -17,6 +18,13 @@ TEST_BEGIN(test_stats_summary) { expect_d_eq(mallctl("stats.mapped", (void *)&mapped, &sz, NULL, 0), expected, "Unexpected mallctl() result"); + expect_d_eq(mallctl("stats.metadata", (void *)&metadata, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); + expect_d_eq(mallctl("stats.metadata_edata", (void *)&metadata_edata, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + expect_d_eq(mallctl("stats.metadata_rtree", (void *)&metadata_rtree, + &sz, NULL, 0), expected, "Unexpected mallctl() result"); + if (config_stats) { expect_zu_le(allocated, active, "allocated should be no larger than active"); @@ -24,6 +32,9 @@ TEST_BEGIN(test_stats_summary) { "active should be less than resident"); expect_zu_lt(active, mapped, "active should be less than mapped"); + expect_zu_le(metadata_edata + metadata_rtree, metadata, + "the sum of metadata_edata and metadata_rtree " + "should be no larger than metadata"); } } TEST_END From 6b197fdd460be8bf3379da91d42e677dd5b5437a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 22 Aug 2023 16:31:54 -0700 Subject: [PATCH 2350/2608] Pre-generate ncached_max 
for all bins for better tcache_max tuning experience. --- include/jemalloc/internal/arena_inlines_b.h | 8 +- include/jemalloc/internal/cache_bin.h | 91 +++++--- include/jemalloc/internal/tcache_externs.h | 7 +- include/jemalloc/internal/tcache_inlines.h | 144 +++++------- include/jemalloc/internal/tcache_structs.h | 4 +- src/arena.c | 22 +- src/cache_bin.c | 20 +- src/ctl.c | 6 +- src/jemalloc.c | 3 +- src/tcache.c | 244 +++++++++++++------- test/unit/cache_bin.c | 132 ++++++----- test/unit/tcache_max.c | 44 ++-- 12 files changed, 417 insertions(+), 308 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index c4d1c887..a4bacd8b 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -198,7 +198,9 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } else if (likely(size <= tcache_max_get(tcache))) { + } else if (likely(ind < TCACHE_NBINS_MAX && + !tcache_bin_disabled(ind, &tcache->bins[ind], + tcache->tcache_slow))) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); } @@ -298,7 +300,9 @@ JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); - if (szind < tcache_nhbins_get(tcache)) { + if (szind < TCACHE_NBINS_MAX && + !tcache_bin_disabled(szind, &tcache->bins[szind], + tcache->tcache_slow)) { if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 78ac3295..2c831caf 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -23,16 +23,20 @@ */ typedef uint16_t 
cache_bin_sz_t; +#define JUNK_ADDR ((uintptr_t)0x7a7a7a7a7a7a7a7aULL) /* * Leave a noticeable mark pattern on the cache bin stack boundaries, in case a * bug starts leaking those. Make it look like the junk pattern but be distinct * from it. */ -static const uintptr_t cache_bin_preceding_junk = - (uintptr_t)0x7a7a7a7a7a7a7a7aULL; -/* Note: a7 vs. 7a above -- this tells you which pointer leaked. */ -static const uintptr_t cache_bin_trailing_junk = - (uintptr_t)0xa7a7a7a7a7a7a7a7ULL; +static const uintptr_t cache_bin_preceding_junk = JUNK_ADDR; +/* Note: JUNK_ADDR vs. JUNK_ADDR + 1 -- this tells you which pointer leaked. */ +static const uintptr_t cache_bin_trailing_junk = JUNK_ADDR + 1; +/* + * A pointer used to initialize a fake stack_head for disabled small bins + * so that the enabled/disabled assessment does not rely on ncached_max. + */ +extern const uintptr_t disabled_bin; /* * That implies the following value, for the maximum number of items in any @@ -174,9 +178,35 @@ cache_bin_nonfast_aligned(const void *ptr) { return ((uintptr_t)ptr & san_cache_bin_nonfast_mask) == 0; } +static inline const void * +cache_bin_disabled_bin_stack(void) { + return &disabled_bin; +} + +/* + * If a cache bin was zero initialized (either because it lives in static or + * thread-local storage, or was memset to 0), this function indicates whether or + * not cache_bin_init was called on it. + */ +static inline bool +cache_bin_still_zero_initialized(cache_bin_t *bin) { + return bin->stack_head == NULL; +} + +static inline bool +cache_bin_disabled(cache_bin_t *bin) { + bool disabled = (bin->stack_head == cache_bin_disabled_bin_stack()); + if (disabled) { + assert((uintptr_t)(*bin->stack_head) == JUNK_ADDR); + } + return disabled; +} + /* Returns ncached_max: Upper limit on ncached. 
*/ static inline cache_bin_sz_t -cache_bin_info_ncached_max(cache_bin_info_t *info) { +cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) { + assert(!cache_bin_disabled(bin)); + assert(info == &bin->bin_info); return info->ncached_max; } @@ -234,7 +264,7 @@ cache_bin_ncached_get_internal(cache_bin_t *bin) { static inline cache_bin_sz_t cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); - assert(n <= cache_bin_info_ncached_max(info)); + assert(n <= cache_bin_info_ncached_max_get(bin, info)); return n; } @@ -271,7 +301,7 @@ cache_bin_empty_position_get(cache_bin_t *bin) { static inline uint16_t cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { return (uint16_t)bin->low_bits_empty - - info->ncached_max * sizeof(void *); + cache_bin_info_ncached_max_get(bin, info) * sizeof(void *); } /* @@ -281,7 +311,7 @@ cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { */ static inline void ** cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); @@ -313,7 +343,7 @@ cache_bin_low_water_get_internal(cache_bin_t *bin) { static inline cache_bin_sz_t cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); - assert(low_water <= cache_bin_info_ncached_max(info)); + assert(low_water <= cache_bin_info_ncached_max_get(bin, info)); assert(low_water <= cache_bin_ncached_get_local(bin, info)); cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, @@ -328,11 +358,13 @@ cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { */ static inline void cache_bin_low_water_set(cache_bin_t *bin) { + 
assert(!cache_bin_disabled(bin)); bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; } static inline void cache_bin_low_water_adjust(cache_bin_t *bin) { + assert(!cache_bin_disabled(bin)); if (cache_bin_ncached_get_internal(bin) < cache_bin_low_water_get_internal(bin)) { cache_bin_low_water_set(bin); @@ -494,25 +526,26 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Get the number of stashed pointers. */ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max(info); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, info); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); + if (config_debug && n != 0) { + /* Below are for assertions only. */ + void **low_bound = cache_bin_low_bound_get(bin, info); - /* Below are for assertions only. */ - void **low_bound = cache_bin_low_bound_get(bin, info); - - assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); - void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + void *stashed = *(low_bound + n - 1); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET - /* Allow arbitrary pointers to be stashed in tests. */ - aligned = true; + /* Allow arbitrary pointers to be stashed in tests. 
*/ + aligned = true; #endif - assert(n == 0 || (stashed != NULL && aligned)); + assert(stashed != NULL && aligned); + } return n; } @@ -520,7 +553,7 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info); - assert(n <= cache_bin_info_ncached_max(info)); + assert(n <= cache_bin_info_ncached_max_get(bin, info)); return n; } @@ -541,8 +574,8 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { * This function should not call other utility functions because the racy * condition may cause unexpected / undefined behaviors in unverified utility * functions. Currently, this function calls two utility functions - * cache_bin_info_ncached_max and cache_bin_low_bits_low_bound_get because they - * help access values that will not be concurrently modified. + * cache_bin_info_ncached_max_get and cache_bin_low_bits_low_bound_get because + * they help access values that will not be concurrently modified. */ static inline void cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, @@ -552,7 +585,8 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, (uint16_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); - assert(n <= cache_bin_info_ncached_max(info)); + cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); + assert(n <= ncached_max); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ @@ -560,7 +594,7 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, info); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); - assert(n <= cache_bin_info_ncached_max(info)); + assert(n <= ncached_max); *nstashed = n; /* Note that cannot assert ncached + nstashed <= ncached_max (racy). 
*/ } @@ -697,13 +731,8 @@ void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void cache_bin_postincrement(void *alloc, size_t *cur_offset); void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, size_t *cur_offset); +void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max); -/* - * If a cache bin was zero initialized (either because it lives in static or - * thread-local storage, or was memset to 0), this function indicates whether or - * not cache_bin_init was called on it. - */ -bool cache_bin_still_zero_initialized(cache_bin_t *bin); bool cache_bin_stack_use_thp(void); #endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 87d243a1..8ca966d7 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -24,9 +24,9 @@ extern unsigned opt_lg_tcache_flush_large_div; * large-object bins. This is only used during threads initialization and * changing it will not reflect on initialized threads as expected. Thus, * it should not be changed on the fly. To change the number of tcache bins - * in use, refer to tcache_nhbins of each tcache. + * in use, refer to tcache_nbins of each tcache. */ -extern unsigned global_do_not_change_nhbins; +extern unsigned global_do_not_change_nbins; /* * Maximum cached size class. 
Same as above, this is only used during threads @@ -58,6 +58,7 @@ void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); +void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); @@ -70,8 +71,8 @@ void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); void tcache_flush(tsd_t *tsd); -bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena); bool tsd_tcache_enabled_data_init(tsd_t *tsd); +void tcache_enabled_set(tsd_t *tsd, bool enabled); void tcache_assert_initialized(tcache_t *tcache); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 97501ee2..68481113 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -18,94 +18,72 @@ tcache_enabled_get(tsd_t *tsd) { return tsd_tcache_enabled_get(tsd); } -static inline void -tcache_enabled_set(tsd_t *tsd, bool enabled) { - bool was_enabled = tsd_tcache_enabled_get(tsd); - - if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd, NULL); - } else if (was_enabled && !enabled) { - tcache_cleanup(tsd); - } - /* Commit the state last. Above calls check current state. 
*/ - tsd_tcache_enabled_set(tsd, enabled); - tsd_slow_update(tsd); -} - static inline unsigned -tcache_nhbins_get(tcache_t *tcache) { - assert(tcache != NULL); - assert(tcache->tcache_nhbins <= TCACHE_NBINS_MAX); - return tcache->tcache_nhbins; +tcache_nbins_get(tcache_slow_t *tcache_slow) { + assert(tcache_slow != NULL); + unsigned nbins = tcache_slow->tcache_nbins; + assert(nbins <= TCACHE_NBINS_MAX); + return nbins; } static inline size_t -tcache_max_get(tcache_t *tcache) { - assert(tcache != NULL); - assert(tcache->tcache_max <= TCACHE_MAXCLASS_LIMIT); - return tcache->tcache_max; +tcache_max_get(tcache_slow_t *tcache_slow) { + assert(tcache_slow != NULL); + size_t tcache_max = sz_index2size(tcache_nbins_get(tcache_slow) - 1); + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + return tcache_max; } static inline void -tcache_max_and_nhbins_set(tcache_t *tcache, size_t tcache_max) { - assert(tcache != NULL); +tcache_max_set(tcache_slow_t *tcache_slow, size_t tcache_max) { + assert(tcache_slow != NULL); assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); - tcache->tcache_max = tcache_max; - tcache->tcache_nhbins = sz_size2index(tcache_max) + 1; + tcache_slow->tcache_nbins = sz_size2index(tcache_max) + 1; } static inline void -thread_tcache_max_and_nhbins_set(tsd_t *tsd, size_t tcache_max) { - assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); - assert(tcache_max == sz_s2u(tcache_max)); - tcache_t *tcache = tsd_tcachep_get(tsd); - tcache_slow_t *tcache_slow; - assert(tcache != NULL); - - bool enabled = tcache_available(tsd); - arena_t *assigned_arena; - if (enabled) { - tcache_slow = tcache_slow_get(tsd); - assert(tcache != NULL && tcache_slow != NULL); - assigned_arena = tcache_slow->arena; - /* Shutdown and reboot the tcache for a clean slate. 
*/ - tcache_cleanup(tsd); +tcache_bin_settings_backup(tcache_t *tcache, + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], + tcache->bins[i].bin_info.ncached_max); } +} + +JEMALLOC_ALWAYS_INLINE bool +tcache_bin_disabled(szind_t ind, cache_bin_t *bin, + tcache_slow_t *tcache_slow) { + assert(bin != NULL); + bool disabled = cache_bin_disabled(bin); /* - * Still set tcache_max and tcache_nhbins of the tcache even if - * the tcache is not available yet because the values are - * stored in tsd_t and are always available for changing. - */ - tcache_max_and_nhbins_set(tcache, tcache_max); - - if (enabled) { - tsd_tcache_data_init(tsd, assigned_arena); + * If a bin's ind >= nbins or ncached_max == 0, it must be disabled. + * However, when ind < nbins, it could be either enabled + * (ncached_max > 0) or disabled (ncached_max == 0). Similarly, when + * ncached_max > 0, it could be either enabled (ind < nbins) or + * disabled (ind >= nbins). Thus, if a bin is disabled, it has either + * ind >= nbins or ncached_max == 0. If a bin is enabled, it has + * ind < nbins and ncached_max > 0. + */ + unsigned nbins = tcache_nbins_get(tcache_slow); + cache_bin_sz_t ncached_max = bin->bin_info.ncached_max; + if (ind >= nbins) { + assert(disabled); + } else { + assert(!disabled || ncached_max == 0); + } + if (ncached_max == 0) { + assert(disabled); + } else { + assert(!disabled || ind >= nbins); + } + if (disabled) { + assert(ind >= nbins || ncached_max == 0); + } else { + assert(ind < nbins && ncached_max > 0); } - assert(tcache_nhbins_get(tcache) == sz_size2index(tcache_max) + 1); -} - -JEMALLOC_ALWAYS_INLINE bool -tcache_small_bin_disabled(szind_t ind, cache_bin_t *bin) { - assert(ind < SC_NBINS); - assert(bin != NULL); - bool ret = cache_bin_info_ncached_max(&bin->bin_info) == 0; - if (ret) { - /* small size class but cache bin disabled. 
*/ - assert((uintptr_t)(*bin->stack_head) == - cache_bin_preceding_junk); - } - - return ret; -} - -JEMALLOC_ALWAYS_INLINE bool -tcache_large_bin_disabled(szind_t ind, cache_bin_t *bin) { - assert(ind >= SC_NBINS); - assert(bin != NULL); - return (cache_bin_info_ncached_max(&bin->bin_info) == 0 || - cache_bin_still_zero_initialized(bin)); + return disabled; } JEMALLOC_ALWAYS_INLINE void * @@ -124,7 +102,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(arena == NULL)) { return NULL; } - if (unlikely(tcache_small_bin_disabled(binind, bin))) { + if (unlikely(tcache_bin_disabled(binind, bin, + tcache->tcache_slow))) { /* stats and zero are handled directly by the arena. */ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, binind, zero, /* slab */ true); @@ -157,8 +136,9 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, void *ret; bool tcache_success; - assert(binind >= SC_NBINS && binind < tcache_nhbins_get(tcache)); cache_bin_t *bin = &tcache->bins[binind]; + assert(binind >= SC_NBINS && + !tcache_bin_disabled(binind, bin, tcache->tcache_slow)); ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -180,7 +160,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } else { if (unlikely(zero)) { size_t usize = sz_index2size(binind); - assert(usize <= tcache_max_get(tcache)); + assert(usize <= tcache_max_get(tcache->tcache_slow)); memset(ret, 0, usize); } @@ -214,12 +194,13 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - if (unlikely(tcache_small_bin_disabled(binind, bin))) { + if (unlikely(tcache_bin_disabled(binind, bin, + tcache->tcache_slow))) { arena_dalloc_small(tsd_tsdn(tsd), ptr); return; } - cache_bin_sz_t max = cache_bin_info_ncached_max( - &bin->bin_info); + cache_bin_sz_t max = cache_bin_info_ncached_max_get( + 
bin, &bin->bin_info); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); @@ -232,12 +213,13 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache)); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= + tcache_max_get(tcache->tcache_slow)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_info_ncached_max( - &bin->bin_info) >> opt_lg_tcache_flush_large_div; + unsigned remain = cache_bin_info_ncached_max_get( + bin, &bin->bin_info) >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index b51e10a7..d94099b0 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -31,6 +31,8 @@ struct tcache_slow_s { /* The arena this tcache is associated with. */ arena_t *arena; + /* The number of bins activated in the tcache. */ + unsigned tcache_nbins; /* Next bin to GC. */ szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). 
*/ @@ -55,8 +57,6 @@ struct tcache_slow_s { struct tcache_s { tcache_slow_t *tcache_slow; - unsigned tcache_nhbins; - size_t tcache_max; cache_bin_t bins[TCACHE_NBINS_MAX]; }; diff --git a/src/arena.c b/src/arena.c index d937c349..4a383670 100644 --- a/src/arena.c +++ b/src/arena.c @@ -163,17 +163,13 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; + if (cache_bin_disabled(cache_bin)) { + continue; + } + cache_bin_sz_t ncached, nstashed; cache_bin_nitems_get_remote(cache_bin, &cache_bin->bin_info, &ncached, &nstashed); - - if ((i < SC_NBINS && - tcache_small_bin_disabled(i, cache_bin)) || - (i >= SC_NBINS && - tcache_large_bin_disabled(i, cache_bin))) { - assert(ncached == 0 && nstashed == 0); - } - astats->tcache_bytes += ncached * sz_index2size(i); astats->tcache_stashed_bytes += nstashed * sz_index2size(i); @@ -730,11 +726,13 @@ arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, */ safety_check_verify_redzone(ptr, usize, bumped_usize); } + szind_t bumped_ind = sz_size2index(bumped_usize); if (bumped_usize >= SC_LARGE_MINCLASS && - tcache != NULL && - bumped_usize <= tcache_max_get(tcache)) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, - sz_size2index(bumped_usize), slow_path); + tcache != NULL && bumped_ind < TCACHE_NBINS_MAX && + !tcache_bin_disabled(bumped_ind, &tcache->bins[bumped_ind], + tcache->tcache_slow)) { + tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, bumped_ind, + slow_path); } else { large_dalloc(tsdn, edata); } diff --git a/src/cache_bin.c b/src/cache_bin.c index 2ad2062d..67b6327b 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -5,10 +5,11 @@ #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/safety_check.h" +const uintptr_t disabled_bin = JUNK_ADDR; + void cache_bin_info_init(cache_bin_info_t *info, 
cache_bin_sz_t ncached_max) { - assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); info->ncached_max = (cache_bin_sz_t)ncached_max; @@ -37,7 +38,6 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, */ *size = sizeof(void *) * 2; for (szind_t i = 0; i < ninfos; i++) { - assert(infos[i].ncached_max > 0); *size += infos[i].ncached_max * sizeof(void *); } @@ -98,13 +98,21 @@ cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); - assert(cache_bin_ncached_get_local(bin, info) == 0); + if (!cache_bin_disabled(bin)) { + assert(cache_bin_ncached_get_local(bin, &bin->bin_info) == 0); + } assert(cache_bin_empty_position_get(bin) == empty_position); assert(bin_stack_size > 0 || empty_position == full_position); } -bool -cache_bin_still_zero_initialized(cache_bin_t *bin) { - return bin->stack_head == NULL; +void +cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) { + const void *fake_stack = cache_bin_disabled_bin_stack(); + size_t fake_offset = 0; + cache_bin_info_t fake_info; + cache_bin_info_init(&fake_info, 0); + cache_bin_init(bin, &fake_info, (void *)fake_stack, &fake_offset); + cache_bin_info_init(&bin->bin_info, ncached_max); + assert(fake_offset == 0); } diff --git a/src/ctl.c b/src/ctl.c index b0277c0a..af22d0aa 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2317,7 +2317,7 @@ thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, /* pointer to tcache_t always exists even with tcache disabled. 
*/ tcache_t *tcache = tsd_tcachep_get(tsd); assert(tcache != NULL); - oldval = tcache_max_get(tcache); + oldval = tcache_max_get(tcache->tcache_slow); READ(oldval, size_t); if (newp != NULL) { @@ -2332,7 +2332,7 @@ thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, } new_tcache_max = sz_s2u(new_tcache_max); if(new_tcache_max != oldval) { - thread_tcache_max_and_nhbins_set(tsd, new_tcache_max); + thread_tcache_max_set(tsd, new_tcache_max); } } @@ -3155,7 +3155,7 @@ CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nhbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 7aa6a1cd..4bf5cbff 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4140,7 +4140,8 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); if (likely(tcache != NULL && - ind < tcache_nhbins_get(tcache)) && progress < batch) { + !tcache_bin_disabled(ind, &tcache->bins[ind], + tcache->tcache_slow)) && progress < batch) { if (bin == NULL) { bin = &tcache->bins[ind]; } diff --git a/src/tcache.c b/src/tcache.c index 2c0a7e2e..3070193c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -60,10 +60,10 @@ unsigned opt_lg_tcache_flush_large_div = 1; /* * Number of cache bins enabled, including both large and small. This value - * is only used to initialize tcache_nhbins in the per-thread tcache. + * is only used to initialize tcache_nbins in the per-thread tcache. 
* Directly modifying it will not affect threads already launched. */ -unsigned global_do_not_change_nhbins; +unsigned global_do_not_change_nbins; /* * Max size class to be cached (can be small or large). This value is only used * to initialize tcache_max in the per-thread tcache. Directly modifying it @@ -129,6 +129,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; + assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, @@ -155,7 +156,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_info_ncached_max(&cache_bin->bin_info) + if ((cache_bin_info_ncached_max_get(cache_bin, &cache_bin->bin_info) >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { tcache_slow->lg_fill_div[szind]++; } @@ -167,6 +168,7 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* Like the small GC; flush 3/4 of untouched items. 
*/ assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; + assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &cache_bin->bin_info); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, @@ -187,8 +189,12 @@ tcache_event(tsd_t *tsd) { bool is_small = (szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; - tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); + if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) { + goto label_done; + } + tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, + is_small); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &cache_bin->bin_info); if (low_water > 0) { @@ -210,8 +216,9 @@ tcache_event(tsd_t *tsd) { } cache_bin_low_water_set(cache_bin); +label_done: tcache_slow->next_gc_bin++; - if (tcache_slow->next_gc_bin == tcache_nhbins_get(tcache)) { + if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) { tcache_slow->next_gc_bin = 0; } } @@ -236,8 +243,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, void *ret; assert(tcache_slow->arena != NULL); - unsigned nfill = cache_bin_info_ncached_max(&cache_bin->bin_info) - >> tcache_slow->lg_fill_div[binind]; + assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); + unsigned nfill = cache_bin_info_ncached_max_get(cache_bin, + &cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; arena_cache_bin_fill_small(tsdn, arena, cache_bin, &cache_bin->bin_info, binind, nfill); tcache_slow->bin_refilled[binind] = true; @@ -321,7 +329,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (small) { assert(binind < SC_NBINS); } else { - assert(binind < tcache_nhbins_get(tcache)); + assert(binind < tcache_nbins_get(tcache_slow)); } arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); @@ -508,6 +516,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t 
*cache_bin, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { + assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, @@ -551,6 +560,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small) { + assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); cache_bin_info_t *info = &cache_bin->bin_info; /* * The two below are for assertion only. The content of original cached @@ -562,7 +572,8 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, info); cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info); - assert(orig_cached + nstashed <= cache_bin_info_ncached_max(info)); + assert(orig_cached + nstashed <= + cache_bin_info_ncached_max_get(cache_bin, info)); if (nstashed == 0) { return; } @@ -637,33 +648,11 @@ tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, } static void -tcache_max_and_nhbins_init(tcache_t *tcache) { - assert(tcache != NULL); +tcache_default_settings_init(tcache_slow_t *tcache_slow) { + assert(tcache_slow != NULL); assert(global_do_not_change_tcache_maxclass != 0); - assert(global_do_not_change_nhbins != 0); - tcache->tcache_max = global_do_not_change_tcache_maxclass; - tcache->tcache_nhbins = global_do_not_change_nhbins; - assert(tcache->tcache_nhbins == sz_size2index(tcache->tcache_max) + 1); -} - -bool -tsd_tcache_enabled_data_init(tsd_t *tsd) { - /* Called upon tsd initialization. */ - tsd_tcache_enabled_set(tsd, opt_tcache); - /* - * tcache is not available yet, but we need to set up its tcache_max - * and tcache_nhbins in advance. 
- */ - tcache_t *tcache = tsd_tcachep_get(tsd); - tcache_max_and_nhbins_init(tcache); - tsd_slow_update(tsd); - - if (opt_tcache) { - /* Trigger tcache init. */ - tsd_tcache_data_init(tsd, NULL); - } - - return false; + assert(global_do_not_change_nbins != 0); + tcache_slow->tcache_nbins = global_do_not_change_nbins; } static void @@ -679,19 +668,15 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, /* * We reserve cache bins for all small size classes, even if some may - * not get used (i.e. bins higher than tcache_nhbins). This allows + * not get used (i.e. bins higher than tcache_nbins). This allows * the fast and common paths to access cache bin metadata safely w/o * worrying about which ones are disabled. */ - unsigned tcache_nhbins = tcache_nhbins_get(tcache); - unsigned n_reserved_bins = tcache_nhbins < SC_NBINS ? SC_NBINS - : tcache_nhbins; - memset(tcache->bins, 0, sizeof(cache_bin_t) * n_reserved_bins); - + unsigned tcache_nbins = tcache_nbins_get(tcache_slow); size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, tcache_nhbins, mem, + cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, &cur_offset); - for (unsigned i = 0; i < tcache_nhbins; i++) { + for (unsigned i = 0; i < tcache_nbins; i++) { if (i < SC_NBINS) { tcache_slow->lg_fill_div[i] = 1; tcache_slow->bin_refilled[i] = false; @@ -699,40 +684,40 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, = tcache_gc_item_delay_compute(i); } cache_bin_t *cache_bin = &tcache->bins[i]; - cache_bin_init(cache_bin, &tcache_bin_info[i], mem, - &cur_offset); + if (tcache_bin_info[i].ncached_max > 0) { + cache_bin_init(cache_bin, &tcache_bin_info[i], mem, + &cur_offset); + } else { + cache_bin_init_disabled(cache_bin, + tcache_bin_info[i].ncached_max); + } } /* - * For small size classes beyond tcache_max(i.e. 
- * tcache_nhbins< NBINS), their cache bins are initialized to a state - * to safely and efficiently fail all fastpath alloc / free, so that - * no additional check around tcache_nhbins is needed on fastpath. + * Initialize all disabled bins to a state that can safely and + * efficiently fail all fastpath alloc / free, so that no additional + * check around tcache_nbins is needed on fastpath. Yet we still + * store the ncached_max in the bin_info for future usage. */ - for (unsigned i = tcache_nhbins; i < SC_NBINS; i++) { - /* Disabled small bins. */ + for (unsigned i = tcache_nbins; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &tcache->bins[i]; - void *fake_stack = mem; - size_t fake_offset = 0; - - cache_bin_init(cache_bin, &tcache_bin_info[i], fake_stack, - &fake_offset); - assert(tcache_small_bin_disabled(i, cache_bin)); + cache_bin_init_disabled(cache_bin, + tcache_bin_info[i].ncached_max); + assert(tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)); } cache_bin_postincrement(mem, &cur_offset); if (config_debug) { /* Sanity check that the whole stack is used. 
*/ size_t size, alignment; - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, &size, &alignment); assert(cur_offset == size); } } static inline unsigned -tcache_ncached_max_compute(szind_t szind, unsigned current_nhbins) { +tcache_ncached_max_compute(szind_t szind) { if (szind >= SC_NBINS) { - assert(szind < current_nhbins); return opt_tcache_nslots_large; } unsigned slab_nregs = bin_infos[szind].nregs; @@ -788,32 +773,28 @@ tcache_ncached_max_compute(szind_t szind, unsigned current_nhbins) { } static void -tcache_bin_info_compute(cache_bin_info_t *tcache_bin_info, - unsigned tcache_nhbins) { - for (szind_t i = 0; i < tcache_nhbins; i++) { - unsigned ncached_max = tcache_ncached_max_compute(i, - tcache_nhbins); +tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + /* + * Compute the values for each bin, but for bins with indices larger + * than tcache_nbins, no items will be cached. + */ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + unsigned ncached_max = tcache_ncached_max_compute(i); + assert(ncached_max <= CACHE_BIN_NCACHED_MAX); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } - for (szind_t i = tcache_nhbins; i < SC_NBINS; i++) { - /* Disabled small bins. */ - cache_bin_info_init(&tcache_bin_info[i], 0); - } } -/* Initialize auto tcache (embedded in TSD). */ -bool -tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { +static bool +tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, + cache_bin_info_t *tcache_bin_info) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(cache_bin_still_zero_initialized(&tcache->bins[0])); - unsigned tcache_nhbins = tcache_nhbins_get(tcache); + unsigned tcache_nbins = tcache_nbins_get(tcache_slow); size_t size, alignment; - /* Takes 146B stack space. 
*/ - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; - tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, &size, &alignment); void *mem; @@ -860,6 +841,23 @@ tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { return false; } +static bool +tsd_tcache_data_init_with_bin_settings(tsd_t *tsd, arena_t *arena, + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + assert(tcache_bin_info != NULL); + return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); +} + +/* Initialize auto tcache (embedded in TSD). */ +static bool +tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { + /* Takes 146B stack space. */ + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + tcache_bin_info_compute(tcache_bin_info); + + return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); +} + /* Created manual tcache for tcache.create mallctl. */ tcache_t * tcache_create_explicit(tsd_t *tsd) { @@ -868,11 +866,11 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. 
*/ - unsigned tcache_nhbins = global_do_not_change_nhbins; + unsigned tcache_nbins = global_do_not_change_nbins; size_t tcache_size, alignment; - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {0}; - tcache_bin_info_compute(tcache_bin_info, tcache_nhbins); - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nhbins, + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + tcache_bin_info_compute(tcache_bin_info); + cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, &tcache_size, &alignment); size_t size = tcache_size + sizeof(tcache_t) @@ -889,7 +887,7 @@ tcache_create_explicit(tsd_t *tsd) { tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); - tcache_max_and_nhbins_init(tcache); + tcache_default_settings_init(tcache_slow); tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, @@ -898,13 +896,83 @@ tcache_create_explicit(tsd_t *tsd) { return tcache; } +bool +tsd_tcache_enabled_data_init(tsd_t *tsd) { + /* Called upon tsd initialization. */ + tsd_tcache_enabled_set(tsd, opt_tcache); + /* + * tcache is not available yet, but we need to set up its tcache_nbins + * in advance. + */ + tcache_default_settings_init(tsd_tcache_slowp_get(tsd)); + tsd_slow_update(tsd); + + if (opt_tcache) { + /* Trigger tcache init. */ + tsd_tcache_data_init(tsd, NULL); + } + + return false; +} + +void +tcache_enabled_set(tsd_t *tsd, bool enabled) { + bool was_enabled = tsd_tcache_enabled_get(tsd); + + if (!was_enabled && enabled) { + tsd_tcache_data_init(tsd, NULL); + } else if (was_enabled && !enabled) { + tcache_cleanup(tsd); + } + /* Commit the state last. Above calls check current state. 
*/ + tsd_tcache_enabled_set(tsd, enabled); + tsd_slow_update(tsd); +} + +void +thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { + assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); + assert(tcache_max == sz_s2u(tcache_max)); + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + assert(tcache != NULL && tcache_slow != NULL); + + bool enabled = tcache_available(tsd); + arena_t *assigned_arena; + if (enabled) { + assigned_arena = tcache_slow->arena; + /* Carry over the bin settings during the reboot. */ + tcache_bin_settings_backup(tcache, tcache_bin_info); + /* Shutdown and reboot the tcache for a clean slate. */ + tcache_cleanup(tsd); + } + + /* + * Still set tcache_nbins of the tcache even if the tcache is not + * available yet because the values are stored in tsd_t and are + * always available for changing. + */ + tcache_max_set(tcache_slow, tcache_max); + + if (enabled) { + tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, + tcache_bin_info); + } + + assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); +} + static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; assert(tcache_slow->arena != NULL); - for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { + for (unsigned i = 0; i < tcache_nbins_get(tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; + if (tcache_bin_disabled(i, cache_bin, tcache_slow)) { + continue; + } if (i < SC_NBINS) { tcache_bin_flush_small(tsd, tcache, cache_bin, i, 0); } else { @@ -974,8 +1042,7 @@ tcache_cleanup(tsd_t *tsd) { tcache_destroy(tsd, tcache, true); /* Make sure all bins used are reinitialized to the clean state. 
*/ - memset(tcache->bins, 0, sizeof(cache_bin_t) * - tcache_nhbins_get(tcache)); + memset(tcache->bins, 0, sizeof(cache_bin_t) * TCACHE_NBINS_MAX); } void @@ -983,8 +1050,11 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { cassert(config_stats); /* Merge and reset tcache stats. */ - for (unsigned i = 0; i < tcache_nhbins_get(tcache); i++) { + for (unsigned i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; + if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { + continue; + } if (i < SC_NBINS) { bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); malloc_mutex_lock(tsdn, &bin->lock); @@ -1110,7 +1180,7 @@ bool tcache_boot(tsdn_t *tsdn, base_t *base) { global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max); assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); - global_do_not_change_nhbins = + global_do_not_change_nbins = sz_size2index(global_do_not_change_tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 50d51a6d..aed34585 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -106,11 +106,13 @@ TEST_BEGIN(test_cache_bin) { cache_bin_info_init(&info, ncached_max); cache_bin_t bin; test_bin_init(&bin, &info); + cache_bin_info_t *bin_info = &bin.bin_info; /* Initialize to empty; should then have 0 elements. 
*/ - expect_d_eq(ncached_max, cache_bin_info_ncached_max(&info), ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); - expect_true(cache_bin_low_water_get(&bin, &info) == 0, ""); + expect_d_eq(ncached_max, cache_bin_info_ncached_max_get(&bin, + &bin.bin_info), ""); + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); + expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); @@ -127,14 +129,14 @@ TEST_BEGIN(test_cache_bin) { void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, &info) == i, ""); + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, "Should be able to dalloc into a non-full cache bin."); - expect_true(cache_bin_low_water_get(&bin, &info) == 0, + expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); @@ -142,9 +144,9 @@ TEST_BEGIN(test_cache_bin) { cache_bin_low_water_set(&bin); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_low_water_get(&bin, &info) + expect_true(cache_bin_low_water_get(&bin, bin_info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low @@ -153,9 +155,9 @@ TEST_BEGIN(test_cache_bin) { ptr = 
cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - expect_true(cache_bin_low_water_get(&bin, &info) + expect_true(cache_bin_low_water_get(&bin, bin_info) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max - i, ""); /* This should succeed, though. */ @@ -163,13 +165,13 @@ TEST_BEGIN(test_cache_bin) { expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); - expect_true(cache_bin_low_water_get(&bin, &info) + expect_true(cache_bin_low_water_get(&bin, bin_info) == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get_local(&bin, &info) + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max - i - 1, ""); } /* Now we're empty -- all alloc attempts should fail. */ - expect_true(cache_bin_ncached_get_local(&bin, &info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -185,7 +187,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get_local(&bin, &info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* @@ -202,60 +204,64 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ - while (cache_bin_ncached_get_local(&bin, &info)) { + while (cache_bin_ncached_get_local(&bin, bin_info)) { cache_bin_alloc(&bin, &success); expect_true(success, ""); } /* Test fill. */ /* Try to fill all, succeed fully. 
*/ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, ncached_max); + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, + ncached_max); /* Try to fill all, succeed partially. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, ncached_max / 2); /* Try to fill all, fail completely. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max, 0); + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, 0); /* Try to fill some, succeed fully. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, ncached_max / 2); /* Try to fill some, succeed partially. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, ncached_max / 4); /* Try to fill some, fail completely. */ - do_fill_test(&bin, &info, ptrs, ncached_max, ncached_max / 2, 0); + do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, 0); - do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max); - do_flush_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); - do_flush_test(&bin, &info, ptrs, ncached_max, 0); - do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_test(&bin, &info, ptrs, ncached_max / 2, 0); + do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max); + do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2); + do_flush_test(&bin, bin_info, ptrs, ncached_max, 0); + do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max * 2); - 
do_batch_alloc_test(&bin, &info, ptrs, ncached_max, ncached_max / 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 1); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max, 0); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, ncached_max); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, + ncached_max * 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, ncached_max); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max / 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 4); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 2); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 1); - do_batch_alloc_test(&bin, &info, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, &info, ptrs, 2, ncached_max); - do_batch_alloc_test(&bin, &info, ptrs, 2, 2); - do_batch_alloc_test(&bin, &info, ptrs, 2, 1); - do_batch_alloc_test(&bin, &info, ptrs, 2, 0); - do_batch_alloc_test(&bin, &info, ptrs, 1, 2); - do_batch_alloc_test(&bin, &info, ptrs, 1, 1); - do_batch_alloc_test(&bin, &info, ptrs, 1, 0); - do_batch_alloc_test(&bin, &info, ptrs, 0, 2); - do_batch_alloc_test(&bin, &info, ptrs, 0, 1); - do_batch_alloc_test(&bin, &info, ptrs, 0, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 
ncached_max); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, 2, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, 1, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, 1, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, 1, 0); + do_batch_alloc_test(&bin, bin_info, ptrs, 0, 2); + do_batch_alloc_test(&bin, bin_info, ptrs, 0, 1); + do_batch_alloc_test(&bin, bin_info, ptrs, 0, 0); free(ptrs); } @@ -328,6 +334,7 @@ TEST_BEGIN(test_cache_bin_stash) { cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); test_bin_init(&bin, &info); + cache_bin_info_t *bin_info = &bin.bin_info; /* * The content of this array is not accessed; instead the interior @@ -337,10 +344,10 @@ TEST_BEGIN(test_cache_bin_stash) { assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); bool ret; for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, &info) == + expect_true(cache_bin_ncached_get_local(&bin, bin_info) == (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get_local(&bin, &info) == i / 2, - "Wrong nstashed value"); + expect_true(cache_bin_nstashed_get_local(&bin, bin_info) == + i / 2, "Wrong nstashed value"); if (i % 2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } else { @@ -362,18 +369,23 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get_local(&bin, &info) == - ncached_max / 2, "Wrong nstashed value"); + expect_true(cache_bin_nstashed_get_local(&bin, + bin_info) == ncached_max / 2, + "Wrong nstashed value"); } } test_bin_init(&bin, &info); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max, 0); - do_flush_stashed_test(&bin, &info, ptrs, 0, ncached_max); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_stashed_test(&bin, &info, ptrs, 
ncached_max / 4, ncached_max / 2); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_stashed_test(&bin, &info, ptrs, ncached_max / 4, ncached_max / 4); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max, 0); + do_flush_stashed_test(&bin, bin_info, ptrs, 0, ncached_max); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max / 2); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + ncached_max / 2); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 2, + ncached_max / 4); + do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + ncached_max / 4); } TEST_END diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 6481504e..53752463 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -76,8 +76,11 @@ tcache_bytes_read_local(void) { size_t tcache_bytes = 0; tsd_t *tsd = tsd_fetch(); tcache_t *tcache = tcache_get(tsd); - for (szind_t i = 0; i < tcache_nhbins_get(tcache); i++) { + for (szind_t i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; + if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { + continue; + } cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, &cache_bin->bin_info); tcache_bytes += ncached * sz_index2size(i); @@ -211,7 +214,7 @@ TEST_BEGIN(test_tcache_max) { TEST_END static size_t -tcache_max2nhbins(size_t tcache_max) { +tcache_max2nbins(size_t tcache_max) { return sz_size2index(tcache_max) + 1; } @@ -241,23 +244,24 @@ validate_tcache_stack(tcache_t *tcache) { static void * tcache_check(void *arg) { size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; - unsigned tcache_nhbins; + unsigned tcache_nbins; tsd_t *tsd = tsd_fetch(); tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow = tcache->tcache_slow; sz = sizeof(size_t); new_tcache_max = *(size_t *)arg; min_tcache_max = 1; /* - * Check the default tcache_max and tcache_nhbins of each 
thread's + * Check the default tcache_max and tcache_nbins of each thread's * auto tcache. */ - old_tcache_max = tcache_max_get(tcache); + old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, opt_tcache_max, "Unexpected default value for tcache_max"); - tcache_nhbins = tcache_nhbins_get(tcache); - expect_zu_eq(tcache_nhbins, (size_t)global_do_not_change_nhbins, - "Unexpected default value for tcache_nhbins"); + tcache_nbins = tcache_nbins_get(tcache_slow); + expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_nbins, + "Unexpected default value for tcache_nbins"); validate_tcache_stack(tcache); /* @@ -275,12 +279,12 @@ tcache_check(void *arg) { assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, (void *)&temp_tcache_max, sz),.0, "Unexpected.mallctl().failure"); - old_tcache_max = tcache_max_get(tcache); + old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, "Unexpected value for tcache_max"); - tcache_nhbins = tcache_nhbins_get(tcache); - expect_zu_eq(tcache_nhbins, TCACHE_NBINS_MAX, - "Unexpected value for tcache_nhbins"); + tcache_nbins = tcache_nbins_get(tcache_slow); + expect_zu_eq(tcache_nbins, TCACHE_NBINS_MAX, + "Unexpected value for tcache_nbins"); assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, (void *)&min_tcache_max, sz),.0, @@ -294,10 +298,10 @@ tcache_check(void *arg) { (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); expect_false(e1, "Unexpected previous tcache state"); min_tcache_max = sz_s2u(min_tcache_max); - expect_zu_eq(tcache_max_get(tcache), min_tcache_max, + expect_zu_eq(tcache_max_get(tcache_slow), min_tcache_max, "Unexpected value for tcache_max"); - expect_zu_eq(tcache_nhbins_get(tcache), - tcache_max2nhbins(min_tcache_max), "Unexpected value for nhbins"); + expect_zu_eq(tcache_nbins_get(tcache_slow), + tcache_max2nbins(min_tcache_max), "Unexpected value for nbins"); assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, 
(void *)&new_tcache_max, sz),.0, @@ -307,18 +311,18 @@ tcache_check(void *arg) { validate_tcache_stack(tcache); /* - * Check the thread's tcache_max and nhbins both through mallctl + * Check the thread's tcache_max and nbins both through mallctl * and alloc tests. */ if (new_tcache_max > TCACHE_MAXCLASS_LIMIT) { new_tcache_max = TCACHE_MAXCLASS_LIMIT; } - old_tcache_max = tcache_max_get(tcache); + old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, new_tcache_max, "Unexpected value for tcache_max"); - tcache_nhbins = tcache_nhbins_get(tcache); - expect_zu_eq(tcache_nhbins, tcache_max2nhbins(new_tcache_max), - "Unexpected value for tcache_nhbins"); + tcache_nbins = tcache_nbins_get(tcache_slow); + expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max), + "Unexpected value for tcache_nbins"); for (unsigned alloc_option = alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { From 630f7de9520efeec096a604ce02bc7aef7b46a94 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 19 Sep 2023 14:37:09 -0700 Subject: [PATCH 2351/2608] Add mallctl to set and get ncached_max of each cache_bin. 1. `thread_tcache_ncached_max_read_sizeclass` allows users to get the ncached_max of the bin with the input sizeclass, passed in through oldp (will be upper casted if not an exact bin size is given). 2. `thread_tcache_ncached_max_write` takes in a char array representing the settings for bins in the tcache. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/cache_bin.h | 17 +- include/jemalloc/internal/ctl.h | 1 + .../internal/jemalloc_internal_macros.h | 2 + include/jemalloc/internal/tcache_externs.h | 5 +- include/jemalloc/internal/tcache_inlines.h | 7 +- include/jemalloc/internal/util.h | 8 + src/cache_bin.c | 3 +- src/ctl.c | 85 ++++++- src/jemalloc.c | 49 +--- src/tcache.c | 83 ++++++- src/util.c | 49 ++++ test/unit/tcache_max.c | 234 +++++++++++++++++- 14 files changed, 477 insertions(+), 70 deletions(-) create mode 100644 src/util.c diff --git a/Makefile.in b/Makefile.in index 3a02b3fd..594ea4f2 100644 --- a/Makefile.in +++ b/Makefile.in @@ -155,6 +155,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/thread_event.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ + $(srcroot)src/util.c \ $(srcroot)src/witness.c ifeq ($(enable_zone_allocator), 1) C_SRCS += $(srcroot)src/zone.c diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a4bacd8b..f8928a01 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -198,7 +198,8 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, assert(sz_can_use_slab(size)); return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, size, ind, zero, slow_path); - } else if (likely(ind < TCACHE_NBINS_MAX && + } else if (likely( + ind < tcache_nbins_get(tcache->tcache_slow) && !tcache_bin_disabled(ind, &tcache->bins[ind], tcache->tcache_slow))) { return tcache_alloc_large(tsdn_tsd(tsdn), arena, diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 2c831caf..e2da3b90 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -210,6 +210,11 @@ cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) { return info->ncached_max; } +/* Gets ncached_max without 
asserting that the bin is enabled. */ +static inline cache_bin_sz_t +cache_bin_ncached_max_get_unsafe(cache_bin_t *bin) { + return bin->bin_info.ncached_max; +} /* * Internal. * @@ -229,7 +234,7 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * Does difference calculations that handle wraparound correctly. Earlier must * be associated with the position earlier in memory. */ -static inline uint16_t +static inline cache_bin_sz_t cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { cache_bin_assert_earlier(bin, earlier, later); return later - earlier; @@ -584,19 +589,17 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, cache_bin_sz_t diff = bin->low_bits_empty - (uint16_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); - - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); - assert(n <= ncached_max); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, info); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); - - assert(n <= ncached_max); *nstashed = n; - /* Note that cannot assert ncached + nstashed <= ncached_max (racy). */ + /* + * Note that cannot assert anything regarding ncached_max because + * it can be configured on the fly and is thus racy. + */ } /* diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index f38236f6..1f124bfc 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -14,6 +14,7 @@ /* Maximum ctl tree depth. 
*/ #define CTL_MAX_DEPTH 7 +#define CTL_MULTI_SETTING_MAX_LEN 1000 typedef struct ctl_node_s { bool named; diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 9abcbb20..40df5feb 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -37,8 +37,10 @@ /* Various function pointers are static and immutable except during testing. */ #ifdef JEMALLOC_JET # define JET_MUTABLE +# define JET_EXTERN extern #else # define JET_MUTABLE const +# define JET_EXTERN static #endif #define JEMALLOC_VA_ARGS_HEAD(head, ...) head diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 8ca966d7..aa7ca00f 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -26,7 +26,7 @@ extern unsigned opt_lg_tcache_flush_large_div; * it should not be changed on the fly. To change the number of tcache bins * in use, refer to tcache_nbins of each tcache. */ -extern unsigned global_do_not_change_nbins; +extern unsigned global_do_not_change_tcache_nbins; /* * Maximum cached size class. 
Same as above, this is only used during threads @@ -55,6 +55,9 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); +bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, + cache_bin_sz_t *ncached_max); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 68481113..05599a5b 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -46,7 +46,7 @@ tcache_bin_settings_backup(tcache_t *tcache, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], - tcache->bins[i].bin_info.ncached_max); + cache_bin_ncached_max_get_unsafe(&tcache->bins[i])); } } @@ -54,6 +54,7 @@ JEMALLOC_ALWAYS_INLINE bool tcache_bin_disabled(szind_t ind, cache_bin_t *bin, tcache_slow_t *tcache_slow) { assert(bin != NULL); + assert(ind < TCACHE_NBINS_MAX); bool disabled = cache_bin_disabled(bin); /* @@ -66,7 +67,7 @@ tcache_bin_disabled(szind_t ind, cache_bin_t *bin, * ind < nbins and ncached_max > 0. 
*/ unsigned nbins = tcache_nbins_get(tcache_slow); - cache_bin_sz_t ncached_max = bin->bin_info.ncached_max; + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get_unsafe(bin); if (ind >= nbins) { assert(disabled); } else { @@ -215,6 +216,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_max_get(tcache->tcache_slow)); + assert(!tcache_bin_disabled(binind, &tcache->bins[binind], + tcache->tcache_slow)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 2c35ef76..f4035095 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -130,4 +130,12 @@ util_prefetch_write_range(void *ptr, size_t sz) { #undef UTIL_INLINE +/* + * Reads the settings in the following format: + * key1-key2:value|key3-key4:value|... + * Note it does not handle the ending '\0'. 
+ */ +bool +multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, + size_t *key_start, size_t *key_end, size_t *value); #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/src/cache_bin.c b/src/cache_bin.c index 67b6327b..24dabd0b 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -10,8 +10,9 @@ const uintptr_t disabled_bin = JUNK_ADDR; void cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { + assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); - assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); + assert(stack_size <= UINT16_MAX); info->ncached_max = (cache_bin_sz_t)ncached_max; } diff --git a/src/ctl.c b/src/ctl.c index af22d0aa..93144752 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -68,6 +68,8 @@ CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_max) CTL_PROTO(thread_tcache_flush) +CTL_PROTO(thread_tcache_ncached_max_write) +CTL_PROTO(thread_tcache_ncached_max_read_sizeclass) CTL_PROTO(thread_peak_read) CTL_PROTO(thread_peak_reset) CTL_PROTO(thread_prof_name) @@ -374,10 +376,17 @@ CTL_PROTO(stats_mutexes_reset) */ #define INDEX(i) {false}, i##_index +static const ctl_named_node_t thread_tcache_ncached_max_node[] = { + {NAME("read_sizeclass"), + CTL(thread_tcache_ncached_max_read_sizeclass)}, + {NAME("write"), CTL(thread_tcache_ncached_max_write)} +}; + static const ctl_named_node_t thread_tcache_node[] = { {NAME("enabled"), CTL(thread_tcache_enabled)}, {NAME("max"), CTL(thread_tcache_max)}, - {NAME("flush"), CTL(thread_tcache_flush)} + {NAME("flush"), CTL(thread_tcache_flush)}, + {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)} }; static const ctl_named_node_t thread_peak_node[] = { @@ -2282,6 +2291,78 @@ label_return: CTL_RO_NL_GEN(thread_allocated, tsd_thread_allocated_get(tsd), uint64_t) CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *) + +static int 
+thread_tcache_ncached_max_read_sizeclass_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + size_t bin_size = 0; + + /* Read the bin size from newp. */ + if (newp == NULL) { + ret = EINVAL; + goto label_return; + } + WRITE(bin_size, size_t); + + cache_bin_sz_t ncached_max = 0; + if (tcache_bin_ncached_max_read(tsd, bin_size, &ncached_max)) { + ret = EINVAL; + goto label_return; + } + size_t result = (size_t)ncached_max; + READ(result, size_t); + ret = 0; +label_return: + return ret; +} + +static int +thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, + size_t newlen) { + int ret; + WRITEONLY(); + if (newp != NULL) { + if (!tcache_available(tsd)) { + ret = ENOENT; + goto label_return; + } + char *settings = NULL; + WRITE(settings, char *); + if (settings == NULL) { + ret = EINVAL; + goto label_return; + } + /* Get the length of the setting string safely. */ + char *end = (char *)memchr(settings, '\0', + CTL_MULTI_SETTING_MAX_LEN); + if (end == NULL) { + ret = EINVAL; + goto label_return; + } + /* + * Exclude the last '\0' for len since it is not handled by + * multi_setting_parse_next. 
+ */ + size_t len = (uintptr_t)end - (uintptr_t)settings; + if (len == 0) { + ret = 0; + goto label_return; + } + + if (tcache_bins_ncached_max_write(tsd, settings, len)) { + ret = EINVAL; + goto label_return; + } + } + + ret = 0; +label_return: + return ret; +} + CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t) CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *) @@ -3155,7 +3236,7 @@ CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_nbins, unsigned) +CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_tcache_nbins, unsigned) CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 4bf5cbff..9c4e578e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -821,50 +821,6 @@ init_opt_stats_opts(const char *v, size_t vlen, char *dest) { assert(opts_len == strlen(dest)); } -/* Reads the next size pair in a multi-sized option. 
*/ -static bool -malloc_conf_multi_sizes_next(const char **slab_size_segment_cur, - size_t *vlen_left, size_t *slab_start, size_t *slab_end, size_t *new_size) { - const char *cur = *slab_size_segment_cur; - char *end; - uintmax_t um; - - set_errno(0); - - /* First number, then '-' */ - um = malloc_strtoumax(cur, &end, 0); - if (get_errno() != 0 || *end != '-') { - return true; - } - *slab_start = (size_t)um; - cur = end + 1; - - /* Second number, then ':' */ - um = malloc_strtoumax(cur, &end, 0); - if (get_errno() != 0 || *end != ':') { - return true; - } - *slab_end = (size_t)um; - cur = end + 1; - - /* Last number */ - um = malloc_strtoumax(cur, &end, 0); - if (get_errno() != 0) { - return true; - } - *new_size = (size_t)um; - - /* Consume the separator if there is one. */ - if (*end == '|') { - end++; - } - - *vlen_left -= end - *slab_size_segment_cur; - *slab_size_segment_cur = end; - - return false; -} - static void malloc_conf_format_error(const char *msg, const char *begin, const char *end) { size_t len = end - begin + 1; @@ -1351,7 +1307,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], size_t size_start; size_t size_end; size_t nshards; - bool err = malloc_conf_multi_sizes_next( + bool err = multi_setting_parse_next( &bin_shards_segment_cur, &vlen_left, &size_start, &size_end, &nshards); if (err || bin_update_shard_size( @@ -1613,7 +1569,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], size_t slab_start; size_t slab_end; size_t pgs; - err = malloc_conf_multi_sizes_next( + err = multi_setting_parse_next( &slab_size_segment_cur, &vlen_left, &slab_start, &slab_end, &pgs); @@ -4140,6 +4096,7 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); if (likely(tcache != NULL && + ind < tcache_nbins_get(tcache->tcache_slow) && !tcache_bin_disabled(ind, &tcache->bins[ind], 
tcache->tcache_slow)) && progress < batch) { if (bin == NULL) { diff --git a/src/tcache.c b/src/tcache.c index 3070193c..3fc2cae2 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -63,7 +63,7 @@ unsigned opt_lg_tcache_flush_large_div = 1; * is only used to initialize tcache_nbins in the per-thread tcache. * Directly modifying it will not affect threads already launched. */ -unsigned global_do_not_change_nbins; +unsigned global_do_not_change_tcache_nbins; /* * Max size class to be cached (can be small or large). This value is only used * to initialize tcache_max in the per-thread tcache. Directly modifying it @@ -193,8 +193,7 @@ tcache_event(tsd_t *tsd) { goto label_done; } - tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, - is_small); + tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, &cache_bin->bin_info); if (low_water > 0) { @@ -591,6 +590,28 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, assert(head_content == *cache_bin->stack_head); } +bool +tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, + cache_bin_sz_t *ncached_max) { + if (bin_size > TCACHE_MAXCLASS_LIMIT) { + return true; + } + + if (!tcache_available(tsd)) { + *ncached_max = 0; + return false; + } + + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + szind_t bin_ind = sz_size2index(bin_size); + + cache_bin_t *bin = &tcache->bins[bin_ind]; + *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) ? 
+ 0: cache_bin_info_ncached_max_get(bin, &bin->bin_info); + return false; +} + void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena) { @@ -651,8 +672,8 @@ static void tcache_default_settings_init(tcache_slow_t *tcache_slow) { assert(tcache_slow != NULL); assert(global_do_not_change_tcache_maxclass != 0); - assert(global_do_not_change_nbins != 0); - tcache_slow->tcache_nbins = global_do_not_change_nbins; + assert(global_do_not_change_tcache_nbins != 0); + tcache_slow->tcache_nbins = global_do_not_change_tcache_nbins; } static void @@ -772,7 +793,7 @@ tcache_ncached_max_compute(szind_t szind) { } } -static void +JET_EXTERN void tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { /* * Compute the values for each bin, but for bins with indices larger @@ -866,7 +887,7 @@ tcache_create_explicit(tsd_t *tsd) { * the beginning of the whole allocation (for freeing). The makes sure * the cache bins have the requested alignment. 
*/ - unsigned tcache_nbins = global_do_not_change_nbins; + unsigned tcache_nbins = global_do_not_change_tcache_nbins; size_t tcache_size, alignment; cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; tcache_bin_info_compute(tcache_bin_info); @@ -963,6 +984,52 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); } +bool +tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { + assert(tcache_available(tsd)); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + tcache_bin_settings_backup(tcache, tcache_bin_info); + const char *bin_settings_segment_cur = settings; + size_t len_left = len; + assert(len_left != 0); + + do { + size_t size_start, size_end; + size_t ncached_max; + bool err = multi_setting_parse_next(&bin_settings_segment_cur, + &len_left, &size_start, &size_end, &ncached_max); + if (err) { + return true; + } + if (size_end > TCACHE_MAXCLASS_LIMIT) { + size_end = TCACHE_MAXCLASS_LIMIT; + } + if (size_start > TCACHE_MAXCLASS_LIMIT || + size_start > size_end) { + continue; + } + /* May get called before sz_init (during malloc_conf_init). 
*/ + szind_t bin_start = sz_size2index_compute(size_start); + szind_t bin_end = sz_size2index_compute(size_end); + if (ncached_max > CACHE_BIN_NCACHED_MAX) { + ncached_max = (size_t)CACHE_BIN_NCACHED_MAX; + } + for (szind_t i = bin_start; i <= bin_end; i++) { + cache_bin_info_init(&tcache_bin_info[i], + (cache_bin_sz_t)ncached_max); + } + } while (len_left > 0); + + arena_t *assigned_arena = tcache->tcache_slow->arena; + tcache_cleanup(tsd); + tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, + tcache_bin_info); + + return false; +} + static void tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { tcache_slow_t *tcache_slow = tcache->tcache_slow; @@ -1180,7 +1247,7 @@ bool tcache_boot(tsdn_t *tsdn, base_t *base) { global_do_not_change_tcache_maxclass = sz_s2u(opt_tcache_max); assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); - global_do_not_change_nbins = + global_do_not_change_tcache_nbins = sz_size2index(global_do_not_change_tcache_maxclass) + 1; if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, diff --git a/src/util.c b/src/util.c new file mode 100644 index 00000000..b73848fb --- /dev/null +++ b/src/util.c @@ -0,0 +1,49 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/util.h" + +/* Reads the next size pair in a multi-sized option. 
*/ +bool +multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, + size_t *key_start, size_t *key_end, size_t *value) { + const char *cur = *setting_segment_cur; + char *end; + uintmax_t um; + + set_errno(0); + + /* First number, then '-' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != '-') { + return true; + } + *key_start = (size_t)um; + cur = end + 1; + + /* Second number, then ':' */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0 || *end != ':') { + return true; + } + *key_end = (size_t)um; + cur = end + 1; + + /* Last number */ + um = malloc_strtoumax(cur, &end, 0); + if (get_errno() != 0) { + return true; + } + *value = (size_t)um; + + /* Consume the separator if there is one. */ + if (*end == '|') { + end++; + } + + *len_left -= end - *setting_segment_cur; + *setting_segment_cur = end; + + return false; +} + diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 53752463..5793cb6b 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -2,6 +2,8 @@ #include "test/san.h" const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; +extern void tcache_bin_info_compute( + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); enum { alloc_option_start = 0, @@ -260,7 +262,7 @@ tcache_check(void *arg) { expect_zu_eq(old_tcache_max, opt_tcache_max, "Unexpected default value for tcache_max"); tcache_nbins = tcache_nbins_get(tcache_slow); - expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_nbins, + expect_zu_eq(tcache_nbins, (size_t)global_do_not_change_tcache_nbins, "Unexpected default value for tcache_nbins"); validate_tcache_stack(tcache); @@ -364,10 +366,238 @@ TEST_BEGIN(test_thread_tcache_max) { } TEST_END +static void +check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + size_t mib_get[4], mib_get_len; + mib_get_len = sizeof(mib_get) / sizeof(size_t); + const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; + size_t ncached_max; + size_t sz = 
sizeof(size_t); + expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, + "Unexpected mallctlnametomib() failure"); + + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + size_t bin_size = sz_index2size(i); + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, + "Unexpected ncached_max for bin %d", i); + /* Check ncached_max returned under a non-bin size. */ + bin_size--; + size_t temp_ncached_max = 0; + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&temp_ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(temp_ncached_max, ncached_max, + "Unexpected ncached_max for inaccurate bin size."); + } +} + +static void * +ncached_max_check(void* args) { + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + + /* Check the initial bin settings. */ + tcache_bin_info_compute(tcache_bin_info); + memcpy(tcache_bin_info_backup, tcache_bin_info, + sizeof(tcache_bin_info)); + unsigned nbins = tcache_nbins_get(tcache_slow); + for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + size_t mib_set[4], mib_set_len; + mib_set_len = sizeof(mib_set) / sizeof(size_t); + const char *set_name = "thread.tcache.ncached_max.write"; + expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0, + "Unexpected mallctlnametomib() failure"); + + /* Test the ncached_max set with tcache on. 
*/ + char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; + char *inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { + cache_bin_info_init(&tcache_bin_info[i], 1); + } + if (i == sz_size2index(160)) { + cache_bin_info_init(&tcache_bin_info[i], 11); + } + if (i >= sz_size2index(170) && i <= sz_size2index(320)) { + cache_bin_info_init(&tcache_bin_info[i], 22); + } + if (i >= sz_size2index(224)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + if (i >= nbins) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* + * Close the tcache and set ncached_max of some bins. It will be + * set properly but thread.tcache.ncached_max.read still returns 0 + * since the bin is not available yet. After enabling the tcache, + * the new setting will not be carried on. Instead, the default + * settings will be applied. + */ + bool e0 = false, e1; + size_t bool_sz = sizeof(bool); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_true(e1, "Unexpected previous tcache state"); + strcpy(inputs, "0-112:8"); + /* Setting returns ENOENT when the tcache is disabled. */ + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), ENOENT, + "Unexpected mallctlbymib() failure"); + /* All ncached_max should return 0 once tcache is disabled. 
*/ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + e0 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_false(e1, "Unexpected previous tcache state"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; + i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + /* + * Set ncached_max of bins not enabled yet. Then, enable them by + * resetting tcache_max. The ncached_max changes should stay. + */ + size_t tcache_max = 1024; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)),.0, + "Unexpected.mallctl().failure"); + for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + strcpy(inputs, "2048-6144:123"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + tcache_max = 6144; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)),.0, + "Unexpected.mallctl().failure"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { + if (i <= sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 123); + } else if (i > sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* Test an empty input, it should do nothing. 
*/ + strcpy(inputs, ""); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test a half-done string, it should return EINVAL and do nothing. */ + strcpy(inputs, "4-1024:7|256-1024"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test an invalid string with start size larger than end size. It + * should return success but do nothing. + */ + strcpy(inputs, "1024-256:7"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test a string exceeding the length limit, it should return EINVAL + * and do nothing. + */ + char *long_inputs = (char *)malloc(10000 * sizeof(char)); + expect_true(long_inputs != NULL, "Unexpected allocation failure."); + for (int i = 0; i < 200; i++) { + memcpy(long_inputs + i * 9, "4-1024:3|", 9); + } + memcpy(long_inputs + 200 * 9, "4-1024:3", 8); + long_inputs[200 * 9 + 8] = '\0'; + inputp = long_inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + free(long_inputs); + + /* + * Test a string with invalid characters, it should return EINVAL + * and do nothing. + */ + strcpy(inputs, "k8-1024:77p"); + inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test large ncached_max, it should return success but capped. 
*/ + strcpy(inputs, "1024-1024:65540"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], + CACHE_BIN_NCACHED_MAX); + check_bins_info(tcache_bin_info); + + return NULL; +} + +TEST_BEGIN(test_ncached_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(san_uaf_detection_enabled()); + unsigned nthreads = 8; + VARIABLE_ARRAY(thd_t, threads, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], ncached_max_check, NULL); + } + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } +} +TEST_END + int main(void) { return test( test_tcache_max, - test_thread_tcache_max); + test_thread_tcache_max, + test_ncached_max); } From 867eedfc589039257deafe7492afa7aa9ab6169f Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 16 Oct 2023 15:31:13 -0700 Subject: [PATCH 2352/2608] Fix the bug in dalloc promoted allocations. An allocation small enough will be promoted so that it does not share an extent with others. However, when dalloc, such allocations may not be dalloc as a promoted one if nbins < SC_NBINS. This commit fixes the bug. 
--- include/jemalloc/internal/arena_inlines_b.h | 29 +++++++++++---------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index f8928a01..a891b35c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -301,23 +301,24 @@ JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); - if (szind < TCACHE_NBINS_MAX && - !tcache_bin_disabled(szind, &tcache->bins[szind], - tcache->tcache_slow)) { - if (config_prof && unlikely(szind < SC_NBINS)) { - arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); - } else { + bool is_sample_promoted = config_prof && szind < SC_NBINS; + if (unlikely(is_sample_promoted)) { + arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); + } else { + if (szind < tcache_nbins_get(tcache->tcache_slow) && + !tcache_bin_disabled(szind, &tcache->bins[szind], + tcache->tcache_slow)) { tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); + } else { + edata_t *edata = emap_edata_lookup(tsdn, + &arena_emap_global, ptr); + if (large_dalloc_safety_checks(edata, ptr, szind)) { + /* See the comment in isfree. */ + return; + } + large_dalloc(tsdn, edata); } - } else { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { - /* See the comment in isfree. 
*/ - return; - } - large_dalloc(tsdn, edata); } } From 8a22d10b834cb66cce3e62dfc7606d8a491fe50b Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 11 Oct 2023 00:30:52 -0700 Subject: [PATCH 2353/2608] Allow setting default ncached_max for each bin through malloc_conf --- Makefile.in | 1 + include/jemalloc/internal/tcache_externs.h | 8 + src/jemalloc.c | 12 + src/tcache.c | 47 +++- test/unit/ncached_max.c | 264 +++++++++++++++++++++ test/unit/tcache_max.c | 230 +----------------- 6 files changed, 321 insertions(+), 241 deletions(-) create mode 100644 test/unit/ncached_max.c diff --git a/Makefile.in b/Makefile.in index 594ea4f2..df244adb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -242,6 +242,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/mq.c \ $(srcroot)test/unit/mtx.c \ $(srcroot)test/unit/nstime.c \ + $(srcroot)test/unit/ncached_max.c \ $(srcroot)test/unit/oversize_threshold.c \ $(srcroot)test/unit/pa.c \ $(srcroot)test/unit/pack.c \ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index aa7ca00f..973dbfe9 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -35,6 +35,11 @@ extern unsigned global_do_not_change_tcache_nbins; */ extern size_t global_do_not_change_tcache_maxclass; +/* Default bin info for each bin. */ +extern cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX]; +/* Records whether a bin's info is specified by malloc_conf. */ +extern bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX]; + /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and * usable via the MALLOCX_TCACHE() flag. 
The automatic per thread tcaches are @@ -55,6 +60,9 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); +bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, + size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], + bool bin_info_is_set[TCACHE_NBINS_MAX]); bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); diff --git a/src/jemalloc.c b/src/jemalloc.c index 9c4e578e..c77f2ef2 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1322,6 +1322,18 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + if (CONF_MATCH("tcache_ncached_max")) { + bool err = tcache_bin_info_settings_parse( + v, vlen, opt_tcache_ncached_max, + opt_tcache_ncached_max_set); + if (err) { + CONF_ERROR("Invalid settings for " + "tcache_ncached_max", k, klen, v, + vlen); + break; + } + CONF_CONTINUE; + } CONF_HANDLE_INT64_T(opt_mutex_max_spin, "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false); diff --git a/src/tcache.c b/src/tcache.c index 3fc2cae2..a8eaf296 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -71,6 +71,15 @@ unsigned global_do_not_change_tcache_nbins; */ size_t global_do_not_change_tcache_maxclass; +/* Default bin info for each bin. Will be initialized when thread starts. */ +cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; +/* + * Marks whether a bin's info is set already. This is used in + * tcache_bin_info_compute to avoid overwriting ncached_max specified by + * malloc_conf. + */ +bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; + tcaches_t *tcaches; /* Index of first element within tcaches that has never been used. 
*/ @@ -800,7 +809,9 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { * than tcache_nbins, no items will be cached. */ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - unsigned ncached_max = tcache_ncached_max_compute(i); + unsigned ncached_max = opt_tcache_ncached_max_set[i] ? + opt_tcache_ncached_max[i].ncached_max: + tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); cache_bin_info_init(&tcache_bin_info[i], ncached_max); } @@ -984,17 +995,9 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); } -bool -tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { - assert(tcache_available(tsd)); - tcache_t *tcache = tsd_tcachep_get(tsd); - assert(tcache != NULL); - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; - tcache_bin_settings_backup(tcache, tcache_bin_info); - const char *bin_settings_segment_cur = settings; - size_t len_left = len; - assert(len_left != 0); - +bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, + size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], + bool bin_info_is_set[TCACHE_NBINS_MAX]) { do { size_t size_start, size_end; size_t ncached_max; @@ -1019,9 +1022,29 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { for (szind_t i = bin_start; i <= bin_end; i++) { cache_bin_info_init(&tcache_bin_info[i], (cache_bin_sz_t)ncached_max); + if (bin_info_is_set != NULL) { + bin_info_is_set[i] = true; + } } } while (len_left > 0); + return false; +} + +bool +tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { + assert(tcache_available(tsd)); + assert(len != 0); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + tcache_bin_settings_backup(tcache, tcache_bin_info); + + if(tcache_bin_info_settings_parse(settings, len, tcache_bin_info, + NULL)) { + 
return true; + } + arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c new file mode 100644 index 00000000..da35d7c9 --- /dev/null +++ b/test/unit/ncached_max.c @@ -0,0 +1,264 @@ +#include "test/jemalloc_test.h" +#include "test/san.h" + +const char *malloc_conf = +"tcache_ncached_max:256-1024:1001|2048-2048:0,tcache_max:4096"; +extern void tcache_bin_info_compute( + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); + +static void +check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + size_t mib_get[4], mib_get_len; + mib_get_len = sizeof(mib_get) / sizeof(size_t); + const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; + size_t ncached_max; + size_t sz = sizeof(size_t); + expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, + "Unexpected mallctlnametomib() failure"); + + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + size_t bin_size = sz_index2size(i); + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, + "Unexpected ncached_max for bin %d", i); + /* Check ncached_max returned under a non-bin size. 
*/ + bin_size--; + size_t temp_ncached_max = 0; + expect_d_eq(mallctlbymib(mib_get, mib_get_len, + (void *)&temp_ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), 0, + "Unexpected mallctlbymib() failure"); + expect_zu_eq(temp_ncached_max, ncached_max, + "Unexpected ncached_max for inaccurate bin size."); + } +} + +static void * +ncached_max_check(void* args) { + cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; + cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); + assert(tcache != NULL); + tcache_slow_t *tcache_slow = tcache->tcache_slow; + + + tcache_bin_info_compute(tcache_bin_info); + memcpy(tcache_bin_info_backup, tcache_bin_info, + sizeof(tcache_bin_info)); + /* Check ncached_max set by malloc_conf. */ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + bool first_range = (i >= sz_size2index(256) && + i <= sz_size2index(1024)); + bool second_range = (i == sz_size2index(2048)); + cache_bin_sz_t target_ncached_max = 0; + if (first_range || second_range) { + target_ncached_max = first_range ? 1001: 0; + expect_true(opt_tcache_ncached_max_set[i], + "Unexpected state for bin %u", i); + expect_zu_eq(target_ncached_max, + tcache_bin_info[i].ncached_max, + "Unexpected generated ncached_max for bin %u", i); + } else { + expect_false(opt_tcache_ncached_max_set[i], + "Unexpected state for bin %u", i); + } + expect_zu_eq(target_ncached_max, + opt_tcache_ncached_max[i].ncached_max, + "Unexpected pre-set ncached_max for bin %u", i); + } + unsigned nbins = tcache_nbins_get(tcache_slow); + for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + /* Check the initial bin settings. 
*/ + check_bins_info(tcache_bin_info); + + size_t mib_set[4], mib_set_len; + mib_set_len = sizeof(mib_set) / sizeof(size_t); + const char *set_name = "thread.tcache.ncached_max.write"; + expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0, + "Unexpected mallctlnametomib() failure"); + + /* Test the ncached_max set with tcache on. */ + char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; + char *inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { + cache_bin_info_init(&tcache_bin_info[i], 1); + } + if (i == sz_size2index(160)) { + cache_bin_info_init(&tcache_bin_info[i], 11); + } + if (i >= sz_size2index(170) && i <= sz_size2index(320)) { + cache_bin_info_init(&tcache_bin_info[i], 22); + } + if (i >= sz_size2index(224)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + if (i >= nbins) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* + * Close the tcache and set ncached_max of some bins. It will be + * set properly but thread.tcache.ncached_max.read still returns 0 + * since the bin is not available yet. After enabling the tcache, + * the new setting will not be carried on. Instead, the default + * settings will be applied. + */ + bool e0 = false, e1; + size_t bool_sz = sizeof(bool); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_true(e1, "Unexpected previous tcache state"); + strcpy(inputs, "0-112:8"); + /* Setting returns ENOENT when the tcache is disabled. */ + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), ENOENT, + "Unexpected mallctlbymib() failure"); + /* All ncached_max should return 0 once tcache is disabled. 
*/ + for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + e0 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, + (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + expect_false(e1, "Unexpected previous tcache state"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; + i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + check_bins_info(tcache_bin_info); + + /* + * Set ncached_max of bins not enabled yet. Then, enable them by + * resetting tcache_max. The ncached_max changes should stay. + */ + size_t tcache_max = 1024; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, + "Unexpected mallctl() failure"); + for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + strcpy(inputs, "2048-6144:123"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + tcache_max = 6144; + assert_d_eq(mallctl("thread.tcache.max", + NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, + "Unexpected mallctl() failure"); + memcpy(tcache_bin_info, tcache_bin_info_backup, + sizeof(tcache_bin_info_backup)); + for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { + if (i <= sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 123); + } else if (i > sz_size2index(6144)) { + cache_bin_info_init(&tcache_bin_info[i], 0); + } + } + check_bins_info(tcache_bin_info); + + /* Test an empty input, it should do nothing. 
*/ + strcpy(inputs, ""); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test a half-done string, it should return EINVAL and do nothing. */ + strcpy(inputs, "4-1024:7|256-1024"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test an invalid string with start size larger than end size. It + * should return success but do nothing. + */ + strcpy(inputs, "1024-256:7"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* + * Test a string exceeding the length limit, it should return EINVAL + * and do nothing. + */ + char *long_inputs = (char *)malloc(10000 * sizeof(char)); + expect_true(long_inputs != NULL, "Unexpected allocation failure."); + for (int i = 0; i < 200; i++) { + memcpy(long_inputs + i * 9, "4-1024:3|", 9); + } + memcpy(long_inputs + 200 * 9, "4-1024:3", 8); + long_inputs[200 * 9 + 8] = '\0'; + inputp = long_inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + free(long_inputs); + + /* + * Test a string with invalid characters, it should return EINVAL + * and do nothing. + */ + strcpy(inputs, "k8-1024:77p"); + inputp = inputs; + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), EINVAL, + "Unexpected mallctlbymib() failure"); + check_bins_info(tcache_bin_info); + + /* Test large ncached_max, it should return success but capped. 
*/ + strcpy(inputs, "1024-1024:65540"); + expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, + (void *)&inputp, sizeof(char *)), 0, + "Unexpected mallctlbymib() failure"); + cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], + CACHE_BIN_NCACHED_MAX); + check_bins_info(tcache_bin_info); + + return NULL; +} + +TEST_BEGIN(test_ncached_max) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(san_uaf_detection_enabled()); + /* Use 8 threads to keep CI load reasonable. */ + unsigned nthreads = 8; + VARIABLE_ARRAY(thd_t, threads, nthreads); + for (unsigned i = 0; i < nthreads; i++) { + thd_create(&threads[i], ncached_max_check, NULL); + } + for (unsigned i = 0; i < nthreads; i++) { + thd_join(threads[i], NULL); + } +} +TEST_END + +int +main(void) { + return test( + test_ncached_max); +} + diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 5793cb6b..32eacadf 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -366,238 +366,10 @@ TEST_BEGIN(test_thread_tcache_max) { } TEST_END -static void -check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { - size_t mib_get[4], mib_get_len; - mib_get_len = sizeof(mib_get) / sizeof(size_t); - const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; - size_t ncached_max; - size_t sz = sizeof(size_t); - expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, - "Unexpected mallctlnametomib() failure"); - - for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - size_t bin_size = sz_index2size(i); - expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); - expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, - "Unexpected ncached_max for bin %d", i); - /* Check ncached_max returned under a non-bin size. 
*/ - bin_size--; - size_t temp_ncached_max = 0; - expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&temp_ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); - expect_zu_eq(temp_ncached_max, ncached_max, - "Unexpected ncached_max for inaccurate bin size."); - } -} - -static void * -ncached_max_check(void* args) { - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; - cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; - tsd_t *tsd = tsd_fetch(); - tcache_t *tcache = tsd_tcachep_get(tsd); - assert(tcache != NULL); - tcache_slow_t *tcache_slow = tcache->tcache_slow; - - /* Check the initial bin settings. */ - tcache_bin_info_compute(tcache_bin_info); - memcpy(tcache_bin_info_backup, tcache_bin_info, - sizeof(tcache_bin_info)); - unsigned nbins = tcache_nbins_get(tcache_slow); - for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - check_bins_info(tcache_bin_info); - - size_t mib_set[4], mib_set_len; - mib_set_len = sizeof(mib_set) / sizeof(size_t); - const char *set_name = "thread.tcache.ncached_max.write"; - expect_d_eq(mallctlnametomib(set_name, mib_set, &mib_set_len), 0, - "Unexpected mallctlnametomib() failure"); - - /* Test the ncached_max set with tcache on. 
*/ - char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; - char *inputp = inputs; - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { - cache_bin_info_init(&tcache_bin_info[i], 1); - } - if (i == sz_size2index(160)) { - cache_bin_info_init(&tcache_bin_info[i], 11); - } - if (i >= sz_size2index(170) && i <= sz_size2index(320)) { - cache_bin_info_init(&tcache_bin_info[i], 22); - } - if (i >= sz_size2index(224)) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - if (i >= nbins) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - } - check_bins_info(tcache_bin_info); - - /* - * Close the tcache and set ncached_max of some bins. It will be - * set properly but thread.tcache.ncached_max.read still returns 0 - * since the bin is not available yet. After enabling the tcache, - * the new setting will not be carried on. Instead, the default - * settings will be applied. - */ - bool e0 = false, e1; - size_t bool_sz = sizeof(bool); - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); - expect_true(e1, "Unexpected previous tcache state"); - strcpy(inputs, "0-112:8"); - /* Setting returns ENOENT when the tcache is disabled. */ - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), ENOENT, - "Unexpected mallctlbymib() failure"); - /* All ncached_max should return 0 once tcache is disabled. 
*/ - for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - check_bins_info(tcache_bin_info); - - e0 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); - expect_false(e1, "Unexpected previous tcache state"); - memcpy(tcache_bin_info, tcache_bin_info_backup, - sizeof(tcache_bin_info_backup)); - for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; - i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - check_bins_info(tcache_bin_info); - - /* - * Set ncached_max of bins not enabled yet. Then, enable them by - * resetting tcache_max. The ncached_max changes should stay. - */ - size_t tcache_max = 1024; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, - "Unexpected mallctl() failure"); - for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - strcpy(inputs, "2048-6144:123"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - tcache_max = 6144; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, - "Unexpected mallctl() failure"); - memcpy(tcache_bin_info, tcache_bin_info_backup, - sizeof(tcache_bin_info_backup)); - for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { - if (i <= sz_size2index(6144)) { - cache_bin_info_init(&tcache_bin_info[i], 123); - } else if (i > sz_size2index(6144)) { - cache_bin_info_init(&tcache_bin_info[i], 0); - } - } - check_bins_info(tcache_bin_info); - - /* Test an empty input, it should do nothing. 
*/ - strcpy(inputs, ""); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* Test a half-done string, it should return EINVAL and do nothing. */ - strcpy(inputs, "4-1024:7|256-1024"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* - * Test an invalid string with start size larger than end size. It - * should return success but do nothing. - */ - strcpy(inputs, "1024-256:7"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* - * Test a string exceeding the length limit, it should return EINVAL - * and do nothing. - */ - char *long_inputs = (char *)malloc(10000 * sizeof(char)); - expect_true(long_inputs != NULL, "Unexpected allocation failure."); - for (int i = 0; i < 200; i++) { - memcpy(long_inputs + i * 9, "4-1024:3|", 9); - } - memcpy(long_inputs + 200 * 9, "4-1024:3", 8); - long_inputs[200 * 9 + 8] = '\0'; - inputp = long_inputs; - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - free(long_inputs); - - /* - * Test a string with invalid characters, it should return EINVAL - * and do nothing. - */ - strcpy(inputs, "k8-1024:77p"); - inputp = inputs; - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); - check_bins_info(tcache_bin_info); - - /* Test large ncached_max, it should return success but capped. 
*/ - strcpy(inputs, "1024-1024:65540"); - expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], - CACHE_BIN_NCACHED_MAX); - check_bins_info(tcache_bin_info); - - return NULL; -} - -TEST_BEGIN(test_ncached_max) { - test_skip_if(!config_stats); - test_skip_if(!opt_tcache); - test_skip_if(san_uaf_detection_enabled()); - unsigned nthreads = 8; - VARIABLE_ARRAY(thd_t, threads, nthreads); - for (unsigned i = 0; i < nthreads; i++) { - thd_create(&threads[i], ncached_max_check, NULL); - } - for (unsigned i = 0; i < nthreads; i++) { - thd_join(threads[i], NULL); - } -} -TEST_END - int main(void) { return test( test_tcache_max, - test_thread_tcache_max, - test_ncached_max); + test_thread_tcache_max); } From 6fb3b6a8e45d3e5f83b331ce8a1d41c5e5da3f4c Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 17 Oct 2023 20:17:42 -0700 Subject: [PATCH 2354/2608] Refactor the tcache initiailization 1. Pre-generate all default tcache ncached_max in tcache_boot; 2. Add getters returning default ncached_max and ncached_max_set; 3. Refactor tcache init so that it is always init with a given setting. --- include/jemalloc/internal/cache_bin.h | 10 +-- include/jemalloc/internal/tcache_externs.h | 10 +-- src/cache_bin.c | 6 +- src/jemalloc.c | 6 +- src/tcache.c | 82 ++++++++++++++-------- test/unit/ncached_max.c | 20 +++--- test/unit/tcache_max.c | 2 - 7 files changed, 75 insertions(+), 61 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index e2da3b90..67565835 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -719,8 +719,8 @@ void cache_bin_info_init(cache_bin_info_t *bin_info, * Given an array of initialized cache_bin_info_ts, determine how big an * allocation is required to initialize a full set of cache_bin_ts. 
*/ -void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, - size_t *size, size_t *alignment); +void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, + szind_t ninfos, size_t *size, size_t *alignment); /* * Actually initialize some cache bins. Callers should allocate the backing @@ -729,11 +729,11 @@ void cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, * cache_bin_postincrement. *alloc_cur will then point immediately past the end * of the allocation. */ -void cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, +void cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset); void cache_bin_postincrement(void *alloc, size_t *cur_offset); -void cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, - size_t *cur_offset); +void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, + void *alloc, size_t *cur_offset); void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max); bool cache_bin_stack_use_thp(void); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 973dbfe9..732adacb 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -35,11 +35,6 @@ extern unsigned global_do_not_change_tcache_nbins; */ extern size_t global_do_not_change_tcache_maxclass; -/* Default bin info for each bin. */ -extern cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX]; -/* Records whether a bin's info is specified by malloc_conf. */ -extern bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX]; - /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and * usable via the MALLOCX_TCACHE() flag. 
The automatic per thread tcaches are @@ -60,9 +55,8 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, - size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], - bool bin_info_is_set[TCACHE_NBINS_MAX]); +bool tcache_bin_info_default_init(const char *bin_settings_segment_cur, + size_t len_left); bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); diff --git a/src/cache_bin.c b/src/cache_bin.c index 24dabd0b..c3b94e54 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -28,7 +28,7 @@ cache_bin_stack_use_thp(void) { } void -cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, +cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos, size_t *size, size_t *alignment) { /* For the total bin stack region (per tcache), reserve 2 more slots so * that @@ -51,7 +51,7 @@ cache_bin_info_compute_alloc(cache_bin_info_t *infos, szind_t ninfos, } void -cache_bin_preincrement(cache_bin_info_t *infos, szind_t ninfos, void *alloc, +cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset) { if (config_debug) { size_t computed_size; @@ -76,7 +76,7 @@ cache_bin_postincrement(void *alloc, size_t *cur_offset) { } void -cache_bin_init(cache_bin_t *bin, cache_bin_info_t *info, void *alloc, +cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, size_t *cur_offset) { /* * The full_position points to the lowest available space. 
Allocations diff --git a/src/jemalloc.c b/src/jemalloc.c index c77f2ef2..5da22a53 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1323,14 +1323,12 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } if (CONF_MATCH("tcache_ncached_max")) { - bool err = tcache_bin_info_settings_parse( - v, vlen, opt_tcache_ncached_max, - opt_tcache_ncached_max_set); + bool err = tcache_bin_info_default_init( + v, vlen); if (err) { CONF_ERROR("Invalid settings for " "tcache_ncached_max", k, klen, v, vlen); - break; } CONF_CONTINUE; } diff --git a/src/tcache.c b/src/tcache.c index a8eaf296..02627896 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -71,14 +71,17 @@ unsigned global_do_not_change_tcache_nbins; */ size_t global_do_not_change_tcache_maxclass; -/* Default bin info for each bin. Will be initialized when thread starts. */ -cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; +/* + * Default bin info for each bin. Will be initialized in malloc_conf_init + * and tcache_boot and should not be modified after that. + */ +static cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; /* * Marks whether a bin's info is set already. This is used in * tcache_bin_info_compute to avoid overwriting ncached_max specified by - * malloc_conf. + * malloc_conf. It should be set only when parsing malloc_conf. 
*/ -bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; +static bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; tcaches_t *tcaches; @@ -599,6 +602,16 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, assert(head_content == *cache_bin->stack_head); } +JET_EXTERN bool +tcache_get_default_ncached_max_set(szind_t ind) { + return opt_tcache_ncached_max_set[ind]; +} + +JET_EXTERN const cache_bin_info_t * +tcache_get_default_ncached_max(void) { + return opt_tcache_ncached_max; +} + bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max) { @@ -687,7 +700,7 @@ tcache_default_settings_init(tcache_slow_t *tcache_slow) { static void tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - void *mem, cache_bin_info_t *tcache_bin_info) { + void *mem, const cache_bin_info_t *tcache_bin_info) { tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; @@ -809,8 +822,8 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { * than tcache_nbins, no items will be cached. */ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - unsigned ncached_max = opt_tcache_ncached_max_set[i] ? - opt_tcache_ncached_max[i].ncached_max: + unsigned ncached_max = tcache_get_default_ncached_max_set(i) ? 
+ (unsigned)tcache_get_default_ncached_max()[i].ncached_max: tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); cache_bin_info_init(&tcache_bin_info[i], ncached_max); @@ -819,7 +832,7 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { static bool tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, - cache_bin_info_t *tcache_bin_info) { + const cache_bin_info_t *tcache_bin_info) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); @@ -873,20 +886,11 @@ tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, return false; } -static bool -tsd_tcache_data_init_with_bin_settings(tsd_t *tsd, arena_t *arena, - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { - assert(tcache_bin_info != NULL); - return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); -} - /* Initialize auto tcache (embedded in TSD). */ static bool -tsd_tcache_data_init(tsd_t *tsd, arena_t *arena) { - /* Takes 146B stack space. 
*/ - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; - tcache_bin_info_compute(tcache_bin_info); - +tsd_tcache_data_init(tsd_t *tsd, arena_t *arena, + const cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { + assert(tcache_bin_info != NULL); return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); } @@ -900,10 +904,8 @@ tcache_create_explicit(tsd_t *tsd) { */ unsigned tcache_nbins = global_do_not_change_tcache_nbins; size_t tcache_size, alignment; - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; - tcache_bin_info_compute(tcache_bin_info); - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, - &tcache_size, &alignment); + cache_bin_info_compute_alloc(tcache_get_default_ncached_max(), + tcache_nbins, &tcache_size, &alignment); size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t); @@ -920,7 +922,8 @@ tcache_create_explicit(tsd_t *tsd) { tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); tcache_default_settings_init(tcache_slow); - tcache_init(tsd, tcache_slow, tcache, mem, tcache_bin_info); + tcache_init(tsd, tcache_slow, tcache, mem, + tcache_get_default_ncached_max()); tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL)); @@ -941,7 +944,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { if (opt_tcache) { /* Trigger tcache init. 
*/ - tsd_tcache_data_init(tsd, NULL); + tsd_tcache_data_init(tsd, NULL, + tcache_get_default_ncached_max()); } return false; @@ -952,7 +956,8 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd, NULL); + tsd_tcache_data_init(tsd, NULL, + tcache_get_default_ncached_max()); } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -988,14 +993,14 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { tcache_max_set(tcache_slow, tcache_max); if (enabled) { - tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, - tcache_bin_info); + tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); } assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); } -bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, +static bool +tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, size_t len_left, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX], bool bin_info_is_set[TCACHE_NBINS_MAX]) { do { @@ -1031,6 +1036,14 @@ bool tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, return false; } +bool +tcache_bin_info_default_init(const char *bin_settings_segment_cur, + size_t len_left) { + return tcache_bin_info_settings_parse(bin_settings_segment_cur, + len_left, opt_tcache_ncached_max, opt_tcache_ncached_max_set); +} + + bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { assert(tcache_available(tsd)); @@ -1047,7 +1060,7 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); - tsd_tcache_data_init_with_bin_settings(tsd, assigned_arena, + tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); return false; @@ -1272,6 +1285,13 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { assert(global_do_not_change_tcache_maxclass <= TCACHE_MAXCLASS_LIMIT); global_do_not_change_tcache_nbins = 
sz_size2index(global_do_not_change_tcache_maxclass) + 1; + /* + * Pre-compute default bin info and store the results in + * opt_tcache_ncached_max. After the changes here, + * opt_tcache_ncached_max should not be modified and should always be + * accessed using tcache_get_default_ncached_max. + */ + tcache_bin_info_compute(opt_tcache_ncached_max); if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, malloc_mutex_rank_exclusive)) { diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c index da35d7c9..1a0d2885 100644 --- a/test/unit/ncached_max.c +++ b/test/unit/ncached_max.c @@ -2,9 +2,11 @@ #include "test/san.h" const char *malloc_conf = -"tcache_ncached_max:256-1024:1001|2048-2048:0,tcache_max:4096"; +"tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096"; extern void tcache_bin_info_compute( cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); +extern bool tcache_get_default_ncached_max_set(szind_t ind); +extern const cache_bin_info_t *tcache_get_default_ncached_max(void); static void check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { @@ -54,21 +56,23 @@ ncached_max_check(void* args) { bool first_range = (i >= sz_size2index(256) && i <= sz_size2index(1024)); bool second_range = (i == sz_size2index(2048)); + bool third_range = (i == sz_size2index(8192)); cache_bin_sz_t target_ncached_max = 0; - if (first_range || second_range) { - target_ncached_max = first_range ? 1001: 0; - expect_true(opt_tcache_ncached_max_set[i], + if (first_range || second_range || third_range) { + target_ncached_max = first_range ? 1001: + (second_range ? 
0: 1); + expect_true(tcache_get_default_ncached_max_set(i), "Unexpected state for bin %u", i); expect_zu_eq(target_ncached_max, tcache_bin_info[i].ncached_max, "Unexpected generated ncached_max for bin %u", i); + expect_zu_eq(target_ncached_max, + tcache_get_default_ncached_max()[i].ncached_max, + "Unexpected pre-set ncached_max for bin %u", i); } else { - expect_false(opt_tcache_ncached_max_set[i], + expect_false(tcache_get_default_ncached_max_set(i), "Unexpected state for bin %u", i); } - expect_zu_eq(target_ncached_max, - opt_tcache_ncached_max[i].ncached_max, - "Unexpected pre-set ncached_max for bin %u", i); } unsigned nbins = tcache_nbins_get(tcache_slow); for (szind_t i = nbins; i < TCACHE_NBINS_MAX; i++) { diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 32eacadf..c740b5e7 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -2,8 +2,6 @@ #include "test/san.h" const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; -extern void tcache_bin_info_compute( - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); enum { alloc_option_start = 0, From d88fa71bbd8f22814ead264eff07ba70f05f3291 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Oct 2023 10:40:32 -0700 Subject: [PATCH 2355/2608] Fix nfill = 0 bug when ncached_max is 1 --- src/arena.c | 1 + src/tcache.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/arena.c b/src/arena.c index 4a383670..9a8e5d64 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1023,6 +1023,7 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, const unsigned nfill) { assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); + assert(nfill != 0); const bin_info_t *bin_info = &bin_infos[binind]; diff --git a/src/tcache.c b/src/tcache.c index 02627896..d57574ca 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -257,6 +257,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, 
assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); unsigned nfill = cache_bin_info_ncached_max_get(cache_bin, &cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; + if (nfill == 0) { + nfill = 1; + } arena_cache_bin_fill_small(tsdn, arena, cache_bin, &cache_bin->bin_info, binind, nfill); tcache_slow->bin_refilled[binind] = true; From 04d1a87b78230931aa28cca72bef4424223a8d39 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 18 Oct 2023 12:13:35 -0700 Subject: [PATCH 2356/2608] Fix a zero-initializer warning on macOS. --- include/jemalloc/internal/tcache_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index a91b3252..578a199e 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -10,7 +10,7 @@ typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). */ #define TCACHE_ZERO_INITIALIZER {0} -#define TCACHE_SLOW_ZERO_INITIALIZER {0} +#define TCACHE_SLOW_ZERO_INITIALIZER {{0}} /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false From 756d4df2fd1b5bde025abed50c9b771376d72c6f Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Oct 2023 16:44:36 -0700 Subject: [PATCH 2357/2608] Add util.c into vs project file. 
--- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters | 3 +++ msvc/projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters | 3 +++ 8 files changed, 16 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index ec028a1a..03c241ca 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a8004dbd..5d23d8e2 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 66ba849d..8eaab36b 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff 
--git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 7d9a1aa0..cd871379 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -96,6 +96,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 1b43e9f2..514368aa 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -166,6 +166,9 @@ Source Files + + Source Files + Source Files From e2cd27132acfe04604352dbaa9d95b124f9ea50e Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 22 Oct 2023 22:23:13 -0700 Subject: [PATCH 2358/2608] Change stack_size assertion back to the more compatabile one. --- src/cache_bin.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cache_bin.c b/src/cache_bin.c index c3b94e54..0454dbde 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -12,7 +12,7 @@ cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); - assert(stack_size <= UINT16_MAX); + assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); info->ncached_max = (cache_bin_sz_t)ncached_max; } From 3025b021b9206478d2edcf017f1df7657d35e615 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 23 Oct 2023 13:00:10 -0700 Subject: [PATCH 2359/2608] Optimize mutex and bin alignment / locality. 
--- include/jemalloc/internal/arena_structs.h | 5 ++++- include/jemalloc/internal/mutex.h | 23 ++++++++++++----------- src/arena.c | 7 ++++++- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 6f79be97..803ed25c 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -98,10 +98,13 @@ struct arena_s { /* * The arena is allocated alongside its bins; really this is a * dynamically sized array determined by the binshard settings. + * Enforcing cacheline-alignment to minimize the number of cachelines + * touched on the hot paths. */ JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " "Use `arena_get_bin` instead.") - bin_t all_bins[0]; + JEMALLOC_ALIGNED(CACHELINE) + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 46f22aec..75abf298 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -32,6 +32,12 @@ struct malloc_mutex_s { * unlocking thread). */ mutex_prof_data_t prof_data; + /* + * Hint flag to avoid exclusive cache line contention + * during spin waiting. Placed along with prof_data + * since it's always modified even with no contention. + */ + atomic_b_t locked; #ifdef _WIN32 # if _WIN32_WINNT >= 0x0600 SRWLOCK lock; @@ -46,11 +52,6 @@ struct malloc_mutex_s { #else pthread_mutex_t lock; #endif - /* - * Hint flag to avoid exclusive cache line contention - * during spin waiting - */ - atomic_b_t locked; }; /* * We only touch witness when configured w/ debug. 
However we @@ -99,21 +100,21 @@ struct malloc_mutex_s { #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) # if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, OS_UNFAIR_LOCK_INIT, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #elif (defined(JEMALLOC_MUTEX_INIT_CB)) # if (defined(JEMALLOC_DEBUG)) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, NULL, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif @@ -121,11 +122,11 @@ struct malloc_mutex_s { # define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT # if defined(JEMALLOC_DEBUG) # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} # else # define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, PTHREAD_MUTEX_INITIALIZER, ATOMIC_INIT(false)}}, \ + {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} # endif #endif diff --git a/src/arena.c b/src/arena.c index 9a8e5d64..b4ead26a 100644 --- a/src/arena.c +++ 
b/src/arena.c @@ -1666,11 +1666,16 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } } - size_t arena_size = sizeof(arena_t) + sizeof(bin_t) * nbins_total; + size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + assert((uintptr_t)&arena->all_bins[nbins_total -1] + sizeof(bin_t) <= + (uintptr_t)arena + arena_size); + ) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); From e4817c8d89a2a413e835c4adeab5c5c4412f9235 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 24 Oct 2023 13:51:14 -0700 Subject: [PATCH 2360/2608] Cleanup cache_bin_info_t* info input args --- include/jemalloc/internal/arena_externs.h | 3 +- include/jemalloc/internal/cache_bin.h | 97 +++++----- include/jemalloc/internal/tcache_inlines.h | 7 +- src/arena.c | 13 +- src/cache_bin.c | 2 +- src/tcache.c | 56 +++--- test/unit/cache_bin.c | 200 ++++++++++----------- test/unit/tcache_max.c | 4 +- 8 files changed, 175 insertions(+), 207 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index d79b607a..7a29fd8b 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,8 +63,7 @@ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, - const unsigned nfill); + cache_bin_t *cache_bin, szind_t binind, const unsigned nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); diff --git a/include/jemalloc/internal/cache_bin.h 
b/include/jemalloc/internal/cache_bin.h index 67565835..a26c3671 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -202,19 +202,19 @@ cache_bin_disabled(cache_bin_t *bin) { return disabled; } -/* Returns ncached_max: Upper limit on ncached. */ -static inline cache_bin_sz_t -cache_bin_info_ncached_max_get(cache_bin_t *bin, cache_bin_info_t *info) { - assert(!cache_bin_disabled(bin)); - assert(info == &bin->bin_info); - return info->ncached_max; -} - /* Gets ncached_max without asserting that the bin is enabled. */ static inline cache_bin_sz_t cache_bin_ncached_max_get_unsafe(cache_bin_t *bin) { return bin->bin_info.ncached_max; } + +/* Returns ncached_max: Upper limit on ncached. */ +static inline cache_bin_sz_t +cache_bin_ncached_max_get(cache_bin_t *bin) { + assert(!cache_bin_disabled(bin)); + return cache_bin_ncached_max_get_unsafe(bin); +} + /* * Internal. * @@ -267,9 +267,9 @@ cache_bin_ncached_get_internal(cache_bin_t *bin) { * possible. */ static inline cache_bin_sz_t -cache_bin_ncached_get_local(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_ncached_get_local(cache_bin_t *bin) { cache_bin_sz_t n = cache_bin_ncached_get_internal(bin); - assert(n <= cache_bin_info_ncached_max_get(bin, info)); + assert(n <= cache_bin_ncached_max_get(bin)); return n; } @@ -304,9 +304,9 @@ cache_bin_empty_position_get(cache_bin_t *bin) { * arena statistics collection. */ static inline uint16_t -cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { return (uint16_t)bin->low_bits_empty - - cache_bin_info_ncached_max_get(bin, info) * sizeof(void *); + cache_bin_ncached_max_get(bin) * sizeof(void *); } /* @@ -315,8 +315,8 @@ cache_bin_low_bits_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { * A pointer to the position with the lowest address of the backing array. 
*/ static inline void ** -cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); +cache_bin_low_bound_get(cache_bin_t *bin) { + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); @@ -328,8 +328,8 @@ cache_bin_low_bound_get(cache_bin_t *bin, cache_bin_info_t *info) { * batch fill a nonempty cache bin. */ static inline void -cache_bin_assert_empty(cache_bin_t *bin, cache_bin_info_t *info) { - assert(cache_bin_ncached_get_local(bin, info) == 0); +cache_bin_assert_empty(cache_bin_t *bin) { + assert(cache_bin_ncached_get_local(bin) == 0); assert(cache_bin_empty_position_get(bin) == bin->stack_head); } @@ -346,10 +346,10 @@ cache_bin_low_water_get_internal(cache_bin_t *bin) { /* Returns the numeric value of low water in [0, ncached]. */ static inline cache_bin_sz_t -cache_bin_low_water_get(cache_bin_t *bin, cache_bin_info_t *info) { +cache_bin_low_water_get(cache_bin_t *bin) { cache_bin_sz_t low_water = cache_bin_low_water_get_internal(bin); - assert(low_water <= cache_bin_info_ncached_max_get(bin, info)); - assert(low_water <= cache_bin_ncached_get_local(bin, info)); + assert(low_water <= cache_bin_ncached_max_get(bin)); + assert(low_water <= cache_bin_ncached_get_local(bin)); cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); @@ -530,17 +530,16 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { /* Get the number of stashed pointers. 
*/ JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t ncached_max = cache_bin_info_ncached_max_get(bin, info); - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, - info); +cache_bin_nstashed_get_internal(cache_bin_t *bin) { + cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, bin->low_bits_full) / sizeof(void *); assert(n <= ncached_max); if (config_debug && n != 0) { /* Below are for assertions only. */ - void **low_bound = cache_bin_low_bound_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin); assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); void *stashed = *(low_bound + n - 1); @@ -556,9 +555,9 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin, cache_bin_info_t *info) { } JEMALLOC_ALWAYS_INLINE cache_bin_sz_t -cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { - cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin, info); - assert(n <= cache_bin_info_ncached_max_get(bin, info)); +cache_bin_nstashed_get_local(cache_bin_t *bin) { + cache_bin_sz_t n = cache_bin_nstashed_get_internal(bin); + assert(n <= cache_bin_ncached_max_get(bin)); return n; } @@ -579,12 +578,12 @@ cache_bin_nstashed_get_local(cache_bin_t *bin, cache_bin_info_t *info) { * This function should not call other utility functions because the racy * condition may cause unexpected / undefined behaviors in unverified utility * functions. Currently, this function calls two utility functions - * cache_bin_info_ncached_max_get and cache_bin_low_bits_low_bound_get because + * cache_bin_ncached_max_get and cache_bin_low_bits_low_bound_get because * they help access values that will not be concurrently modified. 
*/ static inline void -cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { +cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, + cache_bin_sz_t *nstashed) { /* Racy version of cache_bin_ncached_get_internal. */ cache_bin_sz_t diff = bin->low_bits_empty - (uint16_t)(uintptr_t)bin->stack_head; @@ -592,8 +591,7 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_info_t *info, *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin, - info); + uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); *nstashed = n; /* @@ -643,9 +641,9 @@ struct cache_bin_ptr_array_s { * finish_fill call before doing any alloc/dalloc operations on the bin. */ static inline void -cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { - cache_bin_assert_empty(bin, info); +cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nfill) { + cache_bin_assert_empty(bin); arr->ptr = cache_bin_empty_position_get(bin) - nfill; } @@ -655,9 +653,9 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_info_t *info, * case of OOM. */ static inline void -cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { - cache_bin_assert_empty(bin, info); +cache_bin_finish_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nfilled) { + cache_bin_assert_empty(bin); void **empty_position = cache_bin_empty_position_get(bin); if (nfilled < arr->n) { memmove(empty_position - nfilled, empty_position - arr->n, @@ -671,17 +669,17 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_info_t *info, * everything we give them. 
*/ static inline void -cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_info_t *info, +cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { arr->ptr = cache_bin_empty_position_get(bin) - nflush; - assert(cache_bin_ncached_get_local(bin, info) == 0 + assert(cache_bin_ncached_get_local(bin) == 0 || *arr->ptr != NULL); } static inline void -cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { - unsigned rem = cache_bin_ncached_get_local(bin, info) - nflushed; +cache_bin_finish_flush(cache_bin_t *bin, cache_bin_ptr_array_t *arr, + cache_bin_sz_t nflushed) { + unsigned rem = cache_bin_ncached_get_local(bin) - nflushed; memmove(bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head += nflushed; @@ -690,23 +688,22 @@ cache_bin_finish_flush(cache_bin_t *bin, cache_bin_info_t *info, static inline void cache_bin_init_ptr_array_for_stashed(cache_bin_t *bin, szind_t binind, - cache_bin_info_t *info, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nstashed) { + cache_bin_ptr_array_t *arr, cache_bin_sz_t nstashed) { assert(nstashed > 0); - assert(cache_bin_nstashed_get_local(bin, info) == nstashed); + assert(cache_bin_nstashed_get_local(bin) == nstashed); - void **low_bound = cache_bin_low_bound_get(bin, info); + void **low_bound = cache_bin_low_bound_get(bin); arr->ptr = low_bound; assert(*arr->ptr != NULL); } static inline void -cache_bin_finish_flush_stashed(cache_bin_t *bin, cache_bin_info_t *info) { - void **low_bound = cache_bin_low_bound_get(bin, info); +cache_bin_finish_flush_stashed(cache_bin_t *bin) { + void **low_bound = cache_bin_low_bound_get(bin); /* Reset the bin local full position. 
*/ bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; - assert(cache_bin_nstashed_get_local(bin, info) == 0); + assert(cache_bin_nstashed_get_local(bin) == 0); } /* diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 05599a5b..e8e3b41f 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -200,8 +200,7 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, arena_dalloc_small(tsd_tsdn(tsd), ptr); return; } - cache_bin_sz_t max = cache_bin_info_ncached_max_get( - bin, &bin->bin_info); + cache_bin_sz_t max = cache_bin_ncached_max_get(bin); unsigned remain = max >> opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); @@ -221,8 +220,8 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_info_ncached_max_get( - bin, &bin->bin_info) >> opt_lg_tcache_flush_large_div; + unsigned remain = cache_bin_ncached_max_get(bin) >> + opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/src/arena.c b/src/arena.c index b4ead26a..4e923015 100644 --- a/src/arena.c +++ b/src/arena.c @@ -168,8 +168,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } cache_bin_sz_t ncached, nstashed; - cache_bin_nitems_get_remote(cache_bin, - &cache_bin->bin_info, &ncached, &nstashed); + cache_bin_nitems_get_remote(cache_bin, &ncached, &nstashed); astats->tcache_bytes += ncached * sz_index2size(i); astats->tcache_stashed_bytes += nstashed * sz_index2size(i); @@ -1020,16 +1019,14 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t 
*cache_bin, cache_bin_info_t *cache_bin_info, szind_t binind, - const unsigned nfill) { - assert(cache_bin_ncached_get_local(cache_bin, cache_bin_info) == 0); + cache_bin_t *cache_bin, szind_t binind, const unsigned nfill) { + assert(cache_bin_ncached_get_local(cache_bin) == 0); assert(nfill != 0); const bin_info_t *bin_info = &bin_infos[binind]; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_init_ptr_array_for_fill(cache_bin, cache_bin_info, &ptrs, - nfill); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. After both are exhausted, new slabs will be allocated through @@ -1143,7 +1140,7 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(cache_bin, cache_bin_info, &ptrs, filled); + cache_bin_finish_fill(cache_bin, &ptrs, filled); arena_decay_tick(tsdn, arena); } diff --git a/src/cache_bin.c b/src/cache_bin.c index 0454dbde..6438705f 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -100,7 +100,7 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); if (!cache_bin_disabled(bin)) { - assert(cache_bin_ncached_get_local(bin, &bin->bin_info) == 0); + assert(cache_bin_ncached_get_local(bin) == 0); } assert(cache_bin_empty_position_get(bin) == empty_position); diff --git a/src/tcache.c b/src/tcache.c index d57574ca..015bdb11 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -142,10 +142,8 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_t *cache_bin = &tcache->bins[szind]; assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + cache_bin_sz_t 
low_water = cache_bin_low_water_get(cache_bin); assert(!tcache_slow->bin_refilled[szind]); size_t nflush = low_water - (low_water >> 2); @@ -168,8 +166,8 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - if ((cache_bin_info_ncached_max_get(cache_bin, &cache_bin->bin_info) - >> (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { + if ((cache_bin_ncached_max_get(cache_bin) >> + (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { tcache_slow->lg_fill_div[szind]++; } } @@ -181,10 +179,8 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, (unsigned)(ncached - low_water + (low_water >> 2))); } @@ -206,8 +202,7 @@ tcache_event(tsd_t *tsd) { } tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); if (low_water > 0) { if (is_small) { tcache_gc_small(tsd, tcache_slow, tcache, szind); @@ -255,13 +250,12 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); - unsigned nfill = cache_bin_info_ncached_max_get(cache_bin, - &cache_bin->bin_info) >> tcache_slow->lg_fill_div[binind]; + unsigned nfill = cache_bin_ncached_max_get(cache_bin) + >> tcache_slow->lg_fill_div[binind]; if 
(nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, - &cache_bin->bin_info, binind, nfill); + arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, nfill); tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(cache_bin, tcache_success); @@ -533,20 +527,17 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); assert((cache_bin_sz_t)rem <= ncached); unsigned nflush = ncached - rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); - cache_bin_init_ptr_array_for_flush(cache_bin, &cache_bin->bin_info, - &ptrs, nflush); + cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &cache_bin->bin_info, &ptrs, - ncached - rem); + cache_bin_finish_flush(cache_bin, &ptrs, ncached - rem); } void @@ -575,33 +566,30 @@ void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small) { assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); - cache_bin_info_t *info = &cache_bin->bin_info; /* * The two below are for assertion only. The content of original cached * items remain unchanged -- the stashed items reside on the other end * of the stack. Checking the stack head and ncached to verify. 
*/ void *head_content = *cache_bin->stack_head; - cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin, - info); + cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin); - cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin, info); - assert(orig_cached + nstashed <= - cache_bin_info_ncached_max_get(cache_bin, info)); + cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin); + assert(orig_cached + nstashed <= cache_bin_ncached_max_get(cache_bin)); if (nstashed == 0) { return; } CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed); - cache_bin_init_ptr_array_for_stashed(cache_bin, binind, info, &ptrs, + cache_bin_init_ptr_array_for_stashed(cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); - cache_bin_finish_flush_stashed(cache_bin, info); + cache_bin_finish_flush_stashed(cache_bin); - assert(cache_bin_nstashed_get_local(cache_bin, info) == 0); - assert(cache_bin_ncached_get_local(cache_bin, info) == orig_cached); + assert(cache_bin_nstashed_get_local(cache_bin) == 0); + assert(cache_bin_ncached_get_local(cache_bin) == orig_cached); assert(head_content == *cache_bin->stack_head); } @@ -633,7 +621,7 @@ tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, cache_bin_t *bin = &tcache->bins[bin_ind]; *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) ? 
- 0: cache_bin_info_ncached_max_get(bin, &bin->bin_info); + 0: cache_bin_ncached_max_get(bin); return false; } @@ -1105,7 +1093,7 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { if (tsd_tcache) { cache_bin_t *cache_bin = &tcache->bins[0]; - cache_bin_assert_empty(cache_bin, &cache_bin->bin_info); + cache_bin_assert_empty(cache_bin); } if (tsd_tcache && cache_bin_stack_use_thp()) { b0_dalloc_tcache_stack(tsd_tsdn(tsd), tcache_slow->dyn_alloc); diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index aed34585..1bb750d7 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -1,19 +1,18 @@ #include "test/jemalloc_test.h" static void -do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt, - cache_bin_sz_t nfill_succeed) { +do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max, + cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) { bool success; void *ptr; - assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); - cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill_attempt); + cache_bin_init_ptr_array_for_fill(bin, &arr, nfill_attempt); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { arr.ptr[i] = &ptrs[i]; } - cache_bin_finish_fill(bin, info, &arr, nfill_succeed); - expect_true(cache_bin_ncached_get_local(bin, info) == nfill_succeed, + cache_bin_finish_fill(bin, &arr, nfill_succeed); + expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, ""); cache_bin_low_water_set(bin); @@ -22,18 +21,18 @@ do_fill_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true(success, ""); expect_ptr_eq(ptr, (void *)&ptrs[i], "Should pop in order filled"); - expect_true(cache_bin_low_water_get(bin, info) + expect_true(cache_bin_low_water_get(bin) == nfill_succeed - i - 1, ""); } - 
expect_true(cache_bin_ncached_get_local(bin, info) == 0, ""); - expect_true(cache_bin_low_water_get(bin, info) == 0, ""); + expect_true(cache_bin_ncached_get_local(bin) == 0, ""); + expect_true(cache_bin_low_water_get(bin) == 0, ""); } static void -do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t nfill, cache_bin_sz_t nflush) { +do_flush_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, + cache_bin_sz_t nflush) { bool success; - assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); + assert_true(cache_bin_ncached_get_local(bin) == 0, ""); for (cache_bin_sz_t i = 0; i < nfill; i++) { success = cache_bin_dalloc_easy(bin, &ptrs[i]); @@ -41,30 +40,30 @@ do_flush_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, } CACHE_BIN_PTR_ARRAY_DECLARE(arr, nflush); - cache_bin_init_ptr_array_for_flush(bin, info, &arr, nflush); + cache_bin_init_ptr_array_for_flush(bin, &arr, nflush); for (cache_bin_sz_t i = 0; i < nflush; i++) { expect_ptr_eq(arr.ptr[i], &ptrs[nflush - i - 1], ""); } - cache_bin_finish_flush(bin, info, &arr, nflush); + cache_bin_finish_flush(bin, &arr, nflush); - expect_true(cache_bin_ncached_get_local(bin, info) == nfill - nflush, + expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, ""); - while (cache_bin_ncached_get_local(bin, info) > 0) { + while (cache_bin_ncached_get_local(bin) > 0) { cache_bin_alloc(bin, &success); } } static void -do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t nfill, size_t batch) { - assert_true(cache_bin_ncached_get_local(bin, info) == 0, ""); +do_batch_alloc_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, + size_t batch) { + assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); - cache_bin_init_ptr_array_for_fill(bin, info, &arr, nfill); + cache_bin_init_ptr_array_for_fill(bin, &arr, nfill); for (cache_bin_sz_t i = 0; i < nfill; i++) { arr.ptr[i] = &ptrs[i]; } - 
cache_bin_finish_fill(bin, info, &arr, nfill); - assert_true(cache_bin_ncached_get_local(bin, info) == nfill, ""); + cache_bin_finish_fill(bin, &arr, nfill); + assert_true(cache_bin_ncached_get_local(bin) == nfill, ""); cache_bin_low_water_set(bin); void **out = malloc((batch + 1) * sizeof(void *)); @@ -73,9 +72,9 @@ do_batch_alloc_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) { expect_ptr_eq(out[i], &ptrs[i], ""); } - expect_true(cache_bin_low_water_get(bin, info) == nfill - + expect_true(cache_bin_low_water_get(bin) == nfill - (cache_bin_sz_t)n, ""); - while (cache_bin_ncached_get_local(bin, info) > 0) { + while (cache_bin_ncached_get_local(bin) > 0) { bool success; cache_bin_alloc(bin, &success); } @@ -106,13 +105,11 @@ TEST_BEGIN(test_cache_bin) { cache_bin_info_init(&info, ncached_max); cache_bin_t bin; test_bin_init(&bin, &info); - cache_bin_info_t *bin_info = &bin.bin_info; /* Initialize to empty; should then have 0 elements. 
*/ - expect_d_eq(ncached_max, cache_bin_info_ncached_max_get(&bin, - &bin.bin_info), ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); - expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, ""); + expect_d_eq(ncached_max, cache_bin_ncached_max_get(&bin), ""); + expect_true(cache_bin_ncached_get_local(&bin) == 0, ""); + expect_true(cache_bin_low_water_get(&bin) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_false(success, "Shouldn't successfully allocate when empty"); @@ -129,14 +126,14 @@ TEST_BEGIN(test_cache_bin) { void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == i, ""); + expect_true(cache_bin_ncached_get_local(&bin) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, "Should be able to dalloc into a non-full cache bin."); - expect_true(cache_bin_low_water_get(&bin, bin_info) == 0, + expect_true(cache_bin_low_water_get(&bin) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); @@ -144,9 +141,9 @@ TEST_BEGIN(test_cache_bin) { cache_bin_low_water_set(&bin); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_low_water_get(&bin, bin_info) + expect_true(cache_bin_low_water_get(&bin) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low @@ -155,9 +152,9 @@ TEST_BEGIN(test_cache_bin) { ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); 
expect_false(success, ""); - expect_true(cache_bin_low_water_get(&bin, bin_info) + expect_true(cache_bin_low_water_get(&bin) == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* This should succeed, though. */ @@ -165,13 +162,13 @@ TEST_BEGIN(test_cache_bin) { expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); - expect_true(cache_bin_low_water_get(&bin, bin_info) + expect_true(cache_bin_low_water_get(&bin) == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get_local(&bin, bin_info) + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max - i - 1, ""); } /* Now we're empty -- all alloc attempts should fail. */ - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == 0, ""); + expect_true(cache_bin_ncached_get_local(&bin) == 0, ""); ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); @@ -187,7 +184,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == ncached_max, + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* @@ -204,77 +201,72 @@ TEST_BEGIN(test_cache_bin) { expect_ptr_null(ptr, ""); /* We're going to test filling -- we must be empty to start. */ - while (cache_bin_ncached_get_local(&bin, bin_info)) { + while (cache_bin_ncached_get_local(&bin)) { cache_bin_alloc(&bin, &success); expect_true(success, ""); } /* Test fill. */ /* Try to fill all, succeed fully. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max); /* Try to fill all, succeed partially. 
*/ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max / 2); /* Try to fill all, fail completely. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max, 0); + do_fill_test(&bin, ptrs, ncached_max, ncached_max, 0); /* Try to fill some, succeed fully. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 2); /* Try to fill some, succeed partially. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 4); /* Try to fill some, fail completely. */ - do_fill_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2, 0); + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, 0); - do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max); - do_flush_test(&bin, bin_info, ptrs, ncached_max, ncached_max / 2); - do_flush_test(&bin, bin_info, ptrs, ncached_max, 0); - do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_test(&bin, bin_info, ptrs, ncached_max / 2, 0); + do_flush_test(&bin, ptrs, ncached_max, ncached_max); + do_flush_test(&bin, ptrs, ncached_max, ncached_max / 2); + do_flush_test(&bin, ptrs, ncached_max, 0); + do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_test(&bin, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, ncached_max); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, - ncached_max * 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, - ncached_max / 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max, 0); - 
do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, - ncached_max / 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, - ncached_max); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, - ncached_max / 4); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, ncached_max / 2, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, ncached_max); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, 2, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, 1, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, 1, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, 1, 0); - do_batch_alloc_test(&bin, bin_info, ptrs, 0, 2); - do_batch_alloc_test(&bin, bin_info, ptrs, 0, 1); - do_batch_alloc_test(&bin, bin_info, ptrs, 0, 0); + do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max); + do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max * 2); + do_batch_alloc_test(&bin, ptrs, ncached_max, ncached_max / 2); + do_batch_alloc_test(&bin, ptrs, ncached_max, 2); + do_batch_alloc_test(&bin, ptrs, ncached_max, 1); + do_batch_alloc_test(&bin, ptrs, ncached_max, 0); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 2); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 1); + do_batch_alloc_test(&bin, ptrs, ncached_max / 2, 0); + do_batch_alloc_test(&bin, ptrs, 2, ncached_max); + do_batch_alloc_test(&bin, ptrs, 2, 2); + do_batch_alloc_test(&bin, ptrs, 2, 1); + do_batch_alloc_test(&bin, ptrs, 2, 0); + do_batch_alloc_test(&bin, ptrs, 1, 2); + do_batch_alloc_test(&bin, ptrs, 1, 1); + do_batch_alloc_test(&bin, ptrs, 1, 0); + do_batch_alloc_test(&bin, ptrs, 0, 
2); + do_batch_alloc_test(&bin, ptrs, 0, 1); + do_batch_alloc_test(&bin, ptrs, 0, 0); free(ptrs); } TEST_END static void -do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, - cache_bin_sz_t nfill, cache_bin_sz_t nstash) { - expect_true(cache_bin_ncached_get_local(bin, info) == 0, +do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, + cache_bin_sz_t nstash) { + expect_true(cache_bin_ncached_get_local(bin) == 0, "Bin not empty"); - expect_true(cache_bin_nstashed_get_local(bin, info) == 0, + expect_true(cache_bin_nstashed_get_local(bin) == 0, "Bin not empty"); - expect_true(nfill + nstash <= info->ncached_max, "Exceeded max"); + expect_true(nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max"); bool ret; /* Fill */ @@ -282,7 +274,7 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, ret = cache_bin_dalloc_easy(bin, &ptrs[i]); expect_true(ret, "Unexpected fill failure"); } - expect_true(cache_bin_ncached_get_local(bin, info) == nfill, + expect_true(cache_bin_ncached_get_local(bin) == nfill, "Wrong cached count"); /* Stash */ @@ -290,10 +282,10 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, ret = cache_bin_stash(bin, &ptrs[i + nfill]); expect_true(ret, "Unexpected stash failure"); } - expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count"); - if (nfill + nstash == info->ncached_max) { + if (nfill + nstash == bin->bin_info.ncached_max) { ret = cache_bin_dalloc_easy(bin, &ptrs[0]); expect_false(ret, "Should not dalloc into a full bin"); ret = cache_bin_stash(bin, &ptrs[0]); @@ -308,19 +300,19 @@ do_flush_stashed_test(cache_bin_t *bin, cache_bin_info_t *info, void **ptrs, expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill], "Should not alloc stashed ptrs"); } - expect_true(cache_bin_ncached_get_local(bin, info) == 0, + expect_true(cache_bin_ncached_get_local(bin) == 0, 
"Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin, info) == nstash, + expect_true(cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count"); cache_bin_alloc(bin, &ret); expect_false(ret, "Should not alloc stashed"); /* Clear stashed ones */ - cache_bin_finish_flush_stashed(bin, info); - expect_true(cache_bin_ncached_get_local(bin, info) == 0, + cache_bin_finish_flush_stashed(bin); + expect_true(cache_bin_ncached_get_local(bin) == 0, "Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin, info) == 0, + expect_true(cache_bin_nstashed_get_local(bin) == 0, "Wrong stashed count"); cache_bin_alloc(bin, &ret); @@ -334,7 +326,6 @@ TEST_BEGIN(test_cache_bin_stash) { cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); test_bin_init(&bin, &info); - cache_bin_info_t *bin_info = &bin.bin_info; /* * The content of this array is not accessed; instead the interior @@ -344,9 +335,9 @@ TEST_BEGIN(test_cache_bin_stash) { assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); bool ret; for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin, bin_info) == + expect_true(cache_bin_ncached_get_local(&bin) == (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get_local(&bin, bin_info) == + expect_true(cache_bin_nstashed_get_local(&bin) == i / 2, "Wrong nstashed value"); if (i % 2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); @@ -369,22 +360,21 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get_local(&bin, - bin_info) == ncached_max / 2, + expect_true(cache_bin_nstashed_get_local(&bin) == ncached_max / 2, "Wrong nstashed value"); } } test_bin_init(&bin, &info); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max, 0); - do_flush_stashed_test(&bin, bin_info, ptrs, 0, ncached_max); - do_flush_stashed_test(&bin, bin_info, ptrs, 
ncached_max / 2, + do_flush_stashed_test(&bin, ptrs, ncached_max, 0); + do_flush_stashed_test(&bin, ptrs, 0, ncached_max); + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 2); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 2, + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); - do_flush_stashed_test(&bin, bin_info, ptrs, ncached_max / 4, + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 4); } TEST_END diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index c740b5e7..a64fca71 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -81,8 +81,7 @@ tcache_bytes_read_local(void) { if (tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)) { continue; } - cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin, - &cache_bin->bin_info); + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); tcache_bytes += ncached * sz_index2size(i); } return tcache_bytes; @@ -370,4 +369,3 @@ main(void) { test_tcache_max, test_thread_tcache_max); } - From eda05b39941c0ff6d5236c845e6bca70324c9a32 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 13 Dec 2023 15:21:09 -0800 Subject: [PATCH 2361/2608] Fix static analysis warnings. 
--- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 4 ++-- src/extent.c | 5 +++-- src/jemalloc.c | 8 ++++++-- src/pac.c | 9 +++++++-- src/prof.c | 3 ++- src/prof_data.c | 4 ++++ src/sz.c | 3 ++- src/tcache.c | 10 ++++++---- 9 files changed, 33 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 7a29fd8b..f91bd888 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,7 +63,7 @@ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const unsigned nfill); + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); diff --git a/src/arena.c b/src/arena.c index 4e923015..746ab328 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1019,7 +1019,7 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const unsigned nfill) { + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill) { assert(cache_bin_ncached_get_local(cache_bin) == 0); assert(nfill != 0); @@ -1056,7 +1056,7 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, bool made_progress = true; edata_t *fresh_slab = NULL; bool alloc_and_retry = false; - unsigned filled = 0; + cache_bin_sz_t filled = 0; unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); diff --git a/src/extent.c b/src/extent.c index 822c6eee..2efc7938 100644 --- a/src/extent.c +++ b/src/extent.c @@ -201,8 +201,6 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * concurrent operations. 
*/ switch (ecache->state) { - case extent_state_active: - not_reached(); case extent_state_dirty: case extent_state_muzzy: emap_update_edata_state(tsdn, pac->emap, edata, @@ -211,6 +209,9 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, case extent_state_retained: extent_deregister(tsdn, pac, edata); break; + case extent_state_active: + case extent_state_transition: + case extent_state_merging: default: not_reached(); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 5da22a53..8fba8878 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -460,8 +460,12 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { tsd_iarena_set(tsd, arena); } else { tsd_arena_set(tsd, arena); - unsigned shard = atomic_fetch_add_u(&arena->binshard_next, 1, - ATOMIC_RELAXED); + /* + * While shard acts as a random seed, the cast below should + * not make much difference. + */ + uint8_t shard = (uint8_t)atomic_fetch_add_u( + &arena->binshard_next, 1, ATOMIC_RELAXED); tsd_binshards_t *bins = tsd_binshardsp_get(tsd); for (unsigned i = 0; i < SC_NBINS; i++) { assert(bin_infos[i].n_shards > 0 && diff --git a/src/pac.c b/src/pac.c index 53e3d823..57a0c953 100644 --- a/src/pac.c +++ b/src/pac.c @@ -29,6 +29,10 @@ pac_decay_data_get(pac_t *pac, extent_state_t state, *r_decay_stats = &pac->stats->decay_muzzy; *r_ecache = &pac->ecache_muzzy; return; + case extent_state_active: + case extent_state_retained: + case extent_state_transition: + case extent_state_merging: default: unreachable(); } @@ -385,8 +389,6 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, npurged += npages; switch (ecache->state) { - case extent_state_active: - not_reached(); case extent_state_dirty: if (try_muzzy) { err = extent_purge_lazy_wrapper(tsdn, ehooks, @@ -402,7 +404,10 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, extent_dalloc_wrapper(tsdn, pac, ehooks, edata); nunmapped += npages; break; + case extent_state_active: case extent_state_retained: + case extent_state_transition: + 
case extent_state_merging: default: not_reached(); } diff --git a/src/prof.c b/src/prof.c index 52869375..1cf49740 100644 --- a/src/prof.c +++ b/src/prof.c @@ -277,7 +277,8 @@ prof_sample_new_event_wait(tsd_t *tsd) { * otherwise bytes_until_sample would be 0 if u is exactly 1.0. */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); - double u = (r == 0U) ? 1.0 : (double)r * (1.0/9007199254740992.0L); + double u = (r == 0U) ? 1.0 : (double)((long double)r * + (1.0L/9007199254740992.0L)); return (uint64_t)(log(u) / log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; diff --git a/src/prof_data.c b/src/prof_data.c index 91a9268d..39af0c90 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -709,6 +709,7 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { case prof_tctx_state_purgatory: prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx); break; + case prof_tctx_state_initializing: default: not_reached(); } @@ -764,6 +765,7 @@ prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { case prof_tctx_state_purgatory: ret = tctx; goto label_return; + case prof_tctx_state_initializing: default: not_reached(); } @@ -1393,6 +1395,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { destroy_tctx = false; destroy_gctx = false; break; + case prof_tctx_state_initializing: + case prof_tctx_state_purgatory: default: not_reached(); destroy_tctx = false; diff --git a/src/sz.c b/src/sz.c index d3115dda..89def9d5 100644 --- a/src/sz.c +++ b/src/sz.c @@ -100,7 +100,8 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1) >> SC_LG_TINY_MIN); for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { - sz_size2index_tab[dst_ind] = sc_ind; + assert(sc_ind < 1 << (sizeof(uint8_t) * 8)); + sz_size2index_tab[dst_ind] = (uint8_t)sc_ind; } } } diff --git a/src/tcache.c b/src/tcache.c index 015bdb11..ca0b1acb 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ 
-250,7 +250,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); - unsigned nfill = cache_bin_ncached_max_get(cache_bin) + cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin) >> tcache_slow->lg_fill_div[binind]; if (nfill == 0) { nfill = 1; @@ -529,7 +529,7 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); assert((cache_bin_sz_t)rem <= ncached); - unsigned nflush = ncached - rem; + cache_bin_sz_t nflush = ncached - (cache_bin_sz_t)rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); @@ -537,7 +537,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &ptrs, ncached - rem); + cache_bin_finish_flush(cache_bin, &ptrs, + ncached - (cache_bin_sz_t)rem); } void @@ -817,7 +818,8 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { (unsigned)tcache_get_default_ncached_max()[i].ncached_max: tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); - cache_bin_info_init(&tcache_bin_info[i], ncached_max); + cache_bin_info_init(&tcache_bin_info[i], + (cache_bin_sz_t)ncached_max); } } From f6fe6abdcb5372f0bad1dba0b77dedc8f95c8a78 Mon Sep 17 00:00:00 2001 From: Honggyu Kim Date: Fri, 29 Dec 2023 10:16:39 +0900 Subject: [PATCH 2362/2608] build: Make autogen.sh accept quoted extra options The current autogen.sh script doesn't allow receiving quoted extra options. If someone wants to pass extra CFLAGS that is split into multiple options with a whitespace, then a quote is required. However, the configure inside autogen.sh fails in this case as follows. 
$ ./autogen.sh CFLAGS="-Dmmap=cxl_mmap -Dmunmap=cxl_munmap" autoconf ./configure --enable-autogen CFLAGS=-Dmmap=cxl_mmap -Dmunmap=cxl_munmap configure: error: unrecognized option: `-Dmunmap=cxl_munmap' Try `./configure --help' for more information Error 0 in ./configure It's because the quote is discarded unexpectedly when calling configure. This patch is to fix this problem. Signed-off-by: Honggyu Kim --- autogen.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/autogen.sh b/autogen.sh index 75f32da6..c5325fc9 100755 --- a/autogen.sh +++ b/autogen.sh @@ -9,8 +9,8 @@ for i in autoconf; do fi done -echo "./configure --enable-autogen $@" -./configure --enable-autogen $@ +echo "./configure --enable-autogen \"$@\"" +./configure --enable-autogen "$@" if [ $? -ne 0 ]; then echo "Error $? in ./configure" exit 1 From dfb3260b97a13a90487ec74e495ca4fc684f6a44 Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 11 Dec 2023 18:25:49 +0800 Subject: [PATCH 2363/2608] Fix missing cleanup message for collected profiles. ``` sub cleanup { unlink($main::tmpfile_sym); unlink(keys %main::tempnames); # We leave any collected profiles in $HOME/jeprof in case the user wants # to look at them later. We print a message informing them of this. if ((scalar(@main::profile_files) > 0) && defined($main::collected_profile)) { if (scalar(@main::profile_files) == 1) { print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; } print STDERR "If you want to investigate this profile further, you can do:\n"; print STDERR "\n"; print STDERR " jeprof \\\n"; print STDERR " $main::prog \\\n"; print STDERR " $main::collected_profile\n"; print STDERR "\n"; } } ``` On cleanup, it would print out a message for the collected profile. If there is only one collected profile, it would be popped at L691, then `scalar(@main::profile_files)` would be 0, and no message would be printed.
--- bin/jeprof.in | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index f02c1f3e..f6999ece 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -688,15 +688,15 @@ sub Main() { my $symbol_map = {}; # Read one profile, pick the last item on the list - my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $data = ReadProfile($main::prog, $main::profile_files[0]); my $profile = $data->{profile}; my $pcs = $data->{pcs}; my $libs = $data->{libs}; # Info about main program and shared libraries $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); # Add additional profiles, if available. - if (scalar(@main::profile_files) > 0) { - foreach my $pname (@main::profile_files) { + if (scalar(@main::profile_files) > 1) { + foreach my $pname (@main::profile_files[1..$#main::profile_files]) { my $data2 = ReadProfile($main::prog, $pname); $profile = AddProfile($profile, $data2->{profile}); $pcs = AddPcs($pcs, $data2->{pcs}); From d284aad0277dd11b7e05dcc0328cc7e6f53c7023 Mon Sep 17 00:00:00 2001 From: Minsoo Choo Date: Sat, 25 Nov 2023 09:01:29 -0500 Subject: [PATCH 2364/2608] Test on more FreeBSD versions Added 14.0-RELEASE Added 15-CURRENT Added 14-STABLE Added 13-STABLE 13.0-RELEASE will be updated when 13.3-RELEASE comes out. 
--- .cirrus.yml | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index a68f3dc1..45498fb8 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -27,9 +27,22 @@ task: UNCOMMON_CONFIG: - env: UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false - freebsd_instance: - matrix: - image: freebsd-13-0-release-amd64 + matrix: + - name: 15-CURRENT + freebsd_instance: + image_family: freebsd-15-0-snap + - name: 14-STABLE + freebsd_instance: + image_family: freebsd-14-0-snap + - name: 14.0-RELEASE + freebsd_instance: + image_family: freebsd-14-0 + - name: 13-STABLE + freebsd_instance: + image_family: freebsd-13-2-snap + - name: 13.0-RELEASE + freebsd_instance: + image_family: freebsd-13-0 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 3a6296e1ef2249b5bb0cffb0be47376ea0491aad Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 4 Jan 2024 14:16:00 -0800 Subject: [PATCH 2365/2608] Disable FreeBSD on Travis CI since it's not working. Travis CI currently provides only FreeBSD 12 which is EOL. 
--- .travis.yml | 48 ------------------------------------------- scripts/gen_travis.py | 6 +++++- 2 files changed, 5 insertions(+), 49 deletions(-) diff --git a/.travis.yml b/.travis.yml index 85e0b720..ec1481c4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -34,54 +34,6 @@ jobs: - os: windows arch: amd64 env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind" - - os: freebsd - arch: amd64 - env: CC=gcc 
CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - - os: freebsd - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-libunwind --with-lg-page=16 --with-malloc-conf=tcache:false" - os: linux arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index fe4e029f..651006ca 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -309,7 +309,11 @@ def main(): jobs = '\n'.join(( generate_windows(AMD64), - generate_freebsd(AMD64), + # Travis currently provides only FreeBSD 12.1 which is EOL. Builds are + # not working as of Jan 2024. Disable the tests for now to avoid the + # noise / confusion. + + # generate_freebsd(AMD64), generate_linux(AMD64), generate_linux(PPC64LE), From 05160258df8a4e34f323b2c6eb1f2c0f59591d05 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 3 Jan 2024 11:59:02 -0800 Subject: [PATCH 2366/2608] When safety_check_fail, also embed hint msg in the abort function name because there are cases only logging crash stack traces. --- src/safety_check.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/safety_check.c b/src/safety_check.c index 7ffe1f4f..d3f68fbc 100644 --- a/src/safety_check.c +++ b/src/safety_check.c @@ -20,6 +20,20 @@ void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { safety_check_abort = abort_fn; } +/* + * In addition to malloc_write, also embed hint msg in the abort function name + * because there are cases only logging crash stack traces. 
+ */ +static void +safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(const char *buf) { + if (safety_check_abort == NULL) { + malloc_write(buf); + abort(); + } else { + safety_check_abort(buf); + } +} + void safety_check_fail(const char *format, ...) { char buf[MALLOC_PRINTF_BUFSIZE]; @@ -28,10 +42,5 @@ void safety_check_fail(const char *format, ...) { malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); va_end(ap); - if (safety_check_abort == NULL) { - malloc_write(buf); - abort(); - } else { - safety_check_abort(buf); - } + safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(buf); } From b1792c80d2870c87af79d64bcca844d19345412d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 4 Dec 2023 14:34:35 -0800 Subject: [PATCH 2367/2608] Add LOGs when entrying and exiting free and sdallocx. --- .../internal/jemalloc_internal_inlines_c.h | 8 --- src/jemalloc.c | 17 +++++-- src/jemalloc_cpp.cpp | 51 ++++++++++++++++++- 3 files changed, 61 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 8b80e3c1..6dcffac9 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -278,8 +278,6 @@ fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, if (config_stats) { bin->tstats.nrequests++; } - - LOG("core.malloc.exit", "result: %p", ret); } JEMALLOC_ALWAYS_INLINE bool @@ -306,7 +304,6 @@ malloc_initialized(void) { */ JEMALLOC_ALWAYS_INLINE void * imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { - LOG("core.malloc.entry", "size: %zu", size); if (tsd_get_allocates() && unlikely(!malloc_initialized())) { return fallback_alloc(size); } @@ -578,14 +575,9 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_sdallocx_noflags(void *ptr, size_t size) { - 
LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: 0", ptr, - size); - if (!free_fastpath(ptr, size, true)) { sdallocx_default(ptr, size, 0); } - - LOG("core.sdallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW diff --git a/src/jemalloc.c b/src/jemalloc.c index 8fba8878..88436f45 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2730,8 +2730,6 @@ malloc_default(size_t size) { hook_invoke_alloc(hook_alloc_malloc, ret, (uintptr_t)ret, args); } - LOG("core.malloc.exit", "result: %p", ret); - return ret; } @@ -2744,7 +2742,12 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { - return imalloc_fastpath(size, &malloc_default); + LOG("core.malloc.entry", "size: %zu", size); + + void * ret = imalloc_fastpath(size, &malloc_default); + + LOG("core.malloc.exit", "result: %p", ret); + return ret; } JEMALLOC_EXPORT int JEMALLOC_NOTHROW @@ -2835,7 +2838,7 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; - LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + LOG("core.calloc.entry", "num: %zu, size: %zu", num, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -3014,7 +3017,11 @@ je_free(void *ptr) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free_sized(void *ptr, size_t size) { - return je_sdallocx_noflags(ptr, size); + LOG("core.free_sized.entry", "ptr: %p, size: %zu", ptr, size); + + je_sdallocx_noflags(ptr, size); + + LOG("core.free_sized.exit", ""); } JEMALLOC_EXPORT void JEMALLOC_NOTHROW diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index 08107a8a..fffd6aee 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -112,7 +112,12 @@ template JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { - return imalloc_fastpath(size, &fallbackNewImpl); + LOG("core.operator_new.entry", "size: %zu", size); + + void * ret = imalloc_fastpath(size, &fallbackNewImpl); 
+ + LOG("core.operator_new.exit", "result: %p", ret); + return ret; } void * @@ -173,21 +178,37 @@ operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_ void operator delete(void *ptr) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete[](void *ptr) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete(void *ptr, const std::nothrow_t &) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete[](void *ptr, const std::nothrow_t &) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } #if __cpp_sized_deallocation >= 201309 @@ -198,7 +219,11 @@ sizedDeleteImpl(void* ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } + LOG("core.operator_delete.entry", "ptr: %p, size: %zu", ptr, size); + je_sdallocx_noflags(ptr, size); + + LOG("core.operator_delete.exit", ""); } void @@ -217,34 +242,56 @@ operator delete[](void *ptr, std::size_t size) noexcept { JEMALLOC_ALWAYS_INLINE void -alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { +alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) + noexcept { if (config_debug) { assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); } if (unlikely(ptr == nullptr)) { return; } + LOG("core.operator_delete.entry", "ptr: %p, size: %zu, alignment: %zu", + ptr, size, alignment); + je_sdallocx_impl(ptr, size, MALLOCX_ALIGN(alignment)); + + LOG("core.operator_delete.exit", ""); } void operator delete(void* ptr, std::align_val_t) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator 
delete[](void* ptr, std::align_val_t) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { + LOG("core.operator_delete.entry", "ptr: %p", ptr); + je_free_impl(ptr); + + LOG("core.operator_delete.exit", ""); } void From a2c52674091c53f6af1ac8b7ef8849bc7797a5ad Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 16 Jan 2024 13:07:58 -0800 Subject: [PATCH 2368/2608] HPA: Allow frequent reused alloc to bypass the slab_max_alloc limit, as long as it's within the huge page size. These requests do not concern internal fragmentation with huge pages, since the entire range is expected to be accessed. --- include/jemalloc/internal/pai.h | 10 ++++++---- src/hpa.c | 25 ++++++++++++++++++++----- src/pai.c | 7 ++++--- src/sec.c | 7 ++++--- test/unit/hpa.c | 19 ++++++++++++++++--- test/unit/sec.c | 2 +- 6 files changed, 51 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index dd64ee59..557d30d1 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -20,7 +20,7 @@ struct pai_s { * the results are not necessarily zeroed. 
*/ size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, @@ -50,9 +50,10 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, static inline size_t pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool *deferred_work_generated) { + edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated) { return self->alloc_batch(tsdn, self, size, nallocs, results, - deferred_work_generated); + frequent_reuse, deferred_work_generated); } static inline bool @@ -91,7 +92,8 @@ pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { * each item in the list. */ size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated); /* Ditto, for dalloc. 
*/ void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated); diff --git a/src/hpa.c b/src/hpa.c index ee41994f..99d1f033 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -12,7 +12,8 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated); + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, @@ -643,7 +644,9 @@ static size_t hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { - assert(size <= shard->opts.slab_max_alloc); + assert(size <= HUGEPAGE); + assert(size <= shard->opts.slab_max_alloc || + size == sz_index2size(sz_size2index(size))); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, @@ -712,14 +715,26 @@ hpa_from_pai(pai_t *self) { static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool *deferred_work_generated) { + edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated) { assert(nallocs > 0); assert((size & PAGE_MASK) == 0); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); hpa_shard_t *shard = hpa_from_pai(self); - if (size > shard->opts.slab_max_alloc) { + /* + * frequent_use here indicates this request comes from the arena bins, + * in which case it will be split into slabs, and therefore there is no + * intrinsic slack in the allocation (the 
entire range of allocated size + * will be accessed). + * + * In this case bypass the slab_max_alloc limit (if still within the + * huge page size). These requests do not concern internal + * fragmentation with huge pages (again, the full size will be used). + */ + if (!(frequent_reuse && size <= HUGEPAGE) && + (size > shard->opts.slab_max_alloc)) { return 0; } @@ -771,7 +786,7 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, edata_list_active_t results; edata_list_active_init(&results); size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results, deferred_work_generated); + &results, frequent_reuse, deferred_work_generated); assert(nallocs == 0 || nallocs == 1); edata_t *edata = edata_list_active_first(&results); return edata; diff --git a/src/pai.c b/src/pai.c index 45c87729..e8cddfc3 100644 --- a/src/pai.c +++ b/src/pai.c @@ -3,12 +3,13 @@ size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool *deferred_work_generated) { + edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated) { for (size_t i = 0; i < nallocs; i++) { bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_by_alloc); + /* zero */ false, /* guarded */ false, frequent_reuse, + &deferred_by_alloc); *deferred_work_generated |= deferred_by_alloc; if (edata == NULL) { return i; diff --git a/src/sec.c b/src/sec.c index df675590..19d69ff4 100644 --- a/src/sec.c +++ b/src/sec.c @@ -174,14 +174,15 @@ sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, static edata_t * sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin, size_t size) { + sec_bin_t *bin, size_t size, bool frequent_reuse) { malloc_mutex_assert_not_owner(tsdn, &shard->mtx); edata_list_active_t result; edata_list_active_init(&result); bool 
deferred_work_generated = false; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result, &deferred_work_generated); + 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, + &deferred_work_generated); edata_t *ret = edata_list_active_first(&result); if (ret != NULL) { @@ -251,7 +252,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (edata == NULL) { if (do_batch_fill) { edata = sec_batch_fill_and_alloc(tsdn, sec, shard, bin, - size); + size, frequent_reuse); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero, /* guarded */ false, frequent_reuse, diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 64aef59e..9e3160b4 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -84,12 +84,25 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, - false, &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, - false, false, &deferred_work_generated); + false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, + false, /* frequent_reuse */ true, &deferred_work_generated); + expect_ptr_not_null(edata, "Allocation of frequent reused failed"); + + edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, + false, /* frequent_reuse */ true, &deferred_work_generated); + expect_ptr_not_null(edata, "Allocation of frequent reused failed"); + + edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false, + false, /* frequent_reuse */ true, &deferred_work_generated); + expect_ptr_null(edata, "Allocation of larger than hugepage succeeded"); + destroy_test_data(shard); } TEST_END @@ 
-273,7 +286,7 @@ TEST_BEGIN(test_alloc_dalloc_batch) { edata_list_active_t allocs_list; edata_list_active_init(&allocs_list); size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list, &deferred_work_generated); + &allocs_list, /* frequent_reuse */ false, &deferred_work_generated); expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { allocs[i] = edata_list_active_first(&allocs_list); diff --git a/test/unit/sec.c b/test/unit/sec.c index f3ec403d..0b5e1c31 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -73,7 +73,7 @@ pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, static inline size_t pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t *)self; if (ta->alloc_fail) { From f96010b7fa8ce5f83802144bdebf2bb7a6679649 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 23 Jan 2024 16:21:04 -0800 Subject: [PATCH 2369/2608] gitignore: Start ignoring clangd dirs. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 0f5e7aae..9180ddf1 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,9 @@ /src/*.[od] /src/*.sym +# These are semantically meaningful for clangd and related tooling. +/build/ +/.cache/ compile_commands.json /static_analysis_raw_results /static_analysis_results From 6d181bc1b7a99348886984754c6227002ef4542f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 15 Feb 2024 13:34:05 -0800 Subject: [PATCH 2370/2608] Fix Cirrus CI. 13.0-RELEASE does not exist anymore. 
"The resource 'projects/freebsd-org-cloud-dev/global/images/family/freebsd-13-0' was not found" --- .cirrus.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 45498fb8..63a96d2a 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -40,9 +40,6 @@ task: - name: 13-STABLE freebsd_instance: image_family: freebsd-13-2-snap - - name: 13.0-RELEASE - freebsd_instance: - image_family: freebsd-13-0 install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 1aba4f41a3fef53fa913e655444dbba53a0c82df Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 13 Feb 2024 14:37:21 -0800 Subject: [PATCH 2371/2608] Allow zero sized memalign to pass. Instead of failing on assertions. Previously the same change was made for posix_memalign and aligned_alloc (#1554). Make memalign behave the same way even though it's obsolete. --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 88436f45..7934e767 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3052,6 +3052,7 @@ je_memalign(size_t alignment, size_t size) { static_opts_init(&sopts); dynamic_opts_init(&dopts); + sopts.bump_empty_aligned_alloc = true; sopts.min_alignment = 1; sopts.oom_string = ": Error allocating aligned memory: out of memory\n"; From 373884ab482ad1de4b839e40bd38fd154f324707 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 30 Jan 2024 10:16:28 -0800 Subject: [PATCH 2372/2608] print out all malloc_conf settings in stats --- .../internal/jemalloc_internal_externs.h | 3 ++ src/ctl.c | 26 +++++++++++- src/jemalloc.c | 41 ++++++++++++------- src/stats.c | 36 +++++++++++++++- test/unit/malloc_conf_2.c | 26 +++++++++++- 5 files changed, 114 insertions(+), 18 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 64d9aa20..9d7a9048 100644 --- 
a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -38,6 +38,9 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; +extern const char *opt_malloc_conf_symlink; +extern const char *opt_malloc_conf_env_var; + /* Escape free-fastpath when ptr & mask == 0 (for sanitization purpose). */ extern uintptr_t san_cache_bin_nonfast_mask; diff --git a/src/ctl.c b/src/ctl.c index 93144752..7c349da7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -159,6 +159,10 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) +CTL_PROTO(opt_malloc_conf_symlink) +CTL_PROTO(opt_malloc_conf_env_var) +CTL_PROTO(opt_malloc_conf_global_var) +CTL_PROTO(opt_malloc_conf_global_var_2_conf_harder) CTL_PROTO(tcache_create) CTL_PROTO(tcache_flush) CTL_PROTO(tcache_destroy) @@ -426,6 +430,14 @@ static const ctl_named_node_t config_node[] = { {NAME("xmalloc"), CTL(config_xmalloc)} }; +static const ctl_named_node_t opt_malloc_conf_node[] = { + {NAME("symlink"), CTL(opt_malloc_conf_symlink)}, + {NAME("env_var"), CTL(opt_malloc_conf_env_var)}, + {NAME("global_var"), CTL(opt_malloc_conf_global_var)}, + {NAME("global_var_2_conf_harder"), + CTL(opt_malloc_conf_global_var_2_conf_harder)} +}; + static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, @@ -502,7 +514,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), - CTL(opt_debug_double_free_max_scan)} + CTL(opt_debug_double_free_max_scan)}, + {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; static const ctl_named_node_t tcache_node[] = { @@ -2230,6 +2243,17 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, CTL_RO_NL_GEN(opt_zero_realloc, 
zero_realloc_mode_names[opt_zero_realloc_action], const char *) +/* malloc_conf options */ +CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, + opt_malloc_conf_symlink, const char *) +CTL_RO_NL_CGEN(opt_malloc_conf_env_var, opt_malloc_conf_env_var, + opt_malloc_conf_env_var, const char *) +CTL_RO_NL_CGEN(je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, + const char *) +CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder, + opt_malloc_conf_global_var_2_conf_harder, je_malloc_conf_2_conf_harder, + const char *) + /******************************************************************************/ static int diff --git a/src/jemalloc.c b/src/jemalloc.c index 7934e767..68c0e7eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -57,6 +57,9 @@ const char *je_malloc_conf_2_conf_harder #endif ; +const char *opt_malloc_conf_symlink = NULL; +const char *opt_malloc_conf_env_var = NULL; + bool opt_abort = #ifdef JEMALLOC_DEBUG true @@ -955,7 +958,7 @@ malloc_slow_flag_init(void) { #define MALLOC_CONF_NSOURCES 5 static const char * -obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { +obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { if (config_debug) { static unsigned read_source = 0; /* @@ -998,9 +1001,9 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { * link's name. */ #ifndef JEMALLOC_READLINKAT - linklen = readlink(linkname, buf, PATH_MAX); + linklen = readlink(linkname, readlink_buf, PATH_MAX); #else - linklen = readlinkat(AT_FDCWD, linkname, buf, PATH_MAX); + linklen = readlinkat(AT_FDCWD, linkname, readlink_buf, PATH_MAX); #endif if (linklen == -1) { /* No configuration specified. 
*/ @@ -1009,8 +1012,8 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { set_errno(saved_errno); } #endif - buf[linklen] = '\0'; - ret = buf; + readlink_buf[linklen] = '\0'; + ret = readlink_buf; break; } case 3: { const char *envname = @@ -1022,10 +1025,7 @@ obtain_malloc_conf(unsigned which_source, char buf[PATH_MAX + 1]) { ; if ((ret = jemalloc_getenv(envname)) != NULL) { - /* - * Do nothing; opts is already initialized to the value - * of the MALLOC_CONF environment variable. - */ + opt_malloc_conf_env_var = ret; } else { /* No configuration specified. */ ret = NULL; @@ -1084,7 +1084,7 @@ validate_hpa_settings(void) { static void malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], - char buf[PATH_MAX + 1]) { + char readlink_buf[PATH_MAX + 1]) { static const char *opts_explain[MALLOC_CONF_NSOURCES] = { "string specified via --with-malloc-conf", "string pointed to by the global variable malloc_conf", @@ -1101,7 +1101,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { /* Get runtime configuration. 
*/ if (initial_call) { - opts_cache[i] = obtain_malloc_conf(i, buf); + opts_cache[i] = obtain_malloc_conf(i, readlink_buf); } opts = opts_cache[i]; if (!initial_call && opt_confirm_conf) { @@ -1783,13 +1783,13 @@ malloc_conf_init_check_deps(void) { } static void -malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]) { +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + char readlink_buf[PATH_MAX + 1]) { const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, NULL}; - char buf[PATH_MAX + 1]; /* The first call only set the confirm_conf option and opts_cache */ - malloc_conf_init_helper(NULL, NULL, true, opts_cache, buf); + malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, NULL); if (malloc_conf_init_check_deps()) { @@ -1855,7 +1855,9 @@ malloc_init_hard_a0_locked(void) { if (config_prof) { prof_boot0(); } - malloc_conf_init(&sc_data, bin_shard_sizes); + char readlink_buf[PATH_MAX + 1]; + readlink_buf[0] = '\0'; + malloc_conf_init(&sc_data, bin_shard_sizes, readlink_buf); san_init(opt_lg_san_uaf_align); sz_boot(&sc_data, opt_cache_oblivious); bin_info_boot(&sc_data, bin_shard_sizes); @@ -1949,6 +1951,15 @@ malloc_init_hard_a0_locked(void) { malloc_init_state = malloc_init_a0_initialized; + size_t buf_len = strlen(readlink_buf); + if (buf_len > 0) { + void *readlink_allocated = a0ialloc(buf_len + 1, false, true); + if (readlink_allocated != NULL) { + memcpy(readlink_allocated, readlink_buf, buf_len + 1); + opt_malloc_conf_symlink = readlink_allocated; + } + } + return false; } diff --git a/src/stats.c b/src/stats.c index c580b49e..428e8ffb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1473,6 +1473,40 @@ stats_general_print(emitter_t *emitter) { emitter_dict_begin(emitter, "opt", "Run-time option settings"); + /* + * opt.malloc_conf. 
+ * + * Sources are documented in https://jemalloc.net/jemalloc.3.html#tuning + * - (Not Included Here) The string specified via --with-malloc-conf, + * which is already printed out above as config.malloc_conf + * - (Included) The string pointed to by the global variable malloc_conf + * - (Included) The “name” of the file referenced by the symbolic link + * named /etc/malloc.conf + * - (Included) The value of the environment variable MALLOC_CONF + * - (Optional, Unofficial) The string pointed to by the global variable + * malloc_conf_2_conf_harder, which is hidden from the public. + * + * Note: The outputs are strictly ordered by priorities (low -> high). + * + */ +#define MALLOC_CONF_WRITE(name, message) \ + if (je_mallctl("opt.malloc_conf."name, (void *)&cpv, &cpsz, NULL, 0) != \ + 0) { \ + cpv = ""; \ + } \ + emitter_kv(emitter, name, message, emitter_type_string, &cpv); + + MALLOC_CONF_WRITE("global_var", "Global variable malloc_conf"); + MALLOC_CONF_WRITE("symlink", "Symbolic link malloc.conf"); + MALLOC_CONF_WRITE("env_var", "Environment variable MALLOC_CONF"); + /* As this config is unofficial, skip the output if it's NULL */ + if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", + (void *)&cpv, &cpsz, NULL, 0) == 0) { + emitter_kv(emitter, "global_var_2_conf_harder", "Global " + "variable malloc_conf_2_conf_harder", emitter_type_string, &cpv); + } +#undef MALLOC_CONF_WRITE + OPT_WRITE_BOOL("abort") OPT_WRITE_BOOL("abort_conf") OPT_WRITE_BOOL("cache_oblivious") @@ -1554,7 +1588,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") - emitter_dict_end(emitter); + emitter_dict_end(emitter); /* Close "opt". 
*/ #undef OPT_WRITE #undef OPT_WRITE_MUTABLE diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c index ecfa4991..9d2c6077 100644 --- a/test/unit/malloc_conf_2.c +++ b/test/unit/malloc_conf_2.c @@ -22,8 +22,32 @@ TEST_BEGIN(test_malloc_conf_2) { } TEST_END +TEST_BEGIN(test_mallctl_global_var) { +#ifdef _WIN32 + bool windows = true; +#else + bool windows = false; +#endif + /* Windows doesn't support weak symbol linker trickery. */ + test_skip_if(windows); + + const char *mc; + size_t sz = sizeof(mc); + expect_d_eq(mallctl("opt.malloc_conf.global_var", + (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf, "Unexpected value for the global variable " + "malloc_conf"); + + expect_d_eq(mallctl("opt.malloc_conf.global_var_2_conf_harder", + (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf_2_conf_harder, "Unexpected value for the " + "global variable malloc_conf_2_conf_harder"); +} +TEST_END + int main(void) { return test( - test_malloc_conf_2); + test_malloc_conf_2, + test_mallctl_global_var); } From ed9b00a96b25ea24e90875d7a79cdbf3411dd53b Mon Sep 17 00:00:00 2001 From: XChy Date: Mon, 4 Mar 2024 14:50:39 +0800 Subject: [PATCH 2373/2608] Replace unsigned induction variable with size_t in background_threads_enable This patch avoids unnecessary vectorizations in clang and missed recognition of memset in gcc. See also https://godbolt.org/z/aoeMsjr4c. 
--- src/background_thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index 94d91a89..c92fa2bc 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -580,7 +580,7 @@ background_threads_enable(tsd_t *tsd) { VARIABLE_ARRAY(bool, marked, max_background_threads); unsigned nmarked; - for (unsigned i = 0; i < max_background_threads; i++) { + for (size_t i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; From 1978e5cdac731dca43b62e4b03612c0758f7cece Mon Sep 17 00:00:00 2001 From: Minsoo Choo Date: Sat, 9 Mar 2024 00:46:31 -0500 Subject: [PATCH 2374/2608] Update acitons/checkout and actions/upload-artifact to v4 --- .github/workflows/check_formatting.yaml | 2 +- .github/workflows/static_analysis.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check_formatting.yaml b/.github/workflows/check_formatting.yaml index f7be77b1..8a10065f 100644 --- a/.github/workflows/check_formatting.yaml +++ b/.github/workflows/check_formatting.yaml @@ -5,6 +5,6 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Check for trailing whitespace run: scripts/check_trailing_whitespace.sh diff --git a/.github/workflows/static_analysis.yaml b/.github/workflows/static_analysis.yaml index df60b5a1..29e617fc 100644 --- a/.github/workflows/static_analysis.yaml +++ b/.github/workflows/static_analysis.yaml @@ -7,7 +7,7 @@ jobs: # We build libunwind ourselves because sadly the version # provided by Ubuntu via apt-get is much too old. - name: Check out libunwind - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: libunwind/libunwind path: libunwind @@ -23,7 +23,7 @@ jobs: cd .. 
rm -rf libunwind - name: Check out repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # We download LLVM directly from the latest stable release # on GitHub, because this tends to be much newer than the # version available via apt-get in Ubuntu. @@ -54,7 +54,7 @@ jobs: scripts/run_static_analysis.sh static_analysis_results "$GITHUB_OUTPUT" - name: Upload static analysis results if: ${{ steps.run_static_analysis.outputs.HAS_STATIC_ANALYSIS_RESULTS }} == '1' - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: static_analysis_results path: static_analysis_results From 10d713151d7245ae89657a7002a5988522b7bd7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 25 Oct 2023 01:01:22 +0000 Subject: [PATCH 2375/2608] Ensure that the root of a heap is always the best element. --- include/jemalloc/internal/ph.h | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 3ae38710..830ccb7e 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -239,7 +239,7 @@ ph_merge_aux(ph_t *ph, size_t offset, ph_cmp_t cmp) { phn_prev_set(phn, NULL, offset); phn = phn_merge_siblings(phn, offset, cmp); assert(phn_next_get(phn, offset) == NULL); - ph->root = phn_merge(ph->root, phn, offset, cmp); + phn_merge_ordered(ph->root, phn, offset, cmp); } } @@ -380,20 +380,9 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { JEMALLOC_ALWAYS_INLINE void ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { if (ph->root == phn) { - /* - * We can delete from aux list without merging it, but we need - * to merge if we are dealing with the root node and it has - * children. 
- */ - if (phn_lchild_get(phn, offset) == NULL) { - ph->root = phn_next_get(phn, offset); - return; - } ph_merge_aux(ph, offset, cmp); - if (ph->root == phn) { - ph->root = ph_merge_children(ph->root, offset, cmp); - return; - } + ph->root = ph_merge_children(phn, offset, cmp); + return; } void* prev = phn_prev_get(phn, offset); From 92aa52c0625d35ca1c30e7fc913d7c92c9518f9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 25 Oct 2023 00:36:08 +0000 Subject: [PATCH 2376/2608] Reduce nesting in phn_merge_siblings using an early return. --- include/jemalloc/internal/ph.h | 105 +++++++++++++++++---------------- 1 file changed, 54 insertions(+), 51 deletions(-) diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 830ccb7e..ef9634be 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -162,6 +162,10 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { void *phn0 = phn; void *phn1 = phn_next_get(phn0, offset); + if (phn1 == NULL) { + return phn0; + } + /* * Multipass merge, wherein the first two elements of a FIFO * are repeatedly merged, and each result is appended to the @@ -170,62 +174,61 @@ phn_merge_siblings(void *phn, size_t offset, ph_cmp_t cmp) { * its tail, so we do a single pass over the sibling list to * populate the FIFO. 
*/ - if (phn1 != NULL) { - void *phnrest = phn_next_get(phn1, offset); - if (phnrest != NULL) { - phn_prev_set(phnrest, NULL, offset); - } - phn_prev_set(phn0, NULL, offset); - phn_next_set(phn0, NULL, offset); - phn_prev_set(phn1, NULL, offset); - phn_next_set(phn1, NULL, offset); - phn0 = phn_merge(phn0, phn1, offset, cmp); - head = tail = phn0; - phn0 = phnrest; - while (phn0 != NULL) { - phn1 = phn_next_get(phn0, offset); - if (phn1 != NULL) { - phnrest = phn_next_get(phn1, offset); - if (phnrest != NULL) { - phn_prev_set(phnrest, NULL, offset); - } - phn_prev_set(phn0, NULL, offset); - phn_next_set(phn0, NULL, offset); - phn_prev_set(phn1, NULL, offset); - phn_next_set(phn1, NULL, offset); - phn0 = phn_merge(phn0, phn1, offset, cmp); - /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ - phn_next_set(tail, phn0, offset); - tail = phn0; - phn0 = phnrest; - } else { - /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ - phn_next_set(tail, phn0, offset); - tail = phn0; - phn0 = NULL; - } - } - phn0 = head; + void *phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + phn_prev_set(phnrest, NULL, offset); + } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + head = tail = phn0; + phn0 = phnrest; + while (phn0 != NULL) { phn1 = phn_next_get(phn0, offset); if (phn1 != NULL) { - while (true) { - head = phn_next_get(phn1, offset); - assert(phn_prev_get(phn0, offset) == NULL); - phn_next_set(phn0, NULL, offset); - assert(phn_prev_get(phn1, offset) == NULL); - phn_next_set(phn1, NULL, offset); - phn0 = phn_merge(phn0, phn1, offset, cmp); - if (head == NULL) { - break; - } - /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ - phn_next_set(tail, phn0, offset); - tail = phn0; - phn0 = head; - phn1 = phn_next_get(phn0, offset); + phnrest = phn_next_get(phn1, offset); + if (phnrest != NULL) { + 
phn_prev_set(phnrest, NULL, offset); } + phn_prev_set(phn0, NULL, offset); + phn_next_set(phn0, NULL, offset); + phn_prev_set(phn1, NULL, offset); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = phnrest; + } else { + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = NULL; } } + phn0 = head; + phn1 = phn_next_get(phn0, offset); + if (phn1 != NULL) { + while (true) { + head = phn_next_get(phn1, offset); + assert(phn_prev_get(phn0, offset) == NULL); + phn_next_set(phn0, NULL, offset); + assert(phn_prev_get(phn1, offset) == NULL); + phn_next_set(phn1, NULL, offset); + phn0 = phn_merge(phn0, phn1, offset, cmp); + if (head == NULL) { + break; + } + /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ + phn_next_set(tail, phn0, offset); + tail = phn0; + phn0 = head; + phn1 = phn_next_get(phn0, offset); + } + } + return phn0; } From b2e59a96e1ffc953300c5b69ffae934a63de38c0 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Mon, 25 Mar 2024 08:02:49 -0700 Subject: [PATCH 2377/2608] Introduce getters for page allocator shard stats Access nactive, ndirty and nmuzzy throught getters and not directly. There are no functional change, but getters are required to propagate HPA's statistics up to Page Allocator's statitics. 
--- include/jemalloc/internal/pa.h | 4 ++++ src/pa.c | 2 +- src/pa_extra.c | 25 ++++++++++++++++++++----- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 5f43244d..75626738 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -224,6 +224,10 @@ void pa_shard_prefork5(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard); void pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard); +size_t pa_shard_nactive(pa_shard_t *shard); +size_t pa_shard_ndirty(pa_shard_t *shard); +size_t pa_shard_nmuzzy(pa_shard_t *shard); + void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); diff --git a/src/pa.c b/src/pa.c index ebc6861d..7a24ae65 100644 --- a/src/pa.c +++ b/src/pa.c @@ -11,7 +11,7 @@ pa_nactive_add(pa_shard_t *shard, size_t add_pages) { static void pa_nactive_sub(pa_shard_t *shard, size_t sub_pages) { - assert(atomic_load_zu(&shard->nactive, ATOMIC_RELAXED) >= sub_pages); + assert(pa_shard_nactive(shard) >= sub_pages); atomic_fetch_sub_zu(&shard->nactive, sub_pages, ATOMIC_RELAXED); } diff --git a/src/pa_extra.c b/src/pa_extra.c index 0f488be6..ee101891 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -74,12 +74,27 @@ pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { } } +size_t +pa_shard_nactive(pa_shard_t *shard) { + return atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); +} + +size_t +pa_shard_ndirty(pa_shard_t *shard) { + return ecache_npages_get(&shard->pac.ecache_dirty); +} + +size_t +pa_shard_nmuzzy(pa_shard_t *shard) { + return ecache_npages_get(&shard->pac.ecache_muzzy); +} + void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { - *nactive += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - *ndirty += ecache_npages_get(&shard->pac.ecache_dirty); - *nmuzzy += 
ecache_npages_get(&shard->pac.ecache_muzzy); + *nactive += pa_shard_nactive(shard); + *ndirty += pa_shard_ndirty(shard); + *nmuzzy += pa_shard_nmuzzy(shard); } void @@ -95,8 +110,8 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, &shard->edata_cache.count, ATOMIC_RELAXED); size_t resident_pgs = 0; - resident_pgs += atomic_load_zu(&shard->nactive, ATOMIC_RELAXED); - resident_pgs += ecache_npages_get(&shard->pac.ecache_dirty); + resident_pgs += pa_shard_nactive(shard); + resident_pgs += pa_shard_ndirty(shard); *resident += (resident_pgs << LG_PAGE); /* Dirty decay stats */ From 268e8ee880bcb67163eda4c4f43c06697b28a436 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Mon, 25 Mar 2024 09:23:03 -0700 Subject: [PATCH 2378/2608] Include HPA ndirty into page allocator ndirty stat --- src/pa_extra.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pa_extra.c b/src/pa_extra.c index ee101891..76507039 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -81,7 +81,11 @@ pa_shard_nactive(pa_shard_t *shard) { size_t pa_shard_ndirty(pa_shard_t *shard) { - return ecache_npages_get(&shard->pac.ecache_dirty); + size_t ndirty = ecache_npages_get(&shard->pac.ecache_dirty); + if (shard->ever_used_hpa) { + ndirty += psset_ndirty(&shard->hpa_shard.psset); + } + return ndirty; } size_t From 38056fea64c34ca4fef0a16212776eaa4de80b78 Mon Sep 17 00:00:00 2001 From: Juhyung Park Date: Tue, 19 Mar 2024 16:46:56 +0900 Subject: [PATCH 2379/2608] Set errno to ENOMEM on rallocx() OOM failures realloc() and rallocx() shares path, and realloc() should set errno to ENOMEM upon OOM failures. 
Fixes: ee961c23100e ("Merge realloc and rallocx pathways.") Signed-off-by: Juhyung Park --- src/jemalloc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/jemalloc.c b/src/jemalloc.c index 68c0e7eb..4e77894c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3493,6 +3493,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { return p; label_oom: + set_errno(ENOMEM); if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); abort(); From 5081c16bb49a0c9d1dde3cbd7dfb2e97c2827ea4 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 3 Apr 2024 13:27:11 -0700 Subject: [PATCH 2380/2608] Experimental calloc implementation with using memset on larger sizes --- .../internal/jemalloc_internal_externs.h | 1 + src/arena.c | 35 +++++++++++++++---- src/jemalloc.c | 5 +++ 3 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 9d7a9048..41c0f366 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -37,6 +37,7 @@ extern const char *const zero_realloc_mode_names[]; extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; +extern size_t opt_calloc_madvise_threshold; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/src/arena.c b/src/arena.c index 746ab328..8c87d67f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -359,18 +359,39 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, bool guarded = san_large_extent_decide_guard(tsdn, arena_get_ehooks(arena), esize, alignment); - edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, - /* slab */ false, szind, zero, guarded, &deferred_work_generated); - if (edata != NULL) { - if (config_stats) { - 
arena_large_malloc_stats_update(tsdn, arena, usize); - } + /* + * - if usize >= opt_calloc_madvise_threshold, + * - pa_alloc(..., zero_override = zero, ...) + * - otherwise, + * - pa_alloc(..., zero_override = false, ...) + * - use memset() to zero out memory if zero == true. + */ + bool zero_override = zero && (usize >= opt_calloc_madvise_threshold); + edata_t *edata = pa_alloc(tsdn, &arena->pa_shard, esize, alignment, + /* slab */ false, szind, zero_override, guarded, + &deferred_work_generated); + + if (edata == NULL) { + return NULL; } - if (edata != NULL && sz_large_pad != 0) { + if (config_stats) { + arena_large_malloc_stats_update(tsdn, arena, usize); + } + if (sz_large_pad != 0) { arena_cache_oblivious_randomize(tsdn, arena, edata, alignment); } + /* + * This branch should be put after the randomization so that the addr + * returned by edata_addr_get() has already be randomized, + * if cache_oblivious is enabled. + */ + if (zero && !zero_override && !edata_zeroed_get(edata)) { + void *addr = edata_addr_get(edata); + size_t usize = edata_usize_get(edata); + memset(addr, 0, usize); + } return edata; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 4e77894c..0362f539 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -160,6 +160,8 @@ unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; +size_t opt_calloc_madvise_threshold = 0; + /* Protects arenas initialization. 
*/ static malloc_mutex_t arenas_lock; @@ -1453,6 +1455,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "debug_double_free_max_scan", 0, UINT_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) + CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, + "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ false) /* * The runtime option of oversize_threshold remains From 83b075789b4239035931c1ee212576d00153bbf0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 5 Apr 2024 11:33:55 -0700 Subject: [PATCH 2381/2608] rallocx path: only set errno on the realloc case. --- src/jemalloc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0362f539..d83c191f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3498,7 +3498,9 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { return p; label_oom: - set_errno(ENOMEM); + if (is_realloc) { + set_errno(ENOMEM); + } if (config_xmalloc && unlikely(opt_xmalloc)) { malloc_write(": Error in rallocx(): out of memory\n"); abort(); From 11038ff762a2ba11eec26d3ffb32026424d2ccfe Mon Sep 17 00:00:00 2001 From: Daniel Hodges Date: Sat, 23 Dec 2023 19:40:44 -0500 Subject: [PATCH 2382/2608] Add support for namespace pids in heap profile names This change adds support for writing pid namespaces to the filename of a heap profile. When running with namespaces pids may reused across namespaces and if mounts are shared where profiles are written there is not a great way to differentiate profiles between pids. 
Signed-off-by: Daniel Hodges Signed-off-by: Daniel Hodges --- doc/jemalloc.xml.in | 17 ++++++ include/jemalloc/internal/prof_externs.h | 3 + src/ctl.c | 4 ++ src/jemalloc.c | 1 + src/prof.c | 1 + src/prof_sys.c | 76 ++++++++++++++++++++---- test/unit/mallctl.c | 1 + 7 files changed, 93 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index d0d4b20b..89a176e0 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1514,6 +1514,23 @@ malloc_conf = "xmalloc:true";]]> by default. + + + opt.prof_pid_namespace + (bool) + r- + [] + + Enable adding the pid namespace to the profile + filename. Profiles are dumped to files named according to the pattern + <prefix>.<pid_namespace>.<pid>.<seq>.i<iseq>.heap, + where <prefix> is controlled by the opt.prof_prefix and + prof.prefix + options. + + + opt.lg_prof_interval diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 514c5804..952ace7d 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -26,6 +26,9 @@ extern char opt_prof_prefix[ 1]; extern bool opt_prof_unbias; +/* Include pid namespace in profile file names. 
*/ +extern bool opt_prof_pid_namespace; + /* For recording recent allocations */ extern ssize_t opt_prof_recent_alloc_max; diff --git a/src/ctl.c b/src/ctl.c index 7c349da7..37b69576 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -153,6 +153,7 @@ CTL_PROTO(opt_prof_final) CTL_PROTO(opt_prof_leak) CTL_PROTO(opt_prof_leak_error) CTL_PROTO(opt_prof_accum) +CTL_PROTO(opt_prof_pid_namespace) CTL_PROTO(opt_prof_recent_alloc_max) CTL_PROTO(opt_prof_stats) CTL_PROTO(opt_prof_sys_thread_name) @@ -507,6 +508,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_leak"), CTL(opt_prof_leak)}, {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)}, {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, {NAME("prof_stats"), CTL(opt_prof_stats)}, {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, @@ -2226,6 +2228,8 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) +CTL_RO_NL_CGEN(config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, + bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index d83c191f..390912ba 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1628,6 +1628,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_leak_error, "prof_leak_error") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + CONF_HANDLE_BOOL(opt_prof_pid_namespace, "prof_pid_namespace") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, "prof_recent_alloc_max", -1, SSIZE_MAX) 
CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") diff --git a/src/prof.c b/src/prof.c index 1cf49740..6ae7f768 100644 --- a/src/prof.c +++ b/src/prof.c @@ -34,6 +34,7 @@ bool opt_prof_final = false; bool opt_prof_leak = false; bool opt_prof_leak_error = false; bool opt_prof_accum = false; +bool opt_prof_pid_namespace = false; char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; bool opt_prof_sys_thread_name = false; bool opt_prof_unbias = true; diff --git a/src/prof_sys.c b/src/prof_sys.c index c2998926..1e22332c 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -484,6 +484,41 @@ prof_getpid(void) { #endif } +long +prof_get_pid_namespace() { + long ret = 0; + +#if defined(_WIN32) || defined(__APPLE__) + // Not supported, do nothing. +#else + char buf[PATH_MAX]; + const char* linkname = +# if defined(__FreeBSD__) || defined(__DragonFly__) + "/proc/curproc/ns/pid" +# else + "/proc/self/ns/pid" +# endif + ; + ssize_t linklen = +# ifndef JEMALLOC_READLINKAT + readlink(linkname, buf, PATH_MAX) +# else + readlinkat(AT_FDCWD, linkname, buf, PATH_MAX) +# endif + ; + + // namespace string is expected to be like pid:[4026531836] + if (linklen > 0) { + // Trim the trailing "]" + buf[linklen-1] = '\0'; + char* index = strtok(buf, "pid:["); + ret = atol(index); + } +#endif + + return ret; +} + /* * This buffer is rather large for stack allocation, so use a single buffer for * all profile dumps; protected by prof_dump_mtx. 
@@ -713,15 +748,30 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { const char *prefix = prof_prefix_get(tsd_tsdn(tsd)); if (vseq != VSEQ_INVALID) { - /* "...v.heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), - prof_dump_seq, v, vseq); + if (opt_prof_pid_namespace) { + /* "....v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%ld.%d.%"FMTu64".%c%"FMTu64".heap", prefix, + prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v, + vseq); + } else { + /* "...v.heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), + prof_dump_seq, v, vseq); + } } else { - /* "....heap" */ - malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), - prof_dump_seq, v); + if (opt_prof_pid_namespace) { + /* ".....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%ld.%d.%"FMTu64".%c.heap", prefix, + prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v); + } else { + /* "....heap" */ + malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, + "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), + prof_dump_seq, v); + } } prof_dump_seq++; } @@ -729,8 +779,14 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); - malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); + if (opt_prof_pid_namespace) { + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%ld.%d.%"FMTu64".json", prof_prefix_get(tsdn), + prof_get_pid_namespace(), prof_getpid(), ind); + } else { + malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, + "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); + } malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); } diff --git 
a/test/unit/mallctl.c b/test/unit/mallctl.c index 1ff8b564..9e5baff0 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -317,6 +317,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); + TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); TEST_MALLCTL_OPT(bool, prof_gdump, prof); TEST_MALLCTL_OPT(bool, prof_final, prof); From 4b555c11a54d31ba941d996011c7063b2083a12e Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Mon, 4 Dec 2023 12:07:54 -0800 Subject: [PATCH 2383/2608] Enable heap profiling on MacOS --- bin/jeprof.in | 71 ++++++++++++++++++++++++++++++++++++++++- src/prof_sys.c | 67 ++++++++++++++++++++++++++++++++++++++ test/unit/prof_gdump.sh | 2 +- test/unit/prof_mdump.c | 1 + 4 files changed, 139 insertions(+), 2 deletions(-) diff --git a/bin/jeprof.in b/bin/jeprof.in index f6999ece..7aff8643 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -88,6 +88,7 @@ my %obj_tool_map = ( #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables #"addr2line_pdb" => "addr2line-pdb", # ditto #"otool" => "otool", # equivalent of objdump on OS X + #"dyld_info" => "dyld_info", # equivalent of otool on OS X for shared cache ); # NOTE: these are lists, so you can put in commandline flags if you want. my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local @@ -4661,7 +4662,65 @@ sub ParseTextSectionHeaderFromOtool { return $r; } +# Parse text section header of a library in OS X shared cache using dyld_info +sub ParseTextSectionHeaderFromDyldInfo { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get dyld_info output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
+ my $cmd = ShellEscape($obj_tool_map{"dyld_info"}, "-segments", $lib); + open(DYLD, "$cmd |") || error("$cmd: $!\n"); + + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # -segments: + # load-address segment section sect-size seg-size perm + # 0x1803E0000 __TEXT 112KB r.x + # 0x1803E4F34 __text 80960 + # 0x1803F8B74 __auth_stubs 768 + # 0x1803F8E74 __init_offsets 4 + # 0x1803F8E78 __gcc_except_tab 1180 + my @x = split; + if ($#x >= 2) { + if ($x[0] eq 'load-offset') { + # dyld_info should only be used for the shared lib. + return undef; + } elsif ($x[1] eq '__TEXT') { + $file_offset = $x[0]; + } elsif ($x[1] eq '__text') { + $size = $x[2]; + $vma = $x[0]; + $file_offset = AddressSub($x[0], $file_offset); + last; + } + } + } + close(DYLD); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + sub ParseTextSectionHeader { + # obj_tool_map("dyld_info") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"dyld_info"})) { + my $r = ParseTextSectionHeaderFromDyldInfo(@_); + if (defined($r)){ + return $r; + } + } + # if dyld_info doesn't work, or we don't have it, fall back to otool # obj_tool_map("otool") is only defined if we're in a Mach-O environment if (defined($obj_tool_map{"otool"})) { my $r = ParseTextSectionHeaderFromOtool(@_); @@ -4702,7 +4761,7 @@ sub ParseLibraries { $offset = HexExtend($3); $lib = $4; $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths - } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.(so|dll|dylib|bundle)(\.\d+)*)/) { # Cooked line from DumpAddressMap. 
Example: # 40000000-40015000: /lib/ld-2.3.2.so $start = HexExtend($1); @@ -4719,6 +4778,15 @@ sub ParseLibraries { $offset = HexExtend($3); $lib = $4; $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif (($l =~ /^\s*($h)-($h):\s*(\S+)/) && ($3 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable from + # DumpAddressMap as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; } # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) @@ -5249,6 +5317,7 @@ sub ConfigureObjTools { if ($file_type =~ /Mach-O/) { # OS X uses otool to examine Mach-O files, rather than objdump. $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"dyld_info"} = "dyld_info"; $obj_tool_map{"addr2line"} = "false"; # no addr2line $obj_tool_map{"objdump"} = "false"; # no objdump } diff --git a/src/prof_sys.c b/src/prof_sys.c index 1e22332c..8a904040 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -605,6 +605,72 @@ prof_dump_close(prof_dump_arg_t *arg) { } } +#ifdef __APPLE__ +#include + +#ifdef __LP64__ +typedef struct mach_header_64 mach_header_t; +typedef struct segment_command_64 segment_command_t; +#define MH_MAGIC_VALUE MH_MAGIC_64 +#define MH_CIGAM_VALUE MH_CIGAM_64 +#define LC_SEGMENT_VALUE LC_SEGMENT_64 +#else +typedef struct mach_header mach_header_t; +typedef struct segment_command segment_command_t; +#define MH_MAGIC_VALUE MH_MAGIC +#define MH_CIGAM_VALUE MH_CIGAM +#define LC_SEGMENT_VALUE LC_SEGMENT +#endif + +static void +prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) { + const mach_header_t *header = (const mach_header_t *) + _dyld_get_image_header(image_index); + if (header == NULL || (header->magic != MH_MAGIC_VALUE && + header->magic != MH_CIGAM_VALUE)) { + // Invalid header + return; 
+ } + + intptr_t slide = _dyld_get_image_vmaddr_slide(image_index); + const char *name = _dyld_get_image_name(image_index); + struct load_command *load_cmd = (struct load_command *) + ((char *)header + sizeof(mach_header_t)); + for (uint32_t i = 0; load_cmd && (i < header->ncmds); i++) { + if (load_cmd->cmd == LC_SEGMENT_VALUE) { + const segment_command_t *segment_cmd = + (const segment_command_t *)load_cmd; + if (!strcmp(segment_cmd->segname, "__TEXT")) { + char buffer[PATH_MAX + 1]; + malloc_snprintf(buffer, sizeof(buffer), + "%016llx-%016llx: %s\n", segment_cmd->vmaddr + slide, + segment_cmd->vmaddr + slide + segment_cmd->vmsize, name); + buf_writer_cb(buf_writer, buffer); + return; + } + } + load_cmd = + (struct load_command *)((char *)load_cmd + load_cmd->cmdsize); + } +} + +static void +prof_dump_dyld_maps(buf_writer_t *buf_writer) { + uint32_t image_count = _dyld_image_count(); + for (uint32_t i = 0; i < image_count; i++) { + prof_dump_dyld_image_vmaddr(buf_writer, i); + } +} + +prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = NULL; + +static void +prof_dump_maps(buf_writer_t *buf_writer) { + buf_writer_cb(buf_writer, "\nMAPPED_LIBRARIES:\n"); + /* No proc map file to read on MacOS, dump dyld maps for backtrace. 
*/ + prof_dump_dyld_maps(buf_writer); +} +#else /* !__APPLE__ */ #ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int @@ -670,6 +736,7 @@ prof_dump_maps(buf_writer_t *buf_writer) { buf_writer_pipe(buf_writer, prof_dump_read_maps_cb, &mfd); close(mfd); } +#endif /* __APPLE__ */ static bool prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, diff --git a/test/unit/prof_gdump.sh b/test/unit/prof_gdump.sh index 3f600d20..a0b91dff 100644 --- a/test/unit/prof_gdump.sh +++ b/test/unit/prof_gdump.sh @@ -1,6 +1,6 @@ #!/bin/sh if [ "x${enable_prof}" = "x1" ] ; then - export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true" + export MALLOC_CONF="prof:true,prof_active:false,prof_gdump:true,lg_prof_sample:0" fi diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index bcbb961a..0559339e 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -166,6 +166,7 @@ expect_maps_write_failure(int count) { TEST_BEGIN(test_mdump_maps_error) { test_skip_if(!config_prof); test_skip_if(!config_debug); + test_skip_if(prof_dump_open_maps == NULL); prof_dump_open_file_t *open_file_orig = prof_dump_open_file; prof_dump_write_file_t *write_file_orig = prof_dump_write_file; From 630434bb0ac619f7beec927569782d924c459385 Mon Sep 17 00:00:00 2001 From: "debing.sun" Date: Tue, 30 Jan 2024 01:38:08 +0800 Subject: [PATCH 2384/2608] Fixed type error with allocated that caused incorrect printing on 32bit --- src/stats.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index 428e8ffb..026a4f54 100644 --- a/src/stats.c +++ b/src/stats.c @@ -324,7 +324,7 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, allocated, NULL, right, 13, uint64) + COL_HDR(row, allocated, NULL, right, 13, size) COL_HDR(row, nmalloc, NULL, right, 13, uint64) COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) COL_HDR(row, 
ndalloc, NULL, right, 13, uint64) From fa451de17fff73cc03c31ec8cd817d62927d1ff9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 12 Apr 2024 11:08:39 -0700 Subject: [PATCH 2385/2608] Fix the tcache flush sanity checking around ncached and nstashed. When there were many items stashed, it's possible that after flushing stashed, ncached is already lower than the remain, in which case the flush can simply return at that point. --- src/tcache.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index ca0b1acb..f9235541 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -524,11 +524,25 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { + assert(rem <= cache_bin_ncached_max_get(cache_bin)); assert(!tcache_bin_disabled(binind, cache_bin, tcache->tcache_slow)); + cache_bin_sz_t orig_nstashed = cache_bin_nstashed_get_local(cache_bin); tcache_bin_flush_stashed(tsd, tcache, cache_bin, binind, small); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); - assert((cache_bin_sz_t)rem <= ncached); + assert((cache_bin_sz_t)rem <= ncached + orig_nstashed); + if ((cache_bin_sz_t)rem > ncached) { + /* + * The flush_stashed above could have done enough flushing, if + * there were many items stashed. Validate that: 1) non zero + * stashed, and 2) bin stack has available space now. + */ + assert(orig_nstashed > 0); + assert(ncached + cache_bin_nstashed_get_local(cache_bin) + < cache_bin_ncached_max_get(cache_bin)); + /* Still go through the flush logic for stats purpose only. 
*/ + rem = ncached; + } cache_bin_sz_t nflush = ncached - (cache_bin_sz_t)rem; CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); @@ -537,8 +551,7 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, small); - cache_bin_finish_flush(cache_bin, &ptrs, - ncached - (cache_bin_sz_t)rem); + cache_bin_finish_flush(cache_bin, &ptrs, nflush); } void From 47d69b4eabae199fa8b5d948f0043effccfbc31e Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 4 Apr 2024 10:02:08 -0700 Subject: [PATCH 2386/2608] HPA: Fix infinite purging loop One of the condition to start purging is `hpa_hugify_blocked_by_ndirty` function call returns true. This can happen in cases where we have no dirty memory for this shard at all. In this case purging loop will be an infinite loop. `hpa_hugify_blocked_by_ndirty` was introduced at 0f6c420, but at that time purging loop has different form and additional `break` was not required. Purging loop form was re-written at 6630c5989, but additional exit condition wasn't added there at the time. Repo code was shared by Patrik Dokoupil at [1], I stripped it down to minimum to reproduce issue in jemalloc unit tests. [1]: https://github.com/jemalloc/jemalloc/pull/2533 --- src/hpa.c | 11 +++++++++-- test/unit/hpa.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 99d1f033..6b1ae2ce 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -537,9 +537,16 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, purged = false; while (hpa_should_purge(tsdn, shard) && nops < max_ops) { purged = hpa_try_purge(tsdn, shard); - if (purged) { - nops++; + if (!purged) { + /* + * It is fine if we couldn't purge as sometimes + * we try to purge just to unblock + * hugification, but there is maybe no dirty + * pages at all at the moment. 
+ */ + break; } + nops++; } hugified = hpa_try_hugify(tsdn, shard); if (hugified) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 9e3160b4..a8a26e13 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -24,7 +24,7 @@ struct test_data_s { static hpa_shard_opts_t test_hpa_shard_opts_default = { /* slab_max_alloc */ ALLOC_MAX, - /* hugification threshold */ + /* hugification_threshold */ HUGEPAGE, /* dirty_mult */ FXP_INIT_PERCENT(25), @@ -36,6 +36,21 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 5 * 1000, }; +static hpa_shard_opts_t test_hpa_shard_opts_purge = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* min_purge_interval_ms */ + 5 * 1000, +}; + static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { bool err; @@ -452,6 +467,36 @@ TEST_BEGIN(test_defer_time) { } TEST_END +TEST_BEGIN(test_purge_no_infinite_loop) { + test_skip_if(!hpa_supported()); + + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_purge); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* + * This is not arbitrary value, it is chosen to met hugification + * criteria for huge page and at the same time do not allow hugify page + * without triggering a purge. 
+ */ + const size_t npages = + test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t size = npages * PAGE; + + bool deferred_work_generated = false; + edata_t *edata = pai_alloc(tsdn, &shard->pai, size, PAGE, + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected alloc failure"); + + hpa_shard_do_deferred_work(tsdn, shard); + + /* hpa_shard_do_deferred_work should not stuck in a purging loop */ + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -470,5 +515,6 @@ main(void) { test_alloc_max, test_stress, test_alloc_dalloc_batch, - test_defer_time); + test_defer_time, + test_purge_no_infinite_loop); } From 8d8379da443f46dc976252b968cb9ca8e63ec974 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 2 May 2024 12:53:42 -0700 Subject: [PATCH 2387/2608] Fix background_thread creation for the oversize_arena. Bypassing background thread creation for the oversize_arena used to be an optimization since that arena had eager purging. However #2466 changed the purging policy for the oversize_arena -- specifically it switched to the default decay time when background_thread is enabled. This issue is noticable when the number of arenas is low: whenever the total # of arenas is <= 4 (which is the default max # of background threads), in which case the purging will be stalled since no background thread is created for the oversize_arena. 
--- include/jemalloc/internal/arena_externs.h | 1 - src/arena.c | 8 -------- src/ctl.c | 11 ----------- src/jemalloc.c | 7 ++----- 4 files changed, 2 insertions(+), 25 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index f91bd888..3d0329fc 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -103,7 +103,6 @@ void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); bool arena_init_huge(arena_t *a0); -bool arena_is_huge(unsigned arena_ind); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); diff --git a/src/arena.c b/src/arena.c index 8c87d67f..1e5289e4 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1867,14 +1867,6 @@ arena_init_huge(arena_t *a0) { return huge_enabled; } -bool -arena_is_huge(unsigned arena_ind) { - if (huge_arena_ind == 0) { - return false; - } - return (arena_ind == huge_arena_ind); -} - bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); diff --git a/src/ctl.c b/src/ctl.c index 37b69576..3b90aa15 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2985,17 +2985,6 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (arena_is_huge(arena_ind) && *(ssize_t *)newp > 0) { - /* - * By default the huge arena purges eagerly. If it is - * set to non-zero decay time afterwards, background - * thread might be needed. 
- */ - if (background_thread_create(tsd, arena_ind)) { - ret = EFAULT; - goto label_return; - } - } if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state, *(ssize_t *)newp)) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 390912ba..77407714 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -430,11 +430,8 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (ind == 0) { return; } - /* - * Avoid creating a new background thread just for the huge arena, which - * purges eagerly by default. - */ - if (have_background_thread && !arena_is_huge(ind)) { + + if (have_background_thread) { if (background_thread_create(tsdn_tsd(tsdn), ind)) { malloc_printf(": error in background thread " "creation for arena %u. Abort.\n", ind); From 5afff2e44e8d31ef1e9eb01d6b1327fe111835ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Fri, 3 May 2024 00:31:49 +0000 Subject: [PATCH 2388/2608] Simplify the logic in tcache_gc_small. --- src/tcache.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index f9235541..2114ff95 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -154,11 +154,10 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, sizeof(nflush_uint8)); tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; return; - } else { - tcache_slow->bin_flush_delay_items[szind] - = tcache_gc_item_delay_compute(szind); } + tcache_slow->bin_flush_delay_items[szind] + = tcache_gc_item_delay_compute(szind); tcache_bin_flush_small(tsd, tcache, cache_bin, szind, (unsigned)(ncached - nflush)); From 86f4851f5d1242f4a17f78afeb4b974be5b2b1aa Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 21 May 2024 15:52:57 -0700 Subject: [PATCH 2389/2608] Add clang static analyzer suppression macro. 
--- include/jemalloc/internal/jemalloc_internal_macros.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 40df5feb..407e868a 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -116,6 +116,18 @@ # define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #endif +#ifdef __clang_analyzer__ +# define JEMALLOC_CLANG_ANALYZER +#endif + +#ifdef JEMALLOC_CLANG_ANALYZER +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress)) +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v +#else +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) +#endif + #define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ JEMALLOC_DIAGNOSTIC_PUSH \ JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ From 70c94d7474c3c4f4b61303f042727d2dab66ad07 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Mon, 22 Jan 2024 15:34:58 -0800 Subject: [PATCH 2390/2608] Add batcher module. This can be used to batch up simple operation commands for later use by another thread. 
--- Makefile.in | 2 + include/jemalloc/internal/batcher.h | 44 ++++ include/jemalloc/internal/witness.h | 3 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/batcher.c | 86 ++++++ test/unit/batcher.c | 246 ++++++++++++++++++ 13 files changed, 404 insertions(+), 9 deletions(-) create mode 100644 include/jemalloc/internal/batcher.h create mode 100644 src/batcher.c create mode 100644 test/unit/batcher.c diff --git a/Makefile.in b/Makefile.in index df244adb..ef6e1764 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,6 +98,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ + $(srcroot)src/batcher.c \ $(srcroot)src/bin.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ @@ -204,6 +205,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ + $(srcroot)test/unit/batcher.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h new file mode 100644 index 00000000..a435f0b7 --- /dev/null +++ b/include/jemalloc/internal/batcher.h @@ -0,0 +1,44 @@ +#ifndef JEMALLOC_INTERNAL_BATCHER_H +#define JEMALLOC_INTERNAL_BATCHER_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" + +#define BATCHER_NO_IDX ((size_t)-1) + +typedef struct batcher_s batcher_t; +struct batcher_s { + /* + * Optimize for locality -- nelems_max and nelems are always touched + * 
togehter, along with the front of the mutex. The end of the mutex is + * only touched if there's contention. + */ + atomic_zu_t nelems; + size_t nelems_max; + malloc_mutex_t mtx; +}; + +void batcher_init(batcher_t *batcher, size_t nelems_max); + +/* + * Returns an index (into some user-owned array) to use for pushing, or + * BATCHER_NO_IDX if no index is free. If the former, the caller must call + * batcher_push_end once done. + */ +size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, + size_t elems_to_push); +void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); + +/* + * Returns the number of items to pop, or BATCHER_NO_IDX if there are none. + * If the former, must be followed by a call to batcher_pop_end. + */ +size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); +void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); + +void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); +void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); +void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher); + +#endif /* JEMALLOC_INTERNAL_BATCHER_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 937ca2d5..afee1246 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -64,9 +64,10 @@ enum witness_rank_e { WITNESS_RANK_BASE, WITNESS_RANK_ARENA_LARGE, WITNESS_RANK_HOOK, + WITNESS_RANK_BIN, WITNESS_RANK_LEAF=0x1000, - WITNESS_RANK_BIN = WITNESS_RANK_LEAF, + WITNESS_RANK_BATCHER=WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 03c241ca..58bd7b3e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -378,4 +379,4 @@ - \ No newline at end of file + diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 5d23d8e2..6e59c035 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -377,4 +378,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 8eaab36b..db06fc6d 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -377,4 +378,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index cd871379..01de0dcb 100644 --- 
a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -38,6 +38,7 @@ + @@ -377,4 +378,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 514368aa..82ad3e35 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -16,6 +16,9 @@ Source Files + + Source Files + Source Files @@ -197,4 +200,4 @@ Source Files - \ No newline at end of file + diff --git a/src/batcher.c b/src/batcher.c new file mode 100644 index 00000000..19af7d83 --- /dev/null +++ b/src/batcher.c @@ -0,0 +1,86 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/batcher.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/atomic.h" + +void +batcher_init(batcher_t *batcher, size_t nelems_max) { + atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); + batcher->nelems_max = nelems_max; + malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, + malloc_mutex_rank_exclusive); +} + +/* + * Returns an index (into some user-owned array) to use for pushing, or + * BATCHER_NO_IDX if no index is free. 
+ */ +size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, + size_t elems_to_push) { + assert(elems_to_push > 0); + size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + if (nelems_guess + elems_to_push > batcher->nelems_max) { + return BATCHER_NO_IDX; + } + malloc_mutex_lock(tsdn, &batcher->mtx); + size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + if (nelems + elems_to_push > batcher->nelems_max) { + malloc_mutex_unlock(tsdn, &batcher->mtx); + return BATCHER_NO_IDX; + } + assert(elems_to_push <= batcher->nelems_max - nelems); + /* + * We update nelems at push time (instead of during pop) so that other + * racing accesses of the batcher can fail fast instead of trying to + * acquire a mutex only to discover that there's no space for them. + */ + atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + return nelems; +} + +void +batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_assert_owner(tsdn, &batcher->mtx); + assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) > 0); + malloc_mutex_unlock(tsdn, &batcher->mtx); +} + +size_t +batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { + size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + assert(nelems_guess <= batcher->nelems_max); + if (nelems_guess == 0) { + return BATCHER_NO_IDX; + } + malloc_mutex_lock(tsdn, &batcher->mtx); + size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); + assert(nelems <= batcher->nelems_max); + if (nelems == 0) { + malloc_mutex_unlock(tsdn, &batcher->mtx); + return BATCHER_NO_IDX; + } + atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); + return nelems; +} + +void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { + assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); + malloc_mutex_unlock(tsdn, &batcher->mtx); +} + +void +batcher_prefork(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_prefork(tsdn, &batcher->mtx); +} + +void 
+batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_postfork_parent(tsdn, &batcher->mtx); +} + +void +batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_postfork_child(tsdn, &batcher->mtx); +} diff --git a/test/unit/batcher.c b/test/unit/batcher.c new file mode 100644 index 00000000..df9d3e5b --- /dev/null +++ b/test/unit/batcher.c @@ -0,0 +1,246 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/batcher.h" + +TEST_BEGIN(test_simple) { + enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; + batcher_t batcher; + size_t data[NELEMS_MAX]; + for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { + batcher_init(&batcher, nelems); + for (int run = 0; run < NRUNS; run++) { + for (int i = 0; i < NELEMS_MAX; i++) { + data[i] = (size_t)-1; + } + for (size_t i = 0; i < nelems; i++) { + size_t idx = batcher_push_begin(TSDN_NULL, + &batcher, 1); + assert_zu_eq(i, idx, "Wrong index"); + assert_zu_eq((size_t)-1, data[idx], + "Expected uninitialized slot"); + data[idx] = DATA_BASE_VAL + i; + batcher_push_end(TSDN_NULL, &batcher); + } + if (nelems > 0) { + size_t idx = batcher_push_begin(TSDN_NULL, + &batcher, 1); + assert_zu_eq(BATCHER_NO_IDX, idx, + "Shouldn't be able to push into a full " + "batcher"); + } + + size_t npop = batcher_pop_begin(TSDN_NULL, &batcher); + if (nelems == 0) { + assert_zu_eq(npop, BATCHER_NO_IDX, + "Shouldn't get any items out of an empty " + "batcher"); + } else { + assert_zu_eq(npop, nelems, + "Wrong number of elements popped"); + } + for (size_t i = 0; i < nelems; i++) { + assert_zu_eq(data[i], DATA_BASE_VAL + i, + "Item popped out of order!"); + } + if (nelems != 0) { + batcher_pop_end(TSDN_NULL, &batcher); + } + } + } +} +TEST_END + +TEST_BEGIN(test_multi_push) { + size_t idx, nelems; + batcher_t batcher; + batcher_init(&batcher, 11); + /* Push two at a time, 5 times, for 10 total. 
*/ + for (int i = 0; i < 5; i++) { + idx = batcher_push_begin(TSDN_NULL, &batcher, 2); + assert_zu_eq(2 * i, idx, "Should push in order"); + batcher_push_end(TSDN_NULL, &batcher); + } + /* Pushing two more should fail -- would put us at 12 elems. */ + idx = batcher_push_begin(TSDN_NULL, &batcher, 2); + assert_zu_eq(BATCHER_NO_IDX, idx, "Should be out of space"); + /* But one more should work */ + idx = batcher_push_begin(TSDN_NULL, &batcher, 1); + assert_zu_eq(10, idx, "Should be out of space"); + batcher_push_end(TSDN_NULL, &batcher); + nelems = batcher_pop_begin(TSDN_NULL, &batcher); + batcher_pop_end(TSDN_NULL, &batcher); + assert_zu_eq(11, nelems, "Should have popped everything"); +} +TEST_END + +enum { + STRESS_TEST_ELEMS = 10, + STRESS_TEST_THREADS = 4, + STRESS_TEST_OPS = 1000 * 1000, + STRESS_TEST_PUSH_TO_POP_RATIO = 5, +}; + +typedef struct stress_test_data_s stress_test_data_t; +struct stress_test_data_s { + batcher_t batcher; + mtx_t pop_mtx; + atomic_u32_t thread_id; + + uint32_t elems_data[STRESS_TEST_ELEMS]; + size_t push_count[STRESS_TEST_ELEMS]; + size_t pop_count[STRESS_TEST_ELEMS]; + atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; + atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; +}; + +/* + * Note: 0-indexed. If one element is set and you want to find it, you call + * get_nth_set(elems, 0). + */ +static size_t +get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { + size_t ntrue = 0; + for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { + if (elems_owned[i]) { + ntrue++; + } + if (ntrue > n) { + return i; + } + } + assert_not_reached("Asked for the %zu'th set element when < %zu are " + "set", + n, n); + /* Just to silence a compiler warning. 
*/ + return 0; +} + +static void * +stress_test_thd(void *arg) { + stress_test_data_t *data = arg; + size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, + ATOMIC_RELAXED); + + size_t nelems_owned = 0; + bool elems_owned[STRESS_TEST_ELEMS] = {0}; + size_t local_push_count[STRESS_TEST_ELEMS] = {0}; + size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; + + for (int i = 0; i < STRESS_TEST_OPS; i++) { + size_t rnd = prng_range_zu(&prng, + STRESS_TEST_PUSH_TO_POP_RATIO); + if (rnd == 0 || nelems_owned == 0) { + size_t nelems = batcher_pop_begin(TSDN_NULL, + &data->batcher); + if (nelems == BATCHER_NO_IDX) { + continue; + } + for (size_t i = 0; i < nelems; i++) { + uint32_t elem = data->elems_data[i]; + assert_false(elems_owned[elem], + "Shouldn't already own what we just " + "popped"); + elems_owned[elem] = true; + nelems_owned++; + local_pop_count[elem]++; + data->pop_count[elem]++; + } + batcher_pop_end(TSDN_NULL, &data->batcher); + } else { + size_t elem_to_push_idx = prng_range_zu(&prng, + nelems_owned); + size_t elem = get_nth_set(elems_owned, + elem_to_push_idx); + assert_true( + elems_owned[elem], + "Should own element we're about to pop"); + elems_owned[elem] = false; + local_push_count[elem]++; + data->push_count[elem]++; + nelems_owned--; + size_t idx = batcher_push_begin(TSDN_NULL, + &data->batcher, 1); + assert_zu_ne(idx, BATCHER_NO_IDX, + "Batcher can't be full -- we have one of its " + "elems!"); + data->elems_data[idx] = (uint32_t)elem; + batcher_push_end(TSDN_NULL, &data->batcher); + } + } + + /* Push all local elems back, flush local counts to the shared ones. 
*/ + size_t push_idx = 0; + if (nelems_owned != 0) { + push_idx = batcher_push_begin(TSDN_NULL, &data->batcher, + nelems_owned); + assert_zu_ne(BATCHER_NO_IDX, push_idx, + "Should be space to push"); + } + for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { + if (elems_owned[i]) { + data->elems_data[push_idx] = (uint32_t)i; + push_idx++; + local_push_count[i]++; + data->push_count[i]++; + } + atomic_fetch_add_zu( + &data->atomic_push_count[i], local_push_count[i], + ATOMIC_RELAXED); + atomic_fetch_add_zu( + &data->atomic_pop_count[i], local_pop_count[i], + ATOMIC_RELAXED); + } + if (nelems_owned != 0) { + batcher_push_end(TSDN_NULL, &data->batcher); + } + + return NULL; +} + +TEST_BEGIN(test_stress) { + stress_test_data_t data; + batcher_init(&data.batcher, STRESS_TEST_ELEMS); + bool err = mtx_init(&data.pop_mtx); + assert_false(err, "mtx_init failure"); + atomic_store_u32(&data.thread_id, 0, ATOMIC_RELAXED); + for (int i = 0; i < STRESS_TEST_ELEMS; i++) { + data.push_count[i] = 0; + data.pop_count[i] = 0; + atomic_store_zu(&data.atomic_push_count[i], 0, ATOMIC_RELAXED); + atomic_store_zu(&data.atomic_pop_count[i], 0, ATOMIC_RELAXED); + + size_t idx = batcher_push_begin(TSDN_NULL, &data.batcher, 1); + assert_zu_eq(i, idx, "Should push in order"); + data.elems_data[idx] = i; + batcher_push_end(TSDN_NULL, &data.batcher); + } + + thd_t threads[STRESS_TEST_THREADS]; + for (int i = 0; i < STRESS_TEST_THREADS; i++) { + thd_create(&threads[i], stress_test_thd, &data); + } + for (int i = 0; i < STRESS_TEST_THREADS; i++) { + thd_join(threads[i], NULL); + } + for (int i = 0; i < STRESS_TEST_ELEMS; i++) { + assert_zu_ne(0, data.push_count[i], + "Should have done something!"); + assert_zu_eq(data.push_count[i], data.pop_count[i], + "every element should be pushed and popped an equal number " + "of times"); + assert_zu_eq(data.push_count[i], + atomic_load_zu(&data.atomic_push_count[i], ATOMIC_RELAXED), + "atomic and non-atomic count should be equal given proper " + 
"synchronization"); + assert_zu_eq(data.pop_count[i], + atomic_load_zu(&data.atomic_pop_count[i], ATOMIC_RELAXED), + "atomic and non-atomic count should be equal given proper " + "synchronization"); + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_simple, test_multi_push, test_stress); +} From c085530c711fb233203963cd93dfa9339b0b9980 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 2 Feb 2024 13:20:14 -0800 Subject: [PATCH 2391/2608] Tcache batching: Plumbing In the next commit, we'll start using the batcher to eliminate mutex traffic. To avoid cluttering up that commit with the random bits of busy-work it entails, we'll centralize them here. This commit introduces: - A batched bin type. - The ability to mix batched and unbatched bins in the arena. - Conf parsing to set batches per size and a max batched size. - mallctl access to the corresponding opt-namespace keys. - Stats output of the above. --- include/jemalloc/internal/arena_inlines_b.h | 17 ++++++- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/bin.h | 22 +++++++-- include/jemalloc/internal/bin_info.h | 9 ++++ src/arena.c | 54 ++++++++++----------- src/bin.c | 18 +++++-- src/bin_info.c | 13 +++++ src/ctl.c | 7 +++ src/jemalloc.c | 10 ++++ src/stats.c | 2 + test/analyze/sizes.c | 2 + 11 files changed, 121 insertions(+), 35 deletions(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a891b35c..18a72e7c 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -604,10 +604,25 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } +static inline bool +arena_bin_has_batch(szind_t binind) { + return binind < bin_info_nbatched_sizes; +} + static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); - return shard0 + 
binshard; + bin_t *ret; + if (arena_bin_has_batch(binind)) { + ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard); + } else { + ret = shard0 + binshard; + } + assert(binind >= SC_NBINS - 1 + || (uintptr_t)ret < (uintptr_t)arena + + arena_bin_offsets[binind + 1]); + + return ret; } #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 803ed25c..56e12f95 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -104,7 +104,7 @@ struct arena_s { JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_t all_bins[0]; + bin_with_batch_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index ed27c18f..36fce04f 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,12 +2,15 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/batcher.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" +#define BIN_REMOTE_FREE_ELEMS_MAX 16 + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -42,6 +45,19 @@ struct bin_s { edata_list_active_t slabs_full; }; +typedef struct bin_remote_free_data_s bin_remote_free_data_t; +struct bin_remote_free_data_s { + void *ptr; + edata_t *slab; +}; + +typedef struct bin_with_batch_s bin_with_batch_t; +struct bin_with_batch_s { + bin_t bin; + batcher_t remote_frees; + bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; +}; + /* A set of sharded bins of the same size class. 
*/ typedef struct bins_s bins_t; struct bins_s { @@ -57,9 +73,9 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, bool bin_init(bin_t *bin); /* Forking. */ -void bin_prefork(tsdn_t *tsdn, bin_t *bin); -void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); -void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); +void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch); /* Stats. */ static inline void diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index b6175550..f743b7d8 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -44,6 +44,15 @@ struct bin_info_s { bitmap_info_t bitmap_info; }; +/* The maximum size a size class can be and still get batching behavior. */ +extern size_t opt_bin_info_max_batched_size; +/* The number of batches per batched size class. 
*/ +extern size_t opt_bin_info_remote_free_max_batch; + +extern szind_t bin_info_nbatched_sizes; +extern unsigned bin_info_nbatched_bins; +extern unsigned bin_info_nunbatched_bins; + extern bin_info_t bin_infos[SC_NBINS]; void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); diff --git a/src/arena.c b/src/arena.c index 1e5289e4..71ef26f5 100644 --- a/src/arena.c +++ b/src/arena.c @@ -45,7 +45,6 @@ size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; uint32_t arena_bin_offsets[SC_NBINS]; -static unsigned nbins_total; static unsigned huge_arena_ind; @@ -1672,7 +1671,6 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; - unsigned i; if (ind == 0) { base = b0get(); @@ -1685,15 +1683,12 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + - sizeof(bin_t) * nbins_total; + sizeof(bin_with_batch_t) * bin_info_nbatched_bins + + sizeof(bin_t) * bin_info_nunbatched_bins; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - assert((uintptr_t)&arena->all_bins[nbins_total -1] + sizeof(bin_t) <= - (uintptr_t)arena + arena_size); - ) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); @@ -1733,12 +1728,13 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. 
*/ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (i = 0; i < nbins_total; i++) { - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bool err = bin_init(&arena->all_bins[i]); - ) - if (err) { - goto label_error; + for (unsigned i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bool err = bin_init(bin); + if (err) { + goto label_error; + } } } @@ -1882,8 +1878,9 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { ) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; - nbins_total += bin_infos[i].n_shards; - cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); + uint32_t bin_sz = (i < bin_info_nbatched_sizes + ? sizeof(bin_with_batch_t) : sizeof(bin_t)); + cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; } return pa_central_init(&arena_pa_central_global, base, hpa, &hpa_hooks_default); @@ -1933,19 +1930,21 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < nbins_total; i++) { - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bin_prefork(tsdn, &arena->all_bins[i]); - ) + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bin_prefork(tsdn, bin, arena_bin_has_batch(i)); + } } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - for (unsigned i = 0; i < nbins_total; i++) { - JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bin_postfork_parent(tsdn, &arena->all_bins[i]); - ) + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i)); + } } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -1982,10 +1981,11 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (unsigned i = 0; i < nbins_total; i++) { - 
JEMALLOC_SUPPRESS_WARN_ON_USAGE( - bin_postfork_child(tsdn, &arena->all_bins[i]); - ) + for (szind_t i = 0; i < SC_NBINS; i++) { + for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { + bin_t *bin = arena_get_bin(arena, i, j); + bin_postfork_child(tsdn, bin, arena_bin_has_batch(i)); + } } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); diff --git a/src/bin.c b/src/bin.c index fa204587..b9b4be2c 100644 --- a/src/bin.c +++ b/src/bin.c @@ -54,16 +54,28 @@ bin_init(bin_t *bin) { } void -bin_prefork(tsdn_t *tsdn, bin_t *bin) { +bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_prefork(tsdn, &bin->lock); + if (has_batch) { + bin_with_batch_t *batched = (bin_with_batch_t *)bin; + batcher_prefork(tsdn, &batched->remote_frees); + } } void -bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_parent(tsdn, &bin->lock); + if (has_batch) { + bin_with_batch_t *batched = (bin_with_batch_t *)bin; + batcher_postfork_parent(tsdn, &batched->remote_frees); + } } void -bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { +bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_child(tsdn, &bin->lock); + if (has_batch) { + bin_with_batch_t *batched = (bin_with_batch_t *)bin; + batcher_postfork_child(tsdn, &batched->remote_frees); + } } diff --git a/src/bin_info.c b/src/bin_info.c index 8629ef88..27f0be17 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,8 +3,15 @@ #include "jemalloc/internal/bin_info.h" +size_t opt_bin_info_max_batched_size; +size_t opt_bin_info_remote_free_max_batch; + bin_info_t bin_infos[SC_NBINS]; +szind_t bin_info_nbatched_sizes; +unsigned bin_info_nbatched_bins; +unsigned bin_info_nunbatched_bins; + static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { @@ -20,6 +27,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bitmap_info_t bitmap_info = 
BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; + if (bin_info->reg_size <= opt_bin_info_max_batched_size) { + bin_info_nbatched_sizes++; + bin_info_nbatched_bins += bin_info->n_shards; + } else { + bin_info_nunbatched_bins += bin_info->n_shards; + } } } diff --git a/src/ctl.c b/src/ctl.c index 3b90aa15..1b76b792 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -129,6 +129,8 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) +CTL_PROTO(opt_max_batched_size) +CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -480,6 +482,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, + {NAME("max_batched_size"), CTL(opt_max_batched_size)}, + {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), @@ -2203,6 +2207,9 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) +CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) +CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, + size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, diff --git a/src/jemalloc.c b/src/jemalloc.c index 77407714..8f40e0cc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1325,6 +1325,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } + CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size, + 
"max_batched_size", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, + "remote_free_max_batch", 0, + BIN_REMOTE_FREE_ELEMS_MAX, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) + if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( v, vlen); diff --git a/src/stats.c b/src/stats.c index 026a4f54..359a252c 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1555,6 +1555,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") + OPT_WRITE_SIZE_T("max_batched_size") + OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index a48c4f48..cfb5ce51 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -35,6 +35,8 @@ main(void) { P(arena_t); P(arena_stats_t); P(base_t); + P(bin_t); + P(bin_with_batch_t); P(decay_t); P(edata_t); P(ecache_t); From 6e568488500b12441094e084f89b1a1da784f39b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 7 Feb 2024 10:21:53 -0800 Subject: [PATCH 2392/2608] Tcache: Split up small/large handling. The main bits of shared code are the edata filtering and the stats flushing logic, both of which are fairly simple to read and not so painful to duplicate. The shared code comes at the cost of guarding all the subtle logic with `if (small)`, which doesn't feel worth it. 
--- src/tcache.c | 256 ++++++++++++++++++++++++++++++--------------------- 1 file changed, 152 insertions(+), 104 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 2114ff95..8bec5d6c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -312,20 +312,9 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, } } -JEMALLOC_ALWAYS_INLINE bool -tcache_bin_flush_match(edata_t *edata, unsigned cur_arena_ind, - unsigned cur_binshard, bool small) { - if (small) { - return edata_arena_ind_get(edata) == cur_arena_ind - && edata_binshard_get(edata) == cur_binshard; - } else { - return edata_arena_ind_get(edata) == cur_arena_ind; - } -} - JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { +tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -333,11 +322,7 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, */ tsdn_t *tsdn = tsd_tsdn(tsd); - if (small) { - assert(binind < SC_NBINS); - } else { - assert(binind < tcache_nbins_get(tcache_slow)); - } + assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); @@ -368,34 +353,19 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, unsigned cur_arena_ind = edata_arena_ind_get(edata); arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + unsigned cur_binshard = edata_binshard_get(edata); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, + cur_binshard); + assert(cur_binshard < bin_infos[binind].n_shards); /* - * These assignments are always overwritten when small is true, - * and their values are always ignored when small is false, but - * to avoid the technical UB 
when we pass them as parameters, we - * need to intialize them. + * If you're looking at profiles, you might think this + * is a good place to prefetch the bin stats, which are + * often a cache miss. This turns out not to be + * helpful on the workloads we've looked at, with moving + * the bin stats next to the lock seeming to do better. */ - unsigned cur_binshard = 0; - bin_t *cur_bin = NULL; - if (small) { - cur_binshard = edata_binshard_get(edata); - cur_bin = arena_get_bin(cur_arena, binind, - cur_binshard); - assert(cur_binshard < bin_infos[binind].n_shards); - /* - * If you're looking at profiles, you might think this - * is a good place to prefetch the bin stats, which are - * often a cache miss. This turns out not to be - * helpful on the workloads we've looked at, with moving - * the bin stats next to the lock seeming to do better. - */ - } - if (small) { - malloc_mutex_lock(tsdn, &cur_bin->lock); - } - if (!small && !arena_is_auto(cur_arena)) { - malloc_mutex_lock(tsdn, &cur_arena->large_mtx); - } + malloc_mutex_lock(tsdn, &cur_bin->lock); /* * If we acquired the right lock and have some stats to flush, @@ -404,53 +374,23 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, if (config_stats && tcache_arena == cur_arena && !merged_stats) { merged_stats = true; - if (small) { - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } else { - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } - } - - /* - * Large allocations need special prep done. Afterwards, we can - * drop the large lock. 
- */ - if (!small) { - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - - if (tcache_bin_flush_match(edata, cur_arena_ind, - cur_binshard, small)) { - large_dalloc_prep_locked(tsdn, - edata); - } - } - } - if (!small && !arena_is_auto(cur_arena)) { - malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += + cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } /* Deallocate whatever we can. */ unsigned ndeferred = 0; /* Init only to avoid used-uninitialized warning. */ arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - if (small) { - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - } + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); for (unsigned i = 0; i < nflush; i++) { void *ptr = ptrs->ptr[i]; edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); - if (!tcache_bin_flush_match(edata, cur_arena_ind, - cur_binshard, small)) { + if (edata_arena_ind_get(edata) != cur_arena_ind + || edata_binshard_get(edata) != cur_binshard) { /* * The object was allocated either via a * different arena, or a different bin in this @@ -462,34 +402,23 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, ndeferred++; continue; } - if (small) { - if (arena_dalloc_bin_locked_step(tsdn, - cur_arena, cur_bin, &dalloc_bin_info, - binind, edata, ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; - } - } else { - if (large_dalloc_safety_checks(edata, ptr, - binind)) { - /* See the comment in isfree. 
*/ - continue; - } - large_dalloc_finish(tsdn, edata); + if (arena_dalloc_bin_locked_step(tsdn, + cur_arena, cur_bin, &dalloc_bin_info, + binind, edata, ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; } } - if (small) { - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, - &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - } + arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, + &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); nflush = ndeferred; } /* Handle all deferred slab dalloc. */ - assert(small || dalloc_count == 0); for (unsigned i = 0; i < dalloc_count; i++) { edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); @@ -497,7 +426,6 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } if (config_stats && !merged_stats) { - if (small) { /* * The flush loop didn't happen to flush to this * thread's arena, so the stats didn't get merged. @@ -510,14 +438,132 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, bin->stats.nrequests += cache_bin->tstats.nrequests; cache_bin->tstats.nrequests = 0; malloc_mutex_unlock(tsdn, &bin->lock); - } else { + } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { + tcache_slow_t *tcache_slow = tcache->tcache_slow; + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. + */ + tsdn_t *tsdn = tsd_tsdn(tsd); + + assert(binind < tcache_nbins_get(tcache_slow)); + arena_t *tcache_arena = tcache_slow->arena; + assert(tcache_arena != NULL); + + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). 
+ */ + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); + tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); + + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + bool merged_stats = false; + while (nflush > 0) { + /* Lock the arena, or bin, associated with the first object. */ + edata_t *edata = item_edata[0].edata; + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + if (!arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); + } + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; arena_stats_large_flush_nrequests_add(tsdn, &tcache_arena->stats, binind, cache_bin->tstats.nrequests); cache_bin->tstats.nrequests = 0; } + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. + */ + for (unsigned i = 0; i < nflush; i++) { + void *ptr = ptrs->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + + if (edata_arena_ind_get(edata) == cur_arena_ind) { + large_dalloc_prep_locked(tsdn, + edata); + } + } + if (!arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + } + + /* Deallocate whatever we can. */ + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = ptrs->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + if (edata_arena_ind_get(edata) != cur_arena_ind) { + /* + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. 
+ */ + ptrs->ptr[ndeferred] = ptr; + item_edata[ndeferred].edata = edata; + ndeferred++; + continue; + } + if (large_dalloc_safety_checks(edata, ptr, binind)) { + /* See the comment in isfree. */ + continue; + } + large_dalloc_finish(tsdn, edata); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); + nflush = ndeferred; } + if (config_stats && !merged_stats) { + arena_stats_large_flush_nrequests_add(tsdn, + &tcache_arena->stats, binind, + cache_bin->tstats.nrequests); + cache_bin->tstats.nrequests = 0; + } +} + +JEMALLOC_ALWAYS_INLINE void +tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, + szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. 
+ */ + if (small) { + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, + ptrs, nflush); + } else { + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, + ptrs, nflush); + } } JEMALLOC_ALWAYS_INLINE void @@ -556,13 +602,15 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, true); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, + /* small */ true); } void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { - tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, false); + tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, + /* small */ false); } /* From 44d91cf2434796188486960a07771709c15b0c2b Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Wed, 7 Feb 2024 13:18:39 -0800 Subject: [PATCH 2393/2608] Tcache flush: Partition by bin before locking. This accomplishes two things: - It avoids a full array scan (and any attendant branch prediction misses, etc.) while holding the bin lock. - It allows us to know the number of items that will be flushed before flushing them, which will (in an upcoming commit) let us know if it's safe to use the batched flush (in which case we won't acquire the bin mutex). --- src/tcache.c | 110 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 74 insertions(+), 36 deletions(-) diff --git a/src/tcache.c b/src/tcache.c index 8bec5d6c..4dd5ccd6 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -347,29 +347,85 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin * thread-local ones to, we do so under one critical section. */ bool merged_stats = false; - while (nflush > 0) { - /* Lock the arena, or bin, associated with the first object. 
*/ - edata_t *edata = item_edata[0].edata; - unsigned cur_arena_ind = edata_arena_ind_get(edata); + + /* + * We maintain the invariant that all edatas yet to be flushed are + * contained in the half-open range [flush_start, flush_end). We'll + * repeatedly partition the array so that the unflushed items are at the + * end. + */ + unsigned flush_start = 0; + + while (flush_start < nflush) { + /* + * After our partitioning step, all objects to flush will be in + * the half-open range [prev_flush_start, flush_start), and + * flush_start will be updated to correspond to the next loop + * iteration. + */ + unsigned prev_flush_start = flush_start; + + edata_t *cur_edata = item_edata[flush_start].edata; + unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - unsigned cur_binshard = edata_binshard_get(edata); + unsigned cur_binshard = edata_binshard_get(cur_edata); bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); + /* - * If you're looking at profiles, you might think this - * is a good place to prefetch the bin stats, which are - * often a cache miss. This turns out not to be - * helpful on the workloads we've looked at, with moving - * the bin stats next to the lock seeming to do better. + * Start off the partition; item_edata[i] always matches itself + * of course. */ + flush_start++; + for (unsigned i = flush_start; i < nflush; i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + if (edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard) { + /* Swap the edatas. 
*/ + emap_batch_lookup_result_t temp_edata + = item_edata[flush_start]; + item_edata[flush_start] = item_edata[i]; + item_edata[i] = temp_edata; + /* Swap the pointers */ + void *temp_ptr = ptrs->ptr[flush_start]; + ptrs->ptr[flush_start] = ptrs->ptr[i]; + ptrs->ptr[i] = temp_ptr; + flush_start++; + } + } + /* Make sure we implemented partitioning correctly. */ + if (config_debug) { + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + edata_t *edata = item_edata[i].edata; + unsigned arena_ind = edata_arena_ind_get(edata); + assert(arena_ind == cur_arena_ind); + unsigned binshard = edata_binshard_get(edata); + assert(binshard == cur_binshard); + } + for (unsigned i = flush_start; i < nflush; i++) { + edata_t *edata = item_edata[i].edata; + assert(edata_arena_ind_get(edata) + != cur_arena_ind + || edata_binshard_get(edata) + != cur_binshard); + } + } + /* Actually do the flushing. */ malloc_mutex_lock(tsdn, &cur_bin->lock); - /* - * If we acquired the right lock and have some stats to flush, - * flush them. + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). */ if (config_stats && tcache_arena == cur_arena && !merged_stats) { @@ -379,29 +435,13 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin cache_bin->tstats.nrequests; cache_bin->tstats.nrequests = 0; } - - /* Deallocate whatever we can. */ - unsigned ndeferred = 0; + /* Next flush objects. */ /* Init only to avoid used-uninitialized warning. 
*/ arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = 0; i < nflush; i++) { + for (unsigned i = prev_flush_start; i < flush_start; i++) { void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - if (edata_arena_ind_get(edata) != cur_arena_ind - || edata_binshard_get(edata) != cur_binshard) { - /* - * The object was allocated either via a - * different arena, or a different bin in this - * arena. Either way, stash the object so that - * it can be handled in a future pass. - */ - ptrs->ptr[ndeferred] = ptr; - item_edata[ndeferred].edata = edata; - ndeferred++; - continue; - } + edata_t *edata = item_edata[i].edata; if (arena_dalloc_bin_locked_step(tsdn, cur_arena, cur_bin, &dalloc_bin_info, binind, edata, ptr)) { @@ -409,13 +449,11 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin dalloc_count++; } } - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); - - arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); - nflush = ndeferred; + arena_decay_ticks(tsdn, cur_arena, + flush_start - prev_flush_start); } /* Handle all deferred slab dalloc. */ From fc615739cbd15dcb4a60c611206d9b8817aab565 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Fri, 9 Feb 2024 16:08:45 -0800 Subject: [PATCH 2394/2608] Add batching to arena bins. This adds a fast-path for threads freeing a small number of allocations to bins which are not their "home-base" and which encounter lock contention in attempting to do so. In producer-consumer workflows, such small lock hold times can cause lock convoying that greatly increases overall bin mutex contention. 
--- Makefile.in | 1 + include/jemalloc/internal/arena_inlines_b.h | 132 +++++++++- include/jemalloc/internal/bin.h | 47 +++- include/jemalloc/internal/bin_info.h | 2 + src/arena.c | 38 ++- src/bin.c | 36 ++- src/bin_info.c | 15 +- src/ctl.c | 4 + src/jemalloc.c | 5 + src/stats.c | 1 + src/tcache.c | 173 ++++++++++--- test/include/test/fork.h | 32 +++ test/include/test/jemalloc_test.h.in | 5 + test/unit/bin_batching.c | 264 ++++++++++++++++++++ test/unit/bin_batching.sh | 10 + test/unit/fork.c | 35 +-- 16 files changed, 722 insertions(+), 78 deletions(-) create mode 100644 test/include/test/fork.h create mode 100644 test/unit/bin_batching.c create mode 100644 test/unit/bin_batching.sh diff --git a/Makefile.in b/Makefile.in index ef6e1764..94208f37 100644 --- a/Makefile.in +++ b/Makefile.in @@ -206,6 +206,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ $(srcroot)test/unit/batcher.c \ + $(srcroot)test/unit/bin_batching.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 18a72e7c..7f5f6bb0 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -563,10 +563,11 @@ arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info, * stats updates, which happen during finish (this lets running counts get left * in a register). 
*/ -JEMALLOC_ALWAYS_INLINE bool +JEMALLOC_ALWAYS_INLINE void arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr) { + void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, + unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(info, binind, slab, ptr); slab_data_t *slab_data = edata_slab_data_get(slab); @@ -586,12 +587,17 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (nfree == bin_info->nregs) { arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab, bin); - return true; + + if (*dalloc_slabs_count < ndalloc_slabs) { + dalloc_slabs[*dalloc_slabs_count] = slab; + (*dalloc_slabs_count)++; + } else { + edata_list_active_append(dalloc_slabs_extra, slab); + } } else if (nfree == 1 && slab != bin->slabcur) { arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab, bin); } - return false; } JEMALLOC_ALWAYS_INLINE void @@ -604,11 +610,129 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind, + edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count, + edata_list_active_t *dalloc_slabs_extra) { + assert(binind < bin_info_nbatched_sizes); + bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; + size_t nelems_to_pop = batcher_pop_begin(tsdn, + &batched_bin->remote_frees); + + bin_batching_test_mid_pop(nelems_to_pop); + if (nelems_to_pop == BATCHER_NO_IDX) { + malloc_mutex_assert_not_owner(tsdn, + &batched_bin->remote_frees.mtx); + return; + } else { + malloc_mutex_assert_owner(tsdn, + &batched_bin->remote_frees.mtx); + } + + bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; + for (size_t i = 0; i < 
nelems_to_pop; i++) { + remote_free_data[i] = batched_bin->remote_free_data[i]; + } + batcher_pop_end(tsdn, &batched_bin->remote_frees); + + for (size_t i = 0; i < nelems_to_pop; i++) { + arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info, + binind, remote_free_data[i].slab, remote_free_data[i].ptr, + dalloc_slabs, ndalloc_slabs, dalloc_count, + dalloc_slabs_extra); + } +} + +typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; +struct arena_bin_flush_batch_state_s { + arena_dalloc_bin_locked_info_t info; + + /* + * Bin batching is subtle in that there are unusual edge cases in which + * it can trigger the deallocation of more slabs than there were items + * flushed (say, if every original deallocation triggered a slab + * deallocation, and so did every batched one). So we keep a small + * backup array for any "extra" slabs, as well as a a list to allow a + * dynamic number of ones exceeding that array. + */ + edata_t *dalloc_slabs[8]; + unsigned dalloc_slab_count; + edata_list_active_t dalloc_slabs_extra; +}; + +JEMALLOC_ALWAYS_INLINE unsigned +arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) { + if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) { + return bin_batching_test_ndalloc_slabs_max; + } + return preallocated_slabs; +} + +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + unsigned binind, arena_bin_flush_batch_state_t *state) { + if (binind >= bin_info_nbatched_sizes) { + return; + } + + arena_dalloc_bin_locked_begin(&state->info, binind); + state->dalloc_slab_count = 0; + edata_list_active_init(&state->dalloc_slabs_extra); + + unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs) + / sizeof(state->dalloc_slabs[0])); + unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( + preallocated_slabs); + + arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, + state->dalloc_slabs, ndalloc_slabs, + 
&state->dalloc_slab_count, &state->dalloc_slabs_extra); +} + +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + unsigned binind, arena_bin_flush_batch_state_t *state) { + if (binind >= bin_info_nbatched_sizes) { + return; + } + + arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info); +} + static inline bool arena_bin_has_batch(szind_t binind) { return binind < bin_info_nbatched_sizes; } +JEMALLOC_ALWAYS_INLINE void +arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, + unsigned binind, arena_bin_flush_batch_state_t *state) { + if (!arena_bin_has_batch(binind)) { + return; + } + /* + * The initialization of dalloc_slabs_extra is guarded by an + * arena_bin_has_batch check higher up the stack. But the clang + * analyzer forgets this down the stack, triggering a spurious error + * reported here. + */ + JEMALLOC_CLANG_ANALYZER_SUPPRESS { + bin_batching_test_after_unlock(state->dalloc_slab_count, + edata_list_active_empty(&state->dalloc_slabs_extra)); + } + for (unsigned i = 0; i < state->dalloc_slab_count; i++) { + edata_t *slab = state->dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + while (!edata_list_active_empty(&state->dalloc_slabs_extra)) { + edata_t *slab = edata_list_active_first( + &state->dalloc_slabs_extra); + edata_list_active_remove(&state->dalloc_slabs_extra, slab); + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } +} + static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 36fce04f..5b776c17 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -11,6 +11,51 @@ #define BIN_REMOTE_FREE_ELEMS_MAX 16 +#ifdef JEMALLOC_JET +extern void (*bin_batching_test_after_push_hook)(size_t idx); +extern void 
(*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); +extern void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, + bool list_empty); +#endif + +#ifdef JEMALLOC_JET +extern unsigned bin_batching_test_ndalloc_slabs_max; +#else +static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; +#endif + +JEMALLOC_ALWAYS_INLINE void +bin_batching_test_after_push(size_t idx) { + (void)idx; +#ifdef JEMALLOC_JET + if (bin_batching_test_after_push_hook != NULL) { + bin_batching_test_after_push_hook(idx); + } +#endif +} + +JEMALLOC_ALWAYS_INLINE void +bin_batching_test_mid_pop(size_t elems_to_pop) { + (void)elems_to_pop; +#ifdef JEMALLOC_JET + if (bin_batching_test_mid_pop_hook != NULL) { + bin_batching_test_mid_pop_hook(elems_to_pop); + } +#endif +} + +JEMALLOC_ALWAYS_INLINE void +bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { + (void)slab_dalloc_count; + (void)list_empty; +#ifdef JEMALLOC_JET + if (bin_batching_test_after_unlock_hook != NULL) { + bin_batching_test_after_unlock_hook(slab_dalloc_count, + list_empty); + } +#endif +} + /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -70,7 +115,7 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); /* Initializes a bin to empty. Returns true on error. */ -bool bin_init(bin_t *bin); +bool bin_init(bin_t *bin, unsigned binind); /* Forking. */ void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index f743b7d8..88d58c91 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -48,6 +48,8 @@ struct bin_info_s { extern size_t opt_bin_info_max_batched_size; /* The number of batches per batched size class. 
*/ extern size_t opt_bin_info_remote_free_max_batch; +// The max number of pending elems (across all batches) +extern size_t opt_bin_info_remote_free_max; extern szind_t bin_info_nbatched_sizes; extern unsigned bin_info_nbatched_bins; diff --git a/src/arena.c b/src/arena.c index 71ef26f5..21010279 100644 --- a/src/arena.c +++ b/src/arena.c @@ -661,10 +661,17 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { } static void -arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); + + if (arena_bin_has_batch(binind)) { + bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; + batcher_init(&batched_bin->remote_frees, + BIN_REMOTE_FREE_ELEMS_MAX); + } + if (bin->slabcur != NULL) { slab = bin->slabcur; bin->slabcur = NULL; @@ -815,7 +822,8 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j), + i); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -1080,8 +1088,18 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + /* + * This has some fields that are conditionally initialized down batch + * flush pathways. This can trigger static analysis warnings deeper + * down in the static. The accesses are guarded by the same checks as + * the initialization, but the analysis isn't able to track that across + * multiple stack frames. 
+ */ + arena_bin_flush_batch_state_t batch_flush_state + JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); + arena_bin_flush_batch_after_lock(tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill) { /* Try batch-fill from slabcur first. */ @@ -1136,7 +1154,11 @@ label_refill: cache_bin->tstats.nrequests = 0; } + arena_bin_flush_batch_before_unlock(tsdn, arena, bin, binind, + &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); + arena_bin_flush_batch_after_unlock(tsdn, arena, bin, binind, + &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); @@ -1427,12 +1449,16 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; arena_dalloc_bin_locked_begin(&info, binind); - bool ret = arena_dalloc_bin_locked_step(tsdn, arena, bin, - &info, binind, edata, ptr); + edata_t *dalloc_slabs[1]; + unsigned dalloc_slabs_count = 0; + arena_dalloc_bin_locked_step(tsdn, arena, bin, &info, binind, edata, + ptr, dalloc_slabs, /* ndalloc_slabs */ 1, &dalloc_slabs_count, + /* dalloc_slabs_extra */ NULL); arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); - if (ret) { + if (dalloc_slabs_count != 0) { + assert(dalloc_slabs[0] == edata); arena_slab_dalloc(tsdn, arena, edata); } } @@ -1731,7 +1757,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin); + bool err = bin_init(bin, i); if (err) { goto label_error; } diff --git a/src/bin.c b/src/bin.c index b9b4be2c..267aa0f3 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,6 +6,14 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" +#ifdef JEMALLOC_JET +unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; +void 
(*bin_batching_test_after_push_hook)(size_t push_idx); +void (*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); +void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, + bool list_empty); +#endif + bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -39,7 +47,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } bool -bin_init(bin_t *bin) { +bin_init(bin_t *bin, unsigned binind) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; @@ -50,6 +58,11 @@ bin_init(bin_t *bin) { if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } + if (arena_bin_has_batch(binind)) { + bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; + batcher_init(&batched_bin->remote_frees, + opt_bin_info_remote_free_max); + } return false; } @@ -57,8 +70,23 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_prefork(tsdn, &bin->lock); if (has_batch) { + /* + * The batch mutex has lower rank than the bin mutex (as it must + * -- it's acquired later). But during forking, we go + * bin-at-a-time, so that we acquire mutex on bin 0, then on + * the bin 0 batcher, then on bin 1. This is a safe ordering + * (it's ordered by the index of arenas and bins within those + * arenas), but will trigger witness errors that would + * otherwise force another level of arena forking that breaks + * bin encapsulation (because the witness API doesn't "know" + * about arena or bin ordering -- it just sees that the batcher + * has a lower rank than the bin). So instead we exclude the + * batcher mutex from witness checking during fork (which is + * the only time we touch multiple bins at once) by passing + * TSDN_NULL. 
+ */ bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_prefork(tsdn, &batched->remote_frees); + batcher_prefork(TSDN_NULL, &batched->remote_frees); } } @@ -67,7 +95,7 @@ bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_parent(tsdn, &bin->lock); if (has_batch) { bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_parent(tsdn, &batched->remote_frees); + batcher_postfork_parent(TSDN_NULL, &batched->remote_frees); } } @@ -76,6 +104,6 @@ bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { malloc_mutex_postfork_child(tsdn, &bin->lock); if (has_batch) { bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_child(tsdn, &batched->remote_frees); + batcher_postfork_child(TSDN_NULL, &batched->remote_frees); } } diff --git a/src/bin_info.c b/src/bin_info.c index 27f0be17..f8a64ae3 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,8 +3,19 @@ #include "jemalloc/internal/bin_info.h" -size_t opt_bin_info_max_batched_size; -size_t opt_bin_info_remote_free_max_batch; +/* + * We leave bin-batching disabled by default, with other settings chosen mostly + * empirically; across the test programs I looked at they provided the most bang + * for the buck. With other default settings, these choices for bin batching + * result in them consuming far less memory (even in the worst case) than the + * tcaches themselves, the arena, etc. + * Note that we always try to pop all bins on every arena cache bin lock + * operation, so the typical memory waste is far less than this (and only on + * hot bins, which tend to be large anyways). + */ +size_t opt_bin_info_max_batched_size = 0; /* 192 is a good default. 
*/ +size_t opt_bin_info_remote_free_max_batch = 4; +size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; bin_info_t bin_infos[SC_NBINS]; diff --git a/src/ctl.c b/src/ctl.c index 1b76b792..ab40050d 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -130,6 +130,7 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_max_batched_size) +CTL_PROTO(opt_remote_free_max) CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) @@ -483,6 +484,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, {NAME("max_batched_size"), CTL(opt_max_batched_size)}, + {NAME("remote_free_max"), CTL(opt_remote_free_max)}, {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, @@ -2208,6 +2210,8 @@ CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) +CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, + size_t) CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) diff --git a/src/jemalloc.c b/src/jemalloc.c index 8f40e0cc..89f4b29d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1334,6 +1334,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, + "remote_free_max", 0, + BIN_REMOTE_FREE_ELEMS_MAX, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( diff --git a/src/stats.c b/src/stats.c index 359a252c..f057e722 100644 --- 
a/src/stats.c +++ b/src/stats.c @@ -1556,6 +1556,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_SIZE_T("max_batched_size") + OPT_WRITE_SIZE_T("remote_free_max") OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") diff --git a/src/tcache.c b/src/tcache.c index 4dd5ccd6..564b5d9c 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -325,6 +325,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); + unsigned tcache_binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -341,6 +342,18 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); + /* + * There's an edge case where we need to deallocate more slabs than we + * have elements of dalloc_slabs. This can if we end up deallocating + * items batched by another thread in addition to ones flushed from the + * cache. Since this is not very likely (most small object + * deallocations don't free up a whole slab), we don't want to burn the + * stack space to keep those excess slabs in an array. Instead we'll + * maintain an overflow list. + */ + edata_list_active_t dalloc_slabs_extra; + edata_list_active_init(&dalloc_slabs_extra); + /* * We're about to grab a bunch of locks. If one of them happens to be * the one guarding the arena-level stats counters we flush our @@ -418,40 +431,136 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } } - /* Actually do the flushing. */ - malloc_mutex_lock(tsdn, &cur_bin->lock); /* - * Flush stats first, if that was the right lock. 
Note that we - * don't actually have to flush stats into the current thread's - * binshard. Flushing into any binshard in the same arena is - * enough; we don't expose stats on per-binshard basis (just - * per-bin). + * We never batch when flushing to our home-base bin shard, + * since it's likely that we'll have to acquire that lock anyway + * when flushing stats. + * + * A plausible check we could add to can_batch is + * '&& arena_is_auto(cur_arena)'. The motivation would be that + * we have a higher tolerance for dubious user assumptions + * around non-auto arenas (e.g. "if I deallocate every object I + * allocated, and then call tcache.flush, then the arena stats + * must reflect zero live allocations"). + * + * This is dubious for a couple reasons: + * - We already don't provide perfect fidelity for stats + * counting (e.g. for profiled allocations, whose size can + * inflate in stats). + * - Hanging load-bearing guarantees around stats impedes + * scalability in general. + * + * There are some "complete" strategies we could do instead: + * - Add a arena..quiesce call to pop all bins for users who + * do want those stats accounted for. + * - Make batchability a user-controllable per-arena option. + * - Do a batch pop after every mutex acquisition for which we + * want to provide accurate stats. This gives perfectly + * accurate stats, but can cause weird performance effects + * (because doing stats collection can now result in slabs + * becoming empty, and therefore purging, large mutex + * acquisition, etc.). + * - Propagate the "why" behind a flush down to the level of the + * batcher, and include a batch pop attempt down full tcache + * flushing pathways. This is just a lot of plumbing and + * internal complexity. + * + * We don't do any of these right now, but the decision calculus + * and tradeoffs are subtle enough that the reasoning was worth + * leaving in this comment. 
*/ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; + bool bin_is_batched = arena_bin_has_batch(binind); + bool home_binshard = (cur_arena == tcache_arena + && cur_binshard == tcache_binshard); + bool can_batch = (flush_start - prev_flush_start + <= opt_bin_info_remote_free_max_batch) + && !home_binshard && bin_is_batched; + + /* + * We try to avoid the batching pathway if we can, so we always + * at least *try* to lock. + */ + bool locked = false; + bool batched = false; + if (can_batch) { + locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } - /* Next flush objects. */ - /* Init only to avoid used-uninitialized warning. */ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, - cur_arena, cur_bin, &dalloc_bin_info, - binind, edata, ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; + if (can_batch && !locked) { + bin_with_batch_t *batched_bin = + (bin_with_batch_t *)cur_bin; + size_t push_idx = batcher_push_begin(tsdn, + &batched_bin->remote_frees, + flush_start - prev_flush_start); + bin_batching_test_after_push(push_idx); + + if (push_idx != BATCHER_NO_IDX) { + batched = true; + unsigned nbatched + = flush_start - prev_flush_start; + for (unsigned i = 0; i < nbatched; i++) { + unsigned src_ind = prev_flush_start + i; + batched_bin->remote_free_data[ + push_idx + i].ptr + = ptrs->ptr[src_ind]; + batched_bin->remote_free_data[ + push_idx + i].slab + = item_edata[src_ind].edata; + } + batcher_push_end(tsdn, + &batched_bin->remote_frees); } } - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, - &dalloc_bin_info); - malloc_mutex_unlock(tsdn, 
&cur_bin->lock); + if (!batched) { + if (!locked) { + malloc_mutex_lock(tsdn, &cur_bin->lock); + } + /* + * Flush stats first, if that was the right lock. Note + * that we don't actually have to flush stats into the + * current thread's binshard. Flushing into any binshard + * in the same arena is enough; we don't expose stats on + * per-binshard basis (just per-bin). + */ + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += + cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; + } + unsigned preallocated_slabs = nflush; + unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( + preallocated_slabs); + + /* Next flush objects our own objects. */ + /* Init only to avoid used-uninitialized warning. */ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr, dalloc_slabs, ndalloc_slabs, + &dalloc_count, &dalloc_slabs_extra); + } + /* + * Lastly, flush any batched objects (from other + * threads). 
+ */ + if (bin_is_batched) { + arena_bin_flush_batch_impl(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, + dalloc_slabs, ndalloc_slabs, + &dalloc_count, &dalloc_slabs_extra); + } + + arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, + &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + } arena_decay_ticks(tsdn, cur_arena, flush_start - prev_flush_start); } @@ -460,7 +569,11 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin for (unsigned i = 0; i < dalloc_count; i++) { edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - + } + while (!edata_list_active_empty(&dalloc_slabs_extra)) { + edata_t *slab = edata_list_active_first(&dalloc_slabs_extra); + edata_list_active_remove(&dalloc_slabs_extra, slab); + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); } if (config_stats && !merged_stats) { diff --git a/test/include/test/fork.h b/test/include/test/fork.h new file mode 100644 index 00000000..ac9b1858 --- /dev/null +++ b/test/include/test/fork.h @@ -0,0 +1,32 @@ +#ifndef JEMALLOC_TEST_FORK_H +#define JEMALLOC_TEST_FORK_H + +#ifndef _WIN32 + +#include + +static inline void +fork_wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail("Unexpected child termination due to " + "signal %d", WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} + +#endif + +#endif /* JEMALLOC_TEST_FORK_H */ diff --git a/test/include/test/jemalloc_test.h.in b/test/include/test/jemalloc_test.h.in index f9c506da..8b139db1 100644 --- a/test/include/test/jemalloc_test.h.in +++ b/test/include/test/jemalloc_test.h.in @@ -1,3 +1,6 @@ +#ifndef JEMALLOC_TEST_H +#define JEMALLOC_TEST_H + #ifdef __cplusplus extern "C" { #endif @@ 
-172,3 +175,5 @@ extern "C" { #ifdef __cplusplus } #endif + +#endif diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c new file mode 100644 index 00000000..525f59e0 --- /dev/null +++ b/test/unit/bin_batching.c @@ -0,0 +1,264 @@ +#include "test/jemalloc_test.h" +#include "test/fork.h" + +enum { + STRESS_THREADS = 3, + STRESS_OBJECTS_PER_THREAD = 1000, + STRESS_ALLOC_SZ = PAGE / 2, +}; + +typedef struct stress_thread_data_s stress_thread_data_t; +struct stress_thread_data_s { + unsigned thd_id; + atomic_zu_t *ready_thds; + atomic_zu_t *done_thds; + void **to_dalloc; +}; + +static atomic_zu_t push_failure_count; +static atomic_zu_t pop_attempt_results[2]; +static atomic_zu_t dalloc_zero_slab_count; +static atomic_zu_t dalloc_nonzero_slab_count; +static atomic_zu_t dalloc_nonempty_list_count; + +static bool +should_skip() { + return + /* + * We do batching operations on tcache flush pathways; we can't if + * caching is disabled. + */ + !opt_tcache || + /* We rely on tcache fill/flush operations of the size we use. */ + opt_tcache_max < STRESS_ALLOC_SZ + /* + * Some of the races we want to trigger are fiddly enough that they + * only show up under real concurrency. We add 1 to account for the + * main thread, which also does some work. + */ + || ncpus < STRESS_THREADS + 1; +} + +static void +increment_push_failure(size_t push_idx) { + if (push_idx == BATCHER_NO_IDX) { + atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); + } else { + assert_zu_lt(push_idx, 4, "Only 4 elems"); + volatile int x = 10000; + while (--x) { + /* Spin for a while, to try to provoke a failure. 
*/ + } + } +} + +static void +increment_pop_attempt(size_t elems_to_pop) { + bool elems = (elems_to_pop != BATCHER_NO_IDX); + atomic_fetch_add_zu(&pop_attempt_results[elems], 1, ATOMIC_RELAXED); +} + +static void +increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { + if (slab_dalloc_count > 0) { + atomic_fetch_add_zu(&dalloc_nonzero_slab_count, 1, + ATOMIC_RELAXED); + } else { + atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, + ATOMIC_RELAXED); + } + if (!list_empty) { + atomic_fetch_add_zu(&dalloc_nonempty_list_count, 1, + ATOMIC_RELAXED); + } +} + +static void flush_tcache() { + assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), + "Unexpected mallctl failure"); +} + +static void * +stress_thread(void *arg) { + stress_thread_data_t *data = arg; + uint64_t prng_state = data->thd_id; + atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); + while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) + != STRESS_THREADS) { + /* Spin */ + } + for (int i = 0; i < STRESS_OBJECTS_PER_THREAD; i++) { + dallocx(data->to_dalloc[i], 0); + if (prng_range_u64(&prng_state, 3) == 0) { + flush_tcache(); + } + + } + flush_tcache(); + atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); + return NULL; +} + +/* + * Run main_thread_fn in conditions that trigger all the various edge cases and + * subtle race conditions. 
+ */ +static void +stress_run(void (*main_thread_fn)(), int nruns) { + bin_batching_test_ndalloc_slabs_max = 1; + bin_batching_test_after_push_hook = &increment_push_failure; + bin_batching_test_mid_pop_hook = &increment_pop_attempt; + bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; + + atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); + atomic_store_zu(&pop_attempt_results[2], 0, ATOMIC_RELAXED); + atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); + atomic_store_zu(&dalloc_nonzero_slab_count, 0, ATOMIC_RELAXED); + atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); + + for (int run = 0; run < nruns; run++) { + thd_t thds[STRESS_THREADS]; + stress_thread_data_t thd_datas[STRESS_THREADS]; + atomic_zu_t ready_thds; + atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); + atomic_zu_t done_thds; + atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); + + void *ptrs[STRESS_THREADS][STRESS_OBJECTS_PER_THREAD]; + for (int i = 0; i < STRESS_THREADS; i++) { + thd_datas[i].thd_id = i; + thd_datas[i].ready_thds = &ready_thds; + thd_datas[i].done_thds = &done_thds; + thd_datas[i].to_dalloc = ptrs[i]; + for (int j = 0; j < STRESS_OBJECTS_PER_THREAD; j++) { + void *ptr = mallocx(STRESS_ALLOC_SZ, 0); + assert_ptr_not_null(ptr, "alloc failure"); + ptrs[i][j] = ptr; + } + } + for (int i = 0; i < STRESS_THREADS; i++) { + thd_create(&thds[i], stress_thread, &thd_datas[i]); + } + while (atomic_load_zu(&done_thds, ATOMIC_RELAXED) + != STRESS_THREADS) { + main_thread_fn(); + } + for (int i = 0; i < STRESS_THREADS; i++) { + thd_join(thds[i], NULL); + } + } + + bin_batching_test_ndalloc_slabs_max = (unsigned)-1; + bin_batching_test_after_push_hook = NULL; + bin_batching_test_mid_pop_hook = NULL; + bin_batching_test_after_unlock_hook = NULL; +} + +static void +do_allocs_frees() { + enum {NALLOCS = 32}; + flush_tcache(); + void *ptrs[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + ptrs[i] = mallocx(STRESS_ALLOC_SZ, 0); + } + for (int i = 0; i < 
NALLOCS; i++) { + dallocx(ptrs[i], 0); + } + flush_tcache(); +} + +static void +test_arena_reset_main_fn() { + do_allocs_frees(); +} + +TEST_BEGIN(test_arena_reset) { + int err; + unsigned arena; + unsigned old_arena; + + test_skip_if(should_skip()); + test_skip_if(opt_percpu_arena != percpu_arena_disabled); + + size_t arena_sz = sizeof(arena); + err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); + assert_d_eq(0, err, "Arena creation failed"); + + err = mallctl("thread.arena", &old_arena, &arena_sz, &arena, arena_sz); + assert_d_eq(0, err, "changing arena failed"); + + stress_run(&test_arena_reset_main_fn, /* nruns */ 10); + + flush_tcache(); + + char buf[100]; + malloc_snprintf(buf, sizeof(buf), "arena.%u.reset", arena); + err = mallctl(buf, NULL, NULL, NULL, 0); + assert_d_eq(0, err, "Couldn't change arena"); + + do_allocs_frees(); + + err = mallctl("thread.arena", NULL, NULL, &old_arena, arena_sz); + assert_d_eq(0, err, "changing arena failed"); +} +TEST_END + +static void +test_fork_main_fn() { +#ifndef _WIN32 + pid_t pid = fork(); + if (pid == -1) { + test_fail("Fork failure!"); + } else if (pid == 0) { + /* Child */ + do_allocs_frees(); + _exit(0); + } else { + fork_wait_for_child_exit(pid); + do_allocs_frees(); + } +#endif +} + +TEST_BEGIN(test_fork) { +#ifdef _WIN32 + test_skip("No fork on windows"); +#endif + test_skip_if(should_skip()); + stress_run(&test_fork_main_fn, /* nruns */ 10); +} +TEST_END + +static void +test_races_main_fn() { + do_allocs_frees(); +} + +TEST_BEGIN(test_races) { + test_skip_if(should_skip()); + + stress_run(&test_races_main_fn, /* nruns */ 400); + + assert_zu_lt(0, atomic_load_zu(&push_failure_count, ATOMIC_RELAXED), + "Should have seen some push failures"); + assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[0], ATOMIC_RELAXED), + "Should have seen some pop failures"); + assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[1], ATOMIC_RELAXED), + "Should have seen some pop successes"); + assert_zu_lt(0, 
atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), + "Expected some frees that didn't empty a slab"); + assert_zu_lt(0, atomic_load_zu(&dalloc_nonzero_slab_count, + ATOMIC_RELAXED), "expected some frees that emptied a slab"); + assert_zu_lt(0, atomic_load_zu(&dalloc_nonempty_list_count, + ATOMIC_RELAXED), "expected some frees that used the empty list"); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_arena_reset, + test_races, + test_fork); +} diff --git a/test/unit/bin_batching.sh b/test/unit/bin_batching.sh new file mode 100644 index 00000000..fef9bdc6 --- /dev/null +++ b/test/unit/bin_batching.sh @@ -0,0 +1,10 @@ +#!/bin/sh + +# This value of max_batched_size effectively requires all bins to be batched; +# our page limits are fuzzy, but we bound slab item counts to 2**32, so we'd be +# at multi-gigabyte minimum page sizes. +# The reason for this sort of hacky approach is that we want to +# allocate/deallocate PAGE/2-sized objects (to trigger the "non-empty" -> +# "empty" and "non-empty"-> "full" transitions often, which have special +# handling). But the value of PAGE isn't easily available in test scripts. 
+export MALLOC_CONF="narenas:2,bin_shards:1-1000000000:3,max_batched_size:1000000000,remote_free_max_batch:1,remote_free_max:4" diff --git a/test/unit/fork.c b/test/unit/fork.c index 447eb191..1a4c575e 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,32 +1,5 @@ #include "test/jemalloc_test.h" - -#ifndef _WIN32 -#include -#endif - -#ifndef _WIN32 -static void -wait_for_child_exit(int pid) { - int status; - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure."); - } - if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail("Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } - } -} -#endif +#include "test/fork.h" TEST_BEGIN(test_fork) { #ifndef _WIN32 @@ -64,7 +37,7 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - wait_for_child_exit(pid); + fork_wait_for_child_exit(pid); } #else test_skip("fork(2) is irrelevant to Windows"); @@ -87,7 +60,7 @@ do_fork_thd(void *arg) { test_fail("Exec failed"); } else { /* Parent */ - wait_for_child_exit(pid); + fork_wait_for_child_exit(pid); } return NULL; } @@ -124,7 +97,7 @@ TEST_BEGIN(test_fork_multithreaded) { do_test_fork_multithreaded(); _exit(0); } else { - wait_for_child_exit(pid); + fork_wait_for_child_exit(pid); } } #else From f9c0b5f7f8a917661db39289e38ec94d9d198f11 Mon Sep 17 00:00:00 2001 From: David Goldblatt Date: Tue, 20 Feb 2024 14:54:43 -0800 Subject: [PATCH 2395/2608] Bin batching: add some stats. This lets us easily see what fraction of flush load is being taken up by the bins, and helps guide future optimization approaches (for example: should we prefetch during cache bin fills? It depends on how many objects the average fill pops out of the batch). 
--- include/jemalloc/internal/arena_inlines_b.h | 6 +++ include/jemalloc/internal/batcher.h | 2 + include/jemalloc/internal/bin.h | 5 +++ include/jemalloc/internal/bin_stats.h | 5 +++ src/batcher.c | 10 +++++ src/ctl.c | 30 ++++++++++++++ src/stats.c | 43 +++++++++++++++++++++ src/tcache.c | 13 +++++++ 8 files changed, 114 insertions(+) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 7f5f6bb0..ea246cc5 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -630,6 +630,8 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, &batched_bin->remote_frees.mtx); } + size_t npushes = batcher_pop_get_pushes(tsdn, + &batched_bin->remote_frees); bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; for (size_t i = 0; i < nelems_to_pop; i++) { remote_free_data[i] = batched_bin->remote_free_data[i]; @@ -642,6 +644,10 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, dalloc_slabs, ndalloc_slabs, dalloc_count, dalloc_slabs_extra); } + + bin->stats.batch_pops++; + bin->stats.batch_pushes += npushes; + bin->stats.batch_pushed_elems += nelems_to_pop; } typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h index a435f0b7..40c8b35f 100644 --- a/include/jemalloc/internal/batcher.h +++ b/include/jemalloc/internal/batcher.h @@ -16,6 +16,7 @@ struct batcher_s { */ atomic_zu_t nelems; size_t nelems_max; + size_t npushes; malloc_mutex_t mtx; }; @@ -35,6 +36,7 @@ void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); * If the former, must be followed by a call to batcher_pop_end. 
*/ size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); +size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 5b776c17..c49afea6 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -138,6 +138,11 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { stats->reslabs += bin->stats.reslabs; stats->curslabs += bin->stats.curslabs; stats->nonfull_slabs += bin->stats.nonfull_slabs; + + stats->batch_failed_pushes += bin->stats.batch_failed_pushes; + stats->batch_pushes += bin->stats.batch_pushes; + stats->batch_pushed_elems += bin->stats.batch_pushed_elems; + malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index f95b9e9c..334c166d 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -48,6 +48,11 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. */ size_t nonfull_slabs; + + uint64_t batch_pops; + uint64_t batch_failed_pushes; + uint64_t batch_pushes; + uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; diff --git a/src/batcher.c b/src/batcher.c index 19af7d83..2570b3a9 100644 --- a/src/batcher.c +++ b/src/batcher.c @@ -9,6 +9,7 @@ void batcher_init(batcher_t *batcher, size_t nelems_max) { atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); batcher->nelems_max = nelems_max; + batcher->npushes = 0; malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, malloc_mutex_rank_exclusive); } @@ -37,9 +38,18 @@ size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, * acquire a mutex only to discover that there's no space for them. 
*/ atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + batcher->npushes++; return nelems; } +size_t +batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) { + malloc_mutex_assert_owner(tsdn, &batcher->mtx); + size_t npushes = batcher->npushes; + batcher->npushes = 0; + return npushes; +} + void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { malloc_mutex_assert_owner(tsdn, &batcher->mtx); diff --git a/src/ctl.c b/src/ctl.c index ab40050d..09188dd9 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -239,6 +239,10 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) +CTL_PROTO(stats_arenas_i_bins_j_batch_pops) +CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes) +CTL_PROTO(stats_arenas_i_bins_j_batch_pushes) +CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -694,6 +698,14 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, + {NAME("batch_pops"), + CTL(stats_arenas_i_bins_j_batch_pops)}, + {NAME("batch_failed_pushes"), + CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, + {NAME("batch_pushes"), + CTL(stats_arenas_i_bins_j_batch_pushes)}, + {NAME("batch_pushed_elems"), + CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} }; @@ -1246,6 +1258,16 @@ MUTEX_PROF_ARENA_MUTEXES assert(bstats->curslabs == 0); assert(bstats->nonfull_slabs == 0); } + + merged->batch_pops + += bstats->batch_pops; + merged->batch_failed_pushes + += bstats->batch_failed_pushes; + merged->batch_pushes + += bstats->batch_pushes; + merged->batch_pushed_elems + += 
bstats->batch_pushed_elems; + malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } @@ -3957,6 +3979,14 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, uint64_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, diff --git a/src/stats.c b/src/stats.c index f057e722..8419158a 100644 --- a/src/stats.c +++ b/src/stats.c @@ -358,6 +358,15 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, nreslabs, NULL, right, 13, uint64) COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, pops, NULL, right, 10, uint64) + COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, failed_push, NULL, right, 13, uint64) + COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, push, NULL, right, 7, uint64) + COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64) + COL_HDR(row, push_elem, NULL, right, 12, uint64) + COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64) + /* Don't want to actually print the name. 
*/ header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; @@ -405,6 +414,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; + uint64_t batch_pops, batch_failed_pushes, batch_pushes, + batch_pushed_elems; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -453,6 +464,15 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pops", &batch_pops, + uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", + &batch_failed_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", + &batch_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", + &batch_pushed_elems, uint64_t); + if (mutex) { mutex_stats_read_arena_bin(stats_arenas_mib, 5, col_mutex64, col_mutex32, uptime); @@ -487,6 +507,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); + emitter_json_kv(emitter, "batch_pops", + emitter_type_uint64, &batch_pops); + emitter_json_kv(emitter, "batch_failed_pushes", + emitter_type_uint64, &batch_failed_pushes); + emitter_json_kv(emitter, "batch_pushes", + emitter_type_uint64, &batch_pushes); + emitter_json_kv(emitter, "batch_pushed_elems", + emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit(emitter, NULL, col_mutex64, @@ -545,6 +573,21 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_nreslabs.uint64_val = nreslabs; col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); + col_pops.uint64_val = batch_pops; + col_pops_ps.uint64_val + = rate_per_second(batch_pops, uptime); + + col_failed_push.uint64_val = batch_failed_pushes; + col_failed_push_ps.uint64_val + = 
rate_per_second(batch_failed_pushes, uptime); + col_push.uint64_val = batch_pushes; + col_push_ps.uint64_val + = rate_per_second(batch_pushes, uptime); + + col_push_elem.uint64_val = batch_pushed_elems; + col_push_elem_ps.uint64_val + = rate_per_second(batch_pushed_elems, uptime); + /* * Note that mutex columns were initialized above, if mutex == * true. diff --git a/src/tcache.c b/src/tcache.c index 564b5d9c..03ec5136 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -482,6 +482,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin */ bool locked = false; bool batched = false; + bool batch_failed = false; if (can_batch) { locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } @@ -508,12 +509,24 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } batcher_push_end(tsdn, &batched_bin->remote_frees); + } else { + batch_failed = true; } } if (!batched) { if (!locked) { malloc_mutex_lock(tsdn, &cur_bin->lock); } + /* + * Unlike other stats (which only ever get flushed into + * a tcache's associated arena), batch_failed counts get + * accumulated into the bin where the push attempt + * failed. + */ + if (config_stats && batch_failed) { + cur_bin->stats.batch_failed_pushes++; + } + /* * Flush stats first, if that was the right lock. 
Note * that we don't actually have to flush stats into the From 90c627edb70e081e1298b79010478d2f804467f1 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 31 May 2024 10:28:58 -0700 Subject: [PATCH 2396/2608] Export hugepage size with `arenas.hugepage` --- src/ctl.c | 3 +++ src/stats.c | 4 ++++ test/unit/mallctl.c | 1 + 3 files changed, 8 insertions(+) diff --git a/src/ctl.c b/src/ctl.c index 09188dd9..4347dd2b 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -195,6 +195,7 @@ CTL_PROTO(arenas_dirty_decay_ms) CTL_PROTO(arenas_muzzy_decay_ms) CTL_PROTO(arenas_quantum) CTL_PROTO(arenas_page) +CTL_PROTO(arenas_hugepage) CTL_PROTO(arenas_tcache_max) CTL_PROTO(arenas_nbins) CTL_PROTO(arenas_nhbins) @@ -593,6 +594,7 @@ static const ctl_named_node_t arenas_node[] = { {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, + {NAME("hugepage"), CTL(arenas_hugepage)}, {NAME("tcache_max"), CTL(arenas_tcache_max)}, {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, @@ -3284,6 +3286,7 @@ arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) CTL_RO_NL_GEN(arenas_page, PAGE, size_t) +CTL_RO_NL_GEN(arenas_hugepage, HUGEPAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, global_do_not_change_tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, SC_NBINS, unsigned) CTL_RO_NL_GEN(arenas_nhbins, global_do_not_change_tcache_nbins, unsigned) diff --git a/src/stats.c b/src/stats.c index 8419158a..4df0ae62 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1698,6 +1698,10 @@ stats_general_print(emitter_t *emitter) { CTL_GET("arenas.page", &sv, size_t); emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); + CTL_GET("arenas.hugepage", &sv, size_t); + emitter_kv(emitter, "hugepage", "Hugepage size", emitter_type_size, + &sv); + if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { emitter_kv(emitter, "tcache_max", 
"Maximum thread-cached size class", emitter_type_size, &sv); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 9e5baff0..84cd3995 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -877,6 +877,7 @@ TEST_BEGIN(test_arenas_constants) { TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); + TEST_ARENAS_CONSTANT(size_t, hugepage, HUGEPAGE); TEST_ARENAS_CONSTANT(unsigned, nbins, SC_NBINS); TEST_ARENAS_CONSTANT(unsigned, nlextents, SC_NSIZES - SC_NBINS); From 91a6d230dba40ef2ef6e381b4c4fab5f5b0f6111 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 31 May 2024 06:35:48 -0700 Subject: [PATCH 2397/2608] Respect `hpa_min_purge_interval_ms` option Currently, hugepages aware allocator backend works together with classic one as a fallback for not yet supported allocations. When background threads are enabled wake up time for classic interfere with hpa as there were no checks inside hpa purging logic to check if we are not purging too frequently. If background thread is running and `hpa_should_purge` returns true, then we will purge, even if we purged less than hpa_min_purge_interval_ms ago. --- src/hpa.c | 10 ++++++ test/unit/hpa_background_thread.c | 52 ++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 6b1ae2ce..fe925ad4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -378,6 +378,16 @@ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + /* + * Make sure we respect purge interval setting and don't purge + * too frequently. 
+ */ + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + return false; + } + hpdata_t *to_purge = psset_pick_purge(&shard->psset); if (to_purge == NULL) { return false; diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 774ccb4a..e4abb63b 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -83,7 +83,36 @@ wait_until_thread_is_enabled(unsigned arena_id) { } static void -expect_purging(unsigned arena_ind, bool expect_deferred) { +expect_purging(unsigned arena_ind) { + size_t empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "Expected arena to start unused."); + + void *ptrs[2]; + ptrs[0] = mallocx(PAGE, + MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + ptrs[1] = mallocx(PAGE, + MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "All pages should be active"); + + dallocx(ptrs[0], MALLOCX_TCACHE_NONE); + expect_true(empty_ndirty == 0 || empty_ndirty == 1, + "Unexpected extra dirty page count: %zu", empty_ndirty); + + /* + * Wait for at least hpa_min_purge_interval_ms to trigger purge on next + * deallocation. 
+ */ + sleep_for_background_thread_interval(); + + dallocx(ptrs[1], MALLOCX_TCACHE_NONE); + empty_ndirty = get_empty_ndirty(arena_ind); + expect_zu_eq(0, empty_ndirty, "There are should be no dirty pages"); +} + +static void +expect_deferred_purging(unsigned arena_ind) { size_t empty_ndirty; empty_ndirty = get_empty_ndirty(arena_ind); @@ -103,20 +132,15 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { expect_zu_eq(0, empty_ndirty, "All pages should be active"); dallocx(ptr, MALLOCX_TCACHE_NONE); empty_ndirty = get_empty_ndirty(arena_ind); - if (expect_deferred) { - expect_true(empty_ndirty == 0 || empty_ndirty == 1 || - opt_prof, "Unexpected extra dirty page count: %zu", - empty_ndirty); - } else { - assert_zu_eq(0, empty_ndirty, - "Saw dirty pages without deferred purging"); - } + expect_true(empty_ndirty == 0 || empty_ndirty == 1 || + opt_prof, "Unexpected extra dirty page count: %zu", + empty_ndirty); if (empty_ndirty > 0) { observed_dirty_page = true; break; } } - expect_b_eq(expect_deferred, observed_dirty_page, ""); + expect_true(observed_dirty_page, ""); /* * Under high concurrency / heavy test load (e.g. using run_test.sh), @@ -125,7 +149,7 @@ expect_purging(unsigned arena_ind, bool expect_deferred) { */ unsigned retry = 0; while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 && - expect_deferred && (retry++ < 100)) { + (retry++ < 100)) { sleep_for_background_thread_interval(); } @@ -144,7 +168,7 @@ TEST_BEGIN(test_hpa_background_thread_purges) { * Our .sh sets dirty mult to 0, so all dirty pages should get purged * any time any thread frees. 
*/ - expect_purging(arena_ind, /* expect_deferred */ true); + expect_deferred_purging(arena_ind); } TEST_END @@ -158,11 +182,11 @@ TEST_BEGIN(test_hpa_background_thread_enable_disable) { unsigned arena_ind = create_arena(); set_background_thread_enabled(false); - expect_purging(arena_ind, false); + expect_purging(arena_ind); set_background_thread_enabled(true); wait_until_thread_is_enabled(arena_ind); - expect_purging(arena_ind, true); + expect_deferred_purging(arena_ind); } TEST_END From 867c6dd7dc88adb0489b8b815dd70c68807325fc Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 7 Jun 2024 05:32:24 -0700 Subject: [PATCH 2398/2608] Option to guard `hpa_min_purge_interval_ms` fix Change in `hpa_min_purge_interval_ms` handling logic is not backward compatible as it might increase memory usage. Now this logic guarded by `hpa_strict_min_purge_interval` option. When `hpa_strict_min_purge_interval` is true, we will purge no more than `hpa_min_purge_interval_ms`. When `hpa_strict_min_purge_interval` is false, old purging logic behaviour is preserved. Long term strategy migrate all users of hpa to new logic and then delete `hpa_strict_min_purge_interval` option. --- include/jemalloc/internal/hpa_opts.h | 12 +++++++++++- src/ctl.c | 4 ++++ src/hpa.c | 10 ++++++---- src/jemalloc.c | 4 ++++ src/stats.c | 1 + test/unit/hpa_background_thread.sh | 2 +- 6 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 6e58c86b..93add641 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -49,6 +49,14 @@ struct hpa_shard_opts_s { * Minimum amount of time between purges. */ uint64_t min_purge_interval_ms; + + /* + * Strictly respect minimum amout of time between purges. + * + * This is an option to provide backward compatibility for staged rollout of + * purging logic fix. 
+ */ + bool strict_min_purge_interval; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -69,7 +77,9 @@ struct hpa_shard_opts_s { /* hugify_delay_ms */ \ 10 * 1000, \ /* min_purge_interval_ms */ \ - 5 * 1000 \ + 5 * 1000, \ + /* strict_min_purge_interval */ \ + false \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index 4347dd2b..62589d77 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,6 +103,7 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) +CTL_PROTO(opt_hpa_strict_min_purge_interval) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -459,6 +460,7 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, + {NAME("hpa_strict_min_purge_interval"), CTL(opt_hpa_strict_min_purge_interval)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2193,6 +2195,8 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval, + opt_hpa_opts.strict_min_purge_interval, bool) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index fe925ad4..49d6b037 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -382,10 +382,12 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { * Make sure we respect purge interval setting and don't purge * too frequently. 
*/ - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); - if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { - return false; + if (shard->opts.strict_min_purge_interval) { + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + return false; + } } hpdata_t *to_purge = psset_pick_purge(&shard->psset); diff --git a/src/jemalloc.c b/src/jemalloc.c index 89f4b29d..abd7540f 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1554,6 +1554,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_min_purge_interval_ms", 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + CONF_HANDLE_BOOL( + opt_hpa_opts.strict_min_purge_interval, + "hpa_strict_min_purge_interval"); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/stats.c b/src/stats.c index 4df0ae62..726007f5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1564,6 +1564,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") + OPT_WRITE_BOOL("hpa_strict_min_purge_interval") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 33b70e19..666da8fc 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_strict_min_purge_interval:true,hpa_sec_nshards:0" From 21bcc0a8d49ab2944ae53c7e43f5c84fc8a34322 Mon Sep 17 00:00:00 2001 From: Guangli Dai 
Date: Thu, 6 Jun 2024 11:14:40 -0700 Subject: [PATCH 2399/2608] Make JEMALLOC_CXX_THROW definition compatible with newer C++ versions --- include/jemalloc/jemalloc_macros.h.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/jemalloc_macros.h.in b/include/jemalloc/jemalloc_macros.h.in index a0679af5..06f47b8a 100644 --- a/include/jemalloc/jemalloc_macros.h.in +++ b/include/jemalloc/jemalloc_macros.h.in @@ -52,7 +52,7 @@ #define MALLCTL_ARENAS_DESTROYED 4097 #if defined(__cplusplus) && defined(JEMALLOC_USE_CXX_THROW) -# define JEMALLOC_CXX_THROW throw() +# define JEMALLOC_CXX_THROW noexcept (true) #else # define JEMALLOC_CXX_THROW #endif From 8477ec9562632b0808874416cb2d11ad6fbf99ea Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 21 Jun 2024 15:21:52 -0700 Subject: [PATCH 2400/2608] Set dependent as false for all rtree reads without ownership --- include/jemalloc/internal/emap.h | 4 ++-- src/emap.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 08262f1f..7ac0ae95 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -186,13 +186,13 @@ emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { */ EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, + rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ false, /* init_missing */ false); if (elm == NULL) { return true; } rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, - /* dependent */ true); + /* dependent */ false); if (contents.edata == NULL || contents.metadata.state == extent_state_active || edata_state_in_transition(contents.metadata.state)) { diff --git a/src/emap.c b/src/emap.c index 9cc95a72..f7d5c25a 100644 --- a/src/emap.c +++ b/src/emap.c @@ -74,7 +74,7 @@ 
emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, } rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, - &emap->rtree, elm, /* dependent */ true); + &emap->rtree, elm, /* dependent */ false); if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, expected_state, forward, expanding)) { return NULL; From a1fcbebb186b8909693a2f543030f94e5f448f5f Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 23 May 2024 12:19:57 -0700 Subject: [PATCH 2401/2608] skip tcache GC for tcache_max unit test --- test/unit/tcache_max.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/tcache_max.sh b/test/unit/tcache_max.sh index 4480d733..0de75e4b 100644 --- a/test/unit/tcache_max.sh +++ b/test/unit/tcache_max.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="tcache_max:1024" +export MALLOC_CONF="tcache_max:1024,tcache_gc_incr_bytes:939524096" From c893fcd169fffca1b9d3156c6637a197765b82d0 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Mon, 10 Jun 2024 15:03:23 -0700 Subject: [PATCH 2402/2608] Change macOS mmap tag to fix conflict with CoreMedia MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tag 101 is assigned to "CoreMedia Capture Data", which makes for confusing output when debugging. To avoid conflicts, use a tag in the reserved application-specific range from 240–255 (inclusive). 
All assigned tags: https://github.com/apple-oss-distributions/xnu/blob/94d3b452840153a99b38a3a9659680b2a006908e/osfmk/mach/vm_statistics.h#L773-L775 --- src/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 8cf2fd9f..981aae9b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -17,7 +17,7 @@ #include /* ilog2 */ #endif #ifdef JEMALLOC_HAVE_VM_MAKE_TAG -#define PAGES_FD_TAG VM_MAKE_TAG(101U) +#define PAGES_FD_TAG VM_MAKE_TAG(254U) #else #define PAGES_FD_TAG -1 #endif From b66f689764e05084f5b995bf2f8d277b70e084fd Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 18 Jul 2024 07:58:51 -0700 Subject: [PATCH 2403/2608] Emit long string values without truncation There are few long options (`bin_shards` and `slab_sizes` for example) when they are specified and we emit statistics value gets truncated. Moved emitting logic for strings into separate `emitter_emit_str` function. It will try to emit string same way as before and if value is too long will fallback emiting rest partially with chunks of `BUF_SIZE`. Justification for long strings (longer than `BUF_SIZE`) is not supported. 
--- include/jemalloc/internal/emitter.h | 44 ++++++++++++++++------ test/unit/emitter.c | 58 +++++++++++++++++++++++++---- 2 files changed, 82 insertions(+), 20 deletions(-) diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index bc12fe92..11153254 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -118,6 +118,37 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, return out_fmt; } +static inline void +emitter_emit_str(emitter_t *emitter, emitter_justify_t justify, int width, + char *fmt, size_t fmt_size, const char *str) { +#define BUF_SIZE 256 + char buf[BUF_SIZE]; + size_t str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", str); + emitter_printf(emitter, + emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); + if (str_written < BUF_SIZE) { + return; + } + /* + * There is no support for long string justification at the moment as + * we output them partially with multiple malloc_snprintf calls and + * justufication will work correctly only withing one call. + * Fortunately this is not a big concern as we don't use justufication + * with long strings right now. + * + * We emitted leading quotation mark and trailing '\0', hence need to + * exclude extra characters from str shift. + */ + str += BUF_SIZE - 2; + do { + str_written = malloc_snprintf(buf, BUF_SIZE, "%s\"", str); + str += str_written >= BUF_SIZE ? BUF_SIZE - 1 : str_written; + emitter_printf(emitter, + emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); + } while (str_written >= BUF_SIZE); +#undef BUF_SIZE +} + /* * Internal. 
Emit the given value type in the relevant encoding (so that the * bool true gets mapped to json "true", but the string "true" gets mapped to @@ -128,8 +159,6 @@ emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, static inline void emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, emitter_type_t value_type, const void *value) { - size_t str_written; -#define BUF_SIZE 256 #define FMT_SIZE 10 /* * We dynamically generate a format string to emit, to let us use the @@ -138,7 +167,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, * cases. */ char fmt[FMT_SIZE]; - char buf[BUF_SIZE]; #define EMIT_SIMPLE(type, format) \ emitter_printf(emitter, \ @@ -167,15 +195,8 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, EMIT_SIMPLE(size_t, "%zu") break; case emitter_type_string: - str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", + emitter_emit_str(emitter, justify, width, fmt, FMT_SIZE, *(const char *const *)value); - /* - * We control the strings we output; we shouldn't get anything - * anywhere near the fmt size. 
- */ - assert(str_written < BUF_SIZE); - emitter_printf(emitter, - emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), buf); break; case emitter_type_uint32: EMIT_SIMPLE(uint32_t, "%" FMTu32) @@ -189,7 +210,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, default: unreachable(); } -#undef BUF_SIZE #undef FMT_SIZE } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index ef8f9ff5..af0da90d 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -222,6 +222,17 @@ emit_types(emitter_t *emitter) { ssize_t zd = -456; size_t zu = 456; const char *str = "string"; + const char *long_str = + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz"; uint32_t u32 = 789; uint64_t u64 = 10000000000ULL; @@ -232,8 +243,9 @@ emit_types(emitter_t *emitter) { emitter_kv(emitter, "k4", "K4", emitter_type_ssize, &zd); emitter_kv(emitter, "k5", "K5", emitter_type_size, &zu); emitter_kv(emitter, "k6", "K6", emitter_type_string, &str); - emitter_kv(emitter, "k7", "K7", emitter_type_uint32, &u32); - emitter_kv(emitter, "k8", "K8", emitter_type_uint64, &u64); + emitter_kv(emitter, "k7", "K7", emitter_type_string, &long_str); + emitter_kv(emitter, "k8", "K8", emitter_type_uint32, &u32); + emitter_kv(emitter, "k9", "K9", emitter_type_uint64, &u64); /* * We don't test the title type, since it's only used for tables. It's * tested in the emitter_table_row tests. 
@@ -249,8 +261,18 @@ static const char *types_json = "\t\"k4\": -456,\n" "\t\"k5\": 456,\n" "\t\"k6\": \"string\",\n" -"\t\"k7\": 789,\n" -"\t\"k8\": 10000000000\n" +"\t\"k7\": \"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\",\n" +"\t\"k8\": 789,\n" +"\t\"k9\": 10000000000\n" "}\n"; static const char *types_json_compact = "{" @@ -260,8 +282,18 @@ static const char *types_json_compact = "\"k4\":-456," "\"k5\":456," "\"k6\":\"string\"," - "\"k7\":789," - "\"k8\":10000000000" + "\"k7\":\"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\"," + "\"k8\":789," + "\"k9\":10000000000" "}"; static const char *types_table = "K1: false\n" @@ -270,8 +302,18 @@ static const char *types_table = "K4: -456\n" "K5: 456\n" "K6: \"string\"\n" -"K7: 789\n" -"K8: 10000000000\n"; +"K7: \"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\"\n" +"K8: 789\n" +"K9: 10000000000\n"; static void emit_modal(emitter_t *emitter) { From bc32ddff2da6e58df90b1762f17519a2c04b26b0 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Mon, 29 Jul 2024 11:28:41 -0700 Subject: [PATCH 2404/2608] Add usize to prof_sample_hook_t --- include/jemalloc/internal/prof_hook.h | 4 ++-- src/prof.c | 2 +- test/unit/prof_hook.c | 9 
+++++++-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 3c5ff8bf..087dadc6 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -20,8 +20,8 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); */ typedef void (*prof_dump_hook_t)(const char *filename); -/* ptr, size, backtrace vector, backtrace vector length */ -typedef void (*prof_sample_hook_t)(const void *, size_t, void **, unsigned); +/* ptr, size, backtrace vector, backtrace vector length, usize */ +typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, void **backtrace, unsigned backtrace_length, size_t usize); /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); diff --git a/src/prof.c b/src/prof.c index 6ae7f768..8fdc6f71 100644 --- a/src/prof.c +++ b/src/prof.c @@ -166,7 +166,7 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, if (prof_sample_hook != NULL) { prof_bt_t *bt = &tctx->gctx->bt; pre_reentrancy(tsd, NULL); - prof_sample_hook(ptr, size, bt->vec, bt->len); + prof_sample_hook(ptr, size, bt->vec, bt->len, usize); post_reentrancy(tsd); } } diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index a48b237b..fd2871e5 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -16,6 +16,7 @@ bool mock_prof_sample_free_hook_called = false; void *sampled_ptr = NULL; size_t sampled_ptr_sz = 0; +size_t sampled_ptr_usz = 0; void *free_sampled_ptr = NULL; size_t free_sampled_ptr_sz = 0; @@ -60,10 +61,11 @@ mock_dump_hook(const char *filename) { } void -mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len) { +mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len, size_t usz) { mock_prof_sample_hook_called = true; sampled_ptr = (void *)ptr; sampled_ptr_sz = sz; + sampled_ptr_usz = usz; for (unsigned i = 0; i < len; i++) { 
expect_ptr_not_null((void **)vec[i], "Backtrace should not contain NULL"); @@ -244,6 +246,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { "Should not have called prof_sample_free hook"); expect_ptr_null(sampled_ptr, "Unexpected sampled ptr"); expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size"); + expect_zu_eq(sampled_ptr_usz, 0, "Unexpected sampled ptr usize"); expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr"); expect_zu_eq(free_sampled_ptr_sz, 0, "Unexpected free sampled ptr size"); @@ -258,6 +261,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { "Unexpected non NULL default hook"); size_t alloc_sz = 10; + size_t alloc_usz = 16; void *p = mallocx(alloc_sz, 0); expect_ptr_not_null(p, "Failed to allocate"); expect_true(mock_prof_sample_hook_called == sample_hook_set, @@ -266,6 +270,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr"); expect_zu_eq(alloc_sz, sampled_ptr_sz, "Unexpected sampled usize"); + expect_zu_eq(alloc_usz, sampled_ptr_usz, "Unexpected sampled usize"); } dallocx(p, 0); @@ -278,7 +283,7 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { } sampled_ptr = free_sampled_ptr = NULL; - sampled_ptr_sz = free_sampled_ptr_sz = 0; + sampled_ptr_sz = sampled_ptr_usz = free_sampled_ptr_sz = 0; mock_prof_sample_hook_called = false; mock_prof_sample_free_hook_called = false; } From 8dc97b11089be6d58a52009ea3da610bf90331d3 Mon Sep 17 00:00:00 2001 From: Burton Li Date: Mon, 1 Jul 2024 15:49:58 -0700 Subject: [PATCH 2405/2608] Fix NSTIME_MONOTONIC for win32 implementation --- src/nstime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nstime.c b/src/nstime.c index 7fb9100e..72f04227 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -172,7 +172,7 @@ nstime_ns_since(const nstime_t *past) { } #ifdef _WIN32 -# define NSTIME_MONOTONIC true +# define 
NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { FILETIME ft; From 48f66cf4a22af3b380d4c049f79fb7e820eba3d3 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 15:36:08 -0700 Subject: [PATCH 2406/2608] add a size check when declare a stack array to be less than 2048 bytes --- include/jemalloc/internal/jemalloc_internal_types.h | 8 ++++++-- src/ctl.c | 4 ++-- src/stats.c | 2 +- test/unit/hash.c | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 4ab5a0cf..6a81f3cd 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -135,10 +135,14 @@ typedef enum malloc_init_e malloc_init_t; # include # endif # endif -# define VARIABLE_ARRAY(type, name, count) \ +# define VARIABLE_ARRAY_UNSAFE(type, name, count) \ type *name = alloca(sizeof(type) * (count)) #else -# define VARIABLE_ARRAY(type, name, count) type name[(count)] +# define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)] #endif +#define VARIABLE_ARRAY_SIZE_MAX 2048 +#define VARIABLE_ARRAY(type, name, count) \ + assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ + VARIABLE_ARRAY_UNSAFE(type, name, count) #endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/src/ctl.c b/src/ctl.c index 62589d77..ebe5c61c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -1379,7 +1379,7 @@ ctl_refresh(tsdn_t *tsdn) { const unsigned narenas = ctl_arenas->narenas; assert(narenas > 0); ctl_arena_t *ctl_sarena = arenas_i(MALLCTL_ARENAS_ALL); - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + VARIABLE_ARRAY_UNSAFE(arena_t *, tarenas, narenas); /* * Clear sum stats, since they will be merged into by @@ -2726,7 +2726,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { */ if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == narenas) { unsigned i; - VARIABLE_ARRAY(arena_t *, tarenas, narenas); + 
VARIABLE_ARRAY_UNSAFE(arena_t *, tarenas, narenas); for (i = 0; i < narenas; i++) { tarenas[i] = arena_get(tsdn, i, false); diff --git a/src/stats.c b/src/stats.c index 726007f5..fbfacabf 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1896,7 +1896,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t mib[3]; size_t miblen = sizeof(mib) / sizeof(size_t); size_t sz; - VARIABLE_ARRAY(bool, initialized, narenas); + VARIABLE_ARRAY_UNSAFE(bool, initialized, narenas); bool destroyed_initialized; unsigned i, ninitialized; diff --git a/test/unit/hash.c b/test/unit/hash.c index 49f08238..17c66ec6 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -61,8 +61,8 @@ static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; const int hashes_size = hashbytes * 256; - VARIABLE_ARRAY(uint8_t, hashes, hashes_size); - VARIABLE_ARRAY(uint8_t, final, hashbytes); + VARIABLE_ARRAY_UNSAFE(uint8_t, hashes, hashes_size); + VARIABLE_ARRAY_UNSAFE(uint8_t, final, hashbytes); unsigned i; uint32_t computed, expected; From 47c9bcd402110be3f64517ad9366d1cfaa751d48 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 17:33:07 -0700 Subject: [PATCH 2407/2608] Use a for-loop to fulfill flush requests that are larger than CACHE_BIN_NFLUSH_BATCH_MAX items --- include/jemalloc/internal/cache_bin.h | 8 +++++ src/tcache.c | 47 ++++++++++++++++++--------- 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a26c3671..a7a5e40e 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -600,6 +600,14 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, */ } +/* + * Limit how many items can be flushed in a batch (Which is the upper bound + * for the nflush parameter in tcache_bin_flush_impl()). 
+ * This is to avoid stack overflow when we do batch edata look up, which + * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. + */ +#define CACHE_BIN_NFLUSH_BATCH_MAX (VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) + /* * Filling and flushing are done in batch, on arrays of void *s. For filling, * the arrays go forward, and can be accessed with ordinary array arithmetic. diff --git a/src/tcache.c b/src/tcache.c index 03ec5136..4144524d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -712,22 +712,37 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { - /* - * The small/large flush logic is very similar; you might conclude that - * it's a good opportunity to share code. We've tried this, and by and - * large found this to obscure more than it helps; there are so many - * fiddly bits around things like stats handling, precisely when and - * which mutexes are acquired, etc., that almost all code ends up being - * gated behind 'if (small) { ... } else { ... }'. Even though the - * '...' is morally equivalent, the code itself needs slight tweaks. 
- */ - if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, - ptrs, nflush); - } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, - ptrs, nflush); - } + assert(ptrs != NULL && ptrs->ptr != NULL); + unsigned nflush_batch, nflushed = 0; + cache_bin_ptr_array_t ptrs_batch; + do { + nflush_batch = nflush - nflushed; + if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { + nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; + } + assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); + (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; + (&ptrs_batch)->ptr = ptrs->ptr + nflushed; + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. 
+ */ + if (small) { + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, + &ptrs_batch, nflush_batch); + } else { + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, + &ptrs_batch, nflush_batch); + } + nflushed += nflush_batch; + } while (nflushed < nflush); + assert(nflush == nflushed); + assert((ptrs->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); } JEMALLOC_ALWAYS_INLINE void From 8fefabd3a49d1f090fe677722f1e2a66f162237a Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 18 Jul 2024 17:49:21 -0700 Subject: [PATCH 2408/2608] increase the ncached_max in fill_flush test case to 1024 --- test/stress/fill_flush.c | 1 + 1 file changed, 1 insertion(+) diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c index a2db044d..546bcc0b 100644 --- a/test/stress/fill_flush.c +++ b/test/stress/fill_flush.c @@ -5,6 +5,7 @@ #define LARGE_ALLOC_SIZE SC_LARGE_MINCLASS #define NALLOCS 1000 +const char *malloc_conf = "tcache_ncached_max:8-128:1024"; /* * We make this volatile so the 1-at-a-time variants can't leave the allocation * in a register, just to try to get the cache behavior closer. From a25b9b8ba91881964be3083db349991bbbbf1661 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 24 Jul 2024 21:16:11 +0000 Subject: [PATCH 2409/2608] Simplify the logic when bumping lg_fill_div. --- src/tcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tcache.c b/src/tcache.c index 4144524d..b90907ad 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -166,7 +166,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * the fill count is always at least 1. 
*/ if ((cache_bin_ncached_max_get(cache_bin) >> - (tcache_slow->lg_fill_div[szind] + 1)) >= 1) { + tcache_slow->lg_fill_div[szind]) > 1) { tcache_slow->lg_fill_div[szind]++; } } From 0a9f51d0d8d2a8135cc853be7ed771230854ede6 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 2 Aug 2024 09:35:44 -0700 Subject: [PATCH 2410/2608] Simplify `hpa_shard_maybe_do_deferred_work` It doesn't make much sense to repeat purging once we done with hugification, because we can de-hugify pages that were hugified just moment ago for no good reason. Let them wait next deferred work phase instead. And if they still meeting purging conditions then, purge them. --- src/hpa.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 49d6b037..27fc1589 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -532,41 +532,41 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, if (!forced && shard->opts.deferral_allowed) { return; } + /* * If we're on a background thread, do work so long as there's work to * be done. Otherwise, bound latency to not be *too* bad by doing at * most a small fixed number of operations. */ - bool hugified = false; - bool purged = false; size_t max_ops = (forced ? (size_t)-1 : 16); size_t nops = 0; - do { - /* - * Always purge before hugifying, to make sure we get some - * ability to hit our quiescence targets. - */ - purged = false; - while (hpa_should_purge(tsdn, shard) && nops < max_ops) { - purged = hpa_try_purge(tsdn, shard); - if (!purged) { - /* - * It is fine if we couldn't purge as sometimes - * we try to purge just to unblock - * hugification, but there is maybe no dirty - * pages at all at the moment. - */ - break; - } - nops++; - } - hugified = hpa_try_hugify(tsdn, shard); - if (hugified) { - nops++; + + /* + * Always purge before hugifying, to make sure we get some + * ability to hit our quiescence targets. 
+ */ + while (hpa_should_purge(tsdn, shard) && nops < max_ops) { + if (!hpa_try_purge(tsdn, shard)) { + /* + * It is fine if we couldn't purge as sometimes + * we try to purge just to unblock + * hugification, but there is maybe no dirty + * pages at all at the moment. + */ + break; } malloc_mutex_assert_owner(tsdn, &shard->mtx); + nops++; + } + + /* + * Try to hugify at least once, even if we out of operations to make at + * least some progress on hugification even at worst case. + */ + while (hpa_try_hugify(tsdn, shard) && nops < max_ops) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - } while ((hugified || purged) && nops < max_ops); + nops++; + } } static edata_t * From 143f458188d2d5a02418e7f72e56152dab118786 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 6 Aug 2024 08:37:04 -0700 Subject: [PATCH 2411/2608] Fix `hpa_strict_min_purge_interval` option logic We update `shard->last_purge` on each call of `hpa_try_purge` if we purged something. This means, when `hpa_strict_min_purge_interval` option is set only one slab will be purged, because on the next call condition for too frequent purge protection `since_last_purge_ms < shard->opts.min_purge_interval_ms` will always be true. This is not an intended behaviour. Instead, we need to check `min_purge_interval_ms` once and purge as many pages as needed to satisfy requirements for `hpa_dirty_mult` option. Make possible to count number of actions performed in unit tests (purge, hugify, dehugify) instead of binary: called/not called. Extended current unit tests with cases where we need to purge more than one page for a purge phase. 
--- src/hpa.c | 54 +++++++------ test/unit/hpa.c | 197 +++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 209 insertions(+), 42 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 27fc1589..d3b9c6c2 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -378,18 +378,6 @@ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - /* - * Make sure we respect purge interval setting and don't purge - * too frequently. - */ - if (shard->opts.strict_min_purge_interval) { - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); - if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { - return false; - } - } - hpdata_t *to_purge = psset_pick_purge(&shard->psset); if (to_purge == NULL) { return false; @@ -521,6 +509,19 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { return true; } +static bool +hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + if (shard->opts.strict_min_purge_interval) { + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { + return false; + } + } + return true; +} + /* * Execution of deferred work is forced if it's triggered by an explicit * hpa_shard_do_deferred_work() call. @@ -545,18 +546,25 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * Always purge before hugifying, to make sure we get some * ability to hit our quiescence targets. */ - while (hpa_should_purge(tsdn, shard) && nops < max_ops) { - if (!hpa_try_purge(tsdn, shard)) { - /* - * It is fine if we couldn't purge as sometimes - * we try to purge just to unblock - * hugification, but there is maybe no dirty - * pages at all at the moment. - */ - break; + + /* + * Make sure we respect purge interval setting and don't purge + * too frequently. 
+ */ + if (hpa_min_purge_interval_passed(tsdn, shard)) { + while (hpa_should_purge(tsdn, shard) && nops < max_ops) { + if (!hpa_try_purge(tsdn, shard)) { + /* + * It is fine if we couldn't purge as sometimes + * we try to purge just to unblock + * hugification, but there is maybe no dirty + * pages at all at the moment. + */ + break; + } + malloc_mutex_assert_owner(tsdn, &shard->mtx); + nops++; } - malloc_mutex_assert_owner(tsdn, &shard->mtx); - nops++; } /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index a8a26e13..2c11e0a8 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -34,6 +34,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 10 * 1000, /* min_purge_interval_ms */ 5 * 1000, + /* strict_min_purge_interval */ + false }; static hpa_shard_opts_t test_hpa_shard_opts_purge = { @@ -49,6 +51,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { 0, /* min_purge_interval_ms */ 5 * 1000, + /* strict_min_purge_interval */ + false }; static hpa_shard_t * @@ -358,24 +362,24 @@ defer_test_unmap(void *ptr, size_t size) { (void)size; } -static bool defer_purge_called = false; +static size_t ndefer_purge_calls = 0; static void defer_test_purge(void *ptr, size_t size) { (void)ptr; (void)size; - defer_purge_called = true; + ++ndefer_purge_calls; } -static bool defer_hugify_called = false; +static size_t ndefer_hugify_calls = 0; static void defer_test_hugify(void *ptr, size_t size) { - defer_hugify_called = true; + ++ndefer_hugify_calls; } -static bool defer_dehugify_called = false; +static size_t ndefer_dehugify_calls = 0; static void defer_test_dehugify(void *ptr, size_t size) { - defer_dehugify_called = true; + ++ndefer_dehugify_calls; } static nstime_t defer_curtime; @@ -417,14 +421,14 @@ TEST_BEGIN(test_defer_time) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpa_shard_do_deferred_work(tsdn, shard); - expect_false(defer_hugify_called, "Hugified too early"); + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); /* Hugification 
delay is set to 10 seconds in options. */ nstime_init2(&defer_curtime, 11, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_true(defer_hugify_called, "Failed to hugify"); + expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); - defer_hugify_called = false; + ndefer_hugify_calls = 0; /* Purge. Recall that dirty_mult is .25. */ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { @@ -434,12 +438,12 @@ TEST_BEGIN(test_defer_time) { hpa_shard_do_deferred_work(tsdn, shard); - expect_false(defer_hugify_called, "Hugified too early"); - expect_true(defer_dehugify_called, "Should have dehugified"); - expect_true(defer_purge_called, "Should have purged"); - defer_hugify_called = false; - defer_dehugify_called = false; - defer_purge_called = false; + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified"); + expect_zu_eq(1, ndefer_purge_calls, "Should have purged"); + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; /* * Refill the page. We now meet the hugification threshold; we should @@ -459,9 +463,10 @@ TEST_BEGIN(test_defer_time) { /* Wait for the threshold again. 
*/ nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_true(defer_hugify_called, "Hugified too early"); - expect_false(defer_dehugify_called, "Unexpected dehugify"); - expect_false(defer_purge_called, "Unexpected purge"); + expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); + expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify"); + expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge"); + ndefer_hugify_calls = 0; destroy_test_data(shard); } @@ -497,6 +502,157 @@ TEST_BEGIN(test_purge_no_infinite_loop) { } TEST_END +TEST_BEGIN(test_strict_no_min_purge_interval) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. 
+ */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_strict_min_purge_interval) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.strict_min_purge_interval = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * We have a slab with dirty page and no active pages, but + * opt.min_purge_interval_ms didn't pass yet. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + /* Minumum purge interval is set to 5 seconds in options. */ + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + /* Now we should purge, but nothing else. 
*/ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_purge) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 3 hugepages out of 8. */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still + * have 5 active hugepages (1 / 5 = 0.2 < 0.25). + */ + expect_zu_eq(2, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * We still have completely dirty hugepage, but we are below + * opt.dirty_mult. 
+ */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -516,5 +672,8 @@ main(void) { test_stress, test_alloc_dalloc_batch, test_defer_time, - test_purge_no_infinite_loop); + test_purge_no_infinite_loop, + test_strict_no_min_purge_interval, + test_strict_min_purge_interval, + test_purge); } From aaa29003ab90b574c29dc4c0c331085c07f1c1fd Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 6 Aug 2024 08:47:57 -0700 Subject: [PATCH 2412/2608] Limit maximum number of purged slabs with option Option `experimental_hpa_max_purge_nhp` introduced for backward compatibility reasons: to make it possible to have behaviour similar to buggy `hpa_strict_min_purge_interval` implementation. When `experimental_hpa_max_purge_nhp` is set to -1, there is no limit to number of slabs we'll purge on each iteration. Otherwise, we'll purge no more than `experimental_hpa_max_purge_nhp` hugepages (slabs). This in turn means we might not purge enough dirty pages to satisfy `hpa_dirty_mult` requirement. Combination of `hpa_dirty_mult`, `experimental_hpa_max_purge_nhp` and `hpa_strict_min_purge_interval` options allows us to have steady rate of pages returned back to the system. This provides a strickier latency guarantees as number of `madvise` calls is bounded (and hence number of TLB shootdowns is limited) in exchange to weaker memory usage guarantees. --- include/jemalloc/internal/hpa_opts.h | 9 +++- src/ctl.c | 8 ++- src/hpa.c | 17 ++++++- src/jemalloc.c | 4 ++ src/stats.c | 1 + test/unit/hpa.c | 75 ++++++++++++++++++++++++++-- test/unit/mallctl.c | 1 + 7 files changed, 109 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 93add641..bf3de0e9 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -57,6 +57,11 @@ struct hpa_shard_opts_s { * purging logic fix. 
*/ bool strict_min_purge_interval; + + /* + * Maximum number of hugepages to purge on each purging attempt. + */ + ssize_t experimental_max_purge_nhp; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -79,7 +84,9 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* strict_min_purge_interval */ \ - false \ + false, \ + /* experimental_max_purge_nhp */ \ + -1 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/src/ctl.c b/src/ctl.c index ebe5c61c..a01f643e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -104,6 +104,7 @@ CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_hpa_strict_min_purge_interval) +CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -460,7 +461,10 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("hpa_strict_min_purge_interval"), CTL(opt_hpa_strict_min_purge_interval)}, + {NAME("hpa_strict_min_purge_interval"), + CTL(opt_hpa_strict_min_purge_interval)}, + {NAME("experimental_hpa_max_purge_nhp"), + CTL(opt_experimental_hpa_max_purge_nhp)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2197,6 +2201,8 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval, opt_hpa_opts.strict_min_purge_interval, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, + opt_hpa_opts.experimental_max_purge_nhp, ssize_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index d3b9c6c2..0410fefc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -552,7 
+552,22 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * too frequently. */ if (hpa_min_purge_interval_passed(tsdn, shard)) { - while (hpa_should_purge(tsdn, shard) && nops < max_ops) { + size_t max_purges = max_ops; + /* + * Limit number of hugepages (slabs) to purge. + * When experimental_max_purge_nhp option is used, there is no + * guarantee we'll always respect dirty_mult option. Option + * experimental_max_purge_nhp provides a way to configure same + * behaviour as was possible before, with buggy implementation + * of purging algorithm. + */ + ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp; + if (max_purge_nhp != -1 && + max_purges > (size_t)max_purge_nhp) { + max_purges = max_purge_nhp; + } + + while (hpa_should_purge(tsdn, shard) && nops < max_purges) { if (!hpa_try_purge(tsdn, shard)) { /* * It is fine if we couldn't purge as sometimes diff --git a/src/jemalloc.c b/src/jemalloc.c index abd7540f..4859cff6 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1558,6 +1558,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.strict_min_purge_interval, "hpa_strict_min_purge_interval"); + CONF_HANDLE_SSIZE_T( + opt_hpa_opts.experimental_max_purge_nhp, + "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/stats.c b/src/stats.c index fbfacabf..a5c3f0fe 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1565,6 +1565,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UINT64("hpa_hugify_delay_ms") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_BOOL("hpa_strict_min_purge_interval") + OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 2c11e0a8..4f15876b 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -35,7 +35,9 @@ 
static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* strict_min_purge_interval */ - false + false, + /* experimental_max_purge_nhp */ + -1 }; static hpa_shard_opts_t test_hpa_shard_opts_purge = { @@ -52,7 +54,9 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* min_purge_interval_ms */ 5 * 1000, /* strict_min_purge_interval */ - false + false, + /* experimental_max_purge_nhp */ + -1 }; static hpa_shard_t * @@ -653,6 +657,70 @@ TEST_BEGIN(test_purge) { } TEST_END +TEST_BEGIN(test_experimental_max_purge_nhp) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.experimental_max_purge_nhp = 1; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 3 hugepages out of 8. */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Expect only one purge call, because opts.experimental_max_purge_nhp + * is set to 1. 
+ */ + expect_zu_eq(1, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* We still above the limit for dirty pages. */ + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + ndefer_purge_calls = 0; + + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* Finally, we are below the limit, no purges are expected. */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -675,5 +743,6 @@ main(void) { test_purge_no_infinite_loop, test_strict_no_min_purge_interval, test_strict_min_purge_interval, - test_purge); + test_purge, + test_experimental_max_purge_nhp); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 84cd3995..ffe5c411 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -292,6 +292,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); + TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); From c7ccb8d7e99a1c3f1ba3cc3e465bc6dd1b0fbe0b Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Mon, 19 Aug 2024 10:54:19 -0700 Subject: [PATCH 2413/2608] Add `experimental` prefix to `hpa_strict_min_purge_interval` Goal is to make it obvious this option is experimental. 
--- include/jemalloc/internal/hpa_opts.h | 4 ++-- src/ctl.c | 10 +++++----- src/hpa.c | 2 +- src/jemalloc.c | 4 ++-- src/stats.c | 2 +- test/unit/hpa.c | 14 +++++++------- test/unit/hpa_background_thread.sh | 2 +- test/unit/mallctl.c | 2 ++ 8 files changed, 21 insertions(+), 19 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index bf3de0e9..15765689 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -56,7 +56,7 @@ struct hpa_shard_opts_s { * This is an option to provide backward compatibility for staged rollout of * purging logic fix. */ - bool strict_min_purge_interval; + bool experimental_strict_min_purge_interval; /* * Maximum number of hugepages to purge on each purging attempt. @@ -83,7 +83,7 @@ struct hpa_shard_opts_s { 10 * 1000, \ /* min_purge_interval_ms */ \ 5 * 1000, \ - /* strict_min_purge_interval */ \ + /* experimental_strict_min_purge_interval */ \ false, \ /* experimental_max_purge_nhp */ \ -1 \ diff --git a/src/ctl.c b/src/ctl.c index a01f643e..0f69e1ec 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,7 +103,7 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) -CTL_PROTO(opt_hpa_strict_min_purge_interval) +CTL_PROTO(opt_experimental_hpa_strict_min_purge_interval) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) @@ -461,8 +461,8 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("hpa_strict_min_purge_interval"), - CTL(opt_hpa_strict_min_purge_interval)}, + {NAME("experimental_hpa_strict_min_purge_interval"), + CTL(opt_experimental_hpa_strict_min_purge_interval)}, {NAME("experimental_hpa_max_purge_nhp"), 
CTL(opt_experimental_hpa_max_purge_nhp)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, @@ -2199,8 +2199,8 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) -CTL_RO_NL_GEN(opt_hpa_strict_min_purge_interval, - opt_hpa_opts.strict_min_purge_interval, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_strict_min_purge_interval, + opt_hpa_opts.experimental_strict_min_purge_interval, bool) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) diff --git a/src/hpa.c b/src/hpa.c index 0410fefc..3d7a6f60 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -512,7 +512,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (shard->opts.strict_min_purge_interval) { + if (shard->opts.experimental_strict_min_purge_interval) { uint64_t since_last_purge_ms = shard->central->hooks.ms_since( &shard->last_purge); if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 4859cff6..512b3fcc 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1555,8 +1555,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); CONF_HANDLE_BOOL( - opt_hpa_opts.strict_min_purge_interval, - "hpa_strict_min_purge_interval"); + opt_hpa_opts.experimental_strict_min_purge_interval, + "experimental_hpa_strict_min_purge_interval"); CONF_HANDLE_SSIZE_T( opt_hpa_opts.experimental_max_purge_nhp, diff --git a/src/stats.c b/src/stats.c index a5c3f0fe..bcd08ce5 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1564,7 +1564,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") 
OPT_WRITE_UINT64("hpa_min_purge_interval_ms") - OPT_WRITE_BOOL("hpa_strict_min_purge_interval") + OPT_WRITE_BOOL("experimental_hpa_strict_min_purge_interval") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 4f15876b..ae8a976c 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -34,7 +34,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 10 * 1000, /* min_purge_interval_ms */ 5 * 1000, - /* strict_min_purge_interval */ + /* experimental_strict_min_purge_interval */ false, /* experimental_max_purge_nhp */ -1 @@ -53,7 +53,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { 0, /* min_purge_interval_ms */ 5 * 1000, - /* strict_min_purge_interval */ + /* experimental_strict_min_purge_interval */ false, /* experimental_max_purge_nhp */ -1 @@ -506,7 +506,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { } TEST_END -TEST_BEGIN(test_strict_no_min_purge_interval) { +TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -547,7 +547,7 @@ TEST_BEGIN(test_strict_no_min_purge_interval) { } TEST_END -TEST_BEGIN(test_strict_min_purge_interval) { +TEST_BEGIN(test_experimental_strict_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -561,7 +561,7 @@ TEST_BEGIN(test_strict_min_purge_interval) { hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; - opts.strict_min_purge_interval = true; + opts.experimental_strict_min_purge_interval = true; hpa_shard_t *shard = create_test_data(&hooks, &opts); @@ -741,8 +741,8 @@ main(void) { test_alloc_dalloc_batch, test_defer_time, test_purge_no_infinite_loop, - test_strict_no_min_purge_interval, - test_strict_min_purge_interval, + test_no_experimental_strict_min_purge_interval, + test_experimental_strict_min_purge_interval, test_purge, test_experimental_max_purge_nhp); } diff --git 
a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 666da8fc..33b70e19 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_strict_min_purge_interval:true,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ffe5c411..028a27f7 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -292,6 +292,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); + TEST_MALLCTL_OPT(bool, experimental_hpa_strict_min_purge_interval, + always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); From 8c54637f8c7a98bbaec6ee38229a904bbf22170c Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 22 Aug 2024 21:58:58 -0700 Subject: [PATCH 2414/2608] Better trigger race condition in bin_batching unit test --- test/unit/bin_batching.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c index 525f59e0..19975341 100644 --- a/test/unit/bin_batching.c +++ b/test/unit/bin_batching.c @@ -45,9 +45,16 @@ increment_push_failure(size_t push_idx) { atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); } else { assert_zu_lt(push_idx, 4, "Only 4 elems"); - volatile int x = 10000; + volatile size_t x = 10000; while (--x) { /* Spin for a while, to try to provoke a failure. 
*/ + if (x == push_idx) { +#ifdef _WIN32 + SwitchToThread(); +#else + sched_yield(); +#endif + } } } } From 5b72ac098abce464add567869d082f2097bd59a2 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 23 Aug 2024 14:25:47 -0700 Subject: [PATCH 2415/2608] Remove tests for ppc64 on Travic CI. --- .travis.yml | 33 --------------------------------- scripts/gen_travis.py | 3 ++- 2 files changed, 2 insertions(+), 34 deletions(-) diff --git a/.travis.yml b/.travis.yml index ec1481c4..20605fc0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -271,39 +271,6 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: linux - arch: ppc64le - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 651006ca..d2fd25e3 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -316,7 +316,8 @@ def main(): # generate_freebsd(AMD64), generate_linux(AMD64), - generate_linux(PPC64LE), + # PPC tests on travis has been down for a while, disable it for now. + # generate_linux(PPC64LE), generate_macos(AMD64), From bd0a5b0f3b6ce17a5f888e8e08ee5de774b29579 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 28 Aug 2024 11:33:07 -0700 Subject: [PATCH 2416/2608] Fix static analysis warnings. Newly reported warnings included several reserved macro identifier, and false-positive used-uninitialized. --- scripts/run_static_analysis.sh | 3 ++- src/arena.c | 2 +- src/ctl.c | 2 +- src/tcache.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/scripts/run_static_analysis.sh b/scripts/run_static_analysis.sh index e2185ec9..1662f7eb 100755 --- a/scripts/run_static_analysis.sh +++ b/scripts/run_static_analysis.sh @@ -53,7 +53,8 @@ CC_ANALYZERS_FROM_PATH=1 CodeChecker analyze compile_commands.json --jobs "$(npr --ctu --compile-uniqueing strict --output static_analysis_raw_results \ --analyzers clangsa clang-tidy --skip "$skipfile" \ --enable readability-inconsistent-declaration-parameter-name \ - --enable performance-no-int-to-ptr + --enable performance-no-int-to-ptr \ + --disable clang-diagnostic-reserved-macro-identifier # `--enable` is additive, the vast majority of the checks we want are # enabled by default. 
diff --git a/src/arena.c b/src/arena.c index 21010279..bee106f0 100644 --- a/src/arena.c +++ b/src/arena.c @@ -473,7 +473,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, arena_decide_unforced_purge_eagerness(is_background_thread); bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, eagerness); - size_t npages_new; + size_t npages_new JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(0); if (epoch_advanced) { /* Backlog is updated on epoch advance. */ npages_new = decay_epoch_npages_delta(decay); diff --git a/src/ctl.c b/src/ctl.c index 0f69e1ec..ec569d7c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3165,7 +3165,7 @@ arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; unsigned arena_ind; - char *name; + char *name JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); diff --git a/src/tcache.c b/src/tcache.c index b90907ad..003469a3 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1208,7 +1208,7 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache != NULL && tcache_slow != NULL); bool enabled = tcache_available(tsd); - arena_t *assigned_arena; + arena_t *assigned_arena JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); if (enabled) { assigned_arena = tcache_slow->arena; /* Carry over the bin settings during the reboot. 
*/ From e29ac619870b99a2cdde991c9d6d4c08d11a8ec2 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 28 Aug 2024 16:19:16 -0700 Subject: [PATCH 2417/2608] Limit Cirrus CI to freebsd 15 and 14 --- .cirrus.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 63a96d2a..13714014 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -34,12 +34,6 @@ task: - name: 14-STABLE freebsd_instance: image_family: freebsd-14-0-snap - - name: 14.0-RELEASE - freebsd_instance: - image_family: freebsd-14-0 - - name: 13-STABLE - freebsd_instance: - image_family: freebsd-13-2-snap install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From 9e123a833cc6f56381c46a1656a323f893fa2528 Mon Sep 17 00:00:00 2001 From: Ben Niu Date: Thu, 21 Dec 2023 20:33:41 -0800 Subject: [PATCH 2418/2608] Leverage new Windows API TlsGetValue2 for performance --- include/jemalloc/internal/quantum.h | 2 +- include/jemalloc/internal/tsd_win.h | 30 ++++++++++++++++++++++++++--- src/tsd.c | 4 ++++ 3 files changed, 32 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index a97f54ca..b4beb309 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -24,7 +24,7 @@ # ifdef __arm__ # define LG_QUANTUM 3 # endif -# ifdef __aarch64__ +# if defined(__aarch64__) || defined(_M_ARM64) # define LG_QUANTUM 4 # endif # ifdef __hppa__ diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 8ec7eda7..4b40a8ab 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -15,6 +15,16 @@ typedef struct { extern DWORD tsd_tsd; extern tsd_wrapper_t tsd_boot_wrapper; extern bool tsd_booted; +#if defined(_M_ARM64EC) +#define JEMALLOC_WIN32_TLSGETVALUE2 0 +#else +#define JEMALLOC_WIN32_TLSGETVALUE2 1 +#endif +#if 
JEMALLOC_WIN32_TLSGETVALUE2 +typedef LPVOID (WINAPI *TGV2)(DWORD dwTlsIndex); +extern TGV2 tls_get_value2; +extern HMODULE tgv2_mod; +#endif /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE bool @@ -49,9 +59,17 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) { JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * tsd_wrapper_get(bool init) { - DWORD error = GetLastError(); - tsd_wrapper_t *wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); - SetLastError(error); + tsd_wrapper_t *wrapper; +#if JEMALLOC_WIN32_TLSGETVALUE2 + if (tls_get_value2 != NULL) { + wrapper = (tsd_wrapper_t *) tls_get_value2(tsd_tsd); + } else +#endif + { + DWORD error = GetLastError(); + wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + SetLastError(error); + } if (init && unlikely(wrapper == NULL)) { wrapper = (tsd_wrapper_t *) @@ -78,6 +96,12 @@ tsd_boot0(void) { } _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_wrapper_set(&tsd_boot_wrapper); +#if JEMALLOC_WIN32_TLSGETVALUE2 + tgv2_mod = LoadLibraryA("api-ms-win-core-processthreads-l1-1-8.dll"); + if (tgv2_mod != NULL) { + tls_get_value2 = (TGV2)GetProcAddress(tgv2_mod, "TlsGetValue2"); + } +#endif tsd_booted = true; return false; } diff --git a/src/tsd.c b/src/tsd.c index a4db8e36..c9ae2d64 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -25,6 +25,10 @@ bool tsd_booted = false; DWORD tsd_tsd; tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; bool tsd_booted = false; +#if JEMALLOC_WIN32_TLSGETVALUE2 +TGV2 tls_get_value2 = NULL; +HMODULE tgv2_mod = NULL; +#endif #else /* From f68effe4ac0d1ee5cf26fc9c7fc50c88d16bf6ba Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Mon, 12 Aug 2024 16:15:46 -0700 Subject: [PATCH 2419/2608] Add a runtime option opt_experimental_tcache_gc to guard the new design --- include/jemalloc/internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 4 ++++ src/jemalloc.c | 3 +++ src/stats.c | 1 + 4 files changed, 9 insertions(+) diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h 
b/include/jemalloc/internal/jemalloc_internal_externs.h index 41c0f366..2c6b58f7 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -29,6 +29,7 @@ extern void (*JET_MUTABLE invalid_conf_abort)(void); extern bool opt_utrace; extern bool opt_xmalloc; extern bool opt_experimental_infallible_new; +extern bool opt_experimental_tcache_gc; extern bool opt_zero; extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; diff --git a/src/ctl.c b/src/ctl.c index ec569d7c..8608f124 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -131,6 +131,7 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) +CTL_PROTO(opt_experimental_tcache_gc) CTL_PROTO(opt_max_batched_size) CTL_PROTO(opt_remote_free_max) CTL_PROTO(opt_remote_free_max_batch) @@ -494,6 +495,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, + {NAME("experimental_tcache_gc"), + CTL(opt_experimental_tcache_gc)}, {NAME("max_batched_size"), CTL(opt_max_batched_size)}, {NAME("remote_free_max"), CTL(opt_remote_free_max)}, {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, @@ -2243,6 +2246,7 @@ CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) +CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) diff --git a/src/jemalloc.c b/src/jemalloc.c index 512b3fcc..dc471563 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -151,6 +151,7 @@ void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; bool opt_utrace = 
false; bool opt_xmalloc = false; bool opt_experimental_infallible_new = false; +bool opt_experimental_tcache_gc = false; bool opt_zero = false; unsigned opt_narenas = 0; static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); @@ -1414,6 +1415,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "experimental_infallible_new") } + CONF_HANDLE_BOOL(opt_experimental_tcache_gc, + "experimental_tcache_gc") CONF_HANDLE_BOOL(opt_tcache, "tcache") CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, diff --git a/src/stats.c b/src/stats.c index bcd08ce5..f45b7447 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1600,6 +1600,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("utrace") OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") + OPT_WRITE_BOOL("experimental_tcache_gc") OPT_WRITE_SIZE_T("max_batched_size") OPT_WRITE_SIZE_T("remote_free_max") OPT_WRITE_SIZE_T("remote_free_max_batch") From 14d5dc136a40ddf2464f2178f950b562f38f0d25 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 1 Aug 2024 10:24:09 -0700 Subject: [PATCH 2420/2608] Allow a range for the nfill passed to arena_cache_bin_fill_small --- include/jemalloc/internal/arena_externs.h | 3 ++- src/arena.c | 31 +++++++++++++++-------- src/tcache.c | 4 ++- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 3d0329fc..e915c97a 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -63,7 +63,8 @@ void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill); + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, + const 
cache_bin_sz_t nfill_max); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); diff --git a/src/arena.c b/src/arena.c index bee106f0..884d1bf9 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1047,14 +1047,16 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill) { + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max) { assert(cache_bin_ncached_get_local(cache_bin) == 0); - assert(nfill != 0); + assert(nfill_min > 0 && nfill_min <= nfill_max); + assert(nfill_max <= cache_bin_ncached_max_get(cache_bin)); const bin_info_t *bin_info = &bin_infos[binind]; - CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill); - cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill); + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. After both are exhausted, new slabs will be allocated through @@ -1101,13 +1103,19 @@ label_refill: malloc_mutex_lock(tsdn, &bin->lock); arena_bin_flush_batch_after_lock(tsdn, arena, bin, binind, &batch_flush_state); - while (filled < nfill) { + while (filled < nfill_min) { /* Try batch-fill from slabcur first. */ edata_t *slabcur = bin->slabcur; if (slabcur != NULL && edata_nfree_get(slabcur) > 0) { - unsigned tofill = nfill - filled; - unsigned nfree = edata_nfree_get(slabcur); - unsigned cnt = tofill < nfree ? tofill : nfree; + /* + * Use up the free slots if the total filled <= nfill_max. + * Otherwise, fallback to nfill_min for a more conservative + * memory usage. 
+ */ + unsigned cnt = edata_nfree_get(slabcur); + if (cnt + filled > nfill_max) { + cnt = nfill_min - filled; + } arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, &ptrs.ptr[filled]); @@ -1144,7 +1152,7 @@ label_refill: assert(fresh_slab == NULL); assert(!alloc_and_retry); break; - } /* while (filled < nfill) loop. */ + } /* while (filled < nfill_min) loop. */ if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; @@ -1162,7 +1170,7 @@ label_refill: if (alloc_and_retry) { assert(fresh_slab == NULL); - assert(filled < nfill); + assert(filled < nfill_min); assert(made_progress); fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, @@ -1173,7 +1181,8 @@ label_refill: made_progress = false; goto label_refill; } - assert(filled == nfill || (fresh_slab == NULL && !made_progress)); + assert((filled >= nfill_min && filled <= nfill_max) || + (fresh_slab == NULL && !made_progress)); /* Release if allocated but not used. */ if (fresh_slab != NULL) { diff --git a/src/tcache.c b/src/tcache.c index 003469a3..c300ed7d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -254,7 +254,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, if (nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, nfill); + arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, + /* nfill_min */ opt_experimental_tcache_gc ? 
+ ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; ret = cache_bin_alloc(cache_bin, tcache_success); From e2c9f3a9ce684090898b58a5fdb244cff48ef9bb Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 1 Aug 2024 13:20:11 -0700 Subject: [PATCH 2421/2608] Take locality into consideration when doing GC flush --- include/jemalloc/internal/tcache_types.h | 1 + src/tcache.c | 216 ++++++++++++++++++++++- 2 files changed, 213 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 578a199e..27516387 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -23,5 +23,6 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index c300ed7d..35f18077 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -134,10 +134,162 @@ tcache_gc_item_delay_compute(szind_t szind) { return (uint8_t)item_delay; } +static inline void * +tcache_gc_small_heuristic_addr_get(tsd_t *tsd, tcache_slow_t *tcache_slow, + szind_t szind) { + assert(szind < SC_NBINS); + tsdn_t *tsdn = tsd_tsdn(tsd); + bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); + assert(bin != NULL); + + malloc_mutex_lock(tsdn, &bin->lock); + edata_t *slab = (bin->slabcur == NULL) ? + edata_heap_first(&bin->slabs_nonfull) : bin->slabcur; + assert(slab != NULL || edata_heap_empty(&bin->slabs_nonfull)); + void *ret = (slab != NULL) ? 
edata_addr_get(slab) : NULL; + assert(ret != NULL || slab == NULL); + malloc_mutex_unlock(tsdn, &bin->lock); + + return ret; +} + +static inline bool +tcache_gc_is_addr_remote(void *addr, uintptr_t min, uintptr_t max) { + assert(addr != NULL); + return ((uintptr_t)addr < min || (uintptr_t)addr >= max); +} + +static inline cache_bin_sz_t +tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr, + uintptr_t *addr_min, uintptr_t *addr_max, szind_t szind, size_t nflush) { + assert(addr != NULL && addr_min != NULL && addr_max != NULL); + /* The slab address range that the provided addr belongs to. */ + uintptr_t slab_min = (uintptr_t)addr; + uintptr_t slab_max = slab_min + bin_infos[szind].slab_size; + /* + * When growing retained virtual memory, it's increased exponentially, + * starting from 2M, so that the total number of disjoint virtual + * memory ranges retained by each shard is limited. + */ + uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT) ? + ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT) : 0; + uintptr_t neighbor_max = ((uintptr_t)addr < (UINTPTR_MAX - + TCACHE_GC_NEIGHBOR_LIMIT)) ? ((uintptr_t)addr + + TCACHE_GC_NEIGHBOR_LIMIT) : UINTPTR_MAX; + + /* Scan the entire bin to count the number of remote pointers. */ + void **head = cache_bin->stack_head; + cache_bin_sz_t n_remote_slab = 0, n_remote_neighbor = 0; + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + for (void **cur = head; cur < head + ncached; cur++) { + n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, + slab_min, slab_max); + n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, + neighbor_min, neighbor_max); + } + /* + * Note: since slab size is dynamic and can be larger than 2M, i.e. + * TCACHE_GC_NEIGHBOR_LIMIT, there is no guarantee as to which of + * n_remote_slab and n_remote_neighbor is greater. 
+ */ + assert(n_remote_slab <= ncached && n_remote_neighbor <= ncached); + /* + * We first consider keeping ptrs from the neighboring addr range, + * since in most cases the range is greater than the slab range. + * So if the number of non-neighbor ptrs is more than the intended + * flush amount, we use it as the anchor for flushing. + */ + if (n_remote_neighbor >= nflush) { + *addr_min = neighbor_min; + *addr_max = neighbor_max; + return n_remote_neighbor; + } + /* + * We then consider only keeping ptrs from the local slab, and in most + * cases this is stricter, assuming that slab < 2M is the common case. + */ + *addr_min = slab_min; + *addr_max = slab_max; + return n_remote_slab; +} + +/* Shuffle the ptrs in the bin to put the remote pointers at the bottom. */ +static inline void +tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, + uintptr_t addr_min, uintptr_t addr_max) { + void **swap = NULL; + cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); + cache_bin_sz_t ntop = ncached - nremote, cnt = 0; + assert(ntop > 0 && ntop < ncached); + /* + * Scan the [head, head + ntop) part of the cache bin, during which + * bubbling the non-remote ptrs to the top of the bin. + * After this, the [head, head + cnt) part of the bin contains only + * non-remote ptrs, and they're in the same relative order as before. + * While the [head + cnt, head + ntop) part contains only remote ptrs. + */ + void **head = cache_bin->stack_head; + for (void **cur = head; cur < head + ntop; cur++) { + if (!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) { + /* Tracks the number of non-remote ptrs seen so far. */ + cnt++; + /* + * There is remote ptr before the current non-remote ptr, + * swap the current non-remote ptr with the remote ptr, + * and increment the swap pointer so that it's still + * pointing to the top remote ptr in the bin. 
+ */ + if (swap != NULL) { + assert(swap < cur); + assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + void *tmp = *cur; + *cur = *swap; + *swap = tmp; + swap++; + assert(swap <= cur); + assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + } + continue; + } else if (swap == NULL) { + /* Swap always points to the top remote ptr in the bin. */ + swap = cur; + } + } + /* + * Scan the [head + ntop, head + ncached) part of the cache bin, + * after which it should only contain remote ptrs. + */ + for (void **cur = head + ntop; cur < head + ncached; cur++) { + /* Early break if all non-remote ptrs have been moved. */ + if (cnt == ntop) { + break; + } + if (!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) { + assert(tcache_gc_is_addr_remote(*(head + cnt), addr_min, + addr_max)); + void *tmp = *cur; + *cur = *(head + cnt); + *(head + cnt) = tmp; + cnt++; + } + } + assert(cnt == ntop); + /* Sanity check to make sure the shuffle is done correctly. */ + for (void **cur = head; cur < head + ncached; cur++) { + assert(*cur != NULL); + assert(((cur < head + ntop) && !tcache_gc_is_addr_remote( + *cur, addr_min, addr_max)) || ((cur >= head + ntop) && + tcache_gc_is_addr_remote(*cur, addr_min, addr_max))); + } +} + static void tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { - /* Aim to flush 3/4 of items below low-water. */ + /* + * Aim to flush 3/4 of items below low-water, with remote pointers being + * prioritized for flushing. + */ assert(szind < SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; @@ -158,8 +310,6 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->bin_flush_delay_items[szind] = tcache_gc_item_delay_compute(szind); - tcache_bin_flush_small(tsd, tcache, cache_bin, szind, - (unsigned)(ncached - nflush)); /* * Reduce fill count by 2X. 
Limit lg_fill_div such that
@@ -169,12 +319,70 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 	    tcache_slow->lg_fill_div[szind]) > 1) {
 		tcache_slow->lg_fill_div[szind]++;
 	}
+
+	/*
+	 * When the new tcache gc is not enabled, or simply the entire bin needs
+	 * to be flushed, flush the bottom nflush items directly.
+	 */
+	if (!opt_experimental_tcache_gc || nflush == ncached) {
+		goto label_flush;
+	}
+
+	/* Query arena binshard to get heuristic locality info. */
+	void *addr = tcache_gc_small_heuristic_addr_get(tsd, tcache_slow, szind);
+	if (addr == NULL) {
+		goto label_flush;
+	}
+
+	/*
+	 * Use the queried addr above to get the number of remote ptrs in the
+	 * bin, and the min/max of the local addr range.
+	 */
+	uintptr_t addr_min, addr_max;
+	cache_bin_sz_t nremote = tcache_gc_small_nremote_get(cache_bin, addr,
+	    &addr_min, &addr_max, szind, nflush);
+
+	/*
+	 * Update the nflush to the larger value between the intended flush count
+	 * and the number of remote ptrs.
+	 */
+	if (nremote > nflush) {
+		nflush = nremote;
+	}
+	/*
+	 * When entering the locality check, nflush should be less than ncached,
+	 * otherwise the entire bin should be flushed regardless. The only case
+	 * when nflush gets updated to ncached after locality check is, when all
+	 * the items in the bin are remote, in which case the entire bin should
+	 * also be flushed.
+	 */
+	assert(nflush < ncached || nremote == ncached);
+	if (nremote == 0 || nremote == ncached) {
+		goto label_flush;
+	}
+
+	/*
+	 * Move the remote ptrs to the bottom of the bin for flushing.
+	 * As long as moved to the bottom, the order of these nremote ptrs
+	 * does not matter, since they are going to be flushed anyway.
+	 * The rest of the ptrs are moved to the top of the bin, and their
+	 * relative order is maintained.
+ */ + tcache_gc_small_bin_shuffle(cache_bin, nremote, addr_min, addr_max); + +label_flush: + assert(nflush > 0 && nflush <= ncached); + tcache_bin_flush_small(tsd, tcache, cache_bin, szind, + (unsigned)(ncached - nflush)); } static void tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { - /* Like the small GC; flush 3/4 of untouched items. */ + /* + * Like the small GC, flush 3/4 of untouched items. However, simply flush + * the bottom nflush items, without any locality check. + */ assert(szind >= SC_NBINS); cache_bin_t *cache_bin = &tcache->bins[szind]; assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); From 0c88be9e0a09fc868ac05ace96466bdc6f502ab8 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 1 Aug 2024 14:01:32 -0700 Subject: [PATCH 2422/2608] Regulate GC frequency by requiring a time interval between two consecutive GCs --- include/jemalloc/internal/tcache_structs.h | 4 + include/jemalloc/internal/tcache_types.h | 3 + src/tcache.c | 191 ++++++++++++++------- 3 files changed, 138 insertions(+), 60 deletions(-) diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index d94099b0..63e5db5d 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -33,8 +33,12 @@ struct tcache_slow_s { arena_t *arena; /* The number of bins activated in the tcache. */ unsigned tcache_nbins; + /* Last time GC has been performed. */ + nstime_t last_gc_time; /* Next bin to GC. */ szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[SC_NBINS]; /* For small bins, whether has been refilled since last GC. 
*/ diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index 27516387..eebad79f 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -24,5 +24,8 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) #define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ +#define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) +#define TCACHE_GC_LARGE_NBINS_MAX 1 #endif /* JEMALLOC_INTERNAL_TCACHE_TYPES_H */ diff --git a/src/tcache.c b/src/tcache.c index 35f18077..3d38700d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -283,7 +283,7 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, } } -static void +static bool tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { /* @@ -296,35 +296,49 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow)); cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); + if (low_water > 0) { + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + if ((cache_bin_ncached_max_get(cache_bin) >> + tcache_slow->lg_fill_div[szind]) > 1) { + tcache_slow->lg_fill_div[szind]++; + } + } else if (tcache_slow->bin_refilled[szind]) { + /* + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 0. 
+ */ + if (tcache_slow->lg_fill_div[szind] > 1) { + tcache_slow->lg_fill_div[szind]--; + } + tcache_slow->bin_refilled[szind] = false; + } assert(!tcache_slow->bin_refilled[szind]); - size_t nflush = low_water - (low_water >> 2); - if (nflush < tcache_slow->bin_flush_delay_items[szind]) { - /* Workaround for a conversion warning. */ - uint8_t nflush_uint8 = (uint8_t)nflush; - assert(sizeof(tcache_slow->bin_flush_delay_items[0]) == - sizeof(nflush_uint8)); - tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; - return; + cache_bin_sz_t nflush = low_water - (low_water >> 2); + /* + * When the new tcache gc is not enabled, keep the flush delay logic, + * and directly flush the bottom nflush items if needed. + */ + if (!opt_experimental_tcache_gc) { + if (nflush < tcache_slow->bin_flush_delay_items[szind]) { + /* Workaround for a conversion warning. */ + uint8_t nflush_uint8 = (uint8_t)nflush; + assert(sizeof(tcache_slow->bin_flush_delay_items[0]) == + sizeof(nflush_uint8)); + tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; + return false; + } + + tcache_slow->bin_flush_delay_items[szind] + = tcache_gc_item_delay_compute(szind); + goto label_flush; } - tcache_slow->bin_flush_delay_items[szind] - = tcache_gc_item_delay_compute(szind); - - /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. - */ - if ((cache_bin_ncached_max_get(cache_bin) >> - tcache_slow->lg_fill_div[szind]) > 1) { - tcache_slow->lg_fill_div[szind]++; - } - - /* - * When the new tcache gc is not enabled, or simply the entire bin needs - * to be flushed, flush the bottom nflush items directly. - */ - if (!opt_experimental_tcache_gc || nflush == ncached) { + /* Directly goto the flush path when the entire bin needs to be flushed. 
 */
+	if (nflush == ncached) {
 		goto label_flush;
 	}
 
@@ -371,12 +385,17 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 	tcache_gc_small_bin_shuffle(cache_bin, nremote, addr_min, addr_max);
 
 label_flush:
-	assert(nflush > 0 && nflush <= ncached);
+	if (nflush == 0) {
+		assert(low_water == 0);
+		return false;
+	}
+	assert(nflush <= ncached);
 	tcache_bin_flush_small(tsd, tcache, cache_bin, szind,
 	    (unsigned)(ncached - nflush));
+	return true;
 }
 
-static void
+static bool
tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
    szind_t szind) {
	/*
@@ -386,10 +405,32 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
 	assert(szind >= SC_NBINS);
 	cache_bin_t *cache_bin = &tcache->bins[szind];
 	assert(!tcache_bin_disabled(szind, cache_bin, tcache->tcache_slow));
-	cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin);
 	cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin);
-	tcache_bin_flush_large(tsd, tcache, cache_bin, szind,
-	    (unsigned)(ncached - low_water + (low_water >> 2)));
+	if (low_water == 0) {
+		return false;
+	}
+	unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin) -
+	    low_water + (low_water >> 2));
+	tcache_bin_flush_large(tsd, tcache, cache_bin, szind, nrem);
+	return true;
+}
+
+/* Try to gc one bin by szind, return true if any item was flushed. */
+static bool
+tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache,
+    szind_t szind) {
+	assert(tcache != NULL);
+	cache_bin_t *cache_bin = &tcache->bins[szind];
+	if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) {
+		return false;
+	}
+
+	bool is_small = (szind < SC_NBINS);
+	tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small);
+	bool ret = is_small ? 
tcache_gc_small(tsd, tcache_slow, tcache, szind) : + tcache_gc_large(tsd, tcache_slow, tcache, szind); + cache_bin_low_water_set(cache_bin); + return ret; } static void @@ -400,40 +441,67 @@ tcache_event(tsd_t *tsd) { } tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - szind_t szind = tcache_slow->next_gc_bin; - bool is_small = (szind < SC_NBINS); - cache_bin_t *cache_bin = &tcache->bins[szind]; + assert(tcache_slow != NULL); - if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) { - goto label_done; - } - - tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); - cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); - if (low_water > 0) { - if (is_small) { - tcache_gc_small(tsd, tcache_slow, tcache, szind); - } else { - tcache_gc_large(tsd, tcache_slow, tcache, szind); + /* When the new tcache gc is not enabled, GC one bin at a time. */ + if (!opt_experimental_tcache_gc) { + szind_t szind = tcache_slow->next_gc_bin; + tcache_try_gc_bin(tsd, tcache_slow, tcache, szind); + tcache_slow->next_gc_bin++; + if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) { + tcache_slow->next_gc_bin = 0; } - } else if (is_small && tcache_slow->bin_refilled[szind]) { - assert(low_water == 0); - /* - * Increase fill count by 2X for small bins. Make sure - * lg_fill_div stays greater than 0. - */ - if (tcache_slow->lg_fill_div[szind] > 1) { - tcache_slow->lg_fill_div[szind]--; - } - tcache_slow->bin_refilled[szind] = false; + return; } - cache_bin_low_water_set(cache_bin); -label_done: - tcache_slow->next_gc_bin++; - if (tcache_slow->next_gc_bin == tcache_nbins_get(tcache_slow)) { - tcache_slow->next_gc_bin = 0; + nstime_t now; + nstime_copy(&now, &tcache_slow->last_gc_time); + nstime_update(&now); + assert(nstime_compare(&now, &tcache_slow->last_gc_time) >= 0); + + if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time) < + TCACHE_GC_INTERVAL_NS) { + // time interval is too short, skip this event. 
+ return; } + /* Update last_gc_time to now. */ + nstime_copy(&tcache_slow->last_gc_time, &now); + + unsigned gc_small_nbins = 0, gc_large_nbins = 0; + unsigned tcache_nbins = tcache_nbins_get(tcache_slow); + unsigned small_nbins = tcache_nbins > SC_NBINS ? SC_NBINS : tcache_nbins; + szind_t szind_small = tcache_slow->next_gc_bin_small; + szind_t szind_large = tcache_slow->next_gc_bin_large; + + /* Flush at most TCACHE_GC_SMALL_NBINS_MAX small bins at a time. */ + for (unsigned i = 0; i < small_nbins && gc_small_nbins < + TCACHE_GC_SMALL_NBINS_MAX; i++) { + assert(szind_small < SC_NBINS); + if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_small)) { + gc_small_nbins++; + } + if (++szind_small == small_nbins) { + szind_small = 0; + } + } + tcache_slow->next_gc_bin_small = szind_small; + + if (tcache_nbins <= SC_NBINS) { + return; + } + + /* Flush at most TCACHE_GC_LARGE_NBINS_MAX large bins at a time. */ + for (unsigned i = SC_NBINS; i < tcache_nbins && gc_large_nbins < + TCACHE_GC_LARGE_NBINS_MAX; i++) { + assert(szind_large >= SC_NBINS && szind_large < tcache_nbins); + if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_large)) { + gc_large_nbins++; + } + if (++szind_large == tcache_nbins) { + szind_large = SC_NBINS; + } + } + tcache_slow->next_gc_bin_large = szind_large; } void @@ -1146,7 +1214,10 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, tcache_slow->tcache = tcache; memset(&tcache_slow->link, 0, sizeof(ql_elm(tcache_t))); + nstime_init_zero(&tcache_slow->last_gc_time); tcache_slow->next_gc_bin = 0; + tcache_slow->next_gc_bin_small = 0; + tcache_slow->next_gc_bin_large = SC_NBINS; tcache_slow->arena = NULL; tcache_slow->dyn_alloc = mem; From 7c996861656f67dc74ab66f1bc6e758ed96c69b3 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Thu, 22 Aug 2024 14:50:08 -0700 Subject: [PATCH 2423/2608] Better handle burst allocation on tcache_alloc_small_hard --- include/jemalloc/internal/cache_bin.h | 10 ++ 
include/jemalloc/internal/tcache_structs.h | 4 +- src/tcache.c | 105 ++++++++++++++++++--- 3 files changed, 104 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index a7a5e40e..cb137af9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -600,6 +600,16 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, */ } +/* + * For small bins, used to calculate how many items to fill at a time. + * The final nfill is calculated by (ncached_max >> (base - offset)). + */ +typedef struct cache_bin_fill_ctl_s cache_bin_fill_ctl_t; +struct cache_bin_fill_ctl_s { + uint8_t base; + uint8_t offset; +}; + /* * Limit how many items can be flushed in a batch (Which is the upper bound * for the nflush parameter in tcache_bin_flush_impl()). diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index 63e5db5d..e9a68152 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -39,8 +39,8 @@ struct tcache_slow_s { szind_t next_gc_bin; szind_t next_gc_bin_small; szind_t next_gc_bin_large; - /* For small bins, fill (ncached_max >> lg_fill_div). */ - uint8_t lg_fill_div[SC_NBINS]; + /* For small bins, help determine how many items to fill at a time. */ + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; /* For small bins, whether has been refilled since last GC. 
 */
 	bool bin_refilled[SC_NBINS];
 	/*
diff --git a/src/tcache.c b/src/tcache.c
index 3d38700d..15da14da 100644
--- a/src/tcache.c
+++ b/src/tcache.c
@@ -121,6 +121,85 @@ tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) {
 	return TE_MIN_START_WAIT;
 }
 
+static inline void
+tcache_bin_fill_ctl_init(tcache_slow_t *tcache_slow, szind_t szind) {
+	assert(szind < SC_NBINS);
+	cache_bin_fill_ctl_t *ctl =
+	    &tcache_slow->bin_fill_ctl_do_not_access_directly[szind];
+	ctl->base = 1;
+	ctl->offset = 0;
+}
+
+static inline cache_bin_fill_ctl_t *
+tcache_bin_fill_ctl_get(tcache_slow_t *tcache_slow, szind_t szind) {
+	assert(szind < SC_NBINS);
+	cache_bin_fill_ctl_t *ctl =
+	    &tcache_slow->bin_fill_ctl_do_not_access_directly[szind];
+	assert(ctl->base > ctl->offset);
+	return ctl;
+}
+
+/*
+ * The number of items to be filled at a time for a given small bin is
+ * calculated by (ncached_max >> lg_fill_div).
+ * The actual ctl struct consists of two fields, i.e. base and offset,
+ * and the difference between the two (base - offset) is the final lg_fill_div.
+ * The base is adjusted during GC based on the traffic within a period of time,
+ * while the offset is updated in real time to handle the immediate traffic.
+ */
+static inline uint8_t
+tcache_nfill_small_lg_div_get(tcache_slow_t *tcache_slow, szind_t szind) {
+	cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind);
+	return (ctl->base - (opt_experimental_tcache_gc ? ctl->offset : 0));
+}
+
+/*
+ * When we want to fill more items to respond to burst load,
+ * offset is increased so that (base - offset) is decreased,
+ * which in turn increases the number of items to be filled.
+ */ +static inline void +tcache_nfill_small_burst_prepare(tcache_slow_t *tcache_slow, szind_t szind) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + if (ctl->offset + 1 < ctl->base) { + ctl->offset++; + } +} + +static inline void +tcache_nfill_small_burst_reset(tcache_slow_t *tcache_slow, szind_t szind) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + ctl->offset = 0; +} + +/* + * limit == 0: indicating that the fill count should be increased, + * i.e. lg_div(base) should be decreased. + * + * limit != 0: limit is set to ncached_max, indicating that the fill + * count should be decreased, i.e. lg_div(base) should be increased. + */ +static inline void +tcache_nfill_small_gc_update(tcache_slow_t *tcache_slow, szind_t szind, + cache_bin_sz_t limit) { + cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); + if (!limit && ctl->base > 1) { + /* + * Increase fill count by 2X for small bins. Make sure + * lg_fill_div stays greater than 1. + */ + ctl->base--; + } else if (limit && (limit >> ctl->base) > 1) { + /* + * Reduce fill count by 2X. Limit lg_fill_div such that + * the fill count is always at least 1. + */ + ctl->base++; + } + /* Reset the offset for the next GC period. */ + ctl->offset = 0; +} + static uint8_t tcache_gc_item_delay_compute(szind_t szind) { assert(szind < SC_NBINS); @@ -298,21 +377,19 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, cache_bin_sz_t low_water = cache_bin_low_water_get(cache_bin); if (low_water > 0) { /* - * Reduce fill count by 2X. Limit lg_fill_div such that - * the fill count is always at least 1. + * There is unused items within the GC period => reduce fill count. + * limit field != 0 is borrowed to indicate that the fill count + * should be reduced. 
*/ - if ((cache_bin_ncached_max_get(cache_bin) >> - tcache_slow->lg_fill_div[szind]) > 1) { - tcache_slow->lg_fill_div[szind]++; - } + tcache_nfill_small_gc_update(tcache_slow, szind, + /* limit */ cache_bin_ncached_max_get(cache_bin)); } else if (tcache_slow->bin_refilled[szind]) { /* - * Increase fill count by 2X for small bins. Make sure - * lg_fill_div stays greater than 0. + * There has been refills within the GC period => increase fill count. + * limit field set to 0 is borrowed to indicate that the fill count + * should be increased. */ - if (tcache_slow->lg_fill_div[szind] > 1) { - tcache_slow->lg_fill_div[szind]--; - } + tcache_nfill_small_gc_update(tcache_slow, szind, /* limit */ 0); tcache_slow->bin_refilled[szind] = false; } assert(!tcache_slow->bin_refilled[szind]); @@ -526,7 +603,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin) - >> tcache_slow->lg_fill_div[binind]; + >> tcache_nfill_small_lg_div_get(tcache_slow, binind); if (nfill == 0) { nfill = 1; } @@ -534,6 +611,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, /* nfill_min */ opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; + tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); return ret; @@ -1059,6 +1137,7 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem) { + tcache_nfill_small_burst_reset(tcache->tcache_slow, binind); tcache_bin_flush_bottom(tsd, tcache, cache_bin, binind, rem, /* small */ true); } @@ -1233,7 +1312,7 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, &cur_offset); for (unsigned i = 0; i < tcache_nbins; i++) { if (i < SC_NBINS) { - tcache_slow->lg_fill_div[i] = 1; + tcache_bin_fill_ctl_init(tcache_slow, i); tcache_slow->bin_refilled[i] = false; tcache_slow->bin_flush_delay_items[i] = tcache_gc_item_delay_compute(i); From baa5a90cc6f77e86c2aa58257f3d6c67a1b863dc Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 28 Aug 2024 15:17:11 -0700 Subject: [PATCH 2424/2608] fix nstime_update_mock in arena_decay unit test --- test/unit/arena_decay.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index e991f4dd..10d1a6b1 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -18,7 +18,7 @@ nstime_monotonic_mock(void) { static void nstime_update_mock(nstime_t *time) { nupdates_mock++; - if (monotonic_mock) { + if (monotonic_mock && nstime_compare(&time_mock, time) > 0) { nstime_copy(time, &time_mock); } } From cd05b19f10fce353105dcc7290a8374a5c4f4a67 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Mar 2024 14:43:17 -0700 Subject: [PATCH 2425/2608] Fix the VM over-reservation on aarch64 w/ larger pages. HUGEPAGE could be larger on some platforms (e.g. 512M on aarch64 w/ 64K pages), in which case it would cause grow_retained / exp_grow to over-reserve VMs. 
Similarly, make sure the base alloc has a const 2M alignment. --- include/jemalloc/internal/base.h | 7 ++++++- src/base.c | 31 +++++++++++++++++++++++-------- src/exp_grow.c | 8 +++++++- 3 files changed, 36 insertions(+), 10 deletions(-) diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index 86b0cf4a..c8004b25 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -6,6 +6,12 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/mutex.h" +/* + * Alignment when THP is not enabled. Set to constant 2M in case the HUGEPAGE + * value is unexpected high (which would cause VM over-reservation). + */ +#define BASE_BLOCK_MIN_ALIGN ((size_t)2 << 20) + enum metadata_thp_mode_e { metadata_thp_disabled = 0, /* @@ -26,7 +32,6 @@ typedef enum metadata_thp_mode_e metadata_thp_mode_t; extern metadata_thp_mode_t opt_metadata_thp; extern const char *const metadata_thp_mode_names[]; - /* Embedded at the beginning of every block of base-managed virtual memory. */ typedef struct base_block_s base_block_t; struct base_block_s { diff --git a/src/base.c b/src/base.c index 1d5e8fcd..ac8598eb 100644 --- a/src/base.c +++ b/src/base.c @@ -42,9 +42,17 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { bool zero = true; bool commit = true; - /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ - assert(size == HUGEPAGE_CEILING(size)); - size_t alignment = HUGEPAGE; + /* + * Use huge page sizes and alignment when opt_metadata_thp is enabled + * or auto. 
+ */ + size_t alignment; + if (opt_metadata_thp == metadata_thp_disabled) { + alignment = BASE_BLOCK_MIN_ALIGN; + } else { + assert(size == HUGEPAGE_CEILING(size)); + alignment = HUGEPAGE; + } if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); if (have_madvise_huge && addr) { @@ -277,6 +285,13 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, return ret; } +static size_t +base_block_size_ceil(size_t block_size) { + return opt_metadata_thp == metadata_thp_disabled ? + ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN) : + HUGEPAGE_CEILING(block_size); +} + /* * Allocate a block of virtual memory that is large enough to start with a * base_block_t header, followed by an object of specified size and alignment. @@ -295,14 +310,14 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, * Create increasingly larger blocks in order to limit the total number * of disjoint virtual memory ranges. Choose the next size in the page * size class series (skipping size classes that are not a multiple of - * HUGEPAGE), or a size large enough to satisfy the requested size and - * alignment, whichever is larger. + * HUGEPAGE when using metadata_thp), or a size large enough to satisfy + * the requested size and alignment, whichever is larger. */ - size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size - + usize)); + size_t min_block_size = base_block_size_ceil(sz_psz2u(header_size + + gap_size + usize)); pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? *pind_last + 1 : *pind_last; - size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next)); + size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); size_t block_size = (min_block_size > next_block_size) ? 
min_block_size : next_block_size; base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind, diff --git a/src/exp_grow.c b/src/exp_grow.c index 386471f4..955823a1 100644 --- a/src/exp_grow.c +++ b/src/exp_grow.c @@ -3,6 +3,12 @@ void exp_grow_init(exp_grow_t *exp_grow) { - exp_grow->next = sz_psz2ind(HUGEPAGE); + /* + * Enforce a minimal of 2M grow, which is convenient for the huge page + * use cases. Avoid using HUGEPAGE as the value though, because on some + * platforms it can be very large (e.g. 512M on aarch64 w/ 64K pages). + */ + const size_t min_grow = (size_t)2 << 20; + exp_grow->next = sz_psz2ind(min_grow); exp_grow->limit = sz_psz2ind(SC_LARGE_MAXCLASS); } From 3383b98f1b9a2e60ec0bda2fcf463ba271926596 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 28 Mar 2024 16:04:56 -0700 Subject: [PATCH 2426/2608] Check if the huge page size is expected when enabling HPA. --- include/jemalloc/internal/pages.h | 8 ++++++++ src/jemalloc.c | 28 +++++++++++++++++++++------- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index b4e9678e..6c295b43 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -27,6 +27,14 @@ extern size_t os_page; #define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) #define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +/* + * Used to validate that the hugepage size is not unexpectedly high. The huge + * page features (HPA, metadata_thp) are primarily designed with a 2M THP size + * in mind. Much larger sizes are not tested and likely to cause issues such as + * bad fragmentation or simply broken. 
+ */ +#define HUGEPAGE_MAX_EXPECTED_SIZE ((size_t)(16U << 20)) + #if LG_HUGEPAGE != 0 # define HUGEPAGE_PAGES (HUGEPAGE / PAGE) #else diff --git a/src/jemalloc.c b/src/jemalloc.c index dc471563..63f6b302 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1041,18 +1041,14 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { return ret; } -static void -validate_hpa_settings(void) { - if (!hpa_supported() || !opt_hpa || opt_hpa_opts.dirty_mult == (fxp_t)-1) { - return; - } +static bool +validate_hpa_ratios(void) { size_t hpa_threshold = fxp_mul_frac(HUGEPAGE, opt_hpa_opts.dirty_mult) + opt_hpa_opts.hugification_threshold; if (hpa_threshold > HUGEPAGE) { - return; + return false; } - had_conf_error = true; char hpa_dirty_mult[FXP_BUF_SIZE]; char hugification_threshold[FXP_BUF_SIZE]; char normalization_message[256] = {0}; @@ -1079,6 +1075,24 @@ validate_hpa_settings(void) { "hpa_hugification_threshold_ratio: %s and hpa_dirty_mult: %s. " "These values should sum to > 1.0.\n%s", hugification_threshold, hpa_dirty_mult, normalization_message); + + return true; +} + +static void +validate_hpa_settings(void) { + if (!hpa_supported() || !opt_hpa) { + return; + } + if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + had_conf_error = true; + malloc_printf( + ": huge page size (%zu) greater than expected." + "May not be supported or behave as expected.", HUGEPAGE); + } + if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) { + had_conf_error = true; + } } static void From c1a3ca3755f2adae078b14925e874a6ff743aba1 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 3 May 2024 12:04:09 -0700 Subject: [PATCH 2427/2608] Adjust the value width in stats output. Some of the values are accumulative and can reach high after running for long periods. 
--- src/stats.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/stats.c b/src/stats.c index f45b7447..ef025eb3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -324,12 +324,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, COL_HDR(row, size, NULL, right, 20, size) COL_HDR(row, ind, NULL, right, 4, unsigned) - COL_HDR(row, allocated, NULL, right, 13, size) - COL_HDR(row, nmalloc, NULL, right, 13, uint64) + COL_HDR(row, allocated, NULL, right, 14, size) + COL_HDR(row, nmalloc, NULL, right, 14, uint64) COL_HDR(row, nmalloc_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, ndalloc, NULL, right, 13, uint64) + COL_HDR(row, ndalloc, NULL, right, 14, uint64) COL_HDR(row, ndalloc_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, nrequests, NULL, right, 13, uint64) + COL_HDR(row, nrequests, NULL, right, 15, uint64) COL_HDR(row, nrequests_ps, "(#/sec)", right, 10, uint64) COL_HDR_DECLARE(prof_live_requested); COL_HDR_DECLARE(prof_live_count); From 323ed2e3a8c88c7db89b4119b10192af4303d29c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 11 Sep 2024 15:08:24 -0700 Subject: [PATCH 2428/2608] Optimize fast path to allow static size class computation. After inlining at LTO time, many callsites have input size known which means the index and usable size can be translated at compile time. However the size-index lookup table prevents it -- this commit solves that by switching to the compute approach when the size is detected to be a known const. 
--- .../internal/jemalloc_internal_inlines_c.h | 5 +-- include/jemalloc/internal/sz.h | 33 +++++++++++++++---- include/jemalloc/internal/util.h | 10 ++++++ 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 6dcffac9..432ec17c 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -496,6 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); emap_alloc_ctx_t alloc_ctx; + size_t usize; if (!size_hint) { bool err = emap_alloc_ctx_try_lookup_fast(tsd, &arena_emap_global, ptr, &alloc_ctx); @@ -507,6 +508,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { return false; } assert(alloc_ctx.szind != SC_NSIZES); + usize = sz_index2size(alloc_ctx.szind); } else { /* * Check for both sizes that are too large, and for sampled / @@ -518,7 +520,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { /* check_prof */ true))) { return false; } - alloc_ctx.szind = sz_size2index_lookup(size); + sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize); /* Max lookup class must be small. */ assert(alloc_ctx.szind < SC_NBINS); /* This is a dead store, except when opt size checking is on. 
*/ @@ -534,7 +536,6 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { uint64_t deallocated, threshold; te_free_fastpath_ctx(tsd, &deallocated, &threshold); - size_t usize = sz_index2size(alloc_ctx.szind); uint64_t deallocated_after = deallocated + usize; /* * Check for events and tsd non-nominal (fast_threshold will be set to diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 955d8ec0..a2d2debc 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -152,8 +152,8 @@ sz_psz2u(size_t psz) { return usize; } -static inline szind_t -sz_size2index_compute(size_t size) { +JEMALLOC_ALWAYS_INLINE szind_t +sz_size2index_compute_inline(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { return SC_NSIZES; } @@ -186,6 +186,11 @@ sz_size2index_compute(size_t size) { } } +static inline szind_t +sz_size2index_compute(size_t size) { + return sz_size2index_compute_inline(size); +} + JEMALLOC_ALWAYS_INLINE szind_t sz_size2index_lookup_impl(size_t size) { assert(size <= SC_LOOKUP_MAXCLASS); @@ -208,8 +213,8 @@ sz_size2index(size_t size) { return sz_size2index_compute(size); } -static inline size_t -sz_index2size_compute(szind_t index) { +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size_compute_inline(szind_t index) { #if (SC_NTINY > 0) if (index < SC_NTINY) { return (ZU(1) << (SC_LG_TINY_MAXCLASS - SC_NTINY + 1 + index)); @@ -234,6 +239,11 @@ sz_index2size_compute(szind_t index) { } } +static inline size_t +sz_index2size_compute(szind_t index) { + return sz_index2size_compute_inline(index); +} + JEMALLOC_ALWAYS_INLINE size_t sz_index2size_lookup_impl(szind_t index) { return sz_index2size_tab[index]; @@ -254,8 +264,19 @@ sz_index2size(szind_t index) { JEMALLOC_ALWAYS_INLINE void sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { - *ind = sz_size2index_lookup_impl(size); - *usize = sz_index2size_lookup_impl(*ind); + if (util_compile_time_const(size)) { + /* + * When inlined, the size may 
become known at compile + * time, which allows static computation through LTO. + */ + *ind = sz_size2index_compute_inline(size); + assert(*ind == sz_size2index_lookup_impl(size)); + *usize = sz_index2size_compute_inline(*ind); + assert(*usize == sz_index2size_lookup_impl(*ind)); + } else { + *ind = sz_size2index_lookup_impl(size); + *usize = sz_index2size_lookup_impl(*ind); + } } JEMALLOC_ALWAYS_INLINE size_t diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index f4035095..24f23629 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -79,6 +79,16 @@ get_errno(void) { } while(0) #endif +/* Allows compiler constant folding on inlined paths. */ +#if defined(__has_builtin) +# if __has_builtin(__builtin_constant_p) +# define util_compile_time_const(x) __builtin_constant_p(x) +# endif +#endif +#ifndef util_compile_time_const +# define util_compile_time_const(x) (false) +#endif + /* ptr should be valid. */ JEMALLOC_ALWAYS_INLINE void util_prefetch_read(void *ptr) { From 60f472f367121d7d4933d0237ff38276f565fc88 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Thu, 12 Sep 2024 09:03:30 -0700 Subject: [PATCH 2429/2608] Fix initialization of pop_attempt_results in bin_batching test --- test/unit/bin_batching.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c index 19975341..a20062fd 100644 --- a/test/unit/bin_batching.c +++ b/test/unit/bin_batching.c @@ -118,7 +118,8 @@ stress_run(void (*main_thread_fn)(), int nruns) { bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[2], 0, ATOMIC_RELAXED); + atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED); + atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED); atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); atomic_store_zu(&dalloc_nonzero_slab_count, 0, 
ATOMIC_RELAXED); atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); From 8c2e15d1a5749e50a1f61e216bb5fefc0d71d9b0 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Tue, 10 Sep 2024 13:29:35 -0700 Subject: [PATCH 2430/2608] Add malloc_open() / malloc_close() reentrancy safe helpers --- include/jemalloc/internal/malloc_io.h | 21 +++++++++ src/pages.c | 61 +++++---------------------- 2 files changed, 32 insertions(+), 50 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 91e7b2ba..bb43fb5c 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -134,4 +134,25 @@ malloc_read_fd(int fd, void *buf, size_t count) { return bytes_read; } +static inline int malloc_open(const char *path, int flags) { + int fd; +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + fd = (int)syscall(SYS_open, path, flags); +#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) + fd = (int)syscall(SYS_openat, AT_FDCWD, path, flags); +#else + fd = open(path, flags); +#endif + return fd; +} + +static inline int malloc_close(int fd) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + return (int)syscall(SYS_close, fd); +#else + return close(fd); +#endif +} + + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/src/pages.c b/src/pages.c index 981aae9b..5b55a046 100644 --- a/src/pages.c +++ b/src/pages.c @@ -651,36 +651,13 @@ os_overcommits_proc(void) { int fd; char buf[1]; -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - #if defined(O_CLOEXEC) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); - #else - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd != -1) { - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - } - #endif -#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - #if defined(O_CLOEXEC) - fd = (int)syscall(SYS_openat, - AT_FDCWD, 
"/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); - #else - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd != -1) { - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - } - #endif +#if defined(O_CLOEXEC) + fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); #else - #if defined(O_CLOEXEC) - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); - #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd != -1) { - fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); - } - #endif + fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } #endif if (fd == -1) { @@ -688,11 +665,7 @@ os_overcommits_proc(void) { } ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) - syscall(SYS_close, fd); -#else - close(fd); -#endif + malloc_close(fd); if (nread < 1) { return false; /* Error. 
*/ @@ -741,29 +714,17 @@ init_thp_state(void) { static const char sys_state_never[] = "always madvise [never]\n"; char buf[sizeof(sys_state_madvise)]; -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - int fd = (int)syscall(SYS_open, + int fd = malloc_open( "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); -#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - int fd = (int)syscall(SYS_openat, - AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); -#else - int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); -#endif if (fd == -1) { goto label_error; } ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) - syscall(SYS_close, fd); -#else - close(fd); -#endif - - if (nread < 0) { + malloc_close(fd); + if (nread < 0) { goto label_error; - } + } if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { init_system_thp_mode = thp_mode_default; From db4f0e71820017039f09e5acc04b554826e304fd Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 27 Aug 2024 14:26:57 -0700 Subject: [PATCH 2431/2608] Add travis tests for arm64. 
--- .travis.yml | 36 ++++++++++++++++++++++++++++++++++++ scripts/gen_travis.py | 4 ++++ 2 files changed, 40 insertions(+) diff --git a/.travis.yml b/.travis.yml index 20605fc0..aad7eea7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -271,6 +271,42 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: osx arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index d2fd25e3..ae0b9e2e 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -246,6 +246,9 @@ def generate_linux(arch): if arch == PPC64LE: # Avoid 32 bit builds and clang on PowerPC exclude = (CROSS_COMPILE_32BIT, CLANG,) + if arch == ARM64: + # Avoid 32 bit build on ARM64 + exclude = (CROSS_COMPILE_32BIT,) return generate_jobs(os, arch, exclude, max_unusual_opts) @@ -318,6 +321,7 @@ def main(): generate_linux(AMD64), # PPC tests on travis has been down for a while, disable it for now. # generate_linux(PPC64LE), + generate_linux(ARM64), generate_macos(AMD64), From 661fb1e6722e9b29e76520182086edcb835077e3 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 19 Sep 2024 23:48:55 -0700 Subject: [PATCH 2432/2608] Fix the locked flag for malloc_mutex_trylock(). --- include/jemalloc/internal/mutex.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 75abf298..9f72cb18 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -36,6 +36,8 @@ struct malloc_mutex_s { * Hint flag to avoid exclusive cache line contention * during spin waiting. Placed along with prof_data * since it's always modified even with no contention. + * Modified by the lock owner only (after acquired, and + * before release), and may be read by other threads. 
*/ atomic_b_t locked; #ifdef _WIN32 @@ -156,7 +158,12 @@ malloc_mutex_lock_final(malloc_mutex_t *mutex) { static inline bool malloc_mutex_trylock_final(malloc_mutex_t *mutex) { - return MALLOC_MUTEX_TRYLOCK(mutex); + bool failed = MALLOC_MUTEX_TRYLOCK(mutex); + if (!failed) { + atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); + } + + return failed; } static inline void @@ -216,7 +223,6 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (isthreaded) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); - atomic_store_b(&mutex->locked, true, ATOMIC_RELAXED); } mutex_owner_stats_update(tsdn, mutex); } From 3eb7a4b53dfeae537fd78cece51342a1f12d86dc Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 15:07:27 -0700 Subject: [PATCH 2433/2608] Fix mutex state tracking around pthread_cond_wait(). pthread_cond_wait drops and re-acquires the mutex internally, w/o going through our wrapper. Update the locked state explicitly. --- src/background_thread.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/background_thread.c b/src/background_thread.c index c92fa2bc..a5f4dbf7 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -153,6 +153,26 @@ set_current_thread_affinity(int cpu) { /* Minimal sleep interval 100 ms. */ #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) +static int +background_thread_cond_wait(background_thread_info_t *info, + struct timespec *ts) { + int ret; + + /* + * pthread_cond_wait drops and re-acquires the mutex internally, w/o + * going through our wrapper. Update the locked state explicitly. 
+ */ + atomic_store_b(&info->mtx.locked, false, ATOMIC_RELAXED); + if (ts == NULL) { + ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + } else { + ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, ts); + } + atomic_store_b(&info->mtx.locked, true, ATOMIC_RELAXED); + + return ret; +} + static void background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) { @@ -171,7 +191,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { background_thread_wakeup_time_set(tsdn, info, BACKGROUND_THREAD_INDEFINITE_SLEEP); - ret = pthread_cond_wait(&info->cond, &info->mtx.lock); + ret = background_thread_cond_wait(info, NULL); assert(ret == 0); } else { assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && @@ -193,7 +213,7 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup); assert(!background_thread_indefinite_sleep(info)); - ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts); + ret = background_thread_cond_wait(info, &ts); assert(ret == ETIMEDOUT || ret == 0); } if (config_stats) { From 1960536b61ba2c1d287cf7866fae02aea3f4e3b0 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 19 Sep 2024 23:28:23 -0700 Subject: [PATCH 2434/2608] Add malloc_mutex_is_locked() sanity checks. --- include/jemalloc/internal/mutex.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index 9f72cb18..ceb07b85 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -178,6 +178,12 @@ mutex_owner_stats_update(tsdn_t *tsdn, malloc_mutex_t *mutex) { } } +static inline bool +malloc_mutex_is_locked(malloc_mutex_t *mutex) { + /* Used for sanity checking only. */ + return atomic_load_b(&mutex->locked, ATOMIC_RELAXED); +} + /* Trylock: return false if the lock is successfully acquired. 
*/ static inline bool malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { @@ -186,6 +192,7 @@ malloc_mutex_trylock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (malloc_mutex_trylock_final(mutex)) { return true; } + assert(malloc_mutex_is_locked(mutex)); mutex_owner_stats_update(tsdn, mutex); } witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); @@ -224,6 +231,7 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { if (malloc_mutex_trylock_final(mutex)) { malloc_mutex_lock_slow(mutex); } + assert(malloc_mutex_is_locked(mutex)); mutex_owner_stats_update(tsdn, mutex); } witness_lock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); @@ -231,9 +239,10 @@ malloc_mutex_lock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { - atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED); witness_unlock(tsdn_witness_tsdp_get(tsdn), &mutex->witness); if (isthreaded) { + assert(malloc_mutex_is_locked(mutex)); + atomic_store_b(&mutex->locked, false, ATOMIC_RELAXED); MALLOC_MUTEX_UNLOCK(mutex); } } From de5606d0d819cbea5c9ef17c989821c1bd7a6697 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 15:39:46 -0700 Subject: [PATCH 2435/2608] Fix a missing init value warning caught by static analysis. 
--- src/extent.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/extent.c b/src/extent.c index 2efc7938..30942491 100644 --- a/src/extent.c +++ b/src/extent.c @@ -825,6 +825,7 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { assert(!edata_guarded_get(edata)); assert(coalesced != NULL); + *coalesced = false; /* * We avoid checking / locking inactive neighbors for large size * classes, since they are eagerly coalesced on deallocation which can From 734f29ce56a2769857b084a37af09f5846c56a32 Mon Sep 17 00:00:00 2001 From: roblabla Date: Mon, 23 Sep 2024 15:33:43 +0200 Subject: [PATCH 2436/2608] Fix compilation with MSVC 2022 On MSVC, log is an intrinsic that doesn't require libm. However, AC_SEARCH_LIBS does not successfully detect this, as it will try to compile a program using the wrong signature for log. Newer versions of MSVC CL detects this and rejects the program with the following messages: conftest.c(40): warning C4391: 'char log()': incorrect return type for intrinsic function, expected 'double' conftest.c(44): error C2168: 'log': too few actual parameters for intrinsic function Since log is always available on MSVC (it's been around since the dawn of time), we simply always assume it's there if MSVC is detected. --- configure.ac | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index ff493e1d..dcf357e4 100644 --- a/configure.ac +++ b/configure.ac @@ -878,11 +878,26 @@ AC_SUBST([DUMP_SYMS]) AC_SUBST([CC_MM]) dnl Determine whether libm must be linked to use e.g. log(3). -AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) -if test "x$ac_cv_search_log" != "xnone required" ; then - LM="$ac_cv_search_log" -else + +# On MSVC, log is an intrinsic that doesn't require libm. 
However, +# AC_SEARCH_LIBS does not successfully detect this, as it will try to compile +# a program using the wrong signature for log. Newer versions of MSVC CL detects +# this and rejects the program with the following messages. +# +# conftest.c(40): warning C4391: 'char log()': incorrect return type for intrinsic function, expected 'double' +# conftest.c(44): error C2168: 'log': too few actual parameters for intrinsic function +# +# Since log is always available on MSVC (it's been around since the dawn of +# time), we simply always assume it's there if MSVC is detected. +if test "x$je_cv_msvc" = "xyes" ; then LM= +else + AC_SEARCH_LIBS([log], [m], , [AC_MSG_ERROR([Missing math functions])]) + if test "x$ac_cv_search_log" != "xnone required" ; then + LM="$ac_cv_search_log" + else + LM= + fi fi AC_SUBST(LM) From 0181aaa495bc6ef3dcd570ea5d37cb7b72375614 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 13 Sep 2024 15:52:22 -0700 Subject: [PATCH 2437/2608] Optimize edata_cmp_summary_compare when __uint128_t is available --- configure.ac | 15 +++++++++++++++ include/jemalloc/internal/edata.h | 16 ++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 3 +++ 3 files changed, 34 insertions(+) diff --git a/configure.ac b/configure.ac index dcf357e4..2e7f610d 100644 --- a/configure.ac +++ b/configure.ac @@ -562,6 +562,21 @@ AC_CACHE_CHECK([asm volatile support], if test "x${je_cv_asm_volatile}" = "xyes"; then AC_DEFINE([JEMALLOC_HAVE_ASM_VOLATILE], [ ], [ ]) fi +AC_CACHE_CHECK([__int128 support], + [je_cv_int128], + AC_RUN_IFELSE([AC_LANG_PROGRAM( +[[ +]], +[[ + __int128 temp = 0; + return temp; +]])], +[je_cv_int128=yes], +[je_cv_int128=no], +[je_cv_int128=no])) +if test "x${je_cv_int128}" = "xyes"; then + AC_DEFINE([JEMALLOC_HAVE_INT128], [ ], [ ]) +fi LD_PRELOAD_VAR="LD_PRELOAD" so="so" diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 17befd92..2381ccbc 100644 --- a/include/jemalloc/internal/edata.h +++ 
b/include/jemalloc/internal/edata.h @@ -666,6 +666,21 @@ edata_cmp_summary_get(const edata_t *edata) { return result; } +#ifdef JEMALLOC_HAVE_INT128 +JEMALLOC_ALWAYS_INLINE unsigned __int128 +edata_cmp_summary_encode(edata_cmp_summary_t src) { + return ((unsigned __int128)src.sn << 64) | src.addr; +} + +static inline int +edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { + unsigned __int128 a_encoded = edata_cmp_summary_encode(a); + unsigned __int128 b_encoded = edata_cmp_summary_encode(b); + if (a_encoded < b_encoded) return -1; + if (a_encoded == b_encoded) return 0; + return 1; +} +#else static inline int edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { /* @@ -683,6 +698,7 @@ edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + ((a.addr > b.addr) - (a.addr < b.addr)); } +#endif static inline int edata_snad_comp(const edata_t *a, const edata_t *b) { diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7498bc48..7f369873 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -454,6 +454,9 @@ */ #undef JEMALLOC_HAVE_RDTSCP +/* If defined, use __int128 for optimization. */ +#undef JEMALLOC_HAVE_INT128 + #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ From 44db479fad82751a3c6a3157e59b9d295f9ec90f Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 20:24:30 -0700 Subject: [PATCH 2438/2608] Fix the lock owner sanity checking during background thread boot. During boot, some mutexes are not initialized yet, plus there's no point taking many mutexes while everything is covered by the global init lock, so the locking assumptions in some functions (e.g. background_thread_enabled_set()) can't be enforced. Skip the lock owner check in this case. 
--- include/jemalloc/internal/background_thread_inlines.h | 7 ++++++- src/background_thread.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index 4ed05d1b..fd3884f1 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -11,10 +11,15 @@ background_thread_enabled(void) { return atomic_load_b(&background_thread_enabled_state, ATOMIC_RELAXED); } +JEMALLOC_ALWAYS_INLINE void +background_thread_enabled_set_impl(bool state) { + atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); +} + JEMALLOC_ALWAYS_INLINE void background_thread_enabled_set(tsdn_t *tsdn, bool state) { malloc_mutex_assert_owner(tsdn, &background_thread_lock); - atomic_store_b(&background_thread_enabled_state, state, ATOMIC_RELAXED); + background_thread_enabled_set_impl(state); } JEMALLOC_ALWAYS_INLINE background_thread_info_t * diff --git a/src/background_thread.c b/src/background_thread.c index a5f4dbf7..30c3111c 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -819,7 +819,6 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { } max_background_threads = opt_max_background_threads; - background_thread_enabled_set(tsdn, opt_background_thread); if (malloc_mutex_init(&background_thread_lock, "background_thread_global", WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, @@ -850,7 +849,8 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { background_thread_info_init(tsdn, info); malloc_mutex_unlock(tsdn, &info->mtx); } + /* Using _impl to bypass the locking check during init. 
*/ + background_thread_enabled_set_impl(opt_background_thread); #endif - return false; } From 6cc42173cbb2dad6ef5c7e49e6666987ce4cf92c Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 20 Sep 2024 20:33:10 -0700 Subject: [PATCH 2439/2608] Assert the mutex is locked within malloc_mutex_assert_owner(). --- include/jemalloc/internal/mutex.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index ceb07b85..db2bdf37 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -250,6 +250,9 @@ malloc_mutex_unlock(tsdn_t *tsdn, malloc_mutex_t *mutex) { static inline void malloc_mutex_assert_owner(tsdn_t *tsdn, malloc_mutex_t *mutex) { witness_assert_owner(tsdn_witness_tsdp_get(tsdn), &mutex->witness); + if (isthreaded) { + assert(malloc_mutex_is_locked(mutex)); + } } static inline void From 4f4fd424477142ee9962fcf4e4cd0349d4e6e4d3 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 29 Aug 2024 10:49:31 -0700 Subject: [PATCH 2440/2608] Remove `strict_min_purge_interval` option Option `experimental_hpa_strict_min_purge_interval` was expected to be temporary to simplify rollout of a bugfix. Now, when bugfix rollout is complete it is safe to remove this option. --- include/jemalloc/internal/hpa_opts.h | 10 ---------- src/ctl.c | 5 ----- src/hpa.c | 11 +++-------- src/jemalloc.c | 4 ---- src/stats.c | 1 - test/unit/hpa.c | 29 +++++++++++++++++----------- test/unit/mallctl.c | 2 -- 7 files changed, 21 insertions(+), 41 deletions(-) diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 15765689..ee2bd40c 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -50,14 +50,6 @@ struct hpa_shard_opts_s { */ uint64_t min_purge_interval_ms; - /* - * Strictly respect minimum amout of time between purges. 
- * - * This is an option to provide backward compatibility for staged rollout of - * purging logic fix. - */ - bool experimental_strict_min_purge_interval; - /* * Maximum number of hugepages to purge on each purging attempt. */ @@ -83,8 +75,6 @@ struct hpa_shard_opts_s { 10 * 1000, \ /* min_purge_interval_ms */ \ 5 * 1000, \ - /* experimental_strict_min_purge_interval */ \ - false, \ /* experimental_max_purge_nhp */ \ -1 \ } diff --git a/src/ctl.c b/src/ctl.c index 8608f124..2a9e47f2 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,7 +103,6 @@ CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_min_purge_interval_ms) -CTL_PROTO(opt_experimental_hpa_strict_min_purge_interval) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) @@ -462,8 +461,6 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("experimental_hpa_strict_min_purge_interval"), - CTL(opt_experimental_hpa_strict_min_purge_interval)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, @@ -2202,8 +2199,6 @@ CTL_RO_NL_GEN(opt_hpa_hugification_threshold, CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) -CTL_RO_NL_GEN(opt_experimental_hpa_strict_min_purge_interval, - opt_hpa_opts.experimental_strict_min_purge_interval, bool) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) diff --git a/src/hpa.c b/src/hpa.c index 3d7a6f60..d58a17ec 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -512,14 +512,9 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static bool 
hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (shard->opts.experimental_strict_min_purge_interval) { - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); - if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { - return false; - } - } - return true; + uint64_t since_last_purge_ms = shard->central->hooks.ms_since( + &shard->last_purge); + return since_last_purge_ms >= shard->opts.min_purge_interval_ms; } /* diff --git a/src/jemalloc.c b/src/jemalloc.c index 63f6b302..428a50ef 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1571,10 +1571,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_min_purge_interval_ms", 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - CONF_HANDLE_BOOL( - opt_hpa_opts.experimental_strict_min_purge_interval, - "experimental_hpa_strict_min_purge_interval"); - CONF_HANDLE_SSIZE_T( opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); diff --git a/src/stats.c b/src/stats.c index ef025eb3..d5be92d3 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1564,7 +1564,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") - OPT_WRITE_BOOL("experimental_hpa_strict_min_purge_interval") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index ae8a976c..747f98ef 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -34,8 +34,6 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { 10 * 1000, /* min_purge_interval_ms */ 5 * 1000, - /* experimental_strict_min_purge_interval */ - false, /* experimental_max_purge_nhp */ -1 }; @@ -53,8 +51,6 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { 0, /* 
min_purge_interval_ms */ 5 * 1000, - /* experimental_strict_min_purge_interval */ - false, /* experimental_max_purge_nhp */ -1 }; @@ -506,7 +502,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { } TEST_END -TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { +TEST_BEGIN(test_no_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -520,6 +516,7 @@ TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; hpa_shard_t *shard = create_test_data(&hooks, &opts); @@ -547,7 +544,7 @@ TEST_BEGIN(test_no_experimental_strict_min_purge_interval) { } TEST_END -TEST_BEGIN(test_experimental_strict_min_purge_interval) { +TEST_BEGIN(test_min_purge_interval) { test_skip_if(!hpa_supported()); hpa_hooks_t hooks; @@ -561,7 +558,6 @@ TEST_BEGIN(test_experimental_strict_min_purge_interval) { hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; - opts.experimental_strict_min_purge_interval = true; hpa_shard_t *shard = create_test_data(&hooks, &opts); @@ -631,6 +627,7 @@ TEST_BEGIN(test_purge) { pai_dalloc(tsdn, &shard->pai, edatas[i], &deferred_work_generated); } + nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); @@ -642,9 +639,15 @@ TEST_BEGIN(test_purge) { expect_zu_eq(2, ndefer_purge_calls, "Expect purges"); ndefer_purge_calls = 0; + nstime_init2(&defer_curtime, 12, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + /* + * We are still having 5 active hugepages and now they are + * matching hugification criteria long enough to actually hugify them. 
+ */ + expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * We still have completely dirty hugepage, but we are below @@ -691,6 +694,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { pai_dalloc(tsdn, &shard->pai, edatas[i], &deferred_work_generated); } + nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); @@ -702,14 +706,17 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { expect_zu_eq(1, ndefer_purge_calls, "Expect purges"); ndefer_purge_calls = 0; + nstime_init2(&defer_curtime, 12, 0); hpa_shard_do_deferred_work(tsdn, shard); - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We still above the limit for dirty pages. */ expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; + nstime_init2(&defer_curtime, 18, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); @@ -741,8 +748,8 @@ main(void) { test_alloc_dalloc_batch, test_defer_time, test_purge_no_infinite_loop, - test_no_experimental_strict_min_purge_interval, - test_experimental_strict_min_purge_interval, + test_no_min_purge_interval, + test_min_purge_interval, test_purge, test_experimental_max_purge_nhp); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 028a27f7..ffe5c411 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -292,8 +292,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); - TEST_MALLCTL_OPT(bool, experimental_hpa_strict_min_purge_interval, - always); TEST_MALLCTL_OPT(ssize_t, 
experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); From 1c900088c33402cc8bb0ea78dc1338ab6c087e0c Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Thu, 26 Sep 2024 15:40:52 -0700 Subject: [PATCH 2441/2608] Do not support hpa if HUGEPAGE is too large. --- src/hpa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/hpa.c b/src/hpa.c index d58a17ec..d1558821 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -51,6 +51,10 @@ hpa_supported(void) { if (HUGEPAGE_PAGES == 1) { return false; } + /* As mentioned in pages.h, do not support If HUGEPAGE is too large. */ + if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + return false; + } return true; } From 3a0d9cdadb8a0dbfd180367459721d13eab0e116 Mon Sep 17 00:00:00 2001 From: Ben Niu Date: Fri, 30 Aug 2024 17:32:42 -0700 Subject: [PATCH 2442/2608] Use MSVC __declspec(thread) for TSD on Windows --- include/jemalloc/internal/tsd_win.h | 74 ++++++++++++++++++++++++++++- src/tsd.c | 6 ++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 4b40a8ab..559ee78f 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -7,11 +7,17 @@ #include "jemalloc/internal/tsd_internals.h" #include "jemalloc/internal/tsd_types.h" +/* val should always be the first field of tsd_wrapper_t since accessing + val is the common path and having val as the first field makes it possible + that converting a pointer to tsd_wrapper_t to a pointer to val is no more + than a type cast. 
*/ typedef struct { - bool initialized; tsd_t val; + bool initialized; } tsd_wrapper_t; +#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) + extern DWORD tsd_tsd; extern tsd_wrapper_t tsd_boot_wrapper; extern bool tsd_booted; @@ -165,3 +171,69 @@ tsd_set(tsd_t *val) { } wrapper->initialized = true; } + +#else // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) + +#define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type + +extern JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls; +extern bool tsd_booted; + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_cleanup_wrapper(void) { + if (tsd_wrapper_tls.initialized) { + tsd_wrapper_tls.initialized = false; + tsd_cleanup(&tsd_wrapper_tls.val); + if (tsd_wrapper_tls.initialized) { + /* Trigger another cleanup round. */ + return true; + } + } + return false; +} + +/* Initialization/cleanup. */ +JEMALLOC_ALWAYS_INLINE bool +tsd_boot0(void) { + _malloc_tsd_cleanup_register(tsd_cleanup_wrapper); + tsd_booted = true; + return false; +} + +JEMALLOC_ALWAYS_INLINE void +tsd_boot1(void) { + /* Do nothing. */ +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_boot(void) { + return tsd_boot0(); +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_booted_get(void) { + return tsd_booted; +} + +JEMALLOC_ALWAYS_INLINE bool +tsd_get_allocates(void) { + return false; +} + +/* Get/set. 
*/ +JEMALLOC_ALWAYS_INLINE tsd_t * +tsd_get(bool init) { + return &(tsd_wrapper_tls.val); +} + +JEMALLOC_ALWAYS_INLINE void +tsd_set(tsd_t *val) { + assert(tsd_booted); + if (likely(&(tsd_wrapper_tls.val) != val)) { + tsd_wrapper_tls.val = (*val); + } + tsd_wrapper_tls.initialized = true; +} + +#endif // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) diff --git a/src/tsd.c b/src/tsd.c index c9ae2d64..0a2ccc59 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -22,8 +22,12 @@ JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; bool tsd_booted = false; #elif (defined(_WIN32)) +#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) DWORD tsd_tsd; -tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; +tsd_wrapper_t tsd_boot_wrapper = {TSD_INITIALIZER, false}; +#else +JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls = { TSD_INITIALIZER, false }; +#endif bool tsd_booted = false; #if JEMALLOC_WIN32_TLSGETVALUE2 TGV2 tls_get_value2 = NULL; From edc1576f03d15a22b968828b68a074d9be6e5cc0 Mon Sep 17 00:00:00 2001 From: Nathan Slingerland Date: Mon, 16 Sep 2024 11:00:37 -0700 Subject: [PATCH 2443/2608] Add safe frame-pointer backtrace unwinder --- INSTALL.md | 7 + Makefile.in | 1 + configure.ac | 28 +++ doc/jemalloc.xml.in | 10 ++ .../internal/jemalloc_internal_defs.h.in | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 7 + include/jemalloc/internal/malloc_io.h | 7 + include/jemalloc/internal/prof_sys.h | 1 + src/ctl.c | 3 + src/prof_stack_range.c | 161 ++++++++++++++++++ src/prof_sys.c | 42 ++++- src/stats.c | 1 + test/unit/mallctl.c | 1 + 13 files changed, 271 insertions(+), 1 deletion(-) create mode 100644 src/prof_stack_range.c diff --git a/INSTALL.md b/INSTALL.md index f772dd09..7f6137ee 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -139,6 +139,7 @@ any of the following arguments (not a definitive list) to 'configure': in the following list that appears to function correctly: + libunwind (requires 
--enable-prof-libunwind) + + frame pointer (requires --enable-prof-frameptr) + libgcc (unless --disable-prof-libgcc) + gcc intrinsics (unless --disable-prof-gcc) @@ -147,6 +148,12 @@ any of the following arguments (not a definitive list) to 'configure': Use the libunwind library (http://www.nongnu.org/libunwind/) for stack backtracing. +* `--enable-prof-frameptr` + + Use the optimized frame pointer unwinder for stack backtracing. Safe + to use in mixed code (with and without frame pointers) - but requires + frame pointers to produce meaningful stacks. Linux only. + * `--disable-prof-libgcc` Disable the use of libgcc's backtracing functionality. diff --git a/Makefile.in b/Makefile.in index 94208f37..6a386720 100644 --- a/Makefile.in +++ b/Makefile.in @@ -142,6 +142,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_data.c \ $(srcroot)src/prof_log.c \ $(srcroot)src/prof_recent.c \ + $(srcroot)src/prof_stack_range.c \ $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ $(srcroot)src/psset.c \ diff --git a/configure.ac b/configure.ac index 2e7f610d..4776aa60 100644 --- a/configure.ac +++ b/configure.ac @@ -1448,6 +1448,33 @@ if test "x$backtrace_method" = "x" -a "x$enable_prof_libunwind" = "x1" ; then fi fi +if test `uname -s` = "Linux" +then + AC_ARG_ENABLE([prof-frameptr], + [AS_HELP_STRING([--enable-prof-frameptr], [Use optimized frame pointer unwinder for backtracing (Linux only)])], + [if test "x$enable_prof_frameptr" = "xno" ; then + enable_prof_frameptr="0" + else + enable_prof_frameptr="1" + if test "x$enable_prof" = "x0" ; then + AC_MSG_ERROR([--enable-prof-frameptr should only be used with --enable-prof]) + fi + fi + ], + [enable_prof_frameptr="0"] + ) + if test "x$backtrace_method" = "x" -a "x$enable_prof_frameptr" = "x1" \ + -a "x$GCC" = "xyes" ; then + JE_CFLAGS_ADD([-fno-omit-frame-pointer]) + backtrace_method="frame pointer linux" + AC_DEFINE([JEMALLOC_PROF_FRAME_POINTER], [ ], [ ]) + else + enable_prof_frameptr="0" + fi +else + 
enable_prof_frameptr="0" +fi + AC_ARG_ENABLE([prof-libgcc], [AS_HELP_STRING([--disable-prof-libgcc], [Do not use libgcc for backtracing])], @@ -2847,6 +2874,7 @@ AC_MSG_RESULT([stats : ${enable_stats}]) AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) +AC_MSG_RESULT([prof-frameptr : ${enable_prof_frameptr}]) AC_MSG_RESULT([prof-libgcc : ${enable_prof_libgcc}]) AC_MSG_RESULT([prof-gcc : ${enable_prof_gcc}]) AC_MSG_RESULT([fill : ${enable_fill}]) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 89a176e0..59058bad 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -897,6 +897,16 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", during build configuration. + + + config.prof_frameptr + (bool) + r- + + was specified + during build configuration. + + config.stats diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7f369873..7ad75a06 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -167,6 +167,9 @@ /* Use gcc intrinsics for profile backtracing if defined. */ #undef JEMALLOC_PROF_GCC +/* Use frame pointer for profile backtracing if defined. Linux only. 
*/ +#undef JEMALLOC_PROF_FRAME_POINTER + /* JEMALLOC_PAGEID enabled page id */ #undef JEMALLOC_PAGEID diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index 6b55e47f..ebce5d56 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -114,6 +114,13 @@ static const bool config_prof_libunwind = false #endif ; +static const bool config_prof_frameptr = +#ifdef JEMALLOC_PROF_FRAME_POINTER + true +#else + false +#endif + ; static const bool maps_coalesce = #ifdef JEMALLOC_MAPS_COALESCE true diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index bb43fb5c..9c7c6ec2 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -154,5 +154,12 @@ static inline int malloc_close(int fd) { #endif } +static inline off_t malloc_lseek(int fd, off_t offset, int whence) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_lseek) + return (off_t)syscall(SYS_lseek, fd, offset, whence); +#else + return lseek(fd, offset, whence); +#endif +} #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index e6e7f06f..3377ba92 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -20,6 +20,7 @@ void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); +uintptr_t prof_thread_stack_start(uintptr_t stack_end); /* Used in unit tests. 
*/ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); diff --git a/src/ctl.c b/src/ctl.c index 2a9e47f2..690bbabc 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -89,6 +89,7 @@ CTL_PROTO(config_opt_safety_checks) CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) +CTL_PROTO(config_prof_frameptr) CTL_PROTO(config_stats) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) @@ -436,6 +437,7 @@ static const ctl_named_node_t config_node[] = { {NAME("prof"), CTL(config_prof)}, {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, {NAME("stats"), CTL(config_stats)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} @@ -2178,6 +2180,7 @@ CTL_RO_CONFIG_GEN(config_opt_safety_checks, bool) CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) +CTL_RO_CONFIG_GEN(config_prof_frameptr, bool) CTL_RO_CONFIG_GEN(config_stats, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c new file mode 100644 index 00000000..c3458044 --- /dev/null +++ b/src/prof_stack_range.c @@ -0,0 +1,161 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/prof_sys.h" + +#if defined (__linux__) + +#include +#include +#include +#include // strtoul +#include +#include + +static int prof_mapping_containing_addr( + uintptr_t addr, + const char* maps_path, + uintptr_t* mm_start, + uintptr_t* mm_end) { + int ret = ENOENT; // not found + *mm_start = *mm_end = 0; + + // Each line of /proc//maps is: + // - + // + // The fields we care about are always within the first 34 characters so + // as long as `buf` contains the start of a mapping line it can 
always be + // parsed. + static const int kMappingFieldsWidth = 34; + + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; // actual number of bytes read to buf + char* line = NULL; + + while (1) { + if (fd < 0) { + // case 0: initial open of maps file + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = buf; + } else if (line == NULL) { + // case 1: no newline found in buf + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; // advance to character after newline + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + // case 2: found newline but insufficient characters remaining in buf + + // fd currently points to the character immediately after the last + // character in buf. Seek fd to the character after the newline. + if (malloc_lseek(fd, -remaining, SEEK_CUR) == -1) { + ret = errno; + break; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + break; + } + line = buf; + } else { + // case 3: found newline and sufficient characters to parse + + // parse - + char* tmp = line; + uintptr_t start_addr = strtoul(tmp, &tmp, 16); + if (addr >= start_addr) { + tmp++; // advance to character after '-' + uintptr_t end_addr = strtoul(tmp, &tmp, 16); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } + + // Advance to character after next newline in the current buf. 
+ char* prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; // advance to character after newline + remaining -= (line - prev_line); + } + } + } + + malloc_close(fd); + return ret; +} + +static uintptr_t prof_main_thread_stack_start(const char* stat_path) { + uintptr_t stack_start = 0; + + int fd = malloc_open(stat_path, O_RDONLY); + if (fd < 0) { + return 0; + } + + char buf[512]; + ssize_t n = malloc_read_fd(fd, buf, sizeof(buf) - 1); + if (n >= 0) { + buf[n] = '\0'; + if (sscanf( + buf, + "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %"FMTuPTR, + &stack_start) != 1) { + } + } + malloc_close(fd); + return stack_start; +} + +uintptr_t prof_thread_stack_start(uintptr_t stack_end) { + pid_t pid = getpid(); + pid_t tid = gettid(); + if (pid == tid) { + char stat_path[32]; // "/proc//stat" + malloc_snprintf(stat_path, sizeof(stat_path), "/proc/%d/stat", pid); + return prof_main_thread_stack_start(stat_path); + } else { + // NOTE: Prior to kernel 4.5 an entry for every thread stack was included in + // /proc//maps as [STACK:]. Starting with kernel 4.5 only the main + // thread stack remains as the [stack] mapping. For other thread stacks the + // mapping is still visible in /proc//task//maps (though not + // labeled as [STACK:tid]). 
+ // https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", pid, tid); + + uintptr_t mm_start, mm_end; + if (prof_mapping_containing_addr( + stack_end, maps_path, &mm_start, &mm_end) != 0) { + return 0; + } + return mm_end; + } +} + +#else + +uintptr_t prof_thread_stack_start(UNUSED uintptr_t stack_end) { + return 0; +} + +#endif // __linux__ diff --git a/src/prof_sys.c b/src/prof_sys.c index 8a904040..f0bc8b4b 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/buf_writer.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" @@ -98,6 +99,45 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { _Unwind_Backtrace(prof_unwind_callback, &data); } +#elif (defined(JEMALLOC_PROF_FRAME_POINTER)) +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +static void +prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { + // stack_start - highest possible valid stack address (assumption: stacks grow downward) + // stack_end - current stack frame and lowest possible valid stack address + // (all earlier frames will be at higher addresses than this) + + // always safe to get the current stack frame address + void** stack_end = (void**)__builtin_frame_address(0); + if (stack_end == NULL) { + *len = 0; + return; + } + + static __thread void **stack_start = (void **)0; // thread local + if (stack_start == 0 || stack_end >= stack_start) { + stack_start = (void**)prof_thread_stack_start((uintptr_t)stack_end); + } + + if (stack_start == 0 || stack_end >= stack_start) { + *len = 0; + return; + } + + unsigned ii = 0; + void** fp = (void**)stack_end; + while (fp < stack_start && ii < max_len) { + vec[ii++] = fp[1]; + void** fp_prev = fp; + fp = fp[0]; + if (unlikely(fp 
<= fp_prev)) { // sanity check forward progress + break; + } + } + *len = ii; +} +JEMALLOC_DIAGNOSTIC_POP #elif (defined(JEMALLOC_PROF_GCC)) JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS @@ -484,7 +524,7 @@ prof_getpid(void) { #endif } -long +static long prof_get_pid_namespace() { long ret = 0; diff --git a/src/stats.c b/src/stats.c index d5be92d3..1e607d9e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1467,6 +1467,7 @@ stats_general_print(emitter_t *emitter) { CONFIG_WRITE_BOOL(prof); CONFIG_WRITE_BOOL(prof_libgcc); CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(prof_frameptr); CONFIG_WRITE_BOOL(stats); CONFIG_WRITE_BOOL(utrace); CONFIG_WRITE_BOOL(xmalloc); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ffe5c411..65e84370 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -255,6 +255,7 @@ TEST_BEGIN(test_mallctl_config) { TEST_MALLCTL_CONFIG(prof, bool); TEST_MALLCTL_CONFIG(prof_libgcc, bool); TEST_MALLCTL_CONFIG(prof_libunwind, bool); + TEST_MALLCTL_CONFIG(prof_frameptr, bool); TEST_MALLCTL_CONFIG(stats, bool); TEST_MALLCTL_CONFIG(utrace, bool); TEST_MALLCTL_CONFIG(xmalloc, bool); From 8c2b8bcf24ec67523d310f46c38730b1d3348b39 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Oct 2024 11:18:43 -0700 Subject: [PATCH 2444/2608] Update doc to reflect muzzy decay is disabled by default. It has been disabled since 5.2.0 (in #1421). --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 59058bad..2a8573b8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1195,7 +1195,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", purged according to a sigmoidal decay curve that starts and ends with zero purge rate. A decay time of 0 causes all unused muzzy pages to be purged immediately upon creation. A decay time of -1 disables purging. - The default decay time is 10 seconds. 
See arenas.muzzy_decay_ms and arena.<i>.muzzy_decay_ms From 02251c0070969e526cae3dde6d7b2610a4ed87ef Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Oct 2024 11:29:04 -0700 Subject: [PATCH 2445/2608] Update the configure cache file example in INSTALL.md --- INSTALL.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 7f6137ee..2333f13d 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -322,13 +322,13 @@ behavior: 'configure' uses this to find programs. In some cases it may be necessary to work around configuration results that do -not match reality. For example, Linux 4.5 added support for the MADV_FREE flag -to madvise(2), which can cause problems if building on a host with MADV_FREE -support and deploying to a target without. To work around this, use a cache -file to override the relevant configuration variable defined in configure.ac, -e.g.: +not match reality. For example, Linux 3.4 added support for the MADV_DONTDUMP +flag to madvise(2), which can cause problems if building on a host with +MADV_DONTDUMP support and deploying to a target without. To work around this, +use a cache file to override the relevant configuration variable defined in +configure.ac, e.g.: - echo "je_cv_madv_free=no" > config.cache && ./configure -C + echo "je_cv_madv_dontdump=no" > config.cache && ./configure -C ## Advanced compilation From 397827a27d0e5092a15812eb421a2762c773920f Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 16 Aug 2024 16:36:57 -0700 Subject: [PATCH 2446/2608] Updated jeprof with more symbols to filter. 
--- bin/jeprof.in | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/bin/jeprof.in b/bin/jeprof.in index 7aff8643..9cae84ed 100644 --- a/bin/jeprof.in +++ b/bin/jeprof.in @@ -2961,6 +2961,20 @@ sub RemoveUninterestingFrames { 'void* newImpl', 'fallbackNewImpl', 'void* fallbackNewImpl', + 'fallback_impl', + 'void* fallback_impl', + 'imalloc', + 'int imalloc', + 'imalloc_body', + 'int imalloc_body', + 'prof_alloc_prep', + 'prof_tctx_t *prof_alloc_prep', + 'prof_backtrace_impl', + 'void prof_backtrace_impl', + 'je_prof_backtrace', + 'void je_prof_backtrace', + 'je_prof_tctx_create', + 'prof_tctx_t* prof_tctx_create', '@JEMALLOC_PREFIX@free', '@JEMALLOC_PREFIX@memalign', '@JEMALLOC_PREFIX@posix_memalign', @@ -2969,8 +2983,12 @@ sub RemoveUninterestingFrames { '@JEMALLOC_PREFIX@valloc', '@JEMALLOC_PREFIX@realloc', '@JEMALLOC_PREFIX@mallocx', + 'irallocx_prof', + 'void *irallocx_prof', '@JEMALLOC_PREFIX@rallocx', 'do_rallocx', + 'ixallocx_prof', + 'size_t ixallocx_prof', '@JEMALLOC_PREFIX@xallocx', '@JEMALLOC_PREFIX@dallocx', '@JEMALLOC_PREFIX@sdallocx', @@ -3083,6 +3101,8 @@ sub RemoveUninterestingFrames { foreach my $a (@addrs) { if (exists($symbols->{$a})) { my $func = $symbols->{$a}->[0]; + # Remove suffix in the symbols following space when filtering. + $func =~ s/ .*//; if ($skip{$func} || ($func =~ m/$skip_regexp/)) { # Throw away the portion of the backtrace seen so far, under the # assumption that previous frames were for functions internal to the From 6d625d5e5e06b5a07ab90c37ef6b03b55ca1c00a Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Fri, 4 Oct 2024 15:25:54 -0700 Subject: [PATCH 2447/2608] Add support for clock_gettime_nsec_np() Prefer clock_gettime_nsec_np(CLOCK_UPTIME_RAW) to mach_absolute_time(). 
--- configure.ac | 10 ++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 5 +++++ src/nstime.c | 13 ++++++++++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 4776aa60..9698997a 100644 --- a/configure.ac +++ b/configure.ac @@ -2126,6 +2126,16 @@ if test "x${je_cv_clock_realtime}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_CLOCK_REALTIME], [ ], [ ]) fi +dnl Check for clock_gettime_nsec_np(). +JE_COMPILABLE([clock_gettime_nsec_np()], [ +#include +], [ + clock_gettime_nsec_np(CLOCK_UPTIME_RAW); +], [je_cv_clock_gettime_nsec_np]) +if test "x${je_cv_clock_gettime_nsec_np}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP], [ ], [ ]) +fi + dnl Use syscall(2) (if available) by default. AC_ARG_ENABLE([syscall], [AS_HELP_STRING([--disable-syscall], [Disable use of syscall(2)])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 7ad75a06..f5b1a924 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -117,6 +117,11 @@ */ #undef JEMALLOC_HAVE_CLOCK_REALTIME +/* + * Defined if clock_gettime_nsec_np(CLOCK_UPTIME_RAW) is available. + */ +#undef JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP + /* * Defined if _malloc_thread_cleanup() exists. 
At least in the case of * FreeBSD, pthread_key_create() allocates, which if used during malloc diff --git a/src/nstime.c b/src/nstime.c index 72f04227..15c53330 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -201,11 +201,22 @@ nstime_get(nstime_t *time) { clock_gettime(CLOCK_MONOTONIC, &ts); nstime_init2(time, ts.tv_sec, ts.tv_nsec); } +#elif defined(JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP) +# define NSTIME_MONOTONIC true +static void +nstime_get(nstime_t *time) { + nstime_init(time, clock_gettime_nsec_np(CLOCK_UPTIME_RAW)); +} #elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME) # define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { - nstime_init(time, mach_absolute_time()); + static mach_timebase_info_data_t sTimebaseInfo; + if (sTimebaseInfo.denom == 0) { + (void) mach_timebase_info(&sTimebaseInfo); + } + nstime_init(time, mach_absolute_time() * sTimebaseInfo.numer + / sTimebaseInfo.denom); } #else # define NSTIME_MONOTONIC false From 2a693b83d2d1631b6a856d178125e1c47c12add9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Oct 2024 10:28:50 -0700 Subject: [PATCH 2448/2608] Fix the sized-dealloc safety check abort msg. 
--- include/jemalloc/internal/jemalloc_internal_inlines_c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 432ec17c..854aec1e 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -425,8 +425,8 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_size2index(dbg_ctx.szind), - /* input_size */ sz_size2index(alloc_ctx->szind)); + /* true_size */ sz_index2size(dbg_ctx.szind), + /* input_size */ sz_index2size(alloc_ctx->szind)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { From b9758afff037fb074a440bb5590ed113cad78bd3 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 25 Oct 2024 09:47:43 -0700 Subject: [PATCH 2449/2608] Add `nstime_ms_since` to get time since in ms Milliseconds are used a lot in hpa, so it is convenient to have `nstime_ms_since` function instead of dividing to `MILLION` constantly. For consistency renamed `nstime_msec` to `nstime_ms` as `ms` abbreviation is used much more commonly across codebase than `msec`. ``` $ grep -Rn '_msec' include src | wc -l 2 $ grep -RPn '_ms( |,|:)' include src | wc -l 72 ``` Function `nstime_msec` wasn't used anywhere in the code yet. 
--- include/jemalloc/internal/nstime.h | 3 ++- src/hpa_hooks.c | 2 +- src/nstime.c | 10 ++++++++-- test/unit/nstime.c | 19 +++++++++++++++++++ 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 440a4d15..1f32df58 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -26,8 +26,8 @@ static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER; void nstime_init(nstime_t *time, uint64_t ns); void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); +uint64_t nstime_ms(const nstime_t *time); uint64_t nstime_sec(const nstime_t *time); -uint64_t nstime_msec(const nstime_t *time); uint64_t nstime_nsec(const nstime_t *time); void nstime_copy(nstime_t *time, const nstime_t *source); int nstime_compare(const nstime_t *a, const nstime_t *b); @@ -39,6 +39,7 @@ void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); uint64_t nstime_ns_since(const nstime_t *past); +uint64_t nstime_ms_since(const nstime_t *past); typedef bool (nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 6048f382..f43f05eb 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -59,5 +59,5 @@ hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime) { - return nstime_ns_since(past_nstime) / 1000 / 1000; + return nstime_ms_since(past_nstime); } diff --git a/src/nstime.c b/src/nstime.c index 15c53330..894753aa 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -63,7 +63,7 @@ nstime_ns(const nstime_t *time) { } uint64_t -nstime_msec(const nstime_t *time) { +nstime_ms(const nstime_t *time) { nstime_assert_initialized(time); return time->ns / MILLION; } @@ -158,7 +158,7 @@ 
nstime_divide(const nstime_t *time, const nstime_t *divisor) { return time->ns / divisor->ns; } -/* Returns time since *past, w/o updating *past. */ +/* Returns time since *past in nanoseconds, w/o updating *past. */ uint64_t nstime_ns_since(const nstime_t *past) { nstime_assert_initialized(past); @@ -171,6 +171,12 @@ nstime_ns_since(const nstime_t *past) { return now.ns - past->ns; } +/* Returns time since *past in milliseconds, w/o updating *past. */ +uint64_t +nstime_ms_since(const nstime_t *past) { + return nstime_ns_since(past) / MILLION; +} + #ifdef _WIN32 # define NSTIME_MONOTONIC false static void diff --git a/test/unit/nstime.c b/test/unit/nstime.c index e7e11e61..43fd3954 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -228,6 +228,24 @@ TEST_BEGIN(test_nstime_ns_since) { } TEST_END +TEST_BEGIN(test_nstime_ms_since) { + nstime_t delta; + + nstime_init2(&delta, /* sec */ 1, /* nsec */ 0); + for (uint64_t i = 0; i < 10000; i++) { + nstime_t now; + nstime_init_update(&now); + + nstime_t past; + nstime_copy(&past, &now); + nstime_subtract(&past, &delta); + + expect_u64_ge(nstime_ms_since(&past), nstime_ms(&delta), + "Incorrect time since result"); + } +} +TEST_END + TEST_BEGIN(test_nstime_monotonic) { nstime_monotonic(); } @@ -248,5 +266,6 @@ main(void) { test_nstime_idivide, test_nstime_divide, test_nstime_ns_since, + test_nstime_ms_since, test_nstime_monotonic); } From b82333fdec6e5833f88780fcf1fc50b799268e1b Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 25 Oct 2024 10:27:25 -0700 Subject: [PATCH 2450/2608] Split `stats_arena_hpa_shard_print` function Make multiple functions from `stats_arena_hpa_shard_print` for readability and ease of change in the future. 
--- src/stats.c | 60 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/src/stats.c b/src/stats.c index 1e607d9e..89dd1916 100644 --- a/src/stats.c +++ b/src/stats.c @@ -831,12 +831,16 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { - emitter_row_t header_row; - emitter_row_init(&header_row); - emitter_row_t row; - emitter_row_init(&row); +stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { + size_t sec_bytes; + CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); + emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", + emitter_type_size, &sec_bytes); +} +static void +stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, + uint64_t uptime) { uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; @@ -851,21 +855,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); - size_t npageslabs_huge; - size_t nactive_huge; - size_t ndirty_huge; - - size_t npageslabs_nonhuge; - size_t nactive_nonhuge; - size_t ndirty_nonhuge; - size_t nretained_nonhuge; - - size_t sec_bytes; - CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); - emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", - emitter_type_size, &sec_bytes); - - /* First, global stats. 
*/ emitter_table_printf(emitter, "HPA shard stats:\n" " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" @@ -878,7 +867,6 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { nhugifies, rate_per_second(nhugifies, uptime), ndehugifies, rate_per_second(ndehugifies, uptime)); - emitter_json_object_kv_begin(emitter, "hpa_shard"); emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); emitter_json_kv(emitter, "npurges", emitter_type_uint64, @@ -887,8 +875,25 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { &nhugifies); emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); +} - /* Next, full slab stats. */ +static void +stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + + size_t npageslabs_huge; + size_t nactive_huge; + size_t ndirty_huge; + + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ndirty_nonhuge; + size_t nretained_nonhuge; + + /* Full slab stats. 
*/ CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, &npageslabs_huge, size_t); CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", @@ -1049,12 +1054,21 @@ stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ - emitter_json_object_end(emitter); /* End "hpa_shard" */ if (in_gap) { emitter_table_printf(emitter, " ---\n"); } } +static void +stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { + stats_arena_hpa_shard_sec_print(emitter, i); + + emitter_json_object_kv_begin(emitter, "hpa_shard"); + stats_arena_hpa_shard_counters_print(emitter, i, uptime); + stats_arena_hpa_shard_slabs_print(emitter, i); + emitter_json_object_end(emitter); /* End "hpa_shard" */ +} + static void stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; From a361e886e2ec23513e374abc1e4e0429cc93ec5c Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 7 Nov 2024 10:16:46 -0800 Subject: [PATCH 2451/2608] Move `je_cv_thp` logic closer to definition --- configure.ac | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/configure.ac b/configure.ac index 9698997a..d037fed5 100644 --- a/configure.ac +++ b/configure.ac @@ -2471,6 +2471,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then madvise((void *)0, 0, MADV_HUGEPAGE); madvise((void *)0, 0, MADV_NOHUGEPAGE); ], [je_cv_thp]) + case "${host_cpu}" in + arm*) + ;; + *) + if test "x${je_cv_thp}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ]) + fi + ;; + esac + dnl Check for madvise(..., MADV_[NO]CORE). 
JE_COMPILABLE([madvise(..., MADV_[[NO]]CORE)], [ #include @@ -2481,15 +2491,6 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_nocore}" = "xyes" ; then AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ]) fi -case "${host_cpu}" in - arm*) - ;; - *) - if test "x${je_cv_thp}" = "xyes" ; then - AC_DEFINE([JEMALLOC_HAVE_MADVISE_HUGE], [ ], [ ]) - fi - ;; -esac else dnl Check for posix_madvise. JE_COMPILABLE([posix_madvise], [ From 0ce13c6fb5ae3bd837f5a7314bd580070bb408da Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 31 Oct 2024 11:43:11 -0700 Subject: [PATCH 2452/2608] Add opt `hpa_hugify_sync` to hugify synchronously Linux 6.1 introduced `MADV_COLLAPSE` flag to perform a best-effort synchronous collapse of the native pages mapped by the memory range into transparent huge pages. Synchronous hugification might be beneficial for at least two reasons: we are not relying on khugepaged anymore and get instant feedback if range wasn't hugified. If `hpa_hugify_sync` option is on, we'll try to perform a synchronous collapse and if it wasn't successful, we'll fall back to asynchronous behaviour. 
--- configure.ac | 10 ++++++++ include/jemalloc/internal/hpa.h | 8 ++++++ include/jemalloc/internal/hpa_hooks.h | 2 +- include/jemalloc/internal/hpa_opts.h | 7 ++++++ .../internal/jemalloc_internal_defs.h.in | 7 ++++++ .../jemalloc/internal/jemalloc_preamble.h.in | 9 +++++++ include/jemalloc/internal/pages.h | 1 + src/ctl.c | 9 +++++++ src/hpa.c | 17 ++++++++++++- src/hpa_hooks.c | 25 ++++++++++++++++--- src/jemalloc.c | 12 +++++++++ src/pages.c | 24 ++++++++++++++++++ src/stats.c | 8 ++++++ test/unit/hpa.c | 9 +++++-- test/unit/mallctl.c | 1 + 15 files changed, 141 insertions(+), 8 deletions(-) diff --git a/configure.ac b/configure.ac index d037fed5..a330e33e 100644 --- a/configure.ac +++ b/configure.ac @@ -2491,6 +2491,16 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_nocore}" = "xyes" ; then AC_DEFINE([JEMALLOC_MADVISE_NOCORE], [ ], [ ]) fi + + dnl Check for madvise(..., MADV_COLLAPSE). + JE_COMPILABLE([madvise(..., MADV_COLLAPSE)], [ +#include +], [ + madvise((void *)0, 0, MADV_COLLAPSE); +], [je_cv_madv_collapse]) + if test "x${je_cv_madv_collapse}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_MADVISE_COLLAPSE], [ ], [ ]) + fi else dnl Check for posix_madvise. JE_COMPILABLE([posix_madvise], [ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 4805efaf..4c410c40 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -61,6 +61,14 @@ struct hpa_shard_nonderived_stats_s { * Guarded by mtx. */ uint64_t nhugifies; + + /* + * The number of times we've tried to hugify a pageslab, but failed. + * + * Guarded by mtx. + */ + uint64_t nhugify_failures; + /* * The number of times we've dehugified a pageslab. 
* diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 72f3a43c..b04b04f6 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -9,7 +9,7 @@ struct hpa_hooks_s { void *(*map)(size_t size); void (*unmap)(void *ptr, size_t size); void (*purge)(void *ptr, size_t size); - void (*hugify)(void *ptr, size_t size); + bool (*hugify)(void *ptr, size_t size, bool sync); void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index ee2bd40c..42246172 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -45,6 +45,11 @@ struct hpa_shard_opts_s { */ uint64_t hugify_delay_ms; + /* + * Hugify pages synchronously. + */ + bool hugify_sync; + /* * Minimum amount of time between purges. */ @@ -73,6 +78,8 @@ struct hpa_shard_opts_s { false, \ /* hugify_delay_ms */ \ 10 * 1000, \ + /* hugify_sync */ \ + false, \ /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index f5b1a924..5cf77f47 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -308,6 +308,13 @@ */ #undef JEMALLOC_HAVE_MADVISE_HUGE +/* + * Defined if best-effort synchronous collapse of the native + * pages mapped by the memory range into transparent huge pages is supported + * via MADV_COLLAPSE arguments to madvise(2). + */ +#undef JEMALLOC_HAVE_MADVISE_COLLAPSE + /* * Methods for purging unused pages differ between operating systems. 
* diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index ebce5d56..a59c3489 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -57,6 +57,15 @@ # define JEMALLOC_MADV_FREE 8 #endif +/* + * Can be defined at compile time, in cases, when it is known + * madvise(..., MADV_COLLAPSE) feature is supported, but MADV_COLLAPSE + * constant is not defined. + */ +#ifdef JEMALLOC_DEFINE_MADVISE_COLLAPSE +# define JEMALLOC_MADV_COLLAPSE 25 +#endif + static const bool config_debug = #ifdef JEMALLOC_DEBUG true diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 6c295b43..0dcf96dc 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -123,6 +123,7 @@ bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); +bool pages_collapse(void *addr, size_t size); bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); diff --git a/src/ctl.c b/src/ctl.c index 690bbabc..40e75fb7 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -103,6 +103,7 @@ CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) CTL_PROTO(opt_hpa_hugification_threshold) CTL_PROTO(opt_hpa_hugify_delay_ms) +CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) CTL_PROTO(opt_hpa_dirty_mult) @@ -263,6 +264,7 @@ INDEX_PROTO(stats_arenas_i_extents_j) CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) +CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) /* We have a set of stats for full slabs. 
*/ @@ -462,6 +464,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, + {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, @@ -834,6 +837,8 @@ static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, + {NAME("nhugify_failures"), + CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)} }; @@ -2200,6 +2205,7 @@ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) CTL_RO_NL_GEN(opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_hugify_sync, opt_hpa_opts.hugify_sync, bool) CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, @@ -4061,6 +4067,9 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugifies, uint64_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugify_failures, + arenas_i(mib[2])->astats->hpastats.nonderived_stats.nhugify_failures, + uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); diff --git a/src/hpa.c b/src/hpa.c index d1558821..14541413 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -210,6 +210,7 @@ 
hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.npurge_passes = 0; shard->stats.npurges = 0; shard->stats.nhugifies = 0; + shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; /* @@ -242,6 +243,7 @@ hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, dst->npurge_passes += src->npurge_passes; dst->npurges += src->npurges; dst->nhugifies += src->nhugifies; + dst->nhugify_failures += src->nhugify_failures; dst->ndehugifies += src->ndehugifies; } @@ -499,10 +501,23 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); - shard->central->hooks.hugify(hpdata_addr_get(to_hugify), HUGEPAGE); + bool err = shard->central->hooks.hugify(hpdata_addr_get(to_hugify), + HUGEPAGE, shard->opts.hugify_sync); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; + if (err) { + /* + * When asynchronous hugification is used + * (shard->opts.hugify_sync option is false), we are not + * expecting to get here, unless something went terribly wrong. + * Because underlying syscall is only setting kernel flag for + * memory range (actual hugification happens asynchronously + * and we are not getting any feedback about its outcome), we + * expect syscall to be successful all the time. 
+ */ + shard->stats.nhugify_failures++; + } psset_update_begin(&shard->psset, to_hugify); hpdata_hugify(to_hugify); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index f43f05eb..4628c14f 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -6,7 +6,7 @@ static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); -static void hpa_hooks_hugify(void *ptr, size_t size); +static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); @@ -37,10 +37,27 @@ hpa_hooks_purge(void *ptr, size_t size) { pages_purge_forced(ptr, size); } -static void -hpa_hooks_hugify(void *ptr, size_t size) { +static bool +hpa_hooks_hugify(void *ptr, size_t size, bool sync) { + /* + * We mark memory range as huge independently of which hugification + * technique is used (synchronous or asynchronous) to have correct + * VmFlags set for introspection and accounting purposes. If + * synchronous hugification is enabled and pages_collapse call fails, + * then we hope memory range will be hugified asynchronously by + * khugepaged eventually. Right now, 3 out of 4 error return codes of + * madvise(..., MADV_COLLAPSE) are retryable. Instead of retrying, we + * just fall back to asynchronous khugepaged hugification to simplify + * implementation, even if we might know khugepaged fallback will not + * be successful (current madvise(..., MADV_COLLAPSE) implementation + * hints that when EINVAL is returned it is likely that khugepaged won't be + * able to collapse memory range into hugepage either). 
+ */ bool err = pages_huge(ptr, size); - (void)err; + if (sync) { + err = pages_collapse(ptr, size); + } + return err; } static void diff --git a/src/jemalloc.c b/src/jemalloc.c index 428a50ef..248de28b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1093,6 +1093,15 @@ validate_hpa_settings(void) { if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) { had_conf_error = true; } +#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE + if (opt_hpa_opts.hugify_sync) { + had_conf_error = true; + malloc_printf( + ": hpa_hugify_sync config option is enabled, " + "but MADV_COLLAPSE support was not detected at build " + "time."); + } +#endif } static void @@ -1566,6 +1575,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + CONF_HANDLE_BOOL( + opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); + CONF_HANDLE_UINT64_T( opt_hpa_opts.min_purge_interval_ms, "hpa_min_purge_interval_ms", 0, 0, diff --git a/src/pages.c b/src/pages.c index 5b55a046..26fd8d5d 100644 --- a/src/pages.c +++ b/src/pages.c @@ -567,6 +567,30 @@ pages_nohuge_unaligned(void *addr, size_t size) { return pages_nohuge_impl(addr, size, false); } +bool +pages_collapse(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); + /* + * There is one more MADV_COLLAPSE precondition that is not easy to + * express with assert statement. In order to madvise(addr, size, + * MADV_COLLAPSE) call to be successful, at least one page in the range + * must currently be backed by physical memory. In particularly, this + * means we can't call pages_collapse on freshly mapped memory region. + * See madvise(2) man page for more details. 
+ */ +#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) && \ + (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE)) +# if defined(MADV_COLLAPSE) + return (madvise(addr, size, MADV_COLLAPSE) != 0); +# elif defined(JEMALLOC_MADV_COLLAPSE) + return (madvise(addr, size, JEMALLOC_MADV_COLLAPSE) != 0); +# endif +#else + return true; +#endif +} + bool pages_dontdump(void *addr, size_t size) { assert(PAGE_ADDR2BASE(addr) == addr); diff --git a/src/stats.c b/src/stats.c index 89dd1916..7fbaa5cc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -844,6 +844,7 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; + uint64_t nhugify_failures; uint64_t ndehugifies; CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", @@ -852,6 +853,8 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, i, &npurges, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies", i, &nhugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", + i, &nhugify_failures, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); @@ -860,11 +863,13 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" "\n", npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), + nhugify_failures, rate_per_second(nhugify_failures, uptime), ndehugifies, rate_per_second(ndehugifies, uptime)); emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, @@ -873,6 +878,8 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, &npurges); emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, &nhugifies); + 
emitter_json_kv(emitter, "nhugify_failures", emitter_type_uint64, + &nhugify_failures); emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); } @@ -1578,6 +1585,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_slab_max_alloc") OPT_WRITE_SIZE_T("hpa_hugification_threshold") OPT_WRITE_UINT64("hpa_hugify_delay_ms") + OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 747f98ef..50b96a87 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -32,6 +32,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { false, /* hugify_delay_ms */ 10 * 1000, + /* hugify_sync */ + false, /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ @@ -49,6 +51,8 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { true, /* hugify_delay_ms */ 0, + /* hugify_sync */ + false, /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ @@ -371,9 +375,10 @@ defer_test_purge(void *ptr, size_t size) { } static size_t ndefer_hugify_calls = 0; -static void -defer_test_hugify(void *ptr, size_t size) { +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { ++ndefer_hugify_calls; + return false; } static size_t ndefer_dehugify_calls = 0; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 65e84370..8c8fb18c 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -288,6 +288,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(const char *, dss, always); TEST_MALLCTL_OPT(bool, hpa, always); TEST_MALLCTL_OPT(size_t, hpa_slab_max_alloc, always); + TEST_MALLCTL_OPT(bool, hpa_hugify_sync, always); TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); From 3820e38dc1021cebba4628e277cde060e840aaef Mon 
Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 15 Nov 2024 08:53:20 -0800 Subject: [PATCH 2453/2608] Remove validation for HPA ratios Config validation was introduced at 3aae792b with main intention to fix infinite purging loop, but it didn't actually fix the underlying problem, just masked it. Later 47d69b4ea was merged to address the same problem. Options `hpa_dirty_mult` and `hpa_hugification_threshold` have different application dimensions: `hpa_dirty_mult` is applied to active memory on the shard, but `hpa_hugification_threshold` is a threshold for single pageslab (hugepage). It doesn't make much sense to sum them up together. While it is true that too high value of `hpa_dirty_mult` and too low value of `hpa_hugification_threshold` can lead to pathological behaviour, it is true for other options as well. Poor configurations might lead to suboptimal and sometimes completely unacceptable behaviour and that's OK, that is exactly the reason why they are called poor. Other mechanisms exist to prevent extreme behaviour, when we hugified and then immediately purged page, see `hpa_hugify_blocked_by_ndirty` function, which exists to prevent exactly this case. Lastly, `hpa_dirty_mult + hpa_hugification_threshold >= 1` constraint is too tight and prevents a lot of valid configurations. 
--- Makefile.in | 1 - src/jemalloc.c | 41 ---------------------- test/unit/hpa_background_thread.sh | 2 +- test/unit/hpa_validate_conf.c | 56 ------------------------------ test/unit/hpa_validate_conf.sh | 3 -- 5 files changed, 1 insertion(+), 102 deletions(-) delete mode 100644 test/unit/hpa_validate_conf.c delete mode 100644 test/unit/hpa_validate_conf.sh diff --git a/Makefile.in b/Makefile.in index 6a386720..27eb90d3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -230,7 +230,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_background_thread.c \ - $(srcroot)test/unit/hpa_validate_conf.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ $(srcroot)test/unit/inspect.c \ diff --git a/src/jemalloc.c b/src/jemalloc.c index 248de28b..67be7681 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1041,44 +1041,6 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { return ret; } -static bool -validate_hpa_ratios(void) { - size_t hpa_threshold = fxp_mul_frac(HUGEPAGE, opt_hpa_opts.dirty_mult) + - opt_hpa_opts.hugification_threshold; - if (hpa_threshold > HUGEPAGE) { - return false; - } - - char hpa_dirty_mult[FXP_BUF_SIZE]; - char hugification_threshold[FXP_BUF_SIZE]; - char normalization_message[256] = {0}; - fxp_print(opt_hpa_opts.dirty_mult, hpa_dirty_mult); - fxp_print(fxp_div(FXP_INIT_INT((unsigned) - (opt_hpa_opts.hugification_threshold >> LG_PAGE)), - FXP_INIT_INT(HUGEPAGE_PAGES)), hugification_threshold); - if (!opt_abort_conf) { - char normalized_hugification_threshold[FXP_BUF_SIZE]; - opt_hpa_opts.hugification_threshold += - HUGEPAGE - hpa_threshold; - fxp_print(fxp_div(FXP_INIT_INT((unsigned) - (opt_hpa_opts.hugification_threshold >> LG_PAGE)), - FXP_INIT_INT(HUGEPAGE_PAGES)), - normalized_hugification_threshold); - malloc_snprintf(normalization_message, - sizeof(normalization_message), ": Normalizing " - "HPA settings to avoid pathological behavior, setting " - 
"hpa_hugification_threshold_ratio: to %s.\n", - normalized_hugification_threshold); - } - malloc_printf( - ": Invalid combination of options " - "hpa_hugification_threshold_ratio: %s and hpa_dirty_mult: %s. " - "These values should sum to > 1.0.\n%s", hugification_threshold, - hpa_dirty_mult, normalization_message); - - return true; -} - static void validate_hpa_settings(void) { if (!hpa_supported() || !opt_hpa) { @@ -1090,9 +1052,6 @@ validate_hpa_settings(void) { ": huge page size (%zu) greater than expected." "May not be supported or behave as expected.", HUGEPAGE); } - if (opt_hpa_opts.dirty_mult != (fxp_t)-1 && validate_hpa_ratios()) { - had_conf_error = true; - } #ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE if (opt_hpa_opts.hugify_sync) { had_conf_error = true; diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 33b70e19..65a56a08 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0.001,hpa_hugification_threshold_ratio:1.0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" diff --git a/test/unit/hpa_validate_conf.c b/test/unit/hpa_validate_conf.c deleted file mode 100644 index 8c1847ba..00000000 --- a/test/unit/hpa_validate_conf.c +++ /dev/null @@ -1,56 +0,0 @@ -#include "test/jemalloc_test.h" - -static bool abort_called = false; -static void (*default_malloc_message)(void *, const char *); - -static void -mock_invalid_conf_abort(void) { - abort_called = true; -} - -static void -null_malloc_message(void *_1, const char* _2) { -} - -TEST_BEGIN(test_hpa_validate_conf) { - test_skip_if(!hpa_supported()); - void *ptr = malloc(4096); - /* Need to restore this here to see any possible assert messages */ - malloc_message = default_malloc_message; - assert_true(abort_called, - "Should have aborted due to invalid values for hpa_dirty_mult and " - 
"hpa_hugification_threshold_ratio"); - free(ptr); -} -TEST_END - -/* - * We have to set `abort_conf:true` here and not via the `MALLOC_CONF` - * environment variable in the associated shell script for this test. This is - * because when testing on FreeBSD (where Jemalloc is the system allocator) in - * CI configs where HPA is not supported, setting `abort_conf:true` there would - * result in the system Jemalloc picking this up and aborting before we could - * ever even launch the test. - */ -const char *malloc_conf = "abort_conf:true"; - -int -main(void) { - /* - * OK, this is a sort of nasty hack. We don't want to add *another* - * config option for HPA (the intent is that it becomes available on - * more platforms over time, and we're trying to prune back config - * options generally. But we'll get initialization errors on other - * platforms if we set hpa:true in the MALLOC_CONF (even if we set - * abort_conf:false as well). So we reach into the internals and set - * them directly, but only if we know that we're actually going to do - * something nontrivial in the tests. - */ - if (hpa_supported()) { - default_malloc_message = malloc_message; - malloc_message = null_malloc_message; - opt_hpa = true; - invalid_conf_abort = mock_invalid_conf_abort; - } - return test_no_reentrancy(test_hpa_validate_conf); -} diff --git a/test/unit/hpa_validate_conf.sh b/test/unit/hpa_validate_conf.sh deleted file mode 100644 index 692c3da9..00000000 --- a/test/unit/hpa_validate_conf.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -export MALLOC_CONF='tcache:false,hpa_dirty_mult:0.25,hpa_hugification_threshold_ratio:0.6' From 6092c980a6d02b34bc7b3ed0c2ad923d0a5d2970 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 14 Nov 2024 10:52:50 -0800 Subject: [PATCH 2454/2608] Expose `psset` state stats When evaluating changes in HPA logic, it is useful to know internal `hpa_shard` state. Great deal of this state is `psset`. 
Some of the `psset` stats was available, but in disaggregated form, which is not very convenient. This commit exposed `psset` counters to `mallctl` and malloc stats dumps. Example of how malloc stats dump will look like after the change. HPA shard stats: Pageslabs: 14899 (4354 huge, 10545 nonhuge) Active pages: 6708166 (2228917 huge, 4479249 nonhuge) Dirty pages: 233816 (331 huge, 233485 nonhuge) Retained pages: 686306 Purge passes: 8730 (10 / sec) Purges: 127501 (146 / sec) Hugeifies: 4358 (5 / sec) Dehugifies: 4 (0 / sec) Pageslabs, active pages, dirty pages and retained pages are rows added by this change. --- include/jemalloc/internal/psset.h | 39 ++++--- src/ctl.c | 75 +++++++++++-- src/psset.c | 105 ++++++++++++------- src/stats.c | 69 ++++++++++++ test/unit/mallctl.c | 59 +++++++++++ test/unit/psset.c | 169 +++++++++++++++++++++++++++++- 6 files changed, 458 insertions(+), 58 deletions(-) diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 7e510b7f..ea608213 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -21,6 +21,12 @@ */ #define PSSET_NPSIZES 64 +/* + * We store non-hugefied and hugified pageslabs metadata separately. + * [0] corresponds to non-hugified and [1] to hugified pageslabs. + */ +#define PSSET_NHUGE 2 + /* * We keep two purge lists per page size class; one for hugified hpdatas (at * index 2*pszind), and one for the non-hugified hpdatas (at index 2*pszind + @@ -44,21 +50,36 @@ struct psset_bin_stats_s { typedef struct psset_stats_s psset_stats_t; struct psset_stats_s { + /* + * Merged stats for all pageslabs in psset. This lets us quickly + * answer queries for the number of dirty and active pages in the + * entire set. + */ + psset_bin_stats_t merged; + + /* + * Below are the same stats, but aggregated by different + * properties of pageslabs: huginess or fullness. + */ + + /* Non-huge and huge slabs. 
*/ + psset_bin_stats_t slabs[PSSET_NHUGE]; + /* * The second index is huge stats; nonfull_slabs[pszind][0] contains * stats for the non-huge slabs in bucket pszind, while * nonfull_slabs[pszind][1] contains stats for the huge slabs. */ - psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][2]; + psset_bin_stats_t nonfull_slabs[PSSET_NPSIZES][PSSET_NHUGE]; /* * Full slabs don't live in any edata heap, but we still track their * stats. */ - psset_bin_stats_t full_slabs[2]; + psset_bin_stats_t full_slabs[PSSET_NHUGE]; /* Empty slabs are similar. */ - psset_bin_stats_t empty_slabs[2]; + psset_bin_stats_t empty_slabs[PSSET_NHUGE]; }; typedef struct psset_s psset_t; @@ -70,12 +91,6 @@ struct psset_s { hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; - /* - * The sum of all bin stats in stats. This lets us quickly answer - * queries for the number of dirty, active, and retained pages in the - * entire set. 
- */ - psset_bin_stats_t merged_stats; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new @@ -116,17 +131,17 @@ void psset_remove(psset_t *psset, hpdata_t *ps); static inline size_t psset_npageslabs(psset_t *psset) { - return psset->merged_stats.npageslabs; + return psset->stats.merged.npageslabs; } static inline size_t psset_nactive(psset_t *psset) { - return psset->merged_stats.nactive; + return psset->stats.merged.nactive; } static inline size_t psset_ndirty(psset_t *psset) { - return psset->merged_stats.ndirty; + return psset->stats.merged.ndirty; } #endif /* JEMALLOC_INTERNAL_PSSET_H */ diff --git a/src/ctl.c b/src/ctl.c index 40e75fb7..66844105 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -261,13 +261,27 @@ CTL_PROTO(stats_arenas_i_extents_j_dirty_bytes) CTL_PROTO(stats_arenas_i_extents_j_muzzy_bytes) CTL_PROTO(stats_arenas_i_extents_j_retained_bytes) INDEX_PROTO(stats_arenas_i_extents_j) + +/* Merged set of stats for HPA shard. */ +CTL_PROTO(stats_arenas_i_hpa_shard_npageslabs) +CTL_PROTO(stats_arenas_i_hpa_shard_nactive) +CTL_PROTO(stats_arenas_i_hpa_shard_ndirty) + CTL_PROTO(stats_arenas_i_hpa_shard_npurge_passes) CTL_PROTO(stats_arenas_i_hpa_shard_npurges) CTL_PROTO(stats_arenas_i_hpa_shard_nhugifies) CTL_PROTO(stats_arenas_i_hpa_shard_nhugify_failures) CTL_PROTO(stats_arenas_i_hpa_shard_ndehugifies) -/* We have a set of stats for full slabs. */ +/* Set of stats for non-hugified and hugified slabs. */ +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_npageslabs_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_nactive_huge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge) +CTL_PROTO(stats_arenas_i_hpa_shard_slabs_ndirty_huge) + +/* A parallel set of stats for full slabs. 
*/ CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge) CTL_PROTO(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge) @@ -295,6 +309,7 @@ CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge) CTL_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge) INDEX_PROTO(stats_arenas_i_hpa_shard_nonfull_slabs_j) + CTL_PROTO(stats_arenas_i_nthreads) CTL_PROTO(stats_arenas_i_uptime) CTL_PROTO(stats_arenas_i_dss) @@ -771,6 +786,21 @@ MUTEX_PROF_ARENA_MUTEXES #undef OP }; +static const ctl_named_node_t stats_arenas_i_hpa_shard_slabs_node[] = { + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)} +}; + static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { {NAME("npageslabs_nonhuge"), CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, @@ -827,19 +857,25 @@ static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = }; static const ctl_named_node_t stats_arenas_i_hpa_shard_node[] = { - {NAME("full_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_full_slabs)}, - {NAME("empty_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_empty_slabs)}, - {NAME("nonfull_slabs"), CHILD(indexed, - stats_arenas_i_hpa_shard_nonfull_slabs)}, + {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)}, + {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, + {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)}, + + {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)}, {NAME("npurge_passes"), 
CTL(stats_arenas_i_hpa_shard_npurge_passes)}, {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, - {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)} + {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, + + {NAME("full_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("empty_slabs"), CHILD(named, + stats_arenas_i_hpa_shard_empty_slabs)}, + {NAME("nonfull_slabs"), CHILD(indexed, + stats_arenas_i_hpa_shard_nonfull_slabs)} }; static const ctl_named_node_t stats_arenas_i_node[] = { @@ -4061,6 +4097,29 @@ stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, return super_stats_arenas_i_extents_j_node; } +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npageslabs, + arenas_i(mib[2])->astats->hpastats.psset_stats.merged.npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nactive, + arenas_i(mib[2])->astats->hpastats.psset_stats.merged.nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndirty, + arenas_i(mib[2])->astats->hpastats.psset_stats.merged.ndirty, size_t); + +/* Nonhuge slabs */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_nactive_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[0].ndirty, size_t); + +/* Huge slabs */ +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_npageslabs_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].npageslabs, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_nactive_huge, + 
arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].nactive, size_t); +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_huge, + arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].ndirty, size_t); + CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, diff --git a/src/psset.c b/src/psset.c index 55966816..9a833193 100644 --- a/src/psset.c +++ b/src/psset.c @@ -11,7 +11,6 @@ psset_init(psset_t *psset) { hpdata_age_heap_new(&psset->pageslabs[i]); } fb_init(psset->pageslab_bitmap, PSSET_NPSIZES); - memset(&psset->merged_stats, 0, sizeof(psset->merged_stats)); memset(&psset->stats, 0, sizeof(psset->stats)); hpdata_empty_list_init(&psset->empty); for (int i = 0; i < PSSET_NPURGE_LISTS; i++) { @@ -30,10 +29,14 @@ psset_bin_stats_accum(psset_bin_stats_t *dst, psset_bin_stats_t *src) { void psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { - psset_bin_stats_accum(&dst->full_slabs[0], &src->full_slabs[0]); - psset_bin_stats_accum(&dst->full_slabs[1], &src->full_slabs[1]); - psset_bin_stats_accum(&dst->empty_slabs[0], &src->empty_slabs[0]); - psset_bin_stats_accum(&dst->empty_slabs[1], &src->empty_slabs[1]); + psset_bin_stats_accum(&dst->merged, &src->merged); + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + psset_bin_stats_accum(&dst->slabs[huge], &src->slabs[huge]); + psset_bin_stats_accum(&dst->full_slabs[huge], + &src->full_slabs[huge]); + psset_bin_stats_accum(&dst->empty_slabs[huge], + &src->empty_slabs[huge]); + } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { psset_bin_stats_accum(&dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]); @@ -48,48 +51,76 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { * bin) when we call psset_update_end. 
*/ JEMALLOC_ALWAYS_INLINE void -psset_bin_stats_insert_remove(psset_t *psset, psset_bin_stats_t *binstats, - hpdata_t *ps, bool insert) { +psset_slab_stats_insert_remove(psset_stats_t *stats, + psset_bin_stats_t *binstats, hpdata_t *ps, bool insert) { size_t mul = insert ? (size_t)1 : (size_t)-1; + size_t nactive = hpdata_nactive_get(ps); + size_t ndirty = hpdata_ndirty_get(ps); + + stats->merged.npageslabs += mul * 1; + stats->merged.nactive += mul * nactive; + stats->merged.ndirty += mul * ndirty; + + /* + * Stats above are necessary for purging logic to work, everything + * below is to improve observability, thense is optional, so we don't + * update it, when stats disabled. + */ + if (!config_stats) { + return; + } + size_t huge_idx = (size_t)hpdata_huge_get(ps); - binstats[huge_idx].npageslabs += mul * 1; - binstats[huge_idx].nactive += mul * hpdata_nactive_get(ps); - binstats[huge_idx].ndirty += mul * hpdata_ndirty_get(ps); + stats->slabs[huge_idx].npageslabs += mul * 1; + stats->slabs[huge_idx].nactive += mul * nactive; + stats->slabs[huge_idx].ndirty += mul * ndirty; - psset->merged_stats.npageslabs += mul * 1; - psset->merged_stats.nactive += mul * hpdata_nactive_get(ps); - psset->merged_stats.ndirty += mul * hpdata_ndirty_get(ps); + binstats[huge_idx].npageslabs += mul * 1; + binstats[huge_idx].nactive += mul * nactive; + binstats[huge_idx].ndirty += mul * ndirty; if (config_debug) { - psset_bin_stats_t check_stats = {0}; - for (size_t huge = 0; huge <= 1; huge++) { - psset_bin_stats_accum(&check_stats, - &psset->stats.full_slabs[huge]); - psset_bin_stats_accum(&check_stats, - &psset->stats.empty_slabs[huge]); + psset_bin_stats_t check_stats[PSSET_NHUGE] = {{0}}; + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + psset_bin_stats_accum(&check_stats[huge], + &stats->full_slabs[huge]); + psset_bin_stats_accum(&check_stats[huge], + &stats->empty_slabs[huge]); for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { - psset_bin_stats_accum(&check_stats, - 
&psset->stats.nonfull_slabs[pind][huge]); + psset_bin_stats_accum(&check_stats[huge], + &stats->nonfull_slabs[pind][huge]); } } - assert(psset->merged_stats.npageslabs - == check_stats.npageslabs); - assert(psset->merged_stats.nactive == check_stats.nactive); - assert(psset->merged_stats.ndirty == check_stats.ndirty); + + assert(stats->merged.npageslabs + == check_stats[0].npageslabs + check_stats[1].npageslabs); + assert(stats->merged.nactive + == check_stats[0].nactive + check_stats[1].nactive); + assert(stats->merged.ndirty + == check_stats[0].ndirty + check_stats[1].ndirty); + + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + assert(stats->slabs[huge].npageslabs + == check_stats[huge].npageslabs); + assert(stats->slabs[huge].nactive + == check_stats[huge].nactive); + assert(stats->slabs[huge].ndirty + == check_stats[huge].ndirty); + } } } static void -psset_bin_stats_insert(psset_t *psset, psset_bin_stats_t *binstats, +psset_slab_stats_insert(psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(psset, binstats, ps, true); + psset_slab_stats_insert_remove(stats, binstats, ps, true); } static void -psset_bin_stats_remove(psset_t *psset, psset_bin_stats_t *binstats, +psset_slab_stats_remove(psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { - psset_bin_stats_insert_remove(psset, binstats, ps, false); + psset_slab_stats_insert_remove(stats, binstats, ps, false); } static pszind_t @@ -122,27 +153,29 @@ psset_hpdata_heap_insert(psset_t *psset, hpdata_t *ps) { } static void -psset_stats_insert(psset_t* psset, hpdata_t *ps) { +psset_stats_insert(psset_t *psset, hpdata_t *ps) { + psset_stats_t *stats = &psset->stats; if (hpdata_empty(ps)) { - psset_bin_stats_insert(psset, psset->stats.empty_slabs, ps); + psset_slab_stats_insert(stats, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_insert(psset, psset->stats.full_slabs, ps); + psset_slab_stats_insert(stats, 
psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_bin_stats_insert(psset, psset->stats.nonfull_slabs[pind], + psset_slab_stats_insert(stats, psset->stats.nonfull_slabs[pind], ps); } } static void psset_stats_remove(psset_t *psset, hpdata_t *ps) { + psset_stats_t *stats = &psset->stats; if (hpdata_empty(ps)) { - psset_bin_stats_remove(psset, psset->stats.empty_slabs, ps); + psset_slab_stats_remove(stats, psset->stats.empty_slabs, ps); } else if (hpdata_full(ps)) { - psset_bin_stats_remove(psset, psset->stats.full_slabs, ps); + psset_slab_stats_remove(stats, psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_bin_stats_remove(psset, psset->stats.nonfull_slabs[pind], + psset_slab_stats_remove(stats, psset->stats.nonfull_slabs[pind], ps); } } diff --git a/src/stats.c b/src/stats.c index 7fbaa5cc..b28b9942 100644 --- a/src/stats.c +++ b/src/stats.c @@ -841,12 +841,48 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, uint64_t uptime) { + size_t npageslabs; + size_t nactive; + size_t ndirty; + + size_t npageslabs_nonhuge; + size_t nactive_nonhuge; + size_t ndirty_nonhuge; + size_t nretained_nonhuge; + + size_t npageslabs_huge; + size_t nactive_huge; + size_t ndirty_huge; + uint64_t npurge_passes; uint64_t npurges; uint64_t nhugifies; uint64_t nhugify_failures; uint64_t ndehugifies; + CTL_M2_GET("stats.arenas.0.hpa_shard.npageslabs", + i, &npageslabs, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", + i, &nactive, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", + i, &ndirty, size_t); + + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", + i, &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", + i, &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", + i, &ndirty_nonhuge, size_t); + 
nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES + - nactive_nonhuge - ndirty_nonhuge; + + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", + i, &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", + i, &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", + i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes, uint64_t); CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", @@ -860,18 +896,33 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, emitter_table_printf(emitter, "HPA shard stats:\n" + " Pageslabs: %zu (%zu huge, %zu nonhuge)\n" + " Active pages: %zu (%zu huge, %zu nonhuge)\n" + " Dirty pages: %zu (%zu huge, %zu nonhuge)\n" + " Retained pages: %zu\n" " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" "\n", + npageslabs, npageslabs_huge, npageslabs_nonhuge, + nactive, nactive_huge, nactive_nonhuge, + ndirty, ndirty_huge, ndirty_nonhuge, + nretained_nonhuge, npurge_passes, rate_per_second(npurge_passes, uptime), npurges, rate_per_second(npurges, uptime), nhugifies, rate_per_second(nhugifies, uptime), nhugify_failures, rate_per_second(nhugify_failures, uptime), ndehugifies, rate_per_second(ndehugifies, uptime)); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, + &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, + &nactive); + emitter_json_kv(emitter, "ndirty", emitter_type_size, + &ndirty); + emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); emitter_json_kv(emitter, "npurges", emitter_type_uint64, @@ -882,6 +933,24 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, &nhugify_failures); emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, 
&ndehugifies); + + emitter_json_object_kv_begin(emitter, "slabs"); + emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, + &npageslabs_nonhuge); + emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, + &nactive_nonhuge); + emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, + &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); + + emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, + &npageslabs_huge); + emitter_json_kv(emitter, "nactive_huge", emitter_type_size, + &nactive_huge); + emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, + &ndirty_huge); + emitter_json_object_end(emitter); /* End "slabs" */ } static void diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8c8fb18c..6784306f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1002,6 +1002,63 @@ TEST_BEGIN(test_stats_arenas) { } TEST_END +TEST_BEGIN(test_stats_arenas_hpa_shard_counters) { + test_skip_if(!config_stats); + +#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name) do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#name, \ + (void *)&name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ +} while (0) + + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, nactive); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, ndirty); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurge_passes); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, npurges); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, nhugifies); + TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(uint64_t, ndehugifies); + +#undef TEST_STATS_ARENAS_HPA_SHARD_COUNTERS +} +TEST_END + +TEST_BEGIN(test_stats_arenas_hpa_shard_slabs) { + test_skip_if(!config_stats); + +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name) do { \ + t slab##_##name; \ + size_t sz = sizeof(t); \ + 
expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#slab"."#name, \ + (void *)&slab##_##name, &sz, \ + NULL, 0), 0, "Unexpected mallctl() failure"); \ +} while (0) + +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name) do { \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, \ + name##_##nonhuge); \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \ +} while (0) + + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, nactive); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, ndirty); + + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, nactive); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, full_slabs, ndirty); + + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, npageslabs); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, nactive); + TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, empty_slabs, ndirty); + +#undef TEST_STATS_ARENAS_HPA_SHARD_SLABS +#undef TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN +} +TEST_END + static void alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result, UNUSED uintptr_t result_raw, UNUSED uintptr_t args_raw[3]) { @@ -1321,6 +1378,8 @@ main(void) { test_arenas_lookup, test_prof_active, test_stats_arenas, + test_stats_arenas_hpa_shard_counters, + test_stats_arenas_hpa_shard_slabs, test_hooks, test_hooks_exhaustion, test_thread_idle, diff --git a/test/unit/psset.c b/test/unit/psset.c index 6ff72012..6bfdbb5f 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -64,6 +64,24 @@ test_psset_alloc_reuse(psset_t *psset, edata_t *r_edata, size_t size) { return false; } +static hpdata_t * +test_psset_hugify(psset_t *psset, edata_t *edata) { + hpdata_t *ps = edata_ps_get(edata); + psset_update_begin(psset, ps); + hpdata_hugify(ps); + psset_update_end(psset, ps); + return ps; +} + +static hpdata_t * +test_psset_dehugify(psset_t *psset, edata_t *edata) { + hpdata_t *ps = 
edata_ps_get(edata); + psset_update_begin(psset, ps); + hpdata_dehugify(ps); + psset_update_end(psset, ps); + return ps; +} + static hpdata_t * test_psset_dalloc(psset_t *psset, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); @@ -339,6 +357,149 @@ TEST_BEGIN(test_multi_pageslab) { } TEST_END +TEST_BEGIN(test_stats_merged) { + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; + + psset_t psset; + psset_init(&psset); + expect_zu_eq(0, psset.stats.merged.npageslabs, ""); + expect_zu_eq(0, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + edata_init_test(&alloc[0]); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(i, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + edata_init_test(&alloc[i]); + bool err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + for (ssize_t i = HUGEPAGE_PAGES - 1; i > 0; i--) { + test_psset_dalloc(&psset, &alloc[i]); + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(i, psset.stats.merged.nactive, ""); + expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.merged.ndirty, ""); + } + /* No allocations have left. */ + test_psset_dalloc(&psset, &alloc[0]); + expect_zu_eq(0, psset.stats.merged.npageslabs, ""); + expect_zu_eq(0, psset.stats.merged.nactive, ""); + + /* + * Last test_psset_dalloc call removed empty pageslab from psset, so + * nothing has left there, even no dirty pages. 
+ */ + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(1, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + psset_update_begin(&psset, &pageslab); + expect_zu_eq(0, psset.stats.merged.npageslabs, ""); + expect_zu_eq(0, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + psset_update_end(&psset, &pageslab); + expect_zu_eq(1, psset.stats.merged.npageslabs, ""); + expect_zu_eq(1, psset.stats.merged.nactive, ""); + expect_zu_eq(0, psset.stats.merged.ndirty, ""); +} +TEST_END + +TEST_BEGIN(test_stats_huge) { + test_skip_if(!config_stats); + + hpdata_t pageslab; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + + edata_t alloc[HUGEPAGE_PAGES]; + + psset_t psset; + psset_init(&psset); + for (int huge = 0; huge < PSSET_NHUGE; ++huge) { + expect_zu_eq(0, psset.stats.slabs[huge].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[huge].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[huge].ndirty, ""); + } + + edata_init_test(&alloc[0]); + test_psset_alloc_new(&psset, &pageslab, &alloc[0], PAGE); + for (size_t i = 1; i < HUGEPAGE_PAGES; i++) { + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(i, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + edata_init_test(&alloc[i]); + bool err = test_psset_alloc_reuse(&psset, &alloc[i], PAGE); + expect_false(err, "Nonempty psset failed page allocation."); + } + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, 
psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + test_psset_hugify(&psset, &alloc[0]); + + /* All stats should been moved from nonhuge to huge. */ + expect_zu_eq(0, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(1, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + test_psset_dehugify(&psset, &alloc[0]); + + /* And back from huge to nonhuge after dehugification. */ + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(HUGEPAGE_PAGES, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + + for (ssize_t i = HUGEPAGE_PAGES - 1; i > 0; i--) { + test_psset_dalloc(&psset, &alloc[i]); + + expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); + expect_zu_eq(i, psset.stats.slabs[0].nactive, ""); + expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, ""); + + expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[1].ndirty, ""); + } + test_psset_dalloc(&psset, &alloc[0]); + + for (int huge = 0; huge < PSSET_NHUGE; huge++) { + expect_zu_eq(0, psset.stats.slabs[huge].npageslabs, ""); + expect_zu_eq(0, psset.stats.slabs[huge].nactive, ""); + expect_zu_eq(0, psset.stats.slabs[huge].ndirty, ""); + } +} +TEST_END + static void stats_expect_empty(psset_bin_stats_t *stats) { assert_zu_eq(0, stats->npageslabs, @@ -379,7 +540,9 @@ stats_expect(psset_t *psset, size_t nactive) { expect_zu_eq(nactive, psset_nactive(psset), ""); } -TEST_BEGIN(test_stats) { +TEST_BEGIN(test_stats_fullness) { + test_skip_if(!config_stats); + 
bool err; hpdata_t pageslab; @@ -739,7 +902,9 @@ main(void) { test_reuse, test_evict, test_multi_pageslab, - test_stats, + test_stats_merged, + test_stats_huge, + test_stats_fullness, test_oldest_fit, test_insert_remove, test_purge_prefers_nonhuge, From 46690c9ec036cede074476caa05ecd6fe954bd23 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Thu, 28 Nov 2024 07:10:33 -0800 Subject: [PATCH 2455/2608] Fix `test_retained` on boxes with a lot of CPUs We are trying to create `ncpus * 2` threads for this test and place them into `VARIABLE_ARRAY`, but `VARIABLE_ARRAY` can not be more than `VARIABLE_ARRAY_SIZE_MAX` bytes. When there are a lot of threads on the box test always fails. ``` $ nproc 176 $ make -j`nproc` tests_unit && ./test/unit/retained : ../test/unit/retained.c:123: Failed assertion: "sizeof(thd_t) * (nthreads) <= VARIABLE_ARRAY_SIZE_MAX" Aborted (core dumped) ``` There is no need for high concurrency for this test as we are only checking stats there and it's behaviour is quite stable regarding number of allocating threads. Limited number of threads to 16 to save compute resources (on CI for example) and reduce tests running time. Before the change (`nproc` is 80 on this box). ``` $ make -j`nproc` tests_unit && time ./test/unit/retained <...> real 0m0.372s user 0m14.236s sys 0m12.338s ``` After the change (same box). ``` $ make -j`nproc` tests_unit && time ./test/unit/retained <...> real 0m0.018s user 0m0.108s sys 0m0.068s ``` --- test/unit/retained.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/unit/retained.c b/test/unit/retained.c index 340f2d38..40cbb0cd 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -110,8 +110,15 @@ TEST_BEGIN(test_retained) { atomic_store_u(&epoch, 0, ATOMIC_RELAXED); unsigned nthreads = ncpus * 2; - if (LG_SIZEOF_PTR < 3 && nthreads > 16) { - nthreads = 16; /* 32-bit platform could run out of vaddr. 
*/ + if (nthreads > 16) { + /* + * Limit number of threads we are creating for following + * reasons. + * 1. On 32-bit platforms could run out of vaddr. + * 2. On boxes with a lot of CPUs we might have not enough + * memory to fit thd_t into VARIABLE_ARRAY. + */ + nthreads = 16; } VARIABLE_ARRAY(thd_t, threads, nthreads); for (unsigned i = 0; i < nthreads; i++) { From 6786934280392e71a1e14d48b331d4eca58550a7 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 9 Dec 2024 14:24:19 -0800 Subject: [PATCH 2456/2608] Fix ehooks assertion for arena creation --- src/ehooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ehooks.c b/src/ehooks.c index fc2355e6..89e30409 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -53,7 +53,7 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { arena_t *arena = arena_get(tsdn, arena_ind, false); /* NULL arena indicates arena_create. */ - assert(arena != NULL || alignment == HUGEPAGE); + assert(arena != NULL || alignment == BASE_BLOCK_MIN_ALIGN); dss_prec_t dss = (arena == NULL) ? 
dss_prec_disabled : (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, From a17385a882c252a292299ab047d13fc3b2d6fb16 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 13 Dec 2024 15:06:06 -0800 Subject: [PATCH 2457/2608] Enable large hugepage tests for arm64 on Travis --- .travis.yml | 3 +++ scripts/gen_travis.py | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/.travis.yml b/.travis.yml index aad7eea7..ceda8989 100644 --- a/.travis.yml +++ b/.travis.yml @@ -295,6 +295,9 @@ jobs: - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index ae0b9e2e..43457967 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -96,6 +96,15 @@ class Option(object): return (isinstance(obj, Option) and obj.type == self.type and obj.value == self.value) + def __repr__(self): + type_names = { + Option.Type.COMPILER: 'COMPILER', + Option.Type.COMPILER_FLAG: 'COMPILER_FLAG', + Option.Type.CONFIGURE_FLAG: 'CONFIGURE_FLAG', + Option.Type.MALLOC_CONF: 'MALLOC_CONF', + Option.Type.FEATURE: 'FEATURE' + } + return f"Option({type_names[self.type]}, {repr(self.value)})" # The 'default' configuration is gcc, on linux, with no compiler or configure # flags. 
We also test with clang, -m32, --enable-debug, --enable-prof, @@ -125,7 +134,9 @@ configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( '--disable-libdl', '--enable-opt-safety-checks', '--with-lg-page=16', + '--with-lg-page=16 --with-lg-hugepage=29', )] +LARGE_HUGEPAGE = Option.as_configure_flag("--with-lg-page=16 --with-lg-hugepage=29") malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( @@ -250,6 +261,9 @@ def generate_linux(arch): # Avoid 32 bit build on ARM64 exclude = (CROSS_COMPILE_32BIT,) + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + return generate_jobs(os, arch, exclude, max_unusual_opts) @@ -264,6 +278,9 @@ def generate_macos(arch): [Option.as_configure_flag('--enable-prof')] + [CLANG,]) + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + return generate_jobs(os, arch, exclude, max_unusual_opts) From 587676fee8a77046e67d3ae8eb26e5456b6da481 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Wed, 11 Dec 2024 15:24:26 -0800 Subject: [PATCH 2458/2608] Disable psset test when hugepage size is too large. --- include/jemalloc/internal/hpa.h | 1 + src/hpa.c | 7 ++++++- test/unit/psset.c | 12 ++++++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 4c410c40..1f90a15f 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -151,6 +151,7 @@ struct hpa_shard_s { nstime_t last_purge; }; +bool hpa_hugepage_size_exceeds_limit(); /* * Whether or not the HPA can be used given the current configuration. 
This is * is not necessarily a guarantee that it backs its allocations by hugepages, diff --git a/src/hpa.c b/src/hpa.c index 14541413..cb3f978c 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -24,6 +24,11 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +bool +hpa_hugepage_size_exceeds_limit() { + return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; +} + bool hpa_supported(void) { #ifdef _WIN32 @@ -52,7 +57,7 @@ hpa_supported(void) { return false; } /* As mentioned in pages.h, do not support If HUGEPAGE is too large. */ - if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + if (hpa_hugepage_size_exceeds_limit()) { return false; } return true; diff --git a/test/unit/psset.c b/test/unit/psset.c index 6bfdbb5f..c400f3b9 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -120,6 +120,7 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { } TEST_BEGIN(test_empty) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); @@ -137,6 +138,7 @@ TEST_BEGIN(test_empty) { TEST_END TEST_BEGIN(test_fill) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; @@ -169,6 +171,7 @@ TEST_BEGIN(test_fill) { TEST_END TEST_BEGIN(test_reuse) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; @@ -261,6 +264,7 @@ TEST_BEGIN(test_reuse) { TEST_END TEST_BEGIN(test_evict) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; @@ -295,6 +299,7 @@ TEST_BEGIN(test_evict) { TEST_END TEST_BEGIN(test_multi_pageslab) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; @@ -420,6 +425,7 @@ TEST_END TEST_BEGIN(test_stats_huge) { test_skip_if(!config_stats); + test_skip_if(hpa_hugepage_size_exceeds_limit()); hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); @@ 
-542,6 +548,7 @@ stats_expect(psset_t *psset, size_t nactive) { TEST_BEGIN(test_stats_fullness) { test_skip_if(!config_stats); + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; @@ -637,6 +644,7 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, } TEST_BEGIN(test_oldest_fit) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; edata_t alloc[HUGEPAGE_PAGES]; edata_t worse_alloc[HUGEPAGE_PAGES]; @@ -660,6 +668,7 @@ TEST_BEGIN(test_oldest_fit) { TEST_END TEST_BEGIN(test_insert_remove) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; edata_t alloc[HUGEPAGE_PAGES]; @@ -706,6 +715,7 @@ TEST_BEGIN(test_insert_remove) { TEST_END TEST_BEGIN(test_purge_prefers_nonhuge) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); /* * All else being equal, we should prefer purging non-huge pages over * huge ones for non-empty extents. @@ -789,6 +799,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { TEST_END TEST_BEGIN(test_purge_prefers_empty) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; psset_t psset; @@ -825,6 +836,7 @@ TEST_BEGIN(test_purge_prefers_empty) { TEST_END TEST_BEGIN(test_purge_prefers_empty_huge) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; psset_t psset; From d8486b2653dc54f4d836e389960f627ab56cb8b4 Mon Sep 17 00:00:00 2001 From: appujee <124090381+appujee@users.noreply.github.com> Date: Fri, 25 Oct 2024 14:00:32 -0700 Subject: [PATCH 2459/2608] Remove unreachable() macro as c23 already defines it. Taken from https://android-review.git.corp.google.com/c/platform/external/jemalloc_new/+/3316478 This might need more cleanups to remove the definition of JEMALLOC_INTERNAL_UNREACHABLE. 
--- include/jemalloc/internal/util.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 24f23629..b400f231 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -39,12 +39,6 @@ # define unlikely(x) !!(x) #endif -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif - -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() - /* Set error code. */ UTIL_INLINE void set_errno(int errnum) { From 4b88bddbcac1f994034eb5d7485fd35663c3d325 Mon Sep 17 00:00:00 2001 From: appujee <124090381+appujee@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:14:10 -0800 Subject: [PATCH 2460/2608] Conditionally remove unreachable for C23+ --- include/jemalloc/internal/util.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index b400f231..6646386e 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -39,6 +39,15 @@ # define unlikely(x) !!(x) #endif +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L +#include +#else +#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +#endif +#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() +#endif + /* Set error code. */ UTIL_INLINE void set_errno(int errnum) { From 17881ebbfd76529904e826f425f3266834cf3a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dan=20Hor=C3=A1k?= Date: Fri, 8 Nov 2024 15:34:06 +0000 Subject: [PATCH 2461/2608] Add configure check for gettid() presence The gettid() function is available on Linux in glibc only since version 2.30. There are supported distributions that still use older glibc version. Thus add a configure check if the gettid() function is available and extend the check in src/prof_stack_range.c so it's skipped also when gettid() isn't available. 
Fixes: https://github.com/jemalloc/jemalloc/issues/2740 --- configure.ac | 9 +++++++++ src/prof_stack_range.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index a330e33e..e5fb3a6d 100644 --- a/configure.ac +++ b/configure.ac @@ -2706,6 +2706,15 @@ if test "x${je_cv_pthread_mutex_adaptive_np}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP], [ ], [ ]) fi +JE_COMPILABLE([gettid], [ +#include +], [ + int tid = gettid(); +], [je_cv_gettid]) +if test "x${je_cv_gettid}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_GETTID], [ ], [ ]) +fi + JE_CFLAGS_SAVE() JE_CFLAGS_ADD([-D_GNU_SOURCE]) JE_CFLAGS_ADD([-Werror]) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index c3458044..1f40dcc5 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" -#if defined (__linux__) +#if defined (__linux__) && defined(JE_HAVE_GETTID) #include #include From 52fa9577ba8fa94f41c8c92f845a74c3fb04db80 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 14 Jan 2025 10:46:39 -0800 Subject: [PATCH 2462/2608] Fix integer overflow in test/unit/hash.c `final[3]` is `uint8_t`. Integer conversion rank of `uint8_t` is lower than integer conversion rank of `int`, so `uint8_t` got promoted to `int`, which is signed integer type. Shift `final[3]` value left on 24, when leftmost bit is set overflows `int` and it is undefined behaviour. Before this change Undefined Behaviour Sanitizer was unhappy about it with the following message. ``` ../test/unit/hash.c:119:25: runtime error: left shift of 176 by 24 places cannot be represented in type 'int' ``` After this commit problem is gone. 
--- test/unit/hash.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/unit/hash.c b/test/unit/hash.c index 17c66ec6..7276333d 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -115,8 +115,11 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { } default: not_reached(); } - computed = (final[0] << 0) | (final[1] << 8) | (final[2] << 16) | - (final[3] << 24); + computed = + ((uint32_t)final[0] << 0) | + ((uint32_t)final[1] << 8) | + ((uint32_t)final[2] << 16) | + ((uint32_t)final[3] << 24); switch (variant) { #ifdef JEMALLOC_BIG_ENDIAN From 20cc983314ecf14ac08ccf0d60ce7e41f88babf6 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Jan 2025 15:06:02 -0800 Subject: [PATCH 2463/2608] Fix the gettid() detection caught by @mrluanma . --- include/jemalloc/internal/jemalloc_internal_defs.h.in | 3 +++ src/prof_stack_range.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 5cf77f47..742d599d 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -411,6 +411,9 @@ /* Adaptive mutex support in pthreads. 
*/ #undef JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP +/* gettid() support */ +#undef JEMALLOC_HAVE_GETTID + /* GNU specific sched_getcpu support */ #undef JEMALLOC_HAVE_SCHED_GETCPU diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index 1f40dcc5..6a99b56f 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" -#if defined (__linux__) && defined(JE_HAVE_GETTID) +#if defined (__linux__) && defined(JEMALLOC_HAVE_GETTID) #include #include From 607b86603532b59c35cfdf9abd61a0c14966092b Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 21 Jan 2025 15:15:56 -0800 Subject: [PATCH 2464/2608] Check for 0 input when setting max_background_thread through mallctl. Reported by @nc7s. --- src/ctl.c | 3 ++- test/unit/background_thread_enable.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index 66844105..b0fc0487 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2181,7 +2181,8 @@ max_background_threads_ctl(tsd_t *tsd, const size_t *mib, ret = 0; goto label_return; } - if (newval > opt_max_background_threads) { + if (newval > opt_max_background_threads || + newval == 0) { ret = EINVAL; goto label_return; } diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 5f42feff..3a2d55ac 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -54,6 +54,9 @@ TEST_BEGIN(test_max_background_threads) { "opt.max_background_threads should match"); expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0, "Failed to set max background threads"); + size_t size_zero = 0; + expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, + sz_m), 0, "Should not allow zero background threads"); unsigned id; size_t sz_u = sizeof(unsigned); @@ -80,6 +83,8 @@ TEST_BEGIN(test_max_background_threads) { new_max_thds = 1; 
expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), 0, "Failed to set max background threads"); + expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, + sz_m), 0, "Should not allow zero background threads"); expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should be 1.\n"); } From ef8e512e2916a7c2dfca289e9113324b87324723 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 24 Jan 2025 07:48:58 -0800 Subject: [PATCH 2465/2608] Fix `bitmap_ffu` out of range read We tried to load `g` from `bitmap[i]` before checking it is actually a valid load. Tweaked a loop a bit to `break` early, when we are done scanning for bits. Before this commit undefined behaviour sanitizer from GCC 14+ was unhappy at `test/unit/bitmap` test with following error. ``` ../include/jemalloc/internal/bitmap.h:293:5: runtime error: load of address 0x7bb1c2e08008 with insufficient space for an object of type 'const bitmap_t' <...> #0 0x62671a149954 in bitmap_ffu ../include/jemalloc/internal/bitmap.h:293 #1 0x62671a149954 in test_bitmap_xfu_body ../test/unit/bitmap.c:275 #2 0x62671a14b767 in test_bitmap_xfu ../test/unit/bitmap.c:323 #3 0x62671a376ad1 in p_test_impl ../test/src/test.c:149 #4 0x62671a377135 in p_test ../test/src/test.c:200 #5 0x62671a13da06 in main ../test/unit/bitmap.c:336 <...> ``` --- include/jemalloc/internal/bitmap.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index e501da47..8cd5f5a3 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -284,14 +284,17 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); size_t bit; - do { + while (1) { if (g != 0) { bit = ffs_lu(g); return (i << LG_BITMAP_GROUP_NBITS) + bit; } i++; + if (i >= binfo->ngroups) { + break; + } 
g = bitmap[i]; - } while (i < binfo->ngroups); + } return binfo->nbits; #endif } From 257e64b968ec40c285331dfb6e3db8a2b34999d1 Mon Sep 17 00:00:00 2001 From: Shai Duvdevani Date: Wed, 29 Jan 2025 15:25:10 -0800 Subject: [PATCH 2466/2608] Unlike `prof_sample` which is supported only with profiling mode active, `prof_threshold` is intended to be an always-supported allocation callback with much less overhead. The usage of the threshold allows performance critical callers to change program execution based on the callback: e.g. drop caches when memory becomes high or to predict the program is about to OOM ahead of time using peak memory watermarks. --- Makefile.in | 3 + include/jemalloc/internal/prof_externs.h | 4 + include/jemalloc/internal/prof_hook.h | 5 + include/jemalloc/internal/prof_threshold.h | 11 ++ include/jemalloc/internal/thread_event.h | 1 + include/jemalloc/internal/tsd_internals.h | 2 + .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 31 ++++++ src/jemalloc.c | 4 + src/prof_threshold.c | 57 ++++++++++ src/thread_event.c | 5 + test/unit/mallctl.c | 1 + test/unit/prof_threshold.c | 103 ++++++++++++++++++ test/unit/prof_threshold_small.c | 2 + test/unit/prof_threshold_small.sh | 1 + 22 files changed, 246 insertions(+) create mode 100644 include/jemalloc/internal/prof_threshold.h create mode 100644 src/prof_threshold.c create mode 100644 test/unit/prof_threshold.c create mode 100644 test/unit/prof_threshold_small.c create mode 100644 test/unit/prof_threshold_small.sh diff --git a/Makefile.in b/Makefile.in index 27eb90d3..1914fc28 100644 --- a/Makefile.in +++ b/Makefile.in @@ -145,6 +145,7 
@@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_stack_range.c \ $(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ + $(srcroot)src/prof_threshold.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ @@ -266,6 +267,8 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ + $(srcroot)test/unit/prof_threshold.c \ + $(srcroot)test/unit/prof_threshold_small.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ $(srcroot)test/unit/psset.c \ $(srcroot)test/unit/ql.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 952ace7d..789e3811 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -11,6 +11,7 @@ extern bool opt_prof_active; extern bool opt_prof_thread_active_init; extern unsigned opt_prof_bt_max; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern size_t opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. 
*/ @@ -67,6 +68,9 @@ prof_sample_hook_t prof_sample_hook_get(void); void prof_sample_free_hook_set(prof_sample_free_hook_t hook); prof_sample_free_hook_t prof_sample_free_hook_get(void); +void prof_threshold_hook_set(prof_threshold_hook_t hook); +prof_threshold_hook_t prof_threshold_hook_get(void); + /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 087dadc6..2f3a81af 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -26,4 +26,9 @@ typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, void **backtrac /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); +/* + * A callback hook that notifies when an allocation threshold has been crossed. + */ +typedef void (*prof_threshold_hook_t)(uint64_t alloc, uint64_t dealloc, uint64_t peak); + #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_threshold.h b/include/jemalloc/internal/prof_threshold.h new file mode 100644 index 00000000..dc9c8f2b --- /dev/null +++ b/include/jemalloc/internal/prof_threshold.h @@ -0,0 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_THRESHOLD_EVENT_H +#define JEMALLOC_INTERNAL_THRESHOLD_EVENT_H + +#include "jemalloc/internal/tsd_types.h" + +/* The activity-triggered hooks. 
*/ +uint64_t prof_threshold_new_event_wait(tsd_t *tsd); +uint64_t prof_threshold_postponed_event_wait(tsd_t *tsd); +void prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed); + +#endif /* JEMALLOC_INTERNAL_THRESHOLD_EVENT_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index 46c57ed5..ad46ffe7 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -56,6 +56,7 @@ void tsd_te_init(tsd_t *tsd); #define ITERATE_OVER_ALL_EVENTS \ E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ E(prof_sample, (config_prof && opt_prof), true) \ + E(prof_threshold, config_stats, true) \ E(stats_interval, (opt_stats_interval >= 0), true) \ E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \ E(peak_alloc, config_stats, true) \ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 439f1d10..0ed33234 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -72,6 +72,7 @@ typedef ql_elm(tsd_t) tsd_link_t; O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ O(prof_sample_event_wait, uint64_t, uint64_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ + O(prof_threshold_event_wait, uint64_t, uint64_t) \ O(stats_interval_event_wait, uint64_t, uint64_t) \ O(stats_interval_last_event, uint64_t, uint64_t) \ O(peak_alloc_event_wait, uint64_t, uint64_t) \ @@ -105,6 +106,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* tcache_gc_dalloc_event_wait */ 0, \ /* prof_sample_event_wait */ 0, \ /* prof_sample_last_event */ 0, \ + /* prof_threshold_event_wait */ 0, \ /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ /* peak_alloc_event_wait */ 0, \ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 58bd7b3e..c43b30b1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ 
b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 6e59c035..a195f6b3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index db06fc6d..cd16005d 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 01de0dcb..2d8c4be6 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -83,6 +83,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters 
b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 82ad3e35..f091475e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -133,6 +133,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index b0fc0487..1ebcbf8e 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -154,6 +154,7 @@ CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_thread_active_init) CTL_PROTO(opt_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) +CTL_PROTO(opt_experimental_lg_prof_threshold) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) @@ -357,6 +358,7 @@ CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) +CTL_PROTO(experimental_hooks_prof_threshold) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) @@ -539,6 +541,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("experimental_lg_prof_threshold"), CTL(opt_experimental_lg_prof_threshold)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, @@ -965,6 +968,7 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, + {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, }; @@ -2317,6 +2321,7 @@ CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, 
opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) +CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, opt_experimental_lg_prof_threshold, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool) @@ -3778,6 +3783,32 @@ label_return: return ret; } + +static int +experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (oldp == NULL && newp == NULL) { + ret = EINVAL; + goto label_return; + } + if (oldp != NULL) { + prof_threshold_hook_t old_hook = + prof_threshold_hook_get(); + READ(old_hook, prof_threshold_hook_t); + } + if (newp != NULL) { + prof_threshold_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_hook, prof_threshold_hook_t); + prof_threshold_hook_set(new_hook); + } + ret = 0; +label_return: + return ret; +} + + /* For integration test purpose only. No plan to move out of experimental. 
*/ static int experimental_hooks_safety_check_abort_ctl(tsd_t *tsd, const size_t *mib, diff --git a/src/jemalloc.c b/src/jemalloc.c index 67be7681..6d2f6494 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1619,6 +1619,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_SIZE_T(opt_experimental_lg_prof_threshold, + "experimental_lg_prof_threshold", 0, (sizeof(uint64_t) << 3) + - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", 1, PROF_BT_MAX_LIMIT, CONF_CHECK_MIN, CONF_CHECK_MAX, diff --git a/src/prof_threshold.c b/src/prof_threshold.c new file mode 100644 index 00000000..28a525fc --- /dev/null +++ b/src/prof_threshold.c @@ -0,0 +1,57 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/activity_callback.h" +#include "jemalloc/internal/prof_threshold.h" + +#include "jemalloc/internal/prof_externs.h" + +/* + * Update every 128MB by default. + */ +#define PROF_THRESHOLD_LG_WAIT_DEFAULT 27 + +/* Logically a prof_threshold_hook_t. 
*/ +static atomic_p_t prof_threshold_hook; +size_t opt_experimental_lg_prof_threshold = PROF_THRESHOLD_LG_WAIT_DEFAULT; + +void +prof_threshold_hook_set(prof_threshold_hook_t hook) { + atomic_store_p(&prof_threshold_hook, hook, ATOMIC_RELEASE); +} + +prof_threshold_hook_t +prof_threshold_hook_get(void) { + return (prof_threshold_hook_t)atomic_load_p(&prof_threshold_hook, + ATOMIC_ACQUIRE); +} + +/* Invoke callback for threshold reached */ +static void +prof_threshold_update(tsd_t *tsd) { + prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); + if (prof_threshold_hook == NULL) { + return; + } + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); + pre_reentrancy(tsd, NULL); + prof_threshold_hook(alloc, dalloc, peak->cur_max); + post_reentrancy(tsd); +} + +uint64_t +prof_threshold_new_event_wait(tsd_t *tsd) { + return 1 << opt_experimental_lg_prof_threshold; +} + +uint64_t +prof_threshold_postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +void +prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed) { + prof_threshold_update(tsd); +} diff --git a/src/thread_event.c b/src/thread_event.c index 37eb5827..a8276cd7 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -69,6 +69,11 @@ peak_dalloc_fetch_elapsed(tsd_t *tsd) { return TE_INVALID_ELAPSED; } +static uint64_t +prof_threshold_fetch_elapsed(tsd_t *tsd) { + return TE_INVALID_ELAPSED; +} + /* Per event facilities done. 
*/ static bool diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6784306f..02fedaa7 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -319,6 +319,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_active, prof); TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); + TEST_MALLCTL_OPT(ssize_t, experimental_lg_prof_threshold, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c new file mode 100644 index 00000000..48e9df19 --- /dev/null +++ b/test/unit/prof_threshold.c @@ -0,0 +1,103 @@ +#include "test/jemalloc_test.h" + +/* Test config (set in reset_test_config) */ +#define ALLOC_ITERATIONS_IN_THRESHOLD 10 +uint64_t threshold_bytes = 0; +uint64_t chunk_size = 0; + +/* Test globals for calblack */ +uint64_t hook_calls = 0; +uint64_t last_peak = 0; +uint64_t last_alloc = 0; +uint64_t alloc_baseline = 0; + +void +mock_prof_threshold_hook(uint64_t alloc, uint64_t dealloc, uint64_t peak) { + hook_calls++; + last_peak = peak; + last_alloc = alloc; +} + +/* Need the do_write flag because NULL is a valid to_write value. */ +static void +read_write_prof_threshold_hook(prof_threshold_hook_t *to_read, bool do_write, + prof_threshold_hook_t to_write) { + size_t hook_sz = sizeof(prof_threshold_hook_t); + expect_d_eq(mallctl("experimental.hooks.prof_threshold", + (void *)to_read, &hook_sz, do_write ? 
&to_write : NULL, hook_sz), 0, + "Unexpected prof_threshold_hook mallctl failure"); +} + +static void +write_prof_threshold_hook(prof_threshold_hook_t new_hook) { + read_write_prof_threshold_hook(NULL, true, new_hook); +} + +static prof_threshold_hook_t +read_prof_threshold_hook() { + prof_threshold_hook_t hook; + read_write_prof_threshold_hook(&hook, false, NULL); + return hook; +} + +static void reset_test_config() { + hook_calls = 0; + last_peak = 0; + alloc_baseline = last_alloc; /* We run the test multiple times */ + last_alloc = 0; + threshold_bytes = 1 << opt_experimental_lg_prof_threshold; + chunk_size = threshold_bytes / ALLOC_ITERATIONS_IN_THRESHOLD; +} + +static void expect_threshold_calls(int calls) { + expect_zu_eq(hook_calls, calls, "Hook called the right amount of times"); + expect_u64_lt(last_peak, chunk_size * 2, "We allocate chunk_size at a time"); + expect_u64_ge(last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); +} + +static void allocate_chunks(int chunks) { + for (int i = 0; i < chunks; i++) { + void* p = mallocx(chunk_size, 0); + expect_ptr_not_null(p, "Failed to allocate"); + free(p); + } +} + +TEST_BEGIN(test_prof_threshold_hook) { + /* Test setting and reading the hook (both value and null) */ + write_prof_threshold_hook(mock_prof_threshold_hook); + expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, "Unexpected hook"); + + write_prof_threshold_hook(NULL); + expect_ptr_null(read_prof_threshold_hook(), "Hook was erased"); + + /* Reset everything before the test */ + reset_test_config(); + write_prof_threshold_hook(mock_prof_threshold_hook); + + int err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); + expect_d_eq(err, 0, "Peak reset failed"); + + /* Note that since we run this test multiple times and we don't reset + the allocation counter, each time we offset the callback by the + amount we allocate over the threshold. 
*/ + + /* A simple small allocation is not enough to trigger the callback */ + allocate_chunks(1); + expect_zu_eq(hook_calls, 0, "Hook not called yet"); + + /* Enough allocations to trigger the callback */ + allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); + expect_threshold_calls(1); + + /* Enough allocations to trigger the callback again */ + allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); + expect_threshold_calls(2); +} +TEST_END + +int +main(void) { + return test( + test_prof_threshold_hook); +} diff --git a/test/unit/prof_threshold_small.c b/test/unit/prof_threshold_small.c new file mode 100644 index 00000000..67f444b1 --- /dev/null +++ b/test/unit/prof_threshold_small.c @@ -0,0 +1,2 @@ +#include "test/jemalloc_test.h" +#include "prof_threshold.c" diff --git a/test/unit/prof_threshold_small.sh b/test/unit/prof_threshold_small.sh new file mode 100644 index 00000000..62726069 --- /dev/null +++ b/test/unit/prof_threshold_small.sh @@ -0,0 +1 @@ +export MALLOC_CONF="experimental_lg_prof_threshold:22" From 1abeae9ebd7b3c9f3ebb5e49db393149c37f18f9 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Wed, 29 Jan 2025 21:33:30 -0800 Subject: [PATCH 2467/2608] Fix test/unit/prof_threshold when !config_stats --- test/unit/prof_threshold.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c index 48e9df19..2026c1c6 100644 --- a/test/unit/prof_threshold.c +++ b/test/unit/prof_threshold.c @@ -64,6 +64,8 @@ static void allocate_chunks(int chunks) { } TEST_BEGIN(test_prof_threshold_hook) { + test_skip_if(!config_stats); + /* Test setting and reading the hook (both value and null) */ write_prof_threshold_hook(mock_prof_threshold_hook); expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, "Unexpected hook"); From 3bc89cfecab89cdc2cd6ed8566e15b7fa4fdac88 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 30 Jan 2025 10:45:11 -0800 Subject: [PATCH 2468/2608] Avoid implicit conversion in test/unit/prof_threshold --- 
test/unit/prof_threshold.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c index 2026c1c6..c6f53983 100644 --- a/test/unit/prof_threshold.c +++ b/test/unit/prof_threshold.c @@ -50,14 +50,14 @@ static void reset_test_config() { } static void expect_threshold_calls(int calls) { - expect_zu_eq(hook_calls, calls, "Hook called the right amount of times"); + expect_u64_eq(hook_calls, calls, "Hook called the right amount of times"); expect_u64_lt(last_peak, chunk_size * 2, "We allocate chunk_size at a time"); expect_u64_ge(last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); } static void allocate_chunks(int chunks) { for (int i = 0; i < chunks; i++) { - void* p = mallocx(chunk_size, 0); + void* p = mallocx((size_t)chunk_size, 0); expect_ptr_not_null(p, "Failed to allocate"); free(p); } @@ -86,7 +86,7 @@ TEST_BEGIN(test_prof_threshold_hook) { /* A simple small allocation is not enough to trigger the callback */ allocate_chunks(1); - expect_zu_eq(hook_calls, 0, "Hook not called yet"); + expect_u64_eq(hook_calls, 0, "Hook not called yet"); /* Enough allocations to trigger the callback */ allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); From 34c823f1479047990a73d0e9acf396c2e04fb6b1 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Fri, 10 Jan 2025 10:51:21 -0800 Subject: [PATCH 2469/2608] Add autoconf options to enable sanitizers This commit allows to enable sanitizers with autoconf options, instead of modifying `CFLAGS`, `CXXFLAGS` and `LDFLAGS` directly. * `--enable-tsan` option to enable Thread Sanitizer. * `--enable-ubsan` option to enable Undefined Behaviour Sanitizer. End goal is to speedup development by finding problems quickly, early and easier. Eventually, when all current issues will be fixed, we can enable sanitizers in CI. Fortunately, there are not a lot of problems we need to fix. 
Address Sanitizer is a bit controversial, because it replaces memory allocator, so we decided to left it out for a while. Below are couple of examples of how tests look like under different sanitizers at the moment. ``` $ ../configure --enable-tsan --enable-debug <...> asan : 0 tsan : 1 ubsan : 0 $ make -j`nproc` check <...> Thread T13 (tid=332043, running) created by main thread at: #0 pthread_create (libtsan.so.0+0x61748) #1 thd_create ../test/src/thd.c:25 (bin_batching+0x5631ca) #2 stress_run ../test/unit/bin_batching.c:148 (bin_batching+0x40364c) #3 test_races ../test/unit/bin_batching.c:249 (bin_batching+0x403d79) #4 p_test_impl ../test/src/test.c:149 (bin_batching+0x562811) #5 p_test_no_reentrancy ../test/src/test.c:213 (bin_batching+0x562d35) #6 main ../test/unit/bin_batching.c:268 (bin_batching+0x40417e) SUMMARY: ThreadSanitizer: data race ../include/jemalloc/internal/edata.h:498 in edata_nfree_inc ``` ``` $ ../configure --enable-ubsan --enable-debug <...> asan : 0 tsan : 0 ubsan : 1 $ make -j`nproc` check <...> === test/unit/hash === ../test/unit/hash.c:119:16: runtime error: left shift of 176 by 24 places cannot be represented in type 'int' <...> ``` --- configure.ac | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index e5fb3a6d..7f59b3f1 100644 --- a/configure.ac +++ b/configure.ac @@ -92,6 +92,32 @@ AC_LANG_POP([C++]) JE_CONCAT_VVV(CXXFLAGS, CONFIGURE_CXXFLAGS, SPECIFIED_CXXFLAGS) ]) +CONFIGURE_LDFLAGS= +SPECIFIED_LDFLAGS="${LDFLAGS}" +dnl JE_LDFLAGS_ADD(ldflag) +dnl +dnl LDFLAGS is the concatenation of CONFIGURE_LDFLAGS and SPECIFIED_LDFLAGS +dnl This macro appends to CONFIGURE_LDFLAGS and regenerates LDFLAGS. 
+AC_DEFUN([JE_LDFLAGS_ADD], +[ +AC_MSG_CHECKING([whether linker supports $1]) +T_CONFIGURE_LDFLAGS="${CONFIGURE_LDFLAGS}" +JE_APPEND_VS(CONFIGURE_LDFLAGS, $1) +JE_CONCAT_VVV(LDFLAGS, CONFIGURE_LDFLAGS, SPECIFIED_LDFLAGS) +AC_LINK_IFELSE([AC_LANG_PROGRAM( +[[ +]], [[ + return 0; +]])], + [je_cv_ldflags_added=$1] + AC_MSG_RESULT([yes]), + [je_cv_ldflags_added=] + AC_MSG_RESULT([no]) + [CONFIGURE_LDFLAGS="${T_CONFIGURE_LDFLAGS}"] +) +JE_CONCAT_VVV(LDFLAGS, CONFIGURE_LDFLAGS, SPECIFIED_LDFLAGS) +]) + dnl JE_COMPILABLE(label, hcode, mcode, rvar) dnl dnl Use AC_LINK_IFELSE() rather than AC_COMPILE_IFELSE() so that linker errors @@ -2647,6 +2673,40 @@ if test "x$enable_pageid" = "x1" ; then AC_DEFINE([JEMALLOC_PAGEID], [ ], [ ]) fi +AC_ARG_ENABLE([tsan], + [AS_HELP_STRING([--enable-tsan], + [Enable thread sanitizer])], +[if test "x$enable_tsan" = "xno" ; then + enable_tsan="0" +else + enable_tsan="1" +fi +], +[enable_tsan="0"] +) +if test "x$enable_tsan" = "x1" ; then + JE_CFLAGS_ADD([-fsanitize=thread]) + JE_CXXFLAGS_ADD([-fsanitize=thread]) + JE_LDFLAGS_ADD([-fsanitize=thread]) +fi + +AC_ARG_ENABLE([ubsan], + [AS_HELP_STRING([--enable-ubsan], + [Enable undefined behavior sanitizer])], +[if test "x$enable_ubsan" = "xno" ; then + enable_ubsan="0" +else + enable_ubsan="1" +fi +], +[enable_ubsan="0"] +) +if test "x$enable_ubsan" = "x1" ; then + JE_CFLAGS_ADD([-fsanitize=undefined]) + JE_CXXFLAGS_ADD([-fsanitize=undefined]) + JE_LDFLAGS_ADD([-fsanitize=undefined]) +fi + dnl ============================================================================ dnl Enable background threads if possible. 
@@ -2869,7 +2929,8 @@ AC_MSG_RESULT([CXX : ${CXX}]) AC_MSG_RESULT([CONFIGURE_CXXFLAGS : ${CONFIGURE_CXXFLAGS}]) AC_MSG_RESULT([SPECIFIED_CXXFLAGS : ${SPECIFIED_CXXFLAGS}]) AC_MSG_RESULT([EXTRA_CXXFLAGS : ${EXTRA_CXXFLAGS}]) -AC_MSG_RESULT([LDFLAGS : ${LDFLAGS}]) +AC_MSG_RESULT([CONFIGURE_LDFLAGS : ${CONFIGURE_LDFLAGS}]) +AC_MSG_RESULT([SPECIFIED_LDFLAGS : ${SPECIFIED_LDFLAGS}]) AC_MSG_RESULT([EXTRA_LDFLAGS : ${EXTRA_LDFLAGS}]) AC_MSG_RESULT([DSO_LDFLAGS : ${DSO_LDFLAGS}]) AC_MSG_RESULT([LIBS : ${LIBS}]) @@ -2916,4 +2977,6 @@ AC_MSG_RESULT([cache-oblivious : ${enable_cache_oblivious}]) AC_MSG_RESULT([pageid : ${enable_pageid}]) AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) +AC_MSG_RESULT([tsan : ${enable_tsan}]) +AC_MSG_RESULT([ubsan : ${enable_ubsan}]) AC_MSG_RESULT([===============================================================================]) From c17bf8b368dd400614a42942c2c31a50bce5c680 Mon Sep 17 00:00:00 2001 From: roblabla Date: Tue, 30 Jul 2024 14:56:42 +0200 Subject: [PATCH 2470/2608] Disable config from file or envvar with build flag This adds a new autoconf flag, --disable-user-config, which disables reading the configuration from /etc/malloc.conf or the MALLOC_CONF environment variable. This can be useful when integrating jemalloc in a binary that internally handles all aspects of the configuration and shouldn't be impacted by ambient change in the environment. 
--- configure.ac | 18 +++++++++++++ include/jemalloc/jemalloc_defs.h.in | 6 +++++ src/jemalloc.c | 40 ++++++++++++++++++----------- 3 files changed, 49 insertions(+), 15 deletions(-) diff --git a/configure.ac b/configure.ac index 7f59b3f1..b01ff56b 100644 --- a/configure.ac +++ b/configure.ac @@ -1401,6 +1401,23 @@ if test "x$enable_stats" = "x1" ; then fi AC_SUBST([enable_stats]) +dnl Disable reading configuration from file and environment variable +AC_ARG_ENABLE([user_config], + [AS_HELP_STRING([--disable-user-config], + [Do not read malloc config from /etc/malloc.conf or MALLOC_CONF])], +[if test "x$enable_user_config" = "xno" ; then + enable_user_config="0" +else + enable_user_config="1" +fi +], +[enable_user_config="1"] +) +if test "x$enable_user_config" = "x1" ; then + AC_DEFINE([JEMALLOC_CONFIG_ENV], [ ], [ ]) + AC_DEFINE([JEMALLOC_CONFIG_FILE], [ ], [ ]) +fi + dnl Do not enable smallocx by default. AC_ARG_ENABLE([experimental_smallocx], [AS_HELP_STRING([--enable-experimental-smallocx], [Enable experimental smallocx API])], @@ -2962,6 +2979,7 @@ AC_MSG_RESULT([static libs : ${enable_static}]) AC_MSG_RESULT([autogen : ${enable_autogen}]) AC_MSG_RESULT([debug : ${enable_debug}]) AC_MSG_RESULT([stats : ${enable_stats}]) +AC_MSG_RESULT([user_config : ${enable_user_config}]) AC_MSG_RESULT([experimental_smallocx : ${enable_experimental_smallocx}]) AC_MSG_RESULT([prof : ${enable_prof}]) AC_MSG_RESULT([prof-libunwind : ${enable_prof_libunwind}]) diff --git a/include/jemalloc/jemalloc_defs.h.in b/include/jemalloc/jemalloc_defs.h.in index ef04e756..96c75011 100644 --- a/include/jemalloc/jemalloc_defs.h.in +++ b/include/jemalloc/jemalloc_defs.h.in @@ -46,6 +46,12 @@ */ #undef JEMALLOC_USE_CXX_THROW +/* + * If undefined, disables reading configuration from environment variable or file + */ +#undef JEMALLOC_CONFIG_ENV +#undef JEMALLOC_CONFIG_FILE + #ifdef _MSC_VER # ifdef _WIN64 # define LG_SIZEOF_PTR_WIN 3 diff --git a/src/jemalloc.c b/src/jemalloc.c index 
6d2f6494..8ae72efb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -985,44 +985,53 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { } break; case 2: { +#ifndef JEMALLOC_CONFIG_FILE + ret = NULL; + break; +#else ssize_t linklen = 0; -#ifndef _WIN32 +# ifndef _WIN32 int saved_errno = errno; const char *linkname = -# ifdef JEMALLOC_PREFIX +# ifdef JEMALLOC_PREFIX "/etc/"JEMALLOC_PREFIX"malloc.conf" -# else +# else "/etc/malloc.conf" -# endif +# endif ; /* * Try to use the contents of the "/etc/malloc.conf" symbolic * link's name. */ -#ifndef JEMALLOC_READLINKAT +# ifndef JEMALLOC_READLINKAT linklen = readlink(linkname, readlink_buf, PATH_MAX); -#else +# else linklen = readlinkat(AT_FDCWD, linkname, readlink_buf, PATH_MAX); -#endif +# endif if (linklen == -1) { /* No configuration specified. */ linklen = 0; /* Restore errno. */ set_errno(saved_errno); } -#endif +# endif readlink_buf[linklen] = '\0'; ret = readlink_buf; break; - } case 3: { - const char *envname = -#ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -#else - "MALLOC_CONF" #endif - ; + } case 3: { +#ifndef JEMALLOC_CONFIG_ENV + ret = NULL; + break; +#else + const char *envname = +# ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX"MALLOC_CONF" +# else + "MALLOC_CONF" +# endif + ; if ((ret = jemalloc_getenv(envname)) != NULL) { opt_malloc_conf_env_var = ret; @@ -1031,6 +1040,7 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { ret = NULL; } break; +#endif } case 4: { ret = je_malloc_conf_2_conf_harder; break; From 421b17a622a5037b82aa658dc0cc8264ddd6e711 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Wed, 5 Feb 2025 11:00:15 -0800 Subject: [PATCH 2471/2608] Remove age_counter from hpa_central Before this commit we had two age counters: one global in HPA central and one local in each HPA shard. We used HPA shard counter, when we are reused empty pageslab and HPA central counter anywhere else. 
They are supposed to be comparable, because we use them for allocation placement decisions, but in reality they are not, there are no ordering guarantees between them. At the moment, there is no way for a pageslab to migrate between HPA shards, so we don't actually need the HPA central age counter. --- include/jemalloc/internal/hpa.h | 2 -- src/hpa.c | 10 +++++----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 1f90a15f..d788d051 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -31,8 +31,6 @@ struct hpa_central_s { size_t eden_len; /* Source for metadata. */ base_t *base; - /* Number of grow operations done on this hpa_central_t. */ - uint64_t age_counter; /* The HPA hooks. */ hpa_hooks_t hooks; diff --git a/src/hpa.c b/src/hpa.c index cb3f978c..932cf201 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -82,7 +82,6 @@ hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) central->base = base; central->eden = NULL; central->eden_len = 0; - central->age_counter = 0; central->hooks = *hooks; return false; } @@ -95,7 +94,7 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { static hpdata_t * hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, - bool *oom) { + uint64_t age, bool *oom) { /* Don't yet support big allocations; these should get filtered out. 
*/ assert(size <= HUGEPAGE); /* @@ -118,7 +117,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, malloc_mutex_unlock(tsdn, &central->grow_mtx); return NULL; } - hpdata_init(ps, central->eden, central->age_counter++); + hpdata_init(ps, central->eden, age); central->eden = NULL; central->eden_len = 0; malloc_mutex_unlock(tsdn, &central->grow_mtx); @@ -168,7 +167,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, assert(central->eden_len % HUGEPAGE == 0); assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - hpdata_init(ps, central->eden, central->age_counter++); + hpdata_init(ps, central->eden, age); char *eden_char = (char *)central->eden; eden_char += HUGEPAGE; @@ -738,7 +737,8 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, &oom); + hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, + shard->age_counter++, &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; From 499f3068593ec61dae961e2c8ea3e0cf1482d616 Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Wed, 5 Feb 2025 04:32:31 -0800 Subject: [PATCH 2472/2608] Fix arena 0 `deferral_allowed` flag init Arena 0 has a dedicated initialization path, which differs from the initialization path of other arenas. The main difference for the purpose of this change is that we initialize arena 0 before we initialize background threads. HPA shard options have a `deferral_allowed` flag which should be equal to the `background_thread_enabled()` return value, but this wasn't the case before this change, because for arena 0 `background_thread_enabled()` was initialized correctly only after the arena 0 initialization phase had already ended. 
Below is initialization sequence for arena 0 after this commit to illustrate everything still should be initialized correctly. * `hpa_central_init` initializes HPA Central, before we initialize every HPA shard (including arena's 0). * `background_thread_boot1` initializes `background_thread_enabled()` return value. * `pa_shard_enable_hpa` initializes arena 0 HPA shard. ``` malloc_init_hard ------------- / / \ / / \ / / \ malloc_init_hard_a0_locked background_thread_boot1 pa_shard_enable_hpa / / \ / / \ / / \ arena_boot background_thread_enabled_seta hpa_shard_init | | pa_central_init | | hpa_central_init ``` --- src/arena.c | 4 ++-- src/jemalloc.c | 21 ++++++++++++++------- test/unit/hpa_background_thread.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/arena.c b/src/arena.c index 884d1bf9..ab6006d7 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1789,8 +1789,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { * We turn on the HPA if set to. There are two exceptions: * - Custom extent hooks (we should only return memory allocated from * them in that case). - * - Arena 0 initialization. In this case, we're mid-bootstrapping, and - * so arena_hpa_global is not yet initialized. + * - Arena 0 initialization. In this case, we're mid-bootstrapping, + * and so background_thread_enabled is not yet initialized. 
*/ if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; diff --git a/src/jemalloc.c b/src/jemalloc.c index 8ae72efb..55e85710 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1964,13 +1964,6 @@ malloc_init_hard_a0_locked(void) { } else { opt_hpa = false; } - } else if (opt_hpa) { - hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; - hpa_shard_opts.deferral_allowed = background_thread_enabled(); - if (pa_shard_enable_hpa(TSDN_NULL, &a0->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { - return true; - } } malloc_init_state = malloc_init_a0_initialized; @@ -2225,6 +2218,20 @@ malloc_init_hard(void) { || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } + if (opt_hpa) { + /* + * We didn't initialize arena 0 hpa_shard in arena_new, because + * background_thread_enabled wasn't initialized yet, but we + * need it to set correct value for deferral_allowed. + */ + arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); + hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; + hpa_shard_opts.deferral_allowed = background_thread_enabled(); + if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard, + &hpa_shard_opts, &opt_hpa_sec_opts)) { + UNLOCK_RETURN(tsd_tsdn(tsd), true, true) + } + } if (config_prof && prof_boot2(tsd, b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index e4abb63b..93f046b5 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -1,6 +1,31 @@ #include "test/jemalloc_test.h" #include "test/sleep.h" +TEST_BEGIN(test_hpa_background_thread_a0_initialized) { + /* + * Arena 0 has dedicated initialization path. We'd like to make sure + * deferral_allowed value initialized correctly from the start of the + * application. 
+ */ + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + test_skip_if(!have_background_thread); + test_skip_if(san_guard_enabled()); + + bool enabled = false; + size_t sz = sizeof(enabled); + int err = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0); + expect_d_eq(err, 0, "Unexpected mallctl() failure"); + expect_true(enabled, "Background thread should be enabled"); + + arena_t *a0 = arena_get(TSDN_NULL, 0, false); + expect_ptr_ne(a0, NULL, ""); + bool deferral_allowed = a0->pa_shard.hpa_shard.opts.deferral_allowed; + expect_true(deferral_allowed, + "Should have deferral_allowed option enabled for arena #0"); +} +TEST_END + static void sleep_for_background_thread_interval(void) { /* @@ -207,6 +232,12 @@ main(void) { opt_background_thread = true; } return test_no_reentrancy( + /* + * Unfortunately, order of tests is important here. We need to + * make sure arena #0 initialized correctly, before we start + * turning background thread on and off in other tests. + */ + test_hpa_background_thread_a0_initialized, test_hpa_background_thread_purges, test_hpa_background_thread_enable_disable); } From f55e0c3f5c7c5ea1ee40c7c0c6dff4f19aab9c32 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 3 Mar 2025 13:52:06 -0800 Subject: [PATCH 2473/2608] Remove unsupported Cirrus CI config --- .cirrus.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.cirrus.yml b/.cirrus.yml index 13714014..585aa42f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -31,9 +31,6 @@ task: - name: 15-CURRENT freebsd_instance: image_family: freebsd-15-0-snap - - name: 14-STABLE - freebsd_instance: - image_family: freebsd-14-0-snap install_script: - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - pkg upgrade -y From ac279d7e717e6b5f836657fbc525d0975f80a7d0 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 3 Mar 2025 10:17:03 -0800 Subject: [PATCH 2474/2608] Fix profiling sample metadata lookup 
during xallocx --- src/jemalloc.c | 10 +++++++++- test/unit/prof_small.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 55e85710..31d4cb27 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -3708,7 +3708,15 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, prof_info_get(tsd, ptr, alloc_ctx, &prof_info); prof_alloc_rollback(tsd, tctx); } else { - prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); + /* + * Need to retrieve the new alloc_ctx since the modification + * to edata has already been done. + */ + emap_alloc_ctx_t new_alloc_ctx; + emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, + &new_alloc_ctx); + prof_info_get_and_reset_recent(tsd, ptr, &new_alloc_ctx, + &prof_info); assert(usize <= usize_max); sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c index 4a982b70..e3462c1f 100644 --- a/test/unit/prof_small.c +++ b/test/unit/prof_small.c @@ -31,13 +31,39 @@ TEST_BEGIN(test_profile_small_allocations) { } TEST_END +TEST_BEGIN(test_profile_small_allocations_sdallocx) { + test_skip_if(!config_prof); + + for (szind_t index = 0; index < SC_NBINS; index++) { + size_t size = sz_index2size(index); + void *ptr = malloc(size); + assert_small_allocation_sampled(ptr, size); + /* + * While free calls into ifree, sdallocx calls into isfree, + * This test covers the isfree path to make sure promoted small + * allocs are handled properly. 
+ */ + sdallocx(ptr, size, 0); + } +} +TEST_END + TEST_BEGIN(test_profile_small_reallocations_growing) { test_skip_if(!config_prof); void *ptr = NULL; - for (szind_t index = 0; index < SC_NBINS; index++) { + for (szind_t index = 0; index <= SC_NBINS; index++) { size_t size = sz_index2size(index); ptr = realloc(ptr, size); + /* + * When index reaches SC_NBINS, it is no longer a small alloc, + * we still want to test the realloc from a small alloc to a + * large one, but we should not assert_small_allocation_sampled + * on it. + */ + if (index == SC_NBINS) { + break; + } assert_small_allocation_sampled(ptr, size); } } @@ -72,6 +98,7 @@ TEST_END int main(void) { return test(test_profile_small_allocations, + test_profile_small_allocations_sdallocx, test_profile_small_reallocations_growing, test_profile_small_reallocations_shrinking, test_profile_small_reallocations_same_size_class); From c067a55c790bebd69fd6d87935f8c353524ef814 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 26 Mar 2024 14:35:29 -0700 Subject: [PATCH 2475/2608] Introducing a new usize calculation policy Converting size to usize has been done in jemalloc by ceiling size to the closest size class. However, this causes a lot of memory waste with HPA enabled. This commit changes how usize is calculated so that the gap between two contiguous usizes is no larger than a page. Specifically, this commit includes the following changes: 1. Adding a build-time config option (--enable-limit-usize-gap) and a runtime one (limit_usize_gap) to guard the changes. When build-time config is enabled, some minor CPU overhead is expected because usize will be stored and accessed apart from index. When the runtime option is also enabled (it can only be enabled with the build-time config enabled), a new usize calculation approach will be employed. This new calculation will ceil size to the closest multiple of PAGE for all sizes larger than USIZE_GROW_SLOW_THRESHOLD instead of using the size classes. 
Note when the build-time config is enabled, the runtime option is on by default. 2. Prepare tcache for size to grow by PAGE over GROUP*PAGE. To prepare for the upcoming changes where size class grows by PAGE when larger than NGROUP * PAGE, disable the tcache when it is larger than 2 * NGROUP * PAGE. The threshold for tcache is set higher to prevent perf regression as much as possible while usizes between NGROUP * PAGE and 2 * NGROUP * PAGE happen to grow by PAGE. 3. Prepare pac and hpa psset for size to grow by PAGE over GROUP*PAGE. For PAC, to avoid having too many bins, arena bins still have the same layout. This means some extra search is needed for a page-level request that is not aligned with the original size class: it should also search the heap before the current index since the previous heap might also be able to have some allocations satisfying it. The same changes apply to HPA's psset. This search relies on the enumeration of the heap because not all allocs in the previous heap are guaranteed to satisfy the request. To balance the memory and CPU overhead, we currently enumerate at most a fixed number of nodes before concluding none can satisfy the request during an enumeration. 4. Add bytes counter to arena large stats. To prepare for the upcoming usize changes, stats collected by multiplying alive allocations and the bin size are no longer accurate. Thus, add separate counters to record the bytes malloced and dalloced. 5. Change structs used when freeing to avoid using index2size for large sizes. - Change the definition of emap_alloc_ctx_t - Change the read of both from edata_t. - Change the assignment and usage of emap_alloc_ctx_t. - Change other callsites of index2size. Note the changes in the data structure, i.e., emap_alloc_ctx_t, will be used when the build-time config (--enable-limit-usize-gap) is enabled but they will store the same value as index2size(szind) if the runtime option (opt_limit_usize_gap) is not enabled. 6. 
Adapt hpa to the usize changes. Change the settings in sec to limit is usage for sizes larger than USIZE_GROW_SLOW_THRESHOLD and modify corresponding tests. 7. Modify usize calculation and corresponding tests. Change the sz_s2u_compute. Note sz_index2size is not always safe now while sz_size2index still works as expected. --- configure.ac | 19 +++ include/jemalloc/internal/arena_inlines_b.h | 52 +++++--- include/jemalloc/internal/arena_stats.h | 10 +- include/jemalloc/internal/edata.h | 61 ++++++++- include/jemalloc/internal/emap.h | 70 +++++++++- include/jemalloc/internal/hpdata.h | 8 +- .../internal/jemalloc_internal_defs.h.in | 6 + .../internal/jemalloc_internal_externs.h | 1 + .../internal/jemalloc_internal_inlines_c.h | 5 +- .../jemalloc/internal/jemalloc_preamble.h.in | 8 ++ include/jemalloc/internal/ph.h | 120 +++++++++++++++++- include/jemalloc/internal/sc.h | 18 +++ include/jemalloc/internal/sz.h | 47 ++++++- include/jemalloc/internal/tcache_types.h | 6 +- src/arena.c | 28 +++- src/ctl.c | 8 +- src/eset.c | 119 ++++++++++++++++- src/hpa.c | 2 +- src/jemalloc.c | 58 +++++++-- src/prof_data.c | 8 +- src/psset.c | 34 ++++- src/sec.c | 7 + src/tcache.c | 3 +- test/integration/rallocx.c | 2 +- test/test.sh.in | 1 + test/unit/arena_reset.c | 3 +- test/unit/hpa.c | 2 +- test/unit/mallctl.c | 1 + test/unit/ph.c | 19 ++- test/unit/sec.c | 3 +- test/unit/size_classes.c | 38 +++++- test/unit/size_classes.sh | 5 + test/unit/stats.c | 15 ++- 33 files changed, 713 insertions(+), 74 deletions(-) create mode 100644 test/unit/size_classes.sh diff --git a/configure.ac b/configure.ac index b01ff56b..a55a5a08 100644 --- a/configure.ac +++ b/configure.ac @@ -2732,6 +2732,24 @@ if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi +dnl ============================================================================ +dnl Limit the gap between two contiguous usizes to be at most PAGE. 
+AC_ARG_ENABLE([limit_usize_gap], + [AS_HELP_STRING([--enable-limit-usize-gap], + [Limit the gap between two contiguous usizes])], +[if test "x$limit_usize_gap" = "xno" ; then + limit_usize_gap="0" +else + limit_usize_gap="1" +fi +], +[limit_usize_gap="0"] +) +if test "x$limit_usize_gap" = "x1" ; then + AC_DEFINE([LIMIT_USIZE_GAP], [ ]) +fi +AC_SUBST([limit_usize_gap]) + dnl ============================================================================ dnl Check for glibc malloc hooks @@ -2997,4 +3015,5 @@ AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([tsan : ${enable_tsan}]) AC_MSG_RESULT([ubsan : ${enable_ubsan}]) +AC_MSG_RESULT([limit-usize-gap : ${limit_usize_gap}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index ea246cc5..108493f2 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -51,7 +51,7 @@ arena_choose_maybe_huge(tsd_t *tsd, arena_t *arena, size_t size) { } JEMALLOC_ALWAYS_INLINE bool -large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { +large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { if (!config_opt_safety_checks) { return false; } @@ -68,7 +68,6 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, szind_t szind) { "possibly caused by double free bugs.", ptr); return true; } - size_t input_size = sz_index2size(szind); if (unlikely(input_size != edata_usize_get(edata))) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); @@ -101,9 +100,10 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ assert(edata != NULL); + size_t usize = (alloc_ctx == NULL)? 
edata_usize_get(edata): + emap_alloc_ctx_usize_get(alloc_ctx); if (reset_recent && - large_dalloc_safety_checks(edata, ptr, - edata_szind_get(edata))) { + large_dalloc_safety_checks(edata, ptr, usize)) { prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } @@ -225,7 +225,7 @@ arena_salloc(tsdn_t *tsdn, const void *ptr) { emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - return sz_index2size(alloc_ctx.szind); + return emap_alloc_ctx_usize_get(&alloc_ctx); } JEMALLOC_ALWAYS_INLINE size_t @@ -256,17 +256,24 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { assert(full_alloc_ctx.szind != SC_NSIZES); - return sz_index2size(full_alloc_ctx.szind); + return edata_usize_get(full_alloc_ctx.edata); } static inline void -arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind) { +arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, + size_t usize) { + /* + * szind is still needed in this function mainly becuase + * szind < SC_NBINS determines not only if this is a small alloc, + * but also if szind is valid (an inactive extent would have + * szind == SC_NSIZES). + */ if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -287,19 +294,22 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, - bool slow_path) { + size_t usize, bool slow_path) { assert (!tsdn_null(tsdn) && tcache != NULL); bool is_sample_promoted = config_prof && szind < SC_NBINS; if (unlikely(is_sample_promoted)) { @@ -313,7 +323,7 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } else { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - if (large_dalloc_safety_checks(edata, ptr, szind)) { + if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; } @@ -396,6 +406,8 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -407,7 +419,7 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); } } @@ -422,8 +434,9 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * There is no risk of being confused by a promoted sampled * object, so base szind and slab on the given size. */ - alloc_ctx.szind = sz_size2index(size); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + szind_t szind = sz_size2index(size); + emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), + size); } if ((config_prof && opt_prof) || config_debug) { @@ -446,7 +459,8 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { /* Small allocation. 
*/ arena_dalloc_small(tsdn, ptr); } else { - arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind); + arena_dalloc_large_no_tcache(tsdn, ptr, alloc_ctx.szind, + emap_alloc_ctx_usize_get(&alloc_ctx)); } } @@ -469,6 +483,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); } else { alloc_ctx = *caller_alloc_ctx; } @@ -486,6 +501,11 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); + emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, + sz_s2u(size)); + assert(!config_limit_usize_gap || + emap_alloc_ctx_usize_get(&alloc_ctx) == + edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -497,7 +517,7 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, - slow_path); + sz_s2u(size), slow_path); } } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 3d512630..7f075114 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -14,12 +14,18 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS typedef struct arena_stats_large_s arena_stats_large_t; struct arena_stats_large_s { /* - * Total number of allocation/deallocation requests served directly by - * the arena. + * Total number of large allocation/deallocation requests served directly + * by the arena. */ locked_u64_t nmalloc; locked_u64_t ndalloc; + /* + * Total large active bytes (allocated - deallocated) served directly + * by the arena. + */ + locked_u64_t active_bytes; + /* * Number of allocation requests that correspond to this size class. 
* This includes requests served by tcache, though tcache only diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2381ccbc..b087ea31 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -21,6 +21,14 @@ */ #define EDATA_ALIGNMENT 128 +/* + * Defines how many nodes visited when enumerating the heap to search for + * qualifed extents. More nodes visited may result in better choices at + * the cost of longer search time. This size should not exceed 2^16 - 1 + * because we use uint16_t for accessing the queue needed for enumeration. + */ +#define ESET_ENUMERATE_MAX_NUM 32 + enum extent_state_e { extent_state_active = 0, extent_state_dirty = 1, @@ -89,8 +97,8 @@ struct edata_cmp_summary_s { /* Extent (span of pages). Use accessor functions for e_* fields. */ typedef struct edata_s edata_t; -ph_structs(edata_avail, edata_t); -ph_structs(edata_heap, edata_t); +ph_structs(edata_avail, edata_t, ESET_ENUMERATE_MAX_NUM); +ph_structs(edata_heap, edata_t, ESET_ENUMERATE_MAX_NUM); struct edata_s { /* * Bitfield containing several fields: @@ -281,7 +289,54 @@ edata_szind_get(const edata_t *edata) { static inline size_t edata_usize_get(const edata_t *edata) { - return sz_index2size(edata_szind_get(edata)); + assert(edata != NULL); + /* + * When sz_limit_usize_gap_enabled() is true, two cases: + * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, + * usize_from_size is accurate; + * 2. otherwise, usize_from_ind is accurate. + * + * When sz_limit_usize_gap_enabled() is not true, the two should be the + * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. + * + * Note sampled small allocs will be promoted. Their extent size is + * recorded in edata_size_get(edata), while their szind reflects the + * true usize. Thus, usize retrieved here is still accurate for + * sampled small allocs. 
+ */ + szind_t szind = edata_szind_get(edata); +#ifdef JEMALLOC_JET + /* + * Double free is invalid and results in undefined behavior. However, + * for double free tests to end gracefully, return an invalid usize + * when szind shows the edata is not active, i.e., szind == SC_NSIZES. + */ + if (unlikely(szind == SC_NSIZES)) { + return SC_LARGE_MAXCLASS + 1; + } +#endif + + if (!sz_limit_usize_gap_enabled() || szind < SC_NBINS) { + size_t usize_from_ind = sz_index2size(szind); + if (!sz_limit_usize_gap_enabled() && + usize_from_ind >= SC_LARGE_MINCLASS) { + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize_from_size = size - sz_large_pad; + assert(usize_from_ind == usize_from_size); + } + return usize_from_ind; + } + + size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); + assert(size > sz_large_pad); + size_t usize_from_size = size - sz_large_pad; + /* + * no matter limit-usize-gap enabled or not, usize retrieved from size + * is not accurate when smaller than SC_LARGE_MINCLASS. + */ + assert(usize_from_size >= SC_LARGE_MINCLASS); + return usize_from_size; } static inline unsigned diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 7ac0ae95..5885daa6 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -20,8 +20,9 @@ struct emap_s { }; /* Used to pass rtree lookup context down the path. 
*/ -typedef struct emap_alloc_ctx_t emap_alloc_ctx_t; -struct emap_alloc_ctx_t { +typedef struct emap_alloc_ctx_s emap_alloc_ctx_t; +struct emap_alloc_ctx_s { + size_t usize; szind_t szind; bool slab; }; @@ -230,16 +231,66 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { return rtree_read(tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr).edata; } +JEMALLOC_ALWAYS_INLINE void +emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, + size_t usize) { + alloc_ctx->szind = szind; + alloc_ctx->slab = slab; + /* + * When config_limit_usize_gap disabled, alloc_ctx->usize + * should not be accessed. + */ + if (config_limit_usize_gap) { + alloc_ctx->usize = usize; + assert(sz_limit_usize_gap_enabled() || + usize == sz_index2size(szind)); + } else if (config_debug) { + alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; + } +} + +JEMALLOC_ALWAYS_INLINE size_t +emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { + assert(alloc_ctx->szind < SC_NSIZES); + if (!config_limit_usize_gap || alloc_ctx->slab) { + assert(!config_limit_usize_gap || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + return sz_index2size(alloc_ctx->szind); + } + assert(sz_limit_usize_gap_enabled() || + alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); + return alloc_ctx->usize; +} + /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_metadata_t metadata = rtree_metadata_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); - alloc_ctx->szind = metadata.szind; - alloc_ctx->slab = metadata.slab; + if (config_limit_usize_gap) { + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + /* + * If the alloc is invalid, do not calculate usize since edata + * could be corrupted. 
+ */ + if (contents.metadata.szind == SC_NSIZES || + contents.edata == NULL) { + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, 0); + return; + } + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, edata_usize_get(contents.edata)); + } else { + rtree_metadata_t metadata = rtree_metadata_read(tsdn, + &emap->rtree, rtree_ctx, (uintptr_t)ptr); + /* alloc_ctx->usize will not be read/write in this case. */ + emap_alloc_ctx_init(alloc_ctx, metadata.szind, metadata.slab, + SC_LARGE_MAXCLASS + 1); + } } /* The pointer must be mapped. */ @@ -293,8 +344,15 @@ emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, if (err) { return true; } + /* + * Small allocs using the fastpath can always use index to get the + * usize. Therefore, do not set alloc_ctx->usize here. + */ alloc_ctx->szind = metadata.szind; alloc_ctx->slab = metadata.slab; + if (config_debug) { + alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; + } return false; } diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 7ba92112..a8a845ec 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -20,8 +20,14 @@ * an observable property of any given region of address space). It's just * hugepage-sized and hugepage-aligned; it's *potentially* huge. */ + +/* + * The max enumeration num should not exceed 2^16 - 1, see comments in edata.h + * for ESET_ENUMERATE_MAX_NUM for more details. 
+ */ +#define PSSET_ENUMERATE_MAX_NUM 32 typedef struct hpdata_s hpdata_t; -ph_structs(hpdata_age_heap, hpdata_t); +ph_structs(hpdata_age_heap, hpdata_t, PSSET_ENUMERATE_MAX_NUM); struct hpdata_s { /* * We likewise follow the edata convention of mangling names and forcing diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 742d599d..e76eaaf4 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -475,6 +475,12 @@ /* If defined, use __int128 for optimization. */ #undef JEMALLOC_HAVE_INT128 +/* + * If defined, the gap between any two contiguous usizes should not exceed + * PAGE. + */ +#undef LIMIT_USIZE_GAP + #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 2c6b58f7..8c6df450 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -39,6 +39,7 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; extern size_t opt_calloc_madvise_threshold; +extern bool opt_limit_usize_gap; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 854aec1e..c7ef9161 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -425,8 +425,9 @@ maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { if (alloc_ctx->szind != dbg_ctx.szind) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size(dbg_ctx.szind), - /* input_size */ 
sz_index2size(alloc_ctx->szind)); + /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), + /* input_size */ emap_alloc_ctx_usize_get( + alloc_ctx)); return true; } if (alloc_ctx->slab != dbg_ctx.slab) { diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index a59c3489..ef637a2d 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -276,4 +276,12 @@ static const bool have_memcntl = #endif ; +static const bool config_limit_usize_gap = +#ifdef LIMIT_USIZE_GAP + true +#else + false +#endif + ; + #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index ef9634be..05376004 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -75,6 +75,16 @@ struct ph_s { size_t auxcount; }; +typedef struct ph_enumerate_vars_s ph_enumerate_vars_t; +struct ph_enumerate_vars_s { + uint16_t front; + uint16_t rear; + uint16_t queue_size; + uint16_t visited_num; + uint16_t max_visit_num; + uint16_t max_queue_size; +}; + JEMALLOC_ALWAYS_INLINE phn_link_t * phn_link_get(void *phn, size_t offset) { return (phn_link_t *)(((char *)phn) + offset); @@ -414,14 +424,98 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { } } -#define ph_structs(a_prefix, a_type) \ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num, + uint16_t max_queue_size) { + vars->queue_size = 0; + vars->visited_num = 0; + vars->front = 0; + vars->rear = 0; + vars->max_visit_num = max_visit_num; + vars->max_queue_size = max_queue_size; + assert(vars->max_visit_num > 0); + /* + * max_queue_size must be able to support max_visit_num, which means + * the queue will not overflow before reaching max_visit_num. 
+ */ + assert(vars->max_queue_size >= (vars->max_visit_num + 1)/2); +} + +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_queue_push(void *phn, void **bfs_queue, + ph_enumerate_vars_t *vars) { + assert(vars->queue_size < vars->max_queue_size); + bfs_queue[vars->rear] = phn; + vars->rear = (vars->rear + 1) % vars->max_queue_size; + (vars->queue_size) ++; +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) { + assert(vars->queue_size > 0); + assert(vars->queue_size <= vars->max_queue_size); + void *ret = bfs_queue[vars->front]; + vars->front = (vars->front + 1) % vars->max_queue_size; + (vars->queue_size) --; + return ret; +} + + +/* + * The two functions below offer a solution to enumerate the pairing heap. + * When enumerating, always call ph_enumerate_prepare first to prepare the queue + * needed for BFS. Next, call ph_enumerate_next to get the next element in + * the enumeration. When enumeration ends, ph_enumerate_next returns NULL and + * should not be called again. Enumeration ends when all elements in the heap + * have been enumerated or the number of visited elements exceeds + * max_visit_num. 
+ */ +JEMALLOC_ALWAYS_INLINE void +ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars, + uint16_t max_visit_num, uint16_t max_queue_size) { + ph_enumerate_vars_init(vars, max_visit_num, max_queue_size); + ph_enumerate_queue_push(ph->root, bfs_queue, vars); +} + +JEMALLOC_ALWAYS_INLINE void * +ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, + ph_enumerate_vars_t *vars) { + if (vars->queue_size == 0) { + return NULL; + } + + (vars->visited_num) ++; + if (vars->visited_num > vars->max_visit_num) { + return NULL; + } + + void *ret = ph_enumerate_queue_pop(bfs_queue, vars); + assert(ret != NULL); + void *left = phn_lchild_get(ret, offset); + void *right = phn_next_get(ret, offset); + if (left) { + ph_enumerate_queue_push(left, bfs_queue, vars); + } + if (right) { + ph_enumerate_queue_push(right, bfs_queue, vars); + } + return ret; +} + +#define ph_structs(a_prefix, a_type, a_max_queue_size) \ typedef struct { \ phn_link_t link; \ } a_prefix##_link_t; \ \ typedef struct { \ ph_t ph; \ -} a_prefix##_t; +} a_prefix##_t; \ + \ +typedef struct { \ + void *bfs_queue[a_max_queue_size]; \ + ph_enumerate_vars_t vars; \ +} a_prefix##_enumerate_helper_t; + /* * The ph_proto() macro generates function prototypes that correspond to the @@ -436,7 +530,12 @@ a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); +a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ +a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size); \ +a_attr a_type *a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ #define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ @@ -491,6 +590,21 @@ a_prefix##_remove_any(a_prefix##_t *ph) { \ a_prefix##_remove(ph, ret); \ } \ return ret; \ +} \ + \ +a_attr void \ +a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size) { \ + ph_enumerate_prepare(&ph->ph, helper->bfs_queue, &helper->vars, \ + max_visit_num, max_queue_size); \ +} \ + \ +a_attr a_type * \ +a_prefix##_enumerate_next(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper) { \ + return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ + helper->bfs_queue, &helper->vars); \ } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 770835cc..098e47b7 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -286,6 +286,24 @@ # endif #endif +/* + * When config_limit_usize_gap is enabled, the gaps between two contiguous + * size classes should not exceed PAGE. This means there should be no concept + * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). + * However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and + * 2 * SC_NGROUP * PAGE, the size class also happens to be aligned with PAGE. + * Since tcache relies on size classes to work and it greatly increases the + * perf of allocs & deallocs, we extend the existence of size class to + * 2 * SC_NGROUP * PAGE ONLY for the tcache module. This means for all other + * modules, there is no size class for sizes >= SC_LARGE_MINCLASS. Yet for + * tcache, the threshold is moved up to 2 * SC_NGROUP * PAGE, which is + * USIZE_GROW_SLOW_THRESHOLD defined below. With the default SC_NGROUP being + * 2, and PAGE being 4KB, the threshold for tcache (USIZE_GROW_SLOW_THRESHOLD) + * is 32KB. 
+ */ +#define LG_USIZE_GROW_SLOW_THRESHOLD (SC_LG_NGROUP + LG_PAGE + 1) +#define USIZE_GROW_SLOW_THRESHOLD (1U << LG_USIZE_GROW_SLOW_THRESHOLD) + #define SC_SLAB_MAXREGS (1U << SC_LG_SLAB_MAXREGS) typedef struct sc_s sc_t; diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index a2d2debc..6c0a1f0c 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -54,6 +54,15 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); +JEMALLOC_ALWAYS_INLINE bool +sz_limit_usize_gap_enabled() { +#ifdef LIMIT_USIZE_GAP + return opt_limit_usize_gap; +#else + return false; +#endif +} + JEMALLOC_ALWAYS_INLINE pszind_t sz_psz2ind(size_t psz) { assert(psz > 0); @@ -257,11 +266,34 @@ sz_index2size_lookup(szind_t index) { } JEMALLOC_ALWAYS_INLINE size_t -sz_index2size(szind_t index) { +sz_index2size_unsafe(szind_t index) { assert(index < SC_NSIZES); return sz_index2size_lookup(index); } +JEMALLOC_ALWAYS_INLINE size_t +sz_index2size(szind_t index) { + assert(!sz_limit_usize_gap_enabled() || + index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); + size_t size = sz_index2size_unsafe(index); + /* + * With limit_usize_gap enabled, the usize above + * SC_LARGE_MINCLASS should grow by PAGE. However, for sizes + * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the + * usize would not change because the size class gap in this + * range is just the same as PAGE. Although we use + * SC_LARGE_MINCLASS as the threshold in most places, we + * allow tcache and sec to cache up to + * USIZE_GROW_SLOW_THRESHOLD to minimize the side effect of + * not having size classes for larger sizes. Thus, we assert + * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here + * instead of SC_LARGE_MINCLASS. 
+ */ + assert(!sz_limit_usize_gap_enabled() || + size <= USIZE_GROW_SLOW_THRESHOLD); + return size; +} + JEMALLOC_ALWAYS_INLINE void sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { if (util_compile_time_const(size)) { @@ -296,7 +328,7 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - { + if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { size_t x = lg_floor((size<<1)-1); size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) ? LG_QUANTUM : x - SC_LG_NGROUP - 1; @@ -304,11 +336,22 @@ sz_s2u_compute(size_t size) { size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; return usize; + } else { + /* + * With sz_limit_usize_gap_enabled() == true, usize of a large + * allocation is calculated by ceiling size to the smallest + * multiple of PAGE to minimize the memory overhead, especially + * when using hugepages. + */ + size_t usize = PAGE_CEILING(size); + assert(usize - size < PAGE); + return usize; } } JEMALLOC_ALWAYS_INLINE size_t sz_s2u_lookup(size_t size) { + assert(!config_limit_usize_gap || size < SC_LARGE_MINCLASS); size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); assert(ret == sz_s2u_compute(size)); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index eebad79f..f13ff748 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,7 +19,11 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ +#ifdef LIMIT_USIZE_GAP + #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD +#else + #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ +#endif #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) 
diff --git a/src/arena.c b/src/arena.c index ab6006d7..54ecc403 100644 --- a/src/arena.c +++ b/src/arena.c @@ -145,8 +145,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, assert(nmalloc - ndalloc <= SIZE_T_MAX); size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - astats->allocated_large += - curlextents * sz_index2size(SC_NBINS + i); + + if (config_limit_usize_gap) { + uint64_t active_bytes = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].active_bytes); + locked_inc_u64_unsynchronized( + &lstats[i].active_bytes, active_bytes); + astats->allocated_large += active_bytes; + } else { + astats->allocated_large += + curlextents * sz_index2size(SC_NBINS + i); + } } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, @@ -315,6 +325,11 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); + if (config_limit_usize_gap) { + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); + } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -338,6 +353,11 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); + if (config_limit_usize_gap) { + locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); + } LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -802,7 +822,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { - usize = sz_index2size(alloc_ctx.szind); + usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(usize == isalloc(tsd_tsdn(tsd), ptr)); } /* 
Remove large allocation from prof sample set. */ @@ -1346,7 +1366,7 @@ arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, assert(sz_can_use_slab(size)); return arena_malloc_small(tsdn, arena, ind, zero); } else { - return large_malloc(tsdn, arena, sz_index2size(ind), zero); + return large_malloc(tsdn, arena, sz_s2u(size), zero); } } diff --git a/src/ctl.c b/src/ctl.c index 1ebcbf8e..73d4cb66 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -168,6 +168,7 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) +CTL_PROTO(opt_limit_usize_gap) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) CTL_PROTO(opt_malloc_conf_global_var) @@ -557,6 +558,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, + {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2341,6 +2343,8 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) +CTL_RO_NL_CGEN(config_limit_usize_gap, opt_limit_usize_gap, opt_limit_usize_gap, + bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, @@ -3364,8 +3368,8 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) -CTL_RO_NL_GEN(arenas_lextent_i_size, sz_index2size(SC_NBINS+(szind_t)mib[2]), - size_t) +CTL_RO_NL_GEN(arenas_lextent_i_size, + sz_index2size_unsafe(SC_NBINS+(szind_t)mib[2]), size_t) static const ctl_named_node_t * arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { diff --git a/src/eset.c b/src/eset.c index 6f8f335e..7dc9cce7 100644 --- a/src/eset.c +++ b/src/eset.c @@ -155,6 +155,71 @@ 
eset_remove(eset_t *eset, edata_t *edata) { cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +edata_t * +eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, + size_t alignment) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != + NULL) { + uintptr_t base = (uintptr_t)edata_base_get(edata); + size_t candidate_size = edata_size_get(edata); + if (candidate_size < size) { + continue; + } + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= size) { + return edata; + } + } + + return NULL; +} + +edata_t * +eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, + bool exact_only, edata_cmp_summary_t *ret_summ) { + if (edata_heap_empty(&eset->bins[bin_ind].heap)) { + return NULL; + } + + edata_t *ret = NULL, *edata = NULL; + edata_heap_enumerate_helper_t helper; + edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); + while ((edata = + edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != + NULL) { + if ((!exact_only && edata_size_get(edata) >= size) || + (exact_only && edata_size_get(edata) == size)) { + edata_cmp_summary_t temp_summ = + edata_cmp_summary_get(edata); + if (ret == NULL || edata_cmp_summary_comp(temp_summ, + *ret_summ) < 0) { + ret = edata; + *ret_summ = temp_summ; + } + } + } + + return ret; +} + /* * Find an extent with size [min_size, max_size) to satisfy the alignment 
* requirement. For each size, try only the first extent in the heap. @@ -162,8 +227,19 @@ eset_remove(eset_t *eset, edata_t *edata) { static edata_t * eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { - pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); - pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); + + /* See comments in eset_first_fit for why we enumerate search below. */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + edata_t *ret = NULL; + ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, + alignment); + if (ret != NULL) { + return ret; + } + } for (pszind_t i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); @@ -211,8 +287,43 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - return edata_heap_empty(&eset->bins[pind].heap) ? NULL : - edata_heap_first(&eset->bins[pind].heap); + if (sz_limit_usize_gap_enabled()) { + pszind_t pind_prev = + sz_psz2ind(sz_psz_quantize_floor(size)); + return eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ true, &ret_summ); + } else { + return edata_heap_empty(&eset->bins[pind].heap) ? NULL: + edata_heap_first(&eset->bins[pind].heap); + } + } + + /* + * Each element in the eset->bins is a heap corresponding to a size + * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * pind (including pind itself) will surely satisfy the requests while + * heaps before pind cannot satisfy the request because usize is + * calculated based on size classes then. However, when + * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling + * user requested size to the closest multiple of PAGE. 
This means in + * the heap before pind, i.e., pind_prev, there may exist extents able + * to satisfy the request and we should enumerate the heap when + * pind_prev != pind. + * + * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, + * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * pind points to the heap containing extents ranging in + * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss + * any candidates. When sz_limit_usize_gap_enabled() is true, the + * usize would be 1MB + 4KB and pind still points to the same heap. + * In this case, the heap pind_prev points to, which contains extents + * in the range [1MB, 1.25MB), may contain candidates satisfying the + * usize and thus should be enumerated. + */ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + ret = eset_enumerate_search(eset, size, pind_prev, + /* exact_only */ false, &ret_summ); } for (pszind_t i = diff --git a/src/hpa.c b/src/hpa.c index 932cf201..2a5d7e1f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -706,7 +706,7 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *deferred_work_generated) { assert(size <= HUGEPAGE); assert(size <= shard->opts.slab_max_alloc || - size == sz_index2size(sz_size2index(size))); + size == sz_s2u(size)); bool oom = false; size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, diff --git a/src/jemalloc.c b/src/jemalloc.c index 31d4cb27..67456bb7 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,6 +123,13 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); +bool opt_limit_usize_gap = +#ifdef LIMIT_USIZE_GAP + true; +#else + false; +#endif + const char *const zero_realloc_mode_names[] = { "alloc", "free", @@ -1578,8 +1585,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); + "hpa_sec_max_alloc", PAGE, USIZE_GROW_SLOW_THRESHOLD, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1763,6 +1770,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + if (config_limit_usize_gap) { + CONF_HANDLE_BOOL(opt_limit_usize_gap, + "limit_usize_gap"); + } + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR #undef CONF_CONTINUE @@ -2182,6 +2194,17 @@ static bool malloc_init_hard(void) { tsd_t *tsd; + if (config_limit_usize_gap) { + assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); + /* + * This asserts an extreme case where TINY_MAXCLASS is larger + * than LARGE_MINCLASS. It could only happen if some constants + * are configured miserably wrong. + */ + assert(SC_LG_TINY_MAXCLASS <= + (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + } #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif @@ -2376,7 +2399,8 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_index2size(*ind); + *usize = sz_limit_usize_gap_enabled()? 
sz_s2u(size): + sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; } @@ -2924,7 +2948,7 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - size_t usize = sz_index2size(alloc_ctx.szind); + size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx); if (config_prof && opt_prof) { prof_free(tsd, ptr, usize, &alloc_ctx); } @@ -2956,35 +2980,41 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; + szind_t szind = sz_size2index(usize); if (!config_prof) { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), + usize); } else { if (likely(!prof_sample_aligned(ptr))) { /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } else if (opt_prof) { + /* + * Small sampled allocs promoted can still get correct + * usize here. Check comments in edata_usize_get. + */ emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). 
*/ + size_t true_size = + emap_alloc_ctx_usize_get(&alloc_ctx); if (unlikely(alloc_ctx.szind != sz_size2index(usize))) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, - /* true_size */ sz_index2size( - alloc_ctx.szind), + /* true_size */ true_size, /* input_size */ usize); } } } else { - alloc_ctx.szind = sz_size2index(usize); - alloc_ctx.slab = (alloc_ctx.szind < SC_NBINS); + emap_alloc_ctx_init(&alloc_ctx, szind, + (szind < SC_NBINS), usize); } } bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); @@ -3486,7 +3516,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); if (aligned_usize_get(size, alignment, &usize, NULL, false)) { goto label_oom; @@ -3756,7 +3786,7 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); - old_usize = sz_index2size(alloc_ctx.szind); + old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); /* * The API explicitly absolves itself of protecting against (size + diff --git a/src/prof_data.c b/src/prof_data.c index 39af0c90..437673ee 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -513,7 +513,13 @@ void prof_unbias_map_init(void) { /* See the comment in prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { - double sz = (double)sz_index2size(i); + /* + * When limit_usize_gap is enabled, the unbiased calculation + * here is not as accurate as it was because usize now changes + * in a finer grain while the unbiased_sz is still calculated + * using the old way. 
+ */ + double sz = (double)sz_index2size_unsafe(i); double rate = (double)(ZU(1) << lg_prof_sample); double div_val = 1.0 - exp(-sz / rate); double unbiased_sz = sz / div_val; diff --git a/src/psset.c b/src/psset.c index 9a833193..e617f426 100644 --- a/src/psset.c +++ b/src/psset.c @@ -337,18 +337,50 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); } +hpdata_t * +psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { + if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { + return NULL; + } + + hpdata_t *ps = NULL; + hpdata_age_heap_enumerate_helper_t helper; + hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper, + PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + + while ((ps = hpdata_age_heap_enumerate_next(&psset->pageslabs[pind], + &helper))) { + if (hpdata_longest_free_range_get(ps) >= size) { + return ps; + } + } + + return NULL; +} + hpdata_t * psset_pick_alloc(psset_t *psset, size_t size) { assert((size & PAGE_MASK) == 0); assert(size <= HUGEPAGE); pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + hpdata_t *ps = NULL; + + /* See comments in eset_first_fit for why we enumerate search below. 
*/ + pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); + if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + ps = psset_enumerate_search(psset, pind_prev, size); + if (ps != NULL) { + return ps; + } + } + pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); } - hpdata_t *ps = hpdata_age_heap_first(&psset->pageslabs[pind]); + ps = hpdata_age_heap_first(&psset->pageslabs[pind]); if (ps == NULL) { return NULL; } diff --git a/src/sec.c b/src/sec.c index 19d69ff4..8827d1bd 100644 --- a/src/sec.c +++ b/src/sec.c @@ -24,6 +24,13 @@ bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, const sec_opts_t *opts) { assert(opts->max_alloc >= PAGE); + /* + * Same as tcache, sec do not cache allocs/dallocs larger than + * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases + * by PAGE and the number of usizes is too large. + */ + assert(!sz_limit_usize_gap_enabled() || + opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; diff --git a/src/tcache.c b/src/tcache.c index 15da14da..270d38ac 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1047,7 +1047,8 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin ndeferred++; continue; } - if (large_dalloc_safety_checks(edata, ptr, binind)) { + if (large_dalloc_safety_checks(edata, ptr, + sz_index2size(binind))) { /* See the comment in isfree. 
*/ continue; } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 68b8f381..85d9238b 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -49,7 +49,7 @@ TEST_BEGIN(test_grow_and_shrink) { size_t tsz; #define NCYCLES 3 unsigned i, j; -#define NSZS 1024 +#define NSZS 64 size_t szs[NSZS]; #define MAXSZ ZU(12 * 1024 * 1024) diff --git a/test/test.sh.in b/test/test.sh.in index b4fbb355..a4ee9396 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,6 +43,7 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ + limit_usize_gap=@limit_usize_gap@ \ . @srcroot@${t}.sh && \ export_malloc_conf && \ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 8ef0786c..09536b29 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,7 +78,8 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return sz_index2size(full_alloc_ctx.szind); + return config_limit_usize_gap? 
edata_usize_get(full_alloc_ctx.edata): + sz_index2size(full_alloc_ctx.szind); } static unsigned diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 50b96a87..6c42729a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -5,7 +5,7 @@ #define SHARD_IND 111 -#define ALLOC_MAX (HUGEPAGE / 4) +#define ALLOC_MAX (HUGEPAGE) typedef struct test_data_s test_data_t; struct test_data_s { diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 02fedaa7..296b7bff 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -332,6 +332,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); + TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); #undef TEST_MALLCTL_OPT } diff --git a/test/unit/ph.c b/test/unit/ph.c index 28f5e488..0339f993 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -2,8 +2,9 @@ #include "jemalloc/internal/ph.h" +#define BFS_ENUMERATE_MAX 30 typedef struct node_s node_t; -ph_structs(heap, node_t); +ph_structs(heap, node_t, BFS_ENUMERATE_MAX); struct node_s { #define NODE_MAGIC 0x9823af7e @@ -239,6 +240,22 @@ TEST_BEGIN(test_ph_random) { expect_false(heap_empty(&heap), "Heap should not be empty"); + /* Enumerate nodes. */ + heap_enumerate_helper_t helper; + uint16_t max_queue_size = sizeof(helper.bfs_queue) + / sizeof(void *); + expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX, + "Incorrect bfs queue length initialized"); + assert(max_queue_size == BFS_ENUMERATE_MAX); + heap_enumerate_prepare(&heap, &helper, + BFS_ENUMERATE_MAX, max_queue_size); + size_t node_count = 0; + while(heap_enumerate_next(&heap, &helper)) { + node_count ++; + } + expect_lu_eq(node_count, j, + "Unexpected enumeration results."); + /* Remove nodes. 
*/ switch (i % 6) { case 0: diff --git a/test/unit/sec.c b/test/unit/sec.c index 0b5e1c31..cfef043f 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -412,7 +412,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 10 * PAGE, + test_sec_init(&sec, &ta.pai, /* nshards */ 1, + /* max_alloc */ USIZE_GROW_SLOW_THRESHOLD, /* max_bytes */ 1000 * PAGE); edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 9e8a408f..24913803 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,8 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = get_max_size_class(); + max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + get_max_size_class(); max_index = sz_size2index(max_size_class); for (index = 0, size_class = sz_index2size(index); index < max_index || @@ -79,6 +80,40 @@ TEST_BEGIN(test_size_classes) { } TEST_END +TEST_BEGIN(test_grow_slow_size_classes) { + test_skip_if(!sz_limit_usize_gap_enabled()); + + size_t size = SC_LARGE_MINCLASS; + size_t target_usize = SC_LARGE_MINCLASS; + size_t max_size = get_max_size_class(); + size_t increase[3] = {PAGE - 1, 1, 1}; + while (size <= max_size) { + size_t usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[0]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[1]; + usize = sz_s2u(size); + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize as expected."); + size += increase[2]; + usize = sz_s2u(size); + target_usize += PAGE; + expect_zu_eq(usize, target_usize, + "sz_s2u() does not generate usize 
as expected."); + if (target_usize << 1 < target_usize) { + break; + } + target_usize = target_usize << 1; + size = target_usize; + } +} +TEST_END + TEST_BEGIN(test_psize_classes) { size_t size_class, max_psz; pszind_t pind, max_pind; @@ -182,6 +217,7 @@ int main(void) { return test( test_size_classes, + test_grow_slow_size_classes, test_psize_classes, test_overflow); } diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh new file mode 100644 index 00000000..93d5e8d1 --- /dev/null +++ b/test/unit/size_classes.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +if [ "x${limit_usize_gap}" = "x1" ] ; then + export MALLOC_CONF="limit_usize_gap:true" +fi diff --git a/test/unit/stats.c b/test/unit/stats.c index 203a71b5..584a582f 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -202,17 +202,22 @@ TEST_END TEST_BEGIN(test_stats_arenas_large) { void *p; - size_t sz, allocated; + size_t sz, allocated, allocated_before; uint64_t epoch, nmalloc, ndalloc; + size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; int expected = config_stats ? 
0 : ENOENT; - p = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); + sz = sizeof(size_t); + expect_d_eq(mallctl("stats.arenas.0.large.allocated", + (void *)&allocated_before, &sz, NULL, 0), expected, + "Unexpected mallctl() result"); + + p = mallocx(malloc_size, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", (void *)&allocated, &sz, NULL, 0), expected, "Unexpected mallctl() result"); @@ -223,8 +228,10 @@ TEST_BEGIN(test_stats_arenas_large) { &sz, NULL, 0), expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, + expect_zu_ge(allocated_before, 0, "allocated should be greater than zero"); + expect_zu_ge(allocated - allocated_before, sz_s2u(malloc_size), + "the diff between allocated should be greater than the allocation made"); expect_u64_gt(nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, From 6035d4a8d369d158ca299c10773e05796e1d18ad Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 27 Jan 2025 23:09:51 -0800 Subject: [PATCH 2476/2608] Cache extra extents in the dirty pool from ecache_alloc_grow --- include/jemalloc/internal/sz.h | 19 +++++--- src/pac.c | 80 ++++++++++++++++++++++++++++++++-- test/unit/arena_decay.c | 9 +++- 3 files changed, 96 insertions(+), 12 deletions(-) diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6c0a1f0c..1122461c 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -311,6 +311,17 @@ sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { } } +JEMALLOC_ALWAYS_INLINE size_t +sz_s2u_compute_using_delta(size_t size) { + size_t x = lg_floor((size<<1)-1); + size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) + ? 
LG_QUANTUM : x - SC_LG_NGROUP - 1; + size_t delta = ZU(1) << lg_delta; + size_t delta_mask = delta - 1; + size_t usize = (size + delta_mask) & ~delta_mask; + return usize; +} + JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute(size_t size) { if (unlikely(size > SC_LARGE_MAXCLASS)) { @@ -329,13 +340,7 @@ sz_s2u_compute(size_t size) { } #endif if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { - size_t x = lg_floor((size<<1)-1); - size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - SC_LG_NGROUP - 1; - size_t delta = ZU(1) << lg_delta; - size_t delta_mask = delta - 1; - size_t usize = (size + delta_mask) & ~delta_mask; - return usize; + return sz_s2u_compute_using_delta(size); } else { /* * With sz_limit_usize_gap_enabled() == true, usize of a large diff --git a/src/pac.c b/src/pac.c index 57a0c953..3523ef3d 100644 --- a/src/pac.c +++ b/src/pac.c @@ -112,10 +112,27 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } +size_t pac_alloc_retained_batched_size(size_t size) { + if (size > SC_LARGE_MAXCLASS) { + /* + * A valid input with usize SC_LARGE_MAXCLASS could still + * reach here because of sz_large_pad. Such a request is valid + * but we should not further increase it. Thus, directly + * return size for such cases. + */ + return size; + } + size_t batched_size = sz_s2u_compute_using_delta(size); + size_t next_hugepage_size = HUGEPAGE_CEILING(size); + return batched_size > next_hugepage_size? 
next_hugepage_size: + batched_size; +} + static edata_t * pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, size_t alignment, bool zero, bool guarded) { assert(!guarded || alignment <= PAGE); + size_t newly_mapped_size = 0; edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, NULL, size, alignment, zero, guarded); @@ -124,14 +141,69 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, NULL, size, alignment, zero, guarded); } + + /* + * We batched allocate a larger extent when limit_usize_gap is enabled + * because the reuse of extents in the dirty pool is worse without size + * classes for large allocs. For instance, when limit_usize_gap is not + * enabled, 1.1MB, 1.15MB, and 1.2MB allocs will all be ceiled to + * 1.25MB and can reuse the same buffer if they are alloc & dalloc + * sequentially. However, with limit_usize_gap enabled, they cannot + * reuse the same buffer and their sequential allocs & dallocs will + * result in three different extents. Thus, we cache extra mergeable + * extents in the dirty pool to improve the reuse. We skip this + * optimization if both maps_coalesce and opt_retain are disabled + * because VM is not cheap enough to be used aggressively and extents + * cannot be merged at will (only extents from the same VirtualAlloc + * can be merged). Note that it could still be risky to cache more + * extents when either maps_coalesce or opt_retain is enabled. Yet + * doing so is still beneficial in improving the reuse of extents + * with some limits. This choice should be reevaluated if + * pac_alloc_retained_batched_size is changed to be more aggressive. 
+ */ + if (sz_limit_usize_gap_enabled() && edata == NULL && + (maps_coalesce || opt_retain)) { + size_t batched_size = pac_alloc_retained_batched_size(size); + /* + * Note that ecache_alloc_grow will try to retrieve virtual + * memory from both retained pool and directly from OS through + * extent_alloc_wrapper if the retained pool has no qualified + * extents. This is also why the overcaching still works even + * with opt_retain off. + */ + edata = ecache_alloc_grow(tsdn, pac, ehooks, + &pac->ecache_retained, NULL, batched_size, + alignment, zero, guarded); + + if (edata != NULL && batched_size > size) { + edata_t *trail = extent_split_wrapper(tsdn, pac, + ehooks, edata, size, batched_size - size, + /* holding_core_locks */ false); + if (trail == NULL) { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_retained, edata); + edata = NULL; + } else { + ecache_dalloc(tsdn, pac, ehooks, + &pac->ecache_dirty, trail); + } + } + + if (edata != NULL) { + newly_mapped_size = batched_size; + } + } + if (edata == NULL) { edata = ecache_alloc_grow(tsdn, pac, ehooks, &pac->ecache_retained, NULL, size, alignment, zero, guarded); - if (config_stats && edata != NULL) { - atomic_fetch_add_zu(&pac->stats->pac_mapped, size, - ATOMIC_RELAXED); - } + newly_mapped_size = size; + } + + if (config_stats && newly_mapped_size != 0) { + atomic_fetch_add_zu(&pac->stats->pac_mapped, + newly_mapped_size, ATOMIC_RELAXED); } return edata; diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 10d1a6b1..00a38326 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -410,7 +410,14 @@ TEST_BEGIN(test_decay_never) { /* Verify that each deallocation generates additional dirty pages. */ size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); - expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + /* + * With limit_usize_gap enabled, some more extents + * are cached in the dirty pool, making the assumption below + * not true. 
+ */ + if (!sz_limit_usize_gap_enabled()) { + expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); + } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { dallocx(ptrs[i], flags); From 70f019cd3abc5dfc67df1b8a2c460bc5e8221ae2 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 14 May 2024 14:27:17 -0700 Subject: [PATCH 2477/2608] Enable limit-usize-gap in CI tests. Considering the new usize calculation will be default soon, add the config option in for Travis, Cirrus and appveyor. --- .appveyor.yml | 12 ++- .cirrus.yml | 2 +- .travis.yml | 218 +++++++++++++++++++++--------------------- scripts/gen_travis.py | 1 + 4 files changed, 119 insertions(+), 114 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index d31f9aed..dedc7867 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,27 +5,31 @@ environment: - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW64 CPU: x86_64 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-debug + CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 + CONFIG_FLAGS: --enable-limit-usize-gap - MSYSTEM: MINGW64 CPU: x86_64 + CONFIG_FLAGS: --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 + CONFIG_FLAGS: --enable-limit-usize-gap - MSYSTEM: MINGW32 CPU: i686 + CONFIG_FLAGS: --enable-limit-usize-gap install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% diff --git a/.cirrus.yml b/.cirrus.yml index 585aa42f..8051272c 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -40,7 +40,7 @@ task: # We don't perfectly track freebsd stdlib.h definitions. 
This is fine when # we count as a system header, but breaks otherwise, like during these # tests. - - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} + - ./configure --with-jemalloc-prefix=ci_ --enable-limit-usize-gap ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests diff --git a/.travis.yml b/.travis.yml index ceda8989..a32755c6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,331 +12,331 @@ jobs: include: - os: windows arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-limit-usize-gap" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" + env: CC=cl.exe CXX=cl.exe 
CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" - os: linux arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" 
- os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false 
--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-prof --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux 
arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true --enable-limit-usize-gap" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: 
linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: osx arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - 
env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - os: osx arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 43457967..a49bb83b 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -191,6 +191,7 @@ def format_job(os, arch, combination): if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) + configure_flags.append('--enable-limit-usize-gap') if not compilers: compiler = GCC.value else: From 22440a0207cd7d7c624c78723ca1eeb8a4353e79 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Tue, 4 Feb 2025 18:31:11 -0800 Subject: [PATCH 2478/2608] Implement process_madvise support. Add opt.process_madvise_max_batch which determines if process_madvise is enabled (non-zero) and the max # of regions in each batch. Added another limiting factor which is the space to reserve on stack, which results in the max batch of 128. 
--- configure.ac | 11 +++ include/jemalloc/internal/extent.h | 12 ++++ .../internal/jemalloc_internal_defs.h.in | 3 + .../jemalloc/internal/jemalloc_preamble.h.in | 7 ++ include/jemalloc/internal/pages.h | 1 + include/jemalloc/internal/typed_list.h | 4 ++ src/ctl.c | 4 ++ src/extent.c | 36 ++++++++-- src/jemalloc.c | 5 ++ src/pac.c | 67 ++++++++++++++++++- src/pages.c | 58 ++++++++++++++++ src/stats.c | 1 + test/unit/mallctl.c | 1 + 13 files changed, 204 insertions(+), 6 deletions(-) diff --git a/configure.ac b/configure.ac index a55a5a08..eb500db9 100644 --- a/configure.ac +++ b/configure.ac @@ -2544,6 +2544,17 @@ if test "x${je_cv_madvise}" = "xyes" ; then if test "x${je_cv_madv_collapse}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_MADVISE_COLLAPSE], [ ], [ ]) fi + + dnl Check for process_madvise + JE_COMPILABLE([process_madvise(2)], [ +#include +#include +], [ + syscall(SYS_process_madvise, 0, (void *)0, 0, 0, 0); +], [je_cv_process_madvise]) + if test "x${je_cv_process_madvise}" = "xyes" ; then + AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) + fi else dnl Check for posix_madvise. JE_COMPILABLE([posix_madvise], [ diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index 17feb703..be61db8d 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -21,6 +21,16 @@ #define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 extern size_t opt_lg_extent_max_active_fit; +#define PROCESS_MADVISE_MAX_BATCH_DEFAULT 0 +extern size_t opt_process_madvise_max_batch; + +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +/* The iovec is on stack. Limit the max batch to avoid stack overflow. 
*/ +#define PROCESS_MADVISE_MAX_BATCH_LIMIT (VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec)) +#else +#define PROCESS_MADVISE_MAX_BATCH_LIMIT 0 +#endif + edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool guarded); @@ -42,6 +52,8 @@ edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool growing_retained); void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +void extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata); void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index e76eaaf4..2e47438a 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -345,6 +345,9 @@ */ #undef JEMALLOC_MADVISE_NOCORE +/* Defined if process_madvise(2) is available. */ +#undef JEMALLOC_HAVE_PROCESS_MADVISE + /* Defined if mprotect(2) is available. 
*/ #undef JEMALLOC_HAVE_MPROTECT diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index ef637a2d..eba475a6 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -87,6 +87,13 @@ static const bool have_madvise_huge = false #endif ; +static const bool have_process_madvise = +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + true +#else + false +#endif + ; static const bool config_fill = #ifdef JEMALLOC_FILL true diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 0dcf96dc..366bc30b 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -121,6 +121,7 @@ bool pages_commit(void *addr, size_t size); bool pages_decommit(void *addr, size_t size); bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); +bool pages_purge_process_madvise(void *vec, size_t ven_len, size_t total_bytes); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); bool pages_collapse(void *addr, size_t size); diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h index 6535055a..7c4826fc 100644 --- a/include/jemalloc/internal/typed_list.h +++ b/include/jemalloc/internal/typed_list.h @@ -22,6 +22,10 @@ static inline el_type * \ list_type##_last(const list_type##_t *list) { \ return ql_last(&list->head, linkage); \ } \ +static inline el_type * \ +list_type##_next(const list_type##_t *list, el_type *item) { \ + return ql_next(&list->head, item, linkage); \ +} \ static inline void \ list_type##_append(list_type##_t *list, el_type *item) { \ ql_elm_new(item, linkage); \ diff --git a/src/ctl.c b/src/ctl.c index 73d4cb66..c55d9719 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -169,6 +169,7 @@ CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) CTL_PROTO(opt_limit_usize_gap) 
+CTL_PROTO(opt_process_madvise_max_batch) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) CTL_PROTO(opt_malloc_conf_global_var) @@ -559,6 +560,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, + {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2316,6 +2318,8 @@ CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) +CTL_RO_NL_GEN(opt_process_madvise_max_batch, opt_process_madvise_max_batch, + size_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) diff --git a/src/extent.c b/src/extent.c index 30942491..e61b7f9c 100644 --- a/src/extent.c +++ b/src/extent.c @@ -12,6 +12,13 @@ /* Data. 
*/ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; +size_t opt_process_madvise_max_batch = +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + PROCESS_MADVISE_MAX_BATCH_DEFAULT; +#else + 0 +#endif + ; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); @@ -1032,6 +1039,29 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return edata; } +static void +extent_dalloc_wrapper_finish(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata) { + if (config_prof) { + extent_gdump_sub(tsdn, edata); + } + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); +} + +void +extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata) { + assert(edata_pai_get(edata) == EXTENT_PAI_PAC); + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + + /* Verify that will not go down the dalloc / munmap route. */ + assert(ehooks_dalloc_will_fail(ehooks)); + + edata_zeroed_set(edata, true); + extent_dalloc_wrapper_finish(tsdn, pac, ehooks, edata); +} + void extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { @@ -1077,11 +1107,7 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_zeroed_set(edata, zeroed); - if (config_prof) { - extent_gdump_sub(tsdn, edata); - } - - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); + extent_dalloc_wrapper_finish(tsdn, pac, ehooks, edata); } void diff --git a/src/jemalloc.c b/src/jemalloc.c index 67456bb7..9f4bc785 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1361,6 +1361,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) ? 
NSTIME_SEC_MAX * KQU(1000) : SSIZE_MAX); + CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, + "process_madvise_max_batch", 0, + PROCESS_MADVISE_MAX_BATCH_LIMIT, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { init_opt_stats_opts(v, vlen, diff --git a/src/pac.c b/src/pac.c index 3523ef3d..12c1e444 100644 --- a/src/pac.c +++ b/src/pac.c @@ -435,6 +435,44 @@ pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, return nstashed; } +static bool +decay_with_process_madvise(edata_list_inactive_t *decay_extents) { + cassert(have_process_madvise); + assert(opt_process_madvise_max_batch > 0); +#ifndef JEMALLOC_HAVE_PROCESS_MADVISE + return true; +#else + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + size_t len = opt_process_madvise_max_batch; + VARIABLE_ARRAY(struct iovec, vec, len); + + size_t cur = 0, total_bytes = 0; + for (edata_t *edata = edata_list_inactive_first(decay_extents); + edata != NULL; + edata = edata_list_inactive_next(decay_extents, edata)) { + size_t pages_bytes = edata_size_get(edata); + vec[cur].iov_base = edata_base_get(edata); + vec[cur].iov_len = pages_bytes; + total_bytes += pages_bytes; + cur++; + if (cur == len) { + bool err = pages_purge_process_madvise(vec, len, + total_bytes); + if (err) { + return true; + } + cur = 0; + total_bytes = 0; + } + } + if (cur > 0) { + return pages_purge_process_madvise(vec, cur, total_bytes); + } + return false; +#endif +} + static size_t pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, @@ -450,6 +488,28 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool try_muzzy = !fully_decay && pac_decay_ms_get(pac, extent_state_muzzy) != 0; + bool purge_to_retained = !try_muzzy || + ecache->state == extent_state_muzzy; + /* + * Attempt process_madvise only if 1) enabled, 2) purging to retained, + * 
and 3) not using custom hooks. + */ + bool try_process_madvise = (opt_process_madvise_max_batch > 0) && + purge_to_retained && ehooks_dalloc_will_fail(ehooks); + + bool already_purged; + if (try_process_madvise) { + /* + * If anything unexpected happened during process_madvise + * (e.g. not supporting MADV_DONTNEED, or partial success for + * some reason), we will consider nothing is purged and fallback + * to the regular madvise. + */ + already_purged = !decay_with_process_madvise(decay_extents); + } else { + already_purged = false; + } + for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); @@ -473,7 +533,12 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, } JEMALLOC_FALLTHROUGH; case extent_state_muzzy: - extent_dalloc_wrapper(tsdn, pac, ehooks, edata); + if (already_purged) { + extent_dalloc_wrapper_purged(tsdn, pac, ehooks, + edata); + } else { + extent_dalloc_wrapper(tsdn, pac, ehooks, edata); + } nunmapped += npages; break; case extent_state_active: diff --git a/src/pages.c b/src/pages.c index 26fd8d5d..babfd50f 100644 --- a/src/pages.c +++ b/src/pages.c @@ -617,6 +617,58 @@ pages_dodump(void *addr, size_t size) { #endif } +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +#include +#include +static int pidfd; + +static bool +init_process_madvise(void) { + if (opt_process_madvise_max_batch == 0) { + return false; + } + + if (opt_process_madvise_max_batch > PROCESS_MADVISE_MAX_BATCH_LIMIT) { + opt_process_madvise_max_batch = PROCESS_MADVISE_MAX_BATCH_LIMIT; + } + pid_t pid = getpid(); + pidfd = syscall(SYS_pidfd_open, pid, 0); + if (pidfd == -1) { + return true; + } + + return false; +} + +static bool +pages_purge_process_madvise_impl(void *vec, size_t vec_len, + size_t total_bytes) { + size_t purged_bytes = (size_t)syscall(SYS_process_madvise, pidfd, + (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); + + return purged_bytes != 
total_bytes; +} + +#else + +static bool +init_process_madvise(void) { + return false; +} + +static bool +pages_purge_process_madvise_impl(void *vec, size_t vec_len, + size_t total_bytes) { + not_reached(); + return true; +} + +#endif + +bool +pages_purge_process_madvise(void *vec, size_t vec_len, size_t total_bytes) { + return pages_purge_process_madvise_impl(vec, vec_len, total_bytes); +} static size_t os_page_detect(void) { @@ -833,6 +885,12 @@ pages_boot(void) { os_pages_unmap(madv_free_page, PAGE); } #endif + if (init_process_madvise()) { + if (opt_abort) { + abort(); + } + return true; + } return false; } diff --git a/src/stats.c b/src/stats.c index b28b9942..58874bf8 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1727,6 +1727,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_INT64("stats_interval") OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") + OPT_WRITE_SIZE_T("process_madvise_max_batch") emitter_dict_end(emitter); /* Close "opt". */ diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 296b7bff..57aa59e5 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,6 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); + TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT } From ad108d50f1c30700389103ff5fe3ef5f538f804c Mon Sep 17 00:00:00 2001 From: Dmitry Ilvokhin Date: Tue, 21 Jan 2025 07:20:15 -0800 Subject: [PATCH 2479/2608] Extend purging algorithm with peak demand tracking Implementation inspired by idea described in "Beyond malloc efficiency to fleet efficiency: a hugepage-aware memory allocator" paper [1]. Primary idea is to track maximum number (peak) of active pages in use with sliding window and then use this number to decide how many dirty pages we would like to keep. 
We are trying to estimate maximum amount of active memory we'll need in the near future. We do so by projecting future active memory demand (based on peak active memory usage we observed in the past within sliding window) and adding slack on top of it (an overhead is reasonable to have in exchange of higher hugepages coverage). When peak demand tracking is off, projection of future active memory is active memory we are having right now. Estimation is essentially the same as `nactive_max * (1 + dirty_mult)`. Peak demand purging algorithm controlled by two config options. Option `hpa_peak_demand_window_ms` controls duration of sliding window we track maximum active memory usage in and option `hpa_dirty_mult` controls amount of slack we are allowed to have as a percent from maximum active memory usage. By default `hpa_peak_demand_window_ms == 0` now and we have same behaviour (ratio based purging) that we had before this commit. [1]: https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf --- Makefile.in | 2 + include/jemalloc/internal/hpa.h | 4 + include/jemalloc/internal/hpa_opts.h | 14 +- include/jemalloc/internal/peak_demand.h | 55 ++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 + .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 + src/ctl.c | 5 + src/hpa.c | 53 +++++- src/jemalloc.c | 5 + src/peak_demand.c | 74 ++++++++ src/stats.c | 1 + test/unit/hpa.c | 174 +++++++++++++++--- test/unit/mallctl.c | 1 + test/unit/peak_demand.c | 162 ++++++++++++++++ 20 files changed, 537 insertions(+), 29 deletions(-) create mode 100644 include/jemalloc/internal/peak_demand.h create mode 100644 src/peak_demand.c create mode 100644 
test/unit/peak_demand.c diff --git a/Makefile.in b/Makefile.in index 1914fc28..b4102d0b 100644 --- a/Makefile.in +++ b/Makefile.in @@ -137,6 +137,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ + $(srcroot)src/peak_demand.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ @@ -252,6 +253,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/peak.c \ + $(srcroot)test/unit/peak_demand.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index d788d051..a384d04a 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -10,6 +10,7 @@ #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" +#include "jemalloc/internal/peak_demand.h" #include "jemalloc/internal/psset.h" typedef struct hpa_central_s hpa_central_t; @@ -147,6 +148,9 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; + + /* Peak active memory sliding window statistics. */ + peak_demand_t peak_demand; }; bool hpa_hugepage_size_exceeds_limit(); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 42246172..816bb577 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -27,7 +27,8 @@ struct hpa_shard_opts_s { /* * The HPA purges whenever the number of pages exceeds dirty_mult * - * active_pages. This may be set to (fxp_t)-1 to disable purging. + * peak_active_pages. This may be set to (fxp_t)-1 to disable + * purging. */ fxp_t dirty_mult; @@ -59,6 +60,13 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. 
*/ ssize_t experimental_max_purge_nhp; + + /* + * Sliding window duration to track active memory demand statistics. + * This might be set to 0, to disable sliding window statistics + * tracking and use current number of active pages for purging instead. + */ + uint64_t peak_demand_window_ms; }; #define HPA_SHARD_OPTS_DEFAULT { \ @@ -83,7 +91,9 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1 \ + -1, \ + /* peak_demand_window_ms */ \ + 0 \ } #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/peak_demand.h b/include/jemalloc/internal/peak_demand.h new file mode 100644 index 00000000..2664cbec --- /dev/null +++ b/include/jemalloc/internal/peak_demand.h @@ -0,0 +1,55 @@ +#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H +#define JEMALLOC_INTERNAL_PEAK_DEMAND_H + +#include "jemalloc/internal/jemalloc_preamble.h" + +/* + * Implementation of peak active memory demand tracking. + * + * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware + * memory allocator" whitepaper. + * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf + * + * End goal is to track peak active memory usage over specified time interval. + * We do so by dividing this time interval into disjoint subintervals and + * storing value of maximum memory usage for each subinterval in a circular + * buffer. Nanoseconds resolution timestamp uniquely maps into epoch, which is + * used as an index to access circular buffer. + */ + +#define PEAK_DEMAND_LG_BUCKETS 4 +/* + * Number of buckets should be power of 2 to ensure modulo operation is + * optimized to bit masking by the compiler. + */ +#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS) + +typedef struct peak_demand_s peak_demand_t; +struct peak_demand_s { + /* + * Absolute value of current epoch, monotonically increases over time. Epoch + * value modulo number of buckets used as an index to access nactive_max + * array. 
+ */ + uint64_t epoch; + + /* How many nanoseconds each epoch approximately takes. */ + uint64_t epoch_interval_ns; + + /* + * Circular buffer to track maximum number of active pages for each + * epoch. + */ + size_t nactive_max[PEAK_DEMAND_NBUCKETS]; +}; + +void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms); + +/* Updates peak demand statistics with current number of active pages. */ +void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, + size_t nactive); + +/* Returns maximum number of active pages in sliding window. */ +size_t peak_demand_nactive_max(peak_demand_t *peak_demand); + +#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c43b30b1..97a95fbf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a195f6b3..8529438c 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index cd16005d..eace48ba 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 2d8c4be6..98085cfd 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -76,6 +76,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f091475e..1a89369e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -112,6 +112,9 @@ Source Files + + Source Files + Source Files diff --git a/src/ctl.c b/src/ctl.c index c55d9719..2c941ae8 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,6 +106,7 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) +CTL_PROTO(opt_hpa_peak_demand_window_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -487,6 +488,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_peak_demand_window_ms"), + CTL(opt_hpa_peak_demand_window_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), 
CTL(opt_hpa_sec_max_alloc)}, @@ -2255,6 +2258,8 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) +CTL_RO_NL_GEN(opt_hpa_peak_demand_window_ms, + opt_hpa_opts.peak_demand_window_ms, uint64_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index 2a5d7e1f..c01dde13 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -63,6 +63,11 @@ hpa_supported(void) { return true; } +static bool +hpa_peak_demand_tracking_enabled(hpa_shard_t *shard) { + return shard->opts.peak_demand_window_ms > 0; +} + static void hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); @@ -217,6 +222,11 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; + if (hpa_peak_demand_tracking_enabled(shard)) { + peak_demand_init(&shard->peak_demand, + shard->opts.peak_demand_window_ms); + } + /* * Fill these in last, so that if an hpa_shard gets used despite * initialization failing, we'll at least crash instead of just @@ -294,8 +304,37 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - return fxp_mul_frac(psset_nactive(&shard->psset), - shard->opts.dirty_mult); + /* + * We are trying to estimate maximum amount of active memory we'll + * need in the near future. We do so by projecting future active + * memory demand (based on peak active memory usage we observed in the + * past within sliding window) and adding slack on top of it (an + * overhead is reasonable to have in exchange of higher hugepages + * coverage). When peak demand tracking is off, projection of future + * active memory is active memory we are having right now. 
+ * + * Estimation is essentially the same as nactive_max * (1 + + * dirty_mult), but expressed differently to factor in necessary + * implementation details. + */ + size_t nactive = psset_nactive(&shard->psset); + size_t nactive_max = nactive; + if (hpa_peak_demand_tracking_enabled(shard)) { + /* + * We release shard->mtx, when we do a syscall to purge dirty + * memory, so someone might grab shard->mtx, allocate memory + * from this shard and update psset's nactive counter, before + * peak_demand_update(...) was called and we'll get + * peak_demand_nactive_max(...) <= nactive as a result. + */ + size_t peak = peak_demand_nactive_max(&shard->peak_demand); + if (peak > nactive_max) { + nactive_max = peak; + } + } + size_t slack = fxp_mul_frac(nactive_max, shard->opts.dirty_mult); + size_t estimation = nactive_max + slack; + return estimation - nactive; } static bool @@ -548,6 +587,16 @@ static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + + /* Update active memory demand statistics. 
*/ + if (hpa_peak_demand_tracking_enabled(shard)) { + nstime_t now; + shard->central->hooks.curtime(&now, + /* first_reading */ true); + peak_demand_update(&shard->peak_demand, &now, + psset_nactive(&shard->psset)); + } + if (!forced && shard->opts.deferral_allowed) { return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 9f4bc785..d08771f8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1568,6 +1568,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + CONF_HANDLE_UINT64_T( + opt_hpa_opts.peak_demand_window_ms, + "hpa_peak_demand_window_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/peak_demand.c b/src/peak_demand.c new file mode 100644 index 00000000..49f28930 --- /dev/null +++ b/src/peak_demand.c @@ -0,0 +1,74 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/peak_demand.h" + +void +peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) { + assert(interval_ms > 0); + peak_demand->epoch = 0; + uint64_t interval_ns = interval_ms * 1000 * 1000; + peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS; + memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max)); +} + +static uint64_t +peak_demand_epoch_ind(peak_demand_t *peak_demand) { + return peak_demand->epoch % PEAK_DEMAND_NBUCKETS; +} + +static nstime_t +peak_demand_next_epoch_advance(peak_demand_t *peak_demand) { + uint64_t epoch = peak_demand->epoch; + uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns; + nstime_t next; + nstime_init(&next, ns); + return next; +} + +static uint64_t +peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand, + const nstime_t *now) { + nstime_t next_epoch_advance = + 
peak_demand_next_epoch_advance(peak_demand); + if (nstime_compare(now, &next_epoch_advance) < 0) { + return peak_demand_epoch_ind(peak_demand); + } + uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns; + assert(next_epoch > peak_demand->epoch); + /* + * If we missed more epochs, than capacity of circular buffer + * (PEAK_DEMAND_NBUCKETS), re-write no more than PEAK_DEMAND_NBUCKETS + * items as we don't want to zero out same item multiple times. + */ + if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) { + peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS; + } + while (peak_demand->epoch < next_epoch) { + ++peak_demand->epoch; + uint64_t ind = peak_demand_epoch_ind(peak_demand); + peak_demand->nactive_max[ind] = 0; + } + return peak_demand_epoch_ind(peak_demand); +} + +void +peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, + size_t nactive) { + uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now); + size_t *epoch_nactive = &peak_demand->nactive_max[ind]; + if (nactive > *epoch_nactive) { + *epoch_nactive = nactive; + } +} + +size_t +peak_demand_nactive_max(peak_demand_t *peak_demand) { + size_t nactive_max = peak_demand->nactive_max[0]; + for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) { + if (peak_demand->nactive_max[i] > nactive_max) { + nactive_max = peak_demand->nactive_max[i]; + } + } + return nactive_max; +} diff --git a/src/stats.c b/src/stats.c index 58874bf8..bd0167fb 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1657,6 +1657,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") + OPT_WRITE_UINT64("hpa_peak_demand_window_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 6c42729a..ceed9bd8 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,26 +37,9 @@ static hpa_shard_opts_t 
test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1 -}; - -static hpa_shard_opts_t test_hpa_shard_opts_purge = { - /* slab_max_alloc */ - HUGEPAGE, - /* hugification_threshold */ - 0.9 * HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(11), - /* deferral_allowed */ - true, - /* hugify_delay_ms */ - 0, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 + -1, + /* peak_demand_window_ms */ + 0 }; static hpa_shard_t * @@ -480,8 +463,14 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_purge); + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.slab_max_alloc = HUGEPAGE; + opts.hugification_threshold = 0.9 * HUGEPAGE; + opts.dirty_mult = FXP_INIT_PERCENT(11); + opts.deferral_allowed = true; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, &opts); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -489,8 +478,7 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. 
*/ - const size_t npages = - test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t npages = opts.hugification_threshold / PAGE + 1; const size_t size = npages * PAGE; bool deferred_work_generated = false; @@ -733,6 +721,140 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } TEST_END +TEST_BEGIN(test_demand_purge_slack) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + /* Allow 10% of slack. */ + opts.dirty_mult = FXP_INIT_PERCENT(10); + /* Peak demand sliding window duration is 10 seconds. */ + opts.peak_demand_window_ms = 10 * 1000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 16 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + /* Deallocate 5 hugepages out of 16. */ + for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Peak demand within sliding window is 16 hugepages, so we don't need + * to purge anything just yet. 
+ */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + nstime_init2(&defer_curtime, 12, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * 12 seconds passed now, peak demand is 11 hugepages, we allowed to + * keep 11 * 0.1 (hpa_dirty_mult) = 1.1 dirty hugepages, but we + * have 5 dirty hugepages, so we should purge 4 of them. + */ + expect_zu_eq(4, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_demand_purge_tight) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + /* No slack allowed. */ + opts.dirty_mult = FXP_INIT_PERCENT(0); + /* Peak demand sliding window duration is 10 seconds. */ + opts.peak_demand_window_ms = 10 * 1000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum {NALLOCS = 16 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + /* Deallocate 5 hugepages out of 16. 
*/ + for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + nstime_init2(&defer_curtime, 6, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * Peak demand within sliding window is 16 hugepages, to purge anything + * just yet. + */ + expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); + + nstime_init2(&defer_curtime, 12, 0); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); + ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + /* + * 12 seconds passed now, peak demand is 11 hugepages. We have + * hpa_dirty_mult = 0, so we allowed to keep 11 * 0 = 0 dirty + * hugepages, but we have 5, all of them should be purged. + */ + expect_zu_eq(5, ndefer_purge_calls, "Expect purges"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -756,5 +878,7 @@ main(void) { test_no_min_purge_interval, test_min_purge_interval, test_purge, - test_experimental_max_purge_nhp); + test_experimental_max_purge_nhp, + test_demand_purge_slack, + test_demand_purge_tight); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 57aa59e5..366b992b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -295,6 +295,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); + TEST_MALLCTL_OPT(uint64_t, hpa_peak_demand_window_ms, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/peak_demand.c b/test/unit/peak_demand.c new file mode 100644 index 
00000000..ca2506b8 --- /dev/null +++ b/test/unit/peak_demand.c @@ -0,0 +1,162 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/peak_demand.h" + +TEST_BEGIN(test_peak_demand_init) { + peak_demand_t peak_demand; + /* + * Exact value doesn't matter here as we don't advance epoch in this + * test. + */ + uint64_t interval_ms = 1000; + peak_demand_init(&peak_demand, interval_ms); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 0, + "Unexpected ndirty_max value after initialization"); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_basic) { + peak_demand_t peak_demand; + /* Make each bucket exactly one second to simplify math. */ + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 1, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 1024, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_skip_epochs) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ PEAK_DEMAND_NBUCKETS - 1, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2 * (PEAK_DEMAND_NBUCKETS - 1), + /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + /* + * Updates are not evenly spread over time. When we update at + * 2 * (PEAK_DEMAND_NBUCKETS - 1) second, 1024 value is already out of + * sliding window, but 512 is still present. 
+ */ + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_rewrite_optimization) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 0, /* nsec */ UINT64_MAX); + /* + * This update should take reasonable time if optimization is working + * correctly, otherwise we'll loop from 0 to UINT64_MAX and this test + * will take a long time to finish. + */ + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_out_of_interval) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + + nstime_init2(&now, /* sec */ 0 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 1024); + + nstime_init2(&now, /* sec */ 1 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 512); + + nstime_init2(&now, /* sec */ 2 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, /* nactive */ 256); + + /* + * Updates frequency is lower than tracking interval, so we should + * have only last value. + */ + expect_zu_eq(peak_demand_nactive_max(&peak_demand), 256, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_static_epoch) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + nstime_init_zero(&now); + + /* Big enough value to overwrite values in circular buffer. 
*/ + size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; + for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { + /* + * We should override value in the same bucket as now value + * doesn't change between iterations. + */ + peak_demand_update(&peak_demand, &now, nactive); + } + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); +} +TEST_END + +TEST_BEGIN(test_peak_demand_update_epoch_advance) { + peak_demand_t peak_demand; + uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; + peak_demand_init(&peak_demand, interval_ms); + + nstime_t now; + /* Big enough value to overwrite values in circular buffer. */ + size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; + for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { + uint64_t sec = nactive; + nstime_init2(&now, sec, /* nsec */ 0); + peak_demand_update(&peak_demand, &now, nactive); + } + + expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_peak_demand_init, + test_peak_demand_update_basic, + test_peak_demand_update_skip_epochs, + test_peak_demand_update_rewrite_optimization, + test_peak_demand_update_out_of_interval, + test_peak_demand_update_static_epoch, + test_peak_demand_update_epoch_advance); +} From 773b5809f9ab3f7c525badbe7587f8ab8ee20d41 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Wed, 5 Mar 2025 17:58:20 -0800 Subject: [PATCH 2480/2608] Fix frame pointer based unwinder to handle changing stack range --- include/jemalloc/internal/prof_sys.h | 2 +- src/prof_stack_range.c | 281 ++++++++++++++------------- src/prof_sys.c | 117 ++++++++--- 3 files changed, 233 insertions(+), 167 deletions(-) diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 3377ba92..42284b38 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -20,7 +20,7 @@ void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool 
prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); -uintptr_t prof_thread_stack_start(uintptr_t stack_end); +int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high); /* Used in unit tests. */ typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index 6a99b56f..f5e5c044 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -4,158 +4,163 @@ #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/prof_sys.h" -#if defined (__linux__) && defined(JEMALLOC_HAVE_GETTID) +#if defined(__linux__) && defined(JEMALLOC_HAVE_GETTID) -#include -#include -#include -#include // strtoul -#include -#include +# include +# include +# include +# include // strtoul +# include +# include -static int prof_mapping_containing_addr( - uintptr_t addr, - const char* maps_path, - uintptr_t* mm_start, - uintptr_t* mm_end) { - int ret = ENOENT; // not found - *mm_start = *mm_end = 0; - - // Each line of /proc//maps is: - // - - // - // The fields we care about are always within the first 34 characters so - // as long as `buf` contains the start of a mapping line it can always be - // parsed. 
- static const int kMappingFieldsWidth = 34; - - int fd = -1; - char buf[4096]; - ssize_t remaining = 0; // actual number of bytes read to buf - char* line = NULL; - - while (1) { - if (fd < 0) { - // case 0: initial open of maps file - fd = malloc_open(maps_path, O_RDONLY); - if (fd < 0) { - return errno; - } - - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - break; - } - line = buf; - } else if (line == NULL) { - // case 1: no newline found in buf - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - break; - } - line = memchr(buf, '\n', remaining); - if (line != NULL) { - line++; // advance to character after newline - remaining -= (line - buf); - } - } else if (line != NULL && remaining < kMappingFieldsWidth) { - // case 2: found newline but insufficient characters remaining in buf - - // fd currently points to the character immediately after the last - // character in buf. Seek fd to the character after the newline. - if (malloc_lseek(fd, -remaining, SEEK_CUR) == -1) { - ret = errno; - break; - } - - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - break; - } - line = buf; - } else { - // case 3: found newline and sufficient characters to parse - - // parse - - char* tmp = line; - uintptr_t start_addr = strtoul(tmp, &tmp, 16); - if (addr >= start_addr) { - tmp++; // advance to character after '-' - uintptr_t end_addr = strtoul(tmp, &tmp, 16); - if (addr < end_addr) { - *mm_start = start_addr; - *mm_end = end_addr; - ret = 0; - break; +/* + * Converts a string representing a hexadecimal number to an unsigned long long + * integer. Functionally equivalent to strtoull() (for base 16) but faster for + * that case. + * + * @param nptr Pointer to the string to be converted. + * @param endptr Pointer to a pointer to character, which will be set to the + * character in `nptr` where parsing stopped. Can be NULL. + * @return The converted unsigned long long integer value. 
+ */ +static inline unsigned long long int +strtoull_hex(const char *nptr, char **endptr) { + unsigned long long int val = 0; + int ii = 0; + for (; ii < 16; ++ii) { + char c = nptr[ii]; + if (c >= '0' && c <= '9') { + val = (val << 4) + (c - '0'); + } else if (c >= 'a' && c <= 'f') { + val = (val << 4) + (c - 'a' + 10); + } else { + break; } - } - - // Advance to character after next newline in the current buf. - char* prev_line = line; - line = memchr(line, '\n', remaining); - if (line != NULL) { - line++; // advance to character after newline - remaining -= (line - prev_line); - } } - } - - malloc_close(fd); - return ret; + if (endptr) { + *endptr = (char *)(nptr + ii); + } + return val; } -static uintptr_t prof_main_thread_stack_start(const char* stat_path) { - uintptr_t stack_start = 0; +static int +prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, + uintptr_t *mm_start, uintptr_t *mm_end) { + int ret = ENOENT; /* not found */ + *mm_start = *mm_end = 0; - int fd = malloc_open(stat_path, O_RDONLY); - if (fd < 0) { - return 0; - } + /* + * Each line of /proc//maps is: + * - + * + * The fields we care about are always within the first 34 characters so + * as long as `buf` contains the start of a mapping line it can always be + * parsed. 
+ */ + static const int kMappingFieldsWidth = 34; - char buf[512]; - ssize_t n = malloc_read_fd(fd, buf, sizeof(buf) - 1); - if (n >= 0) { - buf[n] = '\0'; - if (sscanf( - buf, - "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %*u %*u %*d %*d %*d %*d %*d %*d %*u %*u %*d %*u %*u %*u %"FMTuPTR, - &stack_start) != 1) { + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; /* actual number of bytes read to buf */ + char *line = NULL; + + while (1) { + if (fd < 0) { + /* case 0: initial open of maps file */ + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } + + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = buf; + } else if (line == NULL) { + /* case 1: no newline found in buf */ + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + /* + * case 2: found newline but insufficient characters remaining in + * buf + */ + memcpy(buf, line, + remaining); /* copy remaining characters to start of buf */ + line = buf; + + size_t count = + malloc_read_fd(fd, buf + remaining, sizeof(buf) - remaining); + if (count <= 0) { + ret = errno; + break; + } + + remaining += count; /* actual number of bytes read to buf */ + } else { + /* case 3: found newline and sufficient characters to parse */ + + /* parse - */ + char *tmp = line; + uintptr_t start_addr = (uintptr_t)strtoull_hex(tmp, &tmp); + if (addr >= start_addr) { + tmp++; /* advance to character after '-' */ + uintptr_t end_addr = (uintptr_t)strtoull_hex(tmp, NULL); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } + + /* Advance to character after next newline in the current buf. 
*/ + char *prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - prev_line); + } + } } - } - malloc_close(fd); - return stack_start; + + malloc_close(fd); + return ret; } -uintptr_t prof_thread_stack_start(uintptr_t stack_end) { - pid_t pid = getpid(); - pid_t tid = gettid(); - if (pid == tid) { - char stat_path[32]; // "/proc//stat" - malloc_snprintf(stat_path, sizeof(stat_path), "/proc/%d/stat", pid); - return prof_main_thread_stack_start(stat_path); - } else { - // NOTE: Prior to kernel 4.5 an entry for every thread stack was included in - // /proc//maps as [STACK:]. Starting with kernel 4.5 only the main - // thread stack remains as the [stack] mapping. For other thread stacks the - // mapping is still visible in /proc//task//maps (though not - // labeled as [STACK:tid]). - // https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html - char maps_path[64]; // "/proc//task//maps" - malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", pid, tid); - - uintptr_t mm_start, mm_end; - if (prof_mapping_containing_addr( - stack_end, maps_path, &mm_start, &mm_end) != 0) { - return 0; - } - return mm_end; - } +int +prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) { + /* + * NOTE: Prior to kernel 4.5 an entry for every thread stack was included in + * /proc//maps as [STACK:]. Starting with kernel 4.5 only the main + * thread stack remains as the [stack] mapping. For other thread stacks the + * mapping is still visible in /proc//task//maps (though not + * labeled as [STACK:tid]). 
+ * https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html + */ + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", + getpid(), gettid()); + return prof_mapping_containing_addr(fp, maps_path, low, high); } #else -uintptr_t prof_thread_stack_start(UNUSED uintptr_t stack_end) { - return 0; +int +prof_thread_stack_range( + UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) { + *stack_start = *stack_end = 0; + return ENOENT; } -#endif // __linux__ +#endif // __linux__ diff --git a/src/prof_sys.c b/src/prof_sys.c index f0bc8b4b..642d8c89 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -23,6 +23,11 @@ #define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif +#ifdef JEMALLOC_PROF_FRAME_POINTER +// execinfo backtrace() as fallback unwinder +#include +#endif + /******************************************************************************/ malloc_mutex_t prof_dump_filename_mtx; @@ -102,41 +107,97 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { #elif (defined(JEMALLOC_PROF_FRAME_POINTER)) JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS + +struct stack_range { + uintptr_t start; + uintptr_t end; +}; + +struct thread_unwind_info { + struct stack_range stack_range; + bool fallback; +}; +static __thread struct thread_unwind_info unwind_info = { + .stack_range = { + .start = 0, + .end = 0, + }, + .fallback = false, +}; /* thread local */ + static void prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { - // stack_start - highest possible valid stack address (assumption: stacks grow downward) - // stack_end - current stack frame and lowest possible valid stack address - // (all earlier frames will be at higher addresses than this) + /* fp: current stack frame pointer + * + * stack_range: readable stack memory range for the current thread. 
+ * Used to validate frame addresses during stack unwinding. + * For most threads there is a single valid stack range + * that is fixed at thread creation time. This may not be + * the case when folly fibers or boost contexts are used. + * In those cases fall back to using execinfo backtrace() + * (DWARF unwind). + */ - // always safe to get the current stack frame address - void** stack_end = (void**)__builtin_frame_address(0); - if (stack_end == NULL) { - *len = 0; - return; - } + /* always safe to get the current stack frame address */ + uintptr_t fp = (uintptr_t)__builtin_frame_address(0); - static __thread void **stack_start = (void **)0; // thread local - if (stack_start == 0 || stack_end >= stack_start) { - stack_start = (void**)prof_thread_stack_start((uintptr_t)stack_end); - } + /* new thread - get the stack range */ + if (!unwind_info.fallback && + unwind_info.stack_range.start == unwind_info.stack_range.end) { + if (prof_thread_stack_range(fp, &unwind_info.stack_range.start, + &unwind_info.stack_range.end) != 0) { + unwind_info.fallback = true; + } else { + assert(fp >= unwind_info.stack_range.start + && fp < unwind_info.stack_range.end); + } + } - if (stack_start == 0 || stack_end >= stack_start) { - *len = 0; - return; - } + if (unwind_info.fallback) { + goto label_fallback; + } - unsigned ii = 0; - void** fp = (void**)stack_end; - while (fp < stack_start && ii < max_len) { - vec[ii++] = fp[1]; - void** fp_prev = fp; - fp = fp[0]; - if (unlikely(fp <= fp_prev)) { // sanity check forward progress - break; - } - } - *len = ii; + unsigned ii = 0; + while (ii < max_len && fp != 0) { + if (fp < unwind_info.stack_range.start || + fp >= unwind_info.stack_range.end) { + /* + * Determining the stack range from procfs can be + * relatively expensive especially for programs with + * many threads / shared libraries. If the stack + * range has changed, it is likely to change again + * in the future (fibers or some other stack + * manipulation). 
So fall back to backtrace for this + * thread. + */ + unwind_info.fallback = true; + goto label_fallback; + } + void* ip = ((void **)fp)[1]; + if (ip == 0) { + break; + } + vec[ii++] = ip; + fp = ((uintptr_t *)fp)[0]; + } + *len = ii; + return; + +label_fallback: + /* + * Using the backtrace from execinfo.h here. Note that it may get + * redirected to libunwind when a libunwind not built with build-time + * flag --disable-weak-backtrace is linked. + */ + assert(unwind_info.fallback); + int nframes = backtrace(vec, max_len); + if (nframes > 0) { + *len = nframes; + } else { + *len = 0; + } } + JEMALLOC_DIAGNOSTIC_POP #elif (defined(JEMALLOC_PROF_GCC)) JEMALLOC_DIAGNOSTIC_PUSH From 81f35e0b55c52cb0c3e1171afd477e1cb66fafaf Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 7 Mar 2025 11:08:03 -0800 Subject: [PATCH 2481/2608] Modify Travis tests to use frameptr when profiling --- .travis.yml | 42 ++++++++++++++++++++++++++++++++++++++++++ scripts/gen_travis.py | 8 +++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a32755c6..387b36cf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -61,6 +61,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -94,6 +97,9 @@ jobs: - os: linux arch: amd64 env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - os: linux + arch: amd64 + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr 
--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" @@ -124,6 +130,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -151,6 +160,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -175,6 +187,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof 
--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -196,6 +211,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -214,6 +232,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -229,6 +250,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -241,6 +265,9 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -253,6 +280,18 @@ jobs: - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: amd64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" @@ -298,6 +337,9 @@ jobs: - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + - os: linux + arch: arm64 + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index a49bb83b..6f8ee505 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -265,7 +265,13 @@ def generate_linux(arch): if arch != ARM64: exclude += [LARGE_HUGEPAGE] - return generate_jobs(os, arch, exclude, max_unusual_opts) + linux_configure_flags = list(configure_flag_unusuals) + linux_configure_flags.append(Option.as_configure_flag("--enable-prof --enable-prof-frameptr")) + + linux_unusuals = (compilers_unusual + feature_unusuals + + linux_configure_flags + malloc_conf_unusuals) + + return generate_jobs(os, arch, exclude, max_unusual_opts, linux_unusuals) def generate_macos(arch): From 86bbabac32775bdf414318e57e626febb9b6eac1 Mon Sep 17 00:00:00 2001 From: Audrey Dutcher Date: Thu, 6 Mar 2025 22:24:27 -0700 Subject: [PATCH 2482/2608] background_thread: add fallback for pthread_create dlsym If jemalloc is linked into a shared library, the RTLD_NEXT dlsym call may fail since RTLD_NEXT is only specified to search all objects after the current one in the loading order, and the pthread library may be earlier in the load order. Instead of failing immediately, attempt one more time to find pthread_create via RTLD_GLOBAL. Errors cascading from this were observed on FreeBSD 14.1. 
--- src/background_thread.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/background_thread.c b/src/background_thread.c index 30c3111c..511febac 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -63,6 +63,9 @@ pthread_create_fptr_init(void) { */ #ifdef JEMALLOC_HAVE_DLSYM pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + pthread_create_fptr = dlsym(RTLD_DEFAULT, "pthread_create"); + } #else pthread_create_fptr = NULL; #endif From e1a77ec5583702429fbe7c42e7ad37dfd5517cce Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 5 Mar 2025 13:00:57 -0800 Subject: [PATCH 2483/2608] Support THP with Huge Arena in PAC --- include/jemalloc/internal/arena_externs.h | 3 + .../internal/jemalloc_internal_externs.h | 2 + include/jemalloc/internal/pac.h | 25 +++++++ src/arena.c | 19 +++++- src/base.c | 46 ++++++++++++- src/ctl.c | 3 + src/extent.c | 66 +++++++++++++++++-- src/jemalloc.c | 1 + src/stats.c | 1 + 9 files changed, 158 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index e915c97a..8dd5b015 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -30,6 +30,9 @@ extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; +extern bool opt_huge_arena_pac_thp; +extern pac_thp_t huge_arena_pac_thp; + /* * arena_bin_offsets[binind] is the offset of the first bin shard for size class * binind. 
diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 8c6df450..83a37baf 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -62,6 +62,8 @@ extern unsigned manual_arena_base; */ extern atomic_p_t arenas[]; +extern unsigned huge_arena_ind; + void *a0malloc(size_t size); void a0dalloc(void *ptr); void *bootstrap_malloc(size_t size); diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 0b173a58..243e97f3 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -125,6 +125,31 @@ struct pac_s { atomic_zu_t extent_sn_next; }; +typedef struct pac_thp_s pac_thp_t; +struct pac_thp_s { + /* + * opt_thp controls THP for user requested allocations. Settings + * "always", "never" and "default" are available if THP is supported + * by the OS and the default extent hooks are used: + * - "always" and "never" are convered by pages_set_thp_state() in + * ehooks_default_alloc_impl(). + * - "default" makes no change for all the other auto arenas except + * the huge arena. For the huge arena, we might also look at + * opt_metadata_thp to decide whether to use THP or not. + * This is a temporary remedy before HPA is fully supported. + */ + bool thp_madvise; + /* Below fields are protected by the lock. */ + malloc_mutex_t lock; + bool auto_thp_switched; + atomic_u_t n_thp_lazy; + /* + * List that tracks HUGEPAGE aligned regions that're lazily hugified + * in auto thp mode. 
+ */ + edata_list_active_t thp_lazy_list; +}; + bool pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, edata_cache_t *edata_cache, nstime_t *cur_time, size_t oversize_threshold, ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, diff --git a/src/arena.c b/src/arena.c index 54ecc403..84d4e14c 100644 --- a/src/arena.c +++ b/src/arena.c @@ -46,7 +46,15 @@ size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; uint32_t arena_bin_offsets[SC_NBINS]; -static unsigned huge_arena_ind; +/* + * a0 is used to handle huge requests before malloc init completes. After + * that,the huge_arena_ind is updated to point to the actual huge arena, + * which is the last one of the auto arenas. + */ +unsigned huge_arena_ind = 0; +bool opt_huge_arena_pac_thp = false; +pac_thp_t huge_arena_pac_thp = {.thp_madvise = false, + .auto_thp_switched = false, .n_thp_lazy = ATOMIC_INIT(0)}; const arena_config_t arena_config_default = { /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, @@ -1898,6 +1906,7 @@ arena_choose_huge(tsd_t *tsd) { bool arena_init_huge(arena_t *a0) { bool huge_enabled; + assert(huge_arena_ind == 0); /* The threshold should be large size class. */ if (opt_oversize_threshold > SC_LARGE_MAXCLASS || @@ -1908,10 +1917,18 @@ arena_init_huge(arena_t *a0) { } else { /* Reserve the index for the huge arena. */ huge_arena_ind = narenas_total_get(); + assert(huge_arena_ind != 0); oversize_threshold = opt_oversize_threshold; /* a0 init happened before malloc_conf_init. */ atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, oversize_threshold, ATOMIC_RELAXED); + /* Initialize huge arena THP settings for PAC. 
*/ + (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp && + metadata_thp_enabled() && (opt_thp == thp_mode_default) && + (init_system_thp_mode == thp_mode_default); + malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", + WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive); + edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list); huge_enabled = true; } diff --git a/src/base.c b/src/base.c index ac8598eb..13367697 100644 --- a/src/base.c +++ b/src/base.c @@ -55,9 +55,6 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { } if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); - if (have_madvise_huge && addr) { - pages_set_thp_state(addr, size); - } } else { addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, &commit); @@ -153,6 +150,40 @@ base_get_num_blocks(base_t *base, bool with_new_block) { return n_blocks; } +static void +huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) { + assert(opt_huge_arena_pac_thp); + assert(!pac_thp->auto_thp_switched); + + arena_t *huge_arena; + if (huge_arena_ind == 0 || (huge_arena = arena_get(tsdn, huge_arena_ind, + false)) == NULL) { + /* Huge arena hasn't been init yet, simply turn the switch on. 
*/ + pac_thp->auto_thp_switched = true; + return; + } + + assert(huge_arena != NULL); + edata_list_active_t *pending_list; + malloc_mutex_lock(tsdn, &pac_thp->lock); + pending_list = &pac_thp->thp_lazy_list; + pac_thp->auto_thp_switched = true; + malloc_mutex_unlock(tsdn, &pac_thp->lock); + + unsigned cnt = 0; + edata_t *edata; + ql_foreach(edata, &pending_list->head, ql_link_active) { + assert(edata != NULL); + void *addr = edata_addr_get(edata); + size_t size = edata_size_get(edata); + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size && size != 0); + pages_huge(addr, size); + cnt++; + } + assert(cnt == atomic_load_u(&pac_thp->n_thp_lazy, ATOMIC_RELAXED)); +} + static void base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { assert(opt_metadata_thp == metadata_thp_auto); @@ -187,6 +218,15 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); } + + /* Handle the THP auto switch for the huge arena. */ + if (!huge_arena_pac_thp.thp_madvise || base_ind_get(base) != 0) { + /* Only b0 metadata auto thp switch do the trigger. 
*/ + return; + } + malloc_mutex_unlock(tsdn, &base->mtx); + huge_arena_auto_thp_switch(tsdn, &huge_arena_pac_thp); + malloc_mutex_lock(tsdn, &base->mtx); } static void * diff --git a/src/ctl.c b/src/ctl.c index 2c941ae8..1d7eace6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -113,6 +113,7 @@ CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) CTL_PROTO(opt_hpa_sec_bytes_after_flush) CTL_PROTO(opt_hpa_sec_batch_fill_extra) +CTL_PROTO(opt_huge_arena_pac_thp) CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) @@ -498,6 +499,7 @@ static const ctl_named_node_t opt_node[] = { CTL(opt_hpa_sec_bytes_after_flush)}, {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, + {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, @@ -2277,6 +2279,7 @@ CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) +CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) diff --git a/src/extent.c b/src/extent.c index e61b7f9c..86b30f82 100644 --- a/src/extent.c +++ b/src/extent.c @@ -646,6 +646,55 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, return edata; } +static void +extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, + edata_cache_t *edata_cache, void *addr, size_t size) { + assert(opt_huge_arena_pac_thp); + assert(opt_metadata_thp != metadata_thp_disabled); + /* + * With rounding up the given memory region [addr, addr + size) to + * the huge page region that it crosses boundaries with, + * essentially we're aligning the start addr down and the end addr + * up to the nearest HUGEPAGE boundaries. 
The memory overhead can + * be within the range of [0, 2 * (HUGEPAGE - 1)]. + */ + void *huge_addr = HUGEPAGE_ADDR2BASE(addr); + void *huge_end = HUGEPAGE_ADDR2BASE((void *)((byte_t *)addr + + (uintptr_t)(size + HUGEPAGE - 1))); + assert((uintptr_t)huge_end > (uintptr_t)huge_addr); + + size_t huge_size = (uintptr_t)huge_end - (uintptr_t)huge_addr; + assert(huge_size <= (size + ((HUGEPAGE - 1) << 1)) && + huge_size >= size); + + if (opt_metadata_thp == metadata_thp_always || + pac_thp->auto_thp_switched) { + pages_huge(huge_addr, huge_size); + } else { + assert(opt_metadata_thp == metadata_thp_auto); + edata_t *edata = edata_cache_get(tsdn, edata_cache); + + malloc_mutex_lock(tsdn, &pac_thp->lock); + /* Can happen if the switch is turned on during edata retrieval. */ + if (pac_thp->auto_thp_switched) { + malloc_mutex_unlock(tsdn, &pac_thp->lock); + pages_huge(huge_addr, huge_size); + if (edata != NULL) { + edata_cache_put(tsdn, edata_cache, edata); + } + } else { + if (edata != NULL) { + edata_addr_set(edata, huge_addr); + edata_size_set(edata, huge_size); + edata_list_active_append(&pac_thp->thp_lazy_list, edata); + atomic_fetch_add_u(&pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED); + } + malloc_mutex_unlock(tsdn, &pac_thp->lock); + } + malloc_mutex_assert_not_owner(tsdn, &pac_thp->lock); + } +} + /* * If virtual memory is retained, create increasingly larger extents from which * to split requested extents in order to limit the total number of disjoint @@ -688,10 +737,10 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, goto label_err; } - edata_init(edata, ecache_ind_get(&pac->ecache_retained), ptr, - alloc_size, false, SC_NSIZES, extent_sn_next(pac), - extent_state_active, zeroed, committed, EXTENT_PAI_PAC, - EXTENT_IS_HEAD); + unsigned ind = ecache_ind_get(&pac->ecache_retained); + edata_init(edata, ind, ptr, alloc_size, false, SC_NSIZES, + extent_sn_next(pac), extent_state_active, zeroed, committed, + EXTENT_PAI_PAC, EXTENT_IS_HEAD); if 
(extent_register_no_gdump_add(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -767,6 +816,15 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, exp_grow_size_commit(&pac->exp_grow, exp_grow_skip); malloc_mutex_unlock(tsdn, &pac->grow_mtx); + if (huge_arena_pac_thp.thp_madvise) { + /* Avoid using HUGEPAGE when the grow size is less than HUGEPAGE. */ + if (ind != 0 && ind == huge_arena_ind && ehooks_are_default(ehooks) && + likely(alloc_size >= HUGEPAGE)) { + extent_handle_huge_arena_thp(tsdn, &huge_arena_pac_thp, + pac->edata_cache, ptr, alloc_size); + } + } + if (config_prof) { /* Adjust gdump stats now that extent is final size. */ extent_gdump_add(tsdn, edata); diff --git a/src/jemalloc.c b/src/jemalloc.c index d08771f8..4939d954 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1240,6 +1240,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") + CONF_HANDLE_BOOL(opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { int m; bool match = false; diff --git a/src/stats.c b/src/stats.c index bd0167fb..6e77977f 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1681,6 +1681,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_SIZE_T("hpa_sec_max_bytes") OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") + OPT_WRITE_BOOL("huge_arena_pac_thp") OPT_WRITE_CHAR_P("metadata_thp") OPT_WRITE_INT64("mutex_max_spin") OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") From a4defdb85434c2027c45c956f4d6d333997a1b50 Mon Sep 17 00:00:00 2001 From: Jay Lee Date: Fri, 7 Feb 2025 14:12:38 +0800 Subject: [PATCH 2484/2608] detect false failure of strerror_r See tikv/jemallocator#108. 
In a summary, test on `strerror_r` can fail due to reasons other than `strerror_r` itself, so add an additional test to determine the failure is expected. Signed-off-by: Jay Lee --- configure.ac | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/configure.ac b/configure.ac index eb500db9..1c9c5067 100644 --- a/configure.ac +++ b/configure.ac @@ -2835,9 +2835,19 @@ JE_COMPILABLE([strerror_r returns char with gnu source], [ char *error = strerror_r(EINVAL, buffer, 100); printf("%s\n", error); ], [je_cv_strerror_r_returns_char_with_gnu_source]) +if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xno" ; then + JE_COMPILABLE([strerror_r header only], [ +#include +#include +#include +#include +], [], [je_cv_strerror_r_header_pass]) +fi JE_CFLAGS_RESTORE() if test "x${je_cv_strerror_r_returns_char_with_gnu_source}" = "xyes" ; then AC_DEFINE([JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE], [ ], [ ]) +elif test "x${je_cv_strerror_r_header_pass}" = "xno" ; then + AC_MSG_ERROR([cannot determine return type of strerror_r]) fi dnl ============================================================================ From 3688dfb5c3b7d94a12e18b753c0fc9c405b77b1f Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 19 Mar 2025 17:26:33 -0700 Subject: [PATCH 2485/2608] fix assertion error in huge_arena_auto_thp_switch() when b0 is deleted in unit test --- include/jemalloc/internal/arena_externs.h | 2 +- src/arena.c | 9 +++++-- src/base.c | 30 ++++++++++++++++------- src/jemalloc.c | 6 ++--- 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 8dd5b015..91fed258 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -106,7 +106,7 @@ unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); arena_t 
*arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); -bool arena_init_huge(arena_t *a0); +bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); arena_t *arena_choose_huge(tsd_t *tsd); bin_t *arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); diff --git a/src/arena.c b/src/arena.c index 84d4e14c..0a0c97ef 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1904,7 +1904,7 @@ arena_choose_huge(tsd_t *tsd) { } bool -arena_init_huge(arena_t *a0) { +arena_init_huge(tsdn_t *tsdn, arena_t *a0) { bool huge_enabled; assert(huge_arena_ind == 0); @@ -1922,13 +1922,18 @@ arena_init_huge(arena_t *a0) { /* a0 init happened before malloc_conf_init. */ atomic_store_zu(&a0->pa_shard.pac.oversize_threshold, oversize_threshold, ATOMIC_RELAXED); - /* Initialize huge arena THP settings for PAC. */ + /* Initialize huge_arena_pac_thp fields. */ + base_t *b0 = a0->base; + /* Make sure that b0 thp auto-switch won't happen concurrently here. */ + malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp && metadata_thp_enabled() && (opt_thp == thp_mode_default) && (init_system_thp_mode == thp_mode_default); + (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive); edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list); + malloc_mutex_unlock(tsdn, &b0->mtx); huge_enabled = true; } diff --git a/src/base.c b/src/base.c index 13367697..52f3d1d3 100644 --- a/src/base.c +++ b/src/base.c @@ -153,17 +153,19 @@ base_get_num_blocks(base_t *base, bool with_new_block) { static void huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) { assert(opt_huge_arena_pac_thp); - assert(!pac_thp->auto_thp_switched); - - arena_t *huge_arena; - if (huge_arena_ind == 0 || (huge_arena = arena_get(tsdn, huge_arena_ind, - false)) == NULL) { - /* Huge arena hasn't been init yet, simply turn the switch on. 
*/ - pac_thp->auto_thp_switched = true; +#ifdef JEMALLOC_JET + if (pac_thp->auto_thp_switched) { return; } +#else + /* + * The switch should be turned on only once when the b0 auto thp switch is + * turned on, unless it's a unit test where b0 gets deleted and then + * recreated. + */ + assert(!pac_thp->auto_thp_switched); +#endif - assert(huge_arena != NULL); edata_list_active_t *pending_list; malloc_mutex_lock(tsdn, &pac_thp->lock); pending_list = &pac_thp->thp_lazy_list; @@ -221,9 +223,19 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { /* Handle the THP auto switch for the huge arena. */ if (!huge_arena_pac_thp.thp_madvise || base_ind_get(base) != 0) { - /* Only b0 metadata auto thp switch do the trigger. */ + /* + * The huge arena THP auto-switch is triggered only by b0 switch, + * provided that the huge arena is initialized. If b0 switch is enabled + * before huge arena is ready, the huge arena switch will be enabled + * during huge_arena_pac_thp initialization. + */ return; } + /* + * thp_madvise above is by default false and set in arena_init_huge() with + * b0 mtx held. So if we reach here, it means the entire huge_arena_pac_thp + * is initialized and we can safely switch the THP. 
+ */ malloc_mutex_unlock(tsdn, &base->mtx); huge_arena_auto_thp_switch(tsdn, &huge_arena_pac_thp); malloc_mutex_lock(tsdn, &base->mtx); diff --git a/src/jemalloc.c b/src/jemalloc.c index 4939d954..d7b46d6c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -2097,7 +2097,7 @@ percpu_arena_as_initialized(percpu_arena_mode_t mode) { } static bool -malloc_init_narenas(void) { +malloc_init_narenas(tsdn_t *tsdn) { assert(ncpus > 0); if (opt_percpu_arena != percpu_arena_disabled) { @@ -2164,7 +2164,7 @@ malloc_init_narenas(void) { narenas_auto); } narenas_total_set(narenas_auto); - if (arena_init_huge(a0)) { + if (arena_init_huge(tsdn, a0)) { narenas_total_inc(); } manual_arena_base = narenas_total_get(); @@ -2248,7 +2248,7 @@ malloc_init_hard(void) { /* Set reentrancy level to 1 during init. */ pre_reentrancy(tsd, NULL); /* Initialize narenas before prof_boot2 (for allocation). */ - if (malloc_init_narenas() + if (malloc_init_narenas(tsd_tsdn(tsd)) || background_thread_boot1(tsd_tsdn(tsd), b0get())) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } From 80e9001af33558c4ea991fcf5a715f3a7942a40e Mon Sep 17 00:00:00 2001 From: "Kaspar M. Rohrer" Date: Sat, 29 Mar 2025 23:51:20 +0100 Subject: [PATCH 2486/2608] Move `extern "C" specifications for C++ to where they are needed This should fix errors when compiling C++ code with modules enabled on clang. 
--- include/jemalloc/jemalloc.sh | 6 ------ include/jemalloc/jemalloc_protos.h.in | 8 ++++++++ include/jemalloc/jemalloc_typedefs.h.in | 8 ++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/include/jemalloc/jemalloc.sh b/include/jemalloc/jemalloc.sh index dacd6195..9eaca266 100755 --- a/include/jemalloc/jemalloc.sh +++ b/include/jemalloc/jemalloc.sh @@ -6,9 +6,6 @@ cat < Date: Wed, 12 Mar 2025 12:26:52 -0700 Subject: [PATCH 2487/2608] if process_madvise is supported, call it when purging hpa --- Makefile.in | 1 + include/jemalloc/internal/hpa_hooks.h | 1 + src/hpa.c | 53 +++++- src/hpa_hooks.c | 16 +- test/unit/hpa.c | 58 +++++- test/unit/hpa_vectorized_madvise.c | 258 ++++++++++++++++++++++++++ test/unit/hpa_vectorized_madvise.sh | 3 + 7 files changed, 387 insertions(+), 3 deletions(-) create mode 100644 test/unit/hpa_vectorized_madvise.c create mode 100644 test/unit/hpa_vectorized_madvise.sh diff --git a/Makefile.in b/Makefile.in index b4102d0b..ee3399ec 100644 --- a/Makefile.in +++ b/Makefile.in @@ -231,6 +231,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index b04b04f6..d0618f89 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -13,6 +13,7 @@ struct hpa_hooks_s { void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); + bool (*vectorized_purge)(void* vec, size_t vlen, size_t nbytes); }; extern const hpa_hooks_t hpa_hooks_default; diff --git a/src/hpa.c b/src/hpa.c index c01dde13..adb106cc 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -8,6 +8,16 @@ #define HPA_EDEN_SIZE (128 * HUGEPAGE) +#define HPA_MIN_VAR_VEC_SIZE 8 
+#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +typedef struct iovec hpa_io_vector_t; +#else +typedef struct { + void *iov_base; + size_t iov_len; +} hpa_io_vector_t; +#endif + static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -422,6 +432,24 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } +/* If we fail vectorized purge, we will do single */ +static void +hpa_try_vectorized_purge(hpa_shard_t *shard, hpa_io_vector_t *vec, + size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge(vec[i].iov_base, + vec[i].iov_len); + } + } +} + /* Returns whether or not we purged anything. */ static bool hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { @@ -470,14 +498,37 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } size_t total_purged = 0; uint64_t purges_this_pass = 0; + + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + size_t len = opt_process_madvise_max_batch == 0 ? 
+ HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + void *purge_addr; size_t purge_size; + size_t cur = 0; + size_t total_batch_bytes = 0; while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, &purge_size)) { + vec[cur].iov_base = purge_addr; + vec[cur].iov_len = purge_size; total_purged += purge_size; assert(total_purged <= HUGEPAGE); purges_this_pass++; - shard->central->hooks.purge(purge_addr, purge_size); + total_batch_bytes += purge_size; + cur++; + if (cur == len) { + hpa_try_vectorized_purge(shard, vec, len, total_batch_bytes); + assert(total_batch_bytes > 0); + cur = 0; + total_batch_bytes = 0; + } + } + + /* Batch was not full */ + if (cur > 0) { + hpa_try_vectorized_purge(shard, vec, cur, total_batch_bytes); } malloc_mutex_lock(tsdn, &shard->mtx); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 4628c14f..072d490e 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -10,6 +10,8 @@ static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); +static bool hpa_hooks_vectorized_purge( + void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_map, @@ -18,7 +20,8 @@ const hpa_hooks_t hpa_hooks_default = { &hpa_hooks_hugify, &hpa_hooks_dehugify, &hpa_hooks_curtime, - &hpa_hooks_ms_since + &hpa_hooks_ms_since, + &hpa_hooks_vectorized_purge }; static void * @@ -78,3 +81,14 @@ static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime) { return nstime_ms_since(past_nstime); } + + +/* Return true if we did not purge all nbytes, or on some error */ +static bool +hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE + return pages_purge_process_madvise(vec, vlen, nbytes); +#else + return true; +#endif +} diff --git a/test/unit/hpa.c 
b/test/unit/hpa.c index ceed9bd8..e53ee2ec 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -357,6 +357,16 @@ defer_test_purge(void *ptr, size_t size) { ++ndefer_purge_calls; } +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return false; +} + static size_t ndefer_hugify_calls = 0; static bool defer_test_hugify(void *ptr, size_t size, bool sync) { @@ -392,6 +402,7 @@ TEST_BEGIN(test_defer_time) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -506,6 +517,7 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -548,6 +560,7 @@ TEST_BEGIN(test_min_purge_interval) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -598,6 +611,7 @@ TEST_BEGIN(test_purge) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -664,6 +678,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t 
opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -732,6 +747,7 @@ TEST_BEGIN(test_demand_purge_slack) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -799,6 +815,7 @@ TEST_BEGIN(test_demand_purge_tight) { hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; hpa_shard_opts_t opts = test_hpa_shard_opts_default; opts.deferral_allowed = true; @@ -855,6 +872,44 @@ TEST_BEGIN(test_demand_purge_tight) { } TEST_END +TEST_BEGIN(test_vectorized_opt_eq_zero) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + + defer_vectorized_purge_called = false; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_false(defer_vectorized_purge_called, "No vec purge"); + expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); + + destroy_test_data(shard); +} 
+TEST_END + int main(void) { /* @@ -880,5 +935,6 @@ main(void) { test_purge, test_experimental_max_purge_nhp, test_demand_purge_slack, - test_demand_purge_tight); + test_demand_purge_tight, + test_vectorized_opt_eq_zero); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c new file mode 100644 index 00000000..130dc699 --- /dev/null +++ b/test/unit/hpa_vectorized_madvise.c @@ -0,0 +1,258 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1, + /* peak_demand_window_ms */ + 0 +}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = 
hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + ++ndefer_purge_calls; +} + +static size_t ndefer_vec_purge_calls = 0; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_vec_purge_calls; + return false; +} + +static bool defer_vec_purge_didfail = false; +static bool defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)vlen; + (void)nbytes; + defer_vec_purge_didfail = true; + return true; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_vectorized_failure_fallback) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = 
&defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge_fail; + defer_vec_purge_didfail = false; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected null edata"); + pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + + expect_true(defer_vec_purge_didfail, "Expect vec purge fail"); + expect_zu_eq(1, ndefer_purge_calls, "Expect non-vec purge"); + ndefer_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_more_regions_purged_from_one_page) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0) || + HUGEPAGE_PAGES <= 4); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for 
(int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate almost 3 pages out of 8, and to force batching + * leave the 2nd and 4th PAGE in the first 3 hugepages. + */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + int j = i % HUGEPAGE_PAGES; + if (j != 1 && j != 3) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We purge from 2 huge pages, each one 3 segments. That's 6 non + * vectorized calls, or 2 <= vc <=6 vectorized calls + * (depending on batch size). + */ + size_t nexpected = 2 * (1 + (3 - 1) / opt_process_madvise_max_batch); + expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_vectorized_failure_fallback, + test_more_regions_purged_from_one_page); +} diff --git a/test/unit/hpa_vectorized_madvise.sh b/test/unit/hpa_vectorized_madvise.sh new file mode 100644 index 00000000..c5d66afa --- /dev/null +++ b/test/unit/hpa_vectorized_madvise.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:2" From f81fb92a8984b767dae10dc54ef48d1d50e6e1de Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 14 Apr 2025 14:15:14 -0700 Subject: [PATCH 2488/2608] Remove Travis CI macOS configs (not supported anymore). 
--- .travis.yml | 27 --------------------------- scripts/gen_travis.py | 3 ++- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/.travis.yml b/.travis.yml index 387b36cf..5a83d757 100644 --- a/.travis.yml +++ b/.travis.yml @@ -352,33 +352,6 @@ jobs: - os: linux arch: arm64 env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" - - os: osx - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index 6f8ee505..fa98f2a2 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -347,7 +347,8 @@ def main(): # generate_linux(PPC64LE), generate_linux(ARM64), - generate_macos(AMD64), + # Starting April 1st, 2025, Travis no longer supports OSx/macOS builds + # generate_macos(AMD64), get_manual_jobs(), )) From c20a63a765dcd22f6b91676ab03507dd9d7b3e2d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 10 Apr 2025 15:07:20 -0700 Subject: [PATCH 2489/2608] Silence the uninitialized warning from clang. 
--- include/jemalloc/internal/jemalloc_internal_inlines_c.h | 2 +- include/jemalloc/internal/util.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index c7ef9161..39c196a5 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -496,7 +496,7 @@ bool free_fastpath(void *ptr, size_t size, bool size_hint) { assert(tsd_fast(tsd) || *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - emap_alloc_ctx_t alloc_ctx; + emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false}); size_t usize; if (!size_hint) { bool err = emap_alloc_ctx_try_lookup_fast(tsd, diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 6646386e..35aa26e6 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -29,7 +29,7 @@ * wherever the compiler fails to recognize that the variable is never used * uninitialized. */ -#define JEMALLOC_CC_SILENCE_INIT(v) = v +#define JEMALLOC_CC_SILENCE_INIT(...) = __VA_ARGS__ #ifdef __GNUC__ # define likely(x) __builtin_expect(!!(x), 1) From c23a6bfdf6eed78dbe9c2b39a3798d091843a957 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 10 Apr 2025 15:12:52 -0700 Subject: [PATCH 2490/2608] Add opt.limit_usize_gap to stats --- src/stats.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/stats.c b/src/stats.c index 6e77977f..db9b9f43 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1730,6 +1730,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") OPT_WRITE_SIZE_T("process_madvise_max_batch") + OPT_WRITE_BOOL("limit_usize_gap") emitter_dict_end(emitter); /* Close "opt". 
*/ From a3910b9802d066a72707d9d77bc981d05b74d761 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Thu, 24 Apr 2025 20:21:53 -0700 Subject: [PATCH 2491/2608] Avoid forced purging during thread-arena migration when bg thd is on. --- src/jemalloc.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index d7b46d6c..9451df77 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -494,8 +494,12 @@ arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); - if (arena_nthreads_get(oldarena, false) == 0) { - /* Purge if the old arena has no associated threads anymore. */ + if (arena_nthreads_get(oldarena, false) == 0 && + !background_thread_enabled()) { + /* + * Purge if the old arena has no associated threads anymore and + * no background threads. + */ arena_decay(tsd_tsdn(tsd), oldarena, /* is_background_thread */ false, /* all */ true); } From cfa90dfd80c4b3ca2b2678fb55cfc718bd9f42c6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 8 Apr 2025 09:51:53 -0700 Subject: [PATCH 2492/2608] Refactor hpa purging to prepare for vectorized call across multiple pages --- include/jemalloc/internal/hpa_utils.h | 82 +++++++++++++++++++++++++++ src/hpa.c | 63 +++++--------------- 2 files changed, 97 insertions(+), 48 deletions(-) create mode 100644 include/jemalloc/internal/hpa_utils.h diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h new file mode 100644 index 00000000..035d3b21 --- /dev/null +++ b/include/jemalloc/internal/hpa_utils.h @@ -0,0 +1,82 @@ +#ifndef JEMALLOC_INTERNAL_HPA_UTILS_H +#define JEMALLOC_INTERNAL_HPA_UTILS_H + +#include "jemalloc/internal/hpa.h" + +#define HPA_MIN_VAR_VEC_SIZE 8 +#ifdef JEMALLOC_HAVE_PROCESS_MADVISE +typedef struct iovec hpa_io_vector_t; +#else +typedef struct { + void *iov_base; + size_t iov_len; +} hpa_io_vector_t; +#endif + +/* Actually invoke hooks. 
If we fail vectorized, use single purges */ +static void +hpa_try_vectorized_purge( + hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge(vec[i].iov_base, vec[i].iov_len); + } + } +} + +/* + * This struct accumulates the regions for process_madvise. + * It invokes the hook when batch limit is reached + */ +typedef struct { + hpa_io_vector_t *vp; + size_t cur; + size_t total_bytes; + size_t capacity; +} hpa_range_accum_t; + +static inline void +hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { + ra->vp = v; + ra->capacity = sz; + ra->total_bytes = 0; + ra->cur = 0; +} + +static inline void +hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { + assert(ra->total_bytes > 0 && ra->cur > 0); + hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + ra->cur = 0; + ra->total_bytes = 0; +} + +static inline void +hpa_range_accum_add( + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + assert(ra->cur < ra->capacity); + + ra->vp[ra->cur].iov_base = addr; + ra->vp[ra->cur].iov_len = sz; + ra->total_bytes += sz; + ra->cur++; + + if (ra->cur == ra->capacity) { + hpa_range_accum_flush(ra, shard); + } +} + +static inline void +hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { + if (ra->cur > 0) { + hpa_range_accum_flush(ra, shard); + } +} + +#endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/src/hpa.c b/src/hpa.c index adb106cc..c6771352 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -2,22 +2,13 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa.h" +#include 
"jemalloc/internal/hpa_utils.h" #include "jemalloc/internal/fb.h" #include "jemalloc/internal/witness.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) -#define HPA_MIN_VAR_VEC_SIZE 8 -#ifdef JEMALLOC_HAVE_PROCESS_MADVISE -typedef struct iovec hpa_io_vector_t; -#else -typedef struct { - void *iov_base; - size_t iov_len; -} hpa_io_vector_t; -#endif - static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -432,22 +423,12 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } -/* If we fail vectorized purge, we will do single */ -static void -hpa_try_vectorized_purge(hpa_shard_t *shard, hpa_io_vector_t *vec, - size_t vlen, size_t nbytes) { - bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); - if (!success) { - /* On failure, it is safe to purge again (potential perf - * penalty) If kernel can tell exactly which regions - * failed, we could avoid that penalty. - */ - for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge(vec[i].iov_base, - vec[i].iov_len); - } - } +static inline size_t +hpa_process_madvise_max_iovec_len(void) { + assert(opt_process_madvise_max_batch <= + PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 ? + HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; } /* Returns whether or not we purged anything. */ @@ -498,38 +479,24 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } size_t total_purged = 0; uint64_t purges_this_pass = 0; - - assert(opt_process_madvise_max_batch <= - PROCESS_MADVISE_MAX_BATCH_LIMIT); - size_t len = opt_process_madvise_max_batch == 0 ? 
- HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + + size_t len = hpa_process_madvise_max_iovec_len(); VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + void *purge_addr; size_t purge_size; - size_t cur = 0; - size_t total_batch_bytes = 0; while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, &purge_size)) { - vec[cur].iov_base = purge_addr; - vec[cur].iov_len = purge_size; total_purged += purge_size; assert(total_purged <= HUGEPAGE); + hpa_range_accum_add(&accum, purge_addr, purge_size, shard); purges_this_pass++; - total_batch_bytes += purge_size; - cur++; - if (cur == len) { - hpa_try_vectorized_purge(shard, vec, len, total_batch_bytes); - assert(total_batch_bytes > 0); - cur = 0; - total_batch_bytes = 0; - } - } - - /* Batch was not full */ - if (cur > 0) { - hpa_try_vectorized_purge(shard, vec, cur, total_batch_bytes); } + /* If batch was not full, finish */ + hpa_range_accum_finish(&accum, shard); malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ From 0dfb4a5a1a83f0968f8499c101dc98586a582546 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 8 Apr 2025 10:49:05 -0700 Subject: [PATCH 2493/2608] Add output argument to hpa_purge_begin to count dirty ranges --- include/jemalloc/internal/hpdata.h | 6 ++++-- src/hpa.c | 4 +++- src/hpdata.c | 7 ++++++- test/unit/hpdata.c | 15 +++++++++++---- test/unit/psset.c | 4 +++- 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index a8a845ec..a8a4a552 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -395,9 +395,11 @@ struct hpdata_purge_state_s { * until you're done, and then end. Allocating out of an hpdata undergoing * purging is not allowed. * - * Returns the number of dirty pages that will be purged. 
+ * Returns the number of dirty pages that will be purged and sets nranges + * to number of ranges with dirty pages that will be purged. */ -size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state); +size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + size_t *nranges); /* * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to diff --git a/src/hpa.c b/src/hpa.c index c6771352..afcfbe7f 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -465,8 +465,10 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { /* Gather all the metadata we'll need during the purge. */ bool dehugify = hpdata_huge_get(to_purge); + size_t nranges; hpdata_purge_state_t purge_state; - size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state); + size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state, &nranges); + (void) nranges; /*not used yet */ shard->npending_purge += num_to_purge; diff --git a/src/hpdata.c b/src/hpdata.c index 3058eafe..f3e347c4 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -164,7 +164,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } size_t -hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { +hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, + size_t *nranges) { hpdata_assert_consistent(hpdata); /* * See the comment below; we might purge any inactive extent, so it's @@ -216,6 +217,7 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { fb_init(purge_state->to_purge, HUGEPAGE_PAGES); size_t next_bit = 0; + *nranges = 0; while (next_bit < HUGEPAGE_PAGES) { size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, next_bit); @@ -239,6 +241,7 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty, last_dirty - next_dirty + 1); + (*nranges)++; next_bit = next_active + 1; } @@ -249,6 +252,8 @@ hpdata_purge_begin(hpdata_t *hpdata, 
hpdata_purge_state_t *purge_state) { purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(*nranges <= ndirty); + assert(ndirty == 0 || *nranges > 0); hpdata_assert_consistent(hpdata); diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 288e71d4..995ab77b 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -69,8 +69,10 @@ TEST_BEGIN(test_purge_simple) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + size_t nranges; + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); + expect_zu_eq(1, nranges, "All dirty pages in a single range"); void *purge_addr; size_t purge_size; @@ -113,8 +115,10 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state); + size_t nranges; + size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); + expect_zu_eq(2, nranges, "First quarter and last half"); void *purge_addr; size_t purge_size; @@ -171,8 +175,10 @@ TEST_BEGIN(test_purge_over_retained) { /* Purge the second quarter. */ hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + size_t nranges; + size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, ""); + expect_zu_eq(1, nranges, "Second quarter only"); bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); @@ -199,8 +205,9 @@ TEST_BEGIN(test_purge_over_retained) { * re-purge it. We expect a single purge of 3/4 of the hugepage, * purging half its pages. 
*/ - to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state); + to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, ""); + expect_zu_eq(1, nranges, "Single range expected"); got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, &purge_size); diff --git a/test/unit/psset.c b/test/unit/psset.c index c400f3b9..b15d9af3 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -19,7 +19,9 @@ static void test_psset_fake_purge(hpdata_t *ps) { hpdata_purge_state_t purge_state; hpdata_alloc_allowed_set(ps, false); - hpdata_purge_begin(ps, &purge_state); + size_t nranges; + hpdata_purge_begin(ps, &purge_state, &nranges); + (void) nranges; void *addr; size_t size; while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { From 1956a54a434ec365fad22d7497d86495b0c31883 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Sat, 5 Apr 2025 12:14:14 -0700 Subject: [PATCH 2494/2608] [process_madvise] Use process_madvise across multiple huge_pages --- Makefile.in | 1 + include/jemalloc/internal/hpa_utils.h | 34 +++ src/extent.c | 1 + src/hpa.c | 240 ++++++++++++------ test/unit/hpa_vectorized_madvise.c | 82 +++++- .../unit/hpa_vectorized_madvise_large_batch.c | 199 +++++++++++++++ .../hpa_vectorized_madvise_large_batch.sh | 3 + 7 files changed, 482 insertions(+), 78 deletions(-) create mode 100644 test/unit/hpa_vectorized_madvise_large_batch.c create mode 100644 test/unit/hpa_vectorized_madvise_large_batch.sh diff --git a/Makefile.in b/Makefile.in index ee3399ec..ac8c51ff 100644 --- a/Makefile.in +++ b/Makefile.in @@ -232,6 +232,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ + $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ $(srcroot)test/unit/hpa_background_thread.c \ $(srcroot)test/unit/hpdata.c \ $(srcroot)test/unit/huge.c \ diff --git a/include/jemalloc/internal/hpa_utils.h 
b/include/jemalloc/internal/hpa_utils.h index 035d3b21..283510b9 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -79,4 +79,38 @@ hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { } } +/* + * For purging more than one page we use batch of these items + */ +typedef struct { + hpdata_purge_state_t state; + hpdata_t *hp; + bool dehugify; +} hpa_purge_item_t; + +typedef struct hpa_purge_batch_s hpa_purge_batch_t; +struct hpa_purge_batch_s { + hpa_purge_item_t *items; + size_t items_capacity; + /* Number of huge pages to purge in current batch */ + size_t item_cnt; + /* Number of ranges to purge in current batch */ + size_t nranges; + /* Total number of dirty pages in current batch*/ + size_t ndirty_in_batch; + + /* Max number of huge pages to purge */ + size_t max_hp; + /* + * Once we are above this watermark we should not add more pages + * to the same batch. This is because while we want to minimize + * number of madvise calls we also do not want to be preventing + * allocations from too many huge pages (which we have to do + * while they are being purged) + */ + size_t range_watermark; + + size_t npurged_hp_total; +}; + #endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/src/extent.c b/src/extent.c index 86b30f82..3425e1ce 100644 --- a/src/extent.c +++ b/src/extent.c @@ -12,6 +12,7 @@ /* Data. */ size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; +/* This option is intended for kernel tuning, not app tuning. */ size_t opt_process_madvise_max_batch = #ifdef JEMALLOC_HAVE_PROCESS_MADVISE PROCESS_MADVISE_MAX_BATCH_DEFAULT; diff --git a/src/hpa.c b/src/hpa.c index afcfbe7f..50614e42 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -423,6 +423,31 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } +/* + * This is used for jemalloc internal tuning and may change in the + * future based on production traffic. 
+ * + * This value protects two things: + * 1. Stack size + * 2. Number of huge pages that are being purged in a batch as + * we do not allow allocations while making *madvise + * syscall. + */ +#define HPA_PURGE_BATCH_MAX_DEFAULT 16 + +#ifndef JEMALLOC_JET +#define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT +#else +size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; +size_t +hpa_purge_max_batch_size_for_test_set(size_t new_size) { + size_t old_size = hpa_purge_max_batch_size_for_test; + hpa_purge_max_batch_size_for_test = new_size; + return old_size; +} +#define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test +#endif + static inline size_t hpa_process_madvise_max_iovec_len(void) { assert(opt_process_madvise_max_batch <= @@ -431,14 +456,48 @@ hpa_process_madvise_max_iovec_len(void) { HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; } -/* Returns whether or not we purged anything. */ -static bool -hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); +static inline void +hpa_purge_actual_unlocked(hpa_shard_t *shard, hpa_purge_item_t *batch, + size_t batch_sz) { + assert(batch_sz > 0); - hpdata_t *to_purge = psset_pick_purge(&shard->psset); + size_t len = hpa_process_madvise_max_iovec_len(); + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + + for (size_t i = 0; i < batch_sz; ++i) { + hpdata_t *to_purge = batch[i].hp; + + /* Actually do the purging, now that the lock is dropped. 
*/ + if (batch[i].dehugify) { + shard->central->hooks.dehugify(hpdata_addr_get(to_purge), + HUGEPAGE); + } + void *purge_addr; + size_t purge_size; + size_t total_purged_on_one_hp = 0; + while (hpdata_purge_next( + to_purge, &batch[i].state, &purge_addr, &purge_size)) { + total_purged_on_one_hp += purge_size; + assert(total_purged_on_one_hp <= HUGEPAGE); + hpa_range_accum_add(&accum, purge_addr, purge_size, shard); + } + } + hpa_range_accum_finish(&accum, shard); +} + +/* Prepare purge of one page. Return num of dirty regular pages on it + * Return 0 if no purgable huge page is found + * + * If there was a page to purge its purge state is initialized + */ +static inline size_t +hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { + hpdata_t *to_purge = psset_pick_purge(psset); if (to_purge == NULL) { - return false; + return 0; } assert(hpdata_purge_allowed_get(to_purge)); assert(!hpdata_changing_state_get(to_purge)); @@ -448,7 +507,7 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { * we're purging it (allocations and deallocations are * OK). */ - psset_update_begin(&shard->psset, to_purge); + psset_update_begin(psset, to_purge); assert(hpdata_alloc_allowed_get(to_purge)); hpdata_mid_purge_set(to_purge, true); hpdata_purge_allowed_set(to_purge, false); @@ -461,70 +520,115 @@ hpa_try_purge(tsdn_t *tsdn, hpa_shard_t *shard) { * (clearing out user data). */ hpdata_alloc_allowed_set(to_purge, false); - psset_update_end(&shard->psset, to_purge); + psset_update_end(psset, to_purge); + assert(b->item_cnt < b->items_capacity); + hpa_purge_item_t *hp_item = &b->items[b->item_cnt]; + b->item_cnt++; + hp_item->hp = to_purge; /* Gather all the metadata we'll need during the purge. 
*/ - bool dehugify = hpdata_huge_get(to_purge); + hp_item->dehugify = hpdata_huge_get(hp_item->hp); size_t nranges; - hpdata_purge_state_t purge_state; - size_t num_to_purge = hpdata_purge_begin(to_purge, &purge_state, &nranges); - (void) nranges; /*not used yet */ + size_t ndirty = + hpdata_purge_begin(hp_item->hp, &hp_item->state, &nranges); + /* We picked hp to purge, so it should have some dirty ranges */ + assert(ndirty > 0 && nranges >0); + b->ndirty_in_batch += ndirty; + b->nranges += nranges; + return ndirty; +} - shard->npending_purge += num_to_purge; - - malloc_mutex_unlock(tsdn, &shard->mtx); - - /* Actually do the purging, now that the lock is dropped. */ - if (dehugify) { - shard->central->hooks.dehugify(hpdata_addr_get(to_purge), - HUGEPAGE); - } - size_t total_purged = 0; - uint64_t purges_this_pass = 0; - - size_t len = hpa_process_madvise_max_iovec_len(); - VARIABLE_ARRAY(hpa_io_vector_t, vec, len); - - hpa_range_accum_t accum; - hpa_range_accum_init(&accum, vec, len); - - void *purge_addr; - size_t purge_size; - while (hpdata_purge_next(to_purge, &purge_state, &purge_addr, - &purge_size)) { - total_purged += purge_size; - assert(total_purged <= HUGEPAGE); - hpa_range_accum_add(&accum, purge_addr, purge_size, shard); - purges_this_pass++; - } - /* If batch was not full, finish */ - hpa_range_accum_finish(&accum, shard); - - malloc_mutex_lock(tsdn, &shard->mtx); - /* The shard updates */ - shard->npending_purge -= num_to_purge; - shard->stats.npurge_passes++; - shard->stats.npurges += purges_this_pass; - shard->central->hooks.curtime(&shard->last_purge, - /* first_reading */ false); - if (dehugify) { +/* Finish purge of one huge page. */ +static inline void +hpa_purge_finish_hp(tsdn_t *tsdn, hpa_shard_t *shard, + hpa_purge_item_t *hp_item) { + if (hp_item->dehugify) { shard->stats.ndehugifies++; } - /* The hpdata updates. 
*/ - psset_update_begin(&shard->psset, to_purge); - if (dehugify) { - hpdata_dehugify(to_purge); + psset_update_begin(&shard->psset, hp_item->hp); + if (hp_item->dehugify) { + hpdata_dehugify(hp_item->hp); } - hpdata_purge_end(to_purge, &purge_state); - hpdata_mid_purge_set(to_purge, false); + hpdata_purge_end(hp_item->hp, &hp_item->state); + hpdata_mid_purge_set(hp_item->hp, false); - hpdata_alloc_allowed_set(to_purge, true); - hpa_update_purge_hugify_eligibility(tsdn, shard, to_purge); + hpdata_alloc_allowed_set(hp_item->hp, true); + hpa_update_purge_hugify_eligibility(tsdn, shard, hp_item->hp); - psset_update_end(&shard->psset, to_purge); + psset_update_end(&shard->psset, hp_item->hp); +} - return true; +static inline bool +hpa_batch_full(hpa_purge_batch_t *b) { + /* It's okay for ranges to go above */ + return b->npurged_hp_total == b->max_hp || + b->item_cnt == b->items_capacity || + b->nranges >= b->range_watermark; +} + +static inline void +hpa_batch_pass_start(hpa_purge_batch_t *b) { + b->item_cnt = 0; + b->nranges = 0; + b->ndirty_in_batch = 0; +} + +static inline bool +hpa_batch_empty(hpa_purge_batch_t *b) { + return b->item_cnt == 0; +} + +/* Returns number of huge pages purged. 
*/ +static inline size_t +hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + assert(max_hp > 0); + + assert(HPA_PURGE_BATCH_MAX > 0); + assert(HPA_PURGE_BATCH_MAX < + (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t))); + VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX); + hpa_purge_batch_t batch = { + .max_hp = max_hp, + .npurged_hp_total = 0, + .items = &items[0], + .items_capacity = HPA_PURGE_BATCH_MAX, + .range_watermark = hpa_process_madvise_max_iovec_len(), + }; + assert(batch.range_watermark > 0); + + while (1) { + hpa_batch_pass_start(&batch); + assert(hpa_batch_empty(&batch)); + while(!hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { + size_t ndirty = hpa_purge_start_hp(&batch, &shard->psset); + if (ndirty == 0) { + break; + } + shard->npending_purge += ndirty; + batch.npurged_hp_total++; + } + + if (hpa_batch_empty(&batch)) { + break; + } + malloc_mutex_unlock(tsdn, &shard->mtx); + hpa_purge_actual_unlocked(shard, batch.items, batch.item_cnt); + malloc_mutex_lock(tsdn, &shard->mtx); + + /* The shard updates */ + shard->npending_purge -= batch.ndirty_in_batch; + shard->stats.npurges += batch.ndirty_in_batch; + shard->central->hooks.curtime(&shard->last_purge, + /* first_reading */ false); + for (size_t i=0; imtx); + shard->stats.npurge_passes++; + return batch.npurged_hp_total; } /* Returns whether or not we hugified anything. */ @@ -654,19 +758,9 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, max_purges = max_purge_nhp; } - while (hpa_should_purge(tsdn, shard) && nops < max_purges) { - if (!hpa_try_purge(tsdn, shard)) { - /* - * It is fine if we couldn't purge as sometimes - * we try to purge just to unblock - * hugification, but there is maybe no dirty - * pages at all at the moment. 
- */ - break; - } - malloc_mutex_assert_owner(tsdn, &shard->mtx); - nops++; - } + malloc_mutex_assert_owner(tsdn, &shard->mtx); + nops += hpa_purge(tsdn, shard, max_purges); + malloc_mutex_assert_owner(tsdn, &shard->mtx); } /* diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 130dc699..ae25fdde 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -237,15 +237,86 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* We purge from 2 huge pages, each one 3 segments. That's 6 non - * vectorized calls, or 2 <= vc <=6 vectorized calls - * (depending on batch size). + /* We purge from 2 huge pages, each one 3 dirty continous segments. + * For opt_process_madvise_max_batch = 2, that is + * 2 calls for first page, and 2 calls for second as we don't + * want to hold the lock on the second page while vectorized batch + * of size 2 is already filled with the first one. 
*/ - size_t nexpected = 2 * (1 + (3 - 1) / opt_process_madvise_max_batch); + expect_zu_eq(4, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +size_t +hpa_purge_max_batch_size_for_test_set(size_t new_size); +TEST_BEGIN(test_more_pages_than_batch_page_size) { + test_skip_if(!hpa_supported() || + (opt_process_madvise_max_batch == 0) || + HUGEPAGE_PAGES <= 4); + + size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We have page batch size = 1. 
+ * we have 5 * HP active pages, 3 * HP dirty pages + * To achieve the balance of 25% max dirty we need to + * purge 2 pages. Since batch is 1 that must be 2 calls + * no matter what opt_process_madvise_max_batch is + */ + size_t nexpected = 2; expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); ndefer_vec_purge_calls = 0; + hpa_purge_max_batch_size_for_test_set(old_page_batch); + destroy_test_data(shard); } TEST_END @@ -254,5 +325,6 @@ int main(void) { return test_no_reentrancy( test_vectorized_failure_fallback, - test_more_regions_purged_from_one_page); + test_more_regions_purged_from_one_page, + test_more_pages_than_batch_page_size); } diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c new file mode 100644 index 00000000..99ce15f4 --- /dev/null +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -0,0 +1,199 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_default = { + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1, + /* peak_demand_window_ms */ + 0 +}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + 
&ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + (void)size; + ++ndefer_purge_calls; +} + +static size_t ndefer_vec_purge_calls = 0; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_vec_purge_calls; + return false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t 
*past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_vectorized_purge) { + test_skip_if(!hpa_supported() || + opt_process_madvise_max_batch == 0 || HUGEPAGE_PAGES <= 4); + assert(opt_process_madvise_max_batch == 64); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate almost 3 hugepages out of 8, and to force batching + * leave the 2nd and 4th PAGE in the first 3 hugepages. + */ + for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { + int j = i % HUGEPAGE_PAGES; + if (j != 1 && j != 3) { + pai_dalloc(tsdn, &shard->pai, edatas[i], + &deferred_work_generated); + } + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * We purge from 2 huge pages, each one 3 dirty continous segments. 
+ * For opt_process_madvise_max_batch = 64, that is all just one call + */ + expect_zu_eq(1, ndefer_vec_purge_calls, "Expect single purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_vectorized_purge); +} diff --git a/test/unit/hpa_vectorized_madvise_large_batch.sh b/test/unit/hpa_vectorized_madvise_large_batch.sh new file mode 100644 index 00000000..f996047f --- /dev/null +++ b/test/unit/hpa_vectorized_madvise_large_batch.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:64" From 852da1be150e9811a3f0ab91302c5d6e9ee62e4f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 25 Apr 2025 18:26:49 -0700 Subject: [PATCH 2495/2608] Add experimental option force using SYS_process_madvise --- configure.ac | 14 ++++++++++++++ .../jemalloc/internal/jemalloc_internal_defs.h.in | 2 ++ src/pages.c | 8 +++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 1c9c5067..f731e8b4 100644 --- a/configure.ac +++ b/configure.ac @@ -2457,6 +2457,13 @@ if test "x${je_cv_osatomic}" = "xyes" ; then fi dnl ============================================================================ + +AC_ARG_WITH([experimental_sys_process_madvise], + [AS_HELP_STRING([--with-experimental-sys-process-madvise=], + [Force process_madvise and use experimental-sys-process-madvise number when making syscall])], + [je_cv_sys_pmadv_nr="${with_experimental_sys_process_madvise}"], + [je_cv_sys_pmadv_nr=""]) + dnl Check for madvise(2). 
JE_COMPILABLE([madvise(2)], [ @@ -2554,6 +2561,13 @@ if test "x${je_cv_madvise}" = "xyes" ; then ], [je_cv_process_madvise]) if test "x${je_cv_process_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) + else + if test "x${je_cv_sys_pmadv_nr}" != "x" ; then + dnl Forcing experimental usage of process_madvise + AC_MSG_RESULT([Forcing usage of process_madvise with syscall nr=${je_cv_sys_pmadv_nr}]) + AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) + AC_DEFINE_UNQUOTED([EXPERIMENTAL_SYS_PROCESS_MADVISE_NR], [${je_cv_sys_pmadv_nr}], [ ]) + fi fi else dnl Check for posix_madvise. diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 2e47438a..c7218c66 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -348,6 +348,8 @@ /* Defined if process_madvise(2) is available. */ #undef JEMALLOC_HAVE_PROCESS_MADVISE +#undef EXPERIMENTAL_SYS_PROCESS_MADVISE_NR + /* Defined if mprotect(2) is available. 
*/ #undef JEMALLOC_HAVE_MPROTECT diff --git a/src/pages.c b/src/pages.c index babfd50f..d53e0fef 100644 --- a/src/pages.c +++ b/src/pages.c @@ -640,10 +640,16 @@ init_process_madvise(void) { return false; } +#ifdef SYS_process_madvise +#define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise +#else +#define JE_SYS_PROCESS_MADVISE_NR EXPERIMENTAL_SYS_PROCESS_MADVISE_NR +#endif + static bool pages_purge_process_madvise_impl(void *vec, size_t vec_len, size_t total_bytes) { - size_t purged_bytes = (size_t)syscall(SYS_process_madvise, pidfd, + size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pidfd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); return purged_bytes != total_bytes; From 01e9ecbeb2fa69ae8e9f3e1013c9f7d44f6d033e Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 15 Apr 2025 23:50:43 -0700 Subject: [PATCH 2496/2608] Remove build-time configuration 'config_limit_usize_gap' --- .travis.yml | 228 +++++++++--------- configure.ac | 19 -- include/jemalloc/internal/arena_inlines_b.h | 3 +- include/jemalloc/internal/emap.h | 50 ++-- .../internal/jemalloc_internal_defs.h.in | 6 - .../jemalloc/internal/jemalloc_preamble.h.in | 8 - include/jemalloc/internal/sc.h | 2 +- include/jemalloc/internal/sz.h | 6 +- include/jemalloc/internal/tcache_types.h | 6 +- scripts/gen_travis.py | 1 - src/arena.c | 33 +-- src/ctl.c | 3 +- src/jemalloc.c | 34 +-- test/unit/arena_reset.c | 3 +- test/unit/mallctl.c | 2 +- 15 files changed, 160 insertions(+), 244 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5a83d757..433288cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,346 +12,346 @@ jobs: include: - os: windows arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" 
EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - os: windows arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-fcommon" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - os: windows arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" + env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: 
CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: 
linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: 
CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" 
CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes COMPILER_FLAGS="-m32" CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-debug --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=percpu_arena:percpu" 
EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof 
--enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl 
--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc 
CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,percpu_arena:percpu" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ 
CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu,background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=clang CXX=clang++ CONFIGURE_FLAGS="--enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + env: CC=clang CXX=clang++ EXTRA_CFLAGS="-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-stats" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--disable-libdl --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc 
CXX=g++ CONFIGURE_FLAGS="--disable-libdl" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-opt-safety-checks" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29 --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-lg-page=16 --with-lg-hugepage=29" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-prof --enable-prof-frameptr" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=tcache:false" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=dss:primary" EXTRA_CFLAGS="-Werror -Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=percpu_arena:percpu" EXTRA_CFLAGS="-Werror 
-Wno-array-bounds" - os: linux arch: arm64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true --enable-limit-usize-gap" EXTRA_CFLAGS="-Werror -Wno-array-bounds" + env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--with-malloc-conf=background_thread:true" EXTRA_CFLAGS="-Werror -Wno-array-bounds" # Development build - os: linux env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug --disable-cache-oblivious --enable-stats --enable-log --enable-prof" EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/configure.ac b/configure.ac index f731e8b4..c615cab2 100644 --- a/configure.ac +++ b/configure.ac @@ -2757,24 +2757,6 @@ if test "x${have_pthread}" = "x1" -a "x${je_cv_os_unfair_lock}" != "xyes" -a \ AC_DEFINE([JEMALLOC_BACKGROUND_THREAD], [ ], [ ]) fi -dnl ============================================================================ -dnl Limit the gap between two contiguous usizes to be at most PAGE. -AC_ARG_ENABLE([limit_usize_gap], - [AS_HELP_STRING([--enable-limit-usize-gap], - [Limit the gap between two contiguous usizes])], -[if test "x$limit_usize_gap" = "xno" ; then - limit_usize_gap="0" -else - limit_usize_gap="1" -fi -], -[limit_usize_gap="0"] -) -if test "x$limit_usize_gap" = "x1" ; then - AC_DEFINE([LIMIT_USIZE_GAP], [ ]) -fi -AC_SUBST([limit_usize_gap]) - dnl ============================================================================ dnl Check for glibc malloc hooks @@ -3050,5 +3032,4 @@ AC_MSG_RESULT([cxx : ${enable_cxx}]) AC_MSG_RESULT([dss : ${enable_dss}]) AC_MSG_RESULT([tsan : ${enable_tsan}]) AC_MSG_RESULT([ubsan : ${enable_ubsan}]) -AC_MSG_RESULT([limit-usize-gap : ${limit_usize_gap}]) AC_MSG_RESULT([===============================================================================]) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 108493f2..4b765289 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -503,8 +503,7 @@ 
arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, assert(alloc_ctx.slab == edata_slab_get(edata)); emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); - assert(!config_limit_usize_gap || - emap_alloc_ctx_usize_get(&alloc_ctx) == + assert(emap_alloc_ctx_usize_get(&alloc_ctx) == edata_usize_get(edata)); } diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 5885daa6..06ed5d32 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -236,25 +236,16 @@ emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, size_t usize) { alloc_ctx->szind = szind; alloc_ctx->slab = slab; - /* - * When config_limit_usize_gap disabled, alloc_ctx->usize - * should not be accessed. - */ - if (config_limit_usize_gap) { - alloc_ctx->usize = usize; - assert(sz_limit_usize_gap_enabled() || - usize == sz_index2size(szind)); - } else if (config_debug) { - alloc_ctx->usize = SC_LARGE_MAXCLASS + 1; - } + alloc_ctx->usize = usize; + assert(sz_limit_usize_gap_enabled() || + usize == sz_index2size(szind)); } JEMALLOC_ALWAYS_INLINE size_t emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->szind < SC_NSIZES); - if (!config_limit_usize_gap || alloc_ctx->slab) { - assert(!config_limit_usize_gap || - alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + if (alloc_ctx->slab) { + assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } assert(sz_limit_usize_gap_enabled() || @@ -269,28 +260,15 @@ emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - if (config_limit_usize_gap) { - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); - /* - * If the alloc is invalid, do not calculate usize since edata - * could be corrupted. 
- */ - if (contents.metadata.szind == SC_NSIZES || - contents.edata == NULL) { - emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, - contents.metadata.slab, 0); - return; - } - emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, - contents.metadata.slab, edata_usize_get(contents.edata)); - } else { - rtree_metadata_t metadata = rtree_metadata_read(tsdn, - &emap->rtree, rtree_ctx, (uintptr_t)ptr); - /* alloc_ctx->usize will not be read/write in this case. */ - emap_alloc_ctx_init(alloc_ctx, metadata.szind, metadata.slab, - SC_LARGE_MAXCLASS + 1); - } + rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, + rtree_ctx, (uintptr_t)ptr); + /* + * If the alloc is invalid, do not calculate usize since edata + * could be corrupted. + */ + emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, + contents.metadata.slab, (contents.metadata.szind == SC_NSIZES + || contents.edata == NULL)? 0: edata_usize_get(contents.edata)); } /* The pointer must be mapped. */ diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index c7218c66..6d557959 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -480,12 +480,6 @@ /* If defined, use __int128 for optimization. */ #undef JEMALLOC_HAVE_INT128 -/* - * If defined, the gap between any two contiguous usizes should not exceed - * PAGE. 
- */ -#undef LIMIT_USIZE_GAP - #include "jemalloc/internal/jemalloc_internal_overrides.h" #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in index eba475a6..bbfe2513 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -283,12 +283,4 @@ static const bool have_memcntl = #endif ; -static const bool config_limit_usize_gap = -#ifdef LIMIT_USIZE_GAP - true -#else - false -#endif - ; - #endif /* JEMALLOC_PREAMBLE_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 098e47b7..3b9280d8 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -287,7 +287,7 @@ #endif /* - * When config_limit_usize_gap is enabled, the gaps between two contiguous + * When limit_usize_gap is enabled, the gaps between two contiguous * size classes should not exceed PAGE. This means there should be no concept * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). 
* However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 1122461c..6f161260 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -56,11 +56,7 @@ extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool sz_limit_usize_gap_enabled() { -#ifdef LIMIT_USIZE_GAP return opt_limit_usize_gap; -#else - return false; -#endif } JEMALLOC_ALWAYS_INLINE pszind_t @@ -356,7 +352,7 @@ sz_s2u_compute(size_t size) { JEMALLOC_ALWAYS_INLINE size_t sz_s2u_lookup(size_t size) { - assert(!config_limit_usize_gap || size < SC_LARGE_MINCLASS); + assert(size < SC_LARGE_MINCLASS); size_t ret = sz_index2size_lookup(sz_size2index_lookup(size)); assert(ret == sz_s2u_compute(size)); diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index f13ff748..b3828ecf 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -19,11 +19,7 @@ typedef struct tcaches_s tcaches_t; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define TCACHES_ELM_NEED_REINIT ((tcache_t *)(uintptr_t)1) -#ifdef LIMIT_USIZE_GAP - #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD -#else - #define TCACHE_LG_MAXCLASS_LIMIT 23 /* tcache_max = 8M */ -#endif +#define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) #define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index fa98f2a2..d43c802e 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -191,7 +191,6 @@ def format_job(os, arch, combination): if len(malloc_conf) > 0: configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) - configure_flags.append('--enable-limit-usize-gap') if not compilers: compiler = GCC.value 
else: diff --git a/src/arena.c b/src/arena.c index 0a0c97ef..1586ee91 100644 --- a/src/arena.c +++ b/src/arena.c @@ -154,17 +154,12 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, size_t curlextents = (size_t)(nmalloc - ndalloc); lstats[i].curlextents += curlextents; - if (config_limit_usize_gap) { - uint64_t active_bytes = locked_read_u64(tsdn, - LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[i].active_bytes); - locked_inc_u64_unsynchronized( - &lstats[i].active_bytes, active_bytes); - astats->allocated_large += active_bytes; - } else { - astats->allocated_large += - curlextents * sz_index2size(SC_NBINS + i); - } + uint64_t active_bytes = locked_read_u64(tsdn, + LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[i].active_bytes); + locked_inc_u64_unsynchronized( + &lstats[i].active_bytes, active_bytes); + astats->allocated_large += active_bytes; } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, @@ -333,11 +328,9 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].nmalloc, 1); - if (config_limit_usize_gap) { - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); - } + locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + &arena->stats.lstats[hindex].active_bytes, + usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -361,11 +354,9 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[hindex].ndalloc, 1); - if (config_limit_usize_gap) { - locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); - } + locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), + 
&arena->stats.lstats[hindex].active_bytes, + usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } diff --git a/src/ctl.c b/src/ctl.c index 1d7eace6..49820af6 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -2355,8 +2355,7 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_CGEN(config_limit_usize_gap, opt_limit_usize_gap, opt_limit_usize_gap, - bool) +CTL_RO_NL_GEN(opt_limit_usize_gap, opt_limit_usize_gap, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, diff --git a/src/jemalloc.c b/src/jemalloc.c index 9451df77..445955b0 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,12 +123,7 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -bool opt_limit_usize_gap = -#ifdef LIMIT_USIZE_GAP - true; -#else - false; -#endif +bool opt_limit_usize_gap = true; const char *const zero_realloc_mode_names[] = { "alloc", @@ -1785,10 +1780,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - if (config_limit_usize_gap) { - CONF_HANDLE_BOOL(opt_limit_usize_gap, - "limit_usize_gap"); - } + CONF_HANDLE_BOOL(opt_limit_usize_gap, + "limit_usize_gap"); CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR @@ -2209,17 +2202,16 @@ static bool malloc_init_hard(void) { tsd_t *tsd; - if (config_limit_usize_gap) { - assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); - assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); - /* - * This asserts an extreme case where TINY_MAXCLASS is larger - * than LARGE_MINCLASS. It could only happen if some constants - * are configured miserably wrong. 
- */ - assert(SC_LG_TINY_MAXCLASS <= - (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); - } + assert(TCACHE_MAXCLASS_LIMIT <= USIZE_GROW_SLOW_THRESHOLD); + assert(SC_LOOKUP_MAXCLASS <= USIZE_GROW_SLOW_THRESHOLD); + /* + * This asserts an extreme case where TINY_MAXCLASS is larger + * than LARGE_MINCLASS. It could only happen if some constants + * are configured miserably wrong. + */ + assert(SC_LG_TINY_MAXCLASS <= + (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 09536b29..42fa9a5d 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -78,8 +78,7 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { return 0; } - return config_limit_usize_gap? edata_usize_get(full_alloc_ctx.edata): - sz_index2size(full_alloc_ctx.szind); + return edata_usize_get(full_alloc_ctx.edata); } static unsigned diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 366b992b..7d4634e8 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,7 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); - TEST_MALLCTL_OPT(bool, limit_usize_gap, limit_usize_gap); + TEST_MALLCTL_OPT(bool, limit_usize_gap, always); TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT From 8347f1045aaf975192b06c3168a40a05ae8c206a Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 16 Apr 2025 11:57:55 -0700 Subject: [PATCH 2497/2608] Renaming limit_usize_gap to disable_large_size_classes --- include/jemalloc/internal/edata.h | 12 +++---- include/jemalloc/internal/emap.h | 4 +-- .../internal/jemalloc_internal_externs.h | 2 +- include/jemalloc/internal/sc.h | 10 +++--- include/jemalloc/internal/sz.h | 14 ++++---- src/ctl.c | 6 ++-- src/eset.c | 22 ++++++------- src/jemalloc.c | 
19 ++++++++--- src/pac.c | 33 ++++++++++--------- src/prof_data.c | 2 +- src/psset.c | 2 +- src/sec.c | 2 +- src/stats.c | 2 +- test/test.sh.in | 2 +- test/unit/arena_decay.c | 4 +-- test/unit/mallctl.c | 2 +- test/unit/size_classes.c | 4 +-- test/unit/size_classes.sh | 4 +-- 18 files changed, 78 insertions(+), 68 deletions(-) diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index b087ea31..e41e4efa 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -291,12 +291,12 @@ static inline size_t edata_usize_get(const edata_t *edata) { assert(edata != NULL); /* - * When sz_limit_usize_gap_enabled() is true, two cases: + * When sz_large_size_classes_disabled() is true, two cases: * 1. if usize_from_ind is not smaller than SC_LARGE_MINCLASS, * usize_from_size is accurate; * 2. otherwise, usize_from_ind is accurate. * - * When sz_limit_usize_gap_enabled() is not true, the two should be the + * When sz_large_size_classes_disabled() is not true, the two should be the * same when usize_from_ind is not smaller than SC_LARGE_MINCLASS. * * Note sampled small allocs will be promoted. Their extent size is @@ -316,9 +316,9 @@ edata_usize_get(const edata_t *edata) { } #endif - if (!sz_limit_usize_gap_enabled() || szind < SC_NBINS) { + if (!sz_large_size_classes_disabled() || szind < SC_NBINS) { size_t usize_from_ind = sz_index2size(szind); - if (!sz_limit_usize_gap_enabled() && + if (!sz_large_size_classes_disabled() && usize_from_ind >= SC_LARGE_MINCLASS) { size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); assert(size > sz_large_pad); @@ -332,8 +332,8 @@ edata_usize_get(const edata_t *edata) { assert(size > sz_large_pad); size_t usize_from_size = size - sz_large_pad; /* - * no matter limit-usize-gap enabled or not, usize retrieved from size - * is not accurate when smaller than SC_LARGE_MINCLASS. 
+ * no matter large size classes disabled or not, usize retrieved from + * size is not accurate when smaller than SC_LARGE_MINCLASS. */ assert(usize_from_size >= SC_LARGE_MINCLASS); return usize_from_size; diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index 06ed5d32..fba46abe 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -237,7 +237,7 @@ emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, alloc_ctx->szind = szind; alloc_ctx->slab = slab; alloc_ctx->usize = usize; - assert(sz_limit_usize_gap_enabled() || + assert(sz_large_size_classes_disabled() || usize == sz_index2size(szind)); } @@ -248,7 +248,7 @@ emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } - assert(sz_limit_usize_gap_enabled() || + assert(sz_large_size_classes_disabled() || alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); return alloc_ctx->usize; diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 83a37baf..3b42f833 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -39,7 +39,7 @@ extern atomic_zu_t zero_realloc_count; extern bool opt_cache_oblivious; extern unsigned opt_debug_double_free_max_scan; extern size_t opt_calloc_madvise_threshold; -extern bool opt_limit_usize_gap; +extern bool opt_disable_large_size_classes; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 3b9280d8..97956e7a 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -287,11 +287,11 @@ #endif /* - * When limit_usize_gap is enabled, the gaps between two contiguous - * size 
classes should not exceed PAGE. This means there should be no concept - * of size classes for sizes > SC_SMALL_MAXCLASS (or >= SC_LARGE_MINCLASS). - * However, between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and - * 2 * SC_NGROUP * PAGE, the size class also happens to be aligned with PAGE. + * When large size classes are disabled, there is no concept of size classes + * for sizes > SC_SMALLMAXCLASS (or >= SC_LARGE_MINCLASS). This ensures that + * the overhead between the usable size and the user request size will not + * exceed PAGE. Between SC_LARGE_MINCLASS (SC_NGROUP * PAGE) and + * 2 * SC_NGROUP * PAGE, the size classes also happen to be aligned with PAGE. * Since tcache relies on size classes to work and it greatly increases the * perf of allocs & deallocs, we extend the existence of size class to * 2 * SC_NGROUP * PAGE ONLY for the tcache module. This means for all other diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 6f161260..e6cfa6a9 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,8 +55,8 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool -sz_limit_usize_gap_enabled() { - return opt_limit_usize_gap; +sz_large_size_classes_disabled() { + return opt_disable_large_size_classes; } JEMALLOC_ALWAYS_INLINE pszind_t @@ -269,11 +269,11 @@ sz_index2size_unsafe(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); size_t size = sz_index2size_unsafe(index); /* - * With limit_usize_gap enabled, the usize above + * With large size classes disabled, the usize above * SC_LARGE_MINCLASS should grow by PAGE. 
However, for sizes * in [SC_LARGE_MINCLASS, USIZE_GROW_SLOW_THRESHOLD], the * usize would not change because the size class gap in this @@ -285,7 +285,7 @@ sz_index2size(szind_t index) { * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here * instead of SC_LARGE_MINCLASS. */ - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || size <= USIZE_GROW_SLOW_THRESHOLD); return size; } @@ -335,11 +335,11 @@ sz_s2u_compute(size_t size) { (ZU(1) << lg_ceil)); } #endif - if (size <= SC_SMALL_MAXCLASS || !sz_limit_usize_gap_enabled()) { + if (size <= SC_SMALL_MAXCLASS || !sz_large_size_classes_disabled()) { return sz_s2u_compute_using_delta(size); } else { /* - * With sz_limit_usize_gap_enabled() == true, usize of a large + * With sz_large_size_classes_disabled() == true, usize of a large * allocation is calculated by ceiling size to the smallest * multiple of PAGE to minimize the memory overhead, especially * when using hugepages. diff --git a/src/ctl.c b/src/ctl.c index 49820af6..92d254c1 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -170,7 +170,7 @@ CTL_PROTO(opt_prof_sys_thread_name) CTL_PROTO(opt_prof_time_res) CTL_PROTO(opt_lg_san_uaf_align) CTL_PROTO(opt_zero_realloc) -CTL_PROTO(opt_limit_usize_gap) +CTL_PROTO(opt_disable_large_size_classes) CTL_PROTO(opt_process_madvise_max_batch) CTL_PROTO(opt_malloc_conf_symlink) CTL_PROTO(opt_malloc_conf_env_var) @@ -564,7 +564,7 @@ static const ctl_named_node_t opt_node[] = { {NAME("zero_realloc"), CTL(opt_zero_realloc)}, {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, - {NAME("limit_usize_gap"), CTL(opt_limit_usize_gap)}, + {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} }; @@ -2355,7 +2355,7 @@ CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, 
zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_GEN(opt_limit_usize_gap, opt_limit_usize_gap, bool) +CTL_RO_NL_GEN(opt_disable_large_size_classes, opt_disable_large_size_classes, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, diff --git a/src/eset.c b/src/eset.c index 7dc9cce7..677162ff 100644 --- a/src/eset.c +++ b/src/eset.c @@ -232,7 +232,7 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, /* See comments in eset_first_fit for why we enumerate search below. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); - if (sz_limit_usize_gap_enabled() && pind != pind_prev) { + if (sz_large_size_classes_disabled() && pind != pind_prev) { edata_t *ret = NULL; ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, alignment); @@ -287,7 +287,7 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { - if (sz_limit_usize_gap_enabled()) { + if (sz_large_size_classes_disabled()) { pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); return eset_enumerate_search(eset, size, pind_prev, @@ -300,28 +300,28 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, /* * Each element in the eset->bins is a heap corresponding to a size - * class. When sz_limit_usize_gap_enabled() is false, all heaps after + * class. When sz_large_size_classes_disabled() is false, all heaps after * pind (including pind itself) will surely satisfy the rquests while * heaps before pind cannot satisfy the request because usize is * calculated based on size classes then. However, when - * sz_limit_usize_gap_enabled() is true, usize is calculated by ceiling - * user requested size to the closest multiple of PAGE. This means in - * the heap before pind, i.e., pind_prev, there may exist extents able - * to satisfy the request and we should enumerate the heap when - * pind_prev != pind. 
+ * sz_large_size_classes_disabled() is true, usize is calculated by + * ceiling user requested size to the closest multiple of PAGE. This + * means in the heap before pind, i.e., pind_prev, there may exist + * extents able to satisfy the request and we should enumerate the heap + * when pind_prev != pind. * * For example, when PAGE=4KB and the user requested size is 1MB + 4KB, - * usize would be 1.25MB when sz_limit_usize_gap_enabled() is false. + * usize would be 1.25MB when sz_large_size_classes_disabled() is false. * pind points to the heap containing extents ranging in * [1.25MB, 1.5MB). Thus, searching starting from pind will not miss - * any candidates. When sz_limit_usize_gap_enabled() is true, the + * any candidates. When sz_large_size_classes_disabled() is true, the * usize would be 1MB + 4KB and pind still points to the same heap. * In this case, the heap pind_prev points to, which contains extents * in the range [1MB, 1.25MB), may contain candidates satisfying the * usize and thus should be enumerated. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_limit_usize_gap_enabled() && pind != pind_prev){ + if (sz_large_size_classes_disabled() && pind != pind_prev){ ret = eset_enumerate_search(eset, size, pind_prev, /* exact_only */ false, &ret_summ); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 445955b0..360635a8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -123,7 +123,12 @@ zero_realloc_action_t opt_zero_realloc_action = atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); -bool opt_limit_usize_gap = true; +/* + * Disable large size classes is now the default behavior in jemalloc. + * Although it is configurable in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. 
+ */ +bool opt_disable_large_size_classes = true; const char *const zero_realloc_mode_names[] = { "alloc", @@ -1780,8 +1785,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "san_guard_large", 0, SIZE_T_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - CONF_HANDLE_BOOL(opt_limit_usize_gap, - "limit_usize_gap"); + /* + * Disable large size classes is now the default + * behavior in jemalloc. Although it is configurable + * in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. + */ + CONF_HANDLE_BOOL(opt_disable_large_size_classes, + "disable_large_size_classes"); CONF_ERROR("Invalid conf pair", k, klen, v, vlen); #undef CONF_ERROR @@ -2406,7 +2417,7 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_limit_usize_gap_enabled()? sz_s2u(size): + *usize = sz_large_size_classes_disabled()? sz_s2u(size): sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; diff --git a/src/pac.c b/src/pac.c index 12c1e444..e9ba7957 100644 --- a/src/pac.c +++ b/src/pac.c @@ -143,25 +143,26 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, } /* - * We batched allocate a larger extent when limit_usize_gap is enabled + * We batched allocate a larger extent with large size classes disabled * because the reuse of extents in the dirty pool is worse without size - * classes for large allocs. For instance, when limit_usize_gap is not - * enabled, 1.1MB, 1.15MB, and 1.2MB allocs will all be ceiled to - * 1.25MB and can reuse the same buffer if they are alloc & dalloc - * sequentially. However, with limit_usize_gap enabled, they cannot - * reuse the same buffer and their sequential allocs & dallocs will - * result in three different extents. Thus, we cache extra mergeable - * extents in the dirty pool to improve the reuse. 
We skip this - * optimization if both maps_coalesce and opt_retain are disabled - * because VM is not cheap enough to be used aggressively and extents - * cannot be merged at will (only extents from the same VirtualAlloc - * can be merged). Note that it could still be risky to cache more - * extents when either mpas_coalesce or opt_retain is enabled. Yet - * doing so is still beneficial in improving the reuse of extents - * with some limits. This choice should be reevaluated if + * classes for large allocs. For instance, when + * disable_large_size_classes is false, 1.1MB, 1.15MB, and 1.2MB allocs + * will all be ceiled to 1.25MB and can reuse the same buffer if they + * are alloc & dalloc sequentially. However, with + * disable_large_size_classes being true, they cannot reuse the same + * buffer and their sequential allocs & dallocs will result in three + * different extents. Thus, we cache extra mergeable extents in the + * dirty pool to improve the reuse. We skip this optimization if both + * maps_coalesce and opt_retain are disabled because VM is not cheap + * enough in such cases to be used aggressively and extents cannot be + * merged at will (only extents from the same VirtualAlloc can be + * merged). Note that it could still be risky to cache more extents + * when either mpas_coalesce or opt_retain is enabled. Yet doing + * so is still beneficial in improving the reuse of extents with some + * limits. This choice should be reevaluated if * pac_alloc_retained_batched_size is changed to be more aggressive. 
*/ - if (sz_limit_usize_gap_enabled() && edata == NULL && + if (sz_large_size_classes_disabled() && edata == NULL && (maps_coalesce || opt_retain)) { size_t batched_size = pac_alloc_retained_batched_size(size); /* diff --git a/src/prof_data.c b/src/prof_data.c index 437673ee..edc5c558 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -514,7 +514,7 @@ void prof_unbias_map_init(void) { #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { /* - * When limit_usize_gap is enabled, the unbiased calculation + * With large size classes disabled, the unbiased calculation * here is not as accurate as it was because usize now changes * in a finer grain while the unbiased_sz is still calculated * using the old way. diff --git a/src/psset.c b/src/psset.c index e617f426..97694301 100644 --- a/src/psset.c +++ b/src/psset.c @@ -368,7 +368,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { /* See comments in eset_first_fit for why we enumerate search below. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_limit_usize_gap_enabled() && pind_prev < min_pind) { + if (sz_large_size_classes_disabled() && pind_prev < min_pind) { ps = psset_enumerate_search(psset, pind_prev, size); if (ps != NULL) { return ps; diff --git a/src/sec.c b/src/sec.c index 8827d1bd..67585a71 100644 --- a/src/sec.c +++ b/src/sec.c @@ -29,7 +29,7 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_limit_usize_gap_enabled() || + assert(!sz_large_size_classes_disabled() || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); diff --git a/src/stats.c b/src/stats.c index db9b9f43..d3127483 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1730,7 +1730,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_CHAR_P("stats_interval_opts") OPT_WRITE_CHAR_P("zero_realloc") OPT_WRITE_SIZE_T("process_madvise_max_batch") - OPT_WRITE_BOOL("limit_usize_gap") + OPT_WRITE_BOOL("disable_large_size_classes") emitter_dict_end(emitter); /* Close "opt". */ diff --git a/test/test.sh.in b/test/test.sh.in index a4ee9396..dc13bc28 100644 --- a/test/test.sh.in +++ b/test/test.sh.in @@ -43,7 +43,7 @@ for t in $@; do # per test shell script to ignore the @JEMALLOC_CPREFIX@ detail). enable_fill=@enable_fill@ \ enable_prof=@enable_prof@ \ - limit_usize_gap=@limit_usize_gap@ \ + disable_large_size_classes=@disable_large_size_classes@ \ . @srcroot@${t}.sh && \ export_malloc_conf && \ $JEMALLOC_TEST_PREFIX ${t}@exe@ @abs_srcroot@ @abs_objroot@ diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 00a38326..177ba505 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -411,11 +411,11 @@ TEST_BEGIN(test_decay_never) { size_t pdirty_prev = get_arena_pdirty(arena_ind); size_t pmuzzy_prev = get_arena_pmuzzy(arena_ind); /* - * With limit_usize_gap enabled, some more extents + * With sz_large_size_classes_disabled() = true, some more extents * are cached in the dirty pool, making the assumption below * not true. 
*/ - if (!sz_limit_usize_gap_enabled()) { + if (!sz_large_size_classes_disabled()) { expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 7d4634e8..cf9b88aa 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -333,7 +333,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_sys_thread_name, prof); TEST_MALLCTL_OPT(ssize_t, lg_san_uaf_align, uaf_detection); TEST_MALLCTL_OPT(unsigned, debug_double_free_max_scan, always); - TEST_MALLCTL_OPT(bool, limit_usize_gap, always); + TEST_MALLCTL_OPT(bool, disable_large_size_classes, always); TEST_MALLCTL_OPT(size_t, process_madvise_max_batch, always); #undef TEST_MALLCTL_OPT diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index 24913803..c373829c 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -26,7 +26,7 @@ TEST_BEGIN(test_size_classes) { size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = sz_limit_usize_gap_enabled()? SC_SMALL_MAXCLASS: + max_size_class = sz_large_size_classes_disabled()? 
SC_SMALL_MAXCLASS: get_max_size_class(); max_index = sz_size2index(max_size_class); @@ -81,7 +81,7 @@ TEST_BEGIN(test_size_classes) { TEST_END TEST_BEGIN(test_grow_slow_size_classes) { - test_skip_if(!sz_limit_usize_gap_enabled()); + test_skip_if(!sz_large_size_classes_disabled()); size_t size = SC_LARGE_MINCLASS; size_t target_usize = SC_LARGE_MINCLASS; diff --git a/test/unit/size_classes.sh b/test/unit/size_classes.sh index 93d5e8d1..54363554 100644 --- a/test/unit/size_classes.sh +++ b/test/unit/size_classes.sh @@ -1,5 +1,3 @@ #!/bin/sh -if [ "x${limit_usize_gap}" = "x1" ] ; then - export MALLOC_CONF="limit_usize_gap:true" -fi +export MALLOC_CONF="disable_large_size_classes:true" From 37bf846cc38345947ff644bf47d7d51126353c09 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 2 May 2025 15:58:27 -0700 Subject: [PATCH 2498/2608] Fixes to prevent static analysis warnings. --- include/jemalloc/internal/hpa.h | 2 +- include/jemalloc/internal/sz.h | 2 +- src/eset.c | 4 ++-- src/hpa.c | 2 +- src/pac.c | 2 +- src/prof_sys.c | 2 +- src/psset.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index a384d04a..117c1c20 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -153,7 +153,7 @@ struct hpa_shard_s { peak_demand_t peak_demand; }; -bool hpa_hugepage_size_exceeds_limit(); +bool hpa_hugepage_size_exceeds_limit(void); /* * Whether or not the HPA can be used given the current configuration. 
This is * is not necessarily a guarantee that it backs its allocations by hugepages, diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index e6cfa6a9..3a32e232 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -55,7 +55,7 @@ extern size_t sz_large_pad; extern void sz_boot(const sc_data_t *sc_data, bool cache_oblivious); JEMALLOC_ALWAYS_INLINE bool -sz_large_size_classes_disabled() { +sz_large_size_classes_disabled(void) { return opt_disable_large_size_classes; } diff --git a/src/eset.c b/src/eset.c index 677162ff..b4666e2c 100644 --- a/src/eset.c +++ b/src/eset.c @@ -155,7 +155,7 @@ eset_remove(eset_t *eset, edata_t *edata) { cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } -edata_t * +static edata_t * eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, size_t alignment) { if (edata_heap_empty(&eset->bins[bin_ind].heap)) { @@ -191,7 +191,7 @@ eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, return NULL; } -edata_t * +static edata_t * eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, bool exact_only, edata_cmp_summary_t *ret_summ) { if (edata_heap_empty(&eset->bins[bin_ind].heap)) { diff --git a/src/hpa.c b/src/hpa.c index 50614e42..9b7ff744 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -26,7 +26,7 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool -hpa_hugepage_size_exceeds_limit() { +hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; } diff --git a/src/pac.c b/src/pac.c index e9ba7957..0e435717 100644 --- a/src/pac.c +++ b/src/pac.c @@ -112,7 +112,7 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } -size_t pac_alloc_retained_batched_size(size_t size) { +static size_t pac_alloc_retained_batched_size(size_t size) { if (size > SC_LARGE_MAXCLASS) { /* * A valid input with 
usize SC_LARGE_MAXCLASS could still diff --git a/src/prof_sys.c b/src/prof_sys.c index 642d8c89..e3b7bbcb 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -586,7 +586,7 @@ prof_getpid(void) { } static long -prof_get_pid_namespace() { +prof_get_pid_namespace(void) { long ret = 0; #if defined(_WIN32) || defined(__APPLE__) diff --git a/src/psset.c b/src/psset.c index 97694301..afe9f1c1 100644 --- a/src/psset.c +++ b/src/psset.c @@ -337,7 +337,7 @@ psset_update_end(psset_t *psset, hpdata_t *ps) { hpdata_assert_consistent(ps); } -hpdata_t * +static hpdata_t * psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { if (hpdata_age_heap_empty(&psset->pageslabs[pind])) { return NULL; From 3c14707b016b156c5f86dfd21304b01161c40750 Mon Sep 17 00:00:00 2001 From: Jiebin Sun Date: Fri, 25 Apr 2025 02:04:05 -0700 Subject: [PATCH 2499/2608] To improve reuse efficiency, the maximum coalesced size for large extents in the dirty ecache has been limited. This patch was tested with real workloads using ClickHouse (Clickbench Q35) on a system with 2x240 vCPUs. The results showed a 2X in query per second (QPS) performance and a reduction in page faults to 29% of the previous rate. Additionally, microbenchmark testing involved 256 memory reallocations resizing from 4KB to 16KB in one arena, which demonstrated a 5X performance improvement. 
Signed-off-by: Jiebin Sun --- src/extent.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/src/extent.c b/src/extent.c index 3425e1ce..03a3fdd8 100644 --- a/src/extent.c +++ b/src/extent.c @@ -888,7 +888,7 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, static edata_t * extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced) { + ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) { assert(!edata_guarded_get(edata)); assert(coalesced != NULL); *coalesced = false; @@ -908,7 +908,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce forward. */ edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); - if (next != NULL) { + size_t max_next_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; + if (next != NULL && edata_size_get(next) <= max_next_neighbor) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, next, true)) { if (ecache->delay_coalesce) { @@ -923,7 +924,8 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce backward. */ edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); - if (prev != NULL) { + size_t max_prev_neighbor = max_size > edata_size_get(edata) ? 
max_size - edata_size_get(edata) : 0; + if (prev != NULL && edata_size_get(prev) <= max_prev_neighbor) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, prev, false)) { edata = prev; @@ -947,14 +949,14 @@ static edata_t * extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced); + SC_LARGE_MAXCLASS, coalesced); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata, bool *coalesced) { + ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) { return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - coalesced); + max_size, coalesced); } /* Purge a single extent to retained / unmapped directly. */ @@ -1004,11 +1006,35 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ + /** + * Maximum size limit (max_size) for large extents waiting to be coalesced + * in dirty ecache. + * + * When set to a non-zero value, this parameter restricts the maximum size + * of large extents after coalescing. If the combined size of two extents + * would exceed this threshold, the coalescing operation is skipped. + * + * This improves dirty ecache reuse efficiency by: + * - Maintaining appropriately sized extents that match common allocation requests + * - Limiting large extent coalescence to prevent overly large extents that are + * less likely to be reused efficiently + * - Setting lg_max_coalesce for large extent merging scenarios, similar to how + * lg_max_fit is used during extent reuse + * + * Note that during extent decay/purge operations, no coalescing restrictions + * are applied to dirty ecache despite the delay_coalesce setting. 
This ensures + * that while improving dirty ecache reuse efficiency, we don't compromise + * the final coalescing that happens during the transition from dirty ecache + * to muzzy/retained ecache states. + */ + unsigned lg_max_coalesce = (unsigned)opt_lg_extent_max_active_fit; + size_t edata_size = edata_size_get(edata); + size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce) > edata_size ? (edata_size << lg_max_coalesce) : SC_LARGE_MAXCLASS; bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, pac, ehooks, - ecache, edata, &coalesced); + ecache, edata, max_size, &coalesced); } while (coalesced); if (edata_size_get(edata) >= atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) From 3cee771cfa1d3b0df5bab166fdcb654e60cca9bd Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 17 Apr 2025 15:29:34 -0700 Subject: [PATCH 2500/2608] Modify .clang-format to make it more aligned with current freebsd style --- .clang-format | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.clang-format b/.clang-format index 719c03c5..a890af4d 100644 --- a/.clang-format +++ b/.clang-format @@ -20,16 +20,16 @@ AlwaysBreakBeforeMultilineStrings: true BinPackArguments: true BinPackParameters: true BraceWrapping: - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - BeforeCatch: false - BeforeElse: false + AfterClass: true + AfterControlStatement: true + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: true + BeforeElse: true IndentBraces: false # BreakAfterJavaFieldAnnotations: true BreakBeforeBinaryOperators: NonAssignment @@ -43,7 +43,7 @@ ColumnLimit: 80 # CompactNamespaces: true # ConstructorInitializerAllOnOneLineOrOnePerLine: true # 
ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 2 +ContinuationIndentWidth: 4 Cpp11BracedListStyle: true DerivePointerAlignment: false DisableFormat: false @@ -57,7 +57,7 @@ ForEachMacros: [ ql_foreach, qr_foreach, ] # IncludeIsMainRegex: '' IndentCaseLabels: false IndentPPDirectives: AfterHash -IndentWidth: 4 +IndentWidth: 8 IndentWrappedFunctionNames: false # JavaImportGroups: [] # JavaScriptQuotes: Leave @@ -73,8 +73,8 @@ MaxEmptyLinesToKeep: 1 # ObjCSpaceAfterProperty: false # ObjCSpaceBeforeProtocolList: false -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakAssignment: 100 +PenaltyBreakBeforeFirstCallParameter: 100 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 @@ -96,7 +96,7 @@ PointerAlignment: Right # - 'cpp' # BasedOnStyle: llvm # CanonicalDelimiter: 'cc' -ReflowComments: true +ReflowComments: false SortIncludes: false SpaceAfterCStyleCast: false # SpaceAfterTemplateKeyword: true @@ -107,7 +107,7 @@ SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements # SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 +SpacesBeforeTrailingComments: 1 SpacesInAngles: false SpacesInCStyleCastParentheses: false # SpacesInContainerLiterals: false @@ -118,5 +118,5 @@ SpacesInSquareBrackets: false # used by some of the core jemalloc developers. # StatementMacros: [] TabWidth: 8 -UseTab: Never +UseTab: ForIndentation ... 
From 554185356bf990155df8d72060c4efe993642baf Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 18 Apr 2025 11:45:57 -0700 Subject: [PATCH 2501/2608] Sample format on tcache_max test --- test/unit/tcache_max.c | 151 +++++++++++++++++++---------------------- 1 file changed, 71 insertions(+), 80 deletions(-) diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index a64fca71..884ee7fe 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -3,12 +3,7 @@ const char *malloc_conf = TEST_SAN_UAF_ALIGN_DISABLE; -enum { - alloc_option_start = 0, - use_malloc = 0, - use_mallocx, - alloc_option_end -}; +enum { alloc_option_start = 0, use_malloc = 0, use_mallocx, alloc_option_end }; enum { dalloc_option_start = 0, @@ -59,14 +54,15 @@ dalloc_func(void *ptr, size_t sz, unsigned dalloc_option) { static size_t tcache_bytes_read_global(void) { uint64_t epoch; - assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); size_t tcache_bytes; size_t sz = sizeof(tcache_bytes); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", - &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return tcache_bytes; } @@ -88,8 +84,8 @@ tcache_bytes_read_local(void) { } static void tcache_bytes_check_update(size_t *prev, ssize_t diff) { - size_t tcache_bytes = global_test ? tcache_bytes_read_global(): - tcache_bytes_read_local(); + size_t tcache_bytes = global_test ? 
tcache_bytes_read_global() + : tcache_bytes_read_local(); expect_zu_eq(tcache_bytes, *prev + diff, "tcache bytes not expected"); *prev += diff; } @@ -108,8 +104,8 @@ test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, void *ptr1 = alloc_func(alloc_size, alloc_option); void *ptr2 = alloc_func(alloc_size, alloc_option); - size_t bytes = global_test ? tcache_bytes_read_global() : - tcache_bytes_read_local(); + size_t bytes = global_test ? tcache_bytes_read_global() + : tcache_bytes_read_local(); dalloc_func(ptr2, alloc_size, dalloc_option); /* Expect tcache_bytes increase after dalloc */ tcache_bytes_check_update(&bytes, diff); @@ -139,48 +135,48 @@ test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, } static void -test_tcache_max_impl(size_t target_tcache_max, unsigned alloc_option, - unsigned dalloc_option) { +test_tcache_max_impl( + size_t target_tcache_max, unsigned alloc_option, unsigned dalloc_option) { size_t tcache_max, sz; sz = sizeof(tcache_max); if (global_test) { assert_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); + &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); expect_zu_eq(tcache_max, target_tcache_max, "Global tcache_max not expected"); } else { - assert_d_eq(mallctl("thread.tcache.max", - (void *)&tcache_max, &sz, NULL,.0), 0, - "Unexpected.mallctl().failure"); + assert_d_eq(mallctl("thread.tcache.max", (void *)&tcache_max, + &sz, NULL, .0), + 0, "Unexpected.mallctl().failure"); expect_zu_eq(tcache_max, target_tcache_max, "Current thread's tcache_max not expected"); } test_tcache_bytes_alloc(1, tcache_max, alloc_option, dalloc_option); - test_tcache_bytes_alloc(tcache_max - 1, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(tcache_max, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(tcache_max + 1, tcache_max, alloc_option, - dalloc_option); + test_tcache_bytes_alloc( + tcache_max - 1, tcache_max, alloc_option, 
dalloc_option); + test_tcache_bytes_alloc( + tcache_max, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc( + tcache_max + 1, tcache_max, alloc_option, dalloc_option); - test_tcache_bytes_alloc(PAGE - 1, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(PAGE + 1, tcache_max, alloc_option, - dalloc_option); + test_tcache_bytes_alloc( + PAGE - 1, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc(PAGE, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc( + PAGE + 1, tcache_max, alloc_option, dalloc_option); size_t large; sz = sizeof(large); - assert_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + assert_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); - test_tcache_bytes_alloc(large - 1, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(large, tcache_max, alloc_option, - dalloc_option); - test_tcache_bytes_alloc(large + 1, tcache_max, alloc_option, - dalloc_option); + test_tcache_bytes_alloc( + large - 1, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc(large, tcache_max, alloc_option, dalloc_option); + test_tcache_bytes_alloc( + large + 1, tcache_max, alloc_option, dalloc_option); } TEST_BEGIN(test_tcache_max) { @@ -193,19 +189,17 @@ TEST_BEGIN(test_tcache_max) { size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena_ind, - sizeof(arena_ind)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); global_test = true; - for (alloc_option = alloc_option_start; - alloc_option < alloc_option_end; + for (alloc_option = 
alloc_option_start; alloc_option < alloc_option_end; alloc_option++) { for (dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; - dalloc_option++) { + dalloc_option < dalloc_option_end; dalloc_option++) { /* opt.tcache_max set to 1024 in tcache_max.sh. */ - test_tcache_max_impl(1024, alloc_option, - dalloc_option); + test_tcache_max_impl(1024, alloc_option, dalloc_option); } } global_test = false; @@ -229,8 +223,9 @@ validate_tcache_stack(tcache_t *tcache) { bool found = false; do { base_block_t *block = next; - if ((byte_t *)tcache_stack >= (byte_t *)block && - (byte_t *)tcache_stack < ((byte_t *)block + block->size)) { + if ((byte_t *)tcache_stack >= (byte_t *)block + && (byte_t *)tcache_stack + < ((byte_t *)block + block->size)) { found = true; break; } @@ -271,42 +266,42 @@ tcache_check(void *arg) { bool e0 = false, e1; size_t bool_sz = sizeof(bool); expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_true(e1, "Unexpected previous tcache state"); size_t temp_tcache_max = TCACHE_MAXCLASS_LIMIT - 1; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&temp_tcache_max, sz),.0, - "Unexpected.mallctl().failure"); + assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&temp_tcache_max, sz), + .0, "Unexpected.mallctl().failure"); old_tcache_max = tcache_max_get(tcache_slow); expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, "Unexpected value for tcache_max"); tcache_nbins = tcache_nbins_get(tcache_slow); expect_zu_eq(tcache_nbins, TCACHE_NBINS_MAX, "Unexpected value for tcache_nbins"); - assert_d_eq(mallctl("thread.tcache.max", - (void *)&old_tcache_max, &sz, - (void *)&min_tcache_max, sz),.0, - "Unexpected.mallctl().failure"); + assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, + (void *)&min_tcache_max, sz), + .0, "Unexpected.mallctl().failure"); 
expect_zu_eq(old_tcache_max, TCACHE_MAXCLASS_LIMIT, "Unexpected value for tcache_max"); /* Enable tcache, the set should still be valid. */ e0 = true; expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_false(e1, "Unexpected previous tcache state"); min_tcache_max = sz_s2u(min_tcache_max); expect_zu_eq(tcache_max_get(tcache_slow), min_tcache_max, "Unexpected value for tcache_max"); expect_zu_eq(tcache_nbins_get(tcache_slow), tcache_max2nbins(min_tcache_max), "Unexpected value for nbins"); - assert_d_eq(mallctl("thread.tcache.max", - (void *)&old_tcache_max, &sz, - (void *)&new_tcache_max, sz),.0, - "Unexpected.mallctl().failure"); - expect_zu_eq(old_tcache_max, min_tcache_max, - "Unexpected value for tcache_max"); + assert_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, + (void *)&new_tcache_max, sz), + .0, "Unexpected.mallctl().failure"); + expect_zu_eq( + old_tcache_max, min_tcache_max, "Unexpected value for tcache_max"); validate_tcache_stack(tcache); /* @@ -317,19 +312,17 @@ tcache_check(void *arg) { new_tcache_max = TCACHE_MAXCLASS_LIMIT; } old_tcache_max = tcache_max_get(tcache_slow); - expect_zu_eq(old_tcache_max, new_tcache_max, - "Unexpected value for tcache_max"); + expect_zu_eq( + old_tcache_max, new_tcache_max, "Unexpected value for tcache_max"); tcache_nbins = tcache_nbins_get(tcache_slow); expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max), "Unexpected value for tcache_nbins"); for (unsigned alloc_option = alloc_option_start; - alloc_option < alloc_option_end; - alloc_option++) { + alloc_option < alloc_option_end; alloc_option++) { for (unsigned dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; - dalloc_option++) { - test_tcache_max_impl(new_tcache_max, - alloc_option, dalloc_option); + dalloc_option < dalloc_option_end; dalloc_option++) { + test_tcache_max_impl( 
+ new_tcache_max, alloc_option, dalloc_option); } validate_tcache_stack(tcache); } @@ -348,14 +341,14 @@ TEST_BEGIN(test_thread_tcache_max) { VARIABLE_ARRAY(thd_t, threads, nthreads); VARIABLE_ARRAY(size_t, all_threads_tcache_max, nthreads); for (unsigned i = 0; i < nthreads; i++) { - all_threads_tcache_max[i] = 1024 * (1<<((i + 10) % 20)); + all_threads_tcache_max[i] = 1024 * (1 << ((i + 10) % 20)); if (i == nthreads - 1) { all_threads_tcache_max[i] = UINT_MAX; } } for (unsigned i = 0; i < nthreads; i++) { - thd_create(&threads[i], tcache_check, - &(all_threads_tcache_max[i])); + thd_create( + &threads[i], tcache_check, &(all_threads_tcache_max[i])); } for (unsigned i = 0; i < nthreads; i++) { thd_join(threads[i], NULL); @@ -365,7 +358,5 @@ TEST_END int main(void) { - return test( - test_tcache_max, - test_thread_tcache_max); + return test(test_tcache_max, test_thread_tcache_max); } From b6338c4ff6cd1a95b4f956a9028df420ad6f4761 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 19 May 2025 15:17:35 -0700 Subject: [PATCH 2502/2608] EASY - be explicit in non-vectorized hpa tests --- test/unit/hpa.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 test/unit/hpa.sh diff --git a/test/unit/hpa.sh b/test/unit/hpa.sh new file mode 100644 index 00000000..fe0e0b67 --- /dev/null +++ b/test/unit/hpa.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0" From f19a569216e829fcd646191277374387e8291c62 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Mon, 19 May 2025 15:15:49 -0700 Subject: [PATCH 2503/2608] Ignore formatting commit in blame. 
--- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 00000000..365e8bb1 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +554185356bf990155df8d72060c4efe993642baf From 9169e9272a9fb123702e04c77ff5326f29818f70 Mon Sep 17 00:00:00 2001 From: Xin Yang Date: Tue, 13 May 2025 11:01:36 +0800 Subject: [PATCH 2504/2608] Fix: Adjust CACHE_BIN_NFLUSH_BATCH_MAX size to prevent assert failures The maximum allowed value for `nflush_batch` is `CACHE_BIN_NFLUSH_BATCH_MAX`. However, `tcache_bin_flush_impl_small` could potentially declare an array of `emap_batch_lookup_result_t` of size `CACHE_BIN_NFLUSH_BATCH_MAX + 1`. leads to a `VARIABLE_ARRAY` assertion failure, observed when `tcache_nslots_small_max` is configured to 2048. This patch ensures the array size does not exceed the allowed maximum. Signed-off-by: Xin Yang --- include/jemalloc/internal/cache_bin.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index cb137af9..3ca7e287 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -616,7 +616,7 @@ struct cache_bin_fill_ctl_s { * This is to avoid stack overflow when we do batch edata look up, which * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. */ -#define CACHE_BIN_NFLUSH_BATCH_MAX (VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) +#define CACHE_BIN_NFLUSH_BATCH_MAX ((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1) /* * Filling and flushing are done in batch, on arrays of void *s. 
For filling, From 5e460bfea25c39d9bf8ea0077c3b6740e9515487 Mon Sep 17 00:00:00 2001 From: Xin Yang Date: Tue, 13 May 2025 11:25:13 +0800 Subject: [PATCH 2505/2608] Refactor: use the cache_bin_sz_t typedef instead of direct uint16_t any future changes to the underlying data type for bin sizes (such as upgrading from `uint16_t` to `uint32_t`) can be achieved by modifying only the `cache_bin_sz_t` definition. Signed-off-by: Xin Yang --- include/jemalloc/internal/cache_bin.h | 42 +++++++++++++-------------- src/cache_bin.c | 10 +++---- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 3ca7e287..7ab48dc9 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -108,7 +108,7 @@ struct cache_bin_s { * Since the stack grows down, this is a higher address than * low_bits_full. */ - uint16_t low_bits_low_water; + cache_bin_sz_t low_bits_low_water; /* * The low bits of the value that stack_head will take on when the array @@ -119,7 +119,7 @@ struct cache_bin_s { * Recall that since the stack grows down, this is the lowest available * address in the array for caching. Only adjusted when stashing items. */ - uint16_t low_bits_full; + cache_bin_sz_t low_bits_full; /* * The low bits of the value that stack_head will take on when the array @@ -128,7 +128,7 @@ struct cache_bin_s { * The stack grows down -- this is one past the highest address in the * array. Immutable after initialization. */ - uint16_t low_bits_empty; + cache_bin_sz_t low_bits_empty; /* The maximum number of cached items in the bin. */ cache_bin_info_t bin_info; @@ -222,7 +222,7 @@ cache_bin_ncached_max_get(cache_bin_t *bin) { * with later. 
*/ static inline void -cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { +cache_bin_assert_earlier(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { if (earlier > later) { assert(bin->low_bits_full > bin->low_bits_empty); } @@ -235,7 +235,7 @@ cache_bin_assert_earlier(cache_bin_t *bin, uint16_t earlier, uint16_t later) { * be associated with the position earlier in memory. */ static inline cache_bin_sz_t -cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { +cache_bin_diff(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { cache_bin_assert_earlier(bin, earlier, later); return later - earlier; } @@ -246,7 +246,7 @@ cache_bin_diff(cache_bin_t *bin, uint16_t earlier, uint16_t later) { static inline cache_bin_sz_t cache_bin_ncached_get_internal(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); cache_bin_sz_t n = diff / sizeof(void *); /* * We have undefined behavior here; if this function is called from the @@ -284,7 +284,7 @@ cache_bin_ncached_get_local(cache_bin_t *bin) { static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, - (uint16_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); + (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); byte_t *empty_bits = (byte_t *)bin->stack_head + diff; void **ret = (void **)empty_bits; @@ -303,9 +303,9 @@ cache_bin_empty_position_get(cache_bin_t *bin) { * multithreaded environment. Currently concurrent access happens only during * arena statistics collection. 
*/ -static inline uint16_t +static inline cache_bin_sz_t cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { - return (uint16_t)bin->low_bits_empty - + return (cache_bin_sz_t)bin->low_bits_empty - cache_bin_ncached_max_get(bin) * sizeof(void *); } @@ -351,7 +351,7 @@ cache_bin_low_water_get(cache_bin_t *bin) { assert(low_water <= cache_bin_ncached_max_get(bin)); assert(low_water <= cache_bin_ncached_get_local(bin)); - cache_bin_assert_earlier(bin, (uint16_t)(uintptr_t)bin->stack_head, + cache_bin_assert_earlier(bin, (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); return low_water; @@ -364,7 +364,7 @@ cache_bin_low_water_get(cache_bin_t *bin) { static inline void cache_bin_low_water_set(cache_bin_t *bin) { assert(!cache_bin_disabled(bin)); - bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; + bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)bin->stack_head; } static inline void @@ -391,7 +391,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { * be used. It's safe because the stack has one more slot reserved. 
*/ void *ret = *bin->stack_head; - uint16_t low_bits = (uint16_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head; void **new_head = bin->stack_head + 1; /* @@ -414,7 +414,7 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { */ if (likely(low_bits != bin->low_bits_empty)) { bin->stack_head = new_head; - bin->low_bits_low_water = (uint16_t)(uintptr_t)new_head; + bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)new_head; *success = true; return ret; } @@ -455,7 +455,7 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { JEMALLOC_ALWAYS_INLINE bool cache_bin_full(cache_bin_t *bin) { - return ((uint16_t)(uintptr_t)bin->stack_head == bin->low_bits_full); + return ((cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full); } /* @@ -503,7 +503,7 @@ cache_bin_dalloc_easy(cache_bin_t *bin, void *ptr) { bin->stack_head--; *bin->stack_head = ptr; cache_bin_assert_earlier(bin, bin->low_bits_full, - (uint16_t)(uintptr_t)bin->stack_head); + (cache_bin_sz_t)(uintptr_t)bin->stack_head); return true; } @@ -516,9 +516,9 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { } /* Stash at the full position, in the [full, head) range. */ - uint16_t low_bits_head = (uint16_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t)bin->stack_head; /* Wraparound handled as well. 
*/ - uint16_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + cache_bin_sz_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); *(void **)((byte_t *)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -532,7 +532,7 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, bin->low_bits_full) / sizeof(void *); @@ -541,7 +541,7 @@ cache_bin_nstashed_get_internal(cache_bin_t *bin) { /* Below are for assertions only. */ void **low_bound = cache_bin_low_bound_get(bin); - assert((uint16_t)(uintptr_t)low_bound == low_bits_low_bound); + assert((cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound); void *stashed = *(low_bound + n - 1); bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET @@ -586,12 +586,12 @@ cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { /* Racy version of cache_bin_ncached_get_internal. */ cache_bin_sz_t diff = bin->low_bits_empty - - (uint16_t)(uintptr_t)bin->stack_head; + (cache_bin_sz_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. 
*/ - uint16_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); *nstashed = n; /* diff --git a/src/cache_bin.c b/src/cache_bin.c index 6438705f..2f5afeb9 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -85,19 +85,19 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, */ void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); void *full_position = stack_cur; - uint16_t bin_stack_size = info->ncached_max * sizeof(void *); + cache_bin_sz_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; void *empty_position = (void *)((byte_t *)alloc + *cur_offset); /* Init to the empty position. */ bin->stack_head = (void **)empty_position; - bin->low_bits_low_water = (uint16_t)(uintptr_t)bin->stack_head; - bin->low_bits_full = (uint16_t)(uintptr_t)full_position; - bin->low_bits_empty = (uint16_t)(uintptr_t)empty_position; + bin->low_bits_low_water = (cache_bin_sz_t)(uintptr_t)bin->stack_head; + bin->low_bits_full = (cache_bin_sz_t)(uintptr_t)full_position; + bin->low_bits_empty = (cache_bin_sz_t)(uintptr_t)empty_position; cache_bin_info_init(&bin->bin_info, info->ncached_max); cache_bin_sz_t free_spots = cache_bin_diff(bin, - bin->low_bits_full, (uint16_t)(uintptr_t)bin->stack_head); + bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); if (!cache_bin_disabled(bin)) { assert(cache_bin_ncached_get_local(bin) == 0); From fd60645260b74645cd606bb6a48464890ab39dee Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 7 May 2025 16:37:26 -0700 Subject: [PATCH 2506/2608] Add one more check to double free validation. 
--- include/jemalloc/internal/arena_inlines_b.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 4b765289..61008b59 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -68,7 +68,8 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { "possibly caused by double free bugs.", ptr); return true; } - if (unlikely(input_size != edata_usize_get(edata))) { + if (unlikely(input_size != edata_usize_get(edata) || + input_size > SC_LARGE_MAXCLASS)) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); return true; From 1818170c8d4de24e0f3b67d7e0347b4ea8e2fec1 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 7 May 2025 17:39:20 -0700 Subject: [PATCH 2507/2608] Fix binshard.sh by specifying bin_shards for all sizes. --- test/unit/binshard.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/binshard.sh b/test/unit/binshard.sh index c1d58c88..1882e90a 100644 --- a/test/unit/binshard.sh +++ b/test/unit/binshard.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8" +export MALLOC_CONF="narenas:1,bin_shards:1-160:16|129-512:4|256-256:8|513-8070450532247928832:1" From 4531411abed6bc4e6740bdaa21abafe02960ee51 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 20 May 2025 11:58:26 -0700 Subject: [PATCH 2508/2608] Modify .clang-format to have declarations aligned --- .clang-format | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.clang-format b/.clang-format index a890af4d..527ec375 100644 --- a/.clang-format +++ b/.clang-format @@ -4,10 +4,10 @@ # AccessModifierOffset: -2 AlignAfterOpenBracket: DontAlign AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false +AlignConsecutiveDeclarations: true 
AlignEscapedNewlines: Right AlignOperands: false -AlignTrailingComments: false +AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: false AllowShortCaseLabelsOnASingleLine: false From edaab8b3ad752a845019985062689551cd6315c1 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Thu, 22 May 2025 22:59:00 -0700 Subject: [PATCH 2509/2608] Turn clang-format off for codes with multi-line commands in macros --- include/jemalloc/internal/atomic_c11.h | 2 ++ include/jemalloc/internal/atomic_gcc_sync.h | 2 ++ include/jemalloc/internal/atomic_msvc.h | 2 ++ include/jemalloc/internal/bit_util.h | 6 ++++++ include/jemalloc/internal/hpa_opts.h | 2 ++ include/jemalloc/internal/mpsc_queue.h | 2 ++ include/jemalloc/internal/rb.h | 2 ++ include/jemalloc/internal/seq.h | 2 ++ include/jemalloc/internal/tsd.h | 2 ++ test/unit/atomic.c | 2 ++ test/unit/bit_util.c | 2 ++ 11 files changed, 26 insertions(+) diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h index 74173b03..a37e9661 100644 --- a/include/jemalloc/internal/atomic_c11.h +++ b/include/jemalloc/internal/atomic_c11.h @@ -15,6 +15,7 @@ #define atomic_fence atomic_thread_fence +/* clang-format off */ #define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ /* unused */ lg_size) \ typedef _Atomic(type) atomic_##short_type##_t; \ @@ -59,6 +60,7 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ return atomic_compare_exchange_strong_explicit(a, expected, \ desired, success_mo, failure_mo); \ } +/* clang-format on */ /* * Integral types have some special operations available that non-integral ones diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index 21136bd0..801d6197 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -87,6 +87,7 @@ atomic_post_sc_store_fence() { } +/* clang-format off */ #define 
JEMALLOC_GENERATE_ATOMICS(type, short_type, \ /* unused */ lg_size) \ typedef struct { \ @@ -161,6 +162,7 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ return false; \ } \ } +/* clang-format on */ #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ /* unused */ lg_size) \ diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h index a429f1ab..5313aed9 100644 --- a/include/jemalloc/internal/atomic_msvc.h +++ b/include/jemalloc/internal/atomic_msvc.h @@ -118,6 +118,7 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ } +/* clang-format off */ #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ \ @@ -158,6 +159,7 @@ atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedXor, lg_size)( \ &a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ } +/* clang-format on */ #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index c413a75d..840dbde2 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -35,6 +35,7 @@ ffs_u(unsigned x) { return JEMALLOC_INTERNAL_FFS(x) - 1; } +/* clang-format off */ #define DO_FLS_SLOW(x, suffix) do { \ util_assume(x != 0); \ x |= (x >> 1); \ @@ -58,6 +59,7 @@ ffs_u(unsigned x) { } \ return ffs_##suffix(x) - 1; \ } while(0) +/* clang-format on */ static inline unsigned fls_llu_slow(unsigned long long x) { @@ -118,6 +120,7 @@ fls_u(unsigned x) { #define DO_BSR64(bit, x) bit = 0; unreachable() #endif +/* clang-format off */ #define DO_FLS(x) do { \ if (x == 0) { \ return 8 * sizeof(x); \ @@ -144,6 +147,7 @@ fls_u(unsigned x) { } \ unreachable(); \ } while (0) +/* clang-format on */ static inline unsigned fls_llu(unsigned long long x) { @@ -184,6 +188,7 @@ fls_u(unsigned x) { # error "Haven't implemented popcount for 16-byte 
ints." #endif +/* clang-format off */ #define DO_POPCOUNT(x, type) do { \ /* \ * Algorithm from an old AMD optimization reference manual. \ @@ -227,6 +232,7 @@ fls_u(unsigned x) { x >>= ((sizeof(x) - 1) * 8); \ return (unsigned)x; \ } while(0) +/* clang-format on */ static inline unsigned popcount_u_slow(unsigned bitmap) { diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 816bb577..e5517719 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -69,6 +69,7 @@ struct hpa_shard_opts_s { uint64_t peak_demand_window_ms; }; +/* clang-format off */ #define HPA_SHARD_OPTS_DEFAULT { \ /* slab_max_alloc */ \ 64 * 1024, \ @@ -95,5 +96,6 @@ struct hpa_shard_opts_s { /* peak_demand_window_ms */ \ 0 \ } +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_HPA_OPTS_H */ diff --git a/include/jemalloc/internal/mpsc_queue.h b/include/jemalloc/internal/mpsc_queue.h index d8aa624b..86f4898f 100644 --- a/include/jemalloc/internal/mpsc_queue.h +++ b/include/jemalloc/internal/mpsc_queue.h @@ -26,6 +26,7 @@ * two-stack tricks reverses orders in the lock-free first stack). */ +/* clang-format off */ #define mpsc_queue(a_type) \ struct { \ atomic_p_t tail; \ @@ -131,5 +132,6 @@ a_prefix##pop_batch(a_queue_type *queue, a_list_type *dst) { \ } \ ql_concat(dst, &reversed, a_link); \ } +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_MPSC_QUEUE_H */ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 5f2771a9..235d548e 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -38,6 +38,7 @@ */ #define RB_MAX_DEPTH (sizeof(void *) << 4) +/* clang-format off */ #ifdef RB_COMPACT /* Node structure. 
*/ #define rb_node(a_type) \ @@ -1871,5 +1872,6 @@ a_prefix##reverse_iter_filtered(a_rbt_type *rbtree, a_type *start, \ return ret; \ } \ ) /* end rb_summarized_only */ +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_RB_H */ diff --git a/include/jemalloc/internal/seq.h b/include/jemalloc/internal/seq.h index 9bb6b235..d2c0d1fc 100644 --- a/include/jemalloc/internal/seq.h +++ b/include/jemalloc/internal/seq.h @@ -8,6 +8,7 @@ * A simple seqlock implementation. */ +/* clang-format off */ #define seq_define(type, short_type) \ typedef struct { \ atomic_zu_t seq; \ @@ -52,5 +53,6 @@ seq_try_load_##short_type(type *dst, seq_##short_type##_t *src) { \ memcpy(dst, buf, sizeof(type)); \ return true; \ } +/* clang-format on */ #endif /* JEMALLOC_INTERNAL_SEQ_H */ diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index 4f22dcff..c06605df 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -32,6 +32,7 @@ TSD_DATA_FAST TSD_DATA_SLOWER #undef O +/* clang-format off */ /* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */ #define O(n, t, nt) \ JEMALLOC_ALWAYS_INLINE t * \ @@ -48,6 +49,7 @@ tsd_##n##p_get(tsd_t *tsd) { \ state == tsd_state_minimal_initialized); \ return tsd_##n##p_get_unsafe(tsd); \ } +/* clang-format on */ TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER diff --git a/test/unit/atomic.c b/test/unit/atomic.c index c2ec8c7e..6c4b85e5 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -15,6 +15,7 @@ * and val3 for desired. 
*/ +/* clang-format off */ #define DO_TESTS(t, ta, val1, val2, val3) do { \ t val; \ t expected; \ @@ -174,6 +175,7 @@ typedef struct { \ DO_INTEGER_TESTS(t, ta, test.val1, test.val2); \ } \ } while (0) +/* clang-format on */ TEST_STRUCT(uint64_t, u64); TEST_BEGIN(test_atomic_u64) { diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 295abb1b..4e9d2e16 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -226,6 +226,7 @@ expand_byte_to_mask(unsigned byte) { return result; } +/* clang-format off */ #define TEST_POPCOUNT(t, suf, pri_hex) do { \ t bmul = (t)0x0101010101010101ULL; \ for (unsigned i = 0; i < (1 << sizeof(t)); i++) { \ @@ -245,6 +246,7 @@ expand_byte_to_mask(unsigned byte) { } \ } \ } while (0) +/* clang-format on */ TEST_BEGIN(test_popcount_u) { TEST_POPCOUNT(unsigned, u, "x"); From 27d7960cf9b48a9a9395661f212d05a471dceed4 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Mon, 19 May 2025 21:09:01 -0700 Subject: [PATCH 2510/2608] Revert "Extend purging algorithm with peak demand tracking" This reverts commit ad108d50f1c30700389103ff5fe3ef5f538f804c. 
--- Makefile.in | 2 - include/jemalloc/internal/hpa.h | 4 - include/jemalloc/internal/hpa_opts.h | 14 +- include/jemalloc/internal/peak_demand.h | 55 ------ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 - .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 - .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 - src/ctl.c | 5 - src/hpa.c | 53 +----- src/jemalloc.c | 5 - src/peak_demand.c | 74 -------- src/stats.c | 1 - test/unit/hpa.c | 174 +++--------------- test/unit/mallctl.c | 1 - test/unit/peak_demand.c | 162 ---------------- 20 files changed, 28 insertions(+), 538 deletions(-) delete mode 100644 include/jemalloc/internal/peak_demand.h delete mode 100644 src/peak_demand.c delete mode 100644 test/unit/peak_demand.c diff --git a/Makefile.in b/Makefile.in index ac8c51ff..7085a22a 100644 --- a/Makefile.in +++ b/Makefile.in @@ -137,7 +137,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ - $(srcroot)src/peak_demand.c \ $(srcroot)src/peak_event.c \ $(srcroot)src/prof.c \ $(srcroot)src/prof_data.c \ @@ -255,7 +254,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/pack.c \ $(srcroot)test/unit/pages.c \ $(srcroot)test/unit/peak.c \ - $(srcroot)test/unit/peak_demand.c \ $(srcroot)test/unit/ph.c \ $(srcroot)test/unit/prng.c \ $(srcroot)test/unit/prof_accum.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 117c1c20..2e9fccc2 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -10,7 +10,6 @@ #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" -#include "jemalloc/internal/peak_demand.h" #include "jemalloc/internal/psset.h" typedef struct 
hpa_central_s hpa_central_t; @@ -148,9 +147,6 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; - - /* Peak active memory sliding window statistics. */ - peak_demand_t peak_demand; }; bool hpa_hugepage_size_exceeds_limit(void); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index e5517719..9e7f76ac 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -27,8 +27,7 @@ struct hpa_shard_opts_s { /* * The HPA purges whenever the number of pages exceeds dirty_mult * - * peak_active_pages. This may be set to (fxp_t)-1 to disable - * purging. + * active_pages. This may be set to (fxp_t)-1 to disable purging. */ fxp_t dirty_mult; @@ -60,13 +59,6 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. */ ssize_t experimental_max_purge_nhp; - - /* - * Sliding window duration to track active memory demand statistics. - * This might be set to 0, to disable sliding window statistics - * tracking and use current number of active pages for purging instead. - */ - uint64_t peak_demand_window_ms; }; /* clang-format off */ @@ -92,9 +84,7 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1, \ - /* peak_demand_window_ms */ \ - 0 \ + -1 \ } /* clang-format on */ diff --git a/include/jemalloc/internal/peak_demand.h b/include/jemalloc/internal/peak_demand.h deleted file mode 100644 index 2664cbec..00000000 --- a/include/jemalloc/internal/peak_demand.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_PEAK_DEMAND_H -#define JEMALLOC_INTERNAL_PEAK_DEMAND_H - -#include "jemalloc/internal/jemalloc_preamble.h" - -/* - * Implementation of peak active memory demand tracking. - * - * Inspired by "Beyond malloc efficiency to fleet efficiency: a hugepage-aware - * memory allocator" whitepaper. 
- * https://storage.googleapis.com/gweb-research2023-media/pubtools/6170.pdf - * - * End goal is to track peak active memory usage over specified time interval. - * We do so by dividing this time interval into disjoint subintervals and - * storing value of maximum memory usage for each subinterval in a circular - * buffer. Nanoseconds resolution timestamp uniquely maps into epoch, which is - * used as an index to access circular buffer. - */ - -#define PEAK_DEMAND_LG_BUCKETS 4 -/* - * Number of buckets should be power of 2 to ensure modulo operation is - * optimized to bit masking by the compiler. - */ -#define PEAK_DEMAND_NBUCKETS (1 << PEAK_DEMAND_LG_BUCKETS) - -typedef struct peak_demand_s peak_demand_t; -struct peak_demand_s { - /* - * Absolute value of current epoch, monotonically increases over time. Epoch - * value modulo number of buckets used as an index to access nactive_max - * array. - */ - uint64_t epoch; - - /* How many nanoseconds each epoch approximately takes. */ - uint64_t epoch_interval_ns; - - /* - * Circular buffer to track maximum number of active pages for each - * epoch. - */ - size_t nactive_max[PEAK_DEMAND_NBUCKETS]; -}; - -void peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms); - -/* Updates peak demand statistics with current number of active pages. */ -void peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, - size_t nactive); - -/* Returns maximum number of active pages in sliding window. 
*/ -size_t peak_demand_nactive_max(peak_demand_t *peak_demand); - -#endif /* JEMALLOC_INTERNAL_PEAK_DEMAND_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 97a95fbf..c43b30b1 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 8529438c..a195f6b3 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index eace48ba..cd16005d 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git 
a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 98085cfd..2d8c4be6 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -76,7 +76,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 1a89369e..f091475e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -112,9 +112,6 @@ Source Files - - Source Files - Source Files diff --git a/src/ctl.c b/src/ctl.c index 92d254c1..a30adc52 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,7 +106,6 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) -CTL_PROTO(opt_hpa_peak_demand_window_ms) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -489,8 +488,6 @@ static const ctl_named_node_t opt_node[] = { {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, - {NAME("hpa_peak_demand_window_ms"), - CTL(opt_hpa_peak_demand_window_ms)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2260,8 +2257,6 @@ CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) -CTL_RO_NL_GEN(opt_hpa_peak_demand_window_ms, - opt_hpa_opts.peak_demand_window_ms, uint64_t) /* * This will have to change before we publicly document this option; fxp_t and diff --git a/src/hpa.c b/src/hpa.c index 9b7ff744..48e356c6 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -64,11 +64,6 @@ hpa_supported(void) { 
return true; } -static bool -hpa_peak_demand_tracking_enabled(hpa_shard_t *shard) { - return shard->opts.peak_demand_window_ms > 0; -} - static void hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); @@ -223,11 +218,6 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->stats.nhugify_failures = 0; shard->stats.ndehugifies = 0; - if (hpa_peak_demand_tracking_enabled(shard)) { - peak_demand_init(&shard->peak_demand, - shard->opts.peak_demand_window_ms); - } - /* * Fill these in last, so that if an hpa_shard gets used despite * initialization failing, we'll at least crash instead of just @@ -305,37 +295,8 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - /* - * We are trying to estimate maximum amount of active memory we'll - * need in the near future. We do so by projecting future active - * memory demand (based on peak active memory usage we observed in the - * past within sliding window) and adding slack on top of it (an - * overhead is reasonable to have in exchange of higher hugepages - * coverage). When peak demand tracking is off, projection of future - * active memory is active memory we are having right now. - * - * Estimation is essentially the same as nactive_max * (1 + - * dirty_mult), but expressed differently to factor in necessary - * implementation details. - */ - size_t nactive = psset_nactive(&shard->psset); - size_t nactive_max = nactive; - if (hpa_peak_demand_tracking_enabled(shard)) { - /* - * We release shard->mtx, when we do a syscall to purge dirty - * memory, so someone might grab shard->mtx, allocate memory - * from this shard and update psset's nactive counter, before - * peak_demand_update(...) was called and we'll get - * peak_demand_nactive_max(...) <= nactive as a result. 
- */ - size_t peak = peak_demand_nactive_max(&shard->peak_demand); - if (peak > nactive_max) { - nactive_max = peak; - } - } - size_t slack = fxp_mul_frac(nactive_max, shard->opts.dirty_mult); - size_t estimation = nactive_max + slack; - return estimation - nactive; + return fxp_mul_frac(psset_nactive(&shard->psset), + shard->opts.dirty_mult); } static bool @@ -711,16 +672,6 @@ static void hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - - /* Update active memory demand statistics. */ - if (hpa_peak_demand_tracking_enabled(shard)) { - nstime_t now; - shard->central->hooks.curtime(&now, - /* first_reading */ true); - peak_demand_update(&shard->peak_demand, &now, - psset_nactive(&shard->psset)); - } - if (!forced && shard->opts.deferral_allowed) { return; } diff --git a/src/jemalloc.c b/src/jemalloc.c index 360635a8..d958c8ca 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1573,11 +1573,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); - CONF_HANDLE_UINT64_T( - opt_hpa_opts.peak_demand_window_ms, - "hpa_peak_demand_window_ms", 0, 0, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/peak_demand.c b/src/peak_demand.c deleted file mode 100644 index 49f28930..00000000 --- a/src/peak_demand.c +++ /dev/null @@ -1,74 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/peak_demand.h" - -void -peak_demand_init(peak_demand_t *peak_demand, uint64_t interval_ms) { - assert(interval_ms > 0); - peak_demand->epoch = 0; - uint64_t interval_ns = interval_ms * 1000 * 1000; - peak_demand->epoch_interval_ns = interval_ns / PEAK_DEMAND_NBUCKETS; - 
memset(peak_demand->nactive_max, 0, sizeof(peak_demand->nactive_max)); -} - -static uint64_t -peak_demand_epoch_ind(peak_demand_t *peak_demand) { - return peak_demand->epoch % PEAK_DEMAND_NBUCKETS; -} - -static nstime_t -peak_demand_next_epoch_advance(peak_demand_t *peak_demand) { - uint64_t epoch = peak_demand->epoch; - uint64_t ns = (epoch + 1) * peak_demand->epoch_interval_ns; - nstime_t next; - nstime_init(&next, ns); - return next; -} - -static uint64_t -peak_demand_maybe_advance_epoch(peak_demand_t *peak_demand, - const nstime_t *now) { - nstime_t next_epoch_advance = - peak_demand_next_epoch_advance(peak_demand); - if (nstime_compare(now, &next_epoch_advance) < 0) { - return peak_demand_epoch_ind(peak_demand); - } - uint64_t next_epoch = nstime_ns(now) / peak_demand->epoch_interval_ns; - assert(next_epoch > peak_demand->epoch); - /* - * If we missed more epochs, than capacity of circular buffer - * (PEAK_DEMAND_NBUCKETS), re-write no more than PEAK_DEMAND_NBUCKETS - * items as we don't want to zero out same item multiple times. 
- */ - if (peak_demand->epoch + PEAK_DEMAND_NBUCKETS < next_epoch) { - peak_demand->epoch = next_epoch - PEAK_DEMAND_NBUCKETS; - } - while (peak_demand->epoch < next_epoch) { - ++peak_demand->epoch; - uint64_t ind = peak_demand_epoch_ind(peak_demand); - peak_demand->nactive_max[ind] = 0; - } - return peak_demand_epoch_ind(peak_demand); -} - -void -peak_demand_update(peak_demand_t *peak_demand, const nstime_t *now, - size_t nactive) { - uint64_t ind = peak_demand_maybe_advance_epoch(peak_demand, now); - size_t *epoch_nactive = &peak_demand->nactive_max[ind]; - if (nactive > *epoch_nactive) { - *epoch_nactive = nactive; - } -} - -size_t -peak_demand_nactive_max(peak_demand_t *peak_demand) { - size_t nactive_max = peak_demand->nactive_max[0]; - for (int i = 1; i < PEAK_DEMAND_NBUCKETS; ++i) { - if (peak_demand->nactive_max[i] > nactive_max) { - nactive_max = peak_demand->nactive_max[i]; - } - } - return nactive_max; -} diff --git a/src/stats.c b/src/stats.c index d3127483..8496e457 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1657,7 +1657,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("hpa_hugify_sync") OPT_WRITE_UINT64("hpa_min_purge_interval_ms") OPT_WRITE_SSIZE_T("experimental_hpa_max_purge_nhp") - OPT_WRITE_UINT64("hpa_peak_demand_window_ms") if (je_mallctl("opt.hpa_dirty_mult", (void *)&u32v, &u32sz, NULL, 0) == 0) { /* diff --git a/test/unit/hpa.c b/test/unit/hpa.c index e53ee2ec..47fa25f2 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,9 +37,26 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1, - /* peak_demand_window_ms */ - 0 + -1 +}; + +static hpa_shard_opts_t test_hpa_shard_opts_purge = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + 
/* experimental_max_purge_nhp */ + -1 }; static hpa_shard_t * @@ -474,14 +491,8 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.slab_max_alloc = HUGEPAGE; - opts.hugification_threshold = 0.9 * HUGEPAGE; - opts.dirty_mult = FXP_INIT_PERCENT(11); - opts.deferral_allowed = true; - opts.hugify_delay_ms = 0; - - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, &opts); + hpa_shard_t *shard = create_test_data(&hpa_hooks_default, + &test_hpa_shard_opts_purge); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -489,7 +500,8 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. */ - const size_t npages = opts.hugification_threshold / PAGE + 1; + const size_t npages = + test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; const size_t size = npages * PAGE; bool deferred_work_generated = false; @@ -736,142 +748,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } TEST_END -TEST_BEGIN(test_demand_purge_slack) { - test_skip_if(!hpa_supported()); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - /* Allow 10% of slack. */ - opts.dirty_mult = FXP_INIT_PERCENT(10); - /* Peak demand sliding window duration is 10 seconds. 
*/ - opts.peak_demand_window_ms = 10 * 1000; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 16 * HUGEPAGE_PAGES}; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - - /* Deallocate 5 hugepages out of 16. */ - for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); - } - nstime_init2(&defer_curtime, 6, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * Peak demand within sliding window is 16 hugepages, so we don't need - * to purge anything just yet. - */ - expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); - - nstime_init2(&defer_curtime, 12, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); - ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * 12 seconds passed now, peak demand is 11 hugepages, we allowed to - * keep 11 * 0.1 (hpa_dirty_mult) = 1.1 dirty hugepages, but we - * have 5 dirty hugepages, so we should purge 4 of them. 
- */ - expect_zu_eq(4, ndefer_purge_calls, "Expect purges"); - ndefer_purge_calls = 0; - - destroy_test_data(shard); -} -TEST_END - -TEST_BEGIN(test_demand_purge_tight) { - test_skip_if(!hpa_supported()); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - /* No slack allowed. */ - opts.dirty_mult = FXP_INIT_PERCENT(0); - /* Peak demand sliding window duration is 10 seconds. */ - opts.peak_demand_window_ms = 10 * 1000; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 16 * HUGEPAGE_PAGES}; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - - /* Deallocate 5 hugepages out of 16. */ - for (int i = 0; i < 5 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); - } - nstime_init2(&defer_curtime, 6, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * Peak demand within sliding window is 16 hugepages, to purge anything - * just yet. 
- */ - expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); - - nstime_init2(&defer_curtime, 12, 0); - hpa_shard_do_deferred_work(tsdn, shard); - - expect_zu_eq(11, ndefer_hugify_calls, "Expect hugification"); - ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - /* - * 12 seconds passed now, peak demand is 11 hugepages. We have - * hpa_dirty_mult = 0, so we allowed to keep 11 * 0 = 0 dirty - * hugepages, but we have 5, all of them should be purged. - */ - expect_zu_eq(5, ndefer_purge_calls, "Expect purges"); - ndefer_purge_calls = 0; - - destroy_test_data(shard); -} -TEST_END - TEST_BEGIN(test_vectorized_opt_eq_zero) { test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); @@ -934,7 +810,5 @@ main(void) { test_min_purge_interval, test_purge, test_experimental_max_purge_nhp, - test_demand_purge_slack, - test_demand_purge_tight, test_vectorized_opt_eq_zero); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index cf9b88aa..68c3a705 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -295,7 +295,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); - TEST_MALLCTL_OPT(uint64_t, hpa_peak_demand_window_ms, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/peak_demand.c b/test/unit/peak_demand.c deleted file mode 100644 index ca2506b8..00000000 --- a/test/unit/peak_demand.c +++ /dev/null @@ -1,162 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/peak_demand.h" - -TEST_BEGIN(test_peak_demand_init) { - peak_demand_t peak_demand; - /* - * Exact value doesn't matter here as we don't advance epoch in this - * test. 
- */ - uint64_t interval_ms = 1000; - peak_demand_init(&peak_demand, interval_ms); - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 0, - "Unexpected ndirty_max value after initialization"); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_basic) { - peak_demand_t peak_demand; - /* Make each bucket exactly one second to simplify math. */ - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ 1, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - nstime_init2(&now, /* sec */ 2, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 256); - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 1024, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_skip_epochs) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ PEAK_DEMAND_NBUCKETS - 1, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - nstime_init2(&now, /* sec */ 2 * (PEAK_DEMAND_NBUCKETS - 1), - /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 256); - - /* - * Updates are not evenly spread over time. When we update at - * 2 * (PEAK_DEMAND_NBUCKETS - 1) second, 1024 value is already out of - * sliding window, but 512 is still present. 
- */ - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_rewrite_optimization) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ 0, /* nsec */ UINT64_MAX); - /* - * This update should take reasonable time if optimization is working - * correctly, otherwise we'll loop from 0 to UINT64_MAX and this test - * will take a long time to finish. - */ - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 512, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_out_of_interval) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - - nstime_init2(&now, /* sec */ 0 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 1024); - - nstime_init2(&now, /* sec */ 1 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 512); - - nstime_init2(&now, /* sec */ 2 * PEAK_DEMAND_NBUCKETS, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, /* nactive */ 256); - - /* - * Updates frequency is lower than tracking interval, so we should - * have only last value. - */ - expect_zu_eq(peak_demand_nactive_max(&peak_demand), 256, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_static_epoch) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - nstime_init_zero(&now); - - /* Big enough value to overwrite values in circular buffer. 
*/ - size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; - for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { - /* - * We should override value in the same bucket as now value - * doesn't change between iterations. - */ - peak_demand_update(&peak_demand, &now, nactive); - } - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); -} -TEST_END - -TEST_BEGIN(test_peak_demand_update_epoch_advance) { - peak_demand_t peak_demand; - uint64_t interval_ms = 1000 * PEAK_DEMAND_NBUCKETS; - peak_demand_init(&peak_demand, interval_ms); - - nstime_t now; - /* Big enough value to overwrite values in circular buffer. */ - size_t nactive_max = 2 * PEAK_DEMAND_NBUCKETS; - for (size_t nactive = 0; nactive <= nactive_max; ++nactive) { - uint64_t sec = nactive; - nstime_init2(&now, sec, /* nsec */ 0); - peak_demand_update(&peak_demand, &now, nactive); - } - - expect_zu_eq(peak_demand_nactive_max(&peak_demand), nactive_max, ""); -} -TEST_END - -int -main(void) { - return test_no_reentrancy( - test_peak_demand_init, - test_peak_demand_update_basic, - test_peak_demand_update_skip_epochs, - test_peak_demand_update_rewrite_optimization, - test_peak_demand_update_out_of_interval, - test_peak_demand_update_static_epoch, - test_peak_demand_update_epoch_advance); -} From 1972241cd204c60fb5b66f23c48a117879636161 Mon Sep 17 00:00:00 2001 From: Qi Wang Date: Mon, 2 Jun 2025 11:24:55 -0700 Subject: [PATCH 2511/2608] Remove unused options in the batched madvise unit tests. 
--- test/unit/hpa_vectorized_madvise.c | 4 +--- test/unit/hpa_vectorized_madvise_large_batch.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index ae25fdde..6770a9fa 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -37,9 +37,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1, - /* peak_demand_window_ms */ - 0 + -1 }; static hpa_shard_t * diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 99ce15f4..561da7a2 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -37,9 +37,7 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1, - /* peak_demand_window_ms */ - 0 + -1 }; static hpa_shard_t * From e6864c6075a9fdeea56f788588652f2cefb996b6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 14 Mar 2025 06:34:05 -0700 Subject: [PATCH 2512/2608] [thread_event] Remove macros from thread_event and replace with dynamic event objects --- Makefile.in | 1 + include/jemalloc/internal/peak_event.h | 17 +- include/jemalloc/internal/prof_externs.h | 63 +++- include/jemalloc/internal/prof_threshold.h | 5 +- include/jemalloc/internal/stats.h | 5 +- include/jemalloc/internal/tcache_externs.h | 3 + include/jemalloc/internal/thread_event.h | 84 +---- .../jemalloc/internal/thread_event_registry.h | 58 ++++ include/jemalloc/internal/tsd_internals.h | 19 +- src/peak_event.c | 44 +-- src/prof.c | 54 ++- src/prof_threshold.c | 14 +- src/stats.c | 22 +- src/tcache.c | 27 +- src/thread_event.c | 311 ++++++++++-------- src/thread_event_registry.c | 37 +++ test/unit/thread_event.c | 9 +- 17 files changed, 455 insertions(+), 318 deletions(-) create mode 100644 
include/jemalloc/internal/thread_event_registry.h create mode 100644 src/thread_event_registry.c diff --git a/Makefile.in b/Makefile.in index 7085a22a..2519ed83 100644 --- a/Makefile.in +++ b/Makefile.in @@ -156,6 +156,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ + $(srcroot)src/thread_event_registry.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/util.c \ diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index cc2a1401..1e339ff8 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -4,6 +4,14 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/tsd_types.h" +/* + * Update every 64K by default. We're not exposing this as a configuration + * option for now; we don't want to bind ourselves too tightly to any particular + * performance requirements for small values, or guarantee that we'll even be + * able to provide fine-grained accuracy. + */ +#define PEAK_EVENT_WAIT (64 * 1024) + /* * While peak.h contains the simple helper struct that tracks state, this * contains the allocator tie-ins (and knows about tsd, the event module, etc.). @@ -15,13 +23,6 @@ void peak_event_update(tsd_t *tsd); void peak_event_zero(tsd_t *tsd); uint64_t peak_event_max(tsd_t *tsd); -/* Manual hooks. */ -/* The activity-triggered hooks. 
*/ -uint64_t peak_alloc_new_event_wait(tsd_t *tsd); -uint64_t peak_alloc_postponed_event_wait(tsd_t *tsd); -void peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed); -uint64_t peak_dalloc_new_event_wait(tsd_t *tsd); -uint64_t peak_dalloc_postponed_event_wait(tsd_t *tsd); -void peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t peak_te_handler; #endif /* JEMALLOC_INTERNAL_PEAK_EVENT_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 789e3811..7d962522 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -5,6 +5,7 @@ #include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/prof_hook.h" +#include "jemalloc/internal/thread_event_registry.h" extern bool opt_prof; extern bool opt_prof_active; @@ -104,9 +105,65 @@ void prof_prefork1(tsdn_t *tsdn); void prof_postfork_parent(tsdn_t *tsdn); void prof_postfork_child(tsdn_t *tsdn); -/* Only accessed by thread event. */ uint64_t prof_sample_new_event_wait(tsd_t *tsd); -uint64_t prof_sample_postponed_event_wait(tsd_t *tsd); -void prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed); +uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); + +/* + * The lookahead functionality facilitates events to be able to lookahead, i.e. + * without touching the event counters, to determine whether an event would be + * triggered. The event counters are not advanced until the end of the + * allocation / deallocation calls, so the lookahead can be useful if some + * preparation work for some event must be done early in the allocation / + * deallocation calls. + * + * Currently only the profiling sampling event needs the lookahead + * functionality, so we don't yet define general purpose lookahead functions. 
+ * + * Surplus is a terminology referring to the amount of bytes beyond what's + * needed for triggering an event, which can be a useful quantity to have in + * general when lookahead is being called. + * + * This function returns true if allocation of usize would go above the next + * trigger for prof event, and false otherwise. + * If function returns true surplus will contain number of bytes beyond that + * trigger. + */ + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, + size_t *surplus) { + if (surplus != NULL) { + /* + * This is a dead store: the surplus will be overwritten before + * any read. The initialization suppresses compiler warnings. + * Meanwhile, using SIZE_MAX to initialize is good for + * debugging purpose, because a valid surplus value is strictly + * less than usize, which is at most SIZE_MAX. + */ + *surplus = SIZE_MAX; + } + if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { + return false; + } + /* The subtraction is intentionally susceptible to underflow. 
*/ + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - + tsd_thread_allocated_last_event_get(tsd); + uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); + if (accumbytes < sample_wait) { + return false; + } + assert(accumbytes - sample_wait < (uint64_t)usize); + if (surplus != NULL) { + *surplus = (size_t)(accumbytes - sample_wait); + } + return true; +} + +JEMALLOC_ALWAYS_INLINE bool +te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { + return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); +} + +extern te_base_cb_t prof_sample_te_handler; #endif /* JEMALLOC_INTERNAL_PROF_EXTERNS_H */ diff --git a/include/jemalloc/internal/prof_threshold.h b/include/jemalloc/internal/prof_threshold.h index dc9c8f2b..93e9478e 100644 --- a/include/jemalloc/internal/prof_threshold.h +++ b/include/jemalloc/internal/prof_threshold.h @@ -3,9 +3,6 @@ #include "jemalloc/internal/tsd_types.h" -/* The activity-triggered hooks. */ -uint64_t prof_threshold_new_event_wait(tsd_t *tsd); -uint64_t prof_threshold_postponed_event_wait(tsd_t *tsd); -void prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t prof_threshold_te_handler; #endif /* JEMALLOC_INTERNAL_THRESHOLD_EVENT_H */ diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index 310178ea..a5f1be32 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tsd_types.h" /* OPTION(opt, var_name, default, set_value_to) */ @@ -43,9 +44,7 @@ extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; #define STATS_INTERVAL_ACCUM_BATCH_MAX (4 << 20) /* Only accessed by thread event. 
*/ -uint64_t stats_interval_new_event_wait(tsd_t *tsd); -uint64_t stats_interval_postponed_event_wait(tsd_t *tsd); -void stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t stats_interval_te_handler; /* Implements je_malloc_stats_print. */ void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts); diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 732adacb..024314fe 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -6,6 +6,7 @@ #include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/sz.h" #include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/thread_event_registry.h" extern bool opt_tcache; extern size_t opt_tcache_max; @@ -89,4 +90,6 @@ uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); +extern te_base_cb_t tcache_gc_te_handler; + #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index ad46ffe7..e9631cbd 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -49,29 +49,12 @@ void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); -/* - * List of all events, in the following format: - * E(event, (condition), is_alloc_event) - */ -#define ITERATE_OVER_ALL_EVENTS \ - E(tcache_gc, (opt_tcache_gc_incr_bytes > 0), true) \ - E(prof_sample, (config_prof && opt_prof), true) \ - E(prof_threshold, config_stats, true) \ - E(stats_interval, (opt_stats_interval >= 0), true) \ - E(tcache_gc_dalloc, (opt_tcache_gc_incr_bytes > 0), false) \ - E(peak_alloc, config_stats, true) \ - E(peak_dalloc, config_stats, false) - -#define E(event, condition_unused, 
is_alloc_event_unused) \ - C(event##_event_wait) - /* List of all thread event counters. */ -#define ITERATE_OVER_ALL_COUNTERS \ - C(thread_allocated) \ - C(thread_allocated_last_event) \ - ITERATE_OVER_ALL_EVENTS \ - C(prof_sample_last_event) \ - C(stats_interval_last_event) +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ + C(thread_allocated_last_event) \ + C(prof_sample_last_event) \ + C(stats_interval_last_event) /* Getters directly wrap TSD getters. */ #define C(counter) \ @@ -99,12 +82,6 @@ counter##_set(tsd_t *tsd, uint64_t v) { \ ITERATE_OVER_ALL_COUNTERS #undef C -/* - * For generating _event_wait getter / setter functions for each individual - * event. - */ -#undef E - /* * The malloc and free fastpath getters -- use the unsafe getters since tsd may * be non-nominal, in which case the fast_threshold will be set to 0. This @@ -221,57 +198,6 @@ te_ctx_get(tsd_t *tsd, te_ctx_t *ctx, bool is_alloc) { } } -/* - * The lookahead functionality facilitates events to be able to lookahead, i.e. - * without touching the event counters, to determine whether an event would be - * triggered. The event counters are not advanced until the end of the - * allocation / deallocation calls, so the lookahead can be useful if some - * preparation work for some event must be done early in the allocation / - * deallocation calls. - * - * Currently only the profiling sampling event needs the lookahead - * functionality, so we don't yet define general purpose lookahead functions. - * - * Surplus is a terminology referring to the amount of bytes beyond what's - * needed for triggering an event, which can be a useful quantity to have in - * general when lookahead is being called. - */ - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, - size_t *surplus) { - if (surplus != NULL) { - /* - * This is a dead store: the surplus will be overwritten before - * any read. The initialization suppresses compiler warnings. 
- * Meanwhile, using SIZE_MAX to initialize is good for - * debugging purpose, because a valid surplus value is strictly - * less than usize, which is at most SIZE_MAX. - */ - *surplus = SIZE_MAX; - } - if (unlikely(!tsd_nominal(tsd) || tsd_reentrancy_level_get(tsd) > 0)) { - return false; - } - /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); - uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); - if (accumbytes < sample_wait) { - return false; - } - assert(accumbytes - sample_wait < (uint64_t)usize); - if (surplus != NULL) { - *surplus = (size_t)(accumbytes - sample_wait); - } - return true; -} - -JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead(tsd_t *tsd, size_t usize) { - return te_prof_sample_event_lookahead_surplus(tsd, usize, NULL); -} - JEMALLOC_ALWAYS_INLINE void te_event_advance(tsd_t *tsd, size_t usize, bool is_alloc) { te_assert_invariants(tsd); diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h new file mode 100644 index 00000000..aee7a4f2 --- /dev/null +++ b/include/jemalloc/internal/thread_event_registry.h @@ -0,0 +1,58 @@ +#ifndef JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H +#define JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/tsd.h" + +/* "te" is short for "thread_event" */ +enum te_alloc_e { +#ifdef JEMALLOC_PROF + te_alloc_prof_sample, +#endif + te_alloc_stats_interval, +#ifdef JEMALLOC_STATS + te_alloc_prof_threshold, +#endif + te_alloc_tcache_gc, +#ifdef JEMALLOC_STATS + te_alloc_peak, + te_alloc_last = te_alloc_peak, +#else + te_alloc_last = te_alloc_tcache_gc, +#endif + te_alloc_count = te_alloc_last + 1 +}; +typedef enum te_alloc_e te_alloc_t; + +enum te_dalloc_e { + te_dalloc_tcache_gc, +#ifdef JEMALLOC_STATS + te_dalloc_peak, + te_dalloc_last = te_dalloc_peak, 
+#else + te_dalloc_last = te_dalloc_tcache_gc, +#endif + te_dalloc_count = te_dalloc_last + 1 +}; +typedef enum te_dalloc_e te_dalloc_t; + +/* These will live in tsd */ +typedef struct te_data_s te_data_t; +struct te_data_s { + uint64_t alloc_wait[te_alloc_count]; + uint64_t dalloc_wait[te_dalloc_count]; +}; +#define TE_DATA_INITIALIZER { {0}, {0} } + +typedef struct te_base_cb_s te_base_cb_t; +struct te_base_cb_s { + bool (*enabled)(void); + uint64_t (*new_event_wait)(tsd_t *tsd); + uint64_t (*postponed_event_wait)(tsd_t *tsd); + void (*event_handler)(tsd_t *tsd); +}; + +extern te_base_cb_t *te_alloc_handlers[te_alloc_count]; +extern te_base_cb_t *te_dalloc_handlers[te_dalloc_count]; + +#endif /* JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 0ed33234..69b60519 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -15,6 +15,7 @@ #include "jemalloc/internal/rtree_tsd.h" #include "jemalloc/internal/tcache_structs.h" #include "jemalloc/internal/tcache_types.h" +#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tsd_types.h" #include "jemalloc/internal/util.h" #include "jemalloc/internal/witness.h" @@ -68,15 +69,9 @@ typedef ql_elm(tsd_t) tsd_link_t; O(thread_allocated_next_event, uint64_t, uint64_t) \ O(thread_deallocated_last_event, uint64_t, uint64_t) \ O(thread_deallocated_next_event, uint64_t, uint64_t) \ - O(tcache_gc_event_wait, uint64_t, uint64_t) \ - O(tcache_gc_dalloc_event_wait, uint64_t, uint64_t) \ - O(prof_sample_event_wait, uint64_t, uint64_t) \ + O(te_data, te_data_t, te_data_t) \ O(prof_sample_last_event, uint64_t, uint64_t) \ - O(prof_threshold_event_wait, uint64_t, uint64_t) \ - O(stats_interval_event_wait, uint64_t, uint64_t) \ - O(stats_interval_last_event, uint64_t, uint64_t) \ - O(peak_alloc_event_wait, uint64_t, uint64_t) \ - O(peak_dalloc_event_wait, uint64_t, 
uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ O(prng_state, uint64_t, uint64_t) \ O(san_extents_until_guard_small, uint64_t, uint64_t) \ @@ -102,15 +97,9 @@ typedef ql_elm(tsd_t) tsd_link_t; /* thread_allocated_next_event */ 0, \ /* thread_deallocated_last_event */ 0, \ /* thread_deallocated_next_event */ 0, \ - /* tcache_gc_event_wait */ 0, \ - /* tcache_gc_dalloc_event_wait */ 0, \ - /* prof_sample_event_wait */ 0, \ + /* te_data */ TE_DATA_INITIALIZER, \ /* prof_sample_last_event */ 0, \ - /* prof_threshold_event_wait */ 0, \ - /* stats_interval_event_wait */ 0, \ /* stats_interval_last_event */ 0, \ - /* peak_alloc_event_wait */ 0, \ - /* peak_dalloc_event_wait */ 0, \ /* prof_tdata */ NULL, \ /* prng_state */ 0, \ /* san_extents_until_guard_small */ 0, \ diff --git a/src/peak_event.c b/src/peak_event.c index 4093fbcc..430bfdea 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -5,14 +5,7 @@ #include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/peak.h" - -/* - * Update every 64K by default. We're not exposing this as a configuration - * option for now; we don't want to bind ourselves too tightly to any particular - * performance requirements for small values, or guarantee that we'll even be - * able to provide fine-grained accuracy. - */ -#define PEAK_EVENT_WAIT (64 * 1024) +#include "jemalloc/internal/thread_event_registry.h" /* Update the peak with current tsd state. 
*/ void @@ -49,34 +42,31 @@ peak_event_max(tsd_t *tsd) { return peak_max(peak); } -uint64_t -peak_alloc_new_event_wait(tsd_t *tsd) { +static uint64_t +peak_event_new_event_wait(tsd_t *tsd) { return PEAK_EVENT_WAIT; } -uint64_t -peak_alloc_postponed_event_wait(tsd_t *tsd) { +static uint64_t +peak_event_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } -void -peak_alloc_event_handler(tsd_t *tsd, uint64_t elapsed) { +static void +peak_event_handler(tsd_t *tsd) { peak_event_update(tsd); peak_event_activity_callback(tsd); } -uint64_t -peak_dalloc_new_event_wait(tsd_t *tsd) { - return PEAK_EVENT_WAIT; +static bool +peak_event_enabled(void) { + return config_stats; } -uint64_t -peak_dalloc_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; -} - -void -peak_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { - peak_event_update(tsd); - peak_event_activity_callback(tsd); -} +/* Handles alloc and dalloc */ +te_base_cb_t peak_te_handler = { + .enabled = &peak_event_enabled, + .new_event_wait = &peak_event_new_event_wait, + .postponed_event_wait = &peak_event_postponed_event_wait, + .event_handler = &peak_event_handler, +}; diff --git a/src/prof.c b/src/prof.c index 8fdc6f71..94eddb6d 100644 --- a/src/prof.c +++ b/src/prof.c @@ -12,6 +12,7 @@ #include "jemalloc/internal/prof_sys.h" #include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/thread_event_registry.h" /* * This file implements the profiling "APIs" needed by other parts of jemalloc, @@ -289,8 +290,40 @@ prof_sample_new_event_wait(tsd_t *tsd) { #endif } +void +prof_sample_event_handler(tsd_t *tsd) { + cassert(config_prof); + if (prof_interval == 0 || !prof_active_get_unlocked()) { + return; + } + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = tsd_prof_sample_last_event_get(tsd); + tsd_prof_sample_last_event_set(tsd, last_event); + uint64_t elapsed = last_event - last_sample_event; + 
assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); + if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) { + prof_idump(tsd_tsdn(tsd)); + } +} + +static bool +prof_sample_enabled(void) { + return config_prof && opt_prof; +} + uint64_t -prof_sample_postponed_event_wait(tsd_t *tsd) { +tsd_prof_sample_event_wait_get(tsd_t *tsd) { +#ifdef JEMALLOC_PROF + return tsd_te_datap_get_unsafe(tsd)->alloc_wait[te_alloc_prof_sample]; +#else + not_reached(); + return TE_MAX_START_WAIT; +#endif +} + +te_base_cb_t prof_sample_te_handler = { + .enabled = &prof_sample_enabled, + .new_event_wait = &prof_sample_new_event_wait, /* * The postponed wait time for prof sample event is computed as if we * want a new wait time (i.e. as if the event were triggered). If we @@ -298,21 +331,10 @@ prof_sample_postponed_event_wait(tsd_t *tsd) { * handling the other events, then we can have sampling bias, if e.g. * the allocation immediately following a reentrancy always comes from * the same stack trace. 
- */ - return prof_sample_new_event_wait(tsd); -} - -void -prof_sample_event_handler(tsd_t *tsd, uint64_t elapsed) { - cassert(config_prof); - assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); - if (prof_interval == 0 || !prof_active_get_unlocked()) { - return; - } - if (counter_accum(tsd_tsdn(tsd), &prof_idump_accumulated, elapsed)) { - prof_idump(tsd_tsdn(tsd)); - } -} + */ + .postponed_event_wait = &prof_sample_new_event_wait, + .event_handler = &prof_sample_event_handler, +}; static void prof_fdump(void) { diff --git a/src/prof_threshold.c b/src/prof_threshold.c index 28a525fc..516b0bf6 100644 --- a/src/prof_threshold.c +++ b/src/prof_threshold.c @@ -52,6 +52,18 @@ prof_threshold_postponed_event_wait(tsd_t *tsd) { } void -prof_threshold_event_handler(tsd_t *tsd, uint64_t elapsed) { +prof_threshold_event_handler(tsd_t *tsd) { prof_threshold_update(tsd); } + +static bool +prof_threshold_enabled(void) { + return config_stats; +} + +te_base_cb_t prof_threshold_te_handler = { + .enabled = &prof_threshold_enabled, + .new_event_wait = &prof_threshold_new_event_wait, + .postponed_event_wait = &prof_threshold_postponed_event_wait, + .event_handler = &prof_threshold_event_handler, +}; diff --git a/src/stats.c b/src/stats.c index 8496e457..efc73223 100644 --- a/src/stats.c +++ b/src/stats.c @@ -65,7 +65,7 @@ char opt_stats_interval_opts[stats_print_tot_num_options+1] = ""; static counter_accum_t stats_interval_accumulated; /* Per thread batch accum size for stats_interval. 
*/ -static uint64_t stats_interval_accum_batch; +uint64_t stats_interval_accum_batch; /******************************************************************************/ @@ -2128,7 +2128,12 @@ stats_interval_postponed_event_wait(tsd_t *tsd) { } void -stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) { +stats_interval_event_handler(tsd_t *tsd) { + uint64_t last_event = thread_allocated_last_event_get(tsd); + uint64_t last_sample_event = tsd_stats_interval_last_event_get(tsd); + tsd_stats_interval_last_event_set(tsd, last_event); + uint64_t elapsed = last_event - last_sample_event; + assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, elapsed)) { @@ -2136,6 +2141,19 @@ stats_interval_event_handler(tsd_t *tsd, uint64_t elapsed) { } } +static bool +stats_interval_enabled(void) { + return opt_stats_interval >= 0; +} + +te_base_cb_t stats_interval_te_handler = { + .enabled = &stats_interval_enabled, + .new_event_wait = &stats_interval_new_event_wait, + .postponed_event_wait = &stats_interval_postponed_event_wait, + .event_handler = &stats_interval_event_handler, +}; + + bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index 270d38ac..36af7d97 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -511,7 +511,7 @@ tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, } static void -tcache_event(tsd_t *tsd) { +tcache_gc_event(tsd_t *tsd) { tcache_t *tcache = tcache_get(tsd); if (tcache == NULL) { return; @@ -581,18 +581,6 @@ tcache_event(tsd_t *tsd) { tcache_slow->next_gc_bin_large = szind_large; } -void -tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed) { - assert(elapsed == TE_INVALID_ELAPSED); - tcache_event(tsd); -} - -void -tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed) { - assert(elapsed == TE_INVALID_ELAPSED); - tcache_event(tsd); -} - void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, cache_bin_t 
*cache_bin, szind_t binind, @@ -1912,3 +1900,16 @@ tcache_postfork_child(tsdn_t *tsdn) { void tcache_assert_initialized(tcache_t *tcache) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); } + +static bool +tcache_gc_enabled(void) { + return (opt_tcache_gc_incr_bytes > 0); +} + +/* Handles alloc and dalloc the same way */ +te_base_cb_t tcache_gc_te_handler = { + .enabled = &tcache_gc_enabled, + .new_event_wait = &tcache_gc_new_event_wait, + .postponed_event_wait = &tcache_gc_postponed_event_wait, + .event_handler = &tcache_gc_event, +}; diff --git a/src/thread_event.c b/src/thread_event.c index a8276cd7..0b1adcc1 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -2,108 +2,46 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/thread_event.h" - -/* - * Signatures for event specific functions. These functions should be defined - * by the modules owning each event. The signatures here verify that the - * definitions follow the right format. - * - * The first two are functions computing new / postponed event wait time. New - * event wait time is the time till the next event if an event is currently - * being triggered; postponed event wait time is the time till the next event - * if an event should be triggered but needs to be postponed, e.g. when the TSD - * is not nominal or during reentrancy. - * - * The third is the event handler function, which is called whenever an event - * is triggered. The parameter is the elapsed time since the last time an - * event of the same type was triggered. - */ -#define E(event, condition_unused, is_alloc_event_unused) \ -uint64_t event##_new_event_wait(tsd_t *tsd); \ -uint64_t event##_postponed_event_wait(tsd_t *tsd); \ -void event##_event_handler(tsd_t *tsd, uint64_t elapsed); - -ITERATE_OVER_ALL_EVENTS -#undef E - -/* Signatures for internal functions fetching elapsed time. 
*/ -#define E(event, condition_unused, is_alloc_event_unused) \ -static uint64_t event##_fetch_elapsed(tsd_t *tsd); - -ITERATE_OVER_ALL_EVENTS -#undef E - -static uint64_t -tcache_gc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -prof_sample_fetch_elapsed(tsd_t *tsd) { - uint64_t last_event = thread_allocated_last_event_get(tsd); - uint64_t last_sample_event = prof_sample_last_event_get(tsd); - prof_sample_last_event_set(tsd, last_event); - return last_event - last_sample_event; -} - -static uint64_t -stats_interval_fetch_elapsed(tsd_t *tsd) { - uint64_t last_event = thread_allocated_last_event_get(tsd); - uint64_t last_stats_event = stats_interval_last_event_get(tsd); - stats_interval_last_event_set(tsd, last_event); - return last_event - last_stats_event; -} - -static uint64_t -peak_alloc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -peak_dalloc_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -static uint64_t -prof_threshold_fetch_elapsed(tsd_t *tsd) { - return TE_INVALID_ELAPSED; -} - -/* Per event facilities done. */ +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/peak_event.h" static bool te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); -#define E(event, condition, alloc_event) \ - if (condition && alloc_event == ctx->is_alloc) { \ - return true; \ + if (ctx->is_alloc) { + for (int i = 0; i < te_alloc_count; ++i) { + if (te_alloc_handlers[i]->enabled()) { + return true; + } + } + } else { + for (int i = 0; i < te_dalloc_count; ++i) { + if (te_dalloc_handlers[i]->enabled()) { + return true; + } + } } - ITERATE_OVER_ALL_EVENTS -#undef E return false; } static uint64_t te_next_event_compute(tsd_t *tsd, bool is_alloc) { + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; + uint64_t *waits = is_alloc ? 
tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? te_alloc_count : te_dalloc_count; + uint64_t wait = TE_MAX_START_WAIT; -#define E(event, condition, alloc_event) \ - if (is_alloc == alloc_event && condition) { \ - uint64_t event_wait = \ - event##_event_wait_get(tsd); \ - assert(event_wait <= TE_MAX_START_WAIT); \ - if (event_wait > 0U && event_wait < wait) { \ - wait = event_wait; \ - } \ + + for (int i = 0; i < count; i++) { + if (handlers[i]->enabled()) { + uint64_t ev_wait = waits[i]; + assert(ev_wait <= TE_MAX_START_WAIT); + if (ev_wait > 0U && ev_wait < wait) { + wait = ev_wait; + } + } } - ITERATE_OVER_ALL_EVENTS -#undef E - assert(wait <= TE_MAX_START_WAIT); return wait; } @@ -238,18 +176,132 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, te_ctx_next_event_set(tsd, ctx, next_event); } -static uint64_t -te_clip_event_wait(uint64_t event_wait) { - assert(event_wait > 0U); - if (TE_MIN_START_WAIT > 1U && - unlikely(event_wait < TE_MIN_START_WAIT)) { - event_wait = TE_MIN_START_WAIT; +static void +te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; + uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? 
te_alloc_count : te_dalloc_count; + for (int i = 0; i < count; i++) { + if (handlers[i]->enabled()) { + uint64_t ev_wait = handlers[i]->new_event_wait(tsd); + assert(ev_wait > 0); + waits[i] = ev_wait; + if (ev_wait < *wait) { + *wait = ev_wait; + } + } } - if (TE_MAX_START_WAIT < UINT64_MAX && - unlikely(event_wait > TE_MAX_START_WAIT)) { - event_wait = TE_MAX_START_WAIT; +} + +static inline bool +te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, + uint64_t *ev_wait, uint64_t *wait, te_base_cb_t *handler, + uint64_t new_wait) { + bool ret = false; + if (*ev_wait > accumbytes) { + *ev_wait -= accumbytes; + } else if (!allow) { + *ev_wait = handler->postponed_event_wait(tsd); + } else { + ret = true; + *ev_wait = new_wait == 0 ? + handler->new_event_wait(tsd) : + new_wait; + } + + assert(*ev_wait > 0); + if (*ev_wait < *wait) { + *wait = *ev_wait; + } + return ret; +} + +extern uint64_t stats_interval_accum_batch; +/* Return number of handlers enqueued into to_trigger array */ +static inline size_t +te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, + uint64_t accumbytes, bool allow, uint64_t *wait) { + /* + * We do not loop and invoke the functions via interface because + * of the perf cost. This path is relatively hot, so we sacrifice + * elegance for perf. 
+ */ + size_t nto_trigger = 0; + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; + if (opt_tcache_gc_incr_bytes > 0) { + assert(te_alloc_handlers[te_alloc_tcache_gc]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_tcache_gc], wait, + te_alloc_handlers[te_alloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_tcache_gc]; + } } - return event_wait; +#ifdef JEMALLOC_PROF + if (opt_prof) { + assert(te_alloc_handlers[te_alloc_prof_sample]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_sample], wait, + te_alloc_handlers[te_alloc_prof_sample], 0)) { + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_prof_sample]; + } + } +#endif + if (opt_stats_interval >= 0) { + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_stats_interval], + wait, + te_alloc_handlers[te_alloc_stats_interval], + stats_interval_accum_batch)) { + assert(te_alloc_handlers[te_alloc_stats_interval]->enabled()); + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_stats_interval]; + } + } + +#ifdef JEMALLOC_STATS + assert(te_alloc_handlers[te_alloc_peak]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, + te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { + to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; + } + + assert(te_alloc_handlers[te_alloc_prof_threshold]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_threshold], wait, + te_alloc_handlers[te_alloc_prof_threshold], + 1 << opt_experimental_lg_prof_threshold)) { + to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_threshold]; + } +#endif + return nto_trigger; +} + +static inline size_t +te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbytes, + bool allow, uint64_t *wait) { + size_t nto_trigger = 0; + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + if 
(opt_tcache_gc_incr_bytes > 0) { + assert(te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_dalloc_tcache_gc], wait, + te_dalloc_handlers[te_dalloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { + to_trigger[nto_trigger++] = + te_dalloc_handlers[te_dalloc_tcache_gc]; + } + } +#ifdef JEMALLOC_STATS + assert(te_dalloc_handlers[te_dalloc_peak]->enabled()); + if(te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, + te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) { + to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak]; + } +#endif + return nto_trigger; } void @@ -263,47 +315,32 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { bool allow_event_trigger = tsd_nominal(tsd) && tsd_reentrancy_level_get(tsd) == 0; - bool is_alloc = ctx->is_alloc; uint64_t wait = TE_MAX_START_WAIT; -#define E(event, condition, alloc_event) \ - bool is_##event##_triggered = false; \ - if (is_alloc == alloc_event && condition) { \ - uint64_t event_wait = event##_event_wait_get(tsd); \ - assert(event_wait <= TE_MAX_START_WAIT); \ - if (event_wait > accumbytes) { \ - event_wait -= accumbytes; \ - } else if (!allow_event_trigger) { \ - event_wait = event##_postponed_event_wait(tsd); \ - } else { \ - is_##event##_triggered = true; \ - event_wait = event##_new_event_wait(tsd); \ - } \ - event_wait = te_clip_event_wait(event_wait); \ - event##_event_wait_set(tsd, event_wait); \ - if (event_wait < wait) { \ - wait = event_wait; \ - } \ + assert((int)te_alloc_count >= (int) te_dalloc_count); + te_base_cb_t *to_trigger[te_alloc_count]; + size_t nto_trigger; + if (ctx->is_alloc) { + nto_trigger = te_update_alloc_events(tsd, to_trigger, + accumbytes, + allow_event_trigger, + &wait); + } else { + nto_trigger = te_update_dalloc_events(tsd, to_trigger, + accumbytes, + allow_event_trigger, + &wait); } - ITERATE_OVER_ALL_EVENTS -#undef E - - assert(wait <= TE_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); 
te_adjust_thresholds_helper(tsd, ctx, wait); te_assert_invariants(tsd); -#define E(event, condition, alloc_event) \ - if (is_alloc == alloc_event && condition && \ - is_##event##_triggered) { \ - assert(allow_event_trigger); \ - uint64_t elapsed = event##_fetch_elapsed(tsd); \ - event##_event_handler(tsd, elapsed); \ + for (size_t i = 0; i < nto_trigger; i++) { + assert(allow_event_trigger); + to_trigger[i]->event_handler(tsd); } - ITERATE_OVER_ALL_EVENTS -#undef E - te_assert_invariants(tsd); } @@ -323,18 +360,8 @@ te_init(tsd_t *tsd, bool is_alloc) { te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx)); uint64_t wait = TE_MAX_START_WAIT; -#define E(event, condition, alloc_event) \ - if (is_alloc == alloc_event && condition) { \ - uint64_t event_wait = event##_new_event_wait(tsd); \ - event_wait = te_clip_event_wait(event_wait); \ - event##_event_wait_set(tsd, event_wait); \ - if (event_wait < wait) { \ - wait = event_wait; \ - } \ - } + te_init_waits(tsd, &wait, is_alloc); - ITERATE_OVER_ALL_EVENTS -#undef E te_adjust_thresholds_helper(tsd, &ctx, wait); } diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c new file mode 100644 index 00000000..7543cfda --- /dev/null +++ b/src/thread_event_registry.c @@ -0,0 +1,37 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/thread_event.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/thread_event_registry.h" +#include "jemalloc/internal/tcache_externs.h" +#include "jemalloc/internal/peak_event.h" +#include "jemalloc/internal/prof_externs.h" +#include "jemalloc/internal/prof_threshold.h" +#include "jemalloc/internal/stats.h" + + +/* Table of all the thread events. + * Events share interface, but internally they will know thier + * data layout in tsd. 
+ */ +te_base_cb_t *te_alloc_handlers[te_alloc_count] = { +#ifdef JEMALLOC_PROF + &prof_sample_te_handler, +#endif + &stats_interval_te_handler, +#ifdef JEMALLOC_STATS + &prof_threshold_te_handler, +#endif + &tcache_gc_te_handler, +#ifdef JEMALLOC_STATS + &peak_te_handler, +#endif +}; + +te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { + &tcache_gc_te_handler, +#ifdef JEMALLOC_STATS + &peak_te_handler, +#endif +}; diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index e0b88a92..8b4fb1d6 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -8,12 +8,11 @@ TEST_BEGIN(test_next_event_fast) { te_ctx_last_event_set(&ctx, 0); te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); -#define E(event, condition, is_alloc) \ - if (is_alloc && condition) { \ - event##_event_wait_set(tsd, TE_NEXT_EVENT_FAST_MAX); \ + + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; + for (size_t i = 0; i < te_alloc_count; i++) { + waits[i] = TE_NEXT_EVENT_FAST_MAX; } - ITERATE_OVER_ALL_EVENTS -#undef E /* Test next_event_fast rolling back to 0. 
*/ void *p = malloc(16U); From 015b017973d47f3047f8f4d7349c937fefd30f99 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 28 Mar 2025 07:35:53 -0700 Subject: [PATCH 2513/2608] [thread_event] Add support for user events in thread events when stats are enabled --- include/jemalloc/internal/tcache_externs.h | 8 - include/jemalloc/internal/thread_event.h | 6 +- .../jemalloc/internal/thread_event_registry.h | 96 ++++++-- include/jemalloc/internal/witness.h | 1 + src/ctl.c | 19 ++ src/jemalloc.c | 1 + src/peak_event.c | 4 +- src/prof.c | 10 +- src/prof_threshold.c | 6 +- src/stats.c | 5 +- src/tcache.c | 4 +- src/thread_event.c | 98 ++++++-- src/thread_event_registry.c | 233 +++++++++++++++++- test/unit/mallctl.c | 40 ++- test/unit/thread_event.c | 20 ++ 15 files changed, 470 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 024314fe..76d601c3 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -82,14 +82,6 @@ void tcache_enabled_set(tsd_t *tsd, bool enabled); void tcache_assert_initialized(tcache_t *tcache); -/* Only accessed by thread event. 
*/ -uint64_t tcache_gc_new_event_wait(tsd_t *tsd); -uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd); -void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed); -uint64_t tcache_gc_dalloc_new_event_wait(tsd_t *tsd); -uint64_t tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd); -void tcache_gc_dalloc_event_handler(tsd_t *tsd, uint64_t elapsed); - extern te_base_cb_t tcache_gc_te_handler; #endif /* JEMALLOC_INTERNAL_TCACHE_EXTERNS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index e9631cbd..bf9ca3cc 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -48,10 +48,12 @@ void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); +void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, + uint64_t wait); /* List of all thread event counters. */ -#define ITERATE_OVER_ALL_COUNTERS \ - C(thread_allocated) \ +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ C(thread_allocated_last_event) \ C(prof_sample_last_event) \ C(stats_interval_last_event) diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h index aee7a4f2..1957e727 100644 --- a/include/jemalloc/internal/thread_event_registry.h +++ b/include/jemalloc/internal/thread_event_registry.h @@ -2,37 +2,41 @@ #define JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/tsd_types.h" + +#define TE_MAX_USER_EVENTS 4 /* "te" is short for "thread_event" */ enum te_alloc_e { #ifdef JEMALLOC_PROF - te_alloc_prof_sample, + te_alloc_prof_sample, #endif - te_alloc_stats_interval, + te_alloc_stats_interval, + te_alloc_tcache_gc, #ifdef JEMALLOC_STATS - te_alloc_prof_threshold, + te_alloc_prof_threshold, + te_alloc_peak, #endif - 
te_alloc_tcache_gc, -#ifdef JEMALLOC_STATS - te_alloc_peak, - te_alloc_last = te_alloc_peak, -#else - te_alloc_last = te_alloc_tcache_gc, -#endif - te_alloc_count = te_alloc_last + 1 + te_alloc_user0, + te_alloc_user1, + te_alloc_user2, + te_alloc_user3, + te_alloc_last = te_alloc_user3, + te_alloc_count = te_alloc_last + 1 }; typedef enum te_alloc_e te_alloc_t; enum te_dalloc_e { - te_dalloc_tcache_gc, + te_dalloc_tcache_gc, #ifdef JEMALLOC_STATS - te_dalloc_peak, - te_dalloc_last = te_dalloc_peak, -#else - te_dalloc_last = te_dalloc_tcache_gc, + te_dalloc_peak, #endif - te_dalloc_count = te_dalloc_last + 1 + te_dalloc_user0, + te_dalloc_user1, + te_dalloc_user2, + te_dalloc_user3, + te_dalloc_last = te_dalloc_user3, + te_dalloc_count = te_dalloc_last + 1 }; typedef enum te_dalloc_e te_dalloc_t; @@ -42,17 +46,63 @@ struct te_data_s { uint64_t alloc_wait[te_alloc_count]; uint64_t dalloc_wait[te_dalloc_count]; }; -#define TE_DATA_INITIALIZER { {0}, {0} } +#define TE_DATA_INITIALIZER \ + { \ + {0}, { \ + 0 \ + } \ + } + +/* + * Check if user event is installed, installed and enabled, or not + * installed. 
+ * + */ +enum te_enabled_e { te_enabled_not_installed, te_enabled_yes, te_enabled_no }; +typedef enum te_enabled_e te_enabled_t; typedef struct te_base_cb_s te_base_cb_t; struct te_base_cb_s { - bool (*enabled)(void); - uint64_t (*new_event_wait)(tsd_t *tsd); - uint64_t (*postponed_event_wait)(tsd_t *tsd); - void (*event_handler)(tsd_t *tsd); + te_enabled_t (*enabled)(void); + uint64_t (*new_event_wait)(tsd_t *tsd); + uint64_t (*postponed_event_wait)(tsd_t *tsd); + void (*event_handler)(tsd_t *tsd); }; extern te_base_cb_t *te_alloc_handlers[te_alloc_count]; extern te_base_cb_t *te_dalloc_handlers[te_dalloc_count]; +bool experimental_thread_events_boot(void); + +/* + * User callback for thread events + * + * is_alloc - true if event is allocation, false if event is free + * tallocated - number of bytes allocated on current thread so far + * tdallocated - number of bytes allocated on current thread so far + */ +typedef void (*user_event_cb_t)( + bool is_alloc, uint64_t tallocated, uint64_t tdallocated); + +typedef struct user_hook_object_s user_hook_object_t; +struct user_hook_object_s { + user_event_cb_t callback; + uint64_t interval; + bool is_alloc_only; +}; + +/* + * register user callback + * + * return zero if event was registered + * + * if interval is zero or callback is NULL, or + * no more slots are available event will not be registered + * and non-zero value will be returned + * + */ +int te_register_user_handler(tsdn_t *tsdn, user_hook_object_t *te_uobj); + +te_enabled_t te_user_event_enabled(size_t ue_idx, bool is_alloc); + #endif /* JEMALLOC_INTERNAL_THREAD_EVENT_REGISTRY_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index afee1246..acf7860d 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -78,6 +78,7 @@ enum witness_rank_e { WITNESS_RANK_PROF_RECENT_ALLOC = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_PROF_THREAD_ACTIVE_INIT = 
WITNESS_RANK_LEAF, + WITNESS_RANK_THREAD_EVENTS_USER = WITNESS_RANK_LEAF, }; typedef enum witness_rank_e witness_rank_t; diff --git a/src/ctl.c b/src/ctl.c index a30adc52..4f06363a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -362,6 +362,7 @@ CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) CTL_PROTO(experimental_hooks_prof_threshold) +CTL_PROTO(experimental_hooks_thread_event) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) @@ -976,6 +977,7 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, + {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; static const ctl_named_node_t experimental_thread_node[] = { @@ -3818,6 +3820,23 @@ label_return: return ret; } +static int +experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib, + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + + if (newp == NULL) { + ret = EINVAL; + goto label_return; + } + + user_hook_object_t t_new = {NULL, 0, false}; + WRITE(t_new, user_hook_object_t); + ret = te_register_user_handler(tsd_tsdn(tsd), &t_new); + +label_return: + return ret; +} /* For integration test purpose only. No plan to move out of experimental. */ static int diff --git a/src/jemalloc.c b/src/jemalloc.c index d958c8ca..a4509e68 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1965,6 +1965,7 @@ malloc_init_hard_a0_locked(void) { return true; } hook_boot(); + experimental_thread_events_boot(); /* * Create enough scaffolding to allow recursive allocation in * malloc_ncpus(). 
diff --git a/src/peak_event.c b/src/peak_event.c index 430bfdea..e7f3ced6 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -58,9 +58,9 @@ peak_event_handler(tsd_t *tsd) { peak_event_activity_callback(tsd); } -static bool +static te_enabled_t peak_event_enabled(void) { - return config_stats; + return config_stats ? te_enabled_yes : te_enabled_no; } /* Handles alloc and dalloc */ diff --git a/src/prof.c b/src/prof.c index 94eddb6d..ec13afbd 100644 --- a/src/prof.c +++ b/src/prof.c @@ -306,11 +306,6 @@ prof_sample_event_handler(tsd_t *tsd) { } } -static bool -prof_sample_enabled(void) { - return config_prof && opt_prof; -} - uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd) { #ifdef JEMALLOC_PROF @@ -321,6 +316,11 @@ tsd_prof_sample_event_wait_get(tsd_t *tsd) { #endif } +static te_enabled_t +prof_sample_enabled(void) { + return config_prof && opt_prof ? te_enabled_yes : te_enabled_no; +} + te_base_cb_t prof_sample_te_handler = { .enabled = &prof_sample_enabled, .new_event_wait = &prof_sample_new_event_wait, diff --git a/src/prof_threshold.c b/src/prof_threshold.c index 516b0bf6..0b5cb53c 100644 --- a/src/prof_threshold.c +++ b/src/prof_threshold.c @@ -27,7 +27,7 @@ prof_threshold_hook_get(void) { } /* Invoke callback for threshold reached */ -static void +static inline void prof_threshold_update(tsd_t *tsd) { prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); if (prof_threshold_hook == NULL) { @@ -56,9 +56,9 @@ prof_threshold_event_handler(tsd_t *tsd) { prof_threshold_update(tsd); } -static bool +static te_enabled_t prof_threshold_enabled(void) { - return config_stats; + return config_stats ? 
te_enabled_yes : te_enabled_no; } te_base_cb_t prof_threshold_te_handler = { diff --git a/src/stats.c b/src/stats.c index efc73223..b2a00319 100644 --- a/src/stats.c +++ b/src/stats.c @@ -2141,9 +2141,9 @@ stats_interval_event_handler(tsd_t *tsd) { } } -static bool +static te_enabled_t stats_interval_enabled(void) { - return opt_stats_interval >= 0; + return opt_stats_interval >= 0 ? te_enabled_yes : te_enabled_no; } te_base_cb_t stats_interval_te_handler = { @@ -2153,7 +2153,6 @@ te_base_cb_t stats_interval_te_handler = { .event_handler = &stats_interval_event_handler, }; - bool stats_boot(void) { uint64_t stats_interval; diff --git a/src/tcache.c b/src/tcache.c index 36af7d97..0154403d 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -1901,9 +1901,9 @@ void tcache_assert_initialized(tcache_t *tcache) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); } -static bool +static te_enabled_t tcache_gc_enabled(void) { - return (opt_tcache_gc_incr_bytes > 0); + return (opt_tcache_gc_incr_bytes > 0) ? te_enabled_yes : te_enabled_no; } /* Handles alloc and dalloc the same way */ diff --git a/src/thread_event.c b/src/thread_event.c index 0b1adcc1..496c16be 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -10,13 +10,13 @@ te_ctx_has_active_events(te_ctx_t *ctx) { assert(config_debug); if (ctx->is_alloc) { for (int i = 0; i < te_alloc_count; ++i) { - if (te_alloc_handlers[i]->enabled()) { + if (te_enabled_yes == te_alloc_handlers[i]->enabled()) { return true; } } } else { for (int i = 0; i < te_dalloc_count; ++i) { - if (te_dalloc_handlers[i]->enabled()) { + if (te_enabled_yes == te_dalloc_handlers[i]->enabled()) { return true; } } @@ -26,14 +26,17 @@ te_ctx_has_active_events(te_ctx_t *ctx) { static uint64_t te_next_event_compute(tsd_t *tsd, bool is_alloc) { - te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? 
tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + te_base_cb_t **handlers = is_alloc ? + te_alloc_handlers : te_dalloc_handlers; + uint64_t *waits = is_alloc ? + tsd_te_datap_get_unsafe(tsd)->alloc_wait : + tsd_te_datap_get_unsafe(tsd)->dalloc_wait; int count = is_alloc ? te_alloc_count : te_dalloc_count; - + uint64_t wait = TE_MAX_START_WAIT; for (int i = 0; i < count; i++) { - if (handlers[i]->enabled()) { + if (te_enabled_yes == handlers[i]->enabled()) { uint64_t ev_wait = waits[i]; assert(ev_wait <= TE_MAX_START_WAIT); if (ev_wait > 0U && ev_wait < wait) { @@ -41,7 +44,6 @@ te_next_event_compute(tsd_t *tsd, bool is_alloc) { } } } - return wait; } @@ -64,6 +66,19 @@ te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { /* The subtraction is intentionally susceptible to underflow. */ assert(current_bytes - last_event < interval); + + /* This computation assumes that event did not become active in the + * time since the last trigger. This works fine if waits for inactive + * events are initialized with 0 as those are ignored + * If we wanted to initialize user events to anything other than + * zero, computation would take it into account and min_wait could + * be smaller than interval (as it was not part of the calc setting + * next_event). 
+ * + * If we ever wanted to unregister the events assert would also + * need to account for the possibility that next_event was set, by + * event that is now gone + */ uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx)); /* * next_event should have been pushed up only except when no event is @@ -161,8 +176,8 @@ te_recompute_fast_threshold(tsd_t *tsd) { } } -static void -te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, +static inline void +te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { /* * The next threshold based on future events can only be adjusted after @@ -175,14 +190,21 @@ te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); te_ctx_next_event_set(tsd, ctx, next_event); } +void +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, + uint64_t wait) { + te_adjust_thresholds_impl(tsd, ctx, wait); +} static void te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + uint64_t *waits = is_alloc ? + tsd_te_datap_get_unsafe(tsd)->alloc_wait : + tsd_te_datap_get_unsafe(tsd)->dalloc_wait; int count = is_alloc ? 
te_alloc_count : te_dalloc_count; for (int i = 0; i < count; i++) { - if (handlers[i]->enabled()) { + if (te_enabled_yes == handlers[i]->enabled()) { uint64_t ev_wait = handlers[i]->new_event_wait(tsd); assert(ev_wait > 0); waits[i] = ev_wait; @@ -229,7 +251,8 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_alloc_handlers[te_alloc_tcache_gc]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_tcache_gc], wait, te_alloc_handlers[te_alloc_tcache_gc], @@ -240,7 +263,8 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, } #ifdef JEMALLOC_PROF if (opt_prof) { - assert(te_alloc_handlers[te_alloc_prof_sample]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_prof_sample]->enabled()); if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_prof_sample], wait, te_alloc_handlers[te_alloc_prof_sample], 0)) { @@ -255,27 +279,44 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, wait, te_alloc_handlers[te_alloc_stats_interval], stats_interval_accum_batch)) { - assert(te_alloc_handlers[te_alloc_stats_interval]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_stats_interval]->enabled()); to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_stats_interval]; } } #ifdef JEMALLOC_STATS - assert(te_alloc_handlers[te_alloc_peak]->enabled()); + assert(te_enabled_yes == te_alloc_handlers[te_alloc_peak]->enabled()); if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; } - assert(te_alloc_handlers[te_alloc_prof_threshold]->enabled()); + assert(te_enabled_yes == + te_alloc_handlers[te_alloc_prof_threshold]->enabled()); if(te_update_wait(tsd, 
accumbytes, allow, &waits[te_alloc_prof_threshold], wait, te_alloc_handlers[te_alloc_prof_threshold], 1 << opt_experimental_lg_prof_threshold)) { - to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_threshold]; + to_trigger[nto_trigger++] = + te_alloc_handlers[te_alloc_prof_threshold]; } #endif + + for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) { + te_enabled_t status = + te_user_event_enabled(ue - te_alloc_user0, true); + if (status == te_enabled_not_installed) { + break; + } else if (status == te_enabled_yes) { + if (te_update_wait(tsd, accumbytes, allow, &waits[ue], + wait, te_alloc_handlers[ue], 0)) { + to_trigger[nto_trigger++] = + te_alloc_handlers[ue]; + } + } + } return nto_trigger; } @@ -285,7 +326,8 @@ te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbyt size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); + assert(te_enabled_yes == + te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_tcache_gc], wait, te_dalloc_handlers[te_dalloc_tcache_gc], @@ -295,12 +337,26 @@ te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbyt } } #ifdef JEMALLOC_STATS - assert(te_dalloc_handlers[te_dalloc_peak]->enabled()); + assert(te_enabled_yes == te_dalloc_handlers[te_dalloc_peak]->enabled()); if(te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, - te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) { + te_dalloc_handlers[te_dalloc_peak], + PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak]; } #endif + for (te_dalloc_t ue = te_dalloc_user0; ue <= te_dalloc_user3; ue++) { + te_enabled_t status = + te_user_event_enabled(ue - te_dalloc_user0, false); + if (status == te_enabled_not_installed) { + break; + } else if (status == te_enabled_yes) { + if 
(te_update_wait(tsd, accumbytes, allow, &waits[ue], + wait, te_dalloc_handlers[ue], 0)) { + to_trigger[nto_trigger++] = + te_dalloc_handlers[ue]; + } + } + } return nto_trigger; } @@ -362,7 +418,7 @@ te_init(tsd_t *tsd, bool is_alloc) { uint64_t wait = TE_MAX_START_WAIT; te_init_waits(tsd, &wait, is_alloc); - te_adjust_thresholds_helper(tsd, &ctx, wait); + te_adjust_thresholds_impl(tsd, &ctx, wait); } void diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index 7543cfda..f5408178 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -3,30 +3,160 @@ #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/thread_event_registry.h" -#include "jemalloc/internal/thread_event_registry.h" #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/prof_externs.h" #include "jemalloc/internal/prof_threshold.h" #include "jemalloc/internal/stats.h" +static malloc_mutex_t uevents_mu; -/* Table of all the thread events. - * Events share interface, but internally they will know thier - * data layout in tsd. 
+bool +experimental_thread_events_boot(void) { + return malloc_mutex_init(&uevents_mu, "thread_events", + WITNESS_RANK_THREAD_EVENTS_USER, malloc_mutex_rank_exclusive); +} + +#define TE_REGISTER_ERRCODE_FULL_SLOTS -1 +#define TE_REGISTER_ERRCODE_ALREADY_REGISTERED -2 + +static user_hook_object_t uevents_storage[TE_MAX_USER_EVENTS] = { + {NULL, 0, false}, +}; + +static atomic_p_t uevent_obj_p[TE_MAX_USER_EVENTS] = { + NULL, +}; + +static inline bool +user_object_eq(user_hook_object_t *lhs, user_hook_object_t *rhs) { + assert(lhs != NULL && rhs != NULL); + + return lhs->callback == rhs->callback && lhs->interval == rhs->interval + && lhs->is_alloc_only == rhs->is_alloc_only; +} + +/* + * Return slot number that event is registered at on success + * it will be [0, TE_MAX_USER_EVENTS) + * Return negative value on some error */ +static inline int +te_register_user_handler_locked(user_hook_object_t *new_obj) { + /* Attempt to find the free slot in global register */ + for (int i = 0; i < TE_MAX_USER_EVENTS; ++i) { + user_hook_object_t *p = (user_hook_object_t *)atomic_load_p( + &uevent_obj_p[i], ATOMIC_ACQUIRE); + + if (p && user_object_eq(p, new_obj)) { + /* Same callback and interval are registered - no error. 
*/ + return TE_REGISTER_ERRCODE_ALREADY_REGISTERED; + } else if (p == NULL) { + /* Empty slot */ + uevents_storage[i] = *new_obj; + atomic_fence(ATOMIC_SEQ_CST); + atomic_store_p(&uevent_obj_p[i], &uevents_storage[i], + ATOMIC_RELEASE); + return i; + } + } + + return TE_REGISTER_ERRCODE_FULL_SLOTS; +} + +static inline user_hook_object_t * +uobj_get(size_t cb_idx) { + assert(cb_idx < TE_MAX_USER_EVENTS); + return (user_hook_object_t *)atomic_load_p( + &uevent_obj_p[cb_idx], ATOMIC_ACQUIRE); +} + +te_enabled_t +te_user_event_enabled(size_t ue_idx, bool is_alloc) { + assert(ue_idx < TE_MAX_USER_EVENTS); + user_hook_object_t *obj = uobj_get(ue_idx); + if (!obj) { + return te_enabled_not_installed; + } + if (is_alloc || !obj->is_alloc_only) { + return te_enabled_yes; + } + return te_enabled_no; +} + +static inline uint64_t +new_event_wait(size_t cb_idx) { + user_hook_object_t *obj = uobj_get(cb_idx); + /* Enabled should have guarded it */ + assert(obj); + return obj->interval; +} + +static uint64_t +postponed_event_wait(tsd_t *tsd) { + return TE_MIN_START_WAIT; +} + +static inline void +handler_wrapper(tsd_t *tsd, bool is_alloc, size_t cb_idx) { + user_hook_object_t *obj = uobj_get(cb_idx); + /* Enabled should have guarded it */ + assert(obj); + uint64_t alloc = tsd_thread_allocated_get(tsd); + uint64_t dalloc = tsd_thread_deallocated_get(tsd); + + pre_reentrancy(tsd, NULL); + obj->callback(is_alloc, alloc, dalloc); + post_reentrancy(tsd); +} + +#define TE_USER_HANDLER_BINDING_IDX(i) \ + static te_enabled_t te_user_alloc_enabled##i(void) { \ + return te_user_event_enabled(i, true); \ + } \ + static te_enabled_t te_user_dalloc_enabled##i(void) { \ + return te_user_event_enabled(i, false); \ + } \ + static uint64_t te_user_new_event_wait_##i(tsd_t *tsd) { \ + return new_event_wait(i); \ + } \ + static void te_user_alloc_handler_call##i(tsd_t *tsd) { \ + handler_wrapper(tsd, true, i); \ + } \ + static void te_user_dalloc_handler_call##i(tsd_t *tsd) { \ + 
handler_wrapper(tsd, false, i); \ + } \ + static te_base_cb_t user_alloc_handler##i = { \ + .enabled = &te_user_alloc_enabled##i, \ + .new_event_wait = &te_user_new_event_wait_##i, \ + .postponed_event_wait = &postponed_event_wait, \ + .event_handler = &te_user_alloc_handler_call##i}; \ + static te_base_cb_t user_dalloc_handler##i = { \ + .enabled = &te_user_dalloc_enabled##i, \ + .new_event_wait = &te_user_new_event_wait_##i, \ + .postponed_event_wait = &postponed_event_wait, \ + .event_handler = &te_user_dalloc_handler_call##i} + +TE_USER_HANDLER_BINDING_IDX(0); +TE_USER_HANDLER_BINDING_IDX(1); +TE_USER_HANDLER_BINDING_IDX(2); +TE_USER_HANDLER_BINDING_IDX(3); + +/* Table of all the thread events. */ te_base_cb_t *te_alloc_handlers[te_alloc_count] = { #ifdef JEMALLOC_PROF - &prof_sample_te_handler, + &prof_sample_te_handler, #endif - &stats_interval_te_handler, + &stats_interval_te_handler, + &tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &prof_threshold_te_handler, -#endif - &tcache_gc_te_handler, -#ifdef JEMALLOC_STATS - &peak_te_handler, + &prof_threshold_te_handler, + &peak_te_handler, #endif + &user_alloc_handler0, + &user_alloc_handler1, + &user_alloc_handler2, + &user_alloc_handler3 }; te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { @@ -34,4 +164,85 @@ te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { #ifdef JEMALLOC_STATS &peak_te_handler, #endif + &user_dalloc_handler0, + &user_dalloc_handler1, + &user_dalloc_handler2, + &user_dalloc_handler3 }; + +static inline bool +te_update_tsd(tsd_t *tsd, uint64_t new_wait, size_t ue_idx, bool is_alloc) { + bool needs_recompute = false; + te_ctx_t ctx; + uint64_t next, current, cur_wait; + + if (is_alloc) { + tsd_te_datap_get_unsafe(tsd) + ->alloc_wait[te_alloc_user0 + ue_idx] = new_wait; + } else { + tsd_te_datap_get_unsafe(tsd) + ->dalloc_wait[te_dalloc_user0 + ue_idx] = new_wait; + } + te_ctx_get(tsd, &ctx, is_alloc); + + next = te_ctx_next_event_get(&ctx); + current = te_ctx_current_bytes_get(&ctx); + 
cur_wait = next - current; + + if (new_wait < cur_wait) { + /* + * Set last event to current (same as when te inits). This + * will make sure that all the invariants are correct, before + * we adjust next_event and next_event fast. + */ + te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx)); + te_adjust_thresholds_helper(tsd, &ctx, new_wait); + needs_recompute = true; + } + return needs_recompute; +} + +static inline void +te_recalculate_current_thread_data(tsdn_t *tsdn, int ue_idx, bool alloc_only) { + bool recompute = false; + /* we do not need lock to recalculate the events on the current thread */ + assert(ue_idx < TE_MAX_USER_EVENTS); + tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn); + if (tsd) { + uint64_t new_wait = new_event_wait(ue_idx); + recompute = te_update_tsd(tsd, new_wait, ue_idx, true); + if (!alloc_only) { + recompute = te_update_tsd(tsd, new_wait, ue_idx, false) + || recompute; + } + + if (recompute) { + te_recompute_fast_threshold(tsd); + } + } +} + +int +te_register_user_handler(tsdn_t *tsdn, user_hook_object_t *te_uobj) { + int ret; + int reg_retcode; + if (!te_uobj || !te_uobj->callback || te_uobj->interval == 0) { + return EINVAL; + } + + malloc_mutex_lock(tsdn, &uevents_mu); + reg_retcode = te_register_user_handler_locked(te_uobj); + malloc_mutex_unlock(tsdn, &uevents_mu); + + if (reg_retcode >= 0) { + te_recalculate_current_thread_data( + tsdn, reg_retcode, te_uobj->is_alloc_only); + ret = 0; + } else if (reg_retcode == TE_REGISTER_ERRCODE_ALREADY_REGISTERED) { + ret = 0; + } else { + ret = EINVAL; + } + + return ret; +} diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 68c3a705..838a4445 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1347,6 +1347,43 @@ TEST_BEGIN(test_thread_activity_callback) { } TEST_END + + +static unsigned nuser_thread_event_cb_calls; +static void +user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { + (void)tdallocated; + (void)tallocated; + 
++nuser_thread_event_cb_calls; +} +static user_hook_object_t user_te_obj = { + .callback = user_thread_event_cb, + .interval = 100, + .is_alloc_only = false, +}; + +TEST_BEGIN(test_thread_event_hook) { + const size_t big_size = 10 * 1024 * 1024; + void *ptr; + int err; + + unsigned current_calls = nuser_thread_event_cb_calls; + err = mallctl("experimental.hooks.thread_event", NULL, 0, + &user_te_obj, sizeof(user_te_obj)); + assert_d_eq(0, err, ""); + + err = mallctl("experimental.hooks.thread_event", NULL, 0, + &user_te_obj, sizeof(user_te_obj)); + assert_d_eq(0, err, "Not an error to provide object with same interval and cb"); + + + ptr = mallocx(big_size, 0); + free(ptr); + expect_u64_lt(current_calls, nuser_thread_event_cb_calls, ""); +} +TEST_END + + int main(void) { return test( @@ -1387,5 +1424,6 @@ main(void) { test_hooks_exhaustion, test_thread_idle, test_thread_peak, - test_thread_activity_callback); + test_thread_activity_callback, + test_thread_event_hook); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 8b4fb1d6..66d61cd2 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -1,5 +1,18 @@ #include "test/jemalloc_test.h" +static uint32_t nuser_hook_calls; +static bool is_registered = false; +static void +test_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { + ++nuser_hook_calls; +} + +static user_hook_object_t tobj = { + .callback = &test_cb, + .interval = 10, + .is_alloc_only = false +}; + TEST_BEGIN(test_next_event_fast) { tsd_t *tsd = tsd_fetch(); te_ctx_t ctx; @@ -9,6 +22,12 @@ TEST_BEGIN(test_next_event_fast) { te_ctx_current_bytes_set(&ctx, TE_NEXT_EVENT_FAST_MAX - 8U); te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); + if (!is_registered) { + is_registered = 0 == te_register_user_handler(tsd_tsdn(tsd), &tobj); + } + assert_true(is_registered || !config_stats, "Register user handler"); + nuser_hook_calls = 0; + uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; for (size_t i 
= 0; i < te_alloc_count; i++) { waits[i] = TE_NEXT_EVENT_FAST_MAX; @@ -16,6 +35,7 @@ TEST_BEGIN(test_next_event_fast) { /* Test next_event_fast rolling back to 0. */ void *p = malloc(16U); + assert_true(nuser_hook_calls == 1 || !config_stats, "Expected alloc call"); assert_ptr_not_null(p, "malloc() failed"); free(p); From c5547f9e64da41ccefa43d349b6bb79d09d5d63b Mon Sep 17 00:00:00 2001 From: "dzhao.ampere" Date: Wed, 11 Jun 2025 15:15:25 +0800 Subject: [PATCH 2514/2608] test/unit/psset.c: fix SIGSEGV when PAGESIZE is large When hugepage is enabled and PAGESIZE is large, the test could ask for a stack size larger than user limit. Allocating the memory instead can avoid the failure. Closes: #2408 --- test/unit/psset.c | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/test/unit/psset.c b/test/unit/psset.c index b15d9af3..c834e531 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -146,7 +146,7 @@ TEST_BEGIN(test_fill) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -169,6 +169,8 @@ TEST_BEGIN(test_fill) { edata_init_test(&extra_alloc); err = test_psset_alloc_reuse(&psset, &extra_alloc, PAGE); expect_true(err, "Alloc succeeded even though psset should be empty"); + + free(alloc); } TEST_END @@ -180,7 +182,7 @@ TEST_BEGIN(test_reuse) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -262,6 +264,8 @@ TEST_BEGIN(test_reuse) { err = test_psset_alloc_reuse(&psset, &alloc[index_of_4], 4 * PAGE); expect_false(err, "Should have been able to find alloc."); edata_expect(&alloc[index_of_4], index_of_4, 4); + + free(alloc); } TEST_END @@ -273,7 +277,7 @@ TEST_BEGIN(test_evict) { 
hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -297,6 +301,8 @@ TEST_BEGIN(test_evict) { err = test_psset_alloc_reuse(&psset, &alloc[0], PAGE); expect_true(err, "psset should be empty."); + + free(alloc); } TEST_END @@ -311,7 +317,9 @@ TEST_BEGIN(test_multi_pageslab) { (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), PAGESLAB_AGE + 1); - edata_t alloc[2][HUGEPAGE_PAGES]; + edata_t* alloc[2]; + alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + alloc[1] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -361,6 +369,9 @@ TEST_BEGIN(test_multi_pageslab) { */ err = test_psset_alloc_reuse(&psset, &alloc[1][0], 2 * PAGE); expect_false(err, "Allocation should have succeeded"); + + free(alloc[0]); + free(alloc[1]); } TEST_END @@ -368,7 +379,7 @@ TEST_BEGIN(test_stats_merged) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -422,6 +433,8 @@ TEST_BEGIN(test_stats_merged) { expect_zu_eq(1, psset.stats.merged.npageslabs, ""); expect_zu_eq(1, psset.stats.merged.nactive, ""); expect_zu_eq(0, psset.stats.merged.ndirty, ""); + + free(alloc); } TEST_END @@ -432,7 +445,7 @@ TEST_BEGIN(test_stats_huge) { hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -505,6 +518,8 @@ TEST_BEGIN(test_stats_huge) { expect_zu_eq(0, psset.stats.slabs[huge].nactive, ""); expect_zu_eq(0, psset.stats.slabs[huge].ndirty, ""); } + + free(alloc); } TEST_END @@ -557,7 +572,7 @@ TEST_BEGIN(test_stats_fullness) { hpdata_t pageslab; hpdata_init(&pageslab, 
PAGESLAB_ADDR, PAGESLAB_AGE); - edata_t alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); psset_t psset; psset_init(&psset); @@ -587,6 +602,8 @@ TEST_BEGIN(test_stats_fullness) { stats_expect(&psset, 0); psset_update_end(&psset, &pageslab); stats_expect(&psset, 1); + + free(alloc); } TEST_END @@ -648,8 +665,8 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, TEST_BEGIN(test_oldest_fit) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; - edata_t alloc[HUGEPAGE_PAGES]; - edata_t worse_alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; @@ -666,6 +683,9 @@ TEST_BEGIN(test_oldest_fit) { expect_false(err, "Nonempty psset failed page allocation"); expect_ptr_eq(&pageslab, edata_ps_get(&test_edata), "Allocated from the wrong pageslab"); + + free(alloc); + free(worse_alloc); } TEST_END @@ -673,8 +693,8 @@ TEST_BEGIN(test_insert_remove) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t *ps; - edata_t alloc[HUGEPAGE_PAGES]; - edata_t worse_alloc[HUGEPAGE_PAGES]; + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; @@ -713,6 +733,9 @@ TEST_BEGIN(test_insert_remove) { psset_update_begin(&psset, &worse_pageslab); err = test_psset_alloc_reuse(&psset, &alloc[HUGEPAGE_PAGES - 1], PAGE); expect_true(err, "psset should be empty, but an alloc succeeded"); + + free(alloc); + free(worse_alloc); } TEST_END From 95fc091b0f4f8d4e7a2209baf2e8411a21b234a4 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 13 Jun 2025 14:15:50 -0700 Subject: [PATCH 2515/2608] Update appveyor settings. 
--- .appveyor.yml | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index dedc7867..c74e89db 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -5,36 +5,42 @@ environment: - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW64 CPU: x86_64 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug + EXTRA_CFLAGS: "-fcommon" - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-debug --enable-limit-usize-gap + CONFIG_FLAGS: --enable-debug + EXTRA_CFLAGS: "-fcommon" - MSYSTEM: MINGW64 CPU: x86_64 MSVC: amd64 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: - MSYSTEM: MINGW64 CPU: x86_64 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: + EXTRA_CFLAGS: "-fcommon" - MSYSTEM: MINGW32 CPU: i686 MSVC: x86 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: - MSYSTEM: MINGW32 CPU: i686 - CONFIG_FLAGS: --enable-limit-usize-gap + CONFIG_FLAGS: + EXTRA_CFLAGS: "-fcommon" install: - set PATH=c:\msys64\%MSYSTEM%\bin;c:\msys64\usr\bin;%PATH% - if defined MSVC call "c:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" %MSVC% - if defined MSVC pacman --noconfirm -Rsc mingw-w64-%CPU%-gcc gcc + - pacman --noconfirm -Syuu + - pacman --noconfirm -S autoconf build_script: - bash -c "autoconf" From e350c715719efc7c13195c842e44c1f5ac8e28a7 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Fri, 13 Jun 2025 15:46:51 -0700 Subject: [PATCH 2516/2608] Remove --enable-limit-usize-gap for cirrus CI since the config-time option is removed. 
--- .cirrus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.cirrus.yml b/.cirrus.yml index 8051272c..585aa42f 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -40,7 +40,7 @@ task: # We don't perfectly track freebsd stdlib.h definitions. This is fine when # we count as a system header, but breaks otherwise, like during these # tests. - - ./configure --with-jemalloc-prefix=ci_ --enable-limit-usize-gap ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} + - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - export JFLAG=`sysctl -n kern.smp.cpus` - gmake -j${JFLAG} - gmake -j${JFLAG} tests From a952a3b8b08a63609172c8c84cf6eb09de9fc7be Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Wed, 4 Jun 2025 13:28:37 -0700 Subject: [PATCH 2517/2608] Update the default value for opt_experimental_tcache_gc and opt_calloc_madvise_threshold --- include/jemalloc/internal/jemalloc_internal_types.h | 2 ++ src/jemalloc.c | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index 6a81f3cd..cddbfb65 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -145,4 +145,6 @@ typedef enum malloc_init_e malloc_init_t; assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ VARIABLE_ARRAY_UNSAFE(type, name, count) +#define CALLOC_MADVISE_THRESHOLD_DEFAULT (((size_t)1) << 23) /* 8 MB */ + #endif /* JEMALLOC_INTERNAL_TYPES_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index a4509e68..c6621a79 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -158,7 +158,7 @@ void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; bool opt_utrace = false; bool opt_xmalloc = false; bool opt_experimental_infallible_new = false; -bool opt_experimental_tcache_gc = false; +bool opt_experimental_tcache_gc = true; bool opt_zero = false; unsigned opt_narenas = 0; 
static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); @@ -168,7 +168,8 @@ unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; -size_t opt_calloc_madvise_threshold = 0; +size_t opt_calloc_madvise_threshold = + CALLOC_MADVISE_THRESHOLD_DEFAULT; /* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; From 6200e8987feb5eae198b95b14cd89d09695f7b3c Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 13 Jun 2025 12:31:12 -0700 Subject: [PATCH 2518/2608] Reformat the codebase with the clang-format 18. --- include/jemalloc/internal/activity_callback.h | 9 +- include/jemalloc/internal/arena_externs.h | 136 +- include/jemalloc/internal/arena_inlines_b.h | 252 +- include/jemalloc/internal/arena_stats.h | 56 +- include/jemalloc/internal/arena_structs.h | 35 +- include/jemalloc/internal/arena_types.h | 26 +- include/jemalloc/internal/assert.h | 74 +- include/jemalloc/internal/atomic.h | 53 +- include/jemalloc/internal/atomic_c11.h | 54 +- include/jemalloc/internal/atomic_gcc_atomic.h | 165 +- include/jemalloc/internal/atomic_gcc_sync.h | 92 +- include/jemalloc/internal/atomic_msvc.h | 151 +- .../internal/background_thread_externs.h | 22 +- .../internal/background_thread_inlines.h | 8 +- .../internal/background_thread_structs.h | 26 +- include/jemalloc/internal/base.h | 44 +- include/jemalloc/internal/batcher.h | 14 +- include/jemalloc/internal/bin.h | 24 +- include/jemalloc/internal/bin_info.h | 12 +- include/jemalloc/internal/bin_stats.h | 30 +- include/jemalloc/internal/bin_types.h | 5 +- include/jemalloc/internal/bit_util.h | 52 +- include/jemalloc/internal/bitmap.h | 217 +- include/jemalloc/internal/buf_writer.h | 24 +- include/jemalloc/internal/cache_bin.h | 117 +- include/jemalloc/internal/ckh.h | 10 +- include/jemalloc/internal/counter.h | 2 +- include/jemalloc/internal/ctl.h | 135 +- include/jemalloc/internal/decay.h | 14 +- include/jemalloc/internal/ecache.h | 16 +- include/jemalloc/internal/edata.h | 281 
+- include/jemalloc/internal/edata_cache.h | 16 +- include/jemalloc/internal/ehooks.h | 39 +- include/jemalloc/internal/emap.h | 99 +- include/jemalloc/internal/emitter.h | 106 +- include/jemalloc/internal/exp_grow.h | 4 +- include/jemalloc/internal/extent.h | 61 +- include/jemalloc/internal/extent_dss.h | 18 +- include/jemalloc/internal/extent_mmap.h | 4 +- include/jemalloc/internal/fb.h | 29 +- include/jemalloc/internal/fxp.h | 2 +- include/jemalloc/internal/hash.h | 316 +- include/jemalloc/internal/hook.h | 20 +- include/jemalloc/internal/hpa.h | 15 +- include/jemalloc/internal/hpa_hooks.h | 2 +- include/jemalloc/internal/hpa_utils.h | 77 +- include/jemalloc/internal/hpdata.h | 21 +- include/jemalloc/internal/inspect.h | 10 +- .../internal/jemalloc_internal_decls.h | 125 +- .../internal/jemalloc_internal_externs.h | 74 +- .../internal/jemalloc_internal_inlines_a.h | 8 +- .../internal/jemalloc_internal_inlines_b.h | 23 +- .../internal/jemalloc_internal_inlines_c.h | 357 +-- .../internal/jemalloc_internal_macros.h | 171 +- .../internal/jemalloc_internal_overrides.h | 9 +- .../internal/jemalloc_internal_types.h | 128 +- include/jemalloc/internal/large_externs.h | 18 +- include/jemalloc/internal/lockedint.h | 63 +- include/jemalloc/internal/log.h | 66 +- include/jemalloc/internal/malloc_io.h | 98 +- include/jemalloc/internal/mutex.h | 188 +- include/jemalloc/internal/mutex_prof.h | 112 +- include/jemalloc/internal/nstime.h | 39 +- include/jemalloc/internal/pa.h | 28 +- include/jemalloc/internal/pac.h | 20 +- include/jemalloc/internal/pages.h | 70 +- include/jemalloc/internal/pai.h | 13 +- include/jemalloc/internal/peak.h | 3 +- include/jemalloc/internal/peak_event.h | 2 +- include/jemalloc/internal/ph.h | 214 +- include/jemalloc/internal/prng.h | 10 +- include/jemalloc/internal/prof_data.h | 16 +- include/jemalloc/internal/prof_externs.h | 95 +- include/jemalloc/internal/prof_hook.h | 6 +- include/jemalloc/internal/prof_inlines.h | 19 +- 
include/jemalloc/internal/prof_log.h | 6 +- include/jemalloc/internal/prof_structs.h | 106 +- include/jemalloc/internal/prof_sys.h | 14 +- include/jemalloc/internal/prof_types.h | 48 +- include/jemalloc/internal/psset.h | 2 +- include/jemalloc/internal/ql.h | 171 +- include/jemalloc/internal/qr.h | 66 +- include/jemalloc/internal/quantum.h | 148 +- include/jemalloc/internal/rb.h | 2 +- include/jemalloc/internal/rtree.h | 263 +- include/jemalloc/internal/rtree_tsd.h | 19 +- include/jemalloc/internal/safety_check.h | 27 +- include/jemalloc/internal/san.h | 48 +- include/jemalloc/internal/san_bump.h | 9 +- include/jemalloc/internal/sc.h | 54 +- include/jemalloc/internal/sec.h | 14 +- include/jemalloc/internal/sec_opts.h | 22 +- include/jemalloc/internal/smoothstep.h | 410 +-- include/jemalloc/internal/spin.h | 9 +- include/jemalloc/internal/stats.h | 30 +- include/jemalloc/internal/sz.h | 72 +- include/jemalloc/internal/tcache_externs.h | 52 +- include/jemalloc/internal/tcache_inlines.h | 56 +- include/jemalloc/internal/tcache_structs.h | 30 +- include/jemalloc/internal/tcache_types.h | 20 +- include/jemalloc/internal/test_hooks.h | 24 +- include/jemalloc/internal/thread_event.h | 31 +- .../jemalloc/internal/thread_event_registry.h | 4 +- include/jemalloc/internal/ticker.h | 20 +- include/jemalloc/internal/tsd.h | 66 +- include/jemalloc/internal/tsd_generic.h | 47 +- include/jemalloc/internal/tsd_internals.h | 201 +- .../internal/tsd_malloc_thread_cleanup.h | 2 +- include/jemalloc/internal/tsd_tls.h | 4 +- include/jemalloc/internal/tsd_types.h | 4 +- include/jemalloc/internal/tsd_win.h | 49 +- include/jemalloc/internal/typed_list.h | 93 +- include/jemalloc/internal/util.h | 55 +- include/jemalloc/internal/witness.h | 86 +- include/msvc_compat/C99/stdint.h | 302 +- include/msvc_compat/strings.h | 39 +- msvc/test_threads/test_threads.cpp | 172 +- msvc/test_threads/test_threads_main.cpp | 7 +- src/arena.c | 504 ++- src/background_thread.c | 297 +- src/base.c | 209 +- 
src/batcher.c | 10 +- src/bin.c | 10 +- src/bin_info.c | 8 +- src/bitmap.c | 21 +- src/buf_writer.c | 15 +- src/cache_bin.c | 30 +- src/ckh.c | 101 +- src/counter.c | 2 +- src/ctl.c | 2262 +++++++------- src/decay.c | 71 +- src/ecache.c | 2 +- src/edata.c | 5 +- src/edata_cache.c | 15 +- src/ehooks.c | 70 +- src/emap.c | 105 +- src/eset.c | 161 +- src/extent.c | 416 +-- src/extent_dss.c | 89 +- src/extent_mmap.c | 6 +- src/fxp.c | 14 +- src/hook.c | 87 +- src/hpa.c | 227 +- src/hpa_hooks.c | 33 +- src/hpdata.c | 62 +- src/inspect.c | 6 +- src/jemalloc.c | 1499 ++++----- src/jemalloc_cpp.cpp | 109 +- src/large.c | 84 +- src/log.c | 14 +- src/malloc_io.c | 451 +-- src/mutex.c | 52 +- src/nstime.c | 42 +- src/pa.c | 32 +- src/pa_extra.c | 20 +- src/pac.c | 186 +- src/pages.c | 227 +- src/pai.c | 6 +- src/peak_event.c | 12 +- src/prof.c | 140 +- src/prof_data.c | 297 +- src/prof_log.c | 130 +- src/prof_recent.c | 79 +- src/prof_stack_range.c | 209 +- src/prof_stats.c | 4 +- src/prof_sys.c | 223 +- src/prof_threshold.c | 16 +- src/psset.c | 67 +- src/rtree.c | 125 +- src/safety_check.c | 23 +- src/san.c | 31 +- src/san_bump.c | 35 +- src/sc.c | 10 +- src/sec.c | 76 +- src/stats.c | 1045 +++---- src/sz.c | 14 +- src/tcache.c | 456 +-- src/thread_event.c | 192 +- src/thread_event_registry.c | 29 +- src/ticker.c | 15 +- src/tsd.c | 123 +- src/util.c | 5 +- src/witness.c | 14 +- src/zone.c | 113 +- test/analyze/prof_bias.c | 8 +- test/analyze/rand.c | 64 +- test/analyze/sizes.c | 9 +- test/include/test/SFMT-alti.h | 186 +- test/include/test/SFMT-params.h | 40 +- test/include/test/SFMT-params11213.h | 88 +- test/include/test/SFMT-params1279.h | 88 +- test/include/test/SFMT-params132049.h | 88 +- test/include/test/SFMT-params19937.h | 88 +- test/include/test/SFMT-params216091.h | 88 +- test/include/test/SFMT-params2281.h | 88 +- test/include/test/SFMT-params4253.h | 88 +- test/include/test/SFMT-params44497.h | 88 +- test/include/test/SFMT-params607.h | 88 +- 
test/include/test/SFMT-params86243.h | 88 +- test/include/test/SFMT-sse2.h | 150 +- test/include/test/SFMT.h | 84 +- test/include/test/arena_util.h | 41 +- test/include/test/bench.h | 32 +- test/include/test/bgthd.h | 4 +- test/include/test/btalloc.h | 52 +- test/include/test/extent_hooks.h | 191 +- test/include/test/fork.h | 8 +- test/include/test/math.h | 194 +- test/include/test/mq.h | 148 +- test/include/test/mtx.h | 14 +- test/include/test/nbits.h | 208 +- test/include/test/san.h | 9 +- test/include/test/test.h | 956 +++--- test/include/test/timer.h | 8 +- test/integration/MALLOCX_ARENA.c | 24 +- test/integration/aligned_alloc.c | 47 +- test/integration/allocated.c | 36 +- test/integration/cpp/basic.cpp | 3 +- test/integration/cpp/infallible_new_false.cpp | 4 +- test/integration/cpp/infallible_new_true.cpp | 12 +- test/integration/extent.c | 108 +- test/integration/malloc.c | 3 +- test/integration/mallocx.c | 117 +- test/integration/overflow.c | 19 +- test/integration/posix_memalign.c | 54 +- test/integration/rallocx.c | 132 +- test/integration/sdallocx.c | 25 +- test/integration/slab_sizes.c | 24 +- test/integration/smallocx.c | 130 +- test/integration/thread_arena.c | 29 +- test/integration/thread_tcache_enabled.c | 57 +- test/integration/xallocx.c | 111 +- test/src/SFMT.c | 739 ++--- test/src/mtx.c | 6 +- test/src/sleep.c | 4 +- test/src/test.c | 49 +- test/src/thd.c | 5 +- test/src/timer.c | 22 +- test/stress/batch_alloc.c | 35 +- test/stress/cpp/microbench.cpp | 42 +- test/stress/fill_flush.c | 18 +- test/stress/hookbench.c | 27 +- test/stress/large_microbench.c | 6 +- test/stress/mallctl.c | 35 +- test/stress/microbench.c | 19 +- test/unit/SFMT.c | 2779 ++++++++--------- test/unit/a0.c | 3 +- test/unit/arena_decay.c | 129 +- test/unit/arena_reset.c | 115 +- test/unit/atomic.c | 11 +- test/unit/background_thread.c | 37 +- test/unit/background_thread_enable.c | 50 +- test/unit/base.c | 125 +- test/unit/batch_alloc.c | 54 +- test/unit/batcher.c | 75 +- 
test/unit/bin_batching.c | 44 +- test/unit/binshard.c | 40 +- test/unit/bit_util.c | 168 +- test/unit/bitmap.c | 130 +- test/unit/buf_writer.c | 72 +- test/unit/cache_bin.c | 138 +- test/unit/ckh.c | 87 +- test/unit/counter.c | 13 +- test/unit/decay.c | 76 +- test/unit/div.c | 11 +- test/unit/double_free.c | 22 +- test/unit/edata_cache.c | 20 +- test/unit/emitter.c | 535 ++-- test/unit/extent_quantize.c | 76 +- test/unit/fb.c | 220 +- test/unit/fork.c | 10 +- test/unit/fxp.c | 138 +- test/unit/hash.c | 101 +- test/unit/hook.c | 177 +- test/unit/hpa.c | 211 +- test/unit/hpa_background_thread.c | 52 +- test/unit/hpa_vectorized_madvise.c | 73 +- .../unit/hpa_vectorized_madvise_large_batch.c | 48 +- test/unit/hpdata.c | 95 +- test/unit/huge.c | 64 +- test/unit/inspect.c | 120 +- test/unit/junk.c | 107 +- test/unit/log.c | 58 +- test/unit/mallctl.c | 780 ++--- test/unit/malloc_conf_2.c | 26 +- test/unit/malloc_io.c | 171 +- test/unit/math.c | 512 ++- test/unit/mpsc_queue.c | 54 +- test/unit/mq.c | 31 +- test/unit/mtx.c | 22 +- test/unit/ncached_max.c | 116 +- test/unit/nstime.c | 106 +- test/unit/oversize_threshold.c | 21 +- test/unit/pa.c | 38 +- test/unit/pack.c | 64 +- test/unit/pages.c | 18 +- test/unit/peak.c | 10 +- test/unit/ph.c | 109 +- test/unit/prng.c | 80 +- test/unit/prof_accum.c | 31 +- test/unit/prof_active.c | 46 +- test/unit/prof_gdump.c | 21 +- test/unit/prof_hook.c | 125 +- test/unit/prof_idump.c | 20 +- test/unit/prof_log.c | 45 +- test/unit/prof_mdump.c | 23 +- test/unit/prof_recent.c | 331 +- test/unit/prof_reset.c | 77 +- test/unit/prof_small.c | 9 +- test/unit/prof_stats.c | 84 +- test/unit/prof_sys_thread_name.c | 21 +- test/unit/prof_tctx.c | 19 +- test/unit/prof_thread_name.c | 54 +- test/unit/prof_threshold.c | 33 +- test/unit/psset.c | 129 +- test/unit/ql.c | 101 +- test/unit/qr.c | 73 +- test/unit/rb.c | 285 +- test/unit/retained.c | 72 +- test/unit/rtree.c | 158 +- test/unit/safety_check.c | 32 +- test/unit/san.c | 35 +- 
test/unit/san_bump.c | 38 +- test/unit/sc.c | 11 +- test/unit/sec.c | 116 +- test/unit/seq.c | 17 +- test/unit/size_check.c | 6 +- test/unit/size_classes.c | 112 +- test/unit/slab.c | 24 +- test/unit/smoothstep.c | 27 +- test/unit/spin.c | 3 +- test/unit/stats.c | 254 +- test/unit/stats_print.c | 717 +++-- test/unit/sz.c | 31 +- test/unit/tcache_max.c | 24 +- test/unit/test_hooks.c | 4 +- test/unit/thread_event.c | 18 +- test/unit/ticker.c | 55 +- test/unit/tsd.c | 28 +- test/unit/uaf.c | 45 +- test/unit/witness.c | 47 +- test/unit/zero.c | 26 +- test/unit/zero_realloc_abort.c | 7 +- test/unit/zero_realloc_alloc.c | 17 +- test/unit/zero_realloc_free.c | 10 +- test/unit/zero_reallocs.c | 8 +- 346 files changed, 18286 insertions(+), 17770 deletions(-) diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h index 0f4f3962..6745f1a2 100644 --- a/include/jemalloc/internal/activity_callback.h +++ b/include/jemalloc/internal/activity_callback.h @@ -13,13 +13,14 @@ * * The calls to this thunk get driven by the peak_event module. 
*/ -#define ACTIVITY_CALLBACK_THUNK_INITIALIZER {NULL, NULL} -typedef void (*activity_callback_t)(void *uctx, uint64_t allocated, - uint64_t deallocated); +#define ACTIVITY_CALLBACK_THUNK_INITIALIZER \ + { NULL, NULL } +typedef void (*activity_callback_t)( + void *uctx, uint64_t allocated, uint64_t deallocated); typedef struct activity_callback_thunk_s activity_callback_thunk_t; struct activity_callback_thunk_s { activity_callback_t callback; - void *uctx; + void *uctx; }; #endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 91fed258..39d2099d 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -21,7 +21,7 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; extern percpu_arena_mode_t opt_percpu_arena; -extern const char *const percpu_arena_mode_names[]; +extern const char *const percpu_arena_mode_names[]; extern div_info_t arena_binind_div_info[SC_NBINS]; @@ -30,7 +30,7 @@ extern emap_t arena_emap_global; extern size_t opt_oversize_threshold; extern size_t oversize_threshold; -extern bool opt_huge_arena_pac_thp; +extern bool opt_huge_arena_pac_thp; extern pac_thp_t huge_arena_pac_thp; /* @@ -39,90 +39,90 @@ extern pac_thp_t huge_arena_pac_thp; */ extern uint32_t arena_bin_offsets[SC_NBINS]; -void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, - unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, - ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); +void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, + const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, + size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t 
*astats, - bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats); + bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, + hpa_shard_stats_t *hpastats, sec_stats_t *secstats); void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); -edata_t *arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, - size_t usize, size_t alignment, bool zero); -void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, - edata_t *edata); -void arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldusize); -void arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, - edata_t *edata, size_t oldusize); -bool arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, - ssize_t decay_ms); +edata_t *arena_extent_alloc_large( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); +void arena_extent_dalloc_large_prep( + tsdn_t *tsdn, arena_t *arena, edata_t *edata); +void arena_extent_ralloc_large_shrink( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize); +void arena_extent_ralloc_large_expand( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize); +bool arena_decay_ms_set( + tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms); ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); -void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, - bool all); +void arena_decay( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); -void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, - const cache_bin_sz_t nfill_max); +void 
arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, + cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max); -void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, - szind_t ind, bool zero, bool slab); -void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero, bool slab, tcache_t *tcache); -void arena_prof_promote(tsdn_t *tsdn, void *ptr, size_t usize, - size_t bumped_usize); -void arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path); +void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, + bool zero, bool slab); +void *arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache); +void arena_prof_promote( + tsdn_t *tsdn, void *ptr, size_t usize, size_t bumped_usize); +void arena_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); -void arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin); -void arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin); -void arena_dalloc_small(tsdn_t *tsdn, void *ptr); -bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, - size_t extra, bool zero, size_t *newsize); +void arena_dalloc_bin_locked_handle_newly_empty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +void arena_dalloc_bin_locked_handle_newly_nonempty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, + size_t extra, bool zero, size_t 
*newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, hook_ralloc_args_t *hook_args); -dss_prec_t arena_dss_prec_get(arena_t *arena); -ehooks_t *arena_get_ehooks(arena_t *arena); -extent_hooks_t *arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, - extent_hooks_t *extent_hooks); -bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); -void arena_name_get(arena_t *arena, char *name); -void arena_name_set(arena_t *arena, const char *name); +dss_prec_t arena_dss_prec_get(arena_t *arena); +ehooks_t *arena_get_ehooks(arena_t *arena); +extent_hooks_t *arena_set_extent_hooks( + tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks); +bool arena_dss_prec_set(arena_t *arena, dss_prec_t dss_prec); +void arena_name_get(arena_t *arena, char *name); +void arena_name_set(arena_t *arena, const char *name); ssize_t arena_dirty_decay_ms_default_get(void); -bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); +bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); -bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); -bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, - size_t *old_limit, size_t *new_limit); +bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set( + tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit); unsigned arena_nthreads_get(arena_t *arena, bool internal); -void arena_nthreads_inc(arena_t *arena, bool internal); -void arena_nthreads_dec(arena_t *arena, bool internal); +void arena_nthreads_inc(arena_t *arena, bool internal); +void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); -bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); +bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); arena_t *arena_choose_huge(tsd_t *tsd); -bin_t *arena_bin_choose(tsdn_t 
*tsdn, arena_t *arena, szind_t binind, - unsigned *binshard); +bin_t *arena_bin_choose( + tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero); -bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); -void arena_prefork0(tsdn_t *tsdn, arena_t *arena); -void arena_prefork1(tsdn_t *tsdn, arena_t *arena); -void arena_prefork2(tsdn_t *tsdn, arena_t *arena); -void arena_prefork3(tsdn_t *tsdn, arena_t *arena); -void arena_prefork4(tsdn_t *tsdn, arena_t *arena); -void arena_prefork5(tsdn_t *tsdn, arena_t *arena); -void arena_prefork6(tsdn_t *tsdn, arena_t *arena); -void arena_prefork7(tsdn_t *tsdn, arena_t *arena); -void arena_prefork8(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); -void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); +bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); +void arena_prefork0(tsdn_t *tsdn, arena_t *arena); +void arena_prefork1(tsdn_t *tsdn, arena_t *arena); +void arena_prefork2(tsdn_t *tsdn, arena_t *arena); +void arena_prefork3(tsdn_t *tsdn, arena_t *arena); +void arena_prefork4(tsdn_t *tsdn, arena_t *arena); +void arena_prefork5(tsdn_t *tsdn, arena_t *arena); +void arena_prefork6(tsdn_t *tsdn, arena_t *arena); +void arena_prefork7(tsdn_t *tsdn, arena_t *arena); +void arena_prefork8(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena); +void arena_postfork_child(tsdn_t *tsdn, arena_t *arena); #endif /* JEMALLOC_INTERNAL_ARENA_EXTERNS_H */ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 61008b59..549dfb8a 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -21,8 +21,8 @@ static inline arena_t * arena_get_from_edata(edata_t *edata) { - return (arena_t *)atomic_load_p(&arenas[edata_arena_ind_get(edata)], 
- ATOMIC_RELAXED); + return (arena_t *)atomic_load_p( + &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); } JEMALLOC_ALWAYS_INLINE arena_t * @@ -61,15 +61,17 @@ large_dalloc_safety_checks(edata_t *edata, const void *ptr, size_t input_size) { * The cost is low enough (as edata will be accessed anyway) to be * enabled all the time. */ - if (unlikely(edata == NULL || - edata_state_get(edata) != extent_state_active)) { - safety_check_fail("Invalid deallocation detected: " + if (unlikely(edata == NULL + || edata_state_get(edata) != extent_state_active)) { + safety_check_fail( + "Invalid deallocation detected: " "pages being freed (%p) not currently active, " - "possibly caused by double free bugs.", ptr); + "possibly caused by double free bugs.", + ptr); return true; } - if (unlikely(input_size != edata_usize_get(edata) || - input_size > SC_LARGE_MAXCLASS)) { + if (unlikely(input_size != edata_usize_get(edata) + || input_size > SC_LARGE_MAXCLASS)) { safety_check_fail_sized_dealloc(/* current_dealloc */ true, ptr, /* true_size */ edata_usize_get(edata), input_size); return true; @@ -86,25 +88,26 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, assert(prof_info != NULL); edata_t *edata = NULL; - bool is_slab; + bool is_slab; /* Static check. */ if (alloc_ctx == NULL) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); is_slab = edata_slab_get(edata); } else if (unlikely(!(is_slab = alloc_ctx->slab))) { - edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); } if (unlikely(!is_slab)) { /* edata must have been initialized at this point. */ assert(edata != NULL); - size_t usize = (alloc_ctx == NULL)? 
edata_usize_get(edata): - emap_alloc_ctx_usize_get(alloc_ctx); - if (reset_recent && - large_dalloc_safety_checks(edata, ptr, usize)) { + size_t usize = (alloc_ctx == NULL) + ? edata_usize_get(edata) + : emap_alloc_ctx_usize_get(alloc_ctx); + if (reset_recent + && large_dalloc_safety_checks(edata, ptr, usize)) { prof_info->alloc_tctx = PROF_TCTX_SENTINEL; return; } @@ -119,22 +122,22 @@ arena_prof_info_get(tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx, } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_reset(tsd_t *tsd, const void *ptr, - emap_alloc_ctx_t *alloc_ctx) { +arena_prof_tctx_reset( + tsd_t *tsd, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { cassert(config_prof); assert(ptr != NULL); /* Static check. */ if (alloc_ctx == NULL) { - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); if (unlikely(!edata_slab_get(edata))) { large_prof_tctx_reset(edata); } } else { if (unlikely(!alloc_ctx->slab)) { - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); large_prof_tctx_reset(edata); } } @@ -145,16 +148,16 @@ arena_prof_tctx_reset_sampled(tsd_t *tsd, const void *ptr) { cassert(config_prof); assert(ptr != NULL); - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); assert(!edata_slab_get(edata)); large_prof_tctx_reset(edata); } JEMALLOC_ALWAYS_INLINE void -arena_prof_info_set(tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, - size_t size) { +arena_prof_info_set( + tsd_t *tsd, edata_t *edata, prof_tctx_t *tctx, size_t size) { cassert(config_prof); assert(!edata_slab_get(edata)); @@ -177,9 +180,9 @@ arena_decay_ticks(tsdn_t *tsdn, arena_t *arena, unsigned nticks) { * use a single ticker for all of them. 
*/ ticker_geom_t *decay_ticker = tsd_arena_decay_tickerp_get(tsd); - uint64_t *prng_state = tsd_prng_statep_get(tsd); + uint64_t *prng_state = tsd_prng_statep_get(tsd); if (unlikely(ticker_geom_ticks(decay_ticker, prng_state, nticks, - tsd_reentrancy_level_get(tsd) > 0))) { + tsd_reentrancy_level_get(tsd) > 0))) { arena_decay(tsdn, arena, false, false); } } @@ -197,14 +200,13 @@ arena_malloc(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, if (likely(tcache != NULL)) { if (likely(slab)) { assert(sz_can_use_slab(size)); - return tcache_alloc_small(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path); - } else if (likely( - ind < tcache_nbins_get(tcache->tcache_slow) && - !tcache_bin_disabled(ind, &tcache->bins[ind], - tcache->tcache_slow))) { - return tcache_alloc_large(tsdn_tsd(tsdn), arena, - tcache, size, ind, zero, slow_path); + return tcache_alloc_small(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); + } else if (likely(ind < tcache_nbins_get(tcache->tcache_slow) + && !tcache_bin_disabled(ind, &tcache->bins[ind], + tcache->tcache_slow))) { + return tcache_alloc_large(tsdn_tsd(tsdn), arena, tcache, + size, ind, zero, slow_path); } /* (size > tcache_max) case falls through. 
*/ } @@ -241,8 +243,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { */ emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, - ptr, &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup( + tsdn, &arena_emap_global, ptr, &full_alloc_ctx); if (missing) { return 0; } @@ -261,8 +263,8 @@ arena_vsalloc(tsdn_t *tsdn, const void *ptr) { } static inline void -arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, - size_t usize) { +arena_dalloc_large_no_tcache( + tsdn_t *tsdn, void *ptr, szind_t szind, size_t usize) { /* * szind is still needed in this function mainly becuase * szind < SC_NBINS determines not only if this is a small alloc, @@ -272,8 +274,8 @@ arena_dalloc_large_no_tcache(tsdn_t *tsdn, void *ptr, szind_t szind, if (config_prof && unlikely(szind < SC_NBINS)) { arena_dalloc_promoted(tsdn, ptr, NULL, true); } else { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. 
*/ return; @@ -290,13 +292,13 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == - edata_usize_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -311,19 +313,19 @@ arena_dalloc_no_tcache(tsdn_t *tsdn, void *ptr) { JEMALLOC_ALWAYS_INLINE void arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, size_t usize, bool slow_path) { - assert (!tsdn_null(tsdn) && tcache != NULL); + assert(!tsdn_null(tsdn) && tcache != NULL); bool is_sample_promoted = config_prof && szind < SC_NBINS; if (unlikely(is_sample_promoted)) { arena_dalloc_promoted(tsdn, ptr, tcache, slow_path); } else { - if (szind < tcache_nbins_get(tcache->tcache_slow) && - !tcache_bin_disabled(szind, &tcache->bins[szind], - tcache->tcache_slow)) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, szind, - slow_path); + if (szind < tcache_nbins_get(tcache->tcache_slow) + && !tcache_bin_disabled( + szind, &tcache->bins[szind], tcache->tcache_slow)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, szind, slow_path); } else { - edata_t *edata = emap_edata_lookup(tsdn, - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); if (large_dalloc_safety_checks(edata, ptr, usize)) { /* See the comment in isfree. */ return; @@ -335,16 +337,17 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, /* Find the region index of a pointer. 
*/ JEMALLOC_ALWAYS_INLINE size_t -arena_slab_regind_impl(div_info_t* div_info, szind_t binind, - edata_t *slab, const void *ptr) { +arena_slab_regind_impl( + div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) { size_t diff, regind; /* Freeing a pointer outside the slab can cause assertion failure. */ assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) % - (uintptr_t)bin_infos[binind].reg_size == 0); + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) + % (uintptr_t)bin_infos[binind].reg_size + == 0); diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); @@ -360,22 +363,23 @@ arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { if (!config_debug) { return false; } - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - szind_t binind = edata_szind_get(edata); + edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); + szind_t binind = edata_szind_get(edata); div_info_t div_info = arena_binind_div_info[binind]; /* * Calls the internal function arena_slab_regind_impl because the * safety check does not require a lock. 
*/ size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr); - slab_data_t *slab_data = edata_slab_data_get(edata); + slab_data_t *slab_data = edata_slab_data_get(edata); const bin_info_t *bin_info = &bin_infos[binind]; assert(edata_nfree_get(edata) < bin_info->nregs); - if (unlikely(!bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, - regind))) { + if (unlikely(!bitmap_get( + slab_data->bitmap, &bin_info->bitmap_info, regind))) { safety_check_fail( "Invalid deallocation detected: the pointer being freed (%p) not " - "currently active, possibly caused by double free bugs.\n", ptr); + "currently active, possibly caused by double free bugs.\n", + ptr); return true; } return false; @@ -397,18 +401,18 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx = *caller_alloc_ctx; } else { util_assume(tsdn != NULL); - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.szind < SC_NSIZES); assert(alloc_ctx.slab == edata_slab_get(edata)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == - edata_usize_get(edata)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -416,8 +420,8 @@ arena_dalloc(tsdn_t *tsdn, void *ptr, tcache_t *tcache, if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { return; } - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - alloc_ctx.szind, slow_path); + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, emap_alloc_ctx_usize_get(&alloc_ctx), slow_path); @@ -436,21 +440,21 @@ arena_sdalloc_no_tcache(tsdn_t *tsdn, void *ptr, size_t size) { * object, so 
base szind and slab on the given size. */ szind_t szind = sz_size2index(size); - emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), - size); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), size); } if ((config_prof && opt_prof) || config_debug) { - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert((config_prof && opt_prof) || alloc_ctx.slab == (alloc_ctx.szind < SC_NBINS)); if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, - &arena_emap_global, ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); } @@ -481,8 +485,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (config_prof && opt_prof) { if (caller_alloc_ctx == NULL) { /* Uncommon case and should be a static check. 
*/ - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsdn, &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind == sz_size2index(size)); assert(emap_alloc_ctx_usize_get(&alloc_ctx) == size); } else { @@ -498,14 +502,14 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } if (config_debug) { - edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsdn, &arena_emap_global, ptr); assert(alloc_ctx.szind == edata_szind_get(edata)); assert(alloc_ctx.slab == edata_slab_get(edata)); - emap_alloc_ctx_init(&alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, - sz_s2u(size)); - assert(emap_alloc_ctx_usize_get(&alloc_ctx) == - edata_usize_get(edata)); + emap_alloc_ctx_init( + &alloc_ctx, alloc_ctx.szind, alloc_ctx.slab, sz_s2u(size)); + assert(emap_alloc_ctx_usize_get(&alloc_ctx) + == edata_usize_get(edata)); } if (likely(alloc_ctx.slab)) { @@ -513,8 +517,8 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, if (arena_tcache_dalloc_small_safety_check(tsdn, ptr)) { return; } - tcache_dalloc_small(tsdn_tsd(tsdn), tcache, ptr, - alloc_ctx.szind, slow_path); + tcache_dalloc_small( + tsdn_tsd(tsdn), tcache, ptr, alloc_ctx.szind, slow_path); } else { arena_dalloc_large(tsdn, ptr, tcache, alloc_ctx.szind, sz_s2u(size), slow_path); @@ -522,13 +526,13 @@ arena_sdalloc(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, } static inline void -arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t alignment) { +arena_cache_oblivious_randomize( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t alignment) { assert(edata_base_get(edata) == edata_addr_get(edata)); if (alignment < PAGE) { - unsigned lg_range = LG_PAGE - - lg_floor(CACHELINE_CEILING(alignment)); + unsigned lg_range = LG_PAGE + - lg_floor(CACHELINE_CEILING(alignment)); size_t r; if (!tsdn_null(tsdn)) { tsd_t *tsd = tsdn_tsd(tsdn); @@ -538,12 
+542,12 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, uint64_t stack_value = (uint64_t)(uintptr_t)&r; r = (size_t)prng_lg_range_u64(&stack_value, lg_range); } - uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - - lg_range); - edata->e_addr = (void *)((byte_t *)edata->e_addr + - random_offset); - assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) == - edata->e_addr); + uintptr_t random_offset = ((uintptr_t)r) + << (LG_PAGE - lg_range); + edata->e_addr = (void *)((byte_t *)edata->e_addr + + random_offset); + assert(ALIGNMENT_ADDR2BASE(edata->e_addr, alignment) + == edata->e_addr); } } @@ -556,20 +560,21 @@ arena_cache_oblivious_randomize(tsdn_t *tsdn, arena_t *arena, edata_t *edata, typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t; struct arena_dalloc_bin_locked_info_s { div_info_t div_info; - uint32_t nregs; - uint64_t ndalloc; + uint32_t nregs; + uint64_t ndalloc; }; JEMALLOC_ALWAYS_INLINE size_t arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, const void *ptr) { - size_t regind = arena_slab_regind_impl(&info->div_info, binind, slab, ptr); + size_t regind = arena_slab_regind_impl( + &info->div_info, binind, slab, ptr); return regind; } JEMALLOC_ALWAYS_INLINE void -arena_dalloc_bin_locked_begin(arena_dalloc_bin_locked_info_t *info, - szind_t binind) { +arena_dalloc_bin_locked_begin( + arena_dalloc_bin_locked_info_t *info, szind_t binind) { info->div_info = arena_binind_div_info[binind]; info->nregs = bin_infos[binind].nregs; info->ndalloc = 0; @@ -589,8 +594,8 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(info, binind, slab, ptr); - slab_data_t *slab_data = edata_slab_data_get(slab); + size_t regind = 
arena_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); assert(edata_nfree_get(slab) < bin_info->nregs); /* Freeing an unallocated pointer can cause assertion failure. */ @@ -605,8 +610,8 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { - arena_dalloc_bin_locked_handle_newly_empty(tsdn, arena, slab, - bin); + arena_dalloc_bin_locked_handle_newly_empty( + tsdn, arena, slab, bin); if (*dalloc_slabs_count < ndalloc_slabs) { dalloc_slabs[*dalloc_slabs_count] = slab; @@ -615,8 +620,8 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, edata_list_active_append(dalloc_slabs_extra, slab); } } else if (nfree == 1 && slab != bin->slabcur) { - arena_dalloc_bin_locked_handle_newly_nonempty(tsdn, arena, slab, - bin); + arena_dalloc_bin_locked_handle_newly_nonempty( + tsdn, arena, slab, bin); } } @@ -637,21 +642,20 @@ arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, edata_list_active_t *dalloc_slabs_extra) { assert(binind < bin_info_nbatched_sizes); bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - size_t nelems_to_pop = batcher_pop_begin(tsdn, - &batched_bin->remote_frees); + size_t nelems_to_pop = batcher_pop_begin( + tsdn, &batched_bin->remote_frees); bin_batching_test_mid_pop(nelems_to_pop); if (nelems_to_pop == BATCHER_NO_IDX) { - malloc_mutex_assert_not_owner(tsdn, - &batched_bin->remote_frees.mtx); + malloc_mutex_assert_not_owner( + tsdn, &batched_bin->remote_frees.mtx); return; } else { - malloc_mutex_assert_owner(tsdn, - &batched_bin->remote_frees.mtx); + malloc_mutex_assert_owner(tsdn, &batched_bin->remote_frees.mtx); } - size_t npushes = batcher_pop_get_pushes(tsdn, - &batched_bin->remote_frees); + size_t npushes = batcher_pop_get_pushes( + tsdn, &batched_bin->remote_frees); bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; for (size_t i = 0; i < nelems_to_pop; 
i++) { remote_free_data[i] = batched_bin->remote_free_data[i]; @@ -682,8 +686,8 @@ struct arena_bin_flush_batch_state_s { * backup array for any "extra" slabs, as well as a a list to allow a * dynamic number of ones exceeding that array. */ - edata_t *dalloc_slabs[8]; - unsigned dalloc_slab_count; + edata_t *dalloc_slabs[8]; + unsigned dalloc_slab_count; edata_list_active_t dalloc_slabs_extra; }; @@ -712,8 +716,8 @@ arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, preallocated_slabs); arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, - state->dalloc_slabs, ndalloc_slabs, - &state->dalloc_slab_count, &state->dalloc_slabs_extra); + state->dalloc_slabs, ndalloc_slabs, &state->dalloc_slab_count, + &state->dalloc_slabs_extra); } JEMALLOC_ALWAYS_INLINE void @@ -769,8 +773,8 @@ arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { ret = shard0 + binshard; } assert(binind >= SC_NBINS - 1 - || (uintptr_t)ret < (uintptr_t)arena - + arena_bin_offsets[binind + 1]); + || (uintptr_t)ret + < (uintptr_t)arena + arena_bin_offsets[binind + 1]); return ret; } diff --git a/include/jemalloc/internal/arena_stats.h b/include/jemalloc/internal/arena_stats.h index 7f075114..01012f68 100644 --- a/include/jemalloc/internal/arena_stats.h +++ b/include/jemalloc/internal/arena_stats.h @@ -17,31 +17,31 @@ struct arena_stats_large_s { * Total number of large allocation/deallocation requests served directly * by the arena. */ - locked_u64_t nmalloc; - locked_u64_t ndalloc; + locked_u64_t nmalloc; + locked_u64_t ndalloc; /* * Total large active bytes (allocated - deallocated) served directly * by the arena. */ - locked_u64_t active_bytes; + locked_u64_t active_bytes; /* * Number of allocation requests that correspond to this size class. * This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - locked_u64_t nrequests; /* Partially derived. */ + locked_u64_t nrequests; /* Partially derived. 
*/ /* * Number of tcache fills / flushes for large (similarly, periodically * merged). Note that there is no large tcache batch-fill currently * (i.e. only fill 1 at a time); however flush may be batched. */ - locked_u64_t nfills; /* Partially derived. */ - locked_u64_t nflushes; /* Partially derived. */ + locked_u64_t nfills; /* Partially derived. */ + locked_u64_t nflushes; /* Partially derived. */ /* Current number of allocations of this size class. */ - size_t curlextents; /* Derived. */ + size_t curlextents; /* Derived. */ }; /* @@ -57,40 +57,40 @@ struct arena_stats_s { * resident includes the base stats -- that's why it lives here and not * in pa_shard_stats_t. */ - size_t base; /* Derived. */ - size_t metadata_edata; /* Derived. */ - size_t metadata_rtree; /* Derived. */ - size_t resident; /* Derived. */ - size_t metadata_thp; /* Derived. */ - size_t mapped; /* Derived. */ + size_t base; /* Derived. */ + size_t metadata_edata; /* Derived. */ + size_t metadata_rtree; /* Derived. */ + size_t resident; /* Derived. */ + size_t metadata_thp; /* Derived. */ + size_t mapped; /* Derived. */ - atomic_zu_t internal; + atomic_zu_t internal; - size_t allocated_large; /* Derived. */ - uint64_t nmalloc_large; /* Derived. */ - uint64_t ndalloc_large; /* Derived. */ - uint64_t nfills_large; /* Derived. */ - uint64_t nflushes_large; /* Derived. */ - uint64_t nrequests_large; /* Derived. */ + size_t allocated_large; /* Derived. */ + uint64_t nmalloc_large; /* Derived. */ + uint64_t ndalloc_large; /* Derived. */ + uint64_t nfills_large; /* Derived. */ + uint64_t nflushes_large; /* Derived. */ + uint64_t nrequests_large; /* Derived. */ /* * The stats logically owned by the pa_shard in the same arena. This * lives here only because it's convenient for the purposes of the ctl * module -- it only knows about the single arena_stats. */ - pa_shard_stats_t pa_shard_stats; + pa_shard_stats_t pa_shard_stats; /* Number of bytes cached in tcache associated with this arena. 
*/ - size_t tcache_bytes; /* Derived. */ - size_t tcache_stashed_bytes; /* Derived. */ + size_t tcache_bytes; /* Derived. */ + size_t tcache_stashed_bytes; /* Derived. */ mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; /* One element for each large size class. */ - arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; + arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; /* Arena uptime. */ - nstime_t uptime; + nstime_t uptime; }; static inline bool @@ -101,7 +101,7 @@ arena_stats_init(tsdn_t *tsdn, arena_stats_t *arena_stats) { } } if (LOCKEDINT_MTX_INIT(arena_stats->mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { return true; } /* Memory is zeroed, so there is no need to clear stats. */ @@ -115,8 +115,8 @@ arena_stats_large_flush_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, arena_stats_large_t *lstats = &arena_stats->lstats[szind - SC_NBINS]; locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), &lstats->nrequests, nrequests); - locked_inc_u64(tsdn, LOCKEDINT_MTX(arena_stats->mtx), - &lstats->nflushes, 1); + locked_inc_u64( + tsdn, LOCKEDINT_MTX(arena_stats->mtx), &lstats->nflushes, 1); LOCKEDINT_MTX_UNLOCK(tsdn, arena_stats->mtx); } diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 56e12f95..4778ca1b 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -32,20 +32,20 @@ struct arena_s { * * Synchronization: atomic. */ - atomic_u_t nthreads[2]; + atomic_u_t nthreads[2]; /* Next bin shard for binding new threads. Synchronization: atomic. */ - atomic_u_t binshard_next; + atomic_u_t binshard_next; /* * When percpu_arena is enabled, to amortize the cost of reading / * updating the current CPU id, track the most recent thread accessing * this arena, and only read CPU if there is a mismatch. 
*/ - tsdn_t *last_thd; + tsdn_t *last_thd; /* Synchronization: internal. */ - arena_stats_t stats; + arena_stats_t stats; /* * Lists of tcaches and cache_bin_array_descriptors for extant threads @@ -54,28 +54,28 @@ struct arena_s { * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_slow_t) tcache_ql; - ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; - malloc_mutex_t tcache_ql_mtx; + ql_head(tcache_slow_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; /* * Represents a dss_prec_t, but atomically. * * Synchronization: atomic. */ - atomic_u_t dss_prec; + atomic_u_t dss_prec; /* * Extant large allocations. * * Synchronization: large_mtx. */ - edata_list_active_t large; + edata_list_active_t large; /* Synchronizes all large allocation/update/deallocation. */ - malloc_mutex_t large_mtx; + malloc_mutex_t large_mtx; /* The page-level allocator shard this arena uses. */ - pa_shard_t pa_shard; + pa_shard_t pa_shard; /* * A cached copy of base->ind. This can get accessed on hot paths; @@ -88,12 +88,12 @@ struct arena_s { * * Synchronization: internal. */ - base_t *base; + base_t *base; /* Used to determine uptime. Read-only after initialization. */ - nstime_t create_time; + nstime_t create_time; /* The name of the arena. */ - char name[ARENA_NAME_LEN]; + char name[ARENA_NAME_LEN]; /* * The arena is allocated alongside its bins; really this is a @@ -101,10 +101,11 @@ struct arena_s { * Enforcing cacheline-alignment to minimize the number of cachelines * touched on the hot paths. */ - JEMALLOC_WARN_ON_USAGE("Do not use this field directly. " - "Use `arena_get_bin` instead.") + JEMALLOC_WARN_ON_USAGE( + "Do not use this field directly. 
" + "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_with_batch_t all_bins[0]; + bin_with_batch_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index a1fc8926..7ed2b968 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -5,38 +5,38 @@ #include "jemalloc/internal/sc.h" /* Default decay times in milliseconds. */ -#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) -#define MUZZY_DECAY_MS_DEFAULT (0) +#define DIRTY_DECAY_MS_DEFAULT ZD(10 * 1000) +#define MUZZY_DECAY_MS_DEFAULT (0) /* Number of event ticks between time checks. */ -#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 +#define ARENA_DECAY_NTICKS_PER_UPDATE 1000 /* Maximum length of the arena name. */ #define ARENA_NAME_LEN 32 typedef struct arena_decay_s arena_decay_t; -typedef struct arena_s arena_t; +typedef struct arena_s arena_t; typedef enum { - percpu_arena_mode_names_base = 0, /* Used for options processing. */ + percpu_arena_mode_names_base = 0, /* Used for options processing. */ /* * *_uninit are used only during bootstrapping, and must correspond * to initialized variant plus percpu_arena_mode_enabled_base. */ - percpu_arena_uninit = 0, - per_phycpu_arena_uninit = 1, + percpu_arena_uninit = 0, + per_phycpu_arena_uninit = 1, /* All non-disabled modes must come after percpu_arena_disabled. */ - percpu_arena_disabled = 2, + percpu_arena_disabled = 2, - percpu_arena_mode_names_limit = 3, /* Used for options processing. */ + percpu_arena_mode_names_limit = 3, /* Used for options processing. */ percpu_arena_mode_enabled_base = 3, - percpu_arena = 3, - per_phycpu_arena = 4 /* Hyper threads share arena. */ + percpu_arena = 3, + per_phycpu_arena = 4 /* Hyper threads share arena. 
*/ } percpu_arena_mode_t; -#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) -#define PERCPU_ARENA_DEFAULT percpu_arena_disabled +#define PERCPU_ARENA_ENABLED(m) ((m) >= percpu_arena_mode_enabled_base) +#define PERCPU_ARENA_DEFAULT percpu_arena_disabled /* * When allocation_size >= oversize_threshold, use the dedicated huge arena diff --git a/include/jemalloc/internal/assert.h b/include/jemalloc/internal/assert.h index 38eb2a2c..1b5da72f 100644 --- a/include/jemalloc/internal/assert.h +++ b/include/jemalloc/internal/assert.h @@ -7,51 +7,57 @@ * assertion failure. */ #ifndef assert -#define assert(e) do { \ - if (unlikely(config_debug && !(e))) { \ - malloc_printf( \ - ": %s:%d: Failed assertion: \"%s\"\n", \ - __FILE__, __LINE__, #e); \ - abort(); \ - } \ -} while (0) +# define assert(e) \ + do { \ + if (unlikely(config_debug && !(e))) { \ + malloc_printf( \ + ": %s:%d: Failed assertion: \"%s\"\n", \ + __FILE__, __LINE__, #e); \ + abort(); \ + } \ + } while (0) #endif #ifndef not_reached -#define not_reached() do { \ - if (config_debug) { \ - malloc_printf( \ - ": %s:%d: Unreachable code reached\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ - unreachable(); \ -} while (0) +# define not_reached() \ + do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Unreachable code reached\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + unreachable(); \ + } while (0) #endif #ifndef not_implemented -#define not_implemented() do { \ - if (config_debug) { \ - malloc_printf(": %s:%d: Not implemented\n", \ - __FILE__, __LINE__); \ - abort(); \ - } \ -} while (0) +# define not_implemented() \ + do { \ + if (config_debug) { \ + malloc_printf( \ + ": %s:%d: Not implemented\n", \ + __FILE__, __LINE__); \ + abort(); \ + } \ + } while (0) #endif #ifndef assert_not_implemented -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) { \ - not_implemented(); \ - } \ -} while (0) +# define assert_not_implemented(e) \ + do { \ + if 
(unlikely(config_debug && !(e))) { \ + not_implemented(); \ + } \ + } while (0) #endif /* Use to assert a particular configuration, e.g., cassert(config_debug). */ #ifndef cassert -#define cassert(c) do { \ - if (unlikely(!(c))) { \ - not_reached(); \ - } \ -} while (0) +# define cassert(c) \ + do { \ + if (unlikely(!(c))) { \ + not_reached(); \ + } \ + } while (0) #endif diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index 6dd2a7c6..ddd9341e 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -5,21 +5,21 @@ #define JEMALLOC_U8_ATOMICS #if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) -# include "jemalloc/internal/atomic_gcc_atomic.h" -# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS) -# undef JEMALLOC_U8_ATOMICS -# endif +# include "jemalloc/internal/atomic_gcc_atomic.h" +# if !defined(JEMALLOC_GCC_U8_ATOMIC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(JEMALLOC_GCC_SYNC_ATOMICS) -# include "jemalloc/internal/atomic_gcc_sync.h" -# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS) -# undef JEMALLOC_U8_ATOMICS -# endif +# include "jemalloc/internal/atomic_gcc_sync.h" +# if !defined(JEMALLOC_GCC_U8_SYNC_ATOMICS) +# undef JEMALLOC_U8_ATOMICS +# endif #elif defined(_MSC_VER) -# include "jemalloc/internal/atomic_msvc.h" +# include "jemalloc/internal/atomic_msvc.h" #elif defined(JEMALLOC_C11_ATOMICS) -# include "jemalloc/internal/atomic_c11.h" +# include "jemalloc/internal/atomic_c11.h" #else -# error "Don't have atomics implemented on this platform." +# error "Don't have atomics implemented on this platform." #endif #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE @@ -56,22 +56,19 @@ /* * Another convenience -- simple atomic helper functions. 
*/ -#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, \ - lg_size) \ - JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ - ATOMIC_INLINE void \ - atomic_load_add_store_##short_type(atomic_##short_type##_t *a, \ - type inc) { \ - type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ - type newval = oldval + inc; \ - atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ - } \ - ATOMIC_INLINE void \ - atomic_load_sub_store_##short_type(atomic_##short_type##_t *a, \ - type inc) { \ - type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ - type newval = oldval - inc; \ - atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ +#define JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(type, short_type, lg_size) \ + JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ + ATOMIC_INLINE void atomic_load_add_store_##short_type( \ + atomic_##short_type##_t *a, type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval + inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ + } \ + ATOMIC_INLINE void atomic_load_sub_store_##short_type( \ + atomic_##short_type##_t *a, type inc) { \ + type oldval = atomic_load_##short_type(a, ATOMIC_RELAXED); \ + type newval = oldval - inc; \ + atomic_store_##short_type(a, newval, ATOMIC_RELAXED); \ } /* @@ -79,7 +76,7 @@ * fact. */ #if (LG_SIZEOF_PTR == 3 || LG_SIZEOF_INT == 3) -# define JEMALLOC_ATOMIC_U64 +# define JEMALLOC_ATOMIC_U64 #endif JEMALLOC_GENERATE_ATOMICS(void *, p, LG_SIZEOF_PTR) diff --git a/include/jemalloc/internal/atomic_c11.h b/include/jemalloc/internal/atomic_c11.h index a37e9661..1e86e2a0 100644 --- a/include/jemalloc/internal/atomic_c11.h +++ b/include/jemalloc/internal/atomic_c11.h @@ -66,35 +66,29 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ * Integral types have some special operations available that non-integral ones * lack. 
*/ -#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ - /* unused */ lg_size) \ -JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ - \ -ATOMIC_INLINE type \ -atomic_fetch_add_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_add_explicit(a, val, mo); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_sub_explicit(a, val, mo); \ -} \ -ATOMIC_INLINE type \ -atomic_fetch_and_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_and_explicit(a, val, mo); \ -} \ -ATOMIC_INLINE type \ -atomic_fetch_or_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_or_explicit(a, val, mo); \ -} \ -ATOMIC_INLINE type \ -atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - return atomic_fetch_xor_explicit(a, val, mo); \ -} +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size) \ + JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ + ATOMIC_INLINE type atomic_fetch_add_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_add_explicit(a, val, mo); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_sub_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_sub_explicit(a, val, mo); \ + } \ + ATOMIC_INLINE type atomic_fetch_and_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_and_explicit(a, val, mo); \ + } \ + ATOMIC_INLINE type atomic_fetch_or_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_or_explicit(a, val, mo); \ + } \ + ATOMIC_INLINE type atomic_fetch_xor_##short_type( \ + 
atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return atomic_fetch_xor_explicit(a, val, mo); \ + } #endif /* JEMALLOC_INTERNAL_ATOMIC_C11_H */ diff --git a/include/jemalloc/internal/atomic_gcc_atomic.h b/include/jemalloc/internal/atomic_gcc_atomic.h index 0819fde1..a828a6b0 100644 --- a/include/jemalloc/internal/atomic_gcc_atomic.h +++ b/include/jemalloc/internal/atomic_gcc_atomic.h @@ -6,7 +6,8 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE -#define ATOMIC_INIT(...) {__VA_ARGS__} +#define ATOMIC_INIT(...) \ + { __VA_ARGS__ } typedef enum { atomic_memory_order_relaxed, @@ -39,95 +40,81 @@ atomic_fence(atomic_memory_order_t mo) { __atomic_thread_fence(atomic_enum_to_builtin(mo)); } -#define JEMALLOC_GENERATE_ATOMICS(type, short_type, \ - /* unused */ lg_size) \ -typedef struct { \ - type repr; \ -} atomic_##short_type##_t; \ - \ -ATOMIC_INLINE type \ -atomic_load_##short_type(const atomic_##short_type##_t *a, \ - atomic_memory_order_t mo) { \ - type result; \ - __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ - return result; \ -} \ - \ -ATOMIC_INLINE void \ -atomic_store_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - type result; \ - __atomic_exchange(&a->repr, &val, &result, \ - atomic_enum_to_builtin(mo)); \ - return result; \ -} \ - \ -ATOMIC_INLINE bool \ -atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - UNUSED type *expected, type desired, \ - atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - return __atomic_compare_exchange(&a->repr, expected, &desired, \ - true, atomic_enum_to_builtin(success_mo), \ - atomic_enum_to_builtin(failure_mo)); \ -} \ - \ -ATOMIC_INLINE bool \ 
-atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - UNUSED type *expected, type desired, \ - atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - return __atomic_compare_exchange(&a->repr, expected, &desired, \ - false, \ - atomic_enum_to_builtin(success_mo), \ - atomic_enum_to_builtin(failure_mo)); \ -} +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + typedef struct { \ + type repr; \ + } atomic_##short_type##_t; \ + \ + ATOMIC_INLINE type atomic_load_##short_type( \ + const atomic_##short_type##_t *a, atomic_memory_order_t mo) { \ + type result; \ + __atomic_load(&a->repr, &result, atomic_enum_to_builtin(mo)); \ + return result; \ + } \ + \ + ATOMIC_INLINE void atomic_store_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + __atomic_store(&a->repr, &val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_exchange_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + type result; \ + __atomic_exchange( \ + &a->repr, &val, &result, atomic_enum_to_builtin(mo)); \ + return result; \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_weak_##short_type( \ + atomic_##short_type##_t *a, UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + true, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_strong_##short_type( \ + atomic_##short_type##_t *a, UNUSED type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + return __atomic_compare_exchange(&a->repr, expected, &desired, \ + false, atomic_enum_to_builtin(success_mo), \ + atomic_enum_to_builtin(failure_mo)); \ + } - -#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ - /* unused 
*/ lg_size) \ -JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ - \ -ATOMIC_INLINE type \ -atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_add(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_sub(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_and(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_or(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __atomic_fetch_xor(&a->repr, val, \ - atomic_enum_to_builtin(mo)); \ -} +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size) \ + JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ + ATOMIC_INLINE type atomic_fetch_add_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_add( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_sub_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_sub( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_and_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_and( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_or_##short_type( \ + 
atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_or( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_xor_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __atomic_fetch_xor( \ + &a->repr, val, atomic_enum_to_builtin(mo)); \ + } #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/atomic_gcc_sync.h b/include/jemalloc/internal/atomic_gcc_sync.h index 801d6197..9e2ff9c8 100644 --- a/include/jemalloc/internal/atomic_gcc_sync.h +++ b/include/jemalloc/internal/atomic_gcc_sync.h @@ -5,7 +5,8 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE -#define ATOMIC_INIT(...) {__VA_ARGS__} +#define ATOMIC_INIT(...) \ + { __VA_ARGS__ } typedef enum { atomic_memory_order_relaxed, @@ -29,13 +30,13 @@ atomic_fence(atomic_memory_order_t mo) { return; } asm volatile("" ::: "memory"); -# if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) /* This is implicit on x86. 
*/ -# elif defined(__ppc64__) +#elif defined(__ppc64__) asm volatile("lwsync"); -# elif defined(__ppc__) +#elif defined(__ppc__) asm volatile("sync"); -# elif defined(__sparc__) && defined(__arch64__) +#elif defined(__sparc__) && defined(__arch64__) if (mo == atomic_memory_order_acquire) { asm volatile("membar #LoadLoad | #LoadStore"); } else if (mo == atomic_memory_order_release) { @@ -43,9 +44,9 @@ atomic_fence(atomic_memory_order_t mo) { } else { asm volatile("membar #LoadLoad | #LoadStore | #StoreStore"); } -# else +#else __sync_synchronize(); -# endif +#endif asm volatile("" ::: "memory"); } @@ -68,23 +69,22 @@ atomic_fence(atomic_memory_order_t mo) { ATOMIC_INLINE void atomic_pre_sc_load_fence() { -# if defined(__i386__) || defined(__x86_64__) || \ - (defined(__sparc__) && defined(__arch64__)) +#if defined(__i386__) || defined(__x86_64__) \ + || (defined(__sparc__) && defined(__arch64__)) atomic_fence(atomic_memory_order_relaxed); -# else +#else atomic_fence(atomic_memory_order_seq_cst); -# endif +#endif } ATOMIC_INLINE void atomic_post_sc_store_fence() { -# if defined(__i386__) || defined(__x86_64__) || \ - (defined(__sparc__) && defined(__arch64__)) +#if defined(__i386__) || defined(__x86_64__) \ + || (defined(__sparc__) && defined(__arch64__)) atomic_fence(atomic_memory_order_seq_cst); -# else +#else atomic_fence(atomic_memory_order_relaxed); -# endif - +#endif } /* clang-format off */ @@ -164,39 +164,33 @@ atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ } /* clang-format on */ -#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, \ - /* unused */ lg_size) \ -JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ - \ -ATOMIC_INLINE type \ -atomic_fetch_add_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_add(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_sub_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) 
{ \ - return __sync_fetch_and_sub(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_and_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_and(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_or_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_or(&a->repr, val); \ -} \ - \ -ATOMIC_INLINE type \ -atomic_fetch_xor_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return __sync_fetch_and_xor(&a->repr, val); \ -} +#define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, /* unused */ lg_size) \ + JEMALLOC_GENERATE_ATOMICS(type, short_type, /* unused */ lg_size) \ + \ + ATOMIC_INLINE type atomic_fetch_add_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_add(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_sub_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_sub(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_and_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_and(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_or_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_or(&a->repr, val); \ + } \ + \ + ATOMIC_INLINE type atomic_fetch_xor_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return __sync_fetch_and_xor(&a->repr, val); \ + } #undef ATOMIC_INLINE diff --git a/include/jemalloc/internal/atomic_msvc.h b/include/jemalloc/internal/atomic_msvc.h index 5313aed9..7accca63 100644 --- a/include/jemalloc/internal/atomic_msvc.h +++ b/include/jemalloc/internal/atomic_msvc.h @@ -5,7 +5,8 @@ #define ATOMIC_INLINE JEMALLOC_ALWAYS_INLINE -#define ATOMIC_INIT(...) 
{__VA_ARGS__} +#define ATOMIC_INIT(...) \ + { __VA_ARGS__ } typedef enum { atomic_memory_order_relaxed, @@ -15,108 +16,104 @@ typedef enum { atomic_memory_order_seq_cst } atomic_memory_order_t; -typedef char atomic_repr_0_t; -typedef short atomic_repr_1_t; -typedef long atomic_repr_2_t; +typedef char atomic_repr_0_t; +typedef short atomic_repr_1_t; +typedef long atomic_repr_2_t; typedef __int64 atomic_repr_3_t; ATOMIC_INLINE void atomic_fence(atomic_memory_order_t mo) { _ReadWriteBarrier(); -# if defined(_M_ARM) || defined(_M_ARM64) +#if defined(_M_ARM) || defined(_M_ARM64) /* ARM needs a barrier for everything but relaxed. */ if (mo != atomic_memory_order_relaxed) { MemoryBarrier(); } -# elif defined(_M_IX86) || defined (_M_X64) +#elif defined(_M_IX86) || defined(_M_X64) /* x86 needs a barrier only for seq_cst. */ if (mo == atomic_memory_order_seq_cst) { MemoryBarrier(); } -# else -# error "Don't know how to create atomics for this platform for MSVC." -# endif +#else +# error "Don't know how to create atomics for this platform for MSVC." 
+#endif _ReadWriteBarrier(); } -#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_ ## lg_size ## _t +#define ATOMIC_INTERLOCKED_REPR(lg_size) atomic_repr_##lg_size##_t #define ATOMIC_CONCAT(a, b) ATOMIC_RAW_CONCAT(a, b) -#define ATOMIC_RAW_CONCAT(a, b) a ## b +#define ATOMIC_RAW_CONCAT(a, b) a##b -#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) ATOMIC_CONCAT( \ - base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) +#define ATOMIC_INTERLOCKED_NAME(base_name, lg_size) \ + ATOMIC_CONCAT(base_name, ATOMIC_INTERLOCKED_SUFFIX(lg_size)) -#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ - ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) +#define ATOMIC_INTERLOCKED_SUFFIX(lg_size) \ + ATOMIC_CONCAT(ATOMIC_INTERLOCKED_SUFFIX_, lg_size) #define ATOMIC_INTERLOCKED_SUFFIX_0 8 #define ATOMIC_INTERLOCKED_SUFFIX_1 16 #define ATOMIC_INTERLOCKED_SUFFIX_2 #define ATOMIC_INTERLOCKED_SUFFIX_3 64 -#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ -typedef struct { \ - ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ -} atomic_##short_type##_t; \ - \ -ATOMIC_INLINE type \ -atomic_load_##short_type(const atomic_##short_type##_t *a, \ - atomic_memory_order_t mo) { \ - ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ - if (mo != atomic_memory_order_relaxed) { \ - atomic_fence(atomic_memory_order_acquire); \ - } \ - return (type) ret; \ -} \ - \ -ATOMIC_INLINE void \ -atomic_store_##short_type(atomic_##short_type##_t *a, \ - type val, atomic_memory_order_t mo) { \ - if (mo != atomic_memory_order_relaxed) { \ - atomic_fence(atomic_memory_order_release); \ - } \ - a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size)) val; \ - if (mo == atomic_memory_order_seq_cst) { \ - atomic_fence(atomic_memory_order_seq_cst); \ - } \ -} \ - \ -ATOMIC_INLINE type \ -atomic_exchange_##short_type(atomic_##short_type##_t *a, type val, \ - atomic_memory_order_t mo) { \ - return (type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ - lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ -} \ - \ 
-ATOMIC_INLINE bool \ -atomic_compare_exchange_weak_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - ATOMIC_INTERLOCKED_REPR(lg_size) e = \ - (ATOMIC_INTERLOCKED_REPR(lg_size))*expected; \ - ATOMIC_INTERLOCKED_REPR(lg_size) d = \ - (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ - ATOMIC_INTERLOCKED_REPR(lg_size) old = \ - ATOMIC_INTERLOCKED_NAME(_InterlockedCompareExchange, \ - lg_size)(&a->repr, d, e); \ - if (old == e) { \ - return true; \ - } else { \ - *expected = (type)old; \ - return false; \ - } \ -} \ - \ -ATOMIC_INLINE bool \ -atomic_compare_exchange_strong_##short_type(atomic_##short_type##_t *a, \ - type *expected, type desired, atomic_memory_order_t success_mo, \ - atomic_memory_order_t failure_mo) { \ - /* We implement the weak version with strong semantics. */ \ - return atomic_compare_exchange_weak_##short_type(a, expected, \ - desired, success_mo, failure_mo); \ -} - +#define JEMALLOC_GENERATE_ATOMICS(type, short_type, lg_size) \ + typedef struct { \ + ATOMIC_INTERLOCKED_REPR(lg_size) repr; \ + } atomic_##short_type##_t; \ + \ + ATOMIC_INLINE type atomic_load_##short_type( \ + const atomic_##short_type##_t *a, atomic_memory_order_t mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) ret = a->repr; \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_acquire); \ + } \ + return (type)ret; \ + } \ + \ + ATOMIC_INLINE void atomic_store_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + if (mo != atomic_memory_order_relaxed) { \ + atomic_fence(atomic_memory_order_release); \ + } \ + a->repr = (ATOMIC_INTERLOCKED_REPR(lg_size))val; \ + if (mo == atomic_memory_order_seq_cst) { \ + atomic_fence(atomic_memory_order_seq_cst); \ + } \ + } \ + \ + ATOMIC_INLINE type atomic_exchange_##short_type( \ + atomic_##short_type##_t *a, type val, atomic_memory_order_t mo) { \ + return 
(type)ATOMIC_INTERLOCKED_NAME(_InterlockedExchange, \ + lg_size)(&a->repr, (ATOMIC_INTERLOCKED_REPR(lg_size))val); \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_weak_##short_type( \ + atomic_##short_type##_t *a, type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + ATOMIC_INTERLOCKED_REPR(lg_size) \ + e = (ATOMIC_INTERLOCKED_REPR(lg_size)) * expected; \ + ATOMIC_INTERLOCKED_REPR(lg_size) \ + d = (ATOMIC_INTERLOCKED_REPR(lg_size))desired; \ + ATOMIC_INTERLOCKED_REPR(lg_size) \ + old = ATOMIC_INTERLOCKED_NAME( \ + _InterlockedCompareExchange, lg_size)(&a->repr, d, e); \ + if (old == e) { \ + return true; \ + } else { \ + *expected = (type)old; \ + return false; \ + } \ + } \ + \ + ATOMIC_INLINE bool atomic_compare_exchange_strong_##short_type( \ + atomic_##short_type##_t *a, type *expected, type desired, \ + atomic_memory_order_t success_mo, \ + atomic_memory_order_t failure_mo) { \ + /* We implement the weak version with strong semantics. 
*/ \ + return atomic_compare_exchange_weak_##short_type( \ + a, expected, desired, success_mo, failure_mo); \ + } /* clang-format off */ #define JEMALLOC_GENERATE_INT_ATOMICS(type, short_type, lg_size) \ diff --git a/include/jemalloc/internal/background_thread_externs.h b/include/jemalloc/internal/background_thread_externs.h index 0d34ee55..efc0aaa4 100644 --- a/include/jemalloc/internal/background_thread_externs.h +++ b/include/jemalloc/internal/background_thread_externs.h @@ -6,26 +6,26 @@ #include "jemalloc/internal/base.h" #include "jemalloc/internal/mutex.h" -extern bool opt_background_thread; -extern size_t opt_max_background_threads; -extern malloc_mutex_t background_thread_lock; -extern atomic_b_t background_thread_enabled_state; -extern size_t n_background_threads; -extern size_t max_background_threads; +extern bool opt_background_thread; +extern size_t opt_max_background_threads; +extern malloc_mutex_t background_thread_lock; +extern atomic_b_t background_thread_enabled_state; +extern size_t n_background_threads; +extern size_t max_background_threads; extern background_thread_info_t *background_thread_info; bool background_thread_create(tsd_t *tsd, unsigned arena_ind); bool background_threads_enable(tsd_t *tsd); bool background_threads_disable(tsd_t *tsd); -bool background_thread_is_started(background_thread_info_t* info); -void background_thread_wakeup_early(background_thread_info_t *info, - nstime_t *remaining_sleep); +bool background_thread_is_started(background_thread_info_t *info); +void background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep); void background_thread_prefork0(tsdn_t *tsdn); void background_thread_prefork1(tsdn_t *tsdn); void background_thread_postfork_parent(tsdn_t *tsdn); void background_thread_postfork_child(tsdn_t *tsdn); -bool background_thread_stats_read(tsdn_t *tsdn, - background_thread_stats_t *stats); +bool background_thread_stats_read( + tsdn_t *tsdn, background_thread_stats_t *stats); 
void background_thread_ctl_init(tsdn_t *tsdn); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER diff --git a/include/jemalloc/internal/background_thread_inlines.h b/include/jemalloc/internal/background_thread_inlines.h index fd3884f1..e822a3f7 100644 --- a/include/jemalloc/internal/background_thread_inlines.h +++ b/include/jemalloc/internal/background_thread_inlines.h @@ -36,14 +36,14 @@ background_thread_info_get(size_t ind) { JEMALLOC_ALWAYS_INLINE uint64_t background_thread_wakeup_time_get(background_thread_info_t *info) { uint64_t next_wakeup = nstime_ns(&info->next_wakeup); - assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) == - (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); + assert(atomic_load_b(&info->indefinite_sleep, ATOMIC_ACQUIRE) + == (next_wakeup == BACKGROUND_THREAD_INDEFINITE_SLEEP)); return next_wakeup; } JEMALLOC_ALWAYS_INLINE void -background_thread_wakeup_time_set(tsdn_t *tsdn, background_thread_info_t *info, - uint64_t wakeup_time) { +background_thread_wakeup_time_set( + tsdn_t *tsdn, background_thread_info_t *info, uint64_t wakeup_time) { malloc_mutex_assert_owner(tsdn, &info->mtx); atomic_store_b(&info->indefinite_sleep, wakeup_time == BACKGROUND_THREAD_INDEFINITE_SLEEP, ATOMIC_RELEASE); diff --git a/include/jemalloc/internal/background_thread_structs.h b/include/jemalloc/internal/background_thread_structs.h index 67b68797..d56673da 100644 --- a/include/jemalloc/internal/background_thread_structs.h +++ b/include/jemalloc/internal/background_thread_structs.h @@ -7,7 +7,7 @@ /* This file really combines "structs" and "types", but only transitionally. */ #if defined(JEMALLOC_BACKGROUND_THREAD) || defined(JEMALLOC_LAZY_LOCK) -# define JEMALLOC_PTHREAD_CREATE_WRAPPER +# define JEMALLOC_PTHREAD_CREATE_WRAPPER #endif #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX @@ -35,33 +35,33 @@ typedef enum { struct background_thread_info_s { #ifdef JEMALLOC_BACKGROUND_THREAD /* Background thread is pthread specific. 
*/ - pthread_t thread; - pthread_cond_t cond; + pthread_t thread; + pthread_cond_t cond; #endif - malloc_mutex_t mtx; - background_thread_state_t state; + malloc_mutex_t mtx; + background_thread_state_t state; /* When true, it means no wakeup scheduled. */ - atomic_b_t indefinite_sleep; + atomic_b_t indefinite_sleep; /* Next scheduled wakeup time (absolute time in ns). */ - nstime_t next_wakeup; + nstime_t next_wakeup; /* * Since the last background thread run, newly added number of pages * that need to be purged by the next wakeup. This is adjusted on * epoch advance, and is used to determine whether we should signal the * background thread to wake up earlier. */ - size_t npages_to_purge_new; + size_t npages_to_purge_new; /* Stats: total number of runs since started. */ - uint64_t tot_n_runs; + uint64_t tot_n_runs; /* Stats: total sleep time since started. */ - nstime_t tot_sleep_time; + nstime_t tot_sleep_time; }; typedef struct background_thread_info_s background_thread_info_t; struct background_thread_stats_s { - size_t num_threads; - uint64_t num_runs; - nstime_t run_interval; + size_t num_threads; + uint64_t num_runs; + nstime_t run_interval; mutex_prof_data_t max_counter_per_bg_thd; }; typedef struct background_thread_stats_s background_thread_stats_t; diff --git a/include/jemalloc/internal/base.h b/include/jemalloc/internal/base.h index c8004b25..f71a874c 100644 --- a/include/jemalloc/internal/base.h +++ b/include/jemalloc/internal/base.h @@ -13,7 +13,7 @@ #define BASE_BLOCK_MIN_ALIGN ((size_t)2 << 20) enum metadata_thp_mode_e { - metadata_thp_disabled = 0, + metadata_thp_disabled = 0, /* * Lazily enable hugepage for metadata. To avoid high RSS caused by THP * + low usage arena (i.e. THP becomes a significant percentage), the @@ -22,15 +22,15 @@ enum metadata_thp_mode_e { * arena), "auto" behaves the same as "always", i.e. madvise hugepage * right away. 
*/ - metadata_thp_auto = 1, - metadata_thp_always = 2, + metadata_thp_auto = 1, + metadata_thp_always = 2, metadata_thp_mode_limit = 3 }; typedef enum metadata_thp_mode_e metadata_thp_mode_t; #define METADATA_THP_DEFAULT metadata_thp_disabled extern metadata_thp_mode_t opt_metadata_thp; -extern const char *const metadata_thp_mode_names[]; +extern const char *const metadata_thp_mode_names[]; /* Embedded at the beginning of every block of base-managed virtual memory. */ typedef struct base_block_s base_block_t; @@ -102,24 +102,24 @@ metadata_thp_enabled(void) { } base_t *b0get(void); -base_t *base_new(tsdn_t *tsdn, unsigned ind, - const extent_hooks_t *extent_hooks, bool metadata_use_hooks); -void base_delete(tsdn_t *tsdn, base_t *base); -ehooks_t *base_ehooks_get(base_t *base); -ehooks_t *base_ehooks_get_for_metadata(base_t *base); -extent_hooks_t *base_extent_hooks_set(base_t *base, - extent_hooks_t *extent_hooks); -void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); +base_t *base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, + bool metadata_use_hooks); +void base_delete(tsdn_t *tsdn, base_t *base); +ehooks_t *base_ehooks_get(base_t *base); +ehooks_t *base_ehooks_get_for_metadata(base_t *base); +extent_hooks_t *base_extent_hooks_set( + base_t *base, extent_hooks_t *extent_hooks); +void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); edata_t *base_alloc_edata(tsdn_t *tsdn, base_t *base); -void *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size); -void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); -void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); -void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, - size_t *mapped, size_t *n_thp); -void base_prefork(tsdn_t *tsdn, base_t *base); -void base_postfork_parent(tsdn_t *tsdn, base_t *base); -void base_postfork_child(tsdn_t *tsdn, base_t *base); 
-bool base_boot(tsdn_t *tsdn); +void *base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size); +void *b0_alloc_tcache_stack(tsdn_t *tsdn, size_t size); +void b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack); +void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, + size_t *edata_allocated, size_t *rtree_allocated, size_t *resident, + size_t *mapped, size_t *n_thp); +void base_prefork(tsdn_t *tsdn, base_t *base); +void base_postfork_parent(tsdn_t *tsdn, base_t *base); +void base_postfork_child(tsdn_t *tsdn, base_t *base); +bool base_boot(tsdn_t *tsdn); #endif /* JEMALLOC_INTERNAL_BASE_H */ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h index 40c8b35f..3ceb8256 100644 --- a/include/jemalloc/internal/batcher.h +++ b/include/jemalloc/internal/batcher.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/atomic.h" #include "jemalloc/internal/mutex.h" -#define BATCHER_NO_IDX ((size_t)-1) +#define BATCHER_NO_IDX ((size_t) - 1) typedef struct batcher_s batcher_t; struct batcher_s { @@ -14,9 +14,9 @@ struct batcher_s { * togehter, along with the front of the mutex. The end of the mutex is * only touched if there's contention. */ - atomic_zu_t nelems; - size_t nelems_max; - size_t npushes; + atomic_zu_t nelems; + size_t nelems_max; + size_t npushes; malloc_mutex_t mtx; }; @@ -27,8 +27,8 @@ void batcher_init(batcher_t *batcher, size_t nelems_max); * BATCHER_NO_IDX if no index is free. If the former, the caller must call * batcher_push_end once done. 
*/ -size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, - size_t elems_to_push); +size_t batcher_push_begin( + tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push); void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); /* @@ -37,7 +37,7 @@ void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); */ size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); +void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index c49afea6..e91583d7 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -14,8 +14,8 @@ #ifdef JEMALLOC_JET extern void (*bin_batching_test_after_push_hook)(size_t idx); extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); -extern void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, - bool list_empty); +extern void (*bin_batching_test_after_unlock_hook)( + unsigned slab_dalloc_count, bool list_empty); #endif #ifdef JEMALLOC_JET @@ -50,8 +50,8 @@ bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { (void)list_empty; #ifdef JEMALLOC_JET if (bin_batching_test_after_unlock_hook != NULL) { - bin_batching_test_after_unlock_hook(slab_dalloc_count, - list_empty); + bin_batching_test_after_unlock_hook( + slab_dalloc_count, list_empty); } #endif } @@ -63,13 +63,13 @@ bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { typedef struct bin_s bin_t; struct bin_s { /* All operations on bin_t fields require lock ownership. */ - malloc_mutex_t lock; + malloc_mutex_t lock; /* * Bin statistics. These get touched every time the lock is acquired, * so put them close by in the hopes of getting some cache locality. 
*/ - bin_stats_t stats; + bin_stats_t stats; /* * Current slab being used to service allocations of this bin's size @@ -77,29 +77,29 @@ struct bin_s { * slabcur is reassigned, the previous slab must be deallocated or * inserted into slabs_{nonfull,full}. */ - edata_t *slabcur; + edata_t *slabcur; /* * Heap of non-full slabs. This heap is used to assure that new * allocations come from the non-full slab that is oldest/lowest in * memory. */ - edata_heap_t slabs_nonfull; + edata_heap_t slabs_nonfull; /* List used to track full slabs. */ - edata_list_active_t slabs_full; + edata_list_active_t slabs_full; }; typedef struct bin_remote_free_data_s bin_remote_free_data_t; struct bin_remote_free_data_s { - void *ptr; + void *ptr; edata_t *slab; }; typedef struct bin_with_batch_s bin_with_batch_t; struct bin_with_batch_s { - bin_t bin; - batcher_t remote_frees; + bin_t bin; + batcher_t remote_frees; bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; }; diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 88d58c91..0022c3f7 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -26,22 +26,22 @@ typedef struct bin_info_s bin_info_t; struct bin_info_s { /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; + size_t reg_size; /* Total size of a slab for this bin's size class. */ - size_t slab_size; + size_t slab_size; /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; + uint32_t nregs; /* Number of sharded bins in each arena for this size class. */ - uint32_t n_shards; + uint32_t n_shards; /* * Metadata used to manipulate bitmaps for slabs associated with this * bin. */ - bitmap_info_t bitmap_info; + bitmap_info_t bitmap_info; }; /* The maximum size a size class can be and still get batching behavior. 
*/ @@ -51,7 +51,7 @@ extern size_t opt_bin_info_remote_free_max_batch; // The max number of pending elems (across all batches) extern size_t opt_bin_info_remote_free_max; -extern szind_t bin_info_nbatched_sizes; +extern szind_t bin_info_nbatched_sizes; extern unsigned bin_info_nbatched_bins; extern unsigned bin_info_nunbatched_bins; diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index 334c166d..e1095f38 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -12,52 +12,52 @@ struct bin_stats_s { * many times, resulting many increments to nrequests, but only one * each to nmalloc and ndalloc. */ - uint64_t nmalloc; - uint64_t ndalloc; + uint64_t nmalloc; + uint64_t ndalloc; /* * Number of allocation requests that correspond to the size of this * bin. This includes requests served by tcache, though tcache only * periodically merges into this counter. */ - uint64_t nrequests; + uint64_t nrequests; /* * Current number of regions of this size class, including regions * currently cached by tcache. */ - size_t curregs; + size_t curregs; /* Number of tcache fills from this bin. */ - uint64_t nfills; + uint64_t nfills; /* Number of tcache flushes to this bin. */ - uint64_t nflushes; + uint64_t nflushes; /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; + uint64_t nslabs; /* * Total number of slabs reused by extracting them from the slabs heap * for this bin's size class. */ - uint64_t reslabs; + uint64_t reslabs; /* Current number of slabs in this bin. */ - size_t curslabs; + size_t curslabs; /* Current size of nonfull slabs heap in this bin. 
*/ - size_t nonfull_slabs; + size_t nonfull_slabs; - uint64_t batch_pops; - uint64_t batch_failed_pushes; - uint64_t batch_pushes; - uint64_t batch_pushed_elems; + uint64_t batch_pops; + uint64_t batch_failed_pushes; + uint64_t batch_pushes; + uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; struct bin_stats_data_s { - bin_stats_t stats_data; + bin_stats_t stats_data; mutex_prof_data_t mutex_data; }; #endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/include/jemalloc/internal/bin_types.h b/include/jemalloc/internal/bin_types.h index 5ec22dfd..b6bad37e 100644 --- a/include/jemalloc/internal/bin_types.h +++ b/include/jemalloc/internal/bin_types.h @@ -8,7 +8,10 @@ #define N_BIN_SHARDS_DEFAULT 1 /* Used in TSD static initializer only. Real init in arena_bind(). */ -#define TSD_BINSHARDS_ZERO_INITIALIZER {{UINT8_MAX}} +#define TSD_BINSHARDS_ZERO_INITIALIZER \ + { \ + { UINT8_MAX } \ + } typedef struct tsd_binshards_s tsd_binshards_t; struct tsd_binshards_s { diff --git a/include/jemalloc/internal/bit_util.h b/include/jemalloc/internal/bit_util.h index 840dbde2..88c7942e 100644 --- a/include/jemalloc/internal/bit_util.h +++ b/include/jemalloc/internal/bit_util.h @@ -5,9 +5,9 @@ #include "jemalloc/internal/assert.h" /* Sanity check. */ -#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ +#if !defined(JEMALLOC_INTERNAL_FFSLL) || !defined(JEMALLOC_INTERNAL_FFSL) \ || !defined(JEMALLOC_INTERNAL_FFS) -# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure +# error JEMALLOC_INTERNAL_FFS{,L,LL} should have been defined by configure #endif /* @@ -110,15 +110,17 @@ fls_u(unsigned x) { } #elif defined(_MSC_VER) -#if LG_SIZEOF_PTR == 3 -#define DO_BSR64(bit, x) _BitScanReverse64(&bit, x) -#else +# if LG_SIZEOF_PTR == 3 +# define DO_BSR64(bit, x) _BitScanReverse64(&bit, x) +# else /* * This never actually runs; we're just dodging a compiler error for the * never-taken branch where sizeof(void *) == 8. 
*/ -#define DO_BSR64(bit, x) bit = 0; unreachable() -#endif +# define DO_BSR64(bit, x) \ + bit = 0; \ + unreachable() +# endif /* clang-format off */ #define DO_FLS(x) do { \ @@ -164,8 +166,8 @@ fls_u(unsigned x) { DO_FLS(x); } -#undef DO_FLS -#undef DO_BSR64 +# undef DO_FLS +# undef DO_BSR64 #else static inline unsigned @@ -185,7 +187,7 @@ fls_u(unsigned x) { #endif #if LG_SIZEOF_LONG_LONG > 3 -# error "Haven't implemented popcount for 16-byte ints." +# error "Haven't implemented popcount for 16-byte ints." #endif /* clang-format off */ @@ -284,7 +286,7 @@ popcount_llu(unsigned long long bitmap) { */ static inline size_t -cfs_lu(unsigned long* bitmap) { +cfs_lu(unsigned long *bitmap) { util_assume(*bitmap != 0); size_t bit = ffs_lu(*bitmap); *bitmap ^= ZU(1) << bit; @@ -300,7 +302,7 @@ ffs_zu(size_t x) { #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG return ffs_llu(x); #else -#error No implementation for size_t ffs() +# error No implementation for size_t ffs() #endif } @@ -313,11 +315,10 @@ fls_zu(size_t x) { #elif LG_SIZEOF_PTR == LG_SIZEOF_LONG_LONG return fls_llu(x); #else -#error No implementation for size_t fls() +# error No implementation for size_t fls() #endif } - static inline unsigned ffs_u64(uint64_t x) { #if LG_SIZEOF_LONG == 3 @@ -325,7 +326,7 @@ ffs_u64(uint64_t x) { #elif LG_SIZEOF_LONG_LONG == 3 return ffs_llu(x); #else -#error No implementation for 64-bit ffs() +# error No implementation for 64-bit ffs() #endif } @@ -336,7 +337,7 @@ fls_u64(uint64_t x) { #elif LG_SIZEOF_LONG_LONG == 3 return fls_llu(x); #else -#error No implementation for 64-bit fls() +# error No implementation for 64-bit fls() #endif } @@ -345,7 +346,7 @@ ffs_u32(uint32_t x) { #if LG_SIZEOF_INT == 2 return ffs_u(x); #else -#error No implementation for 32-bit ffs() +# error No implementation for 32-bit ffs() #endif } @@ -354,7 +355,7 @@ fls_u32(uint32_t x) { #if LG_SIZEOF_INT == 2 return fls_u(x); #else -#error No implementation for 32-bit fls() +# error No implementation for 32-bit 
fls() #endif } @@ -375,7 +376,7 @@ pow2_ceil_u64(uint64_t x) { static inline uint32_t pow2_ceil_u32(uint32_t x) { if (unlikely(x <= 1)) { - return x; + return x; } size_t msb_on_index = fls_u32(x - 1); /* As above. */ @@ -413,13 +414,16 @@ lg_ceil(size_t x) { #define LG_FLOOR_2(x) (x < (1ULL << 1) ? LG_FLOOR_1(x) : 1 + LG_FLOOR_1(x >> 1)) #define LG_FLOOR_4(x) (x < (1ULL << 2) ? LG_FLOOR_2(x) : 2 + LG_FLOOR_2(x >> 2)) #define LG_FLOOR_8(x) (x < (1ULL << 4) ? LG_FLOOR_4(x) : 4 + LG_FLOOR_4(x >> 4)) -#define LG_FLOOR_16(x) (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8)) -#define LG_FLOOR_32(x) (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16)) -#define LG_FLOOR_64(x) (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32)) +#define LG_FLOOR_16(x) \ + (x < (1ULL << 8) ? LG_FLOOR_8(x) : 8 + LG_FLOOR_8(x >> 8)) +#define LG_FLOOR_32(x) \ + (x < (1ULL << 16) ? LG_FLOOR_16(x) : 16 + LG_FLOOR_16(x >> 16)) +#define LG_FLOOR_64(x) \ + (x < (1ULL << 32) ? LG_FLOOR_32(x) : 32 + LG_FLOOR_32(x >> 32)) #if LG_SIZEOF_PTR == 2 -# define LG_FLOOR(x) LG_FLOOR_32((x)) +# define LG_FLOOR(x) LG_FLOOR_32((x)) #else -# define LG_FLOOR(x) LG_FLOOR_64((x)) +# define LG_FLOOR(x) LG_FLOOR_64((x)) #endif #define LG_CEIL(x) (LG_FLOOR(x) + (((x) & ((x) - 1)) == 0 ? 0 : 1)) diff --git a/include/jemalloc/internal/bitmap.h b/include/jemalloc/internal/bitmap.h index 8cd5f5a3..e0f596fb 100644 --- a/include/jemalloc/internal/bitmap.h +++ b/include/jemalloc/internal/bitmap.h @@ -6,22 +6,22 @@ #include "jemalloc/internal/sc.h" typedef unsigned long bitmap_t; -#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG +#define LG_SIZEOF_BITMAP LG_SIZEOF_LONG /* Maximum bitmap bit count is 2^LG_BITMAP_MAXBITS. */ #if SC_LG_SLAB_MAXREGS > LG_CEIL(SC_NSIZES) /* Maximum bitmap bit count is determined by maximum regions per slab. 
*/ -# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS +# define LG_BITMAP_MAXBITS SC_LG_SLAB_MAXREGS #else /* Maximum bitmap bit count is determined by number of extent size classes. */ -# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) +# define LG_BITMAP_MAXBITS LG_CEIL(SC_NSIZES) #endif -#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) +#define BITMAP_MAXBITS (ZU(1) << LG_BITMAP_MAXBITS) /* Number of bits per group. */ -#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) -#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) -#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS-1) +#define LG_BITMAP_GROUP_NBITS (LG_SIZEOF_BITMAP + 3) +#define BITMAP_GROUP_NBITS (1U << LG_BITMAP_GROUP_NBITS) +#define BITMAP_GROUP_NBITS_MASK (BITMAP_GROUP_NBITS - 1) /* * Do some analysis on how big the bitmap is before we use a tree. For a brute @@ -29,67 +29,64 @@ typedef unsigned long bitmap_t; * use a tree instead. */ #if LG_BITMAP_MAXBITS - LG_BITMAP_GROUP_NBITS > 3 -# define BITMAP_USE_TREE +# define BITMAP_USE_TREE #endif /* Number of groups required to store a given number of bits. */ -#define BITMAP_BITS2GROUPS(nbits) \ - (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) +#define BITMAP_BITS2GROUPS(nbits) \ + (((nbits) + BITMAP_GROUP_NBITS_MASK) >> LG_BITMAP_GROUP_NBITS) /* * Number of groups required at a particular level for a given number of bits. 
*/ -#define BITMAP_GROUPS_L0(nbits) \ - BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_L1(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) -#define BITMAP_GROUPS_L2(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) -#define BITMAP_GROUPS_L3(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS((nbits))))) -#define BITMAP_GROUPS_L4(nbits) \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ - BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) +#define BITMAP_GROUPS_L0(nbits) BITMAP_BITS2GROUPS(nbits) +#define BITMAP_GROUPS_L1(nbits) BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(nbits)) +#define BITMAP_GROUPS_L2(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))) +#define BITMAP_GROUPS_L3(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits))))) +#define BITMAP_GROUPS_L4(nbits) \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS( \ + BITMAP_BITS2GROUPS(BITMAP_BITS2GROUPS((nbits)))))) /* * Assuming the number of levels, number of groups required for a given number * of bits. 
*/ -#define BITMAP_GROUPS_1_LEVEL(nbits) \ - BITMAP_GROUPS_L0(nbits) -#define BITMAP_GROUPS_2_LEVEL(nbits) \ - (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) -#define BITMAP_GROUPS_3_LEVEL(nbits) \ - (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) -#define BITMAP_GROUPS_4_LEVEL(nbits) \ - (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) -#define BITMAP_GROUPS_5_LEVEL(nbits) \ - (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) +#define BITMAP_GROUPS_1_LEVEL(nbits) BITMAP_GROUPS_L0(nbits) +#define BITMAP_GROUPS_2_LEVEL(nbits) \ + (BITMAP_GROUPS_1_LEVEL(nbits) + BITMAP_GROUPS_L1(nbits)) +#define BITMAP_GROUPS_3_LEVEL(nbits) \ + (BITMAP_GROUPS_2_LEVEL(nbits) + BITMAP_GROUPS_L2(nbits)) +#define BITMAP_GROUPS_4_LEVEL(nbits) \ + (BITMAP_GROUPS_3_LEVEL(nbits) + BITMAP_GROUPS_L3(nbits)) +#define BITMAP_GROUPS_5_LEVEL(nbits) \ + (BITMAP_GROUPS_4_LEVEL(nbits) + BITMAP_GROUPS_L4(nbits)) /* * Maximum number of groups required to support LG_BITMAP_MAXBITS. */ #ifdef BITMAP_USE_TREE -#if LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) -#elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 -# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) -# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) -#else -# error "Unsupported bitmap size" -#endif +# if LG_BITMAP_MAXBITS <= 
LG_BITMAP_GROUP_NBITS +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_1_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_1_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 2 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_2_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_2_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 3 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_3_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_3_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 4 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_4_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_4_LEVEL(BITMAP_MAXBITS) +# elif LG_BITMAP_MAXBITS <= LG_BITMAP_GROUP_NBITS * 5 +# define BITMAP_GROUPS(nbits) BITMAP_GROUPS_5_LEVEL(nbits) +# define BITMAP_GROUPS_MAX BITMAP_GROUPS_5_LEVEL(BITMAP_MAXBITS) +# else +# error "Unsupported bitmap size" +# endif /* * Maximum number of levels possible. This could be statically computed based @@ -105,42 +102,53 @@ typedef unsigned long bitmap_t; * unused trailing entries in bitmap_info_t structures; the bitmaps themselves * are not impacted. */ -#define BITMAP_MAX_LEVELS 5 +# define BITMAP_MAX_LEVELS 5 -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* nlevels. */ \ - (BITMAP_GROUPS_L0(nbits) > BITMAP_GROUPS_L1(nbits)) + \ - (BITMAP_GROUPS_L1(nbits) > BITMAP_GROUPS_L2(nbits)) + \ - (BITMAP_GROUPS_L2(nbits) > BITMAP_GROUPS_L3(nbits)) + \ - (BITMAP_GROUPS_L3(nbits) > BITMAP_GROUPS_L4(nbits)) + 1, \ - /* levels. 
*/ \ - { \ - {0}, \ - {BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) + \ - BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L3(nbits) + BITMAP_GROUPS_L2(nbits) + \ - BITMAP_GROUPS_L1(nbits) + BITMAP_GROUPS_L0(nbits)}, \ - {BITMAP_GROUPS_L4(nbits) + BITMAP_GROUPS_L3(nbits) + \ - BITMAP_GROUPS_L2(nbits) + BITMAP_GROUPS_L1(nbits) \ - + BITMAP_GROUPS_L0(nbits)} \ - } \ -} +# define BITMAP_INFO_INITIALIZER(nbits) \ + { \ + /* nbits. */ \ + nbits, /* nlevels. */ \ + (BITMAP_GROUPS_L0(nbits) \ + > BITMAP_GROUPS_L1(nbits)) \ + + (BITMAP_GROUPS_L1(nbits) \ + > BITMAP_GROUPS_L2(nbits)) \ + + (BITMAP_GROUPS_L2(nbits) \ + > BITMAP_GROUPS_L3(nbits)) \ + + (BITMAP_GROUPS_L3(nbits) \ + > BITMAP_GROUPS_L4(nbits)) \ + + 1, /* levels. */ \ + { \ + {0}, {BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L2(nbits) \ + + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)}, \ + {BITMAP_GROUPS_L3(nbits) \ + + BITMAP_GROUPS_L2(nbits) \ + + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits)}, \ + { \ + BITMAP_GROUPS_L4(nbits) \ + + BITMAP_GROUPS_L3(nbits) \ + + BITMAP_GROUPS_L2(nbits) \ + + BITMAP_GROUPS_L1(nbits) \ + + BITMAP_GROUPS_L0(nbits) \ + } \ + } \ + } #else /* BITMAP_USE_TREE */ -#define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) -#define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) +# define BITMAP_GROUPS(nbits) BITMAP_BITS2GROUPS(nbits) +# define BITMAP_GROUPS_MAX BITMAP_BITS2GROUPS(BITMAP_MAXBITS) -#define BITMAP_INFO_INITIALIZER(nbits) { \ - /* nbits. */ \ - nbits, \ - /* ngroups. */ \ - BITMAP_BITS2GROUPS(nbits) \ -} +# define BITMAP_INFO_INITIALIZER(nbits) \ + { \ + /* nbits. */ \ + nbits, /* ngroups. */ \ + BITMAP_BITS2GROUPS(nbits) \ + } #endif /* BITMAP_USE_TREE */ @@ -161,21 +169,21 @@ typedef struct bitmap_info_s { * Only the first (nlevels+1) elements are used, and levels are ordered * bottom to top (e.g. 
the bottom level is stored in levels[0]). */ - bitmap_level_t levels[BITMAP_MAX_LEVELS+1]; -#else /* BITMAP_USE_TREE */ + bitmap_level_t levels[BITMAP_MAX_LEVELS + 1]; +#else /* BITMAP_USE_TREE */ /* Number of groups necessary for nbits. */ size_t ngroups; #endif /* BITMAP_USE_TREE */ } bitmap_info_t; -void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); -void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); +void bitmap_info_init(bitmap_info_t *binfo, size_t nbits); +void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill); size_t bitmap_size(const bitmap_info_t *binfo); static inline bool bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { #ifdef BITMAP_USE_TREE - size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; + size_t rgoff = binfo->levels[binfo->nlevels].group_offset - 1; bitmap_t rg = bitmap[rgoff]; /* The bitmap is full iff the root group is 0. */ return (rg == 0); @@ -193,7 +201,7 @@ bitmap_full(bitmap_t *bitmap, const bitmap_info_t *binfo) { static inline bool bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; + size_t goff; bitmap_t g; assert(bit < binfo->nbits); @@ -204,9 +212,9 @@ bitmap_get(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { static inline void bitmap_set(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; + size_t goff; bitmap_t *gp; - bitmap_t g; + bitmap_t g; assert(bit < binfo->nbits); assert(!bitmap_get(bitmap, binfo, bit)); @@ -245,12 +253,13 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { #ifdef BITMAP_USE_TREE size_t bit = 0; for (unsigned level = binfo->nlevels; level--;) { - size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS * (level + - 1)); - bitmap_t group = bitmap[binfo->levels[level].group_offset + (bit - >> lg_bits_per_group)]; - unsigned group_nmask = (unsigned)(((min_bit > bit) ? 
(min_bit - - bit) : 0) >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); + size_t lg_bits_per_group = (LG_BITMAP_GROUP_NBITS + * (level + 1)); + bitmap_t group = bitmap[binfo->levels[level].group_offset + + (bit >> lg_bits_per_group)]; + unsigned group_nmask = + (unsigned)(((min_bit > bit) ? (min_bit - bit) : 0) + >> (lg_bits_per_group - LG_BITMAP_GROUP_NBITS)); assert(group_nmask <= BITMAP_GROUP_NBITS); bitmap_t group_mask = ~((1LU << group_nmask) - 1); bitmap_t group_masked = group & group_mask; @@ -273,16 +282,16 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { } return bitmap_ffu(bitmap, binfo, sib_base); } - bit += ((size_t)ffs_lu(group_masked)) << - (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); + bit += ((size_t)ffs_lu(group_masked)) + << (lg_bits_per_group - LG_BITMAP_GROUP_NBITS); } assert(bit >= min_bit); assert(bit < binfo->nbits); return bit; #else - size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; - bitmap_t g = bitmap[i] & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - - 1); + size_t i = min_bit >> LG_BITMAP_GROUP_NBITS; + bitmap_t g = bitmap[i] + & ~((1LU << (min_bit & BITMAP_GROUP_NBITS_MASK)) - 1); size_t bit; while (1) { if (g != 0) { @@ -302,7 +311,7 @@ bitmap_ffu(const bitmap_t *bitmap, const bitmap_info_t *binfo, size_t min_bit) { /* sfu: set first unset. 
*/ static inline size_t bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { - size_t bit; + size_t bit; bitmap_t g; unsigned i; @@ -332,9 +341,9 @@ bitmap_sfu(bitmap_t *bitmap, const bitmap_info_t *binfo) { static inline void bitmap_unset(bitmap_t *bitmap, const bitmap_info_t *binfo, size_t bit) { - size_t goff; - bitmap_t *gp; - bitmap_t g; + size_t goff; + bitmap_t *gp; + bitmap_t g; UNUSED bool propagate; assert(bit < binfo->nbits); diff --git a/include/jemalloc/internal/buf_writer.h b/include/jemalloc/internal/buf_writer.h index fa0ac99c..5ee9af4e 100644 --- a/include/jemalloc/internal/buf_writer.h +++ b/include/jemalloc/internal/buf_writer.h @@ -16,21 +16,21 @@ typedef struct { write_cb_t *write_cb; - void *cbopaque; - char *buf; - size_t buf_size; - size_t buf_end; - bool internal_buf; + void *cbopaque; + char *buf; + size_t buf_size; + size_t buf_end; + bool internal_buf; } buf_writer_t; -bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, - write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); -void buf_writer_flush(buf_writer_t *buf_writer); +bool buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, + write_cb_t *write_cb, void *cbopaque, char *buf, size_t buf_len); +void buf_writer_flush(buf_writer_t *buf_writer); write_cb_t buf_writer_cb; -void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); +void buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer); -typedef ssize_t (read_cb_t)(void *read_cbopaque, void *buf, size_t limit); -void buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, - void *read_cbopaque); +typedef ssize_t(read_cb_t)(void *read_cbopaque, void *buf, size_t limit); +void buf_writer_pipe( + buf_writer_t *buf_writer, read_cb_t *read_cb, void *read_cbopaque); #endif /* JEMALLOC_INTERNAL_BUF_WRITER_H */ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 7ab48dc9..08ee0d6a 100644 --- a/include/jemalloc/internal/cache_bin.h +++ 
b/include/jemalloc/internal/cache_bin.h @@ -45,8 +45,8 @@ extern const uintptr_t disabled_bin; * 1 << (sizeof(cache_bin_sz_t) * 8) * bytes spread across pointer sized objects to get the maximum. */ -#define CACHE_BIN_NCACHED_MAX (((size_t)1 << sizeof(cache_bin_sz_t) * 8) \ - / sizeof(void *) - 1) +#define CACHE_BIN_NCACHED_MAX \ + (((size_t)1 << sizeof(cache_bin_sz_t) * 8) / sizeof(void *) - 1) /* * This lives inside the cache_bin (for locality reasons), and is initialized @@ -152,8 +152,8 @@ struct cache_bin_array_descriptor_s { }; static inline void -cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, - cache_bin_t *bins) { +cache_bin_array_descriptor_init( + cache_bin_array_descriptor_t *descriptor, cache_bin_t *bins) { ql_elm_new(descriptor, link); descriptor->bins = bins; } @@ -222,7 +222,8 @@ cache_bin_ncached_max_get(cache_bin_t *bin) { * with later. */ static inline void -cache_bin_assert_earlier(cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { +cache_bin_assert_earlier( + cache_bin_t *bin, cache_bin_sz_t earlier, cache_bin_sz_t later) { if (earlier > later) { assert(bin->low_bits_full > bin->low_bits_empty); } @@ -285,8 +286,8 @@ static inline void ** cache_bin_empty_position_get(cache_bin_t *bin) { cache_bin_sz_t diff = cache_bin_diff(bin, (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_empty); - byte_t *empty_bits = (byte_t *)bin->stack_head + diff; - void **ret = (void **)empty_bits; + byte_t *empty_bits = (byte_t *)bin->stack_head + diff; + void **ret = (void **)empty_bits; assert(ret >= bin->stack_head); @@ -305,8 +306,8 @@ cache_bin_empty_position_get(cache_bin_t *bin) { */ static inline cache_bin_sz_t cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { - return (cache_bin_sz_t)bin->low_bits_empty - - cache_bin_ncached_max_get(bin) * sizeof(void *); + return (cache_bin_sz_t)bin->low_bits_empty + - cache_bin_ncached_max_get(bin) * sizeof(void *); } /* @@ -317,7 +318,7 @@ 
cache_bin_low_bits_low_bound_get(cache_bin_t *bin) { static inline void ** cache_bin_low_bound_get(cache_bin_t *bin) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); - void **ret = cache_bin_empty_position_get(bin) - ncached_max; + void **ret = cache_bin_empty_position_get(bin) - ncached_max; assert(ret <= bin->stack_head); return ret; @@ -340,8 +341,8 @@ cache_bin_assert_empty(cache_bin_t *bin) { */ static inline cache_bin_sz_t cache_bin_low_water_get_internal(cache_bin_t *bin) { - return cache_bin_diff(bin, bin->low_bits_low_water, - bin->low_bits_empty) / sizeof(void *); + return cache_bin_diff(bin, bin->low_bits_low_water, bin->low_bits_empty) + / sizeof(void *); } /* Returns the numeric value of low water in [0, ncached]. */ @@ -351,7 +352,8 @@ cache_bin_low_water_get(cache_bin_t *bin) { assert(low_water <= cache_bin_ncached_max_get(bin)); assert(low_water <= cache_bin_ncached_get_local(bin)); - cache_bin_assert_earlier(bin, (cache_bin_sz_t)(uintptr_t)bin->stack_head, + cache_bin_assert_earlier(bin, + (cache_bin_sz_t)(uintptr_t)bin->stack_head, bin->low_bits_low_water); return low_water; @@ -390,9 +392,9 @@ cache_bin_alloc_impl(cache_bin_t *bin, bool *success, bool adjust_low_water) { * This may read from the empty position; however the loaded value won't * be used. It's safe because the stack has one more slot reserved. 
*/ - void *ret = *bin->stack_head; + void *ret = *bin->stack_head; cache_bin_sz_t low_bits = (cache_bin_sz_t)(uintptr_t)bin->stack_head; - void **new_head = bin->stack_head + 1; + void **new_head = bin->stack_head + 1; /* * Note that the low water mark is at most empty; if we pass this check, @@ -455,7 +457,8 @@ cache_bin_alloc_batch(cache_bin_t *bin, size_t num, void **out) { JEMALLOC_ALWAYS_INLINE bool cache_bin_full(cache_bin_t *bin) { - return ((cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full); + return ( + (cache_bin_sz_t)(uintptr_t)bin->stack_head == bin->low_bits_full); } /* @@ -469,9 +472,9 @@ cache_bin_dalloc_safety_checks(cache_bin_t *bin, void *ptr) { } cache_bin_sz_t ncached = cache_bin_ncached_get_internal(bin); - unsigned max_scan = opt_debug_double_free_max_scan < ncached - ? opt_debug_double_free_max_scan - : ncached; + unsigned max_scan = opt_debug_double_free_max_scan < ncached + ? opt_debug_double_free_max_scan + : ncached; void **cur = bin->stack_head; void **limit = cur + max_scan; @@ -516,9 +519,11 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { } /* Stash at the full position, in the [full, head) range. */ - cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t low_bits_head = (cache_bin_sz_t)(uintptr_t) + bin->stack_head; /* Wraparound handled as well. 
*/ - cache_bin_sz_t diff = cache_bin_diff(bin, bin->low_bits_full, low_bits_head); + cache_bin_sz_t diff = cache_bin_diff( + bin, bin->low_bits_full, low_bits_head); *(void **)((byte_t *)bin->stack_head - diff) = ptr; assert(!cache_bin_full(bin)); @@ -532,18 +537,21 @@ cache_bin_stash(cache_bin_t *bin, void *ptr) { JEMALLOC_ALWAYS_INLINE cache_bin_sz_t cache_bin_nstashed_get_internal(cache_bin_t *bin) { cache_bin_sz_t ncached_max = cache_bin_ncached_max_get(bin); - cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get( + bin); - cache_bin_sz_t n = cache_bin_diff(bin, low_bits_low_bound, - bin->low_bits_full) / sizeof(void *); + cache_bin_sz_t n = cache_bin_diff( + bin, low_bits_low_bound, bin->low_bits_full) + / sizeof(void *); assert(n <= ncached_max); if (config_debug && n != 0) { /* Below are for assertions only. */ void **low_bound = cache_bin_low_bound_get(bin); - assert((cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound); + assert( + (cache_bin_sz_t)(uintptr_t)low_bound == low_bits_low_bound); void *stashed = *(low_bound + n - 1); - bool aligned = cache_bin_nonfast_aligned(stashed); + bool aligned = cache_bin_nonfast_aligned(stashed); #ifdef JEMALLOC_JET /* Allow arbitrary pointers to be stashed in tests. */ aligned = true; @@ -582,16 +590,17 @@ cache_bin_nstashed_get_local(cache_bin_t *bin) { * they help access values that will not be concurrently modified. */ static inline void -cache_bin_nitems_get_remote(cache_bin_t *bin, cache_bin_sz_t *ncached, - cache_bin_sz_t *nstashed) { +cache_bin_nitems_get_remote( + cache_bin_t *bin, cache_bin_sz_t *ncached, cache_bin_sz_t *nstashed) { /* Racy version of cache_bin_ncached_get_internal. 
*/ - cache_bin_sz_t diff = bin->low_bits_empty - - (cache_bin_sz_t)(uintptr_t)bin->stack_head; + cache_bin_sz_t diff = bin->low_bits_empty + - (cache_bin_sz_t)(uintptr_t)bin->stack_head; cache_bin_sz_t n = diff / sizeof(void *); *ncached = n; /* Racy version of cache_bin_nstashed_get_internal. */ - cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get(bin); + cache_bin_sz_t low_bits_low_bound = cache_bin_low_bits_low_bound_get( + bin); n = (bin->low_bits_full - low_bits_low_bound) / sizeof(void *); *nstashed = n; /* @@ -616,7 +625,8 @@ struct cache_bin_fill_ctl_s { * This is to avoid stack overflow when we do batch edata look up, which * reserves a nflush * sizeof(emap_batch_lookup_result_t) stack variable. */ -#define CACHE_BIN_NFLUSH_BATCH_MAX ((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1) +#define CACHE_BIN_NFLUSH_BATCH_MAX \ + ((VARIABLE_ARRAY_SIZE_MAX >> LG_SIZEOF_PTR) - 1) /* * Filling and flushing are done in batch, on arrays of void *s. For filling, @@ -638,7 +648,7 @@ struct cache_bin_fill_ctl_s { typedef struct cache_bin_ptr_array_s cache_bin_ptr_array_t; struct cache_bin_ptr_array_s { cache_bin_sz_t n; - void **ptr; + void **ptr; }; /* @@ -650,17 +660,17 @@ struct cache_bin_ptr_array_s { * representations is easy (since they'll require an alloca in the calling * frame). */ -#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ - cache_bin_ptr_array_t name; \ - name.n = (nval) +#define CACHE_BIN_PTR_ARRAY_DECLARE(name, nval) \ + cache_bin_ptr_array_t name; \ + name.n = (nval) /* * Start a fill. The bin must be empty, and This must be followed by a * finish_fill call before doing any alloc/dalloc operations on the bin. 
*/ static inline void -cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nfill) { +cache_bin_init_ptr_array_for_fill( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfill) { cache_bin_assert_empty(bin); arr->ptr = cache_bin_empty_position_get(bin) - nfill; } @@ -671,8 +681,8 @@ cache_bin_init_ptr_array_for_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, * case of OOM. */ static inline void -cache_bin_finish_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nfilled) { +cache_bin_finish_fill( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nfilled) { cache_bin_assert_empty(bin); void **empty_position = cache_bin_empty_position_get(bin); if (nfilled < arr->n) { @@ -687,19 +697,18 @@ cache_bin_finish_fill(cache_bin_t *bin, cache_bin_ptr_array_t *arr, * everything we give them. */ static inline void -cache_bin_init_ptr_array_for_flush(cache_bin_t *bin, - cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { +cache_bin_init_ptr_array_for_flush( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflush) { arr->ptr = cache_bin_empty_position_get(bin) - nflush; - assert(cache_bin_ncached_get_local(bin) == 0 - || *arr->ptr != NULL); + assert(cache_bin_ncached_get_local(bin) == 0 || *arr->ptr != NULL); } static inline void -cache_bin_finish_flush(cache_bin_t *bin, cache_bin_ptr_array_t *arr, - cache_bin_sz_t nflushed) { +cache_bin_finish_flush( + cache_bin_t *bin, cache_bin_ptr_array_t *arr, cache_bin_sz_t nflushed) { unsigned rem = cache_bin_ncached_get_local(bin) - nflushed; - memmove(bin->stack_head + nflushed, bin->stack_head, - rem * sizeof(void *)); + memmove( + bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); } @@ -728,14 +737,14 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin) { * Initialize a cache_bin_info to represent up to the given number of items in * the cache_bins it is 
associated with. */ -void cache_bin_info_init(cache_bin_info_t *bin_info, - cache_bin_sz_t ncached_max); +void cache_bin_info_init( + cache_bin_info_t *bin_info, cache_bin_sz_t ncached_max); /* * Given an array of initialized cache_bin_info_ts, determine how big an * allocation is required to initialize a full set of cache_bin_ts. */ -void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, - szind_t ninfos, size_t *size, size_t *alignment); +void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos, + size_t *size, size_t *alignment); /* * Actually initialize some cache bins. Callers should allocate the backing @@ -747,8 +756,8 @@ void cache_bin_info_compute_alloc(const cache_bin_info_t *infos, void cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, size_t *cur_offset); void cache_bin_postincrement(void *alloc, size_t *cur_offset); -void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, - void *alloc, size_t *cur_offset); +void cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, + size_t *cur_offset); void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max); bool cache_bin_stack_use_thp(void); diff --git a/include/jemalloc/internal/ckh.h b/include/jemalloc/internal/ckh.h index 8e9d7fed..01b27e8f 100644 --- a/include/jemalloc/internal/ckh.h +++ b/include/jemalloc/internal/ckh.h @@ -22,8 +22,8 @@ #define LG_CKH_BUCKET_CELLS (LG_CACHELINE - LG_SIZEOF_PTR - 1) /* Typedefs to allow easy function pointer passing. */ -typedef void ckh_hash_t (const void *, size_t[2]); -typedef bool ckh_keycomp_t (const void *, const void *); +typedef void ckh_hash_t(const void *, size_t[2]); +typedef bool ckh_keycomp_t(const void *, const void *); /* Hash table cell. */ typedef struct { @@ -56,7 +56,7 @@ typedef struct { unsigned lg_curbuckets; /* Hash and comparison functions. 
*/ - ckh_hash_t *hash; + ckh_hash_t *hash; ckh_keycomp_t *keycomp; /* Hash table with 2^lg_curbuckets buckets. */ @@ -89,8 +89,8 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data); * the key and value, and doesn't do any lifetime management. */ bool ckh_insert(tsd_t *tsd, ckh_t *ckh, const void *key, const void *data); -bool ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data); +bool ckh_remove( + tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data); bool ckh_search(ckh_t *ckh, const void *searchkey, void **key, void **data); /* Some useful hash and comparison functions for strings and pointers. */ diff --git a/include/jemalloc/internal/counter.h b/include/jemalloc/internal/counter.h index 74e30701..0f38d40c 100644 --- a/include/jemalloc/internal/counter.h +++ b/include/jemalloc/internal/counter.h @@ -8,7 +8,7 @@ typedef struct counter_accum_s { LOCKEDINT_MTX_DECLARE(mtx) locked_u64_t accumbytes; - uint64_t interval; + uint64_t interval; } counter_accum_t; JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index 1f124bfc..b290411b 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -13,7 +13,7 @@ #include "jemalloc/internal/stats.h" /* Maximum ctl tree depth. */ -#define CTL_MAX_DEPTH 7 +#define CTL_MAX_DEPTH 7 #define CTL_MULTI_SETTING_MAX_LEN 1000 typedef struct ctl_node_s { @@ -21,37 +21,37 @@ typedef struct ctl_node_s { } ctl_node_t; typedef struct ctl_named_node_s { - ctl_node_t node; + ctl_node_t node; const char *name; /* If (nchildren == 0), this is a terminal node. 
*/ - size_t nchildren; + size_t nchildren; const ctl_node_t *children; - int (*ctl)(tsd_t *, const size_t *, size_t, void *, size_t *, void *, - size_t); + int (*ctl)( + tsd_t *, const size_t *, size_t, void *, size_t *, void *, size_t); } ctl_named_node_t; typedef struct ctl_indexed_node_s { struct ctl_node_s node; - const ctl_named_node_t *(*index)(tsdn_t *, const size_t *, size_t, - size_t); + const ctl_named_node_t *(*index)( + tsdn_t *, const size_t *, size_t, size_t); } ctl_indexed_node_t; typedef struct ctl_arena_stats_s { arena_stats_t astats; /* Aggregate stats for small size classes, based on bin stats. */ - size_t allocated_small; + size_t allocated_small; uint64_t nmalloc_small; uint64_t ndalloc_small; uint64_t nrequests_small; uint64_t nfills_small; uint64_t nflushes_small; - bin_stats_data_t bstats[SC_NBINS]; + bin_stats_data_t bstats[SC_NBINS]; arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; - pac_estats_t estats[SC_NPSIZES]; - hpa_shard_stats_t hpastats; - sec_stats_t secstats; + pac_estats_t estats[SC_NPSIZES]; + hpa_shard_stats_t hpastats; + sec_stats_t secstats; } ctl_arena_stats_t; typedef struct ctl_stats_s { @@ -72,17 +72,17 @@ typedef struct ctl_stats_s { typedef struct ctl_arena_s ctl_arena_t; struct ctl_arena_s { unsigned arena_ind; - bool initialized; + bool initialized; ql_elm(ctl_arena_t) destroyed_link; /* Basic stats, supported even if !config_stats. */ - unsigned nthreads; + unsigned nthreads; const char *dss; - ssize_t dirty_decay_ms; - ssize_t muzzy_decay_ms; - size_t pactive; - size_t pdirty; - size_t pmuzzy; + ssize_t dirty_decay_ms; + ssize_t muzzy_decay_ms; + size_t pactive; + size_t pdirty; + size_t pmuzzy; /* NULL if !config_stats. 
*/ ctl_arena_stats_t *astats; @@ -107,60 +107,67 @@ int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, int ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp); int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen); -int ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, - size_t *miblenp); -int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, - size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen); +int ctl_mibnametomib( + tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp); +int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, + size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen); bool ctl_boot(void); void ctl_prefork(tsdn_t *tsdn); void ctl_postfork_parent(tsdn_t *tsdn); void ctl_postfork_child(tsdn_t *tsdn); void ctl_mtx_assert_held(tsdn_t *tsdn); -#define xmallctl(name, oldp, oldlenp, newp, newlen) do { \ - if (je_mallctl(name, oldp, oldlenp, newp, newlen) \ - != 0) { \ - malloc_printf( \ - ": Failure in xmallctl(\"%s\", ...)\n", \ - name); \ - abort(); \ - } \ -} while (0) +#define xmallctl(name, oldp, oldlenp, newp, newlen) \ + do { \ + if (je_mallctl(name, oldp, oldlenp, newp, newlen) != 0) { \ + malloc_printf( \ + ": Failure in xmallctl(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ + } while (0) -#define xmallctlnametomib(name, mibp, miblenp) do { \ - if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ - malloc_printf(": Failure in " \ - "xmallctlnametomib(\"%s\", ...)\n", name); \ - abort(); \ - } \ -} while (0) +#define xmallctlnametomib(name, mibp, miblenp) \ + do { \ + if (je_mallctlnametomib(name, mibp, miblenp) != 0) { \ + malloc_printf( \ + ": Failure in " \ + "xmallctlnametomib(\"%s\", ...)\n", \ + name); \ + abort(); \ + } \ + } while (0) -#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) do 
{ \ - if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, \ - newlen) != 0) { \ - malloc_write( \ - ": Failure in xmallctlbymib()\n"); \ - abort(); \ - } \ -} while (0) +#define xmallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) \ + do { \ + if (je_mallctlbymib(mib, miblen, oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_write( \ + ": Failure in xmallctlbymib()\n"); \ + abort(); \ + } \ + } while (0) -#define xmallctlmibnametomib(mib, miblen, name, miblenp) do { \ - if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \ - != 0) { \ - malloc_write( \ - ": Failure in ctl_mibnametomib()\n"); \ - abort(); \ - } \ -} while (0) +#define xmallctlmibnametomib(mib, miblen, name, miblenp) \ + do { \ + if (ctl_mibnametomib(tsd_fetch(), mib, miblen, name, miblenp) \ + != 0) { \ + malloc_write( \ + ": Failure in ctl_mibnametomib()\n"); \ + abort(); \ + } \ + } while (0) -#define xmallctlbymibname(mib, miblen, name, miblenp, oldp, oldlenp, \ - newp, newlen) do { \ - if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \ - oldp, oldlenp, newp, newlen) != 0) { \ - malloc_write( \ - ": Failure in ctl_bymibname()\n"); \ - abort(); \ - } \ -} while (0) +#define xmallctlbymibname( \ + mib, miblen, name, miblenp, oldp, oldlenp, newp, newlen) \ + do { \ + if (ctl_bymibname(tsd_fetch(), mib, miblen, name, miblenp, \ + oldp, oldlenp, newp, newlen) \ + != 0) { \ + malloc_write( \ + ": Failure in ctl_bymibname()\n"); \ + abort(); \ + } \ + } while (0) #endif /* JEMALLOC_INTERNAL_CTL_H */ diff --git a/include/jemalloc/internal/decay.h b/include/jemalloc/internal/decay.h index 74be55da..e8773655 100644 --- a/include/jemalloc/internal/decay.h +++ b/include/jemalloc/internal/decay.h @@ -5,7 +5,7 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/smoothstep.h" -#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t)-1) +#define DECAY_UNBOUNDED_TIME_TO_PURGE ((uint64_t) - 1) /* * The decay_t computes the number of pages we should purge at any given time. 
@@ -168,12 +168,12 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms); /* * Compute how many of 'npages_new' pages we would need to purge in 'time'. */ -uint64_t decay_npages_purge_in(decay_t *decay, nstime_t *time, - size_t npages_new); +uint64_t decay_npages_purge_in( + decay_t *decay, nstime_t *time, size_t npages_new); /* Returns true if the epoch advanced and there are pages to purge. */ -bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, - size_t current_npages); +bool decay_maybe_advance_epoch( + decay_t *decay, nstime_t *new_time, size_t current_npages); /* * Calculates wait time until a number of pages in the interval @@ -182,7 +182,7 @@ bool decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, * Returns number of nanoseconds or DECAY_UNBOUNDED_TIME_TO_PURGE in case of * indefinite wait. */ -uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, - uint64_t npages_threshold); +uint64_t decay_ns_until_purge( + decay_t *decay, size_t npages_current, uint64_t npages_threshold); #endif /* JEMALLOC_INTERNAL_DECAY_H */ diff --git a/include/jemalloc/internal/ecache.h b/include/jemalloc/internal/ecache.h index 2bd74fde..605733b5 100644 --- a/include/jemalloc/internal/ecache.h +++ b/include/jemalloc/internal/ecache.h @@ -9,8 +9,8 @@ typedef struct ecache_s ecache_t; struct ecache_s { malloc_mutex_t mtx; - eset_t eset; - eset_t guarded_eset; + eset_t eset; + eset_t guarded_eset; /* All stored extents must be in the same state. */ extent_state_t state; /* The index of the ehooks the ecache is associated with. */ @@ -24,22 +24,22 @@ struct ecache_s { static inline size_t ecache_npages_get(ecache_t *ecache) { - return eset_npages_get(&ecache->eset) + - eset_npages_get(&ecache->guarded_eset); + return eset_npages_get(&ecache->eset) + + eset_npages_get(&ecache->guarded_eset); } /* Get the number of extents in the given page size index. 
*/ static inline size_t ecache_nextents_get(ecache_t *ecache, pszind_t ind) { - return eset_nextents_get(&ecache->eset, ind) + - eset_nextents_get(&ecache->guarded_eset, ind); + return eset_nextents_get(&ecache->eset, ind) + + eset_nextents_get(&ecache->guarded_eset, ind); } /* Get the sum total bytes of the extents in the given page size index. */ static inline size_t ecache_nbytes_get(ecache_t *ecache, pszind_t ind) { - return eset_nbytes_get(&ecache->eset, ind) + - eset_nbytes_get(&ecache->guarded_eset, ind); + return eset_nbytes_get(&ecache->eset, ind) + + eset_nbytes_get(&ecache->guarded_eset, ind); } static inline unsigned diff --git a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index e41e4efa..2b229e7d 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -30,9 +30,9 @@ #define ESET_ENUMERATE_MAX_NUM 32 enum extent_state_e { - extent_state_active = 0, - extent_state_dirty = 1, - extent_state_muzzy = 2, + extent_state_active = 0, + extent_state_dirty = 1, + extent_state_muzzy = 2, extent_state_retained = 3, extent_state_transition = 4, /* States below are intermediate. */ extent_state_merging = 5, @@ -42,7 +42,7 @@ typedef enum extent_state_e extent_state_t; enum extent_head_state_e { EXTENT_NOT_HEAD, - EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */ + EXTENT_IS_HEAD /* See comments in ehooks_default_merge_impl(). */ }; typedef enum extent_head_state_e extent_head_state_t; @@ -50,25 +50,22 @@ typedef enum extent_head_state_e extent_head_state_t; * Which implementation of the page allocator interface, (PAI, defined in * pai.h) owns the given extent? */ -enum extent_pai_e { - EXTENT_PAI_PAC = 0, - EXTENT_PAI_HPA = 1 -}; +enum extent_pai_e { EXTENT_PAI_PAC = 0, EXTENT_PAI_HPA = 1 }; typedef enum extent_pai_e extent_pai_t; struct e_prof_info_s { /* Time when this was allocated. */ - nstime_t e_prof_alloc_time; + nstime_t e_prof_alloc_time; /* Allocation request size. 
*/ - size_t e_prof_alloc_size; + size_t e_prof_alloc_size; /* Points to a prof_tctx_t. */ - atomic_p_t e_prof_tctx; + atomic_p_t e_prof_tctx; /* * Points to a prof_recent_t for the allocation; NULL * means the recent allocation record no longer exists. * Protected by prof_recent_alloc_mtx. */ - atomic_p_t e_prof_recent_alloc; + atomic_p_t e_prof_recent_alloc; }; typedef struct e_prof_info_s e_prof_info_t; @@ -85,13 +82,13 @@ typedef struct e_prof_info_s e_prof_info_t; */ typedef struct edata_map_info_s edata_map_info_t; struct edata_map_info_s { - bool slab; + bool slab; szind_t szind; }; typedef struct edata_cmp_summary_s edata_cmp_summary_t; struct edata_cmp_summary_s { - uint64_t sn; + uint64_t sn; uintptr_t addr; }; @@ -149,55 +146,72 @@ struct edata_s { * * bin_shard: the shard of the bin from which this extent came. */ - uint64_t e_bits; -#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + uint64_t e_bits; +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) \ + ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) \ + << (CURRENT_FIELD_SHIFT)) -#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS -#define EDATA_BITS_ARENA_SHIFT 0 -#define EDATA_BITS_ARENA_MASK MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT) +#define EDATA_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EDATA_BITS_ARENA_SHIFT 0 +#define EDATA_BITS_ARENA_MASK \ + MASK(EDATA_BITS_ARENA_WIDTH, EDATA_BITS_ARENA_SHIFT) -#define EDATA_BITS_SLAB_WIDTH 1 -#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT) -#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT) +#define EDATA_BITS_SLAB_WIDTH 1 +#define EDATA_BITS_SLAB_SHIFT (EDATA_BITS_ARENA_WIDTH + EDATA_BITS_ARENA_SHIFT) +#define EDATA_BITS_SLAB_MASK MASK(EDATA_BITS_SLAB_WIDTH, EDATA_BITS_SLAB_SHIFT) -#define EDATA_BITS_COMMITTED_WIDTH 1 -#define EDATA_BITS_COMMITTED_SHIFT (EDATA_BITS_SLAB_WIDTH + 
EDATA_BITS_SLAB_SHIFT) -#define EDATA_BITS_COMMITTED_MASK MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_COMMITTED_WIDTH 1 +#define EDATA_BITS_COMMITTED_SHIFT \ + (EDATA_BITS_SLAB_WIDTH + EDATA_BITS_SLAB_SHIFT) +#define EDATA_BITS_COMMITTED_MASK \ + MASK(EDATA_BITS_COMMITTED_WIDTH, EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_PAI_WIDTH 1 -#define EDATA_BITS_PAI_SHIFT (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) -#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT) +#define EDATA_BITS_PAI_WIDTH 1 +#define EDATA_BITS_PAI_SHIFT \ + (EDATA_BITS_COMMITTED_WIDTH + EDATA_BITS_COMMITTED_SHIFT) +#define EDATA_BITS_PAI_MASK MASK(EDATA_BITS_PAI_WIDTH, EDATA_BITS_PAI_SHIFT) -#define EDATA_BITS_ZEROED_WIDTH 1 -#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) -#define EDATA_BITS_ZEROED_MASK MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_ZEROED_WIDTH 1 +#define EDATA_BITS_ZEROED_SHIFT (EDATA_BITS_PAI_WIDTH + EDATA_BITS_PAI_SHIFT) +#define EDATA_BITS_ZEROED_MASK \ + MASK(EDATA_BITS_ZEROED_WIDTH, EDATA_BITS_ZEROED_SHIFT) -#define EDATA_BITS_GUARDED_WIDTH 1 -#define EDATA_BITS_GUARDED_SHIFT (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) -#define EDATA_BITS_GUARDED_MASK MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT) +#define EDATA_BITS_GUARDED_WIDTH 1 +#define EDATA_BITS_GUARDED_SHIFT \ + (EDATA_BITS_ZEROED_WIDTH + EDATA_BITS_ZEROED_SHIFT) +#define EDATA_BITS_GUARDED_MASK \ + MASK(EDATA_BITS_GUARDED_WIDTH, EDATA_BITS_GUARDED_SHIFT) -#define EDATA_BITS_STATE_WIDTH 3 -#define EDATA_BITS_STATE_SHIFT (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT) -#define EDATA_BITS_STATE_MASK MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) +#define EDATA_BITS_STATE_WIDTH 3 +#define EDATA_BITS_STATE_SHIFT \ + (EDATA_BITS_GUARDED_WIDTH + EDATA_BITS_GUARDED_SHIFT) +#define EDATA_BITS_STATE_MASK \ + MASK(EDATA_BITS_STATE_WIDTH, EDATA_BITS_STATE_SHIFT) 
-#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) -#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT) -#define EDATA_BITS_SZIND_MASK MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT) +#define EDATA_BITS_SZIND_WIDTH LG_CEIL(SC_NSIZES) +#define EDATA_BITS_SZIND_SHIFT (EDATA_BITS_STATE_WIDTH + EDATA_BITS_STATE_SHIFT) +#define EDATA_BITS_SZIND_MASK \ + MASK(EDATA_BITS_SZIND_WIDTH, EDATA_BITS_SZIND_SHIFT) -#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) -#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT) -#define EDATA_BITS_NFREE_MASK MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT) +#define EDATA_BITS_NFREE_WIDTH (SC_LG_SLAB_MAXREGS + 1) +#define EDATA_BITS_NFREE_SHIFT (EDATA_BITS_SZIND_WIDTH + EDATA_BITS_SZIND_SHIFT) +#define EDATA_BITS_NFREE_MASK \ + MASK(EDATA_BITS_NFREE_WIDTH, EDATA_BITS_NFREE_SHIFT) -#define EDATA_BITS_BINSHARD_WIDTH 6 -#define EDATA_BITS_BINSHARD_SHIFT (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT) -#define EDATA_BITS_BINSHARD_MASK MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_BINSHARD_WIDTH 6 +#define EDATA_BITS_BINSHARD_SHIFT \ + (EDATA_BITS_NFREE_WIDTH + EDATA_BITS_NFREE_SHIFT) +#define EDATA_BITS_BINSHARD_MASK \ + MASK(EDATA_BITS_BINSHARD_WIDTH, EDATA_BITS_BINSHARD_SHIFT) #define EDATA_BITS_IS_HEAD_WIDTH 1 -#define EDATA_BITS_IS_HEAD_SHIFT (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) -#define EDATA_BITS_IS_HEAD_MASK MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) +#define EDATA_BITS_IS_HEAD_SHIFT \ + (EDATA_BITS_BINSHARD_WIDTH + EDATA_BITS_BINSHARD_SHIFT) +#define EDATA_BITS_IS_HEAD_MASK \ + MASK(EDATA_BITS_IS_HEAD_WIDTH, EDATA_BITS_IS_HEAD_SHIFT) /* Pointer to the extent that this structure is responsible for. */ - void *e_addr; + void *e_addr; union { /* @@ -207,11 +221,11 @@ struct edata_s { * * ssssssss [...] 
ssssssss ssssnnnn nnnnnnnn */ - size_t e_size_esn; - #define EDATA_SIZE_MASK ((size_t)~(PAGE-1)) - #define EDATA_ESN_MASK ((size_t)PAGE-1) + size_t e_size_esn; +#define EDATA_SIZE_MASK ((size_t) ~(PAGE - 1)) +#define EDATA_ESN_MASK ((size_t)PAGE - 1) /* Base extent size, which may not be a multiple of PAGE. */ - size_t e_bsize; + size_t e_bsize; }; /* @@ -232,7 +246,7 @@ struct edata_s { * List linkage used when the edata_t is active; either in * arena's large allocations or bin_t's slabs_full. */ - ql_elm(edata_t) ql_link_active; + ql_elm(edata_t) ql_link_active; /* * Pairing heap linkage. Used whenever the extent is inactive * (in the page allocators), or when it is active and in @@ -240,7 +254,7 @@ struct edata_s { * extent and sitting in an edata_cache. */ union { - edata_heap_link_t heap_link; + edata_heap_link_t heap_link; edata_avail_link_t avail_link; }; }; @@ -253,10 +267,10 @@ struct edata_s { */ ql_elm(edata_t) ql_link_inactive; /* Small region slab metadata. */ - slab_data_t e_slab_data; + slab_data_t e_slab_data; /* Profiling data, used for large objects. 
*/ - e_prof_info_t e_prof_info; + e_prof_info_t e_prof_info; }; }; @@ -265,8 +279,8 @@ TYPED_LIST(edata_list_inactive, edata_t, ql_link_inactive) static inline unsigned edata_arena_ind_get(const edata_t *edata) { - unsigned arena_ind = (unsigned)((edata->e_bits & - EDATA_BITS_ARENA_MASK) >> EDATA_BITS_ARENA_SHIFT); + unsigned arena_ind = (unsigned)((edata->e_bits & EDATA_BITS_ARENA_MASK) + >> EDATA_BITS_ARENA_SHIFT); assert(arena_ind < MALLOCX_ARENA_LIMIT); return arena_ind; @@ -274,8 +288,8 @@ edata_arena_ind_get(const edata_t *edata) { static inline szind_t edata_szind_get_maybe_invalid(const edata_t *edata) { - szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) >> - EDATA_BITS_SZIND_SHIFT); + szind_t szind = (szind_t)((edata->e_bits & EDATA_BITS_SZIND_MASK) + >> EDATA_BITS_SZIND_SHIFT); assert(szind <= SC_NSIZES); return szind; } @@ -318,8 +332,8 @@ edata_usize_get(const edata_t *edata) { if (!sz_large_size_classes_disabled() || szind < SC_NBINS) { size_t usize_from_ind = sz_index2size(szind); - if (!sz_large_size_classes_disabled() && - usize_from_ind >= SC_LARGE_MINCLASS) { + if (!sz_large_size_classes_disabled() + && usize_from_ind >= SC_LARGE_MINCLASS) { size_t size = (edata->e_size_esn & EDATA_SIZE_MASK); assert(size > sz_large_pad); size_t usize_from_size = size - sz_large_pad; @@ -341,8 +355,9 @@ edata_usize_get(const edata_t *edata) { static inline unsigned edata_binshard_get(const edata_t *edata) { - unsigned binshard = (unsigned)((edata->e_bits & - EDATA_BITS_BINSHARD_MASK) >> EDATA_BITS_BINSHARD_SHIFT); + unsigned binshard = (unsigned)((edata->e_bits + & EDATA_BITS_BINSHARD_MASK) + >> EDATA_BITS_BINSHARD_SHIFT); assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); return binshard; } @@ -354,58 +369,58 @@ edata_sn_get(const edata_t *edata) { static inline extent_state_t edata_state_get(const edata_t *edata) { - return (extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) >> - EDATA_BITS_STATE_SHIFT); + return 
(extent_state_t)((edata->e_bits & EDATA_BITS_STATE_MASK) + >> EDATA_BITS_STATE_SHIFT); } static inline bool edata_guarded_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) >> - EDATA_BITS_GUARDED_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_GUARDED_MASK) + >> EDATA_BITS_GUARDED_SHIFT); } static inline bool edata_zeroed_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) >> - EDATA_BITS_ZEROED_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_ZEROED_MASK) + >> EDATA_BITS_ZEROED_SHIFT); } static inline bool edata_committed_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) >> - EDATA_BITS_COMMITTED_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_COMMITTED_MASK) + >> EDATA_BITS_COMMITTED_SHIFT); } static inline extent_pai_t edata_pai_get(const edata_t *edata) { - return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) >> - EDATA_BITS_PAI_SHIFT); + return (extent_pai_t)((edata->e_bits & EDATA_BITS_PAI_MASK) + >> EDATA_BITS_PAI_SHIFT); } static inline bool edata_slab_get(const edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) >> - EDATA_BITS_SLAB_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_SLAB_MASK) + >> EDATA_BITS_SLAB_SHIFT); } static inline unsigned edata_nfree_get(const edata_t *edata) { assert(edata_slab_get(edata)); - return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) >> - EDATA_BITS_NFREE_SHIFT); + return (unsigned)((edata->e_bits & EDATA_BITS_NFREE_MASK) + >> EDATA_BITS_NFREE_SHIFT); } static inline void * edata_base_get(const edata_t *edata) { - assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || - !edata_slab_get(edata)); + assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) + || !edata_slab_get(edata)); return PAGE_ADDR2BASE(edata->e_addr); } static inline void * edata_addr_get(const edata_t *edata) { - assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) || - !edata_slab_get(edata)); + 
assert(edata->e_addr == PAGE_ADDR2BASE(edata->e_addr) + || !edata_slab_get(edata)); return edata->e_addr; } @@ -437,14 +452,14 @@ edata_before_get(const edata_t *edata) { static inline void * edata_last_get(const edata_t *edata) { - return (void *)((byte_t *)edata_base_get(edata) + - edata_size_get(edata) - PAGE); + return (void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata) + - PAGE); } static inline void * edata_past_get(const edata_t *edata) { - return (void *)((byte_t *)edata_base_get(edata) + - edata_size_get(edata)); + return ( + void *)((byte_t *)edata_base_get(edata) + edata_size_get(edata)); } static inline slab_data_t * @@ -461,8 +476,8 @@ edata_slab_data_get_const(const edata_t *edata) { static inline prof_tctx_t * edata_prof_tctx_get(const edata_t *edata) { - return (prof_tctx_t *)atomic_load_p(&edata->e_prof_info.e_prof_tctx, - ATOMIC_ACQUIRE); + return (prof_tctx_t *)atomic_load_p( + &edata->e_prof_info.e_prof_tctx, ATOMIC_ACQUIRE); } static inline const nstime_t * @@ -483,16 +498,16 @@ edata_prof_recent_alloc_get_dont_call_directly(const edata_t *edata) { static inline void edata_arena_ind_set(edata_t *edata, unsigned arena_ind) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) | - ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ARENA_MASK) + | ((uint64_t)arena_ind << EDATA_BITS_ARENA_SHIFT); } static inline void edata_binshard_set(edata_t *edata, unsigned binshard) { /* The assertion assumes szind is set already. 
*/ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); - edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) | - ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_BINSHARD_MASK) + | ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT); } static inline void @@ -508,8 +523,8 @@ edata_size_set(edata_t *edata, size_t size) { static inline void edata_esn_set(edata_t *edata, size_t esn) { - edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) | (esn & - EDATA_ESN_MASK); + edata->e_size_esn = (edata->e_size_esn & ~EDATA_ESN_MASK) + | (esn & EDATA_ESN_MASK); } static inline void @@ -526,25 +541,26 @@ edata_ps_set(edata_t *edata, hpdata_t *ps) { static inline void edata_szind_set(edata_t *edata, szind_t szind) { assert(szind <= SC_NSIZES); /* SC_NSIZES means "invalid". */ - edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) | - ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SZIND_MASK) + | ((uint64_t)szind << EDATA_BITS_SZIND_SHIFT); } static inline void edata_nfree_set(edata_t *edata, unsigned nfree) { assert(edata_slab_get(edata)); - edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) | - ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_NFREE_MASK) + | ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void edata_nfree_binshard_set(edata_t *edata, unsigned nfree, unsigned binshard) { /* The assertion assumes szind is set already. 
*/ assert(binshard < bin_infos[edata_szind_get(edata)].n_shards); - edata->e_bits = (edata->e_bits & - (~EDATA_BITS_NFREE_MASK & ~EDATA_BITS_BINSHARD_MASK)) | - ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) | - ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); + edata->e_bits = (edata->e_bits + & (~EDATA_BITS_NFREE_MASK + & ~EDATA_BITS_BINSHARD_MASK)) + | ((uint64_t)binshard << EDATA_BITS_BINSHARD_SHIFT) + | ((uint64_t)nfree << EDATA_BITS_NFREE_SHIFT); } static inline void @@ -572,38 +588,38 @@ edata_sn_set(edata_t *edata, uint64_t sn) { static inline void edata_state_set(edata_t *edata, extent_state_t state) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) | - ((uint64_t)state << EDATA_BITS_STATE_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_STATE_MASK) + | ((uint64_t)state << EDATA_BITS_STATE_SHIFT); } static inline void edata_guarded_set(edata_t *edata, bool guarded) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) | - ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_GUARDED_MASK) + | ((uint64_t)guarded << EDATA_BITS_GUARDED_SHIFT); } static inline void edata_zeroed_set(edata_t *edata, bool zeroed) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) | - ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_ZEROED_MASK) + | ((uint64_t)zeroed << EDATA_BITS_ZEROED_SHIFT); } static inline void edata_committed_set(edata_t *edata, bool committed) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) | - ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_COMMITTED_MASK) + | ((uint64_t)committed << EDATA_BITS_COMMITTED_SHIFT); } static inline void edata_pai_set(edata_t *edata, extent_pai_t pai) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) | - ((uint64_t)pai << EDATA_BITS_PAI_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_PAI_MASK) + | ((uint64_t)pai << 
EDATA_BITS_PAI_SHIFT); } static inline void edata_slab_set(edata_t *edata, bool slab) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) | - ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_SLAB_MASK) + | ((uint64_t)slab << EDATA_BITS_SLAB_SHIFT); } static inline void @@ -622,22 +638,22 @@ edata_prof_alloc_size_set(edata_t *edata, size_t size) { } static inline void -edata_prof_recent_alloc_set_dont_call_directly(edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_set_dont_call_directly( + edata_t *edata, prof_recent_t *recent_alloc) { atomic_store_p(&edata->e_prof_info.e_prof_recent_alloc, recent_alloc, ATOMIC_RELAXED); } static inline bool edata_is_head_get(edata_t *edata) { - return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) >> - EDATA_BITS_IS_HEAD_SHIFT); + return (bool)((edata->e_bits & EDATA_BITS_IS_HEAD_MASK) + >> EDATA_BITS_IS_HEAD_SHIFT); } static inline void edata_is_head_set(edata_t *edata, bool is_head) { - edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) | - ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); + edata->e_bits = (edata->e_bits & ~EDATA_BITS_IS_HEAD_MASK) + | ((uint64_t)is_head << EDATA_BITS_IS_HEAD_SHIFT); } static inline bool @@ -676,8 +692,8 @@ edata_init(edata_t *edata, unsigned arena_ind, void *addr, size_t size, } static inline void -edata_binit(edata_t *edata, void *addr, size_t bsize, uint64_t sn, - bool reused) { +edata_binit( + edata_t *edata, void *addr, size_t bsize, uint64_t sn, bool reused) { edata_arena_ind_set(edata, (1U << MALLOCX_ARENA_BITS) - 1); edata_addr_set(edata, addr); edata_bsize_set(edata, bsize); @@ -729,11 +745,13 @@ edata_cmp_summary_encode(edata_cmp_summary_t src) { static inline int edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { - unsigned __int128 a_encoded = edata_cmp_summary_encode(a); - unsigned __int128 b_encoded = edata_cmp_summary_encode(b); - if (a_encoded < b_encoded) return -1; - if 
(a_encoded == b_encoded) return 0; - return 1; + unsigned __int128 a_encoded = edata_cmp_summary_encode(a); + unsigned __int128 b_encoded = edata_cmp_summary_encode(b); + if (a_encoded < b_encoded) + return -1; + if (a_encoded == b_encoded) + return 0; + return 1; } #else static inline int @@ -750,8 +768,8 @@ edata_cmp_summary_comp(edata_cmp_summary_t a, edata_cmp_summary_t b) { * prediction accuracy is not great. As a result, this implementation * is measurably faster (by around 30%). */ - return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + - ((a.addr > b.addr) - (a.addr < b.addr)); + return (2 * ((a.sn > b.sn) - (a.sn < b.sn))) + + ((a.addr > b.addr) - (a.addr < b.addr)); } #endif @@ -772,7 +790,6 @@ edata_esnead_comp(const edata_t *a, const edata_t *b) { return (2 * edata_esn_comp(a, b)) + edata_ead_comp(a, b); } -ph_proto(, edata_avail, edata_t) -ph_proto(, edata_heap, edata_t) +ph_proto(, edata_avail, edata_t) ph_proto(, edata_heap, edata_t) #endif /* JEMALLOC_INTERNAL_EDATA_H */ diff --git a/include/jemalloc/internal/edata_cache.h b/include/jemalloc/internal/edata_cache.h index b2c7b4f1..d92d90cb 100644 --- a/include/jemalloc/internal/edata_cache.h +++ b/include/jemalloc/internal/edata_cache.h @@ -15,13 +15,13 @@ typedef struct edata_cache_s edata_cache_t; struct edata_cache_s { - edata_avail_t avail; - atomic_zu_t count; + edata_avail_t avail; + atomic_zu_t count; malloc_mutex_t mtx; - base_t *base; + base_t *base; }; -bool edata_cache_init(edata_cache_t *edata_cache, base_t *base); +bool edata_cache_init(edata_cache_t *edata_cache, base_t *base); edata_t *edata_cache_get(tsdn_t *tsdn, edata_cache_t *edata_cache); void edata_cache_put(tsdn_t *tsdn, edata_cache_t *edata_cache, edata_t *edata); @@ -37,14 +37,14 @@ void edata_cache_postfork_child(tsdn_t *tsdn, edata_cache_t *edata_cache); typedef struct edata_cache_fast_s edata_cache_fast_t; struct edata_cache_fast_s { edata_list_inactive_t list; - edata_cache_t *fallback; - bool disabled; + edata_cache_t *fallback; 
+ bool disabled; }; void edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback); edata_t *edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs); -void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, - edata_t *edata); +void edata_cache_fast_put( + tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata); void edata_cache_fast_disable(tsdn_t *tsdn, edata_cache_fast_t *ecs); #endif /* JEMALLOC_INTERNAL_EDATA_CACHE_H */ diff --git a/include/jemalloc/internal/ehooks.h b/include/jemalloc/internal/ehooks.h index 947e056c..c65e189a 100644 --- a/include/jemalloc/internal/ehooks.h +++ b/include/jemalloc/internal/ehooks.h @@ -46,10 +46,10 @@ extern const extent_hooks_t ehooks_default_extent_hooks; */ void *ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -bool ehooks_default_dalloc_impl(void *addr, size_t size); -void ehooks_default_destroy_impl(void *addr, size_t size); -bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); -bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_dalloc_impl(void *addr, size_t size); +void ehooks_default_destroy_impl(void *addr, size_t size); +bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length); +bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length); #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length); #endif @@ -116,8 +116,8 @@ ehooks_get_extent_hooks_ptr(ehooks_t *ehooks) { static inline bool ehooks_are_default(ehooks_t *ehooks) { - return ehooks_get_extent_hooks_ptr(ehooks) == - &ehooks_default_extent_hooks; + return ehooks_get_extent_hooks_ptr(ehooks) + == &ehooks_default_extent_hooks; } /* @@ -189,16 +189,15 @@ ehooks_debug_zero_check(void *addr, size_t size) { } } - static inline void * ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, 
size_t alignment, bool *zero, bool *commit) { - bool orig_zero = *zero; - void *ret; + bool orig_zero = *zero; + void *ret; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { - ret = ehooks_default_alloc_impl(tsdn, new_addr, size, - alignment, zero, commit, ehooks_ind_get(ehooks)); + ret = ehooks_default_alloc_impl(tsdn, new_addr, size, alignment, + zero, commit, ehooks_ind_get(ehooks)); } else { ehooks_pre_reentrancy(tsdn); ret = extent_hooks->alloc(extent_hooks, new_addr, size, @@ -214,8 +213,8 @@ ehooks_alloc(tsdn_t *tsdn, ehooks_t *ehooks, void *new_addr, size_t size, } static inline bool -ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed) { +ehooks_dalloc( + tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { return ehooks_default_dalloc_impl(addr, size); @@ -231,8 +230,8 @@ ehooks_dalloc(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, } static inline void -ehooks_destroy(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, - bool committed) { +ehooks_destroy( + tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, bool committed) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { ehooks_default_destroy_impl(addr, size); @@ -250,15 +249,15 @@ static inline bool ehooks_commit(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size, size_t offset, size_t length) { extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); - bool err; + bool err; if (extent_hooks == &ehooks_default_extent_hooks) { err = ehooks_default_commit_impl(addr, offset, length); } else if (extent_hooks->commit == NULL) { err = true; } else { ehooks_pre_reentrancy(tsdn); - err = extent_hooks->commit(extent_hooks, addr, size, - offset, length, 
ehooks_ind_get(ehooks)); + err = extent_hooks->commit(extent_hooks, addr, size, offset, + length, ehooks_ind_get(ehooks)); ehooks_post_reentrancy(tsdn); } if (!err) { @@ -384,7 +383,7 @@ ehooks_zero(tsdn_t *tsdn, ehooks_t *ehooks, void *addr, size_t size) { static inline bool ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { - bool err; + bool err; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { @@ -399,7 +398,7 @@ ehooks_guard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { static inline bool ehooks_unguard(tsdn_t *tsdn, ehooks_t *ehooks, void *guard1, void *guard2) { - bool err; + bool err; extent_hooks_t *extent_hooks = ehooks_get_extent_hooks_ptr(ehooks); if (extent_hooks == &ehooks_default_extent_hooks) { diff --git a/include/jemalloc/internal/emap.h b/include/jemalloc/internal/emap.h index fba46abe..88692356 100644 --- a/include/jemalloc/internal/emap.h +++ b/include/jemalloc/internal/emap.h @@ -10,9 +10,9 @@ * EMAP_DECLARE_RTREE_CTX; * in uses will avoid empty-statement warnings. */ -#define EMAP_DECLARE_RTREE_CTX \ - rtree_ctx_t rtree_ctx_fallback; \ - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) +#define EMAP_DECLARE_RTREE_CTX \ + rtree_ctx_t rtree_ctx_fallback; \ + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback) typedef struct emap_s emap_t; struct emap_s { @@ -22,25 +22,25 @@ struct emap_s { /* Used to pass rtree lookup context down the path. 
*/ typedef struct emap_alloc_ctx_s emap_alloc_ctx_t; struct emap_alloc_ctx_s { - size_t usize; + size_t usize; szind_t szind; - bool slab; + bool slab; }; typedef struct emap_full_alloc_ctx_s emap_full_alloc_ctx_t; struct emap_full_alloc_ctx_s { - szind_t szind; - bool slab; + szind_t szind; + bool slab; edata_t *edata; }; bool emap_init(emap_t *emap, base_t *base, bool zeroed); -void emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, - bool slab); +void emap_remap( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); -void emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t state); +void emap_update_edata_state( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state); /* * The two acquire functions below allow accessing neighbor edatas, if it's safe @@ -62,16 +62,16 @@ edata_t *emap_try_acquire_edata_neighbor(tsdn_t *tsdn, emap_t *emap, bool forward); edata_t *emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state); -void emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t new_state); +void emap_release_edata( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state); /* * Associate the given edata with its beginning and end address, setting the * szind and slab info appropriately. * Returns true on error (i.e. resource exhaustion). */ -bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind, bool slab); +bool emap_register_boundary( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab); /* * Does the same thing, but with the interior of the range, for slab @@ -92,8 +92,8 @@ bool emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, * touched, so no allocation is necessary to fill the interior once the boundary * has been touched. 
*/ -void emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind); +void emap_register_interior( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind); void emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata); void emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata); @@ -161,8 +161,8 @@ emap_edata_in_transition(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { emap_assert_mapped(tsdn, emap, edata); EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata)); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata)); return edata_state_in_transition(contents.metadata.state); } @@ -194,9 +194,9 @@ emap_edata_is_acquired(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } rtree_contents_t contents = rtree_leaf_elm_read(tsdn, &emap->rtree, elm, /* dependent */ false); - if (contents.edata == NULL || - contents.metadata.state == extent_state_active || - edata_state_in_transition(contents.metadata.state)) { + if (contents.edata == NULL + || contents.metadata.state == extent_state_active + || edata_state_in_transition(contents.metadata.state)) { return true; } @@ -211,8 +211,8 @@ extent_assert_can_coalesce(const edata_t *inner, const edata_t *outer) { assert(edata_state_get(inner) == extent_state_active); assert(edata_state_get(outer) == extent_state_merging); assert(!edata_guarded_get(inner) && !edata_guarded_get(outer)); - assert(edata_base_get(inner) == edata_past_get(outer) || - edata_base_get(outer) == edata_past_get(inner)); + assert(edata_base_get(inner) == edata_past_get(outer) + || edata_base_get(outer) == edata_past_get(inner)); } JEMALLOC_ALWAYS_INLINE void @@ -232,13 +232,13 @@ emap_edata_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -emap_alloc_ctx_init(emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, - size_t usize) { 
+emap_alloc_ctx_init( + emap_alloc_ctx_t *alloc_ctx, szind_t szind, bool slab, size_t usize) { alloc_ctx->szind = szind; alloc_ctx->slab = slab; alloc_ctx->usize = usize; - assert(sz_large_size_classes_disabled() || - usize == sz_index2size(szind)); + assert( + sz_large_size_classes_disabled() || usize == sz_index2size(szind)); } JEMALLOC_ALWAYS_INLINE size_t @@ -248,27 +248,29 @@ emap_alloc_ctx_usize_get(emap_alloc_ctx_t *alloc_ctx) { assert(alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); return sz_index2size(alloc_ctx->szind); } - assert(sz_large_size_classes_disabled() || - alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); + assert(sz_large_size_classes_disabled() + || alloc_ctx->usize == sz_index2size(alloc_ctx->szind)); assert(alloc_ctx->usize <= SC_LARGE_MAXCLASS); return alloc_ctx->usize; } /* Fills in alloc_ctx with the info in the map. */ JEMALLOC_ALWAYS_INLINE void -emap_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, - emap_alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_lookup( + tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, - rtree_ctx, (uintptr_t)ptr); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); /* * If the alloc is invalid, do not calculate usize since edata * could be corrupted. */ emap_alloc_ctx_init(alloc_ctx, contents.metadata.szind, - contents.metadata.slab, (contents.metadata.szind == SC_NSIZES - || contents.edata == NULL)? 0: edata_usize_get(contents.edata)); + contents.metadata.slab, + (contents.metadata.szind == SC_NSIZES || contents.edata == NULL) + ? 0 + : edata_usize_get(contents.edata)); } /* The pointer must be mapped. 
*/ @@ -277,8 +279,8 @@ emap_full_alloc_ctx_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, emap_full_alloc_ctx_t *full_alloc_ctx) { EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr); full_alloc_ctx->edata = contents.edata; full_alloc_ctx->szind = contents.metadata.szind; full_alloc_ctx->slab = contents.metadata.slab; @@ -295,8 +297,8 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, EMAP_DECLARE_RTREE_CTX; rtree_contents_t contents; - bool err = rtree_read_independent(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)ptr, &contents); + bool err = rtree_read_independent( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)ptr, &contents); if (err) { return true; } @@ -311,14 +313,14 @@ emap_full_alloc_ctx_try_lookup(tsdn_t *tsdn, emap_t *emap, const void *ptr, * fast path, e.g. when the metadata key is not cached. */ JEMALLOC_ALWAYS_INLINE bool -emap_alloc_ctx_try_lookup_fast(tsd_t *tsd, emap_t *emap, const void *ptr, - emap_alloc_ctx_t *alloc_ctx) { +emap_alloc_ctx_try_lookup_fast( + tsd_t *tsd, emap_t *emap, const void *ptr, emap_alloc_ctx_t *alloc_ctx) { /* Use the unsafe getter since this may gets called during exit. */ rtree_ctx_t *rtree_ctx = tsd_rtree_ctxp_get_unsafe(tsd); rtree_metadata_t metadata; - bool err = rtree_metadata_try_read_fast(tsd_tsdn(tsd), &emap->rtree, - rtree_ctx, (uintptr_t)ptr, &metadata); + bool err = rtree_metadata_try_read_fast( + tsd_tsdn(tsd), &emap->rtree, rtree_ctx, (uintptr_t)ptr, &metadata); if (err) { return true; } @@ -345,11 +347,12 @@ typedef const void *(*emap_ptr_getter)(void *ctx, size_t ind); * This allows size-checking assertions, which we can only do while we're in the * process of edata lookups. 
*/ -typedef void (*emap_metadata_visitor)(void *ctx, emap_full_alloc_ctx_t *alloc_ctx); +typedef void (*emap_metadata_visitor)( + void *ctx, emap_full_alloc_ctx_t *alloc_ctx); typedef union emap_batch_lookup_result_u emap_batch_lookup_result_t; union emap_batch_lookup_result_u { - edata_t *edata; + edata_t *edata; rtree_leaf_elm_t *rtree_leaf; }; @@ -375,8 +378,8 @@ emap_edata_lookup_batch(tsd_t *tsd, emap_t *emap, size_t nptrs, for (size_t i = 0; i < nptrs; i++) { rtree_leaf_elm_t *elm = result[i].rtree_leaf; - rtree_contents_t contents = rtree_leaf_elm_read(tsd_tsdn(tsd), - &emap->rtree, elm, /* dependent */ true); + rtree_contents_t contents = rtree_leaf_elm_read( + tsd_tsdn(tsd), &emap->rtree, elm, /* dependent */ true); result[i].edata = contents.edata; emap_full_alloc_ctx_t alloc_ctx; /* diff --git a/include/jemalloc/internal/emitter.h b/include/jemalloc/internal/emitter.h index 11153254..a4073e6a 100644 --- a/include/jemalloc/internal/emitter.h +++ b/include/jemalloc/internal/emitter.h @@ -44,18 +44,18 @@ typedef struct emitter_col_s emitter_col_t; struct emitter_col_s { /* Filled in by the user. */ emitter_justify_t justify; - int width; - emitter_type_t type; + int width; + emitter_type_t type; union { - bool bool_val; - int int_val; - unsigned unsigned_val; - uint32_t uint32_val; - uint32_t uint32_t_val; - uint64_t uint64_val; - uint64_t uint64_t_val; - size_t size_val; - ssize_t ssize_val; + bool bool_val; + int int_val; + unsigned unsigned_val; + uint32_t uint32_val; + uint32_t uint32_t_val; + uint64_t uint64_val; + uint64_t uint64_t_val; + size_t size_val; + ssize_t ssize_val; const char *str_val; }; @@ -73,8 +73,8 @@ struct emitter_s { emitter_output_t output; /* The output information. */ write_cb_t *write_cb; - void *cbopaque; - int nesting_depth; + void *cbopaque; + int nesting_depth; /* True if we've already emitted a value at the given depth. */ bool item_at_depth; /* True if we emitted a key and will emit corresponding value next. 
*/ @@ -83,8 +83,8 @@ struct emitter_s { static inline bool emitter_outputs_json(emitter_t *emitter) { - return emitter->output == emitter_output_json || - emitter->output == emitter_output_json_compact; + return emitter->output == emitter_output_json + || emitter->output == emitter_output_json_compact; } /* Internal convenience function. Write to the emitter the given string. */ @@ -98,23 +98,23 @@ emitter_printf(emitter_t *emitter, const char *format, ...) { va_end(ap); } -static inline const char * JEMALLOC_FORMAT_ARG(3) -emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, - emitter_justify_t justify, int width) { +static inline const char * +JEMALLOC_FORMAT_ARG(3) emitter_gen_fmt(char *out_fmt, size_t out_size, + const char *fmt_specifier, emitter_justify_t justify, int width) { size_t written; fmt_specifier++; if (justify == emitter_justify_none) { - written = malloc_snprintf(out_fmt, out_size, - "%%%s", fmt_specifier); + written = malloc_snprintf( + out_fmt, out_size, "%%%s", fmt_specifier); } else if (justify == emitter_justify_left) { - written = malloc_snprintf(out_fmt, out_size, - "%%-%d%s", width, fmt_specifier); + written = malloc_snprintf( + out_fmt, out_size, "%%-%d%s", width, fmt_specifier); } else { - written = malloc_snprintf(out_fmt, out_size, - "%%%d%s", width, fmt_specifier); + written = malloc_snprintf( + out_fmt, out_size, "%%%d%s", width, fmt_specifier); } /* Only happens in case of bad format string, which *we* choose. 
*/ - assert(written < out_size); + assert(written < out_size); return out_fmt; } @@ -122,10 +122,10 @@ static inline void emitter_emit_str(emitter_t *emitter, emitter_justify_t justify, int width, char *fmt, size_t fmt_size, const char *str) { #define BUF_SIZE 256 - char buf[BUF_SIZE]; + char buf[BUF_SIZE]; size_t str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", str); - emitter_printf(emitter, - emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); + emitter_printf( + emitter, emitter_gen_fmt(fmt, fmt_size, "%s", justify, width), buf); if (str_written < BUF_SIZE) { return; } @@ -168,16 +168,16 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, */ char fmt[FMT_SIZE]; -#define EMIT_SIMPLE(type, format) \ - emitter_printf(emitter, \ - emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ +#define EMIT_SIMPLE(type, format) \ + emitter_printf(emitter, \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width), \ *(const type *)value); switch (value_type) { case emitter_type_bool: emitter_printf(emitter, emitter_gen_fmt(fmt, FMT_SIZE, "%s", justify, width), - *(const bool *)value ? "true" : "false"); + *(const bool *)value ? "true" : "false"); break; case emitter_type_int: EMIT_SIMPLE(int, "%d") @@ -213,7 +213,6 @@ emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, #undef FMT_SIZE } - /* Internal functions. In json mode, tracks nesting state. 
*/ static inline void emitter_nest_inc(emitter_t *emitter) { @@ -229,7 +228,7 @@ emitter_nest_dec(emitter_t *emitter) { static inline void emitter_indent(emitter_t *emitter) { - int amount = emitter->nesting_depth; + int amount = emitter->nesting_depth; const char *indent_str; assert(emitter->output != emitter_output_json_compact); if (emitter->output == emitter_output_json) { @@ -291,12 +290,12 @@ emitter_json_key(emitter_t *emitter, const char *json_key) { } static inline void -emitter_json_value(emitter_t *emitter, emitter_type_t value_type, - const void *value) { +emitter_json_value( + emitter_t *emitter, emitter_type_t value_type, const void *value) { if (emitter_outputs_json(emitter)) { emitter_json_key_prefix(emitter); - emitter_print_value(emitter, emitter_justify_none, -1, - value_type, value); + emitter_print_value( + emitter, emitter_justify_none, -1, value_type, value); emitter->item_at_depth = true; } } @@ -367,7 +366,6 @@ emitter_json_object_end(emitter_t *emitter) { } } - /******************************************************************************/ /* Table public API. 
*/ @@ -389,14 +387,13 @@ emitter_table_dict_end(emitter_t *emitter) { static inline void emitter_table_kv_note(emitter_t *emitter, const char *table_key, - emitter_type_t value_type, const void *value, - const char *table_note_key, emitter_type_t table_note_value_type, - const void *table_note_value) { + emitter_type_t value_type, const void *value, const char *table_note_key, + emitter_type_t table_note_value_type, const void *table_note_value) { if (emitter->output == emitter_output_table) { emitter_indent(emitter); emitter_printf(emitter, "%s: ", table_key); - emitter_print_value(emitter, emitter_justify_none, -1, - value_type, value); + emitter_print_value( + emitter, emitter_justify_none, -1, value_type, value); if (table_note_key != NULL) { emitter_printf(emitter, " (%s: ", table_note_key); emitter_print_value(emitter, emitter_justify_none, -1, @@ -415,7 +412,6 @@ emitter_table_kv(emitter_t *emitter, const char *table_key, emitter_type_bool, NULL); } - /* Write to the emitter the given string, but only in table mode. */ JEMALLOC_FORMAT_PRINTF(2, 3) static inline void @@ -423,7 +419,8 @@ emitter_table_printf(emitter_t *emitter, const char *format, ...) { if (emitter->output == emitter_output_table) { va_list ap; va_start(ap, format); - malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + malloc_vcprintf( + emitter->write_cb, emitter->cbopaque, format, ap); va_end(ap); } } @@ -434,7 +431,7 @@ emitter_table_row(emitter_t *emitter, emitter_row_t *row) { return; } emitter_col_t *col; - ql_foreach(col, &row->cols, link) { + ql_foreach (col, &row->cols, link) { emitter_print_value(emitter, col->justify, col->width, col->type, (const void *)&col->bool_val); } @@ -452,7 +449,6 @@ emitter_col_init(emitter_col_t *col, emitter_row_t *row) { ql_tail_insert(&row->cols, col, link); } - /******************************************************************************/ /* * Generalized public API. 
Emits using either JSON or table, according to @@ -464,9 +460,8 @@ emitter_col_init(emitter_col_t *col, emitter_row_t *row) { */ static inline void emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, - emitter_type_t value_type, const void *value, - const char *table_note_key, emitter_type_t table_note_value_type, - const void *table_note_value) { + emitter_type_t value_type, const void *value, const char *table_note_key, + emitter_type_t table_note_value_type, const void *table_note_value) { if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_value(emitter, value_type, value); @@ -485,8 +480,8 @@ emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, } static inline void -emitter_dict_begin(emitter_t *emitter, const char *json_key, - const char *table_header) { +emitter_dict_begin( + emitter_t *emitter, const char *json_key, const char *table_header) { if (emitter_outputs_json(emitter)) { emitter_json_key(emitter, json_key); emitter_json_object_begin(emitter); @@ -526,8 +521,9 @@ emitter_end(emitter_t *emitter) { if (emitter_outputs_json(emitter)) { assert(emitter->nesting_depth == 1); emitter_nest_dec(emitter); - emitter_printf(emitter, "%s", emitter->output == - emitter_output_json_compact ? "}" : "\n}\n"); + emitter_printf(emitter, "%s", + emitter->output == emitter_output_json_compact ? "}" + : "\n}\n"); } } diff --git a/include/jemalloc/internal/exp_grow.h b/include/jemalloc/internal/exp_grow.h index 40a1add0..8206ba85 100644 --- a/include/jemalloc/internal/exp_grow.h +++ b/include/jemalloc/internal/exp_grow.h @@ -27,8 +27,7 @@ exp_grow_size_prepare(exp_grow_t *exp_grow, size_t alloc_size_min, *r_alloc_size = sz_pind2sz(exp_grow->next + *r_skip); while (*r_alloc_size < alloc_size_min) { (*r_skip)++; - if (exp_grow->next + *r_skip >= - sz_psz2ind(SC_LARGE_MAXCLASS)) { + if (exp_grow->next + *r_skip >= sz_psz2ind(SC_LARGE_MAXCLASS)) { /* Outside legal range. 
*/ return true; } @@ -44,7 +43,6 @@ exp_grow_size_commit(exp_grow_t *exp_grow, pszind_t skip) { } else { exp_grow->next = exp_grow->limit; } - } void exp_grow_init(exp_grow_t *exp_grow); diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index be61db8d..e81dff2c 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -26,9 +26,10 @@ extern size_t opt_process_madvise_max_batch; #ifdef JEMALLOC_HAVE_PROCESS_MADVISE /* The iovec is on stack. Limit the max batch to avoid stack overflow. */ -#define PROCESS_MADVISE_MAX_BATCH_LIMIT (VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec)) +# define PROCESS_MADVISE_MAX_BATCH_LIMIT \ + (VARIABLE_ARRAY_SIZE_MAX / sizeof(struct iovec)) #else -#define PROCESS_MADVISE_MAX_BATCH_LIMIT 0 +# define PROCESS_MADVISE_MAX_BATCH_LIMIT 0 #endif edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, @@ -37,44 +38,43 @@ edata_t *ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool guarded); -void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, edata_t *edata); +void ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + edata_t *edata); edata_t *ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, size_t npages_min); void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata); void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata); -void extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); +void extent_dalloc_gap( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); edata_t *extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, bool growing_retained); -void 
extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); -void extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); -void extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata); +void extent_dalloc_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +void extent_dalloc_wrapper_purged( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); +void extent_destroy_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); -edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, - ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, - bool holding_core_locks); -bool extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b); -bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - bool commit, bool zero, bool growing_retained); +edata_t *extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks); +bool extent_merge_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b); +bool extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, + bool commit, bool zero, bool growing_retained); size_t extent_sn_next(pac_t *pac); -bool extent_boot(void); +bool extent_boot(void); JEMALLOC_ALWAYS_INLINE bool -extent_neighbor_head_state_mergeable(bool edata_is_head, - bool neighbor_is_head, bool forward) { +extent_neighbor_head_state_mergeable( + bool edata_is_head, bool neighbor_is_head, bool forward) { /* * Head states checking: disallow merging if the higher 
addr extent is a * head extent. This helps preserve first-fit, and more importantly @@ -102,8 +102,8 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, } /* It's not safe to access *neighbor yet; must verify states first. */ bool neighbor_is_head = contents.metadata.is_head; - if (!extent_neighbor_head_state_mergeable(edata_is_head_get(edata), - neighbor_is_head, forward)) { + if (!extent_neighbor_head_state_mergeable( + edata_is_head_get(edata), neighbor_is_head, forward)) { return false; } extent_state_t neighbor_state = contents.metadata.state; @@ -112,8 +112,9 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } /* From this point, it's safe to access *neighbor. */ - if (!expanding && (edata_committed_get(edata) != - edata_committed_get(neighbor))) { + if (!expanding + && (edata_committed_get(edata) + != edata_committed_get(neighbor))) { /* * Some platforms (e.g. Windows) require an explicit * commit step (and writing to uncommitted memory is not @@ -133,11 +134,11 @@ extent_can_acquire_neighbor(edata_t *edata, rtree_contents_t contents, return false; } if (opt_retain) { - assert(edata_arena_ind_get(edata) == - edata_arena_ind_get(neighbor)); + assert(edata_arena_ind_get(edata) + == edata_arena_ind_get(neighbor)); } else { - if (edata_arena_ind_get(edata) != - edata_arena_ind_get(neighbor)) { + if (edata_arena_ind_get(edata) + != edata_arena_ind_get(neighbor)) { return false; } } diff --git a/include/jemalloc/internal/extent_dss.h b/include/jemalloc/internal/extent_dss.h index c8e71e82..4bb3f51d 100644 --- a/include/jemalloc/internal/extent_dss.h +++ b/include/jemalloc/internal/extent_dss.h @@ -6,11 +6,11 @@ #include "jemalloc/internal/tsd_types.h" typedef enum { - dss_prec_disabled = 0, - dss_prec_primary = 1, + dss_prec_disabled = 0, + dss_prec_primary = 1, dss_prec_secondary = 2, - dss_prec_limit = 3 + dss_prec_limit = 3 } dss_prec_t; #define DSS_PREC_DEFAULT dss_prec_secondary #define DSS_DEFAULT 
"secondary" @@ -20,11 +20,11 @@ extern const char *const dss_prec_names[]; extern const char *opt_dss; dss_prec_t extent_dss_prec_get(void); -bool extent_dss_prec_set(dss_prec_t dss_prec); -void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, - size_t size, size_t alignment, bool *zero, bool *commit); -bool extent_in_dss(void *addr); -bool extent_dss_mergeable(void *addr_a, void *addr_b); -void extent_dss_boot(void); +bool extent_dss_prec_set(dss_prec_t dss_prec); +void *extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit); +bool extent_in_dss(void *addr); +bool extent_dss_mergeable(void *addr_a, void *addr_b); +void extent_dss_boot(void); #endif /* JEMALLOC_INTERNAL_EXTENT_DSS_H */ diff --git a/include/jemalloc/internal/extent_mmap.h b/include/jemalloc/internal/extent_mmap.h index e6a4649e..aa469896 100644 --- a/include/jemalloc/internal/extent_mmap.h +++ b/include/jemalloc/internal/extent_mmap.h @@ -5,8 +5,8 @@ extern bool opt_retain; -void *extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, - bool *zero, bool *commit); +void *extent_alloc_mmap( + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit); bool extent_dalloc_mmap(void *addr, size_t size); #endif /* JEMALLOC_INTERNAL_EXTENT_MMAP_EXTERNS_H */ diff --git a/include/jemalloc/internal/fb.h b/include/jemalloc/internal/fb.h index e38095af..bf76f362 100644 --- a/include/jemalloc/internal/fb.h +++ b/include/jemalloc/internal/fb.h @@ -15,8 +15,8 @@ typedef unsigned long fb_group_t; #define FB_GROUP_BITS (ZU(1) << (LG_SIZEOF_LONG + 3)) -#define FB_NGROUPS(nbits) ((nbits) / FB_GROUP_BITS \ - + ((nbits) % FB_GROUP_BITS == 0 ? 0 : 1)) +#define FB_NGROUPS(nbits) \ + ((nbits) / FB_GROUP_BITS + ((nbits) % FB_GROUP_BITS == 0 ? 
0 : 1)) static inline void fb_init(fb_group_t *fb, size_t nbits) { @@ -75,7 +75,6 @@ fb_unset(fb_group_t *fb, size_t nbits, size_t bit) { fb[group_ind] &= ~((fb_group_t)1 << bit_ind); } - /* * Some implementation details. This visitation function lets us apply a group * visitor to each group in the bitmap (potentially modifying it). The mask @@ -94,7 +93,8 @@ fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, * to from bit 0. */ size_t first_group_cnt = (start_bit_ind + cnt > FB_GROUP_BITS - ? FB_GROUP_BITS - start_bit_ind : cnt); + ? FB_GROUP_BITS - start_bit_ind + : cnt); /* * We can basically split affected words into: * - The first group, where we touch only the high bits @@ -104,8 +104,8 @@ fb_visit_impl(fb_group_t *fb, size_t nbits, fb_group_visitor_t visit, void *ctx, * this can lead to bad codegen for those middle words. */ /* First group */ - fb_group_t mask = ((~(fb_group_t)0) - >> (FB_GROUP_BITS - first_group_cnt)) + fb_group_t mask = + ((~(fb_group_t)0) >> (FB_GROUP_BITS - first_group_cnt)) << start_bit_ind; visit(ctx, &fb[group_ind], mask); @@ -176,12 +176,12 @@ fb_ucount(fb_group_t *fb, size_t nbits, size_t start, size_t cnt) { * Returns the number of bits in the bitmap if no such bit exists. */ JEMALLOC_ALWAYS_INLINE ssize_t -fb_find_impl(fb_group_t *fb, size_t nbits, size_t start, bool val, - bool forward) { +fb_find_impl( + fb_group_t *fb, size_t nbits, size_t start, bool val, bool forward) { assert(start < nbits); - size_t ngroups = FB_NGROUPS(nbits); + size_t ngroups = FB_NGROUPS(nbits); ssize_t group_ind = start / FB_GROUP_BITS; - size_t bit_ind = start % FB_GROUP_BITS; + size_t bit_ind = start % FB_GROUP_BITS; fb_group_t maybe_invert = (val ? 0 : (fb_group_t)-1); @@ -265,8 +265,8 @@ fb_iter_range_impl(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, return false; } /* Half open range; the set bits are [begin, end). 
*/ - ssize_t next_range_end = fb_find_impl(fb, nbits, next_range_begin, !val, - forward); + ssize_t next_range_end = fb_find_impl( + fb, nbits, next_range_begin, !val, forward); if (forward) { *r_begin = next_range_begin; *r_len = next_range_end - next_range_begin; @@ -324,8 +324,9 @@ fb_range_longest_impl(fb_group_t *fb, size_t nbits, bool val) { size_t begin = 0; size_t longest_len = 0; size_t len = 0; - while (begin < nbits && fb_iter_range_impl(fb, nbits, begin, &begin, - &len, val, /* forward */ true)) { + while (begin < nbits + && fb_iter_range_impl( + fb, nbits, begin, &begin, &len, val, /* forward */ true)) { if (len > longest_len) { longest_len = len; } diff --git a/include/jemalloc/internal/fxp.h b/include/jemalloc/internal/fxp.h index e42425f9..8ca4f3c6 100644 --- a/include/jemalloc/internal/fxp.h +++ b/include/jemalloc/internal/fxp.h @@ -89,7 +89,7 @@ fxp_round_down(fxp_t a) { static inline uint32_t fxp_round_nearest(fxp_t a) { - uint32_t fractional_part = (a & ((1U << 16) - 1)); + uint32_t fractional_part = (a & ((1U << 16) - 1)); uint32_t increment = (uint32_t)(fractional_part >= (1U << 15)); return (a >> 16) + increment; } diff --git a/include/jemalloc/internal/hash.h b/include/jemalloc/internal/hash.h index 15162b94..73e2214e 100644 --- a/include/jemalloc/internal/hash.h +++ b/include/jemalloc/internal/hash.h @@ -25,7 +25,7 @@ hash_rotl_64(uint64_t x, int8_t r) { static inline uint32_t hash_get_block_32(const uint32_t *p, int i) { /* Handle unaligned read. */ - if (unlikely((uintptr_t)p & (sizeof(uint32_t)-1)) != 0) { + if (unlikely((uintptr_t)p & (sizeof(uint32_t) - 1)) != 0) { uint32_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint32_t)); @@ -38,7 +38,7 @@ hash_get_block_32(const uint32_t *p, int i) { static inline uint64_t hash_get_block_64(const uint64_t *p, int i) { /* Handle unaligned read. 
*/ - if (unlikely((uintptr_t)p & (sizeof(uint64_t)-1)) != 0) { + if (unlikely((uintptr_t)p & (sizeof(uint64_t) - 1)) != 0) { uint64_t ret; memcpy(&ret, (uint8_t *)(p + i), sizeof(uint64_t)); @@ -72,8 +72,8 @@ hash_fmix_64(uint64_t k) { static inline uint32_t hash_x86_32(const void *key, int len, uint32_t seed) { - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 4; + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 4; uint32_t h1 = seed; @@ -82,8 +82,8 @@ hash_x86_32(const void *key, int len, uint32_t seed) { /* body */ { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*4); - int i; + const uint32_t *blocks = (const uint32_t *)(data + nblocks * 4); + int i; for (i = -nblocks; i; i++) { uint32_t k1 = hash_get_block_32(blocks, i); @@ -94,21 +94,29 @@ hash_x86_32(const void *key, int len, uint32_t seed) { h1 ^= k1; h1 = hash_rotl_32(h1, 13); - h1 = h1*5 + 0xe6546b64; + h1 = h1 * 5 + 0xe6546b64; } } /* tail */ { - const uint8_t *tail = (const uint8_t *) (data + nblocks*4); + const uint8_t *tail = (const uint8_t *)(data + nblocks * 4); uint32_t k1 = 0; switch (len & 3) { - case 3: k1 ^= tail[2] << 16; JEMALLOC_FALLTHROUGH; - case 2: k1 ^= tail[1] << 8; JEMALLOC_FALLTHROUGH; - case 1: k1 ^= tail[0]; k1 *= c1; k1 = hash_rotl_32(k1, 15); - k1 *= c2; h1 ^= k1; + case 3: + k1 ^= tail[2] << 16; + JEMALLOC_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + JEMALLOC_FALLTHROUGH; + case 1: + k1 ^= tail[0]; + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + h1 ^= k1; } } @@ -121,10 +129,9 @@ hash_x86_32(const void *key, int len, uint32_t seed) { } static inline void -hash_x86_128(const void *key, const int len, uint32_t seed, - uint64_t r_out[2]) { - const uint8_t * data = (const uint8_t *) key; - const int nblocks = len / 16; +hash_x86_128(const void *key, const int len, uint32_t seed, uint64_t r_out[2]) { + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 16; uint32_t h1 = seed; uint32_t h2 = 
seed; @@ -138,95 +145,161 @@ hash_x86_128(const void *key, const int len, uint32_t seed, /* body */ { - const uint32_t *blocks = (const uint32_t *) (data + nblocks*16); - int i; + const uint32_t *blocks = (const uint32_t *)(data + + nblocks * 16); + int i; for (i = -nblocks; i; i++) { - uint32_t k1 = hash_get_block_32(blocks, i*4 + 0); - uint32_t k2 = hash_get_block_32(blocks, i*4 + 1); - uint32_t k3 = hash_get_block_32(blocks, i*4 + 2); - uint32_t k4 = hash_get_block_32(blocks, i*4 + 3); + uint32_t k1 = hash_get_block_32(blocks, i * 4 + 0); + uint32_t k2 = hash_get_block_32(blocks, i * 4 + 1); + uint32_t k3 = hash_get_block_32(blocks, i * 4 + 2); + uint32_t k4 = hash_get_block_32(blocks, i * 4 + 3); - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + h1 ^= k1; - h1 = hash_rotl_32(h1, 19); h1 += h2; - h1 = h1*5 + 0x561ccd1b; + h1 = hash_rotl_32(h1, 19); + h1 += h2; + h1 = h1 * 5 + 0x561ccd1b; - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + k2 *= c2; + k2 = hash_rotl_32(k2, 16); + k2 *= c3; + h2 ^= k2; - h2 = hash_rotl_32(h2, 17); h2 += h3; - h2 = h2*5 + 0x0bcaa747; + h2 = hash_rotl_32(h2, 17); + h2 += h3; + h2 = h2 * 5 + 0x0bcaa747; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + k3 *= c3; + k3 = hash_rotl_32(k3, 17); + k3 *= c4; + h3 ^= k3; - h3 = hash_rotl_32(h3, 15); h3 += h4; - h3 = h3*5 + 0x96cd1c35; + h3 = hash_rotl_32(h3, 15); + h3 += h4; + h3 = h3 * 5 + 0x96cd1c35; - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + k4 *= c4; + k4 = hash_rotl_32(k4, 18); + k4 *= c1; + h4 ^= k4; - h4 = hash_rotl_32(h4, 13); h4 += h1; - h4 = h4*5 + 0x32ac3b17; + h4 = hash_rotl_32(h4, 13); + h4 += h1; + h4 = h4 * 5 + 0x32ac3b17; } } /* tail */ { - const uint8_t *tail = (const uint8_t *) (data + nblocks*16); - uint32_t k1 = 0; - uint32_t k2 = 0; - uint32_t k3 = 0; - uint32_t k4 = 0; + const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); + uint32_t k1 = 0; + uint32_t k2 = 0; 
+ uint32_t k3 = 0; + uint32_t k4 = 0; switch (len & 15) { - case 15: k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH; - case 14: k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH; - case 13: k4 ^= tail[12] << 0; - k4 *= c4; k4 = hash_rotl_32(k4, 18); k4 *= c1; h4 ^= k4; + case 15: + k4 ^= tail[14] << 16; JEMALLOC_FALLTHROUGH; - case 12: k3 ^= (uint32_t) tail[11] << 24; JEMALLOC_FALLTHROUGH; - case 11: k3 ^= tail[10] << 16; JEMALLOC_FALLTHROUGH; - case 10: k3 ^= tail[ 9] << 8; JEMALLOC_FALLTHROUGH; - case 9: k3 ^= tail[ 8] << 0; - k3 *= c3; k3 = hash_rotl_32(k3, 17); k3 *= c4; h3 ^= k3; + case 14: + k4 ^= tail[13] << 8; JEMALLOC_FALLTHROUGH; - case 8: k2 ^= (uint32_t) tail[ 7] << 24; JEMALLOC_FALLTHROUGH; - case 7: k2 ^= tail[ 6] << 16; JEMALLOC_FALLTHROUGH; - case 6: k2 ^= tail[ 5] << 8; JEMALLOC_FALLTHROUGH; - case 5: k2 ^= tail[ 4] << 0; - k2 *= c2; k2 = hash_rotl_32(k2, 16); k2 *= c3; h2 ^= k2; + case 13: + k4 ^= tail[12] << 0; + k4 *= c4; + k4 = hash_rotl_32(k4, 18); + k4 *= c1; + h4 ^= k4; JEMALLOC_FALLTHROUGH; - case 4: k1 ^= (uint32_t) tail[ 3] << 24; JEMALLOC_FALLTHROUGH; - case 3: k1 ^= tail[ 2] << 16; JEMALLOC_FALLTHROUGH; - case 2: k1 ^= tail[ 1] << 8; JEMALLOC_FALLTHROUGH; - case 1: k1 ^= tail[ 0] << 0; - k1 *= c1; k1 = hash_rotl_32(k1, 15); k1 *= c2; h1 ^= k1; + case 12: + k3 ^= (uint32_t)tail[11] << 24; + JEMALLOC_FALLTHROUGH; + case 11: + k3 ^= tail[10] << 16; + JEMALLOC_FALLTHROUGH; + case 10: + k3 ^= tail[9] << 8; + JEMALLOC_FALLTHROUGH; + case 9: + k3 ^= tail[8] << 0; + k3 *= c3; + k3 = hash_rotl_32(k3, 17); + k3 *= c4; + h3 ^= k3; + JEMALLOC_FALLTHROUGH; + case 8: + k2 ^= (uint32_t)tail[7] << 24; + JEMALLOC_FALLTHROUGH; + case 7: + k2 ^= tail[6] << 16; + JEMALLOC_FALLTHROUGH; + case 6: + k2 ^= tail[5] << 8; + JEMALLOC_FALLTHROUGH; + case 5: + k2 ^= tail[4] << 0; + k2 *= c2; + k2 = hash_rotl_32(k2, 16); + k2 *= c3; + h2 ^= k2; + JEMALLOC_FALLTHROUGH; + case 4: + k1 ^= (uint32_t)tail[3] << 24; + JEMALLOC_FALLTHROUGH; + case 3: + k1 ^= tail[2] << 16; + 
JEMALLOC_FALLTHROUGH; + case 2: + k1 ^= tail[1] << 8; + JEMALLOC_FALLTHROUGH; + case 1: + k1 ^= tail[0] << 0; + k1 *= c1; + k1 = hash_rotl_32(k1, 15); + k1 *= c2; + h1 ^= k1; break; } } /* finalization */ - h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len; + h1 ^= len; + h2 ^= len; + h3 ^= len; + h4 ^= len; - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; h1 = hash_fmix_32(h1); h2 = hash_fmix_32(h2); h3 = hash_fmix_32(h3); h4 = hash_fmix_32(h4); - h1 += h2; h1 += h3; h1 += h4; - h2 += h1; h3 += h1; h4 += h1; + h1 += h2; + h1 += h3; + h1 += h4; + h2 += h1; + h3 += h1; + h4 += h1; - r_out[0] = (((uint64_t) h2) << 32) | h1; - r_out[1] = (((uint64_t) h4) << 32) | h3; + r_out[0] = (((uint64_t)h2) << 32) | h1; + r_out[1] = (((uint64_t)h4) << 32) | h3; } static inline void -hash_x64_128(const void *key, const int len, const uint32_t seed, - uint64_t r_out[2]) { - const uint8_t *data = (const uint8_t *) key; - const int nblocks = len / 16; +hash_x64_128( + const void *key, const int len, const uint32_t seed, uint64_t r_out[2]) { + const uint8_t *data = (const uint8_t *)key; + const int nblocks = len / 16; uint64_t h1 = seed; uint64_t h2 = seed; @@ -236,56 +309,99 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, /* body */ { - const uint64_t *blocks = (const uint64_t *) (data); - int i; + const uint64_t *blocks = (const uint64_t *)(data); + int i; for (i = 0; i < nblocks; i++) { - uint64_t k1 = hash_get_block_64(blocks, i*2 + 0); - uint64_t k2 = hash_get_block_64(blocks, i*2 + 1); + uint64_t k1 = hash_get_block_64(blocks, i * 2 + 0); + uint64_t k2 = hash_get_block_64(blocks, i * 2 + 1); - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + k1 *= c1; + k1 = hash_rotl_64(k1, 31); + k1 *= c2; + h1 ^= k1; - h1 = hash_rotl_64(h1, 27); h1 += h2; - h1 = h1*5 + 0x52dce729; + h1 = hash_rotl_64(h1, 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; - k2 *= c2; k2 = hash_rotl_64(k2, 
33); k2 *= c1; h2 ^= k2; + k2 *= c2; + k2 = hash_rotl_64(k2, 33); + k2 *= c1; + h2 ^= k2; - h2 = hash_rotl_64(h2, 31); h2 += h1; - h2 = h2*5 + 0x38495ab5; + h2 = hash_rotl_64(h2, 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; } } /* tail */ { - const uint8_t *tail = (const uint8_t*)(data + nblocks*16); - uint64_t k1 = 0; - uint64_t k2 = 0; + const uint8_t *tail = (const uint8_t *)(data + nblocks * 16); + uint64_t k1 = 0; + uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; JEMALLOC_FALLTHROUGH; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; JEMALLOC_FALLTHROUGH; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; JEMALLOC_FALLTHROUGH; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; JEMALLOC_FALLTHROUGH; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; JEMALLOC_FALLTHROUGH; - case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; - k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; + case 15: + k2 ^= ((uint64_t)(tail[14])) << 48; JEMALLOC_FALLTHROUGH; - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; JEMALLOC_FALLTHROUGH; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; JEMALLOC_FALLTHROUGH; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; JEMALLOC_FALLTHROUGH; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; JEMALLOC_FALLTHROUGH; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; JEMALLOC_FALLTHROUGH; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; JEMALLOC_FALLTHROUGH; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; JEMALLOC_FALLTHROUGH; - case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; - k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; + case 14: + k2 ^= ((uint64_t)(tail[13])) << 40; + JEMALLOC_FALLTHROUGH; + case 13: + k2 ^= ((uint64_t)(tail[12])) << 32; + JEMALLOC_FALLTHROUGH; + case 12: + k2 ^= ((uint64_t)(tail[11])) << 24; + JEMALLOC_FALLTHROUGH; + case 11: + k2 ^= ((uint64_t)(tail[10])) << 16; + JEMALLOC_FALLTHROUGH; + case 10: + k2 ^= ((uint64_t)(tail[9])) << 8; + JEMALLOC_FALLTHROUGH; + case 
9: + k2 ^= ((uint64_t)(tail[8])) << 0; + k2 *= c2; + k2 = hash_rotl_64(k2, 33); + k2 *= c1; + h2 ^= k2; + JEMALLOC_FALLTHROUGH; + case 8: + k1 ^= ((uint64_t)(tail[7])) << 56; + JEMALLOC_FALLTHROUGH; + case 7: + k1 ^= ((uint64_t)(tail[6])) << 48; + JEMALLOC_FALLTHROUGH; + case 6: + k1 ^= ((uint64_t)(tail[5])) << 40; + JEMALLOC_FALLTHROUGH; + case 5: + k1 ^= ((uint64_t)(tail[4])) << 32; + JEMALLOC_FALLTHROUGH; + case 4: + k1 ^= ((uint64_t)(tail[3])) << 24; + JEMALLOC_FALLTHROUGH; + case 3: + k1 ^= ((uint64_t)(tail[2])) << 16; + JEMALLOC_FALLTHROUGH; + case 2: + k1 ^= ((uint64_t)(tail[1])) << 8; + JEMALLOC_FALLTHROUGH; + case 1: + k1 ^= ((uint64_t)(tail[0])) << 0; + k1 *= c1; + k1 = hash_rotl_64(k1, 31); + k1 *= c2; + h1 ^= k1; break; } } /* finalization */ - h1 ^= len; h2 ^= len; + h1 ^= len; + h2 ^= len; h1 += h2; h2 += h1; diff --git a/include/jemalloc/internal/hook.h b/include/jemalloc/internal/hook.h index 76b9130d..bbbcb320 100644 --- a/include/jemalloc/internal/hook.h +++ b/include/jemalloc/internal/hook.h @@ -83,7 +83,6 @@ enum hook_dalloc_e { }; typedef enum hook_dalloc_e hook_dalloc_t; - enum hook_expand_e { hook_expand_realloc, hook_expand_rallocx, @@ -91,23 +90,22 @@ enum hook_expand_e { }; typedef enum hook_expand_e hook_expand_t; -typedef void (*hook_alloc)( - void *extra, hook_alloc_t type, void *result, uintptr_t result_raw, - uintptr_t args_raw[3]); +typedef void (*hook_alloc)(void *extra, hook_alloc_t type, void *result, + uintptr_t result_raw, uintptr_t args_raw[3]); typedef void (*hook_dalloc)( void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]); -typedef void (*hook_expand)( - void *extra, hook_expand_t type, void *address, size_t old_usize, - size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); +typedef void (*hook_expand)(void *extra, hook_expand_t type, void *address, + size_t old_usize, size_t new_usize, uintptr_t result_raw, + uintptr_t args_raw[4]); typedef struct hooks_s hooks_t; struct hooks_s { - hook_alloc 
alloc_hook; + hook_alloc alloc_hook; hook_dalloc dalloc_hook; hook_expand expand_hook; - void *extra; + void *extra; }; /* @@ -156,8 +154,8 @@ void hook_remove(tsdn_t *tsdn, void *opaque); void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, uintptr_t args_raw[3]); -void hook_invoke_dalloc(hook_dalloc_t type, void *address, - uintptr_t args_raw[3]); +void hook_invoke_dalloc( + hook_dalloc_t type, void *address, uintptr_t args_raw[3]); void hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, size_t new_usize, uintptr_t result_raw, uintptr_t args_raw[4]); diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 2e9fccc2..7a6ba0b9 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -27,7 +27,7 @@ struct hpa_central_s { * * Guarded by grow_mtx. */ - void *eden; + void *eden; size_t eden_len; /* Source for metadata. */ base_t *base; @@ -78,7 +78,7 @@ struct hpa_shard_nonderived_stats_s { /* Completely derived; only used by CTL. */ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { - psset_stats_t psset_stats; + psset_stats_t psset_stats; hpa_shard_nonderived_stats_t nonderived_stats; }; @@ -156,14 +156,15 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); +bool hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); -void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, - hpa_shard_stats_t *dst); +void hpa_shard_stats_merge( + tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst); /* * Notify the shard that we won't use it for allocations much longer. Due to @@ -173,8 +174,8 @@ void hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); -void hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, - bool deferral_allowed); +void hpa_shard_set_deferral_allowed( + tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed); void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); /* diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index d0618f89..f50ff58f 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -13,7 +13,7 @@ struct hpa_hooks_s { void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); - bool (*vectorized_purge)(void* vec, size_t vlen, size_t nbytes); + bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes); }; extern const hpa_hooks_t hpa_hooks_default; diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h index 283510b9..53bcb670 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -8,26 +8,27 @@ typedef struct iovec 
hpa_io_vector_t; #else typedef struct { - void *iov_base; - size_t iov_len; + void *iov_base; + size_t iov_len; } hpa_io_vector_t; #endif /* Actually invoke hooks. If we fail vectorized, use single purges */ static void hpa_try_vectorized_purge( - hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { - bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); - if (!success) { - /* On failure, it is safe to purge again (potential perf + hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + bool success = opt_process_madvise_max_batch > 0 + && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + if (!success) { + /* On failure, it is safe to purge again (potential perf * penalty) If kernel can tell exactly which regions * failed, we could avoid that penalty. */ - for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge(vec[i].iov_base, vec[i].iov_len); - } - } + for (size_t i = 0; i < vlen; ++i) { + shard->central->hooks.purge( + vec[i].iov_base, vec[i].iov_len); + } + } } /* @@ -35,48 +36,48 @@ hpa_try_vectorized_purge( * It invokes the hook when batch limit is reached */ typedef struct { - hpa_io_vector_t *vp; - size_t cur; - size_t total_bytes; - size_t capacity; + hpa_io_vector_t *vp; + size_t cur; + size_t total_bytes; + size_t capacity; } hpa_range_accum_t; static inline void hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { - ra->vp = v; - ra->capacity = sz; - ra->total_bytes = 0; - ra->cur = 0; + ra->vp = v; + ra->capacity = sz; + ra->total_bytes = 0; + ra->cur = 0; } static inline void hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { - assert(ra->total_bytes > 0 && ra->cur > 0); - hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); - ra->cur = 0; - ra->total_bytes = 0; + assert(ra->total_bytes > 0 && ra->cur > 0); + hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + 
ra->cur = 0; + ra->total_bytes = 0; } static inline void hpa_range_accum_add( - hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { - assert(ra->cur < ra->capacity); + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + assert(ra->cur < ra->capacity); - ra->vp[ra->cur].iov_base = addr; - ra->vp[ra->cur].iov_len = sz; - ra->total_bytes += sz; - ra->cur++; + ra->vp[ra->cur].iov_base = addr; + ra->vp[ra->cur].iov_len = sz; + ra->total_bytes += sz; + ra->cur++; - if (ra->cur == ra->capacity) { - hpa_range_accum_flush(ra, shard); - } + if (ra->cur == ra->capacity) { + hpa_range_accum_flush(ra, shard); + } } static inline void hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { - if (ra->cur > 0) { - hpa_range_accum_flush(ra, shard); - } + if (ra->cur > 0) { + hpa_range_accum_flush(ra, shard); + } } /* @@ -84,14 +85,14 @@ hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { */ typedef struct { hpdata_purge_state_t state; - hpdata_t *hp; - bool dehugify; + hpdata_t *hp; + bool dehugify; } hpa_purge_item_t; typedef struct hpa_purge_batch_s hpa_purge_batch_t; struct hpa_purge_batch_s { hpa_purge_item_t *items; - size_t items_capacity; + size_t items_capacity; /* Number of huge pages to purge in current batch */ size_t item_cnt; /* Number of ranges to purge in current batch */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index a8a4a552..75550f9b 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -73,7 +73,7 @@ struct hpdata_s { bool h_hugify_allowed; /* When we became a hugification candidate. */ nstime_t h_time_hugify_allowed; - bool h_in_psset_hugify_container; + bool h_in_psset_hugify_container; /* Whether or not a purge or hugify is currently happening. 
*/ bool h_mid_purge; @@ -186,8 +186,8 @@ hpdata_purge_allowed_get(const hpdata_t *hpdata) { static inline void hpdata_purge_allowed_set(hpdata_t *hpdata, bool purge_allowed) { - assert(purge_allowed == false || !hpdata->h_mid_purge); - hpdata->h_purge_allowed = purge_allowed; + assert(purge_allowed == false || !hpdata->h_mid_purge); + hpdata->h_purge_allowed = purge_allowed; } static inline bool @@ -250,7 +250,6 @@ hpdata_changing_state_get(const hpdata_t *hpdata) { return hpdata->h_mid_purge || hpdata->h_mid_hugify; } - static inline bool hpdata_updating_get(const hpdata_t *hpdata) { return hpdata->h_updating; @@ -317,7 +316,7 @@ hpdata_assert_empty(hpdata_t *hpdata) { */ static inline bool hpdata_consistent(hpdata_t *hpdata) { - if(fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) + if (fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) != hpdata_longest_free_range_get(hpdata)) { return false; } @@ -368,7 +367,7 @@ void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); * offset within that allocation. */ void *hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz); -void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); +void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); /* * The hpdata_purge_prepare_t allows grabbing the metadata required to purge @@ -377,10 +376,10 @@ void hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz); */ typedef struct hpdata_purge_state_s hpdata_purge_state_t; struct hpdata_purge_state_s { - size_t npurged; - size_t ndirty_to_purge; + size_t npurged; + size_t ndirty_to_purge; fb_group_t to_purge[FB_NGROUPS(HUGEPAGE_PAGES)]; - size_t next_purge_search_begin; + size_t next_purge_search_begin; }; /* @@ -398,8 +397,8 @@ struct hpdata_purge_state_s { * Returns the number of dirty pages that will be purged and sets nranges * to number of ranges with dirty pages that will be purged. 
*/ -size_t hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, - size_t *nranges); +size_t hpdata_purge_begin( + hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges); /* * If there are more extents to purge, sets *r_purge_addr and *r_purge_size to diff --git a/include/jemalloc/internal/inspect.h b/include/jemalloc/internal/inspect.h index 0da920ca..e8ed44d3 100644 --- a/include/jemalloc/internal/inspect.h +++ b/include/jemalloc/internal/inspect.h @@ -26,7 +26,7 @@ typedef struct inspect_extent_util_stats_verbose_s inspect_extent_util_stats_verbose_t; struct inspect_extent_util_stats_verbose_s { - void *slabcur_addr; + void *slabcur_addr; size_t nfree; size_t nregs; size_t size; @@ -34,10 +34,10 @@ struct inspect_extent_util_stats_verbose_s { size_t bin_nregs; }; -void inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size); +void inspect_extent_util_stats_get( + tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size); void inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, - size_t *nfree, size_t *nregs, size_t *size, - size_t *bin_nfree, size_t *bin_nregs, void **slabcur_addr); + size_t *nfree, size_t *nregs, size_t *size, size_t *bin_nfree, + size_t *bin_nregs, void **slabcur_addr); #endif /* JEMALLOC_INTERNAL_INSPECT_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h index 0bca9133..2ca12c4a 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -3,64 +3,65 @@ #include #ifdef _WIN32 -# include -# include "msvc_compat/windows_extra.h" -# include "msvc_compat/strings.h" -# ifdef _WIN64 -# if LG_VADDR <= 32 -# error Generate the headers using x64 vcargs -# endif -# else -# if LG_VADDR > 32 -# undef LG_VADDR -# define LG_VADDR 32 -# endif -# endif +# include +# include "msvc_compat/windows_extra.h" +# 
include "msvc_compat/strings.h" +# ifdef _WIN64 +# if LG_VADDR <= 32 +# error Generate the headers using x64 vcargs +# endif +# else +# if LG_VADDR > 32 +# undef LG_VADDR +# define LG_VADDR 32 +# endif +# endif #else -# include -# include -# if !defined(__pnacl__) && !defined(__native_client__) -# include -# if !defined(SYS_write) && defined(__NR_write) -# define SYS_write __NR_write -# endif -# if defined(SYS_open) && defined(__aarch64__) - /* Android headers may define SYS_open to __NR_open even though +# include +# include +# if !defined(__pnacl__) && !defined(__native_client__) +# include +# if !defined(SYS_write) && defined(__NR_write) +# define SYS_write __NR_write +# endif +# if defined(SYS_open) && defined(__aarch64__) +/* Android headers may define SYS_open to __NR_open even though * __NR_open may not exist on AArch64 (superseded by __NR_openat). */ -# undef SYS_open -# endif -# include -# endif -# include -# if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__OpenBSD__) -# include -# include -# if defined(__FreeBSD__) -# define cpu_set_t cpuset_t -# endif -# endif -# include -# ifdef JEMALLOC_OS_UNFAIR_LOCK -# include -# endif -# ifdef JEMALLOC_GLIBC_MALLOC_HOOK -# include -# endif -# include -# include -# include -# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME -# include -# endif +# undef SYS_open +# endif +# include +# endif +# include +# if defined(__FreeBSD__) || defined(__DragonFly__) \ + || defined(__OpenBSD__) +# include +# include +# if defined(__FreeBSD__) +# define cpu_set_t cpuset_t +# endif +# endif +# include +# ifdef JEMALLOC_OS_UNFAIR_LOCK +# include +# endif +# ifdef JEMALLOC_GLIBC_MALLOC_HOOK +# include +# endif +# include +# include +# include +# ifdef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME +# include +# endif #endif #include #include #ifndef SIZE_T_MAX -# define SIZE_T_MAX SIZE_MAX +# define SIZE_T_MAX SIZE_MAX #endif #ifndef SSIZE_MAX -# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) +# define SSIZE_MAX ((ssize_t)(SIZE_T_MAX >> 1)) 
#endif #include #include @@ -69,30 +70,30 @@ #include #include #ifndef offsetof -# define offsetof(type, member) ((size_t)&(((type *)NULL)->member)) +# define offsetof(type, member) ((size_t) & (((type *)NULL)->member)) #endif #include #include #include #ifdef _MSC_VER -# include +# include typedef intptr_t ssize_t; -# define PATH_MAX 1024 -# define STDERR_FILENO 2 -# define __func__ __FUNCTION__ -# ifdef JEMALLOC_HAS_RESTRICT -# define restrict __restrict -# endif +# define PATH_MAX 1024 +# define STDERR_FILENO 2 +# define __func__ __FUNCTION__ +# ifdef JEMALLOC_HAS_RESTRICT +# define restrict __restrict +# endif /* Disable warnings about deprecated system functions. */ -# pragma warning(disable: 4996) -#if _MSC_VER < 1800 +# pragma warning(disable : 4996) +# if _MSC_VER < 1800 static int isblank(int c) { return (c == '\t' || c == ' '); } -#endif +# endif #else -# include +# include #endif #include @@ -102,7 +103,7 @@ isblank(int c) { * classes. */ #ifdef small -# undef small +# undef small #endif /* diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index 3b42f833..b502c7e7 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -12,34 +12,34 @@ extern bool malloc_slow; /* Run-time options. 
*/ -extern bool opt_abort; -extern bool opt_abort_conf; -extern bool opt_trust_madvise; -extern bool opt_confirm_conf; -extern bool opt_hpa; +extern bool opt_abort; +extern bool opt_abort_conf; +extern bool opt_trust_madvise; +extern bool opt_confirm_conf; +extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; -extern sec_opts_t opt_hpa_sec_opts; +extern sec_opts_t opt_hpa_sec_opts; extern const char *opt_junk; -extern bool opt_junk_alloc; -extern bool opt_junk_free; +extern bool opt_junk_alloc; +extern bool opt_junk_free; extern void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size); extern void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size); extern void (*JET_MUTABLE invalid_conf_abort)(void); -extern bool opt_utrace; -extern bool opt_xmalloc; -extern bool opt_experimental_infallible_new; -extern bool opt_experimental_tcache_gc; -extern bool opt_zero; -extern unsigned opt_narenas; +extern bool opt_utrace; +extern bool opt_xmalloc; +extern bool opt_experimental_infallible_new; +extern bool opt_experimental_tcache_gc; +extern bool opt_zero; +extern unsigned opt_narenas; extern zero_realloc_action_t opt_zero_realloc_action; -extern malloc_init_t malloc_init_state; -extern const char *const zero_realloc_mode_names[]; -extern atomic_zu_t zero_realloc_count; -extern bool opt_cache_oblivious; -extern unsigned opt_debug_double_free_max_scan; -extern size_t opt_calloc_madvise_threshold; -extern bool opt_disable_large_size_classes; +extern malloc_init_t malloc_init_state; +extern const char *const zero_realloc_mode_names[]; +extern atomic_zu_t zero_realloc_count; +extern bool opt_cache_oblivious; +extern unsigned opt_debug_double_free_max_scan; +extern size_t opt_calloc_madvise_threshold; +extern bool opt_disable_large_size_classes; extern const char *opt_malloc_conf_symlink; extern const char *opt_malloc_conf_env_var; @@ -64,24 +64,24 @@ extern atomic_p_t arenas[]; extern unsigned huge_arena_ind; -void *a0malloc(size_t size); -void a0dalloc(void 
*ptr); -void *bootstrap_malloc(size_t size); -void *bootstrap_calloc(size_t num, size_t size); -void bootstrap_free(void *ptr); -void arena_set(unsigned ind, arena_t *arena); +void *a0malloc(size_t size); +void a0dalloc(void *ptr); +void *bootstrap_malloc(size_t size); +void *bootstrap_calloc(size_t num, size_t size); +void bootstrap_free(void *ptr); +void arena_set(unsigned ind, arena_t *arena); unsigned narenas_total_get(void); arena_t *arena_init(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); arena_t *arena_choose_hard(tsd_t *tsd, bool internal); -void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena); -void iarena_cleanup(tsd_t *tsd); -void arena_cleanup(tsd_t *tsd); -size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); -void jemalloc_prefork(void); -void jemalloc_postfork_parent(void); -void jemalloc_postfork_child(void); -void sdallocx_default(void *ptr, size_t size, int flags); -void free_default(void *ptr); -void *malloc_default(size_t size); +void arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena); +void iarena_cleanup(tsd_t *tsd); +void arena_cleanup(tsd_t *tsd); +size_t batch_alloc(void **ptrs, size_t num, size_t size, int flags); +void jemalloc_prefork(void); +void jemalloc_postfork_parent(void); +void jemalloc_postfork_child(void); +void sdallocx_default(void *ptr, size_t size, int flags); +void free_default(void *ptr); +void *malloc_default(size_t size); #endif /* JEMALLOC_INTERNAL_EXTERNS_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 111cda42..8513effd 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -20,12 +20,12 @@ malloc_getcpu(void) { return (malloc_cpuid_t)sched_getcpu(); #elif defined(JEMALLOC_HAVE_RDTSCP) unsigned int ecx; - asm volatile("rdtscp" : "=c" (ecx) :: "eax", "edx"); + asm volatile("rdtscp" : 
"=c"(ecx)::"eax", "edx"); return (malloc_cpuid_t)(ecx & 0xfff); #elif defined(__aarch64__) && defined(__APPLE__) /* Other oses most likely use tpidr_el0 instead */ uintptr_t c; - asm volatile("mrs %x0, tpidrro_el0" : "=r"(c) :: "memory"); + asm volatile("mrs %x0, tpidrro_el0" : "=r"(c)::"memory"); return (malloc_cpuid_t)(c & (1 << 3) - 1); #else not_reached(); @@ -42,8 +42,8 @@ percpu_arena_choose(void) { assert(cpuid >= 0); unsigned arena_ind; - if ((opt_percpu_arena == percpu_arena) || ((unsigned)cpuid < ncpus / - 2)) { + if ((opt_percpu_arena == percpu_arena) + || ((unsigned)cpuid < ncpus / 2)) { arena_ind = cpuid; } else { assert(opt_percpu_arena == per_phycpu_arena); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_b.h b/include/jemalloc/internal/jemalloc_internal_inlines_b.h index 2ddb4a89..dad37a9c 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_b.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_b.h @@ -24,13 +24,12 @@ percpu_arena_update(tsd_t *tsd, unsigned cpu) { if (tcache != NULL) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); assert(tcache_slow->arena != NULL); - tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, - tcache, newarena); + tcache_arena_reassociate( + tsd_tsdn(tsd), tcache_slow, tcache, newarena); } } } - /* Choose an arena based on a per-thread value. 
*/ static inline arena_t * arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { @@ -51,18 +50,18 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { assert(ret); if (tcache_available(tsd)) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get(tsd); - tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_t *tcache = tsd_tcachep_get(tsd); if (tcache_slow->arena != NULL) { /* See comments in tsd_tcache_data_init().*/ - assert(tcache_slow->arena == - arena_get(tsd_tsdn(tsd), 0, false)); + assert(tcache_slow->arena + == arena_get(tsd_tsdn(tsd), 0, false)); if (tcache_slow->arena != ret) { tcache_arena_reassociate(tsd_tsdn(tsd), tcache_slow, tcache, ret); } } else { - tcache_arena_associate(tsd_tsdn(tsd), - tcache_slow, tcache, ret); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, ret); } } } @@ -72,10 +71,10 @@ arena_choose_impl(tsd_t *tsd, arena_t *arena, bool internal) { * auto percpu arena range, (i.e. thread is assigned to a manually * managed arena), then percpu arena is skipped. 
*/ - if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) && - !internal && (arena_ind_get(ret) < - percpu_arena_ind_limit(opt_percpu_arena)) && (ret->last_thd != - tsd_tsdn(tsd))) { + if (have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena) + && !internal + && (arena_ind_get(ret) < percpu_arena_ind_limit(opt_percpu_arena)) + && (ret->last_thd != tsd_tsdn(tsd))) { unsigned ind = percpu_arena_choose(); if (arena_ind_get(ret) != ind) { percpu_arena_update(tsd, ind); diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 39c196a5..2c61f8c4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -63,11 +63,12 @@ iallocztm_explicit_slab(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } - ret = arena_malloc(tsdn, arena, size, ind, zero, slab, tcache, slow_path); + ret = arena_malloc( + tsdn, arena, size, ind, zero, slab, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { arena_internal_add(iaalloc(tsdn, ret), isalloc(tsdn, ret)); } @@ -78,8 +79,8 @@ JEMALLOC_ALWAYS_INLINE void * iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, bool is_internal, arena_t *arena, bool slow_path) { bool slab = sz_can_use_slab(size); - return iallocztm_explicit_slab(tsdn, size, ind, zero, slab, tcache, - is_internal, arena, slow_path); + return iallocztm_explicit_slab( + tsdn, size, ind, zero, slab, tcache, is_internal, arena, slow_path); } JEMALLOC_ALWAYS_INLINE void * @@ -89,8 +90,8 @@ ialloc(tsd_t *tsd, size_t size, 
szind_t ind, bool zero, bool slow_path) { } JEMALLOC_ALWAYS_INLINE void * -ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, - bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { +ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, + bool zero, bool slab, tcache_t *tcache, bool is_internal, arena_t *arena) { void *ret; assert(!slab || sz_can_use_slab(usize)); /* slab && large is illegal */ @@ -98,8 +99,8 @@ ipallocztm_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero assert(usize == sz_sa2u(usize, alignment)); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ret = arena_palloc(tsdn, arena, usize, alignment, zero, slab, tcache); assert(ALIGNMENT_ADDR2BASE(ret, alignment) == ret); @@ -123,10 +124,10 @@ ipalloct(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, } JEMALLOC_ALWAYS_INLINE void * -ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, - bool zero, bool slab, tcache_t *tcache, arena_t *arena) { - return ipallocztm_explicit_slab(tsdn, usize, alignment, zero, slab, - tcache, false, arena); +ipalloct_explicit_slab(tsdn_t *tsdn, size_t usize, size_t alignment, bool zero, + bool slab, tcache_t *tcache, arena_t *arena) { + return ipallocztm_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, false, arena); } JEMALLOC_ALWAYS_INLINE void * @@ -146,13 +147,13 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, assert(ptr != NULL); assert(!is_internal || tcache == NULL); assert(!is_internal || arena_is_auto(iaalloc(tsdn, ptr))); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (config_stats && is_internal) 
{ arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, ptr)); } - if (!is_internal && !tsdn_null(tsdn) && - tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { + if (!is_internal && !tsdn_null(tsdn) + && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } arena_dalloc(tsdn, ptr, tcache, alloc_ctx, slow_path); @@ -166,8 +167,8 @@ idalloc(tsd_t *tsd, void *ptr) { JEMALLOC_ALWAYS_INLINE void isdalloct(tsdn_t *tsdn, void *ptr, size_t size, tcache_t *tcache, emap_alloc_ctx_t *alloc_ctx, bool slow_path) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); arena_sdalloc(tsdn, ptr, size, tcache, alloc_ctx, slow_path); } @@ -175,17 +176,17 @@ JEMALLOC_ALWAYS_INLINE void * iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, hook_ralloc_args_t *hook_args) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); - void *p; + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); + void *p; size_t usize, copysize; usize = sz_sa2u(size, alignment); if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - p = ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, - tcache, arena); + p = ipalloct_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, arena); if (p == NULL) { return NULL; } @@ -195,11 +196,12 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, */ copysize = (size < oldsize) ? size : oldsize; memcpy(p, ptr, copysize); - hook_invoke_alloc(hook_args->is_realloc - ? hook_alloc_realloc : hook_alloc_rallocx, p, (uintptr_t)p, - hook_args->args); - hook_invoke_dalloc(hook_args->is_realloc - ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + hook_invoke_alloc( + hook_args->is_realloc ? 
hook_alloc_realloc : hook_alloc_rallocx, p, + (uintptr_t)p, hook_args->args); + hook_invoke_dalloc( + hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx, + ptr, hook_args->args); isdalloct(tsdn, ptr, oldsize, tcache, NULL, true); return p; } @@ -214,15 +216,14 @@ iralloct_realign(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, JEMALLOC_ALWAYS_INLINE void * iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, bool zero, bool slab, tcache_t *tcache, arena_t *arena, - hook_ralloc_args_t *hook_args) -{ + hook_ralloc_args_t *hook_args) { assert(ptr != NULL); assert(size != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { + if (alignment != 0 + && ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0) { /* * Existing object alignment is inadequate; allocate new space * and copy. 
@@ -238,8 +239,7 @@ iralloct_explicit_slab(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, JEMALLOC_ALWAYS_INLINE void * iralloct(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, - hook_ralloc_args_t *hook_args) -{ + hook_ralloc_args_t *hook_args) { bool slab = sz_can_use_slab(usize); return iralloct_explicit_slab(tsdn, ptr, oldsize, size, alignment, zero, slab, tcache, arena, hook_args); @@ -257,23 +257,23 @@ ixalloc(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, size_t alignment, bool zero, size_t *newsize) { assert(ptr != NULL); assert(size != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - if (alignment != 0 && ((uintptr_t)ptr & ((uintptr_t)alignment-1)) - != 0) { + if (alignment != 0 + && ((uintptr_t)ptr & ((uintptr_t)alignment - 1)) != 0) { /* Existing object alignment is inadequate. */ *newsize = oldsize; return true; } - return arena_ralloc_no_move(tsdn, ptr, oldsize, size, extra, zero, - newsize); + return arena_ralloc_no_move( + tsdn, ptr, oldsize, size, extra, zero, newsize); } JEMALLOC_ALWAYS_INLINE void -fastpath_success_finish(tsd_t *tsd, uint64_t allocated_after, - cache_bin_t *bin, void *ret) { +fastpath_success_finish( + tsd_t *tsd, uint64_t allocated_after, cache_bin_t *bin, void *ret) { thread_allocated_set(tsd, allocated_after); if (config_stats) { bin->tstats.nrequests++; @@ -331,8 +331,8 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { sz_size2index_usize_fastpath(size, &ind, &usize); /* Fast path relies on size being a bin. 
*/ assert(ind < SC_NBINS); - assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) && - (size <= SC_SMALL_MAXCLASS)); + assert((SC_LOOKUP_MAXCLASS < SC_SMALL_MAXCLASS) + && (size <= SC_SMALL_MAXCLASS)); uint64_t allocated, threshold; te_malloc_fastpath_ctx(tsd, &allocated, &threshold); @@ -363,7 +363,7 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { cache_bin_t *bin = &tcache->bins[ind]; /* Suppress spurious warning from static analysis */ assert(bin != NULL); - bool tcache_success; + bool tcache_success; void *ret; /* @@ -388,56 +388,56 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { JEMALLOC_ALWAYS_INLINE tcache_t * tcache_get_from_ind(tsd_t *tsd, unsigned tcache_ind, bool slow, bool is_alloc) { - tcache_t *tcache; - if (tcache_ind == TCACHE_IND_AUTOMATIC) { - if (likely(!slow)) { - /* Getting tcache ptr unconditionally. */ - tcache = tsd_tcachep_get(tsd); - assert(tcache == tcache_get(tsd)); - } else if (is_alloc || - likely(tsd_reentrancy_level_get(tsd) == 0)) { - tcache = tcache_get(tsd); - } else { - tcache = NULL; - } - } else { - /* + tcache_t *tcache; + if (tcache_ind == TCACHE_IND_AUTOMATIC) { + if (likely(!slow)) { + /* Getting tcache ptr unconditionally. */ + tcache = tsd_tcachep_get(tsd); + assert(tcache == tcache_get(tsd)); + } else if (is_alloc + || likely(tsd_reentrancy_level_get(tsd) == 0)) { + tcache = tcache_get(tsd); + } else { + tcache = NULL; + } + } else { + /* * Should not specify tcache on deallocation path when being * reentrant. 
*/ - assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 || - tsd_state_nocleanup(tsd)); - if (tcache_ind == TCACHE_IND_NONE) { - tcache = NULL; - } else { - tcache = tcaches_get(tsd, tcache_ind); - } - } - return tcache; + assert(is_alloc || tsd_reentrancy_level_get(tsd) == 0 + || tsd_state_nocleanup(tsd)); + if (tcache_ind == TCACHE_IND_NONE) { + tcache = NULL; + } else { + tcache = tcaches_get(tsd, tcache_ind); + } + } + return tcache; } JEMALLOC_ALWAYS_INLINE bool maybe_check_alloc_ctx(tsd_t *tsd, void *ptr, emap_alloc_ctx_t *alloc_ctx) { - if (config_opt_size_checks) { - emap_alloc_ctx_t dbg_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &dbg_ctx); - if (alloc_ctx->szind != dbg_ctx.szind) { - safety_check_fail_sized_dealloc( - /* current_dealloc */ true, ptr, - /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), - /* input_size */ emap_alloc_ctx_usize_get( - alloc_ctx)); - return true; - } - if (alloc_ctx->slab != dbg_ctx.slab) { - safety_check_fail( - "Internal heap corruption detected: " - "mismatch in slab bit"); - return true; - } - } - return false; + if (config_opt_size_checks) { + emap_alloc_ctx_t dbg_ctx; + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &dbg_ctx); + if (alloc_ctx->szind != dbg_ctx.szind) { + safety_check_fail_sized_dealloc( + /* current_dealloc */ true, ptr, + /* true_size */ emap_alloc_ctx_usize_get(&dbg_ctx), + /* input_size */ + emap_alloc_ctx_usize_get(alloc_ctx)); + return true; + } + if (alloc_ctx->slab != dbg_ctx.slab) { + safety_check_fail( + "Internal heap corruption detected: " + "mismatch in slab bit"); + return true; + } + } + return false; } JEMALLOC_ALWAYS_INLINE bool @@ -447,7 +447,7 @@ prof_sample_aligned(const void *ptr) { JEMALLOC_ALWAYS_INLINE bool free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { - /* + /* * free_fastpath do not handle two uncommon cases: 1) sampled profiled * objects and 2) sampled junk & stash for use-after-free detection. 
* Both have special alignments which are used to escape the fastpath. @@ -456,144 +456,145 @@ free_fastpath_nonfast_aligned(void *ptr, bool check_prof) { * are enabled (the assertion below). Avoiding redundant checks since * this is on the fastpath -- at most one runtime branch from this. */ - if (config_debug && cache_bin_nonfast_aligned(ptr)) { - assert(prof_sample_aligned(ptr)); - } + if (config_debug && cache_bin_nonfast_aligned(ptr)) { + assert(prof_sample_aligned(ptr)); + } - if (config_prof && check_prof) { - /* When prof is enabled, the prof_sample alignment is enough. */ - if (prof_sample_aligned(ptr)) { - return true; - } else { - return false; - } - } + if (config_prof && check_prof) { + /* When prof is enabled, the prof_sample alignment is enough. */ + if (prof_sample_aligned(ptr)) { + return true; + } else { + return false; + } + } - if (config_uaf_detection) { - if (cache_bin_nonfast_aligned(ptr)) { - return true; - } else { - return false; - } - } + if (config_uaf_detection) { + if (cache_bin_nonfast_aligned(ptr)) { + return true; + } else { + return false; + } + } - return false; + return false; } /* Returns whether or not the free attempt was successful. */ JEMALLOC_ALWAYS_INLINE -bool free_fastpath(void *ptr, size_t size, bool size_hint) { - tsd_t *tsd = tsd_get(false); - /* The branch gets optimized away unless tsd_get_allocates(). */ - if (unlikely(tsd == NULL)) { - return false; - } - /* +bool +free_fastpath(void *ptr, size_t size, bool size_hint) { + tsd_t *tsd = tsd_get(false); + /* The branch gets optimized away unless tsd_get_allocates(). */ + if (unlikely(tsd == NULL)) { + return false; + } + /* * The tsd_fast() / initialized checks are folded into the branch * testing (deallocated_after >= threshold) later in this function. * The threshold will be set to 0 when !tsd_fast. 
*/ - assert(tsd_fast(tsd) || - *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); + assert(tsd_fast(tsd) + || *tsd_thread_deallocated_next_event_fastp_get_unsafe(tsd) == 0); - emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false}); - size_t usize; - if (!size_hint) { - bool err = emap_alloc_ctx_try_lookup_fast(tsd, - &arena_emap_global, ptr, &alloc_ctx); + emap_alloc_ctx_t alloc_ctx JEMALLOC_CC_SILENCE_INIT({0, 0, false}); + size_t usize; + if (!size_hint) { + bool err = emap_alloc_ctx_try_lookup_fast( + tsd, &arena_emap_global, ptr, &alloc_ctx); - /* Note: profiled objects will have alloc_ctx.slab set */ - if (unlikely(err || !alloc_ctx.slab || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ false))) { - return false; - } - assert(alloc_ctx.szind != SC_NSIZES); + /* Note: profiled objects will have alloc_ctx.slab set */ + if (unlikely(err || !alloc_ctx.slab + || free_fastpath_nonfast_aligned(ptr, + /* check_prof */ false))) { + return false; + } + assert(alloc_ctx.szind != SC_NSIZES); usize = sz_index2size(alloc_ctx.szind); - } else { - /* + } else { + /* * Check for both sizes that are too large, and for sampled / * special aligned objects. The alignment check will also check * for null ptr. */ - if (unlikely(size > SC_LOOKUP_MAXCLASS || - free_fastpath_nonfast_aligned(ptr, - /* check_prof */ true))) { - return false; - } + if (unlikely(size > SC_LOOKUP_MAXCLASS + || free_fastpath_nonfast_aligned(ptr, + /* check_prof */ true))) { + return false; + } sz_size2index_usize_fastpath(size, &alloc_ctx.szind, &usize); - /* Max lookup class must be small. */ - assert(alloc_ctx.szind < SC_NBINS); - /* This is a dead store, except when opt size checking is on. */ - alloc_ctx.slab = true; - } - /* + /* Max lookup class must be small. */ + assert(alloc_ctx.szind < SC_NBINS); + /* This is a dead store, except when opt size checking is on. */ + alloc_ctx.slab = true; + } + /* * Currently the fastpath only handles small sizes. 
The branch on * SC_LOOKUP_MAXCLASS makes sure of it. This lets us avoid checking * tcache szind upper limit (i.e. tcache_max) as well. */ - assert(alloc_ctx.slab); + assert(alloc_ctx.slab); - uint64_t deallocated, threshold; - te_free_fastpath_ctx(tsd, &deallocated, &threshold); + uint64_t deallocated, threshold; + te_free_fastpath_ctx(tsd, &deallocated, &threshold); - uint64_t deallocated_after = deallocated + usize; - /* + uint64_t deallocated_after = deallocated + usize; + /* * Check for events and tsd non-nominal (fast_threshold will be set to * 0) in a single branch. Note that this handles the uninitialized case * as well (TSD init will be triggered on the non-fastpath). Therefore * anything depends on a functional TSD (e.g. the alloc_ctx sanity check * below) needs to be after this branch. */ - if (unlikely(deallocated_after >= threshold)) { - return false; - } - assert(tsd_fast(tsd)); - bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); - if (fail) { - /* See the comment in isfree. */ - return true; - } + if (unlikely(deallocated_after >= threshold)) { + return false; + } + assert(tsd_fast(tsd)); + bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); + if (fail) { + /* See the comment in isfree. */ + return true; + } - tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, - /* slow */ false, /* is_alloc */ false); - cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; + tcache_t *tcache = tcache_get_from_ind(tsd, TCACHE_IND_AUTOMATIC, + /* slow */ false, /* is_alloc */ false); + cache_bin_t *bin = &tcache->bins[alloc_ctx.szind]; - /* + /* * If junking were enabled, this is where we would do it. It's not * though, since we ensured above that we're on the fast path. Assert * that to double-check. 
*/ - assert(!opt_junk_free); + assert(!opt_junk_free); - if (!cache_bin_dalloc_easy(bin, ptr)) { - return false; - } + if (!cache_bin_dalloc_easy(bin, ptr)) { + return false; + } - *tsd_thread_deallocatedp_get(tsd) = deallocated_after; + *tsd_thread_deallocatedp_get(tsd) = deallocated_after; - return true; + return true; } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_sdallocx_noflags(void *ptr, size_t size) { - if (!free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, 0); - } + if (!free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, 0); + } } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_sdallocx_impl(void *ptr, size_t size, int flags) { - if (flags != 0 || !free_fastpath(ptr, size, true)) { - sdallocx_default(ptr, size, flags); - } + if (flags != 0 || !free_fastpath(ptr, size, true)) { + sdallocx_default(ptr, size, flags); + } } JEMALLOC_ALWAYS_INLINE void JEMALLOC_NOTHROW je_free_impl(void *ptr) { - if (!free_fastpath(ptr, 0, false)) { - free_default(ptr); - } + if (!free_fastpath(ptr, 0, false)) { + free_default(ptr); + } } #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_macros.h b/include/jemalloc/internal/jemalloc_internal_macros.h index 407e868a..eb1ca119 100644 --- a/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/include/jemalloc/internal/jemalloc_internal_macros.h @@ -2,45 +2,46 @@ #define JEMALLOC_INTERNAL_MACROS_H #ifdef JEMALLOC_DEBUG -# define JEMALLOC_ALWAYS_INLINE static inline +# define JEMALLOC_ALWAYS_INLINE static inline #else -# ifdef _MSC_VER -# define JEMALLOC_ALWAYS_INLINE static __forceinline -# else -# define JEMALLOC_ALWAYS_INLINE JEMALLOC_ATTR(always_inline) static inline -# endif +# ifdef _MSC_VER +# define JEMALLOC_ALWAYS_INLINE static __forceinline +# else +# define JEMALLOC_ALWAYS_INLINE \ + JEMALLOC_ATTR(always_inline) static inline +# endif #endif #ifdef _MSC_VER -# define inline _inline +# define inline _inline #endif #define UNUSED 
JEMALLOC_ATTR(unused) -#define ZU(z) ((size_t)z) -#define ZD(z) ((ssize_t)z) -#define QU(q) ((uint64_t)q) -#define QD(q) ((int64_t)q) +#define ZU(z) ((size_t)z) +#define ZD(z) ((ssize_t)z) +#define QU(q) ((uint64_t)q) +#define QD(q) ((int64_t)q) -#define KZU(z) ZU(z##ULL) -#define KZD(z) ZD(z##LL) -#define KQU(q) QU(q##ULL) -#define KQD(q) QI(q##LL) +#define KZU(z) ZU(z##ULL) +#define KZD(z) ZD(z##LL) +#define KQU(q) QU(q##ULL) +#define KQD(q) QI(q##LL) #ifndef __DECONST -# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) +# define __DECONST(type, var) ((type)(uintptr_t)(const void *)(var)) #endif #if !defined(JEMALLOC_HAS_RESTRICT) || defined(__cplusplus) -# define restrict +# define restrict #endif /* Various function pointers are static and immutable except during testing. */ #ifdef JEMALLOC_JET -# define JET_MUTABLE -# define JET_EXTERN extern +# define JET_MUTABLE +# define JET_EXTERN extern #else -# define JET_MUTABLE const -# define JET_EXTERN static +# define JET_MUTABLE const +# define JET_EXTERN static #endif #define JEMALLOC_VA_ARGS_HEAD(head, ...) 
head @@ -48,91 +49,93 @@ /* Diagnostic suppression macros */ #if defined(_MSC_VER) && !defined(__clang__) -# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) -# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable:W)) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS -# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS -# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED -# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +# define JEMALLOC_DIAGNOSTIC_PUSH __pragma(warning(push)) +# define JEMALLOC_DIAGNOSTIC_POP __pragma(warning(pop)) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) __pragma(warning(disable : W)) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* #pragma GCC diagnostic first appeared in gcc 4.6. */ -#elif (defined(__GNUC__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && \ - (__GNUC_MINOR__ > 5)))) || defined(__clang__) +#elif (defined(__GNUC__) \ + && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ > 5)))) \ + || defined(__clang__) /* * The JEMALLOC_PRAGMA__ macro is an implementation detail of the GCC and Clang * diagnostic suppression macros and should not be used anywhere else. 
*/ -# define JEMALLOC_PRAGMA__(X) _Pragma(#X) -# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) -# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \ - JEMALLOC_PRAGMA__(GCC diagnostic ignored W) +# define JEMALLOC_PRAGMA__(X) _Pragma(#X) +# define JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_PRAGMA__(GCC diagnostic push) +# define JEMALLOC_DIAGNOSTIC_POP JEMALLOC_PRAGMA__(GCC diagnostic pop) +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) \ + JEMALLOC_PRAGMA__(GCC diagnostic ignored W) /* * The -Wmissing-field-initializers warning is buggy in GCC versions < 5.1 and * all clang versions up to version 7 (currently trunk, unreleased). This macro * suppresses the warning for the affected compiler versions only. */ -# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) || \ - defined(__clang__) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wmissing-field-initializers") -# else -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS -# endif +# if ((defined(__GNUC__) && !defined(__clang__)) && (__GNUC__ < 5)) \ + || defined(__clang__) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS \ + JEMALLOC_DIAGNOSTIC_IGNORE( \ + "-Wmissing-field-initializers") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# endif -# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address") -# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") -# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter") -# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7) -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=") -# else -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN -# endif -# 
ifdef JEMALLOC_HAVE_ATTR_DEPRECATED -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ - JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations") -# else -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED -# endif -# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ - JEMALLOC_DIAGNOSTIC_PUSH \ - JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wframe-address") +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wtype-limits") +# define JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-parameter") +# if defined(__GNUC__) && !defined(__clang__) && (__GNUC__ >= 7) +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Walloc-size-larger-than=") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# endif +# ifdef JEMALLOC_HAVE_ATTR_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + JEMALLOC_DIAGNOSTIC_IGNORE("-Wdeprecated-declarations") +# else +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# endif +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_UNUSED_PARAMETER #else -# define JEMALLOC_DIAGNOSTIC_PUSH -# define JEMALLOC_DIAGNOSTIC_POP -# define JEMALLOC_DIAGNOSTIC_IGNORE(W) -# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS -# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS -# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -# define JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN -# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED -# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS +# define JEMALLOC_DIAGNOSTIC_PUSH +# define JEMALLOC_DIAGNOSTIC_POP +# define JEMALLOC_DIAGNOSTIC_IGNORE(W) +# define JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS +# define JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS +# define JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS +# define 
JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN +# define JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED +# define JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS #endif #ifdef __clang_analyzer__ -# define JEMALLOC_CLANG_ANALYZER +# define JEMALLOC_CLANG_ANALYZER #endif #ifdef JEMALLOC_CLANG_ANALYZER -# define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress)) -# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS __attribute__((suppress)) +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) = v #else -# define JEMALLOC_CLANG_ANALYZER_SUPPRESS -# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) +# define JEMALLOC_CLANG_ANALYZER_SUPPRESS +# define JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(v) #endif -#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ - JEMALLOC_DIAGNOSTIC_PUSH \ - JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ - __VA_ARGS__ \ - JEMALLOC_DIAGNOSTIC_POP +#define JEMALLOC_SUPPRESS_WARN_ON_USAGE(...) \ + JEMALLOC_DIAGNOSTIC_PUSH \ + JEMALLOC_DIAGNOSTIC_IGNORE_DEPRECATED \ + __VA_ARGS__ \ + JEMALLOC_DIAGNOSTIC_POP /* * Disables spurious diagnostics for all headers. 
Since these headers are not diff --git a/include/jemalloc/internal/jemalloc_internal_overrides.h b/include/jemalloc/internal/jemalloc_internal_overrides.h index 5fbbe249..bf74a612 100644 --- a/include/jemalloc/internal/jemalloc_internal_overrides.h +++ b/include/jemalloc/internal/jemalloc_internal_overrides.h @@ -9,13 +9,14 @@ */ #ifdef JEMALLOC_OVERRIDE_LG_PAGE - #undef LG_PAGE - #define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE +# undef LG_PAGE +# define LG_PAGE JEMALLOC_OVERRIDE_LG_PAGE #endif #ifdef JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF - #undef JEMALLOC_CONFIG_MALLOC_CONF - #define JEMALLOC_CONFIG_MALLOC_CONF JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF +# undef JEMALLOC_CONFIG_MALLOC_CONF +# define JEMALLOC_CONFIG_MALLOC_CONF \ + JEMALLOC_OVERRIDE_JEMALLOC_CONFIG_MALLOC_CONF #endif #endif /* JEMALLOC_INTERNAL_OVERRIDES_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_types.h b/include/jemalloc/internal/jemalloc_internal_types.h index cddbfb65..0ade5461 100644 --- a/include/jemalloc/internal/jemalloc_internal_types.h +++ b/include/jemalloc/internal/jemalloc_internal_types.h @@ -18,13 +18,13 @@ enum zero_realloc_action_e { typedef enum zero_realloc_action_e zero_realloc_action_t; /* Signature of write callback. */ -typedef void (write_cb_t)(void *, const char *); +typedef void(write_cb_t)(void *, const char *); enum malloc_init_e { - malloc_init_uninitialized = 3, - malloc_init_a0_initialized = 2, - malloc_init_recursible = 1, - malloc_init_initialized = 0 /* Common case --> jnz. */ + malloc_init_uninitialized = 3, + malloc_init_a0_initialized = 2, + malloc_init_recursible = 1, + malloc_init_initialized = 0 /* Common case --> jnz. 
*/ }; typedef enum malloc_init_e malloc_init_t; @@ -39,48 +39,46 @@ typedef enum malloc_init_e malloc_init_t; * * aaaaaaaa aaaatttt tttttttt 0znnnnnn */ -#define MALLOCX_ARENA_BITS 12 -#define MALLOCX_TCACHE_BITS 12 -#define MALLOCX_LG_ALIGN_BITS 6 -#define MALLOCX_ARENA_SHIFT 20 -#define MALLOCX_TCACHE_SHIFT 8 -#define MALLOCX_ARENA_MASK \ - ((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)) +#define MALLOCX_ARENA_BITS 12 +#define MALLOCX_TCACHE_BITS 12 +#define MALLOCX_LG_ALIGN_BITS 6 +#define MALLOCX_ARENA_SHIFT 20 +#define MALLOCX_TCACHE_SHIFT 8 +#define MALLOCX_ARENA_MASK \ + ((unsigned)(((1U << MALLOCX_ARENA_BITS) - 1) << MALLOCX_ARENA_SHIFT)) /* NB: Arena index bias decreases the maximum number of arenas by 1. */ -#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1)) -#define MALLOCX_TCACHE_MASK \ - ((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)) -#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3)) -#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) +#define MALLOCX_ARENA_LIMIT ((unsigned)((1U << MALLOCX_ARENA_BITS) - 1)) +#define MALLOCX_TCACHE_MASK \ + ((unsigned)(((1U << MALLOCX_TCACHE_BITS) - 1) << MALLOCX_TCACHE_SHIFT)) +#define MALLOCX_TCACHE_MAX ((unsigned)((1U << MALLOCX_TCACHE_BITS) - 3)) +#define MALLOCX_LG_ALIGN_MASK ((1 << MALLOCX_LG_ALIGN_BITS) - 1) /* Use MALLOCX_ALIGN_GET() if alignment may not be specified in flags. 
*/ -#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ - (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) -#define MALLOCX_ALIGN_GET(flags) \ - (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX-1)) -#define MALLOCX_ZERO_GET(flags) \ - ((bool)(flags & MALLOCX_ZERO)) +#define MALLOCX_ALIGN_GET_SPECIFIED(flags) \ + (ZU(1) << (flags & MALLOCX_LG_ALIGN_MASK)) +#define MALLOCX_ALIGN_GET(flags) \ + (MALLOCX_ALIGN_GET_SPECIFIED(flags) & (SIZE_T_MAX - 1)) +#define MALLOCX_ZERO_GET(flags) ((bool)(flags & MALLOCX_ZERO)) -#define MALLOCX_TCACHE_GET(flags) \ - (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) - 2) -#define MALLOCX_ARENA_GET(flags) \ - (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) +#define MALLOCX_TCACHE_GET(flags) \ + (((unsigned)((flags & MALLOCX_TCACHE_MASK) >> MALLOCX_TCACHE_SHIFT)) \ + - 2) +#define MALLOCX_ARENA_GET(flags) \ + (((unsigned)(((unsigned)flags) >> MALLOCX_ARENA_SHIFT)) - 1) /* Smallest size class to support. */ -#define TINY_MIN (1U << LG_TINY_MIN) +#define TINY_MIN (1U << LG_TINY_MIN) -#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) -#define LONG_MASK (LONG - 1) +#define LONG ((size_t)(1U << LG_SIZEOF_LONG)) +#define LONG_MASK (LONG - 1) /* Return the smallest long multiple that is >= a. */ -#define LONG_CEILING(a) \ - (((a) + LONG_MASK) & ~LONG_MASK) +#define LONG_CEILING(a) (((a) + LONG_MASK) & ~LONG_MASK) -#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) -#define PTR_MASK (SIZEOF_PTR - 1) +#define SIZEOF_PTR (1U << LG_SIZEOF_PTR) +#define PTR_MASK (SIZEOF_PTR - 1) /* Return the smallest (void *) multiple that is >= a. */ -#define PTR_CEILING(a) \ - (((a) + PTR_MASK) & ~PTR_MASK) +#define PTR_CEILING(a) (((a) + PTR_MASK) & ~PTR_MASK) /* * Maximum size of L1 cache line. This is used to avoid cache line aliasing. @@ -89,25 +87,24 @@ typedef enum malloc_init_e malloc_init_t; * CACHELINE cannot be based on LG_CACHELINE because __declspec(align()) can * only handle raw constants. 
*/ -#define LG_CACHELINE 6 -#define CACHELINE 64 -#define CACHELINE_MASK (CACHELINE - 1) +#define LG_CACHELINE 6 +#define CACHELINE 64 +#define CACHELINE_MASK (CACHELINE - 1) /* Return the smallest cacheline multiple that is >= s. */ -#define CACHELINE_CEILING(s) \ - (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) +#define CACHELINE_CEILING(s) (((s) + CACHELINE_MASK) & ~CACHELINE_MASK) /* Return the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2BASE(a, alignment) \ - ((void *)(((byte_t *)(a)) - (((uintptr_t)(a)) - \ - ((uintptr_t)(a) & ((~(alignment)) + 1))))) +#define ALIGNMENT_ADDR2BASE(a, alignment) \ + ((void *)(((byte_t *)(a)) \ + - (((uintptr_t)(a)) - ((uintptr_t)(a) & ((~(alignment)) + 1))))) /* Return the offset between a and the nearest aligned address at or below a. */ -#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ +#define ALIGNMENT_ADDR2OFFSET(a, alignment) \ ((size_t)((uintptr_t)(a) & (alignment - 1))) /* Return the smallest alignment multiple that is >= s. */ -#define ALIGNMENT_CEILING(s, alignment) \ +#define ALIGNMENT_CEILING(s, alignment) \ (((s) + (alignment - 1)) & ((~(alignment)) + 1)) /* @@ -119,30 +116,31 @@ typedef enum malloc_init_e malloc_init_t; * provenance from the compiler. See the block-comment on the * definition of `byte_t` for more details. */ -#define ALIGNMENT_ADDR2CEILING(a, alignment) \ - ((void *)(((byte_t *)(a)) + (((((uintptr_t)(a)) + \ - (alignment - 1)) & ((~(alignment)) + 1)) - ((uintptr_t)(a))))) +#define ALIGNMENT_ADDR2CEILING(a, alignment) \ + ((void *)(((byte_t *)(a)) \ + + (((((uintptr_t)(a)) + (alignment - 1)) & ((~(alignment)) + 1)) \ + - ((uintptr_t)(a))))) /* Declare a variable-length array. 
*/ #if __STDC_VERSION__ < 199901L || defined(__STDC_NO_VLA__) -# ifdef _MSC_VER -# include -# define alloca _alloca -# else -# ifdef JEMALLOC_HAS_ALLOCA_H -# include -# else -# include -# endif -# endif -# define VARIABLE_ARRAY_UNSAFE(type, name, count) \ - type *name = alloca(sizeof(type) * (count)) +# ifdef _MSC_VER +# include +# define alloca _alloca +# else +# ifdef JEMALLOC_HAS_ALLOCA_H +# include +# else +# include +# endif +# endif +# define VARIABLE_ARRAY_UNSAFE(type, name, count) \ + type *name = alloca(sizeof(type) * (count)) #else -# define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)] +# define VARIABLE_ARRAY_UNSAFE(type, name, count) type name[(count)] #endif -#define VARIABLE_ARRAY_SIZE_MAX 2048 -#define VARIABLE_ARRAY(type, name, count) \ - assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ +#define VARIABLE_ARRAY_SIZE_MAX 2048 +#define VARIABLE_ARRAY(type, name, count) \ + assert(sizeof(type) * (count) <= VARIABLE_ARRAY_SIZE_MAX); \ VARIABLE_ARRAY_UNSAFE(type, name, count) #define CALLOC_MADVISE_THRESHOLD_DEFAULT (((size_t)1) << 23) /* 8 MB */ diff --git a/include/jemalloc/internal/large_externs.h b/include/jemalloc/internal/large_externs.h index ce9c8689..7cee6752 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -6,20 +6,20 @@ #include "jemalloc/internal/hook.h" void *large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero); -void *large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero); -bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, - size_t usize_max, bool zero); +void *large_palloc( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); +bool large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, + size_t usize_max, bool zero); void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, size_t alignment, bool zero, tcache_t *tcache, 
hook_ralloc_args_t *hook_args); -void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); -void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); -void large_dalloc(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); +void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); +void large_dalloc(tsdn_t *tsdn, edata_t *edata); size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); -void large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, - bool reset_recent); +void large_prof_info_get( + tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent); void large_prof_tctx_reset(edata_t *edata); void large_prof_info_set(edata_t *edata, prof_tctx_t *tctx, size_t size); diff --git a/include/jemalloc/internal/lockedint.h b/include/jemalloc/internal/lockedint.h index 062dedbf..46aba8ff 100644 --- a/include/jemalloc/internal/lockedint.h +++ b/include/jemalloc/internal/lockedint.h @@ -30,33 +30,34 @@ struct locked_zu_s { }; #ifndef JEMALLOC_ATOMIC_U64 -# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; -# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \ - malloc_mutex_init(&(mu), name, rank, rank_mode) -# define LOCKEDINT_MTX(mtx) (&(mtx)) -# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) -# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) -# define LOCKEDINT_MTX_PREFORK(tsdn, mu) malloc_mutex_prefork(tsdn, &(mu)) -# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \ - malloc_mutex_postfork_parent(tsdn, &(mu)) -# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \ - malloc_mutex_postfork_child(tsdn, &(mu)) +# define LOCKEDINT_MTX_DECLARE(name) malloc_mutex_t name; +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) \ + malloc_mutex_init(&(mu), name, rank, rank_mode) +# define LOCKEDINT_MTX(mtx) (&(mtx)) +# define LOCKEDINT_MTX_LOCK(tsdn, mu) malloc_mutex_lock(tsdn, &(mu)) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) malloc_mutex_unlock(tsdn, &(mu)) +# 
define LOCKEDINT_MTX_PREFORK(tsdn, mu) \ + malloc_mutex_prefork(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) \ + malloc_mutex_postfork_parent(tsdn, &(mu)) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) \ + malloc_mutex_postfork_child(tsdn, &(mu)) #else -# define LOCKEDINT_MTX_DECLARE(name) -# define LOCKEDINT_MTX(mtx) NULL -# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false -# define LOCKEDINT_MTX_LOCK(tsdn, mu) -# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) -# define LOCKEDINT_MTX_PREFORK(tsdn, mu) -# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) -# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) +# define LOCKEDINT_MTX_DECLARE(name) +# define LOCKEDINT_MTX(mtx) NULL +# define LOCKEDINT_MTX_INIT(mu, name, rank, rank_mode) false +# define LOCKEDINT_MTX_LOCK(tsdn, mu) +# define LOCKEDINT_MTX_UNLOCK(tsdn, mu) +# define LOCKEDINT_MTX_PREFORK(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_PARENT(tsdn, mu) +# define LOCKEDINT_MTX_POSTFORK_CHILD(tsdn, mu) #endif #ifdef JEMALLOC_ATOMIC_U64 -# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL) +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) assert((mtx) == NULL) #else -# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \ - malloc_mutex_assert_owner(tsdn, (mtx)) +# define LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx) \ + malloc_mutex_assert_owner(tsdn, (mtx)) #endif static inline uint64_t @@ -70,8 +71,7 @@ locked_read_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p) { } static inline void -locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, - uint64_t x) { +locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_u64(&p->val, x, ATOMIC_RELAXED); @@ -81,8 +81,7 @@ locked_inc_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, } static inline void -locked_dec_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, - uint64_t x) { +locked_dec_u64(tsdn_t 
*tsdn, malloc_mutex_t *mtx, locked_u64_t *p, uint64_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 uint64_t r = atomic_fetch_sub_u64(&p->val, x, ATOMIC_RELAXED); @@ -99,7 +98,7 @@ locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, const uint64_t x, const uint64_t modulus) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); uint64_t before, after; - bool overflow; + bool overflow; #ifdef JEMALLOC_ATOMIC_U64 before = atomic_load_u64(&p->val, ATOMIC_RELAXED); do { @@ -109,8 +108,8 @@ locked_inc_mod_u64(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_u64_t *p, if (overflow) { after %= modulus; } - } while (!atomic_compare_exchange_weak_u64(&p->val, &before, after, - ATOMIC_RELAXED, ATOMIC_RELAXED)); + } while (!atomic_compare_exchange_weak_u64( + &p->val, &before, after, ATOMIC_RELAXED, ATOMIC_RELAXED)); #else before = p->val; after = before + x; @@ -167,8 +166,7 @@ locked_read_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p) { } static inline void -locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, - size_t x) { +locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 atomic_fetch_add_zu(&p->val, x, ATOMIC_RELAXED); @@ -179,8 +177,7 @@ locked_inc_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, } static inline void -locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, - size_t x) { +locked_dec_zu(tsdn_t *tsdn, malloc_mutex_t *mtx, locked_zu_t *p, size_t x) { LOCKEDINT_MTX_ASSERT_INTERNAL(tsdn, mtx); #ifdef JEMALLOC_ATOMIC_U64 size_t r = atomic_fetch_sub_zu(&p->val, x, ATOMIC_RELAXED); diff --git a/include/jemalloc/internal/log.h b/include/jemalloc/internal/log.h index 7b074abd..f213beda 100644 --- a/include/jemalloc/internal/log.h +++ b/include/jemalloc/internal/log.h @@ -7,9 +7,9 @@ #include "jemalloc/internal/mutex.h" #ifdef JEMALLOC_LOG -# define JEMALLOC_LOG_VAR_BUFSIZE 1000 +# define 
JEMALLOC_LOG_VAR_BUFSIZE 1000 #else -# define JEMALLOC_LOG_VAR_BUFSIZE 1 +# define JEMALLOC_LOG_VAR_BUFSIZE 1 #endif #define JEMALLOC_LOG_BUFSIZE 4096 @@ -36,7 +36,7 @@ * statements. */ -extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; extern atomic_b_t log_init_done; typedef struct log_var_s log_var_t; @@ -45,7 +45,7 @@ struct log_var_s { * Lowest bit is "inited", second lowest is "enabled". Putting them in * a single word lets us avoid any fences on weak architectures. */ - atomic_u_t state; + atomic_u_t state; const char *name; }; @@ -53,7 +53,8 @@ struct log_var_s { #define LOG_INITIALIZED_NOT_ENABLED 1U #define LOG_ENABLED 2U -#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} +#define LOG_VAR_INIT(name_str) \ + { ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str } /* * Returns the value we should assume for state (which is not necessarily @@ -63,21 +64,21 @@ struct log_var_s { unsigned log_var_update_state(log_var_t *log_var); /* We factor out the metadata management to allow us to test more easily. */ -#define log_do_begin(log_var) \ -if (config_log) { \ - unsigned log_state = atomic_load_u(&(log_var).state, \ - ATOMIC_RELAXED); \ - if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ - log_state = log_var_update_state(&(log_var)); \ - assert(log_state != LOG_NOT_INITIALIZED); \ - } \ - if (log_state == LOG_ENABLED) { \ - { - /* User code executes here. */ -#define log_do_end(log_var) \ - } \ - } \ -} +#define log_do_begin(log_var) \ + if (config_log) { \ + unsigned log_state = atomic_load_u( \ + &(log_var).state, ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { +/* User code executes here. 
*/ +#define log_do_end(log_var) \ + } \ + } \ + } /* * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during @@ -88,28 +89,29 @@ if (config_log) { \ */ static inline void log_impl_varargs(const char *name, ...) { - char buf[JEMALLOC_LOG_BUFSIZE]; + char buf[JEMALLOC_LOG_BUFSIZE]; va_list ap; va_start(ap, name); const char *format = va_arg(ap, const char *); - size_t dst_offset = 0; + size_t dst_offset = 0; dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); - dst_offset += malloc_vsnprintf(buf + dst_offset, - JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); - malloc_snprintf(buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + dst_offset += malloc_vsnprintf( + buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + malloc_snprintf( + buf + dst_offset, JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); va_end(ap); malloc_write(buf); } /* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ -#define LOG(log_var_str, ...) \ -do { \ - static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ - log_do_begin(log_var) \ - log_impl_varargs((log_var).name, __VA_ARGS__); \ - log_do_end(log_var) \ -} while (0) +#define LOG(log_var_str, ...) 
\ + do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ + log_do_begin(log_var) \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ + } while (0) #endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 9c7c6ec2..0f82f678 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -5,64 +5,63 @@ #include "jemalloc/internal/jemalloc_internal_types.h" #ifdef _WIN32 -# ifdef _WIN64 -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "ll" -# else -# define FMT64_PREFIX "ll" -# define FMTPTR_PREFIX "" -# endif -# define FMTd32 "d" -# define FMTu32 "u" -# define FMTx32 "x" -# define FMTd64 FMT64_PREFIX "d" -# define FMTu64 FMT64_PREFIX "u" -# define FMTx64 FMT64_PREFIX "x" -# define FMTdPTR FMTPTR_PREFIX "d" -# define FMTuPTR FMTPTR_PREFIX "u" -# define FMTxPTR FMTPTR_PREFIX "x" +# ifdef _WIN64 +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "ll" +# else +# define FMT64_PREFIX "ll" +# define FMTPTR_PREFIX "" +# endif +# define FMTd32 "d" +# define FMTu32 "u" +# define FMTx32 "x" +# define FMTd64 FMT64_PREFIX "d" +# define FMTu64 FMT64_PREFIX "u" +# define FMTx64 FMT64_PREFIX "x" +# define FMTdPTR FMTPTR_PREFIX "d" +# define FMTuPTR FMTPTR_PREFIX "u" +# define FMTxPTR FMTPTR_PREFIX "x" #else -# include -# define FMTd32 PRId32 -# define FMTu32 PRIu32 -# define FMTx32 PRIx32 -# define FMTd64 PRId64 -# define FMTu64 PRIu64 -# define FMTx64 PRIx64 -# define FMTdPTR PRIdPTR -# define FMTuPTR PRIuPTR -# define FMTxPTR PRIxPTR +# include +# define FMTd32 PRId32 +# define FMTu32 PRIu32 +# define FMTx32 PRIx32 +# define FMTd64 PRId64 +# define FMTu64 PRIu64 +# define FMTx64 PRIx64 +# define FMTdPTR PRIdPTR +# define FMTuPTR PRIuPTR +# define FMTxPTR PRIxPTR #endif /* Size of stack-allocated buffer passed to buferror(). 
*/ -#define BUFERROR_BUF 64 +#define BUFERROR_BUF 64 /* * Size of stack-allocated buffer used by malloc_{,v,vc}printf(). This must be * large enough for all possible uses within jemalloc. */ -#define MALLOC_PRINTF_BUFSIZE 4096 +#define MALLOC_PRINTF_BUFSIZE 4096 write_cb_t wrtmessage; -int buferror(int err, char *buf, size_t buflen); -uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, - int base); +int buferror(int err, char *buf, size_t buflen); +uintmax_t malloc_strtoumax( + const char *restrict nptr, char **restrict endptr, int base); void malloc_write(const char *s); /* * malloc_vsnprintf() supports a subset of snprintf(3) that avoids floating * point math. */ -size_t malloc_vsnprintf(char *str, size_t size, const char *format, - va_list ap); +size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); /* * The caller can set write_cb to null to choose to print with the * je_malloc_message hook. */ -void malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, - va_list ap); +void malloc_vcprintf( + write_cb_t *write_cb, void *cbopaque, const char *format, va_list ap); void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) 
JEMALLOC_FORMAT_PRINTF(1, 2); @@ -81,10 +80,10 @@ malloc_write_fd_syscall(int fd, const void *buf, size_t count) { long result = syscall(SYS_write, fd, buf, count); #else ssize_t result = (ssize_t)write(fd, buf, -#ifdef _WIN32 +# ifdef _WIN32 (unsigned int) -#endif - count); +# endif + count); #endif return (ssize_t)result; } @@ -110,10 +109,10 @@ malloc_read_fd_syscall(int fd, void *buf, size_t count) { long result = syscall(SYS_read, fd, buf, count); #else ssize_t result = read(fd, buf, -#ifdef _WIN32 +# ifdef _WIN32 (unsigned int) -#endif - count); +# endif + count); #endif return (ssize_t)result; } @@ -122,8 +121,8 @@ static inline ssize_t malloc_read_fd(int fd, void *buf, size_t count) { size_t bytes_read = 0; do { - ssize_t result = malloc_read_fd_syscall(fd, - &((byte_t *)buf)[bytes_read], count - bytes_read); + ssize_t result = malloc_read_fd_syscall( + fd, &((byte_t *)buf)[bytes_read], count - bytes_read); if (result < 0) { return result; } else if (result == 0) { @@ -134,7 +133,8 @@ malloc_read_fd(int fd, void *buf, size_t count) { return bytes_read; } -static inline int malloc_open(const char *path, int flags) { +static inline int +malloc_open(const char *path, int flags) { int fd; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) fd = (int)syscall(SYS_open, path, flags); @@ -146,7 +146,8 @@ static inline int malloc_open(const char *path, int flags) { return fd; } -static inline int malloc_close(int fd) { +static inline int +malloc_close(int fd) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) return (int)syscall(SYS_close, fd); #else @@ -154,11 +155,12 @@ static inline int malloc_close(int fd) { #endif } -static inline off_t malloc_lseek(int fd, off_t offset, int whence) { +static inline off_t +malloc_lseek(int fd, off_t offset, int whence) { #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_lseek) - return (off_t)syscall(SYS_lseek, fd, offset, whence); + return (off_t)syscall(SYS_lseek, fd, offset, whence); #else - return lseek(fd, offset, 
whence); + return lseek(fd, offset, whence); #endif } diff --git a/include/jemalloc/internal/mutex.h b/include/jemalloc/internal/mutex.h index db2bdf37..943c7928 100644 --- a/include/jemalloc/internal/mutex.h +++ b/include/jemalloc/internal/mutex.h @@ -31,7 +31,7 @@ struct malloc_mutex_s { * avoid prefetching a modified cacheline (for the * unlocking thread). */ - mutex_prof_data_t prof_data; + mutex_prof_data_t prof_data; /* * Hint flag to avoid exclusive cache line contention * during spin waiting. Placed along with prof_data @@ -39,20 +39,20 @@ struct malloc_mutex_s { * Modified by the lock owner only (after acquired, and * before release), and may be read by other threads. */ - atomic_b_t locked; + atomic_b_t locked; #ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 - SRWLOCK lock; -# else - CRITICAL_SECTION lock; -# endif +# if _WIN32_WINNT >= 0x0600 + SRWLOCK lock; +# else + CRITICAL_SECTION lock; +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; + os_unfair_lock lock; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) - pthread_mutex_t lock; - malloc_mutex_t *postponed_next; + pthread_mutex_t lock; + malloc_mutex_t *postponed_next; #else - pthread_mutex_t lock; + pthread_mutex_t lock; #endif }; /* @@ -62,82 +62,118 @@ struct malloc_mutex_s { * memory cost. 
*/ #if !defined(JEMALLOC_DEBUG) - witness_t witness; - malloc_mutex_lock_order_t lock_order; + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif }; #if defined(JEMALLOC_DEBUG) - witness_t witness; - malloc_mutex_lock_order_t lock_order; + witness_t witness; + malloc_mutex_lock_order_t lock_order; #endif }; #ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 -# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) ReleaseSRWLockExclusive(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!TryAcquireSRWLockExclusive(&(m)->lock)) -# else -# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!TryEnterCriticalSection(&(m)->lock)) -# endif +# if _WIN32_WINNT >= 0x0600 +# define MALLOC_MUTEX_LOCK(m) AcquireSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) \ + ReleaseSRWLockExclusive(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) \ + (!TryAcquireSRWLockExclusive(&(m)->lock)) +# else +# define MALLOC_MUTEX_LOCK(m) EnterCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) LeaveCriticalSection(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) \ + (!TryEnterCriticalSection(&(m)->lock)) +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) +# define MALLOC_MUTEX_LOCK(m) os_unfair_lock_lock(&(m)->lock) +# define MALLOC_MUTEX_UNLOCK(m) os_unfair_lock_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (!os_unfair_lock_trylock(&(m)->lock)) #else -# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) -# define MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) -# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) +# define MALLOC_MUTEX_LOCK(m) pthread_mutex_lock(&(m)->lock) +# define 
MALLOC_MUTEX_UNLOCK(m) pthread_mutex_unlock(&(m)->lock) +# define MALLOC_MUTEX_TRYLOCK(m) (pthread_mutex_trylock(&(m)->lock) != 0) #endif -#define LOCK_PROF_DATA_INITIALIZER \ - {NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ - ATOMIC_INIT(0), 0, NULL, 0} +#define LOCK_PROF_DATA_INITIALIZER \ + { \ + NSTIME_ZERO_INITIALIZER, NSTIME_ZERO_INITIALIZER, 0, 0, 0, \ + ATOMIC_INIT(0), 0, NULL, 0 \ + } #ifdef _WIN32 -# define MALLOC_MUTEX_INITIALIZER +# define MALLOC_MUTEX_INITIALIZER #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) -# if defined(JEMALLOC_DEBUG) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} -# else -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif +# if defined(JEMALLOC_DEBUG) +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT), \ + 0 \ + } +# else +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), OS_UNFAIR_LOCK_INIT}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT) \ + } +# endif #elif (defined(JEMALLOC_MUTEX_INIT_CB)) -# if (defined(JEMALLOC_DEBUG)) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} -# else -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER, NULL}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif +# if (defined(JEMALLOC_DEBUG)) +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER( \ + "mutex", 
WITNESS_RANK_OMIT), \ + 0 \ + } +# else +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER, NULL}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT) \ + } +# endif #else -# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT -# if defined(JEMALLOC_DEBUG) -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT), 0} -# else -# define MALLOC_MUTEX_INITIALIZER \ - {{{LOCK_PROF_DATA_INITIALIZER, ATOMIC_INIT(false), PTHREAD_MUTEX_INITIALIZER}}, \ - WITNESS_INITIALIZER("mutex", WITNESS_RANK_OMIT)} -# endif +# define MALLOC_MUTEX_TYPE PTHREAD_MUTEX_DEFAULT +# if defined(JEMALLOC_DEBUG) +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT), \ + 0 \ + } +# else +# define MALLOC_MUTEX_INITIALIZER \ + { \ + {{LOCK_PROF_DATA_INITIALIZER, \ + ATOMIC_INIT(false), \ + PTHREAD_MUTEX_INITIALIZER}}, \ + WITNESS_INITIALIZER( \ + "mutex", WITNESS_RANK_OMIT) \ + } +# endif #endif #ifdef JEMALLOC_LAZY_LOCK extern bool isthreaded; #else -# undef isthreaded /* Undo private_namespace.h definition. */ -# define isthreaded true +# undef isthreaded /* Undo private_namespace.h definition. 
*/ +# define isthreaded true #endif bool malloc_mutex_init(malloc_mutex_t *mutex, const char *name, @@ -214,12 +250,12 @@ malloc_mutex_prof_merge(mutex_prof_data_t *sum, mutex_prof_data_t *data) { if (sum->max_n_thds < data->max_n_thds) { sum->max_n_thds = data->max_n_thds; } - uint32_t cur_n_waiting_thds = atomic_load_u32(&sum->n_waiting_thds, - ATOMIC_RELAXED); - uint32_t new_n_waiting_thds = cur_n_waiting_thds + atomic_load_u32( - &data->n_waiting_thds, ATOMIC_RELAXED); - atomic_store_u32(&sum->n_waiting_thds, new_n_waiting_thds, - ATOMIC_RELAXED); + uint32_t cur_n_waiting_thds = atomic_load_u32( + &sum->n_waiting_thds, ATOMIC_RELAXED); + uint32_t new_n_waiting_thds = cur_n_waiting_thds + + atomic_load_u32(&data->n_waiting_thds, ATOMIC_RELAXED); + atomic_store_u32( + &sum->n_waiting_thds, new_n_waiting_thds, ATOMIC_RELAXED); sum->n_owner_switches += data->n_owner_switches; sum->n_lock_ops += data->n_lock_ops; } @@ -274,16 +310,16 @@ malloc_mutex_prof_copy(mutex_prof_data_t *dst, mutex_prof_data_t *source) { /* Copy the prof data from mutex for processing. */ static inline void -malloc_mutex_prof_read(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { +malloc_mutex_prof_read( + tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); malloc_mutex_prof_copy(data, &mutex->prof_data); } static inline void -malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { +malloc_mutex_prof_accum( + tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { mutex_prof_data_t *source = &mutex->prof_data; /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); @@ -305,8 +341,8 @@ malloc_mutex_prof_accum(tsdn_t *tsdn, mutex_prof_data_t *data, /* Compare the prof data and update to the maximum. 
*/ static inline void -malloc_mutex_prof_max_update(tsdn_t *tsdn, mutex_prof_data_t *data, - malloc_mutex_t *mutex) { +malloc_mutex_prof_max_update( + tsdn_t *tsdn, mutex_prof_data_t *data, malloc_mutex_t *mutex) { mutex_prof_data_t *source = &mutex->prof_data; /* Can only read holding the mutex. */ malloc_mutex_assert_owner(tsdn, mutex); diff --git a/include/jemalloc/internal/mutex_prof.h b/include/jemalloc/internal/mutex_prof.h index 14e4340b..572200f3 100644 --- a/include/jemalloc/internal/mutex_prof.h +++ b/include/jemalloc/internal/mutex_prof.h @@ -6,76 +6,76 @@ #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/tsd_types.h" -#define MUTEX_PROF_GLOBAL_MUTEXES \ - OP(background_thread) \ - OP(max_per_bg_thd) \ - OP(ctl) \ - OP(prof) \ - OP(prof_thds_data) \ - OP(prof_dump) \ - OP(prof_recent_alloc) \ - OP(prof_recent_dump) \ - OP(prof_stats) +#define MUTEX_PROF_GLOBAL_MUTEXES \ + OP(background_thread) \ + OP(max_per_bg_thd) \ + OP(ctl) \ + OP(prof) \ + OP(prof_thds_data) \ + OP(prof_dump) \ + OP(prof_recent_alloc) \ + OP(prof_recent_dump) \ + OP(prof_stats) typedef enum { #define OP(mtx) global_prof_mutex_##mtx, MUTEX_PROF_GLOBAL_MUTEXES #undef OP - mutex_prof_num_global_mutexes + mutex_prof_num_global_mutexes } mutex_prof_global_ind_t; -#define MUTEX_PROF_ARENA_MUTEXES \ - OP(large) \ - OP(extent_avail) \ - OP(extents_dirty) \ - OP(extents_muzzy) \ - OP(extents_retained) \ - OP(decay_dirty) \ - OP(decay_muzzy) \ - OP(base) \ - OP(tcache_list) \ - OP(hpa_shard) \ - OP(hpa_shard_grow) \ - OP(hpa_sec) +#define MUTEX_PROF_ARENA_MUTEXES \ + OP(large) \ + OP(extent_avail) \ + OP(extents_dirty) \ + OP(extents_muzzy) \ + OP(extents_retained) \ + OP(decay_dirty) \ + OP(decay_muzzy) \ + OP(base) \ + OP(tcache_list) \ + OP(hpa_shard) \ + OP(hpa_shard_grow) \ + OP(hpa_sec) typedef enum { #define OP(mtx) arena_prof_mutex_##mtx, MUTEX_PROF_ARENA_MUTEXES #undef OP - mutex_prof_num_arena_mutexes + mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; /* * The 
forth parameter is a boolean value that is true for derived rate counters * and false for real ones. */ -#define MUTEX_PROF_UINT64_COUNTERS \ - OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \ - OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \ - OP(num_wait, uint64_t, "n_waiting", false, num_wait) \ - OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \ - OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \ - OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \ - OP(num_owner_switch, uint64_t, "n_owner_switch", false, num_owner_switch) \ - OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \ - OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \ - OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \ - OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time) +#define MUTEX_PROF_UINT64_COUNTERS \ + OP(num_ops, uint64_t, "n_lock_ops", false, num_ops) \ + OP(num_ops_ps, uint64_t, "(#/sec)", true, num_ops) \ + OP(num_wait, uint64_t, "n_waiting", false, num_wait) \ + OP(num_wait_ps, uint64_t, "(#/sec)", true, num_wait) \ + OP(num_spin_acq, uint64_t, "n_spin_acq", false, num_spin_acq) \ + OP(num_spin_acq_ps, uint64_t, "(#/sec)", true, num_spin_acq) \ + OP(num_owner_switch, uint64_t, "n_owner_switch", false, \ + num_owner_switch) \ + OP(num_owner_switch_ps, uint64_t, "(#/sec)", true, num_owner_switch) \ + OP(total_wait_time, uint64_t, "total_wait_ns", false, total_wait_time) \ + OP(total_wait_time_ps, uint64_t, "(#/sec)", true, total_wait_time) \ + OP(max_wait_time, uint64_t, "max_wait_ns", false, max_wait_time) -#define MUTEX_PROF_UINT32_COUNTERS \ - OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds) +#define MUTEX_PROF_UINT32_COUNTERS \ + OP(max_num_thds, uint32_t, "max_n_thds", false, max_num_thds) -#define MUTEX_PROF_COUNTERS \ - MUTEX_PROF_UINT64_COUNTERS \ - MUTEX_PROF_UINT32_COUNTERS +#define MUTEX_PROF_COUNTERS \ + MUTEX_PROF_UINT64_COUNTERS \ + 
MUTEX_PROF_UINT32_COUNTERS #define OP(counter, type, human, derived, base_counter) mutex_counter_##counter, -#define COUNTER_ENUM(counter_list, t) \ - typedef enum { \ - counter_list \ - mutex_prof_num_##t##_counters \ - } mutex_prof_##t##_counter_ind_t; +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) @@ -89,17 +89,17 @@ typedef struct { * contention. We update them once we have the lock. */ /* Total time (in nano seconds) spent waiting on this mutex. */ - nstime_t tot_wait_time; + nstime_t tot_wait_time; /* Max time (in nano seconds) spent on a single lock operation. */ - nstime_t max_wait_time; + nstime_t max_wait_time; /* # of times have to wait for this mutex (after spinning). */ - uint64_t n_wait_times; + uint64_t n_wait_times; /* # of times acquired the mutex through local spinning. */ - uint64_t n_spin_acquired; + uint64_t n_spin_acquired; /* Max # of threads waiting for the mutex at the same time. */ - uint32_t max_n_thds; + uint32_t max_n_thds; /* Current # of threads waiting on the lock. Atomic synced. */ - atomic_u32_t n_waiting_thds; + atomic_u32_t n_waiting_thds; /* * Data touched on the fast path. These are modified right after we @@ -108,11 +108,11 @@ typedef struct { * cacheline. */ /* # of times the mutex holder is different than the previous one. */ - uint64_t n_owner_switches; + uint64_t n_owner_switches; /* Previous mutex holder, to facilitate n_owner_switches. */ - tsdn_t *prev_owner; + tsdn_t *prev_owner; /* # of lock() operations in total. 
*/ - uint64_t n_lock_ops; + uint64_t n_lock_ops; } mutex_prof_data_t; #endif /* JEMALLOC_INTERNAL_MUTEX_PROF_H */ diff --git a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index 1f32df58..a10b2de1 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -9,9 +9,11 @@ #define NSTIME_MAGIC ((uint32_t)0xb8a9ce37) #ifdef JEMALLOC_DEBUG -# define NSTIME_ZERO_INITIALIZER {0, NSTIME_MAGIC} +# define NSTIME_ZERO_INITIALIZER \ + { 0, NSTIME_MAGIC } #else -# define NSTIME_ZERO_INITIALIZER {0} +# define NSTIME_ZERO_INITIALIZER \ + { 0 } #endif typedef struct { @@ -23,43 +25,40 @@ typedef struct { static const nstime_t nstime_zero = NSTIME_ZERO_INITIALIZER; -void nstime_init(nstime_t *time, uint64_t ns); -void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); +void nstime_init(nstime_t *time, uint64_t ns); +void nstime_init2(nstime_t *time, uint64_t sec, uint64_t nsec); uint64_t nstime_ns(const nstime_t *time); uint64_t nstime_ms(const nstime_t *time); uint64_t nstime_sec(const nstime_t *time); uint64_t nstime_nsec(const nstime_t *time); -void nstime_copy(nstime_t *time, const nstime_t *source); -int nstime_compare(const nstime_t *a, const nstime_t *b); -void nstime_add(nstime_t *time, const nstime_t *addend); -void nstime_iadd(nstime_t *time, uint64_t addend); -void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); -void nstime_isubtract(nstime_t *time, uint64_t subtrahend); -void nstime_imultiply(nstime_t *time, uint64_t multiplier); -void nstime_idivide(nstime_t *time, uint64_t divisor); +void nstime_copy(nstime_t *time, const nstime_t *source); +int nstime_compare(const nstime_t *a, const nstime_t *b); +void nstime_add(nstime_t *time, const nstime_t *addend); +void nstime_iadd(nstime_t *time, uint64_t addend); +void nstime_subtract(nstime_t *time, const nstime_t *subtrahend); +void nstime_isubtract(nstime_t *time, uint64_t subtrahend); +void nstime_imultiply(nstime_t *time, uint64_t 
multiplier); +void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); uint64_t nstime_ns_since(const nstime_t *past); uint64_t nstime_ms_since(const nstime_t *past); -typedef bool (nstime_monotonic_t)(void); +typedef bool(nstime_monotonic_t)(void); extern nstime_monotonic_t *JET_MUTABLE nstime_monotonic; -typedef void (nstime_update_t)(nstime_t *); +typedef void(nstime_update_t)(nstime_t *); extern nstime_update_t *JET_MUTABLE nstime_update; -typedef void (nstime_prof_update_t)(nstime_t *); +typedef void(nstime_prof_update_t)(nstime_t *); extern nstime_prof_update_t *JET_MUTABLE nstime_prof_update; void nstime_init_update(nstime_t *time); void nstime_prof_init_update(nstime_t *time); -enum prof_time_res_e { - prof_time_res_default = 0, - prof_time_res_high = 1 -}; +enum prof_time_res_e { prof_time_res_default = 0, prof_time_res_high = 1 }; typedef enum prof_time_res_e prof_time_res_t; -extern prof_time_res_t opt_prof_time_res; +extern prof_time_res_t opt_prof_time_res; extern const char *const prof_time_res_mode_names[]; JEMALLOC_ALWAYS_INLINE void diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 75626738..3f2d10b0 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -101,7 +101,7 @@ struct pa_shard_s { * these configurations to use many fewer arenas, and therefore have a * higher risk of hot locks. */ - sec_t hpa_sec; + sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects. */ @@ -109,7 +109,7 @@ struct pa_shard_s { unsigned ind; - malloc_mutex_t *stats_mtx; + malloc_mutex_t *stats_mtx; pa_shard_stats_t *stats; /* The emap this shard is tied to. 
*/ @@ -121,8 +121,8 @@ struct pa_shard_s { static inline bool pa_shard_dont_decay_muzzy(pa_shard_t *shard) { - return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 && - pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0; + return ecache_npages_get(&shard->pac.ecache_muzzy) == 0 + && pac_decay_ms_get(&shard->pac, extent_state_muzzy) <= 0; } static inline ehooks_t * @@ -186,10 +186,10 @@ bool pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, * (We could make generated_dirty the return value of course, but this is more * consistent with the shrink pathway and our error codes here). */ -void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, - bool *deferred_work_generated); -bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, - ssize_t decay_ms, pac_purge_eagerness_t eagerness); +void pa_dalloc(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, + bool *deferred_work_generated); +bool pa_decay_ms_set(tsdn_t *tsdn, pa_shard_t *shard, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); /* @@ -199,10 +199,10 @@ ssize_t pa_decay_ms_get(pa_shard_t *shard, extent_state_t state); * though, the arena, background thread, and PAC modules are tightly interwoven * in a way that's tricky to extricate, so we only do the HPA-specific parts. 
*/ -void pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, - bool deferral_allowed); -void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); -void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_set_deferral_allowed( + tsdn_t *tsdn, pa_shard_t *shard, bool deferral_allowed); +void pa_shard_do_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); +void pa_shard_try_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); uint64_t pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard); /******************************************************************************/ @@ -228,8 +228,8 @@ size_t pa_shard_nactive(pa_shard_t *shard); size_t pa_shard_ndirty(pa_shard_t *shard); size_t pa_shard_nmuzzy(pa_shard_t *shard); -void pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, - size_t *ndirty, size_t *nmuzzy); +void pa_shard_basic_stats_merge( + pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index 243e97f3..a9edc19b 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -95,12 +95,12 @@ struct pac_s { ecache_t ecache_muzzy; ecache_t ecache_retained; - base_t *base; - emap_t *emap; + base_t *base; + emap_t *emap; edata_cache_t *edata_cache; /* The grow info for the retained ecache. */ - exp_grow_t exp_grow; + exp_grow_t exp_grow; malloc_mutex_t grow_mtx; /* Special allocator for guarded frequently reused extents. */ @@ -119,7 +119,7 @@ struct pac_s { decay_t decay_muzzy; /* muzzy --> retained */ malloc_mutex_t *stats_mtx; - pac_stats_t *stats; + pac_stats_t *stats; /* Extent serial number generator state. */ atomic_zu_t extent_sn_next; @@ -141,8 +141,8 @@ struct pac_thp_s { bool thp_madvise; /* Below fields are protected by the lock. 
*/ malloc_mutex_t lock; - bool auto_thp_switched; - atomic_u_t n_thp_lazy; + bool auto_thp_switched; + atomic_u_t n_thp_lazy; /* * List that tracks HUGEPAGE aligned regions that're lazily hugified * in auto thp mode. @@ -195,11 +195,11 @@ bool pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, * * Returns true on error (if the new limit is not valid). */ -bool pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, - size_t *new_limit); +bool pac_retain_grow_limit_get_set( + tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit); -bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, - ssize_t decay_ms, pac_purge_eagerness_t eagerness); +bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, + ssize_t decay_ms, pac_purge_eagerness_t eagerness); ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state); void pac_reset(tsdn_t *tsdn, pac_t *pac); diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 366bc30b..31909934 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -5,27 +5,24 @@ #include "jemalloc/internal/jemalloc_internal_types.h" /* Actual operating system page size, detected during bootstrap, <= PAGE. */ -extern size_t os_page; +extern size_t os_page; /* Page size. LG_PAGE is determined by the configure script. */ #ifdef PAGE_MASK -# undef PAGE_MASK +# undef PAGE_MASK #endif -#define PAGE ((size_t)(1U << LG_PAGE)) -#define PAGE_MASK ((size_t)(PAGE - 1)) +#define PAGE ((size_t)(1U << LG_PAGE)) +#define PAGE_MASK ((size_t)(PAGE - 1)) /* Return the page base address for the page containing address a. */ -#define PAGE_ADDR2BASE(a) \ - ALIGNMENT_ADDR2BASE(a, PAGE) +#define PAGE_ADDR2BASE(a) ALIGNMENT_ADDR2BASE(a, PAGE) /* Return the smallest pagesize multiple that is >= s. 
*/ -#define PAGE_CEILING(s) \ - (((s) + PAGE_MASK) & ~PAGE_MASK) +#define PAGE_CEILING(s) (((s) + PAGE_MASK) & ~PAGE_MASK) /* Return the largest pagesize multiple that is <=s. */ -#define PAGE_FLOOR(s) \ - ((s) & ~PAGE_MASK) +#define PAGE_FLOOR(s) ((s) & ~PAGE_MASK) /* Huge page size. LG_HUGEPAGE is determined by the configure script. */ -#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) -#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) +#define HUGEPAGE ((size_t)(1U << LG_HUGEPAGE)) +#define HUGEPAGE_MASK ((size_t)(HUGEPAGE - 1)) /* * Used to validate that the hugepage size is not unexpectedly high. The huge @@ -36,7 +33,7 @@ extern size_t os_page; #define HUGEPAGE_MAX_EXPECTED_SIZE ((size_t)(16U << 20)) #if LG_HUGEPAGE != 0 -# define HUGEPAGE_PAGES (HUGEPAGE / PAGE) +# define HUGEPAGE_PAGES (HUGEPAGE / PAGE) #else /* * It's convenient to define arrays (or bitmaps) of HUGEPAGE_PAGES lengths. If @@ -45,19 +42,17 @@ extern size_t os_page; * that this value is at least 1. (We won't ever run in this degraded state; * hpa_supported() returns false in this case. */ -# define HUGEPAGE_PAGES 1 +# define HUGEPAGE_PAGES 1 #endif /* Return the huge page base address for the huge page containing address a. */ -#define HUGEPAGE_ADDR2BASE(a) \ - ALIGNMENT_ADDR2BASE(a, HUGEPAGE) +#define HUGEPAGE_ADDR2BASE(a) ALIGNMENT_ADDR2BASE(a, HUGEPAGE) /* Return the smallest pagesize multiple that is >= s. */ -#define HUGEPAGE_CEILING(s) \ - (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) +#define HUGEPAGE_CEILING(s) (((s) + HUGEPAGE_MASK) & ~HUGEPAGE_MASK) /* PAGES_CAN_PURGE_LAZY is defined if lazy purging is supported. */ #if defined(_WIN32) || defined(JEMALLOC_PURGE_MADVISE_FREE) -# define PAGES_CAN_PURGE_LAZY +# define PAGES_CAN_PURGE_LAZY #endif /* * PAGES_CAN_PURGE_FORCED is defined if forced purging is supported. @@ -68,10 +63,11 @@ extern size_t os_page; * next step after purging on Windows anyway, there's no point in adding such * complexity. 
*/ -#if !defined(_WIN32) && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ - defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) || \ - defined(JEMALLOC_MAPS_COALESCE)) -# define PAGES_CAN_PURGE_FORCED +#if !defined(_WIN32) \ + && ((defined(JEMALLOC_PURGE_MADVISE_DONTNEED) \ + && defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)) \ + || defined(JEMALLOC_MAPS_COALESCE)) +# define PAGES_CAN_PURGE_FORCED #endif static const bool pages_can_purge_lazy = @@ -90,7 +86,7 @@ static const bool pages_can_purge_forced = ; #if defined(JEMALLOC_HAVE_MADVISE_HUGE) || defined(JEMALLOC_HAVE_MEMCNTL) -# define PAGES_CAN_HUGIFY +# define PAGES_CAN_HUGIFY #endif static const bool pages_can_hugify = @@ -102,25 +98,25 @@ static const bool pages_can_hugify = ; typedef enum { - thp_mode_default = 0, /* Do not change hugepage settings. */ - thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ - thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ - thp_mode_names_limit = 3, /* Used for option processing. */ - thp_mode_not_supported = 3 /* No THP support detected. */ + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ } thp_mode_t; #define THP_MODE_DEFAULT thp_mode_default -extern thp_mode_t opt_thp; -extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. 
*/ extern const char *const thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); -void pages_unmap(void *addr, size_t size); -bool pages_commit(void *addr, size_t size); -bool pages_decommit(void *addr, size_t size); -bool pages_purge_lazy(void *addr, size_t size); -bool pages_purge_forced(void *addr, size_t size); +void pages_unmap(void *addr, size_t size); +bool pages_commit(void *addr, size_t size); +bool pages_decommit(void *addr, size_t size); +bool pages_purge_lazy(void *addr, size_t size); +bool pages_purge_forced(void *addr, size_t size); bool pages_purge_process_madvise(void *vec, size_t ven_len, size_t total_bytes); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); @@ -128,7 +124,7 @@ bool pages_collapse(void *addr, size_t size); bool pages_dontdump(void *addr, size_t size); bool pages_dodump(void *addr, size_t size); bool pages_boot(void); -void pages_set_thp_state (void *ptr, size_t size); +void pages_set_thp_state(void *ptr, size_t size); void pages_mark_guards(void *head, void *tail); void pages_unmark_guards(void *head, void *tail); diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 557d30d1..1d924657 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -41,9 +41,8 @@ struct pai_s { */ static inline edata_t * -pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, - bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated) { +pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, + bool guarded, bool frequent_reuse, bool *deferred_work_generated) { return self->alloc(tsdn, self, size, alignment, zero, guarded, frequent_reuse, deferred_work_generated); } @@ -66,13 +65,13 @@ pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, static inline bool pai_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool 
*deferred_work_generated) { - return self->shrink(tsdn, self, edata, old_size, new_size, - deferred_work_generated); + return self->shrink( + tsdn, self, edata, old_size, new_size, deferred_work_generated); } static inline void -pai_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +pai_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { self->dalloc(tsdn, self, edata, deferred_work_generated); } diff --git a/include/jemalloc/internal/peak.h b/include/jemalloc/internal/peak.h index 2a973cb8..599f1a02 100644 --- a/include/jemalloc/internal/peak.h +++ b/include/jemalloc/internal/peak.h @@ -14,7 +14,8 @@ struct peak_s { uint64_t adjustment; }; -#define PEAK_INITIALIZER {0, 0} +#define PEAK_INITIALIZER \ + { 0, 0 } static inline uint64_t peak_max(peak_t *peak) { diff --git a/include/jemalloc/internal/peak_event.h b/include/jemalloc/internal/peak_event.h index 1e339ff8..0d1f1627 100644 --- a/include/jemalloc/internal/peak_event.h +++ b/include/jemalloc/internal/peak_event.h @@ -20,7 +20,7 @@ /* Update the peak with current tsd state. */ void peak_event_update(tsd_t *tsd); /* Set current state to zero. 
*/ -void peak_event_zero(tsd_t *tsd); +void peak_event_zero(tsd_t *tsd); uint64_t peak_event_max(tsd_t *tsd); extern te_base_cb_t peak_te_handler; diff --git a/include/jemalloc/internal/ph.h b/include/jemalloc/internal/ph.h index 05376004..803d2cbd 100644 --- a/include/jemalloc/internal/ph.h +++ b/include/jemalloc/internal/ph.h @@ -129,8 +129,7 @@ phn_prev_set(void *phn, void *prev, size_t offset) { } JEMALLOC_ALWAYS_INLINE void -phn_merge_ordered(void *phn0, void *phn1, size_t offset, - ph_cmp_t cmp) { +phn_merge_ordered(void *phn0, void *phn1, size_t offset, ph_cmp_t cmp) { void *phn0child; assert(phn0 != NULL); @@ -361,15 +360,14 @@ ph_insert(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { phn_next_set(phn, phn_next_get(ph->root, offset), offset); if (phn_next_get(ph->root, offset) != NULL) { - phn_prev_set(phn_next_get(ph->root, offset), phn, - offset); + phn_prev_set(phn_next_get(ph->root, offset), phn, offset); } phn_prev_set(phn, ph->root, offset); phn_next_set(ph->root, phn, offset); ph->auxcount++; unsigned nmerges = ffs_zu(ph->auxcount); - bool done = false; + bool done = false; for (unsigned i = 0; i < nmerges && !done; i++) { done = ph_try_aux_merge_pair(ph, offset, cmp); } @@ -387,7 +385,6 @@ ph_remove_first(ph_t *ph, size_t offset, ph_cmp_t cmp) { ph->root = ph_merge_children(ph->root, offset, cmp); return ret; - } JEMALLOC_ALWAYS_INLINE void @@ -398,11 +395,11 @@ ph_remove(ph_t *ph, void *phn, size_t offset, ph_cmp_t cmp) { return; } - void* prev = phn_prev_get(phn, offset); - void* next = phn_next_get(phn, offset); + void *prev = phn_prev_get(phn, offset); + void *next = phn_next_get(phn, offset); /* If we have children, then we integrate them back in the heap. 
*/ - void* replace = ph_merge_children(phn, offset, cmp); + void *replace = ph_merge_children(phn, offset, cmp); if (replace != NULL) { phn_next_set(replace, next, offset); if (next != NULL) { @@ -438,16 +435,16 @@ ph_enumerate_vars_init(ph_enumerate_vars_t *vars, uint16_t max_visit_num, * max_queue_size must be able to support max_visit_num, which means * the queue will not overflow before reaching max_visit_num. */ - assert(vars->max_queue_size >= (vars->max_visit_num + 1)/2); + assert(vars->max_queue_size >= (vars->max_visit_num + 1) / 2); } JEMALLOC_ALWAYS_INLINE void -ph_enumerate_queue_push(void *phn, void **bfs_queue, - ph_enumerate_vars_t *vars) { +ph_enumerate_queue_push( + void *phn, void **bfs_queue, ph_enumerate_vars_t *vars) { assert(vars->queue_size < vars->max_queue_size); bfs_queue[vars->rear] = phn; vars->rear = (vars->rear + 1) % vars->max_queue_size; - (vars->queue_size) ++; + (vars->queue_size)++; } JEMALLOC_ALWAYS_INLINE void * @@ -456,11 +453,10 @@ ph_enumerate_queue_pop(void **bfs_queue, ph_enumerate_vars_t *vars) { assert(vars->queue_size <= vars->max_queue_size); void *ret = bfs_queue[vars->front]; vars->front = (vars->front + 1) % vars->max_queue_size; - (vars->queue_size) --; + (vars->queue_size)--; return ret; } - /* * The two functions below offer a solution to enumerate the pairing heap. 
* Whe enumerating, always call ph_enumerate_prepare first to prepare the queue @@ -478,13 +474,13 @@ ph_enumerate_prepare(ph_t *ph, void **bfs_queue, ph_enumerate_vars_t *vars, } JEMALLOC_ALWAYS_INLINE void * -ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, - ph_enumerate_vars_t *vars) { +ph_enumerate_next( + ph_t *ph, size_t offset, void **bfs_queue, ph_enumerate_vars_t *vars) { if (vars->queue_size == 0) { return NULL; } - (vars->visited_num) ++; + (vars->visited_num)++; if (vars->visited_num > vars->max_visit_num) { return NULL; } @@ -502,109 +498,97 @@ ph_enumerate_next(ph_t *ph, size_t offset, void **bfs_queue, return ret; } -#define ph_structs(a_prefix, a_type, a_max_queue_size) \ -typedef struct { \ - phn_link_t link; \ -} a_prefix##_link_t; \ - \ -typedef struct { \ - ph_t ph; \ -} a_prefix##_t; \ - \ -typedef struct { \ - void *bfs_queue[a_max_queue_size]; \ - ph_enumerate_vars_t vars; \ -} a_prefix##_enumerate_helper_t; - +#define ph_structs(a_prefix, a_type, a_max_queue_size) \ + typedef struct { \ + phn_link_t link; \ + } a_prefix##_link_t; \ + \ + typedef struct { \ + ph_t ph; \ + } a_prefix##_t; \ + \ + typedef struct { \ + void *bfs_queue[a_max_queue_size]; \ + ph_enumerate_vars_t vars; \ + } a_prefix##_enumerate_helper_t; /* * The ph_proto() macro generates function prototypes that correspond to the * functions generated by an equivalently parameterized call to ph_gen(). 
*/ -#define ph_proto(a_attr, a_prefix, a_type) \ - \ -a_attr void a_prefix##_new(a_prefix##_t *ph); \ -a_attr bool a_prefix##_empty(a_prefix##_t *ph); \ -a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ -a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ -a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ -a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ -a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ -a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ - uint16_t max_queue_size); \ -a_attr a_type *a_prefix##_enumerate_next(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper); +#define ph_proto(a_attr, a_prefix, a_type) \ + \ + a_attr void a_prefix##_new(a_prefix##_t *ph); \ + a_attr bool a_prefix##_empty(a_prefix##_t *ph); \ + a_attr a_type *a_prefix##_first(a_prefix##_t *ph); \ + a_attr a_type *a_prefix##_any(a_prefix##_t *ph); \ + a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn); \ + a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph); \ + a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn); \ + a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph); \ + a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size); \ + a_attr a_type *a_prefix##_enumerate_next( \ + a_prefix##_t *ph, a_prefix##_enumerate_helper_t *helper); /* The ph_gen() macro generates a type-specific pairing heap implementation. 
*/ -#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ -JEMALLOC_ALWAYS_INLINE int \ -a_prefix##_ph_cmp(void *a, void *b) { \ - return a_cmp((a_type *)a, (a_type *)b); \ -} \ - \ -a_attr void \ -a_prefix##_new(a_prefix##_t *ph) { \ - ph_new(&ph->ph); \ -} \ - \ -a_attr bool \ -a_prefix##_empty(a_prefix##_t *ph) { \ - return ph_empty(&ph->ph); \ -} \ - \ -a_attr a_type * \ -a_prefix##_first(a_prefix##_t *ph) { \ - return ph_first(&ph->ph, offsetof(a_type, a_field), \ - &a_prefix##_ph_cmp); \ -} \ - \ -a_attr a_type * \ -a_prefix##_any(a_prefix##_t *ph) { \ - return ph_any(&ph->ph, offsetof(a_type, a_field)); \ -} \ - \ -a_attr void \ -a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ - ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ -a_attr a_type * \ -a_prefix##_remove_first(a_prefix##_t *ph) { \ - return ph_remove_first(&ph->ph, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ -a_attr void \ -a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ - ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ - a_prefix##_ph_cmp); \ -} \ - \ -a_attr a_type * \ -a_prefix##_remove_any(a_prefix##_t *ph) { \ - a_type *ret = a_prefix##_any(ph); \ - if (ret != NULL) { \ - a_prefix##_remove(ph, ret); \ - } \ - return ret; \ -} \ - \ -a_attr void \ -a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ - uint16_t max_queue_size) { \ - ph_enumerate_prepare(&ph->ph, helper->bfs_queue, &helper->vars, \ - max_visit_num, max_queue_size); \ -} \ - \ -a_attr a_type * \ -a_prefix##_enumerate_next(a_prefix##_t *ph, \ - a_prefix##_enumerate_helper_t *helper) { \ - return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ - helper->bfs_queue, &helper->vars); \ -} +#define ph_gen(a_attr, a_prefix, a_type, a_field, a_cmp) \ + JEMALLOC_ALWAYS_INLINE int a_prefix##_ph_cmp(void *a, void *b) { \ + return a_cmp((a_type *)a, (a_type *)b); \ + } \ + \ + a_attr void 
a_prefix##_new(a_prefix##_t *ph) { \ + ph_new(&ph->ph); \ + } \ + \ + a_attr bool a_prefix##_empty(a_prefix##_t *ph) { \ + return ph_empty(&ph->ph); \ + } \ + \ + a_attr a_type *a_prefix##_first(a_prefix##_t *ph) { \ + return ph_first( \ + &ph->ph, offsetof(a_type, a_field), &a_prefix##_ph_cmp); \ + } \ + \ + a_attr a_type *a_prefix##_any(a_prefix##_t *ph) { \ + return ph_any(&ph->ph, offsetof(a_type, a_field)); \ + } \ + \ + a_attr void a_prefix##_insert(a_prefix##_t *ph, a_type *phn) { \ + ph_insert(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ + } \ + \ + a_attr a_type *a_prefix##_remove_first(a_prefix##_t *ph) { \ + return ph_remove_first( \ + &ph->ph, offsetof(a_type, a_field), a_prefix##_ph_cmp); \ + } \ + \ + a_attr void a_prefix##_remove(a_prefix##_t *ph, a_type *phn) { \ + ph_remove(&ph->ph, phn, offsetof(a_type, a_field), \ + a_prefix##_ph_cmp); \ + } \ + \ + a_attr a_type *a_prefix##_remove_any(a_prefix##_t *ph) { \ + a_type *ret = a_prefix##_any(ph); \ + if (ret != NULL) { \ + a_prefix##_remove(ph, ret); \ + } \ + return ret; \ + } \ + \ + a_attr void a_prefix##_enumerate_prepare(a_prefix##_t *ph, \ + a_prefix##_enumerate_helper_t *helper, uint16_t max_visit_num, \ + uint16_t max_queue_size) { \ + ph_enumerate_prepare(&ph->ph, helper->bfs_queue, \ + &helper->vars, max_visit_num, max_queue_size); \ + } \ + \ + a_attr a_type *a_prefix##_enumerate_next( \ + a_prefix##_t *ph, a_prefix##_enumerate_helper_t *helper) { \ + return ph_enumerate_next(&ph->ph, offsetof(a_type, a_field), \ + helper->bfs_queue, &helper->vars); \ + } #endif /* JEMALLOC_INTERNAL_PH_H */ diff --git a/include/jemalloc/internal/prng.h b/include/jemalloc/internal/prng.h index 81060d32..04049519 100644 --- a/include/jemalloc/internal/prng.h +++ b/include/jemalloc/internal/prng.h @@ -26,11 +26,11 @@ /******************************************************************************/ /* INTERNAL DEFINITIONS -- IGNORE */ 
/******************************************************************************/ -#define PRNG_A_32 UINT32_C(1103515241) -#define PRNG_C_32 UINT32_C(12347) +#define PRNG_A_32 UINT32_C(1103515241) +#define PRNG_C_32 UINT32_C(12347) -#define PRNG_A_64 UINT64_C(6364136223846793005) -#define PRNG_C_64 UINT64_C(1442695040888963407) +#define PRNG_A_64 UINT64_C(6364136223846793005) +#define PRNG_C_64 UINT64_C(1442695040888963407) JEMALLOC_ALWAYS_INLINE uint32_t prng_state_next_u32(uint32_t state) { @@ -49,7 +49,7 @@ prng_state_next_zu(size_t state) { #elif LG_SIZEOF_PTR == 3 return (state * PRNG_A_64) + PRNG_C_64; #else -#error Unsupported pointer size +# error Unsupported pointer size #endif } diff --git a/include/jemalloc/internal/prof_data.h b/include/jemalloc/internal/prof_data.h index 43e8d7e7..0af5835c 100644 --- a/include/jemalloc/internal/prof_data.h +++ b/include/jemalloc/internal/prof_data.h @@ -17,21 +17,21 @@ extern size_t prof_shifted_unbiased_cnt[PROF_SC_NSIZES]; void prof_bt_hash(const void *key, size_t r_hash[2]); bool prof_bt_keycomp(const void *k1, const void *k2); -bool prof_data_init(tsd_t *tsd); +bool prof_data_init(tsd_t *tsd); prof_tctx_t *prof_lookup(tsd_t *tsd, prof_bt_t *bt); -int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); -void prof_unbias_map_init(void); +int prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name); +void prof_unbias_map_init(void); void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck); -prof_tdata_t * prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, +prof_tdata_t *prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, char *thread_name, bool active); -void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); -void prof_reset(tsd_t *tsd, size_t lg_sample); -void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); +void prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata); +void prof_reset(tsd_t *tsd, size_t lg_sample); 
+void prof_tctx_try_destroy(tsd_t *tsd, prof_tctx_t *tctx); /* Used in unit tests. */ size_t prof_tdata_count(void); size_t prof_bt_count(void); -void prof_cnt_all(prof_cnt_t *cnt_all); +void prof_cnt_all(prof_cnt_t *cnt_all); #endif /* JEMALLOC_INTERNAL_PROF_DATA_H */ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index 7d962522..e41e30a0 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -7,21 +7,22 @@ #include "jemalloc/internal/prof_hook.h" #include "jemalloc/internal/thread_event_registry.h" -extern bool opt_prof; -extern bool opt_prof_active; -extern bool opt_prof_thread_active_init; +extern bool opt_prof; +extern bool opt_prof_active; +extern bool opt_prof_thread_active_init; extern unsigned opt_prof_bt_max; -extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern size_t opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ -extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ -extern bool opt_prof_gdump; /* High-water memory dumping. */ -extern bool opt_prof_final; /* Final profile dumping. */ -extern bool opt_prof_leak; /* Dump leak summary at exit. */ -extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ -extern bool opt_prof_accum; /* Report cumulative bytes. */ -extern bool opt_prof_log; /* Turn logging on at boot. */ -extern char opt_prof_prefix[ - /* Minimize memory bloat for non-prof builds. */ +extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ +extern size_t + opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ +extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ +extern bool opt_prof_gdump; /* High-water memory dumping. */ +extern bool opt_prof_final; /* Final profile dumping. */ +extern bool opt_prof_leak; /* Dump leak summary at exit. 
*/ +extern bool opt_prof_leak_error; /* Exit with error code if memory leaked */ +extern bool opt_prof_accum; /* Report cumulative bytes. */ +extern bool opt_prof_log; /* Turn logging on at boot. */ +extern char opt_prof_prefix[ +/* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF PATH_MAX + #endif @@ -57,19 +58,19 @@ extern size_t lg_prof_sample; extern bool prof_booted; -void prof_backtrace_hook_set(prof_backtrace_hook_t hook); +void prof_backtrace_hook_set(prof_backtrace_hook_t hook); prof_backtrace_hook_t prof_backtrace_hook_get(void); -void prof_dump_hook_set(prof_dump_hook_t hook); +void prof_dump_hook_set(prof_dump_hook_t hook); prof_dump_hook_t prof_dump_hook_get(void); -void prof_sample_hook_set(prof_sample_hook_t hook); +void prof_sample_hook_set(prof_sample_hook_t hook); prof_sample_hook_t prof_sample_hook_get(void); -void prof_sample_free_hook_set(prof_sample_free_hook_t hook); +void prof_sample_free_hook_set(prof_sample_free_hook_t hook); prof_sample_free_hook_t prof_sample_free_hook_get(void); -void prof_threshold_hook_set(prof_threshold_hook_t hook); +void prof_threshold_hook_set(prof_threshold_hook_t hook); prof_threshold_hook_t prof_threshold_hook_get(void); /* Functions only accessed in prof_inlines.h */ @@ -77,33 +78,33 @@ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); void prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx); -void prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, - size_t usize, prof_tctx_t *tctx); -void prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_info_t *prof_info); +void prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx); +void prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info); prof_tctx_t *prof_tctx_create(tsd_t *tsd); -void prof_idump(tsdn_t *tsdn); -bool prof_mdump(tsd_t *tsd, const char *filename); 
-void prof_gdump(tsdn_t *tsdn); +void prof_idump(tsdn_t *tsdn); +bool prof_mdump(tsd_t *tsd, const char *filename); +void prof_gdump(tsdn_t *tsdn); -void prof_tdata_cleanup(tsd_t *tsd); -bool prof_active_get(tsdn_t *tsdn); -bool prof_active_set(tsdn_t *tsdn, bool active); +void prof_tdata_cleanup(tsd_t *tsd); +bool prof_active_get(tsdn_t *tsdn); +bool prof_active_set(tsdn_t *tsdn, bool active); const char *prof_thread_name_get(tsd_t *tsd); -int prof_thread_name_set(tsd_t *tsd, const char *thread_name); -bool prof_thread_active_get(tsd_t *tsd); -bool prof_thread_active_set(tsd_t *tsd, bool active); -bool prof_thread_active_init_get(tsdn_t *tsdn); -bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); -bool prof_gdump_get(tsdn_t *tsdn); -bool prof_gdump_set(tsdn_t *tsdn, bool active); -void prof_boot0(void); -void prof_boot1(void); -bool prof_boot2(tsd_t *tsd, base_t *base); -void prof_prefork0(tsdn_t *tsdn); -void prof_prefork1(tsdn_t *tsdn); -void prof_postfork_parent(tsdn_t *tsdn); -void prof_postfork_child(tsdn_t *tsdn); +int prof_thread_name_set(tsd_t *tsd, const char *thread_name); +bool prof_thread_active_get(tsd_t *tsd); +bool prof_thread_active_set(tsd_t *tsd, bool active); +bool prof_thread_active_init_get(tsdn_t *tsdn); +bool prof_thread_active_init_set(tsdn_t *tsdn, bool active_init); +bool prof_gdump_get(tsdn_t *tsdn); +bool prof_gdump_set(tsdn_t *tsdn, bool active); +void prof_boot0(void); +void prof_boot1(void); +bool prof_boot2(tsd_t *tsd, base_t *base); +void prof_prefork0(tsdn_t *tsdn); +void prof_prefork1(tsdn_t *tsdn); +void prof_postfork_parent(tsdn_t *tsdn); +void prof_postfork_child(tsdn_t *tsdn); uint64_t prof_sample_new_event_wait(tsd_t *tsd); uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); @@ -130,8 +131,8 @@ uint64_t tsd_prof_sample_event_wait_get(tsd_t *tsd); */ JEMALLOC_ALWAYS_INLINE bool -te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, - size_t *surplus) { +te_prof_sample_event_lookahead_surplus( + tsd_t 
*tsd, size_t usize, size_t *surplus) { if (surplus != NULL) { /* * This is a dead store: the surplus will be overwritten before @@ -146,8 +147,8 @@ te_prof_sample_event_lookahead_surplus(tsd_t *tsd, size_t usize, return false; } /* The subtraction is intentionally susceptible to underflow. */ - uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize - - tsd_thread_allocated_last_event_get(tsd); + uint64_t accumbytes = tsd_thread_allocated_get(tsd) + usize + - tsd_thread_allocated_last_event_get(tsd); uint64_t sample_wait = tsd_prof_sample_event_wait_get(tsd); if (accumbytes < sample_wait) { return false; diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 2f3a81af..69dfaabf 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -21,7 +21,8 @@ typedef void (*prof_backtrace_hook_t)(void **, unsigned *, unsigned); typedef void (*prof_dump_hook_t)(const char *filename); /* ptr, size, backtrace vector, backtrace vector length, usize */ -typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, void **backtrace, unsigned backtrace_length, size_t usize); +typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, + void **backtrace, unsigned backtrace_length, size_t usize); /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); @@ -29,6 +30,7 @@ typedef void (*prof_sample_free_hook_t)(const void *, size_t); /* * A callback hook that notifies when an allocation threshold has been crossed. 
*/ -typedef void (*prof_threshold_hook_t)(uint64_t alloc, uint64_t dealloc, uint64_t peak); +typedef void (*prof_threshold_hook_t)( + uint64_t alloc, uint64_t dealloc, uint64_t peak); #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_inlines.h b/include/jemalloc/internal/prof_inlines.h index 75300ee4..4a36bd7a 100644 --- a/include/jemalloc/internal/prof_inlines.h +++ b/include/jemalloc/internal/prof_inlines.h @@ -164,8 +164,8 @@ JEMALLOC_ALWAYS_INLINE prof_tctx_t * prof_alloc_prep(tsd_t *tsd, bool prof_active, bool sample_event) { prof_tctx_t *ret; - if (!prof_active || - likely(prof_sample_should_skip(tsd, sample_event))) { + if (!prof_active + || likely(prof_sample_should_skip(tsd, sample_event))) { ret = PROF_TCTX_SENTINEL; } else { ret = prof_tctx_create(tsd); @@ -242,8 +242,8 @@ prof_realloc(tsd_t *tsd, const void *ptr, size_t size, size_t usize, * counters. */ if (unlikely(old_sampled)) { - prof_free_sampled_object(tsd, old_ptr, old_usize, - old_prof_info); + prof_free_sampled_object( + tsd, old_ptr, old_usize, old_prof_info); } } @@ -254,9 +254,10 @@ prof_sample_align(size_t usize, size_t orig_align) { * w/o metadata lookup. */ assert(opt_prof); - return (orig_align < PROF_SAMPLE_ALIGNMENT && - (sz_can_use_slab(usize) || opt_cache_oblivious)) ? - PROF_SAMPLE_ALIGNMENT : orig_align; + return (orig_align < PROF_SAMPLE_ALIGNMENT + && (sz_can_use_slab(usize) || opt_cache_oblivious)) + ? 
PROF_SAMPLE_ALIGNMENT + : orig_align; } JEMALLOC_ALWAYS_INLINE bool @@ -271,8 +272,8 @@ prof_sampled(tsd_t *tsd, const void *ptr) { } JEMALLOC_ALWAYS_INLINE void -prof_free(tsd_t *tsd, const void *ptr, size_t usize, - emap_alloc_ctx_t *alloc_ctx) { +prof_free( + tsd_t *tsd, const void *ptr, size_t usize, emap_alloc_ctx_t *alloc_ctx) { prof_info_t prof_info; prof_info_get_and_reset_recent(tsd, ptr, alloc_ctx, &prof_info); diff --git a/include/jemalloc/internal/prof_log.h b/include/jemalloc/internal/prof_log.h index 0b1271c8..d9b97dc8 100644 --- a/include/jemalloc/internal/prof_log.h +++ b/include/jemalloc/internal/prof_log.h @@ -13,9 +13,9 @@ bool prof_log_init(tsd_t *tsdn); size_t prof_log_bt_count(void); size_t prof_log_alloc_count(void); size_t prof_log_thr_count(void); -bool prof_log_is_logging(void); -bool prof_log_rep_check(void); -void prof_log_dummy_set(bool new_value); +bool prof_log_is_logging(void); +bool prof_log_rep_check(void); +void prof_log_dummy_set(bool new_value); bool prof_log_start(tsdn_t *tsdn, const char *filename); bool prof_log_stop(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/prof_structs.h b/include/jemalloc/internal/prof_structs.h index 084a549d..d38b15ea 100644 --- a/include/jemalloc/internal/prof_structs.h +++ b/include/jemalloc/internal/prof_structs.h @@ -10,29 +10,29 @@ struct prof_bt_s { /* Backtrace, stored as len program counters. */ - void **vec; - unsigned len; + void **vec; + unsigned len; }; #ifdef JEMALLOC_PROF_LIBGCC /* Data structure passed to libgcc _Unwind_Backtrace() callback functions. */ typedef struct { - void **vec; - unsigned *len; - unsigned max; + void **vec; + unsigned *len; + unsigned max; } prof_unwind_data_t; #endif struct prof_cnt_s { /* Profiling counters. 
*/ - uint64_t curobjs; - uint64_t curobjs_shifted_unbiased; - uint64_t curbytes; - uint64_t curbytes_unbiased; - uint64_t accumobjs; - uint64_t accumobjs_shifted_unbiased; - uint64_t accumbytes; - uint64_t accumbytes_unbiased; + uint64_t curobjs; + uint64_t curobjs_shifted_unbiased; + uint64_t curbytes; + uint64_t curbytes_unbiased; + uint64_t accumobjs; + uint64_t accumobjs_shifted_unbiased; + uint64_t accumbytes; + uint64_t accumbytes_unbiased; }; typedef enum { @@ -44,26 +44,26 @@ typedef enum { struct prof_tctx_s { /* Thread data for thread that performed the allocation. */ - prof_tdata_t *tdata; + prof_tdata_t *tdata; /* * Copy of tdata->thr_{uid,discrim}, necessary because tdata may be * defunct during teardown. */ - uint64_t thr_uid; - uint64_t thr_discrim; + uint64_t thr_uid; + uint64_t thr_discrim; /* * Reference count of how many times this tctx object is referenced in * recent allocation / deallocation records, protected by tdata->lock. */ - uint64_t recent_count; + uint64_t recent_count; /* Profiling counters, protected by tdata->lock. */ - prof_cnt_t cnts; + prof_cnt_t cnts; /* Associated global context. */ - prof_gctx_t *gctx; + prof_gctx_t *gctx; /* * UID that distinguishes multiple tctx's created by the same thread, @@ -78,40 +78,40 @@ struct prof_tctx_s { * threshold can be hit again before the first consumer finishes * executing prof_tctx_destroy(). */ - uint64_t tctx_uid; + uint64_t tctx_uid; /* Linkage into gctx's tctxs. */ - rb_node(prof_tctx_t) tctx_link; + rb_node(prof_tctx_t) tctx_link; /* * True during prof_alloc_prep()..prof_malloc_sample_object(), prevents * sample vs destroy race. */ - bool prepared; + bool prepared; /* Current dump-related state, protected by gctx->lock. */ - prof_tctx_state_t state; + prof_tctx_state_t state; /* * Copy of cnts snapshotted during early dump phase, protected by * dump_mtx. 
*/ - prof_cnt_t dump_cnts; + prof_cnt_t dump_cnts; }; typedef rb_tree(prof_tctx_t) prof_tctx_tree_t; struct prof_info_s { /* Time when the allocation was made. */ - nstime_t alloc_time; + nstime_t alloc_time; /* Points to the prof_tctx_t corresponding to the allocation. */ - prof_tctx_t *alloc_tctx; + prof_tctx_t *alloc_tctx; /* Allocation request size. */ - size_t alloc_size; + size_t alloc_size; }; struct prof_gctx_s { /* Protects nlimbo, cnt_summed, and tctxs. */ - malloc_mutex_t *lock; + malloc_mutex_t *lock; /* * Number of threads that currently cause this gctx to be in a state of @@ -123,48 +123,48 @@ struct prof_gctx_s { * nlimbo must be 1 (single destroyer) in order to safely destroy the * gctx. */ - unsigned nlimbo; + unsigned nlimbo; /* * Tree of profile counters, one for each thread that has allocated in * this context. */ - prof_tctx_tree_t tctxs; + prof_tctx_tree_t tctxs; /* Linkage for tree of contexts to be dumped. */ - rb_node(prof_gctx_t) dump_link; + rb_node(prof_gctx_t) dump_link; /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; + prof_cnt_t cnt_summed; /* Associated backtrace. */ - prof_bt_t bt; + prof_bt_t bt; /* Backtrace vector, variable size, referred to by bt. */ - void *vec[1]; + void *vec[1]; }; typedef rb_tree(prof_gctx_t) prof_gctx_tree_t; struct prof_tdata_s { - malloc_mutex_t *lock; + malloc_mutex_t *lock; /* Monotonically increasing unique thread identifier. */ - uint64_t thr_uid; + uint64_t thr_uid; /* * Monotonically increasing discriminator among tdata structures * associated with the same thr_uid. */ - uint64_t thr_discrim; + uint64_t thr_discrim; - rb_node(prof_tdata_t) tdata_link; + rb_node(prof_tdata_t) tdata_link; /* * Counter used to initialize prof_tctx_t's tctx_uid. No locking is * necessary when incrementing this field, because only one thread ever * does so. */ - uint64_t tctx_uid_next; + uint64_t tctx_uid_next; /* * Hash of (prof_bt_t *)-->(prof_tctx_t *). 
Each thread tracks @@ -172,15 +172,15 @@ struct prof_tdata_s { * associated with thread-specific prof_tctx_t objects. Other threads * may write to prof_tctx_t contents when freeing associated objects. */ - ckh_t bt2tctx; + ckh_t bt2tctx; /* Included in heap profile dumps if has content. */ - char thread_name[PROF_THREAD_NAME_MAX_LEN]; + char thread_name[PROF_THREAD_NAME_MAX_LEN]; /* State used to avoid dumping while operating on prof internals. */ - bool enq; - bool enq_idump; - bool enq_gdump; + bool enq; + bool enq_idump; + bool enq_gdump; /* * Set to true during an early dump phase for tdata's which are @@ -188,22 +188,22 @@ struct prof_tdata_s { * to false so that they aren't accidentally included in later dump * phases. */ - bool dumping; + bool dumping; /* * True if profiling is active for this tdata's thread * (thread.prof.active mallctl). */ - bool active; + bool active; - bool attached; - bool expired; + bool attached; + bool expired; /* Temporary storage for summation during dump. */ - prof_cnt_t cnt_summed; + prof_cnt_t cnt_summed; /* Backtrace vector, used for calls to prof_backtrace(). */ - void **vec; + void **vec; }; typedef rb_tree(prof_tdata_t) prof_tdata_tree_t; @@ -212,9 +212,9 @@ struct prof_recent_s { nstime_t dalloc_time; ql_elm(prof_recent_t) link; - size_t size; - size_t usize; - atomic_p_t alloc_edata; /* NULL means allocation has been freed. */ + size_t size; + size_t usize; + atomic_p_t alloc_edata; /* NULL means allocation has been freed. 
*/ prof_tctx_t *alloc_tctx; prof_tctx_t *dalloc_tctx; }; diff --git a/include/jemalloc/internal/prof_sys.h b/include/jemalloc/internal/prof_sys.h index 42284b38..0745b991 100644 --- a/include/jemalloc/internal/prof_sys.h +++ b/include/jemalloc/internal/prof_sys.h @@ -6,30 +6,30 @@ #include "jemalloc/internal/mutex.h" extern malloc_mutex_t prof_dump_filename_mtx; -extern base_t *prof_base; +extern base_t *prof_base; void bt_init(prof_bt_t *bt, void **vec); void prof_backtrace(tsd_t *tsd, prof_bt_t *bt); void prof_hooks_init(void); void prof_unwind_init(void); void prof_sys_thread_name_fetch(tsd_t *tsd); -int prof_getpid(void); +int prof_getpid(void); void prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind); bool prof_prefix_set(tsdn_t *tsdn, const char *prefix); void prof_fdump_impl(tsd_t *tsd); void prof_idump_impl(tsd_t *tsd); bool prof_mdump_impl(tsd_t *tsd, const char *filename); void prof_gdump_impl(tsd_t *tsd); -int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high); +int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high); /* Used in unit tests. 
*/ -typedef int (prof_sys_thread_name_read_t)(char *buf, size_t limit); +typedef int(prof_sys_thread_name_read_t)(char *buf, size_t limit); extern prof_sys_thread_name_read_t *JET_MUTABLE prof_sys_thread_name_read; -typedef int (prof_dump_open_file_t)(const char *, int); +typedef int(prof_dump_open_file_t)(const char *, int); extern prof_dump_open_file_t *JET_MUTABLE prof_dump_open_file; -typedef ssize_t (prof_dump_write_file_t)(int, const void *, size_t); +typedef ssize_t(prof_dump_write_file_t)(int, const void *, size_t); extern prof_dump_write_file_t *JET_MUTABLE prof_dump_write_file; -typedef int (prof_dump_open_maps_t)(void); +typedef int(prof_dump_open_maps_t)(void); extern prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps; #endif /* JEMALLOC_INTERNAL_PROF_SYS_H */ diff --git a/include/jemalloc/internal/prof_types.h b/include/jemalloc/internal/prof_types.h index a27f7fb3..7468885e 100644 --- a/include/jemalloc/internal/prof_types.h +++ b/include/jemalloc/internal/prof_types.h @@ -1,22 +1,22 @@ #ifndef JEMALLOC_INTERNAL_PROF_TYPES_H #define JEMALLOC_INTERNAL_PROF_TYPES_H -typedef struct prof_bt_s prof_bt_t; -typedef struct prof_cnt_s prof_cnt_t; -typedef struct prof_tctx_s prof_tctx_t; -typedef struct prof_info_s prof_info_t; -typedef struct prof_gctx_s prof_gctx_t; -typedef struct prof_tdata_s prof_tdata_t; +typedef struct prof_bt_s prof_bt_t; +typedef struct prof_cnt_s prof_cnt_t; +typedef struct prof_tctx_s prof_tctx_t; +typedef struct prof_info_s prof_info_t; +typedef struct prof_gctx_s prof_gctx_t; +typedef struct prof_tdata_s prof_tdata_t; typedef struct prof_recent_s prof_recent_t; /* Option defaults. 
*/ #ifdef JEMALLOC_PROF -# define PROF_PREFIX_DEFAULT "jeprof" +# define PROF_PREFIX_DEFAULT "jeprof" #else -# define PROF_PREFIX_DEFAULT "" +# define PROF_PREFIX_DEFAULT "" #endif -#define LG_PROF_SAMPLE_DEFAULT 19 -#define LG_PROF_INTERVAL_DEFAULT -1 +#define LG_PROF_SAMPLE_DEFAULT 19 +#define LG_PROF_INTERVAL_DEFAULT -1 /* * Hard limit on stack backtrace depth. The version of prof_backtrace() that @@ -24,54 +24,54 @@ typedef struct prof_recent_s prof_recent_t; * of backtrace frame handlers, and should be kept in sync with this setting. */ #ifdef JEMALLOC_PROF_GCC -# define PROF_BT_MAX_LIMIT 256 +# define PROF_BT_MAX_LIMIT 256 #else -# define PROF_BT_MAX_LIMIT UINT_MAX +# define PROF_BT_MAX_LIMIT UINT_MAX #endif -#define PROF_BT_MAX_DEFAULT 128 +#define PROF_BT_MAX_DEFAULT 128 /* Initial hash table size. */ -#define PROF_CKH_MINITEMS 64 +#define PROF_CKH_MINITEMS 64 /* Size of memory buffer to use when writing dump files. */ #ifndef JEMALLOC_PROF /* Minimize memory bloat for non-prof builds. */ -# define PROF_DUMP_BUFSIZE 1 +# define PROF_DUMP_BUFSIZE 1 #elif defined(JEMALLOC_DEBUG) /* Use a small buffer size in debug build, mainly to facilitate testing. */ -# define PROF_DUMP_BUFSIZE 16 +# define PROF_DUMP_BUFSIZE 16 #else -# define PROF_DUMP_BUFSIZE 65536 +# define PROF_DUMP_BUFSIZE 65536 #endif /* Size of size class related tables */ #ifdef JEMALLOC_PROF -# define PROF_SC_NSIZES SC_NSIZES +# define PROF_SC_NSIZES SC_NSIZES #else /* Minimize memory bloat for non-prof builds. */ -# define PROF_SC_NSIZES 1 +# define PROF_SC_NSIZES 1 #endif /* Size of stack-allocated buffer used by prof_printf(). */ -#define PROF_PRINTF_BUFSIZE 128 +#define PROF_PRINTF_BUFSIZE 128 /* * Number of mutexes shared among all gctx's. No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ -#define PROF_NCTX_LOCKS 1024 +#define PROF_NCTX_LOCKS 1024 /* * Number of mutexes shared among all tdata's. 
No space is allocated for these * unless profiling is enabled, so it's okay to over-provision. */ -#define PROF_NTDATA_LOCKS 256 +#define PROF_NTDATA_LOCKS 256 /* Minimize memory bloat for non-prof builds. */ #ifdef JEMALLOC_PROF -#define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) +# define PROF_DUMP_FILENAME_LEN (PATH_MAX + 1) #else -#define PROF_DUMP_FILENAME_LEN 1 +# define PROF_DUMP_FILENAME_LEN 1 #endif /* Default number of recent allocations to record. */ diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index ea608213..3fdecaed 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -90,7 +90,7 @@ struct psset_s { */ hpdata_age_heap_t pageslabs[PSSET_NPSIZES]; /* Bitmap for which set bits correspond to non-empty heaps. */ - fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; + fb_group_t pageslab_bitmap[FB_NGROUPS(PSSET_NPSIZES)]; psset_stats_t stats; /* * Slabs with no active allocations, but which are allowed to serve new diff --git a/include/jemalloc/internal/ql.h b/include/jemalloc/internal/ql.h index ebe69988..9c1776a4 100644 --- a/include/jemalloc/internal/ql.h +++ b/include/jemalloc/internal/ql.h @@ -28,33 +28,36 @@ */ /* List definitions. */ -#define ql_head(a_type) \ -struct { \ - a_type *qlh_first; \ -} +#define ql_head(a_type) \ + struct { \ + a_type *qlh_first; \ + } /* Static initializer for an empty list. */ -#define ql_head_initializer(a_head) {NULL} +#define ql_head_initializer(a_head) \ + { NULL } /* The field definition. */ -#define ql_elm(a_type) qr(a_type) +#define ql_elm(a_type) qr(a_type) /* A pointer to the first element in the list, or NULL if the list is empty. */ #define ql_first(a_head) ((a_head)->qlh_first) /* Dynamically initializes a list. 
*/ -#define ql_new(a_head) do { \ - ql_first(a_head) = NULL; \ -} while (0) +#define ql_new(a_head) \ + do { \ + ql_first(a_head) = NULL; \ + } while (0) /* * Sets dest to be the contents of src (overwriting any elements there), leaving * src empty. */ -#define ql_move(a_head_dest, a_head_src) do { \ - ql_first(a_head_dest) = ql_first(a_head_src); \ - ql_new(a_head_src); \ -} while (0) +#define ql_move(a_head_dest, a_head_src) \ + do { \ + ql_first(a_head_dest) = ql_first(a_head_src); \ + ql_new(a_head_src); \ + } while (0) /* True if the list is empty, otherwise false. */ #define ql_empty(a_head) (ql_first(a_head) == NULL) @@ -68,85 +71,91 @@ struct { \ /* * Obtains the last item in the list. */ -#define ql_last(a_head, a_field) \ +#define ql_last(a_head, a_field) \ (ql_empty(a_head) ? NULL : qr_prev(ql_first(a_head), a_field)) /* * Gets a pointer to the next/prev element in the list. Trying to advance past * the end or retreat before the beginning of the list returns NULL. */ -#define ql_next(a_head, a_elm, a_field) \ - ((ql_last(a_head, a_field) != (a_elm)) \ - ? qr_next((a_elm), a_field) : NULL) -#define ql_prev(a_head, a_elm, a_field) \ - ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) \ - : NULL) +#define ql_next(a_head, a_elm, a_field) \ + ((ql_last(a_head, a_field) != (a_elm)) ? qr_next((a_elm), a_field) \ + : NULL) +#define ql_prev(a_head, a_elm, a_field) \ + ((ql_first(a_head) != (a_elm)) ? qr_prev((a_elm), a_field) : NULL) /* Inserts a_elm before a_qlelm in the list. */ -#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) do { \ - qr_before_insert((a_qlelm), (a_elm), a_field); \ - if (ql_first(a_head) == (a_qlelm)) { \ - ql_first(a_head) = (a_elm); \ - } \ -} while (0) +#define ql_before_insert(a_head, a_qlelm, a_elm, a_field) \ + do { \ + qr_before_insert((a_qlelm), (a_elm), a_field); \ + if (ql_first(a_head) == (a_qlelm)) { \ + ql_first(a_head) = (a_elm); \ + } \ + } while (0) /* Inserts a_elm after a_qlelm in the list. 
*/ -#define ql_after_insert(a_qlelm, a_elm, a_field) \ +#define ql_after_insert(a_qlelm, a_elm, a_field) \ qr_after_insert((a_qlelm), (a_elm), a_field) /* Inserts a_elm as the first item in the list. */ -#define ql_head_insert(a_head, a_elm, a_field) do { \ - if (!ql_empty(a_head)) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = (a_elm); \ -} while (0) +#define ql_head_insert(a_head, a_elm, a_field) \ + do { \ + if (!ql_empty(a_head)) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = (a_elm); \ + } while (0) /* Inserts a_elm as the last item in the list. */ -#define ql_tail_insert(a_head, a_elm, a_field) do { \ - if (!ql_empty(a_head)) { \ - qr_before_insert(ql_first(a_head), (a_elm), a_field); \ - } \ - ql_first(a_head) = qr_next((a_elm), a_field); \ -} while (0) +#define ql_tail_insert(a_head, a_elm, a_field) \ + do { \ + if (!ql_empty(a_head)) { \ + qr_before_insert(ql_first(a_head), (a_elm), a_field); \ + } \ + ql_first(a_head) = qr_next((a_elm), a_field); \ + } while (0) /* * Given lists a = [a_1, ..., a_n] and [b_1, ..., b_n], results in: * a = [a1, ..., a_n, b_1, ..., b_n] and b = []. */ -#define ql_concat(a_head_a, a_head_b, a_field) do { \ - if (ql_empty(a_head_a)) { \ - ql_move(a_head_a, a_head_b); \ - } else if (!ql_empty(a_head_b)) { \ - qr_meld(ql_first(a_head_a), ql_first(a_head_b), \ - a_field); \ - ql_new(a_head_b); \ - } \ -} while (0) +#define ql_concat(a_head_a, a_head_b, a_field) \ + do { \ + if (ql_empty(a_head_a)) { \ + ql_move(a_head_a, a_head_b); \ + } else if (!ql_empty(a_head_b)) { \ + qr_meld( \ + ql_first(a_head_a), ql_first(a_head_b), a_field); \ + ql_new(a_head_b); \ + } \ + } while (0) /* Removes a_elm from the list. 
*/ -#define ql_remove(a_head, a_elm, a_field) do { \ - if (ql_first(a_head) == (a_elm)) { \ - ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ - } \ - if (ql_first(a_head) != (a_elm)) { \ - qr_remove((a_elm), a_field); \ - } else { \ - ql_new(a_head); \ - } \ -} while (0) +#define ql_remove(a_head, a_elm, a_field) \ + do { \ + if (ql_first(a_head) == (a_elm)) { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } \ + if (ql_first(a_head) != (a_elm)) { \ + qr_remove((a_elm), a_field); \ + } else { \ + ql_new(a_head); \ + } \ + } while (0) /* Removes the first item in the list. */ -#define ql_head_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_first(a_head); \ - ql_remove((a_head), t, a_field); \ -} while (0) +#define ql_head_remove(a_head, a_type, a_field) \ + do { \ + a_type *t = ql_first(a_head); \ + ql_remove((a_head), t, a_field); \ + } while (0) /* Removes the last item in the list. */ -#define ql_tail_remove(a_head, a_type, a_field) do { \ - a_type *t = ql_last(a_head, a_field); \ - ql_remove((a_head), t, a_field); \ -} while (0) +#define ql_tail_remove(a_head, a_type, a_field) \ + do { \ + a_type *t = ql_last(a_head, a_field); \ + ql_remove((a_head), t, a_field); \ + } while (0) /* * Given a = [a_1, a_2, ..., a_n-1, a_n, a_n+1, ...], @@ -155,14 +164,15 @@ struct { \ * and replaces b's contents with: * b = [a_n, a_n+1, ...] 
*/ -#define ql_split(a_head_a, a_elm, a_head_b, a_field) do { \ - if (ql_first(a_head_a) == (a_elm)) { \ - ql_move(a_head_b, a_head_a); \ - } else { \ - qr_split(ql_first(a_head_a), (a_elm), a_field); \ - ql_first(a_head_b) = (a_elm); \ - } \ -} while (0) +#define ql_split(a_head_a, a_elm, a_head_b, a_field) \ + do { \ + if (ql_first(a_head_a) == (a_elm)) { \ + ql_move(a_head_b, a_head_a); \ + } else { \ + qr_split(ql_first(a_head_a), (a_elm), a_field); \ + ql_first(a_head_b) = (a_elm); \ + } \ + } while (0) /* * An optimized version of: @@ -170,9 +180,10 @@ struct { \ * ql_remove((a_head), t, a_field); * ql_tail_insert((a_head), t, a_field); */ -#define ql_rotate(a_head, a_field) do { \ - ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ -} while (0) +#define ql_rotate(a_head, a_field) \ + do { \ + ql_first(a_head) = qr_next(ql_first(a_head), a_field); \ + } while (0) /* * Helper macro to iterate over each element in a list in order, starting from @@ -189,10 +200,10 @@ struct { \ * } */ -#define ql_foreach(a_var, a_head, a_field) \ - qr_foreach((a_var), ql_first(a_head), a_field) +#define ql_foreach(a_var, a_head, a_field) \ + qr_foreach ((a_var), ql_first(a_head), a_field) -#define ql_reverse_foreach(a_var, a_head, a_field) \ +#define ql_reverse_foreach(a_var, a_head, a_field) \ qr_reverse_foreach((a_var), ql_first(a_head), a_field) #endif /* JEMALLOC_INTERNAL_QL_H */ diff --git a/include/jemalloc/internal/qr.h b/include/jemalloc/internal/qr.h index ece4f556..1bd61f38 100644 --- a/include/jemalloc/internal/qr.h +++ b/include/jemalloc/internal/qr.h @@ -17,21 +17,22 @@ */ /* Ring definitions. */ -#define qr(a_type) \ -struct { \ - a_type *qre_next; \ - a_type *qre_prev; \ -} +#define qr(a_type) \ + struct { \ + a_type *qre_next; \ + a_type *qre_prev; \ + } /* * Initialize a qr link. Every link must be initialized before being used, even * if that initialization is going to be immediately overwritten (say, by being * passed into an insertion macro). 
*/ -#define qr_new(a_qr, a_field) do { \ - (a_qr)->a_field.qre_next = (a_qr); \ - (a_qr)->a_field.qre_prev = (a_qr); \ -} while (0) +#define qr_new(a_qr, a_field) \ + do { \ + (a_qr)->a_field.qre_next = (a_qr); \ + (a_qr)->a_field.qre_prev = (a_qr); \ + } while (0) /* * Go forwards or backwards in the ring. Note that (the ring being circular), this @@ -58,26 +59,27 @@ struct { \ * * a_qr_a can directly be a qr_next() macro, but a_qr_b cannot. */ -#define qr_meld(a_qr_a, a_qr_b, a_field) do { \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ - (a_qr_a)->a_field.qre_prev; \ - (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ - (a_qr_b)->a_field.qre_prev = \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next; \ - (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ - (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ -} while (0) +#define qr_meld(a_qr_a, a_qr_b, a_field) \ + do { \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = \ + (a_qr_a)->a_field.qre_prev; \ + (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \ + (a_qr_b)->a_field.qre_prev = \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next; \ + (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \ + (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \ + } while (0) /* * Logically, this is just a meld. The intent, though, is that a_qrelm is a * single-element ring, so that "before" has a more obvious interpretation than * meld. */ -#define qr_before_insert(a_qrelm, a_qr, a_field) \ +#define qr_before_insert(a_qrelm, a_qr, a_field) \ qr_meld((a_qrelm), (a_qr), a_field) /* Ditto, but inserting after rather than before. */ -#define qr_after_insert(a_qrelm, a_qr, a_field) \ +#define qr_after_insert(a_qrelm, a_qr, a_field) \ qr_before_insert(qr_next(a_qrelm, a_field), (a_qr), a_field) /* @@ -98,14 +100,13 @@ struct { \ * qr_meld() and qr_split() are functionally equivalent, so there's no need to * have two copies of the code. 
*/ -#define qr_split(a_qr_a, a_qr_b, a_field) \ - qr_meld((a_qr_a), (a_qr_b), a_field) +#define qr_split(a_qr_a, a_qr_b, a_field) qr_meld((a_qr_a), (a_qr_b), a_field) /* * Splits off a_qr from the rest of its ring, so that it becomes a * single-element ring. */ -#define qr_remove(a_qr, a_field) \ +#define qr_remove(a_qr, a_field) \ qr_split(qr_next(a_qr, a_field), (a_qr), a_field) /* @@ -121,20 +122,19 @@ struct { \ * return sum; * } */ -#define qr_foreach(var, a_qr, a_field) \ - for ((var) = (a_qr); \ - (var) != NULL; \ - (var) = (((var)->a_field.qre_next != (a_qr)) \ - ? (var)->a_field.qre_next : NULL)) +#define qr_foreach(var, a_qr, a_field) \ + for ((var) = (a_qr); (var) != NULL; \ + (var) = (((var)->a_field.qre_next != (a_qr)) \ + ? (var)->a_field.qre_next \ + : NULL)) /* * The same (and with the same usage) as qr_foreach, but in the opposite order, * ending with a_qr. */ -#define qr_reverse_foreach(var, a_qr, a_field) \ - for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ - (var) != NULL; \ - (var) = (((var) != (a_qr)) \ - ? (var)->a_field.qre_prev : NULL)) +#define qr_reverse_foreach(var, a_qr, a_field) \ + for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \ + (var) != NULL; \ + (var) = (((var) != (a_qr)) ? (var)->a_field.qre_prev : NULL)) #endif /* JEMALLOC_INTERNAL_QR_H */ diff --git a/include/jemalloc/internal/quantum.h b/include/jemalloc/internal/quantum.h index b4beb309..2f7c0466 100644 --- a/include/jemalloc/internal/quantum.h +++ b/include/jemalloc/internal/quantum.h @@ -6,82 +6,84 @@ * classes). 
*/ #ifndef LG_QUANTUM -# if (defined(__i386__) || defined(_M_IX86)) -# define LG_QUANTUM 4 -# endif -# ifdef __ia64__ -# define LG_QUANTUM 4 -# endif -# ifdef __alpha__ -# define LG_QUANTUM 4 -# endif -# if (defined(__sparc64__) || defined(__sparcv9) || defined(__sparc_v9__)) -# define LG_QUANTUM 4 -# endif -# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) -# define LG_QUANTUM 4 -# endif -# ifdef __arm__ -# define LG_QUANTUM 3 -# endif -# if defined(__aarch64__) || defined(_M_ARM64) -# define LG_QUANTUM 4 -# endif -# ifdef __hppa__ -# define LG_QUANTUM 4 -# endif -# ifdef __loongarch__ -# define LG_QUANTUM 4 -# endif -# ifdef __m68k__ -# define LG_QUANTUM 3 -# endif -# ifdef __mips__ -# if defined(__mips_n32) || defined(__mips_n64) -# define LG_QUANTUM 4 -# else -# define LG_QUANTUM 3 -# endif -# endif -# ifdef __nios2__ -# define LG_QUANTUM 3 -# endif -# ifdef __or1k__ -# define LG_QUANTUM 3 -# endif -# if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) || defined(__ppc64__) -# define LG_QUANTUM 4 -# endif -# if defined(__riscv) || defined(__riscv__) -# define LG_QUANTUM 4 -# endif -# ifdef __s390__ -# define LG_QUANTUM 4 -# endif -# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ - defined(__SH4_SINGLE_ONLY__)) -# define LG_QUANTUM 4 -# endif -# ifdef __tile__ -# define LG_QUANTUM 4 -# endif -# ifdef __le32__ -# define LG_QUANTUM 4 -# endif -# ifdef __arc__ -# define LG_QUANTUM 3 -# endif -# ifndef LG_QUANTUM -# error "Unknown minimum alignment for architecture; specify via " - "--with-lg-quantum" -# endif +# if (defined(__i386__) || defined(_M_IX86)) +# define LG_QUANTUM 4 +# endif +# ifdef __ia64__ +# define LG_QUANTUM 4 +# endif +# ifdef __alpha__ +# define LG_QUANTUM 4 +# endif +# if (defined(__sparc64__) || defined(__sparcv9) \ + || defined(__sparc_v9__)) +# define LG_QUANTUM 4 +# endif +# if (defined(__amd64__) || defined(__x86_64__) || defined(_M_X64)) +# define LG_QUANTUM 4 +# endif +# ifdef 
__arm__ +# define LG_QUANTUM 3 +# endif +# if defined(__aarch64__) || defined(_M_ARM64) +# define LG_QUANTUM 4 +# endif +# ifdef __hppa__ +# define LG_QUANTUM 4 +# endif +# ifdef __loongarch__ +# define LG_QUANTUM 4 +# endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif +# ifdef __mips__ +# if defined(__mips_n32) || defined(__mips_n64) +# define LG_QUANTUM 4 +# else +# define LG_QUANTUM 3 +# endif +# endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif +# ifdef __or1k__ +# define LG_QUANTUM 3 +# endif +# if defined(__powerpc__) || defined(__ppc__) || defined(__powerpc64__) \ + || defined(__ppc64__) +# define LG_QUANTUM 4 +# endif +# if defined(__riscv) || defined(__riscv__) +# define LG_QUANTUM 4 +# endif +# ifdef __s390__ +# define LG_QUANTUM 4 +# endif +# if (defined(__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) \ + || defined(__SH4_SINGLE_ONLY__)) +# define LG_QUANTUM 4 +# endif +# ifdef __tile__ +# define LG_QUANTUM 4 +# endif +# ifdef __le32__ +# define LG_QUANTUM 4 +# endif +# ifdef __arc__ +# define LG_QUANTUM 3 +# endif +# ifndef LG_QUANTUM +# error \ + "Unknown minimum alignment for architecture; specify via " +"--with-lg-quantum" +# endif #endif -#define QUANTUM ((size_t)(1U << LG_QUANTUM)) -#define QUANTUM_MASK (QUANTUM - 1) +#define QUANTUM ((size_t)(1U << LG_QUANTUM)) +#define QUANTUM_MASK (QUANTUM - 1) /* Return the smallest quantum multiple that is >= a. 
*/ -#define QUANTUM_CEILING(a) \ - (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) +#define QUANTUM_CEILING(a) (((a) + QUANTUM_MASK) & ~QUANTUM_MASK) #endif /* JEMALLOC_INTERNAL_QUANTUM_H */ diff --git a/include/jemalloc/internal/rb.h b/include/jemalloc/internal/rb.h index 235d548e..58510e4d 100644 --- a/include/jemalloc/internal/rb.h +++ b/include/jemalloc/internal/rb.h @@ -26,7 +26,7 @@ */ #ifndef __PGI -#define RB_COMPACT +# define RB_COMPACT #endif /* diff --git a/include/jemalloc/internal/rtree.h b/include/jemalloc/internal/rtree.h index f35368ae..07205958 100644 --- a/include/jemalloc/internal/rtree.h +++ b/include/jemalloc/internal/rtree.h @@ -18,48 +18,49 @@ */ /* Number of high insignificant bits. */ -#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR+3)) - LG_VADDR) +#define RTREE_NHIB ((1U << (LG_SIZEOF_PTR + 3)) - LG_VADDR) /* Number of low insigificant bits. */ #define RTREE_NLIB LG_PAGE /* Number of significant bits. */ #define RTREE_NSB (LG_VADDR - RTREE_NLIB) /* Number of levels in radix tree. */ #if RTREE_NSB <= 10 -# define RTREE_HEIGHT 1 +# define RTREE_HEIGHT 1 #elif RTREE_NSB <= 36 -# define RTREE_HEIGHT 2 +# define RTREE_HEIGHT 2 #elif RTREE_NSB <= 52 -# define RTREE_HEIGHT 3 +# define RTREE_HEIGHT 3 #else -# error Unsupported number of significant virtual address bits +# error Unsupported number of significant virtual address bits #endif /* Use compact leaf representation if virtual address encoding allows. */ #if RTREE_NHIB >= LG_CEIL(SC_NSIZES) -# define RTREE_LEAF_COMPACT +# define RTREE_LEAF_COMPACT #endif typedef struct rtree_node_elm_s rtree_node_elm_t; struct rtree_node_elm_s { - atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ + atomic_p_t child; /* (rtree_{node,leaf}_elm_t *) */ }; typedef struct rtree_metadata_s rtree_metadata_t; struct rtree_metadata_s { - szind_t szind; - extent_state_t state; /* Mirrors edata->state. */ - bool is_head; /* Mirrors edata->is_head. */ - bool slab; + szind_t szind; + extent_state_t state; /* Mirrors edata->state. 
*/ + bool is_head; /* Mirrors edata->is_head. */ + bool slab; }; typedef struct rtree_contents_s rtree_contents_t; struct rtree_contents_s { - edata_t *edata; + edata_t *edata; rtree_metadata_t metadata; }; #define RTREE_LEAF_STATE_WIDTH EDATA_BITS_STATE_WIDTH #define RTREE_LEAF_STATE_SHIFT 2 -#define RTREE_LEAF_STATE_MASK MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT) +#define RTREE_LEAF_STATE_MASK \ + MASK(RTREE_LEAF_STATE_WIDTH, RTREE_LEAF_STATE_SHIFT) struct rtree_leaf_elm_s { #ifdef RTREE_LEAF_COMPACT @@ -77,36 +78,36 @@ struct rtree_leaf_elm_s { * * 00000000 xxxxxxxx eeeeeeee [...] eeeeeeee e00ssshb */ - atomic_p_t le_bits; + atomic_p_t le_bits; #else - atomic_p_t le_edata; /* (edata_t *) */ + atomic_p_t le_edata; /* (edata_t *) */ /* * From high to low bits: szind (8 bits), state (4 bits), is_head, slab */ - atomic_u_t le_metadata; + atomic_u_t le_metadata; #endif }; typedef struct rtree_level_s rtree_level_t; struct rtree_level_s { /* Number of key bits distinguished by this level. */ - unsigned bits; + unsigned bits; /* * Cumulative number of key bits distinguished by traversing to * corresponding tree level. */ - unsigned cumbits; + unsigned cumbits; }; typedef struct rtree_s rtree_t; struct rtree_s { - base_t *base; - malloc_mutex_t init_lock; + base_t *base; + malloc_mutex_t init_lock; /* Number of elements based on rtree_levels[0].bits. 
*/ #if RTREE_HEIGHT > 1 - rtree_node_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; + rtree_node_elm_t root[1U << (RTREE_NSB / RTREE_HEIGHT)]; #else - rtree_leaf_elm_t root[1U << (RTREE_NSB/RTREE_HEIGHT)]; + rtree_leaf_elm_t root[1U << (RTREE_NSB / RTREE_HEIGHT)]; #endif }; @@ -118,17 +119,17 @@ struct rtree_s { */ static const rtree_level_t rtree_levels[] = { #if RTREE_HEIGHT == 1 - {RTREE_NSB, RTREE_NHIB + RTREE_NSB} + {RTREE_NSB, RTREE_NHIB + RTREE_NSB} #elif RTREE_HEIGHT == 2 - {RTREE_NSB/2, RTREE_NHIB + RTREE_NSB/2}, - {RTREE_NSB/2 + RTREE_NSB%2, RTREE_NHIB + RTREE_NSB} + {RTREE_NSB / 2, RTREE_NHIB + RTREE_NSB / 2}, + {RTREE_NSB / 2 + RTREE_NSB % 2, RTREE_NHIB + RTREE_NSB} #elif RTREE_HEIGHT == 3 - {RTREE_NSB/3, RTREE_NHIB + RTREE_NSB/3}, - {RTREE_NSB/3 + RTREE_NSB%3/2, - RTREE_NHIB + RTREE_NSB/3*2 + RTREE_NSB%3/2}, - {RTREE_NSB/3 + RTREE_NSB%3 - RTREE_NSB%3/2, RTREE_NHIB + RTREE_NSB} + {RTREE_NSB / 3, RTREE_NHIB + RTREE_NSB / 3}, + {RTREE_NSB / 3 + RTREE_NSB % 3 / 2, + RTREE_NHIB + RTREE_NSB / 3 * 2 + RTREE_NSB % 3 / 2}, + {RTREE_NSB / 3 + RTREE_NSB % 3 - RTREE_NSB % 3 / 2, RTREE_NHIB + RTREE_NSB} #else -# error Unsupported rtree height +# error Unsupported rtree height #endif }; @@ -139,9 +140,9 @@ rtree_leaf_elm_t *rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, JEMALLOC_ALWAYS_INLINE unsigned rtree_leaf_maskbits(void) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = (rtree_levels[RTREE_HEIGHT-1].cumbits - - rtree_levels[RTREE_HEIGHT-1].bits); + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR + 3); + unsigned cumbits = (rtree_levels[RTREE_HEIGHT - 1].cumbits + - rtree_levels[RTREE_HEIGHT - 1].bits); return ptrbits - cumbits; } @@ -153,16 +154,16 @@ rtree_leafkey(uintptr_t key) { JEMALLOC_ALWAYS_INLINE size_t rtree_cache_direct_map(uintptr_t key) { - return (size_t)((key >> rtree_leaf_maskbits()) & - (RTREE_CTX_NCACHE - 1)); + return ( + size_t)((key >> rtree_leaf_maskbits()) & (RTREE_CTX_NCACHE - 1)); } JEMALLOC_ALWAYS_INLINE 
uintptr_t rtree_subkey(uintptr_t key, unsigned level) { - unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR+3); - unsigned cumbits = rtree_levels[level].cumbits; - unsigned shiftbits = ptrbits - cumbits; - unsigned maskbits = rtree_levels[level].bits; + unsigned ptrbits = ZU(1) << (LG_SIZEOF_PTR + 3); + unsigned cumbits = rtree_levels[level].cumbits; + unsigned shiftbits = ptrbits - cumbits; + unsigned maskbits = rtree_levels[level].bits; uintptr_t mask = (ZU(1) << maskbits) - 1; return ((key >> shiftbits) & mask); } @@ -178,12 +179,12 @@ rtree_subkey(uintptr_t key, unsigned level) { * dependent on a previous rtree write, which means a stale read * could result if synchronization were omitted here. */ -# ifdef RTREE_LEAF_COMPACT +#ifdef RTREE_LEAF_COMPACT JEMALLOC_ALWAYS_INLINE uintptr_t -rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, bool dependent) { - return (uintptr_t)atomic_load_p(&elm->le_bits, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); +rtree_leaf_elm_bits_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { + return (uintptr_t)atomic_load_p( + &elm->le_bits, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); } JEMALLOC_ALWAYS_INLINE uintptr_t @@ -195,10 +196,10 @@ rtree_leaf_elm_bits_encode(rtree_contents_t contents) { uintptr_t szind_bits = (uintptr_t)contents.metadata.szind << LG_VADDR; uintptr_t slab_bits = (uintptr_t)contents.metadata.slab; uintptr_t is_head_bits = (uintptr_t)contents.metadata.is_head << 1; - uintptr_t state_bits = (uintptr_t)contents.metadata.state << - RTREE_LEAF_STATE_SHIFT; - uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits | - slab_bits; + uintptr_t state_bits = (uintptr_t)contents.metadata.state + << RTREE_LEAF_STATE_SHIFT; + uintptr_t metadata_bits = szind_bits | state_bits | is_head_bits + | slab_bits; assert((edata_bits & metadata_bits) == 0); return edata_bits | metadata_bits; @@ -212,13 +213,13 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { contents.metadata.slab = (bool)(bits & 1); contents.metadata.is_head = (bool)(bits & (1 << 1)); - uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) >> - RTREE_LEAF_STATE_SHIFT; + uintptr_t state_bits = (bits & RTREE_LEAF_STATE_MASK) + >> RTREE_LEAF_STATE_SHIFT; assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; uintptr_t low_bit_mask = ~((uintptr_t)EDATA_ALIGNMENT - 1); -# ifdef __aarch64__ +# ifdef __aarch64__ /* * aarch64 doesn't sign extend the highest virtual address bit to set * the higher ones. Instead, the high bits get zeroed. @@ -228,49 +229,50 @@ rtree_leaf_elm_bits_decode(uintptr_t bits) { uintptr_t mask = high_bit_mask & low_bit_mask; /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)(bits & mask); -# else +# else /* Restore sign-extended high bits, mask metadata bits. 
*/ /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ contents.edata = (edata_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) - >> RTREE_NHIB) & low_bit_mask); -# endif + >> RTREE_NHIB) + & low_bit_mask); +# endif assert((uintptr_t)contents.edata % (uintptr_t)EDATA_ALIGNMENT == 0); return contents; } -# endif /* RTREE_LEAF_COMPACT */ +#endif /* RTREE_LEAF_COMPACT */ JEMALLOC_ALWAYS_INLINE rtree_contents_t -rtree_leaf_elm_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); rtree_contents_t contents = rtree_leaf_elm_bits_decode(bits); return contents; #else rtree_contents_t contents; - unsigned metadata_bits = atomic_load_u(&elm->le_metadata, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + unsigned metadata_bits = atomic_load_u( + &elm->le_metadata, dependent ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); contents.metadata.slab = (bool)(metadata_bits & 1); contents.metadata.is_head = (bool)(metadata_bits & (1 << 1)); - uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) >> - RTREE_LEAF_STATE_SHIFT; + uintptr_t state_bits = (metadata_bits & RTREE_LEAF_STATE_MASK) + >> RTREE_LEAF_STATE_SHIFT; assert(state_bits <= extent_state_max); contents.metadata.state = (extent_state_t)state_bits; - contents.metadata.szind = metadata_bits >> (RTREE_LEAF_STATE_SHIFT + - RTREE_LEAF_STATE_WIDTH); + contents.metadata.szind = metadata_bits + >> (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH); - contents.edata = (edata_t *)atomic_load_p(&elm->le_edata, dependent - ? ATOMIC_RELAXED : ATOMIC_ACQUIRE); + contents.edata = (edata_t *)atomic_load_p( + &elm->le_edata, dependent ? 
ATOMIC_RELAXED : ATOMIC_ACQUIRE); return contents; #endif } JEMALLOC_ALWAYS_INLINE void -rtree_contents_encode(rtree_contents_t contents, void **bits, - unsigned *additional) { +rtree_contents_encode( + rtree_contents_t contents, void **bits, unsigned *additional) { #ifdef RTREE_LEAF_COMPACT /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ *bits = (void *)rtree_leaf_elm_bits_encode(contents); @@ -282,15 +284,15 @@ rtree_contents_encode(rtree_contents_t contents, void **bits, *additional = (unsigned)contents.metadata.slab | ((unsigned)contents.metadata.is_head << 1) | ((unsigned)contents.metadata.state << RTREE_LEAF_STATE_SHIFT) - | ((unsigned)contents.metadata.szind << (RTREE_LEAF_STATE_SHIFT + - RTREE_LEAF_STATE_WIDTH)); + | ((unsigned)contents.metadata.szind + << (RTREE_LEAF_STATE_SHIFT + RTREE_LEAF_STATE_WIDTH)); *bits = contents.edata; #endif } JEMALLOC_ALWAYS_INLINE void -rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, void *bits, unsigned additional) { +rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + void *bits, unsigned additional) { #ifdef RTREE_LEAF_COMPACT atomic_store_p(&elm->le_bits, bits, ATOMIC_RELEASE); #else @@ -304,10 +306,10 @@ rtree_leaf_elm_write_commit(tsdn_t *tsdn, rtree_t *rtree, } JEMALLOC_ALWAYS_INLINE void -rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, - rtree_leaf_elm_t *elm, rtree_contents_t contents) { +rtree_leaf_elm_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, + rtree_contents_t contents) { assert((uintptr_t)contents.edata % EDATA_ALIGNMENT == 0); - void *bits; + void *bits; unsigned additional; rtree_contents_encode(contents, &bits, &additional); rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); @@ -348,7 +350,7 @@ rtree_leaf_elm_state_update(tsdn_t *tsdn, rtree_t *rtree, JEMALLOC_ALWAYS_INLINE bool rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_leaf_elm_t **elm) { - size_t slot 
= rtree_cache_direct_map(key); + size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); @@ -358,7 +360,7 @@ rtree_leaf_elm_lookup_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT - 1); *elm = &leaf[subkey]; return false; @@ -370,7 +372,7 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, assert(key != 0); assert(!dependent || !init_missing); - size_t slot = rtree_cache_direct_map(key); + size_t slot = rtree_cache_direct_map(key); uintptr_t leafkey = rtree_leafkey(key); assert(leafkey != RTREE_LEAFKEY_INVALID); @@ -378,39 +380,41 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (likely(rtree_ctx->cache[slot].leafkey == leafkey)) { rtree_leaf_elm_t *leaf = rtree_ctx->cache[slot].leaf; assert(leaf != NULL); - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); + uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT - 1); return &leaf[subkey]; } /* * Search the L2 LRU cache. On hit, swap the matching element into the * slot in L1 cache, and move the position in L2 up by 1. */ -#define RTREE_CACHE_CHECK_L2(i) do { \ - if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ - rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ - assert(leaf != NULL); \ - if (i > 0) { \ - /* Bubble up by one. 
*/ \ - rtree_ctx->l2_cache[i].leafkey = \ - rtree_ctx->l2_cache[i - 1].leafkey; \ - rtree_ctx->l2_cache[i].leaf = \ - rtree_ctx->l2_cache[i - 1].leaf; \ - rtree_ctx->l2_cache[i - 1].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[i - 1].leaf = \ - rtree_ctx->cache[slot].leaf; \ - } else { \ - rtree_ctx->l2_cache[0].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[0].leaf = \ - rtree_ctx->cache[slot].leaf; \ - } \ - rtree_ctx->cache[slot].leafkey = leafkey; \ - rtree_ctx->cache[slot].leaf = leaf; \ - uintptr_t subkey = rtree_subkey(key, RTREE_HEIGHT-1); \ - return &leaf[subkey]; \ - } \ -} while (0) +#define RTREE_CACHE_CHECK_L2(i) \ + do { \ + if (likely(rtree_ctx->l2_cache[i].leafkey == leafkey)) { \ + rtree_leaf_elm_t *leaf = rtree_ctx->l2_cache[i].leaf; \ + assert(leaf != NULL); \ + if (i > 0) { \ + /* Bubble up by one. */ \ + rtree_ctx->l2_cache[i].leafkey = \ + rtree_ctx->l2_cache[i - 1].leafkey; \ + rtree_ctx->l2_cache[i].leaf = \ + rtree_ctx->l2_cache[i - 1].leaf; \ + rtree_ctx->l2_cache[i - 1].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[i - 1].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } else { \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = \ + rtree_ctx->cache[slot].leaf; \ + } \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey( \ + key, RTREE_HEIGHT - 1); \ + return &leaf[subkey]; \ + } \ + } while (0) /* Check the first cache entry. */ RTREE_CACHE_CHECK_L2(0); /* Search the remaining cache elements. 
*/ @@ -419,8 +423,8 @@ rtree_leaf_elm_lookup(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } #undef RTREE_CACHE_CHECK_L2 - return rtree_leaf_elm_lookup_hard(tsdn, rtree, rtree_ctx, key, - dependent, init_missing); + return rtree_leaf_elm_lookup_hard( + tsdn, rtree, rtree_ctx, key, dependent, init_missing); } /* @@ -440,8 +444,8 @@ rtree_read_independent(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } static inline rtree_contents_t -rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { +rtree_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); @@ -449,21 +453,22 @@ rtree_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } static inline rtree_metadata_t -rtree_metadata_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { +rtree_metadata_read( + tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); return rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).metadata; + /* dependent */ true) + .metadata; } /* * Returns true when the request cannot be fulfilled by fastpath. 
*/ static inline bool -rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key, rtree_metadata_t *r_rtree_metadata) { +rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, + rtree_ctx_t *rtree_ctx, uintptr_t key, rtree_metadata_t *r_rtree_metadata) { rtree_leaf_elm_t *elm; /* * Should check the bool return value (lookup success or not) instead of @@ -476,7 +481,8 @@ rtree_metadata_try_read_fast(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ct } assert(elm != NULL); *r_rtree_metadata = rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).metadata; + /* dependent */ true) + .metadata; return false; } @@ -490,22 +496,27 @@ rtree_write_range_impl(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, * are dependent w/o init_missing, assuming the range spans across at * most 2 rtree leaf nodes (each covers 1 GiB of vaddr). */ - void *bits; + void *bits; unsigned additional; rtree_contents_encode(contents, &bits, &additional); rtree_leaf_elm_t *elm = NULL; /* Dead store. 
*/ for (uintptr_t addr = base; addr <= end; addr += PAGE) { - if (addr == base || - (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) { - elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + if (addr == base + || (addr & ((ZU(1) << rtree_leaf_maskbits()) - 1)) == 0) { + elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, + addr, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); } - assert(elm == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, - /* dependent */ true, /* init_missing */ false)); - assert(!clearing || rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).edata != NULL); + assert(elm + == rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, addr, + /* dependent */ true, /* init_missing */ false)); + assert(!clearing + || rtree_leaf_elm_read(tsdn, rtree, elm, + /* dependent */ true) + .edata + != NULL); rtree_leaf_elm_write_commit(tsdn, rtree, elm, bits, additional); elm++; } @@ -533,13 +544,15 @@ rtree_write(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key, } static inline void -rtree_clear(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, - uintptr_t key) { +rtree_clear( + tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, uintptr_t key) { rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, rtree_ctx, key, /* dependent */ true, /* init_missing */ false); assert(elm != NULL); assert(rtree_leaf_elm_read(tsdn, rtree, elm, - /* dependent */ true).edata != NULL); + /* dependent */ true) + .edata + != NULL); rtree_contents_t contents; contents.edata = NULL; contents.metadata.szind = SC_NSIZES; diff --git a/include/jemalloc/internal/rtree_tsd.h b/include/jemalloc/internal/rtree_tsd.h index 59f18570..4014dde0 100644 --- a/include/jemalloc/internal/rtree_tsd.h +++ b/include/jemalloc/internal/rtree_tsd.h @@ -25,7 +25,8 @@ /* Needed for initialization only. 
*/ #define RTREE_LEAFKEY_INVALID ((uintptr_t)1) -#define RTREE_CTX_CACHE_ELM_INVALID {RTREE_LEAFKEY_INVALID, NULL} +#define RTREE_CTX_CACHE_ELM_INVALID \ + { RTREE_LEAFKEY_INVALID, NULL } #define RTREE_CTX_INIT_ELM_1 RTREE_CTX_CACHE_ELM_INVALID #define RTREE_CTX_INIT_ELM_2 RTREE_CTX_INIT_ELM_1, RTREE_CTX_INIT_ELM_1 @@ -40,23 +41,27 @@ * Static initializer (to invalidate the cache entries) is required because the * free fastpath may access the rtree cache before a full tsd initialization. */ -#define RTREE_CTX_INITIALIZER {{RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, \ - {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2)}} +#define RTREE_CTX_INITIALIZER \ + { \ + {RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE)}, { \ + RTREE_CTX_INIT_ELM_DATA(RTREE_CTX_NCACHE_L2) \ + } \ + } typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; typedef struct rtree_ctx_cache_elm_s rtree_ctx_cache_elm_t; struct rtree_ctx_cache_elm_s { - uintptr_t leafkey; - rtree_leaf_elm_t *leaf; + uintptr_t leafkey; + rtree_leaf_elm_t *leaf; }; typedef struct rtree_ctx_s rtree_ctx_t; struct rtree_ctx_s { /* Direct mapped cache. */ - rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; + rtree_ctx_cache_elm_t cache[RTREE_CTX_NCACHE]; /* L2 LRU cache. 
*/ - rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; + rtree_ctx_cache_elm_t l2_cache[RTREE_CTX_NCACHE_L2]; }; void rtree_ctx_data_init(rtree_ctx_t *ctx); diff --git a/include/jemalloc/internal/safety_check.h b/include/jemalloc/internal/safety_check.h index 194b7744..2b4b2d0e 100644 --- a/include/jemalloc/internal/safety_check.h +++ b/include/jemalloc/internal/safety_check.h @@ -7,8 +7,8 @@ #define SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT 32 -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, - size_t true_size, size_t input_size); +void safety_check_fail_sized_dealloc( + bool current_dealloc, const void *ptr, size_t true_size, size_t input_size); void safety_check_fail(const char *format, ...); typedef void (*safety_check_abort_hook_t)(const char *message); @@ -16,7 +16,7 @@ typedef void (*safety_check_abort_hook_t)(const char *message); /* Can set to NULL for a default. */ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); -#define REDZONE_SIZE ((size_t) 32) +#define REDZONE_SIZE ((size_t)32) #define REDZONE_FILL_VALUE 0xBC /* @@ -27,9 +27,10 @@ void safety_check_set_abort(safety_check_abort_hook_t abort_fn); */ JEMALLOC_ALWAYS_INLINE const unsigned char * compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { - const unsigned char *ptr = (const unsigned char *) _ptr; - const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize ? - &ptr[usize + REDZONE_SIZE] : &ptr[bumped_usize]; + const unsigned char *ptr = (const unsigned char *)_ptr; + const unsigned char *redzone_end = usize + REDZONE_SIZE < bumped_usize + ? &ptr[usize + REDZONE_SIZE] + : &ptr[bumped_usize]; const unsigned char *page_end = (const unsigned char *) ALIGNMENT_ADDR2CEILING(&ptr[usize], os_page); return redzone_end < page_end ? 
redzone_end : page_end; @@ -38,8 +39,8 @@ compute_redzone_end(const void *_ptr, size_t usize, size_t bumped_usize) { JEMALLOC_ALWAYS_INLINE void safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { assert(usize <= bumped_usize); - const unsigned char *redzone_end = - compute_redzone_end(ptr, usize, bumped_usize); + const unsigned char *redzone_end = compute_redzone_end( + ptr, usize, bumped_usize); for (unsigned char *curr = &((unsigned char *)ptr)[usize]; curr < redzone_end; curr++) { *curr = REDZONE_FILL_VALUE; @@ -47,11 +48,11 @@ safety_check_set_redzone(void *ptr, size_t usize, size_t bumped_usize) { } JEMALLOC_ALWAYS_INLINE void -safety_check_verify_redzone(const void *ptr, size_t usize, size_t bumped_usize) -{ - const unsigned char *redzone_end = - compute_redzone_end(ptr, usize, bumped_usize); - for (const unsigned char *curr= &((const unsigned char *)ptr)[usize]; +safety_check_verify_redzone( + const void *ptr, size_t usize, size_t bumped_usize) { + const unsigned char *redzone_end = compute_redzone_end( + ptr, usize, bumped_usize); + for (const unsigned char *curr = &((const unsigned char *)ptr)[usize]; curr < redzone_end; curr++) { if (unlikely(*curr != REDZONE_FILL_VALUE)) { safety_check_fail("Use after free error\n"); diff --git a/include/jemalloc/internal/san.h b/include/jemalloc/internal/san.h index 669f99dd..5dcae376 100644 --- a/include/jemalloc/internal/san.h +++ b/include/jemalloc/internal/san.h @@ -32,22 +32,22 @@ void san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * Unguard the extent, but don't modify emap boundaries. Must be called on an * extent that has been erased from emap and shouldn't be placed back. 
*/ -void san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, - edata_t *edata, emap_t *emap); +void san_unguard_pages_pre_destroy( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap); void san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize); void tsd_san_init(tsd_t *tsd); void san_init(ssize_t lg_san_uaf_align); static inline void -san_guard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap, bool remap) { +san_guard_pages_two_sided( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, bool remap) { san_guard_pages(tsdn, ehooks, edata, emap, true, true, remap); } static inline void -san_unguard_pages_two_sided(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { +san_unguard_pages_two_sided( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { san_unguard_pages(tsdn, ehooks, edata, emap, true, true); } @@ -83,14 +83,14 @@ san_guard_enabled(void) { } static inline bool -san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, - size_t alignment) { - if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) || - tsdn_null(tsdn)) { +san_large_extent_decide_guard( + tsdn_t *tsdn, ehooks_t *ehooks, size_t size, size_t alignment) { + if (opt_san_guard_large == 0 || ehooks_guard_will_fail(ehooks) + || tsdn_null(tsdn)) { return false; } - tsd_t *tsd = tsdn_tsd(tsdn); + tsd_t *tsd = tsdn_tsd(tsdn); uint64_t n = tsd_san_extents_until_guard_large_get(tsd); assert(n >= 1); if (n > 1) { @@ -101,10 +101,10 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, *tsd_san_extents_until_guard_largep_get(tsd) = n - 1; } - if (n == 1 && (alignment <= PAGE) && - (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) { - *tsd_san_extents_until_guard_largep_get(tsd) = - opt_san_guard_large; + if (n == 1 && (alignment <= PAGE) + && (san_two_side_guarded_sz(size) <= SC_LARGE_MAXCLASS)) { + *tsd_san_extents_until_guard_largep_get( + tsd) = 
opt_san_guard_large; return true; } else { assert(tsd_san_extents_until_guard_large_get(tsd) >= 1); @@ -114,17 +114,17 @@ san_large_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks, size_t size, static inline bool san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { - if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) || - tsdn_null(tsdn)) { + if (opt_san_guard_small == 0 || ehooks_guard_will_fail(ehooks) + || tsdn_null(tsdn)) { return false; } - tsd_t *tsd = tsdn_tsd(tsdn); + tsd_t *tsd = tsdn_tsd(tsdn); uint64_t n = tsd_san_extents_until_guard_small_get(tsd); assert(n >= 1); if (n == 1) { - *tsd_san_extents_until_guard_smallp_get(tsd) = - opt_san_guard_small; + *tsd_san_extents_until_guard_smallp_get( + tsd) = opt_san_guard_small; return true; } else { *tsd_san_extents_until_guard_smallp_get(tsd) = n - 1; @@ -134,8 +134,8 @@ san_slab_extent_decide_guard(tsdn_t *tsdn, ehooks_t *ehooks) { } static inline void -san_junk_ptr_locations(void *ptr, size_t usize, void **first, void **mid, - void **last) { +san_junk_ptr_locations( + void *ptr, size_t usize, void **first, void **mid, void **last) { size_t ptr_sz = sizeof(void *); *first = ptr; @@ -184,8 +184,8 @@ static inline bool san_uaf_detection_enabled(void) { bool ret = config_uaf_detection && (opt_lg_san_uaf_align != -1); if (config_uaf_detection && ret) { - assert(san_cache_bin_nonfast_mask == ((uintptr_t)1 << - opt_lg_san_uaf_align) - 1); + assert(san_cache_bin_nonfast_mask + == ((uintptr_t)1 << opt_lg_san_uaf_align) - 1); } return ret; diff --git a/include/jemalloc/internal/san_bump.h b/include/jemalloc/internal/san_bump.h index d6e9cfc5..9e42b69b 100644 --- a/include/jemalloc/internal/san_bump.h +++ b/include/jemalloc/internal/san_bump.h @@ -12,7 +12,7 @@ extern bool opt_retain; typedef struct ehooks_s ehooks_t; -typedef struct pac_s pac_t; +typedef struct pac_s pac_t; typedef struct san_bump_alloc_s san_bump_alloc_t; struct san_bump_alloc_s { @@ -36,7 +36,7 @@ san_bump_enabled(void) { } 
static inline bool -san_bump_alloc_init(san_bump_alloc_t* sba) { +san_bump_alloc_init(san_bump_alloc_t *sba) { bool err = malloc_mutex_init(&sba->mtx, "sanitizer_bump_allocator", WITNESS_RANK_SAN_BUMP_ALLOC, malloc_mutex_rank_exclusive); if (err) { @@ -47,8 +47,7 @@ san_bump_alloc_init(san_bump_alloc_t* sba) { return false; } -edata_t * -san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, ehooks_t *ehooks, - size_t size, bool zero); +edata_t *san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, + ehooks_t *ehooks, size_t size, bool zero); #endif /* JEMALLOC_INTERNAL_SAN_BUMP_H */ diff --git a/include/jemalloc/internal/sc.h b/include/jemalloc/internal/sc.h index 97956e7a..17a8278a 100644 --- a/include/jemalloc/internal/sc.h +++ b/include/jemalloc/internal/sc.h @@ -174,7 +174,7 @@ #if SC_LG_TINY_MIN == 0 /* The div module doesn't support division by 1, which this would require. */ -#error "Unsupported LG_TINY_MIN" +# error "Unsupported LG_TINY_MIN" #endif /* @@ -194,8 +194,8 @@ * We could probably save some space in arenas by capping this at LG_VADDR size. */ #define SC_LG_BASE_MAX (SC_PTR_BITS - 2) -#define SC_NREGULAR (SC_NGROUP * \ - (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) +#define SC_NREGULAR \ + (SC_NGROUP * (SC_LG_BASE_MAX - SC_LG_FIRST_REGULAR_BASE + 1) - 1) #define SC_NSIZES (SC_NTINY + SC_NPSEUDO + SC_NREGULAR) /* @@ -222,29 +222,29 @@ * * This gives us the quantity we seek. */ -#define SC_NPSIZES ( \ - SC_NGROUP \ - + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP \ - + SC_NGROUP - 1) +#define SC_NPSIZES \ + (SC_NGROUP + (SC_LG_BASE_MAX - (LG_PAGE + SC_LG_NGROUP)) * SC_NGROUP \ + + SC_NGROUP - 1) /* * We declare a size class is binnable if size < page size * group. Or, in other * words, lg(size) < lg(page size) + lg(group size). */ -#define SC_NBINS ( \ - /* Sub-regular size classes. 
*/ \ - SC_NTINY + SC_NPSEUDO \ - /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \ - + SC_NGROUP * (LG_PAGE + SC_LG_NGROUP - SC_LG_FIRST_REGULAR_BASE) \ - /* Last SC of the last group hits the bound exactly; exclude it. */ \ - - 1) +#define SC_NBINS \ + (/* Sub-regular size classes. */ \ + SC_NTINY \ + + SC_NPSEUDO /* Groups with lg_regular_min_base <= lg_base <= lg_base_max */ \ + + SC_NGROUP \ + * (LG_PAGE + SC_LG_NGROUP \ + - SC_LG_FIRST_REGULAR_BASE) /* Last SC of the last group hits the bound exactly; exclude it. */ \ + - 1) /* * The size2index_tab lookup table uses uint8_t to encode each bin index, so we * cannot support more than 256 small size classes. */ #if (SC_NBINS > 256) -# error "Too many small size classes" +# error "Too many small size classes" #endif /* The largest size class in the lookup table, and its binary log. */ @@ -256,12 +256,12 @@ #define SC_SMALL_MAX_DELTA (1 << (LG_PAGE - 1)) /* The largest size class allocated out of a slab. */ -#define SC_SMALL_MAXCLASS (SC_SMALL_MAX_BASE \ - + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) +#define SC_SMALL_MAXCLASS \ + (SC_SMALL_MAX_BASE + (SC_NGROUP - 1) * SC_SMALL_MAX_DELTA) /* The fastpath assumes all lookup-able sizes are small. */ #if (SC_SMALL_MAXCLASS < SC_LOOKUP_MAXCLASS) -# error "Lookup table sizes must be small" +# error "Lookup table sizes must be small" #endif /* The smallest size class not allocated out of a slab. */ @@ -277,13 +277,13 @@ /* Maximum number of regions in one slab. 
*/ #ifndef CONFIG_LG_SLAB_MAXREGS -# define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) +# define SC_LG_SLAB_MAXREGS (LG_PAGE - SC_LG_TINY_MIN) #else -# if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN) -# error "Unsupported SC_LG_SLAB_MAXREGS" -# else -# define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS -# endif +# if CONFIG_LG_SLAB_MAXREGS < (LG_PAGE - SC_LG_TINY_MIN) +# error "Unsupported SC_LG_SLAB_MAXREGS" +# else +# define SC_LG_SLAB_MAXREGS CONFIG_LG_SLAB_MAXREGS +# endif #endif /* @@ -364,13 +364,13 @@ struct sc_data_s { }; size_t reg_size_compute(int lg_base, int lg_delta, int ndelta); -void sc_data_init(sc_data_t *data); +void sc_data_init(sc_data_t *data); /* * Updates slab sizes in [begin, end] to be pgs pages in length, if possible. * Otherwise, does its best to accommodate the request. */ -void sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, - int pgs); +void sc_data_update_slab_size( + sc_data_t *data, size_t begin, size_t end, int pgs); void sc_boot(sc_data_t *data); #endif /* JEMALLOC_INTERNAL_SC_H */ diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 8ef1e9fb..50daf066 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -59,7 +59,7 @@ struct sec_bin_s { * stats; rather, it allows us to quickly determine the change in the * centralized counter when flushing. */ - size_t bytes_cur; + size_t bytes_cur; edata_list_active_t freelist; }; @@ -80,7 +80,7 @@ struct sec_shard_s { * that we won't go down these pathways very often after custom extent * hooks are installed. */ - bool enabled; + bool enabled; sec_bin_t *bins; /* Number of bytes in all bins in the shard. 
*/ size_t bytes_cur; @@ -90,12 +90,12 @@ struct sec_shard_s { typedef struct sec_s sec_t; struct sec_s { - pai_t pai; + pai_t pai; pai_t *fallback; - sec_opts_t opts; + sec_opts_t opts; sec_shard_t *shards; - pszind_t npsizes; + pszind_t npsizes; }; bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, @@ -110,8 +110,8 @@ void sec_disable(tsdn_t *tsdn, sec_t *sec); * split), which simplifies the stats management. */ void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats); -void sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, - mutex_prof_data_t *mutex_prof_data); +void sec_mutex_stats_read( + tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data); /* * We use the arena lock ordering; these are acquired in phase 2 of forking, but diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index 19ed1492..e0699d7a 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -44,18 +44,14 @@ struct sec_opts_s { size_t batch_fill_extra; }; -#define SEC_OPTS_DEFAULT { \ - /* nshards */ \ - 4, \ - /* max_alloc */ \ - (32 * 1024) < PAGE ? PAGE : (32 * 1024), \ - /* max_bytes */ \ - 256 * 1024, \ - /* bytes_after_flush */ \ - 128 * 1024, \ - /* batch_fill_extra */ \ - 0 \ -} - +#define SEC_OPTS_DEFAULT \ + { \ + /* nshards */ \ + 4, /* max_alloc */ \ + (32 * 1024) < PAGE ? 
PAGE : (32 * 1024), /* max_bytes */ \ + 256 * 1024, /* bytes_after_flush */ \ + 128 * 1024, /* batch_fill_extra */ \ + 0 \ + } #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/include/jemalloc/internal/smoothstep.h b/include/jemalloc/internal/smoothstep.h index 2e14430f..135c4905 100644 --- a/include/jemalloc/internal/smoothstep.h +++ b/include/jemalloc/internal/smoothstep.h @@ -23,210 +23,210 @@ * smootheststep(x) = -20x + 70x - 84x + 35x */ -#define SMOOTHSTEP_VARIANT "smoother" -#define SMOOTHSTEP_NSTEPS 200 -#define SMOOTHSTEP_BFP 24 -#define SMOOTHSTEP \ - /* STEP(step, h, x, y) */ \ - STEP( 1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ - STEP( 2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ - STEP( 3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ - STEP( 4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ - STEP( 5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ - STEP( 6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ - STEP( 7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ - STEP( 8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ - STEP( 9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ - STEP( 10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ - STEP( 11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ - STEP( 12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ - STEP( 13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ - STEP( 14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ - STEP( 15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ - STEP( 16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ - STEP( 17, UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ - STEP( 18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ - STEP( 19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ - STEP( 20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ - 
STEP( 21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ - STEP( 22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ - STEP( 23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ - STEP( 24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ - STEP( 25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ - STEP( 26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ - STEP( 27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ - STEP( 28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ - STEP( 29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ - STEP( 30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ - STEP( 31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ - STEP( 32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ - STEP( 33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ - STEP( 34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ - STEP( 35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ - STEP( 36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ - STEP( 37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ - STEP( 38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ - STEP( 39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ - STEP( 40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ - STEP( 41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ - STEP( 42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ - STEP( 43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ - STEP( 44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ - STEP( 45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ - STEP( 46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ - STEP( 47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ - STEP( 48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ - STEP( 49, UINT64_C(0x0000000000192b04), 
0.245, 0.098312651543750) \ - STEP( 50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ - STEP( 51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ - STEP( 52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ - STEP( 53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ - STEP( 54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ - STEP( 55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ - STEP( 56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ - STEP( 57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ - STEP( 58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ - STEP( 59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ - STEP( 60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ - STEP( 61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ - STEP( 62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ - STEP( 63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ - STEP( 64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ - STEP( 65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ - STEP( 66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ - STEP( 67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ - STEP( 68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ - STEP( 69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ - STEP( 70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ - STEP( 71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ - STEP( 72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ - STEP( 73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ - STEP( 74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ - STEP( 75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ - STEP( 76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ - STEP( 77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ - STEP( 78, 
UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ - STEP( 79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ - STEP( 80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ - STEP( 81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ - STEP( 82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ - STEP( 83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ - STEP( 84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ - STEP( 85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ - STEP( 86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ - STEP( 87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ - STEP( 88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ - STEP( 89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ - STEP( 90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ - STEP( 91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ - STEP( 92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ - STEP( 93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ - STEP( 94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ - STEP( 95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ - STEP( 96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ - STEP( 97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ - STEP( 98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ - STEP( 99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ - STEP( 100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ - STEP( 101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ - STEP( 102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ - STEP( 103, UINT64_C(0x0000000000873218), 0.515, 0.528108129556250) \ - STEP( 104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ - STEP( 105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ - STEP( 106, UINT64_C(0x00000000008e5d8f), 
0.530, 0.556115145800000) \ - STEP( 107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ - STEP( 108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ - STEP( 109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ - STEP( 110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ - STEP( 111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ - STEP( 112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ - STEP( 113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ - STEP( 114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ - STEP( 115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ - STEP( 116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ - STEP( 117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ - STEP( 118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ - STEP( 119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ - STEP( 120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ - STEP( 121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ - STEP( 122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ - STEP( 123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ - STEP( 124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ - STEP( 125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ - STEP( 126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ - STEP( 127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ - STEP( 128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ - STEP( 129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ - STEP( 130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ - STEP( 131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ - STEP( 132, UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ - STEP( 133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ - STEP( 134, UINT64_C(0x0000000000cb878a), 0.670, 
0.795036914200000) \ - STEP( 135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ - STEP( 136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ - STEP( 137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ - STEP( 138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ - STEP( 139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ - STEP( 140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ - STEP( 141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ - STEP( 142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ - STEP( 143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ - STEP( 144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ - STEP( 145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ - STEP( 146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ - STEP( 147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ - STEP( 148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ - STEP( 149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ - STEP( 150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ - STEP( 151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ - STEP( 152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ - STEP( 153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ - STEP( 154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ - STEP( 155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ - STEP( 156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ - STEP( 157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ - STEP( 158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ - STEP( 159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ - STEP( 160, UINT64_C(0x0000000000f12c27), 0.800, 0.942080000000000) \ - STEP( 161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ - STEP( 162, UINT64_C(0x0000000000f310a1), 0.810, 
0.949472490600000) \ - STEP( 163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ - STEP( 164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ - STEP( 165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ - STEP( 166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ - STEP( 167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ - STEP( 168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ - STEP( 169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ - STEP( 170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ - STEP( 171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ - STEP( 172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ - STEP( 173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ - STEP( 174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ - STEP( 175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ - STEP( 176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ - STEP( 177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ - STEP( 178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ - STEP( 179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ - STEP( 180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ - STEP( 181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ - STEP( 182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ - STEP( 183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ - STEP( 184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ - STEP( 185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ - STEP( 186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ - STEP( 187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ - STEP( 188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ - STEP( 189, UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ - STEP( 190, UINT64_C(0x0000000000ffb419), 0.950, 
0.998841875000000) \ - STEP( 191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ - STEP( 192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ - STEP( 193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ - STEP( 194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ - STEP( 195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ - STEP( 196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ - STEP( 197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ - STEP( 198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ - STEP( 199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ - STEP( 200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) \ +#define SMOOTHSTEP_VARIANT "smoother" +#define SMOOTHSTEP_NSTEPS 200 +#define SMOOTHSTEP_BFP 24 +#define SMOOTHSTEP \ + /* STEP(step, h, x, y) */ \ + STEP(1, UINT64_C(0x0000000000000014), 0.005, 0.000001240643750) \ + STEP(2, UINT64_C(0x00000000000000a5), 0.010, 0.000009850600000) \ + STEP(3, UINT64_C(0x0000000000000229), 0.015, 0.000032995181250) \ + STEP(4, UINT64_C(0x0000000000000516), 0.020, 0.000077619200000) \ + STEP(5, UINT64_C(0x00000000000009dc), 0.025, 0.000150449218750) \ + STEP(6, UINT64_C(0x00000000000010e8), 0.030, 0.000257995800000) \ + STEP(7, UINT64_C(0x0000000000001aa4), 0.035, 0.000406555756250) \ + STEP(8, UINT64_C(0x0000000000002777), 0.040, 0.000602214400000) \ + STEP(9, UINT64_C(0x00000000000037c2), 0.045, 0.000850847793750) \ + STEP(10, UINT64_C(0x0000000000004be6), 0.050, 0.001158125000000) \ + STEP(11, UINT64_C(0x000000000000643c), 0.055, 0.001529510331250) \ + STEP(12, UINT64_C(0x000000000000811f), 0.060, 0.001970265600000) \ + STEP(13, UINT64_C(0x000000000000a2e2), 0.065, 0.002485452368750) \ + STEP(14, UINT64_C(0x000000000000c9d8), 0.070, 0.003079934200000) \ + STEP(15, UINT64_C(0x000000000000f64f), 0.075, 0.003758378906250) \ + STEP(16, UINT64_C(0x0000000000012891), 0.080, 0.004525260800000) \ + STEP(17, 
UINT64_C(0x00000000000160e7), 0.085, 0.005384862943750) \ + STEP(18, UINT64_C(0x0000000000019f95), 0.090, 0.006341279400000) \ + STEP(19, UINT64_C(0x000000000001e4dc), 0.095, 0.007398417481250) \ + STEP(20, UINT64_C(0x00000000000230fc), 0.100, 0.008560000000000) \ + STEP(21, UINT64_C(0x0000000000028430), 0.105, 0.009829567518750) \ + STEP(22, UINT64_C(0x000000000002deb0), 0.110, 0.011210480600000) \ + STEP(23, UINT64_C(0x00000000000340b1), 0.115, 0.012705922056250) \ + STEP(24, UINT64_C(0x000000000003aa67), 0.120, 0.014318899200000) \ + STEP(25, UINT64_C(0x0000000000041c00), 0.125, 0.016052246093750) \ + STEP(26, UINT64_C(0x00000000000495a8), 0.130, 0.017908625800000) \ + STEP(27, UINT64_C(0x000000000005178b), 0.135, 0.019890532631250) \ + STEP(28, UINT64_C(0x000000000005a1cf), 0.140, 0.022000294400000) \ + STEP(29, UINT64_C(0x0000000000063498), 0.145, 0.024240074668750) \ + STEP(30, UINT64_C(0x000000000006d009), 0.150, 0.026611875000000) \ + STEP(31, UINT64_C(0x000000000007743f), 0.155, 0.029117537206250) \ + STEP(32, UINT64_C(0x0000000000082157), 0.160, 0.031758745600000) \ + STEP(33, UINT64_C(0x000000000008d76b), 0.165, 0.034537029243750) \ + STEP(34, UINT64_C(0x0000000000099691), 0.170, 0.037453764200000) \ + STEP(35, UINT64_C(0x00000000000a5edf), 0.175, 0.040510175781250) \ + STEP(36, UINT64_C(0x00000000000b3067), 0.180, 0.043707340800000) \ + STEP(37, UINT64_C(0x00000000000c0b38), 0.185, 0.047046189818750) \ + STEP(38, UINT64_C(0x00000000000cef5e), 0.190, 0.050527509400000) \ + STEP(39, UINT64_C(0x00000000000ddce6), 0.195, 0.054151944356250) \ + STEP(40, UINT64_C(0x00000000000ed3d8), 0.200, 0.057920000000000) \ + STEP(41, UINT64_C(0x00000000000fd439), 0.205, 0.061832044393750) \ + STEP(42, UINT64_C(0x000000000010de0e), 0.210, 0.065888310600000) \ + STEP(43, UINT64_C(0x000000000011f158), 0.215, 0.070088898931250) \ + STEP(44, UINT64_C(0x0000000000130e17), 0.220, 0.074433779200000) \ + STEP(45, UINT64_C(0x0000000000143448), 0.225, 0.078922792968750) \ + 
STEP(46, UINT64_C(0x00000000001563e7), 0.230, 0.083555655800000) \ + STEP(47, UINT64_C(0x0000000000169cec), 0.235, 0.088331959506250) \ + STEP(48, UINT64_C(0x000000000017df4f), 0.240, 0.093251174400000) \ + STEP(49, UINT64_C(0x0000000000192b04), 0.245, 0.098312651543750) \ + STEP(50, UINT64_C(0x00000000001a8000), 0.250, 0.103515625000000) \ + STEP(51, UINT64_C(0x00000000001bde32), 0.255, 0.108859214081250) \ + STEP(52, UINT64_C(0x00000000001d458b), 0.260, 0.114342425600000) \ + STEP(53, UINT64_C(0x00000000001eb5f8), 0.265, 0.119964156118750) \ + STEP(54, UINT64_C(0x0000000000202f65), 0.270, 0.125723194200000) \ + STEP(55, UINT64_C(0x000000000021b1bb), 0.275, 0.131618222656250) \ + STEP(56, UINT64_C(0x0000000000233ce3), 0.280, 0.137647820800000) \ + STEP(57, UINT64_C(0x000000000024d0c3), 0.285, 0.143810466693750) \ + STEP(58, UINT64_C(0x0000000000266d40), 0.290, 0.150104539400000) \ + STEP(59, UINT64_C(0x000000000028123d), 0.295, 0.156528321231250) \ + STEP(60, UINT64_C(0x000000000029bf9c), 0.300, 0.163080000000000) \ + STEP(61, UINT64_C(0x00000000002b753d), 0.305, 0.169757671268750) \ + STEP(62, UINT64_C(0x00000000002d32fe), 0.310, 0.176559340600000) \ + STEP(63, UINT64_C(0x00000000002ef8bc), 0.315, 0.183482925806250) \ + STEP(64, UINT64_C(0x000000000030c654), 0.320, 0.190526259200000) \ + STEP(65, UINT64_C(0x0000000000329b9f), 0.325, 0.197687089843750) \ + STEP(66, UINT64_C(0x0000000000347875), 0.330, 0.204963085800000) \ + STEP(67, UINT64_C(0x0000000000365cb0), 0.335, 0.212351836381250) \ + STEP(68, UINT64_C(0x0000000000384825), 0.340, 0.219850854400000) \ + STEP(69, UINT64_C(0x00000000003a3aa8), 0.345, 0.227457578418750) \ + STEP(70, UINT64_C(0x00000000003c340f), 0.350, 0.235169375000000) \ + STEP(71, UINT64_C(0x00000000003e342b), 0.355, 0.242983540956250) \ + STEP(72, UINT64_C(0x0000000000403ace), 0.360, 0.250897305600000) \ + STEP(73, UINT64_C(0x00000000004247c8), 0.365, 0.258907832993750) \ + STEP(74, UINT64_C(0x0000000000445ae9), 0.370, 0.267012224200000) \ 
+ STEP(75, UINT64_C(0x0000000000467400), 0.375, 0.275207519531250) \ + STEP(76, UINT64_C(0x00000000004892d8), 0.380, 0.283490700800000) \ + STEP(77, UINT64_C(0x00000000004ab740), 0.385, 0.291858693568750) \ + STEP(78, UINT64_C(0x00000000004ce102), 0.390, 0.300308369400000) \ + STEP(79, UINT64_C(0x00000000004f0fe9), 0.395, 0.308836548106250) \ + STEP(80, UINT64_C(0x00000000005143bf), 0.400, 0.317440000000000) \ + STEP(81, UINT64_C(0x0000000000537c4d), 0.405, 0.326115448143750) \ + STEP(82, UINT64_C(0x000000000055b95b), 0.410, 0.334859570600000) \ + STEP(83, UINT64_C(0x000000000057fab1), 0.415, 0.343669002681250) \ + STEP(84, UINT64_C(0x00000000005a4015), 0.420, 0.352540339200000) \ + STEP(85, UINT64_C(0x00000000005c894e), 0.425, 0.361470136718750) \ + STEP(86, UINT64_C(0x00000000005ed622), 0.430, 0.370454915800000) \ + STEP(87, UINT64_C(0x0000000000612655), 0.435, 0.379491163256250) \ + STEP(88, UINT64_C(0x00000000006379ac), 0.440, 0.388575334400000) \ + STEP(89, UINT64_C(0x000000000065cfeb), 0.445, 0.397703855293750) \ + STEP(90, UINT64_C(0x00000000006828d6), 0.450, 0.406873125000000) \ + STEP(91, UINT64_C(0x00000000006a842f), 0.455, 0.416079517831250) \ + STEP(92, UINT64_C(0x00000000006ce1bb), 0.460, 0.425319385600000) \ + STEP(93, UINT64_C(0x00000000006f413a), 0.465, 0.434589059868750) \ + STEP(94, UINT64_C(0x000000000071a270), 0.470, 0.443884854200000) \ + STEP(95, UINT64_C(0x000000000074051d), 0.475, 0.453203066406250) \ + STEP(96, UINT64_C(0x0000000000766905), 0.480, 0.462539980800000) \ + STEP(97, UINT64_C(0x000000000078cde7), 0.485, 0.471891870443750) \ + STEP(98, UINT64_C(0x00000000007b3387), 0.490, 0.481254999400000) \ + STEP(99, UINT64_C(0x00000000007d99a4), 0.495, 0.490625624981250) \ + STEP(100, UINT64_C(0x0000000000800000), 0.500, 0.500000000000000) \ + STEP(101, UINT64_C(0x000000000082665b), 0.505, 0.509374375018750) \ + STEP(102, UINT64_C(0x000000000084cc78), 0.510, 0.518745000600000) \ + STEP(103, UINT64_C(0x0000000000873218), 0.515, 
0.528108129556250) \ + STEP(104, UINT64_C(0x00000000008996fa), 0.520, 0.537460019200000) \ + STEP(105, UINT64_C(0x00000000008bfae2), 0.525, 0.546796933593750) \ + STEP(106, UINT64_C(0x00000000008e5d8f), 0.530, 0.556115145800000) \ + STEP(107, UINT64_C(0x000000000090bec5), 0.535, 0.565410940131250) \ + STEP(108, UINT64_C(0x0000000000931e44), 0.540, 0.574680614400000) \ + STEP(109, UINT64_C(0x0000000000957bd0), 0.545, 0.583920482168750) \ + STEP(110, UINT64_C(0x000000000097d729), 0.550, 0.593126875000000) \ + STEP(111, UINT64_C(0x00000000009a3014), 0.555, 0.602296144706250) \ + STEP(112, UINT64_C(0x00000000009c8653), 0.560, 0.611424665600000) \ + STEP(113, UINT64_C(0x00000000009ed9aa), 0.565, 0.620508836743750) \ + STEP(114, UINT64_C(0x0000000000a129dd), 0.570, 0.629545084200000) \ + STEP(115, UINT64_C(0x0000000000a376b1), 0.575, 0.638529863281250) \ + STEP(116, UINT64_C(0x0000000000a5bfea), 0.580, 0.647459660800000) \ + STEP(117, UINT64_C(0x0000000000a8054e), 0.585, 0.656330997318750) \ + STEP(118, UINT64_C(0x0000000000aa46a4), 0.590, 0.665140429400000) \ + STEP(119, UINT64_C(0x0000000000ac83b2), 0.595, 0.673884551856250) \ + STEP(120, UINT64_C(0x0000000000aebc40), 0.600, 0.682560000000000) \ + STEP(121, UINT64_C(0x0000000000b0f016), 0.605, 0.691163451893750) \ + STEP(122, UINT64_C(0x0000000000b31efd), 0.610, 0.699691630600000) \ + STEP(123, UINT64_C(0x0000000000b548bf), 0.615, 0.708141306431250) \ + STEP(124, UINT64_C(0x0000000000b76d27), 0.620, 0.716509299200000) \ + STEP(125, UINT64_C(0x0000000000b98c00), 0.625, 0.724792480468750) \ + STEP(126, UINT64_C(0x0000000000bba516), 0.630, 0.732987775800000) \ + STEP(127, UINT64_C(0x0000000000bdb837), 0.635, 0.741092167006250) \ + STEP(128, UINT64_C(0x0000000000bfc531), 0.640, 0.749102694400000) \ + STEP(129, UINT64_C(0x0000000000c1cbd4), 0.645, 0.757016459043750) \ + STEP(130, UINT64_C(0x0000000000c3cbf0), 0.650, 0.764830625000000) \ + STEP(131, UINT64_C(0x0000000000c5c557), 0.655, 0.772542421581250) \ + STEP(132, 
UINT64_C(0x0000000000c7b7da), 0.660, 0.780149145600000) \ + STEP(133, UINT64_C(0x0000000000c9a34f), 0.665, 0.787648163618750) \ + STEP(134, UINT64_C(0x0000000000cb878a), 0.670, 0.795036914200000) \ + STEP(135, UINT64_C(0x0000000000cd6460), 0.675, 0.802312910156250) \ + STEP(136, UINT64_C(0x0000000000cf39ab), 0.680, 0.809473740800000) \ + STEP(137, UINT64_C(0x0000000000d10743), 0.685, 0.816517074193750) \ + STEP(138, UINT64_C(0x0000000000d2cd01), 0.690, 0.823440659400000) \ + STEP(139, UINT64_C(0x0000000000d48ac2), 0.695, 0.830242328731250) \ + STEP(140, UINT64_C(0x0000000000d64063), 0.700, 0.836920000000000) \ + STEP(141, UINT64_C(0x0000000000d7edc2), 0.705, 0.843471678768750) \ + STEP(142, UINT64_C(0x0000000000d992bf), 0.710, 0.849895460600000) \ + STEP(143, UINT64_C(0x0000000000db2f3c), 0.715, 0.856189533306250) \ + STEP(144, UINT64_C(0x0000000000dcc31c), 0.720, 0.862352179200000) \ + STEP(145, UINT64_C(0x0000000000de4e44), 0.725, 0.868381777343750) \ + STEP(146, UINT64_C(0x0000000000dfd09a), 0.730, 0.874276805800000) \ + STEP(147, UINT64_C(0x0000000000e14a07), 0.735, 0.880035843881250) \ + STEP(148, UINT64_C(0x0000000000e2ba74), 0.740, 0.885657574400000) \ + STEP(149, UINT64_C(0x0000000000e421cd), 0.745, 0.891140785918750) \ + STEP(150, UINT64_C(0x0000000000e58000), 0.750, 0.896484375000000) \ + STEP(151, UINT64_C(0x0000000000e6d4fb), 0.755, 0.901687348456250) \ + STEP(152, UINT64_C(0x0000000000e820b0), 0.760, 0.906748825600000) \ + STEP(153, UINT64_C(0x0000000000e96313), 0.765, 0.911668040493750) \ + STEP(154, UINT64_C(0x0000000000ea9c18), 0.770, 0.916444344200000) \ + STEP(155, UINT64_C(0x0000000000ebcbb7), 0.775, 0.921077207031250) \ + STEP(156, UINT64_C(0x0000000000ecf1e8), 0.780, 0.925566220800000) \ + STEP(157, UINT64_C(0x0000000000ee0ea7), 0.785, 0.929911101068750) \ + STEP(158, UINT64_C(0x0000000000ef21f1), 0.790, 0.934111689400000) \ + STEP(159, UINT64_C(0x0000000000f02bc6), 0.795, 0.938167955606250) \ + STEP(160, UINT64_C(0x0000000000f12c27), 0.800, 
0.942080000000000) \ + STEP(161, UINT64_C(0x0000000000f22319), 0.805, 0.945848055643750) \ + STEP(162, UINT64_C(0x0000000000f310a1), 0.810, 0.949472490600000) \ + STEP(163, UINT64_C(0x0000000000f3f4c7), 0.815, 0.952953810181250) \ + STEP(164, UINT64_C(0x0000000000f4cf98), 0.820, 0.956292659200000) \ + STEP(165, UINT64_C(0x0000000000f5a120), 0.825, 0.959489824218750) \ + STEP(166, UINT64_C(0x0000000000f6696e), 0.830, 0.962546235800000) \ + STEP(167, UINT64_C(0x0000000000f72894), 0.835, 0.965462970756250) \ + STEP(168, UINT64_C(0x0000000000f7dea8), 0.840, 0.968241254400000) \ + STEP(169, UINT64_C(0x0000000000f88bc0), 0.845, 0.970882462793750) \ + STEP(170, UINT64_C(0x0000000000f92ff6), 0.850, 0.973388125000000) \ + STEP(171, UINT64_C(0x0000000000f9cb67), 0.855, 0.975759925331250) \ + STEP(172, UINT64_C(0x0000000000fa5e30), 0.860, 0.977999705600000) \ + STEP(173, UINT64_C(0x0000000000fae874), 0.865, 0.980109467368750) \ + STEP(174, UINT64_C(0x0000000000fb6a57), 0.870, 0.982091374200000) \ + STEP(175, UINT64_C(0x0000000000fbe400), 0.875, 0.983947753906250) \ + STEP(176, UINT64_C(0x0000000000fc5598), 0.880, 0.985681100800000) \ + STEP(177, UINT64_C(0x0000000000fcbf4e), 0.885, 0.987294077943750) \ + STEP(178, UINT64_C(0x0000000000fd214f), 0.890, 0.988789519400000) \ + STEP(179, UINT64_C(0x0000000000fd7bcf), 0.895, 0.990170432481250) \ + STEP(180, UINT64_C(0x0000000000fdcf03), 0.900, 0.991440000000000) \ + STEP(181, UINT64_C(0x0000000000fe1b23), 0.905, 0.992601582518750) \ + STEP(182, UINT64_C(0x0000000000fe606a), 0.910, 0.993658720600000) \ + STEP(183, UINT64_C(0x0000000000fe9f18), 0.915, 0.994615137056250) \ + STEP(184, UINT64_C(0x0000000000fed76e), 0.920, 0.995474739200000) \ + STEP(185, UINT64_C(0x0000000000ff09b0), 0.925, 0.996241621093750) \ + STEP(186, UINT64_C(0x0000000000ff3627), 0.930, 0.996920065800000) \ + STEP(187, UINT64_C(0x0000000000ff5d1d), 0.935, 0.997514547631250) \ + STEP(188, UINT64_C(0x0000000000ff7ee0), 0.940, 0.998029734400000) \ + STEP(189, 
UINT64_C(0x0000000000ff9bc3), 0.945, 0.998470489668750) \ + STEP(190, UINT64_C(0x0000000000ffb419), 0.950, 0.998841875000000) \ + STEP(191, UINT64_C(0x0000000000ffc83d), 0.955, 0.999149152206250) \ + STEP(192, UINT64_C(0x0000000000ffd888), 0.960, 0.999397785600000) \ + STEP(193, UINT64_C(0x0000000000ffe55b), 0.965, 0.999593444243750) \ + STEP(194, UINT64_C(0x0000000000ffef17), 0.970, 0.999742004200000) \ + STEP(195, UINT64_C(0x0000000000fff623), 0.975, 0.999849550781250) \ + STEP(196, UINT64_C(0x0000000000fffae9), 0.980, 0.999922380800000) \ + STEP(197, UINT64_C(0x0000000000fffdd6), 0.985, 0.999967004818750) \ + STEP(198, UINT64_C(0x0000000000ffff5a), 0.990, 0.999990149400000) \ + STEP(199, UINT64_C(0x0000000000ffffeb), 0.995, 0.999998759356250) \ + STEP(200, UINT64_C(0x0000000001000000), 1.000, 1.000000000000000) #endif /* JEMALLOC_INTERNAL_SMOOTHSTEP_H */ diff --git a/include/jemalloc/internal/spin.h b/include/jemalloc/internal/spin.h index 87c400d5..4cd5e1db 100644 --- a/include/jemalloc/internal/spin.h +++ b/include/jemalloc/internal/spin.h @@ -3,7 +3,8 @@ #include "jemalloc/internal/jemalloc_preamble.h" -#define SPIN_INITIALIZER {0U} +#define SPIN_INITIALIZER \ + { 0U } typedef struct { unsigned iteration; @@ -11,12 +12,12 @@ typedef struct { static inline void spin_cpu_spinwait(void) { -# if HAVE_CPU_SPINWAIT +#if HAVE_CPU_SPINWAIT CPU_SPINWAIT; -# else +#else volatile int x = 0; x = x; -# endif +#endif } static inline void diff --git a/include/jemalloc/internal/stats.h b/include/jemalloc/internal/stats.h index a5f1be32..1c7b23e0 100644 --- a/include/jemalloc/internal/stats.h +++ b/include/jemalloc/internal/stats.h @@ -7,32 +7,32 @@ #include "jemalloc/internal/tsd_types.h" /* OPTION(opt, var_name, default, set_value_to) */ -#define STATS_PRINT_OPTIONS \ - OPTION('J', json, false, true) \ - OPTION('g', general, true, false) \ - OPTION('m', merged, config_stats, false) \ - OPTION('d', destroyed, config_stats, false) \ - OPTION('a', unmerged, config_stats, 
false) \ - OPTION('b', bins, true, false) \ - OPTION('l', large, true, false) \ - OPTION('x', mutex, true, false) \ - OPTION('e', extents, true, false) \ - OPTION('h', hpa, config_stats, false) +#define STATS_PRINT_OPTIONS \ + OPTION('J', json, false, true) \ + OPTION('g', general, true, false) \ + OPTION('m', merged, config_stats, false) \ + OPTION('d', destroyed, config_stats, false) \ + OPTION('a', unmerged, config_stats, false) \ + OPTION('b', bins, true, false) \ + OPTION('l', large, true, false) \ + OPTION('x', mutex, true, false) \ + OPTION('e', extents, true, false) \ + OPTION('h', hpa, config_stats, false) enum { #define OPTION(o, v, d, s) stats_print_option_num_##v, - STATS_PRINT_OPTIONS + STATS_PRINT_OPTIONS #undef OPTION - stats_print_tot_num_options + stats_print_tot_num_options }; /* Options for stats_print. */ extern bool opt_stats_print; -extern char opt_stats_print_opts[stats_print_tot_num_options+1]; +extern char opt_stats_print_opts[stats_print_tot_num_options + 1]; /* Utilities for stats_interval. */ extern int64_t opt_stats_interval; -extern char opt_stats_interval_opts[stats_print_tot_num_options+1]; +extern char opt_stats_interval_opts[stats_print_tot_num_options + 1]; #define STATS_INTERVAL_DEFAULT -1 /* diff --git a/include/jemalloc/internal/sz.h b/include/jemalloc/internal/sz.h index 3a32e232..d75a3034 100644 --- a/include/jemalloc/internal/sz.h +++ b/include/jemalloc/internal/sz.h @@ -76,8 +76,9 @@ sz_psz2ind(size_t psz) { * SC_NGROUP. off_to_first_ps_rg begins from 1, instead of 0. e.g. * off_to_first_ps_rg is 1 when psz is (PAGE * SC_NGROUP + 1). */ - pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) ? - 0 : x - (SC_LG_NGROUP + LG_PAGE); + pszind_t off_to_first_ps_rg = (x < SC_LG_NGROUP + LG_PAGE) + ? 0 + : x - (SC_LG_NGROUP + LG_PAGE); /* * Same as sc_s::lg_delta. @@ -85,8 +86,9 @@ sz_psz2ind(size_t psz) { * for each increase in offset, it's multiplied by two. * Therefore, lg_delta = LG_PAGE + (off_to_first_ps_rg - 1). 
*/ - pszind_t lg_delta = (off_to_first_ps_rg == 0) ? - LG_PAGE : LG_PAGE + (off_to_first_ps_rg - 1); + pszind_t lg_delta = (off_to_first_ps_rg == 0) + ? LG_PAGE + : LG_PAGE + (off_to_first_ps_rg - 1); /* * Let's write psz in binary, e.g. 0011 for 0x3, 0111 for 0x7. @@ -118,13 +120,13 @@ sz_pind2sz_compute(pszind_t pind) { size_t grp = pind >> SC_LG_NGROUP; size_t mod = pind & ((ZU(1) << SC_LG_NGROUP) - 1); - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP-1))) << grp) + size_t grp_size_mask = ~((!!grp) - 1); + size_t grp_size = ((ZU(1) << (LG_PAGE + (SC_LG_NGROUP - 1))) << grp) & grp_size_mask; size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_PAGE-1); - size_t mod_size = (mod+1) << lg_delta; + size_t lg_delta = shift + (LG_PAGE - 1); + size_t mod_size = (mod + 1) << lg_delta; size_t sz = grp_size + mod_size; return sz; @@ -148,9 +150,10 @@ sz_psz2u(size_t psz) { if (unlikely(psz > SC_LARGE_MAXCLASS)) { return SC_LARGE_MAXCLASS + PAGE; } - size_t x = lg_floor((psz<<1)-1); - size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) ? - LG_PAGE : x - SC_LG_NGROUP - 1; + size_t x = lg_floor((psz << 1) - 1); + size_t lg_delta = (x < SC_LG_NGROUP + LG_PAGE + 1) + ? LG_PAGE + : x - SC_LG_NGROUP - 1; size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (psz + delta_mask) & ~delta_mask; @@ -174,17 +177,19 @@ sz_size2index_compute_inline(size_t size) { } #endif { - szind_t x = lg_floor((size<<1)-1); - szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) ? 0 : - x - (SC_LG_NGROUP + LG_QUANTUM); + szind_t x = lg_floor((size << 1) - 1); + szind_t shift = (x < SC_LG_NGROUP + LG_QUANTUM) + ? 0 + : x - (SC_LG_NGROUP + LG_QUANTUM); szind_t grp = shift << SC_LG_NGROUP; szind_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - SC_LG_NGROUP - 1; + ? 
LG_QUANTUM + : x - SC_LG_NGROUP - 1; - size_t delta_inverse_mask = ZU(-1) << lg_delta; - szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & - ((ZU(1) << SC_LG_NGROUP) - 1); + size_t delta_inverse_mask = ZU(-1) << lg_delta; + szind_t mod = ((((size - 1) & delta_inverse_mask) >> lg_delta)) + & ((ZU(1) << SC_LG_NGROUP) - 1); szind_t index = SC_NTINY + grp + mod; return index; @@ -228,16 +233,16 @@ sz_index2size_compute_inline(szind_t index) { { size_t reduced_index = index - SC_NTINY; size_t grp = reduced_index >> SC_LG_NGROUP; - size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - - 1); + size_t mod = reduced_index & ((ZU(1) << SC_LG_NGROUP) - 1); - size_t grp_size_mask = ~((!!grp)-1); - size_t grp_size = ((ZU(1) << (LG_QUANTUM + - (SC_LG_NGROUP-1))) << grp) & grp_size_mask; + size_t grp_size_mask = ~((!!grp) - 1); + size_t grp_size = ((ZU(1) << (LG_QUANTUM + (SC_LG_NGROUP - 1))) + << grp) + & grp_size_mask; size_t shift = (grp == 0) ? 1 : grp; - size_t lg_delta = shift + (LG_QUANTUM-1); - size_t mod_size = (mod+1) << lg_delta; + size_t lg_delta = shift + (LG_QUANTUM - 1); + size_t mod_size = (mod + 1) << lg_delta; size_t usize = grp_size + mod_size; return usize; @@ -269,8 +274,8 @@ sz_index2size_unsafe(szind_t index) { JEMALLOC_ALWAYS_INLINE size_t sz_index2size(szind_t index) { - assert(!sz_large_size_classes_disabled() || - index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); + assert(!sz_large_size_classes_disabled() + || index <= sz_size2index(USIZE_GROW_SLOW_THRESHOLD)); size_t size = sz_index2size_unsafe(index); /* * With large size classes disabled, the usize above @@ -285,8 +290,8 @@ sz_index2size(szind_t index) { * the size is no larger than USIZE_GROW_SLOW_THRESHOLD here * instead of SC_LARGE_MINCLASS. 
*/ - assert(!sz_large_size_classes_disabled() || - size <= USIZE_GROW_SLOW_THRESHOLD); + assert(!sz_large_size_classes_disabled() + || size <= USIZE_GROW_SLOW_THRESHOLD); return size; } @@ -309,9 +314,10 @@ sz_size2index_usize_fastpath(size_t size, szind_t *ind, size_t *usize) { JEMALLOC_ALWAYS_INLINE size_t sz_s2u_compute_using_delta(size_t size) { - size_t x = lg_floor((size<<1)-1); + size_t x = lg_floor((size << 1) - 1); size_t lg_delta = (x < SC_LG_NGROUP + LG_QUANTUM + 1) - ? LG_QUANTUM : x - SC_LG_NGROUP - 1; + ? LG_QUANTUM + : x - SC_LG_NGROUP - 1; size_t delta = ZU(1) << lg_delta; size_t delta_mask = delta - 1; size_t usize = (size + delta_mask) & ~delta_mask; @@ -331,8 +337,8 @@ sz_s2u_compute(size_t size) { if (size <= (ZU(1) << SC_LG_TINY_MAXCLASS)) { size_t lg_tmin = SC_LG_TINY_MAXCLASS - SC_NTINY + 1; size_t lg_ceil = lg_floor(pow2_ceil_zu(size)); - return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) : - (ZU(1) << lg_ceil)); + return (lg_ceil < lg_tmin ? (ZU(1) << lg_tmin) + : (ZU(1) << lg_ceil)); } #endif if (size <= SC_SMALL_MAXCLASS || !sz_large_size_classes_disabled()) { diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 76d601c3..73126db7 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -8,15 +8,15 @@ #include "jemalloc/internal/tcache_types.h" #include "jemalloc/internal/thread_event_registry.h" -extern bool opt_tcache; -extern size_t opt_tcache_max; -extern ssize_t opt_lg_tcache_nslots_mul; +extern bool opt_tcache; +extern size_t opt_tcache_max; +extern ssize_t opt_lg_tcache_nslots_mul; extern unsigned opt_tcache_nslots_small_min; extern unsigned opt_tcache_nslots_small_max; extern unsigned opt_tcache_nslots_large; -extern ssize_t opt_lg_tcache_shift; -extern size_t opt_tcache_gc_incr_bytes; -extern size_t opt_tcache_gc_delay_bytes; +extern ssize_t opt_lg_tcache_shift; +extern size_t opt_tcache_gc_incr_bytes; +extern size_t 
opt_tcache_gc_delay_bytes; extern unsigned opt_lg_tcache_flush_small_div; extern unsigned opt_lg_tcache_flush_large_div; @@ -27,14 +27,14 @@ extern unsigned opt_lg_tcache_flush_large_div; * it should not be changed on the fly. To change the number of tcache bins * in use, refer to tcache_nbins of each tcache. */ -extern unsigned global_do_not_change_tcache_nbins; +extern unsigned global_do_not_change_tcache_nbins; /* * Maximum cached size class. Same as above, this is only used during threads * initialization and should not be changed. To change the maximum cached size * class, refer to tcache_max of each tcache. */ -extern size_t global_do_not_change_tcache_maxclass; +extern size_t global_do_not_change_tcache_maxclass; /* * Explicit tcaches, managed via the tcache.{create,flush,destroy} mallctls and @@ -44,11 +44,11 @@ extern size_t global_do_not_change_tcache_maxclass; * touched. This allows the entire array to be allocated the first time an * explicit tcache is created without a disproportionate impact on memory usage. 
*/ -extern tcaches_t *tcaches; +extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); -void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); +void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success); void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); @@ -56,23 +56,23 @@ void tcache_bin_flush_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem); void tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, bool is_small); -bool tcache_bin_info_default_init(const char *bin_settings_segment_cur, - size_t len_left); +bool tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left); bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len); -bool tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, - cache_bin_sz_t *ncached_max); -void tcache_arena_reassociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, - tcache_t *tcache, arena_t *arena); +bool tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max); +void tcache_arena_reassociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); -void tcache_cleanup(tsd_t *tsd); -void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); -bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); -void tcaches_flush(tsd_t *tsd, unsigned ind); -void tcaches_destroy(tsd_t *tsd, unsigned ind); -bool tcache_boot(tsdn_t *tsdn, base_t *base); -void tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, - tcache_t *tcache, arena_t *arena); +void 
thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +void tcache_cleanup(tsd_t *tsd); +void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); +bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); +void tcaches_flush(tsd_t *tsd, unsigned ind); +void tcaches_destroy(tsd_t *tsd, unsigned ind); +bool tcache_boot(tsdn_t *tsdn, base_t *base); +void tcache_arena_associate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); void tcache_prefork(tsdn_t *tsdn); void tcache_postfork_parent(tsdn_t *tsdn); void tcache_postfork_child(tsdn_t *tsdn); diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index e8e3b41f..6bd1b339 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ b/include/jemalloc/internal/tcache_inlines.h @@ -42,8 +42,8 @@ tcache_max_set(tcache_slow_t *tcache_slow, size_t tcache_max) { } static inline void -tcache_bin_settings_backup(tcache_t *tcache, - cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { +tcache_bin_settings_backup( + tcache_t *tcache, cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { for (unsigned i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], cache_bin_ncached_max_get_unsafe(&tcache->bins[i])); @@ -51,8 +51,7 @@ tcache_bin_settings_backup(tcache_t *tcache, } JEMALLOC_ALWAYS_INLINE bool -tcache_bin_disabled(szind_t ind, cache_bin_t *bin, - tcache_slow_t *tcache_slow) { +tcache_bin_disabled(szind_t ind, cache_bin_t *bin, tcache_slow_t *tcache_slow) { assert(bin != NULL); assert(ind < TCACHE_NBINS_MAX); bool disabled = cache_bin_disabled(bin); @@ -66,7 +65,7 @@ tcache_bin_disabled(szind_t ind, cache_bin_t *bin, * ind >= nbins or ncached_max == 0. If a bin is enabled, it has * ind < nbins and ncached_max > 0. 
*/ - unsigned nbins = tcache_nbins_get(tcache_slow); + unsigned nbins = tcache_nbins_get(tcache_slow); cache_bin_sz_t ncached_max = cache_bin_ncached_max_get_unsafe(bin); if (ind >= nbins) { assert(disabled); @@ -88,10 +87,10 @@ tcache_bin_disabled(szind_t ind, cache_bin_t *bin, } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, - size_t size, szind_t binind, bool zero, bool slow_path) { +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, + szind_t binind, bool zero, bool slow_path) { void *ret; - bool tcache_success; + bool tcache_success; assert(binind < SC_NBINS); cache_bin_t *bin = &tcache->bins[binind]; @@ -103,8 +102,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, if (unlikely(arena == NULL)) { return NULL; } - if (unlikely(tcache_bin_disabled(binind, bin, - tcache->tcache_slow))) { + if (unlikely(tcache_bin_disabled( + binind, bin, tcache->tcache_slow))) { /* stats and zero are handled directly by the arena. 
*/ return arena_malloc_hard(tsd_tsdn(tsd), arena, size, binind, zero, /* slab */ true); @@ -112,8 +111,8 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, tcache_bin_flush_stashed(tsd, tcache, bin, binind, /* is_small */ true); - ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - bin, binind, &tcache_hard_success); + ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, bin, + binind, &tcache_hard_success); if (tcache_hard_success == false) { return NULL; } @@ -135,11 +134,11 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - bool tcache_success; + bool tcache_success; cache_bin_t *bin = &tcache->bins[binind]; - assert(binind >= SC_NBINS && - !tcache_bin_disabled(binind, bin, tcache->tcache_slow)); + assert(binind >= SC_NBINS + && !tcache_bin_disabled(binind, bin, tcache->tcache_slow)); ret = cache_bin_alloc(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { @@ -174,8 +173,8 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) { +tcache_dalloc_small( + tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SC_SMALL_MAXCLASS); cache_bin_t *bin = &tcache->bins[binind]; @@ -195,13 +194,13 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - if (unlikely(tcache_bin_disabled(binind, bin, - tcache->tcache_slow))) { + if (unlikely(tcache_bin_disabled( + binind, bin, tcache->tcache_slow))) { arena_dalloc_small(tsd_tsdn(tsd), ptr); return; } cache_bin_sz_t max = cache_bin_ncached_max_get(bin); - unsigned remain = max >> opt_lg_tcache_flush_small_div; + unsigned remain = max >> 
opt_lg_tcache_flush_small_div; tcache_bin_flush_small(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); @@ -209,19 +208,18 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, } JEMALLOC_ALWAYS_INLINE void -tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, - bool slow_path) { - +tcache_dalloc_large( + tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SC_SMALL_MAXCLASS); - assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= - tcache_max_get(tcache->tcache_slow)); - assert(!tcache_bin_disabled(binind, &tcache->bins[binind], - tcache->tcache_slow)); + assert(tcache_salloc(tsd_tsdn(tsd), ptr) + <= tcache_max_get(tcache->tcache_slow)); + assert(!tcache_bin_disabled( + binind, &tcache->bins[binind], tcache->tcache_slow)); cache_bin_t *bin = &tcache->bins[binind]; if (unlikely(!cache_bin_dalloc_easy(bin, ptr))) { - unsigned remain = cache_bin_ncached_max_get(bin) >> - opt_lg_tcache_flush_large_div; + unsigned remain = cache_bin_ncached_max_get(bin) + >> opt_lg_tcache_flush_large_div; tcache_bin_flush_large(tsd, tcache, bin, binind, remain); bool ret = cache_bin_dalloc_easy(bin, ptr); assert(ret); diff --git a/include/jemalloc/internal/tcache_structs.h b/include/jemalloc/internal/tcache_structs.h index e9a68152..2c000de3 100644 --- a/include/jemalloc/internal/tcache_structs.h +++ b/include/jemalloc/internal/tcache_structs.h @@ -30,45 +30,45 @@ struct tcache_slow_s { cache_bin_array_descriptor_t cache_bin_array_descriptor; /* The arena this tcache is associated with. */ - arena_t *arena; + arena_t *arena; /* The number of bins activated in the tcache. */ - unsigned tcache_nbins; + unsigned tcache_nbins; /* Last time GC has been performed. */ - nstime_t last_gc_time; + nstime_t last_gc_time; /* Next bin to GC. 
*/ - szind_t next_gc_bin; - szind_t next_gc_bin_small; - szind_t next_gc_bin_large; + szind_t next_gc_bin; + szind_t next_gc_bin_small; + szind_t next_gc_bin_large; /* For small bins, help determine how many items to fill at a time. */ - cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; + cache_bin_fill_ctl_t bin_fill_ctl_do_not_access_directly[SC_NBINS]; /* For small bins, whether has been refilled since last GC. */ - bool bin_refilled[SC_NBINS]; + bool bin_refilled[SC_NBINS]; /* * For small bins, the number of items we can pretend to flush before * actually flushing. */ - uint8_t bin_flush_delay_items[SC_NBINS]; + uint8_t bin_flush_delay_items[SC_NBINS]; /* * The start of the allocation containing the dynamic allocation for * either the cache bins alone, or the cache bin memory as well as this * tcache_slow_t and its associated tcache_t. */ - void *dyn_alloc; + void *dyn_alloc; /* The associated bins. */ - tcache_t *tcache; + tcache_t *tcache; }; struct tcache_s { - tcache_slow_t *tcache_slow; - cache_bin_t bins[TCACHE_NBINS_MAX]; + tcache_slow_t *tcache_slow; + cache_bin_t bins[TCACHE_NBINS_MAX]; }; /* Linkage for list of available (previously used) explicit tcache IDs. */ struct tcaches_s { union { - tcache_t *tcache; - tcaches_t *next; + tcache_t *tcache; + tcaches_t *next; }; }; diff --git a/include/jemalloc/internal/tcache_types.h b/include/jemalloc/internal/tcache_types.h index b3828ecf..27d80d3c 100644 --- a/include/jemalloc/internal/tcache_types.h +++ b/include/jemalloc/internal/tcache_types.h @@ -5,12 +5,16 @@ #include "jemalloc/internal/sc.h" typedef struct tcache_slow_s tcache_slow_t; -typedef struct tcache_s tcache_t; -typedef struct tcaches_s tcaches_t; +typedef struct tcache_s tcache_t; +typedef struct tcaches_s tcaches_t; /* Used in TSD static initializer only. Real init in tsd_tcache_data_init(). 
*/ -#define TCACHE_ZERO_INITIALIZER {0} -#define TCACHE_SLOW_ZERO_INITIALIZER {{0}} +#define TCACHE_ZERO_INITIALIZER \ + { 0 } +#define TCACHE_SLOW_ZERO_INITIALIZER \ + { \ + { 0 } \ + } /* Used in TSD static initializer only. Will be initialized to opt_tcache. */ #define TCACHE_ENABLED_ZERO_INITIALIZER false @@ -21,9 +25,11 @@ typedef struct tcaches_s tcaches_t; #define TCACHE_LG_MAXCLASS_LIMIT LG_USIZE_GROW_SLOW_THRESHOLD #define TCACHE_MAXCLASS_LIMIT ((size_t)1 << TCACHE_LG_MAXCLASS_LIMIT) -#define TCACHE_NBINS_MAX (SC_NBINS + SC_NGROUP * \ - (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) + 1) -#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ +#define TCACHE_NBINS_MAX \ + (SC_NBINS \ + + SC_NGROUP * (TCACHE_LG_MAXCLASS_LIMIT - SC_LG_LARGE_MINCLASS) \ + + 1) +#define TCACHE_GC_NEIGHBOR_LIMIT ((uintptr_t)1 << 21) /* 2M */ #define TCACHE_GC_INTERVAL_NS ((uint64_t)10 * KQU(1000000)) /* 10ms */ #define TCACHE_GC_SMALL_NBINS_MAX ((SC_NBINS > 8) ? (SC_NBINS >> 3) : 1) #define TCACHE_GC_LARGE_NBINS_MAX 1 diff --git a/include/jemalloc/internal/test_hooks.h b/include/jemalloc/internal/test_hooks.h index af3f2755..35f3a211 100644 --- a/include/jemalloc/internal/test_hooks.h +++ b/include/jemalloc/internal/test_hooks.h @@ -7,20 +7,22 @@ extern JEMALLOC_EXPORT void (*test_hooks_arena_new_hook)(void); extern JEMALLOC_EXPORT void (*test_hooks_libc_hook)(void); #if defined(JEMALLOC_JET) || defined(JEMALLOC_UNIT_TEST) -# define JEMALLOC_TEST_HOOK(fn, hook) ((void)(hook != NULL && (hook(), 0)), fn) +# define JEMALLOC_TEST_HOOK(fn, hook) \ + ((void)(hook != NULL && (hook(), 0)), fn) -# define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook) -# define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook) -# define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook) -# define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook) -# define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook) -# define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook) -# 
define secure_getenv JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook) +# define open JEMALLOC_TEST_HOOK(open, test_hooks_libc_hook) +# define read JEMALLOC_TEST_HOOK(read, test_hooks_libc_hook) +# define write JEMALLOC_TEST_HOOK(write, test_hooks_libc_hook) +# define readlink JEMALLOC_TEST_HOOK(readlink, test_hooks_libc_hook) +# define close JEMALLOC_TEST_HOOK(close, test_hooks_libc_hook) +# define creat JEMALLOC_TEST_HOOK(creat, test_hooks_libc_hook) +# define secure_getenv \ + JEMALLOC_TEST_HOOK(secure_getenv, test_hooks_libc_hook) /* Note that this is undef'd and re-define'd in src/prof.c. */ -# define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +# define _Unwind_Backtrace \ + JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #else -# define JEMALLOC_TEST_HOOK(fn, hook) fn +# define JEMALLOC_TEST_HOOK(fn, hook) fn #endif - #endif /* JEMALLOC_INTERNAL_TEST_HOOKS_H */ diff --git a/include/jemalloc/internal/thread_event.h b/include/jemalloc/internal/thread_event.h index bf9ca3cc..e9e2b6cd 100644 --- a/include/jemalloc/internal/thread_event.h +++ b/include/jemalloc/internal/thread_event.h @@ -37,7 +37,7 @@ #define TE_INVALID_ELAPSED UINT64_MAX typedef struct te_ctx_s { - bool is_alloc; + bool is_alloc; uint64_t *current; uint64_t *last_event; uint64_t *next_event; @@ -48,22 +48,20 @@ void te_assert_invariants_debug(tsd_t *tsd); void te_event_trigger(tsd_t *tsd, te_ctx_t *ctx); void te_recompute_fast_threshold(tsd_t *tsd); void tsd_te_init(tsd_t *tsd); -void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, - uint64_t wait); +void te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait); /* List of all thread event counters. 
*/ -#define ITERATE_OVER_ALL_COUNTERS \ - C(thread_allocated) \ - C(thread_allocated_last_event) \ - C(prof_sample_last_event) \ +#define ITERATE_OVER_ALL_COUNTERS \ + C(thread_allocated) \ + C(thread_allocated_last_event) \ + C(prof_sample_last_event) \ C(stats_interval_last_event) /* Getters directly wrap TSD getters. */ -#define C(counter) \ -JEMALLOC_ALWAYS_INLINE uint64_t \ -counter##_get(tsd_t *tsd) { \ - return tsd_##counter##_get(tsd); \ -} +#define C(counter) \ + JEMALLOC_ALWAYS_INLINE uint64_t counter##_get(tsd_t *tsd) { \ + return tsd_##counter##_get(tsd); \ + } ITERATE_OVER_ALL_COUNTERS #undef C @@ -75,11 +73,10 @@ ITERATE_OVER_ALL_COUNTERS * temporarily delay the event and let it be immediately triggered at the next * allocation call. */ -#define C(counter) \ -JEMALLOC_ALWAYS_INLINE void \ -counter##_set(tsd_t *tsd, uint64_t v) { \ - *tsd_##counter##p_get(tsd) = v; \ -} +#define C(counter) \ + JEMALLOC_ALWAYS_INLINE void counter##_set(tsd_t *tsd, uint64_t v) { \ + *tsd_##counter##p_get(tsd) = v; \ + } ITERATE_OVER_ALL_COUNTERS #undef C diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h index 1957e727..7ded440d 100644 --- a/include/jemalloc/internal/thread_event_registry.h +++ b/include/jemalloc/internal/thread_event_registry.h @@ -87,8 +87,8 @@ typedef void (*user_event_cb_t)( typedef struct user_hook_object_s user_hook_object_t; struct user_hook_object_s { user_event_cb_t callback; - uint64_t interval; - bool is_alloc_only; + uint64_t interval; + bool is_alloc_only; }; /* diff --git a/include/jemalloc/internal/ticker.h b/include/jemalloc/internal/ticker.h index dca9bd10..a1eec628 100644 --- a/include/jemalloc/internal/ticker.h +++ b/include/jemalloc/internal/ticker.h @@ -53,7 +53,7 @@ ticker_read(const ticker_t *ticker) { * worth the hassle, but this is on the fast path of both malloc and free (via * tcache_event). 
*/ -#if defined(__GNUC__) && !defined(__clang__) \ +#if defined(__GNUC__) && !defined(__clang__) \ && (defined(__x86_64__) || defined(__i386__)) JEMALLOC_NOINLINE #endif @@ -129,7 +129,8 @@ struct ticker_geom_s { * the behavior over long periods of time rather than the exact timing of the * initial ticks. */ -#define TICKER_GEOM_INIT(nticks) {nticks, nticks} +#define TICKER_GEOM_INIT(nticks) \ + { nticks, nticks } static inline void ticker_geom_init(ticker_geom_t *ticker, int32_t nticks) { @@ -150,22 +151,21 @@ ticker_geom_read(const ticker_geom_t *ticker) { } /* Same deal as above. */ -#if defined(__GNUC__) && !defined(__clang__) \ +#if defined(__GNUC__) && !defined(__clang__) \ && (defined(__x86_64__) || defined(__i386__)) JEMALLOC_NOINLINE #endif static bool -ticker_geom_fixup(ticker_geom_t *ticker, uint64_t *prng_state, - bool delay_trigger) { +ticker_geom_fixup( + ticker_geom_t *ticker, uint64_t *prng_state, bool delay_trigger) { if (delay_trigger) { ticker->tick = 0; return false; } uint64_t idx = prng_lg_range_u64(prng_state, TICKER_GEOM_NBITS); - ticker->tick = (uint32_t)( - (uint64_t)ticker->nticks * (uint64_t)ticker_geom_table[idx] - / (uint64_t)TICKER_GEOM_MUL); + ticker->tick = (uint32_t)((uint64_t)ticker->nticks + * (uint64_t)ticker_geom_table[idx] / (uint64_t)TICKER_GEOM_MUL); return true; } @@ -181,8 +181,8 @@ ticker_geom_ticks(ticker_geom_t *ticker, uint64_t *prng_state, int32_t nticks, } static inline bool -ticker_geom_tick(ticker_geom_t *ticker, uint64_t *prng_state, - bool delay_trigger) { +ticker_geom_tick( + ticker_geom_t *ticker, uint64_t *prng_state, bool delay_trigger) { return ticker_geom_ticks(ticker, prng_state, 1, delay_trigger); } diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h index c06605df..84101c65 100644 --- a/include/jemalloc/internal/tsd.h +++ b/include/jemalloc/internal/tsd.h @@ -7,14 +7,14 @@ * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set. 
*/ #ifdef JEMALLOC_MALLOC_THREAD_CLEANUP -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/tsd_malloc_thread_cleanup.h" +# include "jemalloc/internal/jemalloc_preamble.h" +# include "jemalloc/internal/tsd_malloc_thread_cleanup.h" #elif (defined(JEMALLOC_TLS)) -#include "jemalloc/internal/tsd_tls.h" +# include "jemalloc/internal/tsd_tls.h" #elif (defined(_WIN32)) -#include "jemalloc/internal/tsd_win.h" +# include "jemalloc/internal/tsd_win.h" #else -#include "jemalloc/internal/tsd_generic.h" +# include "jemalloc/internal/tsd_generic.h" #endif /* @@ -22,11 +22,10 @@ * foo. This omits some safety checks, and so can be used during tsd * initialization and cleanup. */ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE t * \ -tsd_##n##p_get_unsafe(tsd_t *tsd) { \ - return &tsd->TSD_MANGLE(n); \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE t *tsd_##n##p_get_unsafe(tsd_t *tsd) { \ + return &tsd->TSD_MANGLE(n); \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER @@ -59,39 +58,36 @@ TSD_DATA_SLOWER * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type. */ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE nt * \ -tsdn_##n##p_get(tsdn_t *tsdn) { \ - if (tsdn_null(tsdn)) { \ - return NULL; \ - } \ - tsd_t *tsd = tsdn_tsd(tsdn); \ - return (nt *)tsd_##n##p_get(tsd); \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE nt *tsdn_##n##p_get(tsdn_t *tsdn) { \ + if (tsdn_null(tsdn)) { \ + return NULL; \ + } \ + tsd_t *tsd = tsdn_tsd(tsdn); \ + return (nt *)tsd_##n##p_get(tsd); \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER #undef O /* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. 
*/ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE t \ -tsd_##n##_get(tsd_t *tsd) { \ - return *tsd_##n##p_get(tsd); \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE t tsd_##n##_get(tsd_t *tsd) { \ + return *tsd_##n##p_get(tsd); \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER #undef O /* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */ -#define O(n, t, nt) \ -JEMALLOC_ALWAYS_INLINE void \ -tsd_##n##_set(tsd_t *tsd, t val) { \ - assert(tsd_state_get(tsd) != tsd_state_reincarnated && \ - tsd_state_get(tsd) != tsd_state_minimal_initialized); \ - *tsd_##n##p_get(tsd) = val; \ -} +#define O(n, t, nt) \ + JEMALLOC_ALWAYS_INLINE void tsd_##n##_set(tsd_t *tsd, t val) { \ + assert(tsd_state_get(tsd) != tsd_state_reincarnated \ + && tsd_state_get(tsd) != tsd_state_minimal_initialized); \ + *tsd_##n##p_get(tsd) = val; \ + } TSD_DATA_SLOW TSD_DATA_FAST TSD_DATA_SLOWER @@ -104,8 +100,8 @@ tsd_assert_fast(tsd_t *tsd) { * counters; it's not in general possible to ensure that they won't * change asynchronously from underneath us. */ - assert(!malloc_slow && tsd_tcache_enabled_get(tsd) && - tsd_reentrancy_level_get(tsd) == 0); + assert(!malloc_slow && tsd_tcache_enabled_get(tsd) + && tsd_reentrancy_level_get(tsd) == 0); } JEMALLOC_ALWAYS_INLINE bool @@ -194,8 +190,8 @@ tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) { static inline bool tsd_state_nocleanup(tsd_t *tsd) { - return tsd_state_get(tsd) == tsd_state_reincarnated || - tsd_state_get(tsd) == tsd_state_minimal_initialized; + return tsd_state_get(tsd) == tsd_state_reincarnated + || tsd_state_get(tsd) == tsd_state_minimal_initialized; } /* diff --git a/include/jemalloc/internal/tsd_generic.h b/include/jemalloc/internal/tsd_generic.h index aa8042a4..e049766f 100644 --- a/include/jemalloc/internal/tsd_generic.h +++ b/include/jemalloc/internal/tsd_generic.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_GENERIC_H -#error This file should be included only once, by tsd.h. 
+# error This file should be included only once, by tsd.h. #endif #define JEMALLOC_INTERNAL_TSD_GENERIC_H @@ -12,25 +12,24 @@ typedef struct tsd_init_block_s tsd_init_block_t; struct tsd_init_block_s { ql_elm(tsd_init_block_t) link; pthread_t thread; - void *data; + void *data; }; /* Defined in tsd.c, to allow the mutex headers to have tsd dependencies. */ typedef struct tsd_init_head_s tsd_init_head_t; typedef struct { - bool initialized; + bool initialized; tsd_t val; } tsd_wrapper_t; -void *tsd_init_check_recursion(tsd_init_head_t *head, - tsd_init_block_t *block); -void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); +void *tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block); +void tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block); -extern pthread_key_t tsd_tsd; +extern pthread_key_t tsd_tsd; extern tsd_init_head_t tsd_init_head; -extern tsd_wrapper_t tsd_boot_wrapper; -extern bool tsd_booted; +extern tsd_wrapper_t tsd_boot_wrapper; +extern bool tsd_booted; /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE void @@ -42,8 +41,8 @@ tsd_cleanup_wrapper(void *arg) { tsd_cleanup(&wrapper->val); if (wrapper->initialized) { /* Trigger another cleanup round. 
*/ - if (pthread_setspecific(tsd_tsd, (void *)wrapper) != 0) - { + if (pthread_setspecific(tsd_tsd, (void *)wrapper) + != 0) { malloc_write(": Error setting TSD\n"); if (opt_abort) { abort(); @@ -78,23 +77,23 @@ tsd_wrapper_get(bool init) { if (init && unlikely(wrapper == NULL)) { tsd_init_block_t block; - wrapper = (tsd_wrapper_t *) - tsd_init_check_recursion(&tsd_init_head, &block); + wrapper = (tsd_wrapper_t *)tsd_init_check_recursion( + &tsd_init_head, &block); if (wrapper) { return wrapper; } - wrapper = (tsd_wrapper_t *) - malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc( + sizeof(tsd_wrapper_t)); block.data = (void *)wrapper; if (wrapper == NULL) { malloc_write(": Error allocating TSD\n"); abort(); } else { wrapper->initialized = false; - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; - JEMALLOC_DIAGNOSTIC_POP + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; } tsd_wrapper_set(wrapper); @@ -105,11 +104,11 @@ tsd_wrapper_get(bool init) { JEMALLOC_ALWAYS_INLINE bool tsd_boot0(void) { - tsd_wrapper_t *wrapper; + tsd_wrapper_t *wrapper; tsd_init_block_t block; - wrapper = (tsd_wrapper_t *) - tsd_init_check_recursion(&tsd_init_head, &block); + wrapper = (tsd_wrapper_t *)tsd_init_check_recursion( + &tsd_init_head, &block); if (wrapper) { return false; } @@ -134,10 +133,10 @@ tsd_boot1(void) { tsd_boot_wrapper.initialized = false; tsd_cleanup(&tsd_boot_wrapper.val); wrapper->initialized = false; - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS tsd_t initializer = TSD_INITIALIZER; - JEMALLOC_DIAGNOSTIC_POP + JEMALLOC_DIAGNOSTIC_POP wrapper->val = initializer; tsd_wrapper_set(wrapper); } diff --git 
a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index 69b60519..f675587d 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -48,123 +48,113 @@ #ifdef JEMALLOC_JET typedef void (*test_callback_t)(int *); -# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 -# define MALLOC_TEST_TSD \ - O(test_data, int, int) \ - O(test_callback, test_callback_t, int) -# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL +# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10 +# define MALLOC_TEST_TSD \ + O(test_data, int, int) \ + O(test_callback, test_callback_t, int) +# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL #else -# define MALLOC_TEST_TSD -# define MALLOC_TEST_TSD_INITIALIZER +# define MALLOC_TEST_TSD +# define MALLOC_TEST_TSD_INITIALIZER #endif typedef ql_elm(tsd_t) tsd_link_t; /* O(name, type, nullable type) */ -#define TSD_DATA_SLOW \ - O(tcache_enabled, bool, bool) \ - O(reentrancy_level, int8_t, int8_t) \ - O(min_init_state_nfetched, uint8_t, uint8_t) \ - O(thread_allocated_last_event, uint64_t, uint64_t) \ - O(thread_allocated_next_event, uint64_t, uint64_t) \ - O(thread_deallocated_last_event, uint64_t, uint64_t) \ - O(thread_deallocated_next_event, uint64_t, uint64_t) \ - O(te_data, te_data_t, te_data_t) \ - O(prof_sample_last_event, uint64_t, uint64_t) \ - O(stats_interval_last_event, uint64_t, uint64_t) \ - O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ - O(prng_state, uint64_t, uint64_t) \ - O(san_extents_until_guard_small, uint64_t, uint64_t) \ - O(san_extents_until_guard_large, uint64_t, uint64_t) \ - O(iarena, arena_t *, arena_t *) \ - O(arena, arena_t *, arena_t *) \ - O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ - O(sec_shard, uint8_t, uint8_t) \ - O(binshards, tsd_binshards_t, tsd_binshards_t)\ - O(tsd_link, tsd_link_t, tsd_link_t) \ - O(in_hook, bool, bool) \ - O(peak, peak_t, peak_t) \ - O(activity_callback_thunk, 
activity_callback_thunk_t, \ - activity_callback_thunk_t) \ - O(tcache_slow, tcache_slow_t, tcache_slow_t) \ - O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) +#define TSD_DATA_SLOW \ + O(tcache_enabled, bool, bool) \ + O(reentrancy_level, int8_t, int8_t) \ + O(min_init_state_nfetched, uint8_t, uint8_t) \ + O(thread_allocated_last_event, uint64_t, uint64_t) \ + O(thread_allocated_next_event, uint64_t, uint64_t) \ + O(thread_deallocated_last_event, uint64_t, uint64_t) \ + O(thread_deallocated_next_event, uint64_t, uint64_t) \ + O(te_data, te_data_t, te_data_t) \ + O(prof_sample_last_event, uint64_t, uint64_t) \ + O(stats_interval_last_event, uint64_t, uint64_t) \ + O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ + O(prng_state, uint64_t, uint64_t) \ + O(san_extents_until_guard_small, uint64_t, uint64_t) \ + O(san_extents_until_guard_large, uint64_t, uint64_t) \ + O(iarena, arena_t *, arena_t *) \ + O(arena, arena_t *, arena_t *) \ + O(arena_decay_ticker, ticker_geom_t, ticker_geom_t) \ + O(sec_shard, uint8_t, uint8_t) \ + O(binshards, tsd_binshards_t, tsd_binshards_t) \ + O(tsd_link, tsd_link_t, tsd_link_t) \ + O(in_hook, bool, bool) \ + O(peak, peak_t, peak_t) \ + O(activity_callback_thunk, activity_callback_thunk_t, \ + activity_callback_thunk_t) \ + O(tcache_slow, tcache_slow_t, tcache_slow_t) \ + O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) -#define TSD_DATA_SLOW_INITIALIZER \ - /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ - /* reentrancy_level */ 0, \ - /* min_init_state_nfetched */ 0, \ - /* thread_allocated_last_event */ 0, \ - /* thread_allocated_next_event */ 0, \ - /* thread_deallocated_last_event */ 0, \ - /* thread_deallocated_next_event */ 0, \ - /* te_data */ TE_DATA_INITIALIZER, \ - /* prof_sample_last_event */ 0, \ - /* stats_interval_last_event */ 0, \ - /* prof_tdata */ NULL, \ - /* prng_state */ 0, \ - /* san_extents_until_guard_small */ 0, \ - /* san_extents_until_guard_large */ 0, \ - /* iarena */ NULL, \ - /* arena */ NULL, \ - /* 
arena_decay_ticker */ \ - TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ - /* sec_shard */ (uint8_t)-1, \ - /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ - /* tsd_link */ {NULL}, \ - /* in_hook */ false, \ - /* peak */ PEAK_INITIALIZER, \ - /* activity_callback_thunk */ \ - ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ - /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ - /* rtree_ctx */ RTREE_CTX_INITIALIZER, +#define TSD_DATA_SLOW_INITIALIZER \ + /* tcache_enabled */ TCACHE_ENABLED_ZERO_INITIALIZER, \ + /* reentrancy_level */ 0, /* min_init_state_nfetched */ 0, \ + /* thread_allocated_last_event */ 0, \ + /* thread_allocated_next_event */ 0, \ + /* thread_deallocated_last_event */ 0, \ + /* thread_deallocated_next_event */ 0, \ + /* te_data */ TE_DATA_INITIALIZER, /* prof_sample_last_event */ 0, \ + /* stats_interval_last_event */ 0, /* prof_tdata */ NULL, \ + /* prng_state */ 0, /* san_extents_until_guard_small */ 0, \ + /* san_extents_until_guard_large */ 0, /* iarena */ NULL, \ + /* arena */ NULL, /* arena_decay_ticker */ \ + TICKER_GEOM_INIT(ARENA_DECAY_NTICKS_PER_UPDATE), \ + /* sec_shard */ (uint8_t) - 1, \ + /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ + /* tsd_link */ {NULL}, /* in_hook */ false, \ + /* peak */ PEAK_INITIALIZER, /* activity_callback_thunk */ \ + ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ + /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ + /* rtree_ctx */ RTREE_CTX_INITIALIZER, /* O(name, type, nullable type) */ -#define TSD_DATA_FAST \ - O(thread_allocated, uint64_t, uint64_t) \ - O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ - O(thread_deallocated, uint64_t, uint64_t) \ - O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ - O(tcache, tcache_t, tcache_t) +#define TSD_DATA_FAST \ + O(thread_allocated, uint64_t, uint64_t) \ + O(thread_allocated_next_event_fast, uint64_t, uint64_t) \ + O(thread_deallocated, uint64_t, uint64_t) \ + O(thread_deallocated_next_event_fast, uint64_t, uint64_t) \ + O(tcache, tcache_t, 
tcache_t) -#define TSD_DATA_FAST_INITIALIZER \ - /* thread_allocated */ 0, \ - /* thread_allocated_next_event_fast */ 0, \ - /* thread_deallocated */ 0, \ - /* thread_deallocated_next_event_fast */ 0, \ - /* tcache */ TCACHE_ZERO_INITIALIZER, +#define TSD_DATA_FAST_INITIALIZER \ + /* thread_allocated */ 0, /* thread_allocated_next_event_fast */ 0, \ + /* thread_deallocated */ 0, \ + /* thread_deallocated_next_event_fast */ 0, \ + /* tcache */ TCACHE_ZERO_INITIALIZER, /* O(name, type, nullable type) */ -#define TSD_DATA_SLOWER \ - O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ - MALLOC_TEST_TSD +#define TSD_DATA_SLOWER \ + O(witness_tsd, witness_tsd_t, witness_tsdn_t) \ + MALLOC_TEST_TSD -#define TSD_DATA_SLOWER_INITIALIZER \ - /* witness */ WITNESS_TSD_INITIALIZER \ - /* test data */ MALLOC_TEST_TSD_INITIALIZER +#define TSD_DATA_SLOWER_INITIALIZER \ + /* witness */ WITNESS_TSD_INITIALIZER \ + /* test data */ MALLOC_TEST_TSD_INITIALIZER - -#define TSD_INITIALIZER { \ - TSD_DATA_SLOW_INITIALIZER \ - /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ - TSD_DATA_FAST_INITIALIZER \ - TSD_DATA_SLOWER_INITIALIZER \ -} +#define TSD_INITIALIZER \ + { \ + TSD_DATA_SLOW_INITIALIZER \ + /* state */ ATOMIC_INIT(tsd_state_uninitialized), \ + TSD_DATA_FAST_INITIALIZER TSD_DATA_SLOWER_INITIALIZER \ + } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) void _malloc_tsd_cleanup_register(bool (*f)(void)); #endif -void *malloc_tsd_malloc(size_t size); -void malloc_tsd_dalloc(void *wrapper); +void *malloc_tsd_malloc(size_t size); +void malloc_tsd_dalloc(void *wrapper); tsd_t *malloc_tsd_boot0(void); -void malloc_tsd_boot1(void); -void tsd_cleanup(void *arg); +void malloc_tsd_boot1(void); +void tsd_cleanup(void *arg); tsd_t *tsd_fetch_slow(tsd_t *tsd, bool minimal); -void tsd_state_set(tsd_t *tsd, uint8_t new_state); -void tsd_slow_update(tsd_t *tsd); -void tsd_prefork(tsd_t *tsd); -void tsd_postfork_parent(tsd_t *tsd); -void tsd_postfork_child(tsd_t *tsd); +void 
tsd_state_set(tsd_t *tsd, uint8_t new_state); +void tsd_slow_update(tsd_t *tsd); +void tsd_prefork(tsd_t *tsd); +void tsd_postfork_parent(tsd_t *tsd); +void tsd_postfork_child(tsd_t *tsd); /* * Call ..._inc when your module wants to take all threads down the slow paths, @@ -224,15 +214,15 @@ enum { #define TSD_MANGLE(n) cant_access_tsd_items_directly_use_a_getter_or_setter_##n #ifdef JEMALLOC_U8_ATOMICS -# define tsd_state_t atomic_u8_t -# define tsd_atomic_load atomic_load_u8 -# define tsd_atomic_store atomic_store_u8 -# define tsd_atomic_exchange atomic_exchange_u8 +# define tsd_state_t atomic_u8_t +# define tsd_atomic_load atomic_load_u8 +# define tsd_atomic_store atomic_store_u8 +# define tsd_atomic_exchange atomic_exchange_u8 #else -# define tsd_state_t atomic_u32_t -# define tsd_atomic_load atomic_load_u32 -# define tsd_atomic_store atomic_store_u32 -# define tsd_atomic_exchange atomic_exchange_u32 +# define tsd_state_t atomic_u32_t +# define tsd_atomic_load atomic_load_u32 +# define tsd_atomic_store atomic_store_u32 +# define tsd_atomic_exchange atomic_exchange_u32 #endif /* The actual tsd. */ @@ -243,8 +233,7 @@ struct tsd_s { * setters below. */ -#define O(n, t, nt) \ - t TSD_MANGLE(n); +#define O(n, t, nt) t TSD_MANGLE(n); TSD_DATA_SLOW /* diff --git a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h index fb9ea1b4..00756df1 100644 --- a/include/jemalloc/internal/tsd_malloc_thread_cleanup.h +++ b/include/jemalloc/internal/tsd_malloc_thread_cleanup.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H -#error This file should be included only once, by tsd.h. +# error This file should be included only once, by tsd.h. 
#endif #define JEMALLOC_INTERNAL_TSD_MALLOC_THREAD_CLEANUP_H diff --git a/include/jemalloc/internal/tsd_tls.h b/include/jemalloc/internal/tsd_tls.h index 5e5a6e5e..6536eb54 100644 --- a/include/jemalloc/internal/tsd_tls.h +++ b/include/jemalloc/internal/tsd_tls.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_TLS_H -#error This file should be included only once, by tsd.h. +# error This file should be included only once, by tsd.h. #endif #define JEMALLOC_INTERNAL_TSD_TLS_H @@ -11,7 +11,7 @@ extern JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls; extern pthread_key_t tsd_tsd; -extern bool tsd_booted; +extern bool tsd_booted; /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE bool diff --git a/include/jemalloc/internal/tsd_types.h b/include/jemalloc/internal/tsd_types.h index 73bbe486..46479506 100644 --- a/include/jemalloc/internal/tsd_types.h +++ b/include/jemalloc/internal/tsd_types.h @@ -1,11 +1,11 @@ #ifndef JEMALLOC_INTERNAL_TSD_TYPES_H #define JEMALLOC_INTERNAL_TSD_TYPES_H -#define MALLOC_TSD_CLEANUPS_MAX 4 +#define MALLOC_TSD_CLEANUPS_MAX 4 #include "jemalloc/internal/jemalloc_preamble.h" -typedef struct tsd_s tsd_t; +typedef struct tsd_s tsd_t; typedef struct tsdn_s tsdn_t; typedef bool (*malloc_tsd_cleanup_t)(void); diff --git a/include/jemalloc/internal/tsd_win.h b/include/jemalloc/internal/tsd_win.h index 559ee78f..8b22bec1 100644 --- a/include/jemalloc/internal/tsd_win.h +++ b/include/jemalloc/internal/tsd_win.h @@ -1,5 +1,5 @@ #ifdef JEMALLOC_INTERNAL_TSD_WIN_H -#error This file should be included only once, by tsd.h. +# error This file should be included only once, by tsd.h. #endif #define JEMALLOC_INTERNAL_TSD_WIN_H @@ -13,29 +13,29 @@ than a type cast. 
*/ typedef struct { tsd_t val; - bool initialized; + bool initialized; } tsd_wrapper_t; #if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) -extern DWORD tsd_tsd; +extern DWORD tsd_tsd; extern tsd_wrapper_t tsd_boot_wrapper; -extern bool tsd_booted; -#if defined(_M_ARM64EC) -#define JEMALLOC_WIN32_TLSGETVALUE2 0 -#else -#define JEMALLOC_WIN32_TLSGETVALUE2 1 -#endif -#if JEMALLOC_WIN32_TLSGETVALUE2 -typedef LPVOID (WINAPI *TGV2)(DWORD dwTlsIndex); -extern TGV2 tls_get_value2; +extern bool tsd_booted; +# if defined(_M_ARM64EC) +# define JEMALLOC_WIN32_TLSGETVALUE2 0 +# else +# define JEMALLOC_WIN32_TLSGETVALUE2 1 +# endif +# if JEMALLOC_WIN32_TLSGETVALUE2 +typedef LPVOID(WINAPI *TGV2)(DWORD dwTlsIndex); +extern TGV2 tls_get_value2; extern HMODULE tgv2_mod; -#endif +# endif /* Initialization/cleanup. */ JEMALLOC_ALWAYS_INLINE bool tsd_cleanup_wrapper(void) { - DWORD error = GetLastError(); + DWORD error = GetLastError(); tsd_wrapper_t *wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); SetLastError(error); @@ -66,20 +66,20 @@ tsd_wrapper_set(tsd_wrapper_t *wrapper) { JEMALLOC_ALWAYS_INLINE tsd_wrapper_t * tsd_wrapper_get(bool init) { tsd_wrapper_t *wrapper; -#if JEMALLOC_WIN32_TLSGETVALUE2 +# if JEMALLOC_WIN32_TLSGETVALUE2 if (tls_get_value2 != NULL) { - wrapper = (tsd_wrapper_t *) tls_get_value2(tsd_tsd); + wrapper = (tsd_wrapper_t *)tls_get_value2(tsd_tsd); } else -#endif +# endif { DWORD error = GetLastError(); - wrapper = (tsd_wrapper_t *) TlsGetValue(tsd_tsd); + wrapper = (tsd_wrapper_t *)TlsGetValue(tsd_tsd); SetLastError(error); } if (init && unlikely(wrapper == NULL)) { - wrapper = (tsd_wrapper_t *) - malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc( + sizeof(tsd_wrapper_t)); if (wrapper == NULL) { malloc_write(": Error allocating TSD\n"); abort(); @@ -102,12 +102,12 @@ tsd_boot0(void) { } _malloc_tsd_cleanup_register(&tsd_cleanup_wrapper); tsd_wrapper_set(&tsd_boot_wrapper); -#if JEMALLOC_WIN32_TLSGETVALUE2 
+# if JEMALLOC_WIN32_TLSGETVALUE2 tgv2_mod = LoadLibraryA("api-ms-win-core-processthreads-l1-1-8.dll"); if (tgv2_mod != NULL) { tls_get_value2 = (TGV2)GetProcAddress(tgv2_mod, "TlsGetValue2"); } -#endif +# endif tsd_booted = true; return false; } @@ -115,8 +115,7 @@ tsd_boot0(void) { JEMALLOC_ALWAYS_INLINE void tsd_boot1(void) { tsd_wrapper_t *wrapper; - wrapper = (tsd_wrapper_t *) - malloc_tsd_malloc(sizeof(tsd_wrapper_t)); + wrapper = (tsd_wrapper_t *)malloc_tsd_malloc(sizeof(tsd_wrapper_t)); if (wrapper == NULL) { malloc_write(": Error allocating TSD\n"); abort(); @@ -174,7 +173,7 @@ tsd_set(tsd_t *val) { #else // defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) -#define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type +# define JEMALLOC_TSD_TYPE_ATTR(type) __declspec(thread) type extern JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls; extern bool tsd_booted; diff --git a/include/jemalloc/internal/typed_list.h b/include/jemalloc/internal/typed_list.h index 7c4826fc..78704e48 100644 --- a/include/jemalloc/internal/typed_list.h +++ b/include/jemalloc/internal/typed_list.h @@ -6,54 +6,49 @@ * bit easier to use; it handles ql_elm_new calls and provides type safety. 
*/ -#define TYPED_LIST(list_type, el_type, linkage) \ -typedef struct { \ - ql_head(el_type) head; \ -} list_type##_t; \ -static inline void \ -list_type##_init(list_type##_t *list) { \ - ql_new(&list->head); \ -} \ -static inline el_type * \ -list_type##_first(const list_type##_t *list) { \ - return ql_first(&list->head); \ -} \ -static inline el_type * \ -list_type##_last(const list_type##_t *list) { \ - return ql_last(&list->head, linkage); \ -} \ -static inline el_type * \ -list_type##_next(const list_type##_t *list, el_type *item) { \ - return ql_next(&list->head, item, linkage); \ -} \ -static inline void \ -list_type##_append(list_type##_t *list, el_type *item) { \ - ql_elm_new(item, linkage); \ - ql_tail_insert(&list->head, item, linkage); \ -} \ -static inline void \ -list_type##_prepend(list_type##_t *list, el_type *item) { \ - ql_elm_new(item, linkage); \ - ql_head_insert(&list->head, item, linkage); \ -} \ -static inline void \ -list_type##_replace(list_type##_t *list, el_type *to_remove, \ - el_type *to_insert) { \ - ql_elm_new(to_insert, linkage); \ - ql_after_insert(to_remove, to_insert, linkage); \ - ql_remove(&list->head, to_remove, linkage); \ -} \ -static inline void \ -list_type##_remove(list_type##_t *list, el_type *item) { \ - ql_remove(&list->head, item, linkage); \ -} \ -static inline bool \ -list_type##_empty(list_type##_t *list) { \ - return ql_empty(&list->head); \ -} \ -static inline void \ -list_type##_concat(list_type##_t *list_a, list_type##_t *list_b) { \ - ql_concat(&list_a->head, &list_b->head, linkage); \ -} +#define TYPED_LIST(list_type, el_type, linkage) \ + typedef struct { \ + ql_head(el_type) head; \ + } list_type##_t; \ + static inline void list_type##_init(list_type##_t *list) { \ + ql_new(&list->head); \ + } \ + static inline el_type *list_type##_first(const list_type##_t *list) { \ + return ql_first(&list->head); \ + } \ + static inline el_type *list_type##_last(const list_type##_t *list) { \ + return ql_last(&list->head, 
linkage); \ + } \ + static inline el_type *list_type##_next( \ + const list_type##_t *list, el_type *item) { \ + return ql_next(&list->head, item, linkage); \ + } \ + static inline void list_type##_append( \ + list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_tail_insert(&list->head, item, linkage); \ + } \ + static inline void list_type##_prepend( \ + list_type##_t *list, el_type *item) { \ + ql_elm_new(item, linkage); \ + ql_head_insert(&list->head, item, linkage); \ + } \ + static inline void list_type##_replace( \ + list_type##_t *list, el_type *to_remove, el_type *to_insert) { \ + ql_elm_new(to_insert, linkage); \ + ql_after_insert(to_remove, to_insert, linkage); \ + ql_remove(&list->head, to_remove, linkage); \ + } \ + static inline void list_type##_remove( \ + list_type##_t *list, el_type *item) { \ + ql_remove(&list->head, item, linkage); \ + } \ + static inline bool list_type##_empty(list_type##_t *list) { \ + return ql_empty(&list->head); \ + } \ + static inline void list_type##_concat( \ + list_type##_t *list_a, list_type##_t *list_b) { \ + ql_concat(&list_a->head, &list_b->head, linkage); \ + } #endif /* JEMALLOC_INTERNAL_TYPED_LIST_H */ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index 35aa26e6..bf246c95 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -8,10 +8,10 @@ /* Junk fill patterns. */ #ifndef JEMALLOC_ALLOC_JUNK -# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) +# define JEMALLOC_ALLOC_JUNK ((uint8_t)0xa5) #endif #ifndef JEMALLOC_FREE_JUNK -# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) +# define JEMALLOC_FREE_JUNK ((uint8_t)0x5a) #endif /* @@ -32,20 +32,20 @@ #define JEMALLOC_CC_SILENCE_INIT(...) 
= __VA_ARGS__ #ifdef __GNUC__ -# define likely(x) __builtin_expect(!!(x), 1) -# define unlikely(x) __builtin_expect(!!(x), 0) +# define likely(x) __builtin_expect(!!(x), 1) +# define unlikely(x) __builtin_expect(!!(x), 0) #else -# define likely(x) !!(x) -# define unlikely(x) !!(x) +# define likely(x) !!(x) +# define unlikely(x) !!(x) #endif #if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L -#include +# include #else -#if !defined(JEMALLOC_INTERNAL_UNREACHABLE) -# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure -#endif -#define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() +# if !defined(JEMALLOC_INTERNAL_UNREACHABLE) +# error JEMALLOC_INTERNAL_UNREACHABLE should have been defined by configure +# endif +# define unreachable() JEMALLOC_INTERNAL_UNREACHABLE() #endif /* Set error code. */ @@ -69,27 +69,27 @@ get_errno(void) { } #ifdef _MSC_VER -#define util_assume __assume -#elif defined(__clang__) && (__clang_major__ > 3 || \ - (__clang_major__ == 3 && __clang_minor__ >= 6)) -#define util_assume __builtin_assume +# define util_assume __assume +#elif defined(__clang__) \ + && (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 6)) +# define util_assume __builtin_assume #else -#define util_assume(expr) \ - do { \ - if (!(expr)) { \ - unreachable(); \ - } \ - } while(0) +# define util_assume(expr) \ + do { \ + if (!(expr)) { \ + unreachable(); \ + } \ + } while (0) #endif /* Allows compiler constant folding on inlined paths. */ #if defined(__has_builtin) -# if __has_builtin(__builtin_constant_p) -# define util_compile_time_const(x) __builtin_constant_p(x) -# endif +# if __has_builtin(__builtin_constant_p) +# define util_compile_time_const(x) __builtin_constant_p(x) +# endif #endif #ifndef util_compile_time_const -# define util_compile_time_const(x) (false) +# define util_compile_time_const(x) (false) #endif /* ptr should be valid. 
*/ @@ -148,7 +148,6 @@ util_prefetch_write_range(void *ptr, size_t sz) { * key1-key2:value|key3-key4:value|... * Note it does not handle the ending '\0'. */ -bool -multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, - size_t *key_start, size_t *key_end, size_t *value); +bool multi_setting_parse_next(const char **setting_segment_cur, + size_t *len_left, size_t *key_start, size_t *key_end, size_t *value); #endif /* JEMALLOC_INTERNAL_UTIL_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index acf7860d..73770713 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -66,8 +66,8 @@ enum witness_rank_e { WITNESS_RANK_HOOK, WITNESS_RANK_BIN, - WITNESS_RANK_LEAF=0x1000, - WITNESS_RANK_BATCHER=WITNESS_RANK_LEAF, + WITNESS_RANK_LEAF = 0x1000, + WITNESS_RANK_BATCHER = WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, @@ -86,38 +86,43 @@ typedef enum witness_rank_e witness_rank_t; /* PER-WITNESS DATA */ /******************************************************************************/ #if defined(JEMALLOC_DEBUG) -# define WITNESS_INITIALIZER(name, rank) {name, rank, NULL, NULL, {NULL, NULL}} +# define WITNESS_INITIALIZER(name, rank) \ + { \ + name, rank, NULL, NULL, { \ + NULL, NULL \ + } \ + } #else -# define WITNESS_INITIALIZER(name, rank) +# define WITNESS_INITIALIZER(name, rank) #endif typedef struct witness_s witness_t; typedef ql_head(witness_t) witness_list_t; -typedef int witness_comp_t (const witness_t *, void *, const witness_t *, - void *); +typedef int witness_comp_t( + const witness_t *, void *, const witness_t *, void *); struct witness_s { /* Name, used for printing lock order reversal messages. */ - const char *name; + const char *name; /* * Witness rank, where 0 is lowest and WITNESS_RANK_LEAF is highest. 
* Witnesses must be acquired in order of increasing rank. */ - witness_rank_t rank; + witness_rank_t rank; /* * If two witnesses are of equal rank and they have the samp comp * function pointer, it is called as a last attempt to differentiate * between witnesses of equal rank. */ - witness_comp_t *comp; + witness_comp_t *comp; /* Opaque data, passed to comp(). */ - void *opaque; + void *opaque; /* Linkage for thread's currently owned locks. */ - ql_elm(witness_t) link; + ql_elm(witness_t) link; }; /******************************************************************************/ @@ -126,10 +131,11 @@ struct witness_s { typedef struct witness_tsd_s witness_tsd_t; struct witness_tsd_s { witness_list_t witnesses; - bool forking; + bool forking; }; -#define WITNESS_TSD_INITIALIZER { ql_head_initializer(witnesses), false } +#define WITNESS_TSD_INITIALIZER \ + { ql_head_initializer(witnesses), false } #define WITNESS_TSDN_NULL ((witness_tsdn_t *)0) /******************************************************************************/ @@ -162,17 +168,17 @@ witness_tsdn_tsd(witness_tsdn_t *witness_tsdn) { void witness_init(witness_t *witness, const char *name, witness_rank_t rank, witness_comp_t *comp, void *opaque); -typedef void (witness_lock_error_t)(const witness_list_t *, const witness_t *); +typedef void(witness_lock_error_t)(const witness_list_t *, const witness_t *); extern witness_lock_error_t *JET_MUTABLE witness_lock_error; -typedef void (witness_owner_error_t)(const witness_t *); +typedef void(witness_owner_error_t)(const witness_t *); extern witness_owner_error_t *JET_MUTABLE witness_owner_error; -typedef void (witness_not_owner_error_t)(const witness_t *); +typedef void(witness_not_owner_error_t)(const witness_t *); extern witness_not_owner_error_t *JET_MUTABLE witness_not_owner_error; -typedef void (witness_depth_error_t)(const witness_list_t *, - witness_rank_t rank_inclusive, unsigned depth); +typedef void(witness_depth_error_t)( + const witness_list_t *, 
witness_rank_t rank_inclusive, unsigned depth); extern witness_depth_error_t *JET_MUTABLE witness_depth_error; void witnesses_cleanup(witness_tsd_t *witness_tsd); @@ -184,12 +190,12 @@ void witness_postfork_child(witness_tsd_t *witness_tsd); static inline bool witness_owner(witness_tsd_t *witness_tsd, const witness_t *witness) { witness_list_t *witnesses; - witness_t *w; + witness_t *w; cassert(config_debug); witnesses = &witness_tsd->witnesses; - ql_foreach(w, witnesses, link) { + ql_foreach (w, witnesses, link) { if (w == witness) { return true; } @@ -221,11 +227,11 @@ witness_assert_owner(witness_tsdn_t *witness_tsdn, const witness_t *witness) { } static inline void -witness_assert_not_owner(witness_tsdn_t *witness_tsdn, - const witness_t *witness) { - witness_tsd_t *witness_tsd; +witness_assert_not_owner( + witness_tsdn_t *witness_tsdn, const witness_t *witness) { + witness_tsd_t *witness_tsd; witness_list_t *witnesses; - witness_t *w; + witness_t *w; if (!config_debug) { return; @@ -240,7 +246,7 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn, } witnesses = &witness_tsd->witnesses; - ql_foreach(w, witnesses, link) { + ql_foreach (w, witnesses, link) { if (w == witness) { witness_not_owner_error(witness); } @@ -249,9 +255,9 @@ witness_assert_not_owner(witness_tsdn_t *witness_tsdn, /* Returns depth. Not intended for direct use. 
*/ static inline unsigned -witness_depth_to_rank(witness_list_t *witnesses, witness_rank_t rank_inclusive) -{ - unsigned d = 0; +witness_depth_to_rank( + witness_list_t *witnesses, witness_rank_t rank_inclusive) { + unsigned d = 0; witness_t *w = ql_last(witnesses, link); if (w != NULL) { @@ -274,7 +280,7 @@ witness_assert_depth_to_rank(witness_tsdn_t *witness_tsdn, } witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; - unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); if (d != depth) { witness_depth_error(witnesses, rank_inclusive, depth); @@ -292,14 +298,14 @@ witness_assert_lockless(witness_tsdn_t *witness_tsdn) { } static inline void -witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn, - witness_rank_t rank_inclusive) { +witness_assert_positive_depth_to_rank( + witness_tsdn_t *witness_tsdn, witness_rank_t rank_inclusive) { if (!config_debug || witness_tsdn_null(witness_tsdn)) { return; } witness_list_t *witnesses = &witness_tsdn_tsd(witness_tsdn)->witnesses; - unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); + unsigned d = witness_depth_to_rank(witnesses, rank_inclusive); if (d == 0) { witness_depth_error(witnesses, rank_inclusive, 1); @@ -308,9 +314,9 @@ witness_assert_positive_depth_to_rank(witness_tsdn_t *witness_tsdn, static inline void witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { - witness_tsd_t *witness_tsd; + witness_tsd_t *witness_tsd; witness_list_t *witnesses; - witness_t *w; + witness_t *w; if (!config_debug) { return; @@ -335,9 +341,9 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { } else if (w->rank > witness->rank) { /* Not forking, rank order reversal. 
*/ witness_lock_error(witnesses, witness); - } else if (w->rank == witness->rank && (w->comp == NULL || w->comp != - witness->comp || w->comp(w, w->opaque, witness, witness->opaque) > - 0)) { + } else if (w->rank == witness->rank + && (w->comp == NULL || w->comp != witness->comp + || w->comp(w, w->opaque, witness, witness->opaque) > 0)) { /* * Missing/incompatible comparison function, or comparison * function indicates rank order reversal. @@ -346,15 +352,15 @@ witness_lock(witness_tsdn_t *witness_tsdn, witness_t *witness) { } /* Suppress spurious warning from static analysis */ - assert(ql_empty(witnesses) || - qr_prev(ql_first(witnesses), link) != NULL); + assert( + ql_empty(witnesses) || qr_prev(ql_first(witnesses), link) != NULL); ql_elm_new(witness, link); ql_tail_insert(witnesses, witness, link); } static inline void witness_unlock(witness_tsdn_t *witness_tsdn, witness_t *witness) { - witness_tsd_t *witness_tsd; + witness_tsd_t *witness_tsd; witness_list_t *witnesses; if (!config_debug) { diff --git a/include/msvc_compat/C99/stdint.h b/include/msvc_compat/C99/stdint.h index c66fbb81..5ee3992b 100644 --- a/include/msvc_compat/C99/stdint.h +++ b/include/msvc_compat/C99/stdint.h @@ -30,39 +30,39 @@ /////////////////////////////////////////////////////////////////////////////// #ifndef _MSC_VER // [ -#error "Use this header only with Microsoft Visual C++ compilers!" +# error "Use this header only with Microsoft Visual C++ compilers!" 
#endif // _MSC_VER ] #ifndef _MSC_STDINT_H_ // [ -#define _MSC_STDINT_H_ +# define _MSC_STDINT_H_ -#if _MSC_VER > 1000 -#pragma once -#endif +# if _MSC_VER > 1000 +# pragma once +# endif -#include +# include // For Visual Studio 6 in C++ mode and for many Visual Studio versions when // compiling for ARM we should wrap include with 'extern "C++" {}' // or compiler give many errors like this: // error C2733: second C linkage of overloaded function 'wmemchr' not allowed -#ifdef __cplusplus +# ifdef __cplusplus extern "C" { -#endif -# include -#ifdef __cplusplus +# endif +# include +# ifdef __cplusplus } -#endif +# endif // Define _W64 macros to mark types changing their size, like intptr_t. -#ifndef _W64 -# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 -# define _W64 __w64 -# else -# define _W64 -# endif -#endif - +# ifndef _W64 +# if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) \ + && _MSC_VER >= 1300 +# define _W64 __w64 +# else +# define _W64 +# endif +# endif // 7.18.1 Integer types @@ -71,177 +71,177 @@ extern "C" { // Visual Studio 6 and Embedded Visual C++ 4 doesn't // realize that, e.g. char has the same size as __int8 // so we give up on __intX for them. 
-#if (_MSC_VER < 1300) - typedef signed char int8_t; - typedef signed short int16_t; - typedef signed int int32_t; - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; -#else - typedef signed __int8 int8_t; - typedef signed __int16 int16_t; - typedef signed __int32 int32_t; - typedef unsigned __int8 uint8_t; - typedef unsigned __int16 uint16_t; - typedef unsigned __int32 uint32_t; -#endif -typedef signed __int64 int64_t; -typedef unsigned __int64 uint64_t; - +# if (_MSC_VER < 1300) +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +# else +typedef signed __int8 int8_t; +typedef signed __int16 int16_t; +typedef signed __int32 int32_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +# endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; // 7.18.1.2 Minimum-width integer types -typedef int8_t int_least8_t; -typedef int16_t int_least16_t; -typedef int32_t int_least32_t; -typedef int64_t int_least64_t; -typedef uint8_t uint_least8_t; -typedef uint16_t uint_least16_t; -typedef uint32_t uint_least32_t; -typedef uint64_t uint_least64_t; +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; // 7.18.1.3 Fastest minimum-width integer types -typedef int8_t int_fast8_t; -typedef int16_t int_fast16_t; -typedef int32_t int_fast32_t; -typedef int64_t int_fast64_t; -typedef uint8_t uint_fast8_t; -typedef uint16_t uint_fast16_t; -typedef uint32_t uint_fast32_t; -typedef uint64_t uint_fast64_t; +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; 
+typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; // 7.18.1.4 Integer types capable of holding object pointers -#ifdef _WIN64 // [ - typedef signed __int64 intptr_t; - typedef unsigned __int64 uintptr_t; -#else // _WIN64 ][ - typedef _W64 signed int intptr_t; - typedef _W64 unsigned int uintptr_t; -#endif // _WIN64 ] +# ifdef _WIN64 // [ +typedef signed __int64 intptr_t; +typedef unsigned __int64 uintptr_t; +# else // _WIN64 ][ +typedef _W64 signed int intptr_t; +typedef _W64 unsigned int uintptr_t; +# endif // _WIN64 ] // 7.18.1.5 Greatest-width integer types -typedef int64_t intmax_t; -typedef uint64_t uintmax_t; - +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; // 7.18.2 Limits of specified-width integer types -#if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 +# if !defined(__cplusplus) \ + || defined( \ + __STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 // 7.18.2.1 Limits of exact-width integer types -#define INT8_MIN ((int8_t)_I8_MIN) -#define INT8_MAX _I8_MAX -#define INT16_MIN ((int16_t)_I16_MIN) -#define INT16_MAX _I16_MAX -#define INT32_MIN ((int32_t)_I32_MIN) -#define INT32_MAX _I32_MAX -#define INT64_MIN ((int64_t)_I64_MIN) -#define INT64_MAX _I64_MAX -#define UINT8_MAX _UI8_MAX -#define UINT16_MAX _UI16_MAX -#define UINT32_MAX _UI32_MAX -#define UINT64_MAX _UI64_MAX +# define INT8_MIN ((int8_t)_I8_MIN) +# define INT8_MAX _I8_MAX +# define INT16_MIN ((int16_t)_I16_MIN) +# define INT16_MAX _I16_MAX +# define INT32_MIN ((int32_t)_I32_MIN) +# define INT32_MAX _I32_MAX +# define INT64_MIN ((int64_t)_I64_MIN) +# define INT64_MAX _I64_MAX +# define UINT8_MAX _UI8_MAX +# define UINT16_MAX _UI16_MAX +# define UINT32_MAX _UI32_MAX +# define UINT64_MAX _UI64_MAX // 7.18.2.2 Limits of minimum-width integer types -#define INT_LEAST8_MIN INT8_MIN -#define INT_LEAST8_MAX 
INT8_MAX -#define INT_LEAST16_MIN INT16_MIN -#define INT_LEAST16_MAX INT16_MAX -#define INT_LEAST32_MIN INT32_MIN -#define INT_LEAST32_MAX INT32_MAX -#define INT_LEAST64_MIN INT64_MIN -#define INT_LEAST64_MAX INT64_MAX -#define UINT_LEAST8_MAX UINT8_MAX -#define UINT_LEAST16_MAX UINT16_MAX -#define UINT_LEAST32_MAX UINT32_MAX -#define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST8_MAX INT8_MAX +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST16_MAX INT16_MAX +# define INT_LEAST32_MIN INT32_MIN +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST64_MIN INT64_MIN +# define INT_LEAST64_MAX INT64_MAX +# define UINT_LEAST8_MAX UINT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define UINT_LEAST64_MAX UINT64_MAX // 7.18.2.3 Limits of fastest minimum-width integer types -#define INT_FAST8_MIN INT8_MIN -#define INT_FAST8_MAX INT8_MAX -#define INT_FAST16_MIN INT16_MIN -#define INT_FAST16_MAX INT16_MAX -#define INT_FAST32_MIN INT32_MIN -#define INT_FAST32_MAX INT32_MAX -#define INT_FAST64_MIN INT64_MIN -#define INT_FAST64_MAX INT64_MAX -#define UINT_FAST8_MAX UINT8_MAX -#define UINT_FAST16_MAX UINT16_MAX -#define UINT_FAST32_MAX UINT32_MAX -#define UINT_FAST64_MAX UINT64_MAX +# define INT_FAST8_MIN INT8_MIN +# define INT_FAST8_MAX INT8_MAX +# define INT_FAST16_MIN INT16_MIN +# define INT_FAST16_MAX INT16_MAX +# define INT_FAST32_MIN INT32_MIN +# define INT_FAST32_MAX INT32_MAX +# define INT_FAST64_MIN INT64_MIN +# define INT_FAST64_MAX INT64_MAX +# define UINT_FAST8_MAX UINT8_MAX +# define UINT_FAST16_MAX UINT16_MAX +# define UINT_FAST32_MAX UINT32_MAX +# define UINT_FAST64_MAX UINT64_MAX // 7.18.2.4 Limits of integer types capable of holding object pointers -#ifdef _WIN64 // [ -# define INTPTR_MIN INT64_MIN -# define INTPTR_MAX INT64_MAX -# define UINTPTR_MAX UINT64_MAX -#else // _WIN64 ][ -# define INTPTR_MIN INT32_MIN -# define INTPTR_MAX INT32_MAX -# define UINTPTR_MAX UINT32_MAX -#endif 
// _WIN64 ] +# ifdef _WIN64 // [ +# define INTPTR_MIN INT64_MIN +# define INTPTR_MAX INT64_MAX +# define UINTPTR_MAX UINT64_MAX +# else // _WIN64 ][ +# define INTPTR_MIN INT32_MIN +# define INTPTR_MAX INT32_MAX +# define UINTPTR_MAX UINT32_MAX +# endif // _WIN64 ] // 7.18.2.5 Limits of greatest-width integer types -#define INTMAX_MIN INT64_MIN -#define INTMAX_MAX INT64_MAX -#define UINTMAX_MAX UINT64_MAX +# define INTMAX_MIN INT64_MIN +# define INTMAX_MAX INT64_MAX +# define UINTMAX_MAX UINT64_MAX // 7.18.3 Limits of other integer types -#ifdef _WIN64 // [ -# define PTRDIFF_MIN _I64_MIN -# define PTRDIFF_MAX _I64_MAX -#else // _WIN64 ][ -# define PTRDIFF_MIN _I32_MIN -# define PTRDIFF_MAX _I32_MAX -#endif // _WIN64 ] +# ifdef _WIN64 // [ +# define PTRDIFF_MIN _I64_MIN +# define PTRDIFF_MAX _I64_MAX +# else // _WIN64 ][ +# define PTRDIFF_MIN _I32_MIN +# define PTRDIFF_MAX _I32_MAX +# endif // _WIN64 ] -#define SIG_ATOMIC_MIN INT_MIN -#define SIG_ATOMIC_MAX INT_MAX +# define SIG_ATOMIC_MIN INT_MIN +# define SIG_ATOMIC_MAX INT_MAX -#ifndef SIZE_MAX // [ -# ifdef _WIN64 // [ -# define SIZE_MAX _UI64_MAX -# else // _WIN64 ][ -# define SIZE_MAX _UI32_MAX -# endif // _WIN64 ] -#endif // SIZE_MAX ] +# ifndef SIZE_MAX // [ +# ifdef _WIN64 // [ +# define SIZE_MAX _UI64_MAX +# else // _WIN64 ][ +# define SIZE_MAX _UI32_MAX +# endif // _WIN64 ] +# endif // SIZE_MAX ] // WCHAR_MIN and WCHAR_MAX are also defined in -#ifndef WCHAR_MIN // [ -# define WCHAR_MIN 0 -#endif // WCHAR_MIN ] -#ifndef WCHAR_MAX // [ -# define WCHAR_MAX _UI16_MAX -#endif // WCHAR_MAX ] +# ifndef WCHAR_MIN // [ +# define WCHAR_MIN 0 +# endif // WCHAR_MIN ] +# ifndef WCHAR_MAX // [ +# define WCHAR_MAX _UI16_MAX +# endif // WCHAR_MAX ] -#define WINT_MIN 0 -#define WINT_MAX _UI16_MAX - -#endif // __STDC_LIMIT_MACROS ] +# define WINT_MIN 0 +# define WINT_MAX _UI16_MAX +# endif // __STDC_LIMIT_MACROS ] // 7.18.4 Limits of other integer types -#if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See 
footnote 224 at page 260 +# if !defined(__cplusplus) \ + || defined( \ + __STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 // 7.18.4.1 Macros for minimum-width integer constants -#define INT8_C(val) val##i8 -#define INT16_C(val) val##i16 -#define INT32_C(val) val##i32 -#define INT64_C(val) val##i64 +# define INT8_C(val) val##i8 +# define INT16_C(val) val##i16 +# define INT32_C(val) val##i32 +# define INT64_C(val) val##i64 -#define UINT8_C(val) val##ui8 -#define UINT16_C(val) val##ui16 -#define UINT32_C(val) val##ui32 -#define UINT64_C(val) val##ui64 +# define UINT8_C(val) val##ui8 +# define UINT16_C(val) val##ui16 +# define UINT32_C(val) val##ui32 +# define UINT64_C(val) val##ui64 // 7.18.4.2 Macros for greatest-width integer constants -#define INTMAX_C INT64_C -#define UINTMAX_C UINT64_C - -#endif // __STDC_CONSTANT_MACROS ] +# define INTMAX_C INT64_C +# define UINTMAX_C UINT64_C +# endif // __STDC_CONSTANT_MACROS ] #endif // _MSC_STDINT_H_ ] diff --git a/include/msvc_compat/strings.h b/include/msvc_compat/strings.h index 996f256c..6a1acc0f 100644 --- a/include/msvc_compat/strings.h +++ b/include/msvc_compat/strings.h @@ -4,9 +4,10 @@ /* MSVC doesn't define ffs/ffsl. 
This dummy strings.h header is provided * for both */ #ifdef _MSC_VER -# include -# pragma intrinsic(_BitScanForward) -static __forceinline int ffsl(long x) { +# include +# pragma intrinsic(_BitScanForward) +static __forceinline int +ffsl(long x) { unsigned long i; if (_BitScanForward(&i, x)) { @@ -15,44 +16,46 @@ static __forceinline int ffsl(long x) { return 0; } -static __forceinline int ffs(int x) { +static __forceinline int +ffs(int x) { return ffsl(x); } -# ifdef _M_X64 -# pragma intrinsic(_BitScanForward64) -# endif +# ifdef _M_X64 +# pragma intrinsic(_BitScanForward64) +# endif -static __forceinline int ffsll(unsigned __int64 x) { +static __forceinline int +ffsll(unsigned __int64 x) { unsigned long i; -#ifdef _M_X64 +# ifdef _M_X64 if (_BitScanForward64(&i, x)) { return i + 1; } return 0; -#else -// Fallback for 32-bit build where 64-bit version not available -// assuming little endian +# else + // Fallback for 32-bit build where 64-bit version not available + // assuming little endian union { unsigned __int64 ll; - unsigned long l[2]; + unsigned long l[2]; } s; s.ll = x; if (_BitScanForward(&i, s.l[0])) { return i + 1; - } else if(_BitScanForward(&i, s.l[1])) { + } else if (_BitScanForward(&i, s.l[1])) { return i + 33; } return 0; -#endif +# endif } #else -# define ffsll(x) __builtin_ffsll(x) -# define ffsl(x) __builtin_ffsl(x) -# define ffs(x) __builtin_ffs(x) +# define ffsll(x) __builtin_ffsll(x) +# define ffsl(x) __builtin_ffsl(x) +# define ffs(x) __builtin_ffs(x) #endif #endif /* strings_h */ diff --git a/msvc/test_threads/test_threads.cpp b/msvc/test_threads/test_threads.cpp index 6eed028d..e709c177 100644 --- a/msvc/test_threads/test_threads.cpp +++ b/msvc/test_threads/test_threads.cpp @@ -12,78 +12,108 @@ #define JEMALLOC_NO_DEMANGLE #include -using std::vector; +using std::minstd_rand; using std::thread; using std::uniform_int_distribution; -using std::minstd_rand; +using std::vector; -int test_threads() { - je_malloc_conf = "narenas:3"; - int 
narenas = 0; - size_t sz = sizeof(narenas); - je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); - if (narenas != 3) { - printf("Error: unexpected number of arenas: %d\n", narenas); - return 1; - } - static const int sizes[] = { 7, 16, 32, 60, 91, 100, 120, 144, 169, 199, 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, 123123, 255265, 2333111 }; - static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); - vector workers; - static const int numThreads = narenas + 1, numAllocsMax = 25, numIter1 = 50, numIter2 = 50; - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated1; - size_t sz1 = sizeof(allocated1); - je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); - printf("\nPress Enter to start threads...\n"); - getchar(); - printf("Starting %d threads x %d x %d iterations...\n", numThreads, numIter1, numIter2); - for (int i = 0; i < numThreads; i++) { - workers.emplace_back([tid=i]() { - uniform_int_distribution sizeDist(0, numSizes - 1); - minstd_rand rnd(tid * 17); - uint8_t* ptrs[numAllocsMax]; - int ptrsz[numAllocsMax]; - for (int i = 0; i < numIter1; ++i) { - thread t([&]() { - for (int i = 0; i < numIter2; ++i) { - const int numAllocs = numAllocsMax - sizeDist(rnd); - for (int j = 0; j < numAllocs; j += 64) { - const int x = sizeDist(rnd); - const int sz = sizes[x]; - ptrsz[j] = sz; - ptrs[j] = (uint8_t*)je_malloc(sz); - if (!ptrs[j]) { - printf("Unable to allocate %d bytes in thread %d, iter %d, alloc %d. 
%d\n", sz, tid, i, j, x); - exit(1); - } - for (int k = 0; k < sz; k++) - ptrs[j][k] = tid + k; - } - for (int j = 0; j < numAllocs; j += 64) { - for (int k = 0, sz = ptrsz[j]; k < sz; k++) - if (ptrs[j][k] != (uint8_t)(tid + k)) { - printf("Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", tid, i, j, k, ptrs[j][k], (uint8_t)(tid + k)); - exit(1); - } - je_free(ptrs[j]); - } - } - }); - t.join(); - } - }); - } - for (thread& t : workers) { - t.join(); - } - je_malloc_stats_print(NULL, NULL, NULL); - size_t allocated2; - je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); - size_t leaked = allocated2 - allocated1; - printf("\nDone. Leaked: %zd bytes\n", leaked); - bool failed = leaked > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) - printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); - printf("\nPress Enter to continue...\n"); - getchar(); - return failed ? 1 : 0; +int +test_threads() { + je_malloc_conf = "narenas:3"; + int narenas = 0; + size_t sz = sizeof(narenas); + je_mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0); + if (narenas != 3) { + printf("Error: unexpected number of arenas: %d\n", narenas); + return 1; + } + static const int sizes[] = {7, 16, 32, 60, 91, 100, 120, 144, 169, 199, + 255, 400, 670, 900, 917, 1025, 3333, 5190, 13131, 49192, 99999, + 123123, 255265, 2333111}; + static const int numSizes = (int)(sizeof(sizes) / sizeof(sizes[0])); + vector workers; + static const int numThreads = narenas + 1, numAllocsMax = 25, + numIter1 = 50, numIter2 = 50; + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated1; + size_t sz1 = sizeof(allocated1); + je_mallctl("stats.active", (void *)&allocated1, &sz1, NULL, 0); + printf("\nPress Enter to start threads...\n"); + getchar(); + printf("Starting %d threads x %d x %d iterations...\n", numThreads, + numIter1, numIter2); + for (int i = 0; i < numThreads; i++) { + workers.emplace_back([tid = i]() { + uniform_int_distribution 
sizeDist(0, numSizes - 1); + minstd_rand rnd(tid * 17); + uint8_t *ptrs[numAllocsMax]; + int ptrsz[numAllocsMax]; + for (int i = 0; i < numIter1; ++i) { + thread t([&]() { + for (int i = 0; i < numIter2; ++i) { + const int numAllocs = + numAllocsMax + - sizeDist(rnd); + for (int j = 0; j < numAllocs; + j += 64) { + const int x = sizeDist( + rnd); + const int sz = sizes[x]; + ptrsz[j] = sz; + ptrs[j] = (uint8_t *) + je_malloc(sz); + if (!ptrs[j]) { + printf( + "Unable to allocate %d bytes in thread %d, iter %d, alloc %d. %d\n", + sz, tid, i, + j, x); + exit(1); + } + for (int k = 0; k < sz; + k++) + ptrs[j][k] = tid + + k; + } + for (int j = 0; j < numAllocs; + j += 64) { + for (int k = 0, + sz = ptrsz[j]; + k < sz; k++) + if (ptrs[j][k] + != (uint8_t)(tid + + k)) { + printf( + "Memory error in thread %d, iter %d, alloc %d @ %d : %02X!=%02X\n", + tid, + i, + j, + k, + ptrs[j] + [k], + (uint8_t)(tid + + k)); + exit(1); + } + je_free(ptrs[j]); + } + } + }); + t.join(); + } + }); + } + for (thread &t : workers) { + t.join(); + } + je_malloc_stats_print(NULL, NULL, NULL); + size_t allocated2; + je_mallctl("stats.active", (void *)&allocated2, &sz1, NULL, 0); + size_t leaked = allocated2 - allocated1; + printf("\nDone. Leaked: %zd bytes\n", leaked); + bool failed = leaked + > 65536; // in case C++ runtime allocated something (e.g. iostream locale or facet) + printf("\nTest %s!\n", (failed ? "FAILED" : "successful")); + printf("\nPress Enter to continue...\n"); + getchar(); + return failed ? 
1 : 0; } diff --git a/msvc/test_threads/test_threads_main.cpp b/msvc/test_threads/test_threads_main.cpp index 0a022fba..3e88c286 100644 --- a/msvc/test_threads/test_threads_main.cpp +++ b/msvc/test_threads/test_threads_main.cpp @@ -5,7 +5,8 @@ using namespace std::chrono_literals; -int main(int argc, char** argv) { - int rc = test_threads(); - return rc; +int +main(int argc, char **argv) { + int rc = test_threads(); + return rc; } diff --git a/src/arena.c b/src/arena.c index 1586ee91..2f58b038 100644 --- a/src/arena.c +++ b/src/arena.c @@ -22,12 +22,7 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS * options and mallctl processing are straightforward. */ const char *const percpu_arena_mode_names[] = { - "percpu", - "phycpu", - "disabled", - "percpu", - "phycpu" -}; + "percpu", "phycpu", "disabled", "percpu", "phycpu"}; percpu_arena_mode_t opt_percpu_arena = PERCPU_ARENA_DEFAULT; ssize_t opt_dirty_decay_ms = DIRTY_DECAY_MS_DEFAULT; @@ -36,7 +31,7 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -emap_t arena_emap_global; +emap_t arena_emap_global; static pa_central_t arena_pa_central_global; div_info_t arena_binind_div_info[SC_NBINS]; @@ -51,14 +46,15 @@ uint32_t arena_bin_offsets[SC_NBINS]; * that,the huge_arena_ind is updated to point to the actual huge arena, * which is the last one of the auto arenas. 
*/ -unsigned huge_arena_ind = 0; -bool opt_huge_arena_pac_thp = false; +unsigned huge_arena_ind = 0; +bool opt_huge_arena_pac_thp = false; pac_thp_t huge_arena_pac_thp = {.thp_madvise = false, - .auto_thp_switched = false, .n_thp_lazy = ATOMIC_INIT(0)}; + .auto_thp_switched = false, + .n_thp_lazy = ATOMIC_INIT(0)}; const arena_config_t arena_config_default = { - /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, - /* .metadata_use_hooks = */ true, + /* .extent_hooks = */ (extent_hooks_t *)&ehooks_default_extent_hooks, + /* .metadata_use_hooks = */ true, }; /******************************************************************************/ @@ -67,13 +63,12 @@ const arena_config_t arena_config_default = { * definition. */ -static bool arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, - bool is_background_thread, bool all); -static void arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin); -static void -arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - size_t npages_new); +static bool arena_decay_dirty( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); +static void arena_bin_lower_slab( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); +static void arena_maybe_do_deferred_work( + tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new); /******************************************************************************/ @@ -92,8 +87,8 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - bin_stats_data_t *bstats, arena_stats_large_t *lstats, - pac_estats_t *estats, hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { + bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, + hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, 
arena, nthreads, dss, dirty_decay_ms, @@ -135,8 +130,8 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, uint64_t nrequests = locked_read_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), &arena->stats.lstats[i].nrequests); - locked_inc_u64_unsynchronized(&lstats[i].nrequests, - nmalloc + nrequests); + locked_inc_u64_unsynchronized( + &lstats[i].nrequests, nmalloc + nrequests); astats->nrequests_large += nmalloc + nrequests; /* nfill == nmalloc for large currently. */ @@ -172,7 +167,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, astats->tcache_stashed_bytes = 0; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); cache_bin_array_descriptor_t *descriptor; - ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { + ql_foreach (descriptor, &arena->cache_bin_array_descriptor_ql, link) { for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &descriptor->bins[i]; if (cache_bin_disabled(cache_bin)) { @@ -180,10 +175,11 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } cache_bin_sz_t ncached, nstashed; - cache_bin_nitems_get_remote(cache_bin, &ncached, &nstashed); + cache_bin_nitems_get_remote( + cache_bin, &ncached, &nstashed); astats->tcache_bytes += ncached * sz_index2size(i); - astats->tcache_stashed_bytes += nstashed * - sz_index2size(i); + astats->tcache_stashed_bytes += nstashed + * sz_index2size(i); } } malloc_mutex_prof_read(tsdn, @@ -191,19 +187,18 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, &arena->tcache_ql_mtx); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); -#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ - malloc_mutex_lock(tsdn, &arena->mtx); \ - malloc_mutex_prof_read(tsdn, &astats->mutex_prof_data[ind], \ - &arena->mtx); \ - malloc_mutex_unlock(tsdn, &arena->mtx); +#define READ_ARENA_MUTEX_PROF_DATA(mtx, ind) \ + malloc_mutex_lock(tsdn, &arena->mtx); \ + malloc_mutex_prof_read( \ + tsdn, &astats->mutex_prof_data[ind], &arena->mtx); \ + 
malloc_mutex_unlock(tsdn, &arena->mtx); /* Gather per arena mutex profiling data. */ READ_ARENA_MUTEX_PROF_DATA(large_mtx, arena_prof_mutex_large); - READ_ARENA_MUTEX_PROF_DATA(base->mtx, - arena_prof_mutex_base); + READ_ARENA_MUTEX_PROF_DATA(base->mtx, arena_prof_mutex_base); #undef READ_ARENA_MUTEX_PROF_DATA - pa_shard_mtx_stats_read(tsdn, &arena->pa_shard, - astats->mutex_prof_data); + pa_shard_mtx_stats_read( + tsdn, &arena->pa_shard, astats->mutex_prof_data); nstime_copy(&astats->uptime, &arena->create_time); nstime_update(&astats->uptime); @@ -211,32 +206,33 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, for (szind_t i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_stats_merge(tsdn, &bstats[i], - arena_get_bin(arena, i, j)); + bin_stats_merge( + tsdn, &bstats[i], arena_get_bin(arena, i, j)); } } } static void -arena_background_thread_inactivity_check(tsdn_t *tsdn, arena_t *arena, - bool is_background_thread) { +arena_background_thread_inactivity_check( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread) { if (!background_thread_enabled() || is_background_thread) { return; } - background_thread_info_t *info = - arena_background_thread_info_get(arena); + background_thread_info_t *info = arena_background_thread_info_get( + arena); if (background_thread_indefinite_sleep(info)) { - arena_maybe_do_deferred_work(tsdn, arena, - &arena->pa_shard.pac.decay_dirty, 0); + arena_maybe_do_deferred_work( + tsdn, arena, &arena->pa_shard.pac.decay_dirty, 0); } } /* * React to deferred work generated by a PAI function. 
*/ -void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); +void +arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (decay_immediately(&arena->pa_shard.pac.decay_dirty)) { arena_decay_dirty(tsdn, arena, false, true); @@ -246,34 +242,34 @@ void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { static void * arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { - void *ret; + void *ret; slab_data_t *slab_data = edata_slab_data_get(slab); - size_t regind; + size_t regind; assert(edata_nfree_get(slab) > 0); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((byte_t *)edata_addr_get(slab) + - (uintptr_t)(bin_info->reg_size * regind)); + ret = (void *)((byte_t *)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); edata_nfree_dec(slab); return ret; } static void -arena_slab_reg_alloc_batch(edata_t *slab, const bin_info_t *bin_info, - unsigned cnt, void** ptrs) { +arena_slab_reg_alloc_batch( + edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) { slab_data_t *slab_data = edata_slab_data_get(slab); assert(edata_nfree_get(slab) >= cnt); assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); -#if (! 
defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) +#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) for (unsigned i = 0; i < cnt; i++) { - size_t regind = bitmap_sfu(slab_data->bitmap, - &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + - (uintptr_t)(bin_info->reg_size * regind)); + size_t regind = bitmap_sfu( + slab_data->bitmap, &bin_info->bitmap_info); + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); } #else unsigned group = 0; @@ -327,10 +323,9 @@ arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t hindex = index - SC_NBINS; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].nmalloc, 1); + &arena->stats.lstats[hindex].nmalloc, 1); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); + &arena->stats.lstats[hindex].active_bytes, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } @@ -353,30 +348,29 @@ arena_large_dalloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { szind_t hindex = index - SC_NBINS; LOCKEDINT_MTX_LOCK(tsdn, arena->stats.mtx); locked_inc_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].ndalloc, 1); + &arena->stats.lstats[hindex].ndalloc, 1); locked_dec_u64(tsdn, LOCKEDINT_MTX(arena->stats.mtx), - &arena->stats.lstats[hindex].active_bytes, - usize); + &arena->stats.lstats[hindex].active_bytes, usize); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); } } static void -arena_large_ralloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t oldusize, - size_t usize) { +arena_large_ralloc_stats_update( + tsdn_t *tsdn, arena_t *arena, size_t oldusize, size_t usize) { arena_large_malloc_stats_update(tsdn, arena, usize); arena_large_dalloc_stats_update(tsdn, arena, oldusize); } edata_t * -arena_extent_alloc_large(tsdn_t *tsdn, 
arena_t *arena, size_t usize, - size_t alignment, bool zero) { - bool deferred_work_generated = false; +arena_extent_alloc_large( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { + bool deferred_work_generated = false; szind_t szind = sz_size2index(usize); - size_t esize = usize + sz_large_pad; + size_t esize = usize + sz_large_pad; - bool guarded = san_large_extent_decide_guard(tsdn, - arena_get_ehooks(arena), esize, alignment); + bool guarded = san_large_extent_decide_guard( + tsdn, arena_get_ehooks(arena), esize, alignment); /* * - if usize >= opt_calloc_madvise_threshold, @@ -406,7 +400,7 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, * if cache_oblivious is enabled. */ if (zero && !zero_override && !edata_zeroed_get(edata)) { - void *addr = edata_addr_get(edata); + void *addr = edata_addr_get(edata); size_t usize = edata_usize_get(edata); memset(addr, 0, usize); } @@ -417,14 +411,14 @@ arena_extent_alloc_large(tsdn_t *tsdn, arena_t *arena, size_t usize, void arena_extent_dalloc_large_prep(tsdn_t *tsdn, arena_t *arena, edata_t *edata) { if (config_stats) { - arena_large_dalloc_stats_update(tsdn, arena, - edata_usize_get(edata)); + arena_large_dalloc_stats_update( + tsdn, arena, edata_usize_get(edata)); } } void -arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t oldusize) { +arena_extent_ralloc_large_shrink( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); if (config_stats) { @@ -433,8 +427,8 @@ arena_extent_ralloc_large_shrink(tsdn_t *tsdn, arena_t *arena, edata_t *edata, } void -arena_extent_ralloc_large_expand(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - size_t oldusize) { +arena_extent_ralloc_large_expand( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, size_t oldusize) { size_t usize = edata_usize_get(edata); if (config_stats) { @@ -459,12 +453,12 @@ arena_decide_unforced_purge_eagerness(bool 
is_background_thread) { } bool -arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, extent_state_t state, - ssize_t decay_ms) { +arena_decay_ms_set( + tsdn_t *tsdn, arena_t *arena, extent_state_t state, ssize_t decay_ms) { pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( /* is_background_thread */ false); - return pa_decay_ms_set(tsdn, &arena->pa_shard, state, decay_ms, - eagerness); + return pa_decay_ms_set( + tsdn, &arena->pa_shard, state, decay_ms, eagerness); } ssize_t @@ -474,8 +468,8 @@ arena_decay_ms_get(arena_t *arena, extent_state_t state) { static bool arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - bool is_background_thread, bool all) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, bool is_background_thread, + bool all) { if (all) { malloc_mutex_lock(tsdn, &decay->mtx); pac_decay_all(tsdn, &arena->pa_shard.pac, decay, decay_stats, @@ -488,10 +482,10 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, /* No need to wait if another thread is in progress. */ return true; } - pac_purge_eagerness_t eagerness = - arena_decide_unforced_purge_eagerness(is_background_thread); - bool epoch_advanced = pac_maybe_decay_purge(tsdn, &arena->pa_shard.pac, - decay, decay_stats, ecache, eagerness); + pac_purge_eagerness_t eagerness = arena_decide_unforced_purge_eagerness( + is_background_thread); + bool epoch_advanced = pac_maybe_decay_purge( + tsdn, &arena->pa_shard.pac, decay, decay_stats, ecache, eagerness); size_t npages_new JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(0); if (epoch_advanced) { /* Backlog is updated on epoch advance. 
*/ @@ -499,8 +493,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } malloc_mutex_unlock(tsdn, &decay->mtx); - if (have_background_thread && background_thread_enabled() && - epoch_advanced && !is_background_thread) { + if (have_background_thread && background_thread_enabled() + && epoch_advanced && !is_background_thread) { arena_maybe_do_deferred_work(tsdn, arena, decay, npages_new); } @@ -508,16 +502,16 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } static bool -arena_decay_dirty(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, - bool all) { +arena_decay_dirty( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { return arena_decay_impl(tsdn, arena, &arena->pa_shard.pac.decay_dirty, &arena->pa_shard.pac.stats->decay_dirty, &arena->pa_shard.pac.ecache_dirty, is_background_thread, all); } static bool -arena_decay_muzzy(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, - bool all) { +arena_decay_muzzy( + tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { if (pa_shard_dont_decay_muzzy(&arena->pa_shard)) { return false; } @@ -564,13 +558,13 @@ arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, } nstime_subtract(remaining_sleep, &decay->epoch); if (npages_new > 0) { - uint64_t npurge_new = decay_npages_purge_in(decay, - remaining_sleep, npages_new); + uint64_t npurge_new = decay_npages_purge_in( + decay, remaining_sleep, npages_new); info->npages_to_purge_new += npurge_new; } malloc_mutex_unlock(tsdn, &decay->mtx); - return info->npages_to_purge_new > - ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; + return info->npages_to_purge_new + > ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD; } /* @@ -582,8 +576,8 @@ arena_should_decay_early(tsdn_t *tsdn, arena_t *arena, decay_t *decay, * deferred work has been generated. 
*/ static void -arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, - size_t npages_new) { +arena_maybe_do_deferred_work( + tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new) { background_thread_info_t *info = arena_background_thread_info_get( arena); if (malloc_mutex_trylock(tsdn, &info->mtx)) { @@ -603,7 +597,7 @@ arena_maybe_do_deferred_work(tsdn_t *tsdn, arena_t *arena, decay_t *decay, if (background_thread_indefinite_sleep(info)) { background_thread_wakeup_early(info, NULL); } else if (arena_should_decay_early(tsdn, arena, decay, info, - &remaining_sleep, npages_new)) { + &remaining_sleep, npages_new)) { info->npages_to_purge_new = 0; background_thread_wakeup_early(info, &remaining_sleep); } @@ -687,8 +681,8 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { if (arena_bin_has_batch(binind)) { bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init(&batched_bin->remote_frees, - BIN_REMOTE_FREE_ELEMS_MAX); + batcher_init( + &batched_bin->remote_frees, BIN_REMOTE_FREE_ELEMS_MAX); } if (bin->slabcur != NULL) { @@ -743,8 +737,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { assert(ptr != NULL); size_t usize = isalloc(tsdn, ptr); size_t bumped_usize = sz_sa2u(usize, PROF_SAMPLE_ALIGNMENT); - assert(bumped_usize <= SC_LARGE_MINCLASS && - PAGE_CEILING(bumped_usize) == bumped_usize); + assert(bumped_usize <= SC_LARGE_MINCLASS + && PAGE_CEILING(bumped_usize) == bumped_usize); assert(edata_size_get(edata) - bumped_usize <= sz_large_pad); szind_t szind = sz_size2index(bumped_usize); @@ -757,8 +751,8 @@ arena_prof_demote(tsdn_t *tsdn, edata_t *edata, const void *ptr) { } static void -arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path, edata_t *edata) { +arena_dalloc_promoted_impl( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path, edata_t *edata) { cassert(config_prof); assert(opt_prof); @@ -772,20 +766,20 @@ 
arena_dalloc_promoted_impl(tsdn_t *tsdn, void *ptr, tcache_t *tcache, safety_check_verify_redzone(ptr, usize, bumped_usize); } szind_t bumped_ind = sz_size2index(bumped_usize); - if (bumped_usize >= SC_LARGE_MINCLASS && - tcache != NULL && bumped_ind < TCACHE_NBINS_MAX && - !tcache_bin_disabled(bumped_ind, &tcache->bins[bumped_ind], - tcache->tcache_slow)) { - tcache_dalloc_large(tsdn_tsd(tsdn), tcache, ptr, bumped_ind, - slow_path); + if (bumped_usize >= SC_LARGE_MINCLASS && tcache != NULL + && bumped_ind < TCACHE_NBINS_MAX + && !tcache_bin_disabled( + bumped_ind, &tcache->bins[bumped_ind], tcache->tcache_slow)) { + tcache_dalloc_large( + tsdn_tsd(tsdn), tcache, ptr, bumped_ind, slow_path); } else { large_dalloc(tsdn, edata); } } void -arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, - bool slow_path) { +arena_dalloc_promoted( + tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); arena_dalloc_promoted_impl(tsdn, ptr, tcache, slow_path, edata); } @@ -810,14 +804,14 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); for (edata_t *edata = edata_list_active_first(&arena->large); - edata != NULL; edata = edata_list_active_first(&arena->large)) { - void *ptr = edata_base_get(edata); + edata != NULL; edata = edata_list_active_first(&arena->large)) { + void *ptr = edata_base_get(edata); size_t usize; malloc_mutex_unlock(tsd_tsdn(tsd), &arena->large_mtx); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); if (config_stats || (config_prof && opt_prof)) { @@ -841,16 +835,16 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. 
*/ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j), - i); + arena_bin_reset( + tsd, arena, arena_get_bin(arena, i, j), i); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); } static void -arena_prepare_base_deletion_sync_finish(tsd_t *tsd, malloc_mutex_t **mutexes, - unsigned n_mtx) { +arena_prepare_base_deletion_sync_finish( + tsd_t *tsd, malloc_mutex_t **mutexes, unsigned n_mtx) { for (unsigned i = 0; i < n_mtx; i++) { malloc_mutex_lock(tsd_tsdn(tsd), mutexes[i]); malloc_mutex_unlock(tsd_tsdn(tsd), mutexes[i]); @@ -909,9 +903,9 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { unsigned destroy_ind = base_ind_get(base_to_destroy); assert(destroy_ind >= manual_arena_base); - tsdn_t *tsdn = tsd_tsdn(tsd); + tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_t *delayed_mtx[ARENA_DESTROY_MAX_DELAYED_MTX]; - unsigned n_delayed = 0, total = narenas_total_get(); + unsigned n_delayed = 0, total = narenas_total_get(); for (unsigned i = 0; i < total; i++) { if (i == destroy_ind) { continue; @@ -921,12 +915,12 @@ arena_prepare_base_deletion(tsd_t *tsd, base_t *base_to_destroy) { continue; } pac_t *pac = &arena->pa_shard.pac; - arena_prepare_base_deletion_sync(tsd, &pac->ecache_dirty.mtx, - delayed_mtx, &n_delayed); - arena_prepare_base_deletion_sync(tsd, &pac->ecache_muzzy.mtx, - delayed_mtx, &n_delayed); - arena_prepare_base_deletion_sync(tsd, &pac->ecache_retained.mtx, - delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_dirty.mtx, delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_muzzy.mtx, delayed_mtx, &n_delayed); + arena_prepare_base_deletion_sync( + tsd, &pac->ecache_retained.mtx, delayed_mtx, &n_delayed); } arena_prepare_base_deletion_sync_finish(tsd, delayed_mtx, n_delayed); } @@ -968,17 +962,17 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { } static edata_t * -arena_slab_alloc(tsdn_t 
*tsdn, arena_t *arena, szind_t binind, unsigned binshard, - const bin_info_t *bin_info) { +arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned binshard, const bin_info_t *bin_info) { bool deferred_work_generated = false; - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool guarded = san_slab_extent_decide_guard(tsdn, - arena_get_ehooks(arena)); + bool guarded = san_slab_extent_decide_guard( + tsdn, arena_get_ehooks(arena)); edata_t *slab = pa_alloc(tsdn, &arena->pa_shard, bin_info->slab_size, /* alignment */ PAGE, /* slab */ true, /* szind */ binind, - /* zero */ false, guarded, &deferred_work_generated); + /* zero */ false, guarded, &deferred_work_generated); if (deferred_work_generated) { arena_handle_deferred_work(tsdn, arena); @@ -1024,15 +1018,15 @@ static void * arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind, edata_t *fresh_slab) { malloc_mutex_assert_owner(tsdn, &bin->lock); - arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, bin, binind, - fresh_slab); + arena_bin_refill_slabcur_with_fresh_slab( + tsdn, arena, bin, binind, fresh_slab); return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); } static bool -arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, - bin_t *bin) { +arena_bin_refill_slabcur_no_fresh_slab( + tsdn_t *tsdn, arena_t *arena, bin_t *bin) { malloc_mutex_assert_owner(tsdn, &bin->lock); /* Only called after arena_slab_reg_alloc[_batch] failed. 
*/ assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); @@ -1049,8 +1043,8 @@ arena_bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, } bin_t * -arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, - unsigned *binshard_p) { +arena_bin_choose( + tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard_p) { unsigned binshard; if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { binshard = 0; @@ -1065,8 +1059,8 @@ arena_bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, } void -arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, +arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, + szind_t binind, const cache_bin_sz_t nfill_min, const cache_bin_sz_t nfill_max) { assert(cache_bin_ncached_get_local(cache_bin) == 0); assert(nfill_min > 0 && nfill_min <= nfill_max); @@ -1102,12 +1096,12 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, * local exhausted, b) unlock and slab_alloc returns null, c) re-lock * and bin local fails again. */ - bool made_progress = true; - edata_t *fresh_slab = NULL; - bool alloc_and_retry = false; + bool made_progress = true; + edata_t *fresh_slab = NULL; + bool alloc_and_retry = false; cache_bin_sz_t filled = 0; - unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); /* * This has some fields that are conditionally initialized down batch @@ -1120,7 +1114,8 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); - arena_bin_flush_batch_after_lock(tsdn, arena, bin, binind, &batch_flush_state); + arena_bin_flush_batch_after_lock( + tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill_min) { /* Try batch-fill from slabcur first. 
*/ @@ -1136,8 +1131,8 @@ label_refill: cnt = nfill_min - filled; } - arena_slab_reg_alloc_batch(slabcur, bin_info, cnt, - &ptrs.ptr[filled]); + arena_slab_reg_alloc_batch( + slabcur, bin_info, cnt, &ptrs.ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1150,8 +1145,8 @@ label_refill: /* Then see if a new slab was reserved already. */ if (fresh_slab != NULL) { - arena_bin_refill_slabcur_with_fresh_slab(tsdn, arena, - bin, binind, fresh_slab); + arena_bin_refill_slabcur_with_fresh_slab( + tsdn, arena, bin, binind, fresh_slab); assert(bin->slabcur != NULL); fresh_slab = NULL; continue; @@ -1181,27 +1176,27 @@ label_refill: cache_bin->tstats.nrequests = 0; } - arena_bin_flush_batch_before_unlock(tsdn, arena, bin, binind, - &batch_flush_state); + arena_bin_flush_batch_before_unlock( + tsdn, arena, bin, binind, &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); - arena_bin_flush_batch_after_unlock(tsdn, arena, bin, binind, - &batch_flush_state); + arena_bin_flush_batch_after_unlock( + tsdn, arena, bin, binind, &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); assert(filled < nfill_min); assert(made_progress); - fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, - bin_info); + fresh_slab = arena_slab_alloc( + tsdn, arena, binind, binshard, bin_info); /* fresh_slab NULL case handled in the for loop. */ alloc_and_retry = false; made_progress = false; goto label_refill; } - assert((filled >= nfill_min && filled <= nfill_max) || - (fresh_slab == NULL && !made_progress)); + assert((filled >= nfill_min && filled <= nfill_max) + || (fresh_slab == NULL && !made_progress)); /* Release if allocated but not used. 
*/ if (fresh_slab != NULL) { @@ -1219,22 +1214,24 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; - const size_t nregs = bin_info->nregs; + const size_t nregs = bin_info->nregs; assert(nregs > 0); const size_t usize = bin_info->reg_size; const bool manual_arena = !arena_is_auto(arena); - unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + unsigned binshard; + bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); - size_t nslab = 0; - size_t filled = 0; - edata_t *slab = NULL; + size_t nslab = 0; + size_t filled = 0; + edata_t *slab = NULL; edata_list_active_t fulls; edata_list_active_init(&fulls); - while (filled < nfill && (slab = arena_slab_alloc(tsdn, arena, binind, - binshard, bin_info)) != NULL) { + while (filled < nfill + && (slab = arena_slab_alloc( + tsdn, arena, binind, binshard, bin_info)) + != NULL) { assert((size_t)edata_nfree_get(slab) == nregs); ++nslab; size_t batch = nfill - filled; @@ -1242,8 +1239,8 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, batch = nregs; } assert(batch > 0); - arena_slab_reg_alloc_batch(slab, bin_info, (unsigned)batch, - &ptrs[filled]); + arena_slab_reg_alloc_batch( + slab, bin_info, (unsigned)batch, &ptrs[filled]); assert(edata_addr_get(slab) == ptrs[filled]); if (zero) { memset(ptrs[filled], 0, batch * usize); @@ -1287,8 +1284,8 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, * bin->slabcur if necessary. 
*/ static void * -arena_bin_malloc_no_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind) { +arena_bin_malloc_no_fresh_slab( + tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { malloc_mutex_assert_owner(tsdn, &bin->lock); if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { @@ -1304,18 +1301,18 @@ static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; - size_t usize = sz_index2size(binind); - unsigned binshard; + size_t usize = sz_index2size(binind); + unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); malloc_mutex_lock(tsdn, &bin->lock); edata_t *fresh_slab = NULL; - void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ - fresh_slab = arena_slab_alloc(tsdn, arena, binind, binshard, - bin_info); + fresh_slab = arena_slab_alloc( + tsdn, arena, binind, binshard, bin_info); /********************************/ malloc_mutex_lock(tsdn, &bin->lock); /* Retry since the lock was dropped. 
*/ @@ -1326,8 +1323,8 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_unlock(tsdn, &bin->lock); return NULL; } - ret = arena_bin_malloc_with_fresh_slab(tsdn, arena, bin, - binind, fresh_slab); + ret = arena_bin_malloc_with_fresh_slab( + tsdn, arena, bin, binind, fresh_slab); fresh_slab = NULL; } } @@ -1390,7 +1387,8 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(alignment <= CACHELINE)) { return large_malloc(tsdn, arena, usize, zero); } else { - return large_palloc(tsdn, arena, usize, alignment, zero); + return large_palloc( + tsdn, arena, usize, alignment, zero); } } } @@ -1401,7 +1399,7 @@ arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { if (slab == bin->slabcur) { bin->slabcur = NULL; } else { - szind_t binind = edata_szind_get(slab); + szind_t binind = edata_szind_get(slab); const bin_info_t *bin_info = &bin_infos[binind]; /* @@ -1418,8 +1416,7 @@ arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, - bin_t *bin) { +arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { assert(edata_nfree_get(slab) > 0); /* @@ -1455,24 +1452,24 @@ arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { } void -arena_dalloc_bin_locked_handle_newly_empty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin) { +arena_dalloc_bin_locked_handle_newly_empty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { arena_dissociate_bin_slab(arena, slab, bin); arena_dalloc_bin_slab_prepare(tsdn, slab, bin); } void -arena_dalloc_bin_locked_handle_newly_nonempty(tsdn_t *tsdn, arena_t *arena, - edata_t *slab, bin_t *bin) { +arena_dalloc_bin_locked_handle_newly_nonempty( + tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { arena_bin_slabs_full_remove(arena, bin, slab); arena_bin_lower_slab(tsdn, arena, slab, bin); } static void 
arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { - szind_t binind = edata_szind_get(edata); + szind_t binind = edata_szind_get(edata); unsigned binshard = edata_binshard_get(edata); - bin_t *bin = arena_get_bin(arena, binind, binshard); + bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; @@ -1515,16 +1512,15 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t usize_min = sz_s2u(size); size_t usize_max = sz_s2u(size + extra); - if (likely(oldsize <= SC_SMALL_MAXCLASS && usize_min - <= SC_SMALL_MAXCLASS)) { + if (likely(oldsize <= SC_SMALL_MAXCLASS + && usize_min <= SC_SMALL_MAXCLASS)) { /* * Avoid moving the allocation if the size class can be left the * same. */ - assert(bin_infos[sz_size2index(oldsize)].reg_size == - oldsize); + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); if ((usize_max > SC_SMALL_MAXCLASS - || sz_size2index(usize_max) != sz_size2index(oldsize)) + || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < oldsize)) { ret = true; goto done; @@ -1535,8 +1531,8 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, ret = false; } else if (oldsize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS) { - ret = large_ralloc_no_move(tsdn, edata, usize_min, usize_max, - zero); + ret = large_ralloc_no_move( + tsdn, edata, usize_min, usize_max, zero); } else { ret = true; } @@ -1558,8 +1554,8 @@ arena_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { return NULL; } - return ipalloct_explicit_slab(tsdn, usize, alignment, zero, slab, - tcache, arena); + return ipalloct_explicit_slab( + tsdn, usize, alignment, zero, slab, tcache, arena); } void * @@ -1575,37 +1571,38 @@ arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, assert(sz_can_use_slab(usize)); /* Try to 
avoid moving the allocation. */ UNUSED size_t newsize; - if (!arena_ralloc_no_move(tsdn, ptr, oldsize, usize, 0, zero, - &newsize)) { + if (!arena_ralloc_no_move( + tsdn, ptr, oldsize, usize, 0, zero, &newsize)) { hook_invoke_expand(hook_args->is_realloc - ? hook_expand_realloc : hook_expand_rallocx, + ? hook_expand_realloc + : hook_expand_rallocx, ptr, oldsize, usize, (uintptr_t)ptr, hook_args->args); return ptr; } } - if (oldsize >= SC_LARGE_MINCLASS - && usize >= SC_LARGE_MINCLASS) { - return large_ralloc(tsdn, arena, ptr, usize, - alignment, zero, tcache, hook_args); + if (oldsize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS) { + return large_ralloc(tsdn, arena, ptr, usize, alignment, zero, + tcache, hook_args); } /* * size and oldsize are different enough that we need to move the * object. In that case, fall back to allocating new space and copying. */ - void *ret = arena_ralloc_move_helper(tsdn, arena, usize, alignment, - zero, slab, tcache); + void *ret = arena_ralloc_move_helper( + tsdn, arena, usize, alignment, zero, slab, tcache); if (ret == NULL) { return NULL; } - hook_invoke_alloc(hook_args->is_realloc - ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, - hook_args->args); - hook_invoke_dalloc(hook_args->is_realloc - ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + hook_invoke_alloc( + hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx, + ret, (uintptr_t)ret, hook_args->args); + hook_invoke_dalloc( + hook_args->is_realloc ? 
hook_dalloc_realloc : hook_dalloc_rallocx, + ptr, hook_args->args); /* * Junk/zero-filling were already done by @@ -1623,8 +1620,8 @@ arena_get_ehooks(arena_t *arena) { } extent_hooks_t * -arena_set_extent_hooks(tsd_t *tsd, arena_t *arena, - extent_hooks_t *extent_hooks) { +arena_set_extent_hooks( + tsd_t *tsd, arena_t *arena, extent_hooks_t *extent_hooks) { background_thread_info_t *info; if (have_background_thread) { info = arena_background_thread_info_get(arena); @@ -1699,11 +1696,11 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { } bool -arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, - size_t *new_limit) { +arena_retain_grow_limit_get_set( + tsd_t *tsd, arena_t *arena, size_t *old_limit, size_t *new_limit) { assert(opt_retain); - return pac_retain_grow_limit_get_set(tsd_tsdn(tsd), - &arena->pa_shard.pac, old_limit, new_limit); + return pac_retain_grow_limit_get_set( + tsd_tsdn(tsd), &arena->pa_shard.pac, old_limit, new_limit); } unsigned @@ -1724,7 +1721,7 @@ arena_nthreads_dec(arena_t *arena, bool internal) { arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; - base_t *base; + base_t *base; if (ind == 0) { base = b0get(); @@ -1736,8 +1733,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } } - size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + - sizeof(bin_with_batch_t) * bin_info_nbatched_bins + size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) + + sizeof(bin_with_batch_t) * bin_info_nbatched_bins + sizeof(bin_t) * bin_info_nunbatched_bins; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { @@ -1756,27 +1753,27 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { ql_new(&arena->tcache_ql); ql_new(&arena->cache_bin_array_descriptor_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", - WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { + 
WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; } } - atomic_store_u(&arena->dss_prec, (unsigned)extent_dss_prec_get(), - ATOMIC_RELAXED); + atomic_store_u( + &arena->dss_prec, (unsigned)extent_dss_prec_get(), ATOMIC_RELAXED); edata_list_active_init(&arena->large); if (malloc_mutex_init(&arena->large_mtx, "arena_large", - WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_ARENA_LARGE, malloc_mutex_rank_exclusive)) { goto label_error; } nstime_t cur_time; nstime_init_update(&cur_time); if (pa_shard_init(tsdn, &arena->pa_shard, &arena_pa_central_global, - &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, - LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, - arena_dirty_decay_ms_default_get(), - arena_muzzy_decay_ms_default_get())) { + &arena_emap_global, base, ind, &arena->stats.pa_shard_stats, + LOCKEDINT_MTX(arena->stats.mtx), &cur_time, oversize_threshold, + arena_dirty_decay_ms_default_get(), + arena_muzzy_decay_ms_default_get())) { goto label_error; } @@ -1785,7 +1782,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin, i); + bool err = bin_init(bin, i); if (err) { goto label_error; } @@ -1814,8 +1811,8 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { if (opt_hpa && ehooks_are_default(base_ehooks_get(base)) && ind != 0) { hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); - if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { + if (pa_shard_enable_hpa(tsdn, &arena->pa_shard, &hpa_shard_opts, + &opt_hpa_sec_opts)) { goto label_error; } } @@ -1866,13 +1863,13 @@ arena_create_huge_arena(tsd_t *tsd, unsigned ind) { */ if (!background_thread_enabled() && arena_dirty_decay_ms_default_get() > 0) 
{ - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_dirty, 0); + arena_decay_ms_set( + tsd_tsdn(tsd), huge_arena, extent_state_dirty, 0); } if (!background_thread_enabled() - &&arena_muzzy_decay_ms_default_get() > 0) { - arena_decay_ms_set(tsd_tsdn(tsd), huge_arena, - extent_state_muzzy, 0); + && arena_muzzy_decay_ms_default_get() > 0) { + arena_decay_ms_set( + tsd_tsdn(tsd), huge_arena, extent_state_muzzy, 0); } return huge_arena; @@ -1900,8 +1897,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { assert(huge_arena_ind == 0); /* The threshold should be large size class. */ - if (opt_oversize_threshold > SC_LARGE_MAXCLASS || - opt_oversize_threshold < SC_LARGE_MINCLASS) { + if (opt_oversize_threshold > SC_LARGE_MAXCLASS + || opt_oversize_threshold < SC_LARGE_MINCLASS) { opt_oversize_threshold = 0; oversize_threshold = SC_LARGE_MAXCLASS + PAGE; huge_enabled = false; @@ -1917,10 +1914,11 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { base_t *b0 = a0->base; /* Make sure that b0 thp auto-switch won't happen concurrently here. 
*/ malloc_mutex_lock(tsdn, &b0->mtx); - (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp && - metadata_thp_enabled() && (opt_thp == thp_mode_default) && - (init_system_thp_mode == thp_mode_default); - (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; + (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp + && metadata_thp_enabled() && (opt_thp == thp_mode_default) + && (init_system_thp_mode == thp_mode_default); + (&huge_arena_pac_thp)->auto_thp_switched = + b0->auto_thp_switched; malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", WITNESS_RANK_LEAF, malloc_mutex_rank_exclusive); edata_list_active_init(&(&huge_arena_pac_thp)->thp_lazy_list); @@ -1942,16 +1940,16 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { } JEMALLOC_SUPPRESS_WARN_ON_USAGE( - uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins); - ) + uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; uint32_t bin_sz = (i < bin_info_nbatched_sizes - ? sizeof(bin_with_batch_t) : sizeof(bin_t)); + ? sizeof(bin_with_batch_t) + : sizeof(bin_t)); cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; } - return pa_central_init(&arena_pa_central_global, base, hpa, - &hpa_hooks_default); + return pa_central_init( + &arena_pa_central_global, base, hpa, &hpa_hooks_default); } void diff --git a/src/background_thread.c b/src/background_thread.c index 511febac..2eb08dd2 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -11,15 +11,15 @@ JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS /* This option should be opt-in only. */ #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. */ -bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; +bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1; /* Used for thread creation, termination and stats. 
*/ malloc_mutex_t background_thread_lock; /* Indicates global state. Atomic because decay reads this w/o locking. */ atomic_b_t background_thread_enabled_state; -size_t n_background_threads; -size_t max_background_threads; +size_t n_background_threads; +size_t max_background_threads; /* Thread info per-index. */ background_thread_info_t *background_thread_info; @@ -32,11 +32,11 @@ static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, static void pthread_create_wrapper_init(void) { -#ifdef JEMALLOC_LAZY_LOCK +# ifdef JEMALLOC_LAZY_LOCK if (!isthreaded) { isthreaded = true; } -#endif +# endif } int @@ -47,9 +47,9 @@ pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, return pthread_create_fptr(thread, attr, start_routine, arg); } -#ifdef JEMALLOC_HAVE_DLSYM -#include -#endif +# ifdef JEMALLOC_HAVE_DLSYM +# include +# endif static bool pthread_create_fptr_init(void) { @@ -61,17 +61,18 @@ pthread_create_fptr_init(void) { * wrapper for pthread_create; and 2) application may define its own * wrapper as well (and can call malloc within the wrapper). 
*/ -#ifdef JEMALLOC_HAVE_DLSYM +# ifdef JEMALLOC_HAVE_DLSYM pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); if (pthread_create_fptr == NULL) { pthread_create_fptr = dlsym(RTLD_DEFAULT, "pthread_create"); } -#else +# else pthread_create_fptr = NULL; -#endif +# endif if (pthread_create_fptr == NULL) { if (config_lazy_lock) { - malloc_write(": Error in dlsym(RTLD_NEXT, " + malloc_write( + ": Error in dlsym(RTLD_NEXT, " "\"pthread_create\")\n"); abort(); } else { @@ -85,21 +86,24 @@ pthread_create_fptr_init(void) { #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */ #ifndef JEMALLOC_BACKGROUND_THREAD -#define NOT_REACHED { not_reached(); } -bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED -bool background_threads_enable(tsd_t *tsd) NOT_REACHED -bool background_threads_disable(tsd_t *tsd) NOT_REACHED -bool background_thread_is_started(background_thread_info_t *info) NOT_REACHED -void background_thread_wakeup_early(background_thread_info_t *info, - nstime_t *remaining_sleep) NOT_REACHED -void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED -void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED -void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED -void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED -bool background_thread_stats_read(tsdn_t *tsdn, - background_thread_stats_t *stats) NOT_REACHED -void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED -#undef NOT_REACHED +# define NOT_REACHED \ + { not_reached(); } +bool +background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED + bool background_threads_enable(tsd_t *tsd) NOT_REACHED + bool background_threads_disable(tsd_t *tsd) NOT_REACHED + bool background_thread_is_started( + background_thread_info_t *info) NOT_REACHED + void background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep) NOT_REACHED + void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED + void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED 
+ void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED + void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED + bool background_thread_stats_read( + tsdn_t *tsdn, background_thread_stats_t *stats) NOT_REACHED + void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED +# undef NOT_REACHED #else static bool background_thread_enabled_at_fork; @@ -116,49 +120,50 @@ background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) { static inline bool set_current_thread_affinity(int cpu) { -#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP) -#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) \ + || defined(JEMALLOC_HAVE_PTHREAD_SETAFFINITY_NP) +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) cpu_set_t cpuset; -#else -# ifndef __NetBSD__ +# else +# ifndef __NetBSD__ cpuset_t cpuset; -# else +# else cpuset_t *cpuset; -# endif -#endif +# endif +# endif -#ifndef __NetBSD__ +# ifndef __NetBSD__ CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); -#else +# else cpuset = cpuset_create(); -#endif +# endif -#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) return (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset) != 0); -#else -# ifndef __NetBSD__ - int ret = pthread_setaffinity_np(pthread_self(), sizeof(cpuset_t), - &cpuset); -# else - int ret = pthread_setaffinity_np(pthread_self(), cpuset_size(cpuset), - cpuset); +# else +# ifndef __NetBSD__ + int ret = pthread_setaffinity_np( + pthread_self(), sizeof(cpuset_t), &cpuset); +# else + int ret = pthread_setaffinity_np( + pthread_self(), cpuset_size(cpuset), cpuset); cpuset_destroy(cpuset); -# endif +# endif return ret != 0; -#endif -#else - return false; -#endif +# endif +# else + return false; +# endif } -#define BILLION UINT64_C(1000000000) +# define BILLION UINT64_C(1000000000) /* Minimal sleep interval 100 ms. 
*/ -#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) +# define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10) static int -background_thread_cond_wait(background_thread_info_t *info, - struct timespec *ts) { +background_thread_cond_wait( + background_thread_info_t *info, struct timespec *ts) { int ret; /* @@ -177,8 +182,8 @@ background_thread_cond_wait(background_thread_info_t *info, } static void -background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, - uint64_t interval) { +background_thread_sleep( + tsdn_t *tsdn, background_thread_info_t *info, uint64_t interval) { if (config_stats) { info->tot_n_runs++; } @@ -192,21 +197,21 @@ background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info, int ret; if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) { - background_thread_wakeup_time_set(tsdn, info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set( + tsdn, info, BACKGROUND_THREAD_INDEFINITE_SLEEP); ret = background_thread_cond_wait(info, NULL); assert(ret == 0); } else { - assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS && - interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); + assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS + && interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP); /* We need malloc clock (can be different from tv). 
*/ nstime_t next_wakeup; nstime_init_update(&next_wakeup); nstime_iadd(&next_wakeup, interval); - assert(nstime_ns(&next_wakeup) < - BACKGROUND_THREAD_INDEFINITE_SLEEP); - background_thread_wakeup_time_set(tsdn, info, - nstime_ns(&next_wakeup)); + assert(nstime_ns(&next_wakeup) + < BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set( + tsdn, info, nstime_ns(&next_wakeup)); nstime_t ts_wakeup; nstime_copy(&ts_wakeup, &before_sleep); @@ -245,11 +250,11 @@ background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) { } static inline void -background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, - unsigned ind) { +background_work_sleep_once( + tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) { uint64_t ns_until_deferred = BACKGROUND_THREAD_DEFERRED_MAX; unsigned narenas = narenas_total_get(); - bool slept_indefinitely = background_thread_indefinite_sleep(info); + bool slept_indefinitely = background_thread_indefinite_sleep(info); for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); @@ -279,11 +284,10 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, if (ns_until_deferred == BACKGROUND_THREAD_DEFERRED_MAX) { sleep_ns = BACKGROUND_THREAD_INDEFINITE_SLEEP; } else { - sleep_ns = - (ns_until_deferred < BACKGROUND_THREAD_MIN_INTERVAL_NS) + sleep_ns = (ns_until_deferred + < BACKGROUND_THREAD_MIN_INTERVAL_NS) ? 
BACKGROUND_THREAD_MIN_INTERVAL_NS : ns_until_deferred; - } background_thread_sleep(tsdn, info, sleep_ns); @@ -292,11 +296,11 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, static bool background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) { if (info == &background_thread_info[0]) { - malloc_mutex_assert_owner(tsd_tsdn(tsd), - &background_thread_lock); + malloc_mutex_assert_owner( + tsd_tsdn(tsd), &background_thread_lock); } else { - malloc_mutex_assert_not_owner(tsd_tsdn(tsd), - &background_thread_lock); + malloc_mutex_assert_not_owner( + tsd_tsdn(tsd), &background_thread_lock); } pre_reentrancy(tsd, NULL); @@ -340,21 +344,23 @@ background_thread_create_signals_masked(pthread_t *thread, sigset_t set; sigfillset(&set); sigset_t oldset; - int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset); + int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset); if (mask_err != 0) { return mask_err; } - int create_err = pthread_create_wrapper(thread, attr, start_routine, - arg); + int create_err = pthread_create_wrapper( + thread, attr, start_routine, arg); /* * Restore the signal mask. Failure to restore the signal mask here * changes program behavior. 
*/ int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL); if (restore_err != 0) { - malloc_printf(": background thread creation " + malloc_printf( + ": background thread creation " "failed (%d), and signal mask restoration failed " - "(%d)\n", create_err, restore_err); + "(%d)\n", + create_err, restore_err); if (opt_abort) { abort(); } @@ -364,8 +370,8 @@ background_thread_create_signals_masked(pthread_t *thread, static bool check_background_thread_creation(tsd_t *tsd, - const size_t const_max_background_threads, - unsigned *n_created, bool *created_threads) { + const size_t const_max_background_threads, unsigned *n_created, + bool *created_threads) { bool ret = false; if (likely(*n_created == n_background_threads)) { return ret; @@ -391,7 +397,7 @@ check_background_thread_creation(tsd_t *tsd, pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ NULL, background_thread_entry, (void *)(uintptr_t)i); post_reentrancy(tsd); @@ -399,8 +405,10 @@ check_background_thread_creation(tsd_t *tsd, (*n_created)++; created_threads[i] = true; } else { - malloc_printf(": background thread " - "creation failed (%d)\n", err); + malloc_printf( + ": background thread " + "creation failed (%d)\n", + err); if (opt_abort) { abort(); } @@ -434,16 +442,17 @@ background_thread0_work(tsd_t *tsd) { /* Start working, and create more threads when asked. 
*/ unsigned n_created = 1; while (background_thread_info[0].state != background_thread_stopped) { - if (background_thread_pause_check(tsd_tsdn(tsd), - &background_thread_info[0])) { + if (background_thread_pause_check( + tsd_tsdn(tsd), &background_thread_info[0])) { continue; } - if (check_background_thread_creation(tsd, const_max_background_threads, - &n_created, (bool *)&created_threads)) { + if (check_background_thread_creation(tsd, + const_max_background_threads, &n_created, + (bool *)&created_threads)) { continue; } - background_work_sleep_once(tsd_tsdn(tsd), - &background_thread_info[0], 0); + background_work_sleep_once( + tsd_tsdn(tsd), &background_thread_info[0], 0); } /* @@ -460,8 +469,8 @@ background_thread0_work(tsd_t *tsd) { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); if (info->state != background_thread_stopped) { /* The thread was not created. */ - assert(info->state == - background_thread_started); + assert( + info->state == background_thread_started); n_background_threads--; info->state = background_thread_stopped; } @@ -477,14 +486,14 @@ background_work(tsd_t *tsd, unsigned ind) { background_thread_info_t *info = &background_thread_info[ind]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - background_thread_wakeup_time_set(tsd_tsdn(tsd), info, - BACKGROUND_THREAD_INDEFINITE_SLEEP); + background_thread_wakeup_time_set( + tsd_tsdn(tsd), info, BACKGROUND_THREAD_INDEFINITE_SLEEP); if (ind == 0) { background_thread0_work(tsd); } else { while (info->state != background_thread_stopped) { - if (background_thread_pause_check(tsd_tsdn(tsd), - info)) { + if (background_thread_pause_check( + tsd_tsdn(tsd), info)) { continue; } background_work_sleep_once(tsd_tsdn(tsd), info, ind); @@ -499,11 +508,11 @@ static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; assert(thread_ind < max_background_threads); -#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP +# ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP 
pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); -#elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) +# elif defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) pthread_set_name_np(pthread_self(), "jemalloc_bg_thd"); -#endif +# endif if (opt_percpu_arena != percpu_arena_disabled) { set_current_thread_affinity((int)thread_ind); } @@ -513,8 +522,8 @@ background_thread_entry(void *ind_arg) { * turn triggers another background thread creation). */ background_work(tsd_internal_fetch(), thread_ind); - assert(pthread_equal(pthread_self(), - background_thread_info[thread_ind].thread)); + assert(pthread_equal( + pthread_self(), background_thread_info[thread_ind].thread)); return NULL; } @@ -538,8 +547,8 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { bool need_new_thread; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - need_new_thread = background_thread_enabled() && - (info->state == background_thread_stopped); + need_new_thread = background_thread_enabled() + && (info->state == background_thread_stopped); if (need_new_thread) { background_thread_init(tsd, info); } @@ -564,13 +573,15 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { * background threads with the underlying pthread_create. */ int err = background_thread_create_signals_masked(&info->thread, NULL, - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ background_thread_entry, (void *)thread_ind); post_reentrancy(tsd); if (err != 0) { - malloc_printf(": arena 0 background thread creation " - "failed (%d)\n", err); + malloc_printf( + ": arena 0 background thread creation " + "failed (%d)\n", + err); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); info->state = background_thread_stopped; n_background_threads--; @@ -612,12 +623,12 @@ background_threads_enable(tsd_t *tsd) { /* Mark the threads we need to create for thread 0. 
*/ unsigned narenas = narenas_total_get(); for (unsigned i = 1; i < narenas; i++) { - if (marked[i % max_background_threads] || - arena_get(tsd_tsdn(tsd), i, false) == NULL) { + if (marked[i % max_background_threads] + || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } - background_thread_info_t *info = &background_thread_info[ - i % max_background_threads]; + background_thread_info_t *info = + &background_thread_info[i % max_background_threads]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); @@ -635,8 +646,8 @@ background_threads_enable(tsd_t *tsd) { for (unsigned i = 0; i < narenas; i++) { arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); if (arena != NULL) { - pa_shard_set_deferral_allowed(tsd_tsdn(tsd), - &arena->pa_shard, true); + pa_shard_set_deferral_allowed( + tsd_tsdn(tsd), &arena->pa_shard, true); } } return false; @@ -648,8 +659,8 @@ background_threads_disable(tsd_t *tsd) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); /* Thread 0 will be responsible for terminating other threads. */ - if (background_threads_disable_single(tsd, - &background_thread_info[0])) { + if (background_threads_disable_single( + tsd, &background_thread_info[0])) { return true; } assert(n_background_threads == 0); @@ -657,8 +668,8 @@ background_threads_disable(tsd_t *tsd) { for (unsigned i = 0; i < narenas; i++) { arena_t *arena = arena_get(tsd_tsdn(tsd), i, false); if (arena != NULL) { - pa_shard_set_deferral_allowed(tsd_tsdn(tsd), - &arena->pa_shard, false); + pa_shard_set_deferral_allowed( + tsd_tsdn(tsd), &arena->pa_shard, false); } } @@ -671,15 +682,15 @@ background_thread_is_started(background_thread_info_t *info) { } void -background_thread_wakeup_early(background_thread_info_t *info, - nstime_t *remaining_sleep) { +background_thread_wakeup_early( + background_thread_info_t *info, nstime_t *remaining_sleep) { /* * This is an optimization to increase batching. 
At this point * we know that background thread wakes up soon, so the time to cache * the just freed memory is bounded and low. */ - if (remaining_sleep != NULL && nstime_ns(remaining_sleep) < - BACKGROUND_THREAD_MIN_INTERVAL_NS) { + if (remaining_sleep != NULL + && nstime_ns(remaining_sleep) < BACKGROUND_THREAD_MIN_INTERVAL_NS) { return; } pthread_cond_signal(&info->cond); @@ -701,8 +712,8 @@ background_thread_prefork1(tsdn_t *tsdn) { void background_thread_postfork_parent(tsdn_t *tsdn) { for (unsigned i = 0; i < max_background_threads; i++) { - malloc_mutex_postfork_parent(tsdn, - &background_thread_info[i].mtx); + malloc_mutex_postfork_parent( + tsdn, &background_thread_info[i].mtx); } malloc_mutex_postfork_parent(tsdn, &background_thread_lock); } @@ -710,8 +721,8 @@ background_thread_postfork_parent(tsdn_t *tsdn) { void background_thread_postfork_child(tsdn_t *tsdn) { for (unsigned i = 0; i < max_background_threads; i++) { - malloc_mutex_postfork_child(tsdn, - &background_thread_info[i].mtx); + malloc_mutex_postfork_child( + tsdn, &background_thread_info[i].mtx); } malloc_mutex_postfork_child(tsdn, &background_thread_lock); if (!background_thread_enabled_at_fork) { @@ -760,8 +771,8 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { if (info->state != background_thread_stopped) { num_runs += info->tot_n_runs; nstime_add(&stats->run_interval, &info->tot_sleep_time); - malloc_mutex_prof_max_update(tsdn, - &stats->max_counter_per_bg_thd, &info->mtx); + malloc_mutex_prof_max_update( + tsdn, &stats->max_counter_per_bg_thd, &info->mtx); } malloc_mutex_unlock(tsdn, &info->mtx); } @@ -774,9 +785,9 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { return false; } -#undef BACKGROUND_THREAD_NPAGES_THRESHOLD -#undef BILLION -#undef BACKGROUND_THREAD_MIN_INTERVAL_NS +# undef BACKGROUND_THREAD_NPAGES_THRESHOLD +# undef BILLION +# undef BACKGROUND_THREAD_MIN_INTERVAL_NS /* * When lazy lock is enabled, we need to 
make sure setting isthreaded before @@ -787,24 +798,24 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); -#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER +# ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER pthread_create_fptr_init(); pthread_create_wrapper_init(); -#endif +# endif } #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */ -bool -background_thread_boot0(void) { + bool background_thread_boot0(void) { if (!have_background_thread && opt_background_thread) { - malloc_printf(": option background_thread currently " + malloc_printf( + ": option background_thread currently " "supports pthread only\n"); return true; } #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - if ((config_lazy_lock || opt_background_thread) && - pthread_create_fptr_init()) { + if ((config_lazy_lock || opt_background_thread) + && pthread_create_fptr_init()) { return true; } #endif @@ -823,15 +834,15 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { max_background_threads = opt_max_background_threads; if (malloc_mutex_init(&background_thread_lock, - "background_thread_global", - WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, - malloc_mutex_rank_exclusive)) { + "background_thread_global", + WITNESS_RANK_BACKGROUND_THREAD_GLOBAL, + malloc_mutex_rank_exclusive)) { return true; } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - base, opt_max_background_threads * - sizeof(background_thread_info_t), CACHELINE); + base, opt_max_background_threads * sizeof(background_thread_info_t), + CACHELINE); if (background_thread_info == NULL) { return true; } @@ -840,8 +851,8 @@ background_thread_boot1(tsdn_t *tsdn, base_t *base) { background_thread_info_t *info = &background_thread_info[i]; /* Thread mutex is rank_inclusive because of thread0. 
*/ if (malloc_mutex_init(&info->mtx, "background_thread", - WITNESS_RANK_BACKGROUND_THREAD, - malloc_mutex_address_ordered)) { + WITNESS_RANK_BACKGROUND_THREAD, + malloc_mutex_address_ordered)) { return true; } if (pthread_cond_init(&info->cond, NULL)) { diff --git a/src/base.c b/src/base.c index 52f3d1d3..c494556c 100644 --- a/src/base.c +++ b/src/base.c @@ -12,7 +12,7 @@ * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. */ -#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD 2 #define BASE_AUTO_THP_THRESHOLD_A0 5 /******************************************************************************/ @@ -22,25 +22,21 @@ static base_t *b0; metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; -const char *const metadata_thp_mode_names[] = { - "disabled", - "auto", - "always" -}; +const char *const metadata_thp_mode_names[] = {"disabled", "auto", "always"}; /******************************************************************************/ static inline bool metadata_thp_madvise(void) { - return (metadata_thp_enabled() && - (init_system_thp_mode == thp_mode_default)); + return (metadata_thp_enabled() + && (init_system_thp_mode == thp_mode_default)); } static void * base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { void *addr; - bool zero = true; - bool commit = true; + bool zero = true; + bool commit = true; /* * Use huge page sizes and alignment when opt_metadata_thp is enabled @@ -56,16 +52,16 @@ base_map(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, size_t size) { if (ehooks_are_default(ehooks)) { addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { - addr = ehooks_alloc(tsdn, ehooks, NULL, size, alignment, &zero, - &commit); + addr = ehooks_alloc( + tsdn, ehooks, NULL, size, alignment, &zero, &commit); } return addr; } static void -base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, - size_t size) { +base_unmap( + tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void 
*addr, size_t size) { /* * Cascade through dalloc, decommit, purge_forced, and purge_lazy, * stopping at first success. This cascade is performed for consistency @@ -109,8 +105,8 @@ base_unmap(tsdn_t *tsdn, ehooks_t *ehooks, unsigned ind, void *addr, label_done: if (metadata_thp_madvise()) { /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */ - assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && - (size & HUGEPAGE_MASK) == 0); + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 + && (size & HUGEPAGE_MASK) == 0); pages_nohuge(addr, size); } } @@ -126,8 +122,8 @@ base_edata_is_reused(edata_t *edata) { } static void -base_edata_init(size_t *extent_sn_next, edata_t *edata, void *addr, - size_t size) { +base_edata_init( + size_t *extent_sn_next, edata_t *edata, void *addr, size_t size) { size_t sn; sn = *extent_sn_next; @@ -174,9 +170,9 @@ huge_arena_auto_thp_switch(tsdn_t *tsdn, pac_thp_t *pac_thp) { unsigned cnt = 0; edata_t *edata; - ql_foreach(edata, &pending_list->head, ql_link_active) { + ql_foreach (edata, &pending_list->head, ql_link_active) { assert(edata != NULL); - void *addr = edata_addr_get(edata); + void *addr = edata_addr_get(edata); size_t size = edata_size_get(edata); assert(HUGEPAGE_ADDR2BASE(addr) == addr); assert(HUGEPAGE_CEILING(size) == size && size != 0); @@ -196,11 +192,11 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { /* Called when adding a new block. 
*/ bool should_switch; if (base_ind_get(base) != 0) { - should_switch = (base_get_num_blocks(base, true) == - BASE_AUTO_THP_THRESHOLD); + should_switch = (base_get_num_blocks(base, true) + == BASE_AUTO_THP_THRESHOLD); } else { - should_switch = (base_get_num_blocks(base, true) == - BASE_AUTO_THP_THRESHOLD_A0); + should_switch = (base_get_num_blocks(base, true) + == BASE_AUTO_THP_THRESHOLD_A0); } if (!should_switch) { return; @@ -214,8 +210,9 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { assert((block->size & HUGEPAGE_MASK) == 0); pages_huge(block, block->size); if (config_stats) { - base->n_thp += HUGEPAGE_CEILING(block->size - - edata_bsize_get(&block->edata)) >> LG_HUGEPAGE; + base->n_thp += HUGEPAGE_CEILING(block->size + - edata_bsize_get(&block->edata)) + >> LG_HUGEPAGE; } block = block->next; assert(block == NULL || (base_ind_get(base) == 0)); @@ -242,20 +239,22 @@ base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { } static void * -base_extent_bump_alloc_helper(edata_t *edata, size_t *gap_size, size_t size, - size_t alignment) { +base_extent_bump_alloc_helper( + edata_t *edata, size_t *gap_size, size_t size, size_t alignment) { void *ret; assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM)); assert(size == ALIGNMENT_CEILING(size, alignment)); - *gap_size = ALIGNMENT_CEILING((uintptr_t)edata_addr_get(edata), - alignment) - (uintptr_t)edata_addr_get(edata); + *gap_size = ALIGNMENT_CEILING( + (uintptr_t)edata_addr_get(edata), alignment) + - (uintptr_t)edata_addr_get(edata); ret = (void *)((byte_t *)edata_addr_get(edata) + *gap_size); assert(edata_bsize_get(edata) >= *gap_size + size); - edata_binit(edata, (void *)((byte_t *)edata_addr_get(edata) + - *gap_size + size), edata_bsize_get(edata) - *gap_size - size, - edata_sn_get(edata), base_edata_is_reused(edata)); + edata_binit(edata, + (void *)((byte_t *)edata_addr_get(edata) + *gap_size + size), + edata_bsize_get(edata) - *gap_size - size, edata_sn_get(edata), + base_edata_is_reused(edata)); return 
ret; } @@ -312,24 +311,26 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, edata_t *edata, * crossed by the new allocation. Adjust n_thp similarly when * metadata_thp is enabled. */ - base->resident += PAGE_CEILING((uintptr_t)addr + size) - - PAGE_CEILING((uintptr_t)addr - gap_size); + base->resident += PAGE_CEILING((uintptr_t)addr + size) + - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); - if (metadata_thp_madvise() && (opt_metadata_thp == - metadata_thp_always || base->auto_thp_switched)) { + if (metadata_thp_madvise() + && (opt_metadata_thp == metadata_thp_always + || base->auto_thp_switched)) { base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) - - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> - LG_HUGEPAGE; + - HUGEPAGE_CEILING( + (uintptr_t)addr - gap_size)) + >> LG_HUGEPAGE; assert(base->mapped >= base->n_thp << LG_HUGEPAGE); } } } static void * -base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, - size_t alignment) { - void *ret; +base_extent_bump_alloc( + tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, size_t alignment) { + void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(edata, &gap_size, size, alignment); @@ -339,9 +340,9 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, edata_t *edata, size_t size, static size_t base_block_size_ceil(size_t block_size) { - return opt_metadata_thp == metadata_thp_disabled ? - ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN) : - HUGEPAGE_CEILING(block_size); + return opt_metadata_thp == metadata_thp_disabled + ? 
ALIGNMENT_CEILING(block_size, BASE_BLOCK_MIN_ALIGN) + : HUGEPAGE_CEILING(block_size); } /* @@ -356,8 +357,8 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); size_t header_size = sizeof(base_block_t); - size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) - - header_size; + size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) + - header_size; /* * Create increasingly larger blocks in order to limit the total number * of disjoint virtual memory ranges. Choose the next size in the page @@ -365,27 +366,29 @@ base_block_alloc(tsdn_t *tsdn, base_t *base, ehooks_t *ehooks, unsigned ind, * HUGEPAGE when using metadata_thp), or a size large enough to satisfy * the requested size and alignment, whichever is larger. */ - size_t min_block_size = base_block_size_ceil(sz_psz2u(header_size + - gap_size + usize)); - pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ? - *pind_last + 1 : *pind_last; - size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); - size_t block_size = (min_block_size > next_block_size) ? min_block_size - : next_block_size; - base_block_t *block = (base_block_t *)base_map(tsdn, ehooks, ind, - block_size); + size_t min_block_size = base_block_size_ceil( + sz_psz2u(header_size + gap_size + usize)); + pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) + ? *pind_last + 1 + : *pind_last; + size_t next_block_size = base_block_size_ceil(sz_pind2sz(pind_next)); + size_t block_size = (min_block_size > next_block_size) + ? 
min_block_size + : next_block_size; + base_block_t *block = (base_block_t *)base_map( + tsdn, ehooks, ind, block_size); if (block == NULL) { return NULL; } if (metadata_thp_madvise()) { void *addr = (void *)block; - assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && - (block_size & HUGEPAGE_MASK) == 0); + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 + && (block_size & HUGEPAGE_MASK) == 0); if (opt_metadata_thp == metadata_thp_always) { pages_huge(addr, block_size); - } else if (opt_metadata_thp == metadata_thp_auto && - base != NULL) { + } else if (opt_metadata_thp == metadata_thp_auto + && base != NULL) { /* base != NULL indicates this is not a new base. */ malloc_mutex_lock(tsdn, &base->mtx); base_auto_thp_switch(tsdn, base); @@ -432,12 +435,12 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; - if (metadata_thp_madvise() && - !(opt_metadata_thp == metadata_thp_auto - && !base->auto_thp_switched)) { + if (metadata_thp_madvise() + && !(opt_metadata_thp == metadata_thp_auto + && !base->auto_thp_switched)) { assert(base->n_thp > 0); - base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> - LG_HUGEPAGE; + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE; } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); @@ -455,7 +458,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, bool metadata_use_hooks) { pszind_t pind_last = 0; - size_t extent_sn_next = 0; + size_t extent_sn_next = 0; /* * The base will contain the ehooks eventually, but it itself is @@ -463,9 +466,10 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, * memory, and then initialize the ehooks within the base_t. */ ehooks_t fake_ehooks; - ehooks_init(&fake_ehooks, metadata_use_hooks ? 
- (extent_hooks_t *)extent_hooks : - (extent_hooks_t *)&ehooks_default_extent_hooks, ind); + ehooks_init(&fake_ehooks, + metadata_use_hooks ? (extent_hooks_t *)extent_hooks + : (extent_hooks_t *)&ehooks_default_extent_hooks, + ind); base_block_t *block = base_block_alloc(tsdn, NULL, &fake_ehooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); @@ -473,17 +477,18 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, return NULL; } - size_t gap_size; - size_t base_alignment = CACHELINE; - size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); - base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->edata, - &gap_size, base_size, base_alignment); + size_t gap_size; + size_t base_alignment = CACHELINE; + size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment); + base_t *base = (base_t *)base_extent_bump_alloc_helper( + &block->edata, &gap_size, base_size, base_alignment); ehooks_init(&base->ehooks, (extent_hooks_t *)extent_hooks, ind); - ehooks_init(&base->ehooks_base, metadata_use_hooks ? - (extent_hooks_t *)extent_hooks : - (extent_hooks_t *)&ehooks_default_extent_hooks, ind); + ehooks_init(&base->ehooks_base, + metadata_use_hooks ? (extent_hooks_t *)extent_hooks + : (extent_hooks_t *)&ehooks_default_extent_hooks, + ind); if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { base_unmap(tsdn, &fake_ehooks, ind, block, block->size); return NULL; } @@ -502,9 +507,10 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; - base->n_thp = (opt_metadata_thp == metadata_thp_always) && - metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t)) - >> LG_HUGEPAGE : 0; + base->n_thp = (opt_metadata_thp == metadata_thp_always) + && metadata_thp_madvise() + ? 
HUGEPAGE_CEILING(sizeof(base_block_t)) >> LG_HUGEPAGE + : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); assert(base->n_thp << LG_HUGEPAGE <= base->mapped); @@ -512,8 +518,8 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, /* Locking here is only necessary because of assertions. */ malloc_mutex_lock(tsdn, &base->mtx); - base_extent_bump_alloc_post(tsdn, base, &block->edata, gap_size, base, - base_size); + base_extent_bump_alloc_post( + tsdn, base, &block->edata, gap_size, base, base_size); malloc_mutex_unlock(tsdn, &base->mtx); return base; @@ -521,13 +527,13 @@ base_new(tsdn_t *tsdn, unsigned ind, const extent_hooks_t *extent_hooks, void base_delete(tsdn_t *tsdn, base_t *base) { - ehooks_t *ehooks = base_ehooks_get_for_metadata(base); + ehooks_t *ehooks = base_ehooks_get_for_metadata(base); base_block_t *next = base->blocks; do { base_block_t *block = next; next = block->next; - base_unmap(tsdn, ehooks, base_ind_get(base), block, - block->size); + base_unmap( + tsdn, ehooks, base_ind_get(base), block, block->size); } while (next != NULL); } @@ -543,8 +549,8 @@ base_ehooks_get_for_metadata(base_t *base) { extent_hooks_t * base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) { - extent_hooks_t *old_extent_hooks = - ehooks_get_extent_hooks_ptr(&base->ehooks); + extent_hooks_t *old_extent_hooks = ehooks_get_extent_hooks_ptr( + &base->ehooks); ehooks_init(&base->ehooks, extent_hooks, ehooks_ind_get(&base->ehooks)); return old_extent_hooks; } @@ -602,9 +608,9 @@ base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { edata_t * base_alloc_edata(tsdn_t *tsdn, base_t *base) { - size_t esn, usize; - edata_t *edata = base_alloc_impl(tsdn, base, sizeof(edata_t), - EDATA_ALIGNMENT, &esn, &usize); + size_t esn, usize; + edata_t *edata = base_alloc_impl( + tsdn, base, sizeof(edata_t), EDATA_ALIGNMENT, &esn, &usize); if (edata == NULL) { return NULL; } @@ -618,8 +624,8 @@ 
base_alloc_edata(tsdn_t *tsdn, base_t *base) { void * base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) { size_t usize; - void *rtree = base_alloc_impl(tsdn, base, size, CACHELINE, NULL, - &usize); + void *rtree = base_alloc_impl( + tsdn, base, size, CACHELINE, NULL, &usize); if (rtree == NULL) { return NULL; } @@ -632,8 +638,8 @@ base_alloc_rtree(tsdn_t *tsdn, base_t *base, size_t size) { static inline void b0_alloc_header_size(size_t *header_size, size_t *alignment) { *alignment = QUANTUM; - *header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM : - sizeof(edata_t *); + *header_size = QUANTUM > sizeof(edata_t *) ? QUANTUM + : sizeof(edata_t *); } /* @@ -645,7 +651,7 @@ b0_alloc_header_size(size_t *header_size, size_t *alignment) { */ void * b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { - base_t *base = b0get(); + base_t *base = b0get(); edata_t *edata = base_alloc_base_edata(tsdn, base); if (edata == NULL) { return NULL; @@ -662,8 +668,8 @@ b0_alloc_tcache_stack(tsdn_t *tsdn, size_t stack_size) { b0_alloc_header_size(&header_size, &alignment); size_t alloc_size = sz_s2u(stack_size + header_size); - void *addr = base_alloc_impl(tsdn, base, alloc_size, alignment, &esn, - NULL); + void *addr = base_alloc_impl( + tsdn, base, alloc_size, alignment, &esn, NULL); if (addr == NULL) { edata_avail_insert(&base->edata_avail, edata); return NULL; @@ -683,8 +689,8 @@ b0_dalloc_tcache_stack(tsdn_t *tsdn, void *tcache_stack) { b0_alloc_header_size(&header_size, &alignment); edata_t *edata = *(edata_t **)((byte_t *)tcache_stack - header_size); - void *addr = edata_addr_get(edata); - size_t bsize = edata_bsize_get(edata); + void *addr = edata_addr_get(edata); + size_t bsize = edata_bsize_get(edata); /* Marked as "reused" to avoid double counting stats. 
*/ assert(base_edata_is_reused(edata)); assert(addr != NULL && bsize > 0); @@ -707,7 +713,8 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, malloc_mutex_lock(tsdn, &base->mtx); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); - assert(base->edata_allocated + base->rtree_allocated <= base->allocated); + assert( + base->edata_allocated + base->rtree_allocated <= base->allocated); *allocated = base->allocated; *edata_allocated = base->edata_allocated; *rtree_allocated = base->rtree_allocated; diff --git a/src/batcher.c b/src/batcher.c index 2570b3a9..af71dae5 100644 --- a/src/batcher.c +++ b/src/batcher.c @@ -18,8 +18,8 @@ batcher_init(batcher_t *batcher, size_t nelems_max) { * Returns an index (into some user-owned array) to use for pushing, or * BATCHER_NO_IDX if no index is free. */ -size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, - size_t elems_to_push) { +size_t +batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push) { assert(elems_to_push > 0); size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); if (nelems_guess + elems_to_push > batcher->nelems_max) { @@ -37,7 +37,8 @@ size_t batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, * racing accesses of the batcher can fail fast instead of trying to * acquire a mutex only to discover that there's no space for them. 
*/ - atomic_store_zu(&batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); + atomic_store_zu( + &batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); batcher->npushes++; return nelems; } @@ -75,7 +76,8 @@ batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { return nelems; } -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { +void +batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); malloc_mutex_unlock(tsdn, &batcher->mtx); } diff --git a/src/bin.c b/src/bin.c index 267aa0f3..98d1da02 100644 --- a/src/bin.c +++ b/src/bin.c @@ -10,8 +10,8 @@ unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; void (*bin_batching_test_after_push_hook)(size_t push_idx); void (*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); -void (*bin_batching_test_after_unlock_hook)(unsigned slab_dalloc_count, - bool list_empty); +void (*bin_batching_test_after_unlock_hook)( + unsigned slab_dalloc_count, bool list_empty); #endif bool @@ -49,7 +49,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { bool bin_init(bin_t *bin, unsigned binind) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } bin->slabcur = NULL; @@ -60,8 +60,8 @@ bin_init(bin_t *bin, unsigned binind) { } if (arena_bin_has_batch(binind)) { bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init(&batched_bin->remote_frees, - opt_bin_info_remote_free_max); + batcher_init( + &batched_bin->remote_frees, opt_bin_info_remote_free_max); } return false; } diff --git a/src/bin_info.c b/src/bin_info.c index f8a64ae3..de93418a 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -19,7 +19,7 @@ size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; bin_info_t bin_infos[SC_NBINS]; -szind_t bin_info_nbatched_sizes; +szind_t bin_info_nbatched_sizes; unsigned bin_info_nbatched_bins; unsigned bin_info_nunbatched_bins; @@ -28,12 
+28,12 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { for (unsigned i = 0; i < SC_NBINS; i++) { bin_info_t *bin_info = &infos[i]; - sc_t *sc = &sc_data->sc[i]; + sc_t *sc = &sc_data->sc[i]; bin_info->reg_size = ((size_t)1U << sc->lg_base) + ((size_t)sc->ndelta << sc->lg_delta); bin_info->slab_size = (sc->pgs << LG_PAGE); - bin_info->nregs = - (uint32_t)(bin_info->slab_size / bin_info->reg_size); + bin_info->nregs = (uint32_t)(bin_info->slab_size + / bin_info->reg_size); bin_info->n_shards = bin_shard_sizes[i]; bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); diff --git a/src/bitmap.c b/src/bitmap.c index 0ccedc5d..8ac81a67 100644 --- a/src/bitmap.c +++ b/src/bitmap.c @@ -10,7 +10,7 @@ void bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { unsigned i; - size_t group_count; + size_t group_count; assert(nbits > 0); assert(nbits <= (ZU(1) << LG_BITMAP_MAXBITS)); @@ -24,11 +24,11 @@ bitmap_info_init(bitmap_info_t *binfo, size_t nbits) { group_count = BITMAP_BITS2GROUPS(nbits); for (i = 1; group_count > 1; i++) { assert(i < BITMAP_MAX_LEVELS); - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset - + group_count; + binfo->levels[i].group_offset = + binfo->levels[i - 1].group_offset + group_count; group_count = BITMAP_BITS2GROUPS(group_count); } - binfo->levels[i].group_offset = binfo->levels[i-1].group_offset + binfo->levels[i].group_offset = binfo->levels[i - 1].group_offset + group_count; assert(binfo->levels[i].group_offset <= BITMAP_GROUPS_MAX); binfo->nlevels = i; @@ -42,7 +42,7 @@ bitmap_info_ngroups(const bitmap_info_t *binfo) { void bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { - size_t extra; + size_t extra; unsigned i; /* @@ -69,12 +69,13 @@ bitmap_init(bitmap_t *bitmap, const bitmap_info_t *binfo, bool fill) { bitmap[binfo->levels[1].group_offset - 1] >>= extra; } for (i = 1; i < binfo->nlevels; i++) { - size_t group_count = 
binfo->levels[i].group_offset - - binfo->levels[i-1].group_offset; - extra = (BITMAP_GROUP_NBITS - (group_count & - BITMAP_GROUP_NBITS_MASK)) & BITMAP_GROUP_NBITS_MASK; + size_t group_count = binfo->levels[i].group_offset + - binfo->levels[i - 1].group_offset; + extra = (BITMAP_GROUP_NBITS + - (group_count & BITMAP_GROUP_NBITS_MASK)) + & BITMAP_GROUP_NBITS_MASK; if (extra != 0) { - bitmap[binfo->levels[i+1].group_offset - 1] >>= extra; + bitmap[binfo->levels[i + 1].group_offset - 1] >>= extra; } } } diff --git a/src/buf_writer.c b/src/buf_writer.c index 7c6f7940..3c298502 100644 --- a/src/buf_writer.c +++ b/src/buf_writer.c @@ -43,8 +43,9 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, if (write_cb != NULL) { buf_writer->write_cb = write_cb; } else { - buf_writer->write_cb = je_malloc_message != NULL ? - je_malloc_message : wrtmessage; + buf_writer->write_cb = je_malloc_message != NULL + ? je_malloc_message + : wrtmessage; } buf_writer->cbopaque = cbopaque; assert(buf_len >= 2); @@ -52,8 +53,8 @@ buf_writer_init(tsdn_t *tsdn, buf_writer_t *buf_writer, write_cb_t *write_cb, buf_writer->buf = buf; buf_writer->internal_buf = false; } else { - buf_writer->buf = buf_writer_allocate_internal_buf(tsdn, - buf_len); + buf_writer->buf = buf_writer_allocate_internal_buf( + tsdn, buf_len); buf_writer->internal_buf = true; } if (buf_writer->buf != NULL) { @@ -111,13 +112,13 @@ buf_writer_terminate(tsdn_t *tsdn, buf_writer_t *buf_writer) { } void -buf_writer_pipe(buf_writer_t *buf_writer, read_cb_t *read_cb, - void *read_cbopaque) { +buf_writer_pipe( + buf_writer_t *buf_writer, read_cb_t *read_cb, void *read_cbopaque) { /* * A tiny local buffer in case the buffered writer failed to allocate * at init. 
*/ - static char backup_buf[16]; + static char backup_buf[16]; static buf_writer_t backup_buf_writer; buf_writer_assert(buf_writer); diff --git a/src/cache_bin.c b/src/cache_bin.c index 2f5afeb9..ec677948 100644 --- a/src/cache_bin.c +++ b/src/cache_bin.c @@ -8,8 +8,7 @@ const uintptr_t disabled_bin = JUNK_ADDR; void -cache_bin_info_init(cache_bin_info_t *info, - cache_bin_sz_t ncached_max) { +cache_bin_info_init(cache_bin_info_t *info, cache_bin_sz_t ncached_max) { assert(ncached_max <= CACHE_BIN_NCACHED_MAX); size_t stack_size = (size_t)ncached_max * sizeof(void *); assert(stack_size < ((size_t)1 << (sizeof(cache_bin_sz_t) * 8))); @@ -51,27 +50,26 @@ cache_bin_info_compute_alloc(const cache_bin_info_t *infos, szind_t ninfos, } void -cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, void *alloc, - size_t *cur_offset) { +cache_bin_preincrement(const cache_bin_info_t *infos, szind_t ninfos, + void *alloc, size_t *cur_offset) { if (config_debug) { size_t computed_size; size_t computed_alignment; /* Pointer should be as aligned as we asked for. */ - cache_bin_info_compute_alloc(infos, ninfos, &computed_size, - &computed_alignment); + cache_bin_info_compute_alloc( + infos, ninfos, &computed_size, &computed_alignment); assert(((uintptr_t)alloc & (computed_alignment - 1)) == 0); } - *(uintptr_t *)((byte_t *)alloc + *cur_offset) = - cache_bin_preceding_junk; + *(uintptr_t *)((byte_t *)alloc + + *cur_offset) = cache_bin_preceding_junk; *cur_offset += sizeof(void *); } void cache_bin_postincrement(void *alloc, size_t *cur_offset) { - *(uintptr_t *)((byte_t *)alloc + *cur_offset) = - cache_bin_trailing_junk; + *(uintptr_t *)((byte_t *)alloc + *cur_offset) = cache_bin_trailing_junk; *cur_offset += sizeof(void *); } @@ -83,8 +81,8 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, * will access the slots toward higher addresses (for the benefit of * adjacent prefetch). 
*/ - void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); - void *full_position = stack_cur; + void *stack_cur = (void *)((byte_t *)alloc + *cur_offset); + void *full_position = stack_cur; cache_bin_sz_t bin_stack_size = info->ncached_max * sizeof(void *); *cur_offset += bin_stack_size; @@ -96,8 +94,8 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, bin->low_bits_full = (cache_bin_sz_t)(uintptr_t)full_position; bin->low_bits_empty = (cache_bin_sz_t)(uintptr_t)empty_position; cache_bin_info_init(&bin->bin_info, info->ncached_max); - cache_bin_sz_t free_spots = cache_bin_diff(bin, - bin->low_bits_full, (cache_bin_sz_t)(uintptr_t)bin->stack_head); + cache_bin_sz_t free_spots = cache_bin_diff(bin, bin->low_bits_full, + (cache_bin_sz_t)(uintptr_t)bin->stack_head); assert(free_spots == bin_stack_size); if (!cache_bin_disabled(bin)) { assert(cache_bin_ncached_get_local(bin) == 0); @@ -109,8 +107,8 @@ cache_bin_init(cache_bin_t *bin, const cache_bin_info_t *info, void *alloc, void cache_bin_init_disabled(cache_bin_t *bin, cache_bin_sz_t ncached_max) { - const void *fake_stack = cache_bin_disabled_bin_stack(); - size_t fake_offset = 0; + const void *fake_stack = cache_bin_disabled_bin_stack(); + size_t fake_offset = 0; cache_bin_info_t fake_info; cache_bin_info_init(&fake_info, 0); cache_bin_init(bin, &fake_info, (void *)fake_stack, &fake_offset); diff --git a/src/ckh.c b/src/ckh.c index 8db4319c..80688162 100644 --- a/src/ckh.c +++ b/src/ckh.c @@ -49,8 +49,8 @@ /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); -static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); +static bool ckh_grow(tsd_t *tsd, ckh_t *ckh); +static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); /******************************************************************************/ @@ -60,7 +60,7 @@ static void ckh_shrink(tsd_t *tsd, ckh_t *ckh); */ static size_t ckh_bucket_search(ckh_t *ckh, size_t bucket, const void *key) { - ckhc_t *cell; + ckhc_t *cell; unsigned i; for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { @@ -98,20 +98,20 @@ ckh_isearch(ckh_t *ckh, const void *key) { } static bool -ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, - const void *data) { - ckhc_t *cell; +ckh_try_bucket_insert( + ckh_t *ckh, size_t bucket, const void *key, const void *data) { + ckhc_t *cell; unsigned offset, i; /* * Cycle through the cells in the bucket, starting at a random position. * The randomness avoids worst-case search overhead as buckets fill up. */ - offset = (unsigned)prng_lg_range_u64(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + offset = (unsigned)prng_lg_range_u64( + &ckh->prng_state, LG_CKH_BUCKET_CELLS); for (i = 0; i < (ZU(1) << LG_CKH_BUCKET_CELLS); i++) { - cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + - ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; + cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + + ((i + offset) & ((ZU(1) << LG_CKH_BUCKET_CELLS) - 1))]; if (cell->key == NULL) { cell->key = key; cell->data = data; @@ -130,12 +130,12 @@ ckh_try_bucket_insert(ckh_t *ckh, size_t bucket, const void *key, * eviction/relocation bucket cycle. 
*/ static bool -ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, - void const **argdata) { +ckh_evict_reloc_insert( + ckh_t *ckh, size_t argbucket, void const **argkey, void const **argdata) { const void *key, *data, *tkey, *tdata; - ckhc_t *cell; - size_t hashes[2], bucket, tbucket; - unsigned i; + ckhc_t *cell; + size_t hashes[2], bucket, tbucket; + unsigned i; bucket = argbucket; key = *argkey; @@ -149,15 +149,18 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, * were an item for which both hashes indicated the same * bucket. */ - i = (unsigned)prng_lg_range_u64(&ckh->prng_state, - LG_CKH_BUCKET_CELLS); + i = (unsigned)prng_lg_range_u64( + &ckh->prng_state, LG_CKH_BUCKET_CELLS); cell = &ckh->tab[(bucket << LG_CKH_BUCKET_CELLS) + i]; assert(cell->key != NULL); /* Swap cell->{key,data} and {key,data} (evict). */ - tkey = cell->key; tdata = cell->data; - cell->key = key; cell->data = data; - key = tkey; data = tdata; + tkey = cell->key; + tdata = cell->data; + cell->key = key; + cell->data = data; + key = tkey; + data = tdata; #ifdef CKH_COUNT ckh->nrelocs++; @@ -167,8 +170,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, ckh->hash(key, hashes); tbucket = hashes[1] & ((ZU(1) << ckh->lg_curbuckets) - 1); if (tbucket == bucket) { - tbucket = hashes[0] & ((ZU(1) << ckh->lg_curbuckets) - - 1); + tbucket = hashes[0] + & ((ZU(1) << ckh->lg_curbuckets) - 1); /* * It may be that (tbucket == bucket) still, if the * item's hashes both indicate this bucket. 
However, @@ -201,8 +204,8 @@ ckh_evict_reloc_insert(ckh_t *ckh, size_t argbucket, void const **argkey, } static bool -ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { - size_t hashes[2], bucket; +ckh_try_insert(ckh_t *ckh, void const **argkey, void const **argdata) { + size_t hashes[2], bucket; const void *key = *argkey; const void *data = *argdata; @@ -232,7 +235,7 @@ ckh_try_insert(ckh_t *ckh, void const**argkey, void const**argdata) { */ static bool ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { - size_t count, i, nins; + size_t count, i, nins; const void *key, *data; count = ckh->count; @@ -254,8 +257,8 @@ ckh_rebuild(ckh_t *ckh, ckhc_t *aTab) { static bool ckh_grow(tsd_t *tsd, ckh_t *ckh) { - bool ret; - ckhc_t *tab, *ttab; + bool ret; + ckhc_t *tab, *ttab; unsigned lg_prevbuckets, lg_curcells; #ifdef CKH_COUNT @@ -274,8 +277,7 @@ ckh_grow(tsd_t *tsd, ckh_t *ckh) { lg_curcells++; usize = sz_sa2u(sizeof(ckhc_t) << lg_curcells, CACHELINE); - if (unlikely(usize == 0 - || usize > SC_LARGE_MAXCLASS)) { + if (unlikely(usize == 0 || usize > SC_LARGE_MAXCLASS)) { ret = true; goto label_return; } @@ -309,8 +311,8 @@ label_return: static void ckh_shrink(tsd_t *tsd, ckh_t *ckh) { - ckhc_t *tab, *ttab; - size_t usize; + ckhc_t *tab, *ttab; + size_t usize; unsigned lg_prevbuckets, lg_curcells; /* @@ -358,8 +360,8 @@ ckh_shrink(tsd_t *tsd, ckh_t *ckh) { bool ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash, ckh_keycomp_t *keycomp) { - bool ret; - size_t mincells, usize; + bool ret; + size_t mincells, usize; unsigned lg_mincells; assert(minitems > 0); @@ -386,8 +388,7 @@ ckh_new(tsd_t *tsd, ckh_t *ckh, size_t minitems, ckh_hash_t *ckh_hash, assert(LG_CKH_BUCKET_CELLS > 0); mincells = ((minitems + (3 - (minitems % 3))) / 3) << 2; for (lg_mincells = LG_CKH_BUCKET_CELLS; - (ZU(1) << lg_mincells) < mincells; - lg_mincells++) { + (ZU(1) << lg_mincells) < mincells; lg_mincells++) { /* Do nothing. 
*/ } ckh->lg_minbuckets = lg_mincells - LG_CKH_BUCKET_CELLS; @@ -417,11 +418,12 @@ ckh_delete(tsd_t *tsd, ckh_t *ckh) { assert(ckh != NULL); #ifdef CKH_VERBOSE - malloc_printf( - "%s(%p): ngrows: %"FMTu64", nshrinks: %"FMTu64"," - " nshrinkfails: %"FMTu64", ninserts: %"FMTu64"," - " nrelocs: %"FMTu64"\n", __func__, ckh, - (unsigned long long)ckh->ngrows, + malloc_printf("%s(%p): ngrows: %" FMTu64 ", nshrinks: %" FMTu64 + "," + " nshrinkfails: %" FMTu64 ", ninserts: %" FMTu64 + "," + " nrelocs: %" FMTu64 "\n", + __func__, ckh, (unsigned long long)ckh->ngrows, (unsigned long long)ckh->nshrinks, (unsigned long long)ckh->nshrinkfails, (unsigned long long)ckh->ninserts, @@ -445,8 +447,9 @@ bool ckh_iter(ckh_t *ckh, size_t *tabind, void **key, void **data) { size_t i, ncells; - for (i = *tabind, ncells = (ZU(1) << (ckh->lg_curbuckets + - LG_CKH_BUCKET_CELLS)); i < ncells; i++) { + for (i = *tabind, + ncells = (ZU(1) << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS)); + i < ncells; i++) { if (ckh->tab[i].key != NULL) { if (key != NULL) { *key = (void *)ckh->tab[i].key; @@ -486,8 +489,8 @@ label_return: } bool -ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, - void **data) { +ckh_remove( + tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, void **data) { size_t cell; assert(ckh != NULL); @@ -505,9 +508,9 @@ ckh_remove(tsd_t *tsd, ckh_t *ckh, const void *searchkey, void **key, ckh->count--; /* Try to halve the table if it is less than 1/4 full. */ - if (ckh->count < (ZU(1) << (ckh->lg_curbuckets - + LG_CKH_BUCKET_CELLS - 2)) && ckh->lg_curbuckets - > ckh->lg_minbuckets) { + if (ckh->count < (ZU(1) + << (ckh->lg_curbuckets + LG_CKH_BUCKET_CELLS - 2)) + && ckh->lg_curbuckets > ckh->lg_minbuckets) { /* Ignore error due to OOM. 
*/ ckh_shrink(tsd, ckh); } @@ -554,8 +557,8 @@ ckh_string_keycomp(const void *k1, const void *k2) { void ckh_pointer_hash(const void *key, size_t r_hash[2]) { union { - const void *v; - size_t i; + const void *v; + size_t i; } u; assert(sizeof(u.v) == sizeof(u.i)); diff --git a/src/counter.c b/src/counter.c index 8f1ae3af..8257a062 100644 --- a/src/counter.c +++ b/src/counter.c @@ -6,7 +6,7 @@ bool counter_accum_init(counter_accum_t *counter, uint64_t interval) { if (LOCKEDINT_MTX_INIT(counter->mtx, "counter_accum", - WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_COUNTER_ACCUM, malloc_mutex_rank_exclusive)) { return true; } locked_init_u64_unsynchronized(&counter->accumbytes, 0); diff --git a/src/ctl.c b/src/ctl.c index 4f06363a..9e9a4b43 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -25,10 +25,10 @@ * ctl_mtx protects the following: * - ctl_stats->* */ -static malloc_mutex_t ctl_mtx; -static bool ctl_initialized; -static ctl_stats_t *ctl_stats; -static ctl_arenas_t *ctl_arenas; +static malloc_mutex_t ctl_mtx; +static bool ctl_initialized; +static ctl_stats_t *ctl_stats; +static ctl_arenas_t *ctl_arenas; /******************************************************************************/ /* Helpers for named and indexed nodes. */ @@ -53,13 +53,13 @@ ctl_indexed_node(const ctl_node_t *node) { /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -#define CTL_PROTO(n) \ -static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen); +#define CTL_PROTO(n) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen); -#define INDEX_PROTO(n) \ -static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ - const size_t *mib, size_t miblen, size_t i); +#define INDEX_PROTO(n) \ + static const ctl_named_node_t *n##_index( \ + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i); CTL_PROTO(version) CTL_PROTO(epoch) @@ -374,14 +374,14 @@ CTL_PROTO(experimental_prof_recent_alloc_dump) CTL_PROTO(experimental_batch_alloc) CTL_PROTO(experimental_arenas_create_ext) -#define MUTEX_STATS_CTL_PROTO_GEN(n) \ -CTL_PROTO(stats_##n##_num_ops) \ -CTL_PROTO(stats_##n##_num_wait) \ -CTL_PROTO(stats_##n##_num_spin_acq) \ -CTL_PROTO(stats_##n##_num_owner_switch) \ -CTL_PROTO(stats_##n##_total_wait_time) \ -CTL_PROTO(stats_##n##_max_wait_time) \ -CTL_PROTO(stats_##n##_max_num_thds) +#define MUTEX_STATS_CTL_PROTO_GEN(n) \ + CTL_PROTO(stats_##n##_num_ops) \ + CTL_PROTO(stats_##n##_num_wait) \ + CTL_PROTO(stats_##n##_num_spin_acq) \ + CTL_PROTO(stats_##n##_num_owner_switch) \ + CTL_PROTO(stats_##n##_total_wait_time) \ + CTL_PROTO(stats_##n##_max_wait_time) \ + CTL_PROTO(stats_##n##_max_num_thds) /* Global mutexes. */ #define OP(mtx) MUTEX_STATS_CTL_PROTO_GEN(mutexes_##mtx) @@ -402,542 +402,448 @@ CTL_PROTO(stats_mutexes_reset) /******************************************************************************/ /* mallctl tree. 
*/ -#define NAME(n) {true}, n -#define CHILD(t, c) \ - sizeof(c##_node) / sizeof(ctl_##t##_node_t), \ - (ctl_node_t *)c##_node, \ - NULL -#define CTL(c) 0, NULL, c##_ctl +#define NAME(n) {true}, n +#define CHILD(t, c) \ + sizeof(c##_node) / sizeof(ctl_##t##_node_t), (ctl_node_t *)c##_node, \ + NULL +#define CTL(c) 0, NULL, c##_ctl /* * Only handles internal indexed nodes, since there are currently no external * ones. */ -#define INDEX(i) {false}, i##_index +#define INDEX(i) {false}, i##_index -static const ctl_named_node_t thread_tcache_ncached_max_node[] = { - {NAME("read_sizeclass"), - CTL(thread_tcache_ncached_max_read_sizeclass)}, - {NAME("write"), CTL(thread_tcache_ncached_max_write)} +static const ctl_named_node_t thread_tcache_ncached_max_node[] = { + {NAME("read_sizeclass"), CTL(thread_tcache_ncached_max_read_sizeclass)}, + {NAME("write"), CTL(thread_tcache_ncached_max_write)}}; + +static const ctl_named_node_t thread_tcache_node[] = { + {NAME("enabled"), CTL(thread_tcache_enabled)}, + {NAME("max"), CTL(thread_tcache_max)}, + {NAME("flush"), CTL(thread_tcache_flush)}, + {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)}}; + +static const ctl_named_node_t thread_peak_node[] = { + {NAME("read"), CTL(thread_peak_read)}, + {NAME("reset"), CTL(thread_peak_reset)}, }; -static const ctl_named_node_t thread_tcache_node[] = { - {NAME("enabled"), CTL(thread_tcache_enabled)}, - {NAME("max"), CTL(thread_tcache_max)}, - {NAME("flush"), CTL(thread_tcache_flush)}, - {NAME("ncached_max"), CHILD(named, thread_tcache_ncached_max)} -}; +static const ctl_named_node_t thread_prof_node[] = { + {NAME("name"), CTL(thread_prof_name)}, + {NAME("active"), CTL(thread_prof_active)}}; -static const ctl_named_node_t thread_peak_node[] = { - {NAME("read"), CTL(thread_peak_read)}, - {NAME("reset"), CTL(thread_peak_reset)}, -}; +static const ctl_named_node_t thread_node[] = { + {NAME("arena"), CTL(thread_arena)}, + {NAME("allocated"), CTL(thread_allocated)}, + 
{NAME("allocatedp"), CTL(thread_allocatedp)}, + {NAME("deallocated"), CTL(thread_deallocated)}, + {NAME("deallocatedp"), CTL(thread_deallocatedp)}, + {NAME("tcache"), CHILD(named, thread_tcache)}, + {NAME("peak"), CHILD(named, thread_peak)}, + {NAME("prof"), CHILD(named, thread_prof)}, + {NAME("idle"), CTL(thread_idle)}}; -static const ctl_named_node_t thread_prof_node[] = { - {NAME("name"), CTL(thread_prof_name)}, - {NAME("active"), CTL(thread_prof_active)} -}; - -static const ctl_named_node_t thread_node[] = { - {NAME("arena"), CTL(thread_arena)}, - {NAME("allocated"), CTL(thread_allocated)}, - {NAME("allocatedp"), CTL(thread_allocatedp)}, - {NAME("deallocated"), CTL(thread_deallocated)}, - {NAME("deallocatedp"), CTL(thread_deallocatedp)}, - {NAME("tcache"), CHILD(named, thread_tcache)}, - {NAME("peak"), CHILD(named, thread_peak)}, - {NAME("prof"), CHILD(named, thread_prof)}, - {NAME("idle"), CTL(thread_idle)} -}; - -static const ctl_named_node_t config_node[] = { - {NAME("cache_oblivious"), CTL(config_cache_oblivious)}, - {NAME("debug"), CTL(config_debug)}, - {NAME("fill"), CTL(config_fill)}, - {NAME("lazy_lock"), CTL(config_lazy_lock)}, - {NAME("malloc_conf"), CTL(config_malloc_conf)}, - {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, - {NAME("prof"), CTL(config_prof)}, - {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, - {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, - {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, - {NAME("stats"), CTL(config_stats)}, - {NAME("utrace"), CTL(config_utrace)}, - {NAME("xmalloc"), CTL(config_xmalloc)} -}; +static const ctl_named_node_t config_node[] = { + {NAME("cache_oblivious"), CTL(config_cache_oblivious)}, + {NAME("debug"), CTL(config_debug)}, {NAME("fill"), CTL(config_fill)}, + {NAME("lazy_lock"), CTL(config_lazy_lock)}, + {NAME("malloc_conf"), CTL(config_malloc_conf)}, + {NAME("opt_safety_checks"), CTL(config_opt_safety_checks)}, + {NAME("prof"), CTL(config_prof)}, + {NAME("prof_libgcc"), 
CTL(config_prof_libgcc)}, + {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, + {NAME("prof_frameptr"), CTL(config_prof_frameptr)}, + {NAME("stats"), CTL(config_stats)}, {NAME("utrace"), CTL(config_utrace)}, + {NAME("xmalloc"), CTL(config_xmalloc)}}; static const ctl_named_node_t opt_malloc_conf_node[] = { - {NAME("symlink"), CTL(opt_malloc_conf_symlink)}, - {NAME("env_var"), CTL(opt_malloc_conf_env_var)}, - {NAME("global_var"), CTL(opt_malloc_conf_global_var)}, - {NAME("global_var_2_conf_harder"), - CTL(opt_malloc_conf_global_var_2_conf_harder)} -}; + {NAME("symlink"), CTL(opt_malloc_conf_symlink)}, + {NAME("env_var"), CTL(opt_malloc_conf_env_var)}, + {NAME("global_var"), CTL(opt_malloc_conf_global_var)}, + {NAME("global_var_2_conf_harder"), + CTL(opt_malloc_conf_global_var_2_conf_harder)}}; -static const ctl_named_node_t opt_node[] = { - {NAME("abort"), CTL(opt_abort)}, - {NAME("abort_conf"), CTL(opt_abort_conf)}, - {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, - {NAME("trust_madvise"), CTL(opt_trust_madvise)}, - {NAME("confirm_conf"), CTL(opt_confirm_conf)}, - {NAME("hpa"), CTL(opt_hpa)}, - {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, - {NAME("hpa_hugification_threshold"), - CTL(opt_hpa_hugification_threshold)}, - {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, - {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)}, - {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, - {NAME("experimental_hpa_max_purge_nhp"), - CTL(opt_experimental_hpa_max_purge_nhp)}, - {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, - {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, - {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, - {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_bytes_after_flush"), - CTL(opt_hpa_sec_bytes_after_flush)}, - {NAME("hpa_sec_batch_fill_extra"), - CTL(opt_hpa_sec_batch_fill_extra)}, - {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, - 
{NAME("metadata_thp"), CTL(opt_metadata_thp)}, - {NAME("retain"), CTL(opt_retain)}, - {NAME("dss"), CTL(opt_dss)}, - {NAME("narenas"), CTL(opt_narenas)}, - {NAME("percpu_arena"), CTL(opt_percpu_arena)}, - {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, - {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, - {NAME("background_thread"), CTL(opt_background_thread)}, - {NAME("max_background_threads"), CTL(opt_max_background_threads)}, - {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, - {NAME("stats_print"), CTL(opt_stats_print)}, - {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, - {NAME("stats_interval"), CTL(opt_stats_interval)}, - {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)}, - {NAME("junk"), CTL(opt_junk)}, - {NAME("zero"), CTL(opt_zero)}, - {NAME("utrace"), CTL(opt_utrace)}, - {NAME("xmalloc"), CTL(opt_xmalloc)}, - {NAME("experimental_infallible_new"), - CTL(opt_experimental_infallible_new)}, - {NAME("experimental_tcache_gc"), - CTL(opt_experimental_tcache_gc)}, - {NAME("max_batched_size"), CTL(opt_max_batched_size)}, - {NAME("remote_free_max"), CTL(opt_remote_free_max)}, - {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, - {NAME("tcache"), CTL(opt_tcache)}, - {NAME("tcache_max"), CTL(opt_tcache_max)}, - {NAME("tcache_nslots_small_min"), - CTL(opt_tcache_nslots_small_min)}, - {NAME("tcache_nslots_small_max"), - CTL(opt_tcache_nslots_small_max)}, - {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)}, - {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, - {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, - {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, - {NAME("lg_tcache_flush_small_div"), - CTL(opt_lg_tcache_flush_small_div)}, - {NAME("lg_tcache_flush_large_div"), - CTL(opt_lg_tcache_flush_large_div)}, - {NAME("thp"), CTL(opt_thp)}, - {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, - 
{NAME("prof"), CTL(opt_prof)}, - {NAME("prof_prefix"), CTL(opt_prof_prefix)}, - {NAME("prof_active"), CTL(opt_prof_active)}, - {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, - {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, - {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("experimental_lg_prof_threshold"), CTL(opt_experimental_lg_prof_threshold)}, - {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, - {NAME("prof_gdump"), CTL(opt_prof_gdump)}, - {NAME("prof_final"), CTL(opt_prof_final)}, - {NAME("prof_leak"), CTL(opt_prof_leak)}, - {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, - {NAME("prof_accum"), CTL(opt_prof_accum)}, - {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)}, - {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, - {NAME("prof_stats"), CTL(opt_prof_stats)}, - {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, - {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, - {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, - {NAME("zero_realloc"), CTL(opt_zero_realloc)}, - {NAME("debug_double_free_max_scan"), - CTL(opt_debug_double_free_max_scan)}, - {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, - {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, - {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)} -}; +static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, + {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, + {NAME("trust_madvise"), CTL(opt_trust_madvise)}, + {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, + {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, + {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, + {NAME("hpa_hugify_delay_ms"), CTL(opt_hpa_hugify_delay_ms)}, + {NAME("hpa_hugify_sync"), CTL(opt_hpa_hugify_sync)}, + {NAME("hpa_min_purge_interval_ms"), 
CTL(opt_hpa_min_purge_interval_ms)}, + {NAME("experimental_hpa_max_purge_nhp"), + CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, + {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, + {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, + {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, + {NAME("hpa_sec_bytes_after_flush"), CTL(opt_hpa_sec_bytes_after_flush)}, + {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, + {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, + {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, + {NAME("narenas"), CTL(opt_narenas)}, + {NAME("percpu_arena"), CTL(opt_percpu_arena)}, + {NAME("oversize_threshold"), CTL(opt_oversize_threshold)}, + {NAME("mutex_max_spin"), CTL(opt_mutex_max_spin)}, + {NAME("background_thread"), CTL(opt_background_thread)}, + {NAME("max_background_threads"), CTL(opt_max_background_threads)}, + {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, + {NAME("stats_print"), CTL(opt_stats_print)}, + {NAME("stats_print_opts"), CTL(opt_stats_print_opts)}, + {NAME("stats_interval"), CTL(opt_stats_interval)}, + {NAME("stats_interval_opts"), CTL(opt_stats_interval_opts)}, + {NAME("junk"), CTL(opt_junk)}, {NAME("zero"), CTL(opt_zero)}, + {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, + {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, + {NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)}, + {NAME("max_batched_size"), CTL(opt_max_batched_size)}, + {NAME("remote_free_max"), CTL(opt_remote_free_max)}, + {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, + {NAME("tcache"), CTL(opt_tcache)}, + {NAME("tcache_max"), CTL(opt_tcache_max)}, + {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, + {NAME("tcache_nslots_small_max"), 
CTL(opt_tcache_nslots_small_max)}, + {NAME("tcache_nslots_large"), CTL(opt_tcache_nslots_large)}, + {NAME("lg_tcache_nslots_mul"), CTL(opt_lg_tcache_nslots_mul)}, + {NAME("tcache_gc_incr_bytes"), CTL(opt_tcache_gc_incr_bytes)}, + {NAME("tcache_gc_delay_bytes"), CTL(opt_tcache_gc_delay_bytes)}, + {NAME("lg_tcache_flush_small_div"), CTL(opt_lg_tcache_flush_small_div)}, + {NAME("lg_tcache_flush_large_div"), CTL(opt_lg_tcache_flush_large_div)}, + {NAME("thp"), CTL(opt_thp)}, + {NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, + {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, + {NAME("prof_active"), CTL(opt_prof_active)}, + {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, + {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, + {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, + {NAME("experimental_lg_prof_threshold"), + CTL(opt_experimental_lg_prof_threshold)}, + {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, + {NAME("prof_gdump"), CTL(opt_prof_gdump)}, + {NAME("prof_final"), CTL(opt_prof_final)}, + {NAME("prof_leak"), CTL(opt_prof_leak)}, + {NAME("prof_leak_error"), CTL(opt_prof_leak_error)}, + {NAME("prof_accum"), CTL(opt_prof_accum)}, + {NAME("prof_pid_namespace"), CTL(opt_prof_pid_namespace)}, + {NAME("prof_recent_alloc_max"), CTL(opt_prof_recent_alloc_max)}, + {NAME("prof_stats"), CTL(opt_prof_stats)}, + {NAME("prof_sys_thread_name"), CTL(opt_prof_sys_thread_name)}, + {NAME("prof_time_resolution"), CTL(opt_prof_time_res)}, + {NAME("lg_san_uaf_align"), CTL(opt_lg_san_uaf_align)}, + {NAME("zero_realloc"), CTL(opt_zero_realloc)}, + {NAME("debug_double_free_max_scan"), CTL(opt_debug_double_free_max_scan)}, + {NAME("disable_large_size_classes"), CTL(opt_disable_large_size_classes)}, + {NAME("process_madvise_max_batch"), CTL(opt_process_madvise_max_batch)}, + {NAME("malloc_conf"), CHILD(named, opt_malloc_conf)}}; -static const ctl_named_node_t tcache_node[] = { - {NAME("create"), CTL(tcache_create)}, 
- {NAME("flush"), CTL(tcache_flush)}, - {NAME("destroy"), CTL(tcache_destroy)} -}; +static const ctl_named_node_t tcache_node[] = { + {NAME("create"), CTL(tcache_create)}, {NAME("flush"), CTL(tcache_flush)}, + {NAME("destroy"), CTL(tcache_destroy)}}; static const ctl_named_node_t arena_i_node[] = { - {NAME("initialized"), CTL(arena_i_initialized)}, - {NAME("decay"), CTL(arena_i_decay)}, - {NAME("purge"), CTL(arena_i_purge)}, - {NAME("reset"), CTL(arena_i_reset)}, - {NAME("destroy"), CTL(arena_i_destroy)}, - {NAME("dss"), CTL(arena_i_dss)}, - /* + {NAME("initialized"), CTL(arena_i_initialized)}, + {NAME("decay"), CTL(arena_i_decay)}, {NAME("purge"), CTL(arena_i_purge)}, + {NAME("reset"), CTL(arena_i_reset)}, + {NAME("destroy"), CTL(arena_i_destroy)}, {NAME("dss"), CTL(arena_i_dss)}, + /* * Undocumented for now, since we anticipate an arena API in flux after * we cut the last 5-series release. */ - {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, - {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, - {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}, - {NAME("name"), CTL(arena_i_name)} -}; + {NAME("oversize_threshold"), CTL(arena_i_oversize_threshold)}, + {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)}, + {NAME("name"), CTL(arena_i_name)}}; static const ctl_named_node_t super_arena_i_node[] = { - {NAME(""), CHILD(named, arena_i)} -}; + {NAME(""), CHILD(named, arena_i)}}; -static const ctl_indexed_node_t arena_node[] = { - {INDEX(arena_i)} -}; +static const ctl_indexed_node_t arena_node[] = {{INDEX(arena_i)}}; static const ctl_named_node_t arenas_bin_i_node[] = { - {NAME("size"), CTL(arenas_bin_i_size)}, - {NAME("nregs"), CTL(arenas_bin_i_nregs)}, - 
{NAME("slab_size"), CTL(arenas_bin_i_slab_size)}, - {NAME("nshards"), CTL(arenas_bin_i_nshards)} -}; + {NAME("size"), CTL(arenas_bin_i_size)}, + {NAME("nregs"), CTL(arenas_bin_i_nregs)}, + {NAME("slab_size"), CTL(arenas_bin_i_slab_size)}, + {NAME("nshards"), CTL(arenas_bin_i_nshards)}}; static const ctl_named_node_t super_arenas_bin_i_node[] = { - {NAME(""), CHILD(named, arenas_bin_i)} -}; + {NAME(""), CHILD(named, arenas_bin_i)}}; -static const ctl_indexed_node_t arenas_bin_node[] = { - {INDEX(arenas_bin_i)} -}; +static const ctl_indexed_node_t arenas_bin_node[] = {{INDEX(arenas_bin_i)}}; static const ctl_named_node_t arenas_lextent_i_node[] = { - {NAME("size"), CTL(arenas_lextent_i_size)} -}; + {NAME("size"), CTL(arenas_lextent_i_size)}}; static const ctl_named_node_t super_arenas_lextent_i_node[] = { - {NAME(""), CHILD(named, arenas_lextent_i)} -}; + {NAME(""), CHILD(named, arenas_lextent_i)}}; static const ctl_indexed_node_t arenas_lextent_node[] = { - {INDEX(arenas_lextent_i)} -}; + {INDEX(arenas_lextent_i)}}; static const ctl_named_node_t arenas_node[] = { - {NAME("narenas"), CTL(arenas_narenas)}, - {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, - {NAME("quantum"), CTL(arenas_quantum)}, - {NAME("page"), CTL(arenas_page)}, - {NAME("hugepage"), CTL(arenas_hugepage)}, - {NAME("tcache_max"), CTL(arenas_tcache_max)}, - {NAME("nbins"), CTL(arenas_nbins)}, - {NAME("nhbins"), CTL(arenas_nhbins)}, - {NAME("bin"), CHILD(indexed, arenas_bin)}, - {NAME("nlextents"), CTL(arenas_nlextents)}, - {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("create"), CTL(arenas_create)}, - {NAME("lookup"), CTL(arenas_lookup)} -}; + {NAME("narenas"), CTL(arenas_narenas)}, + {NAME("dirty_decay_ms"), CTL(arenas_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(arenas_muzzy_decay_ms)}, + {NAME("quantum"), CTL(arenas_quantum)}, {NAME("page"), CTL(arenas_page)}, + {NAME("hugepage"), CTL(arenas_hugepage)}, + 
{NAME("tcache_max"), CTL(arenas_tcache_max)}, + {NAME("nbins"), CTL(arenas_nbins)}, {NAME("nhbins"), CTL(arenas_nhbins)}, + {NAME("bin"), CHILD(indexed, arenas_bin)}, + {NAME("nlextents"), CTL(arenas_nlextents)}, + {NAME("lextent"), CHILD(indexed, arenas_lextent)}, + {NAME("create"), CTL(arenas_create)}, {NAME("lookup"), CTL(arenas_lookup)}}; static const ctl_named_node_t prof_stats_bins_i_node[] = { - {NAME("live"), CTL(prof_stats_bins_i_live)}, - {NAME("accum"), CTL(prof_stats_bins_i_accum)} -}; + {NAME("live"), CTL(prof_stats_bins_i_live)}, + {NAME("accum"), CTL(prof_stats_bins_i_accum)}}; static const ctl_named_node_t super_prof_stats_bins_i_node[] = { - {NAME(""), CHILD(named, prof_stats_bins_i)} -}; + {NAME(""), CHILD(named, prof_stats_bins_i)}}; static const ctl_indexed_node_t prof_stats_bins_node[] = { - {INDEX(prof_stats_bins_i)} -}; + {INDEX(prof_stats_bins_i)}}; static const ctl_named_node_t prof_stats_lextents_i_node[] = { - {NAME("live"), CTL(prof_stats_lextents_i_live)}, - {NAME("accum"), CTL(prof_stats_lextents_i_accum)} -}; + {NAME("live"), CTL(prof_stats_lextents_i_live)}, + {NAME("accum"), CTL(prof_stats_lextents_i_accum)}}; static const ctl_named_node_t super_prof_stats_lextents_i_node[] = { - {NAME(""), CHILD(named, prof_stats_lextents_i)} -}; + {NAME(""), CHILD(named, prof_stats_lextents_i)}}; static const ctl_indexed_node_t prof_stats_lextents_node[] = { - {INDEX(prof_stats_lextents_i)} + {INDEX(prof_stats_lextents_i)}}; + +static const ctl_named_node_t prof_stats_node[] = { + {NAME("bins"), CHILD(indexed, prof_stats_bins)}, + {NAME("lextents"), CHILD(indexed, prof_stats_lextents)}, }; -static const ctl_named_node_t prof_stats_node[] = { - {NAME("bins"), CHILD(indexed, prof_stats_bins)}, - {NAME("lextents"), CHILD(indexed, prof_stats_lextents)}, -}; - -static const ctl_named_node_t prof_node[] = { - {NAME("thread_active_init"), CTL(prof_thread_active_init)}, - {NAME("active"), CTL(prof_active)}, - {NAME("dump"), CTL(prof_dump)}, - 
{NAME("gdump"), CTL(prof_gdump)}, - {NAME("prefix"), CTL(prof_prefix)}, - {NAME("reset"), CTL(prof_reset)}, - {NAME("interval"), CTL(prof_interval)}, - {NAME("lg_sample"), CTL(lg_prof_sample)}, - {NAME("log_start"), CTL(prof_log_start)}, - {NAME("log_stop"), CTL(prof_log_stop)}, - {NAME("stats"), CHILD(named, prof_stats)} -}; +static const ctl_named_node_t prof_node[] = { + {NAME("thread_active_init"), CTL(prof_thread_active_init)}, + {NAME("active"), CTL(prof_active)}, {NAME("dump"), CTL(prof_dump)}, + {NAME("gdump"), CTL(prof_gdump)}, {NAME("prefix"), CTL(prof_prefix)}, + {NAME("reset"), CTL(prof_reset)}, {NAME("interval"), CTL(prof_interval)}, + {NAME("lg_sample"), CTL(lg_prof_sample)}, + {NAME("log_start"), CTL(prof_log_start)}, + {NAME("log_stop"), CTL(prof_log_stop)}, + {NAME("stats"), CHILD(named, prof_stats)}}; static const ctl_named_node_t stats_arenas_i_small_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)} -}; + {NAME("allocated"), CTL(stats_arenas_i_small_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_small_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_small_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_small_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_small_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_small_nflushes)}}; static const ctl_named_node_t stats_arenas_i_large_node[] = { - {NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, - {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, - {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)} -}; + 
{NAME("allocated"), CTL(stats_arenas_i_large_allocated)}, + {NAME("nmalloc"), CTL(stats_arenas_i_large_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_large_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_large_nrequests)}, + {NAME("nfills"), CTL(stats_arenas_i_large_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_large_nflushes)}}; -#define MUTEX_PROF_DATA_NODE(prefix) \ -static const ctl_named_node_t stats_##prefix##_node[] = { \ - {NAME("num_ops"), \ - CTL(stats_##prefix##_num_ops)}, \ - {NAME("num_wait"), \ - CTL(stats_##prefix##_num_wait)}, \ - {NAME("num_spin_acq"), \ - CTL(stats_##prefix##_num_spin_acq)}, \ - {NAME("num_owner_switch"), \ - CTL(stats_##prefix##_num_owner_switch)}, \ - {NAME("total_wait_time"), \ - CTL(stats_##prefix##_total_wait_time)}, \ - {NAME("max_wait_time"), \ - CTL(stats_##prefix##_max_wait_time)}, \ - {NAME("max_num_thds"), \ - CTL(stats_##prefix##_max_num_thds)} \ - /* Note that # of current waiting thread not provided. */ \ -}; +#define MUTEX_PROF_DATA_NODE(prefix) \ + static const ctl_named_node_t stats_##prefix##_node[] = { \ + {NAME("num_ops"), CTL(stats_##prefix##_num_ops)}, \ + {NAME("num_wait"), CTL(stats_##prefix##_num_wait)}, \ + {NAME("num_spin_acq"), CTL(stats_##prefix##_num_spin_acq)}, \ + {NAME("num_owner_switch"), \ + CTL(stats_##prefix##_num_owner_switch)}, \ + {NAME("total_wait_time"), CTL(stats_##prefix##_total_wait_time)}, \ + {NAME("max_wait_time"), CTL(stats_##prefix##_max_wait_time)}, \ + {NAME("max_num_thds"), \ + CTL(stats_##prefix##_max_num_thds)} /* Note that # of current waiting thread not provided. 
*/ \ + }; MUTEX_PROF_DATA_NODE(arenas_i_bins_j_mutex) static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, - {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, - {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, - {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, - {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, - {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, - {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, - {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, - {NAME("batch_pops"), - CTL(stats_arenas_i_bins_j_batch_pops)}, - {NAME("batch_failed_pushes"), - CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, - {NAME("batch_pushes"), - CTL(stats_arenas_i_bins_j_batch_pushes)}, - {NAME("batch_pushed_elems"), - CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, - {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)} -}; + {NAME("nmalloc"), CTL(stats_arenas_i_bins_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_bins_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_bins_j_nrequests)}, + {NAME("curregs"), CTL(stats_arenas_i_bins_j_curregs)}, + {NAME("nfills"), CTL(stats_arenas_i_bins_j_nfills)}, + {NAME("nflushes"), CTL(stats_arenas_i_bins_j_nflushes)}, + {NAME("nslabs"), CTL(stats_arenas_i_bins_j_nslabs)}, + {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, + {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, + {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, + {NAME("batch_pops"), CTL(stats_arenas_i_bins_j_batch_pops)}, + {NAME("batch_failed_pushes"), + CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, + {NAME("batch_pushes"), CTL(stats_arenas_i_bins_j_batch_pushes)}, + {NAME("batch_pushed_elems"), CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, + {NAME("mutex"), CHILD(named, 
stats_arenas_i_bins_j_mutex)}}; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_bins_j)} -}; + {NAME(""), CHILD(named, stats_arenas_i_bins_j)}}; static const ctl_indexed_node_t stats_arenas_i_bins_node[] = { - {INDEX(stats_arenas_i_bins_j)} -}; + {INDEX(stats_arenas_i_bins_j)}}; static const ctl_named_node_t stats_arenas_i_lextents_j_node[] = { - {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)}, - {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)}, - {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)}, - {NAME("curlextents"), CTL(stats_arenas_i_lextents_j_curlextents)} -}; + {NAME("nmalloc"), CTL(stats_arenas_i_lextents_j_nmalloc)}, + {NAME("ndalloc"), CTL(stats_arenas_i_lextents_j_ndalloc)}, + {NAME("nrequests"), CTL(stats_arenas_i_lextents_j_nrequests)}, + {NAME("curlextents"), CTL(stats_arenas_i_lextents_j_curlextents)}}; static const ctl_named_node_t super_stats_arenas_i_lextents_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_lextents_j)} -}; + {NAME(""), CHILD(named, stats_arenas_i_lextents_j)}}; static const ctl_indexed_node_t stats_arenas_i_lextents_node[] = { - {INDEX(stats_arenas_i_lextents_j)} -}; + {INDEX(stats_arenas_i_lextents_j)}}; static const ctl_named_node_t stats_arenas_i_extents_j_node[] = { - {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, - {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, - {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, - {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, - {NAME("muzzy_bytes"), CTL(stats_arenas_i_extents_j_muzzy_bytes)}, - {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)} -}; + {NAME("ndirty"), CTL(stats_arenas_i_extents_j_ndirty)}, + {NAME("nmuzzy"), CTL(stats_arenas_i_extents_j_nmuzzy)}, + {NAME("nretained"), CTL(stats_arenas_i_extents_j_nretained)}, + {NAME("dirty_bytes"), CTL(stats_arenas_i_extents_j_dirty_bytes)}, + {NAME("muzzy_bytes"), 
CTL(stats_arenas_i_extents_j_muzzy_bytes)}, + {NAME("retained_bytes"), CTL(stats_arenas_i_extents_j_retained_bytes)}}; static const ctl_named_node_t super_stats_arenas_i_extents_j_node[] = { - {NAME(""), CHILD(named, stats_arenas_i_extents_j)} -}; + {NAME(""), CHILD(named, stats_arenas_i_extents_j)}}; static const ctl_indexed_node_t stats_arenas_i_extents_node[] = { - {INDEX(stats_arenas_i_extents_j)} -}; + {INDEX(stats_arenas_i_extents_j)}}; -#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) +#define OP(mtx) MUTEX_PROF_DATA_NODE(arenas_i_mutexes_##mtx) MUTEX_PROF_ARENA_MUTEXES #undef OP static const ctl_named_node_t stats_arenas_i_mutexes_node[] = { #define OP(mtx) {NAME(#mtx), CHILD(named, stats_arenas_i_mutexes_##mtx)}, -MUTEX_PROF_ARENA_MUTEXES + MUTEX_PROF_ARENA_MUTEXES #undef OP }; static const ctl_named_node_t stats_arenas_i_hpa_shard_slabs_node[] = { - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)} -}; + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), CTL(stats_arenas_i_hpa_shard_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), CTL(stats_arenas_i_hpa_shard_slabs_ndirty_huge)}}; static const ctl_named_node_t stats_arenas_i_hpa_shard_full_slabs_node[] = { - {NAME("npageslabs_nonhuge"), - 
CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)} -}; + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_full_slabs_ndirty_huge)}}; static const ctl_named_node_t stats_arenas_i_hpa_shard_empty_slabs_node[] = { - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)} -}; + {NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_nactive_huge)}, + 
{NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge)}}; -static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME("npageslabs_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, - {NAME("npageslabs_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, - {NAME("nactive_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, - {NAME("nactive_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, - {NAME("ndirty_nonhuge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)}, - {NAME("ndirty_huge"), - CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)} -}; +static const ctl_named_node_t stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = + {{NAME("npageslabs_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge)}, + {NAME("npageslabs_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge)}, + {NAME("nactive_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge)}, + {NAME("nactive_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge)}, + {NAME("ndirty_nonhuge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge)}, + {NAME("ndirty_huge"), + CTL(stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge)}}; -static const ctl_named_node_t super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { - {NAME(""), - CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)} -}; +static const ctl_named_node_t + super_stats_arenas_i_hpa_shard_nonfull_slabs_j_node[] = { + {NAME(""), CHILD(named, stats_arenas_i_hpa_shard_nonfull_slabs_j)}}; static const ctl_indexed_node_t stats_arenas_i_hpa_shard_nonfull_slabs_node[] = -{ - {INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)} -}; + {{INDEX(stats_arenas_i_hpa_shard_nonfull_slabs_j)}}; static const ctl_named_node_t 
stats_arenas_i_hpa_shard_node[] = { - {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)}, - {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, - {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)}, + {NAME("npageslabs"), CTL(stats_arenas_i_hpa_shard_npageslabs)}, + {NAME("nactive"), CTL(stats_arenas_i_hpa_shard_nactive)}, + {NAME("ndirty"), CTL(stats_arenas_i_hpa_shard_ndirty)}, - {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)}, + {NAME("slabs"), CHILD(named, stats_arenas_i_hpa_shard_slabs)}, - {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, - {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, - {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, - {NAME("nhugify_failures"), - CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, - {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, + {NAME("npurge_passes"), CTL(stats_arenas_i_hpa_shard_npurge_passes)}, + {NAME("npurges"), CTL(stats_arenas_i_hpa_shard_npurges)}, + {NAME("nhugifies"), CTL(stats_arenas_i_hpa_shard_nhugifies)}, + {NAME("nhugify_failures"), CTL(stats_arenas_i_hpa_shard_nhugify_failures)}, + {NAME("ndehugifies"), CTL(stats_arenas_i_hpa_shard_ndehugifies)}, - {NAME("full_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_full_slabs)}, - {NAME("empty_slabs"), CHILD(named, - stats_arenas_i_hpa_shard_empty_slabs)}, - {NAME("nonfull_slabs"), CHILD(indexed, - stats_arenas_i_hpa_shard_nonfull_slabs)} -}; + {NAME("full_slabs"), CHILD(named, stats_arenas_i_hpa_shard_full_slabs)}, + {NAME("empty_slabs"), CHILD(named, stats_arenas_i_hpa_shard_empty_slabs)}, + {NAME("nonfull_slabs"), + CHILD(indexed, stats_arenas_i_hpa_shard_nonfull_slabs)}}; static const ctl_named_node_t stats_arenas_i_node[] = { - {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, - {NAME("uptime"), CTL(stats_arenas_i_uptime)}, - {NAME("dss"), CTL(stats_arenas_i_dss)}, - {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)}, - {NAME("muzzy_decay_ms"), 
CTL(stats_arenas_i_muzzy_decay_ms)}, - {NAME("pactive"), CTL(stats_arenas_i_pactive)}, - {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, - {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, - {NAME("mapped"), CTL(stats_arenas_i_mapped)}, - {NAME("retained"), CTL(stats_arenas_i_retained)}, - {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, - {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, - {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, - {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, - {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)}, - {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)}, - {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, - {NAME("base"), CTL(stats_arenas_i_base)}, - {NAME("internal"), CTL(stats_arenas_i_internal)}, - {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)}, - {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)}, - {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, - {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, - {NAME("tcache_stashed_bytes"), - CTL(stats_arenas_i_tcache_stashed_bytes)}, - {NAME("resident"), CTL(stats_arenas_i_resident)}, - {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, - {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, - {NAME("small"), CHILD(named, stats_arenas_i_small)}, - {NAME("large"), CHILD(named, stats_arenas_i_large)}, - {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, - {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, - {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, - {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}, - {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)} -}; + {NAME("nthreads"), CTL(stats_arenas_i_nthreads)}, + {NAME("uptime"), CTL(stats_arenas_i_uptime)}, + {NAME("dss"), CTL(stats_arenas_i_dss)}, + {NAME("dirty_decay_ms"), CTL(stats_arenas_i_dirty_decay_ms)}, + {NAME("muzzy_decay_ms"), CTL(stats_arenas_i_muzzy_decay_ms)}, + 
{NAME("pactive"), CTL(stats_arenas_i_pactive)}, + {NAME("pdirty"), CTL(stats_arenas_i_pdirty)}, + {NAME("pmuzzy"), CTL(stats_arenas_i_pmuzzy)}, + {NAME("mapped"), CTL(stats_arenas_i_mapped)}, + {NAME("retained"), CTL(stats_arenas_i_retained)}, + {NAME("extent_avail"), CTL(stats_arenas_i_extent_avail)}, + {NAME("dirty_npurge"), CTL(stats_arenas_i_dirty_npurge)}, + {NAME("dirty_nmadvise"), CTL(stats_arenas_i_dirty_nmadvise)}, + {NAME("dirty_purged"), CTL(stats_arenas_i_dirty_purged)}, + {NAME("muzzy_npurge"), CTL(stats_arenas_i_muzzy_npurge)}, + {NAME("muzzy_nmadvise"), CTL(stats_arenas_i_muzzy_nmadvise)}, + {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, + {NAME("base"), CTL(stats_arenas_i_base)}, + {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_edata"), CTL(stats_arenas_i_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_arenas_i_metadata_rtree)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, + {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, + {NAME("tcache_stashed_bytes"), CTL(stats_arenas_i_tcache_stashed_bytes)}, + {NAME("resident"), CTL(stats_arenas_i_resident)}, + {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, + {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, + {NAME("small"), CHILD(named, stats_arenas_i_small)}, + {NAME("large"), CHILD(named, stats_arenas_i_large)}, + {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, + {NAME("lextents"), CHILD(indexed, stats_arenas_i_lextents)}, + {NAME("extents"), CHILD(indexed, stats_arenas_i_extents)}, + {NAME("mutexes"), CHILD(named, stats_arenas_i_mutexes)}, + {NAME("hpa_shard"), CHILD(named, stats_arenas_i_hpa_shard)}}; static const ctl_named_node_t super_stats_arenas_i_node[] = { - {NAME(""), CHILD(named, stats_arenas_i)} -}; + {NAME(""), CHILD(named, stats_arenas_i)}}; -static const ctl_indexed_node_t stats_arenas_node[] = { - {INDEX(stats_arenas_i)} -}; +static const ctl_indexed_node_t stats_arenas_node[] = 
{{INDEX(stats_arenas_i)}}; static const ctl_named_node_t stats_background_thread_node[] = { - {NAME("num_threads"), CTL(stats_background_thread_num_threads)}, - {NAME("num_runs"), CTL(stats_background_thread_num_runs)}, - {NAME("run_interval"), CTL(stats_background_thread_run_interval)} -}; + {NAME("num_threads"), CTL(stats_background_thread_num_threads)}, + {NAME("num_runs"), CTL(stats_background_thread_num_runs)}, + {NAME("run_interval"), CTL(stats_background_thread_run_interval)}}; #define OP(mtx) MUTEX_PROF_DATA_NODE(mutexes_##mtx) MUTEX_PROF_GLOBAL_MUTEXES @@ -945,95 +851,81 @@ MUTEX_PROF_GLOBAL_MUTEXES static const ctl_named_node_t stats_mutexes_node[] = { #define OP(mtx) {NAME(#mtx), CHILD(named, stats_mutexes_##mtx)}, -MUTEX_PROF_GLOBAL_MUTEXES + MUTEX_PROF_GLOBAL_MUTEXES #undef OP - {NAME("reset"), CTL(stats_mutexes_reset)} -}; + {NAME("reset"), CTL(stats_mutexes_reset)}}; #undef MUTEX_PROF_DATA_NODE static const ctl_named_node_t stats_node[] = { - {NAME("allocated"), CTL(stats_allocated)}, - {NAME("active"), CTL(stats_active)}, - {NAME("metadata"), CTL(stats_metadata)}, - {NAME("metadata_edata"), CTL(stats_metadata_edata)}, - {NAME("metadata_rtree"), CTL(stats_metadata_rtree)}, - {NAME("metadata_thp"), CTL(stats_metadata_thp)}, - {NAME("resident"), CTL(stats_resident)}, - {NAME("mapped"), CTL(stats_mapped)}, - {NAME("retained"), CTL(stats_retained)}, - {NAME("background_thread"), - CHILD(named, stats_background_thread)}, - {NAME("mutexes"), CHILD(named, stats_mutexes)}, - {NAME("arenas"), CHILD(indexed, stats_arenas)}, - {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, + {NAME("allocated"), CTL(stats_allocated)}, + {NAME("active"), CTL(stats_active)}, + {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_edata"), CTL(stats_metadata_edata)}, + {NAME("metadata_rtree"), CTL(stats_metadata_rtree)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, + {NAME("resident"), CTL(stats_resident)}, + {NAME("mapped"), CTL(stats_mapped)}, + {NAME("retained"), 
CTL(stats_retained)}, + {NAME("background_thread"), CHILD(named, stats_background_thread)}, + {NAME("mutexes"), CHILD(named, stats_mutexes)}, + {NAME("arenas"), CHILD(indexed, stats_arenas)}, + {NAME("zero_reallocs"), CTL(stats_zero_reallocs)}, }; static const ctl_named_node_t experimental_hooks_node[] = { - {NAME("install"), CTL(experimental_hooks_install)}, - {NAME("remove"), CTL(experimental_hooks_remove)}, - {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, - {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, - {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, - {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, - {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, - {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, - {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, + {NAME("install"), CTL(experimental_hooks_install)}, + {NAME("remove"), CTL(experimental_hooks_remove)}, + {NAME("prof_backtrace"), CTL(experimental_hooks_prof_backtrace)}, + {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, + {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, + {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, + {NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, + {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, + {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; static const ctl_named_node_t experimental_thread_node[] = { - {NAME("activity_callback"), - CTL(experimental_thread_activity_callback)} -}; + {NAME("activity_callback"), CTL(experimental_thread_activity_callback)}}; static const ctl_named_node_t experimental_utilization_node[] = { - {NAME("query"), CTL(experimental_utilization_query)}, - {NAME("batch_query"), CTL(experimental_utilization_batch_query)} -}; + {NAME("query"), CTL(experimental_utilization_query)}, + {NAME("batch_query"), CTL(experimental_utilization_batch_query)}}; 
static const ctl_named_node_t experimental_arenas_i_node[] = { - {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)} -}; + {NAME("pactivep"), CTL(experimental_arenas_i_pactivep)}}; static const ctl_named_node_t super_experimental_arenas_i_node[] = { - {NAME(""), CHILD(named, experimental_arenas_i)} -}; + {NAME(""), CHILD(named, experimental_arenas_i)}}; static const ctl_indexed_node_t experimental_arenas_node[] = { - {INDEX(experimental_arenas_i)} -}; + {INDEX(experimental_arenas_i)}}; static const ctl_named_node_t experimental_prof_recent_node[] = { - {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, - {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)}, + {NAME("alloc_max"), CTL(experimental_prof_recent_alloc_max)}, + {NAME("alloc_dump"), CTL(experimental_prof_recent_alloc_dump)}, }; static const ctl_named_node_t experimental_node[] = { - {NAME("hooks"), CHILD(named, experimental_hooks)}, - {NAME("utilization"), CHILD(named, experimental_utilization)}, - {NAME("arenas"), CHILD(indexed, experimental_arenas)}, - {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, - {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, - {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, - {NAME("thread"), CHILD(named, experimental_thread)} -}; + {NAME("hooks"), CHILD(named, experimental_hooks)}, + {NAME("utilization"), CHILD(named, experimental_utilization)}, + {NAME("arenas"), CHILD(indexed, experimental_arenas)}, + {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, + {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, + {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, + {NAME("thread"), CHILD(named, experimental_thread)}}; -static const ctl_named_node_t root_node[] = { - {NAME("version"), CTL(version)}, - {NAME("epoch"), CTL(epoch)}, - {NAME("background_thread"), CTL(background_thread)}, - {NAME("max_background_threads"), CTL(max_background_threads)}, - {NAME("thread"), CHILD(named, thread)}, - 
{NAME("config"), CHILD(named, config)}, - {NAME("opt"), CHILD(named, opt)}, - {NAME("tcache"), CHILD(named, tcache)}, - {NAME("arena"), CHILD(indexed, arena)}, - {NAME("arenas"), CHILD(named, arenas)}, - {NAME("prof"), CHILD(named, prof)}, - {NAME("stats"), CHILD(named, stats)}, - {NAME("experimental"), CHILD(named, experimental)} -}; +static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)}, + {NAME("epoch"), CTL(epoch)}, + {NAME("background_thread"), CTL(background_thread)}, + {NAME("max_background_threads"), CTL(max_background_threads)}, + {NAME("thread"), CHILD(named, thread)}, + {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, + {NAME("tcache"), CHILD(named, tcache)}, + {NAME("arena"), CHILD(indexed, arena)}, + {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, + {NAME("stats"), CHILD(named, stats)}, + {NAME("experimental"), CHILD(named, experimental)}}; static const ctl_named_node_t super_root_node[] = { - {NAME(""), CHILD(named, root)} -}; + {NAME(""), CHILD(named, root)}}; #undef NAME #undef CHILD @@ -1048,8 +940,7 @@ static const ctl_named_node_t super_root_node[] = { */ static void ctl_accum_locked_u64(locked_u64_t *dst, locked_u64_t *src) { - locked_inc_u64_unsynchronized(dst, - locked_read_u64_unsynchronized(src)); + locked_inc_u64_unsynchronized(dst, locked_read_u64_unsynchronized(src)); } static void @@ -1089,8 +980,8 @@ arenas_i2a_impl(size_t i, bool compat, bool validate) { * more than one past the range of indices that have * initialized ctl data. 
*/ - assert(i < ctl_arenas->narenas || (!validate && i == - ctl_arenas->narenas)); + assert(i < ctl_arenas->narenas + || (!validate && i == ctl_arenas->narenas)); a = (unsigned)i + 2; } break; @@ -1114,12 +1005,12 @@ arenas_i_impl(tsd_t *tsd, size_t i, bool compat, bool init) { if (init && ret == NULL) { if (config_stats) { struct container_s { - ctl_arena_t ctl_arena; - ctl_arena_stats_t astats; + ctl_arena_t ctl_arena; + ctl_arena_stats_t astats; }; - struct container_s *cont = - (struct container_s *)base_alloc(tsd_tsdn(tsd), - b0get(), sizeof(struct container_s), QUANTUM); + struct container_s *cont = (struct container_s *) + base_alloc(tsd_tsdn(tsd), b0get(), + sizeof(struct container_s), QUANTUM); if (cont == NULL) { return NULL; } @@ -1177,8 +1068,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = &ctl_arena->astats->bstats[i].stats_data; - ctl_arena->astats->allocated_small += bstats->curregs * - sz_index2size(i); + ctl_arena->astats->allocated_small += bstats->curregs + * sz_index2size(i); ctl_arena->astats->nmalloc_small += bstats->nmalloc; ctl_arena->astats->ndalloc_small += bstats->ndalloc; ctl_arena->astats->nrequests_small += bstats->nrequests; @@ -1194,8 +1085,8 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { } static void -ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, - bool destroyed) { +ctl_arena_stats_sdmerge( + ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, bool destroyed) { unsigned i; if (!destroyed) { @@ -1216,52 +1107,59 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, if (!destroyed) { sdstats->astats.mapped += astats->astats.mapped; - sdstats->astats.pa_shard_stats.pac_stats.retained - += astats->astats.pa_shard_stats.pac_stats.retained; - sdstats->astats.pa_shard_stats.edata_avail - += astats->astats.pa_shard_stats.edata_avail; + 
sdstats->astats.pa_shard_stats.pac_stats.retained += + astats->astats.pa_shard_stats.pac_stats.retained; + sdstats->astats.pa_shard_stats.edata_avail += + astats->astats.pa_shard_stats.edata_avail; } - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge, - &astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise, - &astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_dirty.purged, - &astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_dirty.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty + .npurge); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_dirty.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty + .nmadvise); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_dirty.purged, + &astats->astats.pa_shard_stats.pac_stats.decay_dirty + .purged); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge, - &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise, - &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise); - ctl_accum_locked_u64( - &sdstats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged, - &astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_muzzy.npurge, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy + .npurge); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_muzzy.nmadvise, + &astats->astats.pa_shard_stats.pac_stats.decay_muzzy + .nmadvise); + ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats + .decay_muzzy.purged, + 
&astats->astats.pa_shard_stats.pac_stats.decay_muzzy + .purged); -#define OP(mtx) malloc_mutex_prof_merge( \ - &(sdstats->astats.mutex_prof_data[ \ - arena_prof_mutex_##mtx]), \ - &(astats->astats.mutex_prof_data[ \ - arena_prof_mutex_##mtx])); -MUTEX_PROF_ARENA_MUTEXES +#define OP(mtx) \ + malloc_mutex_prof_merge( \ + &(sdstats->astats.mutex_prof_data[arena_prof_mutex_##mtx]), \ + &(astats->astats.mutex_prof_data[arena_prof_mutex_##mtx])); + MUTEX_PROF_ARENA_MUTEXES #undef OP if (!destroyed) { sdstats->astats.base += astats->astats.base; - sdstats->astats.metadata_edata += astats->astats - .metadata_edata; - sdstats->astats.metadata_rtree += astats->astats - .metadata_rtree; + sdstats->astats.metadata_edata += + astats->astats.metadata_edata; + sdstats->astats.metadata_rtree += + astats->astats.metadata_rtree; sdstats->astats.resident += astats->astats.resident; - sdstats->astats.metadata_thp += astats->astats.metadata_thp; + sdstats->astats.metadata_thp += + astats->astats.metadata_thp; ctl_accum_atomic_zu(&sdstats->astats.internal, &astats->astats.internal); } else { assert(atomic_load_zu( - &astats->astats.internal, ATOMIC_RELAXED) == 0); + &astats->astats.internal, ATOMIC_RELAXED) + == 0); } if (!destroyed) { @@ -1283,8 +1181,8 @@ MUTEX_PROF_ARENA_MUTEXES } sdstats->astats.nmalloc_large += astats->astats.nmalloc_large; sdstats->astats.ndalloc_large += astats->astats.ndalloc_large; - sdstats->astats.nrequests_large - += astats->astats.nrequests_large; + sdstats->astats.nrequests_large += + astats->astats.nrequests_large; sdstats->astats.nflushes_large += astats->astats.nflushes_large; ctl_accum_atomic_zu( &sdstats->astats.pa_shard_stats.pac_stats.abandoned_vm, @@ -1322,14 +1220,12 @@ MUTEX_PROF_ARENA_MUTEXES assert(bstats->nonfull_slabs == 0); } - merged->batch_pops - += bstats->batch_pops; - merged->batch_failed_pushes - += bstats->batch_failed_pushes; - merged->batch_pushes - += bstats->batch_pushes; - merged->batch_pushed_elems - += 
bstats->batch_pushed_elems; + merged->batch_pops += bstats->batch_pops; + merged->batch_failed_pushes += + bstats->batch_failed_pushes; + merged->batch_pushes += bstats->batch_pushes; + merged->batch_pushed_elems += + bstats->batch_pushed_elems; malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); @@ -1355,14 +1251,14 @@ MUTEX_PROF_ARENA_MUTEXES for (i = 0; i < SC_NPSIZES; i++) { sdstats->estats[i].ndirty += astats->estats[i].ndirty; sdstats->estats[i].nmuzzy += astats->estats[i].nmuzzy; - sdstats->estats[i].nretained - += astats->estats[i].nretained; - sdstats->estats[i].dirty_bytes - += astats->estats[i].dirty_bytes; - sdstats->estats[i].muzzy_bytes - += astats->estats[i].muzzy_bytes; - sdstats->estats[i].retained_bytes - += astats->estats[i].retained_bytes; + sdstats->estats[i].nretained += + astats->estats[i].nretained; + sdstats->estats[i].dirty_bytes += + astats->estats[i].dirty_bytes; + sdstats->estats[i].muzzy_bytes += + astats->estats[i].muzzy_bytes; + sdstats->estats[i].retained_bytes += + astats->estats[i].retained_bytes; } /* Merge HPA stats. 
*/ @@ -1384,11 +1280,11 @@ ctl_arena_refresh(tsdn_t *tsdn, arena_t *arena, ctl_arena_t *ctl_sdarena, static unsigned ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { - unsigned arena_ind; + unsigned arena_ind; ctl_arena_t *ctl_arena; - if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) != - NULL) { + if ((ctl_arena = ql_last(&ctl_arenas->destroyed, destroyed_link)) + != NULL) { ql_remove(&ctl_arenas->destroyed, ctl_arena, destroyed_link); arena_ind = ctl_arena->arena_ind; } else { @@ -1415,8 +1311,8 @@ ctl_arena_init(tsd_t *tsd, const arena_config_t *config) { static void ctl_background_thread_stats_read(tsdn_t *tsdn) { background_thread_stats_t *stats = &ctl_stats->background_thread; - if (!have_background_thread || - background_thread_stats_read(tsdn, stats)) { + if (!have_background_thread + || background_thread_stats_read(tsdn, stats)) { memset(stats, 0, sizeof(background_thread_stats_t)); nstime_init_zero(&stats->run_interval); } @@ -1452,39 +1348,39 @@ ctl_refresh(tsdn_t *tsdn) { for (unsigned i = 0; i < narenas; i++) { ctl_arena_t *ctl_arena = arenas_i(i); - bool initialized = (tarenas[i] != NULL); + bool initialized = (tarenas[i] != NULL); ctl_arena->initialized = initialized; if (initialized) { - ctl_arena_refresh(tsdn, tarenas[i], ctl_sarena, i, - false); + ctl_arena_refresh( + tsdn, tarenas[i], ctl_sarena, i, false); } } if (config_stats) { - ctl_stats->allocated = ctl_sarena->astats->allocated_small + - ctl_sarena->astats->astats.allocated_large; + ctl_stats->allocated = ctl_sarena->astats->allocated_small + + ctl_sarena->astats->astats.allocated_large; ctl_stats->active = (ctl_sarena->pactive << LG_PAGE); - ctl_stats->metadata = ctl_sarena->astats->astats.base + - atomic_load_zu(&ctl_sarena->astats->astats.internal, - ATOMIC_RELAXED); - ctl_stats->metadata_edata = ctl_sarena->astats->astats - .metadata_edata; - ctl_stats->metadata_rtree = ctl_sarena->astats->astats - .metadata_rtree; + ctl_stats->metadata = 
ctl_sarena->astats->astats.base + + atomic_load_zu( + &ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_edata = + ctl_sarena->astats->astats.metadata_edata; + ctl_stats->metadata_rtree = + ctl_sarena->astats->astats.metadata_rtree; ctl_stats->resident = ctl_sarena->astats->astats.resident; ctl_stats->metadata_thp = ctl_sarena->astats->astats.metadata_thp; ctl_stats->mapped = ctl_sarena->astats->astats.mapped; - ctl_stats->retained = ctl_sarena->astats->astats - .pa_shard_stats.pac_stats.retained; + ctl_stats->retained = ctl_sarena->astats->astats.pa_shard_stats + .pac_stats.retained; ctl_background_thread_stats_read(tsdn); -#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \ - malloc_mutex_lock(tsdn, &mtx); \ - malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ - malloc_mutex_unlock(tsdn, &mtx); +#define READ_GLOBAL_MUTEX_PROF_DATA(i, mtx) \ + malloc_mutex_lock(tsdn, &mtx); \ + malloc_mutex_prof_read(tsdn, &ctl_stats->mutex_prof_data[i], &mtx); \ + malloc_mutex_unlock(tsdn, &mtx); if (config_prof && opt_prof) { READ_GLOBAL_MUTEX_PROF_DATA( @@ -1507,9 +1403,9 @@ ctl_refresh(tsdn_t *tsdn) { global_prof_mutex_background_thread, background_thread_lock); } else { - memset(&ctl_stats->mutex_prof_data[ - global_prof_mutex_background_thread], 0, - sizeof(mutex_prof_data_t)); + memset(&ctl_stats->mutex_prof_data + [global_prof_mutex_background_thread], + 0, sizeof(mutex_prof_data_t)); } /* We own ctl mutex already. */ malloc_mutex_prof_read(tsdn, @@ -1522,21 +1418,21 @@ ctl_refresh(tsdn_t *tsdn) { static bool ctl_init(tsd_t *tsd) { - bool ret; + bool ret; tsdn_t *tsdn = tsd_tsdn(tsd); malloc_mutex_lock(tsdn, &ctl_mtx); if (!ctl_initialized) { ctl_arena_t *ctl_sarena, *ctl_darena; - unsigned i; + unsigned i; /* * Allocate demand-zeroed space for pointers to the full * range of supported arena indices. 
*/ if (ctl_arenas == NULL) { - ctl_arenas = (ctl_arenas_t *)base_alloc(tsdn, - b0get(), sizeof(ctl_arenas_t), QUANTUM); + ctl_arenas = (ctl_arenas_t *)base_alloc( + tsdn, b0get(), sizeof(ctl_arenas_t), QUANTUM); if (ctl_arenas == NULL) { ret = true; goto label_return; @@ -1544,8 +1440,8 @@ ctl_init(tsd_t *tsd) { } if (config_stats && ctl_stats == NULL) { - ctl_stats = (ctl_stats_t *)base_alloc(tsdn, b0get(), - sizeof(ctl_stats_t), QUANTUM); + ctl_stats = (ctl_stats_t *)base_alloc( + tsdn, b0get(), sizeof(ctl_stats_t), QUANTUM); if (ctl_stats == NULL) { ret = true; goto label_return; @@ -1557,15 +1453,17 @@ ctl_init(tsd_t *tsd) { * here rather than doing it lazily elsewhere, in order * to limit when OOM-caused errors can occur. */ - if ((ctl_sarena = arenas_i_impl(tsd, MALLCTL_ARENAS_ALL, false, - true)) == NULL) { + if ((ctl_sarena = arenas_i_impl( + tsd, MALLCTL_ARENAS_ALL, false, true)) + == NULL) { ret = true; goto label_return; } ctl_sarena->initialized = true; - if ((ctl_darena = arenas_i_impl(tsd, MALLCTL_ARENAS_DESTROYED, - false, true)) == NULL) { + if ((ctl_darena = arenas_i_impl( + tsd, MALLCTL_ARENAS_DESTROYED, false, true)) + == NULL) { ret = true; goto label_return; } @@ -1600,9 +1498,9 @@ static int ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, const char *name, const ctl_named_node_t **ending_nodep, size_t *mibp, size_t *depthp) { - int ret; - const char *elm, *tdot, *dot; - size_t elen, i, j; + int ret; + const char *elm, *tdot, *dot; + size_t elen, i, j; const ctl_named_node_t *node; elm = name; @@ -1624,8 +1522,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, for (j = 0; j < node->nchildren; j++) { const ctl_named_node_t *child = ctl_named_children(node, j); - if (strlen(child->name) == elen && - strncmp(elm, child->name, elen) == 0) { + if (strlen(child->name) == elen + && strncmp(elm, child->name, elen) == 0) { node = child; mibp[i] = j; break; @@ -1636,7 +1534,7 @@ ctl_lookup(tsdn_t *tsdn, const 
ctl_named_node_t *starting_node, goto label_return; } } else { - uintmax_t index; + uintmax_t index; const ctl_indexed_node_t *inode; /* Children are indexed. */ @@ -1674,8 +1572,8 @@ ctl_lookup(tsdn_t *tsdn, const ctl_named_node_t *starting_node, /* Update elm. */ elm = &dot[1]; - dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot : - strchr(elm, '\0'); + dot = ((tdot = strchr(elm, '.')) != NULL) ? tdot + : strchr(elm, '\0'); elen = (size_t)((uintptr_t)dot - (uintptr_t)elm); } if (ending_nodep != NULL) { @@ -1690,9 +1588,9 @@ label_return: int ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - size_t depth; - size_t mib[CTL_MAX_DEPTH]; + int ret; + size_t depth; + size_t mib[CTL_MAX_DEPTH]; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ -1701,8 +1599,8 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } depth = CTL_MAX_DEPTH; - ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, &node, mib, - &depth); + ret = ctl_lookup( + tsd_tsdn(tsd), super_root_node, name, &node, mib, &depth); if (ret != 0) { goto label_return; } @@ -1715,7 +1613,7 @@ ctl_byname(tsd_t *tsd, const char *name, void *oldp, size_t *oldlenp, } label_return: - return(ret); + return (ret); } int @@ -1727,10 +1625,10 @@ ctl_nametomib(tsd_t *tsd, const char *name, size_t *mibp, size_t *miblenp) { goto label_return; } - ret = ctl_lookup(tsd_tsdn(tsd), super_root_node, name, NULL, mibp, - miblenp); + ret = ctl_lookup( + tsd_tsdn(tsd), super_root_node, name, NULL, mibp, miblenp); label_return: - return(ret); + return (ret); } static int @@ -1766,13 +1664,13 @@ ctl_lookupbymib(tsdn_t *tsdn, const ctl_named_node_t **ending_nodep, ret = 0; label_return: - return(ret); + return (ret); } int ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ 
-1794,13 +1692,13 @@ ctl_bymib(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, } label_return: - return(ret); + return (ret); } int -ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, - size_t *miblenp) { - int ret; +ctl_mibnametomib( + tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp) { + int ret; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ -1820,17 +1718,17 @@ ctl_mibnametomib(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, assert(miblenp != NULL); assert(*miblenp >= miblen); *miblenp -= miblen; - ret = ctl_lookup(tsd_tsdn(tsd), node, name, NULL, mib + miblen, - miblenp); + ret = ctl_lookup( + tsd_tsdn(tsd), node, name, NULL, mib + miblen, miblenp); *miblenp += miblen; label_return: - return(ret); + return (ret); } int ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, size_t *miblenp, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; const ctl_named_node_t *node; if (!ctl_initialized && ctl_init(tsd)) { @@ -1853,29 +1751,29 @@ ctl_bymibname(tsd_t *tsd, size_t *mib, size_t miblen, const char *name, /* * The same node supplies the starting node and stores the ending node. */ - ret = ctl_lookup(tsd_tsdn(tsd), node, name, &node, mib + miblen, - miblenp); + ret = ctl_lookup( + tsd_tsdn(tsd), node, name, &node, mib + miblen, miblenp); *miblenp += miblen; if (ret != 0) { goto label_return; } if (node != NULL && node->ctl) { - ret = node->ctl(tsd, mib, *miblenp, oldp, oldlenp, newp, - newlen); + ret = node->ctl( + tsd, mib, *miblenp, oldp, oldlenp, newp, newlen); } else { /* The name refers to a partial path through the ctl tree. 
*/ ret = ENOENT; } label_return: - return(ret); + return (ret); } bool ctl_boot(void) { if (malloc_mutex_init(&ctl_mtx, "ctl", WITNESS_RANK_CTL, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } @@ -1907,195 +1805,201 @@ ctl_mtx_assert_held(tsdn_t *tsdn) { /******************************************************************************/ /* *_ctl() functions. */ -#define READONLY() do { \ - if (newp != NULL || newlen != 0) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define READONLY() \ + do { \ + if (newp != NULL || newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) -#define WRITEONLY() do { \ - if (oldp != NULL || oldlenp != NULL) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define WRITEONLY() \ + do { \ + if (oldp != NULL || oldlenp != NULL) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) /* Can read or write, but not both. */ -#define READ_XOR_WRITE() do { \ - if ((oldp != NULL && oldlenp != NULL) && (newp != NULL || \ - newlen != 0)) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define READ_XOR_WRITE() \ + do { \ + if ((oldp != NULL && oldlenp != NULL) \ + && (newp != NULL || newlen != 0)) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) /* Can neither read nor write. */ -#define NEITHER_READ_NOR_WRITE() do { \ - if (oldp != NULL || oldlenp != NULL || newp != NULL || \ - newlen != 0) { \ - ret = EPERM; \ - goto label_return; \ - } \ -} while (0) +#define NEITHER_READ_NOR_WRITE() \ + do { \ + if (oldp != NULL || oldlenp != NULL || newp != NULL \ + || newlen != 0) { \ + ret = EPERM; \ + goto label_return; \ + } \ + } while (0) /* Verify that the space provided is enough. 
*/ -#define VERIFY_READ(t) do { \ - if (oldp == NULL || oldlenp == NULL || *oldlenp != sizeof(t)) { \ - if (oldlenp != NULL) { \ - *oldlenp = 0; \ - } \ - ret = EINVAL; \ - goto label_return; \ - } \ -} while (0) +#define VERIFY_READ(t) \ + do { \ + if (oldp == NULL || oldlenp == NULL \ + || *oldlenp != sizeof(t)) { \ + if (oldlenp != NULL) { \ + *oldlenp = 0; \ + } \ + ret = EINVAL; \ + goto label_return; \ + } \ + } while (0) -#define READ(v, t) do { \ - if (oldp != NULL && oldlenp != NULL) { \ - if (*oldlenp != sizeof(t)) { \ - size_t copylen = (sizeof(t) <= *oldlenp) \ - ? sizeof(t) : *oldlenp; \ - memcpy(oldp, (void *)&(v), copylen); \ - *oldlenp = copylen; \ - ret = EINVAL; \ - goto label_return; \ - } \ - *(t *)oldp = (v); \ - } \ -} while (0) +#define READ(v, t) \ + do { \ + if (oldp != NULL && oldlenp != NULL) { \ + if (*oldlenp != sizeof(t)) { \ + size_t copylen = (sizeof(t) <= *oldlenp) \ + ? sizeof(t) \ + : *oldlenp; \ + memcpy(oldp, (void *)&(v), copylen); \ + *oldlenp = copylen; \ + ret = EINVAL; \ + goto label_return; \ + } \ + *(t *)oldp = (v); \ + } \ + } while (0) -#define WRITE(v, t) do { \ - if (newp != NULL) { \ - if (newlen != sizeof(t)) { \ - ret = EINVAL; \ - goto label_return; \ - } \ - (v) = *(t *)newp; \ - } \ -} while (0) +#define WRITE(v, t) \ + do { \ + if (newp != NULL) { \ + if (newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ + } \ + } while (0) -#define ASSURED_WRITE(v, t) do { \ - if (newp == NULL || newlen != sizeof(t)) { \ - ret = EINVAL; \ - goto label_return; \ - } \ - (v) = *(t *)newp; \ -} while (0) +#define ASSURED_WRITE(v, t) \ + do { \ + if (newp == NULL || newlen != sizeof(t)) { \ + ret = EINVAL; \ + goto label_return; \ + } \ + (v) = *(t *)newp; \ + } while (0) -#define MIB_UNSIGNED(v, i) do { \ - if (mib[i] > UINT_MAX) { \ - ret = EFAULT; \ - goto label_return; \ - } \ - v = (unsigned)mib[i]; \ -} while (0) +#define MIB_UNSIGNED(v, i) \ + do { \ + if (mib[i] > UINT_MAX) { \ 
+ ret = EFAULT; \ + goto label_return; \ + } \ + v = (unsigned)mib[i]; \ + } while (0) /* * There's a lot of code duplication in the following macros due to limitations * in how nested cpp macros are expanded. */ -#define CTL_RO_CGEN(c, n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - return ret; \ -} +#define CTL_RO_CGEN(c, n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + if (!(c)) { \ + return ENOENT; \ + } \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + return ret; \ + } -#define CTL_RO_GEN(n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, \ - size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ - return ret; \ -} +#define CTL_RO_GEN(n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); \ + return ret; \ + } /* * ctl_mtx is not acquired, under the assumption that no pertinent data will * mutate 
during the call. */ -#define CTL_RO_NL_CGEN(c, n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - if (!(c)) { \ - return ENOENT; \ - } \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} +#define CTL_RO_NL_CGEN(c, n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + if (!(c)) { \ + return ENOENT; \ + } \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + return ret; \ + } -#define CTL_RO_NL_GEN(n, v, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - READONLY(); \ - oldval = (v); \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} +#define CTL_RO_NL_GEN(n, v, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + READONLY(); \ + oldval = (v); \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + return ret; \ + } -#define CTL_RO_CONFIG_GEN(n, t) \ -static int \ -n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ - int ret; \ - t oldval; \ - \ - READONLY(); \ - oldval = n; \ - READ(oldval, t); \ - \ - ret = 0; \ -label_return: \ - return ret; \ -} +#define CTL_RO_CONFIG_GEN(n, t) \ + static int n##_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, \ + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { \ + int ret; \ + t oldval; \ + \ + READONLY(); \ + oldval = n; \ + READ(oldval, t); \ + \ + ret = 0; \ + label_return: \ + return ret; \ + } 
/******************************************************************************/ CTL_RO_NL_GEN(version, JEMALLOC_VERSION, const char *) static int -epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +epoch_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; UNUSED uint64_t newval; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -2112,10 +2016,9 @@ label_return: } static int -background_thread_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) { - int ret; +background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!have_background_thread) { @@ -2164,10 +2067,9 @@ label_return: } static int -max_background_threads_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t oldval; if (!have_background_thread) { @@ -2193,8 +2095,7 @@ max_background_threads_ctl(tsd_t *tsd, const size_t *mib, ret = 0; goto label_return; } - if (newval > opt_max_background_threads || - newval == 0) { + if (newval > opt_max_background_threads || newval == 0) { ret = EINVAL; goto label_return; } @@ -2244,19 +2145,19 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) -CTL_RO_NL_GEN(opt_debug_double_free_max_scan, - opt_debug_double_free_max_scan, unsigned) +CTL_RO_NL_GEN( + opt_debug_double_free_max_scan, opt_debug_double_free_max_scan, unsigned) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_confirm_conf, 
opt_confirm_conf, bool) /* HPA options. */ CTL_RO_NL_GEN(opt_hpa, opt_hpa, bool) -CTL_RO_NL_GEN(opt_hpa_hugification_threshold, - opt_hpa_opts.hugification_threshold, size_t) +CTL_RO_NL_GEN( + opt_hpa_hugification_threshold, opt_hpa_opts.hugification_threshold, size_t) CTL_RO_NL_GEN(opt_hpa_hugify_delay_ms, opt_hpa_opts.hugify_delay_ms, uint64_t) CTL_RO_NL_GEN(opt_hpa_hugify_sync, opt_hpa_opts.hugify_sync, bool) -CTL_RO_NL_GEN(opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, - uint64_t) +CTL_RO_NL_GEN( + opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) @@ -2271,19 +2172,19 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) -CTL_RO_NL_GEN(opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, - size_t) -CTL_RO_NL_GEN(opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, - size_t) +CTL_RO_NL_GEN( + opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, size_t) +CTL_RO_NL_GEN( + opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) -CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], - const char *) +CTL_RO_NL_GEN( + opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) -CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], - const char *) +CTL_RO_NL_GEN( + opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) CTL_RO_NL_GEN(opt_mutex_max_spin, 
opt_mutex_max_spin, int64_t) CTL_RO_NL_GEN(opt_oversize_threshold, opt_oversize_threshold, size_t) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) @@ -2302,65 +2203,66 @@ CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) -CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, - size_t) -CTL_RO_NL_GEN(opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, - size_t) +CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) +CTL_RO_NL_GEN( + opt_remote_free_max_batch, opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) -CTL_RO_NL_GEN(opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, - unsigned) -CTL_RO_NL_GEN(opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, - unsigned) +CTL_RO_NL_GEN( + opt_tcache_nslots_small_min, opt_tcache_nslots_small_min, unsigned) +CTL_RO_NL_GEN( + opt_tcache_nslots_small_max, opt_tcache_nslots_small_max, unsigned) CTL_RO_NL_GEN(opt_tcache_nslots_large, opt_tcache_nslots_large, unsigned) CTL_RO_NL_GEN(opt_lg_tcache_nslots_mul, opt_lg_tcache_nslots_mul, ssize_t) CTL_RO_NL_GEN(opt_tcache_gc_incr_bytes, opt_tcache_gc_incr_bytes, size_t) CTL_RO_NL_GEN(opt_tcache_gc_delay_bytes, opt_tcache_gc_delay_bytes, size_t) -CTL_RO_NL_GEN(opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, - unsigned) -CTL_RO_NL_GEN(opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, - unsigned) +CTL_RO_NL_GEN( + opt_lg_tcache_flush_small_div, opt_lg_tcache_flush_small_div, unsigned) +CTL_RO_NL_GEN( + opt_lg_tcache_flush_large_div, opt_lg_tcache_flush_large_div, unsigned) CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) -CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, 
opt_lg_extent_max_active_fit, - size_t) -CTL_RO_NL_GEN(opt_process_madvise_max_batch, opt_process_madvise_max_batch, - size_t) +CTL_RO_NL_GEN( + opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, size_t) +CTL_RO_NL_GEN( + opt_process_madvise_max_batch, opt_process_madvise_max_batch, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) CTL_RO_NL_CGEN(config_prof, opt_prof_active, opt_prof_active, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_thread_active_init, - opt_prof_thread_active_init, bool) +CTL_RO_NL_CGEN( + config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, opt_experimental_lg_prof_threshold, size_t) +CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, + opt_experimental_lg_prof_threshold, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, - bool) +CTL_RO_NL_CGEN( + config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_interval, opt_lg_prof_interval, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_gdump, opt_prof_gdump, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_final, opt_prof_final, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak, opt_prof_leak, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_leak_error, opt_prof_leak_error, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_recent_alloc_max, - opt_prof_recent_alloc_max, ssize_t) +CTL_RO_NL_CGEN( + config_prof, opt_prof_recent_alloc_max, opt_prof_recent_alloc_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof_stats, opt_prof_stats, bool) -CTL_RO_NL_CGEN(config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, - bool) 
+CTL_RO_NL_CGEN( + config_prof, opt_prof_sys_thread_name, opt_prof_sys_thread_name, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_time_res, prof_time_res_mode_names[opt_prof_time_res], const char *) -CTL_RO_NL_CGEN(config_uaf_detection, opt_lg_san_uaf_align, - opt_lg_san_uaf_align, ssize_t) +CTL_RO_NL_CGEN( + config_uaf_detection, opt_lg_san_uaf_align, opt_lg_san_uaf_align, ssize_t) CTL_RO_NL_GEN(opt_zero_realloc, zero_realloc_mode_names[opt_zero_realloc_action], const char *) -CTL_RO_NL_GEN(opt_disable_large_size_classes, opt_disable_large_size_classes, bool) +CTL_RO_NL_GEN( + opt_disable_large_size_classes, opt_disable_large_size_classes, bool) /* malloc_conf options */ CTL_RO_NL_CGEN(opt_malloc_conf_symlink, opt_malloc_conf_symlink, opt_malloc_conf_symlink, const char *) CTL_RO_NL_CGEN(opt_malloc_conf_env_var, opt_malloc_conf_env_var, opt_malloc_conf_env_var, const char *) -CTL_RO_NL_CGEN(je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, - const char *) +CTL_RO_NL_CGEN( + je_malloc_conf, opt_malloc_conf_global_var, je_malloc_conf, const char *) CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder, opt_malloc_conf_global_var_2_conf_harder, je_malloc_conf_2_conf_harder, const char *) @@ -2368,9 +2270,9 @@ CTL_RO_NL_CGEN(je_malloc_conf_2_conf_harder, /******************************************************************************/ static int -thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; arena_t *oldarena; unsigned newind, oldind; @@ -2391,8 +2293,8 @@ thread_arena_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } - if (have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)) { + if (have_percpu_arena + && PERCPU_ARENA_ENABLED(opt_percpu_arena)) { if (newind < percpu_arena_ind_limit(opt_percpu_arena)) { /* * If perCPU 
arena is enabled, thread_arena @@ -2429,9 +2331,8 @@ CTL_RO_NL_GEN(thread_allocatedp, tsd_thread_allocatedp_get(tsd), uint64_t *) static int thread_tcache_ncached_max_read_sizeclass_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t bin_size = 0; /* Read the bin size from newp. */ @@ -2455,8 +2356,7 @@ label_return: static int thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; WRITEONLY(); if (newp != NULL) { @@ -2471,8 +2371,8 @@ thread_tcache_ncached_max_write_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } /* Get the length of the setting string safely. */ - char *end = (char *)memchr(settings, '\0', - CTL_MULTI_SETTING_MAX_LEN); + char *end = (char *)memchr( + settings, '\0', CTL_MULTI_SETTING_MAX_LEN); if (end == NULL) { ret = EINVAL; goto label_return; @@ -2502,10 +2402,9 @@ CTL_RO_NL_GEN(thread_deallocated, tsd_thread_deallocated_get(tsd), uint64_t) CTL_RO_NL_GEN(thread_deallocatedp, tsd_thread_deallocatedp_get(tsd), uint64_t *) static int -thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +thread_tcache_enabled_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; oldval = tcache_enabled_get(tsd); @@ -2524,10 +2423,9 @@ label_return: } static int -thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t oldval; /* 
pointer to tcache_t always exists even with tcache disabled. */ @@ -2547,7 +2445,7 @@ thread_tcache_max_ctl(tsd_t *tsd, const size_t *mib, new_tcache_max = TCACHE_MAXCLASS_LIMIT; } new_tcache_max = sz_s2u(new_tcache_max); - if(new_tcache_max != oldval) { + if (new_tcache_max != oldval) { thread_tcache_max_set(tsd, new_tcache_max); } } @@ -2558,9 +2456,8 @@ label_return: } static int -thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!tcache_available(tsd)) { @@ -2578,9 +2475,8 @@ label_return: } static int -thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_peak_read_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!config_stats) { return ENOENT; @@ -2595,9 +2491,8 @@ label_return: } static int -thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_peak_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!config_stats) { return ENOENT; @@ -2610,9 +2505,8 @@ label_return: } static int -thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_prof_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; if (!config_prof || !opt_prof) { @@ -2642,10 +2536,9 @@ label_return: } static int -thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +thread_prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t 
miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -2675,9 +2568,8 @@ label_return: } static int -thread_idle_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { +thread_idle_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { int ret; NEITHER_READ_NOR_WRITE(); @@ -2710,9 +2602,9 @@ label_return: /******************************************************************************/ static int -tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +tcache_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned tcache_ind; READONLY(); @@ -2729,9 +2621,9 @@ label_return: } static int -tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +tcache_flush_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned tcache_ind; WRITEONLY(); @@ -2744,9 +2636,9 @@ label_return: } static int -tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +tcache_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned tcache_ind; WRITEONLY(); @@ -2763,10 +2655,10 @@ label_return: static int arena_i_initialized_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - tsdn_t *tsdn = tsd_tsdn(tsd); + int ret; + tsdn_t *tsdn = tsd_tsdn(tsd); unsigned arena_ind; - bool initialized; + bool initialized; READONLY(); MIB_UNSIGNED(arena_ind, 1); @@ -2808,8 +2700,8 @@ arena_i_decay(tsdn_t *tsdn, 
unsigned arena_ind, bool all) { for (i = 0; i < narenas; i++) { if (tarenas[i] != NULL) { - arena_decay(tsdn, tarenas[i], false, - all); + arena_decay( + tsdn, tarenas[i], false, all); } } } else { @@ -2832,7 +2724,7 @@ arena_i_decay(tsdn_t *tsdn, unsigned arena_ind, bool all) { static int arena_i_decay_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; NEITHER_READ_NOR_WRITE(); @@ -2847,7 +2739,7 @@ label_return: static int arena_i_purge_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; NEITHER_READ_NOR_WRITE(); @@ -2913,12 +2805,12 @@ arena_reset_finish_background_thread(tsd_t *tsd, unsigned arena_ind) { static int arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; arena_t *arena; - ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, - newp, newlen, &arena_ind, &arena); + ret = arena_i_reset_destroy_helper( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { return ret; } @@ -2933,21 +2825,21 @@ arena_i_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, static int arena_i_destroy_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned arena_ind; - arena_t *arena; + int ret; + unsigned arena_ind; + arena_t *arena; ctl_arena_t *ctl_darena, *ctl_arena; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); - ret = arena_i_reset_destroy_helper(tsd, mib, miblen, oldp, oldlenp, - newp, newlen, &arena_ind, &arena); + ret = arena_i_reset_destroy_helper( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, &arena_ind, &arena); if (ret != 0) { goto label_return; } - if (arena_nthreads_get(arena, false) != 0 || arena_nthreads_get(arena, - true) 
!= 0) { + if (arena_nthreads_get(arena, false) != 0 + || arena_nthreads_get(arena, true) != 0) { ret = EFAULT; goto label_return; } @@ -2978,16 +2870,16 @@ label_return: static int arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; const char *dss = NULL; - unsigned arena_ind; - dss_prec_t dss_prec = dss_prec_limit; + unsigned arena_ind; + dss_prec_t dss_prec = dss_prec_limit; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(dss, const char *); MIB_UNSIGNED(arena_ind, 1); if (dss != NULL) { - int i; + int i; bool match = false; for (i = 0; i < dss_prec_limit; i++) { @@ -3009,18 +2901,19 @@ arena_i_dss_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, * 6.0.0. */ dss_prec_t dss_prec_old; - if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind == - ctl_arenas->narenas) { - if (dss_prec != dss_prec_limit && - extent_dss_prec_set(dss_prec)) { + if (arena_ind == MALLCTL_ARENAS_ALL + || arena_ind == ctl_arenas->narenas) { + if (dss_prec != dss_prec_limit + && extent_dss_prec_set(dss_prec)) { ret = EFAULT; goto label_return; } dss_prec_old = extent_dss_prec_get(); } else { arena_t *arena = arena_get(tsd_tsdn(tsd), arena_ind, false); - if (arena == NULL || (dss_prec != dss_prec_limit && - arena_dss_prec_set(arena, dss_prec))) { + if (arena == NULL + || (dss_prec != dss_prec_limit + && arena_dss_prec_set(arena, dss_prec))) { ret = EFAULT; goto label_return; } @@ -3071,7 +2964,7 @@ label_return: static int arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { - int ret; + int ret; unsigned arena_ind; arena_t *arena; @@ -3093,8 +2986,8 @@ arena_i_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, goto label_return; } - if (arena_decay_ms_set(tsd_tsdn(tsd), arena, state, - *(ssize_t *)newp)) { + if (arena_decay_ms_set( + tsd_tsdn(tsd), arena, state, *(ssize_t *)newp)) { ret = 
EFAULT; goto label_return; } @@ -3108,21 +3001,21 @@ label_return: static int arena_i_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, true); + return arena_i_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, true); } static int arena_i_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arena_i_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, false); + return arena_i_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, false); } static int arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; arena_t *arena; @@ -3147,8 +3040,8 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, arena_config_t config = arena_config_default; config.extent_hooks = new_extent_hooks; - arena = arena_init(tsd_tsdn(tsd), arena_ind, - &config); + arena = arena_init( + tsd_tsdn(tsd), arena_ind, &config); if (arena == NULL) { ret = EFAULT; goto label_return; @@ -3159,13 +3052,12 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, extent_hooks_t *new_extent_hooks JEMALLOC_CC_SILENCE_INIT(NULL); WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = arena_set_extent_hooks(tsd, - arena, new_extent_hooks); + old_extent_hooks = arena_set_extent_hooks( + tsd, arena, new_extent_hooks); READ(old_extent_hooks, extent_hooks_t *); } else { - old_extent_hooks = - ehooks_get_extent_hooks_ptr( - arena_get_ehooks(arena)); + old_extent_hooks = ehooks_get_extent_hooks_ptr( + arena_get_ehooks(arena)); READ(old_extent_hooks, extent_hooks_t *); } } @@ -3180,10 +3072,9 @@ label_return: } static int -arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, - 
size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned arena_ind; arena_t *arena; @@ -3194,14 +3085,14 @@ arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + if (arena_ind < narenas_total_get() + && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { size_t old_limit, new_limit; if (newp != NULL) { WRITE(new_limit, size_t); } - bool err = arena_retain_grow_limit_get_set(tsd, arena, - &old_limit, newp != NULL ? &new_limit : NULL); + bool err = arena_retain_grow_limit_get_set( + tsd, arena, &old_limit, newp != NULL ? &new_limit : NULL); if (!err) { READ(old_limit, size_t); ret = 0; @@ -3223,16 +3114,16 @@ label_return: * ARENA_NAME_LEN or the length of the name when it was set. 
*/ static int -arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned arena_ind; +arena_i_name_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; char *name JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind == MALLCTL_ARENAS_ALL || arena_ind >= - ctl_arenas->narenas) { + if (arena_ind == MALLCTL_ARENAS_ALL + || arena_ind >= ctl_arenas->narenas) { ret = EINVAL; goto label_return; } @@ -3272,8 +3163,7 @@ label_return: } static const ctl_named_node_t * -arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -3298,9 +3188,9 @@ label_return: /******************************************************************************/ static int -arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +arenas_narenas_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned narenas; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -3315,14 +3205,13 @@ label_return: } static int -arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen, bool dirty) { +arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen, bool dirty) { int ret; if (oldp != NULL && oldlenp != NULL) { - size_t oldval = (dirty ? arena_dirty_decay_ms_default_get() : - arena_muzzy_decay_ms_default_get()); + size_t oldval = (dirty ? 
arena_dirty_decay_ms_default_get() + : arena_muzzy_decay_ms_default_get()); READ(oldval, ssize_t); } if (newp != NULL) { @@ -3330,8 +3219,9 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, ret = EINVAL; goto label_return; } - if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) - : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { + if (dirty + ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; } @@ -3345,15 +3235,15 @@ label_return: static int arenas_dirty_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, true); + return arenas_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, true); } static int arenas_muzzy_decay_ms_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - return arenas_decay_ms_ctl_impl(tsd, mib, miblen, oldp, oldlenp, newp, - newlen, false); + return arenas_decay_ms_ctl_impl( + tsd, mib, miblen, oldp, oldlenp, newp, newlen, false); } CTL_RO_NL_GEN(arenas_quantum, QUANTUM, size_t) @@ -3367,8 +3257,7 @@ CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t) static const ctl_named_node_t * -arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > SC_NBINS) { return NULL; } @@ -3377,10 +3266,10 @@ arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, CTL_RO_NL_GEN(arenas_nlextents, SC_NSIZES - SC_NBINS, unsigned) CTL_RO_NL_GEN(arenas_lextent_i_size, - sz_index2size_unsafe(SC_NBINS+(szind_t)mib[2]), size_t) + sz_index2size_unsafe(SC_NBINS + 
(szind_t)mib[2]), size_t) static const ctl_named_node_t * -arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +arenas_lextent_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > SC_NSIZES - SC_NBINS) { return NULL; } @@ -3388,9 +3277,9 @@ arenas_lextent_i_index(tsdn_t *tsdn, const size_t *mib, } static int -arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -3411,10 +3300,9 @@ label_return: } static int -experimental_arenas_create_ext_ctl(tsd_t *tsd, - const size_t *mib, size_t miblen, +experimental_arenas_create_ext_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; + int ret; unsigned arena_ind; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); @@ -3435,22 +3323,21 @@ label_return: } static int -arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; - unsigned arena_ind; - void *ptr; +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + void *ptr; emap_full_alloc_ctx_t alloc_ctx; - bool ptr_not_present; - arena_t *arena; + bool ptr_not_present; + arena_t *arena; ptr = NULL; ret = EINVAL; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); WRITE(ptr, void *); - ptr_not_present = emap_full_alloc_ctx_try_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + ptr_not_present = emap_full_alloc_ctx_try_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (ptr_not_present || alloc_ctx.edata == NULL) { goto label_return; } @@ -3472,10 +3359,9 @@ label_return: 
/******************************************************************************/ static int -prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -3491,11 +3377,11 @@ prof_thread_active_init_ctl(tsd_t *tsd, const size_t *mib, ret = EINVAL; goto label_return; } - oldval = prof_thread_active_init_set(tsd_tsdn(tsd), - *(bool *)newp); + oldval = prof_thread_active_init_set( + tsd_tsdn(tsd), *(bool *)newp); } else { - oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) : - false; + oldval = opt_prof ? prof_thread_active_init_get(tsd_tsdn(tsd)) + : false; } READ(oldval, bool); @@ -3505,9 +3391,9 @@ label_return: } static int -prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -3543,9 +3429,9 @@ label_return: } static int -prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; const char *filename = NULL; if (!config_prof || !opt_prof) { @@ -3566,9 +3452,9 @@ label_return: } static int -prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_gdump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; bool oldval; if (!config_prof) { @@ -3596,9 +3482,9 @@ label_return: } static int 
-prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_prefix_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; const char *prefix = NULL; if (!config_prof || !opt_prof) { @@ -3616,9 +3502,9 @@ label_return: } static int -prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, - void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; +prof_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; size_t lg_sample = lg_prof_sample; if (!config_prof || !opt_prof) { @@ -3689,8 +3575,7 @@ experimental_hooks_prof_backtrace_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_backtrace_hook_t old_hook = - prof_backtrace_hook_get(); + prof_backtrace_hook_t old_hook = prof_backtrace_hook_get(); READ(old_hook, prof_backtrace_hook_t); } if (newp != NULL) { @@ -3712,8 +3597,8 @@ label_return: } static int -experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; if (oldp == NULL && newp == NULL) { @@ -3721,8 +3606,7 @@ experimental_hooks_prof_dump_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_dump_hook_t old_hook = - prof_dump_hook_get(); + prof_dump_hook_t old_hook = prof_dump_hook_get(); READ(old_hook, prof_dump_hook_t); } if (newp != NULL) { @@ -3740,8 +3624,8 @@ label_return: } static int -experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, 
void *newp, size_t newlen) { int ret; if (oldp == NULL && newp == NULL) { @@ -3749,8 +3633,7 @@ experimental_hooks_prof_sample_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_sample_hook_t old_hook = - prof_sample_hook_get(); + prof_sample_hook_t old_hook = prof_sample_hook_get(); READ(old_hook, prof_sample_hook_t); } if (newp != NULL) { @@ -3777,8 +3660,7 @@ experimental_hooks_prof_sample_free_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_sample_free_hook_t old_hook = - prof_sample_free_hook_get(); + prof_sample_free_hook_t old_hook = prof_sample_free_hook_get(); READ(old_hook, prof_sample_free_hook_t); } if (newp != NULL) { @@ -3795,7 +3677,6 @@ label_return: return ret; } - static int experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { @@ -3806,8 +3687,7 @@ experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } if (oldp != NULL) { - prof_threshold_hook_t old_hook = - prof_threshold_hook_get(); + prof_threshold_hook_t old_hook = prof_threshold_hook_get(); READ(old_hook, prof_threshold_hook_t); } if (newp != NULL) { @@ -3822,7 +3702,7 @@ label_return: static int experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; if (newp == NULL) { @@ -3864,10 +3744,10 @@ label_return: CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) -CTL_RO_CGEN(config_stats, stats_metadata_edata, ctl_stats->metadata_edata, - size_t) -CTL_RO_CGEN(config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, - size_t) +CTL_RO_CGEN( + config_stats, stats_metadata_edata, 
ctl_stats->metadata_edata, size_t) +CTL_RO_CGEN( + config_stats, stats_metadata_rtree, ctl_stats->metadata_rtree, size_t) CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) @@ -3884,10 +3764,10 @@ CTL_RO_CGEN(config_stats, stats_zero_reallocs, atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t) CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) -CTL_RO_GEN(stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, - ssize_t) -CTL_RO_GEN(stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms, - ssize_t) +CTL_RO_GEN( + stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) +CTL_RO_GEN( + stats_arenas_i_muzzy_decay_ms, arenas_i(mib[2])->muzzy_decay_ms, ssize_t) CTL_RO_GEN(stats_arenas_i_nthreads, arenas_i(mib[2])->nthreads, unsigned) CTL_RO_GEN(stats_arenas_i_uptime, nstime_ns(&arenas_i(mib[2])->astats->astats.uptime), uint64_t) @@ -3903,33 +3783,38 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, 
locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), + &arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, - arenas_i(mib[2])->astats->astats.base, - size_t) + arenas_i(mib[2])->astats->astats.base, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) @@ -3944,12 +3829,12 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_stashed_bytes, arenas_i(mib[2])->astats->astats.tcache_stashed_bytes, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_resident, - arenas_i(mib[2])->astats->astats.resident, - size_t) + arenas_i(mib[2])->astats->astats.resident, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, atomic_load_zu( - &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, - ATOMIC_RELAXED), size_t) + &arenas_i(mib[2])->astats->astats.pa_shard_stats.pac_stats.abandoned_vm, + ATOMIC_RELAXED), + size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, arenas_i(mib[2])->astats->secstats.bytes, size_t) @@ -3984,55 +3869,55 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nflushes, arenas_i(mib[2])->astats->astats.nflushes_large, uint64_t) /* Lock profiling related APIs below. 
*/ -#define RO_MUTEX_CTL_GEN(n, l) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_ops, \ - l.n_lock_ops, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_wait, \ - l.n_wait_times, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, \ - l.n_spin_acquired, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \ - l.n_owner_switches, uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ - nstime_ns(&l.tot_wait_time), uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ - nstime_ns(&l.max_wait_time), uint64_t) \ -CTL_RO_CGEN(config_stats, stats_##n##_max_num_thds, \ - l.max_n_thds, uint32_t) +#define RO_MUTEX_CTL_GEN(n, l) \ + CTL_RO_CGEN(config_stats, stats_##n##_num_ops, l.n_lock_ops, uint64_t) \ + CTL_RO_CGEN( \ + config_stats, stats_##n##_num_wait, l.n_wait_times, uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_num_spin_acq, l.n_spin_acquired, \ + uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_num_owner_switch, \ + l.n_owner_switches, uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_total_wait_time, \ + nstime_ns(&l.tot_wait_time), uint64_t) \ + CTL_RO_CGEN(config_stats, stats_##n##_max_wait_time, \ + nstime_ns(&l.max_wait_time), uint64_t) \ + CTL_RO_CGEN( \ + config_stats, stats_##n##_max_num_thds, l.max_n_thds, uint32_t) /* Global mutexes. 
*/ -#define OP(mtx) \ - RO_MUTEX_CTL_GEN(mutexes_##mtx, \ - ctl_stats->mutex_prof_data[global_prof_mutex_##mtx]) +#define OP(mtx) \ + RO_MUTEX_CTL_GEN(mutexes_##mtx, \ + ctl_stats->mutex_prof_data[global_prof_mutex_##mtx]) MUTEX_PROF_GLOBAL_MUTEXES #undef OP /* Per arena mutexes */ -#define OP(mtx) RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \ - arenas_i(mib[2])->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]) +#define OP(mtx) \ + RO_MUTEX_CTL_GEN(arenas_i_mutexes_##mtx, \ + arenas_i(mib[2]) \ + ->astats->astats.mutex_prof_data[arena_prof_mutex_##mtx]) MUTEX_PROF_ARENA_MUTEXES #undef OP /* tcache bin mutex */ -RO_MUTEX_CTL_GEN(arenas_i_bins_j_mutex, - arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) +RO_MUTEX_CTL_GEN( + arenas_i_bins_j_mutex, arenas_i(mib[2])->astats->bstats[mib[4]].mutex_data) #undef RO_MUTEX_CTL_GEN /* Resets all mutex stats, including global, arena and bin mutexes. */ static int -stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) { +stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { if (!config_stats) { return ENOENT; } tsdn_t *tsdn = tsd_tsdn(tsd); -#define MUTEX_PROF_RESET(mtx) \ - malloc_mutex_lock(tsdn, &mtx); \ - malloc_mutex_prof_data_reset(tsdn, &mtx); \ - malloc_mutex_unlock(tsdn, &mtx); +#define MUTEX_PROF_RESET(mtx) \ + malloc_mutex_lock(tsdn, &mtx); \ + malloc_mutex_prof_data_reset(tsdn, &mtx); \ + malloc_mutex_unlock(tsdn, &mtx); /* Global mutexes: ctl and prof. 
*/ MUTEX_PROF_RESET(ctl_mtx); @@ -4100,15 +3985,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, uint64_t) + arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, + uint64_t) static const ctl_named_node_t * -stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_bins_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > SC_NBINS) { return NULL; } @@ -4117,19 +4004,22 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, locked_read_u64_unsynchronized( - &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), + uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, 
size_t) static const ctl_named_node_t * -stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_lextents_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j > SC_NSIZES - SC_NBINS) { return NULL; } @@ -4137,21 +4027,21 @@ stats_arenas_i_lextents_j_index(tsdn_t *tsdn, const size_t *mib, } CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_ndirty, - arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].ndirty, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nmuzzy, - arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nmuzzy, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_nretained, - arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].nretained, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_dirty_bytes, - arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].dirty_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_muzzy_bytes, - arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].muzzy_bytes, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_extents_j_retained_bytes, - arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); + arenas_i(mib[2])->astats->estats[mib[4]].retained_bytes, size_t); static const ctl_named_node_t * -stats_arenas_i_extents_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_extents_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j >= SC_NPSIZES) { return NULL; } @@ -4182,7 +4072,8 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_slabs_ndirty_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.slabs[1].ndirty, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurge_passes, - 
arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, uint64_t); + arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurge_passes, + uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_npurges, arenas_i(mib[2])->astats->hpastats.nonderived_stats.npurges, uint64_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nhugifies, @@ -4194,66 +4085,92 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_ndehugifies, arenas_i(mib[2])->astats->hpastats.nonderived_stats.ndehugifies, uint64_t); /* Full, nonhuge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_full_slabs_npageslabs_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[0].ndirty, + size_t); /* Full, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_npageslabs_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_full_slabs_ndirty_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.full_slabs[1].ndirty, + size_t); /* Empty, nonhuge */ -CTL_RO_CGEN(config_stats, 
stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_empty_slabs_npageslabs_nonhuge, arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[0].ndirty, + size_t); /* Empty, huge */ CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_npageslabs_huge, arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].nactive, + size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_empty_slabs_ndirty_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, size_t); + arenas_i(mib[2])->astats->hpastats.psset_stats.empty_slabs[1].ndirty, + size_t); /* Nonfull, nonhuge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].npageslabs, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_nonhuge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0] + .npageslabs, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].nactive, +CTL_RO_CGEN(config_stats, + 
stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_nonhuge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0] + .nactive, size_t); -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0].ndirty, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_nonhuge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][0] + .ndirty, size_t); /* Nonfull, huge */ -CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].npageslabs, +CTL_RO_CGEN(config_stats, + stats_arenas_i_hpa_shard_nonfull_slabs_j_npageslabs_huge, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1] + .npageslabs, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_nactive_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].nactive, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1] + .nactive, size_t); CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_shard_nonfull_slabs_j_ndirty_huge, - arenas_i(mib[2])->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1].ndirty, + arenas_i(mib[2]) + ->astats->hpastats.psset_stats.nonfull_slabs[mib[5]][1] + .ndirty, size_t); static const ctl_named_node_t * -stats_arenas_i_hpa_shard_nonfull_slabs_j_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t j) { +stats_arenas_i_hpa_shard_nonfull_slabs_j_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { if (j >= PSSET_NPSIZES) { return NULL; } @@ -4271,8 +4188,7 @@ ctl_arenas_i_verify(size_t i) { } static const ctl_named_node_t * -stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +stats_arenas_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; 
malloc_mutex_lock(tsdn, &ctl_mtx); @@ -4291,7 +4207,7 @@ static int experimental_hooks_install_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; - if (oldp == NULL || oldlenp == NULL|| newp == NULL) { + if (oldp == NULL || oldlenp == NULL || newp == NULL) { ret = EINVAL; goto label_return; } @@ -4426,8 +4342,8 @@ label_return: * motivation from C++. */ static int -experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { int ret; assert(sizeof(inspect_extent_util_stats_verbose_t) @@ -4442,8 +4358,8 @@ experimental_utilization_query_ctl(tsd_t *tsd, const size_t *mib, void *ptr = NULL; WRITE(ptr, void *); - inspect_extent_util_stats_verbose_t *util_stats - = (inspect_extent_util_stats_verbose_t *)oldp; + inspect_extent_util_stats_verbose_t *util_stats = + (inspect_extent_util_stats_verbose_t *)oldp; inspect_extent_util_stats_verbose_get(tsd_tsdn(tsd), ptr, &util_stats->nfree, &util_stats->nregs, &util_stats->size, &util_stats->bin_nfree, &util_stats->bin_nregs, @@ -4565,7 +4481,7 @@ experimental_utilization_batch_query_ctl(tsd_t *tsd, const size_t *mib, goto label_return; } - void **ptrs = (void **)newp; + void **ptrs = (void **)newp; inspect_extent_util_stats_t *util_stats = (inspect_extent_util_stats_t *)oldp; size_t i; @@ -4581,8 +4497,8 @@ label_return: } static const ctl_named_node_t * -experimental_arenas_i_index(tsdn_t *tsdn, const size_t *mib, - size_t miblen, size_t i) { +experimental_arenas_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; malloc_mutex_lock(tsdn, &ctl_mtx); @@ -4597,8 +4513,8 @@ label_return: } static int -experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, 
size_t *oldlenp, void *newp, size_t newlen) { +experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { if (!config_stats) { return ENOENT; } @@ -4608,16 +4524,16 @@ experimental_arenas_i_pactivep_ctl(tsd_t *tsd, const size_t *mib, unsigned arena_ind; arena_t *arena; - int ret; - size_t *pactivep; + int ret; + size_t *pactivep; malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); READONLY(); MIB_UNSIGNED(arena_ind, 2); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { -#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || \ - defined(JEMALLOC_GCC_SYNC_ATOMICS) || defined(_MSC_VER) + if (arena_ind < narenas_total_get() + && (arena = arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { +#if defined(JEMALLOC_GCC_ATOMIC_ATOMICS) || defined(JEMALLOC_GCC_SYNC_ATOMICS) \ + || defined(_MSC_VER) /* Expose the underlying counter for fast read. */ pactivep = (size_t *)&(arena->pa_shard.nactive.repr); READ(pactivep, size_t *); @@ -4669,7 +4585,7 @@ label_return: typedef struct write_cb_packet_s write_cb_packet_t; struct write_cb_packet_s { write_cb_t *write_cb; - void *cbopaque; + void *cbopaque; }; static int @@ -4688,8 +4604,8 @@ experimental_prof_recent_alloc_dump_ctl(tsd_t *tsd, const size_t *mib, write_cb_packet_t write_cb_packet; ASSURED_WRITE(write_cb_packet, write_cb_packet_t); - prof_recent_alloc_dump(tsd, write_cb_packet.write_cb, - write_cb_packet.cbopaque); + prof_recent_alloc_dump( + tsd, write_cb_packet.write_cb, write_cb_packet.cbopaque); ret = 0; @@ -4702,12 +4618,12 @@ struct batch_alloc_packet_s { void **ptrs; size_t num; size_t size; - int flags; + int flags; }; static int -experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { +experimental_batch_alloc_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t 
newlen) { int ret; VERIFY_READ(size_t); @@ -4728,8 +4644,8 @@ label_return: static int prof_stats_bins_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned binind; + int ret; + unsigned binind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4754,8 +4670,8 @@ label_return: static int prof_stats_bins_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned binind; + int ret; + unsigned binind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4778,8 +4694,8 @@ label_return: } static const ctl_named_node_t * -prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +prof_stats_bins_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (!(config_prof && opt_prof && opt_prof_stats)) { return NULL; } @@ -4792,8 +4708,8 @@ prof_stats_bins_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, static int prof_stats_lextents_i_live_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned lextent_ind; + int ret; + unsigned lextent_ind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4818,8 +4734,8 @@ label_return: static int prof_stats_lextents_i_accum_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - unsigned lextent_ind; + int ret; + unsigned lextent_ind; prof_stats_t stats; if (!(config_prof && opt_prof && opt_prof_stats)) { @@ -4842,8 +4758,8 @@ label_return: } static const ctl_named_node_t * -prof_stats_lextents_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, - size_t i) { +prof_stats_lextents_i_index( + tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (!(config_prof && opt_prof && opt_prof_stats)) { return NULL; } 
diff --git a/src/decay.c b/src/decay.c index f75696dd..7bbce2a6 100644 --- a/src/decay.c +++ b/src/decay.c @@ -4,9 +4,8 @@ #include "jemalloc/internal/decay.h" static const uint64_t h_steps[SMOOTHSTEP_NSTEPS] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP +#define STEP(step, h, x, y) h, + SMOOTHSTEP #undef STEP }; @@ -21,8 +20,9 @@ decay_deadline_init(decay_t *decay) { if (decay_ms_read(decay) > 0) { nstime_t jitter; - nstime_init(&jitter, prng_range_u64(&decay->jitter_state, - nstime_ns(&decay->interval))); + nstime_init(&jitter, + prng_range_u64( + &decay->jitter_state, nstime_ns(&decay->interval))); nstime_add(&decay->deadline, &jitter); } } @@ -31,8 +31,8 @@ void decay_reinit(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { atomic_store_zd(&decay->time_ms, decay_ms, ATOMIC_RELAXED); if (decay_ms > 0) { - nstime_init(&decay->interval, (uint64_t)decay_ms * - KQU(1000000)); + nstime_init( + &decay->interval, (uint64_t)decay_ms * KQU(1000000)); nstime_idivide(&decay->interval, SMOOTHSTEP_NSTEPS); } @@ -52,7 +52,7 @@ decay_init(decay_t *decay, nstime_t *cur_time, ssize_t decay_ms) { decay->ceil_npages = 0; } if (malloc_mutex_init(&decay->mtx, "decay", WITNESS_RANK_DECAY, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } decay->purging = false; @@ -65,8 +65,8 @@ decay_ms_valid(ssize_t decay_ms) { if (decay_ms < -1) { return false; } - if (decay_ms == -1 || (uint64_t)decay_ms <= NSTIME_SEC_MAX * - KQU(1000)) { + if (decay_ms == -1 + || (uint64_t)decay_ms <= NSTIME_SEC_MAX * KQU(1000)) { return true; } return false; @@ -74,8 +74,8 @@ decay_ms_valid(ssize_t decay_ms) { static void decay_maybe_update_time(decay_t *decay, nstime_t *new_time) { - if (unlikely(!nstime_monotonic() && nstime_compare(&decay->epoch, - new_time) > 0)) { + if (unlikely(!nstime_monotonic() + && nstime_compare(&decay->epoch, new_time) > 0)) { /* * Time went backwards. 
Move the epoch back in time and * generate a new deadline, with the expectation that time @@ -115,11 +115,11 @@ decay_backlog_npages_limit(const decay_t *decay) { * placed as the newest record. */ static void -decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, - size_t current_npages) { +decay_backlog_update( + decay_t *decay, uint64_t nadvance_u64, size_t current_npages) { if (nadvance_u64 >= SMOOTHSTEP_NSTEPS) { - memset(decay->backlog, 0, (SMOOTHSTEP_NSTEPS-1) * - sizeof(size_t)); + memset(decay->backlog, 0, + (SMOOTHSTEP_NSTEPS - 1) * sizeof(size_t)); } else { size_t nadvance_z = (size_t)nadvance_u64; @@ -128,14 +128,15 @@ decay_backlog_update(decay_t *decay, uint64_t nadvance_u64, memmove(decay->backlog, &decay->backlog[nadvance_z], (SMOOTHSTEP_NSTEPS - nadvance_z) * sizeof(size_t)); if (nadvance_z > 1) { - memset(&decay->backlog[SMOOTHSTEP_NSTEPS - - nadvance_z], 0, (nadvance_z-1) * sizeof(size_t)); + memset(&decay->backlog[SMOOTHSTEP_NSTEPS - nadvance_z], + 0, (nadvance_z - 1) * sizeof(size_t)); } } - size_t npages_delta = (current_npages > decay->nunpurged) ? - current_npages - decay->nunpurged : 0; - decay->backlog[SMOOTHSTEP_NSTEPS-1] = npages_delta; + size_t npages_delta = (current_npages > decay->nunpurged) + ? 
current_npages - decay->nunpurged + : 0; + decay->backlog[SMOOTHSTEP_NSTEPS - 1] = npages_delta; if (config_debug) { if (current_npages > decay->ceil_npages) { @@ -165,18 +166,17 @@ decay_npages_purge_in(decay_t *decay, nstime_t *time, size_t npages_new) { npages_purge = npages_new; } else { uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1]; - assert(h_steps_max >= - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); - npages_purge = npages_new * (h_steps_max - - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + assert(h_steps_max >= h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); + npages_purge = npages_new + * (h_steps_max - h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]); npages_purge >>= SMOOTHSTEP_BFP; } return npages_purge; } bool -decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, - size_t npages_current) { +decay_maybe_advance_epoch( + decay_t *decay, nstime_t *new_time, size_t npages_current) { /* Handle possible non-monotonicity of time. */ decay_maybe_update_time(decay, new_time); @@ -202,8 +202,9 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, decay_backlog_update(decay, nadvance_u64, npages_current); decay->npages_limit = decay_backlog_npages_limit(decay); - decay->nunpurged = (decay->npages_limit > npages_current) ? - decay->npages_limit : npages_current; + decay->nunpurged = (decay->npages_limit > npages_current) + ? 
decay->npages_limit + : npages_current; return true; } @@ -226,21 +227,21 @@ decay_maybe_advance_epoch(decay_t *decay, nstime_t *new_time, */ static inline size_t decay_npurge_after_interval(decay_t *decay, size_t interval) { - size_t i; + size_t i; uint64_t sum = 0; for (i = 0; i < interval; i++) { sum += decay->backlog[i] * h_steps[i]; } for (; i < SMOOTHSTEP_NSTEPS; i++) { - sum += decay->backlog[i] * - (h_steps[i] - h_steps[i - interval]); + sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]); } return (size_t)(sum >> SMOOTHSTEP_BFP); } -uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, - uint64_t npages_threshold) { +uint64_t +decay_ns_until_purge( + decay_t *decay, size_t npages_current, uint64_t npages_threshold) { if (!decay_gradually(decay)) { return DECAY_UNBOUNDED_TIME_TO_PURGE; } @@ -278,7 +279,7 @@ uint64_t decay_ns_until_purge(decay_t *decay, size_t npages_current, } unsigned n_search = 0; - size_t target, npurge; + size_t target, npurge; while ((npurge_lb + npages_threshold < npurge_ub) && (lb + 2 < ub)) { target = (lb + ub) / 2; npurge = decay_npurge_after_interval(decay, target); diff --git a/src/ecache.c b/src/ecache.c index a242227d..20fcee9e 100644 --- a/src/ecache.c +++ b/src/ecache.c @@ -7,7 +7,7 @@ bool ecache_init(tsdn_t *tsdn, ecache_t *ecache, extent_state_t state, unsigned ind, bool delay_coalesce) { if (malloc_mutex_init(&ecache->mtx, "extents", WITNESS_RANK_EXTENTS, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } ecache->state = state; diff --git a/src/edata.c b/src/edata.c index 82b6f565..d71d1679 100644 --- a/src/edata.c +++ b/src/edata.c @@ -1,6 +1,5 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/jemalloc_internal_includes.h" -ph_gen(, edata_avail, edata_t, avail_link, - edata_esnead_comp) -ph_gen(, edata_heap, edata_t, heap_link, edata_snad_comp) +ph_gen(, edata_avail, edata_t, avail_link, edata_esnead_comp) + ph_gen(, edata_heap, 
edata_t, heap_link, edata_snad_comp) diff --git a/src/edata_cache.c b/src/edata_cache.c index 6bc1848c..3ac8273a 100644 --- a/src/edata_cache.c +++ b/src/edata_cache.c @@ -11,7 +11,7 @@ edata_cache_init(edata_cache_t *edata_cache, base_t *base) { */ atomic_store_zu(&edata_cache->count, 0, ATOMIC_RELAXED); if (malloc_mutex_init(&edata_cache->mtx, "edata_cache", - WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_EDATA_CACHE, malloc_mutex_rank_exclusive)) { return true; } edata_cache->base = base; @@ -63,8 +63,7 @@ edata_cache_fast_init(edata_cache_fast_t *ecs, edata_cache_t *fallback) { } static void -edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, - edata_cache_fast_t *ecs) { +edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, edata_cache_fast_t *ecs) { edata_t *edata; malloc_mutex_lock(tsdn, &ecs->fallback->mtx); for (int i = 0; i < EDATA_CACHE_FAST_FILL; i++) { @@ -80,8 +79,8 @@ edata_cache_fast_try_fill_from_fallback(tsdn_t *tsdn, edata_t * edata_cache_fast_get(tsdn_t *tsdn, edata_cache_fast_t *ecs) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_EDATA_CACHE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { assert(edata_list_inactive_first(&ecs->list) == NULL); @@ -118,7 +117,7 @@ edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) { * flush and disable pathways. 
*/ edata_t *edata; - size_t nflushed = 0; + size_t nflushed = 0; malloc_mutex_lock(tsdn, &ecs->fallback->mtx); while ((edata = edata_list_inactive_first(&ecs->list)) != NULL) { edata_list_inactive_remove(&ecs->list, edata); @@ -131,8 +130,8 @@ edata_cache_fast_flush_all(tsdn_t *tsdn, edata_cache_fast_t *ecs) { void edata_cache_fast_put(tsdn_t *tsdn, edata_cache_fast_t *ecs, edata_t *edata) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_EDATA_CACHE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_EDATA_CACHE, 0); if (ecs->disabled) { assert(edata_list_inactive_first(&ecs->list) == NULL); diff --git a/src/ehooks.c b/src/ehooks.c index 89e30409..d7abb960 100644 --- a/src/ehooks.c +++ b/src/ehooks.c @@ -27,9 +27,10 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, assert(alignment != 0); /* "primary" dss. */ - if (have_dss && dss_prec == dss_prec_primary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { + if (have_dss && dss_prec == dss_prec_primary + && (ret = extent_alloc_dss( + tsdn, arena, new_addr, size, alignment, zero, commit)) + != NULL) { return ret; } /* mmap. */ @@ -38,9 +39,10 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, return ret; } /* "secondary" dss. */ - if (have_dss && dss_prec == dss_prec_secondary && (ret = - extent_alloc_dss(tsdn, arena, new_addr, size, alignment, zero, - commit)) != NULL) { + if (have_dss && dss_prec == dss_prec_secondary + && (ret = extent_alloc_dss( + tsdn, arena, new_addr, size, alignment, zero, commit)) + != NULL) { return ret; } @@ -54,10 +56,11 @@ ehooks_default_alloc_impl(tsdn_t *tsdn, void *new_addr, size_t size, arena_t *arena = arena_get(tsdn, arena_ind, false); /* NULL arena indicates arena_create. */ assert(arena != NULL || alignment == BASE_BLOCK_MIN_ALIGN); - dss_prec_t dss = (arena == NULL) ? 
dss_prec_disabled : - (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); - void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, - zero, commit, dss); + dss_prec_t dss = (arena == NULL) + ? dss_prec_disabled + : (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED); + void *ret = extent_alloc_core( + tsdn, arena, new_addr, size, alignment, zero, commit, dss); if (have_madvise_huge && ret) { pages_set_thp_state(ret, size); } @@ -100,8 +103,8 @@ ehooks_default_destroy(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_commit_impl(void *addr, size_t offset, size_t length) { - return pages_commit((void *)((byte_t *)addr + (uintptr_t)offset), - length); + return pages_commit( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -112,8 +115,8 @@ ehooks_default_commit(extent_hooks_t *extent_hooks, void *addr, size_t size, bool ehooks_default_decommit_impl(void *addr, size_t offset, size_t length) { - return pages_decommit((void *)((byte_t *)addr + (uintptr_t)offset), - length); + return pages_decommit( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -125,8 +128,8 @@ ehooks_default_decommit(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_LAZY bool ehooks_default_purge_lazy_impl(void *addr, size_t offset, size_t length) { - return pages_purge_lazy((void *)((byte_t *)addr + (uintptr_t)offset), - length); + return pages_purge_lazy( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -143,8 +146,8 @@ ehooks_default_purge_lazy(extent_hooks_t *extent_hooks, void *addr, size_t size, #ifdef PAGES_CAN_PURGE_FORCED bool ehooks_default_purge_forced_impl(void *addr, size_t offset, size_t length) { - return pages_purge_forced((void *)((byte_t *)addr + - (uintptr_t)offset), length); + return pages_purge_forced( + (void *)((byte_t *)addr + (uintptr_t)offset), length); } static bool @@ -201,11 +204,11 @@ ehooks_default_merge_impl(tsdn_t 
*tsdn, void *addr_a, void *addr_b) { return true; } if (config_debug) { - edata_t *a = emap_edata_lookup(tsdn, &arena_emap_global, - addr_a); - bool head_a = edata_is_head_get(a); - edata_t *b = emap_edata_lookup(tsdn, &arena_emap_global, - addr_b); + edata_t *a = emap_edata_lookup( + tsdn, &arena_emap_global, addr_a); + bool head_a = edata_is_head_get(a); + edata_t *b = emap_edata_lookup( + tsdn, &arena_emap_global, addr_b); bool head_b = edata_is_head_get(b); emap_assert_mapped(tsdn, &arena_emap_global, a); emap_assert_mapped(tsdn, &arena_emap_global, b); @@ -254,22 +257,17 @@ ehooks_default_unguard_impl(void *guard1, void *guard2) { pages_unmark_guards(guard1, guard2); } -const extent_hooks_t ehooks_default_extent_hooks = { - ehooks_default_alloc, - ehooks_default_dalloc, - ehooks_default_destroy, - ehooks_default_commit, - ehooks_default_decommit, +const extent_hooks_t ehooks_default_extent_hooks = {ehooks_default_alloc, + ehooks_default_dalloc, ehooks_default_destroy, ehooks_default_commit, + ehooks_default_decommit, #ifdef PAGES_CAN_PURGE_LAZY - ehooks_default_purge_lazy, + ehooks_default_purge_lazy, #else - NULL, + NULL, #endif #ifdef PAGES_CAN_PURGE_FORCED - ehooks_default_purge_forced, + ehooks_default_purge_forced, #else - NULL, + NULL, #endif - ehooks_default_split, - ehooks_default_merge -}; + ehooks_default_split, ehooks_default_merge}; diff --git a/src/emap.c b/src/emap.c index f7d5c25a..54bfabab 100644 --- a/src/emap.c +++ b/src/emap.c @@ -16,10 +16,10 @@ emap_init(emap_t *emap, base_t *base, bool zeroed) { } void -emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t state) { - witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE); +emap_update_edata_state( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t state) { + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); edata_state_set(edata, state); @@ -28,10 +28,11 @@ 
emap_update_edata_state(tsdn_t *tsdn, emap_t *emap, edata_t *edata, rtree_ctx, (uintptr_t)edata_base_get(edata), /* dependent */ true, /* init_missing */ false); assert(elm1 != NULL); - rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE ? NULL : - rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_last_get(edata), /* dependent */ true, - /* init_missing */ false); + rtree_leaf_elm_t *elm2 = edata_size_get(edata) == PAGE + ? NULL + : rtree_leaf_elm_lookup(tsdn, &emap->rtree, rtree_ctx, + (uintptr_t)edata_last_get(edata), /* dependent */ true, + /* init_missing */ false); rtree_leaf_elm_state_update(tsdn, &emap->rtree, elm1, elm2, state); @@ -42,17 +43,17 @@ static inline edata_t * emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_pai_t pai, extent_state_t expected_state, bool forward, bool expanding) { - witness_assert_positive_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE); + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); assert(!edata_guarded_get(edata)); assert(!expanding || forward); assert(!edata_state_in_transition(expected_state)); - assert(expected_state == extent_state_dirty || - expected_state == extent_state_muzzy || - expected_state == extent_state_retained); + assert(expected_state == extent_state_dirty + || expected_state == extent_state_muzzy + || expected_state == extent_state_retained); - void *neighbor_addr = forward ? edata_past_get(edata) : - edata_before_get(edata); + void *neighbor_addr = forward ? edata_past_get(edata) + : edata_before_get(edata); /* * This is subtle; the rtree code asserts that its input pointer is * non-NULL, and this is a useful thing to check. 
But it's possible @@ -73,10 +74,10 @@ emap_try_acquire_edata_neighbor_impl(tsdn_t *tsdn, emap_t *emap, edata_t *edata, return NULL; } - rtree_contents_t neighbor_contents = rtree_leaf_elm_read(tsdn, - &emap->rtree, elm, /* dependent */ false); + rtree_contents_t neighbor_contents = rtree_leaf_elm_read( + tsdn, &emap->rtree, elm, /* dependent */ false); if (!extent_can_acquire_neighbor(edata, neighbor_contents, pai, - expected_state, forward, expanding)) { + expected_state, forward, expanding)) { return NULL; } @@ -109,8 +110,8 @@ emap_try_acquire_edata_neighbor_expand(tsdn_t *tsdn, emap_t *emap, } void -emap_release_edata(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - extent_state_t new_state) { +emap_release_edata( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, extent_state_t new_state) { assert(emap_edata_in_transition(tsdn, emap, edata)); assert(emap_edata_is_acquired(tsdn, emap, edata)); @@ -145,8 +146,8 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, contents.edata = edata; contents.metadata.szind = szind; contents.metadata.slab = slab; - contents.metadata.is_head = (edata == NULL) ? false : - edata_is_head_get(edata); + contents.metadata.is_head = (edata == NULL) ? false + : edata_is_head_get(edata); contents.metadata.state = (edata == NULL) ? 
0 : edata_state_get(edata); rtree_leaf_elm_write(tsdn, &emap->rtree, elm_a, contents); if (elm_b != NULL) { @@ -155,29 +156,33 @@ emap_rtree_write_acquired(tsdn_t *tsdn, emap_t *emap, rtree_leaf_elm_t *elm_a, } bool -emap_register_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind, bool slab) { +emap_register_boundary( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { assert(edata_state_get(edata) == extent_state_active); EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; - bool err = emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, - false, true, &elm_a, &elm_b); + bool err = emap_rtree_leaf_elms_lookup( + tsdn, emap, rtree_ctx, edata, false, true, &elm_a, &elm_b); if (err) { return true; } assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_a, - /* dependent */ false).edata == NULL); + /* dependent */ false) + .edata + == NULL); assert(rtree_leaf_elm_read(tsdn, &emap->rtree, elm_b, - /* dependent */ false).edata == NULL); + /* dependent */ false) + .edata + == NULL); emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, edata, szind, slab); return false; } /* Invoked *after* emap_register_boundary. 
*/ void -emap_register_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata, - szind_t szind) { +emap_register_interior( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind) { EMAP_DECLARE_RTREE_CTX; assert(edata_slab_get(edata)); @@ -226,10 +231,10 @@ emap_deregister_boundary(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; rtree_leaf_elm_t *elm_a, *elm_b; - emap_rtree_leaf_elms_lookup(tsdn, emap, rtree_ctx, edata, - true, false, &elm_a, &elm_b); - emap_rtree_write_acquired(tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, - false); + emap_rtree_leaf_elms_lookup( + tsdn, emap, rtree_ctx, edata, true, false, &elm_a, &elm_b); + emap_rtree_write_acquired( + tsdn, emap, elm_a, elm_b, NULL, SC_NSIZES, false); } void @@ -245,8 +250,8 @@ emap_deregister_interior(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { } void -emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, - bool slab) { +emap_remap( + tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, bool slab) { EMAP_DECLARE_RTREE_CTX; if (szind != SC_NSIZES) { @@ -274,8 +279,8 @@ emap_remap(tsdn_t *tsdn, emap_t *emap, edata_t *edata, szind_t szind, if (slab && edata_size_get(edata) > PAGE) { uintptr_t key = (uintptr_t)edata_past_get(edata) - (uintptr_t)PAGE; - rtree_write(tsdn, &emap->rtree, rtree_ctx, key, - contents); + rtree_write( + tsdn, &emap->rtree, rtree_ctx, key, contents); } } } @@ -344,29 +349,29 @@ emap_merge_commit(tsdn_t *tsdn, emap_t *emap, emap_prepare_t *prepare, clear_contents.metadata.state = (extent_state_t)0; if (prepare->lead_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->lead_elm_b, clear_contents); + rtree_leaf_elm_write( + tsdn, &emap->rtree, prepare->lead_elm_b, clear_contents); } rtree_leaf_elm_t *merged_b; if (prepare->trail_elm_b != NULL) { - rtree_leaf_elm_write(tsdn, &emap->rtree, - prepare->trail_elm_a, clear_contents); + rtree_leaf_elm_write( + tsdn, &emap->rtree, prepare->trail_elm_a, clear_contents); merged_b = 
prepare->trail_elm_b; } else { merged_b = prepare->trail_elm_a; } - emap_rtree_write_acquired(tsdn, emap, prepare->lead_elm_a, merged_b, - lead, SC_NSIZES, false); + emap_rtree_write_acquired( + tsdn, emap, prepare->lead_elm_a, merged_b, lead, SC_NSIZES, false); } void emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { EMAP_DECLARE_RTREE_CTX; - rtree_contents_t contents = rtree_read(tsdn, &emap->rtree, rtree_ctx, - (uintptr_t)edata_base_get(edata)); + rtree_contents_t contents = rtree_read( + tsdn, &emap->rtree, rtree_ctx, (uintptr_t)edata_base_get(edata)); assert(contents.edata == edata); assert(contents.metadata.is_head == edata_is_head_get(edata)); assert(contents.metadata.state == edata_state_get(edata)); @@ -375,12 +380,12 @@ emap_do_assert_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { void emap_do_assert_not_mapped(tsdn_t *tsdn, emap_t *emap, edata_t *edata) { emap_full_alloc_ctx_t context1 = {0}; - emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_base_get(edata), - &context1); + emap_full_alloc_ctx_try_lookup( + tsdn, emap, edata_base_get(edata), &context1); assert(context1.edata == NULL); emap_full_alloc_ctx_t context2 = {0}; - emap_full_alloc_ctx_try_lookup(tsdn, emap, edata_last_get(edata), - &context2); + emap_full_alloc_ctx_try_lookup( + tsdn, emap, edata_last_get(edata), &context2); assert(context2.edata == NULL); } diff --git a/src/eset.c b/src/eset.c index b4666e2c..4a427d78 100644 --- a/src/eset.c +++ b/src/eset.c @@ -48,32 +48,32 @@ eset_nbytes_get(eset_t *eset, pszind_t pind) { static void eset_stats_add(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, - ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nextents, cur + 1, - ATOMIC_RELAXED); + size_t cur = atomic_load_zu( + &eset->bin_stats[pind].nextents, ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nextents, cur + 1, ATOMIC_RELAXED); cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, 
ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nbytes, cur + sz, - ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nbytes, cur + sz, ATOMIC_RELAXED); } static void eset_stats_sub(eset_t *eset, pszind_t pind, size_t sz) { - size_t cur = atomic_load_zu(&eset->bin_stats[pind].nextents, - ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nextents, cur - 1, - ATOMIC_RELAXED); + size_t cur = atomic_load_zu( + &eset->bin_stats[pind].nextents, ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nextents, cur - 1, ATOMIC_RELAXED); cur = atomic_load_zu(&eset->bin_stats[pind].nbytes, ATOMIC_RELAXED); - atomic_store_zu(&eset->bin_stats[pind].nbytes, cur - sz, - ATOMIC_RELAXED); + atomic_store_zu( + &eset->bin_stats[pind].nbytes, cur - sz, ATOMIC_RELAXED); } void eset_insert(eset_t *eset, edata_t *edata) { assert(edata_state_get(edata) == eset->state); - size_t size = edata_size_get(edata); - size_t psz = sz_psz_quantize_floor(size); + size_t size = edata_size_get(edata); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); edata_cmp_summary_t edata_cmp_summary = edata_cmp_summary_get(edata); @@ -86,8 +86,9 @@ eset_insert(eset_t *eset, edata_t *edata) { * There's already a min element; update the summary if we're * about to insert a lower one. */ - if (edata_cmp_summary_comp(edata_cmp_summary, - eset->bins[pind].heap_min) < 0) { + if (edata_cmp_summary_comp( + edata_cmp_summary, eset->bins[pind].heap_min) + < 0) { eset->bins[pind].heap_min = edata_cmp_summary; } } @@ -104,19 +105,18 @@ eset_insert(eset_t *eset, edata_t *edata) { * don't need an atomic fetch-add; we can get by with a load followed by * a store. 
*/ - size_t cur_eset_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); - atomic_store_zu(&eset->npages, cur_eset_npages + npages, - ATOMIC_RELAXED); + size_t cur_eset_npages = atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + atomic_store_zu( + &eset->npages, cur_eset_npages + npages, ATOMIC_RELAXED); } void eset_remove(eset_t *eset, edata_t *edata) { - assert(edata_state_get(edata) == eset->state || - edata_state_in_transition(edata_state_get(edata))); + assert(edata_state_get(edata) == eset->state + || edata_state_in_transition(edata_state_get(edata))); - size_t size = edata_size_get(edata); - size_t psz = sz_psz_quantize_floor(size); + size_t size = edata_size_get(edata); + size_t psz = sz_psz_quantize_floor(size); pszind_t pind = sz_psz2ind(psz); if (config_stats) { eset_stats_sub(eset, pind, size); @@ -136,8 +136,9 @@ eset_remove(eset_t *eset, edata_t *edata) { * summaries of the removed element and the min element should * compare equal. */ - if (edata_cmp_summary_comp(edata_cmp_summary, - eset->bins[pind].heap_min) == 0) { + if (edata_cmp_summary_comp( + edata_cmp_summary, eset->bins[pind].heap_min) + == 0) { eset->bins[pind].heap_min = edata_cmp_summary_get( edata_heap_first(&eset->bins[pind].heap)); } @@ -148,35 +149,35 @@ eset_remove(eset_t *eset, edata_t *edata) { * As in eset_insert, we hold eset->mtx and so don't need atomic * operations for updating eset->npages. 
*/ - size_t cur_extents_npages = - atomic_load_zu(&eset->npages, ATOMIC_RELAXED); + size_t cur_extents_npages = atomic_load_zu( + &eset->npages, ATOMIC_RELAXED); assert(cur_extents_npages >= npages); - atomic_store_zu(&eset->npages, - cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); + atomic_store_zu(&eset->npages, cur_extents_npages - (size >> LG_PAGE), + ATOMIC_RELAXED); } static edata_t * -eset_enumerate_alignment_search(eset_t *eset, size_t size, pszind_t bin_ind, - size_t alignment) { +eset_enumerate_alignment_search( + eset_t *eset, size_t size, pszind_t bin_ind, size_t alignment) { if (edata_heap_empty(&eset->bins[bin_ind].heap)) { return NULL; } - edata_t *edata = NULL; + edata_t *edata = NULL; edata_heap_enumerate_helper_t helper; edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, - ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); - while ((edata = - edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != - NULL) { + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + while ((edata = edata_heap_enumerate_next( + &eset->bins[bin_ind].heap, &helper)) + != NULL) { uintptr_t base = (uintptr_t)edata_base_get(edata); - size_t candidate_size = edata_size_get(edata); + size_t candidate_size = edata_size_get(edata); if (candidate_size < size) { continue; } - uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, - PAGE_CEILING(alignment)); + uintptr_t next_align = ALIGNMENT_CEILING( + (uintptr_t)base, PAGE_CEILING(alignment)); if (base > next_align || base + candidate_size <= next_align) { /* Overflow or not crossing the next alignment. 
*/ continue; @@ -198,19 +199,20 @@ eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, return NULL; } - edata_t *ret = NULL, *edata = NULL; + edata_t *ret = NULL, *edata = NULL; edata_heap_enumerate_helper_t helper; edata_heap_enumerate_prepare(&eset->bins[bin_ind].heap, &helper, - ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue)/sizeof(void *)); - while ((edata = - edata_heap_enumerate_next(&eset->bins[bin_ind].heap, &helper)) != - NULL) { - if ((!exact_only && edata_size_get(edata) >= size) || - (exact_only && edata_size_get(edata) == size)) { - edata_cmp_summary_t temp_summ = - edata_cmp_summary_get(edata); - if (ret == NULL || edata_cmp_summary_comp(temp_summ, - *ret_summ) < 0) { + ESET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); + while ((edata = edata_heap_enumerate_next( + &eset->bins[bin_ind].heap, &helper)) + != NULL) { + if ((!exact_only && edata_size_get(edata) >= size) + || (exact_only && edata_size_get(edata) == size)) { + edata_cmp_summary_t temp_summ = edata_cmp_summary_get( + edata); + if (ret == NULL + || edata_cmp_summary_comp(temp_summ, *ret_summ) + < 0) { ret = edata; *ret_summ = temp_summ; } @@ -225,8 +227,8 @@ eset_enumerate_search(eset_t *eset, size_t size, pszind_t bin_ind, * requirement. For each size, try only the first extent in the heap. 
*/ static edata_t * -eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, - size_t alignment) { +eset_fit_alignment( + eset_t *eset, size_t min_size, size_t max_size, size_t alignment) { pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(min_size)); pszind_t pind_max = sz_psz2ind(sz_psz_quantize_ceil(max_size)); @@ -234,26 +236,26 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(min_size)); if (sz_large_size_classes_disabled() && pind != pind_prev) { edata_t *ret = NULL; - ret = eset_enumerate_alignment_search(eset, min_size, pind_prev, - alignment); + ret = eset_enumerate_alignment_search( + eset, min_size, pind_prev, alignment); if (ret != NULL) { return ret; } } for (pszind_t i = - (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); - i < pind_max; - i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < pind_max; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(i < SC_NPSIZES); assert(!edata_heap_empty(&eset->bins[i].heap)); - edata_t *edata = edata_heap_first(&eset->bins[i].heap); + edata_t *edata = edata_heap_first(&eset->bins[i].heap); uintptr_t base = (uintptr_t)edata_base_get(edata); - size_t candidate_size = edata_size_get(edata); + size_t candidate_size = edata_size_get(edata); assert(candidate_size >= min_size); - uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, - PAGE_CEILING(alignment)); + uintptr_t next_align = ALIGNMENT_CEILING( + (uintptr_t)base, PAGE_CEILING(alignment)); if (base > next_align || base + candidate_size <= next_align) { /* Overflow or not crossing the next alignment. */ continue; @@ -279,22 +281,23 @@ eset_fit_alignment(eset_t *eset, size_t min_size, size_t max_size, * for others. 
*/ static edata_t * -eset_first_fit(eset_t *eset, size_t size, bool exact_only, - unsigned lg_max_fit) { - edata_t *ret = NULL; +eset_first_fit( + eset_t *eset, size_t size, bool exact_only, unsigned lg_max_fit) { + edata_t *ret = NULL; edata_cmp_summary_t ret_summ JEMALLOC_CC_SILENCE_INIT({0}); pszind_t pind = sz_psz2ind(sz_psz_quantize_ceil(size)); if (exact_only) { if (sz_large_size_classes_disabled()) { - pszind_t pind_prev = - sz_psz2ind(sz_psz_quantize_floor(size)); + pszind_t pind_prev = sz_psz2ind( + sz_psz_quantize_floor(size)); return eset_enumerate_search(eset, size, pind_prev, /* exact_only */ true, &ret_summ); } else { - return edata_heap_empty(&eset->bins[pind].heap) ? NULL: - edata_heap_first(&eset->bins[pind].heap); + return edata_heap_empty(&eset->bins[pind].heap) + ? NULL + : edata_heap_first(&eset->bins[pind].heap); } } @@ -321,15 +324,15 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, * usize and thus should be enumerated. */ pszind_t pind_prev = sz_psz2ind(sz_psz_quantize_floor(size)); - if (sz_large_size_classes_disabled() && pind != pind_prev){ + if (sz_large_size_classes_disabled() && pind != pind_prev) { ret = eset_enumerate_search(eset, size, pind_prev, /* exact_only */ false, &ret_summ); } for (pszind_t i = - (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); - i < ESET_NPSIZES; - i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { + (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)pind); + i < ESET_NPSIZES; + i = (pszind_t)fb_ffs(eset->bitmap, ESET_NPSIZES, (size_t)i + 1)) { assert(!edata_heap_empty(&eset->bins[i].heap)); if (lg_max_fit == SC_PTR_BITS) { /* @@ -342,8 +345,9 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, if ((sz_pind2sz(i) >> lg_max_fit) > size) { break; } - if (ret == NULL || edata_cmp_summary_comp( - eset->bins[i].heap_min, ret_summ) < 0) { + if (ret == NULL + || edata_cmp_summary_comp(eset->bins[i].heap_min, ret_summ) + < 0) { /* * We grab the edata as early as 
possible, even though * we might change it later. Practically, a large @@ -354,9 +358,10 @@ eset_first_fit(eset_t *eset, size_t size, bool exact_only, edata_t *edata = edata_heap_first(&eset->bins[i].heap); assert(edata_size_get(edata) >= size); assert(ret == NULL || edata_snad_comp(edata, ret) < 0); - assert(ret == NULL || edata_cmp_summary_comp( - eset->bins[i].heap_min, - edata_cmp_summary_get(edata)) == 0); + assert(ret == NULL + || edata_cmp_summary_comp(eset->bins[i].heap_min, + edata_cmp_summary_get(edata)) + == 0); ret = edata; ret_summ = eset->bins[i].heap_min; } diff --git a/src/extent.c b/src/extent.c index 03a3fdd8..0a23bbd9 100644 --- a/src/extent.c +++ b/src/extent.c @@ -19,7 +19,7 @@ size_t opt_process_madvise_max_batch = #else 0 #endif - ; +; static bool extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); @@ -29,8 +29,8 @@ static bool extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length, bool growing_retained); static edata_t *extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks); -static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b, bool holding_core_locks); +static bool extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, + edata_t *a, edata_t *b, bool holding_core_locks); /* Used exclusively for gdump triggering. */ static atomic_zu_t curpages; @@ -42,7 +42,7 @@ static atomic_zu_t highpages; * definition. 
*/ -static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); +static void extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata); static edata_t *extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *expand_edata, size_t usize, size_t alignment, bool zero, bool *commit, bool growing_retained, bool guarded); @@ -51,8 +51,8 @@ static edata_t *extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t *extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *expand_edata, size_t size, size_t alignment, bool zero, bool *commit, bool guarded); -static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, - edata_t *edata, size_t offset, size_t length); +static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, + edata_t *edata, size_t offset, size_t length); /******************************************************************************/ @@ -73,8 +73,8 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); bool coalesced; - edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, - edata, &coalesced); + edata = extent_try_coalesce( + tsdn, pac, ehooks, ecache, edata, &coalesced); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); if (!coalesced) { @@ -90,10 +90,10 @@ ecache_alloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, bool guarded) { assert(size != 0); assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool commit = true; + bool commit = true; edata_t *edata = extent_recycle(tsdn, pac, ehooks, ecache, expand_edata, size, alignment, zero, &commit, false, guarded); assert(edata == NULL || edata_pai_get(edata) == EXTENT_PAI_PAC); @@ -107,10 +107,10 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, 
ehooks_t *ehooks, ecache_t *ecache, bool guarded) { assert(size != 0); assert(alignment != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - bool commit = true; + bool commit = true; edata_t *edata = extent_alloc_retained(tsdn, pac, ehooks, expand_edata, size, alignment, zero, &commit, guarded); if (edata == NULL) { @@ -131,10 +131,11 @@ ecache_alloc_grow(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, */ return NULL; } - void *new_addr = (expand_edata == NULL) ? NULL : - edata_past_get(expand_edata); - edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, - size, alignment, zero, &commit, + void *new_addr = (expand_edata == NULL) + ? NULL + : edata_past_get(expand_edata); + edata = extent_alloc_wrapper(tsdn, pac, ehooks, new_addr, size, + alignment, zero, &commit, /* growing_retained */ false); } @@ -148,8 +149,8 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); assert(edata_pai_get(edata) == EXTENT_PAI_PAC); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_addr_set(edata, edata_base_get(edata)); edata_zeroed_set(edata, false); @@ -158,8 +159,8 @@ ecache_dalloc(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } edata_t * -ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - ecache_t *ecache, size_t npages_min) { +ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, + size_t npages_min) { malloc_mutex_lock(tsdn, &ecache->mtx); /* @@ -194,8 +195,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, break; } /* Try to coalesce. 
*/ - if (extent_try_delayed_coalesce(tsdn, pac, ehooks, ecache, - edata)) { + if (extent_try_delayed_coalesce( + tsdn, pac, ehooks, ecache, edata)) { break; } /* @@ -211,8 +212,8 @@ ecache_evict(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, switch (ecache->state) { case extent_state_dirty: case extent_state_muzzy: - emap_update_edata_state(tsdn, pac->emap, edata, - extent_state_active); + emap_update_edata_state( + tsdn, pac->emap, edata, extent_state_active); break; case extent_state_retained: extent_deregister(tsdn, pac, edata); @@ -238,16 +239,16 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool growing_retained) { size_t sz = edata_size_get(edata); if (config_stats) { - atomic_fetch_add_zu(&pac->stats->abandoned_vm, sz, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &pac->stats->abandoned_vm, sz, ATOMIC_RELAXED); } /* * Leak extent after making sure its pages have already been purged, so * that this is only a virtual memory leak. */ if (ecache->state == extent_state_dirty) { - if (extent_purge_lazy_impl(tsdn, ehooks, edata, 0, sz, - growing_retained)) { + if (extent_purge_lazy_impl( + tsdn, ehooks, edata, 0, sz, growing_retained)) { extent_purge_forced_impl(tsdn, ehooks, edata, 0, edata_size_get(edata), growing_retained); } @@ -256,20 +257,20 @@ extents_abandon_vm(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } static void -extent_deactivate_locked_impl(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, - edata_t *edata) { +extent_deactivate_locked_impl( + tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &ecache->mtx); assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); emap_update_edata_state(tsdn, pac->emap, edata, ecache->state); - eset_t *eset = edata_guarded_get(edata) ? &ecache->guarded_eset : - &ecache->eset; + eset_t *eset = edata_guarded_get(edata) ? 
&ecache->guarded_eset + : &ecache->eset; eset_insert(eset, edata); } static void -extent_deactivate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, - edata_t *edata) { +extent_deactivate_locked( + tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, edata_t *edata) { assert(edata_state_get(edata) == extent_state_active); extent_deactivate_locked_impl(tsdn, pac, ecache, edata); } @@ -282,11 +283,11 @@ extent_deactivate_check_state_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, } static void -extent_activate_locked(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, - edata_t *edata) { +extent_activate_locked( + tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, eset_t *eset, edata_t *edata) { assert(edata_arena_ind_get(edata) == ecache_ind_get(ecache)); - assert(edata_state_get(edata) == ecache->state || - edata_state_get(edata) == extent_state_merging); + assert(edata_state_get(edata) == ecache->state + || edata_state_get(edata) == extent_state_merging); eset_remove(eset, edata); emap_update_edata_state(tsdn, pac->emap, edata, extent_state_active); @@ -296,16 +297,18 @@ void extent_gdump_add(tsdn_t *tsdn, const edata_t *edata) { cassert(config_prof); /* prof_gdump() requirement. 
*/ - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (opt_prof && edata_state_get(edata) == extent_state_active) { size_t nadd = edata_size_get(edata) >> LG_PAGE; - size_t cur = atomic_fetch_add_zu(&curpages, nadd, - ATOMIC_RELAXED) + nadd; + size_t cur = atomic_fetch_add_zu( + &curpages, nadd, ATOMIC_RELAXED) + + nadd; size_t high = atomic_load_zu(&highpages, ATOMIC_RELAXED); - while (cur > high && !atomic_compare_exchange_weak_zu( - &highpages, &high, cur, ATOMIC_RELAXED, ATOMIC_RELAXED)) { + while (cur > high + && !atomic_compare_exchange_weak_zu(&highpages, &high, cur, + ATOMIC_RELAXED, ATOMIC_RELAXED)) { /* * Don't refresh cur, because it may have decreased * since this thread lost the highpages update race. @@ -337,7 +340,7 @@ extent_register_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump_add) { * prevents other threads from accessing the edata. */ if (emap_register_boundary(tsdn, pac->emap, edata, SC_NSIZES, - /* slab */ false)) { + /* slab */ false)) { return true; } @@ -368,8 +371,7 @@ extent_reregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { * Removes all pointers to the given extent from the global rtree. */ static void -extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, - bool gdump) { +extent_deregister_impl(tsdn_t *tsdn, pac_t *pac, edata_t *edata, bool gdump) { emap_deregister_boundary(tsdn, pac->emap, edata); if (config_prof && gdump) { @@ -383,8 +385,7 @@ extent_deregister(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { } static void -extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, - edata_t *edata) { +extent_deregister_no_gdump_sub(tsdn_t *tsdn, pac_t *pac, edata_t *edata) { extent_deregister_impl(tsdn, pac, edata, false); } @@ -411,7 +412,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_t *edata; - eset_t *eset = guarded ? 
&ecache->guarded_eset : &ecache->eset; + eset_t *eset = guarded ? &ecache->guarded_eset : &ecache->eset; if (expand_edata != NULL) { edata = emap_try_acquire_edata_neighbor_expand(tsdn, pac->emap, expand_edata, EXTENT_PAI_PAC, ecache->state); @@ -419,8 +420,8 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* NOLINTNEXTLINE(readability-suspicious-call-argument) */ extent_assert_can_expand(expand_edata, edata); if (edata_size_get(edata) < size) { - emap_release_edata(tsdn, pac->emap, edata, - ecache->state); + emap_release_edata( + tsdn, pac->emap, edata, ecache->state); edata = NULL; } } @@ -435,7 +436,8 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * put a cap on how big an extent we can split for a request. */ unsigned lg_max_fit = ecache->delay_coalesce - ? (unsigned)opt_lg_extent_max_active_fit : SC_PTR_BITS; + ? (unsigned)opt_lg_extent_max_active_fit + : SC_PTR_BITS; /* * If split and merge are not allowed (Windows w/o retain), try @@ -446,8 +448,7 @@ extent_recycle_extract(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * allocations. */ bool exact_only = (!maps_coalesce && !opt_retain) || guarded; - edata = eset_fit(eset, size, alignment, exact_only, - lg_max_fit); + edata = eset_fit(eset, size, alignment, exact_only, lg_max_fit); } if (edata == NULL) { return NULL; @@ -489,10 +490,11 @@ extent_split_interior(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* The result of splitting, in case of success. */ edata_t **edata, edata_t **lead, edata_t **trail, /* The mess to clean up, in case of error. 
*/ - edata_t **to_leak, edata_t **to_salvage, - edata_t *expand_edata, size_t size, size_t alignment) { + edata_t **to_leak, edata_t **to_salvage, edata_t *expand_edata, size_t size, + size_t alignment) { size_t leadsize = ALIGNMENT_CEILING((uintptr_t)edata_base_get(*edata), - PAGE_CEILING(alignment)) - (uintptr_t)edata_base_get(*edata); + PAGE_CEILING(alignment)) + - (uintptr_t)edata_base_get(*edata); assert(expand_edata == NULL || leadsize == 0); if (edata_size_get(*edata) < leadsize + size) { return extent_split_interior_cant_alloc; @@ -547,14 +549,14 @@ extent_recycle_split(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, assert(!edata_guarded_get(edata) || size == edata_size_get(edata)); malloc_mutex_assert_owner(tsdn, &ecache->mtx); - edata_t *lead; - edata_t *trail; - edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *lead; + edata_t *trail; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); - extent_split_interior_result_t result = extent_split_interior( - tsdn, pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, - expand_edata, size, alignment); + extent_split_interior_result_t result = extent_split_interior(tsdn, pac, + ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, expand_edata, + size, alignment); if (!maps_coalesce && result != extent_split_interior_ok && !opt_retain) { @@ -615,8 +617,8 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, malloc_mutex_lock(tsdn, &ecache->mtx); - edata_t *edata = extent_recycle_extract(tsdn, pac, ehooks, ecache, - expand_edata, size, alignment, guarded); + edata_t *edata = extent_recycle_extract( + tsdn, pac, ehooks, ecache, expand_edata, size, alignment, guarded); if (edata == NULL) { malloc_mutex_unlock(tsdn, &ecache->mtx); return NULL; @@ -630,8 +632,8 @@ extent_recycle(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } assert(edata_state_get(edata) == extent_state_active); - if (extent_commit_zero(tsdn, 
ehooks, edata, *commit, zero, - growing_retained)) { + if (extent_commit_zero( + tsdn, ehooks, edata, *commit, zero, growing_retained)) { extent_record(tsdn, pac, ehooks, ecache, edata); return NULL; } @@ -660,16 +662,16 @@ extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, * be within the range of [0, 2 * (HUGEPAGE - 1)]. */ void *huge_addr = HUGEPAGE_ADDR2BASE(addr); - void *huge_end = HUGEPAGE_ADDR2BASE((void *)((byte_t *)addr + - (uintptr_t)(size + HUGEPAGE - 1))); + void *huge_end = HUGEPAGE_ADDR2BASE( + (void *)((byte_t *)addr + (uintptr_t)(size + HUGEPAGE - 1))); assert((uintptr_t)huge_end > (uintptr_t)huge_addr); size_t huge_size = (uintptr_t)huge_end - (uintptr_t)huge_addr; - assert(huge_size <= (size + ((HUGEPAGE - 1) << 1)) && - huge_size >= size); + assert( + huge_size <= (size + ((HUGEPAGE - 1) << 1)) && huge_size >= size); - if (opt_metadata_thp == metadata_thp_always || - pac_thp->auto_thp_switched) { + if (opt_metadata_thp == metadata_thp_always + || pac_thp->auto_thp_switched) { pages_huge(huge_addr, huge_size); } else { assert(opt_metadata_thp == metadata_thp_auto); @@ -687,8 +689,10 @@ extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, if (edata != NULL) { edata_addr_set(edata, huge_addr); edata_size_set(edata, huge_size); - edata_list_active_append(&pac_thp->thp_lazy_list, edata); - atomic_fetch_add_u(&pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED); + edata_list_active_append( + &pac_thp->thp_lazy_list, edata); + atomic_fetch_add_u( + &pac_thp->n_thp_lazy, 1, ATOMIC_RELAXED); } malloc_mutex_unlock(tsdn, &pac_thp->lock); } @@ -702,8 +706,8 @@ extent_handle_huge_arena_thp(tsdn_t *tsdn, pac_thp_t *pac_thp, * virtual memory ranges retained by each shard. 
*/ static edata_t * -extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - size_t size, size_t alignment, bool zero, bool *commit) { +extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, + size_t alignment, bool zero, bool *commit) { malloc_mutex_assert_owner(tsdn, &pac->grow_mtx); size_t alloc_size_min = size + PAGE_CEILING(alignment) - PAGE; @@ -715,10 +719,10 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, * Find the next extent size in the series that would be large enough to * satisfy this request. */ - size_t alloc_size; + size_t alloc_size; pszind_t exp_grow_skip; - bool err = exp_grow_size_prepare(&pac->exp_grow, alloc_size_min, - &alloc_size, &exp_grow_skip); + bool err = exp_grow_size_prepare( + &pac->exp_grow, alloc_size_min, &alloc_size, &exp_grow_skip); if (err) { goto label_err; } @@ -730,8 +734,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool zeroed = false; bool committed = false; - void *ptr = ehooks_alloc(tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, - &committed); + void *ptr = ehooks_alloc( + tsdn, ehooks, NULL, alloc_size, PAGE, &zeroed, &committed); if (ptr == NULL) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -752,23 +756,23 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, *commit = true; } - edata_t *lead; - edata_t *trail; - edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); + edata_t *lead; + edata_t *trail; + edata_t *to_leak JEMALLOC_CC_SILENCE_INIT(NULL); edata_t *to_salvage JEMALLOC_CC_SILENCE_INIT(NULL); - extent_split_interior_result_t result = extent_split_interior(tsdn, - pac, ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, - size, alignment); + extent_split_interior_result_t result = extent_split_interior(tsdn, pac, + ehooks, &edata, &lead, &trail, &to_leak, &to_salvage, NULL, size, + alignment); if (result == extent_split_interior_ok) { if (lead != NULL) { - extent_record(tsdn, pac, ehooks, 
&pac->ecache_retained, - lead); + extent_record( + tsdn, pac, ehooks, &pac->ecache_retained, lead); } if (trail != NULL) { - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - trail); + extent_record( + tsdn, pac, ehooks, &pac->ecache_retained, trail); } } else { /* @@ -792,15 +796,15 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } if (*commit && !edata_committed_get(edata)) { - if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), true)) { - extent_record(tsdn, pac, ehooks, - &pac->ecache_retained, edata); + if (extent_commit_impl( + tsdn, ehooks, edata, 0, edata_size_get(edata), true)) { + extent_record( + tsdn, pac, ehooks, &pac->ecache_retained, edata); goto label_err; } /* A successful commit should return zeroed memory. */ if (config_debug) { - void *addr = edata_addr_get(edata); + void *addr = edata_addr_get(edata); size_t *p = (size_t *)addr; /* Check the first page only. */ for (size_t i = 0; i < PAGE / sizeof(size_t); i++) { @@ -819,8 +823,9 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, if (huge_arena_pac_thp.thp_madvise) { /* Avoid using HUGEPAGE when the grow size is less than HUGEPAGE. 
*/ - if (ind != 0 && ind == huge_arena_ind && ehooks_are_default(ehooks) && - likely(alloc_size >= HUGEPAGE)) { + if (ind != 0 && ind == huge_arena_ind + && ehooks_are_default(ehooks) + && likely(alloc_size >= HUGEPAGE)) { extent_handle_huge_arena_thp(tsdn, &huge_arena_pac_thp, pac->edata_cache, ptr, alloc_size); } @@ -831,8 +836,8 @@ extent_grow_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, edata); } if (zero && !edata_zeroed_get(edata)) { - ehooks_zero(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata)); + ehooks_zero( + tsdn, ehooks, edata_base_get(edata), edata_size_get(edata)); } return edata; label_err: @@ -858,8 +863,8 @@ extent_alloc_retained(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, extent_gdump_add(tsdn, edata); } } else if (opt_retain && expand_edata == NULL && !guarded) { - edata = extent_grow_retained(tsdn, pac, ehooks, size, - alignment, zero, commit); + edata = extent_grow_retained( + tsdn, pac, ehooks, size, alignment, zero, commit); /* extent_grow_retained() always releases pac->grow_mtx. */ } else { malloc_mutex_unlock(tsdn, &pac->grow_mtx); @@ -875,12 +880,12 @@ extent_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, extent_assert_can_coalesce(inner, outer); eset_remove(&ecache->eset, outer); - bool err = extent_merge_impl(tsdn, pac, ehooks, - forward ? inner : outer, forward ? outer : inner, + bool err = extent_merge_impl(tsdn, pac, ehooks, forward ? inner : outer, + forward ? outer : inner, /* holding_core_locks */ true); if (err) { - extent_deactivate_check_state_locked(tsdn, pac, ecache, outer, - extent_state_merging); + extent_deactivate_check_state_locked( + tsdn, pac, ecache, outer, extent_state_merging); } return err; @@ -908,10 +913,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce forward. 
*/ edata_t *next = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ true); - size_t max_next_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; + size_t max_next_neighbor = max_size > edata_size_get(edata) + ? max_size - edata_size_get(edata) + : 0; if (next != NULL && edata_size_get(next) <= max_next_neighbor) { - if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - next, true)) { + if (!extent_coalesce( + tsdn, pac, ehooks, ecache, edata, next, true)) { if (ecache->delay_coalesce) { /* Do minimal coalescing. */ *coalesced = true; @@ -924,10 +931,12 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, /* Try to coalesce backward. */ edata_t *prev = emap_try_acquire_edata_neighbor(tsdn, pac->emap, edata, EXTENT_PAI_PAC, ecache->state, /* forward */ false); - size_t max_prev_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; + size_t max_prev_neighbor = max_size > edata_size_get(edata) + ? max_size - edata_size_get(edata) + : 0; if (prev != NULL && edata_size_get(prev) <= max_prev_neighbor) { if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - prev, false)) { + prev, false)) { edata = prev; if (ecache->delay_coalesce) { /* Do minimal coalescing. 
*/ @@ -948,36 +957,33 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, static edata_t * extent_try_coalesce(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, bool *coalesced) { - return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - SC_LARGE_MAXCLASS, coalesced); + return extent_try_coalesce_impl( + tsdn, pac, ehooks, ecache, edata, SC_LARGE_MAXCLASS, coalesced); } static edata_t * extent_try_coalesce_large(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata, size_t max_size, bool *coalesced) { - return extent_try_coalesce_impl(tsdn, pac, ehooks, ecache, edata, - max_size, coalesced); + return extent_try_coalesce_impl( + tsdn, pac, ehooks, ecache, edata, max_size, coalesced); } /* Purge a single extent to retained / unmapped directly. */ static void -extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_maximally_purge( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { size_t extent_size = edata_size_get(edata); extent_dalloc_wrapper(tsdn, pac, ehooks, edata); if (config_stats) { /* Update stats accordingly. 
*/ LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*pac->stats_mtx), + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), &pac->stats->decay_dirty.nmadvise, 1); - locked_inc_u64(tsdn, - LOCKEDINT_MTX(*pac->stats_mtx), - &pac->stats->decay_dirty.purged, - extent_size >> LG_PAGE); + locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), + &pac->stats->decay_dirty.purged, extent_size >> LG_PAGE); LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); - atomic_fetch_sub_zu(&pac->stats->pac_mapped, extent_size, - ATOMIC_RELAXED); + atomic_fetch_sub_zu( + &pac->stats->pac_mapped, extent_size, ATOMIC_RELAXED); } } @@ -988,9 +994,9 @@ extent_maximally_purge(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, edata_t *edata) { - assert((ecache->state != extent_state_dirty && - ecache->state != extent_state_muzzy) || - !edata_zeroed_get(edata)); + assert((ecache->state != extent_state_dirty + && ecache->state != extent_state_muzzy) + || !edata_zeroed_get(edata)); malloc_mutex_lock(tsdn, &ecache->mtx); @@ -1001,8 +1007,8 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, } if (!ecache->delay_coalesce) { bool coalesced_unused; - edata = extent_try_coalesce(tsdn, pac, ehooks, ecache, edata, - &coalesced_unused); + edata = extent_try_coalesce( + tsdn, pac, ehooks, ecache, edata, &coalesced_unused); } else if (edata_size_get(edata) >= SC_LARGE_MINCLASS) { assert(ecache == &pac->ecache_dirty); /* Always coalesce large extents eagerly. */ @@ -1027,17 +1033,21 @@ extent_record(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, ecache_t *ecache, * the final coalescing that happens during the transition from dirty ecache * to muzzy/retained ecache states. 
*/ - unsigned lg_max_coalesce = (unsigned)opt_lg_extent_max_active_fit; + unsigned lg_max_coalesce = (unsigned) + opt_lg_extent_max_active_fit; size_t edata_size = edata_size_get(edata); - size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce) > edata_size ? (edata_size << lg_max_coalesce) : SC_LARGE_MAXCLASS; - bool coalesced; + size_t max_size = (SC_LARGE_MAXCLASS >> lg_max_coalesce) + > edata_size + ? (edata_size << lg_max_coalesce) + : SC_LARGE_MAXCLASS; + bool coalesced; do { assert(edata_state_get(edata) == extent_state_active); edata = extent_try_coalesce_large(tsdn, pac, ehooks, ecache, edata, max_size, &coalesced); } while (coalesced); - if (edata_size_get(edata) >= - atomic_load_zu(&pac->oversize_threshold, ATOMIC_RELAXED) + if (edata_size_get(edata) >= atomic_load_zu( + &pac->oversize_threshold, ATOMIC_RELAXED) && !background_thread_enabled() && extent_may_force_decay(pac)) { /* Shortcut to purge the oversize extent eagerly. */ @@ -1053,10 +1063,9 @@ label_skip_coalesce: } void -extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); +extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (extent_register(tsdn, pac, edata)) { edata_cache_put(tsdn, pac->edata_cache, edata); @@ -1066,14 +1075,14 @@ extent_dalloc_gap(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } static bool -extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper_try( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { bool err; assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); edata_addr_set(edata, 
edata_base_get(edata)); @@ -1089,8 +1098,8 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_t * -extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - void *new_addr, size_t size, size_t alignment, bool zero, bool *commit, +extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, void *new_addr, + size_t size, size_t alignment, bool zero, bool *commit, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); @@ -1100,14 +1109,14 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, return NULL; } size_t palignment = ALIGNMENT_CEILING(alignment, PAGE); - void *addr = ehooks_alloc(tsdn, ehooks, new_addr, size, palignment, - &zero, commit); + void *addr = ehooks_alloc( + tsdn, ehooks, new_addr, size, palignment, &zero, commit); if (addr == NULL) { edata_cache_put(tsdn, pac->edata_cache, edata); return NULL; } - edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, - size, /* slab */ false, SC_NSIZES, extent_sn_next(pac), + edata_init(edata, ecache_ind_get(&pac->ecache_dirty), addr, size, + /* slab */ false, SC_NSIZES, extent_sn_next(pac), extent_state_active, zero, *commit, EXTENT_PAI_PAC, opt_retain ? 
EXTENT_IS_HEAD : EXTENT_NOT_HEAD); /* @@ -1125,8 +1134,8 @@ extent_alloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } static void -extent_dalloc_wrapper_finish(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper_finish( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { if (config_prof) { extent_gdump_sub(tsdn, edata); } @@ -1134,11 +1143,11 @@ extent_dalloc_wrapper_finish(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } void -extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper_purged( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_PAC); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Verify that will not go down the dalloc / munmap route. */ assert(ehooks_dalloc_will_fail(ehooks)); @@ -1148,19 +1157,19 @@ extent_dalloc_wrapper_purged(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } void -extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_dalloc_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_pai_get(edata) == EXTENT_PAI_PAC); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Avoid calling the default extent_dalloc unless have to. */ if (!ehooks_dalloc_will_fail(ehooks)) { /* Remove guard pages for dalloc / unmap. 
*/ if (edata_guarded_get(edata)) { assert(ehooks_are_default(ehooks)); - san_unguard_pages_two_sided(tsdn, ehooks, edata, - pac->emap); + san_unguard_pages_two_sided( + tsdn, ehooks, edata, pac->emap); } /* * Deregister first to avoid a race with other allocating @@ -1177,15 +1186,15 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, bool zeroed; if (!edata_committed_get(edata)) { zeroed = true; - } else if (!extent_decommit_wrapper(tsdn, ehooks, edata, 0, - edata_size_get(edata))) { + } else if (!extent_decommit_wrapper( + tsdn, ehooks, edata, 0, edata_size_get(edata))) { zeroed = true; } else if (!ehooks_purge_forced(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata))) { + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = true; - } else if (edata_state_get(edata) == extent_state_muzzy || - !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), - edata_size_get(edata), 0, edata_size_get(edata))) { + } else if (edata_state_get(edata) == extent_state_muzzy + || !ehooks_purge_lazy(tsdn, ehooks, edata_base_get(edata), + edata_size_get(edata), 0, edata_size_get(edata))) { zeroed = false; } else { zeroed = false; @@ -1196,15 +1205,15 @@ extent_dalloc_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } void -extent_destroy_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata) { +extent_destroy_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata) { assert(edata_base_get(edata) != NULL); assert(edata_size_get(edata) != 0); extent_state_t state = edata_state_get(edata); assert(state == extent_state_retained || state == extent_state_active); assert(emap_edata_is_acquired(tsdn, pac->emap, edata)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); if (edata_guarded_get(edata)) { assert(opt_retain); @@ -1240,8 +1249,8 @@ extent_commit_wrapper(tsdn_t *tsdn, 
ehooks_t *ehooks, edata_t *edata, static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); bool err = ehooks_decommit(tsdn, ehooks, edata_base_get(edata), edata_size_get(edata), offset, length); edata_committed_set(edata, edata_committed_get(edata) && err); @@ -1261,8 +1270,8 @@ extent_purge_lazy_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { - return extent_purge_lazy_impl(tsdn, ehooks, edata, offset, - length, false); + return extent_purge_lazy_impl( + tsdn, ehooks, edata, offset, length, false); } static bool @@ -1278,8 +1287,8 @@ extent_purge_forced_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { - return extent_purge_forced_impl(tsdn, ehooks, edata, offset, length, - false); + return extent_purge_forced_impl( + tsdn, ehooks, edata, offset, length, false); } /* @@ -1290,16 +1299,16 @@ extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * and returns the trail (except in case of error). */ static edata_t * -extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) { +extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, + size_t size_a, size_t size_b, bool holding_core_locks) { assert(edata_size_get(edata) == size_a + size_b); /* Only the shrink path may split w/o holding core locks. 
*/ if (holding_core_locks) { witness_assert_positive_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); } else { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } if (ehooks_split_will_fail(ehooks)) { @@ -1317,8 +1326,8 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_state_get(edata), edata_zeroed_get(edata), edata_committed_get(edata), EXTENT_PAI_PAC, EXTENT_NOT_HEAD); emap_prepare_t prepare; - bool err = emap_split_prepare(tsdn, pac->emap, &prepare, edata, - size_a, trail, size_b); + bool err = emap_split_prepare( + tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); if (err) { goto label_error_b; } @@ -1340,8 +1349,8 @@ extent_split_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, } edata_size_set(edata, size_a); - emap_split_commit(tsdn, pac->emap, &prepare, edata, size_a, trail, - size_b); + emap_split_commit( + tsdn, pac->emap, &prepare, edata, size_a, trail, size_b); return trail; label_error_b: @@ -1353,8 +1362,8 @@ label_error_a: edata_t * extent_split_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata, size_t size_a, size_t size_b, bool holding_core_locks) { - return extent_split_impl(tsdn, pac, ehooks, edata, size_a, size_b, - holding_core_locks); + return extent_split_impl( + tsdn, pac, ehooks, edata, size_a, size_b, holding_core_locks); } static bool @@ -1365,8 +1374,8 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, witness_assert_positive_depth_to_rank( tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE); } else { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); } assert(edata_base_get(a) < edata_base_get(b)); @@ -1391,12 +1400,13 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, emap_prepare_t prepare; 
emap_merge_prepare(tsdn, pac->emap, &prepare, a, b); - assert(edata_state_get(a) == extent_state_active || - edata_state_get(a) == extent_state_merging); + assert(edata_state_get(a) == extent_state_active + || edata_state_get(a) == extent_state_merging); edata_state_set(a, extent_state_active); edata_size_set(a, edata_size_get(a) + edata_size_get(b)); - edata_sn_set(a, (edata_sn_get(a) < edata_sn_get(b)) ? - edata_sn_get(a) : edata_sn_get(b)); + edata_sn_set(a, + (edata_sn_get(a) < edata_sn_get(b)) ? edata_sn_get(a) + : edata_sn_get(b)); edata_zeroed_set(a, edata_zeroed_get(a) && edata_zeroed_get(b)); emap_merge_commit(tsdn, pac->emap, &prepare, a, b); @@ -1407,26 +1417,26 @@ extent_merge_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, } bool -extent_merge_wrapper(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, - edata_t *a, edata_t *b) { +extent_merge_wrapper( + tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *a, edata_t *b) { return extent_merge_impl(tsdn, pac, ehooks, a, b, /* holding_core_locks */ false); } bool -extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - bool commit, bool zero, bool growing_retained) { +extent_commit_zero(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, bool commit, + bool zero, bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 
1 : 0); if (commit && !edata_committed_get(edata)) { if (extent_commit_impl(tsdn, ehooks, edata, 0, - edata_size_get(edata), growing_retained)) { + edata_size_get(edata), growing_retained)) { return true; } } if (zero && !edata_zeroed_get(edata)) { - void *addr = edata_base_get(edata); + void *addr = edata_base_get(edata); size_t size = edata_size_get(edata); ehooks_zero(tsdn, ehooks, addr, size); } diff --git a/src/extent_dss.c b/src/extent_dss.c index 32fb4112..3f7a15d0 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -11,14 +11,10 @@ /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ #define SBRK_INVALID ((void *)-1) -const char *opt_dss = DSS_DEFAULT; +const char *opt_dss = DSS_DEFAULT; -const char *const dss_prec_names[] = { - "disabled", - "primary", - "secondary", - "N/A" -}; +const char *const dss_prec_names[] = { + "disabled", "primary", "secondary", "N/A"}; /* * Current dss precedence default, used when creating new arenas. NB: This is @@ -26,17 +22,16 @@ const char *const dss_prec_names[] = { * guarantee that sizeof(dss_prec_t) is the same as sizeof(unsigned), and we use * atomic operations to synchronize the setting. */ -static atomic_u_t dss_prec_default = ATOMIC_INIT( - (unsigned)DSS_PREC_DEFAULT); +static atomic_u_t dss_prec_default = ATOMIC_INIT((unsigned)DSS_PREC_DEFAULT); /* Base address of the DSS. */ -static void *dss_base; +static void *dss_base; /* Atomic boolean indicating whether a thread is currently extending DSS. */ -static atomic_b_t dss_extending; +static atomic_b_t dss_extending; /* Atomic boolean indicating whether the DSS is exhausted. */ -static atomic_b_t dss_exhausted; +static atomic_b_t dss_exhausted; /* Atomic current upper limit on DSS addresses. 
*/ -static atomic_p_t dss_max; +static atomic_p_t dss_max; /******************************************************************************/ @@ -76,7 +71,7 @@ extent_dss_extending_start(void) { while (true) { bool expected = false; if (atomic_compare_exchange_weak_b(&dss_extending, &expected, - true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { + true, ATOMIC_ACQ_REL, ATOMIC_RELAXED)) { break; } spin_adaptive(&spinner); @@ -143,24 +138,24 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, goto label_oom; } - bool head_state = opt_retain ? EXTENT_IS_HEAD : - EXTENT_NOT_HEAD; + bool head_state = opt_retain ? EXTENT_IS_HEAD + : EXTENT_NOT_HEAD; /* * Compute how much page-aligned gap space (if any) is * necessary to satisfy alignment. This space can be * recycled for later use. */ - void *gap_addr_page = ALIGNMENT_ADDR2CEILING(max_cur, - PAGE); + void *gap_addr_page = ALIGNMENT_ADDR2CEILING( + max_cur, PAGE); void *ret = ALIGNMENT_ADDR2CEILING( gap_addr_page, alignment); - size_t gap_size_page = (uintptr_t)ret - - (uintptr_t)gap_addr_page; + size_t gap_size_page = (uintptr_t)ret + - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), gap_addr_page, gap_size_page, false, - SC_NSIZES, extent_sn_next( - &arena->pa_shard.pac), + SC_NSIZES, + extent_sn_next(&arena->pa_shard.pac), extent_state_active, false, true, EXTENT_PAI_PAC, head_state); } @@ -169,25 +164,25 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, * allocation space. */ void *dss_next = (void *)((byte_t *)ret + size); - if ((uintptr_t)ret < (uintptr_t)max_cur || - (uintptr_t)dss_next < (uintptr_t)max_cur) { + if ((uintptr_t)ret < (uintptr_t)max_cur + || (uintptr_t)dss_next < (uintptr_t)max_cur) { goto label_oom; /* Wrap-around. */ } /* Compute the increment, including subpage bytes. 
*/ - void *gap_addr_subpage = max_cur; - size_t gap_size_subpage = (uintptr_t)ret - - (uintptr_t)gap_addr_subpage; + void *gap_addr_subpage = max_cur; + size_t gap_size_subpage = (uintptr_t)ret + - (uintptr_t)gap_addr_subpage; intptr_t incr = gap_size_subpage + size; - assert((uintptr_t)max_cur + incr == (uintptr_t)ret + - size); + assert( + (uintptr_t)max_cur + incr == (uintptr_t)ret + size); /* Try to allocate. */ void *dss_prev = extent_dss_sbrk(incr); if (dss_prev == max_cur) { /* Success. */ - atomic_store_p(&dss_max, dss_next, - ATOMIC_RELEASE); + atomic_store_p( + &dss_max, dss_next, ATOMIC_RELEASE); extent_dss_extending_finish(); if (gap_size_page != 0) { @@ -203,17 +198,16 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, *commit = pages_decommit(ret, size); } if (*zero && *commit) { - edata_t edata = {0}; + edata_t edata = {0}; ehooks_t *ehooks = arena_get_ehooks( arena); - edata_init(&edata, - arena_ind_get(arena), ret, size, - size, false, SC_NSIZES, + edata_init(&edata, arena_ind_get(arena), + ret, size, size, false, SC_NSIZES, extent_state_active, false, true, EXTENT_PAI_PAC, head_state); if (extent_purge_forced_wrapper(tsdn, - ehooks, &edata, 0, size)) { + ehooks, &edata, 0, size)) { memset(ret, 0, size); } } @@ -225,8 +219,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, */ if (dss_prev == SBRK_INVALID) { /* OOM. 
*/ - atomic_store_b(&dss_exhausted, true, - ATOMIC_RELEASE); + atomic_store_b( + &dss_exhausted, true, ATOMIC_RELEASE); goto label_oom; } } @@ -239,16 +233,16 @@ label_oom: static bool extent_in_dss_helper(void *addr, void *max) { - return ((uintptr_t)addr >= (uintptr_t)dss_base && (uintptr_t)addr < - (uintptr_t)max); + return ((uintptr_t)addr >= (uintptr_t)dss_base + && (uintptr_t)addr < (uintptr_t)max); } bool extent_in_dss(void *addr) { cassert(have_dss); - return extent_in_dss_helper(addr, atomic_load_p(&dss_max, - ATOMIC_ACQUIRE)); + return extent_in_dss_helper( + addr, atomic_load_p(&dss_max, ATOMIC_ACQUIRE)); } bool @@ -257,14 +251,14 @@ extent_dss_mergeable(void *addr_a, void *addr_b) { cassert(have_dss); - if ((uintptr_t)addr_a < (uintptr_t)dss_base && (uintptr_t)addr_b < - (uintptr_t)dss_base) { + if ((uintptr_t)addr_a < (uintptr_t)dss_base + && (uintptr_t)addr_b < (uintptr_t)dss_base) { return true; } max = atomic_load_p(&dss_max, ATOMIC_ACQUIRE); - return (extent_in_dss_helper(addr_a, max) == - extent_in_dss_helper(addr_b, max)); + return (extent_in_dss_helper(addr_a, max) + == extent_in_dss_helper(addr_b, max)); } void @@ -273,7 +267,8 @@ extent_dss_boot(void) { dss_base = extent_dss_sbrk(0); atomic_store_b(&dss_extending, false, ATOMIC_RELAXED); - atomic_store_b(&dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED); + atomic_store_b( + &dss_exhausted, dss_base == SBRK_INVALID, ATOMIC_RELAXED); atomic_store_p(&dss_max, dss_base, ATOMIC_RELAXED); } diff --git a/src/extent_mmap.c b/src/extent_mmap.c index 5f0ee2d2..d39bddc6 100644 --- a/src/extent_mmap.c +++ b/src/extent_mmap.c @@ -7,7 +7,7 @@ /******************************************************************************/ /* Data. 
*/ -bool opt_retain = +bool opt_retain = #ifdef JEMALLOC_RETAIN true #else @@ -18,8 +18,8 @@ bool opt_retain = /******************************************************************************/ void * -extent_alloc_mmap(void *new_addr, size_t size, size_t alignment, bool *zero, - bool *commit) { +extent_alloc_mmap( + void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { assert(alignment == ALIGNMENT_CEILING(alignment, PAGE)); void *ret = pages_map(new_addr, size, alignment, commit); if (ret == NULL) { diff --git a/src/fxp.c b/src/fxp.c index 96585f0a..faeab207 100644 --- a/src/fxp.c +++ b/src/fxp.c @@ -83,8 +83,8 @@ fxp_parse(fxp_t *result, const char *str, char **end) { } assert(fractional_part < frac_div); - uint32_t fractional_repr = (uint32_t)( - (fractional_part << 16) / frac_div); + uint32_t fractional_repr = (uint32_t)((fractional_part << 16) + / frac_div); /* Success! */ *result = (integer_part << 16) + fractional_repr; @@ -99,7 +99,7 @@ fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) { uint32_t integer_part = fxp_round_down(a); uint32_t fractional_part = (a & ((1U << 16) - 1)); - int leading_fraction_zeros = 0; + int leading_fraction_zeros = 0; uint64_t fraction_digits = fractional_part; for (int i = 0; i < FXP_FRACTIONAL_PART_DIGITS; i++) { if (fraction_digits < (1U << 16) @@ -113,12 +113,12 @@ fxp_print(fxp_t a, char buf[FXP_BUF_SIZE]) { fraction_digits /= 10; } - size_t printed = malloc_snprintf(buf, FXP_BUF_SIZE, "%"FMTu32".", - integer_part); + size_t printed = malloc_snprintf( + buf, FXP_BUF_SIZE, "%" FMTu32 ".", integer_part); for (int i = 0; i < leading_fraction_zeros; i++) { buf[printed] = '0'; printed++; } - malloc_snprintf(&buf[printed], FXP_BUF_SIZE - printed, "%"FMTu64, - fraction_digits); + malloc_snprintf( + &buf[printed], FXP_BUF_SIZE - printed, "%" FMTu64, fraction_digits); } diff --git a/src/hook.c b/src/hook.c index 77a988d7..4270ad60 100644 --- a/src/hook.c +++ b/src/hook.c @@ -9,19 +9,19 @@ typedef struct hooks_internal_s 
hooks_internal_t; struct hooks_internal_s { hooks_t hooks; - bool in_use; + bool in_use; }; seq_define(hooks_internal_t, hooks) -static atomic_u_t nhooks = ATOMIC_INIT(0); -static seq_hooks_t hooks[HOOK_MAX]; + static atomic_u_t nhooks = ATOMIC_INIT(0); +static seq_hooks_t hooks[HOOK_MAX]; static malloc_mutex_t hooks_mu; bool hook_boot(void) { - return malloc_mutex_init(&hooks_mu, "hooks", WITNESS_RANK_HOOK, - malloc_mutex_rank_exclusive); + return malloc_mutex_init( + &hooks_mu, "hooks", WITNESS_RANK_HOOK, malloc_mutex_rank_exclusive); } static void * @@ -84,20 +84,18 @@ hook_remove(tsdn_t *tsdn, void *opaque) { malloc_mutex_unlock(tsdn, &hooks_mu); } -#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ -for (int for_each_hook_counter = 0; \ - for_each_hook_counter < HOOK_MAX; \ - for_each_hook_counter++) { \ - bool for_each_hook_success = seq_try_load_hooks( \ - (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ - if (!for_each_hook_success) { \ - continue; \ - } \ - if (!(hooks_internal_ptr)->in_use) { \ - continue; \ - } -#define FOR_EACH_HOOK_END \ -} +#define FOR_EACH_HOOK_BEGIN(hooks_internal_ptr) \ + for (int for_each_hook_counter = 0; for_each_hook_counter < HOOK_MAX; \ + for_each_hook_counter++) { \ + bool for_each_hook_success = seq_try_load_hooks( \ + (hooks_internal_ptr), &hooks[for_each_hook_counter]); \ + if (!for_each_hook_success) { \ + continue; \ + } \ + if (!(hooks_internal_ptr)->in_use) { \ + continue; \ + } +#define FOR_EACH_HOOK_END } static bool * hook_reentrantp(void) { @@ -129,26 +127,25 @@ hook_reentrantp(void) { * untouched. 
*/ static bool in_hook_global = true; - tsdn_t *tsdn = tsdn_fetch(); - bool *in_hook = tsdn_in_hookp_get(tsdn); - if (in_hook!= NULL) { + tsdn_t *tsdn = tsdn_fetch(); + bool *in_hook = tsdn_in_hookp_get(tsdn); + if (in_hook != NULL) { return in_hook; } return &in_hook_global; } -#define HOOK_PROLOGUE \ - if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \ - return; \ - } \ - bool *in_hook = hook_reentrantp(); \ - if (*in_hook) { \ - return; \ - } \ +#define HOOK_PROLOGUE \ + if (likely(atomic_load_u(&nhooks, ATOMIC_RELAXED) == 0)) { \ + return; \ + } \ + bool *in_hook = hook_reentrantp(); \ + if (*in_hook) { \ + return; \ + } \ *in_hook = true; -#define HOOK_EPILOGUE \ - *in_hook = false; +#define HOOK_EPILOGUE *in_hook = false; void hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, @@ -157,10 +154,10 @@ hook_invoke_alloc(hook_alloc_t type, void *result, uintptr_t result_raw, hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) - hook_alloc h = hook.hooks.alloc_hook; - if (h != NULL) { - h(hook.hooks.extra, type, result, result_raw, args_raw); - } + hook_alloc h = hook.hooks.alloc_hook; + if (h != NULL) { + h(hook.hooks.extra, type, result, result_raw, args_raw); + } FOR_EACH_HOOK_END HOOK_EPILOGUE @@ -171,10 +168,10 @@ hook_invoke_dalloc(hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { HOOK_PROLOGUE hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) - hook_dalloc h = hook.hooks.dalloc_hook; - if (h != NULL) { - h(hook.hooks.extra, type, address, args_raw); - } + hook_dalloc h = hook.hooks.dalloc_hook; + if (h != NULL) { + h(hook.hooks.extra, type, address, args_raw); + } FOR_EACH_HOOK_END HOOK_EPILOGUE } @@ -185,11 +182,11 @@ hook_invoke_expand(hook_expand_t type, void *address, size_t old_usize, HOOK_PROLOGUE hooks_internal_t hook; FOR_EACH_HOOK_BEGIN(&hook) - hook_expand h = hook.hooks.expand_hook; - if (h != NULL) { - h(hook.hooks.extra, type, address, old_usize, new_usize, - result_raw, args_raw); - } + hook_expand h = 
hook.hooks.expand_hook; + if (h != NULL) { + h(hook.hooks.extra, type, address, old_usize, new_usize, + result_raw, args_raw); + } FOR_EACH_HOOK_END HOOK_EPILOGUE } diff --git a/src/hpa.c b/src/hpa.c index 48e356c6..03668f06 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -12,17 +12,17 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); -static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated); -static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); +static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, + size_t nallocs, edata_list_active_t *results, bool frequent_reuse, + bool *deferred_work_generated); +static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void hpa_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); bool @@ -70,7 +70,8 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { } bool -hpa_central_init(hpa_central_t *central, base_t 
*base, const hpa_hooks_t *hooks) { +hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -89,8 +90,8 @@ hpa_central_init(hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) static hpdata_t * hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { - return (hpdata_t *)base_alloc(tsdn, central->base, sizeof(hpdata_t), - CACHELINE); + return (hpdata_t *)base_alloc( + tsdn, central->base, sizeof(hpdata_t), CACHELINE); } static hpdata_t * @@ -137,8 +138,8 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, */ bool commit = true; /* Allocate address space, bailing if we fail. */ - void *new_eden = pages_map(NULL, HPA_EDEN_SIZE, HUGEPAGE, - &commit); + void *new_eden = pages_map( + NULL, HPA_EDEN_SIZE, HUGEPAGE, &commit); if (new_eden == NULL) { *oom = true; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -243,8 +244,8 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, * locking here. 
*/ static void -hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, - hpa_shard_nonderived_stats_t *src) { +hpa_shard_nonderived_stats_accum( + hpa_shard_nonderived_stats_t *dst, hpa_shard_nonderived_stats_t *src) { dst->npurge_passes += src->npurge_passes; dst->npurges += src->npurges; dst->nhugifies += src->nhugifies; @@ -255,13 +256,13 @@ hpa_shard_nonderived_stats_accum(hpa_shard_nonderived_stats_t *dst, void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); - hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, - &src->nonderived_stats); + hpa_shard_nonderived_stats_accum( + &dst->nonderived_stats, &src->nonderived_stats); } void -hpa_shard_stats_merge(tsdn_t *tsdn, hpa_shard_t *shard, - hpa_shard_stats_t *dst) { +hpa_shard_stats_merge( + tsdn_t *tsdn, hpa_shard_t *shard, hpa_shard_stats_t *dst) { hpa_do_consistency_checks(shard); malloc_mutex_lock(tsdn, &shard->grow_mtx); @@ -295,8 +296,8 @@ hpa_ndirty_max(tsdn_t *tsdn, hpa_shard_t *shard) { if (shard->opts.dirty_mult == (fxp_t)-1) { return (size_t)-1; } - return fxp_mul_frac(psset_nactive(&shard->psset), - shard->opts.dirty_mult); + return fxp_mul_frac( + psset_nactive(&shard->psset), shard->opts.dirty_mult); } static bool @@ -307,7 +308,8 @@ hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { return false; } return hpa_adjusted_ndirty(tsdn, shard) - + hpdata_nretained_get(to_hugify) > hpa_ndirty_max(tsdn, shard); + + hpdata_nretained_get(to_hugify) + > hpa_ndirty_max(tsdn, shard); } static bool @@ -323,8 +325,8 @@ hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { } static void -hpa_update_purge_hugify_eligibility(tsdn_t *tsdn, hpa_shard_t *shard, - hpdata_t *ps) { +hpa_update_purge_hugify_eligibility( + tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (hpdata_changing_state_get(ps)) { hpdata_purge_allowed_set(ps, false); @@ -397,7 +399,7 @@ 
hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { #define HPA_PURGE_BATCH_MAX_DEFAULT 16 #ifndef JEMALLOC_JET -#define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT +# define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT #else size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; size_t @@ -406,20 +408,21 @@ hpa_purge_max_batch_size_for_test_set(size_t new_size) { hpa_purge_max_batch_size_for_test = new_size; return old_size; } -#define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test +# define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test #endif static inline size_t hpa_process_madvise_max_iovec_len(void) { - assert(opt_process_madvise_max_batch <= - PROCESS_MADVISE_MAX_BATCH_LIMIT); - return opt_process_madvise_max_batch == 0 ? - HPA_MIN_VAR_VEC_SIZE : opt_process_madvise_max_batch; + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 + ? HPA_MIN_VAR_VEC_SIZE + : opt_process_madvise_max_batch; } static inline void -hpa_purge_actual_unlocked(hpa_shard_t *shard, hpa_purge_item_t *batch, - size_t batch_sz) { +hpa_purge_actual_unlocked( + hpa_shard_t *shard, hpa_purge_item_t *batch, size_t batch_sz) { assert(batch_sz > 0); size_t len = hpa_process_madvise_max_iovec_len(); @@ -433,17 +436,18 @@ hpa_purge_actual_unlocked(hpa_shard_t *shard, hpa_purge_item_t *batch, /* Actually do the purging, now that the lock is dropped. 
*/ if (batch[i].dehugify) { - shard->central->hooks.dehugify(hpdata_addr_get(to_purge), - HUGEPAGE); + shard->central->hooks.dehugify( + hpdata_addr_get(to_purge), HUGEPAGE); } - void *purge_addr; + void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - to_purge, &batch[i].state, &purge_addr, &purge_size)) { + to_purge, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); - hpa_range_accum_add(&accum, purge_addr, purge_size, shard); + hpa_range_accum_add( + &accum, purge_addr, purge_size, shard); } } hpa_range_accum_finish(&accum, shard); @@ -490,10 +494,10 @@ hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { /* Gather all the metadata we'll need during the purge. */ hp_item->dehugify = hpdata_huge_get(hp_item->hp); size_t nranges; - size_t ndirty = - hpdata_purge_begin(hp_item->hp, &hp_item->state, &nranges); + size_t ndirty = hpdata_purge_begin( + hp_item->hp, &hp_item->state, &nranges); /* We picked hp to purge, so it should have some dirty ranges */ - assert(ndirty > 0 && nranges >0); + assert(ndirty > 0 && nranges > 0); b->ndirty_in_batch += ndirty; b->nranges += nranges; return ndirty; @@ -501,8 +505,8 @@ hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { /* Finish purge of one huge page. 
*/ static inline void -hpa_purge_finish_hp(tsdn_t *tsdn, hpa_shard_t *shard, - hpa_purge_item_t *hp_item) { +hpa_purge_finish_hp( + tsdn_t *tsdn, hpa_shard_t *shard, hpa_purge_item_t *hp_item) { if (hp_item->dehugify) { shard->stats.ndehugifies++; } @@ -523,9 +527,9 @@ hpa_purge_finish_hp(tsdn_t *tsdn, hpa_shard_t *shard, static inline bool hpa_batch_full(hpa_purge_batch_t *b) { /* It's okay for ranges to go above */ - return b->npurged_hp_total == b->max_hp || - b->item_cnt == b->items_capacity || - b->nranges >= b->range_watermark; + return b->npurged_hp_total == b->max_hp + || b->item_cnt == b->items_capacity + || b->nranges >= b->range_watermark; } static inline void @@ -547,23 +551,25 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { assert(max_hp > 0); assert(HPA_PURGE_BATCH_MAX > 0); - assert(HPA_PURGE_BATCH_MAX < - (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t))); + assert(HPA_PURGE_BATCH_MAX + < (VARIABLE_ARRAY_SIZE_MAX / sizeof(hpa_purge_item_t))); VARIABLE_ARRAY(hpa_purge_item_t, items, HPA_PURGE_BATCH_MAX); hpa_purge_batch_t batch = { - .max_hp = max_hp, - .npurged_hp_total = 0, - .items = &items[0], - .items_capacity = HPA_PURGE_BATCH_MAX, - .range_watermark = hpa_process_madvise_max_iovec_len(), + .max_hp = max_hp, + .npurged_hp_total = 0, + .items = &items[0], + .items_capacity = HPA_PURGE_BATCH_MAX, + .range_watermark = hpa_process_madvise_max_iovec_len(), }; assert(batch.range_watermark > 0); while (1) { hpa_batch_pass_start(&batch); assert(hpa_batch_empty(&batch)); - while(!hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { - size_t ndirty = hpa_purge_start_hp(&batch, &shard->psset); + while ( + !hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { + size_t ndirty = hpa_purge_start_hp( + &batch, &shard->psset); if (ndirty == 0) { break; } @@ -582,8 +588,8 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { shard->npending_purge -= batch.ndirty_in_batch; shard->stats.npurges += batch.ndirty_in_batch; 
shard->central->hooks.curtime(&shard->last_purge, - /* first_reading */ false); - for (size_t i=0; imtx); - bool err = shard->central->hooks.hugify(hpdata_addr_get(to_hugify), - HUGEPAGE, shard->opts.hugify_sync); + bool err = shard->central->hooks.hugify( + hpdata_addr_get(to_hugify), HUGEPAGE, shard->opts.hugify_sync); malloc_mutex_lock(tsdn, &shard->mtx); shard->stats.nhugifies++; @@ -669,8 +675,8 @@ hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { * hpa_shard_do_deferred_work() call. */ static void -hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, - bool forced) { +hpa_shard_maybe_do_deferred_work( + tsdn_t *tsdn, hpa_shard_t *shard, bool forced) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!forced && shard->opts.deferral_allowed) { return; @@ -704,8 +710,7 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, * of purging algorithm. */ ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp; - if (max_purge_nhp != -1 && - max_purges > (size_t)max_purge_nhp) { + if (max_purge_nhp != -1 && max_purges > (size_t)max_purge_nhp) { max_purges = max_purge_nhp; } @@ -725,9 +730,9 @@ hpa_shard_maybe_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard, } static edata_t * -hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - bool *oom) { - bool err; +hpa_try_alloc_one_no_grow( + tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { + bool err; edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf); if (edata == NULL) { *oom = true; @@ -754,8 +759,8 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, } void *addr = hpdata_reserve_alloc(ps, size); - edata_init(edata, shard->ind, addr, size, /* slab */ false, - SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active, + edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, + /* sn */ hpdata_age_get(ps), extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, 
EXTENT_NOT_HEAD); edata_ps_set(edata, ps); @@ -768,11 +773,11 @@ hpa_try_alloc_one_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * dropped. This would force us to deal with a pageslab eviction down * the error pathway, which is a pain. */ - err = emap_register_boundary(tsdn, shard->emap, edata, - SC_NSIZES, /* slab */ false); + err = emap_register_boundary( + tsdn, shard->emap, edata, SC_NSIZES, /* slab */ false); if (err) { - hpdata_unreserve(ps, edata_addr_get(edata), - edata_size_get(edata)); + hpdata_unreserve( + ps, edata_addr_get(edata), edata_size_get(edata)); /* * We should arguably reset dirty state here, but this would * require some sort of prepare + commit functionality that's a @@ -800,8 +805,8 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, malloc_mutex_lock(tsdn, &shard->mtx); size_t nsuccess = 0; for (; nsuccess < nallocs; nsuccess++) { - edata_t *edata = hpa_try_alloc_one_no_grow(tsdn, shard, size, - oom); + edata_t *edata = hpa_try_alloc_one_no_grow( + tsdn, shard, size, oom); if (edata == NULL) { break; } @@ -819,12 +824,11 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { assert(size <= HUGEPAGE); - assert(size <= shard->opts.slab_max_alloc || - size == sz_s2u(size)); + assert(size <= shard->opts.slab_max_alloc || size == sz_s2u(size)); bool oom = false; - size_t nsuccess = hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs, results, deferred_work_generated); + size_t nsuccess = hpa_try_alloc_batch_no_grow( + tsdn, shard, size, &oom, nallocs, results, deferred_work_generated); if (nsuccess == nallocs || oom) { return nsuccess; @@ -851,8 +855,8 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. 
*/ - hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, - shard->age_counter++, &oom); + hpdata_t *ps = hpa_central_extract( + tsdn, shard->central, size, shard->age_counter++, &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; @@ -894,8 +898,8 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, bool *deferred_work_generated) { assert(nallocs > 0); assert((size & PAGE_MASK) == 0); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); hpa_shard_t *shard = hpa_from_pai(self); /* @@ -908,16 +912,16 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, * huge page size). These requests do not concern internal * fragmentation with huge pages (again, the full size will be used). */ - if (!(frequent_reuse && size <= HUGEPAGE) && - (size > shard->opts.slab_max_alloc)) { + if (!(frequent_reuse && size <= HUGEPAGE) + && (size > shard->opts.slab_max_alloc)) { return 0; } - size_t nsuccess = hpa_alloc_batch_psset(tsdn, shard, size, nallocs, - results, deferred_work_generated); + size_t nsuccess = hpa_alloc_batch_psset( + tsdn, shard, size, nallocs, results, deferred_work_generated); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* * Guard the sanity checks with config_debug because the loop cannot be @@ -926,13 +930,13 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, */ if (config_debug) { edata_t *edata; - ql_foreach(edata, &results->head, ql_link_active) { + ql_foreach (edata, &results->head, ql_link_active) { emap_assert_mapped(tsdn, shard->emap, edata); assert(edata_pai_get(edata) == EXTENT_PAI_HPA); assert(edata_state_get(edata) == extent_state_active); assert(edata_arena_ind_get(edata) == shard->ind); - 
assert(edata_szind_get_maybe_invalid(edata) == - SC_NSIZES); + assert( + edata_szind_get_maybe_invalid(edata) == SC_NSIZES); assert(!edata_slab_get(edata)); assert(edata_committed_get(edata)); assert(edata_base_get(edata) == edata_addr_get(edata)); @@ -947,8 +951,8 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated) { assert((size & PAGE_MASK) == 0); assert(!guarded); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* We don't handle alignment or zeroing for now. */ if (alignment > PAGE || zero) { @@ -975,8 +979,8 @@ hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } static bool -hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated) { +hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, + size_t new_size, bool *deferred_work_generated) { /* Shrink not yet supported. */ return true; } @@ -1021,7 +1025,7 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { hpdata_t *ps = edata_ps_get(edata); /* Currently, all edatas come from pageslabs. 
*/ assert(ps != NULL); - void *unreserve_addr = edata_addr_get(edata); + void *unreserve_addr = edata_addr_get(edata); size_t unreserve_size = edata_size_get(edata); edata_cache_fast_put(tsdn, &shard->ecf, edata); @@ -1037,7 +1041,7 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, hpa_shard_t *shard = hpa_from_pai(self); edata_t *edata; - ql_foreach(edata, &list->head, ql_link_active) { + ql_foreach (edata, &list->head, ql_link_active) { hpa_dalloc_prepare_unlocked(tsdn, shard, edata); } @@ -1048,15 +1052,14 @@ hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, hpa_dalloc_locked(tsdn, shard, edata); } hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); - *deferred_work_generated = - hpa_shard_has_deferred_work(tsdn, shard); + *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); malloc_mutex_unlock(tsdn, &shard->mtx); } static void -hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +hpa_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { assert(!edata_guarded_get(edata)); /* Just a dalloc_batch of size 1; this lets us share logic. */ edata_list_active_t dalloc_list; @@ -1072,14 +1075,14 @@ hpa_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { hpa_shard_t *shard = hpa_from_pai(self); - uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX; + uint64_t time_ns = BACKGROUND_THREAD_DEFERRED_MAX; malloc_mutex_lock(tsdn, &shard->mtx); hpdata_t *to_hugify = psset_pick_hugify(&shard->psset); if (to_hugify != NULL) { - nstime_t time_hugify_allowed = - hpdata_time_hugify_allowed(to_hugify); + nstime_t time_hugify_allowed = hpdata_time_hugify_allowed( + to_hugify); uint64_t since_hugify_allowed_ms = shard->central->hooks.ms_since(&time_hugify_allowed); /* @@ -1087,8 +1090,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { * sleep for the rest. 
*/ if (since_hugify_allowed_ms < shard->opts.hugify_delay_ms) { - time_ns = shard->opts.hugify_delay_ms - - since_hugify_allowed_ms; + time_ns = shard->opts.hugify_delay_ms + - since_hugify_allowed_ms; time_ns *= 1000 * 1000; } else { malloc_mutex_unlock(tsdn, &shard->mtx); @@ -1110,8 +1113,8 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { if (since_last_purge_ms < shard->opts.min_purge_interval_ms) { uint64_t until_purge_ns; - until_purge_ns = shard->opts.min_purge_interval_ms - - since_last_purge_ms; + until_purge_ns = shard->opts.min_purge_interval_ms + - since_last_purge_ms; until_purge_ns *= 1000 * 1000; if (until_purge_ns < time_ns) { @@ -1176,8 +1179,8 @@ hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { } void -hpa_shard_set_deferral_allowed(tsdn_t *tsdn, hpa_shard_t *shard, - bool deferral_allowed) { +hpa_shard_set_deferral_allowed( + tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed) { hpa_do_consistency_checks(shard); malloc_mutex_lock(tsdn, &shard->mtx); diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 072d490e..45bebe41 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -3,26 +3,18 @@ #include "jemalloc/internal/hpa_hooks.h" -static void *hpa_hooks_map(size_t size); -static void hpa_hooks_unmap(void *ptr, size_t size); -static void hpa_hooks_purge(void *ptr, size_t size); -static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); -static void hpa_hooks_dehugify(void *ptr, size_t size); -static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); +static void *hpa_hooks_map(size_t size); +static void hpa_hooks_unmap(void *ptr, size_t size); +static void hpa_hooks_purge(void *ptr, size_t size); +static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); +static void hpa_hooks_dehugify(void *ptr, size_t size); +static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); -static bool hpa_hooks_vectorized_purge( - void *vec, size_t 
vlen, size_t nbytes); +static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes); -const hpa_hooks_t hpa_hooks_default = { - &hpa_hooks_map, - &hpa_hooks_unmap, - &hpa_hooks_purge, - &hpa_hooks_hugify, - &hpa_hooks_dehugify, - &hpa_hooks_curtime, - &hpa_hooks_ms_since, - &hpa_hooks_vectorized_purge -}; +const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, + &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, + &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; static void * hpa_hooks_map(size_t size) { @@ -82,13 +74,12 @@ hpa_hooks_ms_since(nstime_t *past_nstime) { return nstime_ms_since(past_nstime); } - /* Return true if we did not purge all nbytes, or on some error */ static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE - return pages_purge_process_madvise(vec, vlen, nbytes); + return pages_purge_process_madvise(vec, vlen, nbytes); #else - return true; + return true; #endif } diff --git a/src/hpdata.c b/src/hpdata.c index f3e347c4..9d324952 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -17,8 +17,7 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) -void -hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; @@ -66,8 +65,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { size_t largest_unchosen_range = 0; while (true) { - bool found = fb_urange_iter(hpdata->active_pages, - HUGEPAGE_PAGES, start, &begin, &len); + bool found = fb_urange_iter( + hpdata->active_pages, HUGEPAGE_PAGES, start, &begin, &len); /* * A precondition to this function is that hpdata must be able * to serve the allocation. 
@@ -97,8 +96,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { * We might be about to dirty some memory for the first time; update our * count if so. */ - size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES, - result, npages); + size_t new_dirty = fb_ucount( + hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); hpdata->h_ntouched += new_dirty; @@ -129,8 +128,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { } hpdata_assert_consistent(hpdata); - return (void *)( - (byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE)); + return ( + void *)((byte_t *)hpdata_addr_get(hpdata) + (result << LG_PAGE)); } void @@ -148,10 +147,10 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); /* We might have just created a new, larger range. */ - size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, - begin) + 1); - size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES, - begin + npages - 1); + size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, begin) + + 1); + size_t new_end = fb_ffs( + hpdata->active_pages, HUGEPAGE_PAGES, begin + npages - 1); size_t new_range_len = new_end - new_begin; if (new_range_len > old_longest_range) { @@ -164,8 +163,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { } size_t -hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, - size_t *nranges) { +hpdata_purge_begin( + hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t *nranges) { hpdata_assert_consistent(hpdata); /* * See the comment below; we might purge any inactive extent, so it's @@ -212,29 +211,29 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; fb_init(dirty_pages, HUGEPAGE_PAGES); fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES); - fb_bit_and(dirty_pages, 
dirty_pages, hpdata->touched_pages, - HUGEPAGE_PAGES); + fb_bit_and( + dirty_pages, dirty_pages, hpdata->touched_pages, HUGEPAGE_PAGES); fb_init(purge_state->to_purge, HUGEPAGE_PAGES); size_t next_bit = 0; *nranges = 0; while (next_bit < HUGEPAGE_PAGES) { - size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, - next_bit); + size_t next_dirty = fb_ffs( + dirty_pages, HUGEPAGE_PAGES, next_bit); /* Recall that fb_ffs returns nbits if no set bit is found. */ if (next_dirty == HUGEPAGE_PAGES) { break; } - size_t next_active = fb_ffs(hpdata->active_pages, - HUGEPAGE_PAGES, next_dirty); + size_t next_active = fb_ffs( + hpdata->active_pages, HUGEPAGE_PAGES, next_dirty); /* * Don't purge past the end of the dirty extent, into retained * pages. This helps the kernel a tiny bit, but honestly it's * mostly helpful for testing (where we tend to write test cases * that think in terms of the dirty ranges). */ - ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES, - next_active - 1); + ssize_t last_dirty = fb_fls( + dirty_pages, HUGEPAGE_PAGES, next_active - 1); assert(last_dirty >= 0); assert((size_t)last_dirty >= next_dirty); assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES); @@ -249,9 +248,9 @@ hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive; purge_state->ndirty_to_purge = ndirty; assert(ndirty <= fb_scount( - purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); - assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, - HUGEPAGE_PAGES)); + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(ndirty + == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); assert(*nranges <= ndirty); assert(ndirty == 0 || *nranges > 0); @@ -281,8 +280,8 @@ hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, return false; } - *r_purge_addr = (void *)( - (byte_t *)hpdata_addr_get(hpdata) + purge_begin * PAGE); + *r_purge_addr = (void *)((byte_t 
*)hpdata_addr_get(hpdata) + + purge_begin * PAGE); *r_purge_size = purge_len * PAGE; purge_state->next_purge_search_begin = purge_begin + purge_len; @@ -299,12 +298,13 @@ hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { /* See the comment in reserve. */ assert(!hpdata->h_in_psset || hpdata->h_updating); - assert(purge_state->npurged == fb_scount(purge_state->to_purge, - HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); + assert(purge_state->npurged + == fb_scount( + purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); assert(purge_state->npurged >= purge_state->ndirty_to_purge); - fb_bit_not(purge_state->to_purge, purge_state->to_purge, - HUGEPAGE_PAGES); + fb_bit_not( + purge_state->to_purge, purge_state->to_purge, HUGEPAGE_PAGES); fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, purge_state->to_purge, HUGEPAGE_PAGES); assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge); diff --git a/src/inspect.c b/src/inspect.c index 2575b5c1..116e77a1 100644 --- a/src/inspect.c +++ b/src/inspect.c @@ -3,8 +3,8 @@ #include "jemalloc/internal/inspect.h" void -inspect_extent_util_stats_get(tsdn_t *tsdn, const void *ptr, size_t *nfree, - size_t *nregs, size_t *size) { +inspect_extent_util_stats_get( + tsdn_t *tsdn, const void *ptr, size_t *nfree, size_t *nregs, size_t *size) { assert(ptr != NULL && nfree != NULL && nregs != NULL && size != NULL); const edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); @@ -57,7 +57,7 @@ inspect_extent_util_stats_verbose_get(tsdn_t *tsdn, const void *ptr, &arenas[edata_arena_ind_get(edata)], ATOMIC_RELAXED); assert(arena != NULL); const unsigned binshard = edata_binshard_get(edata); - bin_t *bin = arena_get_bin(arena, szind, binshard); + bin_t *bin = arena_get_bin(arena, szind, binshard); malloc_mutex_lock(tsdn, &bin->lock); if (config_stats) { diff --git a/src/jemalloc.c b/src/jemalloc.c index c6621a79..876c49e8 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -29,11 +29,11 @@ /* Data. 
*/ /* Runtime configuration options. */ -const char *je_malloc_conf +const char *je_malloc_conf #ifndef _WIN32 JEMALLOC_ATTR(weak) #endif - ; + ; /* * The usual rule is that the closer to runtime you are, the higher priority * your configuration settings are (so the jemalloc config options get lower @@ -51,23 +51,23 @@ const char *je_malloc_conf * We don't actually want this to be widespread, so we'll give it a silly name * and not mention it in headers or documentation. */ -const char *je_malloc_conf_2_conf_harder +const char *je_malloc_conf_2_conf_harder #ifndef _WIN32 JEMALLOC_ATTR(weak) #endif - ; + ; const char *opt_malloc_conf_symlink = NULL; const char *opt_malloc_conf_env_var = NULL; -bool opt_abort = +bool opt_abort = #ifdef JEMALLOC_DEBUG true #else false #endif ; -bool opt_abort_conf = +bool opt_abort_conf = #ifdef JEMALLOC_DEBUG true #else @@ -75,29 +75,29 @@ bool opt_abort_conf = #endif ; /* Intentionally default off, even with debug builds. */ -bool opt_confirm_conf = false; -const char *opt_junk = +bool opt_confirm_conf = false; +const char *opt_junk = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) "true" #else "false" #endif ; -bool opt_junk_alloc = +bool opt_junk_alloc = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; -bool opt_junk_free = +bool opt_junk_free = #if (defined(JEMALLOC_DEBUG) && defined(JEMALLOC_FILL)) true #else false #endif ; -bool opt_trust_madvise = +bool opt_trust_madvise = #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS false #else @@ -131,9 +131,9 @@ atomic_zu_t zero_realloc_count = ATOMIC_INIT(0); bool opt_disable_large_size_classes = true; const char *const zero_realloc_mode_names[] = { - "alloc", - "free", - "abort", + "alloc", + "free", + "abort", }; /* @@ -143,27 +143,31 @@ const char *const zero_realloc_mode_names[] = { static const uint8_t junk_alloc_byte = 0xa5; static const uint8_t junk_free_byte = 0x5a; -static void default_junk_alloc(void *ptr, size_t usize) { +static void 
+default_junk_alloc(void *ptr, size_t usize) { memset(ptr, junk_alloc_byte, usize); } -static void default_junk_free(void *ptr, size_t usize) { +static void +default_junk_free(void *ptr, size_t usize) { memset(ptr, junk_free_byte, usize); } -void (*JET_MUTABLE junk_alloc_callback)(void *ptr, size_t size) = &default_junk_alloc; -void (*JET_MUTABLE junk_free_callback)(void *ptr, size_t size) = &default_junk_free; +void (*JET_MUTABLE junk_alloc_callback)( + void *ptr, size_t size) = &default_junk_alloc; +void (*JET_MUTABLE junk_free_callback)( + void *ptr, size_t size) = &default_junk_free; void (*JET_MUTABLE invalid_conf_abort)(void) = &abort; -bool opt_utrace = false; -bool opt_xmalloc = false; -bool opt_experimental_infallible_new = false; -bool opt_experimental_tcache_gc = true; -bool opt_zero = false; -unsigned opt_narenas = 0; -static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); +bool opt_utrace = false; +bool opt_xmalloc = false; +bool opt_experimental_infallible_new = false; +bool opt_experimental_tcache_gc = true; +bool opt_zero = false; +unsigned opt_narenas = 0; +static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); -unsigned ncpus; +unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; @@ -175,9 +179,9 @@ size_t opt_calloc_madvise_threshold = static malloc_mutex_t arenas_lock; /* The global hpa, and whether it's on. */ -bool opt_hpa = false; +bool opt_hpa = false; hpa_shard_opts_t opt_hpa_opts = HPA_SHARD_OPTS_DEFAULT; -sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; +sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; /* * Arenas that are used to service external requests. Not all elements of the @@ -190,48 +194,48 @@ sec_opts_t opt_hpa_sec_opts = SEC_OPTS_DEFAULT; * Points to an arena_t. */ JEMALLOC_ALIGNED(CACHELINE) -atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; -static atomic_u_t narenas_total; /* Use narenas_total_*(). 
*/ +atomic_p_t arenas[MALLOCX_ARENA_LIMIT]; +static atomic_u_t narenas_total; /* Use narenas_total_*(). */ /* Below three are read-only after initialization. */ -static arena_t *a0; /* arenas[0]. */ -unsigned narenas_auto; -unsigned manual_arena_base; +static arena_t *a0; /* arenas[0]. */ +unsigned narenas_auto; +unsigned manual_arena_base; malloc_init_t malloc_init_state = malloc_init_uninitialized; /* False should be the common case. Set to true to trigger initialization. */ -bool malloc_slow = true; +bool malloc_slow = true; /* When malloc_slow is true, set the corresponding bits for sanity check. */ enum { - flag_opt_junk_alloc = (1U), - flag_opt_junk_free = (1U << 1), - flag_opt_zero = (1U << 2), - flag_opt_utrace = (1U << 3), - flag_opt_xmalloc = (1U << 4) + flag_opt_junk_alloc = (1U), + flag_opt_junk_free = (1U << 1), + flag_opt_zero = (1U << 2), + flag_opt_utrace = (1U << 3), + flag_opt_xmalloc = (1U << 4) }; -static uint8_t malloc_slow_flags; +static uint8_t malloc_slow_flags; #ifdef JEMALLOC_THREADED_INIT /* Used to let the initializing thread recursively allocate. */ -# define NO_INITIALIZER ((unsigned long)0) -# define INITIALIZER pthread_self() -# define IS_INITIALIZER (malloc_initializer == pthread_self()) -static pthread_t malloc_initializer = NO_INITIALIZER; +# define NO_INITIALIZER ((unsigned long)0) +# define INITIALIZER pthread_self() +# define IS_INITIALIZER (malloc_initializer == pthread_self()) +static pthread_t malloc_initializer = NO_INITIALIZER; #else -# define NO_INITIALIZER false -# define INITIALIZER true -# define IS_INITIALIZER malloc_initializer -static bool malloc_initializer = NO_INITIALIZER; +# define NO_INITIALIZER false +# define INITIALIZER true +# define IS_INITIALIZER malloc_initializer +static bool malloc_initializer = NO_INITIALIZER; #endif /* Used to avoid initialization races. 
*/ #ifdef _WIN32 -#if _WIN32_WINNT >= 0x0600 -static malloc_mutex_t init_lock = SRWLOCK_INIT; -#else -static malloc_mutex_t init_lock; -static bool init_lock_initialized = false; +# if _WIN32_WINNT >= 0x0600 +static malloc_mutex_t init_lock = SRWLOCK_INIT; +# else +static malloc_mutex_t init_lock; +static bool init_lock_initialized = false; JEMALLOC_ATTR(constructor) static void WINAPI @@ -253,36 +257,38 @@ _init_init_lock(void) { init_lock_initialized = true; } -#ifdef _MSC_VER -# pragma section(".CRT$XCU", read) -JEMALLOC_SECTION(".CRT$XCU") JEMALLOC_ATTR(used) -static const void (WINAPI *init_init_lock)(void) = _init_init_lock; -#endif -#endif +# ifdef _MSC_VER +# pragma section(".CRT$XCU", read) +JEMALLOC_SECTION(".CRT$XCU") +JEMALLOC_ATTR(used) static const + void(WINAPI *init_init_lock)(void) = _init_init_lock; +# endif +# endif #else -static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; +static malloc_mutex_t init_lock = MALLOC_MUTEX_INITIALIZER; #endif typedef struct { - void *p; /* Input pointer (as in realloc(p, s)). */ - size_t s; /* Request size. */ - void *r; /* Result pointer. */ + void *p; /* Input pointer (as in realloc(p, s)). */ + size_t s; /* Request size. */ + void *r; /* Result pointer. */ } malloc_utrace_t; #ifdef JEMALLOC_UTRACE -# define UTRACE(a, b, c) do { \ - if (unlikely(opt_utrace)) { \ - int utrace_serrno = errno; \ - malloc_utrace_t ut; \ - ut.p = (a); \ - ut.s = (b); \ - ut.r = (c); \ - UTRACE_CALL(&ut, sizeof(ut)); \ - errno = utrace_serrno; \ - } \ -} while (0) +# define UTRACE(a, b, c) \ + do { \ + if (unlikely(opt_utrace)) { \ + int utrace_serrno = errno; \ + malloc_utrace_t ut; \ + ut.p = (a); \ + ut.s = (b); \ + ut.r = (c); \ + UTRACE_CALL(&ut, sizeof(ut)); \ + errno = utrace_serrno; \ + } \ + } while (0) #else -# define UTRACE(a, b, c) +# define UTRACE(a, b, c) #endif /* Whether encountered any invalid config options. */ @@ -294,8 +300,8 @@ static bool had_conf_error = false; * definition. 
*/ -static bool malloc_init_hard_a0(void); -static bool malloc_init_hard(void); +static bool malloc_init_hard_a0(void); +static bool malloc_init_hard(void); /******************************************************************************/ /* @@ -442,8 +448,10 @@ arena_new_create_background_thread(tsdn_t *tsdn, unsigned ind) { if (have_background_thread) { if (background_thread_create(tsdn_tsd(tsdn), ind)) { - malloc_printf(": error in background thread " - "creation for arena %u. Abort.\n", ind); + malloc_printf( + ": error in background thread " + "creation for arena %u. Abort.\n", + ind); abort(); } } @@ -479,8 +487,8 @@ arena_bind(tsd_t *tsd, unsigned ind, bool internal) { &arena->binshard_next, 1, ATOMIC_RELAXED); tsd_binshards_t *bins = tsd_binshardsp_get(tsd); for (unsigned i = 0; i < SC_NBINS; i++) { - assert(bin_infos[i].n_shards > 0 && - bin_infos[i].n_shards <= BIN_SHARDS_MAX); + assert(bin_infos[i].n_shards > 0 + && bin_infos[i].n_shards <= BIN_SHARDS_MAX); bins->binshard[i] = shard % bin_infos[i].n_shards; } } @@ -495,8 +503,8 @@ arena_migrate(tsd_t *tsd, arena_t *oldarena, arena_t *newarena) { arena_nthreads_inc(newarena, false); tsd_arena_set(tsd, newarena); - if (arena_nthreads_get(oldarena, false) == 0 && - !background_thread_enabled()) { + if (arena_nthreads_get(oldarena, false) == 0 + && !background_thread_enabled()) { /* * Purge if the old arena has no associated threads anymore and * no background threads. @@ -537,7 +545,7 @@ arena_choose_hard(tsd_t *tsd, bool internal) { if (narenas_auto > 1) { unsigned i, j, choose[2], first_null; - bool is_new_arena[2]; + bool is_new_arena[2]; /* * Determine binding for both non-internal and internal @@ -562,11 +570,14 @@ arena_choose_hard(tsd_t *tsd, bool internal) { * number of threads assigned to it. 
*/ for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get( - tsd_tsdn(tsd), i, false), !!j) < - arena_nthreads_get(arena_get( - tsd_tsdn(tsd), choose[j], false), - !!j)) { + if (arena_nthreads_get( + arena_get( + tsd_tsdn(tsd), i, false), + !!j) + < arena_nthreads_get( + arena_get(tsd_tsdn(tsd), + choose[j], false), + !!j)) { choose[j] = i; } } @@ -585,16 +596,17 @@ arena_choose_hard(tsd_t *tsd, bool internal) { } for (j = 0; j < 2; j++) { - if (arena_nthreads_get(arena_get(tsd_tsdn(tsd), - choose[j], false), !!j) == 0 || first_null == - narenas_auto) { + if (arena_nthreads_get( + arena_get(tsd_tsdn(tsd), choose[j], false), !!j) + == 0 + || first_null == narenas_auto) { /* * Use an unloaded arena, or the least loaded * arena if all arenas are already initialized. */ if (!!j == internal) { - ret = arena_get(tsd_tsdn(tsd), - choose[j], false); + ret = arena_get( + tsd_tsdn(tsd), choose[j], false); } } else { arena_t *arena; @@ -604,8 +616,8 @@ arena_choose_hard(tsd_t *tsd, bool internal) { arena = arena_init_locked(tsd_tsdn(tsd), choose[j], &arena_config_default); if (arena == NULL) { - malloc_mutex_unlock(tsd_tsdn(tsd), - &arenas_lock); + malloc_mutex_unlock( + tsd_tsdn(tsd), &arenas_lock); return NULL; } is_new_arena[j] = true; @@ -657,7 +669,7 @@ arena_cleanup(tsd_t *tsd) { static void stats_print_atexit(void) { if (config_stats) { - tsdn_t *tsdn; + tsdn_t *tsdn; unsigned narenas, i; tsdn = tsdn_fetch(); @@ -675,13 +687,13 @@ stats_print_atexit(void) { tcache_slow_t *tcache_slow; malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - ql_foreach(tcache_slow, &arena->tcache_ql, - link) { - tcache_stats_merge(tsdn, - tcache_slow->tcache, arena); + ql_foreach ( + tcache_slow, &arena->tcache_ql, link) { + tcache_stats_merge( + tsdn, tcache_slow->tcache, arena); } - malloc_mutex_unlock(tsdn, - &arena->tcache_ql_mtx); + malloc_mutex_unlock( + tsdn, &arena->tcache_ql_mtx); } } } @@ -726,16 +738,16 @@ jemalloc_getenv(const char *name) { #ifdef JEMALLOC_FORCE_GETENV return 
getenv(name); #else -# ifdef JEMALLOC_HAVE_SECURE_GETENV +# ifdef JEMALLOC_HAVE_SECURE_GETENV return secure_getenv(name); -# else -# ifdef JEMALLOC_HAVE_ISSETUGID +# else +# ifdef JEMALLOC_HAVE_ISSETUGID if (issetugid() != 0) { return NULL; } -# endif +# endif return getenv(name); -# endif +# endif #endif } @@ -759,16 +771,16 @@ malloc_ncpus(void) { * is available, to avoid using more arenas than necessary. */ { -# if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; -# else +# else cpu_set_t set; -# endif -# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# endif +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) sched_getaffinity(0, sizeof(set), &set); -# else +# else pthread_getaffinity_np(pthread_self(), sizeof(set), &set); -# endif +# endif result = CPU_COUNT(&set); } #else @@ -785,8 +797,7 @@ malloc_ncpus(void) { * Since otherwise tricky things is possible with percpu arenas in use. */ static bool -malloc_cpu_count_is_deterministic(void) -{ +malloc_cpu_count_is_deterministic(void) { #ifdef _WIN32 return true; #else @@ -795,22 +806,22 @@ malloc_cpu_count_is_deterministic(void) if (cpu_onln != cpu_conf) { return false; } -# if defined(CPU_COUNT) -# if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(CPU_COUNT) +# if defined(__FreeBSD__) || defined(__DragonFly__) cpuset_t set; -# else +# else cpu_set_t set; -# endif /* __FreeBSD__ */ -# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) +# endif /* __FreeBSD__ */ +# if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY) sched_getaffinity(0, sizeof(set), &set); -# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ +# else /* !JEMALLOC_HAVE_SCHED_SETAFFINITY */ pthread_getaffinity_np(pthread_self(), sizeof(set), &set); -# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ +# endif /* JEMALLOC_HAVE_SCHED_SETAFFINITY */ long cpu_affinity = CPU_COUNT(&set); if (cpu_affinity != cpu_conf) { return false; } -# endif /* CPU_COUNT */ +# endif /* CPU_COUNT */ return true; #endif } @@ 
-822,10 +833,13 @@ init_opt_stats_opts(const char *v, size_t vlen, char *dest) { for (size_t i = 0; i < vlen; i++) { switch (v[i]) { -#define OPTION(o, v, d, s) case o: break; +#define OPTION(o, v, d, s) \ + case o: \ + break; STATS_PRINT_OPTIONS #undef OPTION - default: continue; + default: + continue; } if (strchr(dest, v[i]) != NULL) { @@ -851,25 +865,75 @@ malloc_conf_format_error(const char *msg, const char *begin, const char *end) { static bool malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p) { - bool accept; + bool accept; const char *opts = *opts_p; *k_p = opts; for (accept = false; !accept;) { switch (*opts) { - case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': - case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': - case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': - case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': - case 'Y': case 'Z': - case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': - case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': - case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': - case 's': case 't': case 'u': case 'v': case 'w': case 'x': - case 'y': case 'z': - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '0': + case '1': + case '2': + case '3': 
+ case '4': + case '5': + case '6': + case '7': + case '8': + case '9': case '_': opts++; break; @@ -882,8 +946,8 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, case '\0': if (opts != *opts_p) { malloc_conf_format_error( - "Conf string ends with key", - *opts_p, opts - 1); + "Conf string ends with key", *opts_p, + opts - 1); had_conf_error = true; } return true; @@ -908,8 +972,8 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, */ if (*opts == '\0') { malloc_conf_format_error( - "Conf string ends with comma", - *opts_p, opts - 1); + "Conf string ends with comma", *opts_p, + opts - 1); had_conf_error = true; } *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; @@ -932,16 +996,17 @@ malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, static void malloc_abort_invalid_conf(void) { assert(opt_abort_conf); - malloc_printf(": Abort (abort_conf:true) on invalid conf " + malloc_printf( + ": Abort (abort_conf:true) on invalid conf " "value (see above).\n"); invalid_conf_abort(); } static void -malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, - size_t vlen) { - malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, - (int)vlen, v); +malloc_conf_error( + const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { + malloc_printf( + ": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); /* If abort_conf is set, error out after processing all options. 
*/ const char *experimental = "experimental_"; if (strncmp(k, experimental, strlen(experimental)) == 0) { @@ -1002,48 +1067,50 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { break; #else ssize_t linklen = 0; -# ifndef _WIN32 - int saved_errno = errno; +# ifndef _WIN32 + int saved_errno = errno; const char *linkname = -# ifdef JEMALLOC_PREFIX - "/etc/"JEMALLOC_PREFIX"malloc.conf" -# else +# ifdef JEMALLOC_PREFIX + "/etc/" JEMALLOC_PREFIX "malloc.conf" +# else "/etc/malloc.conf" -# endif +# endif ; /* * Try to use the contents of the "/etc/malloc.conf" symbolic * link's name. */ -# ifndef JEMALLOC_READLINKAT +# ifndef JEMALLOC_READLINKAT linklen = readlink(linkname, readlink_buf, PATH_MAX); -# else - linklen = readlinkat(AT_FDCWD, linkname, readlink_buf, PATH_MAX); -# endif +# else + linklen = readlinkat( + AT_FDCWD, linkname, readlink_buf, PATH_MAX); +# endif if (linklen == -1) { /* No configuration specified. */ linklen = 0; /* Restore errno. */ set_errno(saved_errno); } -# endif +# endif readlink_buf[linklen] = '\0'; ret = readlink_buf; break; #endif - } case 3: { + } + case 3: { #ifndef JEMALLOC_CONFIG_ENV ret = NULL; break; #else const char *envname = -# ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX"MALLOC_CONF" -# else - "MALLOC_CONF" -# endif - ; +# ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX "MALLOC_CONF" +# else + "MALLOC_CONF" +# endif + ; if ((ret = jemalloc_getenv(envname)) != NULL) { opt_malloc_conf_env_var = ret; @@ -1053,10 +1120,12 @@ obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { } break; #endif - } case 4: { + } + case 4: { ret = je_malloc_conf_2_conf_harder; break; - } default: + } + default: not_reached(); ret = NULL; } @@ -1072,15 +1141,16 @@ validate_hpa_settings(void) { had_conf_error = true; malloc_printf( ": huge page size (%zu) greater than expected." 
- "May not be supported or behave as expected.", HUGEPAGE); + "May not be supported or behave as expected.", + HUGEPAGE); } #ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE if (opt_hpa_opts.hugify_sync) { - had_conf_error = true; - malloc_printf( - ": hpa_hugify_sync config option is enabled, " - "but MADV_COLLAPSE support was not detected at build " - "time."); + had_conf_error = true; + malloc_printf( + ": hpa_hugify_sync config option is enabled, " + "but MADV_COLLAPSE support was not detected at build " + "time."); } #endif } @@ -1090,17 +1160,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], char readlink_buf[PATH_MAX + 1]) { static const char *opts_explain[MALLOC_CONF_NSOURCES] = { - "string specified via --with-malloc-conf", - "string pointed to by the global variable malloc_conf", - "\"name\" of the file referenced by the symbolic link named " - "/etc/malloc.conf", - "value of the environment variable MALLOC_CONF", - "string pointed to by the global variable " - "malloc_conf_2_conf_harder", + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF", + "string pointed to by the global variable " + "malloc_conf_2_conf_harder", }; - unsigned i; + unsigned i; const char *opts, *k, *v; - size_t klen, vlen; + size_t klen, vlen; for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { /* Get runtime configuration. */ @@ -1110,129 +1180,116 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opts = opts_cache[i]; if (!initial_call && opt_confirm_conf) { malloc_printf( - ": malloc_conf #%u (%s): \"%s\"\n", - i + 1, opts_explain[i], opts != NULL ? opts : ""); + ": malloc_conf #%u (%s): \"%s\"\n", i + 1, + opts_explain[i], opts != NULL ? 
opts : ""); } if (opts == NULL) { continue; } - while (*opts != '\0' && !malloc_conf_next(&opts, &k, &klen, &v, - &vlen)) { - -#define CONF_ERROR(msg, k, klen, v, vlen) \ - if (!initial_call) { \ - malloc_conf_error( \ - msg, k, klen, v, vlen); \ - cur_opt_valid = false; \ - } -#define CONF_CONTINUE { \ - if (!initial_call && opt_confirm_conf \ - && cur_opt_valid) { \ - malloc_printf(": -- " \ - "Set conf value: %.*s:%.*s" \ - "\n", (int)klen, k, \ - (int)vlen, v); \ - } \ - continue; \ - } -#define CONF_MATCH(n) \ - (sizeof(n)-1 == klen && strncmp(n, k, klen) == 0) -#define CONF_MATCH_VALUE(n) \ - (sizeof(n)-1 == vlen && strncmp(n, v, vlen) == 0) -#define CONF_HANDLE_BOOL(o, n) \ - if (CONF_MATCH(n)) { \ - if (CONF_MATCH_VALUE("true")) { \ - o = true; \ - } else if (CONF_MATCH_VALUE("false")) { \ - o = false; \ - } else { \ - CONF_ERROR("Invalid conf value",\ - k, klen, v, vlen); \ - } \ - CONF_CONTINUE; \ - } - /* + while (*opts != '\0' + && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + malloc_conf_error(msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE \ + { \ + if (!initial_call && opt_confirm_conf && cur_opt_valid) { \ + malloc_printf( \ + ": -- " \ + "Set conf value: %.*s:%.*s" \ + "\n", \ + (int)klen, k, (int)vlen, v); \ + } \ + continue; \ + } +#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0) +#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0) +#define CONF_HANDLE_BOOL(o, n) \ + if (CONF_MATCH(n)) { \ + if (CONF_MATCH_VALUE("true")) { \ + o = true; \ + } else if (CONF_MATCH_VALUE("false")) { \ + o = false; \ + } else { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } \ + CONF_CONTINUE; \ + } + /* * One of the CONF_MIN macros below expands, in one of the use points, * to "unsigned integer < 0", which is always false, triggering the * GCC -Wtype-limits warning, which we disable here and 
re-enable below. */ - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS -#define CONF_DONT_CHECK_MIN(um, min) false -#define CONF_CHECK_MIN(um, min) ((um) < (min)) -#define CONF_DONT_CHECK_MAX(um, max) false -#define CONF_CHECK_MAX(um, max) ((um) > (max)) +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) -#define CONF_VALUE_READ(max_t, result) \ - char *end; \ - set_errno(0); \ - result = (max_t)malloc_strtoumax(v, &end, 0); -#define CONF_VALUE_READ_FAIL() \ - (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) +#define CONF_VALUE_READ(max_t, result) \ + char *end; \ + set_errno(0); \ + result = (max_t)malloc_strtoumax(v, &end, 0); +#define CONF_VALUE_READ_FAIL() \ + (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) -#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ - if (CONF_MATCH(n)) { \ - max_t mv; \ - CONF_VALUE_READ(max_t, mv) \ - if (CONF_VALUE_READ_FAIL()) { \ - CONF_ERROR("Invalid conf value",\ - k, klen, v, vlen); \ - } else if (clip) { \ - if (check_min(mv, (t)(min))) { \ - o = (t)(min); \ - } else if ( \ - check_max(mv, (t)(max))) { \ - o = (t)(max); \ - } else { \ - o = (t)mv; \ - } \ - } else { \ - if (check_min(mv, (t)(min)) || \ - check_max(mv, (t)(max))) { \ - CONF_ERROR( \ - "Out-of-range " \ - "conf value", \ - k, klen, v, vlen); \ - } else { \ - o = (t)mv; \ - } \ - } \ - CONF_CONTINUE; \ - } -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, \ - check_max, clip) -#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip)\ - CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, \ - check_max, clip) +#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ + if 
(CONF_MATCH(n)) { \ + max_t mv; \ + CONF_VALUE_READ(max_t, mv) \ + if (CONF_VALUE_READ_FAIL()) { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } else if (clip) { \ + if (check_min(mv, (t)(min))) { \ + o = (t)(min); \ + } else if (check_max(mv, (t)(max))) { \ + o = (t)(max); \ + } else { \ + o = (t)mv; \ + } \ + } else { \ + if (check_min(mv, (t)(min)) \ + || check_max(mv, (t)(max))) { \ + CONF_ERROR( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } else { \ + o = (t)mv; \ + } \ + } \ + CONF_CONTINUE; \ + } +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, \ - clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_SIGNED(int64_t, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip)\ - CONF_HANDLE_T_U(uint64_t, o, n, min, max, \ - check_min, check_max, clip) -#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - CONF_HANDLE_T_SIGNED(ssize_t, o, n, min, max, \ - CONF_CHECK_MIN, CONF_CHECK_MAX, false) -#define CONF_HANDLE_CHAR_P(o, n, d) \ - if (CONF_MATCH(n)) { \ - size_t cpylen = (vlen <= \ - sizeof(o)-1) ? 
vlen : \ - sizeof(o)-1; \ - strncpy(o, v, cpylen); \ - o[cpylen] = '\0'; \ - CONF_CONTINUE; \ - } +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_SIGNED( \ + int64_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ + CONF_HANDLE_T_SIGNED( \ + ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false) +#define CONF_HANDLE_CHAR_P(o, n, d) \ + if (CONF_MATCH(n)) { \ + size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen \ + : sizeof(o) - 1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ + CONF_CONTINUE; \ + } bool cur_opt_valid = true; @@ -1245,27 +1302,29 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") - CONF_HANDLE_BOOL(opt_huge_arena_pac_thp, "huge_arena_pac_thp") + CONF_HANDLE_BOOL( + opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { - int m; + int m; bool match = false; for (m = 0; m < metadata_thp_mode_limit; m++) { if (strncmp(metadata_thp_mode_names[m], - v, vlen) == 0) { + v, vlen) + == 0) { opt_metadata_thp = m; match = true; break; } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { - int m; + int m; bool match = false; for (m = 0; m < 
dss_prec_limit; m++) { if (strncmp(dss_prec_names[m], v, vlen) @@ -1283,8 +1342,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } @@ -1301,31 +1360,32 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (CONF_MATCH("narenas_ratio")) { char *end; - bool err = fxp_parse(&opt_narenas_ratio, v, - &end); + bool err = fxp_parse( + &opt_narenas_ratio, v, &end); if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } if (CONF_MATCH("bin_shards")) { const char *bin_shards_segment_cur = v; - size_t vlen_left = vlen; + size_t vlen_left = vlen; do { size_t size_start; size_t size_end; size_t nshards; - bool err = multi_setting_parse_next( - &bin_shards_segment_cur, &vlen_left, - &size_start, &size_end, &nshards); - if (err || bin_update_shard_size( - bin_shard_sizes, size_start, - size_end, nshards)) { + bool err = multi_setting_parse_next( + &bin_shards_segment_cur, &vlen_left, + &size_start, &size_end, &nshards); + if (err + || bin_update_shard_size( + bin_shard_sizes, size_start, + size_end, nshards)) { CONF_ERROR( "Invalid settings for " - "bin_shards", k, klen, v, - vlen); + "bin_shards", + k, klen, v, vlen); break; } } while (vlen_left > 0); @@ -1337,12 +1397,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* clip */ true) CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, "remote_free_max_batch", 0, - BIN_REMOTE_FREE_ELEMS_MAX, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, - "remote_free_max", 0, - BIN_REMOTE_FREE_ELEMS_MAX, + "remote_free_max", 0, BIN_REMOTE_FREE_ELEMS_MAX, 
CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) @@ -1350,9 +1409,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bool err = tcache_bin_info_default_init( v, vlen); if (err) { - CONF_ERROR("Invalid settings for " - "tcache_ncached_max", k, klen, v, - vlen); + CONF_ERROR( + "Invalid settings for " + "tcache_ncached_max", + k, klen, v, vlen); } CONF_CONTINUE; } @@ -1360,13 +1420,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false); CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, - "dirty_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < - QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : - SSIZE_MAX); + "dirty_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, - "muzzy_decay_ms", -1, NSTIME_SEC_MAX * KQU(1000) < - QU(SSIZE_MAX) ? NSTIME_SEC_MAX * KQU(1000) : - SSIZE_MAX); + "muzzy_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? 
NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, "process_madvise_max_batch", 0, PROCESS_MADVISE_MAX_BATCH_LIMIT, @@ -1374,16 +1436,16 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], /* clip */ true) CONF_HANDLE_BOOL(opt_stats_print, "stats_print") if (CONF_MATCH("stats_print_opts")) { - init_opt_stats_opts(v, vlen, - opt_stats_print_opts); + init_opt_stats_opts( + v, vlen, opt_stats_print_opts); CONF_CONTINUE; } CONF_HANDLE_INT64_T(opt_stats_interval, - "stats_interval", -1, INT64_MAX, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false) if (CONF_MATCH("stats_interval_opts")) { - init_opt_stats_opts(v, vlen, - opt_stats_interval_opts); + init_opt_stats_opts( + v, vlen, opt_stats_interval_opts); CONF_CONTINUE; } if (config_fill) { @@ -1405,8 +1467,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_junk_alloc = false; opt_junk_free = true; } else { - CONF_ERROR( - "Invalid conf value", + CONF_ERROR("Invalid conf value", k, klen, v, vlen); } CONF_CONTINUE; @@ -1428,15 +1489,15 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_experimental_tcache_gc, "experimental_tcache_gc") CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", - 0, TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, + CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, + TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) if (CONF_MATCH("lg_tcache_max")) { size_t m; CONF_VALUE_READ(size_t, m) if (CONF_VALUE_READ_FAIL()) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } else { /* clip if necessary */ if (m > TCACHE_LG_MAXCLASS_LIMIT) { @@ -1454,14 +1515,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], 
"lg_tcache_nslots_mul", -16, 16) /* Ditto with values past 2048. */ CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, - "tcache_nslots_small_min", 1, 2048, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, - "tcache_nslots_small_max", 1, 2048, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, - "tcache_nslots_large", 1, 2048, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, @@ -1471,18 +1532,19 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, - "lg_tcache_flush_small_div", 1, 16, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, - "lg_tcache_flush_large_div", 1, 16, - CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) + "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, "debug_double_free_max_scan", 0, UINT_MAX, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, /* clip */ false) CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, /* clip */ false) + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ false) /* * The runtime option of oversize_threshold remains @@ -1502,10 +1564,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned 
bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; - for (int m = percpu_arena_mode_names_base; m < - percpu_arena_mode_names_limit; m++) { + for (int m = percpu_arena_mode_names_base; + m < percpu_arena_mode_names_limit; m++) { if (strncmp(percpu_arena_mode_names[m], - v, vlen) == 0) { + v, vlen) + == 0) { if (!have_percpu_arena) { CONF_ERROR( "No getcpu support", @@ -1517,18 +1580,17 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } - CONF_HANDLE_BOOL(opt_background_thread, - "background_thread"); + CONF_HANDLE_BOOL( + opt_background_thread, "background_thread"); CONF_HANDLE_SIZE_T(opt_max_background_threads, - "max_background_threads", 1, - opt_max_background_threads, - CONF_CHECK_MIN, CONF_CHECK_MAX, - true); + "max_background_threads", 1, + opt_max_background_threads, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); CONF_HANDLE_BOOL(opt_hpa, "hpa") CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, "hpa_slab_max_alloc", PAGE, HUGEPAGE, @@ -1544,12 +1606,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (CONF_MATCH("hpa_hugification_threshold_ratio")) { fxp_t ratio; char *end; - bool err = fxp_parse(&ratio, v, - &end); + bool err = fxp_parse(&ratio, v, &end); if (err || (size_t)(end - v) != vlen || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } else { opt_hpa_opts.hugification_threshold = fxp_mul_frac(HUGEPAGE, ratio); @@ -1557,16 +1618,14 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_CONTINUE; } - CONF_HANDLE_UINT64_T( - opt_hpa_opts.hugify_delay_ms, "hpa_hugify_delay_ms", - 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - false); + 
CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms, + "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); CONF_HANDLE_BOOL( opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); - CONF_HANDLE_UINT64_T( - opt_hpa_opts.min_purge_interval_ms, + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms, "hpa_min_purge_interval_ms", 0, 0, CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); @@ -1581,11 +1640,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } fxp_t ratio; char *end; - bool err = fxp_parse(&ratio, v, - &end); + bool err = fxp_parse(&ratio, v, &end); if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } else { opt_hpa_opts.dirty_mult = ratio; } @@ -1596,8 +1654,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, USIZE_GROW_SLOW_THRESHOLD, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); + "hpa_sec_max_alloc", PAGE, + USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1613,23 +1672,23 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], sc_data_init(sc_data); CONF_CONTINUE; } - bool err; + bool err; const char *slab_size_segment_cur = v; - size_t vlen_left = vlen; + size_t vlen_left = vlen; do { size_t slab_start; size_t slab_end; size_t pgs; err = multi_setting_parse_next( - &slab_size_segment_cur, - &vlen_left, &slab_start, &slab_end, - &pgs); + &slab_size_segment_cur, &vlen_left, + &slab_start, &slab_end, &pgs); if (!err) { sc_data_update_slab_size( sc_data, slab_start, slab_end, (int)pgs); } else { - CONF_ERROR("Invalid settings " + CONF_ERROR( + "Invalid settings " 
"for slab_sizes", k, klen, v, vlen); } @@ -1638,22 +1697,24 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") - CONF_HANDLE_CHAR_P(opt_prof_prefix, - "prof_prefix", "jeprof") + CONF_HANDLE_CHAR_P( + opt_prof_prefix, "prof_prefix", "jeprof") CONF_HANDLE_BOOL(opt_prof_active, "prof_active") CONF_HANDLE_BOOL(opt_prof_thread_active_init, "prof_thread_active_init") CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - true) - CONF_HANDLE_SIZE_T(opt_experimental_lg_prof_threshold, - "experimental_lg_prof_threshold", 0, (sizeof(uint64_t) << 3) - - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - true) + "lg_prof_sample", 0, + (sizeof(uint64_t) << 3) - 1, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_SIZE_T( + opt_experimental_lg_prof_threshold, + "experimental_lg_prof_threshold", 0, + (sizeof(uint64_t) << 3) - 1, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") - CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", - 1, PROF_BT_MAX_LIMIT, CONF_CHECK_MIN, CONF_CHECK_MAX, + CONF_HANDLE_UNSIGNED(opt_prof_bt_max, + "prof_bt_max", 1, PROF_BT_MAX_LIMIT, + CONF_CHECK_MIN, CONF_CHECK_MAX, /* clip */ true) CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, "lg_prof_interval", -1, @@ -1661,10 +1722,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - CONF_HANDLE_BOOL(opt_prof_leak_error, - "prof_leak_error") + CONF_HANDLE_BOOL( + opt_prof_leak_error, "prof_leak_error") CONF_HANDLE_BOOL(opt_prof_log, "prof_log") - CONF_HANDLE_BOOL(opt_prof_pid_namespace, "prof_pid_namespace") + CONF_HANDLE_BOOL(opt_prof_pid_namespace, + "prof_pid_namespace") CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, 
"prof_recent_alloc_max", -1, SSIZE_MAX) CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") @@ -1703,9 +1765,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } if (config_log) { if (CONF_MATCH("log")) { - size_t cpylen = ( - vlen <= sizeof(log_var_names) ? - vlen : sizeof(log_var_names) - 1); + size_t cpylen = (vlen + <= sizeof(log_var_names) + ? vlen + : sizeof(log_var_names) - 1); strncpy(log_var_names, v, cpylen); log_var_names[cpylen] = '\0'; CONF_CONTINUE; @@ -1714,12 +1777,13 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (CONF_MATCH("thp")) { bool match = false; for (int m = 0; m < thp_mode_names_limit; m++) { - if (strncmp(thp_mode_names[m],v, vlen) + if (strncmp(thp_mode_names[m], v, vlen) == 0) { - if (!have_madvise_huge && !have_memcntl) { + if (!have_madvise_huge + && !have_memcntl) { CONF_ERROR( - "No THP support", - k, klen, v, vlen); + "No THP support", k, + klen, v, vlen); } opt_thp = m; match = true; @@ -1727,34 +1791,34 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } } if (!match) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } if (CONF_MATCH("zero_realloc")) { if (CONF_MATCH_VALUE("alloc")) { - opt_zero_realloc_action - = zero_realloc_action_alloc; + opt_zero_realloc_action = + zero_realloc_action_alloc; } else if (CONF_MATCH_VALUE("free")) { - opt_zero_realloc_action - = zero_realloc_action_free; + opt_zero_realloc_action = + zero_realloc_action_free; } else if (CONF_MATCH_VALUE("abort")) { - opt_zero_realloc_action - = zero_realloc_action_abort; + opt_zero_realloc_action = + zero_realloc_action_abort; } else { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } CONF_CONTINUE; } - if (config_uaf_detection && - CONF_MATCH("lg_san_uaf_align")) { + if (config_uaf_detection + && 
CONF_MATCH("lg_san_uaf_align")) { ssize_t a; CONF_VALUE_READ(ssize_t, a) if (CONF_VALUE_READ_FAIL() || a < -1) { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); } if (a == -1) { opt_lg_san_uaf_align = -1; @@ -1807,8 +1871,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], #undef CONF_HANDLE_SIZE_T #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P - /* Re-enable diagnostic "-Wtype-limits" */ - JEMALLOC_DIAGNOSTIC_POP + /* Re-enable diagnostic "-Wtype-limits" */ + JEMALLOC_DIAGNOSTIC_POP } validate_hpa_settings(); if (opt_abort_conf && had_conf_error) { @@ -1821,7 +1885,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], static bool malloc_conf_init_check_deps(void) { if (opt_prof_leak_error && !opt_prof_final) { - malloc_printf(": prof_leak_error is set w/o " + malloc_printf( + ": prof_leak_error is set w/o " "prof_final.\n"); return true; } @@ -1836,13 +1901,13 @@ malloc_conf_init_check_deps(void) { static void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], char readlink_buf[PATH_MAX + 1]) { - const char *opts_cache[MALLOC_CONF_NSOURCES] = {NULL, NULL, NULL, NULL, - NULL}; + const char *opts_cache[MALLOC_CONF_NSOURCES] = { + NULL, NULL, NULL, NULL, NULL}; /* The first call only set the confirm_conf option and opts_cache */ malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); - malloc_conf_init_helper(sc_data, bin_shard_sizes, false, opts_cache, - NULL); + malloc_conf_init_helper( + sc_data, bin_shard_sizes, false, opts_cache, NULL); if (malloc_conf_init_check_deps()) { /* check_deps does warning msg only; abort below if needed. 
*/ if (opt_abort_conf) { @@ -1855,8 +1920,9 @@ malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], static bool malloc_init_hard_needed(void) { - if (malloc_initialized() || (IS_INITIALIZER && malloc_init_state == - malloc_init_recursible)) { + if (malloc_initialized() + || (IS_INITIALIZER + && malloc_init_state == malloc_init_recursible)) { /* * Another thread initialized the allocator before this one * acquired init_lock, or this thread is the initializing @@ -1946,7 +2012,8 @@ malloc_init_hard_a0_locked(void) { prof_boot1(); } if (opt_hpa && !hpa_supported()) { - malloc_printf(": HPA not supported in the current " + malloc_printf( + ": HPA not supported in the current " "configuration; %s.", opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { @@ -1962,7 +2029,7 @@ malloc_init_hard_a0_locked(void) { return true; } if (malloc_mutex_init(&arenas_lock, "arenas", WITNESS_RANK_ARENAS, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } hook_boot(); @@ -1984,7 +2051,8 @@ malloc_init_hard_a0_locked(void) { a0 = arena_get(TSDN_NULL, 0, false); if (opt_hpa && !hpa_supported()) { - malloc_printf(": HPA not supported in the current " + malloc_printf( + ": HPA not supported in the current " "configuration; %s.", opt_abort_conf ? "aborting" : "disabling"); if (opt_abort_conf) { @@ -2035,7 +2103,8 @@ malloc_init_hard_recursible(void) { */ if (opt_narenas == 0) { opt_percpu_arena = percpu_arena_disabled; - malloc_write(": Number of CPUs " + malloc_write( + ": Number of CPUs " "detected is not deterministic. Per-CPU " "arena disabled.\n"); if (opt_abort_conf) { @@ -2049,11 +2118,12 @@ malloc_init_hard_recursible(void) { } #if (defined(JEMALLOC_HAVE_PTHREAD_ATFORK) && !defined(JEMALLOC_MUTEX_INIT_CB) \ - && !defined(JEMALLOC_ZONE) && !defined(_WIN32) && \ - !defined(__native_client__)) + && !defined(JEMALLOC_ZONE) && !defined(_WIN32) \ + && !defined(__native_client__)) /* LinuxThreads' pthread_atfork() allocates. 
*/ if (pthread_atfork(jemalloc_prefork, jemalloc_postfork_parent, - jemalloc_postfork_child) != 0) { + jemalloc_postfork_child) + != 0) { malloc_write(": Error in pthread_atfork()\n"); if (opt_abort) { abort(); @@ -2077,8 +2147,8 @@ malloc_narenas_default(void) { * default. */ if (ncpus > 1) { - fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); - fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); + fxp_t fxp_ncpus = FXP_INIT_INT(ncpus); + fxp_t goal = fxp_mul(fxp_ncpus, opt_narenas_ratio); uint32_t int_goal = fxp_round_nearest(goal); if (int_goal == 0) { return 1; @@ -2108,28 +2178,35 @@ malloc_init_narenas(tsdn_t *tsdn) { if (opt_percpu_arena != percpu_arena_disabled) { if (!have_percpu_arena || malloc_getcpu() < 0) { opt_percpu_arena = percpu_arena_disabled; - malloc_printf(": perCPU arena getcpu() not " - "available. Setting narenas to %u.\n", opt_narenas ? - opt_narenas : malloc_narenas_default()); + malloc_printf( + ": perCPU arena getcpu() not " + "available. Setting narenas to %u.\n", + opt_narenas ? opt_narenas + : malloc_narenas_default()); if (opt_abort) { abort(); } } else { if (ncpus >= MALLOCX_ARENA_LIMIT) { - malloc_printf(": narenas w/ percpu" - "arena beyond limit (%d)\n", ncpus); + malloc_printf( + ": narenas w/ percpu" + "arena beyond limit (%d)\n", + ncpus); if (opt_abort) { abort(); } return true; } /* NB: opt_percpu_arena isn't fully initialized yet. */ - if (percpu_arena_as_initialized(opt_percpu_arena) == - per_phycpu_arena && ncpus % 2 != 0) { - malloc_printf(": invalid " + if (percpu_arena_as_initialized(opt_percpu_arena) + == per_phycpu_arena + && ncpus % 2 != 0) { + malloc_printf( + ": invalid " "configuration -- per physical CPU arena " "with odd number (%u) of CPUs (no hyper " - "threading?).\n", ncpus); + "threading?).\n", + ncpus); if (opt_abort) abort(); } @@ -2217,24 +2294,23 @@ malloc_init_hard(void) { * than LARGE_MINCLASS. It could only happen if some constants * are configured miserably wrong. 
*/ - assert(SC_LG_TINY_MAXCLASS <= - (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); + assert(SC_LG_TINY_MAXCLASS <= (size_t)1ULL << (LG_PAGE + SC_LG_NGROUP)); #if defined(_WIN32) && _WIN32_WINNT < 0x0600 _init_init_lock(); #endif malloc_mutex_lock(TSDN_NULL, &init_lock); -#define UNLOCK_RETURN(tsdn, ret, reentrancy) \ - malloc_init_hard_cleanup(tsdn, reentrancy); \ +#define UNLOCK_RETURN(tsdn, ret, reentrancy) \ + malloc_init_hard_cleanup(tsdn, reentrancy); \ return ret; if (!malloc_init_hard_needed()) { UNLOCK_RETURN(TSDN_NULL, false, false) } - if (malloc_init_state != malloc_init_a0_initialized && - malloc_init_hard_a0_locked()) { + if (malloc_init_state != malloc_init_a0_initialized + && malloc_init_hard_a0_locked()) { UNLOCK_RETURN(TSDN_NULL, true, false) } @@ -2262,11 +2338,11 @@ malloc_init_hard(void) { * background_thread_enabled wasn't initialized yet, but we * need it to set correct value for deferral_allowed. */ - arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); + arena_t *a0 = arena_get(tsd_tsdn(tsd), 0, false); hpa_shard_opts_t hpa_shard_opts = opt_hpa_opts; hpa_shard_opts.deferral_allowed = background_thread_enabled(); if (pa_shard_enable_hpa(tsd_tsdn(tsd), &a0->pa_shard, - &hpa_shard_opts, &opt_hpa_sec_opts)) { + &hpa_shard_opts, &opt_hpa_sec_opts)) { UNLOCK_RETURN(tsd_tsdn(tsd), true, true) } } @@ -2282,8 +2358,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); - witness_assert_lockless(witness_tsd_tsdn( - tsd_witness_tsdp_get_unsafe(tsd))); + witness_assert_lockless( + witness_tsd_tsdn(tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. 
*/ tsd = tsd_fetch(); @@ -2378,12 +2454,12 @@ static_opts_init(static_opts_t *static_opts) { typedef struct dynamic_opts_s dynamic_opts_t; struct dynamic_opts_s { - void **result; - size_t usize; - size_t num_items; - size_t item_size; - size_t alignment; - bool zero; + void **result; + size_t usize; + size_t num_items; + size_t item_size; + size_t alignment; + bool zero; unsigned tcache_ind; unsigned arena_ind; }; @@ -2414,8 +2490,9 @@ aligned_usize_get(size_t size, size_t alignment, size_t *usize, szind_t *ind, if (unlikely(*ind >= SC_NSIZES)) { return true; } - *usize = sz_large_size_classes_disabled()? sz_s2u(size): - sz_index2size(*ind); + *usize = sz_large_size_classes_disabled() + ? sz_s2u(size) + : sz_index2size(*ind); assert(*usize > 0 && *usize <= SC_LARGE_MAXCLASS); return false; } @@ -2465,8 +2542,8 @@ JEMALLOC_ALWAYS_INLINE void * imalloc_no_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, size_t size, size_t usize, szind_t ind, bool slab) { /* Fill in the tcache. */ - tcache_t *tcache = tcache_get_from_ind(tsd, dopts->tcache_ind, - sopts->slow, /* is_alloc */ true); + tcache_t *tcache = tcache_get_from_ind( + tsd, dopts->tcache_ind, sopts->slow, /* is_alloc */ true); /* Fill in the arena. */ arena_t *arena; @@ -2496,7 +2573,7 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, */ if (sz_can_use_slab(usize)) { assert((dopts->alignment & PROF_SAMPLE_ALIGNMENT_MASK) == 0); - size_t bumped_usize = sz_sa2u(usize, dopts->alignment); + size_t bumped_usize = sz_sa2u(usize, dopts->alignment); szind_t bumped_ind = sz_size2index(bumped_usize); dopts->tcache_ind = TCACHE_IND_NONE; ret = imalloc_no_sample(sopts, dopts, tsd, bumped_usize, @@ -2519,8 +2596,8 @@ imalloc_sample(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd, * *size to the product either way. 
*/ JEMALLOC_ALWAYS_INLINE bool -compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, - size_t *size) { +compute_size_with_overflow( + bool may_overflow, dynamic_opts_t *dopts, size_t *size) { /* * This function is just num_items * item_size, except that we may have * to check for overflow. @@ -2576,26 +2653,26 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { int8_t reentrancy_level; /* Compute the amount of memory the user wants. */ - if (unlikely(compute_size_with_overflow(sopts->may_overflow, dopts, - &size))) { + if (unlikely(compute_size_with_overflow( + sopts->may_overflow, dopts, &size))) { goto label_oom; } if (unlikely(dopts->alignment < sopts->min_alignment - || (dopts->alignment & (dopts->alignment - 1)) != 0)) { + || (dopts->alignment & (dopts->alignment - 1)) != 0)) { goto label_invalid_alignment; } /* This is the beginning of the "core" algorithm. */ dopts->zero = zero_get(dopts->zero, sopts->slow); if (aligned_usize_get(size, dopts->alignment, &usize, &ind, - sopts->bump_empty_aligned_alloc)) { + sopts->bump_empty_aligned_alloc)) { goto label_oom; } dopts->usize = usize; /* Validate the user input. */ if (sopts->assert_nonempty_alloc) { - assert (size != 0); + assert(size != 0); } check_entry_exit_locking(tsd_tsdn(tsd)); @@ -2610,8 +2687,8 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { * We should never specify particular arenas or tcaches from * within our internal allocations. */ - assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC || - dopts->tcache_ind == TCACHE_IND_NONE); + assert(dopts->tcache_ind == TCACHE_IND_AUTOMATIC + || dopts->tcache_ind == TCACHE_IND_NONE); assert(dopts->arena_ind == ARENA_IND_AUTOMATIC); dopts->tcache_ind = TCACHE_IND_NONE; /* We know that arena 0 has already been initialized. 
*/ @@ -2628,15 +2705,14 @@ imalloc_body(static_opts_t *sopts, dynamic_opts_t *dopts, tsd_t *tsd) { if (config_prof && opt_prof) { bool prof_active = prof_active_get_unlocked(); bool sample_event = te_prof_sample_event_lookahead(tsd, usize); - prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, - sample_event); + prof_tctx_t *tctx = prof_alloc_prep( + tsd, prof_active, sample_event); emap_alloc_ctx_t alloc_ctx; if (likely(tctx == PROF_TCTX_SENTINEL)) { alloc_ctx.slab = sz_can_use_slab(usize); - allocation = imalloc_no_sample( - sopts, dopts, tsd, usize, usize, ind, - alloc_ctx.slab); + allocation = imalloc_no_sample(sopts, dopts, tsd, usize, + usize, ind, alloc_ctx.slab); } else if (tctx != NULL) { allocation = imalloc_sample( sopts, dopts, tsd, usize, ind); @@ -2780,8 +2856,8 @@ imalloc(static_opts_t *sopts, dynamic_opts_t *dopts) { JEMALLOC_NOINLINE void * malloc_default(size_t size) { - void *ret; - static_opts_t sopts; + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; /* @@ -2819,13 +2895,12 @@ malloc_default(size_t size) { * Begin malloc(3)-compatible functions. 
*/ -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_malloc(size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { LOG("core.malloc.entry", "size: %zu", size); - void * ret = imalloc_fastpath(size, &malloc_default); + void *ret = imalloc_fastpath(size, &malloc_default); LOG("core.malloc.exit", "result: %p", ret); return ret; @@ -2833,13 +2908,15 @@ je_malloc(size_t size) { JEMALLOC_EXPORT int JEMALLOC_NOTHROW JEMALLOC_ATTR(nonnull(1)) -je_posix_memalign(void **memptr, size_t alignment, size_t size) { - int ret; - static_opts_t sopts; + je_posix_memalign(void **memptr, size_t alignment, size_t size) { + int ret; + static_opts_t sopts; dynamic_opts_t dopts; - LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " - "size: %zu", memptr, alignment, size); + LOG("core.posix_memalign.entry", + "mem ptr: %p, alignment: %zu, " + "size: %zu", + memptr, alignment, size); static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2858,10 +2935,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { ret = imalloc(&sopts, &dopts); if (sopts.slow) { - uintptr_t args[3] = {(uintptr_t)memptr, (uintptr_t)alignment, - (uintptr_t)size}; - hook_invoke_alloc(hook_alloc_posix_memalign, *memptr, - (uintptr_t)ret, args); + uintptr_t args[3] = { + (uintptr_t)memptr, (uintptr_t)alignment, (uintptr_t)size}; + hook_invoke_alloc( + hook_alloc_posix_memalign, *memptr, (uintptr_t)ret, args); } LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, @@ -2870,13 +2947,13 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { return ret; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) -je_aligned_alloc(size_t alignment, size_t size) { +JEMALLOC_EXPORT + 
JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) + je_aligned_alloc(size_t alignment, size_t size) { void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", @@ -2902,8 +2979,8 @@ je_aligned_alloc(size_t alignment, size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {(uintptr_t)alignment, (uintptr_t)size}; - hook_invoke_alloc(hook_alloc_aligned_alloc, ret, - (uintptr_t)ret, args); + hook_invoke_alloc( + hook_alloc_aligned_alloc, ret, (uintptr_t)ret, args); } LOG("core.aligned_alloc.exit", "result: %p", ret); @@ -2911,12 +2988,12 @@ je_aligned_alloc(size_t alignment, size_t size) { return ret; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) -je_calloc(size_t num, size_t size) { - void *ret; - static_opts_t sopts; +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) + je_calloc(size_t num, size_t size) { + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.calloc.entry", "num: %zu, size: %zu", num, size); @@ -2959,8 +3036,8 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); size_t usize = emap_alloc_ctx_usize_get(&alloc_ctx); @@ -2969,14 +3046,12 @@ ifree(tsd_t *tsd, void *ptr, tcache_t *tcache, bool slow_path) { } if (likely(!slow_path)) { - idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, - false); + idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, false); } else { if (config_fill && slow_path && 
opt_junk_free) { junk_free_callback(ptr, usize); } - idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, - true); + idalloctm(tsd_tsdn(tsd), ptr, tcache, &alloc_ctx, false, true); } thread_dalloc_event(tsd, usize); } @@ -2995,32 +3070,32 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); emap_alloc_ctx_t alloc_ctx; - szind_t szind = sz_size2index(usize); + szind_t szind = sz_size2index(usize); if (!config_prof) { - emap_alloc_ctx_init(&alloc_ctx, szind, (szind < SC_NBINS), - usize); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), usize); } else { if (likely(!prof_sample_aligned(ptr))) { /* * When the ptr is not page aligned, it was not sampled. * usize can be trusted to determine szind and slab. */ - emap_alloc_ctx_init(&alloc_ctx, szind, - (szind < SC_NBINS), usize); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), usize); } else if (opt_prof) { /* * Small sampled allocs promoted can still get correct * usize here. Check comments in edata_usize_get. */ - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr, &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); if (config_opt_safety_checks) { /* Small alloc may have !slab (sampled). 
*/ - size_t true_size = - emap_alloc_ctx_usize_get(&alloc_ctx); - if (unlikely(alloc_ctx.szind != - sz_size2index(usize))) { + size_t true_size = emap_alloc_ctx_usize_get( + &alloc_ctx); + if (unlikely(alloc_ctx.szind + != sz_size2index(usize))) { safety_check_fail_sized_dealloc( /* current_dealloc */ true, ptr, /* true_size */ true_size, @@ -3028,8 +3103,8 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { } } } else { - emap_alloc_ctx_init(&alloc_ctx, szind, - (szind < SC_NBINS), usize); + emap_alloc_ctx_init( + &alloc_ctx, szind, (szind < SC_NBINS), usize); } } bool fail = maybe_check_alloc_ctx(tsd, ptr, &alloc_ctx); @@ -3047,14 +3122,12 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { prof_free(tsd, ptr, usize, &alloc_ctx); } if (likely(!slow_path)) { - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, - false); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, false); } else { if (config_fill && slow_path && opt_junk_free) { junk_free_callback(ptr, usize); } - isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, - true); + isdalloct(tsd_tsdn(tsd), ptr, usize, tcache, &alloc_ctx, true); } thread_dalloc_event(tsd, usize); } @@ -3125,12 +3198,11 @@ je_free_aligned_sized(void *ptr, size_t alignment, size_t size) { */ #ifdef JEMALLOC_OVERRIDE_MEMALIGN -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) -je_memalign(size_t alignment, size_t size) { - void *ret; - static_opts_t sopts; +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, @@ -3155,8 +3227,8 @@ je_memalign(size_t alignment, size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {alignment, size}; - 
hook_invoke_alloc(hook_alloc_memalign, ret, (uintptr_t)ret, - args); + hook_invoke_alloc( + hook_alloc_memalign, ret, (uintptr_t)ret, args); } LOG("core.memalign.exit", "result: %p", ret); @@ -3165,13 +3237,12 @@ je_memalign(size_t alignment, size_t size) { #endif #ifdef JEMALLOC_OVERRIDE_VALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) -je_valloc(size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) je_valloc(size_t size) { void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.valloc.entry", "size: %zu\n", size); @@ -3203,13 +3274,12 @@ je_valloc(size_t size) { #endif #ifdef JEMALLOC_OVERRIDE_PVALLOC -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) -je_pvalloc(size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) { void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.pvalloc.entry", "size: %zu\n", size); @@ -3236,8 +3306,8 @@ je_pvalloc(size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {size}; - hook_invoke_alloc(hook_alloc_pvalloc, ret, (uintptr_t)ret, - args); + hook_invoke_alloc( + hook_alloc_pvalloc, ret, (uintptr_t)ret, args); } LOG("core.pvalloc.exit", "result: %p\n", ret); @@ -3255,59 +3325,59 @@ je_pvalloc(size_t size) { * passed an extra argument for the caller return address, which will be * ignored. 
*/ -#include // defines __GLIBC__ if we are compiling against glibc +# include // defines __GLIBC__ if we are compiling against glibc JEMALLOC_EXPORT void (*__free_hook)(void *ptr) = je_free; JEMALLOC_EXPORT void *(*__malloc_hook)(size_t size) = je_malloc; JEMALLOC_EXPORT void *(*__realloc_hook)(void *ptr, size_t size) = je_realloc; -# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK -JEMALLOC_EXPORT void *(*__memalign_hook)(size_t alignment, size_t size) = - je_memalign; -# endif +# ifdef JEMALLOC_GLIBC_MEMALIGN_HOOK +JEMALLOC_EXPORT void *(*__memalign_hook)( + size_t alignment, size_t size) = je_memalign; +# endif -# ifdef __GLIBC__ +# ifdef __GLIBC__ /* * To enable static linking with glibc, the libc specific malloc interface must * be implemented also, so none of glibc's malloc.o functions are added to the * link. */ -# define ALIAS(je_fn) __attribute__((alias (#je_fn), used)) +# define ALIAS(je_fn) __attribute__((alias(#je_fn), used)) /* To force macro expansion of je_ prefix before stringification. 
*/ -# define PREALIAS(je_fn) ALIAS(je_fn) -# ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC +# define PREALIAS(je_fn) ALIAS(je_fn) +# ifdef JEMALLOC_OVERRIDE___LIBC_CALLOC void *__libc_calloc(size_t n, size_t size) PREALIAS(je_calloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_FREE -void __libc_free(void* ptr) PREALIAS(je_free); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED -void __libc_free_sized(void* ptr, size_t size) PREALIAS(je_free_sized); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED -void __libc_free_aligned_sized( - void* ptr, size_t alignment, size_t size) PREALIAS(je_free_aligned_sized); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE +void __libc_free(void *ptr) PREALIAS(je_free); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_SIZED +void __libc_free_sized(void *ptr, size_t size) PREALIAS(je_free_sized); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_FREE_ALIGNED_SIZED +void __libc_free_aligned_sized(void *ptr, size_t alignment, size_t size) + PREALIAS(je_free_aligned_sized); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_MALLOC void *__libc_malloc(size_t size) PREALIAS(je_malloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_MEMALIGN void *__libc_memalign(size_t align, size_t s) PREALIAS(je_memalign); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC -void *__libc_realloc(void* ptr, size_t size) PREALIAS(je_realloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_REALLOC +void *__libc_realloc(void *ptr, size_t size) PREALIAS(je_realloc); +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_VALLOC void *__libc_valloc(size_t size) PREALIAS(je_valloc); -# endif -# ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC +# endif +# ifdef JEMALLOC_OVERRIDE___LIBC_PVALLOC void *__libc_pvalloc(size_t size) PREALIAS(je_pvalloc); -# endif -# ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN -int __posix_memalign(void** r, size_t a, size_t s) 
PREALIAS(je_posix_memalign); -# endif -# undef PREALIAS -# undef ALIAS -# endif +# endif +# ifdef JEMALLOC_OVERRIDE___POSIX_MEMALIGN +int __posix_memalign(void **r, size_t a, size_t s) PREALIAS(je_posix_memalign); +# endif +# undef PREALIAS +# undef ALIAS +# endif #endif /* @@ -3340,23 +3410,23 @@ mallocx_arena_get(int flags) { #ifdef JEMALLOC_EXPERIMENTAL_SMALLOCX_API -#define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x ## y -#define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y) \ - JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) +# define JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) x##y +# define JEMALLOC_SMALLOCX_CONCAT_HELPER2(x, y) \ + JEMALLOC_SMALLOCX_CONCAT_HELPER(x, y) typedef struct { - void *ptr; + void *ptr; size_t size; } smallocx_return_t; -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -smallocx_return_t JEMALLOC_NOTHROW -/* +JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN smallocx_return_t + JEMALLOC_NOTHROW + /* * The attribute JEMALLOC_ATTR(malloc) cannot be used due to: * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86488 */ -JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) - (size_t size, int flags) { + JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT)( + size_t size, int flags) { /* * Note: the attribute JEMALLOC_ALLOC_SIZE(1) cannot be * used here because it makes writing beyond the `size` @@ -3365,8 +3435,8 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) * up to `smallocx_return_t::size`. 
*/ smallocx_return_t ret; - static_opts_t sopts; - dynamic_opts_t dopts; + static_opts_t sopts; + dynamic_opts_t dopts; LOG("core.smallocx.entry", "size: %zu, flags: %d", size, flags); @@ -3395,16 +3465,16 @@ JEMALLOC_SMALLOCX_CONCAT_HELPER2(je_smallocx_, JEMALLOC_VERSION_GID_IDENT) LOG("core.smallocx.exit", "result: %p, size: %zu", ret.ptr, ret.size); return ret; } -#undef JEMALLOC_SMALLOCX_CONCAT_HELPER -#undef JEMALLOC_SMALLOCX_CONCAT_HELPER2 +# undef JEMALLOC_SMALLOCX_CONCAT_HELPER +# undef JEMALLOC_SMALLOCX_CONCAT_HELPER2 #endif -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) -je_mallocx(size_t size, int flags) { - void *ret; - static_opts_t sopts; +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) + je_mallocx(size_t size, int flags) { + void *ret; + static_opts_t sopts; dynamic_opts_t dopts; LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); @@ -3429,8 +3499,8 @@ je_mallocx(size_t size, int flags) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {size, flags}; - hook_invoke_alloc(hook_alloc_mallocx, ret, (uintptr_t)ret, - args); + hook_invoke_alloc( + hook_alloc_mallocx, ret, (uintptr_t)ret, args); } LOG("core.mallocx.exit", "result: %p", ret); @@ -3456,8 +3526,8 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, if (sz_can_use_slab(usize)) { size_t bumped_usize = sz_sa2u(usize, alignment); p = iralloct_explicit_slab(tsdn, old_ptr, old_usize, - bumped_usize, alignment, zero, /* slab */ false, - tcache, arena, hook_args); + bumped_usize, alignment, zero, /* slab */ false, tcache, + arena, hook_args); if (p == NULL) { return NULL; } @@ -3474,15 +3544,14 @@ irallocx_prof_sample(tsdn_t *tsdn, void *old_ptr, size_t old_usize, JEMALLOC_ALWAYS_INLINE void * irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, - size_t alignment, 
size_t usize, bool zero, tcache_t *tcache, - arena_t *arena, emap_alloc_ctx_t *alloc_ctx, - hook_ralloc_args_t *hook_args) { + size_t alignment, size_t usize, bool zero, tcache_t *tcache, arena_t *arena, + emap_alloc_ctx_t *alloc_ctx, hook_ralloc_args_t *hook_args) { prof_info_t old_prof_info; prof_info_get_and_reset_recent(tsd, old_ptr, alloc_ctx, &old_prof_info); - bool prof_active = prof_active_get_unlocked(); - bool sample_event = te_prof_sample_event_lookahead(tsd, usize); + bool prof_active = prof_active_get_unlocked(); + bool sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_tctx_t *tctx = prof_alloc_prep(tsd, prof_active, sample_event); - void *p; + void *p; if (unlikely(tctx != PROF_TCTX_SENTINEL)) { p = irallocx_prof_sample(tsd_tsdn(tsd), old_ptr, old_usize, usize, alignment, zero, tcache, arena, tctx, hook_args); @@ -3495,19 +3564,19 @@ irallocx_prof(tsd_t *tsd, void *old_ptr, size_t old_usize, size_t size, return NULL; } assert(usize == isalloc(tsd_tsdn(tsd), p)); - prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, - old_usize, &old_prof_info, sample_event); + prof_realloc(tsd, p, size, usize, tctx, prof_active, old_ptr, old_usize, + &old_prof_info, sample_event); return p; } static void * do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { - void *p; - tsd_t *tsd; - size_t usize; - size_t old_usize; - size_t alignment = MALLOCX_ALIGN_GET(flags); + void *p; + tsd_t *tsd; + size_t usize; + size_t old_usize; + size_t alignment = MALLOCX_ALIGN_GET(flags); arena_t *arena; assert(ptr != NULL); @@ -3523,13 +3592,13 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + 
emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3537,8 +3606,8 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { goto label_oom; } - hook_ralloc_args_t hook_args = {is_realloc, {(uintptr_t)ptr, size, - flags, 0}}; + hook_ralloc_args_t hook_args = { + is_realloc, {(uintptr_t)ptr, size, flags, 0}}; if (config_prof && opt_prof) { p = irallocx_prof(tsd, ptr, old_usize, size, alignment, usize, zero, tcache, arena, &alloc_ctx, &hook_args); @@ -3563,7 +3632,7 @@ do_rallocx(void *ptr, size_t size, int flags, bool is_realloc) { if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((byte_t *)p + old_usize); + void *excess_start = (void *)((byte_t *)p + old_usize); junk_alloc_callback(excess_start, excess_len); } @@ -3582,12 +3651,11 @@ label_oom: return NULL; } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_rallocx(void *ptr, size_t size, int flags) { - LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, + flags); void *ret = do_rallocx(ptr, size, flags, false); LOG("core.rallocx.exit", "result: %p", ret); return ret; @@ -3621,7 +3689,8 @@ do_realloc_nonnull_zero(void *ptr) { check_entry_exit_locking(tsd_tsdn(tsd)); return NULL; } else { - safety_check_fail("Called realloc(non-null-ptr, 0) with " + safety_check_fail( + "Called realloc(non-null-ptr, 0) with " "zero_realloc:abort set\n"); /* In real code, this will never run; the safety check failure * will call abort. 
In the unit test, we just want to bail out @@ -3632,10 +3701,9 @@ do_realloc_nonnull_zero(void *ptr) { } } -JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN -void JEMALLOC_NOTHROW * -JEMALLOC_ALLOC_SIZE(2) -je_realloc(void *ptr, size_t size) { +JEMALLOC_EXPORT + JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * + JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (likely(ptr != NULL && size != 0)) { @@ -3650,7 +3718,7 @@ je_realloc(void *ptr, size_t size) { /* realloc(NULL, size) is equivalent to malloc(size). */ void *ret; - static_opts_t sopts; + static_opts_t sopts; dynamic_opts_t dopts; static_opts_init(&sopts); @@ -3668,8 +3736,8 @@ je_realloc(void *ptr, size_t size) { imalloc(&sopts, &dopts); if (sopts.slow) { uintptr_t args[3] = {(uintptr_t)ptr, size}; - hook_invoke_alloc(hook_alloc_realloc, ret, - (uintptr_t)ret, args); + hook_invoke_alloc( + hook_alloc_realloc, ret, (uintptr_t)ret, args); } LOG("core.realloc.exit", "result: %p", ret); return ret; @@ -3681,8 +3749,8 @@ ixallocx_helper(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, size_t extra, size_t alignment, bool zero) { size_t newsize; - if (ixalloc(tsdn, ptr, old_usize, size, extra, alignment, zero, - &newsize)) { + if (ixalloc( + tsdn, ptr, old_usize, size, extra, alignment, zero, &newsize)) { return old_usize; } @@ -3697,8 +3765,8 @@ ixallocx_prof_sample(tsdn_t *tsdn, void *ptr, size_t old_usize, size_t size, return old_usize; } - return ixallocx_helper(tsdn, ptr, old_usize, size, extra, alignment, - zero); + return ixallocx_helper( + tsdn, ptr, old_usize, size, extra, alignment, zero); } JEMALLOC_ALWAYS_INLINE size_t @@ -3718,8 +3786,8 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * prof_realloc() will use the actual usize to decide whether to sample. 
*/ size_t usize_max; - if (aligned_usize_get(size + extra, alignment, &usize_max, NULL, - false)) { + if (aligned_usize_get( + size + extra, alignment, &usize_max, NULL, false)) { /* * usize_max is out of range, and chances are that allocation * will fail, but use the maximum possible value and carry on @@ -3758,10 +3826,10 @@ ixallocx_prof(tsd_t *tsd, void *ptr, size_t old_usize, size_t size, * to edata has already been done. */ emap_alloc_ctx_t new_alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &new_alloc_ctx); - prof_info_get_and_reset_recent(tsd, ptr, &new_alloc_ctx, - &prof_info); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &new_alloc_ctx); + prof_info_get_and_reset_recent( + tsd, ptr, &new_alloc_ctx, &prof_info); assert(usize <= usize_max); sample_event = te_prof_sample_event_lookahead(tsd, usize); prof_realloc(tsd, ptr, size, usize, tctx, prof_active, ptr, @@ -3777,10 +3845,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { tsd_t *tsd; size_t usize, old_usize; size_t alignment = MALLOCX_ALIGN_GET(flags); - bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); - LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " - "flags: %d", ptr, size, extra, flags); + LOG("core.xallocx.entry", + "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", + ptr, size, extra, flags); assert(ptr != NULL); assert(size != 0); @@ -3794,12 +3864,12 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { * object associated with the ptr (though the content of the edata_t * object can be changed). 
*/ - edata_t *old_edata = emap_edata_lookup(tsd_tsdn(tsd), - &arena_emap_global, ptr); + edata_t *old_edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsd_tsdn(tsd), &arena_emap_global, ptr, - &alloc_ctx); + emap_alloc_ctx_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr, &alloc_ctx); assert(alloc_ctx.szind != SC_NSIZES); old_usize = emap_alloc_ctx_usize_get(&alloc_ctx); assert(old_usize == isalloc(tsd_tsdn(tsd), ptr)); @@ -3841,17 +3911,17 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { thread_alloc_event(tsd, usize); thread_dalloc_event(tsd, old_usize); - if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize && - !zero) { + if (config_fill && unlikely(opt_junk_alloc) && usize > old_usize + && !zero) { size_t excess_len = usize - old_usize; - void *excess_start = (void *)((byte_t *)ptr + old_usize); + void *excess_start = (void *)((byte_t *)ptr + old_usize); junk_alloc_callback(excess_start, excess_len); } label_not_resized: if (unlikely(!tsd_fast(tsd))) { uintptr_t args[4] = {(uintptr_t)ptr, size, extra, flags}; - hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, - usize, (uintptr_t)usize, args); + hook_invoke_expand(hook_expand_xallocx, ptr, old_usize, usize, + (uintptr_t)usize, args); } UTRACE(ptr, size, ptr); @@ -3862,9 +3932,8 @@ label_not_resized: } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, int flags) { - size_t usize; +JEMALLOC_ATTR(pure) je_sallocx(const void *ptr, int flags) { + size_t usize; tsdn_t *tsdn; LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); @@ -3896,10 +3965,10 @@ je_dallocx(void *ptr, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch_min(); - bool fast = tsd_fast(tsd); + bool fast = tsd_fast(tsd); check_entry_exit_locking(tsd_tsdn(tsd)); - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); 
tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, /* is_alloc */ false); @@ -3933,11 +4002,11 @@ sdallocx_default(void *ptr, size_t size, int flags) { assert(malloc_initialized() || IS_INITIALIZER); tsd_t *tsd = tsd_fetch_min(); - bool fast = tsd_fast(tsd); + bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); check_entry_exit_locking(tsd_tsdn(tsd)); - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, !fast, /* is_alloc */ false); @@ -3955,8 +4024,8 @@ sdallocx_default(void *ptr, size_t size, int flags) { JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_sdallocx(void *ptr, size_t size, int flags) { - LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, - size, flags); + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, + flags); je_sdallocx_impl(ptr, size, flags); @@ -3964,9 +4033,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW -JEMALLOC_ATTR(pure) -je_nallocx(size_t size, int flags) { - size_t usize; +JEMALLOC_ATTR(pure) je_nallocx(size_t size, int flags) { + size_t usize; tsdn_t *tsdn; assert(size != 0); @@ -3991,9 +4059,9 @@ je_nallocx(size_t size, int flags) { } JEMALLOC_EXPORT int JEMALLOC_NOTHROW -je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, - size_t newlen) { - int ret; +je_mallctl( + const char *name, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; tsd_t *tsd; LOG("core.mallctl.entry", "name: %s", name); @@ -4034,8 +4102,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, - void *newp, size_t newlen) { - int ret; + void *newp, size_t newlen) { + int ret; tsd_t *tsd; LOG("core.mallctlbymib.entry", ""); @@ -4055,8 +4123,8 @@ je_mallctlbymib(const size_t *mib, 
size_t miblen, void *oldp, size_t *oldlenp, #define STATS_PRINT_BUFSIZE 65536 JEMALLOC_EXPORT void JEMALLOC_NOTHROW -je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, - const char *opts) { +je_malloc_stats_print( + void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; LOG("core.malloc_stats_print.entry", ""); @@ -4127,12 +4195,12 @@ je_malloc_size(const void *ptr) { static void batch_alloc_prof_sample_assert(tsd_t *tsd, size_t batch, size_t usize) { assert(config_prof && opt_prof); - bool prof_sample_event = te_prof_sample_event_lookahead(tsd, - batch * usize); + bool prof_sample_event = te_prof_sample_event_lookahead( + tsd, batch * usize); assert(!prof_sample_event); size_t surplus; - prof_sample_event = te_prof_sample_event_lookahead_surplus(tsd, - (batch + 1) * usize, &surplus); + prof_sample_event = te_prof_sample_event_lookahead_surplus( + tsd, (batch + 1) * usize, &surplus); assert(prof_sample_event); assert(surplus < usize); } @@ -4157,14 +4225,14 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { goto label_done; } szind_t ind = sz_size2index(usize); - bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); + bool zero = zero_get(MALLOCX_ZERO_GET(flags), /* slow */ true); /* * The cache bin and arena will be lazily initialized; it's hard to * know in advance whether each of them needs to be initialized. */ cache_bin_t *bin = NULL; - arena_t *arena = NULL; + arena_t *arena = NULL; size_t nregs = 0; if (likely(ind < SC_NBINS)) { @@ -4175,10 +4243,10 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { while (filled < num) { size_t batch = num - filled; size_t surplus = SIZE_MAX; /* Dead store. 
*/ - bool prof_sample_event = config_prof && opt_prof + bool prof_sample_event = config_prof && opt_prof && prof_active_get_unlocked() - && te_prof_sample_event_lookahead_surplus(tsd, - batch * usize, &surplus); + && te_prof_sample_event_lookahead_surplus( + tsd, batch * usize, &surplus); if (prof_sample_event) { /* @@ -4194,8 +4262,8 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { if (likely(ind < SC_NBINS) && batch >= nregs) { if (arena == NULL) { unsigned arena_ind = mallocx_arena_get(flags); - if (arena_get_from_ind(tsd, arena_ind, - &arena)) { + if (arena_get_from_ind( + tsd, arena_ind, &arena)) { goto label_done; } if (arena == NULL) { @@ -4212,13 +4280,14 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { filled += n; } - unsigned tcache_ind = mallocx_tcache_get(flags); + unsigned tcache_ind = mallocx_tcache_get(flags); tcache_t *tcache = tcache_get_from_ind(tsd, tcache_ind, /* slow */ true, /* is_alloc */ true); - if (likely(tcache != NULL && - ind < tcache_nbins_get(tcache->tcache_slow) && - !tcache_bin_disabled(ind, &tcache->bins[ind], - tcache->tcache_slow)) && progress < batch) { + if (likely(tcache != NULL + && ind < tcache_nbins_get(tcache->tcache_slow) + && !tcache_bin_disabled( + ind, &tcache->bins[ind], tcache->tcache_slow)) + && progress < batch) { if (bin == NULL) { bin = &tcache->bins[ind]; } @@ -4249,22 +4318,22 @@ batch_alloc(void **ptrs, size_t num, size_t size, int flags) { * additional benefit is that the tcache will * not be empty for the next allocation request. 
*/ - size_t n = cache_bin_alloc_batch(bin, bin_batch, - ptrs + filled); + size_t n = cache_bin_alloc_batch( + bin, bin_batch, ptrs + filled); if (config_stats) { bin->tstats.nrequests += n; } if (zero) { for (size_t i = 0; i < n; ++i) { - memset(ptrs[filled + i], 0, - usize); + memset( + ptrs[filled + i], 0, usize); } } if (config_prof && opt_prof && unlikely(ind >= SC_NBINS)) { for (size_t i = 0; i < n; ++i) { - prof_tctx_reset_sampled(tsd, - ptrs[filled + i]); + prof_tctx_reset_sampled( + tsd, ptrs[filled + i]); } } progress += n; @@ -4340,7 +4409,7 @@ JEMALLOC_EXPORT void _malloc_prefork(void) #endif { - tsd_t *tsd; + tsd_t *tsd; unsigned i, j, narenas; arena_t *arena; @@ -4370,8 +4439,8 @@ _malloc_prefork(void) /* Break arena prefork into stages to preserve lock order. */ for (i = 0; i < 9; i++) { for (j = 0; j < narenas; j++) { - if ((arena = arena_get(tsd_tsdn(tsd), j, false)) != - NULL) { + if ((arena = arena_get(tsd_tsdn(tsd), j, false)) + != NULL) { switch (i) { case 0: arena_prefork0(tsd_tsdn(tsd), arena); @@ -4400,11 +4469,11 @@ _malloc_prefork(void) case 8: arena_prefork8(tsd_tsdn(tsd), arena); break; - default: not_reached(); + default: + not_reached(); } } } - } prof_prefork1(tsd_tsdn(tsd)); stats_prefork(tsd_tsdn(tsd)); @@ -4419,7 +4488,7 @@ JEMALLOC_EXPORT void _malloc_postfork(void) #endif { - tsd_t *tsd; + tsd_t *tsd; unsigned i, narenas; #ifdef JEMALLOC_MUTEX_INIT_CB @@ -4454,7 +4523,7 @@ _malloc_postfork(void) void jemalloc_postfork_child(void) { - tsd_t *tsd; + tsd_t *tsd; unsigned i, narenas; assert(malloc_initialized()); diff --git a/src/jemalloc_cpp.cpp b/src/jemalloc_cpp.cpp index fffd6aee..4e838d3b 100644 --- a/src/jemalloc_cpp.cpp +++ b/src/jemalloc_cpp.cpp @@ -24,45 +24,52 @@ extern "C" { // // ... but it needs to work with jemalloc namespaces. 
-void *operator new(std::size_t size); -void *operator new[](std::size_t size); -void *operator new(std::size_t size, const std::nothrow_t &) noexcept; -void *operator new[](std::size_t size, const std::nothrow_t &) noexcept; -void operator delete(void *ptr) noexcept; -void operator delete[](void *ptr) noexcept; -void operator delete(void *ptr, const std::nothrow_t &) noexcept; -void operator delete[](void *ptr, const std::nothrow_t &) noexcept; +void *operator new(std::size_t size); +void *operator new[](std::size_t size); +void *operator new(std::size_t size, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, const std::nothrow_t &) noexcept; +void operator delete(void *ptr) noexcept; +void operator delete[](void *ptr) noexcept; +void operator delete(void *ptr, const std::nothrow_t &) noexcept; +void operator delete[](void *ptr, const std::nothrow_t &) noexcept; #if __cpp_sized_deallocation >= 201309 /* C++14's sized-delete operators. */ -void operator delete(void *ptr, std::size_t size) noexcept; -void operator delete[](void *ptr, std::size_t size) noexcept; +void operator delete(void *ptr, std::size_t size) noexcept; +void operator delete[](void *ptr, std::size_t size) noexcept; #endif #if __cpp_aligned_new >= 201606 /* C++17's over-aligned operators. 
*/ -void *operator new(std::size_t size, std::align_val_t); -void *operator new(std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; -void *operator new[](std::size_t size, std::align_val_t); -void *operator new[](std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; -void operator delete(void* ptr, std::align_val_t) noexcept; -void operator delete(void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; -void operator delete(void* ptr, std::size_t size, std::align_val_t al) noexcept; -void operator delete[](void* ptr, std::align_val_t) noexcept; -void operator delete[](void* ptr, std::align_val_t, const std::nothrow_t &) noexcept; -void operator delete[](void* ptr, std::size_t size, std::align_val_t al) noexcept; +void *operator new(std::size_t size, std::align_val_t); +void *operator new( + std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void *operator new[](std::size_t size, std::align_val_t); +void *operator new[]( + std::size_t size, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void *ptr, std::align_val_t) noexcept; +void operator delete( + void *ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete(void *ptr, std::size_t size, std::align_val_t al) noexcept; +void operator delete[](void *ptr, std::align_val_t) noexcept; +void operator delete[]( + void *ptr, std::align_val_t, const std::nothrow_t &) noexcept; +void operator delete[]( + void *ptr, std::size_t size, std::align_val_t al) noexcept; #endif JEMALLOC_NOINLINE static void * handleOOM(std::size_t size, bool nothrow) { if (opt_experimental_infallible_new) { - const char *huge_warning = (size >= ((std::size_t)1 << 30)) ? - "This may be caused by heap corruption, if the large size " - "is unexpected (suggest building with sanitizers for " - "debugging)." : ""; + const char *huge_warning = (size >= ((std::size_t)1 << 30)) + ? 
"This may be caused by heap corruption, if the large size " + "is unexpected (suggest building with sanitizers for " + "debugging)." + : ""; - safety_check_fail(": Allocation of size %zu failed. " + safety_check_fail( + ": Allocation of size %zu failed. " "%s opt.experimental_infallible_new is true. Aborting.\n", size, huge_warning); return nullptr; @@ -74,7 +81,7 @@ handleOOM(std::size_t size, bool nothrow) { std::new_handler handler; // GCC-4.8 and clang 4.0 do not have std::get_new_handler. { - static std::mutex mtx; + static std::mutex mtx; std::lock_guard lock(mtx); handler = std::set_new_handler(nullptr); @@ -98,8 +105,7 @@ handleOOM(std::size_t size, bool nothrow) { } template -JEMALLOC_NOINLINE -static void * +JEMALLOC_NOINLINE static void * fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) { void *ptr = malloc_default(size); if (likely(ptr != nullptr)) { @@ -109,12 +115,11 @@ fallbackNewImpl(std::size_t size) noexcept(IsNoExcept) { } template -JEMALLOC_ALWAYS_INLINE -void * +JEMALLOC_ALWAYS_INLINE void * newImpl(std::size_t size) noexcept(IsNoExcept) { LOG("core.operator_new.entry", "size: %zu", size); - void * ret = imalloc_fastpath(size, &fallbackNewImpl); + void *ret = imalloc_fastpath(size, &fallbackNewImpl); LOG("core.operator_new.exit", "result: %p", ret); return ret; @@ -143,9 +148,9 @@ operator new[](std::size_t size, const std::nothrow_t &) noexcept { #if __cpp_aligned_new >= 201606 template -JEMALLOC_ALWAYS_INLINE -void * -alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept(IsNoExcept) { +JEMALLOC_ALWAYS_INLINE void * +alignedNewImpl(std::size_t size, std::align_val_t alignment) noexcept( + IsNoExcept) { void *ptr = je_aligned_alloc(static_cast(alignment), size); if (likely(ptr != nullptr)) { return ptr; @@ -165,16 +170,18 @@ operator new[](std::size_t size, std::align_val_t alignment) { } void * -operator new(std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { +operator new(std::size_t size, 
std::align_val_t alignment, + const std::nothrow_t &) noexcept { return alignedNewImpl(size, alignment); } void * -operator new[](std::size_t size, std::align_val_t alignment, const std::nothrow_t &) noexcept { +operator new[](std::size_t size, std::align_val_t alignment, + const std::nothrow_t &) noexcept { return alignedNewImpl(size, alignment); } -#endif // __cpp_aligned_new +#endif // __cpp_aligned_new void operator delete(void *ptr) noexcept { @@ -203,7 +210,8 @@ operator delete(void *ptr, const std::nothrow_t &) noexcept { LOG("core.operator_delete.exit", ""); } -void operator delete[](void *ptr, const std::nothrow_t &) noexcept { +void +operator delete[](void *ptr, const std::nothrow_t &) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -215,7 +223,7 @@ void operator delete[](void *ptr, const std::nothrow_t &) noexcept { JEMALLOC_ALWAYS_INLINE void -sizedDeleteImpl(void* ptr, std::size_t size) noexcept { +sizedDeleteImpl(void *ptr, std::size_t size) noexcept { if (unlikely(ptr == nullptr)) { return; } @@ -236,14 +244,14 @@ operator delete[](void *ptr, std::size_t size) noexcept { sizedDeleteImpl(ptr, size); } -#endif // __cpp_sized_deallocation +#endif // __cpp_sized_deallocation #if __cpp_aligned_new >= 201606 JEMALLOC_ALWAYS_INLINE void -alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) - noexcept { +alignedSizedDeleteImpl( + void *ptr, std::size_t size, std::align_val_t alignment) noexcept { if (config_debug) { assert(((size_t)alignment & ((size_t)alignment - 1)) == 0); } @@ -259,7 +267,7 @@ alignedSizedDeleteImpl(void* ptr, std::size_t size, std::align_val_t alignment) } void -operator delete(void* ptr, std::align_val_t) noexcept { +operator delete(void *ptr, std::align_val_t) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -268,7 +276,7 @@ operator delete(void* ptr, std::align_val_t) noexcept { } void -operator delete[](void* ptr, std::align_val_t) 
noexcept { +operator delete[](void *ptr, std::align_val_t) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -277,7 +285,7 @@ operator delete[](void* ptr, std::align_val_t) noexcept { } void -operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { +operator delete(void *ptr, std::align_val_t, const std::nothrow_t &) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -286,7 +294,8 @@ operator delete(void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { } void -operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { +operator delete[]( + void *ptr, std::align_val_t, const std::nothrow_t &) noexcept { LOG("core.operator_delete.entry", "ptr: %p", ptr); je_free_impl(ptr); @@ -295,14 +304,16 @@ operator delete[](void* ptr, std::align_val_t, const std::nothrow_t&) noexcept { } void -operator delete(void* ptr, std::size_t size, std::align_val_t alignment) noexcept { +operator delete( + void *ptr, std::size_t size, std::align_val_t alignment) noexcept { alignedSizedDeleteImpl(ptr, size, alignment); } void -operator delete[](void* ptr, std::size_t size, std::align_val_t alignment) noexcept { +operator delete[]( + void *ptr, std::size_t size, std::align_val_t alignment) noexcept { alignedSizedDeleteImpl(ptr, size, alignment); } -#endif // __cpp_aligned_new +#endif // __cpp_aligned_new // NOLINTEND(misc-use-anonymous-namespace) diff --git a/src/large.c b/src/large.c index d78085f0..7cae61ae 100644 --- a/src/large.c +++ b/src/large.c @@ -18,10 +18,10 @@ large_malloc(tsdn_t *tsdn, arena_t *arena, size_t usize, bool zero) { } void * -large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, - bool zero) { - size_t ausize; - edata_t *edata; +large_palloc( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { + size_t ausize; + edata_t *edata; UNUSED bool idump JEMALLOC_CC_SILENCE_INIT(false); assert(!tsdn_null(tsdn) || 
arena != NULL); @@ -34,8 +34,10 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, if (likely(!tsdn_null(tsdn))) { arena = arena_choose_maybe_huge(tsdn_tsd(tsdn), arena, usize); } - if (unlikely(arena == NULL) || (edata = arena_extent_alloc_large(tsdn, - arena, usize, alignment, zero)) == NULL) { + if (unlikely(arena == NULL) + || (edata = arena_extent_alloc_large( + tsdn, arena, usize, alignment, zero)) + == NULL) { return NULL; } @@ -53,10 +55,10 @@ large_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, static bool large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { - arena_t *arena = arena_get_from_edata(edata); + arena_t *arena = arena_get_from_edata(edata); ehooks_t *ehooks = arena_get_ehooks(arena); - size_t old_size = edata_size_get(edata); - size_t old_usize = edata_usize_get(edata); + size_t old_size = edata_size_get(edata); + size_t old_usize = edata_usize_get(edata); assert(old_usize > usize); @@ -80,8 +82,8 @@ large_ralloc_no_move_shrink(tsdn_t *tsdn, edata_t *edata, size_t usize) { } static bool -large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, - bool zero) { +large_ralloc_no_move_expand( + tsdn_t *tsdn, edata_t *edata, size_t usize, bool zero) { arena_t *arena = arena_get_from_edata(edata); size_t old_size = edata_size_get(edata); @@ -112,10 +114,10 @@ large_ralloc_no_move_expand(tsdn_t *tsdn, edata_t *edata, size_t usize, * offset from the beginning of the extent is a multiple * of CACHELINE in [0 .. PAGE). 
*/ - void *zbase = (void *) - ((byte_t *)edata_addr_get(edata) + old_usize); - void *zpast = PAGE_ADDR2BASE((void *)((byte_t *)zbase + - PAGE)); + void *zbase = (void *)((byte_t *)edata_addr_get(edata) + + old_usize); + void *zpast = PAGE_ADDR2BASE( + (void *)((byte_t *)zbase + PAGE)); size_t nzero = (byte_t *)zpast - (byte_t *)zbase; assert(nzero > 0); memset(zbase, 0, nzero); @@ -134,19 +136,19 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, /* The following should have been caught by callers. */ assert(usize_min > 0 && usize_max <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= SC_LARGE_MINCLASS - && usize_max >= SC_LARGE_MINCLASS); + assert(oldusize >= SC_LARGE_MINCLASS && usize_max >= SC_LARGE_MINCLASS); if (usize_max > oldusize) { /* Attempt to expand the allocation in-place. */ - if (!large_ralloc_no_move_expand(tsdn, edata, usize_max, - zero)) { + if (!large_ralloc_no_move_expand( + tsdn, edata, usize_max, zero)) { arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } /* Try again, this time with usize_min. */ - if (usize_min < usize_max && usize_min > oldusize && - large_ralloc_no_move_expand(tsdn, edata, usize_min, zero)) { + if (usize_min < usize_max && usize_min > oldusize + && large_ralloc_no_move_expand( + tsdn, edata, usize_min, zero)) { arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; } @@ -172,8 +174,8 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, } static void * -large_ralloc_move_helper(tsdn_t *tsdn, arena_t *arena, size_t usize, - size_t alignment, bool zero) { +large_ralloc_move_helper( + tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero) { if (alignment <= CACHELINE) { return large_malloc(tsdn, arena, usize, zero); } @@ -190,14 +192,13 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, /* The following should have been caught by callers. 
*/ assert(usize > 0 && usize <= SC_LARGE_MAXCLASS); /* Both allocation sizes must be large to avoid a move. */ - assert(oldusize >= SC_LARGE_MINCLASS - && usize >= SC_LARGE_MINCLASS); + assert(oldusize >= SC_LARGE_MINCLASS && usize >= SC_LARGE_MINCLASS); /* Try to avoid moving the allocation. */ if (!large_ralloc_no_move(tsdn, edata, usize, usize, zero)) { - hook_invoke_expand(hook_args->is_realloc - ? hook_expand_realloc : hook_expand_rallocx, ptr, oldusize, - usize, (uintptr_t)ptr, hook_args->args); + hook_invoke_expand(hook_args->is_realloc ? hook_expand_realloc + : hook_expand_rallocx, + ptr, oldusize, usize, (uintptr_t)ptr, hook_args->args); return edata_addr_get(edata); } @@ -206,17 +207,18 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, * different size class. In that case, fall back to allocating new * space and copying. */ - void *ret = large_ralloc_move_helper(tsdn, arena, usize, alignment, - zero); + void *ret = large_ralloc_move_helper( + tsdn, arena, usize, alignment, zero); if (ret == NULL) { return NULL; } - hook_invoke_alloc(hook_args->is_realloc - ? hook_alloc_realloc : hook_alloc_rallocx, ret, (uintptr_t)ret, - hook_args->args); - hook_invoke_dalloc(hook_args->is_realloc - ? hook_dalloc_realloc : hook_dalloc_rallocx, ptr, hook_args->args); + hook_invoke_alloc( + hook_args->is_realloc ? hook_alloc_realloc : hook_alloc_rallocx, + ret, (uintptr_t)ret, hook_args->args); + hook_invoke_dalloc( + hook_args->is_realloc ? hook_dalloc_realloc : hook_dalloc_rallocx, + ptr, hook_args->args); size_t copysize = (usize < oldusize) ? usize : oldusize; memcpy(ret, edata_addr_get(edata), copysize); @@ -228,8 +230,8 @@ large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, * locked indicates whether the arena's large_mtx is currently held. 
*/ static void -large_dalloc_prep_impl(tsdn_t *tsdn, arena_t *arena, edata_t *edata, - bool locked) { +large_dalloc_prep_impl( + tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool locked) { if (!locked) { /* See comments in arena_bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { @@ -280,16 +282,16 @@ large_salloc(tsdn_t *tsdn, const edata_t *edata) { } void -large_prof_info_get(tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, - bool reset_recent) { +large_prof_info_get( + tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) { assert(prof_info != NULL); prof_tctx_t *alloc_tctx = edata_prof_tctx_get(edata); prof_info->alloc_tctx = alloc_tctx; if (prof_tctx_is_valid(alloc_tctx)) { - nstime_copy(&prof_info->alloc_time, - edata_prof_alloc_time_get(edata)); + nstime_copy( + &prof_info->alloc_time, edata_prof_alloc_time_get(edata)); prof_info->alloc_size = edata_prof_alloc_size_get(edata); if (reset_recent) { /* diff --git a/src/log.c b/src/log.c index 778902fb..9b1c6261 100644 --- a/src/log.c +++ b/src/log.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/log.h" -char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; atomic_b_t log_init_done = ATOMIC_INIT(false); /* @@ -11,7 +11,7 @@ atomic_b_t log_init_done = ATOMIC_INIT(false); * with a pointer to the first character after the end of the string. 
*/ static const char * -log_var_extract_segment(const char* segment_begin) { +log_var_extract_segment(const char *segment_begin) { const char *end; for (end = segment_begin; *end != '\0' && *end != '|'; end++) { } @@ -30,12 +30,12 @@ log_var_matches_segment(const char *segment_begin, const char *segment_end, if (segment_len == 1 && *segment_begin == '.') { return true; } - if (segment_len == log_var_len) { + if (segment_len == log_var_len) { return strncmp(segment_begin, log_var_begin, segment_len) == 0; } else if (segment_len < log_var_len) { return strncmp(segment_begin, log_var_begin, segment_len) == 0 && log_var_begin[segment_len] == '.'; - } else { + } else { return false; } } @@ -61,9 +61,9 @@ log_var_update_state(log_var_t *log_var) { segment_begin); assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); if (log_var_matches_segment(segment_begin, segment_end, - log_var_begin, log_var_end)) { - atomic_store_u(&log_var->state, LOG_ENABLED, - ATOMIC_RELAXED); + log_var_begin, log_var_end)) { + atomic_store_u( + &log_var->state, LOG_ENABLED, ATOMIC_RELAXED); return LOG_ENABLED; } if (*segment_end == '\0') { diff --git a/src/malloc_io.c b/src/malloc_io.c index 192d8208..0c5d6c03 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -5,63 +5,68 @@ #include "jemalloc/internal/util.h" #ifdef assert -# undef assert +# undef assert #endif #ifdef not_reached -# undef not_reached +# undef not_reached #endif #ifdef not_implemented -# undef not_implemented +# undef not_implemented #endif #ifdef assert_not_implemented -# undef assert_not_implemented +# undef assert_not_implemented #endif /* * Define simple versions of assertion macros that won't recurse in case * of assertion failures in malloc_*printf(). 
*/ -#define assert(e) do { \ - if (config_debug && !(e)) { \ - malloc_write(": Failed assertion\n"); \ - abort(); \ - } \ -} while (0) +#define assert(e) \ + do { \ + if (config_debug && !(e)) { \ + malloc_write(": Failed assertion\n"); \ + abort(); \ + } \ + } while (0) -#define not_reached() do { \ - if (config_debug) { \ - malloc_write(": Unreachable code reached\n"); \ - abort(); \ - } \ - unreachable(); \ -} while (0) +#define not_reached() \ + do { \ + if (config_debug) { \ + malloc_write( \ + ": Unreachable code reached\n"); \ + abort(); \ + } \ + unreachable(); \ + } while (0) -#define not_implemented() do { \ - if (config_debug) { \ - malloc_write(": Not implemented\n"); \ - abort(); \ - } \ -} while (0) +#define not_implemented() \ + do { \ + if (config_debug) { \ + malloc_write(": Not implemented\n"); \ + abort(); \ + } \ + } while (0) -#define assert_not_implemented(e) do { \ - if (unlikely(config_debug && !(e))) { \ - not_implemented(); \ - } \ -} while (0) +#define assert_not_implemented(e) \ + do { \ + if (unlikely(config_debug && !(e))) { \ + not_implemented(); \ + } \ + } while (0) /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ #define U2S_BUFSIZE ((1U << (LG_SIZEOF_INTMAX_T + 3)) + 1) -static char *u2s(uintmax_t x, unsigned base, bool uppercase, char *s, - size_t *slen_p); +static char *u2s( + uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p); #define D2S_BUFSIZE (1 + U2S_BUFSIZE) static char *d2s(intmax_t x, char sign, char *s, size_t *slen_p); #define O2S_BUFSIZE (1 + U2S_BUFSIZE) static char *o2s(uintmax_t x, bool alt_form, char *s, size_t *slen_p); #define X2S_BUFSIZE (2 + U2S_BUFSIZE) -static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, - size_t *slen_p); +static char *x2s( + uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p); /******************************************************************************/ @@ -71,7 +76,7 @@ wrtmessage(void *cbopaque, const char *s) { malloc_write_fd(STDERR_FILENO, s, strlen(s)); } -JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); +JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); /* * Wrapper around malloc_message() that avoids the need for @@ -93,14 +98,15 @@ malloc_write(const char *s) { int buferror(int err, char *buf, size_t buflen) { #ifdef _WIN32 - FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, - (LPSTR)buf, (DWORD)buflen, NULL); + FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, + (DWORD)buflen, NULL); return 0; -#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE) +#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) \ + && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); - buf[buflen-1] = '\0'; + buf[buflen - 1] = '\0'; } return 0; #else @@ -110,9 +116,9 @@ buferror(int err, char *buf, size_t buflen) { uintmax_t malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { - uintmax_t ret, digit; - unsigned b; - bool neg; + uintmax_t ret, digit; + unsigned b; + bool neg; const char *p, *ns; p = nptr; @@ 
-128,7 +134,12 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { neg = false; while (true) { switch (*p) { - case '\t': case '\n': case '\v': case '\f': case '\r': case ' ': + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case ' ': p++; break; case '-': @@ -142,8 +153,8 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { } } - /* Get prefix, if any. */ - label_prefix: +/* Get prefix, if any. */ +label_prefix: /* * Note where the first non-whitespace/sign character is so that it is * possible to tell whether any digits are consumed (e.g., " 0" vs. @@ -152,8 +163,14 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { ns = p; if (*p == '0') { switch (p[1]) { - case '0': case '1': case '2': case '3': case '4': case '5': - case '6': case '7': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': if (b == 0) { b = 8; } @@ -161,13 +178,30 @@ malloc_strtoumax(const char *restrict nptr, char **restrict endptr, int base) { p++; } break; - case 'X': case 'x': + case 'X': + case 'x': switch (p[2]) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'A': case 'B': case 'C': case 'D': case 'E': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': - case 'a': case 'b': case 'c': case 'd': case 'e': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': if (b == 0) { b = 16; @@ -244,9 +278,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { } while (x > 0); break; case 16: { - const char *digits = (uppercase) - ? "0123456789ABCDEF" - : "0123456789abcdef"; + const char *digits = (uppercase) ? 
"0123456789ABCDEF" + : "0123456789abcdef"; do { i--; @@ -254,7 +287,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { x >>= 4; } while (x > 0); break; - } default: { + } + default: { const char *digits = (uppercase) ? "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" : "0123456789abcdefghijklmnopqrstuvwxyz"; @@ -265,7 +299,8 @@ u2s(uintmax_t x, unsigned base, bool uppercase, char *s, size_t *slen_p) { s[i] = digits[x % (uint64_t)base]; x /= (uint64_t)base; } while (x > 0); - }} + } + } *slen_p = U2S_BUFSIZE - 1 - i; return &s[i]; @@ -294,7 +329,8 @@ d2s(intmax_t x, char sign, char *s, size_t *slen_p) { (*slen_p)++; *s = sign; break; - default: not_reached(); + default: + not_reached(); } return s; } @@ -325,106 +361,112 @@ x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, size_t *slen_p) { JEMALLOC_COLD size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { - size_t i; + size_t i; const char *f; -#define APPEND_C(c) do { \ - if (i < size) { \ - str[i] = (c); \ - } \ - i++; \ -} while (0) -#define APPEND_S(s, slen) do { \ - if (i < size) { \ - size_t cpylen = (slen <= size - i) ? slen : size - i; \ - memcpy(&str[i], s, cpylen); \ - } \ - i += slen; \ -} while (0) -#define APPEND_PADDED_S(s, slen, width, left_justify) do { \ - /* Left padding. */ \ - size_t pad_len = (width == -1) ? 0 : ((slen < (size_t)width) ? \ - (size_t)width - slen : 0); \ - if (!left_justify && pad_len != 0) { \ - size_t j; \ - for (j = 0; j < pad_len; j++) { \ - if (pad_zero) { \ - APPEND_C('0'); \ - } else { \ - APPEND_C(' '); \ - } \ - } \ - } \ - /* Value. */ \ - APPEND_S(s, slen); \ - /* Right padding. */ \ - if (left_justify && pad_len != 0) { \ - size_t j; \ - for (j = 0; j < pad_len; j++) { \ - APPEND_C(' '); \ - } \ - } \ -} while (0) -#define GET_ARG_NUMERIC(val, len) do { \ - switch ((unsigned char)len) { \ - case '?': \ - val = va_arg(ap, int); \ - break; \ - case '?' 
| 0x80: \ - val = va_arg(ap, unsigned int); \ - break; \ - case 'l': \ - val = va_arg(ap, long); \ - break; \ - case 'l' | 0x80: \ - val = va_arg(ap, unsigned long); \ - break; \ - case 'q': \ - val = va_arg(ap, long long); \ - break; \ - case 'q' | 0x80: \ - val = va_arg(ap, unsigned long long); \ - break; \ - case 'j': \ - val = va_arg(ap, intmax_t); \ - break; \ - case 'j' | 0x80: \ - val = va_arg(ap, uintmax_t); \ - break; \ - case 't': \ - val = va_arg(ap, ptrdiff_t); \ - break; \ - case 'z': \ - val = va_arg(ap, ssize_t); \ - break; \ - case 'z' | 0x80: \ - val = va_arg(ap, size_t); \ - break; \ - case 'p': /* Synthetic; used for %p. */ \ - val = va_arg(ap, uintptr_t); \ - break; \ - default: \ - not_reached(); \ - val = 0; \ - } \ -} while (0) +#define APPEND_C(c) \ + do { \ + if (i < size) { \ + str[i] = (c); \ + } \ + i++; \ + } while (0) +#define APPEND_S(s, slen) \ + do { \ + if (i < size) { \ + size_t cpylen = (slen <= size - i) ? slen : size - i; \ + memcpy(&str[i], s, cpylen); \ + } \ + i += slen; \ + } while (0) +#define APPEND_PADDED_S(s, slen, width, left_justify) \ + do { \ + /* Left padding. */ \ + size_t pad_len = (width == -1) \ + ? 0 \ + : ((slen < (size_t)width) ? (size_t)width - slen : 0); \ + if (!left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) { \ + if (pad_zero) { \ + APPEND_C('0'); \ + } else { \ + APPEND_C(' '); \ + } \ + } \ + } \ + /* Value. */ \ + APPEND_S(s, slen); \ + /* Right padding. */ \ + if (left_justify && pad_len != 0) { \ + size_t j; \ + for (j = 0; j < pad_len; j++) { \ + APPEND_C(' '); \ + } \ + } \ + } while (0) +#define GET_ARG_NUMERIC(val, len) \ + do { \ + switch ((unsigned char)len) { \ + case '?': \ + val = va_arg(ap, int); \ + break; \ + case '?' 
| 0x80: \ + val = va_arg(ap, unsigned int); \ + break; \ + case 'l': \ + val = va_arg(ap, long); \ + break; \ + case 'l' | 0x80: \ + val = va_arg(ap, unsigned long); \ + break; \ + case 'q': \ + val = va_arg(ap, long long); \ + break; \ + case 'q' | 0x80: \ + val = va_arg(ap, unsigned long long); \ + break; \ + case 'j': \ + val = va_arg(ap, intmax_t); \ + break; \ + case 'j' | 0x80: \ + val = va_arg(ap, uintmax_t); \ + break; \ + case 't': \ + val = va_arg(ap, ptrdiff_t); \ + break; \ + case 'z': \ + val = va_arg(ap, ssize_t); \ + break; \ + case 'z' | 0x80: \ + val = va_arg(ap, size_t); \ + break; \ + case 'p': /* Synthetic; used for %p. */ \ + val = va_arg(ap, uintptr_t); \ + break; \ + default: \ + not_reached(); \ + val = 0; \ + } \ + } while (0) i = 0; f = format; while (true) { switch (*f) { - case '\0': goto label_out; + case '\0': + goto label_out; case '%': { - bool alt_form = false; - bool left_justify = false; - bool plus_space = false; - bool plus_plus = false; - int prec = -1; - int width = -1; + bool alt_form = false; + bool left_justify = false; + bool plus_space = false; + bool plus_plus = false; + int prec = -1; + int width = -1; unsigned char len = '?'; - char *s; - size_t slen; - bool pad_zero = false; + char *s; + size_t slen; + bool pad_zero = false; f++; /* Flags. */ @@ -446,12 +488,13 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(!plus_plus); plus_plus = true; break; - default: goto label_width; + default: + goto label_width; } f++; } - /* Width. */ - label_width: + /* Width. 
*/ + label_width: switch (*f) { case '*': width = va_arg(ap, int); @@ -464,16 +507,24 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { case '0': pad_zero = true; JEMALLOC_FALLTHROUGH; - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { uintmax_t uwidth; set_errno(0); uwidth = malloc_strtoumax(f, (char **)&f, 10); - assert(uwidth != UINTMAX_MAX || get_errno() != - ERANGE); + assert(uwidth != UINTMAX_MAX + || get_errno() != ERANGE); width = (int)uwidth; break; - } default: + } + default: break; } /* Width/precision separator. */ @@ -488,20 +539,29 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { prec = va_arg(ap, int); f++; break; - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { uintmax_t uprec; set_errno(0); uprec = malloc_strtoumax(f, (char **)&f, 10); - assert(uprec != UINTMAX_MAX || get_errno() != - ERANGE); + assert(uprec != UINTMAX_MAX + || get_errno() != ERANGE); prec = (int)uprec; break; } - default: break; + default: + break; } - /* Length. */ - label_length: + /* Length. */ + label_length: switch (*f) { case 'l': f++; @@ -512,11 +572,15 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { len = 'l'; } break; - case 'q': case 'j': case 't': case 'z': + case 'q': + case 'j': + case 't': + case 'z': len = *f; f++; break; - default: break; + default: + break; } /* Conversion specifier. 
*/ switch (*f) { @@ -525,9 +589,10 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { APPEND_C(*f); f++; break; - case 'd': case 'i': { + case 'd': + case 'i': { intmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[D2S_BUFSIZE]; + char buf[D2S_BUFSIZE]; /* * Outputting negative, zero-padded numbers @@ -542,41 +607,48 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { assert(!pad_zero); GET_ARG_NUMERIC(val, len); - s = d2s(val, (plus_plus ? '+' : (plus_space ? - ' ' : '-')), buf, &slen); + s = d2s(val, + (plus_plus ? '+' + : (plus_space ? ' ' : '-')), + buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'o': { + } + case 'o': { uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[O2S_BUFSIZE]; + char buf[O2S_BUFSIZE]; GET_ARG_NUMERIC(val, len | 0x80); s = o2s(val, alt_form, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'u': { + } + case 'u': { uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[U2S_BUFSIZE]; + char buf[U2S_BUFSIZE]; GET_ARG_NUMERIC(val, len | 0x80); s = u2s(val, 10, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'x': case 'X': { + } + case 'x': + case 'X': { uintmax_t val JEMALLOC_CC_SILENCE_INIT(0); - char buf[X2S_BUFSIZE]; + char buf[X2S_BUFSIZE]; GET_ARG_NUMERIC(val, len | 0x80); s = x2s(val, alt_form, *f == 'X', buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } case 'c': { + } + case 'c': { unsigned char val; - char buf[2]; + char buf[2]; assert(len == '?' || len == 'l'); assert_not_implemented(len != 'l'); @@ -586,7 +658,8 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { APPEND_PADDED_S(buf, 1, width, left_justify); f++; break; - } case 's': + } + case 's': assert(len == '?' 
|| len == 'l'); assert_not_implemented(len != 'l'); s = va_arg(ap, char *); @@ -596,23 +669,27 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { break; case 'p': { uintmax_t val; - char buf[X2S_BUFSIZE]; + char buf[X2S_BUFSIZE]; GET_ARG_NUMERIC(val, 'p'); s = x2s(val, true, false, buf, &slen); APPEND_PADDED_S(s, slen, width, left_justify); f++; break; - } default: not_reached(); + } + default: + not_reached(); } break; - } default: { + } + default: { APPEND_C(*f); f++; break; - }} + } + } } - label_out: +label_out: if (i < size) { str[i] = '\0'; } else { @@ -629,7 +706,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { JEMALLOC_FORMAT_PRINTF(3, 4) size_t malloc_snprintf(char *str, size_t size, const char *format, ...) { - size_t ret; + size_t ret; va_list ap; va_start(ap, format); @@ -640,8 +717,8 @@ malloc_snprintf(char *str, size_t size, const char *format, ...) { } void -malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, - va_list ap) { +malloc_vcprintf( + write_cb_t *write_cb, void *cbopaque, const char *format, va_list ap) { char buf[MALLOC_PRINTF_BUFSIZE]; if (write_cb == NULL) { @@ -650,8 +727,8 @@ malloc_vcprintf(write_cb_t *write_cb, void *cbopaque, const char *format, * function, so use the default one. malloc_write() is an * inline function, so use malloc_message() directly here. */ - write_cb = (je_malloc_message != NULL) ? je_malloc_message : - wrtmessage; + write_cb = (je_malloc_message != NULL) ? 
je_malloc_message + : wrtmessage; } malloc_vsnprintf(buf, sizeof(buf), format, ap); diff --git a/src/mutex.c b/src/mutex.c index 5655100d..aa2ab665 100644 --- a/src/mutex.c +++ b/src/mutex.c @@ -6,7 +6,7 @@ #include "jemalloc/internal/spin.h" #if defined(_WIN32) && !defined(_CRT_SPINCOUNT) -#define _CRT_SPINCOUNT 4000 +# define _CRT_SPINCOUNT 4000 #endif /* @@ -22,8 +22,8 @@ int64_t opt_mutex_max_spin = 600; bool isthreaded = false; #endif #ifdef JEMALLOC_MUTEX_INIT_CB -static bool postpone_init = true; -static malloc_mutex_t *postponed_mutexes = NULL; +static bool postpone_init = true; +static malloc_mutex_t *postponed_mutexes = NULL; #endif /******************************************************************************/ @@ -44,14 +44,14 @@ pthread_create(pthread_t *__restrict thread, /******************************************************************************/ #ifdef JEMALLOC_MUTEX_INIT_CB -JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, - void *(calloc_cb)(size_t, size_t)); +JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb( + pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); #endif void malloc_mutex_lock_slow(malloc_mutex_t *mutex) { mutex_prof_data_t *data = &mutex->prof_data; - nstime_t before; + nstime_t before; if (ncpus == 1) { goto label_spin_done; @@ -61,7 +61,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { do { spin_cpu_spinwait(); if (!atomic_load_b(&mutex->locked, ATOMIC_RELAXED) - && !malloc_mutex_trylock_final(mutex)) { + && !malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; } @@ -77,8 +77,9 @@ label_spin_done: /* Copy before to after to avoid clock skews. */ nstime_t after; nstime_copy(&after, &before); - uint32_t n_thds = atomic_fetch_add_u32(&data->n_waiting_thds, 1, - ATOMIC_RELAXED) + 1; + uint32_t n_thds = atomic_fetch_add_u32( + &data->n_waiting_thds, 1, ATOMIC_RELAXED) + + 1; /* One last try as above two calls may take quite some cycles. 
*/ if (!malloc_mutex_trylock_final(mutex)) { atomic_fetch_sub_u32(&data->n_waiting_thds, 1, ATOMIC_RELAXED); @@ -137,27 +138,28 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1, } bool -malloc_mutex_init(malloc_mutex_t *mutex, const char *name, - witness_rank_t rank, malloc_mutex_lock_order_t lock_order) { +malloc_mutex_init(malloc_mutex_t *mutex, const char *name, witness_rank_t rank, + malloc_mutex_lock_order_t lock_order) { mutex_prof_data_init(&mutex->prof_data); #ifdef _WIN32 -# if _WIN32_WINNT >= 0x0600 +# if _WIN32_WINNT >= 0x0600 InitializeSRWLock(&mutex->lock); -# else - if (!InitializeCriticalSectionAndSpinCount(&mutex->lock, - _CRT_SPINCOUNT)) { +# else + if (!InitializeCriticalSectionAndSpinCount( + &mutex->lock, _CRT_SPINCOUNT)) { return true; } -# endif +# endif #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - mutex->lock = OS_UNFAIR_LOCK_INIT; + mutex->lock = OS_UNFAIR_LOCK_INIT; #elif (defined(JEMALLOC_MUTEX_INIT_CB)) if (postpone_init) { mutex->postponed_next = postponed_mutexes; postponed_mutexes = mutex; } else { - if (_pthread_mutex_init_calloc_cb(&mutex->lock, - bootstrap_calloc) != 0) { + if (_pthread_mutex_init_calloc_cb( + &mutex->lock, bootstrap_calloc) + != 0) { return true; } } @@ -201,9 +203,10 @@ malloc_mutex_postfork_child(tsdn_t *tsdn, malloc_mutex_t *mutex) { #ifdef JEMALLOC_MUTEX_INIT_CB malloc_mutex_unlock(tsdn, mutex); #else - if (malloc_mutex_init(mutex, mutex->witness.name, - mutex->witness.rank, mutex->lock_order)) { - malloc_printf(": Error re-initializing mutex in " + if (malloc_mutex_init(mutex, mutex->witness.name, mutex->witness.rank, + mutex->lock_order)) { + malloc_printf( + ": Error re-initializing mutex in " "child\n"); if (opt_abort) { abort(); @@ -217,8 +220,9 @@ malloc_mutex_boot(void) { #ifdef JEMALLOC_MUTEX_INIT_CB postpone_init = false; while (postponed_mutexes != NULL) { - if (_pthread_mutex_init_calloc_cb(&postponed_mutexes->lock, - bootstrap_calloc) != 0) { + if (_pthread_mutex_init_calloc_cb( + 
&postponed_mutexes->lock, bootstrap_calloc) + != 0) { return true; } postponed_mutexes = postponed_mutexes->postponed_next; diff --git a/src/nstime.c b/src/nstime.c index 894753aa..ee2ddc51 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -5,8 +5,8 @@ #include "jemalloc/internal/assert.h" -#define BILLION UINT64_C(1000000000) -#define MILLION UINT64_C(1000000) +#define BILLION UINT64_C(1000000000) +#define MILLION UINT64_C(1000000) static void nstime_set_initialized(nstime_t *time) { @@ -22,8 +22,8 @@ nstime_assert_initialized(const nstime_t *time) { * Some parts (e.g. stats) rely on memset to zero initialize. Treat * these as valid initialization. */ - assert(time->magic == NSTIME_MAGIC || - (time->magic == 0 && time->ns == 0)); + assert( + time->magic == NSTIME_MAGIC || (time->magic == 0 && time->ns == 0)); #endif } @@ -133,8 +133,10 @@ nstime_isubtract(nstime_t *time, uint64_t subtrahend) { void nstime_imultiply(nstime_t *time, uint64_t multiplier) { nstime_assert_initialized(time); - assert((((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << - 2))) == 0) || ((time->ns * multiplier) / multiplier == time->ns)); + assert( + (((time->ns | multiplier) & (UINT64_MAX << (sizeof(uint64_t) << 2))) + == 0) + || ((time->ns * multiplier) / multiplier == time->ns)); nstime_initialize_operand(time); time->ns *= multiplier; @@ -178,7 +180,7 @@ nstime_ms_since(const nstime_t *past) { } #ifdef _WIN32 -# define NSTIME_MONOTONIC false +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { FILETIME ft; @@ -190,7 +192,7 @@ nstime_get(nstime_t *time) { nstime_init(time, ticks_100ns * 100); } #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE) -# define NSTIME_MONOTONIC true +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; @@ -199,7 +201,7 @@ nstime_get(nstime_t *time) { nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif defined(JEMALLOC_HAVE_CLOCK_MONOTONIC) -# define NSTIME_MONOTONIC true +# define 
NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { struct timespec ts; @@ -208,24 +210,24 @@ nstime_get(nstime_t *time) { nstime_init2(time, ts.tv_sec, ts.tv_nsec); } #elif defined(JEMALLOC_HAVE_CLOCK_GETTIME_NSEC_NP) -# define NSTIME_MONOTONIC true +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { nstime_init(time, clock_gettime_nsec_np(CLOCK_UPTIME_RAW)); } #elif defined(JEMALLOC_HAVE_MACH_ABSOLUTE_TIME) -# define NSTIME_MONOTONIC true +# define NSTIME_MONOTONIC true static void nstime_get(nstime_t *time) { static mach_timebase_info_data_t sTimebaseInfo; if (sTimebaseInfo.denom == 0) { - (void) mach_timebase_info(&sTimebaseInfo); + (void)mach_timebase_info(&sTimebaseInfo); } - nstime_init(time, mach_absolute_time() * sTimebaseInfo.numer - / sTimebaseInfo.denom); + nstime_init(time, + mach_absolute_time() * sTimebaseInfo.numer / sTimebaseInfo.denom); } #else -# define NSTIME_MONOTONIC false +# define NSTIME_MONOTONIC false static void nstime_get(nstime_t *time) { struct timeval tv; @@ -242,15 +244,13 @@ nstime_monotonic_impl(void) { } nstime_monotonic_t *JET_MUTABLE nstime_monotonic = nstime_monotonic_impl; -prof_time_res_t opt_prof_time_res = - prof_time_res_default; +prof_time_res_t opt_prof_time_res = prof_time_res_default; const char *const prof_time_res_mode_names[] = { - "default", - "high", + "default", + "high", }; - static void nstime_get_realtime(nstime_t *time) { #if defined(JEMALLOC_HAVE_CLOCK_REALTIME) && !defined(_WIN32) @@ -302,5 +302,3 @@ nstime_prof_init_update(nstime_t *time) { nstime_init_zero(time); nstime_prof_update(time); } - - diff --git a/src/pa.c b/src/pa.c index 7a24ae65..becf69b1 100644 --- a/src/pa.c +++ b/src/pa.c @@ -41,8 +41,8 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, } if (pac_init(tsdn, &shard->pac, base, emap, &shard->edata_cache, - cur_time, pac_oversize_threshold, dirty_decay_ms, muzzy_decay_ms, - &stats->pac_stats, stats_mtx)) { + cur_time, 
pac_oversize_threshold, dirty_decay_ms, + muzzy_decay_ms, &stats->pac_stats, stats_mtx)) { return true; } @@ -68,11 +68,11 @@ bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, - shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { + shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { return true; } if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, - hpa_sec_opts)) { + hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; @@ -114,16 +114,16 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { - return (edata_pai_get(edata) == EXTENT_PAI_PAC - ? &shard->pac.pai : &shard->hpa_sec.pai); + return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai + : &shard->hpa_sec.pai); } edata_t * pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, bool *deferred_work_generated) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); assert(!guarded || alignment <= PAGE); edata_t *edata = NULL; @@ -190,8 +190,8 @@ pa_shrink(tsdn_t *tsdn, pa_shard_t *shard, edata_t *edata, size_t old_size, size_t shrink_amount = old_size - new_size; pai_t *pai = pa_get_pai(shard, edata); - bool error = pai_shrink(tsdn, pai, edata, old_size, new_size, - deferred_work_generated); + bool error = pai_shrink( + tsdn, pai, edata, old_size, new_size, deferred_work_generated); if (error) { return true; } @@ -232,11 +232,11 @@ pa_decay_ms_get(pa_shard_t *shard, extent_state_t state) { } void -pa_shard_set_deferral_allowed(tsdn_t *tsdn, pa_shard_t *shard, - bool deferral_allowed) { +pa_shard_set_deferral_allowed( + tsdn_t *tsdn, pa_shard_t *shard, bool 
deferral_allowed) { if (pa_shard_uses_hpa(shard)) { - hpa_shard_set_deferral_allowed(tsdn, &shard->hpa_shard, - deferral_allowed); + hpa_shard_set_deferral_allowed( + tsdn, &shard->hpa_shard, deferral_allowed); } } @@ -260,8 +260,8 @@ pa_shard_time_until_deferred_work(tsdn_t *tsdn, pa_shard_t *shard) { } if (pa_shard_uses_hpa(shard)) { - uint64_t hpa = - pai_time_until_deferred_work(tsdn, &shard->hpa_shard.pai); + uint64_t hpa = pai_time_until_deferred_work( + tsdn, &shard->hpa_shard.pai); if (hpa < time) { time = hpa; } diff --git a/src/pa_extra.c b/src/pa_extra.c index 76507039..7c2498b7 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -94,8 +94,8 @@ pa_shard_nmuzzy(pa_shard_t *shard) { } void -pa_shard_basic_stats_merge(pa_shard_t *shard, size_t *nactive, size_t *ndirty, - size_t *nmuzzy) { +pa_shard_basic_stats_merge( + pa_shard_t *shard, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nactive += pa_shard_nactive(shard); *ndirty += pa_shard_ndirty(shard); *nmuzzy += pa_shard_nmuzzy(shard); @@ -122,29 +122,29 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_dirty.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_dirty.npurge)); + &shard->pac.stats->decay_dirty.npurge)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_dirty.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_dirty.nmadvise)); + &shard->pac.stats->decay_dirty.nmadvise)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_dirty.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_dirty.purged)); + &shard->pac.stats->decay_dirty.purged)); /* Muzzy decay stats */ locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_muzzy.npurge, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_muzzy.npurge)); + 
&shard->pac.stats->decay_muzzy.npurge)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_muzzy.nmadvise, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_muzzy.nmadvise)); + &shard->pac.stats->decay_muzzy.nmadvise)); locked_inc_u64_unsynchronized( &pa_shard_stats_out->pac_stats.decay_muzzy.purged, locked_read_u64(tsdn, LOCKEDINT_MTX(*shard->stats_mtx), - &shard->pac.stats->decay_muzzy.purged)); + &shard->pac.stats->decay_muzzy.purged)); atomic_load_add_store_zu(&pa_shard_stats_out->pac_stats.abandoned_vm, atomic_load_zu(&shard->pac.stats->abandoned_vm, ATOMIC_RELAXED)); @@ -157,8 +157,8 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, retained = ecache_nextents_get(&shard->pac.ecache_retained, i); dirty_bytes = ecache_nbytes_get(&shard->pac.ecache_dirty, i); muzzy_bytes = ecache_nbytes_get(&shard->pac.ecache_muzzy, i); - retained_bytes = ecache_nbytes_get(&shard->pac.ecache_retained, - i); + retained_bytes = ecache_nbytes_get( + &shard->pac.ecache_retained, i); estats_out[i].ndirty = dirty; estats_out[i].nmuzzy = muzzy; diff --git a/src/pac.c b/src/pac.c index 0e435717..361816e9 100644 --- a/src/pac.c +++ b/src/pac.c @@ -7,18 +7,18 @@ static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated); +static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t 
*edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void pac_dalloc_impl( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); static inline void -pac_decay_data_get(pac_t *pac, extent_state_t state, - decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { - switch(state) { +pac_decay_data_get(pac_t *pac, extent_state_t state, decay_t **r_decay, + pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { + switch (state) { case extent_state_dirty: *r_decay = &pac->decay_dirty; *r_decay_stats = &pac->stats->decay_dirty; @@ -51,7 +51,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, * merging/splitting extents is non-trivial. */ if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind, - /* delay_coalesce */ true)) { + /* delay_coalesce */ true)) { return true; } /* @@ -59,7 +59,7 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, * the critical path much less often than for dirty extents. */ if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind, - /* delay_coalesce */ false)) { + /* delay_coalesce */ false)) { return true; } /* @@ -68,17 +68,17 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, * coalescing), but also because operations on retained extents are not * in the critical path. 
*/ - if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, - ind, /* delay_coalesce */ false)) { + if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, ind, + /* delay_coalesce */ false)) { return true; } exp_grow_init(&pac->exp_grow); if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", - WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { return true; } - atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold, - ATOMIC_RELAXED); + atomic_store_zu( + &pac->oversize_threshold, pac_oversize_threshold, ATOMIC_RELAXED); if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { return true; } @@ -112,7 +112,8 @@ pac_may_have_muzzy(pac_t *pac) { return pac_decay_ms_get(pac, extent_state_muzzy) != 0; } -static size_t pac_alloc_retained_batched_size(size_t size) { +static size_t +pac_alloc_retained_batched_size(size_t size) { if (size > SC_LARGE_MAXCLASS) { /* * A valid input with usize SC_LARGE_MAXCLASS could still @@ -124,8 +125,8 @@ static size_t pac_alloc_retained_batched_size(size_t size) { } size_t batched_size = sz_s2u_compute_using_delta(size); size_t next_hugepage_size = HUGEPAGE_CEILING(size); - return batched_size > next_hugepage_size? next_hugepage_size: - batched_size; + return batched_size > next_hugepage_size ? next_hugepage_size + : batched_size; } static edata_t * @@ -162,8 +163,8 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, * limits. This choice should be reevaluated if * pac_alloc_retained_batched_size is changed to be more aggressive. 
*/ - if (sz_large_size_classes_disabled() && edata == NULL && - (maps_coalesce || opt_retain)) { + if (sz_large_size_classes_disabled() && edata == NULL + && (maps_coalesce || opt_retain)) { size_t batched_size = pac_alloc_retained_batched_size(size); /* * Note that ecache_alloc_grow will try to retrieve virtual @@ -173,12 +174,12 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, * with opt_retain off. */ edata = ecache_alloc_grow(tsdn, pac, ehooks, - &pac->ecache_retained, NULL, batched_size, - alignment, zero, guarded); + &pac->ecache_retained, NULL, batched_size, alignment, zero, + guarded); if (edata != NULL && batched_size > size) { - edata_t *trail = extent_split_wrapper(tsdn, pac, - ehooks, edata, size, batched_size - size, + edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, + edata, size, batched_size - size, /* holding_core_locks */ false); if (trail == NULL) { ecache_dalloc(tsdn, pac, ehooks, @@ -203,8 +204,8 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, } if (config_stats && newly_mapped_size != 0) { - atomic_fetch_add_zu(&pac->stats->pac_mapped, - newly_mapped_size, ATOMIC_RELAXED); + atomic_fetch_add_zu( + &pac->stats->pac_mapped, newly_mapped_size, ATOMIC_RELAXED); } return edata; @@ -217,8 +218,8 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata_t *edata; if (san_bump_enabled() && frequent_reuse) { - edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size, - zero); + edata = san_bump_alloc( + tsdn, &pac->sba, pac, ehooks, size, zero); } else { size_t size_with_guards = san_two_side_guarded_sz(size); /* Alloc a non-guarded extent first.*/ @@ -227,12 +228,12 @@ pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, if (edata != NULL) { /* Add guards around it. 
*/ assert(edata_size_get(edata) == size_with_guards); - san_guard_pages_two_sided(tsdn, ehooks, edata, - pac->emap, true); + san_guard_pages_two_sided( + tsdn, ehooks, edata, pac->emap, true); } } - assert(edata == NULL || (edata_guarded_get(edata) && - edata_size_get(edata) == size)); + assert(edata == NULL + || (edata_guarded_get(edata) && edata_size_get(edata) == size)); return edata; } @@ -241,7 +242,7 @@ static edata_t * pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); edata_t *edata = NULL; @@ -252,13 +253,13 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, * for such allocations would always return NULL. * */ if (!guarded || frequent_reuse) { - edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, - zero, guarded); + edata = pac_alloc_real( + tsdn, pac, ehooks, size, alignment, zero, guarded); } if (edata == NULL && guarded) { /* No cached guarded extents; creating a new one. 
*/ - edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, - alignment, zero, frequent_reuse); + edata = pac_alloc_new_guarded( + tsdn, pac, ehooks, size, alignment, zero, frequent_reuse); } return edata; @@ -267,7 +268,7 @@ pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); size_t mapped_add = 0; @@ -296,8 +297,8 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, return true; } if (config_stats && mapped_add > 0) { - atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &pac->stats->pac_mapped, mapped_add, ATOMIC_RELAXED); } return false; } @@ -305,7 +306,7 @@ pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); size_t shrink_amount = old_size - new_size; @@ -325,9 +326,9 @@ pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, } static void -pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { - pac_t *pac = (pac_t *)self; +pac_dalloc_impl( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { + pac_t *pac = (pac_t *)self; ehooks_t *ehooks = pac_ehooks_get(pac); if (edata_guarded_get(edata)) { @@ -344,10 +345,10 @@ pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, * guarded). 
*/ if (!edata_slab_get(edata) || !maps_coalesce) { - assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || - !maps_coalesce); - san_unguard_pages_two_sided(tsdn, ehooks, edata, - pac->emap); + assert(edata_size_get(edata) >= SC_LARGE_MINCLASS + || !maps_coalesce); + san_unguard_pages_two_sided( + tsdn, ehooks, edata, pac->emap); } } @@ -362,8 +363,8 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { /* Use minimal interval if decay is contended. */ return BACKGROUND_THREAD_DEFERRED_MIN; } - uint64_t result = decay_ns_until_purge(decay, npages, - ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); + uint64_t result = decay_ns_until_purge( + decay, npages, ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); malloc_mutex_unlock(tsdn, &decay->mtx); return result; @@ -372,18 +373,16 @@ pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { uint64_t time; - pac_t *pac = (pac_t *)self; + pac_t *pac = (pac_t *)self; - time = pac_ns_until_purge(tsdn, - &pac->decay_dirty, - ecache_npages_get(&pac->ecache_dirty)); + time = pac_ns_until_purge( + tsdn, &pac->decay_dirty, ecache_npages_get(&pac->ecache_dirty)); if (time == BACKGROUND_THREAD_DEFERRED_MIN) { return time; } - uint64_t muzzy = pac_ns_until_purge(tsdn, - &pac->decay_muzzy, - ecache_npages_get(&pac->ecache_muzzy)); + uint64_t muzzy = pac_ns_until_purge( + tsdn, &pac->decay_muzzy, ecache_npages_get(&pac->ecache_muzzy)); if (muzzy < time) { time = muzzy; } @@ -391,8 +390,8 @@ pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { } bool -pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, - size_t *new_limit) { +pac_retain_grow_limit_get_set( + tsdn_t *tsdn, pac_t *pac, size_t *old_limit, size_t *new_limit) { pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); if (new_limit != NULL) { size_t limit = *new_limit; @@ -418,15 +417,15 @@ static size_t pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, size_t 
npages_limit, size_t npages_decay_max, edata_list_inactive_t *result) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); ehooks_t *ehooks = pac_ehooks_get(pac); /* Stash extents according to npages_limit. */ size_t nstashed = 0; while (nstashed < npages_decay_max) { - edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache, - npages_limit); + edata_t *edata = ecache_evict( + tsdn, pac, ehooks, ecache, npages_limit); if (edata == NULL) { break; } @@ -443,8 +442,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { #ifndef JEMALLOC_HAVE_PROCESS_MADVISE return true; #else - assert(opt_process_madvise_max_batch <= - PROCESS_MADVISE_MAX_BATCH_LIMIT); + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); size_t len = opt_process_madvise_max_batch; VARIABLE_ARRAY(struct iovec, vec, len); @@ -458,8 +457,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { total_bytes += pages_bytes; cur++; if (cur == len) { - bool err = pages_purge_process_madvise(vec, len, - total_bytes); + bool err = pages_purge_process_madvise( + vec, len, total_bytes); if (err) { return true; } @@ -489,14 +488,14 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool try_muzzy = !fully_decay && pac_decay_ms_get(pac, extent_state_muzzy) != 0; - bool purge_to_retained = !try_muzzy || - ecache->state == extent_state_muzzy; + bool purge_to_retained = !try_muzzy + || ecache->state == extent_state_muzzy; /* * Attempt process_madvise only if 1) enabled, 2) purging to retained, * and 3) not using custom hooks. 
*/ - bool try_process_madvise = (opt_process_madvise_max_batch > 0) && - purge_to_retained && ehooks_dalloc_will_fail(ehooks); + bool try_process_madvise = (opt_process_madvise_max_batch > 0) + && purge_to_retained && ehooks_dalloc_will_fail(ehooks); bool already_purged; if (try_process_madvise) { @@ -511,8 +510,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, already_purged = false; } - for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != - NULL; edata = edata_list_inactive_first(decay_extents)) { + for (edata_t *edata = edata_list_inactive_first(decay_extents); + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); @@ -524,8 +523,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, switch (ecache->state) { case extent_state_dirty: if (try_muzzy) { - err = extent_purge_lazy_wrapper(tsdn, ehooks, - edata, /* offset */ 0, size); + err = extent_purge_lazy_wrapper( + tsdn, ehooks, edata, /* offset */ 0, size); if (!err) { ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_muzzy, edata); @@ -535,8 +534,8 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, JEMALLOC_FALLTHROUGH; case extent_state_muzzy: if (already_purged) { - extent_dalloc_wrapper_purged(tsdn, pac, ehooks, - edata); + extent_dalloc_wrapper_purged( + tsdn, pac, ehooks, edata); } else { extent_dalloc_wrapper(tsdn, pac, ehooks, edata); } @@ -578,8 +577,8 @@ static void pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, size_t npages_limit, size_t npages_decay_max) { - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 1); + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); if (decay->purging || npages_decay_max == 0) { return; @@ -589,8 +588,8 @@ pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 
edata_list_inactive_t decay_extents; edata_list_inactive_init(&decay_extents); - size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, - npages_decay_max, &decay_extents); + size_t npurge = pac_stash_decayed( + tsdn, pac, ecache, npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { size_t npurged = pac_decay_stashed(tsdn, pac, decay, decay_stats, ecache, fully_decay, &decay_extents); @@ -611,8 +610,8 @@ pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, static void pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, - pac_decay_stats_t *decay_stats, ecache_t *ecache, - size_t current_npages, size_t npages_limit) { + pac_decay_stats_t *decay_stats, ecache_t *ecache, size_t current_npages, + size_t npages_limit) { if (current_npages > npages_limit) { pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, /* fully_decay */ false, npages_limit, @@ -647,8 +646,8 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, nstime_t time; nstime_init_update(&time); size_t npages_current = ecache_npages_get(ecache); - bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, - npages_current); + bool epoch_advanced = decay_maybe_advance_epoch( + decay, &time, npages_current); if (eagerness == PAC_PURGE_ALWAYS || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) { size_t npages_limit = decay_npages_limit_get(decay); @@ -662,9 +661,9 @@ pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, bool pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, ssize_t decay_ms, pac_purge_eagerness_t eagerness) { - decay_t *decay; + decay_t *decay; pac_decay_stats_t *decay_stats; - ecache_t *ecache; + ecache_t *ecache; pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); if (!decay_ms_valid(decay_ms)) { @@ -691,9 +690,9 @@ pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, ssize_t pac_decay_ms_get(pac_t *pac, extent_state_t state) { - decay_t *decay; + decay_t *decay; 
pac_decay_stats_t *decay_stats; - ecache_t *ecache; + ecache_t *ecache; pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); return decay_ms_read(decay); } @@ -722,9 +721,10 @@ pac_destroy(tsdn_t *tsdn, pac_t *pac) { * dss-based extents for later reuse. */ ehooks_t *ehooks = pac_ehooks_get(pac); - edata_t *edata; - while ((edata = ecache_evict(tsdn, pac, ehooks, - &pac->ecache_retained, 0)) != NULL) { + edata_t *edata; + while ( + (edata = ecache_evict(tsdn, pac, ehooks, &pac->ecache_retained, 0)) + != NULL) { extent_destroy_wrapper(tsdn, pac, ehooks, edata); } } diff --git a/src/pages.c b/src/pages.c index d53e0fef..88301c2b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -8,46 +8,42 @@ #include "jemalloc/internal/malloc_io.h" #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT -#include -#ifdef __FreeBSD__ -#include -#endif +# include +# ifdef __FreeBSD__ +# include +# endif #endif #ifdef __NetBSD__ -#include /* ilog2 */ +# include /* ilog2 */ #endif #ifdef JEMALLOC_HAVE_VM_MAKE_TAG -#define PAGES_FD_TAG VM_MAKE_TAG(254U) +# define PAGES_FD_TAG VM_MAKE_TAG(254U) #else -#define PAGES_FD_TAG -1 +# define PAGES_FD_TAG -1 #endif #if defined(JEMALLOC_HAVE_PRCTL) && defined(JEMALLOC_PAGEID) -#include -#ifndef PR_SET_VMA -#define PR_SET_VMA 0x53564d41 -#define PR_SET_VMA_ANON_NAME 0 -#endif +# include +# ifndef PR_SET_VMA +# define PR_SET_VMA 0x53564d41 +# define PR_SET_VMA_ANON_NAME 0 +# endif #endif /******************************************************************************/ /* Data. */ /* Actual operating system page size, detected during bootstrap, <= PAGE. 
*/ -size_t os_page; +size_t os_page; #ifndef _WIN32 -# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) -# define PAGES_PROT_DECOMMIT (PROT_NONE) -static int mmap_flags; +# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) +# define PAGES_PROT_DECOMMIT (PROT_NONE) +static int mmap_flags; #endif -static bool os_overcommits; +static bool os_overcommits; const char *const thp_mode_names[] = { - "default", - "always", - "never", - "not supported" -}; + "default", "always", "never", "not supported"}; thp_mode_t opt_thp = THP_MODE_DEFAULT; thp_mode_t init_system_thp_mode; @@ -66,15 +62,16 @@ static int madvise_dont_need_zeros_is_faulty = -1; * * [1]: https://patchwork.kernel.org/patch/10576637/ */ -static int madvise_MADV_DONTNEED_zeroes_pages(void) -{ +static int +madvise_MADV_DONTNEED_zeroes_pages(void) { size_t size = PAGE; - void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, - MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + void *addr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (addr == MAP_FAILED) { - malloc_write(": Cannot allocate memory for " + malloc_write( + ": Cannot allocate memory for " "MADV_DONTNEED check\n"); if (opt_abort) { abort(); @@ -94,7 +91,8 @@ static int madvise_MADV_DONTNEED_zeroes_pages(void) } if (munmap(addr, size) != 0) { - malloc_write(": Cannot deallocate memory for " + malloc_write( + ": Cannot deallocate memory for " "MADV_DONTNEED check\n"); if (opt_abort) { abort(); @@ -106,18 +104,18 @@ static int madvise_MADV_DONTNEED_zeroes_pages(void) #endif #ifdef JEMALLOC_PAGEID -static int os_page_id(void *addr, size_t size, const char *name) -{ -#ifdef JEMALLOC_HAVE_PRCTL +static int +os_page_id(void *addr, size_t size, const char *name) { +# ifdef JEMALLOC_HAVE_PRCTL /* * While parsing `/proc//maps` file, the block could appear as * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]` */ return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, (uintptr_t)name); -#else +# 
else return 0; -#endif +# endif } #endif @@ -156,7 +154,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { */ { int flags = mmap_flags; -#ifdef __NetBSD__ +# ifdef __NetBSD__ /* * On NetBSD PAGE for a platform is defined to the * maximum page size of all machine architectures @@ -167,7 +165,7 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { unsigned int a = ilog2(MAX(alignment, PAGE)); flags |= MAP_ALIGNED(a); } -#endif +# endif int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; ret = mmap(addr, size, prot, flags, PAGES_FD_TAG, 0); @@ -184,8 +182,8 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { ret = NULL; } #endif - assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && - ret == addr)); + assert(ret == NULL || (addr == NULL && ret != addr) + || (addr != NULL && ret == addr)); #ifdef JEMALLOC_PAGEID int n = os_page_id(ret, size, os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); @@ -195,8 +193,8 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { } static void * -os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size, - bool *commit) { +os_pages_trim( + void *addr, size_t alloc_size, size_t leadsize, size_t size, bool *commit) { void *ret = (void *)((byte_t *)addr + leadsize); assert(alloc_size >= leadsize + size); @@ -237,13 +235,15 @@ os_pages_unmap(void *addr, size_t size) { char buf[BUFERROR_BUF]; buferror(get_errno(), buf, sizeof(buf)); - malloc_printf(": Error in " + malloc_printf( + ": Error in " #ifdef _WIN32 "VirtualFree" #else "munmap" #endif - "(): %s\n", buf); + "(): %s\n", + buf); if (opt_abort) { abort(); } @@ -350,13 +350,14 @@ os_pages_commit(void *addr, size_t size, bool commit) { assert(PAGE_CEILING(size) == size); #ifdef _WIN32 - return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT, - PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT))); + return (commit + ? 
(addr != VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE)) + : (!VirtualFree(addr, size, MEM_DECOMMIT))); #else { - int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; - void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED, - PAGES_FD_TAG, 0); + int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT; + void *result = mmap( + addr, size, prot, mmap_flags | MAP_FIXED, PAGES_FD_TAG, 0); if (result == MAP_FAILED) { return true; } @@ -395,8 +396,8 @@ pages_decommit(void *addr, size_t size) { void pages_mark_guards(void *head, void *tail) { assert(head != NULL || tail != NULL); - assert(head == NULL || tail == NULL || - (uintptr_t)head < (uintptr_t)tail); + assert( + head == NULL || tail == NULL || (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT if (head != NULL) { mprotect(head, PAGE, PROT_NONE); @@ -418,13 +419,12 @@ pages_mark_guards(void *head, void *tail) { void pages_unmark_guards(void *head, void *tail) { assert(head != NULL || tail != NULL); - assert(head == NULL || tail == NULL || - (uintptr_t)head < (uintptr_t)tail); + assert( + head == NULL || tail == NULL || (uintptr_t)head < (uintptr_t)tail); #ifdef JEMALLOC_HAVE_MPROTECT - bool head_and_tail = (head != NULL) && (tail != NULL); - size_t range = head_and_tail ? - (uintptr_t)tail - (uintptr_t)head + PAGE : - SIZE_T_MAX; + bool head_and_tail = (head != NULL) && (tail != NULL); + size_t range = head_and_tail ? (uintptr_t)tail - (uintptr_t)head + PAGE + : SIZE_T_MAX; /* * The amount of work that the kernel does in mprotect depends on the * range argument. 
SC_LARGE_MINCLASS is an arbitrary threshold chosen @@ -473,17 +473,18 @@ pages_purge_lazy(void *addr, size_t size) { return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) return (madvise(addr, size, -# ifdef MADV_FREE - MADV_FREE -# else - JEMALLOC_MADV_FREE -# endif - ) != 0); -#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ - !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) + != 0); +#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) \ + && !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); -#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ - !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) \ + && !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #else not_reached(); @@ -499,14 +500,14 @@ pages_purge_forced(void *addr, size_t size) { return true; } -#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ - defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) - return (unlikely(madvise_dont_need_zeros_is_faulty) || - madvise(addr, size, MADV_DONTNEED) != 0); -#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \ - defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) - return (unlikely(madvise_dont_need_zeros_is_faulty) || - posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); +#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) \ + && defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) + return (unlikely(madvise_dont_need_zeros_is_faulty) + || madvise(addr, size, MADV_DONTNEED) != 0); +#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) \ + && defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS) + return (unlikely(madvise_dont_need_zeros_is_faulty) + || posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0); #elif defined(JEMALLOC_MAPS_COALESCE) /* Try to overlay a new demand-zeroed mapping. 
*/ return pages_commit(addr, size); @@ -579,13 +580,13 @@ pages_collapse(void *addr, size_t size) { * means we can't call pages_collapse on freshly mapped memory region. * See madvise(2) man page for more details. */ -#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) && \ - (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE)) -# if defined(MADV_COLLAPSE) +#if defined(JEMALLOC_HAVE_MADVISE_COLLAPSE) \ + && (defined(MADV_COLLAPSE) || defined(JEMALLOC_MADV_COLLAPSE)) +# if defined(MADV_COLLAPSE) return (madvise(addr, size, MADV_COLLAPSE) != 0); -# elif defined(JEMALLOC_MADV_COLLAPSE) +# elif defined(JEMALLOC_MADV_COLLAPSE) return (madvise(addr, size, JEMALLOC_MADV_COLLAPSE) != 0); -# endif +# endif #else return true; #endif @@ -618,8 +619,8 @@ pages_dodump(void *addr, size_t size) { } #ifdef JEMALLOC_HAVE_PROCESS_MADVISE -#include -#include +# include +# include static int pidfd; static bool @@ -640,15 +641,16 @@ init_process_madvise(void) { return false; } -#ifdef SYS_process_madvise -#define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise -#else -#define JE_SYS_PROCESS_MADVISE_NR EXPERIMENTAL_SYS_PROCESS_MADVISE_NR -#endif +# ifdef SYS_process_madvise +# define JE_SYS_PROCESS_MADVISE_NR SYS_process_madvise +# else +# define JE_SYS_PROCESS_MADVISE_NR \ + EXPERIMENTAL_SYS_PROCESS_MADVISE_NR +# endif static bool -pages_purge_process_madvise_impl(void *vec, size_t vec_len, - size_t total_bytes) { +pages_purge_process_madvise_impl( + void *vec, size_t vec_len, size_t total_bytes) { size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pidfd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); @@ -663,8 +665,8 @@ init_process_madvise(void) { } static bool -pages_purge_process_madvise_impl(void *vec, size_t vec_len, - size_t total_bytes) { +pages_purge_process_madvise_impl( + void *vec, size_t vec_len, size_t total_bytes) { not_reached(); return true; } @@ -700,11 +702,11 @@ os_page_detect(void) { #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT static bool 
os_overcommits_sysctl(void) { - int vm_overcommit; + int vm_overcommit; size_t sz; sz = sizeof(vm_overcommit); -#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) +# if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) int mib[2]; mib[0] = CTL_VM; @@ -712,11 +714,11 @@ os_overcommits_sysctl(void) { if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } -#else +# else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. */ } -#endif +# endif return ((vm_overcommit & 0x3) == 0); } @@ -730,17 +732,18 @@ os_overcommits_sysctl(void) { */ static bool os_overcommits_proc(void) { - int fd; + int fd; char buf[1]; -#if defined(O_CLOEXEC) - fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); -#else +# if defined(O_CLOEXEC) + fd = malloc_open( + "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); +# else fd = malloc_open("/proc/sys/vm/overcommit_memory", O_RDONLY); if (fd != -1) { fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); } -#endif +# endif if (fd == -1) { return false; /* Error. 
*/ @@ -763,20 +766,20 @@ os_overcommits_proc(void) { #endif void -pages_set_thp_state (void *ptr, size_t size) { +pages_set_thp_state(void *ptr, size_t size) { if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { return; } - assert(opt_thp != thp_mode_not_supported && - init_system_thp_mode != thp_mode_not_supported); + assert(opt_thp != thp_mode_not_supported + && init_system_thp_mode != thp_mode_not_supported); if (opt_thp == thp_mode_always && init_system_thp_mode != thp_mode_never) { assert(init_system_thp_mode == thp_mode_default); pages_huge_unaligned(ptr, size); } else if (opt_thp == thp_mode_never) { - assert(init_system_thp_mode == thp_mode_default || - init_system_thp_mode == thp_mode_always); + assert(init_system_thp_mode == thp_mode_default + || init_system_thp_mode == thp_mode_always); pages_nohuge_unaligned(ptr, size); } } @@ -794,7 +797,7 @@ init_thp_state(void) { static const char sys_state_madvise[] = "always [madvise] never\n"; static const char sys_state_always[] = "[always] madvise never\n"; static const char sys_state_never[] = "always madvise [never]\n"; - char buf[sizeof(sys_state_madvise)]; + char buf[sizeof(sys_state_madvise)]; int fd = malloc_open( "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); @@ -839,10 +842,13 @@ pages_boot(void) { #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS if (!opt_trust_madvise) { - madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages(); + madvise_dont_need_zeros_is_faulty = + !madvise_MADV_DONTNEED_zeroes_pages(); if (madvise_dont_need_zeros_is_faulty) { - malloc_write(": MADV_DONTNEED does not work (memset will be used instead)\n"); - malloc_write(": (This is the expected behaviour if you are running under QEMU)\n"); + malloc_write( + ": MADV_DONTNEED does not work (memset will be used instead)\n"); + malloc_write( + ": (This is the expected behaviour if you are running under QEMU)\n"); } } else { /* In case opt_trust_madvise is disable, @@ -859,11 +865,11 @@ 
pages_boot(void) { os_overcommits = os_overcommits_sysctl(); #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY) os_overcommits = os_overcommits_proc(); -# ifdef MAP_NORESERVE +# ifdef MAP_NORESERVE if (os_overcommits) { mmap_flags |= MAP_NORESERVE; } -# endif +# endif #elif defined(__NetBSD__) os_overcommits = true; #else @@ -879,8 +885,9 @@ pages_boot(void) { #else /* Detect lazy purge runtime support. */ if (pages_can_purge_lazy) { - bool committed = false; - void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); + bool committed = false; + void *madv_free_page = os_pages_map( + NULL, PAGE, PAGE, &committed); if (madv_free_page == NULL) { return true; } diff --git a/src/pai.c b/src/pai.c index e8cddfc3..3114e658 100644 --- a/src/pai.c +++ b/src/pai.c @@ -6,7 +6,7 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, edata_list_active_t *results, bool frequent_reuse, bool *deferred_work_generated) { for (size_t i = 0; i < nallocs; i++) { - bool deferred_by_alloc = false; + bool deferred_by_alloc = false; edata_t *edata = pai_alloc(tsdn, self, size, PAGE, /* zero */ false, /* guarded */ false, frequent_reuse, &deferred_by_alloc); @@ -20,8 +20,8 @@ pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, } void -pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated) { +pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, + bool *deferred_work_generated) { edata_t *edata; while ((edata = edata_list_active_first(list)) != NULL) { bool deferred_by_dalloc = false; diff --git a/src/peak_event.c b/src/peak_event.c index e7f3ced6..e7f54dba 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -12,7 +12,7 @@ void peak_event_update(tsd_t *tsd) { uint64_t alloc = tsd_thread_allocated_get(tsd); uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); 
peak_update(peak, alloc, dalloc); } @@ -32,7 +32,7 @@ void peak_event_zero(tsd_t *tsd) { uint64_t alloc = tsd_thread_allocated_get(tsd); uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); peak_set_zero(peak, alloc, dalloc); } @@ -65,8 +65,8 @@ peak_event_enabled(void) { /* Handles alloc and dalloc */ te_base_cb_t peak_te_handler = { - .enabled = &peak_event_enabled, - .new_event_wait = &peak_event_new_event_wait, - .postponed_event_wait = &peak_event_postponed_event_wait, - .event_handler = &peak_event_handler, + .enabled = &peak_event_enabled, + .new_event_wait = &peak_event_new_event_wait, + .postponed_event_wait = &peak_event_postponed_event_wait, + .event_handler = &peak_event_handler, }; diff --git a/src/prof.c b/src/prof.c index ec13afbd..a833fed5 100644 --- a/src/prof.c +++ b/src/prof.c @@ -24,21 +24,21 @@ /* Data. */ -bool opt_prof = false; -bool opt_prof_active = true; -bool opt_prof_thread_active_init = true; +bool opt_prof = false; +bool opt_prof_active = true; +bool opt_prof_thread_active_init = true; unsigned opt_prof_bt_max = PROF_BT_MAX_DEFAULT; -size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; -ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; -bool opt_prof_gdump = false; -bool opt_prof_final = false; -bool opt_prof_leak = false; -bool opt_prof_leak_error = false; -bool opt_prof_accum = false; -bool opt_prof_pid_namespace = false; -char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; -bool opt_prof_sys_thread_name = false; -bool opt_prof_unbias = true; +size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT; +ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT; +bool opt_prof_gdump = false; +bool opt_prof_final = false; +bool opt_prof_leak = false; +bool opt_prof_leak_error = false; +bool opt_prof_accum = false; +bool opt_prof_pid_namespace = false; +char opt_prof_prefix[PROF_DUMP_FILENAME_LEN]; +bool opt_prof_sys_thread_name = false; +bool opt_prof_unbias = true; /* 
Accessed via prof_sample_event_handler(). */ static counter_accum_t prof_idump_accumulated; @@ -47,28 +47,28 @@ static counter_accum_t prof_idump_accumulated; * Initialized as opt_prof_active, and accessed via * prof_active_[gs]et{_unlocked,}(). */ -bool prof_active_state; +bool prof_active_state; static malloc_mutex_t prof_active_mtx; /* * Initialized as opt_prof_thread_active_init, and accessed via * prof_thread_active_init_[gs]et(). */ -static bool prof_thread_active_init; +static bool prof_thread_active_init; static malloc_mutex_t prof_thread_active_init_mtx; /* * Initialized as opt_prof_gdump, and accessed via * prof_gdump_[gs]et{_unlocked,}(). */ -bool prof_gdump_val; +bool prof_gdump_val; static malloc_mutex_t prof_gdump_mtx; uint64_t prof_interval = 0; size_t lg_prof_sample; -static uint64_t next_thr_uid; +static uint64_t next_thr_uid; static malloc_mutex_t next_thr_uid_mtx; /* Do not dump any profiles until bootstrapping is complete. */ @@ -113,16 +113,16 @@ prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx) { } void -prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, - size_t usize, prof_tctx_t *tctx) { +prof_malloc_sample_object( + tsd_t *tsd, const void *ptr, size_t size, size_t usize, prof_tctx_t *tctx) { cassert(config_prof); if (opt_prof_sys_thread_name) { prof_sys_thread_name_fetch(tsd); } - edata_t *edata = emap_edata_lookup(tsd_tsdn(tsd), &arena_emap_global, - ptr); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, ptr); prof_info_set(tsd, edata, tctx, size); szind_t szind = sz_size2index(usize); @@ -173,8 +173,8 @@ prof_malloc_sample_object(tsd_t *tsd, const void *ptr, size_t size, } void -prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize, - prof_info_t *prof_info) { +prof_free_sampled_object( + tsd_t *tsd, const void *ptr, size_t usize, prof_info_t *prof_info) { cassert(config_prof); assert(prof_info != NULL); @@ -279,10 +279,12 @@ prof_sample_new_event_wait(tsd_t *tsd) { * otherwise 
bytes_until_sample would be 0 if u is exactly 1.0. */ uint64_t r = prng_lg_range_u64(tsd_prng_statep_get(tsd), 53); - double u = (r == 0U) ? 1.0 : (double)((long double)r * - (1.0L/9007199254740992.0L)); - return (uint64_t)(log(u) / - log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + double u = (r == 0U) + ? 1.0 + : (double)((long double)r * (1.0L / 9007199254740992.0L)); + return (uint64_t)(log(u) + / log( + 1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample)))) + (uint64_t)1U; #else not_reached(); @@ -322,9 +324,9 @@ prof_sample_enabled(void) { } te_base_cb_t prof_sample_te_handler = { - .enabled = &prof_sample_enabled, - .new_event_wait = &prof_sample_new_event_wait, - /* + .enabled = &prof_sample_enabled, + .new_event_wait = &prof_sample_new_event_wait, + /* * The postponed wait time for prof sample event is computed as if we * want a new wait time (i.e. as if the event were triggered). If we * instead postpone to the immediate next allocation, like how we're @@ -332,8 +334,8 @@ te_base_cb_t prof_sample_te_handler = { * the allocation immediately following a reentrancy always comes from * the same stack trace. */ - .postponed_event_wait = &prof_sample_new_event_wait, - .event_handler = &prof_sample_event_handler, + .postponed_event_wait = &prof_sample_new_event_wait, + .event_handler = &prof_sample_event_handler, }; static void @@ -361,7 +363,7 @@ prof_idump_accum_init(void) { void prof_idump(tsdn_t *tsdn) { - tsd_t *tsd; + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); @@ -400,7 +402,7 @@ prof_mdump(tsd_t *tsd, const char *filename) { void prof_gdump(tsdn_t *tsdn) { - tsd_t *tsd; + tsd_t *tsd; prof_tdata_t *tdata; cassert(config_prof); @@ -447,7 +449,7 @@ prof_tdata_t * prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { uint64_t thr_uid = tdata->thr_uid; uint64_t thr_discrim = tdata->thr_discrim + 1; - bool active = tdata->active; + bool active = tdata->active; /* Keep a local copy of the thread name, before detaching. 
*/ prof_thread_name_assert(tdata); @@ -455,8 +457,8 @@ prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) { strncpy(thread_name, tdata->thread_name, PROF_THREAD_NAME_MAX_LEN); prof_tdata_detach(tsd, tdata); - return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name, - active); + return prof_tdata_init_impl( + tsd, thr_uid, thr_discrim, thread_name, active); } void @@ -595,8 +597,8 @@ prof_backtrace_hook_set(prof_backtrace_hook_t hook) { prof_backtrace_hook_t prof_backtrace_hook_get(void) { - return (prof_backtrace_hook_t)atomic_load_p(&prof_backtrace_hook, - ATOMIC_ACQUIRE); + return (prof_backtrace_hook_t)atomic_load_p( + &prof_backtrace_hook, ATOMIC_ACQUIRE); } void @@ -606,8 +608,7 @@ prof_dump_hook_set(prof_dump_hook_t hook) { prof_dump_hook_t prof_dump_hook_get(void) { - return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, - ATOMIC_ACQUIRE); + return (prof_dump_hook_t)atomic_load_p(&prof_dump_hook, ATOMIC_ACQUIRE); } void @@ -617,8 +618,8 @@ prof_sample_hook_set(prof_sample_hook_t hook) { prof_sample_hook_t prof_sample_hook_get(void) { - return (prof_sample_hook_t)atomic_load_p(&prof_sample_hook, - ATOMIC_ACQUIRE); + return (prof_sample_hook_t)atomic_load_p( + &prof_sample_hook, ATOMIC_ACQUIRE); } void @@ -628,16 +629,16 @@ prof_sample_free_hook_set(prof_sample_free_hook_t hook) { prof_sample_free_hook_t prof_sample_free_hook_get(void) { - return (prof_sample_free_hook_t)atomic_load_p(&prof_sample_free_hook, - ATOMIC_ACQUIRE); + return (prof_sample_free_hook_t)atomic_load_p( + &prof_sample_free_hook, ATOMIC_ACQUIRE); } void prof_boot0(void) { cassert(config_prof); - memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT, - sizeof(PROF_PREFIX_DEFAULT)); + memcpy( + opt_prof_prefix, PROF_PREFIX_DEFAULT, sizeof(PROF_PREFIX_DEFAULT)); } void @@ -661,8 +662,8 @@ prof_boot1(void) { opt_prof_gdump = false; } else if (opt_prof) { if (opt_lg_prof_interval >= 0) { - prof_interval = (((uint64_t)1U) << - opt_lg_prof_interval); + prof_interval = (((uint64_t)1U) + << 
opt_lg_prof_interval); } } } @@ -676,41 +677,40 @@ prof_boot2(tsd_t *tsd, base_t *base) { * stats when opt_prof is false. */ if (malloc_mutex_init(&prof_active_mtx, "prof_active", - WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump", - WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_thread_active_init_mtx, - "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, - malloc_mutex_rank_exclusive)) { + "prof_thread_active_init", WITNESS_RANK_PROF_THREAD_ACTIVE_INIT, + malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx", - WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas", - WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid", - WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_stats_mtx, "prof_stats", - WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_STATS, malloc_mutex_rank_exclusive)) { return true; } - if (malloc_mutex_init(&prof_dump_filename_mtx, - "prof_dump_filename", WITNESS_RANK_PROF_DUMP_FILENAME, - malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&prof_dump_filename_mtx, "prof_dump_filename", + WITNESS_RANK_PROF_DUMP_FILENAME, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_dump_mtx, "prof_dump", - WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) { 
return true; } @@ -730,8 +730,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { return true; } - if (opt_prof_final && opt_prof_prefix[0] != '\0' && - atexit(prof_fdump) != 0) { + if (opt_prof_final && opt_prof_prefix[0] != '\0' + && atexit(prof_fdump) != 0) { malloc_write(": Error in atexit()\n"); if (opt_abort) { abort(); @@ -755,8 +755,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { } for (unsigned i = 0; i < PROF_NCTX_LOCKS; i++) { if (malloc_mutex_init(&gctx_locks[i], "prof_gctx", - WITNESS_RANK_PROF_GCTX, - malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_GCTX, + malloc_mutex_rank_exclusive)) { return true; } } @@ -768,8 +768,8 @@ prof_boot2(tsd_t *tsd, base_t *base) { } for (unsigned i = 0; i < PROF_NTDATA_LOCKS; i++) { if (malloc_mutex_init(&tdata_locks[i], "prof_tdata", - WITNESS_RANK_PROF_TDATA, - malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_TDATA, + malloc_mutex_rank_exclusive)) { return true; } } @@ -820,8 +820,8 @@ prof_postfork_parent(tsdn_t *tsdn) { if (config_prof && opt_prof) { unsigned i; - malloc_mutex_postfork_parent(tsdn, - &prof_thread_active_init_mtx); + malloc_mutex_postfork_parent( + tsdn, &prof_thread_active_init_mtx); malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx); malloc_mutex_postfork_parent(tsdn, &prof_stats_mtx); malloc_mutex_postfork_parent(tsdn, &prof_recent_alloc_mtx); diff --git a/src/prof_data.c b/src/prof_data.c index edc5c558..7aa047ac 100644 --- a/src/prof_data.c +++ b/src/prof_data.c @@ -36,7 +36,7 @@ malloc_mutex_t prof_dump_mtx; * and destroying mutexes causes complications for systems that allocate when * creating/destroying mutexes. */ -malloc_mutex_t *gctx_locks; +malloc_mutex_t *gctx_locks; static atomic_u_t cum_gctxs; /* Atomic counter. 
*/ /* @@ -69,33 +69,32 @@ static int prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) { uint64_t a_thr_uid = a->thr_uid; uint64_t b_thr_uid = b->thr_uid; - int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); + int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid); if (ret == 0) { uint64_t a_thr_discrim = a->thr_discrim; uint64_t b_thr_discrim = b->thr_discrim; - ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim < - b_thr_discrim); + ret = (a_thr_discrim > b_thr_discrim) + - (a_thr_discrim < b_thr_discrim); if (ret == 0) { uint64_t a_tctx_uid = a->tctx_uid; uint64_t b_tctx_uid = b->tctx_uid; - ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid < - b_tctx_uid); + ret = (a_tctx_uid > b_tctx_uid) + - (a_tctx_uid < b_tctx_uid); } } return ret; } /* NOLINTBEGIN(performance-no-int-to-ptr) */ -rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, - tctx_link, prof_tctx_comp) -/* NOLINTEND(performance-no-int-to-ptr) */ +rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t, tctx_link, + prof_tctx_comp) + /* NOLINTEND(performance-no-int-to-ptr) */ -static int -prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { + static int prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { unsigned a_len = a->bt.len; unsigned b_len = b->bt.len; unsigned comp_len = (a_len < b_len) ? 
a_len : b_len; - int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); + int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *)); if (ret == 0) { ret = (a_len > b_len) - (a_len < b_len); } @@ -105,11 +104,10 @@ prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) { /* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link, prof_gctx_comp) -/* NOLINTEND(performance-no-int-to-ptr) */ + /* NOLINTEND(performance-no-int-to-ptr) */ -static int -prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { - int ret; + static int prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { + int ret; uint64_t a_uid = a->thr_uid; uint64_t b_uid = b->thr_uid; @@ -126,12 +124,11 @@ prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) { /* NOLINTBEGIN(performance-no-int-to-ptr) */ rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link, prof_tdata_comp) -/* NOLINTEND(performance-no-int-to-ptr) */ + /* NOLINTEND(performance-no-int-to-ptr) */ -/******************************************************************************/ + /******************************************************************************/ -static malloc_mutex_t * -prof_gctx_mutex_choose(void) { + static malloc_mutex_t *prof_gctx_mutex_choose(void) { unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED); return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS]; @@ -145,8 +142,8 @@ prof_tdata_mutex_choose(uint64_t thr_uid) { bool prof_data_init(tsd_t *tsd) { tdata_tree_new(&tdatas); - return ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, - prof_bt_hash, prof_bt_keycomp); + return ckh_new( + tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash, prof_bt_keycomp); } static void @@ -195,8 +192,8 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { */ size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *)); prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size, - 
sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true), - true); + sz_size2index(size), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (gctx == NULL) { return NULL; } @@ -215,8 +212,7 @@ prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) { } static void -prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, - prof_gctx_t *gctx) { +prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx) { cassert(config_prof); /* @@ -267,12 +263,12 @@ static bool prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) { union { - prof_gctx_t *p; - void *v; + prof_gctx_t *p; + void *v; } gctx, tgctx; union { - prof_bt_t *p; - void *v; + prof_bt_t *p; + void *v; } btkey; bool new_gctx; @@ -316,8 +312,8 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, if (tgctx.v != NULL) { /* Lost race to insert. */ - idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, - true); + idalloctm( + tsd_tsdn(tsd), tgctx.v, NULL, NULL, true, true); } } prof_leave(tsd, tdata); @@ -331,11 +327,11 @@ prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata, prof_tctx_t * prof_lookup(tsd_t *tsd, prof_bt_t *bt) { union { - prof_tctx_t *p; - void *v; + prof_tctx_t *p; + void *v; } ret; prof_tdata_t *tdata; - bool not_found; + bool not_found; cassert(config_prof); @@ -349,16 +345,16 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { } malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (not_found) { - void *btkey; + void *btkey; prof_gctx_t *gctx; - bool new_gctx, error; + bool new_gctx, error; /* * This thread's cache lacks bt. Look for it in the global * cache. */ - if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx, - &new_gctx)) { + if (prof_lookup_global( + tsd, bt, tdata, &btkey, &gctx, &new_gctx)) { return NULL; } @@ -403,8 +399,8 @@ prof_lookup(tsd_t *tsd, prof_bt_t *bt) { /* Used in unit tests. 
*/ static prof_tdata_t * -prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *arg) { +prof_tdata_count_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { size_t *tdata_count = (size_t *)arg; (*tdata_count)++; @@ -415,13 +411,13 @@ prof_tdata_count_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, /* Used in unit tests. */ size_t prof_tdata_count(void) { - size_t tdata_count = 0; + size_t tdata_count = 0; tsdn_t *tsdn; tsdn = tsdn_fetch(); malloc_mutex_lock(tsdn, &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter, - (void *)&tdata_count); + tdata_tree_iter( + &tdatas, NULL, prof_tdata_count_iter, (void *)&tdata_count); malloc_mutex_unlock(tsdn, &tdatas_mtx); return tdata_count; @@ -430,8 +426,8 @@ prof_tdata_count(void) { /* Used in unit tests. */ size_t prof_bt_count(void) { - size_t bt_count; - tsd_t *tsd; + size_t bt_count; + tsd_t *tsd; prof_tdata_t *tdata; tsd = tsd_fetch(); @@ -477,10 +473,10 @@ prof_thread_name_set_impl(tsd_t *tsd, const char *thread_name) { JEMALLOC_FORMAT_PRINTF(3, 4) static void -prof_dump_printf(write_cb_t *prof_dump_write, void *cbopaque, - const char *format, ...) { +prof_dump_printf( + write_cb_t *prof_dump_write, void *cbopaque, const char *format, ...) 
{ va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; + char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); @@ -509,7 +505,8 @@ prof_double_uint64_cast(double d) { } #endif -void prof_unbias_map_init(void) { +void +prof_unbias_map_init(void) { /* See the comment in prof_sample_new_event_wait */ #ifdef JEMALLOC_PROF for (szind_t i = 0; i < SC_NSIZES; i++) { @@ -621,8 +618,8 @@ prof_do_unbias(uint64_t c_out_shifted_i, uint64_t s_out_i, uint64_t *r_c_in, } static void -prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, - const prof_cnt_t *cnts) { +prof_dump_print_cnts( + write_cb_t *prof_dump_write, void *cbopaque, const prof_cnt_t *cnts) { uint64_t curobjs; uint64_t curbytes; uint64_t accumobjs; @@ -639,8 +636,8 @@ prof_dump_print_cnts(write_cb_t *prof_dump_write, void *cbopaque, accumbytes = cnts->accumbytes; } prof_dump_printf(prof_dump_write, cbopaque, - "%"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]", - curobjs, curbytes, accumobjs, accumbytes); + "%" FMTu64 ": %" FMTu64 " [%" FMTu64 ": %" FMTu64 "]", curobjs, + curbytes, accumobjs, accumbytes); } static void @@ -660,11 +657,11 @@ prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) { memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t)); tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs; - tdata->cnt_summed.curobjs_shifted_unbiased - += tctx->dump_cnts.curobjs_shifted_unbiased; + tdata->cnt_summed.curobjs_shifted_unbiased += + tctx->dump_cnts.curobjs_shifted_unbiased; tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes; - tdata->cnt_summed.curbytes_unbiased - += tctx->dump_cnts.curbytes_unbiased; + tdata->cnt_summed.curbytes_unbiased += + tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { tdata->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; @@ -687,17 +684,17 @@ prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) { malloc_mutex_assert_owner(tsdn, gctx->lock); gctx->cnt_summed.curobjs += 
tctx->dump_cnts.curobjs; - gctx->cnt_summed.curobjs_shifted_unbiased - += tctx->dump_cnts.curobjs_shifted_unbiased; + gctx->cnt_summed.curobjs_shifted_unbiased += + tctx->dump_cnts.curobjs_shifted_unbiased; gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes; gctx->cnt_summed.curbytes_unbiased += tctx->dump_cnts.curbytes_unbiased; if (opt_prof_accum) { gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs; - gctx->cnt_summed.accumobjs_shifted_unbiased - += tctx->dump_cnts.accumobjs_shifted_unbiased; + gctx->cnt_summed.accumobjs_shifted_unbiased += + tctx->dump_cnts.accumobjs_shifted_unbiased; gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes; - gctx->cnt_summed.accumbytes_unbiased - += tctx->dump_cnts.accumbytes_unbiased; + gctx->cnt_summed.accumbytes_unbiased += + tctx->dump_cnts.accumbytes_unbiased; } } @@ -725,9 +722,9 @@ prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { typedef struct prof_dump_iter_arg_s prof_dump_iter_arg_t; struct prof_dump_iter_arg_s { - tsdn_t *tsdn; + tsdn_t *tsdn; write_cb_t *prof_dump_write; - void *cbopaque; + void *cbopaque; }; static prof_tctx_t * @@ -743,9 +740,9 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { case prof_tctx_state_dumping: case prof_tctx_state_purgatory: prof_dump_printf(arg->prof_dump_write, arg->cbopaque, - " t%"FMTu64": ", tctx->thr_uid); - prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, - &tctx->dump_cnts); + " t%" FMTu64 ": ", tctx->thr_uid); + prof_dump_print_cnts( + arg->prof_dump_write, arg->cbopaque, &tctx->dump_cnts); arg->prof_dump_write(arg->cbopaque, "\n"); break; default: @@ -756,7 +753,7 @@ prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) { static prof_tctx_t * prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) { - tsdn_t *tsdn = (tsdn_t *)arg; + tsdn_t *tsdn = (tsdn_t *)arg; prof_tctx_t *ret; malloc_mutex_assert_owner(tsdn, tctx->gctx->lock); @@ -811,8 +808,8 @@ 
prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { prof_gctx_merge_iter_arg_t *arg = (prof_gctx_merge_iter_arg_t *)opaque; malloc_mutex_lock(arg->tsdn, gctx->lock); - tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter, - (void *)arg->tsdn); + tctx_tree_iter( + &gctx->tctxs, NULL, prof_tctx_merge_iter, (void *)arg->tsdn); if (gctx->cnt_summed.curobjs != 0) { (*arg->leak_ngctx)++; } @@ -824,7 +821,7 @@ prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) { static void prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { prof_tdata_t *tdata = prof_tdata_get(tsd, false); - prof_gctx_t *gctx; + prof_gctx_t *gctx; /* * Standard tree iteration won't work here, because as soon as we @@ -840,15 +837,14 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { next = NULL; do { - prof_tctx_t *to_destroy = - tctx_tree_iter(&gctx->tctxs, next, - prof_tctx_finish_iter, + prof_tctx_t *to_destroy = tctx_tree_iter( + &gctx->tctxs, next, prof_tctx_finish_iter, (void *)tsd_tsdn(tsd)); if (to_destroy != NULL) { - next = tctx_tree_next(&gctx->tctxs, - to_destroy); - tctx_tree_remove(&gctx->tctxs, - to_destroy); + next = tctx_tree_next( + &gctx->tctxs, to_destroy); + tctx_tree_remove( + &gctx->tctxs, to_destroy); idalloctm(tsd_tsdn(tsd), to_destroy, NULL, NULL, true, true); } else { @@ -869,41 +865,41 @@ prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) { typedef struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg_t; struct prof_tdata_merge_iter_arg_s { - tsdn_t *tsdn; + tsdn_t *tsdn; prof_cnt_t *cnt_all; }; static prof_tdata_t * -prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *opaque) { - prof_tdata_merge_iter_arg_t *arg = - (prof_tdata_merge_iter_arg_t *)opaque; +prof_tdata_merge_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { + prof_tdata_merge_iter_arg_t *arg = (prof_tdata_merge_iter_arg_t *) + opaque; malloc_mutex_lock(arg->tsdn, tdata->lock); 
if (!tdata->expired) { size_t tabind; union { - prof_tctx_t *p; - void *v; + prof_tctx_t *p; + void *v; } tctx; tdata->dumping = true; memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t)); - for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL, - &tctx.v);) { + for (tabind = 0; + !ckh_iter(&tdata->bt2tctx, &tabind, NULL, &tctx.v);) { prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata); } arg->cnt_all->curobjs += tdata->cnt_summed.curobjs; - arg->cnt_all->curobjs_shifted_unbiased - += tdata->cnt_summed.curobjs_shifted_unbiased; + arg->cnt_all->curobjs_shifted_unbiased += + tdata->cnt_summed.curobjs_shifted_unbiased; arg->cnt_all->curbytes += tdata->cnt_summed.curbytes; - arg->cnt_all->curbytes_unbiased - += tdata->cnt_summed.curbytes_unbiased; + arg->cnt_all->curbytes_unbiased += + tdata->cnt_summed.curbytes_unbiased; if (opt_prof_accum) { arg->cnt_all->accumobjs += tdata->cnt_summed.accumobjs; - arg->cnt_all->accumobjs_shifted_unbiased - += tdata->cnt_summed.accumobjs_shifted_unbiased; + arg->cnt_all->accumobjs_shifted_unbiased += + tdata->cnt_summed.accumobjs_shifted_unbiased; arg->cnt_all->accumbytes += tdata->cnt_summed.accumbytes; arg->cnt_all->accumbytes_unbiased += @@ -918,17 +914,17 @@ prof_tdata_merge_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, } static prof_tdata_t * -prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *opaque) { +prof_tdata_dump_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *opaque) { if (!tdata->dumping) { return NULL; } prof_dump_iter_arg_t *arg = (prof_dump_iter_arg_t *)opaque; - prof_dump_printf(arg->prof_dump_write, arg->cbopaque, " t%"FMTu64": ", - tdata->thr_uid); - prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, - &tdata->cnt_summed); + prof_dump_printf(arg->prof_dump_write, arg->cbopaque, + " t%" FMTu64 ": ", tdata->thr_uid); + prof_dump_print_cnts( + arg->prof_dump_write, arg->cbopaque, &tdata->cnt_summed); if (!prof_thread_name_empty(tdata)) { 
arg->prof_dump_write(arg->cbopaque, " "); arg->prof_dump_write(arg->cbopaque, tdata->thread_name); @@ -940,7 +936,7 @@ prof_tdata_dump_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, static void prof_dump_header(prof_dump_iter_arg_t *arg, const prof_cnt_t *cnt_all) { prof_dump_printf(arg->prof_dump_write, arg->cbopaque, - "heap_v2/%"FMTu64"\n t*: ", ((uint64_t)1U << lg_prof_sample)); + "heap_v2/%" FMTu64 "\n t*: ", ((uint64_t)1U << lg_prof_sample)); prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, cnt_all); arg->prof_dump_write(arg->cbopaque, "\n"); @@ -956,8 +952,8 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, malloc_mutex_assert_owner(arg->tsdn, gctx->lock); /* Avoid dumping such gctx's that have no useful data. */ - if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) || - (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { + if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) + || (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) { assert(gctx->cnt_summed.curobjs == 0); assert(gctx->cnt_summed.curbytes == 0); /* @@ -976,12 +972,12 @@ prof_dump_gctx(prof_dump_iter_arg_t *arg, prof_gctx_t *gctx, arg->prof_dump_write(arg->cbopaque, "@"); for (unsigned i = 0; i < bt->len; i++) { prof_dump_printf(arg->prof_dump_write, arg->cbopaque, - " %#"FMTxPTR, (uintptr_t)bt->vec[i]); + " %#" FMTxPTR, (uintptr_t)bt->vec[i]); } arg->prof_dump_write(arg->cbopaque, "\n t*: "); - prof_dump_print_cnts(arg->prof_dump_write, arg->cbopaque, - &gctx->cnt_summed); + prof_dump_print_cnts( + arg->prof_dump_write, arg->cbopaque, &gctx->cnt_summed); arg->prof_dump_write(arg->cbopaque, "\n"); tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter, arg); @@ -1002,18 +998,21 @@ prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx) { */ if (cnt_all->curbytes != 0) { double sample_period = (double)((uint64_t)1 << lg_prof_sample); - double ratio = (((double)cnt_all->curbytes) / - (double)cnt_all->curobjs) / sample_period; - double scale_factor = 
1.0 / (1.0 - exp(-ratio)); - uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes) - * scale_factor); - uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) * - scale_factor); + double ratio = (((double)cnt_all->curbytes) + / (double)cnt_all->curobjs) + / sample_period; + double scale_factor = 1.0 / (1.0 - exp(-ratio)); + uint64_t curbytes = (uint64_t)round( + ((double)cnt_all->curbytes) * scale_factor); + uint64_t curobjs = (uint64_t)round( + ((double)cnt_all->curobjs) * scale_factor); - malloc_printf(": Leak approximation summary: ~%"FMTu64 - " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n", - curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs != - 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : ""); + malloc_printf( + ": Leak approximation summary: ~%" FMTu64 + " byte%s, ~%" FMTu64 " object%s, >= %zu context%s\n", + curbytes, (curbytes != 1) ? "s" : "", curobjs, + (curobjs != 1) ? "s" : "", leak_ngctx, + (leak_ngctx != 1) ? "s" : ""); malloc_printf( ": Run jeprof on dump output for leak detail\n"); if (opt_prof_leak_error) { @@ -1044,8 +1043,8 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all, size_t *leak_ngctx, prof_gctx_tree_t *gctxs) { size_t tabind; union { - prof_gctx_t *p; - void *v; + prof_gctx_t *p; + void *v; } gctx; prof_enter(tsd, tdata); @@ -1064,19 +1063,19 @@ prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata, prof_cnt_t *cnt_all, * stats and merge them into the associated gctx's. */ memset(cnt_all, 0, sizeof(prof_cnt_t)); - prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = {tsd_tsdn(tsd), - cnt_all}; + prof_tdata_merge_iter_arg_t prof_tdata_merge_iter_arg = { + tsd_tsdn(tsd), cnt_all}; malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx); - tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter, - &prof_tdata_merge_iter_arg); + tdata_tree_iter( + &tdatas, NULL, prof_tdata_merge_iter, &prof_tdata_merge_iter_arg); malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx); /* Merge tctx stats into gctx's. 
*/ *leak_ngctx = 0; - prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = {tsd_tsdn(tsd), - leak_ngctx}; - gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter, - &prof_gctx_merge_iter_arg); + prof_gctx_merge_iter_arg_t prof_gctx_merge_iter_arg = { + tsd_tsdn(tsd), leak_ngctx}; + gctx_tree_iter( + gctxs, NULL, prof_gctx_merge_iter, &prof_gctx_merge_iter_arg); prof_leave(tsd, tdata); } @@ -1085,12 +1084,12 @@ void prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, prof_tdata_t *tdata, bool leakcheck) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_dump_mtx); - prof_cnt_t cnt_all; - size_t leak_ngctx; + prof_cnt_t cnt_all; + size_t leak_ngctx; prof_gctx_tree_t gctxs; prof_dump_prep(tsd, tdata, &cnt_all, &leak_ngctx, &gctxs); - prof_dump_iter_arg_t prof_dump_iter_arg = {tsd_tsdn(tsd), - prof_dump_write, cbopaque}; + prof_dump_iter_arg_t prof_dump_iter_arg = { + tsd_tsdn(tsd), prof_dump_write, cbopaque}; prof_dump_header(&prof_dump_iter_arg, &cnt_all); gctx_tree_iter(&gctxs, NULL, prof_gctx_dump_iter, &prof_dump_iter_arg); prof_gctx_finish(tsd, &gctxs); @@ -1102,12 +1101,12 @@ prof_dump_impl(tsd_t *tsd, write_cb_t *prof_dump_write, void *cbopaque, /* Used in unit tests. */ void prof_cnt_all(prof_cnt_t *cnt_all) { - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); prof_tdata_t *tdata = prof_tdata_get(tsd, false); if (tdata == NULL) { memset(cnt_all, 0, sizeof(prof_cnt_t)); } else { - size_t leak_ngctx; + size_t leak_ngctx; prof_gctx_tree_t gctxs; prof_dump_prep(tsd, tdata, cnt_all, &leak_ngctx, &gctxs); prof_gctx_finish(tsd, &gctxs); @@ -1148,8 +1147,8 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, /* Initialize an empty cache for this thread. 
*/ size_t tdata_sz = ALIGNMENT_CEILING(sizeof(prof_tdata_t), QUANTUM); size_t total_sz = tdata_sz + sizeof(void *) * opt_prof_bt_max; - tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), - total_sz, sz_size2index(total_sz), false, NULL, true, + tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), total_sz, + sz_size2index(total_sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); if (tdata == NULL) { return NULL; @@ -1170,7 +1169,7 @@ prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim, prof_thread_name_assert(tdata); if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash, - prof_bt_keycomp)) { + prof_bt_keycomp)) { idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true); return NULL; } @@ -1201,16 +1200,16 @@ prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) { } static bool -prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata, - bool even_if_attached) { +prof_tdata_should_destroy( + tsdn_t *tsdn, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsdn, tdata->lock); return prof_tdata_should_destroy_unlocked(tdata, even_if_attached); } static void -prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata, - bool even_if_attached) { +prof_tdata_destroy_locked( + tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx); malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tdata->lock); @@ -1234,8 +1233,8 @@ prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) { malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock); if (tdata->attached) { - destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, - true); + destroy_tdata = prof_tdata_should_destroy( + tsd_tsdn(tsd), tdata, true); /* * Only detach if !destroy_tdata, because detaching would allow * another thread to win the race to destroy tdata. 
@@ -1270,8 +1269,8 @@ prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) { } static prof_tdata_t * -prof_tdata_reset_iter(prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, - void *arg) { +prof_tdata_reset_iter( + prof_tdata_tree_t *tdatas_ptr, prof_tdata_t *tdata, void *arg) { tsdn_t *tsdn = (tsdn_t *)arg; return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL); @@ -1291,8 +1290,8 @@ prof_reset(tsd_t *tsd, size_t lg_sample) { next = NULL; do { - prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next, - prof_tdata_reset_iter, (void *)tsd); + prof_tdata_t *to_destroy = tdata_tree_iter( + &tdatas, next, prof_tdata_reset_iter, (void *)tsd); if (to_destroy != NULL) { next = tdata_tree_next(&tdatas, to_destroy); prof_tdata_destroy_locked(tsd, to_destroy, false); @@ -1355,8 +1354,8 @@ prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) { prof_tdata_t *tdata = tctx->tdata; tctx->tdata = NULL; ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL); - bool destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), - tdata, false); + bool destroy_tdata = prof_tdata_should_destroy( + tsd_tsdn(tsd), tdata, false); malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock); if (destroy_tdata) { prof_tdata_destroy(tsd, tdata, false); diff --git a/src/prof_log.c b/src/prof_log.c index f4000aec..64b363bb 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -12,7 +12,7 @@ #include "jemalloc/internal/prof_log.h" #include "jemalloc/internal/prof_sys.h" -bool opt_prof_log = false; +bool opt_prof_log = false; typedef enum prof_logging_state_e prof_logging_state_t; enum prof_logging_state_e { prof_logging_state_stopped, @@ -32,8 +32,8 @@ static bool prof_log_dummy = false; /* Incremented for every log file that is output. */ static uint64_t log_seq = 0; -static char log_filename[ - /* Minimize memory bloat for non-prof builds. */ +static char log_filename[ +/* Minimize memory bloat for non-prof builds. 
*/ #ifdef JEMALLOC_PROF PATH_MAX + #endif @@ -51,8 +51,8 @@ typedef struct prof_bt_node_s prof_bt_node_t; struct prof_bt_node_s { prof_bt_node_t *next; - size_t index; - prof_bt_t bt; + size_t index; + prof_bt_t bt; /* Variable size backtrace vector pointed to by bt. */ void *vec[1]; }; @@ -61,8 +61,8 @@ typedef struct prof_thr_node_s prof_thr_node_t; struct prof_thr_node_s { prof_thr_node_t *next; - size_t index; - uint64_t thr_uid; + size_t index; + uint64_t thr_uid; /* Variable size based on thr_name_sz. */ char name[1]; }; @@ -91,15 +91,15 @@ struct prof_alloc_node_s { * These are the backtraces and threads that have already been logged by an * allocation. */ -static bool log_tables_initialized = false; +static bool log_tables_initialized = false; static ckh_t log_bt_node_set; static ckh_t log_thr_node_set; /* Store linked lists for logged data. */ -static prof_bt_node_t *log_bt_first = NULL; -static prof_bt_node_t *log_bt_last = NULL; -static prof_thr_node_t *log_thr_first = NULL; -static prof_thr_node_t *log_thr_last = NULL; +static prof_bt_node_t *log_bt_first = NULL; +static prof_bt_node_t *log_bt_last = NULL; +static prof_thr_node_t *log_thr_first = NULL; +static prof_thr_node_t *log_thr_last = NULL; static prof_alloc_node_t *log_alloc_first = NULL; static prof_alloc_node_t *log_alloc_last = NULL; @@ -131,12 +131,12 @@ prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) { /* See if this backtrace is already cached in the table. 
*/ if (ckh_search(&log_bt_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { - size_t sz = offsetof(prof_bt_node_t, vec) + - (bt->len * sizeof(void *)); - prof_bt_node_t *new_node = (prof_bt_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); + (void **)(&node), NULL)) { + size_t sz = offsetof(prof_bt_node_t, vec) + + (bt->len * sizeof(void *)); + prof_bt_node_t *new_node = (prof_bt_node_t *)iallocztm( + tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (log_bt_first == NULL) { log_bt_first = new_node; log_bt_last = new_node; @@ -174,11 +174,11 @@ prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) { /* See if this thread is already cached in the table. */ if (ckh_search(&log_thr_node_set, (void *)(&dummy_node), - (void **)(&node), NULL)) { + (void **)(&node), NULL)) { size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1; - prof_thr_node_t *new_node = (prof_thr_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, - true, arena_get(TSDN_NULL, 0, true), true); + prof_thr_node_t *new_node = (prof_thr_node_t *)iallocztm( + tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + arena_get(TSDN_NULL, 0, true), true); if (log_thr_first == NULL) { log_thr_first = new_node; log_thr_last = new_node; @@ -225,9 +225,9 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { if (!log_tables_initialized) { bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS, - prof_bt_node_hash, prof_bt_node_keycomp); + prof_bt_node_hash, prof_bt_node_keycomp); bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS, - prof_thr_node_hash, prof_thr_node_keycomp); + prof_thr_node_hash, prof_thr_node_keycomp); if (err1 || err2) { goto label_done; } @@ -238,9 +238,9 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { nstime_t free_time; nstime_prof_init_update(&free_time); - size_t sz 
= sizeof(prof_alloc_node_t); - prof_alloc_node_t *new_node = (prof_alloc_node_t *) - iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, + size_t sz = sizeof(prof_alloc_node_t); + prof_alloc_node_t *new_node = (prof_alloc_node_t *)iallocztm( + tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true, arena_get(TSDN_NULL, 0, true), true); const char *prod_thr_name = tctx->tdata->thread_name; @@ -256,10 +256,10 @@ prof_try_log(tsd_t *tsd, size_t usize, prof_info_t *prof_info) { prof_bt_t *prod_bt = &tctx->gctx->bt; new_node->next = NULL; - new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid, - prod_thr_name); - new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid, - cons_thr_name); + new_node->alloc_thr_ind = prof_log_thr_index( + tsd, tctx->tdata->thr_uid, prod_thr_name); + new_node->free_thr_ind = prof_log_thr_index( + tsd, cons_tdata->thr_uid, cons_thr_name); new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt); new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt); new_node->alloc_time_ns = nstime_ns(&alloc_time); @@ -288,8 +288,8 @@ static bool prof_bt_node_keycomp(const void *k1, const void *k2) { const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1; const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2; - return prof_bt_keycomp((void *)(&bt_node1->bt), - (void *)(&bt_node2->bt)); + return prof_bt_keycomp( + (void *)(&bt_node1->bt), (void *)(&bt_node2->bt)); } static void @@ -309,7 +309,7 @@ prof_thr_node_keycomp(const void *k1, const void *k2) { size_t prof_log_bt_count(void) { cassert(config_prof); - size_t cnt = 0; + size_t cnt = 0; prof_bt_node_t *node = log_bt_first; while (node != NULL) { cnt++; @@ -322,7 +322,7 @@ prof_log_bt_count(void) { size_t prof_log_alloc_count(void) { cassert(config_prof); - size_t cnt = 0; + size_t cnt = 0; prof_alloc_node_t *node = log_alloc_first; while (node != NULL) { cnt++; @@ -335,7 +335,7 @@ prof_log_alloc_count(void) { size_t prof_log_thr_count(void) { 
cassert(config_prof); - size_t cnt = 0; + size_t cnt = 0; prof_thr_node_t *node = log_thr_first; while (node != NULL) { cnt++; @@ -374,7 +374,6 @@ prof_log_rep_check(void) { size_t thr_count = prof_log_thr_count(); size_t alloc_count = prof_log_alloc_count(); - if (prof_logging_state == prof_logging_state_stopped) { if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { return true; @@ -435,7 +434,8 @@ prof_log_start(tsdn_t *tsdn, const char *filename) { if (!prof_log_atexit_called) { prof_log_atexit_called = true; if (atexit(prof_log_stop_final) != 0) { - malloc_write(": Error in atexit() " + malloc_write( + ": Error in atexit() " "for logging\n"); if (opt_abort) { abort(); @@ -469,14 +469,14 @@ label_done: } struct prof_emitter_cb_arg_s { - int fd; + int fd; ssize_t ret; }; static void prof_emitter_write_cb(void *opaque, const char *to_write) { - struct prof_emitter_cb_arg_s *arg = - (struct prof_emitter_cb_arg_s *)opaque; + struct prof_emitter_cb_arg_s *arg = (struct prof_emitter_cb_arg_s *) + opaque; size_t bytes = strlen(to_write); if (prof_log_dummy) { return; @@ -501,8 +501,8 @@ prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) { char *thr_name = thr_node->name; - emitter_json_kv(emitter, "thr_name", emitter_type_string, - &thr_name); + emitter_json_kv( + emitter, "thr_name", emitter_type_string, &thr_name); emitter_json_object_end(emitter); thr_old_node = thr_node; @@ -521,7 +521,7 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { * Calculate how many hex digits we need: twice number of bytes, two for * "0x", and then one more for terminating '\0'. 
*/ - char buf[2 * sizeof(intptr_t) + 3]; + char buf[2 * sizeof(intptr_t) + 3]; size_t buf_sz = sizeof(buf); while (bt_node != NULL) { emitter_json_array_begin(emitter); @@ -529,8 +529,8 @@ prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) { for (i = 0; i < bt_node->bt.len; i++) { malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]); char *trace_str = buf; - emitter_json_value(emitter, emitter_type_string, - &trace_str); + emitter_json_value( + emitter, emitter_type_string, &trace_str); } emitter_json_array_end(emitter); @@ -561,21 +561,21 @@ prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) { emitter_json_kv(emitter, "free_trace", emitter_type_size, &alloc_node->free_bt_ind); - emitter_json_kv(emitter, "alloc_timestamp", - emitter_type_uint64, &alloc_node->alloc_time_ns); + emitter_json_kv(emitter, "alloc_timestamp", emitter_type_uint64, + &alloc_node->alloc_time_ns); emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64, &alloc_node->free_time_ns); - emitter_json_kv(emitter, "usize", emitter_type_uint64, - &alloc_node->usize); + emitter_json_kv( + emitter, "usize", emitter_type_uint64, &alloc_node->usize); emitter_json_object_end(emitter); alloc_old_node = alloc_node; alloc_node = alloc_node->next; - idalloctm(tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, - true); + idalloctm( + tsd_tsdn(tsd), alloc_old_node, NULL, NULL, true, true); } emitter_json_array_end(emitter); } @@ -591,15 +591,14 @@ prof_log_emit_metadata(emitter_t *emitter) { emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns); char *vers = JEMALLOC_VERSION; - emitter_json_kv(emitter, "version", - emitter_type_string, &vers); + emitter_json_kv(emitter, "version", emitter_type_string, &vers); - emitter_json_kv(emitter, "lg_sample_rate", - emitter_type_int, &lg_prof_sample); + emitter_json_kv( + emitter, "lg_sample_rate", emitter_type_int, &lg_prof_sample); const char *res_type = prof_time_res_mode_names[opt_prof_time_res]; - emitter_json_kv(emitter, "prof_time_resolution", 
emitter_type_string, - &res_type); + emitter_json_kv( + emitter, "prof_time_resolution", emitter_type_string, &res_type); int pid = prof_getpid(); emitter_json_kv(emitter, "pid", emitter_type_int, &pid); @@ -632,7 +631,6 @@ prof_log_stop(tsdn_t *tsdn) { prof_logging_state = prof_logging_state_dumping; malloc_mutex_unlock(tsdn, &log_mtx); - emitter_t emitter; /* Create a file. */ @@ -645,8 +643,10 @@ prof_log_stop(tsdn_t *tsdn) { } if (fd == -1) { - malloc_printf(": creat() for log file \"%s\" " - " failed with %d\n", log_filename, errno); + malloc_printf( + ": creat() for log file \"%s\" " + " failed with %d\n", + log_filename, errno); if (opt_abort) { abort(); } @@ -659,8 +659,8 @@ prof_log_stop(tsdn_t *tsdn) { buf_writer_t buf_writer; buf_writer_init(tsdn, &buf_writer, prof_emitter_write_cb, &arg, NULL, PROF_LOG_STOP_BUFSIZE); - emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, - &buf_writer); + emitter_init( + &emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer); emitter_begin(&emitter); prof_log_emit_metadata(&emitter); @@ -701,8 +701,8 @@ JEMALLOC_COLD bool prof_log_init(tsd_t *tsd) { cassert(config_prof); - if (malloc_mutex_init(&log_mtx, "prof_log", - WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) { + if (malloc_mutex_init(&log_mtx, "prof_log", WITNESS_RANK_PROF_LOG, + malloc_mutex_rank_exclusive)) { return true; } diff --git a/src/prof_recent.c b/src/prof_recent.c index b5639b4c..f7108bee 100644 --- a/src/prof_recent.c +++ b/src/prof_recent.c @@ -7,18 +7,18 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_recent.h" -ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; -malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ +ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT; +malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */ static atomic_zd_t prof_recent_alloc_max; -static ssize_t prof_recent_alloc_count = 0; +static ssize_t 
prof_recent_alloc_count = 0; prof_recent_list_t prof_recent_alloc_list; malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */ static void prof_recent_alloc_max_init(void) { - atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max, - ATOMIC_RELAXED); + atomic_store_zd( + &prof_recent_alloc_max, opt_prof_recent_alloc_max, ATOMIC_RELAXED); } static inline ssize_t @@ -144,26 +144,26 @@ edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) { static inline prof_recent_t * edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_t *recent_alloc = - edata_prof_recent_alloc_get_no_lock(edata); - assert(recent_alloc == NULL || - prof_recent_alloc_edata_get(tsd, recent_alloc) == edata); + prof_recent_t *recent_alloc = edata_prof_recent_alloc_get_no_lock( + edata); + assert(recent_alloc == NULL + || prof_recent_alloc_edata_get(tsd, recent_alloc) == edata); return recent_alloc; } static prof_recent_t * -edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_update_internal( + tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); - prof_recent_t *old_recent_alloc = - edata_prof_recent_alloc_get(tsd, edata); + prof_recent_t *old_recent_alloc = edata_prof_recent_alloc_get( + tsd, edata); edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc); return old_recent_alloc; } static void -edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_set( + tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = @@ -173,8 +173,8 @@ edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata, } static void -edata_prof_recent_alloc_reset(tsd_t *tsd, 
edata_t *edata, - prof_recent_t *recent_alloc) { +edata_prof_recent_alloc_reset( + tsd_t *tsd, edata_t *edata, prof_recent_t *recent_alloc) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx); assert(recent_alloc != NULL); prof_recent_t *old_recent_alloc = @@ -265,14 +265,14 @@ prof_recent_alloc_assert_count(tsd_t *tsd) { if (!config_debug) { return; } - ssize_t count = 0; + ssize_t count = 0; prof_recent_t *n; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++count; } assert(count == prof_recent_alloc_count); - assert(prof_recent_alloc_max_get(tsd) == -1 || - count <= prof_recent_alloc_max_get(tsd)); + assert(prof_recent_alloc_max_get(tsd) == -1 + || count <= prof_recent_alloc_max_get(tsd)); } void @@ -319,8 +319,8 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { * the allocation locks. */ prof_recent_t *reserve = NULL; - if (prof_recent_alloc_max_get(tsd) == -1 || - prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { + if (prof_recent_alloc_max_get(tsd) == -1 + || prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) { assert(prof_recent_alloc_max_get(tsd) != 0); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); reserve = prof_recent_allocate_node(tsd_tsdn(tsd)); @@ -346,8 +346,9 @@ prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) { ql_rotate(&prof_recent_alloc_list, link); } else { /* Otherwise make use of the new node. 
*/ - assert(prof_recent_alloc_max_get(tsd) == -1 || - prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)); + assert(prof_recent_alloc_max_get(tsd) == -1 + || prof_recent_alloc_count + < prof_recent_alloc_max_get(tsd)); if (reserve == NULL) { goto label_rollback; } @@ -421,7 +422,7 @@ prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) { } prof_recent_t *node; - ql_foreach(node, &prof_recent_alloc_list, link) { + ql_foreach (node, &prof_recent_alloc_list, link) { if (prof_recent_alloc_count == max) { break; } @@ -462,7 +463,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { assert(max >= -1); malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); prof_recent_alloc_assert_count(tsd); - const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); + const ssize_t old_max = prof_recent_alloc_max_update(tsd, max); prof_recent_list_t to_delete; prof_recent_alloc_restore_locked(tsd, &to_delete); malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -472,7 +473,7 @@ prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) { static void prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) { - char bt_buf[2 * sizeof(intptr_t) + 3]; + char bt_buf[2 * sizeof(intptr_t) + 3]; char *s = bt_buf; assert(tctx != NULL); prof_bt_t *bt = &tctx->gctx->bt; @@ -501,8 +502,8 @@ prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) { emitter_type_string, &thread_name); } uint64_t alloc_time_ns = nstime_ns(&node->alloc_time); - emitter_json_kv(emitter, "alloc_time", emitter_type_uint64, - &alloc_time_ns); + emitter_json_kv( + emitter, "alloc_time", emitter_type_uint64, &alloc_time_ns); emitter_json_array_kv_begin(emitter, "alloc_trace"); prof_recent_alloc_dump_bt(emitter, node->alloc_tctx); emitter_json_array_end(emitter); @@ -539,8 +540,8 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL, 
PROF_RECENT_PRINT_BUFSIZE); emitter_t emitter; - emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb, - &buf_writer); + emitter_init( + &emitter, emitter_output_json_compact, buf_writer_cb, &buf_writer); prof_recent_list_t temp_list; malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx); @@ -554,13 +555,13 @@ prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) { emitter_begin(&emitter); uint64_t sample_interval = (uint64_t)1U << lg_prof_sample; - emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64, - &sample_interval); - emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize, - &dump_max); + emitter_json_kv( + &emitter, "sample_interval", emitter_type_uint64, &sample_interval); + emitter_json_kv( + &emitter, "recent_alloc_max", emitter_type_ssize, &dump_max); emitter_json_array_kv_begin(&emitter, "recent_alloc"); prof_recent_t *node; - ql_foreach(node, &temp_list, link) { + ql_foreach (node, &temp_list, link) { prof_recent_alloc_dump_node(&emitter, node); } emitter_json_array_end(&emitter); @@ -587,12 +588,12 @@ prof_recent_init(void) { prof_recent_alloc_max_init(); if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc", - WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) { return true; } if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump", - WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) { return true; } diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index f5e5c044..b167b132 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -6,12 +6,12 @@ #if defined(__linux__) && defined(JEMALLOC_HAVE_GETTID) -# include -# include -# include -# include // strtoul -# include -# include +# include +# include +# include +# include // strtoul +# include +# include /* * Converts a string representing a hexadecimal 
number to an unsigned long long @@ -25,31 +25,31 @@ */ static inline unsigned long long int strtoull_hex(const char *nptr, char **endptr) { - unsigned long long int val = 0; - int ii = 0; - for (; ii < 16; ++ii) { - char c = nptr[ii]; - if (c >= '0' && c <= '9') { - val = (val << 4) + (c - '0'); - } else if (c >= 'a' && c <= 'f') { - val = (val << 4) + (c - 'a' + 10); - } else { - break; - } - } - if (endptr) { - *endptr = (char *)(nptr + ii); - } - return val; + unsigned long long int val = 0; + int ii = 0; + for (; ii < 16; ++ii) { + char c = nptr[ii]; + if (c >= '0' && c <= '9') { + val = (val << 4) + (c - '0'); + } else if (c >= 'a' && c <= 'f') { + val = (val << 4) + (c - 'a' + 10); + } else { + break; + } + } + if (endptr) { + *endptr = (char *)(nptr + ii); + } + return val; } static int prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, - uintptr_t *mm_start, uintptr_t *mm_end) { - int ret = ENOENT; /* not found */ - *mm_start = *mm_end = 0; + uintptr_t *mm_start, uintptr_t *mm_end) { + int ret = ENOENT; /* not found */ + *mm_start = *mm_end = 0; - /* + /* * Each line of /proc//maps is: * - * @@ -57,90 +57,93 @@ prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, * as long as `buf` contains the start of a mapping line it can always be * parsed. 
*/ - static const int kMappingFieldsWidth = 34; + static const int kMappingFieldsWidth = 34; - int fd = -1; - char buf[4096]; - ssize_t remaining = 0; /* actual number of bytes read to buf */ - char *line = NULL; + int fd = -1; + char buf[4096]; + ssize_t remaining = 0; /* actual number of bytes read to buf */ + char *line = NULL; - while (1) { - if (fd < 0) { - /* case 0: initial open of maps file */ - fd = malloc_open(maps_path, O_RDONLY); - if (fd < 0) { - return errno; - } + while (1) { + if (fd < 0) { + /* case 0: initial open of maps file */ + fd = malloc_open(maps_path, O_RDONLY); + if (fd < 0) { + return errno; + } - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - ret = errno; - break; - } - line = buf; - } else if (line == NULL) { - /* case 1: no newline found in buf */ - remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { - ret = errno; - break; - } - line = memchr(buf, '\n', remaining); - if (line != NULL) { - line++; /* advance to character after newline */ - remaining -= (line - buf); - } - } else if (line != NULL && remaining < kMappingFieldsWidth) { - /* + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = buf; + } else if (line == NULL) { + /* case 1: no newline found in buf */ + remaining = malloc_read_fd(fd, buf, sizeof(buf)); + if (remaining <= 0) { + ret = errno; + break; + } + line = memchr(buf, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - buf); + } + } else if (line != NULL && remaining < kMappingFieldsWidth) { + /* * case 2: found newline but insufficient characters remaining in * buf */ - memcpy(buf, line, - remaining); /* copy remaining characters to start of buf */ - line = buf; + memcpy(buf, line, + remaining); /* copy remaining characters to start of buf */ + line = buf; - size_t count = - malloc_read_fd(fd, buf + remaining, sizeof(buf) - remaining); - if (count 
<= 0) { - ret = errno; - break; - } + size_t count = malloc_read_fd( + fd, buf + remaining, sizeof(buf) - remaining); + if (count <= 0) { + ret = errno; + break; + } - remaining += count; /* actual number of bytes read to buf */ - } else { - /* case 3: found newline and sufficient characters to parse */ + remaining += + count; /* actual number of bytes read to buf */ + } else { + /* case 3: found newline and sufficient characters to parse */ - /* parse - */ - char *tmp = line; - uintptr_t start_addr = (uintptr_t)strtoull_hex(tmp, &tmp); - if (addr >= start_addr) { - tmp++; /* advance to character after '-' */ - uintptr_t end_addr = (uintptr_t)strtoull_hex(tmp, NULL); - if (addr < end_addr) { - *mm_start = start_addr; - *mm_end = end_addr; - ret = 0; - break; - } - } + /* parse - */ + char *tmp = line; + uintptr_t start_addr = (uintptr_t)strtoull_hex( + tmp, &tmp); + if (addr >= start_addr) { + tmp++; /* advance to character after '-' */ + uintptr_t end_addr = (uintptr_t)strtoull_hex( + tmp, NULL); + if (addr < end_addr) { + *mm_start = start_addr; + *mm_end = end_addr; + ret = 0; + break; + } + } - /* Advance to character after next newline in the current buf. */ - char *prev_line = line; - line = memchr(line, '\n', remaining); - if (line != NULL) { - line++; /* advance to character after newline */ - remaining -= (line - prev_line); - } - } - } + /* Advance to character after next newline in the current buf. */ + char *prev_line = line; + line = memchr(line, '\n', remaining); + if (line != NULL) { + line++; /* advance to character after newline */ + remaining -= (line - prev_line); + } + } + } - malloc_close(fd); - return ret; + malloc_close(fd); + return ret; } int prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) { - /* + /* * NOTE: Prior to kernel 4.5 an entry for every thread stack was included in * /proc//maps as [STACK:]. Starting with kernel 4.5 only the main * thread stack remains as the [stack] mapping. 
For other thread stacks the @@ -148,19 +151,19 @@ prof_thread_stack_range(uintptr_t fp, uintptr_t *low, uintptr_t *high) { * labeled as [STACK:tid]). * https://lists.ubuntu.com/archives/kernel-team/2016-March/074681.html */ - char maps_path[64]; // "/proc//task//maps" - malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", - getpid(), gettid()); - return prof_mapping_containing_addr(fp, maps_path, low, high); + char maps_path[64]; // "/proc//task//maps" + malloc_snprintf(maps_path, sizeof(maps_path), "/proc/%d/task/%d/maps", + getpid(), gettid()); + return prof_mapping_containing_addr(fp, maps_path, low, high); } #else int prof_thread_stack_range( - UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) { - *stack_start = *stack_end = 0; - return ENOENT; + UNUSED uintptr_t addr, uintptr_t *stack_start, uintptr_t *stack_end) { + *stack_start = *stack_end = 0; + return ENOENT; } -#endif // __linux__ +#endif // __linux__ diff --git a/src/prof_stats.c b/src/prof_stats.c index 5d1a506b..db248be7 100644 --- a/src/prof_stats.c +++ b/src/prof_stats.c @@ -3,8 +3,8 @@ #include "jemalloc/internal/prof_stats.h" -bool opt_prof_stats = false; -malloc_mutex_t prof_stats_mtx; +bool opt_prof_stats = false; +malloc_mutex_t prof_stats_mtx; static prof_stats_t prof_stats_live[PROF_SC_NSIZES]; static prof_stats_t prof_stats_accum[PROF_SC_NSIZES]; diff --git a/src/prof_sys.c b/src/prof_sys.c index e3b7bbcb..be50c0be 100644 --- a/src/prof_sys.c +++ b/src/prof_sys.c @@ -8,8 +8,8 @@ #include "jemalloc/internal/prof_sys.h" #ifdef JEMALLOC_PROF_LIBUNWIND -#define UNW_LOCAL_ONLY -#include +# define UNW_LOCAL_ONLY +# include #endif #ifdef JEMALLOC_PROF_LIBGCC @@ -18,14 +18,15 @@ * use libgcc's unwinding functionality, but after we've included that, we've * already hooked _Unwind_Backtrace. We'll temporarily disable hooking. 
*/ -#undef _Unwind_Backtrace -#include -#define _Unwind_Backtrace JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) +# undef _Unwind_Backtrace +# include +# define _Unwind_Backtrace \ + JEMALLOC_TEST_HOOK(_Unwind_Backtrace, test_hooks_libc_hook) #endif #ifdef JEMALLOC_PROF_FRAME_POINTER // execinfo backtrace() as fallback unwinder -#include +# include #endif /******************************************************************************/ @@ -77,7 +78,7 @@ prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) { static _Unwind_Reason_Code prof_unwind_callback(struct _Unwind_Context *context, void *arg) { prof_unwind_data_t *data = (prof_unwind_data_t *)arg; - void *ip; + void *ip; cassert(config_prof); @@ -115,14 +116,15 @@ struct stack_range { struct thread_unwind_info { struct stack_range stack_range; - bool fallback; + bool fallback; }; static __thread struct thread_unwind_info unwind_info = { - .stack_range = { - .start = 0, - .end = 0, - }, - .fallback = false, + .stack_range = + { + .start = 0, + .end = 0, + }, + .fallback = false, }; /* thread local */ static void @@ -142,10 +144,11 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { uintptr_t fp = (uintptr_t)__builtin_frame_address(0); /* new thread - get the stack range */ - if (!unwind_info.fallback && - unwind_info.stack_range.start == unwind_info.stack_range.end) { + if (!unwind_info.fallback + && unwind_info.stack_range.start == unwind_info.stack_range.end) { if (prof_thread_stack_range(fp, &unwind_info.stack_range.start, - &unwind_info.stack_range.end) != 0) { + &unwind_info.stack_range.end) + != 0) { unwind_info.fallback = true; } else { assert(fp >= unwind_info.stack_range.start @@ -159,8 +162,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { unsigned ii = 0; while (ii < max_len && fp != 0) { - if (fp < unwind_info.stack_range.start || - fp >= unwind_info.stack_range.end) { + if (fp < unwind_info.stack_range.start + || fp >= 
unwind_info.stack_range.end) { /* * Determining the stack range from procfs can be * relatively expensive especially for programs with @@ -173,7 +176,7 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { unwind_info.fallback = true; goto label_fallback; } - void* ip = ((void **)fp)[1]; + void *ip = ((void **)fp)[1]; if (ip == 0) { break; } @@ -205,21 +208,21 @@ JEMALLOC_DIAGNOSTIC_IGNORE_FRAME_ADDRESS static void prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { /* The input arg must be a constant for __builtin_return_address. */ -#define BT_FRAME(i) \ - if ((i) < max_len) { \ - void *p; \ - if (__builtin_frame_address(i) == 0) { \ - return; \ - } \ - p = __builtin_return_address(i); \ - if (p == NULL) { \ - return; \ - } \ - vec[(i)] = p; \ - *len = (i) + 1; \ - } else { \ - return; \ - } +# define BT_FRAME(i) \ + if ((i) < max_len) { \ + void *p; \ + if (__builtin_frame_address(i) == 0) { \ + return; \ + } \ + p = __builtin_return_address(i); \ + if (p == NULL) { \ + return; \ + } \ + vec[(i)] = p; \ + *len = (i) + 1; \ + } else { \ + return; \ + } cassert(config_prof); assert(vec != NULL); @@ -506,8 +509,8 @@ prof_backtrace_impl(void **vec, unsigned *len, unsigned max_len) { BT_FRAME(253) BT_FRAME(254) BT_FRAME(255) -#undef BT_FRAME -JEMALLOC_DIAGNOSTIC_POP +# undef BT_FRAME + JEMALLOC_DIAGNOSTIC_POP } #else static void @@ -568,8 +571,9 @@ prof_sys_thread_name_fetch(tsd_t *tsd) { return; } - if (prof_sys_thread_name_read(tdata->thread_name, - PROF_THREAD_NAME_MAX_LEN) != 0) { + if (prof_sys_thread_name_read( + tdata->thread_name, PROF_THREAD_NAME_MAX_LEN) + != 0) { prof_thread_name_clear(tdata); } @@ -592,32 +596,32 @@ prof_get_pid_namespace(void) { #if defined(_WIN32) || defined(__APPLE__) // Not supported, do nothing. 
#else - char buf[PATH_MAX]; - const char* linkname = -# if defined(__FreeBSD__) || defined(__DragonFly__) + char buf[PATH_MAX]; + const char *linkname = +# if defined(__FreeBSD__) || defined(__DragonFly__) "/proc/curproc/ns/pid" -# else +# else "/proc/self/ns/pid" -# endif +# endif ; ssize_t linklen = -# ifndef JEMALLOC_READLINKAT - readlink(linkname, buf, PATH_MAX) -# else - readlinkat(AT_FDCWD, linkname, buf, PATH_MAX) -# endif +# ifndef JEMALLOC_READLINKAT + readlink(linkname, buf, PATH_MAX) +# else + readlinkat(AT_FDCWD, linkname, buf, PATH_MAX) +# endif ; // namespace string is expected to be like pid:[4026531836] if (linklen > 0) { // Trim the trailing "]" - buf[linklen-1] = '\0'; - char* index = strtok(buf, "pid:["); + buf[linklen - 1] = '\0'; + char *index = strtok(buf, "pid:["); ret = atol(index); } #endif - return ret; + return ret; } /* @@ -647,8 +651,8 @@ struct prof_dump_arg_s { }; static void -prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond, - const char *format, ...) { +prof_dump_check_possible_error( + prof_dump_arg_t *arg, bool err_cond, const char *format, ...) 
{ assert(!arg->error); if (!err_cond) { return; @@ -660,7 +664,7 @@ prof_dump_check_possible_error(prof_dump_arg_t *arg, bool err_cond, } va_list ap; - char buf[PROF_PRINTF_BUFSIZE]; + char buf[PROF_PRINTF_BUFSIZE]; va_start(ap, format); malloc_vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); @@ -692,8 +696,8 @@ prof_dump_flush(void *opaque, const char *s) { cassert(config_prof); prof_dump_arg_t *arg = (prof_dump_arg_t *)opaque; if (!arg->error) { - ssize_t err = prof_dump_write_file(arg->prof_dump_fd, s, - strlen(s)); + ssize_t err = prof_dump_write_file( + arg->prof_dump_fd, s, strlen(s)); prof_dump_check_possible_error(arg, err == -1, ": failed to write during heap profile flush\n"); } @@ -707,36 +711,37 @@ prof_dump_close(prof_dump_arg_t *arg) { } #ifdef __APPLE__ -#include +# include -#ifdef __LP64__ -typedef struct mach_header_64 mach_header_t; +# ifdef __LP64__ +typedef struct mach_header_64 mach_header_t; typedef struct segment_command_64 segment_command_t; -#define MH_MAGIC_VALUE MH_MAGIC_64 -#define MH_CIGAM_VALUE MH_CIGAM_64 -#define LC_SEGMENT_VALUE LC_SEGMENT_64 -#else -typedef struct mach_header mach_header_t; +# define MH_MAGIC_VALUE MH_MAGIC_64 +# define MH_CIGAM_VALUE MH_CIGAM_64 +# define LC_SEGMENT_VALUE LC_SEGMENT_64 +# else +typedef struct mach_header mach_header_t; typedef struct segment_command segment_command_t; -#define MH_MAGIC_VALUE MH_MAGIC -#define MH_CIGAM_VALUE MH_CIGAM -#define LC_SEGMENT_VALUE LC_SEGMENT -#endif +# define MH_MAGIC_VALUE MH_MAGIC +# define MH_CIGAM_VALUE MH_CIGAM +# define LC_SEGMENT_VALUE LC_SEGMENT +# endif static void prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) { const mach_header_t *header = (const mach_header_t *) _dyld_get_image_header(image_index); - if (header == NULL || (header->magic != MH_MAGIC_VALUE && - header->magic != MH_CIGAM_VALUE)) { + if (header == NULL + || (header->magic != MH_MAGIC_VALUE + && header->magic != MH_CIGAM_VALUE)) { // Invalid header return; } - 
intptr_t slide = _dyld_get_image_vmaddr_slide(image_index); - const char *name = _dyld_get_image_name(image_index); - struct load_command *load_cmd = (struct load_command *) - ((char *)header + sizeof(mach_header_t)); + intptr_t slide = _dyld_get_image_vmaddr_slide(image_index); + const char *name = _dyld_get_image_name(image_index); + struct load_command *load_cmd = (struct load_command *)((char *)header + + sizeof(mach_header_t)); for (uint32_t i = 0; load_cmd && (i < header->ncmds); i++) { if (load_cmd->cmd == LC_SEGMENT_VALUE) { const segment_command_t *segment_cmd = @@ -744,14 +749,17 @@ prof_dump_dyld_image_vmaddr(buf_writer_t *buf_writer, uint32_t image_index) { if (!strcmp(segment_cmd->segname, "__TEXT")) { char buffer[PATH_MAX + 1]; malloc_snprintf(buffer, sizeof(buffer), - "%016llx-%016llx: %s\n", segment_cmd->vmaddr + slide, - segment_cmd->vmaddr + slide + segment_cmd->vmsize, name); + "%016llx-%016llx: %s\n", + segment_cmd->vmaddr + slide, + segment_cmd->vmaddr + slide + + segment_cmd->vmsize, + name); buf_writer_cb(buf_writer, buffer); return; } } - load_cmd = - (struct load_command *)((char *)load_cmd + load_cmd->cmdsize); + load_cmd = (struct load_command *)((char *)load_cmd + + load_cmd->cmdsize); } } @@ -772,48 +780,48 @@ prof_dump_maps(buf_writer_t *buf_writer) { prof_dump_dyld_maps(buf_writer); } #else /* !__APPLE__ */ -#ifndef _WIN32 +# ifndef _WIN32 JEMALLOC_FORMAT_PRINTF(1, 2) static int prof_open_maps_internal(const char *format, ...) 
{ - int mfd; + int mfd; va_list ap; - char filename[PATH_MAX + 1]; + char filename[PATH_MAX + 1]; va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); -#if defined(O_CLOEXEC) +# if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); -#else +# else mfd = open(filename, O_RDONLY); if (mfd != -1) { fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); } -#endif +# endif return mfd; } -#endif +# endif static int prof_dump_open_maps_impl(void) { int mfd; cassert(config_prof); -#if defined(__FreeBSD__) || defined(__DragonFly__) +# if defined(__FreeBSD__) || defined(__DragonFly__) mfd = prof_open_maps_internal("/proc/curproc/map"); -#elif defined(_WIN32) +# elif defined(_WIN32) mfd = -1; // Not implemented -#else +# else int pid = prof_getpid(); mfd = prof_open_maps_internal("/proc/%d/task/%d/maps", pid, pid); if (mfd == -1) { mfd = prof_open_maps_internal("/proc/%d/maps", pid); } -#endif +# endif return mfd; } prof_dump_open_maps_t *JET_MUTABLE prof_dump_open_maps = @@ -840,12 +848,12 @@ prof_dump_maps(buf_writer_t *buf_writer) { #endif /* __APPLE__ */ static bool -prof_dump(tsd_t *tsd, bool propagate_err, const char *filename, - bool leakcheck) { +prof_dump( + tsd_t *tsd, bool propagate_err, const char *filename, bool leakcheck) { cassert(config_prof); assert(tsd_reentrancy_level_get(tsd) == 0); - prof_tdata_t * tdata = prof_tdata_get(tsd, true); + prof_tdata_t *tdata = prof_tdata_get(tsd, true); if (tdata == NULL) { return true; } @@ -892,7 +900,7 @@ prof_strncpy(char *UNUSED dest, const char *UNUSED src, size_t UNUSED size) { } static const char * -prof_prefix_get(tsdn_t* tsdn) { +prof_prefix_get(tsdn_t *tsdn) { malloc_mutex_assert_owner(tsdn, &prof_dump_filename_mtx); return prof_prefix == NULL ? 
opt_prof_prefix : prof_prefix; @@ -919,25 +927,26 @@ prof_dump_filename(tsd_t *tsd, char *filename, char v, uint64_t vseq) { if (opt_prof_pid_namespace) { /* "....v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%ld.%d.%"FMTu64".%c%"FMTu64".heap", prefix, - prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v, - vseq); + "%s.%ld.%d.%" FMTu64 ".%c%" FMTu64 ".heap", prefix, + prof_get_pid_namespace(), prof_getpid(), + prof_dump_seq, v, vseq); } else { /* "...v.heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c%"FMTu64".heap", prefix, prof_getpid(), - prof_dump_seq, v, vseq); + "%s.%d.%" FMTu64 ".%c%" FMTu64 ".heap", prefix, + prof_getpid(), prof_dump_seq, v, vseq); } } else { if (opt_prof_pid_namespace) { /* ".....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%ld.%d.%"FMTu64".%c.heap", prefix, - prof_get_pid_namespace(), prof_getpid(), prof_dump_seq, v); + "%s.%ld.%d.%" FMTu64 ".%c.heap", prefix, + prof_get_pid_namespace(), prof_getpid(), + prof_dump_seq, v); } else { /* "....heap" */ malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE, - "%s.%d.%"FMTu64".%c.heap", prefix, prof_getpid(), + "%s.%d.%" FMTu64 ".%c.heap", prefix, prof_getpid(), prof_dump_seq, v); } } @@ -949,11 +958,12 @@ prof_get_default_filename(tsdn_t *tsdn, char *filename, uint64_t ind) { malloc_mutex_lock(tsdn, &prof_dump_filename_mtx); if (opt_prof_pid_namespace) { malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%ld.%d.%"FMTu64".json", prof_prefix_get(tsdn), + "%s.%ld.%d.%" FMTu64 ".json", prof_prefix_get(tsdn), prof_get_pid_namespace(), prof_getpid(), ind); } else { malloc_snprintf(filename, PROF_DUMP_FILENAME_LEN, - "%s.%d.%"FMTu64".json", prof_prefix_get(tsdn), prof_getpid(), ind); + "%s.%d.%" FMTu64 ".json", prof_prefix_get(tsdn), + prof_getpid(), ind); } malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); } @@ -980,8 +990,8 @@ prof_prefix_set(tsdn_t *tsdn, const char *prefix) { if (prof_prefix == NULL) { 
malloc_mutex_unlock(tsdn, &prof_dump_filename_mtx); /* Everything is still guarded by ctl_mtx. */ - char *buffer = base_alloc(tsdn, prof_base, - PROF_DUMP_FILENAME_LEN, QUANTUM); + char *buffer = base_alloc( + tsdn, prof_base, PROF_DUMP_FILENAME_LEN, QUANTUM); if (buffer == NULL) { return true; } @@ -1018,7 +1028,8 @@ prof_mdump_impl(tsd_t *tsd, const char *filename) { /* No filename specified, so automatically generate one. */ malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_filename_mtx); if (prof_prefix_get(tsd_tsdn(tsd))[0] == '\0') { - malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_filename_mtx); + malloc_mutex_unlock( + tsd_tsdn(tsd), &prof_dump_filename_mtx); return true; } prof_dump_filename(tsd, filename_buf, 'm', prof_dump_mseq); diff --git a/src/prof_threshold.c b/src/prof_threshold.c index 0b5cb53c..5b72a491 100644 --- a/src/prof_threshold.c +++ b/src/prof_threshold.c @@ -22,8 +22,8 @@ prof_threshold_hook_set(prof_threshold_hook_t hook) { prof_threshold_hook_t prof_threshold_hook_get(void) { - return (prof_threshold_hook_t)atomic_load_p(&prof_threshold_hook, - ATOMIC_ACQUIRE); + return (prof_threshold_hook_t)atomic_load_p( + &prof_threshold_hook, ATOMIC_ACQUIRE); } /* Invoke callback for threshold reached */ @@ -32,10 +32,10 @@ prof_threshold_update(tsd_t *tsd) { prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); if (prof_threshold_hook == NULL) { return; - } + } uint64_t alloc = tsd_thread_allocated_get(tsd); uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); + peak_t *peak = tsd_peakp_get(tsd); pre_reentrancy(tsd, NULL); prof_threshold_hook(alloc, dalloc, peak->cur_max); post_reentrancy(tsd); @@ -62,8 +62,8 @@ prof_threshold_enabled(void) { } te_base_cb_t prof_threshold_te_handler = { - .enabled = &prof_threshold_enabled, - .new_event_wait = &prof_threshold_new_event_wait, - .postponed_event_wait = &prof_threshold_postponed_event_wait, - .event_handler = &prof_threshold_event_handler, + .enabled = 
&prof_threshold_enabled, + .new_event_wait = &prof_threshold_new_event_wait, + .postponed_event_wait = &prof_threshold_postponed_event_wait, + .event_handler = &prof_threshold_event_handler, }; diff --git a/src/psset.c b/src/psset.c index afe9f1c1..509df064 100644 --- a/src/psset.c +++ b/src/psset.c @@ -32,16 +32,16 @@ psset_stats_accum(psset_stats_t *dst, psset_stats_t *src) { psset_bin_stats_accum(&dst->merged, &src->merged); for (int huge = 0; huge < PSSET_NHUGE; huge++) { psset_bin_stats_accum(&dst->slabs[huge], &src->slabs[huge]); - psset_bin_stats_accum(&dst->full_slabs[huge], - &src->full_slabs[huge]); - psset_bin_stats_accum(&dst->empty_slabs[huge], - &src->empty_slabs[huge]); + psset_bin_stats_accum( + &dst->full_slabs[huge], &src->full_slabs[huge]); + psset_bin_stats_accum( + &dst->empty_slabs[huge], &src->empty_slabs[huge]); } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { - psset_bin_stats_accum(&dst->nonfull_slabs[i][0], - &src->nonfull_slabs[i][0]); - psset_bin_stats_accum(&dst->nonfull_slabs[i][1], - &src->nonfull_slabs[i][1]); + psset_bin_stats_accum( + &dst->nonfull_slabs[i][0], &src->nonfull_slabs[i][0]); + psset_bin_stats_accum( + &dst->nonfull_slabs[i][1], &src->nonfull_slabs[i][1]); } } @@ -83,10 +83,10 @@ psset_slab_stats_insert_remove(psset_stats_t *stats, if (config_debug) { psset_bin_stats_t check_stats[PSSET_NHUGE] = {{0}}; for (int huge = 0; huge < PSSET_NHUGE; huge++) { - psset_bin_stats_accum(&check_stats[huge], - &stats->full_slabs[huge]); - psset_bin_stats_accum(&check_stats[huge], - &stats->empty_slabs[huge]); + psset_bin_stats_accum( + &check_stats[huge], &stats->full_slabs[huge]); + psset_bin_stats_accum( + &check_stats[huge], &stats->empty_slabs[huge]); for (pszind_t pind = 0; pind < PSSET_NPSIZES; pind++) { psset_bin_stats_accum(&check_stats[huge], &stats->nonfull_slabs[pind][huge]); @@ -112,14 +112,14 @@ psset_slab_stats_insert_remove(psset_stats_t *stats, } static void -psset_slab_stats_insert(psset_stats_t *stats, 
psset_bin_stats_t *binstats, - hpdata_t *ps) { +psset_slab_stats_insert( + psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { psset_slab_stats_insert_remove(stats, binstats, ps, true); } static void -psset_slab_stats_remove(psset_stats_t *stats, psset_bin_stats_t *binstats, - hpdata_t *ps) { +psset_slab_stats_remove( + psset_stats_t *stats, psset_bin_stats_t *binstats, hpdata_t *ps) { psset_slab_stats_insert_remove(stats, binstats, ps, false); } @@ -127,9 +127,9 @@ static pszind_t psset_hpdata_heap_index(const hpdata_t *ps) { assert(!hpdata_full(ps)); assert(!hpdata_empty(ps)); - size_t longest_free_range = hpdata_longest_free_range_get(ps); - pszind_t pind = sz_psz2ind(sz_psz_quantize_floor( - longest_free_range << LG_PAGE)); + size_t longest_free_range = hpdata_longest_free_range_get(ps); + pszind_t pind = sz_psz2ind( + sz_psz_quantize_floor(longest_free_range << LG_PAGE)); assert(pind < PSSET_NPSIZES); return pind; } @@ -161,8 +161,8 @@ psset_stats_insert(psset_t *psset, hpdata_t *ps) { psset_slab_stats_insert(stats, psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_slab_stats_insert(stats, psset->stats.nonfull_slabs[pind], - ps); + psset_slab_stats_insert( + stats, psset->stats.nonfull_slabs[pind], ps); } } @@ -175,8 +175,8 @@ psset_stats_remove(psset_t *psset, hpdata_t *ps) { psset_slab_stats_remove(stats, psset->stats.full_slabs, ps); } else { pszind_t pind = psset_hpdata_heap_index(ps); - psset_slab_stats_remove(stats, psset->stats.nonfull_slabs[pind], - ps); + psset_slab_stats_remove( + stats, psset->stats.nonfull_slabs[pind], ps); } } @@ -264,7 +264,7 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { * purge LRU within a given dirtiness bucket. 
*/ if (hpdata_purge_allowed_get(ps)) { - size_t ind = psset_purge_list_ind(ps); + size_t ind = psset_purge_list_ind(ps); hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; hpdata_purge_list_remove(purge_list, ps); if (hpdata_purge_list_empty(purge_list)) { @@ -276,14 +276,13 @@ psset_maybe_remove_purge_list(psset_t *psset, hpdata_t *ps) { static void psset_maybe_insert_purge_list(psset_t *psset, hpdata_t *ps) { if (hpdata_purge_allowed_get(ps)) { - size_t ind = psset_purge_list_ind(ps); + size_t ind = psset_purge_list_ind(ps); hpdata_purge_list_t *purge_list = &psset->to_purge[ind]; if (hpdata_purge_list_empty(purge_list)) { fb_set(psset->purge_bitmap, PSSET_NPURGE_LISTS, ind); } hpdata_purge_list_append(purge_list, ps); } - } void @@ -343,13 +342,13 @@ psset_enumerate_search(psset_t *psset, pszind_t pind, size_t size) { return NULL; } - hpdata_t *ps = NULL; + hpdata_t *ps = NULL; hpdata_age_heap_enumerate_helper_t helper; hpdata_age_heap_enumerate_prepare(&psset->pageslabs[pind], &helper, PSSET_ENUMERATE_MAX_NUM, sizeof(helper.bfs_queue) / sizeof(void *)); - while ((ps = hpdata_age_heap_enumerate_next(&psset->pageslabs[pind], - &helper))) { + while ((ps = hpdata_age_heap_enumerate_next( + &psset->pageslabs[pind], &helper))) { if (hpdata_longest_free_range_get(ps) >= size) { return ps; } @@ -363,7 +362,7 @@ psset_pick_alloc(psset_t *psset, size_t size) { assert((size & PAGE_MASK) == 0); assert(size <= HUGEPAGE); - pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); + pszind_t min_pind = sz_psz2ind(sz_psz_quantize_ceil(size)); hpdata_t *ps = NULL; /* See comments in eset_first_fit for why we enumerate search below. 
*/ @@ -375,8 +374,8 @@ psset_pick_alloc(psset_t *psset, size_t size) { } } - pszind_t pind = (pszind_t)fb_ffs(psset->pageslab_bitmap, PSSET_NPSIZES, - (size_t)min_pind); + pszind_t pind = (pszind_t)fb_ffs( + psset->pageslab_bitmap, PSSET_NPSIZES, (size_t)min_pind); if (pind == PSSET_NPSIZES) { return hpdata_empty_list_first(&psset->empty); } @@ -392,8 +391,8 @@ psset_pick_alloc(psset_t *psset, size_t size) { hpdata_t * psset_pick_purge(psset_t *psset) { - ssize_t ind_ssz = fb_fls(psset->purge_bitmap, PSSET_NPURGE_LISTS, - PSSET_NPURGE_LISTS - 1); + ssize_t ind_ssz = fb_fls( + psset->purge_bitmap, PSSET_NPURGE_LISTS, PSSET_NPURGE_LISTS - 1); if (ind_ssz < 0) { return NULL; } diff --git a/src/rtree.c b/src/rtree.c index b6ac04b7..ac27f829 100644 --- a/src/rtree.c +++ b/src/rtree.c @@ -20,7 +20,7 @@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { rtree->base = base; if (malloc_mutex_init(&rtree->init_lock, "rtree", WITNESS_RANK_RTREE, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } @@ -29,19 +29,19 @@ rtree_new(rtree_t *rtree, base_t *base, bool zeroed) { static rtree_node_elm_t * rtree_node_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_node_elm_t *)base_alloc_rtree(tsdn, rtree->base, - nelms * sizeof(rtree_node_elm_t)); + return (rtree_node_elm_t *)base_alloc_rtree( + tsdn, rtree->base, nelms * sizeof(rtree_node_elm_t)); } static rtree_leaf_elm_t * rtree_leaf_alloc(tsdn_t *tsdn, rtree_t *rtree, size_t nelms) { - return (rtree_leaf_elm_t *)base_alloc_rtree(tsdn, rtree->base, - nelms * sizeof(rtree_leaf_elm_t)); + return (rtree_leaf_elm_t *)base_alloc_rtree( + tsdn, rtree->base, nelms * sizeof(rtree_leaf_elm_t)); } static rtree_node_elm_t * -rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, - atomic_p_t *elmp) { +rtree_node_init( + tsdn_t *tsdn, rtree_t *rtree, unsigned level, atomic_p_t *elmp) { malloc_mutex_lock(tsdn, &rtree->init_lock); /* * If *elmp is non-null, then it was initialized with 
the init lock @@ -49,8 +49,8 @@ rtree_node_init(tsdn_t *tsdn, rtree_t *rtree, unsigned level, */ rtree_node_elm_t *node = atomic_load_p(elmp, ATOMIC_RELAXED); if (node == NULL) { - node = rtree_node_alloc(tsdn, rtree, ZU(1) << - rtree_levels[level].bits); + node = rtree_node_alloc( + tsdn, rtree, ZU(1) << rtree_levels[level].bits); if (node == NULL) { malloc_mutex_unlock(tsdn, &rtree->init_lock); return NULL; @@ -75,8 +75,8 @@ rtree_leaf_init(tsdn_t *tsdn, rtree_t *rtree, atomic_p_t *elmp) { */ rtree_leaf_elm_t *leaf = atomic_load_p(elmp, ATOMIC_RELAXED); if (leaf == NULL) { - leaf = rtree_leaf_alloc(tsdn, rtree, ZU(1) << - rtree_levels[RTREE_HEIGHT-1].bits); + leaf = rtree_leaf_alloc( + tsdn, rtree, ZU(1) << rtree_levels[RTREE_HEIGHT - 1].bits); if (leaf == NULL) { malloc_mutex_unlock(tsdn, &rtree->init_lock); return NULL; @@ -107,11 +107,11 @@ rtree_child_node_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_node_elm_t *node; if (dependent) { - node = (rtree_node_elm_t *)atomic_load_p(&elm->child, - ATOMIC_RELAXED); + node = (rtree_node_elm_t *)atomic_load_p( + &elm->child, ATOMIC_RELAXED); } else { - node = (rtree_node_elm_t *)atomic_load_p(&elm->child, - ATOMIC_ACQUIRE); + node = (rtree_node_elm_t *)atomic_load_p( + &elm->child, ATOMIC_ACQUIRE); } assert(!dependent || node != NULL); @@ -136,11 +136,11 @@ rtree_child_leaf_tryread(rtree_node_elm_t *elm, bool dependent) { rtree_leaf_elm_t *leaf; if (dependent) { - leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child, - ATOMIC_RELAXED); + leaf = (rtree_leaf_elm_t *)atomic_load_p( + &elm->child, ATOMIC_RELAXED); } else { - leaf = (rtree_leaf_elm_t *)atomic_load_p(&elm->child, - ATOMIC_ACQUIRE); + leaf = (rtree_leaf_elm_t *)atomic_load_p( + &elm->child, ATOMIC_ACQUIRE); } assert(!dependent || leaf != NULL); @@ -181,53 +181,54 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, } } -#define RTREE_GET_CHILD(level) { \ - assert(level < RTREE_HEIGHT-1); \ - if (level != 0 && 
!dependent && \ - unlikely(!rtree_node_valid(node))) { \ - return NULL; \ - } \ - uintptr_t subkey = rtree_subkey(key, level); \ - if (level + 2 < RTREE_HEIGHT) { \ - node = init_missing ? \ - rtree_child_node_read(tsdn, rtree, \ - &node[subkey], level, dependent) : \ - rtree_child_node_tryread(&node[subkey], \ - dependent); \ - } else { \ - leaf = init_missing ? \ - rtree_child_leaf_read(tsdn, rtree, \ - &node[subkey], level, dependent) : \ - rtree_child_leaf_tryread(&node[subkey], \ - dependent); \ - } \ +#define RTREE_GET_CHILD(level) \ + { \ + assert(level < RTREE_HEIGHT - 1); \ + if (level != 0 && !dependent \ + && unlikely(!rtree_node_valid(node))) { \ + return NULL; \ + } \ + uintptr_t subkey = rtree_subkey(key, level); \ + if (level + 2 < RTREE_HEIGHT) { \ + node = init_missing \ + ? rtree_child_node_read(tsdn, rtree, \ + &node[subkey], level, dependent) \ + : rtree_child_node_tryread( \ + &node[subkey], dependent); \ + } else { \ + leaf = init_missing \ + ? rtree_child_leaf_read(tsdn, rtree, \ + &node[subkey], level, dependent) \ + : rtree_child_leaf_tryread( \ + &node[subkey], dependent); \ + } \ } /* * Cache replacement upon hard lookup (i.e. L1 & L2 rtree cache miss): * (1) evict last entry in L2 cache; (2) move the collision slot from L1 * cache down to L2; and 3) fill L1. 
*/ -#define RTREE_GET_LEAF(level) { \ - assert(level == RTREE_HEIGHT-1); \ - if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \ - return NULL; \ - } \ - if (RTREE_CTX_NCACHE_L2 > 1) { \ - memmove(&rtree_ctx->l2_cache[1], \ - &rtree_ctx->l2_cache[0], \ - sizeof(rtree_ctx_cache_elm_t) * \ - (RTREE_CTX_NCACHE_L2 - 1)); \ - } \ - size_t slot = rtree_cache_direct_map(key); \ - rtree_ctx->l2_cache[0].leafkey = \ - rtree_ctx->cache[slot].leafkey; \ - rtree_ctx->l2_cache[0].leaf = \ - rtree_ctx->cache[slot].leaf; \ - uintptr_t leafkey = rtree_leafkey(key); \ - rtree_ctx->cache[slot].leafkey = leafkey; \ - rtree_ctx->cache[slot].leaf = leaf; \ - uintptr_t subkey = rtree_subkey(key, level); \ - return &leaf[subkey]; \ +#define RTREE_GET_LEAF(level) \ + { \ + assert(level == RTREE_HEIGHT - 1); \ + if (!dependent && unlikely(!rtree_leaf_valid(leaf))) { \ + return NULL; \ + } \ + if (RTREE_CTX_NCACHE_L2 > 1) { \ + memmove(&rtree_ctx->l2_cache[1], \ + &rtree_ctx->l2_cache[0], \ + sizeof(rtree_ctx_cache_elm_t) \ + * (RTREE_CTX_NCACHE_L2 - 1)); \ + } \ + size_t slot = rtree_cache_direct_map(key); \ + rtree_ctx->l2_cache[0].leafkey = \ + rtree_ctx->cache[slot].leafkey; \ + rtree_ctx->l2_cache[0].leaf = rtree_ctx->cache[slot].leaf; \ + uintptr_t leafkey = rtree_leafkey(key); \ + rtree_ctx->cache[slot].leafkey = leafkey; \ + rtree_ctx->cache[slot].leaf = leaf; \ + uintptr_t subkey = rtree_subkey(key, level); \ + return &leaf[subkey]; \ } if (RTREE_HEIGHT > 1) { RTREE_GET_CHILD(0) @@ -236,11 +237,11 @@ rtree_leaf_elm_lookup_hard(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, RTREE_GET_CHILD(1) } if (RTREE_HEIGHT > 3) { - for (unsigned i = 2; i < RTREE_HEIGHT-1; i++) { + for (unsigned i = 2; i < RTREE_HEIGHT - 1; i++) { RTREE_GET_CHILD(i) } } - RTREE_GET_LEAF(RTREE_HEIGHT-1) + RTREE_GET_LEAF(RTREE_HEIGHT - 1) #undef RTREE_GET_CHILD #undef RTREE_GET_LEAF not_reached(); diff --git a/src/safety_check.c b/src/safety_check.c index d3f68fbc..d052718d 100644 --- 
a/src/safety_check.c +++ b/src/safety_check.c @@ -3,20 +3,24 @@ static safety_check_abort_hook_t safety_check_abort; -void safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, +void +safety_check_fail_sized_dealloc(bool current_dealloc, const void *ptr, size_t true_size, size_t input_size) { - char *src = current_dealloc ? "the current pointer being freed" : - "in thread cache, possibly from previous deallocations"; + char *src = current_dealloc + ? "the current pointer being freed" + : "in thread cache, possibly from previous deallocations"; char *suggest_debug_build = config_debug ? "" : " --enable-debug or"; - safety_check_fail(": size mismatch detected (true size %zu " + safety_check_fail( + ": size mismatch detected (true size %zu " "vs input size %zu), likely caused by application sized " "deallocation bugs (source address: %p, %s). Suggest building with" "%s address sanitizer for debugging. Abort.\n", true_size, input_size, ptr, src, suggest_debug_build); } -void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { +void +safety_check_set_abort(safety_check_abort_hook_t abort_fn) { safety_check_abort = abort_fn; } @@ -25,7 +29,8 @@ void safety_check_set_abort(safety_check_abort_hook_t abort_fn) { * because there are cases only logging crash stack traces. */ static void -safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(const char *buf) { +safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug( + const char *buf) { if (safety_check_abort == NULL) { malloc_write(buf); abort(); @@ -34,7 +39,8 @@ safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(con } } -void safety_check_fail(const char *format, ...) { +void +safety_check_fail(const char *format, ...) { char buf[MALLOC_PRINTF_BUFSIZE]; va_list ap; @@ -42,5 +48,6 @@ void safety_check_fail(const char *format, ...) 
{ malloc_vsnprintf(buf, MALLOC_PRINTF_BUFSIZE, format, ap); va_end(ap); - safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug(buf); + safety_check_detected_heap_corruption___run_address_sanitizer_build_to_debug( + buf); } diff --git a/src/san.c b/src/san.c index 28ea3d7c..5448c67f 100644 --- a/src/san.c +++ b/src/san.c @@ -20,8 +20,8 @@ ssize_t opt_lg_san_uaf_align = SAN_LG_UAF_ALIGN_DEFAULT; uintptr_t san_cache_bin_nonfast_mask = SAN_CACHE_BIN_NONFAST_MASK_DEFAULT; static inline void -san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, - void **addr, size_t size, bool left, bool right) { +san_find_guarded_addr(edata_t *edata, void **guard1, void **guard2, void **addr, + size_t size, bool left, bool right) { assert(!edata_guarded_get(edata)); assert(size % PAGE == 0); *addr = edata_base_get(edata); @@ -74,8 +74,8 @@ san_guard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, : san_one_side_unguarded_sz(size_with_guards); void *guard1, *guard2, *addr; - san_find_guarded_addr(edata, &guard1, &guard2, &addr, usize, left, - right); + san_find_guarded_addr( + edata, &guard1, &guard2, &addr, usize, left, right); assert(edata_state_get(edata) == extent_state_active); ehooks_guard(tsdn, ehooks, guard1, guard2); @@ -109,8 +109,8 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, : san_one_side_guarded_sz(size); void *guard1, *guard2, *addr; - san_find_unguarded_addr(edata, &guard1, &guard2, &addr, size, left, - right); + san_find_unguarded_addr( + edata, &guard1, &guard2, &addr, size, left, right); ehooks_unguard(tsdn, ehooks, (void *)guard1, (void *)guard2); @@ -130,15 +130,15 @@ san_unguard_pages_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, } void -san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap, bool left, bool right) { +san_unguard_pages(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap, + bool left, bool right) { 
san_unguard_pages_impl(tsdn, ehooks, edata, emap, left, right, /* remap */ true); } void -san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - emap_t *emap) { +san_unguard_pages_pre_destroy( + tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, emap_t *emap) { emap_assert_not_mapped(tsdn, emap, edata); /* * We don't want to touch the emap of about to be destroyed extents, as @@ -146,7 +146,7 @@ san_unguard_pages_pre_destroy(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, * we unguard the extents to the right, because retained extents only * own their right guard page per san_bump_alloc's logic. */ - san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false, + san_unguard_pages_impl(tsdn, ehooks, edata, emap, /* left */ false, /* right */ true, /* remap */ false); } @@ -163,9 +163,9 @@ san_stashed_corrupted(void *ptr, size_t size) { void *first, *mid, *last; san_junk_ptr_locations(ptr, size, &first, &mid, &last); - if (*(uintptr_t *)first != uaf_detect_junk || - *(uintptr_t *)mid != uaf_detect_junk || - *(uintptr_t *)last != uaf_detect_junk) { + if (*(uintptr_t *)first != uaf_detect_junk + || *(uintptr_t *)mid != uaf_detect_junk + || *(uintptr_t *)last != uaf_detect_junk) { return true; } @@ -183,7 +183,8 @@ san_check_stashed_ptrs(void **ptrs, size_t nstashed, size_t usize) { assert(stashed != NULL); assert(cache_bin_nonfast_aligned(stashed)); if (unlikely(san_stashed_corrupted(stashed, usize))) { - safety_check_fail(": Write-after-free " + safety_check_fail( + ": Write-after-free " "detected on deallocated pointer %p (size %zu).\n", stashed, usize); } diff --git a/src/san_bump.c b/src/san_bump.c index 88897455..09ed18ca 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -7,30 +7,29 @@ #include "jemalloc/internal/ehooks.h" #include "jemalloc/internal/edata_cache.h" -static bool -san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, - ehooks_t *ehooks, size_t size); +static bool san_bump_grow_locked(tsdn_t *tsdn, 
san_bump_alloc_t *sba, + pac_t *pac, ehooks_t *ehooks, size_t size); edata_t * -san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, +san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t *ehooks, size_t size, bool zero) { assert(san_bump_enabled()); - edata_t* to_destroy; - size_t guarded_size = san_one_side_guarded_sz(size); + edata_t *to_destroy; + size_t guarded_size = san_one_side_guarded_sz(size); malloc_mutex_lock(tsdn, &sba->mtx); - if (sba->curr_reg == NULL || - edata_size_get(sba->curr_reg) < guarded_size) { + if (sba->curr_reg == NULL + || edata_size_get(sba->curr_reg) < guarded_size) { /* * If the current region can't accommodate the allocation, * try replacing it with a larger one and destroy current if the * replacement succeeds. */ to_destroy = sba->curr_reg; - bool err = san_bump_grow_locked(tsdn, sba, pac, ehooks, - guarded_size); + bool err = san_bump_grow_locked( + tsdn, sba, pac, ehooks, guarded_size); if (err) { goto label_err; } @@ -40,9 +39,9 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, assert(guarded_size <= edata_size_get(sba->curr_reg)); size_t trail_size = edata_size_get(sba->curr_reg) - guarded_size; - edata_t* edata; + edata_t *edata; if (trail_size != 0) { - edata_t* curr_reg_trail = extent_split_wrapper(tsdn, pac, + edata_t *curr_reg_trail = extent_split_wrapper(tsdn, pac, ehooks, sba->curr_reg, guarded_size, trail_size, /* holding_core_locks */ true); if (curr_reg_trail == NULL) { @@ -69,9 +68,8 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t* sba, pac_t *pac, /* right */ true, /* remap */ true); if (extent_commit_zero(tsdn, ehooks, edata, /* commit */ true, zero, - /* growing_retained */ false)) { - extent_record(tsdn, pac, ehooks, &pac->ecache_retained, - edata); + /* growing_retained */ false)) { + extent_record(tsdn, pac, ehooks, &pac->ecache_retained, edata); return NULL; } @@ -90,9 +88,10 @@ san_bump_grow_locked(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, ehooks_t 
*ehooks, size_t size) { malloc_mutex_assert_owner(tsdn, &sba->mtx); - bool committed = false, zeroed = false; - size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE ? size : - SBA_RETAINED_ALLOC_SIZE; + bool committed = false, zeroed = false; + size_t alloc_size = size > SBA_RETAINED_ALLOC_SIZE + ? size + : SBA_RETAINED_ALLOC_SIZE; assert((alloc_size & PAGE_MASK) == 0); sba->curr_reg = extent_alloc_wrapper(tsdn, pac, ehooks, NULL, alloc_size, PAGE, zeroed, &committed, diff --git a/src/sc.c b/src/sc.c index e4a94d89..014ab95d 100644 --- a/src/sc.c +++ b/src/sc.c @@ -27,7 +27,7 @@ slab_size(int lg_page, int lg_base, int lg_delta, int ndelta) { size_t try_slab_size = page; size_t try_nregs = try_slab_size / reg_size; size_t perfect_slab_size = 0; - bool perfect = false; + bool perfect = false; /* * This loop continues until we find the least common multiple of the * page size and size class size. Size classes are all of the form @@ -106,7 +106,7 @@ size_classes( /* Outputs that we update as we go. */ size_t lookup_maxclass = 0; size_t small_maxclass = 0; - int lg_large_minclass = 0; + int lg_large_minclass = 0; size_t large_maxclass = 0; /* Tiny size classes. */ @@ -209,7 +209,7 @@ size_classes( lg_delta++; } /* Additional outputs. */ - int nsizes = index; + int nsizes = index; unsigned lg_ceil_nsizes = lg_ceil(nsizes); /* Fill in the output data. 
*/ @@ -292,8 +292,8 @@ sc_data_update_slab_size(sc_data_t *data, size_t begin, size_t end, int pgs) { if (!sc->bin) { break; } - size_t reg_size = reg_size_compute(sc->lg_base, sc->lg_delta, - sc->ndelta); + size_t reg_size = reg_size_compute( + sc->lg_base, sc->lg_delta, sc->ndelta); if (begin <= reg_size && reg_size <= end) { sc_data_update_sc_slab_size(sc, reg_size, pgs); } diff --git a/src/sec.c b/src/sec.c index 67585a71..36cd2dcc 100644 --- a/src/sec.c +++ b/src/sec.c @@ -6,12 +6,12 @@ static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated); +static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); +static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, + size_t old_size, size_t new_size, bool *deferred_work_generated); +static void sec_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); static void sec_bin_init(sec_bin_t *bin) { @@ -29,16 +29,16 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_large_size_classes_disabled() || - opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); + assert(!sz_large_size_classes_disabled() + || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); - size_t max_alloc = PAGE_FLOOR(opts->max_alloc); + size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; size_t sz_shards = opts->nshards * sizeof(sec_shard_t); size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); size_t sz_alloc = sz_shards + sz_bins; - void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); if (dynalloc == NULL) { return true; } @@ -74,7 +74,6 @@ sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); sec->fallback = fallback; - sec->opts = *opts; sec->npsizes = npsizes; @@ -102,7 +101,7 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { if (tsdn_null(tsdn)) { return &sec->shards[0]; } - tsd_t *tsd = tsdn_tsd(tsdn); + tsd_t *tsd = tsdn_tsd(tsdn); uint8_t *idxp = tsd_sec_shardp_get(tsd); if (*idxp == (uint8_t)-1) { /* @@ -111,9 +110,10 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * number to store 32 bits, since we'll deliberately overflow * when we multiply by the number of shards. 
*/ - uint64_t rand32 = prng_lg_range_u64(tsd_prng_statep_get(tsd), 32); - uint32_t idx = - (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) >> 32); + uint64_t rand32 = prng_lg_range_u64( + tsd_prng_statep_get(tsd), 32); + uint32_t idx = (uint32_t)((rand32 * (uint64_t)sec->opts.nshards) + >> 32); assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } @@ -157,13 +157,13 @@ sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); bool deferred_work_generated = false; - pai_dalloc_batch(tsdn, sec->fallback, &to_flush, - &deferred_work_generated); + pai_dalloc_batch( + tsdn, sec->fallback, &to_flush, &deferred_work_generated); } static edata_t * -sec_shard_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin) { +sec_shard_alloc_locked( + tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, sec_bin_t *bin) { malloc_mutex_assert_owner(tsdn, &shard->mtx); if (!shard->enabled) { return NULL; @@ -186,7 +186,7 @@ sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_list_active_t result; edata_list_active_init(&result); - bool deferred_work_generated = false; + bool deferred_work_generated = false; size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, &deferred_work_generated); @@ -243,8 +243,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, assert(pszind < sec->npsizes); sec_shard_t *shard = sec_shard_pick(tsdn, sec); - sec_bin_t *bin = &shard->bins[pszind]; - bool do_batch_fill = false; + sec_bin_t *bin = &shard->bins[pszind]; + bool do_batch_fill = false; malloc_mutex_lock(tsdn, &shard->mtx); edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); @@ -258,8 +258,8 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, malloc_mutex_unlock(tsdn, &shard->mtx); if (edata == NULL) { if (do_batch_fill) { - edata = 
sec_batch_fill_and_alloc(tsdn, sec, shard, bin, - size, frequent_reuse); + edata = sec_batch_fill_and_alloc( + tsdn, sec, shard, bin, size, frequent_reuse); } else { edata = pai_alloc(tsdn, sec->fallback, size, alignment, zero, /* guarded */ false, frequent_reuse, @@ -304,16 +304,16 @@ sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { * rare pathways. */ bool deferred_work_generated = false; - pai_dalloc_batch(tsdn, sec->fallback, &to_flush, - &deferred_work_generated); + pai_dalloc_batch( + tsdn, sec->fallback, &to_flush, &deferred_work_generated); } static void -sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - edata_t *edata) { +sec_shard_dalloc_and_unlock( + tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { malloc_mutex_assert_owner(tsdn, &shard->mtx); assert(shard->bytes_cur <= sec->opts.max_bytes); - size_t size = edata_size_get(edata); + size_t size = edata_size_get(edata); pszind_t pszind = sz_psz2ind(size); assert(pszind < sec->npsizes); /* @@ -342,13 +342,12 @@ sec_shard_dalloc_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, } static void -sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +sec_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; if (sec->opts.nshards == 0 || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata, - deferred_work_generated); + pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); return; } sec_shard_t *shard = sec_shard_pick(tsdn, sec); @@ -357,8 +356,7 @@ sec_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); } else { malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata, - deferred_work_generated); + pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); } } @@ -398,12 +396,12 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, 
sec_stats_t *stats) { } void -sec_mutex_stats_read(tsdn_t *tsdn, sec_t *sec, - mutex_prof_data_t *mutex_prof_data) { +sec_mutex_stats_read( + tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { for (size_t i = 0; i < sec->opts.nshards; i++) { malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - malloc_mutex_prof_accum(tsdn, mutex_prof_data, - &sec->shards[i].mtx); + malloc_mutex_prof_accum( + tsdn, mutex_prof_data, &sec->shards[i].mtx); malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); } } diff --git a/src/stats.c b/src/stats.c index b2a00319..84af3911 100644 --- a/src/stats.c +++ b/src/stats.c @@ -11,45 +11,49 @@ static const char *const global_mutex_names[mutex_prof_num_global_mutexes] = { #define OP(mtx) #mtx, - MUTEX_PROF_GLOBAL_MUTEXES + MUTEX_PROF_GLOBAL_MUTEXES #undef OP }; static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = { #define OP(mtx) #mtx, - MUTEX_PROF_ARENA_MUTEXES + MUTEX_PROF_ARENA_MUTEXES #undef OP }; -#define CTL_GET(n, v, t) do { \ - size_t sz = sizeof(t); \ - xmallctl(n, (void *)v, &sz, NULL, 0); \ -} while (0) +#define CTL_GET(n, v, t) \ + do { \ + size_t sz = sizeof(t); \ + xmallctl(n, (void *)v, &sz, NULL, 0); \ + } while (0) -#define CTL_LEAF_PREPARE(mib, miblen, name) do { \ - assert(miblen < CTL_MAX_DEPTH); \ - size_t miblen_new = CTL_MAX_DEPTH; \ - xmallctlmibnametomib(mib, miblen, name, &miblen_new); \ - assert(miblen_new > miblen); \ -} while (0) +#define CTL_LEAF_PREPARE(mib, miblen, name) \ + do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + xmallctlmibnametomib(mib, miblen, name, &miblen_new); \ + assert(miblen_new > miblen); \ + } while (0) -#define CTL_LEAF(mib, miblen, leaf, v, t) do { \ - assert(miblen < CTL_MAX_DEPTH); \ - size_t miblen_new = CTL_MAX_DEPTH; \ - size_t sz = sizeof(t); \ - xmallctlbymibname(mib, miblen, leaf, &miblen_new, (void *)v, \ - &sz, NULL, 0); \ - assert(miblen_new == miblen + 1); \ -} while (0) +#define CTL_LEAF(mib, miblen, leaf, v, t) \ + 
do { \ + assert(miblen < CTL_MAX_DEPTH); \ + size_t miblen_new = CTL_MAX_DEPTH; \ + size_t sz = sizeof(t); \ + xmallctlbymibname( \ + mib, miblen, leaf, &miblen_new, (void *)v, &sz, NULL, 0); \ + assert(miblen_new == miblen + 1); \ + } while (0) -#define CTL_MIB_GET(n, i, v, t, ind) do { \ - size_t mib[CTL_MAX_DEPTH]; \ - size_t miblen = sizeof(mib) / sizeof(size_t); \ - size_t sz = sizeof(t); \ - xmallctlnametomib(n, mib, &miblen); \ - mib[(ind)] = (i); \ - xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ -} while (0) +#define CTL_MIB_GET(n, i, v, t, ind) \ + do { \ + size_t mib[CTL_MAX_DEPTH]; \ + size_t miblen = sizeof(mib) / sizeof(size_t); \ + size_t sz = sizeof(t); \ + xmallctlnametomib(n, mib, &miblen); \ + mib[(ind)] = (i); \ + xmallctlbymib(mib, miblen, (void *)v, &sz, NULL, 0); \ + } while (0) #define CTL_M1_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 1) #define CTL_M2_GET(n, i, v, t) CTL_MIB_GET(n, i, v, t, 2) @@ -58,10 +62,10 @@ static const char *const arena_mutex_names[mutex_prof_num_arena_mutexes] = { /* Data. */ bool opt_stats_print = false; -char opt_stats_print_opts[stats_print_tot_num_options+1] = ""; +char opt_stats_print_opts[stats_print_tot_num_options + 1] = ""; int64_t opt_stats_interval = STATS_INTERVAL_DEFAULT; -char opt_stats_interval_opts[stats_print_tot_num_options+1] = ""; +char opt_stats_interval_opts[stats_print_tot_num_options + 1] = ""; static counter_accum_t stats_interval_accumulated; /* Per thread batch accum size for stats_interval. 
*/ @@ -111,8 +115,8 @@ get_rate_str(uint64_t dividend, uint64_t divisor, char str[6]) { static void mutex_stats_init_cols(emitter_row_t *row, const char *table_name, emitter_col_t *name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; @@ -128,13 +132,13 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, #define WIDTH_uint32_t 12 #define WIDTH_uint64_t 16 -#define OP(counter, counter_type, human, derived, base_counter) \ - col = &col_##counter_type[k_##counter_type]; \ - ++k_##counter_type; \ - emitter_col_init(col, row); \ - col->justify = emitter_justify_right; \ - col->width = derived ? 8 : WIDTH_##counter_type; \ - col->type = emitter_type_title; \ +#define OP(counter, counter_type, human, derived, base_counter) \ + col = &col_##counter_type[k_##counter_type]; \ + ++k_##counter_type; \ + emitter_col_init(col, row); \ + col->justify = emitter_justify_right; \ + col->width = derived ? 
8 : WIDTH_##counter_type; \ + col->type = emitter_type_title; \ col->str_val = human; MUTEX_PROF_COUNTERS #undef OP @@ -146,9 +150,9 @@ mutex_stats_init_cols(emitter_row_t *row, const char *table_name, static void mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, emitter_col_t *col_name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { CTL_LEAF_PREPARE(mib, miblen, name); size_t miblen_name = miblen + 1; @@ -157,18 +161,17 @@ mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - CTL_LEAF(mib, miblen_name, #counter, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_name, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -179,9 +182,9 @@ mutex_stats_read_global(size_t mib[], size_t miblen, const char *name, static void 
mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name, emitter_col_t *col_name, - emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], - emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], + uint64_t uptime) { CTL_LEAF_PREPARE(mib, miblen, name); size_t miblen_name = miblen + 1; @@ -190,18 +193,17 @@ mutex_stats_read_arena(size_t mib[], size_t miblen, const char *name, emitter_col_t *dst; #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - CTL_LEAF(mib, miblen_name, #counter, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_name, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -213,7 +215,7 @@ static void mutex_stats_read_arena_bin(size_t mib[], size_t miblen, emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters], - uint64_t uptime) { + uint64_t uptime) { CTL_LEAF_PREPARE(mib, miblen, "mutex"); size_t miblen_mutex = miblen + 1; @@ -221,18 
+223,17 @@ mutex_stats_read_arena_bin(size_t mib[], size_t miblen, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, counter_type, human, derived, base_counter) \ - dst = &col_##counter_type[mutex_counter_##counter]; \ - dst->type = EMITTER_TYPE_##counter_type; \ - if (!derived) { \ - CTL_LEAF(mib, miblen_mutex, #counter, \ - (counter_type *)&dst->bool_val, counter_type); \ - } else { \ - emitter_col_t *base = \ - &col_##counter_type[mutex_counter_##base_counter]; \ - dst->counter_type##_val = \ - (counter_type)rate_per_second( \ - base->counter_type##_val, uptime); \ +#define OP(counter, counter_type, human, derived, base_counter) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + if (!derived) { \ + CTL_LEAF(mib, miblen_mutex, #counter, \ + (counter_type *)&dst->bool_val, counter_type); \ + } else { \ + emitter_col_t *base = \ + &col_##counter_type[mutex_counter_##base_counter]; \ + dst->counter_type##_val = (counter_type)rate_per_second( \ + base->counter_type##_val, uptime); \ } MUTEX_PROF_COUNTERS #undef OP @@ -256,12 +257,12 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #define EMITTER_TYPE_uint32_t emitter_type_uint32 #define EMITTER_TYPE_uint64_t emitter_type_uint64 -#define OP(counter, type, human, derived, base_counter) \ - if (!derived) { \ - col = &col_##type[k_##type]; \ - ++k_##type; \ +#define OP(counter, type, human, derived, base_counter) \ + if (!derived) { \ + col = &col_##type[k_##type]; \ + ++k_##type; \ emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ - (const void *)&col->bool_val); \ + (const void *)&col->bool_val); \ } MUTEX_PROF_COUNTERS; #undef OP @@ -269,44 +270,42 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #undef EMITTER_TYPE_uint64_t } -#define COL_DECLARE(column_name) \ - emitter_col_t col_##column_name; +#define COL_DECLARE(column_name) emitter_col_t col_##column_name; 
-#define COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ - emitter_col_init(&col_##column_name, &row_name); \ - col_##column_name.justify = emitter_justify_##left_or_right; \ - col_##column_name.width = col_width; \ +#define COL_INIT(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_init(&col_##column_name, &row_name); \ + col_##column_name.justify = emitter_justify_##left_or_right; \ + col_##column_name.width = col_width; \ col_##column_name.type = emitter_type_##etype; -#define COL(row_name, column_name, left_or_right, col_width, etype) \ - COL_DECLARE(column_name); \ +#define COL(row_name, column_name, left_or_right, col_width, etype) \ + COL_DECLARE(column_name); \ COL_INIT(row_name, column_name, left_or_right, col_width, etype) -#define COL_HDR_DECLARE(column_name) \ - COL_DECLARE(column_name); \ +#define COL_HDR_DECLARE(column_name) \ + COL_DECLARE(column_name); \ emitter_col_t header_##column_name; -#define COL_HDR_INIT(row_name, column_name, human, left_or_right, \ - col_width, etype) \ - COL_INIT(row_name, column_name, left_or_right, col_width, etype)\ - emitter_col_init(&header_##column_name, &header_##row_name); \ - header_##column_name.justify = emitter_justify_##left_or_right; \ - header_##column_name.width = col_width; \ - header_##column_name.type = emitter_type_title; \ +#define COL_HDR_INIT( \ + row_name, column_name, human, left_or_right, col_width, etype) \ + COL_INIT(row_name, column_name, left_or_right, col_width, etype) \ + emitter_col_init(&header_##column_name, &header_##row_name); \ + header_##column_name.justify = emitter_justify_##left_or_right; \ + header_##column_name.width = col_width; \ + header_##column_name.type = emitter_type_title; \ header_##column_name.str_val = human ? 
human : #column_name; -#define COL_HDR(row_name, column_name, human, left_or_right, col_width, \ - etype) \ - COL_HDR_DECLARE(column_name) \ - COL_HDR_INIT(row_name, column_name, human, left_or_right, \ - col_width, etype) +#define COL_HDR(row_name, column_name, human, left_or_right, col_width, etype) \ + COL_HDR_DECLARE(column_name) \ + COL_HDR_INIT( \ + row_name, column_name, human, left_or_right, col_width, etype) JEMALLOC_COLD static void -stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, - uint64_t uptime) { - size_t page; - bool in_gap, in_gap_prev; +stats_arena_bins_print( + emitter_t *emitter, bool mutex, unsigned i, uint64_t uptime) { + size_t page; + bool in_gap, in_gap_prev; unsigned nbins, j; CTL_GET("arenas.page", &page, size_t); @@ -378,17 +377,17 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters]; if (mutex) { - mutex_stats_init_cols(&row, NULL, NULL, col_mutex64, - col_mutex32); - mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64, - header_mutex32); + mutex_stats_init_cols( + &row, NULL, NULL, col_mutex64, col_mutex32); + mutex_stats_init_cols( + &header_row, NULL, NULL, header_mutex64, header_mutex32); } /* * We print a "bins:" header as part of the table row; we need to adjust * the header size column to compensate. 
*/ - header_size.width -=5; + header_size.width -= 5; emitter_table_printf(emitter, "bins:"); emitter_table_row(emitter, &header_row); emitter_json_array_kv_begin(emitter, "bins"); @@ -408,9 +407,9 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; - size_t reg_size, slab_size, curregs; - size_t curslabs; - size_t nonfull_slabs; + size_t reg_size, slab_size, curregs; + size_t curslabs; + size_t nonfull_slabs; uint32_t nregs, nshards; uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nreslabs; @@ -440,8 +439,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, } if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " ---\n"); } if (in_gap && !emitter_outputs_json(emitter)) { @@ -455,8 +454,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "curregs", &curregs, size_t); - CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, - uint64_t); + CTL_LEAF( + stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "nfills", &nfills, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "nflushes", &nflushes, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "nreslabs", &nreslabs, uint64_t); @@ -464,12 +463,12 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pops", &batch_pops, - uint64_t); + CTL_LEAF( + stats_arenas_mib, 5, "batch_pops", &batch_pops, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", &batch_failed_pushes, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", - &batch_pushes, uint64_t); + CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", &batch_pushes, + uint64_t); 
CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", &batch_pushed_elems, uint64_t); @@ -479,14 +478,14 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, } emitter_json_object_begin(emitter); - emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, - &nmalloc); - emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, - &ndalloc); - emitter_json_kv(emitter, "curregs", emitter_type_size, - &curregs); - emitter_json_kv(emitter, "nrequests", emitter_type_uint64, - &nrequests); + emitter_json_kv( + emitter, "nmalloc", emitter_type_uint64, &nmalloc); + emitter_json_kv( + emitter, "ndalloc", emitter_type_uint64, &ndalloc); + emitter_json_kv( + emitter, "curregs", emitter_type_size, &curregs); + emitter_json_kv( + emitter, "nrequests", emitter_type_uint64, &nrequests); if (prof_stats_on) { emitter_json_kv(emitter, "prof_live_requested", emitter_type_uint64, &prof_live.req_sum); @@ -497,36 +496,36 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, emitter_json_kv(emitter, "prof_accum_count", emitter_type_uint64, &prof_accum.count); } - emitter_json_kv(emitter, "nfills", emitter_type_uint64, - &nfills); - emitter_json_kv(emitter, "nflushes", emitter_type_uint64, - &nflushes); - emitter_json_kv(emitter, "nreslabs", emitter_type_uint64, - &nreslabs); - emitter_json_kv(emitter, "curslabs", emitter_type_size, - &curslabs); + emitter_json_kv( + emitter, "nfills", emitter_type_uint64, &nfills); + emitter_json_kv( + emitter, "nflushes", emitter_type_uint64, &nflushes); + emitter_json_kv( + emitter, "nreslabs", emitter_type_uint64, &nreslabs); + emitter_json_kv( + emitter, "curslabs", emitter_type_size, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); - emitter_json_kv(emitter, "batch_pops", - emitter_type_uint64, &batch_pops); + emitter_json_kv( + emitter, "batch_pops", emitter_type_uint64, &batch_pops); emitter_json_kv(emitter, "batch_failed_pushes", emitter_type_uint64, &batch_failed_pushes); - 
emitter_json_kv(emitter, "batch_pushes", - emitter_type_uint64, &batch_pushes); + emitter_json_kv(emitter, "batch_pushes", emitter_type_uint64, + &batch_pushes); emitter_json_kv(emitter, "batch_pushed_elems", emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); - mutex_stats_emit(emitter, NULL, col_mutex64, - col_mutex32); + mutex_stats_emit( + emitter, NULL, col_mutex64, col_mutex32); emitter_json_object_end(emitter); } emitter_json_object_end(emitter); size_t availregs = nregs * curslabs; - char util[6]; - if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util)) - { + char util[6]; + if (get_rate_str( + (uint64_t)curregs, (uint64_t)availregs, util)) { if (availregs == 0) { malloc_snprintf(util, sizeof(util), "1"); } else if (curregs > availregs) { @@ -550,7 +549,8 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_ndalloc.uint64_val = ndalloc; col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; - col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + col_nrequests_ps.uint64_val = rate_per_second( + nrequests, uptime); if (prof_stats_on) { col_prof_live_requested.uint64_val = prof_live.req_sum; col_prof_live_count.uint64_val = prof_live.count; @@ -574,19 +574,17 @@ stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i, col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); col_pops.uint64_val = batch_pops; - col_pops_ps.uint64_val - = rate_per_second(batch_pops, uptime); + col_pops_ps.uint64_val = rate_per_second(batch_pops, uptime); col_failed_push.uint64_val = batch_failed_pushes; - col_failed_push_ps.uint64_val - = rate_per_second(batch_failed_pushes, uptime); + col_failed_push_ps.uint64_val = rate_per_second( + batch_failed_pushes, uptime); col_push.uint64_val = batch_pushes; - col_push_ps.uint64_val - = rate_per_second(batch_pushes, uptime); + col_push_ps.uint64_val = rate_per_second(batch_pushes, 
uptime); col_push_elem.uint64_val = batch_pushed_elems; - col_push_elem_ps.uint64_val - = rate_per_second(batch_pushed_elems, uptime); + col_push_elem_ps.uint64_val = rate_per_second( + batch_pushed_elems, uptime); /* * Note that mutex columns were initialized above, if mutex == @@ -606,7 +604,7 @@ JEMALLOC_COLD static void stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { unsigned nbins, nlextents, j; - bool in_gap, in_gap_prev; + bool in_gap, in_gap_prev; CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); @@ -660,8 +658,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } for (j = 0, in_gap = false; j < nlextents; j++) { - uint64_t nmalloc, ndalloc, nrequests; - size_t lextent_size, curlextents; + uint64_t nmalloc, ndalloc, nrequests; + size_t lextent_size, curlextents; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -670,20 +668,20 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { CTL_LEAF(stats_arenas_mib, 5, "nmalloc", &nmalloc, uint64_t); CTL_LEAF(stats_arenas_mib, 5, "ndalloc", &ndalloc, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "nrequests", &nrequests, - uint64_t); + CTL_LEAF( + stats_arenas_mib, 5, "nrequests", &nrequests, uint64_t); in_gap_prev = in_gap; in_gap = (nrequests == 0); if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " ---\n"); } CTL_LEAF(arenas_lextent_mib, 3, "size", &lextent_size, size_t); - CTL_LEAF(stats_arenas_mib, 5, "curlextents", &curlextents, - size_t); + CTL_LEAF( + stats_arenas_mib, 5, "curlextents", &curlextents, size_t); if (prof_stats_on) { prof_stats_mib[3] = j; @@ -704,8 +702,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { emitter_json_kv(emitter, "prof_accum_count", emitter_type_uint64, &prof_accum.count); } - emitter_json_kv(emitter, "curlextents", emitter_type_size, - &curlextents); + 
emitter_json_kv( + emitter, "curlextents", emitter_type_size, &curlextents); emitter_json_object_end(emitter); col_size.size_val = lextent_size; @@ -716,7 +714,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { col_ndalloc.uint64_val = ndalloc; col_ndalloc_ps.uint64_val = rate_per_second(ndalloc, uptime); col_nrequests.uint64_val = nrequests; - col_nrequests_ps.uint64_val = rate_per_second(nrequests, uptime); + col_nrequests_ps.uint64_val = rate_per_second( + nrequests, uptime); if (prof_stats_on) { col_prof_live_requested.uint64_val = prof_live.req_sum; col_prof_live_count.uint64_val = prof_live.count; @@ -739,8 +738,8 @@ stats_arena_lextents_print(emitter_t *emitter, unsigned i, uint64_t uptime) { JEMALLOC_COLD static void stats_arena_extents_print(emitter_t *emitter, unsigned i) { - unsigned j; - bool in_gap, in_gap_prev; + unsigned j; + bool in_gap, in_gap_prev; emitter_row_t header_row; emitter_row_init(&header_row); emitter_row_t row; @@ -777,12 +776,12 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { CTL_LEAF(stats_arenas_mib, 5, "ndirty", &ndirty, size_t); CTL_LEAF(stats_arenas_mib, 5, "nmuzzy", &nmuzzy, size_t); CTL_LEAF(stats_arenas_mib, 5, "nretained", &nretained, size_t); - CTL_LEAF(stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, + CTL_LEAF( + stats_arenas_mib, 5, "dirty_bytes", &dirty_bytes, size_t); + CTL_LEAF( + stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, size_t); + CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", &retained_bytes, size_t); - CTL_LEAF(stats_arenas_mib, 5, "muzzy_bytes", &muzzy_bytes, - size_t); - CTL_LEAF(stats_arenas_mib, 5, "retained_bytes", - &retained_bytes, size_t); total = ndirty + nmuzzy + nretained; total_bytes = dirty_bytes + muzzy_bytes + retained_bytes; @@ -791,20 +790,20 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { in_gap = (total == 0); if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " 
---\n"); } emitter_json_object_begin(emitter); emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty); emitter_json_kv(emitter, "nmuzzy", emitter_type_size, &nmuzzy); - emitter_json_kv(emitter, "nretained", emitter_type_size, - &nretained); + emitter_json_kv( + emitter, "nretained", emitter_type_size, &nretained); - emitter_json_kv(emitter, "dirty_bytes", emitter_type_size, - &dirty_bytes); - emitter_json_kv(emitter, "muzzy_bytes", emitter_type_size, - &muzzy_bytes); + emitter_json_kv( + emitter, "dirty_bytes", emitter_type_size, &dirty_bytes); + emitter_json_kv( + emitter, "muzzy_bytes", emitter_type_size, &muzzy_bytes); emitter_json_kv(emitter, "retained_bytes", emitter_type_size, &retained_bytes); emitter_json_object_end(emitter); @@ -839,8 +838,8 @@ stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { } static void -stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, - uint64_t uptime) { +stats_arena_hpa_shard_counters_print( + emitter_t *emitter, unsigned i, uint64_t uptime) { size_t npageslabs; size_t nactive; size_t ndirty; @@ -860,39 +859,36 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, uint64_t nhugify_failures; uint64_t ndehugifies; - CTL_M2_GET("stats.arenas.0.hpa_shard.npageslabs", - i, &npageslabs, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", - i, &nactive, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", - i, &ndirty, size_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.npageslabs", i, &npageslabs, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nactive", i, &nactive, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.ndirty", i, &ndirty, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", - i, &npageslabs_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", - i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", - i, &ndirty_nonhuge, size_t); + 
CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_nonhuge", i, + &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_nonhuge", i, + &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_nonhuge", i, + &ndirty_nonhuge, size_t); nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", - i, &npageslabs_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", - i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", - i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.npageslabs_huge", i, + &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.nactive_huge", i, + &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.slabs.ndirty_huge", i, + &ndirty_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", - i, &npurge_passes, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", - i, &npurges, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.nhugifies", - i, &nhugifies, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", - i, &nhugify_failures, uint64_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.ndehugifies", - i, &ndehugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurge_passes", i, &npurge_passes, + uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.npurges", i, &npurges, uint64_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.nhugifies", i, &nhugifies, uint64_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.nhugify_failures", i, + &nhugify_failures, uint64_t); + CTL_M2_GET( + "stats.arenas.0.hpa_shard.ndehugifies", i, &ndehugifies, uint64_t); emitter_table_printf(emitter, "HPA shard stats:\n" @@ -900,56 +896,55 @@ stats_arena_hpa_shard_counters_print(emitter_t *emitter, unsigned i, " Active pages: %zu (%zu huge, %zu nonhuge)\n" " Dirty pages: %zu (%zu huge, %zu nonhuge)\n" " 
Retained pages: %zu\n" - " Purge passes: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Purges: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Hugeifies: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Hugify failures: %" FMTu64 " (%" FMTu64 " / sec)\n" - " Dehugifies: %" FMTu64 " (%" FMTu64 " / sec)\n" + " Purge passes: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Purges: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Hugeifies: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Hugify failures: %" FMTu64 " (%" FMTu64 + " / sec)\n" + " Dehugifies: %" FMTu64 " (%" FMTu64 + " / sec)\n" "\n", - npageslabs, npageslabs_huge, npageslabs_nonhuge, - nactive, nactive_huge, nactive_nonhuge, - ndirty, ndirty_huge, ndirty_nonhuge, - nretained_nonhuge, - npurge_passes, rate_per_second(npurge_passes, uptime), - npurges, rate_per_second(npurges, uptime), - nhugifies, rate_per_second(nhugifies, uptime), - nhugify_failures, rate_per_second(nhugify_failures, uptime), - ndehugifies, rate_per_second(ndehugifies, uptime)); + npageslabs, npageslabs_huge, npageslabs_nonhuge, nactive, + nactive_huge, nactive_nonhuge, ndirty, ndirty_huge, ndirty_nonhuge, + nretained_nonhuge, npurge_passes, + rate_per_second(npurge_passes, uptime), npurges, + rate_per_second(npurges, uptime), nhugifies, + rate_per_second(nhugifies, uptime), nhugify_failures, + rate_per_second(nhugify_failures, uptime), ndehugifies, + rate_per_second(ndehugifies, uptime)); - emitter_json_kv(emitter, "npageslabs", emitter_type_size, - &npageslabs); - emitter_json_kv(emitter, "nactive", emitter_type_size, - &nactive); - emitter_json_kv(emitter, "ndirty", emitter_type_size, - &ndirty); + emitter_json_kv(emitter, "npageslabs", emitter_type_size, &npageslabs); + emitter_json_kv(emitter, "nactive", emitter_type_size, &nactive); + emitter_json_kv(emitter, "ndirty", emitter_type_size, &ndirty); - emitter_json_kv(emitter, "npurge_passes", emitter_type_uint64, - &npurge_passes); - emitter_json_kv(emitter, "npurges", emitter_type_uint64, - &npurges); - emitter_json_kv(emitter, 
"nhugifies", emitter_type_uint64, - &nhugifies); + emitter_json_kv( + emitter, "npurge_passes", emitter_type_uint64, &npurge_passes); + emitter_json_kv(emitter, "npurges", emitter_type_uint64, &npurges); + emitter_json_kv(emitter, "nhugifies", emitter_type_uint64, &nhugifies); emitter_json_kv(emitter, "nhugify_failures", emitter_type_uint64, &nhugify_failures); - emitter_json_kv(emitter, "ndehugifies", emitter_type_uint64, - &ndehugifies); + emitter_json_kv( + emitter, "ndehugifies", emitter_type_uint64, &ndehugifies); emitter_json_object_kv_begin(emitter, "slabs"); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); - emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, - &nactive_nonhuge); - emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, - &ndirty_nonhuge); + emitter_json_kv( + emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv( + emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, &nretained_nonhuge); - emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, - &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, - &ndirty_huge); + emitter_json_kv( + emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_object_end(emitter); /* End "slabs" */ } @@ -970,19 +965,19 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { size_t nretained_nonhuge; /* Full slab stats. 
*/ - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", - i, &npageslabs_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", - i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", - i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_huge", i, + &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_huge", i, + &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_huge", i, + &ndirty_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", - i, &npageslabs_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", - i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", - i, &ndirty_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.npageslabs_nonhuge", i, + &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.nactive_nonhuge", i, + &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.full_slabs.ndirty_nonhuge", i, + &ndirty_nonhuge, size_t); nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; @@ -992,40 +987,38 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - npageslabs_huge, npageslabs_nonhuge, - nactive_huge, nactive_nonhuge, - ndirty_huge, ndirty_nonhuge, - nretained_nonhuge); + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ndirty_huge, ndirty_nonhuge, nretained_nonhuge); emitter_json_object_kv_begin(emitter, "full_slabs"); - emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, - &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "nactive_huge", 
emitter_type_size, - &nactive_huge); + emitter_json_kv( + emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); - emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, - &nactive_nonhuge); - emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, - &ndirty_nonhuge); + emitter_json_kv( + emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv( + emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ /* Next, empty slab stats. */ - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", - i, &npageslabs_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", - i, &nactive_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", - i, &ndirty_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_huge", i, + &npageslabs_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_huge", i, + &nactive_huge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", i, + &ndirty_huge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", - i, &npageslabs_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", - i, &nactive_nonhuge, size_t); - CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", - i, &ndirty_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.npageslabs_nonhuge", i, + &npageslabs_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.nactive_nonhuge", i, + &nactive_nonhuge, size_t); + CTL_M2_GET("stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", i, + &ndirty_nonhuge, size_t); 
nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; @@ -1035,24 +1028,22 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { " nactive: %zu huge, %zu nonhuge \n" " ndirty: %zu huge, %zu nonhuge \n" " nretained: 0 huge, %zu nonhuge \n", - npageslabs_huge, npageslabs_nonhuge, - nactive_huge, nactive_nonhuge, - ndirty_huge, ndirty_nonhuge, - nretained_nonhuge); + npageslabs_huge, npageslabs_nonhuge, nactive_huge, nactive_nonhuge, + ndirty_huge, ndirty_nonhuge, nretained_nonhuge); emitter_json_object_kv_begin(emitter, "empty_slabs"); - emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, - &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); + emitter_json_kv( + emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); - emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, - &nactive_nonhuge); - emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, - &ndirty_nonhuge); + emitter_json_kv( + emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); + emitter_json_kv( + emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); emitter_json_object_end(emitter); /* End "empty_slabs" */ /* Last, nonfull slab stats. 
*/ @@ -1080,25 +1071,25 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { CTL_LEAF(stats_arenas_mib, 6, "npageslabs_huge", &npageslabs_huge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "nactive_huge", - &nactive_huge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ndirty_huge", - &ndirty_huge, size_t); + CTL_LEAF( + stats_arenas_mib, 6, "nactive_huge", &nactive_huge, size_t); + CTL_LEAF( + stats_arenas_mib, 6, "ndirty_huge", &ndirty_huge, size_t); CTL_LEAF(stats_arenas_mib, 6, "npageslabs_nonhuge", &npageslabs_nonhuge, size_t); CTL_LEAF(stats_arenas_mib, 6, "nactive_nonhuge", &nactive_nonhuge, size_t); - CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", - &ndirty_nonhuge, size_t); + CTL_LEAF(stats_arenas_mib, 6, "ndirty_nonhuge", &ndirty_nonhuge, + size_t); nretained_nonhuge = npageslabs_nonhuge * HUGEPAGE_PAGES - nactive_nonhuge - ndirty_nonhuge; bool in_gap_prev = in_gap; in_gap = (npageslabs_huge == 0 && npageslabs_nonhuge == 0); if (in_gap_prev && !in_gap) { - emitter_table_printf(emitter, - " ---\n"); + emitter_table_printf( + emitter, " ---\n"); } col_size.size_val = sz_pind2sz(j); @@ -1117,12 +1108,12 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter_json_object_begin(emitter); emitter_json_kv(emitter, "npageslabs_huge", emitter_type_size, &npageslabs_huge); - emitter_json_kv(emitter, "nactive_huge", emitter_type_size, - &nactive_huge); - emitter_json_kv(emitter, "ndirty_huge", emitter_type_size, - &ndirty_huge); - emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, - &npageslabs_nonhuge); + emitter_json_kv( + emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter_json_kv( + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); + emitter_json_kv(emitter, "npageslabs_nonhuge", + emitter_type_size, &npageslabs_nonhuge); emitter_json_kv(emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, @@ -1146,7 +1137,8 @@ 
stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { } static void -stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { +stats_arena_mutexes_print( + emitter_t *emitter, unsigned arena_ind, uint64_t uptime) { emitter_row_t row; emitter_col_t col_name; emitter_col_t col64[mutex_prof_num_uint64_t_counters]; @@ -1164,11 +1156,11 @@ stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind, uint64_t uptim CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; - i++) { + i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); - mutex_stats_read_arena(stats_arenas_mib, 4, name, &col_name, - col64, col32, uptime); + mutex_stats_read_arena( + stats_arenas_mib, 4, name, &col_name, col64, col32, uptime); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); /* Close the mutex dict. */ } @@ -1179,29 +1171,30 @@ JEMALLOC_COLD static void stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, bool mutex, bool extents, bool hpa) { - char name[ARENA_NAME_LEN]; - char *namep = name; - unsigned nthreads; + char name[ARENA_NAME_LEN]; + char *namep = name; + unsigned nthreads; const char *dss; - ssize_t dirty_decay_ms, muzzy_decay_ms; - size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident, metadata_edata, metadata_rtree, + ssize_t dirty_decay_ms, muzzy_decay_ms; + size_t page, pactive, pdirty, pmuzzy, mapped, retained; + size_t base, internal, resident, metadata_edata, metadata_rtree, metadata_thp, extent_avail; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; - size_t small_allocated; + size_t small_allocated; uint64_t small_nmalloc, small_ndalloc, small_nrequests, small_nfills, small_nflushes; - size_t large_allocated; + size_t large_allocated; uint64_t large_nmalloc, 
large_ndalloc, large_nrequests, large_nfills, large_nflushes; - size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm; + size_t tcache_bytes, tcache_stashed_bytes, abandoned_vm; uint64_t uptime; CTL_GET("arenas.page", &page, size_t); if (i != MALLCTL_ARENAS_ALL && i != MALLCTL_ARENAS_DESTROYED) { CTL_M1_GET("arena.0.name", i, (void *)&namep, const char *); - emitter_kv(emitter, "name", "name", emitter_type_string, &namep); + emitter_kv( + emitter, "name", "name", emitter_type_string, &namep); } CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); @@ -1209,55 +1202,55 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, emitter_type_unsigned, &nthreads); CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); - emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64, - &uptime); + emitter_kv( + emitter, "uptime_ns", "uptime", emitter_type_uint64, &uptime); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); emitter_kv(emitter, "dss", "dss allocation precedence", emitter_type_string, &dss); - CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, - ssize_t); - CTL_M2_GET("stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms, - ssize_t); + CTL_M2_GET( + "stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); + CTL_M2_GET( + "stats.arenas.0.muzzy_decay_ms", i, &muzzy_decay_ms, ssize_t); CTL_M2_GET("stats.arenas.0.pactive", i, &pactive, size_t); CTL_M2_GET("stats.arenas.0.pdirty", i, &pdirty, size_t); CTL_M2_GET("stats.arenas.0.pmuzzy", i, &pmuzzy, size_t); CTL_M2_GET("stats.arenas.0.dirty_npurge", i, &dirty_npurge, uint64_t); - CTL_M2_GET("stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise, - uint64_t); + CTL_M2_GET( + "stats.arenas.0.dirty_nmadvise", i, &dirty_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.dirty_purged", i, &dirty_purged, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_npurge", i, &muzzy_npurge, uint64_t); - CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, - uint64_t); + 
CTL_M2_GET( + "stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t); CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); emitter_row_t decay_row; emitter_row_init(&decay_row); /* JSON-style emission. */ - emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, - &dirty_decay_ms); - emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, - &muzzy_decay_ms); + emitter_json_kv( + emitter, "dirty_decay_ms", emitter_type_ssize, &dirty_decay_ms); + emitter_json_kv( + emitter, "muzzy_decay_ms", emitter_type_ssize, &muzzy_decay_ms); emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive); emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty); emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy); - emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64, - &dirty_npurge); - emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64, - &dirty_nmadvise); - emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64, - &dirty_purged); + emitter_json_kv( + emitter, "dirty_npurge", emitter_type_uint64, &dirty_npurge); + emitter_json_kv( + emitter, "dirty_nmadvise", emitter_type_uint64, &dirty_nmadvise); + emitter_json_kv( + emitter, "dirty_purged", emitter_type_uint64, &dirty_purged); - emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64, - &muzzy_npurge); - emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64, - &muzzy_nmadvise); - emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, - &muzzy_purged); + emitter_json_kv( + emitter, "muzzy_npurge", emitter_type_uint64, &muzzy_npurge); + emitter_json_kv( + emitter, "muzzy_nmadvise", emitter_type_uint64, &muzzy_nmadvise); + emitter_json_kv( + emitter, "muzzy_purged", emitter_type_uint64, &muzzy_purged); /* Table-style emission. 
*/ COL(decay_row, decay_type, right, 9, title); @@ -1374,12 +1367,12 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nfills_ps.type = emitter_type_uint64; col_count_nflushes_ps.type = emitter_type_uint64; -#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ - CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ - &small_or_large##_##name, valtype##_t); \ - emitter_json_kv(emitter, #name, emitter_type_##valtype, \ - &small_or_large##_##name); \ - col_count_##name.type = emitter_type_##valtype; \ +#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ + CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ + &small_or_large##_##name, valtype##_t); \ + emitter_json_kv( \ + emitter, #name, emitter_type_##valtype, &small_or_large##_##name); \ + col_count_##name.type = emitter_type_##valtype; \ col_count_##name.valtype##_val = small_or_large##_##name; emitter_json_object_kv_begin(emitter, "small"); @@ -1387,20 +1380,20 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(small, allocated, size) GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) - col_count_nmalloc_ps.uint64_val = - rate_per_second(col_count_nmalloc.uint64_val, uptime); + col_count_nmalloc_ps.uint64_val = rate_per_second( + col_count_nmalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) - col_count_ndalloc_ps.uint64_val = - rate_per_second(col_count_ndalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = rate_per_second( + col_count_ndalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) - col_count_nrequests_ps.uint64_val = - rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = rate_per_second( + col_count_nrequests.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nfills, uint64) - col_count_nfills_ps.uint64_val = - rate_per_second(col_count_nfills.uint64_val, uptime); + 
col_count_nfills_ps.uint64_val = rate_per_second( + col_count_nfills.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(small, nflushes, uint64) - col_count_nflushes_ps.uint64_val = - rate_per_second(col_count_nflushes.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = rate_per_second( + col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "small". */ @@ -1410,20 +1403,20 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, GET_AND_EMIT_ALLOC_STAT(large, allocated, size) GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) - col_count_nmalloc_ps.uint64_val = - rate_per_second(col_count_nmalloc.uint64_val, uptime); + col_count_nmalloc_ps.uint64_val = rate_per_second( + col_count_nmalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) - col_count_ndalloc_ps.uint64_val = - rate_per_second(col_count_ndalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = rate_per_second( + col_count_ndalloc.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) - col_count_nrequests_ps.uint64_val = - rate_per_second(col_count_nrequests.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = rate_per_second( + col_count_nrequests.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nfills, uint64) - col_count_nfills_ps.uint64_val = - rate_per_second(col_count_nfills.uint64_val, uptime); + col_count_nfills_ps.uint64_val = rate_per_second( + col_count_nfills.uint64_val, uptime); GET_AND_EMIT_ALLOC_STAT(large, nflushes, uint64) - col_count_nflushes_ps.uint64_val = - rate_per_second(col_count_nflushes.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = rate_per_second( + col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_json_object_end(emitter); /* Close "large". 
*/ @@ -1438,16 +1431,16 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, col_count_nrequests.uint64_val = small_nrequests + large_nrequests; col_count_nfills.uint64_val = small_nfills + large_nfills; col_count_nflushes.uint64_val = small_nflushes + large_nflushes; - col_count_nmalloc_ps.uint64_val = - rate_per_second(col_count_nmalloc.uint64_val, uptime); - col_count_ndalloc_ps.uint64_val = - rate_per_second(col_count_ndalloc.uint64_val, uptime); - col_count_nrequests_ps.uint64_val = - rate_per_second(col_count_nrequests.uint64_val, uptime); - col_count_nfills_ps.uint64_val = - rate_per_second(col_count_nfills.uint64_val, uptime); - col_count_nflushes_ps.uint64_val = - rate_per_second(col_count_nflushes.uint64_val, uptime); + col_count_nmalloc_ps.uint64_val = rate_per_second( + col_count_nmalloc.uint64_val, uptime); + col_count_ndalloc_ps.uint64_val = rate_per_second( + col_count_ndalloc.uint64_val, uptime); + col_count_nrequests_ps.uint64_val = rate_per_second( + col_count_nrequests.uint64_val, uptime); + col_count_nfills_ps.uint64_val = rate_per_second( + col_count_nfills.uint64_val, uptime); + col_count_nflushes_ps.uint64_val = rate_per_second( + col_count_nflushes.uint64_val, uptime); emitter_table_row(emitter, &alloc_count_row); emitter_row_t mem_count_row; @@ -1475,11 +1468,11 @@ stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, mem_count_val.size_val = pactive * page; emitter_table_row(emitter, &mem_count_row); -#define GET_AND_EMIT_MEM_STAT(stat) \ - CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \ - emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ - mem_count_title.str_val = #stat":"; \ - mem_count_val.size_val = stat; \ +#define GET_AND_EMIT_MEM_STAT(stat) \ + CTL_M2_GET("stats.arenas.0." 
#stat, i, &stat, size_t); \ + emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ + mem_count_title.str_val = #stat ":"; \ + mem_count_val.size_val = stat; \ emitter_table_row(emitter, &mem_count_row); GET_AND_EMIT_MEM_STAT(mapped) @@ -1517,13 +1510,13 @@ JEMALLOC_COLD static void stats_general_print(emitter_t *emitter) { const char *cpv; - bool bv, bv2; - unsigned uv; - uint32_t u32v; - uint64_t u64v; - int64_t i64v; - ssize_t ssv, ssv2; - size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz; + bool bv, bv2; + unsigned uv; + uint32_t u32v; + uint64_t u64v; + int64_t i64v; + ssize_t ssv, ssv2; + size_t sv, bsz, usz, u32sz, u64sz, i64sz, ssz, sssz, cpsz; bsz = sizeof(bool); usz = sizeof(unsigned); @@ -1539,11 +1532,11 @@ stats_general_print(emitter_t *emitter) { /* config. */ emitter_dict_begin(emitter, "config", "Build-time option settings"); -#define CONFIG_WRITE_BOOL(name) \ - do { \ - CTL_GET("config."#name, &bv, bool); \ - emitter_kv(emitter, #name, "config."#name, \ - emitter_type_bool, &bv); \ +#define CONFIG_WRITE_BOOL(name) \ + do { \ + CTL_GET("config." #name, &bv, bool); \ + emitter_kv( \ + emitter, #name, "config." #name, emitter_type_bool, &bv); \ } while (0) CONFIG_WRITE_BOOL(cache_oblivious); @@ -1565,45 +1558,33 @@ stats_general_print(emitter_t *emitter) { emitter_dict_end(emitter); /* Close "config" dict. */ /* opt. */ -#define OPT_WRITE(name, var, size, emitter_type) \ - if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \ - 0) { \ - emitter_kv(emitter, name, "opt."name, emitter_type, \ - &var); \ +#define OPT_WRITE(name, var, size, emitter_type) \ + if (je_mallctl("opt." name, (void *)&var, &size, NULL, 0) == 0) { \ + emitter_kv(emitter, name, "opt." 
name, emitter_type, &var); \ } -#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ - altname) \ - if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \ - 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \ - == 0) { \ - emitter_kv_note(emitter, name, "opt."name, \ - emitter_type, &var1, altname, emitter_type, \ - &var2); \ +#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, altname) \ + if (je_mallctl("opt." name, (void *)&var1, &size, NULL, 0) == 0 \ + && je_mallctl(altname, (void *)&var2, &size, NULL, 0) == 0) { \ + emitter_kv_note(emitter, name, "opt." name, emitter_type, \ + &var1, altname, emitter_type, &var2); \ } #define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool) -#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ +#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname) -#define OPT_WRITE_UNSIGNED(name) \ - OPT_WRITE(name, uv, usz, emitter_type_unsigned) +#define OPT_WRITE_UNSIGNED(name) OPT_WRITE(name, uv, usz, emitter_type_unsigned) -#define OPT_WRITE_INT64(name) \ - OPT_WRITE(name, i64v, i64sz, emitter_type_int64) -#define OPT_WRITE_UINT64(name) \ - OPT_WRITE(name, u64v, u64sz, emitter_type_uint64) +#define OPT_WRITE_INT64(name) OPT_WRITE(name, i64v, i64sz, emitter_type_int64) +#define OPT_WRITE_UINT64(name) OPT_WRITE(name, u64v, u64sz, emitter_type_uint64) -#define OPT_WRITE_SIZE_T(name) \ - OPT_WRITE(name, sv, ssz, emitter_type_size) -#define OPT_WRITE_SSIZE_T(name) \ - OPT_WRITE(name, ssv, sssz, emitter_type_ssize) -#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ - OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \ - altname) +#define OPT_WRITE_SIZE_T(name) OPT_WRITE(name, sv, ssz, emitter_type_size) +#define OPT_WRITE_SSIZE_T(name) OPT_WRITE(name, ssv, sssz, emitter_type_ssize) +#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, altname) -#define 
OPT_WRITE_CHAR_P(name) \ - OPT_WRITE(name, cpv, cpsz, emitter_type_string) +#define OPT_WRITE_CHAR_P(name) OPT_WRITE(name, cpv, cpsz, emitter_type_string) emitter_dict_begin(emitter, "opt", "Run-time option settings"); @@ -1623,21 +1604,24 @@ stats_general_print(emitter_t *emitter) { * Note: The outputs are strictly ordered by priorities (low -> high). * */ -#define MALLOC_CONF_WRITE(name, message) \ - if (je_mallctl("opt.malloc_conf."name, (void *)&cpv, &cpsz, NULL, 0) != \ - 0) { \ - cpv = ""; \ - } \ - emitter_kv(emitter, name, message, emitter_type_string, &cpv); +#define MALLOC_CONF_WRITE(name, message) \ + if (je_mallctl("opt.malloc_conf." name, (void *)&cpv, &cpsz, NULL, 0) \ + != 0) { \ + cpv = ""; \ + } \ + emitter_kv(emitter, name, message, emitter_type_string, &cpv); MALLOC_CONF_WRITE("global_var", "Global variable malloc_conf"); MALLOC_CONF_WRITE("symlink", "Symbolic link malloc.conf"); MALLOC_CONF_WRITE("env_var", "Environment variable MALLOC_CONF"); /* As this config is unofficial, skip the output if it's NULL */ - if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", - (void *)&cpv, &cpsz, NULL, 0) == 0) { - emitter_kv(emitter, "global_var_2_conf_harder", "Global " - "variable malloc_conf_2_conf_harder", emitter_type_string, &cpv); + if (je_mallctl("opt.malloc_conf.global_var_2_conf_harder", (void *)&cpv, + &cpsz, NULL, 0) + == 0) { + emitter_kv(emitter, "global_var_2_conf_harder", + "Global " + "variable malloc_conf_2_conf_harder", + emitter_type_string, &cpv); } #undef MALLOC_CONF_WRITE @@ -1712,8 +1696,8 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_UNSIGNED("prof_bt_max") OPT_WRITE_CHAR_P("prof_prefix") OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") - OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", - "prof.thread_active_init") + OPT_WRITE_BOOL_MUTABLE( + "prof_thread_active_init", "prof.thread_active_init") OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample") OPT_WRITE_BOOL("prof_accum") 
OPT_WRITE_SSIZE_T("lg_prof_interval") @@ -1751,12 +1735,12 @@ stats_general_print(emitter_t *emitter) { "prof.thread_active_init", emitter_type_bool, &bv); CTL_GET("prof.active", &bv, bool); - emitter_kv(emitter, "active", "prof.active", emitter_type_bool, - &bv); + emitter_kv( + emitter, "active", "prof.active", emitter_type_bool, &bv); CTL_GET("prof.gdump", &bv, bool); - emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool, - &bv); + emitter_kv( + emitter, "gdump", "prof.gdump", emitter_type_bool, &bv); CTL_GET("prof.interval", &u64v, uint64_t); emitter_kv(emitter, "interval", "prof.interval", @@ -1796,8 +1780,8 @@ stats_general_print(emitter_t *emitter) { emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); CTL_GET("arenas.hugepage", &sv, size_t); - emitter_kv(emitter, "hugepage", "Hugepage size", emitter_type_size, - &sv); + emitter_kv( + emitter, "hugepage", "Hugepage size", emitter_type_size, &sv); if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { emitter_kv(emitter, "tcache_max", @@ -1827,20 +1811,20 @@ stats_general_print(emitter_t *emitter) { emitter_json_object_begin(emitter); CTL_LEAF(arenas_bin_mib, 3, "size", &sv, size_t); - emitter_json_kv(emitter, "size", emitter_type_size, - &sv); + emitter_json_kv( + emitter, "size", emitter_type_size, &sv); CTL_LEAF(arenas_bin_mib, 3, "nregs", &u32v, uint32_t); - emitter_json_kv(emitter, "nregs", emitter_type_uint32, - &u32v); + emitter_json_kv( + emitter, "nregs", emitter_type_uint32, &u32v); CTL_LEAF(arenas_bin_mib, 3, "slab_size", &sv, size_t); - emitter_json_kv(emitter, "slab_size", emitter_type_size, - &sv); + emitter_json_kv( + emitter, "slab_size", emitter_type_size, &sv); CTL_LEAF(arenas_bin_mib, 3, "nshards", &u32v, uint32_t); - emitter_json_kv(emitter, "nshards", emitter_type_uint32, - &u32v); + emitter_json_kv( + emitter, "nshards", emitter_type_uint32, &u32v); emitter_json_object_end(emitter); } @@ -1861,8 +1845,8 @@ stats_general_print(emitter_t *emitter) { 
emitter_json_object_begin(emitter); CTL_LEAF(arenas_lextent_mib, 3, "size", &sv, size_t); - emitter_json_kv(emitter, "size", emitter_type_size, - &sv); + emitter_json_kv( + emitter, "size", emitter_type_size, &sv); emitter_json_object_end(emitter); } @@ -1882,8 +1866,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, */ size_t allocated, active, metadata, metadata_edata, metadata_rtree, metadata_thp, resident, mapped, retained; - size_t num_background_threads; - size_t zero_reallocs; + size_t num_background_threads; + size_t zero_reallocs; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); @@ -1916,23 +1900,24 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); emitter_json_kv(emitter, "active", emitter_type_size, &active); emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); - emitter_json_kv(emitter, "metadata_edata", emitter_type_size, - &metadata_edata); - emitter_json_kv(emitter, "metadata_rtree", emitter_type_size, - &metadata_rtree); - emitter_json_kv(emitter, "metadata_thp", emitter_type_size, - &metadata_thp); + emitter_json_kv( + emitter, "metadata_edata", emitter_type_size, &metadata_edata); + emitter_json_kv( + emitter, "metadata_rtree", emitter_type_size, &metadata_rtree); + emitter_json_kv( + emitter, "metadata_thp", emitter_type_size, &metadata_thp); emitter_json_kv(emitter, "resident", emitter_type_size, &resident); emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); emitter_json_kv(emitter, "retained", emitter_type_size, &retained); - emitter_json_kv(emitter, "zero_reallocs", emitter_type_size, - &zero_reallocs); + emitter_json_kv( + emitter, "zero_reallocs", emitter_type_size, &zero_reallocs); - emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " + emitter_table_printf(emitter, + "Allocated: %zu, active: %zu, " "metadata: %zu (n_thp %zu, 
edata %zu, rtree %zu), resident: %zu, " - "mapped: %zu, retained: %zu\n", allocated, active, metadata, - metadata_thp, metadata_edata, metadata_rtree, resident, mapped, - retained); + "mapped: %zu, retained: %zu\n", + allocated, active, metadata, metadata_thp, metadata_edata, + metadata_rtree, resident, mapped, retained); /* Strange behaviors */ emitter_table_printf(emitter, @@ -1940,16 +1925,17 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Background thread stats. */ emitter_json_object_kv_begin(emitter, "background_thread"); - emitter_json_kv(emitter, "num_threads", emitter_type_size, - &num_background_threads); + emitter_json_kv( + emitter, "num_threads", emitter_type_size, &num_background_threads); emitter_json_kv(emitter, "num_runs", emitter_type_uint64, &background_thread_num_runs); emitter_json_kv(emitter, "run_interval", emitter_type_uint64, &background_thread_run_interval); emitter_json_object_end(emitter); /* Close "background_thread". */ - emitter_table_printf(emitter, "Background threads: %zu, " - "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", + emitter_table_printf(emitter, + "Background threads: %zu, " + "num_runs: %" FMTu64 ", run_interval: %" FMTu64 " ns\n", num_background_threads, background_thread_num_runs, background_thread_run_interval); @@ -1958,7 +1944,7 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, emitter_col_t name; emitter_col_t col64[mutex_prof_num_uint64_t_counters]; emitter_col_t col32[mutex_prof_num_uint32_t_counters]; - uint64_t uptime; + uint64_t uptime; emitter_row_init(&row); mutex_stats_init_cols(&row, "", &name, col64, col32); @@ -1973,7 +1959,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { mutex_stats_read_global(stats_mutexes_mib, 2, global_mutex_names[i], &name, col64, col32, uptime); - emitter_json_object_kv_begin(emitter, global_mutex_names[i]); + emitter_json_object_kv_begin( + 
emitter, global_mutex_names[i]); mutex_stats_emit(emitter, &row, col64, col32); emitter_json_object_end(emitter); } @@ -1993,23 +1980,23 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, size_t miblen = sizeof(mib) / sizeof(size_t); size_t sz; VARIABLE_ARRAY_UNSAFE(bool, initialized, narenas); - bool destroyed_initialized; + bool destroyed_initialized; unsigned i, ninitialized; xmallctlnametomib("arena.0.initialized", mib, &miblen); for (i = ninitialized = 0; i < narenas; i++) { mib[1] = i; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &initialized[i], &sz, - NULL, 0); + xmallctlbymib( + mib, miblen, &initialized[i], &sz, NULL, 0); if (initialized[i]) { ninitialized++; } } mib[1] = MALLCTL_ARENAS_DESTROYED; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, - NULL, 0); + xmallctlbymib( + mib, miblen, &destroyed_initialized, &sz, NULL, 0); /* Merged stats. */ if (merged && (ninitialized > 1 || !unmerged)) { @@ -2024,12 +2011,13 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, /* Destroyed stats. */ if (destroyed_initialized && destroyed) { /* Print destroyed arena stats. */ - emitter_table_printf(emitter, - "Destroyed arenas stats:\n"); + emitter_table_printf( + emitter, "Destroyed arenas stats:\n"); emitter_json_object_kv_begin(emitter, "destroyed"); stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, bins, large, mutex, extents, hpa); - emitter_json_object_end(emitter); /* Close "destroyed". */ + emitter_json_object_end( + emitter); /* Close "destroyed". */ } /* Unmerged stats. 
*/ @@ -2039,8 +2027,8 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, char arena_ind_str[20]; malloc_snprintf(arena_ind_str, sizeof(arena_ind_str), "%u", i); - emitter_json_object_kv_begin(emitter, - arena_ind_str); + emitter_json_object_kv_begin( + emitter, arena_ind_str); emitter_table_printf(emitter, "arenas[%s]:\n", arena_ind_str); stats_arena_print(emitter, i, bins, @@ -2056,9 +2044,9 @@ stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, void stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { - int err; + int err; uint64_t epoch; - size_t u64sz; + size_t u64sz; #define OPTION(o, v, d, s) bool v = d; STATS_PRINT_OPTIONS #undef OPTION @@ -2072,15 +2060,17 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { * */ epoch = 1; u64sz = sizeof(uint64_t); - err = je_mallctl("epoch", (void *)&epoch, &u64sz, (void *)&epoch, - sizeof(uint64_t)); + err = je_mallctl( + "epoch", (void *)&epoch, &u64sz, (void *)&epoch, sizeof(uint64_t)); if (err != 0) { if (err == EAGAIN) { - malloc_write(": Memory allocation failure in " + malloc_write( + ": Memory allocation failure in " "mallctl(\"epoch\", ...)\n"); return; } - malloc_write(": Failure in mallctl(\"epoch\", " + malloc_write( + ": Failure in mallctl(\"epoch\", " "...)\n"); abort(); } @@ -2088,7 +2078,10 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { if (opts != NULL) { for (unsigned i = 0; opts[i] != '\0'; i++) { switch (opts[i]) { -#define OPTION(o, v, d, s) case o: v = s; break; +#define OPTION(o, v, d, s) \ + case o: \ + v = s; \ + break; STATS_PRINT_OPTIONS #undef OPTION default:; @@ -2098,8 +2091,8 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { emitter_t emitter; emitter_init(&emitter, - json ? emitter_output_json_compact : emitter_output_table, - write_cb, cbopaque); + json ? 
emitter_output_json_compact : emitter_output_table, write_cb, + cbopaque); emitter_begin(&emitter); emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); emitter_json_object_kv_begin(&emitter, "jemalloc"); @@ -2108,8 +2101,8 @@ stats_print(write_cb_t *write_cb, void *cbopaque, const char *opts) { stats_general_print(&emitter); } if (config_stats) { - stats_print_helper(&emitter, merged, destroyed, unmerged, - bins, large, mutex, extents, hpa); + stats_print_helper(&emitter, merged, destroyed, unmerged, bins, + large, mutex, extents, hpa); } emitter_json_object_end(&emitter); /* Closes the "jemalloc" dict. */ @@ -2135,8 +2128,8 @@ stats_interval_event_handler(tsd_t *tsd) { uint64_t elapsed = last_event - last_sample_event; assert(elapsed > 0 && elapsed != TE_INVALID_ELAPSED); - if (counter_accum(tsd_tsdn(tsd), &stats_interval_accumulated, - elapsed)) { + if (counter_accum( + tsd_tsdn(tsd), &stats_interval_accumulated, elapsed)) { je_malloc_stats_print(NULL, NULL, opt_stats_interval_opts); } } @@ -2147,10 +2140,10 @@ stats_interval_enabled(void) { } te_base_cb_t stats_interval_te_handler = { - .enabled = &stats_interval_enabled, - .new_event_wait = &stats_interval_new_event_wait, - .postponed_event_wait = &stats_interval_postponed_event_wait, - .event_handler = &stats_interval_event_handler, + .enabled = &stats_interval_enabled, + .new_event_wait = &stats_interval_new_event_wait, + .postponed_event_wait = &stats_interval_postponed_event_wait, + .event_handler = &stats_interval_event_handler, }; bool @@ -2160,12 +2153,12 @@ stats_boot(void) { assert(opt_stats_interval == -1); stats_interval = 0; stats_interval_accum_batch = 0; - } else{ + } else { /* See comments in stats.h */ - stats_interval = (opt_stats_interval > 0) ? - opt_stats_interval : 1; - uint64_t batch = stats_interval >> - STATS_INTERVAL_ACCUM_LG_BATCH_SIZE; + stats_interval = (opt_stats_interval > 0) ? 
opt_stats_interval + : 1; + uint64_t batch = stats_interval + >> STATS_INTERVAL_ACCUM_LG_BATCH_SIZE; if (batch > STATS_INTERVAL_ACCUM_BATCH_MAX) { batch = STATS_INTERVAL_ACCUM_BATCH_MAX; } else if (batch == 0) { diff --git a/src/sz.c b/src/sz.c index 89def9d5..4a4c057d 100644 --- a/src/sz.c +++ b/src/sz.c @@ -3,12 +3,12 @@ #include "jemalloc/internal/sz.h" JEMALLOC_ALIGNED(CACHELINE) -size_t sz_pind2sz_tab[SC_NPSIZES+1]; +size_t sz_pind2sz_tab[SC_NPSIZES + 1]; size_t sz_large_pad; size_t sz_psz_quantize_floor(size_t size) { - size_t ret; + size_t ret; pszind_t pind; assert(size > 0); @@ -47,8 +47,8 @@ sz_psz_quantize_ceil(size_t size) { * search would potentially find sufficiently aligned available * memory somewhere lower. */ - ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + - sz_large_pad; + ret = sz_pind2sz(sz_psz2ind(ret - sz_large_pad + 1)) + + sz_large_pad; } return ret; } @@ -93,12 +93,12 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1; size_t dst_ind = 0; for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; - sc_ind++) { + sc_ind++) { const sc_t *sc = &sc_data->sc[sc_ind]; - size_t sz = (ZU(1) << sc->lg_base) + size_t sz = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); size_t max_ind = ((sz + (ZU(1) << SC_LG_TINY_MIN) - 1) - >> SC_LG_TINY_MIN); + >> SC_LG_TINY_MIN); for (; dst_ind <= max_ind && dst_ind < dst_max; dst_ind++) { assert(sc_ind < 1 << (sizeof(uint8_t) * 8)); sz_size2index_tab[dst_ind] = (uint8_t)sc_ind; diff --git a/src/tcache.c b/src/tcache.c index 0154403d..44a96841 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -29,7 +29,7 @@ unsigned opt_tcache_nslots_large = 20; * This is bounded by some other constraints as well, like the fact that it * must be even, must be less than opt_tcache_nslots_small_max, etc.. 
*/ -ssize_t opt_lg_tcache_nslots_mul = 1; +ssize_t opt_lg_tcache_nslots_mul = 1; /* * Number of allocation bytes between tcache incremental GCs. Again, this @@ -63,13 +63,13 @@ unsigned opt_lg_tcache_flush_large_div = 1; * is only used to initialize tcache_nbins in the per-thread tcache. * Directly modifying it will not affect threads already launched. */ -unsigned global_do_not_change_tcache_nbins; +unsigned global_do_not_change_tcache_nbins; /* * Max size class to be cached (can be small or large). This value is only used * to initialize tcache_max in the per-thread tcache. Directly modifying it * will not affect threads already launched. */ -size_t global_do_not_change_tcache_maxclass; +size_t global_do_not_change_tcache_maxclass; /* * Default bin info for each bin. Will be initialized in malloc_conf_init @@ -83,16 +83,16 @@ static cache_bin_info_t opt_tcache_ncached_max[TCACHE_NBINS_MAX] = {{0}}; */ static bool opt_tcache_ncached_max_set[TCACHE_NBINS_MAX] = {0}; -tcaches_t *tcaches; +tcaches_t *tcaches; /* Index of first element within tcaches that has never been used. */ -static unsigned tcaches_past; +static unsigned tcaches_past; /* Head of singly linked list tracking available tcaches elements. */ -static tcaches_t *tcaches_avail; +static tcaches_t *tcaches_avail; /* Protects tcaches{,_past,_avail}. */ -static malloc_mutex_t tcaches_mtx; +static malloc_mutex_t tcaches_mtx; /******************************************************************************/ @@ -180,8 +180,8 @@ tcache_nfill_small_burst_reset(tcache_slow_t *tcache_slow, szind_t szind) { * count should be decreased, i.e. lg_div(base) should be increased. 
*/ static inline void -tcache_nfill_small_gc_update(tcache_slow_t *tcache_slow, szind_t szind, - cache_bin_sz_t limit) { +tcache_nfill_small_gc_update( + tcache_slow_t *tcache_slow, szind_t szind, cache_bin_sz_t limit) { cache_bin_fill_ctl_t *ctl = tcache_bin_fill_ctl_get(tcache_slow, szind); if (!limit && ctl->base > 1) { /* @@ -214,16 +214,17 @@ tcache_gc_item_delay_compute(szind_t szind) { } static inline void * -tcache_gc_small_heuristic_addr_get(tsd_t *tsd, tcache_slow_t *tcache_slow, - szind_t szind) { +tcache_gc_small_heuristic_addr_get( + tsd_t *tsd, tcache_slow_t *tcache_slow, szind_t szind) { assert(szind < SC_NBINS); tsdn_t *tsdn = tsd_tsdn(tsd); - bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); + bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); assert(bin != NULL); malloc_mutex_lock(tsdn, &bin->lock); - edata_t *slab = (bin->slabcur == NULL) ? - edata_heap_first(&bin->slabs_nonfull) : bin->slabcur; + edata_t *slab = (bin->slabcur == NULL) + ? edata_heap_first(&bin->slabs_nonfull) + : bin->slabcur; assert(slab != NULL || edata_heap_empty(&bin->slabs_nonfull)); void *ret = (slab != NULL) ? edata_addr_get(slab) : NULL; assert(ret != NULL || slab == NULL); @@ -250,21 +251,23 @@ tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr, * starting from 2M, so that the total number of disjoint virtual * memory ranges retained by each shard is limited. */ - uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT) ? - ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT) : 0; - uintptr_t neighbor_max = ((uintptr_t)addr < (UINTPTR_MAX - - TCACHE_GC_NEIGHBOR_LIMIT)) ? ((uintptr_t)addr + - TCACHE_GC_NEIGHBOR_LIMIT) : UINTPTR_MAX; + uintptr_t neighbor_min = ((uintptr_t)addr > TCACHE_GC_NEIGHBOR_LIMIT) + ? ((uintptr_t)addr - TCACHE_GC_NEIGHBOR_LIMIT) + : 0; + uintptr_t neighbor_max = ((uintptr_t)addr + < (UINTPTR_MAX - TCACHE_GC_NEIGHBOR_LIMIT)) + ? 
((uintptr_t)addr + TCACHE_GC_NEIGHBOR_LIMIT) + : UINTPTR_MAX; /* Scan the entire bin to count the number of remote pointers. */ - void **head = cache_bin->stack_head; + void **head = cache_bin->stack_head; cache_bin_sz_t n_remote_slab = 0, n_remote_neighbor = 0; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); for (void **cur = head; cur < head + ncached; cur++) { - n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, - slab_min, slab_max); - n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote(*cur, - neighbor_min, neighbor_max); + n_remote_slab += (cache_bin_sz_t)tcache_gc_is_addr_remote( + *cur, slab_min, slab_max); + n_remote_neighbor += (cache_bin_sz_t)tcache_gc_is_addr_remote( + *cur, neighbor_min, neighbor_max); } /* * Note: since slab size is dynamic and can be larger than 2M, i.e. @@ -295,8 +298,8 @@ tcache_gc_small_nremote_get(cache_bin_t *cache_bin, void *addr, /* Shuffle the ptrs in the bin to put the remote pointers at the bottom. */ static inline void tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, - uintptr_t addr_min, uintptr_t addr_max) { - void **swap = NULL; + uintptr_t addr_min, uintptr_t addr_max) { + void **swap = NULL; cache_bin_sz_t ncached = cache_bin_ncached_get_local(cache_bin); cache_bin_sz_t ntop = ncached - nremote, cnt = 0; assert(ntop > 0 && ntop < ncached); @@ -320,13 +323,15 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, */ if (swap != NULL) { assert(swap < cur); - assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + assert(tcache_gc_is_addr_remote( + *swap, addr_min, addr_max)); void *tmp = *cur; *cur = *swap; *swap = tmp; swap++; assert(swap <= cur); - assert(tcache_gc_is_addr_remote(*swap, addr_min, addr_max)); + assert(tcache_gc_is_addr_remote( + *swap, addr_min, addr_max)); } continue; } else if (swap == NULL) { @@ -344,8 +349,8 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, break; } if 
(!tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) { - assert(tcache_gc_is_addr_remote(*(head + cnt), addr_min, - addr_max)); + assert(tcache_gc_is_addr_remote( + *(head + cnt), addr_min, addr_max)); void *tmp = *cur; *cur = *(head + cnt); *(head + cnt) = tmp; @@ -356,15 +361,17 @@ tcache_gc_small_bin_shuffle(cache_bin_t *cache_bin, cache_bin_sz_t nremote, /* Sanity check to make sure the shuffle is done correctly. */ for (void **cur = head; cur < head + ncached; cur++) { assert(*cur != NULL); - assert(((cur < head + ntop) && !tcache_gc_is_addr_remote( - *cur, addr_min, addr_max)) || ((cur >= head + ntop) && - tcache_gc_is_addr_remote(*cur, addr_min, addr_max))); + assert( + ((cur < head + ntop) + && !tcache_gc_is_addr_remote(*cur, addr_min, addr_max)) + || ((cur >= head + ntop) + && tcache_gc_is_addr_remote(*cur, addr_min, addr_max))); } } static bool -tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - szind_t szind) { +tcache_gc_small( + tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { /* * Aim to flush 3/4 of items below low-water, with remote pointers being * prioritized for flushing. @@ -403,24 +410,26 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (nflush < tcache_slow->bin_flush_delay_items[szind]) { /* Workaround for a conversion warning. */ uint8_t nflush_uint8 = (uint8_t)nflush; - assert(sizeof(tcache_slow->bin_flush_delay_items[0]) == - sizeof(nflush_uint8)); - tcache_slow->bin_flush_delay_items[szind] -= nflush_uint8; + assert(sizeof(tcache_slow->bin_flush_delay_items[0]) + == sizeof(nflush_uint8)); + tcache_slow->bin_flush_delay_items[szind] -= + nflush_uint8; return false; } - tcache_slow->bin_flush_delay_items[szind] - = tcache_gc_item_delay_compute(szind); + tcache_slow->bin_flush_delay_items[szind] = + tcache_gc_item_delay_compute(szind); goto label_flush; } /* Directly goto the flush path when the entire bin needs to be flushed. 
*/ - if ( nflush == ncached) { + if (nflush == ncached) { goto label_flush; } /* Query arena binshard to get heuristic locality info. */ - void *addr = tcache_gc_small_heuristic_addr_get(tsd, tcache_slow, szind); + void *addr = tcache_gc_small_heuristic_addr_get( + tsd, tcache_slow, szind); if (addr == NULL) { goto label_flush; } @@ -429,9 +438,9 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * Use the queried addr above to get the number of remote ptrs in the * bin, and the min/max of the local addr range. */ - uintptr_t addr_min, addr_max; - cache_bin_sz_t nremote = tcache_gc_small_nremote_get(cache_bin, addr, - &addr_min, &addr_max, szind, nflush); + uintptr_t addr_min, addr_max; + cache_bin_sz_t nremote = tcache_gc_small_nremote_get( + cache_bin, addr, &addr_min, &addr_max, szind, nflush); /* * Update the nflush to the larger value between the intended flush count @@ -448,7 +457,7 @@ tcache_gc_small(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * also be flushed. */ assert(nflush < ncached || nremote == ncached); - if (nremote == 0 || nremote == ncached) { + if (nremote == 0 || nremote == ncached) { goto label_flush; } @@ -467,14 +476,14 @@ label_flush: return false; } assert(nflush <= ncached); - tcache_bin_flush_small(tsd, tcache, cache_bin, szind, - (unsigned)(ncached - nflush)); + tcache_bin_flush_small( + tsd, tcache, cache_bin, szind, (unsigned)(ncached - nflush)); return true; } static bool -tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - szind_t szind) { +tcache_gc_large( + tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { /* * Like the small GC, flush 3/4 of untouched items. However, simply flush * the bottom nflush items, without any locality check. 
@@ -486,16 +495,16 @@ tcache_gc_large(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (low_water == 0) { return false; } - unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin) - - low_water + (low_water >> 2)); + unsigned nrem = (unsigned)(cache_bin_ncached_get_local(cache_bin) + - low_water + (low_water >> 2)); tcache_bin_flush_large(tsd, tcache, cache_bin, szind, nrem); return true; } /* Try to gc one bin by szind, return true if there is item flushed. */ static bool -tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - szind_t szind) { +tcache_try_gc_bin( + tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, szind_t szind) { assert(tcache != NULL); cache_bin_t *cache_bin = &tcache->bins[szind]; if (tcache_bin_disabled(szind, cache_bin, tcache_slow)) { @@ -504,8 +513,8 @@ tcache_try_gc_bin(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, bool is_small = (szind < SC_NBINS); tcache_bin_flush_stashed(tsd, tcache, cache_bin, szind, is_small); - bool ret = is_small ? tcache_gc_small(tsd, tcache_slow, tcache, szind) : - tcache_gc_large(tsd, tcache_slow, tcache, szind); + bool ret = is_small ? tcache_gc_small(tsd, tcache_slow, tcache, szind) + : tcache_gc_large(tsd, tcache_slow, tcache, szind); cache_bin_low_water_set(cache_bin); return ret; } @@ -536,8 +545,8 @@ tcache_gc_event(tsd_t *tsd) { nstime_update(&now); assert(nstime_compare(&now, &tcache_slow->last_gc_time) >= 0); - if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time) < - TCACHE_GC_INTERVAL_NS) { + if (nstime_ns(&now) - nstime_ns(&tcache_slow->last_gc_time) + < TCACHE_GC_INTERVAL_NS) { // time interval is too short, skip this event. return; } @@ -546,13 +555,15 @@ tcache_gc_event(tsd_t *tsd) { unsigned gc_small_nbins = 0, gc_large_nbins = 0; unsigned tcache_nbins = tcache_nbins_get(tcache_slow); - unsigned small_nbins = tcache_nbins > SC_NBINS ? 
SC_NBINS : tcache_nbins; - szind_t szind_small = tcache_slow->next_gc_bin_small; - szind_t szind_large = tcache_slow->next_gc_bin_large; + unsigned small_nbins = tcache_nbins > SC_NBINS ? SC_NBINS + : tcache_nbins; + szind_t szind_small = tcache_slow->next_gc_bin_small; + szind_t szind_large = tcache_slow->next_gc_bin_large; /* Flush at most TCACHE_GC_SMALL_NBINS_MAX small bins at a time. */ - for (unsigned i = 0; i < small_nbins && gc_small_nbins < - TCACHE_GC_SMALL_NBINS_MAX; i++) { + for (unsigned i = 0; + i < small_nbins && gc_small_nbins < TCACHE_GC_SMALL_NBINS_MAX; + i++) { assert(szind_small < SC_NBINS); if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_small)) { gc_small_nbins++; @@ -568,8 +579,9 @@ tcache_gc_event(tsd_t *tsd) { } /* Flush at most TCACHE_GC_LARGE_NBINS_MAX large bins at a time. */ - for (unsigned i = SC_NBINS; i < tcache_nbins && gc_large_nbins < - TCACHE_GC_LARGE_NBINS_MAX; i++) { + for (unsigned i = SC_NBINS; + i < tcache_nbins && gc_large_nbins < TCACHE_GC_LARGE_NBINS_MAX; + i++) { assert(szind_large >= SC_NBINS && szind_large < tcache_nbins); if (tcache_try_gc_bin(tsd, tcache_slow, tcache, szind_large)) { gc_large_nbins++; @@ -582,11 +594,10 @@ tcache_gc_event(tsd_t *tsd) { } void * -tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, - tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, - bool *tcache_success) { +tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, bool *tcache_success) { tcache_slow_t *tcache_slow = tcache->tcache_slow; - void *ret; + void *ret; assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); @@ -596,8 +607,9 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, nfill = 1; } arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, - /* nfill_min */ opt_experimental_tcache_gc ? - ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); + /* nfill_min */ + opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, + /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); @@ -612,8 +624,8 @@ tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { } static void -tcache_bin_flush_metadata_visitor(void *szind_sum_ctx, - emap_full_alloc_ctx_t *alloc_ctx) { +tcache_bin_flush_metadata_visitor( + void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { size_t *szind_sum = (size_t *)szind_sum_ctx; *szind_sum -= alloc_ctx->szind; util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); @@ -640,7 +652,6 @@ tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, static void tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, szind_t binind, size_t nflush, emap_batch_lookup_result_t *edatas) { - /* * This gets compiled away when config_opt_safety_checks is false. * Checks for sized deallocation bugs, failing early rather than @@ -649,16 +660,16 @@ tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, size_t szind_sum = binind * nflush; emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, &tcache_bin_flush_ptr_getter, (void *)arr, - &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, - edatas); + &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); if (config_opt_safety_checks && unlikely(szind_sum != 0)) { tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); } } JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { +tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, + unsigned nflush) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -669,7 +680,8 @@ tcache_bin_flush_impl_small(tsd_t 
*tsd, tcache_t *tcache, cache_bin_t *cache_bin assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned tcache_binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + unsigned tcache_binshard = + tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -727,8 +739,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); unsigned cur_binshard = edata_binshard_get(cur_edata); - bin_t *cur_bin = arena_get_bin(cur_arena, binind, - cur_binshard); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); /* @@ -737,16 +748,18 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin */ flush_start++; for (unsigned i = flush_start; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; + void *ptr = ptrs->ptr[i]; edata_t *edata = item_edata[i].edata; assert(ptr != NULL && edata != NULL); - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + assert( + (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert( + (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); if (edata_arena_ind_get(edata) == cur_arena_ind && edata_binshard_get(edata) == cur_binshard) { /* Swap the edatas. */ - emap_batch_lookup_result_t temp_edata - = item_edata[flush_start]; + emap_batch_lookup_result_t temp_edata = + item_edata[flush_start]; item_edata[flush_start] = item_edata[i]; item_edata[i] = temp_edata; /* Swap the pointers */ @@ -759,7 +772,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin /* Make sure we implemented partitioning correctly. 
*/ if (config_debug) { for (unsigned i = prev_flush_start; i < flush_start; - i++) { + i++) { edata_t *edata = item_edata[i].edata; unsigned arena_ind = edata_arena_ind_get(edata); assert(arena_ind == cur_arena_ind); @@ -768,10 +781,10 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } for (unsigned i = flush_start; i < nflush; i++) { edata_t *edata = item_edata[i].edata; - assert(edata_arena_ind_get(edata) - != cur_arena_ind + assert( + edata_arena_ind_get(edata) != cur_arena_ind || edata_binshard_get(edata) - != cur_binshard); + != cur_binshard); } } @@ -817,7 +830,7 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin bool home_binshard = (cur_arena == tcache_arena && cur_binshard == tcache_binshard); bool can_batch = (flush_start - prev_flush_start - <= opt_bin_info_remote_free_max_batch) + <= opt_bin_info_remote_free_max_batch) && !home_binshard && bin_is_batched; /* @@ -831,8 +844,8 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); } if (can_batch && !locked) { - bin_with_batch_t *batched_bin = - (bin_with_batch_t *)cur_bin; + bin_with_batch_t *batched_bin = (bin_with_batch_t *) + cur_bin; size_t push_idx = batcher_push_begin(tsdn, &batched_bin->remote_frees, flush_start - prev_flush_start); @@ -840,19 +853,19 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin if (push_idx != BATCHER_NO_IDX) { batched = true; - unsigned nbatched - = flush_start - prev_flush_start; + unsigned nbatched = flush_start + - prev_flush_start; for (unsigned i = 0; i < nbatched; i++) { unsigned src_ind = prev_flush_start + i; - batched_bin->remote_free_data[ - push_idx + i].ptr - = ptrs->ptr[src_ind]; - batched_bin->remote_free_data[ - push_idx + i].slab - = item_edata[src_ind].edata; + batched_bin + ->remote_free_data[push_idx + i] + .ptr = ptrs->ptr[src_ind]; + batched_bin + 
->remote_free_data[push_idx + i] + .slab = item_edata[src_ind].edata; } - batcher_push_end(tsdn, - &batched_bin->remote_frees); + batcher_push_end( + tsdn, &batched_bin->remote_frees); } else { batch_failed = true; } @@ -887,16 +900,17 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin cache_bin->tstats.nrequests = 0; } unsigned preallocated_slabs = nflush; - unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); + unsigned ndalloc_slabs = + arena_bin_batch_get_ndalloc_slabs( + preallocated_slabs); /* Next flush objects our own objects. */ /* Init only to avoid used-uninitialized warning. */ arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); for (unsigned i = prev_flush_start; i < flush_start; - i++) { - void *ptr = ptrs->ptr[i]; + i++) { + void *ptr = ptrs->ptr[i]; edata_t *edata = item_edata[i].edata; arena_dalloc_bin_locked_step(tsdn, cur_arena, cur_bin, &dalloc_bin_info, binind, edata, @@ -910,16 +924,16 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin if (bin_is_batched) { arena_bin_flush_batch_impl(tsdn, cur_arena, cur_bin, &dalloc_bin_info, binind, - dalloc_slabs, ndalloc_slabs, - &dalloc_count, &dalloc_slabs_extra); + dalloc_slabs, ndalloc_slabs, &dalloc_count, + &dalloc_slabs_extra); } - arena_dalloc_bin_locked_finish(tsdn, cur_arena, cur_bin, - &dalloc_bin_info); + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); } - arena_decay_ticks(tsdn, cur_arena, - flush_start - prev_flush_start); + arena_decay_ticks( + tsdn, cur_arena, flush_start - prev_flush_start); } /* Handle all deferred slab dalloc. 
*/ @@ -934,24 +948,24 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin } if (config_stats && !merged_stats) { - /* + /* * The flush loop didn't happen to flush to this * thread's arena, so the stats didn't get merged. * Manually do so now. */ - bin_t *bin = arena_bin_choose(tsdn, tcache_arena, - binind, NULL); - malloc_mutex_lock(tsdn, &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); + bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; + malloc_mutex_unlock(tsdn, &bin->lock); } } JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush) { +tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, + cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, + unsigned nflush) { tcache_slow_t *tcache_slow = tcache->tcache_slow; /* * A couple lookup calls take tsdn; declare it once for convenience @@ -1009,8 +1023,7 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin assert(ptr != NULL && edata != NULL); if (edata_arena_ind_get(edata) == cur_arena_ind) { - large_dalloc_prep_locked(tsdn, - edata); + large_dalloc_prep_locked(tsdn, edata); } } if (!arena_is_auto(cur_arena)) { @@ -1035,8 +1048,8 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin ndeferred++; continue; } - if (large_dalloc_safety_checks(edata, ptr, - sz_index2size(binind))) { + if (large_dalloc_safety_checks( + edata, ptr, sz_index2size(binind))) { /* See the comment in isfree. 
*/ continue; } @@ -1048,8 +1061,7 @@ tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin if (config_stats && !merged_stats) { arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); + &tcache_arena->stats, binind, cache_bin->tstats.nrequests); cache_bin->tstats.nrequests = 0; } } @@ -1058,7 +1070,7 @@ JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { assert(ptrs != NULL && ptrs->ptr != NULL); - unsigned nflush_batch, nflushed = 0; + unsigned nflush_batch, nflushed = 0; cache_bin_ptr_array_t ptrs_batch; do { nflush_batch = nflush - nflushed; @@ -1078,11 +1090,11 @@ tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * '...' is morally equivalent, the code itself needs slight tweaks. */ if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, binind, - &ptrs_batch, nflush_batch); + tcache_bin_flush_impl_small(tsd, tcache, cache_bin, + binind, &ptrs_batch, nflush_batch); } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, binind, - &ptrs_batch, nflush_batch); + tcache_bin_flush_impl_large(tsd, tcache, cache_bin, + binind, &ptrs_batch, nflush_batch); } nflushed += nflush_batch; } while (nflushed < nflush); @@ -1117,8 +1129,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nflush, - small); + tcache_bin_flush_impl( + tsd, tcache, cache_bin, binind, &ptrs, nflush, small); cache_bin_finish_flush(cache_bin, &ptrs, nflush); } @@ -1157,7 +1169,7 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, * items remain unchanged -- the stashed items reside on the other end * of the stack. 
Checking the stack head and ncached to verify. */ - void *head_content = *cache_bin->stack_head; + void *head_content = *cache_bin->stack_head; cache_bin_sz_t orig_cached = cache_bin_ncached_get_local(cache_bin); cache_bin_sz_t nstashed = cache_bin_nstashed_get_local(cache_bin); @@ -1167,11 +1179,11 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, } CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nstashed); - cache_bin_init_ptr_array_for_stashed(cache_bin, binind, &ptrs, - nstashed); + cache_bin_init_ptr_array_for_stashed( + cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); - tcache_bin_flush_impl(tsd, tcache, cache_bin, binind, &ptrs, nstashed, - is_small); + tcache_bin_flush_impl( + tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); cache_bin_finish_flush_stashed(cache_bin); assert(cache_bin_nstashed_get_local(cache_bin) == 0); @@ -1190,8 +1202,8 @@ tcache_get_default_ncached_max(void) { } bool -tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, - cache_bin_sz_t *ncached_max) { +tcache_bin_ncached_max_read( + tsd_t *tsd, size_t bin_size, cache_bin_sz_t *ncached_max) { if (bin_size > TCACHE_MAXCLASS_LIMIT) { return true; } @@ -1206,8 +1218,9 @@ tcache_bin_ncached_max_read(tsd_t *tsd, size_t bin_size, szind_t bin_ind = sz_size2index(bin_size); cache_bin_t *bin = &tcache->bins[bin_ind]; - *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) ? - 0: cache_bin_ncached_max_get(bin); + *ncached_max = tcache_bin_disabled(bin_ind, bin, tcache->tcache_slow) + ? 
0 + : cache_bin_ncached_max_get(bin); return false; } @@ -1233,17 +1246,17 @@ tcache_arena_associate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, } static void -tcache_arena_dissociate(tsdn_t *tsdn, tcache_slow_t *tcache_slow, - tcache_t *tcache) { +tcache_arena_dissociate( + tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache) { arena_t *arena = tcache_slow->arena; assert(arena != NULL); if (config_stats) { /* Unlink from list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); if (config_debug) { - bool in_ql = false; + bool in_ql = false; tcache_slow_t *iter; - ql_foreach(iter, &arena->tcache_ql, link) { + ql_foreach (iter, &arena->tcache_ql, link) { if (iter == tcache_slow) { in_ql = true; break; @@ -1276,8 +1289,8 @@ tcache_default_settings_init(tcache_slow_t *tcache_slow) { } static void -tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, - void *mem, const cache_bin_info_t *tcache_bin_info) { +tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, void *mem, + const cache_bin_info_t *tcache_bin_info) { tcache->tcache_slow = tcache_slow; tcache_slow->tcache = tcache; @@ -1296,23 +1309,22 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, * worrying about which ones are disabled. 
*/ unsigned tcache_nbins = tcache_nbins_get(tcache_slow); - size_t cur_offset = 0; - cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, - &cur_offset); + size_t cur_offset = 0; + cache_bin_preincrement(tcache_bin_info, tcache_nbins, mem, &cur_offset); for (unsigned i = 0; i < tcache_nbins; i++) { if (i < SC_NBINS) { tcache_bin_fill_ctl_init(tcache_slow, i); tcache_slow->bin_refilled[i] = false; - tcache_slow->bin_flush_delay_items[i] - = tcache_gc_item_delay_compute(i); + tcache_slow->bin_flush_delay_items[i] = + tcache_gc_item_delay_compute(i); } cache_bin_t *cache_bin = &tcache->bins[i]; if (tcache_bin_info[i].ncached_max > 0) { - cache_bin_init(cache_bin, &tcache_bin_info[i], mem, - &cur_offset); + cache_bin_init( + cache_bin, &tcache_bin_info[i], mem, &cur_offset); } else { - cache_bin_init_disabled(cache_bin, - tcache_bin_info[i].ncached_max); + cache_bin_init_disabled( + cache_bin, tcache_bin_info[i].ncached_max); } } /* @@ -1323,8 +1335,8 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, */ for (unsigned i = tcache_nbins; i < TCACHE_NBINS_MAX; i++) { cache_bin_t *cache_bin = &tcache->bins[i]; - cache_bin_init_disabled(cache_bin, - tcache_bin_info[i].ncached_max); + cache_bin_init_disabled( + cache_bin, tcache_bin_info[i].ncached_max); assert(tcache_bin_disabled(i, cache_bin, tcache->tcache_slow)); } @@ -1332,8 +1344,8 @@ tcache_init(tsd_t *tsd, tcache_slow_t *tcache_slow, tcache_t *tcache, if (config_debug) { /* Sanity check that the whole stack is used. */ size_t size, alignment; - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, - &size, &alignment); + cache_bin_info_compute_alloc( + tcache_bin_info, tcache_nbins, &size, &alignment); assert(cur_offset == size); } } @@ -1402,26 +1414,26 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { * than tcache_nbins, no items will be cached. 
*/ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - unsigned ncached_max = tcache_get_default_ncached_max_set(i) ? - (unsigned)tcache_get_default_ncached_max()[i].ncached_max: - tcache_ncached_max_compute(i); + unsigned ncached_max = tcache_get_default_ncached_max_set(i) + ? (unsigned)tcache_get_default_ncached_max()[i].ncached_max + : tcache_ncached_max_compute(i); assert(ncached_max <= CACHE_BIN_NCACHED_MAX); - cache_bin_info_init(&tcache_bin_info[i], - (cache_bin_sz_t)ncached_max); + cache_bin_info_init( + &tcache_bin_info[i], (cache_bin_sz_t)ncached_max); } } static bool -tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, - const cache_bin_info_t *tcache_bin_info) { +tsd_tcache_data_init_impl( + tsd_t *tsd, arena_t *arena, const cache_bin_info_t *tcache_bin_info) { tcache_slow_t *tcache_slow = tsd_tcache_slowp_get_unsafe(tsd); - tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); + tcache_t *tcache = tsd_tcachep_get_unsafe(tsd); assert(cache_bin_still_zero_initialized(&tcache->bins[0])); unsigned tcache_nbins = tcache_nbins_get(tcache_slow); - size_t size, alignment; - cache_bin_info_compute_alloc(tcache_bin_info, tcache_nbins, - &size, &alignment); + size_t size, alignment; + cache_bin_info_compute_alloc( + tcache_bin_info, tcache_nbins, &size, &alignment); void *mem; if (cache_bin_stack_use_thp()) { @@ -1450,16 +1462,16 @@ tsd_tcache_data_init_impl(tsd_t *tsd, arena_t *arena, if (!malloc_initialized()) { /* If in initialization, assign to a0. */ arena = arena_get(tsd_tsdn(tsd), 0, false); - tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, - arena); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, arena); } else { if (arena == NULL) { arena = arena_choose(tsd, NULL); } /* This may happen if thread.tcache.enabled is used. 
*/ if (tcache_slow->arena == NULL) { - tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, - tcache, arena); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, arena); } } assert(arena == tcache_slow->arena); @@ -1484,30 +1496,29 @@ tcache_create_explicit(tsd_t *tsd) { * the cache bins have the requested alignment. */ unsigned tcache_nbins = global_do_not_change_tcache_nbins; - size_t tcache_size, alignment; + size_t tcache_size, alignment; cache_bin_info_compute_alloc(tcache_get_default_ncached_max(), tcache_nbins, &tcache_size, &alignment); - size_t size = tcache_size + sizeof(tcache_t) - + sizeof(tcache_slow_t); + size_t size = tcache_size + sizeof(tcache_t) + sizeof(tcache_slow_t); /* Naturally align the pointer stacks. */ size = PTR_CEILING(size); size = sz_sa2u(size, alignment); - void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, - true, NULL, true, arena_get(TSDN_NULL, 0, true)); + void *mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, true, + arena_get(TSDN_NULL, 0, true)); if (mem == NULL) { return NULL; } - tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); - tcache_slow_t *tcache_slow = - (void *)((byte_t *)mem + tcache_size + sizeof(tcache_t)); + tcache_t *tcache = (void *)((byte_t *)mem + tcache_size); + tcache_slow_t *tcache_slow = (void *)((byte_t *)mem + tcache_size + + sizeof(tcache_t)); tcache_default_settings_init(tcache_slow); - tcache_init(tsd, tcache_slow, tcache, mem, - tcache_get_default_ncached_max()); + tcache_init( + tsd, tcache_slow, tcache, mem, tcache_get_default_ncached_max()); - tcache_arena_associate(tsd_tsdn(tsd), tcache_slow, tcache, - arena_ichoose(tsd, NULL)); + tcache_arena_associate( + tsd_tsdn(tsd), tcache_slow, tcache, arena_ichoose(tsd, NULL)); return tcache; } @@ -1525,8 +1536,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { if (opt_tcache) { /* Trigger tcache init. 
*/ - tsd_tcache_data_init(tsd, NULL, - tcache_get_default_ncached_max()); + tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max()); } return false; @@ -1537,8 +1548,8 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init(tsd, NULL, - tcache_get_default_ncached_max()); + tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max()); } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -1551,12 +1562,12 @@ void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); assert(tcache_max == sz_s2u(tcache_max)); - tcache_t *tcache = tsd_tcachep_get(tsd); - tcache_slow_t *tcache_slow = tcache->tcache_slow; + tcache_t *tcache = tsd_tcachep_get(tsd); + tcache_slow_t *tcache_slow = tcache->tcache_slow; cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; assert(tcache != NULL && tcache_slow != NULL); - bool enabled = tcache_available(tsd); + bool enabled = tcache_available(tsd); arena_t *assigned_arena JEMALLOC_CLANG_ANALYZER_SILENCE_INIT(NULL); if (enabled) { assigned_arena = tcache_slow->arena; @@ -1587,16 +1598,16 @@ tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, do { size_t size_start, size_end; size_t ncached_max; - bool err = multi_setting_parse_next(&bin_settings_segment_cur, - &len_left, &size_start, &size_end, &ncached_max); + bool err = multi_setting_parse_next(&bin_settings_segment_cur, + &len_left, &size_start, &size_end, &ncached_max); if (err) { return true; } if (size_end > TCACHE_MAXCLASS_LIMIT) { size_end = TCACHE_MAXCLASS_LIMIT; } - if (size_start > TCACHE_MAXCLASS_LIMIT || - size_start > size_end) { + if (size_start > TCACHE_MAXCLASS_LIMIT + || size_start > size_end) { continue; } /* May get called before sz_init (during malloc_conf_init). 
*/ @@ -1606,8 +1617,8 @@ tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, ncached_max = (size_t)CACHE_BIN_NCACHED_MAX; } for (szind_t i = bin_start; i <= bin_end; i++) { - cache_bin_info_init(&tcache_bin_info[i], - (cache_bin_sz_t)ncached_max); + cache_bin_info_init( + &tcache_bin_info[i], (cache_bin_sz_t)ncached_max); if (bin_info_is_set != NULL) { bin_info_is_set[i] = true; } @@ -1618,13 +1629,12 @@ tcache_bin_info_settings_parse(const char *bin_settings_segment_cur, } bool -tcache_bin_info_default_init(const char *bin_settings_segment_cur, - size_t len_left) { +tcache_bin_info_default_init( + const char *bin_settings_segment_cur, size_t len_left) { return tcache_bin_info_settings_parse(bin_settings_segment_cur, len_left, opt_tcache_ncached_max, opt_tcache_ncached_max_set); } - bool tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { assert(tcache_available(tsd)); @@ -1634,15 +1644,14 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; tcache_bin_settings_backup(tcache, tcache_bin_info); - if(tcache_bin_info_settings_parse(settings, len, tcache_bin_info, - NULL)) { + if (tcache_bin_info_settings_parse( + settings, len, tcache_bin_info, NULL)) { return true; } arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); - tsd_tcache_data_init(tsd, assigned_arena, - tcache_bin_info); + tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); return false; } @@ -1698,11 +1707,11 @@ tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) { * tsd). Manually trigger decay to avoid pathological cases. Also * include arena 0 because the tcache array is allocated from it. 
*/ - arena_decay(tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), - false, false); + arena_decay( + tsd_tsdn(tsd), arena_get(tsd_tsdn(tsd), 0, false), false, false); - if (arena_nthreads_get(arena, false) == 0 && - !background_thread_enabled()) { + if (arena_nthreads_get(arena, false) == 0 + && !background_thread_enabled()) { /* Force purging when no threads assigned to the arena anymore. */ arena_decay(tsd_tsdn(tsd), arena, /* is_background_thread */ false, /* all */ true); @@ -1760,7 +1769,7 @@ tcaches_create_prep(tsd_t *tsd, base_t *base) { if (tcaches == NULL) { tcaches = base_alloc(tsd_tsdn(tsd), base, - sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX+1), CACHELINE); + sizeof(tcache_t *) * (MALLOCX_TCACHE_MAX + 1), CACHELINE); if (tcaches == NULL) { err = true; goto label_return; @@ -1851,7 +1860,7 @@ void tcaches_destroy(tsd_t *tsd, unsigned ind) { malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx); tcaches_t *elm = &tcaches[ind]; - tcache_t *tcache = tcaches_elm_remove(tsd, elm, false); + tcache_t *tcache = tcaches_elm_remove(tsd, elm, false); elm->next = tcaches_avail; tcaches_avail = elm; malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx); @@ -1875,7 +1884,7 @@ tcache_boot(tsdn_t *tsdn, base_t *base) { tcache_bin_info_compute(opt_tcache_ncached_max); if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES, - malloc_mutex_rank_exclusive)) { + malloc_mutex_rank_exclusive)) { return true; } @@ -1897,7 +1906,8 @@ tcache_postfork_child(tsdn_t *tsdn) { malloc_mutex_postfork_child(tsdn, &tcaches_mtx); } -void tcache_assert_initialized(tcache_t *tcache) { +void +tcache_assert_initialized(tcache_t *tcache) { assert(!cache_bin_still_zero_initialized(&tcache->bins[0])); } @@ -1908,8 +1918,8 @@ tcache_gc_enabled(void) { /* Handles alloc and dalloc the same way */ te_base_cb_t tcache_gc_te_handler = { - .enabled = &tcache_gc_enabled, - .new_event_wait = &tcache_gc_new_event_wait, - .postponed_event_wait = &tcache_gc_postponed_event_wait, - .event_handler = 
&tcache_gc_event, + .enabled = &tcache_gc_enabled, + .new_event_wait = &tcache_gc_new_event_wait, + .postponed_event_wait = &tcache_gc_postponed_event_wait, + .event_handler = &tcache_gc_event, }; diff --git a/src/thread_event.c b/src/thread_event.c index 496c16be..c59027ed 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -16,7 +16,8 @@ te_ctx_has_active_events(te_ctx_t *ctx) { } } else { for (int i = 0; i < te_dalloc_count; ++i) { - if (te_enabled_yes == te_dalloc_handlers[i]->enabled()) { + if (te_enabled_yes + == te_dalloc_handlers[i]->enabled()) { return true; } } @@ -26,12 +27,11 @@ te_ctx_has_active_events(te_ctx_t *ctx) { static uint64_t te_next_event_compute(tsd_t *tsd, bool is_alloc) { - te_base_cb_t **handlers = is_alloc ? - te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? - tsd_te_datap_get_unsafe(tsd)->alloc_wait : - tsd_te_datap_get_unsafe(tsd)->dalloc_wait; - int count = is_alloc ? te_alloc_count : te_dalloc_count; + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers + : te_dalloc_handlers; + uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait + : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? te_alloc_count : te_dalloc_count; uint64_t wait = TE_MAX_START_WAIT; @@ -86,9 +86,9 @@ te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) { * below is stronger than needed, but having an exactly accurate guard * is more complicated to implement. */ - assert((!te_ctx_has_active_events(ctx) && last_event == 0U) || - interval == min_wait || - (interval < min_wait && interval == TE_MAX_INTERVAL)); + assert((!te_ctx_has_active_events(ctx) && last_event == 0U) + || interval == min_wait + || (interval < min_wait && interval == TE_MAX_INTERVAL)); } void @@ -151,8 +151,9 @@ te_assert_invariants_debug(tsd_t *tsd) { static void te_ctx_next_event_fast_update(te_ctx_t *ctx) { uint64_t next_event = te_ctx_next_event_get(ctx); - uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ? 
- next_event : 0U; + uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) + ? next_event + : 0U; te_ctx_next_event_fast_set(ctx, next_event_fast); } @@ -177,8 +178,7 @@ te_recompute_fast_threshold(tsd_t *tsd) { } static inline void -te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, - uint64_t wait) { +te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { /* * The next threshold based on future events can only be adjusted after * progressing the last_event counter (which is set to current). @@ -186,23 +186,22 @@ te_adjust_thresholds_impl(tsd_t *tsd, te_ctx_t *ctx, assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx)); assert(wait <= TE_MAX_START_WAIT); - uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <= - TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); + uint64_t next_event = te_ctx_last_event_get(ctx) + + (wait <= TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL); te_ctx_next_event_set(tsd, ctx, next_event); } void -te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, - uint64_t wait) { +te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx, uint64_t wait) { te_adjust_thresholds_impl(tsd, ctx, wait); } static void te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { - te_base_cb_t **handlers = is_alloc ? te_alloc_handlers : te_dalloc_handlers; - uint64_t *waits = is_alloc ? - tsd_te_datap_get_unsafe(tsd)->alloc_wait : - tsd_te_datap_get_unsafe(tsd)->dalloc_wait; - int count = is_alloc ? te_alloc_count : te_dalloc_count; + te_base_cb_t **handlers = is_alloc ? te_alloc_handlers + : te_dalloc_handlers; + uint64_t *waits = is_alloc ? tsd_te_datap_get_unsafe(tsd)->alloc_wait + : tsd_te_datap_get_unsafe(tsd)->dalloc_wait; + int count = is_alloc ? 
te_alloc_count : te_dalloc_count; for (int i = 0; i < count; i++) { if (te_enabled_yes == handlers[i]->enabled()) { uint64_t ev_wait = handlers[i]->new_event_wait(tsd); @@ -216,25 +215,23 @@ te_init_waits(tsd_t *tsd, uint64_t *wait, bool is_alloc) { } static inline bool -te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, - uint64_t *ev_wait, uint64_t *wait, te_base_cb_t *handler, - uint64_t new_wait) { +te_update_wait(tsd_t *tsd, uint64_t accumbytes, bool allow, uint64_t *ev_wait, + uint64_t *wait, te_base_cb_t *handler, uint64_t new_wait) { bool ret = false; if (*ev_wait > accumbytes) { - *ev_wait -= accumbytes; - } else if (!allow) { - *ev_wait = handler->postponed_event_wait(tsd); - } else { - ret = true; - *ev_wait = new_wait == 0 ? - handler->new_event_wait(tsd) : - new_wait; - } + *ev_wait -= accumbytes; + } else if (!allow) { + *ev_wait = handler->postponed_event_wait(tsd); + } else { + ret = true; + *ev_wait = new_wait == 0 ? handler->new_event_wait(tsd) + : new_wait; + } - assert(*ev_wait > 0); - if (*ev_wait < *wait) { - *wait = *ev_wait; - } + assert(*ev_wait > 0); + if (*ev_wait < *wait) { + *wait = *ev_wait; + } return ret; } @@ -242,32 +239,32 @@ extern uint64_t stats_interval_accum_batch; /* Return number of handlers enqueued into to_trigger array */ static inline size_t te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, - uint64_t accumbytes, bool allow, uint64_t *wait) { + uint64_t accumbytes, bool allow, uint64_t *wait) { /* * We do not loop and invoke the functions via interface because * of the perf cost. This path is relatively hot, so we sacrifice * elegance for perf. 
*/ - size_t nto_trigger = 0; + size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->alloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_tcache_gc]->enabled()); + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_tcache_gc], wait, - te_alloc_handlers[te_alloc_tcache_gc], - opt_tcache_gc_incr_bytes)) { + &waits[te_alloc_tcache_gc], wait, + te_alloc_handlers[te_alloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_tcache_gc]; } } #ifdef JEMALLOC_PROF - if (opt_prof) { - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_prof_sample]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_prof_sample], wait, - te_alloc_handlers[te_alloc_prof_sample], 0)) { + if (opt_prof) { + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_prof_sample]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_sample], wait, + te_alloc_handlers[te_alloc_prof_sample], 0)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_sample]; } @@ -275,12 +272,12 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, #endif if (opt_stats_interval >= 0) { if (te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_stats_interval], - wait, - te_alloc_handlers[te_alloc_stats_interval], - stats_interval_accum_batch)) { - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_stats_interval]->enabled()); + &waits[te_alloc_stats_interval], wait, + te_alloc_handlers[te_alloc_stats_interval], + stats_interval_accum_batch)) { + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_stats_interval] + ->enabled()); to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_stats_interval]; } @@ -288,30 +285,30 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, #ifdef JEMALLOC_STATS assert(te_enabled_yes == 
te_alloc_handlers[te_alloc_peak]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, - te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { + if (te_update_wait(tsd, accumbytes, allow, &waits[te_alloc_peak], wait, + te_alloc_handlers[te_alloc_peak], PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; - } + } - assert(te_enabled_yes == - te_alloc_handlers[te_alloc_prof_threshold]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_prof_threshold], wait, - te_alloc_handlers[te_alloc_prof_threshold], - 1 << opt_experimental_lg_prof_threshold)) { + assert(te_enabled_yes + == te_alloc_handlers[te_alloc_prof_threshold]->enabled()); + if (te_update_wait(tsd, accumbytes, allow, + &waits[te_alloc_prof_threshold], wait, + te_alloc_handlers[te_alloc_prof_threshold], + 1 << opt_experimental_lg_prof_threshold)) { to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_prof_threshold]; - } + } #endif for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) { - te_enabled_t status = - te_user_event_enabled(ue - te_alloc_user0, true); + te_enabled_t status = te_user_event_enabled( + ue - te_alloc_user0, true); if (status == te_enabled_not_installed) { break; } else if (status == te_enabled_yes) { if (te_update_wait(tsd, accumbytes, allow, &waits[ue], - wait, te_alloc_handlers[ue], 0)) { + wait, te_alloc_handlers[ue], 0)) { to_trigger[nto_trigger++] = te_alloc_handlers[ue]; } @@ -321,37 +318,36 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, } static inline size_t -te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, uint64_t accumbytes, - bool allow, uint64_t *wait) { - size_t nto_trigger = 0; +te_update_dalloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, + uint64_t accumbytes, bool allow, uint64_t *wait) { + size_t nto_trigger = 0; uint64_t *waits = tsd_te_datap_get_unsafe(tsd)->dalloc_wait; if (opt_tcache_gc_incr_bytes > 0) { - assert(te_enabled_yes == - 
te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); + assert(te_enabled_yes + == te_dalloc_handlers[te_dalloc_tcache_gc]->enabled()); if (te_update_wait(tsd, accumbytes, allow, - &waits[te_dalloc_tcache_gc], wait, - te_dalloc_handlers[te_dalloc_tcache_gc], - opt_tcache_gc_incr_bytes)) { + &waits[te_dalloc_tcache_gc], wait, + te_dalloc_handlers[te_dalloc_tcache_gc], + opt_tcache_gc_incr_bytes)) { to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_tcache_gc]; } - } + } #ifdef JEMALLOC_STATS assert(te_enabled_yes == te_dalloc_handlers[te_dalloc_peak]->enabled()); - if(te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, - te_dalloc_handlers[te_dalloc_peak], - PEAK_EVENT_WAIT)) { + if (te_update_wait(tsd, accumbytes, allow, &waits[te_dalloc_peak], wait, + te_dalloc_handlers[te_dalloc_peak], PEAK_EVENT_WAIT)) { to_trigger[nto_trigger++] = te_dalloc_handlers[te_dalloc_peak]; - } + } #endif for (te_dalloc_t ue = te_dalloc_user0; ue <= te_dalloc_user3; ue++) { - te_enabled_t status = - te_user_event_enabled(ue - te_dalloc_user0, false); + te_enabled_t status = te_user_event_enabled( + ue - te_dalloc_user0, false); if (status == te_enabled_not_installed) { break; } else if (status == te_enabled_yes) { if (te_update_wait(tsd, accumbytes, allow, &waits[ue], - wait, te_dalloc_handlers[ue], 0)) { + wait, te_dalloc_handlers[ue], 0)) { to_trigger[nto_trigger++] = te_dalloc_handlers[ue]; } @@ -369,26 +365,22 @@ te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) { te_ctx_last_event_set(ctx, bytes_after); - bool allow_event_trigger = tsd_nominal(tsd) && - tsd_reentrancy_level_get(tsd) == 0; + bool allow_event_trigger = tsd_nominal(tsd) + && tsd_reentrancy_level_get(tsd) == 0; uint64_t wait = TE_MAX_START_WAIT; - assert((int)te_alloc_count >= (int) te_dalloc_count); + assert((int)te_alloc_count >= (int)te_dalloc_count); te_base_cb_t *to_trigger[te_alloc_count]; - size_t nto_trigger; + size_t nto_trigger; if (ctx->is_alloc) { - nto_trigger = te_update_alloc_events(tsd, 
to_trigger, - accumbytes, - allow_event_trigger, - &wait); + nto_trigger = te_update_alloc_events( + tsd, to_trigger, accumbytes, allow_event_trigger, &wait); } else { - nto_trigger = te_update_dalloc_events(tsd, to_trigger, - accumbytes, - allow_event_trigger, - &wait); + nto_trigger = te_update_dalloc_events( + tsd, to_trigger, accumbytes, allow_event_trigger, &wait); } - assert(wait <= TE_MAX_START_WAIT); + assert(wait <= TE_MAX_START_WAIT); te_adjust_thresholds_helper(tsd, ctx, wait); te_assert_invariants(tsd); diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index f5408178..05882616 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -145,34 +145,25 @@ TE_USER_HANDLER_BINDING_IDX(3); /* Table of all the thread events. */ te_base_cb_t *te_alloc_handlers[te_alloc_count] = { #ifdef JEMALLOC_PROF - &prof_sample_te_handler, + &prof_sample_te_handler, #endif - &stats_interval_te_handler, - &tcache_gc_te_handler, + &stats_interval_te_handler, &tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &prof_threshold_te_handler, - &peak_te_handler, + &prof_threshold_te_handler, &peak_te_handler, #endif - &user_alloc_handler0, - &user_alloc_handler1, - &user_alloc_handler2, - &user_alloc_handler3 -}; + &user_alloc_handler0, &user_alloc_handler1, &user_alloc_handler2, + &user_alloc_handler3}; -te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = { - &tcache_gc_te_handler, +te_base_cb_t *te_dalloc_handlers[te_dalloc_count] = {&tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &peak_te_handler, + &peak_te_handler, #endif - &user_dalloc_handler0, - &user_dalloc_handler1, - &user_dalloc_handler2, - &user_dalloc_handler3 -}; + &user_dalloc_handler0, &user_dalloc_handler1, &user_dalloc_handler2, + &user_dalloc_handler3}; static inline bool te_update_tsd(tsd_t *tsd, uint64_t new_wait, size_t ue_idx, bool is_alloc) { - bool needs_recompute = false; + bool needs_recompute = false; te_ctx_t ctx; uint64_t next, current, cur_wait; diff --git 
a/src/ticker.c b/src/ticker.c index 790b5c20..1fd6ac96 100644 --- a/src/ticker.c +++ b/src/ticker.c @@ -20,13 +20,8 @@ * The values here are computed in src/ticker.py */ -const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = { - 254, 211, 187, 169, 156, 144, 135, 127, - 120, 113, 107, 102, 97, 93, 89, 85, - 81, 77, 74, 71, 68, 65, 62, 60, - 57, 55, 53, 50, 48, 46, 44, 42, - 40, 39, 37, 35, 33, 32, 30, 29, - 27, 26, 24, 23, 21, 20, 19, 18, - 16, 15, 14, 13, 12, 10, 9, 8, - 7, 6, 5, 4, 3, 2, 1, 0 -}; +const uint8_t ticker_geom_table[1 << TICKER_GEOM_NBITS] = {254, 211, 187, 169, + 156, 144, 135, 127, 120, 113, 107, 102, 97, 93, 89, 85, 81, 77, 74, 71, 68, + 65, 62, 60, 57, 55, 53, 50, 48, 46, 44, 42, 40, 39, 37, 35, 33, 32, 30, 29, + 27, 26, 24, 23, 21, 20, 19, 18, 16, 15, 14, 13, 12, 10, 9, 8, 7, 6, 5, 4, 3, + 2, 1, 0}; diff --git a/src/tsd.c b/src/tsd.c index 0a2ccc59..20042c2d 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -20,19 +20,20 @@ bool tsd_booted = false; #elif (defined(JEMALLOC_TLS)) JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER; pthread_key_t tsd_tsd; -bool tsd_booted = false; +bool tsd_booted = false; #elif (defined(_WIN32)) -#if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) -DWORD tsd_tsd; +# if defined(JEMALLOC_LEGACY_WINDOWS_SUPPORT) || !defined(_MSC_VER) +DWORD tsd_tsd; tsd_wrapper_t tsd_boot_wrapper = {TSD_INITIALIZER, false}; -#else -JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) tsd_wrapper_tls = { TSD_INITIALIZER, false }; -#endif +# else +JEMALLOC_TSD_TYPE_ATTR(tsd_wrapper_t) +tsd_wrapper_tls = {TSD_INITIALIZER, false}; +# endif bool tsd_booted = false; -#if JEMALLOC_WIN32_TLSGETVALUE2 -TGV2 tls_get_value2 = NULL; +# if JEMALLOC_WIN32_TLSGETVALUE2 +TGV2 tls_get_value2 = NULL; HMODULE tgv2_mod = NULL; -#endif +# endif #else /* @@ -45,17 +46,12 @@ struct tsd_init_head_s { malloc_mutex_t lock; }; -pthread_key_t tsd_tsd; -tsd_init_head_t tsd_init_head = { - ql_head_initializer(blocks), - MALLOC_MUTEX_INITIALIZER -}; +pthread_key_t 
tsd_tsd; +tsd_init_head_t tsd_init_head = { + ql_head_initializer(blocks), MALLOC_MUTEX_INITIALIZER}; -tsd_wrapper_t tsd_boot_wrapper = { - false, - TSD_INITIALIZER -}; -bool tsd_booted = false; +tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER}; +bool tsd_booted = false; #endif JEMALLOC_DIAGNOSTIC_POP @@ -64,7 +60,7 @@ JEMALLOC_DIAGNOSTIC_POP /* A list of all the tsds in the nominal state. */ typedef ql_head(tsd_t) tsd_list_t; -static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); +static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds); static malloc_mutex_t tsd_nominal_tsds_lock; /* How many slow-path-enabling features are turned on. */ @@ -73,13 +69,13 @@ static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0); static bool tsd_in_nominal_list(tsd_t *tsd) { tsd_t *tsd_list; - bool found = false; + bool found = false; /* * We don't know that tsd is nominal; it might not be safe to get data * out of it here. */ malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock); - ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { + ql_foreach (tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { if (tsd == tsd_list) { found = true; break; @@ -117,7 +113,7 @@ tsd_force_recompute(tsdn_t *tsdn) { atomic_fence(ATOMIC_RELEASE); malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock); tsd_t *remote_tsd; - ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { + ql_foreach (remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tsd_link)) { assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED) <= tsd_state_nominal_max); tsd_atomic_store(&remote_tsd->state, @@ -143,7 +139,8 @@ tsd_global_slow_inc(tsdn_t *tsdn) { tsd_force_recompute(tsdn); } -void tsd_global_slow_dec(tsdn_t *tsdn) { +void +tsd_global_slow_dec(tsdn_t *tsdn) { atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED); /* See the note in ..._inc(). 
*/ tsd_force_recompute(tsdn); @@ -180,8 +177,8 @@ tsd_slow_update(tsd_t *tsd) { uint8_t old_state; do { uint8_t new_state = tsd_state_compute(tsd); - old_state = tsd_atomic_exchange(&tsd->state, new_state, - ATOMIC_ACQUIRE); + old_state = tsd_atomic_exchange( + &tsd->state, new_state, ATOMIC_ACQUIRE); } while (old_state == tsd_state_nominal_recompute); te_recompute_fast_threshold(tsd); @@ -211,8 +208,8 @@ tsd_state_set(tsd_t *tsd, uint8_t new_state) { assert(tsd_in_nominal_list(tsd)); if (new_state > tsd_state_nominal_max) { tsd_remove_nominal(tsd); - tsd_atomic_store(&tsd->state, new_state, - ATOMIC_RELAXED); + tsd_atomic_store( + &tsd->state, new_state, ATOMIC_RELAXED); } else { /* * This is the tricky case. We're transitioning from @@ -235,8 +232,7 @@ tsd_prng_state_init(tsd_t *tsd) { * cost of test repeatability. For debug builds, instead use a * deterministic seed. */ - *tsd_prng_statep_get(tsd) = config_debug ? 0 : - (uint64_t)(uintptr_t)tsd; + *tsd_prng_statep_get(tsd) = config_debug ? 0 : (uint64_t)(uintptr_t)tsd; } static bool @@ -264,8 +260,8 @@ assert_tsd_data_cleanup_done(tsd_t *tsd) { static bool tsd_data_init_nocleanup(tsd_t *tsd) { - assert(tsd_state_get(tsd) == tsd_state_reincarnated || - tsd_state_get(tsd) == tsd_state_minimal_initialized); + assert(tsd_state_get(tsd) == tsd_state_reincarnated + || tsd_state_get(tsd) == tsd_state_minimal_initialized); /* * During reincarnation, there is no guarantee that the cleanup function * will be called (deallocation may happen after all tsd destructors). 
@@ -358,15 +354,15 @@ malloc_tsd_dalloc(void *wrapper) { } #if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32) -static unsigned ncleanups; +static unsigned ncleanups; static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX]; -#ifndef _WIN32 +# ifndef _WIN32 JEMALLOC_EXPORT -#endif +# endif void _malloc_thread_cleanup(void) { - bool pending[MALLOC_TSD_CLEANUPS_MAX], again; + bool pending[MALLOC_TSD_CLEANUPS_MAX], again; unsigned i; for (i = 0; i < ncleanups; i++) { @@ -386,9 +382,9 @@ _malloc_thread_cleanup(void) { } while (again); } -#ifndef _WIN32 +# ifndef _WIN32 JEMALLOC_EXPORT -#endif +# endif void _malloc_tsd_cleanup_register(bool (*f)(void)) { assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX); @@ -446,7 +442,7 @@ tsd_cleanup(void *arg) { } #ifdef JEMALLOC_JET test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd); - int *data = tsd_test_datap_get_unsafe(tsd); + int *data = tsd_test_datap_get_unsafe(tsd); if (test_callback != NULL) { test_callback(data); } @@ -461,7 +457,7 @@ malloc_tsd_boot0(void) { ncleanups = 0; #endif if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock", - WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { + WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) { return NULL; } if (tsd_boot0()) { @@ -483,11 +479,11 @@ malloc_tsd_boot1(void) { static BOOL WINAPI _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { switch (fdwReason) { -#ifdef JEMALLOC_LAZY_LOCK +# ifdef JEMALLOC_LAZY_LOCK case DLL_THREAD_ATTACH: isthreaded = true; break; -#endif +# endif case DLL_THREAD_DETACH: _malloc_thread_cleanup(); break; @@ -502,35 +498,36 @@ _tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) { * hooked "read". We won't read for the rest of the file, so we can get away * with unhooking. 
*/ -#ifdef read -# undef read +# ifdef read +# undef read +# endif + +# ifdef _MSC_VER +# ifdef _M_IX86 +# pragma comment(linker, "/INCLUDE:__tls_used") +# pragma comment(linker, "/INCLUDE:_tls_callback") +# else +# pragma comment(linker, "/INCLUDE:_tls_used") +# pragma comment( \ + linker, "/INCLUDE:" STRINGIFY(tls_callback)) +# endif +# pragma section(".CRT$XLY", long, read) +# endif +JEMALLOC_SECTION(".CRT$XLY") +JEMALLOC_ATTR(used) BOOL(WINAPI *const tls_callback)( + HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; #endif -#ifdef _MSC_VER -# ifdef _M_IX86 -# pragma comment(linker, "/INCLUDE:__tls_used") -# pragma comment(linker, "/INCLUDE:_tls_callback") -# else -# pragma comment(linker, "/INCLUDE:_tls_used") -# pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) ) -# endif -# pragma section(".CRT$XLY",long,read) -#endif -JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used) -BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL, - DWORD fdwReason, LPVOID lpvReserved) = _tls_callback; -#endif - -#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \ - !defined(_WIN32)) +#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) \ + && !defined(_WIN32)) void * tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { - pthread_t self = pthread_self(); + pthread_t self = pthread_self(); tsd_init_block_t *iter; /* Check whether this thread has already inserted into the list. 
*/ malloc_mutex_lock(TSDN_NULL, &head->lock); - ql_foreach(iter, &head->blocks, link) { + ql_foreach (iter, &head->blocks, link) { if (iter->thread == self) { malloc_mutex_unlock(TSDN_NULL, &head->lock); return iter->data; diff --git a/src/util.c b/src/util.c index b73848fb..1bcf4fee 100644 --- a/src/util.c +++ b/src/util.c @@ -8,8 +8,8 @@ bool multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, size_t *key_start, size_t *key_end, size_t *value) { const char *cur = *setting_segment_cur; - char *end; - uintmax_t um; + char *end; + uintmax_t um; set_errno(0); @@ -46,4 +46,3 @@ multi_setting_parse_next(const char **setting_segment_cur, size_t *len_left, return false; } - diff --git a/src/witness.c b/src/witness.c index 4474af04..940b1eae 100644 --- a/src/witness.c +++ b/src/witness.c @@ -26,8 +26,8 @@ witness_print_witness(witness_t *w, unsigned n) { static void witness_print_witnesses(const witness_list_t *witnesses) { witness_t *w, *last = NULL; - unsigned n = 0; - ql_foreach(w, witnesses, link) { + unsigned n = 0; + ql_foreach (w, witnesses, link) { if (last != NULL && w->rank > last->rank) { assert(w->name != last->name); witness_print_witness(last, n); @@ -45,8 +45,8 @@ witness_print_witnesses(const witness_list_t *witnesses) { } static void -witness_lock_error_impl(const witness_list_t *witnesses, - const witness_t *witness) { +witness_lock_error_impl( + const witness_list_t *witnesses, const witness_t *witness) { malloc_printf(": Lock rank order reversal:"); witness_print_witnesses(witnesses); malloc_printf(" %s(%u)\n", witness->name, witness->rank); @@ -56,8 +56,8 @@ witness_lock_error_t *JET_MUTABLE witness_lock_error = witness_lock_error_impl; static void witness_owner_error_impl(const witness_t *witness) { - malloc_printf(": Should own %s(%u)\n", witness->name, - witness->rank); + malloc_printf( + ": Should own %s(%u)\n", witness->name, witness->rank); abort(); } witness_owner_error_t *JET_MUTABLE witness_owner_error = @@ -76,7 
+76,7 @@ static void witness_depth_error_impl(const witness_list_t *witnesses, witness_rank_t rank_inclusive, unsigned depth) { malloc_printf(": Should own %u lock%s of rank >= %u:", depth, - (depth != 1) ? "s" : "", rank_inclusive); + (depth != 1) ? "s" : "", rank_inclusive); witness_print_witnesses(witnesses); malloc_printf("\n"); abort(); diff --git a/src/zone.c b/src/zone.c index 23dfdd04..e09de4b8 100644 --- a/src/zone.c +++ b/src/zone.c @@ -4,7 +4,7 @@ #include "jemalloc/internal/assert.h" #ifndef JEMALLOC_ZONE -# error "This source file is for zones on Darwin (OS X)." +# error "This source file is for zones on Darwin (OS X)." #endif /* Definitions of the following structs in malloc/malloc.h might be too old @@ -22,10 +22,11 @@ typedef struct _malloc_zone_t { void *(*realloc)(struct _malloc_zone_t *, void *, size_t); void (*destroy)(struct _malloc_zone_t *); const char *zone_name; - unsigned (*batch_malloc)(struct _malloc_zone_t *, size_t, void **, unsigned); + unsigned (*batch_malloc)( + struct _malloc_zone_t *, size_t, void **, unsigned); void (*batch_free)(struct _malloc_zone_t *, void **, unsigned); struct malloc_introspection_t *introspect; - unsigned version; + unsigned version; void *(*memalign)(struct _malloc_zone_t *, size_t, size_t); void (*free_definite_size)(struct _malloc_zone_t *, void *, size_t); size_t (*pressure_relief)(struct _malloc_zone_t *, size_t); @@ -33,22 +34,24 @@ typedef struct _malloc_zone_t { typedef struct { vm_address_t address; - vm_size_t size; + vm_size_t size; } vm_range_t; typedef struct malloc_statistics_t { unsigned blocks_in_use; - size_t size_in_use; - size_t max_size_in_use; - size_t size_allocated; + size_t size_in_use; + size_t max_size_in_use; + size_t size_allocated; } malloc_statistics_t; typedef kern_return_t memory_reader_t(task_t, vm_address_t, vm_size_t, void **); -typedef void vm_range_recorder_t(task_t, void *, unsigned type, vm_range_t *, unsigned); +typedef void vm_range_recorder_t( + task_t, void *, 
unsigned type, vm_range_t *, unsigned); typedef struct malloc_introspection_t { - kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, memory_reader_t, vm_range_recorder_t); + kern_return_t (*enumerator)(task_t, void *, unsigned, vm_address_t, + memory_reader_t, vm_range_recorder_t); size_t (*good_size)(malloc_zone_t *, size_t); boolean_t (*check)(malloc_zone_t *); void (*print)(malloc_zone_t *, boolean_t); @@ -61,14 +64,16 @@ typedef struct malloc_introspection_t { boolean_t (*disable_discharge_checking)(malloc_zone_t *); void (*discharge)(malloc_zone_t *, void *); #ifdef __BLOCKS__ - void (*enumerate_discharged_pointers)(malloc_zone_t *, void (^)(void *, void *)); + void (*enumerate_discharged_pointers)( + malloc_zone_t *, void (^)(void *, void *)); #else void *enumerate_unavailable_without_blocks; #endif void (*reinit_lock)(malloc_zone_t *); } malloc_introspection_t; -extern kern_return_t malloc_get_all_zones(task_t, memory_reader_t, vm_address_t **, unsigned *); +extern kern_return_t malloc_get_all_zones( + task_t, memory_reader_t, vm_address_t **, unsigned *); extern malloc_zone_t *malloc_default_zone(void); @@ -81,48 +86,46 @@ extern void malloc_zone_unregister(malloc_zone_t *zone); * We need to check whether it is present at runtime, thus the weak_import. */ extern malloc_zone_t *malloc_default_purgeable_zone(void) -JEMALLOC_ATTR(weak_import); + JEMALLOC_ATTR(weak_import); /******************************************************************************/ /* Data. */ -static malloc_zone_t *default_zone, *purgeable_zone; -static malloc_zone_t jemalloc_zone; +static malloc_zone_t *default_zone, *purgeable_zone; +static malloc_zone_t jemalloc_zone; static struct malloc_introspection_t jemalloc_zone_introspect; -static pid_t zone_force_lock_pid = -1; +static pid_t zone_force_lock_pid = -1; /******************************************************************************/ /* Function prototypes for non-inline static functions. 
*/ -static size_t zone_size(malloc_zone_t *zone, const void *ptr); -static void *zone_malloc(malloc_zone_t *zone, size_t size); -static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); -static void *zone_valloc(malloc_zone_t *zone, size_t size); -static void zone_free(malloc_zone_t *zone, void *ptr); -static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); -static void *zone_memalign(malloc_zone_t *zone, size_t alignment, - size_t size); -static void zone_free_definite_size(malloc_zone_t *zone, void *ptr, - size_t size); -static void zone_destroy(malloc_zone_t *zone); -static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, +static size_t zone_size(malloc_zone_t *zone, const void *ptr); +static void *zone_malloc(malloc_zone_t *zone, size_t size); +static void *zone_calloc(malloc_zone_t *zone, size_t num, size_t size); +static void *zone_valloc(malloc_zone_t *zone, size_t size); +static void zone_free(malloc_zone_t *zone, void *ptr); +static void *zone_realloc(malloc_zone_t *zone, void *ptr, size_t size); +static void *zone_memalign(malloc_zone_t *zone, size_t alignment, size_t size); +static void zone_free_definite_size( + malloc_zone_t *zone, void *ptr, size_t size); +static void zone_destroy(malloc_zone_t *zone); +static unsigned zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, unsigned num_requested); -static void zone_batch_free(struct _malloc_zone_t *zone, - void **to_be_freed, unsigned num_to_be_freed); -static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal); -static size_t zone_good_size(malloc_zone_t *zone, size_t size); -static kern_return_t zone_enumerator(task_t task, void *data, unsigned type_mask, - vm_address_t zone_address, memory_reader_t reader, +static void zone_batch_free( + struct _malloc_zone_t *zone, void **to_be_freed, unsigned num_to_be_freed); +static size_t zone_pressure_relief(struct _malloc_zone_t *zone, size_t goal); +static size_t 
zone_good_size(malloc_zone_t *zone, size_t size); +static kern_return_t zone_enumerator(task_t task, void *data, + unsigned type_mask, vm_address_t zone_address, memory_reader_t reader, vm_range_recorder_t recorder); -static boolean_t zone_check(malloc_zone_t *zone); -static void zone_print(malloc_zone_t *zone, boolean_t verbose); -static void zone_log(malloc_zone_t *zone, void *address); -static void zone_force_lock(malloc_zone_t *zone); -static void zone_force_unlock(malloc_zone_t *zone); -static void zone_statistics(malloc_zone_t *zone, - malloc_statistics_t *stats); -static boolean_t zone_locked(malloc_zone_t *zone); -static void zone_reinit_lock(malloc_zone_t *zone); +static boolean_t zone_check(malloc_zone_t *zone); +static void zone_print(malloc_zone_t *zone, boolean_t verbose); +static void zone_log(malloc_zone_t *zone, void *address); +static void zone_force_lock(malloc_zone_t *zone); +static void zone_force_unlock(malloc_zone_t *zone); +static void zone_statistics(malloc_zone_t *zone, malloc_statistics_t *stats); +static boolean_t zone_locked(malloc_zone_t *zone); +static void zone_reinit_lock(malloc_zone_t *zone); /******************************************************************************/ /* @@ -225,8 +228,8 @@ zone_batch_malloc(struct _malloc_zone_t *zone, size_t size, void **results, } static void -zone_batch_free(struct _malloc_zone_t *zone, void **to_be_freed, - unsigned num_to_be_freed) { +zone_batch_free( + struct _malloc_zone_t *zone, void **to_be_freed, unsigned num_to_be_freed) { unsigned i; for (i = 0; i < num_to_be_freed; i++) { @@ -261,12 +264,10 @@ zone_check(malloc_zone_t *zone) { } static void -zone_print(malloc_zone_t *zone, boolean_t verbose) { -} +zone_print(malloc_zone_t *zone, boolean_t verbose) {} static void -zone_log(malloc_zone_t *zone, void *address) { -} +zone_log(malloc_zone_t *zone, void *address) {} static void zone_force_lock(malloc_zone_t *zone) { @@ -369,7 +370,7 @@ zone_init(void) { static malloc_zone_t * 
zone_default_get(void) { malloc_zone_t **zones = NULL; - unsigned int num_zones = 0; + unsigned int num_zones = 0; /* * On OSX 10.12, malloc_default_zone returns a special zone that is not @@ -380,8 +381,9 @@ zone_default_get(void) { * zone is the default. So get the list of zones to get the first one, * instead of relying on malloc_default_zone. */ - if (KERN_SUCCESS != malloc_get_all_zones(0, NULL, - (vm_address_t**)&zones, &num_zones)) { + if (KERN_SUCCESS + != malloc_get_all_zones( + 0, NULL, (vm_address_t **)&zones, &num_zones)) { /* * Reset the value in case the failure happened after it was * set. @@ -441,8 +443,8 @@ zone_register(void) { * register jemalloc's. */ default_zone = zone_default_get(); - if (!default_zone->zone_name || strcmp(default_zone->zone_name, - "DefaultMallocZone") != 0) { + if (!default_zone->zone_name + || strcmp(default_zone->zone_name, "DefaultMallocZone") != 0) { return; } @@ -457,8 +459,9 @@ zone_register(void) { * to check for the existence of malloc_default_purgeable_zone() at * run time. */ - purgeable_zone = (malloc_default_purgeable_zone == NULL) ? NULL : - malloc_default_purgeable_zone(); + purgeable_zone = (malloc_default_purgeable_zone == NULL) + ? NULL + : malloc_default_purgeable_zone(); /* Register the custom zone. At this point it won't be the default. 
*/ zone_init(); diff --git a/test/analyze/prof_bias.c b/test/analyze/prof_bias.c index a96ca942..e4bf7942 100644 --- a/test/analyze/prof_bias.c +++ b/test/analyze/prof_bias.c @@ -46,15 +46,15 @@ do_allocs(size_t sz, size_t cnt, bool do_frees) { int main(void) { size_t lg_prof_sample_local = 19; - int err = mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local)); + int err = mallctl("prof.reset", NULL, NULL, + (void *)&lg_prof_sample_local, sizeof(lg_prof_sample_local)); assert(err == 0); prof_backtrace_hook_set(mock_backtrace); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ true); - do_allocs(32 * 1024* 1024, 16, /* do_frees */ true); + do_allocs(32 * 1024 * 1024, 16, /* do_frees */ true); do_allocs(16, 32 * 1024 * 1024, /* do_frees */ false); - do_allocs(32 * 1024* 1024, 16, /* do_frees */ false); + do_allocs(32 * 1024 * 1024, 16, /* do_frees */ false); return 0; } diff --git a/test/analyze/rand.c b/test/analyze/rand.c index bb20b06e..4c7e18c7 100644 --- a/test/analyze/rand.c +++ b/test/analyze/rand.c @@ -72,13 +72,13 @@ print_buckets(const size_t buckets[], const size_t means[], if (buckets[i] + stddevs[i] <= means[i]) { malloc_write(" "); for (size_t t = means[i] - buckets[i]; t >= stddevs[i]; - t -= stddevs[i]) { + t -= stddevs[i]) { malloc_write("-"); } } else if (buckets[i] >= means[i] + stddevs[i]) { malloc_write(" "); for (size_t t = buckets[i] - means[i]; t >= stddevs[i]; - t -= stddevs[i]) { + t -= stddevs[i]) { malloc_write("+"); } } @@ -93,8 +93,8 @@ bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[], for (size_t i = 1; i <= 3; ++i) { malloc_printf("round %zu\n", i); fill(buckets, n_bucket, 0); - collect_buckets(gen, opaque, buckets, n_bucket, - lg_bucket_width, n_iter); + collect_buckets( + gen, opaque, buckets, n_bucket, lg_bucket_width, n_iter); print_buckets(buckets, means, stddevs, n_bucket); } } @@ -108,7 +108,7 @@ bucket_analysis(uint64_t (*gen)(void *), void *opaque, size_t buckets[], 
typedef struct uniform_gen_arg_s uniform_gen_arg_t; struct uniform_gen_arg_s { - uint64_t state; + uint64_t state; const unsigned lg_range; }; @@ -131,8 +131,10 @@ TEST_BEGIN(test_uniform) { * integers, and that the minimal bucket mean is at least * MIN_BUCKET_MEAN. */ - const size_t q = 1 << QUOTIENT_CEIL(LG_CEIL(QUOTIENT_CEIL( - MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), 2); + const size_t q = 1 << QUOTIENT_CEIL( + LG_CEIL(QUOTIENT_CEIL( + MIN_BUCKET_MEAN, N_BUCKET * (N_BUCKET - 1))), + 2); const size_t stddev = (N_BUCKET - 1) * q; const size_t mean = N_BUCKET * stddev * q; const size_t n_iter = N_BUCKET * mean; @@ -142,14 +144,14 @@ TEST_BEGIN(test_uniform) { size_t stddevs[N_BUCKET]; fill(stddevs, N_BUCKET, stddev); - uniform_gen_arg_t arg = {(uint64_t)(uintptr_t)&lg_range_test, - lg_range_test}; + uniform_gen_arg_t arg = { + (uint64_t)(uintptr_t)&lg_range_test, lg_range_test}; size_t buckets[N_BUCKET]; assert_zu_ge(lg_range_test, LG_N_BUCKET, ""); const size_t lg_bucket_width = lg_range_test - LG_N_BUCKET; - bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, - N_BUCKET, lg_bucket_width, n_iter); + bucket_analysis(uniform_gen, &arg, buckets, means, stddevs, N_BUCKET, + lg_bucket_width, n_iter); #undef LG_N_BUCKET #undef N_BUCKET @@ -168,8 +170,8 @@ TEST_END * comments in test_prof_sample for explanations for n_divide. */ static double -fill_geometric_proportions(double proportions[], const size_t n_bucket, - const size_t n_divide) { +fill_geometric_proportions( + double proportions[], const size_t n_bucket, const size_t n_divide) { assert(n_bucket > 0); assert(n_divide > 0); double x = 1.; @@ -220,12 +222,12 @@ TEST_BEGIN(test_prof_sample) { #ifdef JEMALLOC_PROF /* Number of divisions within [0, mean). */ -#define LG_N_DIVIDE 3 -#define N_DIVIDE (1 << LG_N_DIVIDE) +# define LG_N_DIVIDE 3 +# define N_DIVIDE (1 << LG_N_DIVIDE) /* Coverage of buckets in terms of multiples of mean. 
*/ -#define LG_N_MULTIPLY 2 -#define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY) +# define LG_N_MULTIPLY 2 +# define N_GEO_BUCKET (N_DIVIDE << LG_N_MULTIPLY) test_skip_if(!opt_prof); @@ -233,14 +235,15 @@ TEST_BEGIN(test_prof_sample) { size_t lg_prof_sample_orig = lg_prof_sample; assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_test, - sizeof(size_t)), 0, ""); + sizeof(size_t)), + 0, ""); malloc_printf("lg_prof_sample = %zu\n", lg_prof_sample_test); - double proportions[N_GEO_BUCKET + 1]; - const double min_proportion = fill_geometric_proportions(proportions, - N_GEO_BUCKET + 1, N_DIVIDE); - const size_t n_iter = round_to_nearest(MIN_BUCKET_MEAN / - min_proportion); + double proportions[N_GEO_BUCKET + 1]; + const double min_proportion = fill_geometric_proportions( + proportions, N_GEO_BUCKET + 1, N_DIVIDE); + const size_t n_iter = round_to_nearest( + MIN_BUCKET_MEAN / min_proportion); size_t means[N_GEO_BUCKET + 1]; size_t stddevs[N_GEO_BUCKET + 1]; fill_references(means, stddevs, proportions, N_GEO_BUCKET + 1, n_iter); @@ -255,12 +258,13 @@ TEST_BEGIN(test_prof_sample) { N_GEO_BUCKET + 1, lg_bucket_width, n_iter); assert_d_eq(mallctl("prof.reset", NULL, NULL, &lg_prof_sample_orig, - sizeof(size_t)), 0, ""); + sizeof(size_t)), + 0, ""); -#undef LG_N_DIVIDE -#undef N_DIVIDE -#undef LG_N_MULTIPLY -#undef N_GEO_BUCKET +# undef LG_N_DIVIDE +# undef N_DIVIDE +# undef LG_N_MULTIPLY +# undef N_GEO_BUCKET #endif /* JEMALLOC_PROF */ } @@ -270,7 +274,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_uniform, - test_prof_sample); + return test_no_reentrancy(test_uniform, test_prof_sample); } diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index cfb5ce51..cc6c3806 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -11,9 +11,9 @@ static void do_print(const char *name, size_t sz_bytes) { - const char *sizes[] = {"bytes", "KB", "MB", "GB", "TB", "PB", "EB", - "ZB"}; - size_t sizes_max = sizeof(sizes)/sizeof(sizes[0]); + const char 
*sizes[] = { + "bytes", "KB", "MB", "GB", "TB", "PB", "EB", "ZB"}; + size_t sizes_max = sizeof(sizes) / sizeof(sizes[0]); size_t ind = 0; double sz = sz_bytes; @@ -30,8 +30,7 @@ do_print(const char *name, size_t sz_bytes) { int main(void) { -#define P(type) \ - do_print(#type, sizeof(type)) +#define P(type) do_print(#type, sizeof(type)) P(arena_t); P(arena_stats_t); P(base_t); diff --git a/test/include/test/SFMT-alti.h b/test/include/test/SFMT-alti.h index a1885dbf..d6a85ad1 100644 --- a/test/include/test/SFMT-alti.h +++ b/test/include/test/SFMT-alti.h @@ -61,58 +61,59 @@ * @return output */ JEMALLOC_ALWAYS_INLINE -vector unsigned int vec_recursion(vector unsigned int a, - vector unsigned int b, - vector unsigned int c, - vector unsigned int d) { - - const vector unsigned int sl1 = ALTI_SL1; - const vector unsigned int sr1 = ALTI_SR1; +vector unsigned int +vec_recursion(vector unsigned int a, vector unsigned int b, + vector unsigned int c, vector unsigned int d) { + const vector unsigned int sl1 = ALTI_SL1; + const vector unsigned int sr1 = ALTI_SR1; #ifdef ONLY64 - const vector unsigned int mask = ALTI_MSK64; - const vector unsigned char perm_sl = ALTI_SL2_PERM64; - const vector unsigned char perm_sr = ALTI_SR2_PERM64; + const vector unsigned int mask = ALTI_MSK64; + const vector unsigned char perm_sl = ALTI_SL2_PERM64; + const vector unsigned char perm_sr = ALTI_SR2_PERM64; #else - const vector unsigned int mask = ALTI_MSK; - const vector unsigned char perm_sl = ALTI_SL2_PERM; - const vector unsigned char perm_sr = ALTI_SR2_PERM; + const vector unsigned int mask = ALTI_MSK; + const vector unsigned char perm_sl = ALTI_SL2_PERM; + const vector unsigned char perm_sr = ALTI_SR2_PERM; #endif - vector unsigned int v, w, x, y, z; - x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl); - v = a; - y = vec_sr(b, sr1); - z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr); - w = vec_sl(d, sl1); - z = vec_xor(z, w); - y = vec_and(y, mask); - v = vec_xor(v, x); - z = 
vec_xor(z, y); - z = vec_xor(z, v); - return z; + vector unsigned int v, w, x, y, z; + x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl); + v = a; + y = vec_sr(b, sr1); + z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr); + w = vec_sl(d, sl1); + z = vec_xor(z, w); + y = vec_and(y, mask); + v = vec_xor(v, x); + z = vec_xor(z, y); + z = vec_xor(z, v); + return z; } /** * This function fills the internal state array with pseudorandom * integers. */ -static inline void gen_rand_all(sfmt_t *ctx) { - int i; - vector unsigned int r, r1, r2; +static inline void +gen_rand_all(sfmt_t *ctx) { + int i; + vector unsigned int r, r1, r2; - r1 = ctx->sfmt[N - 2].s; - r2 = ctx->sfmt[N - 1].s; - for (i = 0; i < N - POS1; i++) { - r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); - ctx->sfmt[i].s = r; - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2); - ctx->sfmt[i].s = r; - r1 = r2; - r2 = r; - } + r1 = ctx->sfmt[N - 2].s; + r2 = ctx->sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion( + ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); + ctx->sfmt[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion( + ctx->sfmt[i].s, ctx->sfmt[i + POS1 - N].s, r1, r2); + ctx->sfmt[i].s = r; + r1 = r2; + r2 = r; + } } /** @@ -122,50 +123,57 @@ static inline void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. 
*/ -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { - int i, j; - vector unsigned int r, r1, r2; +static inline void +gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + vector unsigned int r, r1, r2; - r1 = ctx->sfmt[N - 2].s; - r2 = ctx->sfmt[N - 1].s; - for (i = 0; i < N - POS1; i++) { - r = vec_recursion(ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); - array[i].s = r; - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = vec_recursion(ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2); - array[i].s = r; - r1 = r2; - r2 = r; - } - /* main loop */ - for (; i < size - N; i++) { - r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); - array[i].s = r; - r1 = r2; - r2 = r; - } - for (j = 0; j < 2 * N - size; j++) { - ctx->sfmt[j].s = array[j + size - N].s; - } - for (; i < size; i++) { - r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2); - array[i].s = r; - ctx->sfmt[j++].s = r; - r1 = r2; - r2 = r; - } + r1 = ctx->sfmt[N - 2].s; + r2 = ctx->sfmt[N - 1].s; + for (i = 0; i < N - POS1; i++) { + r = vec_recursion( + ctx->sfmt[i].s, ctx->sfmt[i + POS1].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = vec_recursion( + ctx->sfmt[i].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = vec_recursion( + array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + ctx->sfmt[j].s = array[j + size - N].s; + } + for (; i < size; i++) { + r = vec_recursion( + array[i - N].s, array[i + POS1 - N].s, r1, r2); + array[i].s = r; + ctx->sfmt[j++].s = r; + r1 = r2; + r2 = r; + } } #ifndef ONLY64 -#if defined(__APPLE__) -#define ALTI_SWAP (vector unsigned char) \ - (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11) -#else -#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11} -#endif +# if defined(__APPLE__) +# define 
ALTI_SWAP \ + (vector unsigned char)(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, \ + 14, 15, 8, 9, 10, 11) +# else +# define ALTI_SWAP \ + { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 } +# endif /** * This function swaps high and low 32-bit of 64-bit integers in user * specified array. @@ -173,13 +181,15 @@ static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { * @param array an 128-bit array to be swaped. * @param size size of 128-bit array. */ -static inline void swap(w128_t *array, int size) { - int i; - const vector unsigned char perm = ALTI_SWAP; +static inline void +swap(w128_t *array, int size) { + int i; + const vector unsigned char perm = ALTI_SWAP; - for (i = 0; i < size; i++) { - array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm); - } + for (i = 0; i < size; i++) { + array[i].s = vec_perm( + array[i].s, (vector unsigned int)perm, perm); + } } #endif diff --git a/test/include/test/SFMT-params.h b/test/include/test/SFMT-params.h index 6730adf8..4ff4316f 100644 --- a/test/include/test/SFMT-params.h +++ b/test/include/test/SFMT-params.h @@ -37,10 +37,10 @@ #define SFMT_PARAMS_H #if !defined(MEXP) -#ifdef __GNUC__ - #warning "MEXP is not defined. I assume MEXP is 19937." -#endif - #define MEXP 19937 +# ifdef __GNUC__ +# warning "MEXP is not defined. I assume MEXP is 19937." 
+# endif +# define MEXP 19937 #endif /*----------------- BASIC DEFINITIONS @@ -100,32 +100,32 @@ */ #if MEXP == 607 - #include "test/SFMT-params607.h" +# include "test/SFMT-params607.h" #elif MEXP == 1279 - #include "test/SFMT-params1279.h" +# include "test/SFMT-params1279.h" #elif MEXP == 2281 - #include "test/SFMT-params2281.h" +# include "test/SFMT-params2281.h" #elif MEXP == 4253 - #include "test/SFMT-params4253.h" +# include "test/SFMT-params4253.h" #elif MEXP == 11213 - #include "test/SFMT-params11213.h" +# include "test/SFMT-params11213.h" #elif MEXP == 19937 - #include "test/SFMT-params19937.h" +# include "test/SFMT-params19937.h" #elif MEXP == 44497 - #include "test/SFMT-params44497.h" +# include "test/SFMT-params44497.h" #elif MEXP == 86243 - #include "test/SFMT-params86243.h" +# include "test/SFMT-params86243.h" #elif MEXP == 132049 - #include "test/SFMT-params132049.h" +# include "test/SFMT-params132049.h" #elif MEXP == 216091 - #include "test/SFMT-params216091.h" +# include "test/SFMT-params216091.h" #else -#ifdef __GNUC__ - #error "MEXP is not valid." - #undef MEXP -#else - #undef MEXP -#endif +# ifdef __GNUC__ +# error "MEXP is not valid." 
+# undef MEXP +# else +# undef MEXP +# endif #endif diff --git a/test/include/test/SFMT-params11213.h b/test/include/test/SFMT-params11213.h index 2994bd21..d2ab5b7c 100644 --- a/test/include/test/SFMT-params11213.h +++ b/test/include/test/SFMT-params11213.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS11213_H #define SFMT_PARAMS11213_H -#define POS1 68 -#define SL1 14 -#define SL2 3 -#define SR1 7 -#define SR2 3 -#define MSK1 0xeffff7fbU -#define MSK2 0xffffffefU -#define MSK3 0xdfdfbfffU -#define MSK4 0x7fffdbfdU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0xe8148000U -#define PARITY4 0xd0c7afa3U - +#define POS1 68 +#define SL1 14 +#define SL2 3 +#define SR1 7 +#define SR2 3 +#define MSK1 0xeffff7fbU +#define MSK2 0xffffffefU +#define MSK3 0xdfdfbfffU +#define MSK4 0x7fffdbfdU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xe8148000U +#define PARITY4 0xd0c7afa3U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) -#else /* For OTHER OSs(Linux?) 
*/ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} - #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} -#endif /* For OSX */ -#define IDSTR "SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, \ + 8, 19, 19, 19, 12) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, \ + 8, 9, 10, 11, 12) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 } +# define ALTI_SR2_PERM64 \ + { 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 } +#endif /* For OSX */ +#define IDSTR "SFMT-11213:68-14-3-7-3:effff7fb-ffffffef-dfdfbfff-7fffdbfd" #endif /* SFMT_PARAMS11213_H */ diff --git a/test/include/test/SFMT-params1279.h b/test/include/test/SFMT-params1279.h index d7959f98..1be5c01d 100644 --- a/test/include/test/SFMT-params1279.h +++ b/test/include/test/SFMT-params1279.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS1279_H #define SFMT_PARAMS1279_H -#define POS1 7 -#define SL1 14 -#define SL2 3 -#define SR1 5 -#define SR2 1 -#define MSK1 0xf7fefffdU -#define MSK2 0x7fefcfffU -#define MSK3 0xaff3ef3fU -#define MSK4 0xb5ffff7fU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x20000000U - +#define POS1 7 +#define SL1 14 +#define SL2 3 +#define SR1 5 +#define SR2 1 +#define MSK1 0xf7fefffdU +#define MSK2 0x7fefcfffU +#define MSK3 0xaff3ef3fU +#define MSK4 0xb5ffff7fU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x20000000U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - 
(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-1279:7-14-3-5-1:f7fefffd-7fefcfff-aff3ef3f-b5ffff7f" #endif /* SFMT_PARAMS1279_H */ diff --git a/test/include/test/SFMT-params132049.h b/test/include/test/SFMT-params132049.h index a1dcec39..1002614b 100644 --- a/test/include/test/SFMT-params132049.h +++ b/test/include/test/SFMT-params132049.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS132049_H #define SFMT_PARAMS132049_H -#define POS1 110 -#define SL1 19 -#define SL2 1 -#define SR1 21 -#define SR2 1 -#define MSK1 0xffffbb5fU -#define MSK2 0xfb6ebf95U -#define MSK3 0xfffefffaU -#define MSK4 0xcff77fffU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0xcb520000U -#define PARITY4 0xc7e91c7dU - +#define POS1 110 +#define SL1 19 +#define SL2 1 +#define SR1 21 +#define SR2 1 +#define MSK1 0xffffbb5fU +#define MSK2 0xfb6ebf95U +#define MSK3 0xfffefffaU +#define MSK4 0xcff77fffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xcb520000U +#define PARITY4 0xc7e91c7dU /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define 
ALTI_SL2_PERM64 \ - (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-132049:110-19-1-21-1:ffffbb5f-fb6ebf95-fffefffa-cff77fff" #endif /* SFMT_PARAMS132049_H */ diff --git a/test/include/test/SFMT-params19937.h b/test/include/test/SFMT-params19937.h index fb92b4c9..71df2713 100644 --- a/test/include/test/SFMT-params19937.h +++ b/test/include/test/SFMT-params19937.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS19937_H #define SFMT_PARAMS19937_H -#define POS1 122 -#define SL1 18 -#define SL2 1 -#define SR1 11 -#define SR2 1 -#define MSK1 0xdfffffefU -#define MSK2 0xddfecb7fU -#define MSK3 0xbffaffffU -#define MSK4 0xbffffff6U -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x13c9e684U - +#define POS1 122 +#define SL1 18 +#define SL2 1 +#define SR1 11 +#define SR2 1 +#define MSK1 0xdfffffefU +#define MSK2 0xddfecb7fU +#define MSK3 0xbffaffffU +#define MSK4 0xbffffff6U +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x13c9e684U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define 
ALTI_SL2_PERM64 \ - (vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-19937:122-18-1-11-1:dfffffef-ddfecb7f-bffaffff-bffffff6" #endif /* SFMT_PARAMS19937_H */ diff --git a/test/include/test/SFMT-params216091.h b/test/include/test/SFMT-params216091.h index 125ce282..d2d240e2 100644 --- a/test/include/test/SFMT-params216091.h +++ b/test/include/test/SFMT-params216091.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS216091_H #define SFMT_PARAMS216091_H -#define POS1 627 -#define SL1 11 -#define SL2 3 -#define SR1 10 -#define SR2 1 -#define MSK1 0xbff7bff7U -#define MSK2 0xbfffffffU -#define MSK3 0xbffffa7fU -#define MSK4 0xffddfbfbU -#define PARITY1 0xf8000001U -#define PARITY2 0x89e80709U -#define PARITY3 0x3bd2b64bU -#define PARITY4 0x0c64b1e4U - +#define POS1 627 +#define SL1 11 +#define SL2 3 +#define SR1 10 +#define SR2 1 +#define MSK1 0xbff7bff7U +#define MSK2 0xbfffffffU +#define MSK3 0xbffffa7fU +#define MSK4 0xffddfbfbU +#define PARITY1 0xf8000001U +#define PARITY2 0x89e80709U +#define PARITY3 0x3bd2b64bU +#define PARITY4 0x0c64b1e4U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define 
ALTI_SL2_PERM64 \ - (vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-216091:627-11-3-10-1:bff7bff7-bfffffff-bffffa7f-ffddfbfb" #endif /* SFMT_PARAMS216091_H */ diff --git a/test/include/test/SFMT-params2281.h b/test/include/test/SFMT-params2281.h index 0ef85c40..97b8de68 100644 --- a/test/include/test/SFMT-params2281.h +++ b/test/include/test/SFMT-params2281.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS2281_H #define SFMT_PARAMS2281_H -#define POS1 12 -#define SL1 19 -#define SL2 1 -#define SR1 5 -#define SR2 1 -#define MSK1 0xbff7ffbfU -#define MSK2 0xfdfffffeU -#define MSK3 0xf7ffef7fU -#define MSK4 0xf2f7cbbfU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x41dfa600U - +#define POS1 12 +#define SL1 19 +#define SL2 1 +#define SR1 5 +#define SR2 1 +#define MSK1 0xbff7ffbfU +#define MSK2 0xfdfffffeU +#define MSK3 0xf7ffef7fU +#define MSK4 0xf2f7cbbfU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x41dfa600U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define ALTI_SL2_PERM64 \ - 
(vector unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-2281:12-19-1-5-1:bff7ffbf-fdfffffe-f7ffef7f-f2f7cbbf" #endif /* SFMT_PARAMS2281_H */ diff --git a/test/include/test/SFMT-params4253.h b/test/include/test/SFMT-params4253.h index 9f07bc67..7e51edd8 100644 --- a/test/include/test/SFMT-params4253.h +++ b/test/include/test/SFMT-params4253.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS4253_H #define SFMT_PARAMS4253_H -#define POS1 17 -#define SL1 20 -#define SL2 1 -#define SR1 7 -#define SR2 1 -#define MSK1 0x9f7bffffU -#define MSK2 0x9fffff5fU -#define MSK3 0x3efffffbU -#define MSK4 0xfffff7bbU -#define PARITY1 0xa8000001U -#define PARITY2 0xaf5390a3U -#define PARITY3 0xb740b3f8U -#define PARITY4 0x6c11486dU - +#define POS1 17 +#define SL1 20 +#define SL2 1 +#define SR1 7 +#define SR2 1 +#define MSK1 0x9f7bffffU +#define MSK2 0x9fffff5fU +#define MSK3 0x3efffffbU +#define MSK4 0xfffff7bbU +#define PARITY1 0xa8000001U +#define PARITY2 0xaf5390a3U +#define PARITY3 0xb740b3f8U +#define PARITY4 0x6c11486dU /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8) - #define ALTI_SL2_PERM64 \ - (vector 
unsigned char)(1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {1,2,3,23,5,6,7,0,9,10,11,4,13,14,15,8} - #define ALTI_SL2_PERM64 {1,2,3,4,5,6,7,31,9,10,11,12,13,14,15,0} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, \ + 13, 14, 15, 8) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, \ + 13, 14, 15, 0) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 1, 2, 3, 23, 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8 } +# define ALTI_SL2_PERM64 \ + { 1, 2, 3, 4, 5, 6, 7, 31, 9, 10, 11, 12, 13, 14, 15, 0 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-4253:17-20-1-7-1:9f7bffff-9fffff5f-3efffffb-fffff7bb" #endif /* SFMT_PARAMS4253_H */ diff --git a/test/include/test/SFMT-params44497.h b/test/include/test/SFMT-params44497.h index 85598fed..8f6fee7b 100644 --- a/test/include/test/SFMT-params44497.h +++ b/test/include/test/SFMT-params44497.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS44497_H #define SFMT_PARAMS44497_H -#define POS1 330 -#define SL1 5 -#define SL2 3 -#define SR1 9 -#define SR2 3 -#define MSK1 0xeffffffbU -#define MSK2 0xdfbebfffU -#define MSK3 0xbfbf7befU -#define MSK4 0x9ffd7bffU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0xa3ac4000U -#define PARITY4 0xecc1327aU - +#define POS1 330 +#define SL1 5 +#define SL2 3 +#define SR1 9 +#define SR2 3 +#define MSK1 0xeffffffbU +#define MSK2 0xdfbebfffU +#define MSK3 0xbfbf7befU +#define MSK4 0x9ffd7bffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0xa3ac4000U +#define PARITY4 0xecc1327aU /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - 
(vector unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} - #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} -#endif /* For OSX */ -#define IDSTR "SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, \ + 8, 19, 19, 19, 12) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, \ + 8, 9, 10, 11, 12) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 } +# define ALTI_SR2_PERM64 \ + { 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 } +#endif /* For OSX */ +#define IDSTR "SFMT-44497:330-5-3-9-3:effffffb-dfbebfff-bfbf7bef-9ffd7bff" #endif /* SFMT_PARAMS44497_H */ diff --git a/test/include/test/SFMT-params607.h b/test/include/test/SFMT-params607.h index bc76485f..29fb3913 100644 --- a/test/include/test/SFMT-params607.h +++ b/test/include/test/SFMT-params607.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS607_H #define SFMT_PARAMS607_H -#define POS1 2 -#define SL1 15 -#define SL2 3 -#define SR1 13 -#define SR2 3 -#define MSK1 0xfdff37ffU -#define MSK2 0xef7f3f7dU -#define MSK3 0xff777b7dU -#define MSK4 0x7ff7fb2fU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0x5986f054U - +#define POS1 2 +#define SL1 15 +#define SL2 3 +#define SR1 13 +#define SR2 3 +#define MSK1 0xfdff37ffU +#define MSK2 0xef7f3f7dU +#define MSK3 0xff777b7dU +#define MSK4 0x7ff7fb2fU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0x5986f054U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10) - #define ALTI_SL2_PERM64 \ - (vector 
unsigned char)(3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2) - #define ALTI_SR2_PERM \ - (vector unsigned char)(5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {3,21,21,21,7,0,1,2,11,4,5,6,15,8,9,10} - #define ALTI_SL2_PERM64 {3,4,5,6,7,29,29,29,11,12,13,14,15,0,1,2} - #define ALTI_SR2_PERM {5,6,7,0,9,10,11,4,13,14,15,8,19,19,19,12} - #define ALTI_SR2_PERM64 {13,14,15,0,1,2,3,4,19,19,19,8,9,10,11,12} -#endif /* For OSX */ -#define IDSTR "SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, \ + 15, 8, 9, 10) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, \ + 14, 15, 0, 1, 2) +# define ALTI_SR2_PERM \ + (vector unsigned char)(5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, \ + 8, 19, 19, 19, 12) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, \ + 8, 9, 10, 11, 12) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 3, 21, 21, 21, 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10 } +# define ALTI_SL2_PERM64 \ + { 3, 4, 5, 6, 7, 29, 29, 29, 11, 12, 13, 14, 15, 0, 1, 2 } +# define ALTI_SR2_PERM \ + { 5, 6, 7, 0, 9, 10, 11, 4, 13, 14, 15, 8, 19, 19, 19, 12 } +# define ALTI_SR2_PERM64 \ + { 13, 14, 15, 0, 1, 2, 3, 4, 19, 19, 19, 8, 9, 10, 11, 12 } +#endif /* For OSX */ +#define IDSTR "SFMT-607:2-15-3-13-3:fdff37ff-ef7f3f7d-ff777b7d-7ff7fb2f" #endif /* SFMT_PARAMS607_H */ diff --git a/test/include/test/SFMT-params86243.h b/test/include/test/SFMT-params86243.h index 5e4d783c..5e3747e9 100644 --- a/test/include/test/SFMT-params86243.h +++ b/test/include/test/SFMT-params86243.h @@ -36,46 +36,56 @@ #ifndef SFMT_PARAMS86243_H #define SFMT_PARAMS86243_H -#define POS1 366 -#define SL1 6 -#define SL2 7 -#define SR1 19 -#define SR2 1 -#define MSK1 0xfdbffbffU -#define MSK2 0xbff7ff3fU -#define MSK3 0xfd77efffU -#define MSK4 0xbf9ff3ffU -#define PARITY1 0x00000001U -#define PARITY2 0x00000000U -#define PARITY3 0x00000000U -#define PARITY4 0xe9528d85U - +#define POS1 366 +#define SL1 6 +#define SL2 7 +#define SR1 19 +#define SR2 1 +#define MSK1 0xfdbffbffU +#define MSK2 0xbff7ff3fU +#define MSK3 0xfd77efffU +#define MSK4 0xbf9ff3ffU +#define PARITY1 0x00000001U +#define PARITY2 0x00000000U +#define PARITY3 0x00000000U +#define PARITY4 0xe9528d85U /* PARAMETERS FOR ALTIVEC */ -#if defined(__APPLE__) /* For OSX */ - #define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) - #define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) - #define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) - #define ALTI_MSK64 \ - (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) - #define ALTI_SL2_PERM \ - (vector unsigned char)(25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6) - #define ALTI_SL2_PERM64 
\ - (vector unsigned char)(7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6) - #define ALTI_SR2_PERM \ - (vector unsigned char)(7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14) - #define ALTI_SR2_PERM64 \ - (vector unsigned char)(15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14) -#else /* For OTHER OSs(Linux?) */ - #define ALTI_SL1 {SL1, SL1, SL1, SL1} - #define ALTI_SR1 {SR1, SR1, SR1, SR1} - #define ALTI_MSK {MSK1, MSK2, MSK3, MSK4} - #define ALTI_MSK64 {MSK2, MSK1, MSK4, MSK3} - #define ALTI_SL2_PERM {25,25,25,25,3,25,25,25,7,0,1,2,11,4,5,6} - #define ALTI_SL2_PERM64 {7,25,25,25,25,25,25,25,15,0,1,2,3,4,5,6} - #define ALTI_SR2_PERM {7,0,1,2,11,4,5,6,15,8,9,10,17,12,13,14} - #define ALTI_SR2_PERM64 {15,0,1,2,3,4,5,6,17,8,9,10,11,12,13,14} -#endif /* For OSX */ -#define IDSTR "SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff" +#if defined(__APPLE__) /* For OSX */ +# define ALTI_SL1 (vector unsigned int)(SL1, SL1, SL1, SL1) +# define ALTI_SR1 (vector unsigned int)(SR1, SR1, SR1, SR1) +# define ALTI_MSK (vector unsigned int)(MSK1, MSK2, MSK3, MSK4) +# define ALTI_MSK64 (vector unsigned int)(MSK2, MSK1, MSK4, MSK3) +# define ALTI_SL2_PERM \ + (vector unsigned char)(25, 25, 25, 25, 3, 25, 25, 25, 7, 0, 1, \ + 2, 11, 4, 5, 6) +# define ALTI_SL2_PERM64 \ + (vector unsigned char)(7, 25, 25, 25, 25, 25, 25, 25, 15, 0, \ + 1, 2, 3, 4, 5, 6) +# define ALTI_SR2_PERM \ + (vector unsigned char)(7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, \ + 17, 12, 13, 14) +# define ALTI_SR2_PERM64 \ + (vector unsigned char)(15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, \ + 11, 12, 13, 14) +#else /* For OTHER OSs(Linux?) 
*/ +# define ALTI_SL1 \ + { SL1, SL1, SL1, SL1 } +# define ALTI_SR1 \ + { SR1, SR1, SR1, SR1 } +# define ALTI_MSK \ + { MSK1, MSK2, MSK3, MSK4 } +# define ALTI_MSK64 \ + { MSK2, MSK1, MSK4, MSK3 } +# define ALTI_SL2_PERM \ + { 25, 25, 25, 25, 3, 25, 25, 25, 7, 0, 1, 2, 11, 4, 5, 6 } +# define ALTI_SL2_PERM64 \ + { 7, 25, 25, 25, 25, 25, 25, 25, 15, 0, 1, 2, 3, 4, 5, 6 } +# define ALTI_SR2_PERM \ + { 7, 0, 1, 2, 11, 4, 5, 6, 15, 8, 9, 10, 17, 12, 13, 14 } +# define ALTI_SR2_PERM64 \ + { 15, 0, 1, 2, 3, 4, 5, 6, 17, 8, 9, 10, 11, 12, 13, 14 } +#endif /* For OSX */ +#define IDSTR "SFMT-86243:366-6-7-19-1:fdbffbff-bff7ff3f-fd77efff-bf9ff3ff" #endif /* SFMT_PARAMS86243_H */ diff --git a/test/include/test/SFMT-sse2.h b/test/include/test/SFMT-sse2.h index 169ad558..83b35b43 100644 --- a/test/include/test/SFMT-sse2.h +++ b/test/include/test/SFMT-sse2.h @@ -60,48 +60,49 @@ * @param mask 128-bit mask * @return output */ -JEMALLOC_ALWAYS_INLINE __m128i mm_recursion(__m128i *a, __m128i *b, - __m128i c, __m128i d, __m128i mask) { - __m128i v, x, y, z; +JEMALLOC_ALWAYS_INLINE __m128i +mm_recursion(__m128i *a, __m128i *b, __m128i c, __m128i d, __m128i mask) { + __m128i v, x, y, z; - x = _mm_load_si128(a); - y = _mm_srli_epi32(*b, SR1); - z = _mm_srli_si128(c, SR2); - v = _mm_slli_epi32(d, SL1); - z = _mm_xor_si128(z, x); - z = _mm_xor_si128(z, v); - x = _mm_slli_si128(x, SL2); - y = _mm_and_si128(y, mask); - z = _mm_xor_si128(z, x); - z = _mm_xor_si128(z, y); - return z; + x = _mm_load_si128(a); + y = _mm_srli_epi32(*b, SR1); + z = _mm_srli_si128(c, SR2); + v = _mm_slli_epi32(d, SL1); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, v); + x = _mm_slli_si128(x, SL2); + y = _mm_and_si128(y, mask); + z = _mm_xor_si128(z, x); + z = _mm_xor_si128(z, y); + return z; } /** * This function fills the internal state array with pseudorandom * integers. 
*/ -static inline void gen_rand_all(sfmt_t *ctx) { - int i; - __m128i r, r1, r2, mask; - mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); +static inline void +gen_rand_all(sfmt_t *ctx) { + int i; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); - r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); - r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); - for (i = 0; i < N - POS1; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, - mask); - _mm_store_si128(&ctx->sfmt[i].si, r); - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&ctx->sfmt[i].si, r); - r1 = r2; - r2 = r; - } + r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); + r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion( + &ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&ctx->sfmt[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1 - N].si, + r1, r2, mask); + _mm_store_si128(&ctx->sfmt[i].si, r); + r1 = r2; + r2 = r; + } } /** @@ -111,47 +112,48 @@ static inline void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pesudorandom numbers to be generated. 
*/ -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { - int i, j; - __m128i r, r1, r2, mask; - mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); +static inline void +gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + __m128i r, r1, r2, mask; + mask = _mm_set_epi32(MSK4, MSK3, MSK2, MSK1); - r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); - r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); - for (i = 0; i < N - POS1; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - r1 = r2; - r2 = r; - } - for (; i < N; i++) { - r = mm_recursion(&ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - r1 = r2; - r2 = r; - } - /* main loop */ - for (; i < size - N; i++) { - r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - r1 = r2; - r2 = r; - } - for (j = 0; j < 2 * N - size; j++) { - r = _mm_load_si128(&array[j + size - N].si); - _mm_store_si128(&ctx->sfmt[j].si, r); - } - for (; i < size; i++) { - r = mm_recursion(&array[i - N].si, &array[i + POS1 - N].si, r1, r2, - mask); - _mm_store_si128(&array[i].si, r); - _mm_store_si128(&ctx->sfmt[j++].si, r); - r1 = r2; - r2 = r; - } + r1 = _mm_load_si128(&ctx->sfmt[N - 2].si); + r2 = _mm_load_si128(&ctx->sfmt[N - 1].si); + for (i = 0; i < N - POS1; i++) { + r = mm_recursion( + &ctx->sfmt[i].si, &ctx->sfmt[i + POS1].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (; i < N; i++) { + r = mm_recursion( + &ctx->sfmt[i].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + /* main loop */ + for (; i < size - N; i++) { + r = mm_recursion( + &array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + r1 = r2; + r2 = r; + } + for (j = 0; j < 2 * N - size; j++) { + r = _mm_load_si128(&array[j + size - N].si); + 
_mm_store_si128(&ctx->sfmt[j].si, r); + } + for (; i < size; i++) { + r = mm_recursion( + &array[i - N].si, &array[i + POS1 - N].si, r1, r2, mask); + _mm_store_si128(&array[i].si, r); + _mm_store_si128(&ctx->sfmt[j++].si, r); + r1 = r2; + r2 = r; + } } #endif diff --git a/test/include/test/SFMT.h b/test/include/test/SFMT.h index 338dd45c..0082c026 100644 --- a/test/include/test/SFMT.h +++ b/test/include/test/SFMT.h @@ -68,79 +68,89 @@ typedef struct sfmt_s sfmt_t; -uint32_t gen_rand32(sfmt_t *ctx); -uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit); -uint64_t gen_rand64(sfmt_t *ctx); -uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit); -void fill_array32(sfmt_t *ctx, uint32_t *array, int size); -void fill_array64(sfmt_t *ctx, uint64_t *array, int size); -sfmt_t *init_gen_rand(uint32_t seed); -sfmt_t *init_by_array(uint32_t *init_key, int key_length); -void fini_gen_rand(sfmt_t *ctx); +uint32_t gen_rand32(sfmt_t *ctx); +uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit); +uint64_t gen_rand64(sfmt_t *ctx); +uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit); +void fill_array32(sfmt_t *ctx, uint32_t *array, int size); +void fill_array64(sfmt_t *ctx, uint64_t *array, int size); +sfmt_t *init_gen_rand(uint32_t seed); +sfmt_t *init_by_array(uint32_t *init_key, int key_length); +void fini_gen_rand(sfmt_t *ctx); const char *get_idstring(void); -int get_min_array_size32(void); -int get_min_array_size64(void); +int get_min_array_size32(void); +int get_min_array_size64(void); /* These real versions are due to Isaku Wada */ /** generates a random number on [0,1]-real-interval */ -static inline double to_real1(uint32_t v) { - return v * (1.0/4294967295.0); - /* divided by 2^32-1 */ +static inline double +to_real1(uint32_t v) { + return v * (1.0 / 4294967295.0); + /* divided by 2^32-1 */ } /** generates a random number on [0,1]-real-interval */ -static inline double genrand_real1(sfmt_t *ctx) { - return to_real1(gen_rand32(ctx)); +static inline double 
+genrand_real1(sfmt_t *ctx) { + return to_real1(gen_rand32(ctx)); } /** generates a random number on [0,1)-real-interval */ -static inline double to_real2(uint32_t v) { - return v * (1.0/4294967296.0); - /* divided by 2^32 */ +static inline double +to_real2(uint32_t v) { + return v * (1.0 / 4294967296.0); + /* divided by 2^32 */ } /** generates a random number on [0,1)-real-interval */ -static inline double genrand_real2(sfmt_t *ctx) { - return to_real2(gen_rand32(ctx)); +static inline double +genrand_real2(sfmt_t *ctx) { + return to_real2(gen_rand32(ctx)); } /** generates a random number on (0,1)-real-interval */ -static inline double to_real3(uint32_t v) { - return (((double)v) + 0.5)*(1.0/4294967296.0); - /* divided by 2^32 */ +static inline double +to_real3(uint32_t v) { + return (((double)v) + 0.5) * (1.0 / 4294967296.0); + /* divided by 2^32 */ } /** generates a random number on (0,1)-real-interval */ -static inline double genrand_real3(sfmt_t *ctx) { - return to_real3(gen_rand32(ctx)); +static inline double +genrand_real3(sfmt_t *ctx) { + return to_real3(gen_rand32(ctx)); } /** These real versions are due to Isaku Wada */ /** generates a random number on [0,1) with 53-bit resolution*/ -static inline double to_res53(uint64_t v) { - return v * (1.0/18446744073709551616.0L); +static inline double +to_res53(uint64_t v) { + return v * (1.0 / 18446744073709551616.0L); } /** generates a random number on [0,1) with 53-bit resolution from two * 32 bit integers */ -static inline double to_res53_mix(uint32_t x, uint32_t y) { - return to_res53(x | ((uint64_t)y << 32)); +static inline double +to_res53_mix(uint32_t x, uint32_t y) { + return to_res53(x | ((uint64_t)y << 32)); } /** generates a random number on [0,1) with 53-bit resolution */ -static inline double genrand_res53(sfmt_t *ctx) { - return to_res53(gen_rand64(ctx)); +static inline double +genrand_res53(sfmt_t *ctx) { + return to_res53(gen_rand64(ctx)); } /** generates a random number on [0,1) with 53-bit 
resolution using 32bit integer. */ -static inline double genrand_res53_mix(sfmt_t *ctx) { - uint32_t x, y; +static inline double +genrand_res53_mix(sfmt_t *ctx) { + uint32_t x, y; - x = gen_rand32(ctx); - y = gen_rand32(ctx); - return to_res53_mix(x, y); + x = gen_rand32(ctx); + y = gen_rand32(ctx); + return to_res53_mix(x, y); } #endif diff --git a/test/include/test/arena_util.h b/test/include/test/arena_util.h index 535c1aa1..431fdfae 100644 --- a/test/include/test/arena_util.h +++ b/test/include/test/arena_util.h @@ -1,25 +1,25 @@ static inline unsigned do_arena_create(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { unsigned arena_ind; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); + expect_d_eq(mallctlnametomib("arena.0.dirty_decay_ms", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&dirty_decay_ms, sizeof(dirty_decay_ms)), + 0, "Unexpected mallctlbymib() failure"); - expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), - 0, "Unexpected mallctlnametomib() failure"); + expect_d_eq(mallctlnametomib("arena.0.muzzy_decay_ms", mib, &miblen), 0, + "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&muzzy_decay_ms, sizeof(muzzy_decay_ms)), + 0, "Unexpected mallctlbymib() failure"); return arena_ind; } @@ -33,7 +33,7 @@ 
do_arena_destroy(unsigned arena_ind) { mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -51,7 +51,7 @@ do_epoch(void) { static inline void do_purge(unsigned arena_ind) { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.purge", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -62,7 +62,7 @@ do_purge(unsigned arena_ind) { static inline void do_decay(unsigned arena_ind) { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.decay", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -73,12 +73,12 @@ do_decay(unsigned arena_ind) { static inline uint64_t get_arena_npurge_impl(const char *mibname, unsigned arena_ind) { size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib(mibname, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; uint64_t npurge = 0; - size_t sz = sizeof(npurge); + size_t sz = sizeof(npurge); expect_d_eq(mallctlbymib(mib, miblen, (void *)&npurge, &sz, NULL, 0), config_stats ? 
0 : ENOENT, "Unexpected mallctlbymib() failure"); return npurge; @@ -105,15 +105,15 @@ get_arena_muzzy_npurge(unsigned arena_ind) { static inline uint64_t get_arena_npurge(unsigned arena_ind) { do_epoch(); - return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + - get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); + return get_arena_npurge_impl("stats.arenas.0.dirty_npurge", arena_ind) + + get_arena_npurge_impl("stats.arenas.0.muzzy_npurge", arena_ind); } static inline size_t get_arena_pdirty(unsigned arena_ind) { do_epoch(); size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("stats.arenas.0.pdirty", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; @@ -128,7 +128,7 @@ static inline size_t get_arena_pmuzzy(unsigned arena_ind) { do_epoch(); size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("stats.arenas.0.pmuzzy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[2] = (size_t)arena_ind; @@ -148,8 +148,7 @@ do_mallocx(size_t size, int flags) { static inline void generate_dirty(unsigned arena_ind, size_t size) { - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; void *p = do_mallocx(size, flags); dallocx(p, flags); } - diff --git a/test/include/test/bench.h b/test/include/test/bench.h index e2a9fc09..faebfd77 100644 --- a/test/include/test/bench.h +++ b/test/include/test/bench.h @@ -1,6 +1,6 @@ static inline void -time_func(timedelta_t *timer, uint64_t nwarmup, uint64_t niter, - void (*func)(void)) { +time_func( + timedelta_t *timer, uint64_t nwarmup, uint64_t niter, void (*func)(void)) { uint64_t i; for (i = 0; i < nwarmup; i++) { @@ -23,16 +23,16 @@ fmt_nsecs(uint64_t usec, uint64_t iters, char *buf) { uint64_t nsecs_per_iter1000 = nsec1000 
/ iters; uint64_t intpart = nsecs_per_iter1000 / 1000; uint64_t fracpart = nsecs_per_iter1000 % 1000; - malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64, intpart, - fracpart); + malloc_snprintf(buf, FMT_NSECS_BUF_SIZE, "%" FMTu64 ".%03" FMTu64, + intpart, fracpart); } static inline void compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, - void (*func_a), const char *name_b, void (*func_b)) { + void(*func_a), const char *name_b, void(*func_b)) { timedelta_t timer_a, timer_b; - char ratio_buf[6]; - void *p; + char ratio_buf[6]; + void *p; p = mallocx(1, 0); if (p == NULL) { @@ -44,16 +44,18 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, time_func(&timer_b, nwarmup, niter, (void (*)(void))func_b); uint64_t usec_a = timer_usec(&timer_a); - char buf_a[FMT_NSECS_BUF_SIZE]; + char buf_a[FMT_NSECS_BUF_SIZE]; fmt_nsecs(usec_a, niter, buf_a); uint64_t usec_b = timer_usec(&timer_b); - char buf_b[FMT_NSECS_BUF_SIZE]; + char buf_b[FMT_NSECS_BUF_SIZE]; fmt_nsecs(usec_b, niter, buf_b); timer_ratio(&timer_a, &timer_b, ratio_buf, sizeof(ratio_buf)); - malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64 "us (%s ns/iter), " - "%s=%" FMTu64 "us (%s ns/iter), time consumption ratio=%s:1\n", + malloc_printf("%" FMTu64 " iterations, %s=%" FMTu64 + "us (%s ns/iter), " + "%s=%" FMTu64 + "us (%s ns/iter), time consumption ratio=%s:1\n", niter, name_a, usec_a, buf_a, name_b, usec_b, buf_b, ratio_buf); dallocx(p, 0); @@ -62,10 +64,10 @@ compare_funcs(uint64_t nwarmup, uint64_t niter, const char *name_a, static inline void * no_opt_ptr(void *ptr) { #ifdef JEMALLOC_HAVE_ASM_VOLATILE - asm volatile("" : "+r"(ptr)); + asm volatile("" : "+r"(ptr)); #else - void *volatile dup = ptr; - ptr = dup; + void *volatile dup = ptr; + ptr = dup; #endif - return ptr; + return ptr; } diff --git a/test/include/test/bgthd.h b/test/include/test/bgthd.h index 4fa2395e..0a7e789b 100644 --- a/test/include/test/bgthd.h +++ b/test/include/test/bgthd.h @@ -5,9 
+5,9 @@ static inline bool is_background_thread_enabled(void) { - bool enabled; + bool enabled; size_t sz = sizeof(bool); - int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL,0); + int ret = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0); if (ret == ENOENT) { return false; } diff --git a/test/include/test/btalloc.h b/test/include/test/btalloc.h index 8f345993..04a336d5 100644 --- a/test/include/test/btalloc.h +++ b/test/include/test/btalloc.h @@ -1,30 +1,28 @@ /* btalloc() provides a mechanism for allocating via permuted backtraces. */ -void *btalloc(size_t size, unsigned bits); +void *btalloc(size_t size, unsigned bits); -#define btalloc_n_proto(n) \ -void *btalloc_##n(size_t size, unsigned bits); -btalloc_n_proto(0) -btalloc_n_proto(1) +#define btalloc_n_proto(n) void *btalloc_##n(size_t size, unsigned bits); +btalloc_n_proto(0) btalloc_n_proto(1) -#define btalloc_n_gen(n) \ -void * \ -btalloc_##n(size_t size, unsigned bits) { \ - void *p; \ - \ - if (bits == 0) { \ - p = mallocx(size, 0); \ - } else { \ - switch (bits & 0x1U) { \ - case 0: \ - p = (btalloc_0(size, bits >> 1)); \ - break; \ - case 1: \ - p = (btalloc_1(size, bits >> 1)); \ - break; \ - default: not_reached(); \ - } \ - } \ - /* Intentionally sabotage tail call optimization. */ \ - expect_ptr_not_null(p, "Unexpected mallocx() failure"); \ - return p; \ -} +#define btalloc_n_gen(n) \ + void *btalloc_##n(size_t size, unsigned bits) { \ + void *p; \ + \ + if (bits == 0) { \ + p = mallocx(size, 0); \ + } else { \ + switch (bits & 0x1U) { \ + case 0: \ + p = (btalloc_0(size, bits >> 1)); \ + break; \ + case 1: \ + p = (btalloc_1(size, bits >> 1)); \ + break; \ + default: \ + not_reached(); \ + } \ + } \ + /* Intentionally sabotage tail call optimization. 
*/ \ + expect_ptr_not_null(p, "Unexpected mallocx() failure"); \ + return p; \ + } diff --git a/test/include/test/extent_hooks.h b/test/include/test/extent_hooks.h index aad0a46c..33bb8593 100644 --- a/test/include/test/extent_hooks.h +++ b/test/include/test/extent_hooks.h @@ -3,40 +3,33 @@ * passthrough. */ -static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, +static void *extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind); -static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); -static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, bool committed, unsigned arena_ind); -static bool extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, - void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind); -static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr, - size_t size, size_t size_a, size_t size_b, bool committed, - unsigned arena_ind); -static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, - size_t size_a, void *addr_b, size_t size_b, bool committed, - unsigned arena_ind); +static bool extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, bool committed, unsigned arena_ind); +static bool extent_commit_hook(extent_hooks_t 
*extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t offset, size_t length, unsigned arena_ind); +static bool extent_split_hook(extent_hooks_t *extent_hooks, void *addr, + size_t size, size_t size_a, size_t size_b, bool committed, + unsigned arena_ind); +static bool extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, + size_t size_a, void *addr_b, size_t size_b, bool committed, + unsigned arena_ind); static extent_hooks_t *default_hooks; -static extent_hooks_t hooks = { - extent_alloc_hook, - extent_dalloc_hook, - extent_destroy_hook, - extent_commit_hook, - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - extent_split_hook, - extent_merge_hook -}; +static extent_hooks_t hooks = {extent_alloc_hook, extent_dalloc_hook, + extent_destroy_hook, extent_commit_hook, extent_decommit_hook, + extent_purge_lazy_hook, extent_purge_forced_hook, extent_split_hook, + extent_merge_hook}; /* Control whether hook functions pass calls through to default hooks. */ static bool try_alloc = true; @@ -72,9 +65,9 @@ static bool did_split; static bool did_merge; #if 0 -# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) +# define TRACE_HOOK(fmt, ...) malloc_printf(fmt, __VA_ARGS__) #else -# define TRACE_HOOK(fmt, ...) +# define TRACE_HOOK(fmt, ...) 
#endif static void * @@ -82,20 +75,21 @@ extent_alloc_hook(extent_hooks_t *extent_hooks, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit, unsigned arena_ind) { void *ret; - TRACE_HOOK("%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " - "*zero=%s, *commit=%s, arena_ind=%u)\n", __func__, extent_hooks, - new_addr, size, alignment, *zero ? "true" : "false", *commit ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, new_addr=%p, size=%zu, alignment=%zu, " + "*zero=%s, *commit=%s, arena_ind=%u)\n", + __func__, extent_hooks, new_addr, size, alignment, + *zero ? "true" : "false", *commit ? "true" : "false", arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->alloc, extent_alloc_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->alloc, extent_alloc_hook, "Wrong hook function"); called_alloc = true; if (!try_alloc) { return NULL; } - ret = default_hooks->alloc(default_hooks, new_addr, size, alignment, - zero, commit, 0); + ret = default_hooks->alloc( + default_hooks, new_addr, size, alignment, zero, commit, 0); did_alloc = (ret != NULL); return ret; } @@ -105,13 +99,15 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", + __func__, extent_hooks, addr, size, committed ? 
"true" : "false", + arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->dalloc, extent_dalloc_hook, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { return true; @@ -124,13 +120,15 @@ extent_dalloc_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, static void extent_destroy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", + __func__, extent_hooks, addr, size, committed ? "true" : "false", + arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->destroy, extent_destroy_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->destroy, extent_destroy_hook, "Wrong hook function"); called_destroy = true; if (!try_destroy) { return; @@ -144,19 +142,20 @@ extent_commit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->commit, extent_commit_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->commit, extent_commit_hook, "Wrong hook function"); 
called_commit = true; if (!try_commit) { return true; } - err = default_hooks->commit(default_hooks, addr, size, offset, length, - 0); + err = default_hooks->commit( + default_hooks, addr, size, offset, length, 0); did_commit = !err; return err; } @@ -166,9 +165,10 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu, arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu, arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); expect_ptr_eq(extent_hooks->decommit, extent_decommit_hook, @@ -177,8 +177,8 @@ extent_decommit_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, if (!try_decommit) { return true; } - err = default_hooks->decommit(default_hooks, addr, size, offset, length, - 0); + err = default_hooks->decommit( + default_hooks, addr, size, offset, length, 0); did_decommit = !err; return err; } @@ -188,9 +188,10 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); expect_ptr_eq(extent_hooks->purge_lazy, extent_purge_lazy_hook, @@ -199,9 +200,9 @@ extent_purge_lazy_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, if 
(!try_purge_lazy) { return true; } - err = default_hooks->purge_lazy == NULL || - default_hooks->purge_lazy(default_hooks, addr, size, offset, length, - 0); + err = default_hooks->purge_lazy == NULL + || default_hooks->purge_lazy( + default_hooks, addr, size, offset, length, 0); did_purge_lazy = !err; return err; } @@ -211,9 +212,10 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t offset, size_t length, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " - "length=%zu arena_ind=%u)\n", __func__, extent_hooks, addr, size, - offset, length, arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, offset=%zu, " + "length=%zu arena_ind=%u)\n", + __func__, extent_hooks, addr, size, offset, length, arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); expect_ptr_eq(extent_hooks->purge_forced, extent_purge_forced_hook, @@ -222,9 +224,9 @@ extent_purge_forced_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, if (!try_purge_forced) { return true; } - err = default_hooks->purge_forced == NULL || - default_hooks->purge_forced(default_hooks, addr, size, offset, - length, 0); + err = default_hooks->purge_forced == NULL + || default_hooks->purge_forced( + default_hooks, addr, size, offset, length, 0); did_purge_forced = !err; return err; } @@ -234,21 +236,22 @@ extent_split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " - "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, - addr, size, size_a, size_b, committed ? "true" : "false", - arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, size_a=%zu, " + "size_b=%zu, committed=%s, arena_ind=%u)\n", + __func__, extent_hooks, addr, size, size_a, size_b, + committed ? 
"true" : "false", arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->split, extent_split_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->split, extent_split_hook, "Wrong hook function"); called_split = true; if (!try_split) { return true; } - err = (default_hooks->split == NULL || - default_hooks->split(default_hooks, addr, size, size_a, size_b, - committed, 0)); + err = (default_hooks->split == NULL + || default_hooks->split( + default_hooks, addr, size, size_a, size_b, committed, 0)); did_split = !err; return err; } @@ -258,23 +261,24 @@ extent_merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, void *addr_b, size_t size_b, bool committed, unsigned arena_ind) { bool err; - TRACE_HOOK("%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " - "size_b=%zu, committed=%s, arena_ind=%u)\n", __func__, extent_hooks, - addr_a, size_a, addr_b, size_b, committed ? "true" : "false", - arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr_a=%p, size_a=%zu, addr_b=%p " + "size_b=%zu, committed=%s, arena_ind=%u)\n", + __func__, extent_hooks, addr_a, size_a, addr_b, size_b, + committed ? 
"true" : "false", arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->merge, extent_merge_hook, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->merge, extent_merge_hook, "Wrong hook function"); expect_ptr_eq((void *)((uintptr_t)addr_a + size_a), addr_b, "Extents not mergeable"); called_merge = true; if (!try_merge) { return true; } - err = (default_hooks->merge == NULL || - default_hooks->merge(default_hooks, addr_a, size_a, addr_b, size_b, - committed, 0)); + err = (default_hooks->merge == NULL + || default_hooks->merge( + default_hooks, addr_a, size_a, addr_b, size_b, committed, 0)); did_merge = !err; return err; } @@ -285,5 +289,6 @@ extent_hooks_prep(void) { sz = sizeof(default_hooks); expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&default_hooks, &sz, - NULL, 0), 0, "Unexpected mallctl() error"); + NULL, 0), + 0, "Unexpected mallctl() error"); } diff --git a/test/include/test/fork.h b/test/include/test/fork.h index ac9b1858..9e04d279 100644 --- a/test/include/test/fork.h +++ b/test/include/test/fork.h @@ -3,7 +3,7 @@ #ifndef _WIN32 -#include +# include static inline void fork_wait_for_child_exit(int pid) { @@ -13,8 +13,10 @@ fork_wait_for_child_exit(int pid) { test_fail("Unexpected waitpid() failure."); } if (WIFSIGNALED(status)) { - test_fail("Unexpected child termination due to " - "signal %d", WTERMSIG(status)); + test_fail( + "Unexpected child termination due to " + "signal %d", + WTERMSIG(status)); break; } if (WIFEXITED(status)) { diff --git a/test/include/test/math.h b/test/include/test/math.h index efba086d..c9b32e91 100644 --- a/test/include/test/math.h +++ b/test/include/test/math.h @@ -27,9 +27,12 @@ ln_gamma(double x) { z = 1.0 / (x * x); - return f + (x-0.5) * log(x) - x + 0.918938533204673 + - (((-0.000595238095238 * z + 0.000793650793651) * z - - 0.002777777777778) * z + 0.083333333333333) / x; + return f + (x - 0.5) * log(x) - x + 
0.918938533204673 + + (((-0.000595238095238 * z + 0.000793650793651) * z + - 0.002777777777778) + * z + + 0.083333333333333) + / x; } /* @@ -43,8 +46,8 @@ ln_gamma(double x) { */ static inline double i_gamma(double x, double p, double ln_gamma_p) { - double acu, factor, oflo, gin, term, rn, a, b, an, dif; - double pn[6]; + double acu, factor, oflo, gin, term, rn, a, b, an, dif; + double pn[6]; unsigned i; assert(p > 0.0); @@ -91,7 +94,7 @@ i_gamma(double x, double p, double ln_gamma_p) { term += 1.0; an = a * term; for (i = 0; i < 2; i++) { - pn[i+4] = b * pn[i+2] - an * pn[i]; + pn[i + 4] = b * pn[i + 2] - an * pn[i]; } if (pn[5] != 0.0) { rn = pn[4] / pn[5]; @@ -103,7 +106,7 @@ i_gamma(double x, double p, double ln_gamma_p) { gin = rn; } for (i = 0; i < 4; i++) { - pn[i] = pn[i+2]; + pn[i] = pn[i + 2]; } if (fabs(pn[4]) >= oflo) { @@ -135,16 +138,35 @@ pt_norm(double p) { if (fabs(q) <= 0.425) { /* p close to 1/2. */ r = 0.180625 - q * q; - return q * (((((((2.5090809287301226727e3 * r + - 3.3430575583588128105e4) * r + 6.7265770927008700853e4) * r - + 4.5921953931549871457e4) * r + 1.3731693765509461125e4) * - r + 1.9715909503065514427e3) * r + 1.3314166789178437745e2) - * r + 3.3871328727963666080e0) / - (((((((5.2264952788528545610e3 * r + - 2.8729085735721942674e4) * r + 3.9307895800092710610e4) * r - + 2.1213794301586595867e4) * r + 5.3941960214247511077e3) * - r + 6.8718700749205790830e2) * r + 4.2313330701600911252e1) - * r + 1.0); + return q + * (((((((2.5090809287301226727e3 * r + + 3.3430575583588128105e4) + * r + + 6.7265770927008700853e4) + * r + + 4.5921953931549871457e4) + * r + + 1.3731693765509461125e4) + * r + + 1.9715909503065514427e3) + * r + + 1.3314166789178437745e2) + * r + + 3.3871328727963666080e0) + / (((((((5.2264952788528545610e3 * r + + 2.8729085735721942674e4) + * r + + 3.9307895800092710610e4) + * r + + 2.1213794301586595867e4) + * r + + 5.3941960214247511077e3) + * r + + 6.8718700749205790830e2) + * r + + 4.2313330701600911252e1) + 
* r + + 1.0); } else { if (q < 0.0) { r = p; @@ -157,40 +179,65 @@ pt_norm(double p) { if (r <= 5.0) { /* p neither close to 1/2 nor 0 or 1. */ r -= 1.6; - ret = ((((((((7.74545014278341407640e-4 * r + - 2.27238449892691845833e-2) * r + - 2.41780725177450611770e-1) * r + - 1.27045825245236838258e0) * r + - 3.64784832476320460504e0) * r + - 5.76949722146069140550e0) * r + - 4.63033784615654529590e0) * r + - 1.42343711074968357734e0) / - (((((((1.05075007164441684324e-9 * r + - 5.47593808499534494600e-4) * r + - 1.51986665636164571966e-2) - * r + 1.48103976427480074590e-1) * r + - 6.89767334985100004550e-1) * r + - 1.67638483018380384940e0) * r + - 2.05319162663775882187e0) * r + 1.0)); + ret = ((((((((7.74545014278341407640e-4 * r + + 2.27238449892691845833e-2) + * r + + 2.41780725177450611770e-1) + * r + + 1.27045825245236838258e0) + * r + + 3.64784832476320460504e0) + * r + + 5.76949722146069140550e0) + * r + + 4.63033784615654529590e0) + * r + + 1.42343711074968357734e0) + / (((((((1.05075007164441684324e-9 * r + + 5.47593808499534494600e-4) + * r + + 1.51986665636164571966e-2) + * r + + 1.48103976427480074590e-1) + * r + + 6.89767334985100004550e-1) + * r + + 1.67638483018380384940e0) + * r + + 2.05319162663775882187e0) + * r + + 1.0)); } else { /* p near 0 or 1. 
*/ r -= 5.0; - ret = ((((((((2.01033439929228813265e-7 * r + - 2.71155556874348757815e-5) * r + - 1.24266094738807843860e-3) * r + - 2.65321895265761230930e-2) * r + - 2.96560571828504891230e-1) * r + - 1.78482653991729133580e0) * r + - 5.46378491116411436990e0) * r + - 6.65790464350110377720e0) / - (((((((2.04426310338993978564e-15 * r + - 1.42151175831644588870e-7) * r + - 1.84631831751005468180e-5) * r + - 7.86869131145613259100e-4) * r + - 1.48753612908506148525e-2) * r + - 1.36929880922735805310e-1) * r + - 5.99832206555887937690e-1) - * r + 1.0)); + ret = ((((((((2.01033439929228813265e-7 * r + + 2.71155556874348757815e-5) + * r + + 1.24266094738807843860e-3) + * r + + 2.65321895265761230930e-2) + * r + + 2.96560571828504891230e-1) + * r + + 1.78482653991729133580e0) + * r + + 5.46378491116411436990e0) + * r + + 6.65790464350110377720e0) + / (((((((2.04426310338993978564e-15 * r + + 1.42151175831644588870e-7) + * r + + 1.84631831751005468180e-5) + * r + + 7.86869131145613259100e-4) + * r + + 1.48753612908506148525e-2) + * r + + 1.36929880922735805310e-1) + * r + + 5.99832206555887937690e-1) + * r + + 1.0)); } if (q < 0.0) { ret = -ret; @@ -244,8 +291,9 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { ch = df * pow(x * sqrt(p1) + 1.0 - p1, 3.0); /* Starting approximation for p tending to 1. 
*/ if (ch > 2.2 * df + 6.0) { - ch = -2.0 * (log(1.0 - p) - c * log(0.5 * ch) + - ln_gamma_df_2); + ch = -2.0 + * (log(1.0 - p) - c * log(0.5 * ch) + + ln_gamma_df_2); } } else { ch = 0.4; @@ -254,10 +302,13 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { q = ch; p1 = 1.0 + ch * (4.67 + ch); p2 = ch * (6.73 + ch * (6.66 + ch)); - t = -0.5 + (4.67 + 2.0 * ch) / p1 - (6.73 + ch - * (13.32 + 3.0 * ch)) / p2; - ch -= (1.0 - exp(a + ln_gamma_df_2 + 0.5 * ch + - c * aa) * p2 / p1) / t; + t = -0.5 + (4.67 + 2.0 * ch) / p1 + - (6.73 + ch * (13.32 + 3.0 * ch)) / p2; + ch -= (1.0 + - exp(a + ln_gamma_df_2 + 0.5 * ch + + c * aa) + * p2 / p1) + / t; if (fabs(q / ch - 1.0) - 0.01 <= 0.0) { break; } @@ -276,17 +327,36 @@ pt_chi2(double p, double df, double ln_gamma_df_2) { t = p2 * exp(xx * aa + ln_gamma_df_2 + p1 - c * log(ch)); b = t / ch; a = 0.5 * t - b * c; - s1 = (210.0 + a * (140.0 + a * (105.0 + a * (84.0 + a * (70.0 + - 60.0 * a))))) / 420.0; - s2 = (420.0 + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 * - a)))) / 2520.0; + s1 = (210.0 + + a + * (140.0 + + a + * (105.0 + + a * (84.0 + a * (70.0 + 60.0 * a))))) + / 420.0; + s2 = + (420.0 + + a * (735.0 + a * (966.0 + a * (1141.0 + 1278.0 * a)))) + / 2520.0; s3 = (210.0 + a * (462.0 + a * (707.0 + 932.0 * a))) / 2520.0; - s4 = (252.0 + a * (672.0 + 1182.0 * a) + c * (294.0 + a * - (889.0 + 1740.0 * a))) / 5040.0; + s4 = (252.0 + a * (672.0 + 1182.0 * a) + + c * (294.0 + a * (889.0 + 1740.0 * a))) + / 5040.0; s5 = (84.0 + 264.0 * a + c * (175.0 + 606.0 * a)) / 2520.0; s6 = (120.0 + c * (346.0 + 127.0 * c)) / 5040.0; - ch += t * (1.0 + 0.5 * t * s1 - b * c * (s1 - b * (s2 - b * (s3 - - b * (s4 - b * (s5 - b * s6)))))); + ch += t + * (1.0 + 0.5 * t * s1 + - b * c + * (s1 + - b + * (s2 + - b + * (s3 + - b + * (s4 + - b + * (s5 + - b * s6)))))); if (fabs(q / ch - 1.0) <= e) { break; } diff --git a/test/include/test/mq.h b/test/include/test/mq.h index 5dc6486c..4a68d709 100644 --- a/test/include/test/mq.h +++ 
b/test/include/test/mq.h @@ -26,82 +26,74 @@ * does not perform any cleanup of messages, since it knows nothing of their * payloads. */ -#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) +#define mq_msg(a_mq_msg_type) ql_elm(a_mq_msg_type) -#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ -typedef struct { \ - mtx_t lock; \ - ql_head(a_mq_msg_type) msgs; \ - unsigned count; \ -} a_mq_type; \ -a_attr bool \ -a_prefix##init(a_mq_type *mq) { \ - \ - if (mtx_init(&mq->lock)) { \ - return true; \ - } \ - ql_new(&mq->msgs); \ - mq->count = 0; \ - return false; \ -} \ -a_attr void \ -a_prefix##fini(a_mq_type *mq) { \ - mtx_fini(&mq->lock); \ -} \ -a_attr unsigned \ -a_prefix##count(a_mq_type *mq) { \ - unsigned count; \ - \ - mtx_lock(&mq->lock); \ - count = mq->count; \ - mtx_unlock(&mq->lock); \ - return count; \ -} \ -a_attr a_mq_msg_type * \ -a_prefix##tryget(a_mq_type *mq) { \ - a_mq_msg_type *msg; \ - \ - mtx_lock(&mq->lock); \ - msg = ql_first(&mq->msgs); \ - if (msg != NULL) { \ - ql_head_remove(&mq->msgs, a_mq_msg_type, a_field); \ - mq->count--; \ - } \ - mtx_unlock(&mq->lock); \ - return msg; \ -} \ -a_attr a_mq_msg_type * \ -a_prefix##get(a_mq_type *mq) { \ - a_mq_msg_type *msg; \ - unsigned ns; \ - \ - msg = a_prefix##tryget(mq); \ - if (msg != NULL) { \ - return msg; \ - } \ - \ - ns = 1; \ - while (true) { \ - sleep_ns(ns); \ - msg = a_prefix##tryget(mq); \ - if (msg != NULL) { \ - return msg; \ - } \ - if (ns < 1000*1000*1000) { \ - /* Double sleep time, up to max 1 second. 
*/ \ - ns <<= 1; \ - if (ns > 1000*1000*1000) { \ - ns = 1000*1000*1000; \ - } \ - } \ - } \ -} \ -a_attr void \ -a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) { \ - \ - mtx_lock(&mq->lock); \ - ql_elm_new(msg, a_field); \ - ql_tail_insert(&mq->msgs, msg, a_field); \ - mq->count++; \ - mtx_unlock(&mq->lock); \ -} +#define mq_gen(a_attr, a_prefix, a_mq_type, a_mq_msg_type, a_field) \ + typedef struct { \ + mtx_t lock; \ + ql_head(a_mq_msg_type) msgs; \ + unsigned count; \ + } a_mq_type; \ + a_attr bool a_prefix##init(a_mq_type *mq) { \ + if (mtx_init(&mq->lock)) { \ + return true; \ + } \ + ql_new(&mq->msgs); \ + mq->count = 0; \ + return false; \ + } \ + a_attr void a_prefix##fini(a_mq_type *mq) { \ + mtx_fini(&mq->lock); \ + } \ + a_attr unsigned a_prefix##count(a_mq_type *mq) { \ + unsigned count; \ + \ + mtx_lock(&mq->lock); \ + count = mq->count; \ + mtx_unlock(&mq->lock); \ + return count; \ + } \ + a_attr a_mq_msg_type *a_prefix##tryget(a_mq_type *mq) { \ + a_mq_msg_type *msg; \ + \ + mtx_lock(&mq->lock); \ + msg = ql_first(&mq->msgs); \ + if (msg != NULL) { \ + ql_head_remove(&mq->msgs, a_mq_msg_type, a_field); \ + mq->count--; \ + } \ + mtx_unlock(&mq->lock); \ + return msg; \ + } \ + a_attr a_mq_msg_type *a_prefix##get(a_mq_type *mq) { \ + a_mq_msg_type *msg; \ + unsigned ns; \ + \ + msg = a_prefix##tryget(mq); \ + if (msg != NULL) { \ + return msg; \ + } \ + \ + ns = 1; \ + while (true) { \ + sleep_ns(ns); \ + msg = a_prefix##tryget(mq); \ + if (msg != NULL) { \ + return msg; \ + } \ + if (ns < 1000 * 1000 * 1000) { \ + /* Double sleep time, up to max 1 second. 
*/ \ + ns <<= 1; \ + if (ns > 1000 * 1000 * 1000) { \ + ns = 1000 * 1000 * 1000; \ + } \ + } \ + } \ + } \ + a_attr void a_prefix##put(a_mq_type *mq, a_mq_msg_type *msg) { \ + mtx_lock(&mq->lock); \ + ql_elm_new(msg, a_field); \ + ql_tail_insert(&mq->msgs, msg, a_field); \ + mq->count++; \ + mtx_unlock(&mq->lock); \ + } diff --git a/test/include/test/mtx.h b/test/include/test/mtx.h index 066a2137..c771ca3a 100644 --- a/test/include/test/mtx.h +++ b/test/include/test/mtx.h @@ -7,15 +7,15 @@ typedef struct { #ifdef _WIN32 - CRITICAL_SECTION lock; + CRITICAL_SECTION lock; #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) - os_unfair_lock lock; + os_unfair_lock lock; #else - pthread_mutex_t lock; + pthread_mutex_t lock; #endif } mtx_t; -bool mtx_init(mtx_t *mtx); -void mtx_fini(mtx_t *mtx); -void mtx_lock(mtx_t *mtx); -void mtx_unlock(mtx_t *mtx); +bool mtx_init(mtx_t *mtx); +void mtx_fini(mtx_t *mtx); +void mtx_lock(mtx_t *mtx); +void mtx_unlock(mtx_t *mtx); diff --git a/test/include/test/nbits.h b/test/include/test/nbits.h index c06cf1b4..2c30a61c 100644 --- a/test/include/test/nbits.h +++ b/test/include/test/nbits.h @@ -3,109 +3,109 @@ /* Interesting bitmap counts to test. 
*/ -#define NBITS_TAB \ - NB( 1) \ - NB( 2) \ - NB( 3) \ - NB( 4) \ - NB( 5) \ - NB( 6) \ - NB( 7) \ - NB( 8) \ - NB( 9) \ - NB(10) \ - NB(11) \ - NB(12) \ - NB(13) \ - NB(14) \ - NB(15) \ - NB(16) \ - NB(17) \ - NB(18) \ - NB(19) \ - NB(20) \ - NB(21) \ - NB(22) \ - NB(23) \ - NB(24) \ - NB(25) \ - NB(26) \ - NB(27) \ - NB(28) \ - NB(29) \ - NB(30) \ - NB(31) \ - NB(32) \ - \ - NB(33) \ - NB(34) \ - NB(35) \ - NB(36) \ - NB(37) \ - NB(38) \ - NB(39) \ - NB(40) \ - NB(41) \ - NB(42) \ - NB(43) \ - NB(44) \ - NB(45) \ - NB(46) \ - NB(47) \ - NB(48) \ - NB(49) \ - NB(50) \ - NB(51) \ - NB(52) \ - NB(53) \ - NB(54) \ - NB(55) \ - NB(56) \ - NB(57) \ - NB(58) \ - NB(59) \ - NB(60) \ - NB(61) \ - NB(62) \ - NB(63) \ - NB(64) \ - NB(65) \ - NB(66) \ - NB(67) \ - \ - NB(126) \ - NB(127) \ - NB(128) \ - NB(129) \ - NB(130) \ - \ - NB(254) \ - NB(255) \ - NB(256) \ - NB(257) \ - NB(258) \ - \ - NB(510) \ - NB(511) \ - NB(512) \ - NB(513) \ - NB(514) \ - \ - NB(1022) \ - NB(1023) \ - NB(1024) \ - NB(1025) \ - NB(1026) \ - \ - NB(2048) \ - \ - NB(4094) \ - NB(4095) \ - NB(4096) \ - NB(4097) \ - NB(4098) \ - \ - NB(8192) \ - NB(16384) +#define NBITS_TAB \ + NB(1) \ + NB(2) \ + NB(3) \ + NB(4) \ + NB(5) \ + NB(6) \ + NB(7) \ + NB(8) \ + NB(9) \ + NB(10) \ + NB(11) \ + NB(12) \ + NB(13) \ + NB(14) \ + NB(15) \ + NB(16) \ + NB(17) \ + NB(18) \ + NB(19) \ + NB(20) \ + NB(21) \ + NB(22) \ + NB(23) \ + NB(24) \ + NB(25) \ + NB(26) \ + NB(27) \ + NB(28) \ + NB(29) \ + NB(30) \ + NB(31) \ + NB(32) \ + \ + NB(33) \ + NB(34) \ + NB(35) \ + NB(36) \ + NB(37) \ + NB(38) \ + NB(39) \ + NB(40) \ + NB(41) \ + NB(42) \ + NB(43) \ + NB(44) \ + NB(45) \ + NB(46) \ + NB(47) \ + NB(48) \ + NB(49) \ + NB(50) \ + NB(51) \ + NB(52) \ + NB(53) \ + NB(54) \ + NB(55) \ + NB(56) \ + NB(57) \ + NB(58) \ + NB(59) \ + NB(60) \ + NB(61) \ + NB(62) \ + NB(63) \ + NB(64) \ + NB(65) \ + NB(66) \ + NB(67) \ + \ + NB(126) \ + NB(127) \ + NB(128) \ + NB(129) \ + NB(130) \ + \ + NB(254) \ + NB(255) \ + NB(256) \ + 
NB(257) \ + NB(258) \ + \ + NB(510) \ + NB(511) \ + NB(512) \ + NB(513) \ + NB(514) \ + \ + NB(1022) \ + NB(1023) \ + NB(1024) \ + NB(1025) \ + NB(1026) \ + \ + NB(2048) \ + \ + NB(4094) \ + NB(4095) \ + NB(4096) \ + NB(4097) \ + NB(4098) \ + \ + NB(8192) \ + NB(16384) #endif /* TEST_NBITS_H */ diff --git a/test/include/test/san.h b/test/include/test/san.h index da07865c..65a235e9 100644 --- a/test/include/test/san.h +++ b/test/include/test/san.h @@ -1,9 +1,9 @@ #if defined(JEMALLOC_UAF_DETECTION) || defined(JEMALLOC_DEBUG) -# define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12" -# define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1" +# define TEST_SAN_UAF_ALIGN_ENABLE "lg_san_uaf_align:12" +# define TEST_SAN_UAF_ALIGN_DISABLE "lg_san_uaf_align:-1" #else -# define TEST_SAN_UAF_ALIGN_ENABLE "" -# define TEST_SAN_UAF_ALIGN_DISABLE "" +# define TEST_SAN_UAF_ALIGN_ENABLE "" +# define TEST_SAN_UAF_ALIGN_DISABLE "" #endif static inline bool @@ -11,4 +11,3 @@ extent_is_guarded(tsdn_t *tsdn, void *ptr) { edata_t *edata = emap_edata_lookup(tsdn, &arena_emap_global, ptr); return edata_guarded_get(edata); } - diff --git a/test/include/test/test.h b/test/include/test/test.h index 80ca7cbb..025c167d 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -1,502 +1,503 @@ -#define ASSERT_BUFSIZE 256 +#define ASSERT_BUFSIZE 256 -#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) do { \ - const t a_ = (a); \ - const t b_ = (b); \ - if (!(a_ cmp b_)) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) " #cmp " (%s) --> " \ - "%" pri " " #neg_cmp " %" pri ": ", \ - __func__, __FILE__, __LINE__, \ - #a, #b, a_, b_); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_cmp(may_abort, t, a, b, cmp, neg_cmp, pri, ...) 
\ + do { \ + const t a_ = (a); \ + const t b_ = (b); \ + if (!(a_ cmp b_)) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) " #cmp \ + " (%s) --> " \ + "%" pri " " #neg_cmp " %" pri ": ", \ + __func__, __FILE__, __LINE__, #a, #b, a_, b_); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(false, \ - t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) +#define expect_cmp(t, a, b, cmp, neg_cmp, pri, ...) \ + verify_cmp(false, t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) -#define expect_ptr_eq(a, b, ...) expect_cmp(void *, a, b, ==, \ - !=, "p", __VA_ARGS__) -#define expect_ptr_ne(a, b, ...) expect_cmp(void *, a, b, !=, \ - ==, "p", __VA_ARGS__) -#define expect_ptr_null(a, ...) expect_cmp(void *, a, NULL, ==, \ - !=, "p", __VA_ARGS__) -#define expect_ptr_not_null(a, ...) expect_cmp(void *, a, NULL, !=, \ - ==, "p", __VA_ARGS__) +#define expect_ptr_eq(a, b, ...) \ + expect_cmp(void *, a, b, ==, !=, "p", __VA_ARGS__) +#define expect_ptr_ne(a, b, ...) \ + expect_cmp(void *, a, b, !=, ==, "p", __VA_ARGS__) +#define expect_ptr_null(a, ...) \ + expect_cmp(void *, a, NULL, ==, !=, "p", __VA_ARGS__) +#define expect_ptr_not_null(a, ...) \ + expect_cmp(void *, a, NULL, !=, ==, "p", __VA_ARGS__) -#define expect_c_eq(a, b, ...) expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__) +#define expect_c_eq(a, b, ...) 
expect_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define expect_c_ne(a, b, ...) expect_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define expect_c_lt(a, b, ...) expect_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define expect_c_le(a, b, ...) expect_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define expect_c_ge(a, b, ...) expect_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define expect_c_gt(a, b, ...) expect_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define expect_x_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) +#define expect_x_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define expect_x_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define expect_x_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define expect_x_le(a, b, ...) expect_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define expect_x_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define expect_x_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define expect_d_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__) +#define expect_d_eq(a, b, ...) 
expect_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define expect_d_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define expect_d_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define expect_d_le(a, b, ...) expect_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define expect_d_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define expect_d_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define expect_u_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__) +#define expect_u_eq(a, b, ...) expect_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define expect_u_ne(a, b, ...) expect_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define expect_u_lt(a, b, ...) expect_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define expect_u_le(a, b, ...) expect_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define expect_u_ge(a, b, ...) expect_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define expect_u_gt(a, b, ...) expect_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define expect_ld_eq(a, b, ...) expect_cmp(long, a, b, ==, \ - !=, "ld", __VA_ARGS__) -#define expect_ld_ne(a, b, ...) expect_cmp(long, a, b, !=, \ - ==, "ld", __VA_ARGS__) -#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, \ - >=, "ld", __VA_ARGS__) -#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, \ - >, "ld", __VA_ARGS__) -#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, \ - <, "ld", __VA_ARGS__) -#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, \ - <=, "ld", __VA_ARGS__) +#define expect_ld_eq(a, b, ...) 
\ + expect_cmp(long, a, b, ==, !=, "ld", __VA_ARGS__) +#define expect_ld_ne(a, b, ...) \ + expect_cmp(long, a, b, !=, ==, "ld", __VA_ARGS__) +#define expect_ld_lt(a, b, ...) expect_cmp(long, a, b, <, >=, "ld", __VA_ARGS__) +#define expect_ld_le(a, b, ...) expect_cmp(long, a, b, <=, >, "ld", __VA_ARGS__) +#define expect_ld_ge(a, b, ...) expect_cmp(long, a, b, >=, <, "ld", __VA_ARGS__) +#define expect_ld_gt(a, b, ...) expect_cmp(long, a, b, >, <=, "ld", __VA_ARGS__) -#define expect_lu_eq(a, b, ...) expect_cmp(unsigned long, \ - a, b, ==, !=, "lu", __VA_ARGS__) -#define expect_lu_ne(a, b, ...) expect_cmp(unsigned long, \ - a, b, !=, ==, "lu", __VA_ARGS__) -#define expect_lu_lt(a, b, ...) expect_cmp(unsigned long, \ - a, b, <, >=, "lu", __VA_ARGS__) -#define expect_lu_le(a, b, ...) expect_cmp(unsigned long, \ - a, b, <=, >, "lu", __VA_ARGS__) -#define expect_lu_ge(a, b, ...) expect_cmp(unsigned long, \ - a, b, >=, <, "lu", __VA_ARGS__) -#define expect_lu_gt(a, b, ...) expect_cmp(unsigned long, \ - a, b, >, <=, "lu", __VA_ARGS__) +#define expect_lu_eq(a, b, ...) \ + expect_cmp(unsigned long, a, b, ==, !=, "lu", __VA_ARGS__) +#define expect_lu_ne(a, b, ...) \ + expect_cmp(unsigned long, a, b, !=, ==, "lu", __VA_ARGS__) +#define expect_lu_lt(a, b, ...) \ + expect_cmp(unsigned long, a, b, <, >=, "lu", __VA_ARGS__) +#define expect_lu_le(a, b, ...) \ + expect_cmp(unsigned long, a, b, <=, >, "lu", __VA_ARGS__) +#define expect_lu_ge(a, b, ...) \ + expect_cmp(unsigned long, a, b, >=, <, "lu", __VA_ARGS__) +#define expect_lu_gt(a, b, ...) \ + expect_cmp(unsigned long, a, b, >, <=, "lu", __VA_ARGS__) -#define expect_qd_eq(a, b, ...) expect_cmp(long long, a, b, ==, \ - !=, "qd", __VA_ARGS__) -#define expect_qd_ne(a, b, ...) expect_cmp(long long, a, b, !=, \ - ==, "qd", __VA_ARGS__) -#define expect_qd_lt(a, b, ...) expect_cmp(long long, a, b, <, \ - >=, "qd", __VA_ARGS__) -#define expect_qd_le(a, b, ...) 
expect_cmp(long long, a, b, <=, \ - >, "qd", __VA_ARGS__) -#define expect_qd_ge(a, b, ...) expect_cmp(long long, a, b, >=, \ - <, "qd", __VA_ARGS__) -#define expect_qd_gt(a, b, ...) expect_cmp(long long, a, b, >, \ - <=, "qd", __VA_ARGS__) +#define expect_qd_eq(a, b, ...) \ + expect_cmp(long long, a, b, ==, !=, "qd", __VA_ARGS__) +#define expect_qd_ne(a, b, ...) \ + expect_cmp(long long, a, b, !=, ==, "qd", __VA_ARGS__) +#define expect_qd_lt(a, b, ...) \ + expect_cmp(long long, a, b, <, >=, "qd", __VA_ARGS__) +#define expect_qd_le(a, b, ...) \ + expect_cmp(long long, a, b, <=, >, "qd", __VA_ARGS__) +#define expect_qd_ge(a, b, ...) \ + expect_cmp(long long, a, b, >=, <, "qd", __VA_ARGS__) +#define expect_qd_gt(a, b, ...) \ + expect_cmp(long long, a, b, >, <=, "qd", __VA_ARGS__) -#define expect_qu_eq(a, b, ...) expect_cmp(unsigned long long, \ - a, b, ==, !=, "qu", __VA_ARGS__) -#define expect_qu_ne(a, b, ...) expect_cmp(unsigned long long, \ - a, b, !=, ==, "qu", __VA_ARGS__) -#define expect_qu_lt(a, b, ...) expect_cmp(unsigned long long, \ - a, b, <, >=, "qu", __VA_ARGS__) -#define expect_qu_le(a, b, ...) expect_cmp(unsigned long long, \ - a, b, <=, >, "qu", __VA_ARGS__) -#define expect_qu_ge(a, b, ...) expect_cmp(unsigned long long, \ - a, b, >=, <, "qu", __VA_ARGS__) -#define expect_qu_gt(a, b, ...) expect_cmp(unsigned long long, \ - a, b, >, <=, "qu", __VA_ARGS__) +#define expect_qu_eq(a, b, ...) \ + expect_cmp(unsigned long long, a, b, ==, !=, "qu", __VA_ARGS__) +#define expect_qu_ne(a, b, ...) \ + expect_cmp(unsigned long long, a, b, !=, ==, "qu", __VA_ARGS__) +#define expect_qu_lt(a, b, ...) \ + expect_cmp(unsigned long long, a, b, <, >=, "qu", __VA_ARGS__) +#define expect_qu_le(a, b, ...) \ + expect_cmp(unsigned long long, a, b, <=, >, "qu", __VA_ARGS__) +#define expect_qu_ge(a, b, ...) \ + expect_cmp(unsigned long long, a, b, >=, <, "qu", __VA_ARGS__) +#define expect_qu_gt(a, b, ...) 
\ + expect_cmp(unsigned long long, a, b, >, <=, "qu", __VA_ARGS__) -#define expect_jd_eq(a, b, ...) expect_cmp(intmax_t, a, b, ==, \ - !=, "jd", __VA_ARGS__) -#define expect_jd_ne(a, b, ...) expect_cmp(intmax_t, a, b, !=, \ - ==, "jd", __VA_ARGS__) -#define expect_jd_lt(a, b, ...) expect_cmp(intmax_t, a, b, <, \ - >=, "jd", __VA_ARGS__) -#define expect_jd_le(a, b, ...) expect_cmp(intmax_t, a, b, <=, \ - >, "jd", __VA_ARGS__) -#define expect_jd_ge(a, b, ...) expect_cmp(intmax_t, a, b, >=, \ - <, "jd", __VA_ARGS__) -#define expect_jd_gt(a, b, ...) expect_cmp(intmax_t, a, b, >, \ - <=, "jd", __VA_ARGS__) +#define expect_jd_eq(a, b, ...) \ + expect_cmp(intmax_t, a, b, ==, !=, "jd", __VA_ARGS__) +#define expect_jd_ne(a, b, ...) \ + expect_cmp(intmax_t, a, b, !=, ==, "jd", __VA_ARGS__) +#define expect_jd_lt(a, b, ...) \ + expect_cmp(intmax_t, a, b, <, >=, "jd", __VA_ARGS__) +#define expect_jd_le(a, b, ...) \ + expect_cmp(intmax_t, a, b, <=, >, "jd", __VA_ARGS__) +#define expect_jd_ge(a, b, ...) \ + expect_cmp(intmax_t, a, b, >=, <, "jd", __VA_ARGS__) +#define expect_jd_gt(a, b, ...) \ + expect_cmp(intmax_t, a, b, >, <=, "jd", __VA_ARGS__) -#define expect_ju_eq(a, b, ...) expect_cmp(uintmax_t, a, b, ==, \ - !=, "ju", __VA_ARGS__) -#define expect_ju_ne(a, b, ...) expect_cmp(uintmax_t, a, b, !=, \ - ==, "ju", __VA_ARGS__) -#define expect_ju_lt(a, b, ...) expect_cmp(uintmax_t, a, b, <, \ - >=, "ju", __VA_ARGS__) -#define expect_ju_le(a, b, ...) expect_cmp(uintmax_t, a, b, <=, \ - >, "ju", __VA_ARGS__) -#define expect_ju_ge(a, b, ...) expect_cmp(uintmax_t, a, b, >=, \ - <, "ju", __VA_ARGS__) -#define expect_ju_gt(a, b, ...) expect_cmp(uintmax_t, a, b, >, \ - <=, "ju", __VA_ARGS__) +#define expect_ju_eq(a, b, ...) \ + expect_cmp(uintmax_t, a, b, ==, !=, "ju", __VA_ARGS__) +#define expect_ju_ne(a, b, ...) \ + expect_cmp(uintmax_t, a, b, !=, ==, "ju", __VA_ARGS__) +#define expect_ju_lt(a, b, ...) 
\ + expect_cmp(uintmax_t, a, b, <, >=, "ju", __VA_ARGS__) +#define expect_ju_le(a, b, ...) \ + expect_cmp(uintmax_t, a, b, <=, >, "ju", __VA_ARGS__) +#define expect_ju_ge(a, b, ...) \ + expect_cmp(uintmax_t, a, b, >=, <, "ju", __VA_ARGS__) +#define expect_ju_gt(a, b, ...) \ + expect_cmp(uintmax_t, a, b, >, <=, "ju", __VA_ARGS__) -#define expect_zd_eq(a, b, ...) expect_cmp(ssize_t, a, b, ==, \ - !=, "zd", __VA_ARGS__) -#define expect_zd_ne(a, b, ...) expect_cmp(ssize_t, a, b, !=, \ - ==, "zd", __VA_ARGS__) -#define expect_zd_lt(a, b, ...) expect_cmp(ssize_t, a, b, <, \ - >=, "zd", __VA_ARGS__) -#define expect_zd_le(a, b, ...) expect_cmp(ssize_t, a, b, <=, \ - >, "zd", __VA_ARGS__) -#define expect_zd_ge(a, b, ...) expect_cmp(ssize_t, a, b, >=, \ - <, "zd", __VA_ARGS__) -#define expect_zd_gt(a, b, ...) expect_cmp(ssize_t, a, b, >, \ - <=, "zd", __VA_ARGS__) +#define expect_zd_eq(a, b, ...) \ + expect_cmp(ssize_t, a, b, ==, !=, "zd", __VA_ARGS__) +#define expect_zd_ne(a, b, ...) \ + expect_cmp(ssize_t, a, b, !=, ==, "zd", __VA_ARGS__) +#define expect_zd_lt(a, b, ...) \ + expect_cmp(ssize_t, a, b, <, >=, "zd", __VA_ARGS__) +#define expect_zd_le(a, b, ...) \ + expect_cmp(ssize_t, a, b, <=, >, "zd", __VA_ARGS__) +#define expect_zd_ge(a, b, ...) \ + expect_cmp(ssize_t, a, b, >=, <, "zd", __VA_ARGS__) +#define expect_zd_gt(a, b, ...) \ + expect_cmp(ssize_t, a, b, >, <=, "zd", __VA_ARGS__) -#define expect_zu_eq(a, b, ...) expect_cmp(size_t, a, b, ==, \ - !=, "zu", __VA_ARGS__) -#define expect_zu_ne(a, b, ...) expect_cmp(size_t, a, b, !=, \ - ==, "zu", __VA_ARGS__) -#define expect_zu_lt(a, b, ...) expect_cmp(size_t, a, b, <, \ - >=, "zu", __VA_ARGS__) -#define expect_zu_le(a, b, ...) expect_cmp(size_t, a, b, <=, \ - >, "zu", __VA_ARGS__) -#define expect_zu_ge(a, b, ...) expect_cmp(size_t, a, b, >=, \ - <, "zu", __VA_ARGS__) -#define expect_zu_gt(a, b, ...) expect_cmp(size_t, a, b, >, \ - <=, "zu", __VA_ARGS__) +#define expect_zu_eq(a, b, ...) 
\ + expect_cmp(size_t, a, b, ==, !=, "zu", __VA_ARGS__) +#define expect_zu_ne(a, b, ...) \ + expect_cmp(size_t, a, b, !=, ==, "zu", __VA_ARGS__) +#define expect_zu_lt(a, b, ...) \ + expect_cmp(size_t, a, b, <, >=, "zu", __VA_ARGS__) +#define expect_zu_le(a, b, ...) \ + expect_cmp(size_t, a, b, <=, >, "zu", __VA_ARGS__) +#define expect_zu_ge(a, b, ...) \ + expect_cmp(size_t, a, b, >=, <, "zu", __VA_ARGS__) +#define expect_zu_gt(a, b, ...) \ + expect_cmp(size_t, a, b, >, <=, "zu", __VA_ARGS__) -#define expect_d32_eq(a, b, ...) expect_cmp(int32_t, a, b, ==, \ - !=, FMTd32, __VA_ARGS__) -#define expect_d32_ne(a, b, ...) expect_cmp(int32_t, a, b, !=, \ - ==, FMTd32, __VA_ARGS__) -#define expect_d32_lt(a, b, ...) expect_cmp(int32_t, a, b, <, \ - >=, FMTd32, __VA_ARGS__) -#define expect_d32_le(a, b, ...) expect_cmp(int32_t, a, b, <=, \ - >, FMTd32, __VA_ARGS__) -#define expect_d32_ge(a, b, ...) expect_cmp(int32_t, a, b, >=, \ - <, FMTd32, __VA_ARGS__) -#define expect_d32_gt(a, b, ...) expect_cmp(int32_t, a, b, >, \ - <=, FMTd32, __VA_ARGS__) +#define expect_d32_eq(a, b, ...) \ + expect_cmp(int32_t, a, b, ==, !=, FMTd32, __VA_ARGS__) +#define expect_d32_ne(a, b, ...) \ + expect_cmp(int32_t, a, b, !=, ==, FMTd32, __VA_ARGS__) +#define expect_d32_lt(a, b, ...) \ + expect_cmp(int32_t, a, b, <, >=, FMTd32, __VA_ARGS__) +#define expect_d32_le(a, b, ...) \ + expect_cmp(int32_t, a, b, <=, >, FMTd32, __VA_ARGS__) +#define expect_d32_ge(a, b, ...) \ + expect_cmp(int32_t, a, b, >=, <, FMTd32, __VA_ARGS__) +#define expect_d32_gt(a, b, ...) \ + expect_cmp(int32_t, a, b, >, <=, FMTd32, __VA_ARGS__) -#define expect_u32_eq(a, b, ...) expect_cmp(uint32_t, a, b, ==, \ - !=, FMTu32, __VA_ARGS__) -#define expect_u32_ne(a, b, ...) expect_cmp(uint32_t, a, b, !=, \ - ==, FMTu32, __VA_ARGS__) -#define expect_u32_lt(a, b, ...) expect_cmp(uint32_t, a, b, <, \ - >=, FMTu32, __VA_ARGS__) -#define expect_u32_le(a, b, ...) 
expect_cmp(uint32_t, a, b, <=, \ - >, FMTu32, __VA_ARGS__) -#define expect_u32_ge(a, b, ...) expect_cmp(uint32_t, a, b, >=, \ - <, FMTu32, __VA_ARGS__) -#define expect_u32_gt(a, b, ...) expect_cmp(uint32_t, a, b, >, \ - <=, FMTu32, __VA_ARGS__) +#define expect_u32_eq(a, b, ...) \ + expect_cmp(uint32_t, a, b, ==, !=, FMTu32, __VA_ARGS__) +#define expect_u32_ne(a, b, ...) \ + expect_cmp(uint32_t, a, b, !=, ==, FMTu32, __VA_ARGS__) +#define expect_u32_lt(a, b, ...) \ + expect_cmp(uint32_t, a, b, <, >=, FMTu32, __VA_ARGS__) +#define expect_u32_le(a, b, ...) \ + expect_cmp(uint32_t, a, b, <=, >, FMTu32, __VA_ARGS__) +#define expect_u32_ge(a, b, ...) \ + expect_cmp(uint32_t, a, b, >=, <, FMTu32, __VA_ARGS__) +#define expect_u32_gt(a, b, ...) \ + expect_cmp(uint32_t, a, b, >, <=, FMTu32, __VA_ARGS__) -#define expect_d64_eq(a, b, ...) expect_cmp(int64_t, a, b, ==, \ - !=, FMTd64, __VA_ARGS__) -#define expect_d64_ne(a, b, ...) expect_cmp(int64_t, a, b, !=, \ - ==, FMTd64, __VA_ARGS__) -#define expect_d64_lt(a, b, ...) expect_cmp(int64_t, a, b, <, \ - >=, FMTd64, __VA_ARGS__) -#define expect_d64_le(a, b, ...) expect_cmp(int64_t, a, b, <=, \ - >, FMTd64, __VA_ARGS__) -#define expect_d64_ge(a, b, ...) expect_cmp(int64_t, a, b, >=, \ - <, FMTd64, __VA_ARGS__) -#define expect_d64_gt(a, b, ...) expect_cmp(int64_t, a, b, >, \ - <=, FMTd64, __VA_ARGS__) +#define expect_d64_eq(a, b, ...) \ + expect_cmp(int64_t, a, b, ==, !=, FMTd64, __VA_ARGS__) +#define expect_d64_ne(a, b, ...) \ + expect_cmp(int64_t, a, b, !=, ==, FMTd64, __VA_ARGS__) +#define expect_d64_lt(a, b, ...) \ + expect_cmp(int64_t, a, b, <, >=, FMTd64, __VA_ARGS__) +#define expect_d64_le(a, b, ...) \ + expect_cmp(int64_t, a, b, <=, >, FMTd64, __VA_ARGS__) +#define expect_d64_ge(a, b, ...) \ + expect_cmp(int64_t, a, b, >=, <, FMTd64, __VA_ARGS__) +#define expect_d64_gt(a, b, ...) \ + expect_cmp(int64_t, a, b, >, <=, FMTd64, __VA_ARGS__) -#define expect_u64_eq(a, b, ...) 
expect_cmp(uint64_t, a, b, ==, \ - !=, FMTu64, __VA_ARGS__) -#define expect_u64_ne(a, b, ...) expect_cmp(uint64_t, a, b, !=, \ - ==, FMTu64, __VA_ARGS__) -#define expect_u64_lt(a, b, ...) expect_cmp(uint64_t, a, b, <, \ - >=, FMTu64, __VA_ARGS__) -#define expect_u64_le(a, b, ...) expect_cmp(uint64_t, a, b, <=, \ - >, FMTu64, __VA_ARGS__) -#define expect_u64_ge(a, b, ...) expect_cmp(uint64_t, a, b, >=, \ - <, FMTu64, __VA_ARGS__) -#define expect_u64_gt(a, b, ...) expect_cmp(uint64_t, a, b, >, \ - <=, FMTu64, __VA_ARGS__) +#define expect_u64_eq(a, b, ...) \ + expect_cmp(uint64_t, a, b, ==, !=, FMTu64, __VA_ARGS__) +#define expect_u64_ne(a, b, ...) \ + expect_cmp(uint64_t, a, b, !=, ==, FMTu64, __VA_ARGS__) +#define expect_u64_lt(a, b, ...) \ + expect_cmp(uint64_t, a, b, <, >=, FMTu64, __VA_ARGS__) +#define expect_u64_le(a, b, ...) \ + expect_cmp(uint64_t, a, b, <=, >, FMTu64, __VA_ARGS__) +#define expect_u64_ge(a, b, ...) \ + expect_cmp(uint64_t, a, b, >=, <, FMTu64, __VA_ARGS__) +#define expect_u64_gt(a, b, ...) \ + expect_cmp(uint64_t, a, b, >, <=, FMTu64, __VA_ARGS__) -#define verify_b_eq(may_abort, a, b, ...) do { \ - bool a_ = (a); \ - bool b_ = (b); \ - if (!(a_ == b_)) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) == (%s) --> %s != %s: ", \ - __func__, __FILE__, __LINE__, \ - #a, #b, a_ ? "true" : "false", \ - b_ ? "true" : "false"); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_b_eq(may_abort, a, b, ...) \ + do { \ + bool a_ = (a); \ + bool b_ = (b); \ + if (!(a_ == b_)) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) == (%s) --> %s != %s: ", \ + __func__, __FILE__, __LINE__, #a, #b, \ + a_ ? "true" : "false", b_ ? 
"true" : "false"); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define verify_b_ne(may_abort, a, b, ...) do { \ - bool a_ = (a); \ - bool b_ = (b); \ - if (!(a_ != b_)) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) != (%s) --> %s == %s: ", \ - __func__, __FILE__, __LINE__, \ - #a, #b, a_ ? "true" : "false", \ - b_ ? "true" : "false"); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_b_ne(may_abort, a, b, ...) \ + do { \ + bool a_ = (a); \ + bool b_ = (b); \ + if (!(a_ != b_)) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) != (%s) --> %s == %s: ", \ + __func__, __FILE__, __LINE__, #a, #b, \ + a_ ? "true" : "false", b_ ? "true" : "false"); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__) -#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__) +#define expect_b_eq(a, b, ...) verify_b_eq(false, a, b, __VA_ARGS__) +#define expect_b_ne(a, b, ...) verify_b_ne(false, a, b, __VA_ARGS__) -#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) -#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) +#define expect_true(a, ...) expect_b_eq(a, true, __VA_ARGS__) +#define expect_false(a, ...) expect_b_eq(a, false, __VA_ARGS__) -#define verify_str_eq(may_abort, a, b, ...) 
do { \ - if (strcmp((a), (b)) != 0) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) same as (%s) --> " \ - "\"%s\" differs from \"%s\": ", \ - __func__, __FILE__, __LINE__, #a, #b, a, b); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_str_eq(may_abort, a, b, ...) \ + do { \ + if (strcmp((a), (b)) != 0) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) same as (%s) --> " \ + "\"%s\" differs from \"%s\": ", \ + __func__, __FILE__, __LINE__, #a, #b, a, b); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) -#define verify_str_ne(may_abort, a, b, ...) do { \ - if (strcmp((a), (b)) == 0) { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Failed assertion: " \ - "(%s) differs from (%s) --> " \ - "\"%s\" same as \"%s\": ", \ - __func__, __FILE__, __LINE__, #a, #b, a, b); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ - } \ -} while (0) +#define verify_str_ne(may_abort, a, b, ...) \ + do { \ + if (strcmp((a), (b)) == 0) { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Failed assertion: " \ + "(%s) differs from (%s) --> " \ + "\"%s\" same as \"%s\": ", \ + __func__, __FILE__, __LINE__, #a, #b, a, b); \ + malloc_snprintf( \ + message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } \ + } while (0) #define expect_str_eq(a, b, ...) verify_str_eq(false, a, b, __VA_ARGS__) #define expect_str_ne(a, b, ...) 
verify_str_ne(false, a, b, __VA_ARGS__) -#define verify_not_reached(may_abort, ...) do { \ - char prefix[ASSERT_BUFSIZE]; \ - char message[ASSERT_BUFSIZE]; \ - malloc_snprintf(prefix, sizeof(prefix), \ - "%s:%s:%d: Unreachable code reached: ", \ - __func__, __FILE__, __LINE__); \ - malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ - p_test_fail(may_abort, prefix, message); \ -} while (0) +#define verify_not_reached(may_abort, ...) \ + do { \ + char prefix[ASSERT_BUFSIZE]; \ + char message[ASSERT_BUFSIZE]; \ + malloc_snprintf(prefix, sizeof(prefix), \ + "%s:%s:%d: Unreachable code reached: ", __func__, \ + __FILE__, __LINE__); \ + malloc_snprintf(message, sizeof(message), __VA_ARGS__); \ + p_test_fail(may_abort, prefix, message); \ + } while (0) #define expect_not_reached(...) verify_not_reached(false, __VA_ARGS__) -#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) verify_cmp(true, \ - t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) +#define assert_cmp(t, a, b, cmp, neg_cmp, pri, ...) \ + verify_cmp(true, t, a, b, cmp, neg_cmp, pri, __VA_ARGS__) -#define assert_ptr_eq(a, b, ...) assert_cmp(void *, a, b, ==, \ - !=, "p", __VA_ARGS__) -#define assert_ptr_ne(a, b, ...) assert_cmp(void *, a, b, !=, \ - ==, "p", __VA_ARGS__) -#define assert_ptr_null(a, ...) assert_cmp(void *, a, NULL, ==, \ - !=, "p", __VA_ARGS__) -#define assert_ptr_not_null(a, ...) assert_cmp(void *, a, NULL, !=, \ - ==, "p", __VA_ARGS__) +#define assert_ptr_eq(a, b, ...) \ + assert_cmp(void *, a, b, ==, !=, "p", __VA_ARGS__) +#define assert_ptr_ne(a, b, ...) \ + assert_cmp(void *, a, b, !=, ==, "p", __VA_ARGS__) +#define assert_ptr_null(a, ...) \ + assert_cmp(void *, a, NULL, ==, !=, "p", __VA_ARGS__) +#define assert_ptr_not_null(a, ...) \ + assert_cmp(void *, a, NULL, !=, ==, "p", __VA_ARGS__) -#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) -#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) -#define assert_c_lt(a, b, ...) 
assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) -#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) -#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) -#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) +#define assert_c_eq(a, b, ...) assert_cmp(char, a, b, ==, !=, "c", __VA_ARGS__) +#define assert_c_ne(a, b, ...) assert_cmp(char, a, b, !=, ==, "c", __VA_ARGS__) +#define assert_c_lt(a, b, ...) assert_cmp(char, a, b, <, >=, "c", __VA_ARGS__) +#define assert_c_le(a, b, ...) assert_cmp(char, a, b, <=, >, "c", __VA_ARGS__) +#define assert_c_ge(a, b, ...) assert_cmp(char, a, b, >=, <, "c", __VA_ARGS__) +#define assert_c_gt(a, b, ...) assert_cmp(char, a, b, >, <=, "c", __VA_ARGS__) -#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) -#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) -#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) -#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) -#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) -#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) +#define assert_x_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "#x", __VA_ARGS__) +#define assert_x_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "#x", __VA_ARGS__) +#define assert_x_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "#x", __VA_ARGS__) +#define assert_x_le(a, b, ...) assert_cmp(int, a, b, <=, >, "#x", __VA_ARGS__) +#define assert_x_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "#x", __VA_ARGS__) +#define assert_x_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "#x", __VA_ARGS__) -#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) -#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) -#define assert_d_lt(a, b, ...) 
assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) -#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) -#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) -#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) +#define assert_d_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "d", __VA_ARGS__) +#define assert_d_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "d", __VA_ARGS__) +#define assert_d_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "d", __VA_ARGS__) +#define assert_d_le(a, b, ...) assert_cmp(int, a, b, <=, >, "d", __VA_ARGS__) +#define assert_d_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "d", __VA_ARGS__) +#define assert_d_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "d", __VA_ARGS__) -#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) -#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) -#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) -#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) -#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) -#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) +#define assert_u_eq(a, b, ...) assert_cmp(int, a, b, ==, !=, "u", __VA_ARGS__) +#define assert_u_ne(a, b, ...) assert_cmp(int, a, b, !=, ==, "u", __VA_ARGS__) +#define assert_u_lt(a, b, ...) assert_cmp(int, a, b, <, >=, "u", __VA_ARGS__) +#define assert_u_le(a, b, ...) assert_cmp(int, a, b, <=, >, "u", __VA_ARGS__) +#define assert_u_ge(a, b, ...) assert_cmp(int, a, b, >=, <, "u", __VA_ARGS__) +#define assert_u_gt(a, b, ...) assert_cmp(int, a, b, >, <=, "u", __VA_ARGS__) -#define assert_ld_eq(a, b, ...) assert_cmp(long, a, b, ==, \ - !=, "ld", __VA_ARGS__) -#define assert_ld_ne(a, b, ...) assert_cmp(long, a, b, !=, \ - ==, "ld", __VA_ARGS__) -#define assert_ld_lt(a, b, ...) 
assert_cmp(long, a, b, <, \ - >=, "ld", __VA_ARGS__) -#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, \ - >, "ld", __VA_ARGS__) -#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, \ - <, "ld", __VA_ARGS__) -#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, \ - <=, "ld", __VA_ARGS__) +#define assert_ld_eq(a, b, ...) \ + assert_cmp(long, a, b, ==, !=, "ld", __VA_ARGS__) +#define assert_ld_ne(a, b, ...) \ + assert_cmp(long, a, b, !=, ==, "ld", __VA_ARGS__) +#define assert_ld_lt(a, b, ...) assert_cmp(long, a, b, <, >=, "ld", __VA_ARGS__) +#define assert_ld_le(a, b, ...) assert_cmp(long, a, b, <=, >, "ld", __VA_ARGS__) +#define assert_ld_ge(a, b, ...) assert_cmp(long, a, b, >=, <, "ld", __VA_ARGS__) +#define assert_ld_gt(a, b, ...) assert_cmp(long, a, b, >, <=, "ld", __VA_ARGS__) -#define assert_lu_eq(a, b, ...) assert_cmp(unsigned long, \ - a, b, ==, !=, "lu", __VA_ARGS__) -#define assert_lu_ne(a, b, ...) assert_cmp(unsigned long, \ - a, b, !=, ==, "lu", __VA_ARGS__) -#define assert_lu_lt(a, b, ...) assert_cmp(unsigned long, \ - a, b, <, >=, "lu", __VA_ARGS__) -#define assert_lu_le(a, b, ...) assert_cmp(unsigned long, \ - a, b, <=, >, "lu", __VA_ARGS__) -#define assert_lu_ge(a, b, ...) assert_cmp(unsigned long, \ - a, b, >=, <, "lu", __VA_ARGS__) -#define assert_lu_gt(a, b, ...) assert_cmp(unsigned long, \ - a, b, >, <=, "lu", __VA_ARGS__) +#define assert_lu_eq(a, b, ...) \ + assert_cmp(unsigned long, a, b, ==, !=, "lu", __VA_ARGS__) +#define assert_lu_ne(a, b, ...) \ + assert_cmp(unsigned long, a, b, !=, ==, "lu", __VA_ARGS__) +#define assert_lu_lt(a, b, ...) \ + assert_cmp(unsigned long, a, b, <, >=, "lu", __VA_ARGS__) +#define assert_lu_le(a, b, ...) \ + assert_cmp(unsigned long, a, b, <=, >, "lu", __VA_ARGS__) +#define assert_lu_ge(a, b, ...) \ + assert_cmp(unsigned long, a, b, >=, <, "lu", __VA_ARGS__) +#define assert_lu_gt(a, b, ...) \ + assert_cmp(unsigned long, a, b, >, <=, "lu", __VA_ARGS__) -#define assert_qd_eq(a, b, ...) 
assert_cmp(long long, a, b, ==, \ - !=, "qd", __VA_ARGS__) -#define assert_qd_ne(a, b, ...) assert_cmp(long long, a, b, !=, \ - ==, "qd", __VA_ARGS__) -#define assert_qd_lt(a, b, ...) assert_cmp(long long, a, b, <, \ - >=, "qd", __VA_ARGS__) -#define assert_qd_le(a, b, ...) assert_cmp(long long, a, b, <=, \ - >, "qd", __VA_ARGS__) -#define assert_qd_ge(a, b, ...) assert_cmp(long long, a, b, >=, \ - <, "qd", __VA_ARGS__) -#define assert_qd_gt(a, b, ...) assert_cmp(long long, a, b, >, \ - <=, "qd", __VA_ARGS__) +#define assert_qd_eq(a, b, ...) \ + assert_cmp(long long, a, b, ==, !=, "qd", __VA_ARGS__) +#define assert_qd_ne(a, b, ...) \ + assert_cmp(long long, a, b, !=, ==, "qd", __VA_ARGS__) +#define assert_qd_lt(a, b, ...) \ + assert_cmp(long long, a, b, <, >=, "qd", __VA_ARGS__) +#define assert_qd_le(a, b, ...) \ + assert_cmp(long long, a, b, <=, >, "qd", __VA_ARGS__) +#define assert_qd_ge(a, b, ...) \ + assert_cmp(long long, a, b, >=, <, "qd", __VA_ARGS__) +#define assert_qd_gt(a, b, ...) \ + assert_cmp(long long, a, b, >, <=, "qd", __VA_ARGS__) -#define assert_qu_eq(a, b, ...) assert_cmp(unsigned long long, \ - a, b, ==, !=, "qu", __VA_ARGS__) -#define assert_qu_ne(a, b, ...) assert_cmp(unsigned long long, \ - a, b, !=, ==, "qu", __VA_ARGS__) -#define assert_qu_lt(a, b, ...) assert_cmp(unsigned long long, \ - a, b, <, >=, "qu", __VA_ARGS__) -#define assert_qu_le(a, b, ...) assert_cmp(unsigned long long, \ - a, b, <=, >, "qu", __VA_ARGS__) -#define assert_qu_ge(a, b, ...) assert_cmp(unsigned long long, \ - a, b, >=, <, "qu", __VA_ARGS__) -#define assert_qu_gt(a, b, ...) assert_cmp(unsigned long long, \ - a, b, >, <=, "qu", __VA_ARGS__) +#define assert_qu_eq(a, b, ...) \ + assert_cmp(unsigned long long, a, b, ==, !=, "qu", __VA_ARGS__) +#define assert_qu_ne(a, b, ...) \ + assert_cmp(unsigned long long, a, b, !=, ==, "qu", __VA_ARGS__) +#define assert_qu_lt(a, b, ...) \ + assert_cmp(unsigned long long, a, b, <, >=, "qu", __VA_ARGS__) +#define assert_qu_le(a, b, ...) 
\ + assert_cmp(unsigned long long, a, b, <=, >, "qu", __VA_ARGS__) +#define assert_qu_ge(a, b, ...) \ + assert_cmp(unsigned long long, a, b, >=, <, "qu", __VA_ARGS__) +#define assert_qu_gt(a, b, ...) \ + assert_cmp(unsigned long long, a, b, >, <=, "qu", __VA_ARGS__) -#define assert_jd_eq(a, b, ...) assert_cmp(intmax_t, a, b, ==, \ - !=, "jd", __VA_ARGS__) -#define assert_jd_ne(a, b, ...) assert_cmp(intmax_t, a, b, !=, \ - ==, "jd", __VA_ARGS__) -#define assert_jd_lt(a, b, ...) assert_cmp(intmax_t, a, b, <, \ - >=, "jd", __VA_ARGS__) -#define assert_jd_le(a, b, ...) assert_cmp(intmax_t, a, b, <=, \ - >, "jd", __VA_ARGS__) -#define assert_jd_ge(a, b, ...) assert_cmp(intmax_t, a, b, >=, \ - <, "jd", __VA_ARGS__) -#define assert_jd_gt(a, b, ...) assert_cmp(intmax_t, a, b, >, \ - <=, "jd", __VA_ARGS__) +#define assert_jd_eq(a, b, ...) \ + assert_cmp(intmax_t, a, b, ==, !=, "jd", __VA_ARGS__) +#define assert_jd_ne(a, b, ...) \ + assert_cmp(intmax_t, a, b, !=, ==, "jd", __VA_ARGS__) +#define assert_jd_lt(a, b, ...) \ + assert_cmp(intmax_t, a, b, <, >=, "jd", __VA_ARGS__) +#define assert_jd_le(a, b, ...) \ + assert_cmp(intmax_t, a, b, <=, >, "jd", __VA_ARGS__) +#define assert_jd_ge(a, b, ...) \ + assert_cmp(intmax_t, a, b, >=, <, "jd", __VA_ARGS__) +#define assert_jd_gt(a, b, ...) \ + assert_cmp(intmax_t, a, b, >, <=, "jd", __VA_ARGS__) -#define assert_ju_eq(a, b, ...) assert_cmp(uintmax_t, a, b, ==, \ - !=, "ju", __VA_ARGS__) -#define assert_ju_ne(a, b, ...) assert_cmp(uintmax_t, a, b, !=, \ - ==, "ju", __VA_ARGS__) -#define assert_ju_lt(a, b, ...) assert_cmp(uintmax_t, a, b, <, \ - >=, "ju", __VA_ARGS__) -#define assert_ju_le(a, b, ...) assert_cmp(uintmax_t, a, b, <=, \ - >, "ju", __VA_ARGS__) -#define assert_ju_ge(a, b, ...) assert_cmp(uintmax_t, a, b, >=, \ - <, "ju", __VA_ARGS__) -#define assert_ju_gt(a, b, ...) assert_cmp(uintmax_t, a, b, >, \ - <=, "ju", __VA_ARGS__) +#define assert_ju_eq(a, b, ...) 
\ + assert_cmp(uintmax_t, a, b, ==, !=, "ju", __VA_ARGS__) +#define assert_ju_ne(a, b, ...) \ + assert_cmp(uintmax_t, a, b, !=, ==, "ju", __VA_ARGS__) +#define assert_ju_lt(a, b, ...) \ + assert_cmp(uintmax_t, a, b, <, >=, "ju", __VA_ARGS__) +#define assert_ju_le(a, b, ...) \ + assert_cmp(uintmax_t, a, b, <=, >, "ju", __VA_ARGS__) +#define assert_ju_ge(a, b, ...) \ + assert_cmp(uintmax_t, a, b, >=, <, "ju", __VA_ARGS__) +#define assert_ju_gt(a, b, ...) \ + assert_cmp(uintmax_t, a, b, >, <=, "ju", __VA_ARGS__) -#define assert_zd_eq(a, b, ...) assert_cmp(ssize_t, a, b, ==, \ - !=, "zd", __VA_ARGS__) -#define assert_zd_ne(a, b, ...) assert_cmp(ssize_t, a, b, !=, \ - ==, "zd", __VA_ARGS__) -#define assert_zd_lt(a, b, ...) assert_cmp(ssize_t, a, b, <, \ - >=, "zd", __VA_ARGS__) -#define assert_zd_le(a, b, ...) assert_cmp(ssize_t, a, b, <=, \ - >, "zd", __VA_ARGS__) -#define assert_zd_ge(a, b, ...) assert_cmp(ssize_t, a, b, >=, \ - <, "zd", __VA_ARGS__) -#define assert_zd_gt(a, b, ...) assert_cmp(ssize_t, a, b, >, \ - <=, "zd", __VA_ARGS__) +#define assert_zd_eq(a, b, ...) \ + assert_cmp(ssize_t, a, b, ==, !=, "zd", __VA_ARGS__) +#define assert_zd_ne(a, b, ...) \ + assert_cmp(ssize_t, a, b, !=, ==, "zd", __VA_ARGS__) +#define assert_zd_lt(a, b, ...) \ + assert_cmp(ssize_t, a, b, <, >=, "zd", __VA_ARGS__) +#define assert_zd_le(a, b, ...) \ + assert_cmp(ssize_t, a, b, <=, >, "zd", __VA_ARGS__) +#define assert_zd_ge(a, b, ...) \ + assert_cmp(ssize_t, a, b, >=, <, "zd", __VA_ARGS__) +#define assert_zd_gt(a, b, ...) \ + assert_cmp(ssize_t, a, b, >, <=, "zd", __VA_ARGS__) -#define assert_zu_eq(a, b, ...) assert_cmp(size_t, a, b, ==, \ - !=, "zu", __VA_ARGS__) -#define assert_zu_ne(a, b, ...) assert_cmp(size_t, a, b, !=, \ - ==, "zu", __VA_ARGS__) -#define assert_zu_lt(a, b, ...) assert_cmp(size_t, a, b, <, \ - >=, "zu", __VA_ARGS__) -#define assert_zu_le(a, b, ...) assert_cmp(size_t, a, b, <=, \ - >, "zu", __VA_ARGS__) -#define assert_zu_ge(a, b, ...) 
assert_cmp(size_t, a, b, >=, \ - <, "zu", __VA_ARGS__) -#define assert_zu_gt(a, b, ...) assert_cmp(size_t, a, b, >, \ - <=, "zu", __VA_ARGS__) +#define assert_zu_eq(a, b, ...) \ + assert_cmp(size_t, a, b, ==, !=, "zu", __VA_ARGS__) +#define assert_zu_ne(a, b, ...) \ + assert_cmp(size_t, a, b, !=, ==, "zu", __VA_ARGS__) +#define assert_zu_lt(a, b, ...) \ + assert_cmp(size_t, a, b, <, >=, "zu", __VA_ARGS__) +#define assert_zu_le(a, b, ...) \ + assert_cmp(size_t, a, b, <=, >, "zu", __VA_ARGS__) +#define assert_zu_ge(a, b, ...) \ + assert_cmp(size_t, a, b, >=, <, "zu", __VA_ARGS__) +#define assert_zu_gt(a, b, ...) \ + assert_cmp(size_t, a, b, >, <=, "zu", __VA_ARGS__) -#define assert_d32_eq(a, b, ...) assert_cmp(int32_t, a, b, ==, \ - !=, FMTd32, __VA_ARGS__) -#define assert_d32_ne(a, b, ...) assert_cmp(int32_t, a, b, !=, \ - ==, FMTd32, __VA_ARGS__) -#define assert_d32_lt(a, b, ...) assert_cmp(int32_t, a, b, <, \ - >=, FMTd32, __VA_ARGS__) -#define assert_d32_le(a, b, ...) assert_cmp(int32_t, a, b, <=, \ - >, FMTd32, __VA_ARGS__) -#define assert_d32_ge(a, b, ...) assert_cmp(int32_t, a, b, >=, \ - <, FMTd32, __VA_ARGS__) -#define assert_d32_gt(a, b, ...) assert_cmp(int32_t, a, b, >, \ - <=, FMTd32, __VA_ARGS__) +#define assert_d32_eq(a, b, ...) \ + assert_cmp(int32_t, a, b, ==, !=, FMTd32, __VA_ARGS__) +#define assert_d32_ne(a, b, ...) \ + assert_cmp(int32_t, a, b, !=, ==, FMTd32, __VA_ARGS__) +#define assert_d32_lt(a, b, ...) \ + assert_cmp(int32_t, a, b, <, >=, FMTd32, __VA_ARGS__) +#define assert_d32_le(a, b, ...) \ + assert_cmp(int32_t, a, b, <=, >, FMTd32, __VA_ARGS__) +#define assert_d32_ge(a, b, ...) \ + assert_cmp(int32_t, a, b, >=, <, FMTd32, __VA_ARGS__) +#define assert_d32_gt(a, b, ...) \ + assert_cmp(int32_t, a, b, >, <=, FMTd32, __VA_ARGS__) -#define assert_u32_eq(a, b, ...) assert_cmp(uint32_t, a, b, ==, \ - !=, FMTu32, __VA_ARGS__) -#define assert_u32_ne(a, b, ...) 
assert_cmp(uint32_t, a, b, !=, \ - ==, FMTu32, __VA_ARGS__) -#define assert_u32_lt(a, b, ...) assert_cmp(uint32_t, a, b, <, \ - >=, FMTu32, __VA_ARGS__) -#define assert_u32_le(a, b, ...) assert_cmp(uint32_t, a, b, <=, \ - >, FMTu32, __VA_ARGS__) -#define assert_u32_ge(a, b, ...) assert_cmp(uint32_t, a, b, >=, \ - <, FMTu32, __VA_ARGS__) -#define assert_u32_gt(a, b, ...) assert_cmp(uint32_t, a, b, >, \ - <=, FMTu32, __VA_ARGS__) +#define assert_u32_eq(a, b, ...) \ + assert_cmp(uint32_t, a, b, ==, !=, FMTu32, __VA_ARGS__) +#define assert_u32_ne(a, b, ...) \ + assert_cmp(uint32_t, a, b, !=, ==, FMTu32, __VA_ARGS__) +#define assert_u32_lt(a, b, ...) \ + assert_cmp(uint32_t, a, b, <, >=, FMTu32, __VA_ARGS__) +#define assert_u32_le(a, b, ...) \ + assert_cmp(uint32_t, a, b, <=, >, FMTu32, __VA_ARGS__) +#define assert_u32_ge(a, b, ...) \ + assert_cmp(uint32_t, a, b, >=, <, FMTu32, __VA_ARGS__) +#define assert_u32_gt(a, b, ...) \ + assert_cmp(uint32_t, a, b, >, <=, FMTu32, __VA_ARGS__) -#define assert_d64_eq(a, b, ...) assert_cmp(int64_t, a, b, ==, \ - !=, FMTd64, __VA_ARGS__) -#define assert_d64_ne(a, b, ...) assert_cmp(int64_t, a, b, !=, \ - ==, FMTd64, __VA_ARGS__) -#define assert_d64_lt(a, b, ...) assert_cmp(int64_t, a, b, <, \ - >=, FMTd64, __VA_ARGS__) -#define assert_d64_le(a, b, ...) assert_cmp(int64_t, a, b, <=, \ - >, FMTd64, __VA_ARGS__) -#define assert_d64_ge(a, b, ...) assert_cmp(int64_t, a, b, >=, \ - <, FMTd64, __VA_ARGS__) -#define assert_d64_gt(a, b, ...) assert_cmp(int64_t, a, b, >, \ - <=, FMTd64, __VA_ARGS__) +#define assert_d64_eq(a, b, ...) \ + assert_cmp(int64_t, a, b, ==, !=, FMTd64, __VA_ARGS__) +#define assert_d64_ne(a, b, ...) \ + assert_cmp(int64_t, a, b, !=, ==, FMTd64, __VA_ARGS__) +#define assert_d64_lt(a, b, ...) \ + assert_cmp(int64_t, a, b, <, >=, FMTd64, __VA_ARGS__) +#define assert_d64_le(a, b, ...) \ + assert_cmp(int64_t, a, b, <=, >, FMTd64, __VA_ARGS__) +#define assert_d64_ge(a, b, ...) 
\ + assert_cmp(int64_t, a, b, >=, <, FMTd64, __VA_ARGS__) +#define assert_d64_gt(a, b, ...) \ + assert_cmp(int64_t, a, b, >, <=, FMTd64, __VA_ARGS__) -#define assert_u64_eq(a, b, ...) assert_cmp(uint64_t, a, b, ==, \ - !=, FMTu64, __VA_ARGS__) -#define assert_u64_ne(a, b, ...) assert_cmp(uint64_t, a, b, !=, \ - ==, FMTu64, __VA_ARGS__) -#define assert_u64_lt(a, b, ...) assert_cmp(uint64_t, a, b, <, \ - >=, FMTu64, __VA_ARGS__) -#define assert_u64_le(a, b, ...) assert_cmp(uint64_t, a, b, <=, \ - >, FMTu64, __VA_ARGS__) -#define assert_u64_ge(a, b, ...) assert_cmp(uint64_t, a, b, >=, \ - <, FMTu64, __VA_ARGS__) -#define assert_u64_gt(a, b, ...) assert_cmp(uint64_t, a, b, >, \ - <=, FMTu64, __VA_ARGS__) +#define assert_u64_eq(a, b, ...) \ + assert_cmp(uint64_t, a, b, ==, !=, FMTu64, __VA_ARGS__) +#define assert_u64_ne(a, b, ...) \ + assert_cmp(uint64_t, a, b, !=, ==, FMTu64, __VA_ARGS__) +#define assert_u64_lt(a, b, ...) \ + assert_cmp(uint64_t, a, b, <, >=, FMTu64, __VA_ARGS__) +#define assert_u64_le(a, b, ...) \ + assert_cmp(uint64_t, a, b, <=, >, FMTu64, __VA_ARGS__) +#define assert_u64_ge(a, b, ...) \ + assert_cmp(uint64_t, a, b, >=, <, FMTu64, __VA_ARGS__) +#define assert_u64_gt(a, b, ...) \ + assert_cmp(uint64_t, a, b, >, <=, FMTu64, __VA_ARGS__) -#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__) -#define assert_b_ne(a, b, ...) verify_b_ne(true, a, b, __VA_ARGS__) +#define assert_b_eq(a, b, ...) verify_b_eq(true, a, b, __VA_ARGS__) +#define assert_b_ne(a, b, ...) verify_b_ne(true, a, b, __VA_ARGS__) -#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) -#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) +#define assert_true(a, ...) assert_b_eq(a, true, __VA_ARGS__) +#define assert_false(a, ...) assert_b_eq(a, false, __VA_ARGS__) #define assert_str_eq(a, b, ...) verify_str_eq(true, a, b, __VA_ARGS__) #define assert_str_ne(a, b, ...) 
verify_str_ne(true, a, b, __VA_ARGS__) @@ -515,45 +516,42 @@ typedef enum { test_status_count = 3 } test_status_t; -typedef void (test_t)(void); +typedef void(test_t)(void); -#define TEST_BEGIN(f) \ -static void \ -f(void) { \ - p_test_init(#f); +#define TEST_BEGIN(f) \ + static void f(void) { \ + p_test_init(#f); -#define TEST_END \ - goto label_test_end; \ -label_test_end: \ - p_test_fini(); \ -} +#define TEST_END \ + goto label_test_end; \ + label_test_end: \ + p_test_fini(); \ + } -#define test(...) \ - p_test(__VA_ARGS__, NULL) +#define test(...) p_test(__VA_ARGS__, NULL) -#define test_no_reentrancy(...) \ - p_test_no_reentrancy(__VA_ARGS__, NULL) +#define test_no_reentrancy(...) p_test_no_reentrancy(__VA_ARGS__, NULL) -#define test_no_malloc_init(...) \ - p_test_no_malloc_init(__VA_ARGS__, NULL) +#define test_no_malloc_init(...) p_test_no_malloc_init(__VA_ARGS__, NULL) -#define test_skip_if(e) do { \ - if (e) { \ - test_skip("%s:%s:%d: Test skipped: (%s)", \ - __func__, __FILE__, __LINE__, #e); \ - goto label_test_end; \ - } \ -} while (0) +#define test_skip_if(e) \ + do { \ + if (e) { \ + test_skip("%s:%s:%d: Test skipped: (%s)", __func__, \ + __FILE__, __LINE__, #e); \ + goto label_test_end; \ + } \ + } while (0) bool test_is_reentrant(void); -void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); -void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +void test_skip(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); /* For private use by macros. 
*/ -test_status_t p_test(test_t *t, ...); -test_status_t p_test_no_reentrancy(test_t *t, ...); -test_status_t p_test_no_malloc_init(test_t *t, ...); -void p_test_init(const char *name); -void p_test_fini(void); -void p_test_fail(bool may_abort, const char *prefix, const char *message); +test_status_t p_test(test_t *t, ...); +test_status_t p_test_no_reentrancy(test_t *t, ...); +test_status_t p_test_no_malloc_init(test_t *t, ...); +void p_test_init(const char *name); +void p_test_fini(void); +void p_test_fail(bool may_abort, const char *prefix, const char *message); diff --git a/test/include/test/timer.h b/test/include/test/timer.h index ace6191b..c1d59eb4 100644 --- a/test/include/test/timer.h +++ b/test/include/test/timer.h @@ -5,7 +5,7 @@ typedef struct { nstime_t t1; } timedelta_t; -void timer_start(timedelta_t *timer); -void timer_stop(timedelta_t *timer); -uint64_t timer_usec(const timedelta_t *timer); -void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen); +void timer_start(timedelta_t *timer); +void timer_stop(timedelta_t *timer); +uint64_t timer_usec(const timedelta_t *timer); +void timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen); diff --git a/test/integration/MALLOCX_ARENA.c b/test/integration/MALLOCX_ARENA.c index 440ad9ef..c81566a8 100644 --- a/test/integration/MALLOCX_ARENA.c +++ b/test/integration/MALLOCX_ARENA.c @@ -6,27 +6,27 @@ void * thd_start(void *arg) { unsigned thread_ind = (unsigned)(uintptr_t)arg; unsigned arena_ind; - void *p; - size_t sz; + void *p; + size_t sz; sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Error in arenas.create"); if (thread_ind % 4 != 3) { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); const char *dss_precs[] = {"disabled", "primary", "secondary"}; - unsigned prec_ind = thread_ind % - (sizeof(dss_precs)/sizeof(char*)); + unsigned prec_ind = 
thread_ind + % (sizeof(dss_precs) / sizeof(char *)); const char *dss = dss_precs[prec_ind]; int expected_err = (have_dss || prec_ind == 0) ? 0 : EFAULT; expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Error in mallctlnametomib()"); mib[1] = arena_ind; expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&dss, - sizeof(const char *)), expected_err, - "Error in mallctlbymib()"); + sizeof(const char *)), + expected_err, "Error in mallctlbymib()"); } p = mallocx(1, MALLOCX_ARENA(arena_ind)); @@ -37,12 +37,11 @@ thd_start(void *arg) { } TEST_BEGIN(test_MALLOCX_ARENA) { - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { - thd_create(&thds[i], thd_start, - (void *)(uintptr_t)i); + thd_create(&thds[i], thd_start, (void *)(uintptr_t)i); } for (i = 0; i < NTHREADS; i++) { @@ -53,6 +52,5 @@ TEST_END int main(void) { - return test( - test_MALLOCX_ARENA); + return test(test_MALLOCX_ARENA); } diff --git a/test/integration/aligned_alloc.c b/test/integration/aligned_alloc.c index b37d5ba0..1cf2a2f1 100644 --- a/test/integration/aligned_alloc.c +++ b/test/integration/aligned_alloc.c @@ -15,7 +15,7 @@ purge(void) { TEST_BEGIN(test_alignment_errors) { size_t alignment; - void *p; + void *p; alignment = 0; set_errno(0); @@ -24,17 +24,15 @@ TEST_BEGIN(test_alignment_errors) { "Expected error for invalid alignment %zu", alignment); for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { + alignment <<= 1) { set_errno(0); p = aligned_alloc(alignment + 1, 1); expect_false(p != NULL || get_errno() != EINVAL, - "Expected error for invalid alignment %zu", - alignment + 1); + "Expected error for invalid alignment %zu", alignment + 1); } } TEST_END - /* * GCC "-Walloc-size-larger-than" warning detects when one of the memory * allocation functions is called with a size larger than the maximum size that @@ -47,33 +45,31 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_oom_errors) { size_t 
alignment, size; - void *p; + void *p; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); - size = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; - size = 0x80000000LU; + size = 0x80000000LU; #endif set_errno(0); p = aligned_alloc(alignment, size); expect_false(p != NULL || get_errno() != ENOMEM, - "Expected error for aligned_alloc(%zu, %zu)", - alignment, size); + "Expected error for aligned_alloc(%zu, %zu)", alignment, size); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0xc000000000000001); + size = UINT64_C(0xc000000000000001); #else alignment = 0x40000000LU; - size = 0xc0000001LU; + size = 0xc0000001LU; #endif set_errno(0); p = aligned_alloc(alignment, size); expect_false(p != NULL || get_errno() != ENOMEM, - "Expected error for aligned_alloc(%zu, %zu)", - alignment, size); + "Expected error for aligned_alloc(%zu, %zu)", alignment, size); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -84,8 +80,7 @@ TEST_BEGIN(test_oom_errors) { set_errno(0); p = aligned_alloc(alignment, size); expect_false(p != NULL || get_errno() != ENOMEM, - "Expected error for aligned_alloc(&p, %zu, %zu)", - alignment, size); + "Expected error for aligned_alloc(&p, %zu, %zu)", alignment, size); } TEST_END @@ -94,21 +89,18 @@ JEMALLOC_DIAGNOSTIC_POP TEST_BEGIN(test_alignment_and_size) { #define NITER 4 - size_t alignment, size, total; + size_t alignment, size, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (size = 1; - size < 3 * alignment && size < (1U << 31); - size += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (size = 1; size < 3 * alignment && size < (1U << 31); + size += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { ps[i] = aligned_alloc(alignment, 
size); if (ps[i] == NULL) { @@ -149,9 +141,6 @@ TEST_END int main(void) { - return test( - test_alignment_errors, - test_oom_errors, - test_alignment_and_size, - test_zero_alloc); + return test(test_alignment_errors, test_oom_errors, + test_alignment_and_size, test_zero_alloc); } diff --git a/test/integration/allocated.c b/test/integration/allocated.c index 967e0108..2c46d916 100644 --- a/test/integration/allocated.c +++ b/test/integration/allocated.c @@ -2,27 +2,27 @@ void * thd_start(void *arg) { - int err; - void *p; - uint64_t a0, a1, d0, d1; + int err; + void *p; + uint64_t a0, a1, d0, d1; uint64_t *ap0, *ap1, *dp0, *dp1; - size_t sz, usize; + size_t sz, usize; sz = sizeof(a0); if ((err = mallctl("thread.allocated", (void *)&a0, &sz, NULL, 0))) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(ap0); if ((err = mallctl("thread.allocatedp", (void *)&ap0, &sz, NULL, 0))) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } expect_u64_eq(*ap0, a0, "\"thread.allocatedp\" should provide a pointer to internal " @@ -33,17 +33,17 @@ thd_start(void *arg) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } sz = sizeof(dp0); - if ((err = mallctl("thread.deallocatedp", (void *)&dp0, &sz, NULL, - 0))) { + if ((err = mallctl( + "thread.deallocatedp", (void *)&dp0, &sz, NULL, 0))) { if (err == ENOENT) { goto label_ENOENT; } - test_fail("%s(): Error in mallctl(): %s", __func__, - strerror(err)); + test_fail( + "%s(): Error in mallctl(): %s", __func__, strerror(err)); } expect_u64_eq(*dp0, d0, "\"thread.deallocatedp\" should provide a pointer to internal " 
@@ -107,10 +107,6 @@ TEST_END int main(void) { /* Run tests multiple times to check for bad interactions. */ - return test( - test_main_thread, - test_subthread, - test_main_thread, - test_subthread, - test_main_thread); + return test(test_main_thread, test_subthread, test_main_thread, + test_subthread, test_main_thread); } diff --git a/test/integration/cpp/basic.cpp b/test/integration/cpp/basic.cpp index c1cf6cd8..e0341176 100644 --- a/test/integration/cpp/basic.cpp +++ b/test/integration/cpp/basic.cpp @@ -19,6 +19,5 @@ TEST_END int main() { - return test( - test_basic); + return test(test_basic); } diff --git a/test/integration/cpp/infallible_new_false.cpp b/test/integration/cpp/infallible_new_false.cpp index 42196d6a..5ba4f49e 100644 --- a/test/integration/cpp/infallible_new_false.cpp +++ b/test/integration/cpp/infallible_new_false.cpp @@ -17,7 +17,5 @@ TEST_END int main(void) { - return test( - test_failing_alloc); + return test(test_failing_alloc); } - diff --git a/test/integration/cpp/infallible_new_true.cpp b/test/integration/cpp/infallible_new_true.cpp index 3b2862bd..300bdd85 100644 --- a/test/integration/cpp/infallible_new_true.cpp +++ b/test/integration/cpp/infallible_new_true.cpp @@ -8,7 +8,8 @@ */ typedef void (*abort_hook_t)(const char *message); bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { const char *expected_start = ": Allocation of size"; if (strncmp(message, expected_start, strlen(expected_start)) != 0) { abort(); @@ -19,7 +20,7 @@ void fake_abort(const char *message) { static bool own_operator_new(void) { uint64_t before, after; - size_t sz = sizeof(before); + size_t sz = sizeof(before); /* thread.allocated is always available, even w/o config_stats. 
*/ expect_d_eq(mallctl("thread.allocated", (void *)&before, &sz, NULL, 0), @@ -35,8 +36,8 @@ own_operator_new(void) { TEST_BEGIN(test_failing_alloc) { abort_hook_t abort_hook = &fake_abort; expect_d_eq(mallctl("experimental.hooks.safety_check_abort", NULL, NULL, - (void *)&abort_hook, sizeof(abort_hook)), 0, - "Unexpected mallctl failure setting abort hook"); + (void *)&abort_hook, sizeof(abort_hook)), + 0, "Unexpected mallctl failure setting abort hook"); /* * Not owning operator new is only expected to happen on MinGW which @@ -61,6 +62,5 @@ TEST_END int main(void) { - return test( - test_failing_alloc); + return test(test_failing_alloc); } diff --git a/test/integration/extent.c b/test/integration/extent.c index 7a028f18..c15bf761 100644 --- a/test/integration/extent.c +++ b/test/integration/extent.c @@ -6,26 +6,29 @@ static void test_extent_body(unsigned arena_ind) { - void *p; + void *p; size_t large0, large1, large2, sz; size_t purge_mib[3]; size_t purge_miblen; - int flags; - bool xallocx_success_a, xallocx_success_b, xallocx_success_c; + int flags; + bool xallocx_success_a, xallocx_success_b, xallocx_success_c; flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; /* Get large size classes. 
*/ sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected arenas.lextent.0.size failure"); - expect_d_eq(mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, - 0), 0, "Unexpected arenas.lextent.1.size failure"); - expect_d_eq(mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, - 0), 0, "Unexpected arenas.lextent.2.size failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.0.size failure"); + expect_d_eq( + mallctl("arenas.lextent.1.size", (void *)&large1, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.1.size failure"); + expect_d_eq( + mallctl("arenas.lextent.2.size", (void *)&large2, &sz, NULL, 0), 0, + "Unexpected arenas.lextent.2.size failure"); /* Test dalloc/decommit/purge cascade. */ - purge_miblen = sizeof(purge_mib)/sizeof(size_t); + purge_miblen = sizeof(purge_mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.purge", purge_mib, &purge_miblen), 0, "Unexpected mallctlnametomib() failure"); purge_mib[1] = (size_t)arena_ind; @@ -47,8 +50,8 @@ test_extent_body(unsigned arena_ind) { if (xallocx_success_a) { expect_true(called_dalloc, "Expected dalloc call"); expect_true(called_decommit, "Expected decommit call"); - expect_true(did_purge_lazy || did_purge_forced, - "Expected purge"); + expect_true( + did_purge_lazy || did_purge_forced, "Expected purge"); expect_true(called_split, "Expected split call"); } dallocx(p, flags); @@ -72,8 +75,8 @@ test_extent_body(unsigned arena_ind) { } xallocx_success_c = (xallocx(p, large0 * 2, 0, flags) == large0 * 2); if (did_split) { - expect_b_eq(did_decommit, did_commit, - "Expected decommit/commit match"); + expect_b_eq( + did_decommit, did_commit, "Expected decommit/commit match"); } if (xallocx_success_b && xallocx_success_c) { expect_true(did_merge, "Expected merge"); @@ -90,33 +93,34 @@ test_extent_body(unsigned arena_ind) { static void 
test_manual_hook_auto_arena(void) { - unsigned narenas; - size_t old_size, new_size, sz; - size_t hooks_mib[3]; - size_t hooks_miblen; + unsigned narenas; + size_t old_size, new_size, sz; + size_t hooks_mib[3]; + size_t hooks_miblen; extent_hooks_t *new_hooks, *old_hooks; extent_hooks_prep(); sz = sizeof(unsigned); /* Get number of auto arenas. */ - expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("opt.narenas", (void *)&narenas, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); if (narenas == 1) { return; } /* Install custom extent hooks on arena 1 (might not be initialized). */ - hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, - &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_miblen = sizeof(hooks_mib) / sizeof(size_t); + expect_d_eq( + mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), + 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = 1; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, (void *)&new_hooks, new_size), 0, - "Unexpected extent_hooks error"); + &old_size, (void *)&new_hooks, new_size), + 0, "Unexpected extent_hooks error"); static bool auto_arena_created = false; if (old_hooks != &hooks) { expect_b_eq(auto_arena_created, false, @@ -127,10 +131,10 @@ test_manual_hook_auto_arena(void) { static void test_manual_hook_body(void) { - unsigned arena_ind; - size_t old_size, new_size, sz; - size_t hooks_mib[3]; - size_t hooks_miblen; + unsigned arena_ind; + size_t old_size, new_size, sz; + size_t hooks_mib[3]; + size_t hooks_miblen; extent_hooks_t *new_hooks, *old_hooks; extent_hooks_prep(); @@ -140,16 +144,17 @@ test_manual_hook_body(void) { 0, "Unexpected mallctl() failure"); /* Install custom extent hooks. 
*/ - hooks_miblen = sizeof(hooks_mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.extent_hooks", hooks_mib, - &hooks_miblen), 0, "Unexpected mallctlnametomib() failure"); + hooks_miblen = sizeof(hooks_mib) / sizeof(size_t); + expect_d_eq( + mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), + 0, "Unexpected mallctlnametomib() failure"); hooks_mib[1] = (size_t)arena_ind; old_size = sizeof(extent_hooks_t *); new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, (void *)&new_hooks, new_size), 0, - "Unexpected extent_hooks error"); + &old_size, (void *)&new_hooks, new_size), + 0, "Unexpected extent_hooks error"); expect_ptr_ne(old_hooks->alloc, extent_alloc_hook, "Unexpected extent_hooks error"); expect_ptr_ne(old_hooks->dalloc, extent_dalloc_hook, @@ -173,10 +178,13 @@ test_manual_hook_body(void) { /* Restore extent hooks. */ expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, - (void *)&old_hooks, new_size), 0, "Unexpected extent_hooks error"); + (void *)&old_hooks, new_size), + 0, "Unexpected extent_hooks error"); expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, - &old_size, NULL, 0), 0, "Unexpected extent_hooks error"); - expect_ptr_eq(old_hooks, default_hooks, "Unexpected extent_hooks error"); + &old_size, NULL, 0), + 0, "Unexpected extent_hooks error"); + expect_ptr_eq( + old_hooks, default_hooks, "Unexpected extent_hooks error"); expect_ptr_eq(old_hooks->alloc, default_hooks->alloc, "Unexpected extent_hooks error"); expect_ptr_eq(old_hooks->dalloc, default_hooks->dalloc, @@ -213,8 +221,8 @@ TEST_BEGIN(test_extent_manual_hook) { TEST_END TEST_BEGIN(test_extent_auto_hook) { - unsigned arena_ind; - size_t new_size, sz; + unsigned arena_ind; + size_t new_size, sz; extent_hooks_t *new_hooks; extent_hooks_prep(); @@ -223,7 +231,8 @@ TEST_BEGIN(test_extent_auto_hook) { new_hooks = &hooks; new_size = sizeof(extent_hooks_t *); 
expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, - (void *)&new_hooks, new_size), 0, "Unexpected mallctl() failure"); + (void *)&new_hooks, new_size), + 0, "Unexpected mallctl() failure"); test_skip_if(is_background_thread_enabled()); test_extent_body(arena_ind); @@ -231,19 +240,18 @@ TEST_BEGIN(test_extent_auto_hook) { TEST_END static void -test_arenas_create_ext_base(arena_config_t config, - bool expect_hook_data, bool expect_hook_metadata) -{ +test_arenas_create_ext_base( + arena_config_t config, bool expect_hook_data, bool expect_hook_metadata) { unsigned arena, arena1; - void *ptr; - size_t sz = sizeof(unsigned); + void *ptr; + size_t sz = sizeof(unsigned); extent_hooks_prep(); called_alloc = false; - expect_d_eq(mallctl("experimental.arenas_create_ext", - (void *)&arena, &sz, &config, sizeof(arena_config_t)), 0, - "Unexpected mallctl() failure"); + expect_d_eq(mallctl("experimental.arenas_create_ext", (void *)&arena, + &sz, &config, sizeof(arena_config_t)), + 0, "Unexpected mallctl() failure"); expect_b_eq(called_alloc, expect_hook_metadata, "expected hook metadata alloc mismatch"); @@ -279,9 +287,7 @@ TEST_END int main(void) { - return test( - test_extent_manual_hook, - test_extent_auto_hook, + return test(test_extent_manual_hook, test_extent_auto_hook, test_arenas_create_ext_with_ehooks_no_metadata, test_arenas_create_ext_with_ehooks_with_metadata); } diff --git a/test/integration/malloc.c b/test/integration/malloc.c index ef449163..a77e44a6 100644 --- a/test/integration/malloc.c +++ b/test/integration/malloc.c @@ -11,6 +11,5 @@ TEST_END int main(void) { - return test( - test_zero_alloc); + return test(test_zero_alloc); } diff --git a/test/integration/mallocx.c b/test/integration/mallocx.c index fdf1e3f4..c7ed0fb9 100644 --- a/test/integration/mallocx.c +++ b/test/integration/mallocx.c @@ -3,7 +3,7 @@ static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void 
*)&ret, &z, NULL, 0), 0, @@ -25,12 +25,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -64,36 +64,37 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { size_t largemax; - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); - expect_ptr_null(mallocx(largemax+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", largemax+1); + expect_ptr_null(mallocx(largemax + 1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", largemax + 1); - expect_ptr_null(mallocx(ZU(PTRDIFF_MAX)+1, 0), - "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + expect_ptr_null(mallocx(ZU(PTRDIFF_MAX) + 1, 0), + "Expected OOM for mallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1); expect_ptr_null(mallocx(SIZE_T_MAX, 0), "Expected OOM for mallocx(size=%#zx, 0)", SIZE_T_MAX); - expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(mallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)), "Expected OOM for mallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); + ZU(PTRDIFF_MAX) + 1); } TEST_END static void * remote_alloc(void *arg) { unsigned arena; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, - 
NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); - void *ptr = mallocx(large_sz, MALLOCX_ARENA(arena) - | MALLOCX_TCACHE_NONE); + void *ptr = mallocx( + large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void **ret = (void **)arg; *ret = ptr; @@ -114,16 +115,16 @@ TEST_BEGIN(test_remote_free) { TEST_END TEST_BEGIN(test_oom) { - size_t largemax; - bool oom; - void *ptrs[3]; + size_t largemax; + bool oom; + void *ptrs[3]; unsigned i; /* * It should be impossible to allocate three objects that each consume * nearly half the virtual address space. */ - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { ptrs[i] = mallocx(largemax, MALLOCX_ARENA(0)); @@ -143,10 +144,10 @@ TEST_BEGIN(test_oom) { #if LG_SIZEOF_PTR == 3 expect_ptr_null(mallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x8000000000000000ULL)), + MALLOCX_ALIGN(0x8000000000000000ULL)), "Expected OOM for mallocx()"); - expect_ptr_null(mallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x80000000)), + expect_ptr_null( + mallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)), "Expected OOM for mallocx()"); #else expect_ptr_null(mallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)), @@ -164,20 +165,20 @@ TEST_BEGIN(test_basic) { for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { size_t nsz, rsz; - void *p; + void *p; nsz = nallocx(sz, 0); expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); p = mallocx(sz, 0); - expect_ptr_not_null(p, - "Unexpected mallocx(size=%zx, flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected mallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); expect_zu_ge(rsz, sz, "Real size smaller than expected"); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); dallocx(p, 0); p = mallocx(sz, 0); - expect_ptr_not_null(p, - "Unexpected mallocx(size=%zx, 
flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected mallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); @@ -197,53 +198,57 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { const char *percpu_arena; - size_t sz = sizeof(percpu_arena); + size_t sz = sizeof(percpu_arena); - if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) || - strcmp(percpu_arena, "disabled") != 0) { - test_skip("test_alignment_and_size skipped: " + if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) + || strcmp(percpu_arena, "disabled") != 0) { + test_skip( + "test_alignment_and_size skipped: " "not working with percpu arena."); }; #define MAXALIGN (((size_t)1) << 23) #define NITER 4 - size_t nsz, rsz, alignment, total; + size_t nsz, rsz, alignment, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (sz = 1; sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { - nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO | MALLOCX_ARENA(0)); + nsz = nallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO + | MALLOCX_ARENA(0)); expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); - ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO | MALLOCX_ARENA(0)); + "size=%zu (%#zx)", + alignment, sz, sz); + ps[i] = mallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO + | MALLOCX_ARENA(0)); expect_ptr_not_null(ps[i], "mallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); + "size=%zu (%#zx)", + alignment, sz, sz); rsz = sallocx(ps[i], 0); expect_zu_ge(rsz, sz, "Real size smaller than 
expected for " - "alignment=%zu, size=%zu", alignment, sz); + "alignment=%zu, size=%zu", + alignment, sz); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); - expect_ptr_null( - (void *)((uintptr_t)ps[i] & (alignment-1)), - "%p inadequately aligned for" - " alignment=%zu, size=%zu", ps[i], + "alignment=%zu, size=%zu", alignment, sz); + expect_ptr_null((void *)((uintptr_t)ps[i] + & (alignment - 1)), + "%p inadequately aligned for" + " alignment=%zu, size=%zu", + ps[i], alignment, sz); total += rsz; if (total >= (MAXALIGN << 1)) { break; @@ -265,10 +270,6 @@ TEST_END int main(void) { - return test( - test_overflow, - test_oom, - test_remote_free, - test_basic, + return test(test_overflow, test_oom, test_remote_free, test_basic, test_alignment_and_size); } diff --git a/test/integration/overflow.c b/test/integration/overflow.c index ce63327c..17282e84 100644 --- a/test/integration/overflow.c +++ b/test/integration/overflow.c @@ -12,13 +12,14 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { unsigned nlextents; - size_t mib[4]; - size_t sz, miblen, max_size_class; - void *p; + size_t mib[4]; + size_t sz, miblen, max_size_class; + void *p; sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, - 0), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, + "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, @@ -26,8 +27,9 @@ TEST_BEGIN(test_overflow) { mib[2] = nlextents - 1; sz = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, - NULL, 0), 0, "Unexpected mallctlbymib() error"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() error"); expect_ptr_null(malloc(max_size_class + 1), "Expected OOM 
due to over-sized allocation request"); @@ -54,6 +56,5 @@ JEMALLOC_DIAGNOSTIC_POP int main(void) { - return test( - test_overflow); + return test(test_overflow); } diff --git a/test/integration/posix_memalign.c b/test/integration/posix_memalign.c index 2da0549b..e0df56f3 100644 --- a/test/integration/posix_memalign.c +++ b/test/integration/posix_memalign.c @@ -15,48 +15,44 @@ purge(void) { TEST_BEGIN(test_alignment_errors) { size_t alignment; - void *p; + void *p; for (alignment = 0; alignment < sizeof(void *); alignment++) { expect_d_eq(posix_memalign(&p, alignment, 1), EINVAL, - "Expected error for invalid alignment %zu", - alignment); + "Expected error for invalid alignment %zu", alignment); } for (alignment = sizeof(size_t); alignment < MAXALIGN; - alignment <<= 1) { + alignment <<= 1) { expect_d_ne(posix_memalign(&p, alignment + 1, 1), 0, - "Expected error for invalid alignment %zu", - alignment + 1); + "Expected error for invalid alignment %zu", alignment + 1); } } TEST_END TEST_BEGIN(test_oom_errors) { size_t alignment, size; - void *p; + void *p; #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x8000000000000000); - size = UINT64_C(0x8000000000000000); + size = UINT64_C(0x8000000000000000); #else alignment = 0x80000000LU; - size = 0x80000000LU; + size = 0x80000000LU; #endif expect_d_ne(posix_memalign(&p, alignment, size), 0, - "Expected error for posix_memalign(&p, %zu, %zu)", - alignment, size); + "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); #if LG_SIZEOF_PTR == 3 alignment = UINT64_C(0x4000000000000000); - size = UINT64_C(0xc000000000000001); + size = UINT64_C(0xc000000000000001); #else alignment = 0x40000000LU; - size = 0xc0000001LU; + size = 0xc0000001LU; #endif expect_d_ne(posix_memalign(&p, alignment, size), 0, - "Expected error for posix_memalign(&p, %zu, %zu)", - alignment, size); + "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); alignment = 0x10LU; #if LG_SIZEOF_PTR == 3 @@ -65,33 +61,29 @@ 
TEST_BEGIN(test_oom_errors) { size = 0xfffffff0LU; #endif expect_d_ne(posix_memalign(&p, alignment, size), 0, - "Expected error for posix_memalign(&p, %zu, %zu)", - alignment, size); + "Expected error for posix_memalign(&p, %zu, %zu)", alignment, size); } TEST_END TEST_BEGIN(test_alignment_and_size) { #define NITER 4 - size_t alignment, size, total; + size_t alignment, size, total; unsigned i; - int err; - void *ps[NITER]; + int err; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (size = 0; - size < 3 * alignment && size < (1U << 31); - size += ((size == 0) ? 1 : - (alignment >> (LG_SIZEOF_PTR-1)) - 1)) { + for (size = 0; size < 3 * alignment && size < (1U << 31); + size += ((size == 0) + ? 1 + : (alignment >> (LG_SIZEOF_PTR - 1)) - 1)) { for (i = 0; i < NITER; i++) { - err = posix_memalign(&ps[i], - alignment, size); + err = posix_memalign(&ps[i], alignment, size); if (err) { char buf[BUFERROR_BUF]; @@ -122,7 +114,5 @@ TEST_END int main(void) { return test( - test_alignment_errors, - test_oom_errors, - test_alignment_and_size); + test_alignment_errors, test_oom_errors, test_alignment_and_size); } diff --git a/test/integration/rallocx.c b/test/integration/rallocx.c index 85d9238b..8e822df7 100644 --- a/test/integration/rallocx.c +++ b/test/integration/rallocx.c @@ -3,7 +3,7 @@ static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -25,12 +25,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -58,25 +58,26 @@ TEST_BEGIN(test_grow_and_shrink) { szs[0] = sallocx(p, 0); for (i = 0; i < NCYCLES; i++) { - for (j = 1; j < NSZS && szs[j-1] < MAXSZ; j++) { - q = rallocx(p, szs[j-1]+1, 0); + for (j = 1; j < NSZS && szs[j - 1] < MAXSZ; j++) { + q = rallocx(p, szs[j - 1] + 1, 0); expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", - szs[j-1], szs[j-1]+1); + szs[j - 1], szs[j - 1] + 1); szs[j] = sallocx(q, 0); - expect_zu_ne(szs[j], szs[j-1]+1, - "Expected size to be at least: %zu", szs[j-1]+1); + expect_zu_ne(szs[j], szs[j - 1] + 1, + "Expected size to be at least: %zu", + szs[j - 1] + 1); p = q; } for (j--; j > 0; j--) { - q = rallocx(p, szs[j-1], 0); + q = rallocx(p, szs[j - 1], 0); expect_ptr_not_null(q, "Unexpected rallocx() error for size=%zu-->%zu", - szs[j], szs[j-1]); + szs[j], szs[j - 1]); tsz = sallocx(q, 0); - expect_zu_eq(tsz, szs[j-1], - "Expected size=%zu, got size=%zu", szs[j-1], tsz); + expect_zu_eq(tsz, szs[j - 1], + "Expected size=%zu, got size=%zu", szs[j - 1], tsz); p = q; } } @@ -99,11 +100,12 @@ validate_fill(void *p, uint8_t c, size_t offset, size_t len) { size_t i; for (i = 0; i < len; i++) { - uint8_t b = buf[offset+i]; + uint8_t b = buf[offset + i]; if (b != c) { - test_fail("Allocation at %p (len=%zu) contains %#x " - "rather than %#x at offset %zu", p, len, b, c, - offset+i); + test_fail( + "Allocation at %p (len=%zu) contains %#x " + "rather than %#x at offset %zu", + p, len, b, c, offset + i); ret = true; } } @@ -118,35 +120,37 @@ TEST_BEGIN(test_zero) { */ void *volatile p, *volatile q; size_t psz, qsz, i, j; - size_t start_sizes[] = {1, 3*1024, 63*1024, 4095*1024}; + size_t 
start_sizes[] = {1, 3 * 1024, 63 * 1024, 4095 * 1024}; #define FILL_BYTE 0xaaU #define RANGE 2048 - for (i = 0; i < sizeof(start_sizes)/sizeof(size_t); i++) { + for (i = 0; i < sizeof(start_sizes) / sizeof(size_t); i++) { size_t start_size = start_sizes[i]; p = mallocx(start_size, MALLOCX_ZERO); expect_ptr_not_null(p, "Unexpected mallocx() error"); psz = sallocx(p, 0); - expect_false(validate_fill(p, 0, 0, psz), - "Expected zeroed memory"); + expect_false( + validate_fill(p, 0, 0, psz), "Expected zeroed memory"); memset(p, FILL_BYTE, psz); expect_false(validate_fill(p, FILL_BYTE, 0, psz), "Expected filled memory"); for (j = 1; j < RANGE; j++) { - q = rallocx(p, start_size+j, MALLOCX_ZERO); + q = rallocx(p, start_size + j, MALLOCX_ZERO); expect_ptr_not_null(q, "Unexpected rallocx() error"); qsz = sallocx(q, 0); if (q != p || qsz != psz) { - expect_false(validate_fill(q, FILL_BYTE, 0, - psz), "Expected filled memory"); - expect_false(validate_fill(q, 0, psz, qsz-psz), + expect_false( + validate_fill(q, FILL_BYTE, 0, psz), + "Expected filled memory"); + expect_false( + validate_fill(q, 0, psz, qsz - psz), "Expected zeroed memory"); } if (psz != qsz) { - memset((void *)((uintptr_t)q+psz), FILL_BYTE, - qsz-psz); + memset((void *)((uintptr_t)q + psz), FILL_BYTE, + qsz - psz); psz = qsz; } p = q; @@ -160,7 +164,7 @@ TEST_BEGIN(test_zero) { TEST_END TEST_BEGIN(test_align) { - void *p, *q; + void *p, *q; size_t align; #define MAX_ALIGN (ZU(1) << 25) @@ -170,12 +174,10 @@ TEST_BEGIN(test_align) { for (align <<= 1; align <= MAX_ALIGN; align <<= 1) { q = rallocx(p, 1, MALLOCX_ALIGN(align)); - expect_ptr_not_null(q, - "Unexpected rallocx() error for align=%zu", align); - expect_ptr_null( - (void *)((uintptr_t)q & (align-1)), - "%p inadequately aligned for align=%zu", - q, align); + expect_ptr_not_null( + q, "Unexpected rallocx() error for align=%zu", align); + expect_ptr_null((void *)((uintptr_t)q & (align - 1)), + "%p inadequately aligned for align=%zu", q, align); p = q; } 
dallocx(p, 0); @@ -191,19 +193,19 @@ TEST_BEGIN(test_align_enum) { for (size_t lg_size = LG_MIN; lg_size <= LG_MAX; ++lg_size) { size_t size = 1 << lg_size; for (size_t lg_align_next = LG_MIN; - lg_align_next <= LG_MAX; ++lg_align_next) { - int flags = MALLOCX_LG_ALIGN(lg_align); + lg_align_next <= LG_MAX; ++lg_align_next) { + int flags = MALLOCX_LG_ALIGN(lg_align); void *p = mallocx(1, flags); - assert_ptr_not_null(p, - "Unexpected mallocx() error"); + assert_ptr_not_null( + p, "Unexpected mallocx() error"); assert_zu_eq(nallocx(1, flags), TEST_MALLOC_SIZE(p), "Wrong mallocx() usable size"); - int flags_next = - MALLOCX_LG_ALIGN(lg_align_next); + int flags_next = MALLOCX_LG_ALIGN( + lg_align_next); p = rallocx(p, size, flags_next); - assert_ptr_not_null(p, - "Unexpected rallocx() error"); + assert_ptr_not_null( + p, "Unexpected rallocx() error"); expect_zu_eq(nallocx(size, flags_next), TEST_MALLOC_SIZE(p), "Wrong rallocx() usable size"); @@ -223,20 +225,20 @@ TEST_BEGIN(test_lg_align_and_zero) { */ void *volatile p, *volatile q; unsigned lg_align; - size_t sz; + size_t sz; #define MAX_LG_ALIGN 25 #define MAX_VALIDATE (ZU(1) << 22) lg_align = 0; - p = mallocx(1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); + p = mallocx(1, MALLOCX_LG_ALIGN(lg_align) | MALLOCX_ZERO); expect_ptr_not_null(p, "Unexpected mallocx() error"); for (lg_align++; lg_align <= MAX_LG_ALIGN; lg_align++) { - q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align)|MALLOCX_ZERO); - expect_ptr_not_null(q, - "Unexpected rallocx() error for lg_align=%u", lg_align); + q = rallocx(p, 1, MALLOCX_LG_ALIGN(lg_align) | MALLOCX_ZERO); + expect_ptr_not_null( + q, "Unexpected rallocx() error for lg_align=%u", lg_align); expect_ptr_null( - (void *)((uintptr_t)q & ((ZU(1) << lg_align)-1)), + (void *)((uintptr_t)q & ((ZU(1) << lg_align) - 1)), "%p inadequately aligned for lg_align=%u", q, lg_align); sz = sallocx(q, 0); if ((sz << 1) <= MAX_VALIDATE) { @@ -245,9 +247,10 @@ TEST_BEGIN(test_lg_align_and_zero) { } else { 
expect_false(validate_fill(q, 0, 0, MAX_VALIDATE), "Expected zeroed memory"); - expect_false(validate_fill( - (void *)((uintptr_t)q+sz-MAX_VALIDATE), - 0, 0, MAX_VALIDATE), "Expected zeroed memory"); + expect_false(validate_fill((void *)((uintptr_t)q + sz + - MAX_VALIDATE), + 0, 0, MAX_VALIDATE), + "Expected zeroed memory"); } p = q; } @@ -269,25 +272,25 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { size_t largemax; - void *p; + void *p; - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(1, 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); - expect_ptr_null(rallocx(p, largemax+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", largemax+1); + expect_ptr_null(rallocx(p, largemax + 1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", largemax + 1); - expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX)+1, 0), - "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + expect_ptr_null(rallocx(p, ZU(PTRDIFF_MAX) + 1, 0), + "Expected OOM for rallocx(p, size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1); expect_ptr_null(rallocx(p, SIZE_T_MAX, 0), "Expected OOM for rallocx(p, size=%#zx, 0)", SIZE_T_MAX); - expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)), + expect_ptr_null(rallocx(p, 1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)), "Expected OOM for rallocx(p, size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); + ZU(PTRDIFF_MAX) + 1); dallocx(p, 0); } @@ -298,11 +301,6 @@ JEMALLOC_DIAGNOSTIC_POP int main(void) { - return test( - test_grow_and_shrink, - test_zero, - test_align, - test_align_enum, - test_lg_align_and_zero, - test_overflow); + return test(test_grow_and_shrink, test_zero, test_align, + test_align_enum, test_lg_align_and_zero, test_overflow); } diff --git a/test/integration/sdallocx.c b/test/integration/sdallocx.c index ca014485..ec2fb938 100644 --- a/test/integration/sdallocx.c +++ b/test/integration/sdallocx.c @@ -10,26 +10,23 @@ TEST_BEGIN(test_basic) { TEST_END 
TEST_BEGIN(test_alignment_and_size) { - size_t nsz, sz, alignment, total; + size_t nsz, sz, alignment, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (sz = 1; sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { - nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); - ps[i] = mallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + nsz = nallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); + ps[i] = mallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); total += nsz; if (total >= (MAXALIGN << 1)) { break; @@ -49,7 +46,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_basic, - test_alignment_and_size); + return test_no_reentrancy(test_basic, test_alignment_and_size); } diff --git a/test/integration/slab_sizes.c b/test/integration/slab_sizes.c index f6a66f21..f1ff67aa 100644 --- a/test/integration/slab_sizes.c +++ b/test/integration/slab_sizes.c @@ -4,10 +4,10 @@ TEST_BEGIN(test_slab_sizes) { unsigned nbins; - size_t page; - size_t sizemib[4]; - size_t slabmib[4]; - size_t len; + size_t page; + size_t sizemib[4]; + size_t slabmib[4]; + size_t len; len = sizeof(nbins); expect_d_eq(mallctl("arenas.nbins", &nbins, &len, NULL, 0), 0, @@ -33,12 +33,14 @@ TEST_BEGIN(test_slab_sizes) { len = sizeof(size_t); sizemib[2] = i; slabmib[2] = i; - expect_d_eq(mallctlbymib(sizemib, 4, (void *)&bin_size, &len, - NULL, 0), 0, "bin size mallctlbymib failure"); + expect_d_eq( + mallctlbymib(sizemib, 4, (void *)&bin_size, &len, NULL, 0), + 0, "bin size mallctlbymib failure"); len = sizeof(size_t); - expect_d_eq(mallctlbymib(slabmib, 4, (void *)&slab_size, &len, - NULL, 0), 0, "slab size 
mallctlbymib failure"); + expect_d_eq( + mallctlbymib(slabmib, 4, (void *)&slab_size, &len, NULL, 0), + 0, "slab size mallctlbymib failure"); if (bin_size < 100) { /* @@ -51,8 +53,7 @@ TEST_BEGIN(test_slab_sizes) { expect_zu_ge(slab_size, biggest_slab_seen, "Slab sizes should go up"); biggest_slab_seen = slab_size; - } else if ( - (100 <= bin_size && bin_size < 128) + } else if ((100 <= bin_size && bin_size < 128) || (128 < bin_size && bin_size <= 200)) { expect_zu_eq(slab_size, page, "Forced-small slabs should be small"); @@ -75,6 +76,5 @@ TEST_END int main(void) { - return test( - test_slab_sizes); + return test(test_slab_sizes); } diff --git a/test/integration/smallocx.c b/test/integration/smallocx.c index 389319b7..186a6492 100644 --- a/test/integration/smallocx.c +++ b/test/integration/smallocx.c @@ -5,25 +5,24 @@ #define STR(x) STR_HELPER(x) #ifndef JEMALLOC_VERSION_GID_IDENT - #error "JEMALLOC_VERSION_GID_IDENT not defined" +# error "JEMALLOC_VERSION_GID_IDENT not defined" #endif -#define JOIN(x, y) x ## y +#define JOIN(x, y) x##y #define JOIN2(x, y) JOIN(x, y) #define smallocx JOIN2(smallocx_, JEMALLOC_VERSION_GID_IDENT) typedef struct { - void *ptr; + void *ptr; size_t size; } smallocx_return_t; -extern smallocx_return_t -smallocx(size_t size, int flags); +extern smallocx_return_t smallocx(size_t size, int flags); static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -45,12 +44,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -84,36 +83,37 @@ JEMALLOC_DIAGNOSTIC_IGNORE_ALLOC_SIZE_LARGER_THAN TEST_BEGIN(test_overflow) { size_t largemax; - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); - expect_ptr_null(smallocx(largemax+1, 0).ptr, - "Expected OOM for smallocx(size=%#zx, 0)", largemax+1); + expect_ptr_null(smallocx(largemax + 1, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", largemax + 1); - expect_ptr_null(smallocx(ZU(PTRDIFF_MAX)+1, 0).ptr, - "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX)+1); + expect_ptr_null(smallocx(ZU(PTRDIFF_MAX) + 1, 0).ptr, + "Expected OOM for smallocx(size=%#zx, 0)", ZU(PTRDIFF_MAX) + 1); expect_ptr_null(smallocx(SIZE_T_MAX, 0).ptr, "Expected OOM for smallocx(size=%#zx, 0)", SIZE_T_MAX); - expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX)+1)).ptr, + expect_ptr_null(smallocx(1, MALLOCX_ALIGN(ZU(PTRDIFF_MAX) + 1)).ptr, "Expected OOM for smallocx(size=1, MALLOCX_ALIGN(%#zx))", - ZU(PTRDIFF_MAX)+1); + ZU(PTRDIFF_MAX) + 1); } TEST_END static void * remote_alloc(void *arg) { unsigned arena; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); size_t large_sz; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large_sz, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); - smallocx_return_t r - = smallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); + smallocx_return_t r = smallocx( + large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE); void *ptr = r.ptr; expect_zu_eq(r.size, nallocx(large_sz, MALLOCX_ARENA(arena) | MALLOCX_TCACHE_NONE), @@ -138,16 +138,16 @@ 
TEST_BEGIN(test_remote_free) { TEST_END TEST_BEGIN(test_oom) { - size_t largemax; - bool oom; - void *ptrs[3]; + size_t largemax; + bool oom; + void *ptrs[3]; unsigned i; /* * It should be impossible to allocate three objects that each consume * nearly half the virtual address space. */ - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); oom = false; for (i = 0; i < sizeof(ptrs) / sizeof(void *); i++) { ptrs[i] = smallocx(largemax, 0).ptr; @@ -167,10 +167,11 @@ TEST_BEGIN(test_oom) { #if LG_SIZEOF_PTR == 3 expect_ptr_null(smallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x8000000000000000ULL)).ptr, + MALLOCX_ALIGN(0x8000000000000000ULL)) + .ptr, "Expected OOM for smallocx()"); - expect_ptr_null(smallocx(0x8000000000000000ULL, - MALLOCX_ALIGN(0x80000000)).ptr, + expect_ptr_null( + smallocx(0x8000000000000000ULL, MALLOCX_ALIGN(0x80000000)).ptr, "Expected OOM for smallocx()"); #else expect_ptr_null(smallocx(0x80000000UL, MALLOCX_ALIGN(0x80000000UL)).ptr, @@ -188,15 +189,15 @@ TEST_BEGIN(test_basic) { for (sz = 1; sz < MAXSZ; sz = nallocx(sz, 0) + 1) { smallocx_return_t ret; - size_t nsz, rsz, smz; - void *p; + size_t nsz, rsz, smz; + void *p; nsz = nallocx(sz, 0); expect_zu_ne(nsz, 0, "Unexpected nallocx() error"); ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - expect_ptr_not_null(p, - "Unexpected smallocx(size=%zx, flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected smallocx(size=%zx, flags=0) error", sz); rsz = sallocx(p, 0); expect_zu_ge(rsz, sz, "Real size smaller than expected"); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch"); @@ -206,8 +207,8 @@ TEST_BEGIN(test_basic) { ret = smallocx(sz, 0); p = ret.ptr; smz = ret.size; - expect_ptr_not_null(p, - "Unexpected smallocx(size=%zx, flags=0) error", sz); + expect_ptr_not_null( + p, "Unexpected smallocx(size=%zx, flags=0) error", sz); dallocx(p, 0); nsz = nallocx(sz, MALLOCX_ZERO); @@ -230,58 +231,61 @@ TEST_END TEST_BEGIN(test_alignment_and_size) { 
const char *percpu_arena; - size_t sz = sizeof(percpu_arena); + size_t sz = sizeof(percpu_arena); - if(mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) || - strcmp(percpu_arena, "disabled") != 0) { - test_skip("test_alignment_and_size skipped: " + if (mallctl("opt.percpu_arena", (void *)&percpu_arena, &sz, NULL, 0) + || strcmp(percpu_arena, "disabled") != 0) { + test_skip( + "test_alignment_and_size skipped: " "not working with percpu arena."); }; #define MAXALIGN (((size_t)1) << 23) #define NITER 4 - size_t nsz, rsz, smz, alignment, total; + size_t nsz, rsz, smz, alignment, total; unsigned i; - void *ps[NITER]; + void *ps[NITER]; for (i = 0; i < NITER; i++) { ps[i] = NULL; } - for (alignment = 8; - alignment <= MAXALIGN; - alignment <<= 1) { + for (alignment = 8; alignment <= MAXALIGN; alignment <<= 1) { total = 0; - for (sz = 1; - sz < 3 * alignment && sz < (1U << 31); - sz += (alignment >> (LG_SIZEOF_PTR-1)) - 1) { + for (sz = 1; sz < 3 * alignment && sz < (1U << 31); + sz += (alignment >> (LG_SIZEOF_PTR - 1)) - 1) { for (i = 0; i < NITER; i++) { - nsz = nallocx(sz, MALLOCX_ALIGN(alignment) | - MALLOCX_ZERO); + nsz = nallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); expect_zu_ne(nsz, 0, "nallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); - smallocx_return_t ret - = smallocx(sz, MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); + "size=%zu (%#zx)", + alignment, sz, sz); + smallocx_return_t ret = smallocx(sz, + MALLOCX_ALIGN(alignment) | MALLOCX_ZERO); ps[i] = ret.ptr; expect_ptr_not_null(ps[i], "smallocx() error for alignment=%zu, " - "size=%zu (%#zx)", alignment, sz, sz); + "size=%zu (%#zx)", + alignment, sz, sz); rsz = sallocx(ps[i], 0); smz = ret.size; expect_zu_ge(rsz, sz, "Real size smaller than expected for " - "alignment=%zu, size=%zu", alignment, sz); + "alignment=%zu, size=%zu", + alignment, sz); expect_zu_eq(nsz, rsz, "nallocx()/sallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); + 
"alignment=%zu, size=%zu", + alignment, sz); expect_zu_eq(nsz, smz, "nallocx()/smallocx() size mismatch for " - "alignment=%zu, size=%zu", alignment, sz); - expect_ptr_null( - (void *)((uintptr_t)ps[i] & (alignment-1)), - "%p inadequately aligned for" - " alignment=%zu, size=%zu", ps[i], + "alignment=%zu, size=%zu", alignment, sz); + expect_ptr_null((void *)((uintptr_t)ps[i] + & (alignment - 1)), + "%p inadequately aligned for" + " alignment=%zu, size=%zu", + ps[i], alignment, sz); total += rsz; if (total >= (MAXALIGN << 1)) { break; @@ -303,10 +307,6 @@ TEST_END int main(void) { - return test( - test_overflow, - test_oom, - test_remote_free, - test_basic, + return test(test_overflow, test_oom, test_remote_free, test_basic, test_alignment_and_size); } diff --git a/test/integration/thread_arena.c b/test/integration/thread_arena.c index 4a6abf64..48062183 100644 --- a/test/integration/thread_arena.c +++ b/test/integration/thread_arena.c @@ -5,10 +5,10 @@ void * thd_start(void *arg) { unsigned main_arena_ind = *(unsigned *)arg; - void *p; + void *p; unsigned arena_ind; - size_t size; - int err; + size_t size; + int err; p = malloc(1); expect_ptr_not_null(p, "Error in malloc()"); @@ -16,7 +16,7 @@ thd_start(void *arg) { size = sizeof(arena_ind); if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, - (void *)&main_arena_ind, sizeof(main_arena_ind)))) { + (void *)&main_arena_ind, sizeof(main_arena_ind)))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -24,8 +24,8 @@ thd_start(void *arg) { } size = sizeof(arena_ind); - if ((err = mallctl("thread.arena", (void *)&arena_ind, &size, NULL, - 0))) { + if ((err = mallctl( + "thread.arena", (void *)&arena_ind, &size, NULL, 0))) { char buf[BUFERROR_BUF]; buferror(err, buf, sizeof(buf)); @@ -46,28 +46,28 @@ mallctl_failure(int err) { } TEST_BEGIN(test_thread_arena) { - void *p; - int err; - thd_t thds[NTHREADS]; + void *p; + int err; + thd_t thds[NTHREADS]; unsigned i; p = malloc(1); expect_ptr_not_null(p, 
"Error in malloc()"); unsigned arena_ind, old_arena_ind; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Arena creation failure"); size_t size = sizeof(arena_ind); if ((err = mallctl("thread.arena", (void *)&old_arena_ind, &size, - (void *)&arena_ind, sizeof(arena_ind))) != 0) { + (void *)&arena_ind, sizeof(arena_ind))) + != 0) { mallctl_failure(err); } for (i = 0; i < NTHREADS; i++) { - thd_create(&thds[i], thd_start, - (void *)&arena_ind); + thd_create(&thds[i], thd_start, (void *)&arena_ind); } for (i = 0; i < NTHREADS; i++) { @@ -81,6 +81,5 @@ TEST_END int main(void) { - return test( - test_thread_arena); + return test(test_thread_arena); } diff --git a/test/integration/thread_tcache_enabled.c b/test/integration/thread_tcache_enabled.c index d44dbe90..3c7c95f6 100644 --- a/test/integration/thread_tcache_enabled.c +++ b/test/integration/thread_tcache_enabled.c @@ -2,60 +2,69 @@ void * thd_start(void *arg) { - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); if (e0) { e1 = false; expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); } e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be disabled"); e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + 
mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be disabled"); free(malloc(1)); e1 = true; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_true(e0, "tcache should be enabled"); free(malloc(1)); e1 = false; - expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &sz, - (void *)&e1, sz), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("thread.tcache.enabled", (void *)&e0, &sz, (void *)&e1, sz), + 0, "Unexpected mallctl() error"); expect_false(e0, "tcache should be 
disabled"); free(malloc(1)); @@ -78,10 +87,6 @@ TEST_END int main(void) { /* Run tests multiple times to check for bad interactions. */ - return test( - test_main_thread, - test_subthread, - test_main_thread, - test_subthread, - test_main_thread); + return test(test_main_thread, test_subthread, test_main_thread, + test_subthread, test_main_thread); } diff --git a/test/integration/xallocx.c b/test/integration/xallocx.c index 13708548..9b5ebcde 100644 --- a/test/integration/xallocx.c +++ b/test/integration/xallocx.c @@ -11,15 +11,16 @@ arena_ind(void) { if (ind == 0) { size_t sz = sizeof(ind); - expect_d_eq(mallctl("arenas.create", (void *)&ind, &sz, NULL, - 0), 0, "Unexpected mallctl failure creating arena"); + expect_d_eq( + mallctl("arenas.create", (void *)&ind, &sz, NULL, 0), 0, + "Unexpected mallctl failure creating arena"); } return ind; } TEST_BEGIN(test_same_size) { - void *p; + void *p; size_t sz, tsz; p = mallocx(42, 0); @@ -34,14 +35,14 @@ TEST_BEGIN(test_same_size) { TEST_END TEST_BEGIN(test_extra_no_move) { - void *p; + void *p; size_t sz, tsz; p = mallocx(42, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); sz = sallocx(p, 0); - tsz = xallocx(p, sz, sz-42, 0); + tsz = xallocx(p, sz, sz - 42, 0); expect_zu_eq(tsz, sz, "Unexpected size change: %zu --> %zu", sz, tsz); dallocx(p, 0); @@ -49,7 +50,7 @@ TEST_BEGIN(test_extra_no_move) { TEST_END TEST_BEGIN(test_no_move_fail) { - void *p; + void *p; size_t sz, tsz; p = mallocx(42, 0); @@ -66,7 +67,7 @@ TEST_END static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -93,12 +94,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) 
failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); return ret; } @@ -115,25 +116,25 @@ get_large_size(size_t ind) { TEST_BEGIN(test_size) { size_t small0, largemax; - void *p; + void *p; /* Get size classes. */ small0 = get_small_size(0); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(small0, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test smallest supported size. */ - expect_zu_eq(xallocx(p, 1, 0, 0), small0, - "Unexpected xallocx() behavior"); + expect_zu_eq( + xallocx(p, 1, 0, 0), small0, "Unexpected xallocx() behavior"); /* Test largest supported size. */ expect_zu_le(xallocx(p, largemax, 0, 0), largemax, "Unexpected xallocx() behavior"); /* Test size overflow. */ - expect_zu_le(xallocx(p, largemax+1, 0, 0), largemax, + expect_zu_le(xallocx(p, largemax + 1, 0, 0), largemax, "Unexpected xallocx() behavior"); expect_zu_le(xallocx(p, SIZE_T_MAX, 0, 0), largemax, "Unexpected xallocx() behavior"); @@ -144,29 +145,29 @@ TEST_END TEST_BEGIN(test_size_extra_overflow) { size_t small0, largemax; - void *p; + void *p; /* Get size classes. */ small0 = get_small_size(0); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(small0, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); /* Test overflows that can be resolved by clamping extra. */ - expect_zu_le(xallocx(p, largemax-1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax - 1, 2, 0), largemax, "Unexpected xallocx() behavior"); expect_zu_le(xallocx(p, largemax, 1, 0), largemax, "Unexpected xallocx() behavior"); /* Test overflow such that largemax-size underflows. 
*/ - expect_zu_le(xallocx(p, largemax+1, 2, 0), largemax, + expect_zu_le(xallocx(p, largemax + 1, 2, 0), largemax, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, largemax+2, 3, 0), largemax, + expect_zu_le(xallocx(p, largemax + 2, 3, 0), largemax, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, SIZE_T_MAX-2, 2, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX - 2, 2, 0), largemax, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, SIZE_T_MAX-1, 1, 0), largemax, + expect_zu_le(xallocx(p, SIZE_T_MAX - 1, 1, 0), largemax, "Unexpected xallocx() behavior"); dallocx(p, 0); @@ -175,21 +176,21 @@ TEST_END TEST_BEGIN(test_extra_small) { size_t small0, small1, largemax; - void *p; + void *p; /* Get size classes. */ small0 = get_small_size(0); small1 = get_small_size(1); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(small0, 0); expect_ptr_not_null(p, "Unexpected mallocx() error"); - expect_zu_eq(xallocx(p, small1, 0, 0), small0, - "Unexpected xallocx() behavior"); + expect_zu_eq( + xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); - expect_zu_eq(xallocx(p, small1, 0, 0), small0, - "Unexpected xallocx() behavior"); + expect_zu_eq( + xallocx(p, small1, 0, 0), small0, "Unexpected xallocx() behavior"); expect_zu_eq(xallocx(p, small0, small1 - small0, 0), small0, "Unexpected xallocx() behavior"); @@ -205,16 +206,16 @@ TEST_BEGIN(test_extra_small) { TEST_END TEST_BEGIN(test_extra_large) { - int flags = MALLOCX_ARENA(arena_ind()); + int flags = MALLOCX_ARENA(arena_ind()); size_t smallmax, large1, large2, large3, largemax; - void *p; + void *p; /* Get size classes. 
*/ - smallmax = get_small_size(get_nsmall()-1); + smallmax = get_small_size(get_nsmall() - 1); large1 = get_large_size(1); large2 = get_large_size(2); large3 = get_large_size(3); - largemax = get_large_size(get_nlarge()-1); + largemax = get_large_size(get_nlarge() - 1); p = mallocx(large3, flags); expect_ptr_not_null(p, "Unexpected mallocx() error"); @@ -246,7 +247,7 @@ TEST_BEGIN(test_extra_large) { /* Test size increase with zero extra. */ expect_zu_le(xallocx(p, large3, 0, flags), large3, "Unexpected xallocx() behavior"); - expect_zu_le(xallocx(p, largemax+1, 0, flags), large3, + expect_zu_le(xallocx(p, largemax + 1, 0, flags), large3, "Unexpected xallocx() behavior"); expect_zu_ge(xallocx(p, large1, 0, flags), large1, @@ -276,8 +277,8 @@ TEST_END static void print_filled_extents(const void *p, uint8_t c, size_t len) { const uint8_t *pc = (const uint8_t *)p; - size_t i, range0; - uint8_t c0; + size_t i, range0; + uint8_t c0; malloc_printf(" p=%p, c=%#x, len=%zu:", p, c, len); range0 = 0; @@ -295,10 +296,10 @@ print_filled_extents(const void *p, uint8_t c, size_t len) { static bool validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { const uint8_t *pc = (const uint8_t *)p; - bool err; - size_t i; + bool err; + size_t i; - for (i = offset, err = false; i < offset+len; i++) { + for (i = offset, err = false; i < offset + len; i++) { if (pc[i] != c) { err = true; } @@ -313,16 +314,16 @@ validate_fill(const void *p, uint8_t c, size_t offset, size_t len) { static void test_zero(size_t szmin, size_t szmax) { - int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; + int flags = MALLOCX_ARENA(arena_ind()) | MALLOCX_ZERO; size_t sz, nsz; - void *p; + void *p; #define FILL_BYTE 0x7aU sz = szmax; p = mallocx(sz, flags); expect_ptr_not_null(p, "Unexpected mallocx() error"); - expect_false(validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", - sz); + expect_false( + validate_fill(p, 0x00, 0, sz), "Memory not filled: sz=%zu", sz); /* * Fill with non-zero 
so that non-debug builds are more likely to detect @@ -342,16 +343,16 @@ test_zero(size_t szmin, size_t szmax) { "Memory not filled: sz=%zu", sz); for (sz = szmin; sz < szmax; sz = nsz) { - nsz = nallocx(sz+1, flags); - if (xallocx(p, sz+1, 0, flags) != nsz) { - p = rallocx(p, sz+1, flags); + nsz = nallocx(sz + 1, flags); + if (xallocx(p, sz + 1, 0, flags) != nsz) { + p = rallocx(p, sz + 1, flags); expect_ptr_not_null(p, "Unexpected rallocx() failure"); } expect_false(validate_fill(p, FILL_BYTE, 0, sz), "Memory not filled: sz=%zu", sz); - expect_false(validate_fill(p, 0x00, sz, nsz-sz), - "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz-sz); - memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz-sz); + expect_false(validate_fill(p, 0x00, sz, nsz - sz), + "Memory not filled: sz=%zu, nsz-sz=%zu", sz, nsz - sz); + memset((void *)((uintptr_t)p + sz), FILL_BYTE, nsz - sz); expect_false(validate_fill(p, FILL_BYTE, 0, nsz), "Memory not filled: nsz=%zu", nsz); } @@ -372,13 +373,7 @@ TEST_END int main(void) { - return test( - test_same_size, - test_extra_no_move, - test_no_move_fail, - test_size, - test_size_extra_overflow, - test_extra_small, - test_extra_large, - test_zero_large); + return test(test_same_size, test_extra_no_move, test_no_move_fail, + test_size, test_size_extra_overflow, test_extra_small, + test_extra_large, test_zero_large); } diff --git a/test/src/SFMT.c b/test/src/SFMT.c index c05e2183..87b1fd1c 100644 --- a/test/src/SFMT.c +++ b/test/src/SFMT.c @@ -50,19 +50,19 @@ #include "test/SFMT-params.h" #if defined(JEMALLOC_BIG_ENDIAN) && !defined(BIG_ENDIAN64) -#define BIG_ENDIAN64 1 +# define BIG_ENDIAN64 1 #endif #if defined(__BIG_ENDIAN__) && !defined(__amd64) && !defined(BIG_ENDIAN64) -#define BIG_ENDIAN64 1 +# define BIG_ENDIAN64 1 #endif #if defined(HAVE_ALTIVEC) && !defined(BIG_ENDIAN64) -#define BIG_ENDIAN64 1 +# define BIG_ENDIAN64 1 #endif #if defined(ONLY64) && !defined(BIG_ENDIAN64) - #if defined(__GNUC__) - #error "-DONLY64 must be specified with 
-DBIG_ENDIAN64" - #endif -#undef ONLY64 +# if defined(__GNUC__) +# error "-DONLY64 must be specified with -DBIG_ENDIAN64" +# endif +# undef ONLY64 #endif /*------------------------------------------------------ 128-bit SIMD data type for Altivec, SSE2 or standard C @@ -70,8 +70,8 @@ #if defined(HAVE_ALTIVEC) /** 128-bit data structure */ union W128_T { - vector unsigned int s; - uint32_t u[4]; + vector unsigned int s; + uint32_t u[4]; }; /** 128-bit data type */ typedef union W128_T w128_t; @@ -79,8 +79,8 @@ typedef union W128_T w128_t; #elif defined(HAVE_SSE2) /** 128-bit data structure */ union W128_T { - __m128i si; - uint32_t u[4]; + __m128i si; + uint32_t u[4]; }; /** 128-bit data type */ typedef union W128_T w128_t; @@ -89,7 +89,7 @@ typedef union W128_T w128_t; /** 128-bit data structure */ struct W128_T { - uint32_t u[4]; + uint32_t u[4]; }; /** 128-bit data type */ typedef struct W128_T w128_t; @@ -97,13 +97,13 @@ typedef struct W128_T w128_t; #endif struct sfmt_s { - /** the 128-bit internal state array */ - w128_t sfmt[N]; - /** index counter to the 32-bit internal state array */ - int idx; - /** a flag: it is 0 if and only if the internal state is not yet + /** the 128-bit internal state array */ + w128_t sfmt[N]; + /** index counter to the 32-bit internal state array */ + int idx; + /** a flag: it is 0 if and only if the internal state is not yet * initialized. 
*/ - int initialized; + int initialized; }; /*-------------------------------------- @@ -119,22 +119,22 @@ static uint32_t parity[4] = {PARITY1, PARITY2, PARITY3, PARITY4}; ----------------*/ static inline int idxof(int i); #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -static inline void rshift128(w128_t *out, w128_t const *in, int shift); -static inline void lshift128(w128_t *out, w128_t const *in, int shift); +static inline void rshift128(w128_t *out, w128_t const *in, int shift); +static inline void lshift128(w128_t *out, w128_t const *in, int shift); #endif -static inline void gen_rand_all(sfmt_t *ctx); -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); +static inline void gen_rand_all(sfmt_t *ctx); +static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size); static inline uint32_t func1(uint32_t x); static inline uint32_t func2(uint32_t x); -static void period_certification(sfmt_t *ctx); +static void period_certification(sfmt_t *ctx); #if defined(BIG_ENDIAN64) && !defined(ONLY64) static inline void swap(w128_t *array, int size); #endif #if defined(HAVE_ALTIVEC) - #include "test/SFMT-alti.h" +# include "test/SFMT-alti.h" #elif defined(HAVE_SSE2) - #include "test/SFMT-sse2.h" +# include "test/SFMT-sse2.h" #endif /** @@ -142,12 +142,14 @@ static inline void swap(w128_t *array, int size); * in BIG ENDIAN machine. 
*/ #ifdef ONLY64 -static inline int idxof(int i) { - return i ^ 1; +static inline int +idxof(int i) { + return i ^ 1; } #else -static inline int idxof(int i) { - return i; +static inline int +idxof(int i) { + return i; } #endif /** @@ -159,37 +161,39 @@ static inline int idxof(int i) { * @param shift the shift value */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -#ifdef ONLY64 -static inline void rshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# ifdef ONLY64 +static inline void +rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); - tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); - oh = th >> (shift * 8); - ol = tl >> (shift * 8); - ol |= th << (64 - shift * 8); - out->u[0] = (uint32_t)(ol >> 32); - out->u[1] = (uint32_t)ol; - out->u[2] = (uint32_t)(oh >> 32); - out->u[3] = (uint32_t)oh; + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; } -#else -static inline void rshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# else +static inline void +rshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); - tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); - oh = th >> (shift * 8); - ol = tl >> (shift * 8); - ol |= th << (64 - shift * 8); - out->u[1] = (uint32_t)(ol >> 32); - out->u[0] = (uint32_t)ol; - out->u[3] = (uint32_t)(oh >> 32); - out->u[2] = (uint32_t)oh; + oh = th >> (shift * 8); + ol = tl >> (shift * 8); + ol |= th << (64 
- shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; } -#endif +# endif /** * This function simulates SIMD 128-bit left shift by the standard C. * The 128-bit integer given in in is shifted by (shift * 8) bits. @@ -198,37 +202,39 @@ static inline void rshift128(w128_t *out, w128_t const *in, int shift) { * @param in the 128-bit data to be shifted * @param shift the shift value */ -#ifdef ONLY64 -static inline void lshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# ifdef ONLY64 +static inline void +lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); - tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); + th = ((uint64_t)in->u[2] << 32) | ((uint64_t)in->u[3]); + tl = ((uint64_t)in->u[0] << 32) | ((uint64_t)in->u[1]); - oh = th << (shift * 8); - ol = tl << (shift * 8); - oh |= tl >> (64 - shift * 8); - out->u[0] = (uint32_t)(ol >> 32); - out->u[1] = (uint32_t)ol; - out->u[2] = (uint32_t)(oh >> 32); - out->u[3] = (uint32_t)oh; + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[0] = (uint32_t)(ol >> 32); + out->u[1] = (uint32_t)ol; + out->u[2] = (uint32_t)(oh >> 32); + out->u[3] = (uint32_t)oh; } -#else -static inline void lshift128(w128_t *out, w128_t const *in, int shift) { - uint64_t th, tl, oh, ol; +# else +static inline void +lshift128(w128_t *out, w128_t const *in, int shift) { + uint64_t th, tl, oh, ol; - th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); - tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); + th = ((uint64_t)in->u[3] << 32) | ((uint64_t)in->u[2]); + tl = ((uint64_t)in->u[1] << 32) | ((uint64_t)in->u[0]); - oh = th << (shift * 8); - ol = tl << (shift * 8); - oh |= tl >> (64 - shift * 8); - out->u[1] = (uint32_t)(ol >> 32); - out->u[0] = (uint32_t)ol; - out->u[3] = (uint32_t)(oh >> 32); - 
out->u[2] = (uint32_t)oh; + oh = th << (shift * 8); + ol = tl << (shift * 8); + oh |= tl >> (64 - shift * 8); + out->u[1] = (uint32_t)(ol >> 32); + out->u[0] = (uint32_t)ol; + out->u[3] = (uint32_t)(oh >> 32); + out->u[2] = (uint32_t)oh; } -#endif +# endif #endif /** @@ -240,41 +246,41 @@ static inline void lshift128(w128_t *out, w128_t const *in, int shift) { * @param d a 128-bit part of the internal state array */ #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) -#ifdef ONLY64 -static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { - w128_t x; - w128_t y; +# ifdef ONLY64 +static inline void +do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { + w128_t x; + w128_t y; - lshift128(&x, a, SL2); - rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] - ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] - ^ (d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] - ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] - ^ (d->u[3] << SL1); + lshift128(&x, a, SL2); + rshift128(&y, c, SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK2) ^ y.u[0] + ^ (d->u[0] << SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK1) ^ y.u[1] + ^ (d->u[1] << SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK4) ^ y.u[2] + ^ (d->u[2] << SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK3) ^ y.u[3] + ^ (d->u[3] << SL1); } -#else -static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, - w128_t *d) { - w128_t x; - w128_t y; +# else +static inline void +do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, w128_t *d) { + w128_t x; + w128_t y; - lshift128(&x, a, SL2); - rshift128(&y, c, SR2); - r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] - ^ (d->u[0] << SL1); - r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] - ^ 
(d->u[1] << SL1); - r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] - ^ (d->u[2] << SL1); - r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] - ^ (d->u[3] << SL1); + lshift128(&x, a, SL2); + rshift128(&y, c, SR2); + r->u[0] = a->u[0] ^ x.u[0] ^ ((b->u[0] >> SR1) & MSK1) ^ y.u[0] + ^ (d->u[0] << SL1); + r->u[1] = a->u[1] ^ x.u[1] ^ ((b->u[1] >> SR1) & MSK2) ^ y.u[1] + ^ (d->u[1] << SL1); + r->u[2] = a->u[2] ^ x.u[2] ^ ((b->u[2] >> SR1) & MSK3) ^ y.u[2] + ^ (d->u[2] << SL1); + r->u[3] = a->u[3] ^ x.u[3] ^ ((b->u[3] >> SR1) & MSK4) ^ y.u[3] + ^ (d->u[3] << SL1); } -#endif +# endif #endif #if (!defined(HAVE_ALTIVEC)) && (!defined(HAVE_SSE2)) @@ -282,24 +288,25 @@ static inline void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *c, * This function fills the internal state array with pseudorandom * integers. */ -static inline void gen_rand_all(sfmt_t *ctx) { - int i; - w128_t *r1, *r2; +static inline void +gen_rand_all(sfmt_t *ctx) { + int i; + w128_t *r1, *r2; - r1 = &ctx->sfmt[N - 2]; - r2 = &ctx->sfmt[N - 1]; - for (i = 0; i < N - POS1; i++) { - do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, - r2); - r1 = r2; - r2 = &ctx->sfmt[i]; - } - for (; i < N; i++) { - do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1 - N], r1, - r2); - r1 = r2; - r2 = &ctx->sfmt[i]; - } + r1 = &ctx->sfmt[N - 2]; + r2 = &ctx->sfmt[N - 1]; + for (i = 0; i < N - POS1; i++) { + do_recursion( + &ctx->sfmt[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); + r1 = r2; + r2 = &ctx->sfmt[i]; + } + for (; i < N; i++) { + do_recursion(&ctx->sfmt[i], &ctx->sfmt[i], + &ctx->sfmt[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &ctx->sfmt[i]; + } } /** @@ -309,52 +316,58 @@ static inline void gen_rand_all(sfmt_t *ctx) { * @param array an 128-bit array to be filled by pseudorandom numbers. * @param size number of 128-bit pseudorandom numbers to be generated. 
*/ -static inline void gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { - int i, j; - w128_t *r1, *r2; +static inline void +gen_rand_array(sfmt_t *ctx, w128_t *array, int size) { + int i, j; + w128_t *r1, *r2; - r1 = &ctx->sfmt[N - 2]; - r2 = &ctx->sfmt[N - 1]; - for (i = 0; i < N - POS1; i++) { - do_recursion(&array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); - r1 = r2; - r2 = &array[i]; - } - for (; i < N; i++) { - do_recursion(&array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - } - for (; i < size - N; i++) { - do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - } - for (j = 0; j < 2 * N - size; j++) { - ctx->sfmt[j] = array[j + size - N]; - } - for (; i < size; i++, j++) { - do_recursion(&array[i], &array[i - N], &array[i + POS1 - N], r1, r2); - r1 = r2; - r2 = &array[i]; - ctx->sfmt[j] = array[i]; - } + r1 = &ctx->sfmt[N - 2]; + r2 = &ctx->sfmt[N - 1]; + for (i = 0; i < N - POS1; i++) { + do_recursion( + &array[i], &ctx->sfmt[i], &ctx->sfmt[i + POS1], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (; i < N; i++) { + do_recursion( + &array[i], &ctx->sfmt[i], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (; i < size - N; i++) { + do_recursion( + &array[i], &array[i - N], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + } + for (j = 0; j < 2 * N - size; j++) { + ctx->sfmt[j] = array[j + size - N]; + } + for (; i < size; i++, j++) { + do_recursion( + &array[i], &array[i - N], &array[i + POS1 - N], r1, r2); + r1 = r2; + r2 = &array[i]; + ctx->sfmt[j] = array[i]; + } } #endif #if defined(BIG_ENDIAN64) && !defined(ONLY64) && !defined(HAVE_ALTIVEC) -static inline void swap(w128_t *array, int size) { - int i; - uint32_t x, y; +static inline void +swap(w128_t *array, int size) { + int i; + uint32_t x, y; - for (i = 0; i < size; i++) { - x = array[i].u[0]; - y = array[i].u[2]; - array[i].u[0] = array[i].u[1]; - array[i].u[2] = 
array[i].u[3]; - array[i].u[1] = x; - array[i].u[3] = y; - } + for (i = 0; i < size; i++) { + x = array[i].u[0]; + y = array[i].u[2]; + array[i].u[0] = array[i].u[1]; + array[i].u[2] = array[i].u[3]; + array[i].u[1] = x; + array[i].u[3] = y; + } } #endif /** @@ -363,8 +376,9 @@ static inline void swap(w128_t *array, int size) { * @param x 32-bit integer * @return 32-bit integer */ -static uint32_t func1(uint32_t x) { - return (x ^ (x >> 27)) * (uint32_t)1664525UL; +static uint32_t +func1(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1664525UL; } /** @@ -373,39 +387,41 @@ static uint32_t func1(uint32_t x) { * @param x 32-bit integer * @return 32-bit integer */ -static uint32_t func2(uint32_t x) { - return (x ^ (x >> 27)) * (uint32_t)1566083941UL; +static uint32_t +func2(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1566083941UL; } /** * This function certificate the period of 2^{MEXP} */ -static void period_certification(sfmt_t *ctx) { - int inner = 0; - int i, j; - uint32_t work; - uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; +static void +period_certification(sfmt_t *ctx) { + int inner = 0; + int i, j; + uint32_t work; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; - for (i = 0; i < 4; i++) - inner ^= psfmt32[idxof(i)] & parity[i]; - for (i = 16; i > 0; i >>= 1) - inner ^= inner >> i; - inner &= 1; - /* check OK */ - if (inner == 1) { - return; - } - /* check NG, and modification */ - for (i = 0; i < 4; i++) { - work = 1; - for (j = 0; j < 32; j++) { - if ((work & parity[i]) != 0) { - psfmt32[idxof(i)] ^= work; + for (i = 0; i < 4; i++) + inner ^= psfmt32[idxof(i)] & parity[i]; + for (i = 16; i > 0; i >>= 1) + inner ^= inner >> i; + inner &= 1; + /* check OK */ + if (inner == 1) { return; - } - work = work << 1; } - } + /* check NG, and modification */ + for (i = 0; i < 4; i++) { + work = 1; + for (j = 0; j < 32; j++) { + if ((work & parity[i]) != 0) { + psfmt32[idxof(i)] ^= work; + return; + } + work = work << 1; + } + } } /*---------------- @@ -416,8 +432,9 @@ 
static void period_certification(sfmt_t *ctx) { * The string shows the word size, the Mersenne exponent, * and all parameters of this generator. */ -const char *get_idstring(void) { - return IDSTR; +const char * +get_idstring(void) { + return IDSTR; } /** @@ -425,8 +442,9 @@ const char *get_idstring(void) { * fill_array32() function. * @return minimum size of array used for fill_array32() function. */ -int get_min_array_size32(void) { - return N32; +int +get_min_array_size32(void) { + return N32; } /** @@ -434,8 +452,9 @@ int get_min_array_size32(void) { * fill_array64() function. * @return minimum size of array used for fill_array64() function. */ -int get_min_array_size64(void) { - return N64; +int +get_min_array_size64(void) { + return N64; } #ifndef ONLY64 @@ -444,32 +463,34 @@ int get_min_array_size64(void) { * init_gen_rand or init_by_array must be called before this function. * @return 32-bit pseudorandom number */ -uint32_t gen_rand32(sfmt_t *ctx) { - uint32_t r; - uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; +uint32_t +gen_rand32(sfmt_t *ctx) { + uint32_t r; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; - assert(ctx->initialized); - if (ctx->idx >= N32) { - gen_rand_all(ctx); - ctx->idx = 0; - } - r = psfmt32[ctx->idx++]; - return r; + assert(ctx->initialized); + if (ctx->idx >= N32) { + gen_rand_all(ctx); + ctx->idx = 0; + } + r = psfmt32[ctx->idx++]; + return r; } /* Generate a random integer in [0..limit). 
*/ -uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { - uint32_t ret, above; +uint32_t +gen_rand32_range(sfmt_t *ctx, uint32_t limit) { + uint32_t ret, above; - above = 0xffffffffU - (0xffffffffU % limit); - while (1) { - ret = gen_rand32(ctx); - if (ret < above) { - ret %= limit; - break; + above = 0xffffffffU - (0xffffffffU % limit); + while (1) { + ret = gen_rand32(ctx); + if (ret < above) { + ret %= limit; + break; + } } - } - return ret; + return ret; } #endif /** @@ -479,47 +500,49 @@ uint32_t gen_rand32_range(sfmt_t *ctx, uint32_t limit) { * unless an initialization is again executed. * @return 64-bit pseudorandom number */ -uint64_t gen_rand64(sfmt_t *ctx) { +uint64_t +gen_rand64(sfmt_t *ctx) { #if defined(BIG_ENDIAN64) && !defined(ONLY64) - uint32_t r1, r2; - uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; + uint32_t r1, r2; + uint32_t *psfmt32 = &ctx->sfmt[0].u[0]; #else - uint64_t r; - uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0]; + uint64_t r; + uint64_t *psfmt64 = (uint64_t *)&ctx->sfmt[0].u[0]; #endif - assert(ctx->initialized); - assert(ctx->idx % 2 == 0); + assert(ctx->initialized); + assert(ctx->idx % 2 == 0); - if (ctx->idx >= N32) { - gen_rand_all(ctx); - ctx->idx = 0; - } + if (ctx->idx >= N32) { + gen_rand_all(ctx); + ctx->idx = 0; + } #if defined(BIG_ENDIAN64) && !defined(ONLY64) - r1 = psfmt32[ctx->idx]; - r2 = psfmt32[ctx->idx + 1]; - ctx->idx += 2; - return ((uint64_t)r2 << 32) | r1; + r1 = psfmt32[ctx->idx]; + r2 = psfmt32[ctx->idx + 1]; + ctx->idx += 2; + return ((uint64_t)r2 << 32) | r1; #else - r = psfmt64[ctx->idx / 2]; - ctx->idx += 2; - return r; + r = psfmt64[ctx->idx / 2]; + ctx->idx += 2; + return r; #endif } /* Generate a random integer in [0..limit). 
*/ -uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { - uint64_t ret, above; +uint64_t +gen_rand64_range(sfmt_t *ctx, uint64_t limit) { + uint64_t ret, above; - above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); - while (1) { - ret = gen_rand64(ctx); - if (ret < above) { - ret %= limit; - break; + above = KQU(0xffffffffffffffff) - (KQU(0xffffffffffffffff) % limit); + while (1) { + ret = gen_rand64(ctx); + if (ret < above) { + ret %= limit; + break; + } } - } - return ret; + return ret; } #ifndef ONLY64 @@ -548,14 +571,15 @@ uint64_t gen_rand64_range(sfmt_t *ctx, uint64_t limit) { * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. */ -void fill_array32(sfmt_t *ctx, uint32_t *array, int size) { - assert(ctx->initialized); - assert(ctx->idx == N32); - assert(size % 4 == 0); - assert(size >= N32); +void +fill_array32(sfmt_t *ctx, uint32_t *array, int size) { + assert(ctx->initialized); + assert(ctx->idx == N32); + assert(size % 4 == 0); + assert(size >= N32); - gen_rand_array(ctx, (w128_t *)array, size / 4); - ctx->idx = N32; + gen_rand_array(ctx, (w128_t *)array, size / 4); + ctx->idx = N32; } #endif @@ -584,17 +608,18 @@ void fill_array32(sfmt_t *ctx, uint32_t *array, int size) { * memory. Mac OSX doesn't have these functions, but \b malloc of OSX * returns the pointer to the aligned memory block. 
*/ -void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { - assert(ctx->initialized); - assert(ctx->idx == N32); - assert(size % 2 == 0); - assert(size >= N64); +void +fill_array64(sfmt_t *ctx, uint64_t *array, int size) { + assert(ctx->initialized); + assert(ctx->idx == N32); + assert(size % 2 == 0); + assert(size >= N64); - gen_rand_array(ctx, (w128_t *)array, size / 2); - ctx->idx = N32; + gen_rand_array(ctx, (w128_t *)array, size / 2); + ctx->idx = N32; #if defined(BIG_ENDIAN64) && !defined(ONLY64) - swap((w128_t *)array, size /2); + swap((w128_t *)array, size / 2); #endif } @@ -604,29 +629,31 @@ void fill_array64(sfmt_t *ctx, uint64_t *array, int size) { * * @param seed a 32-bit integer used as the seed. */ -sfmt_t *init_gen_rand(uint32_t seed) { - void *p; - sfmt_t *ctx; - int i; - uint32_t *psfmt32; +sfmt_t * +init_gen_rand(uint32_t seed) { + void *p; + sfmt_t *ctx; + int i; + uint32_t *psfmt32; - if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { - return NULL; - } - ctx = (sfmt_t *)p; - psfmt32 = &ctx->sfmt[0].u[0]; + if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + return NULL; + } + ctx = (sfmt_t *)p; + psfmt32 = &ctx->sfmt[0].u[0]; - psfmt32[idxof(0)] = seed; - for (i = 1; i < N32; i++) { - psfmt32[idxof(i)] = 1812433253UL * (psfmt32[idxof(i - 1)] - ^ (psfmt32[idxof(i - 1)] >> 30)) - + i; - } - ctx->idx = N32; - period_certification(ctx); - ctx->initialized = 1; + psfmt32[idxof(0)] = seed; + for (i = 1; i < N32; i++) { + psfmt32[idxof(i)] = 1812433253UL + * (psfmt32[idxof(i - 1)] + ^ (psfmt32[idxof(i - 1)] >> 30)) + + i; + } + ctx->idx = N32; + period_certification(ctx); + ctx->initialized = 1; - return ctx; + return ctx; } /** @@ -635,85 +662,87 @@ sfmt_t *init_gen_rand(uint32_t seed) { * @param init_key the array of 32-bit integers, used as a seed. * @param key_length the length of init_key. 
*/ -sfmt_t *init_by_array(uint32_t *init_key, int key_length) { - void *p; - sfmt_t *ctx; - int i, j, count; - uint32_t r; - int lag; - int mid; - int size = N * 4; - uint32_t *psfmt32; +sfmt_t * +init_by_array(uint32_t *init_key, int key_length) { + void *p; + sfmt_t *ctx; + int i, j, count; + uint32_t r; + int lag; + int mid; + int size = N * 4; + uint32_t *psfmt32; - if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { - return NULL; - } - ctx = (sfmt_t *)p; - psfmt32 = &ctx->sfmt[0].u[0]; + if (posix_memalign(&p, sizeof(w128_t), sizeof(sfmt_t)) != 0) { + return NULL; + } + ctx = (sfmt_t *)p; + psfmt32 = &ctx->sfmt[0].u[0]; - if (size >= 623) { - lag = 11; - } else if (size >= 68) { - lag = 7; - } else if (size >= 39) { - lag = 5; - } else { - lag = 3; - } - mid = (size - lag) / 2; + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; - memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt)); - if (key_length + 1 > N32) { - count = key_length + 1; - } else { - count = N32; - } - r = func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] - ^ psfmt32[idxof(N32 - 1)]); - psfmt32[idxof(mid)] += r; - r += key_length; - psfmt32[idxof(mid + lag)] += r; - psfmt32[idxof(0)] = r; + memset(ctx->sfmt, 0x8b, sizeof(ctx->sfmt)); + if (key_length + 1 > N32) { + count = key_length + 1; + } else { + count = N32; + } + r = func1( + psfmt32[idxof(0)] ^ psfmt32[idxof(mid)] ^ psfmt32[idxof(N32 - 1)]); + psfmt32[idxof(mid)] += r; + r += key_length; + psfmt32[idxof(mid + lag)] += r; + psfmt32[idxof(0)] = r; - count--; - for (i = 1, j = 0; (j < count) && (j < key_length); j++) { - r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] - ^ psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] += r; - r += init_key[j] + i; - psfmt32[idxof((i + mid + lag) % N32)] += r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; - } - for (; j < count; j++) { - r = 
func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] - ^ psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] += r; - r += i; - psfmt32[idxof((i + mid + lag) % N32)] += r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; - } - for (j = 0; j < N32; j++) { - r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] - + psfmt32[idxof((i + N32 - 1) % N32)]); - psfmt32[idxof((i + mid) % N32)] ^= r; - r -= i; - psfmt32[idxof((i + mid + lag) % N32)] ^= r; - psfmt32[idxof(i)] = r; - i = (i + 1) % N32; - } + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + ^ psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + for (; j < count; j++) { + r = func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % N32)] + ^ psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % N32)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } + for (j = 0; j < N32; j++) { + r = func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % N32)] + + psfmt32[idxof((i + N32 - 1) % N32)]); + psfmt32[idxof((i + mid) % N32)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % N32)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % N32; + } - ctx->idx = N32; - period_certification(ctx); - ctx->initialized = 1; + ctx->idx = N32; + period_certification(ctx); + ctx->initialized = 1; - return ctx; + return ctx; } -void fini_gen_rand(sfmt_t *ctx) { - assert(ctx != NULL); +void +fini_gen_rand(sfmt_t *ctx) { + assert(ctx != NULL); - ctx->initialized = 0; - free(ctx); + ctx->initialized = 0; + free(ctx); } diff --git a/test/src/mtx.c b/test/src/mtx.c index 6cb3ecd5..05c922bf 100644 --- a/test/src/mtx.c +++ b/test/src/mtx.c @@ -1,14 +1,14 @@ #include "test/jemalloc_test.h" #if defined(_WIN32) && 
!defined(_CRT_SPINCOUNT) -#define _CRT_SPINCOUNT 4000 +# define _CRT_SPINCOUNT 4000 #endif bool mtx_init(mtx_t *mtx) { #ifdef _WIN32 - if (!InitializeCriticalSectionAndSpinCount(&mtx->lock, - _CRT_SPINCOUNT)) { + if (!InitializeCriticalSectionAndSpinCount( + &mtx->lock, _CRT_SPINCOUNT)) { return true; } #elif (defined(JEMALLOC_OS_UNFAIR_LOCK)) diff --git a/test/src/sleep.c b/test/src/sleep.c index 2234b4bc..96b9b7bf 100644 --- a/test/src/sleep.c +++ b/test/src/sleep.c @@ -6,7 +6,7 @@ */ void sleep_ns(unsigned ns) { - assert(ns <= 1000*1000*1000); + assert(ns <= 1000 * 1000 * 1000); #ifdef _WIN32 Sleep(ns / 1000 / 1000); @@ -14,7 +14,7 @@ sleep_ns(unsigned ns) { { struct timespec timeout; - if (ns < 1000*1000*1000) { + if (ns < 1000 * 1000 * 1000) { timeout.tv_sec = 0; timeout.tv_nsec = ns; } else { diff --git a/test/src/test.c b/test/src/test.c index a21356d5..6eb84338 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -2,10 +2,10 @@ /* Test status state. */ -static unsigned test_count = 0; -static test_status_t test_counts[test_status_count] = {0, 0, 0}; -static test_status_t test_status = test_status_pass; -static const char * test_name = ""; +static unsigned test_count = 0; +static test_status_t test_counts[test_status_count] = {0, 0, 0}; +static test_status_t test_status = test_status_pass; +static const char *test_name = ""; /* Reentrancy testing helpers. */ @@ -89,10 +89,14 @@ test_fail(const char *format, ...) 
{ static const char * test_status_string(test_status_t current_status) { switch (current_status) { - case test_status_pass: return "pass"; - case test_status_skip: return "skip"; - case test_status_fail: return "fail"; - default: not_reached(); + case test_status_pass: + return "pass"; + case test_status_skip: + return "skip"; + case test_status_fail: + return "fail"; + default: + not_reached(); } } @@ -173,19 +177,16 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { } } - bool colored = test_counts[test_status_fail] != 0 && - isatty(STDERR_FILENO); + bool colored = test_counts[test_status_fail] != 0 + && isatty(STDERR_FILENO); const char *color_start = colored ? "\033[1;31m" : ""; const char *color_end = colored ? "\033[0m" : ""; malloc_printf("%s--- %s: %u/%u, %s: %u/%u, %s: %u/%u ---\n%s", - color_start, - test_status_string(test_status_pass), + color_start, test_status_string(test_status_pass), test_counts[test_status_pass], test_count, - test_status_string(test_status_skip), - test_counts[test_status_skip], test_count, - test_status_string(test_status_fail), - test_counts[test_status_fail], test_count, - color_end); + test_status_string(test_status_skip), test_counts[test_status_skip], + test_count, test_status_string(test_status_fail), + test_counts[test_status_fail], test_count, color_end); return ret; } @@ -193,7 +194,7 @@ p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { test_status_t p_test(test_t *t, ...) { test_status_t ret; - va_list ap; + va_list ap; ret = test_status_pass; va_start(ap, t); @@ -206,7 +207,7 @@ p_test(test_t *t, ...) { test_status_t p_test_no_reentrancy(test_t *t, ...) { test_status_t ret; - va_list ap; + va_list ap; ret = test_status_pass; va_start(ap, t); @@ -219,7 +220,7 @@ p_test_no_reentrancy(test_t *t, ...) { test_status_t p_test_no_malloc_init(test_t *t, ...) 
{ test_status_t ret; - va_list ap; + va_list ap; ret = test_status_pass; va_start(ap, t); @@ -235,12 +236,12 @@ p_test_no_malloc_init(test_t *t, ...) { void p_test_fail(bool may_abort, const char *prefix, const char *message) { - bool colored = test_counts[test_status_fail] != 0 && - isatty(STDERR_FILENO); + bool colored = test_counts[test_status_fail] != 0 + && isatty(STDERR_FILENO); const char *color_start = colored ? "\033[1;31m" : ""; const char *color_end = colored ? "\033[0m" : ""; - malloc_cprintf(NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, - color_end); + malloc_cprintf( + NULL, NULL, "%s%s%s\n%s", color_start, prefix, message, color_end); test_status = test_status_fail; if (may_abort) { abort(); diff --git a/test/src/thd.c b/test/src/thd.c index 8f91a595..634dc262 100644 --- a/test/src/thd.c +++ b/test/src/thd.c @@ -14,7 +14,7 @@ void thd_join(thd_t thd, void **ret) { if (WaitForSingleObject(thd, INFINITE) == WAIT_OBJECT_0 && ret) { DWORD exit_code; - GetExitCodeThread(thd, (LPDWORD) &exit_code); + GetExitCodeThread(thd, (LPDWORD)&exit_code); *ret = (void *)(uintptr_t)exit_code; } } @@ -44,7 +44,8 @@ thd_setname(const char *name) { bool thd_has_setname(void) { -#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP) || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) +#if defined(JEMALLOC_HAVE_PTHREAD_SETNAME_NP) \ + || defined(JEMALLOC_HAVE_PTHREAD_SET_NAME_NP) return true; #else return false; diff --git a/test/src/timer.c b/test/src/timer.c index 94528a34..017bf5a5 100644 --- a/test/src/timer.c +++ b/test/src/timer.c @@ -25,8 +25,8 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { uint64_t t0 = timer_usec(a); uint64_t t1 = timer_usec(b); uint64_t mult; - size_t i = 0; - size_t j, n; + size_t i = 0; + size_t j, n; /* * The time difference could be 0 if the two clock readings are @@ -36,11 +36,11 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { * Thus, bump t1 if it is 0 to avoid dividing 0. 
*/ if (t1 == 0) { - t1 = 1; + t1 = 1; } /* Whole. */ - n = malloc_snprintf(&buf[i], buflen-i, "%"FMTu64, t0 / t1); + n = malloc_snprintf(&buf[i], buflen - i, "%" FMTu64, t0 / t1); i += n; if (i >= buflen) { return; @@ -51,15 +51,17 @@ timer_ratio(timedelta_t *a, timedelta_t *b, char *buf, size_t buflen) { } /* Decimal. */ - n = malloc_snprintf(&buf[i], buflen-i, "."); + n = malloc_snprintf(&buf[i], buflen - i, "."); i += n; /* Fraction. */ - while (i < buflen-1) { - uint64_t round = (i+1 == buflen-1 && ((t0 * mult * 10 / t1) % 10 - >= 5)) ? 1 : 0; - n = malloc_snprintf(&buf[i], buflen-i, - "%"FMTu64, (t0 * mult / t1) % 10 + round); + while (i < buflen - 1) { + uint64_t round = (i + 1 == buflen - 1 + && ((t0 * mult * 10 / t1) % 10 >= 5)) + ? 1 + : 0; + n = malloc_snprintf(&buf[i], buflen - i, "%" FMTu64, + (t0 * mult / t1) % 10 + round); i += n; mult *= 10; } diff --git a/test/stress/batch_alloc.c b/test/stress/batch_alloc.c index 6b973bb1..46ed0bf7 100644 --- a/test/stress/batch_alloc.c +++ b/test/stress/batch_alloc.c @@ -10,9 +10,9 @@ static size_t miblen = MIBLEN; #define HUGE_BATCH (1000 * 1000) #define HUGE_BATCH_ITER 100 #define LEN (100 * 1000 * 1000) -static void *batch_ptrs[LEN]; +static void *batch_ptrs[LEN]; static size_t batch_ptrs_next = 0; -static void *item_ptrs[LEN]; +static void *item_ptrs[LEN]; static size_t item_ptrs_next = 0; #define SIZE 7 @@ -22,17 +22,18 @@ struct batch_alloc_packet_s { void **ptrs; size_t num; size_t size; - int flags; + int flags; }; static void batch_alloc_wrapper(size_t batch) { - batch_alloc_packet_t batch_alloc_packet = - {batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; + batch_alloc_packet_t batch_alloc_packet = { + batch_ptrs + batch_ptrs_next, batch, SIZE, 0}; size_t filled; size_t len = sizeof(size_t); assert_d_eq(mallctlbymib(mib, miblen, &filled, &len, - &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + &batch_alloc_packet, sizeof(batch_alloc_packet)), + 0, ""); assert_zu_eq(filled, batch, ""); } @@ -94,9 
+95,9 @@ compare_without_free(size_t batch, size_t iter, batch_ptrs_next = 0; release_and_clear(item_ptrs, item_ptrs_next); item_ptrs_next = 0; - compare_funcs(0, iter, - "batch allocation", batch_alloc_without_free_func, - "item allocation", item_alloc_without_free_func); + compare_funcs(0, iter, "batch allocation", + batch_alloc_without_free_func, "item allocation", + item_alloc_without_free_func); release_and_clear(batch_ptrs, batch_ptrs_next); batch_ptrs_next = 0; release_and_clear(item_ptrs, item_ptrs_next); @@ -116,8 +117,7 @@ compare_with_free(size_t batch, size_t iter, } batch_ptrs_next = 0; item_ptrs_next = 0; - compare_funcs(0, iter, - "batch allocation", batch_alloc_with_free_func, + compare_funcs(0, iter, "batch allocation", batch_alloc_with_free_func, "item allocation", item_alloc_with_free_func); batch_ptrs_next = 0; item_ptrs_next = 0; @@ -187,12 +187,11 @@ TEST_BEGIN(test_huge_batch_with_free) { } TEST_END -int main(void) { - assert_d_eq(mallctlnametomib("experimental.batch_alloc", mib, &miblen), - 0, ""); - return test_no_reentrancy( - test_tiny_batch_without_free, - test_tiny_batch_with_free, - test_huge_batch_without_free, +int +main(void) { + assert_d_eq( + mallctlnametomib("experimental.batch_alloc", mib, &miblen), 0, ""); + return test_no_reentrancy(test_tiny_batch_without_free, + test_tiny_batch_with_free, test_huge_batch_without_free, test_huge_batch_with_free); } diff --git a/test/stress/cpp/microbench.cpp b/test/stress/cpp/microbench.cpp index 7422d1ca..0c4697a6 100644 --- a/test/stress/cpp/microbench.cpp +++ b/test/stress/cpp/microbench.cpp @@ -3,7 +3,7 @@ static void malloc_free(void) { - void* p = malloc(1); + void *p = malloc(1); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); p = no_opt_ptr(p); free((void *)p); @@ -11,7 +11,7 @@ malloc_free(void) { static void new_delete(void) { - void* p = ::operator new(1); + void *p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); p = no_opt_ptr(p); 
::operator delete((void *)p); @@ -19,7 +19,7 @@ new_delete(void) { static void malloc_free_array(void) { - void* p = malloc(sizeof(int)*8); + void *p = malloc(sizeof(int) * 8); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); p = no_opt_ptr(p); free((void *)p); @@ -27,7 +27,7 @@ malloc_free_array(void) { static void new_delete_array(void) { - int* p = new int[8]; + int *p = new int[8]; expect_ptr_not_null((void *)p, "Unexpected new[] failure"); p = (int *)no_opt_ptr((void *)p); delete[] (int *)p; @@ -36,7 +36,7 @@ new_delete_array(void) { #if __cpp_sized_deallocation >= 201309 static void new_sized_delete(void) { - void* p = ::operator new(1); + void *p = ::operator new(1); expect_ptr_not_null((void *)p, "Unexpected new failure"); p = no_opt_ptr(p); ::operator delete((void *)p, 1); @@ -44,45 +44,41 @@ new_sized_delete(void) { static void malloc_sdallocx(void) { - void* p = malloc(1); + void *p = malloc(1); expect_ptr_not_null((void *)p, "Unexpected malloc failure"); p = no_opt_ptr(p); - sdallocx((void *)p, 1, 0); + sdallocx((void *)p, 1, 0); } #endif TEST_BEGIN(test_free_vs_delete) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free", (void *)malloc_free, - "new_delete", (void *)new_delete); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_free", + (void *)malloc_free, "new_delete", (void *)new_delete); } TEST_END TEST_BEGIN(test_free_array_vs_delete_array) { - compare_funcs(10*1000*1000, 100*1000*1000, - "malloc_free_array", (void *)malloc_free_array, - "delete_array", (void *)new_delete_array); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_free_array", + (void *)malloc_free_array, "delete_array", + (void *)new_delete_array); } TEST_END - TEST_BEGIN(test_sized_delete_vs_sdallocx) { #if __cpp_sized_deallocation >= 201309 - compare_funcs(10*1000*1000, 100*1000*1000, - "new_size_delete", (void *)new_sized_delete, - "malloc_sdallocx", (void *)malloc_sdallocx); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, 
"new_size_delete", + (void *)new_sized_delete, "malloc_sdallocx", + (void *)malloc_sdallocx); #else - malloc_printf("Skipping test_sized_delete_vs_sdallocx since \ + malloc_printf( + "Skipping test_sized_delete_vs_sdallocx since \ sized deallocation is not enabled.\n"); #endif } TEST_END - int main() { - return test_no_reentrancy( - test_free_vs_delete, - test_free_array_vs_delete_array, - test_sized_delete_vs_sdallocx); + return test_no_reentrancy(test_free_vs_delete, + test_free_array_vs_delete_array, test_sized_delete_vs_sdallocx); } diff --git a/test/stress/fill_flush.c b/test/stress/fill_flush.c index 546bcc0b..c7b13404 100644 --- a/test/stress/fill_flush.c +++ b/test/stress/fill_flush.c @@ -35,9 +35,9 @@ item_alloc_dalloc_small(void) { } TEST_BEGIN(test_array_vs_item_small) { - compare_funcs(1 * 1000, 10 * 1000, - "array of small allocations", array_alloc_dalloc_small, - "small item allocation", item_alloc_dalloc_small); + compare_funcs(1 * 1000, 10 * 1000, "array of small allocations", + array_alloc_dalloc_small, "small item allocation", + item_alloc_dalloc_small); } TEST_END @@ -64,14 +64,14 @@ item_alloc_dalloc_large(void) { } TEST_BEGIN(test_array_vs_item_large) { - compare_funcs(100, 1000, - "array of large allocations", array_alloc_dalloc_large, - "large item allocation", item_alloc_dalloc_large); + compare_funcs(100, 1000, "array of large allocations", + array_alloc_dalloc_large, "large item allocation", + item_alloc_dalloc_large); } TEST_END -int main(void) { +int +main(void) { return test_no_reentrancy( - test_array_vs_item_small, - test_array_vs_item_large); + test_array_vs_item_small, test_array_vs_item_large); } diff --git a/test/stress/hookbench.c b/test/stress/hookbench.c index 97e90b0e..455e4c56 100644 --- a/test/stress/hookbench.c +++ b/test/stress/hookbench.c @@ -2,19 +2,16 @@ static void noop_alloc_hook(void *extra, hook_alloc_t type, void *result, - uintptr_t result_raw, uintptr_t args_raw[3]) { -} + uintptr_t result_raw, uintptr_t 
args_raw[3]) {} static void -noop_dalloc_hook(void *extra, hook_dalloc_t type, void *address, - uintptr_t args_raw[3]) { -} +noop_dalloc_hook( + void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]) {} static void noop_expand_hook(void *extra, hook_expand_t type, void *address, size_t old_usize, size_t new_usize, uintptr_t result_raw, - uintptr_t args_raw[4]) { -} + uintptr_t args_raw[4]) {} static void malloc_free_loop(int iters) { @@ -26,23 +23,23 @@ malloc_free_loop(int iters) { static void test_hooked(int iters) { - hooks_t hooks = {&noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook, - NULL}; + hooks_t hooks = { + &noop_alloc_hook, &noop_dalloc_hook, &noop_expand_hook, NULL}; - int err; - void *handles[HOOK_MAX]; + int err; + void *handles[HOOK_MAX]; size_t sz = sizeof(handles[0]); for (int i = 0; i < HOOK_MAX; i++) { - err = mallctl("experimental.hooks.install", &handles[i], - &sz, &hooks, sizeof(hooks)); + err = mallctl("experimental.hooks.install", &handles[i], &sz, + &hooks, sizeof(hooks)); assert(err == 0); timedelta_t timer; timer_start(&timer); malloc_free_loop(iters); timer_stop(&timer); - malloc_printf("With %d hook%s: %"FMTu64"us\n", i + 1, + malloc_printf("With %d hook%s: %" FMTu64 "us\n", i + 1, i + 1 == 1 ? 
"" : "s", timer_usec(&timer)); } for (int i = 0; i < HOOK_MAX; i++) { @@ -59,7 +56,7 @@ test_unhooked(int iters) { malloc_free_loop(iters); timer_stop(&timer); - malloc_printf("Without hooks: %"FMTu64"us\n", timer_usec(&timer)); + malloc_printf("Without hooks: %" FMTu64 "us\n", timer_usec(&timer)); } int diff --git a/test/stress/large_microbench.c b/test/stress/large_microbench.c index 44a60c53..785ed836 100644 --- a/test/stress/large_microbench.c +++ b/test/stress/large_microbench.c @@ -22,14 +22,12 @@ small_mallocx_free(void) { } TEST_BEGIN(test_large_vs_small) { - compare_funcs(100*1000, 1*1000*1000, "large mallocx", + compare_funcs(100 * 1000, 1 * 1000 * 1000, "large mallocx", large_mallocx_free, "small mallocx", small_mallocx_free); } TEST_END int main(void) { - return test_no_reentrancy( - test_large_vs_small); + return test_no_reentrancy(test_large_vs_small); } - diff --git a/test/stress/mallctl.c b/test/stress/mallctl.c index d29b3118..b4c0f560 100644 --- a/test/stress/mallctl.c +++ b/test/stress/mallctl.c @@ -4,8 +4,8 @@ static void mallctl_short(void) { const char *version; - size_t sz = sizeof(version); - int err = mallctl("version", &version, &sz, NULL, 0); + size_t sz = sizeof(version); + int err = mallctl("version", &version, &sz, NULL, 0); assert_d_eq(err, 0, "mallctl failure"); } @@ -13,19 +13,19 @@ size_t mib_short[1]; static void mallctlbymib_short(void) { - size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + size_t miblen = sizeof(mib_short) / sizeof(mib_short[0]); const char *version; - size_t sz = sizeof(version); + size_t sz = sizeof(version); int err = mallctlbymib(mib_short, miblen, &version, &sz, NULL, 0); assert_d_eq(err, 0, "mallctlbymib failure"); } TEST_BEGIN(test_mallctl_vs_mallctlbymib_short) { - size_t miblen = sizeof(mib_short)/sizeof(mib_short[0]); + size_t miblen = sizeof(mib_short) / sizeof(mib_short[0]); int err = mallctlnametomib("version", mib_short, &miblen); assert_d_eq(err, 0, "mallctlnametomib failure"); - 
compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_short", + compare_funcs(10 * 1000 * 1000, 10 * 1000 * 1000, "mallctl_short", mallctl_short, "mallctlbymib_short", mallctlbymib_short); } TEST_END @@ -33,9 +33,9 @@ TEST_END static void mallctl_long(void) { uint64_t nmalloc; - size_t sz = sizeof(nmalloc); - int err = mallctl("stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, - 0); + size_t sz = sizeof(nmalloc); + int err = mallctl( + "stats.arenas.0.bins.0.nmalloc", &nmalloc, &sz, NULL, 0); assert_d_eq(err, 0, "mallctl failure"); } @@ -43,10 +43,10 @@ size_t mib_long[6]; static void mallctlbymib_long(void) { - size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); + size_t miblen = sizeof(mib_long) / sizeof(mib_long[0]); uint64_t nmalloc; - size_t sz = sizeof(nmalloc); - int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0); + size_t sz = sizeof(nmalloc); + int err = mallctlbymib(mib_long, miblen, &nmalloc, &sz, NULL, 0); assert_d_eq(err, 0, "mallctlbymib failure"); } @@ -57,18 +57,17 @@ TEST_BEGIN(test_mallctl_vs_mallctlbymib_long) { */ test_skip_if(!config_stats); - size_t miblen = sizeof(mib_long)/sizeof(mib_long[0]); - int err = mallctlnametomib("stats.arenas.0.bins.0.nmalloc", mib_long, - &miblen); + size_t miblen = sizeof(mib_long) / sizeof(mib_long[0]); + int err = mallctlnametomib( + "stats.arenas.0.bins.0.nmalloc", mib_long, &miblen); assert_d_eq(err, 0, "mallctlnametomib failure"); - compare_funcs(10*1000*1000, 10*1000*1000, "mallctl_long", + compare_funcs(10 * 1000 * 1000, 10 * 1000 * 1000, "mallctl_long", mallctl_long, "mallctlbymib_long", mallctlbymib_long); } TEST_END int main(void) { - return test_no_reentrancy( - test_mallctl_vs_mallctlbymib_short, + return test_no_reentrancy(test_mallctl_vs_mallctlbymib_short, test_mallctl_vs_mallctlbymib_long); } diff --git a/test/stress/microbench.c b/test/stress/microbench.c index 89479b7e..3d261a92 100644 --- a/test/stress/microbench.c +++ b/test/stress/microbench.c @@ -25,7 +25,7 @@ 
mallocx_free(void) { } TEST_BEGIN(test_malloc_vs_mallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "malloc", + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc", malloc_free, "mallocx", mallocx_free); } TEST_END @@ -53,14 +53,14 @@ malloc_sdallocx(void) { } TEST_BEGIN(test_free_vs_dallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "free", malloc_free, + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "free", malloc_free, "dallocx", malloc_dallocx); } TEST_END TEST_BEGIN(test_dallocx_vs_sdallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "dallocx", malloc_dallocx, - "sdallocx", malloc_sdallocx); + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "dallocx", + malloc_dallocx, "sdallocx", malloc_sdallocx); } TEST_END @@ -94,7 +94,7 @@ malloc_sallocx_free(void) { } TEST_BEGIN(test_mus_vs_sallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "malloc_usable_size", + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "malloc_usable_size", malloc_mus_free, "sallocx", malloc_sallocx_free); } TEST_END @@ -116,17 +116,14 @@ malloc_nallocx_free(void) { } TEST_BEGIN(test_sallocx_vs_nallocx) { - compare_funcs(10*1000*1000, 100*1000*1000, "sallocx", + compare_funcs(10 * 1000 * 1000, 100 * 1000 * 1000, "sallocx", malloc_sallocx_free, "nallocx", malloc_nallocx_free); } TEST_END int main(void) { - return test_no_reentrancy( - test_malloc_vs_mallocx, - test_free_vs_dallocx, - test_dallocx_vs_sdallocx, - test_mus_vs_sallocx, + return test_no_reentrancy(test_malloc_vs_mallocx, test_free_vs_dallocx, + test_dallocx_vs_sdallocx, test_mus_vs_sallocx, test_sallocx_vs_nallocx); } diff --git a/test/unit/SFMT.c b/test/unit/SFMT.c index b9f85dd9..8dbb61ed 100644 --- a/test/unit/SFMT.c +++ b/test/unit/SFMT.c @@ -40,1424 +40,1343 @@ #define COUNT_1 1000 #define COUNT_2 700 -static const uint32_t init_gen_rand_32_expected[] = { - 3440181298U, 1564997079U, 1510669302U, 2930277156U, 1452439940U, - 3796268453U, 423124208U, 2143818589U, 3827219408U, 2987036003U, - 
2674978610U, 1536842514U, 2027035537U, 2534897563U, 1686527725U, - 545368292U, 1489013321U, 1370534252U, 4231012796U, 3994803019U, - 1764869045U, 824597505U, 862581900U, 2469764249U, 812862514U, - 359318673U, 116957936U, 3367389672U, 2327178354U, 1898245200U, - 3206507879U, 2378925033U, 1040214787U, 2524778605U, 3088428700U, - 1417665896U, 964324147U, 2282797708U, 2456269299U, 313400376U, - 2245093271U, 1015729427U, 2694465011U, 3246975184U, 1992793635U, - 463679346U, 3721104591U, 3475064196U, 856141236U, 1499559719U, - 3522818941U, 3721533109U, 1954826617U, 1282044024U, 1543279136U, - 1301863085U, 2669145051U, 4221477354U, 3896016841U, 3392740262U, - 462466863U, 1037679449U, 1228140306U, 922298197U, 1205109853U, - 1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U, - 157593879U, 1136901695U, 4038377686U, 3572517236U, 4231706728U, - 2997311961U, 1189931652U, 3981543765U, 2826166703U, 87159245U, - 1721379072U, 3897926942U, 1790395498U, 2569178939U, 1047368729U, - 2340259131U, 3144212906U, 2301169789U, 2442885464U, 3034046771U, - 3667880593U, 3935928400U, 2372805237U, 1666397115U, 2460584504U, - 513866770U, 3810869743U, 2147400037U, 2792078025U, 2941761810U, - 3212265810U, 984692259U, 346590253U, 1804179199U, 3298543443U, - 750108141U, 2880257022U, 243310542U, 1869036465U, 1588062513U, - 2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U, - 2539522841U, 127965585U, 3992448871U, 913388237U, 559130076U, - 1202933193U, 4087643167U, 2590021067U, 2256240196U, 1746697293U, - 1013913783U, 1155864921U, 2715773730U, 915061862U, 1948766573U, - 2322882854U, 3761119102U, 1343405684U, 3078711943U, 3067431651U, - 3245156316U, 3588354584U, 3484623306U, 3899621563U, 4156689741U, - 3237090058U, 3880063844U, 862416318U, 4039923869U, 2303788317U, - 3073590536U, 701653667U, 2131530884U, 3169309950U, 2028486980U, - 747196777U, 3620218225U, 432016035U, 1449580595U, 2772266392U, - 444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U, - 1104864179U, 
342430307U, 1350510923U, 3024656237U, 1028417492U, - 2870772950U, 290847558U, 3675663500U, 508431529U, 4264340390U, - 2263569913U, 1669302976U, 519511383U, 2706411211U, 3764615828U, - 3883162495U, 4051445305U, 2412729798U, 3299405164U, 3991911166U, - 2348767304U, 2664054906U, 3763609282U, 593943581U, 3757090046U, - 2075338894U, 2020550814U, 4287452920U, 4290140003U, 1422957317U, - 2512716667U, 2003485045U, 2307520103U, 2288472169U, 3940751663U, - 4204638664U, 2892583423U, 1710068300U, 3904755993U, 2363243951U, - 3038334120U, 547099465U, 771105860U, 3199983734U, 4282046461U, - 2298388363U, 934810218U, 2837827901U, 3952500708U, 2095130248U, - 3083335297U, 26885281U, 3932155283U, 1531751116U, 1425227133U, - 495654159U, 3279634176U, 3855562207U, 3957195338U, 4159985527U, - 893375062U, 1875515536U, 1327247422U, 3754140693U, 1028923197U, - 1729880440U, 805571298U, 448971099U, 2726757106U, 2749436461U, - 2485987104U, 175337042U, 3235477922U, 3882114302U, 2020970972U, - 943926109U, 2762587195U, 1904195558U, 3452650564U, 108432281U, - 3893463573U, 3977583081U, 2636504348U, 1110673525U, 3548479841U, - 4258854744U, 980047703U, 4057175418U, 3890008292U, 145653646U, - 3141868989U, 3293216228U, 1194331837U, 1254570642U, 3049934521U, - 2868313360U, 2886032750U, 1110873820U, 279553524U, 3007258565U, - 1104807822U, 3186961098U, 315764646U, 2163680838U, 3574508994U, - 3099755655U, 191957684U, 3642656737U, 3317946149U, 3522087636U, - 444526410U, 779157624U, 1088229627U, 1092460223U, 1856013765U, - 3659877367U, 368270451U, 503570716U, 3000984671U, 2742789647U, - 928097709U, 2914109539U, 308843566U, 2816161253U, 3667192079U, - 2762679057U, 3395240989U, 2928925038U, 1491465914U, 3458702834U, - 3787782576U, 2894104823U, 1296880455U, 1253636503U, 989959407U, - 2291560361U, 2776790436U, 1913178042U, 1584677829U, 689637520U, - 1898406878U, 688391508U, 3385234998U, 845493284U, 1943591856U, - 2720472050U, 222695101U, 1653320868U, 2904632120U, 4084936008U, - 1080720688U, 3938032556U, 
387896427U, 2650839632U, 99042991U, - 1720913794U, 1047186003U, 1877048040U, 2090457659U, 517087501U, - 4172014665U, 2129713163U, 2413533132U, 2760285054U, 4129272496U, - 1317737175U, 2309566414U, 2228873332U, 3889671280U, 1110864630U, - 3576797776U, 2074552772U, 832002644U, 3097122623U, 2464859298U, - 2679603822U, 1667489885U, 3237652716U, 1478413938U, 1719340335U, - 2306631119U, 639727358U, 3369698270U, 226902796U, 2099920751U, - 1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U, - 841660320U, 3974501451U, 3360949056U, 1676829340U, 728899254U, - 2047809627U, 2390948962U, 670165943U, 3412951831U, 4189320049U, - 1911595255U, 2055363086U, 507170575U, 418219594U, 4141495280U, - 2692088692U, 4203630654U, 3540093932U, 791986533U, 2237921051U, - 2526864324U, 2956616642U, 1394958700U, 1983768223U, 1893373266U, - 591653646U, 228432437U, 1611046598U, 3007736357U, 1040040725U, - 2726180733U, 2789804360U, 4263568405U, 829098158U, 3847722805U, - 1123578029U, 1804276347U, 997971319U, 4203797076U, 4185199713U, - 2811733626U, 2343642194U, 2985262313U, 1417930827U, 3759587724U, - 1967077982U, 1585223204U, 1097475516U, 1903944948U, 740382444U, - 1114142065U, 1541796065U, 1718384172U, 1544076191U, 1134682254U, - 3519754455U, 2866243923U, 341865437U, 645498576U, 2690735853U, - 1046963033U, 2493178460U, 1187604696U, 1619577821U, 488503634U, - 3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U, - 3794467088U, 1796415981U, 3657173746U, 409136296U, 1387122342U, - 1297726519U, 219544855U, 4270285558U, 437578827U, 1444698679U, - 2258519491U, 963109892U, 3982244073U, 3351535275U, 385328496U, - 1804784013U, 698059346U, 3920535147U, 708331212U, 784338163U, - 785678147U, 1238376158U, 1557298846U, 2037809321U, 271576218U, - 4145155269U, 1913481602U, 2763691931U, 588981080U, 1201098051U, - 3717640232U, 1509206239U, 662536967U, 3180523616U, 1133105435U, - 2963500837U, 2253971215U, 3153642623U, 1066925709U, 2582781958U, - 3034720222U, 1090798544U, 2942170004U, 
4036187520U, 686972531U, - 2610990302U, 2641437026U, 1837562420U, 722096247U, 1315333033U, - 2102231203U, 3402389208U, 3403698140U, 1312402831U, 2898426558U, - 814384596U, 385649582U, 1916643285U, 1924625106U, 2512905582U, - 2501170304U, 4275223366U, 2841225246U, 1467663688U, 3563567847U, - 2969208552U, 884750901U, 102992576U, 227844301U, 3681442994U, - 3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U, - 1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U, - 1629323443U, 3233815U, 2003823032U, 3083834263U, 2379264872U, - 3752392312U, 1287475550U, 3770904171U, 3004244617U, 1502117784U, - 918698423U, 2419857538U, 3864502062U, 1751322107U, 2188775056U, - 4018728324U, 983712955U, 440071928U, 3710838677U, 2001027698U, - 3994702151U, 22493119U, 3584400918U, 3446253670U, 4254789085U, - 1405447860U, 1240245579U, 1800644159U, 1661363424U, 3278326132U, - 3403623451U, 67092802U, 2609352193U, 3914150340U, 1814842761U, - 3610830847U, 591531412U, 3880232807U, 1673505890U, 2585326991U, - 1678544474U, 3148435887U, 3457217359U, 1193226330U, 2816576908U, - 154025329U, 121678860U, 1164915738U, 973873761U, 269116100U, - 52087970U, 744015362U, 498556057U, 94298882U, 1563271621U, - 2383059628U, 4197367290U, 3958472990U, 2592083636U, 2906408439U, - 1097742433U, 3924840517U, 264557272U, 2292287003U, 3203307984U, - 4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U, - 3412254904U, 583538222U, 2390557166U, 4140459427U, 2810357445U, - 226777499U, 2496151295U, 2207301712U, 3283683112U, 611630281U, - 1933218215U, 3315610954U, 3889441987U, 3719454256U, 3957190521U, - 1313998161U, 2365383016U, 3146941060U, 1801206260U, 796124080U, - 2076248581U, 1747472464U, 3254365145U, 595543130U, 3573909503U, - 3758250204U, 2020768540U, 2439254210U, 93368951U, 3155792250U, - 2600232980U, 3709198295U, 3894900440U, 2971850836U, 1578909644U, - 1443493395U, 2581621665U, 3086506297U, 2443465861U, 558107211U, - 1519367835U, 249149686U, 908102264U, 2588765675U, 1232743965U, - 
1001330373U, 3561331654U, 2259301289U, 1564977624U, 3835077093U, - 727244906U, 4255738067U, 1214133513U, 2570786021U, 3899704621U, - 1633861986U, 1636979509U, 1438500431U, 58463278U, 2823485629U, - 2297430187U, 2926781924U, 3371352948U, 1864009023U, 2722267973U, - 1444292075U, 437703973U, 1060414512U, 189705863U, 910018135U, - 4077357964U, 884213423U, 2644986052U, 3973488374U, 1187906116U, - 2331207875U, 780463700U, 3713351662U, 3854611290U, 412805574U, - 2978462572U, 2176222820U, 829424696U, 2790788332U, 2750819108U, - 1594611657U, 3899878394U, 3032870364U, 1702887682U, 1948167778U, - 14130042U, 192292500U, 947227076U, 90719497U, 3854230320U, - 784028434U, 2142399787U, 1563449646U, 2844400217U, 819143172U, - 2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U, - 933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U, - 1412424497U, 2981395985U, 1418359660U, 2925902456U, 52752784U, - 3713667988U, 3924669405U, 648975707U, 1145520213U, 4018650664U, - 3805915440U, 2380542088U, 2013260958U, 3262572197U, 2465078101U, - 1114540067U, 3728768081U, 2396958768U, 590672271U, 904818725U, - 4263660715U, 700754408U, 1042601829U, 4094111823U, 4274838909U, - 2512692617U, 2774300207U, 2057306915U, 3470942453U, 99333088U, - 1142661026U, 2889931380U, 14316674U, 2201179167U, 415289459U, - 448265759U, 3515142743U, 3254903683U, 246633281U, 1184307224U, - 2418347830U, 2092967314U, 2682072314U, 2558750234U, 2000352263U, - 1544150531U, 399010405U, 1513946097U, 499682937U, 461167460U, - 3045570638U, 1633669705U, 851492362U, 4052801922U, 2055266765U, - 635556996U, 368266356U, 2385737383U, 3218202352U, 2603772408U, - 349178792U, 226482567U, 3102426060U, 3575998268U, 2103001871U, - 3243137071U, 225500688U, 1634718593U, 4283311431U, 4292122923U, - 3842802787U, 811735523U, 105712518U, 663434053U, 1855889273U, - 2847972595U, 1196355421U, 2552150115U, 4254510614U, 3752181265U, - 3430721819U, 3828705396U, 3436287905U, 3441964937U, 4123670631U, - 353001539U, 459496439U, 
3799690868U, 1293777660U, 2761079737U, - 498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U, - 4134660419U, 3903444024U, 3576494993U, 203682175U, 3321164857U, - 2747963611U, 79749085U, 2992890370U, 1240278549U, 1772175713U, - 2111331972U, 2655023449U, 1683896345U, 2836027212U, 3482868021U, - 2489884874U, 756853961U, 2298874501U, 4013448667U, 4143996022U, - 2948306858U, 4132920035U, 1283299272U, 995592228U, 3450508595U, - 1027845759U, 1766942720U, 3861411826U, 1446861231U, 95974993U, - 3502263554U, 1487532194U, 601502472U, 4129619129U, 250131773U, - 2050079547U, 3198903947U, 3105589778U, 4066481316U, 3026383978U, - 2276901713U, 365637751U, 2260718426U, 1394775634U, 1791172338U, - 2690503163U, 2952737846U, 1568710462U, 732623190U, 2980358000U, - 1053631832U, 1432426951U, 3229149635U, 1854113985U, 3719733532U, - 3204031934U, 735775531U, 107468620U, 3734611984U, 631009402U, - 3083622457U, 4109580626U, 159373458U, 1301970201U, 4132389302U, - 1293255004U, 847182752U, 4170022737U, 96712900U, 2641406755U, - 1381727755U, 405608287U, 4287919625U, 1703554290U, 3589580244U, - 2911403488U, 2166565U, 2647306451U, 2330535117U, 1200815358U, - 1165916754U, 245060911U, 4040679071U, 3684908771U, 2452834126U, - 2486872773U, 2318678365U, 2940627908U, 1837837240U, 3447897409U, - 4270484676U, 1495388728U, 3754288477U, 4204167884U, 1386977705U, - 2692224733U, 3076249689U, 4109568048U, 4170955115U, 4167531356U, - 4020189950U, 4261855038U, 3036907575U, 3410399885U, 3076395737U, - 1046178638U, 144496770U, 230725846U, 3349637149U, 17065717U, - 2809932048U, 2054581785U, 3608424964U, 3259628808U, 134897388U, - 3743067463U, 257685904U, 3795656590U, 1562468719U, 3589103904U, - 3120404710U, 254684547U, 2653661580U, 3663904795U, 2631942758U, - 1063234347U, 2609732900U, 2332080715U, 3521125233U, 1180599599U, - 1935868586U, 4110970440U, 296706371U, 2128666368U, 1319875791U, - 1570900197U, 3096025483U, 1799882517U, 1928302007U, 1163707758U, - 1244491489U, 3533770203U, 567496053U, 
2757924305U, 2781639343U, - 2818420107U, 560404889U, 2619609724U, 4176035430U, 2511289753U, - 2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U, - 330725126U, 367400677U, 888239854U, 545570454U, 4259590525U, - 134343617U, 1102169784U, 1647463719U, 3260979784U, 1518840883U, - 3631537963U, 3342671457U, 1301549147U, 2083739356U, 146593792U, - 3217959080U, 652755743U, 2032187193U, 3898758414U, 1021358093U, - 4037409230U, 2176407931U, 3427391950U, 2883553603U, 985613827U, - 3105265092U, 3423168427U, 3387507672U, 467170288U, 2141266163U, - 3723870208U, 916410914U, 1293987799U, 2652584950U, 769160137U, - 3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U, - 639683232U, 2639569327U, 1218546948U, 4263586685U, 3058215773U, - 2352279820U, 401870217U, 2625822463U, 1529125296U, 2981801895U, - 1191285226U, 4027725437U, 3432700217U, 4098835661U, 971182783U, - 2443861173U, 3881457123U, 3874386651U, 457276199U, 2638294160U, - 4002809368U, 421169044U, 1112642589U, 3076213779U, 3387033971U, - 2499610950U, 3057240914U, 1662679783U, 461224431U, 1168395933U -}; -static const uint32_t init_by_array_32_expected[] = { - 2920711183U, 3885745737U, 3501893680U, 856470934U, 1421864068U, - 277361036U, 1518638004U, 2328404353U, 3355513634U, 64329189U, - 1624587673U, 3508467182U, 2481792141U, 3706480799U, 1925859037U, - 2913275699U, 882658412U, 384641219U, 422202002U, 1873384891U, - 2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U, - 4195470535U, 779207191U, 1577721373U, 1390469554U, 2928648150U, - 121399709U, 3170839019U, 4044347501U, 953953814U, 3821710850U, - 3085591323U, 3666535579U, 3577837737U, 2012008410U, 3565417471U, - 4044408017U, 433600965U, 1637785608U, 1798509764U, 860770589U, - 3081466273U, 3982393409U, 2451928325U, 3437124742U, 4093828739U, - 3357389386U, 2154596123U, 496568176U, 2650035164U, 2472361850U, - 3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U, - 4078331588U, 3706103141U, 170391138U, 3806085154U, 1680970100U, - 
1961637521U, 3316029766U, 890610272U, 1453751581U, 1430283664U, - 3051057411U, 3597003186U, 542563954U, 3796490244U, 1690016688U, - 3448752238U, 440702173U, 347290497U, 1121336647U, 2540588620U, - 280881896U, 2495136428U, 213707396U, 15104824U, 2946180358U, - 659000016U, 566379385U, 2614030979U, 2855760170U, 334526548U, - 2315569495U, 2729518615U, 564745877U, 1263517638U, 3157185798U, - 1604852056U, 1011639885U, 2950579535U, 2524219188U, 312951012U, - 1528896652U, 1327861054U, 2846910138U, 3966855905U, 2536721582U, - 855353911U, 1685434729U, 3303978929U, 1624872055U, 4020329649U, - 3164802143U, 1642802700U, 1957727869U, 1792352426U, 3334618929U, - 2631577923U, 3027156164U, 842334259U, 3353446843U, 1226432104U, - 1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U, - 2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U, - 1625156629U, 3669701987U, 615211810U, 3294791649U, 4131143784U, - 2590843588U, 3207422808U, 3275066464U, 561592872U, 3957205738U, - 3396578098U, 48410678U, 3505556445U, 1005764855U, 3920606528U, - 2936980473U, 2378918600U, 2404449845U, 1649515163U, 701203563U, - 3705256349U, 83714199U, 3586854132U, 922978446U, 2863406304U, - 3523398907U, 2606864832U, 2385399361U, 3171757816U, 4262841009U, - 3645837721U, 1169579486U, 3666433897U, 3174689479U, 1457866976U, - 3803895110U, 3346639145U, 1907224409U, 1978473712U, 1036712794U, - 980754888U, 1302782359U, 1765252468U, 459245755U, 3728923860U, - 1512894209U, 2046491914U, 207860527U, 514188684U, 2288713615U, - 1597354672U, 3349636117U, 2357291114U, 3995796221U, 945364213U, - 1893326518U, 3770814016U, 1691552714U, 2397527410U, 967486361U, - 776416472U, 4197661421U, 951150819U, 1852770983U, 4044624181U, - 1399439738U, 4194455275U, 2284037669U, 1550734958U, 3321078108U, - 1865235926U, 2912129961U, 2664980877U, 1357572033U, 2600196436U, - 2486728200U, 2372668724U, 1567316966U, 2374111491U, 1839843570U, - 20815612U, 3727008608U, 3871996229U, 824061249U, 1932503978U, - 3404541726U, 
758428924U, 2609331364U, 1223966026U, 1299179808U, - 648499352U, 2180134401U, 880821170U, 3781130950U, 113491270U, - 1032413764U, 4185884695U, 2490396037U, 1201932817U, 4060951446U, - 4165586898U, 1629813212U, 2887821158U, 415045333U, 628926856U, - 2193466079U, 3391843445U, 2227540681U, 1907099846U, 2848448395U, - 1717828221U, 1372704537U, 1707549841U, 2294058813U, 2101214437U, - 2052479531U, 1695809164U, 3176587306U, 2632770465U, 81634404U, - 1603220563U, 644238487U, 302857763U, 897352968U, 2613146653U, - 1391730149U, 4245717312U, 4191828749U, 1948492526U, 2618174230U, - 3992984522U, 2178852787U, 3596044509U, 3445573503U, 2026614616U, - 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U, - 3111154986U, 2929478371U, 668346391U, 1152241381U, 2632029711U, - 3004150659U, 2135025926U, 948690501U, 2799119116U, 4228829406U, - 1981197489U, 4209064138U, 684318751U, 3459397845U, 201790843U, - 4022541136U, 3043635877U, 492509624U, 3263466772U, 1509148086U, - 921459029U, 3198857146U, 705479721U, 3835966910U, 3603356465U, - 576159741U, 1742849431U, 594214882U, 2055294343U, 3634861861U, - 449571793U, 3246390646U, 3868232151U, 1479156585U, 2900125656U, - 2464815318U, 3960178104U, 1784261920U, 18311476U, 3627135050U, - 644609697U, 424968996U, 919890700U, 2986824110U, 816423214U, - 4003562844U, 1392714305U, 1757384428U, 2569030598U, 995949559U, - 3875659880U, 2933807823U, 2752536860U, 2993858466U, 4030558899U, - 2770783427U, 2775406005U, 2777781742U, 1931292655U, 472147933U, - 3865853827U, 2726470545U, 2668412860U, 2887008249U, 408979190U, - 3578063323U, 3242082049U, 1778193530U, 27981909U, 2362826515U, - 389875677U, 1043878156U, 581653903U, 3830568952U, 389535942U, - 3713523185U, 2768373359U, 2526101582U, 1998618197U, 1160859704U, - 3951172488U, 1098005003U, 906275699U, 3446228002U, 2220677963U, - 2059306445U, 132199571U, 476838790U, 1868039399U, 3097344807U, - 857300945U, 396345050U, 2835919916U, 1782168828U, 1419519470U, - 4288137521U, 819087232U, 596301494U, 
872823172U, 1526888217U, - 805161465U, 1116186205U, 2829002754U, 2352620120U, 620121516U, - 354159268U, 3601949785U, 209568138U, 1352371732U, 2145977349U, - 4236871834U, 1539414078U, 3558126206U, 3224857093U, 4164166682U, - 3817553440U, 3301780278U, 2682696837U, 3734994768U, 1370950260U, - 1477421202U, 2521315749U, 1330148125U, 1261554731U, 2769143688U, - 3554756293U, 4235882678U, 3254686059U, 3530579953U, 1215452615U, - 3574970923U, 4057131421U, 589224178U, 1000098193U, 171190718U, - 2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U, - 3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U, - 2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U, - 1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U, - 2419700818U, 971242709U, 1361975763U, 1096842482U, 3271045537U, - 81165449U, 612438025U, 3912966678U, 1356929810U, 733545735U, - 537003843U, 1282953084U, 884458241U, 588930090U, 3930269801U, - 2961472450U, 1219535534U, 3632251943U, 268183903U, 1441240533U, - 3653903360U, 3854473319U, 2259087390U, 2548293048U, 2022641195U, - 2105543911U, 1764085217U, 3246183186U, 482438805U, 888317895U, - 2628314765U, 2466219854U, 717546004U, 2322237039U, 416725234U, - 1544049923U, 1797944973U, 3398652364U, 3111909456U, 485742908U, - 2277491072U, 1056355088U, 3181001278U, 129695079U, 2693624550U, - 1764438564U, 3797785470U, 195503713U, 3266519725U, 2053389444U, - 1961527818U, 3400226523U, 3777903038U, 2597274307U, 4235851091U, - 4094406648U, 2171410785U, 1781151386U, 1378577117U, 654643266U, - 3424024173U, 3385813322U, 679385799U, 479380913U, 681715441U, - 3096225905U, 276813409U, 3854398070U, 2721105350U, 831263315U, - 3276280337U, 2628301522U, 3984868494U, 1466099834U, 2104922114U, - 1412672743U, 820330404U, 3491501010U, 942735832U, 710652807U, - 3972652090U, 679881088U, 40577009U, 3705286397U, 2815423480U, - 3566262429U, 663396513U, 3777887429U, 4016670678U, 404539370U, - 1142712925U, 1140173408U, 2913248352U, 2872321286U, 263751841U, - 
3175196073U, 3162557581U, 2878996619U, 75498548U, 3836833140U, - 3284664959U, 1157523805U, 112847376U, 207855609U, 1337979698U, - 1222578451U, 157107174U, 901174378U, 3883717063U, 1618632639U, - 1767889440U, 4264698824U, 1582999313U, 884471997U, 2508825098U, - 3756370771U, 2457213553U, 3565776881U, 3709583214U, 915609601U, - 460833524U, 1091049576U, 85522880U, 2553251U, 132102809U, - 2429882442U, 2562084610U, 1386507633U, 4112471229U, 21965213U, - 1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U, - 1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U, - 3463052265U, 802340101U, 1912886800U, 4031997367U, 3550640406U, - 1596096923U, 610150600U, 431464457U, 2541325046U, 486478003U, - 739704936U, 2862696430U, 3037903166U, 1129749694U, 2611481261U, - 1228993498U, 510075548U, 3424962587U, 2458689681U, 818934833U, - 4233309125U, 1608196251U, 3419476016U, 1858543939U, 2682166524U, - 3317854285U, 631986188U, 3008214764U, 613826412U, 3567358221U, - 3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U, - 565267881U, 768644821U, 198310105U, 2396688616U, 1837659011U, - 203429334U, 854539004U, 4235811518U, 3338304926U, 3730418692U, - 3852254981U, 3032046452U, 2329811860U, 2303590566U, 2696092212U, - 3894665932U, 145835667U, 249563655U, 1932210840U, 2431696407U, - 3312636759U, 214962629U, 2092026914U, 3020145527U, 4073039873U, - 2739105705U, 1308336752U, 855104522U, 2391715321U, 67448785U, - 547989482U, 854411802U, 3608633740U, 431731530U, 537375589U, - 3888005760U, 696099141U, 397343236U, 1864511780U, 44029739U, - 1729526891U, 1993398655U, 2010173426U, 2591546756U, 275223291U, - 1503900299U, 4217765081U, 2185635252U, 1122436015U, 3550155364U, - 681707194U, 3260479338U, 933579397U, 2983029282U, 2505504587U, - 2667410393U, 2962684490U, 4139721708U, 2658172284U, 2452602383U, - 2607631612U, 1344296217U, 3075398709U, 2949785295U, 1049956168U, - 3917185129U, 2155660174U, 3280524475U, 1503827867U, 674380765U, - 1918468193U, 3843983676U, 634358221U, 
2538335643U, 1873351298U, - 3368723763U, 2129144130U, 3203528633U, 3087174986U, 2691698871U, - 2516284287U, 24437745U, 1118381474U, 2816314867U, 2448576035U, - 4281989654U, 217287825U, 165872888U, 2628995722U, 3533525116U, - 2721669106U, 872340568U, 3429930655U, 3309047304U, 3916704967U, - 3270160355U, 1348884255U, 1634797670U, 881214967U, 4259633554U, - 174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U, - 3853082619U, 4073196549U, 1189620777U, 637238656U, 930241537U, - 4042750792U, 3842136042U, 2417007212U, 2524907510U, 1243036827U, - 1282059441U, 3764588774U, 1394459615U, 2323620015U, 1166152231U, - 3307479609U, 3849322257U, 3507445699U, 4247696636U, 758393720U, - 967665141U, 1095244571U, 1319812152U, 407678762U, 2640605208U, - 2170766134U, 3663594275U, 4039329364U, 2512175520U, 725523154U, - 2249807004U, 3312617979U, 2414634172U, 1278482215U, 349206484U, - 1573063308U, 1196429124U, 3873264116U, 2400067801U, 268795167U, - 226175489U, 2961367263U, 1968719665U, 42656370U, 1010790699U, - 561600615U, 2422453992U, 3082197735U, 1636700484U, 3977715296U, - 3125350482U, 3478021514U, 2227819446U, 1540868045U, 3061908980U, - 1087362407U, 3625200291U, 361937537U, 580441897U, 1520043666U, - 2270875402U, 1009161260U, 2502355842U, 4278769785U, 473902412U, - 1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U, - 1806991954U, 2194674403U, 3455972205U, 807207678U, 3655655687U, - 674112918U, 195425752U, 3917890095U, 1874364234U, 1837892715U, - 3663478166U, 1548892014U, 2570748714U, 2049929836U, 2167029704U, - 697543767U, 3499545023U, 3342496315U, 1725251190U, 3561387469U, - 2905606616U, 1580182447U, 3934525927U, 4103172792U, 1365672522U, - 1534795737U, 3308667416U, 2841911405U, 3943182730U, 4072020313U, - 3494770452U, 3332626671U, 55327267U, 478030603U, 411080625U, - 3419529010U, 1604767823U, 3513468014U, 570668510U, 913790824U, - 2283967995U, 695159462U, 3825542932U, 4150698144U, 1829758699U, - 202895590U, 1609122645U, 1267651008U, 2910315509U, 
2511475445U, - 2477423819U, 3932081579U, 900879979U, 2145588390U, 2670007504U, - 580819444U, 1864996828U, 2526325979U, 1019124258U, 815508628U, - 2765933989U, 1277301341U, 3006021786U, 855540956U, 288025710U, - 1919594237U, 2331223864U, 177452412U, 2475870369U, 2689291749U, - 865194284U, 253432152U, 2628531804U, 2861208555U, 2361597573U, - 1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U, - 2596878672U, 2041442161U, 31164696U, 2662962485U, 3665637339U, - 1678115244U, 2699839832U, 3651968520U, 3521595541U, 458433303U, - 2423096824U, 21831741U, 380011703U, 2498168716U, 861806087U, - 1673574843U, 4188794405U, 2520563651U, 2632279153U, 2170465525U, - 4171949898U, 3886039621U, 1661344005U, 3424285243U, 992588372U, - 2500984144U, 2993248497U, 3590193895U, 1535327365U, 515645636U, - 131633450U, 3729760261U, 1613045101U, 3254194278U, 15889678U, - 1493590689U, 244148718U, 2991472662U, 1401629333U, 777349878U, - 2501401703U, 4285518317U, 3794656178U, 955526526U, 3442142820U, - 3970298374U, 736025417U, 2737370764U, 1271509744U, 440570731U, - 136141826U, 1596189518U, 923399175U, 257541519U, 3505774281U, - 2194358432U, 2518162991U, 1379893637U, 2667767062U, 3748146247U, - 1821712620U, 3923161384U, 1947811444U, 2392527197U, 4127419685U, - 1423694998U, 4156576871U, 1382885582U, 3420127279U, 3617499534U, - 2994377493U, 4038063986U, 1918458672U, 2983166794U, 4200449033U, - 353294540U, 1609232588U, 243926648U, 2332803291U, 507996832U, - 2392838793U, 4075145196U, 2060984340U, 4287475136U, 88232602U, - 2491531140U, 4159725633U, 2272075455U, 759298618U, 201384554U, - 838356250U, 1416268324U, 674476934U, 90795364U, 141672229U, - 3660399588U, 4196417251U, 3249270244U, 3774530247U, 59587265U, - 3683164208U, 19392575U, 1463123697U, 1882205379U, 293780489U, - 2553160622U, 2933904694U, 675638239U, 2851336944U, 1435238743U, - 2448730183U, 804436302U, 2119845972U, 322560608U, 4097732704U, - 2987802540U, 641492617U, 2575442710U, 4217822703U, 3271835300U, - 2836418300U, 
3739921620U, 2138378768U, 2879771855U, 4294903423U, - 3121097946U, 2603440486U, 2560820391U, 1012930944U, 2313499967U, - 584489368U, 3431165766U, 897384869U, 2062537737U, 2847889234U, - 3742362450U, 2951174585U, 4204621084U, 1109373893U, 3668075775U, - 2750138839U, 3518055702U, 733072558U, 4169325400U, 788493625U -}; -static const uint64_t init_gen_rand_64_expected[] = { - KQU(16924766246869039260), KQU( 8201438687333352714), - KQU( 2265290287015001750), KQU(18397264611805473832), - KQU( 3375255223302384358), KQU( 6345559975416828796), - KQU(18229739242790328073), KQU( 7596792742098800905), - KQU( 255338647169685981), KQU( 2052747240048610300), - KQU(18328151576097299343), KQU(12472905421133796567), - KQU(11315245349717600863), KQU(16594110197775871209), - KQU(15708751964632456450), KQU(10452031272054632535), - KQU(11097646720811454386), KQU( 4556090668445745441), - KQU(17116187693090663106), KQU(14931526836144510645), - KQU( 9190752218020552591), KQU( 9625800285771901401), - KQU(13995141077659972832), KQU( 5194209094927829625), - KQU( 4156788379151063303), KQU( 8523452593770139494), - KQU(14082382103049296727), KQU( 2462601863986088483), - KQU( 3030583461592840678), KQU( 5221622077872827681), - KQU( 3084210671228981236), KQU(13956758381389953823), - KQU(13503889856213423831), KQU(15696904024189836170), - KQU( 4612584152877036206), KQU( 6231135538447867881), - KQU(10172457294158869468), KQU( 6452258628466708150), - KQU(14044432824917330221), KQU( 370168364480044279), - KQU(10102144686427193359), KQU( 667870489994776076), - KQU( 2732271956925885858), KQU(18027788905977284151), - KQU(15009842788582923859), KQU( 7136357960180199542), - KQU(15901736243475578127), KQU(16951293785352615701), - KQU(10551492125243691632), KQU(17668869969146434804), - KQU(13646002971174390445), KQU( 9804471050759613248), - KQU( 5511670439655935493), KQU(18103342091070400926), - KQU(17224512747665137533), KQU(15534627482992618168), - KQU( 1423813266186582647), KQU(15821176807932930024), - 
KQU( 30323369733607156), KQU(11599382494723479403), - KQU( 653856076586810062), KQU( 3176437395144899659), - KQU(14028076268147963917), KQU(16156398271809666195), - KQU( 3166955484848201676), KQU( 5746805620136919390), - KQU(17297845208891256593), KQU(11691653183226428483), - KQU(17900026146506981577), KQU(15387382115755971042), - KQU(16923567681040845943), KQU( 8039057517199388606), - KQU(11748409241468629263), KQU( 794358245539076095), - KQU(13438501964693401242), KQU(14036803236515618962), - KQU( 5252311215205424721), KQU(17806589612915509081), - KQU( 6802767092397596006), KQU(14212120431184557140), - KQU( 1072951366761385712), KQU(13098491780722836296), - KQU( 9466676828710797353), KQU(12673056849042830081), - KQU(12763726623645357580), KQU(16468961652999309493), - KQU(15305979875636438926), KQU(17444713151223449734), - KQU( 5692214267627883674), KQU(13049589139196151505), - KQU( 880115207831670745), KQU( 1776529075789695498), - KQU(16695225897801466485), KQU(10666901778795346845), - KQU( 6164389346722833869), KQU( 2863817793264300475), - KQU( 9464049921886304754), KQU( 3993566636740015468), - KQU( 9983749692528514136), KQU(16375286075057755211), - KQU(16042643417005440820), KQU(11445419662923489877), - KQU( 7999038846885158836), KQU( 6721913661721511535), - KQU( 5363052654139357320), KQU( 1817788761173584205), - KQU(13290974386445856444), KQU( 4650350818937984680), - KQU( 8219183528102484836), KQU( 1569862923500819899), - KQU( 4189359732136641860), KQU(14202822961683148583), - KQU( 4457498315309429058), KQU(13089067387019074834), - KQU(11075517153328927293), KQU(10277016248336668389), - KQU( 7070509725324401122), KQU(17808892017780289380), - KQU(13143367339909287349), KQU( 1377743745360085151), - KQU( 5749341807421286485), KQU(14832814616770931325), - KQU( 7688820635324359492), KQU(10960474011539770045), - KQU( 81970066653179790), KQU(12619476072607878022), - KQU( 4419566616271201744), KQU(15147917311750568503), - KQU( 5549739182852706345), KQU( 
7308198397975204770), - KQU(13580425496671289278), KQU(17070764785210130301), - KQU( 8202832846285604405), KQU( 6873046287640887249), - KQU( 6927424434308206114), KQU( 6139014645937224874), - KQU(10290373645978487639), KQU(15904261291701523804), - KQU( 9628743442057826883), KQU(18383429096255546714), - KQU( 4977413265753686967), KQU( 7714317492425012869), - KQU( 9025232586309926193), KQU(14627338359776709107), - KQU(14759849896467790763), KQU(10931129435864423252), - KQU( 4588456988775014359), KQU(10699388531797056724), - KQU( 468652268869238792), KQU( 5755943035328078086), - KQU( 2102437379988580216), KQU( 9986312786506674028), - KQU( 2654207180040945604), KQU( 8726634790559960062), - KQU( 100497234871808137), KQU( 2800137176951425819), - KQU( 6076627612918553487), KQU( 5780186919186152796), - KQU( 8179183595769929098), KQU( 6009426283716221169), - KQU( 2796662551397449358), KQU( 1756961367041986764), - KQU( 6972897917355606205), KQU(14524774345368968243), - KQU( 2773529684745706940), KQU( 4853632376213075959), - KQU( 4198177923731358102), KQU( 8271224913084139776), - KQU( 2741753121611092226), KQU(16782366145996731181), - KQU(15426125238972640790), KQU(13595497100671260342), - KQU( 3173531022836259898), KQU( 6573264560319511662), - KQU(18041111951511157441), KQU( 2351433581833135952), - KQU( 3113255578908173487), KQU( 1739371330877858784), - KQU(16046126562789165480), KQU( 8072101652214192925), - KQU(15267091584090664910), KQU( 9309579200403648940), - KQU( 5218892439752408722), KQU(14492477246004337115), - KQU(17431037586679770619), KQU( 7385248135963250480), - KQU( 9580144956565560660), KQU( 4919546228040008720), - KQU(15261542469145035584), KQU(18233297270822253102), - KQU( 5453248417992302857), KQU( 9309519155931460285), - KQU(10342813012345291756), KQU(15676085186784762381), - KQU(15912092950691300645), KQU( 9371053121499003195), - KQU( 9897186478226866746), KQU(14061858287188196327), - KQU( 122575971620788119), KQU(12146750969116317754), - KQU( 
4438317272813245201), KQU( 8332576791009527119), - KQU(13907785691786542057), KQU(10374194887283287467), - KQU( 2098798755649059566), KQU( 3416235197748288894), - KQU( 8688269957320773484), KQU( 7503964602397371571), - KQU(16724977015147478236), KQU( 9461512855439858184), - KQU(13259049744534534727), KQU( 3583094952542899294), - KQU( 8764245731305528292), KQU(13240823595462088985), - KQU(13716141617617910448), KQU(18114969519935960955), - KQU( 2297553615798302206), KQU( 4585521442944663362), - KQU(17776858680630198686), KQU( 4685873229192163363), - KQU( 152558080671135627), KQU(15424900540842670088), - KQU(13229630297130024108), KQU(17530268788245718717), - KQU(16675633913065714144), KQU( 3158912717897568068), - KQU(15399132185380087288), KQU( 7401418744515677872), - KQU(13135412922344398535), KQU( 6385314346100509511), - KQU(13962867001134161139), KQU(10272780155442671999), - KQU(12894856086597769142), KQU(13340877795287554994), - KQU(12913630602094607396), KQU(12543167911119793857), - KQU(17343570372251873096), KQU(10959487764494150545), - KQU( 6966737953093821128), KQU(13780699135496988601), - KQU( 4405070719380142046), KQU(14923788365607284982), - KQU( 2869487678905148380), KQU( 6416272754197188403), - KQU(15017380475943612591), KQU( 1995636220918429487), - KQU( 3402016804620122716), KQU(15800188663407057080), - KQU(11362369990390932882), KQU(15262183501637986147), - KQU(10239175385387371494), KQU( 9352042420365748334), - KQU( 1682457034285119875), KQU( 1724710651376289644), - KQU( 2038157098893817966), KQU( 9897825558324608773), - KQU( 1477666236519164736), KQU(16835397314511233640), - KQU(10370866327005346508), KQU(10157504370660621982), - KQU(12113904045335882069), KQU(13326444439742783008), - KQU(11302769043000765804), KQU(13594979923955228484), - KQU(11779351762613475968), KQU( 3786101619539298383), - KQU( 8021122969180846063), KQU(15745904401162500495), - KQU(10762168465993897267), KQU(13552058957896319026), - KQU(11200228655252462013), KQU( 
5035370357337441226), - KQU( 7593918984545500013), KQU( 5418554918361528700), - KQU( 4858270799405446371), KQU( 9974659566876282544), - KQU(18227595922273957859), KQU( 2772778443635656220), - KQU(14285143053182085385), KQU( 9939700992429600469), - KQU(12756185904545598068), KQU( 2020783375367345262), - KQU( 57026775058331227), KQU( 950827867930065454), - KQU( 6602279670145371217), KQU( 2291171535443566929), - KQU( 5832380724425010313), KQU( 1220343904715982285), - KQU(17045542598598037633), KQU(15460481779702820971), - KQU(13948388779949365130), KQU(13975040175430829518), - KQU(17477538238425541763), KQU(11104663041851745725), - KQU(15860992957141157587), KQU(14529434633012950138), - KQU( 2504838019075394203), KQU( 7512113882611121886), - KQU( 4859973559980886617), KQU( 1258601555703250219), - KQU(15594548157514316394), KQU( 4516730171963773048), - KQU(11380103193905031983), KQU( 6809282239982353344), - KQU(18045256930420065002), KQU( 2453702683108791859), - KQU( 977214582986981460), KQU( 2006410402232713466), - KQU( 6192236267216378358), KQU( 3429468402195675253), - KQU(18146933153017348921), KQU(17369978576367231139), - KQU( 1246940717230386603), KQU(11335758870083327110), - KQU(14166488801730353682), KQU( 9008573127269635732), - KQU(10776025389820643815), KQU(15087605441903942962), - KQU( 1359542462712147922), KQU(13898874411226454206), - KQU(17911176066536804411), KQU( 9435590428600085274), - KQU( 294488509967864007), KQU( 8890111397567922046), - KQU( 7987823476034328778), KQU(13263827582440967651), - KQU( 7503774813106751573), KQU(14974747296185646837), - KQU( 8504765037032103375), KQU(17340303357444536213), - KQU( 7704610912964485743), KQU( 8107533670327205061), - KQU( 9062969835083315985), KQU(16968963142126734184), - KQU(12958041214190810180), KQU( 2720170147759570200), - KQU( 2986358963942189566), KQU(14884226322219356580), - KQU( 286224325144368520), KQU(11313800433154279797), - KQU(18366849528439673248), KQU(17899725929482368789), - KQU( 
3730004284609106799), KQU( 1654474302052767205), - KQU( 5006698007047077032), KQU( 8196893913601182838), - KQU(15214541774425211640), KQU(17391346045606626073), - KQU( 8369003584076969089), KQU( 3939046733368550293), - KQU(10178639720308707785), KQU( 2180248669304388697), - KQU( 62894391300126322), KQU( 9205708961736223191), - KQU( 6837431058165360438), KQU( 3150743890848308214), - KQU(17849330658111464583), KQU(12214815643135450865), - KQU(13410713840519603402), KQU( 3200778126692046802), - KQU(13354780043041779313), KQU( 800850022756886036), - KQU(15660052933953067433), KQU( 6572823544154375676), - KQU(11030281857015819266), KQU(12682241941471433835), - KQU(11654136407300274693), KQU( 4517795492388641109), - KQU( 9757017371504524244), KQU(17833043400781889277), - KQU(12685085201747792227), KQU(10408057728835019573), - KQU( 98370418513455221), KQU( 6732663555696848598), - KQU(13248530959948529780), KQU( 3530441401230622826), - KQU(18188251992895660615), KQU( 1847918354186383756), - KQU( 1127392190402660921), KQU(11293734643143819463), - KQU( 3015506344578682982), KQU(13852645444071153329), - KQU( 2121359659091349142), KQU( 1294604376116677694), - KQU( 5616576231286352318), KQU( 7112502442954235625), - KQU(11676228199551561689), KQU(12925182803007305359), - KQU( 7852375518160493082), KQU( 1136513130539296154), - KQU( 5636923900916593195), KQU( 3221077517612607747), - KQU(17784790465798152513), KQU( 3554210049056995938), - KQU(17476839685878225874), KQU( 3206836372585575732), - KQU( 2765333945644823430), KQU(10080070903718799528), - KQU( 5412370818878286353), KQU( 9689685887726257728), - KQU( 8236117509123533998), KQU( 1951139137165040214), - KQU( 4492205209227980349), KQU(16541291230861602967), - KQU( 1424371548301437940), KQU( 9117562079669206794), - KQU(14374681563251691625), KQU(13873164030199921303), - KQU( 6680317946770936731), KQU(15586334026918276214), - KQU(10896213950976109802), KQU( 9506261949596413689), - KQU( 9903949574308040616), KQU( 
6038397344557204470), - KQU( 174601465422373648), KQU(15946141191338238030), - KQU(17142225620992044937), KQU( 7552030283784477064), - KQU( 2947372384532947997), KQU( 510797021688197711), - KQU( 4962499439249363461), KQU( 23770320158385357), - KQU( 959774499105138124), KQU( 1468396011518788276), - KQU( 2015698006852312308), KQU( 4149400718489980136), - KQU( 5992916099522371188), KQU(10819182935265531076), - KQU(16189787999192351131), KQU( 342833961790261950), - KQU(12470830319550495336), KQU(18128495041912812501), - KQU( 1193600899723524337), KQU( 9056793666590079770), - KQU( 2154021227041669041), KQU( 4963570213951235735), - KQU( 4865075960209211409), KQU( 2097724599039942963), - KQU( 2024080278583179845), KQU(11527054549196576736), - KQU(10650256084182390252), KQU( 4808408648695766755), - KQU( 1642839215013788844), KQU(10607187948250398390), - KQU( 7076868166085913508), KQU( 730522571106887032), - KQU(12500579240208524895), KQU( 4484390097311355324), - KQU(15145801330700623870), KQU( 8055827661392944028), - KQU( 5865092976832712268), KQU(15159212508053625143), - KQU( 3560964582876483341), KQU( 4070052741344438280), - KQU( 6032585709886855634), KQU(15643262320904604873), - KQU( 2565119772293371111), KQU( 318314293065348260), - KQU(15047458749141511872), KQU( 7772788389811528730), - KQU( 7081187494343801976), KQU( 6465136009467253947), - KQU(10425940692543362069), KQU( 554608190318339115), - KQU(14796699860302125214), KQU( 1638153134431111443), - KQU(10336967447052276248), KQU( 8412308070396592958), - KQU( 4004557277152051226), KQU( 8143598997278774834), - KQU(16413323996508783221), KQU(13139418758033994949), - KQU( 9772709138335006667), KQU( 2818167159287157659), - KQU(17091740573832523669), KQU(14629199013130751608), - KQU(18268322711500338185), KQU( 8290963415675493063), - KQU( 8830864907452542588), KQU( 1614839084637494849), - KQU(14855358500870422231), KQU( 3472996748392519937), - KQU(15317151166268877716), KQU( 5825895018698400362), - 
KQU(16730208429367544129), KQU(10481156578141202800), - KQU( 4746166512382823750), KQU(12720876014472464998), - KQU( 8825177124486735972), KQU(13733447296837467838), - KQU( 6412293741681359625), KQU( 8313213138756135033), - KQU(11421481194803712517), KQU( 7997007691544174032), - KQU( 6812963847917605930), KQU( 9683091901227558641), - KQU(14703594165860324713), KQU( 1775476144519618309), - KQU( 2724283288516469519), KQU( 717642555185856868), - KQU( 8736402192215092346), KQU(11878800336431381021), - KQU( 4348816066017061293), KQU( 6115112756583631307), - KQU( 9176597239667142976), KQU(12615622714894259204), - KQU(10283406711301385987), KQU( 5111762509485379420), - KQU( 3118290051198688449), KQU( 7345123071632232145), - KQU( 9176423451688682359), KQU( 4843865456157868971), - KQU(12008036363752566088), KQU(12058837181919397720), - KQU( 2145073958457347366), KQU( 1526504881672818067), - KQU( 3488830105567134848), KQU(13208362960674805143), - KQU( 4077549672899572192), KQU( 7770995684693818365), - KQU( 1398532341546313593), KQU(12711859908703927840), - KQU( 1417561172594446813), KQU(17045191024194170604), - KQU( 4101933177604931713), KQU(14708428834203480320), - KQU(17447509264469407724), KQU(14314821973983434255), - KQU(17990472271061617265), KQU( 5087756685841673942), - KQU(12797820586893859939), KQU( 1778128952671092879), - KQU( 3535918530508665898), KQU( 9035729701042481301), - KQU(14808661568277079962), KQU(14587345077537747914), - KQU(11920080002323122708), KQU( 6426515805197278753), - KQU( 3295612216725984831), KQU(11040722532100876120), - KQU(12305952936387598754), KQU(16097391899742004253), - KQU( 4908537335606182208), KQU(12446674552196795504), - KQU(16010497855816895177), KQU( 9194378874788615551), - KQU( 3382957529567613384), KQU( 5154647600754974077), - KQU( 9801822865328396141), KQU( 9023662173919288143), - KQU(17623115353825147868), KQU( 8238115767443015816), - KQU(15811444159859002560), KQU( 9085612528904059661), - KQU( 6888601089398614254), KQU( 
258252992894160189), - KQU( 6704363880792428622), KQU( 6114966032147235763), - KQU(11075393882690261875), KQU( 8797664238933620407), - KQU( 5901892006476726920), KQU( 5309780159285518958), - KQU(14940808387240817367), KQU(14642032021449656698), - KQU( 9808256672068504139), KQU( 3670135111380607658), - KQU(11211211097845960152), KQU( 1474304506716695808), - KQU(15843166204506876239), KQU( 7661051252471780561), - KQU(10170905502249418476), KQU( 7801416045582028589), - KQU( 2763981484737053050), KQU( 9491377905499253054), - KQU(16201395896336915095), KQU( 9256513756442782198), - KQU( 5411283157972456034), KQU( 5059433122288321676), - KQU( 4327408006721123357), KQU( 9278544078834433377), - KQU( 7601527110882281612), KQU(11848295896975505251), - KQU(12096998801094735560), KQU(14773480339823506413), - KQU(15586227433895802149), KQU(12786541257830242872), - KQU( 6904692985140503067), KQU( 5309011515263103959), - KQU(12105257191179371066), KQU(14654380212442225037), - KQU( 2556774974190695009), KQU( 4461297399927600261), - KQU(14888225660915118646), KQU(14915459341148291824), - KQU( 2738802166252327631), KQU( 6047155789239131512), - KQU(12920545353217010338), KQU(10697617257007840205), - KQU( 2751585253158203504), KQU(13252729159780047496), - KQU(14700326134672815469), KQU(14082527904374600529), - KQU(16852962273496542070), KQU(17446675504235853907), - KQU(15019600398527572311), KQU(12312781346344081551), - KQU(14524667935039810450), KQU( 5634005663377195738), - KQU(11375574739525000569), KQU( 2423665396433260040), - KQU( 5222836914796015410), KQU( 4397666386492647387), - KQU( 4619294441691707638), KQU( 665088602354770716), - KQU(13246495665281593610), KQU( 6564144270549729409), - KQU(10223216188145661688), KQU( 3961556907299230585), - KQU(11543262515492439914), KQU(16118031437285993790), - KQU( 7143417964520166465), KQU(13295053515909486772), - KQU( 40434666004899675), KQU(17127804194038347164), - KQU( 8599165966560586269), KQU( 8214016749011284903), - 
KQU(13725130352140465239), KQU( 5467254474431726291), - KQU( 7748584297438219877), KQU(16933551114829772472), - KQU( 2169618439506799400), KQU( 2169787627665113463), - KQU(17314493571267943764), KQU(18053575102911354912), - KQU(11928303275378476973), KQU(11593850925061715550), - KQU(17782269923473589362), KQU( 3280235307704747039), - KQU( 6145343578598685149), KQU(17080117031114086090), - KQU(18066839902983594755), KQU( 6517508430331020706), - KQU( 8092908893950411541), KQU(12558378233386153732), - KQU( 4476532167973132976), KQU(16081642430367025016), - KQU( 4233154094369139361), KQU( 8693630486693161027), - KQU(11244959343027742285), KQU(12273503967768513508), - KQU(14108978636385284876), KQU( 7242414665378826984), - KQU( 6561316938846562432), KQU( 8601038474994665795), - KQU(17532942353612365904), KQU(17940076637020912186), - KQU( 7340260368823171304), KQU( 7061807613916067905), - KQU(10561734935039519326), KQU(17990796503724650862), - KQU( 6208732943911827159), KQU( 359077562804090617), - KQU(14177751537784403113), KQU(10659599444915362902), - KQU(15081727220615085833), KQU(13417573895659757486), - KQU(15513842342017811524), KQU(11814141516204288231), - KQU( 1827312513875101814), KQU( 2804611699894603103), - KQU(17116500469975602763), KQU(12270191815211952087), - KQU(12256358467786024988), KQU(18435021722453971267), - KQU( 671330264390865618), KQU( 476504300460286050), - KQU(16465470901027093441), KQU( 4047724406247136402), - KQU( 1322305451411883346), KQU( 1388308688834322280), - KQU( 7303989085269758176), KQU( 9323792664765233642), - KQU( 4542762575316368936), KQU(17342696132794337618), - KQU( 4588025054768498379), KQU(13415475057390330804), - KQU(17880279491733405570), KQU(10610553400618620353), - KQU( 3180842072658960139), KQU(13002966655454270120), - KQU( 1665301181064982826), KQU( 7083673946791258979), - KQU( 190522247122496820), KQU(17388280237250677740), - KQU( 8430770379923642945), KQU(12987180971921668584), - KQU( 2311086108365390642), KQU( 
2870984383579822345), - KQU(14014682609164653318), KQU(14467187293062251484), - KQU( 192186361147413298), KQU(15171951713531796524), - KQU( 9900305495015948728), KQU(17958004775615466344), - KQU(14346380954498606514), KQU(18040047357617407096), - KQU( 5035237584833424532), KQU(15089555460613972287), - KQU( 4131411873749729831), KQU( 1329013581168250330), - KQU(10095353333051193949), KQU(10749518561022462716), - KQU( 9050611429810755847), KQU(15022028840236655649), - KQU( 8775554279239748298), KQU(13105754025489230502), - KQU(15471300118574167585), KQU( 89864764002355628), - KQU( 8776416323420466637), KQU( 5280258630612040891), - KQU( 2719174488591862912), KQU( 7599309137399661994), - KQU(15012887256778039979), KQU(14062981725630928925), - KQU(12038536286991689603), KQU( 7089756544681775245), - KQU(10376661532744718039), KQU( 1265198725901533130), - KQU(13807996727081142408), KQU( 2935019626765036403), - KQU( 7651672460680700141), KQU( 3644093016200370795), - KQU( 2840982578090080674), KQU(17956262740157449201), - KQU(18267979450492880548), KQU(11799503659796848070), - KQU( 9942537025669672388), KQU(11886606816406990297), - KQU( 5488594946437447576), KQU( 7226714353282744302), - KQU( 3784851653123877043), KQU( 878018453244803041), - KQU(12110022586268616085), KQU( 734072179404675123), - KQU(11869573627998248542), KQU( 469150421297783998), - KQU( 260151124912803804), KQU(11639179410120968649), - KQU( 9318165193840846253), KQU(12795671722734758075), - KQU(15318410297267253933), KQU( 691524703570062620), - KQU( 5837129010576994601), KQU(15045963859726941052), - KQU( 5850056944932238169), KQU(12017434144750943807), - KQU( 7447139064928956574), KQU( 3101711812658245019), - KQU(16052940704474982954), KQU(18195745945986994042), - KQU( 8932252132785575659), KQU(13390817488106794834), - KQU(11582771836502517453), KQU( 4964411326683611686), - KQU( 2195093981702694011), KQU(14145229538389675669), - KQU(16459605532062271798), KQU( 866316924816482864), - KQU( 
4593041209937286377), KQU( 8415491391910972138), - KQU( 4171236715600528969), KQU(16637569303336782889), - KQU( 2002011073439212680), KQU(17695124661097601411), - KQU( 4627687053598611702), KQU( 7895831936020190403), - KQU( 8455951300917267802), KQU( 2923861649108534854), - KQU( 8344557563927786255), KQU( 6408671940373352556), - KQU(12210227354536675772), KQU(14294804157294222295), - KQU(10103022425071085127), KQU(10092959489504123771), - KQU( 6554774405376736268), KQU(12629917718410641774), - KQU( 6260933257596067126), KQU( 2460827021439369673), - KQU( 2541962996717103668), KQU( 597377203127351475), - KQU( 5316984203117315309), KQU( 4811211393563241961), - KQU(13119698597255811641), KQU( 8048691512862388981), - KQU(10216818971194073842), KQU( 4612229970165291764), - KQU(10000980798419974770), KQU( 6877640812402540687), - KQU( 1488727563290436992), KQU( 2227774069895697318), - KQU(11237754507523316593), KQU(13478948605382290972), - KQU( 1963583846976858124), KQU( 5512309205269276457), - KQU( 3972770164717652347), KQU( 3841751276198975037), - KQU(10283343042181903117), KQU( 8564001259792872199), - KQU(16472187244722489221), KQU( 8953493499268945921), - KQU( 3518747340357279580), KQU( 4003157546223963073), - KQU( 3270305958289814590), KQU( 3966704458129482496), - KQU( 8122141865926661939), KQU(14627734748099506653), - KQU(13064426990862560568), KQU( 2414079187889870829), - KQU( 5378461209354225306), KQU(10841985740128255566), - KQU( 538582442885401738), KQU( 7535089183482905946), - KQU(16117559957598879095), KQU( 8477890721414539741), - KQU( 1459127491209533386), KQU(17035126360733620462), - KQU( 8517668552872379126), KQU(10292151468337355014), - KQU(17081267732745344157), KQU(13751455337946087178), - KQU(14026945459523832966), KQU( 6653278775061723516), - KQU(10619085543856390441), KQU( 2196343631481122885), - KQU(10045966074702826136), KQU(10082317330452718282), - KQU( 5920859259504831242), KQU( 9951879073426540617), - KQU( 7074696649151414158), 
KQU(15808193543879464318), - KQU( 7385247772746953374), KQU( 3192003544283864292), - KQU(18153684490917593847), KQU(12423498260668568905), - KQU(10957758099756378169), KQU(11488762179911016040), - KQU( 2099931186465333782), KQU(11180979581250294432), - KQU( 8098916250668367933), KQU( 3529200436790763465), - KQU(12988418908674681745), KQU( 6147567275954808580), - KQU( 3207503344604030989), KQU(10761592604898615360), - KQU( 229854861031893504), KQU( 8809853962667144291), - KQU(13957364469005693860), KQU( 7634287665224495886), - KQU(12353487366976556874), KQU( 1134423796317152034), - KQU( 2088992471334107068), KQU( 7393372127190799698), - KQU( 1845367839871058391), KQU( 207922563987322884), - KQU(11960870813159944976), KQU(12182120053317317363), - KQU(17307358132571709283), KQU(13871081155552824936), - KQU(18304446751741566262), KQU( 7178705220184302849), - KQU(10929605677758824425), KQU(16446976977835806844), - KQU(13723874412159769044), KQU( 6942854352100915216), - KQU( 1726308474365729390), KQU( 2150078766445323155), - KQU(15345558947919656626), KQU(12145453828874527201), - KQU( 2054448620739726849), KQU( 2740102003352628137), - KQU(11294462163577610655), KQU( 756164283387413743), - KQU(17841144758438810880), KQU(10802406021185415861), - KQU( 8716455530476737846), KQU( 6321788834517649606), - KQU(14681322910577468426), KQU(17330043563884336387), - KQU(12701802180050071614), KQU(14695105111079727151), - KQU( 5112098511654172830), KQU( 4957505496794139973), - KQU( 8270979451952045982), KQU(12307685939199120969), - KQU(12425799408953443032), KQU( 8376410143634796588), - KQU(16621778679680060464), KQU( 3580497854566660073), - KQU( 1122515747803382416), KQU( 857664980960597599), - KQU( 6343640119895925918), KQU(12878473260854462891), - KQU(10036813920765722626), KQU(14451335468363173812), - KQU( 5476809692401102807), KQU(16442255173514366342), - KQU(13060203194757167104), KQU(14354124071243177715), - KQU(15961249405696125227), KQU(13703893649690872584), - KQU( 
363907326340340064), KQU( 6247455540491754842), - KQU(12242249332757832361), KQU( 156065475679796717), - KQU( 9351116235749732355), KQU( 4590350628677701405), - KQU( 1671195940982350389), KQU(13501398458898451905), - KQU( 6526341991225002255), KQU( 1689782913778157592), - KQU( 7439222350869010334), KQU(13975150263226478308), - KQU(11411961169932682710), KQU(17204271834833847277), - KQU( 541534742544435367), KQU( 6591191931218949684), - KQU( 2645454775478232486), KQU( 4322857481256485321), - KQU( 8477416487553065110), KQU(12902505428548435048), - KQU( 971445777981341415), KQU(14995104682744976712), - KQU( 4243341648807158063), KQU( 8695061252721927661), - KQU( 5028202003270177222), KQU( 2289257340915567840), - KQU(13870416345121866007), KQU(13994481698072092233), - KQU( 6912785400753196481), KQU( 2278309315841980139), - KQU( 4329765449648304839), KQU( 5963108095785485298), - KQU( 4880024847478722478), KQU(16015608779890240947), - KQU( 1866679034261393544), KQU( 914821179919731519), - KQU( 9643404035648760131), KQU( 2418114953615593915), - KQU( 944756836073702374), KQU(15186388048737296834), - KQU( 7723355336128442206), KQU( 7500747479679599691), - KQU(18013961306453293634), KQU( 2315274808095756456), - KQU(13655308255424029566), KQU(17203800273561677098), - KQU( 1382158694422087756), KQU( 5090390250309588976), - KQU( 517170818384213989), KQU( 1612709252627729621), - KQU( 1330118955572449606), KQU( 300922478056709885), - KQU(18115693291289091987), KQU(13491407109725238321), - KQU(15293714633593827320), KQU( 5151539373053314504), - KQU( 5951523243743139207), KQU(14459112015249527975), - KQU( 5456113959000700739), KQU( 3877918438464873016), - KQU(12534071654260163555), KQU(15871678376893555041), - KQU(11005484805712025549), KQU(16353066973143374252), - KQU( 4358331472063256685), KQU( 8268349332210859288), - KQU(12485161590939658075), KQU(13955993592854471343), - KQU( 5911446886848367039), KQU(14925834086813706974), - KQU( 6590362597857994805), KQU( 
1280544923533661875), - KQU( 1637756018947988164), KQU( 4734090064512686329), - KQU(16693705263131485912), KQU( 6834882340494360958), - KQU( 8120732176159658505), KQU( 2244371958905329346), - KQU(10447499707729734021), KQU( 7318742361446942194), - KQU( 8032857516355555296), KQU(14023605983059313116), - KQU( 1032336061815461376), KQU( 9840995337876562612), - KQU( 9869256223029203587), KQU(12227975697177267636), - KQU(12728115115844186033), KQU( 7752058479783205470), - KQU( 729733219713393087), KQU(12954017801239007622) -}; -static const uint64_t init_by_array_64_expected[] = { - KQU( 2100341266307895239), KQU( 8344256300489757943), - KQU(15687933285484243894), KQU( 8268620370277076319), - KQU(12371852309826545459), KQU( 8800491541730110238), - KQU(18113268950100835773), KQU( 2886823658884438119), - KQU( 3293667307248180724), KQU( 9307928143300172731), - KQU( 7688082017574293629), KQU( 900986224735166665), - KQU( 9977972710722265039), KQU( 6008205004994830552), - KQU( 546909104521689292), KQU( 7428471521869107594), - KQU(14777563419314721179), KQU(16116143076567350053), - KQU( 5322685342003142329), KQU( 4200427048445863473), - KQU( 4693092150132559146), KQU(13671425863759338582), - KQU( 6747117460737639916), KQU( 4732666080236551150), - KQU( 5912839950611941263), KQU( 3903717554504704909), - KQU( 2615667650256786818), KQU(10844129913887006352), - KQU(13786467861810997820), KQU(14267853002994021570), - KQU(13767807302847237439), KQU(16407963253707224617), - KQU( 4802498363698583497), KQU( 2523802839317209764), - KQU( 3822579397797475589), KQU( 8950320572212130610), - KQU( 3745623504978342534), KQU(16092609066068482806), - KQU( 9817016950274642398), KQU(10591660660323829098), - KQU(11751606650792815920), KQU( 5122873818577122211), - KQU(17209553764913936624), KQU( 6249057709284380343), - KQU(15088791264695071830), KQU(15344673071709851930), - KQU( 4345751415293646084), KQU( 2542865750703067928), - KQU(13520525127852368784), KQU(18294188662880997241), - KQU( 
3871781938044881523), KQU( 2873487268122812184), - KQU(15099676759482679005), KQU(15442599127239350490), - KQU( 6311893274367710888), KQU( 3286118760484672933), - KQU( 4146067961333542189), KQU(13303942567897208770), - KQU( 8196013722255630418), KQU( 4437815439340979989), - KQU(15433791533450605135), KQU( 4254828956815687049), - KQU( 1310903207708286015), KQU(10529182764462398549), - KQU(14900231311660638810), KQU( 9727017277104609793), - KQU( 1821308310948199033), KQU(11628861435066772084), - KQU( 9469019138491546924), KQU( 3145812670532604988), - KQU( 9938468915045491919), KQU( 1562447430672662142), - KQU(13963995266697989134), KQU( 3356884357625028695), - KQU( 4499850304584309747), KQU( 8456825817023658122), - KQU(10859039922814285279), KQU( 8099512337972526555), - KQU( 348006375109672149), KQU(11919893998241688603), - KQU( 1104199577402948826), KQU(16689191854356060289), - KQU(10992552041730168078), KQU( 7243733172705465836), - KQU( 5668075606180319560), KQU(18182847037333286970), - KQU( 4290215357664631322), KQU( 4061414220791828613), - KQU(13006291061652989604), KQU( 7140491178917128798), - KQU(12703446217663283481), KQU( 5500220597564558267), - KQU(10330551509971296358), KQU(15958554768648714492), - KQU( 5174555954515360045), KQU( 1731318837687577735), - KQU( 3557700801048354857), KQU(13764012341928616198), - KQU(13115166194379119043), KQU( 7989321021560255519), - KQU( 2103584280905877040), KQU( 9230788662155228488), - KQU(16396629323325547654), KQU( 657926409811318051), - KQU(15046700264391400727), KQU( 5120132858771880830), - KQU( 7934160097989028561), KQU( 6963121488531976245), - KQU(17412329602621742089), KQU(15144843053931774092), - KQU(17204176651763054532), KQU(13166595387554065870), - KQU( 8590377810513960213), KQU( 5834365135373991938), - KQU( 7640913007182226243), KQU( 3479394703859418425), - KQU(16402784452644521040), KQU( 4993979809687083980), - KQU(13254522168097688865), KQU(15643659095244365219), - KQU( 5881437660538424982), 
KQU(11174892200618987379), - KQU( 254409966159711077), KQU(17158413043140549909), - KQU( 3638048789290376272), KQU( 1376816930299489190), - KQU( 4622462095217761923), KQU(15086407973010263515), - KQU(13253971772784692238), KQU( 5270549043541649236), - KQU(11182714186805411604), KQU(12283846437495577140), - KQU( 5297647149908953219), KQU(10047451738316836654), - KQU( 4938228100367874746), KQU(12328523025304077923), - KQU( 3601049438595312361), KQU( 9313624118352733770), - KQU(13322966086117661798), KQU(16660005705644029394), - KQU(11337677526988872373), KQU(13869299102574417795), - KQU(15642043183045645437), KQU( 3021755569085880019), - KQU( 4979741767761188161), KQU(13679979092079279587), - KQU( 3344685842861071743), KQU(13947960059899588104), - KQU( 305806934293368007), KQU( 5749173929201650029), - KQU(11123724852118844098), KQU(15128987688788879802), - KQU(15251651211024665009), KQU( 7689925933816577776), - KQU(16732804392695859449), KQU(17087345401014078468), - KQU(14315108589159048871), KQU( 4820700266619778917), - KQU(16709637539357958441), KQU( 4936227875177351374), - KQU( 2137907697912987247), KQU(11628565601408395420), - KQU( 2333250549241556786), KQU( 5711200379577778637), - KQU( 5170680131529031729), KQU(12620392043061335164), - KQU( 95363390101096078), KQU( 5487981914081709462), - KQU( 1763109823981838620), KQU( 3395861271473224396), - KQU( 1300496844282213595), KQU( 6894316212820232902), - KQU(10673859651135576674), KQU( 5911839658857903252), - KQU(17407110743387299102), KQU( 8257427154623140385), - KQU(11389003026741800267), KQU( 4070043211095013717), - KQU(11663806997145259025), KQU(15265598950648798210), - KQU( 630585789434030934), KQU( 3524446529213587334), - KQU( 7186424168495184211), KQU(10806585451386379021), - KQU(11120017753500499273), KQU( 1586837651387701301), - KQU(17530454400954415544), KQU( 9991670045077880430), - KQU( 7550997268990730180), KQU( 8640249196597379304), - KQU( 3522203892786893823), KQU(10401116549878854788), - 
KQU(13690285544733124852), KQU( 8295785675455774586), - KQU(15535716172155117603), KQU( 3112108583723722511), - KQU(17633179955339271113), KQU(18154208056063759375), - KQU( 1866409236285815666), KQU(13326075895396412882), - KQU( 8756261842948020025), KQU( 6281852999868439131), - KQU(15087653361275292858), KQU(10333923911152949397), - KQU( 5265567645757408500), KQU(12728041843210352184), - KQU( 6347959327507828759), KQU( 154112802625564758), - KQU(18235228308679780218), KQU( 3253805274673352418), - KQU( 4849171610689031197), KQU(17948529398340432518), - KQU(13803510475637409167), KQU(13506570190409883095), - KQU(15870801273282960805), KQU( 8451286481299170773), - KQU( 9562190620034457541), KQU( 8518905387449138364), - KQU(12681306401363385655), KQU( 3788073690559762558), - KQU( 5256820289573487769), KQU( 2752021372314875467), - KQU( 6354035166862520716), KQU( 4328956378309739069), - KQU( 449087441228269600), KQU( 5533508742653090868), - KQU( 1260389420404746988), KQU(18175394473289055097), - KQU( 1535467109660399420), KQU( 8818894282874061442), - KQU(12140873243824811213), KQU(15031386653823014946), - KQU( 1286028221456149232), KQU( 6329608889367858784), - KQU( 9419654354945132725), KQU( 6094576547061672379), - KQU(17706217251847450255), KQU( 1733495073065878126), - KQU(16918923754607552663), KQU( 8881949849954945044), - KQU(12938977706896313891), KQU(14043628638299793407), - KQU(18393874581723718233), KQU( 6886318534846892044), - KQU(14577870878038334081), KQU(13541558383439414119), - KQU(13570472158807588273), KQU(18300760537910283361), - KQU( 818368572800609205), KQU( 1417000585112573219), - KQU(12337533143867683655), KQU(12433180994702314480), - KQU( 778190005829189083), KQU(13667356216206524711), - KQU( 9866149895295225230), KQU(11043240490417111999), - KQU( 1123933826541378598), KQU( 6469631933605123610), - KQU(14508554074431980040), KQU(13918931242962026714), - KQU( 2870785929342348285), KQU(14786362626740736974), - KQU(13176680060902695786), KQU( 
9591778613541679456), - KQU( 9097662885117436706), KQU( 749262234240924947), - KQU( 1944844067793307093), KQU( 4339214904577487742), - KQU( 8009584152961946551), KQU(16073159501225501777), - KQU( 3335870590499306217), KQU(17088312653151202847), - KQU( 3108893142681931848), KQU(16636841767202792021), - KQU(10423316431118400637), KQU( 8008357368674443506), - KQU(11340015231914677875), KQU(17687896501594936090), - KQU(15173627921763199958), KQU( 542569482243721959), - KQU(15071714982769812975), KQU( 4466624872151386956), - KQU( 1901780715602332461), KQU( 9822227742154351098), - KQU( 1479332892928648780), KQU( 6981611948382474400), - KQU( 7620824924456077376), KQU(14095973329429406782), - KQU( 7902744005696185404), KQU(15830577219375036920), - KQU(10287076667317764416), KQU(12334872764071724025), - KQU( 4419302088133544331), KQU(14455842851266090520), - KQU(12488077416504654222), KQU( 7953892017701886766), - KQU( 6331484925529519007), KQU( 4902145853785030022), - KQU(17010159216096443073), KQU(11945354668653886087), - KQU(15112022728645230829), KQU(17363484484522986742), - KQU( 4423497825896692887), KQU( 8155489510809067471), - KQU( 258966605622576285), KQU( 5462958075742020534), - KQU( 6763710214913276228), KQU( 2368935183451109054), - KQU(14209506165246453811), KQU( 2646257040978514881), - KQU( 3776001911922207672), KQU( 1419304601390147631), - KQU(14987366598022458284), KQU( 3977770701065815721), - KQU( 730820417451838898), KQU( 3982991703612885327), - KQU( 2803544519671388477), KQU(17067667221114424649), - KQU( 2922555119737867166), KQU( 1989477584121460932), - KQU(15020387605892337354), KQU( 9293277796427533547), - KQU(10722181424063557247), KQU(16704542332047511651), - KQU( 5008286236142089514), KQU(16174732308747382540), - KQU(17597019485798338402), KQU(13081745199110622093), - KQU( 8850305883842258115), KQU(12723629125624589005), - KQU( 8140566453402805978), KQU(15356684607680935061), - KQU(14222190387342648650), KQU(11134610460665975178), - KQU( 
1259799058620984266), KQU(13281656268025610041), - KQU( 298262561068153992), KQU(12277871700239212922), - KQU(13911297774719779438), KQU(16556727962761474934), - KQU(17903010316654728010), KQU( 9682617699648434744), - KQU(14757681836838592850), KQU( 1327242446558524473), - KQU(11126645098780572792), KQU( 1883602329313221774), - KQU( 2543897783922776873), KQU(15029168513767772842), - KQU(12710270651039129878), KQU(16118202956069604504), - KQU(15010759372168680524), KQU( 2296827082251923948), - KQU(10793729742623518101), KQU(13829764151845413046), - KQU(17769301223184451213), KQU( 3118268169210783372), - KQU(17626204544105123127), KQU( 7416718488974352644), - KQU(10450751996212925994), KQU( 9352529519128770586), - KQU( 259347569641110140), KQU( 8048588892269692697), - KQU( 1774414152306494058), KQU(10669548347214355622), - KQU(13061992253816795081), KQU(18432677803063861659), - KQU( 8879191055593984333), KQU(12433753195199268041), - KQU(14919392415439730602), KQU( 6612848378595332963), - KQU( 6320986812036143628), KQU(10465592420226092859), - KQU( 4196009278962570808), KQU( 3747816564473572224), - KQU(17941203486133732898), KQU( 2350310037040505198), - KQU( 5811779859134370113), KQU(10492109599506195126), - KQU( 7699650690179541274), KQU( 1954338494306022961), - KQU(14095816969027231152), KQU( 5841346919964852061), - KQU(14945969510148214735), KQU( 3680200305887550992), - KQU( 6218047466131695792), KQU( 8242165745175775096), - KQU(11021371934053307357), KQU( 1265099502753169797), - KQU( 4644347436111321718), KQU( 3609296916782832859), - KQU( 8109807992218521571), KQU(18387884215648662020), - KQU(14656324896296392902), KQU(17386819091238216751), - KQU(17788300878582317152), KQU( 7919446259742399591), - KQU( 4466613134576358004), KQU(12928181023667938509), - KQU(13147446154454932030), KQU(16552129038252734620), - KQU( 8395299403738822450), KQU(11313817655275361164), - KQU( 434258809499511718), KQU( 2074882104954788676), - KQU( 7929892178759395518), KQU( 
9006461629105745388), - KQU( 5176475650000323086), KQU(11128357033468341069), - KQU(12026158851559118955), KQU(14699716249471156500), - KQU( 448982497120206757), KQU( 4156475356685519900), - KQU( 6063816103417215727), KQU(10073289387954971479), - KQU( 8174466846138590962), KQU( 2675777452363449006), - KQU( 9090685420572474281), KQU( 6659652652765562060), - KQU(12923120304018106621), KQU(11117480560334526775), - KQU( 937910473424587511), KQU( 1838692113502346645), - KQU(11133914074648726180), KQU( 7922600945143884053), - KQU(13435287702700959550), KQU( 5287964921251123332), - KQU(11354875374575318947), KQU(17955724760748238133), - KQU(13728617396297106512), KQU( 4107449660118101255), - KQU( 1210269794886589623), KQU(11408687205733456282), - KQU( 4538354710392677887), KQU(13566803319341319267), - KQU(17870798107734050771), KQU( 3354318982568089135), - KQU( 9034450839405133651), KQU(13087431795753424314), - KQU( 950333102820688239), KQU( 1968360654535604116), - KQU(16840551645563314995), KQU( 8867501803892924995), - KQU(11395388644490626845), KQU( 1529815836300732204), - KQU(13330848522996608842), KQU( 1813432878817504265), - KQU( 2336867432693429560), KQU(15192805445973385902), - KQU( 2528593071076407877), KQU( 128459777936689248), - KQU( 9976345382867214866), KQU( 6208885766767996043), - KQU(14982349522273141706), KQU( 3099654362410737822), - KQU(13776700761947297661), KQU( 8806185470684925550), - KQU( 8151717890410585321), KQU( 640860591588072925), - KQU(14592096303937307465), KQU( 9056472419613564846), - KQU(14861544647742266352), KQU(12703771500398470216), - KQU( 3142372800384138465), KQU( 6201105606917248196), - KQU(18337516409359270184), KQU(15042268695665115339), - KQU(15188246541383283846), KQU(12800028693090114519), - KQU( 5992859621101493472), KQU(18278043971816803521), - KQU( 9002773075219424560), KQU( 7325707116943598353), - KQU( 7930571931248040822), KQU( 5645275869617023448), - KQU( 7266107455295958487), KQU( 4363664528273524411), - 
KQU(14313875763787479809), KQU(17059695613553486802), - KQU( 9247761425889940932), KQU(13704726459237593128), - KQU( 2701312427328909832), KQU(17235532008287243115), - KQU(14093147761491729538), KQU( 6247352273768386516), - KQU( 8268710048153268415), KQU( 7985295214477182083), - KQU(15624495190888896807), KQU( 3772753430045262788), - KQU( 9133991620474991698), KQU( 5665791943316256028), - KQU( 7551996832462193473), KQU(13163729206798953877), - KQU( 9263532074153846374), KQU( 1015460703698618353), - KQU(17929874696989519390), KQU(18257884721466153847), - KQU(16271867543011222991), KQU( 3905971519021791941), - KQU(16814488397137052085), KQU( 1321197685504621613), - KQU( 2870359191894002181), KQU(14317282970323395450), - KQU(13663920845511074366), KQU( 2052463995796539594), - KQU(14126345686431444337), KQU( 1727572121947022534), - KQU(17793552254485594241), KQU( 6738857418849205750), - KQU( 1282987123157442952), KQU(16655480021581159251), - KQU( 6784587032080183866), KQU(14726758805359965162), - KQU( 7577995933961987349), KQU(12539609320311114036), - KQU(10789773033385439494), KQU( 8517001497411158227), - KQU(10075543932136339710), KQU(14838152340938811081), - KQU( 9560840631794044194), KQU(17445736541454117475), - KQU(10633026464336393186), KQU(15705729708242246293), - KQU( 1117517596891411098), KQU( 4305657943415886942), - KQU( 4948856840533979263), KQU(16071681989041789593), - KQU(13723031429272486527), KQU( 7639567622306509462), - KQU(12670424537483090390), KQU( 9715223453097197134), - KQU( 5457173389992686394), KQU( 289857129276135145), - KQU(17048610270521972512), KQU( 692768013309835485), - KQU(14823232360546632057), KQU(18218002361317895936), - KQU( 3281724260212650204), KQU(16453957266549513795), - KQU( 8592711109774511881), KQU( 929825123473369579), - KQU(15966784769764367791), KQU( 9627344291450607588), - KQU(10849555504977813287), KQU( 9234566913936339275), - KQU( 6413807690366911210), KQU(10862389016184219267), - KQU(13842504799335374048), KQU( 
1531994113376881174), - KQU( 2081314867544364459), KQU(16430628791616959932), - KQU( 8314714038654394368), KQU( 9155473892098431813), - KQU(12577843786670475704), KQU( 4399161106452401017), - KQU( 1668083091682623186), KQU( 1741383777203714216), - KQU( 2162597285417794374), KQU(15841980159165218736), - KQU( 1971354603551467079), KQU( 1206714764913205968), - KQU( 4790860439591272330), KQU(14699375615594055799), - KQU( 8374423871657449988), KQU(10950685736472937738), - KQU( 697344331343267176), KQU(10084998763118059810), - KQU(12897369539795983124), KQU(12351260292144383605), - KQU( 1268810970176811234), KQU( 7406287800414582768), - KQU( 516169557043807831), KQU( 5077568278710520380), - KQU( 3828791738309039304), KQU( 7721974069946943610), - KQU( 3534670260981096460), KQU( 4865792189600584891), - KQU(16892578493734337298), KQU( 9161499464278042590), - KQU(11976149624067055931), KQU(13219479887277343990), - KQU(14161556738111500680), KQU(14670715255011223056), - KQU( 4671205678403576558), KQU(12633022931454259781), - KQU(14821376219869187646), KQU( 751181776484317028), - KQU( 2192211308839047070), KQU(11787306362361245189), - KQU(10672375120744095707), KQU( 4601972328345244467), - KQU(15457217788831125879), KQU( 8464345256775460809), - KQU(10191938789487159478), KQU( 6184348739615197613), - KQU(11425436778806882100), KQU( 2739227089124319793), - KQU( 461464518456000551), KQU( 4689850170029177442), - KQU( 6120307814374078625), KQU(11153579230681708671), - KQU( 7891721473905347926), KQU(10281646937824872400), - KQU( 3026099648191332248), KQU( 8666750296953273818), - KQU(14978499698844363232), KQU(13303395102890132065), - KQU( 8182358205292864080), KQU(10560547713972971291), - KQU(11981635489418959093), KQU( 3134621354935288409), - KQU(11580681977404383968), KQU(14205530317404088650), - KQU( 5997789011854923157), KQU(13659151593432238041), - KQU(11664332114338865086), KQU( 7490351383220929386), - KQU( 7189290499881530378), KQU(15039262734271020220), - KQU( 
2057217285976980055), KQU( 555570804905355739), - KQU(11235311968348555110), KQU(13824557146269603217), - KQU(16906788840653099693), KQU( 7222878245455661677), - KQU( 5245139444332423756), KQU( 4723748462805674292), - KQU(12216509815698568612), KQU(17402362976648951187), - KQU(17389614836810366768), KQU( 4880936484146667711), - KQU( 9085007839292639880), KQU(13837353458498535449), - KQU(11914419854360366677), KQU(16595890135313864103), - KQU( 6313969847197627222), KQU(18296909792163910431), - KQU(10041780113382084042), KQU( 2499478551172884794), - KQU(11057894246241189489), KQU( 9742243032389068555), - KQU(12838934582673196228), KQU(13437023235248490367), - KQU(13372420669446163240), KQU( 6752564244716909224), - KQU( 7157333073400313737), KQU(12230281516370654308), - KQU( 1182884552219419117), KQU( 2955125381312499218), - KQU(10308827097079443249), KQU( 1337648572986534958), - KQU(16378788590020343939), KQU( 108619126514420935), - KQU( 3990981009621629188), KQU( 5460953070230946410), - KQU( 9703328329366531883), KQU(13166631489188077236), - KQU( 1104768831213675170), KQU( 3447930458553877908), - KQU( 8067172487769945676), KQU( 5445802098190775347), - KQU( 3244840981648973873), KQU(17314668322981950060), - KQU( 5006812527827763807), KQU(18158695070225526260), - KQU( 2824536478852417853), KQU(13974775809127519886), - KQU( 9814362769074067392), KQU(17276205156374862128), - KQU(11361680725379306967), KQU( 3422581970382012542), - KQU(11003189603753241266), KQU(11194292945277862261), - KQU( 6839623313908521348), KQU(11935326462707324634), - KQU( 1611456788685878444), KQU(13112620989475558907), - KQU( 517659108904450427), KQU(13558114318574407624), - KQU(15699089742731633077), KQU( 4988979278862685458), - KQU( 8111373583056521297), KQU( 3891258746615399627), - KQU( 8137298251469718086), KQU(12748663295624701649), - KQU( 4389835683495292062), KQU( 5775217872128831729), - KQU( 9462091896405534927), KQU( 8498124108820263989), - KQU( 8059131278842839525), 
KQU(10503167994254090892), - KQU(11613153541070396656), KQU(18069248738504647790), - KQU( 570657419109768508), KQU( 3950574167771159665), - KQU( 5514655599604313077), KQU( 2908460854428484165), - KQU(10777722615935663114), KQU(12007363304839279486), - KQU( 9800646187569484767), KQU( 8795423564889864287), - KQU(14257396680131028419), KQU( 6405465117315096498), - KQU( 7939411072208774878), KQU(17577572378528990006), - KQU(14785873806715994850), KQU(16770572680854747390), - KQU(18127549474419396481), KQU(11637013449455757750), - KQU(14371851933996761086), KQU( 3601181063650110280), - KQU( 4126442845019316144), KQU(10198287239244320669), - KQU(18000169628555379659), KQU(18392482400739978269), - KQU( 6219919037686919957), KQU( 3610085377719446052), - KQU( 2513925039981776336), KQU(16679413537926716955), - KQU(12903302131714909434), KQU( 5581145789762985009), - KQU(12325955044293303233), KQU(17216111180742141204), - KQU( 6321919595276545740), KQU( 3507521147216174501), - KQU( 9659194593319481840), KQU(11473976005975358326), - KQU(14742730101435987026), KQU( 492845897709954780), - KQU(16976371186162599676), KQU(17712703422837648655), - KQU( 9881254778587061697), KQU( 8413223156302299551), - KQU( 1563841828254089168), KQU( 9996032758786671975), - KQU( 138877700583772667), KQU(13003043368574995989), - KQU( 4390573668650456587), KQU( 8610287390568126755), - KQU(15126904974266642199), KQU( 6703637238986057662), - KQU( 2873075592956810157), KQU( 6035080933946049418), - KQU(13382846581202353014), KQU( 7303971031814642463), - KQU(18418024405307444267), KQU( 5847096731675404647), - KQU( 4035880699639842500), KQU(11525348625112218478), - KQU( 3041162365459574102), KQU( 2604734487727986558), - KQU(15526341771636983145), KQU(14556052310697370254), - KQU(12997787077930808155), KQU( 9601806501755554499), - KQU(11349677952521423389), KQU(14956777807644899350), - KQU(16559736957742852721), KQU(12360828274778140726), - KQU( 6685373272009662513), KQU(16932258748055324130), - 
KQU(15918051131954158508), KQU( 1692312913140790144), - KQU( 546653826801637367), KQU( 5341587076045986652), - KQU(14975057236342585662), KQU(12374976357340622412), - KQU(10328833995181940552), KQU(12831807101710443149), - KQU(10548514914382545716), KQU( 2217806727199715993), - KQU(12627067369242845138), KQU( 4598965364035438158), - KQU( 150923352751318171), KQU(14274109544442257283), - KQU( 4696661475093863031), KQU( 1505764114384654516), - KQU(10699185831891495147), KQU( 2392353847713620519), - KQU( 3652870166711788383), KQU( 8640653276221911108), - KQU( 3894077592275889704), KQU( 4918592872135964845), - KQU(16379121273281400789), KQU(12058465483591683656), - KQU(11250106829302924945), KQU( 1147537556296983005), - KQU( 6376342756004613268), KQU(14967128191709280506), - KQU(18007449949790627628), KQU( 9497178279316537841), - KQU( 7920174844809394893), KQU(10037752595255719907), - KQU(15875342784985217697), KQU(15311615921712850696), - KQU( 9552902652110992950), KQU(14054979450099721140), - KQU( 5998709773566417349), KQU(18027910339276320187), - KQU( 8223099053868585554), KQU( 7842270354824999767), - KQU( 4896315688770080292), KQU(12969320296569787895), - KQU( 2674321489185759961), KQU( 4053615936864718439), - KQU(11349775270588617578), KQU( 4743019256284553975), - KQU( 5602100217469723769), KQU(14398995691411527813), - KQU( 7412170493796825470), KQU( 836262406131744846), - KQU( 8231086633845153022), KQU( 5161377920438552287), - KQU( 8828731196169924949), KQU(16211142246465502680), - KQU( 3307990879253687818), KQU( 5193405406899782022), - KQU( 8510842117467566693), KQU( 6070955181022405365), - KQU(14482950231361409799), KQU(12585159371331138077), - KQU( 3511537678933588148), KQU( 2041849474531116417), - KQU(10944936685095345792), KQU(18303116923079107729), - KQU( 2720566371239725320), KQU( 4958672473562397622), - KQU( 3032326668253243412), KQU(13689418691726908338), - KQU( 1895205511728843996), KQU( 8146303515271990527), - KQU(16507343500056113480), KQU( 
473996939105902919), - KQU( 9897686885246881481), KQU(14606433762712790575), - KQU( 6732796251605566368), KQU( 1399778120855368916), - KQU( 935023885182833777), KQU(16066282816186753477), - KQU( 7291270991820612055), KQU(17530230393129853844), - KQU(10223493623477451366), KQU(15841725630495676683), - KQU(17379567246435515824), KQU( 8588251429375561971), - KQU(18339511210887206423), KQU(17349587430725976100), - KQU(12244876521394838088), KQU( 6382187714147161259), - KQU(12335807181848950831), KQU(16948885622305460665), - KQU(13755097796371520506), KQU(14806740373324947801), - KQU( 4828699633859287703), KQU( 8209879281452301604), - KQU(12435716669553736437), KQU(13970976859588452131), - KQU( 6233960842566773148), KQU(12507096267900505759), - KQU( 1198713114381279421), KQU(14989862731124149015), - KQU(15932189508707978949), KQU( 2526406641432708722), - KQU( 29187427817271982), KQU( 1499802773054556353), - KQU(10816638187021897173), KQU( 5436139270839738132), - KQU( 6659882287036010082), KQU( 2154048955317173697), - KQU(10887317019333757642), KQU(16281091802634424955), - KQU(10754549879915384901), KQU(10760611745769249815), - KQU( 2161505946972504002), KQU( 5243132808986265107), - KQU(10129852179873415416), KQU( 710339480008649081), - KQU( 7802129453068808528), KQU(17967213567178907213), - KQU(15730859124668605599), KQU(13058356168962376502), - KQU( 3701224985413645909), KQU(14464065869149109264), - KQU( 9959272418844311646), KQU(10157426099515958752), - KQU(14013736814538268528), KQU(17797456992065653951), - KQU(17418878140257344806), KQU(15457429073540561521), - KQU( 2184426881360949378), KQU( 2062193041154712416), - KQU( 8553463347406931661), KQU( 4913057625202871854), - KQU( 2668943682126618425), KQU(17064444737891172288), - KQU( 4997115903913298637), KQU(12019402608892327416), - KQU(17603584559765897352), KQU(11367529582073647975), - KQU( 8211476043518436050), KQU( 8676849804070323674), - KQU(18431829230394475730), KQU(10490177861361247904), - KQU( 
9508720602025651349), KQU( 7409627448555722700), - KQU( 5804047018862729008), KQU(11943858176893142594), - KQU(11908095418933847092), KQU( 5415449345715887652), - KQU( 1554022699166156407), KQU( 9073322106406017161), - KQU( 7080630967969047082), KQU(18049736940860732943), - KQU(12748714242594196794), KQU( 1226992415735156741), - KQU(17900981019609531193), KQU(11720739744008710999), - KQU( 3006400683394775434), KQU(11347974011751996028), - KQU( 3316999628257954608), KQU( 8384484563557639101), - KQU(18117794685961729767), KQU( 1900145025596618194), - KQU(17459527840632892676), KQU( 5634784101865710994), - KQU( 7918619300292897158), KQU( 3146577625026301350), - KQU( 9955212856499068767), KQU( 1873995843681746975), - KQU( 1561487759967972194), KQU( 8322718804375878474), - KQU(11300284215327028366), KQU( 4667391032508998982), - KQU( 9820104494306625580), KQU(17922397968599970610), - KQU( 1784690461886786712), KQU(14940365084341346821), - KQU( 5348719575594186181), KQU(10720419084507855261), - KQU(14210394354145143274), KQU( 2426468692164000131), - KQU(16271062114607059202), KQU(14851904092357070247), - KQU( 6524493015693121897), KQU( 9825473835127138531), - KQU(14222500616268569578), KQU(15521484052007487468), - KQU(14462579404124614699), KQU(11012375590820665520), - KQU(11625327350536084927), KQU(14452017765243785417), - KQU( 9989342263518766305), KQU( 3640105471101803790), - KQU( 4749866455897513242), KQU(13963064946736312044), - KQU(10007416591973223791), KQU(18314132234717431115), - KQU( 3286596588617483450), KQU( 7726163455370818765), - KQU( 7575454721115379328), KQU( 5308331576437663422), - KQU(18288821894903530934), KQU( 8028405805410554106), - KQU(15744019832103296628), KQU( 149765559630932100), - KQU( 6137705557200071977), KQU(14513416315434803615), - KQU(11665702820128984473), KQU( 218926670505601386), - KQU( 6868675028717769519), KQU(15282016569441512302), - KQU( 5707000497782960236), KQU( 6671120586555079567), - KQU( 2194098052618985448), 
KQU(16849577895477330978), - KQU(12957148471017466283), KQU( 1997805535404859393), - KQU( 1180721060263860490), KQU(13206391310193756958), - KQU(12980208674461861797), KQU( 3825967775058875366), - KQU(17543433670782042631), KQU( 1518339070120322730), - KQU(16344584340890991669), KQU( 2611327165318529819), - KQU(11265022723283422529), KQU( 4001552800373196817), - KQU(14509595890079346161), KQU( 3528717165416234562), - KQU(18153222571501914072), KQU( 9387182977209744425), - KQU(10064342315985580021), KQU(11373678413215253977), - KQU( 2308457853228798099), KQU( 9729042942839545302), - KQU( 7833785471140127746), KQU( 6351049900319844436), - KQU(14454610627133496067), KQU(12533175683634819111), - KQU(15570163926716513029), KQU(13356980519185762498) -}; +static const uint32_t init_gen_rand_32_expected[] = {3440181298U, 1564997079U, + 1510669302U, 2930277156U, 1452439940U, 3796268453U, 423124208U, 2143818589U, + 3827219408U, 2987036003U, 2674978610U, 1536842514U, 2027035537U, + 2534897563U, 1686527725U, 545368292U, 1489013321U, 1370534252U, 4231012796U, + 3994803019U, 1764869045U, 824597505U, 862581900U, 2469764249U, 812862514U, + 359318673U, 116957936U, 3367389672U, 2327178354U, 1898245200U, 3206507879U, + 2378925033U, 1040214787U, 2524778605U, 3088428700U, 1417665896U, 964324147U, + 2282797708U, 2456269299U, 313400376U, 2245093271U, 1015729427U, 2694465011U, + 3246975184U, 1992793635U, 463679346U, 3721104591U, 3475064196U, 856141236U, + 1499559719U, 3522818941U, 3721533109U, 1954826617U, 1282044024U, + 1543279136U, 1301863085U, 2669145051U, 4221477354U, 3896016841U, + 3392740262U, 462466863U, 1037679449U, 1228140306U, 922298197U, 1205109853U, + 1872938061U, 3102547608U, 2742766808U, 1888626088U, 4028039414U, 157593879U, + 1136901695U, 4038377686U, 3572517236U, 4231706728U, 2997311961U, + 1189931652U, 3981543765U, 2826166703U, 87159245U, 1721379072U, 3897926942U, + 1790395498U, 2569178939U, 1047368729U, 2340259131U, 3144212906U, + 2301169789U, 2442885464U, 3034046771U, 
3667880593U, 3935928400U, + 2372805237U, 1666397115U, 2460584504U, 513866770U, 3810869743U, 2147400037U, + 2792078025U, 2941761810U, 3212265810U, 984692259U, 346590253U, 1804179199U, + 3298543443U, 750108141U, 2880257022U, 243310542U, 1869036465U, 1588062513U, + 2983949551U, 1931450364U, 4034505847U, 2735030199U, 1628461061U, + 2539522841U, 127965585U, 3992448871U, 913388237U, 559130076U, 1202933193U, + 4087643167U, 2590021067U, 2256240196U, 1746697293U, 1013913783U, + 1155864921U, 2715773730U, 915061862U, 1948766573U, 2322882854U, 3761119102U, + 1343405684U, 3078711943U, 3067431651U, 3245156316U, 3588354584U, + 3484623306U, 3899621563U, 4156689741U, 3237090058U, 3880063844U, 862416318U, + 4039923869U, 2303788317U, 3073590536U, 701653667U, 2131530884U, 3169309950U, + 2028486980U, 747196777U, 3620218225U, 432016035U, 1449580595U, 2772266392U, + 444224948U, 1662832057U, 3184055582U, 3028331792U, 1861686254U, 1104864179U, + 342430307U, 1350510923U, 3024656237U, 1028417492U, 2870772950U, 290847558U, + 3675663500U, 508431529U, 4264340390U, 2263569913U, 1669302976U, 519511383U, + 2706411211U, 3764615828U, 3883162495U, 4051445305U, 2412729798U, + 3299405164U, 3991911166U, 2348767304U, 2664054906U, 3763609282U, 593943581U, + 3757090046U, 2075338894U, 2020550814U, 4287452920U, 4290140003U, + 1422957317U, 2512716667U, 2003485045U, 2307520103U, 2288472169U, + 3940751663U, 4204638664U, 2892583423U, 1710068300U, 3904755993U, + 2363243951U, 3038334120U, 547099465U, 771105860U, 3199983734U, 4282046461U, + 2298388363U, 934810218U, 2837827901U, 3952500708U, 2095130248U, 3083335297U, + 26885281U, 3932155283U, 1531751116U, 1425227133U, 495654159U, 3279634176U, + 3855562207U, 3957195338U, 4159985527U, 893375062U, 1875515536U, 1327247422U, + 3754140693U, 1028923197U, 1729880440U, 805571298U, 448971099U, 2726757106U, + 2749436461U, 2485987104U, 175337042U, 3235477922U, 3882114302U, 2020970972U, + 943926109U, 2762587195U, 1904195558U, 3452650564U, 108432281U, 3893463573U, + 3977583081U, 
2636504348U, 1110673525U, 3548479841U, 4258854744U, 980047703U, + 4057175418U, 3890008292U, 145653646U, 3141868989U, 3293216228U, 1194331837U, + 1254570642U, 3049934521U, 2868313360U, 2886032750U, 1110873820U, 279553524U, + 3007258565U, 1104807822U, 3186961098U, 315764646U, 2163680838U, 3574508994U, + 3099755655U, 191957684U, 3642656737U, 3317946149U, 3522087636U, 444526410U, + 779157624U, 1088229627U, 1092460223U, 1856013765U, 3659877367U, 368270451U, + 503570716U, 3000984671U, 2742789647U, 928097709U, 2914109539U, 308843566U, + 2816161253U, 3667192079U, 2762679057U, 3395240989U, 2928925038U, + 1491465914U, 3458702834U, 3787782576U, 2894104823U, 1296880455U, + 1253636503U, 989959407U, 2291560361U, 2776790436U, 1913178042U, 1584677829U, + 689637520U, 1898406878U, 688391508U, 3385234998U, 845493284U, 1943591856U, + 2720472050U, 222695101U, 1653320868U, 2904632120U, 4084936008U, 1080720688U, + 3938032556U, 387896427U, 2650839632U, 99042991U, 1720913794U, 1047186003U, + 1877048040U, 2090457659U, 517087501U, 4172014665U, 2129713163U, 2413533132U, + 2760285054U, 4129272496U, 1317737175U, 2309566414U, 2228873332U, + 3889671280U, 1110864630U, 3576797776U, 2074552772U, 832002644U, 3097122623U, + 2464859298U, 2679603822U, 1667489885U, 3237652716U, 1478413938U, + 1719340335U, 2306631119U, 639727358U, 3369698270U, 226902796U, 2099920751U, + 1892289957U, 2201594097U, 3508197013U, 3495811856U, 3900381493U, 841660320U, + 3974501451U, 3360949056U, 1676829340U, 728899254U, 2047809627U, 2390948962U, + 670165943U, 3412951831U, 4189320049U, 1911595255U, 2055363086U, 507170575U, + 418219594U, 4141495280U, 2692088692U, 4203630654U, 3540093932U, 791986533U, + 2237921051U, 2526864324U, 2956616642U, 1394958700U, 1983768223U, + 1893373266U, 591653646U, 228432437U, 1611046598U, 3007736357U, 1040040725U, + 2726180733U, 2789804360U, 4263568405U, 829098158U, 3847722805U, 1123578029U, + 1804276347U, 997971319U, 4203797076U, 4185199713U, 2811733626U, 2343642194U, + 2985262313U, 1417930827U, 
3759587724U, 1967077982U, 1585223204U, + 1097475516U, 1903944948U, 740382444U, 1114142065U, 1541796065U, 1718384172U, + 1544076191U, 1134682254U, 3519754455U, 2866243923U, 341865437U, 645498576U, + 2690735853U, 1046963033U, 2493178460U, 1187604696U, 1619577821U, 488503634U, + 3255768161U, 2306666149U, 1630514044U, 2377698367U, 2751503746U, + 3794467088U, 1796415981U, 3657173746U, 409136296U, 1387122342U, 1297726519U, + 219544855U, 4270285558U, 437578827U, 1444698679U, 2258519491U, 963109892U, + 3982244073U, 3351535275U, 385328496U, 1804784013U, 698059346U, 3920535147U, + 708331212U, 784338163U, 785678147U, 1238376158U, 1557298846U, 2037809321U, + 271576218U, 4145155269U, 1913481602U, 2763691931U, 588981080U, 1201098051U, + 3717640232U, 1509206239U, 662536967U, 3180523616U, 1133105435U, 2963500837U, + 2253971215U, 3153642623U, 1066925709U, 2582781958U, 3034720222U, + 1090798544U, 2942170004U, 4036187520U, 686972531U, 2610990302U, 2641437026U, + 1837562420U, 722096247U, 1315333033U, 2102231203U, 3402389208U, 3403698140U, + 1312402831U, 2898426558U, 814384596U, 385649582U, 1916643285U, 1924625106U, + 2512905582U, 2501170304U, 4275223366U, 2841225246U, 1467663688U, + 3563567847U, 2969208552U, 884750901U, 102992576U, 227844301U, 3681442994U, + 3502881894U, 4034693299U, 1166727018U, 1697460687U, 1737778332U, + 1787161139U, 1053003655U, 1215024478U, 2791616766U, 2525841204U, + 1629323443U, 3233815U, 2003823032U, 3083834263U, 2379264872U, 3752392312U, + 1287475550U, 3770904171U, 3004244617U, 1502117784U, 918698423U, 2419857538U, + 3864502062U, 1751322107U, 2188775056U, 4018728324U, 983712955U, 440071928U, + 3710838677U, 2001027698U, 3994702151U, 22493119U, 3584400918U, 3446253670U, + 4254789085U, 1405447860U, 1240245579U, 1800644159U, 1661363424U, + 3278326132U, 3403623451U, 67092802U, 2609352193U, 3914150340U, 1814842761U, + 3610830847U, 591531412U, 3880232807U, 1673505890U, 2585326991U, 1678544474U, + 3148435887U, 3457217359U, 1193226330U, 2816576908U, 154025329U, 
121678860U, + 1164915738U, 973873761U, 269116100U, 52087970U, 744015362U, 498556057U, + 94298882U, 1563271621U, 2383059628U, 4197367290U, 3958472990U, 2592083636U, + 2906408439U, 1097742433U, 3924840517U, 264557272U, 2292287003U, 3203307984U, + 4047038857U, 3820609705U, 2333416067U, 1839206046U, 3600944252U, + 3412254904U, 583538222U, 2390557166U, 4140459427U, 2810357445U, 226777499U, + 2496151295U, 2207301712U, 3283683112U, 611630281U, 1933218215U, 3315610954U, + 3889441987U, 3719454256U, 3957190521U, 1313998161U, 2365383016U, + 3146941060U, 1801206260U, 796124080U, 2076248581U, 1747472464U, 3254365145U, + 595543130U, 3573909503U, 3758250204U, 2020768540U, 2439254210U, 93368951U, + 3155792250U, 2600232980U, 3709198295U, 3894900440U, 2971850836U, + 1578909644U, 1443493395U, 2581621665U, 3086506297U, 2443465861U, 558107211U, + 1519367835U, 249149686U, 908102264U, 2588765675U, 1232743965U, 1001330373U, + 3561331654U, 2259301289U, 1564977624U, 3835077093U, 727244906U, 4255738067U, + 1214133513U, 2570786021U, 3899704621U, 1633861986U, 1636979509U, + 1438500431U, 58463278U, 2823485629U, 2297430187U, 2926781924U, 3371352948U, + 1864009023U, 2722267973U, 1444292075U, 437703973U, 1060414512U, 189705863U, + 910018135U, 4077357964U, 884213423U, 2644986052U, 3973488374U, 1187906116U, + 2331207875U, 780463700U, 3713351662U, 3854611290U, 412805574U, 2978462572U, + 2176222820U, 829424696U, 2790788332U, 2750819108U, 1594611657U, 3899878394U, + 3032870364U, 1702887682U, 1948167778U, 14130042U, 192292500U, 947227076U, + 90719497U, 3854230320U, 784028434U, 2142399787U, 1563449646U, 2844400217U, + 819143172U, 2883302356U, 2328055304U, 1328532246U, 2603885363U, 3375188924U, + 933941291U, 3627039714U, 2129697284U, 2167253953U, 2506905438U, 1412424497U, + 2981395985U, 1418359660U, 2925902456U, 52752784U, 3713667988U, 3924669405U, + 648975707U, 1145520213U, 4018650664U, 3805915440U, 2380542088U, 2013260958U, + 3262572197U, 2465078101U, 1114540067U, 3728768081U, 2396958768U, 590672271U, + 
904818725U, 4263660715U, 700754408U, 1042601829U, 4094111823U, 4274838909U, + 2512692617U, 2774300207U, 2057306915U, 3470942453U, 99333088U, 1142661026U, + 2889931380U, 14316674U, 2201179167U, 415289459U, 448265759U, 3515142743U, + 3254903683U, 246633281U, 1184307224U, 2418347830U, 2092967314U, 2682072314U, + 2558750234U, 2000352263U, 1544150531U, 399010405U, 1513946097U, 499682937U, + 461167460U, 3045570638U, 1633669705U, 851492362U, 4052801922U, 2055266765U, + 635556996U, 368266356U, 2385737383U, 3218202352U, 2603772408U, 349178792U, + 226482567U, 3102426060U, 3575998268U, 2103001871U, 3243137071U, 225500688U, + 1634718593U, 4283311431U, 4292122923U, 3842802787U, 811735523U, 105712518U, + 663434053U, 1855889273U, 2847972595U, 1196355421U, 2552150115U, 4254510614U, + 3752181265U, 3430721819U, 3828705396U, 3436287905U, 3441964937U, + 4123670631U, 353001539U, 459496439U, 3799690868U, 1293777660U, 2761079737U, + 498096339U, 3398433374U, 4080378380U, 2304691596U, 2995729055U, 4134660419U, + 3903444024U, 3576494993U, 203682175U, 3321164857U, 2747963611U, 79749085U, + 2992890370U, 1240278549U, 1772175713U, 2111331972U, 2655023449U, + 1683896345U, 2836027212U, 3482868021U, 2489884874U, 756853961U, 2298874501U, + 4013448667U, 4143996022U, 2948306858U, 4132920035U, 1283299272U, 995592228U, + 3450508595U, 1027845759U, 1766942720U, 3861411826U, 1446861231U, 95974993U, + 3502263554U, 1487532194U, 601502472U, 4129619129U, 250131773U, 2050079547U, + 3198903947U, 3105589778U, 4066481316U, 3026383978U, 2276901713U, 365637751U, + 2260718426U, 1394775634U, 1791172338U, 2690503163U, 2952737846U, + 1568710462U, 732623190U, 2980358000U, 1053631832U, 1432426951U, 3229149635U, + 1854113985U, 3719733532U, 3204031934U, 735775531U, 107468620U, 3734611984U, + 631009402U, 3083622457U, 4109580626U, 159373458U, 1301970201U, 4132389302U, + 1293255004U, 847182752U, 4170022737U, 96712900U, 2641406755U, 1381727755U, + 405608287U, 4287919625U, 1703554290U, 3589580244U, 2911403488U, 2166565U, + 
2647306451U, 2330535117U, 1200815358U, 1165916754U, 245060911U, 4040679071U, + 3684908771U, 2452834126U, 2486872773U, 2318678365U, 2940627908U, + 1837837240U, 3447897409U, 4270484676U, 1495388728U, 3754288477U, + 4204167884U, 1386977705U, 2692224733U, 3076249689U, 4109568048U, + 4170955115U, 4167531356U, 4020189950U, 4261855038U, 3036907575U, + 3410399885U, 3076395737U, 1046178638U, 144496770U, 230725846U, 3349637149U, + 17065717U, 2809932048U, 2054581785U, 3608424964U, 3259628808U, 134897388U, + 3743067463U, 257685904U, 3795656590U, 1562468719U, 3589103904U, 3120404710U, + 254684547U, 2653661580U, 3663904795U, 2631942758U, 1063234347U, 2609732900U, + 2332080715U, 3521125233U, 1180599599U, 1935868586U, 4110970440U, 296706371U, + 2128666368U, 1319875791U, 1570900197U, 3096025483U, 1799882517U, + 1928302007U, 1163707758U, 1244491489U, 3533770203U, 567496053U, 2757924305U, + 2781639343U, 2818420107U, 560404889U, 2619609724U, 4176035430U, 2511289753U, + 2521842019U, 3910553502U, 2926149387U, 3302078172U, 4237118867U, 330725126U, + 367400677U, 888239854U, 545570454U, 4259590525U, 134343617U, 1102169784U, + 1647463719U, 3260979784U, 1518840883U, 3631537963U, 3342671457U, + 1301549147U, 2083739356U, 146593792U, 3217959080U, 652755743U, 2032187193U, + 3898758414U, 1021358093U, 4037409230U, 2176407931U, 3427391950U, + 2883553603U, 985613827U, 3105265092U, 3423168427U, 3387507672U, 467170288U, + 2141266163U, 3723870208U, 916410914U, 1293987799U, 2652584950U, 769160137U, + 3205292896U, 1561287359U, 1684510084U, 3136055621U, 3765171391U, 639683232U, + 2639569327U, 1218546948U, 4263586685U, 3058215773U, 2352279820U, 401870217U, + 2625822463U, 1529125296U, 2981801895U, 1191285226U, 4027725437U, + 3432700217U, 4098835661U, 971182783U, 2443861173U, 3881457123U, 3874386651U, + 457276199U, 2638294160U, 4002809368U, 421169044U, 1112642589U, 3076213779U, + 3387033971U, 2499610950U, 3057240914U, 1662679783U, 461224431U, + 1168395933U}; +static const uint32_t init_by_array_32_expected[] 
= {2920711183U, 3885745737U, + 3501893680U, 856470934U, 1421864068U, 277361036U, 1518638004U, 2328404353U, + 3355513634U, 64329189U, 1624587673U, 3508467182U, 2481792141U, 3706480799U, + 1925859037U, 2913275699U, 882658412U, 384641219U, 422202002U, 1873384891U, + 2006084383U, 3924929912U, 1636718106U, 3108838742U, 1245465724U, + 4195470535U, 779207191U, 1577721373U, 1390469554U, 2928648150U, 121399709U, + 3170839019U, 4044347501U, 953953814U, 3821710850U, 3085591323U, 3666535579U, + 3577837737U, 2012008410U, 3565417471U, 4044408017U, 433600965U, 1637785608U, + 1798509764U, 860770589U, 3081466273U, 3982393409U, 2451928325U, 3437124742U, + 4093828739U, 3357389386U, 2154596123U, 496568176U, 2650035164U, 2472361850U, + 3438299U, 2150366101U, 1577256676U, 3802546413U, 1787774626U, 4078331588U, + 3706103141U, 170391138U, 3806085154U, 1680970100U, 1961637521U, 3316029766U, + 890610272U, 1453751581U, 1430283664U, 3051057411U, 3597003186U, 542563954U, + 3796490244U, 1690016688U, 3448752238U, 440702173U, 347290497U, 1121336647U, + 2540588620U, 280881896U, 2495136428U, 213707396U, 15104824U, 2946180358U, + 659000016U, 566379385U, 2614030979U, 2855760170U, 334526548U, 2315569495U, + 2729518615U, 564745877U, 1263517638U, 3157185798U, 1604852056U, 1011639885U, + 2950579535U, 2524219188U, 312951012U, 1528896652U, 1327861054U, 2846910138U, + 3966855905U, 2536721582U, 855353911U, 1685434729U, 3303978929U, 1624872055U, + 4020329649U, 3164802143U, 1642802700U, 1957727869U, 1792352426U, + 3334618929U, 2631577923U, 3027156164U, 842334259U, 3353446843U, 1226432104U, + 1742801369U, 3552852535U, 3471698828U, 1653910186U, 3380330939U, + 2313782701U, 3351007196U, 2129839995U, 1800682418U, 4085884420U, + 1625156629U, 3669701987U, 615211810U, 3294791649U, 4131143784U, 2590843588U, + 3207422808U, 3275066464U, 561592872U, 3957205738U, 3396578098U, 48410678U, + 3505556445U, 1005764855U, 3920606528U, 2936980473U, 2378918600U, + 2404449845U, 1649515163U, 701203563U, 3705256349U, 83714199U, 
3586854132U, + 922978446U, 2863406304U, 3523398907U, 2606864832U, 2385399361U, 3171757816U, + 4262841009U, 3645837721U, 1169579486U, 3666433897U, 3174689479U, + 1457866976U, 3803895110U, 3346639145U, 1907224409U, 1978473712U, + 1036712794U, 980754888U, 1302782359U, 1765252468U, 459245755U, 3728923860U, + 1512894209U, 2046491914U, 207860527U, 514188684U, 2288713615U, 1597354672U, + 3349636117U, 2357291114U, 3995796221U, 945364213U, 1893326518U, 3770814016U, + 1691552714U, 2397527410U, 967486361U, 776416472U, 4197661421U, 951150819U, + 1852770983U, 4044624181U, 1399439738U, 4194455275U, 2284037669U, + 1550734958U, 3321078108U, 1865235926U, 2912129961U, 2664980877U, + 1357572033U, 2600196436U, 2486728200U, 2372668724U, 1567316966U, + 2374111491U, 1839843570U, 20815612U, 3727008608U, 3871996229U, 824061249U, + 1932503978U, 3404541726U, 758428924U, 2609331364U, 1223966026U, 1299179808U, + 648499352U, 2180134401U, 880821170U, 3781130950U, 113491270U, 1032413764U, + 4185884695U, 2490396037U, 1201932817U, 4060951446U, 4165586898U, + 1629813212U, 2887821158U, 415045333U, 628926856U, 2193466079U, 3391843445U, + 2227540681U, 1907099846U, 2848448395U, 1717828221U, 1372704537U, + 1707549841U, 2294058813U, 2101214437U, 2052479531U, 1695809164U, + 3176587306U, 2632770465U, 81634404U, 1603220563U, 644238487U, 302857763U, + 897352968U, 2613146653U, 1391730149U, 4245717312U, 4191828749U, 1948492526U, + 2618174230U, 3992984522U, 2178852787U, 3596044509U, 3445573503U, + 2026614616U, 915763564U, 3415689334U, 2532153403U, 3879661562U, 2215027417U, + 3111154986U, 2929478371U, 668346391U, 1152241381U, 2632029711U, 3004150659U, + 2135025926U, 948690501U, 2799119116U, 4228829406U, 1981197489U, 4209064138U, + 684318751U, 3459397845U, 201790843U, 4022541136U, 3043635877U, 492509624U, + 3263466772U, 1509148086U, 921459029U, 3198857146U, 705479721U, 3835966910U, + 3603356465U, 576159741U, 1742849431U, 594214882U, 2055294343U, 3634861861U, + 449571793U, 3246390646U, 3868232151U, 1479156585U, 
2900125656U, 2464815318U, + 3960178104U, 1784261920U, 18311476U, 3627135050U, 644609697U, 424968996U, + 919890700U, 2986824110U, 816423214U, 4003562844U, 1392714305U, 1757384428U, + 2569030598U, 995949559U, 3875659880U, 2933807823U, 2752536860U, 2993858466U, + 4030558899U, 2770783427U, 2775406005U, 2777781742U, 1931292655U, 472147933U, + 3865853827U, 2726470545U, 2668412860U, 2887008249U, 408979190U, 3578063323U, + 3242082049U, 1778193530U, 27981909U, 2362826515U, 389875677U, 1043878156U, + 581653903U, 3830568952U, 389535942U, 3713523185U, 2768373359U, 2526101582U, + 1998618197U, 1160859704U, 3951172488U, 1098005003U, 906275699U, 3446228002U, + 2220677963U, 2059306445U, 132199571U, 476838790U, 1868039399U, 3097344807U, + 857300945U, 396345050U, 2835919916U, 1782168828U, 1419519470U, 4288137521U, + 819087232U, 596301494U, 872823172U, 1526888217U, 805161465U, 1116186205U, + 2829002754U, 2352620120U, 620121516U, 354159268U, 3601949785U, 209568138U, + 1352371732U, 2145977349U, 4236871834U, 1539414078U, 3558126206U, + 3224857093U, 4164166682U, 3817553440U, 3301780278U, 2682696837U, + 3734994768U, 1370950260U, 1477421202U, 2521315749U, 1330148125U, + 1261554731U, 2769143688U, 3554756293U, 4235882678U, 3254686059U, + 3530579953U, 1215452615U, 3574970923U, 4057131421U, 589224178U, 1000098193U, + 171190718U, 2521852045U, 2351447494U, 2284441580U, 2646685513U, 3486933563U, + 3789864960U, 1190528160U, 1702536782U, 1534105589U, 4262946827U, + 2726686826U, 3584544841U, 2348270128U, 2145092281U, 2502718509U, + 1027832411U, 3571171153U, 1287361161U, 4011474411U, 3241215351U, + 2419700818U, 971242709U, 1361975763U, 1096842482U, 3271045537U, 81165449U, + 612438025U, 3912966678U, 1356929810U, 733545735U, 537003843U, 1282953084U, + 884458241U, 588930090U, 3930269801U, 2961472450U, 1219535534U, 3632251943U, + 268183903U, 1441240533U, 3653903360U, 3854473319U, 2259087390U, 2548293048U, + 2022641195U, 2105543911U, 1764085217U, 3246183186U, 482438805U, 888317895U, + 2628314765U, 
2466219854U, 717546004U, 2322237039U, 416725234U, 1544049923U, + 1797944973U, 3398652364U, 3111909456U, 485742908U, 2277491072U, 1056355088U, + 3181001278U, 129695079U, 2693624550U, 1764438564U, 3797785470U, 195503713U, + 3266519725U, 2053389444U, 1961527818U, 3400226523U, 3777903038U, + 2597274307U, 4235851091U, 4094406648U, 2171410785U, 1781151386U, + 1378577117U, 654643266U, 3424024173U, 3385813322U, 679385799U, 479380913U, + 681715441U, 3096225905U, 276813409U, 3854398070U, 2721105350U, 831263315U, + 3276280337U, 2628301522U, 3984868494U, 1466099834U, 2104922114U, + 1412672743U, 820330404U, 3491501010U, 942735832U, 710652807U, 3972652090U, + 679881088U, 40577009U, 3705286397U, 2815423480U, 3566262429U, 663396513U, + 3777887429U, 4016670678U, 404539370U, 1142712925U, 1140173408U, 2913248352U, + 2872321286U, 263751841U, 3175196073U, 3162557581U, 2878996619U, 75498548U, + 3836833140U, 3284664959U, 1157523805U, 112847376U, 207855609U, 1337979698U, + 1222578451U, 157107174U, 901174378U, 3883717063U, 1618632639U, 1767889440U, + 4264698824U, 1582999313U, 884471997U, 2508825098U, 3756370771U, 2457213553U, + 3565776881U, 3709583214U, 915609601U, 460833524U, 1091049576U, 85522880U, + 2553251U, 132102809U, 2429882442U, 2562084610U, 1386507633U, 4112471229U, + 21965213U, 1981516006U, 2418435617U, 3054872091U, 4251511224U, 2025783543U, + 1916911512U, 2454491136U, 3938440891U, 3825869115U, 1121698605U, + 3463052265U, 802340101U, 1912886800U, 4031997367U, 3550640406U, 1596096923U, + 610150600U, 431464457U, 2541325046U, 486478003U, 739704936U, 2862696430U, + 3037903166U, 1129749694U, 2611481261U, 1228993498U, 510075548U, 3424962587U, + 2458689681U, 818934833U, 4233309125U, 1608196251U, 3419476016U, 1858543939U, + 2682166524U, 3317854285U, 631986188U, 3008214764U, 613826412U, 3567358221U, + 3512343882U, 1552467474U, 3316162670U, 1275841024U, 4142173454U, 565267881U, + 768644821U, 198310105U, 2396688616U, 1837659011U, 203429334U, 854539004U, + 4235811518U, 3338304926U, 
3730418692U, 3852254981U, 3032046452U, + 2329811860U, 2303590566U, 2696092212U, 3894665932U, 145835667U, 249563655U, + 1932210840U, 2431696407U, 3312636759U, 214962629U, 2092026914U, 3020145527U, + 4073039873U, 2739105705U, 1308336752U, 855104522U, 2391715321U, 67448785U, + 547989482U, 854411802U, 3608633740U, 431731530U, 537375589U, 3888005760U, + 696099141U, 397343236U, 1864511780U, 44029739U, 1729526891U, 1993398655U, + 2010173426U, 2591546756U, 275223291U, 1503900299U, 4217765081U, 2185635252U, + 1122436015U, 3550155364U, 681707194U, 3260479338U, 933579397U, 2983029282U, + 2505504587U, 2667410393U, 2962684490U, 4139721708U, 2658172284U, + 2452602383U, 2607631612U, 1344296217U, 3075398709U, 2949785295U, + 1049956168U, 3917185129U, 2155660174U, 3280524475U, 1503827867U, 674380765U, + 1918468193U, 3843983676U, 634358221U, 2538335643U, 1873351298U, 3368723763U, + 2129144130U, 3203528633U, 3087174986U, 2691698871U, 2516284287U, 24437745U, + 1118381474U, 2816314867U, 2448576035U, 4281989654U, 217287825U, 165872888U, + 2628995722U, 3533525116U, 2721669106U, 872340568U, 3429930655U, 3309047304U, + 3916704967U, 3270160355U, 1348884255U, 1634797670U, 881214967U, 4259633554U, + 174613027U, 1103974314U, 1625224232U, 2678368291U, 1133866707U, 3853082619U, + 4073196549U, 1189620777U, 637238656U, 930241537U, 4042750792U, 3842136042U, + 2417007212U, 2524907510U, 1243036827U, 1282059441U, 3764588774U, + 1394459615U, 2323620015U, 1166152231U, 3307479609U, 3849322257U, + 3507445699U, 4247696636U, 758393720U, 967665141U, 1095244571U, 1319812152U, + 407678762U, 2640605208U, 2170766134U, 3663594275U, 4039329364U, 2512175520U, + 725523154U, 2249807004U, 3312617979U, 2414634172U, 1278482215U, 349206484U, + 1573063308U, 1196429124U, 3873264116U, 2400067801U, 268795167U, 226175489U, + 2961367263U, 1968719665U, 42656370U, 1010790699U, 561600615U, 2422453992U, + 3082197735U, 1636700484U, 3977715296U, 3125350482U, 3478021514U, + 2227819446U, 1540868045U, 3061908980U, 1087362407U, 
3625200291U, 361937537U, + 580441897U, 1520043666U, 2270875402U, 1009161260U, 2502355842U, 4278769785U, + 473902412U, 1057239083U, 1905829039U, 1483781177U, 2080011417U, 1207494246U, + 1806991954U, 2194674403U, 3455972205U, 807207678U, 3655655687U, 674112918U, + 195425752U, 3917890095U, 1874364234U, 1837892715U, 3663478166U, 1548892014U, + 2570748714U, 2049929836U, 2167029704U, 697543767U, 3499545023U, 3342496315U, + 1725251190U, 3561387469U, 2905606616U, 1580182447U, 3934525927U, + 4103172792U, 1365672522U, 1534795737U, 3308667416U, 2841911405U, + 3943182730U, 4072020313U, 3494770452U, 3332626671U, 55327267U, 478030603U, + 411080625U, 3419529010U, 1604767823U, 3513468014U, 570668510U, 913790824U, + 2283967995U, 695159462U, 3825542932U, 4150698144U, 1829758699U, 202895590U, + 1609122645U, 1267651008U, 2910315509U, 2511475445U, 2477423819U, + 3932081579U, 900879979U, 2145588390U, 2670007504U, 580819444U, 1864996828U, + 2526325979U, 1019124258U, 815508628U, 2765933989U, 1277301341U, 3006021786U, + 855540956U, 288025710U, 1919594237U, 2331223864U, 177452412U, 2475870369U, + 2689291749U, 865194284U, 253432152U, 2628531804U, 2861208555U, 2361597573U, + 1653952120U, 1039661024U, 2159959078U, 3709040440U, 3564718533U, + 2596878672U, 2041442161U, 31164696U, 2662962485U, 3665637339U, 1678115244U, + 2699839832U, 3651968520U, 3521595541U, 458433303U, 2423096824U, 21831741U, + 380011703U, 2498168716U, 861806087U, 1673574843U, 4188794405U, 2520563651U, + 2632279153U, 2170465525U, 4171949898U, 3886039621U, 1661344005U, + 3424285243U, 992588372U, 2500984144U, 2993248497U, 3590193895U, 1535327365U, + 515645636U, 131633450U, 3729760261U, 1613045101U, 3254194278U, 15889678U, + 1493590689U, 244148718U, 2991472662U, 1401629333U, 777349878U, 2501401703U, + 4285518317U, 3794656178U, 955526526U, 3442142820U, 3970298374U, 736025417U, + 2737370764U, 1271509744U, 440570731U, 136141826U, 1596189518U, 923399175U, + 257541519U, 3505774281U, 2194358432U, 2518162991U, 1379893637U, 2667767062U, + 
3748146247U, 1821712620U, 3923161384U, 1947811444U, 2392527197U, + 4127419685U, 1423694998U, 4156576871U, 1382885582U, 3420127279U, + 3617499534U, 2994377493U, 4038063986U, 1918458672U, 2983166794U, + 4200449033U, 353294540U, 1609232588U, 243926648U, 2332803291U, 507996832U, + 2392838793U, 4075145196U, 2060984340U, 4287475136U, 88232602U, 2491531140U, + 4159725633U, 2272075455U, 759298618U, 201384554U, 838356250U, 1416268324U, + 674476934U, 90795364U, 141672229U, 3660399588U, 4196417251U, 3249270244U, + 3774530247U, 59587265U, 3683164208U, 19392575U, 1463123697U, 1882205379U, + 293780489U, 2553160622U, 2933904694U, 675638239U, 2851336944U, 1435238743U, + 2448730183U, 804436302U, 2119845972U, 322560608U, 4097732704U, 2987802540U, + 641492617U, 2575442710U, 4217822703U, 3271835300U, 2836418300U, 3739921620U, + 2138378768U, 2879771855U, 4294903423U, 3121097946U, 2603440486U, + 2560820391U, 1012930944U, 2313499967U, 584489368U, 3431165766U, 897384869U, + 2062537737U, 2847889234U, 3742362450U, 2951174585U, 4204621084U, + 1109373893U, 3668075775U, 2750138839U, 3518055702U, 733072558U, 4169325400U, + 788493625U}; +static const uint64_t init_gen_rand_64_expected[] = {KQU(16924766246869039260), + KQU(8201438687333352714), KQU(2265290287015001750), + KQU(18397264611805473832), KQU(3375255223302384358), + KQU(6345559975416828796), KQU(18229739242790328073), + KQU(7596792742098800905), KQU(255338647169685981), KQU(2052747240048610300), + KQU(18328151576097299343), KQU(12472905421133796567), + KQU(11315245349717600863), KQU(16594110197775871209), + KQU(15708751964632456450), KQU(10452031272054632535), + KQU(11097646720811454386), KQU(4556090668445745441), + KQU(17116187693090663106), KQU(14931526836144510645), + KQU(9190752218020552591), KQU(9625800285771901401), + KQU(13995141077659972832), KQU(5194209094927829625), + KQU(4156788379151063303), KQU(8523452593770139494), + KQU(14082382103049296727), KQU(2462601863986088483), + KQU(3030583461592840678), KQU(5221622077872827681), 
+ KQU(3084210671228981236), KQU(13956758381389953823), + KQU(13503889856213423831), KQU(15696904024189836170), + KQU(4612584152877036206), KQU(6231135538447867881), + KQU(10172457294158869468), KQU(6452258628466708150), + KQU(14044432824917330221), KQU(370168364480044279), + KQU(10102144686427193359), KQU(667870489994776076), + KQU(2732271956925885858), KQU(18027788905977284151), + KQU(15009842788582923859), KQU(7136357960180199542), + KQU(15901736243475578127), KQU(16951293785352615701), + KQU(10551492125243691632), KQU(17668869969146434804), + KQU(13646002971174390445), KQU(9804471050759613248), + KQU(5511670439655935493), KQU(18103342091070400926), + KQU(17224512747665137533), KQU(15534627482992618168), + KQU(1423813266186582647), KQU(15821176807932930024), KQU(30323369733607156), + KQU(11599382494723479403), KQU(653856076586810062), + KQU(3176437395144899659), KQU(14028076268147963917), + KQU(16156398271809666195), KQU(3166955484848201676), + KQU(5746805620136919390), KQU(17297845208891256593), + KQU(11691653183226428483), KQU(17900026146506981577), + KQU(15387382115755971042), KQU(16923567681040845943), + KQU(8039057517199388606), KQU(11748409241468629263), + KQU(794358245539076095), KQU(13438501964693401242), + KQU(14036803236515618962), KQU(5252311215205424721), + KQU(17806589612915509081), KQU(6802767092397596006), + KQU(14212120431184557140), KQU(1072951366761385712), + KQU(13098491780722836296), KQU(9466676828710797353), + KQU(12673056849042830081), KQU(12763726623645357580), + KQU(16468961652999309493), KQU(15305979875636438926), + KQU(17444713151223449734), KQU(5692214267627883674), + KQU(13049589139196151505), KQU(880115207831670745), + KQU(1776529075789695498), KQU(16695225897801466485), + KQU(10666901778795346845), KQU(6164389346722833869), + KQU(2863817793264300475), KQU(9464049921886304754), + KQU(3993566636740015468), KQU(9983749692528514136), + KQU(16375286075057755211), KQU(16042643417005440820), + KQU(11445419662923489877), 
KQU(7999038846885158836), + KQU(6721913661721511535), KQU(5363052654139357320), + KQU(1817788761173584205), KQU(13290974386445856444), + KQU(4650350818937984680), KQU(8219183528102484836), + KQU(1569862923500819899), KQU(4189359732136641860), + KQU(14202822961683148583), KQU(4457498315309429058), + KQU(13089067387019074834), KQU(11075517153328927293), + KQU(10277016248336668389), KQU(7070509725324401122), + KQU(17808892017780289380), KQU(13143367339909287349), + KQU(1377743745360085151), KQU(5749341807421286485), + KQU(14832814616770931325), KQU(7688820635324359492), + KQU(10960474011539770045), KQU(81970066653179790), + KQU(12619476072607878022), KQU(4419566616271201744), + KQU(15147917311750568503), KQU(5549739182852706345), + KQU(7308198397975204770), KQU(13580425496671289278), + KQU(17070764785210130301), KQU(8202832846285604405), + KQU(6873046287640887249), KQU(6927424434308206114), + KQU(6139014645937224874), KQU(10290373645978487639), + KQU(15904261291701523804), KQU(9628743442057826883), + KQU(18383429096255546714), KQU(4977413265753686967), + KQU(7714317492425012869), KQU(9025232586309926193), + KQU(14627338359776709107), KQU(14759849896467790763), + KQU(10931129435864423252), KQU(4588456988775014359), + KQU(10699388531797056724), KQU(468652268869238792), + KQU(5755943035328078086), KQU(2102437379988580216), + KQU(9986312786506674028), KQU(2654207180040945604), + KQU(8726634790559960062), KQU(100497234871808137), KQU(2800137176951425819), + KQU(6076627612918553487), KQU(5780186919186152796), + KQU(8179183595769929098), KQU(6009426283716221169), + KQU(2796662551397449358), KQU(1756961367041986764), + KQU(6972897917355606205), KQU(14524774345368968243), + KQU(2773529684745706940), KQU(4853632376213075959), + KQU(4198177923731358102), KQU(8271224913084139776), + KQU(2741753121611092226), KQU(16782366145996731181), + KQU(15426125238972640790), KQU(13595497100671260342), + KQU(3173531022836259898), KQU(6573264560319511662), + KQU(18041111951511157441), 
KQU(2351433581833135952), + KQU(3113255578908173487), KQU(1739371330877858784), + KQU(16046126562789165480), KQU(8072101652214192925), + KQU(15267091584090664910), KQU(9309579200403648940), + KQU(5218892439752408722), KQU(14492477246004337115), + KQU(17431037586679770619), KQU(7385248135963250480), + KQU(9580144956565560660), KQU(4919546228040008720), + KQU(15261542469145035584), KQU(18233297270822253102), + KQU(5453248417992302857), KQU(9309519155931460285), + KQU(10342813012345291756), KQU(15676085186784762381), + KQU(15912092950691300645), KQU(9371053121499003195), + KQU(9897186478226866746), KQU(14061858287188196327), + KQU(122575971620788119), KQU(12146750969116317754), + KQU(4438317272813245201), KQU(8332576791009527119), + KQU(13907785691786542057), KQU(10374194887283287467), + KQU(2098798755649059566), KQU(3416235197748288894), + KQU(8688269957320773484), KQU(7503964602397371571), + KQU(16724977015147478236), KQU(9461512855439858184), + KQU(13259049744534534727), KQU(3583094952542899294), + KQU(8764245731305528292), KQU(13240823595462088985), + KQU(13716141617617910448), KQU(18114969519935960955), + KQU(2297553615798302206), KQU(4585521442944663362), + KQU(17776858680630198686), KQU(4685873229192163363), + KQU(152558080671135627), KQU(15424900540842670088), + KQU(13229630297130024108), KQU(17530268788245718717), + KQU(16675633913065714144), KQU(3158912717897568068), + KQU(15399132185380087288), KQU(7401418744515677872), + KQU(13135412922344398535), KQU(6385314346100509511), + KQU(13962867001134161139), KQU(10272780155442671999), + KQU(12894856086597769142), KQU(13340877795287554994), + KQU(12913630602094607396), KQU(12543167911119793857), + KQU(17343570372251873096), KQU(10959487764494150545), + KQU(6966737953093821128), KQU(13780699135496988601), + KQU(4405070719380142046), KQU(14923788365607284982), + KQU(2869487678905148380), KQU(6416272754197188403), + KQU(15017380475943612591), KQU(1995636220918429487), + KQU(3402016804620122716), 
KQU(15800188663407057080), + KQU(11362369990390932882), KQU(15262183501637986147), + KQU(10239175385387371494), KQU(9352042420365748334), + KQU(1682457034285119875), KQU(1724710651376289644), + KQU(2038157098893817966), KQU(9897825558324608773), + KQU(1477666236519164736), KQU(16835397314511233640), + KQU(10370866327005346508), KQU(10157504370660621982), + KQU(12113904045335882069), KQU(13326444439742783008), + KQU(11302769043000765804), KQU(13594979923955228484), + KQU(11779351762613475968), KQU(3786101619539298383), + KQU(8021122969180846063), KQU(15745904401162500495), + KQU(10762168465993897267), KQU(13552058957896319026), + KQU(11200228655252462013), KQU(5035370357337441226), + KQU(7593918984545500013), KQU(5418554918361528700), + KQU(4858270799405446371), KQU(9974659566876282544), + KQU(18227595922273957859), KQU(2772778443635656220), + KQU(14285143053182085385), KQU(9939700992429600469), + KQU(12756185904545598068), KQU(2020783375367345262), KQU(57026775058331227), + KQU(950827867930065454), KQU(6602279670145371217), KQU(2291171535443566929), + KQU(5832380724425010313), KQU(1220343904715982285), + KQU(17045542598598037633), KQU(15460481779702820971), + KQU(13948388779949365130), KQU(13975040175430829518), + KQU(17477538238425541763), KQU(11104663041851745725), + KQU(15860992957141157587), KQU(14529434633012950138), + KQU(2504838019075394203), KQU(7512113882611121886), + KQU(4859973559980886617), KQU(1258601555703250219), + KQU(15594548157514316394), KQU(4516730171963773048), + KQU(11380103193905031983), KQU(6809282239982353344), + KQU(18045256930420065002), KQU(2453702683108791859), + KQU(977214582986981460), KQU(2006410402232713466), KQU(6192236267216378358), + KQU(3429468402195675253), KQU(18146933153017348921), + KQU(17369978576367231139), KQU(1246940717230386603), + KQU(11335758870083327110), KQU(14166488801730353682), + KQU(9008573127269635732), KQU(10776025389820643815), + KQU(15087605441903942962), KQU(1359542462712147922), + 
KQU(13898874411226454206), KQU(17911176066536804411), + KQU(9435590428600085274), KQU(294488509967864007), KQU(8890111397567922046), + KQU(7987823476034328778), KQU(13263827582440967651), + KQU(7503774813106751573), KQU(14974747296185646837), + KQU(8504765037032103375), KQU(17340303357444536213), + KQU(7704610912964485743), KQU(8107533670327205061), + KQU(9062969835083315985), KQU(16968963142126734184), + KQU(12958041214190810180), KQU(2720170147759570200), + KQU(2986358963942189566), KQU(14884226322219356580), + KQU(286224325144368520), KQU(11313800433154279797), + KQU(18366849528439673248), KQU(17899725929482368789), + KQU(3730004284609106799), KQU(1654474302052767205), + KQU(5006698007047077032), KQU(8196893913601182838), + KQU(15214541774425211640), KQU(17391346045606626073), + KQU(8369003584076969089), KQU(3939046733368550293), + KQU(10178639720308707785), KQU(2180248669304388697), KQU(62894391300126322), + KQU(9205708961736223191), KQU(6837431058165360438), + KQU(3150743890848308214), KQU(17849330658111464583), + KQU(12214815643135450865), KQU(13410713840519603402), + KQU(3200778126692046802), KQU(13354780043041779313), + KQU(800850022756886036), KQU(15660052933953067433), + KQU(6572823544154375676), KQU(11030281857015819266), + KQU(12682241941471433835), KQU(11654136407300274693), + KQU(4517795492388641109), KQU(9757017371504524244), + KQU(17833043400781889277), KQU(12685085201747792227), + KQU(10408057728835019573), KQU(98370418513455221), KQU(6732663555696848598), + KQU(13248530959948529780), KQU(3530441401230622826), + KQU(18188251992895660615), KQU(1847918354186383756), + KQU(1127392190402660921), KQU(11293734643143819463), + KQU(3015506344578682982), KQU(13852645444071153329), + KQU(2121359659091349142), KQU(1294604376116677694), + KQU(5616576231286352318), KQU(7112502442954235625), + KQU(11676228199551561689), KQU(12925182803007305359), + KQU(7852375518160493082), KQU(1136513130539296154), + KQU(5636923900916593195), KQU(3221077517612607747), + 
KQU(17784790465798152513), KQU(3554210049056995938), + KQU(17476839685878225874), KQU(3206836372585575732), + KQU(2765333945644823430), KQU(10080070903718799528), + KQU(5412370818878286353), KQU(9689685887726257728), + KQU(8236117509123533998), KQU(1951139137165040214), + KQU(4492205209227980349), KQU(16541291230861602967), + KQU(1424371548301437940), KQU(9117562079669206794), + KQU(14374681563251691625), KQU(13873164030199921303), + KQU(6680317946770936731), KQU(15586334026918276214), + KQU(10896213950976109802), KQU(9506261949596413689), + KQU(9903949574308040616), KQU(6038397344557204470), KQU(174601465422373648), + KQU(15946141191338238030), KQU(17142225620992044937), + KQU(7552030283784477064), KQU(2947372384532947997), KQU(510797021688197711), + KQU(4962499439249363461), KQU(23770320158385357), KQU(959774499105138124), + KQU(1468396011518788276), KQU(2015698006852312308), + KQU(4149400718489980136), KQU(5992916099522371188), + KQU(10819182935265531076), KQU(16189787999192351131), + KQU(342833961790261950), KQU(12470830319550495336), + KQU(18128495041912812501), KQU(1193600899723524337), + KQU(9056793666590079770), KQU(2154021227041669041), + KQU(4963570213951235735), KQU(4865075960209211409), + KQU(2097724599039942963), KQU(2024080278583179845), + KQU(11527054549196576736), KQU(10650256084182390252), + KQU(4808408648695766755), KQU(1642839215013788844), + KQU(10607187948250398390), KQU(7076868166085913508), + KQU(730522571106887032), KQU(12500579240208524895), + KQU(4484390097311355324), KQU(15145801330700623870), + KQU(8055827661392944028), KQU(5865092976832712268), + KQU(15159212508053625143), KQU(3560964582876483341), + KQU(4070052741344438280), KQU(6032585709886855634), + KQU(15643262320904604873), KQU(2565119772293371111), + KQU(318314293065348260), KQU(15047458749141511872), + KQU(7772788389811528730), KQU(7081187494343801976), + KQU(6465136009467253947), KQU(10425940692543362069), + KQU(554608190318339115), KQU(14796699860302125214), + 
KQU(1638153134431111443), KQU(10336967447052276248), + KQU(8412308070396592958), KQU(4004557277152051226), + KQU(8143598997278774834), KQU(16413323996508783221), + KQU(13139418758033994949), KQU(9772709138335006667), + KQU(2818167159287157659), KQU(17091740573832523669), + KQU(14629199013130751608), KQU(18268322711500338185), + KQU(8290963415675493063), KQU(8830864907452542588), + KQU(1614839084637494849), KQU(14855358500870422231), + KQU(3472996748392519937), KQU(15317151166268877716), + KQU(5825895018698400362), KQU(16730208429367544129), + KQU(10481156578141202800), KQU(4746166512382823750), + KQU(12720876014472464998), KQU(8825177124486735972), + KQU(13733447296837467838), KQU(6412293741681359625), + KQU(8313213138756135033), KQU(11421481194803712517), + KQU(7997007691544174032), KQU(6812963847917605930), + KQU(9683091901227558641), KQU(14703594165860324713), + KQU(1775476144519618309), KQU(2724283288516469519), KQU(717642555185856868), + KQU(8736402192215092346), KQU(11878800336431381021), + KQU(4348816066017061293), KQU(6115112756583631307), + KQU(9176597239667142976), KQU(12615622714894259204), + KQU(10283406711301385987), KQU(5111762509485379420), + KQU(3118290051198688449), KQU(7345123071632232145), + KQU(9176423451688682359), KQU(4843865456157868971), + KQU(12008036363752566088), KQU(12058837181919397720), + KQU(2145073958457347366), KQU(1526504881672818067), + KQU(3488830105567134848), KQU(13208362960674805143), + KQU(4077549672899572192), KQU(7770995684693818365), + KQU(1398532341546313593), KQU(12711859908703927840), + KQU(1417561172594446813), KQU(17045191024194170604), + KQU(4101933177604931713), KQU(14708428834203480320), + KQU(17447509264469407724), KQU(14314821973983434255), + KQU(17990472271061617265), KQU(5087756685841673942), + KQU(12797820586893859939), KQU(1778128952671092879), + KQU(3535918530508665898), KQU(9035729701042481301), + KQU(14808661568277079962), KQU(14587345077537747914), + KQU(11920080002323122708), KQU(6426515805197278753), + 
KQU(3295612216725984831), KQU(11040722532100876120), + KQU(12305952936387598754), KQU(16097391899742004253), + KQU(4908537335606182208), KQU(12446674552196795504), + KQU(16010497855816895177), KQU(9194378874788615551), + KQU(3382957529567613384), KQU(5154647600754974077), + KQU(9801822865328396141), KQU(9023662173919288143), + KQU(17623115353825147868), KQU(8238115767443015816), + KQU(15811444159859002560), KQU(9085612528904059661), + KQU(6888601089398614254), KQU(258252992894160189), KQU(6704363880792428622), + KQU(6114966032147235763), KQU(11075393882690261875), + KQU(8797664238933620407), KQU(5901892006476726920), + KQU(5309780159285518958), KQU(14940808387240817367), + KQU(14642032021449656698), KQU(9808256672068504139), + KQU(3670135111380607658), KQU(11211211097845960152), + KQU(1474304506716695808), KQU(15843166204506876239), + KQU(7661051252471780561), KQU(10170905502249418476), + KQU(7801416045582028589), KQU(2763981484737053050), + KQU(9491377905499253054), KQU(16201395896336915095), + KQU(9256513756442782198), KQU(5411283157972456034), + KQU(5059433122288321676), KQU(4327408006721123357), + KQU(9278544078834433377), KQU(7601527110882281612), + KQU(11848295896975505251), KQU(12096998801094735560), + KQU(14773480339823506413), KQU(15586227433895802149), + KQU(12786541257830242872), KQU(6904692985140503067), + KQU(5309011515263103959), KQU(12105257191179371066), + KQU(14654380212442225037), KQU(2556774974190695009), + KQU(4461297399927600261), KQU(14888225660915118646), + KQU(14915459341148291824), KQU(2738802166252327631), + KQU(6047155789239131512), KQU(12920545353217010338), + KQU(10697617257007840205), KQU(2751585253158203504), + KQU(13252729159780047496), KQU(14700326134672815469), + KQU(14082527904374600529), KQU(16852962273496542070), + KQU(17446675504235853907), KQU(15019600398527572311), + KQU(12312781346344081551), KQU(14524667935039810450), + KQU(5634005663377195738), KQU(11375574739525000569), + KQU(2423665396433260040), 
KQU(5222836914796015410), + KQU(4397666386492647387), KQU(4619294441691707638), KQU(665088602354770716), + KQU(13246495665281593610), KQU(6564144270549729409), + KQU(10223216188145661688), KQU(3961556907299230585), + KQU(11543262515492439914), KQU(16118031437285993790), + KQU(7143417964520166465), KQU(13295053515909486772), KQU(40434666004899675), + KQU(17127804194038347164), KQU(8599165966560586269), + KQU(8214016749011284903), KQU(13725130352140465239), + KQU(5467254474431726291), KQU(7748584297438219877), + KQU(16933551114829772472), KQU(2169618439506799400), + KQU(2169787627665113463), KQU(17314493571267943764), + KQU(18053575102911354912), KQU(11928303275378476973), + KQU(11593850925061715550), KQU(17782269923473589362), + KQU(3280235307704747039), KQU(6145343578598685149), + KQU(17080117031114086090), KQU(18066839902983594755), + KQU(6517508430331020706), KQU(8092908893950411541), + KQU(12558378233386153732), KQU(4476532167973132976), + KQU(16081642430367025016), KQU(4233154094369139361), + KQU(8693630486693161027), KQU(11244959343027742285), + KQU(12273503967768513508), KQU(14108978636385284876), + KQU(7242414665378826984), KQU(6561316938846562432), + KQU(8601038474994665795), KQU(17532942353612365904), + KQU(17940076637020912186), KQU(7340260368823171304), + KQU(7061807613916067905), KQU(10561734935039519326), + KQU(17990796503724650862), KQU(6208732943911827159), + KQU(359077562804090617), KQU(14177751537784403113), + KQU(10659599444915362902), KQU(15081727220615085833), + KQU(13417573895659757486), KQU(15513842342017811524), + KQU(11814141516204288231), KQU(1827312513875101814), + KQU(2804611699894603103), KQU(17116500469975602763), + KQU(12270191815211952087), KQU(12256358467786024988), + KQU(18435021722453971267), KQU(671330264390865618), KQU(476504300460286050), + KQU(16465470901027093441), KQU(4047724406247136402), + KQU(1322305451411883346), KQU(1388308688834322280), + KQU(7303989085269758176), KQU(9323792664765233642), + KQU(4542762575316368936), 
KQU(17342696132794337618), + KQU(4588025054768498379), KQU(13415475057390330804), + KQU(17880279491733405570), KQU(10610553400618620353), + KQU(3180842072658960139), KQU(13002966655454270120), + KQU(1665301181064982826), KQU(7083673946791258979), KQU(190522247122496820), + KQU(17388280237250677740), KQU(8430770379923642945), + KQU(12987180971921668584), KQU(2311086108365390642), + KQU(2870984383579822345), KQU(14014682609164653318), + KQU(14467187293062251484), KQU(192186361147413298), + KQU(15171951713531796524), KQU(9900305495015948728), + KQU(17958004775615466344), KQU(14346380954498606514), + KQU(18040047357617407096), KQU(5035237584833424532), + KQU(15089555460613972287), KQU(4131411873749729831), + KQU(1329013581168250330), KQU(10095353333051193949), + KQU(10749518561022462716), KQU(9050611429810755847), + KQU(15022028840236655649), KQU(8775554279239748298), + KQU(13105754025489230502), KQU(15471300118574167585), + KQU(89864764002355628), KQU(8776416323420466637), KQU(5280258630612040891), + KQU(2719174488591862912), KQU(7599309137399661994), + KQU(15012887256778039979), KQU(14062981725630928925), + KQU(12038536286991689603), KQU(7089756544681775245), + KQU(10376661532744718039), KQU(1265198725901533130), + KQU(13807996727081142408), KQU(2935019626765036403), + KQU(7651672460680700141), KQU(3644093016200370795), + KQU(2840982578090080674), KQU(17956262740157449201), + KQU(18267979450492880548), KQU(11799503659796848070), + KQU(9942537025669672388), KQU(11886606816406990297), + KQU(5488594946437447576), KQU(7226714353282744302), + KQU(3784851653123877043), KQU(878018453244803041), + KQU(12110022586268616085), KQU(734072179404675123), + KQU(11869573627998248542), KQU(469150421297783998), KQU(260151124912803804), + KQU(11639179410120968649), KQU(9318165193840846253), + KQU(12795671722734758075), KQU(15318410297267253933), + KQU(691524703570062620), KQU(5837129010576994601), + KQU(15045963859726941052), KQU(5850056944932238169), + KQU(12017434144750943807), 
KQU(7447139064928956574), + KQU(3101711812658245019), KQU(16052940704474982954), + KQU(18195745945986994042), KQU(8932252132785575659), + KQU(13390817488106794834), KQU(11582771836502517453), + KQU(4964411326683611686), KQU(2195093981702694011), + KQU(14145229538389675669), KQU(16459605532062271798), + KQU(866316924816482864), KQU(4593041209937286377), KQU(8415491391910972138), + KQU(4171236715600528969), KQU(16637569303336782889), + KQU(2002011073439212680), KQU(17695124661097601411), + KQU(4627687053598611702), KQU(7895831936020190403), + KQU(8455951300917267802), KQU(2923861649108534854), + KQU(8344557563927786255), KQU(6408671940373352556), + KQU(12210227354536675772), KQU(14294804157294222295), + KQU(10103022425071085127), KQU(10092959489504123771), + KQU(6554774405376736268), KQU(12629917718410641774), + KQU(6260933257596067126), KQU(2460827021439369673), + KQU(2541962996717103668), KQU(597377203127351475), KQU(5316984203117315309), + KQU(4811211393563241961), KQU(13119698597255811641), + KQU(8048691512862388981), KQU(10216818971194073842), + KQU(4612229970165291764), KQU(10000980798419974770), + KQU(6877640812402540687), KQU(1488727563290436992), + KQU(2227774069895697318), KQU(11237754507523316593), + KQU(13478948605382290972), KQU(1963583846976858124), + KQU(5512309205269276457), KQU(3972770164717652347), + KQU(3841751276198975037), KQU(10283343042181903117), + KQU(8564001259792872199), KQU(16472187244722489221), + KQU(8953493499268945921), KQU(3518747340357279580), + KQU(4003157546223963073), KQU(3270305958289814590), + KQU(3966704458129482496), KQU(8122141865926661939), + KQU(14627734748099506653), KQU(13064426990862560568), + KQU(2414079187889870829), KQU(5378461209354225306), + KQU(10841985740128255566), KQU(538582442885401738), + KQU(7535089183482905946), KQU(16117559957598879095), + KQU(8477890721414539741), KQU(1459127491209533386), + KQU(17035126360733620462), KQU(8517668552872379126), + KQU(10292151468337355014), KQU(17081267732745344157), + 
KQU(13751455337946087178), KQU(14026945459523832966), + KQU(6653278775061723516), KQU(10619085543856390441), + KQU(2196343631481122885), KQU(10045966074702826136), + KQU(10082317330452718282), KQU(5920859259504831242), + KQU(9951879073426540617), KQU(7074696649151414158), + KQU(15808193543879464318), KQU(7385247772746953374), + KQU(3192003544283864292), KQU(18153684490917593847), + KQU(12423498260668568905), KQU(10957758099756378169), + KQU(11488762179911016040), KQU(2099931186465333782), + KQU(11180979581250294432), KQU(8098916250668367933), + KQU(3529200436790763465), KQU(12988418908674681745), + KQU(6147567275954808580), KQU(3207503344604030989), + KQU(10761592604898615360), KQU(229854861031893504), + KQU(8809853962667144291), KQU(13957364469005693860), + KQU(7634287665224495886), KQU(12353487366976556874), + KQU(1134423796317152034), KQU(2088992471334107068), + KQU(7393372127190799698), KQU(1845367839871058391), KQU(207922563987322884), + KQU(11960870813159944976), KQU(12182120053317317363), + KQU(17307358132571709283), KQU(13871081155552824936), + KQU(18304446751741566262), KQU(7178705220184302849), + KQU(10929605677758824425), KQU(16446976977835806844), + KQU(13723874412159769044), KQU(6942854352100915216), + KQU(1726308474365729390), KQU(2150078766445323155), + KQU(15345558947919656626), KQU(12145453828874527201), + KQU(2054448620739726849), KQU(2740102003352628137), + KQU(11294462163577610655), KQU(756164283387413743), + KQU(17841144758438810880), KQU(10802406021185415861), + KQU(8716455530476737846), KQU(6321788834517649606), + KQU(14681322910577468426), KQU(17330043563884336387), + KQU(12701802180050071614), KQU(14695105111079727151), + KQU(5112098511654172830), KQU(4957505496794139973), + KQU(8270979451952045982), KQU(12307685939199120969), + KQU(12425799408953443032), KQU(8376410143634796588), + KQU(16621778679680060464), KQU(3580497854566660073), + KQU(1122515747803382416), KQU(857664980960597599), KQU(6343640119895925918), + KQU(12878473260854462891), 
KQU(10036813920765722626), + KQU(14451335468363173812), KQU(5476809692401102807), + KQU(16442255173514366342), KQU(13060203194757167104), + KQU(14354124071243177715), KQU(15961249405696125227), + KQU(13703893649690872584), KQU(363907326340340064), + KQU(6247455540491754842), KQU(12242249332757832361), + KQU(156065475679796717), KQU(9351116235749732355), KQU(4590350628677701405), + KQU(1671195940982350389), KQU(13501398458898451905), + KQU(6526341991225002255), KQU(1689782913778157592), + KQU(7439222350869010334), KQU(13975150263226478308), + KQU(11411961169932682710), KQU(17204271834833847277), + KQU(541534742544435367), KQU(6591191931218949684), KQU(2645454775478232486), + KQU(4322857481256485321), KQU(8477416487553065110), + KQU(12902505428548435048), KQU(971445777981341415), + KQU(14995104682744976712), KQU(4243341648807158063), + KQU(8695061252721927661), KQU(5028202003270177222), + KQU(2289257340915567840), KQU(13870416345121866007), + KQU(13994481698072092233), KQU(6912785400753196481), + KQU(2278309315841980139), KQU(4329765449648304839), + KQU(5963108095785485298), KQU(4880024847478722478), + KQU(16015608779890240947), KQU(1866679034261393544), + KQU(914821179919731519), KQU(9643404035648760131), KQU(2418114953615593915), + KQU(944756836073702374), KQU(15186388048737296834), + KQU(7723355336128442206), KQU(7500747479679599691), + KQU(18013961306453293634), KQU(2315274808095756456), + KQU(13655308255424029566), KQU(17203800273561677098), + KQU(1382158694422087756), KQU(5090390250309588976), KQU(517170818384213989), + KQU(1612709252627729621), KQU(1330118955572449606), KQU(300922478056709885), + KQU(18115693291289091987), KQU(13491407109725238321), + KQU(15293714633593827320), KQU(5151539373053314504), + KQU(5951523243743139207), KQU(14459112015249527975), + KQU(5456113959000700739), KQU(3877918438464873016), + KQU(12534071654260163555), KQU(15871678376893555041), + KQU(11005484805712025549), KQU(16353066973143374252), + KQU(4358331472063256685), 
KQU(8268349332210859288), + KQU(12485161590939658075), KQU(13955993592854471343), + KQU(5911446886848367039), KQU(14925834086813706974), + KQU(6590362597857994805), KQU(1280544923533661875), + KQU(1637756018947988164), KQU(4734090064512686329), + KQU(16693705263131485912), KQU(6834882340494360958), + KQU(8120732176159658505), KQU(2244371958905329346), + KQU(10447499707729734021), KQU(7318742361446942194), + KQU(8032857516355555296), KQU(14023605983059313116), + KQU(1032336061815461376), KQU(9840995337876562612), + KQU(9869256223029203587), KQU(12227975697177267636), + KQU(12728115115844186033), KQU(7752058479783205470), + KQU(729733219713393087), KQU(12954017801239007622)}; +static const uint64_t init_by_array_64_expected[] = {KQU(2100341266307895239), + KQU(8344256300489757943), KQU(15687933285484243894), + KQU(8268620370277076319), KQU(12371852309826545459), + KQU(8800491541730110238), KQU(18113268950100835773), + KQU(2886823658884438119), KQU(3293667307248180724), + KQU(9307928143300172731), KQU(7688082017574293629), KQU(900986224735166665), + KQU(9977972710722265039), KQU(6008205004994830552), KQU(546909104521689292), + KQU(7428471521869107594), KQU(14777563419314721179), + KQU(16116143076567350053), KQU(5322685342003142329), + KQU(4200427048445863473), KQU(4693092150132559146), + KQU(13671425863759338582), KQU(6747117460737639916), + KQU(4732666080236551150), KQU(5912839950611941263), + KQU(3903717554504704909), KQU(2615667650256786818), + KQU(10844129913887006352), KQU(13786467861810997820), + KQU(14267853002994021570), KQU(13767807302847237439), + KQU(16407963253707224617), KQU(4802498363698583497), + KQU(2523802839317209764), KQU(3822579397797475589), + KQU(8950320572212130610), KQU(3745623504978342534), + KQU(16092609066068482806), KQU(9817016950274642398), + KQU(10591660660323829098), KQU(11751606650792815920), + KQU(5122873818577122211), KQU(17209553764913936624), + KQU(6249057709284380343), KQU(15088791264695071830), + KQU(15344673071709851930), 
KQU(4345751415293646084), + KQU(2542865750703067928), KQU(13520525127852368784), + KQU(18294188662880997241), KQU(3871781938044881523), + KQU(2873487268122812184), KQU(15099676759482679005), + KQU(15442599127239350490), KQU(6311893274367710888), + KQU(3286118760484672933), KQU(4146067961333542189), + KQU(13303942567897208770), KQU(8196013722255630418), + KQU(4437815439340979989), KQU(15433791533450605135), + KQU(4254828956815687049), KQU(1310903207708286015), + KQU(10529182764462398549), KQU(14900231311660638810), + KQU(9727017277104609793), KQU(1821308310948199033), + KQU(11628861435066772084), KQU(9469019138491546924), + KQU(3145812670532604988), KQU(9938468915045491919), + KQU(1562447430672662142), KQU(13963995266697989134), + KQU(3356884357625028695), KQU(4499850304584309747), + KQU(8456825817023658122), KQU(10859039922814285279), + KQU(8099512337972526555), KQU(348006375109672149), + KQU(11919893998241688603), KQU(1104199577402948826), + KQU(16689191854356060289), KQU(10992552041730168078), + KQU(7243733172705465836), KQU(5668075606180319560), + KQU(18182847037333286970), KQU(4290215357664631322), + KQU(4061414220791828613), KQU(13006291061652989604), + KQU(7140491178917128798), KQU(12703446217663283481), + KQU(5500220597564558267), KQU(10330551509971296358), + KQU(15958554768648714492), KQU(5174555954515360045), + KQU(1731318837687577735), KQU(3557700801048354857), + KQU(13764012341928616198), KQU(13115166194379119043), + KQU(7989321021560255519), KQU(2103584280905877040), + KQU(9230788662155228488), KQU(16396629323325547654), + KQU(657926409811318051), KQU(15046700264391400727), + KQU(5120132858771880830), KQU(7934160097989028561), + KQU(6963121488531976245), KQU(17412329602621742089), + KQU(15144843053931774092), KQU(17204176651763054532), + KQU(13166595387554065870), KQU(8590377810513960213), + KQU(5834365135373991938), KQU(7640913007182226243), + KQU(3479394703859418425), KQU(16402784452644521040), + KQU(4993979809687083980), KQU(13254522168097688865), + 
KQU(15643659095244365219), KQU(5881437660538424982), + KQU(11174892200618987379), KQU(254409966159711077), + KQU(17158413043140549909), KQU(3638048789290376272), + KQU(1376816930299489190), KQU(4622462095217761923), + KQU(15086407973010263515), KQU(13253971772784692238), + KQU(5270549043541649236), KQU(11182714186805411604), + KQU(12283846437495577140), KQU(5297647149908953219), + KQU(10047451738316836654), KQU(4938228100367874746), + KQU(12328523025304077923), KQU(3601049438595312361), + KQU(9313624118352733770), KQU(13322966086117661798), + KQU(16660005705644029394), KQU(11337677526988872373), + KQU(13869299102574417795), KQU(15642043183045645437), + KQU(3021755569085880019), KQU(4979741767761188161), + KQU(13679979092079279587), KQU(3344685842861071743), + KQU(13947960059899588104), KQU(305806934293368007), + KQU(5749173929201650029), KQU(11123724852118844098), + KQU(15128987688788879802), KQU(15251651211024665009), + KQU(7689925933816577776), KQU(16732804392695859449), + KQU(17087345401014078468), KQU(14315108589159048871), + KQU(4820700266619778917), KQU(16709637539357958441), + KQU(4936227875177351374), KQU(2137907697912987247), + KQU(11628565601408395420), KQU(2333250549241556786), + KQU(5711200379577778637), KQU(5170680131529031729), + KQU(12620392043061335164), KQU(95363390101096078), KQU(5487981914081709462), + KQU(1763109823981838620), KQU(3395861271473224396), + KQU(1300496844282213595), KQU(6894316212820232902), + KQU(10673859651135576674), KQU(5911839658857903252), + KQU(17407110743387299102), KQU(8257427154623140385), + KQU(11389003026741800267), KQU(4070043211095013717), + KQU(11663806997145259025), KQU(15265598950648798210), + KQU(630585789434030934), KQU(3524446529213587334), KQU(7186424168495184211), + KQU(10806585451386379021), KQU(11120017753500499273), + KQU(1586837651387701301), KQU(17530454400954415544), + KQU(9991670045077880430), KQU(7550997268990730180), + KQU(8640249196597379304), KQU(3522203892786893823), + KQU(10401116549878854788), 
KQU(13690285544733124852), + KQU(8295785675455774586), KQU(15535716172155117603), + KQU(3112108583723722511), KQU(17633179955339271113), + KQU(18154208056063759375), KQU(1866409236285815666), + KQU(13326075895396412882), KQU(8756261842948020025), + KQU(6281852999868439131), KQU(15087653361275292858), + KQU(10333923911152949397), KQU(5265567645757408500), + KQU(12728041843210352184), KQU(6347959327507828759), + KQU(154112802625564758), KQU(18235228308679780218), + KQU(3253805274673352418), KQU(4849171610689031197), + KQU(17948529398340432518), KQU(13803510475637409167), + KQU(13506570190409883095), KQU(15870801273282960805), + KQU(8451286481299170773), KQU(9562190620034457541), + KQU(8518905387449138364), KQU(12681306401363385655), + KQU(3788073690559762558), KQU(5256820289573487769), + KQU(2752021372314875467), KQU(6354035166862520716), + KQU(4328956378309739069), KQU(449087441228269600), KQU(5533508742653090868), + KQU(1260389420404746988), KQU(18175394473289055097), + KQU(1535467109660399420), KQU(8818894282874061442), + KQU(12140873243824811213), KQU(15031386653823014946), + KQU(1286028221456149232), KQU(6329608889367858784), + KQU(9419654354945132725), KQU(6094576547061672379), + KQU(17706217251847450255), KQU(1733495073065878126), + KQU(16918923754607552663), KQU(8881949849954945044), + KQU(12938977706896313891), KQU(14043628638299793407), + KQU(18393874581723718233), KQU(6886318534846892044), + KQU(14577870878038334081), KQU(13541558383439414119), + KQU(13570472158807588273), KQU(18300760537910283361), + KQU(818368572800609205), KQU(1417000585112573219), + KQU(12337533143867683655), KQU(12433180994702314480), + KQU(778190005829189083), KQU(13667356216206524711), + KQU(9866149895295225230), KQU(11043240490417111999), + KQU(1123933826541378598), KQU(6469631933605123610), + KQU(14508554074431980040), KQU(13918931242962026714), + KQU(2870785929342348285), KQU(14786362626740736974), + KQU(13176680060902695786), KQU(9591778613541679456), + KQU(9097662885117436706), 
KQU(749262234240924947), KQU(1944844067793307093), + KQU(4339214904577487742), KQU(8009584152961946551), + KQU(16073159501225501777), KQU(3335870590499306217), + KQU(17088312653151202847), KQU(3108893142681931848), + KQU(16636841767202792021), KQU(10423316431118400637), + KQU(8008357368674443506), KQU(11340015231914677875), + KQU(17687896501594936090), KQU(15173627921763199958), + KQU(542569482243721959), KQU(15071714982769812975), + KQU(4466624872151386956), KQU(1901780715602332461), + KQU(9822227742154351098), KQU(1479332892928648780), + KQU(6981611948382474400), KQU(7620824924456077376), + KQU(14095973329429406782), KQU(7902744005696185404), + KQU(15830577219375036920), KQU(10287076667317764416), + KQU(12334872764071724025), KQU(4419302088133544331), + KQU(14455842851266090520), KQU(12488077416504654222), + KQU(7953892017701886766), KQU(6331484925529519007), + KQU(4902145853785030022), KQU(17010159216096443073), + KQU(11945354668653886087), KQU(15112022728645230829), + KQU(17363484484522986742), KQU(4423497825896692887), + KQU(8155489510809067471), KQU(258966605622576285), KQU(5462958075742020534), + KQU(6763710214913276228), KQU(2368935183451109054), + KQU(14209506165246453811), KQU(2646257040978514881), + KQU(3776001911922207672), KQU(1419304601390147631), + KQU(14987366598022458284), KQU(3977770701065815721), + KQU(730820417451838898), KQU(3982991703612885327), KQU(2803544519671388477), + KQU(17067667221114424649), KQU(2922555119737867166), + KQU(1989477584121460932), KQU(15020387605892337354), + KQU(9293277796427533547), KQU(10722181424063557247), + KQU(16704542332047511651), KQU(5008286236142089514), + KQU(16174732308747382540), KQU(17597019485798338402), + KQU(13081745199110622093), KQU(8850305883842258115), + KQU(12723629125624589005), KQU(8140566453402805978), + KQU(15356684607680935061), KQU(14222190387342648650), + KQU(11134610460665975178), KQU(1259799058620984266), + KQU(13281656268025610041), KQU(298262561068153992), + KQU(12277871700239212922), 
KQU(13911297774719779438), + KQU(16556727962761474934), KQU(17903010316654728010), + KQU(9682617699648434744), KQU(14757681836838592850), + KQU(1327242446558524473), KQU(11126645098780572792), + KQU(1883602329313221774), KQU(2543897783922776873), + KQU(15029168513767772842), KQU(12710270651039129878), + KQU(16118202956069604504), KQU(15010759372168680524), + KQU(2296827082251923948), KQU(10793729742623518101), + KQU(13829764151845413046), KQU(17769301223184451213), + KQU(3118268169210783372), KQU(17626204544105123127), + KQU(7416718488974352644), KQU(10450751996212925994), + KQU(9352529519128770586), KQU(259347569641110140), KQU(8048588892269692697), + KQU(1774414152306494058), KQU(10669548347214355622), + KQU(13061992253816795081), KQU(18432677803063861659), + KQU(8879191055593984333), KQU(12433753195199268041), + KQU(14919392415439730602), KQU(6612848378595332963), + KQU(6320986812036143628), KQU(10465592420226092859), + KQU(4196009278962570808), KQU(3747816564473572224), + KQU(17941203486133732898), KQU(2350310037040505198), + KQU(5811779859134370113), KQU(10492109599506195126), + KQU(7699650690179541274), KQU(1954338494306022961), + KQU(14095816969027231152), KQU(5841346919964852061), + KQU(14945969510148214735), KQU(3680200305887550992), + KQU(6218047466131695792), KQU(8242165745175775096), + KQU(11021371934053307357), KQU(1265099502753169797), + KQU(4644347436111321718), KQU(3609296916782832859), + KQU(8109807992218521571), KQU(18387884215648662020), + KQU(14656324896296392902), KQU(17386819091238216751), + KQU(17788300878582317152), KQU(7919446259742399591), + KQU(4466613134576358004), KQU(12928181023667938509), + KQU(13147446154454932030), KQU(16552129038252734620), + KQU(8395299403738822450), KQU(11313817655275361164), + KQU(434258809499511718), KQU(2074882104954788676), KQU(7929892178759395518), + KQU(9006461629105745388), KQU(5176475650000323086), + KQU(11128357033468341069), KQU(12026158851559118955), + KQU(14699716249471156500), 
KQU(448982497120206757), + KQU(4156475356685519900), KQU(6063816103417215727), + KQU(10073289387954971479), KQU(8174466846138590962), + KQU(2675777452363449006), KQU(9090685420572474281), + KQU(6659652652765562060), KQU(12923120304018106621), + KQU(11117480560334526775), KQU(937910473424587511), + KQU(1838692113502346645), KQU(11133914074648726180), + KQU(7922600945143884053), KQU(13435287702700959550), + KQU(5287964921251123332), KQU(11354875374575318947), + KQU(17955724760748238133), KQU(13728617396297106512), + KQU(4107449660118101255), KQU(1210269794886589623), + KQU(11408687205733456282), KQU(4538354710392677887), + KQU(13566803319341319267), KQU(17870798107734050771), + KQU(3354318982568089135), KQU(9034450839405133651), + KQU(13087431795753424314), KQU(950333102820688239), + KQU(1968360654535604116), KQU(16840551645563314995), + KQU(8867501803892924995), KQU(11395388644490626845), + KQU(1529815836300732204), KQU(13330848522996608842), + KQU(1813432878817504265), KQU(2336867432693429560), + KQU(15192805445973385902), KQU(2528593071076407877), + KQU(128459777936689248), KQU(9976345382867214866), KQU(6208885766767996043), + KQU(14982349522273141706), KQU(3099654362410737822), + KQU(13776700761947297661), KQU(8806185470684925550), + KQU(8151717890410585321), KQU(640860591588072925), + KQU(14592096303937307465), KQU(9056472419613564846), + KQU(14861544647742266352), KQU(12703771500398470216), + KQU(3142372800384138465), KQU(6201105606917248196), + KQU(18337516409359270184), KQU(15042268695665115339), + KQU(15188246541383283846), KQU(12800028693090114519), + KQU(5992859621101493472), KQU(18278043971816803521), + KQU(9002773075219424560), KQU(7325707116943598353), + KQU(7930571931248040822), KQU(5645275869617023448), + KQU(7266107455295958487), KQU(4363664528273524411), + KQU(14313875763787479809), KQU(17059695613553486802), + KQU(9247761425889940932), KQU(13704726459237593128), + KQU(2701312427328909832), KQU(17235532008287243115), + KQU(14093147761491729538), 
KQU(6247352273768386516), + KQU(8268710048153268415), KQU(7985295214477182083), + KQU(15624495190888896807), KQU(3772753430045262788), + KQU(9133991620474991698), KQU(5665791943316256028), + KQU(7551996832462193473), KQU(13163729206798953877), + KQU(9263532074153846374), KQU(1015460703698618353), + KQU(17929874696989519390), KQU(18257884721466153847), + KQU(16271867543011222991), KQU(3905971519021791941), + KQU(16814488397137052085), KQU(1321197685504621613), + KQU(2870359191894002181), KQU(14317282970323395450), + KQU(13663920845511074366), KQU(2052463995796539594), + KQU(14126345686431444337), KQU(1727572121947022534), + KQU(17793552254485594241), KQU(6738857418849205750), + KQU(1282987123157442952), KQU(16655480021581159251), + KQU(6784587032080183866), KQU(14726758805359965162), + KQU(7577995933961987349), KQU(12539609320311114036), + KQU(10789773033385439494), KQU(8517001497411158227), + KQU(10075543932136339710), KQU(14838152340938811081), + KQU(9560840631794044194), KQU(17445736541454117475), + KQU(10633026464336393186), KQU(15705729708242246293), + KQU(1117517596891411098), KQU(4305657943415886942), + KQU(4948856840533979263), KQU(16071681989041789593), + KQU(13723031429272486527), KQU(7639567622306509462), + KQU(12670424537483090390), KQU(9715223453097197134), + KQU(5457173389992686394), KQU(289857129276135145), + KQU(17048610270521972512), KQU(692768013309835485), + KQU(14823232360546632057), KQU(18218002361317895936), + KQU(3281724260212650204), KQU(16453957266549513795), + KQU(8592711109774511881), KQU(929825123473369579), + KQU(15966784769764367791), KQU(9627344291450607588), + KQU(10849555504977813287), KQU(9234566913936339275), + KQU(6413807690366911210), KQU(10862389016184219267), + KQU(13842504799335374048), KQU(1531994113376881174), + KQU(2081314867544364459), KQU(16430628791616959932), + KQU(8314714038654394368), KQU(9155473892098431813), + KQU(12577843786670475704), KQU(4399161106452401017), + KQU(1668083091682623186), KQU(1741383777203714216), 
+ KQU(2162597285417794374), KQU(15841980159165218736), + KQU(1971354603551467079), KQU(1206714764913205968), + KQU(4790860439591272330), KQU(14699375615594055799), + KQU(8374423871657449988), KQU(10950685736472937738), + KQU(697344331343267176), KQU(10084998763118059810), + KQU(12897369539795983124), KQU(12351260292144383605), + KQU(1268810970176811234), KQU(7406287800414582768), KQU(516169557043807831), + KQU(5077568278710520380), KQU(3828791738309039304), + KQU(7721974069946943610), KQU(3534670260981096460), + KQU(4865792189600584891), KQU(16892578493734337298), + KQU(9161499464278042590), KQU(11976149624067055931), + KQU(13219479887277343990), KQU(14161556738111500680), + KQU(14670715255011223056), KQU(4671205678403576558), + KQU(12633022931454259781), KQU(14821376219869187646), + KQU(751181776484317028), KQU(2192211308839047070), + KQU(11787306362361245189), KQU(10672375120744095707), + KQU(4601972328345244467), KQU(15457217788831125879), + KQU(8464345256775460809), KQU(10191938789487159478), + KQU(6184348739615197613), KQU(11425436778806882100), + KQU(2739227089124319793), KQU(461464518456000551), KQU(4689850170029177442), + KQU(6120307814374078625), KQU(11153579230681708671), + KQU(7891721473905347926), KQU(10281646937824872400), + KQU(3026099648191332248), KQU(8666750296953273818), + KQU(14978499698844363232), KQU(13303395102890132065), + KQU(8182358205292864080), KQU(10560547713972971291), + KQU(11981635489418959093), KQU(3134621354935288409), + KQU(11580681977404383968), KQU(14205530317404088650), + KQU(5997789011854923157), KQU(13659151593432238041), + KQU(11664332114338865086), KQU(7490351383220929386), + KQU(7189290499881530378), KQU(15039262734271020220), + KQU(2057217285976980055), KQU(555570804905355739), + KQU(11235311968348555110), KQU(13824557146269603217), + KQU(16906788840653099693), KQU(7222878245455661677), + KQU(5245139444332423756), KQU(4723748462805674292), + KQU(12216509815698568612), KQU(17402362976648951187), + KQU(17389614836810366768), 
KQU(4880936484146667711), + KQU(9085007839292639880), KQU(13837353458498535449), + KQU(11914419854360366677), KQU(16595890135313864103), + KQU(6313969847197627222), KQU(18296909792163910431), + KQU(10041780113382084042), KQU(2499478551172884794), + KQU(11057894246241189489), KQU(9742243032389068555), + KQU(12838934582673196228), KQU(13437023235248490367), + KQU(13372420669446163240), KQU(6752564244716909224), + KQU(7157333073400313737), KQU(12230281516370654308), + KQU(1182884552219419117), KQU(2955125381312499218), + KQU(10308827097079443249), KQU(1337648572986534958), + KQU(16378788590020343939), KQU(108619126514420935), + KQU(3990981009621629188), KQU(5460953070230946410), + KQU(9703328329366531883), KQU(13166631489188077236), + KQU(1104768831213675170), KQU(3447930458553877908), + KQU(8067172487769945676), KQU(5445802098190775347), + KQU(3244840981648973873), KQU(17314668322981950060), + KQU(5006812527827763807), KQU(18158695070225526260), + KQU(2824536478852417853), KQU(13974775809127519886), + KQU(9814362769074067392), KQU(17276205156374862128), + KQU(11361680725379306967), KQU(3422581970382012542), + KQU(11003189603753241266), KQU(11194292945277862261), + KQU(6839623313908521348), KQU(11935326462707324634), + KQU(1611456788685878444), KQU(13112620989475558907), + KQU(517659108904450427), KQU(13558114318574407624), + KQU(15699089742731633077), KQU(4988979278862685458), + KQU(8111373583056521297), KQU(3891258746615399627), + KQU(8137298251469718086), KQU(12748663295624701649), + KQU(4389835683495292062), KQU(5775217872128831729), + KQU(9462091896405534927), KQU(8498124108820263989), + KQU(8059131278842839525), KQU(10503167994254090892), + KQU(11613153541070396656), KQU(18069248738504647790), + KQU(570657419109768508), KQU(3950574167771159665), KQU(5514655599604313077), + KQU(2908460854428484165), KQU(10777722615935663114), + KQU(12007363304839279486), KQU(9800646187569484767), + KQU(8795423564889864287), KQU(14257396680131028419), + KQU(6405465117315096498), 
KQU(7939411072208774878), + KQU(17577572378528990006), KQU(14785873806715994850), + KQU(16770572680854747390), KQU(18127549474419396481), + KQU(11637013449455757750), KQU(14371851933996761086), + KQU(3601181063650110280), KQU(4126442845019316144), + KQU(10198287239244320669), KQU(18000169628555379659), + KQU(18392482400739978269), KQU(6219919037686919957), + KQU(3610085377719446052), KQU(2513925039981776336), + KQU(16679413537926716955), KQU(12903302131714909434), + KQU(5581145789762985009), KQU(12325955044293303233), + KQU(17216111180742141204), KQU(6321919595276545740), + KQU(3507521147216174501), KQU(9659194593319481840), + KQU(11473976005975358326), KQU(14742730101435987026), + KQU(492845897709954780), KQU(16976371186162599676), + KQU(17712703422837648655), KQU(9881254778587061697), + KQU(8413223156302299551), KQU(1563841828254089168), + KQU(9996032758786671975), KQU(138877700583772667), + KQU(13003043368574995989), KQU(4390573668650456587), + KQU(8610287390568126755), KQU(15126904974266642199), + KQU(6703637238986057662), KQU(2873075592956810157), + KQU(6035080933946049418), KQU(13382846581202353014), + KQU(7303971031814642463), KQU(18418024405307444267), + KQU(5847096731675404647), KQU(4035880699639842500), + KQU(11525348625112218478), KQU(3041162365459574102), + KQU(2604734487727986558), KQU(15526341771636983145), + KQU(14556052310697370254), KQU(12997787077930808155), + KQU(9601806501755554499), KQU(11349677952521423389), + KQU(14956777807644899350), KQU(16559736957742852721), + KQU(12360828274778140726), KQU(6685373272009662513), + KQU(16932258748055324130), KQU(15918051131954158508), + KQU(1692312913140790144), KQU(546653826801637367), KQU(5341587076045986652), + KQU(14975057236342585662), KQU(12374976357340622412), + KQU(10328833995181940552), KQU(12831807101710443149), + KQU(10548514914382545716), KQU(2217806727199715993), + KQU(12627067369242845138), KQU(4598965364035438158), + KQU(150923352751318171), KQU(14274109544442257283), + 
KQU(4696661475093863031), KQU(1505764114384654516), + KQU(10699185831891495147), KQU(2392353847713620519), + KQU(3652870166711788383), KQU(8640653276221911108), + KQU(3894077592275889704), KQU(4918592872135964845), + KQU(16379121273281400789), KQU(12058465483591683656), + KQU(11250106829302924945), KQU(1147537556296983005), + KQU(6376342756004613268), KQU(14967128191709280506), + KQU(18007449949790627628), KQU(9497178279316537841), + KQU(7920174844809394893), KQU(10037752595255719907), + KQU(15875342784985217697), KQU(15311615921712850696), + KQU(9552902652110992950), KQU(14054979450099721140), + KQU(5998709773566417349), KQU(18027910339276320187), + KQU(8223099053868585554), KQU(7842270354824999767), + KQU(4896315688770080292), KQU(12969320296569787895), + KQU(2674321489185759961), KQU(4053615936864718439), + KQU(11349775270588617578), KQU(4743019256284553975), + KQU(5602100217469723769), KQU(14398995691411527813), + KQU(7412170493796825470), KQU(836262406131744846), KQU(8231086633845153022), + KQU(5161377920438552287), KQU(8828731196169924949), + KQU(16211142246465502680), KQU(3307990879253687818), + KQU(5193405406899782022), KQU(8510842117467566693), + KQU(6070955181022405365), KQU(14482950231361409799), + KQU(12585159371331138077), KQU(3511537678933588148), + KQU(2041849474531116417), KQU(10944936685095345792), + KQU(18303116923079107729), KQU(2720566371239725320), + KQU(4958672473562397622), KQU(3032326668253243412), + KQU(13689418691726908338), KQU(1895205511728843996), + KQU(8146303515271990527), KQU(16507343500056113480), + KQU(473996939105902919), KQU(9897686885246881481), + KQU(14606433762712790575), KQU(6732796251605566368), + KQU(1399778120855368916), KQU(935023885182833777), + KQU(16066282816186753477), KQU(7291270991820612055), + KQU(17530230393129853844), KQU(10223493623477451366), + KQU(15841725630495676683), KQU(17379567246435515824), + KQU(8588251429375561971), KQU(18339511210887206423), + KQU(17349587430725976100), KQU(12244876521394838088), + 
KQU(6382187714147161259), KQU(12335807181848950831), + KQU(16948885622305460665), KQU(13755097796371520506), + KQU(14806740373324947801), KQU(4828699633859287703), + KQU(8209879281452301604), KQU(12435716669553736437), + KQU(13970976859588452131), KQU(6233960842566773148), + KQU(12507096267900505759), KQU(1198713114381279421), + KQU(14989862731124149015), KQU(15932189508707978949), + KQU(2526406641432708722), KQU(29187427817271982), KQU(1499802773054556353), + KQU(10816638187021897173), KQU(5436139270839738132), + KQU(6659882287036010082), KQU(2154048955317173697), + KQU(10887317019333757642), KQU(16281091802634424955), + KQU(10754549879915384901), KQU(10760611745769249815), + KQU(2161505946972504002), KQU(5243132808986265107), + KQU(10129852179873415416), KQU(710339480008649081), + KQU(7802129453068808528), KQU(17967213567178907213), + KQU(15730859124668605599), KQU(13058356168962376502), + KQU(3701224985413645909), KQU(14464065869149109264), + KQU(9959272418844311646), KQU(10157426099515958752), + KQU(14013736814538268528), KQU(17797456992065653951), + KQU(17418878140257344806), KQU(15457429073540561521), + KQU(2184426881360949378), KQU(2062193041154712416), + KQU(8553463347406931661), KQU(4913057625202871854), + KQU(2668943682126618425), KQU(17064444737891172288), + KQU(4997115903913298637), KQU(12019402608892327416), + KQU(17603584559765897352), KQU(11367529582073647975), + KQU(8211476043518436050), KQU(8676849804070323674), + KQU(18431829230394475730), KQU(10490177861361247904), + KQU(9508720602025651349), KQU(7409627448555722700), + KQU(5804047018862729008), KQU(11943858176893142594), + KQU(11908095418933847092), KQU(5415449345715887652), + KQU(1554022699166156407), KQU(9073322106406017161), + KQU(7080630967969047082), KQU(18049736940860732943), + KQU(12748714242594196794), KQU(1226992415735156741), + KQU(17900981019609531193), KQU(11720739744008710999), + KQU(3006400683394775434), KQU(11347974011751996028), + KQU(3316999628257954608), 
KQU(8384484563557639101), + KQU(18117794685961729767), KQU(1900145025596618194), + KQU(17459527840632892676), KQU(5634784101865710994), + KQU(7918619300292897158), KQU(3146577625026301350), + KQU(9955212856499068767), KQU(1873995843681746975), + KQU(1561487759967972194), KQU(8322718804375878474), + KQU(11300284215327028366), KQU(4667391032508998982), + KQU(9820104494306625580), KQU(17922397968599970610), + KQU(1784690461886786712), KQU(14940365084341346821), + KQU(5348719575594186181), KQU(10720419084507855261), + KQU(14210394354145143274), KQU(2426468692164000131), + KQU(16271062114607059202), KQU(14851904092357070247), + KQU(6524493015693121897), KQU(9825473835127138531), + KQU(14222500616268569578), KQU(15521484052007487468), + KQU(14462579404124614699), KQU(11012375590820665520), + KQU(11625327350536084927), KQU(14452017765243785417), + KQU(9989342263518766305), KQU(3640105471101803790), + KQU(4749866455897513242), KQU(13963064946736312044), + KQU(10007416591973223791), KQU(18314132234717431115), + KQU(3286596588617483450), KQU(7726163455370818765), + KQU(7575454721115379328), KQU(5308331576437663422), + KQU(18288821894903530934), KQU(8028405805410554106), + KQU(15744019832103296628), KQU(149765559630932100), + KQU(6137705557200071977), KQU(14513416315434803615), + KQU(11665702820128984473), KQU(218926670505601386), + KQU(6868675028717769519), KQU(15282016569441512302), + KQU(5707000497782960236), KQU(6671120586555079567), + KQU(2194098052618985448), KQU(16849577895477330978), + KQU(12957148471017466283), KQU(1997805535404859393), + KQU(1180721060263860490), KQU(13206391310193756958), + KQU(12980208674461861797), KQU(3825967775058875366), + KQU(17543433670782042631), KQU(1518339070120322730), + KQU(16344584340890991669), KQU(2611327165318529819), + KQU(11265022723283422529), KQU(4001552800373196817), + KQU(14509595890079346161), KQU(3528717165416234562), + KQU(18153222571501914072), KQU(9387182977209744425), + KQU(10064342315985580021), 
KQU(11373678413215253977), + KQU(2308457853228798099), KQU(9729042942839545302), + KQU(7833785471140127746), KQU(6351049900319844436), + KQU(14454610627133496067), KQU(12533175683634819111), + KQU(15570163926716513029), KQU(13356980519185762498)}; TEST_BEGIN(test_gen_rand_32) { uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint32_t r32; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size32(), BLOCK_SIZE, - "Array size too small"); + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_gen_rand(1234); fill_array32(ctx, array32, BLOCK_SIZE); fill_array32(ctx, array32_2, BLOCK_SIZE); @@ -1486,13 +1405,12 @@ TEST_END TEST_BEGIN(test_by_array_32) { uint32_t array32[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); uint32_t array32_2[BLOCK_SIZE] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint32_t ini[4] = {0x1234, 0x5678, 0x9abc, 0xdef0}; uint32_t r32; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size32(), BLOCK_SIZE, - "Array size too small"); + expect_d_le(get_min_array_size32(), BLOCK_SIZE, "Array size too small"); ctx = init_by_array(ini, 4); fill_array32(ctx, array32, BLOCK_SIZE); fill_array32(ctx, array32_2, BLOCK_SIZE); @@ -1521,12 +1439,12 @@ TEST_END TEST_BEGIN(test_gen_rand_64) { uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint64_t r; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size64(), BLOCK_SIZE64, - "Array size too small"); + expect_d_le( + get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_gen_rand(4321); fill_array64(ctx, array64, BLOCK_SIZE64); fill_array64(ctx, array64_2, BLOCK_SIZE64); @@ -1540,13 +1458,13 @@ TEST_BEGIN(test_gen_rand_64) { } r = gen_rand64(ctx); expect_u64_eq(r, array64[i], - "Mismatch at array64[%d]=%"FMTx64", gen=%"FMTx64, i, + "Mismatch at array64[%d]=%" FMTx64 ", gen=%" 
FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); expect_u64_eq(r, array64_2[i], - "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64"", i, + "Mismatch at array64_2[%d]=%" FMTx64 " gen=%" FMTx64 "", i, array64_2[i], r); } fini_gen_rand(ctx); @@ -1556,13 +1474,13 @@ TEST_END TEST_BEGIN(test_by_array_64) { uint64_t array64[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); uint64_t array64_2[BLOCK_SIZE64] JEMALLOC_ATTR(aligned(16)); - int i; + int i; uint64_t r; uint32_t ini[] = {5, 4, 3, 2, 1}; - sfmt_t *ctx; + sfmt_t *ctx; - expect_d_le(get_min_array_size64(), BLOCK_SIZE64, - "Array size too small"); + expect_d_le( + get_min_array_size64(), BLOCK_SIZE64, "Array size too small"); ctx = init_by_array(ini, 5); fill_array64(ctx, array64, BLOCK_SIZE64); fill_array64(ctx, array64_2, BLOCK_SIZE64); @@ -1576,13 +1494,13 @@ TEST_BEGIN(test_by_array_64) { } r = gen_rand64(ctx); expect_u64_eq(r, array64[i], - "Mismatch at array64[%d]=%"FMTx64" gen=%"FMTx64, i, + "Mismatch at array64[%d]=%" FMTx64 " gen=%" FMTx64, i, array64[i], r); } for (i = 0; i < COUNT_2; i++) { r = gen_rand64(ctx); expect_u64_eq(r, array64_2[i], - "Mismatch at array64_2[%d]=%"FMTx64" gen=%"FMTx64, i, + "Mismatch at array64_2[%d]=%" FMTx64 " gen=%" FMTx64, i, array64_2[i], r); } fini_gen_rand(ctx); @@ -1591,9 +1509,6 @@ TEST_END int main(void) { - return test( - test_gen_rand_32, - test_by_array_32, - test_gen_rand_64, + return test(test_gen_rand_32, test_by_array_32, test_gen_rand_64, test_by_array_64); } diff --git a/test/unit/a0.c b/test/unit/a0.c index c1be79a6..63d792d2 100644 --- a/test/unit/a0.c +++ b/test/unit/a0.c @@ -11,6 +11,5 @@ TEST_END int main(void) { - return test_no_malloc_init( - test_a0); + return test_no_malloc_init(test_a0); } diff --git a/test/unit/arena_decay.c b/test/unit/arena_decay.c index 177ba505..99c08ab9 100644 --- a/test/unit/arena_decay.c +++ b/test/unit/arena_decay.c @@ -4,11 +4,11 @@ #include "jemalloc/internal/ticker.h" static nstime_monotonic_t 
*nstime_monotonic_orig; -static nstime_update_t *nstime_update_orig; +static nstime_update_t *nstime_update_orig; static unsigned nupdates_mock; static nstime_t time_mock; -static bool monotonic_mock; +static bool monotonic_mock; static bool nstime_monotonic_mock(void) { @@ -28,26 +28,27 @@ TEST_BEGIN(test_decay_ticks) { test_skip_if(opt_hpa); ticker_geom_t *decay_ticker; - unsigned tick0, tick1, arena_ind; - size_t sz, large0; - void *p; + unsigned tick0, tick1, arena_ind; + size_t sz, large0; + void *p; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); /* Set up a manually managed arena for test. */ arena_ind = do_arena_create(0, 0); /* Migrate to the new arena, and get the ticker. */ unsigned old_arena_ind; - size_t sz_arena_ind = sizeof(old_arena_ind); + size_t sz_arena_ind = sizeof(old_arena_ind); expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, - &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); + &sz_arena_ind, (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); decay_ticker = tsd_arena_decay_tickerp_get(tsd_fetch()); - expect_ptr_not_null(decay_ticker, - "Unexpected failure getting decay ticker"); + expect_ptr_not_null( + decay_ticker, "Unexpected failure getting decay ticker"); /* * Test the standard APIs using a large size class, since we can't @@ -80,8 +81,8 @@ TEST_BEGIN(test_decay_ticks) { expect_d_eq(posix_memalign(&p, sizeof(size_t), large0), 0, "Unexpected posix_memalign() failure"); tick1 = ticker_geom_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during posix_memalign()"); + expect_u32_ne( + tick1, tick0, "Expected ticker to tick during posix_memalign()"); free(p); /* aligned_alloc(). 
*/ @@ -89,8 +90,8 @@ TEST_BEGIN(test_decay_ticks) { p = aligned_alloc(sizeof(size_t), large0); expect_ptr_not_null(p, "Unexpected aligned_alloc() failure"); tick1 = ticker_geom_read(decay_ticker); - expect_u32_ne(tick1, tick0, - "Expected ticker to tick during aligned_alloc()"); + expect_u32_ne( + tick1, tick0, "Expected ticker to tick during aligned_alloc()"); free(p); /* realloc(). */ @@ -118,7 +119,7 @@ TEST_BEGIN(test_decay_ticks) { */ { unsigned i; - size_t allocx_sizes[2]; + size_t allocx_sizes[2]; allocx_sizes[0] = large0; allocx_sizes[1] = 1; @@ -163,7 +164,8 @@ TEST_BEGIN(test_decay_ticks) { tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during sdallocx() " - "(sz=%zu)", sz); + "(sz=%zu)", + sz); } } @@ -172,18 +174,19 @@ TEST_BEGIN(test_decay_ticks) { * using an explicit tcache. */ unsigned tcache_ind, i; - size_t tcache_sizes[2]; + size_t tcache_sizes[2]; tcache_sizes[0] = large0; tcache_sizes[1] = 1; size_t tcache_max, sz_tcache_max; sz_tcache_max = sizeof(tcache_max); expect_d_eq(mallctl("arenas.tcache_max", (void *)&tcache_max, - &sz_tcache_max, NULL, 0), 0, "Unexpected mallctl() failure"); + &sz_tcache_max, NULL, 0), + 0, "Unexpected mallctl() failure"); sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq(mallctl("tcache.create", (void *)&tcache_ind, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); for (i = 0; i < sizeof(tcache_sizes) / sizeof(size_t); i++) { sz = tcache_sizes[i]; @@ -195,13 +198,14 @@ TEST_BEGIN(test_decay_ticks) { tick1 = ticker_geom_read(decay_ticker); expect_u32_ne(tick1, tick0, "Expected ticker to tick during tcache fill " - "(sz=%zu)", sz); + "(sz=%zu)", + sz); /* tcache flush. 
*/ dallocx(p, MALLOCX_TCACHE(tcache_ind)); tick0 = ticker_geom_read(decay_ticker); expect_d_eq(mallctl("tcache.flush", NULL, NULL, - (void *)&tcache_ind, sizeof(unsigned)), 0, - "Unexpected mallctl failure"); + (void *)&tcache_ind, sizeof(unsigned)), + 0, "Unexpected mallctl failure"); tick1 = ticker_geom_read(decay_ticker); /* Will only tick if it's in tcache. */ @@ -231,11 +235,11 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, * cached slab were to repeatedly come and go during looping, it could * prevent the decay backlog ever becoming empty. */ - void *p = do_mallocx(1, flags); + void *p = do_mallocx(1, flags); uint64_t dirty_npurge1, muzzy_npurge1; do { for (unsigned i = 0; i < ARENA_DECAY_NTICKS_PER_UPDATE / 2; - i++) { + i++) { void *q = do_mallocx(1, flags); dallocx(q, flags); } @@ -244,14 +248,15 @@ decay_ticker_helper(unsigned arena_ind, int flags, bool dirty, ssize_t dt, nstime_add(&time_mock, &update_interval); nstime_update(&time); - } while (nstime_compare(&time, &deadline) <= 0 && ((dirty_npurge1 == - dirty_npurge0 && muzzy_npurge1 == muzzy_npurge0) || - !terminate_asap)); + } while (nstime_compare(&time, &deadline) <= 0 + && ((dirty_npurge1 == dirty_npurge0 + && muzzy_npurge1 == muzzy_npurge0) + || !terminate_asap)); dallocx(p, flags); if (config_stats) { - expect_u64_gt(dirty_npurge1 + muzzy_npurge1, dirty_npurge0 + - muzzy_npurge0, "Expected purging to occur"); + expect_u64_gt(dirty_npurge1 + muzzy_npurge1, + dirty_npurge0 + muzzy_npurge0, "Expected purging to occur"); } #undef NINTERVALS } @@ -260,11 +265,11 @@ TEST_BEGIN(test_decay_ticker) { test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS 2048 - ssize_t ddt = opt_dirty_decay_ms; - ssize_t mdt = opt_muzzy_decay_ms; + ssize_t ddt = opt_dirty_decay_ms; + ssize_t mdt = opt_muzzy_decay_ms; unsigned arena_ind = do_arena_create(ddt, mdt); - int flags = (MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; + int flags = 
(MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; /* * Allocate a bunch of large objects, pause the clock, deallocate every @@ -274,8 +279,9 @@ TEST_BEGIN(test_decay_ticker) { */ size_t large; size_t sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); do_purge(arena_ind); uint64_t dirty_npurge0 = get_arena_dirty_npurge(arena_ind); @@ -302,9 +308,9 @@ TEST_BEGIN(test_decay_ticker) { "Expected nstime_update() to be called"); } - decay_ticker_helper(arena_ind, flags, true, ddt, dirty_npurge0, - muzzy_npurge0, true); - decay_ticker_helper(arena_ind, flags, false, ddt+mdt, dirty_npurge0, + decay_ticker_helper( + arena_ind, flags, true, ddt, dirty_npurge0, muzzy_npurge0, true); + decay_ticker_helper(arena_ind, flags, false, ddt + mdt, dirty_npurge0, muzzy_npurge0, false); do_arena_destroy(arena_ind); @@ -319,16 +325,17 @@ TEST_BEGIN(test_decay_nonmonotonic) { test_skip_if(is_background_thread_enabled()); test_skip_if(opt_hpa); #define NPS (SMOOTHSTEP_NSTEPS + 1) - int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); - void *ps[NPS]; + int flags = (MALLOCX_ARENA(0) | MALLOCX_TCACHE_NONE); + void *ps[NPS]; uint64_t npurge0 = 0; uint64_t npurge1 = 0; - size_t sz, large0; + size_t sz, large0; unsigned i, nupdates0; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large0, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure"); @@ -380,15 +387,15 @@ TEST_BEGIN(test_decay_now) { unsigned arena_ind = do_arena_create(0, 0); expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); expect_zu_eq(get_arena_pmuzzy(arena_ind), 
0, "Unexpected muzzy pages"); - size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; + size_t sizes[] = {16, PAGE << 2, HUGEPAGE << 2}; /* Verify that dirty/muzzy pages never linger after deallocation. */ - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) { size_t size = sizes[i]; generate_dirty(arena_ind, size); - expect_zu_eq(get_arena_pdirty(arena_ind), 0, - "Unexpected dirty pages"); - expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, - "Unexpected muzzy pages"); + expect_zu_eq( + get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); + expect_zu_eq( + get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); } do_arena_destroy(arena_ind); } @@ -399,12 +406,12 @@ TEST_BEGIN(test_decay_never) { test_skip_if(opt_hpa); unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; expect_zu_eq(get_arena_pdirty(arena_ind), 0, "Unexpected dirty pages"); expect_zu_eq(get_arena_pmuzzy(arena_ind), 0, "Unexpected muzzy pages"); - size_t sizes[] = {16, PAGE<<2, HUGEPAGE<<2}; - void *ptrs[sizeof(sizes)/sizeof(size_t)]; - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + size_t sizes[] = {16, PAGE << 2, HUGEPAGE << 2}; + void *ptrs[sizeof(sizes) / sizeof(size_t)]; + for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) { ptrs[i] = do_mallocx(sizes[i], flags); } /* Verify that each deallocation generates additional dirty pages. 
*/ @@ -419,7 +426,7 @@ TEST_BEGIN(test_decay_never) { expect_zu_eq(pdirty_prev, 0, "Unexpected dirty pages"); } expect_zu_eq(pmuzzy_prev, 0, "Unexpected muzzy pages"); - for (unsigned i = 0; i < sizeof(sizes)/sizeof(size_t); i++) { + for (unsigned i = 0; i < sizeof(sizes) / sizeof(size_t); i++) { dallocx(ptrs[i], flags); size_t pdirty = get_arena_pdirty(arena_ind); size_t pmuzzy = get_arena_pmuzzy(arena_ind); @@ -434,10 +441,6 @@ TEST_END int main(void) { - return test( - test_decay_ticks, - test_decay_ticker, - test_decay_nonmonotonic, - test_decay_now, - test_decay_never); + return test(test_decay_ticks, test_decay_ticker, + test_decay_nonmonotonic, test_decay_now, test_decay_never); } diff --git a/test/unit/arena_reset.c b/test/unit/arena_reset.c index 42fa9a5d..3e0f3d75 100644 --- a/test/unit/arena_reset.c +++ b/test/unit/arena_reset.c @@ -1,5 +1,5 @@ #ifndef ARENA_RESET_PROF_C_ -#include "test/jemalloc_test.h" +# include "test/jemalloc_test.h" #endif #include "jemalloc/internal/extent_mmap.h" @@ -10,7 +10,7 @@ static unsigned get_nsizes_impl(const char *cmd) { unsigned ret; - size_t z; + size_t z; z = sizeof(unsigned); expect_d_eq(mallctl(cmd, (void *)&ret, &z, NULL, 0), 0, @@ -37,12 +37,12 @@ get_size_impl(const char *cmd, size_t ind) { size_t miblen = 4; z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; z = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\", %zu], ...) failure", cmd, ind); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&ret, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\", %zu], ...) 
failure", cmd, ind); return ret; } @@ -61,8 +61,8 @@ get_large_size(size_t ind) { static size_t vsalloc(tsdn_t *tsdn, const void *ptr) { emap_full_alloc_ctx_t full_alloc_ctx; - bool missing = emap_full_alloc_ctx_try_lookup(tsdn, &arena_emap_global, - ptr, &full_alloc_ctx); + bool missing = emap_full_alloc_ctx_try_lookup( + tsdn, &arena_emap_global, ptr, &full_alloc_ctx); if (missing) { return 0; } @@ -84,20 +84,21 @@ vsalloc(tsdn_t *tsdn, const void *ptr) { static unsigned do_arena_create(extent_hooks_t *h) { unsigned arena_ind; - size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, - (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0, - "Unexpected mallctl() failure"); + size_t sz = sizeof(unsigned); + expect_d_eq( + mallctl("arenas.create", (void *)&arena_ind, &sz, + (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), + 0, "Unexpected mallctl() failure"); return arena_ind; } static void do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { -#define NLARGE 32 +#define NLARGE 32 unsigned nsmall, nlarge, i; - size_t sz; - int flags; - tsdn_t *tsdn; + size_t sz; + int flags; + tsdn_t *tsdn; flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; @@ -132,14 +133,14 @@ do_arena_reset_pre(unsigned arena_ind, void ***ptrs, unsigned *nptrs) { static void do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { - tsdn_t *tsdn; + tsdn_t *tsdn; unsigned i; tsdn = tsdn_fetch(); if (have_background_thread) { - malloc_mutex_lock(tsdn, - &background_thread_info_get(arena_ind)->mtx); + malloc_mutex_lock( + tsdn, &background_thread_info_get(arena_ind)->mtx); } /* Verify allocations no longer exist. 
*/ for (i = 0; i < nptrs; i++) { @@ -147,8 +148,8 @@ do_arena_reset_post(void **ptrs, unsigned nptrs, unsigned arena_ind) { "Allocation should no longer exist"); } if (have_background_thread) { - malloc_mutex_unlock(tsdn, - &background_thread_info_get(arena_ind)->mtx); + malloc_mutex_unlock( + tsdn, &background_thread_info_get(arena_ind)->mtx); } free(ptrs); @@ -159,7 +160,7 @@ do_arena_reset_destroy(const char *name, unsigned arena_ind) { size_t mib[3]; size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib(name, mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; @@ -179,7 +180,7 @@ do_arena_destroy(unsigned arena_ind) { TEST_BEGIN(test_arena_reset) { unsigned arena_ind; - void **ptrs; + void **ptrs; unsigned nptrs; arena_ind = do_arena_create(NULL); @@ -191,23 +192,25 @@ TEST_END static bool arena_i_initialized(unsigned arena_ind, bool refresh) { - bool initialized; + bool initialized; size_t mib[3]; size_t miblen, sz; if (refresh) { uint64_t epoch = 1; - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); } - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.initialized", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)arena_ind; sz = sizeof(initialized); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, - 0), 0, "Unexpected mallctlbymib() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&initialized, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); return initialized; } @@ -220,7 +223,7 @@ TEST_END TEST_BEGIN(test_arena_destroy_hooks_default) { unsigned arena_ind, arena_ind_another, arena_ind_prev; - void **ptrs; + void **ptrs; unsigned nptrs; 
arena_ind = do_arena_create(NULL); @@ -249,26 +252,27 @@ TEST_BEGIN(test_arena_destroy_hooks_default) { arena_ind_prev = arena_ind; arena_ind = do_arena_create(NULL); do_arena_reset_pre(arena_ind, &ptrs, &nptrs); - expect_u_eq(arena_ind, arena_ind_prev, - "Arena index should have been recycled"); + expect_u_eq( + arena_ind, arena_ind_prev, "Arena index should have been recycled"); do_arena_destroy(arena_ind); do_arena_reset_post(ptrs, nptrs, arena_ind); do_arena_destroy(arena_ind_another); /* Try arena.create with custom hooks. */ - size_t sz = sizeof(extent_hooks_t *); + size_t sz = sizeof(extent_hooks_t *); extent_hooks_t *a0_default_hooks; expect_d_eq(mallctl("arena.0.extent_hooks", (void *)&a0_default_hooks, - &sz, NULL, 0), 0, "Unexpected mallctlnametomib() failure"); + &sz, NULL, 0), + 0, "Unexpected mallctlnametomib() failure"); /* Default impl; but wrapped as "customized". */ - extent_hooks_t new_hooks = *a0_default_hooks; + extent_hooks_t new_hooks = *a0_default_hooks; extent_hooks_t *hook = &new_hooks; sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, - (void *)&hook, sizeof(void *)), 0, - "Unexpected mallctl() failure"); + (void *)&hook, sizeof(void *)), + 0, "Unexpected mallctl() failure"); do_arena_destroy(arena_ind); } TEST_END @@ -280,13 +284,15 @@ TEST_END static bool extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, bool committed, unsigned arena_ind) { - TRACE_HOOK("%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " - "arena_ind=%u)\n", __func__, extent_hooks, addr, size, committed ? - "true" : "false", arena_ind); + TRACE_HOOK( + "%s(extent_hooks=%p, addr=%p, size=%zu, committed=%s, " + "arena_ind=%u)\n", + __func__, extent_hooks, addr, size, committed ? 
"true" : "false", + arena_ind); expect_ptr_eq(extent_hooks, &hooks, "extent_hooks should be same as pointer used to set hooks"); - expect_ptr_eq(extent_hooks->dalloc, extent_dalloc_unmap, - "Wrong hook function"); + expect_ptr_eq( + extent_hooks->dalloc, extent_dalloc_unmap, "Wrong hook function"); called_dalloc = true; if (!try_dalloc) { return true; @@ -301,21 +307,15 @@ extent_dalloc_unmap(extent_hooks_t *extent_hooks, void *addr, size_t size, static extent_hooks_t hooks_orig; -static extent_hooks_t hooks_unmap = { - extent_alloc_hook, - extent_dalloc_unmap, /* dalloc */ - extent_destroy_hook, - extent_commit_hook, - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - extent_split_hook, - extent_merge_hook -}; +static extent_hooks_t hooks_unmap = {extent_alloc_hook, + extent_dalloc_unmap, /* dalloc */ + extent_destroy_hook, extent_commit_hook, extent_decommit_hook, + extent_purge_lazy_hook, extent_purge_forced_hook, extent_split_hook, + extent_merge_hook}; TEST_BEGIN(test_arena_destroy_hooks_unmap) { unsigned arena_ind; - void **ptrs; + void **ptrs; unsigned nptrs; extent_hooks_prep(); @@ -353,9 +353,6 @@ TEST_END int main(void) { - return test( - test_arena_reset, - test_arena_destroy_initial, - test_arena_destroy_hooks_default, - test_arena_destroy_hooks_unmap); + return test(test_arena_reset, test_arena_destroy_initial, + test_arena_destroy_hooks_default, test_arena_destroy_hooks_unmap); } diff --git a/test/unit/atomic.c b/test/unit/atomic.c index 6c4b85e5..b4f59431 100644 --- a/test/unit/atomic.c +++ b/test/unit/atomic.c @@ -187,7 +187,6 @@ TEST_BEGIN(test_atomic_u64) { } TEST_END - TEST_STRUCT(uint32_t, u32); TEST_BEGIN(test_atomic_u32) { INTEGER_TEST_BODY(uint32_t, u32); @@ -212,7 +211,6 @@ TEST_BEGIN(test_atomic_zd) { } TEST_END - TEST_STRUCT(unsigned, u); TEST_BEGIN(test_atomic_u) { INTEGER_TEST_BODY(unsigned, u); @@ -221,11 +219,6 @@ TEST_END int main(void) { - return test( - test_atomic_u64, - test_atomic_u32, - test_atomic_p, 
- test_atomic_zu, - test_atomic_zd, - test_atomic_u); + return test(test_atomic_u64, test_atomic_u32, test_atomic_p, + test_atomic_zu, test_atomic_zd, test_atomic_u); } diff --git a/test/unit/background_thread.c b/test/unit/background_thread.c index c60010a8..819a81a6 100644 --- a/test/unit/background_thread.c +++ b/test/unit/background_thread.c @@ -4,14 +4,13 @@ static void test_switch_background_thread_ctl(bool new_val) { - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); e1 = new_val; - expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, - &e1, sz), 0, "Unexpected mallctl() failure"); - expect_b_eq(e0, !e1, - "background_thread should be %d before.\n", !e1); + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, + "Unexpected mallctl() failure"); + expect_b_eq(e0, !e1, "background_thread should be %d before.\n", !e1); if (e1) { expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); @@ -23,14 +22,13 @@ test_switch_background_thread_ctl(bool new_val) { static void test_repeat_background_thread_ctl(bool before) { - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); e1 = before; - expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, - &e1, sz), 0, "Unexpected mallctl() failure"); - expect_b_eq(e0, before, - "background_thread should be %d.\n", before); + expect_d_eq(mallctl("background_thread", (void *)&e0, &sz, &e1, sz), 0, + "Unexpected mallctl() failure"); + expect_b_eq(e0, before, "background_thread should be %d.\n", before); if (e1) { expect_zu_gt(n_background_threads, 0, "Number of background threads should be non zero.\n"); @@ -43,15 +41,15 @@ test_repeat_background_thread_ctl(bool before) { TEST_BEGIN(test_background_thread_ctl) { test_skip_if(!have_background_thread); - bool e0, e1; + bool e0, e1; size_t sz = sizeof(bool); - expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctl("background_thread", 
(void *)&e1, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); - expect_b_eq(e0, e1, - "Default and opt.background_thread does not match.\n"); + expect_d_eq(mallctl("opt.background_thread", (void *)&e0, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("background_thread", (void *)&e1, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_b_eq( + e0, e1, "Default and opt.background_thread does not match.\n"); if (e0) { test_switch_background_thread_ctl(false); } @@ -75,7 +73,7 @@ TEST_BEGIN(test_background_thread_running) { test_skip_if(!config_stats); #if defined(JEMALLOC_BACKGROUND_THREAD) - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); background_thread_info_t *info = &background_thread_info[0]; test_repeat_background_thread_ctl(false); @@ -113,6 +111,5 @@ int main(void) { /* Background_thread creation tests reentrancy naturally. */ return test_no_reentrancy( - test_background_thread_ctl, - test_background_thread_running); + test_background_thread_ctl, test_background_thread_running); } diff --git a/test/unit/background_thread_enable.c b/test/unit/background_thread_enable.c index 3a2d55ac..57f26c4b 100644 --- a/test/unit/background_thread_enable.c +++ b/test/unit/background_thread_enable.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "background_thread:false,narenas:1,max_background_threads:8"; +const char *malloc_conf = + "background_thread:false,narenas:1,max_background_threads:8"; static unsigned max_test_narenas(void) { @@ -21,14 +22,14 @@ TEST_BEGIN(test_deferred) { test_skip_if(!have_background_thread); unsigned id; - size_t sz_u = sizeof(unsigned); + size_t sz_u = sizeof(unsigned); for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } - bool enable = true; + bool enable = true; size_t sz_b = sizeof(bool); expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable 
background threads"); @@ -44,29 +45,32 @@ TEST_BEGIN(test_max_background_threads) { size_t max_n_thds; size_t opt_max_n_thds; size_t sz_m = sizeof(max_n_thds); - expect_d_eq(mallctl("opt.max_background_threads", - &opt_max_n_thds, &sz_m, NULL, 0), 0, - "Failed to get opt.max_background_threads"); - expect_d_eq(mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, - 0), 0, "Failed to get max background threads"); + expect_d_eq(mallctl("opt.max_background_threads", &opt_max_n_thds, + &sz_m, NULL, 0), + 0, "Failed to get opt.max_background_threads"); + expect_d_eq( + mallctl("max_background_threads", &max_n_thds, &sz_m, NULL, 0), 0, + "Failed to get max background threads"); expect_zu_eq(opt_max_n_thds, max_n_thds, "max_background_threads and " "opt.max_background_threads should match"); - expect_d_eq(mallctl("max_background_threads", NULL, NULL, &max_n_thds, - sz_m), 0, "Failed to set max background threads"); + expect_d_eq( + mallctl("max_background_threads", NULL, NULL, &max_n_thds, sz_m), 0, + "Failed to set max background threads"); size_t size_zero = 0; - expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, - sz_m), 0, "Should not allow zero background threads"); + expect_d_ne( + mallctl("max_background_threads", NULL, NULL, &size_zero, sz_m), 0, + "Should not allow zero background threads"); unsigned id; - size_t sz_u = sizeof(unsigned); + size_t sz_u = sizeof(unsigned); for (unsigned i = 0; i < max_test_narenas(); i++) { expect_d_eq(mallctl("arenas.create", &id, &sz_u, NULL, 0), 0, "Failed to create arena"); } - bool enable = true; + bool enable = true; size_t sz_b = sizeof(bool); expect_d_eq(mallctl("background_thread", NULL, NULL, &enable, sz_b), 0, "Failed to enable background threads"); @@ -75,16 +79,18 @@ TEST_BEGIN(test_max_background_threads) { size_t new_max_thds = max_n_thds - 1; if (new_max_thds > 0) { expect_d_eq(mallctl("max_background_threads", NULL, NULL, - &new_max_thds, sz_m), 0, - "Failed to set max background threads"); 
+ &new_max_thds, sz_m), + 0, "Failed to set max background threads"); expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should decrease by 1.\n"); } new_max_thds = 1; - expect_d_eq(mallctl("max_background_threads", NULL, NULL, &new_max_thds, - sz_m), 0, "Failed to set max background threads"); - expect_d_ne(mallctl("max_background_threads", NULL, NULL, &size_zero, - sz_m), 0, "Should not allow zero background threads"); + expect_d_eq( + mallctl("max_background_threads", NULL, NULL, &new_max_thds, sz_m), + 0, "Failed to set max background threads"); + expect_d_ne( + mallctl("max_background_threads", NULL, NULL, &size_zero, sz_m), 0, + "Should not allow zero background threads"); expect_zu_eq(n_background_threads, new_max_thds, "Number of background threads should be 1.\n"); } @@ -92,7 +98,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_deferred, - test_max_background_threads); + return test_no_reentrancy(test_deferred, test_max_background_threads); } diff --git a/test/unit/base.c b/test/unit/base.c index 3e46626e..e6e82435 100644 --- a/test/unit/base.c +++ b/test/unit/base.c @@ -3,37 +3,31 @@ #include "test/extent_hooks.h" static extent_hooks_t hooks_null = { - extent_alloc_hook, - NULL, /* dalloc */ - NULL, /* destroy */ - NULL, /* commit */ - NULL, /* decommit */ - NULL, /* purge_lazy */ - NULL, /* purge_forced */ - NULL, /* split */ - NULL /* merge */ + extent_alloc_hook, NULL, /* dalloc */ + NULL, /* destroy */ + NULL, /* commit */ + NULL, /* decommit */ + NULL, /* purge_lazy */ + NULL, /* purge_forced */ + NULL, /* split */ + NULL /* merge */ }; static extent_hooks_t hooks_not_null = { - extent_alloc_hook, - extent_dalloc_hook, - extent_destroy_hook, - NULL, /* commit */ - extent_decommit_hook, - extent_purge_lazy_hook, - extent_purge_forced_hook, - NULL, /* split */ - NULL /* merge */ + extent_alloc_hook, extent_dalloc_hook, extent_destroy_hook, + NULL, /* commit */ + extent_decommit_hook, extent_purge_lazy_hook, 
extent_purge_forced_hook, + NULL, /* split */ + NULL /* merge */ }; TEST_BEGIN(test_base_hooks_default) { base_t *base; - size_t allocated0, allocated1, edata_allocated, - rtree_allocated, resident, mapped, n_thp; + size_t allocated0, allocated1, edata_allocated, rtree_allocated, + resident, mapped, n_thp; tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - base = base_new(tsdn, 0, - (extent_hooks_t *)&ehooks_default_extent_hooks, + base = base_new(tsdn, 0, (extent_hooks_t *)&ehooks_default_extent_hooks, /* metadata_use_hooks */ true); if (config_stats) { @@ -42,13 +36,13 @@ TEST_BEGIN(test_base_hooks_default) { expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - expect_zu_gt(n_thp, 0, - "Base should have 1 THP at least."); + expect_zu_gt( + n_thp, 0, "Base should have 1 THP at least."); } } - expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if (config_stats) { base_stats_get(tsdn, base, &allocated1, &edata_allocated, @@ -63,9 +57,9 @@ TEST_END TEST_BEGIN(test_base_hooks_null) { extent_hooks_t hooks_orig; - base_t *base; - size_t allocated0, allocated1, edata_allocated, - rtree_allocated, resident, mapped, n_thp; + base_t *base; + size_t allocated0, allocated1, edata_allocated, rtree_allocated, + resident, mapped, n_thp; extent_hooks_prep(); try_dalloc = false; @@ -86,13 +80,13 @@ TEST_BEGIN(test_base_hooks_null) { expect_zu_ge(allocated0, sizeof(base_t), "Base header should count as allocated"); if (opt_metadata_thp == metadata_thp_always) { - expect_zu_gt(n_thp, 0, - "Base should have 1 THP at least."); + expect_zu_gt( + n_thp, 0, "Base should have 1 THP at least."); } } - expect_ptr_not_null(base_alloc(tsdn, base, 42, 1), - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + base_alloc(tsdn, base, 42, 1), "Unexpected base_alloc() failure"); if 
(config_stats) { base_stats_get(tsdn, base, &allocated1, &edata_allocated, @@ -109,8 +103,8 @@ TEST_END TEST_BEGIN(test_base_hooks_not_null) { extent_hooks_t hooks_orig; - base_t *base; - void *p, *q, *r, *r_exp; + base_t *base; + void *p, *q, *r, *r_exp; extent_hooks_prep(); try_dalloc = false; @@ -133,33 +127,34 @@ TEST_BEGIN(test_base_hooks_not_null) { */ { const size_t alignments[] = { - 1, - QUANTUM, - QUANTUM << 1, - CACHELINE, - CACHELINE << 1, + 1, + QUANTUM, + QUANTUM << 1, + CACHELINE, + CACHELINE << 1, }; unsigned i; for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { size_t alignment = alignments[i]; - size_t align_ceil = ALIGNMENT_CEILING(alignment, - QUANTUM); + size_t align_ceil = ALIGNMENT_CEILING( + alignment, QUANTUM); p = base_alloc(tsdn, base, 1, alignment); - expect_ptr_not_null(p, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + p, "Unexpected base_alloc() failure"); expect_ptr_eq(p, - (void *)(ALIGNMENT_CEILING((uintptr_t)p, - alignment)), "Expected quantum alignment"); + (void *)(ALIGNMENT_CEILING( + (uintptr_t)p, alignment)), + "Expected quantum alignment"); q = base_alloc(tsdn, base, alignment, alignment); - expect_ptr_not_null(q, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + q, "Unexpected base_alloc() failure"); expect_ptr_eq((void *)((uintptr_t)p + align_ceil), q, "Minimal allocation should take up %zu bytes", align_ceil); r = base_alloc(tsdn, base, 1, alignment); - expect_ptr_not_null(r, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + r, "Unexpected base_alloc() failure"); expect_ptr_eq((void *)((uintptr_t)q + align_ceil), r, "Minimal allocation should take up %zu bytes", align_ceil); @@ -193,21 +188,18 @@ TEST_BEGIN(test_base_hooks_not_null) { * Check for proper alignment support when normal blocks are too small. 
*/ { - const size_t alignments[] = { - HUGEPAGE, - HUGEPAGE << 1 - }; - unsigned i; + const size_t alignments[] = {HUGEPAGE, HUGEPAGE << 1}; + unsigned i; for (i = 0; i < sizeof(alignments) / sizeof(size_t); i++) { size_t alignment = alignments[i]; p = base_alloc(tsdn, base, QUANTUM, alignment); - expect_ptr_not_null(p, - "Unexpected base_alloc() failure"); + expect_ptr_not_null( + p, "Unexpected base_alloc() failure"); expect_ptr_eq(p, - (void *)(ALIGNMENT_CEILING((uintptr_t)p, - alignment)), "Expected %zu-byte alignment", - alignment); + (void *)(ALIGNMENT_CEILING( + (uintptr_t)p, alignment)), + "Expected %zu-byte alignment", alignment); } } @@ -237,12 +229,11 @@ TEST_BEGIN(test_base_ehooks_get_for_metadata_default_hook) { base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ false); ehooks_t *ehooks = base_ehooks_get_for_metadata(base); expect_true(ehooks_are_default(ehooks), - "Expected default extent hook functions pointer"); + "Expected default extent hook functions pointer"); base_delete(tsdn, base); } TEST_END - TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) { extent_hooks_prep(); memcpy(&hooks, &hooks_not_null, sizeof(extent_hooks_t)); @@ -251,17 +242,15 @@ TEST_BEGIN(test_base_ehooks_get_for_metadata_custom_hook) { base = base_new(tsdn, 0, &hooks, /* metadata_use_hooks */ true); ehooks_t *ehooks = base_ehooks_get_for_metadata(base); expect_ptr_eq(&hooks, ehooks_get_extent_hooks_ptr(ehooks), - "Expected user-specified extend hook functions pointer"); + "Expected user-specified extend hook functions pointer"); base_delete(tsdn, base); } TEST_END int main(void) { - return test( - test_base_hooks_default, - test_base_hooks_null, + return test(test_base_hooks_default, test_base_hooks_null, test_base_hooks_not_null, - test_base_ehooks_get_for_metadata_default_hook, - test_base_ehooks_get_for_metadata_custom_hook); + test_base_ehooks_get_for_metadata_default_hook, + test_base_ehooks_get_for_metadata_custom_hook); } diff --git 
a/test/unit/batch_alloc.c b/test/unit/batch_alloc.c index 2bd5968e..0c61bf77 100644 --- a/test/unit/batch_alloc.c +++ b/test/unit/batch_alloc.c @@ -6,8 +6,8 @@ static void *global_ptrs[BATCH_MAX]; #define PAGE_ALIGNED(ptr) (((uintptr_t)ptr & PAGE_MASK) == 0) static void -verify_batch_basic(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, - bool zero) { +verify_batch_basic( + tsd_t *tsd, void **ptrs, size_t batch, size_t usize, bool zero) { for (size_t i = 0; i < batch; ++i) { void *p = ptrs[i]; expect_zu_eq(isalloc(tsd_tsdn(tsd), p), usize, ""); @@ -46,7 +46,8 @@ verify_batch_locality(tsd_t *tsd, void **ptrs, size_t batch, size_t usize, assert(i > 0); void *q = ptrs[i - 1]; expect_true((uintptr_t)p > (uintptr_t)q - && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, ""); + && (size_t)((uintptr_t)p - (uintptr_t)q) == usize, + ""); } } @@ -62,16 +63,17 @@ struct batch_alloc_packet_s { void **ptrs; size_t num; size_t size; - int flags; + int flags; }; static size_t batch_alloc_wrapper(void **ptrs, size_t num, size_t size, int flags) { batch_alloc_packet_t batch_alloc_packet = {ptrs, num, size, flags}; - size_t filled; - size_t len = sizeof(size_t); + size_t filled; + size_t len = sizeof(size_t); assert_d_eq(mallctl("experimental.batch_alloc", &filled, &len, - &batch_alloc_packet, sizeof(batch_alloc_packet)), 0, ""); + &batch_alloc_packet, sizeof(batch_alloc_packet)), + 0, ""); return filled; } @@ -79,16 +81,16 @@ static void test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { tsd_t *tsd = tsd_fetch(); assert(tsd != NULL); - const size_t usize = - (alignment != 0 ? sz_sa2u(size, alignment) : sz_s2u(size)); - const szind_t ind = sz_size2index(usize); + const size_t usize = (alignment != 0 ? 
sz_sa2u(size, alignment) + : sz_s2u(size)); + const szind_t ind = sz_size2index(usize); const bin_info_t *bin_info = &bin_infos[ind]; - const unsigned nregs = bin_info->nregs; + const unsigned nregs = bin_info->nregs; assert(nregs > 0); arena_t *arena; if (arena_flag != 0) { - arena = arena_get(tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), - false); + arena = arena_get( + tsd_tsdn(tsd), MALLOCX_ARENA_GET(arena_flag), false); } else { arena = arena_choose(tsd, NULL); } @@ -122,13 +124,13 @@ test_wrapper(size_t size, size_t alignment, bool zero, unsigned arena_flag) { } size_t batch = base + (size_t)j; assert(batch < BATCH_MAX); - size_t filled = batch_alloc_wrapper(global_ptrs, batch, - size, flags); + size_t filled = batch_alloc_wrapper( + global_ptrs, batch, size, flags); assert_zu_eq(filled, batch, ""); - verify_batch_basic(tsd, global_ptrs, batch, usize, - zero); - verify_batch_locality(tsd, global_ptrs, batch, usize, - arena, nregs); + verify_batch_basic( + tsd, global_ptrs, batch, usize, zero); + verify_batch_locality( + tsd, global_ptrs, batch, usize, arena, nregs); release_batch(global_ptrs, batch, usize); } } @@ -153,9 +155,10 @@ TEST_END TEST_BEGIN(test_batch_alloc_manual_arena) { unsigned arena_ind; - size_t len_unsigned = sizeof(unsigned); - assert_d_eq(mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, - 0), 0, ""); + size_t len_unsigned = sizeof(unsigned); + assert_d_eq( + mallctl("arenas.create", &arena_ind, &len_unsigned, NULL, 0), 0, + ""); test_wrapper(11, 0, false, MALLOCX_ARENA(arena_ind)); } TEST_END @@ -180,10 +183,7 @@ TEST_END int main(void) { - return test( - test_batch_alloc, - test_batch_alloc_zero, - test_batch_alloc_aligned, - test_batch_alloc_manual_arena, + return test(test_batch_alloc, test_batch_alloc_zero, + test_batch_alloc_aligned, test_batch_alloc_manual_arena, test_batch_alloc_large); } diff --git a/test/unit/batcher.c b/test/unit/batcher.c index df9d3e5b..1052ca27 100644 --- a/test/unit/batcher.c +++ 
b/test/unit/batcher.c @@ -5,7 +5,7 @@ TEST_BEGIN(test_simple) { enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; batcher_t batcher; - size_t data[NELEMS_MAX]; + size_t data[NELEMS_MAX]; for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { batcher_init(&batcher, nelems); for (int run = 0; run < NRUNS; run++) { @@ -13,8 +13,8 @@ TEST_BEGIN(test_simple) { data[i] = (size_t)-1; } for (size_t i = 0; i < nelems; i++) { - size_t idx = batcher_push_begin(TSDN_NULL, - &batcher, 1); + size_t idx = batcher_push_begin( + TSDN_NULL, &batcher, 1); assert_zu_eq(i, idx, "Wrong index"); assert_zu_eq((size_t)-1, data[idx], "Expected uninitialized slot"); @@ -22,8 +22,8 @@ TEST_BEGIN(test_simple) { batcher_push_end(TSDN_NULL, &batcher); } if (nelems > 0) { - size_t idx = batcher_push_begin(TSDN_NULL, - &batcher, 1); + size_t idx = batcher_push_begin( + TSDN_NULL, &batcher, 1); assert_zu_eq(BATCHER_NO_IDX, idx, "Shouldn't be able to push into a full " "batcher"); @@ -51,7 +51,7 @@ TEST_BEGIN(test_simple) { TEST_END TEST_BEGIN(test_multi_push) { - size_t idx, nelems; + size_t idx, nelems; batcher_t batcher; batcher_init(&batcher, 11); /* Push two at a time, 5 times, for 10 total. 
*/ @@ -82,13 +82,13 @@ enum { typedef struct stress_test_data_s stress_test_data_t; struct stress_test_data_s { - batcher_t batcher; - mtx_t pop_mtx; + batcher_t batcher; + mtx_t pop_mtx; atomic_u32_t thread_id; - uint32_t elems_data[STRESS_TEST_ELEMS]; - size_t push_count[STRESS_TEST_ELEMS]; - size_t pop_count[STRESS_TEST_ELEMS]; + uint32_t elems_data[STRESS_TEST_ELEMS]; + size_t push_count[STRESS_TEST_ELEMS]; + size_t pop_count[STRESS_TEST_ELEMS]; atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; }; @@ -108,7 +108,8 @@ get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { return i; } } - assert_not_reached("Asked for the %zu'th set element when < %zu are " + assert_not_reached( + "Asked for the %zu'th set element when < %zu are " "set", n, n); /* Just to silence a compiler warning. */ @@ -118,20 +119,19 @@ get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { static void * stress_test_thd(void *arg) { stress_test_data_t *data = arg; - size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, - ATOMIC_RELAXED); + size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, ATOMIC_RELAXED); size_t nelems_owned = 0; - bool elems_owned[STRESS_TEST_ELEMS] = {0}; + bool elems_owned[STRESS_TEST_ELEMS] = {0}; size_t local_push_count[STRESS_TEST_ELEMS] = {0}; size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; for (int i = 0; i < STRESS_TEST_OPS; i++) { - size_t rnd = prng_range_zu(&prng, - STRESS_TEST_PUSH_TO_POP_RATIO); + size_t rnd = prng_range_zu( + &prng, STRESS_TEST_PUSH_TO_POP_RATIO); if (rnd == 0 || nelems_owned == 0) { - size_t nelems = batcher_pop_begin(TSDN_NULL, - &data->batcher); + size_t nelems = batcher_pop_begin( + TSDN_NULL, &data->batcher); if (nelems == BATCHER_NO_IDX) { continue; } @@ -147,19 +147,18 @@ stress_test_thd(void *arg) { } batcher_pop_end(TSDN_NULL, &data->batcher); } else { - size_t elem_to_push_idx = prng_range_zu(&prng, - nelems_owned); - size_t elem = get_nth_set(elems_owned, - 
elem_to_push_idx); - assert_true( - elems_owned[elem], + size_t elem_to_push_idx = prng_range_zu( + &prng, nelems_owned); + size_t elem = get_nth_set( + elems_owned, elem_to_push_idx); + assert_true(elems_owned[elem], "Should own element we're about to pop"); elems_owned[elem] = false; local_push_count[elem]++; data->push_count[elem]++; nelems_owned--; - size_t idx = batcher_push_begin(TSDN_NULL, - &data->batcher, 1); + size_t idx = batcher_push_begin( + TSDN_NULL, &data->batcher, 1); assert_zu_ne(idx, BATCHER_NO_IDX, "Batcher can't be full -- we have one of its " "elems!"); @@ -171,10 +170,10 @@ stress_test_thd(void *arg) { /* Push all local elems back, flush local counts to the shared ones. */ size_t push_idx = 0; if (nelems_owned != 0) { - push_idx = batcher_push_begin(TSDN_NULL, &data->batcher, - nelems_owned); - assert_zu_ne(BATCHER_NO_IDX, push_idx, - "Should be space to push"); + push_idx = batcher_push_begin( + TSDN_NULL, &data->batcher, nelems_owned); + assert_zu_ne( + BATCHER_NO_IDX, push_idx, "Should be space to push"); } for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { if (elems_owned[i]) { @@ -183,12 +182,10 @@ stress_test_thd(void *arg) { local_push_count[i]++; data->push_count[i]++; } - atomic_fetch_add_zu( - &data->atomic_push_count[i], local_push_count[i], - ATOMIC_RELAXED); - atomic_fetch_add_zu( - &data->atomic_pop_count[i], local_pop_count[i], - ATOMIC_RELAXED); + atomic_fetch_add_zu(&data->atomic_push_count[i], + local_push_count[i], ATOMIC_RELAXED); + atomic_fetch_add_zu(&data->atomic_pop_count[i], + local_pop_count[i], ATOMIC_RELAXED); } if (nelems_owned != 0) { batcher_push_end(TSDN_NULL, &data->batcher); @@ -223,8 +220,8 @@ TEST_BEGIN(test_stress) { thd_join(threads[i], NULL); } for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - assert_zu_ne(0, data.push_count[i], - "Should have done something!"); + assert_zu_ne( + 0, data.push_count[i], "Should have done something!"); assert_zu_eq(data.push_count[i], data.pop_count[i], "every element should 
be pushed and popped an equal number " "of times"); diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c index a20062fd..a422586d 100644 --- a/test/unit/bin_batching.c +++ b/test/unit/bin_batching.c @@ -9,10 +9,10 @@ enum { typedef struct stress_thread_data_s stress_thread_data_t; struct stress_thread_data_s { - unsigned thd_id; + unsigned thd_id; atomic_zu_t *ready_thds; atomic_zu_t *done_thds; - void **to_dalloc; + void **to_dalloc; }; static atomic_zu_t push_failure_count; @@ -68,19 +68,19 @@ increment_pop_attempt(size_t elems_to_pop) { static void increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { if (slab_dalloc_count > 0) { - atomic_fetch_add_zu(&dalloc_nonzero_slab_count, 1, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &dalloc_nonzero_slab_count, 1, ATOMIC_RELAXED); } else { - atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, - ATOMIC_RELAXED); + atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, ATOMIC_RELAXED); } if (!list_empty) { - atomic_fetch_add_zu(&dalloc_nonempty_list_count, 1, - ATOMIC_RELAXED); + atomic_fetch_add_zu( + &dalloc_nonempty_list_count, 1, ATOMIC_RELAXED); } } -static void flush_tcache() { +static void +flush_tcache() { assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), "Unexpected mallctl failure"); } @@ -88,7 +88,7 @@ static void flush_tcache() { static void * stress_thread(void *arg) { stress_thread_data_t *data = arg; - uint64_t prng_state = data->thd_id; + uint64_t prng_state = data->thd_id; atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) != STRESS_THREADS) { @@ -99,7 +99,6 @@ stress_thread(void *arg) { if (prng_range_u64(&prng_state, 3) == 0) { flush_tcache(); } - } flush_tcache(); atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); @@ -125,9 +124,9 @@ stress_run(void (*main_thread_fn)(), int nruns) { atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); for (int run = 0; run < nruns; run++) { - thd_t 
thds[STRESS_THREADS]; + thd_t thds[STRESS_THREADS]; stress_thread_data_t thd_datas[STRESS_THREADS]; - atomic_zu_t ready_thds; + atomic_zu_t ready_thds; atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); atomic_zu_t done_thds; atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); @@ -164,7 +163,7 @@ stress_run(void (*main_thread_fn)(), int nruns) { static void do_allocs_frees() { - enum {NALLOCS = 32}; + enum { NALLOCS = 32 }; flush_tcache(); void *ptrs[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { @@ -182,7 +181,7 @@ test_arena_reset_main_fn() { } TEST_BEGIN(test_arena_reset) { - int err; + int err; unsigned arena; unsigned old_arena; @@ -256,17 +255,16 @@ TEST_BEGIN(test_races) { "Should have seen some pop successes"); assert_zu_lt(0, atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), "Expected some frees that didn't empty a slab"); - assert_zu_lt(0, atomic_load_zu(&dalloc_nonzero_slab_count, - ATOMIC_RELAXED), "expected some frees that emptied a slab"); - assert_zu_lt(0, atomic_load_zu(&dalloc_nonempty_list_count, - ATOMIC_RELAXED), "expected some frees that used the empty list"); + assert_zu_lt(0, + atomic_load_zu(&dalloc_nonzero_slab_count, ATOMIC_RELAXED), + "expected some frees that emptied a slab"); + assert_zu_lt(0, + atomic_load_zu(&dalloc_nonempty_list_count, ATOMIC_RELAXED), + "expected some frees that used the empty list"); } TEST_END int main(void) { - return test_no_reentrancy( - test_arena_reset, - test_races, - test_fork); + return test_no_reentrancy(test_arena_reset, test_races, test_fork); } diff --git a/test/unit/binshard.c b/test/unit/binshard.c index 040ea54d..c3e1c2d6 100644 --- a/test/unit/binshard.c +++ b/test/unit/binshard.c @@ -7,9 +7,9 @@ static void * thd_producer(void *varg) { - void **mem = varg; + void **mem = varg; unsigned arena, i; - size_t sz; + size_t sz; sz = sizeof(arena); /* Remote arena. 
*/ @@ -28,8 +28,8 @@ thd_producer(void *varg) { } TEST_BEGIN(test_producer_consumer) { - thd_t thds[NTHREADS]; - void *mem[NTHREADS][REMOTE_NALLOC]; + thd_t thds[NTHREADS]; + void *mem[NTHREADS][REMOTE_NALLOC]; unsigned i; /* Create producer threads to allocate. */ @@ -42,8 +42,8 @@ TEST_BEGIN(test_producer_consumer) { /* Remote deallocation by the current thread. */ for (i = 0; i < NTHREADS; i++) { for (unsigned j = 0; j < REMOTE_NALLOC; j++) { - expect_ptr_not_null(mem[i][j], - "Unexpected remote allocation failure"); + expect_ptr_not_null( + mem[i][j], "Unexpected remote allocation failure"); dallocx(mem[i][j], 0); } } @@ -52,7 +52,7 @@ TEST_END static void * thd_start(void *varg) { - void *ptr, *ptr2; + void *ptr, *ptr2; edata_t *edata; unsigned shard1, shard2; @@ -82,10 +82,10 @@ thd_start(void *varg) { } TEST_BEGIN(test_bin_shard_mt) { - test_skip_if(have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)); + test_skip_if( + have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned i; for (i = 0; i < NTHREADS; i++) { thd_create(&thds[i], thd_start, NULL); @@ -104,8 +104,8 @@ TEST_END TEST_BEGIN(test_bin_shard) { unsigned nbins, i; - size_t mib[4], mib2[4]; - size_t miblen, miblen2, len; + size_t mib[4], mib2[4]; + size_t miblen, miblen2, len; len = sizeof(nbins); expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, @@ -120,17 +120,19 @@ TEST_BEGIN(test_bin_shard) { for (i = 0; i < nbins; i++) { uint32_t nshards; - size_t size, sz1, sz2; + size_t size, sz1, sz2; mib[2] = i; sz1 = sizeof(nshards); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&nshards, &sz1, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&nshards, &sz1, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); mib2[2] = i; sz2 = sizeof(size); - expect_d_eq(mallctlbymib(mib2, miblen2, (void *)&size, &sz2, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + 
expect_d_eq( + mallctlbymib(mib2, miblen2, (void *)&size, &sz2, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); if (size >= 1 && size <= 128) { expect_u_eq(nshards, 16, "Unexpected nshards"); @@ -148,7 +150,5 @@ TEST_END int main(void) { return test_no_reentrancy( - test_bin_shard, - test_bin_shard_mt, - test_producer_consumer); + test_bin_shard, test_bin_shard_mt, test_producer_consumer); } diff --git a/test/unit/bit_util.c b/test/unit/bit_util.c index 4e9d2e16..986562d1 100644 --- a/test/unit/bit_util.c +++ b/test/unit/bit_util.c @@ -2,36 +2,37 @@ #include "jemalloc/internal/bit_util.h" -#define TEST_POW2_CEIL(t, suf, pri) do { \ - unsigned i, pow2; \ - t x; \ - \ - expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ - \ - for (i = 0; i < sizeof(t) * 8; i++) { \ - expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), ((t)1) \ - << i, "Unexpected result"); \ - } \ - \ - for (i = 2; i < sizeof(t) * 8; i++) { \ - expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ - ((t)1) << i, "Unexpected result"); \ - } \ - \ - for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ - expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ - ((t)1) << (i+1), "Unexpected result"); \ - } \ - \ - for (pow2 = 1; pow2 < 25; pow2++) { \ - for (x = (((t)1) << (pow2-1)) + 1; x <= ((t)1) << pow2; \ - x++) { \ - expect_##suf##_eq(pow2_ceil_##suf(x), \ - ((t)1) << pow2, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ -} while (0) +#define TEST_POW2_CEIL(t, suf, pri) \ + do { \ + unsigned i, pow2; \ + t x; \ + \ + expect_##suf##_eq(pow2_ceil_##suf(0), 0, "Unexpected result"); \ + \ + for (i = 0; i < sizeof(t) * 8; i++) { \ + expect_##suf##_eq(pow2_ceil_##suf(((t)1) << i), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 2; i < sizeof(t) * 8; i++) { \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) - 1), \ + ((t)1) << i, "Unexpected result"); \ + } \ + \ + for (i = 0; i < sizeof(t) * 8 - 1; i++) { \ + expect_##suf##_eq(pow2_ceil_##suf((((t)1) << i) + 1), \ + ((t)1) << (i 
+ 1), "Unexpected result"); \ + } \ + \ + for (pow2 = 1; pow2 < 25; pow2++) { \ + for (x = (((t)1) << (pow2 - 1)) + 1; \ + x <= ((t)1) << pow2; x++) { \ + expect_##suf##_eq(pow2_ceil_##suf(x), \ + ((t)1) << pow2, \ + "Unexpected result, x=%" pri, x); \ + } \ + } \ + } while (0) TEST_BEGIN(test_pow2_ceil_u64) { TEST_POW2_CEIL(uint64_t, u64, FMTu64); @@ -54,10 +55,10 @@ expect_lg_ceil_range(size_t input, unsigned answer) { expect_u_eq(0, answer, "Got %u as lg_ceil of 1", answer); return; } - expect_zu_le(input, (ZU(1) << answer), - "Got %u as lg_ceil of %zu", answer, input); - expect_zu_gt(input, (ZU(1) << (answer - 1)), - "Got %u as lg_ceil of %zu", answer, input); + expect_zu_le(input, (ZU(1) << answer), "Got %u as lg_ceil of %zu", + answer, input); + expect_zu_gt(input, (ZU(1) << (answer - 1)), "Got %u as lg_ceil of %zu", + answer, input); } static void @@ -66,8 +67,8 @@ expect_lg_floor_range(size_t input, unsigned answer) { expect_u_eq(0, answer, "Got %u as lg_floor of 1", answer); return; } - expect_zu_ge(input, (ZU(1) << answer), - "Got %u as lg_floor of %zu", answer, input); + expect_zu_ge(input, (ZU(1) << answer), "Got %u as lg_floor of %zu", + answer, input); expect_zu_lt(input, (ZU(1) << (answer + 1)), "Got %u as lg_floor of %zu", answer, input); } @@ -101,22 +102,24 @@ TEST_BEGIN(test_lg_ceil_floor) { } TEST_END -#define TEST_FFS(t, suf, test_suf, pri) do { \ - for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ - for (unsigned j = 0; j <= i; j++) { \ - for (unsigned k = 0; k <= j; k++) { \ - t x = (t)1 << i; \ - x |= (t)1 << j; \ - x |= (t)1 << k; \ - expect_##test_suf##_eq(ffs_##suf(x), k, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ - } \ -} while(0) +#define TEST_FFS(t, suf, test_suf, pri) \ + do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(ffs_##suf(x), \ + k, "Unexpected 
result, x=%" pri, \ + x); \ + } \ + } \ + } \ + } while (0) TEST_BEGIN(test_ffs_u) { - TEST_FFS(unsigned, u, u,"u"); + TEST_FFS(unsigned, u, u, "u"); } TEST_END @@ -145,22 +148,24 @@ TEST_BEGIN(test_ffs_zu) { } TEST_END -#define TEST_FLS(t, suf, test_suf, pri) do { \ - for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ - for (unsigned j = 0; j <= i; j++) { \ - for (unsigned k = 0; k <= j; k++) { \ - t x = (t)1 << i; \ - x |= (t)1 << j; \ - x |= (t)1 << k; \ - expect_##test_suf##_eq(fls_##suf(x), i, \ - "Unexpected result, x=%"pri, x); \ - } \ - } \ - } \ -} while(0) +#define TEST_FLS(t, suf, test_suf, pri) \ + do { \ + for (unsigned i = 0; i < sizeof(t) * 8; i++) { \ + for (unsigned j = 0; j <= i; j++) { \ + for (unsigned k = 0; k <= j; k++) { \ + t x = (t)1 << i; \ + x |= (t)1 << j; \ + x |= (t)1 << k; \ + expect_##test_suf##_eq(fls_##suf(x), \ + i, "Unexpected result, x=%" pri, \ + x); \ + } \ + } \ + } \ + } while (0) TEST_BEGIN(test_fls_u) { - TEST_FLS(unsigned, u, u,"u"); + TEST_FLS(unsigned, u, u, "u"); } TEST_END @@ -190,7 +195,7 @@ TEST_BEGIN(test_fls_zu) { TEST_END TEST_BEGIN(test_fls_u_slow) { - TEST_FLS(unsigned, u_slow, u,"u"); + TEST_FLS(unsigned, u_slow, u, "u"); } TEST_END @@ -280,30 +285,11 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_pow2_ceil_u64, - test_pow2_ceil_u32, - test_pow2_ceil_zu, - test_lg_ceil_floor, - test_ffs_u, - test_ffs_lu, - test_ffs_llu, - test_ffs_u32, - test_ffs_u64, - test_ffs_zu, - test_fls_u, - test_fls_lu, - test_fls_llu, - test_fls_u32, - test_fls_u64, - test_fls_zu, - test_fls_u_slow, - test_fls_lu_slow, - test_fls_llu_slow, - test_popcount_u, - test_popcount_u_slow, - test_popcount_lu, - test_popcount_lu_slow, - test_popcount_llu, - test_popcount_llu_slow); + return test_no_reentrancy(test_pow2_ceil_u64, test_pow2_ceil_u32, + test_pow2_ceil_zu, test_lg_ceil_floor, test_ffs_u, test_ffs_lu, + test_ffs_llu, test_ffs_u32, test_ffs_u64, test_ffs_zu, test_fls_u, + test_fls_lu, test_fls_llu, test_fls_u32, 
test_fls_u64, test_fls_zu, + test_fls_u_slow, test_fls_lu_slow, test_fls_llu_slow, + test_popcount_u, test_popcount_u_slow, test_popcount_lu, + test_popcount_lu_slow, test_popcount_llu, test_popcount_llu_slow); } diff --git a/test/unit/bitmap.c b/test/unit/bitmap.c index 78e542b6..b3048cf3 100644 --- a/test/unit/bitmap.c +++ b/test/unit/bitmap.c @@ -9,14 +9,17 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(bitmap_size(binfo), bitmap_size(&binfo_dyn), "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); + "nbits=%zu", + nbits); expect_zu_eq(binfo->nbits, binfo_dyn.nbits, "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); + "nbits=%zu", + nbits); #ifdef BITMAP_USE_TREE expect_u_eq(binfo->nlevels, binfo_dyn.nlevels, "Unexpected difference between static and dynamic initialization, " - "nbits=%zu", nbits); + "nbits=%zu", + nbits); { unsigned i; @@ -24,7 +27,8 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(binfo->levels[i].group_offset, binfo_dyn.levels[i].group_offset, "Unexpected difference between static and dynamic " - "initialization, nbits=%zu, level=%u", nbits, i); + "initialization, nbits=%zu, level=%u", + nbits, i); } } #else @@ -34,12 +38,12 @@ test_bitmap_initializer_body(const bitmap_info_t *binfo, size_t nbits) { } TEST_BEGIN(test_bitmap_initializer) { -#define NB(nbits) { \ - if (nbits <= BITMAP_MAXBITS) { \ - bitmap_info_t binfo = \ - BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_initializer_body(&binfo, nbits); \ - } \ +#define NB(nbits) \ + { \ + if (nbits <= BITMAP_MAXBITS) { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_initializer_body(&binfo, nbits); \ + } \ } NBITS_TAB #undef NB @@ -47,11 +51,11 @@ TEST_BEGIN(test_bitmap_initializer) { TEST_END static size_t -test_bitmap_size_body(const bitmap_info_t *binfo, size_t nbits, - size_t prev_size) { 
+test_bitmap_size_body( + const bitmap_info_t *binfo, size_t nbits, size_t prev_size) { size_t size = bitmap_size(binfo); - expect_zu_ge(size, (nbits >> 3), - "Bitmap size is smaller than expected"); + expect_zu_ge( + size, (nbits >> 3), "Bitmap size is smaller than expected"); expect_zu_ge(size, prev_size, "Bitmap size is smaller than expected"); return size; } @@ -65,10 +69,10 @@ TEST_BEGIN(test_bitmap_size) { bitmap_info_init(&binfo, nbits); prev_size = test_bitmap_size_body(&binfo, nbits, prev_size); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - prev_size = test_bitmap_size_body(&binfo, nbits, \ - prev_size); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + prev_size = test_bitmap_size_body(&binfo, nbits, prev_size); \ } prev_size = 0; NBITS_TAB @@ -78,14 +82,14 @@ TEST_END static void test_bitmap_init_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; + size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); for (i = 0; i < nbits; i++) { - expect_false(bitmap_get(bitmap, binfo, i), - "Bit should be unset"); + expect_false( + bitmap_get(bitmap, binfo, i), "Bit should be unset"); } bitmap_init(bitmap, binfo, true); @@ -104,9 +108,10 @@ TEST_BEGIN(test_bitmap_init) { bitmap_info_init(&binfo, nbits); test_bitmap_init_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_init_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_init_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -115,7 +120,7 @@ TEST_END static void test_bitmap_set_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; + size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, 
binfo, false); @@ -135,9 +140,10 @@ TEST_BEGIN(test_bitmap_set) { bitmap_info_init(&binfo, nbits); test_bitmap_set_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_set_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_set_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -146,7 +152,7 @@ TEST_END static void test_bitmap_unset_body(const bitmap_info_t *binfo, size_t nbits) { - size_t i; + size_t i; bitmap_t *bitmap = (bitmap_t *)malloc(bitmap_size(binfo)); expect_ptr_not_null(bitmap, "Unexpected malloc() failure"); bitmap_init(bitmap, binfo, false); @@ -173,9 +179,10 @@ TEST_BEGIN(test_bitmap_unset) { bitmap_info_init(&binfo, nbits); test_bitmap_unset_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_unset_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_unset_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -193,7 +200,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after previous first unset " "bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i, "First unset bit should be just after previous first unset " "bit"); expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, @@ -213,7 +220,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { bitmap_unset(bitmap, binfo, i); expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should the bit previously unset"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? 
i - 1 : i), i, "First unset bit should the bit previously unset"); expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "First unset bit should the bit previously unset"); @@ -232,7 +239,7 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), i, "First unset bit should be just after the bit previously " "set"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i-1 : i), i, + expect_zu_eq(bitmap_ffu(bitmap, binfo, (i > 0) ? i - 1 : i), i, "First unset bit should be just after the bit previously " "set"); expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, @@ -245,7 +252,8 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { } expect_zu_eq(bitmap_ffu(bitmap, binfo, 0), nbits - 1, "First unset bit should be the last bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits-2 : nbits-1), + expect_zu_eq( + bitmap_ffu(bitmap, binfo, (nbits > 1) ? nbits - 2 : nbits - 1), nbits - 1, "First unset bit should be the last bit"); expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), nbits - 1, "First unset bit should be the last bit"); @@ -258,26 +266,26 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { * bitmap_ffu() finds the correct bit for all five min_bit cases. 
*/ if (nbits >= 3) { - for (size_t i = 0; i < nbits-2; i++) { + for (size_t i = 0; i < nbits - 2; i++) { bitmap_unset(bitmap, binfo, i); - bitmap_unset(bitmap, binfo, i+2); + bitmap_unset(bitmap, binfo, i + 2); if (i > 0) { - expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, - "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, i - 1), + i, "Unexpected first unset bit"); } expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 1), i + 2, "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+2), i+2, + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 2), i + 2, "Unexpected first unset bit"); if (i + 3 < nbits) { - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+3), + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 3), nbits, "Unexpected first unset bit"); } expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); - expect_zu_eq(bitmap_sfu(bitmap, binfo), i+2, + expect_zu_eq(bitmap_sfu(bitmap, binfo), i + 2, "Unexpected first unset bit"); } } @@ -288,24 +296,24 @@ test_bitmap_xfu_body(const bitmap_info_t *binfo, size_t nbits) { * cases. 
*/ if (nbits >= 3) { - bitmap_unset(bitmap, binfo, nbits-1); - for (size_t i = 0; i < nbits-1; i++) { + bitmap_unset(bitmap, binfo, nbits - 1); + for (size_t i = 0; i < nbits - 1; i++) { bitmap_unset(bitmap, binfo, i); if (i > 0) { - expect_zu_eq(bitmap_ffu(bitmap, binfo, i-1), i, - "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, i - 1), + i, "Unexpected first unset bit"); } expect_zu_eq(bitmap_ffu(bitmap, binfo, i), i, "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, i+1), nbits-1, - "Unexpected first unset bit"); - expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits-1), - nbits-1, "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, i + 1), + nbits - 1, "Unexpected first unset bit"); + expect_zu_eq(bitmap_ffu(bitmap, binfo, nbits - 1), + nbits - 1, "Unexpected first unset bit"); expect_zu_eq(bitmap_sfu(bitmap, binfo), i, "Unexpected first unset bit"); } - expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits-1, + expect_zu_eq(bitmap_sfu(bitmap, binfo), nbits - 1, "Unexpected first unset bit"); } @@ -322,9 +330,10 @@ TEST_BEGIN(test_bitmap_xfu) { bitmap_info_init(&binfo, nbits); test_bitmap_xfu_body(&binfo, nbits); } -#define NB(nbits) { \ - bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ - test_bitmap_xfu_body(&binfo, nbits); \ +#define NB(nbits) \ + { \ + bitmap_info_t binfo = BITMAP_INFO_INITIALIZER(nbits); \ + test_bitmap_xfu_body(&binfo, nbits); \ } NBITS_TAB #undef NB @@ -333,11 +342,6 @@ TEST_END int main(void) { - return test( - test_bitmap_initializer, - test_bitmap_size, - test_bitmap_init, - test_bitmap_set, - test_bitmap_unset, - test_bitmap_xfu); + return test(test_bitmap_initializer, test_bitmap_size, test_bitmap_init, + test_bitmap_set, test_bitmap_unset, test_bitmap_xfu); } diff --git a/test/unit/buf_writer.c b/test/unit/buf_writer.c index d5e63a0e..643e430c 100644 --- a/test/unit/buf_writer.c +++ b/test/unit/buf_writer.c @@ -5,24 +5,24 @@ #define TEST_BUF_SIZE 16 #define UNIT_MAX 
(TEST_BUF_SIZE * 3) -static size_t test_write_len; -static char test_buf[TEST_BUF_SIZE]; +static size_t test_write_len; +static char test_buf[TEST_BUF_SIZE]; static uint64_t arg; static uint64_t arg_store; static void test_write_cb(void *cbopaque, const char *s) { size_t prev_test_write_len = test_write_len; - test_write_len += strlen(s); /* only increase the length */ + test_write_len += strlen(s); /* only increase the length */ arg_store = *(uint64_t *)cbopaque; /* only pass along the argument */ - assert_zu_le(prev_test_write_len, test_write_len, - "Test write overflowed"); + assert_zu_le( + prev_test_write_len, test_write_len, "Test write overflowed"); } static void test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { - char s[UNIT_MAX + 1]; - size_t n_unit, remain, i; + char s[UNIT_MAX + 1]; + size_t n_unit, remain, i; ssize_t unit; assert(buf_writer->buf != NULL); @@ -41,7 +41,8 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { remain += unit; if (remain > buf_writer->buf_size) { /* Flushes should have happened. 
*/ - assert_u64_eq(arg_store, arg, "Call " + assert_u64_eq(arg_store, arg, + "Call " "back argument didn't get through"); remain %= buf_writer->buf_size; if (remain == 0) { @@ -51,12 +52,14 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { } assert_zu_eq(test_write_len + remain, i * unit, "Incorrect length after writing %zu strings" - " of length %zu", i, unit); + " of length %zu", + i, unit); } buf_writer_flush(buf_writer); expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" - " writing %zu strings of length %zu", n_unit, unit); + " writing %zu strings of length %zu", + n_unit, unit); } } buf_writer_terminate(tsdn, buf_writer); @@ -64,9 +67,9 @@ test_buf_writer_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_static) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - test_buf, TEST_BUF_SIZE), + test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_body(tsdn, &buf_writer); } @@ -74,22 +77,24 @@ TEST_END TEST_BEGIN(test_buf_write_dynamic) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - NULL, TEST_BUF_SIZE), "buf_writer_init() should not OOM"); + NULL, TEST_BUF_SIZE), + "buf_writer_init() should not OOM"); test_buf_writer_body(tsdn, &buf_writer); } TEST_END TEST_BEGIN(test_buf_write_oom) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + NULL, SC_LARGE_MAXCLASS + 1), + "buf_writer_init() should OOM"); assert(buf_writer.buf == NULL); - char s[UNIT_MAX + 1]; - size_t n_unit, i; + char s[UNIT_MAX + 1]; + size_t n_unit, i; ssize_t unit; memset(s, 'a', 
UNIT_MAX); @@ -107,20 +112,22 @@ TEST_BEGIN(test_buf_write_oom) { "Call back argument didn't get through"); assert_zu_eq(test_write_len, i * unit, "Incorrect length after writing %zu strings" - " of length %zu", i, unit); + " of length %zu", + i, unit); } buf_writer_flush(&buf_writer); expect_zu_eq(test_write_len, n_unit * unit, "Incorrect length after flushing at the end of" - " writing %zu strings of length %zu", n_unit, unit); + " writing %zu strings of length %zu", + n_unit, unit); } } buf_writer_terminate(tsdn, &buf_writer); } TEST_END -static int test_read_count; -static size_t test_read_len; +static int test_read_count; +static size_t test_read_len; static uint64_t arg_sum; ssize_t @@ -142,8 +149,8 @@ test_read_cb(void *cbopaque, void *buf, size_t limit) { memset(buf, 'a', read_len); size_t prev_test_read_len = test_read_len; test_read_len += read_len; - assert_zu_le(prev_test_read_len, test_read_len, - "Test read overflowed"); + assert_zu_le( + prev_test_read_len, test_read_len, "Test read overflowed"); return read_len; } } @@ -168,9 +175,9 @@ test_buf_writer_pipe_body(tsdn_t *tsdn, buf_writer_t *buf_writer) { TEST_BEGIN(test_buf_write_pipe) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_false(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - test_buf, TEST_BUF_SIZE), + test_buf, TEST_BUF_SIZE), "buf_writer_init() should not encounter error on static buffer"); test_buf_writer_pipe_body(tsdn, &buf_writer); } @@ -178,19 +185,16 @@ TEST_END TEST_BEGIN(test_buf_write_pipe_oom) { buf_writer_t buf_writer; - tsdn_t *tsdn = tsdn_fetch(); + tsdn_t *tsdn = tsdn_fetch(); assert_true(buf_writer_init(tsdn, &buf_writer, test_write_cb, &arg, - NULL, SC_LARGE_MAXCLASS + 1), "buf_writer_init() should OOM"); + NULL, SC_LARGE_MAXCLASS + 1), + "buf_writer_init() should OOM"); test_buf_writer_pipe_body(tsdn, &buf_writer); } TEST_END int main(void) { - return test( - test_buf_write_static, - test_buf_write_dynamic, - 
test_buf_write_oom, - test_buf_write_pipe, - test_buf_write_pipe_oom); + return test(test_buf_write_static, test_buf_write_dynamic, + test_buf_write_oom, test_buf_write_pipe, test_buf_write_pipe_oom); } diff --git a/test/unit/cache_bin.c b/test/unit/cache_bin.c index 1bb750d7..dc1dbe36 100644 --- a/test/unit/cache_bin.c +++ b/test/unit/cache_bin.c @@ -3,7 +3,7 @@ static void do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max, cache_bin_sz_t nfill_attempt, cache_bin_sz_t nfill_succeed) { - bool success; + bool success; void *ptr; assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill_attempt); @@ -12,17 +12,16 @@ do_fill_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t ncached_max, arr.ptr[i] = &ptrs[i]; } cache_bin_finish_fill(bin, &arr, nfill_succeed); - expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, - ""); + expect_true(cache_bin_ncached_get_local(bin) == nfill_succeed, ""); cache_bin_low_water_set(bin); for (cache_bin_sz_t i = 0; i < nfill_succeed; i++) { ptr = cache_bin_alloc(bin, &success); expect_true(success, ""); - expect_ptr_eq(ptr, (void *)&ptrs[i], - "Should pop in order filled"); - expect_true(cache_bin_low_water_get(bin) - == nfill_succeed - i - 1, ""); + expect_ptr_eq( + ptr, (void *)&ptrs[i], "Should pop in order filled"); + expect_true( + cache_bin_low_water_get(bin) == nfill_succeed - i - 1, ""); } expect_true(cache_bin_ncached_get_local(bin) == 0, ""); expect_true(cache_bin_low_water_get(bin) == 0, ""); @@ -46,16 +45,15 @@ do_flush_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, } cache_bin_finish_flush(bin, &arr, nflush); - expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, - ""); + expect_true(cache_bin_ncached_get_local(bin) == nfill - nflush, ""); while (cache_bin_ncached_get_local(bin) > 0) { cache_bin_alloc(bin, &success); } } static void -do_batch_alloc_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, - size_t batch) { 
+do_batch_alloc_test( + cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, size_t batch) { assert_true(cache_bin_ncached_get_local(bin) == 0, ""); CACHE_BIN_PTR_ARRAY_DECLARE(arr, nfill); cache_bin_init_ptr_array_for_fill(bin, &arr, nfill); @@ -72,8 +70,8 @@ do_batch_alloc_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, for (cache_bin_sz_t i = 0; i < (cache_bin_sz_t)n; i++) { expect_ptr_eq(out[i], &ptrs[i], ""); } - expect_true(cache_bin_low_water_get(bin) == nfill - - (cache_bin_sz_t)n, ""); + expect_true( + cache_bin_low_water_get(bin) == nfill - (cache_bin_sz_t)n, ""); while (cache_bin_ncached_get_local(bin) > 0) { bool success; cache_bin_alloc(bin, &success); @@ -98,8 +96,8 @@ test_bin_init(cache_bin_t *bin, cache_bin_info_t *info) { TEST_BEGIN(test_cache_bin) { const int ncached_max = 100; - bool success; - void *ptr; + bool success; + void *ptr; cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); @@ -125,7 +123,7 @@ TEST_BEGIN(test_cache_bin) { */ void **ptrs = mallocx(sizeof(void *) * (ncached_max + 1), 0); assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); - for (cache_bin_sz_t i = 0; i < ncached_max; i++) { + for (cache_bin_sz_t i = 0; i < ncached_max; i++) { expect_true(cache_bin_ncached_get_local(&bin) == i, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[i]); expect_true(success, @@ -133,18 +131,17 @@ TEST_BEGIN(test_cache_bin) { expect_true(cache_bin_low_water_get(&bin) == 0, "Pushes and pops shouldn't change low water of zero."); } - expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, - ""); + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); success = cache_bin_dalloc_easy(&bin, &ptrs[ncached_max]); expect_false(success, "Shouldn't be able to dalloc into a full bin."); cache_bin_low_water_set(&bin); for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_low_water_get(&bin) - == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin) - == ncached_max - i, ""); 
+ expect_true( + cache_bin_low_water_get(&bin) == ncached_max - i, ""); + expect_true( + cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* * This should fail -- the easy variant can't change the low * water mark. @@ -152,20 +149,21 @@ TEST_BEGIN(test_cache_bin) { ptr = cache_bin_alloc_easy(&bin, &success); expect_ptr_null(ptr, ""); expect_false(success, ""); - expect_true(cache_bin_low_water_get(&bin) - == ncached_max - i, ""); - expect_true(cache_bin_ncached_get_local(&bin) - == ncached_max - i, ""); + expect_true( + cache_bin_low_water_get(&bin) == ncached_max - i, ""); + expect_true( + cache_bin_ncached_get_local(&bin) == ncached_max - i, ""); /* This should succeed, though. */ ptr = cache_bin_alloc(&bin, &success); expect_true(success, ""); expect_ptr_eq(ptr, &ptrs[ncached_max - i - 1], "Alloc should pop in stack order"); - expect_true(cache_bin_low_water_get(&bin) - == ncached_max - i - 1, ""); - expect_true(cache_bin_ncached_get_local(&bin) - == ncached_max - i - 1, ""); + expect_true( + cache_bin_low_water_get(&bin) == ncached_max - i - 1, ""); + expect_true( + cache_bin_ncached_get_local(&bin) == ncached_max - i - 1, + ""); } /* Now we're empty -- all alloc attempts should fail. */ expect_true(cache_bin_ncached_get_local(&bin) == 0, ""); @@ -184,8 +182,7 @@ TEST_BEGIN(test_cache_bin) { for (cache_bin_sz_t i = ncached_max / 2; i < ncached_max; i++) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } - expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, - ""); + expect_true(cache_bin_ncached_get_local(&bin) == ncached_max, ""); for (cache_bin_sz_t i = ncached_max - 1; i >= ncached_max / 2; i--) { /* * Size is bigger than low water -- the reduced version should @@ -208,20 +205,16 @@ TEST_BEGIN(test_cache_bin) { /* Test fill. */ /* Try to fill all, succeed fully. */ - do_fill_test(&bin, ptrs, ncached_max, ncached_max, - ncached_max); + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max); /* Try to fill all, succeed partially. 
*/ - do_fill_test(&bin, ptrs, ncached_max, ncached_max, - ncached_max / 2); + do_fill_test(&bin, ptrs, ncached_max, ncached_max, ncached_max / 2); /* Try to fill all, fail completely. */ do_fill_test(&bin, ptrs, ncached_max, ncached_max, 0); /* Try to fill some, succeed fully. */ - do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, - ncached_max / 2); + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 2); /* Try to fill some, succeed partially. */ - do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, - ncached_max / 4); + do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, ncached_max / 4); /* Try to fill some, fail completely. */ do_fill_test(&bin, ptrs, ncached_max, ncached_max / 2, 0); @@ -262,11 +255,10 @@ TEST_END static void do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, cache_bin_sz_t nstash) { - expect_true(cache_bin_ncached_get_local(bin) == 0, - "Bin not empty"); - expect_true(cache_bin_nstashed_get_local(bin) == 0, - "Bin not empty"); - expect_true(nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max"); + expect_true(cache_bin_ncached_get_local(bin) == 0, "Bin not empty"); + expect_true(cache_bin_nstashed_get_local(bin) == 0, "Bin not empty"); + expect_true( + nfill + nstash <= bin->bin_info.ncached_max, "Exceeded max"); bool ret; /* Fill */ @@ -274,16 +266,16 @@ do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, ret = cache_bin_dalloc_easy(bin, &ptrs[i]); expect_true(ret, "Unexpected fill failure"); } - expect_true(cache_bin_ncached_get_local(bin) == nfill, - "Wrong cached count"); + expect_true( + cache_bin_ncached_get_local(bin) == nfill, "Wrong cached count"); /* Stash */ for (cache_bin_sz_t i = 0; i < nstash; i++) { ret = cache_bin_stash(bin, &ptrs[i + nfill]); expect_true(ret, "Unexpected stash failure"); } - expect_true(cache_bin_nstashed_get_local(bin) == nstash, - "Wrong stashed count"); + expect_true( + cache_bin_nstashed_get_local(bin) == nstash, "Wrong 
stashed count"); if (nfill + nstash == bin->bin_info.ncached_max) { ret = cache_bin_dalloc_easy(bin, &ptrs[0]); @@ -300,20 +292,20 @@ do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, expect_true((uintptr_t)ptr < (uintptr_t)&ptrs[nfill], "Should not alloc stashed ptrs"); } - expect_true(cache_bin_ncached_get_local(bin) == 0, - "Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin) == nstash, - "Wrong stashed count"); + expect_true( + cache_bin_ncached_get_local(bin) == 0, "Wrong cached count"); + expect_true( + cache_bin_nstashed_get_local(bin) == nstash, "Wrong stashed count"); cache_bin_alloc(bin, &ret); expect_false(ret, "Should not alloc stashed"); /* Clear stashed ones */ cache_bin_finish_flush_stashed(bin); - expect_true(cache_bin_ncached_get_local(bin) == 0, - "Wrong cached count"); - expect_true(cache_bin_nstashed_get_local(bin) == 0, - "Wrong stashed count"); + expect_true( + cache_bin_ncached_get_local(bin) == 0, "Wrong cached count"); + expect_true( + cache_bin_nstashed_get_local(bin) == 0, "Wrong stashed count"); cache_bin_alloc(bin, &ret); expect_false(ret, "Should not alloc from empty bin"); @@ -322,7 +314,7 @@ do_flush_stashed_test(cache_bin_t *bin, void **ptrs, cache_bin_sz_t nfill, TEST_BEGIN(test_cache_bin_stash) { const int ncached_max = 100; - cache_bin_t bin; + cache_bin_t bin; cache_bin_info_t info; cache_bin_info_init(&info, ncached_max); test_bin_init(&bin, &info); @@ -335,15 +327,17 @@ TEST_BEGIN(test_cache_bin_stash) { assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); bool ret; for (cache_bin_sz_t i = 0; i < ncached_max; i++) { - expect_true(cache_bin_ncached_get_local(&bin) == - (i / 2 + i % 2), "Wrong ncached value"); - expect_true(cache_bin_nstashed_get_local(&bin) == - i / 2, "Wrong nstashed value"); + expect_true( + cache_bin_ncached_get_local(&bin) == (i / 2 + i % 2), + "Wrong ncached value"); + expect_true(cache_bin_nstashed_get_local(&bin) == i / 2, + "Wrong nstashed value"); if (i % 
2 == 0) { cache_bin_dalloc_easy(&bin, &ptrs[i]); } else { ret = cache_bin_stash(&bin, &ptrs[i]); - expect_true(ret, "Should be able to stash into a " + expect_true(ret, + "Should be able to stash into a " "non-full cache bin"); } } @@ -360,7 +354,8 @@ TEST_BEGIN(test_cache_bin_stash) { expect_true(diff % 2 == 0, "Should be able to alloc"); } else { expect_false(ret, "Should not alloc stashed"); - expect_true(cache_bin_nstashed_get_local(&bin) == ncached_max / 2, + expect_true(cache_bin_nstashed_get_local(&bin) + == ncached_max / 2, "Wrong nstashed value"); } } @@ -368,19 +363,14 @@ TEST_BEGIN(test_cache_bin_stash) { test_bin_init(&bin, &info); do_flush_stashed_test(&bin, ptrs, ncached_max, 0); do_flush_stashed_test(&bin, ptrs, 0, ncached_max); - do_flush_stashed_test(&bin, ptrs, ncached_max / 2, - ncached_max / 2); - do_flush_stashed_test(&bin, ptrs, ncached_max / 4, - ncached_max / 2); - do_flush_stashed_test(&bin, ptrs, ncached_max / 2, - ncached_max / 4); - do_flush_stashed_test(&bin, ptrs, ncached_max / 4, - ncached_max / 4); + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 2); + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 2); + do_flush_stashed_test(&bin, ptrs, ncached_max / 2, ncached_max / 4); + do_flush_stashed_test(&bin, ptrs, ncached_max / 4, ncached_max / 4); } TEST_END int main(void) { - return test(test_cache_bin, - test_cache_bin_stash); + return test(test_cache_bin, test_cache_bin_stash); } diff --git a/test/unit/ckh.c b/test/unit/ckh.c index 36142acd..f07892ac 100644 --- a/test/unit/ckh.c +++ b/test/unit/ckh.c @@ -2,55 +2,51 @@ TEST_BEGIN(test_new_delete) { tsd_t *tsd; - ckh_t ckh; + ckh_t ckh; tsd = tsd_fetch(); - expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, - ckh_string_keycomp), "Unexpected ckh_new() error"); + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); - expect_false(ckh_new(tsd, &ckh, 3, ckh_pointer_hash, - 
ckh_pointer_keycomp), "Unexpected ckh_new() error"); + expect_false( + ckh_new(tsd, &ckh, 3, ckh_pointer_hash, ckh_pointer_keycomp), + "Unexpected ckh_new() error"); ckh_delete(tsd, &ckh); } TEST_END TEST_BEGIN(test_count_insert_search_remove) { - tsd_t *tsd; - ckh_t ckh; - const char *strs[] = { - "a string", - "A string", - "a string.", - "A string." - }; + tsd_t *tsd; + ckh_t ckh; + const char *strs[] = {"a string", "A string", "a string.", "A string."}; const char *missing = "A string not in the hash table."; - size_t i; + size_t i; tsd = tsd_fetch(); - expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, - ckh_string_keycomp), "Unexpected ckh_new() error"); + expect_false(ckh_new(tsd, &ckh, 2, ckh_string_hash, ckh_string_keycomp), + "Unexpected ckh_new() error"); expect_zu_eq(ckh_count(&ckh), 0, "ckh_count() should return %zu, but it returned %zu", ZU(0), ckh_count(&ckh)); /* Insert. */ - for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) { ckh_insert(tsd, &ckh, strs[i], strs[i]); - expect_zu_eq(ckh_count(&ckh), i+1, - "ckh_count() should return %zu, but it returned %zu", i+1, + expect_zu_eq(ckh_count(&ckh), i + 1, + "ckh_count() should return %zu, but it returned %zu", i + 1, ckh_count(&ckh)); } /* Search. */ - for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) { union { - void *p; + void *p; const char *s; } k, v; - void **kp, **vp; + void **kp, **vp; const char *ks, *vs; kp = (i & 1) ? &k.p : NULL; @@ -62,21 +58,21 @@ TEST_BEGIN(test_count_insert_search_remove) { ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? 
strs[i] : (const char *)NULL; - expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", - i); - expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", - i); + expect_ptr_eq( + (void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); + expect_ptr_eq( + (void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); } expect_true(ckh_search(&ckh, missing, NULL, NULL), "Unexpected ckh_search() success"); /* Remove. */ - for (i = 0; i < sizeof(strs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(strs) / sizeof(const char *); i++) { union { - void *p; + void *p; const char *s; } k, v; - void **kp, **vp; + void **kp, **vp; const char *ks, *vs; kp = (i & 1) ? &k.p : NULL; @@ -88,14 +84,14 @@ TEST_BEGIN(test_count_insert_search_remove) { ks = (i & 1) ? strs[i] : (const char *)NULL; vs = (i & 2) ? strs[i] : (const char *)NULL; - expect_ptr_eq((void *)ks, (void *)k.s, "Key mismatch, i=%zu", - i); - expect_ptr_eq((void *)vs, (void *)v.s, "Value mismatch, i=%zu", - i); + expect_ptr_eq( + (void *)ks, (void *)k.s, "Key mismatch, i=%zu", i); + expect_ptr_eq( + (void *)vs, (void *)v.s, "Value mismatch, i=%zu", i); expect_zu_eq(ckh_count(&ckh), - sizeof(strs)/sizeof(const char *) - i - 1, + sizeof(strs) / sizeof(const char *) - i - 1, "ckh_count() should return %zu, but it returned %zu", - sizeof(strs)/sizeof(const char *) - i - 1, + sizeof(strs) / sizeof(const char *) - i - 1, ckh_count(&ckh)); } @@ -106,18 +102,19 @@ TEST_END TEST_BEGIN(test_insert_iter_remove) { #define NITEMS ZU(1000) tsd_t *tsd; - ckh_t ckh; + ckh_t ckh; void **p[NITEMS]; - void *q, *r; + void *q, *r; size_t i; tsd = tsd_fetch(); - expect_false(ckh_new(tsd, &ckh, 2, ckh_pointer_hash, - ckh_pointer_keycomp), "Unexpected ckh_new() error"); + expect_false( + ckh_new(tsd, &ckh, 2, ckh_pointer_hash, ckh_pointer_keycomp), + "Unexpected ckh_new() error"); for (i = 0; i < NITEMS; i++) { - p[i] = mallocx(i+1, 0); + p[i] = mallocx(i + 1, 0); expect_ptr_not_null(p[i], "Unexpected mallocx() failure"); } @@ -151,7 
+148,7 @@ TEST_BEGIN(test_insert_iter_remove) { } { - bool seen[NITEMS]; + bool seen[NITEMS]; size_t tabind; memset(seen, 0, sizeof(seen)); @@ -195,8 +192,8 @@ TEST_BEGIN(test_insert_iter_remove) { } expect_zu_eq(ckh_count(&ckh), 0, - "ckh_count() should return %zu, but it returned %zu", - ZU(0), ckh_count(&ckh)); + "ckh_count() should return %zu, but it returned %zu", ZU(0), + ckh_count(&ckh)); ckh_delete(tsd, &ckh); #undef NITEMS } @@ -204,8 +201,6 @@ TEST_END int main(void) { - return test( - test_new_delete, - test_count_insert_search_remove, + return test(test_new_delete, test_count_insert_search_remove, test_insert_iter_remove); } diff --git a/test/unit/counter.c b/test/unit/counter.c index 277baac1..04100daa 100644 --- a/test/unit/counter.c +++ b/test/unit/counter.c @@ -11,7 +11,7 @@ TEST_BEGIN(test_counter_accum) { counter_accum_init(&c, interval); tsd_t *tsd = tsd_fetch(); - bool trigger; + bool trigger; for (unsigned i = 0; i < n; i++) { trigger = counter_accum(tsd_tsdn(tsd), &c, increment); accum += increment; @@ -39,8 +39,8 @@ static void * thd_start(void *varg) { counter_accum_t *c = (counter_accum_t *)varg; - tsd_t *tsd = tsd_fetch(); - bool trigger; + tsd_t *tsd = tsd_fetch(); + bool trigger; uintptr_t n_triggered = 0; for (unsigned i = 0; i < N_ITER_THD; i++) { trigger = counter_accum(tsd_tsdn(tsd), c, ITER_INCREMENT); @@ -50,12 +50,11 @@ thd_start(void *varg) { return (void *)n_triggered; } - TEST_BEGIN(test_counter_mt) { counter_accum_t shared_c; counter_accum_init(&shared_c, interval); - thd_t thds[N_THDS]; + thd_t thds[N_THDS]; unsigned i; for (i = 0; i < N_THDS; i++) { thd_create(&thds[i], thd_start, (void *)&shared_c); @@ -74,7 +73,5 @@ TEST_END int main(void) { - return test( - test_counter_accum, - test_counter_mt); + return test(test_counter_accum, test_counter_mt); } diff --git a/test/unit/decay.c b/test/unit/decay.c index bdb6d0a3..10740a85 100644 --- a/test/unit/decay.c +++ b/test/unit/decay.c @@ -22,12 +22,11 @@ 
TEST_BEGIN(test_decay_init) { TEST_END TEST_BEGIN(test_decay_ms_valid) { - expect_false(decay_ms_valid(-7), - "Misclassified negative decay as valid"); + expect_false( + decay_ms_valid(-7), "Misclassified negative decay as valid"); expect_true(decay_ms_valid(-1), "Misclassified -1 (never decay) as invalid decay"); - expect_true(decay_ms_valid(8943), - "Misclassified valid decay"); + expect_true(decay_ms_valid(8943), "Misclassified valid decay"); if (SSIZE_MAX > NSTIME_SEC_MAX) { expect_false( decay_ms_valid((ssize_t)(NSTIME_SEC_MAX * KQU(1000) + 39)), @@ -111,12 +110,12 @@ TEST_BEGIN(test_decay_empty) { assert_false(err, ""); uint64_t time_between_calls = decay_epoch_duration_ns(&decay) / 5; - int nepochs = 0; + int nepochs = 0; for (uint64_t i = 0; i < decay_ns / time_between_calls * 10; i++) { size_t dirty_pages = 0; nstime_init(&curtime, i * time_between_calls); - bool epoch_advanced = decay_maybe_advance_epoch(&decay, - &curtime, dirty_pages); + bool epoch_advanced = decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); if (epoch_advanced) { nepochs++; expect_zu_eq(decay_npages_limit_get(&decay), 0, @@ -158,30 +157,32 @@ TEST_BEGIN(test_decay) { nstime_init(&epochtime, decay_epoch_duration_ns(&decay)); const size_t dirty_pages_per_epoch = 1000; - size_t dirty_pages = 0; - uint64_t epoch_ns = decay_epoch_duration_ns(&decay); - bool epoch_advanced = false; + size_t dirty_pages = 0; + uint64_t epoch_ns = decay_epoch_duration_ns(&decay); + bool epoch_advanced = false; /* Populate backlog with some dirty pages */ for (uint64_t i = 0; i < nepoch_init; i++) { nstime_add(&curtime, &epochtime); dirty_pages += dirty_pages_per_epoch; - epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, - dirty_pages); + epoch_advanced |= decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); } expect_true(epoch_advanced, "Epoch never advanced"); size_t npages_limit = decay_npages_limit_get(&decay); - expect_zu_gt(npages_limit, 0, "npages_limit is incorrectly equal 
" + expect_zu_gt(npages_limit, 0, + "npages_limit is incorrectly equal " "to zero after dirty pages have been added"); /* Keep dirty pages unchanged and verify that npages_limit decreases */ for (uint64_t i = nepoch_init; i * epoch_ns < decay_ns; ++i) { nstime_add(&curtime, &epochtime); - epoch_advanced = decay_maybe_advance_epoch(&decay, &curtime, - dirty_pages); + epoch_advanced = decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); if (epoch_advanced) { - size_t npages_limit_new = decay_npages_limit_get(&decay); + size_t npages_limit_new = decay_npages_limit_get( + &decay); expect_zu_lt(npages_limit_new, npages_limit, "napges_limit failed to decay"); @@ -189,20 +190,22 @@ TEST_BEGIN(test_decay) { } } - expect_zu_gt(npages_limit, 0, "npages_limit decayed to zero earlier " + expect_zu_gt(npages_limit, 0, + "npages_limit decayed to zero earlier " "than decay_ms since last dirty page was added"); /* Completely push all dirty pages out of the backlog */ epoch_advanced = false; for (uint64_t i = 0; i < nepoch_init; i++) { nstime_add(&curtime, &epochtime); - epoch_advanced |= decay_maybe_advance_epoch(&decay, &curtime, - dirty_pages); + epoch_advanced |= decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); } expect_true(epoch_advanced, "Epoch never advanced"); npages_limit = decay_npages_limit_get(&decay); - expect_zu_eq(npages_limit, 0, "npages_limit didn't decay to 0 after " + expect_zu_eq(npages_limit, 0, + "npages_limit didn't decay to 0 after " "decay_ms since last bump in dirty pages"); } TEST_END @@ -230,29 +233,29 @@ TEST_BEGIN(test_decay_ns_until_purge) { "Failed to return unbounded wait time for zero threshold"); const size_t dirty_pages_per_epoch = 1000; - size_t dirty_pages = 0; - bool epoch_advanced = false; + size_t dirty_pages = 0; + bool epoch_advanced = false; for (uint64_t i = 0; i < nepoch_init; i++) { nstime_add(&curtime, &epochtime); dirty_pages += dirty_pages_per_epoch; - epoch_advanced |= decay_maybe_advance_epoch(&decay, 
&curtime, - dirty_pages); + epoch_advanced |= decay_maybe_advance_epoch( + &decay, &curtime, dirty_pages); } expect_true(epoch_advanced, "Epoch never advanced"); - uint64_t ns_until_purge_all = decay_ns_until_purge(&decay, - dirty_pages, dirty_pages); + uint64_t ns_until_purge_all = decay_ns_until_purge( + &decay, dirty_pages, dirty_pages); expect_u64_ge(ns_until_purge_all, decay_ns, "Incorrectly calculated time to purge all pages"); - uint64_t ns_until_purge_none = decay_ns_until_purge(&decay, - dirty_pages, 0); + uint64_t ns_until_purge_none = decay_ns_until_purge( + &decay, dirty_pages, 0); expect_u64_eq(ns_until_purge_none, decay_epoch_duration_ns(&decay) * 2, "Incorrectly calculated time to purge 0 pages"); uint64_t npages_threshold = dirty_pages / 2; - uint64_t ns_until_purge_half = decay_ns_until_purge(&decay, - dirty_pages, npages_threshold); + uint64_t ns_until_purge_half = decay_ns_until_purge( + &decay, dirty_pages, npages_threshold); nstime_t waittime; nstime_init(&waittime, ns_until_purge_half); @@ -263,7 +266,7 @@ TEST_BEGIN(test_decay_ns_until_purge) { expect_zu_lt(npages_limit, dirty_pages, "npages_limit failed to decrease after waiting"); size_t expected = dirty_pages - npages_limit; - int deviation = abs((int)expected - (int)(npages_threshold)); + int deviation = abs((int)expected - (int)(npages_threshold)); expect_d_lt(deviation, (int)(npages_threshold / 2), "After waiting, number of pages is out of the expected interval " "[0.5 * npages_threshold .. 
1.5 * npages_threshold]"); @@ -272,12 +275,7 @@ TEST_END int main(void) { - return test( - test_decay_init, - test_decay_ms_valid, - test_decay_npages_purge_in, - test_decay_maybe_advance_epoch, - test_decay_empty, - test_decay, - test_decay_ns_until_purge); + return test(test_decay_init, test_decay_ms_valid, + test_decay_npages_purge_in, test_decay_maybe_advance_epoch, + test_decay_empty, test_decay, test_decay_ns_until_purge); } diff --git a/test/unit/div.c b/test/unit/div.c index 29aea665..53447f4a 100644 --- a/test/unit/div.c +++ b/test/unit/div.c @@ -11,12 +11,12 @@ TEST_BEGIN(test_div_exhaustive) { max = 1000 * 1000; } for (size_t dividend = 0; dividend < 1000 * divisor; - dividend += divisor) { - size_t quotient = div_compute( - &div_info, dividend); + dividend += divisor) { + size_t quotient = div_compute(&div_info, dividend); expect_zu_eq(dividend, quotient * divisor, "With divisor = %zu, dividend = %zu, " - "got quotient %zu", divisor, dividend, quotient); + "got quotient %zu", + divisor, dividend, quotient); } } } @@ -24,6 +24,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_div_exhaustive); + return test_no_reentrancy(test_div_exhaustive); } diff --git a/test/unit/double_free.c b/test/unit/double_free.c index b6ae8f75..4bd6ab73 100644 --- a/test/unit/double_free.c +++ b/test/unit/double_free.c @@ -4,7 +4,8 @@ #include "jemalloc/internal/safety_check.h" bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -23,10 +24,9 @@ test_double_free_post(void) { static bool tcache_enabled(void) { - bool enabled; + bool enabled; size_t sz = sizeof(enabled); - assert_d_eq( - mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0, + assert_d_eq(mallctl("thread.tcache.enabled", &enabled, &sz, NULL, 0), 0, "Unexpected mallctl failure"); return enabled; } @@ -41,7 +41,7 @@ TEST_BEGIN(test_large_double_free_tcache) { test_double_free_pre(); char *ptr = 
malloc(SC_LARGE_MINCLASS); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); if (!guarded) { free(ptr); @@ -64,7 +64,7 @@ TEST_BEGIN(test_large_double_free_no_tcache) { test_double_free_pre(); char *ptr = mallocx(SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); dallocx(ptr, MALLOCX_TCACHE_NONE); if (!guarded) { dallocx(ptr, MALLOCX_TCACHE_NONE); @@ -87,7 +87,7 @@ TEST_BEGIN(test_small_double_free_tcache) { test_double_free_pre(); char *ptr = malloc(1); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); if (!guarded) { free(ptr); @@ -115,7 +115,7 @@ TEST_BEGIN(test_small_double_free_arena) { */ char *ptr1 = malloc(1); char *ptr = malloc(1); - bool guarded = extent_is_guarded(tsdn_fetch(), ptr); + bool guarded = extent_is_guarded(tsdn_fetch(), ptr); free(ptr); if (!guarded) { mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); @@ -135,9 +135,7 @@ TEST_END int main(void) { - return test( - test_large_double_free_no_tcache, - test_large_double_free_tcache, - test_small_double_free_tcache, + return test(test_large_double_free_no_tcache, + test_large_double_free_tcache, test_small_double_free_tcache, test_small_double_free_arena); } diff --git a/test/unit/edata_cache.c b/test/unit/edata_cache.c index af1110a9..16ed58b2 100644 --- a/test/unit/edata_cache.c +++ b/test/unit/edata_cache.c @@ -49,16 +49,16 @@ TEST_END static size_t ecf_count(edata_cache_fast_t *ecf) { - size_t count = 0; + size_t count = 0; edata_t *cur; - ql_foreach(cur, &ecf->list.head, ql_link_inactive) { + ql_foreach (cur, &ecf->list.head, ql_link_inactive) { count++; } return count; } TEST_BEGIN(test_edata_cache_fast_simple) { - edata_cache_t ec; + edata_cache_t ec; edata_cache_fast_t ecf; test_edata_cache_init(&ec); @@ -96,7 +96,7 @@ 
TEST_BEGIN(test_edata_cache_fast_simple) { TEST_END TEST_BEGIN(test_edata_cache_fill) { - edata_cache_t ec; + edata_cache_t ec; edata_cache_fast_t ecf; test_edata_cache_init(&ec); @@ -179,7 +179,7 @@ TEST_BEGIN(test_edata_cache_fill) { TEST_END TEST_BEGIN(test_edata_cache_disable) { - edata_cache_t ec; + edata_cache_t ec; edata_cache_fast_t ecf; test_edata_cache_init(&ec); @@ -198,7 +198,8 @@ TEST_BEGIN(test_edata_cache_disable) { expect_zu_eq(0, ecf_count(&ecf), ""); expect_zu_eq(EDATA_CACHE_FAST_FILL, - atomic_load_zu(&ec.count, ATOMIC_RELAXED), "Disabling should flush"); + atomic_load_zu(&ec.count, ATOMIC_RELAXED), + "Disabling should flush"); edata_t *edata = edata_cache_fast_get(TSDN_NULL, &ecf); expect_zu_eq(0, ecf_count(&ecf), ""); @@ -218,9 +219,6 @@ TEST_END int main(void) { - return test( - test_edata_cache, - test_edata_cache_fast_simple, - test_edata_cache_fill, - test_edata_cache_disable); + return test(test_edata_cache, test_edata_cache_fast_simple, + test_edata_cache_fill, test_edata_cache_disable); } diff --git a/test/unit/emitter.c b/test/unit/emitter.c index af0da90d..dc53b9eb 100644 --- a/test/unit/emitter.c +++ b/test/unit/emitter.c @@ -12,9 +12,9 @@ static bool print_escaped = false; typedef struct buf_descriptor_s buf_descriptor_t; struct buf_descriptor_s { - char *buf; + char *buf; size_t len; - bool mid_quote; + bool mid_quote; }; /* @@ -56,8 +56,8 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { } } - size_t written = malloc_snprintf(buf_descriptor->buf, - buf_descriptor->len, "%s", str); + size_t written = malloc_snprintf( + buf_descriptor->buf, buf_descriptor->len, "%s", str); expect_zu_eq(written, strlen(str), "Buffer overflow!"); buf_descriptor->buf += written; buf_descriptor->len -= written; @@ -66,19 +66,18 @@ forwarding_cb(void *buf_descriptor_v, const char *str) { static void expect_emit_output(void (*emit_fn)(emitter_t *), - const char *expected_json_output, - const char *expected_json_compact_output, + const char 
*expected_json_output, const char *expected_json_compact_output, const char *expected_table_output) { - emitter_t emitter; - char buf[MALLOC_PRINTF_BUFSIZE]; + emitter_t emitter; + char buf[MALLOC_PRINTF_BUFSIZE]; buf_descriptor_t buf_descriptor; buf_descriptor.buf = buf; buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; buf_descriptor.mid_quote = false; - emitter_init(&emitter, emitter_output_json, &forwarding_cb, - &buf_descriptor); + emitter_init( + &emitter, emitter_output_json, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); expect_str_eq(expected_json_output, buf, "json output failure"); @@ -89,24 +88,24 @@ expect_emit_output(void (*emit_fn)(emitter_t *), emitter_init(&emitter, emitter_output_json_compact, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); - expect_str_eq(expected_json_compact_output, buf, - "compact json output failure"); + expect_str_eq( + expected_json_compact_output, buf, "compact json output failure"); buf_descriptor.buf = buf; buf_descriptor.len = MALLOC_PRINTF_BUFSIZE; buf_descriptor.mid_quote = false; - emitter_init(&emitter, emitter_output_table, &forwarding_cb, - &buf_descriptor); + emitter_init( + &emitter, emitter_output_table, &forwarding_cb, &buf_descriptor); (*emit_fn)(&emitter); expect_str_eq(expected_table_output, buf, "table output failure"); } static void emit_dict(emitter_t *emitter) { - bool b_false = false; - bool b_true = true; - int i_123 = 123; + bool b_false = false; + bool b_true = true; + int i_123 = 123; const char *str = "a string"; emitter_begin(emitter); @@ -122,48 +121,49 @@ emit_dict(emitter_t *emitter) { } static const char *dict_json = -"{\n" -"\t\"foo\": {\n" -"\t\t\"abc\": false,\n" -"\t\t\"def\": true,\n" -"\t\t\"ghi\": 123,\n" -"\t\t\"jkl\": \"a string\"\n" -"\t}\n" -"}\n"; + "{\n" + "\t\"foo\": {\n" + "\t\t\"abc\": false,\n" + "\t\t\"def\": true,\n" + "\t\t\"ghi\": 123,\n" + "\t\t\"jkl\": \"a string\"\n" + "\t}\n" + "}\n"; static const char *dict_json_compact = -"{" - "\"foo\":{" - 
"\"abc\":false," - "\"def\":true," - "\"ghi\":123," - "\"jkl\":\"a string\"" - "}" -"}"; + "{" + "\"foo\":{" + "\"abc\":false," + "\"def\":true," + "\"ghi\":123," + "\"jkl\":\"a string\"" + "}" + "}"; static const char *dict_table = -"This is the foo table:\n" -" ABC: false\n" -" DEF: true\n" -" GHI: 123 (note_key1: \"a string\")\n" -" JKL: \"a string\" (note_key2: false)\n"; + "This is the foo table:\n" + " ABC: false\n" + " DEF: true\n" + " GHI: 123 (note_key1: \"a string\")\n" + " JKL: \"a string\" (note_key2: false)\n"; static void emit_table_printf(emitter_t *emitter) { emitter_begin(emitter); emitter_table_printf(emitter, "Table note 1\n"); - emitter_table_printf(emitter, "Table note 2 %s\n", - "with format string"); + emitter_table_printf( + emitter, "Table note 2 %s\n", "with format string"); emitter_end(emitter); } static const char *table_printf_json = -"{\n" -"}\n"; + "{\n" + "}\n"; static const char *table_printf_json_compact = "{}"; static const char *table_printf_table = -"Table note 1\n" -"Table note 2 with format string\n"; + "Table note 1\n" + "Table note 2 with format string\n"; -static void emit_nested_dict(emitter_t *emitter) { +static void +emit_nested_dict(emitter_t *emitter) { int val = 123; emitter_begin(emitter); emitter_dict_begin(emitter, "json1", "Dict 1"); @@ -174,53 +174,53 @@ static void emit_nested_dict(emitter_t *emitter) { emitter_dict_end(emitter); /* Close 3 */ emitter_dict_end(emitter); /* Close 1 */ emitter_dict_begin(emitter, "json4", "Dict 4"); - emitter_kv(emitter, "primitive", "Another primitive", - emitter_type_int, &val); + emitter_kv( + emitter, "primitive", "Another primitive", emitter_type_int, &val); emitter_dict_end(emitter); /* Close 4 */ emitter_end(emitter); } static const char *nested_dict_json = -"{\n" -"\t\"json1\": {\n" -"\t\t\"json2\": {\n" -"\t\t\t\"primitive\": 123\n" -"\t\t},\n" -"\t\t\"json3\": {\n" -"\t\t}\n" -"\t},\n" -"\t\"json4\": {\n" -"\t\t\"primitive\": 123\n" -"\t}\n" -"}\n"; + "{\n" + 
"\t\"json1\": {\n" + "\t\t\"json2\": {\n" + "\t\t\t\"primitive\": 123\n" + "\t\t},\n" + "\t\t\"json3\": {\n" + "\t\t}\n" + "\t},\n" + "\t\"json4\": {\n" + "\t\t\"primitive\": 123\n" + "\t}\n" + "}\n"; static const char *nested_dict_json_compact = -"{" - "\"json1\":{" - "\"json2\":{" - "\"primitive\":123" - "}," - "\"json3\":{" - "}" - "}," - "\"json4\":{" - "\"primitive\":123" - "}" -"}"; + "{" + "\"json1\":{" + "\"json2\":{" + "\"primitive\":123" + "}," + "\"json3\":{" + "}" + "}," + "\"json4\":{" + "\"primitive\":123" + "}" + "}"; static const char *nested_dict_table = -"Dict 1\n" -" Dict 2\n" -" A primitive: 123\n" -" Dict 3\n" -"Dict 4\n" -" Another primitive: 123\n"; + "Dict 1\n" + " Dict 2\n" + " A primitive: 123\n" + " Dict 3\n" + "Dict 4\n" + " Another primitive: 123\n"; static void emit_types(emitter_t *emitter) { - bool b = false; - int i = -123; - unsigned u = 123; - ssize_t zd = -456; - size_t zu = 456; + bool b = false; + int i = -123; + unsigned u = 123; + ssize_t zd = -456; + size_t zu = 456; const char *str = "string"; const char *long_str = "abcdefghijklmnopqrstuvwxyz " @@ -254,55 +254,55 @@ emit_types(emitter_t *emitter) { } static const char *types_json = -"{\n" -"\t\"k1\": false,\n" -"\t\"k2\": -123,\n" -"\t\"k3\": 123,\n" -"\t\"k4\": -456,\n" -"\t\"k5\": 456,\n" -"\t\"k6\": \"string\",\n" -"\t\"k7\": \"abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz\",\n" -"\t\"k8\": 789,\n" -"\t\"k9\": 10000000000\n" -"}\n"; + "{\n" + "\t\"k1\": false,\n" + "\t\"k2\": -123,\n" + "\t\"k3\": 123,\n" + "\t\"k4\": -456,\n" + "\t\"k5\": 456,\n" + "\t\"k6\": \"string\",\n" + "\t\"k7\": \"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + 
"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\",\n" + "\t\"k8\": 789,\n" + "\t\"k9\": 10000000000\n" + "}\n"; static const char *types_json_compact = -"{" - "\"k1\":false," - "\"k2\":-123," - "\"k3\":123," - "\"k4\":-456," - "\"k5\":456," - "\"k6\":\"string\"," - "\"k7\":\"abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz " - "abcdefghijklmnopqrstuvwxyz\"," - "\"k8\":789," - "\"k9\":10000000000" -"}"; + "{" + "\"k1\":false," + "\"k2\":-123," + "\"k3\":123," + "\"k4\":-456," + "\"k5\":456," + "\"k6\":\"string\"," + "\"k7\":\"abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz " + "abcdefghijklmnopqrstuvwxyz\"," + "\"k8\":789," + "\"k9\":10000000000" + "}"; static const char *types_table = -"K1: false\n" -"K2: -123\n" -"K3: 123\n" -"K4: -456\n" -"K5: 456\n" -"K6: \"string\"\n" -"K7: \"abcdefghijklmnopqrstuvwxyz " + "K1: false\n" + "K2: -123\n" + "K3: 123\n" + "K4: -456\n" + "K5: 456\n" + "K6: \"string\"\n" + "K7: \"abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " @@ -312,8 +312,8 @@ static const char *types_table = "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz " "abcdefghijklmnopqrstuvwxyz\"\n" -"K8: 789\n" -"K9: 10000000000\n"; + "K8: 789\n" + "K9: 10000000000\n"; static void emit_modal(emitter_t *emitter) { @@ -336,37 +336,37 @@ emit_modal(emitter_t *emitter) 
{ } const char *modal_json = -"{\n" -"\t\"j0\": {\n" -"\t\t\"j1\": {\n" -"\t\t\t\"i1\": 123,\n" -"\t\t\t\"i2\": 123,\n" -"\t\t\t\"i4\": 123\n" -"\t\t},\n" -"\t\t\"i5\": 123,\n" -"\t\t\"i6\": 123\n" -"\t}\n" -"}\n"; + "{\n" + "\t\"j0\": {\n" + "\t\t\"j1\": {\n" + "\t\t\t\"i1\": 123,\n" + "\t\t\t\"i2\": 123,\n" + "\t\t\t\"i4\": 123\n" + "\t\t},\n" + "\t\t\"i5\": 123,\n" + "\t\t\"i6\": 123\n" + "\t}\n" + "}\n"; const char *modal_json_compact = -"{" - "\"j0\":{" - "\"j1\":{" - "\"i1\":123," - "\"i2\":123," - "\"i4\":123" - "}," - "\"i5\":123," - "\"i6\":123" - "}" -"}"; + "{" + "\"j0\":{" + "\"j1\":{" + "\"i1\":123," + "\"i2\":123," + "\"i4\":123" + "}," + "\"i5\":123," + "\"i6\":123" + "}" + "}"; const char *modal_table = -"T0\n" -" I1: 123\n" -" I3: 123\n" -" T1\n" -" I4: 123\n" -" I5: 123\n" -" I6: 123\n"; + "T0\n" + " I1: 123\n" + " I3: 123\n" + " T1\n" + " I4: 123\n" + " I5: 123\n" + " I6: 123\n"; static void emit_json_array(emitter_t *emitter) { @@ -387,121 +387,124 @@ emit_json_array(emitter_t *emitter) { emitter_json_kv(emitter, "bar", emitter_type_int, &ival); emitter_json_kv(emitter, "baz", emitter_type_int, &ival); emitter_json_object_end(emitter); /* Close arr[3]. */ - emitter_json_array_end(emitter); /* Close arr. */ + emitter_json_array_end(emitter); /* Close arr. */ emitter_json_object_end(emitter); /* Close dict. 
*/ emitter_end(emitter); } static const char *json_array_json = -"{\n" -"\t\"dict\": {\n" -"\t\t\"arr\": [\n" -"\t\t\t{\n" -"\t\t\t\t\"foo\": 123\n" -"\t\t\t},\n" -"\t\t\t123,\n" -"\t\t\t123,\n" -"\t\t\t{\n" -"\t\t\t\t\"bar\": 123,\n" -"\t\t\t\t\"baz\": 123\n" -"\t\t\t}\n" -"\t\t]\n" -"\t}\n" -"}\n"; + "{\n" + "\t\"dict\": {\n" + "\t\t\"arr\": [\n" + "\t\t\t{\n" + "\t\t\t\t\"foo\": 123\n" + "\t\t\t},\n" + "\t\t\t123,\n" + "\t\t\t123,\n" + "\t\t\t{\n" + "\t\t\t\t\"bar\": 123,\n" + "\t\t\t\t\"baz\": 123\n" + "\t\t\t}\n" + "\t\t]\n" + "\t}\n" + "}\n"; static const char *json_array_json_compact = -"{" - "\"dict\":{" - "\"arr\":[" - "{" - "\"foo\":123" - "}," - "123," - "123," - "{" - "\"bar\":123," - "\"baz\":123" - "}" - "]" - "}" -"}"; + "{" + "\"dict\":{" + "\"arr\":[" + "{" + "\"foo\":123" + "}," + "123," + "123," + "{" + "\"bar\":123," + "\"baz\":123" + "}" + "]" + "}" + "}"; static const char *json_array_table = ""; static void emit_json_nested_array(emitter_t *emitter) { - int ival = 123; + int ival = 123; char *sval = "foo"; emitter_begin(emitter); emitter_json_array_begin(emitter); - emitter_json_array_begin(emitter); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_value(emitter, emitter_type_string, &sval); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_value(emitter, emitter_type_string, &sval); - emitter_json_array_end(emitter); - emitter_json_array_begin(emitter); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_array_end(emitter); - emitter_json_array_begin(emitter); - emitter_json_value(emitter, emitter_type_string, &sval); - emitter_json_value(emitter, emitter_type_int, &ival); - emitter_json_array_end(emitter); - emitter_json_array_begin(emitter); - emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_value(emitter, emitter_type_int, 
&ival); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_value(emitter, emitter_type_string, &sval); + emitter_json_value(emitter, emitter_type_int, &ival); + emitter_json_array_end(emitter); + emitter_json_array_begin(emitter); + emitter_json_array_end(emitter); emitter_json_array_end(emitter); emitter_end(emitter); } static const char *json_nested_array_json = -"{\n" -"\t[\n" -"\t\t[\n" -"\t\t\t123,\n" -"\t\t\t\"foo\",\n" -"\t\t\t123,\n" -"\t\t\t\"foo\"\n" -"\t\t],\n" -"\t\t[\n" -"\t\t\t123\n" -"\t\t],\n" -"\t\t[\n" -"\t\t\t\"foo\",\n" -"\t\t\t123\n" -"\t\t],\n" -"\t\t[\n" -"\t\t]\n" -"\t]\n" -"}\n"; + "{\n" + "\t[\n" + "\t\t[\n" + "\t\t\t123,\n" + "\t\t\t\"foo\",\n" + "\t\t\t123,\n" + "\t\t\t\"foo\"\n" + "\t\t],\n" + "\t\t[\n" + "\t\t\t123\n" + "\t\t],\n" + "\t\t[\n" + "\t\t\t\"foo\",\n" + "\t\t\t123\n" + "\t\t],\n" + "\t\t[\n" + "\t\t]\n" + "\t]\n" + "}\n"; static const char *json_nested_array_json_compact = -"{" - "[" - "[" - "123," - "\"foo\"," - "123," - "\"foo\"" - "]," - "[" - "123" - "]," - "[" - "\"foo\"," - "123" - "]," - "[" - "]" - "]" -"}"; + "{" + "[" + "[" + "123," + "\"foo\"," + "123," + "\"foo\"" + "]," + "[" + "123" + "]," + "[" + "\"foo\"," + "123" + "]," + "[" + "]" + "]" + "}"; static const char *json_nested_array_table = ""; static void emit_table_row(emitter_t *emitter) { emitter_begin(emitter); emitter_row_t row; - emitter_col_t abc = {emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}}; + emitter_col_t abc = { + emitter_justify_left, 10, emitter_type_title, {0}, {0, 0}}; abc.str_val = "ABC title"; - emitter_col_t def = {emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}}; + emitter_col_t def = { + emitter_justify_right, 15, emitter_type_title, {0}, {0, 0}}; def.str_val = "DEF title"; - emitter_col_t ghi 
= {emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}}; + emitter_col_t ghi = { + emitter_justify_right, 5, emitter_type_title, {0}, {0, 0}}; ghi.str_val = "GHI"; emitter_row_init(&row); @@ -536,21 +539,21 @@ emit_table_row(emitter_t *emitter) { } static const char *table_row_json = -"{\n" -"}\n"; + "{\n" + "}\n"; static const char *table_row_json_compact = "{}"; static const char *table_row_table = -"ABC title DEF title GHI\n" -"123 true 456\n" -"789 false 1011\n" -"\"a string\" false ghi\n"; + "ABC title DEF title GHI\n" + "123 true 456\n" + "789 false 1011\n" + "\"a string\" false ghi\n"; -#define GENERATE_TEST(feature) \ -TEST_BEGIN(test_##feature) { \ - expect_emit_output(emit_##feature, feature##_json, \ - feature##_json_compact, feature##_table); \ -} \ -TEST_END +#define GENERATE_TEST(feature) \ + TEST_BEGIN(test_##feature) { \ + expect_emit_output(emit_##feature, feature##_json, \ + feature##_json_compact, feature##_table); \ + } \ + TEST_END GENERATE_TEST(dict) GENERATE_TEST(table_printf) @@ -563,13 +566,7 @@ GENERATE_TEST(table_row) int main(void) { - return test_no_reentrancy( - test_dict, - test_table_printf, - test_nested_dict, - test_types, - test_modal, - test_json_array, - test_json_nested_array, - test_table_row); + return test_no_reentrancy(test_dict, test_table_printf, + test_nested_dict, test_types, test_modal, test_json_array, + test_json_nested_array, test_table_row); } diff --git a/test/unit/extent_quantize.c b/test/unit/extent_quantize.c index e6bbd539..c178240e 100644 --- a/test/unit/extent_quantize.c +++ b/test/unit/extent_quantize.c @@ -2,9 +2,9 @@ TEST_BEGIN(test_small_extent_size) { unsigned nbins, i; - size_t sz, extent_size; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz, extent_size; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); /* * Iterate over all small size classes, get their extent sizes, and @@ -21,25 +21,26 @@ TEST_BEGIN(test_small_extent_size) { mib[2] = i; sz = 
sizeof(size_t); expect_d_eq(mallctlbymib(mib, miblen, (void *)&extent_size, &sz, - NULL, 0), 0, "Unexpected mallctlbymib failure"); - expect_zu_eq(extent_size, - sz_psz_quantize_floor(extent_size), + NULL, 0), + 0, "Unexpected mallctlbymib failure"); + expect_zu_eq(extent_size, sz_psz_quantize_floor(extent_size), "Small extent quantization should be a no-op " - "(extent_size=%zu)", extent_size); - expect_zu_eq(extent_size, - sz_psz_quantize_ceil(extent_size), + "(extent_size=%zu)", + extent_size); + expect_zu_eq(extent_size, sz_psz_quantize_ceil(extent_size), "Small extent quantization should be a no-op " - "(extent_size=%zu)", extent_size); + "(extent_size=%zu)", + extent_size); } } TEST_END TEST_BEGIN(test_large_extent_size) { - bool cache_oblivious; + bool cache_oblivious; unsigned nlextents, i; - size_t sz, extent_size_prev, ceil_prev; - size_t mib[4]; - size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz, extent_size_prev, ceil_prev; + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); /* * Iterate over all large size classes, get their extent sizes, and @@ -48,11 +49,13 @@ TEST_BEGIN(test_large_extent_size) { sz = sizeof(bool); expect_d_eq(mallctl("opt.cache_oblivious", (void *)&cache_oblivious, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); + &sz, NULL, 0), + 0, "Unexpected mallctl failure"); sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, "Unexpected mallctlnametomib failure"); @@ -62,20 +65,21 @@ TEST_BEGIN(test_large_extent_size) { mib[2] = i; sz = sizeof(size_t); expect_d_eq(mallctlbymib(mib, miblen, (void *)&lextent_size, - &sz, NULL, 0), 0, "Unexpected mallctlbymib failure"); - extent_size = cache_oblivious ? 
lextent_size + PAGE : - lextent_size; + &sz, NULL, 0), + 0, "Unexpected mallctlbymib failure"); + extent_size = cache_oblivious ? lextent_size + PAGE + : lextent_size; floor = sz_psz_quantize_floor(extent_size); ceil = sz_psz_quantize_ceil(extent_size); expect_zu_eq(extent_size, floor, "Extent quantization should be a no-op for precise size " - "(lextent_size=%zu, extent_size=%zu)", lextent_size, - extent_size); + "(lextent_size=%zu, extent_size=%zu)", + lextent_size, extent_size); expect_zu_eq(extent_size, ceil, "Extent quantization should be a no-op for precise size " - "(lextent_size=%zu, extent_size=%zu)", lextent_size, - extent_size); + "(lextent_size=%zu, extent_size=%zu)", + lextent_size, extent_size); if (i > 0) { expect_zu_eq(extent_size_prev, @@ -85,23 +89,22 @@ TEST_BEGIN(test_large_extent_size) { expect_zu_eq(ceil_prev, extent_size, "Ceiling should be a precise size " "(extent_size_prev=%zu, ceil_prev=%zu, " - "extent_size=%zu)", extent_size_prev, - ceil_prev, extent_size); + "extent_size=%zu)", + extent_size_prev, ceil_prev, extent_size); } } if (i + 1 < nlextents) { extent_size_prev = floor; - ceil_prev = sz_psz_quantize_ceil(extent_size + - PAGE); + ceil_prev = sz_psz_quantize_ceil(extent_size + PAGE); } } } TEST_END TEST_BEGIN(test_monotonic) { -#define SZ_MAX ZU(4 * 1024 * 1024) +#define SZ_MAX ZU(4 * 1024 * 1024) unsigned i; - size_t floor_prev, ceil_prev; + size_t floor_prev, ceil_prev; floor_prev = 0; ceil_prev = 0; @@ -117,12 +120,15 @@ TEST_BEGIN(test_monotonic) { floor, extent_size, ceil); expect_zu_ge(ceil, extent_size, "Ceiling should be >= (floor=%zu, extent_size=%zu, " - "ceil=%zu)", floor, extent_size, ceil); + "ceil=%zu)", + floor, extent_size, ceil); - expect_zu_le(floor_prev, floor, "Floor should be monotonic " + expect_zu_le(floor_prev, floor, + "Floor should be monotonic " "(floor_prev=%zu, floor=%zu, extent_size=%zu, ceil=%zu)", floor_prev, floor, extent_size, ceil); - expect_zu_le(ceil_prev, ceil, "Ceiling should be monotonic " + 
expect_zu_le(ceil_prev, ceil, + "Ceiling should be monotonic " "(floor=%zu, extent_size=%zu, ceil_prev=%zu, ceil=%zu)", floor, extent_size, ceil_prev, ceil); @@ -135,7 +141,5 @@ TEST_END int main(void) { return test( - test_small_extent_size, - test_large_extent_size, - test_monotonic); + test_small_extent_size, test_large_extent_size, test_monotonic); } diff --git a/test/unit/fb.c b/test/unit/fb.c index ad72c75a..26a33fd9 100644 --- a/test/unit/fb.c +++ b/test/unit/fb.c @@ -5,21 +5,19 @@ static void do_test_init(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); /* Junk fb's contents. */ memset(fb, 99, sz); fb_init(fb, nbits); for (size_t i = 0; i < nbits; i++) { - expect_false(fb_get(fb, nbits, i), - "bitmap should start empty"); + expect_false(fb_get(fb, nbits, i), "bitmap should start empty"); } free(fb); } TEST_BEGIN(test_fb_init) { -#define NB(nbits) \ - do_test_init(nbits); +#define NB(nbits) do_test_init(nbits); NBITS_TAB #undef NB } @@ -27,7 +25,7 @@ TEST_END static void do_test_get_set_unset(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); /* Set the bits divisible by 3. */ @@ -56,8 +54,7 @@ do_test_get_set_unset(size_t nbits) { } TEST_BEGIN(test_get_set_unset) { -#define NB(nbits) \ - do_test_get_set_unset(nbits); +#define NB(nbits) do_test_get_set_unset(nbits); NBITS_TAB #undef NB } @@ -65,7 +62,7 @@ TEST_END static ssize_t find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { - for(; i < (ssize_t)nbits && i >= 0; i += (forward ? 1 : -1)) { + for (; i < (ssize_t)nbits && i >= 0; i += (forward ? 
1 : -1)) { bool expected_bit = i % 3 == 0 || i % 5 == 0; if (expected_bit == bit) { return i; @@ -76,7 +73,7 @@ find_3_5_compute(ssize_t i, size_t nbits, bool bit, bool forward) { static void do_test_search_simple(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); @@ -96,7 +93,7 @@ do_test_search_simple(size_t nbits) { expect_zu_eq(ffs_compute, ffs_search, "ffs mismatch at %zu", i); ssize_t fls_compute = find_3_5_compute(i, nbits, true, false); - size_t fls_search = fb_fls(fb, nbits, i); + size_t fls_search = fb_fls(fb, nbits, i); expect_zu_eq(fls_compute, fls_search, "fls mismatch at %zu", i); size_t ffu_compute = find_3_5_compute(i, nbits, false, true); @@ -112,8 +109,7 @@ do_test_search_simple(size_t nbits) { } TEST_BEGIN(test_search_simple) { -#define NB(nbits) \ - do_test_search_simple(nbits); +#define NB(nbits) do_test_search_simple(nbits); NBITS_TAB #undef NB } @@ -145,15 +141,17 @@ expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, "mismatch at %zu, %zu", position, special_bit); expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zu_eq(position + 1, fb_ffu(mostly_empty, nbits, position), + expect_zu_eq(position + 1, + fb_ffu(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, + fb_flu(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position - 1, fb_flu(mostly_empty, nbits, - position), "mismatch at %zu, %zu", position, special_bit); expect_zu_eq(position + 1, fb_ffs(mostly_full, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, - position), "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(position - 1, fb_fls(mostly_full, nbits, position), + "mismatch at 
%zu, %zu", position, special_bit); expect_zu_eq(position, fb_ffu(mostly_full, nbits, position), "mismatch at %zu, %zu", position, special_bit); expect_zd_eq(position, fb_flu(mostly_full, nbits, position), @@ -162,8 +160,8 @@ expect_exhaustive_results(fb_group_t *mostly_full, fb_group_t *mostly_empty, /* position > special_bit. */ expect_zu_eq(nbits, fb_ffs(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); - expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, - position), "mismatch at %zu, %zu", position, special_bit); + expect_zd_eq(special_bit, fb_fls(mostly_empty, nbits, position), + "mismatch at %zu, %zu", position, special_bit); expect_zu_eq(position, fb_ffu(mostly_empty, nbits, position), "mismatch at %zu, %zu", position, special_bit); expect_zd_eq(position, fb_flu(mostly_empty, nbits, position), @@ -186,7 +184,7 @@ do_test_search_exhaustive(size_t nbits) { if (nbits > 1000) { return; } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *empty = malloc(sz); fb_init(empty, nbits); fb_group_t *full = malloc(sz); @@ -209,8 +207,7 @@ do_test_search_exhaustive(size_t nbits) { } TEST_BEGIN(test_search_exhaustive) { -#define NB(nbits) \ - do_test_search_exhaustive(nbits); +#define NB(nbits) do_test_search_exhaustive(nbits); NBITS_TAB #undef NB } @@ -222,8 +219,8 @@ TEST_BEGIN(test_range_simple) { * big enough that usages of things like weirdnum (below) near the * beginning fit comfortably into the beginning of the bitmap. 
*/ - size_t nbits = 64 * 10; - size_t ngroups = FB_NGROUPS(nbits); + size_t nbits = 64 * 10; + size_t ngroups = FB_NGROUPS(nbits); fb_group_t *fb = malloc(sizeof(fb_group_t) * ngroups); fb_init(fb, nbits); for (size_t i = 0; i < nbits; i++) { @@ -255,7 +252,7 @@ TEST_END static void do_test_empty_full_exhaustive(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *empty = malloc(sz); fb_init(empty, nbits); fb_group_t *full = malloc(sz); @@ -273,15 +270,15 @@ do_test_empty_full_exhaustive(size_t nbits) { expect_false(fb_empty(empty, nbits), "error at bit %zu", i); if (nbits != 1) { - expect_false(fb_full(empty, nbits), - "error at bit %zu", i); - expect_false(fb_empty(full, nbits), - "error at bit %zu", i); + expect_false( + fb_full(empty, nbits), "error at bit %zu", i); + expect_false( + fb_empty(full, nbits), "error at bit %zu", i); } else { - expect_true(fb_full(empty, nbits), - "error at bit %zu", i); - expect_true(fb_empty(full, nbits), - "error at bit %zu", i); + expect_true( + fb_full(empty, nbits), "error at bit %zu", i); + expect_true( + fb_empty(full, nbits), "error at bit %zu", i); } expect_false(fb_full(full, nbits), "error at bit %zu", i); @@ -294,8 +291,7 @@ do_test_empty_full_exhaustive(size_t nbits) { } TEST_BEGIN(test_empty_full) { -#define NB(nbits) \ - do_test_empty_full_exhaustive(nbits); +#define NB(nbits) do_test_empty_full_exhaustive(nbits); NBITS_TAB #undef NB } @@ -306,8 +302,8 @@ TEST_END * built closely on top of it. */ TEST_BEGIN(test_iter_range_simple) { - size_t set_limit = 30; - size_t nbits = 100; + size_t set_limit = 30; + size_t nbits = 100; fb_group_t fb[FB_NGROUPS(100)]; fb_init(fb, nbits); @@ -318,7 +314,7 @@ TEST_BEGIN(test_iter_range_simple) { */ size_t begin = (size_t)-1; size_t len = (size_t)-1; - bool result; + bool result; /* A set of checks with only the first set_limit bits *set*. 
*/ fb_set_range(fb, nbits, 0, set_limit); @@ -410,7 +406,6 @@ TEST_BEGIN(test_iter_range_simple) { expect_zu_eq(0, begin, "Incorrect begin at %zu", i); expect_zu_eq(set_limit, len, "Incorrect len at %zu", i); } - } TEST_END @@ -426,11 +421,11 @@ fb_iter_simple(fb_group_t *fb, size_t nbits, size_t start, size_t *r_begin, ssize_t stride = (forward ? (ssize_t)1 : (ssize_t)-1); ssize_t range_begin = (ssize_t)start; for (; range_begin != (ssize_t)nbits && range_begin != -1; - range_begin += stride) { + range_begin += stride) { if (fb_get(fb, nbits, range_begin) == val) { ssize_t range_end = range_begin; for (; range_end != (ssize_t)nbits && range_end != -1; - range_end += stride) { + range_end += stride) { if (fb_get(fb, nbits, range_end) != val) { break; } @@ -470,26 +465,26 @@ fb_range_longest_simple(fb_group_t *fb, size_t nbits, bool val) { } static void -expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, - bool val, bool forward) { - bool iter_res; +expect_iter_results_at( + fb_group_t *fb, size_t nbits, size_t pos, bool val, bool forward) { + bool iter_res; size_t iter_begin JEMALLOC_CC_SILENCE_INIT(0); - size_t iter_len JEMALLOC_CC_SILENCE_INIT(0); + size_t iter_len JEMALLOC_CC_SILENCE_INIT(0); if (val) { if (forward) { - iter_res = fb_srange_iter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_srange_iter( + fb, nbits, pos, &iter_begin, &iter_len); } else { - iter_res = fb_srange_riter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_srange_riter( + fb, nbits, pos, &iter_begin, &iter_len); } } else { if (forward) { - iter_res = fb_urange_iter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_urange_iter( + fb, nbits, pos, &iter_begin, &iter_len); } else { - iter_res = fb_urange_riter(fb, nbits, pos, - &iter_begin, &iter_len); + iter_res = fb_urange_riter( + fb, nbits, pos, &iter_begin, &iter_len); } } @@ -500,15 +495,15 @@ expect_iter_results_at(fb_group_t *fb, size_t nbits, size_t pos, */ size_t simple_iter_begin = 0; 
size_t simple_iter_len = 0; - simple_iter_res = fb_iter_simple(fb, nbits, pos, &simple_iter_begin, - &simple_iter_len, val, forward); + simple_iter_res = fb_iter_simple( + fb, nbits, pos, &simple_iter_begin, &simple_iter_len, val, forward); expect_b_eq(iter_res, simple_iter_res, "Result mismatch at %zu", pos); if (iter_res && simple_iter_res) { assert_zu_eq(iter_begin, simple_iter_begin, "Begin mismatch at %zu", pos); - expect_zu_eq(iter_len, simple_iter_len, - "Length mismatch at %zu", pos); + expect_zu_eq( + iter_len, simple_iter_len, "Length mismatch at %zu", pos); } } @@ -543,7 +538,7 @@ do_test_iter_range_exhaustive(size_t nbits) { if (nbits > 1000) { return; } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); @@ -558,7 +553,7 @@ do_test_iter_range_exhaustive(size_t nbits) { expect_iter_results(fb, nbits); fb_unset_range(fb, nbits, 0, nbits); - fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1: nbits / 2); + fb_set_range(fb, nbits, 0, nbits / 2 == 0 ? 1 : nbits / 2); expect_iter_results(fb, nbits); free(fb); @@ -569,8 +564,7 @@ do_test_iter_range_exhaustive(size_t nbits) { * computation. */ TEST_BEGIN(test_iter_range_exhaustive) { -#define NB(nbits) \ - do_test_iter_range_exhaustive(nbits); +#define NB(nbits) do_test_iter_range_exhaustive(nbits); NBITS_TAB #undef NB } @@ -581,8 +575,8 @@ TEST_END * returns the number of set bits in [scount_start, scount_end). */ static size_t -scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, - size_t scount_end) { +scount_contiguous( + size_t set_start, size_t set_end, size_t scount_start, size_t scount_end) { /* No overlap. 
*/ if (set_end <= scount_start || scount_end <= set_start) { return 0; @@ -611,8 +605,8 @@ scount_contiguous(size_t set_start, size_t set_end, size_t scount_start, } static size_t -ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, - size_t ucount_end) { +ucount_contiguous( + size_t set_start, size_t set_end, size_t ucount_start, size_t ucount_end) { /* No overlap. */ if (set_end <= ucount_start || ucount_end <= set_start) { return ucount_end - ucount_start; @@ -641,34 +635,33 @@ ucount_contiguous(size_t set_start, size_t set_end, size_t ucount_start, } static void -expect_count_match_contiguous(fb_group_t *fb, size_t nbits, size_t set_start, - size_t set_end) { +expect_count_match_contiguous( + fb_group_t *fb, size_t nbits, size_t set_start, size_t set_end) { for (size_t i = 0; i < nbits; i++) { for (size_t j = i + 1; j <= nbits; j++) { size_t cnt = j - i; - size_t scount_expected = scount_contiguous(set_start, - set_end, i, j); + size_t scount_expected = scount_contiguous( + set_start, set_end, i, j); size_t scount_computed = fb_scount(fb, nbits, i, cnt); expect_zu_eq(scount_expected, scount_computed, "fb_scount error with nbits=%zu, start=%zu, " "cnt=%zu, with bits set in [%zu, %zu)", nbits, i, cnt, set_start, set_end); - size_t ucount_expected = ucount_contiguous(set_start, - set_end, i, j); + size_t ucount_expected = ucount_contiguous( + set_start, set_end, i, j); size_t ucount_computed = fb_ucount(fb, nbits, i, cnt); assert_zu_eq(ucount_expected, ucount_computed, "fb_ucount error with nbits=%zu, start=%zu, " "cnt=%zu, with bits set in [%zu, %zu)", nbits, i, cnt, set_start, set_end); - } } } static void do_test_count_contiguous(size_t nbits) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb = malloc(sz); fb_init(fb, nbits); @@ -688,7 +681,7 @@ do_test_count_contiguous(size_t nbits) { } TEST_BEGIN(test_count_contiguous_simple) { - enum {nbits = 300}; + enum { nbits 
= 300 }; fb_group_t fb[FB_NGROUPS(nbits)]; fb_init(fb, nbits); /* Just an arbitrary number. */ @@ -718,10 +711,10 @@ TEST_BEGIN(test_count_contiguous_simple) { TEST_END TEST_BEGIN(test_count_contiguous) { -#define NB(nbits) \ - /* This test is *particularly* slow in debug builds. */ \ - if ((!config_debug && nbits < 300) || nbits < 150) { \ - do_test_count_contiguous(nbits); \ +#define NB(nbits) \ + /* This test is *particularly* slow in debug builds. */ \ + if ((!config_debug && nbits < 300) || nbits < 150) { \ + do_test_count_contiguous(nbits); \ } NBITS_TAB #undef NB @@ -729,15 +722,15 @@ TEST_BEGIN(test_count_contiguous) { TEST_END static void -expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, - size_t nbits) { +expect_count_match_alternating( + fb_group_t *fb_even, fb_group_t *fb_odd, size_t nbits) { for (size_t i = 0; i < nbits; i++) { for (size_t j = i + 1; j <= nbits; j++) { size_t cnt = j - i; size_t odd_scount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 1); - size_t odd_scount_computed = fb_scount(fb_odd, nbits, - i, j - i); + size_t odd_scount_computed = fb_scount( + fb_odd, nbits, i, j - i); assert_zu_eq(odd_scount, odd_scount_computed, "fb_scount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -745,8 +738,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, size_t odd_ucount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 0); - size_t odd_ucount_computed = fb_ucount(fb_odd, nbits, - i, j - i); + size_t odd_ucount_computed = fb_ucount( + fb_odd, nbits, i, j - i); assert_zu_eq(odd_ucount, odd_ucount_computed, "fb_ucount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -754,8 +747,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, size_t even_scount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 0); - size_t even_scount_computed = fb_scount(fb_even, nbits, - i, j - i); + size_t even_scount_computed = fb_scount( + fb_even, nbits, 
i, j - i); assert_zu_eq(even_scount, even_scount_computed, "fb_scount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -763,8 +756,8 @@ expect_count_match_alternating(fb_group_t *fb_even, fb_group_t *fb_odd, size_t even_ucount = cnt / 2 + (size_t)(cnt % 2 == 1 && i % 2 == 1); - size_t even_ucount_computed = fb_ucount(fb_even, nbits, - i, j - i); + size_t even_ucount_computed = fb_ucount( + fb_even, nbits, i, j - i); assert_zu_eq(even_ucount, even_ucount_computed, "fb_ucount error with nbits=%zu, start=%zu, " "cnt=%zu, with alternating bits set.", @@ -778,7 +771,7 @@ do_test_count_alternating(size_t nbits) { if (nbits > 1000) { return; } - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb_even = malloc(sz); fb_group_t *fb_odd = malloc(sz); @@ -800,8 +793,7 @@ do_test_count_alternating(size_t nbits) { } TEST_BEGIN(test_count_alternating) { -#define NB(nbits) \ - do_test_count_alternating(nbits); +#define NB(nbits) do_test_count_alternating(nbits); NBITS_TAB #undef NB } @@ -809,8 +801,9 @@ TEST_END static void do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), - void (*fb_op)(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { - size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); + void (*fb_op)( + fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits)) { + size_t sz = FB_NGROUPS(nbits) * sizeof(fb_group_t); fb_group_t *fb1 = malloc(sz); fb_group_t *fb2 = malloc(sz); fb_group_t *fb_result = malloc(sz); @@ -853,8 +846,10 @@ do_test_bit_op(size_t nbits, bool (*op)(bool a, bool b), bool bit2 = ((prng2 & (1ULL << (i % 64))) != 0); /* Original bitmaps shouldn't change. 
*/ - expect_b_eq(bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); - expect_b_eq(bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); + expect_b_eq( + bit1, fb_get(fb1, nbits, i), "difference at bit %zu", i); + expect_b_eq( + bit2, fb_get(fb2, nbits, i), "difference at bit %zu", i); /* New one should be bitwise and. */ expect_b_eq(op(bit1, bit2), fb_get(fb_result, nbits, i), @@ -883,8 +878,7 @@ do_test_bit_and(size_t nbits) { } TEST_BEGIN(test_bit_and) { -#define NB(nbits) \ - do_test_bit_and(nbits); +#define NB(nbits) do_test_bit_and(nbits); NBITS_TAB #undef NB } @@ -901,8 +895,7 @@ do_test_bit_or(size_t nbits) { } TEST_BEGIN(test_bit_or) { -#define NB(nbits) \ - do_test_bit_or(nbits); +#define NB(nbits) do_test_bit_or(nbits); NBITS_TAB #undef NB } @@ -915,8 +908,8 @@ binary_not(bool a, bool b) { } static void -fb_bit_not_shim(fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, - size_t nbits) { +fb_bit_not_shim( + fb_group_t *dst, fb_group_t *src1, fb_group_t *src2, size_t nbits) { (void)src2; fb_bit_not(dst, src1, nbits); } @@ -927,8 +920,7 @@ do_test_bit_not(size_t nbits) { } TEST_BEGIN(test_bit_not) { -#define NB(nbits) \ - do_test_bit_not(nbits); +#define NB(nbits) do_test_bit_not(nbits); NBITS_TAB #undef NB } @@ -936,19 +928,9 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_fb_init, - test_get_set_unset, - test_search_simple, - test_search_exhaustive, - test_range_simple, - test_empty_full, - test_iter_range_simple, - test_iter_range_exhaustive, - test_count_contiguous_simple, - test_count_contiguous, - test_count_alternating, - test_bit_and, - test_bit_or, - test_bit_not); + return test_no_reentrancy(test_fb_init, test_get_set_unset, + test_search_simple, test_search_exhaustive, test_range_simple, + test_empty_full, test_iter_range_simple, test_iter_range_exhaustive, + test_count_contiguous_simple, test_count_contiguous, + test_count_alternating, test_bit_and, test_bit_or, test_bit_not); } diff --git a/test/unit/fork.c 
b/test/unit/fork.c index 1a4c575e..e52d0a6c 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -8,7 +8,7 @@ TEST_BEGIN(test_fork) { /* Set up a manually managed arena for test. */ unsigned arena_ind; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -16,8 +16,8 @@ TEST_BEGIN(test_fork) { unsigned old_arena_ind; sz = sizeof(old_arena_ind); expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); + (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); p = malloc(1); expect_ptr_not_null(p, "Unexpected malloc() failure"); @@ -108,7 +108,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_fork, - test_fork_multithreaded); + return test_no_reentrancy(test_fork, test_fork_multithreaded); } diff --git a/test/unit/fxp.c b/test/unit/fxp.c index 27f10976..02020efe 100644 --- a/test/unit/fxp.c +++ b/test/unit/fxp.c @@ -28,7 +28,7 @@ fxp_close(fxp_t a, fxp_t b) { static fxp_t xparse_fxp(const char *str) { fxp_t result; - bool err = fxp_parse(&result, str, NULL); + bool err = fxp_parse(&result, str, NULL); assert_false(err, "Invalid fxp string: %s", str); return result; } @@ -36,14 +36,14 @@ xparse_fxp(const char *str) { static void expect_parse_accurate(const char *str, const char *parse_str) { double true_val = strtod(str, NULL); - fxp_t fxp_val; - char *end; - bool err = fxp_parse(&fxp_val, parse_str, &end); + fxp_t fxp_val; + char *end; + bool err = fxp_parse(&fxp_val, parse_str, &end); expect_false(err, "Unexpected parse failure"); - expect_ptr_eq(parse_str + strlen(str), end, - "Didn't parse whole string"); - expect_true(double_close(fxp2double(fxp_val), true_val), - "Misparsed %s", str); + expect_ptr_eq( + parse_str + strlen(str), end, "Didn't parse whole string"); + expect_true( + double_close(fxp2double(fxp_val), 
true_val), "Misparsed %s", str); } static void @@ -100,12 +100,12 @@ static void expect_parse_failure(const char *str) { fxp_t result = FXP_INIT_INT(333); char *end = (void *)0x123; - bool err = fxp_parse(&result, str, &end); + bool err = fxp_parse(&result, str, &end); expect_true(err, "Expected a parse error on: %s", str); - expect_ptr_eq((void *)0x123, end, - "Parse error shouldn't change results"); - expect_u32_eq(result, FXP_INIT_INT(333), - "Parse error shouldn't change results"); + expect_ptr_eq( + (void *)0x123, end, "Parse error shouldn't change results"); + expect_u32_eq( + result, FXP_INIT_INT(333), "Parse error shouldn't change results"); } TEST_BEGIN(test_parse_invalid) { @@ -129,7 +129,6 @@ expect_init_percent(unsigned percent, const char *str) { "Expect representations of FXP_INIT_PERCENT(%u) and " "fxp_parse(\"%s\") to be equal; got %x and %x", percent, str, result_init, result_parse); - } /* @@ -145,12 +144,12 @@ TEST_BEGIN(test_init_percent) { TEST_END static void -expect_add(const char *astr, const char *bstr, const char* resultstr) { +expect_add(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_add(a, b), result), - "Expected %s + %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_add(a, b), result), "Expected %s + %s == %s", + astr, bstr, resultstr); } TEST_BEGIN(test_add_simple) { @@ -164,12 +163,12 @@ TEST_BEGIN(test_add_simple) { TEST_END static void -expect_sub(const char *astr, const char *bstr, const char* resultstr) { +expect_sub(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_sub(a, b), result), - "Expected %s - %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_sub(a, b), result), "Expected %s - %s == %s", + astr, bstr, resultstr); } 
TEST_BEGIN(test_sub_simple) { @@ -183,12 +182,12 @@ TEST_BEGIN(test_sub_simple) { TEST_END static void -expect_mul(const char *astr, const char *bstr, const char* resultstr) { +expect_mul(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_mul(a, b), result), - "Expected %s * %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_mul(a, b), result), "Expected %s * %s == %s", + astr, bstr, resultstr); } TEST_BEGIN(test_mul_simple) { @@ -202,12 +201,12 @@ TEST_BEGIN(test_mul_simple) { TEST_END static void -expect_div(const char *astr, const char *bstr, const char* resultstr) { +expect_div(const char *astr, const char *bstr, const char *resultstr) { fxp_t a = xparse_fxp(astr); fxp_t b = xparse_fxp(bstr); fxp_t result = xparse_fxp(resultstr); - expect_true(fxp_close(fxp_div(a, b), result), - "Expected %s / %s == %s", astr, bstr, resultstr); + expect_true(fxp_close(fxp_div(a, b), result), "Expected %s / %s == %s", + astr, bstr, resultstr); } TEST_BEGIN(test_div_simple) { @@ -223,11 +222,11 @@ TEST_END static void expect_round(const char *str, uint32_t rounded_down, uint32_t rounded_nearest) { - fxp_t fxp = xparse_fxp(str); + fxp_t fxp = xparse_fxp(str); uint32_t fxp_rounded_down = fxp_round_down(fxp); uint32_t fxp_rounded_nearest = fxp_round_nearest(fxp); - expect_u32_eq(rounded_down, fxp_rounded_down, - "Mistake rounding %s down", str); + expect_u32_eq( + rounded_down, fxp_rounded_down, "Mistake rounding %s down", str); expect_u32_eq(rounded_nearest, fxp_rounded_nearest, "Mistake rounding %s to nearest", str); } @@ -248,11 +247,11 @@ TEST_END static void expect_mul_frac(size_t a, const char *fracstr, size_t expected) { - fxp_t frac = xparse_fxp(fracstr); + fxp_t frac = xparse_fxp(fracstr); size_t result = fxp_mul_frac(a, frac); expect_true(double_close(expected, result), - "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, - 
expected, result); + "Expected %zu * %s == %zu (fracmul); got %zu", a, fracstr, expected, + result); } TEST_BEGIN(test_mul_frac_simple) { @@ -273,7 +272,7 @@ TEST_END static void expect_print(const char *str) { fxp_t fxp = xparse_fxp(str); - char buf[FXP_BUF_SIZE]; + char buf[FXP_BUF_SIZE]; fxp_print(fxp, buf); expect_d_eq(0, strcmp(str, buf), "Couldn't round-trip print %s", str); } @@ -298,33 +297,32 @@ TEST_BEGIN(test_print_simple) { TEST_END TEST_BEGIN(test_stress) { - const char *numbers[] = { - "0.0", "0.1", "0.2", "0.3", "0.4", - "0.5", "0.6", "0.7", "0.8", "0.9", + const char *numbers[] = {"0.0", "0.1", "0.2", "0.3", "0.4", "0.5", + "0.6", "0.7", "0.8", "0.9", - "1.0", "1.1", "1.2", "1.3", "1.4", - "1.5", "1.6", "1.7", "1.8", "1.9", + "1.0", "1.1", "1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", + "1.9", - "2.0", "2.1", "2.2", "2.3", "2.4", - "2.5", "2.6", "2.7", "2.8", "2.9", + "2.0", "2.1", "2.2", "2.3", "2.4", "2.5", "2.6", "2.7", "2.8", + "2.9", - "17.0", "17.1", "17.2", "17.3", "17.4", - "17.5", "17.6", "17.7", "17.8", "17.9", + "17.0", "17.1", "17.2", "17.3", "17.4", "17.5", "17.6", "17.7", + "17.8", "17.9", - "18.0", "18.1", "18.2", "18.3", "18.4", - "18.5", "18.6", "18.7", "18.8", "18.9", + "18.0", "18.1", "18.2", "18.3", "18.4", "18.5", "18.6", "18.7", + "18.8", "18.9", - "123.0", "123.1", "123.2", "123.3", "123.4", - "123.5", "123.6", "123.7", "123.8", "123.9", + "123.0", "123.1", "123.2", "123.3", "123.4", "123.5", "123.6", + "123.7", "123.8", "123.9", - "124.0", "124.1", "124.2", "124.3", "124.4", - "124.5", "124.6", "124.7", "124.8", "124.9", + "124.0", "124.1", "124.2", "124.3", "124.4", "124.5", "124.6", + "124.7", "124.8", "124.9", - "125.0", "125.1", "125.2", "125.3", "125.4", - "125.5", "125.6", "125.7", "125.8", "125.9"}; - size_t numbers_len = sizeof(numbers)/sizeof(numbers[0]); + "125.0", "125.1", "125.2", "125.3", "125.4", "125.5", "125.6", + "125.7", "125.8", "125.9"}; + size_t numbers_len = sizeof(numbers) / sizeof(numbers[0]); for 
(size_t i = 0; i < numbers_len; i++) { - fxp_t fxp_a = xparse_fxp(numbers[i]); + fxp_t fxp_a = xparse_fxp(numbers[i]); double double_a = strtod(numbers[i], NULL); uint32_t fxp_rounded_down = fxp_round_down(fxp_a); @@ -338,37 +336,35 @@ TEST_BEGIN(test_stress) { "Incorrectly rounded-to-nearest %s", numbers[i]); for (size_t j = 0; j < numbers_len; j++) { - fxp_t fxp_b = xparse_fxp(numbers[j]); + fxp_t fxp_b = xparse_fxp(numbers[j]); double double_b = strtod(numbers[j], NULL); - fxp_t fxp_sum = fxp_add(fxp_a, fxp_b); + fxp_t fxp_sum = fxp_add(fxp_a, fxp_b); double double_sum = double_a + double_b; expect_true( double_close(fxp2double(fxp_sum), double_sum), "Miscomputed %s + %s", numbers[i], numbers[j]); if (double_a > double_b) { - fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b); + fxp_t fxp_diff = fxp_sub(fxp_a, fxp_b); double double_diff = double_a - double_b; - expect_true( - double_close(fxp2double(fxp_diff), - double_diff), + expect_true(double_close(fxp2double(fxp_diff), + double_diff), "Miscomputed %s - %s", numbers[i], numbers[j]); } - fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b); + fxp_t fxp_prod = fxp_mul(fxp_a, fxp_b); double double_prod = double_a * double_b; expect_true( double_close(fxp2double(fxp_prod), double_prod), "Miscomputed %s * %s", numbers[i], numbers[j]); if (double_b != 0.0) { - fxp_t fxp_quot = fxp_div(fxp_a, fxp_b); + fxp_t fxp_quot = fxp_div(fxp_a, fxp_b); double double_quot = double_a / double_b; - expect_true( - double_close(fxp2double(fxp_quot), - double_quot), + expect_true(double_close(fxp2double(fxp_quot), + double_quot), "Miscomputed %s / %s", numbers[i], numbers[j]); } @@ -379,16 +375,8 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_parse_valid, - test_parse_invalid, - test_init_percent, - test_add_simple, - test_sub_simple, - test_mul_simple, - test_div_simple, - test_round_simple, - test_mul_frac_simple, - test_print_simple, - test_stress); + return test_no_reentrancy(test_parse_valid, test_parse_invalid, + test_init_percent, 
test_add_simple, test_sub_simple, + test_mul_simple, test_div_simple, test_round_simple, + test_mul_frac_simple, test_print_simple, test_stress); } diff --git a/test/unit/hash.c b/test/unit/hash.c index 7276333d..e39110fc 100644 --- a/test/unit/hash.c +++ b/test/unit/hash.c @@ -39,24 +39,32 @@ typedef enum { static int hash_variant_bits(hash_variant_t variant) { switch (variant) { - case hash_variant_x86_32: return 32; - case hash_variant_x86_128: return 128; - case hash_variant_x64_128: return 128; - default: not_reached(); + case hash_variant_x86_32: + return 32; + case hash_variant_x86_128: + return 128; + case hash_variant_x64_128: + return 128; + default: + not_reached(); } } static const char * hash_variant_string(hash_variant_t variant) { switch (variant) { - case hash_variant_x86_32: return "hash_x86_32"; - case hash_variant_x86_128: return "hash_x86_128"; - case hash_variant_x64_128: return "hash_x64_128"; - default: not_reached(); + case hash_variant_x86_32: + return "hash_x86_32"; + case hash_variant_x86_128: + return "hash_x86_128"; + case hash_variant_x64_128: + return "hash_x64_128"; + default: + not_reached(); } } -#define KEY_SIZE 256 +#define KEY_SIZE 256 static void hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { const int hashbytes = hash_variant_bits(variant) / 8; @@ -79,20 +87,24 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { switch (variant) { case hash_variant_x86_32: { uint32_t out; - out = hash_x86_32(key, i, 256-i); - memcpy(&hashes[i*hashbytes], &out, hashbytes); + out = hash_x86_32(key, i, 256 - i); + memcpy(&hashes[i * hashbytes], &out, hashbytes); break; - } case hash_variant_x86_128: { + } + case hash_variant_x86_128: { uint64_t out[2]; - hash_x86_128(key, i, 256-i, out); - memcpy(&hashes[i*hashbytes], out, hashbytes); + hash_x86_128(key, i, 256 - i, out); + memcpy(&hashes[i * hashbytes], out, hashbytes); break; - } case hash_variant_x64_128: { + } + case hash_variant_x64_128: { uint64_t out[2]; - 
hash_x64_128(key, i, 256-i, out); - memcpy(&hashes[i*hashbytes], out, hashbytes); + hash_x64_128(key, i, 256 - i, out); + memcpy(&hashes[i * hashbytes], out, hashbytes); break; - } default: not_reached(); + } + default: + not_reached(); } } @@ -102,36 +114,50 @@ hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { uint32_t out = hash_x86_32(hashes, hashes_size, 0); memcpy(final, &out, sizeof(out)); break; - } case hash_variant_x86_128: { + } + case hash_variant_x86_128: { uint64_t out[2]; hash_x86_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; - } case hash_variant_x64_128: { + } + case hash_variant_x64_128: { uint64_t out[2]; hash_x64_128(hashes, hashes_size, 0, out); memcpy(final, out, sizeof(out)); break; - } default: not_reached(); + } + default: + not_reached(); } - computed = - ((uint32_t)final[0] << 0) | - ((uint32_t)final[1] << 8) | - ((uint32_t)final[2] << 16) | - ((uint32_t)final[3] << 24); + computed = ((uint32_t) final[0] << 0) | ((uint32_t) final[1] << 8) + | ((uint32_t) final[2] << 16) | ((uint32_t) final[3] << 24); switch (variant) { #ifdef JEMALLOC_BIG_ENDIAN - case hash_variant_x86_32: expected = 0x6213303eU; break; - case hash_variant_x86_128: expected = 0x266820caU; break; - case hash_variant_x64_128: expected = 0xcc622b6fU; break; + case hash_variant_x86_32: + expected = 0x6213303eU; + break; + case hash_variant_x86_128: + expected = 0x266820caU; + break; + case hash_variant_x64_128: + expected = 0xcc622b6fU; + break; #else - case hash_variant_x86_32: expected = 0xb0f57ee3U; break; - case hash_variant_x86_128: expected = 0xb3ece62aU; break; - case hash_variant_x64_128: expected = 0x6384ba69U; break; + case hash_variant_x86_32: + expected = 0xb0f57ee3U; + break; + case hash_variant_x86_128: + expected = 0xb3ece62aU; + break; + case hash_variant_x64_128: + expected = 0x6384ba69U; + break; #endif - default: not_reached(); + default: + not_reached(); } expect_u32_eq(computed, expected, @@ -141,8 +167,8 @@ 
hash_variant_verify_key(hash_variant_t variant, uint8_t *key) { static void hash_variant_verify(hash_variant_t variant) { -#define MAX_ALIGN 16 - uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; +#define MAX_ALIGN 16 + uint8_t key[KEY_SIZE + (MAX_ALIGN - 1)]; unsigned i; for (i = 0; i < MAX_ALIGN; i++) { @@ -169,8 +195,5 @@ TEST_END int main(void) { - return test( - test_hash_x86_32, - test_hash_x86_128, - test_hash_x64_128); + return test(test_hash_x86_32, test_hash_x86_128, test_hash_x64_128); } diff --git a/test/unit/hook.c b/test/unit/hook.c index f2a7f190..3a6b3c13 100644 --- a/test/unit/hook.c +++ b/test/unit/hook.c @@ -2,12 +2,12 @@ #include "jemalloc/internal/hook.h" -static void *arg_extra; -static int arg_type; -static void *arg_result; -static void *arg_address; -static size_t arg_old_usize; -static size_t arg_new_usize; +static void *arg_extra; +static int arg_type; +static void *arg_result; +static void *arg_address; +static size_t arg_old_usize; +static size_t arg_new_usize; static uintptr_t arg_result_raw; static uintptr_t arg_args_raw[4]; @@ -71,8 +71,8 @@ set_args_raw(uintptr_t *args_raw, int nargs) { static void expect_args_raw(uintptr_t *args_raw_expected, int nargs) { - int cmp = memcmp(args_raw_expected, arg_args_raw, - sizeof(uintptr_t) * nargs); + int cmp = memcmp( + args_raw_expected, arg_args_raw, sizeof(uintptr_t) * nargs); expect_d_eq(cmp, 0, "Raw args mismatch"); } @@ -95,8 +95,8 @@ test_alloc_hook(void *extra, hook_alloc_t type, void *result, } static void -test_dalloc_hook(void *extra, hook_dalloc_t type, void *address, - uintptr_t args_raw[3]) { +test_dalloc_hook( + void *extra, hook_dalloc_t type, void *address, uintptr_t args_raw[3]) { call_count++; arg_extra = extra; arg_type = (int)type; @@ -122,16 +122,15 @@ test_expand_hook(void *extra, hook_expand_t type, void *address, TEST_BEGIN(test_hooks_basic) { /* Just verify that the record their arguments correctly. 
*/ - hooks_t hooks = { - &test_alloc_hook, &test_dalloc_hook, &test_expand_hook, - (void *)111}; - void *handle = hook_install(TSDN_NULL, &hooks); + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)111}; + void *handle = hook_install(TSDN_NULL, &hooks); uintptr_t args_raw[4] = {10, 20, 30, 40}; /* Alloc */ reset_args(); - hook_invoke_alloc(hook_alloc_posix_memalign, (void *)222, 333, - args_raw); + hook_invoke_alloc( + hook_alloc_posix_memalign, (void *)222, 333, args_raw); expect_ptr_eq(arg_extra, (void *)111, "Passed wrong user pointer"); expect_d_eq((int)hook_alloc_posix_memalign, arg_type, "Passed wrong alloc type"); @@ -142,18 +141,18 @@ TEST_BEGIN(test_hooks_basic) { /* Dalloc */ reset_args(); hook_invoke_dalloc(hook_dalloc_sdallocx, (void *)222, args_raw); - expect_d_eq((int)hook_dalloc_sdallocx, arg_type, - "Passed wrong dalloc type"); + expect_d_eq( + (int)hook_dalloc_sdallocx, arg_type, "Passed wrong dalloc type"); expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); expect_args_raw(args_raw, 3); /* Expand */ reset_args(); - hook_invoke_expand(hook_expand_xallocx, (void *)222, 333, 444, 555, - args_raw); - expect_d_eq((int)hook_expand_xallocx, arg_type, - "Passed wrong expand type"); + hook_invoke_expand( + hook_expand_xallocx, (void *)222, 333, 444, 555, args_raw); + expect_d_eq( + (int)hook_expand_xallocx, arg_type, "Passed wrong expand type"); expect_ptr_eq((void *)111, arg_extra, "Passed wrong user pointer"); expect_ptr_eq((void *)222, arg_address, "Passed wrong address"); expect_zu_eq(333, arg_old_usize, "Passed wrong old usize"); @@ -205,7 +204,7 @@ TEST_END TEST_BEGIN(test_hooks_remove) { hooks_t hooks = {&test_alloc_hook, NULL, NULL, NULL}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); call_count = 0; uintptr_t args_raw[4] = 
{10, 20, 30, 40}; @@ -216,14 +215,13 @@ TEST_BEGIN(test_hooks_remove) { hook_remove(TSDN_NULL, handle); hook_invoke_alloc(hook_alloc_malloc, NULL, 0, NULL); expect_d_eq(call_count, 0, "Hook invoked after removal"); - } TEST_END TEST_BEGIN(test_hooks_alloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. */ hooks_t hooks = {&test_alloc_hook, NULL, NULL, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); /* Stop malloc from being optimized away. */ @@ -237,8 +235,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_malloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); @@ -247,11 +245,11 @@ TEST_BEGIN(test_hooks_alloc_simple) { err = posix_memalign((void **)&ptr, 1024, 1); expect_d_eq(call_count, 1, "Hook not called"); expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - expect_d_eq(arg_type, (int)hook_alloc_posix_memalign, - "Wrong hook type"); + expect_d_eq( + arg_type, (int)hook_alloc_posix_memalign, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)err, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)err, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)&ptr, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1024, arg_args_raw[1], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[2], "Wrong argument"); @@ -262,11 +260,10 @@ TEST_BEGIN(test_hooks_alloc_simple) { ptr = aligned_alloc(1024, 1); expect_d_eq(call_count, 1, "Hook not called"); expect_ptr_eq(arg_extra, 
(void *)123, "Wrong extra"); - expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, - "Wrong hook type"); + expect_d_eq(arg_type, (int)hook_alloc_aligned_alloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -278,8 +275,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_calloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)11, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)13, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -292,8 +289,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_memalign, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1024, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -307,8 +304,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_valloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); 
expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_VALLOC */ @@ -321,8 +318,8 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_pvalloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); free(ptr); #endif /* JEMALLOC_OVERRIDE_PVALLOC */ @@ -334,11 +331,11 @@ TEST_BEGIN(test_hooks_alloc_simple) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_mallocx, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)1, arg_args_raw[0], "Wrong argument"); - expect_u64_eq((uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], - "Wrong flags"); + expect_u64_eq( + (uintptr_t)MALLOCX_LG_ALIGN(10), arg_args_raw[1], "Wrong flags"); free(ptr); hook_remove(TSDN_NULL, handle); @@ -348,7 +345,7 @@ TEST_END TEST_BEGIN(test_hooks_dalloc_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, &test_dalloc_hook, NULL, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -372,8 +369,8 @@ TEST_BEGIN(test_hooks_dalloc_simple) { expect_d_eq(arg_type, (int)hook_dalloc_dallocx, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); - expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], - "Wrong raw arg"); + expect_u64_eq( + (uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[1], "Wrong raw arg"); /* sdallocx() */ reset(); @@ -385,8 +382,8 @@ TEST_BEGIN(test_hooks_dalloc_simple) { expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong raw arg"); - expect_u64_eq((uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], - "Wrong raw arg"); + expect_u64_eq( + (uintptr_t)MALLOCX_TCACHE_NONE, arg_args_raw[2], "Wrong raw arg"); hook_remove(TSDN_NULL, handle); } @@ -395,7 +392,7 @@ TEST_END TEST_BEGIN(test_hooks_expand_simple) { /* "Simple" in the sense that we're not in a realloc variant. 
*/ hooks_t hooks = {NULL, NULL, &test_expand_hook, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -421,9 +418,9 @@ TEST_BEGIN(test_hooks_expand_simple) { TEST_END TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { - hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, - &test_expand_hook, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -435,8 +432,8 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)1, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -448,14 +445,11 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { realloc(ptr, 0); expect_d_eq(call_count, 1, "Hook not called"); expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); - expect_d_eq(arg_type, (int)hook_dalloc_realloc, - "Wrong hook type"); - expect_ptr_eq(ptr, arg_address, - "Wrong pointer freed"); - expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], - "Wrong raw arg"); - expect_u64_eq((uintptr_t)0, arg_args_raw[1], - "Wrong raw arg"); + expect_d_eq( + arg_type, (int)hook_dalloc_realloc, "Wrong hook type"); + expect_ptr_eq(ptr, arg_address, "Wrong pointer freed"); + expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong raw arg"); + expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong raw arg"); } /* 
realloc(NULL, 0) as malloc(0) */ @@ -465,8 +459,8 @@ TEST_BEGIN(test_hooks_realloc_as_malloc_or_free) { expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, (int)hook_alloc_realloc, "Wrong hook type"); expect_ptr_eq(ptr, arg_result, "Wrong result"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)NULL, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)0, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -478,9 +472,9 @@ TEST_END static void do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, int expand_type, int dalloc_type) { - hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, - &test_expand_hook, (void *)123}; - void *handle = hook_install(TSDN_NULL, &hooks); + hooks_t hooks = {&test_alloc_hook, &test_dalloc_hook, &test_expand_hook, + (void *)123}; + void *handle = hook_install(TSDN_NULL, &hooks); expect_ptr_ne(handle, NULL, "Hook installation failed"); void *volatile ptr; @@ -496,8 +490,8 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_d_eq(arg_type, expand_type, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong address"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)130, arg_args_raw[1], "Wrong argument"); free(ptr); @@ -522,11 +516,11 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, } expect_ptr_eq(arg_extra, (void *)123, "Wrong extra"); expect_ptr_eq(ptr2, arg_address, "Wrong address"); - expect_u64_eq((uintptr_t)ptr, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr, (uintptr_t)arg_result_raw, "Wrong raw result"); 
expect_u64_eq((uintptr_t)ptr2, arg_args_raw[0], "Wrong argument"); - expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], - "Wrong argument"); + expect_u64_eq( + (uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr); /* Realloc with move, small. */ @@ -540,8 +534,8 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong address"); expect_ptr_eq(ptr2, arg_result, "Wrong address"); - expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); expect_u64_eq((uintptr_t)128, arg_args_raw[1], "Wrong argument"); free(ptr2); @@ -557,11 +551,11 @@ do_realloc_test(void *(*ralloc)(void *, size_t, int), int flags, expect_d_eq(arg_type, dalloc_type, "Wrong hook type"); expect_ptr_eq(ptr, arg_address, "Wrong address"); expect_ptr_eq(ptr2, arg_result, "Wrong address"); - expect_u64_eq((uintptr_t)ptr2, (uintptr_t)arg_result_raw, - "Wrong raw result"); + expect_u64_eq( + (uintptr_t)ptr2, (uintptr_t)arg_result_raw, "Wrong raw result"); expect_u64_eq((uintptr_t)ptr, arg_args_raw[0], "Wrong argument"); - expect_u64_eq((uintptr_t)2 * 1024 * 1024, arg_args_raw[1], - "Wrong argument"); + expect_u64_eq( + (uintptr_t)2 * 1024 * 1024, arg_args_raw[1], "Wrong argument"); free(ptr2); hook_remove(TSDN_NULL, handle); @@ -573,8 +567,8 @@ realloc_wrapper(void *ptr, size_t size, UNUSED int flags) { } TEST_BEGIN(test_hooks_realloc) { - do_realloc_test(&realloc_wrapper, 0, hook_expand_realloc, - hook_dalloc_realloc); + do_realloc_test( + &realloc_wrapper, 0, hook_expand_realloc, hook_dalloc_realloc); } TEST_END @@ -587,14 +581,9 @@ TEST_END int main(void) { /* We assert on call counts. 
*/ - return test_no_reentrancy( - test_hooks_basic, - test_hooks_null, - test_hooks_remove, - test_hooks_alloc_simple, - test_hooks_dalloc_simple, - test_hooks_expand_simple, - test_hooks_realloc_as_malloc_or_free, - test_hooks_realloc, + return test_no_reentrancy(test_hooks_basic, test_hooks_null, + test_hooks_remove, test_hooks_alloc_simple, + test_hooks_dalloc_simple, test_hooks_expand_simple, + test_hooks_realloc_as_malloc_or_free, test_hooks_realloc, test_hooks_rallocx); } diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 47fa25f2..1fed8a80 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -13,55 +13,53 @@ struct test_data_s { * Must be the first member -- we convert back and forth between the * test_data_t and the hpa_shard_t; */ - hpa_shard_t shard; + hpa_shard_t shard; hpa_central_t central; - base_t *base; + base_t *base; edata_cache_t shard_edata_cache; emap_t emap; }; static hpa_shard_opts_t test_hpa_shard_opts_default = { - /* slab_max_alloc */ - ALLOC_MAX, - /* hugification_threshold */ - HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(25), - /* deferral_allowed */ - false, - /* hugify_delay_ms */ - 10 * 1000, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_opts_t test_hpa_shard_opts_purge = { - /* slab_max_alloc */ - HUGEPAGE, - /* hugification_threshold */ - 0.9 * HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(11), - /* deferral_allowed */ - true, - /* hugify_delay_ms */ - 0, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 
* HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { - bool err; + bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); @@ -98,8 +96,8 @@ destroy_test_data(hpa_shard_t *shard) { TEST_BEGIN(test_alloc_max) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_default); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edata; @@ -107,19 +105,19 @@ TEST_BEGIN(test_alloc_max) { /* Small max */ bool deferred_work_generated = false; edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, - /* frequent_reuse */ false, &deferred_work_generated); + /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of small max failed"); edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX + PAGE, PAGE, false, false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_null(edata, "Allocation of larger than small max succeeded"); - edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, - false, /* frequent_reuse */ true, &deferred_work_generated); + edata = pai_alloc(tsdn, &shard->pai, ALLOC_MAX, PAGE, false, false, + /* frequent_reuse */ true, &deferred_work_generated); expect_ptr_not_null(edata, "Allocation of frequent reused failed"); - edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, - false, /* frequent_reuse */ true, &deferred_work_generated); + edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE, PAGE, false, false, + /* frequent_reuse */ true, &deferred_work_generated); 
expect_ptr_not_null(edata, "Allocation of frequent reused failed"); edata = pai_alloc(tsdn, &shard->pai, HUGEPAGE + PAGE, PAGE, false, @@ -133,8 +131,8 @@ TEST_END typedef struct mem_contents_s mem_contents_t; struct mem_contents_s { uintptr_t my_addr; - size_t size; - edata_t *my_edata; + size_t size; + edata_t *my_edata; rb_node(mem_contents_t) link; }; @@ -144,8 +142,7 @@ mem_contents_cmp(const mem_contents_t *a, const mem_contents_t *b) { } typedef rb_tree(mem_contents_t) mem_tree_t; -rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, - mem_contents_cmp); +rb_gen(static, mem_tree_, mem_tree_t, mem_contents_t, link, mem_contents_cmp); static void node_assert_ordered(mem_contents_t *a, mem_contents_t *b) { @@ -191,14 +188,14 @@ node_remove(mem_tree_t *tree, edata_t *edata) { TEST_BEGIN(test_stress) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_default); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); const size_t nlive_edatas_max = 500; - size_t nlive_edatas = 0; - edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); + size_t nlive_edatas = 0; + edata_t **live_edatas = calloc(nlive_edatas_max, sizeof(edata_t *)); /* * Nothing special about this constant; we're only fixing it for * consistency across runs. 
@@ -224,13 +221,14 @@ TEST_BEGIN(test_stress) { */ size_t npages_min = 1; size_t npages_max = ALLOC_MAX / PAGE; - size_t npages = npages_min + prng_range_zu(&prng_state, - npages_max - npages_min); + size_t npages = npages_min + + prng_range_zu( + &prng_state, npages_max - npages_min); edata_t *edata = pai_alloc(tsdn, &shard->pai, npages * PAGE, PAGE, false, false, false, &deferred_work_generated); - assert_ptr_not_null(edata, - "Unexpected allocation failure"); + assert_ptr_not_null( + edata, "Unexpected allocation failure"); live_edatas[nlive_edatas] = edata; nlive_edatas++; node_insert(&tree, edata, npages); @@ -239,7 +237,8 @@ TEST_BEGIN(test_stress) { if (nlive_edatas == 0) { continue; } - size_t victim = prng_range_zu(&prng_state, nlive_edatas); + size_t victim = prng_range_zu( + &prng_state, nlive_edatas); edata_t *to_free = live_edatas[victim]; live_edatas[victim] = live_edatas[nlive_edatas - 1]; nlive_edatas--; @@ -251,7 +250,7 @@ TEST_BEGIN(test_stress) { size_t ntreenodes = 0; for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; - contents = mem_tree_next(&tree, contents)) { + contents = mem_tree_next(&tree, contents)) { ntreenodes++; node_check(&tree, contents); } @@ -264,8 +263,8 @@ TEST_BEGIN(test_stress) { for (size_t i = 0; i < nlive_edatas; i++) { edata_t *to_free = live_edatas[i]; node_remove(&tree, to_free); - pai_dalloc(tsdn, &shard->pai, to_free, - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, to_free, &deferred_work_generated); } hpa_shard_destroy(tsdn, shard); @@ -277,8 +276,7 @@ TEST_END static void expect_contiguous(edata_t **edatas, size_t nedatas) { for (size_t i = 0; i < nedatas; i++) { - size_t expected = (size_t)edata_base_get(edatas[0]) - + i * PAGE; + size_t expected = (size_t)edata_base_get(edatas[0]) + i * PAGE; expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), "Mismatch at index %zu", i); } @@ -287,13 +285,13 @@ expect_contiguous(edata_t **edatas, size_t nedatas) { 
TEST_BEGIN(test_alloc_dalloc_batch) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_default); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_default); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); bool deferred_work_generated = false; - enum {NALLOCS = 8}; + enum { NALLOCS = 8 }; edata_t *allocs[NALLOCS]; /* @@ -329,11 +327,11 @@ TEST_BEGIN(test_alloc_dalloc_batch) { for (size_t i = 0; i < NALLOCS / 2; i++) { edata_list_active_append(&allocs_list, allocs[i]); } - pai_dalloc_batch(tsdn, &shard->pai, &allocs_list, - &deferred_work_generated); + pai_dalloc_batch( + tsdn, &shard->pai, &allocs_list, &deferred_work_generated); for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc(tsdn, &shard->pai, allocs[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, allocs[i], &deferred_work_generated); } /* Reallocate (individually), and ensure reuse and contiguity. */ @@ -344,8 +342,8 @@ TEST_BEGIN(test_alloc_dalloc_batch) { expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); } void *new_base = edata_base_get(allocs[0]); - expect_ptr_eq(orig_base, new_base, - "Failed to reuse the allocated memory."); + expect_ptr_eq( + orig_base, new_base, "Failed to reuse the allocated memory."); expect_contiguous(allocs, NALLOCS); destroy_test_data(shard); @@ -429,7 +427,7 @@ TEST_BEGIN(test_defer_time) { bool deferred_work_generated = false; nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); edata_t *edatas[HUGEPAGE_PAGES]; for (int i = 0; i < (int)HUGEPAGE_PAGES; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -448,8 +446,8 @@ TEST_BEGIN(test_defer_time) { /* Purge. Recall that dirty_mult is .25. 
*/ for (int i = 0; i < (int)HUGEPAGE_PAGES / 2; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard); @@ -474,8 +472,7 @@ TEST_BEGIN(test_defer_time) { * We would be ineligible for hugification, had we not already met the * threshold before dipping below it. */ - pai_dalloc(tsdn, &shard->pai, edatas[0], - &deferred_work_generated); + pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated); /* Wait for the threshold again. */ nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); @@ -491,8 +488,8 @@ TEST_END TEST_BEGIN(test_purge_no_infinite_loop) { test_skip_if(!hpa_supported()); - hpa_shard_t *shard = create_test_data(&hpa_hooks_default, - &test_hpa_shard_opts_purge); + hpa_shard_t *shard = create_test_data( + &hpa_hooks_default, &test_hpa_shard_opts_purge); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); /* @@ -500,14 +497,15 @@ TEST_BEGIN(test_purge_no_infinite_loop) { * criteria for huge page and at the same time do not allow hugify page * without triggering a purge. 
*/ - const size_t npages = - test_hpa_shard_opts_purge.hugification_threshold / PAGE + 1; + const size_t npages = test_hpa_shard_opts_purge.hugification_threshold + / PAGE + + 1; const size_t size = npages * PAGE; - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &shard->pai, size, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); + /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, + &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); hpa_shard_do_deferred_work(tsdn, shard); @@ -542,8 +540,8 @@ TEST_BEGIN(test_no_min_purge_interval) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -584,8 +582,8 @@ TEST_BEGIN(test_min_purge_interval) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -634,7 +632,7 @@ TEST_BEGIN(test_purge) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -643,8 +641,8 @@ 
TEST_BEGIN(test_purge) { } /* Deallocate 3 hugepages out of 8. */ for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); @@ -702,7 +700,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -711,8 +709,8 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { } /* Deallocate 3 hugepages out of 8. */ for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } nstime_init2(&defer_curtime, 6, 0); hpa_shard_do_deferred_work(tsdn, shard); @@ -749,8 +747,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { TEST_END TEST_BEGIN(test_vectorized_opt_eq_zero) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch != 0)); + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); hpa_hooks_t hooks; hooks.map = &defer_test_map; @@ -770,11 +767,11 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { ndefer_purge_calls = 0; hpa_shard_t *shard = create_test_data(&hooks, &opts); - bool deferred_work_generated = false; + bool deferred_work_generated = false; nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); 
pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -800,15 +797,9 @@ main(void) { (void)mem_tree_iter; (void)mem_tree_reverse_iter; (void)mem_tree_destroy; - return test_no_reentrancy( - test_alloc_max, - test_stress, - test_alloc_dalloc_batch, - test_defer_time, - test_purge_no_infinite_loop, - test_no_min_purge_interval, - test_min_purge_interval, - test_purge, - test_experimental_max_purge_nhp, - test_vectorized_opt_eq_zero); + return test_no_reentrancy(test_alloc_max, test_stress, + test_alloc_dalloc_batch, test_defer_time, + test_purge_no_infinite_loop, test_no_min_purge_interval, + test_min_purge_interval, test_purge, + test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero); } diff --git a/test/unit/hpa_background_thread.c b/test/unit/hpa_background_thread.c index 93f046b5..80cf2fed 100644 --- a/test/unit/hpa_background_thread.c +++ b/test/unit/hpa_background_thread.c @@ -12,7 +12,7 @@ TEST_BEGIN(test_hpa_background_thread_a0_initialized) { test_skip_if(!have_background_thread); test_skip_if(san_guard_enabled()); - bool enabled = false; + bool enabled = false; size_t sz = sizeof(enabled); int err = mallctl("background_thread", (void *)&enabled, &sz, NULL, 0); expect_d_eq(err, 0, "Unexpected mallctl() failure"); @@ -38,7 +38,7 @@ sleep_for_background_thread_interval(void) { static unsigned create_arena(void) { unsigned arena_ind; - size_t sz; + size_t sz; sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 2), @@ -48,17 +48,17 @@ create_arena(void) { static size_t get_empty_ndirty(unsigned arena_ind) { - int err; - size_t ndirty_huge; - size_t ndirty_nonhuge; + int err; + size_t ndirty_huge; + size_t ndirty_nonhuge; uint64_t epoch = 1; - size_t sz = sizeof(epoch); - err = je_mallctl("epoch", (void *)&epoch, &sz, (void *)&epoch, - sizeof(epoch)); + size_t sz = sizeof(epoch); + err = je_mallctl( + "epoch", (void *)&epoch, &sz, (void *)&epoch, sizeof(epoch)); 
expect_d_eq(0, err, "Unexpected mallctl() failure"); size_t mib[6]; - size_t miblen = sizeof(mib)/sizeof(mib[0]); + size_t miblen = sizeof(mib) / sizeof(mib[0]); err = mallctlnametomib( "stats.arenas.0.hpa_shard.empty_slabs.ndirty_nonhuge", mib, &miblen); @@ -70,8 +70,7 @@ get_empty_ndirty(unsigned arena_ind) { expect_d_eq(0, err, "Unexpected mallctlbymib() failure"); err = mallctlnametomib( - "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, - &miblen); + "stats.arenas.0.hpa_shard.empty_slabs.ndirty_huge", mib, &miblen); expect_d_eq(0, err, "Unexpected mallctlnametomib() failure"); sz = sizeof(ndirty_huge); @@ -85,20 +84,20 @@ get_empty_ndirty(unsigned arena_ind) { static void set_background_thread_enabled(bool enabled) { int err; - err = je_mallctl("background_thread", NULL, NULL, &enabled, - sizeof(enabled)); + err = je_mallctl( + "background_thread", NULL, NULL, &enabled, sizeof(enabled)); expect_d_eq(0, err, "Unexpected mallctl failure"); } static void wait_until_thread_is_enabled(unsigned arena_id) { - tsd_t* tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); bool sleeping = false; - int iterations = 0; + int iterations = 0; do { - background_thread_info_t *info = - background_thread_info_get(arena_id); + background_thread_info_t *info = background_thread_info_get( + arena_id); malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); sleeping = background_thread_indefinite_sleep(info); @@ -113,10 +112,8 @@ expect_purging(unsigned arena_ind) { expect_zu_eq(0, empty_ndirty, "Expected arena to start unused."); void *ptrs[2]; - ptrs[0] = mallocx(PAGE, - MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); - ptrs[1] = mallocx(PAGE, - MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + ptrs[0] = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + ptrs[1] = mallocx(PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); empty_ndirty = get_empty_ndirty(arena_ind); expect_zu_eq(0, empty_ndirty, "All pages should be 
active"); @@ -151,15 +148,14 @@ expect_deferred_purging(unsigned arena_ind) { */ bool observed_dirty_page = false; for (int i = 0; i < 10; i++) { - void *ptr = mallocx(PAGE, - MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); + void *ptr = mallocx( + PAGE, MALLOCX_TCACHE_NONE | MALLOCX_ARENA(arena_ind)); empty_ndirty = get_empty_ndirty(arena_ind); expect_zu_eq(0, empty_ndirty, "All pages should be active"); dallocx(ptr, MALLOCX_TCACHE_NONE); empty_ndirty = get_empty_ndirty(arena_ind); - expect_true(empty_ndirty == 0 || empty_ndirty == 1 || - opt_prof, "Unexpected extra dirty page count: %zu", - empty_ndirty); + expect_true(empty_ndirty == 0 || empty_ndirty == 1 || opt_prof, + "Unexpected extra dirty page count: %zu", empty_ndirty); if (empty_ndirty > 0) { observed_dirty_page = true; break; @@ -173,8 +169,8 @@ expect_deferred_purging(unsigned arena_ind) { * time. Retry 100 times max before bailing out. */ unsigned retry = 0; - while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 && - (retry++ < 100)) { + while ((empty_ndirty = get_empty_ndirty(arena_ind)) > 0 + && (retry++ < 100)) { sleep_for_background_thread_interval(); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 6770a9fa..8df54d06 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -13,36 +13,35 @@ struct test_data_s { * Must be the first member -- we convert back and forth between the * test_data_t and the hpa_shard_t; */ - hpa_shard_t shard; + hpa_shard_t shard; hpa_central_t central; - base_t *base; + base_t *base; edata_cache_t shard_edata_cache; emap_t emap; }; static hpa_shard_opts_t test_hpa_shard_opts_default = { - /* slab_max_alloc */ - ALLOC_MAX, - /* hugification_threshold */ - HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(25), - /* deferral_allowed */ - false, - /* hugify_delay_ms */ - 10 * 1000, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; 
+ /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { - bool err; + bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); @@ -108,7 +107,8 @@ defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { } static bool defer_vec_purge_didfail = false; -static bool defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { +static bool +defer_vectorized_purge_fail(void *vec, size_t vlen, size_t nbytes) { (void)vec; (void)vlen; (void)nbytes; @@ -141,8 +141,7 @@ defer_test_ms_since(nstime_t *past_time) { } TEST_BEGIN(test_vectorized_failure_fallback) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch == 0)); + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0)); hpa_hooks_t hooks; hooks.map = &defer_test_map; @@ -166,8 +165,8 @@ TEST_BEGIN(test_vectorized_failure_fallback) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); + edata_t *edata = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected null edata"); pai_dalloc(tsdn, &shard->pai, edata, &deferred_work_generated); hpa_shard_do_deferred_work(tsdn, shard); @@ -181,9 +180,8 @@ TEST_BEGIN(test_vectorized_failure_fallback) { TEST_END TEST_BEGIN(test_more_regions_purged_from_one_page) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch == 0) || - HUGEPAGE_PAGES <= 4); + test_skip_if(!hpa_supported() || 
(opt_process_madvise_max_batch == 0) + || HUGEPAGE_PAGES <= 4); hpa_hooks_t hooks; hooks.map = &defer_test_map; @@ -208,7 +206,7 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -249,12 +247,10 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { } TEST_END -size_t -hpa_purge_max_batch_size_for_test_set(size_t new_size); +size_t hpa_purge_max_batch_size_for_test_set(size_t new_size); TEST_BEGIN(test_more_pages_than_batch_page_size) { - test_skip_if(!hpa_supported() || - (opt_process_madvise_max_batch == 0) || - HUGEPAGE_PAGES <= 4); + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0) + || HUGEPAGE_PAGES <= 4); size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); @@ -281,7 +277,7 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -289,8 +285,8 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc(tsdn, &shard->pai, edatas[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); } hpa_shard_do_deferred_work(tsdn, shard); @@ -321,8 +317,7 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_vectorized_failure_fallback, + return test_no_reentrancy(test_vectorized_failure_fallback, test_more_regions_purged_from_one_page, test_more_pages_than_batch_page_size); } diff --git 
a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 561da7a2..a5766620 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -13,36 +13,35 @@ struct test_data_s { * Must be the first member -- we convert back and forth between the * test_data_t and the hpa_shard_t; */ - hpa_shard_t shard; + hpa_shard_t shard; hpa_central_t central; - base_t *base; + base_t *base; edata_cache_t shard_edata_cache; emap_t emap; }; static hpa_shard_opts_t test_hpa_shard_opts_default = { - /* slab_max_alloc */ - ALLOC_MAX, - /* hugification_threshold */ - HUGEPAGE, - /* dirty_mult */ - FXP_INIT_PERCENT(25), - /* deferral_allowed */ - false, - /* hugify_delay_ms */ - 10 * 1000, - /* hugify_sync */ - false, - /* min_purge_interval_ms */ - 5 * 1000, - /* experimental_max_purge_nhp */ - -1 -}; + /* slab_max_alloc */ + ALLOC_MAX, + /* hugification_threshold */ + HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(25), + /* deferral_allowed */ + false, + /* hugify_delay_ms */ + 10 * 1000, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5 * 1000, + /* experimental_max_purge_nhp */ + -1}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { - bool err; + bool err; base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); assert_ptr_not_null(base, ""); @@ -132,8 +131,8 @@ defer_test_ms_since(nstime_t *past_time) { } TEST_BEGIN(test_vectorized_purge) { - test_skip_if(!hpa_supported() || - opt_process_madvise_max_batch == 0 || HUGEPAGE_PAGES <= 4); + test_skip_if(!hpa_supported() || opt_process_madvise_max_batch == 0 + || HUGEPAGE_PAGES <= 4); assert(opt_process_madvise_max_batch == 64); hpa_hooks_t hooks; @@ -159,7 +158,7 @@ TEST_BEGIN(test_vectorized_purge) { nstime_init(&defer_curtime, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - enum {NALLOCS = 8 * HUGEPAGE_PAGES}; + enum { 
NALLOCS = 8 * HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; for (int i = 0; i < NALLOCS; i++) { edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, @@ -192,6 +191,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_vectorized_purge); + return test_no_reentrancy(test_vectorized_purge); } diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 995ab77b..2329f065 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -69,23 +69,25 @@ TEST_BEGIN(test_purge_simple) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t nranges; + size_t nranges; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge, ""); expect_zu_eq(1, nranges, "All dirty pages in a single range"); - void *purge_addr; + void *purge_addr; size_t purge_size; - bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + bool got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 4, ""); @@ -102,7 +104,8 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); /* Allocate the first 3/4 of the pages. 
*/ - void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + void *alloc = hpdata_reserve_alloc( + &hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); /* Free the first 1/4 and the third 1/4 of the pages. */ @@ -115,16 +118,16 @@ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t nranges; + size_t nranges; size_t to_purge = hpdata_purge_begin(&hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge, ""); expect_zu_eq(2, nranges, "First quarter and last half"); - void *purge_addr; + void *purge_addr; size_t purge_size; /* First purge. */ - bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + bool got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); @@ -135,18 +138,20 @@ TEST_BEGIN(test_purge_intervening_dalloc) { HUGEPAGE_PAGES / 4 * PAGE); /* Now continue purging. 
*/ - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq( (void *)((uintptr_t)alloc + 2 * HUGEPAGE_PAGES / 4 * PAGE), purge_addr, ""); expect_zu_ge(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); @@ -155,19 +160,20 @@ TEST_BEGIN(test_purge_intervening_dalloc) { TEST_END TEST_BEGIN(test_purge_over_retained) { - void *purge_addr; + void *purge_addr; size_t purge_size; hpdata_t hpdata; hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); /* Allocate the first 3/4 of the pages. */ - void *alloc = hpdata_reserve_alloc(&hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); + void *alloc = hpdata_reserve_alloc( + &hpdata, 3 * HUGEPAGE_PAGES / 4 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); /* Free the second quarter. */ - void *second_quarter = - (void *)((uintptr_t)alloc + HUGEPAGE_PAGES / 4 * PAGE); + void *second_quarter = (void *)((uintptr_t)alloc + + HUGEPAGE_PAGES / 4 * PAGE); hpdata_unreserve(&hpdata, second_quarter, HUGEPAGE_PAGES / 4 * PAGE); expect_zu_eq(hpdata_ntouched_get(&hpdata), 3 * HUGEPAGE_PAGES / 4, ""); @@ -175,21 +181,24 @@ TEST_BEGIN(test_purge_over_retained) { /* Purge the second quarter. 
*/ hpdata_alloc_allowed_set(&hpdata, false); hpdata_purge_state_t purge_state; - size_t nranges; - size_t to_purge_dirty = hpdata_purge_begin(&hpdata, &purge_state, &nranges); + size_t nranges; + size_t to_purge_dirty = hpdata_purge_begin( + &hpdata, &purge_state, &nranges); expect_zu_eq(HUGEPAGE_PAGES / 4, to_purge_dirty, ""); expect_zu_eq(1, nranges, "Second quarter only"); - bool got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + bool got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(second_quarter, purge_addr, ""); expect_zu_eq(HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); expect_zu_eq(hpdata_ntouched_get(&hpdata), HUGEPAGE_PAGES / 2, ""); @@ -209,16 +218,18 @@ TEST_BEGIN(test_purge_over_retained) { expect_zu_eq(HUGEPAGE_PAGES / 2, to_purge_dirty, ""); expect_zu_eq(1, nranges, "Single range expected"); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); expect_true(got_result, ""); expect_ptr_eq(HPDATA_ADDR, purge_addr, ""); expect_zu_eq(3 * HUGEPAGE_PAGES / 4 * PAGE, purge_size, ""); - got_result = hpdata_purge_next(&hpdata, &purge_state, &purge_addr, - &purge_size); - expect_false(got_result, "Unexpected additional purge range: " - "extent at %p of size %zu", purge_addr, purge_size); + got_result = hpdata_purge_next( + &hpdata, &purge_state, &purge_addr, &purge_size); + 
expect_false(got_result, + "Unexpected additional purge range: " + "extent at %p of size %zu", + purge_addr, purge_size); hpdata_purge_end(&hpdata, &purge_state); expect_zu_eq(hpdata_ntouched_get(&hpdata), 0, ""); @@ -241,11 +252,9 @@ TEST_BEGIN(test_hugify) { } TEST_END -int main(void) { - return test_no_reentrancy( - test_reserve_alloc, - test_purge_simple, - test_purge_intervening_dalloc, - test_purge_over_retained, +int +main(void) { + return test_no_reentrancy(test_reserve_alloc, test_purge_simple, + test_purge_intervening_dalloc, test_purge_over_retained, test_hugify); } diff --git a/test/unit/huge.c b/test/unit/huge.c index 53f6577b..70abe4ac 100644 --- a/test/unit/huge.c +++ b/test/unit/huge.c @@ -8,38 +8,40 @@ const char *malloc_conf = "oversize_threshold:2097152"; TEST_BEGIN(huge_bind_thread) { unsigned arena1, arena2; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); /* Bind to a manual arena. */ expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); - expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena1, - sizeof(arena1)), 0, "Fail to bind thread"); + expect_d_eq( + mallctl("thread.arena", NULL, NULL, &arena1, sizeof(arena1)), 0, + "Fail to bind thread"); void *ptr = mallocx(HUGE_SZ, 0); expect_ptr_not_null(ptr, "Fail to allocate huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_eq(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); /* Switch back to arena 0. 
*/ - test_skip_if(have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)); + test_skip_if( + have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); arena2 = 0; - expect_d_eq(mallctl("thread.arena", NULL, NULL, &arena2, - sizeof(arena2)), 0, "Fail to bind thread"); + expect_d_eq( + mallctl("thread.arena", NULL, NULL, &arena2, sizeof(arena2)), 0, + "Fail to bind thread"); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_eq(arena2, 0, "Wrong arena used after binding"); dallocx(ptr, MALLOCX_TCACHE_NONE); /* Then huge allocation should use the huge arena. */ ptr = mallocx(HUGE_SZ, 0); expect_ptr_not_null(ptr, "Fail to allocate huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_ne(arena2, 0, "Wrong arena used after binding"); expect_u_ne(arena1, arena2, "Wrong arena used after binding"); dallocx(ptr, 0); @@ -48,25 +50,26 @@ TEST_END TEST_BEGIN(huge_mallocx) { unsigned arena1, arena2; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", &arena1, &sz, NULL, 0), 0, "Failed to create arena"); void *huge = mallocx(HUGE_SZ, MALLOCX_ARENA(arena1)); expect_ptr_not_null(huge, "Fail to allocate huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, - sizeof(huge)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge, sizeof(huge)), + 0, "Unexpected mallctl() failure"); expect_u_eq(arena1, arena2, "Wrong arena used for mallocx"); dallocx(huge, MALLOCX_ARENA(arena1)); void *huge2 = mallocx(HUGE_SZ, 0); expect_ptr_not_null(huge, "Fail to allocate 
huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &huge2, - sizeof(huge2)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.lookup", &arena2, &sz, &huge2, sizeof(huge2)), 0, + "Unexpected mallctl() failure"); expect_u_ne(arena1, arena2, "Huge allocation should not come from the manual arena."); - expect_u_ne(arena2, 0, - "Huge allocation should not come from the arena 0."); + expect_u_ne( + arena2, 0, "Huge allocation should not come from the arena 0."); dallocx(huge2, 0); } TEST_END @@ -82,30 +85,27 @@ TEST_BEGIN(huge_allocation) { expect_u_gt(arena1, 0, "Huge allocation should not come from arena 0"); dallocx(ptr, 0); - test_skip_if(have_percpu_arena && - PERCPU_ARENA_ENABLED(opt_percpu_arena)); + test_skip_if( + have_percpu_arena && PERCPU_ARENA_ENABLED(opt_percpu_arena)); ptr = mallocx(HUGE_SZ >> 1, 0); expect_ptr_not_null(ptr, "Fail to allocate half huge size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); expect_u_ne(arena1, arena2, "Wrong arena used for half huge"); dallocx(ptr, 0); ptr = mallocx(SMALL_SZ, MALLOCX_TCACHE_NONE); expect_ptr_not_null(ptr, "Fail to allocate small size"); - expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, - sizeof(ptr)), 0, "Unexpected mallctl() failure"); - expect_u_ne(arena1, arena2, - "Huge and small should be from different arenas"); + expect_d_eq(mallctl("arenas.lookup", &arena2, &sz, &ptr, sizeof(ptr)), + 0, "Unexpected mallctl() failure"); + expect_u_ne( + arena1, arena2, "Huge and small should be from different arenas"); dallocx(ptr, 0); } TEST_END int main(void) { - return test( - huge_allocation, - huge_mallocx, - huge_bind_thread); + return test(huge_allocation, huge_mallocx, huge_bind_thread); } diff --git a/test/unit/inspect.c b/test/unit/inspect.c index fe59e597..8111e4a5 100644 --- 
a/test/unit/inspect.c +++ b/test/unit/inspect.c @@ -1,27 +1,30 @@ #include "test/jemalloc_test.h" -#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ - a, b, c, d), EINVAL, "Should fail when " why_inval); \ - assert_zu_eq(out_sz, out_sz_ref, \ - "Output size touched when given invalid arguments"); \ - assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content touched when given invalid arguments"); \ -} while (0) +#define TEST_UTIL_EINVAL(node, a, b, c, d, why_inval) \ + do { \ + assert_d_eq( \ + mallctl("experimental.utilization." node, a, b, c, d), \ + EINVAL, "Should fail when " why_inval); \ + assert_zu_eq(out_sz, out_sz_ref, \ + "Output size touched when given invalid arguments"); \ + assert_d_eq(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content touched when given invalid arguments"); \ + } while (0) -#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ +#define TEST_UTIL_QUERY_EINVAL(a, b, c, d, why_inval) \ TEST_UTIL_EINVAL("query", a, b, c, d, why_inval) -#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ +#define TEST_UTIL_BATCH_EINVAL(a, b, c, d, why_inval) \ TEST_UTIL_EINVAL("batch_query", a, b, c, d, why_inval) -#define TEST_UTIL_VALID(node) do { \ - assert_d_eq(mallctl("experimental.utilization." node, \ - out, &out_sz, in, in_sz), 0, \ - "Should return 0 on correct arguments"); \ - expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ - expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ - "Output content should be changed"); \ -} while (0) +#define TEST_UTIL_VALID(node) \ + do { \ + assert_d_eq(mallctl("experimental.utilization." 
node, out, \ + &out_sz, in, in_sz), \ + 0, "Should return 0 on correct arguments"); \ + expect_zu_eq(out_sz, out_sz_ref, "incorrect output size"); \ + expect_d_ne(memcmp(out, out_ref, out_sz_ref), 0, \ + "Output content should be changed"); \ + } while (0) #define TEST_UTIL_BATCH_VALID TEST_UTIL_VALID("batch_query") @@ -34,21 +37,19 @@ TEST_BEGIN(test_query) { * numerically unrelated to any size boundaries. */ for (sz = 7; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; - sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { - void *p = mallocx(sz, 0); + sz += (sz <= SC_SMALL_MAXCLASS ? 1009 : 99989)) { + void *p = mallocx(sz, 0); void **in = &p; size_t in_sz = sizeof(const void *); size_t out_sz = sizeof(void *) + sizeof(size_t) * 5; - void *out = mallocx(out_sz, 0); - void *out_ref = mallocx(out_sz, 0); + void *out = mallocx(out_sz, 0); + void *out_ref = mallocx(out_sz, 0); size_t out_sz_ref = out_sz; - assert_ptr_not_null(p, - "test pointer allocation failed"); - assert_ptr_not_null(out, - "test output allocation failed"); - assert_ptr_not_null(out_ref, - "test reference output allocation failed"); + assert_ptr_not_null(p, "test pointer allocation failed"); + assert_ptr_not_null(out, "test output allocation failed"); + assert_ptr_not_null( + out_ref, "test reference output allocation failed"); #define SLABCUR_READ(out) (*(void **)out) #define COUNTS(out) ((size_t *)((void **)out + 1)) @@ -64,21 +65,18 @@ TEST_BEGIN(test_query) { memcpy(out_ref, out, out_sz); /* Test invalid argument(s) errors */ - TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, - "old is NULL"); - TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, - "oldlenp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, NULL, in_sz, - "newp is NULL"); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, - "newlen is zero"); + TEST_UTIL_QUERY_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_QUERY_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_QUERY_EINVAL( + out, &out_sz, NULL, in_sz, "newp is 
NULL"); + TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, 0, "newlen is zero"); in_sz -= 1; - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, - "invalid newlen"); + TEST_UTIL_QUERY_EINVAL( + out, &out_sz, in, in_sz, "invalid newlen"); in_sz += 1; out_sz_ref = out_sz -= 2 * sizeof(size_t); - TEST_UTIL_QUERY_EINVAL(out, &out_sz, in, in_sz, - "invalid *oldlenp"); + TEST_UTIL_QUERY_EINVAL( + out, &out_sz, in, in_sz, "invalid *oldlenp"); out_sz_ref = out_sz += 2 * sizeof(size_t); /* Examine output for valid call */ @@ -100,8 +98,9 @@ TEST_BEGIN(test_query) { "Extent region count exceeded size"); expect_zu_ne(NREGS_READ(out), 0, "Extent region count must be positive"); - expect_true(NFREE_READ(out) == 0 || (SLABCUR_READ(out) - != NULL && SLABCUR_READ(out) <= p), + expect_true(NFREE_READ(out) == 0 + || (SLABCUR_READ(out) != NULL + && SLABCUR_READ(out) <= p), "Allocation should follow first fit principle"); if (config_stats) { @@ -117,8 +116,8 @@ TEST_BEGIN(test_query) { BIN_NREGS_READ(out), "Extent region count exceeded " "bin region count"); - expect_zu_eq(BIN_NREGS_READ(out) - % NREGS_READ(out), 0, + expect_zu_eq( + BIN_NREGS_READ(out) % NREGS_READ(out), 0, "Bin region count isn't a multiple of " "extent region count"); expect_zu_le( @@ -171,10 +170,10 @@ TEST_BEGIN(test_batch) { * numerically unrelated to any size boundaries. */ for (sz = 17; sz <= TEST_MAX_SIZE && sz <= SC_LARGE_MAXCLASS; - sz += (sz <= SC_SMALL_MAXCLASS ? 1019 : 99991)) { - void *p = mallocx(sz, 0); - void *q = mallocx(sz, 0); - void *in[] = {p, q}; + sz += (sz <= SC_SMALL_MAXCLASS ? 
1019 : 99991)) { + void *p = mallocx(sz, 0); + void *q = mallocx(sz, 0); + void *in[] = {p, q}; size_t in_sz = sizeof(const void *) * 2; size_t out[] = {-1, -1, -1, -1, -1, -1}; size_t out_sz = sizeof(size_t) * 6; @@ -185,17 +184,14 @@ TEST_BEGIN(test_batch) { assert_ptr_not_null(q, "test pointer allocation failed"); /* Test invalid argument(s) errors */ - TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, - "old is NULL"); - TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, - "oldlenp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, NULL, in_sz, - "newp is NULL"); - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, - "newlen is zero"); + TEST_UTIL_BATCH_EINVAL(NULL, &out_sz, in, in_sz, "old is NULL"); + TEST_UTIL_BATCH_EINVAL(out, NULL, in, in_sz, "oldlenp is NULL"); + TEST_UTIL_BATCH_EINVAL( + out, &out_sz, NULL, in_sz, "newp is NULL"); + TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, 0, "newlen is zero"); in_sz -= 1; - TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, - "newlen is not an exact multiple"); + TEST_UTIL_BATCH_EINVAL( + out, &out_sz, in, in_sz, "newlen is not an exact multiple"); in_sz += 1; out_sz_ref = out_sz -= 2 * sizeof(size_t); TEST_UTIL_BATCH_EINVAL(out, &out_sz, in, in_sz, @@ -206,8 +202,8 @@ TEST_BEGIN(test_batch) { "*oldlenp and newlen do not match"); in_sz += sizeof(const void *); - /* Examine output for valid calls */ -#define TEST_EQUAL_REF(i, message) \ + /* Examine output for valid calls */ +#define TEST_EQUAL_REF(i, message) \ assert_d_eq(memcmp(out + (i) * 3, out_ref + (i) * 3, 3), 0, message) #define NFREE_READ(out, i) out[(i) * 3] @@ -238,8 +234,8 @@ TEST_BEGIN(test_batch) { expect_zu_eq(NREGS_READ(out, 0), 1, "Extent region count should be one"); } - TEST_EQUAL_REF(1, - "Should not overwrite content beyond what's needed"); + TEST_EQUAL_REF( + 1, "Should not overwrite content beyond what's needed"); in_sz *= 2; out_sz_ref = out_sz *= 2; diff --git a/test/unit/junk.c b/test/unit/junk.c index 6c5b8beb..80f51e15 100644 --- a/test/unit/junk.c +++ 
b/test/unit/junk.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -#define arraylen(arr) (sizeof(arr)/sizeof(arr[0])) +#define arraylen(arr) (sizeof(arr) / sizeof(arr[0])) static size_t ptr_ind; static void *volatile ptrs[100]; -static void *last_junked_ptr; +static void *last_junked_ptr; static size_t last_junked_usize; static void @@ -21,17 +21,17 @@ test_junk(void *ptr, size_t usize) { static void do_allocs(size_t size, bool zero, size_t lg_align) { -#define JUNK_ALLOC(...) \ - do { \ - assert(ptr_ind + 1 < arraylen(ptrs)); \ - void *ptr = __VA_ARGS__; \ - assert_ptr_not_null(ptr, ""); \ - ptrs[ptr_ind++] = ptr; \ - if (opt_junk_alloc && !zero) { \ - expect_ptr_eq(ptr, last_junked_ptr, ""); \ - expect_zu_eq(last_junked_usize, \ - TEST_MALLOC_SIZE(ptr), ""); \ - } \ +#define JUNK_ALLOC(...) \ + do { \ + assert(ptr_ind + 1 < arraylen(ptrs)); \ + void *ptr = __VA_ARGS__; \ + assert_ptr_not_null(ptr, ""); \ + ptrs[ptr_ind++] = ptr; \ + if (opt_junk_alloc && !zero) { \ + expect_ptr_eq(ptr, last_junked_ptr, ""); \ + expect_zu_eq( \ + last_junked_usize, TEST_MALLOC_SIZE(ptr), ""); \ + } \ } while (0) if (!zero && lg_align == 0) { JUNK_ALLOC(malloc(size)); @@ -51,21 +51,20 @@ do_allocs(size_t size, bool zero, size_t lg_align) { #endif int zero_flag = zero ? 
MALLOCX_ZERO : 0; JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align))); - JUNK_ALLOC(mallocx(size, zero_flag | MALLOCX_LG_ALIGN(lg_align) - | MALLOCX_TCACHE_NONE)); + JUNK_ALLOC(mallocx(size, + zero_flag | MALLOCX_LG_ALIGN(lg_align) | MALLOCX_TCACHE_NONE)); if (lg_align >= LG_SIZEOF_PTR) { void *memalign_result; - int err = posix_memalign(&memalign_result, (1 << lg_align), - size); + int err = posix_memalign( + &memalign_result, (1 << lg_align), size); assert_d_eq(err, 0, ""); JUNK_ALLOC(memalign_result); } } TEST_BEGIN(test_junk_alloc_free) { - bool zerovals[] = {false, true}; - size_t sizevals[] = { - 1, 8, 100, 1000, 100*1000 + bool zerovals[] = {false, true}; + size_t sizevals[] = {1, 8, 100, 1000, 100 * 1000 /* * Memory allocation failure is a real possibility in 32-bit mode. * Rather than try to check in the face of resource exhaustion, we just @@ -75,49 +74,49 @@ TEST_BEGIN(test_junk_alloc_free) { * mechanisms; but this is in fact the case. */ #if LG_SIZEOF_PTR == 3 - , 10 * 1000 * 1000 + , + 10 * 1000 * 1000 #endif }; - size_t lg_alignvals[] = { - 0, 4, 10, 15, 16, LG_PAGE + size_t lg_alignvals[] = {0, 4, 10, 15, 16, LG_PAGE #if LG_SIZEOF_PTR == 3 - , 20, 24 + , + 20, 24 #endif }; -#define JUNK_FREE(...) \ - do { \ - do_allocs(size, zero, lg_align); \ - for (size_t n = 0; n < ptr_ind; n++) { \ - void *ptr = ptrs[n]; \ - __VA_ARGS__; \ - if (opt_junk_free) { \ - assert_ptr_eq(ptr, last_junked_ptr, \ - ""); \ - assert_zu_eq(usize, last_junked_usize, \ - ""); \ - } \ - reset(); \ - } \ +#define JUNK_FREE(...) 
\ + do { \ + do_allocs(size, zero, lg_align); \ + for (size_t n = 0; n < ptr_ind; n++) { \ + void *ptr = ptrs[n]; \ + __VA_ARGS__; \ + if (opt_junk_free) { \ + assert_ptr_eq(ptr, last_junked_ptr, ""); \ + assert_zu_eq(usize, last_junked_usize, ""); \ + } \ + reset(); \ + } \ } while (0) for (size_t i = 0; i < arraylen(zerovals); i++) { for (size_t j = 0; j < arraylen(sizevals); j++) { for (size_t k = 0; k < arraylen(lg_alignvals); k++) { - bool zero = zerovals[i]; + bool zero = zerovals[i]; size_t size = sizevals[j]; size_t lg_align = lg_alignvals[k]; - size_t usize = nallocx(size, - MALLOCX_LG_ALIGN(lg_align)); + size_t usize = nallocx( + size, MALLOCX_LG_ALIGN(lg_align)); JUNK_FREE(free(ptr)); JUNK_FREE(dallocx(ptr, 0)); JUNK_FREE(dallocx(ptr, MALLOCX_TCACHE_NONE)); - JUNK_FREE(dallocx(ptr, MALLOCX_LG_ALIGN( - lg_align))); - JUNK_FREE(sdallocx(ptr, usize, MALLOCX_LG_ALIGN( - lg_align))); + JUNK_FREE( + dallocx(ptr, MALLOCX_LG_ALIGN(lg_align))); + JUNK_FREE(sdallocx( + ptr, usize, MALLOCX_LG_ALIGN(lg_align))); JUNK_FREE(sdallocx(ptr, usize, - MALLOCX_TCACHE_NONE | MALLOCX_LG_ALIGN(lg_align))); + MALLOCX_TCACHE_NONE + | MALLOCX_LG_ALIGN(lg_align))); if (opt_zero_realloc_action == zero_realloc_action_free) { JUNK_FREE(realloc(ptr, 0)); @@ -138,24 +137,24 @@ TEST_BEGIN(test_realloc_expand) { ptr = malloc(SC_SMALL_MAXCLASS); expanded = realloc(ptr, SC_LARGE_MINCLASS); expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); - expect_zu_eq(last_junked_usize, - SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + expect_zu_eq( + last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); free(expanded); /* rallocx(..., 0) */ ptr = malloc(SC_SMALL_MAXCLASS); expanded = rallocx(ptr, SC_LARGE_MINCLASS, 0); expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); - expect_zu_eq(last_junked_usize, - SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + expect_zu_eq( + last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); free(expanded); /* rallocx(..., 
nonzero) */ ptr = malloc(SC_SMALL_MAXCLASS); expanded = rallocx(ptr, SC_LARGE_MINCLASS, MALLOCX_TCACHE_NONE); expect_ptr_eq(last_junked_ptr, &expanded[SC_SMALL_MAXCLASS], ""); - expect_zu_eq(last_junked_usize, - SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); + expect_zu_eq( + last_junked_usize, SC_LARGE_MINCLASS - SC_SMALL_MAXCLASS, ""); free(expanded); /* rallocx(..., MALLOCX_ZERO) */ @@ -189,7 +188,5 @@ main(void) { * We check the last pointer junked. If a reentrant call happens, that * might be an internal allocation. */ - return test_no_reentrancy( - test_junk_alloc_free, - test_realloc_expand); + return test_no_reentrancy(test_junk_alloc_free, test_realloc_expand); } diff --git a/test/unit/log.c b/test/unit/log.c index c09b5896..bf4ee1ff 100644 --- a/test/unit/log.c +++ b/test/unit/log.c @@ -18,16 +18,13 @@ expect_no_logging(const char *names) { int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) - log_do_begin(log_l2) - count++; + log_do_begin(log_l2) count++; log_do_end(log_l2) - log_do_begin(log_l2_a) - count++; + log_do_begin(log_l2_a) count++; log_do_end(log_l2_a) } expect_d_eq(count, 0, "Disabled logging not ignored!"); @@ -57,8 +54,7 @@ TEST_BEGIN(test_log_enabled_direct) { count = 0; update_log_var_names("l1"); for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) } expect_d_eq(count, 10, "Mis-logged!"); @@ -66,8 +62,7 @@ TEST_BEGIN(test_log_enabled_direct) { count = 0; update_log_var_names("l1.a"); for (int i = 0; i < 10; i++) { - log_do_begin(log_l1_a) - count++; + log_do_begin(log_l1_a) count++; log_do_end(log_l1_a) } expect_d_eq(count, 10, "Mis-logged!"); @@ -75,12 +70,10 @@ TEST_BEGIN(test_log_enabled_direct) { count = 0; update_log_var_names("l1.a|abc|l2|def"); for (int i = 0; i < 10; i++) { - log_do_begin(log_l1_a) - count++; + log_do_begin(log_l1_a) count++; log_do_end(log_l1_a) - log_do_begin(log_l2) - 
count++; + log_do_begin(log_l2) count++; log_do_end(log_l2) } expect_d_eq(count, 20, "Mis-logged!"); @@ -108,28 +101,22 @@ TEST_BEGIN(test_log_enabled_indirect) { /* 4 are on total, so should sum to 40. */ int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) - log_do_begin(log_l1a) - count++; + log_do_begin(log_l1a) count++; log_do_end(log_l1a) - log_do_begin(log_l1_a) - count++; + log_do_begin(log_l1_a) count++; log_do_end(log_l1_a) - log_do_begin(log_l2_a) - count++; + log_do_begin(log_l2_a) count++; log_do_end(log_l2_a) - log_do_begin(log_l2_b_a) - count++; + log_do_begin(log_l2_b_a) count++; log_do_end(log_l2_b_a) - log_do_begin(log_l2_b_b) - count++; + log_do_begin(log_l2_b_b) count++; log_do_end(log_l2_b_b) } @@ -147,12 +134,10 @@ TEST_BEGIN(test_log_enabled_global) { int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(log_l1) - count++; + log_do_begin(log_l1) count++; log_do_end(log_l1) - log_do_begin(log_l2_a_a) - count++; + log_do_begin(log_l2_a_a) count++; log_do_end(log_l2_a_a) } expect_d_eq(count, 20, "Mis-logged!"); @@ -167,8 +152,7 @@ TEST_BEGIN(test_logs_if_no_init) { int count = 0; for (int i = 0; i < 10; i++) { - log_do_begin(l) - count++; + log_do_begin(l) count++; log_do_end(l) } expect_d_eq(count, 0, "Logging shouldn't happen if not initialized."); @@ -188,11 +172,7 @@ TEST_END int main(void) { - return test( - test_log_disabled, - test_log_enabled_direct, - test_log_enabled_indirect, - test_log_enabled_global, - test_logs_if_no_init, - test_log_only_format_string); + return test(test_log_disabled, test_log_enabled_direct, + test_log_enabled_indirect, test_log_enabled_global, + test_logs_if_no_init, test_log_only_format_string); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 838a4445..ac7506cf 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -6,26 +6,27 @@ TEST_BEGIN(test_mallctl_errors) { uint64_t epoch; - size_t sz; + size_t sz; 
expect_d_eq(mallctl("no_such_name", NULL, NULL, NULL, 0), ENOENT, "mallctl() should return ENOENT for non-existent names"); expect_d_eq(mallctl("version", NULL, NULL, "0.0.0", strlen("0.0.0")), - EPERM, "mallctl() should return EPERM on attempt to write " + EPERM, + "mallctl() should return EPERM on attempt to write " "read-only value"); - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)-1), EINVAL, - "mallctl() should return EINVAL for input size mismatch"); - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, - sizeof(epoch)+1), EINVAL, - "mallctl() should return EINVAL for input size mismatch"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch) - 1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch) + 1), + EINVAL, "mallctl() should return EINVAL for input size mismatch"); - sz = sizeof(epoch)-1; + sz = sizeof(epoch) - 1; expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); - sz = sizeof(epoch)+1; + sz = sizeof(epoch) + 1; expect_d_eq(mallctl("epoch", (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctl() should return EINVAL for output size mismatch"); } @@ -35,7 +36,7 @@ TEST_BEGIN(test_mallctlnametomib_errors) { size_t mib[1]; size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("no_such_name", mib, &miblen), ENOENT, "mallctlnametomib() should return ENOENT for non-existent names"); } @@ -43,34 +44,38 @@ TEST_END TEST_BEGIN(test_mallctlbymib_errors) { uint64_t epoch; - size_t sz; - size_t mib[1]; - size_t miblen; + size_t sz; + size_t mib[1]; + size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("version", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); - expect_d_eq(mallctlbymib(mib, 
miblen, NULL, NULL, "0.0.0", - strlen("0.0.0")), EPERM, "mallctl() should return EPERM on " + expect_d_eq( + mallctlbymib(mib, miblen, NULL, NULL, "0.0.0", strlen("0.0.0")), + EPERM, + "mallctl() should return EPERM on " "attempt to write read-only value"); - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("epoch", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, - sizeof(epoch)-1), EINVAL, + sizeof(epoch) - 1), + EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, (void *)&epoch, - sizeof(epoch)+1), EINVAL, + sizeof(epoch) + 1), + EINVAL, "mallctlbymib() should return EINVAL for input size mismatch"); - sz = sizeof(epoch)-1; + sz = sizeof(epoch) - 1; expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); - sz = sizeof(epoch)+1; + sz = sizeof(epoch) + 1; expect_d_eq(mallctlbymib(mib, miblen, (void *)&epoch, &sz, NULL, 0), EINVAL, "mallctlbymib() should return EINVAL for output size mismatch"); @@ -79,7 +84,7 @@ TEST_END TEST_BEGIN(test_mallctl_read_write) { uint64_t old_epoch, new_epoch; - size_t sz = sizeof(old_epoch); + size_t sz = sizeof(old_epoch); /* Blind. */ expect_d_eq(mallctl("epoch", NULL, NULL, NULL, 0), 0, @@ -92,14 +97,15 @@ TEST_BEGIN(test_mallctl_read_write) { expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Write. */ - expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&new_epoch, - sizeof(new_epoch)), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("epoch", NULL, NULL, (void *)&new_epoch, sizeof(new_epoch)), + 0, "Unexpected mallctl() failure"); expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); /* Read+write. 
*/ expect_d_eq(mallctl("epoch", (void *)&old_epoch, &sz, - (void *)&new_epoch, sizeof(new_epoch)), 0, - "Unexpected mallctl() failure"); + (void *)&new_epoch, sizeof(new_epoch)), + 0, "Unexpected mallctl() failure"); expect_zu_eq(sz, sizeof(old_epoch), "Unexpected output size"); } TEST_END @@ -133,10 +139,10 @@ TEST_BEGIN(test_mallctlnametomib_short_name) { TEST_END TEST_BEGIN(test_mallctlmibnametomib) { - size_t mib[4]; - size_t miblen = 4; + size_t mib[4]; + size_t miblen = 4; uint32_t result, result_ref; - size_t len_result = sizeof(uint32_t); + size_t len_result = sizeof(uint32_t); tsd_t *tsd = tsd_fetch(); @@ -178,20 +184,21 @@ TEST_BEGIN(test_mallctlmibnametomib) { /* Valid case. */ assert_d_eq(ctl_mibnametomib(tsd, mib, 3, "nregs", &miblen), 0, ""); assert_zu_eq(miblen, 4, ""); - assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), - 0, "Unexpected mallctlbymib() failure"); - assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq(mallctlbymib(mib, miblen, &result, &len_result, NULL, 0), 0, + "Unexpected mallctlbymib() failure"); + assert_d_eq( + mallctl("arenas.bin.0.nregs", &result_ref, &len_result, NULL, 0), 0, + "Unexpected mallctl() failure"); expect_zu_eq(result, result_ref, "mallctlbymib() and mallctl() returned different result"); } TEST_END TEST_BEGIN(test_mallctlbymibname) { - size_t mib[4]; - size_t miblen = 4; + size_t mib[4]; + size_t miblen = 4; uint32_t result, result_ref; - size_t len_result = sizeof(uint32_t); + size_t len_result = sizeof(uint32_t); tsd_t *tsd = tsd_fetch(); @@ -202,50 +209,60 @@ TEST_BEGIN(test_mallctlbymibname) { assert_zu_eq(miblen, 1, ""); miblen = 4; - assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, - &result, &len_result, NULL, 0), ENOENT, ""); + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0", &miblen, &result, + &len_result, NULL, 0), + ENOENT, ""); miblen = 4; - assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", 
&miblen, - &result, &len_result, NULL, 0), ENOENT, ""); + assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.bob", &miblen, &result, + &len_result, NULL, 0), + ENOENT, ""); assert_zu_eq(miblen, 4, ""); /* Valid cases. */ - assert_d_eq(mallctl("arenas.bin.0.nregs", &result_ref, &len_result, - NULL, 0), 0, "Unexpected mallctl() failure"); + assert_d_eq( + mallctl("arenas.bin.0.nregs", &result_ref, &len_result, NULL, 0), 0, + "Unexpected mallctl() failure"); miblen = 4; assert_d_eq(ctl_bymibname(tsd, mib, 0, "arenas.bin.0.nregs", &miblen, - &result, &len_result, NULL, 0), 0, ""); + &result, &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); assert_d_eq(ctl_bymibname(tsd, mib, 1, "bin.0.nregs", &miblen, &result, - &len_result, NULL, 0), 0, ""); + &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); assert_d_eq(ctl_bymibname(tsd, mib, 2, "0.nregs", &miblen, &result, - &len_result, NULL, 0), 0, ""); + &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); assert_d_eq(ctl_bymibname(tsd, mib, 3, "nregs", &miblen, &result, - &len_result, NULL, 0), 0, ""); + &len_result, NULL, 0), + 0, ""); assert_zu_eq(miblen, 4, ""); expect_zu_eq(result, result_ref, "Unexpected result"); } TEST_END TEST_BEGIN(test_mallctl_config) { -#define TEST_MALLCTL_CONFIG(config, t) do { \ - t oldval; \ - size_t sz = sizeof(oldval); \ - expect_d_eq(mallctl("config."#config, (void *)&oldval, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ - expect_b_eq(oldval, config_##config, "Incorrect config value"); \ - expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ -} while (0) +#define TEST_MALLCTL_CONFIG(config, t) \ + do { \ + t oldval; \ + size_t sz = sizeof(oldval); \ + expect_d_eq( \ + mallctl("config." 
#config, (void *)&oldval, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + expect_b_eq( \ + oldval, config_##config, "Incorrect config value"); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + } while (0) TEST_MALLCTL_CONFIG(cache_oblivious, bool); TEST_MALLCTL_CONFIG(debug, bool); @@ -267,16 +284,17 @@ TEST_END TEST_BEGIN(test_mallctl_opt) { bool config_always = true; -#define TEST_MALLCTL_OPT(t, opt, config) do { \ - t oldval; \ - size_t sz = sizeof(oldval); \ - int expected = config_##config ? 0 : ENOENT; \ - int result = mallctl("opt."#opt, (void *)&oldval, &sz, NULL, \ - 0); \ - expect_d_eq(result, expected, \ - "Unexpected mallctl() result for opt."#opt); \ - expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ -} while (0) +#define TEST_MALLCTL_OPT(t, opt, config) \ + do { \ + t oldval; \ + size_t sz = sizeof(oldval); \ + int expected = config_##config ? 0 : ENOENT; \ + int result = mallctl( \ + "opt." #opt, (void *)&oldval, &sz, NULL, 0); \ + expect_d_eq(result, expected, \ + "Unexpected mallctl() result for opt." #opt); \ + expect_zu_eq(sz, sizeof(oldval), "Unexpected output size"); \ + } while (0) TEST_MALLCTL_OPT(bool, abort, always); TEST_MALLCTL_OPT(bool, abort_conf, always); @@ -341,8 +359,8 @@ TEST_END TEST_BEGIN(test_manpage_example) { unsigned nbins, i; - size_t mib[4]; - size_t len, miblen; + size_t mib[4]; + size_t len, miblen; len = sizeof(nbins); expect_d_eq(mallctl("arenas.nbins", (void *)&nbins, &len, NULL, 0), 0, @@ -356,8 +374,9 @@ TEST_BEGIN(test_manpage_example) { mib[2] = i; len = sizeof(bin_size); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&bin_size, &len, - NULL, 0), 0, "Unexpected mallctlbymib() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&bin_size, &len, NULL, 0), + 0, "Unexpected mallctlbymib() failure"); /* Do something with bin_size... 
*/ } } @@ -380,8 +399,8 @@ TEST_BEGIN(test_tcache_none) { void *p1 = mallocx(42, 0); expect_ptr_not_null(p1, "Unexpected mallocx() failure"); if (!opt_prof && !san_uaf_detection_enabled()) { - expect_ptr_eq(p0, p1, - "Expected tcache to allocate cached region"); + expect_ptr_eq( + p0, p1, "Expected tcache to allocate cached region"); } /* Clean up. */ @@ -390,12 +409,12 @@ TEST_BEGIN(test_tcache_none) { TEST_END TEST_BEGIN(test_tcache) { -#define NTCACHES 10 +#define NTCACHES 10 unsigned tis[NTCACHES]; - void *ps[NTCACHES]; - void *qs[NTCACHES]; + void *ps[NTCACHES]; + void *qs[NTCACHES]; unsigned i; - size_t sz, psz, qsz; + size_t sz, psz, qsz; psz = 42; qsz = nallocx(psz, 0) + 1; @@ -403,39 +422,41 @@ TEST_BEGIN(test_tcache) { /* Create tcaches. */ for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, - 0), 0, "Unexpected mallctl() failure, i=%u", i); + expect_d_eq( + mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); } /* Exercise tcache ID recycling. */ for (i = 0; i < NTCACHES; i++) { expect_d_eq(mallctl("tcache.destroy", NULL, NULL, - (void *)&tis[i], sizeof(unsigned)), 0, - "Unexpected mallctl() failure, i=%u", i); + (void *)&tis[i], sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } for (i = 0; i < NTCACHES; i++) { sz = sizeof(unsigned); - expect_d_eq(mallctl("tcache.create", (void *)&tis[i], &sz, NULL, - 0), 0, "Unexpected mallctl() failure, i=%u", i); + expect_d_eq( + mallctl("tcache.create", (void *)&tis[i], &sz, NULL, 0), 0, + "Unexpected mallctl() failure, i=%u", i); } /* Flush empty tcaches. */ for (i = 0; i < NTCACHES; i++) { expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } /* Cache some allocations. 
*/ for (i = 0; i < NTCACHES; i++) { ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", - i); + expect_ptr_not_null( + ps[i], "Unexpected mallocx() failure, i=%u", i); dallocx(ps[i], MALLOCX_TCACHE(tis[i])); qs[i] = mallocx(qsz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(qs[i], "Unexpected mallocx() failure, i=%u", - i); + expect_ptr_not_null( + qs[i], "Unexpected mallocx() failure, i=%u", i); dallocx(qs[i], MALLOCX_TCACHE(tis[i])); } @@ -443,11 +464,13 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *p0 = ps[i]; ps[i] = mallocx(psz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(ps[i], "Unexpected mallocx() failure, i=%u", - i); + expect_ptr_not_null( + ps[i], "Unexpected mallocx() failure, i=%u", i); if (!san_uaf_detection_enabled()) { - expect_ptr_eq(ps[i], p0, "Expected mallocx() to " - "allocate cached region, i=%u", i); + expect_ptr_eq(ps[i], p0, + "Expected mallocx() to " + "allocate cached region, i=%u", + i); } } @@ -455,11 +478,13 @@ TEST_BEGIN(test_tcache) { for (i = 0; i < NTCACHES; i++) { void *q0 = qs[i]; qs[i] = rallocx(ps[i], qsz, MALLOCX_TCACHE(tis[i])); - expect_ptr_not_null(qs[i], "Unexpected rallocx() failure, i=%u", - i); + expect_ptr_not_null( + qs[i], "Unexpected rallocx() failure, i=%u", i); if (!san_uaf_detection_enabled()) { - expect_ptr_eq(qs[i], q0, "Expected rallocx() to " - "allocate cached region, i=%u", i); + expect_ptr_eq(qs[i], q0, + "Expected rallocx() to " + "allocate cached region, i=%u", + i); } /* Avoid undefined behavior in case of test failure. */ if (qs[i] == NULL) { @@ -471,17 +496,17 @@ TEST_BEGIN(test_tcache) { } /* Flush some non-empty tcaches. 
*/ - for (i = 0; i < NTCACHES/2; i++) { + for (i = 0; i < NTCACHES / 2; i++) { expect_d_eq(mallctl("tcache.flush", NULL, NULL, (void *)&tis[i], - sizeof(unsigned)), 0, "Unexpected mallctl() failure, i=%u", - i); + sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } /* Destroy tcaches. */ for (i = 0; i < NTCACHES; i++) { expect_d_eq(mallctl("tcache.destroy", NULL, NULL, - (void *)&tis[i], sizeof(unsigned)), 0, - "Unexpected mallctl() failure, i=%u", i); + (void *)&tis[i], sizeof(unsigned)), + 0, "Unexpected mallctl() failure, i=%u", i); } } TEST_END @@ -490,7 +515,7 @@ TEST_BEGIN(test_thread_arena) { unsigned old_arena_ind, new_arena_ind, narenas; const char *opa; - size_t sz = sizeof(opa); + size_t sz = sizeof(opa); expect_d_eq(mallctl("opt.percpu_arena", (void *)&opa, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -505,20 +530,23 @@ TEST_BEGIN(test_thread_arena) { if (strcmp(opa, "disabled") == 0) { new_arena_ind = narenas - 1; expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&new_arena_ind, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + (void *)&new_arena_ind, sizeof(unsigned)), + 0, "Unexpected mallctl() failure"); new_arena_ind = 0; expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&new_arena_ind, sizeof(unsigned)), 0, - "Unexpected mallctl() failure"); + (void *)&new_arena_ind, sizeof(unsigned)), + 0, "Unexpected mallctl() failure"); } else { expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + NULL, 0), + 0, "Unexpected mallctl() failure"); new_arena_ind = percpu_arena_ind_limit(opt_percpu_arena) - 1; if (old_arena_ind != new_arena_ind) { - expect_d_eq(mallctl("thread.arena", - (void *)&old_arena_ind, &sz, (void *)&new_arena_ind, - sizeof(unsigned)), EPERM, "thread.arena ctl " + expect_d_eq( + mallctl("thread.arena", (void *)&old_arena_ind, &sz, + (void *)&new_arena_ind, sizeof(unsigned)), + EPERM, + "thread.arena 
ctl " "should not be allowed with percpu arena"); } } @@ -527,10 +555,10 @@ TEST_END TEST_BEGIN(test_arena_i_initialized) { unsigned narenas, i; - size_t sz; - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - bool initialized; + size_t sz; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + bool initialized; sz = sizeof(narenas); expect_d_eq(mallctl("arenas.narenas", (void *)&narenas, &sz, NULL, 0), @@ -541,8 +569,9 @@ TEST_BEGIN(test_arena_i_initialized) { for (i = 0; i < narenas; i++) { mib[1] = i; sz = sizeof(initialized); - expect_d_eq(mallctlbymib(mib, miblen, &initialized, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctlbymib(mib, miblen, &initialized, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); } mib[1] = MALLCTL_ARENAS_ALL; @@ -554,10 +583,10 @@ TEST_BEGIN(test_arena_i_initialized) { /* Equivalent to the above but using mallctl() directly. */ sz = sizeof(initialized); - expect_d_eq(mallctl( - "arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", - (void *)&initialized, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arena." 
STRINGIFY(MALLCTL_ARENAS_ALL) ".initialized", + (void *)&initialized, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); expect_true(initialized, "Merged arena statistics should always be initialized"); } @@ -565,30 +594,31 @@ TEST_END TEST_BEGIN(test_arena_i_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); expect_d_eq(mallctl("arena.0.dirty_decay_ms", - (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_dirty_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arena.0.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms, - dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arena.0.dirty_decay_ms", - (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_dirty_decay_ms, &sz, + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arena.0.dirty_decay_ms"); } @@ -597,30 +627,31 @@ TEST_END TEST_BEGIN(test_arena_i_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); 
expect_d_eq(mallctl("arena.0.muzzy_decay_ms", - (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, - "Unexpected mallctl() failure"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms, - muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", - (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_muzzy_decay_ms, &sz, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arena.0.muzzy_decay_ms"); } @@ -629,9 +660,9 @@ TEST_END TEST_BEGIN(test_arena_i_purge) { unsigned narenas; - size_t sz = sizeof(unsigned); - size_t mib[3]; - size_t miblen = 3; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; expect_d_eq(mallctl("arena.0.purge", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -652,9 +683,9 @@ TEST_END TEST_BEGIN(test_arena_i_decay) { unsigned narenas; - size_t sz = sizeof(unsigned); - size_t mib[3]; - size_t miblen = 3; + size_t sz = sizeof(unsigned); + size_t mib[3]; + size_t miblen = 3; expect_d_eq(mallctl("arena.0.decay", NULL, NULL, NULL, 0), 0, "Unexpected mallctl() failure"); @@ 
-675,86 +706,89 @@ TEST_END TEST_BEGIN(test_arena_i_dss) { const char *dss_prec_old, *dss_prec_new; - size_t sz = sizeof(dss_prec_old); - size_t mib[3]; - size_t miblen; + size_t sz = sizeof(dss_prec_old); + size_t mib[3]; + size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.dss", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); dss_prec_new = "disabled"; expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, - (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected default for dss precedence"); + (void *)&dss_prec_new, sizeof(dss_prec_new)), + 0, "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected default for dss precedence"); expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_new, &sz, - (void *)&dss_prec_old, sizeof(dss_prec_old)), 0, - "Unexpected mallctl() failure"); + (void *)&dss_prec_old, sizeof(dss_prec_old)), + 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected value for dss precedence"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected value for dss precedence"); mib[1] = narenas_total_get(); dss_prec_new = "disabled"; expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, - (void *)&dss_prec_new, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected default for dss precedence"); + (void *)&dss_prec_new, sizeof(dss_prec_new)), + 0, "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected default for dss precedence"); expect_d_eq(mallctlbymib(mib, 
miblen, (void *)&dss_prec_new, &sz, - (void *)&dss_prec_old, sizeof(dss_prec_new)), 0, - "Unexpected mallctl() failure"); + (void *)&dss_prec_old, sizeof(dss_prec_new)), + 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); - expect_str_ne(dss_prec_old, "primary", - "Unexpected value for dss precedence"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&dss_prec_old, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_str_ne( + dss_prec_old, "primary", "Unexpected value for dss precedence"); } TEST_END TEST_BEGIN(test_arena_i_name) { - unsigned arena_ind; - size_t ind_sz = sizeof(arena_ind); - size_t mib[3]; - size_t miblen; - char name_old[ARENA_NAME_LEN]; - char *name_oldp = name_old; - size_t sz = sizeof(name_oldp); - char default_name[ARENA_NAME_LEN]; + unsigned arena_ind; + size_t ind_sz = sizeof(arena_ind); + size_t mib[3]; + size_t miblen; + char name_old[ARENA_NAME_LEN]; + char *name_oldp = name_old; + size_t sz = sizeof(name_oldp); + char default_name[ARENA_NAME_LEN]; const char *name_new = "test name"; const char *super_long_name = "A name longer than ARENA_NAME_LEN"; - size_t super_long_name_len = strlen(super_long_name); + size_t super_long_name_len = strlen(super_long_name); assert(super_long_name_len > ARENA_NAME_LEN); - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.name", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); - expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.create", (void *)&arena_ind, &ind_sz, NULL, 0), 0, + "Unexpected mallctl() failure"); mib[1] = arena_ind; - malloc_snprintf(default_name, sizeof(default_name), "manual_%u", - arena_ind); + malloc_snprintf( + default_name, sizeof(default_name), "manual_%u", arena_ind); expect_d_eq(mallctlbymib(mib, 
miblen, (void *)&name_oldp, &sz, - (void *)&name_new, sizeof(name_new)), 0, - "Unexpected mallctl() failure"); - expect_str_eq(name_old, default_name, - "Unexpected default value for arena name"); + (void *)&name_new, sizeof(name_new)), + 0, "Unexpected mallctl() failure"); + expect_str_eq( + name_old, default_name, "Unexpected default value for arena name"); expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, - (void *)&super_long_name, sizeof(super_long_name)), 0, - "Unexpected mallctl() failure"); + (void *)&super_long_name, sizeof(super_long_name)), + 0, "Unexpected mallctl() failure"); expect_str_eq(name_old, name_new, "Unexpected value for arena name"); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&name_oldp, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); int cmp = strncmp(name_old, super_long_name, ARENA_NAME_LEN - 1); expect_true(cmp == 0, "Unexpected value for long arena name "); } @@ -765,14 +799,14 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { size_t mib[3]; size_t miblen; - bool retain_enabled; + bool retain_enabled; size_t sz = sizeof(retain_enabled); - expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), - 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("opt.retain", &retain_enabled, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); test_skip_if(!retain_enabled); sz = sizeof(default_limit); - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.retain_grow_limit", mib, &miblen), 0, "Unexpected mallctlnametomib() error"); @@ -782,58 +816,62 @@ TEST_BEGIN(test_arena_i_retain_grow_limit) { "Unexpected default for retain_grow_limit"); new_limit = PAGE - 1; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, - sizeof(new_limit)), EFAULT, "Unexpected mallctl() success"); + expect_d_eq(mallctlbymib( + mib, miblen, NULL, NULL, 
&new_limit, sizeof(new_limit)), + EFAULT, "Unexpected mallctl() success"); new_limit = PAGE + 1; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, - sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctlbymib( + mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), + 0, "Unexpected mallctl() failure"); expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_zu_eq(old_limit, PAGE, - "Unexpected value for retain_grow_limit"); + expect_zu_eq(old_limit, PAGE, "Unexpected value for retain_grow_limit"); /* Expect grow less than psize class 10. */ new_limit = sz_pind2sz(10) - 1; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &new_limit, - sizeof(new_limit)), 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctlbymib( + mib, miblen, NULL, NULL, &new_limit, sizeof(new_limit)), + 0, "Unexpected mallctl() failure"); expect_d_eq(mallctlbymib(mib, miblen, &old_limit, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_zu_eq(old_limit, sz_pind2sz(9), - "Unexpected value for retain_grow_limit"); + expect_zu_eq( + old_limit, sz_pind2sz(9), "Unexpected value for retain_grow_limit"); /* Restore to default. 
*/ expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, &default_limit, - sizeof(default_limit)), 0, "Unexpected mallctl() failure"); + sizeof(default_limit)), + 0, "Unexpected mallctl() failure"); } TEST_END TEST_BEGIN(test_arenas_dirty_decay_ms) { ssize_t dirty_decay_ms, orig_dirty_decay_ms, prev_dirty_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); expect_d_eq(mallctl("arenas.dirty_decay_ms", - (void *)&orig_dirty_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_dirty_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); dirty_decay_ms = -2; expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); dirty_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arenas.dirty_decay_ms", NULL, NULL, - (void *)&dirty_decay_ms, sizeof(ssize_t)), 0, - "Expected mallctl() failure"); + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Expected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; prev_dirty_decay_ms = dirty_decay_ms, - dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arenas.dirty_decay_ms", - (void *)&old_dirty_decay_ms, &sz, (void *)&dirty_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_dirty_decay_ms, &sz, + (void *)&dirty_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_dirty_decay_ms, prev_dirty_decay_ms, "Unexpected old arenas.dirty_decay_ms"); } @@ -842,30 +880,31 @@ TEST_END TEST_BEGIN(test_arenas_muzzy_decay_ms) { ssize_t muzzy_decay_ms, orig_muzzy_decay_ms, prev_muzzy_decay_ms; - size_t sz = sizeof(ssize_t); + size_t sz = sizeof(ssize_t); expect_d_eq(mallctl("arenas.muzzy_decay_ms", - (void 
*)&orig_muzzy_decay_ms, &sz, NULL, 0), 0, - "Unexpected mallctl() failure"); + (void *)&orig_muzzy_decay_ms, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); muzzy_decay_ms = -2; expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), EFAULT, - "Unexpected mallctl() success"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + EFAULT, "Unexpected mallctl() success"); muzzy_decay_ms = 0x7fffffff; expect_d_eq(mallctl("arenas.muzzy_decay_ms", NULL, NULL, - (void *)&muzzy_decay_ms, sizeof(ssize_t)), 0, - "Expected mallctl() failure"); + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Expected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; prev_muzzy_decay_ms = muzzy_decay_ms, - muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arenas.muzzy_decay_ms", - (void *)&old_muzzy_decay_ms, &sz, (void *)&muzzy_decay_ms, - sizeof(ssize_t)), 0, "Unexpected mallctl() failure"); + (void *)&old_muzzy_decay_ms, &sz, + (void *)&muzzy_decay_ms, sizeof(ssize_t)), + 0, "Unexpected mallctl() failure"); expect_zd_eq(old_muzzy_decay_ms, prev_muzzy_decay_ms, "Unexpected old arenas.muzzy_decay_ms"); } @@ -873,13 +912,15 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { TEST_END TEST_BEGIN(test_arenas_constants) { -#define TEST_ARENAS_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("arenas."#name, (void *)&name, &sz, NULL, \ - 0), 0, "Unexpected mallctl() failure"); \ - expect_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) +#define TEST_ARENAS_CONSTANT(t, name, expected) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq( \ + mallctl("arenas." 
#name, (void *)&name, &sz, NULL, 0), 0, \ + "Unexpected mallctl() failure"); \ + expect_zu_eq(name, expected, "Incorrect " #name " size"); \ + } while (0) TEST_ARENAS_CONSTANT(size_t, quantum, QUANTUM); TEST_ARENAS_CONSTANT(size_t, page, PAGE); @@ -892,18 +933,19 @@ TEST_BEGIN(test_arenas_constants) { TEST_END TEST_BEGIN(test_arenas_bin_constants) { -#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("arenas.bin.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ - expect_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) +#define TEST_ARENAS_BIN_CONSTANT(t, name, expected) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("arenas.bin.0." #name, (void *)&name, &sz, \ + NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + expect_zu_eq(name, expected, "Incorrect " #name " size"); \ + } while (0) TEST_ARENAS_BIN_CONSTANT(size_t, size, bin_infos[0].reg_size); TEST_ARENAS_BIN_CONSTANT(uint32_t, nregs, bin_infos[0].nregs); - TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, - bin_infos[0].slab_size); + TEST_ARENAS_BIN_CONSTANT(size_t, slab_size, bin_infos[0].slab_size); TEST_ARENAS_BIN_CONSTANT(uint32_t, nshards, bin_infos[0].n_shards); #undef TEST_ARENAS_BIN_CONSTANT @@ -911,16 +953,17 @@ TEST_BEGIN(test_arenas_bin_constants) { TEST_END TEST_BEGIN(test_arenas_lextent_constants) { -#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("arenas.lextent.0."#name, (void *)&name, \ - &sz, NULL, 0), 0, "Unexpected mallctl() failure"); \ - expect_zu_eq(name, expected, "Incorrect "#name" size"); \ -} while (0) +#define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("arenas.lextent.0." 
#name, (void *)&name, \ + &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + expect_zu_eq(name, expected, "Incorrect " #name " size"); \ + } while (0) - TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, - SC_LARGE_MINCLASS); + TEST_ARENAS_LEXTENT_CONSTANT(size_t, size, SC_LARGE_MINCLASS); #undef TEST_ARENAS_LEXTENT_CONSTANT } @@ -928,25 +971,27 @@ TEST_END TEST_BEGIN(test_arenas_create) { unsigned narenas_before, arena, narenas_after; - size_t sz = sizeof(unsigned); + size_t sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_before, &sz, - NULL, 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.narenas", (void *)&narenas_before, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_d_eq(mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.narenas", (void *)&narenas_after, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); - expect_u_eq(narenas_before+1, narenas_after, + expect_u_eq(narenas_before + 1, narenas_after, "Unexpected number of arenas before versus after extension"); - expect_u_eq(arena, narenas_after-1, "Unexpected arena index"); + expect_u_eq(arena, narenas_after - 1, "Unexpected arena index"); } TEST_END TEST_BEGIN(test_arenas_lookup) { unsigned arena, arena1; - void *ptr; - size_t sz = sizeof(unsigned); + void *ptr; + size_t sz = sizeof(unsigned); expect_d_eq(mallctl("arenas.create", (void *)&arena, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); @@ -967,7 +1012,7 @@ TEST_BEGIN(test_prof_active) { test_skip_if(!config_prof); test_skip_if(opt_prof); - bool active, old; + bool active, old; size_t len = sizeof(bool); active = true; @@ -987,12 +1032,14 @@ TEST_BEGIN(test_prof_active) { TEST_END TEST_BEGIN(test_stats_arenas) { -#define TEST_STATS_ARENAS(t, name) do { \ - t name; \ - size_t sz = sizeof(t); \ 
- expect_d_eq(mallctl("stats.arenas.0."#name, (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ -} while (0) +#define TEST_STATS_ARENAS(t, name) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("stats.arenas.0." #name, (void *)&name, \ + &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + } while (0) TEST_STATS_ARENAS(unsigned, nthreads); TEST_STATS_ARENAS(const char *, dss); @@ -1008,13 +1055,14 @@ TEST_END TEST_BEGIN(test_stats_arenas_hpa_shard_counters) { test_skip_if(!config_stats); -#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name) do { \ - t name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#name, \ - (void *)&name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ -} while (0) +#define TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(t, name) \ + do { \ + t name; \ + size_t sz = sizeof(t); \ + expect_d_eq(mallctl("stats.arenas.0.hpa_shard." #name, \ + (void *)&name, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + } while (0) TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, npageslabs); TEST_STATS_ARENAS_HPA_SHARD_COUNTERS(size_t, nactive); @@ -1031,19 +1079,22 @@ TEST_END TEST_BEGIN(test_stats_arenas_hpa_shard_slabs) { test_skip_if(!config_stats); -#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name) do { \ - t slab##_##name; \ - size_t sz = sizeof(t); \ - expect_d_eq(mallctl("stats.arenas.0.hpa_shard."#slab"."#name, \ - (void *)&slab##_##name, &sz, \ - NULL, 0), 0, "Unexpected mallctl() failure"); \ -} while (0) +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name) \ + do { \ + t slab##_##name; \ + size_t sz = sizeof(t); \ + expect_d_eq( \ + mallctl("stats.arenas.0.hpa_shard." #slab "." 
#name, \ + (void *)&slab##_##name, &sz, NULL, 0), \ + 0, "Unexpected mallctl() failure"); \ + } while (0) -#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name) do { \ - TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, \ - name##_##nonhuge); \ - TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \ -} while (0) +#define TEST_STATS_ARENAS_HPA_SHARD_SLABS(t, slab, name) \ + do { \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN( \ + t, slab, name##_##nonhuge); \ + TEST_STATS_ARENAS_HPA_SHARD_SLABS_GEN(t, slab, name##_##huge); \ + } while (0) TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, npageslabs); TEST_STATS_ARENAS_HPA_SHARD_SLABS(size_t, slabs, nactive); @@ -1069,18 +1120,18 @@ alloc_hook(void *extra, UNUSED hook_alloc_t type, UNUSED void *result, } static void -dalloc_hook(void *extra, UNUSED hook_dalloc_t type, - UNUSED void *address, UNUSED uintptr_t args_raw[3]) { +dalloc_hook(void *extra, UNUSED hook_dalloc_t type, UNUSED void *address, + UNUSED uintptr_t args_raw[3]) { *(bool *)extra = true; } TEST_BEGIN(test_hooks) { - bool hook_called = false; + bool hook_called = false; hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called}; - void *handle = NULL; - size_t sz = sizeof(handle); - int err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, - sizeof(hooks)); + void *handle = NULL; + size_t sz = sizeof(handle); + int err = mallctl( + "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); expect_d_eq(err, 0, "Hook installation failed"); expect_ptr_ne(handle, NULL, "Hook installation gave null handle"); void *ptr = mallocx(1, 0); @@ -1089,8 +1140,8 @@ TEST_BEGIN(test_hooks) { free(ptr); expect_true(hook_called, "Free hook not called"); - err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, - sizeof(handle)); + err = mallctl( + "experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); expect_d_eq(err, 0, "Hook removal failed"); hook_called = false; ptr = mallocx(1, 0); @@ -1100,13 +1151,13 @@ 
TEST_BEGIN(test_hooks) { TEST_END TEST_BEGIN(test_hooks_exhaustion) { - bool hook_called = false; + bool hook_called = false; hooks_t hooks = {&alloc_hook, &dalloc_hook, NULL, &hook_called}; - void *handle; - void *handles[HOOK_MAX]; + void *handle; + void *handles[HOOK_MAX]; size_t sz = sizeof(handle); - int err; + int err; for (int i = 0; i < HOOK_MAX; i++) { handle = NULL; err = mallctl("experimental.hooks.install", &handle, &sz, @@ -1115,8 +1166,8 @@ TEST_BEGIN(test_hooks_exhaustion) { expect_ptr_ne(handle, NULL, "Got NULL handle"); handles[i] = handle; } - err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, - sizeof(hooks)); + err = mallctl( + "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); expect_d_eq(err, EAGAIN, "Should have failed hook installation"); for (int i = 0; i < HOOK_MAX; i++) { err = mallctl("experimental.hooks.remove", NULL, NULL, @@ -1125,12 +1176,12 @@ TEST_BEGIN(test_hooks_exhaustion) { } /* Insertion failed, but then we removed some; it should work now. 
*/ handle = NULL; - err = mallctl("experimental.hooks.install", &handle, &sz, &hooks, - sizeof(hooks)); + err = mallctl( + "experimental.hooks.install", &handle, &sz, &hooks, sizeof(hooks)); expect_d_eq(err, 0, "Hook insertion failed"); expect_ptr_ne(handle, NULL, "Got NULL handle"); - err = mallctl("experimental.hooks.remove", NULL, NULL, &handle, - sizeof(handle)); + err = mallctl( + "experimental.hooks.remove", NULL, NULL, &handle, sizeof(handle)); expect_d_eq(err, 0, "Hook removal failed"); } TEST_END @@ -1144,7 +1195,7 @@ TEST_BEGIN(test_thread_idle) { */ test_skip_if(!config_stats); - int err; + int err; size_t sz; size_t miblen; @@ -1164,14 +1215,15 @@ TEST_BEGIN(test_thread_idle) { sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - err = mallctl("thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)); + err = mallctl( + "thread.arena", NULL, NULL, &arena_ind, sizeof(arena_ind)); expect_d_eq(err, 0, "Unexpected mallctl() failure"); err = mallctl("thread.tcache.flush", NULL, NULL, NULL, 0); expect_d_eq(err, 0, "Unexpected mallctl() failure"); /* We're going to do an allocation of size 1, which we know is small. 
*/ size_t mib[5]; - miblen = sizeof(mib)/sizeof(mib[0]); + miblen = sizeof(mib) / sizeof(mib[0]); err = mallctlnametomib("stats.arenas.0.small.ndalloc", mib, &miblen); expect_d_eq(err, 0, ""); mib[2] = arena_ind; @@ -1220,9 +1272,9 @@ TEST_BEGIN(test_thread_peak) { size_t big_size = 10 * 1024 * 1024; size_t small_size = 256; - void *ptr; - int err; - size_t sz; + void *ptr; + int err; + size_t sz; uint64_t peak; sz = sizeof(uint64_t); @@ -1293,9 +1345,9 @@ TEST_BEGIN(test_thread_activity_callback) { test_skip_if(!config_stats); const size_t big_size = 10 * 1024 * 1024; - void *ptr; - int err; - size_t sz; + void *ptr; + int err; + size_t sz; uint64_t *allocatedp; uint64_t *deallocatedp; @@ -1305,12 +1357,12 @@ TEST_BEGIN(test_thread_activity_callback) { err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0); assert_d_eq(0, err, ""); - activity_callback_thunk_t old_thunk = {(activity_callback_t)111, - (void *)222}; + activity_callback_thunk_t old_thunk = { + (activity_callback_t)111, (void *)222}; - activity_test_data_t test_data = {333, 444}; - activity_callback_thunk_t new_thunk = - {&activity_test_callback, &test_data}; + activity_test_data_t test_data = {333, 444}; + activity_callback_thunk_t new_thunk = { + &activity_test_callback, &test_data}; sz = sizeof(old_thunk); err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, @@ -1329,7 +1381,7 @@ TEST_BEGIN(test_thread_activity_callback) { expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); sz = sizeof(old_thunk); - new_thunk = (activity_callback_thunk_t){ NULL, NULL }; + new_thunk = (activity_callback_thunk_t){NULL, NULL}; err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, &new_thunk, sizeof(new_thunk)); assert_d_eq(0, err, ""); @@ -1347,8 +1399,6 @@ TEST_BEGIN(test_thread_activity_callback) { } TEST_END - - static unsigned nuser_thread_event_cb_calls; static void user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { @@ -1357,25 
+1407,25 @@ user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { ++nuser_thread_event_cb_calls; } static user_hook_object_t user_te_obj = { - .callback = user_thread_event_cb, - .interval = 100, - .is_alloc_only = false, + .callback = user_thread_event_cb, + .interval = 100, + .is_alloc_only = false, }; TEST_BEGIN(test_thread_event_hook) { const size_t big_size = 10 * 1024 * 1024; - void *ptr; - int err; + void *ptr; + int err; unsigned current_calls = nuser_thread_event_cb_calls; - err = mallctl("experimental.hooks.thread_event", NULL, 0, - &user_te_obj, sizeof(user_te_obj)); + err = mallctl("experimental.hooks.thread_event", NULL, 0, &user_te_obj, + sizeof(user_te_obj)); assert_d_eq(0, err, ""); - err = mallctl("experimental.hooks.thread_event", NULL, 0, - &user_te_obj, sizeof(user_te_obj)); - assert_d_eq(0, err, "Not an error to provide object with same interval and cb"); - + err = mallctl("experimental.hooks.thread_event", NULL, 0, &user_te_obj, + sizeof(user_te_obj)); + assert_d_eq( + 0, err, "Not an error to provide object with same interval and cb"); ptr = mallocx(big_size, 0); free(ptr); @@ -1383,47 +1433,23 @@ TEST_BEGIN(test_thread_event_hook) { } TEST_END - int main(void) { - return test( - test_mallctl_errors, - test_mallctlnametomib_errors, - test_mallctlbymib_errors, - test_mallctl_read_write, - test_mallctlnametomib_short_mib, - test_mallctlnametomib_short_name, - test_mallctlmibnametomib, - test_mallctlbymibname, - test_mallctl_config, - test_mallctl_opt, - test_manpage_example, - test_tcache_none, - test_tcache, - test_thread_arena, - test_arena_i_initialized, - test_arena_i_dirty_decay_ms, - test_arena_i_muzzy_decay_ms, - test_arena_i_purge, - test_arena_i_decay, - test_arena_i_dss, - test_arena_i_name, - test_arena_i_retain_grow_limit, - test_arenas_dirty_decay_ms, - test_arenas_muzzy_decay_ms, - test_arenas_constants, - test_arenas_bin_constants, - test_arenas_lextent_constants, - test_arenas_create, - 
test_arenas_lookup, - test_prof_active, - test_stats_arenas, + return test(test_mallctl_errors, test_mallctlnametomib_errors, + test_mallctlbymib_errors, test_mallctl_read_write, + test_mallctlnametomib_short_mib, test_mallctlnametomib_short_name, + test_mallctlmibnametomib, test_mallctlbymibname, + test_mallctl_config, test_mallctl_opt, test_manpage_example, + test_tcache_none, test_tcache, test_thread_arena, + test_arena_i_initialized, test_arena_i_dirty_decay_ms, + test_arena_i_muzzy_decay_ms, test_arena_i_purge, test_arena_i_decay, + test_arena_i_dss, test_arena_i_name, test_arena_i_retain_grow_limit, + test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, + test_arenas_constants, test_arenas_bin_constants, + test_arenas_lextent_constants, test_arenas_create, + test_arenas_lookup, test_prof_active, test_stats_arenas, test_stats_arenas_hpa_shard_counters, - test_stats_arenas_hpa_shard_slabs, - test_hooks, - test_hooks_exhaustion, - test_thread_idle, - test_thread_peak, - test_thread_activity_callback, - test_thread_event_hook); + test_stats_arenas_hpa_shard_slabs, test_hooks, + test_hooks_exhaustion, test_thread_idle, test_thread_peak, + test_thread_activity_callback, test_thread_event_hook); } diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c index 9d2c6077..023b7102 100644 --- a/test/unit/malloc_conf_2.c +++ b/test/unit/malloc_conf_2.c @@ -13,12 +13,12 @@ TEST_BEGIN(test_malloc_conf_2) { test_skip_if(windows); ssize_t dirty_decay_ms; - size_t sz = sizeof(dirty_decay_ms); + size_t sz = sizeof(dirty_decay_ms); int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); assert_d_eq(err, 0, "Unexpected mallctl failure"); - expect_zd_eq(dirty_decay_ms, 1234, - "malloc_conf_2 setting didn't take effect"); + expect_zd_eq( + dirty_decay_ms, 1234, "malloc_conf_2 setting didn't take effect"); } TEST_END @@ -32,22 +32,24 @@ TEST_BEGIN(test_mallctl_global_var) { test_skip_if(windows); const char *mc; - size_t sz = sizeof(mc); - 
expect_d_eq(mallctl("opt.malloc_conf.global_var", - (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_str_eq(mc, malloc_conf, "Unexpected value for the global variable " + size_t sz = sizeof(mc); + expect_d_eq( + mallctl("opt.malloc_conf.global_var", (void *)&mc, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf, + "Unexpected value for the global variable " "malloc_conf"); expect_d_eq(mallctl("opt.malloc_conf.global_var_2_conf_harder", - (void *)&mc, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); - expect_str_eq(mc, malloc_conf_2_conf_harder, "Unexpected value for the " + (void *)&mc, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + expect_str_eq(mc, malloc_conf_2_conf_harder, + "Unexpected value for the " "global variable malloc_conf_2_conf_harder"); } TEST_END int main(void) { - return test( - test_malloc_conf_2, - test_mallctl_global_var); + return test(test_malloc_conf_2, test_mallctl_global_var); } diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index 385f7450..f7895945 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -14,77 +14,68 @@ TEST_BEGIN(test_malloc_strtoumax) { struct test_s { const char *input; const char *expected_remainder; - int base; - int expected_errno; + int base; + int expected_errno; const char *expected_errno_name; - uintmax_t expected_x; + uintmax_t expected_x; }; -#define ERR(e) e, #e -#define KUMAX(x) ((uintmax_t)x##ULL) -#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) - struct test_s tests[] = { - {"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, - {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, - {"0", "0", 37, ERR(EINVAL), UINTMAX_MAX}, +#define ERR(e) e, #e +#define KUMAX(x) ((uintmax_t)x##ULL) +#define KSMAX(x) ((uintmax_t)(intmax_t)x##LL) + struct test_s tests[] = {{"0", "0", -1, ERR(EINVAL), UINTMAX_MAX}, + {"0", "0", 1, ERR(EINVAL), UINTMAX_MAX}, + {"0", "0", 37, ERR(EINVAL), UINTMAX_MAX}, - {"", "", 0, ERR(EINVAL), UINTMAX_MAX}, - {"+", "+", 0, 
ERR(EINVAL), UINTMAX_MAX}, - {"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX}, - {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX}, + {"", "", 0, ERR(EINVAL), UINTMAX_MAX}, + {"+", "+", 0, ERR(EINVAL), UINTMAX_MAX}, + {"++3", "++3", 0, ERR(EINVAL), UINTMAX_MAX}, + {"-", "-", 0, ERR(EINVAL), UINTMAX_MAX}, - {"42", "", 0, ERR(0), KUMAX(42)}, - {"+42", "", 0, ERR(0), KUMAX(42)}, - {"-42", "", 0, ERR(0), KSMAX(-42)}, - {"042", "", 0, ERR(0), KUMAX(042)}, - {"+042", "", 0, ERR(0), KUMAX(042)}, - {"-042", "", 0, ERR(0), KSMAX(-042)}, - {"0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, - {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, + {"42", "", 0, ERR(0), KUMAX(42)}, {"+42", "", 0, ERR(0), KUMAX(42)}, + {"-42", "", 0, ERR(0), KSMAX(-42)}, + {"042", "", 0, ERR(0), KUMAX(042)}, + {"+042", "", 0, ERR(0), KUMAX(042)}, + {"-042", "", 0, ERR(0), KSMAX(-042)}, + {"0x42", "", 0, ERR(0), KUMAX(0x42)}, + {"+0x42", "", 0, ERR(0), KUMAX(0x42)}, + {"-0x42", "", 0, ERR(0), KSMAX(-0x42)}, - {"0", "", 0, ERR(0), KUMAX(0)}, - {"1", "", 0, ERR(0), KUMAX(1)}, + {"0", "", 0, ERR(0), KUMAX(0)}, {"1", "", 0, ERR(0), KUMAX(1)}, - {"42", "", 0, ERR(0), KUMAX(42)}, - {" 42", "", 0, ERR(0), KUMAX(42)}, - {"42 ", " ", 0, ERR(0), KUMAX(42)}, - {"0x", "x", 0, ERR(0), KUMAX(0)}, - {"42x", "x", 0, ERR(0), KUMAX(42)}, + {"42", "", 0, ERR(0), KUMAX(42)}, {" 42", "", 0, ERR(0), KUMAX(42)}, + {"42 ", " ", 0, ERR(0), KUMAX(42)}, + {"0x", "x", 0, ERR(0), KUMAX(0)}, + {"42x", "x", 0, ERR(0), KUMAX(42)}, - {"07", "", 0, ERR(0), KUMAX(7)}, - {"010", "", 0, ERR(0), KUMAX(8)}, - {"08", "8", 0, ERR(0), KUMAX(0)}, - {"0_", "_", 0, ERR(0), KUMAX(0)}, + {"07", "", 0, ERR(0), KUMAX(7)}, {"010", "", 0, ERR(0), KUMAX(8)}, + {"08", "8", 0, ERR(0), KUMAX(0)}, {"0_", "_", 0, ERR(0), KUMAX(0)}, - {"0x", "x", 0, ERR(0), KUMAX(0)}, - {"0X", "X", 0, ERR(0), KUMAX(0)}, - {"0xg", "xg", 0, ERR(0), KUMAX(0)}, - {"0XA", "", 0, ERR(0), KUMAX(10)}, + {"0x", "x", 0, ERR(0), KUMAX(0)}, {"0X", "X", 0, ERR(0), KUMAX(0)}, + 
{"0xg", "xg", 0, ERR(0), KUMAX(0)}, + {"0XA", "", 0, ERR(0), KUMAX(10)}, - {"010", "", 10, ERR(0), KUMAX(10)}, - {"0x3", "x3", 10, ERR(0), KUMAX(0)}, + {"010", "", 10, ERR(0), KUMAX(10)}, + {"0x3", "x3", 10, ERR(0), KUMAX(0)}, - {"12", "2", 2, ERR(0), KUMAX(1)}, - {"78", "8", 8, ERR(0), KUMAX(7)}, - {"9a", "a", 10, ERR(0), KUMAX(9)}, - {"9A", "A", 10, ERR(0), KUMAX(9)}, - {"fg", "g", 16, ERR(0), KUMAX(15)}, - {"FG", "G", 16, ERR(0), KUMAX(15)}, - {"0xfg", "g", 16, ERR(0), KUMAX(15)}, - {"0XFG", "G", 16, ERR(0), KUMAX(15)}, - {"z_", "_", 36, ERR(0), KUMAX(35)}, - {"Z_", "_", 36, ERR(0), KUMAX(35)} - }; + {"12", "2", 2, ERR(0), KUMAX(1)}, {"78", "8", 8, ERR(0), KUMAX(7)}, + {"9a", "a", 10, ERR(0), KUMAX(9)}, + {"9A", "A", 10, ERR(0), KUMAX(9)}, + {"fg", "g", 16, ERR(0), KUMAX(15)}, + {"FG", "G", 16, ERR(0), KUMAX(15)}, + {"0xfg", "g", 16, ERR(0), KUMAX(15)}, + {"0XFG", "G", 16, ERR(0), KUMAX(15)}, + {"z_", "_", 36, ERR(0), KUMAX(35)}, + {"Z_", "_", 36, ERR(0), KUMAX(35)}}; #undef ERR #undef KUMAX #undef KSMAX unsigned i; - for (i = 0; i < sizeof(tests)/sizeof(struct test_s); i++) { + for (i = 0; i < sizeof(tests) / sizeof(struct test_s); i++) { struct test_s *test = &tests[i]; - int err; - uintmax_t result; - char *remainder; + int err; + uintmax_t result; + char *remainder; set_errno(0); result = malloc_strtoumax(test->input, &remainder, test->base); @@ -93,8 +84,8 @@ TEST_BEGIN(test_malloc_strtoumax) { "Expected errno %s for \"%s\", base %d", test->expected_errno_name, test->input, test->base); expect_str_eq(remainder, test->expected_remainder, - "Unexpected remainder for \"%s\", base %d", - test->input, test->base); + "Unexpected remainder for \"%s\", base %d", test->input, + test->base); if (err == 0) { expect_ju_eq(result, test->expected_x, "Unexpected result for \"%s\", base %d", @@ -105,31 +96,32 @@ TEST_BEGIN(test_malloc_strtoumax) { TEST_END TEST_BEGIN(test_malloc_snprintf_truncated) { -#define BUFLEN 15 - char buf[BUFLEN]; +#define BUFLEN 15 + char 
buf[BUFLEN]; size_t result; size_t len; -#define TEST(expected_str_untruncated, ...) do { \ - result = malloc_snprintf(buf, len, __VA_ARGS__); \ - expect_d_eq(strncmp(buf, expected_str_untruncated, len-1), 0, \ - "Unexpected string inequality (\"%s\" vs \"%s\")", \ - buf, expected_str_untruncated); \ - expect_zu_eq(result, strlen(expected_str_untruncated), \ - "Unexpected result"); \ -} while (0) +#define TEST(expected_str_untruncated, ...) \ + do { \ + result = malloc_snprintf(buf, len, __VA_ARGS__); \ + expect_d_eq(strncmp(buf, expected_str_untruncated, len - 1), \ + 0, "Unexpected string inequality (\"%s\" vs \"%s\")", buf, \ + expected_str_untruncated); \ + expect_zu_eq(result, strlen(expected_str_untruncated), \ + "Unexpected result"); \ + } while (0) for (len = 1; len < BUFLEN; len++) { - TEST("012346789", "012346789"); - TEST("a0123b", "a%sb", "0123"); - TEST("a01234567", "a%s%s", "0123", "4567"); - TEST("a0123 ", "a%-6s", "0123"); - TEST("a 0123", "a%6s", "0123"); - TEST("a 012", "a%6.3s", "0123"); - TEST("a 012", "a%*.*s", 6, 3, "0123"); - TEST("a 123b", "a% db", 123); - TEST("a123b", "a%-db", 123); - TEST("a-123b", "a%-db", -123); - TEST("a+123b", "a%+db", 123); + TEST("012346789", "012346789"); + TEST("a0123b", "a%sb", "0123"); + TEST("a01234567", "a%s%s", "0123", "4567"); + TEST("a0123 ", "a%-6s", "0123"); + TEST("a 0123", "a%6s", "0123"); + TEST("a 012", "a%6.3s", "0123"); + TEST("a 012", "a%*.*s", 6, 3, "0123"); + TEST("a 123b", "a% db", 123); + TEST("a123b", "a%-db", 123); + TEST("a-123b", "a%-db", -123); + TEST("a+123b", "a%+db", 123); } #undef BUFLEN #undef TEST @@ -137,14 +129,16 @@ TEST_BEGIN(test_malloc_snprintf_truncated) { TEST_END TEST_BEGIN(test_malloc_snprintf) { -#define BUFLEN 128 - char buf[BUFLEN]; +#define BUFLEN 128 + char buf[BUFLEN]; size_t result; -#define TEST(expected_str, ...) 
do { \ - result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ - expect_str_eq(buf, expected_str, "Unexpected output"); \ - expect_zu_eq(result, strlen(expected_str), "Unexpected result");\ -} while (0) +#define TEST(expected_str, ...) \ + do { \ + result = malloc_snprintf(buf, sizeof(buf), __VA_ARGS__); \ + expect_str_eq(buf, expected_str, "Unexpected output"); \ + expect_zu_eq( \ + result, strlen(expected_str), "Unexpected result"); \ + } while (0) TEST("hello", "hello"); @@ -260,9 +254,6 @@ TEST_END int main(void) { - return test( - test_malloc_strtoumax_no_endptr, - test_malloc_strtoumax, - test_malloc_snprintf_truncated, - test_malloc_snprintf); + return test(test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, + test_malloc_snprintf_truncated, test_malloc_snprintf); } diff --git a/test/unit/math.c b/test/unit/math.c index a32767c5..b0994768 100644 --- a/test/unit/math.c +++ b/test/unit/math.c @@ -6,11 +6,11 @@ #include #ifdef __PGI -#undef INFINITY +# undef INFINITY #endif #ifndef INFINITY -#define INFINITY (DBL_MAX + DBL_MAX) +# define INFINITY (DBL_MAX + DBL_MAX) #endif static bool @@ -20,7 +20,7 @@ double_eq_rel(double a, double b, double max_rel_err, double max_abs_err) { if (fabs(a - b) < max_abs_err) { return true; } - rel_err = (fabs(b) > fabs(a)) ? fabs((a-b)/b) : fabs((a-b)/a); + rel_err = (fabs(b) > fabs(a)) ? fabs((a - b) / b) : fabs((a - b) / a); return (rel_err < max_rel_err); } @@ -41,209 +41,206 @@ TEST_BEGIN(test_ln_gamma_factorial) { /* exp(ln_gamma(x)) == (x-1)! for integer x. */ for (x = 1; x <= 21; x++) { - expect_true(double_eq_rel(exp(ln_gamma(x)), - (double)factorial(x-1), MAX_REL_ERR, MAX_ABS_ERR), + expect_true( + double_eq_rel(exp(ln_gamma(x)), (double)factorial(x - 1), + MAX_REL_ERR, MAX_ABS_ERR), "Incorrect factorial result for x=%u", x); } } TEST_END /* Expected ln_gamma([0.0..100.0] increment=0.25). 
*/ -static const double ln_gamma_misc_expected[] = { - INFINITY, - 1.28802252469807743, 0.57236494292470008, 0.20328095143129538, - 0.00000000000000000, -0.09827183642181320, -0.12078223763524518, - -0.08440112102048555, 0.00000000000000000, 0.12487171489239651, - 0.28468287047291918, 0.47521466691493719, 0.69314718055994529, - 0.93580193110872523, 1.20097360234707429, 1.48681557859341718, - 1.79175946922805496, 2.11445692745037128, 2.45373657084244234, - 2.80857141857573644, 3.17805383034794575, 3.56137591038669710, - 3.95781396761871651, 4.36671603662228680, 4.78749174278204581, - 5.21960398699022932, 5.66256205985714178, 6.11591589143154568, - 6.57925121201010121, 7.05218545073853953, 7.53436423675873268, - 8.02545839631598312, 8.52516136106541467, 9.03318691960512332, - 9.54926725730099690, 10.07315123968123949, 10.60460290274525086, - 11.14340011995171231, 11.68933342079726856, 12.24220494005076176, - 12.80182748008146909, 13.36802367147604720, 13.94062521940376342, - 14.51947222506051816, 15.10441257307551943, 15.69530137706046524, - 16.29200047656724237, 16.89437797963419285, 17.50230784587389010, - 18.11566950571089407, 18.73434751193644843, 19.35823122022435427, - 19.98721449566188468, 20.62119544270163018, 21.26007615624470048, - 21.90376249182879320, 22.55216385312342098, 23.20519299513386002, - 23.86276584168908954, 24.52480131594137802, 25.19122118273868338, - 25.86194990184851861, 26.53691449111561340, 27.21604439872720604, - 27.89927138384089389, 28.58652940490193828, 29.27775451504081516, - 29.97288476399884871, 30.67186010608067548, 31.37462231367769050, - 32.08111489594735843, 32.79128302226991565, 33.50507345013689076, - 34.22243445715505317, 34.94331577687681545, 35.66766853819134298, - 36.39544520803305261, 37.12659953718355865, 37.86108650896109395, - 38.59886229060776230, 39.33988418719949465, 40.08411059791735198, - 40.83150097453079752, 41.58201578195490100, 42.33561646075348506, - 43.09226539146988699, 43.85192586067515208, 
44.61456202863158893, - 45.38013889847690052, 46.14862228684032885, 46.91997879580877395, - 47.69417578616628361, 48.47118135183522014, 49.25096429545256882, - 50.03349410501914463, 50.81874093156324790, 51.60667556776436982, - 52.39726942748592364, 53.19049452616926743, 53.98632346204390586, - 54.78472939811231157, 55.58568604486942633, 56.38916764371992940, - 57.19514895105859864, 58.00360522298051080, 58.81451220059079787, - 59.62784609588432261, 60.44358357816834371, 61.26170176100199427, - 62.08217818962842927, 62.90499082887649962, 63.73011805151035958, - 64.55753862700632340, 65.38723171073768015, 66.21917683354901385, - 67.05335389170279825, 67.88974313718154008, 68.72832516833013017, - 69.56908092082363737, 70.41199165894616385, 71.25703896716800045, - 72.10420474200799390, 72.95347118416940191, 73.80482079093779646, - 74.65823634883015814, 75.51370092648485866, 76.37119786778275454, - 77.23071078519033961, 78.09222355331530707, 78.95572030266725960, - 79.82118541361435859, 80.68860351052903468, 81.55795945611502873, - 82.42923834590904164, 83.30242550295004378, 84.17750647261028973, - 85.05446701758152983, 85.93329311301090456, 86.81397094178107920, - 87.69648688992882057, 88.58082754219766741, 89.46697967771913795, - 90.35493026581838194, 91.24466646193963015, 92.13617560368709292, - 93.02944520697742803, 93.92446296229978486, 94.82121673107967297, - 95.71969454214321615, 96.61988458827809723, 97.52177522288820910, - 98.42535495673848800, 99.33061245478741341, 100.23753653310367895, - 101.14611615586458981, 102.05634043243354370, 102.96819861451382394, - 103.88168009337621811, 104.79677439715833032, 105.71347118823287303, - 106.63176026064346047, 107.55163153760463501, 108.47307506906540198, - 109.39608102933323153, 110.32063971475740516, 111.24674154146920557, - 112.17437704317786995, 113.10353686902013237, 114.03421178146170689, - 114.96639265424990128, 115.90007047041454769, 116.83523632031698014, - 117.77188139974506953, 118.70999700805310795, 
119.64957454634490830, - 120.59060551569974962, 121.53308151543865279, 122.47699424143097247, - 123.42233548443955726, 124.36909712850338394, 125.31727114935689826, - 126.26684961288492559, 127.21782467361175861, 128.17018857322420899, - 129.12393363912724453, 130.07905228303084755, 131.03553699956862033, - 131.99338036494577864, 132.95257503561629164, 133.91311374698926784, - 134.87498931216194364, 135.83819462068046846, 136.80272263732638294, - 137.76856640092901785, 138.73571902320256299, 139.70417368760718091, - 140.67392364823425055, 141.64496222871400732, 142.61728282114600574, - 143.59087888505104047, 144.56574394634486680, 145.54187159633210058, - 146.51925549072063859, 147.49788934865566148, 148.47776695177302031, - 149.45888214327129617, 150.44122882700193600, 151.42480096657754984, - 152.40959258449737490, 153.39559776128982094, 154.38281063467164245, - 155.37122539872302696, 156.36083630307879844, 157.35163765213474107, - 158.34362380426921391, 159.33678917107920370, 160.33112821663092973, - 161.32663545672428995, 162.32330545817117695, 163.32113283808695314, - 164.32011226319519892, 165.32023844914485267, 166.32150615984036790, - 167.32391020678358018, 168.32744544842768164, 169.33210678954270634, - 170.33788918059275375, 171.34478761712384198, 172.35279713916281707, - 173.36191283062726143, 174.37212981874515094, 175.38344327348534080, - 176.39584840699734514, 177.40934047306160437, 178.42391476654847793, - 179.43956662288721304, 180.45629141754378111, 181.47408456550741107, - 182.49294152078630304, 183.51285777591152737, 184.53382886144947861, - 185.55585034552262869, 186.57891783333786861, 187.60302696672312095, - 188.62817342367162610, 189.65435291789341932, 190.68156119837468054, - 191.70979404894376330, 192.73904728784492590, 193.76931676731820176, - 194.80059837318714244, 195.83288802445184729, 196.86618167288995096, - 197.90047530266301123, 198.93576492992946214, 199.97204660246373464, - 201.00931639928148797, 202.04757043027063901, 
203.08680483582807597, - 204.12701578650228385, 205.16819948264117102, 206.21035215404597807, - 207.25347005962987623, 208.29754948708190909, 209.34258675253678916, - 210.38857820024875878, 211.43552020227099320, 212.48340915813977858, - 213.53224149456323744, 214.58201366511514152, 215.63272214993284592, - 216.68436345542014010, 217.73693411395422004, 218.79043068359703739, - 219.84484974781133815, 220.90018791517996988, 221.95644181913033322, - 223.01360811766215875, 224.07168349307951871, 225.13066465172661879, - 226.19054832372759734, 227.25133126272962159, 228.31301024565024704, - 229.37558207242807384, 230.43904356577689896, 231.50339157094342113, - 232.56862295546847008, 233.63473460895144740, 234.70172344281823484, - 235.76958639009222907, 236.83832040516844586, 237.90792246359117712, - 238.97838956183431947, 240.04971871708477238, 241.12190696702904802, - 242.19495136964280846, 243.26884900298270509, 244.34359696498191283, - 245.41919237324782443, 246.49563236486270057, 247.57291409618682110, - 248.65103474266476269, 249.72999149863338175, 250.80978157713354904, - 251.89040220972316320, 252.97185064629374551, 254.05412415488834199, - 255.13722002152300661, 256.22113555000953511, 257.30586806178126835, - 258.39141489572085675, 259.47777340799029844, 260.56494097186322279, - 261.65291497755913497, 262.74169283208021852, 263.83127195904967266, - 264.92164979855277807, 266.01282380697938379, 267.10479145686849733, - 268.19755023675537586, 269.29109765101975427, 270.38543121973674488, - 271.48054847852881721, 272.57644697842033565, 273.67312428569374561, - 274.77057798174683967, 275.86880566295326389, 276.96780494052313770, - 278.06757344036617496, 279.16810880295668085, 280.26940868320008349, - 281.37147075030043197, 282.47429268763045229, 283.57787219260217171, - 284.68220697654078322, 285.78729476455760050, 286.89313329542699194, - 287.99972032146268930, 289.10705360839756395, 290.21513093526289140, - 291.32395009427028754, 292.43350889069523646, 
293.54380514276073200, - 294.65483668152336350, 295.76660135076059532, 296.87909700685889902, - 297.99232151870342022, 299.10627276756946458, 300.22094864701409733, - 301.33634706277030091, 302.45246593264130297, 303.56930318639643929, - 304.68685676566872189, 305.80512462385280514, 306.92410472600477078, - 308.04379504874236773, 309.16419358014690033, 310.28529831966631036, - 311.40710727801865687, 312.52961847709792664, 313.65282994987899201, - 314.77673974032603610, 315.90134590329950015, 317.02664650446632777, - 318.15263962020929966, 319.27932333753892635, 320.40669575400545455, - 321.53475497761127144, 322.66349912672620803, 323.79292633000159185, - 324.92303472628691452, 326.05382246454587403, 327.18528770377525916, - 328.31742861292224234, 329.45024337080525356, 330.58373016603343331, - 331.71788719692847280, 332.85271267144611329, 333.98820480709991898, - 335.12436183088397001, 336.26118197919845443, 337.39866349777429377, - 338.53680464159958774, 339.67560367484657036, 340.81505887079896411, - 341.95516851178109619, 343.09593088908627578, 344.23734430290727460, - 345.37940706226686416, 346.52211748494903532, 347.66547389743118401, - 348.80947463481720661, 349.95411804077025408, 351.09940246744753267, - 352.24532627543504759, 353.39188783368263103, 354.53908551944078908, - 355.68691771819692349, 356.83538282361303118, 357.98447923746385868, - 359.13420536957539753 -}; +static const double ln_gamma_misc_expected[] = {INFINITY, 1.28802252469807743, + 0.57236494292470008, 0.20328095143129538, 0.00000000000000000, + -0.09827183642181320, -0.12078223763524518, -0.08440112102048555, + 0.00000000000000000, 0.12487171489239651, 0.28468287047291918, + 0.47521466691493719, 0.69314718055994529, 0.93580193110872523, + 1.20097360234707429, 1.48681557859341718, 1.79175946922805496, + 2.11445692745037128, 2.45373657084244234, 2.80857141857573644, + 3.17805383034794575, 3.56137591038669710, 3.95781396761871651, + 4.36671603662228680, 4.78749174278204581, 
5.21960398699022932, + 5.66256205985714178, 6.11591589143154568, 6.57925121201010121, + 7.05218545073853953, 7.53436423675873268, 8.02545839631598312, + 8.52516136106541467, 9.03318691960512332, 9.54926725730099690, + 10.07315123968123949, 10.60460290274525086, 11.14340011995171231, + 11.68933342079726856, 12.24220494005076176, 12.80182748008146909, + 13.36802367147604720, 13.94062521940376342, 14.51947222506051816, + 15.10441257307551943, 15.69530137706046524, 16.29200047656724237, + 16.89437797963419285, 17.50230784587389010, 18.11566950571089407, + 18.73434751193644843, 19.35823122022435427, 19.98721449566188468, + 20.62119544270163018, 21.26007615624470048, 21.90376249182879320, + 22.55216385312342098, 23.20519299513386002, 23.86276584168908954, + 24.52480131594137802, 25.19122118273868338, 25.86194990184851861, + 26.53691449111561340, 27.21604439872720604, 27.89927138384089389, + 28.58652940490193828, 29.27775451504081516, 29.97288476399884871, + 30.67186010608067548, 31.37462231367769050, 32.08111489594735843, + 32.79128302226991565, 33.50507345013689076, 34.22243445715505317, + 34.94331577687681545, 35.66766853819134298, 36.39544520803305261, + 37.12659953718355865, 37.86108650896109395, 38.59886229060776230, + 39.33988418719949465, 40.08411059791735198, 40.83150097453079752, + 41.58201578195490100, 42.33561646075348506, 43.09226539146988699, + 43.85192586067515208, 44.61456202863158893, 45.38013889847690052, + 46.14862228684032885, 46.91997879580877395, 47.69417578616628361, + 48.47118135183522014, 49.25096429545256882, 50.03349410501914463, + 50.81874093156324790, 51.60667556776436982, 52.39726942748592364, + 53.19049452616926743, 53.98632346204390586, 54.78472939811231157, + 55.58568604486942633, 56.38916764371992940, 57.19514895105859864, + 58.00360522298051080, 58.81451220059079787, 59.62784609588432261, + 60.44358357816834371, 61.26170176100199427, 62.08217818962842927, + 62.90499082887649962, 63.73011805151035958, 64.55753862700632340, + 
65.38723171073768015, 66.21917683354901385, 67.05335389170279825, + 67.88974313718154008, 68.72832516833013017, 69.56908092082363737, + 70.41199165894616385, 71.25703896716800045, 72.10420474200799390, + 72.95347118416940191, 73.80482079093779646, 74.65823634883015814, + 75.51370092648485866, 76.37119786778275454, 77.23071078519033961, + 78.09222355331530707, 78.95572030266725960, 79.82118541361435859, + 80.68860351052903468, 81.55795945611502873, 82.42923834590904164, + 83.30242550295004378, 84.17750647261028973, 85.05446701758152983, + 85.93329311301090456, 86.81397094178107920, 87.69648688992882057, + 88.58082754219766741, 89.46697967771913795, 90.35493026581838194, + 91.24466646193963015, 92.13617560368709292, 93.02944520697742803, + 93.92446296229978486, 94.82121673107967297, 95.71969454214321615, + 96.61988458827809723, 97.52177522288820910, 98.42535495673848800, + 99.33061245478741341, 100.23753653310367895, 101.14611615586458981, + 102.05634043243354370, 102.96819861451382394, 103.88168009337621811, + 104.79677439715833032, 105.71347118823287303, 106.63176026064346047, + 107.55163153760463501, 108.47307506906540198, 109.39608102933323153, + 110.32063971475740516, 111.24674154146920557, 112.17437704317786995, + 113.10353686902013237, 114.03421178146170689, 114.96639265424990128, + 115.90007047041454769, 116.83523632031698014, 117.77188139974506953, + 118.70999700805310795, 119.64957454634490830, 120.59060551569974962, + 121.53308151543865279, 122.47699424143097247, 123.42233548443955726, + 124.36909712850338394, 125.31727114935689826, 126.26684961288492559, + 127.21782467361175861, 128.17018857322420899, 129.12393363912724453, + 130.07905228303084755, 131.03553699956862033, 131.99338036494577864, + 132.95257503561629164, 133.91311374698926784, 134.87498931216194364, + 135.83819462068046846, 136.80272263732638294, 137.76856640092901785, + 138.73571902320256299, 139.70417368760718091, 140.67392364823425055, + 141.64496222871400732, 142.61728282114600574, 
143.59087888505104047, + 144.56574394634486680, 145.54187159633210058, 146.51925549072063859, + 147.49788934865566148, 148.47776695177302031, 149.45888214327129617, + 150.44122882700193600, 151.42480096657754984, 152.40959258449737490, + 153.39559776128982094, 154.38281063467164245, 155.37122539872302696, + 156.36083630307879844, 157.35163765213474107, 158.34362380426921391, + 159.33678917107920370, 160.33112821663092973, 161.32663545672428995, + 162.32330545817117695, 163.32113283808695314, 164.32011226319519892, + 165.32023844914485267, 166.32150615984036790, 167.32391020678358018, + 168.32744544842768164, 169.33210678954270634, 170.33788918059275375, + 171.34478761712384198, 172.35279713916281707, 173.36191283062726143, + 174.37212981874515094, 175.38344327348534080, 176.39584840699734514, + 177.40934047306160437, 178.42391476654847793, 179.43956662288721304, + 180.45629141754378111, 181.47408456550741107, 182.49294152078630304, + 183.51285777591152737, 184.53382886144947861, 185.55585034552262869, + 186.57891783333786861, 187.60302696672312095, 188.62817342367162610, + 189.65435291789341932, 190.68156119837468054, 191.70979404894376330, + 192.73904728784492590, 193.76931676731820176, 194.80059837318714244, + 195.83288802445184729, 196.86618167288995096, 197.90047530266301123, + 198.93576492992946214, 199.97204660246373464, 201.00931639928148797, + 202.04757043027063901, 203.08680483582807597, 204.12701578650228385, + 205.16819948264117102, 206.21035215404597807, 207.25347005962987623, + 208.29754948708190909, 209.34258675253678916, 210.38857820024875878, + 211.43552020227099320, 212.48340915813977858, 213.53224149456323744, + 214.58201366511514152, 215.63272214993284592, 216.68436345542014010, + 217.73693411395422004, 218.79043068359703739, 219.84484974781133815, + 220.90018791517996988, 221.95644181913033322, 223.01360811766215875, + 224.07168349307951871, 225.13066465172661879, 226.19054832372759734, + 227.25133126272962159, 228.31301024565024704, 
229.37558207242807384, + 230.43904356577689896, 231.50339157094342113, 232.56862295546847008, + 233.63473460895144740, 234.70172344281823484, 235.76958639009222907, + 236.83832040516844586, 237.90792246359117712, 238.97838956183431947, + 240.04971871708477238, 241.12190696702904802, 242.19495136964280846, + 243.26884900298270509, 244.34359696498191283, 245.41919237324782443, + 246.49563236486270057, 247.57291409618682110, 248.65103474266476269, + 249.72999149863338175, 250.80978157713354904, 251.89040220972316320, + 252.97185064629374551, 254.05412415488834199, 255.13722002152300661, + 256.22113555000953511, 257.30586806178126835, 258.39141489572085675, + 259.47777340799029844, 260.56494097186322279, 261.65291497755913497, + 262.74169283208021852, 263.83127195904967266, 264.92164979855277807, + 266.01282380697938379, 267.10479145686849733, 268.19755023675537586, + 269.29109765101975427, 270.38543121973674488, 271.48054847852881721, + 272.57644697842033565, 273.67312428569374561, 274.77057798174683967, + 275.86880566295326389, 276.96780494052313770, 278.06757344036617496, + 279.16810880295668085, 280.26940868320008349, 281.37147075030043197, + 282.47429268763045229, 283.57787219260217171, 284.68220697654078322, + 285.78729476455760050, 286.89313329542699194, 287.99972032146268930, + 289.10705360839756395, 290.21513093526289140, 291.32395009427028754, + 292.43350889069523646, 293.54380514276073200, 294.65483668152336350, + 295.76660135076059532, 296.87909700685889902, 297.99232151870342022, + 299.10627276756946458, 300.22094864701409733, 301.33634706277030091, + 302.45246593264130297, 303.56930318639643929, 304.68685676566872189, + 305.80512462385280514, 306.92410472600477078, 308.04379504874236773, + 309.16419358014690033, 310.28529831966631036, 311.40710727801865687, + 312.52961847709792664, 313.65282994987899201, 314.77673974032603610, + 315.90134590329950015, 317.02664650446632777, 318.15263962020929966, + 319.27932333753892635, 320.40669575400545455, 
321.53475497761127144, + 322.66349912672620803, 323.79292633000159185, 324.92303472628691452, + 326.05382246454587403, 327.18528770377525916, 328.31742861292224234, + 329.45024337080525356, 330.58373016603343331, 331.71788719692847280, + 332.85271267144611329, 333.98820480709991898, 335.12436183088397001, + 336.26118197919845443, 337.39866349777429377, 338.53680464159958774, + 339.67560367484657036, 340.81505887079896411, 341.95516851178109619, + 343.09593088908627578, 344.23734430290727460, 345.37940706226686416, + 346.52211748494903532, 347.66547389743118401, 348.80947463481720661, + 349.95411804077025408, 351.09940246744753267, 352.24532627543504759, + 353.39188783368263103, 354.53908551944078908, 355.68691771819692349, + 356.83538282361303118, 357.98447923746385868, 359.13420536957539753}; TEST_BEGIN(test_ln_gamma_misc) { unsigned i; - for (i = 1; i < sizeof(ln_gamma_misc_expected)/sizeof(double); i++) { + for (i = 1; i < sizeof(ln_gamma_misc_expected) / sizeof(double); i++) { double x = (double)i * 0.25; - expect_true(double_eq_rel(ln_gamma(x), - ln_gamma_misc_expected[i], MAX_REL_ERR, MAX_ABS_ERR), + expect_true( + double_eq_rel(ln_gamma(x), ln_gamma_misc_expected[i], + MAX_REL_ERR, MAX_ABS_ERR), "Incorrect ln_gamma result for i=%u", i); } } TEST_END /* Expected pt_norm([0.01..0.99] increment=0.01). 
*/ -static const double pt_norm_expected[] = { - -INFINITY, - -2.32634787404084076, -2.05374891063182252, -1.88079360815125085, - -1.75068607125216946, -1.64485362695147264, -1.55477359459685305, - -1.47579102817917063, -1.40507156030963221, -1.34075503369021654, - -1.28155156554460081, -1.22652812003661049, -1.17498679206608991, - -1.12639112903880045, -1.08031934081495606, -1.03643338949378938, - -0.99445788320975281, -0.95416525314619416, -0.91536508784281390, - -0.87789629505122846, -0.84162123357291418, -0.80642124701824025, - -0.77219321418868492, -0.73884684918521371, -0.70630256284008752, - -0.67448975019608171, -0.64334540539291685, -0.61281299101662701, - -0.58284150727121620, -0.55338471955567281, -0.52440051270804067, - -0.49585034734745320, -0.46769879911450812, -0.43991316567323380, - -0.41246312944140462, -0.38532046640756751, -0.35845879325119373, - -0.33185334643681652, -0.30548078809939738, -0.27931903444745404, - -0.25334710313579978, -0.22754497664114931, -0.20189347914185077, - -0.17637416478086135, -0.15096921549677725, -0.12566134685507399, - -0.10043372051146975, -0.07526986209982976, -0.05015358346473352, - -0.02506890825871106, 0.00000000000000000, 0.02506890825871106, - 0.05015358346473366, 0.07526986209982990, 0.10043372051146990, - 0.12566134685507413, 0.15096921549677739, 0.17637416478086146, - 0.20189347914185105, 0.22754497664114931, 0.25334710313579978, - 0.27931903444745404, 0.30548078809939738, 0.33185334643681652, - 0.35845879325119373, 0.38532046640756762, 0.41246312944140484, - 0.43991316567323391, 0.46769879911450835, 0.49585034734745348, - 0.52440051270804111, 0.55338471955567303, 0.58284150727121620, - 0.61281299101662701, 0.64334540539291685, 0.67448975019608171, - 0.70630256284008752, 0.73884684918521371, 0.77219321418868492, - 0.80642124701824036, 0.84162123357291441, 0.87789629505122879, - 0.91536508784281423, 0.95416525314619460, 0.99445788320975348, - 1.03643338949378938, 1.08031934081495606, 1.12639112903880045, - 
1.17498679206608991, 1.22652812003661049, 1.28155156554460081, - 1.34075503369021654, 1.40507156030963265, 1.47579102817917085, - 1.55477359459685394, 1.64485362695147308, 1.75068607125217102, - 1.88079360815125041, 2.05374891063182208, 2.32634787404084076 -}; +static const double pt_norm_expected[] = {-INFINITY, -2.32634787404084076, + -2.05374891063182252, -1.88079360815125085, -1.75068607125216946, + -1.64485362695147264, -1.55477359459685305, -1.47579102817917063, + -1.40507156030963221, -1.34075503369021654, -1.28155156554460081, + -1.22652812003661049, -1.17498679206608991, -1.12639112903880045, + -1.08031934081495606, -1.03643338949378938, -0.99445788320975281, + -0.95416525314619416, -0.91536508784281390, -0.87789629505122846, + -0.84162123357291418, -0.80642124701824025, -0.77219321418868492, + -0.73884684918521371, -0.70630256284008752, -0.67448975019608171, + -0.64334540539291685, -0.61281299101662701, -0.58284150727121620, + -0.55338471955567281, -0.52440051270804067, -0.49585034734745320, + -0.46769879911450812, -0.43991316567323380, -0.41246312944140462, + -0.38532046640756751, -0.35845879325119373, -0.33185334643681652, + -0.30548078809939738, -0.27931903444745404, -0.25334710313579978, + -0.22754497664114931, -0.20189347914185077, -0.17637416478086135, + -0.15096921549677725, -0.12566134685507399, -0.10043372051146975, + -0.07526986209982976, -0.05015358346473352, -0.02506890825871106, + 0.00000000000000000, 0.02506890825871106, 0.05015358346473366, + 0.07526986209982990, 0.10043372051146990, 0.12566134685507413, + 0.15096921549677739, 0.17637416478086146, 0.20189347914185105, + 0.22754497664114931, 0.25334710313579978, 0.27931903444745404, + 0.30548078809939738, 0.33185334643681652, 0.35845879325119373, + 0.38532046640756762, 0.41246312944140484, 0.43991316567323391, + 0.46769879911450835, 0.49585034734745348, 0.52440051270804111, + 0.55338471955567303, 0.58284150727121620, 0.61281299101662701, + 0.64334540539291685, 0.67448975019608171, 
0.70630256284008752, + 0.73884684918521371, 0.77219321418868492, 0.80642124701824036, + 0.84162123357291441, 0.87789629505122879, 0.91536508784281423, + 0.95416525314619460, 0.99445788320975348, 1.03643338949378938, + 1.08031934081495606, 1.12639112903880045, 1.17498679206608991, + 1.22652812003661049, 1.28155156554460081, 1.34075503369021654, + 1.40507156030963265, 1.47579102817917085, 1.55477359459685394, + 1.64485362695147308, 1.75068607125217102, 1.88079360815125041, + 2.05374891063182208, 2.32634787404084076}; TEST_BEGIN(test_pt_norm) { unsigned i; - for (i = 1; i < sizeof(pt_norm_expected)/sizeof(double); i++) { + for (i = 1; i < sizeof(pt_norm_expected) / sizeof(double); i++) { double p = (double)i * 0.01; expect_true(double_eq_rel(pt_norm(p), pt_norm_expected[i], - MAX_REL_ERR, MAX_ABS_ERR), + MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_norm result for i=%u", i); } } @@ -254,49 +251,49 @@ TEST_END * df={0.1, 1.1, 10.1, 100.1, 1000.1}). */ static const double pt_chi2_df[] = {0.1, 1.1, 10.1, 100.1, 1000.1}; -static const double pt_chi2_expected[] = { - 1.168926411457320e-40, 1.347680397072034e-22, 3.886980416666260e-17, - 8.245951724356564e-14, 2.068936347497604e-11, 1.562561743309233e-09, - 5.459543043426564e-08, 1.114775688149252e-06, 1.532101202364371e-05, - 1.553884683726585e-04, 1.239396954915939e-03, 8.153872320255721e-03, - 4.631183739647523e-02, 2.473187311701327e-01, 2.175254800183617e+00, +static const double pt_chi2_expected[] = {1.168926411457320e-40, + 1.347680397072034e-22, 3.886980416666260e-17, 8.245951724356564e-14, + 2.068936347497604e-11, 1.562561743309233e-09, 5.459543043426564e-08, + 1.114775688149252e-06, 1.532101202364371e-05, 1.553884683726585e-04, + 1.239396954915939e-03, 8.153872320255721e-03, 4.631183739647523e-02, + 2.473187311701327e-01, 2.175254800183617e+00, - 0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113, - 0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931, - 0.3939246282787986497, 
0.5420727552260817816, 0.7267265822221973259, - 0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304, - 2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839, + 0.0003729887888876379, 0.0164409238228929513, 0.0521523015190650113, + 0.1064701372271216612, 0.1800913735793082115, 0.2748704281195626931, + 0.3939246282787986497, 0.5420727552260817816, 0.7267265822221973259, + 0.9596554296000253670, 1.2607440376386165326, 1.6671185084541604304, + 2.2604828984738705167, 3.2868613342148607082, 6.9298574921692139839, - 2.606673548632508, 4.602913725294877, 5.646152813924212, - 6.488971315540869, 7.249823275816285, 7.977314231410841, - 8.700354939944047, 9.441728024225892, 10.224338321374127, - 11.076435368801061, 12.039320937038386, 13.183878752697167, - 14.657791935084575, 16.885728216339373, 23.361991680031817, + 2.606673548632508, 4.602913725294877, 5.646152813924212, 6.488971315540869, + 7.249823275816285, 7.977314231410841, 8.700354939944047, 9.441728024225892, + 10.224338321374127, 11.076435368801061, 12.039320937038386, + 13.183878752697167, 14.657791935084575, 16.885728216339373, + 23.361991680031817, - 70.14844087392152, 80.92379498849355, 85.53325420085891, - 88.94433120715347, 91.83732712857017, 94.46719943606301, - 96.96896479994635, 99.43412843510363, 101.94074719829733, - 104.57228644307247, 107.43900093448734, 110.71844673417287, - 114.76616819871325, 120.57422505959563, 135.92318818757556, + 70.14844087392152, 80.92379498849355, 85.53325420085891, 88.94433120715347, + 91.83732712857017, 94.46719943606301, 96.96896479994635, 99.43412843510363, + 101.94074719829733, 104.57228644307247, 107.43900093448734, + 110.71844673417287, 114.76616819871325, 120.57422505959563, + 135.92318818757556, - 899.0072447849649, 937.9271278858220, 953.8117189560207, - 965.3079371501154, 974.8974061207954, 983.4936235182347, - 991.5691170518946, 999.4334123954690, 1007.3391826856553, - 1015.5445154999951, 1024.3777075619569, 1034.3538789836223, - 
1046.4872561869577, 1063.5717461999654, 1107.0741966053859 -}; + 899.0072447849649, 937.9271278858220, 953.8117189560207, 965.3079371501154, + 974.8974061207954, 983.4936235182347, 991.5691170518946, 999.4334123954690, + 1007.3391826856553, 1015.5445154999951, 1024.3777075619569, + 1034.3538789836223, 1046.4872561869577, 1063.5717461999654, + 1107.0741966053859}; TEST_BEGIN(test_pt_chi2) { unsigned i, j; unsigned e = 0; - for (i = 0; i < sizeof(pt_chi2_df)/sizeof(double); i++) { + for (i = 0; i < sizeof(pt_chi2_df) / sizeof(double); i++) { double df = pt_chi2_df[i]; double ln_gamma_df = ln_gamma(df * 0.5); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - expect_true(double_eq_rel(pt_chi2(p, df, ln_gamma_df), - pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), + expect_true( + double_eq_rel(pt_chi2(p, df, ln_gamma_df), + pt_chi2_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_chi2 result for i=%u, j=%u", i, j); e++; } @@ -309,56 +306,56 @@ TEST_END * shape=[0.5..3.0] increment=0.5). 
*/ static const double pt_gamma_shape[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0}; -static const double pt_gamma_expected[] = { - 7.854392895485103e-05, 5.043466107888016e-03, 1.788288957794883e-02, - 3.900956150232906e-02, 6.913847560638034e-02, 1.093710833465766e-01, - 1.613412523825817e-01, 2.274682115597864e-01, 3.114117323127083e-01, - 4.189466220207417e-01, 5.598106789059246e-01, 7.521856146202706e-01, - 1.036125427911119e+00, 1.532450860038180e+00, 3.317448300510606e+00, +static const double pt_gamma_expected[] = {7.854392895485103e-05, + 5.043466107888016e-03, 1.788288957794883e-02, 3.900956150232906e-02, + 6.913847560638034e-02, 1.093710833465766e-01, 1.613412523825817e-01, + 2.274682115597864e-01, 3.114117323127083e-01, 4.189466220207417e-01, + 5.598106789059246e-01, 7.521856146202706e-01, 1.036125427911119e+00, + 1.532450860038180e+00, 3.317448300510606e+00, - 0.01005033585350144, 0.08338160893905107, 0.16251892949777497, - 0.24846135929849966, 0.34249030894677596, 0.44628710262841947, - 0.56211891815354142, 0.69314718055994529, 0.84397007029452920, - 1.02165124753198167, 1.23787435600161766, 1.51412773262977574, - 1.89711998488588196, 2.52572864430825783, 4.60517018598809091, + 0.01005033585350144, 0.08338160893905107, 0.16251892949777497, + 0.24846135929849966, 0.34249030894677596, 0.44628710262841947, + 0.56211891815354142, 0.69314718055994529, 0.84397007029452920, + 1.02165124753198167, 1.23787435600161766, 1.51412773262977574, + 1.89711998488588196, 2.52572864430825783, 4.60517018598809091, - 0.05741590094955853, 0.24747378084860744, 0.39888572212236084, - 0.54394139997444901, 0.69048812513915159, 0.84311389861296104, - 1.00580622221479898, 1.18298694218766931, 1.38038096305861213, - 1.60627736383027453, 1.87396970522337947, 2.20749220408081070, - 2.65852391865854942, 3.37934630984842244, 5.67243336507218476, + 0.05741590094955853, 0.24747378084860744, 0.39888572212236084, + 0.54394139997444901, 0.69048812513915159, 0.84311389861296104, + 
1.00580622221479898, 1.18298694218766931, 1.38038096305861213, + 1.60627736383027453, 1.87396970522337947, 2.20749220408081070, + 2.65852391865854942, 3.37934630984842244, 5.67243336507218476, - 0.1485547402532659, 0.4657458011640391, 0.6832386130709406, - 0.8794297834672100, 1.0700752852474524, 1.2629614217350744, - 1.4638400448580779, 1.6783469900166610, 1.9132338090606940, - 2.1778589228618777, 2.4868823970010991, 2.8664695666264195, - 3.3724415436062114, 4.1682658512758071, 6.6383520679938108, + 0.1485547402532659, 0.4657458011640391, 0.6832386130709406, + 0.8794297834672100, 1.0700752852474524, 1.2629614217350744, + 1.4638400448580779, 1.6783469900166610, 1.9132338090606940, + 2.1778589228618777, 2.4868823970010991, 2.8664695666264195, + 3.3724415436062114, 4.1682658512758071, 6.6383520679938108, - 0.2771490383641385, 0.7195001279643727, 0.9969081732265243, - 1.2383497880608061, 1.4675206597269927, 1.6953064251816552, - 1.9291243435606809, 2.1757300955477641, 2.4428032131216391, - 2.7406534569230616, 3.0851445039665513, 3.5043101122033367, - 4.0575997065264637, 4.9182956424675286, 7.5431362346944937, + 0.2771490383641385, 0.7195001279643727, 0.9969081732265243, + 1.2383497880608061, 1.4675206597269927, 1.6953064251816552, + 1.9291243435606809, 2.1757300955477641, 2.4428032131216391, + 2.7406534569230616, 3.0851445039665513, 3.5043101122033367, + 4.0575997065264637, 4.9182956424675286, 7.5431362346944937, - 0.4360451650782932, 0.9983600902486267, 1.3306365880734528, - 1.6129750834753802, 1.8767241606994294, 2.1357032436097660, - 2.3988853336865565, 2.6740603137235603, 2.9697561737517959, - 3.2971457713883265, 3.6731795898504660, 4.1275751617770631, - 4.7230515633946677, 5.6417477865306020, 8.4059469148854635 -}; + 0.4360451650782932, 0.9983600902486267, 1.3306365880734528, + 1.6129750834753802, 1.8767241606994294, 2.1357032436097660, + 2.3988853336865565, 2.6740603137235603, 2.9697561737517959, + 3.2971457713883265, 3.6731795898504660, 4.1275751617770631, + 
4.7230515633946677, 5.6417477865306020, 8.4059469148854635}; TEST_BEGIN(test_pt_gamma_shape) { unsigned i, j; unsigned e = 0; - for (i = 0; i < sizeof(pt_gamma_shape)/sizeof(double); i++) { + for (i = 0; i < sizeof(pt_gamma_shape) / sizeof(double); i++) { double shape = pt_gamma_shape[i]; double ln_gamma_shape = ln_gamma(shape); for (j = 1; j < 100; j += 7) { double p = (double)j * 0.01; - expect_true(double_eq_rel(pt_gamma(p, shape, 1.0, - ln_gamma_shape), pt_gamma_expected[e], MAX_REL_ERR, - MAX_ABS_ERR), + expect_true( + double_eq_rel( + pt_gamma(p, shape, 1.0, ln_gamma_shape), + pt_gamma_expected[e], MAX_REL_ERR, MAX_ABS_ERR), "Incorrect pt_gamma result for i=%u, j=%u", i, j); e++; } @@ -370,21 +367,16 @@ TEST_BEGIN(test_pt_gamma_scale) { double shape = 1.0; double ln_gamma_shape = ln_gamma(shape); - expect_true(double_eq_rel( - pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, - pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, - MAX_ABS_ERR), + expect_true( + double_eq_rel(pt_gamma(0.5, shape, 1.0, ln_gamma_shape) * 10.0, + pt_gamma(0.5, shape, 10.0, ln_gamma_shape), MAX_REL_ERR, + MAX_ABS_ERR), "Scale should be trivially equivalent to external multiplication"); } TEST_END int main(void) { - return test( - test_ln_gamma_factorial, - test_ln_gamma_misc, - test_pt_norm, - test_pt_chi2, - test_pt_gamma_shape, - test_pt_gamma_scale); + return test(test_ln_gamma_factorial, test_ln_gamma_misc, test_pt_norm, + test_pt_chi2, test_pt_gamma_shape, test_pt_gamma_scale); } diff --git a/test/unit/mpsc_queue.c b/test/unit/mpsc_queue.c index 895edf84..d22d5488 100644 --- a/test/unit/mpsc_queue.c +++ b/test/unit/mpsc_queue.c @@ -12,10 +12,10 @@ struct elem_s { }; /* Include both proto and gen to make sure they match up. 
*/ -mpsc_queue_proto(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, - elem_list_t); -mpsc_queue_gen(static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, - elem_list_t, link); +mpsc_queue_proto( + static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, elem_list_t); +mpsc_queue_gen( + static, elem_mpsc_queue_, elem_mpsc_queue_t, elem_t, elem_list_t, link); static void init_elems_simple(elem_t *elems, int nelems, int thread) { @@ -29,8 +29,8 @@ init_elems_simple(elem_t *elems, int nelems, int thread) { static void check_elems_simple(elem_list_t *list, int nelems, int thread) { elem_t *elem; - int next_idx = 0; - ql_foreach(elem, list, link) { + int next_idx = 0; + ql_foreach (elem, list, link) { expect_d_lt(next_idx, nelems, "Too many list items"); expect_d_eq(thread, elem->thread, ""); expect_d_eq(next_idx, elem->idx, "List out of order"); @@ -39,9 +39,9 @@ check_elems_simple(elem_list_t *list, int nelems, int thread) { } TEST_BEGIN(test_simple) { - enum {NELEMS = 10}; - elem_t elems[NELEMS]; - elem_list_t list; + enum { NELEMS = 10 }; + elem_t elems[NELEMS]; + elem_list_t list; elem_mpsc_queue_t queue; /* Pop empty queue onto empty list -> empty list */ @@ -82,7 +82,6 @@ TEST_BEGIN(test_simple) { } elem_mpsc_queue_pop_batch(&queue, &list); check_elems_simple(&list, NELEMS, 0); - } TEST_END @@ -137,7 +136,7 @@ TEST_BEGIN(test_push_single_or_batch) { TEST_END TEST_BEGIN(test_multi_op) { - enum {NELEMS = 20}; + enum { NELEMS = 20 }; elem_t elems[NELEMS]; init_elems_simple(elems, NELEMS, 0); elem_list_t push_list; @@ -176,30 +175,29 @@ TEST_BEGIN(test_multi_op) { elem_mpsc_queue_pop_batch(&queue, &result_list); check_elems_simple(&result_list, NELEMS, 0); - } TEST_END typedef struct pusher_arg_s pusher_arg_t; struct pusher_arg_s { elem_mpsc_queue_t *queue; - int thread; - elem_t *elems; - int nelems; + int thread; + elem_t *elems; + int nelems; }; typedef struct popper_arg_s popper_arg_t; struct popper_arg_s { elem_mpsc_queue_t *queue; - int npushers; - int 
nelems_per_pusher; - int *pusher_counts; + int npushers; + int nelems_per_pusher; + int *pusher_counts; }; static void * thd_pusher(void *void_arg) { pusher_arg_t *arg = (pusher_arg_t *)void_arg; - int next_idx = 0; + int next_idx = 0; while (next_idx < arg->nelems) { /* Push 10 items in batch. */ elem_list_t list; @@ -216,7 +214,6 @@ thd_pusher(void *void_arg) { elem_mpsc_queue_push(arg->queue, &arg->elems[next_idx]); next_idx++; } - } return NULL; } @@ -224,13 +221,13 @@ thd_pusher(void *void_arg) { static void * thd_popper(void *void_arg) { popper_arg_t *arg = (popper_arg_t *)void_arg; - int done_pushers = 0; + int done_pushers = 0; while (done_pushers < arg->npushers) { elem_list_t list; ql_new(&list); elem_mpsc_queue_pop_batch(arg->queue, &list); elem_t *elem; - ql_foreach(elem, &list, link) { + ql_foreach (elem, &list, link) { int thread = elem->thread; int idx = elem->idx; expect_d_eq(arg->pusher_counts[thread], idx, @@ -248,12 +245,12 @@ thd_popper(void *void_arg) { TEST_BEGIN(test_multiple_threads) { enum { NPUSHERS = 4, - NELEMS_PER_PUSHER = 1000*1000, + NELEMS_PER_PUSHER = 1000 * 1000, }; - thd_t pushers[NPUSHERS]; + thd_t pushers[NPUSHERS]; pusher_arg_t pusher_arg[NPUSHERS]; - thd_t popper; + thd_t popper; popper_arg_t popper_arg; elem_mpsc_queue_t queue; @@ -296,9 +293,6 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_simple, - test_push_single_or_batch, - test_multi_op, - test_multiple_threads); + return test_no_reentrancy(test_simple, test_push_single_or_batch, + test_multi_op, test_multiple_threads); } diff --git a/test/unit/mq.c b/test/unit/mq.c index f833f77c..9b3b547a 100644 --- a/test/unit/mq.c +++ b/test/unit/mq.c @@ -1,22 +1,22 @@ #include "test/jemalloc_test.h" -#define NSENDERS 3 -#define NMSGS 100000 +#define NSENDERS 3 +#define NMSGS 100000 typedef struct mq_msg_s mq_msg_t; struct mq_msg_s { - mq_msg(mq_msg_t) link; + mq_msg(mq_msg_t) link; }; mq_gen(static, mq_, mq_t, mq_msg_t, link) -TEST_BEGIN(test_mq_basic) { - mq_t 
mq; + TEST_BEGIN(test_mq_basic) { + mq_t mq; mq_msg_t msg; expect_false(mq_init(&mq), "Unexpected mq_init() failure"); expect_u_eq(mq_count(&mq), 0, "mq should be empty"); - expect_ptr_null(mq_tryget(&mq), - "mq_tryget() should fail when the queue is empty"); + expect_ptr_null( + mq_tryget(&mq), "mq_tryget() should fail when the queue is empty"); mq_put(&mq, &msg); expect_u_eq(mq_count(&mq), 1, "mq should contain one message"); @@ -31,7 +31,7 @@ TEST_END static void * thd_receiver_start(void *arg) { - mq_t *mq = (mq_t *)arg; + mq_t *mq = (mq_t *)arg; unsigned i; for (i = 0; i < (NSENDERS * NMSGS); i++) { @@ -44,12 +44,12 @@ thd_receiver_start(void *arg) { static void * thd_sender_start(void *arg) { - mq_t *mq = (mq_t *)arg; + mq_t *mq = (mq_t *)arg; unsigned i; for (i = 0; i < NMSGS; i++) { mq_msg_t *msg; - void *p; + void *p; p = mallocx(sizeof(mq_msg_t), 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); msg = (mq_msg_t *)p; @@ -59,9 +59,9 @@ thd_sender_start(void *arg) { } TEST_BEGIN(test_mq_threaded) { - mq_t mq; - thd_t receiver; - thd_t senders[NSENDERS]; + mq_t mq; + thd_t receiver; + thd_t senders[NSENDERS]; unsigned i; expect_false(mq_init(&mq), "Unexpected mq_init() failure"); @@ -82,8 +82,5 @@ TEST_END int main(void) { - return test( - test_mq_basic, - test_mq_threaded); + return test(test_mq_basic, test_mq_threaded); } - diff --git a/test/unit/mtx.c b/test/unit/mtx.c index 4aeebc13..0fe15a90 100644 --- a/test/unit/mtx.c +++ b/test/unit/mtx.c @@ -1,7 +1,7 @@ #include "test/jemalloc_test.h" -#define NTHREADS 2 -#define NINCRS 2000000 +#define NTHREADS 2 +#define NINCRS 2000000 TEST_BEGIN(test_mtx_basic) { mtx_t mtx; @@ -14,14 +14,14 @@ TEST_BEGIN(test_mtx_basic) { TEST_END typedef struct { - mtx_t mtx; - unsigned x; + mtx_t mtx; + unsigned x; } thd_start_arg_t; static void * thd_start(void *varg) { thd_start_arg_t *arg = (thd_start_arg_t *)varg; - unsigned i; + unsigned i; for (i = 0; i < NINCRS; i++) { mtx_lock(&arg->mtx); @@ -33,8 +33,8 @@ 
thd_start(void *varg) { TEST_BEGIN(test_mtx_race) { thd_start_arg_t arg; - thd_t thds[NTHREADS]; - unsigned i; + thd_t thds[NTHREADS]; + unsigned i; expect_false(mtx_init(&arg.mtx), "Unexpected mtx_init() failure"); arg.x = 0; @@ -44,14 +44,12 @@ TEST_BEGIN(test_mtx_race) { for (i = 0; i < NTHREADS; i++) { thd_join(thds[i], NULL); } - expect_u_eq(arg.x, NTHREADS * NINCRS, - "Race-related counter corruption"); + expect_u_eq( + arg.x, NTHREADS * NINCRS, "Race-related counter corruption"); } TEST_END int main(void) { - return test( - test_mtx_basic, - test_mtx_race); + return test(test_mtx_basic, test_mtx_race); } diff --git a/test/unit/ncached_max.c b/test/unit/ncached_max.c index 1a0d2885..4724f55b 100644 --- a/test/unit/ncached_max.c +++ b/test/unit/ncached_max.c @@ -2,10 +2,10 @@ #include "test/san.h" const char *malloc_conf = -"tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096"; + "tcache_ncached_max:256-1024:1001|2048-2048:0|8192-8192:1,tcache_max:4096"; extern void tcache_bin_info_compute( cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]); -extern bool tcache_get_default_ncached_max_set(szind_t ind); +extern bool tcache_get_default_ncached_max_set(szind_t ind); extern const cache_bin_info_t *tcache_get_default_ncached_max(void); static void @@ -13,54 +13,54 @@ check_bins_info(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { size_t mib_get[4], mib_get_len; mib_get_len = sizeof(mib_get) / sizeof(size_t); const char *get_name = "thread.tcache.ncached_max.read_sizeclass"; - size_t ncached_max; - size_t sz = sizeof(size_t); + size_t ncached_max; + size_t sz = sizeof(size_t); expect_d_eq(mallctlnametomib(get_name, mib_get, &mib_get_len), 0, "Unexpected mallctlnametomib() failure"); for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { size_t bin_size = sz_index2size(i); - expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); + expect_d_eq( + 
mallctlbymib(mib_get, mib_get_len, (void *)&ncached_max, + &sz, (void *)&bin_size, sizeof(size_t)), + 0, "Unexpected mallctlbymib() failure"); expect_zu_eq(ncached_max, tcache_bin_info[i].ncached_max, "Unexpected ncached_max for bin %d", i); /* Check ncached_max returned under a non-bin size. */ bin_size--; size_t temp_ncached_max = 0; expect_d_eq(mallctlbymib(mib_get, mib_get_len, - (void *)&temp_ncached_max, &sz, - (void *)&bin_size, sizeof(size_t)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&temp_ncached_max, &sz, + (void *)&bin_size, sizeof(size_t)), + 0, "Unexpected mallctlbymib() failure"); expect_zu_eq(temp_ncached_max, ncached_max, "Unexpected ncached_max for inaccurate bin size."); } } static void * -ncached_max_check(void* args) { +ncached_max_check(void *args) { cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]; cache_bin_info_t tcache_bin_info_backup[TCACHE_NBINS_MAX]; - tsd_t *tsd = tsd_fetch(); - tcache_t *tcache = tsd_tcachep_get(tsd); + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); assert(tcache != NULL); tcache_slow_t *tcache_slow = tcache->tcache_slow; - tcache_bin_info_compute(tcache_bin_info); - memcpy(tcache_bin_info_backup, tcache_bin_info, - sizeof(tcache_bin_info)); + memcpy( + tcache_bin_info_backup, tcache_bin_info, sizeof(tcache_bin_info)); /* Check ncached_max set by malloc_conf. */ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - bool first_range = (i >= sz_size2index(256) && - i <= sz_size2index(1024)); - bool second_range = (i == sz_size2index(2048)); - bool third_range = (i == sz_size2index(8192)); + bool first_range = (i >= sz_size2index(256) + && i <= sz_size2index(1024)); + bool second_range = (i == sz_size2index(2048)); + bool third_range = (i == sz_size2index(8192)); cache_bin_sz_t target_ncached_max = 0; if (first_range || second_range || third_range) { - target_ncached_max = first_range ? 1001: - (second_range ? 0: 1); + target_ncached_max = first_range + ? 1001 + : (second_range ? 
0 : 1); expect_true(tcache_get_default_ncached_max_set(i), "Unexpected state for bin %u", i); expect_zu_eq(target_ncached_max, @@ -88,13 +88,13 @@ ncached_max_check(void* args) { "Unexpected mallctlnametomib() failure"); /* Test the ncached_max set with tcache on. */ - char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; + char inputs[100] = "8-128:1|160-160:11|170-320:22|224-8388609:0"; char *inputp = inputs; expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { - if (i >= sz_size2index(8) &&i <= sz_size2index(128)) { + if (i >= sz_size2index(8) && i <= sz_size2index(128)) { cache_bin_info_init(&tcache_bin_info[i], 1); } if (i == sz_size2index(160)) { @@ -119,16 +119,17 @@ ncached_max_check(void* args) { * the new setting will not be carried on. Instead, the default * settings will be applied. */ - bool e0 = false, e1; + bool e0 = false, e1; size_t bool_sz = sizeof(bool); expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_true(e1, "Unexpected previous tcache state"); strcpy(inputs, "0-112:8"); /* Setting returns ENOENT when the tcache is disabled. */ expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), ENOENT, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + ENOENT, "Unexpected mallctlbymib() failure"); /* All ncached_max should return 0 once tcache is disabled. 
*/ for (szind_t i = 0; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], 0); } check_bins_info(tcache_bin_info); @@ -137,12 +138,13 @@ ncached_max_check(void* args) { e0 = true; expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, - (void *)&e0, bool_sz), 0, "Unexpected mallctl() error"); + (void *)&e0, bool_sz), + 0, "Unexpected mallctl() error"); expect_false(e1, "Unexpected previous tcache state"); memcpy(tcache_bin_info, tcache_bin_info_backup, sizeof(tcache_bin_info_backup)); for (szind_t i = tcache_nbins_get(tcache_slow); i < TCACHE_NBINS_MAX; - i++) { + i++) { cache_bin_info_init(&tcache_bin_info[i], 0); } check_bins_info(tcache_bin_info); @@ -152,22 +154,22 @@ ncached_max_check(void* args) { * resetting tcache_max. The ncached_max changes should stay. */ size_t tcache_max = 1024; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&tcache_max, sizeof(size_t)), + 0, "Unexpected mallctl() failure"); for (szind_t i = sz_size2index(1024) + 1; i < TCACHE_NBINS_MAX; i++) { cache_bin_info_init(&tcache_bin_info[i], 0); } strcpy(inputs, "2048-6144:123"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); tcache_max = 6144; - assert_d_eq(mallctl("thread.tcache.max", - NULL, NULL, (void *)&tcache_max, sizeof(size_t)), 0, - "Unexpected mallctl() failure"); + assert_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&tcache_max, sizeof(size_t)), + 0, "Unexpected mallctl() failure"); memcpy(tcache_bin_info, tcache_bin_info_backup, sizeof(tcache_bin_info_backup)); for (szind_t i = sz_size2index(2048); i < TCACHE_NBINS_MAX; i++) { @@ -182,15 +184,15 @@ ncached_max_check(void* args) { /* Test an empty input, it should do
nothing. */ strcpy(inputs, ""); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* Test a half-done string, it should return EINVAL and do nothing. */ strcpy(inputs, "4-1024:7|256-1024"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + EINVAL, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* @@ -199,8 +201,8 @@ ncached_max_check(void* args) { */ strcpy(inputs, "1024-256:7"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* @@ -216,8 +218,8 @@ ncached_max_check(void* args) { long_inputs[200 * 9 + 8] = '\0'; inputp = long_inputs; expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + EINVAL, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); free(long_inputs); @@ -228,17 +230,17 @@ ncached_max_check(void* args) { strcpy(inputs, "k8-1024:77p"); inputp = inputs; expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), EINVAL, - "Unexpected mallctlbymib() failure"); + (void *)&inputp, sizeof(char *)), + EINVAL, "Unexpected mallctlbymib() failure"); check_bins_info(tcache_bin_info); /* Test large ncached_max, it should return success but capped. 
*/ strcpy(inputs, "1024-1024:65540"); expect_d_eq(mallctlbymib(mib_set, mib_set_len, NULL, NULL, - (void *)&inputp, sizeof(char *)), 0, - "Unexpected mallctlbymib() failure"); - cache_bin_info_init(&tcache_bin_info[sz_size2index(1024)], - CACHE_BIN_NCACHED_MAX); + (void *)&inputp, sizeof(char *)), + 0, "Unexpected mallctlbymib() failure"); + cache_bin_info_init( + &tcache_bin_info[sz_size2index(1024)], CACHE_BIN_NCACHED_MAX); check_bins_info(tcache_bin_info); return NULL; @@ -262,7 +264,5 @@ TEST_END int main(void) { - return test( - test_ncached_max); + return test(test_ncached_max); } - diff --git a/test/unit/nstime.c b/test/unit/nstime.c index 43fd3954..8c095d09 100644 --- a/test/unit/nstime.c +++ b/test/unit/nstime.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -#define BILLION UINT64_C(1000000000) +#define BILLION UINT64_C(1000000000) TEST_BEGIN(test_nstime_init) { nstime_t nst; @@ -43,24 +43,24 @@ TEST_BEGIN(test_nstime_compare) { nstime_init2(&nstb, 42, 42); expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - expect_d_eq(nstime_compare(&nstb, &nsta), -1, - "nstb should be less than nsta"); + expect_d_eq( + nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 42, 44); - expect_d_eq(nstime_compare(&nsta, &nstb), -1, - "nsta should be less than nstb"); + expect_d_eq( + nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); nstime_init2(&nstb, 41, BILLION - 1); expect_d_eq(nstime_compare(&nsta, &nstb), 1, "nsta should be greater than nstb"); - expect_d_eq(nstime_compare(&nstb, &nsta), -1, - "nstb should be less than nsta"); + expect_d_eq( + nstime_compare(&nstb, &nsta), -1, "nstb should be less than nsta"); nstime_init2(&nstb, 43, 0); - expect_d_eq(nstime_compare(&nsta, &nstb), -1, - "nsta should be less than nstb"); + expect_d_eq( + nstime_compare(&nsta, &nstb), -1, "nsta should be less than nstb"); 
expect_d_eq(nstime_compare(&nstb, &nsta), 1, "nstb should be greater than nsta"); } @@ -73,15 +73,15 @@ TEST_BEGIN(test_nstime_add) { nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 84, 86); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, BILLION - 1); nstime_copy(&nstb, &nsta); nstime_add(&nsta, &nstb); nstime_init2(&nstb, 85, BILLION - 2); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -91,14 +91,14 @@ TEST_BEGIN(test_nstime_iadd) { nstime_init2(&nsta, 42, BILLION - 1); nstime_iadd(&nsta, 1); nstime_init2(&nstb, 43, 0); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); nstime_init2(&nsta, 42, 1); nstime_iadd(&nsta, BILLION + 1); nstime_init2(&nstb, 43, 2); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect addition result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect addition result"); } TEST_END @@ -109,15 +109,15 @@ TEST_BEGIN(test_nstime_subtract) { nstime_copy(&nstb, &nsta); nstime_subtract(&nsta, &nstb); nstime_init_zero(&nstb); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); nstime_init2(&nstb, 41, 44); nstime_subtract(&nsta, &nstb); nstime_init2(&nstb, 0, BILLION - 1); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -125,16 +125,16 @@ TEST_BEGIN(test_nstime_isubtract) { nstime_t nsta, nstb; nstime_init2(&nsta, 42, 43); - nstime_isubtract(&nsta, 42*BILLION + 43); + 
nstime_isubtract(&nsta, 42 * BILLION + 43); nstime_init_zero(&nstb); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); nstime_init2(&nsta, 42, 43); - nstime_isubtract(&nsta, 41*BILLION + 44); + nstime_isubtract(&nsta, 41 * BILLION + 44); nstime_init2(&nstb, 0, BILLION - 1); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect subtraction result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect subtraction result"); } TEST_END @@ -144,14 +144,14 @@ TEST_BEGIN(test_nstime_imultiply) { nstime_init2(&nsta, 42, 43); nstime_imultiply(&nsta, 10); nstime_init2(&nstb, 420, 430); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect multiplication result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); nstime_init2(&nsta, 42, 666666666); nstime_imultiply(&nsta, 3); nstime_init2(&nstb, 127, 999999998); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect multiplication result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect multiplication result"); } TEST_END @@ -162,15 +162,15 @@ TEST_BEGIN(test_nstime_idivide) { nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_idivide(&nsta, 10); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect division result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); nstime_init2(&nsta, 42, 666666666); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 3); nstime_idivide(&nsta, 3); - expect_d_eq(nstime_compare(&nsta, &nstb), 0, - "Incorrect division result"); + expect_d_eq( + nstime_compare(&nsta, &nstb), 0, "Incorrect division result"); } TEST_END @@ -180,24 +180,24 @@ TEST_BEGIN(test_nstime_divide) { nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); - expect_u64_eq(nstime_divide(&nsta, &nstb), 10, - "Incorrect division result"); + expect_u64_eq( + 
nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_add(&nsta, &nstc); - expect_u64_eq(nstime_divide(&nsta, &nstb), 10, - "Incorrect division result"); + expect_u64_eq( + nstime_divide(&nsta, &nstb), 10, "Incorrect division result"); nstime_init2(&nsta, 42, 43); nstime_copy(&nstb, &nsta); nstime_imultiply(&nsta, 10); nstime_init(&nstc, 1); nstime_subtract(&nsta, &nstc); - expect_u64_eq(nstime_divide(&nsta, &nstb), 9, - "Incorrect division result"); + expect_u64_eq( + nstime_divide(&nsta, &nstb), 9, "Incorrect division result"); } TEST_END @@ -213,8 +213,8 @@ test_nstime_since_once(nstime_t *t) { nstime_copy(&new_t, t); nstime_subtract(&new_t, &old_t); - expect_u64_ge(nstime_ns(&new_t), ns_since, - "Incorrect time since result"); + expect_u64_ge( + nstime_ns(&new_t), ns_since, "Incorrect time since result"); } TEST_BEGIN(test_nstime_ns_since) { @@ -253,19 +253,9 @@ TEST_END int main(void) { - return test( - test_nstime_init, - test_nstime_init2, - test_nstime_copy, - test_nstime_compare, - test_nstime_add, - test_nstime_iadd, - test_nstime_subtract, - test_nstime_isubtract, - test_nstime_imultiply, - test_nstime_idivide, - test_nstime_divide, - test_nstime_ns_since, - test_nstime_ms_since, - test_nstime_monotonic); + return test(test_nstime_init, test_nstime_init2, test_nstime_copy, + test_nstime_compare, test_nstime_add, test_nstime_iadd, + test_nstime_subtract, test_nstime_isubtract, test_nstime_imultiply, + test_nstime_idivide, test_nstime_divide, test_nstime_ns_since, + test_nstime_ms_since, test_nstime_monotonic); } diff --git a/test/unit/oversize_threshold.c b/test/unit/oversize_threshold.c index 95ce6537..5d9aae10 100644 --- a/test/unit/oversize_threshold.c +++ b/test/unit/oversize_threshold.c @@ -5,7 +5,7 @@ static void arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp, size_t *oldlen, void *newp, size_t newlen) { - 
int err; + int err; char buf[100]; malloc_snprintf(buf, sizeof(buf), mallctl_str, arena); @@ -14,13 +14,13 @@ arena_mallctl(const char *mallctl_str, unsigned arena, void *oldp, } TEST_BEGIN(test_oversize_threshold_get_set) { - int err; + int err; size_t old_threshold; size_t new_threshold; size_t threshold_sz = sizeof(old_threshold); unsigned arena; - size_t arena_sz = sizeof(arena); + size_t arena_sz = sizeof(arena); err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); expect_d_eq(0, err, "Arena creation failed"); @@ -38,13 +38,14 @@ TEST_BEGIN(test_oversize_threshold_get_set) { /* Just a read */ arena_mallctl("arena.%u.oversize_threshold", arena, &old_threshold, &threshold_sz, NULL, 0); - expect_zu_eq(2 * 1024 * 1024, old_threshold, "Should have read old value"); + expect_zu_eq( + 2 * 1024 * 1024, old_threshold, "Should have read old value"); } TEST_END static size_t max_purged = 0; static bool -purge_forced_record_max(extent_hooks_t* hooks, void *addr, size_t sz, +purge_forced_record_max(extent_hooks_t *hooks, void *addr, size_t sz, size_t offset, size_t length, unsigned arena_ind) { if (length > max_purged) { max_purged = length; @@ -73,7 +74,7 @@ TEST_BEGIN(test_oversize_threshold) { int err; unsigned arena; - size_t arena_sz = sizeof(arena); + size_t arena_sz = sizeof(arena); err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); expect_d_eq(0, err, "Arena creation failed"); arena_mallctl("arena.%u.extent_hooks", arena, NULL, NULL, &extent_hooks, @@ -121,8 +122,8 @@ TEST_BEGIN(test_oversize_threshold) { ptr = mallocx(2 * 1024 * 1024, MALLOCX_ARENA(arena)); dallocx(ptr, MALLOCX_TCACHE_NONE); if (!is_background_thread_enabled()) { - expect_zu_ge(max_purged, 2 * 1024 * 1024, - "Expected a 2MB purge"); + expect_zu_ge( + max_purged, 2 * 1024 * 1024, "Expected a 2MB purge"); } } TEST_END @@ -130,7 +131,5 @@ TEST_END int main(void) { return test_no_reentrancy( - test_oversize_threshold_get_set, - test_oversize_threshold); + 
test_oversize_threshold_get_set, test_oversize_threshold); } - diff --git a/test/unit/pa.c b/test/unit/pa.c index d44bb95c..8552225f 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -16,8 +16,8 @@ merge_hook(extent_hooks_t *extent_hooks, void *addr_a, size_t size_a, } static bool -split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, - size_t size_a, size_t size_b, bool committed, unsigned arena_ind) { +split_hook(extent_hooks_t *extent_hooks, void *addr, size_t size, size_t size_a, + size_t size_b, bool committed, unsigned arena_ind) { return !maps_coalesce; } @@ -39,13 +39,13 @@ init_test_extent_hooks(extent_hooks_t *hooks) { typedef struct test_data_s test_data_t; struct test_data_s { - pa_shard_t shard; - pa_central_t central; - base_t *base; - emap_t emap; + pa_shard_t shard; + pa_central_t central; + base_t *base; + emap_t emap; pa_shard_stats_t stats; - malloc_mutex_t stats_mtx; - extent_hooks_t hooks; + malloc_mutex_t stats_mtx; + extent_hooks_t hooks; }; static test_data_t * @@ -66,8 +66,8 @@ init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { nstime_t time; nstime_init(&time, 0); - err = pa_central_init(&test_data->central, base, opt_hpa, - &hpa_hooks_default); + err = pa_central_init( + &test_data->central, base, opt_hpa, &hpa_hooks_default); assert_false(err, ""); const size_t pa_oversize_threshold = 8 * 1024 * 1024; @@ -80,7 +80,8 @@ init_test_data(ssize_t dirty_decay_ms, ssize_t muzzy_decay_ms) { return test_data; } -void destroy_test_data(test_data_t *data) { +void +destroy_test_data(test_data_t *data) { base_delete(TSDN_NULL, data->base); free(data); } @@ -89,28 +90,28 @@ static void * do_alloc_free_purge(void *arg) { test_data_t *test_data = (test_data_t *)arg; for (int i = 0; i < 10 * 1000; i++) { - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, PAGE, PAGE, /* slab */ false, /* szind */ 0, /* zero */ false, /* guarded */ false, 
&deferred_work_generated); assert_ptr_not_null(edata, ""); pa_dalloc(TSDN_NULL, &test_data->shard, edata, &deferred_work_generated); - malloc_mutex_lock(TSDN_NULL, - &test_data->shard.pac.decay_dirty.mtx); + malloc_mutex_lock( + TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); pac_decay_all(TSDN_NULL, &test_data->shard.pac, &test_data->shard.pac.decay_dirty, &test_data->shard.pac.stats->decay_dirty, &test_data->shard.pac.ecache_dirty, true); - malloc_mutex_unlock(TSDN_NULL, - &test_data->shard.pac.decay_dirty.mtx); + malloc_mutex_unlock( + TSDN_NULL, &test_data->shard.pac.decay_dirty.mtx); } return NULL; } TEST_BEGIN(test_alloc_free_purge_thds) { test_data_t *test_data = init_test_data(0, 0); - thd_t thds[4]; + thd_t thds[4]; for (int i = 0; i < 4; i++) { thd_create(&thds[i], do_alloc_free_purge, test_data); } @@ -122,6 +123,5 @@ TEST_END int main(void) { - return test( - test_alloc_free_purge_thds); + return test(test_alloc_free_purge_thds); } diff --git a/test/unit/pack.c b/test/unit/pack.c index e6392825..e3024512 100644 --- a/test/unit/pack.c +++ b/test/unit/pack.c @@ -4,9 +4,9 @@ * Size class that is a divisor of the page size, ideally 4+ regions per run. */ #if LG_PAGE <= 14 -#define SZ (ZU(1) << (LG_PAGE - 2)) +# define SZ (ZU(1) << (LG_PAGE - 2)) #else -#define SZ ZU(4096) +# define SZ ZU(4096) #endif /* @@ -14,11 +14,11 @@ * if mmap()ed memory grows downward, downward growth of mmap()ed memory is * tested. 
*/ -#define NSLABS 8 +#define NSLABS 8 static unsigned binind_compute(void) { - size_t sz; + size_t sz; unsigned nbins, i; sz = sizeof(nbins); @@ -27,16 +27,17 @@ binind_compute(void) { for (i = 0; i < nbins; i++) { size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); size_t size; - expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, - &miblen), 0, "Unexpected mallctlnametomb failure"); + expect_d_eq(mallctlnametomib("arenas.bin.0.size", mib, &miblen), + 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)i; sz = sizeof(size); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&size, &sz, NULL, 0), 0, + "Unexpected mallctlbymib failure"); if (size == SZ) { return i; } @@ -49,24 +50,24 @@ binind_compute(void) { static size_t nregs_per_run_compute(void) { uint32_t nregs; - size_t sz; + size_t sz; unsigned binind = binind_compute(); - size_t mib[4]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t mib[4]; + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arenas.bin.0.nregs", mib, &miblen), 0, "Unexpected mallctlnametomb failure"); mib[2] = (size_t)binind; sz = sizeof(nregs); - expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&nregs, &sz, NULL, 0), 0, + "Unexpected mallctlbymib failure"); return nregs; } static unsigned arenas_create_mallctl(void) { unsigned arena_ind; - size_t sz; + size_t sz; sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), @@ -78,7 +79,7 @@ arenas_create_mallctl(void) { static void arena_reset_mallctl(unsigned arena_ind) { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); + size_t miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.reset", mib, &miblen), 
0, "Unexpected mallctlnametomib() failure"); @@ -88,23 +89,23 @@ arena_reset_mallctl(unsigned arena_ind) { } TEST_BEGIN(test_pack) { - bool prof_enabled; + bool prof_enabled; size_t sz = sizeof(prof_enabled); if (mallctl("opt.prof", (void *)&prof_enabled, &sz, NULL, 0) == 0) { test_skip_if(prof_enabled); } unsigned arena_ind = arenas_create_mallctl(); - size_t nregs_per_run = nregs_per_run_compute(); - size_t nregs = nregs_per_run * NSLABS; + size_t nregs_per_run = nregs_per_run_compute(); + size_t nregs = nregs_per_run * NSLABS; VARIABLE_ARRAY(void *, ptrs, nregs); size_t i, j, offset; /* Fill matrix. */ for (i = offset = 0; i < NSLABS; i++) { for (j = 0; j < nregs_per_run; j++) { - void *p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); + void *p = mallocx( + SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); expect_ptr_not_null(p, "Unexpected mallocx(%zu, MALLOCX_ARENA(%u) |" " MALLOCX_TCACHE_NONE) failure, run=%zu, reg=%zu", @@ -119,16 +120,15 @@ TEST_BEGIN(test_pack) { * layout policy. */ offset = 0; - for (i = offset = 0; - i < NSLABS; - i++, offset = (offset + 1) % nregs_per_run) { + for (i = offset = 0; i < NSLABS; + i++, offset = (offset + 1) % nregs_per_run) { for (j = 0; j < nregs_per_run; j++) { void *p = ptrs[(i * nregs_per_run) + j]; if (offset == j) { continue; } - dallocx(p, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); + dallocx( + p, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); } } @@ -137,17 +137,16 @@ TEST_BEGIN(test_pack) { * that the matrix is unmodified. 
*/ offset = 0; - for (i = offset = 0; - i < NSLABS; - i++, offset = (offset + 1) % nregs_per_run) { + for (i = offset = 0; i < NSLABS; + i++, offset = (offset + 1) % nregs_per_run) { for (j = 0; j < nregs_per_run; j++) { void *p; if (offset == j) { continue; } - p = mallocx(SZ, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE); + p = mallocx( + SZ, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); expect_ptr_eq(p, ptrs[(i * nregs_per_run) + j], "Unexpected refill discrepancy, run=%zu, reg=%zu\n", i, j); @@ -161,6 +160,5 @@ TEST_END int main(void) { - return test( - test_pack); + return test(test_pack); } diff --git a/test/unit/pages.c b/test/unit/pages.c index 8dfd1a72..dbee2f0c 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -2,8 +2,8 @@ TEST_BEGIN(test_pages_huge) { size_t alloc_size; - bool commit; - void *pages, *hugepage; + bool commit; + void *pages, *hugepage; alloc_size = HUGEPAGE * 2 - PAGE; commit = true; @@ -11,11 +11,12 @@ TEST_BEGIN(test_pages_huge) { expect_ptr_not_null(pages, "Unexpected pages_map() error"); if (init_system_thp_mode == thp_mode_default) { - hugepage = (void *)(ALIGNMENT_CEILING((uintptr_t)pages, HUGEPAGE)); - expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, - "Unexpected pages_huge() result"); - expect_false(pages_nohuge(hugepage, HUGEPAGE), - "Unexpected pages_nohuge() result"); + hugepage = (void *)(ALIGNMENT_CEILING( + (uintptr_t)pages, HUGEPAGE)); + expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, + "Unexpected pages_huge() result"); + expect_false(pages_nohuge(hugepage, HUGEPAGE), + "Unexpected pages_nohuge() result"); } pages_unmap(pages, alloc_size); @@ -24,6 +25,5 @@ TEST_END int main(void) { - return test( - test_pages_huge); + return test(test_pages_huge); } diff --git a/test/unit/peak.c b/test/unit/peak.c index 11129785..80eda30d 100644 --- a/test/unit/peak.c +++ b/test/unit/peak.c @@ -4,11 +4,10 @@ TEST_BEGIN(test_peak) { peak_t peak = PEAK_INITIALIZER; - expect_u64_eq(0, 
peak_max(&peak), - "Peak should be zero at initialization"); + expect_u64_eq( + 0, peak_max(&peak), "Peak should be zero at initialization"); peak_update(&peak, 100, 50); - expect_u64_eq(50, peak_max(&peak), - "Missed update"); + expect_u64_eq(50, peak_max(&peak), "Missed update"); peak_update(&peak, 100, 100); expect_u64_eq(50, peak_max(&peak), "Dallocs shouldn't change peak"); peak_update(&peak, 100, 200); @@ -42,6 +41,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_peak); + return test_no_reentrancy(test_peak); } diff --git a/test/unit/ph.c b/test/unit/ph.c index 0339f993..c9e4da9c 100644 --- a/test/unit/ph.c +++ b/test/unit/ph.c @@ -8,9 +8,9 @@ ph_structs(heap, node_t, BFS_ENUMERATE_MAX); struct node_s { #define NODE_MAGIC 0x9823af7e - uint32_t magic; + uint32_t magic; heap_link_t link; - uint64_t key; + uint64_t key; }; static int @@ -31,7 +31,6 @@ node_cmp(const node_t *a, const node_t *b) { static int node_cmp_magic(const node_t *a, const node_t *b) { - expect_u32_eq(a->magic, NODE_MAGIC, "Bad magic"); expect_u32_eq(b->magic, NODE_MAGIC, "Bad magic"); @@ -58,12 +57,12 @@ node_lchild_get(const node_t *node) { static void node_print(const node_t *node, unsigned depth) { unsigned i; - node_t *leftmost_child, *sibling; + node_t *leftmost_child, *sibling; for (i = 0; i < depth; i++) { malloc_printf("\t"); } - malloc_printf("%2"FMTu64"\n", node->key); + malloc_printf("%2" FMTu64 "\n", node->key); leftmost_child = node_lchild_get(node); if (leftmost_child == NULL) { @@ -71,8 +70,8 @@ node_print(const node_t *node, unsigned depth) { } node_print(leftmost_child, depth + 1); - for (sibling = node_next_get(leftmost_child); sibling != - NULL; sibling = node_next_get(sibling)) { + for (sibling = node_next_get(leftmost_child); sibling != NULL; + sibling = node_next_get(sibling)) { node_print(sibling, depth + 1); } } @@ -89,7 +88,7 @@ heap_print(const heap_t *heap) { node_print(heap->ph.root, 0); for (auxelm = node_next_get(heap->ph.root); auxelm != 
NULL; - auxelm = node_next_get(auxelm)) { + auxelm = node_next_get(auxelm)) { expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); node_print(auxelm, 0); @@ -102,7 +101,7 @@ label_return: static unsigned node_validate(const node_t *node, const node_t *parent) { unsigned nnodes = 1; - node_t *leftmost_child, *sibling; + node_t *leftmost_child, *sibling; if (parent != NULL) { expect_d_ge(node_cmp_magic(node, parent), 0, @@ -113,12 +112,12 @@ node_validate(const node_t *node, const node_t *parent) { if (leftmost_child == NULL) { return nnodes; } - expect_ptr_eq(node_prev_get(leftmost_child), - (void *)node, "Leftmost child does not link to node"); + expect_ptr_eq(node_prev_get(leftmost_child), (void *)node, + "Leftmost child does not link to node"); nnodes += node_validate(leftmost_child, node); - for (sibling = node_next_get(leftmost_child); sibling != - NULL; sibling = node_next_get(sibling)) { + for (sibling = node_next_get(leftmost_child); sibling != NULL; + sibling = node_next_get(sibling)) { expect_ptr_eq(node_next_get(node_prev_get(sibling)), sibling, "sibling's prev doesn't link to sibling"); nnodes += node_validate(sibling, node); @@ -129,7 +128,7 @@ node_validate(const node_t *node, const node_t *parent) { static unsigned heap_validate(const heap_t *heap) { unsigned nnodes = 0; - node_t *auxelm; + node_t *auxelm; if (heap->ph.root == NULL) { goto label_return; @@ -138,7 +137,7 @@ heap_validate(const heap_t *heap) { nnodes += node_validate(heap->ph.root, NULL); for (auxelm = node_next_get(heap->ph.root); auxelm != NULL; - auxelm = node_next_get(auxelm)) { + auxelm = node_next_get(auxelm)) { expect_ptr_eq(node_next_get(node_prev_get(auxelm)), auxelm, "auxelm's prev doesn't link to auxelm"); nnodes += node_validate(auxelm, NULL); @@ -186,10 +185,10 @@ TEST_BEGIN(test_ph_random) { #define NNODES 25 #define NBAGS 250 #define SEED 42 - sfmt_t *sfmt; + sfmt_t *sfmt; uint64_t bag[NNODES]; - heap_t heap; - node_t 
nodes[NNODES]; + heap_t heap; + node_t nodes[NNODES]; unsigned i, j, k; sfmt = init_gen_rand(SEED); @@ -216,8 +215,8 @@ TEST_BEGIN(test_ph_random) { for (j = 1; j <= NNODES; j++) { /* Initialize heap and nodes. */ heap_new(&heap); - expect_u_eq(heap_validate(&heap), 0, - "Incorrect node count"); + expect_u_eq( + heap_validate(&heap), 0, "Incorrect node count"); for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; @@ -237,8 +236,8 @@ TEST_BEGIN(test_ph_random) { "Incorrect node count"); } - expect_false(heap_empty(&heap), - "Heap should not be empty"); + expect_false( + heap_empty(&heap), "Heap should not be empty"); /* Enumerate nodes. */ heap_enumerate_helper_t helper; @@ -247,14 +246,14 @@ TEST_BEGIN(test_ph_random) { expect_u_eq(max_queue_size, BFS_ENUMERATE_MAX, "Incorrect bfs queue length initialized"); assert(max_queue_size == BFS_ENUMERATE_MAX); - heap_enumerate_prepare(&heap, &helper, - BFS_ENUMERATE_MAX, max_queue_size); + heap_enumerate_prepare( + &heap, &helper, BFS_ENUMERATE_MAX, max_queue_size); size_t node_count = 0; - while(heap_enumerate_next(&heap, &helper)) { - node_count ++; + while (heap_enumerate_next(&heap, &helper)) { + node_count++; } - expect_lu_eq(node_count, j, - "Unexpected enumeration results."); + expect_lu_eq( + node_count, j, "Unexpected enumeration results."); /* Remove nodes. 
*/ switch (i % 6) { @@ -263,13 +262,13 @@ TEST_BEGIN(test_ph_random) { expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, &nodes[k]); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); } break; case 1: for (k = j; k > 0; k--) { - node_remove(&heap, &nodes[k-1]); + node_remove(&heap, &nodes[k - 1]); expect_u_eq(heap_validate(&heap), k - 1, "Incorrect node count"); } @@ -278,58 +277,62 @@ TEST_BEGIN(test_ph_random) { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = node_remove_first(&heap); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); if (prev != NULL) { - expect_d_ge(node_cmp(node, - prev), 0, + expect_d_ge( + node_cmp(node, prev), 0, "Bad removal order"); } prev = node; } break; - } case 3: { + } + case 3: { node_t *prev = NULL; for (k = 0; k < j; k++) { node_t *node = heap_first(&heap); expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); if (prev != NULL) { - expect_d_ge(node_cmp(node, - prev), 0, + expect_d_ge( + node_cmp(node, prev), 0, "Bad removal order"); } node_remove(&heap, node); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); prev = node; } break; - } case 4: { + } + case 4: { for (k = 0; k < j; k++) { node_remove_any(&heap); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); } break; - } case 5: { + } + case 5: { for (k = 0; k < j; k++) { node_t *node = heap_any(&heap); expect_u_eq(heap_validate(&heap), j - k, "Incorrect node count"); node_remove(&heap, node); - expect_u_eq(heap_validate(&heap), j - k - - 1, "Incorrect node count"); + expect_u_eq(heap_validate(&heap), + j - k - 1, "Incorrect node count"); 
} break; - } default: + } + default: not_reached(); } - expect_ptr_null(heap_first(&heap), - "Heap should be empty"); - expect_ptr_null(heap_any(&heap), - "Heap should be empty"); + expect_ptr_null( + heap_first(&heap), "Heap should be empty"); + expect_ptr_null( + heap_any(&heap), "Heap should be empty"); expect_true(heap_empty(&heap), "Heap should be empty"); } } @@ -341,7 +344,5 @@ TEST_END int main(void) { - return test( - test_ph_empty, - test_ph_random); + return test(test_ph_empty, test_ph_random); } diff --git a/test/unit/prng.c b/test/unit/prng.c index a6d9b014..20b8470e 100644 --- a/test/unit/prng.c +++ b/test/unit/prng.c @@ -9,32 +9,31 @@ TEST_BEGIN(test_prng_lg_range_u32) { ra = prng_lg_range_u32(&sa, 32); sa = 42; rb = prng_lg_range_u32(&sa, 32); - expect_u32_eq(ra, rb, - "Repeated generation should produce repeated results"); + expect_u32_eq( + ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_u32(&sb, 32); - expect_u32_eq(ra, rb, - "Equivalent generation should produce equivalent results"); + expect_u32_eq( + ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_u32(&sa, 32); rb = prng_lg_range_u32(&sa, 32); - expect_u32_ne(ra, rb, - "Full-width results must not immediately repeat"); + expect_u32_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; ra = prng_lg_range_u32(&sa, 32); for (lg_range = 31; lg_range > 0; lg_range--) { sb = 42; rb = prng_lg_range_u32(&sb, lg_range); - expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), - 0, "High order bits should be 0, lg_range=%u", lg_range); + expect_u32_eq((rb & (UINT32_C(0xffffffff) << lg_range)), 0, + "High order bits should be 0, lg_range=%u", lg_range); expect_u32_eq(rb, (ra >> (32 - lg_range)), "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); + "lg_range=%u", + lg_range); } - } TEST_END @@ -46,19 +45,18 @@ TEST_BEGIN(test_prng_lg_range_u64) { ra = 
prng_lg_range_u64(&sa, 64); sa = 42; rb = prng_lg_range_u64(&sa, 64); - expect_u64_eq(ra, rb, - "Repeated generation should produce repeated results"); + expect_u64_eq( + ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_u64(&sb, 64); - expect_u64_eq(ra, rb, - "Equivalent generation should produce equivalent results"); + expect_u64_eq( + ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_u64(&sa, 64); rb = prng_lg_range_u64(&sa, 64); - expect_u64_ne(ra, rb, - "Full-width results must not immediately repeat"); + expect_u64_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; ra = prng_lg_range_u64(&sa, 64); @@ -69,47 +67,48 @@ TEST_BEGIN(test_prng_lg_range_u64) { 0, "High order bits should be 0, lg_range=%u", lg_range); expect_u64_eq(rb, (ra >> (64 - lg_range)), "Expected high order bits of full-width result, " - "lg_range=%u", lg_range); + "lg_range=%u", + lg_range); } } TEST_END TEST_BEGIN(test_prng_lg_range_zu) { - size_t sa, sb; - size_t ra, rb; + size_t sa, sb; + size_t ra, rb; unsigned lg_range; sa = 42; ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); sa = 42; rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); - expect_zu_eq(ra, rb, - "Repeated generation should produce repeated results"); + expect_zu_eq( + ra, rb, "Repeated generation should produce repeated results"); sb = 42; rb = prng_lg_range_zu(&sb, ZU(1) << (3 + LG_SIZEOF_PTR)); - expect_zu_eq(ra, rb, - "Equivalent generation should produce equivalent results"); + expect_zu_eq( + ra, rb, "Equivalent generation should produce equivalent results"); sa = 42; ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); rb = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); - expect_zu_ne(ra, rb, - "Full-width results must not immediately repeat"); + expect_zu_ne(ra, rb, "Full-width results must not immediately repeat"); sa = 42; ra = prng_lg_range_zu(&sa, ZU(1) << (3 + LG_SIZEOF_PTR)); 
for (lg_range = (ZU(1) << (3 + LG_SIZEOF_PTR)) - 1; lg_range > 0; - lg_range--) { + lg_range--) { sb = 42; rb = prng_lg_range_zu(&sb, lg_range); - expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), - 0, "High order bits should be 0, lg_range=%u", lg_range); - expect_zu_eq(rb, (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - - lg_range)), "Expected high order bits of full-width " - "result, lg_range=%u", lg_range); + expect_zu_eq((rb & (SIZE_T_MAX << lg_range)), 0, + "High order bits should be 0, lg_range=%u", lg_range); + expect_zu_eq(rb, + (ra >> ((ZU(1) << (3 + LG_SIZEOF_PTR)) - lg_range)), + "Expected high order bits of full-width " + "result, lg_range=%u", + lg_range); } - } TEST_END @@ -158,13 +157,12 @@ TEST_END TEST_BEGIN(test_prng_range_zu) { size_t range; - const size_t max_range = 10000000; - const size_t range_step = 97; + const size_t max_range = 10000000; + const size_t range_step = 97; const unsigned nreps = 10; - for (range = 2; range < max_range; range += range_step) { - size_t s; + size_t s; unsigned rep; s = range; @@ -179,11 +177,7 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_prng_lg_range_u32, - test_prng_lg_range_u64, - test_prng_lg_range_zu, - test_prng_range_u32, - test_prng_range_u64, - test_prng_range_zu); + return test_no_reentrancy(test_prng_lg_range_u32, + test_prng_lg_range_u64, test_prng_lg_range_zu, test_prng_range_u32, + test_prng_range_u64, test_prng_range_zu); } diff --git a/test/unit/prof_accum.c b/test/unit/prof_accum.c index ef392acd..940468b9 100644 --- a/test/unit/prof_accum.c +++ b/test/unit/prof_accum.c @@ -3,10 +3,10 @@ #include "jemalloc/internal/prof_data.h" #include "jemalloc/internal/prof_sys.h" -#define NTHREADS 4 -#define NALLOCS_PER_THREAD 50 -#define DUMP_INTERVAL 1 -#define BT_COUNT_CHECK_INTERVAL 5 +#define NTHREADS 4 +#define NALLOCS_PER_THREAD 50 +#define DUMP_INTERVAL 1 +#define BT_COUNT_CHECK_INTERVAL 5 static int prof_dump_open_file_intercept(const char *filename, int mode) { @@ -20,13 +20,13 @@ 
prof_dump_open_file_intercept(const char *filename, int mode) { static void * alloc_from_permuted_backtrace(unsigned thd_ind, unsigned iteration) { - return btalloc(1, thd_ind*NALLOCS_PER_THREAD + iteration); + return btalloc(1, thd_ind * NALLOCS_PER_THREAD + iteration); } static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; - size_t bt_count_prev, bt_count; + size_t bt_count_prev, bt_count; unsigned i_prev, i; i_prev = 0; @@ -39,10 +39,10 @@ thd_start(void *varg) { 0, "Unexpected error while dumping heap profile"); } - if (i % BT_COUNT_CHECK_INTERVAL == 0 || - i+1 == NALLOCS_PER_THREAD) { + if (i % BT_COUNT_CHECK_INTERVAL == 0 + || i + 1 == NALLOCS_PER_THREAD) { bt_count = prof_bt_count(); - expect_zu_le(bt_count_prev+(i-i_prev), bt_count, + expect_zu_le(bt_count_prev + (i - i_prev), bt_count, "Expected larger backtrace count increase"); i_prev = i; bt_count_prev = bt_count; @@ -53,17 +53,17 @@ thd_start(void *varg) { } TEST_BEGIN(test_idump) { - bool active; - thd_t thds[NTHREADS]; + bool active; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + expect_d_eq( + mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open_file = prof_dump_open_file_intercept; @@ -79,6 +79,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_idump); + return test_no_reentrancy(test_idump); } diff --git a/test/unit/prof_active.c b/test/unit/prof_active.c index af29e7ad..fc8b150b 100644 --- a/test/unit/prof_active.c +++ b/test/unit/prof_active.c @@ -4,37 +4,37 @@ static void mallctl_bool_get(const char *name, bool expected, const char *func, int line) { - bool old; + bool old; size_t sz; sz = sizeof(old); expect_d_eq(mallctl(name, (void *)&old, &sz, NULL, 0), 
0, "%s():%d: Unexpected mallctl failure reading %s", func, line, name); - expect_b_eq(old, expected, "%s():%d: Unexpected %s value", func, line, - name); + expect_b_eq( + old, expected, "%s():%d: Unexpected %s value", func, line, name); } static void mallctl_bool_set(const char *name, bool old_expected, bool val_new, const char *func, int line) { - bool old; + bool old; size_t sz; sz = sizeof(old); - expect_d_eq(mallctl(name, (void *)&old, &sz, (void *)&val_new, - sizeof(val_new)), 0, - "%s():%d: Unexpected mallctl failure reading/writing %s", func, + expect_d_eq( + mallctl(name, (void *)&old, &sz, (void *)&val_new, sizeof(val_new)), + 0, "%s():%d: Unexpected mallctl failure reading/writing %s", func, line, name); expect_b_eq(old, old_expected, "%s():%d: Unexpected %s value", func, line, name); } static void -mallctl_prof_active_get_impl(bool prof_active_old_expected, const char *func, - int line) { +mallctl_prof_active_get_impl( + bool prof_active_old_expected, const char *func, int line) { mallctl_bool_get("prof.active", prof_active_old_expected, func, line); } -#define mallctl_prof_active_get(a) \ +#define mallctl_prof_active_get(a) \ mallctl_prof_active_get_impl(a, __func__, __LINE__) static void @@ -43,16 +43,16 @@ mallctl_prof_active_set_impl(bool prof_active_old_expected, mallctl_bool_set("prof.active", prof_active_old_expected, prof_active_new, func, line); } -#define mallctl_prof_active_set(a, b) \ +#define mallctl_prof_active_set(a, b) \ mallctl_prof_active_set_impl(a, b, __func__, __LINE__) static void -mallctl_thread_prof_active_get_impl(bool thread_prof_active_old_expected, - const char *func, int line) { - mallctl_bool_get("thread.prof.active", thread_prof_active_old_expected, - func, line); +mallctl_thread_prof_active_get_impl( + bool thread_prof_active_old_expected, const char *func, int line) { + mallctl_bool_get( + "thread.prof.active", thread_prof_active_old_expected, func, line); } -#define mallctl_thread_prof_active_get(a) \ +#define 
mallctl_thread_prof_active_get(a) \ mallctl_thread_prof_active_get_impl(a, __func__, __LINE__) static void @@ -61,24 +61,23 @@ mallctl_thread_prof_active_set_impl(bool thread_prof_active_old_expected, mallctl_bool_set("thread.prof.active", thread_prof_active_old_expected, thread_prof_active_new, func, line); } -#define mallctl_thread_prof_active_set(a, b) \ +#define mallctl_thread_prof_active_set(a, b) \ mallctl_thread_prof_active_set_impl(a, b, __func__, __LINE__) static void prof_sampling_probe_impl(bool expect_sample, const char *func, int line) { - void *p; + void *p; size_t expected_backtraces = expect_sample ? 1 : 0; - expect_zu_eq(prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, - line); + expect_zu_eq( + prof_bt_count(), 0, "%s():%d: Expected 0 backtraces", func, line); p = mallocx(1, 0); expect_ptr_not_null(p, "Unexpected mallocx() failure"); expect_zu_eq(prof_bt_count(), expected_backtraces, "%s():%d: Unexpected backtrace count", func, line); dallocx(p, 0); } -#define prof_sampling_probe(a) \ - prof_sampling_probe_impl(a, __func__, __LINE__) +#define prof_sampling_probe(a) prof_sampling_probe_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_active) { test_skip_if(!config_prof); @@ -114,6 +113,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_prof_active); + return test_no_reentrancy(test_prof_active); } diff --git a/test/unit/prof_gdump.c b/test/unit/prof_gdump.c index 46e45036..4cca9bdb 100644 --- a/test/unit/prof_gdump.c +++ b/test/unit/prof_gdump.c @@ -18,16 +18,16 @@ prof_dump_open_file_intercept(const char *filename, int mode) { TEST_BEGIN(test_gdump) { test_skip_if(opt_hpa); - bool active, gdump, gdump_old; - void *p, *q, *r, *s; + bool active, gdump, gdump_old; + void *p, *q, *r, *s; size_t sz; test_skip_if(!config_prof); active = true; - expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + expect_d_eq( + mallctl("prof.active", 
NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open_file = prof_dump_open_file_intercept; @@ -44,8 +44,8 @@ TEST_BEGIN(test_gdump) { gdump = false; sz = sizeof(gdump_old); expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, - (void *)&gdump, sizeof(gdump)), 0, - "Unexpected mallctl failure while disabling prof.gdump"); + (void *)&gdump, sizeof(gdump)), + 0, "Unexpected mallctl failure while disabling prof.gdump"); assert(gdump_old); did_prof_dump_open = false; r = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); @@ -55,8 +55,8 @@ TEST_BEGIN(test_gdump) { gdump = true; sz = sizeof(gdump_old); expect_d_eq(mallctl("prof.gdump", (void *)&gdump_old, &sz, - (void *)&gdump, sizeof(gdump)), 0, - "Unexpected mallctl failure while enabling prof.gdump"); + (void *)&gdump, sizeof(gdump)), + 0, "Unexpected mallctl failure while enabling prof.gdump"); assert(!gdump_old); did_prof_dump_open = false; s = mallocx((1U << SC_LG_LARGE_MINCLASS), 0); @@ -72,6 +72,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_gdump); + return test_no_reentrancy(test_gdump); } diff --git a/test/unit/prof_hook.c b/test/unit/prof_hook.c index fd2871e5..1d58469c 100644 --- a/test/unit/prof_hook.c +++ b/test/unit/prof_hook.c @@ -14,10 +14,10 @@ bool mock_dump_hook_called = false; bool mock_prof_sample_hook_called = false; bool mock_prof_sample_free_hook_called = false; -void *sampled_ptr = NULL; +void *sampled_ptr = NULL; size_t sampled_ptr_sz = 0; size_t sampled_ptr_usz = 0; -void *free_sampled_ptr = NULL; +void *free_sampled_ptr = NULL; size_t free_sampled_ptr_sz = 0; void @@ -49,7 +49,6 @@ mock_bt_augmenting_hook(void **vec, unsigned *len, unsigned max_len) { (*len)++; } - mock_bt_hook_called = true; } @@ -61,14 +60,15 @@ mock_dump_hook(const char *filename) { } void -mock_prof_sample_hook(const void *ptr, size_t sz, void **vec, unsigned len, size_t usz) { +mock_prof_sample_hook( + const void *ptr, size_t sz, void 
**vec, unsigned len, size_t usz) { mock_prof_sample_hook_called = true; sampled_ptr = (void *)ptr; sampled_ptr_sz = sz; sampled_ptr_usz = usz; for (unsigned i = 0; i < len; i++) { - expect_ptr_not_null((void **)vec[i], - "Backtrace should not contain NULL"); + expect_ptr_not_null( + (void **)vec[i], "Backtrace should not contain NULL"); } } @@ -80,7 +80,6 @@ mock_prof_sample_free_hook(const void *ptr, size_t sz) { } TEST_BEGIN(test_prof_backtrace_hook_replace) { - test_skip_if(!config_prof); mock_bt_hook_called = false; @@ -91,15 +90,16 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { expect_false(mock_bt_hook_called, "Called mock hook before it's set"); prof_backtrace_hook_t null_hook = NULL; - expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - NULL, 0, (void *)&null_hook, sizeof(null_hook)), - EINVAL, "Incorrectly allowed NULL backtrace hook"); + expect_d_eq(mallctl("experimental.hooks.prof_backtrace", NULL, 0, + (void *)&null_hook, sizeof(null_hook)), + EINVAL, "Incorrectly allowed NULL backtrace hook"); size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, - sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + (void *)&default_bt_hook, &default_bt_hook_sz, + (void *)&hook, sizeof(hook)), + 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); assert_ptr_not_null(p1, "Failed to allocate"); @@ -107,11 +107,11 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { expect_true(mock_bt_hook_called, "Didn't call mock hook"); prof_backtrace_hook_t current_hook; - size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, - sizeof(default_bt_hook)), 0, - "Unexpected mallctl failure resetting hook to 
default"); + (void *)¤t_hook, ¤t_hook_sz, + (void *)&default_bt_hook, sizeof(default_bt_hook)), + 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, "Hook returned by mallctl is not equal to mock hook"); @@ -122,7 +122,6 @@ TEST_BEGIN(test_prof_backtrace_hook_replace) { TEST_END TEST_BEGIN(test_prof_backtrace_hook_augment) { - test_skip_if(!config_prof); mock_bt_hook_called = false; @@ -135,8 +134,9 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { size_t default_bt_hook_sz = sizeof(prof_backtrace_hook_t); prof_backtrace_hook_t hook = &mock_bt_augmenting_hook; expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, - sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + (void *)&default_bt_hook, &default_bt_hook_sz, + (void *)&hook, sizeof(hook)), + 0, "Unexpected mallctl failure setting hook"); void *p1 = mallocx(1, 0); assert_ptr_not_null(p1, "Failed to allocate"); @@ -144,11 +144,11 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { expect_true(mock_bt_hook_called, "Didn't call mock hook"); prof_backtrace_hook_t current_hook; - size_t current_hook_sz = sizeof(prof_backtrace_hook_t); + size_t current_hook_sz = sizeof(prof_backtrace_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_backtrace", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, - sizeof(default_bt_hook)), 0, - "Unexpected mallctl failure resetting hook to default"); + (void *)¤t_hook, ¤t_hook_sz, + (void *)&default_bt_hook, sizeof(default_bt_hook)), + 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, "Hook returned by mallctl is not equal to mock hook"); @@ -159,34 +159,36 @@ TEST_BEGIN(test_prof_backtrace_hook_augment) { TEST_END TEST_BEGIN(test_prof_dump_hook) { - test_skip_if(!config_prof); expect_u_eq(opt_prof_bt_max, 200, "Unexpected backtrace stack depth"); mock_dump_hook_called = false; expect_d_eq(mallctl("prof.dump", 
NULL, NULL, (void *)&dump_filename, - sizeof(dump_filename)), 0, "Failed to dump heap profile"); + sizeof(dump_filename)), + 0, "Failed to dump heap profile"); expect_false(mock_dump_hook_called, "Called dump hook before it's set"); - size_t default_bt_hook_sz = sizeof(prof_dump_hook_t); + size_t default_bt_hook_sz = sizeof(prof_dump_hook_t); prof_dump_hook_t hook = &mock_dump_hook; - expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)&default_bt_hook, &default_bt_hook_sz, (void *)&hook, - sizeof(hook)), 0, "Unexpected mallctl failure setting hook"); + expect_d_eq( + mallctl("experimental.hooks.prof_dump", (void *)&default_bt_hook, + &default_bt_hook_sz, (void *)&hook, sizeof(hook)), + 0, "Unexpected mallctl failure setting hook"); expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&dump_filename, - sizeof(dump_filename)), 0, "Failed to dump heap profile"); + sizeof(dump_filename)), + 0, "Failed to dump heap profile"); expect_true(mock_dump_hook_called, "Didn't call mock hook"); prof_dump_hook_t current_hook; - size_t current_hook_sz = sizeof(prof_dump_hook_t); + size_t current_hook_sz = sizeof(prof_dump_hook_t); expect_d_eq(mallctl("experimental.hooks.prof_dump", - (void *)¤t_hook, ¤t_hook_sz, (void *)&default_bt_hook, - sizeof(default_bt_hook)), 0, - "Unexpected mallctl failure resetting hook to default"); + (void *)¤t_hook, ¤t_hook_sz, + (void *)&default_bt_hook, sizeof(default_bt_hook)), + 0, "Unexpected mallctl failure resetting hook to default"); expect_ptr_eq(current_hook, hook, "Hook returned by mallctl is not equal to mock hook"); @@ -195,12 +197,12 @@ TEST_END /* Need the do_write flag because NULL is a valid to_write value. 
*/ static void -read_write_prof_sample_hook(prof_sample_hook_t *to_read, bool do_write, - prof_sample_hook_t to_write) { +read_write_prof_sample_hook( + prof_sample_hook_t *to_read, bool do_write, prof_sample_hook_t to_write) { size_t hook_sz = sizeof(prof_sample_hook_t); - expect_d_eq(mallctl("experimental.hooks.prof_sample", - (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, - "Unexpected prof_sample_hook mallctl failure"); + expect_d_eq(mallctl("experimental.hooks.prof_sample", (void *)to_read, + &hook_sz, do_write ? &to_write : NULL, hook_sz), + 0, "Unexpected prof_sample_hook mallctl failure"); } static void @@ -220,9 +222,10 @@ static void read_write_prof_sample_free_hook(prof_sample_free_hook_t *to_read, bool do_write, prof_sample_free_hook_t to_write) { size_t hook_sz = sizeof(prof_sample_free_hook_t); - expect_d_eq(mallctl("experimental.hooks.prof_sample_free", - (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, - "Unexpected prof_sample_free_hook mallctl failure"); + expect_d_eq( + mallctl("experimental.hooks.prof_sample_free", (void *)to_read, + &hook_sz, do_write ? &to_write : NULL, hook_sz), + 0, "Unexpected prof_sample_free_hook mallctl failure"); } static void @@ -248,38 +251,40 @@ check_prof_sample_hooks(bool sample_hook_set, bool sample_free_hook_set) { expect_zu_eq(sampled_ptr_sz, 0, "Unexpected sampled ptr size"); expect_zu_eq(sampled_ptr_usz, 0, "Unexpected sampled ptr usize"); expect_ptr_null(free_sampled_ptr, "Unexpected free sampled ptr"); - expect_zu_eq(free_sampled_ptr_sz, 0, - "Unexpected free sampled ptr size"); + expect_zu_eq( + free_sampled_ptr_sz, 0, "Unexpected free sampled ptr size"); prof_sample_hook_t curr_hook = read_prof_sample_hook(); expect_ptr_eq(curr_hook, sample_hook_set ? mock_prof_sample_hook : NULL, "Unexpected non NULL default hook"); prof_sample_free_hook_t curr_free_hook = read_prof_sample_free_hook(); - expect_ptr_eq(curr_free_hook, sample_free_hook_set ? 
- mock_prof_sample_free_hook : NULL, + expect_ptr_eq(curr_free_hook, + sample_free_hook_set ? mock_prof_sample_free_hook : NULL, "Unexpected non NULL default hook"); size_t alloc_sz = 10; size_t alloc_usz = 16; - void *p = mallocx(alloc_sz, 0); + void *p = mallocx(alloc_sz, 0); expect_ptr_not_null(p, "Failed to allocate"); expect_true(mock_prof_sample_hook_called == sample_hook_set, - "Incorrect prof_sample hook usage"); + "Incorrect prof_sample hook usage"); if (sample_hook_set) { expect_ptr_eq(p, sampled_ptr, "Unexpected sampled ptr"); - expect_zu_eq(alloc_sz, sampled_ptr_sz, - "Unexpected sampled usize"); - expect_zu_eq(alloc_usz, sampled_ptr_usz, "Unexpected sampled usize"); + expect_zu_eq( + alloc_sz, sampled_ptr_sz, "Unexpected sampled usize"); + expect_zu_eq( + alloc_usz, sampled_ptr_usz, "Unexpected sampled usize"); } dallocx(p, 0); expect_true(mock_prof_sample_free_hook_called == sample_free_hook_set, - "Incorrect prof_sample_free hook usage"); + "Incorrect prof_sample_free hook usage"); if (sample_free_hook_set) { size_t usz = sz_s2u(alloc_sz); expect_ptr_eq(p, free_sampled_ptr, "Unexpected sampled ptr"); - expect_zu_eq(usz, free_sampled_ptr_sz, "Unexpected sampled usize"); + expect_zu_eq( + usz, free_sampled_ptr_sz, "Unexpected sampled usize"); } sampled_ptr = free_sampled_ptr = NULL; @@ -312,14 +317,14 @@ TEST_BEGIN(test_prof_sample_hooks) { check_prof_sample_hooks(true, false); prof_sample_free_hook_t sample_free_hook; - read_write_prof_sample_free_hook(&sample_free_hook, true, - mock_prof_sample_free_hook); + read_write_prof_sample_free_hook( + &sample_free_hook, true, mock_prof_sample_free_hook); expect_ptr_null(sample_free_hook, "Unexpected non NULL default hook"); check_prof_sample_hooks(true, true); read_write_prof_sample_hook(&sample_hook, true, NULL); - expect_ptr_eq(sample_hook, mock_prof_sample_hook, - "Unexpected prof_sample hook"); + expect_ptr_eq( + sample_hook, mock_prof_sample_hook, "Unexpected prof_sample hook"); 
check_prof_sample_hooks(false, true); read_write_prof_sample_free_hook(&sample_free_hook, true, NULL); @@ -331,9 +336,7 @@ TEST_END int main(void) { - return test( - test_prof_backtrace_hook_replace, - test_prof_backtrace_hook_augment, - test_prof_dump_hook, + return test(test_prof_backtrace_hook_replace, + test_prof_backtrace_hook_augment, test_prof_dump_hook, test_prof_sample_hooks); } diff --git a/test/unit/prof_idump.c b/test/unit/prof_idump.c index 455ac529..b16b4a1f 100644 --- a/test/unit/prof_idump.c +++ b/test/unit/prof_idump.c @@ -13,8 +13,9 @@ prof_dump_open_file_intercept(const char *filename, int mode) { did_prof_dump_open = true; const char filename_prefix[] = TEST_PREFIX "."; - expect_d_eq(strncmp(filename_prefix, filename, sizeof(filename_prefix) - - 1), 0, "Dump file name should start with \"" TEST_PREFIX ".\""); + expect_d_eq( + strncmp(filename_prefix, filename, sizeof(filename_prefix) - 1), 0, + "Dump file name should start with \"" TEST_PREFIX ".\""); fd = open("/dev/null", O_WRONLY); assert_d_ne(fd, -1, "Unexpected open() failure"); @@ -23,7 +24,7 @@ prof_dump_open_file_intercept(const char *filename, int mode) { } TEST_BEGIN(test_idump) { - bool active; + bool active; void *p; const char *test_prefix = TEST_PREFIX; @@ -33,12 +34,12 @@ TEST_BEGIN(test_idump) { active = true; expect_d_eq(mallctl("prof.prefix", NULL, NULL, (void *)&test_prefix, - sizeof(test_prefix)), 0, - "Unexpected mallctl failure while overwriting dump prefix"); + sizeof(test_prefix)), + 0, "Unexpected mallctl failure while overwriting dump prefix"); - expect_d_eq(mallctl("prof.active", NULL, NULL, (void *)&active, - sizeof(active)), 0, - "Unexpected mallctl failure while activating profiling"); + expect_d_eq( + mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure while activating profiling"); prof_dump_open_file = prof_dump_open_file_intercept; @@ -52,6 +53,5 @@ TEST_END int main(void) { - return test( - test_idump); + return 
test(test_idump); } diff --git a/test/unit/prof_log.c b/test/unit/prof_log.c index a32fdd0b..8cfc19ff 100644 --- a/test/unit/prof_log.c +++ b/test/unit/prof_log.c @@ -4,22 +4,25 @@ #define N_PARAM 100 #define N_THREADS 10 -static void expect_rep(void) { +static void +expect_rep(void) { expect_b_eq(prof_log_rep_check(), false, "Rep check failed"); } -static void expect_log_empty(void) { - expect_zu_eq(prof_log_bt_count(), 0, - "The log has backtraces; it isn't empty"); - expect_zu_eq(prof_log_thr_count(), 0, - "The log has threads; it isn't empty"); +static void +expect_log_empty(void) { + expect_zu_eq( + prof_log_bt_count(), 0, "The log has backtraces; it isn't empty"); + expect_zu_eq( + prof_log_thr_count(), 0, "The log has threads; it isn't empty"); expect_zu_eq(prof_log_alloc_count(), 0, "The log has allocations; it isn't empty"); } void *buf[N_PARAM]; -static void f(void) { +static void +f(void) { int i; for (i = 0; i < N_PARAM; i++) { buf[i] = malloc(100); @@ -46,8 +49,8 @@ TEST_BEGIN(test_prof_log_many_logs) { f(); expect_zu_eq(prof_log_thr_count(), 1, "Wrong thread count"); expect_rep(); - expect_b_eq(prof_log_is_logging(), true, - "Logging should still be on"); + expect_b_eq( + prof_log_is_logging(), true, "Logging should still be on"); expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); expect_b_eq(prof_log_is_logging(), false, @@ -58,7 +61,8 @@ TEST_END thd_t thr_buf[N_THREADS]; -static void *f_thread(void *unused) { +static void * +f_thread(void *unused) { int i; for (i = 0; i < N_PARAM; i++) { void *p = malloc(100); @@ -70,7 +74,6 @@ static void *f_thread(void *unused) { } TEST_BEGIN(test_prof_log_many_threads) { - test_skip_if(!config_prof); int i; @@ -83,32 +86,34 @@ TEST_BEGIN(test_prof_log_many_threads) { for (i = 0; i < N_THREADS; i++) { thd_join(thr_buf[i], NULL); } - expect_zu_eq(prof_log_thr_count(), N_THREADS, - "Wrong number of thread entries"); + expect_zu_eq( + 
prof_log_thr_count(), N_THREADS, "Wrong number of thread entries"); expect_rep(); expect_d_eq(mallctl("prof.log_stop", NULL, NULL, NULL, 0), 0, "Unexpected mallctl failure when stopping logging"); } TEST_END -static void f3(void) { +static void +f3(void) { void *p = malloc(100); free(p); } -static void f1(void) { +static void +f1(void) { void *p = malloc(100); f3(); free(p); } -static void f2(void) { +static void +f2(void) { void *p = malloc(100); free(p); } TEST_BEGIN(test_prof_log_many_traces) { - test_skip_if(!config_prof); expect_d_eq(mallctl("prof.log_start", NULL, NULL, NULL, 0), 0, @@ -144,8 +149,6 @@ main(void) { if (config_prof) { prof_log_dummy_set(true); } - return test_no_reentrancy( - test_prof_log_many_logs, - test_prof_log_many_traces, - test_prof_log_many_threads); + return test_no_reentrancy(test_prof_log_many_logs, + test_prof_log_many_traces, test_prof_log_many_threads); } diff --git a/test/unit/prof_mdump.c b/test/unit/prof_mdump.c index 0559339e..0200f92f 100644 --- a/test/unit/prof_mdump.c +++ b/test/unit/prof_mdump.c @@ -3,7 +3,7 @@ #include "jemalloc/internal/prof_sys.h" static const char *test_filename = "test_filename"; -static bool did_prof_dump_open; +static bool did_prof_dump_open; static int prof_dump_open_file_intercept(const char *filename, int mode) { @@ -35,8 +35,8 @@ TEST_BEGIN(test_mdump_normal) { prof_dump_open_file = prof_dump_open_file_intercept; did_prof_dump_open = false; expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, - sizeof(test_filename)), 0, - "Unexpected mallctl failure while dumping"); + sizeof(test_filename)), + 0, "Unexpected mallctl failure while dumping"); expect_true(did_prof_dump_open, "Expected a profile dump"); dallocx(p, 0); @@ -89,7 +89,8 @@ static void expect_write_failure(int count) { prof_dump_write_file_count = count; expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, - sizeof(test_filename)), EFAULT, "Dump should err"); + sizeof(test_filename)), + EFAULT, "Dump 
should err"); expect_d_eq(prof_dump_write_file_count, 0, "Dumping stopped after a wrong number of writes"); } @@ -98,7 +99,7 @@ TEST_BEGIN(test_mdump_output_error) { test_skip_if(!config_prof); test_skip_if(!config_debug); - prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; prof_dump_write_file_t *write_file_orig = prof_dump_write_file; prof_dump_write_file = prof_dump_write_file_error; @@ -168,9 +169,9 @@ TEST_BEGIN(test_mdump_maps_error) { test_skip_if(!config_debug); test_skip_if(prof_dump_open_maps == NULL); - prof_dump_open_file_t *open_file_orig = prof_dump_open_file; + prof_dump_open_file_t *open_file_orig = prof_dump_open_file; prof_dump_write_file_t *write_file_orig = prof_dump_write_file; - prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps; + prof_dump_open_maps_t *open_maps_orig = prof_dump_open_maps; prof_dump_open_file = prof_dump_open_file_intercept; prof_dump_write_file = prof_dump_write_maps_file_error; @@ -186,8 +187,8 @@ TEST_BEGIN(test_mdump_maps_error) { started_piping_maps_file = false; prof_dump_write_file_count = 0; expect_d_eq(mallctl("prof.dump", NULL, NULL, (void *)&test_filename, - sizeof(test_filename)), 0, - "mallctl should not fail in case of maps file opening failure"); + sizeof(test_filename)), + 0, "mallctl should not fail in case of maps file opening failure"); expect_false(started_piping_maps_file, "Shouldn't start piping maps"); expect_d_eq(prof_dump_write_file_count, 0, "Dumping stopped after a wrong number of writes"); @@ -211,7 +212,5 @@ TEST_END int main(void) { return test( - test_mdump_normal, - test_mdump_output_error, - test_mdump_maps_error); + test_mdump_normal, test_mdump_output_error, test_mdump_maps_error); } diff --git a/test/unit/prof_recent.c b/test/unit/prof_recent.c index 24ee6f42..b8fd0ca8 100644 --- a/test/unit/prof_recent.c +++ b/test/unit/prof_recent.c @@ -32,18 +32,20 @@ TEST_BEGIN(test_prof_recent_off) { 
test_skip_if(config_prof); const ssize_t past_ref = 0, future_ref = 0; - const size_t len_ref = sizeof(ssize_t); + const size_t len_ref = sizeof(ssize_t); ssize_t past = past_ref, future = future_ref; - size_t len = len_ref; + size_t len = len_ref; -#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) do { \ - assert_d_eq(mallctl("experimental.prof_recent." opt, a, b, c, \ - d), ENOENT, "Should return ENOENT when config_prof is off");\ - assert_zd_eq(past, past_ref, "output was touched"); \ - assert_zu_eq(len, len_ref, "output length was touched"); \ - assert_zd_eq(future, future_ref, "input was touched"); \ -} while (0) +#define ASSERT_SHOULD_FAIL(opt, a, b, c, d) \ + do { \ + assert_d_eq( \ + mallctl("experimental.prof_recent." opt, a, b, c, d), \ + ENOENT, "Should return ENOENT when config_prof is off"); \ + assert_zd_eq(past, past_ref, "output was touched"); \ + assert_zu_eq(len, len_ref, "output length was touched"); \ + assert_zd_eq(future, future_ref, "input was touched"); \ + } while (0) ASSERT_SHOULD_FAIL("alloc_max", NULL, NULL, NULL, 0); ASSERT_SHOULD_FAIL("alloc_max", &past, &len, NULL, 0); @@ -58,40 +60,45 @@ TEST_BEGIN(test_prof_recent_on) { test_skip_if(!config_prof); ssize_t past, future; - size_t len = sizeof(ssize_t); + size_t len = sizeof(ssize_t); confirm_prof_setup(); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, NULL, 0), 0, "no-op mallctl should be allowed"); + assert_d_eq( + mallctl("experimental.prof_recent.alloc_max", NULL, NULL, NULL, 0), + 0, "no-op mallctl should be allowed"); confirm_prof_setup(); - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, NULL, 0), 0, "Read error"); + assert_d_eq( + mallctl("experimental.prof_recent.alloc_max", &past, &len, NULL, 0), + 0, "Read error"); expect_zd_eq(past, OPT_ALLOC_MAX, "Wrong read result"); future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, len), 0, "Write error"); + 
assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, len), + 0, "Write error"); future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len), 0, "Read/write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len), + 0, "Read/write error"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Wrong read result"); future = -2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len), EINVAL, - "Invalid write should return EINVAL"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len), + EINVAL, "Invalid write should return EINVAL"); expect_zd_eq(past, OPT_ALLOC_MAX + 1, "Output should not be touched given invalid write"); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len), 0, "Read/write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len), + 0, "Read/write error"); expect_zd_eq(past, -1, "Wrong read result"); future = OPT_ALLOC_MAX + 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - &past, &len, &future, len * 2), EINVAL, - "Invalid write should return EINVAL"); - expect_zd_eq(past, -1, - "Output should not be touched given invalid write"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", &past, &len, + &future, len * 2), + EINVAL, "Invalid write should return EINVAL"); + expect_zd_eq( + past, -1, "Output should not be touched given invalid write"); confirm_prof_setup(); } @@ -107,8 +114,8 @@ confirm_malloc(void *p) { assert_ptr_not_null(e, "NULL edata for living pointer"); prof_recent_t *n = edata_prof_recent_alloc_get_no_lock_test(e); assert_ptr_not_null(n, "Record in edata should not be NULL"); - expect_ptr_not_null(n->alloc_tctx, - "alloc_tctx in record should not be NULL"); + expect_ptr_not_null( + n->alloc_tctx, "alloc_tctx in record should not be NULL"); 
expect_ptr_eq(e, prof_recent_alloc_edata_get_no_lock_test(n), "edata pointer in record is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); @@ -116,17 +123,17 @@ confirm_malloc(void *p) { static void confirm_record_size(prof_recent_t *n, unsigned kth) { - expect_zu_eq(n->size, NTH_REQ_SIZE(kth), - "Recorded allocation size is wrong"); + expect_zu_eq( + n->size, NTH_REQ_SIZE(kth), "Recorded allocation size is wrong"); } static void confirm_record_living(prof_recent_t *n) { - expect_ptr_not_null(n->alloc_tctx, - "alloc_tctx in record should not be NULL"); + expect_ptr_not_null( + n->alloc_tctx, "alloc_tctx in record should not be NULL"); edata_t *edata = prof_recent_alloc_edata_get_no_lock_test(n); - assert_ptr_not_null(edata, - "Recorded edata should not be NULL for living pointer"); + assert_ptr_not_null( + edata, "Recorded edata should not be NULL for living pointer"); expect_ptr_eq(n, edata_prof_recent_alloc_get_no_lock_test(edata), "Record in edata is not correct"); expect_ptr_null(n->dalloc_tctx, "dalloc_tctx in record should be NULL"); @@ -134,8 +141,8 @@ confirm_record_living(prof_recent_t *n) { static void confirm_record_released(prof_recent_t *n) { - expect_ptr_not_null(n->alloc_tctx, - "alloc_tctx in record should not be NULL"); + expect_ptr_not_null( + n->alloc_tctx, "alloc_tctx in record should not be NULL"); expect_ptr_null(prof_recent_alloc_edata_get_no_lock_test(n), "Recorded edata should be NULL for released pointer"); expect_ptr_not_null(n->dalloc_tctx, @@ -145,12 +152,12 @@ confirm_record_released(prof_recent_t *n) { TEST_BEGIN(test_prof_recent_alloc) { test_skip_if(!config_prof); - bool b; - unsigned i, c; - size_t req_size; - void *p; + bool b; + unsigned i, c; + size_t req_size; + void *p; prof_recent_t *n; - ssize_t future; + ssize_t future; confirm_prof_setup(); @@ -175,7 +182,7 @@ TEST_BEGIN(test_prof_recent_alloc) { continue; } c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, 
&prof_recent_alloc_list, link) { ++c; confirm_record_size(n, i + c - OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { @@ -184,8 +191,8 @@ TEST_BEGIN(test_prof_recent_alloc) { confirm_record_released(n); } } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -204,13 +211,13 @@ TEST_BEGIN(test_prof_recent_alloc) { p = malloc(req_size); assert_ptr_not_null(p, "malloc failed unexpectedly"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { confirm_record_size(n, c + OPT_ALLOC_MAX); confirm_record_released(n); ++c; } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } @@ -231,91 +238,96 @@ TEST_BEGIN(test_prof_recent_alloc) { p = malloc(req_size); confirm_malloc(p); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(n, /* Is the allocation from the third batch? */ - i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX ? - /* If yes, then it's just recorded. */ - i + c - OPT_ALLOC_MAX : - /* + i + c - OPT_ALLOC_MAX >= 3 * OPT_ALLOC_MAX + ? + /* If yes, then it's just recorded. */ + i + c - OPT_ALLOC_MAX + : + /* * Otherwise, it should come from the first batch * instead of the second batch. */ - i + c - 2 * OPT_ALLOC_MAX); + i + c - 2 * OPT_ALLOC_MAX); if (c == OPT_ALLOC_MAX) { confirm_record_living(n); } else { confirm_record_released(n); } } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); free(p); } /* Increasing the limit shouldn't alter the list of records. 
*/ future = OPT_ALLOC_MAX + 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); ++c; } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* * Decreasing the limit shouldn't alter the list of records as long as * the new limit is still no less than the length of the list. */ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); ++c; } - assert_u_eq(c, OPT_ALLOC_MAX, - "Incorrect total number of allocations"); + assert_u_eq(c, OPT_ALLOC_MAX, "Incorrect total number of allocations"); /* * Decreasing the limit should shorten the list of records if the new * limit is less than the length of the list. 
*/ future = OPT_ALLOC_MAX - 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); } - assert_u_eq(c, OPT_ALLOC_MAX - 1, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Setting to unlimited shouldn't alter the list of records. */ future = -1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); c = 0; - ql_foreach(n, &prof_recent_alloc_list, link) { + ql_foreach (n, &prof_recent_alloc_list, link) { ++c; confirm_record_size(n, c + 3 * OPT_ALLOC_MAX); confirm_record_released(n); } - assert_u_eq(c, OPT_ALLOC_MAX - 1, - "Incorrect total number of allocations"); + assert_u_eq( + c, OPT_ALLOC_MAX - 1, "Incorrect total number of allocations"); /* Downshift to only one record. */ future = 1; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); assert_false(ql_empty(&prof_recent_alloc_list), "Recent list is empty"); n = ql_first(&prof_recent_alloc_list); confirm_record_size(n, 4 * OPT_ALLOC_MAX - 1); @@ -325,17 +337,19 @@ TEST_BEGIN(test_prof_recent_alloc) { /* Completely turn off. 
*/ future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - assert_true(ql_empty(&prof_recent_alloc_list), - "Recent list should be empty"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); + assert_true( + ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); /* Restore the settings. */ future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); - assert_true(ql_empty(&prof_recent_alloc_list), - "Recent list should be empty"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); + assert_true( + ql_empty(&prof_recent_alloc_list), "Recent list should be empty"); confirm_prof_setup(); } @@ -344,7 +358,7 @@ TEST_END #undef NTH_REQ_SIZE #define DUMP_OUT_SIZE 4096 -static char dump_out[DUMP_OUT_SIZE]; +static char dump_out[DUMP_OUT_SIZE]; static size_t dump_out_len = 0; static void @@ -359,14 +373,15 @@ static void call_dump(void) { static void *in[2] = {test_dump_write_cb, NULL}; dump_out_len = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", - NULL, NULL, in, sizeof(in)), 0, "Dump mallctl raised error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_dump", NULL, NULL, + in, sizeof(in)), + 0, "Dump mallctl raised error"); } typedef struct { size_t size; size_t usize; - bool released; + bool released; } confirm_record_t; #define DUMP_ERROR "Dump output is wrong" @@ -375,7 +390,7 @@ static void confirm_record(const char *template, const confirm_record_t *records, const size_t n_records) { static const char *types[2] = {"alloc", "dalloc"}; - static char buf[64]; + static char buf[64]; /* * The template string would be in the form of: @@ -384,32 +399,35 @@ confirm_record(const char *template, const confirm_record_t *records, * 
"{...,\"recent_alloc\":[...]}". * Using "- 2" serves to cut right before the ending "]}". */ - assert_d_eq(memcmp(dump_out, template, strlen(template) - 2), 0, - DUMP_ERROR); + assert_d_eq( + memcmp(dump_out, template, strlen(template) - 2), 0, DUMP_ERROR); assert_d_eq(memcmp(dump_out + strlen(dump_out) - 2, - template + strlen(template) - 2, 2), 0, DUMP_ERROR); + template + strlen(template) - 2, 2), + 0, DUMP_ERROR); - const char *start = dump_out + strlen(template) - 2; - const char *end = dump_out + strlen(dump_out) - 2; + const char *start = dump_out + strlen(template) - 2; + const char *end = dump_out + strlen(dump_out) - 2; const confirm_record_t *record; for (record = records; record < records + n_records; ++record) { +#define ASSERT_CHAR(c) \ + do { \ + assert_true(start < end, DUMP_ERROR); \ + assert_c_eq(*start++, c, DUMP_ERROR); \ + } while (0) -#define ASSERT_CHAR(c) do { \ - assert_true(start < end, DUMP_ERROR); \ - assert_c_eq(*start++, c, DUMP_ERROR); \ -} while (0) +#define ASSERT_STR(s) \ + do { \ + const size_t len = strlen(s); \ + assert_true(start + len <= end, DUMP_ERROR); \ + assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ + start += len; \ + } while (0) -#define ASSERT_STR(s) do { \ - const size_t len = strlen(s); \ - assert_true(start + len <= end, DUMP_ERROR); \ - assert_d_eq(memcmp(start, s, len), 0, DUMP_ERROR); \ - start += len; \ -} while (0) - -#define ASSERT_FORMATTED_STR(s, ...) do { \ - malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__); \ - ASSERT_STR(buf); \ -} while (0) +#define ASSERT_FORMATTED_STR(s, ...) 
\ + do { \ + malloc_snprintf(buf, sizeof(buf), s, __VA_ARGS__); \ + ASSERT_STR(buf); \ + } while (0) if (record != records) { ASSERT_CHAR(','); @@ -442,10 +460,10 @@ confirm_record(const char *template, const confirm_record_t *records, ASSERT_CHAR(','); if (thd_has_setname() && opt_prof_sys_thread_name) { - ASSERT_FORMATTED_STR("\"%s_thread_name\"", - *type); - ASSERT_FORMATTED_STR(":\"%s\",", - test_thread_name); + ASSERT_FORMATTED_STR( + "\"%s_thread_name\"", *type); + ASSERT_FORMATTED_STR( + ":\"%s\",", test_thread_name); } ASSERT_FORMATTED_STR("\"%s_time\"", *type); @@ -458,9 +476,9 @@ confirm_record(const char *template, const confirm_record_t *records, ASSERT_FORMATTED_STR("\"%s_trace\"", *type); ASSERT_CHAR(':'); ASSERT_CHAR('['); - while (isdigit(*start) || *start == 'x' || - (*start >= 'a' && *start <= 'f') || - *start == '\"' || *start == ',') { + while (isdigit(*start) || *start == 'x' + || (*start >= 'a' && *start <= 'f') + || *start == '\"' || *start == ',') { ++start; } ASSERT_CHAR(']'); @@ -483,7 +501,6 @@ confirm_record(const char *template, const confirm_record_t *records, #undef ASSERT_FORMATTED_STR #undef ASSERT_STR #undef ASSERT_CHAR - } assert_ptr_eq(record, records + n_records, DUMP_ERROR); assert_ptr_eq(start, end, DUMP_ERROR); @@ -495,25 +512,30 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { thd_setname(test_thread_name); confirm_prof_setup(); - ssize_t future; - void *p, *q; + ssize_t future; + void *p, *q; confirm_record_t records[2]; - assert_zu_eq(lg_prof_sample, (size_t)0, - "lg_prof_sample not set correctly"); + assert_zu_eq( + lg_prof_sample, (size_t)0, "lg_prof_sample not set correctly"); future = 0; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); call_dump(); - expect_str_eq(dump_out, "{\"sample_interval\":1," - 
"\"recent_alloc_max\":0,\"recent_alloc\":[]}", DUMP_ERROR); + expect_str_eq(dump_out, + "{\"sample_interval\":1," + "\"recent_alloc_max\":0,\"recent_alloc\":[]}", + DUMP_ERROR); future = 2; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); call_dump(); - const char *template = "{\"sample_interval\":1," + const char *template = + "{\"sample_interval\":1," "\"recent_alloc_max\":2,\"recent_alloc\":[]}"; expect_str_eq(dump_out, template, DUMP_ERROR); @@ -542,8 +564,9 @@ TEST_BEGIN(test_prof_recent_alloc_dump) { confirm_record(template, records, 2); future = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &future, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &future, sizeof(ssize_t)), + 0, "Write error"); confirm_prof_setup(); } TEST_END @@ -558,14 +581,14 @@ TEST_END #define STRESS_ALLOC_MAX 4096 typedef struct { - thd_t thd; + thd_t thd; size_t id; - void *ptrs[N_PTRS]; + void *ptrs[N_PTRS]; size_t count; } thd_data_t; static thd_data_t thd_data[N_THREADS]; -static ssize_t test_max; +static ssize_t test_max; static void test_write_cb(void *cbopaque, const char *str) { @@ -575,11 +598,11 @@ test_write_cb(void *cbopaque, const char *str) { static void * f_thread(void *arg) { const size_t thd_id = *(size_t *)arg; - thd_data_t *data_p = thd_data + thd_id; + thd_data_t *data_p = thd_data + thd_id; assert(data_p->id == thd_id); data_p->count = 0; uint64_t rand = (uint64_t)thd_id; - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = tsd_fetch(); assert(test_max > 1); ssize_t last_max = -1; for (int i = 0; i < N_ITERS; i++) { @@ -603,15 +626,15 @@ f_thread(void *arg) { } else if (rand % 5 == 1) { last_max = prof_recent_alloc_max_ctl_read(); } else if (rand % 5 == 2) { - last_max = - 
prof_recent_alloc_max_ctl_write(tsd, test_max * 2); + last_max = prof_recent_alloc_max_ctl_write( + tsd, test_max * 2); } else if (rand % 5 == 3) { - last_max = - prof_recent_alloc_max_ctl_write(tsd, test_max); + last_max = prof_recent_alloc_max_ctl_write( + tsd, test_max); } else { assert(rand % 5 == 4); - last_max = - prof_recent_alloc_max_ctl_write(tsd, test_max / 2); + last_max = prof_recent_alloc_max_ctl_write( + tsd, test_max / 2); } assert_zd_ge(last_max, -1, "Illegal last-N max"); } @@ -640,8 +663,9 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = STRESS_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &test_max, sizeof(ssize_t)), + 0, "Write error"); for (size_t i = 0; i < N_THREADS; i++) { thd_data_t *data_p = thd_data + i; data_p->id = i; @@ -653,8 +677,9 @@ TEST_BEGIN(test_prof_recent_stress) { } test_max = OPT_ALLOC_MAX; - assert_d_eq(mallctl("experimental.prof_recent.alloc_max", - NULL, NULL, &test_max, sizeof(ssize_t)), 0, "Write error"); + assert_d_eq(mallctl("experimental.prof_recent.alloc_max", NULL, NULL, + &test_max, sizeof(ssize_t)), + 0, "Write error"); confirm_prof_setup(); } TEST_END @@ -666,11 +691,7 @@ TEST_END int main(void) { - return test( - test_confirm_setup, - test_prof_recent_off, - test_prof_recent_on, - test_prof_recent_alloc, - test_prof_recent_alloc_dump, - test_prof_recent_stress); + return test(test_confirm_setup, test_prof_recent_off, + test_prof_recent_on, test_prof_recent_alloc, + test_prof_recent_alloc_dump, test_prof_recent_stress); } diff --git a/test/unit/prof_reset.c b/test/unit/prof_reset.c index 9b33b205..0e64279e 100644 --- a/test/unit/prof_reset.c +++ b/test/unit/prof_reset.c @@ -15,8 +15,9 @@ prof_dump_open_file_intercept(const char *filename, int mode) { static void set_prof_active(bool active) { - expect_d_eq(mallctl("prof.active", NULL, NULL, (void 
*)&active, - sizeof(active)), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("prof.active", NULL, NULL, (void *)&active, sizeof(active)), + 0, "Unexpected mallctl failure"); } static size_t @@ -32,25 +33,26 @@ get_lg_prof_sample(void) { static void do_prof_reset(size_t lg_prof_sample_input) { expect_d_eq(mallctl("prof.reset", NULL, NULL, - (void *)&lg_prof_sample_input, sizeof(size_t)), 0, - "Unexpected mallctl failure while resetting profile data"); + (void *)&lg_prof_sample_input, sizeof(size_t)), + 0, "Unexpected mallctl failure while resetting profile data"); expect_zu_eq(lg_prof_sample_input, get_lg_prof_sample(), "Expected profile sample rate change"); } TEST_BEGIN(test_prof_reset_basic) { - size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next; - size_t sz; + size_t lg_prof_sample_orig, lg_prof_sample_cur, lg_prof_sample_next; + size_t sz; unsigned i; test_skip_if(!config_prof); sz = sizeof(size_t); expect_d_eq(mallctl("opt.lg_prof_sample", (void *)&lg_prof_sample_orig, - &sz, NULL, 0), 0, + &sz, NULL, 0), + 0, "Unexpected mallctl failure while reading profiling sample rate"); - expect_zu_eq(lg_prof_sample_orig, 0, - "Unexpected profiling sample rate"); + expect_zu_eq( + lg_prof_sample_orig, 0, "Unexpected profiling sample rate"); lg_prof_sample_cur = get_lg_prof_sample(); expect_zu_eq(lg_prof_sample_orig, lg_prof_sample_cur, "Unexpected disagreement between \"opt.lg_prof_sample\" and " @@ -110,23 +112,24 @@ TEST_BEGIN(test_prof_reset_cleanup) { } TEST_END -#define NTHREADS 4 -#define NALLOCS_PER_THREAD (1U << 13) -#define OBJ_RING_BUF_COUNT 1531 -#define RESET_INTERVAL (1U << 10) -#define DUMP_INTERVAL 3677 +#define NTHREADS 4 +#define NALLOCS_PER_THREAD (1U << 13) +#define OBJ_RING_BUF_COUNT 1531 +#define RESET_INTERVAL (1U << 10) +#define DUMP_INTERVAL 3677 static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; unsigned i; - void *objs[OBJ_RING_BUF_COUNT]; + void *objs[OBJ_RING_BUF_COUNT]; memset(objs, 0, 
sizeof(objs)); for (i = 0; i < NALLOCS_PER_THREAD; i++) { if (i % RESET_INTERVAL == 0) { expect_d_eq(mallctl("prof.reset", NULL, NULL, NULL, 0), - 0, "Unexpected error while resetting heap profile " + 0, + "Unexpected error while resetting heap profile " "data"); } @@ -141,9 +144,9 @@ thd_start(void *varg) { dallocx(*pp, 0); *pp = NULL; } - *pp = btalloc(1, thd_ind*NALLOCS_PER_THREAD + i); - expect_ptr_not_null(*pp, - "Unexpected btalloc() failure"); + *pp = btalloc(1, thd_ind * NALLOCS_PER_THREAD + i); + expect_ptr_not_null( + *pp, "Unexpected btalloc() failure"); } } @@ -160,17 +163,16 @@ thd_start(void *varg) { } TEST_BEGIN(test_prof_reset) { - size_t lg_prof_sample_orig; - thd_t thds[NTHREADS]; + size_t lg_prof_sample_orig; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; - size_t bt_count, tdata_count; + size_t bt_count, tdata_count; test_skip_if(!config_prof); bt_count = prof_bt_count(); - expect_zu_eq(bt_count, 0, - "Unexpected pre-existing tdata structures"); + expect_zu_eq(bt_count, 0, "Unexpected pre-existing tdata structures"); tdata_count = prof_tdata_count(); lg_prof_sample_orig = get_lg_prof_sample(); @@ -186,8 +188,8 @@ TEST_BEGIN(test_prof_reset) { thd_join(thds[i], NULL); } - expect_zu_eq(prof_bt_count(), bt_count, - "Unexpected bactrace count change"); + expect_zu_eq( + prof_bt_count(), bt_count, "Unexpected bactrace count change"); expect_zu_eq(prof_tdata_count(), tdata_count, "Unexpected remaining tdata structures"); @@ -205,9 +207,9 @@ TEST_END /* Test sampling at the same allocation site across resets. */ #define NITER 10 TEST_BEGIN(test_xallocx) { - size_t lg_prof_sample_orig; + size_t lg_prof_sample_orig; unsigned i; - void *ptrs[NITER]; + void *ptrs[NITER]; test_skip_if(!config_prof); @@ -218,7 +220,7 @@ TEST_BEGIN(test_xallocx) { do_prof_reset(0); for (i = 0; i < NITER; i++) { - void *p; + void *p; size_t sz, nsz; /* Reset profiling. */ @@ -233,13 +235,13 @@ TEST_BEGIN(test_xallocx) { /* Perform successful xallocx(). 
*/ sz = sallocx(p, 0); - expect_zu_eq(xallocx(p, sz, 0, 0), sz, - "Unexpected xallocx() failure"); + expect_zu_eq( + xallocx(p, sz, 0, 0), sz, "Unexpected xallocx() failure"); /* Perform unsuccessful xallocx(). */ - nsz = nallocx(sz+1, 0); - expect_zu_eq(xallocx(p, nsz, 0, 0), sz, - "Unexpected xallocx() success"); + nsz = nallocx(sz + 1, 0); + expect_zu_eq( + xallocx(p, nsz, 0, 0), sz, "Unexpected xallocx() success"); } for (i = 0; i < NITER; i++) { @@ -258,9 +260,6 @@ main(void) { /* Intercept dumping prior to running any tests. */ prof_dump_open_file = prof_dump_open_file_intercept; - return test_no_reentrancy( - test_prof_reset_basic, - test_prof_reset_cleanup, - test_prof_reset, - test_xallocx); + return test_no_reentrancy(test_prof_reset_basic, + test_prof_reset_cleanup, test_prof_reset, test_xallocx); } diff --git a/test/unit/prof_small.c b/test/unit/prof_small.c index e3462c1f..993a83a7 100644 --- a/test/unit/prof_small.c +++ b/test/unit/prof_small.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" -static void assert_small_allocation_sampled(void *ptr, size_t size) { +static void +assert_small_allocation_sampled(void *ptr, size_t size) { assert_ptr_not_null(ptr, "Unexpected malloc failure"); assert_zu_le(size, SC_SMALL_MAXCLASS, "Unexpected large size class"); edata_t *edata = emap_edata_lookup(TSDN_NULL, &arena_emap_global, ptr); @@ -24,7 +25,7 @@ TEST_BEGIN(test_profile_small_allocations) { for (szind_t index = 0; index < SC_NBINS; index++) { size_t size = sz_index2size(index); - void *ptr = malloc(size); + void *ptr = malloc(size); assert_small_allocation_sampled(ptr, size); free(ptr); } @@ -36,7 +37,7 @@ TEST_BEGIN(test_profile_small_allocations_sdallocx) { for (szind_t index = 0; index < SC_NBINS; index++) { size_t size = sz_index2size(index); - void *ptr = malloc(size); + void *ptr = malloc(size); assert_small_allocation_sampled(ptr, size); /* * While free calls into ifree, sdallocx calls into isfree, @@ -86,7 +87,7 @@ 
TEST_BEGIN(test_profile_small_reallocations_same_size_class) { for (szind_t index = 0; index < SC_NBINS; index++) { size_t size = sz_index2size(index); - void *ptr = malloc(size); + void *ptr = malloc(size); assert_small_allocation_sampled(ptr, size); ptr = realloc(ptr, size - 1); assert_small_allocation_sampled(ptr, size); diff --git a/test/unit/prof_stats.c b/test/unit/prof_stats.c index c88c4ae0..95ca051c 100644 --- a/test/unit/prof_stats.c +++ b/test/unit/prof_stats.c @@ -3,8 +3,8 @@ #define N_PTRS 3 static void -test_combinations(szind_t ind, size_t sizes_array[N_PTRS], - int flags_array[N_PTRS]) { +test_combinations( + szind_t ind, size_t sizes_array[N_PTRS], int flags_array[N_PTRS]) { #define MALLCTL_STR_LEN 64 assert(opt_prof && opt_prof_stats); @@ -25,11 +25,13 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], size_t stats_len = 2 * sizeof(uint64_t); uint64_t live_stats_orig[2]; - assert_d_eq(mallctl(mallctl_live_str, &live_stats_orig, &stats_len, - NULL, 0), 0, ""); + assert_d_eq( + mallctl(mallctl_live_str, &live_stats_orig, &stats_len, NULL, 0), 0, + ""); uint64_t accum_stats_orig[2]; - assert_d_eq(mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, - NULL, 0), 0, ""); + assert_d_eq( + mallctl(mallctl_accum_str, &accum_stats_orig, &stats_len, NULL, 0), + 0, ""); void *ptrs[N_PTRS]; @@ -40,8 +42,8 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], for (size_t i = 0; i < N_PTRS; ++i) { size_t sz = sizes_array[i]; - int flags = flags_array[i]; - void *p = mallocx(sz, flags); + int flags = flags_array[i]; + void *p = mallocx(sz, flags); assert_ptr_not_null(p, "malloc() failed"); assert(TEST_MALLOC_SIZE(p) == sz_index2size(ind)); ptrs[i] = p; @@ -50,41 +52,45 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], accum_req_sum += sz; accum_count++; uint64_t live_stats[2]; - assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(live_stats[0] - live_stats_orig[0], - live_req_sum, 
""); - expect_u64_eq(live_stats[1] - live_stats_orig[1], - live_count, ""); + assert_d_eq( + mallctl(mallctl_live_str, &live_stats, &stats_len, NULL, 0), + 0, ""); + expect_u64_eq( + live_stats[0] - live_stats_orig[0], live_req_sum, ""); + expect_u64_eq( + live_stats[1] - live_stats_orig[1], live_count, ""); uint64_t accum_stats[2]; assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(accum_stats[0] - accum_stats_orig[0], - accum_req_sum, ""); - expect_u64_eq(accum_stats[1] - accum_stats_orig[1], - accum_count, ""); + NULL, 0), + 0, ""); + expect_u64_eq( + accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); + expect_u64_eq( + accum_stats[1] - accum_stats_orig[1], accum_count, ""); } for (size_t i = 0; i < N_PTRS; ++i) { size_t sz = sizes_array[i]; - int flags = flags_array[i]; + int flags = flags_array[i]; sdallocx(ptrs[i], sz, flags); live_req_sum -= sz; live_count--; uint64_t live_stats[2]; - assert_d_eq(mallctl(mallctl_live_str, &live_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(live_stats[0] - live_stats_orig[0], - live_req_sum, ""); - expect_u64_eq(live_stats[1] - live_stats_orig[1], - live_count, ""); + assert_d_eq( + mallctl(mallctl_live_str, &live_stats, &stats_len, NULL, 0), + 0, ""); + expect_u64_eq( + live_stats[0] - live_stats_orig[0], live_req_sum, ""); + expect_u64_eq( + live_stats[1] - live_stats_orig[1], live_count, ""); uint64_t accum_stats[2]; assert_d_eq(mallctl(mallctl_accum_str, &accum_stats, &stats_len, - NULL, 0), 0, ""); - expect_u64_eq(accum_stats[0] - accum_stats_orig[0], - accum_req_sum, ""); - expect_u64_eq(accum_stats[1] - accum_stats_orig[1], - accum_count, ""); + NULL, 0), + 0, ""); + expect_u64_eq( + accum_stats[0] - accum_stats_orig[0], accum_req_sum, ""); + expect_u64_eq( + accum_stats[1] - accum_stats_orig[1], accum_count, ""); } #undef MALLCTL_STR_LEN } @@ -92,9 +98,9 @@ test_combinations(szind_t ind, size_t sizes_array[N_PTRS], static void test_szind_wrapper(szind_t 
ind) { size_t sizes_array[N_PTRS]; - int flags_array[N_PTRS]; + int flags_array[N_PTRS]; for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { + ++i, ++sz) { sizes_array[i] = sz; flags_array[i] = 0; } @@ -115,10 +121,10 @@ TEST_END static void test_szind_aligned_wrapper(szind_t ind, unsigned lg_align) { size_t sizes_array[N_PTRS]; - int flags_array[N_PTRS]; - int flags = MALLOCX_LG_ALIGN(lg_align); + int flags_array[N_PTRS]; + int flags = MALLOCX_LG_ALIGN(lg_align); for (size_t i = 0, sz = sz_index2size(ind) - N_PTRS; i < N_PTRS; - ++i, ++sz) { + ++i, ++sz) { sizes_array[i] = sz; flags_array[i] = flags; } @@ -136,7 +142,7 @@ TEST_BEGIN(test_prof_stats_aligned) { } for (szind_t ind = SC_NBINS - 5; ind < SC_NBINS + 5; ++ind) { for (unsigned lg_align = SC_LG_LARGE_MINCLASS - 5; - lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) { + lg_align < SC_LG_LARGE_MINCLASS + 5; ++lg_align) { test_szind_aligned_wrapper(ind, lg_align); } } @@ -145,7 +151,5 @@ TEST_END int main(void) { - return test( - test_prof_stats, - test_prof_stats_aligned); + return test(test_prof_stats, test_prof_stats_aligned); } diff --git a/test/unit/prof_sys_thread_name.c b/test/unit/prof_sys_thread_name.c index 3aeb8cf1..242e2fc3 100644 --- a/test/unit/prof_sys_thread_name.c +++ b/test/unit/prof_sys_thread_name.c @@ -28,7 +28,7 @@ TEST_BEGIN(test_prof_sys_thread_name) { test_skip_if(!config_prof); test_skip_if(!opt_prof_sys_thread_name); - bool oldval; + bool oldval; size_t sz = sizeof(oldval); assert_d_eq(mallctl("opt.prof_sys_thread_name", &oldval, &sz, NULL, 0), 0, "mallctl failed"); @@ -43,8 +43,8 @@ TEST_BEGIN(test_prof_sys_thread_name) { thread_name = test_thread_name; assert_d_eq(mallctl("thread.prof.name", NULL, NULL, &thread_name, sz), ENOENT, "mallctl write for thread name should fail"); - assert_ptr_eq(thread_name, test_thread_name, - "Thread name should not be touched"); + assert_ptr_eq( + thread_name, test_thread_name, "Thread name should not be touched"); 
prof_sys_thread_name_read_t *orig_prof_sys_thread_name_read = prof_sys_thread_name_read; @@ -69,14 +69,15 @@ TEST_BEGIN(test_prof_sys_thread_name) { free(p); assert_d_eq(mallctl("thread.prof.name", &thread_name, &sz, NULL, 0), 0, "mallctl read for thread name should not fail"); - expect_str_eq(thread_name, "", "Thread name should be updated if the " + expect_str_eq(thread_name, "", + "Thread name should be updated if the " "system call returns a different name"); prof_sys_thread_name_read = orig_prof_sys_thread_name_read; } TEST_END -#define ITER (16*1024) +#define ITER (16 * 1024) static void * thd_start(void *unused) { /* Triggering samples which loads thread names. */ @@ -94,7 +95,7 @@ TEST_BEGIN(test_prof_sys_thread_name_mt) { test_skip_if(!opt_prof_sys_thread_name); #define NTHREADS 4 - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -105,8 +106,8 @@ TEST_BEGIN(test_prof_sys_thread_name_mt) { /* Prof dump which reads the thread names. */ for (i = 0; i < ITER; i++) { expect_d_eq(mallctl("prof.dump", NULL, NULL, - (void *)&dump_filename, sizeof(dump_filename)), 0, - "Unexpected mallctl failure while dumping"); + (void *)&dump_filename, sizeof(dump_filename)), + 0, "Unexpected mallctl failure while dumping"); } for (i = 0; i < NTHREADS; i++) { @@ -119,7 +120,5 @@ TEST_END int main(void) { - return test( - test_prof_sys_thread_name, - test_prof_sys_thread_name_mt); + return test(test_prof_sys_thread_name, test_prof_sys_thread_name_mt); } diff --git a/test/unit/prof_tctx.c b/test/unit/prof_tctx.c index d19dd395..7fde7230 100644 --- a/test/unit/prof_tctx.c +++ b/test/unit/prof_tctx.c @@ -3,11 +3,11 @@ #include "jemalloc/internal/prof_data.h" TEST_BEGIN(test_prof_realloc) { - tsd_t *tsd; - int flags; - void *p, *q; + tsd_t *tsd; + int flags; + void *p, *q; prof_info_t prof_info_p, prof_info_q; - prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3; + prof_cnt_t cnt_0, cnt_1, cnt_2, cnt_3; test_skip_if(!config_prof); @@ -18,8 +18,8 @@ 
TEST_BEGIN(test_prof_realloc) { p = mallocx(1024, flags); expect_ptr_not_null(p, "Unexpected mallocx() failure"); prof_info_get(tsd, p, NULL, &prof_info_p); - expect_ptr_ne(prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, - "Expected valid tctx"); + expect_ptr_ne( + prof_info_p.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx"); prof_cnt_all(&cnt_1); expect_u64_eq(cnt_0.curobjs + 1, cnt_1.curobjs, "Allocation should have increased sample size"); @@ -28,8 +28,8 @@ TEST_BEGIN(test_prof_realloc) { expect_ptr_ne(p, q, "Expected move"); expect_ptr_not_null(p, "Unexpected rmallocx() failure"); prof_info_get(tsd, q, NULL, &prof_info_q); - expect_ptr_ne(prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, - "Expected valid tctx"); + expect_ptr_ne( + prof_info_q.alloc_tctx, PROF_TCTX_SENTINEL, "Expected valid tctx"); prof_cnt_all(&cnt_2); expect_u64_eq(cnt_1.curobjs, cnt_2.curobjs, "Reallocation should not have changed sample size"); @@ -43,6 +43,5 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_prof_realloc); + return test_no_reentrancy(test_prof_realloc); } diff --git a/test/unit/prof_thread_name.c b/test/unit/prof_thread_name.c index 0fc29f75..8b12c435 100644 --- a/test/unit/prof_thread_name.c +++ b/test/unit/prof_thread_name.c @@ -1,34 +1,34 @@ #include "test/jemalloc_test.h" static void -mallctl_thread_name_get_impl(const char *thread_name_expected, const char *func, - int line) { +mallctl_thread_name_get_impl( + const char *thread_name_expected, const char *func, int line) { const char *thread_name_old; - size_t sz; + size_t sz; sz = sizeof(thread_name_old); - expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, - NULL, 0), 0, - "%s():%d: Unexpected mallctl failure reading thread.prof.name", + expect_d_eq( + mallctl("thread.prof.name", (void *)&thread_name_old, &sz, NULL, 0), + 0, "%s():%d: Unexpected mallctl failure reading thread.prof.name", func, line); expect_str_eq(thread_name_old, thread_name_expected, "%s():%d: Unexpected 
thread.prof.name value", func, line); } static void -mallctl_thread_name_set_impl(const char *thread_name, const char *func, - int line) { +mallctl_thread_name_set_impl( + const char *thread_name, const char *func, int line) { expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&thread_name, sizeof(thread_name)), 0, - "%s():%d: Unexpected mallctl failure writing thread.prof.name", + (void *)&thread_name, sizeof(thread_name)), + 0, "%s():%d: Unexpected mallctl failure writing thread.prof.name", func, line); mallctl_thread_name_get_impl(thread_name, func, line); } -#define mallctl_thread_name_get(a) \ +#define mallctl_thread_name_get(a) \ mallctl_thread_name_get_impl(a, __func__, __LINE__) -#define mallctl_thread_name_set(a) \ +#define mallctl_thread_name_set(a) \ mallctl_thread_name_set_impl(a, __func__, __LINE__) TEST_BEGIN(test_prof_thread_name_validation) { @@ -44,34 +44,35 @@ TEST_BEGIN(test_prof_thread_name_validation) { char long_name[] = "test case longer than expected; test case longer than expected"; expect_zu_gt(strlen(long_name), PROF_THREAD_NAME_MAX_LEN, - "Long test name not long enough"); + "Long test name not long enough"); const char *test_name_long = long_name; expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&test_name_long, sizeof(test_name_long)), 0, - "Unexpected mallctl failure from thread.prof.name"); + (void *)&test_name_long, sizeof(test_name_long)), + 0, "Unexpected mallctl failure from thread.prof.name"); /* Long name cut to match. */ long_name[PROF_THREAD_NAME_MAX_LEN - 1] = '\0'; mallctl_thread_name_get(test_name_long); /* NULL input shouldn't be allowed. 
*/ const char *test_name2 = NULL; - expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&test_name2, sizeof(test_name2)), EINVAL, - "Unexpected mallctl result writing to thread.prof.name"); + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&test_name2, + sizeof(test_name2)), + EINVAL, "Unexpected mallctl result writing to thread.prof.name"); /* '\n' shouldn't be allowed. */ const char *test_name3 = "test\ncase"; - expect_d_eq(mallctl("thread.prof.name", NULL, NULL, - (void *)&test_name3, sizeof(test_name3)), EINVAL, + expect_d_eq(mallctl("thread.prof.name", NULL, NULL, (void *)&test_name3, + sizeof(test_name3)), + EINVAL, "Unexpected mallctl result writing \"%s\" to thread.prof.name", test_name3); /* Simultaneous read/write shouldn't be allowed. */ const char *thread_name_old; - size_t sz = sizeof(thread_name_old); + size_t sz = sizeof(thread_name_old); expect_d_eq(mallctl("thread.prof.name", (void *)&thread_name_old, &sz, - (void *)&test_name1, sizeof(test_name1)), EPERM, - "Unexpected mallctl result from thread.prof.name"); + (void *)&test_name1, sizeof(test_name1)), + EPERM, "Unexpected mallctl result from thread.prof.name"); mallctl_thread_name_set(""); } @@ -80,7 +81,7 @@ TEST_END static void * thd_start(void *varg) { unsigned thd_ind = *(unsigned *)varg; - char thread_name[16] = ""; + char thread_name[16] = ""; unsigned i; malloc_snprintf(thread_name, sizeof(thread_name), "thread %u", thd_ind); @@ -107,7 +108,7 @@ TEST_BEGIN(test_prof_thread_name_threaded) { test_skip_if(opt_prof_sys_thread_name); #define NTHREADS 4 - thd_t thds[NTHREADS]; + thd_t thds[NTHREADS]; unsigned thd_args[NTHREADS]; unsigned i; @@ -125,6 +126,5 @@ TEST_END int main(void) { return test( - test_prof_thread_name_validation, - test_prof_thread_name_threaded); + test_prof_thread_name_validation, test_prof_thread_name_threaded); } diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c index c6f53983..a31a5a24 100644 --- a/test/unit/prof_threshold.c +++ 
b/test/unit/prof_threshold.c @@ -23,9 +23,10 @@ static void read_write_prof_threshold_hook(prof_threshold_hook_t *to_read, bool do_write, prof_threshold_hook_t to_write) { size_t hook_sz = sizeof(prof_threshold_hook_t); - expect_d_eq(mallctl("experimental.hooks.prof_threshold", - (void *)to_read, &hook_sz, do_write ? &to_write : NULL, hook_sz), 0, - "Unexpected prof_threshold_hook mallctl failure"); + expect_d_eq( + mallctl("experimental.hooks.prof_threshold", (void *)to_read, + &hook_sz, do_write ? &to_write : NULL, hook_sz), + 0, "Unexpected prof_threshold_hook mallctl failure"); } static void @@ -40,7 +41,8 @@ read_prof_threshold_hook() { return hook; } -static void reset_test_config() { +static void +reset_test_config() { hook_calls = 0; last_peak = 0; alloc_baseline = last_alloc; /* We run the test multiple times */ @@ -49,15 +51,20 @@ static void reset_test_config() { chunk_size = threshold_bytes / ALLOC_ITERATIONS_IN_THRESHOLD; } -static void expect_threshold_calls(int calls) { - expect_u64_eq(hook_calls, calls, "Hook called the right amount of times"); - expect_u64_lt(last_peak, chunk_size * 2, "We allocate chunk_size at a time"); - expect_u64_ge(last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); +static void +expect_threshold_calls(int calls) { + expect_u64_eq( + hook_calls, calls, "Hook called the right amount of times"); + expect_u64_lt( + last_peak, chunk_size * 2, "We allocate chunk_size at a time"); + expect_u64_ge( + last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); } -static void allocate_chunks(int chunks) { +static void +allocate_chunks(int chunks) { for (int i = 0; i < chunks; i++) { - void* p = mallocx((size_t)chunk_size, 0); + void *p = mallocx((size_t)chunk_size, 0); expect_ptr_not_null(p, "Failed to allocate"); free(p); } @@ -68,7 +75,8 @@ TEST_BEGIN(test_prof_threshold_hook) { /* Test setting and reading the hook (both value and null) */ write_prof_threshold_hook(mock_prof_threshold_hook); - 
expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, "Unexpected hook"); + expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, + "Unexpected hook"); write_prof_threshold_hook(NULL); expect_ptr_null(read_prof_threshold_hook(), "Hook was erased"); @@ -100,6 +108,5 @@ TEST_END int main(void) { - return test( - test_prof_threshold_hook); + return test(test_prof_threshold_hook); } diff --git a/test/unit/psset.c b/test/unit/psset.c index c834e531..73a9835a 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -21,8 +21,8 @@ test_psset_fake_purge(hpdata_t *ps) { hpdata_alloc_allowed_set(ps, false); size_t nranges; hpdata_purge_begin(ps, &purge_state, &nranges); - (void) nranges; - void *addr; + (void)nranges; + void *addr; size_t size; while (hpdata_purge_next(ps, &purge_state, &addr, &size)) { } @@ -31,8 +31,8 @@ test_psset_fake_purge(hpdata_t *ps) { } static void -test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, - size_t size) { +test_psset_alloc_new( + psset_t *psset, hpdata_t *ps, edata_t *r_edata, size_t size) { hpdata_assert_empty(ps); test_psset_fake_purge(ps); @@ -40,12 +40,12 @@ test_psset_alloc_new(psset_t *psset, hpdata_t *ps, edata_t *r_edata, psset_insert(psset, ps); psset_update_begin(psset, ps); - void *addr = hpdata_reserve_alloc(ps, size); - edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, + void *addr = hpdata_reserve_alloc(ps, size); + edata_init(r_edata, edata_arena_ind_get(r_edata), addr, size, /* slab */ false, SC_NSIZES, /* sn */ 0, extent_state_active, - /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, - EXTENT_NOT_HEAD); - edata_ps_set(r_edata, ps); + /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, + EXTENT_NOT_HEAD); + edata_ps_set(r_edata, ps); psset_update_end(psset, ps); } @@ -104,15 +104,14 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { * Note that allocations should get the arena ind of their home * arena, *not* the arena ind of the 
pageslab allocator. */ - expect_u_eq(ALLOC_ARENA_IND, edata_arena_ind_get(edata), - "Arena ind changed"); + expect_u_eq( + ALLOC_ARENA_IND, edata_arena_ind_get(edata), "Arena ind changed"); expect_ptr_eq( (void *)((uintptr_t)PAGESLAB_ADDR + (page_offset << LG_PAGE)), edata_addr_get(edata), "Didn't allocate in order"); expect_zu_eq(page_cnt << LG_PAGE, edata_size_get(edata), ""); expect_false(edata_slab_get(edata), ""); - expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), - ""); + expect_u_eq(SC_NSIZES, edata_szind_get_maybe_invalid(edata), ""); expect_u64_eq(0, edata_sn_get(edata), ""); expect_d_eq(edata_state_get(edata), extent_state_active, ""); expect_false(edata_zeroed_get(edata), ""); @@ -123,7 +122,7 @@ edata_expect(edata_t *edata, size_t page_offset, size_t page_cnt) { TEST_BEGIN(test_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t pageslab; hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); @@ -176,7 +175,7 @@ TEST_END TEST_BEGIN(test_reuse) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; hpdata_t pageslab; @@ -196,7 +195,7 @@ TEST_BEGIN(test_reuse) { } /* Free odd indices. 
*/ - for (size_t i = 0; i < HUGEPAGE_PAGES; i ++) { + for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { if (i % 2 == 0) { continue; } @@ -271,7 +270,7 @@ TEST_END TEST_BEGIN(test_evict) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; hpdata_t pageslab; @@ -308,16 +307,15 @@ TEST_END TEST_BEGIN(test_multi_pageslab) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; hpdata_t pageslab[2]; hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE); - hpdata_init(&pageslab[1], - (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), + hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), PAGESLAB_AGE + 1); - edata_t* alloc[2]; + edata_t *alloc[2]; alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); alloc[1] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -334,9 +332,10 @@ TEST_BEGIN(test_multi_pageslab) { for (size_t i = 0; i < 2; i++) { for (size_t j = 1; j < HUGEPAGE_PAGES; j++) { edata_init_test(&alloc[i][j]); - err = test_psset_alloc_reuse(&psset, &alloc[i][j], PAGE); - expect_false(err, - "Nonempty psset failed page allocation."); + err = test_psset_alloc_reuse( + &psset, &alloc[i][j], PAGE); + expect_false( + err, "Nonempty psset failed page allocation."); assert_ptr_eq(&pageslab[i], edata_ps_get(&alloc[i][j]), "Didn't pick pageslabs in first-fit"); } @@ -505,7 +504,8 @@ TEST_BEGIN(test_stats_huge) { expect_zu_eq(1, psset.stats.slabs[0].npageslabs, ""); expect_zu_eq(i, psset.stats.slabs[0].nactive, ""); - expect_zu_eq(HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, ""); + expect_zu_eq( + HUGEPAGE_PAGES - i, psset.stats.slabs[0].ndirty, ""); expect_zu_eq(0, psset.stats.slabs[1].npageslabs, ""); expect_zu_eq(0, psset.stats.slabs[1].nactive, ""); @@ -527,7 +527,8 @@ static void stats_expect_empty(psset_bin_stats_t *stats) { assert_zu_eq(0, stats->npageslabs, "Supposedly empty bin had positive npageslabs"); - expect_zu_eq(0, stats->nactive, "Unexpected nonempty 
bin" + expect_zu_eq(0, stats->nactive, + "Unexpected nonempty bin" "Supposedly empty bin had positive nactive"); } @@ -536,17 +537,16 @@ stats_expect(psset_t *psset, size_t nactive) { if (nactive == HUGEPAGE_PAGES) { expect_zu_eq(1, psset->stats.full_slabs[0].npageslabs, "Expected a full slab"); - expect_zu_eq(HUGEPAGE_PAGES, - psset->stats.full_slabs[0].nactive, + expect_zu_eq(HUGEPAGE_PAGES, psset->stats.full_slabs[0].nactive, "Should have exactly filled the bin"); } else { stats_expect_empty(&psset->stats.full_slabs[0]); } - size_t ninactive = HUGEPAGE_PAGES - nactive; + size_t ninactive = HUGEPAGE_PAGES - nactive; pszind_t nonempty_pind = PSSET_NPSIZES; if (ninactive != 0 && ninactive < HUGEPAGE_PAGES) { - nonempty_pind = sz_psz2ind(sz_psz_quantize_floor( - ninactive << LG_PAGE)); + nonempty_pind = sz_psz2ind( + sz_psz_quantize_floor(ninactive << LG_PAGE)); } for (pszind_t i = 0; i < PSSET_NPSIZES; i++) { if (i == nonempty_pind) { @@ -657,24 +657,25 @@ init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, } /* Deallocate the last page from the older pageslab. */ - hpdata_t *evicted = test_psset_dalloc(psset, - &alloc[HUGEPAGE_PAGES - 1]); + hpdata_t *evicted = test_psset_dalloc( + psset, &alloc[HUGEPAGE_PAGES - 1]); expect_ptr_null(evicted, "Unexpected eviction"); } TEST_BEGIN(test_oldest_fit) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); - edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc( + sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; psset_t psset; - init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, - worse_alloc); + init_test_pageslabs( + &psset, &pageslab, &worse_pageslab, alloc, worse_alloc); /* The edata should come from the better pageslab. 
*/ edata_t test_edata; @@ -691,23 +692,24 @@ TEST_END TEST_BEGIN(test_insert_remove) { test_skip_if(hpa_hugepage_size_exceeds_limit()); - bool err; + bool err; hpdata_t *ps; - edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); - edata_t *worse_alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); + edata_t *worse_alloc = (edata_t *)malloc( + sizeof(edata_t) * HUGEPAGE_PAGES); hpdata_t pageslab; hpdata_t worse_pageslab; psset_t psset; - init_test_pageslabs(&psset, &pageslab, &worse_pageslab, alloc, - worse_alloc); + init_test_pageslabs( + &psset, &pageslab, &worse_pageslab, alloc, worse_alloc); /* Remove better; should still be able to alloc from worse. */ psset_update_begin(&psset, &pageslab); - err = test_psset_alloc_reuse(&psset, &worse_alloc[HUGEPAGE_PAGES - 1], - PAGE); + err = test_psset_alloc_reuse( + &psset, &worse_alloc[HUGEPAGE_PAGES - 1], PAGE); expect_false(err, "Removal should still leave an empty page"); expect_ptr_eq(&worse_pageslab, edata_ps_get(&worse_alloc[HUGEPAGE_PAGES - 1]), @@ -755,23 +757,21 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { psset_t psset; psset_init(&psset); - hpdata_t hpdata_huge[NHP]; + hpdata_t hpdata_huge[NHP]; uintptr_t huge_begin = (uintptr_t)&hpdata_huge[0]; uintptr_t huge_end = (uintptr_t)&hpdata_huge[NHP]; - hpdata_t hpdata_nonhuge[NHP]; + hpdata_t hpdata_nonhuge[NHP]; uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; for (size_t i = 0; i < NHP; i++) { - hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), - 123 + i); + hpdata_init( + &hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), 123 + i); psset_insert(&psset, &hpdata_huge[i]); hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), - 456 + i); + (void *)((10 + NHP + i) * HUGEPAGE), 456 + i); psset_insert(&psset, &hpdata_nonhuge[i]); - } for (int i = 0; i < 2 * NHP; i++) { hpdata = 
psset_pick_alloc(&psset, HUGEPAGE * 3 / 4); @@ -804,7 +804,8 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { for (int i = 0; i < NHP; i++) { hpdata = psset_pick_purge(&psset); assert_true(nonhuge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < nonhuge_end, ""); + && (uintptr_t)hpdata < nonhuge_end, + ""); psset_update_begin(&psset, hpdata); test_psset_fake_purge(hpdata); hpdata_purge_allowed_set(hpdata, false); @@ -813,7 +814,8 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { for (int i = 0; i < NHP; i++) { hpdata = psset_pick_purge(&psset); expect_true(huge_begin <= (uintptr_t)hpdata - && (uintptr_t)hpdata < huge_end, ""); + && (uintptr_t)hpdata < huge_end, + ""); psset_update_begin(&psset, hpdata); hpdata_dehugify(hpdata); test_psset_fake_purge(hpdata); @@ -867,13 +869,13 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { psset_t psset; psset_init(&psset); - enum {NHP = 10 }; + enum { NHP = 10 }; hpdata_t hpdata_huge[NHP]; hpdata_t hpdata_nonhuge[NHP]; uintptr_t cur_addr = 100 * HUGEPAGE; - uint64_t cur_age = 123; + uint64_t cur_age = 123; for (int i = 0; i < NHP; i++) { hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age); cur_addr += HUGEPAGE; @@ -933,18 +935,9 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_empty, - test_fill, - test_reuse, - test_evict, - test_multi_pageslab, - test_stats_merged, - test_stats_huge, - test_stats_fullness, - test_oldest_fit, - test_insert_remove, - test_purge_prefers_nonhuge, - test_purge_prefers_empty, + return test_no_reentrancy(test_empty, test_fill, test_reuse, test_evict, + test_multi_pageslab, test_stats_merged, test_stats_huge, + test_stats_fullness, test_oldest_fit, test_insert_remove, + test_purge_prefers_nonhuge, test_purge_prefers_empty, test_purge_prefers_empty_huge); } diff --git a/test/unit/ql.c b/test/unit/ql.c index f9130582..ff3b436e 100644 --- a/test/unit/ql.c +++ b/test/unit/ql.c @@ -15,16 +15,16 @@ struct list_s { static void test_empty_list(list_head_t *head) { - list_t *t; + list_t *t; unsigned 
i; expect_true(ql_empty(head), "Unexpected element for empty list"); expect_ptr_null(ql_first(head), "Unexpected element for empty list"); - expect_ptr_null(ql_last(head, link), - "Unexpected element for empty list"); + expect_ptr_null( + ql_last(head, link), "Unexpected element for empty list"); i = 0; - ql_foreach(t, head, link) { + ql_foreach (t, head, link) { i++; } expect_u_eq(i, 0, "Unexpected element for empty list"); @@ -56,48 +56,48 @@ init_entries(list_t *entries, unsigned nentries) { static void test_entries_list(list_head_t *head, list_t *entries, unsigned nentries) { - list_t *t; + list_t *t; unsigned i; expect_false(ql_empty(head), "List should not be empty"); expect_c_eq(ql_first(head)->id, entries[0].id, "Element id mismatch"); - expect_c_eq(ql_last(head, link)->id, entries[nentries-1].id, + expect_c_eq(ql_last(head, link)->id, entries[nentries - 1].id, "Element id mismatch"); i = 0; - ql_foreach(t, head, link) { + ql_foreach (t, head, link) { expect_c_eq(t->id, entries[i].id, "Element id mismatch"); i++; } i = 0; ql_reverse_foreach(t, head, link) { - expect_c_eq(t->id, entries[nentries-i-1].id, - "Element id mismatch"); + expect_c_eq( + t->id, entries[nentries - i - 1].id, "Element id mismatch"); i++; } - for (i = 0; i < nentries-1; i++) { + for (i = 0; i < nentries - 1; i++) { t = ql_next(head, &entries[i], link); - expect_c_eq(t->id, entries[i+1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i + 1].id, "Element id mismatch"); } - expect_ptr_null(ql_next(head, &entries[nentries-1], link), - "Unexpected element"); + expect_ptr_null( + ql_next(head, &entries[nentries - 1], link), "Unexpected element"); expect_ptr_null(ql_prev(head, &entries[0], link), "Unexpected element"); for (i = 1; i < nentries; i++) { t = ql_prev(head, &entries[i], link); - expect_c_eq(t->id, entries[i-1].id, "Element id mismatch"); + expect_c_eq(t->id, entries[i - 1].id, "Element id mismatch"); } } TEST_BEGIN(test_ql_tail_insert) { list_head_t head; - list_t 
entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); } @@ -108,17 +108,17 @@ TEST_END TEST_BEGIN(test_ql_tail_remove) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); } for (i = 0; i < NENTRIES; i++) { - test_entries_list(&head, entries, NENTRIES-i); + test_entries_list(&head, entries, NENTRIES - i); ql_tail_remove(&head, list_t, link); } test_empty_list(&head); @@ -127,13 +127,13 @@ TEST_END TEST_BEGIN(test_ql_head_insert) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { - ql_head_insert(&head, &entries[NENTRIES-i-1], link); + ql_head_insert(&head, &entries[NENTRIES - i - 1], link); } test_entries_list(&head, entries, NENTRIES); @@ -142,17 +142,17 @@ TEST_END TEST_BEGIN(test_ql_head_remove) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { - ql_head_insert(&head, &entries[NENTRIES-i-1], link); + ql_head_insert(&head, &entries[NENTRIES - i - 1], link); } for (i = 0; i < NENTRIES; i++) { - test_entries_list(&head, &entries[i], NENTRIES-i); + test_entries_list(&head, &entries[i], NENTRIES - i); ql_head_remove(&head, list_t, link); } test_empty_list(&head); @@ 
-161,11 +161,11 @@ TEST_END TEST_BEGIN(test_ql_insert) { list_head_t head; - list_t entries[8]; - list_t *a, *b, *c, *d, *e, *f, *g, *h; + list_t entries[8]; + list_t *a, *b, *c, *d, *e, *f, *g, *h; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); a = &entries[0]; b = &entries[1]; c = &entries[2]; @@ -190,13 +190,13 @@ TEST_BEGIN(test_ql_insert) { ql_after_insert(c, d, link); ql_before_insert(&head, f, e, link); - test_entries_list(&head, entries, sizeof(entries)/sizeof(list_t)); + test_entries_list(&head, entries, sizeof(entries) / sizeof(list_t)); } TEST_END static void -test_concat_split_entries(list_t *entries, unsigned nentries_a, - unsigned nentries_b) { +test_concat_split_entries( + list_t *entries, unsigned nentries_a, unsigned nentries_b) { init_entries(entries, nentries_a + nentries_b); list_head_t head_a; @@ -253,8 +253,8 @@ TEST_BEGIN(test_ql_concat_split) { test_concat_split_entries(entries, 0, NENTRIES); test_concat_split_entries(entries, 1, NENTRIES - 1); - test_concat_split_entries(entries, NENTRIES / 2, - NENTRIES - NENTRIES / 2); + test_concat_split_entries( + entries, NENTRIES / 2, NENTRIES - NENTRIES / 2); test_concat_split_entries(entries, NENTRIES - 1, 1); test_concat_split_entries(entries, NENTRIES, 0); } @@ -262,11 +262,11 @@ TEST_END TEST_BEGIN(test_ql_rotate) { list_head_t head; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head); - init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head, &entries[i], link); } @@ -284,15 +284,15 @@ TEST_END TEST_BEGIN(test_ql_move) { list_head_t head_dest, head_src; - list_t entries[NENTRIES]; - unsigned i; + list_t entries[NENTRIES]; + unsigned i; ql_new(&head_src); ql_move(&head_dest, &head_src); test_empty_list(&head_src); test_empty_list(&head_dest); - 
init_entries(entries, sizeof(entries)/sizeof(list_t)); + init_entries(entries, sizeof(entries) / sizeof(list_t)); for (i = 0; i < NENTRIES; i++) { ql_tail_insert(&head_src, &entries[i], link); } @@ -304,14 +304,7 @@ TEST_END int main(void) { - return test( - test_ql_empty, - test_ql_tail_insert, - test_ql_tail_remove, - test_ql_head_insert, - test_ql_head_remove, - test_ql_insert, - test_ql_concat_split, - test_ql_rotate, - test_ql_move); + return test(test_ql_empty, test_ql_tail_insert, test_ql_tail_remove, + test_ql_head_insert, test_ql_head_remove, test_ql_insert, + test_ql_concat_split, test_ql_rotate, test_ql_move); } diff --git a/test/unit/qr.c b/test/unit/qr.c index 16eed0e9..3d8b164b 100644 --- a/test/unit/qr.c +++ b/test/unit/qr.c @@ -26,12 +26,12 @@ init_entries(ring_t *entries) { static void test_independent_entries(ring_t *entries) { - ring_t *t; + ring_t *t; unsigned i, j; for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { + qr_foreach (t, &entries[i], link) { j++; } expect_u_eq(j, 1, @@ -71,13 +71,13 @@ TEST_END static void test_entries_ring(ring_t *entries) { - ring_t *t; + ring_t *t; unsigned i, j; for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(i+j) % NENTRIES].id, + qr_foreach (t, &entries[i], link) { + expect_c_eq(t->id, entries[(i + j) % NENTRIES].id, "Element id mismatch"); j++; } @@ -85,25 +85,26 @@ test_entries_ring(ring_t *entries) { for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(NENTRIES+i-j-1) % - NENTRIES].id, "Element id mismatch"); + expect_c_eq(t->id, + entries[(NENTRIES + i - j - 1) % NENTRIES].id, + "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i + 1) % NENTRIES].id, "Element id mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], 
link); - expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES + i - 1) % NENTRIES].id, "Element id mismatch"); } } TEST_BEGIN(test_qr_after_insert) { - ring_t entries[NENTRIES]; + ring_t entries[NENTRIES]; unsigned i; init_entries(entries); @@ -115,8 +116,8 @@ TEST_BEGIN(test_qr_after_insert) { TEST_END TEST_BEGIN(test_qr_remove) { - ring_t entries[NENTRIES]; - ring_t *t; + ring_t entries[NENTRIES]; + ring_t *t; unsigned i, j; init_entries(entries); @@ -126,15 +127,15 @@ TEST_BEGIN(test_qr_remove) { for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[i+j].id, - "Element id mismatch"); + qr_foreach (t, &entries[i], link) { + expect_c_eq( + t->id, entries[i + j].id, "Element id mismatch"); j++; } j = 0; qr_reverse_foreach(t, &entries[i], link) { expect_c_eq(t->id, entries[NENTRIES - 1 - j].id, - "Element id mismatch"); + "Element id mismatch"); j++; } qr_remove(&entries[i], link); @@ -144,8 +145,8 @@ TEST_BEGIN(test_qr_remove) { TEST_END TEST_BEGIN(test_qr_before_insert) { - ring_t entries[NENTRIES]; - ring_t *t; + ring_t entries[NENTRIES]; + ring_t *t; unsigned i, j; init_entries(entries); @@ -154,28 +155,29 @@ TEST_BEGIN(test_qr_before_insert) { } for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(NENTRIES+i-j) % - NENTRIES].id, "Element id mismatch"); + qr_foreach (t, &entries[i], link) { + expect_c_eq(t->id, + entries[(NENTRIES + i - j) % NENTRIES].id, + "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { j = 0; qr_reverse_foreach(t, &entries[i], link) { - expect_c_eq(t->id, entries[(i+j+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i + j + 1) % NENTRIES].id, "Element id mismatch"); j++; } } for (i = 0; i < NENTRIES; i++) { t = qr_next(&entries[i], link); - expect_c_eq(t->id, entries[(NENTRIES+i-1) % NENTRIES].id, + expect_c_eq(t->id, entries[(NENTRIES + i - 1) % NENTRIES].id, "Element id 
mismatch"); } for (i = 0; i < NENTRIES; i++) { t = qr_prev(&entries[i], link); - expect_c_eq(t->id, entries[(i+1) % NENTRIES].id, + expect_c_eq(t->id, entries[(i + 1) % NENTRIES].id, "Element id mismatch"); } } @@ -183,19 +185,22 @@ TEST_END static void test_split_entries(ring_t *entries) { - ring_t *t; + ring_t *t; unsigned i, j; for (i = 0; i < NENTRIES; i++) { j = 0; - qr_foreach(t, &entries[i], link) { + qr_foreach (t, &entries[i], link) { if (i < SPLIT_INDEX) { expect_c_eq(t->id, - entries[(i+j) % SPLIT_INDEX].id, + entries[(i + j) % SPLIT_INDEX].id, "Element id mismatch"); } else { - expect_c_eq(t->id, entries[(i+j-SPLIT_INDEX) % - (NENTRIES-SPLIT_INDEX) + SPLIT_INDEX].id, + expect_c_eq(t->id, + entries[(i + j - SPLIT_INDEX) + % (NENTRIES - SPLIT_INDEX) + + SPLIT_INDEX] + .id, "Element id mismatch"); } j++; @@ -204,7 +209,7 @@ test_split_entries(ring_t *entries) { } TEST_BEGIN(test_qr_meld_split) { - ring_t entries[NENTRIES]; + ring_t entries[NENTRIES]; unsigned i; init_entries(entries); @@ -234,10 +239,6 @@ TEST_END int main(void) { - return test( - test_qr_one, - test_qr_after_insert, - test_qr_remove, - test_qr_before_insert, - test_qr_meld_split); + return test(test_qr_one, test_qr_after_insert, test_qr_remove, + test_qr_before_insert, test_qr_meld_split); } diff --git a/test/unit/rb.c b/test/unit/rb.c index 827ec510..790593e3 100644 --- a/test/unit/rb.c +++ b/test/unit/rb.c @@ -4,16 +4,17 @@ #include "jemalloc/internal/rb.h" -#define rbtn_black_height(a_type, a_field, a_rbt, r_height) do { \ - a_type *rbp_bh_t; \ - for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; rbp_bh_t != \ - NULL; rbp_bh_t = rbtn_left_get(a_type, a_field, \ - rbp_bh_t)) { \ - if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ - (r_height)++; \ - } \ - } \ -} while (0) +#define rbtn_black_height(a_type, a_field, a_rbt, r_height) \ + do { \ + a_type *rbp_bh_t; \ + for (rbp_bh_t = (a_rbt)->rbt_root, (r_height) = 0; \ + rbp_bh_t != NULL; \ + rbp_bh_t = rbtn_left_get(a_type, a_field, 
rbp_bh_t)) { \ + if (!rbtn_red_get(a_type, a_field, rbp_bh_t)) { \ + (r_height)++; \ + } \ + } \ + } while (0) static bool summarize_always_returns_true = false; @@ -55,7 +56,7 @@ struct node_s { */ const node_t *summary_lchild; const node_t *summary_rchild; - uint64_t summary_max_specialness; + uint64_t summary_max_specialness; }; static int @@ -80,8 +81,8 @@ node_cmp(const node_t *a, const node_t *b) { } static uint64_t -node_subtree_specialness(node_t *n, const node_t *lchild, - const node_t *rchild) { +node_subtree_specialness( + node_t *n, const node_t *lchild, const node_t *rchild) { uint64_t subtree_specialness = n->specialness; if (lchild != NULL && lchild->summary_max_specialness > subtree_specialness) { @@ -109,8 +110,8 @@ node_summarize(node_t *a, const node_t *lchild, const node_t *rchild) { typedef rb_tree(node_t) tree_t; rb_summarized_proto(static, tree_, tree_t, node_t); -rb_summarized_gen(static, tree_, tree_t, node_t, link, node_cmp, - node_summarize); +rb_summarized_gen( + static, tree_, tree_t, node_t, link, node_cmp, node_summarize); static bool specialness_filter_node(void *ctx, node_t *node) { @@ -127,24 +128,24 @@ specialness_filter_subtree(void *ctx, node_t *node) { static node_t * tree_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; - node_t *search_node; + node_t *search_node; expect_u32_eq(node->magic, NODE_MAGIC, "Bad magic"); /* Test rb_search(). */ search_node = tree_search(tree, node); - expect_ptr_eq(search_node, node, - "tree_search() returned unexpected node"); + expect_ptr_eq( + search_node, node, "tree_search() returned unexpected node"); /* Test rb_nsearch(). */ search_node = tree_nsearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_nsearch() returned unexpected node"); + expect_ptr_eq( + search_node, node, "tree_nsearch() returned unexpected node"); /* Test rb_psearch(). 
*/ search_node = tree_psearch(tree, node); - expect_ptr_eq(search_node, node, - "tree_psearch() returned unexpected node"); + expect_ptr_eq( + search_node, node, "tree_psearch() returned unexpected node"); (*i)++; @@ -174,38 +175,44 @@ TEST_BEGIN(test_rb_empty) { expect_ptr_null(tree_psearch(&tree, &key), "Unexpected node"); unsigned nodes = 0; - tree_iter_filtered(&tree, NULL, &tree_iterate_cb, - &nodes, &specialness_filter_node, &specialness_filter_subtree, - NULL); + tree_iter_filtered(&tree, NULL, &tree_iterate_cb, &nodes, + &specialness_filter_node, &specialness_filter_subtree, NULL); expect_u_eq(0, nodes, ""); nodes = 0; - tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, - &nodes, &specialness_filter_node, &specialness_filter_subtree, - NULL); + tree_reverse_iter_filtered(&tree, NULL, &tree_iterate_cb, &nodes, + &specialness_filter_node, &specialness_filter_subtree, NULL); expect_u_eq(0, nodes, ""); expect_ptr_null(tree_first_filtered(&tree, &specialness_filter_node, - &specialness_filter_subtree, NULL), ""); + &specialness_filter_subtree, NULL), + ""); expect_ptr_null(tree_last_filtered(&tree, &specialness_filter_node, - &specialness_filter_subtree, NULL), ""); + &specialness_filter_subtree, NULL), + ""); key.key = 0; key.magic = NODE_MAGIC; - expect_ptr_null(tree_search_filtered(&tree, &key, - &specialness_filter_node, &specialness_filter_subtree, NULL), ""); - expect_ptr_null(tree_nsearch_filtered(&tree, &key, - &specialness_filter_node, &specialness_filter_subtree, NULL), ""); - expect_ptr_null(tree_psearch_filtered(&tree, &key, - &specialness_filter_node, &specialness_filter_subtree, NULL), ""); + expect_ptr_null( + tree_search_filtered(&tree, &key, &specialness_filter_node, + &specialness_filter_subtree, NULL), + ""); + expect_ptr_null( + tree_nsearch_filtered(&tree, &key, &specialness_filter_node, + &specialness_filter_subtree, NULL), + ""); + expect_ptr_null( + tree_psearch_filtered(&tree, &key, &specialness_filter_node, + 
&specialness_filter_subtree, NULL), + ""); } TEST_END static unsigned tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { unsigned ret = 0; - node_t *left_node; - node_t *right_node; + node_t *left_node; + node_t *right_node; if (node == NULL) { return ret; @@ -214,13 +221,13 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { left_node = rbtn_left_get(node_t, link, node); right_node = rbtn_right_get(node_t, link, node); - expect_ptr_eq(left_node, node->summary_lchild, - "summary missed a tree update"); - expect_ptr_eq(right_node, node->summary_rchild, - "summary missed a tree update"); + expect_ptr_eq( + left_node, node->summary_lchild, "summary missed a tree update"); + expect_ptr_eq( + right_node, node->summary_rchild, "summary missed a tree update"); - uint64_t expected_subtree_specialness = node_subtree_specialness(node, - left_node, right_node); + uint64_t expected_subtree_specialness = node_subtree_specialness( + node, left_node, right_node); expect_u64_eq(expected_subtree_specialness, node->summary_max_specialness, "Incorrect summary"); @@ -232,7 +239,7 @@ tree_recurse(node_t *node, unsigned black_height, unsigned black_depth) { if (rbtn_red_get(node_t, link, node)) { if (left_node != NULL) { expect_false(rbtn_red_get(node_t, link, left_node), - "Node should be black"); + "Node should be black"); } if (right_node != NULL) { expect_false(rbtn_red_get(node_t, link, right_node), @@ -282,7 +289,7 @@ tree_iterate_reverse(tree_t *tree) { static void node_remove(tree_t *tree, node_t *node, unsigned nnodes) { - node_t *search_node; + node_t *search_node; unsigned black_height, imbalances; tree_remove(tree, node); @@ -290,15 +297,15 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { /* Test rb_nsearch(). 
*/ search_node = tree_nsearch(tree, node); if (search_node != NULL) { - expect_u64_ge(search_node->key, node->key, - "Key ordering error"); + expect_u64_ge( + search_node->key, node->key, "Key ordering error"); } /* Test rb_psearch(). */ search_node = tree_psearch(tree, node); if (search_node != NULL) { - expect_u64_le(search_node->key, node->key, - "Key ordering error"); + expect_u64_le( + search_node->key, node->key, "Key ordering error"); } node->magic = 0; @@ -306,16 +313,16 @@ node_remove(tree_t *tree, node_t *node, unsigned nnodes) { rbtn_black_height(node_t, link, tree, black_height); imbalances = tree_recurse(tree->rbt_root, black_height, 0); expect_u_eq(imbalances, 0, "Tree is unbalanced"); - expect_u_eq(tree_iterate(tree), nnodes-1, - "Unexpected node iteration count"); - expect_u_eq(tree_iterate_reverse(tree), nnodes-1, + expect_u_eq( + tree_iterate(tree), nnodes - 1, "Unexpected node iteration count"); + expect_u_eq(tree_iterate_reverse(tree), nnodes - 1, "Unexpected node iteration count"); } static node_t * remove_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; - node_t *ret = tree_next(tree, node); + node_t *ret = tree_next(tree, node); node_remove(tree, node, *nnodes); @@ -325,7 +332,7 @@ remove_iterate_cb(tree_t *tree, node_t *node, void *data) { static node_t * remove_reverse_iterate_cb(tree_t *tree, node_t *node, void *data) { unsigned *nnodes = (unsigned *)data; - node_t *ret = tree_prev(tree, node); + node_t *ret = tree_prev(tree, node); node_remove(tree, node, *nnodes); @@ -341,15 +348,11 @@ destroy_cb(node_t *node, void *data) { } TEST_BEGIN(test_rb_random) { - enum { - NNODES = 25, - NBAGS = 500, - SEED = 42 - }; - sfmt_t *sfmt; + enum { NNODES = 25, NBAGS = 500, SEED = 42 }; + sfmt_t *sfmt; uint64_t bag[NNODES]; - tree_t tree; - node_t nodes[NNODES]; + tree_t tree; + node_t nodes[NNODES]; unsigned i, j, k, black_height, imbalances; sfmt = init_gen_rand(SEED); @@ -386,8 +389,8 @@ 
TEST_BEGIN(test_rb_random) { for (k = 0; k < j; k++) { nodes[k].magic = NODE_MAGIC; nodes[k].key = bag[k]; - nodes[k].specialness = gen_rand64_range(sfmt, - NNODES); + nodes[k].specialness = gen_rand64_range( + sfmt, NNODES); nodes[k].mid_remove = false; nodes[k].allow_duplicates = false; nodes[k].summary_lchild = NULL; @@ -399,16 +402,16 @@ TEST_BEGIN(test_rb_random) { for (k = 0; k < j; k++) { tree_insert(&tree, &nodes[k]); - rbtn_black_height(node_t, link, &tree, - black_height); - imbalances = tree_recurse(tree.rbt_root, - black_height, 0); - expect_u_eq(imbalances, 0, - "Tree is unbalanced"); + rbtn_black_height( + node_t, link, &tree, black_height); + imbalances = tree_recurse( + tree.rbt_root, black_height, 0); + expect_u_eq( + imbalances, 0, "Tree is unbalanced"); - expect_u_eq(tree_iterate(&tree), k+1, + expect_u_eq(tree_iterate(&tree), k + 1, "Unexpected node iteration count"); - expect_u_eq(tree_iterate_reverse(&tree), k+1, + expect_u_eq(tree_iterate_reverse(&tree), k + 1, "Unexpected node iteration count"); expect_false(tree_empty(&tree), @@ -431,11 +434,11 @@ TEST_BEGIN(test_rb_random) { break; case 1: for (k = j; k > 0; k--) { - node_remove(&tree, &nodes[k-1], k); + node_remove(&tree, &nodes[k - 1], k); } break; case 2: { - node_t *start; + node_t *start; unsigned nnodes = j; start = NULL; @@ -444,11 +447,12 @@ TEST_BEGIN(test_rb_random) { remove_iterate_cb, (void *)&nnodes); nnodes--; } while (start != NULL); - expect_u_eq(nnodes, 0, - "Removal terminated early"); + expect_u_eq( + nnodes, 0, "Removal terminated early"); break; - } case 3: { - node_t *start; + } + case 3: { + node_t *start; unsigned nnodes = j; start = NULL; @@ -458,16 +462,18 @@ TEST_BEGIN(test_rb_random) { (void *)&nnodes); nnodes--; } while (start != NULL); - expect_u_eq(nnodes, 0, - "Removal terminated early"); + expect_u_eq( + nnodes, 0, "Removal terminated early"); break; - } case 4: { + } + case 4: { unsigned nnodes = j; tree_destroy(&tree, destroy_cb, &nnodes); - 
expect_u_eq(nnodes, 0, - "Destruction terminated early"); + expect_u_eq( + nnodes, 0, "Destruction terminated early"); break; - } default: + } + default: not_reached(); } } @@ -479,7 +485,7 @@ TEST_END static void expect_simple_consistency(tree_t *tree, uint64_t specialness, bool expected_empty, node_t *expected_first, node_t *expected_last) { - bool empty; + bool empty; node_t *first; node_t *last; @@ -487,19 +493,17 @@ expect_simple_consistency(tree_t *tree, uint64_t specialness, &specialness_filter_subtree, &specialness); expect_b_eq(expected_empty, empty, ""); - first = tree_first_filtered(tree, - &specialness_filter_node, &specialness_filter_subtree, - (void *)&specialness); + first = tree_first_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, (void *)&specialness); expect_ptr_eq(expected_first, first, ""); - last = tree_last_filtered(tree, - &specialness_filter_node, &specialness_filter_subtree, - (void *)&specialness); + last = tree_last_filtered(tree, &specialness_filter_node, + &specialness_filter_subtree, (void *)&specialness); expect_ptr_eq(expected_last, last, ""); } TEST_BEGIN(test_rb_filter_simple) { - enum {FILTER_NODES = 10}; + enum { FILTER_NODES = 10 }; node_t nodes[FILTER_NODES]; for (unsigned i = 0; i < FILTER_NODES; i++) { nodes[i].magic = NODE_MAGIC; @@ -583,10 +587,10 @@ TEST_END typedef struct iter_ctx_s iter_ctx_t; struct iter_ctx_s { - int ncalls; + int ncalls; node_t *last_node; - int ncalls_max; + int ncalls_max; bool forward; }; @@ -624,8 +628,8 @@ static void check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { uint64_t specialness = 1; - bool empty; - bool real_empty = true; + bool empty; + bool real_empty = true; node_t *first; node_t *real_first = NULL; node_t *last; @@ -667,12 +671,14 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { } if (node_cmp(&nodes[j], &nodes[i]) < 0 && (real_prev_filtered == NULL - || node_cmp(&nodes[j], real_prev_filtered) > 0)) { 
+ || node_cmp(&nodes[j], real_prev_filtered) + > 0)) { real_prev_filtered = &nodes[j]; } if (node_cmp(&nodes[j], &nodes[i]) > 0 && (real_next_filtered == NULL - || node_cmp(&nodes[j], real_next_filtered) < 0)) { + || node_cmp(&nodes[j], real_next_filtered) + < 0)) { real_next_filtered = &nodes[j]; } } @@ -707,8 +713,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness); expect_ptr_eq(real_search_filtered, search_filtered, ""); - real_nsearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_next_filtered); + real_nsearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_next_filtered); nsearch_filtered = tree_nsearch_filtered(tree, &before, &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -721,22 +728,25 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { expect_ptr_eq(real_psearch_filtered, psearch_filtered, ""); /* search, nsearch, psearch from nodes[i] */ - real_search_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : NULL); + real_search_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : NULL); search_filtered = tree_search_filtered(tree, &nodes[i], &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_search_filtered, search_filtered, ""); - real_nsearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_next_filtered); + real_nsearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_next_filtered); nsearch_filtered = tree_nsearch_filtered(tree, &nodes[i], &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); - real_psearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_prev_filtered); + real_psearch_filtered = (nodes[i].specialness >= specialness + ? 
&nodes[i] + : real_prev_filtered); psearch_filtered = tree_psearch_filtered(tree, &nodes[i], &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -750,22 +760,25 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { equiv.magic = NODE_MAGIC; equiv.key = nodes[i].key; equiv.allow_duplicates = true; - real_search_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : NULL); + real_search_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : NULL); search_filtered = tree_search_filtered(tree, &equiv, &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_search_filtered, search_filtered, ""); - real_nsearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_next_filtered); + real_nsearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_next_filtered); nsearch_filtered = tree_nsearch_filtered(tree, &equiv, &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); - real_psearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_prev_filtered); + real_psearch_filtered = (nodes[i].specialness >= specialness + ? &nodes[i] + : real_prev_filtered); psearch_filtered = tree_psearch_filtered(tree, &equiv, &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -791,8 +804,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness); expect_ptr_eq(real_nsearch_filtered, nsearch_filtered, ""); - real_psearch_filtered = (nodes[i].specialness >= specialness ? - &nodes[i] : real_prev_filtered); + real_psearch_filtered = (nodes[i].specialness >= specialness + ? 
&nodes[i] + : real_prev_filtered); psearch_filtered = tree_psearch_filtered(tree, &after, &specialness_filter_node, &specialness_filter_subtree, &specialness); @@ -800,7 +814,7 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { } /* Filtered iteration test setup. */ - int nspecial = 0; + int nspecial = 0; node_t *sorted_nodes[UPDATE_TEST_MAX]; node_t *sorted_filtered_nodes[UPDATE_TEST_MAX]; for (int i = 0; i < nnodes; i++) { @@ -862,8 +876,9 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_d_eq(j + 1, ctx.ncalls, ""); - expect_ptr_eq(sorted_filtered_nodes[ - nodes[i].filtered_rank + j], iter_result, ""); + expect_ptr_eq( + sorted_filtered_nodes[nodes[i].filtered_rank + j], + iter_result, ""); } } @@ -888,8 +903,8 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness_filter_subtree, &specialness); expect_ptr_null(iter_result, ""); int surplus_rank = (nodes[i].specialness >= 1 ? 1 : 0); - expect_d_eq(nodes[i].filtered_rank + surplus_rank, ctx.ncalls, - ""); + expect_d_eq( + nodes[i].filtered_rank + surplus_rank, ctx.ncalls, ""); } /* Filtered backward iteration from the end, with stopping */ for (int i = 0; i < nspecial; i++) { @@ -899,15 +914,15 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { iter_result = tree_reverse_iter_filtered(tree, NULL, &tree_iterate_filtered_cb, &ctx, &specialness_filter_node, &specialness_filter_subtree, &specialness); - expect_ptr_eq(sorted_filtered_nodes[nspecial - i - 1], - iter_result, ""); + expect_ptr_eq( + sorted_filtered_nodes[nspecial - i - 1], iter_result, ""); expect_d_eq(ctx.ncalls, i + 1, ""); } /* Filtered backward iteration from a starting point, with stopping. */ for (int i = 0; i < nnodes; i++) { int surplus_rank = (nodes[i].specialness >= 1 ? 
1 : 0); for (int j = 0; j < nodes[i].filtered_rank + surplus_rank; - j++) { + j++) { ctx.ncalls = 0; ctx.last_node = NULL; ctx.ncalls_max = j + 1; @@ -916,16 +931,16 @@ check_consistency(tree_t *tree, node_t nodes[UPDATE_TEST_MAX], int nnodes) { &specialness_filter_node, &specialness_filter_subtree, &specialness); expect_d_eq(j + 1, ctx.ncalls, ""); - expect_ptr_eq(sorted_filtered_nodes[ - nodes[i].filtered_rank - j - 1 + surplus_rank], + expect_ptr_eq( + sorted_filtered_nodes[nodes[i].filtered_rank - j - 1 + + surplus_rank], iter_result, ""); } } } static void -do_update_search_test(int nnodes, int ntrees, int nremovals, - int nupdates) { +do_update_search_test(int nnodes, int ntrees, int nremovals, int nupdates) { node_t nodes[UPDATE_TEST_MAX]; assert(nnodes <= UPDATE_TEST_MAX); @@ -987,8 +1002,8 @@ rb_gen(static UNUSED, unsummarized_tree_, unsummarized_tree_t, node_t, link, node_cmp); static node_t * -unsummarized_tree_iterate_cb(unsummarized_tree_t *tree, node_t *node, - void *data) { +unsummarized_tree_iterate_cb( + unsummarized_tree_t *tree, node_t *node, void *data) { unsigned *i = (unsigned *)data; (*i)++; return NULL; @@ -1002,18 +1017,14 @@ TEST_BEGIN(test_rb_unsummarized) { unsummarized_tree_t tree; unsummarized_tree_new(&tree); unsigned nnodes = 0; - unsummarized_tree_iter(&tree, NULL, &unsummarized_tree_iterate_cb, - &nnodes); + unsummarized_tree_iter( + &tree, NULL, &unsummarized_tree_iterate_cb, &nnodes); expect_u_eq(0, nnodes, ""); } TEST_END int main(void) { - return test_no_reentrancy( - test_rb_empty, - test_rb_random, - test_rb_filter_simple, - test_rb_update_search, - test_rb_unsummarized); + return test_no_reentrancy(test_rb_empty, test_rb_random, + test_rb_filter_simple, test_rb_update_search, test_rb_unsummarized); } diff --git a/test/unit/retained.c b/test/unit/retained.c index 40cbb0cd..687701c7 100644 --- a/test/unit/retained.c +++ b/test/unit/retained.c @@ -3,21 +3,22 @@ #include "jemalloc/internal/san.h" #include 
"jemalloc/internal/spin.h" -static unsigned arena_ind; -static size_t sz; -static size_t esz; -#define NEPOCHS 8 -#define PER_THD_NALLOCS 1 -static atomic_u_t epoch; -static atomic_u_t nfinished; +static unsigned arena_ind; +static size_t sz; +static size_t esz; +#define NEPOCHS 8 +#define PER_THD_NALLOCS 1 +static atomic_u_t epoch; +static atomic_u_t nfinished; static unsigned do_arena_create(extent_hooks_t *h) { unsigned new_arena_ind; - size_t ind_sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz, - (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), 0, - "Unexpected mallctl() failure"); + size_t ind_sz = sizeof(unsigned); + expect_d_eq( + mallctl("arenas.create", (void *)&new_arena_ind, &ind_sz, + (void *)(h != NULL ? &h : NULL), (h != NULL ? sizeof(h) : 0)), + 0, "Unexpected mallctl() failure"); return new_arena_ind; } @@ -26,7 +27,7 @@ do_arena_destroy(unsigned ind) { size_t mib[3]; size_t miblen; - miblen = sizeof(mib)/sizeof(size_t); + miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arena.0.destroy", mib, &miblen), 0, "Unexpected mallctlnametomib() failure"); mib[1] = (size_t)ind; @@ -38,7 +39,8 @@ static void do_refresh(void) { uint64_t refresh_epoch = 1; expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&refresh_epoch, - sizeof(refresh_epoch)), 0, "Unexpected mallctl() failure"); + sizeof(refresh_epoch)), + 0, "Unexpected mallctl() failure"); } static size_t @@ -47,12 +49,12 @@ do_get_size_impl(const char *cmd, unsigned ind) { size_t miblen = sizeof(mib) / sizeof(size_t); size_t z = sizeof(size_t); - expect_d_eq(mallctlnametomib(cmd, mib, &miblen), - 0, "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); + expect_d_eq(mallctlnametomib(cmd, mib, &miblen), 0, + "Unexpected mallctlnametomib(\"%s\", ...) failure", cmd); mib[2] = ind; size_t size; - expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), - 0, "Unexpected mallctlbymib([\"%s\"], ...) 
failure", cmd); + expect_d_eq(mallctlbymib(mib, miblen, (void *)&size, &z, NULL, 0), 0, + "Unexpected mallctlbymib([\"%s\"], ...) failure", cmd); return size; } @@ -72,9 +74,9 @@ thd_start(void *arg) { for (unsigned next_epoch = 1; next_epoch < NEPOCHS; next_epoch++) { /* Busy-wait for next epoch. */ unsigned cur_epoch; - spin_t spinner = SPIN_INITIALIZER; - while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) != - next_epoch) { + spin_t spinner = SPIN_INITIALIZER; + while ((cur_epoch = atomic_load_u(&epoch, ATOMIC_ACQUIRE)) + != next_epoch) { spin_adaptive(&spinner); } expect_u_eq(cur_epoch, next_epoch, "Unexpected epoch"); @@ -84,11 +86,10 @@ thd_start(void *arg) { * no need to deallocate. */ for (unsigned i = 0; i < PER_THD_NALLOCS; i++) { - void *p = mallocx(sz, MALLOCX_ARENA(arena_ind) | - MALLOCX_TCACHE_NONE - ); - expect_ptr_not_null(p, - "Unexpected mallocx() failure\n"); + void *p = mallocx( + sz, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + expect_ptr_not_null( + p, "Unexpected mallocx() failure\n"); } /* Let the main thread know we've finished this iteration. 
*/ @@ -142,17 +143,17 @@ TEST_BEGIN(test_retained) { */ do_refresh(); - size_t allocated = (esz - guard_sz) * nthreads * - PER_THD_NALLOCS; + size_t allocated = (esz - guard_sz) * nthreads + * PER_THD_NALLOCS; size_t active = do_get_active(arena_ind); expect_zu_le(allocated, active, "Unexpected active memory"); size_t mapped = do_get_mapped(arena_ind); expect_zu_le(active, mapped, "Unexpected mapped memory"); arena_t *arena = arena_get(tsdn_fetch(), arena_ind, false); - size_t usable = 0; - for (pszind_t pind = sz_psz2ind(HUGEPAGE); pind < - arena->pa_shard.pac.exp_grow.next; pind++) { + size_t usable = 0; + for (pszind_t pind = sz_psz2ind(HUGEPAGE); + pind < arena->pa_shard.pac.exp_grow.next; pind++) { size_t psz = sz_pind2sz(pind); size_t psz_fragmented = psz % esz; size_t psz_usable = psz - psz_fragmented; @@ -162,8 +163,8 @@ TEST_BEGIN(test_retained) { if (psz_usable > 0) { expect_zu_lt(usable, allocated, "Excessive retained memory " - "(%#zx[+%#zx] > %#zx)", usable, psz_usable, - allocated); + "(%#zx[+%#zx] > %#zx)", + usable, psz_usable, allocated); usable += psz_usable; } } @@ -174,8 +175,8 @@ TEST_BEGIN(test_retained) { * (rather than retaining) during reset. 
*/ do_arena_destroy(arena_ind); - expect_u_eq(do_arena_create(NULL), arena_ind, - "Unexpected arena index"); + expect_u_eq( + do_arena_create(NULL), arena_ind, "Unexpected arena index"); } for (unsigned i = 0; i < nthreads; i++) { @@ -188,6 +189,5 @@ TEST_END int main(void) { - return test( - test_retained); + return test(test_retained); } diff --git a/test/unit/rtree.c b/test/unit/rtree.c index 4101b72b..284c3eae 100644 --- a/test/unit/rtree.c +++ b/test/unit/rtree.c @@ -16,14 +16,15 @@ TEST_BEGIN(test_rtree_read_empty) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); - rtree_t *rtree = &test_rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); rtree_contents_t contents; - expect_true(rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, - &contents), "rtree_read_independent() should fail on empty rtree."); + expect_true( + rtree_read_independent(tsdn, rtree, &rtree_ctx, PAGE, &contents), + "rtree_read_independent() should fail on empty rtree."); base_delete(tsdn, base); } @@ -45,9 +46,9 @@ TEST_BEGIN(test_rtree_extrema) { edata_t *edata_a, *edata_b; edata_a = alloc_edata(); edata_b = alloc_edata(); - edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, - false, sz_size2index(SC_LARGE_MINCLASS), 0, - extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + edata_init(edata_a, INVALID_ARENA_IND, NULL, SC_LARGE_MINCLASS, false, + sz_size2index(SC_LARGE_MINCLASS), 0, extent_state_active, false, + false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); edata_init(edata_b, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); @@ -57,11 +58,11 @@ TEST_BEGIN(test_rtree_extrema) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected 
base_new failure"); - rtree_t *rtree = &test_rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); rtree_contents_t contents_a; contents_a.edata = edata_a; @@ -73,13 +74,14 @@ TEST_BEGIN(test_rtree_extrema) { "Unexpected rtree_write() failure"); expect_false(rtree_write(tsdn, rtree, &rtree_ctx, PAGE, contents_a), "Unexpected rtree_write() failure"); - rtree_contents_t read_contents_a = rtree_read(tsdn, rtree, &rtree_ctx, - PAGE); + rtree_contents_t read_contents_a = rtree_read( + tsdn, rtree, &rtree_ctx, PAGE); expect_true(contents_a.edata == read_contents_a.edata - && contents_a.metadata.szind == read_contents_a.metadata.szind - && contents_a.metadata.slab == read_contents_a.metadata.slab - && contents_a.metadata.is_head == read_contents_a.metadata.is_head - && contents_a.metadata.state == read_contents_a.metadata.state, + && contents_a.metadata.szind == read_contents_a.metadata.szind + && contents_a.metadata.slab == read_contents_a.metadata.slab + && contents_a.metadata.is_head + == read_contents_a.metadata.is_head + && contents_a.metadata.state == read_contents_a.metadata.state, "rtree_read() should return previously set value"); rtree_contents_t contents_b; @@ -88,15 +90,17 @@ TEST_BEGIN(test_rtree_extrema) { contents_b.metadata.slab = edata_slab_get(edata_b); contents_b.metadata.is_head = edata_is_head_get(edata_b); contents_b.metadata.state = edata_state_get(edata_b); - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), - contents_b), "Unexpected rtree_write() failure"); - rtree_contents_t read_contents_b = rtree_read(tsdn, rtree, &rtree_ctx, - ~((uintptr_t)0)); + expect_false( + rtree_write(tsdn, rtree, &rtree_ctx, ~((uintptr_t)0), contents_b), + "Unexpected rtree_write() failure"); + rtree_contents_t read_contents_b = rtree_read( + tsdn, 
rtree, &rtree_ctx, ~((uintptr_t)0)); assert_true(contents_b.edata == read_contents_b.edata - && contents_b.metadata.szind == read_contents_b.metadata.szind - && contents_b.metadata.slab == read_contents_b.metadata.slab - && contents_b.metadata.is_head == read_contents_b.metadata.is_head - && contents_b.metadata.state == read_contents_b.metadata.state, + && contents_b.metadata.szind == read_contents_b.metadata.szind + && contents_b.metadata.slab == read_contents_b.metadata.slab + && contents_b.metadata.is_head + == read_contents_b.metadata.is_head + && contents_b.metadata.state == read_contents_b.metadata.state, "rtree_read() should return previously set value"); base_delete(tsdn, base); @@ -109,19 +113,19 @@ TEST_BEGIN(test_rtree_bits) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); - uintptr_t keys[] = {PAGE, PAGE + 1, - PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; + uintptr_t keys[] = { + PAGE, PAGE + 1, PAGE + (((uintptr_t)1) << LG_PAGE) - 1}; edata_t *edata_c = alloc_edata(); edata_init(edata_c, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); - rtree_t *rtree = &test_rtree; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); - for (unsigned i = 0; i < sizeof(keys)/sizeof(uintptr_t); i++) { + for (unsigned i = 0; i < sizeof(keys) / sizeof(uintptr_t); i++) { rtree_contents_t contents; contents.edata = edata_c; contents.metadata.szind = SC_NSIZES; @@ -129,18 +133,22 @@ TEST_BEGIN(test_rtree_bits) { contents.metadata.is_head = false; contents.metadata.state = extent_state_active; - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, keys[i], - contents), "Unexpected rtree_write() failure"); - for (unsigned j = 0; j < sizeof(keys)/sizeof(uintptr_t); j++) { - 
expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[j]).edata, edata_c, + expect_false( + rtree_write(tsdn, rtree, &rtree_ctx, keys[i], contents), + "Unexpected rtree_write() failure"); + for (unsigned j = 0; j < sizeof(keys) / sizeof(uintptr_t); + j++) { + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, keys[j]).edata, + edata_c, "rtree_edata_read() should return previously set " "value and ignore insignificant key bits; i=%u, " - "j=%u, set key=%#"FMTxPTR", get key=%#"FMTxPTR, i, - j, keys[i], keys[j]); + "j=%u, set key=%#" FMTxPTR ", get key=%#" FMTxPTR, + i, j, keys[i], keys[j]); } expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, - (((uintptr_t)2) << LG_PAGE)).edata, + (((uintptr_t)2) << LG_PAGE)) + .edata, "Only leftmost rtree leaf should be set; i=%u", i); rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); } @@ -159,8 +167,8 @@ TEST_BEGIN(test_rtree_random) { /* metadata_use_hooks */ true); expect_ptr_not_null(base, "Unexpected base_new failure"); - uintptr_t keys[NSET]; - rtree_t *rtree = &test_rtree; + uintptr_t keys[NSET]; + rtree_t *rtree = &test_rtree; rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); @@ -168,15 +176,15 @@ TEST_BEGIN(test_rtree_random) { edata_init(edata_d, INVALID_ARENA_IND, NULL, 0, false, SC_NSIZES, 0, extent_state_active, false, false, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); for (unsigned i = 0; i < NSET; i++) { keys[i] = (uintptr_t)gen_rand64(sfmt); - rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup(tsdn, rtree, - &rtree_ctx, keys[i], false, true); - expect_ptr_not_null(elm, - "Unexpected rtree_leaf_elm_lookup() failure"); + rtree_leaf_elm_t *elm = rtree_leaf_elm_lookup( + tsdn, rtree, &rtree_ctx, keys[i], false, true); + expect_ptr_not_null( + elm, "Unexpected rtree_leaf_elm_lookup() failure"); rtree_contents_t contents; contents.edata = edata_d; 
contents.metadata.szind = SC_NSIZES; @@ -184,26 +192,27 @@ TEST_BEGIN(test_rtree_random) { contents.metadata.is_head = false; contents.metadata.state = edata_state_get(edata_d); rtree_leaf_elm_write(tsdn, rtree, elm, contents); - expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, edata_d, + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, edata_d, + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, edata_d, "rtree_edata_read() should return previously set value, " - "i=%u", i); + "i=%u", + i); } for (unsigned i = 0; i < NSET; i++) { rtree_clear(tsdn, rtree, &rtree_ctx, keys[i]); - expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, - "rtree_edata_read() should return previously set value"); + expect_ptr_null( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, + "rtree_edata_read() should return previously set value"); } for (unsigned i = 0; i < NSET; i++) { - expect_ptr_null(rtree_read(tsdn, rtree, &rtree_ctx, - keys[i]).edata, + expect_ptr_null( + rtree_read(tsdn, rtree, &rtree_ctx, keys[i]).edata, "rtree_edata_read() should return previously set value"); } @@ -215,8 +224,8 @@ TEST_BEGIN(test_rtree_random) { TEST_END static void -test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, - uintptr_t end) { +test_rtree_range_write( + tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, uintptr_t end) { rtree_ctx_t rtree_ctx; rtree_ctx_data_init(&rtree_ctx); @@ -230,15 +239,17 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, contents.metadata.is_head = false; contents.metadata.state = extent_state_active; - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, - contents), "Unexpected rtree_write() failure"); - expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, - contents), 
"Unexpected rtree_write() failure"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, start, contents), + "Unexpected rtree_write() failure"); + expect_false(rtree_write(tsdn, rtree, &rtree_ctx, end, contents), + "Unexpected rtree_write() failure"); rtree_write_range(tsdn, rtree, &rtree_ctx, start, end, contents); for (uintptr_t i = 0; i < ((end - start) >> LG_PAGE); i++) { - expect_ptr_eq(rtree_read(tsdn, rtree, &rtree_ctx, - start + (i << LG_PAGE)).edata, edata_e, + expect_ptr_eq( + rtree_read(tsdn, rtree, &rtree_ctx, start + (i << LG_PAGE)) + .edata, + edata_e, "rtree_edata_read() should return previously set value"); } rtree_clear_range(tsdn, rtree, &rtree_ctx, start, end); @@ -247,8 +258,9 @@ test_rtree_range_write(tsdn_t *tsdn, rtree_t *rtree, uintptr_t start, elm = rtree_leaf_elm_lookup(tsdn, rtree, &rtree_ctx, start + (i << LG_PAGE), false, false); expect_ptr_not_null(elm, "Should have been initialized."); - expect_ptr_null(rtree_leaf_elm_read(tsdn, rtree, elm, - false).edata, "Should have been cleared."); + expect_ptr_null( + rtree_leaf_elm_read(tsdn, rtree, elm, false).edata, + "Should have been cleared."); } } @@ -259,8 +271,8 @@ TEST_BEGIN(test_rtree_range) { expect_ptr_not_null(base, "Unexpected base_new failure"); rtree_t *rtree = &test_rtree; - expect_false(rtree_new(rtree, base, false), - "Unexpected rtree_new() failure"); + expect_false( + rtree_new(rtree, base, false), "Unexpected rtree_new() failure"); /* Not crossing rtree node boundary first. 
*/ uintptr_t start = ZU(1) << rtree_leaf_maskbits(); @@ -280,10 +292,6 @@ TEST_END int main(void) { - return test( - test_rtree_read_empty, - test_rtree_extrema, - test_rtree_bits, - test_rtree_random, - test_rtree_range); + return test(test_rtree_read_empty, test_rtree_extrema, test_rtree_bits, + test_rtree_random, test_rtree_range); } diff --git a/test/unit/safety_check.c b/test/unit/safety_check.c index 84726675..558797c0 100644 --- a/test/unit/safety_check.c +++ b/test/unit/safety_check.c @@ -8,7 +8,8 @@ */ bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -26,7 +27,7 @@ TEST_BEGIN(test_malloc_free_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); free(ptr); safety_check_set_abort(NULL); @@ -42,7 +43,7 @@ TEST_BEGIN(test_mallocx_dallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = mallocx(128, 0); + char *ptr = mallocx(128, 0); buffer_overflow_write(ptr, 128); dallocx(ptr, 0); safety_check_set_abort(NULL); @@ -58,7 +59,7 @@ TEST_BEGIN(test_malloc_sdallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); sdallocx(ptr, 128, 0); safety_check_set_abort(NULL); @@ -74,7 +75,7 @@ TEST_BEGIN(test_realloc_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); ptr = realloc(ptr, 129); safety_check_set_abort(NULL); @@ -91,7 +92,7 @@ TEST_BEGIN(test_rallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! 
*/ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); ptr = rallocx(ptr, 129, 0); safety_check_set_abort(NULL); @@ -108,7 +109,7 @@ TEST_BEGIN(test_xallocx_overflow) { safety_check_set_abort(&fake_abort); /* Buffer overflow! */ - char* ptr = malloc(128); + char *ptr = malloc(128); buffer_overflow_write(ptr, 128); size_t result = xallocx(ptr, 129, 0, 0); expect_zu_eq(result, 128, ""); @@ -120,7 +121,7 @@ TEST_BEGIN(test_xallocx_overflow) { TEST_END TEST_BEGIN(test_realloc_no_overflow) { - char* ptr = malloc(128); + char *ptr = malloc(128); ptr = realloc(ptr, 256); ptr[128] = 0; ptr[255] = 0; @@ -135,7 +136,7 @@ TEST_BEGIN(test_realloc_no_overflow) { TEST_END TEST_BEGIN(test_rallocx_no_overflow) { - char* ptr = malloc(128); + char *ptr = malloc(128); ptr = rallocx(ptr, 256, 0); ptr[128] = 0; ptr[255] = 0; @@ -151,13 +152,8 @@ TEST_END int main(void) { - return test( - test_malloc_free_overflow, - test_mallocx_dallocx_overflow, - test_malloc_sdallocx_overflow, - test_realloc_overflow, - test_rallocx_overflow, - test_xallocx_overflow, - test_realloc_no_overflow, - test_rallocx_no_overflow); + return test(test_malloc_free_overflow, test_mallocx_dallocx_overflow, + test_malloc_sdallocx_overflow, test_realloc_overflow, + test_rallocx_overflow, test_xallocx_overflow, + test_realloc_no_overflow, test_rallocx_no_overflow); } diff --git a/test/unit/san.c b/test/unit/san.c index 5b98f52e..2c7f1ec5 100644 --- a/test/unit/san.c +++ b/test/unit/san.c @@ -6,8 +6,8 @@ static void verify_extent_guarded(tsdn_t *tsdn, void *ptr) { - expect_true(extent_is_guarded(tsdn, ptr), - "All extents should be guarded."); + expect_true( + extent_is_guarded(tsdn, ptr), "All extents should be guarded."); } #define MAX_SMALL_ALLOCATIONS 4096 @@ -21,13 +21,13 @@ void *small_alloc[MAX_SMALL_ALLOCATIONS]; TEST_BEGIN(test_guarded_small) { test_skip_if(opt_prof); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); unsigned npages = 16, 
pages_found = 0, ends_found = 0; VARIABLE_ARRAY(uintptr_t, pages, npages); /* Allocate to get sanitized pointers. */ - size_t slab_sz = PAGE; - size_t sz = slab_sz / 8; + size_t slab_sz = PAGE; + size_t sz = slab_sz / 8; unsigned n_alloc = 0; while (n_alloc < MAX_SMALL_ALLOCATIONS) { void *ptr = malloc(sz); @@ -54,8 +54,9 @@ TEST_BEGIN(test_guarded_small) { /* Verify the pages are not continuous, i.e. separated by guards. */ for (unsigned i = 0; i < npages - 1; i++) { for (unsigned j = i + 1; j < npages; j++) { - uintptr_t ptr_diff = pages[i] > pages[j] ? - pages[i] - pages[j] : pages[j] - pages[i]; + uintptr_t ptr_diff = pages[i] > pages[j] + ? pages[i] - pages[j] + : pages[j] - pages[i]; expect_zu_ge((size_t)ptr_diff, slab_sz + PAGE, "There should be at least one pages between " "guarded slabs"); @@ -69,7 +70,7 @@ TEST_BEGIN(test_guarded_small) { TEST_END TEST_BEGIN(test_guarded_large) { - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); unsigned nlarge = 32; VARIABLE_ARRAY(uintptr_t, large, nlarge); @@ -85,8 +86,9 @@ TEST_BEGIN(test_guarded_large) { /* Verify the pages are not continuous, i.e. separated by guards. */ for (unsigned i = 0; i < nlarge; i++) { for (unsigned j = i + 1; j < nlarge; j++) { - uintptr_t ptr_diff = large[i] > large[j] ? - large[i] - large[j] : large[j] - large[i]; + uintptr_t ptr_diff = large[i] > large[j] + ? 
large[i] - large[j] + : large[j] - large[i]; expect_zu_ge((size_t)ptr_diff, large_sz + 2 * PAGE, "There should be at least two pages between " " guarded large allocations"); @@ -102,15 +104,13 @@ TEST_END static void verify_pdirty(unsigned arena_ind, uint64_t expected) { uint64_t pdirty = get_arena_pdirty(arena_ind); - expect_u64_eq(pdirty, expected / PAGE, - "Unexpected dirty page amount."); + expect_u64_eq(pdirty, expected / PAGE, "Unexpected dirty page amount."); } static void verify_pmuzzy(unsigned arena_ind, uint64_t expected) { uint64_t pmuzzy = get_arena_pmuzzy(arena_ind); - expect_u64_eq(pmuzzy, expected / PAGE, - "Unexpected muzzy page amount."); + expect_u64_eq(pmuzzy, expected / PAGE, "Unexpected muzzy page amount."); } TEST_BEGIN(test_guarded_decay) { @@ -140,7 +140,7 @@ TEST_BEGIN(test_guarded_decay) { verify_pmuzzy(arena_ind, 0); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; /* Should reuse dirty extents for the two mallocx. 
*/ void *p1 = do_mallocx(sz1, flags); @@ -200,8 +200,5 @@ TEST_END int main(void) { - return test( - test_guarded_small, - test_guarded_large, - test_guarded_decay); + return test(test_guarded_small, test_guarded_large, test_guarded_decay); } diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index cafa37fe..9aa0210e 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -16,12 +16,12 @@ TEST_BEGIN(test_san_bump_alloc) { assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); arena_t *arena = arena_get(tsdn, arena_ind, false); - pac_t *pac = &arena->pa_shard.pac; + pac_t *pac = &arena->pa_shard.pac; - size_t alloc_size = PAGE * 16; - size_t alloc_n = alloc_size / sizeof(unsigned); - edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), - alloc_size, /* zero */ false); + size_t alloc_size = PAGE * 16; + size_t alloc_n = alloc_size / sizeof(unsigned); + edata_t *edata = san_bump_alloc( + tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size, /* zero */ false); expect_ptr_not_null(edata, "Failed to allocate edata"); expect_u_eq(edata_arena_ind_get(edata), arena_ind, @@ -39,10 +39,10 @@ TEST_BEGIN(test_san_bump_alloc) { ((unsigned *)ptr)[i] = 1; } - size_t alloc_size2 = PAGE * 28; - size_t alloc_n2 = alloc_size / sizeof(unsigned); - edata_t *edata2 = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), - alloc_size2, /* zero */ true); + size_t alloc_size2 = PAGE * 28; + size_t alloc_n2 = alloc_size / sizeof(unsigned); + edata_t *edata2 = san_bump_alloc( + tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size2, /* zero */ true); expect_ptr_not_null(edata2, "Failed to allocate edata"); expect_u_eq(edata_arena_ind_get(edata2), arena_ind, @@ -57,11 +57,11 @@ TEST_BEGIN(test_san_bump_alloc) { expect_ptr_not_null(ptr, "Edata was assigned an invalid address"); uintptr_t ptrdiff = ptr2 > ptr ? 
(uintptr_t)ptr2 - (uintptr_t)ptr - : (uintptr_t)ptr - (uintptr_t)ptr2; - size_t between_allocs = (size_t)ptrdiff - alloc_size; + : (uintptr_t)ptr - (uintptr_t)ptr2; + size_t between_allocs = (size_t)ptrdiff - alloc_size; - expect_zu_ge(between_allocs, PAGE, - "Guard page between allocs is missing"); + expect_zu_ge( + between_allocs, PAGE, "Guard page between allocs is missing"); for (unsigned i = 0; i < alloc_n2; ++i) { expect_u_eq(((unsigned *)ptr2)[i], 0, "Memory is not zeroed"); @@ -81,11 +81,11 @@ TEST_BEGIN(test_large_alloc_size) { assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); arena_t *arena = arena_get(tsdn, arena_ind, false); - pac_t *pac = &arena->pa_shard.pac; + pac_t *pac = &arena->pa_shard.pac; - size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2; - edata_t* edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), - alloc_size, /* zero */ false); + size_t alloc_size = SBA_RETAINED_ALLOC_SIZE * 2; + edata_t *edata = san_bump_alloc( + tsdn, &sba, pac, pac_ehooks_get(pac), alloc_size, /* zero */ false); expect_u_eq(edata_arena_ind_get(edata), arena_ind, "Edata was assigned an incorrect arena id"); expect_zu_eq(edata_size_get(edata), alloc_size, @@ -105,7 +105,5 @@ TEST_END int main(void) { - return test( - test_san_bump_alloc, - test_large_alloc_size); + return test(test_san_bump_alloc, test_large_alloc_size); } diff --git a/test/unit/sc.c b/test/unit/sc.c index d207481c..725ede0e 100644 --- a/test/unit/sc.c +++ b/test/unit/sc.c @@ -4,7 +4,7 @@ TEST_BEGIN(test_update_slab_size) { sc_data_t data; memset(&data, 0, sizeof(data)); sc_data_init(&data); - sc_t *tiny = &data.sc[0]; + sc_t *tiny = &data.sc[0]; size_t tiny_size = (ZU(1) << tiny->lg_base) + (ZU(tiny->ndelta) << tiny->lg_delta); size_t pgs_too_big = (tiny_size * BITMAP_MAXBITS + PAGE - 1) / PAGE + 1; @@ -13,14 +13,14 @@ TEST_BEGIN(test_update_slab_size) { sc_data_update_slab_size(&data, 1, 10 * PAGE, 1); for (int i = 0; i < data.nbins; i++) { - sc_t *sc = &data.sc[i]; + sc_t *sc = 
&data.sc[i]; size_t reg_size = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); if (reg_size <= PAGE) { expect_d_eq(sc->pgs, 1, "Ignored valid page size hint"); } else { - expect_d_gt(sc->pgs, 1, - "Allowed invalid page size hint"); + expect_d_gt( + sc->pgs, 1, "Allowed invalid page size hint"); } } } @@ -28,6 +28,5 @@ TEST_END int main(void) { - return test( - test_update_slab_size); + return test(test_update_slab_size); } diff --git a/test/unit/sec.c b/test/unit/sec.c index cfef043f..d57c66ec 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -4,8 +4,8 @@ typedef struct pai_test_allocator_s pai_test_allocator_t; struct pai_test_allocator_s { - pai_t pai; - bool alloc_fail; + pai_t pai; + bool alloc_fail; size_t alloc_count; size_t alloc_batch_count; size_t dalloc_count; @@ -17,10 +17,10 @@ struct pai_test_allocator_s { * pointers it gets back; this is mostly just helpful for debugging. */ uintptr_t next_ptr; - size_t expand_count; - bool expand_return_value; - size_t shrink_count; - bool shrink_return_value; + size_t expand_count; + bool expand_return_value; + size_t shrink_count; + bool shrink_return_value; }; static void @@ -82,8 +82,7 @@ pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, for (size_t i = 0; i < nallocs; i++) { edata_t *edata = malloc(sizeof(edata_t)); assert_ptr_not_null(edata, ""); - edata_init(edata, /* arena_ind */ 0, - (void *)ta->next_ptr, size, + edata_init(edata, /* arena_ind */ 0, (void *)ta->next_ptr, size, /* slab */ false, /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ false, /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); @@ -112,8 +111,8 @@ pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, } static void -pai_test_allocator_dalloc(tsdn_t *tsdn, pai_t *self, edata_t *edata, - bool *deferred_work_generated) { +pai_test_allocator_dalloc( + tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { pai_test_allocator_t *ta = (pai_test_allocator_t 
*)self; ta->dalloc_count++; free(edata); @@ -174,7 +173,7 @@ TEST_BEGIN(test_reuse) { enum { NALLOCS = 11 }; edata_t *one_page[NALLOCS]; edata_t *two_page[NALLOCS]; - bool deferred_work_generated = false; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); for (int i = 0; i < NALLOCS; i++) { @@ -189,26 +188,24 @@ TEST_BEGIN(test_reuse) { } expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le(2 * NALLOCS, max_allocs, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_le( + 2 * NALLOCS, max_allocs, "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* * Free in a different order than we allocated, to make sure free-list * separation works correctly. */ for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, one_page[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &sec.pai, one_page[i], &deferred_work_generated); } for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc(tsdn, &sec.pai, two_page[i], - &deferred_work_generated); + pai_dalloc( + tsdn, &sec.pai, two_page[i], &deferred_work_generated); } expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* * Check that the n'th most recent deallocated extent is returned for * the n'th alloc request of a given size. 
@@ -220,19 +217,15 @@ TEST_BEGIN(test_reuse) { edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_eq(one_page[i], alloc1, - "Got unexpected allocation"); - expect_ptr_eq(two_page[i], alloc2, - "Got unexpected allocation"); + expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); + expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); } expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); } TEST_END - TEST_BEGIN(test_auto_flush) { pai_test_allocator_t ta; pai_test_allocator_init(&ta); @@ -251,7 +244,7 @@ TEST_BEGIN(test_auto_flush) { enum { NALLOCS = 10 }; edata_t *extra_alloc; edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -265,18 +258,16 @@ TEST_BEGIN(test_auto_flush) { &deferred_work_generated); expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le(NALLOCS + 1, max_allocs, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_le( + NALLOCS + 1, max_allocs, "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* Free until the SEC is full, but should not have flushed yet. 
*/ for (int i = 0; i < NALLOCS; i++) { pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); } - expect_zu_le(NALLOCS + 1, max_allocs, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_le( + NALLOCS + 1, max_allocs, "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); /* * Free the extra allocation; this should trigger a flush. The internal * flushing logic is allowed to get complicated; for now, we rely on our @@ -308,7 +299,7 @@ do_disable_flush_test(bool is_disable) { enum { NALLOCS = 11 }; edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; + bool deferred_work_generated = false; test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ NALLOCS * PAGE); for (int i = 0; i < NALLOCS; i++) { @@ -324,8 +315,7 @@ do_disable_flush_test(bool is_disable) { size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of allocations"); + expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); if (is_disable) { sec_disable(tsdn, &sec); @@ -345,8 +335,8 @@ do_disable_flush_test(bool is_disable) { * If we free into a disabled SEC, it should forward to the fallback. * Otherwise, the SEC should accept the allocation. 
*/ - pai_dalloc(tsdn, &sec.pai, allocs[NALLOCS - 1], - &deferred_work_generated); + pai_dalloc( + tsdn, &sec.pai, allocs[NALLOCS - 1], &deferred_work_generated); expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, "Incorrect number of allocations"); @@ -382,18 +372,18 @@ TEST_BEGIN(test_max_alloc_respected) { /* max_bytes */ 1000 * PAGE); for (size_t i = 0; i < 100; i++) { - expect_zu_eq(i, ta.alloc_count, - "Incorrect number of allocations"); - expect_zu_eq(i, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq( + i, ta.alloc_count, "Incorrect number of allocations"); + expect_zu_eq( + i, ta.dalloc_count, "Incorrect number of deallocations"); edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, PAGE, /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, &deferred_work_generated); expect_ptr_not_null(edata, "Unexpected alloc failure"); - expect_zu_eq(i + 1, ta.alloc_count, - "Incorrect number of allocations"); - expect_zu_eq(i, ta.dalloc_count, - "Incorrect number of deallocations"); + expect_zu_eq( + i + 1, ta.alloc_count, "Incorrect number of allocations"); + expect_zu_eq( + i, ta.dalloc_count, "Incorrect number of deallocations"); pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); } } @@ -435,8 +425,8 @@ TEST_BEGIN(test_expand_shrink_delegate) { expect_false(err, "Unexpected shrink failure"); expect_zu_eq(1, ta.shrink_count, ""); ta.shrink_return_value = true; - err = pai_shrink(tsdn, &sec.pai, edata, 2 * PAGE, PAGE, - &deferred_work_generated); + err = pai_shrink( + tsdn, &sec.pai, edata, 2 * PAGE, PAGE, &deferred_work_generated); expect_true(err, "Unexpected shrink success"); expect_zu_eq(2, ta.shrink_count, ""); } @@ -455,7 +445,7 @@ TEST_BEGIN(test_nshards_0) { opts.nshards = 0; sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, /* guarded */ false, 
/* frequent_reuse */ false, &deferred_work_generated); @@ -570,8 +560,9 @@ TEST_BEGIN(test_stats_auto_flush) { pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, ta.alloc_count + ta.alloc_batch_count - - ta.dalloc_count - ta.dalloc_batch_count); + expect_stats_pages(tsdn, &sec, + ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count + - ta.dalloc_batch_count); } TEST_END @@ -590,7 +581,7 @@ TEST_BEGIN(test_stats_manual_flush) { test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, /* max_bytes */ FLUSH_PAGES * PAGE); - bool deferred_work_generated = false; + bool deferred_work_generated = false; edata_t *allocs[FLUSH_PAGES]; for (size_t i = 0; i < FLUSH_PAGES; i++) { allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, @@ -621,15 +612,8 @@ TEST_END int main(void) { - return test( - test_reuse, - test_auto_flush, - test_disable, - test_flush, - test_max_alloc_respected, - test_expand_shrink_delegate, - test_nshards_0, - test_stats_simple, - test_stats_auto_flush, + return test(test_reuse, test_auto_flush, test_disable, test_flush, + test_max_alloc_respected, test_expand_shrink_delegate, + test_nshards_0, test_stats_simple, test_stats_auto_flush, test_stats_manual_flush); } diff --git a/test/unit/seq.c b/test/unit/seq.c index 06ed6834..ca6c74b1 100644 --- a/test/unit/seq.c +++ b/test/unit/seq.c @@ -24,7 +24,7 @@ expect_data(data_t *data) { seq_define(data_t, data) -typedef struct thd_data_s thd_data_t; + typedef struct thd_data_s thd_data_t; struct thd_data_s { seq_data_t data; }; @@ -32,8 +32,8 @@ struct thd_data_s { static void * seq_reader_thd(void *arg) { thd_data_t *thd_data = (thd_data_t *)arg; - int iter = 0; - data_t local_data; + int iter = 0; + data_t local_data; while (iter < 1000 * 1000 - 1) { bool success = seq_try_load_data(&local_data, &thd_data->data); if (success) { @@ -49,7 +49,7 @@ seq_reader_thd(void *arg) { static void * seq_writer_thd(void *arg) { thd_data_t *thd_data = (thd_data_t 
*)arg; - data_t local_data; + data_t local_data; memset(&local_data, 0, sizeof(local_data)); for (int i = 0; i < 1000 * 1000; i++) { set_data(&local_data, i); @@ -74,7 +74,7 @@ TEST_BEGIN(test_seq_threaded) { TEST_END TEST_BEGIN(test_seq_simple) { - data_t data; + data_t data; seq_data_t seq; memset(&seq, 0, sizeof(seq)); for (int i = 0; i < 1000 * 1000; i++) { @@ -88,8 +88,7 @@ TEST_BEGIN(test_seq_simple) { } TEST_END -int main(void) { - return test_no_reentrancy( - test_seq_simple, - test_seq_threaded); +int +main(void) { + return test_no_reentrancy(test_seq_simple, test_seq_threaded); } diff --git a/test/unit/size_check.c b/test/unit/size_check.c index 3cb3bc9c..a31578bf 100644 --- a/test/unit/size_check.c +++ b/test/unit/size_check.c @@ -3,7 +3,8 @@ #include "jemalloc/internal/safety_check.h" bool fake_abort_called; -void fake_abort(const char *message) { +void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -72,8 +73,7 @@ TEST_END int main(void) { - return test( - test_invalid_size_sdallocx, + return test(test_invalid_size_sdallocx, test_invalid_size_sdallocx_nonzero_flag, test_invalid_size_sdallocx_noflags); } diff --git a/test/unit/size_classes.c b/test/unit/size_classes.c index c373829c..5379047c 100644 --- a/test/unit/size_classes.c +++ b/test/unit/size_classes.c @@ -3,12 +3,13 @@ static size_t get_max_size_class(void) { unsigned nlextents; - size_t mib[4]; - size_t sz, miblen, max_size_class; + size_t mib[4]; + size_t sz, miblen, max_size_class; sz = sizeof(unsigned); - expect_d_eq(mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, - 0), 0, "Unexpected mallctl() error"); + expect_d_eq( + mallctl("arenas.nlextents", (void *)&nlextents, &sz, NULL, 0), 0, + "Unexpected mallctl() error"); miblen = sizeof(mib) / sizeof(size_t); expect_d_eq(mallctlnametomib("arenas.lextent.0.size", mib, &miblen), 0, @@ -16,30 +17,34 @@ get_max_size_class(void) { mib[2] = nlextents - 1; sz = sizeof(size_t); - expect_d_eq(mallctlbymib(mib, 
miblen, (void *)&max_size_class, &sz, - NULL, 0), 0, "Unexpected mallctlbymib() error"); + expect_d_eq( + mallctlbymib(mib, miblen, (void *)&max_size_class, &sz, NULL, 0), 0, + "Unexpected mallctlbymib() error"); return max_size_class; } TEST_BEGIN(test_size_classes) { - size_t size_class, max_size_class; + size_t size_class, max_size_class; szind_t index, gen_index, max_index; - max_size_class = sz_large_size_classes_disabled()? SC_SMALL_MAXCLASS: - get_max_size_class(); + max_size_class = sz_large_size_classes_disabled() + ? SC_SMALL_MAXCLASS + : get_max_size_class(); max_index = sz_size2index(max_size_class); - for (index = 0, size_class = sz_index2size(index); index < max_index || - size_class < max_size_class; index++, size_class = - sz_index2size(index)) { + for (index = 0, size_class = sz_index2size(index); + index < max_index || size_class < max_size_class; + index++, size_class = sz_index2size(index)) { gen_index = sz_size2index(size_class); expect_true(index < max_index, "Loop conditionals should be equivalent; index=%u, " - "size_class=%zu (%#zx)", index, size_class, size_class); + "size_class=%zu (%#zx)", + index, size_class, size_class); expect_true(size_class < max_size_class, "Loop conditionals should be equivalent; index=%u, " - "size_class=%zu (%#zx)", index, size_class, size_class); + "size_class=%zu (%#zx)", + index, size_class, size_class); expect_u_eq(index, gen_index, "sz_size2index() does not reverse sz_index2size(): index=%u" @@ -51,29 +56,30 @@ TEST_BEGIN(test_size_classes) { " --> size_class=%zu --> index=%u --> size_class=%zu", index, size_class, gen_index, sz_index2size(gen_index)); - expect_u_eq(index+1, sz_size2index(size_class+1), + expect_u_eq(index + 1, sz_size2index(size_class + 1), "Next size_class does not round up properly"); - expect_zu_eq(size_class, (index > 0) ? - sz_s2u(sz_index2size(index-1)+1) : sz_s2u(1), + expect_zu_eq(size_class, + (index > 0) ? 
sz_s2u(sz_index2size(index - 1) + 1) + : sz_s2u(1), "sz_s2u() does not round up to size class"); - expect_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class - 1), "sz_s2u() does not round up to size class"); expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); - expect_zu_eq(sz_s2u(size_class+1), sz_index2size(index+1), + expect_zu_eq(sz_s2u(size_class + 1), sz_index2size(index + 1), "sz_s2u() does not round up to next size class"); } expect_u_eq(index, sz_size2index(sz_index2size(index)), "sz_size2index() does not reverse sz_index2size()"); - expect_zu_eq(max_size_class, sz_index2size( - sz_size2index(max_size_class)), + expect_zu_eq(max_size_class, + sz_index2size(sz_size2index(max_size_class)), "sz_index2size() does not reverse sz_size2index()"); - expect_zu_eq(size_class, sz_s2u(sz_index2size(index-1)+1), + expect_zu_eq(size_class, sz_s2u(sz_index2size(index - 1) + 1), "sz_s2u() does not round up to size class"); - expect_zu_eq(size_class, sz_s2u(size_class-1), + expect_zu_eq(size_class, sz_s2u(size_class - 1), "sz_s2u() does not round up to size class"); expect_zu_eq(size_class, sz_s2u(size_class), "sz_s2u() does not compute same size class"); @@ -115,31 +121,33 @@ TEST_BEGIN(test_grow_slow_size_classes) { TEST_END TEST_BEGIN(test_psize_classes) { - size_t size_class, max_psz; + size_t size_class, max_psz; pszind_t pind, max_pind; max_psz = get_max_size_class() + PAGE; max_pind = sz_psz2ind(max_psz); for (pind = 0, size_class = sz_pind2sz(pind); - pind < max_pind || size_class < max_psz; - pind++, size_class = sz_pind2sz(pind)) { + pind < max_pind || size_class < max_psz; + pind++, size_class = sz_pind2sz(pind)) { expect_true(pind < max_pind, "Loop conditionals should be equivalent; pind=%u, " - "size_class=%zu (%#zx)", pind, size_class, size_class); + "size_class=%zu (%#zx)", + pind, size_class, size_class); expect_true(size_class < max_psz, "Loop conditionals should be equivalent; 
pind=%u, " - "size_class=%zu (%#zx)", pind, size_class, size_class); + "size_class=%zu (%#zx)", + pind, size_class, size_class); expect_u_eq(pind, sz_psz2ind(size_class), "sz_psz2ind() does not reverse sz_pind2sz(): pind=%u -->" - " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, sz_psz2ind(size_class), + " size_class=%zu --> pind=%u --> size_class=%zu", + pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); expect_zu_eq(size_class, sz_pind2sz(sz_psz2ind(size_class)), "sz_pind2sz() does not reverse sz_psz2ind(): pind=%u -->" - " size_class=%zu --> pind=%u --> size_class=%zu", pind, - size_class, sz_psz2ind(size_class), + " size_class=%zu --> pind=%u --> size_class=%zu", + pind, size_class, sz_psz2ind(size_class), sz_pind2sz(sz_psz2ind(size_class))); if (size_class == SC_LARGE_MAXCLASS) { @@ -150,14 +158,15 @@ TEST_BEGIN(test_psize_classes) { "Next size_class does not round up properly"); } - expect_zu_eq(size_class, (pind > 0) ? - sz_psz2u(sz_pind2sz(pind-1)+1) : sz_psz2u(1), + expect_zu_eq(size_class, + (pind > 0) ? 
sz_psz2u(sz_pind2sz(pind - 1) + 1) + : sz_psz2u(1), "sz_psz2u() does not round up to size class"); - expect_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class - 1), "sz_psz2u() does not round up to size class"); expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); - expect_zu_eq(sz_psz2u(size_class+1), sz_pind2sz(pind+1), + expect_zu_eq(sz_psz2u(size_class + 1), sz_pind2sz(pind + 1), "sz_psz2u() does not round up to next size class"); } @@ -166,9 +175,9 @@ TEST_BEGIN(test_psize_classes) { expect_zu_eq(max_psz, sz_pind2sz(sz_psz2ind(max_psz)), "sz_pind2sz() does not reverse sz_psz2ind()"); - expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind-1)+1), + expect_zu_eq(size_class, sz_psz2u(sz_pind2sz(pind - 1) + 1), "sz_psz2u() does not round up to size class"); - expect_zu_eq(size_class, sz_psz2u(size_class-1), + expect_zu_eq(size_class, sz_psz2u(size_class - 1), "sz_psz2u() does not round up to size class"); expect_zu_eq(size_class, sz_psz2u(size_class), "sz_psz2u() does not compute same size class"); @@ -181,31 +190,31 @@ TEST_BEGIN(test_overflow) { max_size_class = get_max_size_class(); max_psz = max_size_class + PAGE; - expect_u_eq(sz_size2index(max_size_class+1), SC_NSIZES, + expect_u_eq(sz_size2index(max_size_class + 1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX)+1), SC_NSIZES, + expect_u_eq(sz_size2index(ZU(PTRDIFF_MAX) + 1), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); expect_u_eq(sz_size2index(SIZE_T_MAX), SC_NSIZES, "sz_size2index() should return NSIZES on overflow"); - expect_zu_eq(sz_s2u(max_size_class+1), 0, + expect_zu_eq(sz_s2u(max_size_class + 1), 0, "sz_s2u() should return 0 for unsupported size"); - expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX)+1), 0, + expect_zu_eq(sz_s2u(ZU(PTRDIFF_MAX) + 1), 0, "sz_s2u() should return 0 for unsupported size"); - expect_zu_eq(sz_s2u(SIZE_T_MAX), 0, - "sz_s2u() should 
return 0 on overflow"); + expect_zu_eq( + sz_s2u(SIZE_T_MAX), 0, "sz_s2u() should return 0 on overflow"); - expect_u_eq(sz_psz2ind(max_size_class+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(max_size_class + 1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX)+1), SC_NPSIZES, + expect_u_eq(sz_psz2ind(ZU(PTRDIFF_MAX) + 1), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); expect_u_eq(sz_psz2ind(SIZE_T_MAX), SC_NPSIZES, "sz_psz2ind() should return NPSIZES on overflow"); - expect_zu_eq(sz_psz2u(max_size_class+1), max_psz, + expect_zu_eq(sz_psz2u(max_size_class + 1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported" " size"); - expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX)+1), max_psz, + expect_zu_eq(sz_psz2u(ZU(PTRDIFF_MAX) + 1), max_psz, "sz_psz2u() should return (LARGE_MAXCLASS + PAGE) for unsupported " "size"); expect_zu_eq(sz_psz2u(SIZE_T_MAX), max_psz, @@ -215,9 +224,6 @@ TEST_END int main(void) { - return test( - test_size_classes, - test_grow_slow_size_classes, - test_psize_classes, - test_overflow); + return test(test_size_classes, test_grow_slow_size_classes, + test_psize_classes, test_overflow); } diff --git a/test/unit/slab.c b/test/unit/slab.c index 70fc5c7d..5c48e762 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -6,23 +6,22 @@ TEST_BEGIN(test_arena_slab_regind) { szind_t binind; for (binind = 0; binind < SC_NBINS; binind++) { - size_t regind; - edata_t slab; + size_t regind; + edata_t slab; const bin_info_t *bin_info = &bin_infos[binind]; edata_init(&slab, INVALID_ARENA_IND, mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)), - bin_info->slab_size, true, - binind, 0, extent_state_active, false, true, EXTENT_PAI_PAC, - EXTENT_NOT_HEAD); - expect_ptr_not_null(edata_addr_get(&slab), - "Unexpected malloc() failure"); + bin_info->slab_size, true, binind, 0, extent_state_active, + false, true, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + expect_ptr_not_null( + 
edata_addr_get(&slab), "Unexpected malloc() failure"); arena_dalloc_bin_locked_info_t dalloc_info; arena_dalloc_bin_locked_begin(&dalloc_info, binind); for (regind = 0; regind < bin_info->nregs; regind++) { - void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + - (bin_info->reg_size * regind)); - expect_zu_eq(arena_slab_regind(&dalloc_info, binind, - &slab, reg), + void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + + (bin_info->reg_size * regind)); + expect_zu_eq( + arena_slab_regind(&dalloc_info, binind, &slab, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); @@ -34,6 +33,5 @@ TEST_END int main(void) { - return test( - test_arena_slab_regind); + return test(test_arena_slab_regind); } diff --git a/test/unit/smoothstep.c b/test/unit/smoothstep.c index 588c9f44..3686ca74 100644 --- a/test/unit/smoothstep.c +++ b/test/unit/smoothstep.c @@ -1,9 +1,8 @@ #include "test/jemalloc_test.h" static const uint64_t smoothstep_tab[] = { -#define STEP(step, h, x, y) \ - h, - SMOOTHSTEP +#define STEP(step, h, x, y) h, + SMOOTHSTEP #undef STEP }; @@ -23,14 +22,14 @@ TEST_BEGIN(test_smoothstep_integral) { sum += smoothstep_tab[i]; } - max = (KQU(1) << (SMOOTHSTEP_BFP-1)) * (SMOOTHSTEP_NSTEPS+1); + max = (KQU(1) << (SMOOTHSTEP_BFP - 1)) * (SMOOTHSTEP_NSTEPS + 1); min = max - SMOOTHSTEP_NSTEPS; - expect_u64_ge(sum, min, - "Integral too small, even accounting for truncation"); + expect_u64_ge( + sum, min, "Integral too small, even accounting for truncation"); expect_u64_le(sum, max, "Integral exceeds 1/2"); if (false) { - malloc_printf("%"FMTu64" ulps under 1/2 (limit %d)\n", + malloc_printf("%" FMTu64 " ulps under 1/2 (limit %d)\n", max - sum, SMOOTHSTEP_NSTEPS); } } @@ -52,7 +51,7 @@ TEST_BEGIN(test_smoothstep_monotonic) { expect_u64_ge(h, prev_h, "Piecewise non-monotonic, i=%u", i); prev_h = h; } - expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS-1], + expect_u64_eq(smoothstep_tab[SMOOTHSTEP_NSTEPS - 1], (KQU(1) << SMOOTHSTEP_BFP), "Last step 
must equal 1"); } TEST_END @@ -74,19 +73,21 @@ TEST_BEGIN(test_smoothstep_slope) { uint64_t delta = h - prev_h; expect_u64_ge(delta, prev_delta, "Slope must monotonically increase in 0.0 <= x <= 0.5, " - "i=%u", i); + "i=%u", + i); prev_h = h; prev_delta = delta; } prev_h = KQU(1) << SMOOTHSTEP_BFP; prev_delta = 0; - for (i = SMOOTHSTEP_NSTEPS-1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { + for (i = SMOOTHSTEP_NSTEPS - 1; i >= SMOOTHSTEP_NSTEPS / 2; i--) { uint64_t h = smoothstep_tab[i]; uint64_t delta = prev_h - h; expect_u64_ge(delta, prev_delta, "Slope must monotonically decrease in 0.5 <= x <= 1.0, " - "i=%u", i); + "i=%u", + i); prev_h = h; prev_delta = delta; } @@ -95,8 +96,6 @@ TEST_END int main(void) { - return test( - test_smoothstep_integral, - test_smoothstep_monotonic, + return test(test_smoothstep_integral, test_smoothstep_monotonic, test_smoothstep_slope); } diff --git a/test/unit/spin.c b/test/unit/spin.c index b965f742..6dbd0dd1 100644 --- a/test/unit/spin.c +++ b/test/unit/spin.c @@ -13,6 +13,5 @@ TEST_END int main(void) { - return test( - test_spin); + return test(test_spin); } diff --git a/test/unit/stats.c b/test/unit/stats.c index 584a582f..26516fa8 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -4,13 +4,14 @@ #define STRINGIFY(x) STRINGIFY_HELPER(x) TEST_BEGIN(test_stats_summary) { - size_t sz, allocated, active, resident, mapped, - metadata, metadata_edata, metadata_rtree; + size_t sz, allocated, active, resident, mapped, metadata, + metadata_edata, metadata_rtree; int expected = config_stats ? 
0 : ENOENT; sz = sizeof(size_t); - expect_d_eq(mallctl("stats.allocated", (void *)&allocated, &sz, NULL, - 0), expected, "Unexpected mallctl() result"); + expect_d_eq( + mallctl("stats.allocated", (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.active", (void *)&active, &sz, NULL, 0), expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.resident", (void *)&resident, &sz, NULL, 0), @@ -21,17 +22,19 @@ TEST_BEGIN(test_stats_summary) { expect_d_eq(mallctl("stats.metadata", (void *)&metadata, &sz, NULL, 0), expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.metadata_edata", (void *)&metadata_edata, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.metadata_rtree", (void *)&metadata_rtree, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { expect_zu_le(allocated, active, "allocated should be no larger than active"); - expect_zu_lt(active, resident, - "active should be less than resident"); - expect_zu_lt(active, mapped, - "active should be less than mapped"); + expect_zu_lt( + active, resident, "active should be less than resident"); + expect_zu_lt( + active, mapped, "active should be less than mapped"); expect_zu_le(metadata_edata + metadata_rtree, metadata, "the sum of metadata_edata and metadata_rtree " "should be no larger than metadata"); @@ -40,12 +43,12 @@ TEST_BEGIN(test_stats_summary) { TEST_END TEST_BEGIN(test_stats_large) { - void *p; + void *p; uint64_t epoch; - size_t allocated; + size_t allocated; uint64_t nmalloc, ndalloc, nrequests; - size_t sz; - int expected = config_stats ? 0 : ENOENT; + size_t sz; + int expected = config_stats ? 
0 : ENOENT; p = mallocx(SC_SMALL_MAXCLASS + 1, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -55,20 +58,22 @@ TEST_BEGIN(test_stats_large) { sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.large.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&nrequests, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, - "allocated should be greater than zero"); + expect_zu_gt( + allocated, 0, "allocated should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); expect_u64_le(nmalloc, nrequests, @@ -80,18 +85,17 @@ TEST_BEGIN(test_stats_large) { TEST_END TEST_BEGIN(test_stats_arenas_summary) { - void *little, *large; + void *little, *large; uint64_t epoch; - size_t sz; - int expected = config_stats ? 0 : ENOENT; - size_t mapped; + size_t sz; + int expected = config_stats ? 
0 : ENOENT; + size_t mapped; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; little = mallocx(SC_SMALL_MAXCLASS, MALLOCX_ARENA(0)); expect_ptr_not_null(little, "Unexpected mallocx() failure"); - large = mallocx((1U << SC_LG_LARGE_MINCLASS), - MALLOCX_ARENA(0)); + large = mallocx((1U << SC_LG_LARGE_MINCLASS), MALLOCX_ARENA(0)); expect_ptr_not_null(large, "Unexpected mallocx() failure"); dallocx(little, 0); @@ -106,28 +110,29 @@ TEST_BEGIN(test_stats_arenas_summary) { 0, "Unexpected mallctl() failure"); sz = sizeof(size_t); - expect_d_eq(mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, - 0), expected, "Unexepected mallctl() result"); + expect_d_eq( + mallctl("stats.arenas.0.mapped", (void *)&mapped, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.dirty_npurge", - (void *)&dirty_npurge, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&dirty_npurge, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.dirty_nmadvise", - (void *)&dirty_nmadvise, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&dirty_nmadvise, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.dirty_purged", - (void *)&dirty_purged, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&dirty_purged, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.muzzy_npurge", - (void *)&muzzy_npurge, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&muzzy_npurge, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.muzzy_nmadvise", - (void *)&muzzy_nmadvise, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&muzzy_nmadvise, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); 
expect_d_eq(mallctl("stats.arenas.0.muzzy_purged", - (void *)&muzzy_purged, &sz, NULL, 0), expected, - "Unexepected mallctl() result"); + (void *)&muzzy_purged, &sz, NULL, 0), + expected, "Unexepected mallctl() result"); if (config_stats) { if (!is_background_thread_enabled() && !opt_hpa) { @@ -156,10 +161,10 @@ no_lazy_lock(void) { } TEST_BEGIN(test_stats_arenas_small) { - void *p; - size_t sz, allocated; + void *p; + size_t sz, allocated; uint64_t epoch, nmalloc, ndalloc, nrequests; - int expected = config_stats ? 0 : ENOENT; + int expected = config_stats ? 0 : ENOENT; no_lazy_lock(); /* Lazy locking would dodge tcache testing. */ @@ -174,26 +179,28 @@ TEST_BEGIN(test_stats_arenas_small) { sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.small.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.small.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.small.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.small.nrequests", - (void *)&nrequests, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&nrequests, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { - expect_zu_gt(allocated, 0, - "allocated should be greater than zero"); - expect_u64_gt(nmalloc, 0, - "nmalloc should be no greater than zero"); + expect_zu_gt( + allocated, 0, "allocated should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc should be no greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - expect_u64_gt(nrequests, 0, - "nrequests 
should be greater than zero"); + expect_u64_gt( + nrequests, 0, "nrequests should be greater than zero"); } dallocx(p, 0); @@ -201,16 +208,16 @@ TEST_BEGIN(test_stats_arenas_small) { TEST_END TEST_BEGIN(test_stats_arenas_large) { - void *p; - size_t sz, allocated, allocated_before; + void *p; + size_t sz, allocated, allocated_before; uint64_t epoch, nmalloc, ndalloc; - size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; - int expected = config_stats ? 0 : ENOENT; + size_t malloc_size = (1U << (SC_LG_LARGE_MINCLASS + 1)) + 1; + int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated_before, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated_before, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); p = mallocx(malloc_size, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -219,21 +226,23 @@ TEST_BEGIN(test_stats_arenas_large) { 0, "Unexpected mallctl() failure"); expect_d_eq(mallctl("stats.arenas.0.large.allocated", - (void *)&allocated, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&allocated, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.large.nmalloc", (void *)&nmalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.large.ndalloc", (void *)&ndalloc, - &sz, NULL, 0), expected, "Unexpected mallctl() result"); + &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { expect_zu_ge(allocated_before, 0, "allocated should be greater than zero"); expect_zu_ge(allocated - allocated_before, sz_s2u(malloc_size), "the diff between allocated should be greater than the allocation made"); - expect_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc 
should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); } @@ -248,11 +257,11 @@ gen_mallctl_str(char *cmd, char *name, unsigned arena_ind) { } TEST_BEGIN(test_stats_arenas_bins) { - void *p; - size_t sz, curslabs, curregs, nonfull_slabs; + void *p; + size_t sz, curslabs, curregs, nonfull_slabs; uint64_t epoch, nmalloc, ndalloc, nrequests, nfills, nflushes; uint64_t nslabs, nreslabs; - int expected = config_stats ? 0 : ENOENT; + int expected = config_stats ? 0 : ENOENT; /* Make sure allocation below isn't satisfied by tcache. */ expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), @@ -264,8 +273,8 @@ TEST_BEGIN(test_stats_arenas_bins) { 0, "Arena creation failure"); sz = sizeof(arena_ind); expect_d_eq(mallctl("thread.arena", (void *)&old_arena_ind, &sz, - (void *)&arena_ind, sizeof(arena_ind)), 0, - "Unexpected mallctl() failure"); + (void *)&arena_ind, sizeof(arena_ind)), + 0, "Unexpected mallctl() failure"); p = malloc(bin_infos[0].reg_size); expect_ptr_not_null(p, "Unexpected malloc() failure"); @@ -315,26 +324,25 @@ TEST_BEGIN(test_stats_arenas_bins) { expected, "Unexpected mallctl() result"); if (config_stats) { - expect_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); - expect_u64_gt(nrequests, 0, - "nrequests should be greater than zero"); - expect_zu_gt(curregs, 0, - "allocated should be greater than zero"); + expect_u64_gt( + nrequests, 0, "nrequests should be greater than zero"); + expect_zu_gt( + curregs, 0, "allocated should be greater than zero"); if (opt_tcache) { expect_u64_gt(nfills, 0, "At least one fill should have occurred"); expect_u64_gt(nflushes, 0, "At least one flush should have occurred"); } - expect_u64_gt(nslabs, 0, - "At least one slab should have been allocated"); + expect_u64_gt( + nslabs, 0, "At least 
one slab should have been allocated"); expect_zu_gt(curslabs, 0, "At least one slab should be currently allocated"); - expect_zu_eq(nonfull_slabs, 0, - "slabs_nonfull should be empty"); + expect_zu_eq(nonfull_slabs, 0, "slabs_nonfull should be empty"); } dallocx(p, 0); @@ -342,14 +350,15 @@ TEST_BEGIN(test_stats_arenas_bins) { TEST_END TEST_BEGIN(test_stats_arenas_lextents) { - void *p; + void *p; uint64_t epoch, nmalloc, ndalloc; - size_t curlextents, sz, hsize; - int expected = config_stats ? 0 : ENOENT; + size_t curlextents, sz, hsize; + int expected = config_stats ? 0 : ENOENT; sz = sizeof(size_t); - expect_d_eq(mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, - 0), 0, "Unexpected mallctl() failure"); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&hsize, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); p = mallocx(hsize, MALLOCX_ARENA(0)); expect_ptr_not_null(p, "Unexpected mallocx() failure"); @@ -359,19 +368,19 @@ TEST_BEGIN(test_stats_arenas_lextents) { sz = sizeof(uint64_t); expect_d_eq(mallctl("stats.arenas.0.lextents.0.nmalloc", - (void *)&nmalloc, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&nmalloc, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); expect_d_eq(mallctl("stats.arenas.0.lextents.0.ndalloc", - (void *)&ndalloc, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&ndalloc, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); sz = sizeof(size_t); expect_d_eq(mallctl("stats.arenas.0.lextents.0.curlextents", - (void *)&curlextents, &sz, NULL, 0), expected, - "Unexpected mallctl() result"); + (void *)&curlextents, &sz, NULL, 0), + expected, "Unexpected mallctl() result"); if (config_stats) { - expect_u64_gt(nmalloc, 0, - "nmalloc should be greater than zero"); + expect_u64_gt( + nmalloc, 0, "nmalloc should be greater than zero"); expect_u64_ge(nmalloc, ndalloc, "nmalloc should be at least as large as ndalloc"); expect_u64_gt(curlextents, 0, @@ -385,35 +394,37 @@ 
TEST_END static void test_tcache_bytes_for_usize(size_t usize) { uint64_t epoch; - size_t tcache_bytes, tcache_stashed_bytes; - size_t sz = sizeof(tcache_bytes); + size_t tcache_bytes, tcache_stashed_bytes; + size_t sz = sizeof(tcache_bytes); void *ptr = mallocx(usize, 0); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", - &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) - ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes", + &tcache_stashed_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); size_t tcache_bytes_before = tcache_bytes + tcache_stashed_bytes; dallocx(ptr, 0); expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), 0, "Unexpected mallctl() failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".tcache_bytes", - &tcache_bytes, &sz, NULL, 0), 0, "Unexpected mallctl failure"); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) - ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_bytes", + &tcache_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." 
STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes", + &tcache_stashed_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); size_t tcache_bytes_after = tcache_bytes + tcache_stashed_bytes; - assert_zu_eq(tcache_bytes_after - tcache_bytes_before, - usize, "Incorrectly attributed a free"); + assert_zu_eq(tcache_bytes_after - tcache_bytes_before, usize, + "Incorrectly attributed a free"); } TEST_BEGIN(test_stats_tcache_bytes_small) { @@ -436,14 +447,9 @@ TEST_END int main(void) { - return test_no_reentrancy( - test_stats_summary, - test_stats_large, - test_stats_arenas_summary, - test_stats_arenas_small, - test_stats_arenas_large, - test_stats_arenas_bins, - test_stats_arenas_lextents, - test_stats_tcache_bytes_small, + return test_no_reentrancy(test_stats_summary, test_stats_large, + test_stats_arenas_summary, test_stats_arenas_small, + test_stats_arenas_large, test_stats_arenas_bins, + test_stats_arenas_lextents, test_stats_tcache_bytes_small, test_stats_tcache_bytes_large); } diff --git a/test/unit/stats_print.c b/test/unit/stats_print.c index 3b317753..e611369c 100644 --- a/test/unit/stats_print.c +++ b/test/unit/stats_print.c @@ -21,22 +21,22 @@ typedef enum { typedef struct parser_s parser_t; typedef struct { - parser_t *parser; - token_type_t token_type; - size_t pos; - size_t len; - size_t line; - size_t col; + parser_t *parser; + token_type_t token_type; + size_t pos; + size_t len; + size_t line; + size_t col; } token_t; struct parser_s { - bool verbose; - char *buf; /* '\0'-terminated. */ - size_t len; /* Number of characters preceding '\0' in buf. */ - size_t pos; - size_t line; - size_t col; - token_t token; + bool verbose; + char *buf; /* '\0'-terminated. */ + size_t len; /* Number of characters preceding '\0' in buf. 
*/ + size_t pos; + size_t line; + size_t col; + token_t token; }; static void @@ -63,12 +63,12 @@ token_error(token_t *token) { token->line, token->col); break; default: - malloc_printf("%zu:%zu: Unexpected token: ", token->line, - token->col); + malloc_printf( + "%zu:%zu: Unexpected token: ", token->line, token->col); break; } - UNUSED ssize_t err = malloc_write_fd(STDERR_FILENO, - &token->parser->buf[token->pos], token->len); + UNUSED ssize_t err = malloc_write_fd( + STDERR_FILENO, &token->parser->buf[token->pos], token->len); malloc_printf("\n"); } @@ -92,9 +92,9 @@ parser_fini(parser_t *parser) { static bool parser_append(parser_t *parser, const char *str) { size_t len = strlen(str); - char *buf = (parser->buf == NULL) ? mallocx(len + 1, - MALLOCX_TCACHE_NONE) : rallocx(parser->buf, parser->len + len + 1, - MALLOCX_TCACHE_NONE); + char *buf = (parser->buf == NULL) + ? mallocx(len + 1, MALLOCX_TCACHE_NONE) + : rallocx(parser->buf, parser->len + len + 1, MALLOCX_TCACHE_NONE); if (buf == NULL) { return true; } @@ -109,9 +109,19 @@ parser_tokenize(parser_t *parser) { enum { STATE_START, STATE_EOI, - STATE_N, STATE_NU, STATE_NUL, STATE_NULL, - STATE_F, STATE_FA, STATE_FAL, STATE_FALS, STATE_FALSE, - STATE_T, STATE_TR, STATE_TRU, STATE_TRUE, + STATE_N, + STATE_NU, + STATE_NUL, + STATE_NULL, + STATE_F, + STATE_FA, + STATE_FAL, + STATE_FALS, + STATE_FALSE, + STATE_T, + STATE_TR, + STATE_TRU, + STATE_TRUE, STATE_LBRACKET, STATE_RBRACKET, STATE_LBRACE, @@ -120,7 +130,10 @@ parser_tokenize(parser_t *parser) { STATE_COMMA, STATE_CHARS, STATE_CHAR_ESCAPE, - STATE_CHAR_U, STATE_CHAR_UD, STATE_CHAR_UDD, STATE_CHAR_UDDD, + STATE_CHAR_U, + STATE_CHAR_UD, + STATE_CHAR_UDD, + STATE_CHAR_UDDD, STATE_STRING, STATE_MINUS, STATE_LEADING_ZERO, @@ -132,12 +145,12 @@ parser_tokenize(parser_t *parser) { STATE_EXP_DIGITS, STATE_ACCEPT } state = STATE_START; - size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); + size_t token_pos JEMALLOC_CC_SILENCE_INIT(0); size_t token_line 
JEMALLOC_CC_SILENCE_INIT(1); - size_t token_col JEMALLOC_CC_SILENCE_INIT(0); + size_t token_col JEMALLOC_CC_SILENCE_INIT(0); - expect_zu_le(parser->pos, parser->len, - "Position is past end of buffer"); + expect_zu_le( + parser->pos, parser->len, "Position is past end of buffer"); while (state != STATE_ACCEPT) { char c = parser->buf[parser->pos]; @@ -148,7 +161,11 @@ parser_tokenize(parser_t *parser) { token_line = parser->line; token_col = parser->col; switch (c) { - case ' ': case '\b': case '\n': case '\r': case '\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': break; case '\0': state = STATE_EOI; @@ -189,21 +206,29 @@ parser_tokenize(parser_t *parser) { case '0': state = STATE_LEADING_ZERO; break; - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_EOI: - token_init(&parser->token, parser, - TOKEN_TYPE_EOI, token_pos, parser->pos - - token_pos, token_line, token_col); + token_init(&parser->token, parser, TOKEN_TYPE_EOI, + token_pos, parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; case STATE_N: @@ -213,8 +238,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -225,8 +251,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + 
TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -237,22 +264,32 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_NULL: switch (c) { - case ' ': case '\b': case '\n': case '\r': case '\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': case '\0': - case '[': case ']': case '{': case '}': case ':': + case '[': + case ']': + case '{': + case '}': + case ':': case ',': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } token_init(&parser->token, parser, TOKEN_TYPE_NULL, @@ -267,8 +304,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -279,8 +317,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -291,8 +330,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -303,27 +343,37 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, 
parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_FALSE: switch (c) { - case ' ': case '\b': case '\n': case '\r': case '\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': case '\0': - case '[': case ']': case '{': case '}': case ':': + case '[': + case ']': + case '{': + case '}': + case ':': case ',': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } - token_init(&parser->token, parser, - TOKEN_TYPE_FALSE, token_pos, parser->pos - - token_pos, token_line, token_col); + token_init(&parser->token, parser, TOKEN_TYPE_FALSE, + token_pos, parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; case STATE_T: @@ -333,8 +383,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -345,8 +396,9 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -357,22 +409,32 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_TRUE: switch (c) { - case ' ': case '\b': case '\n': case '\r': case 
'\t': + case ' ': + case '\b': + case '\n': + case '\r': + case '\t': case '\0': - case '[': case ']': case '{': case '}': case ':': + case '[': + case ']': + case '{': + case '}': + case ':': case ',': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } token_init(&parser->token, parser, TOKEN_TYPE_TRUE, @@ -424,16 +486,42 @@ parser_tokenize(parser_t *parser) { case '"': state = STATE_STRING; break; - case 0x00: case 0x01: case 0x02: case 0x03: case 0x04: - case 0x05: case 0x06: case 0x07: case 0x08: case 0x09: - case 0x0a: case 0x0b: case 0x0c: case 0x0d: case 0x0e: - case 0x0f: case 0x10: case 0x11: case 0x12: case 0x13: - case 0x14: case 0x15: case 0x16: case 0x17: case 0x18: - case 0x19: case 0x1a: case 0x1b: case 0x1c: case 0x1d: - case 0x1e: case 0x1f: + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x08: + case 0x09: + case 0x0a: + case 0x0b: + case 0x0c: + case 0x0d: + case 0x0e: + case 0x0f: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; default: break; @@ -441,8 +529,13 @@ parser_tokenize(parser_t *parser) { break; case STATE_CHAR_ESCAPE: switch (c) { - case '"': case '\\': case '/': case 'b': case 'n': - case 'r': case 't': + case '"': + case '\\': + case '/': + case 'b': + case 'n': + case 'r': + case 't': state = STATE_CHARS; break; case 'u': @@ -450,76 +543,145 @@ parser_tokenize(parser_t *parser) { break; default: 
token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_U: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHAR_UD; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_UD: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHAR_UDD; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_UDD: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': 
+ case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHAR_UDDD; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_CHAR_UDDD: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - case 'a': case 'b': case 'c': case 'd': case 'e': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': case 'f': - case 'A': case 'B': case 'C': case 'D': case 'E': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': case 'F': state = STATE_CHARS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -534,14 +696,22 @@ parser_tokenize(parser_t *parser) { case '0': state = STATE_LEADING_ZERO; break; - case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; @@ -552,95 +722,152 @@ parser_tokenize(parser_t *parser) { break; default: token_init(&parser->token, 
parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } break; case STATE_DIGITS: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; case '.': state = STATE_DECIMAL; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } break; case STATE_DECIMAL: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_FRAC_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_FRAC_DIGITS: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; - case 'e': case 'E': + case 'e': + case 'E': state = STATE_EXP; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } break; case STATE_EXP: switch (c) { - case '-': case '+': + case '-': + case '+': state = STATE_EXP_SIGN; break; - 
case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_EXP_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_EXP_SIGN: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = STATE_EXP_DIGITS; break; default: token_init(&parser->token, parser, - TOKEN_TYPE_ERROR, token_pos, parser->pos + 1 - - token_pos, token_line, token_col); + TOKEN_TYPE_ERROR, token_pos, + parser->pos + 1 - token_pos, token_line, + token_col); return true; } break; case STATE_EXP_DIGITS: switch (c) { - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; default: token_init(&parser->token, parser, - TOKEN_TYPE_NUMBER, token_pos, parser->pos - - token_pos, token_line, token_col); + TOKEN_TYPE_NUMBER, token_pos, + parser->pos - token_pos, token_line, + token_col); state = STATE_ACCEPT; break; } @@ -662,8 +889,8 @@ parser_tokenize(parser_t *parser) { return false; } -static bool parser_parse_array(parser_t *parser); -static bool parser_parse_object(parser_t *parser); +static bool parser_parse_array(parser_t *parser); +static bool parser_parse_object(parser_t *parser); static bool parser_parse_value(parser_t *parser) { @@ -824,80 +1051,80 @@ label_error: } TEST_BEGIN(test_json_parser) { - size_t i; + size_t i; const char *invalid_inputs[] = { - /* 
Tokenizer error case tests. */ - "{ \"string\": X }", - "{ \"string\": nXll }", - "{ \"string\": nuXl }", - "{ \"string\": nulX }", - "{ \"string\": nullX }", - "{ \"string\": fXlse }", - "{ \"string\": faXse }", - "{ \"string\": falXe }", - "{ \"string\": falsX }", - "{ \"string\": falseX }", - "{ \"string\": tXue }", - "{ \"string\": trXe }", - "{ \"string\": truX }", - "{ \"string\": trueX }", - "{ \"string\": \"\n\" }", - "{ \"string\": \"\\z\" }", - "{ \"string\": \"\\uX000\" }", - "{ \"string\": \"\\u0X00\" }", - "{ \"string\": \"\\u00X0\" }", - "{ \"string\": \"\\u000X\" }", - "{ \"string\": -X }", - "{ \"string\": 0.X }", - "{ \"string\": 0.0eX }", - "{ \"string\": 0.0e+X }", + /* Tokenizer error case tests. */ + "{ \"string\": X }", + "{ \"string\": nXll }", + "{ \"string\": nuXl }", + "{ \"string\": nulX }", + "{ \"string\": nullX }", + "{ \"string\": fXlse }", + "{ \"string\": faXse }", + "{ \"string\": falXe }", + "{ \"string\": falsX }", + "{ \"string\": falseX }", + "{ \"string\": tXue }", + "{ \"string\": trXe }", + "{ \"string\": truX }", + "{ \"string\": trueX }", + "{ \"string\": \"\n\" }", + "{ \"string\": \"\\z\" }", + "{ \"string\": \"\\uX000\" }", + "{ \"string\": \"\\u0X00\" }", + "{ \"string\": \"\\u00X0\" }", + "{ \"string\": \"\\u000X\" }", + "{ \"string\": -X }", + "{ \"string\": 0.X }", + "{ \"string\": 0.0eX }", + "{ \"string\": 0.0e+X }", - /* Parser error test cases. */ - "{\"string\": }", - "{\"string\" }", - "{\"string\": [ 0 }", - "{\"string\": {\"a\":0, 1 } }", - "{\"string\": {\"a\":0: } }", - "{", - "{}{", + /* Parser error test cases. */ + "{\"string\": }", + "{\"string\" }", + "{\"string\": [ 0 }", + "{\"string\": {\"a\":0, 1 } }", + "{\"string\": {\"a\":0: } }", + "{", + "{}{", }; const char *valid_inputs[] = { - /* Token tests. 
*/ - "null", - "false", - "true", - "{}", - "{\"a\": 0}", - "[]", - "[0, 1]", - "0", - "1", - "10", - "-10", - "10.23", - "10.23e4", - "10.23e-4", - "10.23e+4", - "10.23E4", - "10.23E-4", - "10.23E+4", - "-10.23", - "-10.23e4", - "-10.23e-4", - "-10.23e+4", - "-10.23E4", - "-10.23E-4", - "-10.23E+4", - "\"value\"", - "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", + /* Token tests. */ + "null", + "false", + "true", + "{}", + "{\"a\": 0}", + "[]", + "[0, 1]", + "0", + "1", + "10", + "-10", + "10.23", + "10.23e4", + "10.23e-4", + "10.23e+4", + "10.23E4", + "10.23E-4", + "10.23E+4", + "-10.23", + "-10.23e4", + "-10.23e-4", + "-10.23e+4", + "-10.23E4", + "-10.23E-4", + "-10.23E+4", + "\"value\"", + "\" \\\" \\/ \\b \\n \\r \\t \\u0abc \\u1DEF \"", - /* Parser test with various nesting. */ - "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", + /* Parser test with various nesting. */ + "{\"a\":null, \"b\":[1,[{\"c\":2},3]], \"d\":{\"e\":true}}", }; - for (i = 0; i < sizeof(invalid_inputs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(invalid_inputs) / sizeof(const char *); i++) { const char *input = invalid_inputs[i]; - parser_t parser; + parser_t parser; parser_init(&parser, false); expect_false(parser_append(&parser, input), "Unexpected input appending failure"); @@ -906,9 +1133,9 @@ TEST_BEGIN(test_json_parser) { parser_fini(&parser); } - for (i = 0; i < sizeof(valid_inputs)/sizeof(const char *); i++) { + for (i = 0; i < sizeof(valid_inputs) / sizeof(const char *); i++) { const char *input = valid_inputs[i]; - parser_t parser; + parser_t parser; parser_init(&parser, true); expect_false(parser_append(&parser, input), "Unexpected input appending failure"); @@ -929,27 +1156,27 @@ write_cb(void *opaque, const char *str) { TEST_BEGIN(test_stats_print_json) { const char *opts[] = { - "J", - "Jg", - "Jm", - "Jd", - "Jmd", - "Jgd", - "Jgm", - "Jgmd", - "Ja", - "Jb", - "Jl", - "Jx", - "Jbl", - "Jal", - "Jab", - "Jabl", - "Jax", - "Jbx", - "Jlx", - "Jablx", 
- "Jgmdablx", + "J", + "Jg", + "Jm", + "Jd", + "Jmd", + "Jgd", + "Jgm", + "Jgmd", + "Ja", + "Jb", + "Jl", + "Jx", + "Jbl", + "Jal", + "Jab", + "Jabl", + "Jax", + "Jbx", + "Jlx", + "Jablx", + "Jgmdablx", }; unsigned arena_ind, i; @@ -962,23 +1189,27 @@ TEST_BEGIN(test_stats_print_json) { case 1: { size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, - &sz, NULL, 0), 0, "Unexpected mallctl failure"); + &sz, NULL, 0), + 0, "Unexpected mallctl failure"); break; - } case 2: { + } + case 2: { size_t mib[3]; - size_t miblen = sizeof(mib)/sizeof(size_t); - expect_d_eq(mallctlnametomib("arena.0.destroy", - mib, &miblen), 0, - "Unexpected mallctlnametomib failure"); + size_t miblen = sizeof(mib) / sizeof(size_t); + expect_d_eq( + mallctlnametomib("arena.0.destroy", mib, &miblen), + 0, "Unexpected mallctlnametomib failure"); mib[1] = arena_ind; - expect_d_eq(mallctlbymib(mib, miblen, NULL, NULL, NULL, - 0), 0, "Unexpected mallctlbymib failure"); + expect_d_eq( + mallctlbymib(mib, miblen, NULL, NULL, NULL, 0), 0, + "Unexpected mallctlbymib failure"); break; - } default: + } + default: not_reached(); } - for (j = 0; j < sizeof(opts)/sizeof(const char *); j++) { + for (j = 0; j < sizeof(opts) / sizeof(const char *); j++) { parser_t parser; parser_init(&parser, true); @@ -993,7 +1224,5 @@ TEST_END int main(void) { - return test( - test_json_parser, - test_stats_print_json); + return test(test_json_parser, test_stats_print_json); } diff --git a/test/unit/sz.c b/test/unit/sz.c index 8ae04b92..fa2b8dc0 100644 --- a/test/unit/sz.c +++ b/test/unit/sz.c @@ -10,8 +10,8 @@ TEST_BEGIN(test_sz_psz2ind) { for (size_t i = 0; i < SC_NGROUP; i++) { for (size_t psz = i * PAGE + 1; psz <= (i + 1) * PAGE; psz++) { pszind_t ind = sz_psz2ind(psz); - expect_zu_eq(ind, i, "Got %u as sz_psz2ind of %zu", ind, - psz); + expect_zu_eq( + ind, i, "Got %u as sz_psz2ind of %zu", ind, psz); } } @@ -25,15 +25,14 @@ TEST_BEGIN(test_sz_psz2ind) { */ size_t base_psz = 1 << 
(SC_LG_NGROUP + LG_PAGE); size_t base_ind = 0; - while (base_ind < SC_NSIZES && - reg_size_compute(data.sc[base_ind].lg_base, - data.sc[base_ind].lg_delta, - data.sc[base_ind].ndelta) < base_psz) { + while (base_ind < SC_NSIZES + && reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta) + < base_psz) { base_ind++; } - expect_zu_eq( - reg_size_compute(data.sc[base_ind].lg_base, - data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta), + expect_zu_eq(reg_size_compute(data.sc[base_ind].lg_base, + data.sc[base_ind].lg_delta, data.sc[base_ind].ndelta), base_psz, "Size class equal to %zu not found", base_psz); /* * Test different sizes falling into groups after the 'base'. The @@ -42,21 +41,21 @@ TEST_BEGIN(test_sz_psz2ind) { base_ind -= SC_NGROUP; for (size_t psz = base_psz; psz <= 64 * 1024 * 1024; psz += PAGE / 3) { pszind_t ind = sz_psz2ind(psz); - sc_t gt_sc = data.sc[ind + base_ind]; + sc_t gt_sc = data.sc[ind + base_ind]; expect_zu_gt(psz, - reg_size_compute(gt_sc.lg_base, gt_sc.lg_delta, - gt_sc.ndelta), + reg_size_compute( + gt_sc.lg_base, gt_sc.lg_delta, gt_sc.ndelta), "Got %u as sz_psz2ind of %zu", ind, psz); sc_t le_sc = data.sc[ind + base_ind + 1]; expect_zu_le(psz, - reg_size_compute(le_sc.lg_base, le_sc.lg_delta, - le_sc.ndelta), + reg_size_compute( + le_sc.lg_base, le_sc.lg_delta, le_sc.ndelta), "Got %u as sz_psz2ind of %zu", ind, psz); } pszind_t max_ind = sz_psz2ind(SC_LARGE_MAXCLASS + 1); - expect_lu_eq(max_ind, SC_NPSIZES, - "Got %u as sz_psz2ind of %llu", max_ind, SC_LARGE_MAXCLASS); + expect_lu_eq(max_ind, SC_NPSIZES, "Got %u as sz_psz2ind of %llu", + max_ind, SC_LARGE_MAXCLASS); } TEST_END diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 884ee7fe..d57b2d3b 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -69,8 +69,8 @@ tcache_bytes_read_global(void) { static size_t tcache_bytes_read_local(void) { - size_t tcache_bytes = 0; - tsd_t *tsd = tsd_fetch(); + size_t 
tcache_bytes = 0; + tsd_t *tsd = tsd_fetch(); tcache_t *tcache = tcache_get(tsd); for (szind_t i = 0; i < tcache_nbins_get(tcache->tcache_slow); i++) { cache_bin_t *cache_bin = &tcache->bins[i]; @@ -98,7 +98,7 @@ test_tcache_bytes_alloc(size_t alloc_size, size_t tcache_max, size_t usize = sz_s2u(alloc_size); /* No change is expected if usize is outside of tcache_max range. */ - bool cached = (usize <= tcache_max); + bool cached = (usize <= tcache_max); ssize_t diff = cached ? usize : 0; void *ptr1 = alloc_func(alloc_size, alloc_option); @@ -186,7 +186,7 @@ TEST_BEGIN(test_tcache_max) { test_skip_if(san_uaf_detection_enabled()); unsigned arena_ind, alloc_option, dalloc_option; - size_t sz = sizeof(arena_ind); + size_t sz = sizeof(arena_ind); expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), 0, "Unexpected mallctl() failure"); expect_d_eq( @@ -215,12 +215,12 @@ static void validate_tcache_stack(tcache_t *tcache) { /* Assume bins[0] is enabled. */ void *tcache_stack = tcache->bins[0].stack_head; - bool expect_found = cache_bin_stack_use_thp() ? true : false; + bool expect_found = cache_bin_stack_use_thp() ? true : false; /* Walk through all blocks to see if the stack is within range. 
*/ - base_t *base = b0get(); + base_t *base = b0get(); base_block_t *next = base->blocks; - bool found = false; + bool found = false; do { base_block_t *block = next; if ((byte_t *)tcache_stack >= (byte_t *)block @@ -237,10 +237,10 @@ validate_tcache_stack(tcache_t *tcache) { static void * tcache_check(void *arg) { - size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; - unsigned tcache_nbins; - tsd_t *tsd = tsd_fetch(); - tcache_t *tcache = tsd_tcachep_get(tsd); + size_t old_tcache_max, new_tcache_max, min_tcache_max, sz; + unsigned tcache_nbins; + tsd_t *tsd = tsd_fetch(); + tcache_t *tcache = tsd_tcachep_get(tsd); tcache_slow_t *tcache_slow = tcache->tcache_slow; sz = sizeof(size_t); new_tcache_max = *(size_t *)arg; @@ -263,7 +263,7 @@ tcache_check(void *arg) { * Test an input that is not a valid size class, it should be ceiled * to a valid size class. */ - bool e0 = false, e1; + bool e0 = false, e1; size_t bool_sz = sizeof(bool); expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e1, &bool_sz, (void *)&e0, bool_sz), diff --git a/test/unit/test_hooks.c b/test/unit/test_hooks.c index 41e7bf35..47e5fa9e 100644 --- a/test/unit/test_hooks.c +++ b/test/unit/test_hooks.c @@ -32,7 +32,5 @@ TEST_END int main(void) { - return test( - unhooked_call, - hooked_call); + return test(unhooked_call, hooked_call); } diff --git a/test/unit/thread_event.c b/test/unit/thread_event.c index 66d61cd2..d886c998 100644 --- a/test/unit/thread_event.c +++ b/test/unit/thread_event.c @@ -1,20 +1,17 @@ #include "test/jemalloc_test.h" static uint32_t nuser_hook_calls; -static bool is_registered = false; +static bool is_registered = false; static void test_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { ++nuser_hook_calls; } static user_hook_object_t tobj = { - .callback = &test_cb, - .interval = 10, - .is_alloc_only = false -}; + .callback = &test_cb, .interval = 10, .is_alloc_only = false}; TEST_BEGIN(test_next_event_fast) { - tsd_t *tsd = tsd_fetch(); + tsd_t *tsd = 
tsd_fetch(); te_ctx_t ctx; te_ctx_get(tsd, &ctx, true); @@ -23,7 +20,8 @@ TEST_BEGIN(test_next_event_fast) { te_ctx_next_event_set(tsd, &ctx, TE_NEXT_EVENT_FAST_MAX); if (!is_registered) { - is_registered = 0 == te_register_user_handler(tsd_tsdn(tsd), &tobj); + is_registered = 0 + == te_register_user_handler(tsd_tsdn(tsd), &tobj); } assert_true(is_registered || !config_stats, "Register user handler"); nuser_hook_calls = 0; @@ -35,7 +33,8 @@ TEST_BEGIN(test_next_event_fast) { /* Test next_event_fast rolling back to 0. */ void *p = malloc(16U); - assert_true(nuser_hook_calls == 1 || !config_stats, "Expected alloc call"); + assert_true( + nuser_hook_calls == 1 || !config_stats, "Expected alloc call"); assert_ptr_not_null(p, "malloc() failed"); free(p); @@ -48,6 +47,5 @@ TEST_END int main(void) { - return test( - test_next_event_fast); + return test(test_next_event_fast); } diff --git a/test/unit/ticker.c b/test/unit/ticker.c index c4147a0c..31a2b8e0 100644 --- a/test/unit/ticker.c +++ b/test/unit/ticker.c @@ -6,7 +6,7 @@ TEST_BEGIN(test_ticker_tick) { #define NREPS 2 #define NTICKS 3 ticker_t ticker; - int32_t i, j; + int32_t i, j; ticker_init(&ticker, NTICKS); for (i = 0; i < NREPS; i++) { @@ -16,12 +16,12 @@ TEST_BEGIN(test_ticker_tick) { expect_false(ticker_tick(&ticker, false), "Unexpected ticker fire (i=%d, j=%d)", i, j); } - expect_u32_eq(ticker_read(&ticker), 0, - "Expected ticker depletion"); + expect_u32_eq( + ticker_read(&ticker), 0, "Expected ticker depletion"); expect_true(ticker_tick(&ticker, false), "Expected ticker fire (i=%d)", i); - expect_u32_eq(ticker_read(&ticker), NTICKS, - "Expected ticker reset"); + expect_u32_eq( + ticker_read(&ticker), NTICKS, "Expected ticker reset"); } #undef NTICKS } @@ -34,15 +34,15 @@ TEST_BEGIN(test_ticker_ticks) { ticker_init(&ticker, NTICKS); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_false(ticker_ticks(&ticker, NTICKS, false), - "Unexpected ticker fire"); + expect_false( + 
ticker_ticks(&ticker, NTICKS, false), "Unexpected ticker fire"); expect_u_eq(ticker_read(&ticker), 0, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS, false), - "Expected ticker fire"); + expect_true( + ticker_ticks(&ticker, NTICKS, false), "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&ticker, NTICKS + 1, false), - "Expected ticker fire"); + expect_true( + ticker_ticks(&ticker, NTICKS + 1, false), "Expected ticker fire"); expect_u_eq(ticker_read(&ticker), NTICKS, "Unexpected ticker value"); #undef NTICKS } @@ -55,8 +55,8 @@ TEST_BEGIN(test_ticker_copy) { ticker_init(&ta, NTICKS); ticker_copy(&tb, &ta); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); - expect_true(ticker_ticks(&tb, NTICKS + 1, false), - "Expected ticker fire"); + expect_true( + ticker_ticks(&tb, NTICKS + 1, false), "Expected ticker fire"); expect_u_eq(ticker_read(&tb), NTICKS, "Unexpected ticker value"); ticker_tick(&ta, false); @@ -69,7 +69,7 @@ TEST_BEGIN(test_ticker_copy) { TEST_END TEST_BEGIN(test_ticker_geom) { - const int32_t ticks = 100; + const int32_t ticks = 100; const uint64_t niters = 100 * 1000; ticker_geom_t ticker; @@ -78,7 +78,7 @@ TEST_BEGIN(test_ticker_geom) { /* Just some random constant. */ uint64_t prng_state = 0x343219f93496db9fULL; for (uint64_t i = 0; i < niters; i++) { - while(!ticker_geom_tick(&ticker, &prng_state, false)) { + while (!ticker_geom_tick(&ticker, &prng_state, false)) { total_ticks++; } } @@ -87,15 +87,15 @@ TEST_BEGIN(test_ticker_geom) { * used at the time this was tested, total_ticks is 95.1% of the * expected ticks. 
*/ - expect_u64_ge(total_ticks , niters * ticks * 9 / 10, - "Mean off by > 10%%"); - expect_u64_le(total_ticks , niters * ticks * 11 / 10, - "Mean off by > 10%%"); + expect_u64_ge( + total_ticks, niters * ticks * 9 / 10, "Mean off by > 10%%"); + expect_u64_le( + total_ticks, niters * ticks * 11 / 10, "Mean off by > 10%%"); } TEST_END TEST_BEGIN(test_ticker_delay) { - const int32_t ticks = 1000; + const int32_t ticks = 1000; const uint64_t niters = 10000; ticker_t t1; @@ -120,22 +120,19 @@ TEST_BEGIN(test_ticker_delay) { expect_false(ticker_geom_tick(&t2, &prng_state, delay), "Unexpected ticker fire"); expect_d_eq(ticker_read(&t1), 0, "Unexpected ticker value"); - expect_d_eq(ticker_geom_read(&t2), 0, "Unexpected ticker value"); + expect_d_eq( + ticker_geom_read(&t2), 0, "Unexpected ticker value"); } delay = false; expect_true(ticker_tick(&t1, delay), "Expected ticker fire"); - expect_true(ticker_geom_tick(&t2, &prng_state, delay), - "Expected ticker fire"); + expect_true( + ticker_geom_tick(&t2, &prng_state, delay), "Expected ticker fire"); } TEST_END int main(void) { - return test( - test_ticker_tick, - test_ticker_ticks, - test_ticker_copy, - test_ticker_geom, - test_ticker_delay); + return test(test_ticker_tick, test_ticker_ticks, test_ticker_copy, + test_ticker_geom, test_ticker_delay); } diff --git a/test/unit/tsd.c b/test/unit/tsd.c index bb5cd9f6..9610ceac 100644 --- a/test/unit/tsd.c +++ b/test/unit/tsd.c @@ -5,7 +5,7 @@ * be asserting that we're on one. */ static bool originally_fast; -static int data_cleanup_count; +static int data_cleanup_count; void data_cleanup(int *data) { @@ -45,7 +45,7 @@ data_cleanup(int *data) { static void * thd_start(void *arg) { - int d = (int)(uintptr_t)arg; + int d = (int)(uintptr_t)arg; void *p; /* @@ -105,11 +105,10 @@ thd_start_reincarnated(void *arg) { expect_ptr_not_null(p, "Unexpected malloc() failure"); /* Manually trigger reincarnation. 
*/ - expect_ptr_not_null(tsd_arena_get(tsd), - "Should have tsd arena set."); + expect_ptr_not_null(tsd_arena_get(tsd), "Should have tsd arena set."); tsd_cleanup((void *)tsd); - expect_ptr_null(*tsd_arenap_get_unsafe(tsd), - "TSD arena should have been cleared."); + expect_ptr_null( + *tsd_arenap_get_unsafe(tsd), "TSD arena should have been cleared."); expect_u_eq(tsd_state_get(tsd), tsd_state_purgatory, "TSD state should be purgatory\n"); @@ -193,7 +192,7 @@ TEST_END typedef struct { atomic_u32_t phase; - atomic_b_t error; + atomic_b_t error; } global_slow_data_t; static void * @@ -207,8 +206,8 @@ thd_start_global_slow(void *arg) { * No global slowness has happened yet; there was an error if we were * originally fast but aren't now. */ - atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd), - ATOMIC_SEQ_CST); + atomic_store_b( + &data->error, originally_fast && !tsd_fast(tsd), ATOMIC_SEQ_CST); atomic_store_u32(&data->phase, 1, ATOMIC_SEQ_CST); /* PHASE 2 */ @@ -241,8 +240,8 @@ thd_start_global_slow(void *arg) { * Both decrements happened; we should be fast again (if we ever * were) */ - atomic_store_b(&data->error, originally_fast && !tsd_fast(tsd), - ATOMIC_SEQ_CST); + atomic_store_b( + &data->error, originally_fast && !tsd_fast(tsd), ATOMIC_SEQ_CST); atomic_store_u32(&data->phase, 9, ATOMIC_SEQ_CST); return NULL; @@ -321,10 +320,7 @@ main(void) { return test_status_fail; } - return test_no_reentrancy( - test_tsd_main_thread, - test_tsd_sub_thread, - test_tsd_sub_thread_dalloc_only, - test_tsd_reincarnation, + return test_no_reentrancy(test_tsd_main_thread, test_tsd_sub_thread, + test_tsd_sub_thread_dalloc_only, test_tsd_reincarnation, test_tsd_global_slow); } diff --git a/test/unit/uaf.c b/test/unit/uaf.c index a8433c29..25399ed0 100644 --- a/test/unit/uaf.c +++ b/test/unit/uaf.c @@ -11,7 +11,8 @@ const char *malloc_conf = TEST_SAN_UAF_ALIGN_ENABLE; static size_t san_uaf_align; static bool fake_abort_called; -void fake_abort(const char *message) { 
+void +fake_abort(const char *message) { (void)message; fake_abort_called = true; } @@ -24,8 +25,8 @@ test_write_after_free_pre(void) { static void test_write_after_free_post(void) { - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - 0, "Unexpected tcache flush failure"); + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); expect_true(fake_abort_called, "Use-after-free check didn't fire."); safety_check_set_abort(NULL); } @@ -37,9 +38,10 @@ uaf_detection_enabled(void) { } ssize_t lg_san_uaf_align; - size_t sz = sizeof(lg_san_uaf_align); - assert_d_eq(mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(lg_san_uaf_align); + assert_d_eq( + mallctl("opt.lg_san_uaf_align", &lg_san_uaf_align, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); if (lg_san_uaf_align < 0) { return false; } @@ -48,8 +50,9 @@ uaf_detection_enabled(void) { bool tcache_enabled; sz = sizeof(tcache_enabled); - assert_d_eq(mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + assert_d_eq( + mallctl("thread.tcache.enabled", &tcache_enabled, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); if (!tcache_enabled) { return false; } @@ -69,10 +72,10 @@ read_tcache_stashed_bytes(unsigned arena_ind) { size_t tcache_stashed_bytes; size_t sz = sizeof(tcache_stashed_bytes); - assert_d_eq(mallctl( - "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) - ".tcache_stashed_bytes", &tcache_stashed_bytes, &sz, NULL, 0), 0, - "Unexpected mallctl failure"); + assert_d_eq(mallctl("stats.arenas." STRINGIFY( + MALLCTL_ARENAS_ALL) ".tcache_stashed_bytes", + &tcache_stashed_bytes, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return tcache_stashed_bytes; } @@ -91,17 +94,17 @@ test_use_after_free(size_t alloc_size, bool write_after_free) { * make use-after-free tolerable. 
*/ unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; size_t n_max = san_uaf_align * 2; void **items = mallocx(n_max * sizeof(void *), flags); assert_ptr_not_null(items, "Unexpected mallocx failure"); - bool found = false; + bool found = false; size_t iter = 0; - char magic = 's'; - assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - 0, "Unexpected tcache flush failure"); + char magic = 's'; + assert_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); while (!found) { ptr = mallocx(alloc_size, flags); assert_ptr_not_null(ptr, "Unexpected mallocx failure"); @@ -194,7 +197,7 @@ static bool check_allocated_intact(void **allocated, size_t n_alloc) { for (unsigned i = 0; i < n_alloc; i++) { void *ptr = *(void **)allocated[i]; - bool found = false; + bool found = false; for (unsigned j = 0; j < n_alloc; j++) { if (ptr == allocated[j]) { found = true; @@ -213,7 +216,7 @@ TEST_BEGIN(test_use_after_free_integration) { test_skip_if(!uaf_detection_enabled()); unsigned arena_ind = do_arena_create(-1, -1); - int flags = MALLOCX_ARENA(arena_ind); + int flags = MALLOCX_ARENA(arena_ind); size_t n_alloc = san_uaf_align * 2; void **allocated = mallocx(n_alloc * sizeof(void *), flags); @@ -255,8 +258,6 @@ TEST_END int main(void) { - return test( - test_read_after_free, - test_write_after_free, + return test(test_read_after_free, test_write_after_free, test_use_after_free_integration); } diff --git a/test/unit/witness.c b/test/unit/witness.c index 5a6c4482..ccefb5a2 100644 --- a/test/unit/witness.c +++ b/test/unit/witness.c @@ -1,9 +1,9 @@ #include "test/jemalloc_test.h" -static witness_lock_error_t *witness_lock_error_orig; -static witness_owner_error_t *witness_owner_error_orig; +static witness_lock_error_t *witness_lock_error_orig; +static witness_owner_error_t *witness_owner_error_orig; static 
witness_not_owner_error_t *witness_not_owner_error_orig; -static witness_depth_error_t *witness_depth_error_orig; +static witness_depth_error_t *witness_depth_error_orig; static bool saw_lock_error; static bool saw_owner_error; @@ -11,8 +11,8 @@ static bool saw_not_owner_error; static bool saw_depth_error; static void -witness_lock_error_intercept(const witness_list_t *witnesses, - const witness_t *witness) { +witness_lock_error_intercept( + const witness_list_t *witnesses, const witness_t *witness) { saw_lock_error = true; } @@ -43,8 +43,8 @@ witness_comp(const witness_t *a, void *oa, const witness_t *b, void *ob) { } static int -witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, - void *ob) { +witness_comp_reverse( + const witness_t *a, void *oa, const witness_t *b, void *ob) { expect_u_eq(a->rank, b->rank, "Witnesses should have equal rank"); assert(oa == (void *)a); @@ -54,8 +54,8 @@ witness_comp_reverse(const witness_t *a, void *oa, const witness_t *b, } TEST_BEGIN(test_witness) { - witness_t a, b; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a, b; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -94,8 +94,8 @@ TEST_BEGIN(test_witness) { TEST_END TEST_BEGIN(test_witness_comp) { - witness_t a, b, c, d; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a, b, c, d; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -146,8 +146,8 @@ TEST_BEGIN(test_witness_comp) { TEST_END TEST_BEGIN(test_witness_reversal) { - witness_t a, b; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a, b; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -177,8 +177,8 @@ TEST_BEGIN(test_witness_reversal) { TEST_END TEST_BEGIN(test_witness_recursive) { - witness_t a; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a; + witness_tsdn_t witness_tsdn = 
{WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -207,13 +207,12 @@ TEST_BEGIN(test_witness_recursive) { witness_owner_error = witness_owner_error_orig; witness_lock_error = witness_lock_error_orig; - } TEST_END TEST_BEGIN(test_witness_unlock_not_owned) { - witness_t a; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -236,8 +235,8 @@ TEST_BEGIN(test_witness_unlock_not_owned) { TEST_END TEST_BEGIN(test_witness_depth) { - witness_t a; - witness_tsdn_t witness_tsdn = { WITNESS_TSD_INITIALIZER }; + witness_t a; + witness_tsdn_t witness_tsdn = {WITNESS_TSD_INITIALIZER}; test_skip_if(!config_debug); @@ -270,11 +269,7 @@ TEST_END int main(void) { - return test( - test_witness, - test_witness_comp, - test_witness_reversal, - test_witness_recursive, - test_witness_unlock_not_owned, + return test(test_witness, test_witness_comp, test_witness_reversal, + test_witness_recursive, test_witness_unlock_not_owned, test_witness_depth); } diff --git a/test/unit/zero.c b/test/unit/zero.c index d3e81f1b..522d6908 100644 --- a/test/unit/zero.c +++ b/test/unit/zero.c @@ -3,35 +3,35 @@ static void test_zero(size_t sz_min, size_t sz_max) { uint8_t *s; - size_t sz_prev, sz, i; -#define MAGIC ((uint8_t)0x61) + size_t sz_prev, sz, i; +#define MAGIC ((uint8_t)0x61) sz_prev = 0; s = (uint8_t *)mallocx(sz_min, 0); expect_ptr_not_null((void *)s, "Unexpected mallocx() failure"); for (sz = sallocx(s, 0); sz <= sz_max; - sz_prev = sz, sz = sallocx(s, 0)) { + sz_prev = sz, sz = sallocx(s, 0)) { if (sz_prev > 0) { expect_u_eq(s[0], MAGIC, "Previously allocated byte %zu/%zu is corrupted", ZU(0), sz_prev); - expect_u_eq(s[sz_prev-1], MAGIC, + expect_u_eq(s[sz_prev - 1], MAGIC, "Previously allocated byte %zu/%zu is corrupted", - sz_prev-1, sz_prev); + sz_prev - 1, sz_prev); } for (i = sz_prev; i < sz; i++) { expect_u_eq(s[i], 0x0, - "Newly allocated byte %zu/%zu isn't 
zero-filled", - i, sz); + "Newly allocated byte %zu/%zu isn't zero-filled", i, + sz); s[i] = MAGIC; } - if (xallocx(s, sz+1, 0, 0) == sz) { - s = (uint8_t *)rallocx(s, sz+1, 0); - expect_ptr_not_null((void *)s, - "Unexpected rallocx() failure"); + if (xallocx(s, sz + 1, 0, 0) == sz) { + s = (uint8_t *)rallocx(s, sz + 1, 0); + expect_ptr_not_null( + (void *)s, "Unexpected rallocx() failure"); } } @@ -53,7 +53,5 @@ TEST_END int main(void) { - return test( - test_zero_small, - test_zero_large); + return test(test_zero_small, test_zero_large); } diff --git a/test/unit/zero_realloc_abort.c b/test/unit/zero_realloc_abort.c index f014cdc2..1d8bf9c3 100644 --- a/test/unit/zero_realloc_abort.c +++ b/test/unit/zero_realloc_abort.c @@ -4,7 +4,8 @@ static bool abort_called = false; -void set_abort_called(const char *message) { +void +set_abort_called(const char *message) { (void)message; abort_called = true; }; @@ -21,7 +22,5 @@ TEST_END int main(void) { - return test( - test_realloc_abort); + return test(test_realloc_abort); } - diff --git a/test/unit/zero_realloc_alloc.c b/test/unit/zero_realloc_alloc.c index 6954818c..5b4f985f 100644 --- a/test/unit/zero_realloc_alloc.c +++ b/test/unit/zero_realloc_alloc.c @@ -6,9 +6,10 @@ allocated(void) { return 0; } uint64_t allocated; - size_t sz = sizeof(allocated); - expect_d_eq(mallctl("thread.allocated", (void *)&allocated, &sz, NULL, - 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(allocated); + expect_d_eq( + mallctl("thread.allocated", (void *)&allocated, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); return allocated; } @@ -18,9 +19,10 @@ deallocated(void) { return 0; } uint64_t deallocated; - size_t sz = sizeof(deallocated); - expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(deallocated); + expect_d_eq( + mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return deallocated; 
} @@ -43,6 +45,5 @@ TEST_BEGIN(test_realloc_alloc) { TEST_END int main(void) { - return test( - test_realloc_alloc); + return test(test_realloc_alloc); } diff --git a/test/unit/zero_realloc_free.c b/test/unit/zero_realloc_free.c index 277f219d..c2aa0afa 100644 --- a/test/unit/zero_realloc_free.c +++ b/test/unit/zero_realloc_free.c @@ -6,9 +6,10 @@ deallocated(void) { return 0; } uint64_t deallocated; - size_t sz = sizeof(deallocated); - expect_d_eq(mallctl("thread.deallocated", (void *)&deallocated, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + size_t sz = sizeof(deallocated); + expect_d_eq( + mallctl("thread.deallocated", (void *)&deallocated, &sz, NULL, 0), + 0, "Unexpected mallctl failure"); return deallocated; } @@ -28,6 +29,5 @@ TEST_END int main(void) { - return test( - test_realloc_free); + return test(test_realloc_free); } diff --git a/test/unit/zero_reallocs.c b/test/unit/zero_reallocs.c index a9077222..6c4a51d6 100644 --- a/test/unit/zero_reallocs.c +++ b/test/unit/zero_reallocs.c @@ -8,8 +8,9 @@ zero_reallocs(void) { size_t count = 12345; size_t sz = sizeof(count); - expect_d_eq(mallctl("stats.zero_reallocs", (void *)&count, &sz, - NULL, 0), 0, "Unexpected mallctl failure"); + expect_d_eq( + mallctl("stats.zero_reallocs", (void *)&count, &sz, NULL, 0), 0, + "Unexpected mallctl failure"); return count; } @@ -35,6 +36,5 @@ main(void) { * We expect explicit counts; reentrant tests run multiple times, so * counts leak across runs. */ - return test_no_reentrancy( - test_zero_reallocs); + return test_no_reentrancy(test_zero_reallocs); } From 5847516692b4022fa8e0fe333f6e676ae48f02a7 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Fri, 20 Jun 2025 14:41:13 -0700 Subject: [PATCH 2519/2608] Ignore the clang-format changes in the git blame. 
--- .git-blame-ignore-revs | 1 + 1 file changed, 1 insertion(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 365e8bb1..7f5f6975 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1 +1,2 @@ 554185356bf990155df8d72060c4efe993642baf +34f359e0ca613b5f9d970e9b2152a5203c9df8d6 From 711fff750ce904d0b881a6fe534732dcb75874e6 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 9 Jun 2025 21:29:55 -0700 Subject: [PATCH 2520/2608] Add experimental support for usdt systemtap probes --- configure.ac | 49 ++++++ .../internal/jemalloc_internal_defs.h.in | 9 ++ include/jemalloc/internal/jemalloc_probe.h | 49 ++++++ .../jemalloc/internal/jemalloc_probe_custom.h | 148 ++++++++++++++++++ .../jemalloc/internal/jemalloc_probe_stap.h | 11 ++ 5 files changed, 266 insertions(+) create mode 100644 include/jemalloc/internal/jemalloc_probe.h create mode 100644 include/jemalloc/internal/jemalloc_probe_custom.h create mode 100644 include/jemalloc/internal/jemalloc_probe_stap.h diff --git a/configure.ac b/configure.ac index c615cab2..c703a6d1 100644 --- a/configure.ac +++ b/configure.ac @@ -1681,6 +1681,55 @@ else fi AC_SUBST([enable_utrace]) +dnl Disable experimental sdt tracing by default. 
+AC_ARG_ENABLE([experimental-sdt], + [AS_HELP_STRING([--enable-experimental-sdt], [Enable systemtap USDT probes])], +[if test "x$enable_experimental_sdt" = "xno" ; then + enable_experimental_sdt="0" +else + JE_COMPILABLE([systemtap sdt], [ +#include + ], [ +void foo(int i, void *p) { STAP_PROBE2(jemalloc, test, i, p); } + ], + [je_cv_stap_sdt]) + + if test "x${je_cv_stap_sdt}" = "xyes" ; then + enable_experimental_sdt="1" + elif test "x${abi}" = "xelf" ; then + case "${host}" in + *-*-linux-android*) + case "${host_cpu}" in aarch64|x86_64) + enable_experimental_sdt="2" + ;; + esac + ;; + *-*-linux*) + case "${host_cpu}" in x86_64|aarch64|arm*) + enable_experimental_sdt="2" + ;; + esac + ;; + *) + enable_experimental_sdt="0" + AC_MSG_ERROR([Unsupported sdt on this platform]) + ;; + esac + else + AC_MSG_ERROR([Unsupported sdt on this platform]) + fi +fi +], +[enable_experimental_sdt="0"] +) + +if test "x$enable_experimental_sdt" = "x1" ; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL_USDT_STAP], [ ], [ ]) +elif test "x$enable_experimental_sdt" = "x2"; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL_USDT_CUSTOM], [ ], [ ]) +fi +AC_SUBST([enable_experimental_sdt]) + dnl Do not support the xmalloc option by default. AC_ARG_ENABLE([xmalloc], [AS_HELP_STRING([--enable-xmalloc], [Support xmalloc option])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 6d557959..31ae2e8e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -353,6 +353,15 @@ /* Defined if mprotect(2) is available. 
*/ #undef JEMALLOC_HAVE_MPROTECT +/* Defined if sys/sdt.h is available and sdt tracing enabled */ +#undef JEMALLOC_EXPERIMENTAL_USDT_STAP + +/* + * Defined if sys/sdt.h is unavailable, sdt tracing enabled, and + * platform is supported + */ +#undef JEMALLOC_EXPERIMENTAL_USDT_CUSTOM + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. diff --git a/include/jemalloc/internal/jemalloc_probe.h b/include/jemalloc/internal/jemalloc_probe.h new file mode 100644 index 00000000..8ef3105d --- /dev/null +++ b/include/jemalloc/internal/jemalloc_probe.h @@ -0,0 +1,49 @@ +#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_H +#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_H + +#include + +#ifdef JEMALLOC_EXPERIMENTAL_USDT_STAP +#include +#elif defined(JEMALLOC_EXPERIMENTAL_USDT_CUSTOM) +#include +#elif defined(_MSC_VER) +#define JE_USDT(name, N, ...) /* Nothing */ +#else /* no USDT, just check the args */ + +#define JE_USDT(name, N, ...) 
_JE_USDT_CHECK_ARG##N(__VA_ARGS__) + +#define _JE_USDT_CHECK_ARG1(a) \ + do { \ + (void)(a); \ + } while (0) +#define _JE_USDT_CHECK_ARG2(a, b) \ + do { \ + (void)(a); \ + (void)(b); \ + } while (0) +#define _JE_USDT_CHECK_ARG3(a, b, c) \ + do { \ + (void)(a); \ + (void)(b); \ + (void)(c); \ + } while (0) +#define _JE_USDT_CHECK_ARG4(a, b, c, d) \ + do { \ + (void)(a); \ + (void)(b); \ + (void)(c); \ + (void)(d); \ + } while (0) +#define _JE_USDT_CHECK_ARG5(a, b, c, d, e) \ + do { \ + (void)(a); \ + (void)(b); \ + (void)(c); \ + (void)(d); \ + (void)(e); \ + } while (0) + +#endif /* JEMALLOC_EXPERIMENTAL_USDT_* */ + +#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_H */ diff --git a/include/jemalloc/internal/jemalloc_probe_custom.h b/include/jemalloc/internal/jemalloc_probe_custom.h new file mode 100644 index 00000000..3c22749f --- /dev/null +++ b/include/jemalloc/internal/jemalloc_probe_custom.h @@ -0,0 +1,148 @@ +#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H +#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H + +/* clang-format off */ + +/* + * This section is based on sys/sdt.h and + * https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation + */ + +/* Emit NOP for the probe. 
*/ +#if (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) || \ + defined(__arm__)) && defined(__linux__) +#define JE_SDT_NOP nop +#else +#error "Architecture not supported" +#endif + +/* Assembly macros */ +#define JE_SDT_S(x) #x + +#define JE_SDT_ASM_1(x) JE_SDT_S(x) "\n" + +#define JE_SDT_ASM_2(x, y) \ + JE_SDT_S(x) "," JE_SDT_S(y) "\n" + +#define JE_SDT_ASM_3(x, y, z) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "\n" + +#define JE_SDT_ASM_3(x, y, z) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "\n" + +#define JE_SDT_ASM_4(x, y, z, p) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "," JE_SDT_S(p) "\n" + +#define JE_SDT_ASM_5(x, y, z, p, q) \ + JE_SDT_S(x) "," JE_SDT_S(y) "," JE_SDT_S(z) "," JE_SDT_S(p) "," \ + JE_SDT_S(q) "\n" + +/* Arg size */ +#ifdef __LP64__ +#define JE_SDT_ASM_ADDR .8byte +#else +#define JE_SDT_ASM_ADDR .4byte +#endif + +#define JE_SDT_NOTE_NAME "stapsdt" +#define JE_SDT_NOTE_TYPE 3 + +#define JE_SDT_SEMAPHORE_NONE(provider, name) \ + JE_SDT_ASM_1(JE_SDT_ASM_ADDR 0) /* No Semaphore support */ +#define JE_SDT_SEMAPHORE_OPERAND(provider, name) \ + [__sdt_semaphore] "ip" (0) /* No Semaphore */ + +#define JE_SDT_ASM_STRING(x) JE_SDT_ASM_1(.asciz JE_SDT_S(x)) + +#define JE_SDT_NOTE(provider, name, arg_template) \ + JE_SDT_ASM_1(990: JE_SDT_NOP) \ + JE_SDT_ASM_3( .pushsection .note.stapsdt,"?","note") \ + JE_SDT_ASM_1( .balign 4) \ + JE_SDT_ASM_3( .4byte 992f-991f, 994f-993f, JE_SDT_NOTE_TYPE) \ + JE_SDT_ASM_1(991: .asciz JE_SDT_NOTE_NAME) \ + JE_SDT_ASM_1(992: .balign 4) \ + JE_SDT_ASM_1(993: JE_SDT_ASM_ADDR 990b) \ + JE_SDT_ASM_1( JE_SDT_ASM_ADDR _.stapsdt.base) \ + JE_SDT_SEMAPHORE_NONE(provider, name) \ + JE_SDT_ASM_STRING(provider) \ + JE_SDT_ASM_STRING(name) \ + JE_SDT_ASM_STRING(arg_template) \ + JE_SDT_ASM_1(994: .balign 4) \ + JE_SDT_ASM_1( .popsection) + +#define JE_SDT_BASE \ + JE_SDT_ASM_1( .ifndef _.stapsdt.base) \ + JE_SDT_ASM_5( .pushsection .stapsdt.base, "aG", "progbits", \ + .stapsdt.base,comdat) \ + 
JE_SDT_ASM_1( .weak _.stapsdt.base) \ + JE_SDT_ASM_1( .hidden _.stapsdt.base) \ + JE_SDT_ASM_1( _.stapsdt.base: .space 1) \ + JE_SDT_ASM_2( .size _.stapsdt.base, 1) \ + JE_SDT_ASM_1( .popsection) \ + JE_SDT_ASM_1( .endif) + + +/* + * Default constraint for probes arguments. + * See https://gcc.gnu.org/onlinedocs/gcc/Constraints.html + */ +#ifndef JE_SDT_ARG_CONSTRAINT +#define JE_SDT_ARG_CONSTRAINT "nor" +#endif + +#define JE_SDT_ARGARRAY(x) ((__builtin_classify_type(x) == 14) || \ + (__builtin_classify_type(x) == 5)) +#define JE_SDT_ARGSIZE(x) (JE_SDT_ARGARRAY(x) ? sizeof(void*) : sizeof(x)) + +/* + * Format of each probe argument as operand. Size tagged with JE_SDT_Sn, + * with "n" constraint. Value is tagged with JE_SDT_An with configured + * constraint. + */ +#define JE_SDT_ARG(n, x) \ + [JE_SDT_S##n] "n" ((size_t)JE_SDT_ARGSIZE(x)), \ + [JE_SDT_A##n] JE_SDT_ARG_CONSTRAINT(x) + +/* Templates to append arguments as operands. */ +#define JE_SDT_OPERANDS_0() [__sdt_dummy] "g" (0) +#define JE_SDT_OPERANDS_1(_1) JE_SDT_ARG(1, _1) +#define JE_SDT_OPERANDS_2(_1, _2) JE_SDT_OPERANDS_1(_1), JE_SDT_ARG(2, _2) +#define JE_SDT_OPERANDS_3(_1, _2, _3) JE_SDT_OPERANDS_2(_1, _2), JE_SDT_ARG(3, _3) +#define JE_SDT_OPERANDS_4(_1, _2, _3, _4) \ + JE_SDT_OPERANDS_3(_1, _2, _3), JE_SDT_ARG(4, _4) +#define JE_SDT_OPERANDS_5(_1, _2, _3, _4, _5) \ + JE_SDT_OPERANDS_4(_1, _2, _3, _4), JE_SDT_ARG(5, _5) +#define JE_SDT_OPERANDS_6(_1, _2, _3, _4, _5, _6) \ + JE_SDT_OPERANDS_5(_1, _2, _3, _4, _5), JE_SDT_ARG(6, _6) +#define JE_SDT_OPERANDS_7(_1, _2, _3, _4, _5, _6, _7) \ + JE_SDT_OPERANDS_6(_1, _2, _3, _4, _5, _6), JE_SDT_ARG(7, _7) + +/* Templates to reference the arguments from operands. 
*/ +#define JE_SDT_ARGFMT(num) %n[JE_SDT_S##num]@%[JE_SDT_A##num] +#define JE_SDT_ARG_TEMPLATE_0 /* No args */ +#define JE_SDT_ARG_TEMPLATE_1 JE_SDT_ARGFMT(1) +#define JE_SDT_ARG_TEMPLATE_2 JE_SDT_ARG_TEMPLATE_1 JE_SDT_ARGFMT(2) +#define JE_SDT_ARG_TEMPLATE_3 JE_SDT_ARG_TEMPLATE_2 JE_SDT_ARGFMT(3) +#define JE_SDT_ARG_TEMPLATE_4 JE_SDT_ARG_TEMPLATE_3 JE_SDT_ARGFMT(4) +#define JE_SDT_ARG_TEMPLATE_5 JE_SDT_ARG_TEMPLATE_4 JE_SDT_ARGFMT(5) +#define JE_SDT_ARG_TEMPLATE_6 JE_SDT_ARG_TEMPLATE_5 JE_SDT_ARGFMT(6) +#define JE_SDT_ARG_TEMPLATE_7 JE_SDT_ARG_TEMPLATE_6 JE_SDT_ARGFMT(7) + +#define JE_SDT_PROBE( \ + provider, name, n, arglist) \ + do { \ + __asm__ __volatile__( \ + JE_SDT_NOTE(provider, name, \ + JE_SDT_ARG_TEMPLATE_##n) \ + :: JE_SDT_SEMAPHORE_OPERAND(provider, name), \ + JE_SDT_OPERANDS_##n arglist); \ + __asm__ __volatile__(JE_SDT_BASE); \ + } while (0) + +#define JE_USDT(name, N, ...) \ + JE_SDT_PROBE(jemalloc, name, N, (__VA_ARGS__)) + + +#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_CUSTOM_H */ + +/* clang-format on */ diff --git a/include/jemalloc/internal/jemalloc_probe_stap.h b/include/jemalloc/internal/jemalloc_probe_stap.h new file mode 100644 index 00000000..302b6cbb --- /dev/null +++ b/include/jemalloc/internal/jemalloc_probe_stap.h @@ -0,0 +1,11 @@ +#ifndef JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H +#define JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H + +#include + +#define JE_USDT(name, N, ...) JE_USDT_PROBE_N(name, N, ##__VA_ARGS__) + +#define JE_USDT_PROBE_N(name, N, ...) 
\ + STAP_PROBE##N(jemalloc, name, ##__VA_ARGS__) + +#endif /* JEMALLOC_INTERNAL_JEMALLOC_PROBE_STAP_H */ From f87bbab22cf5a81dd314c7811867edc5c69025d2 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 10 Jun 2025 11:44:23 -0700 Subject: [PATCH 2521/2608] Add several USDT probes for hpa --- src/hpa_hooks.c | 13 +++++++++++-- src/hpdata.c | 5 +++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 45bebe41..14005ae0 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpa_hooks.h" +#include "jemalloc/internal/jemalloc_probe.h" static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); @@ -19,16 +20,20 @@ const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, static void * hpa_hooks_map(size_t size) { bool commit = true; - return pages_map(NULL, size, HUGEPAGE, &commit); + void *ret = pages_map(NULL, size, HUGEPAGE, &commit); + JE_USDT(hpa_map, 2, size, ret); + return ret; } static void hpa_hooks_unmap(void *ptr, size_t size) { + JE_USDT(hpa_unmap, 2, size, ptr); pages_unmap(ptr, size); } static void hpa_hooks_purge(void *ptr, size_t size) { + JE_USDT(hpa_purge, 2, size, ptr); pages_purge_forced(ptr, size); } @@ -52,12 +57,14 @@ hpa_hooks_hugify(void *ptr, size_t size, bool sync) { if (sync) { err = pages_collapse(ptr, size); } + JE_USDT(hpa_hugify, 4, size, ptr, err, sync); return err; } static void hpa_hooks_dehugify(void *ptr, size_t size) { bool err = pages_nohuge(ptr, size); + JE_USDT(hpa_dehugify, 3, size, ptr, err); (void)err; } @@ -78,7 +85,9 @@ hpa_hooks_ms_since(nstime_t *past_nstime) { static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE - return pages_purge_process_madvise(vec, vlen, nbytes); + bool err = pages_purge_process_madvise(vec, vlen, nbytes); + 
JE_USDT(hpa_vectorized_purge, 3, nbytes, vlen, err); + return err; #else return true; #endif diff --git a/src/hpdata.c b/src/hpdata.c index 9d324952..e9ee2738 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpdata.h" +#include "jemalloc/internal/jemalloc_probe.h" static int hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { @@ -100,6 +101,8 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); hpdata->h_ntouched += new_dirty; + JE_USDT(hpa_reserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, + new_dirty, largest_unchosen_range); /* * If we allocated out of a range that was the longest in the hpdata, it @@ -160,6 +163,8 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata->h_nactive -= npages; hpdata_assert_consistent(hpdata); + JE_USDT(hpa_unreserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, + old_longest_range, new_range_len); } size_t From 4246475b44e660010256206857d941e6f45ca113 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 25 Jul 2025 10:14:28 -0700 Subject: [PATCH 2522/2608] [process_madvise] Make init lazy so that python tests pass. 
Reset the pidfd on fork --- include/jemalloc/internal/atomic.h | 2 ++ include/jemalloc/internal/pages.h | 1 + src/jemalloc.c | 1 + src/pages.c | 37 ++++++++++++++++++++++++------ 4 files changed, 34 insertions(+), 7 deletions(-) diff --git a/include/jemalloc/internal/atomic.h b/include/jemalloc/internal/atomic.h index ddd9341e..f80e5640 100644 --- a/include/jemalloc/internal/atomic.h +++ b/include/jemalloc/internal/atomic.h @@ -89,6 +89,8 @@ JEMALLOC_GENERATE_ATOMICS(bool, b, 0) JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(unsigned, u, LG_SIZEOF_INT) +JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(int, i, LG_SIZEOF_INT) + JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(size_t, zu, LG_SIZEOF_PTR) JEMALLOC_GENERATE_EXPANDED_INT_ATOMICS(ssize_t, zd, LG_SIZEOF_PTR) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 31909934..b0cc5bba 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -127,5 +127,6 @@ bool pages_boot(void); void pages_set_thp_state(void *ptr, size_t size); void pages_mark_guards(void *head, void *tail); void pages_unmark_guards(void *head, void *tail); +void pages_postfork_child(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 876c49e8..4adcbf3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4549,6 +4549,7 @@ jemalloc_postfork_child(void) { malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); + pages_postfork_child(); } /******************************************************************************/ diff --git a/src/pages.c b/src/pages.c index 88301c2b..54678a38 100644 --- a/src/pages.c +++ b/src/pages.c @@ -621,7 +621,7 @@ pages_dodump(void *addr, size_t size) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE # include # include -static int pidfd; +static atomic_i_t process_madvise_pidfd = ATOMIC_INIT(-1); static bool init_process_madvise(void) { @@ -632,11 +632,6 @@ 
init_process_madvise(void) { if (opt_process_madvise_max_batch > PROCESS_MADVISE_MAX_BATCH_LIMIT) { opt_process_madvise_max_batch = PROCESS_MADVISE_MAX_BATCH_LIMIT; } - pid_t pid = getpid(); - pidfd = syscall(SYS_pidfd_open, pid, 0); - if (pidfd == -1) { - return true; - } return false; } @@ -651,12 +646,38 @@ init_process_madvise(void) { static bool pages_purge_process_madvise_impl( void *vec, size_t vec_len, size_t total_bytes) { - size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pidfd, + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + while (pid_fd == -1) { + int newfd = syscall(SYS_pidfd_open, getpid(), 0); + if (newfd == -1) { + return true; + } + if (!atomic_compare_exchange_strong_i(&process_madvise_pidfd, + &pid_fd, newfd, + ATOMIC_SEQ_CST, + ATOMIC_SEQ_CST)) { + /* Someone else set the fd, so we close ours */ + assert(pid_fd != -1); + close(newfd); + } else { + pid_fd = newfd; + } + } + size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pid_fd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); return purged_bytes != total_bytes; } +void pages_postfork_child(void) { + /* Reset the file descriptor we inherited from parent process */ + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + if (pid_fd != -1) { + atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_SEQ_CST); + close(pid_fd); + } +} + #else static bool @@ -671,6 +692,8 @@ pages_purge_process_madvise_impl( return true; } +void pages_postfork_child(void) {} + #endif bool From 395e63bf7e79b9faf7187add17ee6b0571857a60 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 4 Aug 2025 11:25:10 -0700 Subject: [PATCH 2523/2608] Fix several spelling errors in comments --- src/hpa.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 03668f06..4c0f4e36 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -240,7 +240,7 @@ hpa_shard_init(hpa_shard_t *shard, 
hpa_central_t *central, emap_t *emap, /* * Note that the stats functions here follow the usual stats naming conventions; * "merge" obtains the stats from some live object of instance, while "accum" - * only combines the stats from one stats objet to another. Hence the lack of + * only combines the stats from one stats object to another. Hence the lack of * locking here. */ static void @@ -368,7 +368,7 @@ hpa_update_purge_hugify_eligibility( * could lead to situations where a hugepage that spends most of its * time meeting the criteria never quite getting hugified if there are * intervening deallocations). The idea is that the hugification delay - * will allow them to get purged, reseting their "hugify-allowed" bit. + * will allow them to get purged, resetting their "hugify-allowed" bit. * If they don't get purged, then the hugification isn't hurting and * might help. As an exception, we don't hugify hugepages that are now * empty; it definitely doesn't help there until the hugepage gets @@ -642,11 +642,11 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { shard->stats.nhugifies++; if (err) { /* - * When asynchronious hugification is used + * When asynchronous hugification is used * (shard->opts.hugify_sync option is false), we are not * expecting to get here, unless something went terrible wrong. * Because underlying syscall is only setting kernel flag for - * memory range (actual hugification happens asynchroniously + * memory range (actual hugification happens asynchronously * and we are not getting any feedback about its outcome), we * expect syscall to be successful all the time. */ @@ -706,7 +706,7 @@ hpa_shard_maybe_do_deferred_work( * When experimental_max_purge_nhp option is used, there is no * guarantee we'll always respect dirty_mult option. 
Option * experimental_max_purge_nhp provides a way to configure same - * behaviour as was possible before, with buggy implementation + * behavior as was possible before, with buggy implementation * of purging algorithm. */ ssize_t max_purge_nhp = shard->opts.experimental_max_purge_nhp; From a156e997d7037aba2b2dc09993a62798966c991e Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 4 Aug 2025 14:43:03 -0700 Subject: [PATCH 2524/2608] Do not dehugify when purging Giving the advice MADV_DONTNEED to a range of virtual memory backed by a transparent huge page already causes that range of virtual memory to become backed by regular pages. --- include/jemalloc/internal/hpa_hooks.h | 1 - src/hpa.c | 9 +------- src/hpa_hooks.c | 12 ++-------- test/unit/hpa.c | 23 ------------------- test/unit/hpa_vectorized_madvise.c | 11 --------- .../unit/hpa_vectorized_madvise_large_batch.c | 1 - 6 files changed, 3 insertions(+), 54 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index f50ff58f..5e68e349 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -10,7 +10,6 @@ struct hpa_hooks_s { void (*unmap)(void *ptr, size_t size); void (*purge)(void *ptr, size_t size); bool (*hugify)(void *ptr, size_t size, bool sync); - void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes); diff --git a/src/hpa.c b/src/hpa.c index 4c0f4e36..e297e411 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -432,18 +432,11 @@ hpa_purge_actual_unlocked( hpa_range_accum_init(&accum, vec, len); for (size_t i = 0; i < batch_sz; ++i) { - hpdata_t *to_purge = batch[i].hp; - - /* Actually do the purging, now that the lock is dropped. 
*/ - if (batch[i].dehugify) { - shard->central->hooks.dehugify( - hpdata_addr_get(to_purge), HUGEPAGE); - } void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - to_purge, &batch[i].state, &purge_addr, &purge_size)) { + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); hpa_range_accum_add( diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 14005ae0..e40d30ec 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,14 +8,13 @@ static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); -static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, - &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, - &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; + &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_curtime, + &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; static void * hpa_hooks_map(size_t size) { @@ -61,13 +60,6 @@ hpa_hooks_hugify(void *ptr, size_t size, bool sync) { return err; } -static void -hpa_hooks_dehugify(void *ptr, size_t size) { - bool err = pages_nohuge(ptr, size); - JE_USDT(hpa_dehugify, 3, size, ptr, err); - (void)err; -} - static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { if (first_reading) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 1fed8a80..d62ac762 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -389,12 +389,6 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } -static size_t ndefer_dehugify_calls 
= 0; -static void -defer_test_dehugify(void *ptr, size_t size) { - ++ndefer_dehugify_calls; -} - static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -414,7 +408,6 @@ TEST_BEGIN(test_defer_time) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -453,10 +446,8 @@ TEST_BEGIN(test_defer_time) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified"); expect_zu_eq(1, ndefer_purge_calls, "Should have purged"); ndefer_hugify_calls = 0; - ndefer_dehugify_calls = 0; ndefer_purge_calls = 0; /* @@ -477,7 +468,6 @@ TEST_BEGIN(test_defer_time) { nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); - expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify"); expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge"); ndefer_hugify_calls = 0; @@ -524,7 +514,6 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -551,7 +540,6 @@ TEST_BEGIN(test_no_min_purge_interval) { * we have dirty pages. 
*/ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -567,7 +555,6 @@ TEST_BEGIN(test_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -593,7 +580,6 @@ TEST_BEGIN(test_min_purge_interval) { * opt.min_purge_interval_ms didn't pass yet. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); /* Minumum purge interval is set to 5 seconds in options. */ @@ -602,7 +588,6 @@ TEST_BEGIN(test_min_purge_interval) { /* Now we should purge, but nothing else. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -618,7 +603,6 @@ TEST_BEGIN(test_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -648,7 +632,6 @@ TEST_BEGIN(test_purge) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still * have 5 active hugepages (1 / 5 = 0.2 < 0.25). 
@@ -665,7 +648,6 @@ TEST_BEGIN(test_purge) { */ expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * We still have completely dirty hugepage, but we are below * opt.dirty_mult. @@ -685,7 +667,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -716,7 +697,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only one purge call, because opts.experimental_max_purge_nhp * is set to 1. @@ -729,7 +709,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We still above the limit for dirty pages. */ expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -738,7 +717,6 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* Finally, we are below the limit, no purges are expected. 
*/ expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); @@ -754,7 +732,6 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 8df54d06..c2aa3b58 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -123,12 +123,6 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } -static size_t ndefer_dehugify_calls = 0; -static void -defer_test_dehugify(void *ptr, size_t size) { - ++ndefer_dehugify_calls; -} - static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -148,7 +142,6 @@ TEST_BEGIN(test_vectorized_failure_fallback) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge_fail; @@ -188,7 +181,6 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -231,7 +223,6 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We purge from 2 huge pages, each one 3 dirty continous segments. 
* For opt_process_madvise_max_batch = 2, that is @@ -259,7 +250,6 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -296,7 +286,6 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We have page batch size = 1. * we have 5 * HP active pages, 3 * HP dirty pages diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index a5766620..c974500c 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -140,7 +140,6 @@ TEST_BEGIN(test_vectorized_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; From 9528a2e2dd37154475b8a36186e62f32de17cf58 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 4 Aug 2025 13:13:27 -0700 Subject: [PATCH 2525/2608] Use relaxed atomics to access the process madvise pid fd Relaxed atomics already provide sequentially consistent access to single location data structures. 
--- src/pages.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/pages.c b/src/pages.c index 54678a38..076091e3 100644 --- a/src/pages.c +++ b/src/pages.c @@ -646,7 +646,7 @@ init_process_madvise(void) { static bool pages_purge_process_madvise_impl( void *vec, size_t vec_len, size_t total_bytes) { - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); while (pid_fd == -1) { int newfd = syscall(SYS_pidfd_open, getpid(), 0); if (newfd == -1) { @@ -654,8 +654,8 @@ pages_purge_process_madvise_impl( } if (!atomic_compare_exchange_strong_i(&process_madvise_pidfd, &pid_fd, newfd, - ATOMIC_SEQ_CST, - ATOMIC_SEQ_CST)) { + ATOMIC_RELAXED, + ATOMIC_RELAXED)) { /* Someone else set the fd, so we close ours */ assert(pid_fd != -1); close(newfd); @@ -671,9 +671,9 @@ pages_purge_process_madvise_impl( void pages_postfork_child(void) { /* Reset the file descriptor we inherited from parent process */ - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_SEQ_CST); + int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); if (pid_fd != -1) { - atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_SEQ_CST); + atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_RELAXED); close(pid_fd); } } From d73de95f722247a56b5266a27267cd24668081e9 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 4 Jun 2025 09:48:14 -0700 Subject: [PATCH 2526/2608] Experimental configuration option for fast path prefetch from cache_bin --- configure.ac | 30 +++++++++++++++++++ .../internal/jemalloc_internal_defs.h.in | 5 ++++ .../internal/jemalloc_internal_inlines_c.h | 6 ++++ 3 files changed, 41 insertions(+) diff --git a/configure.ac b/configure.ac index c703a6d1..d9153feb 100644 --- a/configure.ac +++ b/configure.ac @@ -1434,6 +1434,36 @@ if test "x$enable_experimental_smallocx" = "x1" ; then fi AC_SUBST([enable_experimental_smallocx]) +dnl Do not enable fastpath prefetch by 
default. +AC_ARG_ENABLE([experimental_fp_prefetch], + [AS_HELP_STRING([--enable-experimental-fp-prefetch], [Enable experimental fastpath prefetch])], +[if test "x$enable_experimental_fp_prefetch" = "xno" ; then +enable_experimental_fp_prefetch="0" +else + dnl Check if we have __builtin_prefetch. + JE_CFLAGS_SAVE() + JE_CFLAGS_ADD([-Werror=implicit-function-declaration]) + JE_COMPILABLE([builtin prefetch], [], [ +void foo(void *p) { __builtin_prefetch(p, 1, 3); } + ], + [je_cv_have_builtin_prefetch]) + + if test "x${je_cv_have_builtin_prefetch}" = "xyes" ; then + enable_experimental_fp_prefetch="1" + else + enable_experimental_fp_prefetch="0" + AC_MSG_ERROR([--enable-experimental-fp-prefetch can only be used when __builtin_prefetch is available]) + fi + JE_CFLAGS_RESTORE() +fi +], +[enable_experimental_fp_prefetch="0"] +) +if test "x$enable_experimental_fp_prefetch" = "x1" ; then + AC_DEFINE([JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH], [ ], [ ]) +fi +AC_SUBST([enable_experimental_fp_prefetch]) + dnl Do not enable profiling by default. AC_ARG_ENABLE([prof], [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 31ae2e8e..3a945ba1 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -160,6 +160,11 @@ /* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API +/* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH enables prefetch + * on malloc fast path. + */ +#undef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH + /* JEMALLOC_PROF enables allocation profiling. 
*/ #undef JEMALLOC_PROF diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 2c61f8c4..16f86ad4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -374,6 +374,12 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { */ ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { +#if defined(JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH) + cache_bin_sz_t lb = (cache_bin_sz_t)(uintptr_t)bin->stack_head; + if(likely(lb != bin->low_bits_empty)) { + util_prefetch_write_range(*(bin->stack_head), usize); + } +#endif fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } From e4fa33148a4e93275dac0f306d8759c89597d55f Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Wed, 20 Aug 2025 16:30:00 -0700 Subject: [PATCH 2527/2608] Remove an unused function and global variable When the dehugify functionality was retired in a previous commit, a dehugify-related function and global variable in a test were accidentally left in place, causing builds that add -Werror to CFLAGS to fail. 
--- test/unit/hpa_vectorized_madvise_large_batch.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index c974500c..e1393225 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -113,12 +113,6 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } -static size_t ndefer_dehugify_calls = 0; -static void -defer_test_dehugify(void *ptr, size_t size) { - ++ndefer_dehugify_calls; -} - static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { From 5e98585b37556cdb762e36f02b657742b8c47fe3 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 13 Aug 2025 17:59:36 -0700 Subject: [PATCH 2528/2608] Save and restore errno when calling process_madvise --- src/pages.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 076091e3..78f3a1b7 100644 --- a/src/pages.c +++ b/src/pages.c @@ -622,6 +622,7 @@ pages_dodump(void *addr, size_t size) { # include # include static atomic_i_t process_madvise_pidfd = ATOMIC_INIT(-1); +static atomic_b_t process_madvise_gate = ATOMIC_INIT(true); static bool init_process_madvise(void) { @@ -646,9 +647,12 @@ init_process_madvise(void) { static bool pages_purge_process_madvise_impl( void *vec, size_t vec_len, size_t total_bytes) { + if (!atomic_load_b(&process_madvise_gate, ATOMIC_RELAXED)) { + return true; + } int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); while (pid_fd == -1) { - int newfd = syscall(SYS_pidfd_open, getpid(), 0); + int newfd = (int) syscall(SYS_pidfd_open, getpid(), 0); if (newfd == -1) { return true; } @@ -663,8 +667,22 @@ pages_purge_process_madvise_impl( pid_fd = newfd; } } + + /* + * TODO: remove this save/restore of errno after supporting errno + * preservation for free() call properly. 
+ */ + int saved_errno = get_errno(); size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pid_fd, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); + if (purged_bytes == (size_t) -1) { + if (errno == EPERM || errno == EINVAL || errno == ENOSYS) { + /* Process madvise not supported the way we need it. */ + atomic_store_b(&process_madvise_gate, false, + ATOMIC_RELAXED); + } + set_errno(saved_errno); + } return purged_bytes != total_bytes; } From ced8b3cffb650af8b7bef7f6995b9032b55aeb0b Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Thu, 21 Aug 2025 11:16:33 -0700 Subject: [PATCH 2529/2608] Fix the compilation check for process madvise An include of unistd.h is needed to make the declaration of the syscall function visible to the compiler. The include of sys/mman.h is not used at all. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index d9153feb..ce5c8adc 100644 --- a/configure.ac +++ b/configure.ac @@ -2633,8 +2633,8 @@ if test "x${je_cv_madvise}" = "xyes" ; then dnl Check for process_madvise JE_COMPILABLE([process_madvise(2)], [ -#include #include +#include ], [ syscall(SYS_process_madvise, 0, (void *)0, 0, 0, 0); ], [je_cv_process_madvise]) From 2114349a4e9933ebff87df01572a94a12eca5d86 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 15 Jul 2025 15:44:14 -0700 Subject: [PATCH 2530/2608] Revert PR #2608: Manually revert commits 70c94d..f9c0b5 Closes: #2707 --- Makefile.in | 3 - include/jemalloc/internal/arena_inlines_b.h | 154 +--------- include/jemalloc/internal/arena_structs.h | 2 +- include/jemalloc/internal/batcher.h | 46 --- include/jemalloc/internal/bin.h | 74 +---- include/jemalloc/internal/bin_info.h | 11 - include/jemalloc/internal/bin_stats.h | 5 - include/jemalloc/internal/witness.h | 3 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- 
.../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/arena.c | 89 ++---- src/batcher.c | 98 ------- src/bin.c | 48 +--- src/bin_info.c | 24 -- src/ctl.c | 37 --- src/jemalloc.c | 14 - src/stats.c | 58 +--- src/tcache.c | 194 +++---------- test/analyze/sizes.c | 2 - test/include/test/fork.h | 34 --- test/unit/batcher.c | 243 ---------------- test/unit/bin_batching.c | 270 ------------------ test/unit/bin_batching.sh | 10 - test/unit/fork.c | 37 ++- 30 files changed, 124 insertions(+), 1364 deletions(-) delete mode 100644 include/jemalloc/internal/batcher.h delete mode 100644 src/batcher.c delete mode 100644 test/include/test/fork.h delete mode 100644 test/unit/batcher.c delete mode 100644 test/unit/bin_batching.c delete mode 100644 test/unit/bin_batching.sh diff --git a/Makefile.in b/Makefile.in index 2519ed83..4e9d0bea 100644 --- a/Makefile.in +++ b/Makefile.in @@ -98,7 +98,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/arena.c \ $(srcroot)src/background_thread.c \ $(srcroot)src/base.c \ - $(srcroot)src/batcher.c \ $(srcroot)src/bin.c \ $(srcroot)src/bin_info.c \ $(srcroot)src/bitmap.c \ @@ -208,8 +207,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_enable.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ - $(srcroot)test/unit/batcher.c \ - $(srcroot)test/unit/bin_batching.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 549dfb8a..6276deaa 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -588,11 +588,10 @@ arena_dalloc_bin_locked_begin( * stats 
updates, which happen during finish (this lets running counts get left * in a register). */ -JEMALLOC_ALWAYS_INLINE void +JEMALLOC_ALWAYS_INLINE bool arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr, edata_t **dalloc_slabs, unsigned ndalloc_slabs, - unsigned *dalloc_slabs_count, edata_list_active_t *dalloc_slabs_extra) { + void *ptr) { const bin_info_t *bin_info = &bin_infos[binind]; size_t regind = arena_slab_regind(info, binind, slab, ptr); slab_data_t *slab_data = edata_slab_data_get(slab); @@ -612,17 +611,12 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, if (nfree == bin_info->nregs) { arena_dalloc_bin_locked_handle_newly_empty( tsdn, arena, slab, bin); - - if (*dalloc_slabs_count < ndalloc_slabs) { - dalloc_slabs[*dalloc_slabs_count] = slab; - (*dalloc_slabs_count)++; - } else { - edata_list_active_append(dalloc_slabs_extra, slab); - } + return true; } else if (nfree == 1 && slab != bin->slabcur) { arena_dalloc_bin_locked_handle_newly_nonempty( tsdn, arena, slab, bin); } + return false; } JEMALLOC_ALWAYS_INLINE void @@ -635,148 +629,10 @@ arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, } } -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_impl(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *dalloc_bin_info, unsigned binind, - edata_t **dalloc_slabs, unsigned ndalloc_slabs, unsigned *dalloc_count, - edata_list_active_t *dalloc_slabs_extra) { - assert(binind < bin_info_nbatched_sizes); - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - size_t nelems_to_pop = batcher_pop_begin( - tsdn, &batched_bin->remote_frees); - - bin_batching_test_mid_pop(nelems_to_pop); - if (nelems_to_pop == BATCHER_NO_IDX) { - malloc_mutex_assert_not_owner( - tsdn, &batched_bin->remote_frees.mtx); - return; - } else { - malloc_mutex_assert_owner(tsdn, &batched_bin->remote_frees.mtx); - } - - 
size_t npushes = batcher_pop_get_pushes( - tsdn, &batched_bin->remote_frees); - bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; - for (size_t i = 0; i < nelems_to_pop; i++) { - remote_free_data[i] = batched_bin->remote_free_data[i]; - } - batcher_pop_end(tsdn, &batched_bin->remote_frees); - - for (size_t i = 0; i < nelems_to_pop; i++) { - arena_dalloc_bin_locked_step(tsdn, arena, bin, dalloc_bin_info, - binind, remote_free_data[i].slab, remote_free_data[i].ptr, - dalloc_slabs, ndalloc_slabs, dalloc_count, - dalloc_slabs_extra); - } - - bin->stats.batch_pops++; - bin->stats.batch_pushes += npushes; - bin->stats.batch_pushed_elems += nelems_to_pop; -} - -typedef struct arena_bin_flush_batch_state_s arena_bin_flush_batch_state_t; -struct arena_bin_flush_batch_state_s { - arena_dalloc_bin_locked_info_t info; - - /* - * Bin batching is subtle in that there are unusual edge cases in which - * it can trigger the deallocation of more slabs than there were items - * flushed (say, if every original deallocation triggered a slab - * deallocation, and so did every batched one). So we keep a small - * backup array for any "extra" slabs, as well as a a list to allow a - * dynamic number of ones exceeding that array. 
- */ - edata_t *dalloc_slabs[8]; - unsigned dalloc_slab_count; - edata_list_active_t dalloc_slabs_extra; -}; - -JEMALLOC_ALWAYS_INLINE unsigned -arena_bin_batch_get_ndalloc_slabs(unsigned preallocated_slabs) { - if (preallocated_slabs > bin_batching_test_ndalloc_slabs_max) { - return bin_batching_test_ndalloc_slabs_max; - } - return preallocated_slabs; -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_after_lock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (binind >= bin_info_nbatched_sizes) { - return; - } - - arena_dalloc_bin_locked_begin(&state->info, binind); - state->dalloc_slab_count = 0; - edata_list_active_init(&state->dalloc_slabs_extra); - - unsigned preallocated_slabs = (unsigned)(sizeof(state->dalloc_slabs) - / sizeof(state->dalloc_slabs[0])); - unsigned ndalloc_slabs = arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); - - arena_bin_flush_batch_impl(tsdn, arena, bin, &state->info, binind, - state->dalloc_slabs, ndalloc_slabs, &state->dalloc_slab_count, - &state->dalloc_slabs_extra); -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_before_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (binind >= bin_info_nbatched_sizes) { - return; - } - - arena_dalloc_bin_locked_finish(tsdn, arena, bin, &state->info); -} - -static inline bool -arena_bin_has_batch(szind_t binind) { - return binind < bin_info_nbatched_sizes; -} - -JEMALLOC_ALWAYS_INLINE void -arena_bin_flush_batch_after_unlock(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - unsigned binind, arena_bin_flush_batch_state_t *state) { - if (!arena_bin_has_batch(binind)) { - return; - } - /* - * The initialization of dalloc_slabs_extra is guarded by an - * arena_bin_has_batch check higher up the stack. But the clang - * analyzer forgets this down the stack, triggering a spurious error - * reported here. 
- */ - JEMALLOC_CLANG_ANALYZER_SUPPRESS { - bin_batching_test_after_unlock(state->dalloc_slab_count, - edata_list_active_empty(&state->dalloc_slabs_extra)); - } - for (unsigned i = 0; i < state->dalloc_slab_count; i++) { - edata_t *slab = state->dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - while (!edata_list_active_empty(&state->dalloc_slabs_extra)) { - edata_t *slab = edata_list_active_first( - &state->dalloc_slabs_extra); - edata_list_active_remove(&state->dalloc_slabs_extra, slab); - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } -} - static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); - bin_t *ret; - if (arena_bin_has_batch(binind)) { - ret = (bin_t *)((bin_with_batch_t *)shard0 + binshard); - } else { - ret = shard0 + binshard; - } - assert(binind >= SC_NBINS - 1 - || (uintptr_t)ret - < (uintptr_t)arena + arena_bin_offsets[binind + 1]); - - return ret; + return shard0 + binshard; } #endif /* JEMALLOC_INTERNAL_ARENA_INLINES_B_H */ diff --git a/include/jemalloc/internal/arena_structs.h b/include/jemalloc/internal/arena_structs.h index 4778ca1b..471f7692 100644 --- a/include/jemalloc/internal/arena_structs.h +++ b/include/jemalloc/internal/arena_structs.h @@ -105,7 +105,7 @@ struct arena_s { "Do not use this field directly. 
" "Use `arena_get_bin` instead.") JEMALLOC_ALIGNED(CACHELINE) - bin_with_batch_t all_bins[0]; + bin_t all_bins[0]; }; #endif /* JEMALLOC_INTERNAL_ARENA_STRUCTS_H */ diff --git a/include/jemalloc/internal/batcher.h b/include/jemalloc/internal/batcher.h deleted file mode 100644 index 3ceb8256..00000000 --- a/include/jemalloc/internal/batcher.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_BATCHER_H -#define JEMALLOC_INTERNAL_BATCHER_H - -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex.h" - -#define BATCHER_NO_IDX ((size_t) - 1) - -typedef struct batcher_s batcher_t; -struct batcher_s { - /* - * Optimize for locality -- nelems_max and nelems are always touched - * togehter, along with the front of the mutex. The end of the mutex is - * only touched if there's contention. - */ - atomic_zu_t nelems; - size_t nelems_max; - size_t npushes; - malloc_mutex_t mtx; -}; - -void batcher_init(batcher_t *batcher, size_t nelems_max); - -/* - * Returns an index (into some user-owned array) to use for pushing, or - * BATCHER_NO_IDX if no index is free. If the former, the caller must call - * batcher_push_end once done. - */ -size_t batcher_push_begin( - tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push); -void batcher_push_end(tsdn_t *tsdn, batcher_t *batcher); - -/* - * Returns the number of items to pop, or BATCHER_NO_IDX if there are none. - * If the former, must be followed by a call to batcher_pop_end. 
- */ -size_t batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher); -size_t batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher); -void batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher); - -void batcher_prefork(tsdn_t *tsdn, batcher_t *batcher); -void batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher); -void batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher); - -#endif /* JEMALLOC_INTERNAL_BATCHER_H */ diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index e91583d7..05a2f845 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,60 +2,12 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/batcher.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/sc.h" -#define BIN_REMOTE_FREE_ELEMS_MAX 16 - -#ifdef JEMALLOC_JET -extern void (*bin_batching_test_after_push_hook)(size_t idx); -extern void (*bin_batching_test_mid_pop_hook)(size_t elems_to_pop); -extern void (*bin_batching_test_after_unlock_hook)( - unsigned slab_dalloc_count, bool list_empty); -#endif - -#ifdef JEMALLOC_JET -extern unsigned bin_batching_test_ndalloc_slabs_max; -#else -static const unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; -#endif - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_after_push(size_t idx) { - (void)idx; -#ifdef JEMALLOC_JET - if (bin_batching_test_after_push_hook != NULL) { - bin_batching_test_after_push_hook(idx); - } -#endif -} - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_mid_pop(size_t elems_to_pop) { - (void)elems_to_pop; -#ifdef JEMALLOC_JET - if (bin_batching_test_mid_pop_hook != NULL) { - bin_batching_test_mid_pop_hook(elems_to_pop); - } -#endif -} - -JEMALLOC_ALWAYS_INLINE void -bin_batching_test_after_unlock(unsigned slab_dalloc_count, bool list_empty) { - (void)slab_dalloc_count; - 
(void)list_empty; -#ifdef JEMALLOC_JET - if (bin_batching_test_after_unlock_hook != NULL) { - bin_batching_test_after_unlock_hook( - slab_dalloc_count, list_empty); - } -#endif -} - /* * A bin contains a set of extents that are currently being used for slab * allocations. @@ -90,19 +42,6 @@ struct bin_s { edata_list_active_t slabs_full; }; -typedef struct bin_remote_free_data_s bin_remote_free_data_t; -struct bin_remote_free_data_s { - void *ptr; - edata_t *slab; -}; - -typedef struct bin_with_batch_s bin_with_batch_t; -struct bin_with_batch_s { - bin_t bin; - batcher_t remote_frees; - bin_remote_free_data_t remote_free_data[BIN_REMOTE_FREE_ELEMS_MAX]; -}; - /* A set of sharded bins of the same size class. */ typedef struct bins_s bins_t; struct bins_s { @@ -115,12 +54,12 @@ bool bin_update_shard_size(unsigned bin_shards[SC_NBINS], size_t start_size, size_t end_size, size_t nshards); /* Initializes a bin to empty. Returns true on error. */ -bool bin_init(bin_t *bin, unsigned binind); +bool bin_init(bin_t *bin); /* Forking. */ -void bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch); -void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch); -void bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch); +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); /* Stats. 
*/ static inline void @@ -138,11 +77,6 @@ bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { stats->reslabs += bin->stats.reslabs; stats->curslabs += bin->stats.curslabs; stats->nonfull_slabs += bin->stats.nonfull_slabs; - - stats->batch_failed_pushes += bin->stats.batch_failed_pushes; - stats->batch_pushes += bin->stats.batch_pushes; - stats->batch_pushed_elems += bin->stats.batch_pushed_elems; - malloc_mutex_unlock(tsdn, &bin->lock); } diff --git a/include/jemalloc/internal/bin_info.h b/include/jemalloc/internal/bin_info.h index 0022c3f7..8c563dee 100644 --- a/include/jemalloc/internal/bin_info.h +++ b/include/jemalloc/internal/bin_info.h @@ -44,17 +44,6 @@ struct bin_info_s { bitmap_info_t bitmap_info; }; -/* The maximum size a size class can be and still get batching behavior. */ -extern size_t opt_bin_info_max_batched_size; -/* The number of batches per batched size class. */ -extern size_t opt_bin_info_remote_free_max_batch; -// The max number of pending elems (across all batches) -extern size_t opt_bin_info_remote_free_max; - -extern szind_t bin_info_nbatched_sizes; -extern unsigned bin_info_nbatched_bins; -extern unsigned bin_info_nunbatched_bins; - extern bin_info_t bin_infos[SC_NBINS]; void bin_info_boot(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS]); diff --git a/include/jemalloc/internal/bin_stats.h b/include/jemalloc/internal/bin_stats.h index e1095f38..9900e0d1 100644 --- a/include/jemalloc/internal/bin_stats.h +++ b/include/jemalloc/internal/bin_stats.h @@ -48,11 +48,6 @@ struct bin_stats_s { /* Current size of nonfull slabs heap in this bin. 
*/ size_t nonfull_slabs; - - uint64_t batch_pops; - uint64_t batch_failed_pushes; - uint64_t batch_pushes; - uint64_t batch_pushed_elems; }; typedef struct bin_stats_data_s bin_stats_data_t; diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 73770713..7ca3c347 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -64,10 +64,9 @@ enum witness_rank_e { WITNESS_RANK_BASE, WITNESS_RANK_ARENA_LARGE, WITNESS_RANK_HOOK, - WITNESS_RANK_BIN, WITNESS_RANK_LEAF = 0x1000, - WITNESS_RANK_BATCHER = WITNESS_RANK_LEAF, + WITNESS_RANK_BIN = WITNESS_RANK_LEAF, WITNESS_RANK_ARENA_STATS = WITNESS_RANK_LEAF, WITNESS_RANK_COUNTER_ACCUM = WITNESS_RANK_LEAF, WITNESS_RANK_DSS = WITNESS_RANK_LEAF, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index c43b30b1..9743e10b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -380,4 +379,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index a195f6b3..c1ff11a9 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- 
a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index cd16005d..6cb1b35e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 2d8c4be6..5c7b00a2 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -38,7 +38,6 @@ - @@ -379,4 +378,4 @@ - + \ No newline at end of file diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f091475e..c8236a12 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -16,9 +16,6 @@ Source Files - - Source Files - Source Files @@ -203,4 +200,4 @@ Source Files - + \ No newline at end of file diff --git a/src/arena.c b/src/arena.c index 2f58b038..962a325d 100644 --- a/src/arena.c +++ b/src/arena.c @@ -39,7 +39,8 @@ div_info_t arena_binind_div_info[SC_NBINS]; size_t opt_oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; size_t oversize_threshold = OVERSIZE_THRESHOLD_DEFAULT; -uint32_t 
arena_bin_offsets[SC_NBINS]; +uint32_t arena_bin_offsets[SC_NBINS]; +static unsigned nbins_total; /* * a0 is used to handle huge requests before malloc init completes. After @@ -674,17 +675,11 @@ arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { } static void -arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin, unsigned binind) { +arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { edata_t *slab; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); - if (arena_bin_has_batch(binind)) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init( - &batched_bin->remote_frees, BIN_REMOTE_FREE_ELEMS_MAX); - } - if (bin->slabcur != NULL) { slab = bin->slabcur; bin->slabcur = NULL; @@ -835,8 +830,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < SC_NBINS; i++) { for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - arena_bin_reset( - tsd, arena, arena_get_bin(arena, i, j), i); + arena_bin_reset(tsd, arena, arena_get_bin(arena, i, j)); } } pa_shard_reset(tsd_tsdn(tsd), &arena->pa_shard); @@ -1103,19 +1097,8 @@ arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, unsigned binshard; bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); - /* - * This has some fields that are conditionally initialized down batch - * flush pathways. This can trigger static analysis warnings deeper - * down in the static. The accesses are guarded by the same checks as - * the initialization, but the analysis isn't able to track that across - * multiple stack frames. - */ - arena_bin_flush_batch_state_t batch_flush_state - JEMALLOC_CLANG_ANALYZER_SILENCE_INIT({0}); label_refill: malloc_mutex_lock(tsdn, &bin->lock); - arena_bin_flush_batch_after_lock( - tsdn, arena, bin, binind, &batch_flush_state); while (filled < nfill_min) { /* Try batch-fill from slabcur first. 
*/ @@ -1176,11 +1159,7 @@ label_refill: cache_bin->tstats.nrequests = 0; } - arena_bin_flush_batch_before_unlock( - tsdn, arena, bin, binind, &batch_flush_state); malloc_mutex_unlock(tsdn, &bin->lock); - arena_bin_flush_batch_after_unlock( - tsdn, arena, bin, binind, &batch_flush_state); if (alloc_and_retry) { assert(fresh_slab == NULL); @@ -1474,16 +1453,12 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_info_t info; arena_dalloc_bin_locked_begin(&info, binind); - edata_t *dalloc_slabs[1]; - unsigned dalloc_slabs_count = 0; - arena_dalloc_bin_locked_step(tsdn, arena, bin, &info, binind, edata, - ptr, dalloc_slabs, /* ndalloc_slabs */ 1, &dalloc_slabs_count, - /* dalloc_slabs_extra */ NULL); + bool ret = arena_dalloc_bin_locked_step( + tsdn, arena, bin, &info, binind, edata, ptr); arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); - if (dalloc_slabs_count != 0) { - assert(dalloc_slabs[0] == edata); + if (ret) { arena_slab_dalloc(tsdn, arena, edata); } } @@ -1722,6 +1697,7 @@ arena_t * arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { arena_t *arena; base_t *base; + unsigned i; if (ind == 0) { base = b0get(); @@ -1734,13 +1710,14 @@ arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { } size_t arena_size = ALIGNMENT_CEILING(sizeof(arena_t), CACHELINE) - + sizeof(bin_with_batch_t) * bin_info_nbatched_bins - + sizeof(bin_t) * bin_info_nunbatched_bins; + + sizeof(bin_t) * nbins_total; arena = (arena_t *)base_alloc(tsdn, base, arena_size, CACHELINE); if (arena == NULL) { goto label_error; } - + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + assert((uintptr_t)&arena->all_bins[nbins_total - 1] + sizeof(bin_t) + <= (uintptr_t)arena + arena_size);) atomic_store_u(&arena->nthreads[0], 0, ATOMIC_RELAXED); atomic_store_u(&arena->nthreads[1], 0, ATOMIC_RELAXED); arena->last_thd = NULL; @@ -1779,13 +1756,11 @@ 
arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config) { /* Initialize bins. */ atomic_store_u(&arena->binshard_next, 0, ATOMIC_RELEASE); - for (unsigned i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bool err = bin_init(bin, i); - if (err) { - goto label_error; - } + for (i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bool err = bin_init(&arena->all_bins[i]);) + if (err) { + goto label_error; } } @@ -1943,10 +1918,8 @@ arena_boot(sc_data_t *sc_data, base_t *base, bool hpa) { uint32_t cur_offset = (uint32_t)offsetof(arena_t, all_bins);) for (szind_t i = 0; i < SC_NBINS; i++) { arena_bin_offsets[i] = cur_offset; - uint32_t bin_sz = (i < bin_info_nbatched_sizes - ? sizeof(bin_with_batch_t) - : sizeof(bin_t)); - cur_offset += (uint32_t)bin_infos[i].n_shards * bin_sz; + nbins_total += bin_infos[i].n_shards; + cur_offset += (uint32_t)(bin_infos[i].n_shards * sizeof(bin_t)); } return pa_central_init( &arena_pa_central_global, base, hpa, &hpa_hooks_default); @@ -1996,21 +1969,17 @@ arena_prefork7(tsdn_t *tsdn, arena_t *arena) { void arena_prefork8(tsdn_t *tsdn, arena_t *arena) { - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_prefork(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_prefork(tsdn, &arena->all_bins[i]);) } } void arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_postfork_parent(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_parent(tsdn, &arena->all_bins[i]);) } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); @@ -2047,11 
+2016,9 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } } - for (szind_t i = 0; i < SC_NBINS; i++) { - for (unsigned j = 0; j < bin_infos[i].n_shards; j++) { - bin_t *bin = arena_get_bin(arena, i, j); - bin_postfork_child(tsdn, bin, arena_bin_has_batch(i)); - } + for (unsigned i = 0; i < nbins_total; i++) { + JEMALLOC_SUPPRESS_WARN_ON_USAGE( + bin_postfork_child(tsdn, &arena->all_bins[i]);) } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); diff --git a/src/batcher.c b/src/batcher.c deleted file mode 100644 index af71dae5..00000000 --- a/src/batcher.c +++ /dev/null @@ -1,98 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" - -#include "jemalloc/internal/batcher.h" - -#include "jemalloc/internal/assert.h" -#include "jemalloc/internal/atomic.h" - -void -batcher_init(batcher_t *batcher, size_t nelems_max) { - atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); - batcher->nelems_max = nelems_max; - batcher->npushes = 0; - malloc_mutex_init(&batcher->mtx, "batcher", WITNESS_RANK_BATCHER, - malloc_mutex_rank_exclusive); -} - -/* - * Returns an index (into some user-owned array) to use for pushing, or - * BATCHER_NO_IDX if no index is free. - */ -size_t -batcher_push_begin(tsdn_t *tsdn, batcher_t *batcher, size_t elems_to_push) { - assert(elems_to_push > 0); - size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - if (nelems_guess + elems_to_push > batcher->nelems_max) { - return BATCHER_NO_IDX; - } - malloc_mutex_lock(tsdn, &batcher->mtx); - size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - if (nelems + elems_to_push > batcher->nelems_max) { - malloc_mutex_unlock(tsdn, &batcher->mtx); - return BATCHER_NO_IDX; - } - assert(elems_to_push <= batcher->nelems_max - nelems); - /* - * We update nelems at push time (instead of during pop) so that other - * racing accesses of the batcher can fail fast instead of trying to - * acquire a mutex only to discover that there's no space for them. 
- */ - atomic_store_zu( - &batcher->nelems, nelems + elems_to_push, ATOMIC_RELAXED); - batcher->npushes++; - return nelems; -} - -size_t -batcher_pop_get_pushes(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_assert_owner(tsdn, &batcher->mtx); - size_t npushes = batcher->npushes; - batcher->npushes = 0; - return npushes; -} - -void -batcher_push_end(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_assert_owner(tsdn, &batcher->mtx); - assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) > 0); - malloc_mutex_unlock(tsdn, &batcher->mtx); -} - -size_t -batcher_pop_begin(tsdn_t *tsdn, batcher_t *batcher) { - size_t nelems_guess = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - assert(nelems_guess <= batcher->nelems_max); - if (nelems_guess == 0) { - return BATCHER_NO_IDX; - } - malloc_mutex_lock(tsdn, &batcher->mtx); - size_t nelems = atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED); - assert(nelems <= batcher->nelems_max); - if (nelems == 0) { - malloc_mutex_unlock(tsdn, &batcher->mtx); - return BATCHER_NO_IDX; - } - atomic_store_zu(&batcher->nelems, 0, ATOMIC_RELAXED); - return nelems; -} - -void -batcher_pop_end(tsdn_t *tsdn, batcher_t *batcher) { - assert(atomic_load_zu(&batcher->nelems, ATOMIC_RELAXED) == 0); - malloc_mutex_unlock(tsdn, &batcher->mtx); -} - -void -batcher_prefork(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_prefork(tsdn, &batcher->mtx); -} - -void -batcher_postfork_parent(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_postfork_parent(tsdn, &batcher->mtx); -} - -void -batcher_postfork_child(tsdn_t *tsdn, batcher_t *batcher) { - malloc_mutex_postfork_child(tsdn, &batcher->mtx); -} diff --git a/src/bin.c b/src/bin.c index 98d1da02..a11b108e 100644 --- a/src/bin.c +++ b/src/bin.c @@ -6,14 +6,6 @@ #include "jemalloc/internal/sc.h" #include "jemalloc/internal/witness.h" -#ifdef JEMALLOC_JET -unsigned bin_batching_test_ndalloc_slabs_max = (unsigned)-1; -void (*bin_batching_test_after_push_hook)(size_t push_idx); -void 
(*bin_batching_test_mid_pop_hook)(size_t nelems_to_pop); -void (*bin_batching_test_after_unlock_hook)( - unsigned slab_dalloc_count, bool list_empty); -#endif - bool bin_update_shard_size(unsigned bin_shard_sizes[SC_NBINS], size_t start_size, size_t end_size, size_t nshards) { @@ -47,7 +39,7 @@ bin_shard_sizes_boot(unsigned bin_shard_sizes[SC_NBINS]) { } bool -bin_init(bin_t *bin, unsigned binind) { +bin_init(bin_t *bin) { if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, malloc_mutex_rank_exclusive)) { return true; @@ -58,52 +50,20 @@ bin_init(bin_t *bin, unsigned binind) { if (config_stats) { memset(&bin->stats, 0, sizeof(bin_stats_t)); } - if (arena_bin_has_batch(binind)) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *)bin; - batcher_init( - &batched_bin->remote_frees, opt_bin_info_remote_free_max); - } return false; } void -bin_prefork(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_prefork(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_prefork(tsdn, &bin->lock); - if (has_batch) { - /* - * The batch mutex has lower rank than the bin mutex (as it must - * -- it's acquired later). But during forking, we go - * bin-at-a-time, so that we acquire mutex on bin 0, then on - * the bin 0 batcher, then on bin 1. This is a safe ordering - * (it's ordered by the index of arenas and bins within those - * arenas), but will trigger witness errors that would - * otherwise force another level of arena forking that breaks - * bin encapsulation (because the witness API doesn't "know" - * about arena or bin ordering -- it just sees that the batcher - * has a lower rank than the bin). So instead we exclude the - * batcher mutex from witness checking during fork (which is - * the only time we touch multiple bins at once) by passing - * TSDN_NULL. 
- */ - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_prefork(TSDN_NULL, &batched->remote_frees); - } } void -bin_postfork_parent(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_parent(tsdn, &bin->lock); - if (has_batch) { - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_parent(TSDN_NULL, &batched->remote_frees); - } } void -bin_postfork_child(tsdn_t *tsdn, bin_t *bin, bool has_batch) { +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_child(tsdn, &bin->lock); - if (has_batch) { - bin_with_batch_t *batched = (bin_with_batch_t *)bin; - batcher_postfork_child(TSDN_NULL, &batched->remote_frees); - } } diff --git a/src/bin_info.c b/src/bin_info.c index de93418a..e10042fd 100644 --- a/src/bin_info.c +++ b/src/bin_info.c @@ -3,26 +3,8 @@ #include "jemalloc/internal/bin_info.h" -/* - * We leave bin-batching disabled by default, with other settings chosen mostly - * empirically; across the test programs I looked at they provided the most bang - * for the buck. With other default settings, these choices for bin batching - * result in them consuming far less memory (even in the worst case) than the - * tcaches themselves, the arena, etc. - * Note that we always try to pop all bins on every arena cache bin lock - * operation, so the typical memory waste is far less than this (and only on - * hot bins, which tend to be large anyways). - */ -size_t opt_bin_info_max_batched_size = 0; /* 192 is a good default. 
*/ -size_t opt_bin_info_remote_free_max_batch = 4; -size_t opt_bin_info_remote_free_max = BIN_REMOTE_FREE_ELEMS_MAX; - bin_info_t bin_infos[SC_NBINS]; -szind_t bin_info_nbatched_sizes; -unsigned bin_info_nbatched_bins; -unsigned bin_info_nunbatched_bins; - static void bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bin_info_t infos[SC_NBINS]) { @@ -38,12 +20,6 @@ bin_infos_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], bitmap_info_t bitmap_info = BITMAP_INFO_INITIALIZER( bin_info->nregs); bin_info->bitmap_info = bitmap_info; - if (bin_info->reg_size <= opt_bin_info_max_batched_size) { - bin_info_nbatched_sizes++; - bin_info_nbatched_bins += bin_info->n_shards; - } else { - bin_info_nunbatched_bins += bin_info->n_shards; - } } } diff --git a/src/ctl.c b/src/ctl.c index 9e9a4b43..a4c60ce0 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -134,9 +134,6 @@ CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_experimental_infallible_new) CTL_PROTO(opt_experimental_tcache_gc) -CTL_PROTO(opt_max_batched_size) -CTL_PROTO(opt_remote_free_max) -CTL_PROTO(opt_remote_free_max_batch) CTL_PROTO(opt_tcache) CTL_PROTO(opt_tcache_max) CTL_PROTO(opt_tcache_nslots_small_min) @@ -248,10 +245,6 @@ CTL_PROTO(stats_arenas_i_bins_j_nslabs) CTL_PROTO(stats_arenas_i_bins_j_nreslabs) CTL_PROTO(stats_arenas_i_bins_j_curslabs) CTL_PROTO(stats_arenas_i_bins_j_nonfull_slabs) -CTL_PROTO(stats_arenas_i_bins_j_batch_pops) -CTL_PROTO(stats_arenas_i_bins_j_batch_failed_pushes) -CTL_PROTO(stats_arenas_i_bins_j_batch_pushes) -CTL_PROTO(stats_arenas_i_bins_j_batch_pushed_elems) INDEX_PROTO(stats_arenas_i_bins_j) CTL_PROTO(stats_arenas_i_lextents_j_nmalloc) CTL_PROTO(stats_arenas_i_lextents_j_ndalloc) @@ -501,9 +494,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("experimental_infallible_new"), CTL(opt_experimental_infallible_new)}, 
{NAME("experimental_tcache_gc"), CTL(opt_experimental_tcache_gc)}, - {NAME("max_batched_size"), CTL(opt_max_batched_size)}, - {NAME("remote_free_max"), CTL(opt_remote_free_max)}, - {NAME("remote_free_max_batch"), CTL(opt_remote_free_max_batch)}, {NAME("tcache"), CTL(opt_tcache)}, {NAME("tcache_max"), CTL(opt_tcache_max)}, {NAME("tcache_nslots_small_min"), CTL(opt_tcache_nslots_small_min)}, @@ -673,11 +663,6 @@ static const ctl_named_node_t stats_arenas_i_bins_j_node[] = { {NAME("nreslabs"), CTL(stats_arenas_i_bins_j_nreslabs)}, {NAME("curslabs"), CTL(stats_arenas_i_bins_j_curslabs)}, {NAME("nonfull_slabs"), CTL(stats_arenas_i_bins_j_nonfull_slabs)}, - {NAME("batch_pops"), CTL(stats_arenas_i_bins_j_batch_pops)}, - {NAME("batch_failed_pushes"), - CTL(stats_arenas_i_bins_j_batch_failed_pushes)}, - {NAME("batch_pushes"), CTL(stats_arenas_i_bins_j_batch_pushes)}, - {NAME("batch_pushed_elems"), CTL(stats_arenas_i_bins_j_batch_pushed_elems)}, {NAME("mutex"), CHILD(named, stats_arenas_i_bins_j_mutex)}}; static const ctl_named_node_t super_stats_arenas_i_bins_j_node[] = { @@ -1219,14 +1204,6 @@ ctl_arena_stats_sdmerge( assert(bstats->curslabs == 0); assert(bstats->nonfull_slabs == 0); } - - merged->batch_pops += bstats->batch_pops; - merged->batch_failed_pushes += - bstats->batch_failed_pushes; - merged->batch_pushes += bstats->batch_pushes; - merged->batch_pushed_elems += - bstats->batch_pushed_elems; - malloc_mutex_prof_merge(&sdstats->bstats[i].mutex_data, &astats->bstats[i].mutex_data); } @@ -2202,10 +2179,6 @@ CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_CGEN(config_enable_cxx, opt_experimental_infallible_new, opt_experimental_infallible_new, bool) CTL_RO_NL_GEN(opt_experimental_tcache_gc, opt_experimental_tcache_gc, bool) -CTL_RO_NL_GEN(opt_max_batched_size, opt_bin_info_max_batched_size, size_t) -CTL_RO_NL_GEN(opt_remote_free_max, opt_bin_info_remote_free_max, size_t) -CTL_RO_NL_GEN( - opt_remote_free_max_batch, 
opt_bin_info_remote_free_max_batch, size_t) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) CTL_RO_NL_GEN(opt_tcache_max, opt_tcache_max, size_t) CTL_RO_NL_GEN( @@ -3982,16 +3955,6 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_curslabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.curslabs, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.nonfull_slabs, size_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pops, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pops, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_failed_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_failed_pushes, - uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushes, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushes, uint64_t) -CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_batch_pushed_elems, - arenas_i(mib[2])->astats->bstats[mib[4]].stats_data.batch_pushed_elems, - uint64_t) static const ctl_named_node_t * stats_arenas_i_bins_j_index( diff --git a/src/jemalloc.c b/src/jemalloc.c index 4adcbf3c..9f59a781 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1391,20 +1391,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } while (vlen_left > 0); CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_bin_info_max_batched_size, - "max_batched_size", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max_batch, - "remote_free_max_batch", 0, - BIN_REMOTE_FREE_ELEMS_MAX, CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_bin_info_remote_free_max, - "remote_free_max", 0, BIN_REMOTE_FREE_ELEMS_MAX, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - if (CONF_MATCH("tcache_ncached_max")) { bool err = tcache_bin_info_default_init( v, vlen); diff --git a/src/stats.c b/src/stats.c index 
84af3911..a8a574ac 100644 --- a/src/stats.c +++ b/src/stats.c @@ -357,15 +357,6 @@ stats_arena_bins_print( COL_HDR(row, nreslabs, NULL, right, 13, uint64) COL_HDR(row, nreslabs_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, pops, NULL, right, 10, uint64) - COL_HDR(row, pops_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, failed_push, NULL, right, 13, uint64) - COL_HDR(row, failed_push_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, push, NULL, right, 7, uint64) - COL_HDR(row, push_ps, "(#/sec)", right, 8, uint64) - COL_HDR(row, push_elem, NULL, right, 12, uint64) - COL_HDR(row, push_elem_ps, "(#/sec)", right, 8, uint64) - /* Don't want to actually print the name. */ header_justify_spacer.str_val = " "; col_justify_spacer.str_val = " "; @@ -406,15 +397,13 @@ stats_arena_bins_print( } for (j = 0, in_gap = false; j < nbins; j++) { - uint64_t nslabs; - size_t reg_size, slab_size, curregs; - size_t curslabs; - size_t nonfull_slabs; - uint32_t nregs, nshards; - uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; - uint64_t nreslabs; - uint64_t batch_pops, batch_failed_pushes, batch_pushes, - batch_pushed_elems; + uint64_t nslabs; + size_t reg_size, slab_size, curregs; + size_t curslabs; + size_t nonfull_slabs; + uint32_t nregs, nshards; + uint64_t nmalloc, ndalloc, nrequests, nfills, nflushes; + uint64_t nreslabs; prof_stats_t prof_live; prof_stats_t prof_accum; @@ -463,15 +452,6 @@ stats_arena_bins_print( CTL_LEAF(stats_arenas_mib, 5, "nonfull_slabs", &nonfull_slabs, size_t); - CTL_LEAF( - stats_arenas_mib, 5, "batch_pops", &batch_pops, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_failed_pushes", - &batch_failed_pushes, uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushes", &batch_pushes, - uint64_t); - CTL_LEAF(stats_arenas_mib, 5, "batch_pushed_elems", - &batch_pushed_elems, uint64_t); - if (mutex) { mutex_stats_read_arena_bin(stats_arenas_mib, 5, col_mutex64, col_mutex32, uptime); @@ -506,14 +486,6 @@ stats_arena_bins_print( emitter, "curslabs", 
emitter_type_size, &curslabs); emitter_json_kv(emitter, "nonfull_slabs", emitter_type_size, &nonfull_slabs); - emitter_json_kv( - emitter, "batch_pops", emitter_type_uint64, &batch_pops); - emitter_json_kv(emitter, "batch_failed_pushes", - emitter_type_uint64, &batch_failed_pushes); - emitter_json_kv(emitter, "batch_pushes", emitter_type_uint64, - &batch_pushes); - emitter_json_kv(emitter, "batch_pushed_elems", - emitter_type_uint64, &batch_pushed_elems); if (mutex) { emitter_json_object_kv_begin(emitter, "mutex"); mutex_stats_emit( @@ -573,19 +545,6 @@ stats_arena_bins_print( col_nreslabs.uint64_val = nreslabs; col_nreslabs_ps.uint64_val = rate_per_second(nreslabs, uptime); - col_pops.uint64_val = batch_pops; - col_pops_ps.uint64_val = rate_per_second(batch_pops, uptime); - - col_failed_push.uint64_val = batch_failed_pushes; - col_failed_push_ps.uint64_val = rate_per_second( - batch_failed_pushes, uptime); - col_push.uint64_val = batch_pushes; - col_push_ps.uint64_val = rate_per_second(batch_pushes, uptime); - - col_push_elem.uint64_val = batch_pushed_elems; - col_push_elem_ps.uint64_val = rate_per_second( - batch_pushed_elems, uptime); - /* * Note that mutex columns were initialized above, if mutex == * true. @@ -1677,9 +1636,6 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("xmalloc") OPT_WRITE_BOOL("experimental_infallible_new") OPT_WRITE_BOOL("experimental_tcache_gc") - OPT_WRITE_SIZE_T("max_batched_size") - OPT_WRITE_SIZE_T("remote_free_max") - OPT_WRITE_SIZE_T("remote_free_max_batch") OPT_WRITE_BOOL("tcache") OPT_WRITE_SIZE_T("tcache_max") OPT_WRITE_UNSIGNED("tcache_nslots_small_min") diff --git a/src/tcache.c b/src/tcache.c index 44a96841..2d73237b 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -608,7 +608,7 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, /* nfill_min */ - opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, + opt_experimental_tcache_gc ? ((nfill >> 1) + 1) : nfill, /* nfill_max */ nfill); tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); @@ -680,8 +680,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, assert(binind < SC_NBINS); arena_t *tcache_arena = tcache_slow->arena; assert(tcache_arena != NULL); - unsigned tcache_binshard = - tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; /* * Variable length array must have > 0 length; the last element is never @@ -698,25 +696,12 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, unsigned dalloc_count = 0; VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - /* - * There's an edge case where we need to deallocate more slabs than we - * have elements of dalloc_slabs. This can if we end up deallocating - * items batched by another thread in addition to ones flushed from the - * cache. Since this is not very likely (most small object - * deallocations don't free up a whole slab), we don't want to burn the - * stack space to keep those excess slabs in an array. Instead we'll - * maintain an overflow list. - */ - edata_list_active_t dalloc_slabs_extra; - edata_list_active_init(&dalloc_slabs_extra); - /* * We're about to grab a bunch of locks. If one of them happens to be * the one guarding the arena-level stats counters we flush our * thread-local ones to, we do so under one critical section. */ bool merged_stats = false; - /* * We maintain the invariant that all edatas yet to be flushed are * contained in the half-open range [flush_start, flush_end). We'll @@ -741,7 +726,6 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, unsigned cur_binshard = edata_binshard_get(cur_edata); bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); assert(cur_binshard < bin_infos[binind].n_shards); - /* * Start off the partition; item_edata[i] always matches itself * of course. 
@@ -788,150 +772,43 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, } } - /* - * We never batch when flushing to our home-base bin shard, - * since it's likely that we'll have to acquire that lock anyway - * when flushing stats. - * - * A plausible check we could add to can_batch is - * '&& arena_is_auto(cur_arena)'. The motivation would be that - * we have a higher tolerance for dubious user assumptions - * around non-auto arenas (e.g. "if I deallocate every object I - * allocated, and then call tcache.flush, then the arena stats - * must reflect zero live allocations"). - * - * This is dubious for a couple reasons: - * - We already don't provide perfect fidelity for stats - * counting (e.g. for profiled allocations, whose size can - * inflate in stats). - * - Hanging load-bearing guarantees around stats impedes - * scalability in general. - * - * There are some "complete" strategies we could do instead: - * - Add a arena..quiesce call to pop all bins for users who - * do want those stats accounted for. - * - Make batchability a user-controllable per-arena option. - * - Do a batch pop after every mutex acquisition for which we - * want to provide accurate stats. This gives perfectly - * accurate stats, but can cause weird performance effects - * (because doing stats collection can now result in slabs - * becoming empty, and therefore purging, large mutex - * acquisition, etc.). - * - Propagate the "why" behind a flush down to the level of the - * batcher, and include a batch pop attempt down full tcache - * flushing pathways. This is just a lot of plumbing and - * internal complexity. - * - * We don't do any of these right now, but the decision calculus - * and tradeoffs are subtle enough that the reasoning was worth - * leaving in this comment. 
- */ - bool bin_is_batched = arena_bin_has_batch(binind); - bool home_binshard = (cur_arena == tcache_arena - && cur_binshard == tcache_binshard); - bool can_batch = (flush_start - prev_flush_start - <= opt_bin_info_remote_free_max_batch) - && !home_binshard && bin_is_batched; + /* Actually do the flushing. */ + malloc_mutex_lock(tsdn, &cur_bin->lock); /* - * We try to avoid the batching pathway if we can, so we always - * at least *try* to lock. + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). */ - bool locked = false; - bool batched = false; - bool batch_failed = false; - if (can_batch) { - locked = !malloc_mutex_trylock(tsdn, &cur_bin->lock); + if (config_stats && tcache_arena == cur_arena + && !merged_stats) { + merged_stats = true; + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += cache_bin->tstats.nrequests; + cache_bin->tstats.nrequests = 0; } - if (can_batch && !locked) { - bin_with_batch_t *batched_bin = (bin_with_batch_t *) - cur_bin; - size_t push_idx = batcher_push_begin(tsdn, - &batched_bin->remote_frees, - flush_start - prev_flush_start); - bin_batching_test_after_push(push_idx); - if (push_idx != BATCHER_NO_IDX) { - batched = true; - unsigned nbatched = flush_start - - prev_flush_start; - for (unsigned i = 0; i < nbatched; i++) { - unsigned src_ind = prev_flush_start + i; - batched_bin - ->remote_free_data[push_idx + i] - .ptr = ptrs->ptr[src_ind]; - batched_bin - ->remote_free_data[push_idx + i] - .slab = item_edata[src_ind].edata; - } - batcher_push_end( - tsdn, &batched_bin->remote_frees); - } else { - batch_failed = true; + /* Next flush objects. */ + /* Init only to avoid used-uninitialized warning. 
*/ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; i++) { + void *ptr = ptrs->ptr[i]; + edata_t *edata = item_edata[i].edata; + if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; } } - if (!batched) { - if (!locked) { - malloc_mutex_lock(tsdn, &cur_bin->lock); - } - /* - * Unlike other stats (which only ever get flushed into - * a tcache's associated arena), batch_failed counts get - * accumulated into the bin where the push attempt - * failed. - */ - if (config_stats && batch_failed) { - cur_bin->stats.batch_failed_pushes++; - } - /* - * Flush stats first, if that was the right lock. Note - * that we don't actually have to flush stats into the - * current thread's binshard. Flushing into any binshard - * in the same arena is enough; we don't expose stats on - * per-binshard basis (just per-bin). - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += - cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } - unsigned preallocated_slabs = nflush; - unsigned ndalloc_slabs = - arena_bin_batch_get_ndalloc_slabs( - preallocated_slabs); + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); - /* Next flush objects our own objects. */ - /* Init only to avoid used-uninitialized warning. 
*/ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; - i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - arena_dalloc_bin_locked_step(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, edata, - ptr, dalloc_slabs, ndalloc_slabs, - &dalloc_count, &dalloc_slabs_extra); - } - /* - * Lastly, flush any batched objects (from other - * threads). - */ - if (bin_is_batched) { - arena_bin_flush_batch_impl(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, - dalloc_slabs, ndalloc_slabs, &dalloc_count, - &dalloc_slabs_extra); - } - - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - } arena_decay_ticks( tsdn, cur_arena, flush_start - prev_flush_start); } @@ -941,18 +818,13 @@ tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, edata_t *slab = dalloc_slabs[i]; arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); } - while (!edata_list_active_empty(&dalloc_slabs_extra)) { - edata_t *slab = edata_list_active_first(&dalloc_slabs_extra); - edata_list_active_remove(&dalloc_slabs_extra, slab); - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } if (config_stats && !merged_stats) { /* - * The flush loop didn't happen to flush to this - * thread's arena, so the stats didn't get merged. - * Manually do so now. - */ + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. 
+ */ bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; diff --git a/test/analyze/sizes.c b/test/analyze/sizes.c index cc6c3806..b8d10629 100644 --- a/test/analyze/sizes.c +++ b/test/analyze/sizes.c @@ -34,8 +34,6 @@ main(void) { P(arena_t); P(arena_stats_t); P(base_t); - P(bin_t); - P(bin_with_batch_t); P(decay_t); P(edata_t); P(ecache_t); diff --git a/test/include/test/fork.h b/test/include/test/fork.h deleted file mode 100644 index 9e04d279..00000000 --- a/test/include/test/fork.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef JEMALLOC_TEST_FORK_H -#define JEMALLOC_TEST_FORK_H - -#ifndef _WIN32 - -# include - -static inline void -fork_wait_for_child_exit(int pid) { - int status; - while (true) { - if (waitpid(pid, &status, 0) == -1) { - test_fail("Unexpected waitpid() failure."); - } - if (WIFSIGNALED(status)) { - test_fail( - "Unexpected child termination due to " - "signal %d", - WTERMSIG(status)); - break; - } - if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) { - test_fail("Unexpected child exit value %d", - WEXITSTATUS(status)); - } - break; - } - } -} - -#endif - -#endif /* JEMALLOC_TEST_FORK_H */ diff --git a/test/unit/batcher.c b/test/unit/batcher.c deleted file mode 100644 index 1052ca27..00000000 --- a/test/unit/batcher.c +++ /dev/null @@ -1,243 +0,0 @@ -#include "test/jemalloc_test.h" - -#include "jemalloc/internal/batcher.h" - -TEST_BEGIN(test_simple) { - enum { NELEMS_MAX = 10, DATA_BASE_VAL = 100, NRUNS = 5 }; - batcher_t batcher; - size_t data[NELEMS_MAX]; - for (size_t nelems = 0; nelems < NELEMS_MAX; nelems++) { - batcher_init(&batcher, nelems); - for (int run = 0; run < NRUNS; run++) { - for (int i = 0; i < NELEMS_MAX; i++) { - data[i] = (size_t)-1; - } - for (size_t i = 0; i < nelems; i++) { - size_t idx = batcher_push_begin( - TSDN_NULL, &batcher, 1); - assert_zu_eq(i, idx, "Wrong index"); - assert_zu_eq((size_t)-1, data[idx], - "Expected uninitialized slot"); - 
data[idx] = DATA_BASE_VAL + i; - batcher_push_end(TSDN_NULL, &batcher); - } - if (nelems > 0) { - size_t idx = batcher_push_begin( - TSDN_NULL, &batcher, 1); - assert_zu_eq(BATCHER_NO_IDX, idx, - "Shouldn't be able to push into a full " - "batcher"); - } - - size_t npop = batcher_pop_begin(TSDN_NULL, &batcher); - if (nelems == 0) { - assert_zu_eq(npop, BATCHER_NO_IDX, - "Shouldn't get any items out of an empty " - "batcher"); - } else { - assert_zu_eq(npop, nelems, - "Wrong number of elements popped"); - } - for (size_t i = 0; i < nelems; i++) { - assert_zu_eq(data[i], DATA_BASE_VAL + i, - "Item popped out of order!"); - } - if (nelems != 0) { - batcher_pop_end(TSDN_NULL, &batcher); - } - } - } -} -TEST_END - -TEST_BEGIN(test_multi_push) { - size_t idx, nelems; - batcher_t batcher; - batcher_init(&batcher, 11); - /* Push two at a time, 5 times, for 10 total. */ - for (int i = 0; i < 5; i++) { - idx = batcher_push_begin(TSDN_NULL, &batcher, 2); - assert_zu_eq(2 * i, idx, "Should push in order"); - batcher_push_end(TSDN_NULL, &batcher); - } - /* Pushing two more should fail -- would put us at 12 elems. 
*/ - idx = batcher_push_begin(TSDN_NULL, &batcher, 2); - assert_zu_eq(BATCHER_NO_IDX, idx, "Should be out of space"); - /* But one more should work */ - idx = batcher_push_begin(TSDN_NULL, &batcher, 1); - assert_zu_eq(10, idx, "Should be out of space"); - batcher_push_end(TSDN_NULL, &batcher); - nelems = batcher_pop_begin(TSDN_NULL, &batcher); - batcher_pop_end(TSDN_NULL, &batcher); - assert_zu_eq(11, nelems, "Should have popped everything"); -} -TEST_END - -enum { - STRESS_TEST_ELEMS = 10, - STRESS_TEST_THREADS = 4, - STRESS_TEST_OPS = 1000 * 1000, - STRESS_TEST_PUSH_TO_POP_RATIO = 5, -}; - -typedef struct stress_test_data_s stress_test_data_t; -struct stress_test_data_s { - batcher_t batcher; - mtx_t pop_mtx; - atomic_u32_t thread_id; - - uint32_t elems_data[STRESS_TEST_ELEMS]; - size_t push_count[STRESS_TEST_ELEMS]; - size_t pop_count[STRESS_TEST_ELEMS]; - atomic_zu_t atomic_push_count[STRESS_TEST_ELEMS]; - atomic_zu_t atomic_pop_count[STRESS_TEST_ELEMS]; -}; - -/* - * Note: 0-indexed. If one element is set and you want to find it, you call - * get_nth_set(elems, 0). - */ -static size_t -get_nth_set(bool elems_owned[STRESS_TEST_ELEMS], size_t n) { - size_t ntrue = 0; - for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { - if (elems_owned[i]) { - ntrue++; - } - if (ntrue > n) { - return i; - } - } - assert_not_reached( - "Asked for the %zu'th set element when < %zu are " - "set", - n, n); - /* Just to silence a compiler warning. 
*/ - return 0; -} - -static void * -stress_test_thd(void *arg) { - stress_test_data_t *data = arg; - size_t prng = atomic_fetch_add_u32(&data->thread_id, 1, ATOMIC_RELAXED); - - size_t nelems_owned = 0; - bool elems_owned[STRESS_TEST_ELEMS] = {0}; - size_t local_push_count[STRESS_TEST_ELEMS] = {0}; - size_t local_pop_count[STRESS_TEST_ELEMS] = {0}; - - for (int i = 0; i < STRESS_TEST_OPS; i++) { - size_t rnd = prng_range_zu( - &prng, STRESS_TEST_PUSH_TO_POP_RATIO); - if (rnd == 0 || nelems_owned == 0) { - size_t nelems = batcher_pop_begin( - TSDN_NULL, &data->batcher); - if (nelems == BATCHER_NO_IDX) { - continue; - } - for (size_t i = 0; i < nelems; i++) { - uint32_t elem = data->elems_data[i]; - assert_false(elems_owned[elem], - "Shouldn't already own what we just " - "popped"); - elems_owned[elem] = true; - nelems_owned++; - local_pop_count[elem]++; - data->pop_count[elem]++; - } - batcher_pop_end(TSDN_NULL, &data->batcher); - } else { - size_t elem_to_push_idx = prng_range_zu( - &prng, nelems_owned); - size_t elem = get_nth_set( - elems_owned, elem_to_push_idx); - assert_true(elems_owned[elem], - "Should own element we're about to pop"); - elems_owned[elem] = false; - local_push_count[elem]++; - data->push_count[elem]++; - nelems_owned--; - size_t idx = batcher_push_begin( - TSDN_NULL, &data->batcher, 1); - assert_zu_ne(idx, BATCHER_NO_IDX, - "Batcher can't be full -- we have one of its " - "elems!"); - data->elems_data[idx] = (uint32_t)elem; - batcher_push_end(TSDN_NULL, &data->batcher); - } - } - - /* Push all local elems back, flush local counts to the shared ones. 
*/ - size_t push_idx = 0; - if (nelems_owned != 0) { - push_idx = batcher_push_begin( - TSDN_NULL, &data->batcher, nelems_owned); - assert_zu_ne( - BATCHER_NO_IDX, push_idx, "Should be space to push"); - } - for (size_t i = 0; i < STRESS_TEST_ELEMS; i++) { - if (elems_owned[i]) { - data->elems_data[push_idx] = (uint32_t)i; - push_idx++; - local_push_count[i]++; - data->push_count[i]++; - } - atomic_fetch_add_zu(&data->atomic_push_count[i], - local_push_count[i], ATOMIC_RELAXED); - atomic_fetch_add_zu(&data->atomic_pop_count[i], - local_pop_count[i], ATOMIC_RELAXED); - } - if (nelems_owned != 0) { - batcher_push_end(TSDN_NULL, &data->batcher); - } - - return NULL; -} - -TEST_BEGIN(test_stress) { - stress_test_data_t data; - batcher_init(&data.batcher, STRESS_TEST_ELEMS); - bool err = mtx_init(&data.pop_mtx); - assert_false(err, "mtx_init failure"); - atomic_store_u32(&data.thread_id, 0, ATOMIC_RELAXED); - for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - data.push_count[i] = 0; - data.pop_count[i] = 0; - atomic_store_zu(&data.atomic_push_count[i], 0, ATOMIC_RELAXED); - atomic_store_zu(&data.atomic_pop_count[i], 0, ATOMIC_RELAXED); - - size_t idx = batcher_push_begin(TSDN_NULL, &data.batcher, 1); - assert_zu_eq(i, idx, "Should push in order"); - data.elems_data[idx] = i; - batcher_push_end(TSDN_NULL, &data.batcher); - } - - thd_t threads[STRESS_TEST_THREADS]; - for (int i = 0; i < STRESS_TEST_THREADS; i++) { - thd_create(&threads[i], stress_test_thd, &data); - } - for (int i = 0; i < STRESS_TEST_THREADS; i++) { - thd_join(threads[i], NULL); - } - for (int i = 0; i < STRESS_TEST_ELEMS; i++) { - assert_zu_ne( - 0, data.push_count[i], "Should have done something!"); - assert_zu_eq(data.push_count[i], data.pop_count[i], - "every element should be pushed and popped an equal number " - "of times"); - assert_zu_eq(data.push_count[i], - atomic_load_zu(&data.atomic_push_count[i], ATOMIC_RELAXED), - "atomic and non-atomic count should be equal given proper " - 
"synchronization"); - assert_zu_eq(data.pop_count[i], - atomic_load_zu(&data.atomic_pop_count[i], ATOMIC_RELAXED), - "atomic and non-atomic count should be equal given proper " - "synchronization"); - } -} -TEST_END - -int -main(void) { - return test_no_reentrancy(test_simple, test_multi_push, test_stress); -} diff --git a/test/unit/bin_batching.c b/test/unit/bin_batching.c deleted file mode 100644 index a422586d..00000000 --- a/test/unit/bin_batching.c +++ /dev/null @@ -1,270 +0,0 @@ -#include "test/jemalloc_test.h" -#include "test/fork.h" - -enum { - STRESS_THREADS = 3, - STRESS_OBJECTS_PER_THREAD = 1000, - STRESS_ALLOC_SZ = PAGE / 2, -}; - -typedef struct stress_thread_data_s stress_thread_data_t; -struct stress_thread_data_s { - unsigned thd_id; - atomic_zu_t *ready_thds; - atomic_zu_t *done_thds; - void **to_dalloc; -}; - -static atomic_zu_t push_failure_count; -static atomic_zu_t pop_attempt_results[2]; -static atomic_zu_t dalloc_zero_slab_count; -static atomic_zu_t dalloc_nonzero_slab_count; -static atomic_zu_t dalloc_nonempty_list_count; - -static bool -should_skip() { - return - /* - * We do batching operations on tcache flush pathways; we can't if - * caching is disabled. - */ - !opt_tcache || - /* We rely on tcache fill/flush operations of the size we use. */ - opt_tcache_max < STRESS_ALLOC_SZ - /* - * Some of the races we want to trigger are fiddly enough that they - * only show up under real concurrency. We add 1 to account for the - * main thread, which also does some work. - */ - || ncpus < STRESS_THREADS + 1; -} - -static void -increment_push_failure(size_t push_idx) { - if (push_idx == BATCHER_NO_IDX) { - atomic_fetch_add_zu(&push_failure_count, 1, ATOMIC_RELAXED); - } else { - assert_zu_lt(push_idx, 4, "Only 4 elems"); - volatile size_t x = 10000; - while (--x) { - /* Spin for a while, to try to provoke a failure. 
*/ - if (x == push_idx) { -#ifdef _WIN32 - SwitchToThread(); -#else - sched_yield(); -#endif - } - } - } -} - -static void -increment_pop_attempt(size_t elems_to_pop) { - bool elems = (elems_to_pop != BATCHER_NO_IDX); - atomic_fetch_add_zu(&pop_attempt_results[elems], 1, ATOMIC_RELAXED); -} - -static void -increment_slab_dalloc_count(unsigned slab_dalloc_count, bool list_empty) { - if (slab_dalloc_count > 0) { - atomic_fetch_add_zu( - &dalloc_nonzero_slab_count, 1, ATOMIC_RELAXED); - } else { - atomic_fetch_add_zu(&dalloc_zero_slab_count, 1, ATOMIC_RELAXED); - } - if (!list_empty) { - atomic_fetch_add_zu( - &dalloc_nonempty_list_count, 1, ATOMIC_RELAXED); - } -} - -static void -flush_tcache() { - assert_d_eq(0, mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), - "Unexpected mallctl failure"); -} - -static void * -stress_thread(void *arg) { - stress_thread_data_t *data = arg; - uint64_t prng_state = data->thd_id; - atomic_fetch_add_zu(data->ready_thds, 1, ATOMIC_RELAXED); - while (atomic_load_zu(data->ready_thds, ATOMIC_RELAXED) - != STRESS_THREADS) { - /* Spin */ - } - for (int i = 0; i < STRESS_OBJECTS_PER_THREAD; i++) { - dallocx(data->to_dalloc[i], 0); - if (prng_range_u64(&prng_state, 3) == 0) { - flush_tcache(); - } - } - flush_tcache(); - atomic_fetch_add_zu(data->done_thds, 1, ATOMIC_RELAXED); - return NULL; -} - -/* - * Run main_thread_fn in conditions that trigger all the various edge cases and - * subtle race conditions. 
- */ -static void -stress_run(void (*main_thread_fn)(), int nruns) { - bin_batching_test_ndalloc_slabs_max = 1; - bin_batching_test_after_push_hook = &increment_push_failure; - bin_batching_test_mid_pop_hook = &increment_pop_attempt; - bin_batching_test_after_unlock_hook = &increment_slab_dalloc_count; - - atomic_store_zu(&push_failure_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[0], 0, ATOMIC_RELAXED); - atomic_store_zu(&pop_attempt_results[1], 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_zero_slab_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_nonzero_slab_count, 0, ATOMIC_RELAXED); - atomic_store_zu(&dalloc_nonempty_list_count, 0, ATOMIC_RELAXED); - - for (int run = 0; run < nruns; run++) { - thd_t thds[STRESS_THREADS]; - stress_thread_data_t thd_datas[STRESS_THREADS]; - atomic_zu_t ready_thds; - atomic_store_zu(&ready_thds, 0, ATOMIC_RELAXED); - atomic_zu_t done_thds; - atomic_store_zu(&done_thds, 0, ATOMIC_RELAXED); - - void *ptrs[STRESS_THREADS][STRESS_OBJECTS_PER_THREAD]; - for (int i = 0; i < STRESS_THREADS; i++) { - thd_datas[i].thd_id = i; - thd_datas[i].ready_thds = &ready_thds; - thd_datas[i].done_thds = &done_thds; - thd_datas[i].to_dalloc = ptrs[i]; - for (int j = 0; j < STRESS_OBJECTS_PER_THREAD; j++) { - void *ptr = mallocx(STRESS_ALLOC_SZ, 0); - assert_ptr_not_null(ptr, "alloc failure"); - ptrs[i][j] = ptr; - } - } - for (int i = 0; i < STRESS_THREADS; i++) { - thd_create(&thds[i], stress_thread, &thd_datas[i]); - } - while (atomic_load_zu(&done_thds, ATOMIC_RELAXED) - != STRESS_THREADS) { - main_thread_fn(); - } - for (int i = 0; i < STRESS_THREADS; i++) { - thd_join(thds[i], NULL); - } - } - - bin_batching_test_ndalloc_slabs_max = (unsigned)-1; - bin_batching_test_after_push_hook = NULL; - bin_batching_test_mid_pop_hook = NULL; - bin_batching_test_after_unlock_hook = NULL; -} - -static void -do_allocs_frees() { - enum { NALLOCS = 32 }; - flush_tcache(); - void *ptrs[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - 
ptrs[i] = mallocx(STRESS_ALLOC_SZ, 0); - } - for (int i = 0; i < NALLOCS; i++) { - dallocx(ptrs[i], 0); - } - flush_tcache(); -} - -static void -test_arena_reset_main_fn() { - do_allocs_frees(); -} - -TEST_BEGIN(test_arena_reset) { - int err; - unsigned arena; - unsigned old_arena; - - test_skip_if(should_skip()); - test_skip_if(opt_percpu_arena != percpu_arena_disabled); - - size_t arena_sz = sizeof(arena); - err = mallctl("arenas.create", (void *)&arena, &arena_sz, NULL, 0); - assert_d_eq(0, err, "Arena creation failed"); - - err = mallctl("thread.arena", &old_arena, &arena_sz, &arena, arena_sz); - assert_d_eq(0, err, "changing arena failed"); - - stress_run(&test_arena_reset_main_fn, /* nruns */ 10); - - flush_tcache(); - - char buf[100]; - malloc_snprintf(buf, sizeof(buf), "arena.%u.reset", arena); - err = mallctl(buf, NULL, NULL, NULL, 0); - assert_d_eq(0, err, "Couldn't change arena"); - - do_allocs_frees(); - - err = mallctl("thread.arena", NULL, NULL, &old_arena, arena_sz); - assert_d_eq(0, err, "changing arena failed"); -} -TEST_END - -static void -test_fork_main_fn() { -#ifndef _WIN32 - pid_t pid = fork(); - if (pid == -1) { - test_fail("Fork failure!"); - } else if (pid == 0) { - /* Child */ - do_allocs_frees(); - _exit(0); - } else { - fork_wait_for_child_exit(pid); - do_allocs_frees(); - } -#endif -} - -TEST_BEGIN(test_fork) { -#ifdef _WIN32 - test_skip("No fork on windows"); -#endif - test_skip_if(should_skip()); - stress_run(&test_fork_main_fn, /* nruns */ 10); -} -TEST_END - -static void -test_races_main_fn() { - do_allocs_frees(); -} - -TEST_BEGIN(test_races) { - test_skip_if(should_skip()); - - stress_run(&test_races_main_fn, /* nruns */ 400); - - assert_zu_lt(0, atomic_load_zu(&push_failure_count, ATOMIC_RELAXED), - "Should have seen some push failures"); - assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[0], ATOMIC_RELAXED), - "Should have seen some pop failures"); - assert_zu_lt(0, atomic_load_zu(&pop_attempt_results[1], ATOMIC_RELAXED), - 
"Should have seen some pop successes"); - assert_zu_lt(0, atomic_load_zu(&dalloc_zero_slab_count, ATOMIC_RELAXED), - "Expected some frees that didn't empty a slab"); - assert_zu_lt(0, - atomic_load_zu(&dalloc_nonzero_slab_count, ATOMIC_RELAXED), - "expected some frees that emptied a slab"); - assert_zu_lt(0, - atomic_load_zu(&dalloc_nonempty_list_count, ATOMIC_RELAXED), - "expected some frees that used the empty list"); -} -TEST_END - -int -main(void) { - return test_no_reentrancy(test_arena_reset, test_races, test_fork); -} diff --git a/test/unit/bin_batching.sh b/test/unit/bin_batching.sh deleted file mode 100644 index fef9bdc6..00000000 --- a/test/unit/bin_batching.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh - -# This value of max_batched_size effectively requires all bins to be batched; -# our page limits are fuzzy, but we bound slab item counts to 2**32, so we'd be -# at multi-gigabyte minimum page sizes. -# The reason for this sort of hacky approach is that we want to -# allocate/deallocate PAGE/2-sized objects (to trigger the "non-empty" -> -# "empty" and "non-empty"-> "full" transitions often, which have special -# handling). But the value of PAGE isn't easily available in test scripts. 
-export MALLOC_CONF="narenas:2,bin_shards:1-1000000000:3,max_batched_size:1000000000,remote_free_max_batch:1,remote_free_max:4" diff --git a/test/unit/fork.c b/test/unit/fork.c index e52d0a6c..60675b77 100644 --- a/test/unit/fork.c +++ b/test/unit/fork.c @@ -1,5 +1,34 @@ #include "test/jemalloc_test.h" -#include "test/fork.h" + +#ifndef _WIN32 +# include +#endif + +#ifndef _WIN32 +static void +wait_for_child_exit(int pid) { + int status; + while (true) { + if (waitpid(pid, &status, 0) == -1) { + test_fail("Unexpected waitpid() failure."); + } + if (WIFSIGNALED(status)) { + test_fail( + "Unexpected child termination due to " + "signal %d", + WTERMSIG(status)); + break; + } + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) { + test_fail("Unexpected child exit value %d", + WEXITSTATUS(status)); + } + break; + } + } +} +#endif TEST_BEGIN(test_fork) { #ifndef _WIN32 @@ -37,7 +66,7 @@ TEST_BEGIN(test_fork) { /* Child. */ _exit(0); } else { - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } #else test_skip("fork(2) is irrelevant to Windows"); @@ -60,7 +89,7 @@ do_fork_thd(void *arg) { test_fail("Exec failed"); } else { /* Parent */ - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } return NULL; } @@ -97,7 +126,7 @@ TEST_BEGIN(test_fork_multithreaded) { do_test_fork_multithreaded(); _exit(0); } else { - fork_wait_for_child_exit(pid); + wait_for_child_exit(pid); } } #else From 48b4ad60a7ee897c813fb987183bb13d3596814c Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Tue, 5 Aug 2025 20:39:04 -0700 Subject: [PATCH 2531/2608] Remove an orphaned comment This was left behind when definitions of malloc_open and malloc_close were abstracted from code that had followed. 
--- src/pages.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/pages.c b/src/pages.c index 78f3a1b7..1daab43b 100644 --- a/src/pages.c +++ b/src/pages.c @@ -766,11 +766,6 @@ os_overcommits_sysctl(void) { #endif #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY -/* - * Use syscall(2) rather than {open,read,close}(2) when possible to avoid - * reentry during bootstrapping if another library has interposed system call - * wrappers. - */ static bool os_overcommits_proc(void) { int fd; From 9fdc1160c5793d99f26192aee0406c653affb484 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Thu, 21 Aug 2025 20:44:18 -0700 Subject: [PATCH 2532/2608] Handle interruptions and retries of read(2) and write(2) --- include/jemalloc/internal/malloc_io.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 0f82f678..5e0805ed 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -96,6 +96,11 @@ malloc_write_fd(int fd, const void *buf, size_t count) { &((const byte_t *)buf)[bytes_written], count - bytes_written); if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif return result; } bytes_written += result; @@ -124,6 +129,11 @@ malloc_read_fd(int fd, void *buf, size_t count) { ssize_t result = malloc_read_fd_syscall( fd, &((byte_t *)buf)[bytes_read], count - bytes_read); if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif return result; } else if (result == 0) { break; From 38b12427b7a832fd97739d7cfcca4081a964df2e Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Wed, 6 Aug 2025 21:32:16 -0700 Subject: [PATCH 2533/2608] Define malloc_{write,read}_fd as non-inline global functions The static inline definition made more sense when these functions just dispatched to a syscall wrapper. 
Since they acquired a retry loop, a non-inline definition makes more sense. --- include/jemalloc/internal/malloc_io.h | 86 ++------------------------- src/malloc_io.c | 75 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 81 deletions(-) diff --git a/include/jemalloc/internal/malloc_io.h b/include/jemalloc/internal/malloc_io.h index 5e0805ed..0f70c3c3 100644 --- a/include/jemalloc/internal/malloc_io.h +++ b/include/jemalloc/internal/malloc_io.h @@ -66,94 +66,18 @@ void malloc_cprintf(write_cb_t *write_cb, void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); -static inline ssize_t -malloc_write_fd_syscall(int fd, const void *buf, size_t count) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * result in the widest plausible type to avoid compiler warnings. 
- */ - long result = syscall(SYS_write, fd, buf, count); -#else - ssize_t result = (ssize_t)write(fd, buf, -# ifdef _WIN32 - (unsigned int) -# endif - count); -#endif - return (ssize_t)result; -} - -static inline ssize_t -malloc_write_fd(int fd, const void *buf, size_t count) { - size_t bytes_written = 0; - do { - ssize_t result = malloc_write_fd_syscall(fd, - &((const byte_t *)buf)[bytes_written], - count - bytes_written); - if (result < 0) { -#ifndef _WIN32 - if (errno == EINTR) { - continue; - } -#endif - return result; - } - bytes_written += result; - } while (bytes_written < count); - return bytes_written; -} - -static inline ssize_t -malloc_read_fd_syscall(int fd, void *buf, size_t count) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - long result = syscall(SYS_read, fd, buf, count); -#else - ssize_t result = read(fd, buf, -# ifdef _WIN32 - (unsigned int) -# endif - count); -#endif - return (ssize_t)result; -} - -static inline ssize_t -malloc_read_fd(int fd, void *buf, size_t count) { - size_t bytes_read = 0; - do { - ssize_t result = malloc_read_fd_syscall( - fd, &((byte_t *)buf)[bytes_read], count - bytes_read); - if (result < 0) { -#ifndef _WIN32 - if (errno == EINTR) { - continue; - } -#endif - return result; - } else if (result == 0) { - break; - } - bytes_read += result; - } while (bytes_read < count); - return bytes_read; -} +ssize_t malloc_write_fd(int fd, const void *buf, size_t count); +ssize_t malloc_read_fd(int fd, void *buf, size_t count); static inline int malloc_open(const char *path, int flags) { - int fd; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, path, flags); + return (int)syscall(SYS_open, path, flags); #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, AT_FDCWD, path, flags); + return (int)syscall(SYS_openat, AT_FDCWD, path, flags); #else - fd = open(path, flags); + return open(path, flags); #endif - return fd; } static inline int diff --git 
a/src/malloc_io.c b/src/malloc_io.c index 0c5d6c03..779cdc05 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -760,6 +760,81 @@ malloc_printf(const char *format, ...) { va_end(ap); } +static ssize_t +malloc_write_fd_syscall(int fd, const void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * result in the widest plausible type to avoid compiler warnings. + */ + return (ssize_t)syscall(SYS_write, fd, buf, count); +#else + return (ssize_t)write(fd, buf, +# ifdef _WIN32 + (unsigned int) +# endif + count); +#endif +} + +ssize_t +malloc_write_fd(int fd, const void *buf, size_t count) { + size_t bytes_written = 0; + do { + ssize_t result = malloc_write_fd_syscall(fd, + &((const byte_t *)buf)[bytes_written], + count - bytes_written); + if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif + return result; + } + bytes_written += result; + } while (bytes_written < count); + return bytes_written; +} + +static ssize_t +malloc_read_fd_syscall(int fd, void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + return (ssize_t)syscall(SYS_read, fd, buf, count); +#else + return (ssize_t)read(fd, buf, +# ifdef _WIN32 + (unsigned int) +# endif + count); +#endif +} + +ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { + size_t bytes_read = 0; + do { + ssize_t result = malloc_read_fd_syscall( + fd, &((byte_t *)buf)[bytes_read], count - bytes_read); + if (result < 0) { +#ifndef _WIN32 + if (errno == EINTR) { + continue; + } +#endif + return result; + } else if (result == 0) { + break; + } + bytes_read += result; + } while (bytes_read < count); + return bytes_read; +} + /* * Restore normal 
assertion macros, in order to make it possible to compile all * C files as a single concatenation. From 2a66c0be5a3727817ccf95c6150d10c19aae00f4 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Sat, 23 Aug 2025 08:53:28 -0700 Subject: [PATCH 2534/2608] [EASY][BUGFIX] Spelling and format --- include/jemalloc/internal/pac.h | 2 +- src/hpdata.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/include/jemalloc/internal/pac.h b/include/jemalloc/internal/pac.h index a9edc19b..a19c8b35 100644 --- a/include/jemalloc/internal/pac.h +++ b/include/jemalloc/internal/pac.h @@ -131,7 +131,7 @@ struct pac_thp_s { * opt_thp controls THP for user requested allocations. Settings * "always", "never" and "default" are available if THP is supported * by the OS and the default extent hooks are used: - * - "always" and "never" are convered by pages_set_thp_state() in + * - "always" and "never" are covered by pages_set_thp_state() in * ehooks_default_alloc_impl(). * - "default" makes no change for all the other auto arenas except * the huge arena. For the huge arena, we might also look at diff --git a/src/hpdata.c b/src/hpdata.c index e9ee2738..e18e03cd 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -18,7 +18,8 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) - void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { +void +hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); hpdata->h_huge = false; From 9442300cc3adebdbf1d518dcba990a1c971e4f2e Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Mon, 25 Aug 2025 19:39:30 -0700 Subject: [PATCH 2535/2608] Change the default page size to 64KiB on Aarch64 Linux This updates the configuration script to set the default page size to 64KiB on Aarch64 Linux. 
This is motivated by compatibility as a build configured for a 64KiB page will work on kernels that use the smaller 4KiB or 16KiB pages, whereas the reverse is not true. To make the configured page size setting more visible, the script now displays the page size when printing the configuration results. Users that want to override the page size to choose a smaller value can still do so with the --with-lg-pagesize configuration option. --- configure.ac | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/configure.ac b/configure.ac index ce5c8adc..dd0c3cc8 100644 --- a/configure.ac +++ b/configure.ac @@ -1990,6 +1990,11 @@ case "${host}" in LG_PAGE=14 fi ;; + aarch64-unknown-linux-*) + if test "x$LG_PAGE" = "xdetect"; then + LG_PAGE=16 + fi + ;; esac if test "x$LG_PAGE" = "xdetect"; then AC_CACHE_CHECK([LG_PAGE], @@ -3077,6 +3082,8 @@ AC_MSG_RESULT([INCLUDEDIR : ${INCLUDEDIR}]) AC_MSG_RESULT([LIBDIR : ${LIBDIR}]) AC_MSG_RESULT([MANDIR : ${MANDIR}]) AC_MSG_RESULT([]) +AC_MSG_RESULT([LG_PAGE : ${LG_PAGE}]) +AC_MSG_RESULT([]) AC_MSG_RESULT([srcroot : ${srcroot}]) AC_MSG_RESULT([abs_srcroot : ${abs_srcroot}]) AC_MSG_RESULT([objroot : ${objroot}]) From 5d5f76ee015696e0e086650e85722ceca9d191c1 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 26 Aug 2025 15:15:08 -0700 Subject: [PATCH 2536/2608] Remove pidfd_open call handling and rely on PIDFD_SELF --- configure.ac | 3 ++- include/jemalloc/internal/pages.h | 1 - src/jemalloc.c | 1 - src/pages.c | 41 +++++++------------------ 4 files changed, 11 insertions(+), 35 deletions(-) diff --git a/configure.ac b/configure.ac index dd0c3cc8..8ea092d6 100644 --- a/configure.ac +++ b/configure.ac @@ -2638,10 +2638,11 @@ if test "x${je_cv_madvise}" = "xyes" ; then dnl Check for process_madvise JE_COMPILABLE([process_madvise(2)], [ +#include #include #include ], [ - syscall(SYS_process_madvise, 0, (void *)0, 0, 0, 0); + syscall(SYS_process_madvise, PIDFD_SELF, (void *)0, 0, 0, 0); ], [je_cv_process_madvise]) if 
test "x${je_cv_process_madvise}" = "xyes" ; then AC_DEFINE([JEMALLOC_HAVE_PROCESS_MADVISE], [ ], [ ]) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index b0cc5bba..31909934 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -127,6 +127,5 @@ bool pages_boot(void); void pages_set_thp_state(void *ptr, size_t size); void pages_mark_guards(void *head, void *tail); void pages_unmark_guards(void *head, void *tail); -void pages_postfork_child(void); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/src/jemalloc.c b/src/jemalloc.c index 9f59a781..0fe69a1e 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -4535,7 +4535,6 @@ jemalloc_postfork_child(void) { malloc_mutex_postfork_child(tsd_tsdn(tsd), &arenas_lock); tcache_postfork_child(tsd_tsdn(tsd)); ctl_postfork_child(tsd_tsdn(tsd)); - pages_postfork_child(); } /******************************************************************************/ diff --git a/src/pages.c b/src/pages.c index 1daab43b..44c57b28 100644 --- a/src/pages.c +++ b/src/pages.c @@ -621,7 +621,11 @@ pages_dodump(void *addr, size_t size) { #ifdef JEMALLOC_HAVE_PROCESS_MADVISE # include # include -static atomic_i_t process_madvise_pidfd = ATOMIC_INIT(-1); + +#ifndef PIDFD_SELF +#define PIDFD_SELF -10000 +#endif + static atomic_b_t process_madvise_gate = ATOMIC_INIT(true); static bool @@ -650,33 +654,17 @@ pages_purge_process_madvise_impl( if (!atomic_load_b(&process_madvise_gate, ATOMIC_RELAXED)) { return true; } - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); - while (pid_fd == -1) { - int newfd = (int) syscall(SYS_pidfd_open, getpid(), 0); - if (newfd == -1) { - return true; - } - if (!atomic_compare_exchange_strong_i(&process_madvise_pidfd, - &pid_fd, newfd, - ATOMIC_RELAXED, - ATOMIC_RELAXED)) { - /* Someone else set the fd, so we close ours */ - assert(pid_fd != -1); - close(newfd); - } else { - pid_fd = newfd; - } - } /* * TODO: remove this 
save/restore of errno after supporting errno * preservation for free() call properly. */ int saved_errno = get_errno(); - size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, pid_fd, - (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); + size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, + PIDFD_SELF, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); if (purged_bytes == (size_t) -1) { - if (errno == EPERM || errno == EINVAL || errno == ENOSYS) { + if (errno == EPERM || errno == EINVAL || errno == ENOSYS + || errno == EBADF) { /* Process madvise not supported the way we need it. */ atomic_store_b(&process_madvise_gate, false, ATOMIC_RELAXED); @@ -687,15 +675,6 @@ pages_purge_process_madvise_impl( return purged_bytes != total_bytes; } -void pages_postfork_child(void) { - /* Reset the file descriptor we inherited from parent process */ - int pid_fd = atomic_load_i(&process_madvise_pidfd, ATOMIC_RELAXED); - if (pid_fd != -1) { - atomic_store_i(&process_madvise_pidfd, -1, ATOMIC_RELAXED); - close(pid_fd); - } -} - #else static bool @@ -710,8 +689,6 @@ pages_purge_process_madvise_impl( return true; } -void pages_postfork_child(void) {} - #endif bool From 5a634a8d0a1d853fc9905bc7b8908895f147322a Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Wed, 27 Aug 2025 16:48:40 -0700 Subject: [PATCH 2537/2608] Always use pthread_equal to compare thread IDs This change replaces direct comparisons of Pthread thread IDs with calls to pthread_equal. Directly comparing thread IDs is neither portable nor reliable since a thread ID is defined as an opaque type that can be implemented using a structure. --- src/jemalloc.c | 3 ++- src/tsd.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 0fe69a1e..5c77621c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -220,7 +220,8 @@ static uint8_t malloc_slow_flags; /* Used to let the initializing thread recursively allocate. 
*/ # define NO_INITIALIZER ((unsigned long)0) # define INITIALIZER pthread_self() -# define IS_INITIALIZER (malloc_initializer == pthread_self()) +# define IS_INITIALIZER \ + (pthread_equal(malloc_initializer, pthread_self())) static pthread_t malloc_initializer = NO_INITIALIZER; #else # define NO_INITIALIZER false diff --git a/src/tsd.c b/src/tsd.c index 20042c2d..30acad93 100644 --- a/src/tsd.c +++ b/src/tsd.c @@ -528,7 +528,7 @@ tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) { /* Check whether this thread has already inserted into the list. */ malloc_mutex_lock(TSDN_NULL, &head->lock); ql_foreach (iter, &head->blocks, link) { - if (iter->thread == self) { + if (pthread_equal(iter->thread, self)) { malloc_mutex_unlock(TSDN_NULL, &head->lock); return iter->data; } From c51949ea3ee75c8e417b59b89334f225775d4e64 Mon Sep 17 00:00:00 2001 From: lexprfuncall <5360361+lexprfuncall@users.noreply.github.com> Date: Tue, 26 Aug 2025 13:32:57 -0700 Subject: [PATCH 2538/2608] Update config.guess and config.sub to the latest versions These files need to be refreshed periodically to support new platform types. The following command was used to retrieve the updates curl -L -O https://git.savannah.gnu.org/cgit/config.git/plain/config.guess curl -L -O https://git.savannah.gnu.org/cgit/config.git/plain/config.sub Closes: #2814 --- build-aux/config.guess | 1345 ++++++++++++++++++++++------------------ build-aux/config.sub | 1015 ++++++++++++++++++++++-------- 2 files changed, 1493 insertions(+), 867 deletions(-) diff --git a/build-aux/config.guess b/build-aux/config.guess index f7727026..a9d01fde 100755 --- a/build-aux/config.guess +++ b/build-aux/config.guess @@ -1,12 +1,14 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2025 Free Software Foundation, Inc. 
-timestamp='2021-01-01' +# shellcheck disable=SC2006,SC2268 # see below for rationale + +timestamp='2025-07-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -32,12 +34,20 @@ timestamp='2021-01-01' # Please send patches to . -me=$(echo "$0" | sed -e 's,.*/,,') +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. + + +me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] -Output the configuration name of the system \`$me' is run on. +Output the configuration name of the system '$me' is run on. Options: -h, --help print this help, then exit @@ -50,13 +60,13 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2025 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." # Parse command line while test $# -gt 0 ; do @@ -84,13 +94,16 @@ if test $# != 0; then exit 1 fi +# Just in case it came from the environment. +GUESS= + # CC_FOR_BUILD -- compiler used by this script. 
Note that the use of a # compiler to aid in system detection is discouraged as it requires # temporary files to be created and, as you can see below, it is a # headache to deal with in a portable fashion. -# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still -# use `HOST_CC' if defined, but it is deprecated. +# Historically, 'CC_FOR_BUILD' used to be named 'HOST_CC'. We still +# use 'HOST_CC' if defined, but it is deprecated. # Portable tmp directory creation inspired by the Autoconf team. @@ -102,17 +115,17 @@ set_cc_for_build() { # prevent multiple calls if $tmp is already set test "$tmp" && return 0 : "${TMPDIR=/tmp}" - # shellcheck disable=SC2039 - { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } || + # shellcheck disable=SC2039,SC3028 + { tmp=`(umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null` && test -n "$tmp" && test -d "$tmp" ; } || { test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } || { tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } || { echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; } dummy=$tmp/dummy case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in ,,) echo "int x;" > "$dummy.c" - for driver in cc gcc c89 c99 ; do + for driver in cc gcc c17 c99 c89 ; do if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then - CC_FOR_BUILD="$driver" + CC_FOR_BUILD=$driver break fi done @@ -131,17 +144,20 @@ if test -f /.attbin/uname ; then PATH=$PATH:/.attbin ; export PATH fi -UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown -UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown -UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown -UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown +UNAME_MACHINE=`(uname -m) 2>/dev/null` || UNAME_MACHINE=unknown +UNAME_RELEASE=`(uname -r) 2>/dev/null` || 
UNAME_RELEASE=unknown +UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown +UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown -case "$UNAME_SYSTEM" in +case $UNAME_SYSTEM in Linux|GNU|GNU/*) LIBC=unknown set_cc_for_build cat <<-EOF > "$dummy.c" + #if defined(__ANDROID__) + LIBC=android + #else #include #if defined(__UCLIBC__) LIBC=uclibc @@ -149,6 +165,8 @@ Linux|GNU|GNU/*) LIBC=dietlibc #elif defined(__GLIBC__) LIBC=gnu + #elif defined(__LLVM_LIBC__) + LIBC=llvm #else #include /* First heuristic to detect musl libc. */ @@ -156,8 +174,10 @@ Linux|GNU|GNU/*) LIBC=musl #endif #endif + #endif EOF - eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')" + cc_set_libc=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g'` + eval "$cc_set_libc" # Second heuristic to detect musl libc. if [ "$LIBC" = unknown ] && @@ -176,7 +196,7 @@ esac # Note: order is significant - the case branches are not exclusive. -case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in +case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in *:NetBSD:*:*) # NetBSD (nbsd) targets should (where applicable) match one or # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*, @@ -188,12 +208,11 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # # Note: NetBSD doesn't particularly care about the vendor # portion of the name. We always set it to "unknown". 
- sysctl="sysctl -n hw.machine_arch" - UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \ - "/sbin/$sysctl" 2>/dev/null || \ - "/usr/sbin/$sysctl" 2>/dev/null || \ - echo unknown)) - case "$UNAME_MACHINE_ARCH" in + UNAME_MACHINE_ARCH=`(uname -p 2>/dev/null || \ + /sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + /usr/sbin/sysctl -n hw.machine_arch 2>/dev/null || \ + echo unknown)` + case $UNAME_MACHINE_ARCH in aarch64eb) machine=aarch64_be-unknown ;; armeb) machine=armeb-unknown ;; arm*) machine=arm-unknown ;; @@ -201,15 +220,15 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in sh3eb) machine=sh-unknown ;; sh5el) machine=sh5le-unknown ;; earmv*) - arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,') - endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p') - machine="${arch}${endian}"-unknown + arch=`echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,'` + endian=`echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p'` + machine=${arch}${endian}-unknown ;; - *) machine="$UNAME_MACHINE_ARCH"-unknown ;; + *) machine=$UNAME_MACHINE_ARCH-unknown ;; esac # The Operating System including object format, if it has switched # to ELF recently (or will in the future) and ABI. - case "$UNAME_MACHINE_ARCH" in + case $UNAME_MACHINE_ARCH in earm*) os=netbsdelf ;; @@ -230,10 +249,10 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in ;; esac # Determine ABI tags. - case "$UNAME_MACHINE_ARCH" in + case $UNAME_MACHINE_ARCH in earm*) expr='s/^earmv[0-9]/-eabi/;s/eb$//' - abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr") + abi=`echo "$UNAME_MACHINE_ARCH" | sed -e "$expr"` ;; esac # The OS release @@ -241,76 +260,82 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # thus, need a distinct triplet. However, they do not need # kernel version information, so it can be replaced with a # suitable tag, in the style of linux-gnu. 
- case "$UNAME_VERSION" in + case $UNAME_VERSION in Debian*) release='-gnu' ;; *) - release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2) + release=`echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2` ;; esac # Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM: # contains redundant information, the shorter form: # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. - echo "$machine-${os}${release}${abi-}" - exit ;; + GUESS=$machine-${os}${release}${abi-} + ;; *:Bitrig:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//') - echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE" - exit ;; + UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-bitrig$UNAME_RELEASE + ;; *:OpenBSD:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//') - echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE" - exit ;; + UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-openbsd$UNAME_RELEASE + ;; + *:SecBSD:*:*) + UNAME_MACHINE_ARCH=`arch | sed 's/SecBSD.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-secbsd$UNAME_RELEASE + ;; *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//') - echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE" - exit ;; + UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` + GUESS=$UNAME_MACHINE_ARCH-unknown-libertybsd$UNAME_RELEASE + ;; *:MidnightBSD:*:*) - echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-midnightbsd$UNAME_RELEASE + ;; *:ekkoBSD:*:*) - echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-ekkobsd$UNAME_RELEASE + ;; *:SolidBSD:*:*) - echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-solidbsd$UNAME_RELEASE + ;; *:OS108:*:*) - echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-os108_$UNAME_RELEASE + ;; macppc:MirBSD:*:*) - echo powerpc-unknown-mirbsd"$UNAME_RELEASE" - 
exit ;; + GUESS=powerpc-unknown-mirbsd$UNAME_RELEASE + ;; *:MirBSD:*:*) - echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-mirbsd$UNAME_RELEASE + ;; *:Sortix:*:*) - echo "$UNAME_MACHINE"-unknown-sortix - exit ;; + GUESS=$UNAME_MACHINE-unknown-sortix + ;; *:Twizzler:*:*) - echo "$UNAME_MACHINE"-unknown-twizzler - exit ;; + GUESS=$UNAME_MACHINE-unknown-twizzler + ;; *:Redox:*:*) - echo "$UNAME_MACHINE"-unknown-redox - exit ;; + GUESS=$UNAME_MACHINE-unknown-redox + ;; mips:OSF1:*.*) - echo mips-dec-osf1 - exit ;; + GUESS=mips-dec-osf1 + ;; alpha:OSF1:*:*) + # Reset EXIT trap before exiting to avoid spurious non-zero exit code. + trap '' 0 case $UNAME_RELEASE in *4.0) - UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}') + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` ;; *5.*) - UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}') + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` ;; esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU # types through head -n 1, so we only detect the type of CPU 0. - ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1) - case "$ALPHA_CPU_TYPE" in + ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` + case $ALPHA_CPU_TYPE in "EV4 (21064)") UNAME_MACHINE=alpha ;; "EV4.5 (21064)") @@ -347,68 +372,69 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)" - # Reset EXIT trap before exiting to avoid spurious non-zero exit code. - exitcode=$? 
- trap '' 0 - exit $exitcode ;; + OSF_REL=`echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + GUESS=$UNAME_MACHINE-dec-osf$OSF_REL + ;; Amiga*:UNIX_System_V:4.0:*) - echo m68k-unknown-sysv4 - exit ;; + GUESS=m68k-unknown-sysv4 + ;; *:[Aa]miga[Oo][Ss]:*:*) - echo "$UNAME_MACHINE"-unknown-amigaos - exit ;; + GUESS=$UNAME_MACHINE-unknown-amigaos + ;; *:[Mm]orph[Oo][Ss]:*:*) - echo "$UNAME_MACHINE"-unknown-morphos - exit ;; + GUESS=$UNAME_MACHINE-unknown-morphos + ;; *:OS/390:*:*) - echo i370-ibm-openedition - exit ;; + GUESS=i370-ibm-openedition + ;; *:z/VM:*:*) - echo s390-ibm-zvmoe - exit ;; + GUESS=s390-ibm-zvmoe + ;; *:OS400:*:*) - echo powerpc-ibm-os400 - exit ;; + GUESS=powerpc-ibm-os400 + ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) - echo arm-acorn-riscix"$UNAME_RELEASE" - exit ;; + GUESS=arm-acorn-riscix$UNAME_RELEASE + ;; arm*:riscos:*:*|arm*:RISCOS:*:*) - echo arm-unknown-riscos - exit ;; + GUESS=arm-unknown-riscos + ;; SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*) - echo hppa1.1-hitachi-hiuxmpp - exit ;; + GUESS=hppa1.1-hitachi-hiuxmpp + ;; Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*) # akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE. 
- if test "$( (/bin/universe) 2>/dev/null)" = att ; then - echo pyramid-pyramid-sysv3 - else - echo pyramid-pyramid-bsd - fi - exit ;; + case `(/bin/universe) 2>/dev/null` in + att) GUESS=pyramid-pyramid-sysv3 ;; + *) GUESS=pyramid-pyramid-bsd ;; + esac + ;; NILE*:*:*:dcosx) - echo pyramid-pyramid-svr4 - exit ;; + GUESS=pyramid-pyramid-svr4 + ;; DRS?6000:unix:4.0:6*) - echo sparc-icl-nx6 - exit ;; + GUESS=sparc-icl-nx6 + ;; DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) - case $(/usr/bin/uname -p) in - sparc) echo sparc-icl-nx7; exit ;; - esac ;; + case `/usr/bin/uname -p` in + sparc) GUESS=sparc-icl-nx7 ;; + esac + ;; s390x:SunOS:*:*) - echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$UNAME_MACHINE-ibm-solaris2$SUN_REL + ;; sun4H:SunOS:5.*:*) - echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-hal-solaris2$SUN_REL + ;; sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*) - echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris2$SUN_REL + ;; i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*) - echo i386-pc-auroraux"$UNAME_RELEASE" - exit ;; + GUESS=i386-pc-auroraux$UNAME_RELEASE + ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) set_cc_for_build SUN_ARCH=i386 @@ -417,47 +443,50 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # This test works for both compilers. 
if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH=x86_64 fi fi - echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=$SUN_ARCH-pc-solaris2$SUN_REL + ;; sun4*:SunOS:6*:*) # According to config.sub, this is the proper way to canonicalize # SunOS6. Hard to guess exactly what SunOS6 will be like, but # it's likely to be more like Solaris than SunOS4. - echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=sparc-sun-solaris3$SUN_REL + ;; sun4*:SunOS:*:*) - case "$(/usr/bin/arch -k)" in + case `/usr/bin/arch -k` in Series*|S4*) - UNAME_RELEASE=$(uname -v) + UNAME_RELEASE=`uname -v` ;; esac - # Japanese Language versions have a version number like `4.1.3-JL'. - echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')" - exit ;; + # Japanese Language versions have a version number like '4.1.3-JL'. 
+ SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/'` + GUESS=sparc-sun-sunos$SUN_REL + ;; sun3*:SunOS:*:*) - echo m68k-sun-sunos"$UNAME_RELEASE" - exit ;; + GUESS=m68k-sun-sunos$UNAME_RELEASE + ;; sun*:*:4.2BSD:*) - UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null) + UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3 - case "$(/bin/arch)" in + case `/bin/arch` in sun3) - echo m68k-sun-sunos"$UNAME_RELEASE" + GUESS=m68k-sun-sunos$UNAME_RELEASE ;; sun4) - echo sparc-sun-sunos"$UNAME_RELEASE" + GUESS=sparc-sun-sunos$UNAME_RELEASE ;; esac - exit ;; + ;; aushp:SunOS:*:*) - echo sparc-auspex-sunos"$UNAME_RELEASE" - exit ;; + GUESS=sparc-auspex-sunos$UNAME_RELEASE + ;; # The situation for MiNT is a little confusing. The machine name # can be virtually everything (everything which is not # "atarist" or "atariste" at least should have a processor @@ -467,41 +496,41 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in # MiNT. But MiNT is downward compatible to TOS, so this should # be no problem. 
atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*) - echo m68k-atari-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-atari-mint$UNAME_RELEASE + ;; milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*) - echo m68k-milan-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-milan-mint$UNAME_RELEASE + ;; hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*) - echo m68k-hades-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-hades-mint$UNAME_RELEASE + ;; *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) - echo m68k-unknown-mint"$UNAME_RELEASE" - exit ;; + GUESS=m68k-unknown-mint$UNAME_RELEASE + ;; m68k:machten:*:*) - echo m68k-apple-machten"$UNAME_RELEASE" - exit ;; + GUESS=m68k-apple-machten$UNAME_RELEASE + ;; powerpc:machten:*:*) - echo powerpc-apple-machten"$UNAME_RELEASE" - exit ;; + GUESS=powerpc-apple-machten$UNAME_RELEASE + ;; RISC*:Mach:*:*) - echo mips-dec-mach_bsd4.3 - exit ;; + GUESS=mips-dec-mach_bsd4.3 + ;; RISC*:ULTRIX:*:*) - echo mips-dec-ultrix"$UNAME_RELEASE" - exit ;; + GUESS=mips-dec-ultrix$UNAME_RELEASE + ;; VAX*:ULTRIX*:*:*) - echo vax-dec-ultrix"$UNAME_RELEASE" - exit ;; + GUESS=vax-dec-ultrix$UNAME_RELEASE + ;; 2020:CLIX:*:* | 2430:CLIX:*:*) - echo clipper-intergraph-clix"$UNAME_RELEASE" - exit ;; + GUESS=clipper-intergraph-clix$UNAME_RELEASE + ;; mips:*:*:UMIPS | mips:*:*:RISCos) set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" @@ -526,85 +555,87 @@ case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in } EOF $CC_FOR_BUILD -o "$dummy" "$dummy.c" && - dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') && - SYSTEM_NAME=$("$dummy" "$dummyarg") && + dummyarg=`echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p'` && + 
SYSTEM_NAME=`"$dummy" "$dummyarg"` && { echo "$SYSTEM_NAME"; exit; } - echo mips-mips-riscos"$UNAME_RELEASE" - exit ;; + GUESS=mips-mips-riscos$UNAME_RELEASE + ;; Motorola:PowerMAX_OS:*:*) - echo powerpc-motorola-powermax - exit ;; + GUESS=powerpc-motorola-powermax + ;; Motorola:*:4.3:PL8-*) - echo powerpc-harris-powermax - exit ;; + GUESS=powerpc-harris-powermax + ;; Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*) - echo powerpc-harris-powermax - exit ;; + GUESS=powerpc-harris-powermax + ;; Night_Hawk:Power_UNIX:*:*) - echo powerpc-harris-powerunix - exit ;; + GUESS=powerpc-harris-powerunix + ;; m88k:CX/UX:7*:*) - echo m88k-harris-cxux7 - exit ;; + GUESS=m88k-harris-cxux7 + ;; m88k:*:4*:R4*) - echo m88k-motorola-sysv4 - exit ;; + GUESS=m88k-motorola-sysv4 + ;; m88k:*:3*:R3*) - echo m88k-motorola-sysv3 - exit ;; + GUESS=m88k-motorola-sysv3 + ;; AViiON:dgux:*:*) # DG/UX returns AViiON for all architectures - UNAME_PROCESSOR=$(/usr/bin/uname -p) + UNAME_PROCESSOR=`/usr/bin/uname -p` if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110 then if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \ test "$TARGET_BINARY_INTERFACE"x = x then - echo m88k-dg-dgux"$UNAME_RELEASE" + GUESS=m88k-dg-dgux$UNAME_RELEASE else - echo m88k-dg-dguxbcs"$UNAME_RELEASE" + GUESS=m88k-dg-dguxbcs$UNAME_RELEASE fi else - echo i586-dg-dgux"$UNAME_RELEASE" + GUESS=i586-dg-dgux$UNAME_RELEASE fi - exit ;; + ;; M88*:DolphinOS:*:*) # DolphinOS (SVR3) - echo m88k-dolphin-sysv3 - exit ;; + GUESS=m88k-dolphin-sysv3 + ;; M88*:*:R3*:*) # Delta 88k system running SVR3 - echo m88k-motorola-sysv3 - exit ;; + GUESS=m88k-motorola-sysv3 + ;; XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3) - echo m88k-tektronix-sysv3 - exit ;; + GUESS=m88k-tektronix-sysv3 + ;; Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD) - echo m68k-tektronix-bsd - exit ;; + GUESS=m68k-tektronix-bsd + ;; *:IRIX*:*:*) - echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')" - 
exit ;; + IRIX_REL=`echo "$UNAME_RELEASE" | sed -e 's/-/_/g'` + GUESS=mips-sgi-irix$IRIX_REL + ;; ????????:AIX?:[12].1:2) # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX. - echo romp-ibm-aix # uname -m gives an 8 hex-code CPU id - exit ;; # Note that: echo "'$(uname -s)'" gives 'AIX ' + GUESS=romp-ibm-aix # uname -m gives an 8 hex-code CPU id + ;; # Note that: echo "'`uname -s`'" gives 'AIX ' i*86:AIX:*:*) - echo i386-ibm-aix - exit ;; + GUESS=i386-ibm-aix + ;; ia64:AIX:*:*) if test -x /usr/bin/oslevel ; then - IBM_REV=$(/usr/bin/oslevel) + IBM_REV=`/usr/bin/oslevel` else - IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + IBM_REV=$UNAME_VERSION.$UNAME_RELEASE fi - echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV" - exit ;; + GUESS=$UNAME_MACHINE-ibm-aix$IBM_REV + ;; *:AIX:2:3) if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" #include - main() + int + main () { if (!__power_pc()) exit(1); @@ -612,68 +643,68 @@ EOF exit(0); } EOF - if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") + if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` then - echo "$SYSTEM_NAME" + GUESS=$SYSTEM_NAME else - echo rs6000-ibm-aix3.2.5 + GUESS=rs6000-ibm-aix3.2.5 fi elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then - echo rs6000-ibm-aix3.2.4 + GUESS=rs6000-ibm-aix3.2.4 else - echo rs6000-ibm-aix3.2 + GUESS=rs6000-ibm-aix3.2 fi - exit ;; + ;; *:AIX:*:[4567]) - IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }') + IBM_CPU_ID=`/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }'` if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then IBM_ARCH=rs6000 else IBM_ARCH=powerpc fi if test -x /usr/bin/lslpp ; then - IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc | - awk -F: '{ print $3 }' | sed s/[0-9]*$/0/) + IBM_REV=`/usr/bin/lslpp -Lqc bos.rte.libc | \ + awk -F: '{ print $3 }' | sed s/[0-9]*$/0/` else - IBM_REV="$UNAME_VERSION.$UNAME_RELEASE" + 
IBM_REV=$UNAME_VERSION.$UNAME_RELEASE fi - echo "$IBM_ARCH"-ibm-aix"$IBM_REV" - exit ;; + GUESS=$IBM_ARCH-ibm-aix$IBM_REV + ;; *:AIX:*:*) - echo rs6000-ibm-aix - exit ;; + GUESS=rs6000-ibm-aix + ;; ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*) - echo romp-ibm-bsd4.4 - exit ;; + GUESS=romp-ibm-bsd4.4 + ;; ibmrt:*BSD:*|romp-ibm:BSD:*) # covers RT/PC BSD and - echo romp-ibm-bsd"$UNAME_RELEASE" # 4.3 with uname added to - exit ;; # report: romp-ibm BSD 4.3 + GUESS=romp-ibm-bsd$UNAME_RELEASE # 4.3 with uname added to + ;; # report: romp-ibm BSD 4.3 *:BOSX:*:*) - echo rs6000-bull-bosx - exit ;; + GUESS=rs6000-bull-bosx + ;; DPX/2?00:B.O.S.:*:*) - echo m68k-bull-sysv3 - exit ;; + GUESS=m68k-bull-sysv3 + ;; 9000/[34]??:4.3bsd:1.*:*) - echo m68k-hp-bsd - exit ;; + GUESS=m68k-hp-bsd + ;; hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*) - echo m68k-hp-bsd4.4 - exit ;; + GUESS=m68k-hp-bsd4.4 + ;; 9000/[34678]??:HP-UX:*:*) - HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') - case "$UNAME_MACHINE" in + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + case $UNAME_MACHINE in 9000/31?) HP_ARCH=m68000 ;; 9000/[34]??) 
HP_ARCH=m68k ;; 9000/[678][0-9][0-9]) if test -x /usr/bin/getconf; then - sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null) - sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null) - case "$sc_cpu_version" in + sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` + sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` + case $sc_cpu_version in 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 - case "$sc_kernel_bits" in + case $sc_kernel_bits in 32) HP_ARCH=hppa2.0n ;; 64) HP_ARCH=hppa2.0w ;; '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 @@ -688,7 +719,8 @@ EOF #include #include - int main () + int + main () { #if defined(_SC_KERNEL_BITS) long bits = sysconf(_SC_KERNEL_BITS); @@ -715,7 +747,7 @@ EOF exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy") + (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=`"$dummy"` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac @@ -740,12 +772,12 @@ EOF HP_ARCH=hppa64 fi fi - echo "$HP_ARCH"-hp-hpux"$HPUX_REV" - exit ;; + GUESS=$HP_ARCH-hp-hpux$HPUX_REV + ;; ia64:HP-UX:*:*) - HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//') - echo ia64-hp-hpux"$HPUX_REV" - exit ;; + HPUX_REV=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*.[0B]*//'` + GUESS=ia64-hp-hpux$HPUX_REV + ;; 3050*:HI-UX:*:*) set_cc_for_build sed 's/^ //' << EOF > "$dummy.c" @@ -773,38 +805,38 @@ EOF exit (0); } EOF - $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") && + $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } - echo unknown-hitachi-hiuxwe2 - exit ;; + GUESS=unknown-hitachi-hiuxwe2 + ;; 9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*) - echo hppa1.1-hp-bsd - exit ;; + GUESS=hppa1.1-hp-bsd + ;; 9000/8??:4.3bsd:*:*) - echo hppa1.0-hp-bsd - exit ;; + GUESS=hppa1.0-hp-bsd + ;; *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*) - echo hppa1.0-hp-mpeix - exit ;; + 
GUESS=hppa1.0-hp-mpeix + ;; hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*) - echo hppa1.1-hp-osf - exit ;; + GUESS=hppa1.1-hp-osf + ;; hp8??:OSF1:*:*) - echo hppa1.0-hp-osf - exit ;; + GUESS=hppa1.0-hp-osf + ;; i*86:OSF1:*:*) if test -x /usr/sbin/sysversion ; then - echo "$UNAME_MACHINE"-unknown-osf1mk + GUESS=$UNAME_MACHINE-unknown-osf1mk else - echo "$UNAME_MACHINE"-unknown-osf1 + GUESS=$UNAME_MACHINE-unknown-osf1 fi - exit ;; + ;; parisc*:Lites*:*:*) - echo hppa1.1-hp-lites - exit ;; + GUESS=hppa1.1-hp-lites + ;; C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*) - echo c1-convex-bsd - exit ;; + GUESS=c1-convex-bsd + ;; C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*) if getsysinfo -f scalar_acc then echo c32-convex-bsd @@ -812,17 +844,18 @@ EOF fi exit ;; C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*) - echo c34-convex-bsd - exit ;; + GUESS=c34-convex-bsd + ;; C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*) - echo c38-convex-bsd - exit ;; + GUESS=c38-convex-bsd + ;; C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*) - echo c4-convex-bsd - exit ;; + GUESS=c4-convex-bsd + ;; CRAY*Y-MP:*:*:*) - echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=ymp-cray-unicos$CRAY_REL + ;; CRAY*[A-Z]90:*:*:*) echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \ | sed -e 's/CRAY.*\([A-Z]90\)/\1/' \ @@ -830,114 +863,155 @@ EOF -e 's/\.[^.]*$/.X/' exit ;; CRAY*TS:*:*:*) - echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=t90-cray-unicos$CRAY_REL + ;; CRAY*T3E:*:*:*) - echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=alphaev5-cray-unicosmk$CRAY_REL + ;; CRAY*SV1:*:*:*) - echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=sv1-cray-unicos$CRAY_REL + ;; 
*:UNICOS/mp:*:*) - echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/' - exit ;; + CRAY_REL=`echo "$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'` + GUESS=craynv-cray-unicosmp$CRAY_REL + ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz) - FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') - FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/') - echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | sed -e 's/ /_/'` + GUESS=${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///') - FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/') - echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" - exit ;; + FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_REL=`echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + GUESS=sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL} + ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) - echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-pc-bsdi$UNAME_RELEASE + ;; sparc*:BSD/OS:*:*) - echo sparc-unknown-bsdi"$UNAME_RELEASE" - exit ;; + GUESS=sparc-unknown-bsdi$UNAME_RELEASE + ;; *:BSD/OS:*:*) - echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-bsdi$UNAME_RELEASE + ;; arm:FreeBSD:*:*) - UNAME_PROCESSOR=$(uname -p) + UNAME_PROCESSOR=`uname -p` set_cc_for_build if echo __ARM_PCS_VFP | 
$CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabi else - echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL-gnueabihf fi - exit ;; + ;; *:FreeBSD:*:*) - UNAME_PROCESSOR=$(/usr/bin/uname -p) - case "$UNAME_PROCESSOR" in + UNAME_PROCESSOR=`uname -p` + case $UNAME_PROCESSOR in amd64) UNAME_PROCESSOR=x86_64 ;; i386) UNAME_PROCESSOR=i586 ;; esac - echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" - exit ;; + FREEBSD_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_PROCESSOR-unknown-freebsd$FREEBSD_REL + ;; i*:CYGWIN*:*) - echo "$UNAME_MACHINE"-pc-cygwin - exit ;; + GUESS=$UNAME_MACHINE-pc-cygwin + ;; *:MINGW64*:*) - echo "$UNAME_MACHINE"-pc-mingw64 - exit ;; + GUESS=$UNAME_MACHINE-pc-mingw64 + ;; *:MINGW*:*) - echo "$UNAME_MACHINE"-pc-mingw32 - exit ;; + GUESS=$UNAME_MACHINE-pc-mingw32 + ;; *:MSYS*:*) - echo "$UNAME_MACHINE"-pc-msys - exit ;; + GUESS=$UNAME_MACHINE-pc-msys + ;; i*:PW*:*) - echo "$UNAME_MACHINE"-pc-pw32 - exit ;; + GUESS=$UNAME_MACHINE-pc-pw32 + ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; *:Interix*:*) - case "$UNAME_MACHINE" in + case $UNAME_MACHINE in x86) - echo i586-pc-interix"$UNAME_RELEASE" - exit ;; + GUESS=i586-pc-interix$UNAME_RELEASE + ;; authenticamd | genuineintel | EM64T) - echo x86_64-unknown-interix"$UNAME_RELEASE" - exit ;; + GUESS=x86_64-unknown-interix$UNAME_RELEASE + ;; IA64) - echo ia64-unknown-interix"$UNAME_RELEASE" - exit ;; + GUESS=ia64-unknown-interix$UNAME_RELEASE + ;; esac ;; i*:UWIN*:*) - echo "$UNAME_MACHINE"-pc-uwin - exit ;; + GUESS=$UNAME_MACHINE-pc-uwin + ;; 
amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*) - echo x86_64-pc-cygwin - exit ;; + GUESS=x86_64-pc-cygwin + ;; prep*:SunOS:5.*:*) - echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')" - exit ;; + SUN_REL=`echo "$UNAME_RELEASE" | sed -e 's/[^.]*//'` + GUESS=powerpcle-unknown-solaris2$SUN_REL + ;; *:GNU:*:*) # the GNU system - echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')" - exit ;; + GNU_ARCH=`echo "$UNAME_MACHINE" | sed -e 's,[-/].*$,,'` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's,/.*$,,'` + GUESS=$GNU_ARCH-unknown-$LIBC$GNU_REL + ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC" - exit ;; + GNU_SYS=`echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"` + GNU_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-$GNU_SYS$GNU_REL-$LIBC + ;; + x86_64:[Mm]anagarm:*:*|i?86:[Mm]anagarm:*:*) + GUESS="$UNAME_MACHINE-pc-managarm-mlibc" + ;; + *:[Mm]anagarm:*:*) + GUESS="$UNAME_MACHINE-unknown-managarm-mlibc" + ;; *:Minix:*:*) - echo "$UNAME_MACHINE"-unknown-minix - exit ;; + GUESS=$UNAME_MACHINE-unknown-minix + ;; aarch64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + set_cc_for_build + CPU=$UNAME_MACHINE + LIBCABI=$LIBC + if test "$CC_FOR_BUILD" != no_compiler_found; then + ABI=64 + sed 's/^ //' << EOF > "$dummy.c" + #ifdef __ARM_EABI__ + #ifdef __ARM_PCS_VFP + ABI=eabihf + #else + ABI=eabi + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + eabi | eabihf) CPU=armv8l; LIBCABI=$LIBC$ABI ;; + esac + fi + GUESS=$CPU-unknown-linux-$LIBCABI + ;; aarch64_be:Linux:*:*) UNAME_MACHINE=aarch64_be - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + 
GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; alpha:Linux:*:*) - case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null` in EV5) UNAME_MACHINE=alphaev5 ;; EV56) UNAME_MACHINE=alphaev56 ;; PCA56) UNAME_MACHINE=alphapca56 ;; @@ -948,63 +1022,72 @@ EOF esac objdump --private-headers /bin/sh | grep -q ld.so.1 if test "$?" = 0 ; then LIBC=gnulibc1 ; fi - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - arc:Linux:*:* | arceb:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + arc:Linux:*:* | arceb:Linux:*:* | arc32:Linux:*:* | arc64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; arm*:Linux:*:*) set_cc_for_build if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_EABI__ then - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC else if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ARM_PCS_VFP then - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabi else - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf + GUESS=$UNAME_MACHINE-unknown-linux-${LIBC}eabihf fi fi - exit ;; + ;; avr32*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; cris:Linux:*:*) - echo "$UNAME_MACHINE"-axis-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; crisv32:Linux:*:*) - echo "$UNAME_MACHINE"-axis-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-axis-linux-$LIBC + ;; e2k:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; frv:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; hexagon:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; 
i*86:Linux:*:*) - echo "$UNAME_MACHINE"-pc-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-pc-linux-$LIBC + ;; ia64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; k1om:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; - loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + kvx:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; + kvx:cos:*:*) + GUESS=$UNAME_MACHINE-unknown-cos + ;; + kvx:mbr:*:*) + GUESS=$UNAME_MACHINE-unknown-mbr + ;; + loongarch32:Linux:*:* | loongarch64:Linux:*:*) + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; m32r*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; m68*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; mips:Linux:*:* | mips64:Linux:*:*) set_cc_for_build IS_GLIBC=0 @@ -1049,138 +1132,150 @@ EOF #endif #endif EOF - eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')" + cc_set_vars=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI'` + eval "$cc_set_vars" test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; } ;; mips64el:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; openrisc*:Linux:*:*) - echo or1k-unknown-linux-"$LIBC" - exit ;; + GUESS=or1k-unknown-linux-$LIBC + ;; or32:Linux:*:* | or1k*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; padre:Linux:*:*) - echo sparc-unknown-linux-"$LIBC" - exit ;; + GUESS=sparc-unknown-linux-$LIBC + ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-"$LIBC" - exit ;; + GUESS=hppa64-unknown-linux-$LIBC + ;; 
parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level - case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in - PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;; - PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;; - *) echo hppa-unknown-linux-"$LIBC" ;; + case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in + PA7*) GUESS=hppa1.1-unknown-linux-$LIBC ;; + PA8*) GUESS=hppa2.0-unknown-linux-$LIBC ;; + *) GUESS=hppa-unknown-linux-$LIBC ;; esac - exit ;; + ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpc64-unknown-linux-$LIBC + ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpc-unknown-linux-$LIBC + ;; ppc64le:Linux:*:*) - echo powerpc64le-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpc64le-unknown-linux-$LIBC + ;; ppcle:Linux:*:*) - echo powerpcle-unknown-linux-"$LIBC" - exit ;; + GUESS=powerpcle-unknown-linux-$LIBC + ;; riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; s390:Linux:*:* | s390x:Linux:*:*) - echo "$UNAME_MACHINE"-ibm-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-ibm-linux-$LIBC + ;; sh64*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; sh*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; tile*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; vax:Linux:*:*) - echo "$UNAME_MACHINE"-dec-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-dec-linux-$LIBC + ;; x86_64:Linux:*:*) set_cc_for_build + CPU=$UNAME_MACHINE LIBCABI=$LIBC if test "$CC_FOR_BUILD" != no_compiler_found; then - if (echo '#ifdef 
__ILP32__'; echo IS_X32; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ - grep IS_X32 >/dev/null - then - LIBCABI="$LIBC"x32 - fi + ABI=64 + sed 's/^ //' << EOF > "$dummy.c" + #ifdef __i386__ + ABI=x86 + #else + #ifdef __ILP32__ + ABI=x32 + #endif + #endif +EOF + cc_set_abi=`$CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^ABI' | sed 's, ,,g'` + eval "$cc_set_abi" + case $ABI in + x86) CPU=i686 ;; + x32) LIBCABI=${LIBC}x32 ;; + esac fi - echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI" - exit ;; + GUESS=$CPU-pc-linux-$LIBCABI + ;; xtensa*:Linux:*:*) - echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" - exit ;; + GUESS=$UNAME_MACHINE-unknown-linux-$LIBC + ;; i*86:DYNIX/ptx:4*:*) # ptx 4.0 does uname -s correctly, with DYNIX/ptx in there. # earlier versions are messed up and put the nodename in both # sysname and nodename. - echo i386-sequent-sysv4 - exit ;; + GUESS=i386-sequent-sysv4 + ;; i*86:UNIX_SV:4.2MP:2.*) # Unixware is an offshoot of SVR4, but it has its own version # number series starting with 2... # I am not positive that other SVR4 systems won't match this, # I just have to hope. -- rms. # Use sysv4.2uw... so that sysv4* matches it. - echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION" - exit ;; + GUESS=$UNAME_MACHINE-pc-sysv4.2uw$UNAME_VERSION + ;; i*86:OS/2:*:*) - # If we were able to find `uname', then EMX Unix compatibility + # If we were able to find 'uname', then EMX Unix compatibility # is probably installed. 
- echo "$UNAME_MACHINE"-pc-os2-emx - exit ;; + GUESS=$UNAME_MACHINE-pc-os2-emx + ;; i*86:XTS-300:*:STOP) - echo "$UNAME_MACHINE"-unknown-stop - exit ;; + GUESS=$UNAME_MACHINE-unknown-stop + ;; i*86:atheos:*:*) - echo "$UNAME_MACHINE"-unknown-atheos - exit ;; + GUESS=$UNAME_MACHINE-unknown-atheos + ;; i*86:syllable:*:*) - echo "$UNAME_MACHINE"-pc-syllable - exit ;; + GUESS=$UNAME_MACHINE-pc-syllable + ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*) - echo i386-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=i386-unknown-lynxos$UNAME_RELEASE + ;; i*86:*DOS:*:*) - echo "$UNAME_MACHINE"-pc-msdosdjgpp - exit ;; + GUESS=$UNAME_MACHINE-pc-msdosdjgpp + ;; i*86:*:4.*:*) - UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//') + UNAME_REL=`echo "$UNAME_RELEASE" | sed 's/\/MP$//'` if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then - echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL" + GUESS=$UNAME_MACHINE-univel-sysv$UNAME_REL else - echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL" + GUESS=$UNAME_MACHINE-pc-sysv$UNAME_REL fi - exit ;; + ;; i*86:*:5:[678]*) # UnixWare 7.x, OpenUNIX and OpenServer 6. 
- case $(/bin/uname -X | grep "^Machine") in + case `/bin/uname -X | grep "^Machine"` in *486*) UNAME_MACHINE=i486 ;; *Pentium) UNAME_MACHINE=i586 ;; *Pent*|*Celeron) UNAME_MACHINE=i686 ;; esac - echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}" - exit ;; + GUESS=$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION} + ;; i*86:*:3.2:*) if test -f /usr/options/cb.name; then - UNAME_REL=$(sed -n 's/.*Version //p' /dev/null >/dev/null ; then - UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //')) + UNAME_REL=`(/bin/uname -X|grep Release|sed -e 's/.*= //')` (/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486 (/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \ && UNAME_MACHINE=i586 @@ -1188,11 +1283,11 @@ EOF && UNAME_MACHINE=i686 (/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \ && UNAME_MACHINE=i686 - echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL" + GUESS=$UNAME_MACHINE-pc-sco$UNAME_REL else - echo "$UNAME_MACHINE"-pc-sysv32 + GUESS=$UNAME_MACHINE-pc-sysv32 fi - exit ;; + ;; pc:*:*:*) # Left here for compatibility: # uname -m prints for DJGPP always 'pc', but it prints nothing about @@ -1200,37 +1295,37 @@ EOF # Note: whatever this is, it MUST be the same as what config.sub # prints for the "djgpp" host, or else GDB configure will decide that # this is a cross-build. - echo i586-pc-msdosdjgpp - exit ;; + GUESS=i586-pc-msdosdjgpp + ;; Intel:Mach:3*:*) - echo i386-pc-mach3 - exit ;; + GUESS=i386-pc-mach3 + ;; paragon:*:*:*) - echo i860-intel-osf1 - exit ;; + GUESS=i860-intel-osf1 + ;; i860:*:4.*:*) # i860-SVR4 if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then - echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4 + GUESS=i860-stardent-sysv$UNAME_RELEASE # Stardent Vistra i860-SVR4 else # Add other i860-SVR4 vendors below as they are discovered. 
- echo i860-unknown-sysv"$UNAME_RELEASE" # Unknown i860-SVR4 + GUESS=i860-unknown-sysv$UNAME_RELEASE # Unknown i860-SVR4 fi - exit ;; + ;; mini*:CTIX:SYS*5:*) # "miniframe" - echo m68010-convergent-sysv - exit ;; + GUESS=m68010-convergent-sysv + ;; mc68k:UNIX:SYSTEM5:3.51m) - echo m68k-convergent-sysv - exit ;; + GUESS=m68k-convergent-sysv + ;; M680?0:D-NIX:5.3:*) - echo m68k-diab-dnix - exit ;; + GUESS=m68k-diab-dnix + ;; M68*:*:R3V[5678]*:*) test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;; 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ - && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ @@ -1241,7 +1336,7 @@ EOF NCR*:*:4.2:* | MPRAS*:*:4.2:*) OS_REL='.3' test -r /etc/.relid \ - && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid) + && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` /bin/uname -p 2>/dev/null | grep 86 >/dev/null \ && { echo i486-ncr-sysv4.3"$OS_REL"; exit; } /bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \ @@ -1249,118 +1344,121 @@ EOF /bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \ && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;; m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*) - echo m68k-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=m68k-unknown-lynxos$UNAME_RELEASE + ;; mc68030:UNIX_System_V:4.*:*) - echo m68k-atari-sysv4 - exit ;; + GUESS=m68k-atari-sysv4 + ;; TSUNAMI:LynxOS:2.*:*) - echo sparc-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=sparc-unknown-lynxos$UNAME_RELEASE + ;; rs6000:LynxOS:2.*:*) - echo rs6000-unknown-lynxos"$UNAME_RELEASE" - exit ;; + 
GUESS=rs6000-unknown-lynxos$UNAME_RELEASE + ;; PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*) - echo powerpc-unknown-lynxos"$UNAME_RELEASE" - exit ;; + GUESS=powerpc-unknown-lynxos$UNAME_RELEASE + ;; SM[BE]S:UNIX_SV:*:*) - echo mips-dde-sysv"$UNAME_RELEASE" - exit ;; + GUESS=mips-dde-sysv$UNAME_RELEASE + ;; RM*:ReliantUNIX-*:*:*) - echo mips-sni-sysv4 - exit ;; + GUESS=mips-sni-sysv4 + ;; RM*:SINIX-*:*:*) - echo mips-sni-sysv4 - exit ;; + GUESS=mips-sni-sysv4 + ;; *:SINIX-*:*:*) if uname -p 2>/dev/null >/dev/null ; then - UNAME_MACHINE=$( (uname -p) 2>/dev/null) - echo "$UNAME_MACHINE"-sni-sysv4 + UNAME_MACHINE=`(uname -p) 2>/dev/null` + GUESS=$UNAME_MACHINE-sni-sysv4 else - echo ns32k-sni-sysv + GUESS=ns32k-sni-sysv fi - exit ;; - PENTIUM:*:4.0*:*) # Unisys `ClearPath HMP IX 4000' SVR4/MP effort + ;; + PENTIUM:*:4.0*:*) # Unisys 'ClearPath HMP IX 4000' SVR4/MP effort # says - echo i586-unisys-sysv4 - exit ;; + GUESS=i586-unisys-sysv4 + ;; *:UNIX_System_V:4*:FTX*) # From Gerald Hewes . # How about differentiating between stratus architectures? -djm - echo hppa1.1-stratus-sysv4 - exit ;; + GUESS=hppa1.1-stratus-sysv4 + ;; *:*:*:FTX*) # From seanf@swdc.stratus.com. - echo i860-stratus-sysv4 - exit ;; + GUESS=i860-stratus-sysv4 + ;; i*86:VOS:*:*) # From Paul.Green@stratus.com. - echo "$UNAME_MACHINE"-stratus-vos - exit ;; + GUESS=$UNAME_MACHINE-stratus-vos + ;; *:VOS:*:*) # From Paul.Green@stratus.com. 
- echo hppa1.1-stratus-vos - exit ;; + GUESS=hppa1.1-stratus-vos + ;; mc68*:A/UX:*:*) - echo m68k-apple-aux"$UNAME_RELEASE" - exit ;; + GUESS=m68k-apple-aux$UNAME_RELEASE + ;; news*:NEWS-OS:6*:*) - echo mips-sony-newsos6 - exit ;; + GUESS=mips-sony-newsos6 + ;; R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*) if test -d /usr/nec; then - echo mips-nec-sysv"$UNAME_RELEASE" + GUESS=mips-nec-sysv$UNAME_RELEASE else - echo mips-unknown-sysv"$UNAME_RELEASE" + GUESS=mips-unknown-sysv$UNAME_RELEASE fi - exit ;; + ;; BeBox:BeOS:*:*) # BeOS running on hardware made by Be, PPC only. - echo powerpc-be-beos - exit ;; + GUESS=powerpc-be-beos + ;; BeMac:BeOS:*:*) # BeOS running on Mac or Mac clone, PPC only. - echo powerpc-apple-beos - exit ;; + GUESS=powerpc-apple-beos + ;; BePC:BeOS:*:*) # BeOS running on Intel PC compatible. - echo i586-pc-beos - exit ;; + GUESS=i586-pc-beos + ;; BePC:Haiku:*:*) # Haiku running on Intel PC compatible. - echo i586-pc-haiku - exit ;; - x86_64:Haiku:*:*) - echo x86_64-unknown-haiku - exit ;; + GUESS=i586-pc-haiku + ;; + ppc:Haiku:*:*) # Haiku running on Apple PowerPC + GUESS=powerpc-apple-haiku + ;; + *:Haiku:*:*) # Haiku modern gcc (not bound by BeOS compat) + GUESS=$UNAME_MACHINE-unknown-haiku + ;; SX-4:SUPER-UX:*:*) - echo sx4-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx4-nec-superux$UNAME_RELEASE + ;; SX-5:SUPER-UX:*:*) - echo sx5-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx5-nec-superux$UNAME_RELEASE + ;; SX-6:SUPER-UX:*:*) - echo sx6-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx6-nec-superux$UNAME_RELEASE + ;; SX-7:SUPER-UX:*:*) - echo sx7-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx7-nec-superux$UNAME_RELEASE + ;; SX-8:SUPER-UX:*:*) - echo sx8-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx8-nec-superux$UNAME_RELEASE + ;; SX-8R:SUPER-UX:*:*) - echo sx8r-nec-superux"$UNAME_RELEASE" - exit ;; + GUESS=sx8r-nec-superux$UNAME_RELEASE + ;; SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux"$UNAME_RELEASE" - exit ;; + 
GUESS=sxace-nec-superux$UNAME_RELEASE + ;; Power*:Rhapsody:*:*) - echo powerpc-apple-rhapsody"$UNAME_RELEASE" - exit ;; + GUESS=powerpc-apple-rhapsody$UNAME_RELEASE + ;; *:Rhapsody:*:*) - echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-apple-rhapsody$UNAME_RELEASE + ;; arm64:Darwin:*:*) - echo aarch64-apple-darwin"$UNAME_RELEASE" - exit ;; + GUESS=aarch64-apple-darwin$UNAME_RELEASE + ;; *:Darwin:*:*) - UNAME_PROCESSOR=$(uname -p) + UNAME_PROCESSOR=`uname -p` case $UNAME_PROCESSOR in unknown) UNAME_PROCESSOR=powerpc ;; esac @@ -1394,109 +1492,125 @@ EOF # uname -m returns i386 or x86_64 UNAME_PROCESSOR=$UNAME_MACHINE fi - echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_PROCESSOR-apple-darwin$UNAME_RELEASE + ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) - UNAME_PROCESSOR=$(uname -p) + UNAME_PROCESSOR=`uname -p` if test "$UNAME_PROCESSOR" = x86; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi - echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_PROCESSOR-$UNAME_MACHINE-nto-qnx$UNAME_RELEASE + ;; *:QNX:*:4*) - echo i386-pc-qnx - exit ;; + GUESS=i386-pc-qnx + ;; NEO-*:NONSTOP_KERNEL:*:*) - echo neo-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=neo-tandem-nsk$UNAME_RELEASE + ;; NSE-*:NONSTOP_KERNEL:*:*) - echo nse-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nse-tandem-nsk$UNAME_RELEASE + ;; NSR-*:NONSTOP_KERNEL:*:*) - echo nsr-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nsr-tandem-nsk$UNAME_RELEASE + ;; NSV-*:NONSTOP_KERNEL:*:*) - echo nsv-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nsv-tandem-nsk$UNAME_RELEASE + ;; NSX-*:NONSTOP_KERNEL:*:*) - echo nsx-tandem-nsk"$UNAME_RELEASE" - exit ;; + GUESS=nsx-tandem-nsk$UNAME_RELEASE + ;; *:NonStop-UX:*:*) - echo mips-compaq-nonstopux - exit ;; + GUESS=mips-compaq-nonstopux + ;; BS2000:POSIX*:*:*) - echo bs2000-siemens-sysv - exit ;; + GUESS=bs2000-siemens-sysv + ;; DS/*:UNIX_System_V:*:*) - echo 
"$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-$UNAME_SYSTEM-$UNAME_RELEASE + ;; *:Plan9:*:*) # "uname -m" is not consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - # shellcheck disable=SC2154 - if test "$cputype" = 386; then + if test "${cputype-}" = 386; then UNAME_MACHINE=i386 - else - UNAME_MACHINE="$cputype" + elif test "x${cputype-}" != x; then + UNAME_MACHINE=$cputype fi - echo "$UNAME_MACHINE"-unknown-plan9 - exit ;; + GUESS=$UNAME_MACHINE-unknown-plan9 + ;; *:TOPS-10:*:*) - echo pdp10-unknown-tops10 - exit ;; + GUESS=pdp10-unknown-tops10 + ;; *:TENEX:*:*) - echo pdp10-unknown-tenex - exit ;; + GUESS=pdp10-unknown-tenex + ;; KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*) - echo pdp10-dec-tops20 - exit ;; + GUESS=pdp10-dec-tops20 + ;; XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*) - echo pdp10-xkl-tops20 - exit ;; + GUESS=pdp10-xkl-tops20 + ;; *:TOPS-20:*:*) - echo pdp10-unknown-tops20 - exit ;; + GUESS=pdp10-unknown-tops20 + ;; *:ITS:*:*) - echo pdp10-unknown-its - exit ;; + GUESS=pdp10-unknown-its + ;; SEI:*:*:SEIUX) - echo mips-sei-seiux"$UNAME_RELEASE" - exit ;; + GUESS=mips-sei-seiux$UNAME_RELEASE + ;; *:DragonFly:*:*) - echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')" - exit ;; + DRAGONFLY_REL=`echo "$UNAME_RELEASE" | sed -e 's/[-(].*//'` + GUESS=$UNAME_MACHINE-unknown-dragonfly$DRAGONFLY_REL + ;; *:*VMS:*:*) - UNAME_MACHINE=$( (uname -p) 2>/dev/null) - case "$UNAME_MACHINE" in - A*) echo alpha-dec-vms ; exit ;; - I*) echo ia64-dec-vms ; exit ;; - V*) echo vax-dec-vms ; exit ;; + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case $UNAME_MACHINE in + A*) GUESS=alpha-dec-vms ;; + I*) GUESS=ia64-dec-vms ;; + V*) GUESS=vax-dec-vms ;; esac ;; *:XENIX:*:SysV) - echo i386-pc-xenix - exit ;; + GUESS=i386-pc-xenix + ;; i*86:skyos:*:*) - echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')" - exit ;; + 
SKYOS_REL=`echo "$UNAME_RELEASE" | sed -e 's/ .*$//'` + GUESS=$UNAME_MACHINE-pc-skyos$SKYOS_REL + ;; i*86:rdos:*:*) - echo "$UNAME_MACHINE"-pc-rdos - exit ;; - i*86:AROS:*:*) - echo "$UNAME_MACHINE"-pc-aros - exit ;; + GUESS=$UNAME_MACHINE-pc-rdos + ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; + *:AROS:*:*) + GUESS=$UNAME_MACHINE-unknown-aros + ;; x86_64:VMkernel:*:*) - echo "$UNAME_MACHINE"-unknown-esx - exit ;; + GUESS=$UNAME_MACHINE-unknown-esx + ;; amd64:Isilon\ OneFS:*:*) - echo x86_64-unknown-onefs - exit ;; + GUESS=x86_64-unknown-onefs + ;; *:Unleashed:*:*) - echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE" - exit ;; + GUESS=$UNAME_MACHINE-unknown-unleashed$UNAME_RELEASE + ;; + x86_64:[Ii]ronclad:*:*|i?86:[Ii]ronclad:*:*) + GUESS=$UNAME_MACHINE-pc-ironclad-mlibc + ;; + *:[Ii]ronclad:*:*) + GUESS=$UNAME_MACHINE-unknown-ironclad-mlibc + ;; esac +# Do we have a guess based on uname results? +if test "x$GUESS" != x; then + echo "$GUESS" + exit +fi + # No uname command or uname output not recognized. set_cc_for_build cat > "$dummy.c" < "$dummy.c" </dev/null); + version=`(hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null`; if (version < 4) printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version); else @@ -1628,7 +1743,7 @@ main () } EOF -$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) && +$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=`"$dummy"` && { echo "$SYSTEM_NAME"; exit; } # Apollos put the system type in the environment. @@ -1636,7 +1751,7 @@ test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; } echo "$0: unable to guess system type" >&2 -case "$UNAME_MACHINE:$UNAME_SYSTEM" in +case $UNAME_MACHINE:$UNAME_SYSTEM in mips:Linux | mips64:Linux) # If we got here on MIPS GNU/Linux, output extra information. 
cat >&2 <&2 </dev/null || echo unknown) -uname -r = $( (uname -r) 2>/dev/null || echo unknown) -uname -s = $( (uname -s) 2>/dev/null || echo unknown) -uname -v = $( (uname -v) 2>/dev/null || echo unknown) +uname -m = `(uname -m) 2>/dev/null || echo unknown` +uname -r = `(uname -r) 2>/dev/null || echo unknown` +uname -s = `(uname -s) 2>/dev/null || echo unknown` +uname -v = `(uname -v) 2>/dev/null || echo unknown` -/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null) -/bin/uname -X = $( (/bin/uname -X) 2>/dev/null) +/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null` +/bin/uname -X = `(/bin/uname -X) 2>/dev/null` -hostinfo = $( (hostinfo) 2>/dev/null) -/bin/universe = $( (/bin/universe) 2>/dev/null) -/usr/bin/arch -k = $( (/usr/bin/arch -k) 2>/dev/null) -/bin/arch = $( (/bin/arch) 2>/dev/null) -/usr/bin/oslevel = $( (/usr/bin/oslevel) 2>/dev/null) -/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null) +hostinfo = `(hostinfo) 2>/dev/null` +/bin/universe = `(/bin/universe) 2>/dev/null` +/usr/bin/arch -k = `(/usr/bin/arch -k) 2>/dev/null` +/bin/arch = `(/bin/arch) 2>/dev/null` +/usr/bin/oslevel = `(/usr/bin/oslevel) 2>/dev/null` +/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null` UNAME_MACHINE = "$UNAME_MACHINE" UNAME_RELEASE = "$UNAME_RELEASE" @@ -1694,8 +1811,8 @@ fi exit 1 # Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp nil t) # time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-format: "%Y-%02m-%02d" # time-stamp-end: "'" # End: diff --git a/build-aux/config.sub b/build-aux/config.sub index b0f84923..3d35cde1 100755 --- a/build-aux/config.sub +++ b/build-aux/config.sub @@ -1,12 +1,14 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2025 Free Software Foundation, Inc. 
-timestamp='2021-01-07' +# shellcheck disable=SC2006,SC2268,SC2162 # see below for rationale + +timestamp='2025-07-10' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -50,7 +52,14 @@ timestamp='2021-01-07' # CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM # It is wrong to echo any other type of specification. -me=$(echo "$0" | sed -e 's,.*/,,') +# The "shellcheck disable" line above the timestamp inhibits complaints +# about features and limitations of the classic Bourne shell that were +# superseded or lifted in POSIX. However, this script identifies a wide +# variety of pre-POSIX systems that do not have POSIX shells at all, and +# even some reasonably current systems (Solaris 10 as case-in-point) still +# have a pre-POSIX /bin/sh. + +me=`echo "$0" | sed -e 's,.*/,,'` usage="\ Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS @@ -67,13 +76,13 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2025 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." help=" -Try \`$me --help' for more information." +Try '$me --help' for more information." 
# Parse command line while test $# -gt 0 ; do @@ -111,15 +120,16 @@ case $# in esac # Split fields of configuration type -# shellcheck disable=SC2162 +saved_IFS=$IFS IFS="-" read field1 field2 field3 field4 <&2 + echo "Invalid configuration '$1': more than four components" >&2 exit 1 ;; *-*-*-*) @@ -131,10 +141,22 @@ case $1 in # parts maybe_os=$field2-$field3 case $maybe_os in - nto-qnx* | linux-* | uclinux-uclibc* \ - | uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \ - | netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \ - | storm-chaos* | os2-emx* | rtmk-nova*) + cloudabi*-eabi* \ + | kfreebsd*-gnu* \ + | knetbsd*-gnu* \ + | kopensolaris*-gnu* \ + | ironclad-* \ + | linux-* \ + | managarm-* \ + | netbsd*-eabi* \ + | netbsd*-gnu* \ + | nto-qnx* \ + | os2-emx* \ + | rtmk-nova* \ + | storm-chaos* \ + | uclinux-gnu* \ + | uclinux-uclibc* \ + | windows-* ) basic_machine=$field1 basic_os=$maybe_os ;; @@ -149,8 +171,12 @@ case $1 in esac ;; *-*) - # A lone config we happen to match not fitting any pattern case $field1-$field2 in + # Shorthands that happen to contain a single dash + convex-c[12] | convex-c3[248]) + basic_machine=$field2-convex + basic_os= + ;; decstation-3100) basic_machine=mips-dec basic_os= @@ -158,24 +184,87 @@ case $1 in *-*) # Second component is usually, but not always the OS case $field2 in - # Prevent following clause from handling this valid os + # Do not treat sunos as a manufacturer sun*os*) basic_machine=$field1 basic_os=$field2 ;; # Manufacturers - dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \ - | att* | 7300* | 3300* | delta* | motorola* | sun[234]* \ - | unicom* | ibm* | next | hp | isi* | apollo | altos* \ - | convergent* | ncr* | news | 32* | 3600* | 3100* \ - | hitachi* | c[123]* | convex* | sun | crds | omron* | dg \ - | ultra | tti* | harris | dolphin | highlevel | gould \ - | cbm | ns | masscomp | apple | axis | knuth | cray \ - | microblaze* | sim | cisco \ - | oki | wec | wrs | winbond) + 3100* \ + 
| 32* \ + | 3300* \ + | 3600* \ + | 7300* \ + | acorn \ + | altos* \ + | apollo \ + | apple \ + | atari \ + | att* \ + | axis \ + | be \ + | bull \ + | cbm \ + | ccur \ + | cisco \ + | commodore \ + | convergent* \ + | convex* \ + | cray \ + | crds \ + | dec* \ + | delta* \ + | dg \ + | digital \ + | dolphin \ + | encore* \ + | gould \ + | harris \ + | highlevel \ + | hitachi* \ + | hp \ + | ibm* \ + | intergraph \ + | isi* \ + | knuth \ + | masscomp \ + | microblaze* \ + | mips* \ + | motorola* \ + | ncr* \ + | news \ + | next \ + | ns \ + | oki \ + | omron* \ + | pc533* \ + | rebel \ + | rom68k \ + | rombug \ + | semi \ + | sequent* \ + | sgi* \ + | siemens \ + | sim \ + | sni \ + | sony* \ + | stratus \ + | sun \ + | sun[234]* \ + | tektronix \ + | tti* \ + | ultra \ + | unicom* \ + | wec \ + | winbond \ + | wrs) basic_machine=$field1-$field2 basic_os= ;; + tock* | zephyr*) + basic_machine=$field1-unknown + basic_os=$field2 + ;; *) basic_machine=$field1 basic_os=$field2 @@ -256,26 +345,6 @@ case $1 in basic_machine=arm-unknown basic_os=cegcc ;; - convex-c1) - basic_machine=c1-convex - basic_os=bsd - ;; - convex-c2) - basic_machine=c2-convex - basic_os=bsd - ;; - convex-c32) - basic_machine=c32-convex - basic_os=bsd - ;; - convex-c34) - basic_machine=c34-convex - basic_os=bsd - ;; - convex-c38) - basic_machine=c38-convex - basic_os=bsd - ;; cray) basic_machine=j90-cray basic_os=unicos @@ -698,15 +767,26 @@ case $basic_machine in vendor=dec basic_os=tops20 ;; - delta | 3300 | motorola-3300 | motorola-delta \ - | 3300-motorola | delta-motorola) + delta | 3300 | delta-motorola | 3300-motorola | motorola-delta | motorola-3300) cpu=m68k vendor=motorola ;; - dpx2*) + # This used to be dpx2*, but that gets the RS6000-based + # DPX/20 and the x86-based DPX/2-100 wrong. 
See + # https://oldskool.silicium.org/stations/bull_dpx20.htm + # https://www.feb-patrimoine.com/english/bull_dpx2.htm + # https://www.feb-patrimoine.com/english/unix_and_bull.htm + dpx2 | dpx2[23]00 | dpx2[23]xx) cpu=m68k vendor=bull - basic_os=sysv3 + ;; + dpx2100 | dpx21xx) + cpu=i386 + vendor=bull + ;; + dpx20) + cpu=rs6000 + vendor=bull ;; encore | umax | mmax) cpu=ns32k @@ -769,22 +849,22 @@ case $basic_machine in vendor=hp ;; i*86v32) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv32 ;; i*86v4*) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv4 ;; i*86v) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=sysv ;; i*86sol2) - cpu=$(echo "$1" | sed -e 's/86.*/86/') + cpu=`echo "$1" | sed -e 's/86.*/86/'` vendor=pc basic_os=solaris2 ;; @@ -821,18 +901,6 @@ case $basic_machine in next | m*-next) cpu=m68k vendor=next - case $basic_os in - openstep*) - ;; - nextstep*) - ;; - ns2*) - basic_os=nextstep2 - ;; - *) - basic_os=nextstep3 - ;; - esac ;; np1) cpu=np1 @@ -917,16 +985,17 @@ case $basic_machine in ;; leon-*|leon[3-9]-*) cpu=sparc - vendor=$(echo "$basic_machine" | sed 's/-.*//') + vendor=`echo "$basic_machine" | sed 's/-.*//'` ;; *-*) - # shellcheck disable=SC2162 + saved_IFS=$IFS IFS="-" read cpu vendor <&2 + echo "Invalid configuration '$1': machine '$cpu-$vendor' not recognized" 1>&2 exit 1 ;; esac @@ -1280,38 +1492,48 @@ esac # Decode manufacturer-specific aliases for certain operating systems. -if test x$basic_os != x +if test x"$basic_os" != x then -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. 
+obj= case $basic_os in gnu/linux*) kernel=linux - os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|') + os=`echo "$basic_os" | sed -e 's|gnu/linux|gnu|'` ;; os2-emx) kernel=os2 - os=$(echo $basic_os | sed -e 's|os2-emx|emx|') + os=`echo "$basic_os" | sed -e 's|os2-emx|emx|'` ;; nto-qnx*) kernel=nto - os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|') + os=`echo "$basic_os" | sed -e 's|nto-qnx|qnx|'` ;; *-*) - # shellcheck disable=SC2162 + saved_IFS=$IFS IFS="-" read kernel os <&2 + fi + ;; *) - echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2 + echo "Invalid configuration '$1': OS '$os' not recognized" 1>&2 + exit 1 + ;; +esac + +case $obj in + aout* | coff* | elf* | pe*) + ;; + '') + # empty is fine + ;; + *) + echo "Invalid configuration '$1': Machine code format '$obj' not recognized" 1>&2 + exit 1 + ;; +esac + +# Here we handle the constraint that a (synthetic) cpu and os are +# valid only in combination with each other and nowhere else. +case $cpu-$os in + # The "javascript-unknown-ghcjs" triple is used by GHC; we + # accept it here in order to tolerate that, but reject any + # variations. + javascript-ghcjs) + ;; + javascript-* | *-ghcjs) + echo "Invalid configuration '$1': cpu '$cpu' is not valid with os '$os$obj'" 1>&2 exit 1 ;; esac # As a final step for OS-related things, validate the OS-kernel combination # (given a valid OS), if there is a kernel. 
-case $kernel-$os in - linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* ) +case $kernel-$os-$obj in + linux-gnu*- | linux-android*- | linux-dietlibc*- | linux-llvm*- \ + | linux-mlibc*- | linux-musl*- | linux-newlib*- \ + | linux-relibc*- | linux-uclibc*- | linux-ohos*- ) ;; - uclinux-uclibc* ) + uclinux-uclibc*- | uclinux-gnu*- ) ;; - -dietlibc* | -newlib* | -musl* | -uclibc* ) + ironclad-mlibc*-) + ;; + managarm-mlibc*- | managarm-kernel*- ) + ;; + windows*-msvc*-) + ;; + -dietlibc*- | -llvm*- | -mlibc*- | -musl*- | -newlib*- | -relibc*- \ + | -uclibc*- ) # These are just libc implementations, not actual OSes, and thus # require a kernel. - echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2 + echo "Invalid configuration '$1': libc '$os' needs explicit kernel." 1>&2 exit 1 ;; - kfreebsd*-gnu* | kopensolaris*-gnu*) + -kernel*- ) + echo "Invalid configuration '$1': '$os' needs explicit kernel." 1>&2 + exit 1 ;; - nto-qnx*) + *-kernel*- ) + echo "Invalid configuration '$1': '$kernel' does not support '$os'." 1>&2 + exit 1 ;; - os2-emx) + *-msvc*- ) + echo "Invalid configuration '$1': '$os' needs 'windows'." 1>&2 + exit 1 ;; - *-eabi* | *-gnueabi*) + kfreebsd*-gnu*- | knetbsd*-gnu*- | netbsd*-gnu*- | kopensolaris*-gnu*-) ;; - -*) + vxworks-simlinux- | vxworks-simwindows- | vxworks-spe-) + ;; + nto-qnx*-) + ;; + os2-emx-) + ;; + rtmk-nova-) + ;; + *-eabi*- | *-gnueabi*-) + ;; + ios*-simulator- | tvos*-simulator- | watchos*-simulator- ) + ;; + none--*) + # None (no kernel, i.e. freestanding / bare metal), + # can be paired with an machine code file format + ;; + -*-) # Blank kernel with real OS is always fine. ;; - *-*) - echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2 + --*) + # Blank kernel and OS with real machine code file format is always fine. + ;; + *-*-*) + echo "Invalid configuration '$1': Kernel '$kernel' not known to work with OS '$os'." 
1>&2 exit 1 ;; esac @@ -1774,7 +2283,7 @@ case $vendor in *-riscix*) vendor=acorn ;; - *-sunos*) + *-sunos* | *-solaris*) vendor=sun ;; *-cnk* | *-aix*) @@ -1844,12 +2353,12 @@ case $vendor in ;; esac -echo "$cpu-$vendor-${kernel:+$kernel-}$os" +echo "$cpu-$vendor${kernel:+-$kernel}${os:+-$os}${obj:+-$obj}" exit # Local variables: -# eval: (add-hook 'before-save-hook 'time-stamp) +# eval: (add-hook 'before-save-hook 'time-stamp nil t) # time-stamp-start: "timestamp='" -# time-stamp-format: "%:y-%02m-%02d" +# time-stamp-format: "%Y-%02m-%02d" # time-stamp-end: "'" # End: From ce02945070fc569a3fe80ccc01cb5cb6feb4d858 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aur=C3=A9lien=20Brooke?= Date: Thu, 4 Sep 2025 08:55:47 +0200 Subject: [PATCH 2539/2608] Add missing thread_event_registry.c to Visual Studio projects This file was added by b2a35a905f8d3c89529914987407ef33e6b05cec. --- msvc/projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + msvc/projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + 4 files changed, 4 insertions(+) diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 9743e10b..fff77a4b 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index c1ff11a9..53d4af8d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 6cb1b35e..10514d35 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj 
b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 5c7b00a2..cda827be 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -95,6 +95,7 @@ + From daf44173c54f2e388210bc7f03b4e9bfd938597c Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Thu, 11 Sep 2025 14:46:35 -0700 Subject: [PATCH 2540/2608] Replace an instance of indentation with spaces with tabs --- Makefile.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.in b/Makefile.in index 4e9d0bea..12cde70c 100644 --- a/Makefile.in +++ b/Makefile.in @@ -155,7 +155,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/tcache.c \ $(srcroot)src/test_hooks.c \ $(srcroot)src/thread_event.c \ - $(srcroot)src/thread_event_registry.c \ + $(srcroot)src/thread_event_registry.c \ $(srcroot)src/ticker.c \ $(srcroot)src/tsd.c \ $(srcroot)src/util.c \ From 56cdce8592bf4ffd7962bed99b31027f22e1895d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 24 Aug 2025 18:56:12 -0700 Subject: [PATCH 2541/2608] Adding trace analysis in preparation for page allocator microbenchmark. 
--- .gitignore | 1 + Makefile.in | 19 ++ test/stress/pa/.gitignore | 23 ++ test/stress/pa/data/.gitignore | 6 + test/stress/pa/pa_data_preprocessor.cpp | 423 ++++++++++++++++++++++++ 5 files changed, 472 insertions(+) create mode 100644 test/stress/pa/.gitignore create mode 100644 test/stress/pa/data/.gitignore create mode 100644 test/stress/pa/pa_data_preprocessor.cpp diff --git a/.gitignore b/.gitignore index 9180ddf1..95dbaa5f 100644 --- a/.gitignore +++ b/.gitignore @@ -73,6 +73,7 @@ test/include/test/jemalloc_test_defs.h /test/stress/[A-Za-z]* !/test/stress/[A-Za-z]*.* +!/test/stress/pa/ /test/stress/*.[od] /test/stress/*.out diff --git a/Makefile.in b/Makefile.in index 12cde70c..8ea194e5 100644 --- a/Makefile.in +++ b/Makefile.in @@ -566,6 +566,24 @@ $(objroot)test/stress/%$(EXE): $(objroot)test/stress/%.$(O) $(C_JET_OBJS) $(C_TE @mkdir -p $(@D) $(CC) $(TEST_LD_MODE) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(objroot)lib/$(LIBJEMALLOC).$(IMPORTLIB) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) +$(objroot)test/stress/pa/pa_data_preprocessor$(EXE): $(objroot)test/stress/pa/pa_data_preprocessor.$(O) + @mkdir -p $(@D) + $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + +$(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< +ifdef CC_MM + @$(CC) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< +endif + +$(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.cpp + @mkdir -p $(@D) + $(CXX) $(CXXFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< +ifdef CC_MM + @$(CXX) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< +endif + build_lib_shared: $(DSOS) build_lib_static: $(STATIC_LIBS) ifeq ($(enable_shared), 1) @@ -686,6 +704,7 @@ 
tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) +tests_pa: $(objroot)test/stress/pa/pa_data_preprocessor$(EXE) tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: diff --git a/test/stress/pa/.gitignore b/test/stress/pa/.gitignore new file mode 100644 index 00000000..378ee4e0 --- /dev/null +++ b/test/stress/pa/.gitignore @@ -0,0 +1,23 @@ +# Ignore executable files +pa_microbench +pa_data_preprocessor + +# Ignore object files +*.o +*.d + +# Ignore temporary and backup files +*~ +*.tmp +*.bak + +# Ignore compiled output files +*.out + +# Keep source files and documentation +!*.c +!*.cpp +!*.h +!*.md +!*.sh +!Makefile* diff --git a/test/stress/pa/data/.gitignore b/test/stress/pa/data/.gitignore new file mode 100644 index 00000000..3b8ddcbb --- /dev/null +++ b/test/stress/pa/data/.gitignore @@ -0,0 +1,6 @@ +# Ignore data files +*.csv + +# But keep example files +!example_*.csv +!*.md diff --git a/test/stress/pa/pa_data_preprocessor.cpp b/test/stress/pa/pa_data_preprocessor.cpp new file mode 100644 index 00000000..757f37bb --- /dev/null +++ b/test/stress/pa/pa_data_preprocessor.cpp @@ -0,0 +1,423 @@ +#include +#include +#include +#include +#include +#include +#include + +/* + * Page Allocator Data Preprocessor (C++ Version) + * + * This tool processes real allocation traces (collected via BPF) + * and converts them into a format suitable for the PA simulator. 
+ * + * Supported input formats: + * HPA: shard_ind_int,addr_int,nsecs_int,probe,size_int + * SEC: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int + * + * Output format (4 columns): + * shard_ind_int,operation_index,size_or_alloc_index,is_frequent + * where: + * - shard_ind_int: shard index as integer + * - operation_index: 0=alloc, 1=dalloc + * - size_or_alloc_index: for alloc operations show bytes, + * for dalloc operations show index of corresponding alloc + * - is_frequent: 1 if frequent reuse allocation, 0 otherwise + */ + +enum class TraceFormat { HPA, SEC }; + +struct TraceEvent { + int shard_ind; + uintptr_t addr; + uint64_t nsecs; + std::string probe; + size_t size; + bool is_frequent; +}; + +struct AllocationRecord { + uintptr_t addr; + size_t size; + int shard_ind; + size_t alloc_index; + uint64_t nsecs; +}; + +class AllocationTracker { + private: + std::unordered_map records_; + + public: + void + add_allocation(uintptr_t addr, size_t size, int shard_ind, + size_t alloc_index, uint64_t nsecs) { + records_[addr] = {addr, size, shard_ind, alloc_index, nsecs}; + } + + AllocationRecord * + find_allocation(uintptr_t addr) { + auto it = records_.find(addr); + return (it != records_.end()) ? &it->second : nullptr; + } + + void + remove_allocation(uintptr_t addr) { + records_.erase(addr); + } + + size_t + count() const { + return records_.size(); + } +}; + +class ArenaMapper { + private: + std::unordered_map sec_ptr_to_arena_; + int next_arena_index_; + + public: + ArenaMapper() : next_arena_index_(0) {} + + int + get_arena_index(uintptr_t sec_ptr) { + if (sec_ptr == 0) { + /* Should not be seeing null sec pointer anywhere. 
Use this as a sanity check.*/ + return 0; + } + + auto it = sec_ptr_to_arena_.find(sec_ptr); + if (it != sec_ptr_to_arena_.end()) { + return it->second; + } + + /* New sec_ptr, assign next available arena index */ + int arena_index = next_arena_index_++; + sec_ptr_to_arena_[sec_ptr] = arena_index; + return arena_index; + } + + size_t + arena_count() const { + return sec_ptr_to_arena_.size(); + } +}; + +bool +is_alloc_operation(const std::string &probe) { + return (probe == "hpa_alloc" || probe == "sec_alloc"); +} + +bool +is_dalloc_operation(const std::string &probe) { + return (probe == "hpa_dalloc" || probe == "sec_dalloc"); +} + +bool +parse_hpa_line(const std::string &line, TraceEvent &event) { + std::istringstream ss(line); + std::string token; + + /* Parse shard_ind_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.shard_ind = std::stoi(token); + + /* Parse addr_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.addr = std::stoull(token); + + /* Parse nsecs_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.nsecs = std::stoull(token); + + /* Parse probe */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.probe = token; + + /* Parse size_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.size = std::stoull(token); + + /* HPA format doesn't have is_frequent field, set default */ + event.is_frequent = true; + + return false; +} + +bool +parse_sec_line( + const std::string &line, TraceEvent &event, ArenaMapper &arena_mapper) { + std::istringstream ss(line); + std::string token; + + /* Skip process_id */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Skip thread_id */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Skip thread_name */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Parse nsecs_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + event.nsecs = std::stoull(token); + + /* Parse 
operation */ + if (!std::getline(ss, token, ',')) { + return true; + } + + event.probe = token; + + /* Parse sec_ptr_int (used for arena mapping) */ + uintptr_t sec_ptr = 0; + if (!std::getline(ss, token, ',')) { + return true; + } + if (!token.empty()) { + sec_ptr = std::stoull(token); + } + + /* Map sec_ptr to arena index */ + event.shard_ind = arena_mapper.get_arena_index(sec_ptr); + + /* Skip sec_shard_ptr_int */ + if (!std::getline(ss, token, ',')) { + return true; + } + + /* Parse edata_ptr_int (used as the address) */ + if (!std::getline(ss, token, ',')) { + return true; + } + if (!token.empty()) { + event.addr = std::stoull(token); + } else { + event.addr = 0; + } + + /* Parse size_int */ + if (!std::getline(ss, token, ',') + && !is_dalloc_operation(event.probe)) { + /* SEC format may not always have size for dalloc */ + return true; + } + if (!token.empty()) { + event.size = std::stoull(token); + } else { + /* When no size given, this is a dalloc, size won't be used. */ + event.size = 0; + } + + /* Parse is_frequent_reuse_int */ + if (!std::getline(ss, token, ',') + && !is_dalloc_operation(event.probe)) { + return true; + } + if (!token.empty()) { + event.is_frequent = (std::stoi(token) != 0); + } else { + /* + * When no is_frequent_reuse_int given, this is a dalloc, + * is_frequent won't be used. + */ + event.is_frequent = false; + } + + return false; +} + +void +write_output_header(std::ofstream &output) { + output << "shard_ind,operation,size_or_alloc_index,is_frequent\n"; +} + +void +write_output_event(std::ofstream &output, int shard_ind, int operation, + size_t value, bool is_frequent) { + output << shard_ind << "," << operation << "," << value << "," + << (is_frequent ? 
1 : 0) << "\n"; +} + +size_t +process_trace_file(const std::string &input_filename, + const std::string &output_filename, TraceFormat format) { + std::ifstream input(input_filename); + if (!input.is_open()) { + std::cerr << "Failed to open input file: " << input_filename + << std::endl; + return 0; + } + + std::ofstream output(output_filename); + if (!output.is_open()) { + std::cerr << "Failed to open output file: " << output_filename + << std::endl; + return 0; + } + + AllocationTracker tracker; + ArenaMapper arena_mapper; /* For SEC format arena mapping */ + + std::string line; + size_t line_count = 0; + size_t output_count = 0; + size_t alloc_sequence = 0; /* Sequential index for allocations */ + size_t unmatched_frees = 0; + + write_output_header(output); + std::cout << "Reading from: " << input_filename << std::endl; + + /* Skip header line */ + if (!std::getline(input, line)) { + std::cerr << "Error: Empty input file" << std::endl; + return 0; + } + + while (std::getline(input, line)) { + line_count++; + + /* Skip empty lines */ + if (line.empty()) { + continue; + } + + TraceEvent event; + bool parse_error = false; + + if (format == TraceFormat::HPA) { + parse_error = parse_hpa_line(line, event); + } else if (format == TraceFormat::SEC) { + parse_error = parse_sec_line(line, event, arena_mapper); + } + + if (parse_error) { + continue; + } + + if (is_alloc_operation(event.probe)) { + /* This is an allocation */ + write_output_event(output, event.shard_ind, 0, + event.size, event.is_frequent); + + /* Track this allocation with the current sequence number */ + tracker.add_allocation(event.addr, event.size, + event.shard_ind, alloc_sequence, event.nsecs); + alloc_sequence++; + } else if (is_dalloc_operation(event.probe)) { + /* This is a deallocation. Ignore dalloc without a corresponding alloc. 
*/ + AllocationRecord *record = tracker.find_allocation( + event.addr); + + if (record) { + /* Validate timing: deallocation should happen after allocation */ + assert(event.nsecs >= record->nsecs); + /* Found matching allocation with valid timing */ + write_output_event(output, event.shard_ind, 1, + record->alloc_index, event.is_frequent); + tracker.remove_allocation(event.addr); + output_count++; /* Count this deallocation */ + } else { + unmatched_frees++; + } + } else { + std::cerr << "Unknown operation: " << event.probe + << std::endl; + } + } + + std::cout << "Processed " << line_count << " lines" << std::endl; + std::cout << "Unmatched frees: " << unmatched_frees << std::endl; + std::cout << "Extracted " << output_count << " alloc/dalloc pairs" + << std::endl; + std::cout << "Results written to: " << output_filename << std::endl; + + return output_count; +} + +TraceFormat +parse_format(const std::string &format_str) { + if (format_str == "hpa") { + return TraceFormat::HPA; + } else if (format_str == "sec") { + return TraceFormat::SEC; + } else { + throw std::invalid_argument( + "Unknown format: " + format_str + ". 
Use 'hpa' or 'sec'"); + } +} + +int +main(int argc, char *argv[]) { + if (argc < 4 || argc > 5) { + std::cerr << "Usage: " << argv[0] + << " " + << std::endl; + std::cerr << std::endl; + std::cerr << "Arguments:" << std::endl; + std::cerr << " format - Input format: 'hpa' or 'sec'" + << std::endl; + std::cerr + << " hpa: shard_ind_int,addr_int,nsecs_int,probe,size_int" + << std::endl; + std::cerr + << " sec: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int" + << std::endl; + std::cerr << " input_csv_file - Input CSV trace file" + << std::endl; + std::cerr + << " output_file - Output file for simulator with format:" + << std::endl; + std::cerr + << " shard_ind,operation,size_or_alloc_index,is_frequent" + << std::endl; + std::cerr << std::endl; + std::cerr << "Output format:" << std::endl; + std::cerr << " - operation: 0=alloc, 1=dalloc" << std::endl; + std::cerr + << " - size_or_alloc_index: bytes for alloc, alloc index for dalloc" + << std::endl; + return 1; + } + + try { + TraceFormat format = parse_format(argv[1]); + std::string input_file = argv[2]; + std::string output_file = argv[3]; + + size_t events_generated = process_trace_file( + input_file, output_file, format); + + if (events_generated == 0) { + std::cerr + << "No events generated. Check input file format and filtering criteria." + << std::endl; + return 1; + } + return 0; + } catch (const std::exception &e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } +} From 261591f12360fbce99440584a611e9c338ff7378 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sun, 24 Aug 2025 23:57:26 -0700 Subject: [PATCH 2542/2608] Add a page-allocator microbenchmark. 
--- Makefile.in | 10 +- test/stress/pa/README.md | 118 ++++++ test/stress/pa/pa_microbench.c | 672 +++++++++++++++++++++++++++++++++ 3 files changed, 797 insertions(+), 3 deletions(-) create mode 100644 test/stress/pa/README.md create mode 100644 test/stress/pa/pa_microbench.c diff --git a/Makefile.in b/Makefile.in index 8ea194e5..047e05cb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -570,11 +570,15 @@ $(objroot)test/stress/pa/pa_data_preprocessor$(EXE): $(objroot)test/stress/pa/pa @mkdir -p $(@D) $(CXX) $(LDTARGET) $(filter %.$(O),$^) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) +$(objroot)test/stress/pa/pa_microbench$(EXE): $(objroot)test/stress/pa/pa_microbench.$(O) $(C_JET_OBJS) $(C_TESTLIB_STRESS_OBJS) + @mkdir -p $(@D) + $(CC) $(LDTARGET) $(filter %.$(O),$^) $(call RPATH,$(objroot)lib) $(LDFLAGS) $(filter-out -lm,$(LIBS)) $(LM) $(EXTRA_LDFLAGS) + $(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.c @mkdir -p $(@D) - $(CC) $(CFLAGS) -c $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< + $(CC) $(CFLAGS) -c $(CPPFLAGS) -DJEMALLOC_STRESS_TEST -I$(srcroot)test/include -I$(objroot)test/include $(CTARGET) $< ifdef CC_MM - @$(CC) -MM $(CPPFLAGS) -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< + @$(CC) -MM $(CPPFLAGS) -DJEMALLOC_STRESS_TEST -I$(srcroot)test/include -I$(objroot)test/include -MT $@ -o $(@:%.$(O)=%.d) $< endif $(objroot)test/stress/pa/%.$(O): $(srcroot)test/stress/pa/%.cpp @@ -704,7 +708,7 @@ tests_unit: $(TESTS_UNIT:$(srcroot)%.c=$(objroot)%$(EXE)) tests_integration: $(TESTS_INTEGRATION:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_INTEGRATION_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) tests_analyze: $(TESTS_ANALYZE:$(srcroot)%.c=$(objroot)%$(EXE)) tests_stress: $(TESTS_STRESS:$(srcroot)%.c=$(objroot)%$(EXE)) $(TESTS_STRESS_CPP:$(srcroot)%.cpp=$(objroot)%$(EXE)) -tests_pa: $(objroot)test/stress/pa/pa_data_preprocessor$(EXE) +tests_pa: 
$(objroot)test/stress/pa/pa_data_preprocessor$(EXE) $(objroot)test/stress/pa/pa_microbench$(EXE) tests: tests_unit tests_integration tests_analyze tests_stress check_unit_dir: diff --git a/test/stress/pa/README.md b/test/stress/pa/README.md new file mode 100644 index 00000000..4ae59bb9 --- /dev/null +++ b/test/stress/pa/README.md @@ -0,0 +1,118 @@ +# Page Allocator (PA) Microbenchmark Suite + +This directory contains a comprehensive microbenchmark suite for testing and analyzing jemalloc's Page Allocator (PA) system, including the Hugepage-aware Page Allocator (HPA) and Slab Extent Cache (SEC) components. + +## Overview + +The PA microbenchmark suite consists of two main programs designed to preprocess allocation traces and replay them against jemalloc's internal PA system to measure performance, memory usage, and allocation patterns. + +To summarize how to run it, assume we have a file `test/stress/pa/data/hpa.csv` collected from a real application using USDT, the simulation can be run as follows: +``` +make tests_pa +./test/stress/pa/pa_data_preprocessor hpa test/stress/pa/data/hpa.csv test/stress/pa/data/sample_hpa_output.csv +./test/stress/pa/pa_microbench -p -o test/stress/pa/data/sample_hpa_stats.csv test/stress/pa/data/sample_hpa_output.csv +``` + +If it's sec, simply replace the first parameter passed to `pa_data_preprocessor` with sec. 
+ +## Architecture + +### PA System Components + +The Page Allocator sits at the core of jemalloc's memory management hierarchy: + +``` +Application + ↓ +Arena (tcache, bins) + ↓ +PA (Page Allocator) ← This is what we benchmark + ├── HPA (Hugepage-aware Page Allocator) + └── SEC (Slab Extent Cache) + ↓ +Extent Management (emap, edata) + ↓ +Base Allocator + ↓ +OS (mmap/munmap) +``` + +### Microbenchmark Architecture + +``` +Raw Allocation Traces + ↓ +[pa_data_preprocessor] ← Preprocesses and filters traces + ↓ +CSV alloc/dalloc Files + ↓ +[pa_microbench] ← Replays against real PA system + ↓ +Performance Statistics & Analysis +``` + +## Programs + +### 1. pa_data_preprocessor + +A C++ data preprocessing tool that converts raw allocation traces into a standardized CSV format suitable for microbenchmarking. + +**Purpose:** +- Parse and filter raw allocation trace data +- Convert various trace formats to standardized CSV +- Filter by process ID, thread ID, or other criteria +- Validate and clean allocation/deallocation sequences + +### 2. pa_microbench + +A C microbenchmark that replays allocation traces against jemalloc's actual PA system to measure performance and behavior with HPA statistics collection. 
+ +**Purpose:** +- Initialize real PA infrastructure (HPA, SEC, base allocators, emaps) +- Replay allocation/deallocation sequences from CSV traces +- Measure allocation latency, memory usage, and fragmentation +- Test different PA configurations (HPA-only vs HPA+SEC) +- Generate detailed HPA internal statistics + +**Key Features:** +- **Real PA Integration**: Uses jemalloc's actual PA implementation, not simulation +- **Multi-shard Support**: Tests allocation patterns across multiple PA shards +- **Configurable Modes**: Supports HPA-only mode (`-p`) and HPA+SEC mode (`-s`) +- **Statistics Output**: Detailed per-shard statistics and timing data +- **Configurable Intervals**: Customizable statistics output frequency (`-i/--interval`) + +## Building + +### Compilation + +```bash +# Build both PA microbenchmark tools +cd /path/to/jemalloc +make tests_pa +``` + +This creates: +- `test/stress/pa/pa_data_preprocessor` - Data preprocessing tool +- `test/stress/pa/pa_microbench` - PA microbenchmark + +## Usage + +### Data Preprocessing + +```bash +# Basic preprocessing +./test/stress/pa/pa_data_preprocessor input_trace.txt output.csv +``` + +### Microbenchmark Execution + +```bash +# Run with HPA + SEC (default mode) +./test/stress/pa/pa_microbench -s -o stats.csv trace.csv + +# Run with HPA-only (no SEC) +./test/stress/pa/pa_microbench -p -o stats.csv trace.csv + +# Show help +./test/stress/pa/pa_microbench -h +``` diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c new file mode 100644 index 00000000..4ad3652d --- /dev/null +++ b/test/stress/pa/pa_microbench.c @@ -0,0 +1,672 @@ +#include "test/jemalloc_test.h" + +/* Additional includes for PA functionality */ +#include "jemalloc/internal/pa.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/sz.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/ehooks.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/sec.h" 
+#include "jemalloc/internal/emap.h" +#include "jemalloc/internal/psset.h" + +/* + * PA Microbenchmark (Simplified Version) + * + * This tool reads allocation traces and simulates PA behavior + * for testing and understanding the allocation patterns. + * + * Features: + * 1. Reads CSV input file with format: shard_ind,operation,size_or_alloc_index,is_frequent + * 2. Simulates allocations/deallocations tracking + * 3. Provides basic statistics analysis + * 4. Validates the framework setup + */ + +#define MAX_LINE_LENGTH 1024 +#define MAX_ALLOCATIONS 10000000 +#define MAX_ARENAS 128 + +typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t; + +typedef struct { + int shard_ind; + pa_op_t operation; + size_t size_or_alloc_index; + int is_frequent; +} pa_event_t; + +typedef struct { + edata_t *edata; + size_t size; + int shard_ind; + bool active; +} allocation_record_t; + +/* Structure to group per-shard tracking statistics */ +typedef struct { + uint64_t alloc_count; /* Number of allocations */ + uint64_t dealloc_count; /* Number of deallocations */ + uint64_t bytes_allocated; /* Current bytes allocated */ +} shard_stats_t; + +/* Structure to group per-shard PA infrastructure */ +typedef struct { + base_t *base; /* Base allocator */ + emap_t emap; /* Extent map */ + pa_shard_t pa_shard; /* PA shard */ + pa_shard_stats_t shard_stats; /* PA shard statistics */ + malloc_mutex_t stats_mtx; /* Statistics mutex */ +} shard_infrastructure_t; + +static FILE *g_stats_output = NULL; /* Output file for stats */ +static size_t g_alloc_counter = 0; /* Global allocation counter */ +static allocation_record_t *g_alloc_records = + NULL; /* Global allocation tracking */ +static bool g_use_sec = true; /* Global flag for SEC vs HPA-only */ + +/* Refactored arrays using structures */ +static shard_stats_t *g_shard_stats = NULL; /* Per-shard tracking statistics */ +static shard_infrastructure_t *g_shard_infra = + NULL; /* Per-shard PA infrastructure */ +static pa_central_t g_pa_central; /* 
Global PA central */ + +static void cleanup_pa_infrastructure(int num_shards); + +static bool +initialize_pa_infrastructure(int num_shards) { + /* + * Note when we call malloc, it resolves to je_malloc, while internal + * functions like base_new resolve to jet_malloc. This is because this + * file is compiled with -DJEMALLOC_JET as a test. This allows us to + * completely isolate the PA infrastructure benchmark from the rest of + * the jemalloc usage. + */ + void *dummy_jet = jet_malloc(16); + if (dummy_jet == NULL) { + fprintf(stderr, "Failed to initialize JET jemalloc\n"); + return 1; + } + + /* Force JET system to be fully initialized */ + if (jet_mallctl("epoch", NULL, NULL, NULL, 0) != 0) { + fprintf(stderr, "Failed to initialize JET system fully\n"); + jet_free(dummy_jet); + return 1; + } + jet_free(dummy_jet); + + /* Allocate shard tracking statistics */ + g_shard_stats = calloc(num_shards, sizeof(shard_stats_t)); + if (g_shard_stats == NULL) { + printf("DEBUG: Failed to allocate shard stats\n"); + return true; + } + + /* Allocate shard infrastructure */ + g_shard_infra = calloc(num_shards, sizeof(shard_infrastructure_t)); + if (g_shard_infra == NULL) { + printf("DEBUG: Failed to allocate shard infrastructure\n"); + free(g_shard_stats); + return true; + } + + /* Initialize one base allocator for PA central */ + base_t *central_base = base_new(tsd_tsdn(tsd_fetch()), 0 /* ind */, + (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); + if (central_base == NULL) { + printf("DEBUG: Failed to create central_base\n"); + free(g_shard_stats); + free(g_shard_infra); + return true; + } + + /* Initialize PA central with HPA enabled */ + if (pa_central_init(&g_pa_central, central_base, true /* hpa */, + &hpa_hooks_default)) { + printf("DEBUG: Failed to initialize PA central\n"); + base_delete(tsd_tsdn(tsd_fetch()), central_base); + free(g_shard_stats); + free(g_shard_infra); + return true; + } + + for (int i = 0; i < num_shards; i++) { + /* 
Create a separate base allocator for each shard */ + g_shard_infra[i].base = base_new(tsd_tsdn(tsd_fetch()), + i /* ind */, (extent_hooks_t *)&ehooks_default_extent_hooks, + /* metadata_use_hooks */ true); + if (g_shard_infra[i].base == NULL) { + printf("DEBUG: Failed to create base %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Initialize emap for this shard */ + if (emap_init(&g_shard_infra[i].emap, g_shard_infra[i].base, + /* zeroed */ false)) { + printf("DEBUG: Failed to initialize emap %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Initialize stats mutex */ + if (malloc_mutex_init(&g_shard_infra[i].stats_mtx, + "pa_shard_stats", WITNESS_RANK_OMIT, + malloc_mutex_rank_exclusive)) { + printf( + "DEBUG: Failed to initialize stats mutex %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Initialize PA shard */ + nstime_t cur_time; + nstime_init_zero(&cur_time); + + if (pa_shard_init(tsd_tsdn(tsd_fetch()), + &g_shard_infra[i].pa_shard, &g_pa_central, + &g_shard_infra[i].emap /* emap */, + g_shard_infra[i].base, i /* ind */, + &g_shard_infra[i].shard_stats /* stats */, + &g_shard_infra[i].stats_mtx /* stats_mtx */, + &cur_time /* cur_time */, + SIZE_MAX /* oversize_threshold */, + -1 /* dirty_decay_ms */, -1 /* muzzy_decay_ms */)) { + printf("DEBUG: Failed to initialize PA shard %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + + /* Enable HPA for this shard with proper configuration */ + hpa_shard_opts_t hpa_opts = HPA_SHARD_OPTS_DEFAULT; + hpa_opts.deferral_allowed = + false; /* No background threads in microbench */ + + sec_opts_t sec_opts = SEC_OPTS_DEFAULT; + if (!g_use_sec) { + /* Disable SEC by setting nshards to 0 */ + sec_opts.nshards = 0; + } + + if 
(pa_shard_enable_hpa(tsd_tsdn(tsd_fetch()), + &g_shard_infra[i].pa_shard, &hpa_opts, &sec_opts)) { + fprintf( + stderr, "Failed to enable HPA on shard %d\n", i); + /* Clean up partially initialized shards */ + cleanup_pa_infrastructure(num_shards); + return true; + } + } + + printf("PA infrastructure configured: HPA=enabled, SEC=%s\n", + g_use_sec ? "enabled" : "disabled"); + + return false; +} + +static void +cleanup_pa_infrastructure(int num_shards) { + if (g_shard_infra != NULL) { + for (int i = 0; i < num_shards; i++) { + pa_shard_destroy( + tsd_tsdn(tsd_fetch()), &g_shard_infra[i].pa_shard); + if (g_shard_infra[i].base != NULL) { + base_delete(tsd_tsdn(tsd_fetch()), + g_shard_infra[i].base); + } + } + free(g_shard_infra); + g_shard_infra = NULL; + } + + if (g_shard_stats != NULL) { + free(g_shard_stats); + g_shard_stats = NULL; + } +} + +static bool +parse_csv_line(const char *line, pa_event_t *event) { + /* Expected format: shard_ind,operation,size_or_alloc_index,is_frequent */ + int operation; + int fields = sscanf(line, "%d,%d,%zu,%d", &event->shard_ind, &operation, + &event->size_or_alloc_index, &event->is_frequent); + + if (fields < 3) { /* is_frequent is optional */ + return false; + } + + if (fields == 3) { + event->is_frequent = 0; /* Default value */ + } + + if (operation == 0) { + event->operation = PA_ALLOC; + } else if (operation == 1) { + event->operation = PA_DALLOC; + } else { + return false; + } + + return true; +} + +static size_t +load_trace_file(const char *filename, pa_event_t **events, int *max_shard_id) { + FILE *file = fopen(filename, "r"); + if (!file) { + fprintf(stderr, "Failed to open trace file: %s\n", filename); + return 0; + } + + *events = malloc(MAX_ALLOCATIONS * sizeof(pa_event_t)); + if (!*events) { + fclose(file); + return 0; + } + + char line[MAX_LINE_LENGTH]; + size_t count = 0; + *max_shard_id = 0; + + /* Skip header line */ + if (fgets(line, sizeof(line), file) == NULL) { + fclose(file); + free(*events); + return 0; + } + 
+ while (fgets(line, sizeof(line), file) && count < MAX_ALLOCATIONS) { + if (parse_csv_line(line, &(*events)[count])) { + if ((*events)[count].shard_ind > *max_shard_id) { + *max_shard_id = (*events)[count].shard_ind; + } + count++; + } + } + + fclose(file); + printf("Loaded %zu events from %s\n", count, filename); + printf("Maximum shard ID found: %d\n", *max_shard_id); + return count; +} + +static void +collect_hpa_stats(int shard_id, hpa_shard_stats_t *hpa_stats_out) { + /* Get tsdn for statistics collection */ + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* Clear the output structure */ + memset(hpa_stats_out, 0, sizeof(hpa_shard_stats_t)); + + /* Check if this shard has HPA enabled */ + if (!g_shard_infra[shard_id].pa_shard.ever_used_hpa) { + return; + } + + /* Merge HPA statistics from the shard */ + hpa_shard_stats_merge( + tsdn, &g_shard_infra[shard_id].pa_shard.hpa_shard, hpa_stats_out); +} + +static void +print_shard_stats(int shard_id, size_t operation_count) { + if (!g_stats_output) { + return; + } + + /* Collect HPA statistics */ + hpa_shard_stats_t hpa_stats; + collect_hpa_stats(shard_id, &hpa_stats); + psset_stats_t *psset_stats = &hpa_stats.psset_stats; + + /* Total pageslabs */ + size_t total_pageslabs = psset_stats->merged.npageslabs; + + /* Full pageslabs breakdown by hugification */ + size_t full_pageslabs_non_huge = + psset_stats->full_slabs[0].npageslabs; /* [0] = non-hugified */ + size_t full_pageslabs_huge = + psset_stats->full_slabs[1].npageslabs; /* [1] = hugified */ + size_t full_pageslabs_total = full_pageslabs_non_huge + + full_pageslabs_huge; + + /* Empty pageslabs breakdown by hugification */ + size_t empty_pageslabs_non_huge = + psset_stats->empty_slabs[0].npageslabs; /* [0] = non-hugified */ + size_t empty_pageslabs_huge = + psset_stats->empty_slabs[1].npageslabs; /* [1] = hugified */ + size_t empty_pageslabs_total = empty_pageslabs_non_huge + + empty_pageslabs_huge; + + /* Hugified pageslabs (full + empty + partial) */ + size_t 
hugified_pageslabs = full_pageslabs_huge + empty_pageslabs_huge; + /* Add hugified partial slabs */ + for (int i = 0; i < PSSET_NPSIZES; i++) { + hugified_pageslabs += + psset_stats->nonfull_slabs[i][1].npageslabs; + } + + /* Dirty bytes */ + size_t dirty_bytes = psset_stats->merged.ndirty * PAGE; + + /* Output enhanced stats with detailed breakdown */ + fprintf(g_stats_output, + "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu\n", + operation_count, shard_id, g_shard_stats[shard_id].alloc_count, + g_shard_stats[shard_id].dealloc_count, + g_shard_stats[shard_id].bytes_allocated, total_pageslabs, + full_pageslabs_total, empty_pageslabs_total, hugified_pageslabs, + full_pageslabs_non_huge, full_pageslabs_huge, + empty_pageslabs_non_huge, empty_pageslabs_huge, dirty_bytes, + hpa_stats.nonderived_stats.nhugifies, + hpa_stats.nonderived_stats.nhugify_failures, + hpa_stats.nonderived_stats.ndehugifies); + fflush(g_stats_output); +} + +static void +simulate_trace( + int num_shards, pa_event_t *events, size_t count, size_t stats_interval) { + uint64_t total_allocs = 0, total_deallocs = 0; + uint64_t total_allocated_bytes = 0; + + printf("Starting simulation with %zu events across %d shards...\n", + count, num_shards); + + for (size_t i = 0; i < count; i++) { + pa_event_t *event = &events[i]; + + /* Validate shard index */ + if (event->shard_ind >= num_shards) { + fprintf(stderr, + "Warning: Invalid shard index %d (max %d)\n", + event->shard_ind, num_shards - 1); + continue; + } + + switch (event->operation) { + case PA_ALLOC: { + size_t size = event->size_or_alloc_index; + + /* Get tsdn and calculate parameters for PA */ + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + szind_t szind = sz_size2index(size); + bool slab = + event + ->is_frequent; /* Use frequent_reuse for slab */ + bool deferred_work_generated = false; + + /* Allocate using PA allocator */ + edata_t *edata = pa_alloc(tsdn, + &g_shard_infra[event->shard_ind].pa_shard, size, + PAGE /* alignment */, 
slab, szind, false /* zero */, + false /* guarded */, &deferred_work_generated); + + if (edata != NULL) { + /* Store allocation record */ + g_alloc_records[g_alloc_counter].edata = edata; + g_alloc_records[g_alloc_counter].size = size; + g_alloc_records[g_alloc_counter].shard_ind = + event->shard_ind; + g_alloc_records[g_alloc_counter].active = true; + g_alloc_counter++; + + /* Update shard-specific stats */ + g_shard_stats[event->shard_ind].alloc_count++; + g_shard_stats[event->shard_ind] + .bytes_allocated += size; + + total_allocs++; + total_allocated_bytes += size; + } + break; + } + case PA_DALLOC: { + size_t alloc_index = event->size_or_alloc_index; + if (alloc_index < g_alloc_counter + && g_alloc_records[alloc_index].active + && g_alloc_records[alloc_index].shard_ind + == event->shard_ind) { + /* Get tsdn for PA */ + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + bool deferred_work_generated = false; + + /* Deallocate using PA allocator */ + pa_dalloc(tsdn, + &g_shard_infra[event->shard_ind].pa_shard, + g_alloc_records[alloc_index].edata, + &deferred_work_generated); + + /* Update shard-specific stats */ + g_shard_stats[event->shard_ind].dealloc_count++; + g_shard_stats[event->shard_ind] + .bytes_allocated -= + g_alloc_records[alloc_index].size; + + g_alloc_records[alloc_index].active = false; + total_deallocs++; + } + break; + } + } + + /* Periodic stats output and progress reporting */ + if (stats_interval > 0 && (i + 1) % stats_interval == 0) { + /* Print stats for all shards */ + for (int j = 0; j < num_shards; j++) { + print_shard_stats(j, i + 1); + } + } + } + + printf("\nSimulation completed:\n"); + printf(" Total allocations: %lu\n", total_allocs); + printf(" Total deallocations: %lu\n", total_deallocs); + printf(" Total allocated: %lu bytes\n", total_allocated_bytes); + printf(" Active allocations: %lu\n", g_alloc_counter - total_deallocs); + + /* Print final stats for all shards */ + printf("\nFinal shard statistics:\n"); + for (int i = 0; i < num_shards; 
i++) { + printf( + " Shard %d: Allocs=%lu, Deallocs=%lu, Active Bytes=%lu\n", + i, g_shard_stats[i].alloc_count, + g_shard_stats[i].dealloc_count, + g_shard_stats[i].bytes_allocated); + + /* Final stats to file */ + print_shard_stats(i, count); + } +} + +static void +cleanup_remaining_allocations(int num_shards) { + size_t cleaned_up = 0; + + printf("Cleaning up remaining allocations...\n"); + + for (size_t i = 0; i < g_alloc_counter; i++) { + if (g_alloc_records[i].active) { + int shard_ind = g_alloc_records[i].shard_ind; + if (shard_ind < num_shards) { + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + bool deferred_work_generated = false; + + pa_dalloc(tsdn, + &g_shard_infra[shard_ind].pa_shard, + g_alloc_records[i].edata, + &deferred_work_generated); + + g_alloc_records[i].active = false; + cleaned_up++; + } + } + } + + printf("Cleaned up %zu remaining allocations\n", cleaned_up); +} + +static void +print_usage(const char *program) { + printf("Usage: %s [options] \n", program); + printf("Options:\n"); + printf(" -h, --help Show this help message\n"); + printf( + " -o, --output FILE Output file for statistics (default: stdout)\n"); + printf(" -s, --sec Use SEC (default)\n"); + printf(" -p, --hpa-only Use HPA only (no SEC)\n"); + printf( + " -i, --interval N Stats print interval (default: 100000, 0=disable)\n"); + printf( + "\nTrace file format: shard_ind,operation,size_or_alloc_index,is_frequent\n"); + printf(" - operation: 0=alloc, 1=dealloc\n"); + printf(" - is_frequent: optional column\n"); +} + +int +main(int argc, char *argv[]) { + const char *trace_file = NULL; + const char *stats_output_file = NULL; + size_t stats_interval = 100000; /* Default stats print interval */ + /* Parse command line arguments */ + for (int i = 1; i < argc; i++) { + if (strcmp(argv[i], "-h") == 0 + || strcmp(argv[i], "--help") == 0) { + print_usage(argv[0]); + return 0; + } else if (strcmp(argv[i], "-o") == 0 + || strcmp(argv[i], "--output") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, 
+ "Error: %s requires an argument\n", + argv[i]); + return 1; + } + stats_output_file = argv[++i]; + } else if (strcmp(argv[i], "-s") == 0 + || strcmp(argv[i], "--sec") == 0) { + g_use_sec = true; + } else if (strcmp(argv[i], "-p") == 0 + || strcmp(argv[i], "--hpa-only") == 0) { + g_use_sec = false; + } else if (strcmp(argv[i], "-i") == 0 + || strcmp(argv[i], "--interval") == 0) { + if (i + 1 >= argc) { + fprintf(stderr, + "Error: %s requires an argument\n", + argv[i]); + return 1; + } + stats_interval = (size_t)atol(argv[++i]); + } else if (argv[i][0] != '-') { + trace_file = argv[i]; + } else { + fprintf(stderr, "Unknown option: %s\n", argv[i]); + print_usage(argv[0]); + return 1; + } + } + + if (!trace_file) { + fprintf(stderr, "Error: No trace file specified\n"); + print_usage(argv[0]); + return 1; + } + + printf("Trace file: %s\n", trace_file); + printf("Mode: %s\n", g_use_sec ? "PA with SEC" : "HPA only"); + + /* Open stats output file */ + if (stats_output_file) { + g_stats_output = fopen(stats_output_file, "w"); + if (!g_stats_output) { + fprintf(stderr, + "Failed to open stats output file: %s\n", + stats_output_file); + return 1; + } + printf("Stats output: %s\n", stats_output_file); + + /* Write CSV header */ + fprintf(g_stats_output, + "operation_count,shard_id,alloc_count,dealloc_count,active_bytes," + "total_pageslabs,full_pageslabs_total,empty_pageslabs_total,hugified_pageslabs," + "full_pageslabs_non_huge,full_pageslabs_huge," + "empty_pageslabs_non_huge,empty_pageslabs_huge," + "dirty_bytes,nhugifies,nhugify_failures,ndehugifies\n"); + } + + /* Load trace data and determine max number of arenas */ + pa_event_t *events; + int max_shard_id; + size_t event_count = load_trace_file( + trace_file, &events, &max_shard_id); + if (event_count == 0) { + if (g_stats_output) + fclose(g_stats_output); + return 1; + } + + int num_shards = max_shard_id + 1; /* shard IDs are 0-based */ + if (num_shards > MAX_ARENAS) { + fprintf(stderr, "Error: Too many arenas 
required (%d > %d)\n", num_shards, MAX_ARENAS); free(events); if (g_stats_output) fclose(g_stats_output); return 1; } + + /* Allocate allocation tracking array */ + g_alloc_records = malloc(event_count * sizeof(allocation_record_t)); + + if (!g_alloc_records) { + fprintf( + stderr, "Failed to allocate allocation tracking array\n"); + free(events); + if (g_stats_output) { + fclose(g_stats_output); + } + return 1; + } + + /* Initialize PA infrastructure */ + if (initialize_pa_infrastructure(num_shards)) { + fprintf(stderr, "Failed to initialize PA infrastructure\n"); + free(events); + free(g_alloc_records); + if (g_stats_output) { + fclose(g_stats_output); + } + return 1; + } + + /* Run simulation */ + simulate_trace(num_shards, events, event_count, stats_interval); + + /* Clean up remaining allocations */ + cleanup_remaining_allocations(num_shards); + + /* Cleanup PA infrastructure */ + cleanup_pa_infrastructure(num_shards); + + /* Cleanup */ + free(g_alloc_records); + free(events); + + if (g_stats_output) { + fclose(g_stats_output); + printf("Statistics written to: %s\n", stats_output_file); + } + + return 0; +} From 67435187d103a9bef7995be3d625712329578e64 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Fri, 12 Sep 2025 18:07:12 -0700 Subject: [PATCH 2543/2608] Improve the portability of grep patterns in configure.ac The configure.ac script uses backslash plus in its grep patterns to match one or more occurrences. This is a GNU grep extension to the Basic Regular Expressions syntax that fails on systems with a more traditional grep. This change fixes grep patterns that use backslash plus to use a star instead. 
Closes: #2777 --- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 8ea092d6..5e907511 100644 --- a/configure.ac +++ b/configure.ac @@ -652,7 +652,7 @@ AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], [Version string])], [ - echo "${with_version}" | grep ['^[0-9]\+\.[0-9]\+\.[0-9]\+-[0-9]\+-g[0-9a-f]\+$'] 2>&1 1>/dev/null + echo "${with_version}" | grep ['^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*-[0-9][0-9]*-g[0-9a-f][0-9a-f]*$'] 2>&1 1>/dev/null if test $? -eq 0 ; then echo "$with_version" > "${objroot}VERSION" else @@ -2059,7 +2059,7 @@ if test "x${je_cv_lg_hugepage}" = "x" ; then dnl Hugepagesize: 2048 kB if test -e "/proc/meminfo" ; then hpsk=[`cat /proc/meminfo 2>/dev/null | \ - grep -e '^Hugepagesize:[[:space:]]\+[0-9]\+[[:space:]]kB$' | \ + grep '^Hugepagesize:[[:space:]][[:space:]]*[0-9][0-9]*[[:space:]]kB$' | \ awk '{print $2}'`] if test "x${hpsk}" != "x" ; then je_cv_lg_hugepage=10 From d70882a05d02e21c27990d4c6deb5c5bf614d9ec Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 7 Aug 2025 09:34:30 -0700 Subject: [PATCH 2544/2608] [sdt] Add some tracepoints to sec and hpa modules --- src/hpa.c | 8 ++++++++ src/hpdata.c | 5 ----- src/sec.c | 5 +++++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index e297e411..d848b1ed 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -6,6 +6,7 @@ #include "jemalloc/internal/fb.h" #include "jemalloc/internal/witness.h" +#include "jemalloc/internal/jemalloc_probe.h" #define HPA_EDEN_SIZE (128 * HUGEPAGE) @@ -752,6 +753,8 @@ hpa_try_alloc_one_no_grow( } void *addr = hpdata_reserve_alloc(ps, size); + JE_USDT(hpa_alloc, 5, shard->ind, addr, size, hpdata_nactive_get(ps), + hpdata_age_get(ps)); edata_init(edata, shard->ind, addr, size, /* slab */ false, SC_NSIZES, /* sn */ hpdata_age_get(ps), extent_state_active, /* zeroed */ false, /* committed */ true, EXTENT_PAI_HPA, @@ -771,6 +774,9 @@ 
hpa_try_alloc_one_no_grow( if (err) { hpdata_unreserve( ps, edata_addr_get(edata), edata_size_get(edata)); + JE_USDT(hpa_dalloc_err, 5, shard->ind, edata_addr_get(edata), + edata_size_get(edata), hpdata_nactive_get(ps), + hpdata_age_get(ps)); /* * We should arguably reset dirty state here, but this would * require some sort of prepare + commit functionality that's a @@ -1024,6 +1030,8 @@ hpa_dalloc_locked(tsdn_t *tsdn, hpa_shard_t *shard, edata_t *edata) { psset_update_begin(&shard->psset, ps); hpdata_unreserve(ps, unreserve_addr, unreserve_size); + JE_USDT(hpa_dalloc, 5, shard->ind, unreserve_addr, unreserve_size, + hpdata_nactive_get(ps), hpdata_age_get(ps)); hpa_update_purge_hugify_eligibility(tsdn, shard, ps); psset_update_end(&shard->psset, ps); } diff --git a/src/hpdata.c b/src/hpdata.c index e18e03cd..f9c8f4fa 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -2,7 +2,6 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/hpdata.h" -#include "jemalloc/internal/jemalloc_probe.h" static int hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { @@ -102,8 +101,6 @@ hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); hpdata->h_ntouched += new_dirty; - JE_USDT(hpa_reserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, - new_dirty, largest_unchosen_range); /* * If we allocated out of a range that was the longest in the hpdata, it @@ -164,8 +161,6 @@ hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { hpdata->h_nactive -= npages; hpdata_assert_consistent(hpdata); - JE_USDT(hpa_unreserve, 5, npages, hpdata->h_nactive, hpdata->h_ntouched, - old_longest_range, new_range_len); } size_t diff --git a/src/sec.c b/src/sec.c index 36cd2dcc..c827dd5c 100644 --- a/src/sec.c +++ b/src/sec.c @@ -2,6 +2,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/sec.h" +#include 
"jemalloc/internal/jemalloc_probe.h" static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, @@ -266,6 +267,7 @@ sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, deferred_work_generated); } } + JE_USDT(sec_alloc, 5, sec, shard, edata, size, frequent_reuse); return edata; } @@ -273,6 +275,7 @@ static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; + JE_USDT(sec_expand, 4, sec, edata, old_size, new_size); return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, deferred_work_generated); } @@ -281,6 +284,7 @@ static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated) { sec_t *sec = (sec_t *)self; + JE_USDT(sec_shrink, 4, sec, edata, old_size, new_size); return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, deferred_work_generated); } @@ -351,6 +355,7 @@ sec_dalloc( return; } sec_shard_t *shard = sec_shard_pick(tsdn, sec); + JE_USDT(sec_dalloc, 3, sec, shard, edata); malloc_mutex_lock(tsdn, &shard->mtx); if (shard->enabled) { sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); From 755735a6bf8f7b7f4e31ebc684f0fce7ac22dd78 Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 17 Sep 2025 10:18:25 -0700 Subject: [PATCH 2545/2608] Remove Travis Windows CI for now since it has infra failures. 
--- .travis.yml | 24 ------------------------ scripts/gen_travis.py | 5 ++++- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index 433288cb..643da4f1 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,30 +10,6 @@ dist: jammy jobs: include: - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CONFIGURE_FLAGS="--enable-debug" - - os: windows - arch: amd64 - env: CC=gcc CXX=g++ CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" EXTRA_CFLAGS="-fcommon" - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes - - os: windows - arch: amd64 - env: CC=cl.exe CXX=cl.exe CROSS_COMPILE_32BIT=yes CONFIGURE_FLAGS="--enable-debug" - os: linux arch: amd64 env: CC=gcc CXX=g++ EXTRA_CFLAGS="-Werror -Wno-array-bounds" diff --git a/scripts/gen_travis.py b/scripts/gen_travis.py index d43c802e..3f7aeab0 100755 --- a/scripts/gen_travis.py +++ b/scripts/gen_travis.py @@ -333,7 +333,10 @@ EXTRA_CFLAGS="-Werror -Wno-array-bounds" def main(): jobs = '\n'.join(( - generate_windows(AMD64), + # Travis is failing on Windows due to infra failures, comment it out for + # now. Should resume once it is fixed. + + # generate_windows(AMD64), # Travis currently provides only FreeBSD 12.1 which is EOL. Builds are # not working as of Jan 2024. Disable the tests for now to avoid the From de886e05d27ef3806dca802f3b9d9a0af7765046 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 29 Sep 2025 09:41:14 -0700 Subject: [PATCH 2546/2608] Revert "Remove an unused function and global variable" This reverts commit acd85e5359fc3ee38388e0763ceac72db7ca7150. 
--- test/unit/hpa_vectorized_madvise_large_batch.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index e1393225..c974500c 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -113,6 +113,12 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { From 2688047b56e6ef21d960e40281cb13774c8c17ab Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 29 Sep 2025 09:41:51 -0700 Subject: [PATCH 2547/2608] Revert "Do not dehugify when purging" This reverts commit 16c5abd1cd0a21e8f985f77d8e342c8ed91450d7. --- include/jemalloc/internal/hpa_hooks.h | 1 + src/hpa.c | 9 +++++++- src/hpa_hooks.c | 12 ++++++++-- test/unit/hpa.c | 23 +++++++++++++++++++ test/unit/hpa_vectorized_madvise.c | 11 +++++++++ .../unit/hpa_vectorized_madvise_large_batch.c | 1 + 6 files changed, 54 insertions(+), 3 deletions(-) diff --git a/include/jemalloc/internal/hpa_hooks.h b/include/jemalloc/internal/hpa_hooks.h index 5e68e349..f50ff58f 100644 --- a/include/jemalloc/internal/hpa_hooks.h +++ b/include/jemalloc/internal/hpa_hooks.h @@ -10,6 +10,7 @@ struct hpa_hooks_s { void (*unmap)(void *ptr, size_t size); void (*purge)(void *ptr, size_t size); bool (*hugify)(void *ptr, size_t size, bool sync); + void (*dehugify)(void *ptr, size_t size); void (*curtime)(nstime_t *r_time, bool first_reading); uint64_t (*ms_since)(nstime_t *r_time); bool (*vectorized_purge)(void *vec, size_t vlen, size_t nbytes); diff --git a/src/hpa.c b/src/hpa.c index d848b1ed..271b1af4 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -433,11 +433,18 @@ hpa_purge_actual_unlocked( hpa_range_accum_init(&accum, vec, len); for 
(size_t i = 0; i < batch_sz; ++i) { + hpdata_t *to_purge = batch[i].hp; + + /* Actually do the purging, now that the lock is dropped. */ + if (batch[i].dehugify) { + shard->central->hooks.dehugify( + hpdata_addr_get(to_purge), HUGEPAGE); + } void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { + to_purge, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); hpa_range_accum_add( diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index e40d30ec..14005ae0 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -8,13 +8,14 @@ static void *hpa_hooks_map(size_t size); static void hpa_hooks_unmap(void *ptr, size_t size); static void hpa_hooks_purge(void *ptr, size_t size); static bool hpa_hooks_hugify(void *ptr, size_t size, bool sync); +static void hpa_hooks_dehugify(void *ptr, size_t size); static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading); static uint64_t hpa_hooks_ms_since(nstime_t *past_nstime); static bool hpa_hooks_vectorized_purge(void *vec, size_t vlen, size_t nbytes); const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, - &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_curtime, - &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; + &hpa_hooks_purge, &hpa_hooks_hugify, &hpa_hooks_dehugify, + &hpa_hooks_curtime, &hpa_hooks_ms_since, &hpa_hooks_vectorized_purge}; static void * hpa_hooks_map(size_t size) { @@ -60,6 +61,13 @@ hpa_hooks_hugify(void *ptr, size_t size, bool sync) { return err; } +static void +hpa_hooks_dehugify(void *ptr, size_t size) { + bool err = pages_nohuge(ptr, size); + JE_USDT(hpa_dehugify, 3, size, ptr, err); + (void)err; +} + static void hpa_hooks_curtime(nstime_t *r_nstime, bool first_reading) { if (first_reading) { diff --git a/test/unit/hpa.c b/test/unit/hpa.c index d62ac762..1fed8a80 100644 --- a/test/unit/hpa.c +++ 
b/test/unit/hpa.c @@ -389,6 +389,12 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -408,6 +414,7 @@ TEST_BEGIN(test_defer_time) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -446,8 +453,10 @@ TEST_BEGIN(test_defer_time) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(1, ndefer_dehugify_calls, "Should have dehugified"); expect_zu_eq(1, ndefer_purge_calls, "Should have purged"); ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; ndefer_purge_calls = 0; /* @@ -468,6 +477,7 @@ TEST_BEGIN(test_defer_time) { nstime_init2(&defer_curtime, 22, 0); hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(1, ndefer_hugify_calls, "Failed to hugify"); + expect_zu_eq(0, ndefer_dehugify_calls, "Unexpected dehugify"); expect_zu_eq(0, ndefer_purge_calls, "Unexpected purge"); ndefer_hugify_calls = 0; @@ -514,6 +524,7 @@ TEST_BEGIN(test_no_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -540,6 +551,7 @@ TEST_BEGIN(test_no_min_purge_interval) { * we have dirty pages. 
*/ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -555,6 +567,7 @@ TEST_BEGIN(test_min_purge_interval) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -580,6 +593,7 @@ TEST_BEGIN(test_min_purge_interval) { * opt.min_purge_interval_ms didn't pass yet. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); /* Minumum purge interval is set to 5 seconds in options. */ @@ -588,6 +602,7 @@ TEST_BEGIN(test_min_purge_interval) { /* Now we should purge, but nothing else. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -603,6 +618,7 @@ TEST_BEGIN(test_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -632,6 +648,7 @@ TEST_BEGIN(test_purge) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only 2 purges, because opt.dirty_mult is set to 0.25 and we still * have 5 active hugepages (1 / 5 = 0.2 < 0.25). 
@@ -648,6 +665,7 @@ TEST_BEGIN(test_purge) { */ expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * We still have completely dirty hugepage, but we are below * opt.dirty_mult. @@ -667,6 +685,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -697,6 +716,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* * Expect only one purge call, because opts.experimental_max_purge_nhp * is set to 1. @@ -709,6 +729,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { expect_zu_eq(5, ndefer_hugify_calls, "Expect hugification"); ndefer_hugify_calls = 0; + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We still above the limit for dirty pages. */ expect_zu_eq(1, ndefer_purge_calls, "Expect purge"); ndefer_purge_calls = 0; @@ -717,6 +738,7 @@ TEST_BEGIN(test_experimental_max_purge_nhp) { hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* Finally, we are below the limit, no purges are expected. 
*/ expect_zu_eq(0, ndefer_purge_calls, "Purged too early"); @@ -732,6 +754,7 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index c2aa3b58..8df54d06 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -123,6 +123,12 @@ defer_test_hugify(void *ptr, size_t size, bool sync) { return false; } +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + static nstime_t defer_curtime; static void defer_test_curtime(nstime_t *r_time, bool first_reading) { @@ -142,6 +148,7 @@ TEST_BEGIN(test_vectorized_failure_fallback) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge_fail; @@ -181,6 +188,7 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -223,6 +231,7 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We purge from 2 huge pages, each one 3 dirty continous segments. 
* For opt_process_madvise_max_batch = 2, that is @@ -250,6 +259,7 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; @@ -286,6 +296,7 @@ TEST_BEGIN(test_more_pages_than_batch_page_size) { * we have dirty pages. */ expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); /* We have page batch size = 1. * we have 5 * HP active pages, 3 * HP dirty pages diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index c974500c..a5766620 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -140,6 +140,7 @@ TEST_BEGIN(test_vectorized_purge) { hooks.unmap = &defer_test_unmap; hooks.purge = &defer_test_purge; hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; hooks.curtime = &defer_test_curtime; hooks.ms_since = &defer_test_ms_since; hooks.vectorized_purge = &defer_vectorized_purge; From ace437d26ae9c2b27d08492135da52d211c53e01 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 07:13:12 -0700 Subject: [PATCH 2548/2608] Running clang-format on two files --- src/jemalloc.c | 49 ++++++++++++++++++++++++------------------------- src/pages.c | 14 +++++++------- 2 files changed, 31 insertions(+), 32 deletions(-) diff --git a/src/jemalloc.c b/src/jemalloc.c index 5c77621c..a3f01b3c 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -172,8 +172,7 @@ unsigned ncpus; unsigned opt_debug_double_free_max_scan = SAFETY_CHECK_DOUBLE_FREE_MAX_SCAN_DEFAULT; -size_t opt_calloc_madvise_threshold = - CALLOC_MADVISE_THRESHOLD_DEFAULT; +size_t opt_calloc_madvise_threshold = 
CALLOC_MADVISE_THRESHOLD_DEFAULT; /* Protects arenas initialization. */ static malloc_mutex_t arenas_lock; @@ -261,8 +260,8 @@ _init_init_lock(void) { # ifdef _MSC_VER # pragma section(".CRT$XCU", read) JEMALLOC_SECTION(".CRT$XCU") -JEMALLOC_ATTR(used) static const - void(WINAPI *init_init_lock)(void) = _init_init_lock; +JEMALLOC_ATTR(used) +static const void(WINAPI *init_init_lock)(void) = _init_init_lock; # endif # endif #else @@ -2883,8 +2882,8 @@ malloc_default(size_t size) { */ JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) je_malloc(size_t size) { LOG("core.malloc.entry", "size: %zu", size); void *ret = imalloc_fastpath(size, &malloc_default); @@ -2935,9 +2934,9 @@ JEMALLOC_ATTR(nonnull(1)) } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) - je_aligned_alloc(size_t alignment, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(2) + je_aligned_alloc(size_t alignment, size_t size) { void *ret; static_opts_t sopts; @@ -2976,9 +2975,9 @@ JEMALLOC_EXPORT } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) - je_calloc(size_t num, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE2(1, 2) + je_calloc(size_t num, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -3186,8 +3185,8 @@ je_free_aligned_sized(void *ptr, size_t alignment, size_t size) { #ifdef JEMALLOC_OVERRIDE_MEMALIGN JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) je_memalign(size_t 
alignment, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_memalign(size_t alignment, size_t size) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -3225,8 +3224,8 @@ JEMALLOC_EXPORT #ifdef JEMALLOC_OVERRIDE_VALLOC JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) je_valloc(size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_valloc(size_t size) { void *ret; static_opts_t sopts; @@ -3262,8 +3261,8 @@ JEMALLOC_EXPORT #ifdef JEMALLOC_OVERRIDE_PVALLOC JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) je_pvalloc(size_t size) { void *ret; static_opts_t sopts; @@ -3457,9 +3456,9 @@ JEMALLOC_EXPORT JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN smallocx_return_t #endif JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) - je_mallocx(size_t size, int flags) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ATTR(malloc) JEMALLOC_ALLOC_SIZE(1) + je_mallocx(size_t size, int flags) { void *ret; static_opts_t sopts; dynamic_opts_t dopts; @@ -3639,8 +3638,8 @@ label_oom: } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * - JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) je_rallocx(void *ptr, size_t size, int flags) { LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, size, flags); void *ret = do_rallocx(ptr, size, flags, false); @@ -3689,8 +3688,8 @@ do_realloc_nonnull_zero(void *ptr) { } JEMALLOC_EXPORT - JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN 
void JEMALLOC_NOTHROW * - JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { +JEMALLOC_ALLOCATOR JEMALLOC_RESTRICT_RETURN void JEMALLOC_NOTHROW * +JEMALLOC_ALLOC_SIZE(2) je_realloc(void *ptr, size_t size) { LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); if (likely(ptr != NULL && size != 0)) { diff --git a/src/pages.c b/src/pages.c index 44c57b28..bc1093a3 100644 --- a/src/pages.c +++ b/src/pages.c @@ -622,9 +622,9 @@ pages_dodump(void *addr, size_t size) { # include # include -#ifndef PIDFD_SELF -#define PIDFD_SELF -10000 -#endif +# ifndef PIDFD_SELF +# define PIDFD_SELF -10000 +# endif static atomic_b_t process_madvise_gate = ATOMIC_INIT(true); @@ -659,15 +659,15 @@ pages_purge_process_madvise_impl( * TODO: remove this save/restore of errno after supporting errno * preservation for free() call properly. */ - int saved_errno = get_errno(); + int saved_errno = get_errno(); size_t purged_bytes = (size_t)syscall(JE_SYS_PROCESS_MADVISE_NR, PIDFD_SELF, (struct iovec *)vec, vec_len, MADV_DONTNEED, 0); - if (purged_bytes == (size_t) -1) { + if (purged_bytes == (size_t)-1) { if (errno == EPERM || errno == EINVAL || errno == ENOSYS || errno == EBADF) { /* Process madvise not supported the way we need it. 
*/ - atomic_store_b(&process_madvise_gate, false, - ATOMIC_RELAXED); + atomic_store_b( + &process_madvise_gate, false, ATOMIC_RELAXED); } set_errno(saved_errno); } From a199278f3711bc0806e15e2f5f16004f3b287177 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 25 Aug 2025 13:23:07 -0700 Subject: [PATCH 2549/2608] [HPA] Add ability to start page as huge and more flexibility for purging --- include/jemalloc/internal/hpa.h | 9 + include/jemalloc/internal/hpa_opts.h | 103 ++- include/jemalloc/internal/hpdata.h | 34 +- include/jemalloc/internal/nstime.h | 4 +- include/jemalloc/internal/psset.h | 8 +- src/ctl.c | 12 +- src/hpa.c | 211 ++++-- src/hpa_hooks.c | 6 + src/hpdata.c | 17 +- src/jemalloc.c | 44 ++ src/nstime.c | 17 +- src/pages.c | 10 + src/psset.c | 40 +- src/stats.c | 3 + test/unit/hpa.c | 669 +++++++++++++++++- test/unit/hpa_vectorized_madvise.c | 8 +- .../unit/hpa_vectorized_madvise_large_batch.c | 8 +- test/unit/hpdata.c | 10 +- test/unit/mallctl.c | 3 + test/unit/psset.c | 131 +++- 20 files changed, 1231 insertions(+), 116 deletions(-) diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 7a6ba0b9..131bbb90 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -147,6 +147,15 @@ struct hpa_shard_s { * Last time we performed purge on this shard. */ nstime_t last_purge; + + /* + * Last time when we attempted work (purging or hugifying). If deferral + * of the work is allowed (we have background thread), this is the time + * when background thread checked if purging or hugifying needs to be + * done. If deferral is not allowed, this is the time of (hpa_alloc or + * hpa_dalloc) activity in the shard. 
+ */ + nstime_t last_time_work_attempted; }; bool hpa_hugepage_size_exceeds_limit(void); diff --git a/include/jemalloc/internal/hpa_opts.h b/include/jemalloc/internal/hpa_opts.h index 9e7f76ac..6747c2db 100644 --- a/include/jemalloc/internal/hpa_opts.h +++ b/include/jemalloc/internal/hpa_opts.h @@ -7,8 +7,60 @@ /* * This file is morally part of hpa.h, but is split out for header-ordering * reasons. + * + * All of these hpa_shard_opts below are experimental. We are exploring more + * efficient packing, hugifying, and purging approaches to make efficient + * trade-offs between CPU, memory, latency, and usability. This means all of + * them are at the risk of being deprecated and corresponding configurations + * should be updated once the final version settles. */ +/* + * This enum controls how jemalloc hugifies/dehugifies pages. Each style may be + * more suitable depending on deployment environments. + * + * hpa_hugify_style_none + * Using this means that jemalloc will not be hugifying or dehugifying pages, + * but will let the kernel make those decisions. This style only makes sense + * when deploying on systems where THP are enabled in 'always' mode. With this + * style, you most likely want to have no purging at all (dirty_mult=-1) or + * purge_threshold=HUGEPAGE bytes (2097152 for 2Mb page), although other + * thresholds may work well depending on kernel settings of your deployment + * targets. + * + * hpa_hugify_style_eager + * This style results in jemalloc giving hugepage advice, if needed, to + * anonymous memory immediately after it is mapped, so huge pages can be backing + * that memory at page-fault time. This is usually more efficient than doing + * it later, and it allows us to benefit from the hugepages from the start. + * Same options for purging as for the style 'none' are good starting choices: + * no purging, or purge_threshold=HUGEPAGE, some min_purge_delay_ms that allows + * for page not to be purged quickly, etc. 
This is a good choice if you can + * afford extra memory and your application gets performance increase from + * transparent hughepages. + * + * hpa_hugify_style_lazy + * This style is suitable when you purge more aggressively (you sacrifice CPU + * performance for less memory). When this style is chosen, jemalloc will + * hugify once hugification_threshold is reached, and dehugify before purging. + * If the kernel is configured to use direct compaction you may experience some + * allocation latency when using this style. The best is to measure what works + * better for your application needs, and in the target deployment environment. + * This is a good choice for apps that cannot afford a lot of memory regression, + * but would still like to benefit from backing certain memory regions with + * hugepages. + */ +enum hpa_hugify_style_e { + hpa_hugify_style_auto = 0, + hpa_hugify_style_none = 1, + hpa_hugify_style_eager = 2, + hpa_hugify_style_lazy = 3, + hpa_hugify_style_limit = hpa_hugify_style_lazy + 1 +}; +typedef enum hpa_hugify_style_e hpa_hugify_style_t; + +extern const char *const hpa_hugify_style_names[]; + typedef struct hpa_shard_opts_s hpa_shard_opts_t; struct hpa_shard_opts_s { /* @@ -46,7 +98,8 @@ struct hpa_shard_opts_s { uint64_t hugify_delay_ms; /* - * Hugify pages synchronously. + * Hugify pages synchronously (hugify will happen even if hugify_style + * is not hpa_hugify_style_lazy). */ bool hugify_sync; @@ -59,6 +112,46 @@ struct hpa_shard_opts_s { * Maximum number of hugepages to purge on each purging attempt. */ ssize_t experimental_max_purge_nhp; + + /* + * Minimum number of inactive bytes needed for a non-empty page to be + * considered purgable. + * + * When the number of touched inactive bytes on non-empty hugepage is + * >= purge_threshold, the page is purgable. Empty pages are always + * purgable. Setting this to HUGEPAGE bytes would only purge empty + * pages if using hugify_style_eager and the purges would be exactly + * HUGEPAGE bytes. 
Depending on your kernel settings, this may result + * in better performance. + * + * Please note, when threshold is reached, we will purge all the dirty + * bytes, and not just up to the threshold. If this is PAGE bytes, then + * all the pages that have any dirty bytes are purgable. We treat + * purgability constraint for purge_threshold as stronger than + * dirty_mult, IOW, if no page meets purge_threshold, we will not purge + * even if we are above dirty_mult. + */ + size_t purge_threshold; + + /* + * Minimum number of ms that needs to elapse between HP page becoming + * eligible for purging and actually getting purged. + * + * Setting this to a larger number would give better chance of reusing + * that memory. Setting it to 0 means that page is eligible for purging + * as soon as it meets the purge_threshold. The clock resets when + * purgability of the page changes (page goes from being non-purgable to + * purgable). When using eager style you probably want to allow for + * some delay, to avoid purging the page too quickly and give it time to + * be used. + */ + uint64_t min_purge_delay_ms; + + /* + * Style of hugification/dehugification (see comment at + * hpa_hugify_style_t for options). + */ + hpa_hugify_style_t hugify_style; }; /* clang-format off */ @@ -84,7 +177,13 @@ struct hpa_shard_opts_s { /* min_purge_interval_ms */ \ 5 * 1000, \ /* experimental_max_purge_nhp */ \ - -1 \ + -1, \ + /* size_t purge_threshold */ \ + PAGE, \ + /* min_purge_delay_ms */ \ + 0, \ + /* hugify_style */ \ + hpa_hugify_style_lazy \ } /* clang-format on */ diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index 75550f9b..eb83c900 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -124,6 +124,12 @@ struct hpdata_s { /* The touched pages (using the same definition as above). 
*/ fb_group_t touched_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; + + /* Time when this extent (hpdata) becomes eligible for purging */ + nstime_t h_time_purge_allowed; + + /* True if the extent was huge and empty last time when it was purged */ + bool h_purged_when_empty_and_huge; }; TYPED_LIST(hpdata_empty_list, hpdata_t, ql_link_empty) @@ -284,17 +290,17 @@ hpdata_longest_free_range_set(hpdata_t *hpdata, size_t longest_free_range) { } static inline size_t -hpdata_nactive_get(hpdata_t *hpdata) { +hpdata_nactive_get(const hpdata_t *hpdata) { return hpdata->h_nactive; } static inline size_t -hpdata_ntouched_get(hpdata_t *hpdata) { +hpdata_ntouched_get(const hpdata_t *hpdata) { return hpdata->h_ntouched; } static inline size_t -hpdata_ndirty_get(hpdata_t *hpdata) { +hpdata_ndirty_get(const hpdata_t *hpdata) { return hpdata->h_ntouched - hpdata->h_nactive; } @@ -303,6 +309,26 @@ hpdata_nretained_get(hpdata_t *hpdata) { return HUGEPAGE_PAGES - hpdata->h_ntouched; } +static inline void +hpdata_time_purge_allowed_set(hpdata_t *hpdata, const nstime_t *v) { + nstime_copy(&hpdata->h_time_purge_allowed, v); +} + +static inline const nstime_t * +hpdata_time_purge_allowed_get(const hpdata_t *hpdata) { + return &hpdata->h_time_purge_allowed; +} + +static inline bool +hpdata_purged_when_empty_and_huge_get(const hpdata_t *hpdata) { + return hpdata->h_purged_when_empty_and_huge; +} + +static inline void +hpdata_purged_when_empty_and_huge_set(hpdata_t *hpdata, bool v) { + hpdata->h_purged_when_empty_and_huge = v; +} + static inline void hpdata_assert_empty(hpdata_t *hpdata) { assert(fb_empty(hpdata->active_pages, HUGEPAGE_PAGES)); @@ -360,7 +386,7 @@ hpdata_full(const hpdata_t *hpdata) { return hpdata->h_nactive == HUGEPAGE_PAGES; } -void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age); +void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge); /* * Given an hpdata which can serve an allocation request, pick and reserve an diff --git 
a/include/jemalloc/internal/nstime.h b/include/jemalloc/internal/nstime.h index a10b2de1..0848b9d0 100644 --- a/include/jemalloc/internal/nstime.h +++ b/include/jemalloc/internal/nstime.h @@ -40,6 +40,8 @@ void nstime_isubtract(nstime_t *time, uint64_t subtrahend); void nstime_imultiply(nstime_t *time, uint64_t multiplier); void nstime_idivide(nstime_t *time, uint64_t divisor); uint64_t nstime_divide(const nstime_t *time, const nstime_t *divisor); +uint64_t nstime_ns_between(const nstime_t *earlier, const nstime_t *later); +uint64_t nstime_ms_between(const nstime_t *earlier, const nstime_t *later); uint64_t nstime_ns_since(const nstime_t *past); uint64_t nstime_ms_since(const nstime_t *past); @@ -67,7 +69,7 @@ nstime_init_zero(nstime_t *time) { } JEMALLOC_ALWAYS_INLINE bool -nstime_equals_zero(nstime_t *time) { +nstime_equals_zero(const nstime_t *time) { int diff = nstime_compare(time, &nstime_zero); assert(diff >= 0); return diff == 0; diff --git a/include/jemalloc/internal/psset.h b/include/jemalloc/internal/psset.h index 3fdecaed..f096e414 100644 --- a/include/jemalloc/internal/psset.h +++ b/include/jemalloc/internal/psset.h @@ -121,8 +121,12 @@ void psset_update_end(psset_t *psset, hpdata_t *ps); /* Analogous to the eset_fit; pick a hpdata to serve the request. */ hpdata_t *psset_pick_alloc(psset_t *psset, size_t size); -/* Pick one to purge. */ -hpdata_t *psset_pick_purge(psset_t *psset); +/* + * Pick one to purge that is purgable before given time (inclusive). If now + * is NULL then time is not considered. + */ +hpdata_t *psset_pick_purge(psset_t *psset, const nstime_t *now); + /* Pick one to hugify. 
*/ hpdata_t *psset_pick_hugify(psset_t *psset); diff --git a/src/ctl.c b/src/ctl.c index a4c60ce0..85583bec 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -106,6 +106,9 @@ CTL_PROTO(opt_hpa_hugify_delay_ms) CTL_PROTO(opt_hpa_hugify_sync) CTL_PROTO(opt_hpa_min_purge_interval_ms) CTL_PROTO(opt_experimental_hpa_max_purge_nhp) +CTL_PROTO(opt_hpa_purge_threshold) +CTL_PROTO(opt_hpa_min_purge_delay_ms) +CTL_PROTO(opt_hpa_hugify_style) CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) @@ -469,6 +472,9 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_min_purge_interval_ms"), CTL(opt_hpa_min_purge_interval_ms)}, {NAME("experimental_hpa_max_purge_nhp"), CTL(opt_experimental_hpa_max_purge_nhp)}, + {NAME("hpa_purge_threshold"), CTL(opt_hpa_purge_threshold)}, + {NAME("hpa_min_purge_delay_ms"), CTL(opt_hpa_min_purge_delay_ms)}, + {NAME("hpa_hugify_style"), CTL(opt_hpa_hugify_style)}, {NAME("hpa_dirty_mult"), CTL(opt_hpa_dirty_mult)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, @@ -2137,7 +2143,11 @@ CTL_RO_NL_GEN( opt_hpa_min_purge_interval_ms, opt_hpa_opts.min_purge_interval_ms, uint64_t) CTL_RO_NL_GEN(opt_experimental_hpa_max_purge_nhp, opt_hpa_opts.experimental_max_purge_nhp, ssize_t) - +CTL_RO_NL_GEN(opt_hpa_purge_threshold, opt_hpa_opts.purge_threshold, size_t) +CTL_RO_NL_GEN( + opt_hpa_min_purge_delay_ms, opt_hpa_opts.min_purge_delay_ms, uint64_t) +CTL_RO_NL_GEN(opt_hpa_hugify_style, + hpa_hugify_style_names[opt_hpa_opts.hugify_style], const char *) /* * This will have to change before we publicly document this option; fxp_t and * its representation are internal implementation details. 
diff --git a/src/hpa.c b/src/hpa.c index 271b1af4..27db53a9 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -26,6 +26,8 @@ static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; + bool hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; @@ -97,7 +99,7 @@ hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { static hpdata_t * hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, - uint64_t age, bool *oom) { + uint64_t age, bool hugify_eager, bool *oom) { /* Don't yet support big allocations; these should get filtered out. */ assert(size <= HUGEPAGE); /* @@ -120,7 +122,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } - hpdata_init(ps, central->eden, age); + hpdata_init(ps, central->eden, age, hugify_eager); central->eden = NULL; central->eden_len = 0; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -133,22 +135,20 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, * allocate an edata_t for the new psset. */ if (central->eden == NULL) { - /* - * During development, we're primarily concerned with systems - * with overcommit. Eventually, we should be more careful here. - */ - bool commit = true; /* Allocate address space, bailing if we fail. 
*/ - void *new_eden = pages_map( - NULL, HPA_EDEN_SIZE, HUGEPAGE, &commit); + void *new_eden = central->hooks.map(HPA_EDEN_SIZE); if (new_eden == NULL) { *oom = true; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } + if (hugify_eager) { + central->hooks.hugify( + new_eden, HPA_EDEN_SIZE, /* sync */ false); + } ps = hpa_alloc_ps(tsdn, central); if (ps == NULL) { - pages_unmap(new_eden, HPA_EDEN_SIZE); + central->hooks.unmap(new_eden, HPA_EDEN_SIZE); *oom = true; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; @@ -170,7 +170,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, assert(central->eden_len % HUGEPAGE == 0); assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - hpdata_init(ps, central->eden, age); + hpdata_init(ps, central->eden, age, hugify_eager); char *eden_char = (char *)central->eden; eden_char += HUGEPAGE; @@ -213,6 +213,7 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, shard->npending_purge = 0; nstime_init_zero(&shard->last_purge); + nstime_init_zero(&shard->last_time_work_attempted); shard->stats.npurge_passes = 0; shard->stats.npurges = 0; @@ -274,6 +275,34 @@ hpa_shard_stats_merge( malloc_mutex_unlock(tsdn, &shard->grow_mtx); } +static bool +hpa_is_hugify_eager(hpa_shard_t *shard) { + return shard->opts.hugify_style == hpa_hugify_style_eager; +} + +static bool +hpa_is_hugify_lazy(hpa_shard_t *shard) { + /* When hugify_sync==true we also set/unset HG bit manually */ + return shard->opts.hugify_style == hpa_hugify_style_lazy + || shard->opts.hugify_sync; +} + +static bool +hpa_is_hugify_none(hpa_shard_t *shard) { + return shard->opts.hugify_style == hpa_hugify_style_none; +} + +/* + * Experimentation has shown that when we are purging only HUGEPAGE ranges and + * hugifying eagerly (or thp enabled=always) we get huge pages more often. This + * helps us have more realistic accounting. 
+ */ +static bool +hpa_should_assume_huge(hpa_shard_t *shard, const hpdata_t *ps) { + return (hpa_is_hugify_eager(shard) || hpa_is_hugify_none(shard)) + && hpdata_purged_when_empty_and_huge_get(ps); +} + static bool hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { /* @@ -285,6 +314,20 @@ hpa_good_hugification_candidate(hpa_shard_t *shard, hpdata_t *ps) { >= shard->opts.hugification_threshold; } +static bool +hpa_good_purge_candidate(hpa_shard_t *shard, hpdata_t *ps) { + if (shard->opts.dirty_mult == (fxp_t)-1) { + /* No purging. */ + return false; + } + size_t ndirty = hpdata_ndirty_get(ps); + /* Empty pages are good candidate for purging. */ + if (ndirty > 0 && hpdata_empty(ps)) { + return true; + } + return ndirty * PAGE >= shard->opts.purge_threshold; +} + static size_t hpa_adjusted_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); @@ -316,6 +359,14 @@ hpa_hugify_blocked_by_ndirty(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); + /* + * The page that is purgable may be delayed, but we just want to know + * if there is a need for bg thread to wake up in the future. 
+ */ + hpdata_t *ps = psset_pick_purge(&shard->psset, NULL); + if (ps == NULL) { + return false; + } if (hpa_adjusted_ndirty(tsdn, shard) > hpa_ndirty_max(tsdn, shard)) { return true; } @@ -325,6 +376,20 @@ hpa_should_purge(tsdn_t *tsdn, hpa_shard_t *shard) { return false; } +static void +hpa_assume_huge(tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + + assert(hpa_should_assume_huge(shard, ps)); + if (hpdata_huge_get(ps) || hpdata_empty(ps)) { + return; + } + + if (hpdata_ntouched_get(ps) != HUGEPAGE_PAGES) { + hpdata_hugify(ps); + } +} + static void hpa_update_purge_hugify_eligibility( tsdn_t *tsdn, hpa_shard_t *shard, hpdata_t *ps) { @@ -356,13 +421,28 @@ hpa_update_purge_hugify_eligibility( * allocator's end at all; we just try to pack allocations in a * hugepage-friendly manner and let the OS hugify in the background. */ - hpdata_purge_allowed_set(ps, hpdata_ndirty_get(ps) > 0); - if (hpa_good_hugification_candidate(shard, ps) + if (hpa_should_assume_huge(shard, ps)) { + /* Assume it is huge without the need to madvise */ + hpa_assume_huge(tsdn, shard, ps); + } + if (hpa_is_hugify_lazy(shard) + && hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; shard->central->hooks.curtime(&now, /* first_reading */ true); hpdata_allow_hugify(ps, now); } + bool purgable = hpa_good_purge_candidate(shard, ps); + if (purgable && !hpdata_purge_allowed_get(ps) + && (shard->opts.min_purge_delay_ms > 0)) { + nstime_t now; + uint64_t delayns = shard->opts.min_purge_delay_ms * 1000 * 1000; + shard->central->hooks.curtime(&now, /* first_reading */ true); + nstime_iadd(&now, delayns); + hpdata_time_purge_allowed_set(ps, &now); + } + hpdata_purge_allowed_set(ps, purgable); + /* * Once a hugepage has become eligible for hugification, we don't mark * it as ineligible just because it stops meeting the criteria (this @@ -375,7 +455,7 @@ hpa_update_purge_hugify_eligibility( * empty; it definitely doesn't 
help there until the hugepage gets * reused, which is likely not for a while. */ - if (hpdata_nactive_get(ps) == 0) { + if (hpdata_nactive_get(ps) == 0 && !hpa_should_assume_huge(shard, ps)) { hpdata_disallow_hugify(ps); } } @@ -394,8 +474,7 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { * This value protects two things: * 1. Stack size * 2. Number of huge pages that are being purged in a batch as - * we do not allow allocations while making *madvise - * syscall. + * we do not allow allocations while making madvise syscall. */ #define HPA_PURGE_BATCH_MAX_DEFAULT 16 @@ -433,18 +512,16 @@ hpa_purge_actual_unlocked( hpa_range_accum_init(&accum, vec, len); for (size_t i = 0; i < batch_sz; ++i) { - hpdata_t *to_purge = batch[i].hp; - /* Actually do the purging, now that the lock is dropped. */ if (batch[i].dehugify) { shard->central->hooks.dehugify( - hpdata_addr_get(to_purge), HUGEPAGE); + hpdata_addr_get(batch[i].hp), HUGEPAGE); } void *purge_addr; size_t purge_size; size_t total_purged_on_one_hp = 0; while (hpdata_purge_next( - to_purge, &batch[i].state, &purge_addr, &purge_size)) { + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { total_purged_on_one_hp += purge_size; assert(total_purged_on_one_hp <= HUGEPAGE); hpa_range_accum_add( @@ -454,14 +531,23 @@ hpa_purge_actual_unlocked( hpa_range_accum_finish(&accum, shard); } -/* Prepare purge of one page. Return num of dirty regular pages on it +static inline bool +hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { + return hpa_is_hugify_lazy(shard) && hpdata_huge_get(ps) + && !hpdata_empty(ps); +} + +/* Prepare purge of one page. 
Return number of dirty regular pages on it * Return 0 if no purgable huge page is found * * If there was a page to purge its purge state is initialized */ static inline size_t -hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { - hpdata_t *to_purge = psset_pick_purge(psset); +hpa_purge_start_hp(hpa_purge_batch_t *b, hpa_shard_t *shard) { + psset_t *psset = &shard->psset; + hpdata_t *to_purge = (shard->opts.min_purge_delay_ms > 0) + ? psset_pick_purge(psset, &shard->last_time_work_attempted) + : psset_pick_purge(psset, NULL); if (to_purge == NULL) { return 0; } @@ -493,7 +579,9 @@ hpa_purge_start_hp(hpa_purge_batch_t *b, psset_t *psset) { b->item_cnt++; hp_item->hp = to_purge; /* Gather all the metadata we'll need during the purge. */ - hp_item->dehugify = hpdata_huge_get(hp_item->hp); + hp_item->dehugify = hpa_needs_dehugify(shard, hp_item->hp); + hpdata_purged_when_empty_and_huge_set(hp_item->hp, + hpdata_huge_get(hp_item->hp) && hpdata_empty(hp_item->hp)); size_t nranges; size_t ndirty = hpdata_purge_begin( hp_item->hp, &hp_item->state, &nranges); @@ -513,7 +601,11 @@ hpa_purge_finish_hp( } /* The hpdata updates. */ psset_update_begin(&shard->psset, hp_item->hp); - if (hp_item->dehugify) { + if (hpdata_huge_get(hp_item->hp)) { + /* + * Even when dehugify is not explicitly called, the page is + * assumed to be non-huge after purge. 
+ */ hpdata_dehugify(hp_item->hp); } hpdata_purge_end(hp_item->hp, &hp_item->state); @@ -569,8 +661,7 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { assert(hpa_batch_empty(&batch)); while ( !hpa_batch_full(&batch) && hpa_should_purge(tsdn, shard)) { - size_t ndirty = hpa_purge_start_hp( - &batch, &shard->psset); + size_t ndirty = hpa_purge_start_hp(&batch, shard); if (ndirty == 0) { break; } @@ -633,25 +724,33 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { hpdata_disallow_hugify(to_hugify); assert(hpdata_alloc_allowed_get(to_hugify)); psset_update_end(&shard->psset, to_hugify); - - malloc_mutex_unlock(tsdn, &shard->mtx); - - bool err = shard->central->hooks.hugify( - hpdata_addr_get(to_hugify), HUGEPAGE, shard->opts.hugify_sync); - - malloc_mutex_lock(tsdn, &shard->mtx); - shard->stats.nhugifies++; - if (err) { - /* - * When asynchronous hugification is used - * (shard->opts.hugify_sync option is false), we are not - * expecting to get here, unless something went terrible wrong. - * Because underlying syscall is only setting kernel flag for - * memory range (actual hugification happens asynchronously - * and we are not getting any feedback about its outcome), we - * expect syscall to be successful all the time. - */ - shard->stats.nhugify_failures++; + /* + * Without lazy hugification, user relies on eagerly setting HG bit, or + * leaving everything up to the kernel (ex: thp enabled=always). We + * will still pretend that call succeeds to keep our accounting close to + * what user believes is the truth on the target system, but we won't + * update nhugifies stat as system call is not being made. 
+ */ + if (hpa_is_hugify_lazy(shard)) { + malloc_mutex_unlock(tsdn, &shard->mtx); + bool err = shard->central->hooks.hugify( + hpdata_addr_get(to_hugify), HUGEPAGE, + shard->opts.hugify_sync); + malloc_mutex_lock(tsdn, &shard->mtx); + shard->stats.nhugifies++; + if (err) { + /* + * When asynchronous hugification is used + * (shard->opts.hugify_sync option is false), we are not + * expecting to get here, unless something went terrible + * wrong. Because underlying syscall is only setting + * kernel flag for memory range (actual hugification + * happens asynchronously and we are not getting any + * feedback about its outcome), we expect syscall to be + * successful all the time. + */ + shard->stats.nhugify_failures++; + } } psset_update_begin(&shard->psset, to_hugify); @@ -666,11 +765,18 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { static bool hpa_min_purge_interval_passed(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_assert_owner(tsdn, &shard->mtx); - uint64_t since_last_purge_ms = shard->central->hooks.ms_since( - &shard->last_purge); + uint64_t since_last_purge_ms = nstime_ms_between( + &shard->last_purge, &shard->last_time_work_attempted); return since_last_purge_ms >= shard->opts.min_purge_interval_ms; } +static inline void +hpa_update_time_work_attempted(tsdn_t *tsdn, hpa_shard_t *shard) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + shard->central->hooks.curtime(&shard->last_time_work_attempted, + /* first_reading */ false); +} + /* * Execution of deferred work is forced if it's triggered by an explicit * hpa_shard_do_deferred_work() call. 
@@ -682,6 +788,7 @@ hpa_shard_maybe_do_deferred_work( if (!forced && shard->opts.deferral_allowed) { return; } + hpa_update_time_work_attempted(tsdn, shard); /* * If we're on a background thread, do work so long as there's work to @@ -753,8 +860,8 @@ hpa_try_alloc_one_no_grow( * If the pageslab used to be empty, treat it as though it's * brand new for fragmentation-avoidance purposes; what we're * trying to approximate is the age of the allocations *in* that - * pageslab, and the allocations in the new pageslab are - * definitionally the youngest in this hpa shard. + * pageslab, and the allocations in the new pageslab are by + * definition the youngest in this hpa shard. */ hpdata_age_set(ps, shard->age_counter++); } @@ -861,8 +968,8 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, * deallocations (and allocations of smaller sizes) may still succeed * while we're doing this potentially expensive system call. */ - hpdata_t *ps = hpa_central_extract( - tsdn, shard->central, size, shard->age_counter++, &oom); + hpdata_t *ps = hpa_central_extract(tsdn, shard->central, size, + shard->age_counter++, hpa_is_hugify_eager(shard), &oom); if (ps == NULL) { malloc_mutex_unlock(tsdn, &shard->grow_mtx); return nsuccess; diff --git a/src/hpa_hooks.c b/src/hpa_hooks.c index 14005ae0..2ec7029d 100644 --- a/src/hpa_hooks.c +++ b/src/hpa_hooks.c @@ -19,7 +19,13 @@ const hpa_hooks_t hpa_hooks_default = {&hpa_hooks_map, &hpa_hooks_unmap, static void * hpa_hooks_map(size_t size) { + /* + * During development, we're primarily concerned with systems + * that overcommit. Eventually, we should be more careful here. 
+ */ + bool commit = true; + assert((size & HUGEPAGE_MASK) == 0); void *ret = pages_map(NULL, size, HUGEPAGE, &commit); JE_USDT(hpa_map, 2, size, ret); return ret; diff --git a/src/hpdata.c b/src/hpdata.c index f9c8f4fa..e17d9ecf 100644 --- a/src/hpdata.c +++ b/src/hpdata.c @@ -17,11 +17,10 @@ hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) -void -hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { + void hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age, bool is_huge) { hpdata_addr_set(hpdata, addr); hpdata_age_set(hpdata, age); - hpdata->h_huge = false; + hpdata->h_huge = is_huge; hpdata->h_alloc_allowed = true; hpdata->h_in_psset_alloc_container = false; hpdata->h_purge_allowed = false; @@ -34,8 +33,16 @@ hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); hpdata->h_nactive = 0; fb_init(hpdata->active_pages, HUGEPAGE_PAGES); - hpdata->h_ntouched = 0; - fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); + if (is_huge) { + fb_set_range( + hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + hpdata->h_ntouched = HUGEPAGE_PAGES; + } else { + fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); + hpdata->h_ntouched = 0; + } + nstime_init_zero(&hpdata->h_time_purge_allowed); + hpdata->h_purged_when_empty_and_huge = false; hpdata_assert_consistent(hpdata); } diff --git a/src/jemalloc.c b/src/jemalloc.c index a3f01b3c..72216508 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1619,6 +1619,50 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], opt_hpa_opts.experimental_max_purge_nhp, "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + /* + * Accept either a ratio-based or an exact purge + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold, + "hpa_purge_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_purge_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.purge_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms, + "hpa_min_purge_delay_ms", 0, UINT64_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + + if (strncmp("hpa_hugify_style", k, klen) == 0) { + bool match = false; + for (int m = 0; m < hpa_hugify_style_limit; m++) { + if (strncmp(hpa_hugify_style_names[m], + v, vlen) + == 0) { + opt_hpa_opts.hugify_style = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("hpa_dirty_mult")) { if (CONF_MATCH_VALUE("-1")) { opt_hpa_opts.dirty_mult = (fxp_t)-1; diff --git a/src/nstime.c b/src/nstime.c index ee2ddc51..0dfbeda1 100644 --- a/src/nstime.c +++ b/src/nstime.c @@ -160,6 +160,19 @@ nstime_divide(const nstime_t *time, const nstime_t *divisor) { return time->ns / divisor->ns; } +uint64_t +nstime_ns_between(const nstime_t *earlier, const nstime_t *later) { + nstime_assert_initialized(earlier); + nstime_assert_initialized(later); + assert(nstime_compare(later, earlier) >= 0); + return later->ns - earlier->ns; +} + +uint64_t +nstime_ms_between(const nstime_t *earlier, const nstime_t *later) { + return nstime_ns_between(earlier, later) / MILLION; +} + /* Returns time since *past in nanoseconds, w/o updating *past. 
*/ uint64_t nstime_ns_since(const nstime_t *past) { @@ -168,9 +181,7 @@ nstime_ns_since(const nstime_t *past) { nstime_t now; nstime_copy(&now, past); nstime_update(&now); - - assert(nstime_compare(&now, past) >= 0); - return now.ns - past->ns; + return nstime_ns_between(past, &now); } /* Returns time since *past in milliseconds, w/o updating *past. */ diff --git a/src/pages.c b/src/pages.c index bc1093a3..000b87fe 100644 --- a/src/pages.c +++ b/src/pages.c @@ -833,9 +833,19 @@ init_thp_state(void) { } else { goto label_error; } + if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { + if (init_system_thp_mode == thp_mode_default) { + opt_hpa_opts.hugify_style = hpa_hugify_style_lazy; + } else { + opt_hpa_opts.hugify_style = hpa_hugify_style_none; + } + } return; #elif defined(JEMALLOC_HAVE_MEMCNTL) init_system_thp_mode = thp_mode_default; + if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { + opt_hpa_opts.hugify_style = hpa_hugify_style_eager; + } return; #endif label_error: diff --git a/src/psset.c b/src/psset.c index 509df064..a8a9615d 100644 --- a/src/psset.c +++ b/src/psset.c @@ -390,17 +390,37 @@ psset_pick_alloc(psset_t *psset, size_t size) { } hpdata_t * -psset_pick_purge(psset_t *psset) { - ssize_t ind_ssz = fb_fls( - psset->purge_bitmap, PSSET_NPURGE_LISTS, PSSET_NPURGE_LISTS - 1); - if (ind_ssz < 0) { - return NULL; +psset_pick_purge(psset_t *psset, const nstime_t *now) { + size_t max_bit = PSSET_NPURGE_LISTS - 1; + while (1) { + ssize_t ind_ssz = fb_fls( + psset->purge_bitmap, PSSET_NPURGE_LISTS, max_bit); + if (ind_ssz < 0) { + break; + } + pszind_t ind = (pszind_t)ind_ssz; + assert(ind < PSSET_NPURGE_LISTS); + hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); + assert(ps != NULL); + if (now == NULL) { + return ps; + } + /* + * We only check the first page (it had least recent hpa_alloc + * or hpa_dalloc). It is possible that some page in the list + * would meet the time, but we only guarantee the min delay. 
If + * we want to get the one that changed the state to purgable + * the earliest, we would change the list into a heap ordered by + * time. We will use benchmark to make a decision. + */ + const nstime_t *tm_allowed = hpdata_time_purge_allowed_get(ps); + if (nstime_compare(tm_allowed, now) <= 0) { + return ps; + } + max_bit--; } - pszind_t ind = (pszind_t)ind_ssz; - assert(ind < PSSET_NPURGE_LISTS); - hpdata_t *ps = hpdata_purge_list_first(&psset->to_purge[ind]); - assert(ps != NULL); - return ps; + /* No page is ready yet */ + return NULL; } hpdata_t * diff --git a/src/stats.c b/src/stats.c index a8a574ac..ea7a4e2e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1618,6 +1618,9 @@ stats_general_print(emitter_t *emitter) { "opt.hpa_dirty_mult", emitter_type_string, &bufp); } } + OPT_WRITE_SIZE_T("hpa_purge_threshold") + OPT_WRITE_UINT64("hpa_min_purge_delay_ms") + OPT_WRITE_CHAR_P("hpa_hugify_style") OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 1fed8a80..df2c9d96 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* slab_max_alloc */ @@ -55,7 +61,37 @@ static hpa_shard_opts_t test_hpa_shard_opts_purge = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; + +static hpa_shard_opts_t test_hpa_shard_opts_aggressive = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms 
*/ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + HUGEPAGE - 5 * PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_eager}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { @@ -365,10 +401,11 @@ defer_test_unmap(void *ptr, size_t size) { } static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; static void defer_test_purge(void *ptr, size_t size) { (void)ptr; - (void)size; + npurge_size = size; ++ndefer_purge_calls; } @@ -783,6 +820,625 @@ TEST_BEGIN(test_vectorized_opt_eq_zero) { } TEST_END +TEST_BEGIN(test_starts_huge) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.min_purge_delay_ms = 10; + opts.min_purge_interval_ms = 0; + + defer_vectorized_purge_called = false; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = 2 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 75% */ + int pages_to_deallocate = (int)(0.75 * NALLOCS); + for (int i = 0; i < pages_to_deallocate; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], 
&deferred_work_generated); + } + + /* + * While there is enough to purge as we have one empty page and that + * one meets the threshold, we need to respect the delay, so no purging + * should happen yet. + */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "Purged too early, delay==10ms"); + + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + /* Now, enough time has passed, so we expect to purge */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Expected purge"); + + /* + * We purged one hugepage, so we expect to have one non-full page and it + * should have half of the other dirty. + */ + psset_stats_t *stat = &shard->psset.stats; + expect_zu_eq( + stat->empty_slabs[1].npageslabs, 0, "Expected zero huge slabs"); + expect_zu_eq(stat->empty_slabs[0].npageslabs, 1, "Expected 1 nh slab"); + expect_zu_eq(stat->full_slabs[0].npageslabs, 0, ""); + expect_zu_eq(stat->full_slabs[1].npageslabs, 0, ""); + expect_zu_eq( + stat->merged.ndirty, HUGEPAGE_PAGES / 2, "One HP half dirty"); + + /* + * We now allocate one more PAGE than a half the hugepage because we + * want to make sure that one more hugepage is needed. 
+ */ + deferred_work_generated = false; + const size_t HALF = HUGEPAGE_PAGES / 2; + edatas[1] = pai_alloc(tsdn, &shard->pai, PAGE * (HALF + 1), PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[1], "Unexpected null edata"); + expect_false(deferred_work_generated, "No page is purgable"); + + expect_zu_eq(stat->empty_slabs[1].npageslabs, 0, ""); + expect_zu_eq(stat->empty_slabs[0].npageslabs, 0, ""); + expect_zu_eq(stat->full_slabs[0].npageslabs, 0, ""); + expect_zu_eq(stat->full_slabs[1].npageslabs, 0, ""); + + /* + * We expect that all inactive bytes on the second page are counted as + * dirty (this is because the page was huge and empty when we purged + * it, thus, it is assumed to come back as huge, thus all the bytes are + * counted as touched). + */ + expect_zu_eq(stat->merged.ndirty, 2 * HALF - 1, + "2nd page is huge because it was empty and huge when purged"); + expect_zu_eq(stat->merged.nactive, HALF + (HALF + 1), "1st + 2nd"); + + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + pai_dalloc(tsdn, &shard->pai, edatas[1], &deferred_work_generated); + expect_true(deferred_work_generated, ""); + expect_zu_eq(stat->merged.ndirty, 3 * HALF, "1st + 2nd"); + + /* + * Deallocate last allocation and confirm that page is empty again, and + * once new minimum delay is reached, page should be purged. + */ + ndefer_purge_calls = 0; + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, ""); + expect_zu_eq(stat->merged.ndirty, HALF, "2nd cleared as it was empty"); + ndefer_purge_calls = 0; + + /* Deallocate all the rest, but leave only two active */ + for (int i = pages_to_deallocate; i < NALLOCS - 2; ++i) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + /* + * With prior pai_dalloc our last page becomes purgable, however we + * still want to respect the delay. 
Thus, it is not time to purge yet. + */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, "Above limit, but not time yet"); + expect_zu_eq(0, ndefer_purge_calls, ""); + + /* + * Finally, we move the time ahead, and we confirm that purge happens + * and that we have exactly two active base pages and none dirty. + */ + nstime_iadd(&defer_curtime, opts.min_purge_delay_ms * 1000 * 1000); + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, "Above limit, but not time yet"); + expect_zu_eq(1, ndefer_purge_calls, ""); + expect_zu_eq(stat->merged.ndirty, 0, "Purged all"); + expect_zu_eq(stat->merged.nactive, 2, "1st only"); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_start_huge_purge_empty_only) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 0; + opts.hugify_style = hpa_hugify_style_eager; + opts.min_purge_interval_ms = 0; + + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = 2 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate all 
from the first and one PAGE from the second HP. */ + for (int i = 0; i < NALLOCS / 2 + 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, ""); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio"); + expect_zu_eq(shard->psset.stats.merged.ndirty, 1, ""); + expect_zu_eq(shard->psset.stats.merged.nactive, HUGEPAGE_PAGES - 1, ""); + + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "Should not purge anything"); + + /* Allocate and free 2*PAGE so that it spills into second page again */ + edatas[0] = pai_alloc(tsdn, &shard->pai, 2 * PAGE, PAGE, false, false, + false, &deferred_work_generated); + pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated); + expect_true(deferred_work_generated, ""); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Purge whole folio"); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_assume_huge_purge_fully) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.hugification_threshold = HUGEPAGE; + opts.min_purge_delay_ms = 0; + opts.min_purge_interval_ms = 0; + opts.hugify_style = hpa_hugify_style_eager; + opts.dirty_mult = 
FXP_INIT_PERCENT(1); + + ndefer_purge_calls = 0; + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate all */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, ""); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + + /* Stats should say no active */ + expect_zu_eq(shard->psset.stats.merged.nactive, 0, ""); + expect_zu_eq( + shard->psset.stats.empty_slabs[0].npageslabs, 1, "Non huge"); + npurge_size = 0; + edatas[0] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edatas[0], "Unexpected null edata"); + expect_zu_eq(shard->psset.stats.merged.nactive, 1, ""); + expect_zu_eq(shard->psset.stats.slabs[1].npageslabs, 1, "Huge nonfull"); + pai_dalloc(tsdn, &shard->pai, edatas[0], &deferred_work_generated); + expect_true(deferred_work_generated, ""); + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq(HUGEPAGE, npurge_size, "Should purge full folio"); + + /* Now allocate all, free 10%, alloc 5%, assert non-huge */ + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + int ten_pct = NALLOCS / 10; + for (int i = 0; i < ten_pct; i++) { + pai_dalloc( + tsdn, &shard->pai, 
edatas[i], &deferred_work_generated); + } + ndefer_purge_calls = 0; + npurge_size = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge, delay==0ms"); + expect_zu_eq( + ten_pct * PAGE, npurge_size, "Should purge 10 percent of pages"); + + for (int i = 0; i < ten_pct / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + expect_zu_eq( + shard->psset.stats.slabs[0].npageslabs, 1, "Nonhuge nonfull"); + expect_zu_eq(shard->psset.stats.merged.ndirty, 0, "No dirty"); + + npurge_size = 0; + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_eager_with_purge_threshold) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + const size_t THRESHOLD = 10; + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = THRESHOLD * PAGE; + opts.min_purge_delay_ms = 0; + opts.hugify_style = hpa_hugify_style_eager; + opts.dirty_mult = FXP_INIT_PERCENT(0); + + ndefer_purge_calls = 0; + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init(&defer_curtime, 10 * 1000 * 1000); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate less then threshold PAGEs. 
*/ + for (size_t i = 0; i < THRESHOLD - 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_false(deferred_work_generated, "No page is purgable"); + expect_zu_eq(0, ndefer_purge_calls, "Should not purge yet"); + /* Deallocate one more page to meet the threshold */ + pai_dalloc( + tsdn, &shard->pai, edatas[THRESHOLD - 1], &deferred_work_generated); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(1, ndefer_purge_calls, "Should purge"); + expect_zu_eq(shard->psset.stats.merged.ndirty, 0, ""); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_delay_when_not_allowed_deferral) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + const uint64_t DELAY_NS = 100 * 1000 * 1000; + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = false; + opts.purge_threshold = HUGEPAGE - 2 * PAGE; + opts.min_purge_delay_ms = DELAY_NS / (1000 * 1000); + opts.hugify_style = hpa_hugify_style_lazy; + opts.min_purge_interval_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate all */ + for (int i = 0; i < NALLOCS; i++) { + pai_dalloc( + tsdn, 
&shard->pai, edatas[i], &deferred_work_generated); + } + /* curtime = 100.0s */ + hpa_shard_do_deferred_work(tsdn, shard); + expect_true(deferred_work_generated, ""); + expect_zu_eq(0, ndefer_purge_calls, "Too early"); + + nstime_iadd(&defer_curtime, DELAY_NS - 1); + /* This activity will take the curtime=100.1 and reset purgability */ + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Dealloc all but 2 pages, purgable delay_ns later*/ + for (int i = 0; i < NALLOCS - 2; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + nstime_iadd(&defer_curtime, DELAY_NS); + pai_dalloc( + tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated); + expect_true(ndefer_purge_calls > 0, "Should have purged"); + + ndefer_purge_calls = 0; + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_deferred_until_time) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 1000; + opts.hugification_threshold = HUGEPAGE / 2; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_none; + opts.min_purge_interval_ms = 500; + opts.hugify_delay_ms = 3000; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + /* Allocate one huge page */ + tsdn_t 
*tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + /* Deallocate 25% */ + for (int i = 0; i < NALLOCS / 4; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + expect_true(deferred_work_generated, "We should hugify and purge"); + + /* Current time = 300ms, purge_eligible at 300ms + 1000ms */ + nstime_init(&defer_curtime, 300UL * 1000 * 1000); + for (int i = NALLOCS / 4; i < NALLOCS; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + expect_true(deferred_work_generated, "Purge work generated"); + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "not time for purging yet"); + + /* Current time = 900ms, purge_eligible at 1300ms */ + nstime_init(&defer_curtime, 900UL * 1000 * 1000); + uint64_t until_ns = pai_time_until_deferred_work(tsdn, &shard->pai); + expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN, + "First pass did not happen"); + + /* Fake that first pass happened more than min_purge_interval_ago */ + nstime_init(&shard->last_purge, 350UL * 1000 * 1000); + shard->stats.npurge_passes = 1; + until_ns = pai_time_until_deferred_work(tsdn, &shard->pai); + expect_u64_eq(until_ns, BACKGROUND_THREAD_DEFERRED_MIN, + "No need to heck anything it is more than interval"); + + nstime_init(&shard->last_purge, 900UL * 1000 * 1000); + nstime_init(&defer_curtime, 1000UL * 1000 * 1000); + /* Next purge expected at 900ms + min_purge_interval = 1400ms */ + uint64_t expected_ms = 1400 - 1000; + until_ns = pai_time_until_deferred_work(tsdn, &shard->pai); + expect_u64_eq(expected_ms, until_ns / (1000 * 1000), "Next in 400ms"); + destroy_test_data(shard); +} +TEST_END + 
+TEST_BEGIN(test_eager_no_hugify_on_threshold) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 0; + opts.hugification_threshold = HUGEPAGE * 0.9; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_eager; + opts.min_purge_interval_ms = 0; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + /* First allocation makes the page huge */ + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager"); + expect_zu_eq(shard->psset.stats.full_slabs[1].npageslabs, 1, + "Page should be full-huge"); + + /* Deallocate 25% */ + for (int i = 0; i < NALLOCS / 4; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + expect_true(deferred_work_generated, "purge is needed"); + ndefer_purge_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "No hugify needed - eager"); + expect_zu_eq(ndefer_purge_calls, 1, 
"Purge should have happened"); + + /* Allocate 20% again, so that we are above hugification threshold */ + ndefer_purge_calls = 0; + nstime_iadd(&defer_curtime, 800UL * 1000 * 1000); + for (int i = 0; i < NALLOCS / 4 - 1; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(0, ndefer_purge_calls, "no purging needed"); + expect_zu_eq(ndefer_hugify_calls, 0, "no hugify - eager"); + destroy_test_data(shard); +} +TEST_END + +TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 0; + opts.hugification_threshold = HUGEPAGE * 0.25; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_none; + opts.min_purge_interval_ms = 0; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + /* First allocation makes the page huge */ + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpdata_t *ps 
= psset_pick_alloc(&shard->psset, PAGE); + expect_false(hpdata_huge_get(ps), "Page should be non-huge"); + + ndefer_hugify_calls = 0; + ndefer_purge_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "Hugify none, no syscall"); + ps = psset_pick_alloc(&shard->psset, PAGE); + expect_true(ps, "Page should be huge"); + + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -801,5 +1457,10 @@ main(void) { test_alloc_dalloc_batch, test_defer_time, test_purge_no_infinite_loop, test_no_min_purge_interval, test_min_purge_interval, test_purge, - test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero); + test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero, + test_starts_huge, test_start_huge_purge_empty_only, + test_assume_huge_purge_fully, test_eager_with_purge_threshold, + test_delay_when_not_allowed_deferral, test_deferred_until_time, + test_eager_no_hugify_on_threshold, + test_hpa_hugify_style_none_huge_no_syscall); } diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index 8df54d06..c66811e1 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index a5766620..8e7be7c0 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -37,7 +37,13 @@ static hpa_shard_opts_t test_hpa_shard_opts_default = { /* min_purge_interval_ms */ 5 * 1000, /* experimental_max_purge_nhp */ - -1}; + -1, + /* purge_threshold */ + 1, + /* 
min_purge_delay_ms */ + 0, + /* hugify_style */ + hpa_hugify_style_lazy}; static hpa_shard_t * create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { diff --git a/test/unit/hpdata.c b/test/unit/hpdata.c index 2329f065..ac45d697 100644 --- a/test/unit/hpdata.c +++ b/test/unit/hpdata.c @@ -5,7 +5,7 @@ TEST_BEGIN(test_reserve_alloc) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); /* Allocating a page at a time, we should do first fit. */ for (size_t i = 0; i < HUGEPAGE_PAGES; i++) { @@ -57,7 +57,7 @@ TEST_END TEST_BEGIN(test_purge_simple) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE_PAGES / 2 * PAGE); expect_ptr_eq(alloc, HPDATA_ADDR, ""); @@ -101,7 +101,7 @@ TEST_END */ TEST_BEGIN(test_purge_intervening_dalloc) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); /* Allocate the first 3/4 of the pages. */ void *alloc = hpdata_reserve_alloc( @@ -164,7 +164,7 @@ TEST_BEGIN(test_purge_over_retained) { size_t purge_size; hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); /* Allocate the first 3/4 of the pages. 
*/ void *alloc = hpdata_reserve_alloc( @@ -238,7 +238,7 @@ TEST_END TEST_BEGIN(test_hugify) { hpdata_t hpdata; - hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE); + hpdata_init(&hpdata, HPDATA_ADDR, HPDATA_AGE, /* is_huge */ false); void *alloc = hpdata_reserve_alloc(&hpdata, HUGEPAGE / 2); expect_ptr_eq(alloc, HPDATA_ADDR, ""); diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index ac7506cf..d1974e0f 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -313,6 +313,9 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); + TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); + TEST_MALLCTL_OPT(uint64_t, hpa_min_purge_delay_ms, always); + TEST_MALLCTL_OPT(const char *, hpa_hugify_style, always); TEST_MALLCTL_OPT(unsigned, narenas, always); TEST_MALLCTL_OPT(const char *, percpu_arena, always); TEST_MALLCTL_OPT(size_t, oversize_threshold, always); diff --git a/test/unit/psset.c b/test/unit/psset.c index 73a9835a..3ce8e976 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -124,7 +124,8 @@ TEST_BEGIN(test_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t alloc; edata_init_test(&alloc); @@ -141,9 +142,10 @@ TEST_END TEST_BEGIN(test_fill) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; + bool is_huge = false; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -179,7 +181,8 @@ TEST_BEGIN(test_reuse) { hpdata_t *ps; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + 
hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -274,7 +277,8 @@ TEST_BEGIN(test_evict) { hpdata_t *ps; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -311,9 +315,10 @@ TEST_BEGIN(test_multi_pageslab) { hpdata_t *ps; hpdata_t pageslab[2]; - hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), - PAGESLAB_AGE + 1); + PAGESLAB_AGE + 1, is_huge); edata_t *alloc[2]; alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -376,7 +381,8 @@ TEST_END TEST_BEGIN(test_stats_merged) { hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -442,7 +448,8 @@ TEST_BEGIN(test_stats_huge) { test_skip_if(hpa_hugepage_size_exceeds_limit()); hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -570,7 +577,8 @@ TEST_BEGIN(test_stats_fullness) { bool err; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE); + bool is_huge = false; + hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -620,13 +628,15 @@ static void init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) { bool err; + bool is_huge = false; - 
hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE); + hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, is_huge); /* * This pageslab would be better from an address-first-fit POV, but * worse from an age POV. */ - hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1); + hpdata_init( + worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, is_huge); psset_init(psset); @@ -763,14 +773,15 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { hpdata_t hpdata_nonhuge[NHP]; uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; + bool is_huge = false; for (size_t i = 0; i < NHP; i++) { - hpdata_init( - &hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), 123 + i); + hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), + 123 + i, is_huge); psset_insert(&psset, &hpdata_huge[i]); hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), 456 + i); + (void *)((10 + NHP + i) * HUGEPAGE), 456 + i, is_huge); psset_insert(&psset, &hpdata_nonhuge[i]); } for (int i = 0; i < 2 * NHP; i++) { @@ -802,7 +813,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { * further. 
*/ for (int i = 0; i < NHP; i++) { - hpdata = psset_pick_purge(&psset); + hpdata = psset_pick_purge(&psset, NULL); assert_true(nonhuge_begin <= (uintptr_t)hpdata && (uintptr_t)hpdata < nonhuge_end, ""); @@ -812,7 +823,7 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { psset_update_end(&psset, hpdata); } for (int i = 0; i < NHP; i++) { - hpdata = psset_pick_purge(&psset); + hpdata = psset_pick_purge(&psset, NULL); expect_true(huge_begin <= (uintptr_t)hpdata && (uintptr_t)hpdata < huge_end, ""); @@ -825,6 +836,72 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { } TEST_END +TEST_BEGIN(test_purge_timing) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); + void *ptr; + + psset_t psset; + psset_init(&psset); + + hpdata_t hpdata_empty_nh; + hpdata_t hpdata_empty_huge; + hpdata_t hpdata_nonempty; + + nstime_t basetime, now, empty_nh_tm, empty_huge_tm, nonempty_tm; + const uint64_t BASE_SEC = 100; + nstime_init2(&basetime, BASE_SEC, 0); + + /* Create and add to psset */ + hpdata_init(&hpdata_empty_nh, (void *)(9 * HUGEPAGE), 102, false); + psset_insert(&psset, &hpdata_empty_nh); + hpdata_init(&hpdata_empty_huge, (void *)(10 * HUGEPAGE), 123, true); + psset_insert(&psset, &hpdata_empty_huge); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, false); + psset_insert(&psset, &hpdata_nonempty); + + psset_update_begin(&psset, &hpdata_empty_nh); + ptr = hpdata_reserve_alloc(&hpdata_empty_nh, PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_empty_nh), ptr, ""); + hpdata_unreserve(&hpdata_empty_nh, ptr, PAGE); + hpdata_purge_allowed_set(&hpdata_empty_nh, true); + nstime_init2(&empty_nh_tm, BASE_SEC + 100, 0); + hpdata_time_purge_allowed_set(&hpdata_empty_nh, &empty_nh_tm); + psset_update_end(&psset, &hpdata_empty_nh); + + psset_update_begin(&psset, &hpdata_empty_huge); + ptr = hpdata_reserve_alloc(&hpdata_empty_huge, PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_empty_huge), ptr, ""); + hpdata_unreserve(&hpdata_empty_huge, ptr, PAGE); + nstime_init2(&empty_huge_tm, BASE_SEC + 
110, 0); + hpdata_time_purge_allowed_set(&hpdata_empty_huge, &empty_huge_tm); + hpdata_purge_allowed_set(&hpdata_empty_huge, true); + psset_update_end(&psset, &hpdata_empty_huge); + + psset_update_begin(&psset, &hpdata_nonempty); + ptr = hpdata_reserve_alloc(&hpdata_nonempty, 10 * PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_nonempty), ptr, ""); + hpdata_unreserve(&hpdata_nonempty, ptr, 9 * PAGE); + hpdata_purge_allowed_set(&hpdata_nonempty, true); + nstime_init2(&nonempty_tm, BASE_SEC + 80, 0); + hpdata_time_purge_allowed_set(&hpdata_nonempty, &nonempty_tm); + psset_update_end(&psset, &hpdata_nonempty); + + /* The best to purge with no time restriction is the huge one */ + hpdata_t *ps = psset_pick_purge(&psset, NULL); + expect_ptr_eq(&hpdata_empty_huge, ps, "Without tick, pick huge"); + + /* However, only the one eligible for purging can be picked */ + nstime_init2(&now, BASE_SEC + 90, 0); + ps = psset_pick_purge(&psset, &now); + expect_ptr_eq(&hpdata_nonempty, ps, "Only non empty purgable"); + + /* When all eligible, huge empty is the best */ + nstime_init2(&now, BASE_SEC + 110, 0); + ps = psset_pick_purge(&psset, &now); + expect_ptr_eq(&hpdata_empty_huge, ps, "Huge empty is the best"); +} +TEST_END + TEST_BEGIN(test_purge_prefers_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; @@ -834,9 +911,10 @@ TEST_BEGIN(test_purge_prefers_empty) { hpdata_t hpdata_empty; hpdata_t hpdata_nonempty; - hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123); + bool is_huge = false; + hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123, is_huge); psset_insert(&psset, &hpdata_empty); - hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, is_huge); psset_insert(&psset, &hpdata_nonempty); psset_update_begin(&psset, &hpdata_empty); @@ -857,7 +935,7 @@ TEST_BEGIN(test_purge_prefers_empty) { * The nonempty slab has 9 dirty pages, while the empty one has only 1. 
* We should still pick the empty one for purging. */ - hpdata_t *to_purge = psset_pick_purge(&psset); + hpdata_t *to_purge = psset_pick_purge(&psset, NULL); expect_ptr_eq(&hpdata_empty, to_purge, ""); } TEST_END @@ -876,13 +954,16 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { uintptr_t cur_addr = 100 * HUGEPAGE; uint64_t cur_age = 123; + bool is_huge = false; for (int i = 0; i < NHP; i++) { - hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age); + hpdata_init( + &hpdata_huge[i], (void *)cur_addr, cur_age, is_huge); cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_huge[i]); - hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age); + hpdata_init( + &hpdata_nonhuge[i], (void *)cur_addr, cur_age, is_huge); cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_nonhuge[i]); @@ -917,14 +998,14 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { * any of the non-huge ones for purging. */ for (int i = 0; i < NHP; i++) { - hpdata_t *to_purge = psset_pick_purge(&psset); + hpdata_t *to_purge = psset_pick_purge(&psset, NULL); expect_ptr_eq(&hpdata_huge[i], to_purge, ""); psset_update_begin(&psset, to_purge); hpdata_purge_allowed_set(to_purge, false); psset_update_end(&psset, to_purge); } for (int i = 0; i < NHP; i++) { - hpdata_t *to_purge = psset_pick_purge(&psset); + hpdata_t *to_purge = psset_pick_purge(&psset, NULL); expect_ptr_eq(&hpdata_nonhuge[i], to_purge, ""); psset_update_begin(&psset, to_purge); hpdata_purge_allowed_set(to_purge, false); @@ -938,6 +1019,6 @@ main(void) { return test_no_reentrancy(test_empty, test_fill, test_reuse, test_evict, test_multi_pageslab, test_stats_merged, test_stats_huge, test_stats_fullness, test_oldest_fit, test_insert_remove, - test_purge_prefers_nonhuge, test_purge_prefers_empty, - test_purge_prefers_empty_huge); + test_purge_prefers_nonhuge, test_purge_timing, + test_purge_prefers_empty, test_purge_prefers_empty_huge); } From 707aab0c955e97abed6bd0780eb47cd38e7b1843 Mon Sep 17 00:00:00 2001 From: Slobodan 
Predolac Date: Tue, 16 Sep 2025 13:25:42 -0700 Subject: [PATCH 2550/2608] [pa-bench] Add clock to pa benchmark --- test/stress/pa/pa_data_preprocessor.cpp | 20 ++++++----- test/stress/pa/pa_microbench.c | 45 ++++++++++++++++++++----- 2 files changed, 47 insertions(+), 18 deletions(-) diff --git a/test/stress/pa/pa_data_preprocessor.cpp b/test/stress/pa/pa_data_preprocessor.cpp index 757f37bb..44e84e8c 100644 --- a/test/stress/pa/pa_data_preprocessor.cpp +++ b/test/stress/pa/pa_data_preprocessor.cpp @@ -16,13 +16,14 @@ * HPA: shard_ind_int,addr_int,nsecs_int,probe,size_int * SEC: process_id,thread_id,thread_name,nsecs_int,_c4,sec_ptr_int,sec_shard_ptr_int,edata_ptr_int,size_int,is_frequent_reuse_int * - * Output format (4 columns): - * shard_ind_int,operation_index,size_or_alloc_index,is_frequent + * Output format (5 columns): + * shard_ind_int,operation_index,size_or_alloc_index,nsecs,is_frequent * where: * - shard_ind_int: shard index as integer * - operation_index: 0=alloc, 1=dalloc * - size_or_alloc_index: for alloc operations show bytes, * for dalloc operations show index of corresponding alloc + * - nsecs: nanonosec of some monotonic clock * - is_frequent: 1 if frequent reuse allocation, 0 otherwise */ @@ -250,14 +251,14 @@ parse_sec_line( void write_output_header(std::ofstream &output) { - output << "shard_ind,operation,size_or_alloc_index,is_frequent\n"; + output << "shard_ind,operation,size_or_alloc_index,nsecs,is_frequent\n"; } void write_output_event(std::ofstream &output, int shard_ind, int operation, - size_t value, bool is_frequent) { - output << shard_ind << "," << operation << "," << value << "," - << (is_frequent ? 1 : 0) << "\n"; + size_t value, uint64_t nsecs, bool is_frequent) { + output << shard_ind << "," << operation << "," << value << "," << nsecs + << "," << (is_frequent ? 
1 : 0) << "\n"; } size_t @@ -319,7 +320,7 @@ process_trace_file(const std::string &input_filename, if (is_alloc_operation(event.probe)) { /* This is an allocation */ write_output_event(output, event.shard_ind, 0, - event.size, event.is_frequent); + event.size, event.nsecs, event.is_frequent); /* Track this allocation with the current sequence number */ tracker.add_allocation(event.addr, event.size, @@ -335,7 +336,8 @@ process_trace_file(const std::string &input_filename, assert(event.nsecs >= record->nsecs); /* Found matching allocation with valid timing */ write_output_event(output, event.shard_ind, 1, - record->alloc_index, event.is_frequent); + record->alloc_index, event.nsecs, + event.is_frequent); tracker.remove_allocation(event.addr); output_count++; /* Count this deallocation */ } else { @@ -390,7 +392,7 @@ main(int argc, char *argv[]) { << " output_file - Output file for simulator with format:" << std::endl; std::cerr - << " shard_ind,operation,size_or_alloc_index,is_frequent" + << " shard_ind,operation,size_or_alloc_index,nsecs,is_frequent" << std::endl; std::cerr << std::endl; std::cerr << "Output format:" << std::endl; diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c index 4ad3652d..c4706b04 100644 --- a/test/stress/pa/pa_microbench.c +++ b/test/stress/pa/pa_microbench.c @@ -32,10 +32,11 @@ typedef enum { PA_ALLOC = 0, PA_DALLOC = 1 } pa_op_t; typedef struct { - int shard_ind; - pa_op_t operation; - size_t size_or_alloc_index; - int is_frequent; + int shard_ind; + pa_op_t operation; + size_t size_or_alloc_index; + uint64_t nsecs; + int is_frequent; } pa_event_t; typedef struct { @@ -73,6 +74,29 @@ static shard_infrastructure_t *g_shard_infra = NULL; /* Per-shard PA infrastructure */ static pa_central_t g_pa_central; /* Global PA central */ +/* Override for curtime */ +static hpa_hooks_t hpa_hooks_override; +static nstime_t cur_time_clock; + +void +curtime(nstime_t *r_time, bool first_reading) { + if (first_reading) { + 
nstime_init_zero(r_time); + } + *r_time = cur_time_clock; +} + +static void +set_clock(uint64_t nsecs) { + nstime_init(&cur_time_clock, nsecs); +} + +static void +init_hpa_hooks() { + hpa_hooks_override = hpa_hooks_default; + hpa_hooks_override.curtime = curtime; +} + static void cleanup_pa_infrastructure(int num_shards); static bool @@ -125,8 +149,9 @@ initialize_pa_infrastructure(int num_shards) { } /* Initialize PA central with HPA enabled */ + init_hpa_hooks(); if (pa_central_init(&g_pa_central, central_base, true /* hpa */, - &hpa_hooks_default)) { + &hpa_hooks_override)) { printf("DEBUG: Failed to initialize PA central\n"); base_delete(tsd_tsdn(tsd_fetch()), central_base); free(g_shard_stats); @@ -237,14 +262,15 @@ static bool parse_csv_line(const char *line, pa_event_t *event) { /* Expected format: shard_ind,operation,size_or_alloc_index,is_frequent */ int operation; - int fields = sscanf(line, "%d,%d,%zu,%d", &event->shard_ind, &operation, - &event->size_or_alloc_index, &event->is_frequent); + int fields = sscanf(line, "%d,%d,%zu,%lu,%d", &event->shard_ind, + &operation, &event->size_or_alloc_index, &event->nsecs, + &event->is_frequent); - if (fields < 3) { /* is_frequent is optional */ + if (fields < 4) { /* is_frequent is optional */ return false; } - if (fields == 3) { + if (fields == 4) { event->is_frequent = 0; /* Default value */ } @@ -393,6 +419,7 @@ simulate_trace( continue; } + set_clock(event->nsecs); switch (event->operation) { case PA_ALLOC: { size_t size = event->size_or_alloc_index; From 7c40be249cc204b2698d7f97ec5ac1de5551a3cc Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 16 Sep 2025 16:50:11 -0700 Subject: [PATCH 2551/2608] Add npurges and npurge_passes to output of pa_benchmark --- test/stress/pa/pa_microbench.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/test/stress/pa/pa_microbench.c b/test/stress/pa/pa_microbench.c index c4706b04..3e7d8aef 100644 --- a/test/stress/pa/pa_microbench.c 
+++ b/test/stress/pa/pa_microbench.c @@ -382,11 +382,17 @@ print_shard_stats(int shard_id, size_t operation_count) { } /* Dirty bytes */ - size_t dirty_bytes = psset_stats->merged.ndirty * PAGE; + size_t dirty_bytes = psset_stats->merged.ndirty * PAGE; + uint64_t npurge_passes = hpa_stats.nonderived_stats.npurge_passes; + uint64_t npurges = hpa_stats.nonderived_stats.npurges; + assert(g_use_sec + || psset_stats->merged.nactive * PAGE + == g_shard_stats[shard_id].bytes_allocated); /* Output enhanced stats with detailed breakdown */ fprintf(g_stats_output, - "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu\n", + "%zu,%d,%lu,%lu,%lu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%zu,%lu,%lu,%lu" + ",%lu,%lu\n", operation_count, shard_id, g_shard_stats[shard_id].alloc_count, g_shard_stats[shard_id].dealloc_count, g_shard_stats[shard_id].bytes_allocated, total_pageslabs, @@ -395,7 +401,7 @@ print_shard_stats(int shard_id, size_t operation_count) { empty_pageslabs_non_huge, empty_pageslabs_huge, dirty_bytes, hpa_stats.nonderived_stats.nhugifies, hpa_stats.nonderived_stats.nhugify_failures, - hpa_stats.nonderived_stats.ndehugifies); + hpa_stats.nonderived_stats.ndehugifies, npurge_passes, npurges); fflush(g_stats_output); } @@ -629,7 +635,8 @@ main(int argc, char *argv[]) { "total_pageslabs,full_pageslabs_total,empty_pageslabs_total,hugified_pageslabs," "full_pageslabs_non_huge,full_pageslabs_huge," "empty_pageslabs_non_huge,empty_pageslabs_huge," - "dirty_bytes,nhugifies,nhugify_failures,ndehugifies\n"); + "dirty_bytes,nhugifies,nhugify_failures,ndehugifies," + "npurge_passes,npurges\n"); } /* Load trace data and determine max number of arenas */ From 5e49c28ef042d7c1f446ec6615d6d84bafabb3fd Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 6 Oct 2025 12:01:13 -0700 Subject: [PATCH 2552/2608] [EASY] Spelling in the comments --- include/jemalloc/internal/edata.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/include/jemalloc/internal/edata.h b/include/jemalloc/internal/edata.h index 2b229e7d..06b6c545 100644 --- a/include/jemalloc/internal/edata.h +++ b/include/jemalloc/internal/edata.h @@ -23,7 +23,7 @@ /* * Defines how many nodes visited when enumerating the heap to search for - * qualifed extents. More nodes visited may result in better choices at + * qualified extents. More nodes visited may result in better choices at * the cost of longer search time. This size should not exceed 2^16 - 1 * because we use uint16_t for accessing the queue needed for enumeration. */ @@ -230,7 +230,7 @@ struct edata_s { /* * If this edata is a user allocation from an HPA, it comes out of some - * pageslab (we don't yet support huegpage allocations that don't fit + * pageslab (we don't yet support hugepage allocations that don't fit * into pageslabs). This tracks it. */ hpdata_t *e_ps; From f714cd9249eb1df010b035623ebca89b7614b1cc Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 6 Oct 2025 15:45:38 -0700 Subject: [PATCH 2553/2608] Inline the value of an always false boolean local variable Next to its use, which is always as an argument, we include the name of the parameter in a constant. This completes a partially implemented cleanup suggested in an earlier commit. 
--- test/unit/psset.c | 64 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/test/unit/psset.c b/test/unit/psset.c index 3ce8e976..12d55941 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -124,8 +124,8 @@ TEST_BEGIN(test_empty) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t alloc; edata_init_test(&alloc); @@ -142,10 +142,10 @@ TEST_END TEST_BEGIN(test_fill) { test_skip_if(hpa_hugepage_size_exceeds_limit()); bool err; - bool is_huge = false; hpdata_t pageslab; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -181,8 +181,8 @@ TEST_BEGIN(test_reuse) { hpdata_t *ps; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -277,8 +277,8 @@ TEST_BEGIN(test_evict) { hpdata_t *ps; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -315,10 +315,10 @@ TEST_BEGIN(test_multi_pageslab) { hpdata_t *ps; hpdata_t pageslab[2]; - bool is_huge = false; - hpdata_init(&pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab[0], PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); hpdata_init(&pageslab[1], (void *)((uintptr_t)PAGESLAB_ADDR + HUGEPAGE), - PAGESLAB_AGE + 1, is_huge); + PAGESLAB_AGE + 1, /* is_huge */ false); 
edata_t *alloc[2]; alloc[0] = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -381,8 +381,8 @@ TEST_END TEST_BEGIN(test_stats_merged) { hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -448,8 +448,8 @@ TEST_BEGIN(test_stats_huge) { test_skip_if(hpa_hugepage_size_exceeds_limit()); hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -577,8 +577,8 @@ TEST_BEGIN(test_stats_fullness) { bool err; hpdata_t pageslab; - bool is_huge = false; - hpdata_init(&pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, is_huge); + hpdata_init( + &pageslab, PAGESLAB_ADDR, PAGESLAB_AGE, /* is_huge */ false); edata_t *alloc = (edata_t *)malloc(sizeof(edata_t) * HUGEPAGE_PAGES); @@ -628,15 +628,15 @@ static void init_test_pageslabs(psset_t *psset, hpdata_t *pageslab, hpdata_t *worse_pageslab, edata_t *alloc, edata_t *worse_alloc) { bool err; - bool is_huge = false; - hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, is_huge); + hpdata_init(pageslab, (void *)(10 * HUGEPAGE), PAGESLAB_AGE, + /* is_huge */ false); /* * This pageslab would be better from an address-first-fit POV, but * worse from an age POV. 
*/ - hpdata_init( - worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, is_huge); + hpdata_init(worse_pageslab, (void *)(9 * HUGEPAGE), PAGESLAB_AGE + 1, + /* is_huge */ false); psset_init(psset); @@ -773,15 +773,15 @@ TEST_BEGIN(test_purge_prefers_nonhuge) { hpdata_t hpdata_nonhuge[NHP]; uintptr_t nonhuge_begin = (uintptr_t)&hpdata_nonhuge[0]; uintptr_t nonhuge_end = (uintptr_t)&hpdata_nonhuge[NHP]; - bool is_huge = false; for (size_t i = 0; i < NHP; i++) { hpdata_init(&hpdata_huge[i], (void *)((10 + i) * HUGEPAGE), - 123 + i, is_huge); + 123 + i, /* is_huge */ false); psset_insert(&psset, &hpdata_huge[i]); hpdata_init(&hpdata_nonhuge[i], - (void *)((10 + NHP + i) * HUGEPAGE), 456 + i, is_huge); + (void *)((10 + NHP + i) * HUGEPAGE), 456 + i, + /* is_huge */ false); psset_insert(&psset, &hpdata_nonhuge[i]); } for (int i = 0; i < 2 * NHP; i++) { @@ -911,10 +911,11 @@ TEST_BEGIN(test_purge_prefers_empty) { hpdata_t hpdata_empty; hpdata_t hpdata_nonempty; - bool is_huge = false; - hpdata_init(&hpdata_empty, (void *)(10 * HUGEPAGE), 123, is_huge); + hpdata_init( + &hpdata_empty, (void *)(10 * HUGEPAGE), 123, /* is_huge */ false); psset_insert(&psset, &hpdata_empty); - hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, is_huge); + hpdata_init(&hpdata_nonempty, (void *)(11 * HUGEPAGE), 456, + /* is_huge */ false); psset_insert(&psset, &hpdata_nonempty); psset_update_begin(&psset, &hpdata_empty); @@ -954,16 +955,15 @@ TEST_BEGIN(test_purge_prefers_empty_huge) { uintptr_t cur_addr = 100 * HUGEPAGE; uint64_t cur_age = 123; - bool is_huge = false; for (int i = 0; i < NHP; i++) { - hpdata_init( - &hpdata_huge[i], (void *)cur_addr, cur_age, is_huge); + hpdata_init(&hpdata_huge[i], (void *)cur_addr, cur_age, + /* is_huge */ false); cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_huge[i]); - hpdata_init( - &hpdata_nonhuge[i], (void *)cur_addr, cur_age, is_huge); + hpdata_init(&hpdata_nonhuge[i], (void *)cur_addr, cur_age, + /* is_huge */ false); 
cur_addr += HUGEPAGE; cur_age++; psset_insert(&psset, &hpdata_nonhuge[i]); From 87555dfbb22efb0c4bcfc59be0b7ccad19725edf Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 7 Oct 2025 16:42:15 -0700 Subject: [PATCH 2554/2608] Do not release the hpa_shard->mtx when inserting newly retrieved page from central before allocating from it --- src/hpa.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/src/hpa.c b/src/hpa.c index 27db53a9..a7875e89 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -840,6 +840,8 @@ hpa_shard_maybe_do_deferred_work( static edata_t * hpa_try_alloc_one_no_grow( tsdn_t *tsdn, hpa_shard_t *shard, size_t size, bool *oom) { + malloc_mutex_assert_owner(tsdn, &shard->mtx); + bool err; edata_t *edata = edata_cache_fast_get(tsdn, &shard->ecf); if (edata == NULL) { @@ -912,10 +914,10 @@ hpa_try_alloc_one_no_grow( } static size_t -hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, - bool *oom, size_t nallocs, edata_list_active_t *results, +hpa_try_alloc_batch_no_grow_locked(tsdn_t *tsdn, hpa_shard_t *shard, + size_t size, bool *oom, size_t nallocs, edata_list_active_t *results, bool *deferred_work_generated) { - malloc_mutex_lock(tsdn, &shard->mtx); + malloc_mutex_assert_owner(tsdn, &shard->mtx); size_t nsuccess = 0; for (; nsuccess < nallocs; nsuccess++) { edata_t *edata = hpa_try_alloc_one_no_grow( @@ -928,6 +930,16 @@ hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, hpa_shard_maybe_do_deferred_work(tsdn, shard, /* forced */ false); *deferred_work_generated = hpa_shard_has_deferred_work(tsdn, shard); + return nsuccess; +} + +static size_t +hpa_try_alloc_batch_no_grow(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, + bool *oom, size_t nallocs, edata_list_active_t *results, + bool *deferred_work_generated) { + malloc_mutex_lock(tsdn, &shard->mtx); + size_t nsuccess = hpa_try_alloc_batch_no_grow_locked( + tsdn, shard, size, oom, nallocs, results, 
deferred_work_generated); malloc_mutex_unlock(tsdn, &shard->mtx); return nsuccess; } @@ -976,17 +988,16 @@ hpa_alloc_batch_psset(tsdn_t *tsdn, hpa_shard_t *shard, size_t size, } /* - * We got the pageslab; allocate from it. This does an unlock followed - * by a lock on the same mutex, and holds the grow mutex while doing - * deferred work, but this is an uncommon path; the simplicity is worth - * it. + * We got the pageslab; allocate from it. This holds the grow mutex + * while doing deferred work, but this is an uncommon path; the + * simplicity is worth it. */ malloc_mutex_lock(tsdn, &shard->mtx); psset_insert(&shard->psset, ps); + nsuccess += hpa_try_alloc_batch_no_grow_locked(tsdn, shard, size, &oom, + nallocs - nsuccess, results, deferred_work_generated); malloc_mutex_unlock(tsdn, &shard->mtx); - nsuccess += hpa_try_alloc_batch_no_grow(tsdn, shard, size, &oom, - nallocs - nsuccess, results, deferred_work_generated); /* * Drop grow_mtx before doing deferred work; other threads blocked on it * should be allowed to proceed while we're working. From 2cfa41913e71b0ff24788812f61d5485f04b647d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Tue, 7 Oct 2025 16:18:07 -0700 Subject: [PATCH 2555/2608] Refactor init_system_thp_mode and print it in malloc stats. --- include/jemalloc/internal/pages.h | 18 +++++++++++--- src/arena.c | 4 +-- src/base.c | 4 +-- src/pages.c | 41 ++++++++++++++++++++----------- src/stats.c | 16 ++++++++++++ test/unit/pages.c | 2 +- 6 files changed, 63 insertions(+), 22 deletions(-) diff --git a/include/jemalloc/internal/pages.h b/include/jemalloc/internal/pages.h index 31909934..a4282c9b 100644 --- a/include/jemalloc/internal/pages.h +++ b/include/jemalloc/internal/pages.h @@ -97,8 +97,12 @@ static const bool pages_can_hugify = #endif ; +/* + * thp_mode_t are values for opt.thp, while system_thp_mode_t is for kernel thp + * settings, i.e., init_system_thp_mode. + */ typedef enum { - thp_mode_default = 0, /* Do not change hugepage settings. 
*/ + thp_mode_do_nothing = 0, /* Respect kernel thp settings. */ thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ @@ -106,10 +110,18 @@ typedef enum { thp_mode_not_supported = 3 /* No THP support detected. */ } thp_mode_t; -#define THP_MODE_DEFAULT thp_mode_default +typedef enum { + system_thp_mode_madvise = 0, /* Kernel THP mode: madvise */ + system_thp_mode_always = 1, /* Kernel THP mode: always */ + system_thp_mode_never = 2, /* Kernel THP mode: never */ + system_thp_mode_not_supported = 3 /* No THP support detected. */ +} system_thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_do_nothing extern thp_mode_t opt_thp; -extern thp_mode_t init_system_thp_mode; /* Initial system wide state. */ +extern system_thp_mode_t init_system_thp_mode; /* Initial system wide state. */ extern const char *const thp_mode_names[]; +extern const char *const system_thp_mode_names[]; void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); diff --git a/src/arena.c b/src/arena.c index 962a325d..224a9b63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1890,8 +1890,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { /* Make sure that b0 thp auto-switch won't happen concurrently here. 
*/ malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp - && metadata_thp_enabled() && (opt_thp == thp_mode_default) - && (init_system_thp_mode == thp_mode_default); + && metadata_thp_enabled() && (opt_thp == thp_mode_do_nothing) + && (init_system_thp_mode == system_thp_mode_madvise); (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; malloc_mutex_init(&(&huge_arena_pac_thp)->lock, "pac_thp", diff --git a/src/base.c b/src/base.c index c494556c..ef7f0dd4 100644 --- a/src/base.c +++ b/src/base.c @@ -28,8 +28,8 @@ const char *const metadata_thp_mode_names[] = {"disabled", "auto", "always"}; static inline bool metadata_thp_madvise(void) { - return (metadata_thp_enabled() - && (init_system_thp_mode == thp_mode_default)); + return (metadata_thp_enabled() && + (init_system_thp_mode == system_thp_mode_madvise)); } static void * diff --git a/src/pages.c b/src/pages.c index 000b87fe..e7766fcc 100644 --- a/src/pages.c +++ b/src/pages.c @@ -44,8 +44,10 @@ static bool os_overcommits; const char *const thp_mode_names[] = { "default", "always", "never", "not supported"}; -thp_mode_t opt_thp = THP_MODE_DEFAULT; -thp_mode_t init_system_thp_mode; +const char *const system_thp_mode_names[] = { + "madvise", "always", "never", "not supported"}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +system_thp_mode_t init_system_thp_mode; /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. 
*/ static bool pages_can_purge_lazy_runtime = true; @@ -778,21 +780,31 @@ os_overcommits_proc(void) { } #endif +static bool +pages_should_skip_set_thp_state() { + if (opt_thp == thp_mode_do_nothing + || (opt_thp == thp_mode_always + && init_system_thp_mode == system_thp_mode_always) + || (opt_thp == thp_mode_never + && init_system_thp_mode == system_thp_mode_never)) { + return true; + } + return false; +} void pages_set_thp_state(void *ptr, size_t size) { - if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + if (pages_should_skip_set_thp_state()) { return; } assert(opt_thp != thp_mode_not_supported - && init_system_thp_mode != thp_mode_not_supported); + && init_system_thp_mode != system_thp_mode_not_supported); if (opt_thp == thp_mode_always - && init_system_thp_mode != thp_mode_never) { - assert(init_system_thp_mode == thp_mode_default); + && init_system_thp_mode == system_thp_mode_madvise) { pages_huge_unaligned(ptr, size); } else if (opt_thp == thp_mode_never) { - assert(init_system_thp_mode == thp_mode_default - || init_system_thp_mode == thp_mode_always); + assert(init_system_thp_mode == system_thp_mode_madvise + || init_system_thp_mode == system_thp_mode_always); pages_nohuge_unaligned(ptr, size); } } @@ -825,16 +837,16 @@ init_thp_state(void) { } if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { - init_system_thp_mode = thp_mode_default; + init_system_thp_mode = system_thp_mode_madvise; } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) { - init_system_thp_mode = thp_mode_always; + init_system_thp_mode = system_thp_mode_always; } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { - init_system_thp_mode = thp_mode_never; + init_system_thp_mode = system_thp_mode_never; } else { goto label_error; } if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { - if (init_system_thp_mode == thp_mode_default) { + if (init_system_thp_mode == system_thp_mode_madvise) { opt_hpa_opts.hugify_style = hpa_hugify_style_lazy; 
} else { opt_hpa_opts.hugify_style = hpa_hugify_style_none; @@ -842,14 +854,15 @@ init_thp_state(void) { } return; #elif defined(JEMALLOC_HAVE_MEMCNTL) - init_system_thp_mode = thp_mode_default; + init_system_thp_mode = system_thp_mode_madvise; if (opt_hpa_opts.hugify_style == hpa_hugify_style_auto) { opt_hpa_opts.hugify_style = hpa_hugify_style_eager; } return; #endif label_error: - opt_thp = init_system_thp_mode = thp_mode_not_supported; + opt_thp = thp_mode_not_supported; + init_system_thp_mode = system_thp_mode_not_supported; } bool diff --git a/src/stats.c b/src/stats.c index ea7a4e2e..366f96f7 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1516,6 +1516,22 @@ stats_general_print(emitter_t *emitter) { #undef CONFIG_WRITE_BOOL emitter_dict_end(emitter); /* Close "config" dict. */ + /* system. */ + emitter_dict_begin(emitter, "system", "System configuration"); + + /* + * This shows system's THP mode detected at jemalloc's init time. + * jemalloc does not re-detect the mode even if it changes after + * jemalloc's init. It is assumed that system's THP mode is stable + * during the process's lifetime and a violation could lead to + * undefined behavior. + */ + const char *thp_mode_name = system_thp_mode_names[init_system_thp_mode]; + emitter_kv(emitter, "thp_mode", "system.thp_mode", emitter_type_string, + &thp_mode_name); + + emitter_dict_end(emitter); /* Close "system". */ + /* opt. */ #define OPT_WRITE(name, var, size, emitter_type) \ if (je_mallctl("opt." 
name, (void *)&var, &size, NULL, 0) == 0) { \ diff --git a/test/unit/pages.c b/test/unit/pages.c index dbee2f0c..66afb84b 100644 --- a/test/unit/pages.c +++ b/test/unit/pages.c @@ -10,7 +10,7 @@ TEST_BEGIN(test_pages_huge) { pages = pages_map(NULL, alloc_size, PAGE, &commit); expect_ptr_not_null(pages, "Unexpected pages_map() error"); - if (init_system_thp_mode == thp_mode_default) { + if (init_system_thp_mode == system_thp_mode_madvise) { hugepage = (void *)(ALIGNMENT_CEILING( (uintptr_t)pages, HUGEPAGE)); expect_b_ne(pages_huge(hugepage, HUGEPAGE), have_madvise_huge, From 3678a57c101b84400d6db85c96ad8ce18d5fcdf9 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Mon, 13 Oct 2025 11:37:49 -0700 Subject: [PATCH 2556/2608] When extracting from central, hugify_eager is different than start_as_huge --- Makefile.in | 1 + .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 5 + src/hpa.c | 9 +- src/jemalloc.c | 6 +- src/stats.c | 1 + test/unit/hpa.c | 8 +- test/unit/hpa.sh | 2 +- test/unit/hpa_background_thread.sh | 2 +- test/unit/hpa_thp_always.c | 202 ++++++++++++++++++ test/unit/hpa_thp_always.sh | 3 + test/unit/hpa_vectorized_madvise.sh | 2 +- test/unit/mallctl.c | 2 + 13 files changed, 235 insertions(+), 9 deletions(-) create mode 100644 test/unit/hpa_thp_always.c create mode 100644 test/unit/hpa_thp_always.sh diff --git a/Makefile.in b/Makefile.in index 047e05cb..c63e6f8f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -228,6 +228,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_thp_always.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ $(srcroot)test/unit/hpa_background_thread.c \ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index b502c7e7..a319dc81 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ 
b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -15,6 +15,7 @@ extern bool malloc_slow; extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_trust_madvise; +extern bool opt_experimental_hpa_start_huge_if_thp_always; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; diff --git a/src/ctl.c b/src/ctl.c index 85583bec..d3443a13 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -98,6 +98,7 @@ CTL_PROTO(opt_abort_conf) CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) +CTL_PROTO(opt_experimental_hpa_start_huge_if_thp_always) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -464,6 +465,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, {NAME("cache_oblivious"), CTL(opt_cache_oblivious)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, + {NAME("experimental_hpa_start_huge_if_thp_always"), + CTL(opt_experimental_hpa_start_huge_if_thp_always)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, @@ -2131,6 +2134,8 @@ CTL_RO_NL_GEN(opt_cache_oblivious, opt_cache_oblivious, bool) CTL_RO_NL_GEN( opt_debug_double_free_max_scan, opt_debug_double_free_max_scan, unsigned) CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_start_huge_if_thp_always, + opt_experimental_hpa_start_huge_if_thp_always, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) /* HPA options. 
*/ diff --git a/src/hpa.c b/src/hpa.c index a7875e89..3687e6ea 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -28,6 +28,8 @@ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; +bool opt_experimental_hpa_start_huge_if_thp_always = true; + bool hpa_hugepage_size_exceeds_limit(void) { return HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE; @@ -113,6 +115,9 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, *oom = false; hpdata_t *ps = NULL; + bool start_as_huge = hugify_eager + || (init_system_thp_mode == system_thp_mode_always + && opt_experimental_hpa_start_huge_if_thp_always); /* Is eden a perfect fit? */ if (central->eden != NULL && central->eden_len == HUGEPAGE) { @@ -122,7 +127,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); return NULL; } - hpdata_init(ps, central->eden, age, hugify_eager); + hpdata_init(ps, central->eden, age, start_as_huge); central->eden = NULL; central->eden_len = 0; malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); @@ -170,7 +175,7 @@ hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, assert(central->eden_len % HUGEPAGE == 0); assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - hpdata_init(ps, central->eden, age, hugify_eager); + hpdata_init(ps, central->eden, age, start_as_huge); char *eden_char = (char *)central->eden; eden_char += HUGEPAGE; diff --git a/src/jemalloc.c b/src/jemalloc.c index 72216508..0f6ff0c3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1302,6 +1302,9 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") + CONF_HANDLE_BOOL( + opt_experimental_hpa_start_huge_if_thp_always, + "experimental_hpa_start_huge_if_thp_always") 
CONF_HANDLE_BOOL( opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { @@ -1647,7 +1650,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("hpa_hugify_style", k, klen) == 0) { bool match = false; - for (int m = 0; m < hpa_hugify_style_limit; m++) { + for (int m = 0; m < hpa_hugify_style_limit; + m++) { if (strncmp(hpa_hugify_style_names[m], v, vlen) == 0) { diff --git a/src/stats.c b/src/stats.c index 366f96f7..4e04336e 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1604,6 +1604,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("abort_conf") OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") + OPT_WRITE_BOOL("experimental_hpa_start_huge_if_thp_always") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index df2c9d96..0398e21a 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -1416,7 +1416,6 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { nstime_init(&defer_curtime, 10 * 1000 * 1000); tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - /* First allocation makes the page huge */ enum { NALLOCS = HUGEPAGE_PAGES }; edata_t *edatas[NALLOCS]; ndefer_purge_calls = 0; @@ -1426,14 +1425,17 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { expect_ptr_not_null(edatas[i], "Unexpected null edata"); } hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE); - expect_false(hpdata_huge_get(ps), "Page should be non-huge"); + expect_false( + hpdata_huge_get(ps), "style=none, thp=madvise, should be non-huge"); ndefer_hugify_calls = 0; ndefer_purge_calls = 0; hpa_shard_do_deferred_work(tsdn, shard); expect_zu_eq(ndefer_hugify_calls, 0, "Hugify none, no syscall"); ps = psset_pick_alloc(&shard->psset, PAGE); - expect_true(ps, "Page should be huge"); + expect_ptr_not_null(ps, "Unexpected null page"); + expect_false( + hpdata_huge_get(ps), "style=none, thp=madvise, should be non-huge"); 
destroy_test_data(shard); } diff --git a/test/unit/hpa.sh b/test/unit/hpa.sh index fe0e0b67..22451f1d 100644 --- a/test/unit/hpa.sh +++ b/test/unit/hpa.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="process_madvise_max_batch:0" +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_background_thread.sh b/test/unit/hpa_background_thread.sh index 65a56a08..5c85d48b 100644 --- a/test/unit/hpa_background_thread.sh +++ b/test/unit/hpa_background_thread.sh @@ -1,4 +1,4 @@ #!/bin/sh -export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0" +export MALLOC_CONF="hpa_dirty_mult:0,hpa_min_purge_interval_ms:50,hpa_sec_nshards:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_thp_always.c b/test/unit/hpa_thp_always.c new file mode 100644 index 00000000..29c86cdd --- /dev/null +++ b/test/unit/hpa_thp_always.c @@ -0,0 +1,202 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts_aggressive = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(11), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + HUGEPAGE - 5 * PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_eager}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { 
+ bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + + err = hpa_shard_init(&test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + npurge_size = size; + ++ndefer_purge_calls; +} + +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t 
defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall_thp_always) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0)); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_aggressive; + opts.deferral_allowed = true; + opts.purge_threshold = PAGE; + opts.min_purge_delay_ms = 0; + opts.hugification_threshold = HUGEPAGE * 0.25; + opts.dirty_mult = FXP_INIT_PERCENT(10); + opts.hugify_style = hpa_hugify_style_none; + opts.min_purge_interval_ms = 0; + opts.hugify_delay_ms = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + /* Current time = 10ms */ + nstime_init(&defer_curtime, 10 * 1000 * 1000); + + /* Fake that system is in thp_always mode */ + system_thp_mode_t old_mode = init_system_thp_mode; + init_system_thp_mode = system_thp_mode_always; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + ndefer_purge_calls = 0; + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + hpdata_t *ps = psset_pick_alloc(&shard->psset, PAGE); + expect_true(hpdata_huge_get(ps), + "Page should be huge because thp=always and hugify_style is none"); + + ndefer_hugify_calls = 0; + ndefer_purge_calls = 0; + 
hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "style=none, no syscall"); + expect_zu_eq(ndefer_dehugify_calls, 0, "style=none, no syscall"); + expect_zu_eq(ndefer_purge_calls, 1, "purge should happen"); + + destroy_test_data(shard); + init_system_thp_mode = old_mode; +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_hpa_hugify_style_none_huge_no_syscall_thp_always); +} diff --git a/test/unit/hpa_thp_always.sh b/test/unit/hpa_thp_always.sh new file mode 100644 index 00000000..8b93006d --- /dev/null +++ b/test/unit/hpa_thp_always.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:true" diff --git a/test/unit/hpa_vectorized_madvise.sh b/test/unit/hpa_vectorized_madvise.sh index c5d66afa..35d7e6b6 100644 --- a/test/unit/hpa_vectorized_madvise.sh +++ b/test/unit/hpa_vectorized_madvise.sh @@ -1,3 +1,3 @@ #!/bin/sh -export MALLOC_CONF="process_madvise_max_batch:2" +export MALLOC_CONF="process_madvise_max_batch:2,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index d1974e0f..2415fda1 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -300,6 +300,8 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, abort_conf, always); TEST_MALLCTL_OPT(bool, cache_oblivious, always); TEST_MALLCTL_OPT(bool, trust_madvise, always); + TEST_MALLCTL_OPT( + bool, experimental_hpa_start_huge_if_thp_always, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, always); TEST_MALLCTL_OPT(bool, retain, always); From 6d4611197e62285ae69fd0237e6b3a29494213c0 Mon Sep 17 00:00:00 2001 From: Shirui Cheng Date: Tue, 21 Oct 2025 18:53:35 -0700 Subject: [PATCH 2557/2608] move fill/flush pointer array out of tcache.c --- include/jemalloc/internal/arena_externs.h | 17 +- include/jemalloc/internal/arena_types.h | 3 +- include/jemalloc/internal/cache_bin.h | 12 + 
src/arena.c | 378 ++++++++++++++++++++- src/tcache.c | 384 ++-------------------- 5 files changed, 404 insertions(+), 390 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 39d2099d..cf191aeb 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -61,13 +61,13 @@ bool arena_decay_ms_set( ssize_t arena_decay_ms_get(arena_t *arena, extent_state_t state); void arena_decay( tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); -void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); -void arena_reset(tsd_t *tsd, arena_t *arena); -void arena_destroy(tsd_t *tsd, arena_t *arena); -void arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, - cache_bin_t *cache_bin, szind_t binind, const cache_bin_sz_t nfill_min, - const cache_bin_sz_t nfill_max); +uint64_t arena_time_until_deferred(tsdn_t *tsdn, arena_t *arena); +void arena_do_deferred_work(tsdn_t *tsdn, arena_t *arena); +void arena_reset(tsd_t *tsd, arena_t *arena); +void arena_destroy(tsd_t *tsd, arena_t *arena); +cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, + szind_t binind, cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats); void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, szind_t ind, bool zero, bool slab); @@ -84,6 +84,9 @@ void arena_dalloc_bin_locked_handle_newly_empty( void arena_dalloc_bin_locked_handle_newly_nonempty( tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); +void arena_ptr_array_flush(tsd_t *tsd, szind_t binind, + cache_bin_ptr_array_t *arr, unsigned nflush, bool small, + arena_t *stats_arena, cache_bin_stats_t merge_stats); bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, 
size_t *newsize); void *arena_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t oldsize, diff --git a/include/jemalloc/internal/arena_types.h b/include/jemalloc/internal/arena_types.h index 7ed2b968..c586164f 100644 --- a/include/jemalloc/internal/arena_types.h +++ b/include/jemalloc/internal/arena_types.h @@ -12,8 +12,7 @@ /* Maximum length of the arena name. */ #define ARENA_NAME_LEN 32 -typedef struct arena_decay_s arena_decay_t; -typedef struct arena_s arena_t; +typedef struct arena_s arena_t; typedef enum { percpu_arena_mode_names_base = 0, /* Used for options processing. */ diff --git a/include/jemalloc/internal/cache_bin.h b/include/jemalloc/internal/cache_bin.h index 08ee0d6a..bea3a2fc 100644 --- a/include/jemalloc/internal/cache_bin.h +++ b/include/jemalloc/internal/cache_bin.h @@ -690,6 +690,10 @@ cache_bin_finish_fill( nfilled * sizeof(void *)); } bin->stack_head = empty_position - nfilled; + /* Reset the bin stats as it's merged during fill. */ + if (config_stats) { + bin->tstats.nrequests = 0; + } } /* @@ -711,6 +715,10 @@ cache_bin_finish_flush( bin->stack_head + nflushed, bin->stack_head, rem * sizeof(void *)); bin->stack_head += nflushed; cache_bin_low_water_adjust(bin); + /* Reset the bin stats as it's merged during flush. */ + if (config_stats) { + bin->tstats.nrequests = 0; + } } static inline void @@ -731,6 +739,10 @@ cache_bin_finish_flush_stashed(cache_bin_t *bin) { /* Reset the bin local full position. */ bin->low_bits_full = (uint16_t)(uintptr_t)low_bound; assert(cache_bin_nstashed_get_local(bin) == 0); + /* Reset the bin stats as it's merged during flush. 
*/ + if (config_stats) { + bin->tstats.nrequests = 0; + } } /* diff --git a/src/arena.c b/src/arena.c index 224a9b63..664ed6a3 100644 --- a/src/arena.c +++ b/src/arena.c @@ -693,7 +693,7 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); } for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; - slab = edata_list_active_first(&bin->slabs_full)) { + slab = edata_list_active_first(&bin->slabs_full)) { arena_bin_slabs_full_remove(arena, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); @@ -799,7 +799,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { malloc_mutex_lock(tsd_tsdn(tsd), &arena->large_mtx); for (edata_t *edata = edata_list_active_first(&arena->large); - edata != NULL; edata = edata_list_active_first(&arena->large)) { + edata != NULL; edata = edata_list_active_first(&arena->large)) { void *ptr = edata_base_get(edata); size_t usize; @@ -1052,18 +1052,13 @@ arena_bin_choose( return arena_get_bin(arena, binind, binshard); } -void -arena_cache_bin_fill_small(tsdn_t *tsdn, arena_t *arena, cache_bin_t *cache_bin, - szind_t binind, const cache_bin_sz_t nfill_min, - const cache_bin_sz_t nfill_max) { - assert(cache_bin_ncached_get_local(cache_bin) == 0); +cache_bin_sz_t +arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, + cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min, + const cache_bin_sz_t nfill_max, cache_bin_stats_t merge_stats) { assert(nfill_min > 0 && nfill_min <= nfill_max); - assert(nfill_max <= cache_bin_ncached_max_get(cache_bin)); const bin_info_t *bin_info = &bin_infos[binind]; - - CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); - cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); /* * Bin-local resources are used first: 1) bin->slabcur, and 2) nonfull * slabs. 
After both are exhausted, new slabs will be allocated through @@ -1115,7 +1110,7 @@ label_refill: } arena_slab_reg_alloc_batch( - slabcur, bin_info, cnt, &ptrs.ptr[filled]); + slabcur, bin_info, cnt, &arr->ptr[filled]); made_progress = true; filled += cnt; continue; @@ -1153,10 +1148,9 @@ label_refill: if (config_stats && !alloc_and_retry) { bin->stats.nmalloc += filled; - bin->stats.nrequests += cache_bin->tstats.nrequests; + bin->stats.nrequests += merge_stats.nrequests; bin->stats.curregs += filled; bin->stats.nfills++; - cache_bin->tstats.nrequests = 0; } malloc_mutex_unlock(tsdn, &bin->lock); @@ -1184,8 +1178,8 @@ label_refill: fresh_slab = NULL; } - cache_bin_finish_fill(cache_bin, &ptrs, filled); arena_decay_tick(tsdn, arena); + return filled; } size_t @@ -1472,6 +1466,357 @@ arena_dalloc_small(tsdn_t *tsdn, void *ptr) { arena_decay_tick(tsdn, arena); } +static const void * +arena_ptr_array_flush_ptr_getter(void *arr_ctx, size_t ind) { + cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; + return arr->ptr[ind]; +} + +static void +arena_ptr_array_flush_metadata_visitor( + void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { + size_t *szind_sum = (size_t *)szind_sum_ctx; + *szind_sum -= alloc_ctx->szind; + util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); +} + +JEMALLOC_NOINLINE static void +arena_ptr_array_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, + size_t nptrs, emap_batch_lookup_result_t *edatas) { + bool found_mismatch = false; + for (size_t i = 0; i < nptrs; i++) { + szind_t true_szind = edata_szind_get(edatas[i].edata); + if (true_szind != szind) { + found_mismatch = true; + safety_check_fail_sized_dealloc( + /* current_dealloc */ false, + /* ptr */ arena_ptr_array_flush_ptr_getter(arr, i), + /* true_size */ sz_index2size(true_szind), + /* input_size */ sz_index2size(szind)); + } + } + assert(found_mismatch); +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t 
binind, + cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata, + cache_bin_sz_t nflush, arena_t *stats_arena, + cache_bin_stats_t **merge_stats) { + /* + * The slabs where we freed the last remaining object in the slab (and + * so need to free the slab itself). + * Used only if small == true. + */ + unsigned dalloc_count = 0; + VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + /* + * We maintain the invariant that all edatas yet to be flushed are + * contained in the half-open range [flush_start, flush_end). We'll + * repeatedly partition the array so that the unflushed items are at the + * end. + */ + unsigned flush_start = 0; + + while (flush_start < nflush) { + /* + * After our partitioning step, all objects to flush will be in + * the half-open range [prev_flush_start, flush_start), and + * flush_start will be updated to correspond to the next loop + * iteration. + */ + unsigned prev_flush_start = flush_start; + + edata_t *cur_edata = item_edata[flush_start].edata; + unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + unsigned cur_binshard = edata_binshard_get(cur_edata); + bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); + assert(cur_binshard < bin_infos[binind].n_shards); + /* + * Start off the partition; item_edata[i] always matches itself + * of course. 
+ */ + flush_start++; + for (unsigned i = flush_start; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata_t *edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + assert( + (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); + assert( + (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); + if (edata_arena_ind_get(edata) == cur_arena_ind + && edata_binshard_get(edata) == cur_binshard) { + /* Swap the edatas. */ + emap_batch_lookup_result_t temp_edata = + item_edata[flush_start]; + item_edata[flush_start] = item_edata[i]; + item_edata[i] = temp_edata; + /* Swap the pointers */ + void *temp_ptr = arr->ptr[flush_start]; + arr->ptr[flush_start] = arr->ptr[i]; + arr->ptr[i] = temp_ptr; + flush_start++; + } + } + /* Make sure we implemented partitioning correctly. */ + if (config_debug) { + for (unsigned i = prev_flush_start; i < flush_start; + i++) { + edata_t *edata = item_edata[i].edata; + unsigned arena_ind = edata_arena_ind_get(edata); + assert(arena_ind == cur_arena_ind); + unsigned binshard = edata_binshard_get(edata); + assert(binshard == cur_binshard); + } + for (unsigned i = flush_start; i < nflush; i++) { + edata_t *edata = item_edata[i].edata; + assert( + edata_arena_ind_get(edata) != cur_arena_ind + || edata_binshard_get(edata) + != cur_binshard); + } + } + + /* Actually do the flushing. */ + malloc_mutex_lock(tsdn, &cur_bin->lock); + + /* + * Flush stats first, if that was the right lock. Note that we + * don't actually have to flush stats into the current thread's + * binshard. Flushing into any binshard in the same arena is + * enough; we don't expose stats on per-binshard basis (just + * per-bin). + */ + if (config_stats && stats_arena == cur_arena + && *merge_stats != NULL) { + cur_bin->stats.nflushes++; + cur_bin->stats.nrequests += (*merge_stats)->nrequests; + *merge_stats = NULL; + } + + /* Next flush objects. */ + /* Init only to avoid used-uninitialized warning. 
*/ + arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; + arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + for (unsigned i = prev_flush_start; i < flush_start; i++) { + void *ptr = arr->ptr[i]; + edata_t *edata = item_edata[i].edata; + if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + cur_bin, &dalloc_bin_info, binind, edata, + ptr)) { + dalloc_slabs[dalloc_count] = edata; + dalloc_count++; + } + } + + arena_dalloc_bin_locked_finish( + tsdn, cur_arena, cur_bin, &dalloc_bin_info); + malloc_mutex_unlock(tsdn, &cur_bin->lock); + + arena_decay_ticks( + tsdn, cur_arena, flush_start - prev_flush_start); + } + + /* Handle all deferred slab dalloc. */ + for (unsigned i = 0; i < dalloc_count; i++) { + edata_t *slab = dalloc_slabs[i]; + arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); + } + + if (config_stats && *merge_stats != NULL) { + /* + * The flush loop didn't happen to flush to this + * thread's arena, so the stats didn't get merged. + * Manually do so now. + */ + bin_t *bin = arena_bin_choose(tsdn, stats_arena, binind, NULL); + malloc_mutex_lock(tsdn, &bin->lock); + bin->stats.nflushes++; + bin->stats.nrequests += (*merge_stats)->nrequests; + *merge_stats = NULL; + malloc_mutex_unlock(tsdn, &bin->lock); + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl_large(tsdn_t *tsdn, szind_t binind, + cache_bin_ptr_array_t *arr, emap_batch_lookup_result_t *item_edata, + cache_bin_sz_t nflush, arena_t *stats_arena, + cache_bin_stats_t **merge_stats) { + /* + * We're about to grab a bunch of locks. If one of them happens to be + * the one guarding the arena-level stats counters we flush our + * thread-local ones to, we do so under one critical section. + */ + while (nflush > 0) { + /* Lock the arena, or bin, associated with the first object. 
*/ + edata_t *edata = item_edata[0].edata; + unsigned cur_arena_ind = edata_arena_ind_get(edata); + arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); + + if (!arena_is_auto(cur_arena)) { + malloc_mutex_lock(tsdn, &cur_arena->large_mtx); + } + + /* + * If we acquired the right lock and have some stats to flush, + * flush them. + */ + if (config_stats && stats_arena == cur_arena + && *merge_stats != NULL) { + arena_stats_large_flush_nrequests_add(tsdn, + &stats_arena->stats, binind, + (*merge_stats)->nrequests); + *merge_stats = NULL; + } + + /* + * Large allocations need special prep done. Afterwards, we can + * drop the large lock. + */ + for (unsigned i = 0; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + + if (edata_arena_ind_get(edata) == cur_arena_ind) { + large_dalloc_prep_locked(tsdn, edata); + } + } + if (!arena_is_auto(cur_arena)) { + malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); + } + + /* Deallocate whatever we can. */ + unsigned ndeferred = 0; + for (unsigned i = 0; i < nflush; i++) { + void *ptr = arr->ptr[i]; + edata = item_edata[i].edata; + assert(ptr != NULL && edata != NULL); + if (edata_arena_ind_get(edata) != cur_arena_ind) { + /* + * The object was allocated either via a + * different arena, or a different bin in this + * arena. Either way, stash the object so that + * it can be handled in a future pass. + */ + arr->ptr[ndeferred] = ptr; + item_edata[ndeferred].edata = edata; + ndeferred++; + continue; + } + if (large_dalloc_safety_checks( + edata, ptr, sz_index2size(binind))) { + /* See the comment in isfree. 
*/ + continue; + } + large_dalloc_finish(tsdn, edata); + } + arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); + nflush = ndeferred; + } + + if (config_stats && *merge_stats != NULL) { + arena_stats_large_flush_nrequests_add(tsdn, &stats_arena->stats, + binind, (*merge_stats)->nrequests); + *merge_stats = NULL; + } +} + +JEMALLOC_ALWAYS_INLINE void +arena_ptr_array_flush_impl(tsd_t *tsd, szind_t binind, + cache_bin_ptr_array_t *arr, unsigned nflush, bool small, + arena_t *stats_arena, cache_bin_stats_t **merge_stats) { + /* + * A couple lookup calls take tsdn; declare it once for convenience + * instead of calling tsd_tsdn(tsd) all the time. + */ + tsdn_t *tsdn = tsd_tsdn(tsd); + /* + * Variable length array must have > 0 length; the last element is never + * touched (it's just included to satisfy the no-zero-length rule). + */ + VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); + /* + * This gets compiled away when config_opt_safety_checks is false. + * Checks for sized deallocation bugs, failing early rather than + * corrupting metadata. + */ + size_t szind_sum = binind * nflush; + emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, + &arena_ptr_array_flush_ptr_getter, (void *)arr, + &arena_ptr_array_flush_metadata_visitor, (void *)&szind_sum, + item_edata); + if (config_opt_safety_checks && unlikely(szind_sum != 0)) { + arena_ptr_array_flush_size_check_fail( + arr, binind, nflush, item_edata); + } + + /* + * The small/large flush logic is very similar; you might conclude that + * it's a good opportunity to share code. We've tried this, and by and + * large found this to obscure more than it helps; there are so many + * fiddly bits around things like stats handling, precisely when and + * which mutexes are acquired, etc., that almost all code ends up being + * gated behind 'if (small) { ... } else { ... }'. Even though the + * '...' is morally equivalent, the code itself needs slight tweaks. 
+ */ + if (small) { + return arena_ptr_array_flush_impl_small(tsdn, binind, arr, + item_edata, nflush, stats_arena, merge_stats); + } else { + return arena_ptr_array_flush_impl_large(tsdn, binind, arr, + item_edata, nflush, stats_arena, merge_stats); + } +} + +/* + * In practice, pointers are flushed back to their original allocation arenas, + * so multiple arenas may be involved here. The input stats_arena simply + * indicates where the cache stats should be merged into. + */ +void +arena_ptr_array_flush(tsd_t *tsd, szind_t binind, cache_bin_ptr_array_t *arr, + unsigned nflush, bool small, arena_t *stats_arena, + cache_bin_stats_t merge_stats) { + assert(arr != NULL && arr->ptr != NULL); + /* + * The input cache bin stats represent a snapshot taken when the pointer + * array is set up, and will be merged into the next-level bin stats. + * The original bin stats will be reset by the caller itself. + * This separation ensures that each layer operates independently and + * does not modify another layer's data directly. 
+ */ + cache_bin_stats_t *stats = &merge_stats; + unsigned nflush_batch, nflushed = 0; + cache_bin_ptr_array_t ptrs_batch; + do { + nflush_batch = nflush - nflushed; + if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { + nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; + } + assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); + (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; + (&ptrs_batch)->ptr = arr->ptr + nflushed; + arena_ptr_array_flush_impl(tsd, binind, &ptrs_batch, + nflush_batch, small, stats_arena, &stats); + nflushed += nflush_batch; + } while (nflushed < nflush); + assert(nflush == nflushed); + assert((arr->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); + if (config_stats) { + assert(stats == NULL); + } +} + bool arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, size_t extra, bool zero, size_t *newsize) { @@ -1890,7 +2235,8 @@ arena_init_huge(tsdn_t *tsdn, arena_t *a0) { /* Make sure that b0 thp auto-switch won't happen concurrently here. */ malloc_mutex_lock(tsdn, &b0->mtx); (&huge_arena_pac_thp)->thp_madvise = opt_huge_arena_pac_thp - && metadata_thp_enabled() && (opt_thp == thp_mode_do_nothing) + && metadata_thp_enabled() + && (opt_thp == thp_mode_do_nothing) && (init_system_thp_mode == system_thp_mode_madvise); (&huge_arena_pac_thp)->auto_thp_switched = b0->auto_thp_switched; diff --git a/src/tcache.c b/src/tcache.c index 2d73237b..74ff4718 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -601,15 +601,26 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, assert(tcache_slow->arena != NULL); assert(!tcache_bin_disabled(binind, cache_bin, tcache_slow)); + assert(cache_bin_ncached_get_local(cache_bin) == 0); cache_bin_sz_t nfill = cache_bin_ncached_max_get(cache_bin) >> tcache_nfill_small_lg_div_get(tcache_slow, binind); if (nfill == 0) { nfill = 1; } - arena_cache_bin_fill_small(tsdn, arena, cache_bin, binind, - /* nfill_min */ - opt_experimental_tcache_gc ? 
((nfill >> 1) + 1) : nfill, - /* nfill_max */ nfill); + cache_bin_sz_t nfill_min = opt_experimental_tcache_gc + ? ((nfill >> 1) + 1) + : nfill; + cache_bin_sz_t nfill_max = nfill; + CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nfill_max); + cache_bin_init_ptr_array_for_fill(cache_bin, &ptrs, nfill_max); + + cache_bin_sz_t filled = arena_ptr_array_fill_small(tsdn, arena, binind, + &ptrs, /* nfill_min */ nfill_min, /* nfill_max */ nfill_max, + cache_bin->tstats); + cache_bin_finish_fill(cache_bin, &ptrs, filled); + assert(filled >= nfill_min && filled <= nfill_max); + assert(cache_bin_ncached_get_local(cache_bin) == filled); + tcache_slow->bin_refilled[binind] = true; tcache_nfill_small_burst_prepare(tcache_slow, binind); ret = cache_bin_alloc(cache_bin, tcache_success); @@ -617,363 +628,6 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, return ret; } -static const void * -tcache_bin_flush_ptr_getter(void *arr_ctx, size_t ind) { - cache_bin_ptr_array_t *arr = (cache_bin_ptr_array_t *)arr_ctx; - return arr->ptr[ind]; -} - -static void -tcache_bin_flush_metadata_visitor( - void *szind_sum_ctx, emap_full_alloc_ctx_t *alloc_ctx) { - size_t *szind_sum = (size_t *)szind_sum_ctx; - *szind_sum -= alloc_ctx->szind; - util_prefetch_write_range(alloc_ctx->edata, sizeof(edata_t)); -} - -JEMALLOC_NOINLINE static void -tcache_bin_flush_size_check_fail(cache_bin_ptr_array_t *arr, szind_t szind, - size_t nptrs, emap_batch_lookup_result_t *edatas) { - bool found_mismatch = false; - for (size_t i = 0; i < nptrs; i++) { - szind_t true_szind = edata_szind_get(edatas[i].edata); - if (true_szind != szind) { - found_mismatch = true; - safety_check_fail_sized_dealloc( - /* current_dealloc */ false, - /* ptr */ tcache_bin_flush_ptr_getter(arr, i), - /* true_size */ sz_index2size(true_szind), - /* input_size */ sz_index2size(szind)); - } - } - assert(found_mismatch); -} - -static void -tcache_bin_flush_edatas_lookup(tsd_t *tsd, cache_bin_ptr_array_t *arr, - szind_t binind, 
size_t nflush, emap_batch_lookup_result_t *edatas) { - /* - * This gets compiled away when config_opt_safety_checks is false. - * Checks for sized deallocation bugs, failing early rather than - * corrupting metadata. - */ - size_t szind_sum = binind * nflush; - emap_edata_lookup_batch(tsd, &arena_emap_global, nflush, - &tcache_bin_flush_ptr_getter, (void *)arr, - &tcache_bin_flush_metadata_visitor, (void *)&szind_sum, edatas); - if (config_opt_safety_checks && unlikely(szind_sum != 0)) { - tcache_bin_flush_size_check_fail(arr, binind, nflush, edatas); - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_small(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, - unsigned nflush) { - tcache_slow_t *tcache_slow = tcache->tcache_slow; - /* - * A couple lookup calls take tsdn; declare it once for convenience - * instead of calling tsd_tsdn(tsd) all the time. - */ - tsdn_t *tsdn = tsd_tsdn(tsd); - - assert(binind < SC_NBINS); - arena_t *tcache_arena = tcache_slow->arena; - assert(tcache_arena != NULL); - - /* - * Variable length array must have > 0 length; the last element is never - * touched (it's just included to satisfy the no-zero-length rule). - */ - VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); - - /* - * The slabs where we freed the last remaining object in the slab (and - * so need to free the slab itself). - * Used only if small == true. - */ - unsigned dalloc_count = 0; - VARIABLE_ARRAY(edata_t *, dalloc_slabs, nflush + 1); - - /* - * We're about to grab a bunch of locks. If one of them happens to be - * the one guarding the arena-level stats counters we flush our - * thread-local ones to, we do so under one critical section. - */ - bool merged_stats = false; - /* - * We maintain the invariant that all edatas yet to be flushed are - * contained in the half-open range [flush_start, flush_end). 
We'll - * repeatedly partition the array so that the unflushed items are at the - * end. - */ - unsigned flush_start = 0; - - while (flush_start < nflush) { - /* - * After our partitioning step, all objects to flush will be in - * the half-open range [prev_flush_start, flush_start), and - * flush_start will be updated to correspond to the next loop - * iteration. - */ - unsigned prev_flush_start = flush_start; - - edata_t *cur_edata = item_edata[flush_start].edata; - unsigned cur_arena_ind = edata_arena_ind_get(cur_edata); - arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - - unsigned cur_binshard = edata_binshard_get(cur_edata); - bin_t *cur_bin = arena_get_bin(cur_arena, binind, cur_binshard); - assert(cur_binshard < bin_infos[binind].n_shards); - /* - * Start off the partition; item_edata[i] always matches itself - * of course. - */ - flush_start++; - for (unsigned i = flush_start; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - assert( - (uintptr_t)ptr >= (uintptr_t)edata_addr_get(edata)); - assert( - (uintptr_t)ptr < (uintptr_t)edata_past_get(edata)); - if (edata_arena_ind_get(edata) == cur_arena_ind - && edata_binshard_get(edata) == cur_binshard) { - /* Swap the edatas. */ - emap_batch_lookup_result_t temp_edata = - item_edata[flush_start]; - item_edata[flush_start] = item_edata[i]; - item_edata[i] = temp_edata; - /* Swap the pointers */ - void *temp_ptr = ptrs->ptr[flush_start]; - ptrs->ptr[flush_start] = ptrs->ptr[i]; - ptrs->ptr[i] = temp_ptr; - flush_start++; - } - } - /* Make sure we implemented partitioning correctly. 
*/ - if (config_debug) { - for (unsigned i = prev_flush_start; i < flush_start; - i++) { - edata_t *edata = item_edata[i].edata; - unsigned arena_ind = edata_arena_ind_get(edata); - assert(arena_ind == cur_arena_ind); - unsigned binshard = edata_binshard_get(edata); - assert(binshard == cur_binshard); - } - for (unsigned i = flush_start; i < nflush; i++) { - edata_t *edata = item_edata[i].edata; - assert( - edata_arena_ind_get(edata) != cur_arena_ind - || edata_binshard_get(edata) - != cur_binshard); - } - } - - /* Actually do the flushing. */ - malloc_mutex_lock(tsdn, &cur_bin->lock); - - /* - * Flush stats first, if that was the right lock. Note that we - * don't actually have to flush stats into the current thread's - * binshard. Flushing into any binshard in the same arena is - * enough; we don't expose stats on per-binshard basis (just - * per-bin). - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - cur_bin->stats.nflushes++; - cur_bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - } - - /* Next flush objects. */ - /* Init only to avoid used-uninitialized warning. */ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); - for (unsigned i = prev_flush_start; i < flush_start; i++) { - void *ptr = ptrs->ptr[i]; - edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, cur_arena, - cur_bin, &dalloc_bin_info, binind, edata, - ptr)) { - dalloc_slabs[dalloc_count] = edata; - dalloc_count++; - } - } - - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); - malloc_mutex_unlock(tsdn, &cur_bin->lock); - - arena_decay_ticks( - tsdn, cur_arena, flush_start - prev_flush_start); - } - - /* Handle all deferred slab dalloc. 
*/ - for (unsigned i = 0; i < dalloc_count; i++) { - edata_t *slab = dalloc_slabs[i]; - arena_slab_dalloc(tsdn, arena_get_from_edata(slab), slab); - } - - if (config_stats && !merged_stats) { - /* - * The flush loop didn't happen to flush to this - * thread's arena, so the stats didn't get merged. - * Manually do so now. - */ - bin_t *bin = arena_bin_choose(tsdn, tcache_arena, binind, NULL); - malloc_mutex_lock(tsdn, &bin->lock); - bin->stats.nflushes++; - bin->stats.nrequests += cache_bin->tstats.nrequests; - cache_bin->tstats.nrequests = 0; - malloc_mutex_unlock(tsdn, &bin->lock); - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl_large(tsd_t *tsd, tcache_t *tcache, - cache_bin_t *cache_bin, szind_t binind, cache_bin_ptr_array_t *ptrs, - unsigned nflush) { - tcache_slow_t *tcache_slow = tcache->tcache_slow; - /* - * A couple lookup calls take tsdn; declare it once for convenience - * instead of calling tsd_tsdn(tsd) all the time. - */ - tsdn_t *tsdn = tsd_tsdn(tsd); - - assert(binind < tcache_nbins_get(tcache_slow)); - arena_t *tcache_arena = tcache_slow->arena; - assert(tcache_arena != NULL); - - /* - * Variable length array must have > 0 length; the last element is never - * touched (it's just included to satisfy the no-zero-length rule). - */ - VARIABLE_ARRAY(emap_batch_lookup_result_t, item_edata, nflush + 1); - tcache_bin_flush_edatas_lookup(tsd, ptrs, binind, nflush, item_edata); - - /* - * We're about to grab a bunch of locks. If one of them happens to be - * the one guarding the arena-level stats counters we flush our - * thread-local ones to, we do so under one critical section. - */ - bool merged_stats = false; - while (nflush > 0) { - /* Lock the arena, or bin, associated with the first object. 
*/ - edata_t *edata = item_edata[0].edata; - unsigned cur_arena_ind = edata_arena_ind_get(edata); - arena_t *cur_arena = arena_get(tsdn, cur_arena_ind, false); - - if (!arena_is_auto(cur_arena)) { - malloc_mutex_lock(tsdn, &cur_arena->large_mtx); - } - - /* - * If we acquired the right lock and have some stats to flush, - * flush them. - */ - if (config_stats && tcache_arena == cur_arena - && !merged_stats) { - merged_stats = true; - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, - cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } - - /* - * Large allocations need special prep done. Afterwards, we can - * drop the large lock. - */ - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - - if (edata_arena_ind_get(edata) == cur_arena_ind) { - large_dalloc_prep_locked(tsdn, edata); - } - } - if (!arena_is_auto(cur_arena)) { - malloc_mutex_unlock(tsdn, &cur_arena->large_mtx); - } - - /* Deallocate whatever we can. */ - unsigned ndeferred = 0; - for (unsigned i = 0; i < nflush; i++) { - void *ptr = ptrs->ptr[i]; - edata = item_edata[i].edata; - assert(ptr != NULL && edata != NULL); - if (edata_arena_ind_get(edata) != cur_arena_ind) { - /* - * The object was allocated either via a - * different arena, or a different bin in this - * arena. Either way, stash the object so that - * it can be handled in a future pass. - */ - ptrs->ptr[ndeferred] = ptr; - item_edata[ndeferred].edata = edata; - ndeferred++; - continue; - } - if (large_dalloc_safety_checks( - edata, ptr, sz_index2size(binind))) { - /* See the comment in isfree. 
*/ - continue; - } - large_dalloc_finish(tsdn, edata); - } - arena_decay_ticks(tsdn, cur_arena, nflush - ndeferred); - nflush = ndeferred; - } - - if (config_stats && !merged_stats) { - arena_stats_large_flush_nrequests_add(tsdn, - &tcache_arena->stats, binind, cache_bin->tstats.nrequests); - cache_bin->tstats.nrequests = 0; - } -} - -JEMALLOC_ALWAYS_INLINE void -tcache_bin_flush_impl(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, - szind_t binind, cache_bin_ptr_array_t *ptrs, unsigned nflush, bool small) { - assert(ptrs != NULL && ptrs->ptr != NULL); - unsigned nflush_batch, nflushed = 0; - cache_bin_ptr_array_t ptrs_batch; - do { - nflush_batch = nflush - nflushed; - if (nflush_batch > CACHE_BIN_NFLUSH_BATCH_MAX) { - nflush_batch = CACHE_BIN_NFLUSH_BATCH_MAX; - } - assert(nflush_batch <= CACHE_BIN_NFLUSH_BATCH_MAX); - (&ptrs_batch)->n = (cache_bin_sz_t)nflush_batch; - (&ptrs_batch)->ptr = ptrs->ptr + nflushed; - /* - * The small/large flush logic is very similar; you might conclude that - * it's a good opportunity to share code. We've tried this, and by and - * large found this to obscure more than it helps; there are so many - * fiddly bits around things like stats handling, precisely when and - * which mutexes are acquired, etc., that almost all code ends up being - * gated behind 'if (small) { ... } else { ... }'. Even though the - * '...' is morally equivalent, the code itself needs slight tweaks. 
- */ - if (small) { - tcache_bin_flush_impl_small(tsd, tcache, cache_bin, - binind, &ptrs_batch, nflush_batch); - } else { - tcache_bin_flush_impl_large(tsd, tcache, cache_bin, - binind, &ptrs_batch, nflush_batch); - } - nflushed += nflush_batch; - } while (nflushed < nflush); - assert(nflush == nflushed); - assert((ptrs->ptr + nflush) == ((&ptrs_batch)->ptr + nflush_batch)); -} - JEMALLOC_ALWAYS_INLINE void tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, szind_t binind, unsigned rem, bool small) { @@ -1001,8 +655,8 @@ tcache_bin_flush_bottom(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, CACHE_BIN_PTR_ARRAY_DECLARE(ptrs, nflush); cache_bin_init_ptr_array_for_flush(cache_bin, &ptrs, nflush); - tcache_bin_flush_impl( - tsd, tcache, cache_bin, binind, &ptrs, nflush, small); + arena_ptr_array_flush(tsd, binind, &ptrs, nflush, small, + tcache->tcache_slow->arena, cache_bin->tstats); cache_bin_finish_flush(cache_bin, &ptrs, nflush); } @@ -1054,8 +708,8 @@ tcache_bin_flush_stashed(tsd_t *tsd, tcache_t *tcache, cache_bin_t *cache_bin, cache_bin_init_ptr_array_for_stashed( cache_bin, binind, &ptrs, nstashed); san_check_stashed_ptrs(ptrs.ptr, nstashed, sz_index2size(binind)); - tcache_bin_flush_impl( - tsd, tcache, cache_bin, binind, &ptrs, nstashed, is_small); + arena_ptr_array_flush(tsd, binind, &ptrs, nstashed, is_small, + tcache->tcache_slow->arena, cache_bin->tstats); cache_bin_finish_flush_stashed(cache_bin); assert(cache_bin_nstashed_get_local(cache_bin) == 0); From 47aeff1d08806deb4ea8f91535f5470d7de89915 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 19 Nov 2025 14:25:58 -0800 Subject: [PATCH 2558/2608] Add experimental_enforce_hugify --- .../internal/jemalloc_internal_externs.h | 1 + src/ctl.c | 61 +++++++------- src/hpa.c | 10 ++- src/jemalloc.c | 6 +- src/stats.c | 3 +- test/unit/hpa.c | 83 ++++++++++++++++++- test/unit/mallctl.c | 17 ++-- 7 files changed, 133 insertions(+), 48 deletions(-) diff --git 
a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index a319dc81..ea739ea8 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -16,6 +16,7 @@ extern bool opt_abort; extern bool opt_abort_conf; extern bool opt_trust_madvise; extern bool opt_experimental_hpa_start_huge_if_thp_always; +extern bool opt_experimental_hpa_enforce_hugify; extern bool opt_confirm_conf; extern bool opt_hpa; extern hpa_shard_opts_t opt_hpa_opts; diff --git a/src/ctl.c b/src/ctl.c index d3443a13..3e65e23f 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -99,6 +99,7 @@ CTL_PROTO(opt_cache_oblivious) CTL_PROTO(opt_debug_double_free_max_scan) CTL_PROTO(opt_trust_madvise) CTL_PROTO(opt_experimental_hpa_start_huge_if_thp_always) +CTL_PROTO(opt_experimental_hpa_enforce_hugify) CTL_PROTO(opt_confirm_conf) CTL_PROTO(opt_hpa) CTL_PROTO(opt_hpa_slab_max_alloc) @@ -467,6 +468,8 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("trust_madvise"), CTL(opt_trust_madvise)}, {NAME("experimental_hpa_start_huge_if_thp_always"), CTL(opt_experimental_hpa_start_huge_if_thp_always)}, + {NAME("experimental_hpa_enforce_hugify"), + CTL(opt_experimental_hpa_enforce_hugify)}, {NAME("confirm_conf"), CTL(opt_confirm_conf)}, {NAME("hpa"), CTL(opt_hpa)}, {NAME("hpa_slab_max_alloc"), CTL(opt_hpa_slab_max_alloc)}, {NAME("hpa_hugification_threshold"), CTL(opt_hpa_hugification_threshold)}, @@ -1108,30 +1111,30 @@ ctl_arena_stats_sdmerge( } ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.npurge, + .decay_dirty.npurge, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .npurge); + .npurge); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.nmadvise, + .decay_dirty.nmadvise, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .nmadvise); + .nmadvise); 
ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_dirty.purged, + .decay_dirty.purged, &astats->astats.pa_shard_stats.pac_stats.decay_dirty - .purged); + .purged); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.npurge, + .decay_muzzy.npurge, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .npurge); + .npurge); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.nmadvise, + .decay_muzzy.nmadvise, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .nmadvise); + .nmadvise); ctl_accum_locked_u64(&sdstats->astats.pa_shard_stats.pac_stats - .decay_muzzy.purged, + .decay_muzzy.purged, &astats->astats.pa_shard_stats.pac_stats.decay_muzzy - .purged); + .purged); #define OP(mtx) \ malloc_mutex_prof_merge( \ @@ -1390,7 +1393,7 @@ ctl_refresh(tsdn_t *tsdn) { background_thread_lock); } else { memset(&ctl_stats->mutex_prof_data - [global_prof_mutex_background_thread], + [global_prof_mutex_background_thread], 0, sizeof(mutex_prof_data_t)); } /* We own ctl mutex already. */ @@ -2136,6 +2139,8 @@ CTL_RO_NL_GEN( CTL_RO_NL_GEN(opt_trust_madvise, opt_trust_madvise, bool) CTL_RO_NL_GEN(opt_experimental_hpa_start_huge_if_thp_always, opt_experimental_hpa_start_huge_if_thp_always, bool) +CTL_RO_NL_GEN(opt_experimental_hpa_enforce_hugify, + opt_experimental_hpa_enforce_hugify, bool) CTL_RO_NL_GEN(opt_confirm_conf, opt_confirm_conf, bool) /* HPA options. 
*/ @@ -3770,35 +3775,29 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_extent_avail, arenas_i(mib[2])->astats->astats.pa_shard_stats.edata_avail, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - locked_read_u64_unsynchronized( - &arenas_i(mib[2]) - ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), + locked_read_u64_unsynchronized(&arenas_i(mib[2]) + ->astats->astats.pa_shard_stats.pac_stats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, 
stats_arenas_i_base, diff --git a/src/hpa.c b/src/hpa.c index 3687e6ea..f6d46b25 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -29,6 +29,7 @@ static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; +bool opt_experimental_hpa_enforce_hugify = false; bool hpa_hugepage_size_exceeds_limit(void) { @@ -430,7 +431,7 @@ hpa_update_purge_hugify_eligibility( /* Assume it is huge without the need to madvise */ hpa_assume_huge(tsdn, shard, ps); } - if (hpa_is_hugify_lazy(shard) + if ((hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) && hpa_good_hugification_candidate(shard, ps) && !hpdata_huge_get(ps)) { nstime_t now; @@ -538,8 +539,9 @@ hpa_purge_actual_unlocked( static inline bool hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { - return hpa_is_hugify_lazy(shard) && hpdata_huge_get(ps) - && !hpdata_empty(ps); + return (hpa_is_hugify_lazy(shard) + || opt_experimental_hpa_enforce_hugify) + && hpdata_huge_get(ps) && !hpdata_empty(ps); } /* Prepare purge of one page. Return number of dirty regular pages on it @@ -736,7 +738,7 @@ hpa_try_hugify(tsdn_t *tsdn, hpa_shard_t *shard) { * what user believes is the truth on the target system, but we won't * update nhugifies stat as system call is not being made. 
*/ - if (hpa_is_hugify_lazy(shard)) { + if (hpa_is_hugify_lazy(shard) || opt_experimental_hpa_enforce_hugify) { malloc_mutex_unlock(tsdn, &shard->mtx); bool err = shard->central->hooks.hugify( hpdata_addr_get(to_hugify), HUGEPAGE, diff --git a/src/jemalloc.c b/src/jemalloc.c index 0f6ff0c3..6844da5a 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1305,6 +1305,8 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], CONF_HANDLE_BOOL( opt_experimental_hpa_start_huge_if_thp_always, "experimental_hpa_start_huge_if_thp_always") + CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, + "experimental_hpa_enforce_hugify") CONF_HANDLE_BOOL( opt_huge_arena_pac_thp, "huge_arena_pac_thp") if (strncmp("metadata_thp", k, klen) == 0) { @@ -1554,7 +1556,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("percpu_arena", k, klen) == 0) { bool match = false; for (int m = percpu_arena_mode_names_base; - m < percpu_arena_mode_names_limit; m++) { + m < percpu_arena_mode_names_limit; m++) { if (strncmp(percpu_arena_mode_names[m], v, vlen) == 0) { @@ -1651,7 +1653,7 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], if (strncmp("hpa_hugify_style", k, klen) == 0) { bool match = false; for (int m = 0; m < hpa_hugify_style_limit; - m++) { + m++) { if (strncmp(hpa_hugify_style_names[m], v, vlen) == 0) { diff --git a/src/stats.c b/src/stats.c index 4e04336e..2ccac6c9 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1115,7 +1115,7 @@ stats_arena_mutexes_print( CTL_LEAF_PREPARE(stats_arenas_mib, 3, "mutexes"); for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; - i++) { + i++) { const char *name = arena_mutex_names[i]; emitter_json_object_kv_begin(emitter, name); mutex_stats_read_arena( @@ -1605,6 +1605,7 @@ stats_general_print(emitter_t *emitter) { OPT_WRITE_BOOL("cache_oblivious") OPT_WRITE_BOOL("confirm_conf") OPT_WRITE_BOOL("experimental_hpa_start_huge_if_thp_always") 
+ OPT_WRITE_BOOL("experimental_hpa_enforce_hugify") OPT_WRITE_BOOL("retain") OPT_WRITE_CHAR_P("dss") OPT_WRITE_UNSIGNED("narenas") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 0398e21a..5937601e 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -286,7 +286,7 @@ TEST_BEGIN(test_stress) { size_t ntreenodes = 0; for (mem_contents_t *contents = mem_tree_first(&tree); contents != NULL; - contents = mem_tree_next(&tree, contents)) { + contents = mem_tree_next(&tree, contents)) { ntreenodes++; node_check(&tree, contents); } @@ -1441,6 +1441,84 @@ TEST_BEGIN(test_hpa_hugify_style_none_huge_no_syscall) { } TEST_END +TEST_BEGIN(test_experimental_hpa_enforce_hugify) { + test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch != 0) + || !config_stats); + + bool old_opt_value = opt_experimental_hpa_enforce_hugify; + opt_experimental_hpa_enforce_hugify = true; + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + /* Use eager so hugify would normally not be made on threshold */ + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.hugify_style = hpa_hugify_style_eager; + opts.deferral_allowed = true; + opts.hugify_delay_ms = 0; + opts.min_purge_interval_ms = 0; + opts.hugification_threshold = 0.9 * HUGEPAGE; + + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + bool deferred_work_generated = false; + nstime_init2(&defer_curtime, 100, 0); + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + enum { NALLOCS = HUGEPAGE_PAGES * 95 / 100 }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, 
&deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 0, "Page was already huge"); + + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + ndefer_purge_calls = 0; + + /* Deallocate half to trigger purge */ + for (int i = 0; i < NALLOCS / 2; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + /* + * Enforce hugify should have triggered dehugify syscall during purge + * when the page is huge and not empty. + */ + expect_zu_ge(ndefer_dehugify_calls, 1, + "Should have triggered dehugify syscall with eager style"); + + for (int i = 0; i < NALLOCS / 2; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + ndefer_hugify_calls = 0; + hpa_shard_do_deferred_work(tsdn, shard); + expect_zu_eq(ndefer_hugify_calls, 1, ""); + + opt_experimental_hpa_enforce_hugify = old_opt_value; + destroy_test_data(shard); +} +TEST_END + int main(void) { /* @@ -1464,5 +1542,6 @@ main(void) { test_assume_huge_purge_fully, test_eager_with_purge_threshold, test_delay_when_not_allowed_deferral, test_deferred_until_time, test_eager_no_hugify_on_threshold, - test_hpa_hugify_style_none_huge_no_syscall); + test_hpa_hugify_style_none_huge_no_syscall, + test_experimental_hpa_enforce_hugify); } diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 2415fda1..f409f687 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -302,6 +302,7 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, trust_madvise, always); TEST_MALLCTL_OPT( bool, experimental_hpa_start_huge_if_thp_always, always); + TEST_MALLCTL_OPT(bool, experimental_hpa_enforce_hugify, always); TEST_MALLCTL_OPT(bool, confirm_conf, always); TEST_MALLCTL_OPT(const char *, metadata_thp, 
always); TEST_MALLCTL_OPT(bool, retain, always); @@ -616,8 +617,8 @@ TEST_BEGIN(test_arena_i_dirty_decay_ms) { 0, "Unexpected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; - prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arena.0.dirty_decay_ms", @@ -649,8 +650,8 @@ TEST_BEGIN(test_arena_i_muzzy_decay_ms) { 0, "Unexpected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; - prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arena.0.muzzy_decay_ms", @@ -869,8 +870,8 @@ TEST_BEGIN(test_arenas_dirty_decay_ms) { 0, "Expected mallctl() failure"); for (prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms = -1; - dirty_decay_ms < 20; - prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { + dirty_decay_ms < 20; + prev_dirty_decay_ms = dirty_decay_ms, dirty_decay_ms++) { ssize_t old_dirty_decay_ms; expect_d_eq(mallctl("arenas.dirty_decay_ms", @@ -902,8 +903,8 @@ TEST_BEGIN(test_arenas_muzzy_decay_ms) { 0, "Expected mallctl() failure"); for (prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms = -1; - muzzy_decay_ms < 20; - prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { + muzzy_decay_ms < 20; + prev_muzzy_decay_ms = muzzy_decay_ms, muzzy_decay_ms++) { ssize_t old_muzzy_decay_ms; expect_d_eq(mallctl("arenas.muzzy_decay_ms", From 355774270dc41a66e38565b4c5573fd53a8c090f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 08:36:19 -0700 Subject: [PATCH 2559/2608] [EASY] Encapsulate better, do not pass hpa_shard when hooks are enough, move shard independent actions to hpa_utils --- Makefile.in | 1 + include/jemalloc/internal/hpa_utils.h | 74 
++++++++++++---- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 3 +- .../vc2015/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2017/jemalloc/jemalloc.vcxproj | 3 +- .../vc2017/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2019/jemalloc/jemalloc.vcxproj | 3 +- .../vc2019/jemalloc/jemalloc.vcxproj.filters | 6 ++ .../projects/vc2022/jemalloc/jemalloc.vcxproj | 3 +- .../vc2022/jemalloc/jemalloc.vcxproj.filters | 6 ++ src/hpa.c | 87 +------------------ src/hpa_utils.c | 33 +++++++ test/unit/hpa_vectorized_madvise.c | 71 +-------------- .../unit/hpa_vectorized_madvise_large_batch.c | 71 ++++++++++++++- 14 files changed, 198 insertions(+), 175 deletions(-) create mode 100644 src/hpa_utils.c diff --git a/Makefile.in b/Makefile.in index c63e6f8f..4dd4ce85 100644 --- a/Makefile.in +++ b/Makefile.in @@ -124,6 +124,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ $(srcroot)src/hpa_hooks.c \ + $(srcroot)src/hpa_utils.c \ $(srcroot)src/hpdata.c \ $(srcroot)src/inspect.c \ $(srcroot)src/large.c \ diff --git a/include/jemalloc/internal/hpa_utils.h b/include/jemalloc/internal/hpa_utils.h index 53bcb670..6b006cff 100644 --- a/include/jemalloc/internal/hpa_utils.h +++ b/include/jemalloc/internal/hpa_utils.h @@ -2,8 +2,20 @@ #define JEMALLOC_INTERNAL_HPA_UTILS_H #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/extent.h" #define HPA_MIN_VAR_VEC_SIZE 8 +/* + * This is used for jemalloc internal tuning and may change in the future based + * on production traffic. + * + * This value protects two things: + * 1. Stack size + * 2. Number of huge pages that are being purged in a batch as we do not + * allow allocations while making madvise syscall. 
+ */ +#define HPA_PURGE_BATCH_MAX 16 + #ifdef JEMALLOC_HAVE_PROCESS_MADVISE typedef struct iovec hpa_io_vector_t; #else @@ -13,27 +25,35 @@ typedef struct { } hpa_io_vector_t; #endif +static inline size_t +hpa_process_madvise_max_iovec_len(void) { + assert( + opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); + return opt_process_madvise_max_batch == 0 + ? HPA_MIN_VAR_VEC_SIZE + : opt_process_madvise_max_batch; +} + /* Actually invoke hooks. If we fail vectorized, use single purges */ static void hpa_try_vectorized_purge( - hpa_shard_t *shard, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { + hpa_hooks_t *hooks, hpa_io_vector_t *vec, size_t vlen, size_t nbytes) { bool success = opt_process_madvise_max_batch > 0 - && !shard->central->hooks.vectorized_purge(vec, vlen, nbytes); + && !hooks->vectorized_purge(vec, vlen, nbytes); if (!success) { /* On failure, it is safe to purge again (potential perf - * penalty) If kernel can tell exactly which regions - * failed, we could avoid that penalty. - */ + * penalty) If kernel can tell exactly which regions + * failed, we could avoid that penalty. + */ for (size_t i = 0; i < vlen; ++i) { - shard->central->hooks.purge( - vec[i].iov_base, vec[i].iov_len); + hooks->purge(vec[i].iov_base, vec[i].iov_len); } } } /* - * This struct accumulates the regions for process_madvise. - * It invokes the hook when batch limit is reached + * This structure accumulates the regions for process_madvise. It invokes the + * hook when batch limit is reached. 
*/ typedef struct { hpa_io_vector_t *vp; @@ -51,16 +71,16 @@ hpa_range_accum_init(hpa_range_accum_t *ra, hpa_io_vector_t *v, size_t sz) { } static inline void -hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_shard_t *shard) { +hpa_range_accum_flush(hpa_range_accum_t *ra, hpa_hooks_t *hooks) { assert(ra->total_bytes > 0 && ra->cur > 0); - hpa_try_vectorized_purge(shard, ra->vp, ra->cur, ra->total_bytes); + hpa_try_vectorized_purge(hooks, ra->vp, ra->cur, ra->total_bytes); ra->cur = 0; ra->total_bytes = 0; } static inline void hpa_range_accum_add( - hpa_range_accum_t *ra, void *addr, size_t sz, hpa_shard_t *shard) { + hpa_range_accum_t *ra, void *addr, size_t sz, hpa_hooks_t *hooks) { assert(ra->cur < ra->capacity); ra->vp[ra->cur].iov_base = addr; @@ -69,14 +89,14 @@ hpa_range_accum_add( ra->cur++; if (ra->cur == ra->capacity) { - hpa_range_accum_flush(ra, shard); + hpa_range_accum_flush(ra, hooks); } } static inline void -hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_shard_t *shard) { +hpa_range_accum_finish(hpa_range_accum_t *ra, hpa_hooks_t *hooks) { if (ra->cur > 0) { - hpa_range_accum_flush(ra, shard); + hpa_range_accum_flush(ra, hooks); } } @@ -114,4 +134,28 @@ struct hpa_purge_batch_s { size_t npurged_hp_total; }; +static inline bool +hpa_batch_full(hpa_purge_batch_t *b) { + /* It's okay for ranges to go above */ + return b->npurged_hp_total == b->max_hp + || b->item_cnt == b->items_capacity + || b->nranges >= b->range_watermark; +} + +static inline void +hpa_batch_pass_start(hpa_purge_batch_t *b) { + b->item_cnt = 0; + b->nranges = 0; + b->ndirty_in_batch = 0; +} + +static inline bool +hpa_batch_empty(hpa_purge_batch_t *b) { + return b->item_cnt == 0; +} + +/* Purge pages in a batch using given hooks */ +void hpa_purge_batch( + hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz); + #endif /* JEMALLOC_INTERNAL_HPA_UTILS_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 
fff77a4b..abdeb7b7 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -380,4 +381,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 53d4af8d..1f39cb91 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 10514d35..0b1e1707 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files 
@@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index cda827be..54462516 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -62,6 +62,7 @@ + @@ -379,4 +380,4 @@ - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index c8236a12..7ce66945 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -70,6 +70,9 @@ Source Files + + Source Files + Source Files @@ -163,6 +166,9 @@ Source Files + + Source Files + Source Files diff --git a/src/hpa.c b/src/hpa.c index f6d46b25..5e3727a1 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -473,70 +473,6 @@ hpa_shard_has_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { return to_hugify != NULL || hpa_should_purge(tsdn, shard); } -/* - * This is used for jemalloc internal tuning and may change in the - * future based on production traffic. - * - * This value protects two things: - * 1. Stack size - * 2. Number of huge pages that are being purged in a batch as - * we do not allow allocations while making madvise syscall. - */ -#define HPA_PURGE_BATCH_MAX_DEFAULT 16 - -#ifndef JEMALLOC_JET -# define HPA_PURGE_BATCH_MAX HPA_PURGE_BATCH_MAX_DEFAULT -#else -size_t hpa_purge_max_batch_size_for_test = HPA_PURGE_BATCH_MAX_DEFAULT; -size_t -hpa_purge_max_batch_size_for_test_set(size_t new_size) { - size_t old_size = hpa_purge_max_batch_size_for_test; - hpa_purge_max_batch_size_for_test = new_size; - return old_size; -} -# define HPA_PURGE_BATCH_MAX hpa_purge_max_batch_size_for_test -#endif - -static inline size_t -hpa_process_madvise_max_iovec_len(void) { - assert( - opt_process_madvise_max_batch <= PROCESS_MADVISE_MAX_BATCH_LIMIT); - return opt_process_madvise_max_batch == 0 - ? 
HPA_MIN_VAR_VEC_SIZE - : opt_process_madvise_max_batch; -} - -static inline void -hpa_purge_actual_unlocked( - hpa_shard_t *shard, hpa_purge_item_t *batch, size_t batch_sz) { - assert(batch_sz > 0); - - size_t len = hpa_process_madvise_max_iovec_len(); - VARIABLE_ARRAY(hpa_io_vector_t, vec, len); - - hpa_range_accum_t accum; - hpa_range_accum_init(&accum, vec, len); - - for (size_t i = 0; i < batch_sz; ++i) { - /* Actually do the purging, now that the lock is dropped. */ - if (batch[i].dehugify) { - shard->central->hooks.dehugify( - hpdata_addr_get(batch[i].hp), HUGEPAGE); - } - void *purge_addr; - size_t purge_size; - size_t total_purged_on_one_hp = 0; - while (hpdata_purge_next( - batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { - total_purged_on_one_hp += purge_size; - assert(total_purged_on_one_hp <= HUGEPAGE); - hpa_range_accum_add( - &accum, purge_addr, purge_size, shard); - } - } - hpa_range_accum_finish(&accum, shard); -} - static inline bool hpa_needs_dehugify(hpa_shard_t *shard, const hpdata_t *ps) { return (hpa_is_hugify_lazy(shard) @@ -624,26 +560,6 @@ hpa_purge_finish_hp( psset_update_end(&shard->psset, hp_item->hp); } -static inline bool -hpa_batch_full(hpa_purge_batch_t *b) { - /* It's okay for ranges to go above */ - return b->npurged_hp_total == b->max_hp - || b->item_cnt == b->items_capacity - || b->nranges >= b->range_watermark; -} - -static inline void -hpa_batch_pass_start(hpa_purge_batch_t *b) { - b->item_cnt = 0; - b->nranges = 0; - b->ndirty_in_batch = 0; -} - -static inline bool -hpa_batch_empty(hpa_purge_batch_t *b) { - return b->item_cnt == 0; -} - /* Returns number of huge pages purged. 
*/ static inline size_t hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { @@ -679,8 +595,9 @@ hpa_purge(tsdn_t *tsdn, hpa_shard_t *shard, size_t max_hp) { if (hpa_batch_empty(&batch)) { break; } + hpa_hooks_t *hooks = &shard->central->hooks; malloc_mutex_unlock(tsdn, &shard->mtx); - hpa_purge_actual_unlocked(shard, batch.items, batch.item_cnt); + hpa_purge_batch(hooks, batch.items, batch.item_cnt); malloc_mutex_lock(tsdn, &shard->mtx); /* The shard updates */ diff --git a/src/hpa_utils.c b/src/hpa_utils.c new file mode 100644 index 00000000..59bb0d1f --- /dev/null +++ b/src/hpa_utils.c @@ -0,0 +1,33 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_utils.h" + +void +hpa_purge_batch(hpa_hooks_t *hooks, hpa_purge_item_t *batch, size_t batch_sz) { + assert(batch_sz > 0); + + size_t len = hpa_process_madvise_max_iovec_len(); + VARIABLE_ARRAY(hpa_io_vector_t, vec, len); + + hpa_range_accum_t accum; + hpa_range_accum_init(&accum, vec, len); + + for (size_t i = 0; i < batch_sz; ++i) { + /* Actually do the purging, now that the lock is dropped. 
*/ + if (batch[i].dehugify) { + hooks->dehugify(hpdata_addr_get(batch[i].hp), HUGEPAGE); + } + void *purge_addr; + size_t purge_size; + size_t total_purged_on_one_hp = 0; + while (hpdata_purge_next( + batch[i].hp, &batch[i].state, &purge_addr, &purge_size)) { + total_purged_on_one_hp += purge_size; + assert(total_purged_on_one_hp <= HUGEPAGE); + hpa_range_accum_add( + &accum, purge_addr, purge_size, hooks); + } + } + hpa_range_accum_finish(&accum, hooks); +} diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index c66811e1..e82f0ffb 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -253,77 +253,8 @@ TEST_BEGIN(test_more_regions_purged_from_one_page) { } TEST_END -size_t hpa_purge_max_batch_size_for_test_set(size_t new_size); -TEST_BEGIN(test_more_pages_than_batch_page_size) { - test_skip_if(!hpa_supported() || (opt_process_madvise_max_batch == 0) - || HUGEPAGE_PAGES <= 4); - - size_t old_page_batch = hpa_purge_max_batch_size_for_test_set(1); - - hpa_hooks_t hooks; - hooks.map = &defer_test_map; - hooks.unmap = &defer_test_unmap; - hooks.purge = &defer_test_purge; - hooks.hugify = &defer_test_hugify; - hooks.dehugify = &defer_test_dehugify; - hooks.curtime = &defer_test_curtime; - hooks.ms_since = &defer_test_ms_since; - hooks.vectorized_purge = &defer_vectorized_purge; - - hpa_shard_opts_t opts = test_hpa_shard_opts_default; - opts.deferral_allowed = true; - opts.min_purge_interval_ms = 0; - ndefer_vec_purge_calls = 0; - ndefer_purge_calls = 0; - - hpa_shard_t *shard = create_test_data(&hooks, &opts); - - bool deferred_work_generated = false; - - nstime_init(&defer_curtime, 0); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - - enum { NALLOCS = 8 * HUGEPAGE_PAGES }; - edata_t *edatas[NALLOCS]; - for (int i = 0; i < NALLOCS; i++) { - edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, - false, false, &deferred_work_generated); - expect_ptr_not_null(edatas[i], "Unexpected null edata"); - } - 
for (int i = 0; i < 3 * (int)HUGEPAGE_PAGES; i++) { - pai_dalloc( - tsdn, &shard->pai, edatas[i], &deferred_work_generated); - } - - hpa_shard_do_deferred_work(tsdn, shard); - - /* - * Strict minimum purge interval is not set, we should purge as long as - * we have dirty pages. - */ - expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); - expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); - - /* We have page batch size = 1. - * we have 5 * HP active pages, 3 * HP dirty pages - * To achieve the balance of 25% max dirty we need to - * purge 2 pages. Since batch is 1 that must be 2 calls - * no matter what opt_process_madvise_max_batch is - */ - size_t nexpected = 2; - expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); - expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); - ndefer_vec_purge_calls = 0; - - hpa_purge_max_batch_size_for_test_set(old_page_batch); - - destroy_test_data(shard); -} -TEST_END - int main(void) { return test_no_reentrancy(test_vectorized_failure_fallback, - test_more_regions_purged_from_one_page, - test_more_pages_than_batch_page_size); + test_more_regions_purged_from_one_page); } diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index 8e7be7c0..d542f72a 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -1,6 +1,7 @@ #include "test/jemalloc_test.h" #include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/hpa_utils.h" #include "jemalloc/internal/nstime.h" #define SHARD_IND 111 @@ -195,7 +196,75 @@ TEST_BEGIN(test_vectorized_purge) { } TEST_END +TEST_BEGIN(test_purge_more_than_one_batch_pages) { + test_skip_if(!hpa_supported() + || (opt_process_madvise_max_batch < HPA_PURGE_BATCH_MAX) + || HUGEPAGE_PAGES <= 4); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + 
hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts_default; + opts.deferral_allowed = true; + opts.min_purge_interval_ms = 0; + opts.dirty_mult = FXP_INIT_PERCENT(1); + ndefer_vec_purge_calls = 0; + ndefer_purge_calls = 0; + ndefer_hugify_calls = 0; + ndefer_dehugify_calls = 0; + + hpa_shard_t *shard = create_test_data(&hooks, &opts); + + bool deferred_work_generated = false; + + nstime_init(&defer_curtime, 0); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + enum { NALLOCS = HPA_PURGE_BATCH_MAX * 3 * HUGEPAGE_PAGES }; + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + for (int i = 0; i < HPA_PURGE_BATCH_MAX * 2 * (int)HUGEPAGE_PAGES; + i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + + hpa_shard_do_deferred_work(tsdn, shard); + + /* + * Strict minimum purge interval is not set, we should purge as long as + * we have dirty pages. + */ + expect_zu_eq(0, ndefer_hugify_calls, "Hugified too early"); + expect_zu_eq(0, ndefer_dehugify_calls, "Dehugified too early"); + + /* We have page batch size = HPA_PURGE_BATCH_MAX. We have + * HPA_PURGE_BATCH_MAX active pages, 2 * HPA_PURGE_BATCH_MAX dirty. + * To achieve the balance of 1% max dirty we need to purge more than one + * batch. 
+ */ + size_t nexpected = 2; + expect_zu_eq(nexpected, ndefer_vec_purge_calls, "Expect purge"); + expect_zu_eq(0, ndefer_purge_calls, "Expect no non-vec purge"); + ndefer_vec_purge_calls = 0; + + destroy_test_data(shard); +} +TEST_END + int main(void) { - return test_no_reentrancy(test_vectorized_purge); + return test_no_reentrancy( + test_vectorized_purge, test_purge_more_than_one_batch_pages); } From 8a06b086f3b514764c1924451ec453a67444470b Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 30 Sep 2025 09:37:09 -0700 Subject: [PATCH 2560/2608] [EASY] Extract hpa_central component from hpa source file --- Makefile.in | 1 + include/jemalloc/internal/hpa.h | 27 +--- include/jemalloc/internal/hpa_central.h | 41 ++++++ .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 + .../vc2015/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 + .../vc2017/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 + .../vc2019/jemalloc/jemalloc.vcxproj.filters | 5 +- .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 + .../vc2022/jemalloc/jemalloc.vcxproj.filters | 5 +- src/hpa.c | 115 ----------------- src/hpa_central.c | 121 ++++++++++++++++++ 13 files changed, 184 insertions(+), 145 deletions(-) create mode 100644 include/jemalloc/internal/hpa_central.h create mode 100644 src/hpa_central.c diff --git a/Makefile.in b/Makefile.in index 4dd4ce85..7365a923 100644 --- a/Makefile.in +++ b/Makefile.in @@ -123,6 +123,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/san_bump.c \ $(srcroot)src/hook.c \ $(srcroot)src/hpa.c \ + $(srcroot)src/hpa_central.c \ $(srcroot)src/hpa_hooks.c \ $(srcroot)src/hpa_utils.c \ $(srcroot)src/hpdata.c \ diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 131bbb90..06567740 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -6,36 +6,13 @@ #include "jemalloc/internal/edata_cache.h" #include 
"jemalloc/internal/emap.h" #include "jemalloc/internal/exp_grow.h" +#include "jemalloc/internal/hpa_central.h" #include "jemalloc/internal/hpa_hooks.h" #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" -typedef struct hpa_central_s hpa_central_t; -struct hpa_central_s { - /* - * Guards expansion of eden. We separate this from the regular mutex so - * that cheaper operations can still continue while we're doing the OS - * call. - */ - malloc_mutex_t grow_mtx; - /* - * Either NULL (if empty), or some integer multiple of a - * hugepage-aligned number of hugepages. We carve them off one at a - * time to satisfy new pageslab requests. - * - * Guarded by grow_mtx. - */ - void *eden; - size_t eden_len; - /* Source for metadata. */ - base_t *base; - - /* The HPA hooks. */ - hpa_hooks_t hooks; -}; - typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { /* @@ -165,8 +142,6 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_central_init( - hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, const hpa_shard_opts_t *opts); diff --git a/include/jemalloc/internal/hpa_central.h b/include/jemalloc/internal/hpa_central.h new file mode 100644 index 00000000..3e0ff7da --- /dev/null +++ b/include/jemalloc/internal/hpa_central.h @@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_HPA_CENTRAL_H +#define JEMALLOC_INTERNAL_HPA_CENTRAL_H + +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/base.h" +#include "jemalloc/internal/hpa_hooks.h" +#include "jemalloc/internal/hpdata.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/tsd_types.h" + +typedef struct hpa_central_s hpa_central_t; +struct hpa_central_s { + /* + * Guards expansion of eden. We separate this from the regular mutex so + * that cheaper operations can still continue while we're doing the OS + * call. + */ + malloc_mutex_t grow_mtx; + /* + * Either NULL (if empty), or some integer multiple of a + * hugepage-aligned number of hugepages. We carve them off one at a + * time to satisfy new pageslab requests. + * + * Guarded by grow_mtx. + */ + void *eden; + size_t eden_len; + /* Source for metadata. */ + base_t *base; + + /* The HPA hooks. 
*/ + hpa_hooks_t hooks; +}; + +bool hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks); + +hpdata_t *hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, + uint64_t age, bool hugify_eager, bool *oom); + +#endif /* JEMALLOC_INTERNAL_HPA_CENTRAL_H */ diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index abdeb7b7..bfb62d78 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 1f39cb91..037eb724 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index 0b1e1707..bd6595b1 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters 
b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 54462516..3f880176 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -61,6 +61,7 @@ + diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 7ce66945..26408c8e 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -67,6 +67,9 @@ Source Files + + Source Files + Source Files @@ -206,4 +209,4 @@ Source Files - \ No newline at end of file + diff --git a/src/hpa.c b/src/hpa.c index 5e3727a1..cc330379 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -8,8 +8,6 @@ #include "jemalloc/internal/witness.h" #include "jemalloc/internal/jemalloc_probe.h" -#define HPA_EDEN_SIZE (128 * HUGEPAGE) - static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); @@ -75,119 +73,6 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { assert(shard->base != NULL); } -bool -hpa_central_init( - hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { - /* malloc_conf processing should have filtered out these cases. 
*/ - assert(hpa_supported()); - bool err; - err = malloc_mutex_init(¢ral->grow_mtx, "hpa_central_grow", - WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive); - if (err) { - return true; - } - - central->base = base; - central->eden = NULL; - central->eden_len = 0; - central->hooks = *hooks; - return false; -} - -static hpdata_t * -hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { - return (hpdata_t *)base_alloc( - tsdn, central->base, sizeof(hpdata_t), CACHELINE); -} - -static hpdata_t * -hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, - uint64_t age, bool hugify_eager, bool *oom) { - /* Don't yet support big allocations; these should get filtered out. */ - assert(size <= HUGEPAGE); - /* - * Should only try to extract from the central allocator if the local - * shard is exhausted. We should hold the grow_mtx on that shard. - */ - witness_assert_positive_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW); - - malloc_mutex_lock(tsdn, ¢ral->grow_mtx); - *oom = false; - - hpdata_t *ps = NULL; - bool start_as_huge = hugify_eager - || (init_system_thp_mode == system_thp_mode_always - && opt_experimental_hpa_start_huge_if_thp_always); - - /* Is eden a perfect fit? */ - if (central->eden != NULL && central->eden_len == HUGEPAGE) { - ps = hpa_alloc_ps(tsdn, central); - if (ps == NULL) { - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - hpdata_init(ps, central->eden, age, start_as_huge); - central->eden = NULL; - central->eden_len = 0; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return ps; - } - - /* - * We're about to try to allocate from eden by splitting. If eden is - * NULL, we have to allocate it too. Otherwise, we just have to - * allocate an edata_t for the new psset. - */ - if (central->eden == NULL) { - /* Allocate address space, bailing if we fail. 
*/ - void *new_eden = central->hooks.map(HPA_EDEN_SIZE); - if (new_eden == NULL) { - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - if (hugify_eager) { - central->hooks.hugify( - new_eden, HPA_EDEN_SIZE, /* sync */ false); - } - ps = hpa_alloc_ps(tsdn, central); - if (ps == NULL) { - central->hooks.unmap(new_eden, HPA_EDEN_SIZE); - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - central->eden = new_eden; - central->eden_len = HPA_EDEN_SIZE; - } else { - /* Eden is already nonempty; only need an edata for ps. */ - ps = hpa_alloc_ps(tsdn, central); - if (ps == NULL) { - *oom = true; - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - return NULL; - } - } - assert(ps != NULL); - assert(central->eden != NULL); - assert(central->eden_len > HUGEPAGE); - assert(central->eden_len % HUGEPAGE == 0); - assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); - - hpdata_init(ps, central->eden, age, start_as_huge); - - char *eden_char = (char *)central->eden; - eden_char += HUGEPAGE; - central->eden = (void *)eden_char; - central->eden_len -= HUGEPAGE; - - malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); - - return ps; -} - bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, diff --git a/src/hpa_central.c b/src/hpa_central.c new file mode 100644 index 00000000..b4f770c2 --- /dev/null +++ b/src/hpa_central.c @@ -0,0 +1,121 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/hpa_central.h" +#include "jemalloc/internal/tsd.h" +#include "jemalloc/internal/witness.h" + +#define HPA_EDEN_SIZE (128 * HUGEPAGE) + +bool +hpa_central_init( + hpa_central_t *central, base_t *base, const hpa_hooks_t *hooks) { + /* malloc_conf processing should have filtered out these cases. 
*/ + assert(hpa_supported()); + bool err; + err = malloc_mutex_init(¢ral->grow_mtx, "hpa_central_grow", + WITNESS_RANK_HPA_CENTRAL_GROW, malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + central->base = base; + central->eden = NULL; + central->eden_len = 0; + central->hooks = *hooks; + return false; +} + +static hpdata_t * +hpa_alloc_ps(tsdn_t *tsdn, hpa_central_t *central) { + return (hpdata_t *)base_alloc( + tsdn, central->base, sizeof(hpdata_t), CACHELINE); +} + +hpdata_t * +hpa_central_extract(tsdn_t *tsdn, hpa_central_t *central, size_t size, + uint64_t age, bool hugify_eager, bool *oom) { + /* Don't yet support big allocations; these should get filtered out. */ + assert(size <= HUGEPAGE); + /* + * Should only try to extract from the central allocator if the local + * shard is exhausted. We should hold the grow_mtx on that shard. + */ + witness_assert_positive_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_HPA_SHARD_GROW); + + malloc_mutex_lock(tsdn, ¢ral->grow_mtx); + *oom = false; + + hpdata_t *ps = NULL; + bool start_as_huge = hugify_eager + || (init_system_thp_mode == system_thp_mode_always + && opt_experimental_hpa_start_huge_if_thp_always); + + /* Is eden a perfect fit? */ + if (central->eden != NULL && central->eden_len == HUGEPAGE) { + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + hpdata_init(ps, central->eden, age, start_as_huge); + central->eden = NULL; + central->eden_len = 0; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return ps; + } + + /* + * We're about to try to allocate from eden by splitting. If eden is + * NULL, we have to allocate it too. Otherwise, we just have to + * allocate an edata_t for the new psset. + */ + if (central->eden == NULL) { + /* Allocate address space, bailing if we fail. 
*/ + void *new_eden = central->hooks.map(HPA_EDEN_SIZE); + if (new_eden == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + if (hugify_eager) { + central->hooks.hugify( + new_eden, HPA_EDEN_SIZE, /* sync */ false); + } + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + central->hooks.unmap(new_eden, HPA_EDEN_SIZE); + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + central->eden = new_eden; + central->eden_len = HPA_EDEN_SIZE; + } else { + /* Eden is already nonempty; only need an edata for ps. */ + ps = hpa_alloc_ps(tsdn, central); + if (ps == NULL) { + *oom = true; + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + return NULL; + } + } + assert(ps != NULL); + assert(central->eden != NULL); + assert(central->eden_len > HUGEPAGE); + assert(central->eden_len % HUGEPAGE == 0); + assert(HUGEPAGE_ADDR2BASE(central->eden) == central->eden); + + hpdata_init(ps, central->eden, age, start_as_huge); + + char *eden_char = (char *)central->eden; + eden_char += HUGEPAGE; + central->eden = (void *)eden_char; + central->eden_len -= HUGEPAGE; + + malloc_mutex_unlock(tsdn, ¢ral->grow_mtx); + + return ps; +} From 0988583d7cd67cb9a5327c5e326b56d63f89cf16 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 25 Nov 2025 16:26:49 -0800 Subject: [PATCH 2561/2608] Add a mallctl for users to get an approximate of active bytes. 
--- src/ctl.c | 45 ++++++++++++++++++++ test/unit/stats.c | 106 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 150 insertions(+), 1 deletion(-) diff --git a/src/ctl.c b/src/ctl.c index 3e65e23f..553c58ad 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -353,6 +353,7 @@ CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) CTL_PROTO(stats_zero_reallocs) +CTL_PROTO(approximate_stats_active) CTL_PROTO(experimental_hooks_install) CTL_PROTO(experimental_hooks_remove) CTL_PROTO(experimental_hooks_prof_backtrace) @@ -853,6 +854,10 @@ static const ctl_named_node_t stats_mutexes_node[] = { {NAME("reset"), CTL(stats_mutexes_reset)}}; #undef MUTEX_PROF_DATA_NODE +static const ctl_named_node_t approximate_stats_node[] = { + {NAME("active"), CTL(approximate_stats_active)}, +}; + static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, @@ -920,6 +925,7 @@ static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)}, {NAME("arena"), CHILD(indexed, arena)}, {NAME("arenas"), CHILD(named, arenas)}, {NAME("prof"), CHILD(named, prof)}, {NAME("stats"), CHILD(named, stats)}, + {NAME("approximate_stats"), CHILD(named, approximate_stats)}, {NAME("experimental"), CHILD(named, experimental)}}; static const ctl_named_node_t super_root_node[] = { {NAME(""), CHILD(named, root)}}; @@ -3756,6 +3762,45 @@ CTL_RO_CGEN(config_stats, stats_background_thread_run_interval, CTL_RO_CGEN(config_stats, stats_zero_reallocs, atomic_load_zu(&zero_realloc_count, ATOMIC_RELAXED), size_t) +/* + * approximate_stats.active returns a result that is informative itself, + * but the returned value SHOULD NOT be compared against other stats retrieved. + * For instance, approximate_stats.active should not be compared against + * any stats, e.g., stats.active or stats.resident, because there is no + * guarantee in the comparison results. 
Results returned by stats.*, on the + * other hand, provides such guarantees, i.e., stats.active <= stats.resident, + * as long as epoch is called right before the queries. + */ + +static int +approximate_stats_active_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + size_t approximate_nactive = 0; + size_t approximate_active_bytes = 0; + + READONLY(); + + tsdn_t *tsdn = tsd_tsdn(tsd); + unsigned n = narenas_total_get(); + + for (unsigned i = 0; i < n; i++) { + arena_t *arena = arena_get(tsdn, i, false); + if (!arena) { + continue; + } + /* Accumulate nactive pages from each arena's pa_shard */ + approximate_nactive += pa_shard_nactive(&arena->pa_shard); + } + + approximate_active_bytes = approximate_nactive << LG_PAGE; + READ(approximate_active_bytes, size_t); + + ret = 0; +label_return: + return ret; +} + CTL_RO_GEN(stats_arenas_i_dss, arenas_i(mib[2])->dss, const char *) CTL_RO_GEN( stats_arenas_i_dirty_decay_ms, arenas_i(mib[2])->dirty_decay_ms, ssize_t) diff --git a/test/unit/stats.c b/test/unit/stats.c index 26516fa8..d2719db2 100644 --- a/test/unit/stats.c +++ b/test/unit/stats.c @@ -1,5 +1,7 @@ #include "test/jemalloc_test.h" +#include "jemalloc/internal/arena_structs.h" + #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) @@ -445,11 +447,113 @@ TEST_BEGIN(test_stats_tcache_bytes_large) { } TEST_END +TEST_BEGIN(test_approximate_stats_active) { + /* + * Test 1: create a manual arena that we exclusively control and use it + * to verify the values returned by pa_shard_nactive() is accurate. + * This also helps verify the correctness of approximate_stats.active + * since it simply sums the pa_shard_nactive() of all arenas. 
+ */ + tsdn_t *tsdn = tsdn_fetch(); + unsigned arena_ind; + size_t sz = sizeof(unsigned); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Arena creation failed"); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + expect_ptr_not_null(arena, "Failed to get arena"); + + size_t nactive_initial = pa_shard_nactive(&arena->pa_shard); + + /* + * Allocate a small size from this arena. Use MALLOCX_TCACHE_NONE + * to bypass tcache and ensure the allocation goes directly to the + * arena's pa_shard. + */ + size_t small_alloc_size = 128; + void *p_small = mallocx( + small_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p_small, "Unexpected mallocx() failure for small"); + + size_t nactive_after_small = pa_shard_nactive(&arena->pa_shard); + /* + * For small allocations, jemalloc allocates a slab. The slab size can + * be looked up via bin_infos[szind].slab_size. The assertion allows + * for extra overhead from profiling, HPA, or sanitizer guard pages. + */ + size_t small_usize = nallocx( + small_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + szind_t small_szind = sz_size2index(small_usize); + size_t expected_small_pages = bin_infos[small_szind].slab_size / PAGE; + expect_zu_ge(nactive_after_small - nactive_initial, + expected_small_pages, + "nactive increase should be at least the slab size in pages"); + + /* + * Allocate a large size from this arena. + */ + size_t large_alloc_size = SC_LARGE_MINCLASS; + void *p_large = mallocx( + large_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p_large, "Unexpected mallocx() failure for large"); + + size_t nactive_after_large = pa_shard_nactive(&arena->pa_shard); + /* + * For large allocations, the increase in pa_shard_nactive should be at + * least the allocation size in pages with sz_large_pad considered. + * The assertion allows for extra overhead from profiling, HPA, or + * sanitizer guard pages. 
+ */ + size_t large_usize = nallocx( + large_alloc_size, MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE); + size_t expected_large_pages = (large_usize + sz_large_pad) / PAGE; + expect_zu_ge(nactive_after_large - nactive_after_small, + expected_large_pages, + "nactive increase should be at least the large allocation size in pages"); + + /* + * Deallocate both allocations and verify nactive returns to the + * original value. + */ + dallocx(p_small, MALLOCX_TCACHE_NONE); + dallocx(p_large, MALLOCX_TCACHE_NONE); + + size_t nactive_final = pa_shard_nactive(&arena->pa_shard); + expect_zu_ge(nactive_final - nactive_after_large, + expected_small_pages + expected_large_pages, + "nactive should return to original value after deallocation"); + + /* + * Test 2: allocate a large allocation in the auto arena and confirm + * that approximate_stats.active increases. Since there may be other + * allocs/dallocs going on, cannot make more accurate assertions like + * Test 1. + */ + size_t approximate_active_before = 0; + size_t approximate_active_after = 0; + sz = sizeof(size_t); + expect_d_eq(mallctl("approximate_stats.active", + (void *)&approximate_active_before, &sz, NULL, 0), + 0, "Unexpected mallctl() result"); + + void *p0 = mallocx(4 * SC_SMALL_MAXCLASS, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p0, "Unexpected mallocx() failure"); + + expect_d_eq(mallctl("approximate_stats.active", + (void *)&approximate_active_after, &sz, NULL, 0), + 0, "Unexpected mallctl() result"); + expect_zu_gt(approximate_active_after, approximate_active_before, + "approximate_stats.active should increase after the allocation"); + + free(p0); +} +TEST_END + int main(void) { return test_no_reentrancy(test_stats_summary, test_stats_large, test_stats_arenas_summary, test_stats_arenas_small, test_stats_arenas_large, test_stats_arenas_bins, test_stats_arenas_lextents, test_stats_tcache_bytes_small, - test_stats_tcache_bytes_large); + test_stats_tcache_bytes_large, test_approximate_stats_active); } From 
441e840df77b88c2fb32d07f56483097261c2f5c Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 5 Dec 2025 19:45:17 -0500 Subject: [PATCH 2562/2608] Add a script to generate github actions instead of Travis CI and Cirrus --- .github/workflows/freebsd-ci.yml | 66 +++ .github/workflows/linux-ci.yml | 695 +++++++++++++++++++++++++++++++ .github/workflows/macos-ci.yml | 212 ++++++++++ .github/workflows/windows-ci.yml | 155 +++++++ scripts/README_GH_ACTIONS.md | 181 ++++++++ scripts/gen_gh_actions.py | 686 ++++++++++++++++++++++++++++++ 6 files changed, 1995 insertions(+) create mode 100644 .github/workflows/freebsd-ci.yml create mode 100644 .github/workflows/linux-ci.yml create mode 100644 .github/workflows/macos-ci.yml create mode 100644 .github/workflows/windows-ci.yml create mode 100644 scripts/README_GH_ACTIONS.md create mode 100755 scripts/gen_gh_actions.py diff --git a/.github/workflows/freebsd-ci.yml b/.github/workflows/freebsd-ci.yml new file mode 100644 index 00000000..6c702d88 --- /dev/null +++ b/.github/workflows/freebsd-ci.yml @@ -0,0 +1,66 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: FreeBSD CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-freebsd: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + debug: ['--enable-debug', '--disable-debug'] + prof: ['--enable-prof', '--disable-prof'] + arch: ['64-bit', '32-bit'] + uncommon: + - '' + - '--with-lg-page=16 --with-malloc-conf=tcache:false' + + name: FreeBSD (${{ matrix.arch }}, debug=${{ matrix.debug }}, prof=${{ matrix.prof }}${{ matrix.uncommon && ', uncommon' || '' }}) + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Test on FreeBSD + uses: vmactions/freebsd-vm@v1 + with: + release: '15.0' + usesh: true + prepare: | + pkg install -y autoconf gmake + run: | + # Verify we're running in FreeBSD + echo "==== System Information ====" + uname -a + freebsd-version + echo "============================" + + # Set compiler flags for 32-bit if needed + if [ "${{ matrix.arch }}" = "32-bit" ]; then + export CC="cc -m32" + export CXX="c++ -m32" + fi + + # Generate configure script + autoconf + + # Configure with matrix options + ./configure --with-jemalloc-prefix=ci_ ${{ matrix.debug }} ${{ matrix.prof }} ${{ matrix.uncommon }} + + # Get CPU count for parallel builds + export JFLAG=$(sysctl -n kern.smp.cpus) + + gmake -j${JFLAG} + gmake -j${JFLAG} tests + gmake check + + + diff --git a/.github/workflows/linux-ci.yml b/.github/workflows/linux-ci.yml new file mode 100644 index 00000000..c5e0c9aa --- /dev/null +++ b/.github/workflows/linux-ci.yml @@ -0,0 +1,695 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: Linux CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-linux: + runs-on: ubuntu-24.04 + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds 
-Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: clang + CXX: clang++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + COMPILER_FLAGS: -m32 + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-prof" + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-stats" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --disable-stats" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof 
--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --disable-libdl" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-stats --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --enable-opt-safety-checks" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --enable-prof 
--enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--disable-libdl --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-lg-page=16" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-opt-safety-checks --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: 
"-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr --with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu,background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --disable-cache-oblivious 
--enable-stats --enable-log --enable-prof" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{ matrix.env.CC }} + CXX: ${{ matrix.env.CXX }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + test-linux-arm64: + runs-on: ubuntu-24.04-arm + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: clang + CXX: clang++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-prof + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--enable-prof --enable-prof-frameptr" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=dss:primary" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=background_thread:true" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{ matrix.env.CC }} + CXX: ${{ matrix.env.CXX }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + 
CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + diff --git a/.github/workflows/macos-ci.yml b/.github/workflows/macos-ci.yml new file mode 100644 index 00000000..585551d0 --- /dev/null +++ b/.github/workflows/macos-ci.yml @@ -0,0 +1,212 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. + +name: macOS CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-macos: + runs-on: macos-15-intel + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror 
-Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - name: Build and test + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + test-macos-arm64: + runs-on: macos-15 + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option 
-Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-stats + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --disable-libdl + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-opt-safety-checks + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --with-lg-page=16 + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-lg-page=16 --with-lg-hugepage=29" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=tcache:false" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: "--with-malloc-conf=percpu_arena:percpu" + EXTRA_CFLAGS: "-Werror -Wno-array-bounds -Wno-unknown-warning-option -Wno-ignored-attributes -Wno-deprecated-declarations" + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - 
name: Build and test + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + + + diff --git a/.github/workflows/windows-ci.yml b/.github/workflows/windows-ci.yml new file mode 100644 index 00000000..f40ba086 --- /dev/null +++ b/.github/workflows/windows-ci.yml @@ -0,0 +1,155 @@ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. + +name: Windows CI + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: + test-windows: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: + - env: + CC: gcc + CXX: g++ + EXTRA_CFLAGS: -fcommon + - env: + CC: gcc + CXX: g++ + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + CONFIGURE_FLAGS: --enable-debug + - env: + CC: gcc + CXX: g++ + CROSS_COMPILE_32BIT: yes + CONFIGURE_FLAGS: --enable-debug + EXTRA_CFLAGS: -fcommon + - env: + CC: cl.exe + CXX: cl.exe + CROSS_COMPILE_32BIT: yes + - env: + CC: cl.exe + CXX: cl.exe + CROSS_COMPILE_32BIT: yes + CONFIGURE_FLAGS: --enable-debug + + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + shell: cmd + run: | + echo === Windows Version === + systeminfo | findstr /B /C:"OS Name" /C:"OS Version" + ver + echo. + echo === Architecture === + echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE% + echo. 
+ + - name: Setup MSYS2 + uses: msys2/setup-msys2@v2 + with: + msystem: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }} + update: true + install: >- + autotools + git + pacboy: >- + make:p + gcc:p + binutils:p + + - name: Build and test (MinGW-GCC) + if: matrix.env.CC != 'cl.exe' + shell: msys2 {0} + env: + CC: ${{ matrix.env.CC || 'gcc' }} + CXX: ${{ matrix.env.CXX || 'g++' }} + COMPILER_FLAGS: ${{ matrix.env.COMPILER_FLAGS }} + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + EXTRA_CFLAGS: ${{ matrix.env.EXTRA_CFLAGS }} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${CC} ${COMPILER_FLAGS}" CXX="${CXX} ${COMPILER_FLAGS}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + mingw32-make tests + + # Run tests + mingw32-make -k check + + - name: Setup MSVC environment + if: matrix.env.CC == 'cl.exe' + uses: ilammy/msvc-dev-cmd@v1 + with: + arch: ${{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }} + + - name: Build and test (MSVC) + if: matrix.env.CC == 'cl.exe' + shell: msys2 {0} + env: + CONFIGURE_FLAGS: ${{ matrix.env.CONFIGURE_FLAGS }} + MSYS2_PATH_TYPE: inherit + run: | + # Export MSVC environment variables for configure + export CC=cl.exe + export CXX=cl.exe + export AR=lib.exe + export NM=dumpbin.exe + export RANLIB=: + + # Verify cl.exe is accessible (should be in PATH via inherit) + if ! which cl.exe > /dev/null 2>&1; then + echo "cl.exe not found, trying to locate MSVC..." 
+ # Find and add MSVC bin directory to PATH + MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\r' | sed 's/\\\\\\\\/\//g' | sed 's/C:/\\/c/g') + if [ -n "$MSVC_BIN" ]; then + export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86" + fi + fi + + # Run autoconf + autoconf + + # Configure with MSVC + ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + # Build tests sequentially due to PDB file issues + mingw32-make tests + + # Run tests + mingw32-make -k check + + + diff --git a/scripts/README_GH_ACTIONS.md b/scripts/README_GH_ACTIONS.md new file mode 100644 index 00000000..1cb236ad --- /dev/null +++ b/scripts/README_GH_ACTIONS.md @@ -0,0 +1,181 @@ +# GitHub Actions Workflow Generator + +This directory contains `gen_gh_actions.py`, a script to generate GitHub Actions CI workflows from the same configuration logic used for Travis CI. + +## Usage + +The script can generate workflows for different platforms: + +```bash +# Generate Linux CI workflow (default) +./scripts/gen_gh_actions.py linux > .github/workflows/linux-ci.yml + +# Generate macOS CI workflow +./scripts/gen_gh_actions.py macos > .github/workflows/macos-ci.yml + +# Generate Windows CI workflow +./scripts/gen_gh_actions.py windows > .github/workflows/windows-ci.yml + +# Generate FreeBSD CI workflow +./scripts/gen_gh_actions.py freebsd > .github/workflows/freebsd-ci.yml + +# Generate combined workflow with all platforms +./scripts/gen_gh_actions.py all > .github/workflows/ci-all.yml +``` + +## Generated Workflows + +### Linux CI (`linux-ci.yml`) +- **test-linux** (AMD64): `ubuntu-latest` (x86_64) + - ~96 configurations covering GCC, Clang, various flags +- **test-linux-arm64** (ARM64): `ubuntu-24.04-arm` (aarch64) + - ~14 configurations including large hugepage tests + - **Note:** Free ARM64 runners (Public Preview) - may have longer queue times during peak hours + +**Total:** 110 configurations + 
+### macOS CI (`macos-ci.yml`) +- **test-macos** (Intel): `macos-15-intel` (x86_64) + - ~10 configurations with GCC compiler +- **test-macos-arm64** (Apple Silicon): `macos-15` (arm64) + - ~11 configurations including large hugepage tests + +**Total:** 21 configurations + +### Windows CI (`windows-ci.yml`) +- **test-windows** (AMD64): `windows-latest` (x86_64) + - 10 configurations covering MinGW-GCC and MSVC compilers + - 32-bit and 64-bit builds + - Uses MSYS2 for build environment + +**Total:** 10 configurations + +### FreeBSD CI (`freebsd-ci.yml`) +- **test-freebsd** (AMD64): Runs in FreeBSD VM on `ubuntu-latest` + - Matrix testing: debug (on/off), prof (on/off), arch (32/64-bit), uncommon configs + - 16 total configuration combinations + - Uses FreeBSD 15.0 via `vmactions/freebsd-vm@v1` + - Uses `gmake` (GNU Make) instead of BSD make + +**Total:** 16 configurations + +## Architecture Verification + +Each workflow includes a "Show OS version" step that prints: + +**Linux:** +```bash +=== System Information === +uname -a # Kernel and architecture +=== Architecture === +uname -m # x86_64, aarch64, etc. 
+arch # Architecture type +=== CPU Info === +lscpu # Detailed CPU information +``` + +**macOS:** +```bash +=== macOS Version === +sw_vers # macOS version and build +=== Architecture === +uname -m # x86_64 or arm64 +arch # i386 or arm64 +=== CPU Info === +sysctl machdep.cpu.brand_string # CPU model +``` + +**Windows:** +```cmd +=== Windows Version === +systeminfo # OS name and version +ver # Windows version +=== Architecture === +PROCESSOR_ARCHITECTURE # AMD64, x86, ARM64 +``` + +## GitHub Runner Images + +| Platform | Runner Label | Architecture | OS Version | Strategy | +|----------|--------------|--------------|------------|----------| +| Linux AMD64 | ubuntu-latest | x86_64 | Ubuntu 22.04+ | Auto-update | +| Linux ARM64 | ubuntu-24.04-arm | aarch64 | Ubuntu 24.04 | Free (Public Preview) | +| macOS Intel | macos-15-intel | x86_64 | macOS 15 Sequoia | Pinned | +| macOS Apple Silicon | macos-15 | arm64 | macOS 15 Sequoia | Pinned | +| Windows | windows-latest | x86_64 | Windows Server 2022+ | Auto-update | +| FreeBSD | ubuntu-latest (VM) | x86_64 | FreeBSD 15.0 in VM | VM-based | + +### Runner Strategy Explained + +We use a **hybrid approach** to balance stability and maintenance: + +**Auto-update runners (`-latest`):** +- **Linux AMD64**: `ubuntu-latest` - Very stable, rarely breaks, auto-updates to newest Ubuntu LTS +- **Windows**: `windows-latest` - Backward compatible, auto-updates to newest Windows Server + +**Pinned runners (specific versions):** +- **Linux ARM64**: `ubuntu-24.04-arm` - **Free for public repos** (Public Preview, may have queue delays) +- **macOS Intel**: `macos-15-intel` - Last Intel macOS runner (EOL **August 2027**) +- **macOS Apple Silicon**: `macos-15` - Pin for control over macOS upgrades + +**Why this approach?** +- Reduces maintenance (auto-update where safe) +- Prevents surprise breakages (pin where needed) +- Balances stability and staying current +- Uses free ARM64 runners for public repositories + +### ARM64 Queue Times + +**If you 
experience long waits for ARM64 jobs:** + +The `ubuntu-24.04-arm` runner is **free for public repositories** but is in **Public Preview**. GitHub warns: *"you may experience longer queue times during peak usage hours"*. + +To reduce wait times, we could upgrade to a Team or Enterprise plan and use the faster, paid `ubuntu-24.04-arm64` runners. + +### Important Deprecation Timeline + +| Date | Event | Action Required | +|------|-------|------------------| +| **August 2027** | macOS Intel runners removed | Must drop Intel macOS testing or use self-hosted | +| **TBD** | ARM64 runners leave Public Preview | May see improved queue times | + +**Note:** `macos-15-intel` is the **last Intel-based macOS runner** from GitHub Actions. After August 2027, only Apple Silicon runners will be available. + +## Platform-Specific Details + +### Windows Build Process +The Windows workflow uses: +1. **MSYS2** setup via `msys2/setup-msys2@v2` action +2. **MinGW-GCC**: Standard autotools build process in MSYS2 shell +3. **MSVC (cl.exe)**: Requires `ilammy/msvc-dev-cmd@v1` for environment setup + - Uses `MSYS2_PATH_TYPE: inherit` to inherit Windows PATH + - Exports `AR=lib.exe`, `NM=dumpbin.exe`, `RANLIB=:` +4. **mingw32-make**: Used instead of `make` (standard in MSYS2) + +### macOS Build Process +- Uses Homebrew to install `autoconf` +- Tests on both Intel (x86_64) and Apple Silicon (ARM64) +- Standard autotools build process +- Excludes certain malloc configurations not supported on macOS + +### Linux Build Process +- Ubuntu 24.04 for AMD64, Ubuntu 24.04 ARM for ARM64 +- Installs 32-bit cross-compilation dependencies when needed +- Most comprehensive test matrix (110 configurations) + +## Relationship to Travis CI + +This script mirrors the logic from `gen_travis.py` but generates GitHub Actions workflows instead of `.travis.yml`. The test matrices are designed to provide equivalent coverage to the Travis CI configuration. 
+ +## Regenerating Workflows + +To regenerate all workflows after modifying `gen_gh_actions.py`: + +```bash +./scripts/gen_gh_actions.py linux > .github/workflows/linux-ci.yml +./scripts/gen_gh_actions.py macos > .github/workflows/macos-ci.yml +./scripts/gen_gh_actions.py windows > .github/workflows/windows-ci.yml +``` + +**Note**: The generated files should not be edited by hand. All changes should be made to `gen_gh_actions.py` and then regenerated. + diff --git a/scripts/gen_gh_actions.py b/scripts/gen_gh_actions.py new file mode 100755 index 00000000..4c5474ab --- /dev/null +++ b/scripts/gen_gh_actions.py @@ -0,0 +1,686 @@ +#!/usr/bin/env python3 + +from itertools import combinations, chain +from enum import Enum, auto + + +LINUX = 'ubuntu-24.04' +OSX = 'macos-latest' +WINDOWS = 'windows-latest' +FREEBSD = 'freebsd' + +AMD64 = 'amd64' +ARM64 = 'arm64' +PPC64LE = 'ppc64le' + + +GITHUB_ACTIONS_TEMPLATE = """\ +# This config file is generated by ./scripts/gen_gh_actions.py. +# Do not edit by hand. 
+ +name: {name} + +on: + push: + branches: [ dev, ci_travis ] + pull_request: + branches: [ dev ] + +jobs: +{jobs} +""" + + +class Option(object): + class Type: + COMPILER = auto() + COMPILER_FLAG = auto() + CONFIGURE_FLAG = auto() + MALLOC_CONF = auto() + FEATURE = auto() + + def __init__(self, type, value): + self.type = type + self.value = value + + @staticmethod + def as_compiler(value): + return Option(Option.Type.COMPILER, value) + + @staticmethod + def as_compiler_flag(value): + return Option(Option.Type.COMPILER_FLAG, value) + + @staticmethod + def as_configure_flag(value): + return Option(Option.Type.CONFIGURE_FLAG, value) + + @staticmethod + def as_malloc_conf(value): + return Option(Option.Type.MALLOC_CONF, value) + + @staticmethod + def as_feature(value): + return Option(Option.Type.FEATURE, value) + + def __eq__(self, obj): + return (isinstance(obj, Option) and obj.type == self.type + and obj.value == self.value) + + def __repr__(self): + type_names = { + Option.Type.COMPILER: 'COMPILER', + Option.Type.COMPILER_FLAG: 'COMPILER_FLAG', + Option.Type.CONFIGURE_FLAG: 'CONFIGURE_FLAG', + Option.Type.MALLOC_CONF: 'MALLOC_CONF', + Option.Type.FEATURE: 'FEATURE' + } + return f"Option({type_names[self.type]}, {repr(self.value)})" + + +# The 'default' configuration is gcc, on linux, with no compiler or configure +# flags. We also test with clang, -m32, --enable-debug, --enable-prof, +# --disable-stats, and --with-malloc-conf=tcache:false. To avoid abusing +# CI resources though, we don't test all 2**7 = 128 possible combinations of these; +# instead, we only test combinations of up to 2 'unusual' settings, under the +# hope that bugs involving interactions of such settings are rare. 
+MAX_UNUSUAL_OPTIONS = 2 + + +GCC = Option.as_compiler('CC=gcc CXX=g++') +CLANG = Option.as_compiler('CC=clang CXX=clang++') +CL = Option.as_compiler('CC=cl.exe CXX=cl.exe') + + +compilers_unusual = [CLANG,] + + +CROSS_COMPILE_32BIT = Option.as_feature('CROSS_COMPILE_32BIT') +feature_unusuals = [CROSS_COMPILE_32BIT] + + +configure_flag_unusuals = [Option.as_configure_flag(opt) for opt in ( + '--enable-debug', + '--enable-prof', + '--disable-stats', + '--disable-libdl', + '--enable-opt-safety-checks', + '--with-lg-page=16', + '--with-lg-page=16 --with-lg-hugepage=29', +)] +LARGE_HUGEPAGE = Option.as_configure_flag("--with-lg-page=16 --with-lg-hugepage=29") + + +malloc_conf_unusuals = [Option.as_malloc_conf(opt) for opt in ( + 'tcache:false', + 'dss:primary', + 'percpu_arena:percpu', + 'background_thread:true', +)] + + +all_unusuals = (compilers_unusual + feature_unusuals + + configure_flag_unusuals + malloc_conf_unusuals) + + +def get_extra_cflags(os, compiler): + if os == WINDOWS: + # For non-CL compilers under Windows (for now it's only MinGW-GCC), + # -fcommon needs to be specified to correctly handle multiple + # 'malloc_conf' symbols and such, which are declared weak under Linux. + # Weak symbols don't work with MinGW-GCC. + if compiler != CL.value: + return ['-fcommon'] + else: + return [] + + # We get some spurious errors when -Warray-bounds is enabled. 
+ extra_cflags = ['-Werror', '-Wno-array-bounds'] + if compiler == CLANG.value or os == OSX: + extra_cflags += [ + '-Wno-unknown-warning-option', + '-Wno-ignored-attributes' + ] + if os == OSX: + extra_cflags += [ + '-Wno-deprecated-declarations', + ] + return extra_cflags + + +def format_env_dict(os, arch, combination): + """Format environment variables as a dictionary for the matrix.""" + compilers = [x.value for x in combination if x.type == Option.Type.COMPILER] + compiler_flags = [x.value for x in combination if x.type == Option.Type.COMPILER_FLAG] + configure_flags = [x.value for x in combination if x.type == Option.Type.CONFIGURE_FLAG] + malloc_conf = [x.value for x in combination if x.type == Option.Type.MALLOC_CONF] + features = [x.value for x in combination if x.type == Option.Type.FEATURE] + + if len(malloc_conf) > 0: + configure_flags.append('--with-malloc-conf=' + ','.join(malloc_conf)) + + if not compilers: + compiler = GCC.value + else: + compiler = compilers[0] + + cross_compile = CROSS_COMPILE_32BIT.value in features + if os == LINUX and cross_compile: + compiler_flags.append('-m32') + + env_dict = {} + + # Parse compiler + cc_parts = compiler.split() + for part in cc_parts: + if part.startswith('CC='): + env_dict['CC'] = part.split('=')[1] + elif part.startswith('CXX='): + env_dict['CXX'] = part.split('=')[1] + + # Add features + for feature in features: + env_dict[feature] = 'yes' + + # Add flags + if compiler_flags: + env_dict['COMPILER_FLAGS'] = ' '.join(compiler_flags) + if configure_flags: + env_dict['CONFIGURE_FLAGS'] = ' '.join(configure_flags) + + extra_cflags = get_extra_cflags(os, compiler) + if extra_cflags: + env_dict['EXTRA_CFLAGS'] = ' '.join(extra_cflags) + + return env_dict + + +def generate_job_matrix_entries(os, arch, exclude, max_unusual_opts, unusuals=all_unusuals): + """Generate matrix entries for a job.""" + entries = [] + for combination in chain.from_iterable( + [combinations(unusuals, i) for i in range(max_unusual_opts + 
1)]): + if not any(excluded in combination for excluded in exclude): + env_dict = format_env_dict(os, arch, combination) + entries.append(env_dict) + return entries + + +def generate_linux_job(arch): + """Generate Linux job configuration.""" + os = LINUX + + # Only generate 2 unusual options for AMD64 to reduce matrix size + max_unusual_opts = MAX_UNUSUAL_OPTIONS if arch == AMD64 else 1 + + exclude = [] + if arch == PPC64LE: + # Avoid 32 bit builds and clang on PowerPC + exclude = (CROSS_COMPILE_32BIT, CLANG,) + if arch == ARM64: + # Avoid 32 bit build on ARM64 + exclude = (CROSS_COMPILE_32BIT,) + + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + + linux_configure_flags = list(configure_flag_unusuals) + linux_configure_flags.append(Option.as_configure_flag("--enable-prof --enable-prof-frameptr")) + + linux_unusuals = (compilers_unusual + feature_unusuals + + linux_configure_flags + malloc_conf_unusuals) + + matrix_entries = generate_job_matrix_entries(os, arch, exclude, max_unusual_opts, linux_unusuals) + + arch_suffix = f"-{arch}" if arch != AMD64 else "" + + # Select appropriate runner based on architecture + if arch == ARM64: + runner = "ubuntu-24.04-arm" # Free ARM64 runner for public repos (Public Preview) + elif arch == PPC64LE: + # GitHub doesn't provide PPC runners, would need self-hosted + runner = "self-hosted-ppc64le" + else: # AMD64 + runner = "ubuntu-24.04" # Ubuntu 24.04 LTS + + job = f""" test-linux{arch_suffix}: + runs-on: {runner} + strategy: + fail-fast: false + matrix: + include: +""" + + for entry in matrix_entries: + job += " - env:\n" + for key, value in entry.items(): + # Properly escape values with special characters + if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + # Add manual job entries + manual_entries = [ + { + 'CC': 'gcc', + 'CXX': 'g++', + 'CONFIGURE_FLAGS': '--enable-debug --disable-cache-oblivious --enable-stats --enable-log 
--enable-prof', + 'EXTRA_CFLAGS': '-Werror -Wno-array-bounds' + }, + { + 'CC': 'gcc', + 'CXX': 'g++', + 'CONFIGURE_FLAGS': '--enable-debug --enable-experimental-smallocx --enable-stats --enable-prof', + 'EXTRA_CFLAGS': '-Werror -Wno-array-bounds' + } + ] + + if arch == AMD64: + for entry in manual_entries: + job += " - env:\n" + for key, value in entry.items(): + if ' ' in str(value): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + job += f""" + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== System Information ===" + uname -a + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== OS Release ===" + cat /etc/os-release || true + echo "" + echo "=== CPU Info ===" + lscpu | grep -E "Architecture|CPU op-mode|Byte Order|CPU\(s\):" || true + + - name: Install dependencies (32-bit) + if: matrix.env.CROSS_COMPILE_32BIT == 'yes' + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install -y gcc-multilib g++-multilib libc6-dev-i386 + + - name: Build and test + env: + CC: ${{{{ matrix.env.CC }}}} + CXX: ${{{{ matrix.env.CXX }}}} + COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}} + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}} + run: | + # Verify the script generates the same output + ./scripts/gen_gh_actions.py > gh_actions_script.yml + + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + +""" + + return job + + +def generate_macos_job(arch): + """Generate macOS job configuration.""" + os = OSX + max_unusual_opts = 1 + + exclude = ([Option.as_malloc_conf(opt) for opt in ( + 'dss:primary', + 'background_thread:true')] + + 
[Option.as_configure_flag('--enable-prof')] + + [CLANG,]) + + if arch != ARM64: + exclude += [LARGE_HUGEPAGE] + + matrix_entries = generate_job_matrix_entries(os, arch, exclude, max_unusual_opts) + + arch_suffix = f"-{arch}" if arch != AMD64 else "" + + # Select appropriate runner based on architecture + # Pin both for more control over OS upgrades + if arch == ARM64: + runner = "macos-15" # Pinned macOS 15 on Apple Silicon + else: # AMD64 + runner = "macos-15-intel" # Pinned macOS 15 on Intel (last Intel runner, EOL Aug 2027) + + job = f""" test-macos{arch_suffix}: + runs-on: {runner} + strategy: + fail-fast: false + matrix: + include: +""" + + for entry in matrix_entries: + job += " - env:\n" + for key, value in entry.items(): + if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + job += f""" + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + run: | + echo "=== macOS Version ===" + sw_vers + echo "" + echo "=== Architecture ===" + uname -m + arch + echo "" + echo "=== CPU Info ===" + sysctl -n machdep.cpu.brand_string + sysctl -n hw.machine + + - name: Install dependencies + run: | + brew install autoconf + + - name: Build and test + env: + CC: ${{{{ matrix.env.CC || 'gcc' }}}} + CXX: ${{{{ matrix.env.CXX || 'g++' }}}} + COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}} + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build + make -j3 + make -j3 tests + + # Run tests + make check + +""" + + return job + + +def generate_windows_job(arch): + """Generate Windows job configuration.""" + os = WINDOWS + max_unusual_opts = 3 + unusuals = ( + 
Option.as_configure_flag('--enable-debug'), + CL, + CROSS_COMPILE_32BIT, + ) + + matrix_entries = generate_job_matrix_entries(os, arch, (), max_unusual_opts, unusuals) + + arch_suffix = f"-{arch}" if arch != AMD64 else "" + + # Use latest for Windows - tends to be backward compatible and stable + job = f""" test-windows{arch_suffix}: + runs-on: windows-latest + strategy: + fail-fast: false + matrix: + include: +""" + + for entry in matrix_entries: + job += " - env:\n" + for key, value in entry.items(): + if ' ' in str(value) or any(c in str(value) for c in [':', ',', '#']): + job += f' {key}: "{value}"\n' + else: + job += f" {key}: {value}\n" + + job += f""" + steps: + - uses: actions/checkout@v4 + + - name: Show OS version + shell: cmd + run: | + echo === Windows Version === + systeminfo | findstr /B /C:"OS Name" /C:"OS Version" + ver + echo. + echo === Architecture === + echo PROCESSOR_ARCHITECTURE=%PROCESSOR_ARCHITECTURE% + echo. + + - name: Setup MSYS2 + uses: msys2/setup-msys2@v2 + with: + msystem: ${{{{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'MINGW32' || 'MINGW64' }}}} + update: true + install: >- + autotools + git + pacboy: >- + make:p + gcc:p + binutils:p + + - name: Build and test (MinGW-GCC) + if: matrix.env.CC != 'cl.exe' + shell: msys2 {{0}} + env: + CC: ${{{{ matrix.env.CC || 'gcc' }}}} + CXX: ${{{{ matrix.env.CXX || 'g++' }}}} + COMPILER_FLAGS: ${{{{ matrix.env.COMPILER_FLAGS }}}} + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + EXTRA_CFLAGS: ${{{{ matrix.env.EXTRA_CFLAGS }}}} + run: | + # Run autoconf + autoconf + + # Configure with flags + if [ -n "$COMPILER_FLAGS" ]; then + ./configure CC="${{CC}} ${{COMPILER_FLAGS}}" CXX="${{CXX}} ${{COMPILER_FLAGS}}" $CONFIGURE_FLAGS + else + ./configure $CONFIGURE_FLAGS + fi + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + mingw32-make tests + + # Run tests + mingw32-make -k check + + - name: Setup MSVC environment + if: matrix.env.CC == 'cl.exe' + uses: 
ilammy/msvc-dev-cmd@v1 + with: + arch: ${{{{ matrix.env.CROSS_COMPILE_32BIT == 'yes' && 'x86' || 'x64' }}}} + + - name: Build and test (MSVC) + if: matrix.env.CC == 'cl.exe' + shell: msys2 {{0}} + env: + CONFIGURE_FLAGS: ${{{{ matrix.env.CONFIGURE_FLAGS }}}} + MSYS2_PATH_TYPE: inherit + run: | + # Export MSVC environment variables for configure + export CC=cl.exe + export CXX=cl.exe + export AR=lib.exe + export NM=dumpbin.exe + export RANLIB=: + + # Verify cl.exe is accessible (should be in PATH via inherit) + if ! which cl.exe > /dev/null 2>&1; then + echo "cl.exe not found, trying to locate MSVC..." + # Find and add MSVC bin directory to PATH + MSVC_BIN=$(cmd.exe /c "echo %VCToolsInstallDir%" | tr -d '\\\\r' | sed 's/\\\\\\\\\\\\\\\\/\\//g' | sed 's/C:/\\\\/c/g') + if [ -n "$MSVC_BIN" ]; then + export PATH="$PATH:$MSVC_BIN/bin/Hostx64/x64:$MSVC_BIN/bin/Hostx86/x86" + fi + fi + + # Run autoconf + autoconf + + # Configure with MSVC + ./configure CC=cl.exe CXX=cl.exe AR=lib.exe $CONFIGURE_FLAGS + + # Build (mingw32-make is the "make" command in MSYS2) + mingw32-make -j3 + # Build tests sequentially due to PDB file issues + mingw32-make tests + + # Run tests + mingw32-make -k check + +""" + + return job + + +def generate_freebsd_job(arch): + """Generate FreeBSD job configuration.""" + # FreeBSD runs in a VM on ubuntu-latest, not native + + job = f""" test-freebsd: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + debug: ['--enable-debug', '--disable-debug'] + prof: ['--enable-prof', '--disable-prof'] + arch: ['64-bit', '32-bit'] + uncommon: + - '' + - '--with-lg-page=16 --with-malloc-conf=tcache:false' + + name: FreeBSD (${{{{ matrix.arch }}}}, debug=${{{{ matrix.debug }}}}, prof=${{{{ matrix.prof }}}}${{{{ matrix.uncommon && ', uncommon' || '' }}}}) + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 1 + + - name: Test on FreeBSD + uses: vmactions/freebsd-vm@v1 + with: + release: '15.0' + usesh: true + prepare: | + pkg install -y 
autoconf gmake + run: | + # Verify we're running in FreeBSD + echo "==== System Information ====" + uname -a + freebsd-version + echo "============================" + + # Set compiler flags for 32-bit if needed + if [ "${{{{ matrix.arch }}}}" = "32-bit" ]; then + export CC="cc -m32" + export CXX="c++ -m32" + fi + + # Generate configure script + autoconf + + # Configure with matrix options + ./configure --with-jemalloc-prefix=ci_ ${{{{ matrix.debug }}}} ${{{{ matrix.prof }}}} ${{{{ matrix.uncommon }}}} + + # Get CPU count for parallel builds + export JFLAG=$(sysctl -n kern.smp.cpus) + + gmake -j${{JFLAG}} + gmake -j${{JFLAG}} tests + gmake check + +""" + + return job + + +def main(): + import sys + + # Determine which workflow to generate based on command-line argument + workflow_type = sys.argv[1] if len(sys.argv) > 1 else 'linux' + + if workflow_type == 'linux': + jobs = '\n'.join(( + generate_linux_job(AMD64), + generate_linux_job(ARM64), + )) + print(GITHUB_ACTIONS_TEMPLATE.format(name='Linux CI', jobs=jobs)) + + elif workflow_type == 'macos': + jobs = '\n'.join(( + generate_macos_job(AMD64), # Intel x86_64 + generate_macos_job(ARM64), # Apple Silicon + )) + print(GITHUB_ACTIONS_TEMPLATE.format(name='macOS CI', jobs=jobs)) + + elif workflow_type == 'windows': + jobs = generate_windows_job(AMD64) + print(GITHUB_ACTIONS_TEMPLATE.format(name='Windows CI', jobs=jobs)) + + elif workflow_type == 'freebsd': + jobs = generate_freebsd_job(AMD64) + print(GITHUB_ACTIONS_TEMPLATE.format(name='FreeBSD CI', jobs=jobs)) + + elif workflow_type == 'all': + # Generate all workflow files + linux_jobs = '\n'.join(( + generate_linux_job(AMD64), + generate_linux_job(ARM64), + )) + macos_jobs = '\n'.join(( + generate_macos_job(AMD64), # Intel + generate_macos_job(ARM64), # Apple Silicon + )) + windows_jobs = generate_windows_job(AMD64) + freebsd_jobs = generate_freebsd_job(AMD64) + + all_jobs = '\n'.join((linux_jobs, macos_jobs, windows_jobs, freebsd_jobs)) + 
print(GITHUB_ACTIONS_TEMPLATE.format(name='CI', jobs=all_jobs)) + + else: + print(f"Unknown workflow type: {workflow_type}", file=sys.stderr) + print("Usage: gen_gh_actions.py [linux|macos|windows|freebsd|all]", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() From c7690e92da89cb08ea43a786d7e7ff5378c4d6af Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 9 Dec 2025 13:14:24 -0500 Subject: [PATCH 2563/2608] Remove Cirrus CI --- .cirrus.yml | 47 ----------------------------------------------- 1 file changed, 47 deletions(-) delete mode 100644 .cirrus.yml diff --git a/.cirrus.yml b/.cirrus.yml deleted file mode 100644 index 585aa42f..00000000 --- a/.cirrus.yml +++ /dev/null @@ -1,47 +0,0 @@ -env: - CIRRUS_CLONE_DEPTH: 1 - ARCH: amd64 - -task: - matrix: - env: - DEBUG_CONFIG: --enable-debug - env: - DEBUG_CONFIG: --disable-debug - matrix: - - env: - PROF_CONFIG: --enable-prof - - env: - PROF_CONFIG: --disable-prof - matrix: - - name: 64-bit - env: - CC: - CXX: - - name: 32-bit - env: - CC: cc -m32 - CXX: c++ -m32 - matrix: - - env: - UNCOMMON_CONFIG: - - env: - UNCOMMON_CONFIG: --with-lg-page=16 --with-malloc-conf=tcache:false - matrix: - - name: 15-CURRENT - freebsd_instance: - image_family: freebsd-15-0-snap - install_script: - - sed -i.bak -e 's,pkg+http://pkg.FreeBSD.org/\${ABI}/quarterly,pkg+http://pkg.FreeBSD.org/\${ABI}/latest,' /etc/pkg/FreeBSD.conf - - pkg upgrade -y - - pkg install -y autoconf gmake - script: - - autoconf - # We don't perfectly track freebsd stdlib.h definitions. This is fine when - # we count as a system header, but breaks otherwise, like during these - # tests. 
- - ./configure --with-jemalloc-prefix=ci_ ${DEBUG_CONFIG} ${PROF_CONFIG} ${UNCOMMON_CONFIG} - - export JFLAG=`sysctl -n kern.smp.cpus` - - gmake -j${JFLAG} - - gmake -j${JFLAG} tests - - gmake check From 6016d86c187ce01ef8cbe1c3023a3ca394c9b47f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 30 Oct 2025 16:05:04 -0700 Subject: [PATCH 2564/2608] [SEC] Make SEC owned by hpa_shard, simplify the code, add stats, lock per bin --- Makefile.in | 2 +- include/jemalloc/internal/arena_externs.h | 2 +- include/jemalloc/internal/ctl.h | 1 - include/jemalloc/internal/hpa.h | 19 +- include/jemalloc/internal/pa.h | 12 +- include/jemalloc/internal/pai.h | 37 - include/jemalloc/internal/sec.h | 129 +- include/jemalloc/internal/sec_opts.h | 43 +- include/jemalloc/internal/witness.h | 2 +- .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 - .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 - .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 - src/arena.c | 6 +- src/ctl.c | 31 +- src/hpa.c | 144 ++- src/jemalloc.c | 17 +- src/pa.c | 22 +- src/pa_extra.c | 10 +- src/pac.c | 8 +- src/pai.c | 32 - src/sec.c | 564 ++++----- src/stats.c | 27 +- test/unit/hpa.c | 88 +- test/unit/hpa_sec_integration.c | 239 ++++ test/unit/hpa_sec_integration.sh | 3 + test/unit/hpa_thp_always.c | 8 +- test/unit/hpa_vectorized_madvise.c | 7 +- .../unit/hpa_vectorized_madvise_large_batch.c | 8 +- test/unit/mallctl.c | 1 - test/unit/sec.c | 1043 ++++++++--------- 35 files changed, 1264 insertions(+), 1257 deletions(-) delete mode 100644 src/pai.c create mode 100644 test/unit/hpa_sec_integration.c create mode 100644 test/unit/hpa_sec_integration.sh diff --git a/Makefile.in b/Makefile.in index 7365a923..83f04e64 100644 --- a/Makefile.in +++ 
b/Makefile.in @@ -135,7 +135,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ $(srcroot)src/pa_extra.c \ - $(srcroot)src/pai.c \ $(srcroot)src/pac.c \ $(srcroot)src/pages.c \ $(srcroot)src/peak_event.c \ @@ -230,6 +229,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/hash.c \ $(srcroot)test/unit/hook.c \ $(srcroot)test/unit/hpa.c \ + $(srcroot)test/unit/hpa_sec_integration.c \ $(srcroot)test/unit/hpa_thp_always.c \ $(srcroot)test/unit/hpa_vectorized_madvise.c \ $(srcroot)test/unit/hpa_vectorized_madvise_large_batch.c \ diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index cf191aeb..1d004635 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -46,7 +46,7 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats); + hpa_shard_stats_t *hpastats); void arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena); edata_t *arena_extent_alloc_large( tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, bool zero); diff --git a/include/jemalloc/internal/ctl.h b/include/jemalloc/internal/ctl.h index b290411b..82035fe3 100644 --- a/include/jemalloc/internal/ctl.h +++ b/include/jemalloc/internal/ctl.h @@ -51,7 +51,6 @@ typedef struct ctl_arena_stats_s { arena_stats_large_t lstats[SC_NSIZES - SC_NBINS]; pac_estats_t estats[SC_NPSIZES]; hpa_shard_stats_t hpastats; - sec_stats_t secstats; } ctl_arena_stats_t; typedef struct ctl_stats_s { diff --git a/include/jemalloc/internal/hpa.h b/include/jemalloc/internal/hpa.h index 06567740..dc7725b7 100644 --- a/include/jemalloc/internal/hpa.h +++ b/include/jemalloc/internal/hpa.h @@ -12,6 +12,7 @@ #include 
"jemalloc/internal/mutex.h" #include "jemalloc/internal/pai.h" #include "jemalloc/internal/psset.h" +#include "jemalloc/internal/sec.h" typedef struct hpa_shard_nonderived_stats_s hpa_shard_nonderived_stats_t; struct hpa_shard_nonderived_stats_s { @@ -57,6 +58,7 @@ typedef struct hpa_shard_stats_s hpa_shard_stats_t; struct hpa_shard_stats_s { psset_stats_t psset_stats; hpa_shard_nonderived_stats_t nonderived_stats; + sec_stats_t secstats; }; typedef struct hpa_shard_s hpa_shard_t; @@ -69,14 +71,17 @@ struct hpa_shard_s { /* The central allocator we get our hugepages from. */ hpa_central_t *central; + /* Protects most of this shard's state. */ malloc_mutex_t mtx; + /* * Guards the shard's access to the central allocator (preventing * multiple threads operating on this shard from accessing the central * allocator). */ malloc_mutex_t grow_mtx; + /* The base metadata allocator. */ base_t *base; @@ -87,6 +92,9 @@ struct hpa_shard_s { */ edata_cache_fast_t ecf; + /* Small extent cache (not guarded by mtx) */ + JEMALLOC_ALIGNED(CACHELINE) sec_t sec; + psset_t psset; /* @@ -142,9 +150,9 @@ bool hpa_hugepage_size_exceeds_limit(void); * just that it can function properly given the system it's running on. 
*/ bool hpa_supported(void); -bool hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, - base_t *base, edata_cache_t *edata_cache, unsigned ind, - const hpa_shard_opts_t *opts); +bool hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, + emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts); void hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src); void hpa_shard_stats_merge( @@ -157,6 +165,8 @@ void hpa_shard_stats_merge( */ void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard); +/* Flush caches that shard may be using */ +void hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_set_deferral_allowed( tsdn_t *tsdn, hpa_shard_t *shard, bool deferral_allowed); @@ -164,8 +174,9 @@ void hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard); /* * We share the fork ordering with the PA and arena prefork handling; that's why - * these are 3 and 4 rather than 0 and 1. + * these are 2, 3 and 4 rather than 0 and 1. */ +void hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork3(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_prefork4(tsdn_t *tsdn, hpa_shard_t *shard); void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard); diff --git a/include/jemalloc/internal/pa.h b/include/jemalloc/internal/pa.h index 3f2d10b0..f3910ad8 100644 --- a/include/jemalloc/internal/pa.h +++ b/include/jemalloc/internal/pa.h @@ -96,12 +96,6 @@ struct pa_shard_s { /* Allocates from a PAC. */ pac_t pac; - /* - * We place a small extent cache in front of the HPA, since we intend - * these configurations to use many fewer arenas, and therefore have a - * higher risk of hot locks. - */ - sec_t hpa_sec; hpa_shard_t hpa_shard; /* The source of edata_t objects. 
*/ @@ -166,6 +160,9 @@ void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard); */ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard); +/* Flush any caches used by shard */ +void pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard); + /* Gets an edata for the given allocation. */ edata_t *pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, bool slab, szind_t szind, bool zero, bool guarded, @@ -233,8 +230,7 @@ void pa_shard_basic_stats_merge( void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, - size_t *resident); + hpa_shard_stats_t *hpa_stats_out, size_t *resident); /* * Reads the PA-owned mutex stats into the output stats array, at the diff --git a/include/jemalloc/internal/pai.h b/include/jemalloc/internal/pai.h index 1d924657..9b4c257b 100644 --- a/include/jemalloc/internal/pai.h +++ b/include/jemalloc/internal/pai.h @@ -13,15 +13,6 @@ struct pai_s { edata_t *(*alloc)(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); - /* - * Returns the number of extents added to the list (which may be fewer - * than requested, in case of OOM). The list should already be - * initialized. The only alignment guarantee is page-alignment, and - * the results are not necessarily zeroed. - */ - size_t (*alloc_batch)(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); bool (*expand)(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); @@ -29,9 +20,6 @@ struct pai_s { size_t old_size, size_t new_size, bool *deferred_work_generated); void (*dalloc)(tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); - /* This function empties out list as a side-effect of being called. 
*/ - void (*dalloc_batch)(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); uint64_t (*time_until_deferred_work)(tsdn_t *tsdn, pai_t *self); }; @@ -47,14 +35,6 @@ pai_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, frequent_reuse, deferred_work_generated); } -static inline size_t -pai_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - return self->alloc_batch(tsdn, self, size, nallocs, results, - frequent_reuse, deferred_work_generated); -} - static inline bool pai_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated) { @@ -75,26 +55,9 @@ pai_dalloc( self->dalloc(tsdn, self, edata, deferred_work_generated); } -static inline void -pai_dalloc_batch(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, - bool *deferred_work_generated) { - self->dalloc_batch(tsdn, self, list, deferred_work_generated); -} - static inline uint64_t pai_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return self->time_until_deferred_work(tsdn, self); } -/* - * An implementation of batch allocation that simply calls alloc once for - * each item in the list. - */ -size_t pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); -/* Ditto, for dalloc. */ -void pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); - #endif /* JEMALLOC_INTERNAL_PAI_H */ diff --git a/include/jemalloc/internal/sec.h b/include/jemalloc/internal/sec.h index 50daf066..cc458b9d 100644 --- a/include/jemalloc/internal/sec.h +++ b/include/jemalloc/internal/sec.h @@ -17,91 +17,104 @@ * knowledge of the underlying PAI implementation). 
*/ -/* - * For now, this is just one field; eventually, we'll probably want to get more - * fine-grained data out (like per-size class statistics). - */ +typedef struct sec_bin_stats_s sec_bin_stats_t; +struct sec_bin_stats_s { + /* Number of alloc requests that did not find extent in this bin */ + size_t nmisses; + /* Number of successful alloc requests. */ + size_t nhits; + /* Number of dallocs causing the flush */ + size_t ndalloc_flush; + /* Number of dallocs not causing the flush */ + size_t ndalloc_noflush; + /* Number of fills that hit max_bytes */ + size_t noverfills; +}; typedef struct sec_stats_s sec_stats_t; struct sec_stats_s { /* Sum of bytes_cur across all shards. */ size_t bytes; + + /* Totals of bin_stats. */ + sec_bin_stats_t total; }; +static inline void +sec_bin_stats_init(sec_bin_stats_t *stats) { + stats->ndalloc_flush = 0; + stats->nmisses = 0; + stats->nhits = 0; + stats->ndalloc_noflush = 0; + stats->noverfills = 0; +} + +static inline void +sec_bin_stats_accum(sec_bin_stats_t *dst, sec_bin_stats_t *src) { + dst->nmisses += src->nmisses; + dst->nhits += src->nhits; + dst->ndalloc_flush += src->ndalloc_flush; + dst->ndalloc_noflush += src->ndalloc_noflush; + dst->noverfills += src->noverfills; +} + static inline void sec_stats_accum(sec_stats_t *dst, sec_stats_t *src) { dst->bytes += src->bytes; + sec_bin_stats_accum(&dst->total, &src->total); } /* A collections of free extents, all of the same size. */ typedef struct sec_bin_s sec_bin_t; struct sec_bin_s { /* - * When we fail to fulfill an allocation, we do a batch-alloc on the - * underlying allocator to fill extra items, as well. We drop the SEC - * lock while doing so, to allow operations on other bins to succeed. - * That introduces the possibility of other threads also trying to - * allocate out of this bin, failing, and also going to the backing - * allocator. 
To avoid a thundering herd problem in which lots of - * threads do batch allocs and overfill this bin as a result, we only - * allow one batch allocation at a time for a bin. This bool tracks - * whether or not some thread is already batch allocating. - * - * Eventually, the right answer may be a smarter sharding policy for the - * bins (e.g. a mutex per bin, which would also be more scalable - * generally; the batch-allocating thread could hold it while - * batch-allocating). + * Protects the data members of the bin. */ - bool being_batch_filled; + malloc_mutex_t mtx; /* - * Number of bytes in this particular bin (as opposed to the - * sec_shard_t's bytes_cur. This isn't user visible or reported in - * stats; rather, it allows us to quickly determine the change in the - * centralized counter when flushing. + * Number of bytes in this particular bin. */ size_t bytes_cur; edata_list_active_t freelist; -}; - -typedef struct sec_shard_s sec_shard_t; -struct sec_shard_s { - /* - * We don't keep per-bin mutexes, even though that would allow more - * sharding; this allows global cache-eviction, which in turn allows for - * better balancing across free lists. - */ - malloc_mutex_t mtx; - /* - * A SEC may need to be shut down (i.e. flushed of its contents and - * prevented from further caching). To avoid tricky synchronization - * issues, we just track enabled-status in each shard, guarded by a - * mutex. In practice, this is only ever checked during brief races, - * since the arena-level atomic boolean tracking HPA enabled-ness means - * that we won't go down these pathways very often after custom extent - * hooks are installed. - */ - bool enabled; - sec_bin_t *bins; - /* Number of bytes in all bins in the shard. */ - size_t bytes_cur; - /* The next pszind to flush in the flush-some pathways. 
*/ - pszind_t to_flush_next; + sec_bin_stats_t stats; }; typedef struct sec_s sec_t; struct sec_s { - pai_t pai; - pai_t *fallback; - - sec_opts_t opts; - sec_shard_t *shards; - pszind_t npsizes; + sec_opts_t opts; + sec_bin_t *bins; + pszind_t npsizes; }; -bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, - const sec_opts_t *opts); -void sec_flush(tsdn_t *tsdn, sec_t *sec); -void sec_disable(tsdn_t *tsdn, sec_t *sec); +static inline bool +sec_is_used(sec_t *sec) { + return sec->opts.nshards != 0; +} + +static inline bool +sec_size_supported(sec_t *sec, size_t size) { + return sec_is_used(sec) && size <= sec->opts.max_alloc; +} + +/* If sec does not have extent available, it will return NULL. */ +edata_t *sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size); +void sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, + edata_list_active_t *result, size_t nallocs); + +/* + * Upon return dalloc_list may be empty if edata is consumed by sec or non-empty + * if there are extents that need to be flushed from cache. Please note, that + * if we need to flush, extent(s) returned in the list to be deallocated + * will almost certainly not contain the one being dalloc-ed (that one will be + * considered "hot" and preserved in the cache, while "colder" ones are + * returned). 
+ */ +void sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list); + +bool sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts); + +/* Fills to_flush with extents that need to be deallocated */ +void sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush); /* * Morally, these two stats methods probably ought to be a single one (and the diff --git a/include/jemalloc/internal/sec_opts.h b/include/jemalloc/internal/sec_opts.h index e0699d7a..039d423c 100644 --- a/include/jemalloc/internal/sec_opts.h +++ b/include/jemalloc/internal/sec_opts.h @@ -12,46 +12,39 @@ typedef struct sec_opts_s sec_opts_t; struct sec_opts_s { /* * We don't necessarily always use all the shards; requests are - * distributed across shards [0, nshards - 1). + * distributed across shards [0, nshards - 1). Once thread picks a + * shard it will always use that one. If this value is set to 0 sec is + * not used. */ size_t nshards; /* * We'll automatically refuse to cache any objects in this sec if - * they're larger than max_alloc bytes, instead forwarding such objects - * directly to the fallback. + * they're larger than max_alloc bytes. */ size_t max_alloc; /* - * Exceeding this amount of cached extents in a shard causes us to start - * flushing bins in that shard until we fall below bytes_after_flush. + * Exceeding this amount of cached extents in a bin causes us to flush + * until we are 1/4 below max_bytes. */ size_t max_bytes; - /* - * The number of bytes (in all bins) we flush down to when we exceed - * bytes_cur. We want this to be less than bytes_cur, because - * otherwise we could get into situations where a shard undergoing - * net-deallocation keeps bytes_cur very near to max_bytes, so that - * most deallocations get immediately forwarded to the underlying PAI - * implementation, defeating the point of the SEC. 
- */ - size_t bytes_after_flush; /* * When we can't satisfy an allocation out of the SEC because there are - * no available ones cached, we allocate multiple of that size out of - * the fallback allocator. Eventually we might want to do something - * cleverer, but for now we just grab a fixed number. + * no available ones cached, allocator will allocate a batch with extra + * batch_fill_extra extents of the same size. */ size_t batch_fill_extra; }; +#define SEC_OPTS_NSHARDS_DEFAULT 2 +#define SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT 3 +#define SEC_OPTS_MAX_ALLOC_DEFAULT ((32 * 1024) < PAGE ? PAGE : (32 * 1024)) +#define SEC_OPTS_MAX_BYTES_DEFAULT \ + ((256 * 1024) < (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \ + ? (4 * SEC_OPTS_MAX_ALLOC_DEFAULT) \ + : (256 * 1024)) + #define SEC_OPTS_DEFAULT \ - { \ - /* nshards */ \ - 4, /* max_alloc */ \ - (32 * 1024) < PAGE ? PAGE : (32 * 1024), /* max_bytes */ \ - 256 * 1024, /* bytes_after_flush */ \ - 128 * 1024, /* batch_fill_extra */ \ - 0 \ - } + {SEC_OPTS_NSHARDS_DEFAULT, SEC_OPTS_MAX_ALLOC_DEFAULT, \ + SEC_OPTS_MAX_BYTES_DEFAULT, SEC_OPTS_BATCH_FILL_EXTRA_DEFAULT} #endif /* JEMALLOC_INTERNAL_SEC_OPTS_H */ diff --git a/include/jemalloc/internal/witness.h b/include/jemalloc/internal/witness.h index 7ca3c347..0a426ff5 100644 --- a/include/jemalloc/internal/witness.h +++ b/include/jemalloc/internal/witness.h @@ -46,7 +46,7 @@ enum witness_rank_e { WITNESS_RANK_DECAY = WITNESS_RANK_CORE, WITNESS_RANK_TCACHE_QL, - WITNESS_RANK_SEC_SHARD, + WITNESS_RANK_SEC_BIN, WITNESS_RANK_EXTENT_GROW, WITNESS_RANK_HPA_SHARD_GROW = WITNESS_RANK_EXTENT_GROW, diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index bfb62d78..1e8def75 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index 
26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 037eb724..45ddf73d 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index bd6595b1..f1a5158a 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index 3f880176..a6f92ccf 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -74,7 +74,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index 26408c8e..f6e340cf 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -106,9 +106,6 
@@ Source Files - - Source Files - Source Files diff --git a/src/arena.c b/src/arena.c index 664ed6a3..5b144c63 100644 --- a/src/arena.c +++ b/src/arena.c @@ -89,7 +89,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, bin_stats_data_t *bstats, arena_stats_large_t *lstats, pac_estats_t *estats, - hpa_shard_stats_t *hpastats, sec_stats_t *secstats) { + hpa_shard_stats_t *hpastats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, @@ -159,7 +159,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, } pa_shard_stats_merge(tsdn, &arena->pa_shard, &astats->pa_shard_stats, - estats, hpastats, secstats, &astats->resident); + estats, hpastats, &astats->resident); LOCKEDINT_MTX_UNLOCK(tsdn, arena->stats.mtx); @@ -529,7 +529,7 @@ arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all) { * as possible", including flushing any caches (for situations * like thread death, or manual purge calls). 
*/ - sec_flush(tsdn, &arena->pa_shard.hpa_sec); + pa_shard_flush(tsdn, &arena->pa_shard); } if (arena_decay_dirty(tsdn, arena, is_background_thread, all)) { return; diff --git a/src/ctl.c b/src/ctl.c index 553c58ad..1260e197 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -115,7 +115,6 @@ CTL_PROTO(opt_hpa_dirty_mult) CTL_PROTO(opt_hpa_sec_nshards) CTL_PROTO(opt_hpa_sec_max_alloc) CTL_PROTO(opt_hpa_sec_max_bytes) -CTL_PROTO(opt_hpa_sec_bytes_after_flush) CTL_PROTO(opt_hpa_sec_batch_fill_extra) CTL_PROTO(opt_huge_arena_pac_thp) CTL_PROTO(opt_metadata_thp) @@ -339,6 +338,11 @@ CTL_PROTO(stats_arenas_i_tcache_stashed_bytes) CTL_PROTO(stats_arenas_i_resident) CTL_PROTO(stats_arenas_i_abandoned_vm) CTL_PROTO(stats_arenas_i_hpa_sec_bytes) +CTL_PROTO(stats_arenas_i_hpa_sec_hits) +CTL_PROTO(stats_arenas_i_hpa_sec_misses) +CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_flush) +CTL_PROTO(stats_arenas_i_hpa_sec_dalloc_noflush) +CTL_PROTO(stats_arenas_i_hpa_sec_overfills) INDEX_PROTO(stats_arenas_i) CTL_PROTO(stats_allocated) CTL_PROTO(stats_active) @@ -486,7 +490,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("hpa_sec_nshards"), CTL(opt_hpa_sec_nshards)}, {NAME("hpa_sec_max_alloc"), CTL(opt_hpa_sec_max_alloc)}, {NAME("hpa_sec_max_bytes"), CTL(opt_hpa_sec_max_bytes)}, - {NAME("hpa_sec_bytes_after_flush"), CTL(opt_hpa_sec_bytes_after_flush)}, {NAME("hpa_sec_batch_fill_extra"), CTL(opt_hpa_sec_batch_fill_extra)}, {NAME("huge_arena_pac_thp"), CTL(opt_huge_arena_pac_thp)}, {NAME("metadata_thp"), CTL(opt_metadata_thp)}, @@ -826,6 +829,12 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("abandoned_vm"), CTL(stats_arenas_i_abandoned_vm)}, {NAME("hpa_sec_bytes"), CTL(stats_arenas_i_hpa_sec_bytes)}, + {NAME("hpa_sec_hits"), CTL(stats_arenas_i_hpa_sec_hits)}, + {NAME("hpa_sec_misses"), CTL(stats_arenas_i_hpa_sec_misses)}, + {NAME("hpa_sec_dalloc_noflush"), + 
CTL(stats_arenas_i_hpa_sec_dalloc_noflush)}, + {NAME("hpa_sec_dalloc_flush"), CTL(stats_arenas_i_hpa_sec_dalloc_flush)}, + {NAME("hpa_sec_overfills"), CTL(stats_arenas_i_hpa_sec_overfills)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, {NAME("large"), CHILD(named, stats_arenas_i_large)}, {NAME("bins"), CHILD(indexed, stats_arenas_i_bins)}, @@ -1066,7 +1075,7 @@ ctl_arena_stats_amerge(tsdn_t *tsdn, ctl_arena_t *ctl_arena, arena_t *arena) { &ctl_arena->pdirty, &ctl_arena->pmuzzy, &ctl_arena->astats->astats, ctl_arena->astats->bstats, ctl_arena->astats->lstats, ctl_arena->astats->estats, - &ctl_arena->astats->hpastats, &ctl_arena->astats->secstats); + &ctl_arena->astats->hpastats); for (i = 0; i < SC_NBINS; i++) { bin_stats_t *bstats = @@ -1258,7 +1267,6 @@ ctl_arena_stats_sdmerge( /* Merge HPA stats. */ hpa_shard_stats_accum(&sdstats->hpastats, &astats->hpastats); - sec_stats_accum(&sdstats->secstats, &astats->secstats); } } @@ -2175,11 +2183,8 @@ CTL_RO_NL_GEN(opt_hpa_slab_max_alloc, opt_hpa_opts.slab_max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_nshards, opt_hpa_sec_opts.nshards, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_alloc, opt_hpa_sec_opts.max_alloc, size_t) CTL_RO_NL_GEN(opt_hpa_sec_max_bytes, opt_hpa_sec_opts.max_bytes, size_t) -CTL_RO_NL_GEN( - opt_hpa_sec_bytes_after_flush, opt_hpa_sec_opts.bytes_after_flush, size_t) CTL_RO_NL_GEN( opt_hpa_sec_batch_fill_extra, opt_hpa_sec_opts.batch_fill_extra, size_t) - CTL_RO_NL_GEN(opt_huge_arena_pac_thp, opt_huge_arena_pac_thp, bool) CTL_RO_NL_GEN( opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], const char *) @@ -3869,7 +3874,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_abandoned_vm, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_bytes, - arenas_i(mib[2])->astats->secstats.bytes, size_t) + arenas_i(mib[2])->astats->hpastats.secstats.bytes, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_hits, + arenas_i(mib[2])->astats->hpastats.secstats.total.nhits, size_t) 
+CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_misses, + arenas_i(mib[2])->astats->hpastats.secstats.total.nmisses, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_flush, + arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_flush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_dalloc_noflush, + arenas_i(mib[2])->astats->hpastats.secstats.total.ndalloc_noflush, size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_hpa_sec_overfills, + arenas_i(mib[2])->astats->hpastats.secstats.total.noverfills, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_small_allocated, arenas_i(mib[2])->astats->allocated_small, size_t) diff --git a/src/hpa.c b/src/hpa.c index cc330379..7e5b5f72 100644 --- a/src/hpa.c +++ b/src/hpa.c @@ -11,19 +11,17 @@ static edata_t *hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, bool guarded, bool frequent_reuse, bool *deferred_work_generated); -static size_t hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated); static bool hpa_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); static bool hpa_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, size_t new_size, bool *deferred_work_generated); static void hpa_dalloc( tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); -static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated); static uint64_t hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); +static void hpa_dalloc_batch(tsdn_t *tsdn, pai_t *self, + edata_list_active_t *list, bool *deferred_work_generated); + const char *const hpa_hugify_style_names[] = {"auto", "none", "eager", "lazy"}; bool opt_experimental_hpa_start_huge_if_thp_always = true; @@ -74,9 +72,9 @@ hpa_do_consistency_checks(hpa_shard_t *shard) { } 
bool -hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, - base_t *base, edata_cache_t *edata_cache, unsigned ind, - const hpa_shard_opts_t *opts) { +hpa_shard_init(tsdn_t *tsdn, hpa_shard_t *shard, hpa_central_t *central, + emap_t *emap, base_t *base, edata_cache_t *edata_cache, unsigned ind, + const hpa_shard_opts_t *opts, const sec_opts_t *sec_opts) { /* malloc_conf processing should have filtered out these cases. */ assert(hpa_supported()); bool err; @@ -118,13 +116,16 @@ hpa_shard_init(hpa_shard_t *shard, hpa_central_t *central, emap_t *emap, * operating on corrupted data. */ shard->pai.alloc = &hpa_alloc; - shard->pai.alloc_batch = &hpa_alloc_batch; shard->pai.expand = &hpa_expand; shard->pai.shrink = &hpa_shrink; shard->pai.dalloc = &hpa_dalloc; - shard->pai.dalloc_batch = &hpa_dalloc_batch; shard->pai.time_until_deferred_work = &hpa_time_until_deferred_work; + err = sec_init(tsdn, &shard->sec, base, sec_opts); + if (err) { + return true; + } + hpa_do_consistency_checks(shard); return false; @@ -151,6 +152,7 @@ hpa_shard_stats_accum(hpa_shard_stats_t *dst, hpa_shard_stats_t *src) { psset_stats_accum(&dst->psset_stats, &src->psset_stats); hpa_shard_nonderived_stats_accum( &dst->nonderived_stats, &src->nonderived_stats); + sec_stats_accum(&dst->secstats, &src->secstats); } void @@ -164,6 +166,8 @@ hpa_shard_stats_merge( hpa_shard_nonderived_stats_accum(&dst->nonderived_stats, &shard->stats); malloc_mutex_unlock(tsdn, &shard->mtx); malloc_mutex_unlock(tsdn, &shard->grow_mtx); + + sec_stats_merge(tsdn, &shard->sec, &dst->secstats); } static bool @@ -825,37 +829,9 @@ hpa_from_pai(pai_t *self) { return (hpa_shard_t *)self; } -static size_t -hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - assert(nallocs > 0); - assert((size & PAGE_MASK) == 0); - witness_assert_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - 
hpa_shard_t *shard = hpa_from_pai(self); - - /* - * frequent_use here indicates this request comes from the arena bins, - * in which case it will be split into slabs, and therefore there is no - * intrinsic slack in the allocation (the entire range of allocated size - * will be accessed). - * - * In this case bypass the slab_max_alloc limit (if still within the - * huge page size). These requests do not concern internal - * fragmentation with huge pages (again, the full size will be used). - */ - if (!(frequent_reuse && size <= HUGEPAGE) - && (size > shard->opts.slab_max_alloc)) { - return 0; - } - - size_t nsuccess = hpa_alloc_batch_psset( - tsdn, shard, size, nallocs, results, deferred_work_generated); - - witness_assert_depth_to_rank( - tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - +static void +hpa_assert_results( + tsdn_t *tsdn, hpa_shard_t *shard, edata_list_active_t *results) { /* * Guard the sanity checks with config_debug because the loop cannot be * proven non-circular by the compiler, even if everything within the @@ -876,7 +852,6 @@ hpa_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, assert(edata_base_get(edata) != NULL); } } - return nsuccess; } static edata_t * @@ -891,16 +866,52 @@ hpa_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, if (alignment > PAGE || zero) { return NULL; } + hpa_shard_t *shard = hpa_from_pai(self); + /* - * An alloc with alignment == PAGE and zero == false is equivalent to a - * batch alloc of 1. Just do that, so we can share code. + * frequent_use here indicates this request comes from the arena bins, + * in which case it will be split into slabs, and therefore there is no + * intrinsic slack in the allocation (the entire range of allocated size + * will be accessed). + * + * In this case bypass the slab_max_alloc limit (if still within the + * huge page size). These requests do not concern internal + * fragmentation with huge pages (again, the full size will be used). 
*/ + if (!(frequent_reuse && size <= HUGEPAGE) + && (size > shard->opts.slab_max_alloc)) { + return NULL; + } + edata_t *edata = sec_alloc(tsdn, &shard->sec, size); + if (edata != NULL) { + return edata; + } + size_t nallocs = sec_size_supported(&shard->sec, size) + ? shard->sec.opts.batch_fill_extra + 1 + : 1; edata_list_active_t results; edata_list_active_init(&results); - size_t nallocs = hpa_alloc_batch(tsdn, self, size, /* nallocs */ 1, - &results, frequent_reuse, deferred_work_generated); - assert(nallocs == 0 || nallocs == 1); - edata_t *edata = edata_list_active_first(&results); + size_t nsuccess = hpa_alloc_batch_psset( + tsdn, shard, size, nallocs, &results, deferred_work_generated); + hpa_assert_results(tsdn, shard, &results); + edata = edata_list_active_first(&results); + + if (edata != NULL) { + edata_list_active_remove(&results, edata); + assert(nsuccess > 0); + nsuccess--; + } + if (nsuccess > 0) { + assert(sec_size_supported(&shard->sec, size)); + sec_fill(tsdn, &shard->sec, size, &results, nsuccess); + /* Unlikely rollback in case of overfill */ + if (!edata_list_active_empty(&results)) { + hpa_dalloc_batch( + tsdn, self, &results, deferred_work_generated); + } + } + witness_assert_depth_to_rank( + tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); return edata; } @@ -996,10 +1007,19 @@ static void hpa_dalloc( tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { assert(!edata_guarded_get(edata)); - /* Just a dalloc_batch of size 1; this lets us share logic. 
*/ + edata_list_active_t dalloc_list; edata_list_active_init(&dalloc_list); edata_list_active_append(&dalloc_list, edata); + + hpa_shard_t *shard = hpa_from_pai(self); + sec_dalloc(tsdn, &shard->sec, &dalloc_list); + if (edata_list_active_empty(&dalloc_list)) { + /* sec consumed the pointer */ + *deferred_work_generated = false; + return; + } + /* We may have more than one pointer to flush now */ hpa_dalloc_batch(tsdn, self, &dalloc_list, deferred_work_generated); } @@ -1063,15 +1083,32 @@ hpa_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { return time_ns; } +static void +hpa_sec_flush_impl(tsdn_t *tsdn, hpa_shard_t *shard) { + edata_list_active_t to_flush; + edata_list_active_init(&to_flush); + + sec_flush(tsdn, &shard->sec, &to_flush); + bool deferred_work_generated; + hpa_dalloc_batch( + tsdn, (pai_t *)shard, &to_flush, &deferred_work_generated); +} + void hpa_shard_disable(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + hpa_sec_flush_impl(tsdn, shard); malloc_mutex_lock(tsdn, &shard->mtx); edata_cache_fast_disable(tsdn, &shard->ecf); malloc_mutex_unlock(tsdn, &shard->mtx); } +void +hpa_shard_flush(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_sec_flush_impl(tsdn, shard); +} + static void hpa_shard_assert_stats_empty(psset_bin_stats_t *bin_stats) { assert(bin_stats->npageslabs == 0); @@ -1093,6 +1130,7 @@ hpa_assert_empty(tsdn_t *tsdn, hpa_shard_t *shard, psset_t *psset) { void hpa_shard_destroy(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + hpa_shard_flush(tsdn, shard); /* * By the time we're here, the arena code should have dalloc'd all the * active extents, which means we should have eventually evicted @@ -1137,6 +1175,12 @@ hpa_shard_do_deferred_work(tsdn_t *tsdn, hpa_shard_t *shard) { malloc_mutex_unlock(tsdn, &shard->mtx); } +void +hpa_shard_prefork2(tsdn_t *tsdn, hpa_shard_t *shard) { + hpa_do_consistency_checks(shard); + sec_prefork2(tsdn, &shard->sec); +} + void hpa_shard_prefork3(tsdn_t *tsdn, 
hpa_shard_t *shard) { hpa_do_consistency_checks(shard); @@ -1155,6 +1199,7 @@ void hpa_shard_postfork_parent(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + sec_postfork_parent(tsdn, &shard->sec); malloc_mutex_postfork_parent(tsdn, &shard->grow_mtx); malloc_mutex_postfork_parent(tsdn, &shard->mtx); } @@ -1163,6 +1208,7 @@ void hpa_shard_postfork_child(tsdn_t *tsdn, hpa_shard_t *shard) { hpa_do_consistency_checks(shard); + sec_postfork_child(tsdn, &shard->sec); malloc_mutex_postfork_child(tsdn, &shard->grow_mtx); malloc_mutex_postfork_child(tsdn, &shard->mtx); } diff --git a/src/jemalloc.c b/src/jemalloc.c index 6844da5a..5d23962d 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1013,6 +1013,15 @@ malloc_conf_error( /* However, tolerate experimental features. */ return; } + const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; + const size_t deprecated_cnt = (sizeof(deprecated) + / sizeof(deprecated[0])); + for (size_t i = 0; i < deprecated_cnt; ++i) { + if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { + /* Tolerate deprecated features. 
*/ + return; + } + } had_conf_error = true; } @@ -1685,7 +1694,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], } CONF_CONTINUE; } - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); @@ -1694,13 +1702,10 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, CONF_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, - "hpa_sec_max_bytes", PAGE, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.bytes_after_flush, - "hpa_sec_bytes_after_flush", PAGE, 0, + "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", 0, HUGEPAGE_PAGES, + "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, CONF_CHECK_MIN, CONF_CHECK_MAX, true); if (CONF_MATCH("slab_sizes")) { diff --git a/src/pa.c b/src/pa.c index becf69b1..a03b0c1c 100644 --- a/src/pa.c +++ b/src/pa.c @@ -67,12 +67,9 @@ pa_shard_init(tsdn_t *tsdn, pa_shard_t *shard, pa_central_t *central, bool pa_shard_enable_hpa(tsdn_t *tsdn, pa_shard_t *shard, const hpa_shard_opts_t *hpa_opts, const sec_opts_t *hpa_sec_opts) { - if (hpa_shard_init(&shard->hpa_shard, &shard->central->hpa, shard->emap, - shard->base, &shard->edata_cache, shard->ind, hpa_opts)) { - return true; - } - if (sec_init(tsdn, &shard->hpa_sec, shard->base, &shard->hpa_shard.pai, - hpa_sec_opts)) { + if (hpa_shard_init(tsdn, &shard->hpa_shard, &shard->central->hpa, + shard->emap, shard->base, &shard->edata_cache, shard->ind, + hpa_opts, hpa_sec_opts)) { return true; } shard->ever_used_hpa = true; @@ -85,7 +82,6 @@ void pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_b(&shard->use_hpa, false, ATOMIC_RELAXED); if (shard->ever_used_hpa) { - sec_disable(tsdn, &shard->hpa_sec); hpa_shard_disable(tsdn, 
&shard->hpa_shard); } } @@ -93,8 +89,13 @@ pa_shard_disable_hpa(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_reset(tsdn_t *tsdn, pa_shard_t *shard) { atomic_store_zu(&shard->nactive, 0, ATOMIC_RELAXED); + pa_shard_flush(tsdn, shard); +} + +void +pa_shard_flush(tsdn_t *tsdn, pa_shard_t *shard) { if (shard->ever_used_hpa) { - sec_flush(tsdn, &shard->hpa_sec); + hpa_shard_flush(tsdn, &shard->hpa_shard); } } @@ -107,7 +108,6 @@ void pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { pac_destroy(tsdn, &shard->pac); if (shard->ever_used_hpa) { - sec_flush(tsdn, &shard->hpa_sec); hpa_shard_destroy(tsdn, &shard->hpa_shard); } } @@ -115,7 +115,7 @@ pa_shard_destroy(tsdn_t *tsdn, pa_shard_t *shard) { static pai_t * pa_get_pai(pa_shard_t *shard, edata_t *edata) { return (edata_pai_get(edata) == EXTENT_PAI_PAC ? &shard->pac.pai - : &shard->hpa_sec.pai); + : &shard->hpa_shard.pai); } edata_t * @@ -128,7 +128,7 @@ pa_alloc(tsdn_t *tsdn, pa_shard_t *shard, size_t size, size_t alignment, edata_t *edata = NULL; if (!guarded && pa_shard_uses_hpa(shard)) { - edata = pai_alloc(tsdn, &shard->hpa_sec.pai, size, alignment, + edata = pai_alloc(tsdn, &shard->hpa_shard.pai, size, alignment, zero, /* guarded */ false, slab, deferred_work_generated); } /* diff --git a/src/pa_extra.c b/src/pa_extra.c index 7c2498b7..ff45674f 100644 --- a/src/pa_extra.c +++ b/src/pa_extra.c @@ -17,7 +17,7 @@ pa_shard_prefork0(tsdn_t *tsdn, pa_shard_t *shard) { void pa_shard_prefork2(tsdn_t *tsdn, pa_shard_t *shard) { if (shard->ever_used_hpa) { - sec_prefork2(tsdn, &shard->hpa_sec); + hpa_shard_prefork2(tsdn, &shard->hpa_shard); } } @@ -54,7 +54,6 @@ pa_shard_postfork_parent(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_parent(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { - sec_postfork_parent(tsdn, &shard->hpa_sec); hpa_shard_postfork_parent(tsdn, &shard->hpa_shard); } } @@ -69,7 +68,6 @@ 
pa_shard_postfork_child(tsdn_t *tsdn, pa_shard_t *shard) { malloc_mutex_postfork_child(tsdn, &shard->pac.decay_dirty.mtx); malloc_mutex_postfork_child(tsdn, &shard->pac.decay_muzzy.mtx); if (shard->ever_used_hpa) { - sec_postfork_child(tsdn, &shard->hpa_sec); hpa_shard_postfork_child(tsdn, &shard->hpa_shard); } } @@ -104,8 +102,7 @@ pa_shard_basic_stats_merge( void pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_stats_t *pa_shard_stats_out, pac_estats_t *estats_out, - hpa_shard_stats_t *hpa_stats_out, sec_stats_t *sec_stats_out, - size_t *resident) { + hpa_shard_stats_t *hpa_stats_out, size_t *resident) { cassert(config_stats); pa_shard_stats_out->pac_stats.retained += @@ -170,7 +167,6 @@ pa_shard_stats_merge(tsdn_t *tsdn, pa_shard_t *shard, if (shard->ever_used_hpa) { hpa_shard_stats_merge(tsdn, &shard->hpa_shard, hpa_stats_out); - sec_stats_merge(tsdn, &shard->hpa_sec, sec_stats_out); } } @@ -204,7 +200,7 @@ pa_shard_mtx_stats_read(tsdn_t *tsdn, pa_shard_t *shard, pa_shard_mtx_stats_read_single(tsdn, mutex_prof_data, &shard->hpa_shard.grow_mtx, arena_prof_mutex_hpa_shard_grow); - sec_mutex_stats_read(tsdn, &shard->hpa_sec, + sec_mutex_stats_read(tsdn, &shard->hpa_shard.sec, &mutex_prof_data[arena_prof_mutex_hpa_sec]); } } diff --git a/src/pac.c b/src/pac.c index 361816e9..86001139 100644 --- a/src/pac.c +++ b/src/pac.c @@ -97,11 +97,9 @@ pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); pac->pai.alloc = &pac_alloc_impl; - pac->pai.alloc_batch = &pai_alloc_batch_default; pac->pai.expand = &pac_expand_impl; pac->pai.shrink = &pac_shrink_impl; pac->pai.dalloc = &pac_dalloc_impl; - pac->pai.dalloc_batch = &pai_dalloc_batch_default; pac->pai.time_until_deferred_work = &pac_time_until_deferred_work; return false; @@ -449,8 +447,8 @@ decay_with_process_madvise(edata_list_inactive_t *decay_extents) { size_t cur = 0, total_bytes = 0; for (edata_t *edata = 
edata_list_inactive_first(decay_extents); - edata != NULL; - edata = edata_list_inactive_next(decay_extents, edata)) { + edata != NULL; + edata = edata_list_inactive_next(decay_extents, edata)) { size_t pages_bytes = edata_size_get(edata); vec[cur].iov_base = edata_base_get(edata); vec[cur].iov_len = pages_bytes; @@ -511,7 +509,7 @@ pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, } for (edata_t *edata = edata_list_inactive_first(decay_extents); - edata != NULL; edata = edata_list_inactive_first(decay_extents)) { + edata != NULL; edata = edata_list_inactive_first(decay_extents)) { edata_list_inactive_remove(decay_extents, edata); size_t size = edata_size_get(edata); diff --git a/src/pai.c b/src/pai.c deleted file mode 100644 index 3114e658..00000000 --- a/src/pai.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -size_t -pai_alloc_batch_default(tsdn_t *tsdn, pai_t *self, size_t size, size_t nallocs, - edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - for (size_t i = 0; i < nallocs; i++) { - bool deferred_by_alloc = false; - edata_t *edata = pai_alloc(tsdn, self, size, PAGE, - /* zero */ false, /* guarded */ false, frequent_reuse, - &deferred_by_alloc); - *deferred_work_generated |= deferred_by_alloc; - if (edata == NULL) { - return i; - } - edata_list_active_append(results, edata); - } - return nallocs; -} - -void -pai_dalloc_batch_default(tsdn_t *tsdn, pai_t *self, edata_list_active_t *list, - bool *deferred_work_generated) { - edata_t *edata; - while ((edata = edata_list_active_first(list)) != NULL) { - bool deferred_by_dalloc = false; - edata_list_active_remove(list, edata); - pai_dalloc(tsdn, self, edata, &deferred_by_dalloc); - *deferred_work_generated |= deferred_by_dalloc; - } -} diff --git a/src/sec.c b/src/sec.c index c827dd5c..5f65362f 100644 --- a/src/sec.c +++ b/src/sec.c @@ -4,95 +4,56 @@ #include 
"jemalloc/internal/sec.h" #include "jemalloc/internal/jemalloc_probe.h" -static edata_t *sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated); -static bool sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); -static bool sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated); -static void sec_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated); - -static void +static bool sec_bin_init(sec_bin_t *bin) { - bin->being_batch_filled = false; bin->bytes_cur = 0; + sec_bin_stats_init(&bin->stats); edata_list_active_init(&bin->freelist); + bool err = malloc_mutex_init(&bin->mtx, "sec_bin", WITNESS_RANK_SEC_BIN, + malloc_mutex_rank_exclusive); + if (err) { + return true; + } + + return false; } bool -sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, pai_t *fallback, - const sec_opts_t *opts) { +sec_init(tsdn_t *tsdn, sec_t *sec, base_t *base, const sec_opts_t *opts) { + sec->opts = *opts; + if (opts->nshards == 0) { + return false; + } assert(opts->max_alloc >= PAGE); + /* * Same as tcache, sec do not cache allocs/dallocs larger than * USIZE_GROW_SLOW_THRESHOLD because the usize above this increases * by PAGE and the number of usizes is too large. 
*/ - assert(!sz_large_size_classes_disabled() - || opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); + assert(opts->max_alloc <= USIZE_GROW_SLOW_THRESHOLD); size_t max_alloc = PAGE_FLOOR(opts->max_alloc); pszind_t npsizes = sz_psz2ind(max_alloc) + 1; - size_t sz_shards = opts->nshards * sizeof(sec_shard_t); - size_t sz_bins = opts->nshards * (size_t)npsizes * sizeof(sec_bin_t); - size_t sz_alloc = sz_shards + sz_bins; - void *dynalloc = base_alloc(tsdn, base, sz_alloc, CACHELINE); + size_t ntotal_bins = opts->nshards * (size_t)npsizes; + size_t sz_bins = sizeof(sec_bin_t) * ntotal_bins; + void *dynalloc = base_alloc(tsdn, base, sz_bins, CACHELINE); if (dynalloc == NULL) { return true; } - sec_shard_t *shard_cur = (sec_shard_t *)dynalloc; - sec->shards = shard_cur; - sec_bin_t *bin_cur = (sec_bin_t *)&shard_cur[opts->nshards]; - /* Just for asserts, below. */ - sec_bin_t *bin_start = bin_cur; - - for (size_t i = 0; i < opts->nshards; i++) { - sec_shard_t *shard = shard_cur; - shard_cur++; - bool err = malloc_mutex_init(&shard->mtx, "sec_shard", - WITNESS_RANK_SEC_SHARD, malloc_mutex_rank_exclusive); - if (err) { + sec->bins = (sec_bin_t *)dynalloc; + for (pszind_t j = 0; j < ntotal_bins; j++) { + if (sec_bin_init(&sec->bins[j])) { return true; } - shard->enabled = true; - shard->bins = bin_cur; - for (pszind_t j = 0; j < npsizes; j++) { - sec_bin_init(&shard->bins[j]); - bin_cur++; - } - shard->bytes_cur = 0; - shard->to_flush_next = 0; } - /* - * Should have exactly matched the bin_start to the first unused byte - * after the shards. - */ - assert((void *)shard_cur == (void *)bin_start); - /* And the last bin to use up the last bytes of the allocation. */ - assert((char *)bin_cur == ((char *)dynalloc + sz_alloc)); - sec->fallback = fallback; - - sec->opts = *opts; sec->npsizes = npsizes; - /* - * Initialize these last so that an improper use of an SEC whose - * initialization failed will segfault in an easy-to-spot way. 
- */ - sec->pai.alloc = &sec_alloc; - sec->pai.alloc_batch = &pai_alloc_batch_default; - sec->pai.expand = &sec_expand; - sec->pai.shrink = &sec_shrink; - sec->pai.dalloc = &sec_dalloc; - sec->pai.dalloc_batch = &pai_dalloc_batch_default; - return false; } -static sec_shard_t * +static uint8_t sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { /* * Eventually, we should implement affinity, tracking source shard using @@ -100,7 +61,7 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { * distribute across all shards. */ if (tsdn_null(tsdn)) { - return &sec->shards[0]; + return 0; } tsd_t *tsd = tsdn_tsd(tsdn); uint8_t *idxp = tsd_sec_shardp_get(tsd); @@ -118,284 +79,252 @@ sec_shard_pick(tsdn_t *tsdn, sec_t *sec) { assert(idx < (uint32_t)sec->opts.nshards); *idxp = (uint8_t)idx; } - return &sec->shards[*idxp]; + return *idxp; } -/* - * Perhaps surprisingly, this can be called on the alloc pathways; if we hit an - * empty cache, we'll try to fill it, which can push the shard over it's limit. - */ -static void -sec_flush_some_and_unlock(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - while (shard->bytes_cur > sec->opts.bytes_after_flush) { - /* Pick a victim. */ - sec_bin_t *bin = &shard->bins[shard->to_flush_next]; - - /* Update our victim-picking state. */ - shard->to_flush_next++; - if (shard->to_flush_next == sec->npsizes) { - shard->to_flush_next = 0; - } - - assert(shard->bytes_cur >= bin->bytes_cur); - if (bin->bytes_cur != 0) { - shard->bytes_cur -= bin->bytes_cur; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } - /* - * Either bin->bytes_cur was 0, in which case we didn't touch - * the bin list but it should be empty anyways (or else we - * missed a bytes_cur update on a list modification), or it - * *was* 0 and we emptied it ourselves. Either way, it should - * be empty now. 
- */ - assert(edata_list_active_empty(&bin->freelist)); - } - - malloc_mutex_unlock(tsdn, &shard->mtx); - bool deferred_work_generated = false; - pai_dalloc_batch( - tsdn, sec->fallback, &to_flush, &deferred_work_generated); +static sec_bin_t * +sec_bin_pick(sec_t *sec, uint8_t shard, pszind_t pszind) { + assert(shard < sec->opts.nshards); + size_t ind = (size_t)shard * sec->npsizes + pszind; + assert(ind < sec->npsizes * sec->opts.nshards); + return &sec->bins[ind]; } static edata_t * -sec_shard_alloc_locked( - tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, sec_bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - if (!shard->enabled) { - return NULL; - } +sec_bin_alloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size) { + malloc_mutex_assert_owner(tsdn, &bin->mtx); + edata_t *edata = edata_list_active_first(&bin->freelist); if (edata != NULL) { + assert(!edata_list_active_empty(&bin->freelist)); edata_list_active_remove(&bin->freelist, edata); - assert(edata_size_get(edata) <= bin->bytes_cur); - bin->bytes_cur -= edata_size_get(edata); - assert(edata_size_get(edata) <= shard->bytes_cur); - shard->bytes_cur -= edata_size_get(edata); + size_t sz = edata_size_get(edata); + assert(sz <= bin->bytes_cur && sz > 0); + bin->bytes_cur -= sz; + bin->stats.nhits++; } return edata; } static edata_t * -sec_batch_fill_and_alloc(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, - sec_bin_t *bin, size_t size, bool frequent_reuse) { - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); +sec_multishard_trylock_alloc( + tsdn_t *tsdn, sec_t *sec, size_t size, pszind_t pszind) { + assert(sec->opts.nshards > 0); - edata_list_active_t result; - edata_list_active_init(&result); - bool deferred_work_generated = false; - size_t nalloc = pai_alloc_batch(tsdn, sec->fallback, size, - 1 + sec->opts.batch_fill_extra, &result, frequent_reuse, - &deferred_work_generated); - - edata_t *ret = edata_list_active_first(&result); - if (ret != NULL) { - edata_list_active_remove(&result, 
ret); + uint8_t cur_shard = sec_shard_pick(tsdn, sec); + sec_bin_t *bin; + for (size_t i = 0; i < sec->opts.nshards; ++i) { + bin = sec_bin_pick(sec, cur_shard, pszind); + if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { + edata_t *edata = sec_bin_alloc_locked( + tsdn, sec, bin, size); + malloc_mutex_unlock(tsdn, &bin->mtx); + if (edata != NULL) { + JE_USDT(sec_alloc, 5, sec, bin, edata, size, + /* frequent_reuse */ 1); + return edata; + } + } + cur_shard++; + if (cur_shard == sec->opts.nshards) { + cur_shard = 0; + } } - - malloc_mutex_lock(tsdn, &shard->mtx); - bin->being_batch_filled = false; - /* - * Handle the easy case first: nothing to cache. Note that this can - * only happen in case of OOM, since sec_alloc checks the expected - * number of allocs, and doesn't bother going down the batch_fill - * pathway if there won't be anything left to cache. So to be in this - * code path, we must have asked for > 1 alloc, but only gotten 1 back. - */ - if (nalloc <= 1) { - malloc_mutex_unlock(tsdn, &shard->mtx); - return ret; + /* No bin had alloc or had the extent */ + assert(cur_shard == sec_shard_pick(tsdn, sec)); + bin = sec_bin_pick(sec, cur_shard, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); + if (edata == NULL) { + /* Only now we know it is a miss */ + bin->stats.nmisses++; } - - size_t new_cached_bytes = (nalloc - 1) * size; - - edata_list_active_concat(&bin->freelist, &result); - bin->bytes_cur += new_cached_bytes; - shard->bytes_cur += new_cached_bytes; - - if (shard->bytes_cur > sec->opts.max_bytes) { - sec_flush_some_and_unlock(tsdn, sec, shard); - } else { - malloc_mutex_unlock(tsdn, &shard->mtx); - } - - return ret; + malloc_mutex_unlock(tsdn, &bin->mtx); + JE_USDT(sec_alloc, 5, sec, bin, edata, size, /* frequent_reuse */ 1); + return edata; } -static edata_t * -sec_alloc(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, bool zero, - bool guarded, bool frequent_reuse, bool 
*deferred_work_generated) { +edata_t * +sec_alloc(tsdn_t *tsdn, sec_t *sec, size_t size) { + if (!sec_size_supported(sec, size)) { + return NULL; + } assert((size & PAGE_MASK) == 0); - assert(!guarded); - - sec_t *sec = (sec_t *)self; - - if (zero || alignment > PAGE || sec->opts.nshards == 0 - || size > sec->opts.max_alloc) { - return pai_alloc(tsdn, sec->fallback, size, alignment, zero, - /* guarded */ false, frequent_reuse, - deferred_work_generated); - } pszind_t pszind = sz_psz2ind(size); assert(pszind < sec->npsizes); - sec_shard_t *shard = sec_shard_pick(tsdn, sec); - sec_bin_t *bin = &shard->bins[pszind]; - bool do_batch_fill = false; - - malloc_mutex_lock(tsdn, &shard->mtx); - edata_t *edata = sec_shard_alloc_locked(tsdn, sec, shard, bin); - if (edata == NULL) { - if (!bin->being_batch_filled - && sec->opts.batch_fill_extra > 0) { - bin->being_batch_filled = true; - do_batch_fill = true; + /* + * If there's only one shard, skip the trylock optimization and + * go straight to the blocking lock. 
+ */ + if (sec->opts.nshards == 1) { + sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); + if (edata == NULL) { + bin->stats.nmisses++; } + malloc_mutex_unlock(tsdn, &bin->mtx); + JE_USDT(sec_alloc, 5, sec, bin, edata, size, + /* frequent_reuse */ 1); + return edata; } - malloc_mutex_unlock(tsdn, &shard->mtx); - if (edata == NULL) { - if (do_batch_fill) { - edata = sec_batch_fill_and_alloc( - tsdn, sec, shard, bin, size, frequent_reuse); - } else { - edata = pai_alloc(tsdn, sec->fallback, size, alignment, - zero, /* guarded */ false, frequent_reuse, - deferred_work_generated); - } - } - JE_USDT(sec_alloc, 5, sec, shard, edata, size, frequent_reuse); - return edata; -} - -static bool -sec_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool zero, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - JE_USDT(sec_expand, 4, sec, edata, old_size, new_size); - return pai_expand(tsdn, sec->fallback, edata, old_size, new_size, zero, - deferred_work_generated); -} - -static bool -sec_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, - size_t new_size, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - JE_USDT(sec_shrink, 4, sec, edata, old_size, new_size); - return pai_shrink(tsdn, sec->fallback, edata, old_size, new_size, - deferred_work_generated); + return sec_multishard_trylock_alloc(tsdn, sec, size, pszind); } static void -sec_flush_all_locked(tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - shard->bytes_cur = 0; - edata_list_active_t to_flush; - edata_list_active_init(&to_flush); - for (pszind_t i = 0; i < sec->npsizes; i++) { - sec_bin_t *bin = &shard->bins[i]; - bin->bytes_cur = 0; - edata_list_active_concat(&to_flush, &bin->freelist); - } +sec_bin_dalloc_locked(tsdn_t *tsdn, sec_t *sec, sec_bin_t *bin, size_t size, + 
edata_list_active_t *dalloc_list) { + malloc_mutex_assert_owner(tsdn, &bin->mtx); - /* - * Ordinarily we would try to avoid doing the batch deallocation while - * holding the shard mutex, but the flush_all pathways only happen when - * we're disabling the HPA or resetting the arena, both of which are - * rare pathways. - */ - bool deferred_work_generated = false; - pai_dalloc_batch( - tsdn, sec->fallback, &to_flush, &deferred_work_generated); -} - -static void -sec_shard_dalloc_and_unlock( - tsdn_t *tsdn, sec_t *sec, sec_shard_t *shard, edata_t *edata) { - malloc_mutex_assert_owner(tsdn, &shard->mtx); - assert(shard->bytes_cur <= sec->opts.max_bytes); - size_t size = edata_size_get(edata); - pszind_t pszind = sz_psz2ind(size); - assert(pszind < sec->npsizes); - /* - * Prepending here results in LIFO allocation per bin, which seems - * reasonable. - */ - sec_bin_t *bin = &shard->bins[pszind]; - edata_list_active_prepend(&bin->freelist, edata); bin->bytes_cur += size; - shard->bytes_cur += size; - if (shard->bytes_cur > sec->opts.max_bytes) { - /* - * We've exceeded the shard limit. We make two nods in the - * direction of fragmentation avoidance: we flush everything in - * the shard, rather than one particular bin, and we hold the - * lock while flushing (in case one of the extents we flush is - * highly preferred from a fragmentation-avoidance perspective - * in the backing allocator). This has the extra advantage of - * not requiring advanced cache balancing strategies. 
- */ - sec_flush_some_and_unlock(tsdn, sec, shard); - malloc_mutex_assert_not_owner(tsdn, &shard->mtx); - } else { - malloc_mutex_unlock(tsdn, &shard->mtx); - } -} + edata_t *edata = edata_list_active_first(dalloc_list); + assert(edata != NULL); + edata_list_active_remove(dalloc_list, edata); + JE_USDT(sec_dalloc, 3, sec, bin, edata); + edata_list_active_prepend(&bin->freelist, edata); + /* Single extent can be returned to SEC */ + assert(edata_list_active_empty(dalloc_list)); -static void -sec_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { - sec_t *sec = (sec_t *)self; - if (sec->opts.nshards == 0 - || edata_size_get(edata) > sec->opts.max_alloc) { - pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); + if (bin->bytes_cur <= sec->opts.max_bytes) { + bin->stats.ndalloc_noflush++; return; } - sec_shard_t *shard = sec_shard_pick(tsdn, sec); - JE_USDT(sec_dalloc, 3, sec, shard, edata); - malloc_mutex_lock(tsdn, &shard->mtx); - if (shard->enabled) { - sec_shard_dalloc_and_unlock(tsdn, sec, shard, edata); + bin->stats.ndalloc_flush++; + /* we want to flush 1/4 of max_bytes */ + size_t bytes_target = sec->opts.max_bytes - (sec->opts.max_bytes >> 2); + while (bin->bytes_cur > bytes_target + && !edata_list_active_empty(&bin->freelist)) { + edata_t *cur = edata_list_active_last(&bin->freelist); + size_t sz = edata_size_get(cur); + assert(sz <= bin->bytes_cur && sz > 0); + bin->bytes_cur -= sz; + edata_list_active_remove(&bin->freelist, cur); + edata_list_active_append(dalloc_list, cur); + } +} + +static void +sec_multishard_trylock_dalloc(tsdn_t *tsdn, sec_t *sec, size_t size, + pszind_t pszind, edata_list_active_t *dalloc_list) { + assert(sec->opts.nshards > 0); + + /* Try to dalloc in this threads bin first */ + uint8_t cur_shard = sec_shard_pick(tsdn, sec); + for (size_t i = 0; i < sec->opts.nshards; ++i) { + sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); + if (!malloc_mutex_trylock(tsdn, &bin->mtx)) { + 
sec_bin_dalloc_locked( + tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); + return; + } + cur_shard++; + if (cur_shard == sec->opts.nshards) { + cur_shard = 0; + } + } + /* No bin had alloc or had the extent */ + assert(cur_shard == sec_shard_pick(tsdn, sec)); + sec_bin_t *bin = sec_bin_pick(sec, cur_shard, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); +} + +void +sec_dalloc(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *dalloc_list) { + if (!sec_is_used(sec)) { + return; + } + edata_t *edata = edata_list_active_first(dalloc_list); + size_t size = edata_size_get(edata); + if (size > sec->opts.max_alloc) { + return; + } + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + /* + * If there's only one shard, skip the trylock optimization and + * go straight to the blocking lock. + */ + if (sec->opts.nshards == 1) { + sec_bin_t *bin = sec_bin_pick(sec, /* shard */ 0, pszind); + malloc_mutex_lock(tsdn, &bin->mtx); + sec_bin_dalloc_locked(tsdn, sec, bin, size, dalloc_list); + malloc_mutex_unlock(tsdn, &bin->mtx); + return; + } + sec_multishard_trylock_dalloc(tsdn, sec, size, pszind, dalloc_list); +} + +void +sec_fill(tsdn_t *tsdn, sec_t *sec, size_t size, edata_list_active_t *result, + size_t nallocs) { + assert((size & PAGE_MASK) == 0); + assert(sec->opts.nshards != 0 && size <= sec->opts.max_alloc); + assert(nallocs > 0); + + pszind_t pszind = sz_psz2ind(size); + assert(pszind < sec->npsizes); + + sec_bin_t *bin = sec_bin_pick(sec, sec_shard_pick(tsdn, sec), pszind); + malloc_mutex_assert_not_owner(tsdn, &bin->mtx); + malloc_mutex_lock(tsdn, &bin->mtx); + size_t new_cached_bytes = nallocs * size; + if (bin->bytes_cur + new_cached_bytes <= sec->opts.max_bytes) { + assert(!edata_list_active_empty(result)); + edata_list_active_concat(&bin->freelist, result); + bin->bytes_cur += new_cached_bytes; } else { - 
malloc_mutex_unlock(tsdn, &shard->mtx); - pai_dalloc(tsdn, sec->fallback, edata, deferred_work_generated); + /* + * Unlikely case of many threads filling at the same time and + * going above max. + */ + bin->stats.noverfills++; + while (bin->bytes_cur + size <= sec->opts.max_bytes) { + edata_t *edata = edata_list_active_first(result); + if (edata == NULL) { + break; + } + edata_list_active_remove(result, edata); + assert(size == edata_size_get(edata)); + edata_list_active_append(&bin->freelist, edata); + bin->bytes_cur += size; + } } + malloc_mutex_unlock(tsdn, &bin->mtx); } void -sec_flush(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec_flush_all_locked(tsdn, sec, &sec->shards[i]); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); +sec_flush(tsdn_t *tsdn, sec_t *sec, edata_list_active_t *to_flush) { + if (!sec_is_used(sec)) { + return; } -} - -void -sec_disable(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sec->shards[i].enabled = false; - sec_flush_all_locked(tsdn, sec, &sec->shards[i]); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + bin->bytes_cur = 0; + edata_list_active_concat(to_flush, &bin->freelist); + malloc_mutex_unlock(tsdn, &bin->mtx); } } void sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { + if (!sec_is_used(sec)) { + return; + } size_t sum = 0; - for (size_t i = 0; i < sec->opts.nshards; i++) { - /* - * We could save these lock acquisitions by making bytes_cur - * atomic, but stats collection is rare anyways and we expect - * the number and type of stats to get more interesting. 
- */ - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - sum += sec->shards[i].bytes_cur; - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + sum += bin->bytes_cur; + sec_bin_stats_accum(&stats->total, &bin->stats); + malloc_mutex_unlock(tsdn, &bin->mtx); } stats->bytes += sum; } @@ -403,31 +332,50 @@ sec_stats_merge(tsdn_t *tsdn, sec_t *sec, sec_stats_t *stats) { void sec_mutex_stats_read( tsdn_t *tsdn, sec_t *sec, mutex_prof_data_t *mutex_prof_data) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_lock(tsdn, &sec->shards[i].mtx); - malloc_mutex_prof_accum( - tsdn, mutex_prof_data, &sec->shards[i].mtx); - malloc_mutex_unlock(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_lock(tsdn, &bin->mtx); + malloc_mutex_prof_accum(tsdn, mutex_prof_data, &bin->mtx); + malloc_mutex_unlock(tsdn, &bin->mtx); } } void sec_prefork2(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_prefork(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_prefork(tsdn, &bin->mtx); } } void sec_postfork_parent(tsdn_t *tsdn, sec_t *sec) { - for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_postfork_parent(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_postfork_parent(tsdn, &bin->mtx); } } void sec_postfork_child(tsdn_t *tsdn, sec_t *sec) { - 
for (size_t i = 0; i < sec->opts.nshards; i++) { - malloc_mutex_postfork_child(tsdn, &sec->shards[i].mtx); + if (!sec_is_used(sec)) { + return; + } + size_t ntotal_bins = sec->opts.nshards * sec->npsizes; + for (pszind_t i = 0; i < ntotal_bins; i++) { + sec_bin_t *bin = &sec->bins[i]; + malloc_mutex_postfork_child(tsdn, &bin->mtx); } } diff --git a/src/stats.c b/src/stats.c index 2ccac6c9..be70a6fc 100644 --- a/src/stats.c +++ b/src/stats.c @@ -791,9 +791,35 @@ stats_arena_extents_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_sec_print(emitter_t *emitter, unsigned i) { size_t sec_bytes; + size_t sec_hits; + size_t sec_misses; + size_t sec_dalloc_flush; + size_t sec_dalloc_noflush; + size_t sec_overfills; CTL_M2_GET("stats.arenas.0.hpa_sec_bytes", i, &sec_bytes, size_t); emitter_kv(emitter, "sec_bytes", "Bytes in small extent cache", emitter_type_size, &sec_bytes); + CTL_M2_GET("stats.arenas.0.hpa_sec_hits", i, &sec_hits, size_t); + emitter_kv(emitter, "sec_hits", "Total hits in small extent cache", + emitter_type_size, &sec_hits); + CTL_M2_GET("stats.arenas.0.hpa_sec_misses", i, &sec_misses, size_t); + emitter_kv(emitter, "sec_misses", "Total misses in small extent cache", + emitter_type_size, &sec_misses); + CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_noflush", i, + &sec_dalloc_noflush, size_t); + emitter_kv(emitter, "sec_dalloc_noflush", + "Dalloc calls without flush in small extent cache", + emitter_type_size, &sec_dalloc_noflush); + CTL_M2_GET("stats.arenas.0.hpa_sec_dalloc_flush", i, &sec_dalloc_flush, + size_t); + emitter_kv(emitter, "sec_dalloc_flush", + "Dalloc calls with flush in small extent cache", emitter_type_size, + &sec_dalloc_flush); + CTL_M2_GET( + "stats.arenas.0.hpa_sec_overfills", i, &sec_overfills, size_t); + emitter_kv(emitter, "sec_overfills", + "sec_fill calls that went over max_bytes", emitter_type_size, + &sec_overfills); } static void @@ -1642,7 +1668,6 @@ stats_general_print(emitter_t *emitter) { 
OPT_WRITE_SIZE_T("hpa_sec_nshards") OPT_WRITE_SIZE_T("hpa_sec_max_alloc") OPT_WRITE_SIZE_T("hpa_sec_max_bytes") - OPT_WRITE_SIZE_T("hpa_sec_bytes_after_flush") OPT_WRITE_SIZE_T("hpa_sec_batch_fill_extra") OPT_WRITE_BOOL("huge_arena_pac_thp") OPT_WRITE_CHAR_P("metadata_thp") diff --git a/test/unit/hpa.c b/test/unit/hpa.c index 5937601e..9c4253cd 100644 --- a/test/unit/hpa.c +++ b/test/unit/hpa.c @@ -113,10 +113,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; @@ -309,83 +311,6 @@ TEST_BEGIN(test_stress) { } TEST_END -static void -expect_contiguous(edata_t **edatas, size_t nedatas) { - for (size_t i = 0; i < nedatas; i++) { - size_t expected = (size_t)edata_base_get(edatas[0]) + i * PAGE; - expect_zu_eq(expected, (size_t)edata_base_get(edatas[i]), - "Mismatch at index %zu", i); - } -} - -TEST_BEGIN(test_alloc_dalloc_batch) { - test_skip_if(!hpa_supported()); - - hpa_shard_t *shard = create_test_data( - &hpa_hooks_default, &test_hpa_shard_opts_default); - tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); - - bool deferred_work_generated = false; - - enum { NALLOCS = 8 }; - - edata_t *allocs[NALLOCS]; - /* - * Allocate a mix of ways; first half from regular alloc, second half - * from alloc_batch. 
- */ - for (size_t i = 0; i < NALLOCS / 2; i++) { - allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - edata_list_active_t allocs_list; - edata_list_active_init(&allocs_list); - size_t nsuccess = pai_alloc_batch(tsdn, &shard->pai, PAGE, NALLOCS / 2, - &allocs_list, /* frequent_reuse */ false, &deferred_work_generated); - expect_zu_eq(NALLOCS / 2, nsuccess, "Unexpected oom"); - for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - allocs[i] = edata_list_active_first(&allocs_list); - edata_list_active_remove(&allocs_list, allocs[i]); - } - - /* - * Should have allocated them contiguously, despite the differing - * methods used. - */ - void *orig_base = edata_base_get(allocs[0]); - expect_contiguous(allocs, NALLOCS); - - /* - * Batch dalloc the first half, individually deallocate the second half. - */ - for (size_t i = 0; i < NALLOCS / 2; i++) { - edata_list_active_append(&allocs_list, allocs[i]); - } - pai_dalloc_batch( - tsdn, &shard->pai, &allocs_list, &deferred_work_generated); - for (size_t i = NALLOCS / 2; i < NALLOCS; i++) { - pai_dalloc( - tsdn, &shard->pai, allocs[i], &deferred_work_generated); - } - - /* Reallocate (individually), and ensure reuse and contiguity. 
*/ - for (size_t i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure."); - } - void *new_base = edata_base_get(allocs[0]); - expect_ptr_eq( - orig_base, new_base, "Failed to reuse the allocated memory."); - expect_contiguous(allocs, NALLOCS); - - destroy_test_data(shard); -} -TEST_END - static uintptr_t defer_bump_ptr = HUGEPAGE * 123; static void * defer_test_map(size_t size) { @@ -1533,8 +1458,7 @@ main(void) { (void)mem_tree_iter; (void)mem_tree_reverse_iter; (void)mem_tree_destroy; - return test_no_reentrancy(test_alloc_max, test_stress, - test_alloc_dalloc_batch, test_defer_time, + return test_no_reentrancy(test_alloc_max, test_stress, test_defer_time, test_purge_no_infinite_loop, test_no_min_purge_interval, test_min_purge_interval, test_purge, test_experimental_max_purge_nhp, test_vectorized_opt_eq_zero, diff --git a/test/unit/hpa_sec_integration.c b/test/unit/hpa_sec_integration.c new file mode 100644 index 00000000..c54cdc0c --- /dev/null +++ b/test/unit/hpa_sec_integration.c @@ -0,0 +1,239 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/hpa.h" +#include "jemalloc/internal/nstime.h" + +#define SHARD_IND 111 + +#define ALLOC_MAX (HUGEPAGE) + +typedef struct test_data_s test_data_t; +struct test_data_s { + /* + * Must be the first member -- we convert back and forth between the + * test_data_t and the hpa_shard_t; + */ + hpa_shard_t shard; + hpa_central_t central; + base_t *base; + edata_cache_t shard_edata_cache; + + emap_t emap; +}; + +static hpa_shard_opts_t test_hpa_shard_opts = { + /* slab_max_alloc */ + HUGEPAGE, + /* hugification_threshold */ + 0.9 * HUGEPAGE, + /* dirty_mult */ + FXP_INIT_PERCENT(10), + /* deferral_allowed */ + true, + /* hugify_delay_ms */ + 0, + /* hugify_sync */ + false, + /* min_purge_interval_ms */ + 5, + /* 
experimental_max_purge_nhp */ + -1, + /* purge_threshold */ + PAGE, + /* min_purge_delay_ms */ + 10, + /* hugify_style */ + hpa_hugify_style_lazy}; + +static hpa_shard_t * +create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts, + const sec_opts_t *sec_opts) { + bool err; + base_t *base = base_new(TSDN_NULL, /* ind */ SHARD_IND, + &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); + assert_ptr_not_null(base, ""); + + test_data_t *test_data = malloc(sizeof(test_data_t)); + assert_ptr_not_null(test_data, ""); + + test_data->base = base; + + err = edata_cache_init(&test_data->shard_edata_cache, base); + assert_false(err, ""); + + err = emap_init(&test_data->emap, test_data->base, /* zeroed */ false); + assert_false(err, ""); + + err = hpa_central_init(&test_data->central, test_data->base, hooks); + assert_false(err, ""); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, + &test_data->emap, test_data->base, &test_data->shard_edata_cache, + SHARD_IND, opts, sec_opts); + assert_false(err, ""); + + return (hpa_shard_t *)test_data; +} + +static void +destroy_test_data(hpa_shard_t *shard) { + test_data_t *test_data = (test_data_t *)shard; + base_delete(TSDN_NULL, test_data->base); + free(test_data); +} + +static uintptr_t defer_bump_ptr = HUGEPAGE * 123; +static void * +defer_test_map(size_t size) { + void *result = (void *)defer_bump_ptr; + defer_bump_ptr += size; + return result; +} + +static void +defer_test_unmap(void *ptr, size_t size) { + (void)ptr; + (void)size; +} + +static size_t ndefer_purge_calls = 0; +static size_t npurge_size = 0; +static void +defer_test_purge(void *ptr, size_t size) { + (void)ptr; + npurge_size = size; + ++ndefer_purge_calls; +} + +static bool defer_vectorized_purge_called = false; +static bool +defer_vectorized_purge(void *vec, size_t vlen, size_t nbytes) { + (void)vec; + (void)nbytes; + ++ndefer_purge_calls; + defer_vectorized_purge_called = true; + return 
false; +} + +static size_t ndefer_hugify_calls = 0; +static bool +defer_test_hugify(void *ptr, size_t size, bool sync) { + ++ndefer_hugify_calls; + return false; +} + +static size_t ndefer_dehugify_calls = 0; +static void +defer_test_dehugify(void *ptr, size_t size) { + ++ndefer_dehugify_calls; +} + +static nstime_t defer_curtime; +static void +defer_test_curtime(nstime_t *r_time, bool first_reading) { + *r_time = defer_curtime; +} + +static uint64_t +defer_test_ms_since(nstime_t *past_time) { + return (nstime_ns(&defer_curtime) - nstime_ns(past_time)) / 1000 / 1000; +} + +// test that freed pages stay in SEC and hpa thinks they are active + +TEST_BEGIN(test_hpa_sec) { + test_skip_if(!hpa_supported()); + + hpa_hooks_t hooks; + hooks.map = &defer_test_map; + hooks.unmap = &defer_test_unmap; + hooks.purge = &defer_test_purge; + hooks.hugify = &defer_test_hugify; + hooks.dehugify = &defer_test_dehugify; + hooks.curtime = &defer_test_curtime; + hooks.ms_since = &defer_test_ms_since; + hooks.vectorized_purge = &defer_vectorized_purge; + + hpa_shard_opts_t opts = test_hpa_shard_opts; + + enum { NALLOCS = 8 }; + sec_opts_t sec_opts; + sec_opts.nshards = 1; + sec_opts.max_alloc = 2 * PAGE; + sec_opts.max_bytes = NALLOCS * PAGE; + sec_opts.batch_fill_extra = 4; + + hpa_shard_t *shard = create_test_data(&hooks, &opts, &sec_opts); + bool deferred_work_generated = false; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + + /* alloc 1 PAGE, confirm sec has fill_extra bytes. 
*/ + edata_t *edata1 = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edata1, "Unexpected null edata"); + hpa_shard_stats_t hpa_stats; + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, + 1 + sec_opts.batch_fill_extra, ""); + expect_zu_eq(hpa_stats.secstats.bytes, PAGE * sec_opts.batch_fill_extra, + "sec should have fill extra pages"); + + /* Alloc/dealloc NALLOCS times and confirm extents are in sec. */ + edata_t *edatas[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edatas[i] = pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, + false, false, &deferred_work_generated); + expect_ptr_not_null(edatas[i], "Unexpected null edata"); + } + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, 2 + NALLOCS, ""); + expect_zu_eq(hpa_stats.secstats.bytes, PAGE, "2 refills (at 0 and 4)"); + + for (int i = 0; i < NALLOCS - 1; i++) { + pai_dalloc( + tsdn, &shard->pai, edatas[i], &deferred_work_generated); + } + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (2 + NALLOCS), ""); + expect_zu_eq( + hpa_stats.secstats.bytes, sec_opts.max_bytes, "sec should be full"); + + /* this one should flush 1 + 0.25 * 8 = 3 extents */ + pai_dalloc( + tsdn, &shard->pai, edatas[NALLOCS - 1], &deferred_work_generated); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, (NALLOCS - 1), ""); + expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, + "sec should be full"); + + /* Next allocation should come from SEC and not increase active */ + edata_t *edata2 
= pai_alloc(tsdn, &shard->pai, PAGE, PAGE, false, false, + false, &deferred_work_generated); + expect_ptr_not_null(edata2, "Unexpected null edata"); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes - PAGE, + "sec should have max_bytes minus one page that just came from it"); + + /* We return this one and it stays in the cache */ + pai_dalloc(tsdn, &shard->pai, edata2, &deferred_work_generated); + memset(&hpa_stats, 0, sizeof(hpa_shard_stats_t)); + hpa_shard_stats_merge(tsdn, shard, &hpa_stats); + expect_zu_eq(hpa_stats.psset_stats.merged.nactive, NALLOCS - 1, ""); + expect_zu_eq(hpa_stats.psset_stats.merged.ndirty, 3, ""); + expect_zu_eq(hpa_stats.secstats.bytes, 0.75 * sec_opts.max_bytes, ""); + + destroy_test_data(shard); +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_hpa_sec); +} diff --git a/test/unit/hpa_sec_integration.sh b/test/unit/hpa_sec_integration.sh new file mode 100644 index 00000000..22451f1d --- /dev/null +++ b/test/unit/hpa_sec_integration.sh @@ -0,0 +1,3 @@ +#!/bin/sh + +export MALLOC_CONF="process_madvise_max_batch:0,experimental_hpa_start_huge_if_thp_always:false" diff --git a/test/unit/hpa_thp_always.c b/test/unit/hpa_thp_always.c index 29c86cdd..6e56e663 100644 --- a/test/unit/hpa_thp_always.c +++ b/test/unit/hpa_thp_always.c @@ -65,10 +65,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); 
assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/hpa_vectorized_madvise.c b/test/unit/hpa_vectorized_madvise.c index e82f0ffb..2121de49 100644 --- a/test/unit/hpa_vectorized_madvise.c +++ b/test/unit/hpa_vectorized_madvise.c @@ -66,9 +66,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/hpa_vectorized_madvise_large_batch.c b/test/unit/hpa_vectorized_madvise_large_batch.c index d542f72a..e92988de 100644 --- a/test/unit/hpa_vectorized_madvise_large_batch.c +++ b/test/unit/hpa_vectorized_madvise_large_batch.c @@ -66,10 +66,12 @@ create_test_data(const hpa_hooks_t *hooks, hpa_shard_opts_t *opts) { err = hpa_central_init(&test_data->central, test_data->base, hooks); assert_false(err, ""); - - err = hpa_shard_init(&test_data->shard, &test_data->central, + sec_opts_t sec_opts; + sec_opts.nshards = 0; + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + err = hpa_shard_init(tsdn, &test_data->shard, &test_data->central, &test_data->emap, test_data->base, &test_data->shard_edata_cache, - SHARD_IND, opts); + SHARD_IND, opts, &sec_opts); assert_false(err, ""); return (hpa_shard_t *)test_data; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index f409f687..4c11e485 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -313,7 +313,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(size_t, hpa_sec_nshards, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_alloc, always); TEST_MALLCTL_OPT(size_t, hpa_sec_max_bytes, always); - 
TEST_MALLCTL_OPT(size_t, hpa_sec_bytes_after_flush, always); TEST_MALLCTL_OPT(size_t, hpa_sec_batch_fill_extra, always); TEST_MALLCTL_OPT(ssize_t, experimental_hpa_max_purge_nhp, always); TEST_MALLCTL_OPT(size_t, hpa_purge_threshold, always); diff --git a/test/unit/sec.c b/test/unit/sec.c index d57c66ec..2a6a00ce 100644 --- a/test/unit/sec.c +++ b/test/unit/sec.c @@ -2,618 +2,493 @@ #include "jemalloc/internal/sec.h" -typedef struct pai_test_allocator_s pai_test_allocator_t; -struct pai_test_allocator_s { - pai_t pai; - bool alloc_fail; - size_t alloc_count; - size_t alloc_batch_count; - size_t dalloc_count; - size_t dalloc_batch_count; +typedef struct test_data_s test_data_t; +struct test_data_s { /* - * We use a simple bump allocator as the implementation. This isn't - * *really* correct, since we may allow expansion into a subsequent - * allocation, but it's not like the SEC is really examining the - * pointers it gets back; this is mostly just helpful for debugging. + * Must be the first member -- we convert back and forth between the + * test_data_t and the sec_t; */ - uintptr_t next_ptr; - size_t expand_count; - bool expand_return_value; - size_t shrink_count; - bool shrink_return_value; + sec_t sec; + base_t *base; }; static void -test_sec_init(sec_t *sec, pai_t *fallback, size_t nshards, size_t max_alloc, - size_t max_bytes) { - sec_opts_t opts; - opts.nshards = 1; - opts.max_alloc = max_alloc; - opts.max_bytes = max_bytes; - /* - * Just choose reasonable defaults for these; most tests don't care so - * long as they're something reasonable. - */ - opts.bytes_after_flush = max_bytes / 2; - opts.batch_fill_extra = 4; - - /* - * We end up leaking this base, but that's fine; this test is - * short-running, and SECs are arena-scoped in reality. 
- */ - base_t *base = base_new(TSDN_NULL, /* ind */ 123, +test_data_init(tsdn_t *tsdn, test_data_t *tdata, const sec_opts_t *opts) { + tdata->base = base_new(TSDN_NULL, /* ind */ 123, &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - bool err = sec_init(TSDN_NULL, sec, base, fallback, &opts); + bool err = sec_init(tsdn, &tdata->sec, tdata->base, opts); assert_false(err, "Unexpected initialization failure"); - assert_u_ge(sec->npsizes, 0, "Zero size classes allowed for caching"); -} - -static inline edata_t * -pai_test_allocator_alloc(tsdn_t *tsdn, pai_t *self, size_t size, - size_t alignment, bool zero, bool guarded, bool frequent_reuse, - bool *deferred_work_generated) { - assert(!guarded); - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - if (ta->alloc_fail) { - return NULL; + if (tdata->sec.opts.nshards > 0) { + assert_u_ge(tdata->sec.npsizes, 0, + "Zero size classes allowed for caching"); } - edata_t *edata = malloc(sizeof(edata_t)); - assert_ptr_not_null(edata, ""); - ta->next_ptr += alignment - 1; - edata_init(edata, /* arena_ind */ 0, - (void *)(ta->next_ptr & ~(alignment - 1)), size, - /* slab */ false, - /* szind */ 0, /* sn */ 1, extent_state_active, /* zero */ zero, - /* comitted */ true, /* ranged */ false, EXTENT_NOT_HEAD); - ta->next_ptr += size; - ta->alloc_count++; - return edata; -} - -static inline size_t -pai_test_allocator_alloc_batch(tsdn_t *tsdn, pai_t *self, size_t size, - size_t nallocs, edata_list_active_t *results, bool frequent_reuse, - bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - if (ta->alloc_fail) { - return 0; - } - for (size_t i = 0; i < nallocs; i++) { - edata_t *edata = malloc(sizeof(edata_t)); - assert_ptr_not_null(edata, ""); - edata_init(edata, /* arena_ind */ 0, (void *)ta->next_ptr, size, - /* slab */ false, /* szind */ 0, /* sn */ 1, - extent_state_active, /* zero */ false, /* comitted */ true, - /* ranged */ false, EXTENT_NOT_HEAD); - ta->next_ptr += 
size; - ta->alloc_batch_count++; - edata_list_active_append(results, edata); - } - return nallocs; -} - -static bool -pai_test_allocator_expand(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool zero, - bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->expand_count++; - return ta->expand_return_value; -} - -static bool -pai_test_allocator_shrink(tsdn_t *tsdn, pai_t *self, edata_t *edata, - size_t old_size, size_t new_size, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->shrink_count++; - return ta->shrink_return_value; } static void -pai_test_allocator_dalloc( - tsdn_t *tsdn, pai_t *self, edata_t *edata, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - ta->dalloc_count++; - free(edata); +destroy_test_data(tsdn_t *tsdn, test_data_t *tdata) { + /* There is no destroy sec to delete the bins ?! */ + base_delete(tsdn, tdata->base); } -static void -pai_test_allocator_dalloc_batch(tsdn_t *tsdn, pai_t *self, - edata_list_active_t *list, bool *deferred_work_generated) { - pai_test_allocator_t *ta = (pai_test_allocator_t *)self; - - edata_t *edata; - while ((edata = edata_list_active_first(list)) != NULL) { - edata_list_active_remove(list, edata); - ta->dalloc_batch_count++; - free(edata); - } -} - -static inline void -pai_test_allocator_init(pai_test_allocator_t *ta) { - ta->alloc_fail = false; - ta->alloc_count = 0; - ta->alloc_batch_count = 0; - ta->dalloc_count = 0; - ta->dalloc_batch_count = 0; - /* Just don't start the edata at 0. 
*/ - ta->next_ptr = 10 * PAGE; - ta->expand_count = 0; - ta->expand_return_value = false; - ta->shrink_count = 0; - ta->shrink_return_value = false; - ta->pai.alloc = &pai_test_allocator_alloc; - ta->pai.alloc_batch = &pai_test_allocator_alloc_batch; - ta->pai.expand = &pai_test_allocator_expand; - ta->pai.shrink = &pai_test_allocator_shrink; - ta->pai.dalloc = &pai_test_allocator_dalloc; - ta->pai.dalloc_batch = &pai_test_allocator_dalloc_batch; -} - -TEST_BEGIN(test_reuse) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* - * We can't use the "real" tsd, since we malloc within the test - * allocator hooks; we'd get lock inversion crashes. Eventually, we - * should have a way to mock tsds, but for now just don't do any - * lock-order checking. - */ - tsdn_t *tsdn = TSDN_NULL; - /* - * 11 allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 33 pages in the cache before triggering a flush. We - * set the flush liimt to twice this amount, to avoid accidentally - * triggering a flush caused by the batch-allocation down the cache fill - * pathway disrupting ordering. 
- */ - enum { NALLOCS = 11 }; - edata_t *one_page[NALLOCS]; - edata_t *two_page[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ 2 * PAGE, - /* max_bytes */ 2 * (NALLOCS * PAGE + NALLOCS * 2 * PAGE)); - for (int i = 0; i < NALLOCS; i++) { - one_page[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); - two_page[i] = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(one_page[i], "Unexpected alloc failure"); - } - expect_zu_eq(0, ta.alloc_count, "Should be using batch allocs"); - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le( - 2 * NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Free in a different order than we allocated, to make sure free-list - * separation works correctly. - */ - for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc( - tsdn, &sec.pai, one_page[i], &deferred_work_generated); - } - for (int i = NALLOCS - 1; i >= 0; i--) { - pai_dalloc( - tsdn, &sec.pai, two_page[i], &deferred_work_generated); - } - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Check that the n'th most recent deallocated extent is returned for - * the n'th alloc request of a given size. 
- */ - for (int i = 0; i < NALLOCS; i++) { - edata_t *alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - edata_t *alloc2 = pai_alloc(tsdn, &sec.pai, 2 * PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_eq(one_page[i], alloc1, "Got unexpected allocation"); - expect_ptr_eq(two_page[i], alloc2, "Got unexpected allocation"); - } - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); -} -TEST_END - -TEST_BEGIN(test_auto_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - /* - * 10-allocs apiece of 1-PAGE and 2-PAGE objects means that we should be - * able to get to 30 pages in the cache before triggering a flush. The - * choice of NALLOCS here is chosen to match the batch allocation - * default (4 extra + 1 == 5; so 10 allocations leaves the cache exactly - * empty, even in the presence of batch allocation on fill). - * Eventually, once our allocation batching strategies become smarter, - * this should change. 
- */ - enum { NALLOCS = 10 }; - edata_t *extra_alloc; - edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ NALLOCS * PAGE); - for (int i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - extra_alloc = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - expect_ptr_not_null(extra_alloc, "Unexpected alloc failure"); - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - expect_zu_le( - NALLOCS + 1, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* Free until the SEC is full, but should not have flushed yet. */ - for (int i = 0; i < NALLOCS; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - expect_zu_le( - NALLOCS + 1, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - /* - * Free the extra allocation; this should trigger a flush. The internal - * flushing logic is allowed to get complicated; for now, we rely on our - * whitebox knowledge of the fact that the SEC flushes bins in their - * entirety when it decides to do so, and it has only one bin active - * right now. 
- */ - pai_dalloc(tsdn, &sec.pai, extra_alloc, &deferred_work_generated); - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(NALLOCS + 1, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); -} -TEST_END - -/* - * A disable and a flush are *almost* equivalent; the only difference is what - * happens afterwards; disabling disallows all future caching as well. - */ -static void -do_disable_flush_test(bool is_disable) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - - enum { NALLOCS = 11 }; - edata_t *allocs[NALLOCS]; - bool deferred_work_generated = false; - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ NALLOCS * PAGE); - for (int i = 0; i < NALLOCS; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_ptr_not_null(allocs[i], "Unexpected alloc failure"); - } - /* Free all but the last aloc. 
*/ - for (int i = 0; i < NALLOCS - 1; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - size_t max_allocs = ta.alloc_count + ta.alloc_batch_count; - - expect_zu_le(NALLOCS, max_allocs, "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, "Incorrect number of allocations"); - - if (is_disable) { - sec_disable(tsdn, &sec); - } else { - sec_flush(tsdn, &sec); - } - - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_le(NALLOCS - 1, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); - size_t old_dalloc_batch_count = ta.dalloc_batch_count; - - /* - * If we free into a disabled SEC, it should forward to the fallback. - * Otherwise, the SEC should accept the allocation. - */ - pai_dalloc( - tsdn, &sec.pai, allocs[NALLOCS - 1], &deferred_work_generated); - - expect_zu_eq(max_allocs, ta.alloc_count + ta.alloc_batch_count, - "Incorrect number of allocations"); - expect_zu_eq(is_disable ? 1 : 0, ta.dalloc_count, - "Incorrect number of (non-batch) deallocations"); - expect_zu_eq(old_dalloc_batch_count, ta.dalloc_batch_count, - "Incorrect number of batch deallocations"); -} - -TEST_BEGIN(test_disable) { - do_disable_flush_test(/* is_disable */ true); -} -TEST_END - -TEST_BEGIN(test_flush) { - do_disable_flush_test(/* is_disable */ false); -} -TEST_END - -TEST_BEGIN(test_max_alloc_respected) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - size_t max_alloc = 2 * PAGE; - size_t attempted_alloc = 3 * PAGE; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, max_alloc, - /* max_bytes */ 1000 * PAGE); - - for (size_t i = 0; i < 100; i++) { - expect_zu_eq( - i, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq( - i, ta.dalloc_count, "Incorrect number of deallocations"); - edata_t *edata = pai_alloc(tsdn, &sec.pai, attempted_alloc, - PAGE, /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, &deferred_work_generated); - expect_ptr_not_null(edata, "Unexpected alloc failure"); - expect_zu_eq( - i + 1, ta.alloc_count, "Incorrect number of allocations"); - expect_zu_eq( - i, ta.dalloc_count, "Incorrect number of deallocations"); - pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); - } -} -TEST_END - -TEST_BEGIN(test_expand_shrink_delegate) { - /* - * Expand and shrink shouldn't affect sec state; they should just - * delegate to the fallback PAI. - */ - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, - /* max_alloc */ USIZE_GROW_SLOW_THRESHOLD, - /* max_bytes */ 1000 * PAGE); - edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - expect_ptr_not_null(edata, "Unexpected alloc failure"); - - bool err = pai_expand(tsdn, &sec.pai, edata, PAGE, 4 * PAGE, - /* zero */ false, &deferred_work_generated); - expect_false(err, "Unexpected expand failure"); - expect_zu_eq(1, ta.expand_count, ""); - ta.expand_return_value = true; - err = pai_expand(tsdn, &sec.pai, edata, 4 * PAGE, 3 * PAGE, - /* zero */ false, &deferred_work_generated); - expect_true(err, "Unexpected expand success"); - expect_zu_eq(2, ta.expand_count, ""); - - err = pai_shrink(tsdn, &sec.pai, edata, 4 * PAGE, 2 * PAGE, - &deferred_work_generated); - expect_false(err, "Unexpected shrink failure"); - expect_zu_eq(1, ta.shrink_count, ""); - ta.shrink_return_value = true; - err = pai_shrink( - tsdn, &sec.pai, edata, 2 * PAGE, PAGE, &deferred_work_generated); - expect_true(err, "Unexpected shrink success"); - expect_zu_eq(2, ta.shrink_count, ""); -} -TEST_END - -TEST_BEGIN(test_nshards_0) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - base_t *base = base_new(TSDN_NULL, /* ind */ 123, - &ehooks_default_extent_hooks, /* metadata_use_hooks */ true); - - sec_opts_t opts = SEC_OPTS_DEFAULT; +TEST_BEGIN(test_max_nshards_option_zero) { + test_data_t tdata; + sec_opts_t opts; opts.nshards = 0; - sec_init(TSDN_NULL, &sec, base, &ta.pai, &opts); + opts.max_alloc = PAGE; + opts.max_bytes = 512 * PAGE; - bool deferred_work_generated = false; - edata_t *edata = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - pai_dalloc(tsdn, &sec.pai, edata, &deferred_work_generated); + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); - /* Both operations should have gone directly to the fallback. */ - expect_zu_eq(1, ta.alloc_count, ""); - expect_zu_eq(1, ta.dalloc_count, ""); + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC should be disabled when nshards==0"); + destroy_test_data(tsdn, &tdata); } TEST_END +TEST_BEGIN(test_max_alloc_option_too_small) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 512 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + edata_t *edata = sec_alloc(tsdn, &tdata.sec, 3 * PAGE); + expect_ptr_null(edata, "max_alloc is 2*PAGE, should not alloc 3*PAGE"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_fill) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 4 * PAGE; + opts.batch_fill_extra = 2; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Fill the cache with two extents */ + sec_stats_t stats = {0}; + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1, edata2; + edata_size_set(&edata1, PAGE); + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, 
&edata1); + edata_list_active_append(&allocs, &edata2); + sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 2); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 2 * PAGE, "SEC should have what we filled"); + expect_true(edata_list_active_empty(&allocs), + "extents should be consumed by sec"); + + /* Try to overfill and confirm that max_bytes is respected. */ + stats.bytes = 0; + edata_t edata5, edata4, edata3; + edata_size_set(&edata3, PAGE); + edata_size_set(&edata4, PAGE); + edata_size_set(&edata5, PAGE); + edata_list_active_append(&allocs, &edata3); + edata_list_active_append(&allocs, &edata4); + edata_list_active_append(&allocs, &edata5); + sec_fill(tsdn, &tdata.sec, PAGE, &allocs, 3); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq( + stats.bytes, opts.max_bytes, "SEC can't have more than max_bytes"); + expect_false(edata_list_active_empty(&allocs), "Not all should fit"); + expect_zu_eq(stats.total.noverfills, 1, "Expected one overfill"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_alloc) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 4 * PAGE; + opts.batch_fill_extra = 1; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Alloc from empty cache returns NULL */ + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC is empty"); + + /* Place two extents into the sec */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1, edata2; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true(edata_list_active_empty(&allocs), ""); + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true(edata_list_active_empty(&allocs), ""); + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + 
expect_zu_eq(stats.bytes, 2 * PAGE, + "After fill bytes should reflect what is in the cache"); + stats.bytes = 0; + + /* Most recently cached extent should be used on alloc */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_eq(edata, &edata2, "edata2 is most recently used"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, PAGE, "One more item left in the cache"); + stats.bytes = 0; + + /* Alloc can still get extents from cache */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_eq(edata, &edata1, "SEC is not empty"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 0, "No more items after last one is popped"); + + /* And cache is empty again */ + edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC is empty"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_dalloc) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Return one extent into the cache */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty, we return one pointer to it */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + /* Return one more extent, so that we are at the limit */ + edata_t edata2; + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + /* Sec can take one more as well and we will be exactly at max_bytes */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs"); + stats.bytes = 0; + + /* + * 
We are at max_bytes. Now, we dalloc one more pointer and we go above + * the limit. This will force flush to 3/4 of max_bytes and given that + * we have max of 2 pages, we will have to flush two. We will not flush + * the one given in the input as it is the most recently used. + */ + edata_t edata3; + edata_size_set(&edata3, PAGE); + edata_list_active_append(&allocs, &edata3); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should NOT be consumed"); + expect_ptr_ne( + edata_list_active_first(&allocs), &edata3, "edata3 is MRU"); + expect_ptr_ne( + edata_list_active_last(&allocs), &edata3, "edata3 is MRU"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(PAGE, stats.bytes, "Should have flushed"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_max_bytes_too_low) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 4 * PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Return one extent into the cache. 
Item is too big */ + edata_list_active_t allocs; + edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, 3 * PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty, we return one pointer to it */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should not be consumed"); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_flush) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = 4 * PAGE; + opts.max_bytes = 1024 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* We put in 10 one-page extents, and 10 four-page extents */ + edata_list_active_t allocs1; + edata_list_active_t allocs4; + edata_list_active_init(&allocs1); + edata_list_active_init(&allocs4); + enum { NALLOCS = 10 }; + edata_t edata1[NALLOCS]; + edata_t edata4[NALLOCS]; + for (int i = 0; i < NALLOCS; i++) { + edata_size_set(&edata1[i], PAGE); + edata_size_set(&edata4[i], 4 * PAGE); + + edata_list_active_append(&allocs1, &edata1[i]); + sec_dalloc(tsdn, &tdata.sec, &allocs1); + edata_list_active_append(&allocs4, &edata4[i]); + sec_dalloc(tsdn, &tdata.sec, &allocs4); + } + + sec_stats_t stats = {0}; + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq( + stats.bytes, 10 * 5 * PAGE, "SEC should have what we filled"); + stats.bytes = 0; + + expect_true(edata_list_active_empty(&allocs1), ""); + sec_flush(tsdn, &tdata.sec, &allocs1); + expect_false(edata_list_active_empty(&allocs1), ""); + + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, 0, "SEC should be empty"); + stats.bytes = 0; + destroy_test_data(tsdn, &tdata); +} +TEST_END + +TEST_BEGIN(test_sec_stats) { + test_data_t tdata; + sec_opts_t opts; + opts.nshards = 1; + opts.max_alloc = PAGE; + opts.max_bytes = 2 * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + edata_list_active_t allocs; + 
edata_list_active_init(&allocs); + edata_t edata1; + edata_size_set(&edata1, PAGE); + edata_list_active_append(&allocs, &edata1); + + /* SEC is empty alloc fails. nmisses==1 */ + edata_t *edata = sec_alloc(tsdn, &tdata.sec, PAGE); + expect_ptr_null(edata, "SEC should be empty"); + + /* SEC is empty, we return one pointer to it. ndalloc_noflush=1 */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + edata_t edata2; + edata_size_set(&edata2, PAGE); + edata_list_active_append(&allocs, &edata2); + /* Sec can take one more, so ndalloc_noflush=2 */ + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_true( + edata_list_active_empty(&allocs), "extents should be consumed"); + + sec_stats_t stats; + memset(&stats, 0, sizeof(sec_stats_t)); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.bytes, opts.max_bytes, "Size should match deallocs"); + expect_zu_eq(stats.total.ndalloc_noflush, 2, ""); + expect_zu_eq(stats.total.nmisses, 1, ""); + + memset(&stats, 0, sizeof(sec_stats_t)); + + /* + * We are at max_bytes. Now, we dalloc one more pointer and we go above + * the limit. This will force flush, so ndalloc_flush = 1. 
+ */ + edata_t edata3; + edata_size_set(&edata3, PAGE); + edata_list_active_append(&allocs, &edata3); + sec_dalloc(tsdn, &tdata.sec, &allocs); + expect_false( + edata_list_active_empty(&allocs), "extents should NOT be consumed"); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(PAGE, stats.bytes, "Should have flushed"); + expect_zu_eq(stats.total.ndalloc_flush, 1, ""); + memset(&stats, 0, sizeof(sec_stats_t)); + destroy_test_data(tsdn, &tdata); +} +TEST_END + +#define NOPS_PER_THREAD 100 +#define NPREFILL 32 + static void -expect_stats_pages(tsdn_t *tsdn, sec_t *sec, size_t npages) { - sec_stats_t stats; +edata_init_test(edata_t *edata) { + memset(edata, 0, sizeof(*edata)); +} + +typedef struct { + sec_t *sec; + uint8_t preferred_shard; + size_t nallocs; + size_t nallocs_fail; + size_t ndallocs; + size_t ndallocs_fail; + edata_list_active_t fill_list; + size_t fill_list_sz; + edata_t *edata[NOPS_PER_THREAD]; +} trylock_test_arg_t; + +static void * +thd_trylock_test(void *varg) { + trylock_test_arg_t *arg = (trylock_test_arg_t *)varg; + tsd_t *tsd = tsd_fetch(); + tsdn_t *tsdn = tsd_tsdn(tsd); + + /* Set the preferred shard for this thread */ + uint8_t *shard_idx = tsd_sec_shardp_get(tsd); + *shard_idx = arg->preferred_shard; + + /* Fill the shard with some extents */ + sec_fill(tsdn, arg->sec, PAGE, &arg->fill_list, arg->fill_list_sz); + expect_true(edata_list_active_empty(&arg->fill_list), ""); + + for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { + /* Try to allocate from SEC */ + arg->edata[i] = sec_alloc(tsdn, arg->sec, PAGE); + if (arg->edata[i] != NULL) { + expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); + } + } + + for (unsigned i = 0; i < NOPS_PER_THREAD; i++) { + if (arg->edata[i] != NULL) { + edata_list_active_t list; + edata_list_active_init(&list); + arg->nallocs++; + edata_list_active_append(&list, arg->edata[i]); + expect_zu_eq(edata_size_get(arg->edata[i]), PAGE, ""); + sec_dalloc(tsdn, arg->sec, &list); + if 
(edata_list_active_empty(&list)) { + arg->ndallocs++; + } else { + arg->ndallocs_fail++; + } + } else { + arg->nallocs_fail++; + } + } + + return NULL; +} + +TEST_BEGIN(test_sec_multishard) { + test_data_t tdata; + sec_opts_t opts; + enum { NSHARDS = 2 }; + enum { NTHREADS = NSHARDS * 16 }; + opts.nshards = NSHARDS; + opts.max_alloc = 2 * PAGE; + opts.max_bytes = 64 * NTHREADS * PAGE; + + tsdn_t *tsdn = tsd_tsdn(tsd_fetch()); + test_data_init(tsdn, &tdata, &opts); + + /* Create threads with different preferred shards */ + thd_t thds[NTHREADS]; + trylock_test_arg_t args[NTHREADS]; + + edata_t all_edatas[NPREFILL * NTHREADS]; + + for (unsigned i = 0; i < NTHREADS; i++) { + edata_list_active_init(&args[i].fill_list); + for (unsigned j = 0; j < NPREFILL; ++j) { + size_t ind = i * NPREFILL + j; + edata_init_test(&all_edatas[ind]); + edata_size_set(&all_edatas[ind], PAGE); + edata_list_active_append( + &args[i].fill_list, &all_edatas[ind]); + } + args[i].fill_list_sz = NPREFILL; + args[i].sec = &tdata.sec; + args[i].preferred_shard = i % opts.nshards; + args[i].nallocs = 0; + args[i].nallocs_fail = 0; + args[i].ndallocs = 0; + args[i].ndallocs_fail = 0; + memset( + &args[i].edata[0], 0, NOPS_PER_THREAD * sizeof(edata_t *)); + thd_create(&thds[i], thd_trylock_test, &args[i]); + } + + for (unsigned i = 0; i < NTHREADS; i++) { + thd_join(thds[i], NULL); + } + + /* Wait for all threads to complete */ + size_t total_allocs = 0; + size_t total_dallocs = 0; + size_t total_allocs_fail = 0; + for (unsigned i = 0; i < NTHREADS; i++) { + total_allocs += args[i].nallocs; + total_dallocs += args[i].ndallocs; + total_allocs_fail += args[i].nallocs_fail; + } + + /* We must have at least some hits */ + expect_zu_gt(total_allocs, 0, ""); /* - * Check that the stats merging accumulates rather than overwrites by - * putting some (made up) data there to begin with. + * We must have at least some successful dallocs by design (max_bytes is + * big enough). 
*/ - stats.bytes = 123; - sec_stats_merge(tsdn, sec, &stats); - assert_zu_le(npages * PAGE + 123, stats.bytes, ""); -} + expect_zu_gt(total_dallocs, 0, ""); -TEST_BEGIN(test_stats_simple) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; + /* Get final stats to verify that hits and misses are accurate */ + sec_stats_t stats = {0}; + memset(&stats, 0, sizeof(sec_stats_t)); + sec_stats_merge(tsdn, &tdata.sec, &stats); + expect_zu_eq(stats.total.nhits, total_allocs, ""); + expect_zu_eq(stats.total.nmisses, total_allocs_fail, ""); - /* See the note above -- we can't use the real tsd. */ - tsdn_t *tsdn = TSDN_NULL; - - enum { - NITERS = 100, - FLUSH_PAGES = 20, - }; - - bool deferred_work_generated = false; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - edata_t *allocs[FLUSH_PAGES]; - for (size_t i = 0; i < FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, 0); - } - - /* Increase and decrease, without flushing. */ - for (size_t i = 0; i < NITERS; i++) { - for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - pai_dalloc(tsdn, &sec.pai, allocs[j], - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, j + 1); - } - for (size_t j = 0; j < FLUSH_PAGES / 2; j++) { - allocs[j] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, - /* frequent_reuse */ false, - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, FLUSH_PAGES / 2 - j - 1); - } - } -} -TEST_END - -TEST_BEGIN(test_stats_auto_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - enum { - FLUSH_PAGES = 10, - }; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - edata_t *extra_alloc0; - edata_t *extra_alloc1; - edata_t *allocs[2 * FLUSH_PAGES]; - - bool deferred_work_generated = false; - - extra_alloc0 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - extra_alloc1 = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, /* zero */ false, - /* guarded */ false, /* frequent_reuse */ false, - &deferred_work_generated); - - for (size_t i = 0; i < 2 * FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - } - - for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - } - pai_dalloc(tsdn, &sec.pai, extra_alloc0, &deferred_work_generated); - - /* Flush the remaining pages; stats should still work. */ - for (size_t i = 0; i < FLUSH_PAGES; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES + i], - &deferred_work_generated); - } - - pai_dalloc(tsdn, &sec.pai, extra_alloc1, &deferred_work_generated); - - expect_stats_pages(tsdn, &sec, - ta.alloc_count + ta.alloc_batch_count - ta.dalloc_count - - ta.dalloc_batch_count); -} -TEST_END - -TEST_BEGIN(test_stats_manual_flush) { - pai_test_allocator_t ta; - pai_test_allocator_init(&ta); - sec_t sec; - - /* See the note above -- we can't use the real tsd. 
*/ - tsdn_t *tsdn = TSDN_NULL; - - enum { - FLUSH_PAGES = 10, - }; - - test_sec_init(&sec, &ta.pai, /* nshards */ 1, /* max_alloc */ PAGE, - /* max_bytes */ FLUSH_PAGES * PAGE); - - bool deferred_work_generated = false; - edata_t *allocs[FLUSH_PAGES]; - for (size_t i = 0; i < FLUSH_PAGES; i++) { - allocs[i] = pai_alloc(tsdn, &sec.pai, PAGE, PAGE, - /* zero */ false, /* guarded */ false, /* frequent_reuse */ - false, &deferred_work_generated); - expect_stats_pages(tsdn, &sec, 0); - } - - /* Dalloc the first half of the allocations. */ - for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[i], &deferred_work_generated); - expect_stats_pages(tsdn, &sec, i + 1); - } - - sec_flush(tsdn, &sec); - expect_stats_pages(tsdn, &sec, 0); - - /* Flush the remaining pages. */ - for (size_t i = 0; i < FLUSH_PAGES / 2; i++) { - pai_dalloc(tsdn, &sec.pai, allocs[FLUSH_PAGES / 2 + i], - &deferred_work_generated); - expect_stats_pages(tsdn, &sec, i + 1); - } - sec_disable(tsdn, &sec); - expect_stats_pages(tsdn, &sec, 0); + destroy_test_data(tsdn, &tdata); } TEST_END int main(void) { - return test(test_reuse, test_auto_flush, test_disable, test_flush, - test_max_alloc_respected, test_expand_shrink_delegate, - test_nshards_0, test_stats_simple, test_stats_auto_flush, - test_stats_manual_flush); + return test(test_max_nshards_option_zero, + test_max_alloc_option_too_small, test_sec_fill, test_sec_alloc, + test_sec_dalloc, test_max_bytes_too_low, test_sec_flush, + test_sec_stats, test_sec_multishard); } From 365747bc8d1cf202342d905555d7cd360f9ba118 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 22 Dec 2025 20:36:03 -0800 Subject: [PATCH 2565/2608] Use the BRE construct \{1,\} for one or more consecutive matches This removes duplication introduced by my earlier commit that eliminating the use of the non-standard "\+" from BREs in the configure script. 
--- configure.ac | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 5e907511..897f1719 100644 --- a/configure.ac +++ b/configure.ac @@ -652,7 +652,7 @@ AC_ARG_WITH([version], [AS_HELP_STRING([--with-version=..--g], [Version string])], [ - echo "${with_version}" | grep ['^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*-[0-9][0-9]*-g[0-9a-f][0-9a-f]*$'] 2>&1 1>/dev/null + echo "${with_version}" | grep ['^[0-9]\{1,\}\.[0-9]\{1,\}\.[0-9]\{1,\}-[0-9]\{1,\}-g[0-9a-f]\{1,\}$'] 2>&1 1>/dev/null if test $? -eq 0 ; then echo "$with_version" > "${objroot}VERSION" else @@ -2059,7 +2059,7 @@ if test "x${je_cv_lg_hugepage}" = "x" ; then dnl Hugepagesize: 2048 kB if test -e "/proc/meminfo" ; then hpsk=[`cat /proc/meminfo 2>/dev/null | \ - grep '^Hugepagesize:[[:space:]][[:space:]]*[0-9][0-9]*[[:space:]]kB$' | \ + grep '^Hugepagesize:[[:space:]]\{1,\}[0-9]\{1,\}[[:space:]]kB$' | \ awk '{print $2}'`] if test "x${hpsk}" != "x" ; then je_cv_lg_hugepage=10 From 5f353dc28383d070ffa540d1679153f8101e2aa7 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Tue, 23 Dec 2025 14:18:43 -0800 Subject: [PATCH 2566/2608] Remove an incorrect use of the address operator The address of the local variable created_threads is a different location than the data it points to. Incorrectly treating these values as being the same can cause out-of-bounds writes to the stack. 
Closes: facebook/jemalloc#59 --- src/background_thread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/background_thread.c b/src/background_thread.c index 2eb08dd2..82911ee7 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ -448,7 +448,7 @@ background_thread0_work(tsd_t *tsd) { } if (check_background_thread_creation(tsd, const_max_background_threads, &n_created, - (bool *)&created_threads)) { + created_threads)) { continue; } background_work_sleep_once( From c51abba131e7665e05da0de60c66fb219976050d Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Tue, 20 Jan 2026 18:56:32 -0800 Subject: [PATCH 2567/2608] Determine the page size on Android from NDK header files The definition of the PAGE_SIZE macro is used as a signal for a 32-bit target or a 64-bit target with an older NDK. Otherwise, a 16KiB page size is assumed. Closes: #2657 --- configure.ac | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configure.ac b/configure.ac index 897f1719..376779b0 100644 --- a/configure.ac +++ b/configure.ac @@ -1990,6 +1990,11 @@ case "${host}" in LG_PAGE=14 fi ;; + *-*-linux-android) + if test "x$LG_PAGE" = "xdetect"; then + AC_CHECK_DECLS([PAGE_SIZE], [LG_PAGE=12], [LG_PAGE=14], [#include ]) + fi + ;; aarch64-unknown-linux-*) if test "x$LG_PAGE" = "xdetect"; then LG_PAGE=16 From d4908fe44a869858840fc7b9d4d3e69a3629a25f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 5 Feb 2026 19:18:19 -0800 Subject: [PATCH 2568/2608] Revert "Experimental configuration option for fast path prefetch from cache_bin" This reverts commit f9fae9f1f841f8c6c566746480865da8ae3a1d11. 
--- configure.ac | 30 ------------------- .../internal/jemalloc_internal_defs.h.in | 5 ---- .../internal/jemalloc_internal_inlines_c.h | 6 ---- 3 files changed, 41 deletions(-) diff --git a/configure.ac b/configure.ac index 376779b0..ae206a19 100644 --- a/configure.ac +++ b/configure.ac @@ -1434,36 +1434,6 @@ if test "x$enable_experimental_smallocx" = "x1" ; then fi AC_SUBST([enable_experimental_smallocx]) -dnl Do not enable fastpath prefetch by default. -AC_ARG_ENABLE([experimental_fp_prefetch], - [AS_HELP_STRING([--enable-experimental-fp-prefetch], [Enable experimental fastpath prefetch])], -[if test "x$enable_experimental_fp_prefetch" = "xno" ; then -enable_experimental_fp_prefetch="0" -else - dnl Check if we have __builtin_prefetch. - JE_CFLAGS_SAVE() - JE_CFLAGS_ADD([-Werror=implicit-function-declaration]) - JE_COMPILABLE([builtin prefetch], [], [ -void foo(void *p) { __builtin_prefetch(p, 1, 3); } - ], - [je_cv_have_builtin_prefetch]) - - if test "x${je_cv_have_builtin_prefetch}" = "xyes" ; then - enable_experimental_fp_prefetch="1" - else - enable_experimental_fp_prefetch="0" - AC_MSG_ERROR([--enable--experimental-fp-prefetch can only be used when builtin_preftech is available]) - fi - JE_CFLAGS_RESTORE() -fi -], -[enable_experimental_fp_prefetch="0"] -) -if test "x$enable_experimental_fp_prefetch" = "x1" ; then - AC_DEFINE([JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH], [ ], [ ]) -fi -AC_SUBST([enable_experimental_fp_prefetch]) - dnl Do not enable profiling by default. AC_ARG_ENABLE([prof], [AS_HELP_STRING([--enable-prof], [Enable allocation profiling])], diff --git a/include/jemalloc/internal/jemalloc_internal_defs.h.in b/include/jemalloc/internal/jemalloc_internal_defs.h.in index 3a945ba1..31ae2e8e 100644 --- a/include/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/include/jemalloc/internal/jemalloc_internal_defs.h.in @@ -160,11 +160,6 @@ /* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. 
*/ #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API -/* JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH enables prefetch - * on malloc fast path. - */ -#undef JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH - /* JEMALLOC_PROF enables allocation profiling. */ #undef JEMALLOC_PROF diff --git a/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 16f86ad4..2c61f8c4 100644 --- a/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -374,12 +374,6 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { */ ret = cache_bin_alloc_easy(bin, &tcache_success); if (tcache_success) { -#if defined(JEMALLOC_EXPERIMENTAL_FASTPATH_PREFETCH) - cache_bin_sz_t lb = (cache_bin_sz_t)(uintptr_t)bin->stack_head; - if(likely(lb != bin->low_bits_empty)) { - util_prefetch_write_range(*(bin->stack_head), usize); - } -#endif fastpath_success_finish(tsd, allocated_after, bin, ret); return ret; } From 4d0ffa075b93fe9263cfd5f11467b2e8df44ed93 Mon Sep 17 00:00:00 2001 From: Andrei Pechkurov Date: Fri, 9 Jan 2026 21:55:45 +0200 Subject: [PATCH 2569/2608] Fix background thread initialization race --- Makefile.in | 1 + src/background_thread.c | 8 +- test/unit/background_thread_init.c | 183 +++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 2 deletions(-) create mode 100644 test/unit/background_thread_init.c diff --git a/Makefile.in b/Makefile.in index 83f04e64..4b5b6507 100644 --- a/Makefile.in +++ b/Makefile.in @@ -206,6 +206,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/atomic.c \ $(srcroot)test/unit/background_thread.c \ $(srcroot)test/unit/background_thread_enable.c \ + $(srcroot)test/unit/background_thread_init.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ $(srcroot)test/unit/binshard.c \ diff --git a/src/background_thread.c b/src/background_thread.c index 82911ee7..4901856a 100644 --- a/src/background_thread.c +++ b/src/background_thread.c @@ 
-547,8 +547,13 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { bool need_new_thread; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); + /* + * The last check is there to leave Thread 0 creation entirely + * to the initializing thread (arena 0). + */ need_new_thread = background_thread_enabled() - && (info->state == background_thread_stopped); + && (info->state == background_thread_stopped) + && (thread_ind != 0 || arena_ind == 0); if (need_new_thread) { background_thread_init(tsd, info); } @@ -560,7 +565,6 @@ background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) { /* Threads are created asynchronously by Thread 0. */ background_thread_info_t *t0 = &background_thread_info[0]; malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); - assert(t0->state == background_thread_started); pthread_cond_signal(&t0->cond); malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); diff --git a/test/unit/background_thread_init.c b/test/unit/background_thread_init.c new file mode 100644 index 00000000..169b96c7 --- /dev/null +++ b/test/unit/background_thread_init.c @@ -0,0 +1,183 @@ +#include "test/jemalloc_test.h" + +/* + * Test to verify that background thread initialization has no race conditions. + * + * See https://github.com/facebook/jemalloc/pull/68 + */ + +#ifdef JEMALLOC_BACKGROUND_THREAD +const char *malloc_conf = "background_thread:true,percpu_arena:percpu"; +#else +const char *malloc_conf = ""; +#endif + +#define N_INIT_THREADS 32 +#define N_ITERATIONS 10 + +static mtx_t barrier_mtx; +static atomic_u32_t n_waiting; +static unsigned n_threads; +static atomic_b_t release; + +/* + * Simple spin barrier - all threads wait until everyone arrives, + * then they all proceed to call malloc() simultaneously. 
+ */ +static void +barrier_wait(void) { + mtx_lock(&barrier_mtx); + uint32_t waiting = atomic_load_u32(&n_waiting, ATOMIC_RELAXED) + 1; + atomic_store_u32(&n_waiting, waiting, ATOMIC_RELAXED); + bool should_release = (waiting == n_threads); + mtx_unlock(&barrier_mtx); + + if (should_release) { + atomic_store_b(&release, true, ATOMIC_RELEASE); + } + + while (!atomic_load_b(&release, ATOMIC_ACQUIRE)) { + /* Spin until released. */ + } +} + +static void +barrier_reset(void) { + atomic_store_u32(&n_waiting, 0, ATOMIC_RELAXED); + atomic_store_b(&release, false, ATOMIC_RELAXED); +} + +static void * +thd_start(void *arg) { + barrier_wait(); + + /* + * All threads race to malloc simultaneously. + * This triggers concurrent arena initialization with percpu_arena. + */ + void *p = malloc(64); + expect_ptr_not_null(p, "malloc failed"); + free(p); + + return NULL; +} + +TEST_BEGIN(test_mt_background_thread_init) { + test_skip_if(!have_background_thread); + test_skip_if(!have_percpu_arena || + !PERCPU_ARENA_ENABLED(opt_percpu_arena)); + + thd_t thds[N_INIT_THREADS]; + + expect_false(mtx_init(&barrier_mtx), "mtx_init failed"); + n_threads = N_INIT_THREADS; + barrier_reset(); + + /* Create threads that will all race to call malloc(). */ + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_create(&thds[i], thd_start, NULL); + } + + /* Wait for all threads to complete. */ + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_join(thds[i], NULL); + } + + mtx_fini(&barrier_mtx); + + /* + * Verify background threads are properly running. Before the fix, + * the race could leave Thread 0 marked as "started" without an + * actual pthread behind it. 
+ */ +#ifdef JEMALLOC_BACKGROUND_THREAD + tsd_t *tsd = tsd_fetch(); + background_thread_info_t *t0 = &background_thread_info[0]; + + malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); + expect_d_eq(t0->state, background_thread_started, + "Thread 0 should be in started state"); + malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); + + expect_zu_gt(n_background_threads, 0, + "At least one background thread should be running"); +#endif +} +TEST_END + +TEST_BEGIN(test_mt_background_thread_init_stress) { + test_skip_if(!have_background_thread); + test_skip_if(!config_stats); + + thd_t thds[N_INIT_THREADS]; + + expect_false(mtx_init(&barrier_mtx), "mtx_init failed"); + n_threads = N_INIT_THREADS; + + /* + * Run multiple iterations to increase the chance of hitting + * any race conditions. Each iteration creates new threads that + * perform allocations concurrently. + */ + for (unsigned iter = 0; iter < N_ITERATIONS; iter++) { + barrier_reset(); + + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_create(&thds[i], thd_start, NULL); + } + + for (unsigned i = 0; i < N_INIT_THREADS; i++) { + thd_join(thds[i], NULL); + } + } + + mtx_fini(&barrier_mtx); + +#ifdef JEMALLOC_BACKGROUND_THREAD + /* + * Verify Thread 0 is actually running by checking it has done work. + * Wait up to a few seconds for the background thread to run. + */ + tsd_t *tsd = tsd_fetch(); + background_thread_info_t *t0 = &background_thread_info[0]; + + nstime_t start; + nstime_init_update(&start); + + bool ran = false; + while (!ran) { + malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx); + if (t0->tot_n_runs > 0) { + ran = true; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx); + + if (ran) { + break; + } + + nstime_t now; + nstime_init_update(&now); + nstime_subtract(&now, &start); + if (nstime_sec(&now) > 10) { + /* + * If Thread 0 hasn't run after 10 seconds, it's + * likely not actually running (the bug condition). 
+ */ + expect_true(false, + "Thread 0 did not run within 10 seconds - " + "possible initialization race"); + break; + } + sleep_ns(100 * 1000 * 1000); /* 100ms */ + } +#endif +} +TEST_END + +int +main(void) { + return test_no_reentrancy( + test_mt_background_thread_init, + test_mt_background_thread_init_stress); +} From 34ace9169bad794cea6f8639e188d83b42310762 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Thu, 12 Feb 2026 11:49:28 -0800 Subject: [PATCH 2570/2608] Remove prof_threshold built-in event. It is trivial to implement it as user event if needed --- Makefile.in | 3 - include/jemalloc/internal/prof_externs.h | 5 - include/jemalloc/internal/prof_hook.h | 6 - include/jemalloc/internal/prof_threshold.h | 8 -- .../jemalloc/internal/thread_event_registry.h | 1 - .../projects/vc2015/jemalloc/jemalloc.vcxproj | 1 - .../vc2015/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2017/jemalloc/jemalloc.vcxproj | 1 - .../vc2017/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2019/jemalloc/jemalloc.vcxproj | 1 - .../vc2019/jemalloc/jemalloc.vcxproj.filters | 3 - .../projects/vc2022/jemalloc/jemalloc.vcxproj | 1 - .../vc2022/jemalloc/jemalloc.vcxproj.filters | 3 - src/ctl.c | 30 ----- src/jemalloc.c | 5 - src/prof_threshold.c | 69 ----------- src/thread_event.c | 9 -- src/thread_event_registry.c | 3 +- test/unit/mallctl.c | 1 - test/unit/prof_threshold.c | 112 ------------------ test/unit/prof_threshold_small.c | 2 - test/unit/prof_threshold_small.sh | 1 - 22 files changed, 1 insertion(+), 270 deletions(-) delete mode 100644 include/jemalloc/internal/prof_threshold.h delete mode 100644 src/prof_threshold.c delete mode 100644 test/unit/prof_threshold.c delete mode 100644 test/unit/prof_threshold_small.c delete mode 100644 test/unit/prof_threshold_small.sh diff --git a/Makefile.in b/Makefile.in index 4b5b6507..9db36530 100644 --- a/Makefile.in +++ b/Makefile.in @@ -145,7 +145,6 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/prof_stack_range.c \ 
$(srcroot)src/prof_stats.c \ $(srcroot)src/prof_sys.c \ - $(srcroot)src/prof_threshold.c \ $(srcroot)src/psset.c \ $(srcroot)src/rtree.c \ $(srcroot)src/safety_check.c \ @@ -271,8 +270,6 @@ TESTS_UNIT := \ $(srcroot)test/unit/prof_stats.c \ $(srcroot)test/unit/prof_tctx.c \ $(srcroot)test/unit/prof_thread_name.c \ - $(srcroot)test/unit/prof_threshold.c \ - $(srcroot)test/unit/prof_threshold_small.c \ $(srcroot)test/unit/prof_sys_thread_name.c \ $(srcroot)test/unit/psset.c \ $(srcroot)test/unit/ql.c \ diff --git a/include/jemalloc/internal/prof_externs.h b/include/jemalloc/internal/prof_externs.h index e41e30a0..e07e69f5 100644 --- a/include/jemalloc/internal/prof_externs.h +++ b/include/jemalloc/internal/prof_externs.h @@ -12,8 +12,6 @@ extern bool opt_prof_active; extern bool opt_prof_thread_active_init; extern unsigned opt_prof_bt_max; extern size_t opt_lg_prof_sample; /* Mean bytes between samples. */ -extern size_t - opt_experimental_lg_prof_threshold; /* Mean bytes between thresholds. */ extern ssize_t opt_lg_prof_interval; /* lg(prof_interval). */ extern bool opt_prof_gdump; /* High-water memory dumping. */ extern bool opt_prof_final; /* Final profile dumping. 
*/ @@ -70,9 +68,6 @@ prof_sample_hook_t prof_sample_hook_get(void); void prof_sample_free_hook_set(prof_sample_free_hook_t hook); prof_sample_free_hook_t prof_sample_free_hook_get(void); -void prof_threshold_hook_set(prof_threshold_hook_t hook); -prof_threshold_hook_t prof_threshold_hook_get(void); - /* Functions only accessed in prof_inlines.h */ prof_tdata_t *prof_tdata_init(tsd_t *tsd); prof_tdata_t *prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata); diff --git a/include/jemalloc/internal/prof_hook.h b/include/jemalloc/internal/prof_hook.h index 69dfaabf..d5a9b0ff 100644 --- a/include/jemalloc/internal/prof_hook.h +++ b/include/jemalloc/internal/prof_hook.h @@ -27,10 +27,4 @@ typedef void (*prof_sample_hook_t)(const void *ptr, size_t size, /* ptr, size */ typedef void (*prof_sample_free_hook_t)(const void *, size_t); -/* - * A callback hook that notifies when an allocation threshold has been crossed. - */ -typedef void (*prof_threshold_hook_t)( - uint64_t alloc, uint64_t dealloc, uint64_t peak); - #endif /* JEMALLOC_INTERNAL_PROF_HOOK_H */ diff --git a/include/jemalloc/internal/prof_threshold.h b/include/jemalloc/internal/prof_threshold.h deleted file mode 100644 index 93e9478e..00000000 --- a/include/jemalloc/internal/prof_threshold.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_THRESHOLD_EVENT_H -#define JEMALLOC_INTERNAL_THRESHOLD_EVENT_H - -#include "jemalloc/internal/tsd_types.h" - -extern te_base_cb_t prof_threshold_te_handler; - -#endif /* JEMALLOC_INTERNAL_THRESHOLD_EVENT_H */ diff --git a/include/jemalloc/internal/thread_event_registry.h b/include/jemalloc/internal/thread_event_registry.h index 7ded440d..bfb140aa 100644 --- a/include/jemalloc/internal/thread_event_registry.h +++ b/include/jemalloc/internal/thread_event_registry.h @@ -14,7 +14,6 @@ enum te_alloc_e { te_alloc_stats_interval, te_alloc_tcache_gc, #ifdef JEMALLOC_STATS - te_alloc_prof_threshold, te_alloc_peak, #endif te_alloc_user0, diff --git 
a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj index 1e8def75..ca2a8532 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2015/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj index 45ddf73d..c5d1116b 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2017/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj index f1a5158a..4df570c8 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2019/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj index a6f92ccf..5e256ec6 100644 --- 
a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj @@ -83,7 +83,6 @@ - diff --git a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters index f6e340cf..443e71a5 100644 --- a/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters +++ b/msvc/projects/vc2022/jemalloc/jemalloc.vcxproj.filters @@ -133,9 +133,6 @@ Source Files - - Source Files - Source Files diff --git a/src/ctl.c b/src/ctl.c index 1260e197..4cac5608 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -156,7 +156,6 @@ CTL_PROTO(opt_prof_active) CTL_PROTO(opt_prof_thread_active_init) CTL_PROTO(opt_prof_bt_max) CTL_PROTO(opt_lg_prof_sample) -CTL_PROTO(opt_experimental_lg_prof_threshold) CTL_PROTO(opt_lg_prof_interval) CTL_PROTO(opt_prof_gdump) CTL_PROTO(opt_prof_final) @@ -364,7 +363,6 @@ CTL_PROTO(experimental_hooks_prof_backtrace) CTL_PROTO(experimental_hooks_prof_dump) CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) -CTL_PROTO(experimental_hooks_prof_threshold) CTL_PROTO(experimental_hooks_thread_event) CTL_PROTO(experimental_hooks_safety_check_abort) CTL_PROTO(experimental_thread_activity_callback) @@ -527,8 +525,6 @@ static const ctl_named_node_t opt_node[] = {{NAME("abort"), CTL(opt_abort)}, {NAME("prof_thread_active_init"), CTL(opt_prof_thread_active_init)}, {NAME("prof_bt_max"), CTL(opt_prof_bt_max)}, {NAME("lg_prof_sample"), CTL(opt_lg_prof_sample)}, - {NAME("experimental_lg_prof_threshold"), - CTL(opt_experimental_lg_prof_threshold)}, {NAME("lg_prof_interval"), CTL(opt_lg_prof_interval)}, {NAME("prof_gdump"), CTL(opt_prof_gdump)}, {NAME("prof_final"), CTL(opt_prof_final)}, @@ -890,7 +886,6 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("prof_dump"), CTL(experimental_hooks_prof_dump)}, {NAME("prof_sample"), CTL(experimental_hooks_prof_sample)}, {NAME("prof_sample_free"), CTL(experimental_hooks_prof_sample_free)}, - 
{NAME("prof_threshold"), CTL(experimental_hooks_prof_threshold)}, {NAME("safety_check_abort"), CTL(experimental_hooks_safety_check_abort)}, {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; @@ -2236,8 +2231,6 @@ CTL_RO_NL_CGEN( config_prof, opt_prof_thread_active_init, opt_prof_thread_active_init, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_bt_max, opt_prof_bt_max, unsigned) CTL_RO_NL_CGEN(config_prof, opt_lg_prof_sample, opt_lg_prof_sample, size_t) -CTL_RO_NL_CGEN(config_prof, opt_experimental_lg_prof_threshold, - opt_experimental_lg_prof_threshold, size_t) CTL_RO_NL_CGEN(config_prof, opt_prof_accum, opt_prof_accum, bool) CTL_RO_NL_CGEN( config_prof, opt_prof_pid_namespace, opt_prof_pid_namespace, bool) @@ -3681,29 +3674,6 @@ label_return: return ret; } -static int -experimental_hooks_prof_threshold_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - - if (oldp == NULL && newp == NULL) { - ret = EINVAL; - goto label_return; - } - if (oldp != NULL) { - prof_threshold_hook_t old_hook = prof_threshold_hook_get(); - READ(old_hook, prof_threshold_hook_t); - } - if (newp != NULL) { - prof_threshold_hook_t new_hook JEMALLOC_CC_SILENCE_INIT(NULL); - WRITE(new_hook, prof_threshold_hook_t); - prof_threshold_hook_set(new_hook); - } - ret = 0; -label_return: - return ret; -} - static int experimental_hooks_thread_event_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { diff --git a/src/jemalloc.c b/src/jemalloc.c index 5d23962d..d82788eb 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -1747,11 +1747,6 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], "lg_prof_sample", 0, (sizeof(uint64_t) << 3) - 1, CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) - CONF_HANDLE_SIZE_T( - opt_experimental_lg_prof_threshold, - "experimental_lg_prof_threshold", 0, - (sizeof(uint64_t) << 3) - 1, - CONF_DONT_CHECK_MIN, 
CONF_CHECK_MAX, true) CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") CONF_HANDLE_UNSIGNED(opt_prof_bt_max, "prof_bt_max", 1, PROF_BT_MAX_LIMIT, diff --git a/src/prof_threshold.c b/src/prof_threshold.c deleted file mode 100644 index 5b72a491..00000000 --- a/src/prof_threshold.c +++ /dev/null @@ -1,69 +0,0 @@ -#include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/jemalloc_internal_includes.h" - -#include "jemalloc/internal/activity_callback.h" -#include "jemalloc/internal/prof_threshold.h" - -#include "jemalloc/internal/prof_externs.h" - -/* - * Update every 128MB by default. - */ -#define PROF_THRESHOLD_LG_WAIT_DEFAULT 27 - -/* Logically a prof_threshold_hook_t. */ -static atomic_p_t prof_threshold_hook; -size_t opt_experimental_lg_prof_threshold = PROF_THRESHOLD_LG_WAIT_DEFAULT; - -void -prof_threshold_hook_set(prof_threshold_hook_t hook) { - atomic_store_p(&prof_threshold_hook, hook, ATOMIC_RELEASE); -} - -prof_threshold_hook_t -prof_threshold_hook_get(void) { - return (prof_threshold_hook_t)atomic_load_p( - &prof_threshold_hook, ATOMIC_ACQUIRE); -} - -/* Invoke callback for threshold reached */ -static inline void -prof_threshold_update(tsd_t *tsd) { - prof_threshold_hook_t prof_threshold_hook = prof_threshold_hook_get(); - if (prof_threshold_hook == NULL) { - return; - } - uint64_t alloc = tsd_thread_allocated_get(tsd); - uint64_t dalloc = tsd_thread_deallocated_get(tsd); - peak_t *peak = tsd_peakp_get(tsd); - pre_reentrancy(tsd, NULL); - prof_threshold_hook(alloc, dalloc, peak->cur_max); - post_reentrancy(tsd); -} - -uint64_t -prof_threshold_new_event_wait(tsd_t *tsd) { - return 1 << opt_experimental_lg_prof_threshold; -} - -uint64_t -prof_threshold_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; -} - -void -prof_threshold_event_handler(tsd_t *tsd) { - prof_threshold_update(tsd); -} - -static te_enabled_t -prof_threshold_enabled(void) { - return config_stats ? 
te_enabled_yes : te_enabled_no; -} - -te_base_cb_t prof_threshold_te_handler = { - .enabled = &prof_threshold_enabled, - .new_event_wait = &prof_threshold_new_event_wait, - .postponed_event_wait = &prof_threshold_postponed_event_wait, - .event_handler = &prof_threshold_event_handler, -}; diff --git a/src/thread_event.c b/src/thread_event.c index c59027ed..82776342 100644 --- a/src/thread_event.c +++ b/src/thread_event.c @@ -290,15 +290,6 @@ te_update_alloc_events(tsd_t *tsd, te_base_cb_t **to_trigger, to_trigger[nto_trigger++] = te_alloc_handlers[te_alloc_peak]; } - assert(te_enabled_yes - == te_alloc_handlers[te_alloc_prof_threshold]->enabled()); - if (te_update_wait(tsd, accumbytes, allow, - &waits[te_alloc_prof_threshold], wait, - te_alloc_handlers[te_alloc_prof_threshold], - 1 << opt_experimental_lg_prof_threshold)) { - to_trigger[nto_trigger++] = - te_alloc_handlers[te_alloc_prof_threshold]; - } #endif for (te_alloc_t ue = te_alloc_user0; ue <= te_alloc_user3; ue++) { diff --git a/src/thread_event_registry.c b/src/thread_event_registry.c index 05882616..b8307df0 100644 --- a/src/thread_event_registry.c +++ b/src/thread_event_registry.c @@ -6,7 +6,6 @@ #include "jemalloc/internal/tcache_externs.h" #include "jemalloc/internal/peak_event.h" #include "jemalloc/internal/prof_externs.h" -#include "jemalloc/internal/prof_threshold.h" #include "jemalloc/internal/stats.h" static malloc_mutex_t uevents_mu; @@ -149,7 +148,7 @@ te_base_cb_t *te_alloc_handlers[te_alloc_count] = { #endif &stats_interval_te_handler, &tcache_gc_te_handler, #ifdef JEMALLOC_STATS - &prof_threshold_te_handler, &peak_te_handler, + &peak_te_handler, #endif &user_alloc_handler0, &user_alloc_handler1, &user_alloc_handler2, &user_alloc_handler3}; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4c11e485..4cd0225b 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -342,7 +342,6 @@ TEST_BEGIN(test_mallctl_opt) { TEST_MALLCTL_OPT(bool, prof_active, prof); 
TEST_MALLCTL_OPT(unsigned, prof_bt_max, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_sample, prof); - TEST_MALLCTL_OPT(ssize_t, experimental_lg_prof_threshold, prof); TEST_MALLCTL_OPT(bool, prof_accum, prof); TEST_MALLCTL_OPT(bool, prof_pid_namespace, prof); TEST_MALLCTL_OPT(ssize_t, lg_prof_interval, prof); diff --git a/test/unit/prof_threshold.c b/test/unit/prof_threshold.c deleted file mode 100644 index a31a5a24..00000000 --- a/test/unit/prof_threshold.c +++ /dev/null @@ -1,112 +0,0 @@ -#include "test/jemalloc_test.h" - -/* Test config (set in reset_test_config) */ -#define ALLOC_ITERATIONS_IN_THRESHOLD 10 -uint64_t threshold_bytes = 0; -uint64_t chunk_size = 0; - -/* Test globals for calblack */ -uint64_t hook_calls = 0; -uint64_t last_peak = 0; -uint64_t last_alloc = 0; -uint64_t alloc_baseline = 0; - -void -mock_prof_threshold_hook(uint64_t alloc, uint64_t dealloc, uint64_t peak) { - hook_calls++; - last_peak = peak; - last_alloc = alloc; -} - -/* Need the do_write flag because NULL is a valid to_write value. */ -static void -read_write_prof_threshold_hook(prof_threshold_hook_t *to_read, bool do_write, - prof_threshold_hook_t to_write) { - size_t hook_sz = sizeof(prof_threshold_hook_t); - expect_d_eq( - mallctl("experimental.hooks.prof_threshold", (void *)to_read, - &hook_sz, do_write ? 
&to_write : NULL, hook_sz), - 0, "Unexpected prof_threshold_hook mallctl failure"); -} - -static void -write_prof_threshold_hook(prof_threshold_hook_t new_hook) { - read_write_prof_threshold_hook(NULL, true, new_hook); -} - -static prof_threshold_hook_t -read_prof_threshold_hook() { - prof_threshold_hook_t hook; - read_write_prof_threshold_hook(&hook, false, NULL); - return hook; -} - -static void -reset_test_config() { - hook_calls = 0; - last_peak = 0; - alloc_baseline = last_alloc; /* We run the test multiple times */ - last_alloc = 0; - threshold_bytes = 1 << opt_experimental_lg_prof_threshold; - chunk_size = threshold_bytes / ALLOC_ITERATIONS_IN_THRESHOLD; -} - -static void -expect_threshold_calls(int calls) { - expect_u64_eq( - hook_calls, calls, "Hook called the right amount of times"); - expect_u64_lt( - last_peak, chunk_size * 2, "We allocate chunk_size at a time"); - expect_u64_ge( - last_alloc, threshold_bytes * calls + alloc_baseline, "Crosses"); -} - -static void -allocate_chunks(int chunks) { - for (int i = 0; i < chunks; i++) { - void *p = mallocx((size_t)chunk_size, 0); - expect_ptr_not_null(p, "Failed to allocate"); - free(p); - } -} - -TEST_BEGIN(test_prof_threshold_hook) { - test_skip_if(!config_stats); - - /* Test setting and reading the hook (both value and null) */ - write_prof_threshold_hook(mock_prof_threshold_hook); - expect_ptr_eq(read_prof_threshold_hook(), mock_prof_threshold_hook, - "Unexpected hook"); - - write_prof_threshold_hook(NULL); - expect_ptr_null(read_prof_threshold_hook(), "Hook was erased"); - - /* Reset everything before the test */ - reset_test_config(); - write_prof_threshold_hook(mock_prof_threshold_hook); - - int err = mallctl("thread.peak.reset", NULL, NULL, NULL, 0); - expect_d_eq(err, 0, "Peak reset failed"); - - /* Note that since we run this test multiple times and we don't reset - the allocation counter, each time we offset the callback by the - amount we allocate over the threshold. 
*/ - - /* A simple small allocation is not enough to trigger the callback */ - allocate_chunks(1); - expect_u64_eq(hook_calls, 0, "Hook not called yet"); - - /* Enough allocations to trigger the callback */ - allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); - expect_threshold_calls(1); - - /* Enough allocations to trigger the callback again */ - allocate_chunks(ALLOC_ITERATIONS_IN_THRESHOLD); - expect_threshold_calls(2); -} -TEST_END - -int -main(void) { - return test(test_prof_threshold_hook); -} diff --git a/test/unit/prof_threshold_small.c b/test/unit/prof_threshold_small.c deleted file mode 100644 index 67f444b1..00000000 --- a/test/unit/prof_threshold_small.c +++ /dev/null @@ -1,2 +0,0 @@ -#include "test/jemalloc_test.h" -#include "prof_threshold.c" diff --git a/test/unit/prof_threshold_small.sh b/test/unit/prof_threshold_small.sh deleted file mode 100644 index 62726069..00000000 --- a/test/unit/prof_threshold_small.sh +++ /dev/null @@ -1 +0,0 @@ -export MALLOC_CONF="experimental_lg_prof_threshold:22" From 0fa27fd28fd75fc3305d61c742ed028c5b874231 Mon Sep 17 00:00:00 2001 From: Tony Printezis Date: Thu, 19 Feb 2026 12:42:52 -0800 Subject: [PATCH 2571/2608] Run single subtest from a test file Add mechanism to be able to select a test to run from a test file. The test harness will read the JEMALLOC_TEST_NAME env and, if set, it will only run subtests with that name. 
--- test/include/test/test.h | 11 +++++++---- test/src/test.c | 18 ++++++++++++++++-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/test/include/test/test.h b/test/include/test/test.h index 025c167d..79f47e98 100644 --- a/test/include/test/test.h +++ b/test/include/test/test.h @@ -520,12 +520,15 @@ typedef void(test_t)(void); #define TEST_BEGIN(f) \ static void f(void) { \ - p_test_init(#f); + const bool skip_test = p_test_init(#f); \ + if (skip_test) { \ + goto label_test_end; \ + } #define TEST_END \ goto label_test_end; \ label_test_end: \ - p_test_fini(); \ + p_test_fini(skip_test); \ } #define test(...) p_test(__VA_ARGS__, NULL) @@ -552,6 +555,6 @@ void test_fail(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); test_status_t p_test(test_t *t, ...); test_status_t p_test_no_reentrancy(test_t *t, ...); test_status_t p_test_no_malloc_init(test_t *t, ...); -void p_test_init(const char *name); -void p_test_fini(void); +bool p_test_init(const char *name); +void p_test_fini(bool skip_test); void p_test_fail(bool may_abort, const char *prefix, const char *message); diff --git a/test/src/test.c b/test/src/test.c index 6eb84338..e5e33ae6 100644 --- a/test/src/test.c +++ b/test/src/test.c @@ -6,6 +6,7 @@ static unsigned test_count = 0; static test_status_t test_counts[test_status_count] = {0, 0, 0}; static test_status_t test_status = test_status_pass; static const char *test_name = ""; +static const char *selected_test_name = NULL; /* Reentrancy testing helpers. 
*/ @@ -100,15 +101,26 @@ test_status_string(test_status_t current_status) { } } -void +bool p_test_init(const char *name) { + if (selected_test_name != NULL && strcmp(selected_test_name, name)) { + /* skip test */ + return true; + } + test_count++; test_status = test_status_pass; test_name = name; + + return false; } void -p_test_fini(void) { +p_test_fini(bool skip_test) { + if (skip_test) { + return; + } + test_counts[test_status]++; malloc_printf("%s (%s): %s\n", test_name, reentrancy_t_str(reentrancy), test_status_string(test_status)); @@ -130,6 +142,8 @@ check_global_slow(test_status_t *status) { static test_status_t p_test_impl(bool do_malloc_init, bool do_reentrant, test_t *t, va_list ap) { + selected_test_name = getenv("JEMALLOC_TEST_NAME"); + test_status_t ret; if (do_malloc_init) { From a10ef3e1f1c7593fb1cb211329e02c542af14694 Mon Sep 17 00:00:00 2001 From: Yuxuan Chen Date: Tue, 24 Feb 2026 18:12:56 -0800 Subject: [PATCH 2572/2608] configure: add --with-cxx-stdlib option When C++ support is enabled, configure unconditionally probes `-lstdc++` and keeps it in LIBS if the link test succeeds. On platforms using libc++, this probe can succeed at compile time (if libstdc++ headers/libraries happen to be installed) but then cause runtime failures when configure tries to execute test binaries because `libstdc++.so.6` isn't actually available. Add a `--with-cxx-stdlib=` option that lets the build system specify which C++ standard library to link. When given, the probe is skipped and the specified library is linked directly. When not given, the original probe behavior is preserved. 
--- configure.ac | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/configure.ac b/configure.ac index ae206a19..e57d0667 100644 --- a/configure.ac +++ b/configure.ac @@ -324,6 +324,15 @@ fi , enable_cxx="1" ) +AC_ARG_WITH([cxx_stdlib], + [AS_HELP_STRING([--with-cxx-stdlib=], + [Specify the C++ standard library to link (default: probe for libstdc++)])], + [case "${with_cxx_stdlib}" in + libstdc++|libcxx) ;; + *) AC_MSG_ERROR([bad value ${with_cxx_stdlib} for --with-cxx-stdlib]) ;; + esac], + [with_cxx_stdlib=""] +) if test "x$enable_cxx" = "x1" ; then dnl Require at least c++14, which is the first version to support sized dnl deallocation. C++ support is not compiled otherwise. @@ -338,17 +347,28 @@ if test "x$enable_cxx" = "x1" ; then JE_CXXFLAGS_ADD([-g3]) SAVED_LIBS="${LIBS}" - JE_APPEND_VS(LIBS, -lstdc++) - JE_COMPILABLE([libstdc++ linkage], [ + case "${with_cxx_stdlib}" in + libstdc++) + JE_APPEND_VS(LIBS, -lstdc++) + ;; + libcxx) + JE_APPEND_VS(LIBS, -lc++) + ;; + *) + dnl Probe for libstdc++ (the default when --with-cxx-stdlib is not given). + JE_APPEND_VS(LIBS, -lstdc++) + JE_COMPILABLE([libstdc++ linkage], [ #include ], [[ int *arr = (int *)malloc(sizeof(int) * 42); if (arr == NULL) return 1; ]], [je_cv_libstdcxx]) - if test "x${je_cv_libstdcxx}" = "xno" ; then - LIBS="${SAVED_LIBS}" - fi + if test "x${je_cv_libstdcxx}" = "xno" ; then + LIBS="${SAVED_LIBS}" + fi + ;; + esac else enable_cxx="0" fi From 79cc7dcc827bb506f5be0345df2a7ce356b84165 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Wed, 25 Feb 2026 13:00:42 -0800 Subject: [PATCH 2573/2608] Guard os_page_id against a NULL address While undocumented, the prctl system call will set errno to ENOMEM when passed NULL as an address. Under that condition, an assertion that check for EINVAL as the only possible errno value will fail. To avoid the assertion failure, this change skips the call to os_page_id when address is NULL. 
NULL can only occur after mmap fails in which case there is no mapping to name. --- src/pages.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/pages.c b/src/pages.c index e7766fcc..2a4f0093 100644 --- a/src/pages.c +++ b/src/pages.c @@ -113,8 +113,12 @@ os_page_id(void *addr, size_t size, const char *name) { * While parsing `/proc//maps` file, the block could appear as * 7f4836000000-7f4836800000 rw-p 00000000 00:00 0 [anon:jemalloc_pg_overcommit]` */ - return prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, + int n; + assert(addr != NULL); + n = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, (uintptr_t)addr, size, (uintptr_t)name); + assert(n == 0 || (n == -1 && get_errno() == EINVAL)); + return n; # else return 0; # endif @@ -187,9 +191,10 @@ os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL && ret == addr)); #ifdef JEMALLOC_PAGEID - int n = os_page_id(ret, size, - os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); - assert(n == 0 || (n == -1 && get_errno() == EINVAL)); + if (ret != NULL) { + os_page_id(ret, size, + os_overcommits ? "jemalloc_pg_overcommit" : "jemalloc_pg"); + } #endif return ret; } From 12b33ed8f1a776ea36a5bafa14c65461b9efa64d Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Wed, 18 Feb 2026 17:39:57 -0800 Subject: [PATCH 2574/2608] Fix wrong mutex stats in json-formatted malloc stats During mutex stats emit, derived counters are not emitted for json. Yet the array indexing counter should still be increased to skip derived elements in the output, which was not. This commit fixes it. 
--- src/stats.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stats.c b/src/stats.c index be70a6fc..22b412bd 100644 --- a/src/stats.c +++ b/src/stats.c @@ -260,10 +260,10 @@ mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, #define OP(counter, type, human, derived, base_counter) \ if (!derived) { \ col = &col_##type[k_##type]; \ - ++k_##type; \ emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ (const void *)&col->bool_val); \ - } + } \ + ++k_##type; MUTEX_PROF_COUNTERS; #undef OP #undef EMITTER_TYPE_uint32_t From c73ab1c2ff9c47ad56c2d550b7481bbc80119bcb Mon Sep 17 00:00:00 2001 From: guangli-dai Date: Sat, 21 Feb 2026 11:36:55 -0800 Subject: [PATCH 2575/2608] Add a test to check the output in JSON-based stats is consistent with mallctl results. --- Makefile.in | 1 + test/unit/json_stats.c | 243 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 244 insertions(+) create mode 100644 test/unit/json_stats.c diff --git a/Makefile.in b/Makefile.in index 9db36530..f916ad71 100644 --- a/Makefile.in +++ b/Makefile.in @@ -240,6 +240,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk.c \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ + $(srcroot)test/unit/json_stats.c \ $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/malloc_conf_2.c \ diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c new file mode 100644 index 00000000..ea8a170b --- /dev/null +++ b/test/unit/json_stats.c @@ -0,0 +1,243 @@ +#include "test/jemalloc_test.h" + +typedef struct { + char *buf; + size_t len; + size_t capacity; +} stats_buf_t; + +static void +stats_buf_init(stats_buf_t *sbuf) { + /* 1MB buffer should be enough since per-arena stats are omitted. 
*/ + sbuf->capacity = 1 << 20; + sbuf->buf = mallocx(sbuf->capacity, MALLOCX_TCACHE_NONE); + assert_ptr_not_null(sbuf->buf, "Failed to allocate stats buffer"); + sbuf->len = 0; + sbuf->buf[0] = '\0'; +} + +static void +stats_buf_fini(stats_buf_t *sbuf) { + dallocx(sbuf->buf, MALLOCX_TCACHE_NONE); +} + +static void +stats_buf_write_cb(void *opaque, const char *str) { + stats_buf_t *sbuf = (stats_buf_t *)opaque; + size_t slen = strlen(str); + + if (sbuf->len + slen + 1 > sbuf->capacity) { + return; + } + memcpy(&sbuf->buf[sbuf->len], str, slen + 1); + sbuf->len += slen; +} + +static bool +json_extract_uint64(const char *json, const char *key, uint64_t *result) { + char search_key[128]; + size_t key_len; + + key_len = snprintf(search_key, sizeof(search_key), "\"%s\":", key); + if (key_len >= sizeof(search_key)) { + return true; + } + + const char *pos = strstr(json, search_key); + if (pos == NULL) { + return true; + } + + pos += key_len; + while (*pos == ' ' || *pos == '\t' || *pos == '\n') { + pos++; + } + + char *endptr; + uint64_t value = strtoull(pos, &endptr, 10); + if (endptr == pos) { + return true; + } + + *result = value; + return false; +} + +static const char * +json_find_section(const char *json, const char *section_name) { + char search_pattern[128]; + size_t pattern_len; + + pattern_len = snprintf( + search_pattern, sizeof(search_pattern), "\"%s\":", section_name); + if (pattern_len >= sizeof(search_pattern)) { + return NULL; + } + + return strstr(json, search_pattern); +} + +static void +verify_mutex_json(const char *mutexes_section, const char *mallctl_prefix, + const char *mutex_name) { + char mallctl_path[128]; + size_t sz; + + const char *mutex_section = json_find_section( + mutexes_section, mutex_name); + expect_ptr_not_null(mutex_section, + "Could not find %s mutex section in JSON", mutex_name); + + uint64_t ctl_num_ops, ctl_num_wait, ctl_num_spin_acq; + uint64_t ctl_num_owner_switch, ctl_total_wait_time, ctl_max_wait_time; + uint32_t 
ctl_max_num_thds; + + sz = sizeof(uint64_t); + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_ops", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_ops, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_wait", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_wait, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_spin_acq", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_spin_acq, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.num_owner_switch", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_num_owner_switch, &sz, NULL, 0), + 0, "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.total_wait_time", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_total_wait_time, &sz, NULL, 0), + 0, "Unexpected mallctl() failure for %s", mallctl_path); + + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.max_wait_time", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_max_wait_time, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + sz = sizeof(uint32_t); + snprintf(mallctl_path, sizeof(mallctl_path), "%s.%s.max_num_thds", + mallctl_prefix, mutex_name); + expect_d_eq(mallctl(mallctl_path, &ctl_max_num_thds, &sz, NULL, 0), 0, + "Unexpected mallctl() failure for %s", mallctl_path); + + uint64_t json_num_ops, json_num_wait, json_num_spin_acq; + uint64_t json_num_owner_switch, json_total_wait_time, + json_max_wait_time; + uint64_t json_max_num_thds; + + expect_false( + json_extract_uint64(mutex_section, "num_ops", &json_num_ops), + "%s: num_ops not found in JSON", mutex_name); + expect_false( + 
json_extract_uint64(mutex_section, "num_wait", &json_num_wait), + "%s: num_wait not found in JSON", mutex_name); + expect_false(json_extract_uint64( + mutex_section, "num_spin_acq", &json_num_spin_acq), + "%s: num_spin_acq not found in JSON", mutex_name); + expect_false(json_extract_uint64(mutex_section, "num_owner_switch", + &json_num_owner_switch), + "%s: num_owner_switch not found in JSON", mutex_name); + expect_false(json_extract_uint64(mutex_section, "total_wait_time", + &json_total_wait_time), + "%s: total_wait_time not found in JSON", mutex_name); + expect_false(json_extract_uint64( + mutex_section, "max_wait_time", &json_max_wait_time), + "%s: max_wait_time not found in JSON", mutex_name); + expect_false(json_extract_uint64( + mutex_section, "max_num_thds", &json_max_num_thds), + "%s: max_num_thds not found in JSON", mutex_name); + + expect_u64_eq(json_num_ops, ctl_num_ops, + "%s: JSON num_ops doesn't match mallctl", mutex_name); + expect_u64_eq(json_num_wait, ctl_num_wait, + "%s: JSON num_wait doesn't match mallctl", mutex_name); + expect_u64_eq(json_num_spin_acq, ctl_num_spin_acq, + "%s: JSON num_spin_acq doesn't match mallctl", mutex_name); + expect_u64_eq(json_num_owner_switch, ctl_num_owner_switch, + "%s: JSON num_owner_switch doesn't match mallctl", mutex_name); + expect_u64_eq(json_total_wait_time, ctl_total_wait_time, + "%s: JSON total_wait_time doesn't match mallctl", mutex_name); + expect_u64_eq(json_max_wait_time, ctl_max_wait_time, + "%s: JSON max_wait_time doesn't match mallctl", mutex_name); + expect_u32_eq((uint32_t)json_max_num_thds, ctl_max_num_thds, + "%s: JSON max_num_thds doesn't match mallctl", mutex_name); +} + +static const char *global_mutex_names[] = {"background_thread", + "max_per_bg_thd", "ctl", "prof", "prof_thds_data", "prof_dump", + "prof_recent_alloc", "prof_recent_dump", "prof_stats"}; +static const size_t num_global_mutexes = sizeof(global_mutex_names) + / sizeof(global_mutex_names[0]); + +static const char 
*arena_mutex_names[] = {"large", "extent_avail", + "extents_dirty", "extents_muzzy", "extents_retained", "decay_dirty", + "decay_muzzy", "base", "tcache_list", "hpa_shard", "hpa_shard_grow", + "hpa_sec"}; +static const size_t num_arena_mutexes = sizeof(arena_mutex_names) + / sizeof(arena_mutex_names[0]); + +TEST_BEGIN(test_json_stats_mutexes) { + test_skip_if(!config_stats); + + uint64_t epoch; + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sizeof(epoch)), + 0, "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + /* "J" for JSON format, "a" to omit per-arena stats. */ + malloc_stats_print(stats_buf_write_cb, &sbuf, "Ja"); + + /* Verify global mutexes under stats.mutexes. */ + const char *global_mutexes_section = json_find_section( + sbuf.buf, "mutexes"); + expect_ptr_not_null(global_mutexes_section, + "Could not find global mutexes section in JSON output"); + + for (size_t i = 0; i < num_global_mutexes; i++) { + verify_mutex_json(global_mutexes_section, "stats.mutexes", + global_mutex_names[i]); + } + + /* Verify arena mutexes under stats.arenas.merged.mutexes. */ + const char *arenas_section = json_find_section( + sbuf.buf, "stats.arenas"); + expect_ptr_not_null(arenas_section, + "Could not find stats.arenas section in JSON output"); + + const char *merged_section = json_find_section( + arenas_section, "merged"); + expect_ptr_not_null( + merged_section, "Could not find merged section in JSON output"); + + const char *arena_mutexes_section = json_find_section( + merged_section, "mutexes"); + expect_ptr_not_null(arena_mutexes_section, + "Could not find arena mutexes section in JSON output"); + + for (size_t i = 0; i < num_arena_mutexes; i++) { + /* + * MALLCTL_ARENAS_ALL is 4096 representing all arenas in + * mallctl queries. 
+ */ + verify_mutex_json(arena_mutexes_section, + "stats.arenas.4096.mutexes", arena_mutex_names[i]); + } + + stats_buf_fini(&sbuf); +} +TEST_END + +int +main(void) { + return test(test_json_stats_mutexes); +} From 1cc563f531ae26ffa17f7afb3568cf773d80550a Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 23 Feb 2026 23:31:12 -0800 Subject: [PATCH 2576/2608] Move bin functions from arena.c to bin.c This is a clean-up change that gives the bin functions implemented in the area code a prefix of bin_ and moves them into the bin code. To further decouple the bin code from the arena code, bin functions that had taken an arena_t to check arena_is_auto now take an is_auto parameter instead. --- include/jemalloc/internal/arena_externs.h | 6 - include/jemalloc/internal/arena_inlines_b.h | 8 +- include/jemalloc/internal/bin.h | 38 +++ src/arena.c | 308 ++------------------ src/bin.c | 263 +++++++++++++++++ src/large.c | 4 +- src/tcache.c | 4 +- 7 files changed, 326 insertions(+), 305 deletions(-) diff --git a/include/jemalloc/internal/arena_externs.h b/include/jemalloc/internal/arena_externs.h index 1d004635..39794b3e 100644 --- a/include/jemalloc/internal/arena_externs.h +++ b/include/jemalloc/internal/arena_externs.h @@ -79,10 +79,6 @@ void arena_dalloc_promoted( tsdn_t *tsdn, void *ptr, tcache_t *tcache, bool slow_path); void arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab); -void arena_dalloc_bin_locked_handle_newly_empty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); -void arena_dalloc_bin_locked_handle_newly_nonempty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); void arena_dalloc_small(tsdn_t *tsdn, void *ptr); void arena_ptr_array_flush(tsd_t *tsd, szind_t binind, cache_bin_ptr_array_t *arr, unsigned nflush, bool small, @@ -111,8 +107,6 @@ void arena_nthreads_dec(arena_t *arena, bool internal); arena_t *arena_new(tsdn_t *tsdn, unsigned ind, const arena_config_t *config); bool arena_init_huge(tsdn_t *tsdn, arena_t *a0); 
arena_t *arena_choose_huge(tsd_t *tsd); -bin_t *arena_bin_choose( - tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard); size_t arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, void **ptrs, size_t nfill, bool zero); bool arena_boot(sc_data_t *sc_data, base_t *base, bool hpa); diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index 6276deaa..a0caf586 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -609,12 +609,12 @@ arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, unsigned nfree = edata_nfree_get(slab); if (nfree == bin_info->nregs) { - arena_dalloc_bin_locked_handle_newly_empty( - tsdn, arena, slab, bin); + bin_dalloc_locked_handle_newly_empty( + tsdn, arena_is_auto(arena), slab, bin); return true; } else if (nfree == 1 && slab != bin->slabcur) { - arena_dalloc_bin_locked_handle_newly_nonempty( - tsdn, arena, slab, bin); + bin_dalloc_locked_handle_newly_nonempty( + tsdn, arena_is_auto(arena), slab, bin); } return false; } diff --git a/include/jemalloc/internal/bin.h b/include/jemalloc/internal/bin.h index 05a2f845..51d4c89e 100644 --- a/include/jemalloc/internal/bin.h +++ b/include/jemalloc/internal/bin.h @@ -2,6 +2,7 @@ #define JEMALLOC_INTERNAL_BIN_H #include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/bin_info.h" #include "jemalloc/internal/bin_stats.h" #include "jemalloc/internal/bin_types.h" #include "jemalloc/internal/edata.h" @@ -61,6 +62,43 @@ void bin_prefork(tsdn_t *tsdn, bin_t *bin); void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); +/* Slab region allocation. */ +void *bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info); +void bin_slab_reg_alloc_batch( + edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs); + +/* Slab list management. 
*/ +void bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab); +void bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab); +edata_t *bin_slabs_nonfull_tryget(bin_t *bin); +void bin_slabs_full_insert(bool is_auto, bin_t *bin, edata_t *slab); +void bin_slabs_full_remove(bool is_auto, bin_t *bin, edata_t *slab); + +/* Slab association / demotion. */ +void bin_dissociate_slab(bool is_auto, edata_t *slab, bin_t *bin); +void bin_lower_slab(tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin); + +/* Deallocation helpers (called under bin lock). */ +void bin_dalloc_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin); +void bin_dalloc_locked_handle_newly_empty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin); +void bin_dalloc_locked_handle_newly_nonempty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin); + +/* Slabcur refill and allocation. */ +void bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab); +void *bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab); +bool bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, bool is_auto, + bin_t *bin); +void *bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin, + szind_t binind); + +/* Bin selection. */ +bin_t *bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p); + /* Stats. 
*/ static inline void bin_stats_merge(tsdn_t *tsdn, bin_stats_data_t *dst_bin_stats, bin_t *bin) { diff --git a/src/arena.c b/src/arena.c index 5b144c63..338cc330 100644 --- a/src/arena.c +++ b/src/arena.c @@ -66,8 +66,6 @@ const arena_config_t arena_config_default = { static bool arena_decay_dirty( tsdn_t *tsdn, arena_t *arena, bool is_background_thread, bool all); -static void arena_bin_lower_slab( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin); static void arena_maybe_do_deferred_work( tsdn_t *tsdn, arena_t *arena, decay_t *decay, size_t npages_new); @@ -241,71 +239,6 @@ arena_handle_deferred_work(tsdn_t *tsdn, arena_t *arena) { arena_background_thread_inactivity_check(tsdn, arena, false); } -static void * -arena_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { - void *ret; - slab_data_t *slab_data = edata_slab_data_get(slab); - size_t regind; - - assert(edata_nfree_get(slab) > 0); - assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - - regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); - ret = (void *)((byte_t *)edata_addr_get(slab) - + (uintptr_t)(bin_info->reg_size * regind)); - edata_nfree_dec(slab); - return ret; -} - -static void -arena_slab_reg_alloc_batch( - edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) { - slab_data_t *slab_data = edata_slab_data_get(slab); - - assert(edata_nfree_get(slab) >= cnt); - assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); - -#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) - for (unsigned i = 0; i < cnt; i++) { - size_t regind = bitmap_sfu( - slab_data->bitmap, &bin_info->bitmap_info); - *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) - + (uintptr_t)(bin_info->reg_size * regind)); - } -#else - unsigned group = 0; - bitmap_t g = slab_data->bitmap[group]; - unsigned i = 0; - while (i < cnt) { - while (g == 0) { - g = slab_data->bitmap[++group]; - } - size_t shift = group << LG_BITMAP_GROUP_NBITS; - size_t pop 
= popcount_lu(g); - if (pop > (cnt - i)) { - pop = cnt - i; - } - - /* - * Load from memory locations only once, outside the - * hot loop below. - */ - uintptr_t base = (uintptr_t)edata_addr_get(slab); - uintptr_t regsize = (uintptr_t)bin_info->reg_size; - while (pop--) { - size_t bit = cfs_lu(&g); - size_t regind = shift + bit; - /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ - *(ptrs + i) = (void *)(base + regsize * regind); - - i++; - } - slab_data->bitmap[group] = g; - } -#endif - edata_nfree_sub(slab, cnt); -} - static void arena_large_malloc_stats_update(tsdn_t *tsdn, arena_t *arena, size_t usize) { cassert(config_stats); @@ -622,58 +555,6 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, edata_t *slab) { } } -static void -arena_bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { - assert(edata_nfree_get(slab) > 0); - edata_heap_insert(&bin->slabs_nonfull, slab); - if (config_stats) { - bin->stats.nonfull_slabs++; - } -} - -static void -arena_bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { - edata_heap_remove(&bin->slabs_nonfull, slab); - if (config_stats) { - bin->stats.nonfull_slabs--; - } -} - -static edata_t * -arena_bin_slabs_nonfull_tryget(bin_t *bin) { - edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); - if (slab == NULL) { - return NULL; - } - if (config_stats) { - bin->stats.reslabs++; - bin->stats.nonfull_slabs--; - } - return slab; -} - -static void -arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, edata_t *slab) { - assert(edata_nfree_get(slab) == 0); - /* - * Tracking extents is required by arena_reset, which is not allowed - * for auto arenas. Bypass this step to avoid touching the edata - * linkage (often results in cache misses) for auto arenas. 
- */ - if (arena_is_auto(arena)) { - return; - } - edata_list_active_append(&bin->slabs_full, slab); -} - -static void -arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, edata_t *slab) { - if (arena_is_auto(arena)) { - return; - } - edata_list_active_remove(&bin->slabs_full, slab); -} - static void arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { edata_t *slab; @@ -694,7 +575,7 @@ arena_bin_reset(tsd_t *tsd, arena_t *arena, bin_t *bin) { } for (slab = edata_list_active_first(&bin->slabs_full); slab != NULL; slab = edata_list_active_first(&bin->slabs_full)) { - arena_bin_slabs_full_remove(arena, bin, slab); + bin_slabs_full_remove(false, bin, slab); malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock); arena_slab_dalloc(tsd_tsdn(tsd), arena, slab); malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); @@ -985,73 +866,6 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, return slab; } -/* - * Before attempting the _with_fresh_slab approaches below, the _no_fresh_slab - * variants (i.e. through slabcur and nonfull) must be tried first. - */ -static void -arena_bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, - bin_t *bin, szind_t binind, edata_t *fresh_slab) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - /* Only called after slabcur and nonfull both failed. 
*/ - assert(bin->slabcur == NULL); - assert(edata_heap_first(&bin->slabs_nonfull) == NULL); - assert(fresh_slab != NULL); - - /* A new slab from arena_slab_alloc() */ - assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); - if (config_stats) { - bin->stats.nslabs++; - bin->stats.curslabs++; - } - bin->slabcur = fresh_slab; -} - -/* Refill slabcur and then alloc using the fresh slab */ -static void * -arena_bin_malloc_with_fresh_slab(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - szind_t binind, edata_t *fresh_slab) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - arena_bin_refill_slabcur_with_fresh_slab( - tsdn, arena, bin, binind, fresh_slab); - - return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); -} - -static bool -arena_bin_refill_slabcur_no_fresh_slab( - tsdn_t *tsdn, arena_t *arena, bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - /* Only called after arena_slab_reg_alloc[_batch] failed. */ - assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); - - if (bin->slabcur != NULL) { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - } - - /* Look for a usable slab. 
*/ - bin->slabcur = arena_bin_slabs_nonfull_tryget(bin); - assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); - - return (bin->slabcur == NULL); -} - -bin_t * -arena_bin_choose( - tsdn_t *tsdn, arena_t *arena, szind_t binind, unsigned *binshard_p) { - unsigned binshard; - if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { - binshard = 0; - } else { - binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; - } - assert(binshard < bin_infos[binind].n_shards); - if (binshard_p != NULL) { - *binshard_p = binshard; - } - return arena_get_bin(arena, binind, binshard); -} - cache_bin_sz_t arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, cache_bin_ptr_array_t *arr, const cache_bin_sz_t nfill_min, @@ -1088,9 +902,10 @@ arena_ptr_array_fill_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool made_progress = true; edata_t *fresh_slab = NULL; bool alloc_and_retry = false; + bool is_auto = arena_is_auto(arena); cache_bin_sz_t filled = 0; unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + bin_t *bin = bin_choose(tsdn, arena, binind, &binshard); label_refill: malloc_mutex_lock(tsdn, &bin->lock); @@ -1109,22 +924,22 @@ label_refill: cnt = nfill_min - filled; } - arena_slab_reg_alloc_batch( + bin_slab_reg_alloc_batch( slabcur, bin_info, cnt, &arr->ptr[filled]); made_progress = true; filled += cnt; continue; } /* Next try refilling slabcur from nonfull slabs. */ - if (!arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { + if (!bin_refill_slabcur_no_fresh_slab(tsdn, is_auto, bin)) { assert(bin->slabcur != NULL); continue; } /* Then see if a new slab was reserved already. 
*/ if (fresh_slab != NULL) { - arena_bin_refill_slabcur_with_fresh_slab( - tsdn, arena, bin, binind, fresh_slab); + bin_refill_slabcur_with_fresh_slab( + tsdn, bin, binind, fresh_slab); assert(bin->slabcur != NULL); fresh_slab = NULL; continue; @@ -1193,7 +1008,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, const bool manual_arena = !arena_is_auto(arena); unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + bin_t *bin = bin_choose(tsdn, arena, binind, &binshard); size_t nslab = 0; size_t filled = 0; @@ -1212,7 +1027,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, batch = nregs; } assert(batch > 0); - arena_slab_reg_alloc_batch( + bin_slab_reg_alloc_batch( slab, bin_info, (unsigned)batch, &ptrs[filled]); assert(edata_addr_get(slab) == ptrs[filled]); if (zero) { @@ -1233,7 +1048,7 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, * iff slab != NULL. */ if (slab != NULL) { - arena_bin_lower_slab(tsdn, arena, slab, bin); + bin_lower_slab(tsdn, !manual_arena, slab, bin); } if (manual_arena) { edata_list_active_concat(&bin->slabs_full, &fulls); @@ -1252,35 +1067,18 @@ arena_fill_small_fresh(tsdn_t *tsdn, arena_t *arena, szind_t binind, return filled; } -/* - * Without allocating a new slab, try arena_slab_reg_alloc() and re-fill - * bin->slabcur if necessary. 
- */ -static void * -arena_bin_malloc_no_fresh_slab( - tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { - if (arena_bin_refill_slabcur_no_fresh_slab(tsdn, arena, bin)) { - return NULL; - } - } - - assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); -} - static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { assert(binind < SC_NBINS); const bin_info_t *bin_info = &bin_infos[binind]; size_t usize = sz_index2size(binind); + bool is_auto = arena_is_auto(arena); unsigned binshard; - bin_t *bin = arena_bin_choose(tsdn, arena, binind, &binshard); + bin_t *bin = bin_choose(tsdn, arena, binind, &binshard); malloc_mutex_lock(tsdn, &bin->lock); edata_t *fresh_slab = NULL; - void *ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + void *ret = bin_malloc_no_fresh_slab(tsdn, is_auto, bin, binind); if (ret == NULL) { malloc_mutex_unlock(tsdn, &bin->lock); /******************************/ @@ -1289,15 +1087,15 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { /********************************/ malloc_mutex_lock(tsdn, &bin->lock); /* Retry since the lock was dropped. */ - ret = arena_bin_malloc_no_fresh_slab(tsdn, arena, bin, binind); + ret = bin_malloc_no_fresh_slab(tsdn, is_auto, bin, binind); if (ret == NULL) { if (fresh_slab == NULL) { /* OOM */ malloc_mutex_unlock(tsdn, &bin->lock); return NULL; } - ret = arena_bin_malloc_with_fresh_slab( - tsdn, arena, bin, binind, fresh_slab); + ret = bin_malloc_with_fresh_slab( + tsdn, bin, binind, fresh_slab); fresh_slab = NULL; } } @@ -1366,78 +1164,6 @@ arena_palloc(tsdn_t *tsdn, arena_t *arena, size_t usize, size_t alignment, } } -static void -arena_dissociate_bin_slab(arena_t *arena, edata_t *slab, bin_t *bin) { - /* Dissociate slab from bin. 
*/ - if (slab == bin->slabcur) { - bin->slabcur = NULL; - } else { - szind_t binind = edata_szind_get(slab); - const bin_info_t *bin_info = &bin_infos[binind]; - - /* - * The following block's conditional is necessary because if the - * slab only contains one region, then it never gets inserted - * into the non-full slabs heap. - */ - if (bin_info->nregs == 1) { - arena_bin_slabs_full_remove(arena, bin, slab); - } else { - arena_bin_slabs_nonfull_remove(bin, slab); - } - } -} - -static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - assert(edata_nfree_get(slab) > 0); - - /* - * Make sure that if bin->slabcur is non-NULL, it refers to the - * oldest/lowest non-full slab. It is okay to NULL slabcur out rather - * than proactively keeping it pointing at the oldest/lowest non-full - * slab. - */ - if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { - /* Switch slabcur. */ - if (edata_nfree_get(bin->slabcur) > 0) { - arena_bin_slabs_nonfull_insert(bin, bin->slabcur); - } else { - arena_bin_slabs_full_insert(arena, bin, bin->slabcur); - } - bin->slabcur = slab; - if (config_stats) { - bin->stats.reslabs++; - } - } else { - arena_bin_slabs_nonfull_insert(bin, slab); - } -} - -static void -arena_dalloc_bin_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { - malloc_mutex_assert_owner(tsdn, &bin->lock); - - assert(slab != bin->slabcur); - if (config_stats) { - bin->stats.curslabs--; - } -} - -void -arena_dalloc_bin_locked_handle_newly_empty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - arena_dissociate_bin_slab(arena, slab, bin); - arena_dalloc_bin_slab_prepare(tsdn, slab, bin); -} - -void -arena_dalloc_bin_locked_handle_newly_nonempty( - tsdn_t *tsdn, arena_t *arena, edata_t *slab, bin_t *bin) { - arena_bin_slabs_full_remove(arena, bin, slab); - arena_bin_lower_slab(tsdn, arena, slab, bin); -} - static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { szind_t 
binind = edata_szind_get(edata); @@ -1637,7 +1363,7 @@ arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, * thread's arena, so the stats didn't get merged. * Manually do so now. */ - bin_t *bin = arena_bin_choose(tsdn, stats_arena, binind, NULL); + bin_t *bin = bin_choose(tsdn, stats_arena, binind, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += (*merge_stats)->nrequests; diff --git a/src/bin.c b/src/bin.c index a11b108e..6bab4b22 100644 --- a/src/bin.c +++ b/src/bin.c @@ -67,3 +67,266 @@ void bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { malloc_mutex_postfork_child(tsdn, &bin->lock); } + +void * +bin_slab_reg_alloc(edata_t *slab, const bin_info_t *bin_info) { + void *ret; + slab_data_t *slab_data = edata_slab_data_get(slab); + size_t regind; + + assert(edata_nfree_get(slab) > 0); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); + + regind = bitmap_sfu(slab_data->bitmap, &bin_info->bitmap_info); + ret = (void *)((byte_t *)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + edata_nfree_dec(slab); + return ret; +} + +void +bin_slab_reg_alloc_batch( + edata_t *slab, const bin_info_t *bin_info, unsigned cnt, void **ptrs) { + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) >= cnt); + assert(!bitmap_full(slab_data->bitmap, &bin_info->bitmap_info)); + +#if (!defined JEMALLOC_INTERNAL_POPCOUNTL) || (defined BITMAP_USE_TREE) + for (unsigned i = 0; i < cnt; i++) { + size_t regind = bitmap_sfu( + slab_data->bitmap, &bin_info->bitmap_info); + *(ptrs + i) = (void *)((uintptr_t)edata_addr_get(slab) + + (uintptr_t)(bin_info->reg_size * regind)); + } +#else + unsigned group = 0; + bitmap_t g = slab_data->bitmap[group]; + unsigned i = 0; + while (i < cnt) { + while (g == 0) { + g = slab_data->bitmap[++group]; + } + size_t shift = group << LG_BITMAP_GROUP_NBITS; + size_t pop = popcount_lu(g); + if (pop > (cnt - i)) { + pop = cnt - i; + } + + /* + * Load 
from memory locations only once, outside the + * hot loop below. + */ + uintptr_t base = (uintptr_t)edata_addr_get(slab); + uintptr_t regsize = (uintptr_t)bin_info->reg_size; + while (pop--) { + size_t bit = cfs_lu(&g); + size_t regind = shift + bit; + /* NOLINTNEXTLINE(performance-no-int-to-ptr) */ + *(ptrs + i) = (void *)(base + regsize * regind); + + i++; + } + slab_data->bitmap[group] = g; + } +#endif + edata_nfree_sub(slab, cnt); +} + +void +bin_slabs_nonfull_insert(bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) > 0); + edata_heap_insert(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs++; + } +} + +void +bin_slabs_nonfull_remove(bin_t *bin, edata_t *slab) { + edata_heap_remove(&bin->slabs_nonfull, slab); + if (config_stats) { + bin->stats.nonfull_slabs--; + } +} + +edata_t * +bin_slabs_nonfull_tryget(bin_t *bin) { + edata_t *slab = edata_heap_remove_first(&bin->slabs_nonfull); + if (slab == NULL) { + return NULL; + } + if (config_stats) { + bin->stats.reslabs++; + bin->stats.nonfull_slabs--; + } + return slab; +} + +void +bin_slabs_full_insert(bool is_auto, bin_t *bin, edata_t *slab) { + assert(edata_nfree_get(slab) == 0); + /* + * Tracking extents is required by arena_reset, which is not allowed + * for auto arenas. Bypass this step to avoid touching the edata + * linkage (often results in cache misses) for auto arenas. + */ + if (is_auto) { + return; + } + edata_list_active_append(&bin->slabs_full, slab); +} + +void +bin_slabs_full_remove(bool is_auto, bin_t *bin, edata_t *slab) { + if (is_auto) { + return; + } + edata_list_active_remove(&bin->slabs_full, slab); +} + +void +bin_dissociate_slab(bool is_auto, edata_t *slab, bin_t *bin) { + /* Dissociate slab from bin. 
*/ + if (slab == bin->slabcur) { + bin->slabcur = NULL; + } else { + szind_t binind = edata_szind_get(slab); + const bin_info_t *bin_info = &bin_infos[binind]; + + /* + * The following block's conditional is necessary because if the + * slab only contains one region, then it never gets inserted + * into the non-full slabs heap. + */ + if (bin_info->nregs == 1) { + bin_slabs_full_remove(is_auto, bin, slab); + } else { + bin_slabs_nonfull_remove(bin, slab); + } + } +} + +void +bin_lower_slab(tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) { + assert(edata_nfree_get(slab) > 0); + + /* + * Make sure that if bin->slabcur is non-NULL, it refers to the + * oldest/lowest non-full slab. It is okay to NULL slabcur out rather + * than proactively keeping it pointing at the oldest/lowest non-full + * slab. + */ + if (bin->slabcur != NULL && edata_snad_comp(bin->slabcur, slab) > 0) { + /* Switch slabcur. */ + if (edata_nfree_get(bin->slabcur) > 0) { + bin_slabs_nonfull_insert(bin, bin->slabcur); + } else { + bin_slabs_full_insert(is_auto, bin, bin->slabcur); + } + bin->slabcur = slab; + if (config_stats) { + bin->stats.reslabs++; + } + } else { + bin_slabs_nonfull_insert(bin, slab); + } +} + +void +bin_dalloc_slab_prepare(tsdn_t *tsdn, edata_t *slab, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + + assert(slab != bin->slabcur); + if (config_stats) { + bin->stats.curslabs--; + } +} + +void +bin_dalloc_locked_handle_newly_empty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) { + bin_dissociate_slab(is_auto, slab, bin); + bin_dalloc_slab_prepare(tsdn, slab, bin); +} + +void +bin_dalloc_locked_handle_newly_nonempty( + tsdn_t *tsdn, bool is_auto, edata_t *slab, bin_t *bin) { + bin_slabs_full_remove(is_auto, bin, slab); + bin_lower_slab(tsdn, is_auto, slab, bin); +} + +void +bin_refill_slabcur_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after slabcur 
and nonfull both failed. */ + assert(bin->slabcur == NULL); + assert(edata_heap_first(&bin->slabs_nonfull) == NULL); + assert(fresh_slab != NULL); + + /* A new slab from arena_slab_alloc() */ + assert(edata_nfree_get(fresh_slab) == bin_infos[binind].nregs); + if (config_stats) { + bin->stats.nslabs++; + bin->stats.curslabs++; + } + bin->slabcur = fresh_slab; +} + +void * +bin_malloc_with_fresh_slab(tsdn_t *tsdn, bin_t *bin, + szind_t binind, edata_t *fresh_slab) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + bin_refill_slabcur_with_fresh_slab(tsdn, bin, binind, fresh_slab); + + return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + +bool +bin_refill_slabcur_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + /* Only called after bin_slab_reg_alloc[_batch] failed. */ + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0); + + if (bin->slabcur != NULL) { + bin_slabs_full_insert(is_auto, bin, bin->slabcur); + } + + /* Look for a usable slab. 
*/ + bin->slabcur = bin_slabs_nonfull_tryget(bin); + assert(bin->slabcur == NULL || edata_nfree_get(bin->slabcur) > 0); + + return (bin->slabcur == NULL); +} + +void * +bin_malloc_no_fresh_slab(tsdn_t *tsdn, bool is_auto, bin_t *bin, + szind_t binind) { + malloc_mutex_assert_owner(tsdn, &bin->lock); + if (bin->slabcur == NULL || edata_nfree_get(bin->slabcur) == 0) { + if (bin_refill_slabcur_no_fresh_slab(tsdn, is_auto, bin)) { + return NULL; + } + } + + assert(bin->slabcur != NULL && edata_nfree_get(bin->slabcur) > 0); + return bin_slab_reg_alloc(bin->slabcur, &bin_infos[binind]); +} + +bin_t * +bin_choose(tsdn_t *tsdn, arena_t *arena, szind_t binind, + unsigned *binshard_p) { + unsigned binshard; + if (tsdn_null(tsdn) || tsd_arena_get(tsdn_tsd(tsdn)) == NULL) { + binshard = 0; + } else { + binshard = tsd_binshardsp_get(tsdn_tsd(tsdn))->binshard[binind]; + } + assert(binshard < bin_infos[binind].n_shards); + if (binshard_p != NULL) { + *binshard_p = binshard; + } + return arena_get_bin(arena, binind, binshard); +} diff --git a/src/large.c b/src/large.c index 7cae61ae..087df99d 100644 --- a/src/large.c +++ b/src/large.c @@ -41,7 +41,7 @@ large_palloc( return NULL; } - /* See comments in arena_bin_slabs_full_insert(). */ + /* See comments in bin_slabs_full_insert(). */ if (!arena_is_auto(arena)) { /* Insert edata into large. */ malloc_mutex_lock(tsdn, &arena->large_mtx); @@ -233,7 +233,7 @@ static void large_dalloc_prep_impl( tsdn_t *tsdn, arena_t *arena, edata_t *edata, bool locked) { if (!locked) { - /* See comments in arena_bin_slabs_full_insert(). */ + /* See comments in bin_slabs_full_insert(). 
*/ if (!arena_is_auto(arena)) { malloc_mutex_lock(tsdn, &arena->large_mtx); edata_list_active_remove(&arena->large, edata); diff --git a/src/tcache.c b/src/tcache.c index 74ff4718..172d9320 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -218,7 +218,7 @@ tcache_gc_small_heuristic_addr_get( tsd_t *tsd, tcache_slow_t *tcache_slow, szind_t szind) { assert(szind < SC_NBINS); tsdn_t *tsdn = tsd_tsdn(tsd); - bin_t *bin = arena_bin_choose(tsdn, tcache_slow->arena, szind, NULL); + bin_t *bin = bin_choose(tsdn, tcache_slow->arena, szind, NULL); assert(bin != NULL); malloc_mutex_lock(tsdn, &bin->lock); @@ -1275,7 +1275,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { continue; } if (i < SC_NBINS) { - bin_t *bin = arena_bin_choose(tsdn, arena, i, NULL); + bin_t *bin = bin_choose(tsdn, arena, i, NULL); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += cache_bin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); From 0ac9380cf1b2fe1b255a96c5d57d6eab33a78330 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 23 Feb 2026 23:31:27 -0800 Subject: [PATCH 2577/2608] Move bin inline functions from arena_inlines_b.h to bin_inlines.h This is a continuation of my previous clean-up change, now focusing on the inline functions defined in header files. 
--- include/jemalloc/internal/arena_inlines_b.h | 106 +----------------- include/jemalloc/internal/bin_inlines.h | 112 ++++++++++++++++++++ src/arena.c | 21 ++-- test/unit/slab.c | 10 +- 4 files changed, 131 insertions(+), 118 deletions(-) create mode 100644 include/jemalloc/internal/bin_inlines.h diff --git a/include/jemalloc/internal/arena_inlines_b.h b/include/jemalloc/internal/arena_inlines_b.h index a0caf586..bda256b9 100644 --- a/include/jemalloc/internal/arena_inlines_b.h +++ b/include/jemalloc/internal/arena_inlines_b.h @@ -4,6 +4,7 @@ #include "jemalloc/internal/jemalloc_preamble.h" #include "jemalloc/internal/arena_externs.h" #include "jemalloc/internal/arena_structs.h" +#include "jemalloc/internal/bin_inlines.h" #include "jemalloc/internal/div.h" #include "jemalloc/internal/emap.h" #include "jemalloc/internal/jemalloc_internal_inlines_b.h" @@ -335,29 +336,6 @@ arena_dalloc_large(tsdn_t *tsdn, void *ptr, tcache_t *tcache, szind_t szind, } } -/* Find the region index of a pointer. */ -JEMALLOC_ALWAYS_INLINE size_t -arena_slab_regind_impl( - div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) { - size_t diff, regind; - - /* Freeing a pointer outside the slab can cause assertion failure. */ - assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); - assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); - /* Freeing an interior pointer can cause assertion failure. */ - assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) - % (uintptr_t)bin_infos[binind].reg_size - == 0); - - diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); - - /* Avoid doing division with a variable divisor. */ - regind = div_compute(div_info, diff); - assert(regind < bin_infos[binind].nregs); - return regind; -} - -/* Checks whether ptr is currently active in the arena. 
*/ JEMALLOC_ALWAYS_INLINE bool arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { if (!config_debug) { @@ -367,10 +345,10 @@ arena_tcache_dalloc_small_safety_check(tsdn_t *tsdn, void *ptr) { szind_t binind = edata_szind_get(edata); div_info_t div_info = arena_binind_div_info[binind]; /* - * Calls the internal function arena_slab_regind_impl because the + * Calls the internal function bin_slab_regind_impl because the * safety check does not require a lock. */ - size_t regind = arena_slab_regind_impl(&div_info, binind, edata, ptr); + size_t regind = bin_slab_regind_impl(&div_info, binind, edata, ptr); slab_data_t *slab_data = edata_slab_data_get(edata); const bin_info_t *bin_info = &bin_infos[binind]; assert(edata_nfree_get(edata) < bin_info->nregs); @@ -551,84 +529,6 @@ arena_cache_oblivious_randomize( } } -/* - * The dalloc bin info contains just the information that the common paths need - * during tcache flushes. By force-inlining these paths, and using local copies - * of data (so that the compiler knows it's constant), we avoid a whole bunch of - * redundant loads and stores by leaving this information in registers. - */ -typedef struct arena_dalloc_bin_locked_info_s arena_dalloc_bin_locked_info_t; -struct arena_dalloc_bin_locked_info_s { - div_info_t div_info; - uint32_t nregs; - uint64_t ndalloc; -}; - -JEMALLOC_ALWAYS_INLINE size_t -arena_slab_regind(arena_dalloc_bin_locked_info_t *info, szind_t binind, - edata_t *slab, const void *ptr) { - size_t regind = arena_slab_regind_impl( - &info->div_info, binind, slab, ptr); - return regind; -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc_bin_locked_begin( - arena_dalloc_bin_locked_info_t *info, szind_t binind) { - info->div_info = arena_binind_div_info[binind]; - info->nregs = bin_infos[binind].nregs; - info->ndalloc = 0; -} - -/* - * Does the deallocation work associated with freeing a single pointer (a - * "step") in between a arena_dalloc_bin_locked begin and end call. 
- * - * Returns true if arena_slab_dalloc must be called on slab. Doesn't do - * stats updates, which happen during finish (this lets running counts get left - * in a register). - */ -JEMALLOC_ALWAYS_INLINE bool -arena_dalloc_bin_locked_step(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *info, szind_t binind, edata_t *slab, - void *ptr) { - const bin_info_t *bin_info = &bin_infos[binind]; - size_t regind = arena_slab_regind(info, binind, slab, ptr); - slab_data_t *slab_data = edata_slab_data_get(slab); - - assert(edata_nfree_get(slab) < bin_info->nregs); - /* Freeing an unallocated pointer can cause assertion failure. */ - assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); - - bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); - edata_nfree_inc(slab); - - if (config_stats) { - info->ndalloc++; - } - - unsigned nfree = edata_nfree_get(slab); - if (nfree == bin_info->nregs) { - bin_dalloc_locked_handle_newly_empty( - tsdn, arena_is_auto(arena), slab, bin); - return true; - } else if (nfree == 1 && slab != bin->slabcur) { - bin_dalloc_locked_handle_newly_nonempty( - tsdn, arena_is_auto(arena), slab, bin); - } - return false; -} - -JEMALLOC_ALWAYS_INLINE void -arena_dalloc_bin_locked_finish(tsdn_t *tsdn, arena_t *arena, bin_t *bin, - arena_dalloc_bin_locked_info_t *info) { - if (config_stats) { - bin->stats.ndalloc += info->ndalloc; - assert(bin->stats.curregs >= (size_t)info->ndalloc); - bin->stats.curregs -= (size_t)info->ndalloc; - } -} - static inline bin_t * arena_get_bin(arena_t *arena, szind_t binind, unsigned binshard) { bin_t *shard0 = (bin_t *)((byte_t *)arena + arena_bin_offsets[binind]); diff --git a/include/jemalloc/internal/bin_inlines.h b/include/jemalloc/internal/bin_inlines.h new file mode 100644 index 00000000..f4291169 --- /dev/null +++ b/include/jemalloc/internal/bin_inlines.h @@ -0,0 +1,112 @@ +#ifndef JEMALLOC_INTERNAL_BIN_INLINES_H +#define JEMALLOC_INTERNAL_BIN_INLINES_H + +#include 
"jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/bin_info.h" +#include "jemalloc/internal/bitmap.h" +#include "jemalloc/internal/div.h" +#include "jemalloc/internal/edata.h" +#include "jemalloc/internal/sc.h" + +/* + * The dalloc bin info contains just the information that the common paths need + * during tcache flushes. By force-inlining these paths, and using local copies + * of data (so that the compiler knows it's constant), we avoid a whole bunch of + * redundant loads and stores by leaving this information in registers. + */ +typedef struct bin_dalloc_locked_info_s bin_dalloc_locked_info_t; +struct bin_dalloc_locked_info_s { + div_info_t div_info; + uint32_t nregs; + uint64_t ndalloc; +}; + +/* Find the region index of a pointer within a slab. */ +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind_impl( + div_info_t *div_info, szind_t binind, edata_t *slab, const void *ptr) { + size_t diff, regind; + + /* Freeing a pointer outside the slab can cause assertion failure. */ + assert((uintptr_t)ptr >= (uintptr_t)edata_addr_get(slab)); + assert((uintptr_t)ptr < (uintptr_t)edata_past_get(slab)); + /* Freeing an interior pointer can cause assertion failure. */ + assert(((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)) + % (uintptr_t)bin_infos[binind].reg_size + == 0); + + diff = (size_t)((uintptr_t)ptr - (uintptr_t)edata_addr_get(slab)); + + /* Avoid doing division with a variable divisor. 
*/ + regind = div_compute(div_info, diff); + assert(regind < bin_infos[binind].nregs); + return regind; +} + +JEMALLOC_ALWAYS_INLINE size_t +bin_slab_regind(bin_dalloc_locked_info_t *info, szind_t binind, + edata_t *slab, const void *ptr) { + size_t regind = bin_slab_regind_impl( + &info->div_info, binind, slab, ptr); + return regind; +} + +JEMALLOC_ALWAYS_INLINE void +bin_dalloc_locked_begin( + bin_dalloc_locked_info_t *info, szind_t binind) { + info->div_info = arena_binind_div_info[binind]; + info->nregs = bin_infos[binind].nregs; + info->ndalloc = 0; +} + +/* + * Does the deallocation work associated with freeing a single pointer (a + * "step") in between a bin_dalloc_locked begin and end call. + * + * Returns true if arena_slab_dalloc must be called on slab. Doesn't do + * stats updates, which happen during finish (this lets running counts get left + * in a register). + */ +JEMALLOC_ALWAYS_INLINE bool +bin_dalloc_locked_step(tsdn_t *tsdn, bool is_auto, bin_t *bin, + bin_dalloc_locked_info_t *info, szind_t binind, edata_t *slab, + void *ptr) { + const bin_info_t *bin_info = &bin_infos[binind]; + size_t regind = bin_slab_regind(info, binind, slab, ptr); + slab_data_t *slab_data = edata_slab_data_get(slab); + + assert(edata_nfree_get(slab) < bin_info->nregs); + /* Freeing an unallocated pointer can cause assertion failure. 
*/ + assert(bitmap_get(slab_data->bitmap, &bin_info->bitmap_info, regind)); + + bitmap_unset(slab_data->bitmap, &bin_info->bitmap_info, regind); + edata_nfree_inc(slab); + + if (config_stats) { + info->ndalloc++; + } + + unsigned nfree = edata_nfree_get(slab); + if (nfree == bin_info->nregs) { + bin_dalloc_locked_handle_newly_empty( + tsdn, is_auto, slab, bin); + return true; + } else if (nfree == 1 && slab != bin->slabcur) { + bin_dalloc_locked_handle_newly_nonempty( + tsdn, is_auto, slab, bin); + } + return false; +} + +JEMALLOC_ALWAYS_INLINE void +bin_dalloc_locked_finish(tsdn_t *tsdn, bin_t *bin, + bin_dalloc_locked_info_t *info) { + if (config_stats) { + bin->stats.ndalloc += info->ndalloc; + assert(bin->stats.curregs >= (size_t)info->ndalloc); + bin->stats.curregs -= (size_t)info->ndalloc; + } +} + +#endif /* JEMALLOC_INTERNAL_BIN_INLINES_H */ diff --git a/src/arena.c b/src/arena.c index 338cc330..d7c8cd1f 100644 --- a/src/arena.c +++ b/src/arena.c @@ -1171,11 +1171,11 @@ arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, edata_t *edata, void *ptr) { bin_t *bin = arena_get_bin(arena, binind, binshard); malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_info_t info; - arena_dalloc_bin_locked_begin(&info, binind); - bool ret = arena_dalloc_bin_locked_step( - tsdn, arena, bin, &info, binind, edata, ptr); - arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); + bin_dalloc_locked_info_t info; + bin_dalloc_locked_begin(&info, binind); + bool ret = bin_dalloc_locked_step( + tsdn, arena_is_auto(arena), bin, &info, binind, edata, ptr); + bin_dalloc_locked_finish(tsdn, bin, &info); malloc_mutex_unlock(tsdn, &bin->lock); if (ret) { @@ -1330,12 +1330,13 @@ arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, /* Next flush objects. */ /* Init only to avoid used-uninitialized warning. 
*/ - arena_dalloc_bin_locked_info_t dalloc_bin_info = {0}; - arena_dalloc_bin_locked_begin(&dalloc_bin_info, binind); + bin_dalloc_locked_info_t dalloc_bin_info = {0}; + bin_dalloc_locked_begin(&dalloc_bin_info, binind); for (unsigned i = prev_flush_start; i < flush_start; i++) { void *ptr = arr->ptr[i]; edata_t *edata = item_edata[i].edata; - if (arena_dalloc_bin_locked_step(tsdn, cur_arena, + if (bin_dalloc_locked_step(tsdn, + arena_is_auto(cur_arena), cur_bin, &dalloc_bin_info, binind, edata, ptr)) { dalloc_slabs[dalloc_count] = edata; @@ -1343,8 +1344,8 @@ arena_ptr_array_flush_impl_small(tsdn_t *tsdn, szind_t binind, } } - arena_dalloc_bin_locked_finish( - tsdn, cur_arena, cur_bin, &dalloc_bin_info); + bin_dalloc_locked_finish( + tsdn, cur_bin, &dalloc_bin_info); malloc_mutex_unlock(tsdn, &cur_bin->lock); arena_decay_ticks( diff --git a/test/unit/slab.c b/test/unit/slab.c index 5c48e762..d98663e8 100644 --- a/test/unit/slab.c +++ b/test/unit/slab.c @@ -2,7 +2,7 @@ #define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) -TEST_BEGIN(test_arena_slab_regind) { +TEST_BEGIN(test_bin_slab_regind) { szind_t binind; for (binind = 0; binind < SC_NBINS; binind++) { @@ -15,13 +15,13 @@ TEST_BEGIN(test_arena_slab_regind) { false, true, EXTENT_PAI_PAC, EXTENT_NOT_HEAD); expect_ptr_not_null( edata_addr_get(&slab), "Unexpected malloc() failure"); - arena_dalloc_bin_locked_info_t dalloc_info; - arena_dalloc_bin_locked_begin(&dalloc_info, binind); + bin_dalloc_locked_info_t dalloc_info; + bin_dalloc_locked_begin(&dalloc_info, binind); for (regind = 0; regind < bin_info->nregs; regind++) { void *reg = (void *)((uintptr_t)edata_addr_get(&slab) + (bin_info->reg_size * regind)); expect_zu_eq( - arena_slab_regind(&dalloc_info, binind, &slab, reg), + bin_slab_regind(&dalloc_info, binind, &slab, reg), regind, "Incorrect region index computed for size %zu", bin_info->reg_size); @@ -33,5 +33,5 @@ TEST_END int main(void) { - return test(test_arena_slab_regind); + return 
test(test_bin_slab_regind); } From a75655badf31a2c6187bf069f8103c626542941f Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Fri, 27 Feb 2026 12:02:07 -0800 Subject: [PATCH 2578/2608] Add unit test coverage for bin interfaces --- Makefile.in | 1 + test/unit/bin.c | 825 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 826 insertions(+) create mode 100644 test/unit/bin.c diff --git a/Makefile.in b/Makefile.in index f916ad71..463693df 100644 --- a/Makefile.in +++ b/Makefile.in @@ -208,6 +208,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/background_thread_init.c \ $(srcroot)test/unit/base.c \ $(srcroot)test/unit/batch_alloc.c \ + $(srcroot)test/unit/bin.c \ $(srcroot)test/unit/binshard.c \ $(srcroot)test/unit/bitmap.c \ $(srcroot)test/unit/bit_util.c \ diff --git a/test/unit/bin.c b/test/unit/bin.c new file mode 100644 index 00000000..002bbf11 --- /dev/null +++ b/test/unit/bin.c @@ -0,0 +1,825 @@ +#include "test/jemalloc_test.h" + +#define INVALID_ARENA_IND ((1U << MALLOCX_ARENA_BITS) - 1) + +/* Create a page-aligned mock slab with all regions free. */ +static void +create_mock_slab(edata_t *slab, szind_t binind, uint64_t sn) { + const bin_info_t *bin_info = &bin_infos[binind]; + void *addr; + slab_data_t *slab_data; + + addr = mallocx(bin_info->slab_size, MALLOCX_LG_ALIGN(LG_PAGE)); + assert_ptr_not_null(addr, "Unexpected mallocx failure"); + + memset(slab, 0, sizeof(edata_t)); + edata_init(slab, INVALID_ARENA_IND, addr, bin_info->slab_size, + true, binind, sn, extent_state_active, false, true, + EXTENT_PAI_PAC, EXTENT_NOT_HEAD); + edata_nfree_set(slab, bin_info->nregs); + + /* Initialize bitmap to all regions free. */ + slab_data = edata_slab_data_get(slab); + bitmap_init(slab_data->bitmap, &bin_info->bitmap_info, false); +} + +/* + * Test that bin_init produces a valid empty bin. 
+ */ +TEST_BEGIN(test_bin_init) { + bin_t bin; + bool err; + + err = bin_init(&bin); + expect_false(err, "bin_init should succeed"); + expect_ptr_null(bin.slabcur, "New bin should have NULL slabcur"); + expect_ptr_null(edata_heap_first(&bin.slabs_nonfull), + "New bin should have empty nonfull heap"); + expect_true(edata_list_active_empty(&bin.slabs_full), + "New bin should have empty full list"); + if (config_stats) { + expect_u64_eq(bin.stats.nmalloc, 0, + "New bin should have zero nmalloc"); + expect_u64_eq(bin.stats.ndalloc, 0, + "New bin should have zero ndalloc"); + expect_zu_eq(bin.stats.curregs, 0, + "New bin should have zero curregs"); + expect_zu_eq(bin.stats.curslabs, 0, + "New bin should have zero curslabs"); + } +} +TEST_END + +/* + * Test single-region allocation from a slab. + */ +TEST_BEGIN(test_bin_slab_reg_alloc) { + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + unsigned i; + + create_mock_slab(&slab, binind, 0); + nregs = bin_info->nregs; + + for (i = 0; i < nregs; i++) { + void *reg; + + expect_u_gt(edata_nfree_get(&slab), 0, + "Slab should have free regions"); + reg = bin_slab_reg_alloc(&slab, bin_info); + expect_ptr_not_null(reg, + "bin_slab_reg_alloc should return non-NULL"); + /* Verify the pointer is within the slab. */ + expect_true( + (uintptr_t)reg >= (uintptr_t)edata_addr_get(&slab) && + (uintptr_t)reg < (uintptr_t)edata_addr_get(&slab) + + bin_info->slab_size, + "Allocated region should be within slab bounds"); + } + expect_u_eq(edata_nfree_get(&slab), 0, + "Slab should be full after allocating all regions"); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test batch allocation from a slab. 
+ */ +TEST_BEGIN(test_bin_slab_reg_alloc_batch) { + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + void **ptrs; + unsigned i; + + create_mock_slab(&slab, binind, 0); + nregs = bin_info->nregs; + ptrs = mallocx(nregs * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + + bin_slab_reg_alloc_batch(&slab, bin_info, nregs, ptrs); + expect_u_eq(edata_nfree_get(&slab), 0, + "Slab should be full after batch alloc of all regions"); + + /* Verify all pointers are within the slab and distinct. */ + for (i = 0; i < nregs; i++) { + unsigned j; + + expect_ptr_not_null(ptrs[i], "Batch pointer should be non-NULL"); + expect_true( + (uintptr_t)ptrs[i] >= (uintptr_t)edata_addr_get(&slab) && + (uintptr_t)ptrs[i] < (uintptr_t)edata_addr_get(&slab) + + bin_info->slab_size, + "Batch pointer should be within slab bounds"); + for (j = 0; j < i; j++) { + expect_ptr_ne(ptrs[i], ptrs[j], + "Batch pointers should be distinct"); + } + } + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test partial batch allocation from a slab. + */ +TEST_BEGIN(test_bin_slab_reg_alloc_batch_partial) { + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + unsigned half; + void **ptrs; + + create_mock_slab(&slab, binind, 0); + nregs = bin_info->nregs; + + /* Only allocate half. */ + half = nregs / 2; + if (half == 0) { + half = 1; + } + ptrs = mallocx(half * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + + bin_slab_reg_alloc_batch(&slab, bin_info, half, ptrs); + expect_u_eq(edata_nfree_get(&slab), nregs - half, + "Slab nfree should reflect partial batch alloc"); + + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test nonfull slab list insert, remove, and tryget. 
+ */ +TEST_BEGIN(test_bin_slabs_nonfull) { + bin_t bin; + szind_t binind = 0; + edata_t slab1, slab2; + edata_t *got; + edata_t *remaining; + + bin_init(&bin); + + /* Create two non-full slabs with different serial numbers. */ + create_mock_slab(&slab1, binind, 1); + create_mock_slab(&slab2, binind, 2); + + /* Insert both into the nonfull heap. */ + bin_slabs_nonfull_insert(&bin, &slab1); + expect_ptr_not_null(edata_heap_first(&bin.slabs_nonfull), + "Nonfull heap should be non-empty after insert"); + + bin_slabs_nonfull_insert(&bin, &slab2); + + /* tryget should return a slab. */ + got = bin_slabs_nonfull_tryget(&bin); + expect_ptr_not_null(got, "tryget should return a slab"); + + /* Remove the remaining one explicitly. */ + remaining = edata_heap_first(&bin.slabs_nonfull); + expect_ptr_not_null(remaining, "One slab should still remain"); + bin_slabs_nonfull_remove(&bin, remaining); + expect_ptr_null(edata_heap_first(&bin.slabs_nonfull), + "Nonfull heap should be empty after removing both slabs"); + + free(edata_addr_get(&slab1)); + free(edata_addr_get(&slab2)); +} +TEST_END + +/* + * Test full slab list insert and remove (non-auto arena case). + */ +TEST_BEGIN(test_bin_slabs_full) { + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned i; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + /* Consume all regions so the slab appears full. */ + for (i = 0; i < bin_info->nregs; i++) { + bin_slab_reg_alloc(&slab, bin_info); + } + expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full"); + + /* Insert into full list (is_auto=false to actually track). */ + bin_slabs_full_insert(false, &bin, &slab); + expect_false(edata_list_active_empty(&bin.slabs_full), + "Full list should be non-empty after insert"); + + /* Remove from full list. 
*/ + bin_slabs_full_remove(false, &bin, &slab); + expect_true(edata_list_active_empty(&bin.slabs_full), + "Full list should be empty after remove"); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test that full slab insert/remove is a no-op for auto arenas. + */ +TEST_BEGIN(test_bin_slabs_full_auto) { + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned i; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + for (i = 0; i < bin_info->nregs; i++) { + bin_slab_reg_alloc(&slab, bin_info); + } + + /* is_auto=true: insert should be a no-op. */ + bin_slabs_full_insert(true, &bin, &slab); + expect_true(edata_list_active_empty(&bin.slabs_full), + "Full list should remain empty for auto arenas"); + + /* Remove should also be a no-op without crashing. */ + bin_slabs_full_remove(true, &bin, &slab); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test dissociate_slab when the slab is slabcur. + */ +TEST_BEGIN(test_bin_dissociate_slabcur) { + bin_t bin; + szind_t binind = 0; + edata_t slab; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + bin.slabcur = &slab; + bin_dissociate_slab(true, &slab, &bin); + expect_ptr_null(bin.slabcur, + "Dissociating slabcur should NULL it out"); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test dissociate_slab when the slab is in the nonfull heap. + */ +TEST_BEGIN(test_bin_dissociate_nonfull) { + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + /* + * Only dissociate from nonfull when nregs > 1. For nregs == 1, + * the slab goes directly to the full list, never nonfull. 
+ */ + test_skip_if(bin_info->nregs == 1); + + bin_slabs_nonfull_insert(&bin, &slab); + bin_dissociate_slab(true, &slab, &bin); + expect_ptr_null(edata_heap_first(&bin.slabs_nonfull), + "Nonfull heap should be empty after dissociating the slab"); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test refill slabcur with a fresh slab. + */ +TEST_BEGIN(test_bin_refill_slabcur_with_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t fresh; + + bin_init(&bin); + create_mock_slab(&fresh, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + bin_refill_slabcur_with_fresh_slab(tsdn, &bin, binind, &fresh); + expect_ptr_eq(bin.slabcur, &fresh, + "Fresh slab should become slabcur"); + if (config_stats) { + expect_u64_eq(bin.stats.nslabs, 1, + "nslabs should be 1 after installing fresh slab"); + expect_zu_eq(bin.stats.curslabs, 1, + "curslabs should be 1 after installing fresh slab"); + } + expect_u_eq(edata_nfree_get(bin.slabcur), bin_info->nregs, + "Fresh slab should have all regions free"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&fresh)); +} +TEST_END + +/* + * Test refill slabcur without a fresh slab (from the nonfull heap). + */ +TEST_BEGIN(test_bin_refill_slabcur_no_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab; + bool empty; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + + /* With no slabcur and empty nonfull heap, refill should fail. */ + empty = bin_refill_slabcur_no_fresh_slab(tsdn, true, &bin); + expect_true(empty, + "Refill should fail when nonfull heap is empty"); + expect_ptr_null(bin.slabcur, "slabcur should remain NULL"); + + /* Insert a slab into nonfull, then refill should succeed. 
*/ + bin_slabs_nonfull_insert(&bin, &slab); + empty = bin_refill_slabcur_no_fresh_slab(tsdn, true, &bin); + expect_false(empty, + "Refill should succeed when nonfull heap has a slab"); + expect_ptr_eq(bin.slabcur, &slab, + "slabcur should be the slab from nonfull heap"); + + malloc_mutex_unlock(tsdn, &bin.lock); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test that refill moves a full slabcur into the full list. + */ +TEST_BEGIN(test_bin_refill_slabcur_full_to_list) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t full_slab, nonfull_slab; + unsigned i; + bool empty; + + bin_init(&bin); + create_mock_slab(&full_slab, binind, 0); + create_mock_slab(&nonfull_slab, binind, 1); + + /* Make full_slab actually full. */ + for (i = 0; i < bin_info->nregs; i++) { + bin_slab_reg_alloc(&full_slab, bin_info); + } + + malloc_mutex_lock(tsdn, &bin.lock); + bin.slabcur = &full_slab; + bin_slabs_nonfull_insert(&bin, &nonfull_slab); + + /* Refill should move the full slabcur to full list and pick nonfull. */ + empty = bin_refill_slabcur_no_fresh_slab(tsdn, false, &bin); + expect_false(empty, "Refill should succeed"); + expect_ptr_eq(bin.slabcur, &nonfull_slab, + "slabcur should now be the nonfull slab"); + expect_false(edata_list_active_empty(&bin.slabs_full), + "Old full slabcur should be in the full list"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&full_slab)); + free(edata_addr_get(&nonfull_slab)); +} +TEST_END + +/* + * Test malloc with a fresh slab. 
+ */ +TEST_BEGIN(test_bin_malloc_with_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t fresh; + void *ptr; + + bin_init(&bin); + create_mock_slab(&fresh, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + ptr = bin_malloc_with_fresh_slab(tsdn, &bin, binind, &fresh); + expect_ptr_not_null(ptr, "Should allocate from fresh slab"); + expect_ptr_eq(bin.slabcur, &fresh, + "Fresh slab should be installed as slabcur"); + expect_u_eq(edata_nfree_get(&fresh), bin_info->nregs - 1, + "One region should be consumed from fresh slab"); + if (config_stats) { + expect_u64_eq(bin.stats.nslabs, 1, "nslabs should be 1"); + expect_zu_eq(bin.stats.curslabs, 1, "curslabs should be 1"); + } + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&fresh)); +} +TEST_END + +/* + * Test malloc without a fresh slab (from existing slabcur). + */ +TEST_BEGIN(test_bin_malloc_no_fresh_slab) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + void *ptr; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + + /* With no slabcur and empty nonfull, should return NULL. */ + ptr = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind); + expect_ptr_null(ptr, + "Should return NULL when no slabs available"); + + /* Set up a slabcur; malloc should succeed. */ + bin.slabcur = &slab; + ptr = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind); + expect_ptr_not_null(ptr, + "Should allocate from slabcur"); + expect_u_eq(edata_nfree_get(&slab), bin_info->nregs - 1, + "One region should be consumed"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test the bin_dalloc_locked begin/step/finish sequence. 
+ */ +TEST_BEGIN(test_bin_dalloc_locked) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + edata_t slab; + unsigned nregs; + void **ptrs; + unsigned i; + bin_dalloc_locked_info_t info; + bool slab_empty; + bool found_empty; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + /* Allocate all regions from the slab. */ + nregs = bin_info->nregs; + ptrs = mallocx(nregs * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + for (i = 0; i < nregs; i++) { + ptrs[i] = bin_slab_reg_alloc(&slab, bin_info); + assert_ptr_not_null(ptrs[i], "Alloc should succeed"); + } + expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full"); + + /* Set this slab as slabcur so dalloc steps work correctly. */ + bin.slabcur = &slab; + if (config_stats) { + bin.stats.nmalloc = nregs; + bin.stats.curregs = nregs; + bin.stats.nslabs = 1; + bin.stats.curslabs = 1; + } + + malloc_mutex_lock(tsdn, &bin.lock); + + /* Free one region and verify step returns false (not yet empty). */ + bin_dalloc_locked_begin(&info, binind); + slab_empty = bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptrs[0]); + if (nregs > 1) { + expect_false(slab_empty, + "Slab should not be empty after freeing one region"); + } + bin_dalloc_locked_finish(tsdn, &bin, &info); + if (config_stats) { + expect_zu_eq(bin.stats.curregs, nregs - 1, + "curregs should decrement by 1"); + } + + /* Free all remaining regions; the last one should empty the slab. 
*/ + bin_dalloc_locked_begin(&info, binind); + found_empty = false; + for (i = 1; i < nregs; i++) { + slab_empty = bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptrs[i]); + if (slab_empty) { + found_empty = true; + } + } + bin_dalloc_locked_finish(tsdn, &bin, &info); + expect_true(found_empty, + "Freeing all regions should produce an empty slab"); + expect_u_eq(edata_nfree_get(&slab), nregs, + "All regions should be free"); + if (config_stats) { + expect_zu_eq(bin.stats.curregs, 0, + "curregs should be 0 after freeing all"); + } + + malloc_mutex_unlock(tsdn, &bin.lock); + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test that bin_lower_slab replaces slabcur when the new slab is older. + */ +TEST_BEGIN(test_bin_lower_slab_replaces_slabcur) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab_old, slab_new; + + bin_init(&bin); + + /* slab_old has sn=0 (older), slab_new has sn=1 (newer). */ + create_mock_slab(&slab_old, binind, 0); + create_mock_slab(&slab_new, binind, 1); + + /* Make slab_new the slabcur. */ + bin.slabcur = &slab_new; + + /* + * bin_lower_slab with the older slab should replace slabcur and move + * slab_new into either nonfull or full. + */ + malloc_mutex_lock(tsdn, &bin.lock); + bin_lower_slab(tsdn, true, &slab_old, &bin); + expect_ptr_eq(bin.slabcur, &slab_old, + "Older slab should replace slabcur"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab_old)); + free(edata_addr_get(&slab_new)); +} +TEST_END + +/* + * Test that bin_lower_slab inserts into the nonfull heap when the new slab + * is newer than slabcur. + */ +TEST_BEGIN(test_bin_lower_slab_inserts_nonfull) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab_old, slab_new; + + bin_init(&bin); + create_mock_slab(&slab_old, binind, 0); + create_mock_slab(&slab_new, binind, 1); + + /* Make slab_old the slabcur (older). 
*/ + bin.slabcur = &slab_old; + + /* bin_lower_slab with the newer slab should insert into nonfull. */ + malloc_mutex_lock(tsdn, &bin.lock); + bin_lower_slab(tsdn, true, &slab_new, &bin); + expect_ptr_eq(bin.slabcur, &slab_old, + "Older slabcur should remain"); + expect_ptr_not_null(edata_heap_first(&bin.slabs_nonfull), + "Newer slab should be inserted into nonfull heap"); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab_old)); + free(edata_addr_get(&slab_new)); +} +TEST_END + +/* + * Test bin_dalloc_slab_prepare updates stats. + */ +TEST_BEGIN(test_bin_dalloc_slab_prepare) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + edata_t slab; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + if (config_stats) { + bin.stats.curslabs = 2; + } + + /* + * bin_dalloc_slab_prepare requires the slab is not slabcur, + * so leave slabcur NULL. + */ + malloc_mutex_lock(tsdn, &bin.lock); + bin_dalloc_slab_prepare(tsdn, &slab, &bin); + if (config_stats) { + expect_zu_eq(bin.stats.curslabs, 1, + "curslabs should decrement"); + } + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test bin_shard_sizes_boot and bin_update_shard_size. + */ +TEST_BEGIN(test_bin_shard_sizes) { + unsigned shard_sizes[SC_NBINS]; + unsigned i; + bool err; + szind_t ind1, ind2; + + /* Boot should set all to the default. */ + bin_shard_sizes_boot(shard_sizes); + for (i = 0; i < SC_NBINS; i++) { + expect_u_eq(shard_sizes[i], N_BIN_SHARDS_DEFAULT, + "Shard sizes should be default after boot"); + } + + /* Update with nshards=0 should fail (returns true). */ + err = bin_update_shard_size(shard_sizes, 1, 1, 0); + expect_true(err, "nshards=0 should be an error"); + + /* Update with nshards > BIN_SHARDS_MAX should fail. */ + err = bin_update_shard_size(shard_sizes, 1, 1, BIN_SHARDS_MAX + 1); + expect_true(err, "nshards > BIN_SHARDS_MAX should be an error"); + + /* Valid update: set a range to 4 shards. 
*/ + err = bin_update_shard_size(shard_sizes, 1, 128, 4); + expect_false(err, "Valid update should succeed"); + /* Verify the range was updated. */ + ind1 = sz_size2index_compute(1); + ind2 = sz_size2index_compute(128); + for (i = ind1; i <= ind2; i++) { + expect_u_eq(shard_sizes[i], 4, + "Updated range should have nshards=4"); + } + + /* Update beyond SC_SMALL_MAXCLASS should be clamped, not fail. */ + err = bin_update_shard_size(shard_sizes, + SC_SMALL_MAXCLASS, SC_SMALL_MAXCLASS * 2, 2); + expect_false(err, + "Update with end beyond SMALL_MAXCLASS should succeed"); +} +TEST_END + +/* + * Test a full alloc-then-free cycle by allocating all regions from a bin + * via bin_malloc_with_fresh_slab, then freeing them all via the + * bin_dalloc_locked sequence. + */ +TEST_BEGIN(test_bin_alloc_free_cycle) { + tsdn_t *tsdn = tsdn_fetch(); + bin_t bin; + szind_t binind = 0; + const bin_info_t *bin_info = &bin_infos[binind]; + unsigned nregs = bin_info->nregs; + edata_t slab; + void **ptrs; + unsigned i; + bin_dalloc_locked_info_t info; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + ptrs = mallocx(nregs * sizeof(void *), 0); + assert_ptr_not_null(ptrs, "Unexpected mallocx failure"); + + malloc_mutex_lock(tsdn, &bin.lock); + + /* Allocate the first pointer via fresh slab path. */ + ptrs[0] = bin_malloc_with_fresh_slab(tsdn, &bin, binind, &slab); + expect_ptr_not_null(ptrs[0], "First alloc should succeed"); + + /* Allocate the rest from slabcur. */ + for (i = 1; i < nregs; i++) { + ptrs[i] = bin_malloc_no_fresh_slab(tsdn, true, &bin, binind); + expect_ptr_not_null(ptrs[i], "Alloc should succeed"); + } + if (config_stats) { + bin.stats.nmalloc += nregs; + bin.stats.curregs += nregs; + } + + expect_u_eq(edata_nfree_get(&slab), 0, "Slab should be full"); + + /* Free all regions. 
*/ + bin_dalloc_locked_begin(&info, binind); + for (i = 0; i < nregs; i++) { + bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptrs[i]); + } + bin_dalloc_locked_finish(tsdn, &bin, &info); + + expect_u_eq(edata_nfree_get(&slab), nregs, + "All regions should be free after full cycle"); + if (config_stats) { + expect_zu_eq(bin.stats.curregs, 0, + "curregs should be 0 after full cycle"); + } + + malloc_mutex_unlock(tsdn, &bin.lock); + free(ptrs); + free(edata_addr_get(&slab)); +} +TEST_END + +/* + * Test alloc/free cycle across multiple bin size classes. + */ +TEST_BEGIN(test_bin_multi_size_class) { + tsdn_t *tsdn = tsdn_fetch(); + szind_t test_indices[] = {0, SC_NBINS / 2, SC_NBINS - 1}; + unsigned nindices = sizeof(test_indices) / sizeof(test_indices[0]); + unsigned t; + + for (t = 0; t < nindices; t++) { + szind_t binind = test_indices[t]; + const bin_info_t *bin_info = &bin_infos[binind]; + bin_t bin; + edata_t slab; + void *ptr; + bin_dalloc_locked_info_t info; + + bin_init(&bin); + create_mock_slab(&slab, binind, 0); + + malloc_mutex_lock(tsdn, &bin.lock); + ptr = bin_malloc_with_fresh_slab( + tsdn, &bin, binind, &slab); + expect_ptr_not_null(ptr, + "Alloc should succeed for binind %u", binind); + expect_u_eq(edata_nfree_get(&slab), bin_info->nregs - 1, + "nfree should be nregs-1 for binind %u", binind); + + /* Free the allocated region. 
*/ + if (config_stats) { + bin.stats.nmalloc = 1; + bin.stats.curregs = 1; + } + bin_dalloc_locked_begin(&info, binind); + bin_dalloc_locked_step( + tsdn, true, &bin, &info, binind, &slab, ptr); + bin_dalloc_locked_finish(tsdn, &bin, &info); + + expect_u_eq(edata_nfree_get(&slab), bin_info->nregs, + "All regions should be free for binind %u", binind); + malloc_mutex_unlock(tsdn, &bin.lock); + + free(edata_addr_get(&slab)); + } +} +TEST_END + +int +main(void) { + return test( + test_bin_init, + test_bin_slab_reg_alloc, + test_bin_slab_reg_alloc_batch, + test_bin_slab_reg_alloc_batch_partial, + test_bin_slabs_nonfull, + test_bin_slabs_full, + test_bin_slabs_full_auto, + test_bin_dissociate_slabcur, + test_bin_dissociate_nonfull, + test_bin_refill_slabcur_with_fresh_slab, + test_bin_refill_slabcur_no_fresh_slab, + test_bin_refill_slabcur_full_to_list, + test_bin_malloc_with_fresh_slab, + test_bin_malloc_no_fresh_slab, + test_bin_dalloc_locked, + test_bin_lower_slab_replaces_slabcur, + test_bin_lower_slab_inserts_nonfull, + test_bin_dalloc_slab_prepare, + test_bin_shard_sizes, + test_bin_alloc_free_cycle, + test_bin_multi_size_class); +} From a056c20d671e5d001d9d232a7c6d9bb30288e9ef Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 2 Mar 2026 17:15:35 -0800 Subject: [PATCH 2579/2608] Handle tcache init failures gracefully tsd_tcache_data_init() returns true on failure but its callers ignore this return value, leaving the per-thread tcache in an uninitialized state after a failure. This change disables the tcache on an initialization failure and logs an error message. If opt_abort is true, it will also abort. New unit tests have been added to test tcache initialization failures. 
--- Makefile.in | 1 + include/jemalloc/internal/tcache_externs.h | 5 +- src/tcache.c | 61 +++++++---- test/unit/tcache_init.c | 116 +++++++++++++++++++++ 4 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 test/unit/tcache_init.c diff --git a/Makefile.in b/Makefile.in index 463693df..ec2215b3 100644 --- a/Makefile.in +++ b/Makefile.in @@ -292,6 +292,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/stats.c \ $(srcroot)test/unit/stats_print.c \ $(srcroot)test/unit/sz.c \ + $(srcroot)test/unit/tcache_init.c \ $(srcroot)test/unit/tcache_max.c \ $(srcroot)test/unit/test_hooks.c \ $(srcroot)test/unit/thread_event.c \ diff --git a/include/jemalloc/internal/tcache_externs.h b/include/jemalloc/internal/tcache_externs.h index 73126db7..b7fdb5a4 100644 --- a/include/jemalloc/internal/tcache_externs.h +++ b/include/jemalloc/internal/tcache_externs.h @@ -64,7 +64,7 @@ bool tcache_bin_ncached_max_read( void tcache_arena_reassociate( tsdn_t *tsdn, tcache_slow_t *tcache_slow, tcache_t *tcache, arena_t *arena); tcache_t *tcache_create_explicit(tsd_t *tsd); -void thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max); void tcache_cleanup(tsd_t *tsd); void tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); bool tcaches_create(tsd_t *tsd, base_t *base, unsigned *r_ind); @@ -80,6 +80,9 @@ void tcache_flush(tsd_t *tsd); bool tsd_tcache_enabled_data_init(tsd_t *tsd); void tcache_enabled_set(tsd_t *tsd, bool enabled); +extern void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment); + void tcache_assert_initialized(tcache_t *tcache); extern te_base_cb_t tcache_gc_te_handler; diff --git a/src/tcache.c b/src/tcache.c index 172d9320..10fa7c21 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -949,6 +949,21 @@ tcache_bin_info_compute(cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { } } +static void * +tcache_stack_alloc_impl(tsdn_t *tsdn, size_t size, size_t alignment) { + if 
(cache_bin_stack_use_thp()) { + /* Alignment is ignored since it comes from THP. */ + assert(alignment == QUANTUM); + return b0_alloc_tcache_stack(tsdn, size); + } + size = sz_sa2u(size, alignment); + return ipallocztm(tsdn, size, alignment, true, NULL, + true, arena_get(TSDN_NULL, 0, true)); +} + +void *(*JET_MUTABLE tcache_stack_alloc)(tsdn_t *tsdn, size_t size, + size_t alignment) = tcache_stack_alloc_impl; + static bool tsd_tcache_data_init_impl( tsd_t *tsd, arena_t *arena, const cache_bin_info_t *tcache_bin_info) { @@ -961,16 +976,7 @@ tsd_tcache_data_init_impl( cache_bin_info_compute_alloc( tcache_bin_info, tcache_nbins, &size, &alignment); - void *mem; - if (cache_bin_stack_use_thp()) { - /* Alignment is ignored since it comes from THP. */ - assert(alignment == QUANTUM); - mem = b0_alloc_tcache_stack(tsd_tsdn(tsd), size); - } else { - size = sz_sa2u(size, alignment); - mem = ipallocztm(tsd_tsdn(tsd), size, alignment, true, NULL, - true, arena_get(TSDN_NULL, 0, true)); - } + void *mem = tcache_stack_alloc(tsd_tsdn(tsd), size, alignment); if (mem == NULL) { return true; } @@ -1010,7 +1016,20 @@ static bool tsd_tcache_data_init(tsd_t *tsd, arena_t *arena, const cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX]) { assert(tcache_bin_info != NULL); - return tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); + bool err = tsd_tcache_data_init_impl(tsd, arena, tcache_bin_info); + if (unlikely(err)) { + /* + * Disable the tcache before calling malloc_write to + * avoid recursive allocations through libc hooks. + */ + tsd_tcache_enabled_set(tsd, false); + tsd_slow_update(tsd); + malloc_write(": Failed to allocate tcache data\n"); + if (opt_abort) { + abort(); + } + } + return err; } /* Created manual tcache for tcache.create mallctl. */ @@ -1062,8 +1081,8 @@ tsd_tcache_enabled_data_init(tsd_t *tsd) { if (opt_tcache) { /* Trigger tcache init. 
*/ - tsd_tcache_data_init( - tsd, NULL, tcache_get_default_ncached_max()); + return tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max()); } return false; @@ -1074,8 +1093,10 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { bool was_enabled = tsd_tcache_enabled_get(tsd); if (!was_enabled && enabled) { - tsd_tcache_data_init( - tsd, NULL, tcache_get_default_ncached_max()); + if (tsd_tcache_data_init( + tsd, NULL, tcache_get_default_ncached_max())) { + return; + } } else if (was_enabled && !enabled) { tcache_cleanup(tsd); } @@ -1084,13 +1105,14 @@ tcache_enabled_set(tsd_t *tsd, bool enabled) { tsd_slow_update(tsd); } -void +bool thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { assert(tcache_max <= TCACHE_MAXCLASS_LIMIT); assert(tcache_max == sz_s2u(tcache_max)); tcache_t *tcache = tsd_tcachep_get(tsd); tcache_slow_t *tcache_slow = tcache->tcache_slow; cache_bin_info_t tcache_bin_info[TCACHE_NBINS_MAX] = {{0}}; + bool ret = false; assert(tcache != NULL && tcache_slow != NULL); bool enabled = tcache_available(tsd); @@ -1111,10 +1133,11 @@ thread_tcache_max_set(tsd_t *tsd, size_t tcache_max) { tcache_max_set(tcache_slow, tcache_max); if (enabled) { - tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); + ret = tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); } assert(tcache_nbins_get(tcache_slow) == sz_size2index(tcache_max) + 1); + return ret; } static bool @@ -1177,9 +1200,7 @@ tcache_bins_ncached_max_write(tsd_t *tsd, char *settings, size_t len) { arena_t *assigned_arena = tcache->tcache_slow->arena; tcache_cleanup(tsd); - tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); - - return false; + return tsd_tcache_data_init(tsd, assigned_arena, tcache_bin_info); } static void diff --git a/test/unit/tcache_init.c b/test/unit/tcache_init.c new file mode 100644 index 00000000..11d4b654 --- /dev/null +++ b/test/unit/tcache_init.c @@ -0,0 +1,116 @@ +#include "test/jemalloc_test.h" + +static void * 
+tcache_stack_alloc_fail(tsdn_t *tsdn, size_t size, size_t alignment) { + return NULL; +} + +TEST_BEGIN(test_tcache_data_init_oom) { + bool orig_opt_abort = opt_abort; + void *(*orig_tcache_stack_alloc)(tsdn_t *, size_t, size_t) = + tcache_stack_alloc; + + opt_abort = false; + tcache_stack_alloc = tcache_stack_alloc_fail; + + /* + * Trigger init through tcache_enabled_set by enabling and + * disabling the tcache. + */ + bool e0, e1; + size_t bool_sz = sizeof(bool); + + /* Disable the tcache. */ + e1 = false; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); + + /* Try to enable the tcache. Initialization should fail. */ + e1 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); + + /* The tcache should be disabled. */ + tsd_t *tsd = tsd_fetch(); + expect_false(tsd_tcache_enabled_get(tsd), + "tcache should be disabled after init failure"); + + /* Allocations should go to the arena. */ + void *p = malloc(64); + expect_ptr_not_null(p, "malloc should succeed without tcache"); + free(p); + + /* Restore the original values */ + tcache_stack_alloc = orig_tcache_stack_alloc; + opt_abort = orig_opt_abort; + + /* + * Try to enable the tcache again. This time initialization + * should succeed. + */ + e1 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); +} +TEST_END + +TEST_BEGIN(test_tcache_reinit_oom) { + bool orig_opt_abort = opt_abort; + void *(*orig_tcache_stack_alloc)(tsdn_t *, size_t, size_t) = + tcache_stack_alloc; + + /* Read current tcache max. 
*/ + size_t old_tcache_max, sz; + sz = sizeof(old_tcache_max); + expect_d_eq(mallctl("thread.tcache.max", (void *)&old_tcache_max, &sz, + NULL, 0), 0, "Unexpected mallctl failure"); + + opt_abort = false; + tcache_stack_alloc = tcache_stack_alloc_fail; + + /* + * Setting thread.tcache.max causes a reinitialization. With + * the thread_stack_alloc override reinitialization should + * fail and disable tcache. + */ + size_t new_tcache_max = 1024; + new_tcache_max = sz_s2u(new_tcache_max); + expect_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&new_tcache_max, sizeof(new_tcache_max)), 0, + "Unexpected mallctl failure"); + + /* Check that the tcache was disabled. */ + tsd_t *tsd = tsd_fetch(); + expect_false(tsd_tcache_enabled_get(tsd), + "tcache should be disabled after reinit failure"); + + /* Allocations should go to the arena. */ + void *p = malloc(64); + expect_ptr_not_null(p, "malloc should succeed without tcache"); + free(p); + + /* Restore the original values */ + tcache_stack_alloc = orig_tcache_stack_alloc; + opt_abort = orig_opt_abort; + + /* + * Try to enable the tcache again. This time initialization + * should succeed. + */ + bool e0, e1; + size_t bool_sz = sizeof(bool); + e1 = true; + expect_d_eq(mallctl("thread.tcache.enabled", (void *)&e0, &bool_sz, + (void *)&e1, bool_sz), 0, "Unexpected mallctl failure"); + + /* Restore the original tcache max. 
*/ + expect_d_eq(mallctl("thread.tcache.max", NULL, NULL, + (void *)&old_tcache_max, sizeof(old_tcache_max)), 0, + "Unexpected mallctl failure"); +} +TEST_END + +int +main(void) { + return test(test_tcache_data_init_oom, test_tcache_reinit_oom); +} From ad726adf7539f78bf652db04f215333f1536bf85 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 2 Mar 2026 13:02:46 -0800 Subject: [PATCH 2580/2608] Separate out the configuration code from initialization --- Makefile.in | 1 + include/jemalloc/internal/conf.h | 27 + .../internal/jemalloc_internal_externs.h | 2 + src/conf.c | 1228 +++++++++++++++++ src/jemalloc.c | 1137 +-------------- 5 files changed, 1261 insertions(+), 1134 deletions(-) create mode 100644 include/jemalloc/internal/conf.h create mode 100644 src/conf.c diff --git a/Makefile.in b/Makefile.in index ec2215b3..1a7207e0 100644 --- a/Makefile.in +++ b/Makefile.in @@ -131,6 +131,7 @@ C_SRCS := $(srcroot)src/jemalloc.c \ $(srcroot)src/large.c \ $(srcroot)src/log.c \ $(srcroot)src/malloc_io.c \ + $(srcroot)src/conf.c \ $(srcroot)src/mutex.c \ $(srcroot)src/nstime.c \ $(srcroot)src/pa.c \ diff --git a/include/jemalloc/internal/conf.h b/include/jemalloc/internal/conf.h new file mode 100644 index 00000000..21661955 --- /dev/null +++ b/include/jemalloc/internal/conf.h @@ -0,0 +1,27 @@ +#ifndef JEMALLOC_INTERNAL_CONF_H +#define JEMALLOC_INTERNAL_CONF_H + +#include "jemalloc/internal/sc.h" + +void malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + char readlink_buf[PATH_MAX + 1]); +void malloc_abort_invalid_conf(void); + +#ifdef JEMALLOC_JET +extern bool had_conf_error; +bool conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p); +void conf_error(const char *msg, const char *k, size_t klen, + const char *v, size_t vlen); +bool conf_handle_bool(const char *v, size_t vlen, bool *result); +bool conf_handle_unsigned(const char *v, size_t vlen, + uintmax_t min, uintmax_t max, bool check_min, bool 
check_max, + bool clip, uintmax_t *result); +bool conf_handle_signed(const char *v, size_t vlen, + intmax_t min, intmax_t max, bool check_min, bool check_max, + bool clip, intmax_t *result); +bool conf_handle_char_p(const char *v, size_t vlen, + char *dest, size_t dest_sz); +#endif + +#endif /* JEMALLOC_INTERNAL_CONF_H */ diff --git a/include/jemalloc/internal/jemalloc_internal_externs.h b/include/jemalloc/internal/jemalloc_internal_externs.h index ea739ea8..9911c199 100644 --- a/include/jemalloc/internal/jemalloc_internal_externs.h +++ b/include/jemalloc/internal/jemalloc_internal_externs.h @@ -3,6 +3,7 @@ #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/fxp.h" #include "jemalloc/internal/hpa_opts.h" #include "jemalloc/internal/nstime.h" #include "jemalloc/internal/sec_opts.h" @@ -34,6 +35,7 @@ extern bool opt_experimental_infallible_new; extern bool opt_experimental_tcache_gc; extern bool opt_zero; extern unsigned opt_narenas; +extern fxp_t opt_narenas_ratio; extern zero_realloc_action_t opt_zero_realloc_action; extern malloc_init_t malloc_init_state; extern const char *const zero_realloc_mode_names[]; diff --git a/src/conf.c b/src/conf.c new file mode 100644 index 00000000..8a23bda6 --- /dev/null +++ b/src/conf.c @@ -0,0 +1,1228 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/assert.h" +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/extent_dss.h" +#include "jemalloc/internal/extent_mmap.h" +#include "jemalloc/internal/fxp.h" +#include "jemalloc/internal/log.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/nstime.h" +#include "jemalloc/internal/safety_check.h" +#include "jemalloc/internal/san.h" +#include "jemalloc/internal/sc.h" +#include "jemalloc/internal/util.h" + +#include "jemalloc/internal/conf.h" + +/* Whether 
encountered any invalid config options. */ +bool had_conf_error; + +static char * +jemalloc_getenv(const char *name) { +#ifdef JEMALLOC_FORCE_GETENV + return getenv(name); +#else +# ifdef JEMALLOC_HAVE_SECURE_GETENV + return secure_getenv(name); +# else +# ifdef JEMALLOC_HAVE_ISSETUGID + if (issetugid() != 0) { + return NULL; + } +# endif + return getenv(name); +# endif +#endif +} + +static void +init_opt_stats_opts(const char *v, size_t vlen, char *dest) { + size_t opts_len = strlen(dest); + assert(opts_len <= stats_print_tot_num_options); + + for (size_t i = 0; i < vlen; i++) { + switch (v[i]) { +#define OPTION(o, v, d, s) \ + case o: \ + break; + STATS_PRINT_OPTIONS +#undef OPTION + default: + continue; + } + + if (strchr(dest, v[i]) != NULL) { + /* Ignore repeated. */ + continue; + } + + dest[opts_len++] = v[i]; + dest[opts_len] = '\0'; + assert(opts_len <= stats_print_tot_num_options); + } + assert(opts_len == strlen(dest)); +} + +static void +malloc_conf_format_error(const char *msg, const char *begin, const char *end) { + size_t len = end - begin + 1; + len = len > BUFERROR_BUF ? 
BUFERROR_BUF : len; + + malloc_printf(": %s -- %.*s\n", msg, (int)len, begin); +} + +JET_EXTERN bool +conf_next(char const **opts_p, char const **k_p, size_t *klen_p, + char const **v_p, size_t *vlen_p) { + bool accept; + const char *opts = *opts_p; + + *k_p = opts; + + for (accept = false; !accept;) { + switch (*opts) { + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '_': + opts++; + break; + case ':': + opts++; + *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; + *v_p = opts; + accept = true; + break; + case '\0': + if (opts != *opts_p) { + malloc_conf_format_error( + "Conf string ends with key", *opts_p, + opts - 1); + had_conf_error = true; + } + return true; + default: + malloc_conf_format_error( + "Malformed conf string", *opts_p, opts); + had_conf_error = true; + return true; + } + } + + for (accept = false; !accept;) { + switch (*opts) { + case ',': + opts++; + /* + * Look ahead one character here, because the next time + * this function is called, it will assume that end of + * input has been cleanly reached if no input remains, + * but we have optimistically already consumed the + * comma if one exists. 
+ */ + if (*opts == '\0') { + malloc_conf_format_error( + "Conf string ends with comma", *opts_p, + opts - 1); + had_conf_error = true; + } + *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; + accept = true; + break; + case '\0': + *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; + accept = true; + break; + default: + opts++; + break; + } + } + + *opts_p = opts; + return false; +} + +void +malloc_abort_invalid_conf(void) { + assert(opt_abort_conf); + malloc_printf( + ": Abort (abort_conf:true) on invalid conf " + "value (see above).\n"); + invalid_conf_abort(); +} + +JET_EXTERN void +conf_error( + const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { + malloc_printf( + ": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); + /* If abort_conf is set, error out after processing all options. */ + const char *experimental = "experimental_"; + if (strncmp(k, experimental, strlen(experimental)) == 0) { + /* However, tolerate experimental features. */ + return; + } + const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; + const size_t deprecated_cnt = (sizeof(deprecated) + / sizeof(deprecated[0])); + for (size_t i = 0; i < deprecated_cnt; ++i) { + if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { + /* Tolerate deprecated features. 
*/ + return; + } + } + had_conf_error = true; +} + +JET_EXTERN bool +conf_handle_bool(const char *v, size_t vlen, bool *result) { + if (sizeof("true") - 1 == vlen && strncmp("true", v, vlen) == 0) { + *result = true; + } else if (sizeof("false") - 1 == vlen + && strncmp("false", v, vlen) == 0) { + *result = false; + } else { + return true; + } + return false; +} + +JEMALLOC_DIAGNOSTIC_PUSH +JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-function") + +JET_EXTERN bool +conf_handle_unsigned(const char *v, size_t vlen, + uintmax_t min, uintmax_t max, bool check_min, bool check_max, + bool clip, uintmax_t *result) { + char *end; + set_errno(0); + uintmax_t mv = (uintmax_t)malloc_strtoumax(v, &end, 0); + if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) { + return true; + } + if (clip) { + if (check_min && mv < min) { + *result = min; + } else if (check_max && mv > max) { + *result = max; + } else { + *result = mv; + } + } else { + if ((check_min && mv < min) || (check_max && mv > max)) { + return true; + } + *result = mv; + } + return false; +} + +JET_EXTERN bool +conf_handle_signed(const char *v, size_t vlen, + intmax_t min, intmax_t max, bool check_min, bool check_max, + bool clip, intmax_t *result) { + char *end; + set_errno(0); + intmax_t mv = (intmax_t)malloc_strtoumax(v, &end, 0); + if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) { + return true; + } + if (clip) { + if (check_min && mv < min) { + *result = min; + } else if (check_max && mv > max) { + *result = max; + } else { + *result = mv; + } + } else { + if ((check_min && mv < min) || (check_max && mv > max)) { + return true; + } + *result = mv; + } + return false; +} + +JET_EXTERN bool +conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz) { + size_t cpylen = (vlen <= dest_sz - 1) ? 
vlen : dest_sz - 1; + strncpy(dest, v, cpylen); + dest[cpylen] = '\0'; + return false; +} + +JEMALLOC_DIAGNOSTIC_POP + +/* Number of sources for initializing malloc_conf */ +#define MALLOC_CONF_NSOURCES 5 + +static const char * +obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { + if (config_debug) { + static unsigned read_source = 0; + /* + * Each source should only be read once, to minimize # of + * syscalls on init. + */ + assert(read_source == which_source); + read_source++; + } + assert(which_source < MALLOC_CONF_NSOURCES); + + const char *ret; + switch (which_source) { + case 0: + ret = config_malloc_conf; + break; + case 1: + if (je_malloc_conf != NULL) { + /* Use options that were compiled into the program. */ + ret = je_malloc_conf; + } else { + /* No configuration specified. */ + ret = NULL; + } + break; + case 2: { +#ifndef JEMALLOC_CONFIG_FILE + ret = NULL; + break; +#else + ssize_t linklen = 0; +# ifndef _WIN32 + int saved_errno = errno; + const char *linkname = +# ifdef JEMALLOC_PREFIX + "/etc/" JEMALLOC_PREFIX "malloc.conf" +# else + "/etc/malloc.conf" +# endif + ; + + /* + * Try to use the contents of the "/etc/malloc.conf" symbolic + * link's name. + */ +# ifndef JEMALLOC_READLINKAT + linklen = readlink(linkname, readlink_buf, PATH_MAX); +# else + linklen = readlinkat( + AT_FDCWD, linkname, readlink_buf, PATH_MAX); +# endif + if (linklen == -1) { + /* No configuration specified. */ + linklen = 0; + /* Restore errno. */ + set_errno(saved_errno); + } +# endif + readlink_buf[linklen] = '\0'; + ret = readlink_buf; + break; +#endif + } + case 3: { +#ifndef JEMALLOC_CONFIG_ENV + ret = NULL; + break; +#else + const char *envname = +# ifdef JEMALLOC_PREFIX + JEMALLOC_CPREFIX "MALLOC_CONF" +# else + "MALLOC_CONF" +# endif + ; + + if ((ret = jemalloc_getenv(envname)) != NULL) { + opt_malloc_conf_env_var = ret; + } else { + /* No configuration specified. 
*/ + ret = NULL; + } + break; +#endif + } + case 4: { + ret = je_malloc_conf_2_conf_harder; + break; + } + default: + not_reached(); + ret = NULL; + } + return ret; +} + +static void +validate_hpa_settings(void) { + if (!hpa_supported() || !opt_hpa) { + return; + } + if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { + had_conf_error = true; + malloc_printf( + ": huge page size (%zu) greater than expected." + "May not be supported or behave as expected.", + HUGEPAGE); + } +#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE + if (opt_hpa_opts.hugify_sync) { + had_conf_error = true; + malloc_printf( + ": hpa_hugify_sync config option is enabled, " + "but MADV_COLLAPSE support was not detected at build " + "time."); + } +#endif +} + +static void +malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], + char readlink_buf[PATH_MAX + 1]) { + static const char *opts_explain[MALLOC_CONF_NSOURCES] = { + "string specified via --with-malloc-conf", + "string pointed to by the global variable malloc_conf", + "\"name\" of the file referenced by the symbolic link named " + "/etc/malloc.conf", + "value of the environment variable MALLOC_CONF", + "string pointed to by the global variable " + "malloc_conf_2_conf_harder", + }; + unsigned i; + const char *opts, *k, *v; + size_t klen, vlen; + + for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { + /* Get runtime configuration. */ + if (initial_call) { + opts_cache[i] = obtain_malloc_conf(i, readlink_buf); + } + opts = opts_cache[i]; + if (!initial_call && opt_confirm_conf) { + malloc_printf( + ": malloc_conf #%u (%s): \"%s\"\n", i + 1, + opts_explain[i], opts != NULL ? 
opts : ""); + } + if (opts == NULL) { + continue; + } + + while (*opts != '\0' + && !conf_next(&opts, &k, &klen, &v, &vlen)) { +#define CONF_ERROR(msg, k, klen, v, vlen) \ + if (!initial_call) { \ + conf_error(msg, k, klen, v, vlen); \ + cur_opt_valid = false; \ + } +#define CONF_CONTINUE \ + { \ + if (!initial_call && opt_confirm_conf && cur_opt_valid) { \ + malloc_printf( \ + ": -- " \ + "Set conf value: %.*s:%.*s" \ + "\n", \ + (int)klen, k, (int)vlen, v); \ + } \ + continue; \ + } +#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0) +#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0) +#define CONF_HANDLE_BOOL(o, n) \ + if (CONF_MATCH(n)) { \ + if (conf_handle_bool(v, vlen, &o)) { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } \ + CONF_CONTINUE; \ + } + /* + * One of the CONF_MIN macros below expands, in one of the use points, + * to "unsigned integer < 0", which is always false, triggering the + * GCC -Wtype-limits warning, which we disable here and re-enable below. 
+ */ + JEMALLOC_DIAGNOSTIC_PUSH + JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS + +#define CONF_DONT_CHECK_MIN(um, min) false +#define CONF_CHECK_MIN(um, min) ((um) < (min)) +#define CONF_DONT_CHECK_MAX(um, max) false +#define CONF_CHECK_MAX(um, max) ((um) > (max)) + +#define CONF_VALUE_READ(max_t, result) \ + char *end; \ + set_errno(0); \ + result = (max_t)malloc_strtoumax(v, &end, 0); +#define CONF_VALUE_READ_FAIL() \ + (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) + +#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ + if (CONF_MATCH(n)) { \ + max_t mv; \ + CONF_VALUE_READ(max_t, mv) \ + if (CONF_VALUE_READ_FAIL()) { \ + CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ + } else if (clip) { \ + if (check_min(mv, (t)(min))) { \ + o = (t)(min); \ + } else if (check_max(mv, (t)(max))) { \ + o = (t)(max); \ + } else { \ + o = (t)mv; \ + } \ + } else { \ + if (check_min(mv, (t)(min)) \ + || check_max(mv, (t)(max))) { \ + CONF_ERROR( \ + "Out-of-range " \ + "conf value", \ + k, klen, v, vlen); \ + } else { \ + o = (t)mv; \ + } \ + } \ + CONF_CONTINUE; \ + } +#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip) + +#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_SIGNED( \ + int64_t, o, n, min, max, check_min, check_max, clip) +#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip) \ + CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, 
check_max, clip) +#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ + CONF_HANDLE_T_SIGNED( \ + ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false) +#define CONF_HANDLE_CHAR_P(o, n, d) \ + if (CONF_MATCH(n)) { \ + size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen \ + : sizeof(o) - 1; \ + strncpy(o, v, cpylen); \ + o[cpylen] = '\0'; \ + CONF_CONTINUE; \ + } + + bool cur_opt_valid = true; + + CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") + if (initial_call) { + continue; + } + + CONF_HANDLE_BOOL(opt_abort, "abort") + CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") + CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") + CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") + CONF_HANDLE_BOOL( + opt_experimental_hpa_start_huge_if_thp_always, + "experimental_hpa_start_huge_if_thp_always") + CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, + "experimental_hpa_enforce_hugify") + CONF_HANDLE_BOOL( + opt_huge_arena_pac_thp, "huge_arena_pac_thp") + if (strncmp("metadata_thp", k, klen) == 0) { + int m; + bool match = false; + for (m = 0; m < metadata_thp_mode_limit; m++) { + if (strncmp(metadata_thp_mode_names[m], + v, vlen) + == 0) { + opt_metadata_thp = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_BOOL(opt_retain, "retain") + if (strncmp("dss", k, klen) == 0) { + int m; + bool match = false; + for (m = 0; m < dss_prec_limit; m++) { + if (strncmp(dss_prec_names[m], v, vlen) + == 0) { + if (extent_dss_prec_set(m)) { + CONF_ERROR( + "Error setting dss", + k, klen, v, vlen); + } else { + opt_dss = + dss_prec_names[m]; + match = true; + break; + } + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("narenas")) { + if (CONF_MATCH_VALUE("default")) { + opt_narenas = 0; + CONF_CONTINUE; + } else { + CONF_HANDLE_UNSIGNED(opt_narenas, + "narenas", 1, UINT_MAX, + CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, + /* clip */ false) + } + } + if (CONF_MATCH("narenas_ratio")) { + char *end; + bool err = fxp_parse( + &opt_narenas_ratio, v, &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("bin_shards")) { + const char *bin_shards_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t size_start; + size_t size_end; + size_t nshards; + bool err = multi_setting_parse_next( + &bin_shards_segment_cur, &vlen_left, + &size_start, &size_end, &nshards); + if (err + || bin_update_shard_size( + bin_shard_sizes, size_start, + size_end, nshards)) { + CONF_ERROR( + "Invalid settings for " + "bin_shards", + k, klen, v, vlen); + break; + } + } while (vlen_left > 0); + CONF_CONTINUE; + } + if (CONF_MATCH("tcache_ncached_max")) { + bool err = tcache_bin_info_default_init( + v, vlen); + if (err) { + CONF_ERROR( + "Invalid settings for " + "tcache_ncached_max", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_INT64_T(opt_mutex_max_spin, + "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); + CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, + "dirty_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); + CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, + "muzzy_decay_ms", -1, + NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) + ? 
NSTIME_SEC_MAX * KQU(1000) + : SSIZE_MAX); + CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, + "process_madvise_max_batch", 0, + PROCESS_MADVISE_MAX_BATCH_LIMIT, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_BOOL(opt_stats_print, "stats_print") + if (CONF_MATCH("stats_print_opts")) { + init_opt_stats_opts( + v, vlen, opt_stats_print_opts); + CONF_CONTINUE; + } + CONF_HANDLE_INT64_T(opt_stats_interval, + "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, false) + if (CONF_MATCH("stats_interval_opts")) { + init_opt_stats_opts( + v, vlen, opt_stats_interval_opts); + CONF_CONTINUE; + } + if (config_fill) { + if (CONF_MATCH("junk")) { + if (CONF_MATCH_VALUE("true")) { + opt_junk = "true"; + opt_junk_alloc = opt_junk_free = + true; + } else if (CONF_MATCH_VALUE("false")) { + opt_junk = "false"; + opt_junk_alloc = opt_junk_free = + false; + } else if (CONF_MATCH_VALUE("alloc")) { + opt_junk = "alloc"; + opt_junk_alloc = true; + opt_junk_free = false; + } else if (CONF_MATCH_VALUE("free")) { + opt_junk = "free"; + opt_junk_alloc = false; + opt_junk_free = true; + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_BOOL(opt_zero, "zero") + } + if (config_utrace) { + CONF_HANDLE_BOOL(opt_utrace, "utrace") + } + if (config_xmalloc) { + CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") + } + if (config_enable_cxx) { + CONF_HANDLE_BOOL( + opt_experimental_infallible_new, + "experimental_infallible_new") + } + + CONF_HANDLE_BOOL(opt_experimental_tcache_gc, + "experimental_tcache_gc") + CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, + TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + if (CONF_MATCH("lg_tcache_max")) { + size_t m; + CONF_VALUE_READ(size_t, m) + if (CONF_VALUE_READ_FAIL()) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + /* clip if necessary */ + if (m > 
TCACHE_LG_MAXCLASS_LIMIT) { + m = TCACHE_LG_MAXCLASS_LIMIT; + } + opt_tcache_max = (size_t)1 << m; + } + CONF_CONTINUE; + } + /* + * Anyone trying to set a value outside -16 to 16 is + * deeply confused. + */ + CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, + "lg_tcache_nslots_mul", -16, 16) + /* Ditto with values past 2048. */ + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, + "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, + "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, + "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, + "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes, + "tcache_gc_delay_bytes", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, + "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, + "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, + CONF_CHECK_MAX, /* clip */ true) + CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, + "debug_double_free_max_scan", 0, UINT_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, + /* clip */ false) + CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, + "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ false) + + /* + * The runtime option of oversize_threshold remains + * undocumented. It may be tweaked in the next major + * release (6.0). The default value 8M is rather + * conservative / safe. Tuning it further down may + * improve fragmentation a bit more, but may also cause + * contention on the huge arena. 
+ */ + CONF_HANDLE_SIZE_T(opt_oversize_threshold, + "oversize_threshold", 0, SC_LARGE_MAXCLASS, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, + (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, + CONF_CHECK_MAX, false) + + if (strncmp("percpu_arena", k, klen) == 0) { + bool match = false; + for (int m = percpu_arena_mode_names_base; + m < percpu_arena_mode_names_limit; m++) { + if (strncmp(percpu_arena_mode_names[m], + v, vlen) + == 0) { + if (!have_percpu_arena) { + CONF_ERROR( + "No getcpu support", + k, klen, v, vlen); + } + opt_percpu_arena = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + CONF_HANDLE_BOOL( + opt_background_thread, "background_thread"); + CONF_HANDLE_SIZE_T(opt_max_background_threads, + "max_background_threads", 1, + opt_max_background_threads, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); + CONF_HANDLE_BOOL(opt_hpa, "hpa") + CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, + "hpa_slab_max_alloc", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + + /* + * Accept either a ratio-based or an exact hugification + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold, + "hpa_hugification_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_hugification_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.hugification_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + + CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms, + "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN, + CONF_DONT_CHECK_MAX, false); + + CONF_HANDLE_BOOL( + opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); + + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms, + "hpa_min_purge_interval_ms", 0, 0, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + + CONF_HANDLE_SSIZE_T( + opt_hpa_opts.experimental_max_purge_nhp, + "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); + + /* + * Accept either a ratio-based or an exact purge + * threshold. 
+ */ + CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold, + "hpa_purge_threshold", PAGE, HUGEPAGE, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + if (CONF_MATCH("hpa_purge_threshold_ratio")) { + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen + || ratio > FXP_INIT_INT(1)) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.purge_threshold = + fxp_mul_frac(HUGEPAGE, ratio); + } + CONF_CONTINUE; + } + + CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms, + "hpa_min_purge_delay_ms", 0, UINT64_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); + + if (strncmp("hpa_hugify_style", k, klen) == 0) { + bool match = false; + for (int m = 0; m < hpa_hugify_style_limit; + m++) { + if (strncmp(hpa_hugify_style_names[m], + v, vlen) + == 0) { + opt_hpa_opts.hugify_style = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + + if (CONF_MATCH("hpa_dirty_mult")) { + if (CONF_MATCH_VALUE("-1")) { + opt_hpa_opts.dirty_mult = (fxp_t)-1; + CONF_CONTINUE; + } + fxp_t ratio; + char *end; + bool err = fxp_parse(&ratio, v, &end); + if (err || (size_t)(end - v) != vlen) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } else { + opt_hpa_opts.dirty_mult = ratio; + } + CONF_CONTINUE; + } + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, + "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, + CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, + "hpa_sec_max_alloc", PAGE, + USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, + CONF_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, + "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, + CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); + CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, + "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, + CONF_CHECK_MIN, CONF_CHECK_MAX, true); + + if (CONF_MATCH("slab_sizes")) { + if (CONF_MATCH_VALUE("default")) { 
+ sc_data_init(sc_data); + CONF_CONTINUE; + } + bool err; + const char *slab_size_segment_cur = v; + size_t vlen_left = vlen; + do { + size_t slab_start; + size_t slab_end; + size_t pgs; + err = multi_setting_parse_next( + &slab_size_segment_cur, &vlen_left, + &slab_start, &slab_end, &pgs); + if (!err) { + sc_data_update_slab_size( + sc_data, slab_start, + slab_end, (int)pgs); + } else { + CONF_ERROR( + "Invalid settings " + "for slab_sizes", + k, klen, v, vlen); + } + } while (!err && vlen_left > 0); + CONF_CONTINUE; + } + if (config_prof) { + CONF_HANDLE_BOOL(opt_prof, "prof") + CONF_HANDLE_CHAR_P( + opt_prof_prefix, "prof_prefix", "jeprof") + CONF_HANDLE_BOOL(opt_prof_active, "prof_active") + CONF_HANDLE_BOOL(opt_prof_thread_active_init, + "prof_thread_active_init") + CONF_HANDLE_SIZE_T(opt_lg_prof_sample, + "lg_prof_sample", 0, + (sizeof(uint64_t) << 3) - 1, + CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) + CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") + CONF_HANDLE_UNSIGNED(opt_prof_bt_max, + "prof_bt_max", 1, PROF_BT_MAX_LIMIT, + CONF_CHECK_MIN, CONF_CHECK_MAX, + /* clip */ true) + CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, + "lg_prof_interval", -1, + (sizeof(uint64_t) << 3) - 1) + CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") + CONF_HANDLE_BOOL(opt_prof_final, "prof_final") + CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") + CONF_HANDLE_BOOL( + opt_prof_leak_error, "prof_leak_error") + CONF_HANDLE_BOOL(opt_prof_log, "prof_log") + CONF_HANDLE_BOOL(opt_prof_pid_namespace, + "prof_pid_namespace") + CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, + "prof_recent_alloc_max", -1, SSIZE_MAX) + CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") + CONF_HANDLE_BOOL(opt_prof_sys_thread_name, + "prof_sys_thread_name") + if (CONF_MATCH("prof_time_resolution")) { + if (CONF_MATCH_VALUE("default")) { + opt_prof_time_res = + prof_time_res_default; + } else if (CONF_MATCH_VALUE("high")) { + if (!config_high_res_timer) { + CONF_ERROR( + "No high resolution" + " timer support", + k, 
klen, v, vlen); + } else { + opt_prof_time_res = + prof_time_res_high; + } + } else { + CONF_ERROR("Invalid conf value", + k, klen, v, vlen); + } + CONF_CONTINUE; + } + /* + * Undocumented. When set to false, don't + * correct for an unbiasing bug in jeprof + * attribution. This can be handy if you want + * to get consistent numbers from your binary + * across different jemalloc versions, even if + * those numbers are incorrect. The default is + * true. + */ + CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias") + } + if (config_log) { + if (CONF_MATCH("log")) { + size_t cpylen = (vlen + <= sizeof(log_var_names) + ? vlen + : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + CONF_CONTINUE; + } + } + if (CONF_MATCH("thp")) { + bool match = false; + for (int m = 0; m < thp_mode_names_limit; m++) { + if (strncmp(thp_mode_names[m], v, vlen) + == 0) { + if (!have_madvise_huge + && !have_memcntl) { + CONF_ERROR( + "No THP support", k, + klen, v, vlen); + } + opt_thp = m; + match = true; + break; + } + } + if (!match) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (CONF_MATCH("zero_realloc")) { + if (CONF_MATCH_VALUE("alloc")) { + opt_zero_realloc_action = + zero_realloc_action_alloc; + } else if (CONF_MATCH_VALUE("free")) { + opt_zero_realloc_action = + zero_realloc_action_free; + } else if (CONF_MATCH_VALUE("abort")) { + opt_zero_realloc_action = + zero_realloc_action_abort; + } else { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + CONF_CONTINUE; + } + if (config_uaf_detection + && CONF_MATCH("lg_san_uaf_align")) { + ssize_t a; + CONF_VALUE_READ(ssize_t, a) + if (CONF_VALUE_READ_FAIL() || a < -1) { + CONF_ERROR("Invalid conf value", k, + klen, v, vlen); + } + if (a == -1) { + opt_lg_san_uaf_align = -1; + CONF_CONTINUE; + } + + /* clip if necessary */ + ssize_t max_allowed = (sizeof(size_t) << 3) - 1; + ssize_t min_allowed = LG_PAGE; + if (a > max_allowed) { + a = 
max_allowed; + } else if (a < min_allowed) { + a = min_allowed; + } + + opt_lg_san_uaf_align = a; + CONF_CONTINUE; + } + + CONF_HANDLE_SIZE_T(opt_san_guard_small, + "san_guard_small", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + CONF_HANDLE_SIZE_T(opt_san_guard_large, + "san_guard_large", 0, SIZE_T_MAX, + CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) + + /* + * Disable large size classes is now the default + * behavior in jemalloc. Although it is configurable + * in MALLOC_CONF, this is mainly for debugging + * purposes and should not be tuned. + */ + CONF_HANDLE_BOOL(opt_disable_large_size_classes, + "disable_large_size_classes"); + + CONF_ERROR("Invalid conf pair", k, klen, v, vlen); +#undef CONF_ERROR +#undef CONF_CONTINUE +#undef CONF_MATCH +#undef CONF_MATCH_VALUE +#undef CONF_HANDLE_BOOL +#undef CONF_DONT_CHECK_MIN +#undef CONF_CHECK_MIN +#undef CONF_DONT_CHECK_MAX +#undef CONF_CHECK_MAX +#undef CONF_HANDLE_T +#undef CONF_HANDLE_T_U +#undef CONF_HANDLE_T_SIGNED +#undef CONF_HANDLE_UNSIGNED +#undef CONF_HANDLE_SIZE_T +#undef CONF_HANDLE_SSIZE_T +#undef CONF_HANDLE_CHAR_P + /* Re-enable diagnostic "-Wtype-limits" */ + JEMALLOC_DIAGNOSTIC_POP + } + validate_hpa_settings(); + if (opt_abort_conf && had_conf_error) { + malloc_abort_invalid_conf(); + } + } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); +} + +static bool +malloc_conf_init_check_deps(void) { + if (opt_prof_leak_error && !opt_prof_final) { + malloc_printf( + ": prof_leak_error is set w/o " + "prof_final.\n"); + return true; + } + /* To emphasize in the stats output that opt is disabled when !debug. 
*/ + if (!config_debug) { + opt_debug_double_free_max_scan = 0; + } + + return false; +} + +void +malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], + char readlink_buf[PATH_MAX + 1]) { + const char *opts_cache[MALLOC_CONF_NSOURCES] = { + NULL, NULL, NULL, NULL, NULL}; + + /* The first call only set the confirm_conf option and opts_cache */ + malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); + malloc_conf_init_helper( + sc_data, bin_shard_sizes, false, opts_cache, NULL); + if (malloc_conf_init_check_deps()) { + /* check_deps does warning msg only; abort below if needed. */ + if (opt_abort_conf) { + malloc_abort_invalid_conf(); + } + } +} + +#undef MALLOC_CONF_NSOURCES diff --git a/src/jemalloc.c b/src/jemalloc.c index d82788eb..8d341ba3 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c @@ -25,6 +25,8 @@ #include "jemalloc/internal/thread_event.h" #include "jemalloc/internal/util.h" +#include "jemalloc/internal/conf.h" + /******************************************************************************/ /* Data. */ @@ -165,7 +167,7 @@ bool opt_experimental_infallible_new = false; bool opt_experimental_tcache_gc = true; bool opt_zero = false; unsigned opt_narenas = 0; -static fxp_t opt_narenas_ratio = FXP_INIT_INT(4); +fxp_t opt_narenas_ratio = FXP_INIT_INT(4); unsigned ncpus; @@ -291,8 +293,6 @@ typedef struct { # define UTRACE(a, b, c) #endif -/* Whether encountered any invalid config options. */ -static bool had_conf_error = false; /******************************************************************************/ /* @@ -733,24 +733,6 @@ check_entry_exit_locking(tsdn_t *tsdn) { * Begin initialization functions. 
*/ -static char * -jemalloc_getenv(const char *name) { -#ifdef JEMALLOC_FORCE_GETENV - return getenv(name); -#else -# ifdef JEMALLOC_HAVE_SECURE_GETENV - return secure_getenv(name); -# else -# ifdef JEMALLOC_HAVE_ISSETUGID - if (issetugid() != 0) { - return NULL; - } -# endif - return getenv(name); -# endif -#endif -} - static unsigned malloc_ncpus(void) { long result; @@ -826,205 +808,6 @@ malloc_cpu_count_is_deterministic(void) { #endif } -static void -init_opt_stats_opts(const char *v, size_t vlen, char *dest) { - size_t opts_len = strlen(dest); - assert(opts_len <= stats_print_tot_num_options); - - for (size_t i = 0; i < vlen; i++) { - switch (v[i]) { -#define OPTION(o, v, d, s) \ - case o: \ - break; - STATS_PRINT_OPTIONS -#undef OPTION - default: - continue; - } - - if (strchr(dest, v[i]) != NULL) { - /* Ignore repeated. */ - continue; - } - - dest[opts_len++] = v[i]; - dest[opts_len] = '\0'; - assert(opts_len <= stats_print_tot_num_options); - } - assert(opts_len == strlen(dest)); -} - -static void -malloc_conf_format_error(const char *msg, const char *begin, const char *end) { - size_t len = end - begin + 1; - len = len > BUFERROR_BUF ? 
BUFERROR_BUF : len; - - malloc_printf(": %s -- %.*s\n", msg, (int)len, begin); -} - -static bool -malloc_conf_next(char const **opts_p, char const **k_p, size_t *klen_p, - char const **v_p, size_t *vlen_p) { - bool accept; - const char *opts = *opts_p; - - *k_p = opts; - - for (accept = false; !accept;) { - switch (*opts) { - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - case 'W': - case 'X': - case 'Y': - case 'Z': - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'l': - case 'm': - case 'n': - case 'o': - case 'p': - case 'q': - case 'r': - case 's': - case 't': - case 'u': - case 'v': - case 'w': - case 'x': - case 'y': - case 'z': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - case '_': - opts++; - break; - case ':': - opts++; - *klen_p = (uintptr_t)opts - 1 - (uintptr_t)*k_p; - *v_p = opts; - accept = true; - break; - case '\0': - if (opts != *opts_p) { - malloc_conf_format_error( - "Conf string ends with key", *opts_p, - opts - 1); - had_conf_error = true; - } - return true; - default: - malloc_conf_format_error( - "Malformed conf string", *opts_p, opts); - had_conf_error = true; - return true; - } - } - - for (accept = false; !accept;) { - switch (*opts) { - case ',': - opts++; - /* - * Look ahead one character here, because the next time - * this function is called, it will assume that end of - * input has been cleanly reached if no input remains, - * but we have optimistically already consumed the - * comma if one exists. 
- */ - if (*opts == '\0') { - malloc_conf_format_error( - "Conf string ends with comma", *opts_p, - opts - 1); - had_conf_error = true; - } - *vlen_p = (uintptr_t)opts - 1 - (uintptr_t)*v_p; - accept = true; - break; - case '\0': - *vlen_p = (uintptr_t)opts - (uintptr_t)*v_p; - accept = true; - break; - default: - opts++; - break; - } - } - - *opts_p = opts; - return false; -} - -static void -malloc_abort_invalid_conf(void) { - assert(opt_abort_conf); - malloc_printf( - ": Abort (abort_conf:true) on invalid conf " - "value (see above).\n"); - invalid_conf_abort(); -} - -static void -malloc_conf_error( - const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { - malloc_printf( - ": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); - /* If abort_conf is set, error out after processing all options. */ - const char *experimental = "experimental_"; - if (strncmp(k, experimental, strlen(experimental)) == 0) { - /* However, tolerate experimental features. */ - return; - } - const char *deprecated[] = {"hpa_sec_bytes_after_flush"}; - const size_t deprecated_cnt = (sizeof(deprecated) - / sizeof(deprecated[0])); - for (size_t i = 0; i < deprecated_cnt; ++i) { - if (strncmp(k, deprecated[i], strlen(deprecated[i])) == 0) { - /* Tolerate deprecated features. */ - return; - } - } - had_conf_error = true; -} - static void malloc_slow_flag_init(void) { /* @@ -1040,920 +823,6 @@ malloc_slow_flag_init(void) { malloc_slow = (malloc_slow_flags != 0); } -/* Number of sources for initializing malloc_conf */ -#define MALLOC_CONF_NSOURCES 5 - -static const char * -obtain_malloc_conf(unsigned which_source, char readlink_buf[PATH_MAX + 1]) { - if (config_debug) { - static unsigned read_source = 0; - /* - * Each source should only be read once, to minimize # of - * syscalls on init. 
- */ - assert(read_source == which_source); - read_source++; - } - assert(which_source < MALLOC_CONF_NSOURCES); - - const char *ret; - switch (which_source) { - case 0: - ret = config_malloc_conf; - break; - case 1: - if (je_malloc_conf != NULL) { - /* Use options that were compiled into the program. */ - ret = je_malloc_conf; - } else { - /* No configuration specified. */ - ret = NULL; - } - break; - case 2: { -#ifndef JEMALLOC_CONFIG_FILE - ret = NULL; - break; -#else - ssize_t linklen = 0; -# ifndef _WIN32 - int saved_errno = errno; - const char *linkname = -# ifdef JEMALLOC_PREFIX - "/etc/" JEMALLOC_PREFIX "malloc.conf" -# else - "/etc/malloc.conf" -# endif - ; - - /* - * Try to use the contents of the "/etc/malloc.conf" symbolic - * link's name. - */ -# ifndef JEMALLOC_READLINKAT - linklen = readlink(linkname, readlink_buf, PATH_MAX); -# else - linklen = readlinkat( - AT_FDCWD, linkname, readlink_buf, PATH_MAX); -# endif - if (linklen == -1) { - /* No configuration specified. */ - linklen = 0; - /* Restore errno. */ - set_errno(saved_errno); - } -# endif - readlink_buf[linklen] = '\0'; - ret = readlink_buf; - break; -#endif - } - case 3: { -#ifndef JEMALLOC_CONFIG_ENV - ret = NULL; - break; -#else - const char *envname = -# ifdef JEMALLOC_PREFIX - JEMALLOC_CPREFIX "MALLOC_CONF" -# else - "MALLOC_CONF" -# endif - ; - - if ((ret = jemalloc_getenv(envname)) != NULL) { - opt_malloc_conf_env_var = ret; - } else { - /* No configuration specified. */ - ret = NULL; - } - break; -#endif - } - case 4: { - ret = je_malloc_conf_2_conf_harder; - break; - } - default: - not_reached(); - ret = NULL; - } - return ret; -} - -static void -validate_hpa_settings(void) { - if (!hpa_supported() || !opt_hpa) { - return; - } - if (HUGEPAGE > HUGEPAGE_MAX_EXPECTED_SIZE) { - had_conf_error = true; - malloc_printf( - ": huge page size (%zu) greater than expected." 
- "May not be supported or behave as expected.", - HUGEPAGE); - } -#ifndef JEMALLOC_HAVE_MADVISE_COLLAPSE - if (opt_hpa_opts.hugify_sync) { - had_conf_error = true; - malloc_printf( - ": hpa_hugify_sync config option is enabled, " - "but MADV_COLLAPSE support was not detected at build " - "time."); - } -#endif -} - -static void -malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - bool initial_call, const char *opts_cache[MALLOC_CONF_NSOURCES], - char readlink_buf[PATH_MAX + 1]) { - static const char *opts_explain[MALLOC_CONF_NSOURCES] = { - "string specified via --with-malloc-conf", - "string pointed to by the global variable malloc_conf", - "\"name\" of the file referenced by the symbolic link named " - "/etc/malloc.conf", - "value of the environment variable MALLOC_CONF", - "string pointed to by the global variable " - "malloc_conf_2_conf_harder", - }; - unsigned i; - const char *opts, *k, *v; - size_t klen, vlen; - - for (i = 0; i < MALLOC_CONF_NSOURCES; i++) { - /* Get runtime configuration. */ - if (initial_call) { - opts_cache[i] = obtain_malloc_conf(i, readlink_buf); - } - opts = opts_cache[i]; - if (!initial_call && opt_confirm_conf) { - malloc_printf( - ": malloc_conf #%u (%s): \"%s\"\n", i + 1, - opts_explain[i], opts != NULL ? 
opts : ""); - } - if (opts == NULL) { - continue; - } - - while (*opts != '\0' - && !malloc_conf_next(&opts, &k, &klen, &v, &vlen)) { -#define CONF_ERROR(msg, k, klen, v, vlen) \ - if (!initial_call) { \ - malloc_conf_error(msg, k, klen, v, vlen); \ - cur_opt_valid = false; \ - } -#define CONF_CONTINUE \ - { \ - if (!initial_call && opt_confirm_conf && cur_opt_valid) { \ - malloc_printf( \ - ": -- " \ - "Set conf value: %.*s:%.*s" \ - "\n", \ - (int)klen, k, (int)vlen, v); \ - } \ - continue; \ - } -#define CONF_MATCH(n) (sizeof(n) - 1 == klen && strncmp(n, k, klen) == 0) -#define CONF_MATCH_VALUE(n) (sizeof(n) - 1 == vlen && strncmp(n, v, vlen) == 0) -#define CONF_HANDLE_BOOL(o, n) \ - if (CONF_MATCH(n)) { \ - if (CONF_MATCH_VALUE("true")) { \ - o = true; \ - } else if (CONF_MATCH_VALUE("false")) { \ - o = false; \ - } else { \ - CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ - } \ - CONF_CONTINUE; \ - } - /* - * One of the CONF_MIN macros below expands, in one of the use points, - * to "unsigned integer < 0", which is always false, triggering the - * GCC -Wtype-limits warning, which we disable here and re-enable below. 
- */ - JEMALLOC_DIAGNOSTIC_PUSH - JEMALLOC_DIAGNOSTIC_IGNORE_TYPE_LIMITS - -#define CONF_DONT_CHECK_MIN(um, min) false -#define CONF_CHECK_MIN(um, min) ((um) < (min)) -#define CONF_DONT_CHECK_MAX(um, max) false -#define CONF_CHECK_MAX(um, max) ((um) > (max)) - -#define CONF_VALUE_READ(max_t, result) \ - char *end; \ - set_errno(0); \ - result = (max_t)malloc_strtoumax(v, &end, 0); -#define CONF_VALUE_READ_FAIL() \ - (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) - -#define CONF_HANDLE_T(t, max_t, o, n, min, max, check_min, check_max, clip) \ - if (CONF_MATCH(n)) { \ - max_t mv; \ - CONF_VALUE_READ(max_t, mv) \ - if (CONF_VALUE_READ_FAIL()) { \ - CONF_ERROR("Invalid conf value", k, klen, v, vlen); \ - } else if (clip) { \ - if (check_min(mv, (t)(min))) { \ - o = (t)(min); \ - } else if (check_max(mv, (t)(max))) { \ - o = (t)(max); \ - } else { \ - o = (t)mv; \ - } \ - } else { \ - if (check_min(mv, (t)(min)) \ - || check_max(mv, (t)(max))) { \ - CONF_ERROR( \ - "Out-of-range " \ - "conf value", \ - k, klen, v, vlen); \ - } else { \ - o = (t)mv; \ - } \ - } \ - CONF_CONTINUE; \ - } -#define CONF_HANDLE_T_U(t, o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T(t, uintmax_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_T_SIGNED(t, o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T(t, intmax_t, o, n, min, max, check_min, check_max, clip) - -#define CONF_HANDLE_UNSIGNED(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(unsigned, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_SIZE_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(size_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_INT64_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_SIGNED( \ - int64_t, o, n, min, max, check_min, check_max, clip) -#define CONF_HANDLE_UINT64_T(o, n, min, max, check_min, check_max, clip) \ - CONF_HANDLE_T_U(uint64_t, o, n, min, max, check_min, 
check_max, clip) -#define CONF_HANDLE_SSIZE_T(o, n, min, max) \ - CONF_HANDLE_T_SIGNED( \ - ssize_t, o, n, min, max, CONF_CHECK_MIN, CONF_CHECK_MAX, false) -#define CONF_HANDLE_CHAR_P(o, n, d) \ - if (CONF_MATCH(n)) { \ - size_t cpylen = (vlen <= sizeof(o) - 1) ? vlen \ - : sizeof(o) - 1; \ - strncpy(o, v, cpylen); \ - o[cpylen] = '\0'; \ - CONF_CONTINUE; \ - } - - bool cur_opt_valid = true; - - CONF_HANDLE_BOOL(opt_confirm_conf, "confirm_conf") - if (initial_call) { - continue; - } - - CONF_HANDLE_BOOL(opt_abort, "abort") - CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") - CONF_HANDLE_BOOL(opt_cache_oblivious, "cache_oblivious") - CONF_HANDLE_BOOL(opt_trust_madvise, "trust_madvise") - CONF_HANDLE_BOOL( - opt_experimental_hpa_start_huge_if_thp_always, - "experimental_hpa_start_huge_if_thp_always") - CONF_HANDLE_BOOL(opt_experimental_hpa_enforce_hugify, - "experimental_hpa_enforce_hugify") - CONF_HANDLE_BOOL( - opt_huge_arena_pac_thp, "huge_arena_pac_thp") - if (strncmp("metadata_thp", k, klen) == 0) { - int m; - bool match = false; - for (m = 0; m < metadata_thp_mode_limit; m++) { - if (strncmp(metadata_thp_mode_names[m], - v, vlen) - == 0) { - opt_metadata_thp = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_BOOL(opt_retain, "retain") - if (strncmp("dss", k, klen) == 0) { - int m; - bool match = false; - for (m = 0; m < dss_prec_limit; m++) { - if (strncmp(dss_prec_names[m], v, vlen) - == 0) { - if (extent_dss_prec_set(m)) { - CONF_ERROR( - "Error setting dss", - k, klen, v, vlen); - } else { - opt_dss = - dss_prec_names[m]; - match = true; - break; - } - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (CONF_MATCH("narenas")) { - if (CONF_MATCH_VALUE("default")) { - opt_narenas = 0; - CONF_CONTINUE; - } else { - CONF_HANDLE_UNSIGNED(opt_narenas, - "narenas", 1, UINT_MAX, - CONF_CHECK_MIN, 
CONF_DONT_CHECK_MAX, - /* clip */ false) - } - } - if (CONF_MATCH("narenas_ratio")) { - char *end; - bool err = fxp_parse( - &opt_narenas_ratio, v, &end); - if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (CONF_MATCH("bin_shards")) { - const char *bin_shards_segment_cur = v; - size_t vlen_left = vlen; - do { - size_t size_start; - size_t size_end; - size_t nshards; - bool err = multi_setting_parse_next( - &bin_shards_segment_cur, &vlen_left, - &size_start, &size_end, &nshards); - if (err - || bin_update_shard_size( - bin_shard_sizes, size_start, - size_end, nshards)) { - CONF_ERROR( - "Invalid settings for " - "bin_shards", - k, klen, v, vlen); - break; - } - } while (vlen_left > 0); - CONF_CONTINUE; - } - if (CONF_MATCH("tcache_ncached_max")) { - bool err = tcache_bin_info_default_init( - v, vlen); - if (err) { - CONF_ERROR( - "Invalid settings for " - "tcache_ncached_max", - k, klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_INT64_T(opt_mutex_max_spin, - "mutex_max_spin", -1, INT64_MAX, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, false); - CONF_HANDLE_SSIZE_T(opt_dirty_decay_ms, - "dirty_decay_ms", -1, - NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) - ? NSTIME_SEC_MAX * KQU(1000) - : SSIZE_MAX); - CONF_HANDLE_SSIZE_T(opt_muzzy_decay_ms, - "muzzy_decay_ms", -1, - NSTIME_SEC_MAX * KQU(1000) < QU(SSIZE_MAX) - ? 
NSTIME_SEC_MAX * KQU(1000) - : SSIZE_MAX); - CONF_HANDLE_SIZE_T(opt_process_madvise_max_batch, - "process_madvise_max_batch", 0, - PROCESS_MADVISE_MAX_BATCH_LIMIT, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_BOOL(opt_stats_print, "stats_print") - if (CONF_MATCH("stats_print_opts")) { - init_opt_stats_opts( - v, vlen, opt_stats_print_opts); - CONF_CONTINUE; - } - CONF_HANDLE_INT64_T(opt_stats_interval, - "stats_interval", -1, INT64_MAX, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, false) - if (CONF_MATCH("stats_interval_opts")) { - init_opt_stats_opts( - v, vlen, opt_stats_interval_opts); - CONF_CONTINUE; - } - if (config_fill) { - if (CONF_MATCH("junk")) { - if (CONF_MATCH_VALUE("true")) { - opt_junk = "true"; - opt_junk_alloc = opt_junk_free = - true; - } else if (CONF_MATCH_VALUE("false")) { - opt_junk = "false"; - opt_junk_alloc = opt_junk_free = - false; - } else if (CONF_MATCH_VALUE("alloc")) { - opt_junk = "alloc"; - opt_junk_alloc = true; - opt_junk_free = false; - } else if (CONF_MATCH_VALUE("free")) { - opt_junk = "free"; - opt_junk_alloc = false; - opt_junk_free = true; - } else { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_BOOL(opt_zero, "zero") - } - if (config_utrace) { - CONF_HANDLE_BOOL(opt_utrace, "utrace") - } - if (config_xmalloc) { - CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") - } - if (config_enable_cxx) { - CONF_HANDLE_BOOL( - opt_experimental_infallible_new, - "experimental_infallible_new") - } - - CONF_HANDLE_BOOL(opt_experimental_tcache_gc, - "experimental_tcache_gc") - CONF_HANDLE_BOOL(opt_tcache, "tcache") - CONF_HANDLE_SIZE_T(opt_tcache_max, "tcache_max", 0, - TCACHE_MAXCLASS_LIMIT, CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - if (CONF_MATCH("lg_tcache_max")) { - size_t m; - CONF_VALUE_READ(size_t, m) - if (CONF_VALUE_READ_FAIL()) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - /* clip if necessary */ - if (m > 
TCACHE_LG_MAXCLASS_LIMIT) { - m = TCACHE_LG_MAXCLASS_LIMIT; - } - opt_tcache_max = (size_t)1 << m; - } - CONF_CONTINUE; - } - /* - * Anyone trying to set a value outside -16 to 16 is - * deeply confused. - */ - CONF_HANDLE_SSIZE_T(opt_lg_tcache_nslots_mul, - "lg_tcache_nslots_mul", -16, 16) - /* Ditto with values past 2048. */ - CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_min, - "tcache_nslots_small_min", 1, 2048, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_tcache_nslots_small_max, - "tcache_nslots_small_max", 1, 2048, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_tcache_nslots_large, - "tcache_nslots_large", 1, 2048, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_SIZE_T(opt_tcache_gc_incr_bytes, - "tcache_gc_incr_bytes", 1024, SIZE_T_MAX, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SIZE_T(opt_tcache_gc_delay_bytes, - "tcache_gc_delay_bytes", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ false) - CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_small_div, - "lg_tcache_flush_small_div", 1, 16, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_lg_tcache_flush_large_div, - "lg_tcache_flush_large_div", 1, 16, CONF_CHECK_MIN, - CONF_CHECK_MAX, /* clip */ true) - CONF_HANDLE_UNSIGNED(opt_debug_double_free_max_scan, - "debug_double_free_max_scan", 0, UINT_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, - /* clip */ false) - CONF_HANDLE_SIZE_T(opt_calloc_madvise_threshold, - "calloc_madvise_threshold", 0, SC_LARGE_MAXCLASS, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ false) - - /* - * The runtime option of oversize_threshold remains - * undocumented. It may be tweaked in the next major - * release (6.0). The default value 8M is rather - * conservative / safe. Tuning it further down may - * improve fragmentation a bit more, but may also cause - * contention on the huge arena. 
- */ - CONF_HANDLE_SIZE_T(opt_oversize_threshold, - "oversize_threshold", 0, SC_LARGE_MAXCLASS, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, false) - CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, - "lg_extent_max_active_fit", 0, - (sizeof(size_t) << 3), CONF_DONT_CHECK_MIN, - CONF_CHECK_MAX, false) - - if (strncmp("percpu_arena", k, klen) == 0) { - bool match = false; - for (int m = percpu_arena_mode_names_base; - m < percpu_arena_mode_names_limit; m++) { - if (strncmp(percpu_arena_mode_names[m], - v, vlen) - == 0) { - if (!have_percpu_arena) { - CONF_ERROR( - "No getcpu support", - k, klen, v, vlen); - } - opt_percpu_arena = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - CONF_HANDLE_BOOL( - opt_background_thread, "background_thread"); - CONF_HANDLE_SIZE_T(opt_max_background_threads, - "max_background_threads", 1, - opt_max_background_threads, CONF_CHECK_MIN, - CONF_CHECK_MAX, true); - CONF_HANDLE_BOOL(opt_hpa, "hpa") - CONF_HANDLE_SIZE_T(opt_hpa_opts.slab_max_alloc, - "hpa_slab_max_alloc", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - - /* - * Accept either a ratio-based or an exact hugification - * threshold. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_opts.hugification_threshold, - "hpa_hugification_threshold", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - if (CONF_MATCH("hpa_hugification_threshold_ratio")) { - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, &end); - if (err || (size_t)(end - v) != vlen - || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - opt_hpa_opts.hugification_threshold = - fxp_mul_frac(HUGEPAGE, ratio); - } - CONF_CONTINUE; - } - - CONF_HANDLE_UINT64_T(opt_hpa_opts.hugify_delay_ms, - "hpa_hugify_delay_ms", 0, 0, CONF_DONT_CHECK_MIN, - CONF_DONT_CHECK_MAX, false); - - CONF_HANDLE_BOOL( - opt_hpa_opts.hugify_sync, "hpa_hugify_sync"); - - CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_interval_ms, - "hpa_min_purge_interval_ms", 0, 0, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - - CONF_HANDLE_SSIZE_T( - opt_hpa_opts.experimental_max_purge_nhp, - "experimental_hpa_max_purge_nhp", -1, SSIZE_MAX); - - /* - * Accept either a ratio-based or an exact purge - * threshold. 
- */ - CONF_HANDLE_SIZE_T(opt_hpa_opts.purge_threshold, - "hpa_purge_threshold", PAGE, HUGEPAGE, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - if (CONF_MATCH("hpa_purge_threshold_ratio")) { - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, &end); - if (err || (size_t)(end - v) != vlen - || ratio > FXP_INIT_INT(1)) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - opt_hpa_opts.purge_threshold = - fxp_mul_frac(HUGEPAGE, ratio); - } - CONF_CONTINUE; - } - - CONF_HANDLE_UINT64_T(opt_hpa_opts.min_purge_delay_ms, - "hpa_min_purge_delay_ms", 0, UINT64_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false); - - if (strncmp("hpa_hugify_style", k, klen) == 0) { - bool match = false; - for (int m = 0; m < hpa_hugify_style_limit; - m++) { - if (strncmp(hpa_hugify_style_names[m], - v, vlen) - == 0) { - opt_hpa_opts.hugify_style = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - - if (CONF_MATCH("hpa_dirty_mult")) { - if (CONF_MATCH_VALUE("-1")) { - opt_hpa_opts.dirty_mult = (fxp_t)-1; - CONF_CONTINUE; - } - fxp_t ratio; - char *end; - bool err = fxp_parse(&ratio, v, &end); - if (err || (size_t)(end - v) != vlen) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } else { - opt_hpa_opts.dirty_mult = ratio; - } - CONF_CONTINUE; - } - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.nshards, - "hpa_sec_nshards", 0, 0, CONF_CHECK_MIN, - CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_alloc, - "hpa_sec_max_alloc", PAGE, - USIZE_GROW_SLOW_THRESHOLD, CONF_CHECK_MIN, - CONF_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.max_bytes, - "hpa_sec_max_bytes", SEC_OPTS_MAX_BYTES_DEFAULT, 0, - CONF_CHECK_MIN, CONF_DONT_CHECK_MAX, true); - CONF_HANDLE_SIZE_T(opt_hpa_sec_opts.batch_fill_extra, - "hpa_sec_batch_fill_extra", 1, HUGEPAGE_PAGES, - CONF_CHECK_MIN, CONF_CHECK_MAX, true); - - if (CONF_MATCH("slab_sizes")) { - if (CONF_MATCH_VALUE("default")) { 
- sc_data_init(sc_data); - CONF_CONTINUE; - } - bool err; - const char *slab_size_segment_cur = v; - size_t vlen_left = vlen; - do { - size_t slab_start; - size_t slab_end; - size_t pgs; - err = multi_setting_parse_next( - &slab_size_segment_cur, &vlen_left, - &slab_start, &slab_end, &pgs); - if (!err) { - sc_data_update_slab_size( - sc_data, slab_start, - slab_end, (int)pgs); - } else { - CONF_ERROR( - "Invalid settings " - "for slab_sizes", - k, klen, v, vlen); - } - } while (!err && vlen_left > 0); - CONF_CONTINUE; - } - if (config_prof) { - CONF_HANDLE_BOOL(opt_prof, "prof") - CONF_HANDLE_CHAR_P( - opt_prof_prefix, "prof_prefix", "jeprof") - CONF_HANDLE_BOOL(opt_prof_active, "prof_active") - CONF_HANDLE_BOOL(opt_prof_thread_active_init, - "prof_thread_active_init") - CONF_HANDLE_SIZE_T(opt_lg_prof_sample, - "lg_prof_sample", 0, - (sizeof(uint64_t) << 3) - 1, - CONF_DONT_CHECK_MIN, CONF_CHECK_MAX, true) - CONF_HANDLE_BOOL(opt_prof_accum, "prof_accum") - CONF_HANDLE_UNSIGNED(opt_prof_bt_max, - "prof_bt_max", 1, PROF_BT_MAX_LIMIT, - CONF_CHECK_MIN, CONF_CHECK_MAX, - /* clip */ true) - CONF_HANDLE_SSIZE_T(opt_lg_prof_interval, - "lg_prof_interval", -1, - (sizeof(uint64_t) << 3) - 1) - CONF_HANDLE_BOOL(opt_prof_gdump, "prof_gdump") - CONF_HANDLE_BOOL(opt_prof_final, "prof_final") - CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") - CONF_HANDLE_BOOL( - opt_prof_leak_error, "prof_leak_error") - CONF_HANDLE_BOOL(opt_prof_log, "prof_log") - CONF_HANDLE_BOOL(opt_prof_pid_namespace, - "prof_pid_namespace") - CONF_HANDLE_SSIZE_T(opt_prof_recent_alloc_max, - "prof_recent_alloc_max", -1, SSIZE_MAX) - CONF_HANDLE_BOOL(opt_prof_stats, "prof_stats") - CONF_HANDLE_BOOL(opt_prof_sys_thread_name, - "prof_sys_thread_name") - if (CONF_MATCH("prof_time_resolution")) { - if (CONF_MATCH_VALUE("default")) { - opt_prof_time_res = - prof_time_res_default; - } else if (CONF_MATCH_VALUE("high")) { - if (!config_high_res_timer) { - CONF_ERROR( - "No high resolution" - " timer support", - k, 
klen, v, vlen); - } else { - opt_prof_time_res = - prof_time_res_high; - } - } else { - CONF_ERROR("Invalid conf value", - k, klen, v, vlen); - } - CONF_CONTINUE; - } - /* - * Undocumented. When set to false, don't - * correct for an unbiasing bug in jeprof - * attribution. This can be handy if you want - * to get consistent numbers from your binary - * across different jemalloc versions, even if - * those numbers are incorrect. The default is - * true. - */ - CONF_HANDLE_BOOL(opt_prof_unbias, "prof_unbias") - } - if (config_log) { - if (CONF_MATCH("log")) { - size_t cpylen = (vlen - <= sizeof(log_var_names) - ? vlen - : sizeof(log_var_names) - 1); - strncpy(log_var_names, v, cpylen); - log_var_names[cpylen] = '\0'; - CONF_CONTINUE; - } - } - if (CONF_MATCH("thp")) { - bool match = false; - for (int m = 0; m < thp_mode_names_limit; m++) { - if (strncmp(thp_mode_names[m], v, vlen) - == 0) { - if (!have_madvise_huge - && !have_memcntl) { - CONF_ERROR( - "No THP support", k, - klen, v, vlen); - } - opt_thp = m; - match = true; - break; - } - } - if (!match) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (CONF_MATCH("zero_realloc")) { - if (CONF_MATCH_VALUE("alloc")) { - opt_zero_realloc_action = - zero_realloc_action_alloc; - } else if (CONF_MATCH_VALUE("free")) { - opt_zero_realloc_action = - zero_realloc_action_free; - } else if (CONF_MATCH_VALUE("abort")) { - opt_zero_realloc_action = - zero_realloc_action_abort; - } else { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - CONF_CONTINUE; - } - if (config_uaf_detection - && CONF_MATCH("lg_san_uaf_align")) { - ssize_t a; - CONF_VALUE_READ(ssize_t, a) - if (CONF_VALUE_READ_FAIL() || a < -1) { - CONF_ERROR("Invalid conf value", k, - klen, v, vlen); - } - if (a == -1) { - opt_lg_san_uaf_align = -1; - CONF_CONTINUE; - } - - /* clip if necessary */ - ssize_t max_allowed = (sizeof(size_t) << 3) - 1; - ssize_t min_allowed = LG_PAGE; - if (a > max_allowed) { - a = 
max_allowed; - } else if (a < min_allowed) { - a = min_allowed; - } - - opt_lg_san_uaf_align = a; - CONF_CONTINUE; - } - - CONF_HANDLE_SIZE_T(opt_san_guard_small, - "san_guard_small", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - CONF_HANDLE_SIZE_T(opt_san_guard_large, - "san_guard_large", 0, SIZE_T_MAX, - CONF_DONT_CHECK_MIN, CONF_DONT_CHECK_MAX, false) - - /* - * Disable large size classes is now the default - * behavior in jemalloc. Although it is configurable - * in MALLOC_CONF, this is mainly for debugging - * purposes and should not be tuned. - */ - CONF_HANDLE_BOOL(opt_disable_large_size_classes, - "disable_large_size_classes"); - - CONF_ERROR("Invalid conf pair", k, klen, v, vlen); -#undef CONF_ERROR -#undef CONF_CONTINUE -#undef CONF_MATCH -#undef CONF_MATCH_VALUE -#undef CONF_HANDLE_BOOL -#undef CONF_DONT_CHECK_MIN -#undef CONF_CHECK_MIN -#undef CONF_DONT_CHECK_MAX -#undef CONF_CHECK_MAX -#undef CONF_HANDLE_T -#undef CONF_HANDLE_T_U -#undef CONF_HANDLE_T_SIGNED -#undef CONF_HANDLE_UNSIGNED -#undef CONF_HANDLE_SIZE_T -#undef CONF_HANDLE_SSIZE_T -#undef CONF_HANDLE_CHAR_P - /* Re-enable diagnostic "-Wtype-limits" */ - JEMALLOC_DIAGNOSTIC_POP - } - validate_hpa_settings(); - if (opt_abort_conf && had_conf_error) { - malloc_abort_invalid_conf(); - } - } - atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); -} - -static bool -malloc_conf_init_check_deps(void) { - if (opt_prof_leak_error && !opt_prof_final) { - malloc_printf( - ": prof_leak_error is set w/o " - "prof_final.\n"); - return true; - } - /* To emphasize in the stats output that opt is disabled when !debug. 
*/ - if (!config_debug) { - opt_debug_double_free_max_scan = 0; - } - - return false; -} - -static void -malloc_conf_init(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], - char readlink_buf[PATH_MAX + 1]) { - const char *opts_cache[MALLOC_CONF_NSOURCES] = { - NULL, NULL, NULL, NULL, NULL}; - - /* The first call only set the confirm_conf option and opts_cache */ - malloc_conf_init_helper(NULL, NULL, true, opts_cache, readlink_buf); - malloc_conf_init_helper( - sc_data, bin_shard_sizes, false, opts_cache, NULL); - if (malloc_conf_init_check_deps()) { - /* check_deps does warning msg only; abort below if needed. */ - if (opt_abort_conf) { - malloc_abort_invalid_conf(); - } - } -} - -#undef MALLOC_CONF_NSOURCES - static bool malloc_init_hard_needed(void) { if (malloc_initialized() From 86b721921386a7192e010ec28c7b2308373d07b0 Mon Sep 17 00:00:00 2001 From: Carl Shapiro Date: Mon, 2 Mar 2026 13:02:59 -0800 Subject: [PATCH 2581/2608] Add unit tests for conf parsing and its helpers --- Makefile.in | 5 ++ test/unit/conf.c | 113 +++++++++++++++++++++++++++++ test/unit/conf_init_0.c | 22 ++++++ test/unit/conf_init_1.c | 23 ++++++ test/unit/conf_init_confirm.c | 39 ++++++++++ test/unit/conf_parse.c | 130 ++++++++++++++++++++++++++++++++++ test/unit/malloc_conf_2.c | 24 ++++++- 7 files changed, 354 insertions(+), 2 deletions(-) create mode 100644 test/unit/conf.c create mode 100644 test/unit/conf_init_0.c create mode 100644 test/unit/conf_init_1.c create mode 100644 test/unit/conf_init_confirm.c create mode 100644 test/unit/conf_parse.c diff --git a/Makefile.in b/Makefile.in index 1a7207e0..459f98fb 100644 --- a/Makefile.in +++ b/Makefile.in @@ -216,6 +216,11 @@ TESTS_UNIT := \ $(srcroot)test/unit/buf_writer.c \ $(srcroot)test/unit/cache_bin.c \ $(srcroot)test/unit/ckh.c \ + $(srcroot)test/unit/conf.c \ + $(srcroot)test/unit/conf_init_0.c \ + $(srcroot)test/unit/conf_init_1.c \ + $(srcroot)test/unit/conf_init_confirm.c \ + $(srcroot)test/unit/conf_parse.c \ 
$(srcroot)test/unit/counter.c \ $(srcroot)test/unit/decay.c \ $(srcroot)test/unit/div.c \ diff --git a/test/unit/conf.c b/test/unit/conf.c new file mode 100644 index 00000000..1a1cde7c --- /dev/null +++ b/test/unit/conf.c @@ -0,0 +1,113 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/conf.h" + +TEST_BEGIN(test_conf_next_simple) { + const char *opts = "key:value"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should not be at end"); + expect_zu_eq(klen, 3, "Key length should be 3"); + expect_false(strncmp(k, "key", klen), "Key should be \"key\""); + expect_zu_eq(vlen, 5, "Value length should be 5"); + expect_false(strncmp(v, "value", vlen), "Value should be \"value\""); + expect_false(had_conf_error, "Should not have had an error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_multi) { + const char *opts = "k1:v1,k2:v2"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + bool end; + + had_conf_error = false; + + end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should not be at end after first pair"); + expect_zu_eq(klen, 2, "First key length should be 2"); + expect_false(strncmp(k, "k1", klen), "First key should be \"k1\""); + expect_zu_eq(vlen, 2, "First value length should be 2"); + expect_false(strncmp(v, "v1", vlen), "First value should be \"v1\""); + + end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should not be at end after second pair"); + expect_zu_eq(klen, 2, "Second key length should be 2"); + expect_false(strncmp(k, "k2", klen), "Second key should be \"k2\""); + expect_zu_eq(vlen, 2, "Second value length should be 2"); + expect_false(strncmp(v, "v2", vlen), "Second value should be \"v2\""); + + expect_false(had_conf_error, "Should not have had an error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_empty) { + const char *opts = ""; + const char *k; + size_t klen; + const 
char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_true(end, "Empty string should return true (end)"); + expect_false(had_conf_error, "Empty string should not set error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_missing_value) { + const char *opts = "key_only"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_true(end, "Key without value should return true (end)"); + expect_true(had_conf_error, "Key without value should set error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_malformed) { + const char *opts = "bad!key:val"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_true(end, "Malformed key should return true (end)"); + expect_true(had_conf_error, "Malformed key should set error"); +} +TEST_END + +TEST_BEGIN(test_conf_next_trailing_comma) { + const char *opts = "k:v,"; + const char *k; + size_t klen; + const char *v; + size_t vlen; + + had_conf_error = false; + bool end = conf_next(&opts, &k, &klen, &v, &vlen); + expect_false(end, "Should parse the first pair successfully"); + expect_true(had_conf_error, + "Trailing comma should set error"); +} +TEST_END + +int +main(void) { + return test(test_conf_next_simple, test_conf_next_multi, + test_conf_next_empty, test_conf_next_missing_value, + test_conf_next_malformed, test_conf_next_trailing_comma); +} diff --git a/test/unit/conf_init_0.c b/test/unit/conf_init_0.c new file mode 100644 index 00000000..a1f0e63f --- /dev/null +++ b/test/unit/conf_init_0.c @@ -0,0 +1,22 @@ +#include "test/jemalloc_test.h" + +TEST_BEGIN(test_default_dirty_decay_ms) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); 
+ assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(dirty_decay_ms, 10000, + "dirty_decay_ms should be the default (10000)" + " when no global variables are set"); +} +TEST_END + +int +main(void) { + return test(test_default_dirty_decay_ms); +} diff --git a/test/unit/conf_init_1.c b/test/unit/conf_init_1.c new file mode 100644 index 00000000..07aec5dc --- /dev/null +++ b/test/unit/conf_init_1.c @@ -0,0 +1,23 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "dirty_decay_ms:1234"; + +TEST_BEGIN(test_malloc_conf_dirty_decay_ms) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(dirty_decay_ms, 1234, + "dirty_decay_ms should be 1234 (set via malloc_conf)"); +} +TEST_END + +int +main(void) { + return test(test_malloc_conf_dirty_decay_ms); +} diff --git a/test/unit/conf_init_confirm.c b/test/unit/conf_init_confirm.c new file mode 100644 index 00000000..a4358359 --- /dev/null +++ b/test/unit/conf_init_confirm.c @@ -0,0 +1,39 @@ +#include "test/jemalloc_test.h" + +const char *malloc_conf = "dirty_decay_ms:1234,confirm_conf:true"; + +TEST_BEGIN(test_confirm_conf_two_pass) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + bool confirm_conf; + size_t sz = sizeof(confirm_conf); + + int err = mallctl("opt.confirm_conf", &confirm_conf, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_true(confirm_conf, + "confirm_conf should be true (processed in pass 1)"); +} +TEST_END + +TEST_BEGIN(test_conf_option_applied_in_second_pass) { +#ifdef _WIN32 + test_skip("not supported on win32"); +#endif + + ssize_t dirty_decay_ms; + size_t sz = sizeof(dirty_decay_ms); + + int err = mallctl("opt.dirty_decay_ms", &dirty_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + 
expect_zd_eq(dirty_decay_ms, 1234, + "dirty_decay_ms should be 1234 (processed in pass 2)"); +} +TEST_END + +int +main(void) { + return test(test_confirm_conf_two_pass, + test_conf_option_applied_in_second_pass); +} diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c new file mode 100644 index 00000000..b3fedb40 --- /dev/null +++ b/test/unit/conf_parse.c @@ -0,0 +1,130 @@ +#include "test/jemalloc_test.h" + +#include "jemalloc/internal/conf.h" + +TEST_BEGIN(test_conf_handle_bool_true) { + bool result = false; + bool err = conf_handle_bool("true", sizeof("true") - 1, &result); + expect_false(err, "conf_handle_bool should succeed for \"true\""); + expect_true(result, "result should be true"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_bool_false) { + bool result = true; + bool err = conf_handle_bool("false", sizeof("false") - 1, &result); + expect_false(err, "conf_handle_bool should succeed for \"false\""); + expect_false(result, "result should be false"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_bool_invalid) { + bool result = false; + bool err = conf_handle_bool("yes", sizeof("yes") - 1, &result); + expect_true(err, "conf_handle_bool should fail for \"yes\""); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_in_range) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("100", sizeof("100") - 1, + 1, 2048, true, true, true, &result); + expect_false(err, "Should succeed for in-range value"); + expect_u64_eq((uint64_t)result, 100, "result should be 100"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_clip_max) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, + 1, 2048, true, true, true, &result); + expect_false(err, "Should succeed with clipping"); + expect_u64_eq((uint64_t)result, 2048, + "result should be clipped to max 2048"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_clip_min) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("0", sizeof("0") - 1, + 1, 2048, true, true, 
true, &result); + expect_false(err, "Should succeed with clipping"); + expect_u64_eq((uint64_t)result, 1, + "result should be clipped to min 1"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_no_clip_reject) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, + 1, 2048, true, true, false, &result); + expect_true(err, "Should fail for out-of-range value without clip"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_unsigned_invalid) { + uintmax_t result = 0; + bool err = conf_handle_unsigned("abc", sizeof("abc") - 1, + 1, 2048, true, true, true, &result); + expect_true(err, "Should fail for non-numeric input"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_signed_valid) { + intmax_t result = 0; + bool err = conf_handle_signed("5000", sizeof("5000") - 1, + -1, INTMAX_MAX, true, false, false, &result); + expect_false(err, "Should succeed for valid value"); + expect_d64_eq((int64_t)result, 5000, "result should be 5000"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_signed_negative) { + intmax_t result = 0; + bool err = conf_handle_signed("-1", sizeof("-1") - 1, + -1, INTMAX_MAX, true, false, false, &result); + expect_false(err, "Should succeed for -1"); + expect_d64_eq((int64_t)result, -1, "result should be -1"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_signed_out_of_range) { + intmax_t result = 0; + bool err = conf_handle_signed("5000", sizeof("5000") - 1, + -1, 4999, true, true, false, &result); + expect_true(err, "Should fail for out-of-range value"); +} +TEST_END + +TEST_BEGIN(test_conf_handle_char_p) { + char buf[8]; + bool err; + + /* Normal copy. */ + err = conf_handle_char_p("hello", sizeof("hello") - 1, buf, sizeof(buf)); + expect_false(err, "Should succeed"); + expect_str_eq(buf, "hello", "Should copy string"); + + /* Truncation. 
*/ + err = conf_handle_char_p("longstring", sizeof("longstring") - 1, + buf, sizeof(buf)); + expect_false(err, "Should succeed even when truncating"); + expect_str_eq(buf, "longstr", "Should truncate to dest_sz - 1"); +} +TEST_END + +int +main(void) { + return test(test_conf_handle_bool_true, + test_conf_handle_bool_false, + test_conf_handle_bool_invalid, + test_conf_handle_unsigned_in_range, + test_conf_handle_unsigned_clip_max, + test_conf_handle_unsigned_clip_min, + test_conf_handle_unsigned_no_clip_reject, + test_conf_handle_unsigned_invalid, + test_conf_handle_signed_valid, + test_conf_handle_signed_negative, + test_conf_handle_signed_out_of_range, + test_conf_handle_char_p); +} diff --git a/test/unit/malloc_conf_2.c b/test/unit/malloc_conf_2.c index 023b7102..667e7006 100644 --- a/test/unit/malloc_conf_2.c +++ b/test/unit/malloc_conf_2.c @@ -1,6 +1,6 @@ #include "test/jemalloc_test.h" -const char *malloc_conf = "dirty_decay_ms:1000"; +const char *malloc_conf = "dirty_decay_ms:1000,muzzy_decay_ms:2000"; const char *malloc_conf_2_conf_harder = "dirty_decay_ms:1234"; TEST_BEGIN(test_malloc_conf_2) { @@ -49,7 +49,27 @@ TEST_BEGIN(test_mallctl_global_var) { } TEST_END +TEST_BEGIN(test_non_conflicting_var) { +#ifdef _WIN32 + bool windows = true; +#else + bool windows = false; +#endif + /* Windows doesn't support weak symbol linker trickery. 
*/ + test_skip_if(windows); + + ssize_t muzzy_decay_ms; + size_t sz = sizeof(muzzy_decay_ms); + + int err = mallctl("opt.muzzy_decay_ms", &muzzy_decay_ms, &sz, NULL, 0); + assert_d_eq(err, 0, "Unexpected mallctl failure"); + expect_zd_eq(muzzy_decay_ms, 2000, + "Non-conflicting option from malloc_conf should pass through"); +} +TEST_END + int main(void) { - return test(test_malloc_conf_2, test_mallctl_global_var); + return test(test_malloc_conf_2, test_mallctl_global_var, + test_non_conflicting_var); } From 1d018d8fdabec88134b32122aa054cb8b37fe29c Mon Sep 17 00:00:00 2001 From: Tony Printezis Date: Wed, 25 Mar 2026 06:59:01 -0700 Subject: [PATCH 2582/2608] improve hpdata_assert_consistent() A few ways this consistency check can be improved: * Print which conditions fail and associated values. * Accumulate the result so that we can print all conditions that fail. * Turn hpdata_assert_consistent() into a macro so, when it fails, we can get line number where it's called from. --- include/jemalloc/internal/hpdata.h | 79 ++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/include/jemalloc/internal/hpdata.h b/include/jemalloc/internal/hpdata.h index eb83c900..a9c507f0 100644 --- a/include/jemalloc/internal/hpdata.h +++ b/include/jemalloc/internal/hpdata.h @@ -342,39 +342,76 @@ hpdata_assert_empty(hpdata_t *hpdata) { */ static inline bool hpdata_consistent(hpdata_t *hpdata) { - if (fb_urange_longest(hpdata->active_pages, HUGEPAGE_PAGES) - != hpdata_longest_free_range_get(hpdata)) { - return false; + bool res = true; + + const size_t active_urange_longest = fb_urange_longest( + hpdata->active_pages, HUGEPAGE_PAGES); + const size_t longest_free_range = hpdata_longest_free_range_get(hpdata); + if (active_urange_longest != longest_free_range) { + malloc_printf( + ": active_fb_urange_longest=%zu != hpdata_longest_free_range=%zu\n", + active_urange_longest, longest_free_range); + res = false; } - if (fb_scount(hpdata->active_pages, 
HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata->h_nactive) { - return false; + + const size_t active_scount = fb_scount( + hpdata->active_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + if (active_scount != hpdata->h_nactive) { + malloc_printf( + ": active_fb_scount=%zu != hpdata_nactive=%zu\n", + active_scount, hpdata->h_nactive); + res = false; } - if (fb_scount(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES) - != hpdata->h_ntouched) { - return false; + + const size_t touched_scount = fb_scount( + hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); + if (touched_scount != hpdata->h_ntouched) { + malloc_printf( + ": touched_fb_scount=%zu != hpdata_ntouched=%zu\n", + touched_scount, hpdata->h_ntouched); + res = false; } + if (hpdata->h_ntouched < hpdata->h_nactive) { - return false; + malloc_printf( + ": hpdata_ntouched=%zu < hpdata_nactive=%zu\n", + hpdata->h_ntouched, hpdata->h_nactive); + res = false; } - if (hpdata->h_huge && hpdata->h_ntouched != HUGEPAGE_PAGES) { - return false; + + if (hpdata->h_huge && (hpdata->h_ntouched != HUGEPAGE_PAGES)) { + malloc_printf( + ": hpdata_huge=%d && (hpdata_ntouched=%zu != hugepage_pages=%zu)\n", + hpdata->h_huge, hpdata->h_ntouched, HUGEPAGE_PAGES); + res = false; } - if (hpdata_changing_state_get(hpdata) - && ((hpdata->h_purge_allowed) || hpdata->h_hugify_allowed)) { - return false; + + const bool changing_state = hpdata_changing_state_get(hpdata); + if (changing_state + && (hpdata->h_purge_allowed || hpdata->h_hugify_allowed)) { + malloc_printf( + ": hpdata_changing_state=%d && (hpdata_purge_allowed=%d || hpdata_hugify_allowed=%d)\n", + changing_state, hpdata->h_purge_allowed, + hpdata->h_hugify_allowed); + res = false; } + if (hpdata_hugify_allowed_get(hpdata) != hpdata_in_psset_hugify_container_get(hpdata)) { - return false; + malloc_printf( + ": hpdata_hugify_allowed=%d != hpdata_in_psset_hugify_container=%d\n", + hpdata_hugify_allowed_get(hpdata), + hpdata_in_psset_hugify_container_get(hpdata)); + res = 
false; } - return true; + + return res; } -static inline void -hpdata_assert_consistent(hpdata_t *hpdata) { - assert(hpdata_consistent(hpdata)); -} +#define hpdata_assert_consistent(hpdata) \ + do { \ + assert(hpdata_consistent(hpdata)); \ + } while (0) static inline bool hpdata_empty(const hpdata_t *hpdata) { From d758349ca438ee35769409b06c642ca2d8e408ac Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Wed, 25 Mar 2026 21:13:34 -0700 Subject: [PATCH 2583/2608] Fix psset_pick_purge when last candidate with index 0 dirtiness is ineligible psset_pick_purge used max_bit-- after rejecting a time-ineligible candidate, which caused unnecessary re-scanning of the same bitmap and makes assert fail in debug mode) and a size_t underflow when the lowest-index entry was rejected. Use max_bit = ind - 1 to skip directly past the rejected index. --- src/psset.c | 5 ++++- test/unit/psset.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 2 deletions(-) diff --git a/src/psset.c b/src/psset.c index a8a9615d..4e904feb 100644 --- a/src/psset.c +++ b/src/psset.c @@ -417,7 +417,10 @@ psset_pick_purge(psset_t *psset, const nstime_t *now) { if (nstime_compare(tm_allowed, now) <= 0) { return ps; } - max_bit--; + if (ind == 0) { + break; + } + max_bit = ind - 1; } /* No page is ready yet */ return NULL; diff --git a/test/unit/psset.c b/test/unit/psset.c index 12d55941..6ad653f5 100644 --- a/test/unit/psset.c +++ b/test/unit/psset.c @@ -941,6 +941,58 @@ TEST_BEGIN(test_purge_prefers_empty) { } TEST_END +TEST_BEGIN(test_pick_purge_underflow) { + test_skip_if(hpa_hugepage_size_exceeds_limit()); + void *ptr; + + psset_t psset; + psset_init(&psset); + + /* + * Test that psset_pick_purge skips directly past a time-ineligible + * entry without underflow. + * + * Create a hugified, non-empty hpdata with 1 dirty page, which + * lands at purge list index 0 (pind=0, huge=true). Set its + * purge-allowed time in the future. 
Calling psset_pick_purge + * with a "now" before that time should return NULL without + * looping through all higher indices on the way down. + */ + hpdata_t hpdata_lowest; + nstime_t future_tm, now; + const uint64_t BASE_SEC = 1000; + + hpdata_init(&hpdata_lowest, (void *)(10 * HUGEPAGE), 100, false); + psset_insert(&psset, &hpdata_lowest); + + psset_update_begin(&psset, &hpdata_lowest); + /* Allocate all pages. */ + ptr = hpdata_reserve_alloc(&hpdata_lowest, HUGEPAGE_PAGES * PAGE); + expect_ptr_eq(hpdata_addr_get(&hpdata_lowest), ptr, ""); + /* Hugify the slab. */ + hpdata_hugify(&hpdata_lowest); + /* Free the last page to create exactly 1 dirty page. */ + hpdata_unreserve(&hpdata_lowest, + (void *)((uintptr_t)ptr + (HUGEPAGE_PAGES - 1) * PAGE), PAGE); + /* Now: nactive = HUGEPAGE_PAGES-1, ndirty = 1, huge = true. + * purge_list_ind = sz_psz2ind(sz_psz_quantize_floor(PAGE)) * 2 + 0 + * which should be index 0. */ + hpdata_purge_allowed_set(&hpdata_lowest, true); + nstime_init2(&future_tm, BASE_SEC + 9999, 0); + hpdata_time_purge_allowed_set(&hpdata_lowest, &future_tm); + psset_update_end(&psset, &hpdata_lowest); + + /* + * Call with a "now" before the future time. Should return NULL + * (no eligible entry). 
+ */ + nstime_init2(&now, BASE_SEC + 500, 0); + hpdata_t *to_purge = psset_pick_purge(&psset, &now); + expect_ptr_null( + to_purge, "Should return NULL when no entry is time-eligible"); +} +TEST_END + TEST_BEGIN(test_purge_prefers_empty_huge) { test_skip_if(hpa_hugepage_size_exceeds_limit()); void *ptr; @@ -1020,5 +1072,6 @@ main(void) { test_multi_pageslab, test_stats_merged, test_stats_huge, test_stats_fullness, test_oldest_fit, test_insert_remove, test_purge_prefers_nonhuge, test_purge_timing, - test_purge_prefers_empty, test_purge_prefers_empty_huge); + test_purge_prefers_empty, test_pick_purge_underflow, + test_purge_prefers_empty_huge); } From a87c518babfe81395a63b6b023245d8359ca1b96 Mon Sep 17 00:00:00 2001 From: Weixie Cui Date: Wed, 18 Mar 2026 11:35:09 +0800 Subject: [PATCH 2584/2608] Fix typo in prof_log_rep_check: use != instead of || for alloc_count The condition incorrectly used 'alloc_count || 0' which was likely a typo for 'alloc_count != 0'. While both evaluate similarly for the zero/non-zero case, the fix ensures consistency with bt_count and thr_count checks and uses the correct comparison operator. 
--- src/prof_log.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/prof_log.c b/src/prof_log.c index 64b363bb..74f1372f 100644 --- a/src/prof_log.c +++ b/src/prof_log.c @@ -375,7 +375,7 @@ prof_log_rep_check(void) { size_t alloc_count = prof_log_alloc_count(); if (prof_logging_state == prof_logging_state_stopped) { - if (bt_count != 0 || thr_count != 0 || alloc_count || 0) { + if (bt_count != 0 || thr_count != 0 || alloc_count != 0) { return true; } } From 19bbefe136cf8684e126cdb80f7ef2aba88e55dc Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 18:25:50 -0700 Subject: [PATCH 2585/2608] Remove dead code: extent_commit_wrapper, large_salloc, tcache_gc_dalloc event waits These functions had zero callers anywhere in the codebase: - extent_commit_wrapper: wrapper never called, _impl used directly - large_salloc: trivial wrapper never called - tcache_gc_dalloc_new_event_wait: no header declaration, no callers - tcache_gc_dalloc_postponed_event_wait: no header declaration, no callers --- include/jemalloc/internal/extent.h | 2 -- include/jemalloc/internal/large_externs.h | 1 - src/extent.c | 7 ------- src/large.c | 5 ----- src/tcache.c | 10 ---------- 5 files changed, 25 deletions(-) diff --git a/include/jemalloc/internal/extent.h b/include/jemalloc/internal/extent.h index e81dff2c..a9f81cb7 100644 --- a/include/jemalloc/internal/extent.h +++ b/include/jemalloc/internal/extent.h @@ -57,8 +57,6 @@ void extent_dalloc_wrapper_purged( tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); void extent_destroy_wrapper( tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, edata_t *edata); -bool extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length); bool extent_purge_lazy_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length); bool extent_purge_forced_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, diff --git a/include/jemalloc/internal/large_externs.h 
b/include/jemalloc/internal/large_externs.h index 7cee6752..84c6c5d6 100644 --- a/include/jemalloc/internal/large_externs.h +++ b/include/jemalloc/internal/large_externs.h @@ -17,7 +17,6 @@ void *large_ralloc(tsdn_t *tsdn, arena_t *arena, void *ptr, size_t usize, void large_dalloc_prep_locked(tsdn_t *tsdn, edata_t *edata); void large_dalloc_finish(tsdn_t *tsdn, edata_t *edata); void large_dalloc(tsdn_t *tsdn, edata_t *edata); -size_t large_salloc(tsdn_t *tsdn, const edata_t *edata); void large_prof_info_get( tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent); void large_prof_tctx_reset(edata_t *edata); diff --git a/src/extent.c b/src/extent.c index 0a23bbd9..4b927191 100644 --- a/src/extent.c +++ b/src/extent.c @@ -1239,13 +1239,6 @@ extent_commit_impl(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, return err; } -bool -extent_commit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, - size_t offset, size_t length) { - return extent_commit_impl(tsdn, ehooks, edata, offset, length, - /* growing_retained */ false); -} - static bool extent_decommit_wrapper(tsdn_t *tsdn, ehooks_t *ehooks, edata_t *edata, size_t offset, size_t length) { diff --git a/src/large.c b/src/large.c index 087df99d..56fa16ab 100644 --- a/src/large.c +++ b/src/large.c @@ -276,11 +276,6 @@ large_dalloc(tsdn_t *tsdn, edata_t *edata) { arena_decay_tick(tsdn, arena); } -size_t -large_salloc(tsdn_t *tsdn, const edata_t *edata) { - return edata_usize_get(edata); -} - void large_prof_info_get( tsd_t *tsd, edata_t *edata, prof_info_t *prof_info, bool reset_recent) { diff --git a/src/tcache.c b/src/tcache.c index 10fa7c21..fe210d27 100644 --- a/src/tcache.c +++ b/src/tcache.c @@ -111,16 +111,6 @@ tcache_gc_postponed_event_wait(tsd_t *tsd) { return TE_MIN_START_WAIT; } -uint64_t -tcache_gc_dalloc_new_event_wait(tsd_t *tsd) { - return opt_tcache_gc_incr_bytes; -} - -uint64_t -tcache_gc_dalloc_postponed_event_wait(tsd_t *tsd) { - return TE_MIN_START_WAIT; -} - static inline void 
tcache_bin_fill_ctl_init(tcache_slow_t *tcache_slow, szind_t szind) { assert(szind < SC_NBINS); From 176ea0a801338cae1b938c47f0d7dba7ffef0d25 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 18:12:23 -0700 Subject: [PATCH 2586/2608] Remove experimental.thread.activity_callback --- include/jemalloc/internal/activity_callback.h | 26 ------- include/jemalloc/internal/tsd_internals.h | 6 +- src/ctl.c | 33 +-------- src/peak_event.c | 13 ---- test/unit/mallctl.c | 73 +------------------ 5 files changed, 3 insertions(+), 148 deletions(-) delete mode 100644 include/jemalloc/internal/activity_callback.h diff --git a/include/jemalloc/internal/activity_callback.h b/include/jemalloc/internal/activity_callback.h deleted file mode 100644 index 6745f1a2..00000000 --- a/include/jemalloc/internal/activity_callback.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H -#define JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H - -#include "jemalloc/internal/jemalloc_preamble.h" - -/* - * The callback to be executed "periodically", in response to some amount of - * allocator activity. - * - * This callback need not be computing any sort of peak (although that's the - * intended first use case), but we drive it from the peak counter, so it's - * keeps things tidy to keep it here. - * - * The calls to this thunk get driven by the peak_event module. 
- */ -#define ACTIVITY_CALLBACK_THUNK_INITIALIZER \ - { NULL, NULL } -typedef void (*activity_callback_t)( - void *uctx, uint64_t allocated, uint64_t deallocated); -typedef struct activity_callback_thunk_s activity_callback_thunk_t; -struct activity_callback_thunk_s { - activity_callback_t callback; - void *uctx; -}; - -#endif /* JEMALLOC_INTERNAL_ACTIVITY_CALLBACK_H */ diff --git a/include/jemalloc/internal/tsd_internals.h b/include/jemalloc/internal/tsd_internals.h index f675587d..53b58d0c 100644 --- a/include/jemalloc/internal/tsd_internals.h +++ b/include/jemalloc/internal/tsd_internals.h @@ -4,7 +4,6 @@ #define JEMALLOC_INTERNAL_TSD_INTERNALS_H #include "jemalloc/internal/jemalloc_preamble.h" -#include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/arena_types.h" #include "jemalloc/internal/assert.h" #include "jemalloc/internal/bin_types.h" @@ -84,8 +83,6 @@ typedef ql_elm(tsd_t) tsd_link_t; O(tsd_link, tsd_link_t, tsd_link_t) \ O(in_hook, bool, bool) \ O(peak, peak_t, peak_t) \ - O(activity_callback_thunk, activity_callback_thunk_t, \ - activity_callback_thunk_t) \ O(tcache_slow, tcache_slow_t, tcache_slow_t) \ O(rtree_ctx, rtree_ctx_t, rtree_ctx_t) @@ -105,8 +102,7 @@ typedef ql_elm(tsd_t) tsd_link_t; /* sec_shard */ (uint8_t) - 1, \ /* binshards */ TSD_BINSHARDS_ZERO_INITIALIZER, \ /* tsd_link */ {NULL}, /* in_hook */ false, \ - /* peak */ PEAK_INITIALIZER, /* activity_callback_thunk */ \ - ACTIVITY_CALLBACK_THUNK_INITIALIZER, \ + /* peak */ PEAK_INITIALIZER, \ /* tcache_slow */ TCACHE_SLOW_ZERO_INITIALIZER, \ /* rtree_ctx */ RTREE_CTX_INITIALIZER, diff --git a/src/ctl.c b/src/ctl.c index 4cac5608..89824d6a 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -365,7 +365,6 @@ CTL_PROTO(experimental_hooks_prof_sample) CTL_PROTO(experimental_hooks_prof_sample_free) CTL_PROTO(experimental_hooks_thread_event) CTL_PROTO(experimental_hooks_safety_check_abort) -CTL_PROTO(experimental_thread_activity_callback) CTL_PROTO(experimental_utilization_query) 
CTL_PROTO(experimental_utilization_batch_query) CTL_PROTO(experimental_arenas_i_pactivep) @@ -890,9 +889,6 @@ static const ctl_named_node_t experimental_hooks_node[] = { {NAME("thread_event"), CTL(experimental_hooks_thread_event)}, }; -static const ctl_named_node_t experimental_thread_node[] = { - {NAME("activity_callback"), CTL(experimental_thread_activity_callback)}}; - static const ctl_named_node_t experimental_utilization_node[] = { {NAME("query"), CTL(experimental_utilization_query)}, {NAME("batch_query"), CTL(experimental_utilization_batch_query)}}; @@ -916,8 +912,7 @@ static const ctl_named_node_t experimental_node[] = { {NAME("arenas"), CHILD(indexed, experimental_arenas)}, {NAME("arenas_create_ext"), CTL(experimental_arenas_create_ext)}, {NAME("prof_recent"), CHILD(named, experimental_prof_recent)}, - {NAME("batch_alloc"), CTL(experimental_batch_alloc)}, - {NAME("thread"), CHILD(named, experimental_thread)}}; + {NAME("batch_alloc"), CTL(experimental_batch_alloc)}}; static const ctl_named_node_t root_node[] = {{NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, @@ -4255,32 +4250,6 @@ label_return: return ret; } -static int -experimental_thread_activity_callback_ctl(tsd_t *tsd, const size_t *mib, - size_t miblen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { - int ret; - - if (!config_stats) { - return ENOENT; - } - - activity_callback_thunk_t t_old = tsd_activity_callback_thunk_get(tsd); - READ(t_old, activity_callback_thunk_t); - - if (newp != NULL) { - /* - * This initialization is unnecessary. If it's omitted, though, - * clang gets confused and warns on the subsequent use of t_new. 
- */ - activity_callback_thunk_t t_new = {NULL, NULL}; - WRITE(t_new, activity_callback_thunk_t); - tsd_activity_callback_thunk_set(tsd, t_new); - } - ret = 0; -label_return: - return ret; -} - /* * Output six memory utilization entries for an input pointer, the first one of * type (void *) and the remaining five of type size_t, describing the following diff --git a/src/peak_event.c b/src/peak_event.c index e7f54dba..39f90b70 100644 --- a/src/peak_event.c +++ b/src/peak_event.c @@ -3,7 +3,6 @@ #include "jemalloc/internal/peak_event.h" -#include "jemalloc/internal/activity_callback.h" #include "jemalloc/internal/peak.h" #include "jemalloc/internal/thread_event_registry.h" @@ -16,17 +15,6 @@ peak_event_update(tsd_t *tsd) { peak_update(peak, alloc, dalloc); } -static void -peak_event_activity_callback(tsd_t *tsd) { - activity_callback_thunk_t *thunk = tsd_activity_callback_thunkp_get( - tsd); - uint64_t alloc = tsd_thread_allocated_get(tsd); - uint64_t dalloc = tsd_thread_deallocated_get(tsd); - if (thunk->callback != NULL) { - thunk->callback(thunk->uctx, alloc, dalloc); - } -} - /* Set current state to zero. 
*/ void peak_event_zero(tsd_t *tsd) { @@ -55,7 +43,6 @@ peak_event_postponed_event_wait(tsd_t *tsd) { static void peak_event_handler(tsd_t *tsd) { peak_event_update(tsd); - peak_event_activity_callback(tsd); } static te_enabled_t diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 4cd0225b..8f1d2a66 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1332,77 +1332,6 @@ TEST_BEGIN(test_thread_peak) { } TEST_END -typedef struct activity_test_data_s activity_test_data_t; -struct activity_test_data_s { - uint64_t obtained_alloc; - uint64_t obtained_dalloc; -}; - -static void -activity_test_callback(void *uctx, uint64_t alloc, uint64_t dalloc) { - activity_test_data_t *test_data = (activity_test_data_t *)uctx; - test_data->obtained_alloc = alloc; - test_data->obtained_dalloc = dalloc; -} - -TEST_BEGIN(test_thread_activity_callback) { - test_skip_if(!config_stats); - - const size_t big_size = 10 * 1024 * 1024; - void *ptr; - int err; - size_t sz; - - uint64_t *allocatedp; - uint64_t *deallocatedp; - sz = sizeof(allocatedp); - err = mallctl("thread.allocatedp", &allocatedp, &sz, NULL, 0); - assert_d_eq(0, err, ""); - err = mallctl("thread.deallocatedp", &deallocatedp, &sz, NULL, 0); - assert_d_eq(0, err, ""); - - activity_callback_thunk_t old_thunk = { - (activity_callback_t)111, (void *)222}; - - activity_test_data_t test_data = {333, 444}; - activity_callback_thunk_t new_thunk = { - &activity_test_callback, &test_data}; - - sz = sizeof(old_thunk); - err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, - &new_thunk, sizeof(new_thunk)); - assert_d_eq(0, err, ""); - - expect_true(old_thunk.callback == NULL, "Callback already installed"); - expect_true(old_thunk.uctx == NULL, "Callback data already installed"); - - ptr = mallocx(big_size, 0); - expect_u64_eq(test_data.obtained_alloc, *allocatedp, ""); - expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); - - free(ptr); - expect_u64_eq(test_data.obtained_alloc, *allocatedp, 
""); - expect_u64_eq(test_data.obtained_dalloc, *deallocatedp, ""); - - sz = sizeof(old_thunk); - new_thunk = (activity_callback_thunk_t){NULL, NULL}; - err = mallctl("experimental.thread.activity_callback", &old_thunk, &sz, - &new_thunk, sizeof(new_thunk)); - assert_d_eq(0, err, ""); - - expect_true(old_thunk.callback == &activity_test_callback, ""); - expect_true(old_thunk.uctx == &test_data, ""); - - /* Inserting NULL should have turned off tracking. */ - test_data.obtained_alloc = 333; - test_data.obtained_dalloc = 444; - ptr = mallocx(big_size, 0); - free(ptr); - expect_u64_eq(333, test_data.obtained_alloc, ""); - expect_u64_eq(444, test_data.obtained_dalloc, ""); -} -TEST_END - static unsigned nuser_thread_event_cb_calls; static void user_thread_event_cb(bool is_alloc, uint64_t tallocated, uint64_t tdallocated) { @@ -1455,5 +1384,5 @@ main(void) { test_stats_arenas_hpa_shard_counters, test_stats_arenas_hpa_shard_slabs, test_hooks, test_hooks_exhaustion, test_thread_idle, test_thread_peak, - test_thread_activity_callback, test_thread_event_hook); + test_thread_event_hook); } From 513778bcb18f7e98073775d2b358674b14f7433f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 08:42:54 -0700 Subject: [PATCH 2587/2608] Fix off-by-one in arenas_bin_i_index and arenas_lextent_i_index bounds checks The index validation used > instead of >=, allowing access at index SC_NBINS (for bins) and SC_NSIZES-SC_NBINS (for lextents), which are one past the valid range. This caused out-of-bounds reads in bin_infos[] and sz_index2size_unsafe(). Add unit tests that verify the boundary indices return ENOENT. 
--- src/ctl.c | 4 ++-- test/unit/mallctl.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index 89824d6a..bfc12469 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3250,7 +3250,7 @@ CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) CTL_RO_NL_GEN(arenas_bin_i_nshards, bin_infos[mib[2]].n_shards, uint32_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > SC_NBINS) { + if (i >= SC_NBINS) { return NULL; } return super_arenas_bin_i_node; @@ -3262,7 +3262,7 @@ CTL_RO_NL_GEN(arenas_lextent_i_size, static const ctl_named_node_t * arenas_lextent_i_index( tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { - if (i > SC_NSIZES - SC_NBINS) { + if (i >= SC_NSIZES - SC_NBINS) { return NULL; } return super_arenas_lextent_i_node; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 8f1d2a66..6ab443af 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -956,6 +956,52 @@ TEST_BEGIN(test_arenas_bin_constants) { } TEST_END +TEST_BEGIN(test_arenas_bin_oob) { + size_t sz; + size_t result; + char buf[128]; + + /* + * Querying the bin at index SC_NBINS should fail because valid + * indices are [0, SC_NBINS). + */ + sz = sizeof(result); + malloc_snprintf( + buf, sizeof(buf), "arenas.bin.%u.size", (unsigned)SC_NBINS); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds bin index SC_NBINS"); + + /* One below the boundary should succeed. 
*/ + malloc_snprintf( + buf, sizeof(buf), "arenas.bin.%u.size", (unsigned)(SC_NBINS - 1)); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid bin index SC_NBINS-1"); +} +TEST_END + +TEST_BEGIN(test_arenas_lextent_oob) { + size_t sz; + size_t result; + char buf[128]; + unsigned nlextents = SC_NSIZES - SC_NBINS; + + /* + * Querying the lextent at index nlextents should fail because valid + * indices are [0, nlextents). + */ + sz = sizeof(result); + malloc_snprintf(buf, sizeof(buf), "arenas.lextent.%u.size", nlextents); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds lextent index"); + + /* Querying the last element (nlextents - 1) should succeed. */ + malloc_snprintf( + buf, sizeof(buf), "arenas.lextent.%u.size", nlextents - 1); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid lextent index"); +} +TEST_END + TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) \ do { \ @@ -1379,6 +1425,7 @@ main(void) { test_arena_i_dss, test_arena_i_name, test_arena_i_retain_grow_limit, test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, test_arenas_constants, test_arenas_bin_constants, + test_arenas_bin_oob, test_arenas_lextent_oob, test_arenas_lextent_constants, test_arenas_create, test_arenas_lookup, test_prof_active, test_stats_arenas, test_stats_arenas_hpa_shard_counters, From 87f9938de51be77946b02f0ed54cbd32a5ff055b Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 09:02:32 -0700 Subject: [PATCH 2588/2608] Fix duplicate "nactive_huge" JSON key in HPA shard stats output In both the full_slabs and empty_slabs JSON sections of HPA shard stats, "nactive_huge" was emitted twice instead of emitting "ndirty_huge" as the second entry. This caused ndirty_huge to be missing from the JSON output entirely. 
Add a unit test that verifies both sections contain "ndirty_huge". --- src/stats.c | 4 +-- test/unit/json_stats.c | 73 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 74 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index 22b412bd..d906ade6 100644 --- a/src/stats.c +++ b/src/stats.c @@ -981,7 +981,7 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter_json_kv( emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv( - emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv( @@ -1022,7 +1022,7 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter_json_kv( emitter, "nactive_huge", emitter_type_size, &nactive_huge); emitter_json_kv( - emitter, "nactive_huge", emitter_type_size, &nactive_huge); + emitter, "ndirty_huge", emitter_type_size, &ndirty_huge); emitter_json_kv(emitter, "npageslabs_nonhuge", emitter_type_size, &npageslabs_nonhuge); emitter_json_kv( diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index ea8a170b..eee717c7 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -237,7 +237,78 @@ TEST_BEGIN(test_json_stats_mutexes) { } TEST_END +/* + * Verify that hpa_shard JSON stats contain "ndirty_huge" key in both + * full_slabs and empty_slabs sections. A previous bug emitted duplicate + * "nactive_huge" instead of "ndirty_huge". + */ +TEST_BEGIN(test_hpa_shard_json_ndirty_huge) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + + /* Do some allocation to create HPA state. 
*/ + void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx failure"); + + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + /* "J" for JSON, include per-arena HPA stats. */ + malloc_stats_print(stats_buf_write_cb, &sbuf, "J"); + + /* + * Find "full_slabs" and check it contains "ndirty_huge". + */ + const char *full_slabs = strstr(sbuf.buf, "\"full_slabs\""); + if (full_slabs != NULL) { + const char *empty_slabs = strstr(full_slabs, "\"empty_slabs\""); + const char *search_end = empty_slabs != NULL + ? empty_slabs + : sbuf.buf + sbuf.len; + /* + * Search for "ndirty_huge" between full_slabs and + * empty_slabs. + */ + const char *ndirty = full_slabs; + bool found = false; + while (ndirty < search_end) { + ndirty = strstr(ndirty, "\"ndirty_huge\""); + if (ndirty != NULL && ndirty < search_end) { + found = true; + break; + } + break; + } + expect_true( + found, "full_slabs section should contain ndirty_huge key"); + } + + /* + * Find "empty_slabs" and check it contains "ndirty_huge". + */ + const char *empty_slabs = strstr(sbuf.buf, "\"empty_slabs\""); + if (empty_slabs != NULL) { + /* Find the end of the empty_slabs object. */ + const char *nonfull = strstr(empty_slabs, "\"nonfull_slabs\""); + const char *search_end = nonfull != NULL ? 
nonfull + : sbuf.buf + sbuf.len; + const char *ndirty = strstr(empty_slabs, "\"ndirty_huge\""); + bool found = (ndirty != NULL && ndirty < search_end); + expect_true(found, + "empty_slabs section should contain ndirty_huge key"); + } + + stats_buf_fini(&sbuf); + dallocx(p, MALLOCX_TCACHE_NONE); +} +TEST_END + int main(void) { - return test(test_json_stats_mutexes); + return test_no_reentrancy(test_json_stats_mutexes, + test_hpa_shard_json_ndirty_huge); } From a0f2bdf91ddd4e5662790c7cd877052c9009441d Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 09:57:28 -0700 Subject: [PATCH 2589/2608] Fix missing negation in large_ralloc_no_move usize_min fallback The second expansion attempt in large_ralloc_no_move omitted the ! before large_ralloc_no_move_expand(), inverting the return value. On expansion failure, the function falsely reported success, making callers believe the allocation was expanded in-place when it was not. On expansion success, the function falsely reported failure, causing callers to unnecessarily allocate, copy, and free. Add unit test that verifies the return value matches actual size change. 
--- Makefile.in | 1 + include/jemalloc/internal/util.h | 3 ++ src/large.c | 2 +- test/unit/large_ralloc.c | 76 ++++++++++++++++++++++++++++++++ 4 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 test/unit/large_ralloc.c diff --git a/Makefile.in b/Makefile.in index 459f98fb..435fc34d 100644 --- a/Makefile.in +++ b/Makefile.in @@ -248,6 +248,7 @@ TESTS_UNIT := \ $(srcroot)test/unit/junk_alloc.c \ $(srcroot)test/unit/junk_free.c \ $(srcroot)test/unit/json_stats.c \ + $(srcroot)test/unit/large_ralloc.c \ $(srcroot)test/unit/log.c \ $(srcroot)test/unit/mallctl.c \ $(srcroot)test/unit/malloc_conf_2.c \ diff --git a/include/jemalloc/internal/util.h b/include/jemalloc/internal/util.h index bf246c95..ecfa76b8 100644 --- a/include/jemalloc/internal/util.h +++ b/include/jemalloc/internal/util.h @@ -20,6 +20,9 @@ */ #define JEMALLOC_ARG_CONCAT(...) __VA_ARGS__ +/* Number of elements in a fixed-size array. */ +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) + /* cpp macro definition stringification. */ #define STRINGIFY_HELPER(x) #x #define STRINGIFY(x) STRINGIFY_HELPER(x) diff --git a/src/large.c b/src/large.c index 56fa16ab..6ccf49d7 100644 --- a/src/large.c +++ b/src/large.c @@ -147,7 +147,7 @@ large_ralloc_no_move(tsdn_t *tsdn, edata_t *edata, size_t usize_min, } /* Try again, this time with usize_min. */ if (usize_min < usize_max && usize_min > oldusize - && large_ralloc_no_move_expand( + && !large_ralloc_no_move_expand( tsdn, edata, usize_min, zero)) { arena_decay_tick(tsdn, arena_get_from_edata(edata)); return false; diff --git a/test/unit/large_ralloc.c b/test/unit/large_ralloc.c new file mode 100644 index 00000000..1f08d125 --- /dev/null +++ b/test/unit/large_ralloc.c @@ -0,0 +1,76 @@ +#include "test/jemalloc_test.h" + +/* + * Test that large_ralloc_no_move causes a failure (returns true) when + * in-place extent expansion cannot succeed for either usize_max or + * usize_min. + * + * A previous bug omitted the ! 
negation on the second extent expansion + * attempt (usize_min fallback), causing false success (return false) when + * the expansion actually failed. + */ +TEST_BEGIN(test_large_ralloc_no_move_expand_fail) { + /* + * Allocate two adjacent large objects in the same arena to block + * in-place expansion of the first one. + */ + unsigned arena_ind; + size_t sz = sizeof(arena_ind); + expect_d_eq(mallctl("arenas.create", (void *)&arena_ind, &sz, NULL, 0), + 0, "Unexpected mallctl() failure"); + + int flags = MALLOCX_ARENA(arena_ind) | MALLOCX_TCACHE_NONE; + + size_t large_sz = SC_LARGE_MINCLASS; + /* Allocate several blocks to prevent expansion of the first. */ + void *blocks[8]; + for (size_t i = 0; i < ARRAY_SIZE(blocks); i++) { + blocks[i] = mallocx(large_sz, flags); + expect_ptr_not_null(blocks[i], "Unexpected mallocx() failure"); + } + + /* + * Try to expand blocks[0] in place. Use usize_min < usize_max to + * exercise the fallback path. + */ + tsd_t *tsd = tsd_fetch(); + edata_t *edata = emap_edata_lookup( + tsd_tsdn(tsd), &arena_emap_global, blocks[0]); + expect_ptr_not_null(edata, "Unexpected edata lookup failure"); + + size_t oldusize = edata_usize_get(edata); + size_t usize_min = sz_s2u(oldusize + 1); + size_t usize_max = sz_s2u(oldusize * 2); + + /* Ensure min and max are in different size classes. */ + if (usize_min == usize_max) { + usize_max = sz_s2u(usize_min + 1); + } + + bool ret = large_ralloc_no_move( + tsd_tsdn(tsd), edata, usize_min, usize_max, false); + + /* + * With adjacent allocations blocking expansion, this should fail. + * The bug caused ret == false (success) even when expansion failed. + */ + if (!ret) { + /* + * Expansion might actually succeed if adjacent memory + * is free. Verify the size actually changed. 
+ */ + size_t newusize = edata_usize_get(edata); + expect_zu_ge(newusize, usize_min, + "Expansion reported success but size didn't change"); + } + + for (size_t i = 0; i < ARRAY_SIZE(blocks); i++) { + dallocx(blocks[i], flags); + } +} +TEST_END + +int +main(void) { + return test_no_reentrancy(test_large_ralloc_no_move_expand_fail); +} From eab2b29736a3f499f7be1236950ed9aab57c4267 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 09:57:34 -0700 Subject: [PATCH 2590/2608] Fix off-by-one in stats_arenas_i_bins_j and stats_arenas_i_lextents_j bounds checks Same pattern as arenas_bin_i_index: used > instead of >= allowing access one past the end of bstats[] and lstats[] arrays. Add unit tests that verify boundary indices return ENOENT. --- src/ctl.c | 4 ++-- test/unit/mallctl.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/ctl.c b/src/ctl.c index bfc12469..0b72086c 100644 --- a/src/ctl.c +++ b/src/ctl.c @@ -3998,7 +3998,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_bins_j_nonfull_slabs, static const ctl_named_node_t * stats_arenas_i_bins_j_index( tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > SC_NBINS) { + if (j >= SC_NBINS) { return NULL; } return super_stats_arenas_i_bins_j_node; @@ -4022,7 +4022,7 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, static const ctl_named_node_t * stats_arenas_i_lextents_j_index( tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t j) { - if (j > SC_NSIZES - SC_NBINS) { + if (j >= SC_NSIZES - SC_NBINS) { return NULL; } return super_stats_arenas_i_lextents_j_node; diff --git a/test/unit/mallctl.c b/test/unit/mallctl.c index 6ab443af..11710c27 100644 --- a/test/unit/mallctl.c +++ b/test/unit/mallctl.c @@ -1002,6 +1002,59 @@ TEST_BEGIN(test_arenas_lextent_oob) { } TEST_END +TEST_BEGIN(test_stats_arenas_bins_oob) { + test_skip_if(!config_stats); + size_t sz; + uint64_t result; + char buf[128]; + + uint64_t 
epoch = 1; + sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + /* SC_NBINS is one past the valid range. */ + sz = sizeof(result); + malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.bins.%u.nmalloc", + (unsigned)SC_NBINS); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds stats bin index"); + + /* SC_NBINS - 1 is valid. */ + malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.bins.%u.nmalloc", + (unsigned)(SC_NBINS - 1)); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid stats bin index"); +} +TEST_END + +TEST_BEGIN(test_stats_arenas_lextents_oob) { + test_skip_if(!config_stats); + size_t sz; + uint64_t result; + char buf[128]; + unsigned nlextents = SC_NSIZES - SC_NBINS; + + uint64_t epoch = 1; + sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + /* nlextents is one past the valid range. */ + sz = sizeof(result); + malloc_snprintf( + buf, sizeof(buf), "stats.arenas.0.lextents.%u.nmalloc", nlextents); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), ENOENT, + "mallctl() should fail for out-of-bounds stats lextent index"); + + /* nlextents - 1 is valid. 
*/ + malloc_snprintf(buf, sizeof(buf), "stats.arenas.0.lextents.%u.nmalloc", + nlextents - 1); + expect_d_eq(mallctl(buf, (void *)&result, &sz, NULL, 0), 0, + "mallctl() should succeed for valid stats lextent index"); +} +TEST_END + TEST_BEGIN(test_arenas_lextent_constants) { #define TEST_ARENAS_LEXTENT_CONSTANT(t, name, expected) \ do { \ @@ -1426,6 +1479,7 @@ main(void) { test_arenas_dirty_decay_ms, test_arenas_muzzy_decay_ms, test_arenas_constants, test_arenas_bin_constants, test_arenas_bin_oob, test_arenas_lextent_oob, + test_stats_arenas_bins_oob, test_stats_arenas_lextents_oob, test_arenas_lextent_constants, test_arenas_create, test_arenas_lookup, test_prof_active, test_stats_arenas, test_stats_arenas_hpa_shard_counters, From c2d57040f0d281449febb9bb80287e63bfb271fe Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:02:59 -0700 Subject: [PATCH 2591/2608] Fix out-of-bounds write in malloc_vsnprintf when size is 0 When called with size==0, the else branch wrote to str[size-1] which is str[(size_t)-1], a massive out-of-bounds write. Standard vsnprintf allows size==0 to mean "compute length only, write nothing". Add unit test for the size==0 case. 
--- src/malloc_io.c | 2 +- test/unit/malloc_io.c | 20 +++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/malloc_io.c b/src/malloc_io.c index 779cdc05..9716c668 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c @@ -692,7 +692,7 @@ malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap) { label_out: if (i < size) { str[i] = '\0'; - } else { + } else if (size != 0) { str[size - 1] = '\0'; } diff --git a/test/unit/malloc_io.c b/test/unit/malloc_io.c index f7895945..ee744a78 100644 --- a/test/unit/malloc_io.c +++ b/test/unit/malloc_io.c @@ -252,8 +252,26 @@ TEST_BEGIN(test_malloc_snprintf) { } TEST_END +TEST_BEGIN(test_malloc_snprintf_zero_size) { + char buf[8]; + size_t result; + + /* + * malloc_snprintf with size==0 should not write anything but should + * return the length that would have been written. A previous bug + * caused an out-of-bounds write via str[size - 1] when size was 0. + */ + memset(buf, 'X', sizeof(buf)); + result = malloc_snprintf(buf, 0, "%s", "hello"); + expect_zu_eq(result, 5, "Expected length 5 for \"hello\""); + /* buf should be untouched. */ + expect_c_eq(buf[0], 'X', "Buffer should not have been modified"); +} +TEST_END + int main(void) { return test(test_malloc_strtoumax_no_endptr, test_malloc_strtoumax, - test_malloc_snprintf_truncated, test_malloc_snprintf); + test_malloc_snprintf_truncated, test_malloc_snprintf, + test_malloc_snprintf_zero_size); } From 3a8bee81f18bd241ba571a6a77c940c8f8cfcfb1 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:03:06 -0700 Subject: [PATCH 2592/2608] Fix pac_mapped stats inflation on allocation failure newly_mapped_size was set unconditionally in the ecache_alloc_grow fallback path, even when the allocation returned NULL. This inflated pac_mapped stats without a corresponding deallocation to correct them. Guard the assignment with an edata != NULL check, matching the pattern used in the batched allocation path above it. 
--- src/pac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pac.c b/src/pac.c index 86001139..ed0f77c2 100644 --- a/src/pac.c +++ b/src/pac.c @@ -198,7 +198,9 @@ pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, edata = ecache_alloc_grow(tsdn, pac, ehooks, &pac->ecache_retained, NULL, size, alignment, zero, guarded); - newly_mapped_size = size; + if (edata != NULL) { + newly_mapped_size = size; + } } if (config_stats && newly_mapped_size != 0) { From dd30c91eaaf02e5f347e37a49f99eae670b94c88 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:04:28 -0700 Subject: [PATCH 2593/2608] Fix wrong fallback value in os_page_detect when sysconf fails Returned LG_PAGE (log2 of page size, e.g. 12) instead of PAGE (actual page size, e.g. 4096) when sysconf(_SC_PAGESIZE) failed. This would cause os_page to be set to an absurdly small value, breaking all page-aligned operations. --- src/pages.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pages.c b/src/pages.c index 2a4f0093..5c12ae42 100644 --- a/src/pages.c +++ b/src/pages.c @@ -718,7 +718,7 @@ os_page_detect(void) { #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { - return LG_PAGE; + return PAGE; } return (size_t)result; #endif From 3f6e63e86a193e8a4d685480165812cac6d2350f Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:04:36 -0700 Subject: [PATCH 2594/2608] Fix wrong type for malloc_read_fd return value in prof_stack_range Used size_t (unsigned) instead of ssize_t for the return value of malloc_read_fd, which returns -1 on error. With size_t, -1 becomes a huge positive value, bypassing the error check and corrupting the remaining byte count. 
--- src/prof_stack_range.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/prof_stack_range.c b/src/prof_stack_range.c index b167b132..8ebcab8e 100644 --- a/src/prof_stack_range.c +++ b/src/prof_stack_range.c @@ -73,17 +73,21 @@ prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, } remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { + if (remaining < 0) { ret = errno; break; + } else if (remaining == 0) { + break; } line = buf; } else if (line == NULL) { /* case 1: no newline found in buf */ remaining = malloc_read_fd(fd, buf, sizeof(buf)); - if (remaining <= 0) { + if (remaining < 0) { ret = errno; break; + } else if (remaining == 0) { + break; } line = memchr(buf, '\n', remaining); if (line != NULL) { @@ -99,11 +103,13 @@ prof_mapping_containing_addr(uintptr_t addr, const char *maps_path, remaining); /* copy remaining characters to start of buf */ line = buf; - size_t count = malloc_read_fd( + ssize_t count = malloc_read_fd( fd, buf + remaining, sizeof(buf) - remaining); - if (count <= 0) { + if (count < 0) { ret = errno; break; + } else if (count == 0) { + break; } remaining += From 675ab079e7e6f08a74727ec53569ec2db578d515 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:04:43 -0700 Subject: [PATCH 2595/2608] Fix missing release of acquired neighbor edata in extent_try_coalesce_impl When emap_try_acquire_edata_neighbor returned a non-NULL neighbor but the size check failed, the neighbor was never released from extent_state_merging, making it permanently invisible to future allocation and coalescing operations. Release the neighbor when it doesn't meet the size requirement, matching the pattern used in extent_recycle_extract. 
--- src/extent.c | 44 +++++++++++++++++++++++++++----------------- test/unit/pa.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 18 deletions(-) diff --git a/src/extent.c b/src/extent.c index 4b927191..118c8785 100644 --- a/src/extent.c +++ b/src/extent.c @@ -916,15 +916,20 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t max_next_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; - if (next != NULL && edata_size_get(next) <= max_next_neighbor) { - if (!extent_coalesce( - tsdn, pac, ehooks, ecache, edata, next, true)) { - if (ecache->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return edata; + if (next != NULL) { + if (edata_size_get(next) > max_next_neighbor) { + emap_release_edata( + tsdn, pac->emap, next, ecache->state); + } else { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, + edata, next, true)) { + if (ecache->delay_coalesce) { + /* Do minimal coalescing. */ + *coalesced = true; + return edata; + } + again = true; } - again = true; } } @@ -934,16 +939,21 @@ extent_try_coalesce_impl(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t max_prev_neighbor = max_size > edata_size_get(edata) ? max_size - edata_size_get(edata) : 0; - if (prev != NULL && edata_size_get(prev) <= max_prev_neighbor) { - if (!extent_coalesce(tsdn, pac, ehooks, ecache, edata, - prev, false)) { - edata = prev; - if (ecache->delay_coalesce) { - /* Do minimal coalescing. */ - *coalesced = true; - return edata; + if (prev != NULL) { + if (edata_size_get(prev) > max_prev_neighbor) { + emap_release_edata( + tsdn, pac->emap, prev, ecache->state); + } else { + if (!extent_coalesce(tsdn, pac, ehooks, ecache, + edata, prev, false)) { + edata = prev; + if (ecache->delay_coalesce) { + /* Do minimal coalescing. 
*/ + *coalesced = true; + return edata; + } + again = true; } - again = true; } } } while (again); diff --git a/test/unit/pa.c b/test/unit/pa.c index 8552225f..c1562d7b 100644 --- a/test/unit/pa.c +++ b/test/unit/pa.c @@ -121,7 +121,52 @@ TEST_BEGIN(test_alloc_free_purge_thds) { } TEST_END +TEST_BEGIN(test_failed_coalesce_releases_neighbor) { + test_skip_if(!maps_coalesce); + + test_data_t *test_data = init_test_data(-1, -1); + size_t old_lg_extent_max_active_fit = opt_lg_extent_max_active_fit; + opt_lg_extent_max_active_fit = 0; + + bool deferred_work_generated = false; + size_t unit = SC_LARGE_MINCLASS; + size_t alloc_size = 4 * unit; + edata_t *edata = pa_alloc(TSDN_NULL, &test_data->shard, alloc_size, + PAGE, + /* slab */ false, sz_size2index(alloc_size), /* zero */ false, + /* guarded */ false, &deferred_work_generated); + expect_ptr_not_null(edata, "Unexpected pa_alloc() failure"); + + void *tail_addr = (void *)((uintptr_t)edata_base_get(edata) + unit); + expect_false(pa_shrink(TSDN_NULL, &test_data->shard, edata, alloc_size, + unit, sz_size2index(unit), &deferred_work_generated), + "Unexpected pa_shrink() failure"); + + edata_t *tail = emap_edata_lookup( + TSDN_NULL, &test_data->emap, tail_addr); + expect_ptr_not_null(tail, "Expected dirty tail extent after shrink"); + expect_ptr_eq( + edata_base_get(tail), tail_addr, "Unexpected tail extent address"); + expect_zu_eq( + edata_size_get(tail), 3 * unit, "Unexpected tail extent size"); + expect_d_eq(edata_state_get(tail), extent_state_dirty, + "Expected tail extent to start dirty"); + + pa_dalloc( + TSDN_NULL, &test_data->shard, edata, &deferred_work_generated); + + tail = emap_edata_lookup(TSDN_NULL, &test_data->emap, tail_addr); + expect_ptr_not_null( + tail, "Expected oversized dirty neighbor to remain discoverable"); + expect_d_eq(edata_state_get(tail), extent_state_dirty, + "Failed coalesce must release oversized dirty neighbor"); + + opt_lg_extent_max_active_fit = old_lg_extent_max_active_fit; +} 
+TEST_END + int main(void) { - return test(test_alloc_free_purge_thds); + return test( + test_alloc_free_purge_thds, test_failed_coalesce_releases_neighbor); } From 234404d324458d4404ef382742741cb4ffbcf921 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:06:15 -0700 Subject: [PATCH 2596/2608] Fix wrong loop variable for array index in sz_boot_pind2sz_tab The sentinel fill loop used sz_pind2sz_tab[pind] (constant) instead of sz_pind2sz_tab[i] (loop variable), writing only to the first entry repeatedly and leaving subsequent entries uninitialized. --- src/sz.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sz.c b/src/sz.c index 4a4c057d..da92f2b4 100644 --- a/src/sz.c +++ b/src/sz.c @@ -65,7 +65,7 @@ sz_boot_pind2sz_tab(const sc_data_t *sc_data) { } } for (int i = pind; i <= (int)SC_NPSIZES; i++) { - sz_pind2sz_tab[pind] = sc_data->large_maxclass + PAGE; + sz_pind2sz_tab[i] = sc_data->large_maxclass + PAGE; } } @@ -93,7 +93,7 @@ sz_boot_size2index_tab(const sc_data_t *sc_data) { size_t dst_max = (SC_LOOKUP_MAXCLASS >> SC_LG_TINY_MIN) + 1; size_t dst_ind = 0; for (unsigned sc_ind = 0; sc_ind < SC_NSIZES && dst_ind < dst_max; - sc_ind++) { + sc_ind++) { const sc_t *sc = &sc_data->sc[sc_ind]; size_t sz = (ZU(1) << sc->lg_base) + (ZU(sc->ndelta) << sc->lg_delta); From 2fceece256c0a01a28743652ce3e5cc67723e453 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:06:22 -0700 Subject: [PATCH 2597/2608] Fix extra size argument in edata_init call in extent_alloc_dss An extra 'size' argument was passed where 'slab' (false) should be, shifting all subsequent arguments: slab got size (nonzero=true), szind got false (0), and sn got SC_NSIZES instead of a proper serial number from extent_sn_next(). Match the correct pattern used by the gap edata_init call above. 
--- src/extent_dss.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/src/extent_dss.c b/src/extent_dss.c index 3f7a15d0..c7c34207 100644 --- a/src/extent_dss.c +++ b/src/extent_dss.c @@ -153,11 +153,14 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, - (uintptr_t)gap_addr_page; if (gap_size_page != 0) { edata_init(gap, arena_ind_get(arena), - gap_addr_page, gap_size_page, false, - SC_NSIZES, + gap_addr_page, gap_size_page, + /* slab */ false, + /* szind */ SC_NSIZES, extent_sn_next(&arena->pa_shard.pac), - extent_state_active, false, true, - EXTENT_PAI_PAC, head_state); + extent_state_active, + /* zeroed */ false, + /* committed */ true, + /* pai */ EXTENT_PAI_PAC, head_state); } /* * Compute the address just past the end of the desired @@ -203,9 +206,16 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, arena); edata_init(&edata, arena_ind_get(arena), - ret, size, size, false, SC_NSIZES, - extent_state_active, false, true, - EXTENT_PAI_PAC, head_state); + ret, size, + /* slab */ false, + /* szind */ SC_NSIZES, + extent_sn_next( + &arena->pa_shard.pac), + extent_state_active, + /* zeroed */ false, + /* committed */ true, + /* pai */ EXTENT_PAI_PAC, + head_state); if (extent_purge_forced_wrapper(tsdn, ehooks, &edata, 0, size)) { memset(ret, 0, size); From 5904a421878b31d6a5ec674027b35db63e64537d Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 10:07:05 -0700 Subject: [PATCH 2598/2608] Fix memory leak of old curr_reg on san_bump_grow_locked failure When san_bump_grow_locked fails, it sets sba->curr_reg to NULL. The old curr_reg (saved in to_destroy) was never freed or restored, leaking the virtual memory extent. Restore sba->curr_reg from to_destroy on failure so the old region remains usable. 
--- src/san_bump.c | 1 + test/unit/san_bump.c | 89 +++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 89 insertions(+), 1 deletion(-) diff --git a/src/san_bump.c b/src/san_bump.c index 09ed18ca..11031290 100644 --- a/src/san_bump.c +++ b/src/san_bump.c @@ -31,6 +31,7 @@ san_bump_alloc(tsdn_t *tsdn, san_bump_alloc_t *sba, pac_t *pac, bool err = san_bump_grow_locked( tsdn, sba, pac, ehooks, guarded_size); if (err) { + sba->curr_reg = to_destroy; goto label_err; } } else { diff --git a/test/unit/san_bump.c b/test/unit/san_bump.c index 9aa0210e..54d8583d 100644 --- a/test/unit/san_bump.c +++ b/test/unit/san_bump.c @@ -4,6 +4,50 @@ #include "jemalloc/internal/arena_structs.h" #include "jemalloc/internal/san_bump.h" +static extent_hooks_t *san_bump_default_hooks; +static extent_hooks_t san_bump_hooks; +static bool fail_retained_alloc; +static unsigned retained_alloc_fail_calls; + +static void * +san_bump_fail_alloc_hook(extent_hooks_t *UNUSED extent_hooks, void *new_addr, + size_t size, size_t alignment, bool *zero, bool *commit, + unsigned arena_ind) { + if (fail_retained_alloc && new_addr == NULL + && size >= SBA_RETAINED_ALLOC_SIZE) { + retained_alloc_fail_calls++; + return NULL; + } + return san_bump_default_hooks->alloc(san_bump_default_hooks, new_addr, + size, alignment, zero, commit, arena_ind); +} + +static void +install_san_bump_fail_alloc_hooks(unsigned arena_ind) { + size_t hooks_mib[3]; + size_t hooks_miblen = sizeof(hooks_mib) / sizeof(size_t); + size_t old_size = sizeof(extent_hooks_t *); + size_t new_size = sizeof(extent_hooks_t *); + extent_hooks_t *new_hooks; + extent_hooks_t *old_hooks; + + expect_d_eq( + mallctlnametomib("arena.0.extent_hooks", hooks_mib, &hooks_miblen), + 0, "Unexpected mallctlnametomib() failure"); + hooks_mib[1] = (size_t)arena_ind; + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, (void *)&old_hooks, + &old_size, NULL, 0), + 0, "Unexpected extent_hooks error"); + + san_bump_default_hooks = old_hooks; + san_bump_hooks = 
*old_hooks; + san_bump_hooks.alloc = san_bump_fail_alloc_hook; + new_hooks = &san_bump_hooks; + expect_d_eq(mallctlbymib(hooks_mib, hooks_miblen, NULL, NULL, + (void *)&new_hooks, new_size), + 0, "Unexpected extent_hooks install failure"); +} + TEST_BEGIN(test_san_bump_alloc) { test_skip_if(!maps_coalesce || !opt_retain); @@ -69,6 +113,48 @@ TEST_BEGIN(test_san_bump_alloc) { } TEST_END +TEST_BEGIN(test_failed_grow_preserves_curr_reg) { + test_skip_if(!maps_coalesce || !opt_retain); + + tsdn_t *tsdn = tsdn_fetch(); + + san_bump_alloc_t sba; + san_bump_alloc_init(&sba); + + unsigned arena_ind = do_arena_create(0, 0); + assert_u_ne(arena_ind, UINT_MAX, "Failed to create an arena"); + install_san_bump_fail_alloc_hooks(arena_ind); + + arena_t *arena = arena_get(tsdn, arena_ind, false); + pac_t *pac = &arena->pa_shard.pac; + + size_t small_alloc_size = PAGE * 16; + edata_t *edata = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + small_alloc_size, /* zero */ false); + expect_ptr_not_null(edata, "Initial san_bump allocation failed"); + expect_ptr_not_null(sba.curr_reg, + "Expected retained region remainder after initial allocation"); + + fail_retained_alloc = true; + retained_alloc_fail_calls = 0; + + edata_t *failed = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + SBA_RETAINED_ALLOC_SIZE, /* zero */ false); + expect_ptr_null(failed, "Expected retained grow allocation failure"); + expect_u_eq(retained_alloc_fail_calls, 1, + "Expected exactly one failed retained allocation attempt"); + + edata_t *reused = san_bump_alloc(tsdn, &sba, pac, pac_ehooks_get(pac), + small_alloc_size, /* zero */ false); + expect_ptr_not_null( + reused, "Expected allocator to reuse preexisting current region"); + expect_u_eq(retained_alloc_fail_calls, 1, + "Reuse path should not attempt another retained grow allocation"); + + fail_retained_alloc = false; +} +TEST_END + TEST_BEGIN(test_large_alloc_size) { test_skip_if(!maps_coalesce || !opt_retain); @@ -105,5 +191,6 @@ TEST_END int 
main(void) { - return test(test_san_bump_alloc, test_large_alloc_size); + return test(test_san_bump_alloc, test_failed_grow_preserves_curr_reg, + test_large_alloc_size); } From 3ac9f96158f3b095496e260259a3c32857eafd28 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 20:33:49 -0700 Subject: [PATCH 2599/2608] Run clang-format on test/unit/conf_parse.c --- test/unit/conf_parse.c | 58 ++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c index b3fedb40..eb107865 100644 --- a/test/unit/conf_parse.c +++ b/test/unit/conf_parse.c @@ -27,8 +27,8 @@ TEST_END TEST_BEGIN(test_conf_handle_unsigned_in_range) { uintmax_t result = 0; - bool err = conf_handle_unsigned("100", sizeof("100") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "100", sizeof("100") - 1, 1, 2048, true, true, true, &result); expect_false(err, "Should succeed for in-range value"); expect_u64_eq((uint64_t)result, 100, "result should be 100"); } @@ -36,44 +36,43 @@ TEST_END TEST_BEGIN(test_conf_handle_unsigned_clip_max) { uintmax_t result = 0; - bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "9999", sizeof("9999") - 1, 1, 2048, true, true, true, &result); expect_false(err, "Should succeed with clipping"); - expect_u64_eq((uint64_t)result, 2048, - "result should be clipped to max 2048"); + expect_u64_eq( + (uint64_t)result, 2048, "result should be clipped to max 2048"); } TEST_END TEST_BEGIN(test_conf_handle_unsigned_clip_min) { uintmax_t result = 0; - bool err = conf_handle_unsigned("0", sizeof("0") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "0", sizeof("0") - 1, 1, 2048, true, true, true, &result); expect_false(err, "Should succeed with clipping"); - expect_u64_eq((uint64_t)result, 1, - "result should be clipped to min 1"); + 
expect_u64_eq((uint64_t)result, 1, "result should be clipped to min 1"); } TEST_END TEST_BEGIN(test_conf_handle_unsigned_no_clip_reject) { uintmax_t result = 0; - bool err = conf_handle_unsigned("9999", sizeof("9999") - 1, - 1, 2048, true, true, false, &result); + bool err = conf_handle_unsigned( + "9999", sizeof("9999") - 1, 1, 2048, true, true, false, &result); expect_true(err, "Should fail for out-of-range value without clip"); } TEST_END TEST_BEGIN(test_conf_handle_unsigned_invalid) { uintmax_t result = 0; - bool err = conf_handle_unsigned("abc", sizeof("abc") - 1, - 1, 2048, true, true, true, &result); + bool err = conf_handle_unsigned( + "abc", sizeof("abc") - 1, 1, 2048, true, true, true, &result); expect_true(err, "Should fail for non-numeric input"); } TEST_END TEST_BEGIN(test_conf_handle_signed_valid) { intmax_t result = 0; - bool err = conf_handle_signed("5000", sizeof("5000") - 1, - -1, INTMAX_MAX, true, false, false, &result); + bool err = conf_handle_signed("5000", sizeof("5000") - 1, -1, + INTMAX_MAX, true, false, false, &result); expect_false(err, "Should succeed for valid value"); expect_d64_eq((int64_t)result, 5000, "result should be 5000"); } @@ -81,8 +80,8 @@ TEST_END TEST_BEGIN(test_conf_handle_signed_negative) { intmax_t result = 0; - bool err = conf_handle_signed("-1", sizeof("-1") - 1, - -1, INTMAX_MAX, true, false, false, &result); + bool err = conf_handle_signed("-1", sizeof("-1") - 1, -1, INTMAX_MAX, + true, false, false, &result); expect_false(err, "Should succeed for -1"); expect_d64_eq((int64_t)result, -1, "result should be -1"); } @@ -90,8 +89,8 @@ TEST_END TEST_BEGIN(test_conf_handle_signed_out_of_range) { intmax_t result = 0; - bool err = conf_handle_signed("5000", sizeof("5000") - 1, - -1, 4999, true, true, false, &result); + bool err = conf_handle_signed( + "5000", sizeof("5000") - 1, -1, 4999, true, true, false, &result); expect_true(err, "Should fail for out-of-range value"); } TEST_END @@ -101,13 +100,14 @@ 
TEST_BEGIN(test_conf_handle_char_p) { bool err; /* Normal copy. */ - err = conf_handle_char_p("hello", sizeof("hello") - 1, buf, sizeof(buf)); + err = conf_handle_char_p( + "hello", sizeof("hello") - 1, buf, sizeof(buf)); expect_false(err, "Should succeed"); expect_str_eq(buf, "hello", "Should copy string"); /* Truncation. */ - err = conf_handle_char_p("longstring", sizeof("longstring") - 1, - buf, sizeof(buf)); + err = conf_handle_char_p( + "longstring", sizeof("longstring") - 1, buf, sizeof(buf)); expect_false(err, "Should succeed even when truncating"); expect_str_eq(buf, "longstr", "Should truncate to dest_sz - 1"); } @@ -115,16 +115,12 @@ TEST_END int main(void) { - return test(test_conf_handle_bool_true, - test_conf_handle_bool_false, - test_conf_handle_bool_invalid, - test_conf_handle_unsigned_in_range, + return test(test_conf_handle_bool_true, test_conf_handle_bool_false, + test_conf_handle_bool_invalid, test_conf_handle_unsigned_in_range, test_conf_handle_unsigned_clip_max, test_conf_handle_unsigned_clip_min, test_conf_handle_unsigned_no_clip_reject, - test_conf_handle_unsigned_invalid, - test_conf_handle_signed_valid, + test_conf_handle_unsigned_invalid, test_conf_handle_signed_valid, test_conf_handle_signed_negative, - test_conf_handle_signed_out_of_range, - test_conf_handle_char_p); + test_conf_handle_signed_out_of_range, test_conf_handle_char_p); } From b507644cb084d095917aea6e2573c702caff3e5a Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 12:27:00 -0700 Subject: [PATCH 2600/2608] Fix conf_handle_char_p zero-sized dest and remove unused conf_handle_unsigned --- include/jemalloc/internal/conf.h | 16 +++----- src/conf.c | 41 ++++----------------- test/unit/conf_parse.c | 63 +++++++------------------------- 3 files changed, 27 insertions(+), 93 deletions(-) diff --git a/include/jemalloc/internal/conf.h b/include/jemalloc/internal/conf.h index 21661955..26983ee9 100644 --- a/include/jemalloc/internal/conf.h +++ 
b/include/jemalloc/internal/conf.h @@ -9,19 +9,15 @@ void malloc_abort_invalid_conf(void); #ifdef JEMALLOC_JET extern bool had_conf_error; + bool conf_next(char const **opts_p, char const **k_p, size_t *klen_p, char const **v_p, size_t *vlen_p); -void conf_error(const char *msg, const char *k, size_t klen, - const char *v, size_t vlen); +void conf_error( + const char *msg, const char *k, size_t klen, const char *v, size_t vlen); bool conf_handle_bool(const char *v, size_t vlen, bool *result); -bool conf_handle_unsigned(const char *v, size_t vlen, - uintmax_t min, uintmax_t max, bool check_min, bool check_max, - bool clip, uintmax_t *result); -bool conf_handle_signed(const char *v, size_t vlen, - intmax_t min, intmax_t max, bool check_min, bool check_max, - bool clip, intmax_t *result); -bool conf_handle_char_p(const char *v, size_t vlen, - char *dest, size_t dest_sz); +bool conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max, + bool check_min, bool check_max, bool clip, intmax_t *result); +bool conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz); #endif #endif /* JEMALLOC_INTERNAL_CONF_H */ diff --git a/src/conf.c b/src/conf.c index 8a23bda6..65abcd25 100644 --- a/src/conf.c +++ b/src/conf.c @@ -254,36 +254,8 @@ JEMALLOC_DIAGNOSTIC_PUSH JEMALLOC_DIAGNOSTIC_IGNORE("-Wunused-function") JET_EXTERN bool -conf_handle_unsigned(const char *v, size_t vlen, - uintmax_t min, uintmax_t max, bool check_min, bool check_max, - bool clip, uintmax_t *result) { - char *end; - set_errno(0); - uintmax_t mv = (uintmax_t)malloc_strtoumax(v, &end, 0); - if (get_errno() != 0 || (uintptr_t)end - (uintptr_t)v != vlen) { - return true; - } - if (clip) { - if (check_min && mv < min) { - *result = min; - } else if (check_max && mv > max) { - *result = max; - } else { - *result = mv; - } - } else { - if ((check_min && mv < min) || (check_max && mv > max)) { - return true; - } - *result = mv; - } - return false; -} - -JET_EXTERN bool 
-conf_handle_signed(const char *v, size_t vlen, - intmax_t min, intmax_t max, bool check_min, bool check_max, - bool clip, intmax_t *result) { +conf_handle_signed(const char *v, size_t vlen, intmax_t min, intmax_t max, + bool check_min, bool check_max, bool clip, intmax_t *result) { char *end; set_errno(0); intmax_t mv = (intmax_t)malloc_strtoumax(v, &end, 0); @@ -309,6 +281,9 @@ conf_handle_signed(const char *v, size_t vlen, JET_EXTERN bool conf_handle_char_p(const char *v, size_t vlen, char *dest, size_t dest_sz) { + if (dest_sz == 0) { + return false; + } size_t cpylen = (vlen <= dest_sz - 1) ? vlen : dest_sz - 1; strncpy(dest, v, cpylen); dest[cpylen] = '\0'; @@ -473,11 +448,11 @@ malloc_conf_init_helper(sc_data_t *sc_data, unsigned bin_shard_sizes[SC_NBINS], continue; } - while (*opts != '\0' - && !conf_next(&opts, &k, &klen, &v, &vlen)) { + while ( + *opts != '\0' && !conf_next(&opts, &k, &klen, &v, &vlen)) { #define CONF_ERROR(msg, k, klen, v, vlen) \ if (!initial_call) { \ - conf_error(msg, k, klen, v, vlen); \ + conf_error(msg, k, klen, v, vlen); \ cur_opt_valid = false; \ } #define CONF_CONTINUE \ diff --git a/test/unit/conf_parse.c b/test/unit/conf_parse.c index eb107865..448cc84a 100644 --- a/test/unit/conf_parse.c +++ b/test/unit/conf_parse.c @@ -25,50 +25,6 @@ TEST_BEGIN(test_conf_handle_bool_invalid) { } TEST_END -TEST_BEGIN(test_conf_handle_unsigned_in_range) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "100", sizeof("100") - 1, 1, 2048, true, true, true, &result); - expect_false(err, "Should succeed for in-range value"); - expect_u64_eq((uint64_t)result, 100, "result should be 100"); -} -TEST_END - -TEST_BEGIN(test_conf_handle_unsigned_clip_max) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "9999", sizeof("9999") - 1, 1, 2048, true, true, true, &result); - expect_false(err, "Should succeed with clipping"); - expect_u64_eq( - (uint64_t)result, 2048, "result should be clipped to max 2048"); -} -TEST_END - 
-TEST_BEGIN(test_conf_handle_unsigned_clip_min) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "0", sizeof("0") - 1, 1, 2048, true, true, true, &result); - expect_false(err, "Should succeed with clipping"); - expect_u64_eq((uint64_t)result, 1, "result should be clipped to min 1"); -} -TEST_END - -TEST_BEGIN(test_conf_handle_unsigned_no_clip_reject) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "9999", sizeof("9999") - 1, 1, 2048, true, true, false, &result); - expect_true(err, "Should fail for out-of-range value without clip"); -} -TEST_END - -TEST_BEGIN(test_conf_handle_unsigned_invalid) { - uintmax_t result = 0; - bool err = conf_handle_unsigned( - "abc", sizeof("abc") - 1, 1, 2048, true, true, true, &result); - expect_true(err, "Should fail for non-numeric input"); -} -TEST_END - TEST_BEGIN(test_conf_handle_signed_valid) { intmax_t result = 0; bool err = conf_handle_signed("5000", sizeof("5000") - 1, -1, @@ -113,14 +69,21 @@ TEST_BEGIN(test_conf_handle_char_p) { } TEST_END +TEST_BEGIN(test_conf_handle_char_p_zero_dest_sz) { + char buf[4] = {'X', 'Y', 'Z', '\0'}; + bool err; + + err = conf_handle_char_p("abc", sizeof("abc") - 1, buf, 0); + expect_false(err, "Should succeed for zero-sized destination"); + expect_c_eq(buf[0], 'X', "Zero-sized destination must not be modified"); +} +TEST_END + int main(void) { return test(test_conf_handle_bool_true, test_conf_handle_bool_false, - test_conf_handle_bool_invalid, test_conf_handle_unsigned_in_range, - test_conf_handle_unsigned_clip_max, - test_conf_handle_unsigned_clip_min, - test_conf_handle_unsigned_no_clip_reject, - test_conf_handle_unsigned_invalid, test_conf_handle_signed_valid, + test_conf_handle_bool_invalid, test_conf_handle_signed_valid, test_conf_handle_signed_negative, - test_conf_handle_signed_out_of_range, test_conf_handle_char_p); + test_conf_handle_signed_out_of_range, test_conf_handle_char_p, + test_conf_handle_char_p_zero_dest_sz); } From 
a47fa33b5a7d91ab0218436a75b652a2b65588c9 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Tue, 31 Mar 2026 20:37:23 -0700 Subject: [PATCH 2601/2608] Run clang-format on test/unit/tcache_max.c --- test/unit/tcache_max.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index d57b2d3b..653563ca 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -195,9 +195,9 @@ TEST_BEGIN(test_tcache_max) { global_test = true; for (alloc_option = alloc_option_start; alloc_option < alloc_option_end; - alloc_option++) { + alloc_option++) { for (dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; dalloc_option++) { + dalloc_option < dalloc_option_end; dalloc_option++) { /* opt.tcache_max set to 1024 in tcache_max.sh. */ test_tcache_max_impl(1024, alloc_option, dalloc_option); } @@ -318,9 +318,9 @@ tcache_check(void *arg) { expect_zu_eq(tcache_nbins, tcache_max2nbins(new_tcache_max), "Unexpected value for tcache_nbins"); for (unsigned alloc_option = alloc_option_start; - alloc_option < alloc_option_end; alloc_option++) { + alloc_option < alloc_option_end; alloc_option++) { for (unsigned dalloc_option = dalloc_option_start; - dalloc_option < dalloc_option_end; dalloc_option++) { + dalloc_option < dalloc_option_end; dalloc_option++) { test_tcache_max_impl( new_tcache_max, alloc_option, dalloc_option); } From 3cc56d325c15cdb7d6047ed513ab908121c66698 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:30:58 -0700 Subject: [PATCH 2602/2608] Fix large alloc nrequests under-counting on cache misses --- include/jemalloc/internal/tcache_inlines.h | 6 +-- test/unit/tcache_max.c | 47 +++++++++++++++++++++- 2 files changed, 49 insertions(+), 4 deletions(-) diff --git a/include/jemalloc/internal/tcache_inlines.h b/include/jemalloc/internal/tcache_inlines.h index 6bd1b339..5f8ed317 100644 --- a/include/jemalloc/internal/tcache_inlines.h +++ 
b/include/jemalloc/internal/tcache_inlines.h @@ -163,10 +163,10 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, assert(usize <= tcache_max_get(tcache->tcache_slow)); memset(ret, 0, usize); } + } - if (config_stats) { - bin->tstats.nrequests++; - } + if (config_stats) { + bin->tstats.nrequests++; } return ret; diff --git a/test/unit/tcache_max.c b/test/unit/tcache_max.c index 653563ca..ab54da39 100644 --- a/test/unit/tcache_max.c +++ b/test/unit/tcache_max.c @@ -206,6 +206,50 @@ TEST_BEGIN(test_tcache_max) { } TEST_END +TEST_BEGIN(test_large_tcache_nrequests_on_miss) { + test_skip_if(!config_stats); + test_skip_if(!opt_tcache); + test_skip_if(opt_prof); + test_skip_if(san_uaf_detection_enabled()); + + size_t large; + size_t sz = sizeof(large); + expect_d_eq( + mallctl("arenas.lextent.0.size", (void *)&large, &sz, NULL, 0), 0, + "Unexpected mallctl() failure"); + expect_d_eq(mallctl("thread.tcache.max", NULL, NULL, (void *)&large, + sizeof(large)), + 0, "Unexpected mallctl() failure"); + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected tcache flush failure"); + + tsd_t *tsd = tsd_fetch(); + expect_ptr_not_null(tsd, "Unexpected tsd_fetch() failure"); + tcache_t *tcache = tcache_get(tsd); + expect_ptr_not_null(tcache, "Expected auto tcache"); + + szind_t binind = sz_size2index(large); + expect_true(binind >= SC_NBINS, "Expected large size class"); + cache_bin_t *bin = &tcache->bins[binind]; + bin->tstats.nrequests = 0; + + void *p = mallocx(large, 0); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_u64_eq(bin->tstats.nrequests, 1, + "Large tcache miss should count as one request"); + + dallocx(p, 0); + p = mallocx(large, 0); + expect_ptr_not_null(p, "Unexpected mallocx() failure"); + expect_u64_eq(bin->tstats.nrequests, 2, + "Large tcache hit should increment request count again"); + + dallocx(p, 0); + expect_d_eq(mallctl("thread.tcache.flush", NULL, NULL, NULL, 0), 0, + "Unexpected 
tcache flush failure"); +} +TEST_END + static size_t tcache_max2nbins(size_t tcache_max) { return sz_size2index(tcache_max) + 1; @@ -358,5 +402,6 @@ TEST_END int main(void) { - return test(test_tcache_max, test_thread_tcache_max); + return test(test_tcache_max, test_large_tcache_nrequests_on_miss, + test_thread_tcache_max); } From 6281482c395fdbf721ff1f09f531315744446b35 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:50:11 -0700 Subject: [PATCH 2603/2608] Nest HPA SEC stats inside hpa_shard JSON --- src/stats.c | 3 +- test/unit/json_stats.c | 77 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/src/stats.c b/src/stats.c index d906ade6..fa018ea0 100644 --- a/src/stats.c +++ b/src/stats.c @@ -1113,9 +1113,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { static void stats_arena_hpa_shard_print(emitter_t *emitter, unsigned i, uint64_t uptime) { - stats_arena_hpa_shard_sec_print(emitter, i); - emitter_json_object_kv_begin(emitter, "hpa_shard"); + stats_arena_hpa_shard_sec_print(emitter, i); stats_arena_hpa_shard_counters_print(emitter, i, uptime); stats_arena_hpa_shard_slabs_print(emitter, i); emitter_json_object_end(emitter); /* End "hpa_shard" */ diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index eee717c7..d6cbc50c 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -185,6 +185,41 @@ static const char *arena_mutex_names[] = {"large", "extent_avail", static const size_t num_arena_mutexes = sizeof(arena_mutex_names) / sizeof(arena_mutex_names[0]); +static const char * +json_find_object_end(const char *object_begin) { + int depth = 0; + for (const char *cur = object_begin; *cur != '\0'; cur++) { + if (*cur == '{') { + depth++; + } else if (*cur == '}') { + depth--; + if (depth == 0) { + return cur; + } + } + } + return NULL; +} + +static const char * +json_find_previous_hpa_shard_object( + const char *json, const char *pos, const 
char **object_end) { + *object_end = NULL; + const char *found = NULL; + const char *cur = json; + const char *next; + + while ((next = strstr(cur, "\"hpa_shard\":{")) != NULL && next < pos) { + found = strchr(next, '{'); + cur = next + 1; + } + if (found == NULL) { + return NULL; + } + *object_end = json_find_object_end(found); + return found; +} + TEST_BEGIN(test_json_stats_mutexes) { test_skip_if(!config_stats); @@ -307,8 +342,48 @@ TEST_BEGIN(test_hpa_shard_json_ndirty_huge) { } TEST_END +TEST_BEGIN(test_hpa_shard_json_contains_sec_stats) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + + void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx failure"); + + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + malloc_stats_print(stats_buf_write_cb, &sbuf, "J"); + + const char *sec_bytes = strstr(sbuf.buf, "\"sec_bytes\""); + expect_ptr_not_null(sec_bytes, "JSON output should contain sec_bytes"); + const char *hpa_shard_end = NULL; + const char *hpa_shard = json_find_previous_hpa_shard_object( + sbuf.buf, sec_bytes, &hpa_shard_end); + expect_ptr_not_null(hpa_shard, + "sec_bytes should be associated with an hpa_shard JSON object"); + expect_ptr_not_null(hpa_shard_end, + "Could not find end of enclosing hpa_shard JSON object"); + expect_true(sec_bytes != NULL && sec_bytes < hpa_shard_end, + "sec_bytes should be nested inside hpa_shard JSON object"); + const char *sec_hits = strstr(hpa_shard, "\"sec_hits\""); + expect_true(sec_hits != NULL && sec_hits < hpa_shard_end, + "sec_hits should be nested inside hpa_shard JSON object"); + const char *sec_misses = strstr(hpa_shard, "\"sec_misses\""); + expect_true(sec_misses != NULL && sec_misses < hpa_shard_end, + "sec_misses should be nested inside hpa_shard JSON object"); + + stats_buf_fini(&sbuf); + dallocx(p, 
MALLOCX_TCACHE_NONE); +} +TEST_END + int main(void) { return test_no_reentrancy(test_json_stats_mutexes, - test_hpa_shard_json_ndirty_huge); + test_hpa_shard_json_ndirty_huge, + test_hpa_shard_json_contains_sec_stats); } From db7d99703d41e58ba2932e98a6e12dd377028231 Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:54:15 -0700 Subject: [PATCH 2604/2608] Add TODO to benchmark possibly better policy --- src/sec.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/sec.c b/src/sec.c index 5f65362f..a3254537 100644 --- a/src/sec.c +++ b/src/sec.c @@ -130,13 +130,17 @@ sec_multishard_trylock_alloc( cur_shard = 0; } } - /* No bin had alloc or had the extent */ + /* + * TODO: Benchmark whether it is worth blocking on all shards here before + * declaring a miss. That could recover more remote-shard hits under + * contention, but it also changes the allocation latency policy. + */ assert(cur_shard == sec_shard_pick(tsdn, sec)); bin = sec_bin_pick(sec, cur_shard, pszind); malloc_mutex_lock(tsdn, &bin->mtx); edata_t *edata = sec_bin_alloc_locked(tsdn, sec, bin, size); if (edata == NULL) { - /* Only now we know it is a miss */ + /* Only now we know it is a miss. 
*/ bin->stats.nmisses++; } malloc_mutex_unlock(tsdn, &bin->mtx); From f265645d02f0bde59833c46977b66acd94dec42e Mon Sep 17 00:00:00 2001 From: Slobodan Predolac Date: Fri, 27 Mar 2026 13:58:27 -0700 Subject: [PATCH 2605/2608] Emit retained HPA slab stats in JSON --- src/stats.c | 6 ++ test/unit/json_stats.c | 122 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 127 insertions(+), 1 deletion(-) diff --git a/src/stats.c b/src/stats.c index fa018ea0..82458fec 100644 --- a/src/stats.c +++ b/src/stats.c @@ -988,6 +988,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); emitter_json_kv( emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); emitter_json_object_end(emitter); /* End "full_slabs" */ /* Next, empty slab stats. */ @@ -1029,6 +1031,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { emitter, "nactive_nonhuge", emitter_type_size, &nactive_nonhuge); emitter_json_kv( emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); emitter_json_object_end(emitter); /* End "empty_slabs" */ /* Last, nonfull slab stats. 
*/ @@ -1103,6 +1107,8 @@ stats_arena_hpa_shard_slabs_print(emitter_t *emitter, unsigned i) { &nactive_nonhuge); emitter_json_kv(emitter, "ndirty_nonhuge", emitter_type_size, &ndirty_nonhuge); + emitter_json_kv(emitter, "nretained_nonhuge", emitter_type_size, + &nretained_nonhuge); emitter_json_object_end(emitter); } emitter_json_array_end(emitter); /* End "nonfull_slabs" */ diff --git a/test/unit/json_stats.c b/test/unit/json_stats.c index d6cbc50c..c206974b 100644 --- a/test/unit/json_stats.c +++ b/test/unit/json_stats.c @@ -196,6 +196,28 @@ json_find_object_end(const char *object_begin) { if (depth == 0) { return cur; } + if (depth < 0) { + return NULL; + } + } + } + return NULL; +} + +static const char * +json_find_array_end(const char *array_begin) { + int depth = 0; + for (const char *cur = array_begin; *cur != '\0'; cur++) { + if (*cur == '[') { + depth++; + } else if (*cur == ']') { + depth--; + if (depth == 0) { + return cur; + } + if (depth < 0) { + return NULL; + } } } return NULL; @@ -220,6 +242,52 @@ json_find_previous_hpa_shard_object( return found; } +static const char * +json_find_named_object( + const char *json, const char *key, const char **object_end) { + *object_end = NULL; + char search_key[128]; + size_t written = malloc_snprintf( + search_key, sizeof(search_key), "\"%s\":{", key); + if (written >= sizeof(search_key)) { + return NULL; + } + + const char *object_begin = strstr(json, search_key); + if (object_begin == NULL) { + return NULL; + } + object_begin = strchr(object_begin, '{'); + if (object_begin == NULL) { + return NULL; + } + *object_end = json_find_object_end(object_begin); + return object_begin; +} + +static const char * +json_find_named_array( + const char *json, const char *key, const char **array_end) { + *array_end = NULL; + char search_key[128]; + size_t written = malloc_snprintf( + search_key, sizeof(search_key), "\"%s\":[", key); + if (written >= sizeof(search_key)) { + return NULL; + } + + const char *array_begin = 
strstr(json, search_key); + if (array_begin == NULL) { + return NULL; + } + array_begin = strchr(array_begin, '['); + if (array_begin == NULL) { + return NULL; + } + *array_end = json_find_array_end(array_begin); + return array_begin; +} + TEST_BEGIN(test_json_stats_mutexes) { test_skip_if(!config_stats); @@ -381,9 +449,61 @@ TEST_BEGIN(test_hpa_shard_json_contains_sec_stats) { } TEST_END +TEST_BEGIN(test_hpa_shard_json_contains_retained_stats) { + test_skip_if(!config_stats); + test_skip_if(!hpa_supported()); + + void *p = mallocx(PAGE, MALLOCX_TCACHE_NONE); + expect_ptr_not_null(p, "Unexpected mallocx failure"); + + uint64_t epoch = 1; + size_t sz = sizeof(epoch); + expect_d_eq(mallctl("epoch", NULL, NULL, (void *)&epoch, sz), 0, + "Unexpected mallctl() failure"); + + stats_buf_t sbuf; + stats_buf_init(&sbuf); + malloc_stats_print(stats_buf_write_cb, &sbuf, "J"); + + const char *full_slabs_end = NULL; + const char *full_slabs = json_find_named_object( + sbuf.buf, "full_slabs", &full_slabs_end); + expect_ptr_not_null( + full_slabs, "JSON output should contain full_slabs"); + const char *full_retained = strstr(full_slabs, "\"nretained_nonhuge\""); + expect_true(full_retained != NULL && full_retained < full_slabs_end, + "full_slabs should contain nretained_nonhuge"); + + const char *empty_slabs_end = NULL; + const char *empty_slabs = json_find_named_object( + sbuf.buf, "empty_slabs", &empty_slabs_end); + expect_ptr_not_null( + empty_slabs, "JSON output should contain empty_slabs"); + const char *empty_retained = strstr( + empty_slabs, "\"nretained_nonhuge\""); + expect_true(empty_retained != NULL && empty_retained < empty_slabs_end, + "empty_slabs should contain nretained_nonhuge"); + + const char *nonfull_slabs_end = NULL; + const char *nonfull_slabs = json_find_named_array( + sbuf.buf, "nonfull_slabs", &nonfull_slabs_end); + expect_ptr_not_null( + nonfull_slabs, "JSON output should contain nonfull_slabs"); + const char *nonfull_retained = strstr( + nonfull_slabs, 
"\"nretained_nonhuge\""); + expect_true( + nonfull_retained != NULL && nonfull_retained < nonfull_slabs_end, + "nonfull_slabs should contain nretained_nonhuge"); + + stats_buf_fini(&sbuf); + dallocx(p, MALLOCX_TCACHE_NONE); +} +TEST_END + int main(void) { return test_no_reentrancy(test_json_stats_mutexes, test_hpa_shard_json_ndirty_huge, - test_hpa_shard_json_contains_sec_stats); + test_hpa_shard_json_contains_sec_stats, + test_hpa_shard_json_contains_retained_stats); } From 6515df8cec7fe50f6b45069f82bdf685171f9ee7 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Tue, 7 Apr 2026 10:41:44 -0700 Subject: [PATCH 2606/2608] Documentation updates (#2869) * Document new mallctl interfaces added since 5.3.0 Add documentation for the following new mallctl entries: - opt.debug_double_free_max_scan: double-free detection scan limit - opt.prof_bt_max: max profiling backtrace depth - opt.disable_large_size_classes: page-aligned large allocations - opt.process_madvise_max_batch: batched process_madvise purging - thread.tcache.max: per-thread tcache_max control - thread.tcache.ncached_max.read_sizeclass: query ncached_max - thread.tcache.ncached_max.write: set ncached_max per size range - arena..name: get/set arena names - arenas.hugepage: hugepage size - approximate_stats.active: lightweight active bytes estimate Remove config.prof_frameptr since it still needs more development and is still experimental. Co-authored-by: lexprfuncall --- doc/jemalloc.xml.in | 157 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 147 insertions(+), 10 deletions(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 2a8573b8..692658f8 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -897,16 +897,6 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", during build configuration. - - - config.prof_frameptr - (bool) - r- - - was specified - during build configuration. - - config.stats @@ -1419,6 +1409,17 @@ malloc_conf = "xmalloc:true";]]> extent hooks. 
+ + + opt.prof_bt_max + (unsigned) + r- + [] + + Maximum number of stack frames to record in profiling + backtraces. The default is 128. + + opt.prof @@ -1666,6 +1667,53 @@ malloc_conf = "xmalloc:true";]]> testing this behavior. + + + opt.debug_double_free_max_scan + (unsigned) + r- + [] + + Maximum number of cached pointers to scan in the + thread cache when checking for double-free errors on deallocation. + When debug is enabled, each deallocation into the tcache scans up to + this many recently cached pointers to detect whether the same pointer + is being freed twice. Setting this to 0 disables the check. This + option is set to 0 and has no effect when debug is not enabled. The + default is 32. + + + + + opt.disable_large_size_classes + (bool) + r- + + When enabled (the default), large allocations + (i.e. allocations of size >= SC_LARGE_MINCLASS) + are rounded up to the nearest page boundary rather than the nearest + large size class. This minimizes memory overhead, especially when + using hugepages, at the cost of disabling the standard large size + class hierarchy. + + + + + opt.process_madvise_max_batch + (size_t) + r- + + Maximum number of memory regions to include in each + process_madvise + 2 batch call. When set to 0 + (the default), process_madvise is not used, and the standard + madvise + 2 is used instead. Setting this + to a positive value enables batched purging via process_madvise, which + can reduce the number of system calls needed for + purging. + + thread.arena @@ -1802,6 +1850,47 @@ malloc_conf = "xmalloc:true";]]> the developer may find manual flushing useful. + + + thread.tcache.max + (size_t) + rw + + Get or set the maximum cached size class + (tcache_max) for the calling thread's tcache. The + value is clamped to the maximum allowed limit and rounded up to the + nearest size class boundary. Changing this value will resize the + thread cache accordingly. 
+ + + + + thread.tcache.ncached_max.read_sizeclass + (size_t) + rw + + Query the maximum number of cached objects + (ncached_max) for a given size class in the calling + thread's tcache. The size class is passed in via + newp, and the corresponding + ncached_max is returned via + oldp. + + + + + thread.tcache.ncached_max.write + (char *) + -w + + Set the maximum number of cached objects + (ncached_max) for size classes in the calling + thread's tcache. The input is a string of pipe-separated settings, + where each setting specifies a size range and a count, in the same + format as the opt.tcache_ncached_max runtime + option. + + thread.prof.name @@ -1985,6 +2074,24 @@ malloc_conf = "xmalloc:true";]]> linkend="thread.arena">thread.arena. + + + arena.<i>.name + (char *) + rw + + Get or set a descriptive name for arena <i>. + Arena names can be up to 32 characters long (including the null + terminator); longer names are truncated. When reading, the caller + passes a pointer to a pre-allocated buffer (of at least 32 bytes) via + oldp, and + *oldlenp must be + sizeof(char *). + Arena names are also included in the output of malloc_stats_print(). + + + arena.<i>.dss @@ -2342,6 +2449,18 @@ struct extent_hooks_s { Page size. + + + arenas.hugepage + (size_t) + r- + + Hugepage size. This value is also reported in the + output of malloc_stats_print(). + + + arenas.tcache_max @@ -2561,6 +2680,24 @@ struct extent_hooks_s { option for additional information. + + + approximate_stats.active + (size_t) + r- + + Return the total number of bytes in active pages + collected in an unsynchronized manner, without requiring an + epoch update. + As a result, this value should NOT be compared with other + stats. For example, the relative ordering between + approximate_stats.active and stats.active or stats.resident is + not guaranteed. This interface is intended for lightweight monitoring + where an approximate value is sufficient. 
+ + stats.allocated From b8646f4db33338411b590b67f1f04e8a1eedc061 Mon Sep 17 00:00:00 2001 From: Ian Ker-Seymer Date: Fri, 10 Apr 2026 09:05:09 -0400 Subject: [PATCH 2607/2608] Fix `opt.max_background_threads` default in docs --- doc/jemalloc.xml.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in index 692658f8..8bbe8120 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -1137,7 +1137,7 @@ mallctl("arena." STRINGIFY(MALLCTL_ARENAS_ALL) ".decay", Maximum number of background threads that will be created if background_thread is set. - Defaults to number of cpus. + Defaults to 4. From 81034ce1f1373e37dc865038e1bc8eeecf559ce8 Mon Sep 17 00:00:00 2001 From: Guangli Dai Date: Mon, 13 Apr 2026 17:12:37 -0700 Subject: [PATCH 2608/2608] Update ChangeLog for release 5.3.1 --- ChangeLog | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) diff --git a/ChangeLog b/ChangeLog index 32fde562..3bc84360 100644 --- a/ChangeLog +++ b/ChangeLog @@ -4,6 +4,154 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.3.1 (Apr 13, 2026) + +This release includes over 390 commits spanning bug fixes, new features, +performance optimizations, and portability improvements. Multiple percent +of system-level metric improvements were measured in tested production +workloads. The release has gone through large-scale production testing +at Meta. + +New features: + - Support pvalloc. (@Lapenkov: 5b1f2cc5) + - Add double free detection for the debug build. (@izaitsevfb: + 36366f3c, @guangli-dai: 42daa1ac, @divanorama: 1897f185) + - Add compile-time option `--enable-pageid` to enable memory mapping + annotation. (@devnexen: 4fc5c4fb) + - Add runtime option `prof_bt_max` to control the max stack depth for + profiling. 
(@guangli-dai: a0734fd6) + - Add compile-time option `--enable-force-getenv` to use `getenv` instead + of `secure_getenv`. (@interwq: 481bbfc9) + - Add compile-time option `--disable-dss` to disable the usage of + `sbrk(2)`. (@Svetlitski: ea5b7bea) + - Add runtime option `tcache_ncached_max` to control the number of items + in each size bin in the thread cache. (@guangli-dai: 8a22d10b) + - Add runtime option `calloc_madvise_threshold` to determine if kernel or + memset is used to zero the allocations for calloc. (@nullptr0-0: + 5081c16b) + - Add compile-time option `--disable-user-config` to disable reading the + runtime configurations from `/etc/malloc.conf` or environment variable + `MALLOC_CONF`. (@roblabla: c17bf8b3) + - Add runtime option `disable_large_size_classes` to guard the new usable + size calculation, which minimizes the memory overhead for large + allocations, i.e., >= 4 * PAGE. (@guangli-dai: c067a55c, 8347f104) + - Enable process_madvise usage, add runtime option + `process_madvise_max_batch` to control the max # of regions in each + madvise batch. (@interwq: 22440a02, @spredolac: 4246475b) + - Add mallctl interfaces: + + `opt.prof_bt_max` (@guangli-dai: a0734fd6) + + `arena..name` to set and get arena names. (@guangli-dai: ba19d2cb) + + `thread.tcache.max` to set and get the `tcache_max` of the current + thread. (@guangli-dai: a442d9b8) + + `thread.tcache.ncached_max.write` and + `thread.tcache.ncached_max.read_sizeclass` to set and get the + `ncached_max` setup of the current thread. (@guangli-dai: 630f7de9, + 6b197fdd) + + `arenas.hugepage` to return the hugepage size used, also exported to + malloc stats. (@ilvokhin: 90c627ed) + + `approximate_stats.active` to return an estimate of the current active + bytes, which should not be compared with other stats retrieved. + (@guangli-dai: 0988583d) + +Bug fixes: + - Prevent potential deadlocks in decaying during reentrancy. (@interwq: + 434a68e2) + - Fix segfault in extent coalescing. 
(@Svetlitski: 12311fe6) + - Add null pointer detections in mallctl calls. (@Svetlitski: dc0a184f, + 0288126d) + - Make mallctl `arenas.lookup` triable without crashing on invalid + pointers. (@auxten: 019cccc2, 5bac3849) + - Demote sampled allocations for proper deallocations during + `arena_reset`. (@Svetlitski: 62648c88) + - Fix jemalloc's `read(2)` and `write(2)`. (@Svetlitski: d2c9ed3d, @lexprfuncall: + 9fdc1160) + - Fix the pkg-config metadata file. (@BtbN: ed7e6fe7, ce8ce99a) + - Fix the autogen.sh so that it accepts quoted extra options. + (@honggyukim: f6fe6abd) + - Fix `rallocx()` to set errno to ENOMEM upon OOMing. (@arter97: 38056fea, + @interwq: 83b07578) + - Avoid stack overflow for internal variable array usage. (@nullptr0-0: + 47c9bcd4, 48f66cf4, @xinydev: 9169e927) + - Fix background thread initialization race. (@puzpuzpuz: 4d0ffa07) + - Guard os_page_id against a NULL address. (@lexprfuncall: 79cc7dcc) + - Handle tcache init failures gracefully. (@lexprfuncall: a056c20d) + - Fix missing release of acquired neighbor edata in + extent_try_coalesce_impl. (@spredolac: 675ab079) + - Fix memory leak of old curr_reg on san_bump_grow_locked failure. + (@spredolac: 5904a421) + - Fix large alloc nrequests under-counting on cache misses. (@spredolac: + 3cc56d32) + +Portability improvements: + - Fix the build in C99. (@abaelhe: 56ddbea2) + - Add `pthread_setaffinity_np` detection for non Linux/BSD platforms. + (@devnexen: 4c95c953) + - Make `VARIABLE_ARRAY` compatible with compilers not supporting VLA, + i.e., Visual Studio C compiler in C11 or C17 modes. (@madscientist: + be65438f) + - Fix the build on Linux using musl library. (@marv: aba1645f, 45249cf5) + - Reduce the memory overhead in small allocation sampling for systems + with larger page sizes, e.g., ARM. (@Svetlitski: 5a858c64) + - Add C23's `free_sized` and `free_aligned_sized`. (@Svetlitski: + cdb2c0e0) + - Enable heap profiling on MacOS. (@nullptr0-0: 4b555c11) + - Fix incorrect printing on 32bit. 
(@sundb: 630434bb) + - Make `JEMALLOC_CXX_THROW` compatible with C++ versions newer than + C++17. (@r-barnes, @guangli-dai: 21bcc0a8) + - Fix mmap tag conflicts on MacOS. (@kdrag0n: c893fcd1) + - Fix monotonic timer assumption for win32. (@burtonli: 8dc97b11) + - Fix VM over-reservation on systems with larger pages, e.g., aarch64. + (@interwq: cd05b19f) + - Remove `unreachable()` macro conditionally to prevent definition + conflicts for C23+. (@appujee: d8486b26, 4b88bddb) + - Fix dlsym failure observed on FreeBSD. (@rhelmot: 86bbabac) + - Change the default page size to 64KB on aarch64 Linux. (@lexprfuncall: + 9442300c) + - Update config.guess and config.sub to the latest version. + (@lexprfuncall: c51949ea) + - Determine the page size on Android from NDK header files. + (@lexprfuncall: c51abba1) + - Improve the portability of grep patterns in configure.ac. + (@lexprfuncall: 365747bc) + - Add compile-time option `--with-cxx-stdlib` to specify the C++ standard + library. (@yuxuanchen1997: a10ef3e1) + +Optimizations and refactors: + - Enable tcache for deallocation-only threads. (@interwq: 143e9c4a) + - Inline to accelerate operator delete. (@guangli-dai: e8f9f138) + - Optimize pairing heap's performance. (@deadalnix: 5266152d, be6da4f6, + 543e2d61, 10d71315, 92aa52c0, @Svetlitski: 36ca0c1b) + - Inline the storage for thread name in the profiling data. (@interwq: + ce0b7ab6, e62aa478) + - Optimize a hot function `edata_cmp_summary_comp` to accelerate it. + (@Svetlitski: 6841110b, @guangli-dai: 0181aaa4) + - Allocate thread cache using the base allocator, which enables thread + cache to use thp when `metadata_thp` is turned on. (@interwq: + 72cfdce7) + - Allow oversize arena not to purge immediately when background threads + are enabled, although the default decay time is 0 to be back compatible. + (@interwq: d1313313) + - Optimize thread-local storage implementation on Windows. 
(@mcfi: + 9e123a83, 3a0d9cda) + - Optimize fast path to allow static size class computation. (@interwq: + 323ed2e3) + - Redesign tcache GC to regulate the frequency and make it + locality-aware. The new design is default on, guarded by option + `experimental_tcache_gc`. (@nullptr0-0: 0c88be9e, e2c9f3a9, + 14d5dc13, @deadalnix: 5afff2e4) + - Reduce the arena switching overhead by avoiding forced purging when + background thread is enabled. (@interwq: a3910b98) + - Improve the reuse efficiency by limiting the maximum coalesced size for + large extents. (@jiebinn: 3c14707b) + - Refactor thread events to allow registration of users' thread events + and remove prof_threshold as the built-in event. (@spredolac: e6864c60, + 015b0179, 34ace916) + +Documentation: + - Update Windows building instructions. (@Lapenkov: 37139328) + - Add vcpkg installation instructions. (@LilyWangLL: c0c9783e) + - Update profiling internals with an example. (@jordalgo: b04e7666) + * 5.3.0 (May 6, 2022) This release contains many speed and space optimizations, from micro